From c13e0b6b968c096459dfdd7991298376e698e52f Mon Sep 17 00:00:00 2001 From: Yu Yao Date: Mon, 24 Apr 2023 14:55:38 -0700 Subject: [PATCH 001/512] Add multimodal support including ViT, CLIP, StableDiffusion, InstructPix2Pix and DreamBooth --- Jenkinsfile | 316 +-- LICENSE | 527 ++++- examples/multimodal/convert_ckpt_to_nemo.py | 164 ++ .../clip/conf/megatron_clip_config.yaml | 249 +++ .../clip/conf/megatron_clip_export.yaml | 16 + .../conf/megatron_clip_imagenet_zeroshot.yaml | 17 + .../clip/conf/megatron_clip_infer.yaml | 13 + .../foundation/clip/megatron_clip_export.py | 146 ++ .../clip/megatron_clip_imagenet_zeroshot.py | 130 ++ .../foundation/clip/megatron_clip_infer.py | 96 + .../foundation/clip/megatron_clip_pretrain.py | 90 + .../dreambooth/conf/dreambooth.yaml | 214 ++ .../dreambooth/conf/dreambooth_export.yaml | 23 + .../dreambooth/conf/dreambooth_infer.yaml | 32 + .../generative/dreambooth/dreambooth.py | 144 ++ .../dreambooth/dreambooth_export.py | 198 ++ .../generative/dreambooth/dreambooth_infer.py | 46 + .../instruct_pix2pix/conf/sd_edit.yaml | 23 + .../instruct_pix2pix/conf/sd_export.yaml | 20 + .../instruct_pix2pix/conf/sd_finetune.yaml | 167 ++ .../instruct_pix2pix/sd_edit_cli.py | 175 ++ .../instruct_pix2pix/sd_edit_export.py | 284 +++ .../instruct_pix2pix/sd_finetune.py | 88 + .../stable_diffusion/conf/sd_export.yaml | 23 + .../stable_diffusion/conf/sd_fid_images.yaml | 45 + .../stable_diffusion/conf/sd_infer.yaml | 31 + .../stable_diffusion/conf/sd_train.yaml | 169 ++ .../stable_diffusion/generate_fid_images.py | 85 + .../generative/stable_diffusion/sd_export.py | 198 ++ .../generative/stable_diffusion/sd_infer.py | 46 + .../generative/stable_diffusion/sd_train.py | 88 + ...egatron_gpt_prompt_learning_inference.yaml | 1 + .../megatron_bart_pretraining.py | 4 +- .../megatron_bert_pretraining.py | 4 +- .../megatron_gpt_pretraining.py | 4 +- .../megatron_gpt_prompt_learning.py | 4 +- .../megatron_lm_ckpt_to_nemo.py | 4 +- .../megatron_retro_cal_shape.py | 6 +- .../megatron_retro_fine_tune.py | 6 +- .../megatron_retro_mutransfer_pretrain.py | 6 +- .../megatron_retro_pretraining.py | 6 +- .../megatron_t5_lm_adaptation_finetune.py | 4 +- .../megatron_t5_pretraining.py | 4 +- .../megatron_t5_seq2seq_eval.py | 4 +- .../megatron_t5_seq2seq_finetune.py | 4 +- .../tuning/megatron_gpt_adapter_tuning.py | 4 +- .../tuning/megatron_gpt_ia3_tuning.py | 4 +- .../tuning/megatron_t5_adapter_tuning.py | 4 +- .../tuning/megatron_t5_ia3_tuning.py | 4 +- .../megatron_nmt_training.py | 4 +- examples/vision/convert_ckpt_to_nemo.py | 152 ++ .../megatron_vit_classification_config.yaml | 163 ++ .../megatron_vit_classification_evaluate.yaml | 15 + .../megatron_vit_classification_export.yaml | 16 + .../megatron_vit_classification_infer.yaml | 12 + .../megatron_vit_classification_evaluate.py | 137 ++ .../megatron_vit_classification_export.py | 122 ++ .../megatron_vit_classification_finetune.py | 95 + .../megatron_vit_classification_infer.py | 154 ++ .../megatron_vit_classification_pretrain.py | 87 + nemo/collections/multimodal/__init__.py | 13 + nemo/collections/multimodal/data/__init__.py | 13 + .../multimodal/data/clip/__init__.py | 13 + .../data/clip/augmentations/__init__.py | 13 + .../data/clip/augmentations/augmentations.py | 103 + .../multimodal/data/clip/clip_dataset.py | 193 ++ .../data/clip/imagenet_zeroshot_data.py | 263 +++ .../multimodal/data/common/__init__.py | 13 + .../multimodal/data/common/data_samplers.py | 123 ++ .../multimodal/data/common/webdataset.py | 241 +++ .../data/dreambooth/dreambooth_dataset.py | 86 + .../data/instruct_pix2pix/__init__.py | 13 + .../data/instruct_pix2pix/edit_dataset.py | 135 ++ .../data/stable_diffusion/__init__.py | 13 + .../stable_diffusion/augmentation/__init__.py | 13 + .../augmentation/augmentations.py | 72 + .../stable_diffusion_dataset.py | 100 + .../data/stable_diffusion/wds_sampler.py | 70 + .../data/stable_diffusion/webdataset.py | 389 ++++ .../data/stable_diffusion/webdataset_utils.py | 280 +++ .../collections/multimodal/losses/__init__.py | 13 + .../multimodal/losses/clip_loss.py | 122 ++ .../collections/multimodal/models/__init__.py | 13 + .../multimodal/models/clip/__init__.py | 13 + .../models/clip/megatron_clip_models.py | 977 +++++++++ .../multimodal/models/dreambooth/__init__.py | 13 + .../models/dreambooth/dreambooth.py | 503 +++++ .../multimodal/models/dreambooth/util.py | 147 ++ .../models/instruct_pix2pix/__init__.py | 13 + .../models/instruct_pix2pix/ldm/__init__.py | 13 + .../models/instruct_pix2pix/ldm/ddpm_edit.py | 259 +++ .../models/multimodal_base_model.py | 604 ++++++ .../models/stable_diffusion/__init__.py | 13 + .../stable_diffusion/diffusion_model.py | 78 + .../models/stable_diffusion/ldm/__init__.py | 13 + .../stable_diffusion/ldm/autoencoder.py | 551 +++++ .../models/stable_diffusion/ldm/ddpm.py | 1769 +++++++++++++++++ .../stable_diffusion/ldm/ddpm_legacy.py | 1482 ++++++++++++++ .../models/stable_diffusion/ldm_config.py | 141 ++ .../stable_diffusion/samplers/__init__.py | 16 + .../stable_diffusion/samplers/base_sampler.py | 226 +++ .../models/stable_diffusion/samplers/ddim.py | 77 + .../stable_diffusion/samplers/k_diffusion.py | 725 +++++++ .../models/stable_diffusion/samplers/plms.py | 58 + .../multimodal/modules/__init__.py | 13 + .../modules/stable_diffusion/__init__.py | 13 + .../modules/stable_diffusion/attention.py | 376 ++++ .../diffusionmodules/__init__.py | 13 + .../diffusionmodules/model.py | 856 ++++++++ .../diffusionmodules/openaimodel.py | 1208 +++++++++++ .../stable_diffusion/diffusionmodules/util.py | 276 +++ .../distributions/__init__.py | 13 + .../distributions/distributions.py | 105 + .../stable_diffusion/encoders/__init__.py | 13 + .../stable_diffusion/encoders/modules.py | 212 ++ .../encoders/x_transformer.py | 655 ++++++ nemo/collections/multimodal/parts/__init__.py | 13 + .../parts/stable_diffusion/__init__.py | 13 + .../parts/stable_diffusion/lr_scheduler.py | 113 ++ .../parts/stable_diffusion/pipeline.py | 185 ++ .../parts/stable_diffusion/utils.py | 214 ++ nemo/collections/multimodal/parts/utils.py | 138 ++ .../megatron/megatron_batch_samplers.py | 9 +- .../megatron_retrieval_model.py | 6 +- .../megatron_t5_prompt_learning_model.py | 4 +- .../megatron/adapters/parallel_adapters.py | 2 +- .../nlp/modules/common/megatron/attention.py | 2 +- .../common/megatron/fused_layer_norm.py | 2 +- .../modules/common/megatron/fused_softmax.py | 7 +- .../modules/common/megatron/language_model.py | 9 + .../nlp/modules/common/megatron/module.py | 2 +- nemo/collections/nlp/parts/nlp_overrides.py | 22 +- nemo/collections/vision/__init__.py | 38 + nemo/collections/vision/data/__init__.py | 13 + .../vision/data/imagenet_classnames.py | 179 ++ .../vision/data/megatron/__init__.py | 13 + .../vision/data/megatron/autoaugment.py | 305 +++ .../vision/data/megatron/data_samplers.py | 96 + .../vision/data/megatron/image_folder.py | 284 +++ .../vision/data/megatron/vit_dataset.py | 285 +++ nemo/collections/vision/losses/__init__.py | 13 + nemo/collections/vision/metrics/__init__.py | 13 + nemo/collections/vision/models/__init__.py | 13 + .../megatron_vit_classification_models.py | 826 ++++++++ .../vision/models/vision_base_model.py | 507 +++++ nemo/collections/vision/modules/__init__.py | 13 + .../vision/modules/common/__init__.py | 13 + .../modules/common/megatron/__init__.py | 13 + .../common/megatron/vision_transformer.py | 490 +++++ .../vision/modules/vit/__init__.py | 13 + .../vision/modules/vit/vit_backbone.py | 382 ++++ nemo/collections/vision/parts/__init__.py | 13 + nemo/package_info.py | 2 +- nemo/utils/trt_utils.py | 59 + requirements/requirements_vision.txt | 8 + scripts/fid-eval-text2img/TFinception_V3.py | 246 +++ .../fid-eval-text2img/compute_clip_score.py | 115 ++ scripts/fid-eval-text2img/compute_fid.py | 350 ++++ scripts/fid-eval-text2img/eval_fid.py | 98 + scripts/fid-eval-text2img/fid_dataset.py | 129 ++ scripts/fid-eval-text2img/plot.py | 37 + .../collections/multimodal/test_clip_model.py | 489 +++++ tests/collections/vision/test_vit_model.py | 391 ++++ tutorials/00_NeMo_Primer.ipynb | 2 +- tutorials/01_NeMo_Models.ipynb | 2 +- tutorials/02_NeMo_Adapters.ipynb | 2 +- tutorials/AudioTranslationSample.ipynb | 2 +- ...blish_NeMo_Model_On_Hugging_Face_Hub.ipynb | 2 +- .../asr/ASR_CTC_Language_Finetuning.ipynb | 2 +- tutorials/asr/ASR_for_telephony_speech.ipynb | 2 +- tutorials/asr/ASR_with_NeMo.ipynb | 4 +- .../asr/ASR_with_Subword_Tokenization.ipynb | 2 +- tutorials/asr/ASR_with_Transducers.ipynb | 2 +- .../asr/Buffered_Transducer_Inference.ipynb | 2 +- ..._Transducer_Inference_with_LCS_Merge.ipynb | 2 +- tutorials/asr/Intro_to_Transducers.ipynb | 2 +- tutorials/asr/Multilang_ASR.ipynb | 2 +- tutorials/asr/Offline_ASR.ipynb | 2 +- .../Offline_ASR_with_VAD_for_CTC_models.ipynb | 2 +- .../asr/Online_ASR_Microphone_Demo.ipynb | 2 +- tutorials/asr/Online_Noise_Augmentation.ipynb | 2 +- .../Online_Offline_Microphone_VAD_Demo.ipynb | 2 +- .../Online_Offline_Speech_Commands_Demo.ipynb | 2 +- .../asr/Self_Supervised_Pre_Training.ipynb | 2 +- tutorials/asr/Speech_Commands.ipynb | 2 +- tutorials/asr/Streaming_ASR.ipynb | 2 +- tutorials/asr/Voice_Activity_Detection.ipynb | 2 +- .../asr/asr_adapters/ASR_with_Adapters.ipynb | 2 +- ...netuning_at_Scale_with_AWS_SageMaker.ipynb | 2 +- .../cloud/aws/SageMaker_ASR_Training.ipynb | 2 +- ...Language_Models_for_Downstream_Tasks.ipynb | 2 +- tutorials/nlp/02_NLP_Tokenizers.ipynb | 4 +- ...a_Preprocessing_and_Cleaning_for_NMT.ipynb | 2 +- tutorials/nlp/Dialogue.ipynb | 2 +- tutorials/nlp/Entity_Linking_Medical.ipynb | 2 +- tutorials/nlp/GLUE_Benchmark.ipynb | 2 +- ...Joint_Intent_and_Slot_Classification.ipynb | 2 +- tutorials/nlp/MegatronBert_export.ipynb | 2 +- ...on_Synthetic_Tabular_Data_Generation.ipynb | 2 +- .../nlp/Punctuation_and_Capitalization.ipynb | 2 +- ...ion_and_Capitalization_Lexical_Audio.ipynb | 2 +- tutorials/nlp/Question_Answering.ipynb | 2 +- .../nlp/Relation_Extraction-BioMegatron.ipynb | 2 +- ...xt_Classification_Sentiment_Analysis.ipynb | 2 +- .../Token_Classification-BioMegatron.ipynb | 2 +- ...ssification_Named_Entity_Recognition.ipynb | 4 +- .../nlp/Zero_Shot_Intent_Recognition.ipynb | 2 +- .../ASR_with_SpeakerDiarization.ipynb | 2 +- .../Speaker_Diarization_Inference.ipynb | 2 +- .../Speaker_Diarization_Training.ipynb | 4 +- .../Speaker_Identification_Verification.ipynb | 2 +- .../tools/CTC_Segmentation_Tutorial.ipynb | 2 +- tutorials/tools/Multispeaker_Simulator.ipynb | 2 +- .../tts/Aligner_Inference_Examples.ipynb | 2 +- .../Evaluation_MelCepstralDistortion.ipynb | 2 +- .../tts/FastPitch_ChineseTTS_Training.ipynb | 2 +- tutorials/tts/FastPitch_Finetuning.ipynb | 2 +- .../tts/FastPitch_GermanTTS_Training.ipynb | 2 +- .../tts/FastPitch_MixerTTS_Training.ipynb | 2 +- .../tts/FastPitch_Speaker_Interpolation.ipynb | 2 +- .../tts/Inference_DurationPitchControl.ipynb | 2 +- tutorials/tts/Inference_ModelSelect.ipynb | 2 +- tutorials/tts/NeMo_TTS_Primer.ipynb | 2 +- .../tts/Pronunciation_customization.ipynb | 2 +- tutorials/tts/Tacotron2_Training.ipynb | 2 +- utils/flash-attention.patch | 73 + utils/triton.patch | 53 + 227 files changed, 24811 insertions(+), 282 deletions(-) create mode 100644 examples/multimodal/convert_ckpt_to_nemo.py create mode 100644 examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml create mode 100644 examples/multimodal/foundation/clip/conf/megatron_clip_export.yaml create mode 100755 examples/multimodal/foundation/clip/conf/megatron_clip_imagenet_zeroshot.yaml create mode 100755 examples/multimodal/foundation/clip/conf/megatron_clip_infer.yaml create mode 100644 examples/multimodal/foundation/clip/megatron_clip_export.py create mode 100644 examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py create mode 100644 examples/multimodal/foundation/clip/megatron_clip_infer.py create mode 100644 examples/multimodal/foundation/clip/megatron_clip_pretrain.py create mode 100644 examples/multimodal/generative/dreambooth/conf/dreambooth.yaml create mode 100644 examples/multimodal/generative/dreambooth/conf/dreambooth_export.yaml create mode 100644 examples/multimodal/generative/dreambooth/conf/dreambooth_infer.yaml create mode 100644 examples/multimodal/generative/dreambooth/dreambooth.py create mode 100644 examples/multimodal/generative/dreambooth/dreambooth_export.py create mode 100644 examples/multimodal/generative/dreambooth/dreambooth_infer.py create mode 100644 examples/multimodal/generative/instruct_pix2pix/conf/sd_edit.yaml create mode 100644 examples/multimodal/generative/instruct_pix2pix/conf/sd_export.yaml create mode 100644 examples/multimodal/generative/instruct_pix2pix/conf/sd_finetune.yaml create mode 100644 examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py create mode 100644 examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py create mode 100644 examples/multimodal/generative/instruct_pix2pix/sd_finetune.py create mode 100644 examples/multimodal/generative/stable_diffusion/conf/sd_export.yaml create mode 100644 examples/multimodal/generative/stable_diffusion/conf/sd_fid_images.yaml create mode 100644 examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml create mode 100644 examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml create mode 100644 examples/multimodal/generative/stable_diffusion/generate_fid_images.py create mode 100644 examples/multimodal/generative/stable_diffusion/sd_export.py create mode 100644 examples/multimodal/generative/stable_diffusion/sd_infer.py create mode 100644 examples/multimodal/generative/stable_diffusion/sd_train.py create mode 100644 examples/vision/convert_ckpt_to_nemo.py create mode 100755 examples/vision/vision_transformer/conf/megatron_vit_classification_config.yaml create mode 100755 examples/vision/vision_transformer/conf/megatron_vit_classification_evaluate.yaml create mode 100644 examples/vision/vision_transformer/conf/megatron_vit_classification_export.yaml create mode 100755 examples/vision/vision_transformer/conf/megatron_vit_classification_infer.yaml create mode 100644 examples/vision/vision_transformer/megatron_vit_classification_evaluate.py create mode 100644 examples/vision/vision_transformer/megatron_vit_classification_export.py create mode 100644 examples/vision/vision_transformer/megatron_vit_classification_finetune.py create mode 100644 examples/vision/vision_transformer/megatron_vit_classification_infer.py create mode 100644 examples/vision/vision_transformer/megatron_vit_classification_pretrain.py create mode 100644 nemo/collections/multimodal/__init__.py create mode 100644 nemo/collections/multimodal/data/__init__.py create mode 100644 nemo/collections/multimodal/data/clip/__init__.py create mode 100644 nemo/collections/multimodal/data/clip/augmentations/__init__.py create mode 100644 nemo/collections/multimodal/data/clip/augmentations/augmentations.py create mode 100644 nemo/collections/multimodal/data/clip/clip_dataset.py create mode 100644 nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py create mode 100644 nemo/collections/multimodal/data/common/__init__.py create mode 100644 nemo/collections/multimodal/data/common/data_samplers.py create mode 100644 nemo/collections/multimodal/data/common/webdataset.py create mode 100644 nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py create mode 100644 nemo/collections/multimodal/data/instruct_pix2pix/__init__.py create mode 100644 nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py create mode 100644 nemo/collections/multimodal/data/stable_diffusion/__init__.py create mode 100644 nemo/collections/multimodal/data/stable_diffusion/augmentation/__init__.py create mode 100644 nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py create mode 100644 nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py create mode 100644 nemo/collections/multimodal/data/stable_diffusion/wds_sampler.py create mode 100644 nemo/collections/multimodal/data/stable_diffusion/webdataset.py create mode 100644 nemo/collections/multimodal/data/stable_diffusion/webdataset_utils.py create mode 100644 nemo/collections/multimodal/losses/__init__.py create mode 100644 nemo/collections/multimodal/losses/clip_loss.py create mode 100644 nemo/collections/multimodal/models/__init__.py create mode 100644 nemo/collections/multimodal/models/clip/__init__.py create mode 100644 nemo/collections/multimodal/models/clip/megatron_clip_models.py create mode 100644 nemo/collections/multimodal/models/dreambooth/__init__.py create mode 100644 nemo/collections/multimodal/models/dreambooth/dreambooth.py create mode 100644 nemo/collections/multimodal/models/dreambooth/util.py create mode 100644 nemo/collections/multimodal/models/instruct_pix2pix/__init__.py create mode 100644 nemo/collections/multimodal/models/instruct_pix2pix/ldm/__init__.py create mode 100644 nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py create mode 100644 nemo/collections/multimodal/models/multimodal_base_model.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/__init__.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/ldm/__init__.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm_legacy.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/ldm_config.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py create mode 100644 nemo/collections/multimodal/modules/__init__.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/__init__.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/attention.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/__init__.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/distributions/__init__.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/encoders/__init__.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py create mode 100644 nemo/collections/multimodal/parts/__init__.py create mode 100644 nemo/collections/multimodal/parts/stable_diffusion/__init__.py create mode 100644 nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py create mode 100644 nemo/collections/multimodal/parts/stable_diffusion/pipeline.py create mode 100644 nemo/collections/multimodal/parts/stable_diffusion/utils.py create mode 100644 nemo/collections/multimodal/parts/utils.py create mode 100644 nemo/collections/vision/__init__.py create mode 100644 nemo/collections/vision/data/__init__.py create mode 100644 nemo/collections/vision/data/imagenet_classnames.py create mode 100644 nemo/collections/vision/data/megatron/__init__.py create mode 100644 nemo/collections/vision/data/megatron/autoaugment.py create mode 100644 nemo/collections/vision/data/megatron/data_samplers.py create mode 100644 nemo/collections/vision/data/megatron/image_folder.py create mode 100644 nemo/collections/vision/data/megatron/vit_dataset.py create mode 100644 nemo/collections/vision/losses/__init__.py create mode 100644 nemo/collections/vision/metrics/__init__.py create mode 100644 nemo/collections/vision/models/__init__.py create mode 100644 nemo/collections/vision/models/megatron_vit_classification_models.py create mode 100644 nemo/collections/vision/models/vision_base_model.py create mode 100644 nemo/collections/vision/modules/__init__.py create mode 100644 nemo/collections/vision/modules/common/__init__.py create mode 100644 nemo/collections/vision/modules/common/megatron/__init__.py create mode 100644 nemo/collections/vision/modules/common/megatron/vision_transformer.py create mode 100644 nemo/collections/vision/modules/vit/__init__.py create mode 100644 nemo/collections/vision/modules/vit/vit_backbone.py create mode 100644 nemo/collections/vision/parts/__init__.py create mode 100644 nemo/utils/trt_utils.py create mode 100644 requirements/requirements_vision.txt create mode 100644 scripts/fid-eval-text2img/TFinception_V3.py create mode 100644 scripts/fid-eval-text2img/compute_clip_score.py create mode 100644 scripts/fid-eval-text2img/compute_fid.py create mode 100644 scripts/fid-eval-text2img/eval_fid.py create mode 100644 scripts/fid-eval-text2img/fid_dataset.py create mode 100644 scripts/fid-eval-text2img/plot.py create mode 100644 tests/collections/multimodal/test_clip_model.py create mode 100644 tests/collections/vision/test_vit_model.py create mode 100644 utils/flash-attention.patch create mode 100644 utils/triton.patch diff --git a/Jenkinsfile b/Jenkinsfile index 3e4895715df4..f8c0605670df 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -94,8 +94,8 @@ pipeline { stage('L0: Unit Tests CPU') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } steps { @@ -106,8 +106,8 @@ pipeline { stage('L2: ASR dev run') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -192,8 +192,8 @@ pipeline { stage('L2: ASR dev run - part two') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -222,8 +222,8 @@ pipeline { stage('L2: Speech to Text EMA') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } steps { @@ -243,8 +243,8 @@ pipeline { stage('L2: Speaker dev run') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -366,8 +366,8 @@ pipeline { // stage('L2: ASR DALI dev run') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.17.0' + // changeRequest target: 'r1.17.0' // } // } // failFast true @@ -434,8 +434,8 @@ pipeline { // stage('L2: ASR RNNT dev run') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.17.0' + // changeRequest target: 'r1.17.0' // } // } // failFast true @@ -496,8 +496,8 @@ pipeline { // stage('L2: Hybrid ASR RNNT-CTC dev run') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.17.0' + // changeRequest target: 'r1.17.0' // } // } // failFast true @@ -526,8 +526,8 @@ pipeline { stage('L2: ASR Multi-dataloader dev run') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -574,8 +574,8 @@ pipeline { stage('L2: ASR Adapters') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -619,8 +619,8 @@ pipeline { stage('L2: Megatron T5 Adapter PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -665,8 +665,8 @@ pipeline { stage('L2: Megatron T5 Adapter TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -709,8 +709,8 @@ pipeline { stage('L2: Megatron T5 IA3 PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -755,8 +755,8 @@ pipeline { stage('L2: Megatron T5 IA3 TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -799,8 +799,8 @@ pipeline { stage('L2: Megatron GPT Adapter TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -842,8 +842,8 @@ pipeline { stage('L2: Megatron GPT Adapter PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -886,8 +886,8 @@ pipeline { stage('L2: Speech Transcription') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -907,8 +907,8 @@ pipeline { stage('L2: Transducer alignment') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -924,8 +924,8 @@ pipeline { stage('L2: Segmentation Tool') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } stages { @@ -980,8 +980,8 @@ pipeline { stage('L2: G2P Models') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -1062,8 +1062,8 @@ pipeline { // stage('L2: Multi-GPU Megatron finetuning') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.17.0' + // changeRequest target: 'r1.17.0' // } // } // failFast true @@ -1089,8 +1089,8 @@ pipeline { stage('L2: STS-b') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -1149,8 +1149,8 @@ pipeline { stage('L2: Dialogue Classification') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -1320,8 +1320,8 @@ pipeline { stage('L2: Dialogue Generation') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -1386,8 +1386,8 @@ pipeline { // stage('L2: Dialogue Generation Part 2') { // when { // anyOf { -// branch 'main' -// changeRequest target: 'main' +// branch 'r1.17.0' +// changeRequest target: 'r1.17.0' // } // } // failFast true @@ -1416,8 +1416,8 @@ pipeline { stage('L2: COPY') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -1446,8 +1446,8 @@ pipeline { stage('L2: Duplex Text Normalization') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -1484,8 +1484,8 @@ pipeline { // stage('L2: MegaBERT Token Classification') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.17.0' + // changeRequest target: 'r1.17.0' // } // } // failFast true @@ -1510,8 +1510,8 @@ pipeline { stage('L2: BERT Text Classification') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -1539,8 +1539,8 @@ pipeline { stage('L2: Parallel BERT Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -1598,8 +1598,8 @@ pipeline { stage('L2: Parallel BART Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -1659,8 +1659,8 @@ pipeline { stage('L2: Parallel GPT2 Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -1720,8 +1720,8 @@ pipeline { stage('L2: Intent and Slot Classification Tasks') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -1760,8 +1760,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Text Classification') { // when { // anyOf{ - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.17.0' + // changeRequest target: 'r1.17.0' // } // } // failFast true @@ -1789,8 +1789,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Autoresume') { // when { // anyOf{ - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.17.0' + // changeRequest target: 'r1.17.0' // } // } // failFast true @@ -1820,8 +1820,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Evaluation from .nemo') { // when { // anyOf{ - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.17.0' + // changeRequest target: 'r1.17.0' // } // } // failFast true @@ -1841,8 +1841,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Train from .nemo') { // when { // anyOf{ - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.17.0' + // changeRequest target: 'r1.17.0' // } // } // failFast true @@ -1864,8 +1864,8 @@ pipeline { stage('L2: Parallel NLP Examples 2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -1989,8 +1989,8 @@ pipeline { stage('Punctuation & Capitalization tarred dataset') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -2048,8 +2048,8 @@ pipeline { stage('Punctuation & Capitalization, Different ways of passing labels to model') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -2156,8 +2156,8 @@ pipeline { stage('Punctuation & Capitalization inference') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -2182,8 +2182,8 @@ pipeline { stage('L2: Parallel Pretraining BERT pretraining from Text/Preprocessed') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -2244,8 +2244,8 @@ pipeline { stage('L2: Entity Linking') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -2272,8 +2272,8 @@ pipeline { stage('L2: NMT Attention is All You Need Training') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -2395,8 +2395,8 @@ pipeline { stage('L2: NMT Attention is All You Need Inference') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -2431,8 +2431,8 @@ pipeline { stage('L2: NMT Attention is All You Need Finetuning') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -2466,8 +2466,8 @@ pipeline { stage('L2: NMT Tarred Dataset Creation') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -2520,8 +2520,8 @@ pipeline { stage('L2: Megatron NMT Training TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -2619,8 +2619,8 @@ pipeline { // stage('L2: NMT Bottleneck Fallback') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.17.0' + // changeRequest target: 'r1.17.0' // } // } // failFast true @@ -2666,8 +2666,8 @@ pipeline { // stage('L2: NMT Bottleneck Architecture') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.17.0' + // changeRequest target: 'r1.17.0' // } // } // failFast true @@ -2749,8 +2749,8 @@ pipeline { // stage('L2: NMT Bottleneck LVM') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.17.0' + // changeRequest target: 'r1.17.0' // } // } // failFast true @@ -2832,8 +2832,8 @@ pipeline { stage('L2: Megatron Bert Pretraining and Resume Training with Pipeline Paralleism') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -2902,8 +2902,8 @@ pipeline { stage('L2: Megatron Bert Pretraining and Resume Training') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -2973,8 +2973,8 @@ pipeline { stage('L2: Megatron RETRO Pretraining and Resume Training') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -3045,8 +3045,8 @@ pipeline { stage('L2: Megatron RETRO muTransfer Pretraining Performance') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -3128,8 +3128,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: BioMegatron Bert NER Task') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -3146,8 +3146,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Pretraining and Resume Training TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -3230,8 +3230,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Pretraining and Resume Training PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -3382,8 +3382,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Eval') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -3399,8 +3399,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Eval PP2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -3418,8 +3418,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Prompt Tuning TP1 PP1') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -3453,8 +3453,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Prompt Tuning TP2 PP1') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -3497,8 +3497,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' // stage('L2: Megatron GPT Prompt Tuning TP1 PP2') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.17.0' + // changeRequest target: 'r1.17.0' // } // } // failFast true @@ -3542,8 +3542,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' // stage('L2: Megatron GPT Convert from Megatron-LM checkpoing and Eval') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.17.0' + // changeRequest target: 'r1.17.0' // } // } // failFast true @@ -3569,8 +3569,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron Change Partitions') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -3616,8 +3616,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Pretraining and Resume Training TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -3712,8 +3712,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 with ALiBi Pretraining and Resume Training TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -3808,8 +3808,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Pretraining and Resume Training PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -3878,8 +3878,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 w/ Mixture of Expert Pretraining') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -3923,8 +3923,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Prompt Learning TP1 PP1') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -3964,8 +3964,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Prompt Learning TP2 PP1') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -4009,8 +4009,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' // stage('L2: Megatron T5 Prompt Learning TP1 PP2') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.17.0' + // changeRequest target: 'r1.17.0' // } // } // failFast true @@ -4053,8 +4053,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron UL2 Pretraining and Resume Training TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -4133,8 +4133,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Eval') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -4150,8 +4150,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron BART Pretraining and Resume Training, TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -4219,8 +4219,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron BART Pretraining and Resume Training, PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -4292,8 +4292,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 GLUE/XNLI Finetuning') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true @@ -4402,8 +4402,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: TTS Fast dev runs 1') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } parallel { @@ -4548,8 +4548,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L??: Speech Checkpoints tests') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.17.0' + changeRequest target: 'r1.17.0' } } failFast true diff --git a/LICENSE b/LICENSE index f49a4e16e68b..551b265159a3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,307 @@ +The following applies to all files unless otherwise noted: + +NVIDIA NEMO FRAMEWORK MULTIMODAL PRE-RELEASE EVALUATION LICENSE + +IMPORTANT NOTICE – PLEASE READ AND AGREE BEFORE USING THE CONTAINER. This license +agreement (“Agreement”) is a legal agreement between you, whether an individual or +entity ("you”) and NVIDIA Corporation ("NVIDIA") and governs your use of an early +access version of the NVIDIA NeMo framework multimodal container and all its contents +(“CONTAINER”). This Agreement can be accepted only by an adult of legal age of +majority in the country in which the CONTAINER is used. If you don’t have the required +age or authority to accept this Agreement, or if you don’t accept all the terms and +conditions of this Agreement, do not download, install or use the CONTAINER. You +agree to use the CONTAINER only for purposes that are permitted by this Agreement and +any applicable law or regulation in the relevant jurisdictions. + +1. License. +1.1 Subject to the terms of this Agreement, NVIDIA grants you a non-exclusive, +revocable, non-transferable, non-sublicensable (except as expressly granted in this +Agreement), license to: (a) install and use copies of the CONTAINER, and (b) modify and +create derivative works of sample or example source code delivered by NVIDIA as part +of the CONTAINER (if applicable), all the foregoing only for your internal use to +evaluate or develop and test services and applications with the CONTAINER, without use +in production. + +2. Limitations. +Your license to use the CONTAINER and derivative works of the CONTAINER is restricted +as follows: + +2.1 The CONTAINER may run on any computing system with or without NVIDIA GPUs, except +for the NVIDIA proprietary software (such as CUDA and TensorRT software) in the +CONTAINER which is licensed only to run on systems with NVIDIA GPUs. The NVIDIA +proprietary software in the CONTAINER may be present on systems without NVIDIA GPUs, +as long as it is not running on such systems. For components governed by open source +software licenses, see the information in the “Components Under Other Licenses” section +below. + +2.2 The CONTAINER and derivative works may not be used in any commercial-ready products +or services, or separately to even for development, test or evaluation of other NVIDIA +non-NVIDIA products or services. + +2.3 You may not reverse engineer, decompile, or disassemble the CONTAINER components +provided in binary form, nor attempt in any other manner to obtain source code of such +CONTAINER components. + +2.4 You may not change or remove copyright or other proprietary notices in the +CONTAINER. + +2.5 Except as expressly granted in this Agreement, you may not copy, sell, rent, +sublicense, transfer, distribute, modify or create derivative works of the CONTAINER, +or make its functionality available to others. + +2.6 Data generated with use of the CONTAINER may not be used for deployment purposes. + +2.7 You may not bypass, disable, or circumvent any technical limitation, encryption, +security, digital rights management or authentication mechanism in the CONTAINER. + +2.8 You may not replace any NVIDIA software components that are governed by this +Agreement with other software that implements NVIDIA APIs. + +2.9 You may not use the CONTAINER for the purpose of developing competing products or +technologies or assisting a third party in such activities. + +2.10 You may not use the CONTAINER in any manner that would cause it to become subject +to an open source software license; subject to the terms in the “Components Under Other +Licenses” section below. + +2.11 Unless you have an agreement with NVIDIA for this purpose, you may not use the +CONTAINER provided under this Agreement and derivative works in a system or +application where the use of or application where the use of or failure of such system +or application developed with CONTAINER provided by NVIDIA could result in injury, +death or catastrophic damage. NVIDIA will not be liable to you or any third party, in +whole or in part, for any claims or damages arising from these uses. You are solely +responsible for ensuring that systems and applications developed with the CONTAINER +as a whole include sufficient safety and redundancy features and comply with all +applicable legal and regulatory standards and requirements. + +3. Your Privacy: Collection and Use of Information. +Please review the NVIDIA Privacy Policy, located at +https://www.nvidia.com/en-us/about-nvidia/privacy-policy, which explains NVIDIA’s +policy for collecting and using data, as well as visit the NVIDIA Privacy Center, +located at https://www.nvidia.com/en-us/privacy-center, to manage your consent and +privacy preferences. NVIDIA may require certain personal information such as name, +email address and entitlement information including survey responses to deliver or +provide the CONTAINER to you. + +4. Authorized Users. +You may allow employees and contractors of your entity or of your subsidiary(ies) to +access and use the CONTAINER from your secure network to perform the work authorized +by this Agreement on your behalf. If you are an academic institution, you may allow +users enrolled or employed by the academic institution to access and use the CONTAINER +as authorized by this Agreement from your secure network. You are responsible for the +compliance with the terms of this Agreement by your authorized users. Any act or +omission that if committed by you would constitute a breach of this Agreement will be +deemed to constitute a breach of this Agreement if committed by your authorized users. + +5. Confidentiality. +You agree that you will not use, nor authorize others to use, NVIDIA Confidential +Information, except as necessary for the performance of this Agreement, and that you +will not disclose NVIDIA Confidential Information to any third party, except to +permitted users under this Agreement that have a need to know such Confidential +Information for the purpose of this Agreement, provided that each such recipient is +subject to a written agreement that includes confidentiality obligations consistent +with these terms. You agree to use all reasonable efforts to maintain the +confidentiality of NVIDIA Confidential Information in your possession or control, but +in no event less than the efforts that you ordinarily use with respect to your own +Confidential Information of similar nature and importance. “Confidential Information” +means the output, and any results of benchmarking or other competitive analysis or +regression or performance data relating to the CONTAINER. + +6. Pre-Release Versions. +CONTAINER versions or specific features identified as alpha, beta, preview, early +access or otherwise as pre-release may not be fully functional, may contain errors or +design flaws, and may have reduced or different security, privacy, availability, and +reliability standards relative to commercial versions of NVIDIA offerings. You may use +a pre-release CONTAINER at your own risk, understanding that such versions are not +intended for use in production or business-critical systems. NVIDIA may choose not to +make available a commercial version of any pre-release CONTAINER. NVIDIA may also +choose to abandon development and terminate the availability of a pre-release CONTAINER +at any time without liability. + +7. Updates. +NVIDIA may, at its option, make available patches, workarounds or other updates to the +CONTAINER. Unless the updates are provided with their separate governing terms, they +are deemed part of the CONTAINER licensed to you as provided in this Agreement. + +8. Components Under Other Licenses. +The CONTAINER may include or be distributed with components provided with separate +legal notices or terms that accompany the components, such as open source software +terms and other license terms ("Other Licenses”). The components are subject to the +applicable Other Licenses, including any proprietary notices, disclaimers, +requirements and extended use rights; except that this Agreement will prevail regarding +the use of third-party open source software, unless a third-party open source software +license requires its license terms to prevail. Open source software license means any +software, data or documentation subject to any license identified as an open source +license by the Open Source Initiative (http://opensource.org), Free Software Foundation (http://www.fsf.org) or +other similar open source organization or listed by the Software Package Data Exchange +(SPDX) Workgroup under the Linux Foundation (http://www.spdx.org). + +You acknowledge and agree that it is your sole responsibility to obtain any additional +third-party licenses required to make, have made, use, have used, sell, import, and +offer for sale your products or services that include or incorporate components under +Other Licenses, including, without limitation, audio and/or video encoders and decoders +and implementations of technical standards. NVIDIA does not grant to you under this +Agreement any necessary patent or other rights, including standard essential patent +rights, with respect to components under Other Licenses. + +9. Termination. +This Agreement will automatically terminate without notice from NVIDIA if you fail to +comply with any of the terms in this Agreement or if you commence or participate in any +legal proceeding against NVIDIA with respect to the CONTAINER. Additionally, either +party may terminate this Agreement at any time with prior written notice to the other +party. Upon any termination, you must stop using and destroy all copies of the CONTAINER +and derivative works. Upon written request, you will certify in writing that you have +complied with your commitments under this section. All provisions will survive +termination, except for the licenses granted to you. + +10. Ownership. +The CONTAINER, including all intellectual property rights, is and will remain the sole +and exclusive property of NVIDIA or its licensors. Except as expressly granted in this +Agreement, (i) NVIDIA reserves all rights, interests, and remedies in connection with +the CONTAINER, and (ii) no other license or right is granted to you by implication, +estoppel or otherwise. You agree to cooperate with NVIDIA and provide reasonably +requested information to verify your compliance with this Agreement. + +11. Feedback. +You may, but you are not obligated to, provide suggestions, requests, fixes, +modifications, enhancements, or other feedback regarding the CONTAINER +(collectively, “Feedback”). Feedback, even if designated as confidential by you, +will not create any confidentiality obligation for NVIDIA or its affiliates. If +you provide Feedback, you hereby grant NVIDIA, its affiliates and its designees +a non-exclusive, perpetual, irrevocable, sublicensable, worldwide, royalty-free, +fully paid-up and transferable license, under your intellectual property rights, +to publicly perform, publicly display, reproduce, use, make, have made, sell, +offer for sale, distribute (through multiple tiers of distribution), import, +create derivative works of and otherwise commercialize and exploit the Feedback +at NVIDIA’s discretion. You will not give Feedback (i) that you have reason to +believe is subject to any restriction that impairs the exercise of the grant +stated in this section, such as third-party intellectual property rights; or +(ii) subject to license terms which seek to require any product incorporating or +developed using such Feedback, or other intellectual property of NVIDIA or its +affiliates, to be licensed to or otherwise shared with any third party. + +12. Disclaimer of Warranties. +THE CONTAINER IS PROVIDED BY NVIDIA AS-IS AND WITH ALL FAULTS. TO THE FULLEST +EXTENT PERMITTED BY APPLICABLE LAW, NVIDIA DISCLAIMS ALL WARRANTIES AND +REPRESENTATIONS OF ANY KIND, WHETHER EXPRESS, IMPLIED OR STATUTORY, RELATING TO +OR ARISING UNDER THIS AGREEMENT, INCLUDING, WITHOUT LIMITATION, THE WARRANTIES +OF TITLE, NONINFRINGEMENT, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, +USAGE OF TRADE AND COURSE OF DEALING. WITHOUT LIMITING THE FOREGOING, NVIDIA +DOES NOT WARRANT THAT THE CONTAINER WILL MEET YOUR REQUIREMENTS; THAT ANY +DEFECTS OR ERRORS WILL BE CORRECTED; THAT ANY CERTAIN CONTENT WILL BE AVAILABLE; +OR THAT THE CONTAINER IS FREE OF VIRUSES OR OTHER HARMFUL COMPONENTS. NO +INFORMATION OR ADVICE GIVEN BY NVIDIA WILL IN ANY WAY INCREASE THE SCOPE OF ANY +WARRANTY EXPRESSLY PROVIDED IN THIS AGREEMENT. NVIDIA does not warrant or assume +responsibility for the accuracy or completeness of any third-party information, +text, graphics, links contained in THE CONTAINER. + +13. Limitations of Liability. +TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT WILL NVIDIA BE +LIABLE FOR ANY (I) INDIRECT, PUNITIVE, SPECIAL, INCIDENTAL OR CONSEQUENTIAL +DAMAGES, OR (II) DAMAGES FOR THE (A) COST OF PROCURING SUBSTITUTE GOODS OR (B) +LOSS OF PROFITS, REVENUES, USE, DATA OR GOODWILL ARISING OUT OF OR RELATED TO +THIS AGREEMENT, WHETHER BASED ON BREACH OF CONTRACT, TORT (INCLUDING NEGLIGENCE), +STRICT LIABILITY, OR OTHERWISE, AND EVEN IF NVIDIA HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES AND EVEN IF A PARTY'S REMEDIES FAIL THEIR ESSENTIAL +PURPOSE. + +ADDITIONALLY, TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, NVIDIA’S TOTAL +CUMULATIVE AGGREGATE LIABILITY FOR ANY AND ALL LIABILITIES, OBLIGATIONS OR +CLAIMS ARISING OUT OF OR RELATED TO THIS AGREEMENT WILL NOT EXCEED FIVE U.S. +DOLLARS (US$5). + +14. Governing Law and Jurisdiction. +This Agreement will be governed in all respects by the laws of the United States +and the laws of the State of Delaware, without regard to conflict of laws +principles or the United Nations Convention on Contracts for the International Sale +of Goods. The state and federal courts residing in Santa Clara County, +California will have exclusive jurisdiction over any dispute or claim arising +out of or related to this Agreement, and the parties irrevocably consent to +personal jurisdiction and venue in those courts; except that either party may +apply for injunctive remedies or an equivalent type of urgent legal relief in +any jurisdiction. + +15. No Assignment. +NVIDIA may assign, delegate or transfer its rights or obligations under this +Agreement by any means or operation of law. You may not, without NVIDIA’s prior +written consent, assign, delegate or transfer any of your rights or obligations +under this Agreement by any means or operation of law, and any attempt to do so +is null and void. + +16. Waiver. +No failure or delay by a party to enforce any Agreement term or obligation will +operate as a waiver by that party, nor prevent the enforcement of such term or +obligation later. + +17. Export. +You agree to comply with all applicable export, import, trade and economic +sanctions laws and regulations, including U.S. Export Administration Regulations +and Office of Foreign Assets Control regulations. These laws include +restrictions on destinations, end-users and end-use. + +18. Government Use. + The CONTAINER, including related documentation (“Protected Items”) is a +“Commercial product” as this term is defined at 48 C.F.R. 2.101, consisting of +“commercial computer software” and “commercial computer software documentation” +as such terms are used in, respectively, 48 C.F.R. 12.212 and 48 C.F.R. +227.7202 & 252.227-7014(a)(1). Before any Protected Items are supplied to the +U.S. Government, you will (i) inform the U.S. Government in writing that the +Protected Items are and must be treated as commercial computer software and +commercial computer software documentation developed at private expense; (ii) +inform the U.S. Government that the Protected Items are provided subject to the +terms of this Agreement; and (iii) mark the Protected Items as commercial +computer software and commercial computer software documentation developed at +private expense. In no event will you permit the U.S. Government to acquire +rights in Protected Items beyond those specified in 48 C.F.R. 52.227-19(b)(1)- +(2) or 252.227-7013(c) except as expressly approved by NVIDIA in writing. + +19. Notices. +Unless otherwise specifically stated in this Agreement, all notices, requests, +consents and other communications, which are required or permitted under this +Agreement, will be in writing to the address below and will be effective (i) +upon receipt if by personal delivery, (ii) upon receipt if by certified or +registered mail (return receipt requested), or (iii) one (1) day after it is +sent if by next day delivery by a major commercial delivery service. Please +direct your legal notices or other correspondence to NVIDIA Corporation, 2788 +San Tomas Expressway, Santa Clara, California 95051, United States of America, +Attention: Legal Department. + +20. Force Majeure. +Neither party will be liable during any period where an event or circumstance +prevents or delays that party from performing its obligations under this +Agreement and that event or circumstance: (i) is not within the reasonable +control of that party and is not the result of that party’s negligence, and (ii) +cannot be overcome or avoided by that party using reasonably diligent efforts. + +21. Entire Agreement. +Regarding the subject matter of this Agreement, the parties agree that (i) this +Agreement constitutes the entire and exclusive agreement between the parties and +supersedes all prior and contemporaneous communications and (ii) any additional +or different terms or conditions, whether contained in purchase orders, order +acknowledgments, invoices or otherwise, will not be binding on the receiving party +and are null and void. a court of competent jurisdiction rules that a provision of +this Agreement is unenforceable, that provision will be deemed modified to the +extent necessary to make it enforceable and the remainder of this Agreement will +continue in full force and effect. Any amendment to this Agreement must be in writing +and signed by authorized representatives of both parties. + +22. Licensing. +If the terms in this license are not suitable for your organization, or for any +questions regarding this license, please contact NVIDIA at +nvidia-compute-license-questions@nvidia.com. + +(v. April 10, 2023) + +-- + +This repository also contains code from Colossal-AI, and K-diffusion. Files from these +organizations have notices at the top of each file. Below are licenses used in those +files, as indicated. + + +------------- LICENSE FOR from Colossal-AI for Stable Diffusion -------------- + +Copyright 2021- HPC-AI Technology Inc. All rights reserved. Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ @@ -186,7 +490,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [yyyy] [name of copyright owner] + Copyright 2021- HPC-AI Technology Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -198,4 +502,223 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file + limitations under the License. + + ## Some of colossal-ai's code is derived from others projects, which is subject to the following copyright notice: + + Copyright 2021 The Alpa team. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://github.com/alpa-projects/alpa/blob/979a45a3e6187df941ef4a4c4c6eea664527d68d/LICENSE + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + ------------------------------------------------- + + Copyright 2018-2020 Philippe Tillet + Copyright 2020-2022 OpenAI + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files + (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + ---------------- LICENSE FOR Microsoft Deepspeed ---------------- + + MIT License + + Copyright (c) Microsoft Corporation. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE + + ---------------- LICENSE FOR NVIDIA Megatron-LM ---------------- + + Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of NVIDIA CORPORATION nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------- LICENSE FOR NVIDIA Apex ---------------- + + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------- LICENSE FOR Facebook Fairscale ---------------- + + Copyright (c) Facebook, Inc. and its affiliates + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America + and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + ---------------- LICENSE FOR Flash Attention ---------------- + + BSD 3-Clause License + + Copyright (c) 2022, the respective contributors, as shown by the AUTHORS file. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------- LICENSE FOR Facebook xFormers ---------------- + + From xFormers: + + Copyright (c) Facebook, Inc. and its affiliates + + + === + + BSD 3-Clause License + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America + and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + +------------- LICENSE FOR from K-diffusion for Diffusion Inference -------------- + +Copyright (c) 2022 Katherine Crowson + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file diff --git a/examples/multimodal/convert_ckpt_to_nemo.py b/examples/multimodal/convert_ckpt_to_nemo.py new file mode 100644 index 000000000000..e9ba1072f089 --- /dev/null +++ b/examples/multimodal/convert_ckpt_to_nemo.py @@ -0,0 +1,164 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r""" +Conversion script to convert PTL checkpoints into nemo checkpoint. + Example to run this conversion script: + python -m torch.distributed.launch --nproc_per_node= * \ + convert_ckpt_to_nemo.py \ + --checkpoint_folder \ + --checkpoint_name \ + --nemo_file_path \ + --tensor_model_parallel_size \ + --pipeline_model_parallel_size +""" + +import os +import torch +from apex.transformer import parallel_state +from argparse import ArgumentParser +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel +from nemo.collections.multimodal.models.dreambooth.dreambooth import MegatronDreamBooth +from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.utils import AppState, logging +from nemo.utils.distributed import initialize_distributed +from nemo.utils.model_utils import inject_model_parallel_rank + + +def get_args(): + parser = ArgumentParser() + parser.add_argument( + "--checkpoint_folder", + type=str, + default=None, + required=True, + help="Path to PTL checkpoints saved during training. Ex: /raid/nemo_experiments/megatron_gpt/checkpoints", + ) + parser.add_argument( + "--checkpoint_name", + type=str, + default=None, + required=True, + help="Name of checkpoint to be used. Ex: megatron_gpt--val_loss=6.34-step=649-last.ckpt", + ) + + parser.add_argument( + "--hparams_file", + type=str, + default=None, + required=False, + help="Path config for restoring. It's created during training and may need to be modified during restore if restore environment is different than training. Ex: /raid/nemo_experiments/megatron_gpt/hparams.yaml", + ) + parser.add_argument("--nemo_file_path", type=str, default=None, required=True, help="Path to output .nemo file.") + parser.add_argument("--gpus_per_node", type=int, required=False, default=1) + parser.add_argument("--tensor_model_parallel_size", type=int, required=False, default=1) + parser.add_argument("--pipeline_model_parallel_size", type=int, required=False, default=1) + parser.add_argument( + "--pipeline_model_parallel_split_rank", + type=int, + required=False, + default=None, + help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", + ) + parser.add_argument( + "--model_type", type=str, required=False, default="megatron_clip" + ) + parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) + parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") + + args = parser.parse_args() + return args + + +def convert(local_rank, rank, world_size, args): + app_state = AppState() + app_state.data_parallel_rank = 0 + num_nodes = world_size // args.gpus_per_node + if args.bcp: + trainer = Trainer( + devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu', plugins=[TorchElasticEnvironment()] + ) + else: + trainer = Trainer(devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu') + + app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = args.tensor_model_parallel_size + + # no use atm, use to split ranks in encoder/decoder models. + if args.pipeline_model_parallel_size > 1 and args.model_type in []: + if args.pipeline_model_parallel_split_rank is not None: + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_split_rank + else: + if args.pipeline_model_parallel_size % 2 != 0: + raise ValueError( + f"Pipeline model parallel size {args.pipeline_model_parallel_size} must be even if split rank is not specified." + ) + else: + # If split rank is not set, then we set it to be pipeline_model_parallel_size // 2 - this is because in most cases we have the same number of enc/dec layers. + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_size // 2 + else: + app_state.pipeline_model_parallel_split_rank = None + + app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size + + parallel_state.initialize_model_parallel( + tensor_model_parallel_size_=app_state.tensor_model_parallel_size, + pipeline_model_parallel_size_=app_state.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank_=app_state.pipeline_model_parallel_split_rank, + ) + + app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() + app_state.tensor_model_parallel_rank = parallel_state.get_tensor_model_parallel_rank() + + # inject model parallel rank + checkpoint_path = inject_model_parallel_rank(os.path.join(args.checkpoint_folder, args.checkpoint_name)) + + logging.info( + f'rank: {rank}, local_rank: {local_rank}, is loading checkpoint: {checkpoint_path} for tp_rank: {app_state.tensor_model_parallel_rank} and pp_rank: {app_state.pipeline_model_parallel_rank}' + ) + + if args.model_type == 'megatron_clip': + model = MegatronCLIPModel.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, + trainer=trainer) + elif args.model_type == 'stable_diffusion': + model = MegatronLatentDiffusion.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, + trainer=trainer) + elif args.model_type == 'instruct_pix2pix': + model = MegatronLatentDiffusionEdit.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, + trainer=trainer) + elif args.model_type == 'dreambooth': + model = MegatronLatentDiffusion.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, + trainer=trainer) + else: + raise ValueError(f"Unrecognized model_type {args.model_type}.") + + model._save_restore_connector = NLPSaveRestoreConnector() + + if torch.distributed.is_initialized(): + torch.distributed.barrier() + + model.save_to(args.nemo_file_path) + + logging.info(f'NeMo model saved to: {args.nemo_file_path}') + + +if __name__ == '__main__': + args = get_args() + local_rank, rank, world_size = initialize_distributed(args) + convert(local_rank, rank, world_size, args) diff --git a/examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml b/examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml new file mode 100644 index 000000000000..1aed73d362a7 --- /dev/null +++ b/examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml @@ -0,0 +1,249 @@ +name: megatron_clip +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + replace_sampler_ddp: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 375000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + val_check_interval: 100 + check_val_every_n_epoch: null + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: megatron_clip + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 32 # limited by GPU memory + global_batch_size: 32 # will use more micro batches to reach global batch size + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_pretrained: null # used in fine-tuning + # multimodal configs + output_dim: 512 + # As the number of devices used to train increases, so does the space complexity of + # the logit matrix. Using a naïve all-gather scheme, space complexity will be + # `O(n^2)`. Instead, complexity may become effectively linear if the flags + # `--gather-with-grad` and `--local-loss` are used. This alteration results in one-to-one + # numerical results as the naïve method. + local_loss: False # calculate loss w/ local features @ global (instead of realizing full global @ global matrix) + gather_with_grad: True # enable full distributed gradient for feature gather, set this to False may cause convergence issue + + vision: + precision: ${trainer.precision} + # vision configs + patch_dim: 16 + img_h: 224 + img_w: 224 + image_mean: null + image_std: null + num_channels: 3 + drop_patch_rate: 0.0 + drop_path_rate: 0.0 + global_average_pool: False + output_dim: ${model.output_dim} + class_token_length: 8 + preprocess_layernorm: True # apply layer norm to embedded tokens + + # model architecture + encoder_seq_length: 196 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: learned_absolute + num_layers: 12 + hidden_size: 768 + ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 12 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + + text: + precision: ${trainer.precision} + # text configs + output_dim: ${model.output_dim} + + # model architecture + encoder_seq_length: 77 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: learned_absolute + num_layers: 12 + hidden_size: 512 + ffn_hidden_size: 2048 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 8 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + + # Megatron O2-style half-precision + megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'huggingface' + type: 'openai/clip-vit-large-patch14' + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + make_vocab_size_divisible_by: 128 # Pad the vocab size to be divisible by this value for computation efficiency. + + data: + num_workers: 8 + train: + dataset_path: # List of paths to pkl files or tar files + - /datasets/coyo/test.pkl + validation: # List of paths to pkl files or tar files + dataset_path: + - /datasets/coyo/test.pkl + webdataset: + infinite_sampler: False + local_root_path: /datasets/coyo + + imagenet_val: null # Path to imagenet val set for conducting zero shot evaluation. + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 1e-3 + weight_decay: 0.2 + betas: + - 0.9 + - 0.98 + sched: + name: CosineAnnealing + warmup_steps: 2000 + constant_steps: 0 + min_lr: 1e-5 \ No newline at end of file diff --git a/examples/multimodal/foundation/clip/conf/megatron_clip_export.yaml b/examples/multimodal/foundation/clip/conf/megatron_clip_export.yaml new file mode 100644 index 000000000000..86abd360b01e --- /dev/null +++ b/examples/multimodal/foundation/clip/conf/megatron_clip_export.yaml @@ -0,0 +1,16 @@ +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +infer: + out_path: 'megatron-clip' + max_batch_size: 64 + max_dim: 224 + max_text: 64 + +model: + restore_from_path: null # Path to a trained ViT .nemo file + precision: ${trainer.precision} diff --git a/examples/multimodal/foundation/clip/conf/megatron_clip_imagenet_zeroshot.yaml b/examples/multimodal/foundation/clip/conf/megatron_clip_imagenet_zeroshot.yaml new file mode 100755 index 000000000000..79bdac888887 --- /dev/null +++ b/examples/multimodal/foundation/clip/conf/megatron_clip_imagenet_zeroshot.yaml @@ -0,0 +1,17 @@ +trainer: + devices: 8 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: bf16 # 16, 32, or bf16 + +model: + restore_from_path: null # Path to a trained ViT .nemo file + precision: ${trainer.precision} + micro_batch_size: 1000 + global_batch_size: 8000 + + data: + num_workers: 2 + imagenet_val: ??? # path to imagenet val folder + diff --git a/examples/multimodal/foundation/clip/conf/megatron_clip_infer.yaml b/examples/multimodal/foundation/clip/conf/megatron_clip_infer.yaml new file mode 100755 index 000000000000..215cd17841ae --- /dev/null +++ b/examples/multimodal/foundation/clip/conf/megatron_clip_infer.yaml @@ -0,0 +1,13 @@ +image_path: ??? # Path to a image for inference +texts: ??? # List of texts to compute similarity + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +model: + restore_from_path: null # Path to a trained ViT .nemo file + precision: ${trainer.precision} diff --git a/examples/multimodal/foundation/clip/megatron_clip_export.py b/examples/multimodal/foundation/clip/megatron_clip_export.py new file mode 100644 index 000000000000..f11366ec9be1 --- /dev/null +++ b/examples/multimodal/foundation/clip/megatron_clip_export.py @@ -0,0 +1,146 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import torch +from PIL import Image +from omegaconf.omegaconf import OmegaConf, open_dict +from typing import List, Optional, Dict + +from nemo.collections.multimodal.data.clip.clip_dataset import get_preprocess_fns +from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.classes.exportable import Exportable +from nemo.core.config import hydra_runner +from nemo.core.neural_types import ChannelType, NeuralType +from nemo.utils import logging +from nemo.utils.trt_utils import build_engine + + +class CLIPWrapper(torch.nn.Module, Exportable): + def __init__(self, vision_encoder, text_encoder, text_transform): + super(CLIPWrapper, self).__init__() + self.vision_encoder = vision_encoder + self.text_encoder = text_encoder + self.text_transform = text_transform + + def forward(self, image, texts): + image_features = self.vision_encoder(image) + text_features = self.text_encoder(texts) + image_features /= image_features.norm(dim=-1, keepdim=True) + text_features /= text_features.norm(dim=-1, keepdim=True) + + text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1) + + return text_probs + + # For onnx export + def input_example(self, max_batch=8, max_dim=224, max_text=64): + """ + Generates input examples for tracing etc. + Returns: + A tuple of input examples. + """ + sample = next(self.parameters()) + images = torch.randn(max_batch, 3, max_dim, max_dim, device=sample.device) + texts = self.text_transform(["a girl"] * max_text).to(sample.device) + return (images, texts) + + @property + def input_types(self) -> Optional[Dict[str, NeuralType]]: + return { + "images": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), + "texts": NeuralType(('H', 'D'), ChannelType()), + } + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + return {"text_probs": NeuralType(('B', 'H'), ChannelType())} + + @property + def input_names(self) -> List[str]: + return ['images', 'texts'] + + @property + def output_names(self) -> List[str]: + return ['text_probs'] + + +@hydra_runner(config_path="conf", config_name="megatron_clip_export") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + output_dir = cfg.infer.out_path + max_batch_size = cfg.infer.max_batch_size + max_dim = cfg.infer.max_dim + max_text = cfg.infer.max_text + trt_precision = cfg.trainer.precision + cfg.trainer.precision = 32 + + # These configs are required to be off during inference. + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.vision.precision = cfg.trainer.precision + model_cfg.text.precision = cfg.trainer.precision + if cfg.trainer.precision != "bf16": + model_cfg.megatron_amp_O2 = False + model_cfg.sequence_parallel = False + model_cfg.activations_checkpoint_granularity = None + model_cfg.activations_checkpoint_method = None + + trainer, model = setup_trainer_and_model_for_inference( + model_provider=MegatronCLIPModel, + cfg=cfg, + model_cfg_modifier=model_cfg_modifier, + ) + + if model.cfg.get("megatron_amp_O2", False): + vision_encoder = model.model.module.vision_encoder + text_encoder = model.model.module.text_encoder + else: + vision_encoder = model.model.vision_encoder + text_encoder = model.model.text_encoder + + val_image_transform, text_transform = get_preprocess_fns( + model.cfg, + model.tokenizer, + is_train=False, + ) + + os.makedirs(f"{output_dir}/onnx/", exist_ok=True) + os.makedirs(f"{output_dir}/plan/", exist_ok=True) + + clip_model = CLIPWrapper(vision_encoder, text_encoder, text_transform) + dynamic_axes = {'images': {0: 'B'}, 'texts_input': {0, 'H'}} + clip_model.export(f"{output_dir}/onnx/clip.onnx", dynamic_axes=None) + + input_profile = {} + bs1_example = clip_model.input_example(max_batch=1, max_dim=max_dim, max_text=1) + bsmax_example = clip_model.input_example(max_batch=max_batch_size, max_dim=max_dim, max_text=max_text) + input_profile['images'] = [tuple(bs1_example[0].shape), tuple(bsmax_example[0].shape), + tuple(bsmax_example[0].shape)] + input_profile['texts'] = [tuple(bs1_example[1].shape), tuple(bsmax_example[1].shape), tuple(bsmax_example[1].shape)] + build_engine( + f"{output_dir}/onnx/clip.onnx", + f"{output_dir}/plan/clip.plan", + fp16=(trt_precision == 16), + input_profile=input_profile, + timing_cache=None, + workspace_size=0, + ) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py b/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py new file mode 100644 index 000000000000..7a7b7a86282b --- /dev/null +++ b/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py @@ -0,0 +1,130 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import torch +import torch.nn.functional as F +from PIL import Image +from apex.transformer import parallel_state +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from tqdm import tqdm + +from nemo.collections.multimodal.data.clip.clip_dataset import get_preprocess_fns, ImagenetClassnameDataset +from nemo.collections.multimodal.data.clip.clip_dataset import tokenize, build_imagenet_validation_dataloader +from nemo.collections.multimodal.data.clip.imagenet_zeroshot_data import openai_imagenet_template, imagenet_classnames +from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.collections.nlp.modules.common.megatron.utils import average_losses_across_data_parallel_group +from nemo.collections.nlp.parts.nlp_overrides import ( + NLPDDPStrategy, + NLPSaveRestoreConnector, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.get_rank import is_global_rank_zero + + +def accuracy(output, target, topk=(1,)): + pred = output.topk(max(topk), 1, True, True)[1].t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + return [float(correct[:k].reshape(-1).float().sum(0, keepdim=True).cpu().numpy()) for k in topk] + + +@hydra_runner(config_path="conf", config_name="megatron_clip_imagenet_zeroshot") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + # These configs are required to be off during inference. + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.vision.precision = cfg.trainer.precision + model_cfg.text.precision = cfg.trainer.precision + if cfg.trainer.precision != "bf16": + model_cfg.megatron_amp_O2 = False + model_cfg.sequence_parallel = False + model_cfg.activations_checkpoint_granularity = None + model_cfg.activations_checkpoint_method = None + + trainer, model = setup_trainer_and_model_for_inference( + model_provider=MegatronCLIPModel, + cfg=cfg, + model_cfg_modifier=model_cfg_modifier, + ) + + if model.cfg.get("megatron_amp_O2", False): + vision_encoder = model.model.module.vision_encoder + text_encoder = model.model.module.text_encoder + else: + vision_encoder = model.model.vision_encoder + text_encoder = model.model.text_encoder + + # get autocast_dtype + if trainer.precision == 'bf16': + autocast_dtype = torch.bfloat16 + elif int(trainer.precision) == 32: + autocast_dtype = torch.float + elif int(trainer.precision) == 16: + autocast_dtype = torch.half + else: + raise ValueError('precision must be in [32, 16, "bf16"]') + + with open_dict(cfg): + cfg.model["vision"] = model.cfg.vision + cfg.model["text"] = model.cfg.text + + imagenet_val = build_imagenet_validation_dataloader(cfg.model, model.tokenizer) + with torch.no_grad(), torch.cuda.amp.autocast(enabled=autocast_dtype in (torch.half, torch.bfloat16), + dtype=autocast_dtype, ): + # build imagenet classification classifier + classifier = [] + for texts in imagenet_val["texts"]: + texts = texts.cuda(non_blocking=True) + class_embeddings = text_encoder(texts) + class_embedding = F.normalize(class_embeddings, dim=-1).mean(dim=0) + class_embedding /= class_embedding.norm() + classifier.append(class_embedding) + classifier = torch.stack(classifier, dim=1) + + top1, top5, n = 0., 0., 0. + for images, target in tqdm(imagenet_val["images"], desc="Imagenet Zero-shot Evaluation", leave=False): + images = images.cuda(non_blocking=True) + target = target.cuda(non_blocking=True) + # predict + image_features = vision_encoder(images) + image_features = F.normalize(image_features, dim=-1) + logits = 100. * image_features @ classifier + + # measure accuracy + acc1, acc5 = accuracy(logits, target, topk=(1, 5)) + top1 += acc1 + top5 += acc5 + n += images.size(0) + + logging.info('Finished zero-shot imagenet.') + top1 = (top1 / n) + top5 = (top5 / n) + + imagenet_metric = torch.zeros(2).cuda() + imagenet_metric[0], imagenet_metric[1] = top1, top5 + imagenet_metric = average_losses_across_data_parallel_group(imagenet_metric) + + if is_global_rank_zero: + logging.info(f"Zero-shot CLIP accuracy Top-1: {imagenet_metric[0]:.4f}; Top-5: {imagenet_metric[1]:.4f}") + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/foundation/clip/megatron_clip_infer.py b/examples/multimodal/foundation/clip/megatron_clip_infer.py new file mode 100644 index 000000000000..d2ce890e616b --- /dev/null +++ b/examples/multimodal/foundation/clip/megatron_clip_infer.py @@ -0,0 +1,96 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import torch +from PIL import Image +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment + +from nemo.collections.multimodal.data.clip.clip_dataset import get_preprocess_fns +from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.collections.nlp.parts.nlp_overrides import ( + NLPDDPStrategy, + NLPSaveRestoreConnector, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.get_rank import is_global_rank_zero + + +@hydra_runner(config_path="conf", config_name="megatron_clip_infer") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + # These configs are required to be off during inference. + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.vision.precision = cfg.trainer.precision + model_cfg.text.precision = cfg.trainer.precision + if cfg.trainer.precision != "bf16": + model_cfg.megatron_amp_O2 = False + model_cfg.sequence_parallel = False + model_cfg.activations_checkpoint_granularity = None + model_cfg.activations_checkpoint_method = None + + trainer, model = setup_trainer_and_model_for_inference( + model_provider=MegatronCLIPModel, + cfg=cfg, + model_cfg_modifier=model_cfg_modifier, + ) + + if model.cfg.get("megatron_amp_O2", False): + vision_encoder = model.model.module.vision_encoder + text_encoder = model.model.module.text_encoder + else: + vision_encoder = model.model.vision_encoder + text_encoder = model.model.text_encoder + + val_image_transform, text_transform = get_preprocess_fns( + model.cfg, + model.tokenizer, + is_train=False, + ) + + # get autocast_dtype + if trainer.precision == 'bf16': + autocast_dtype = torch.bfloat16 + elif int(trainer.precision) == 32: + autocast_dtype = torch.float + elif int(trainer.precision) == 16: + autocast_dtype = torch.half + else: + raise ValueError('precision must be in [32, 16, "bf16"]') + + image = Image.open(cfg.image_path).convert('RGB') + with torch.no_grad(), torch.cuda.amp.autocast(enabled=autocast_dtype in (torch.half, torch.bfloat16), + dtype=autocast_dtype, ): + image = val_image_transform(image).unsqueeze(0).cuda() + texts = text_transform(cfg.texts).cuda() + image_features = vision_encoder(image) + text_features = text_encoder(texts) + image_features /= image_features.norm(dim=-1, keepdim=True) + text_features /= text_features.norm(dim=-1, keepdim=True) + + text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1) + + if is_global_rank_zero: + print(f"Given image's CLIP text probability: ", list(zip(cfg.texts, text_probs[0].cpu().numpy()))) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/foundation/clip/megatron_clip_pretrain.py b/examples/multimodal/foundation/clip/megatron_clip_pretrain.py new file mode 100644 index 000000000000..8d5b33f86c02 --- /dev/null +++ b/examples/multimodal/foundation/clip/megatron_clip_pretrain.py @@ -0,0 +1,90 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector + +from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="conf", config_name="megatron_clip_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + assert (cfg.trainer.devices * cfg.trainer.num_nodes) * cfg.model.micro_batch_size == \ + cfg.model.global_batch_size, "Gradient accumulation is not supported in CLIP yet." + + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + ) + if megatron_amp_O2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + exp_manager(trainer, cfg.exp_manager) + + # update resume from checkpoint found by exp_manager + if cfg.model.resume_from_checkpoint is not None: + resume_from_checkpoint = cfg.model.resume_from_checkpoint + else: + resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path + + logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}') + + trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + model = MegatronCLIPModel(cfg.model, trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml b/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml new file mode 100644 index 000000000000..cea0fe4995f1 --- /dev/null +++ b/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml @@ -0,0 +1,214 @@ +name: Dreambooth + +trainer: + devices: 2 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + replace_sampler_ddp: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 400 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + +exp_manager: + exp_dir: null + name: ${name} + create_checkpoint_callback: True + create_tensorboard_logger: True + checkpoint_callback_params: + every_n_train_steps: 100 + every_n_epochs: 0 + monitor: reduced_train_loss + save_on_train_epoch_end: False + filename: '${name}-{step}' + save_top_k: -1 + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 2 # limited by GPU memory + global_batch_size: 4 # will use more micro batches to reach global batch size + + with_prior_preservation: True + pretrained_ckpt: + prior_loss_weight: 0.5 + train_text_encoder: False + restore_from_path: /ckpts/v1-5-pruned.ckpt #This ckpt is only used to generate regularization images, thus .nemo ckpt is needed + + + + + linear_start: 0.00085 + linear_end: 0.012 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: images + cond_stage_key: captions # txt for cifar, caption for pbss + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn # check + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + scale_by_std: False + ckpt_path: + ignore_keys: [ ] + parameterization: eps + clip_denoised: True + load_only_unet: False + cosine_s: 8e-3 + given_betas: + original_elbo_weight: 0 + v_posterior: 0 + l_simple_weight: 1 + use_positional_encodings: False + learn_logvar: False + logvar_init: 0 + beta_schedule: linear + loss_type: l2 + + concat_mode: True + cond_stage_forward: + text_embedding_dropout_rate: 0.1 + fused_opt: True + inductor: False + inductor_cudagraphs: False + + unet_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel + from_pretrained: /ckpts/unet-v1-5.bin #load unet weights for finetuning, can use .ckpt ckpts from various sources + from_NeMo: False #Must be specified when from pretrained is not None, False means loading unet from HF ckpt + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: + - 4 + - 2 + - 1 + num_res_blocks: 2 + channel_mult: + - 1 + - 2 + - 4 + - 4 + num_heads: 8 + use_spatial_transformer: true + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: False + + first_stage_config: + _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL + from_pretrained: /ckpts/vae.bin + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 #Never used + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [ ] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder + version: openai/clip-vit-large-patch14 + device: cuda + max_length: 77 + + noise_scheduler: + _target_: nemo.collections.multimodal.models.dreambooth.util.sd_noise_scheduler + parameterization: eps + v_posterior: 0 + given_betas: + beta_schedule: linear + timesteps: 1000 + linear_start: 0.00085 + linear_end: 0.012 + cosine_s: 8e-3 + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + optim: + name: fused_adam + lr: 1e-6 + weight_decay: 0. + betas: + - 0.9 + - 0.999 + sched: + name: WarmupHoldPolicy + warmup_steps: 1 + hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + data: + name: pbss + num_workers: 4 + instance_dir: /datasets/instance_dir + instance_prompt: a photo of a sks dog + regularization_dir: /datasets/nemo_dogs + regularization_prompt: a photo of a dog + num_reg_images: 200 + num_images_per_prompt: 4 + train_batch_size: 2 + resolution: 512 + center_crop: True + +##The below infer config is to use inference script generating regularization images +infer: + unconditional_guidance_scale: 7.5 + num_images_per_prompt: ${model.data.num_images_per_prompt} + height: 512 + width: 512 + down_factor: 8 + inference_steps: 50 + sampler_type: 'PLMS' + eta: 0 + output_type: 'pil' + save_to_file: False + out_path: ${model.data.regularization_dir} + prompts: ${model.data.regularization_prompt} \ No newline at end of file diff --git a/examples/multimodal/generative/dreambooth/conf/dreambooth_export.yaml b/examples/multimodal/generative/dreambooth/conf/dreambooth_export.yaml new file mode 100644 index 000000000000..ca9f2e224171 --- /dev/null +++ b/examples/multimodal/generative/dreambooth/conf/dreambooth_export.yaml @@ -0,0 +1,23 @@ +name: stable-diffusion-export + +infer: + unconditional_guidance_scale: 7.5 + num_images_per_prompt: 4 + height: 512 + width: 512 + down_factor: 8 + inference_steps: 100 + sampler_type: 'DDIM' + eta: 0 + out_path: 'dreambooth' + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + +model: + restore_from_path: null + precision: ${trainer.precision} \ No newline at end of file diff --git a/examples/multimodal/generative/dreambooth/conf/dreambooth_infer.yaml b/examples/multimodal/generative/dreambooth/conf/dreambooth_infer.yaml new file mode 100644 index 000000000000..fc8d35443767 --- /dev/null +++ b/examples/multimodal/generative/dreambooth/conf/dreambooth_infer.yaml @@ -0,0 +1,32 @@ +name: stable-diffusion-train + +infer: + unconditional_guidance_scale: 7.5 + num_images_per_prompt: 4 + height: 512 + width: 512 + down_factor: 8 + inference_steps: 100 + sampler_type: 'DDIM' + eta: 0 + output_type: 'pil' + save_to_file: True + out_path: 'dreambooth' + seed: 123 + prompts: + - 'a photo of a sks dog' + - 'a photo of a sks dog in the Acropolis' + - 'a photo of a sks dog in front of eiffel tower' + - 'a photo of sks dog sleeping' + - 'a photo of a sks dog riding a bike' + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + +model: + restore_from_path: null + precision: ${trainer.precision} \ No newline at end of file diff --git a/examples/multimodal/generative/dreambooth/dreambooth.py b/examples/multimodal/generative/dreambooth/dreambooth.py new file mode 100644 index 000000000000..b712de22ad50 --- /dev/null +++ b/examples/multimodal/generative/dreambooth/dreambooth.py @@ -0,0 +1,144 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import pytorch_lightning as pl +import torch +from apex.transformer import parallel_state +from apex.transformer.pipeline_parallel.utils import _GLOBAL_NUM_MICROBATCHES_CALCULATOR +from omegaconf import DictConfig, OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector + +from nemo.collections.multimodal.models.dreambooth.dreambooth import MegatronDreamBooth +from nemo.collections.multimodal.models.multimodal_base_model import MegatronMultimodalModel +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.multimodal.parts.stable_diffusion.pipeline import pipeline +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +def prepare_reg_data(cfg): + reg_dir = cfg.model.data.regularization_dir + num_reg_images = cfg.model.data.num_reg_images + num_images_per_prompt = cfg.model.data.num_images_per_prompt + reg_prompt = cfg.model.data.regularization_prompt + os.makedirs(reg_dir, exist_ok=True) + NUM_REG_IMAGES = len(os.listdir(reg_dir)) + if NUM_REG_IMAGES < num_reg_images: + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + model_cfg.unet_config.use_flash_attention = False + model_cfg.micro_batch_size = cfg.model.micro_batch_size + model_cfg.global_batch_size = cfg.model.global_batch_size + model_cfg.unet_config.from_pretrained = None + model_cfg.first_stage_config.from_pretrained = None + model_cfg.target = 'nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion' + + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronLatentDiffusion, + cfg=cfg, + model_cfg_modifier=model_cfg_modifier) + model = megatron_diffusion_model.model + rng = torch.Generator() + rng.manual_seed(trainer.global_rank * 100 + cfg.model.seed) + images_to_generate = cfg.model.data.num_reg_images - NUM_REG_IMAGES + images_to_generate = images_to_generate // trainer.world_size + + logging.info(f"No enough images in regularization folder, generating {images_to_generate} from provided ckpt") + + for i in range(images_to_generate // num_images_per_prompt + 1): + output = pipeline(model, cfg, verbose=False, rng=rng) + for text_prompt, pils in zip(reg_prompt, output): + for idx, image in enumerate(pils): + image.save(os.path.join(cfg.infer.out_path, + f'{reg_prompt}_{trainer.global_rank}_{NUM_REG_IMAGES + i * num_images_per_prompt + idx}.png')) + del model + del trainer + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + +@hydra_runner(config_path='conf', config_name='dreambooth.yaml') +def main(cfg): + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' + + torch.backends.cuda.matmul.allow_tf32 = True + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + ) + if megatron_amp_O2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + prepare_reg_data(cfg) + parallel_state.destroy_model_parallel() + + callbacks = [] + trainer = Trainer(plugins=plugins, strategy=strategy, callbacks=callbacks, **cfg.trainer) + + exp_manager(trainer, cfg.exp_manager) + # update resume from checkpoint found by exp_manager + if cfg.model.get("resume_from_checkpoint") is not None: + resume_from_checkpoint = cfg.model.resume_from_checkpoint + else: + resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path + + logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}') + + trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + model = MegatronDreamBooth(cfg.model, trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + torch.multiprocessing.set_start_method('spawn') + main() diff --git a/examples/multimodal/generative/dreambooth/dreambooth_export.py b/examples/multimodal/generative/dreambooth/dreambooth_export.py new file mode 100644 index 000000000000..8f396bd02cad --- /dev/null +++ b/examples/multimodal/generative/dreambooth/dreambooth_export.py @@ -0,0 +1,198 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import gc +import os +import time +import torch +import torch.nn as nn +from omegaconf.omegaconf import OmegaConf, open_dict + +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.config import hydra_runner +from nemo.utils.trt_utils import build_engine + + +@hydra_runner(config_path='conf', config_name='dreambooth_export') +def main(cfg): + batch_size = cfg.infer.get('num_images_per_prompt', 1) + height = cfg.infer.get('height', 512) + width = cfg.infer.get('width', 512) + downsampling_factor = cfg.infer.get('down_factor', 8) + fp16 = 16 == cfg.trainer.get("precision", 32) + if cfg.trainer.get("precision", 32) == "bf16": + print("BF16 not supported for export, will use fp32") + + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + model_cfg.unet_config.use_flash_attention = False + model_cfg.unet_config.from_pretrained = None + model_cfg.first_stage_config.from_pretrained = None + model_cfg.target = 'nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion' + + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronLatentDiffusion, + cfg=cfg, + model_cfg_modifier=model_cfg_modifier + ) + model = megatron_diffusion_model.model + model.cuda().eval() + + in_channels = model.model.diffusion_model.in_channels + shape_of_internal = [in_channels, height // downsampling_factor, width // downsampling_factor] + fake_text = [""] + out = model.cond_stage_model(fake_text) + output_dir = cfg.infer.out_path + os.makedirs(f"{output_dir}/onnx/unet/", exist_ok=True) + os.makedirs(f"{output_dir}/onnx/clip/", exist_ok=True) + os.makedirs(f"{output_dir}/onnx/vae/", exist_ok=True) + os.makedirs(f"{output_dir}/plan/", exist_ok=True) + deployment_conf = OmegaConf.create({ + 'clip': OmegaConf.create({}), + 'unet': OmegaConf.create({}), + 'vae': OmegaConf.create({}), + 'sampler': OmegaConf.create({}), + 'batch_size': batch_size, + 'downsampling_factor': downsampling_factor, + 'in_channels': in_channels, + 'height': height, + 'width': width, + }) + deployment_conf.sampler.eta = cfg.infer.get('eta', 0) + deployment_conf.sampler.inference_steps = cfg.infer.get('inference_steps', 50) + deployment_conf.sampler.sampler_type = cfg.infer.get('sampler_type', "ddim") + + ### UNet Export + x, t = torch.randn(2, *shape_of_internal, device="cuda"), torch.randint(high=10, size=(2,), device="cuda") + cc = torch.randn(2, out.shape[1], out.shape[2], device="cuda") + input_names = ["x", "t", "context"] + output_names = ["logits"] + torch.onnx.export(model.model.diffusion_model, + (x, t, cc), + f"{output_dir}/onnx/unet/unet.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={ + "x": {0: 'B'}, + "t": {0: 'B'}, + "context": {0: 'B'} + }, + opset_version=17) + input_profile_unet = {} + input_profile_unet["x"] = [(2 * batch_size, *(x.shape[1:]))] * 3 + input_profile_unet["t"] = [(2 * batch_size, *(t.shape[1:]))] * 3 + input_profile_unet["context"] = [(2 * batch_size, *(cc.shape[1:]))] * 3 + deployment_conf.unet.x = input_profile_unet["x"][0] + deployment_conf.unet.t = input_profile_unet["t"][0] + deployment_conf.unet.context = input_profile_unet["context"][0] + deployment_conf.unet.logits = input_profile_unet["x"][0] + + ### VAE Export + class VAEWrapper(nn.Module): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, z): + outputs = self.model.decode(z=z) + return outputs + + input_names = ["z"] + output_names = ["logits"] + z = torch.randn(1, *shape_of_internal, device="cuda") + torch.onnx.export(VAEWrapper(model.first_stage_model), + (z,), + f"{output_dir}/onnx/vae/vae.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={ + "z": {0: 'B'}, + "logits": {0: 'B'} + }, + opset_version=17) + input_profile_vae = {} + input_profile_vae["z"] = [(batch_size, *(z.shape[1:]))] * 3 + deployment_conf.vae.z = input_profile_vae["z"][0] + + ### CLIP Export + class CLIPWrapper(nn.Module): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, input_ids): + outputs = self.model(input_ids=input_ids) + return outputs.last_hidden_state + + input_names = ["tokens"] + output_names = ["logits"] + tokens = torch.randint(high=10, size=(1, model.cond_stage_model.max_length), device="cuda") + torch.onnx.export(CLIPWrapper(model.cond_stage_model.transformer), + (tokens,), + f"{output_dir}/onnx/clip/clip.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={ + "tokens": {0: 'B'}, + "logits": {0: 'B'} + }, + opset_version=17, + do_constant_folding=True, + export_params=True) + input_profile_clip = {} + input_profile_clip["tokens"] = [(batch_size, *(tokens.shape[1:]))] * 3 + deployment_conf.clip.tokens = input_profile_clip["tokens"][0] + deployment_conf.clip.logits = (batch_size, model.cond_stage_model.max_length, out.shape[2]) + deployment_conf.clip.unconditional_guidance_scale = cfg.infer.get("unconditional_guidance_scale", 7.5) + deployment_conf.clip.max_length = model.cond_stage_model.max_length + with open(f"{output_dir}/plan/conf.yaml", "wb") as f: + OmegaConf.save(config=deployment_conf, f=f.name) + del model, trainer, megatron_diffusion_model, x, t, cc, z, tokens, out + torch.cuda.empty_cache() + gc.collect() + + build_engine( + f"{output_dir}/onnx/unet/unet.onnx", + f"{output_dir}/plan/unet.plan", + fp16=fp16, + input_profile=input_profile_unet, + timing_cache=None, + workspace_size=0, + ) + build_engine( + f"{output_dir}/onnx/vae/vae.onnx", + f"{output_dir}/plan/vae.plan", + fp16=fp16, + input_profile=input_profile_vae, + timing_cache=None, + workspace_size=0, + ) + build_engine( + f"{output_dir}/onnx/clip/clip.onnx", + f"{output_dir}/plan/clip.plan", + fp16=fp16, + input_profile=input_profile_clip, + timing_cache=None, + workspace_size=0, + ) + + +if __name__ == "__main__": + main() diff --git a/examples/multimodal/generative/dreambooth/dreambooth_infer.py b/examples/multimodal/generative/dreambooth/dreambooth_infer.py new file mode 100644 index 000000000000..581637352c8e --- /dev/null +++ b/examples/multimodal/generative/dreambooth/dreambooth_infer.py @@ -0,0 +1,46 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.multimodal.parts.stable_diffusion.pipeline import pipeline +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.config import hydra_runner + + +@hydra_runner(config_path='conf', config_name='dreambooth_infer') +def main(cfg): + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + model_cfg.unet_config.use_flash_attention = False + model_cfg.unet_config.from_pretrained = None + model_cfg.first_stage_config.from_pretrained = None + model_cfg.target = 'nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion' + + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronLatentDiffusion, + cfg=cfg, + model_cfg_modifier=model_cfg_modifier + ) + model = megatron_diffusion_model.model + model.cuda().eval() + + rng = torch.Generator().manual_seed(cfg.infer.seed) + pipeline(model, cfg, rng=rng) + + +if __name__ == "__main__": + main() diff --git a/examples/multimodal/generative/instruct_pix2pix/conf/sd_edit.yaml b/examples/multimodal/generative/instruct_pix2pix/conf/sd_edit.yaml new file mode 100644 index 000000000000..75eed9d9b6bf --- /dev/null +++ b/examples/multimodal/generative/instruct_pix2pix/conf/sd_edit.yaml @@ -0,0 +1,23 @@ +edit: + resolution: 256 + steps: 100 + input: path/to/input/picture + outpath: path/to/output/folder + prompt: "" + cfg_text: 7.5 + cfg_image: 1.2 + num_images_per_prompt: 8 + combine_images: [ 2, 4 ] # [row, column] + seed: 1234 + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +model: + restore_from_path: null # Path to a trained instruct pix2pix .nemo file + precision: ${trainer.precision} + diff --git a/examples/multimodal/generative/instruct_pix2pix/conf/sd_export.yaml b/examples/multimodal/generative/instruct_pix2pix/conf/sd_export.yaml new file mode 100644 index 000000000000..8acfbc201874 --- /dev/null +++ b/examples/multimodal/generative/instruct_pix2pix/conf/sd_export.yaml @@ -0,0 +1,20 @@ +edit: + resolution: 256 + steps: 100 + input: path/to/input/picture + out_path: "instruct_pix2pix" + cfg_text: 7.5 + cfg_image: 1.2 + num_images_per_prompt: 8 + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +model: + restore_from_path: null # Path to a trained instruct pix2pix .nemo file + precision: ${trainer.precision} + diff --git a/examples/multimodal/generative/instruct_pix2pix/conf/sd_finetune.yaml b/examples/multimodal/generative/instruct_pix2pix/conf/sd_finetune.yaml new file mode 100644 index 000000000000..f04fab019c4a --- /dev/null +++ b/examples/multimodal/generative/instruct_pix2pix/conf/sd_finetune.yaml @@ -0,0 +1,167 @@ +name: instruct-pix2pix-train + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + replace_sampler_ddp: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 10000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 1 + val_check_interval: 100 + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: instruct-pix2pix + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + create_checkpoint_callback: True + create_tensorboard_logger: True + checkpoint_callback_params: + save_top_k: 4 + mode: min + monitor: val/loss + filename: 'instruct-pix2pix--{val/loss:.4f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + ckpt_path: null # load checkpoint weights from previous stages for fine-tuning + precision: ${trainer.precision} + micro_batch_size: 32 + global_batch_size: 32 # `= micro_batch_size * total_devices` fake global batch size for sampler + + linear_start: 0.00085 + linear_end: 0.012 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: edited + cond_stage_key: edit # txt for cifar, caption for pbss + image_size: 32 + channels: 4 + cond_stage_trainable: false + conditioning_key: hybrid + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + scale_by_std: False + + ignore_keys: [ ] + parameterization: eps + clip_denoised: True + load_only_unet: False + cosine_s: 8e-3 + given_betas: + original_elbo_weight: 0 + v_posterior: 0 + l_simple_weight: 1 + use_positional_encodings: False + learn_logvar: False + logvar_init: 0 + beta_schedule: linear + loss_type: l2 + concat_mode: True + cond_stage_forward: + text_embedding_dropout_rate: 0 + fused_opt: True + inductor: False + inductor_cudagraphs: False + + unet_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel + from_pretrained: + image_size: 32 # unused + in_channels: 8 + out_channels: 4 + model_channels: 320 + attention_resolutions: + - 4 + - 2 + - 1 + num_res_blocks: 2 + channel_mult: + - 1 + - 2 + - 4 + - 4 + num_heads: 8 + use_spatial_transformer: true + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: False + + first_stage_config: + _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL + from_pretrained: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [ ] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder + version: openai/clip-vit-large-patch14 + device: cuda + max_length: 77 + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + optim: + name: fused_adam + lr: 1e-4 + weight_decay: 0. + betas: + - 0.9 + - 0.999 + sched: + name: WarmupHoldPolicy + warmup_steps: 100 + hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant + + data: + # Path to instruct-pix2pix dataset must be specified by the user. + # https://github.com/timothybrooks/instruct-pix2pix#generated-dataset + data_path: ??? + num_workers: 2 + dataloader_type: cyclic # cyclic + validation_drop_last: True # Set to false if the last partial validation samples is to be consumed diff --git a/examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py b/examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py new file mode 100644 index 000000000000..47529feff757 --- /dev/null +++ b/examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py @@ -0,0 +1,175 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import einops +import math +import numpy as np +import os +import random +import sys +import torch +import torch.nn as nn +from PIL import Image, ImageOps +from argparse import ArgumentParser +from einops import rearrange, repeat +from omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from torch import autocast + +from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit +from nemo.collections.multimodal.models.stable_diffusion.samplers.k_diffusion import DiscreteEpsDDPMDenoiser +from nemo.collections.multimodal.models.stable_diffusion.samplers.k_diffusion import sample_euler_ancestral +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.collections.nlp.parts.nlp_overrides import ( + NLPDDPStrategy, + NLPSaveRestoreConnector, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging + + +class CFGDenoiser(nn.Module): + def __init__(self, model): + super().__init__() + self.inner_model = model + + def forward(self, z, sigma, cond, uncond, text_cfg_scale, image_cfg_scale): + cfg_z = einops.repeat(z, "b ... -> (n b) ...", n=3) + cfg_sigma = einops.repeat(sigma, "b ... -> (n b) ...", n=3) + cfg_cond = { + "c_crossattn": [torch.cat([cond["c_crossattn"][0], uncond["c_crossattn"][0], uncond["c_crossattn"][0]])], + "c_concat": [torch.cat([cond["c_concat"][0], cond["c_concat"][0], uncond["c_concat"][0]])], + } + out_cond, out_img_cond, out_uncond = self.inner_model(cfg_z, cfg_sigma, cond=cfg_cond).chunk(3) + out = out_uncond + text_cfg_scale * (out_cond - out_img_cond) + image_cfg_scale * (out_img_cond - out_uncond) + return out + + +@hydra_runner(config_path='conf', config_name='sd_edit') +def main(cfg): + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + with open_dict(cfg): + edit_cfg = cfg.pop("edit") + + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronLatentDiffusionEdit, + cfg=cfg, + model_cfg_modifier=model_cfg_modifier, + ) + + # inference use the latent diffusion part of megatron wrapper + model = megatron_diffusion_model.model + model_wrap = DiscreteEpsDDPMDenoiser(model) + model_wrap_cfg = CFGDenoiser(model_wrap) + null_token = model.get_learned_conditioning([""]) + + seed = random.randint(0, 100000) if edit_cfg.seed is None else edit_cfg.seed + input_image = Image.open(edit_cfg.input).convert("RGB") + width, height = input_image.size + factor = edit_cfg.resolution / max(width, height) + factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height) + width = int((width * factor) // 64) * 64 + height = int((height * factor) // 64) * 64 + input_image = ImageOps.fit(input_image, (width, height), method=Image.Resampling.LANCZOS) + + if edit_cfg.prompt == "": + input_image.save(edit_cfg.output) + return + + # get autocast_dtype + if trainer.precision == 'bf16': + autocast_dtype = torch.bfloat16 + elif int(trainer.precision) == 32: + autocast_dtype = torch.float + elif int(trainer.precision) == 16: + autocast_dtype = torch.half + else: + raise ValueError('precision must be in [32, 16, "bf16"]') + + num_images_per_prompt = edit_cfg.num_images_per_prompt + with torch.no_grad(), torch.cuda.amp.autocast(enabled=autocast_dtype in (torch.half, torch.bfloat16), + dtype=autocast_dtype, ): + cond = {} + cond["c_crossattn"] = [ + repeat(model.get_learned_conditioning([edit_cfg.prompt]), + "1 ... -> n ...", n=num_images_per_prompt) + ] + input_image = 2 * torch.tensor(np.array(input_image)).float() / 255 - 1 + input_image = rearrange(input_image, "h w c -> 1 c h w").cuda(non_blocking=True) + cond["c_concat"] = [ + repeat(model.encode_first_stage(input_image).mode(), + "1 ... -> n ...", n=num_images_per_prompt) + ] + + uncond = {} + uncond["c_crossattn"] = [ + repeat(null_token, "1 ... -> n ...", n=num_images_per_prompt) + ] + uncond["c_concat"] = [torch.zeros_like(cond["c_concat"][0])] + + sigmas = model_wrap.get_sigmas(edit_cfg.steps) + + extra_args = { + "cond": cond, + "uncond": uncond, + "text_cfg_scale": edit_cfg.cfg_text, + "image_cfg_scale": edit_cfg.cfg_image, + } + torch.manual_seed(seed) + z = torch.randn_like(cond["c_concat"][0]) + z = z * sigmas[0] + z = sample_euler_ancestral(model_wrap_cfg, z, sigmas, extra_args=extra_args) + x = model.decode_first_stage(z) + x = torch.clamp((x + 1.0) / 2.0, min=0.0, max=1.0) + x = 255.0 * rearrange(x, "n c h w -> n h w c") + + os.makedirs(edit_cfg.outpath, exist_ok=True) + if edit_cfg.get("combine_images") is None: + for idx, image in enumerate(x): + edited_image = Image.fromarray(image.type(torch.uint8).cpu().numpy()) + save_path = os.path.join(edit_cfg.outpath, + f'{edit_cfg.prompt.replace(" ", "_")}_{edit_cfg.cfg_text}_{edit_cfg.cfg_image}_{seed}_{idx}.jpg') + edited_image.save(save_path) + logging.info(f"Edited image saved to: {save_path}") + else: + row, column = edit_cfg.combine_images + width, height = x.size(2), x.size(1) + total_width, total_height = width * column, height * row + edited_image = Image.new('RGB', (total_width, total_height)) + x_offset = 0 + y_offset = 0 + for idx, image in enumerate(x): + image = Image.fromarray(image.type(torch.uint8).cpu().numpy()) + edited_image.paste(image, (x_offset, y_offset)) + x_offset += image.size[0] + if (idx + 1) % column == 0: + x_offset = 0 + y_offset += height + save_path = os.path.join(edit_cfg.outpath, + f'{edit_cfg.prompt.replace(" ", "_")}_{edit_cfg.cfg_text}_{edit_cfg.cfg_image}_{seed}_combine.jpg') + edited_image.save(save_path) + logging.info(f"Edited image saved to: {save_path}") + + +if __name__ == "__main__": + main() diff --git a/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py b/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py new file mode 100644 index 000000000000..8ef4e9a73b9e --- /dev/null +++ b/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py @@ -0,0 +1,284 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import einops +import gc +import math +import numpy as np +import os +import random +import sys +import torch +import torch.nn as nn +from PIL import Image, ImageOps +from argparse import ArgumentParser +from einops import rearrange, repeat +from omegaconf import OmegaConf, open_dict + +from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit +from nemo.collections.multimodal.models.stable_diffusion.samplers.k_diffusion import DiscreteEpsDDPMDenoiser +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.collections.nlp.parts.nlp_overrides import ( + NLPDDPStrategy, + NLPSaveRestoreConnector, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.trt_utils import build_engine + + +class CFGDenoiser(nn.Module): + def __init__(self, model): + super().__init__() + self.inner_model = model + + def forward(self, z, sigma, cond, uncond, text_cfg_scale, image_cfg_scale): + cfg_z = einops.repeat(z, "b ... -> (n b) ...", n=3) + cfg_sigma = einops.repeat(sigma, "b ... -> (n b) ...", n=3) + cfg_cond = { + "c_crossattn": [torch.cat([cond["c_crossattn"][0], uncond["c_crossattn"][0], uncond["c_crossattn"][0]])], + "c_concat": [torch.cat([cond["c_concat"][0], cond["c_concat"][0], uncond["c_concat"][0]])], + } + print(cfg_z.shape, cfg_sigma.shape) + out_cond, out_img_cond, out_uncond = self.inner_model(cfg_z, cfg_sigma, cond=cfg_cond).chunk(3) + out = out_uncond + text_cfg_scale * (out_cond - out_img_cond) + image_cfg_scale * (out_img_cond - out_uncond) + return out + + +@hydra_runner(config_path='conf', config_name='sd_export') +def main(cfg): + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + fp16 = 16 == cfg.trainer.get("precision", 32) + if cfg.trainer.get("precision", 32) == "bf16": + print("BF16 not supported for export, will use fp32") + with open_dict(cfg): + edit_cfg = cfg.pop("edit") + + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronLatentDiffusionEdit, + cfg=cfg, + model_cfg_modifier=model_cfg_modifier, + ) + + # inference use the latent diffusion part of megatron wrapper + model = megatron_diffusion_model.model + model.eval() + model_wrap = DiscreteEpsDDPMDenoiser(model) + model_wrap_cfg = CFGDenoiser(model_wrap) + null_token = model.get_learned_conditioning([""]) + + input_image = Image.open(edit_cfg.input).convert("RGB") + width, height = input_image.size + factor = edit_cfg.resolution / max(width, height) + factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height) + width = int((width * factor) // 64) * 64 + height = int((height * factor) // 64) * 64 + input_image = ImageOps.fit(input_image, (width, height), method=Image.Resampling.LANCZOS) + batch_size = edit_cfg.get("num_images_per_prompt", 1) + + output_dir = edit_cfg.out_path + + os.makedirs(f"{output_dir}/onnx/unet/", exist_ok=True) + os.makedirs(f"{output_dir}/onnx/clip/", exist_ok=True) + os.makedirs(f"{output_dir}/onnx/vae/", exist_ok=True) + os.makedirs(f"{output_dir}/plan/", exist_ok=True) + deployment_conf = OmegaConf.create({ + 'clip': OmegaConf.create({}), + 'unet': OmegaConf.create({}), + 'vaee': OmegaConf.create({}), + 'vaed': OmegaConf.create({}), + 'sampler': OmegaConf.create({}), + 'batch_size': batch_size, + 'height': height, + 'width': width, + 'resolution': edit_cfg.resolution, + 'steps': edit_cfg.steps, + 'text_cfg_scale': edit_cfg.cfg_text, + 'image_cfg_scale': edit_cfg.cfg_image, + }) + + fake_text = [""] + out_cond = model.cond_stage_model(fake_text) + + ### VAE Encode Export + class VAEEncodeWrapper(nn.Module): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, x): + h = self.model.encoder(x) + moments = self.model.quant_conv(h) + return moments + + input_names = ["x"] + output_names = ["logits"] + x = torch.randn(1, 3, width, height, device="cuda") + # z = torch.randn(1, *shape_of_internal, device="cuda") + torch.onnx.export(VAEEncodeWrapper(model.first_stage_model), + (x,), + f"{output_dir}/onnx/vae/vae_encode.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={ + "x": {0: 'B'}, + "logits": {0: 'B'} + }, + opset_version=17) + input_profile_vaee = {} + input_profile_vaee["x"] = [(1, *(x.shape[1:]))] * 3 + with torch.no_grad(): + out_vaee = VAEEncodeWrapper(model.first_stage_model)(x) + deployment_conf.vaee.x = input_profile_vaee["x"][0] + deployment_conf.vaee.logits = tuple(out_vaee.shape) + + x = torch.randn(3, *(out_vaee.shape[1:]), device="cuda") + t = torch.randn(3, device="cuda") + cc = torch.randn(3, out_cond.shape[1], out_cond.shape[2], device="cuda") + # x, t = torch.randn(2, *shape_of_internal, device="cuda"), torch.randint(high=10, size=(2,), device="cuda") + # cc = torch.randn(2, out.shape[1], out.shape[2], device="cuda") + input_names = ["x", "t", "context"] + output_names = ["logits"] + torch.onnx.export(model.model.diffusion_model, + (x, t, cc), + f"{output_dir}/onnx/unet/unet.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={ + "x": {0: 'B'}, + "t": {0: 'B'}, + "context": {0: 'B'} + }, + opset_version=17) + + input_profile_unet = {} + input_profile_unet["x"] = [(3 * batch_size, *(x.shape[1:]))] * 3 + input_profile_unet["t"] = [(3 * batch_size, *(t.shape[1:]))] * 3 + input_profile_unet["context"] = [(3 * batch_size, *(cc.shape[1:]))] * 3 + with torch.no_grad(): + out_unet = model.model.diffusion_model(x, t, context=cc) + deployment_conf.unet.x = input_profile_unet["x"][0] + deployment_conf.unet.t = input_profile_unet["t"][0] + deployment_conf.unet.context = input_profile_unet["context"][0] + deployment_conf.unet.logits = (3 * batch_size, *(out_unet.shape[1:])) + + ### VAE Decode Export + class VAEDecodeWrapper(nn.Module): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, z): + outputs = self.model.decode(z=z) + return outputs + + input_names = ["z"] + output_names = ["logits"] + z = torch.randn(1, *(out_unet.shape[1:]), device="cuda") + # z = torch.randn(1, *shape_of_internal, device="cuda") + torch.onnx.export(VAEDecodeWrapper(model.first_stage_model), + (z,), + f"{output_dir}/onnx/vae/vae_decode.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={ + "z": {0: 'B'}, + "logits": {0: 'B'} + }, + opset_version=17) + input_profile_vaed = {} + input_profile_vaed["z"] = [(batch_size, *(z.shape[1:]))] * 3 + deployment_conf.vaed.z = input_profile_vaed["z"][0] + deployment_conf.vaed.logits = (batch_size, 3, height, width) + + ### CLIP Export + class CLIPWrapper(nn.Module): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, input_ids): + outputs = self.model(input_ids=input_ids) + return outputs.last_hidden_state + + input_names = ["tokens"] + output_names = ["logits"] + tokens = torch.randint(high=10, size=(1, model.cond_stage_model.max_length), device="cuda") + torch.onnx.export(CLIPWrapper(model.cond_stage_model.transformer), + (tokens,), + f"{output_dir}/onnx/clip/clip.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={ + "tokens": {0: 'B'}, + "logits": {0: 'B'} + }, + opset_version=17, + do_constant_folding=True, + export_params=True) + input_profile_clip = {} + input_profile_clip["tokens"] = [(1, *(tokens.shape[1:]))] * 3 + deployment_conf.clip.tokens = input_profile_clip["tokens"][0] + deployment_conf.clip.logits = (1, model.cond_stage_model.max_length, out_cond.shape[2]) + deployment_conf.clip.max_length = model.cond_stage_model.max_length + with open(f"{output_dir}/plan/conf.yaml", "wb") as f: + OmegaConf.save(config=deployment_conf, f=f.name) + del model, trainer, megatron_diffusion_model, x, t, cc, z, tokens, out_cond, out_vaee, out_unet + torch.cuda.empty_cache() + gc.collect() + build_engine( + f"{output_dir}/onnx/unet/unet.onnx", + f"{output_dir}/plan/unet.plan", + fp16=fp16, + input_profile=input_profile_unet, + timing_cache=None, + workspace_size=0, + ) + build_engine( + f"{output_dir}/onnx/vae/vae_decode.onnx", + f"{output_dir}/plan/vae_decode.plan", + fp16=fp16, + input_profile=input_profile_vaed, + timing_cache=None, + workspace_size=0, + ) + build_engine( + f"{output_dir}/onnx/vae/vae_encode.onnx", + f"{output_dir}/plan/vae_encode.plan", + fp16=fp16, + input_profile=input_profile_vaee, + timing_cache=None, + workspace_size=0, + ) + build_engine( + f"{output_dir}/onnx/clip/clip.onnx", + f"{output_dir}/plan/clip.plan", + fp16=fp16, + input_profile=input_profile_clip, + timing_cache=None, + workspace_size=0, + ) + + +if __name__ == "__main__": + main() diff --git a/examples/multimodal/generative/instruct_pix2pix/sd_finetune.py b/examples/multimodal/generative/instruct_pix2pix/sd_finetune.py new file mode 100644 index 000000000000..2f4f4339a468 --- /dev/null +++ b/examples/multimodal/generative/instruct_pix2pix/sd_finetune.py @@ -0,0 +1,88 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.callbacks.timer import Timer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.strategies.ddp import DDPStrategy +from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector + +from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="conf", config_name="sd_finetune") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + ) + if megatron_amp_O2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + exp_manager(trainer, cfg.exp_manager) + # update resume from checkpoint found by exp_manager + if cfg.model.get("resume_from_checkpoint") is not None: + resume_from_checkpoint = cfg.model.resume_from_checkpoint + else: + resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path + + logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}') + trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + model = MegatronLatentDiffusionEdit(cfg.model, trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_export.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_export.yaml new file mode 100644 index 000000000000..546c937955a1 --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_export.yaml @@ -0,0 +1,23 @@ +name: stable-diffusion-export + +infer: + unconditional_guidance_scale: 7.5 + num_images_per_prompt: 1 + height: 512 + width: 512 + down_factor: 8 + inference_steps: 50 + sampler_type: 'PLMS' + eta: 0 + out_path: 'stable-diffusion' + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + +model: + restore_from_path: null + precision: ${trainer.precision} \ No newline at end of file diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_fid_images.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_fid_images.yaml new file mode 100644 index 000000000000..e526bc52d673 --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_fid_images.yaml @@ -0,0 +1,45 @@ +name: stable-diffusion-train + +fid: + classifier_free_guidance: + - 1.5 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + nnodes_per_cfg: 1 + ntasks_per_node: 8 + local_task_id: null + num_images_to_eval: 30000 + coco_captions_path: /coco2014/coco2014_val_sampled_30k/captions + coco_images_path: /coco2014/coco2014_val/images_256 + save_path: output + +infer: + unconditional_guidance_scale: null + num_images_per_prompt: 1 + height: 512 + width: 512 + down_factor: 8 + inference_steps: 50 + sampler_type: 'PLMS' + eta: 0 + output_type: 'pil' + save_to_file: False # We need to rename and maintain the order of images for clip score calculation, so we will save it outside the inference pipeline + out_path: ${fid.save_path} + seed: 123 + prompts: + +trainer: + devices: ${fid.ntasks_per_node} + num_nodes: 1 + accelerator: gpu + precision: 32 + logger: False # logger provided by exp_manager + +model: + restore_from_path: null + precision: ${trainer.precision} \ No newline at end of file diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml new file mode 100644 index 000000000000..e6655891338a --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml @@ -0,0 +1,31 @@ +name: stable-diffusion-train + +infer: + unconditional_guidance_scale: 7.5 + num_images_per_prompt: 4 + height: 512 + width: 512 + down_factor: 8 + inference_steps: 50 + sampler_type: 'PLMS' + eta: 0 + output_type: 'pil' + save_to_file: True + out_path: 'stable-diffusion' + seed: 123 + prompts: + - 'A photo of a Shiba Inu dog with a backpack riding a bike. It is wearing sunglasses and a beach hat.' + - 'A cute corgi lives in a house made out of sushi.' + - 'A high contrast portrait of a very happy fuzzy panda dressed as a chef in a high end kitchen making dough. There is a painting of flowers on the wall behind him.' + - 'A brain riding a rocketship heading towards the moon.' + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + +model: + restore_from_path: null + precision: ${trainer.precision} \ No newline at end of file diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml new file mode 100644 index 000000000000..693d92604103 --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml @@ -0,0 +1,169 @@ +name: stable-diffusion-train + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + replace_sampler_ddp: True + max_epochs: 2 # PTL default. In practice, max_steps will be reached first. + max_steps: -1 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + strategy: + bucket_cap_mb: 256 + gradient_as_bucket_view: True + find_unused_parameters: False + allreduce_precision: 32 + +exp_manager: + exp_dir: null + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: + project: stable-diffusion + group: nemo-sd + name: ${name} + resume: True + create_checkpoint_callback: True + create_tensorboard_logger: True + checkpoint_callback_params: + every_n_train_steps: 1000 + every_n_epochs: 0 + monitor: reduced_train_loss + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + base_learning_rate: 1.0e-4 + linear_start: 0.00085 + linear_end: 0.012 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: images + cond_stage_key: captions # txt for cifar, caption for pbss + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn # check + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + scale_by_std: False + ckpt_path: + ignore_keys: [ ] + parameterization: eps + clip_denoised: True + load_only_unet: False + cosine_s: 8e-3 + given_betas: + original_elbo_weight: 0 + v_posterior: 0 + l_simple_weight: 1 + use_positional_encodings: False + learn_logvar: False + logvar_init: 0 + beta_schedule: linear + loss_type: l2 + learning_rate: 1.0e-04 + concat_mode: True + cond_stage_forward: + text_embedding_dropout_rate: 0.1 + fused_opt: True + inductor: False + inductor_cudagraphs: False + + unet_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel + from_pretrained: + from_NeMo: #Must be specified when from pretrained is not None, False means loading unet from HF ckpt + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: + - 4 + - 2 + - 1 + num_res_blocks: 2 + channel_mult: + - 1 + - 2 + - 4 + - 4 + num_heads: 8 + use_spatial_transformer: true + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: False + + first_stage_config: + _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL + from_pretrained: /ckpts/vae.bin + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 #Never used + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [ ] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder + version: openai/clip-vit-large-patch14 + device: cuda + max_length: 77 + + + scheduler_config: + cls: nemo.collections.multimodal.parts.stable_diffusion.lr_scheduler.LambdaLinearScheduler + warm_up_steps: [ 0 ] + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.0e-6 ] + f_max: [ 1.e-4 ] + f_min: [ 1.e-10 ] + + + data: + num_workers: 16 + train: + batch_size: 4 + dataset_path: + - /datasets/coyo/test.pkl + augmentations: + resize_smallest_side: 512 + center_crop_h_w: 512, 512 + horizontal_flip: False + filterings: + + webdataset: + infinite_sampler: True + local_root_path: /datasets/coyo diff --git a/examples/multimodal/generative/stable_diffusion/generate_fid_images.py b/examples/multimodal/generative/stable_diffusion/generate_fid_images.py new file mode 100644 index 000000000000..e305f0cad773 --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/generate_fid_images.py @@ -0,0 +1,85 @@ +import os +import time +import torch +from omegaconf.omegaconf import open_dict + +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.multimodal.parts.stable_diffusion.pipeline import pipeline +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.config import hydra_runner + + +@hydra_runner(config_path='conf', config_name='sd_fid_images') +def main(cfg): + # Read configuration parameters + nnodes_per_cfg = cfg.fid.nnodes_per_cfg + ntasks_per_node = cfg.fid.ntasks_per_node + local_task_id = cfg.fid.local_task_id + num_images_to_eval = cfg.fid.num_images_to_eval + path = cfg.fid.coco_captions_path + + node_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0)) + node_id_per_cfg = node_id % nnodes_per_cfg + + current_node_cfg = cfg.fid.classifier_free_guidance[node_id // nnodes_per_cfg] + with open_dict(cfg): + cfg.infer.unconditional_guidance_scale = current_node_cfg + save_path = os.path.join(cfg.fid.save_path, str(current_node_cfg)) + + # Read and store captions + captions = [] + caption_files = sorted(os.listdir(path)) + assert len(caption_files) >= num_images_to_eval + for file in caption_files[:num_images_to_eval]: + with open(os.path.join(path, file), 'r') as f: + captions += f.readlines() + + # Calculate partition sizes and select the partition for the current node + partition_size_per_node = num_images_to_eval // nnodes_per_cfg + start_idx = node_id_per_cfg * partition_size_per_node + end_idx = (node_id_per_cfg + 1) * partition_size_per_node if node_id_per_cfg != nnodes_per_cfg - 1 else None + captions = captions[start_idx:end_idx] + + local_task_id = int(local_task_id) if local_task_id is not None else int(os.environ.get("SLURM_LOCALID", 0)) + partition_size_per_task = int(len(captions) // ntasks_per_node) + + # Select the partition for the current task + start_idx = local_task_id * partition_size_per_task + end_idx = (local_task_id + 1) * partition_size_per_task if local_task_id != ntasks_per_node - 1 else None + input = captions[start_idx:end_idx] + + print(f"Current worker {node_id}:{local_task_id} will generate {len(input)} images") + + os.makedirs(save_path, exist_ok=True) + + # Modify the model configuration + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + model_cfg.unet_config.use_flash_attention = False + model_cfg.unet_config.from_pretrained = None + model_cfg.first_stage_config.from_pretrained = None + model_cfg.global_batch_size = model_cfg.micro_batch_size * ntasks_per_node + + # Set up the trainer and model for inference + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronLatentDiffusion, + cfg=cfg, + model_cfg_modifier=model_cfg_modifier + ) + model = megatron_diffusion_model.model + model.cuda().eval() + + # Generate images using the model and save them + for i, prompt in enumerate(input): + cfg.infer.prompts = [prompt] + rng = torch.Generator().manual_seed(cfg.infer.seed + local_task_id * 10 + node_id_per_cfg * 100 + i * 1000) + output = pipeline(model, cfg, rng=rng) + for image in output[0]: + image_num = i + partition_size_per_node * node_id_per_cfg + partition_size_per_task * local_task_id + image.save(os.path.join(save_path, f'image{image_num:06d}.png')) + + +if __name__ == "__main__": + main() diff --git a/examples/multimodal/generative/stable_diffusion/sd_export.py b/examples/multimodal/generative/stable_diffusion/sd_export.py new file mode 100644 index 000000000000..062c32a85bb8 --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/sd_export.py @@ -0,0 +1,198 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import gc +import os +import time +import torch +import torch.nn as nn +from omegaconf.omegaconf import OmegaConf, open_dict + +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.config import hydra_runner +from nemo.utils.trt_utils import build_engine + + +@hydra_runner(config_path='conf', config_name='sd_export') +def main(cfg): + # setup default values for inference configs + + batch_size = cfg.infer.get('num_images_per_prompt', 1) + height = cfg.infer.get('height', 512) + width = cfg.infer.get('width', 512) + downsampling_factor = cfg.infer.get('down_factor', 8) + fp16 = 16 == cfg.trainer.get("precision", 32) + if cfg.trainer.get("precision", 32) == "bf16": + print("BF16 not supported for export, will use fp32") + + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + model_cfg.unet_config.use_flash_attention = False + + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronLatentDiffusion, + cfg=cfg, + model_cfg_modifier=model_cfg_modifier + ) + model = megatron_diffusion_model.model + model.cuda().eval() + + in_channels = model.model.diffusion_model.in_channels + shape_of_internal = [in_channels, height // downsampling_factor, width // downsampling_factor] + fake_text = [""] + out = model.cond_stage_model(fake_text) + + output_dir = cfg.infer.out_path + os.makedirs(f"{output_dir}/onnx/unet/", exist_ok=True) + os.makedirs(f"{output_dir}/onnx/clip/", exist_ok=True) + os.makedirs(f"{output_dir}/onnx/vae/", exist_ok=True) + os.makedirs(f"{output_dir}/plan/", exist_ok=True) + deployment_conf = OmegaConf.create({ + 'clip': OmegaConf.create({}), + 'unet': OmegaConf.create({}), + 'vae': OmegaConf.create({}), + 'sampler': OmegaConf.create({}), + 'batch_size': batch_size, + 'downsampling_factor': downsampling_factor, + 'in_channels': in_channels, + 'height': height, + 'width': width, + }) + deployment_conf.sampler.eta = cfg.infer.get('eta', 0) + deployment_conf.sampler.inference_steps = cfg.infer.get('inference_steps', 50) + deployment_conf.sampler.sampler_type = cfg.infer.get('sampler_type', "ddim") + + ### UNet Export + x, t = torch.randn(2, *shape_of_internal, device="cuda"), torch.randint(high=10, size=(2,), device="cuda") + cc = torch.randn(2, out.shape[1], out.shape[2], device="cuda") + input_names = ["x", "t", "context"] + output_names = ["logits"] + torch.onnx.export(model.model.diffusion_model, + (x, t, cc), + f"{output_dir}/onnx/unet/unet.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={ + "x": {0: 'B'}, + "t": {0: 'B'}, + "context": {0: 'B'} + }, + opset_version=17) + input_profile_unet = {} + input_profile_unet["x"] = [(2 * batch_size, *(x.shape[1:]))] * 3 + input_profile_unet["t"] = [(2 * batch_size, *(t.shape[1:]))] * 3 + input_profile_unet["context"] = [(2 * batch_size, *(cc.shape[1:]))] * 3 + deployment_conf.unet.x = input_profile_unet["x"][0] + deployment_conf.unet.t = input_profile_unet["t"][0] + deployment_conf.unet.context = input_profile_unet["context"][0] + deployment_conf.unet.logits = input_profile_unet["x"][0] + + ### VAE Export + class VAEWrapper(nn.Module): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, z): + outputs = self.model.decode(z=z) + return outputs + + input_names = ["z"] + output_names = ["logits"] + z = torch.randn(1, *shape_of_internal, device="cuda") + torch.onnx.export(VAEWrapper(model.first_stage_model), + (z,), + f"{output_dir}/onnx/vae/vae.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={ + "z": {0: 'B'}, + "logits": {0: 'B'} + }, + opset_version=17) + input_profile_vae = {} + input_profile_vae["z"] = [(batch_size, *(z.shape[1:]))] * 3 + deployment_conf.vae.z = input_profile_vae["z"][0] + + ### CLIP Export + class CLIPWrapper(nn.Module): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, input_ids): + outputs = self.model(input_ids=input_ids) + return outputs.last_hidden_state + + input_names = ["tokens"] + output_names = ["logits"] + tokens = torch.randint(high=10, size=(1, model.cond_stage_model.max_length), device="cuda") + torch.onnx.export(CLIPWrapper(model.cond_stage_model.transformer), + (tokens,), + f"{output_dir}/onnx/clip/clip.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={ + "tokens": {0: 'B'}, + "logits": {0: 'B'} + }, + opset_version=17, + do_constant_folding=True, + export_params=True) + input_profile_clip = {} + input_profile_clip["tokens"] = [(batch_size, *(tokens.shape[1:]))] * 3 + deployment_conf.clip.tokens = input_profile_clip["tokens"][0] + deployment_conf.clip.logits = (batch_size, model.cond_stage_model.max_length, out.shape[2]) + deployment_conf.clip.unconditional_guidance_scale = cfg.infer.get("unconditional_guidance_scale", 7.5) + deployment_conf.clip.max_length = model.cond_stage_model.max_length + with open(f"{output_dir}/plan/conf.yaml", "wb") as f: + OmegaConf.save(config=deployment_conf, f=f.name) + del model, trainer, megatron_diffusion_model, x, t, cc, z, tokens, out + torch.cuda.empty_cache() + gc.collect() + + build_engine( + f"{output_dir}/onnx/unet/unet.onnx", + f"{output_dir}/plan/unet.plan", + fp16=fp16, + input_profile=input_profile_unet, + timing_cache=None, + workspace_size=0, + ) + build_engine( + f"{output_dir}/onnx/vae/vae.onnx", + f"{output_dir}/plan/vae.plan", + fp16=fp16, + input_profile=input_profile_vae, + timing_cache=None, + workspace_size=0, + ) + build_engine( + f"{output_dir}/onnx/clip/clip.onnx", + f"{output_dir}/plan/clip.plan", + fp16=fp16, + input_profile=input_profile_clip, + timing_cache=None, + workspace_size=0, + ) + + +if __name__ == "__main__": + main() diff --git a/examples/multimodal/generative/stable_diffusion/sd_infer.py b/examples/multimodal/generative/stable_diffusion/sd_infer.py new file mode 100644 index 000000000000..0de9d4ed32f7 --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/sd_infer.py @@ -0,0 +1,46 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.multimodal.parts.stable_diffusion.pipeline import pipeline +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.config import hydra_runner + + +@hydra_runner(config_path='conf', config_name='sd_infer') +def main(cfg): + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + model_cfg.unet_config.use_flash_attention = False + model_cfg.unet_config.from_pretrained = None + model_cfg.first_stage_config.from_pretrained = None + + torch.backends.cuda.matmul.allow_tf32 = True + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronLatentDiffusion, + cfg=cfg, + model_cfg_modifier=model_cfg_modifier + ) + model = megatron_diffusion_model.model + model.cuda().eval() + + rng = torch.Generator().manual_seed(cfg.infer.seed) + pipeline(model, cfg, rng=rng) + + +if __name__ == "__main__": + main() diff --git a/examples/multimodal/generative/stable_diffusion/sd_train.py b/examples/multimodal/generative/stable_diffusion/sd_train.py new file mode 100644 index 000000000000..4339971cfa8a --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/sd_train.py @@ -0,0 +1,88 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import numpy as np +import os +import pytorch_lightning as pl +import torch +import torch.utils.data as data +from datetime import timedelta +from hydra.utils import instantiate +from omegaconf import OmegaConf +from pytorch_lightning.strategies.ddp import DDPStrategy +from torch.distributed.algorithms.ddp_comm_hooks.default_hooks import fp16_compress_hook + +from nemo.collections.multimodal.data.stable_diffusion.wds_sampler import WebDataloaderSamplerCallback +from nemo.collections.multimodal.data.stable_diffusion.webdataset import WebDatasetWithRawText +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm_legacy import LatentDiffusion +from nemo.collections.multimodal.models.stable_diffusion.ldm_config import LatentDiffusionModelConfig +from nemo.core.config import hydra_runner +from nemo.utils.exp_manager import StatelessTimer, exp_manager + + +@hydra_runner(config_path='conf', config_name='sd_train.yaml') +def main(cfg): + pl.seed_everything(42) + + # Tune for DDP + if isinstance(cfg.trainer.strategy, str): + strategy = cfg.trainer.strategy + else: + ddp_config = dict(cfg.trainer.strategy) + if str(ddp_config.pop("allreduce_precision", "32")) == "16": # can be bf16 + ddp_config["ddp_comm_hook"] = fp16_compress_hook + ddp_config["timeout"] = timedelta(seconds=180) + strategy = DDPStrategy(**ddp_config) + del cfg.trainer.strategy + + batch_size = cfg.model.data.train.batch_size + dataset = WebDatasetWithRawText( + dataset_cfg=cfg.model.data, + is_train=True, + ) + data = torch.utils.data.DataLoader(dataset, + batch_size=batch_size, + num_workers=cfg.model.data.num_workers, + pin_memory=True, + drop_last=False) + global_bs = cfg.trainer.devices * cfg.trainer.num_nodes * batch_size + + callbacks = [] + if not cfg.model.data.webdataset.infinite_sampler: + wds_sampler = WebDataloaderSamplerCallback( + batch_size=batch_size, + gradient_accumulation=cfg.trainer.accumulate_grad_batches) + callbacks.append(wds_sampler) + + plugins = [] + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + trainer = pl.Trainer(**cfg.trainer, + plugins=plugins, + callbacks=callbacks, + strategy=strategy) + exp_manager(trainer, cfg.get("exp_manager", None)) + if not cfg.model.data.webdataset.infinite_sampler and trainer._checkpoint_connector.resume_from_checkpoint_fit_path is not None: + # Reusming from previous training session + wds_sampler.resume_flag = True + + model = LatentDiffusion(cfg.model, trainer).cuda() + model.learning_rate = cfg.model.base_learning_rate * global_bs * cfg.trainer.accumulate_grad_batches + + trainer.fit(model, data) + + +if __name__ == '__main__': + main() diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_inference.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_inference.yaml index 05099917e912..33ca3f06ddfe 100644 --- a/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_inference.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_prompt_learning_inference.yaml @@ -24,6 +24,7 @@ pipeline_model_parallel_size: -1 gpt_model_file: null # GPT nemo file path virtual_prompt_model_file: ??? # path to a MegatronGPTPromptLearningModel model if you want to use soft prompts pred_file_path: ??? # Path will model predictions will be written +max_seq_length: 8192 # this will filter out inputs whose length is longer than the set value form the generation process. data_paths: # paths to .jsonl files you want to perform inference on num_workers: 8 \ No newline at end of file diff --git a/examples/nlp/language_modeling/megatron_bart_pretraining.py b/examples/nlp/language_modeling/megatron_bart_pretraining.py index f47ea5963cb6..761fb3bd061c 100644 --- a/examples/nlp/language_modeling/megatron_bart_pretraining.py +++ b/examples/nlp/language_modeling/megatron_bart_pretraining.py @@ -36,7 +36,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( no_ddp_communication_hook=True, @@ -51,7 +51,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_bert_pretraining.py b/examples/nlp/language_modeling/megatron_bert_pretraining.py index e6abee295a1a..1f8fff9e92a0 100644 --- a/examples/nlp/language_modeling/megatron_bert_pretraining.py +++ b/examples/nlp/language_modeling/megatron_bert_pretraining.py @@ -37,7 +37,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' plugins = [] @@ -54,7 +54,7 @@ def main(cfg) -> None: init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), growth_interval=cfg.model.get('native_amp_growth_interval', 1000), ) - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_gpt_pretraining.py b/examples/nlp/language_modeling/megatron_gpt_pretraining.py index 596e3c59e753..cb2b8f7d52c0 100644 --- a/examples/nlp/language_modeling/megatron_gpt_pretraining.py +++ b/examples/nlp/language_modeling/megatron_gpt_pretraining.py @@ -38,7 +38,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' plugins = [] @@ -55,7 +55,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py b/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py index 3e3ed72453be..191e560e9659 100644 --- a/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py +++ b/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py @@ -47,7 +47,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=False,) @@ -62,7 +62,7 @@ def main(cfg) -> None: if cfg.model.pipeline_model_parallel_size > 1 else True, # turn off the grad scale for pipeline parallel LM model ) - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_lm_ckpt_to_nemo.py b/examples/nlp/language_modeling/megatron_lm_ckpt_to_nemo.py index 5404e4b18f8b..f7785041c59b 100644 --- a/examples/nlp/language_modeling/megatron_lm_ckpt_to_nemo.py +++ b/examples/nlp/language_modeling/megatron_lm_ckpt_to_nemo.py @@ -171,7 +171,7 @@ def parse_weights(weight_dict: OrderedDict, parent_key: str, total: list, conver converted[final_key] = weight_dict[key] -def add_optimizer_state(lm_checkpoint, new_checkpoint, megatron_amp_o2=True): +def add_optimizer_state(lm_checkpoint, new_checkpoint, megatron_amp_O2=True): # this method is to convert lm_checkpoint optimizer states for nemo checkpoint OPTIMIZER_KEY = 'optimizer' FP32_FP16_KEY = 'fp32_from_fp16_params' @@ -182,7 +182,7 @@ def add_optimizer_state(lm_checkpoint, new_checkpoint, megatron_amp_o2=True): NEW_LR_SCHEDULER = 'lr_schedulers' if OPTIMIZER_KEY in lm_checkpoint and OPTIMIZER_KEY in lm_checkpoint[OPTIMIZER_KEY]: opt_state = lm_checkpoint[OPTIMIZER_KEY][OPTIMIZER_KEY] - if megatron_amp_o2: + if megatron_amp_O2: opt_dict = dict() if LR_SCHEDULER in lm_checkpoint: sched = lm_checkpoint[LR_SCHEDULER] diff --git a/examples/nlp/language_modeling/megatron_retro_cal_shape.py b/examples/nlp/language_modeling/megatron_retro_cal_shape.py index 7dc1f1dcf207..866de2368863 100644 --- a/examples/nlp/language_modeling/megatron_retro_cal_shape.py +++ b/examples/nlp/language_modeling/megatron_retro_cal_shape.py @@ -29,10 +29,10 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( - no_ddp_communication_hook=True if megatron_amp_o2 else False, + no_ddp_communication_hook=True if megatron_amp_O2 else False, gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, find_unused_parameters=False, ) @@ -45,7 +45,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(NativeMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_retro_fine_tune.py b/examples/nlp/language_modeling/megatron_retro_fine_tune.py index 60f0a58b97fd..b02dadb3a755 100644 --- a/examples/nlp/language_modeling/megatron_retro_fine_tune.py +++ b/examples/nlp/language_modeling/megatron_retro_fine_tune.py @@ -73,10 +73,10 @@ def main(cfg) -> None: # import torch.multiprocessing as mp # mp.set_start_method("spawn", force=True) ##################################################### - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( - no_ddp_communication_hook=True if megatron_amp_o2 else False, + no_ddp_communication_hook=True if megatron_amp_O2 else False, gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, find_unused_parameters=False, timeout=datetime.timedelta(seconds=18000), @@ -90,7 +90,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(NativeMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_retro_mutransfer_pretrain.py b/examples/nlp/language_modeling/megatron_retro_mutransfer_pretrain.py index e785b3ef88c9..df6bfb8c9a16 100644 --- a/examples/nlp/language_modeling/megatron_retro_mutransfer_pretrain.py +++ b/examples/nlp/language_modeling/megatron_retro_mutransfer_pretrain.py @@ -35,10 +35,10 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( - no_ddp_communication_hook=True if megatron_amp_o2 else False, + no_ddp_communication_hook=True if megatron_amp_O2 else False, gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, find_unused_parameters=False, ) @@ -51,7 +51,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(NativeMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_retro_pretraining.py b/examples/nlp/language_modeling/megatron_retro_pretraining.py index 4ab0aa9e63e1..a21c5c3c7ccb 100644 --- a/examples/nlp/language_modeling/megatron_retro_pretraining.py +++ b/examples/nlp/language_modeling/megatron_retro_pretraining.py @@ -38,10 +38,10 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( - no_ddp_communication_hook=True if megatron_amp_o2 else False, + no_ddp_communication_hook=True if megatron_amp_O2 else False, gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, find_unused_parameters=False, ) @@ -54,7 +54,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(NativeMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_t5_lm_adaptation_finetune.py b/examples/nlp/language_modeling/megatron_t5_lm_adaptation_finetune.py index 59cbc69f820e..d367515b824b 100644 --- a/examples/nlp/language_modeling/megatron_t5_lm_adaptation_finetune.py +++ b/examples/nlp/language_modeling/megatron_t5_lm_adaptation_finetune.py @@ -37,7 +37,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce @@ -52,7 +52,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_t5_pretraining.py b/examples/nlp/language_modeling/megatron_t5_pretraining.py index f7d4afe2d75f..4023482ba5c4 100644 --- a/examples/nlp/language_modeling/megatron_t5_pretraining.py +++ b/examples/nlp/language_modeling/megatron_t5_pretraining.py @@ -36,7 +36,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' plugins = [] strategy = NLPDDPStrategy( @@ -52,7 +52,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py b/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py index fcda65af8661..cb8097645953 100644 --- a/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py +++ b/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py @@ -71,7 +71,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( no_ddp_communication_hook=True, @@ -86,7 +86,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(NativeMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py b/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py index 010db82cbcbe..5ffe26c38563 100644 --- a/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py +++ b/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py @@ -135,7 +135,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( no_ddp_communication_hook=True, @@ -150,7 +150,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py b/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py index 45f6d2006533..187a0cc6c3f5 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py @@ -60,7 +60,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' plugins = [] @@ -77,7 +77,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py index d1ea73d0d79a..14eda3af3492 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py @@ -60,7 +60,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' plugins = [] @@ -77,7 +77,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py index 8cbb94876c61..4ac880ef8ba2 100644 --- a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py @@ -60,7 +60,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' plugins = [] @@ -77,7 +77,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py index cd1e48df657d..0b0b081bd6c2 100644 --- a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py @@ -60,7 +60,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' plugins = [] @@ -77,7 +77,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/machine_translation/megatron_nmt_training.py b/examples/nlp/machine_translation/megatron_nmt_training.py index 7fd211447196..e0ea73b62b6e 100644 --- a/examples/nlp/machine_translation/megatron_nmt_training.py +++ b/examples/nlp/machine_translation/megatron_nmt_training.py @@ -42,7 +42,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( no_ddp_communication_hook=True, @@ -57,7 +57,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/vision/convert_ckpt_to_nemo.py b/examples/vision/convert_ckpt_to_nemo.py new file mode 100644 index 000000000000..599ec8e089c7 --- /dev/null +++ b/examples/vision/convert_ckpt_to_nemo.py @@ -0,0 +1,152 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r""" +Conversion script to convert PTL checkpoints into nemo checkpoint. + Example to run this conversion script: + python -m torch.distributed.launch --nproc_per_node= * \ + convert_ckpt_to_nemo.py \ + --checkpoint_folder \ + --checkpoint_name \ + --nemo_file_path \ + --tensor_model_parallel_size \ + --pipeline_model_parallel_size +""" + +import os +import torch +from apex.transformer import parallel_state +from argparse import ArgumentParser +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel +from nemo.utils import AppState, logging +from nemo.utils.distributed import initialize_distributed +from nemo.utils.model_utils import inject_model_parallel_rank + + +def get_args(): + parser = ArgumentParser() + parser.add_argument( + "--checkpoint_folder", + type=str, + default=None, + required=True, + help="Path to PTL checkpoints saved during training. Ex: /raid/nemo_experiments/megatron_gpt/checkpoints", + ) + parser.add_argument( + "--checkpoint_name", + type=str, + default=None, + required=True, + help="Name of checkpoint to be used. Ex: megatron_gpt--val_loss=6.34-step=649-last.ckpt", + ) + + parser.add_argument( + "--hparams_file", + type=str, + default=None, + required=False, + help="Path config for restoring. It's created during training and may need to be modified during restore if restore environment is different than training. Ex: /raid/nemo_experiments/megatron_gpt/hparams.yaml", + ) + parser.add_argument("--nemo_file_path", type=str, default=None, required=True, help="Path to output .nemo file.") + parser.add_argument("--gpus_per_node", type=int, required=True, default=None) + parser.add_argument("--tensor_model_parallel_size", type=int, required=True, default=None) + parser.add_argument("--pipeline_model_parallel_size", type=int, required=True, default=None) + parser.add_argument( + "--pipeline_model_parallel_split_rank", + type=int, + required=False, + default=None, + help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", + ) + parser.add_argument( + "--model_type", type=str, required=True, default="vit_classification" + ) + parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) + parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") + + args = parser.parse_args() + return args + + +def convert(local_rank, rank, world_size, args): + app_state = AppState() + app_state.data_parallel_rank = 0 + num_nodes = world_size // args.gpus_per_node + if args.bcp: + trainer = Trainer( + devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu', plugins=[TorchElasticEnvironment()] + ) + else: + trainer = Trainer(devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu') + + app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = args.tensor_model_parallel_size + + # no use atm, use to split ranks in encoder/decoder models. + if args.pipeline_model_parallel_size > 1 and args.model_type in []: + if args.pipeline_model_parallel_split_rank is not None: + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_split_rank + else: + if args.pipeline_model_parallel_size % 2 != 0: + raise ValueError( + f"Pipeline model parallel size {args.pipeline_model_parallel_size} must be even if split rank is not specified." + ) + else: + # If split rank is not set, then we set it to be pipeline_model_parallel_size // 2 - this is because in most cases we have the same number of enc/dec layers. + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_size // 2 + else: + app_state.pipeline_model_parallel_split_rank = None + + app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size + + parallel_state.initialize_model_parallel( + tensor_model_parallel_size_=app_state.tensor_model_parallel_size, + pipeline_model_parallel_size_=app_state.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank_=app_state.pipeline_model_parallel_split_rank, + ) + + app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() + app_state.tensor_model_parallel_rank = parallel_state.get_tensor_model_parallel_rank() + + # inject model parallel rank + checkpoint_path = inject_model_parallel_rank(os.path.join(args.checkpoint_folder, args.checkpoint_name)) + + logging.info( + f'rank: {rank}, local_rank: {local_rank}, is loading checkpoint: {checkpoint_path} for tp_rank: {app_state.tensor_model_parallel_rank} and pp_rank: {app_state.pipeline_model_parallel_rank}' + ) + + if args.model_type == 'vit_classification': + model = MegatronVitClassificationModel.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, + trainer=trainer) + else: + raise ValueError(f"Unrecognized model_type {args.model_type}.") + + model._save_restore_connector = NLPSaveRestoreConnector() + + if torch.distributed.is_initialized(): + torch.distributed.barrier() + + model.save_to(args.nemo_file_path) + + logging.info(f'NeMo model saved to: {args.nemo_file_path}') + + +if __name__ == '__main__': + args = get_args() + local_rank, rank, world_size = initialize_distributed(args) + convert(local_rank, rank, world_size, args) diff --git a/examples/vision/vision_transformer/conf/megatron_vit_classification_config.yaml b/examples/vision/vision_transformer/conf/megatron_vit_classification_config.yaml new file mode 100755 index 000000000000..ef6486b8b584 --- /dev/null +++ b/examples/vision/vision_transformer/conf/megatron_vit_classification_config.yaml @@ -0,0 +1,163 @@ +# shared by ViT classification pretraining and fine-tuning + +name: megatron_vit_classify +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + replace_sampler_ddp: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 95000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + val_check_interval: 100 + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: megatron_vit_classification + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_vit_classification--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 256 # limited by GPU memory + global_batch_size: 4096 # will use more micro batches to reach global batch size + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_path: null # used in fine-tuning + + # vision configs + vision_pretraining_type: "classify" + num_classes: 1000 + patch_dim: 16 + img_h: 224 + img_w: 224 + classes_fraction: 1.0 + data_per_class_fraction: 1.0 + num_channels: 3 + drop_path_rate: 0.0 + + # model architecture + encoder_seq_length: 196 + max_position_embeddings: ${.encoder_seq_length} + num_layers: 12 + hidden_size: 768 + ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 12 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0.1 # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # Megatron O2-style half-precision + megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + ## Activation Checkpointing + # NeMo Megatron supports 'selective' activation checkpointing where only the memory intensive part of attention is checkpointed. + # These memory intensive activations are also less compute intensive which makes activation checkpointing more efficient for LLMs (20B+). + # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. + # 'full' will checkpoint the entire transformer layer. + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + # 'uniform' divides the total number of transformer layers and checkpoints the input activation + # of each chunk at the specified granularity + # 'block' checkpoints the specified number of layers per pipeline stage at the specified granularity + activations_checkpoint_num_layers: null # not used with 'selective' + # when using 'uniform' this creates groups of transformer layers to checkpoint. Usually set to 1. Increase to save more memory. + # when using 'block' this this will checkpoint the first activations_checkpoint_num_layers per pipeline stage. + + ## Sequence Parallelism + # Makes tensor parallelism more memory efficient for LLMs (20B+) by parallelizing layer norms and dropout sequentially + # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. + sequence_parallel: False + + data: + # Path to image dataset must be specified by the user. + # Supports List + # List: can override from the CLI: "model.data.data_prefix=[/path/to/train, /path/to/val]", + data_path: ??? + num_workers: 2 + dataloader_type: cyclic # cyclic + validation_drop_last: True # Set to false if the last partial validation samples is to be consumed + data_sharding: False + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.999 + sched: + name: CosineAnnealing + warmup_steps: 10000 + constant_steps: 0 + min_lr: 1e-5 \ No newline at end of file diff --git a/examples/vision/vision_transformer/conf/megatron_vit_classification_evaluate.yaml b/examples/vision/vision_transformer/conf/megatron_vit_classification_evaluate.yaml new file mode 100755 index 000000000000..4b9a71bedc7d --- /dev/null +++ b/examples/vision/vision_transformer/conf/megatron_vit_classification_evaluate.yaml @@ -0,0 +1,15 @@ +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +model: + restore_from_path: null # Path to a trained ViT .nemo file + precision: ${trainer.precision} + micro_batch_size: 512 # we only supports DP=1 eval at the moment, GBS=MBS + + data: + num_workers: 2 + imagenet_val: ??? # path to imagenet val folder \ No newline at end of file diff --git a/examples/vision/vision_transformer/conf/megatron_vit_classification_export.yaml b/examples/vision/vision_transformer/conf/megatron_vit_classification_export.yaml new file mode 100644 index 000000000000..cbcca277b3f5 --- /dev/null +++ b/examples/vision/vision_transformer/conf/megatron_vit_classification_export.yaml @@ -0,0 +1,16 @@ +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + + +infer: + out_path: 'megatron-vit' + max_batch_size: 64 + max_dim: 384 + +model: + restore_from_path: null # Path to a trained ViT .nemo file + precision: ${trainer.precision} diff --git a/examples/vision/vision_transformer/conf/megatron_vit_classification_infer.yaml b/examples/vision/vision_transformer/conf/megatron_vit_classification_infer.yaml new file mode 100755 index 000000000000..553abb5bc23b --- /dev/null +++ b/examples/vision/vision_transformer/conf/megatron_vit_classification_infer.yaml @@ -0,0 +1,12 @@ +data_path: ??? # Path to a image folder for inference + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +model: + restore_from_path: null # Path to a trained ViT .nemo file + precision: ${trainer.precision} diff --git a/examples/vision/vision_transformer/megatron_vit_classification_evaluate.py b/examples/vision/vision_transformer/megatron_vit_classification_evaluate.py new file mode 100644 index 000000000000..037113ca6571 --- /dev/null +++ b/examples/vision/vision_transformer/megatron_vit_classification_evaluate.py @@ -0,0 +1,137 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import glob +import os +import torch +from PIL import Image +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from torch.utils.data import DataLoader, Dataset +from tqdm import tqdm + +from nemo.collections.nlp.parts.nlp_overrides import ( + NLPDDPStrategy, + NLPSaveRestoreConnector, +) +from nemo.collections.vision.data.imagenet_classnames import imagenet_classnames +from nemo.collections.vision.data.megatron.image_folder import ImageFolder +from nemo.collections.vision.data.megatron.vit_dataset import ClassificationTransform +from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.get_rank import is_global_rank_zero + + +@hydra_runner(config_path="conf", config_name="megatron_vit_classification_evaluate") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + find_unused_parameters=False, + ) + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + # trainer required for restoring model parallel models + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.model.restore_from_path): + save_restore_connector.model_extracted_dir = cfg.model.restore_from_path + + model_cfg = MegatronVitClassificationModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + save_restore_connector=save_restore_connector, + return_config=True, + ) + + assert ( + cfg.trainer.devices * cfg.trainer.num_nodes + == model_cfg.tensor_model_parallel_size * model_cfg.pipeline_model_parallel_size + ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" + + # These configs are required to be off during inference. + with open_dict(model_cfg): + model_cfg.precision = trainer.precision + if trainer.precision != "bf16": + model_cfg.megatron_amp_O2 = False + model_cfg.sequence_parallel = False + model_cfg.activations_checkpoint_granularity = None + model_cfg.activations_checkpoint_method = None + + model = MegatronVitClassificationModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + override_config_path=model_cfg, + save_restore_connector=save_restore_connector, + strict=True, + ) + + model.eval() + + val_transform = ClassificationTransform( + model.cfg, + (model.cfg.img_h, model.cfg.img_w), + train=False + ) + val_data = ImageFolder( + root=cfg.model.data.imagenet_val, + transform=val_transform, + ) + + # initialize apex DDP strategy + def dummy(): + return + + if trainer.strategy.launcher is not None: + trainer.strategy.launcher.launch(dummy, trainer=trainer) + trainer.strategy.setup_environment() + + test_loader = DataLoader( + val_data, + batch_size=cfg.model.micro_batch_size, + num_workers=cfg.model.data.num_workers, + ) + + # get autocast_dtype + if trainer.precision == 'bf16': + autocast_dtype = torch.bfloat16 + elif int(trainer.precision) == 32: + autocast_dtype = torch.float + elif int(trainer.precision) == 16: + autocast_dtype = torch.half + else: + raise ValueError('precision must be in [32, 16, "bf16"]') + + with torch.no_grad(), torch.cuda.amp.autocast(enabled=autocast_dtype in (torch.half, torch.bfloat16), + dtype=autocast_dtype, ): + total = correct = 0. + for tokens, labels in tqdm(test_loader): + logits = model(tokens.cuda()) + class_indices = torch.argmax(logits, -1) + correct += (class_indices == labels.cuda()).float().sum() + total += len(labels) + + if is_global_rank_zero: + print(f"ViT Imagenet 1K Evaluation Accuracy: {correct / total:.4f}") + + +if __name__ == '__main__': + main() diff --git a/examples/vision/vision_transformer/megatron_vit_classification_export.py b/examples/vision/vision_transformer/megatron_vit_classification_export.py new file mode 100644 index 000000000000..76ef0d606b69 --- /dev/null +++ b/examples/vision/vision_transformer/megatron_vit_classification_export.py @@ -0,0 +1,122 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import glob +import os +import torch +from PIL import Image +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from torch.utils.data import DataLoader, Dataset + +from nemo.collections.nlp.parts.nlp_overrides import ( + NLPDDPStrategy, + NLPSaveRestoreConnector, +) +from nemo.collections.vision.data.imagenet_classnames import imagenet_classnames +from nemo.collections.vision.data.megatron.vit_dataset import ClassificationTransform +from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.get_rank import is_global_rank_zero +from nemo.utils.trt_utils import build_engine + + +@hydra_runner(config_path="conf", config_name="megatron_vit_classification_export") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + output_dir = cfg.infer.out_path + max_batch_size = cfg.infer.max_batch_size + max_dim = cfg.infer.max_dim + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + find_unused_parameters=False, + ) + print(type(cfg.trainer.precision)) + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + trt_precision = cfg.trainer.precision + cfg.trainer.precision = 32 + # trainer required for restoring model parallel models + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.model.restore_from_path): + save_restore_connector.model_extracted_dir = cfg.model.restore_from_path + + model_cfg = MegatronVitClassificationModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + save_restore_connector=save_restore_connector, + return_config=True, + ) + + assert ( + cfg.trainer.devices * cfg.trainer.num_nodes + == model_cfg.tensor_model_parallel_size * model_cfg.pipeline_model_parallel_size + ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" + + # These configs are required to be off during inference. + with open_dict(model_cfg): + model_cfg.precision = int(trainer.precision) if trainer.precision.isdigit() else trainer.precision + print(type(model_cfg.precision)) + if trainer.precision != "bf16": + model_cfg.megatron_amp_O2 = False + model_cfg.sequence_parallel = False + model_cfg.activations_checkpoint_granularity = None + model_cfg.activations_checkpoint_method = None + + model = MegatronVitClassificationModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + override_config_path=model_cfg, + save_restore_connector=save_restore_connector, + strict=True, + ) + + model.eval() + + # initialize apex DDP strategy + def dummy(): + return + + if trainer.strategy.launcher is not None: + trainer.strategy.launcher.launch(dummy, trainer=trainer) + trainer.strategy.setup_environment() + + os.makedirs(f"{output_dir}/onnx/", exist_ok=True) + os.makedirs(f"{output_dir}/plan/", exist_ok=True) + + model.export(f"{output_dir}/onnx/vit.onnx", dynamic_axes={'tokens': {0: 'B'}}) + + input_profile = {} + bs1_example = model.input_example(max_batch=1, max_dim=max_dim)[0] + bsmax_example = model.input_example(max_batch=max_batch_size, max_dim=max_dim)[0] + input_profile['tokens'] = [tuple(bs1_example.shape), tuple(bsmax_example.shape), tuple(bsmax_example.shape)] + build_engine( + f"{output_dir}/onnx/vit.onnx", + f"{output_dir}/plan/vit.plan", + fp16=(trt_precision == 16), + input_profile=input_profile, + timing_cache=None, + workspace_size=0, + ) + + +if __name__ == '__main__': + main() diff --git a/examples/vision/vision_transformer/megatron_vit_classification_finetune.py b/examples/vision/vision_transformer/megatron_vit_classification_finetune.py new file mode 100644 index 000000000000..fd466128105c --- /dev/null +++ b/examples/vision/vision_transformer/megatron_vit_classification_finetune.py @@ -0,0 +1,95 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector + +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PipelineMixedPrecisionPlugin, + NLPSaveRestoreConnector, +) +from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="conf", config_name="megatron_vit_classification_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + ) + if megatron_amp_O2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + exp_manager(trainer, cfg.exp_manager) + + # update resume from checkpoint found by exp_manager + if cfg.model.resume_from_checkpoint is not None: + resume_from_checkpoint = cfg.model.resume_from_checkpoint + else: + resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path + + logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}') + + trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.finetune = True + cfg.model.precision = cfg.trainer.precision + + model = MegatronVitClassificationModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + override_config_path=cfg.model, + save_restore_connector=NLPSaveRestoreConnector(), + strict=False, + ) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/vision/vision_transformer/megatron_vit_classification_infer.py b/examples/vision/vision_transformer/megatron_vit_classification_infer.py new file mode 100644 index 000000000000..8f65c1fedf53 --- /dev/null +++ b/examples/vision/vision_transformer/megatron_vit_classification_infer.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import glob +import os +import torch +from PIL import Image +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from torch.utils.data import DataLoader, Dataset + +from nemo.collections.nlp.parts.nlp_overrides import ( + NLPDDPStrategy, + NLPSaveRestoreConnector, +) +from nemo.collections.vision.data.imagenet_classnames import imagenet_classnames +from nemo.collections.vision.data.megatron.vit_dataset import ClassificationTransform +from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.get_rank import is_global_rank_zero + +_IMG_EXTENSIONS = "jpg jpeg png ppm pgm pbm pnm".split() + + +class ImageFolderDataset(Dataset): + def __init__(self, folder_path, transform=None): + self.folder_path = folder_path + self.transform = transform + # Use glob to find all image files in folder_path + image_paths = [] + for ext in _IMG_EXTENSIONS + [x.upper() for x in _IMG_EXTENSIONS]: + search_pattern = os.path.join(folder_path, f"*.{ext}") + image_paths += glob.glob(search_pattern) + self.image_paths = image_paths + + def __len__(self): + return len(self.image_paths) + + def __getitem__(self, idx): + image_path = self.image_paths[idx] + image = Image.open(image_path).convert('RGB') + if self.transform is not None: + image = self.transform(image) + return image + + +@hydra_runner(config_path="conf", config_name="megatron_vit_classification_infer") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + find_unused_parameters=False, + ) + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + # trainer required for restoring model parallel models + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.model.restore_from_path): + save_restore_connector.model_extracted_dir = cfg.model.restore_from_path + + model_cfg = MegatronVitClassificationModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + save_restore_connector=save_restore_connector, + return_config=True, + ) + + assert ( + cfg.trainer.devices * cfg.trainer.num_nodes + == model_cfg.tensor_model_parallel_size * model_cfg.pipeline_model_parallel_size + ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" + + # These configs are required to be off during inference. + with open_dict(model_cfg): + model_cfg.precision = trainer.precision + if trainer.precision != "bf16": + model_cfg.megatron_amp_O2 = False + model_cfg.sequence_parallel = False + model_cfg.activations_checkpoint_granularity = None + model_cfg.activations_checkpoint_method = None + + model = MegatronVitClassificationModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + override_config_path=model_cfg, + save_restore_connector=save_restore_connector, + strict=True, + ) + + model.eval() + + test_transform = ClassificationTransform( + cfg.model, + (model_cfg.img_h, model_cfg.img_w), + train=False + ) + test_data = ImageFolderDataset( + folder_path=cfg.data_path, + transform=test_transform, + ) + test_loader = DataLoader(test_data, batch_size=8) + + # initialize apex DDP strategy + def dummy(): + return + + if trainer.strategy.launcher is not None: + trainer.strategy.launcher.launch(dummy, trainer=trainer) + trainer.strategy.setup_environment() + + # get autocast_dtype + if trainer.precision == 'bf16': + autocast_dtype = torch.bfloat16 + elif int(trainer.precision) == 32: + autocast_dtype = torch.float + elif int(trainer.precision) == 16: + autocast_dtype = torch.half + else: + raise ValueError('precision must be in [32, 16, "bf16"]') + + with torch.no_grad(), torch.cuda.amp.autocast(enabled=autocast_dtype in (torch.half, torch.bfloat16), + dtype=autocast_dtype, ): + class_names = [] + for tokens in test_loader: + logits = model(tokens.cuda()) + class_indices = torch.argmax(logits, -1) + class_names += [imagenet_classnames[x] for x in class_indices] + + if is_global_rank_zero: + filenames = [os.path.basename(f) for f in test_data.image_paths] + print(f"Predicted classes: ", list(zip(filenames, class_names))) + + +if __name__ == '__main__': + main() diff --git a/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py b/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py new file mode 100644 index 000000000000..5be257e917f8 --- /dev/null +++ b/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py @@ -0,0 +1,87 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector + +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PipelineMixedPrecisionPlugin, +) +from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="conf", config_name="megatron_vit_classification_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + ) + if megatron_amp_O2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + exp_manager(trainer, cfg.exp_manager) + + # update resume from checkpoint found by exp_manager + if cfg.model.resume_from_checkpoint is not None: + resume_from_checkpoint = cfg.model.resume_from_checkpoint + else: + resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path + + logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}') + + trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + model = MegatronVitClassificationModel(cfg.model, trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/nemo/collections/multimodal/__init__.py b/nemo/collections/multimodal/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/__init__.py b/nemo/collections/multimodal/data/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/clip/__init__.py b/nemo/collections/multimodal/data/clip/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/clip/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/clip/augmentations/__init__.py b/nemo/collections/multimodal/data/clip/augmentations/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/clip/augmentations/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/clip/augmentations/augmentations.py b/nemo/collections/multimodal/data/clip/augmentations/augmentations.py new file mode 100644 index 000000000000..12f16793b4f1 --- /dev/null +++ b/nemo/collections/multimodal/data/clip/augmentations/augmentations.py @@ -0,0 +1,103 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is adapted from public repo +https://github.com/mlfoundations/open_clip/blob/28c994406e39a5babc749c76871d92f33e9c558d/src/open_clip/transform.py +by @yaoyu-33 +""" +import torch +import torch.nn as nn +import torchvision.transforms.functional as F +from torchvision.transforms import Normalize, Compose, RandomResizedCrop, InterpolationMode, ToTensor, Resize, \ + CenterCrop +from typing import Optional, Sequence, Tuple + +OPENAI_DATASET_MEAN = (0.48145466, 0.4578275, 0.40821073) +OPENAI_DATASET_STD = (0.26862954, 0.26130258, 0.27577711) + + +class ResizeMaxSize(nn.Module): + + def __init__(self, max_size, interpolation=InterpolationMode.BICUBIC, fn='max', fill=0): + super().__init__() + if not isinstance(max_size, int): + raise TypeError(f"Size should be int. Got {type(max_size)}") + self.max_size = max_size + self.interpolation = interpolation + self.fn = min if fn == 'min' else min + self.fill = fill + + def forward(self, img): + if isinstance(img, torch.Tensor): + height, width = img.shape[:2] + else: + width, height = img.size + scale = self.max_size / float(max(height, width)) + if scale != 1.0: + new_size = tuple(round(dim * scale) for dim in (height, width)) + img = F.resize(img, new_size, self.interpolation) + pad_h = self.max_size - new_size[0] + pad_w = self.max_size - new_size[1] + img = F.pad(img, padding=[pad_w // 2, pad_h // 2, pad_w - pad_w // 2, pad_h - pad_h // 2], fill=self.fill) + return img + + +def _convert_to_rgb(image): + return image.convert('RGB') + + +def image_transform( + image_size: int, + is_train: bool, + mean: Optional[Tuple[float, ...]] = None, + std: Optional[Tuple[float, ...]] = None, + resize_longest_max: bool = False, + fill_color: int = 0, +): + mean = mean or OPENAI_DATASET_MEAN + if not isinstance(mean, (list, tuple)): + mean = (mean,) * 3 + + std = std or OPENAI_DATASET_STD + if not isinstance(std, (list, tuple)): + std = (std,) * 3 + + if isinstance(image_size, (list, tuple)) and image_size[0] == image_size[1]: + # for square size, pass size as int so that Resize() uses aspect preserving shortest edge + image_size = image_size[0] + + normalize = Normalize(mean=mean, std=std) + if is_train: + return Compose([ + RandomResizedCrop(image_size, scale=(0.9, 1.0), interpolation=InterpolationMode.BICUBIC), + _convert_to_rgb, + ToTensor(), + normalize, + ]) + else: + if resize_longest_max: + transforms = [ + ResizeMaxSize(image_size, fill=fill_color) + ] + else: + transforms = [ + Resize(image_size, interpolation=InterpolationMode.BICUBIC), + CenterCrop(image_size), + ] + transforms.extend([ + _convert_to_rgb, + ToTensor(), + normalize, + ]) + return Compose(transforms) diff --git a/nemo/collections/multimodal/data/clip/clip_dataset.py b/nemo/collections/multimodal/data/clip/clip_dataset.py new file mode 100644 index 000000000000..927639a86ce1 --- /dev/null +++ b/nemo/collections/multimodal/data/clip/clip_dataset.py @@ -0,0 +1,193 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +from functools import partial +from torch.utils.data import Dataset +from typing import Any, List, Union, Dict, Optional + +from nemo.collections.multimodal.data.clip.augmentations.augmentations import image_transform +from nemo.collections.multimodal.data.clip.imagenet_zeroshot_data import openai_imagenet_template, imagenet_classnames +from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon +from nemo.collections.nlp.data.language_modeling.megatron.megatron_batch_samplers import MegatronPretrainingBatchSampler +from nemo.collections.vision.data.megatron.image_folder import ImageFolder +from nemo.collections.vision.data.megatron.vit_dataset import RandomSeedDataset + +try: + from apex.transformer import parallel_state + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + + +def tokenize(texts: Union[str, List[str]], tokenizer: Any, context_length: int = 77) -> torch.LongTensor: + """ + Returns the tokenized representation of given input string(s) + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + tokenizer: + Tokenizer loaded in NeMo NeMo + context_length : int + The context length to use; all CLIP models use 77 as the context length + + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length] + """ + texts_is_str = False + if isinstance(texts, str): + texts = [texts] + texts_is_str = True + + bos_id = tokenizer.bos_id + eos_id = tokenizer.eos_id + all_tokens = [[bos_id] + tokenizer.text_to_ids(text) + [eos_id] for text in texts] + result = torch.zeros(len(all_tokens), context_length, dtype=torch.long) + + for i, tokens in enumerate(all_tokens): + if len(tokens) > context_length: + tokens = tokens[:context_length] # Truncate + tokens[-1] = eos_id + result[i, :len(tokens)] = torch.tensor(tokens) + + if texts_is_str: + result = result[0] + return result + + +def get_preprocess_fns(model_cfg, tokenizer=None, is_train=True): + # Define transforms + img_size = (model_cfg.vision.get("img_h"), model_cfg.vision.get("img_w")) + img_mean = model_cfg.vision.get("img_mean") + img_std = model_cfg.vision.get("img_std") + img_transform = image_transform( + img_size, + is_train=is_train, + mean=img_mean, + std=img_std, + ) + text_transform = lambda x: x + if tokenizer is not None: + text_transform = partial( + tokenize, + tokenizer=tokenizer, + context_length=model_cfg.text.get("max_position_embeddings"), + ) + return img_transform, text_transform + + +def build_train_valid_datasets( + model_cfg, + consumed_samples, + tokenizer=None, +): + data_cfg = model_cfg.data + + # This function maps data that are tuples to dictionary. + def tuple_to_dict(inp): + for input in inp: + out_dict = dict() + out_dict['images'] = input[0] + out_dict['captions'] = input[1] + yield out_dict + + def transform_fn(sample, img_transform, text_transform): + image, text = sample["jpg"], sample["txt"] + return img_transform(image), text_transform(text) + + train_img_transform, text_transform = get_preprocess_fns(model_cfg, tokenizer, is_train=True) + train_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=partial(transform_fn, img_transform=train_img_transform, text_transform=text_transform), + compose_fn=tuple_to_dict, + is_train=True, + ) + + val_data = None + if data_cfg.get("validation") is not None and data_cfg.validation.get("dataset_path"): + val_img_transform, text_transform = get_preprocess_fns(model_cfg, tokenizer, is_train=False) + val_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=0, + map_fn=partial(transform_fn, img_transform=val_img_transform, text_transform=text_transform), + compose_fn=tuple_to_dict, + is_train=False, + ) + + return train_data, val_data + + +# For zero-shot imagenet validation +def build_imagenet_validation_dataloader(model_cfg, tokenizer=None): + val_image_transform, text_transform = get_preprocess_fns(model_cfg, tokenizer, is_train=False) + data_cfg = model_cfg.data + + imagenet_val = {} + + imagenet_path = data_cfg.get("imagenet_val") + if imagenet_path is None: + return None + + image_dataset = ImageFolder( + root=imagenet_path, + transform=val_image_transform, + ) + # image_dataset = RandomSeedDataset(val_data) + image_batch_sampler = MegatronPretrainingBatchSampler( + total_samples=len(image_dataset), + consumed_samples=0, + micro_batch_size=model_cfg.micro_batch_size, + global_batch_size=model_cfg.global_batch_size, + # TODO (yuya): if grad acc is not 1, this might not work as expected. + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=False, + ) + imagenet_val["images"] = torch.utils.data.DataLoader( + image_dataset, + batch_sampler=image_batch_sampler, + num_workers=min(data_cfg.num_workers, 2), + pin_memory=True, + persistent_workers=True, + ) + + text_dataset = ImagenetClassnameDataset(imagenet_classnames, openai_imagenet_template, text_transform) + imagenet_val["texts"] = torch.utils.data.DataLoader( + text_dataset, + batch_size=text_dataset.num_templates, + num_workers=0, + pin_memory=True, + persistent_workers=False, + drop_last=False, + ) + return imagenet_val + + +class ImagenetClassnameDataset(Dataset): + def __init__(self, classnames, templates, text_transform): + self.num_templates = len(templates) + self.samples = [] + for classname in classnames: + texts = [template(classname) for template in templates] + self.samples.extend(text_transform(texts)) + + def __getitem__(self, index): + return self.samples[index] + + def __len__(self): + return len(self.samples) diff --git a/nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py b/nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py new file mode 100644 index 000000000000..86e3bbec42a0 --- /dev/null +++ b/nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py @@ -0,0 +1,263 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +imagenet_classnames = ["tench", "goldfish", "great white shark", "tiger shark", "hammerhead shark", "electric ray", + "stingray", "rooster", "hen", "ostrich", "brambling", "goldfinch", "house finch", "junco", + "indigo bunting", "American robin", "bulbul", "jay", "magpie", "chickadee", "American dipper", + "kite (bird of prey)", "bald eagle", "vulture", "great grey owl", "fire salamander", + "smooth newt", "newt", "spotted salamander", "axolotl", "American bullfrog", "tree frog", + "tailed frog", "loggerhead sea turtle", "leatherback sea turtle", "mud turtle", "terrapin", + "box turtle", "banded gecko", "green iguana", "Carolina anole", + "desert grassland whiptail lizard", "agama", "frilled-necked lizard", "alligator lizard", + "Gila monster", "European green lizard", "chameleon", "Komodo dragon", "Nile crocodile", + "American alligator", "triceratops", "worm snake", "ring-necked snake", + "eastern hog-nosed snake", "smooth green snake", "kingsnake", "garter snake", "water snake", + "vine snake", "night snake", "boa constrictor", "African rock python", "Indian cobra", + "green mamba", "sea snake", "Saharan horned viper", "eastern diamondback rattlesnake", + "sidewinder rattlesnake", "trilobite", "harvestman", "scorpion", "yellow garden spider", + "barn spider", "European garden spider", "southern black widow", "tarantula", "wolf spider", + "tick", "centipede", "black grouse", "ptarmigan", "ruffed grouse", "prairie grouse", "peafowl", + "quail", "partridge", "african grey parrot", "macaw", "sulphur-crested cockatoo", "lorikeet", + "coucal", "bee eater", "hornbill", "hummingbird", "jacamar", "toucan", "duck", + "red-breasted merganser", "goose", "black swan", "tusker", "echidna", "platypus", "wallaby", + "koala", "wombat", "jellyfish", "sea anemone", "brain coral", "flatworm", "nematode", "conch", + "snail", "slug", "sea slug", "chiton", "chambered nautilus", "Dungeness crab", "rock crab", + "fiddler crab", "red king crab", "American lobster", "spiny lobster", "crayfish", "hermit crab", + "isopod", "white stork", "black stork", "spoonbill", "flamingo", "little blue heron", + "great egret", "bittern bird", "crane bird", "limpkin", "common gallinule", "American coot", + "bustard", "ruddy turnstone", "dunlin", "common redshank", "dowitcher", "oystercatcher", + "pelican", "king penguin", "albatross", "grey whale", "killer whale", "dugong", "sea lion", + "Chihuahua", "Japanese Chin", "Maltese", "Pekingese", "Shih Tzu", "King Charles Spaniel", + "Papillon", "toy terrier", "Rhodesian Ridgeback", "Afghan Hound", "Basset Hound", "Beagle", + "Bloodhound", "Bluetick Coonhound", "Black and Tan Coonhound", "Treeing Walker Coonhound", + "English foxhound", "Redbone Coonhound", "borzoi", "Irish Wolfhound", "Italian Greyhound", + "Whippet", "Ibizan Hound", "Norwegian Elkhound", "Otterhound", "Saluki", "Scottish Deerhound", + "Weimaraner", "Staffordshire Bull Terrier", "American Staffordshire Terrier", + "Bedlington Terrier", "Border Terrier", "Kerry Blue Terrier", "Irish Terrier", + "Norfolk Terrier", "Norwich Terrier", "Yorkshire Terrier", "Wire Fox Terrier", + "Lakeland Terrier", "Sealyham Terrier", "Airedale Terrier", "Cairn Terrier", + "Australian Terrier", "Dandie Dinmont Terrier", "Boston Terrier", "Miniature Schnauzer", + "Giant Schnauzer", "Standard Schnauzer", "Scottish Terrier", "Tibetan Terrier", + "Australian Silky Terrier", "Soft-coated Wheaten Terrier", "West Highland White Terrier", + "Lhasa Apso", "Flat-Coated Retriever", "Curly-coated Retriever", "Golden Retriever", + "Labrador Retriever", "Chesapeake Bay Retriever", "German Shorthaired Pointer", "Vizsla", + "English Setter", "Irish Setter", "Gordon Setter", "Brittany dog", "Clumber Spaniel", + "English Springer Spaniel", "Welsh Springer Spaniel", "Cocker Spaniel", "Sussex Spaniel", + "Irish Water Spaniel", "Kuvasz", "Schipperke", "Groenendael dog", "Malinois", "Briard", + "Australian Kelpie", "Komondor", "Old English Sheepdog", "Shetland Sheepdog", "collie", + "Border Collie", "Bouvier des Flandres dog", "Rottweiler", "German Shepherd Dog", "Dobermann", + "Miniature Pinscher", "Greater Swiss Mountain Dog", "Bernese Mountain Dog", + "Appenzeller Sennenhund", "Entlebucher Sennenhund", "Boxer", "Bullmastiff", "Tibetan Mastiff", + "French Bulldog", "Great Dane", "St. Bernard", "husky", "Alaskan Malamute", "Siberian Husky", + "Dalmatian", "Affenpinscher", "Basenji", "pug", "Leonberger", "Newfoundland dog", + "Great Pyrenees dog", "Samoyed", "Pomeranian", "Chow Chow", "Keeshond", "brussels griffon", + "Pembroke Welsh Corgi", "Cardigan Welsh Corgi", "Toy Poodle", "Miniature Poodle", + "Standard Poodle", "Mexican hairless dog (xoloitzcuintli)", "grey wolf", "Alaskan tundra wolf", + "red wolf or maned wolf", "coyote", "dingo", "dhole", "African wild dog", "hyena", "red fox", + "kit fox", "Arctic fox", "grey fox", "tabby cat", "tiger cat", "Persian cat", "Siamese cat", + "Egyptian Mau", "cougar", "lynx", "leopard", "snow leopard", "jaguar", "lion", "tiger", + "cheetah", "brown bear", "American black bear", "polar bear", "sloth bear", "mongoose", + "meerkat", "tiger beetle", "ladybug", "ground beetle", "longhorn beetle", "leaf beetle", + "dung beetle", "rhinoceros beetle", "weevil", "fly", "bee", "ant", "grasshopper", + "cricket insect", "stick insect", "cockroach", "praying mantis", "cicada", "leafhopper", + "lacewing", "dragonfly", "damselfly", "red admiral butterfly", "ringlet butterfly", + "monarch butterfly", "small white butterfly", "sulphur butterfly", "gossamer-winged butterfly", + "starfish", "sea urchin", "sea cucumber", "cottontail rabbit", "hare", "Angora rabbit", + "hamster", "porcupine", "fox squirrel", "marmot", "beaver", "guinea pig", "common sorrel horse", + "zebra", "pig", "wild boar", "warthog", "hippopotamus", "ox", "water buffalo", "bison", + "ram (adult male sheep)", "bighorn sheep", "Alpine ibex", "hartebeest", "impala (antelope)", + "gazelle", "arabian camel", "llama", "weasel", "mink", "European polecat", + "black-footed ferret", "otter", "skunk", "badger", "armadillo", "three-toed sloth", "orangutan", + "gorilla", "chimpanzee", "gibbon", "siamang", "guenon", "patas monkey", "baboon", "macaque", + "langur", "black-and-white colobus", "proboscis monkey", "marmoset", "white-headed capuchin", + "howler monkey", "titi monkey", "Geoffroy's spider monkey", "common squirrel monkey", + "ring-tailed lemur", "indri", "Asian elephant", "African bush elephant", "red panda", + "giant panda", "snoek fish", "eel", "silver salmon", "rock beauty fish", "clownfish", + "sturgeon", "gar fish", "lionfish", "pufferfish", "abacus", "abaya", "academic gown", + "accordion", "acoustic guitar", "aircraft carrier", "airliner", "airship", "altar", "ambulance", + "amphibious vehicle", "analog clock", "apiary", "apron", "trash can", "assault rifle", + "backpack", "bakery", "balance beam", "balloon", "ballpoint pen", "Band-Aid", "banjo", + "baluster / handrail", "barbell", "barber chair", "barbershop", "barn", "barometer", "barrel", + "wheelbarrow", "baseball", "basketball", "bassinet", "bassoon", "swimming cap", "bath towel", + "bathtub", "station wagon", "lighthouse", "beaker", "military hat (bearskin or shako)", + "beer bottle", "beer glass", "bell tower", "baby bib", "tandem bicycle", "bikini", + "ring binder", "binoculars", "birdhouse", "boathouse", "bobsleigh", "bolo tie", "poke bonnet", + "bookcase", "bookstore", "bottle cap", "hunting bow", "bow tie", "brass memorial plaque", "bra", + "breakwater", "breastplate", "broom", "bucket", "buckle", "bulletproof vest", + "high-speed train", "butcher shop", "taxicab", "cauldron", "candle", "cannon", "canoe", + "can opener", "cardigan", "car mirror", "carousel", "tool kit", "cardboard box / carton", + "car wheel", "automated teller machine", "cassette", "cassette player", "castle", "catamaran", + "CD player", "cello", "mobile phone", "chain", "chain-link fence", "chain mail", "chainsaw", + "storage chest", "chiffonier", "bell or wind chime", "china cabinet", "Christmas stocking", + "church", "movie theater", "cleaver", "cliff dwelling", "cloak", "clogs", "cocktail shaker", + "coffee mug", "coffeemaker", "spiral or coil", "combination lock", "computer keyboard", + "candy store", "container ship", "convertible", "corkscrew", "cornet", "cowboy boot", + "cowboy hat", "cradle", "construction crane", "crash helmet", "crate", "infant bed", + "Crock Pot", "croquet ball", "crutch", "cuirass", "dam", "desk", "desktop computer", + "rotary dial telephone", "diaper", "digital clock", "digital watch", "dining table", + "dishcloth", "dishwasher", "disc brake", "dock", "dog sled", "dome", "doormat", "drilling rig", + "drum", "drumstick", "dumbbell", "Dutch oven", "electric fan", "electric guitar", + "electric locomotive", "entertainment center", "envelope", "espresso machine", "face powder", + "feather boa", "filing cabinet", "fireboat", "fire truck", "fire screen", "flagpole", "flute", + "folding chair", "football helmet", "forklift", "fountain", "fountain pen", "four-poster bed", + "freight car", "French horn", "frying pan", "fur coat", "garbage truck", + "gas mask or respirator", "gas pump", "goblet", "go-kart", "golf ball", "golf cart", "gondola", + "gong", "gown", "grand piano", "greenhouse", "radiator grille", "grocery store", "guillotine", + "hair clip", "hair spray", "half-track", "hammer", "hamper", "hair dryer", "hand-held computer", + "handkerchief", "hard disk drive", "harmonica", "harp", "combine harvester", "hatchet", + "holster", "home theater", "honeycomb", "hook", "hoop skirt", "gymnastic horizontal bar", + "horse-drawn vehicle", "hourglass", "iPod", "clothes iron", "carved pumpkin", "jeans", "jeep", + "T-shirt", "jigsaw puzzle", "rickshaw", "joystick", "kimono", "knee pad", "knot", "lab coat", + "ladle", "lampshade", "laptop computer", "lawn mower", "lens cap", "letter opener", "library", + "lifeboat", "lighter", "limousine", "ocean liner", "lipstick", "slip-on shoe", "lotion", + "music speaker", "loupe magnifying glass", "sawmill", "magnetic compass", "messenger bag", + "mailbox", "tights", "one-piece bathing suit", "manhole cover", "maraca", "marimba", "mask", + "matchstick", "maypole", "maze", "measuring cup", "medicine cabinet", "megalith", "microphone", + "microwave oven", "military uniform", "milk can", "minibus", "miniskirt", "minivan", "missile", + "mitten", "mixing bowl", "mobile home", "ford model t", "modem", "monastery", "monitor", + "moped", "mortar and pestle", "graduation cap", "mosque", "mosquito net", "vespa", + "mountain bike", "tent", "computer mouse", "mousetrap", "moving van", "muzzle", "metal nail", + "neck brace", "necklace", "baby pacifier", "notebook computer", "obelisk", "oboe", "ocarina", + "odometer", "oil filter", "pipe organ", "oscilloscope", "overskirt", "bullock cart", + "oxygen mask", "product packet / packaging", "paddle", "paddle wheel", "padlock", "paintbrush", + "pajamas", "palace", "pan flute", "paper towel", "parachute", "parallel bars", "park bench", + "parking meter", "railroad car", "patio", "payphone", "pedestal", "pencil case", + "pencil sharpener", "perfume", "Petri dish", "photocopier", "plectrum", "Pickelhaube", + "picket fence", "pickup truck", "pier", "piggy bank", "pill bottle", "pillow", "ping-pong ball", + "pinwheel", "pirate ship", "drink pitcher", "block plane", "planetarium", "plastic bag", + "plate rack", "farm plow", "plunger", "Polaroid camera", "pole", "police van", "poncho", + "pool table", "soda bottle", "plant pot", "potter's wheel", "power drill", "prayer rug", + "printer", "prison", "missile", "projector", "hockey puck", "punching bag", "purse", "quill", + "quilt", "race car", "racket", "radiator", "radio", "radio telescope", "rain barrel", + "recreational vehicle", "fishing casting reel", "reflex camera", "refrigerator", + "remote control", "restaurant", "revolver", "rifle", "rocking chair", "rotisserie", "eraser", + "rugby ball", "ruler measuring stick", "sneaker", "safe", "safety pin", "salt shaker", "sandal", + "sarong", "saxophone", "scabbard", "weighing scale", "school bus", "schooner", "scoreboard", + "CRT monitor", "screw", "screwdriver", "seat belt", "sewing machine", "shield", "shoe store", + "shoji screen / room divider", "shopping basket", "shopping cart", "shovel", "shower cap", + "shower curtain", "ski", "balaclava ski mask", "sleeping bag", "slide rule", "sliding door", + "slot machine", "snorkel", "snowmobile", "snowplow", "soap dispenser", "soccer ball", "sock", + "solar thermal collector", "sombrero", "soup bowl", "keyboard space bar", "space heater", + "space shuttle", "spatula", "motorboat", "spider web", "spindle", "sports car", "spotlight", + "stage", "steam locomotive", "through arch bridge", "steel drum", "stethoscope", "scarf", + "stone wall", "stopwatch", "stove", "strainer", "tram", "stretcher", "couch", "stupa", + "submarine", "suit", "sundial", "sunglasses", "sunglasses", "sunscreen", "suspension bridge", + "mop", "sweatshirt", "swim trunks / shorts", "swing", "electrical switch", "syringe", + "table lamp", "tank", "tape player", "teapot", "teddy bear", "television", "tennis ball", + "thatched roof", "front curtain", "thimble", "threshing machine", "throne", "tile roof", + "toaster", "tobacco shop", "toilet seat", "torch", "totem pole", "tow truck", "toy store", + "tractor", "semi-trailer truck", "tray", "trench coat", "tricycle", "trimaran", "tripod", + "triumphal arch", "trolleybus", "trombone", "hot tub", "turnstile", "typewriter keyboard", + "umbrella", "unicycle", "upright piano", "vacuum cleaner", "vase", "vaulted or arched ceiling", + "velvet fabric", "vending machine", "vestment", "viaduct", "violin", "volleyball", + "waffle iron", "wall clock", "wallet", "wardrobe", "military aircraft", "sink", + "washing machine", "water bottle", "water jug", "water tower", "whiskey jug", "whistle", + "hair wig", "window screen", "window shade", "Windsor tie", "wine bottle", "airplane wing", + "wok", "wooden spoon", "wool", "split-rail fence", "shipwreck", "sailboat", "yurt", "website", + "comic book", "crossword", "traffic or street sign", "traffic light", "dust jacket", "menu", + "plate", "guacamole", "consomme", "hot pot", "trifle", "ice cream", "popsicle", "baguette", + "bagel", "pretzel", "cheeseburger", "hot dog", "mashed potatoes", "cabbage", "broccoli", + "cauliflower", "zucchini", "spaghetti squash", "acorn squash", "butternut squash", "cucumber", + "artichoke", "bell pepper", "cardoon", "mushroom", "Granny Smith apple", "strawberry", "orange", + "lemon", "fig", "pineapple", "banana", "jackfruit", "cherimoya (custard apple)", "pomegranate", + "hay", "carbonara", "chocolate syrup", "dough", "meatloaf", "pizza", "pot pie", "burrito", + "red wine", "espresso", "tea cup", "eggnog", "mountain", "bubble", "cliff", "coral reef", + "geyser", "lakeshore", "promontory", "sandbar", "beach", "valley", "volcano", "baseball player", + "bridegroom", "scuba diver", "rapeseed", "daisy", "yellow lady's slipper", "corn", "acorn", + "rose hip", "horse chestnut seed", "coral fungus", "agaric", "gyromitra", "stinkhorn mushroom", + "earth star fungus", "hen of the woods mushroom", "bolete", "corn cob", "toilet paper"] + +openai_imagenet_template = [ + lambda c: f'a bad photo of a {c}.', + lambda c: f'a photo of many {c}.', + lambda c: f'a sculpture of a {c}.', + lambda c: f'a photo of the hard to see {c}.', + lambda c: f'a low resolution photo of the {c}.', + lambda c: f'a rendering of a {c}.', + lambda c: f'graffiti of a {c}.', + lambda c: f'a bad photo of the {c}.', + lambda c: f'a cropped photo of the {c}.', + lambda c: f'a tattoo of a {c}.', + lambda c: f'the embroidered {c}.', + lambda c: f'a photo of a hard to see {c}.', + lambda c: f'a bright photo of a {c}.', + lambda c: f'a photo of a clean {c}.', + lambda c: f'a photo of a dirty {c}.', + lambda c: f'a dark photo of the {c}.', + lambda c: f'a drawing of a {c}.', + lambda c: f'a photo of my {c}.', + lambda c: f'the plastic {c}.', + lambda c: f'a photo of the cool {c}.', + lambda c: f'a close-up photo of a {c}.', + lambda c: f'a black and white photo of the {c}.', + lambda c: f'a painting of the {c}.', + lambda c: f'a painting of a {c}.', + lambda c: f'a pixelated photo of the {c}.', + lambda c: f'a sculpture of the {c}.', + lambda c: f'a bright photo of the {c}.', + lambda c: f'a cropped photo of a {c}.', + lambda c: f'a plastic {c}.', + lambda c: f'a photo of the dirty {c}.', + lambda c: f'a jpeg corrupted photo of a {c}.', + lambda c: f'a blurry photo of the {c}.', + lambda c: f'a photo of the {c}.', + lambda c: f'a good photo of the {c}.', + lambda c: f'a rendering of the {c}.', + lambda c: f'a {c} in a video game.', + lambda c: f'a photo of one {c}.', + lambda c: f'a doodle of a {c}.', + lambda c: f'a close-up photo of the {c}.', + lambda c: f'a photo of a {c}.', + lambda c: f'the origami {c}.', + lambda c: f'the {c} in a video game.', + lambda c: f'a sketch of a {c}.', + lambda c: f'a doodle of the {c}.', + lambda c: f'a origami {c}.', + lambda c: f'a low resolution photo of a {c}.', + lambda c: f'the toy {c}.', + lambda c: f'a rendition of the {c}.', + lambda c: f'a photo of the clean {c}.', + lambda c: f'a photo of a large {c}.', + lambda c: f'a rendition of a {c}.', + lambda c: f'a photo of a nice {c}.', + lambda c: f'a photo of a weird {c}.', + lambda c: f'a blurry photo of a {c}.', + lambda c: f'a cartoon {c}.', + lambda c: f'art of a {c}.', + lambda c: f'a sketch of the {c}.', + lambda c: f'a embroidered {c}.', + lambda c: f'a pixelated photo of a {c}.', + lambda c: f'itap of the {c}.', + lambda c: f'a jpeg corrupted photo of the {c}.', + lambda c: f'a good photo of a {c}.', + lambda c: f'a plushie {c}.', + lambda c: f'a photo of the nice {c}.', + lambda c: f'a photo of the small {c}.', + lambda c: f'a photo of the weird {c}.', + lambda c: f'the cartoon {c}.', + lambda c: f'art of the {c}.', + lambda c: f'a drawing of the {c}.', + lambda c: f'a photo of the large {c}.', + lambda c: f'a black and white photo of a {c}.', + lambda c: f'the plushie {c}.', + lambda c: f'a dark photo of a {c}.', + lambda c: f'itap of a {c}.', + lambda c: f'graffiti of the {c}.', + lambda c: f'a toy {c}.', + lambda c: f'itap of my {c}.', + lambda c: f'a photo of a cool {c}.', + lambda c: f'a photo of a small {c}.', + lambda c: f'a tattoo of the {c}.', +] diff --git a/nemo/collections/multimodal/data/common/__init__.py b/nemo/collections/multimodal/data/common/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/common/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/common/data_samplers.py b/nemo/collections/multimodal/data/common/data_samplers.py new file mode 100644 index 000000000000..5693ee617c1b --- /dev/null +++ b/nemo/collections/multimodal/data/common/data_samplers.py @@ -0,0 +1,123 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +from multiprocessing import Value +from webdataset.pytorch import IterableDataset + + +class SharedEpoch: + def __init__(self, epoch: int = 0): + self.shared_epoch = Value('i', epoch) + + def set_value(self, epoch): + self.shared_epoch.value = epoch + + def get_value(self): + return self.shared_epoch.value + + +class WDSUrlsRandomSampler(IterableDataset): + + def __init__( + self, + urls, + total_urls: int, + chunk_size: int, + consumed_samples: int, + data_parallel_rank: int, + data_parallel_size: int, + drop_last: bool, + data_sharding: bool, + ): + r"""Sampler for WebDataset Urls with data parallelism. + Args: + urls : The urls of the tar files from which to sample. + total_urls (int): Total number of urls in the dataset. + chunk_size (int): Number of objects per tar file. + consumed_samples (int): Number of samples consumed so far by the training process. + **Note samples here is not urls.** + data_parallel_rank (int): Rank of the current data parallel process. + data_parallel_size (int): Number of data parallel processes. + drop_last (bool): If True, drop the remaining urls if the number is smaller than `data_parallel_size`. + If False, pad the urls until its size is divisible by `data_parallel_size`. + data_sharding (bool): If True, use data sharding before data shuffling, i.e. only shuffle within the data parallel group. + """ + super().__init__() + self.urls = urls + self.total_urls = total_urls + self.chunk_size = chunk_size + self.consumed_samples = consumed_samples + assert consumed_samples % data_parallel_size == 0 + self.consumed_urls = consumed_samples // data_parallel_size // chunk_size * data_parallel_size + + self.data_parallel_rank = data_parallel_rank + self.data_parallel_size = data_parallel_size + self.drop_last = drop_last + self.data_sharding = data_sharding + self.epoch = SharedEpoch() + + self.remaining_urls = self.total_urls % self.data_parallel_size + + def __len__(self): + if self.drop_last: + return self.total_urls // self.data_parallel_size + else: + return (self.total_urls + self.data_parallel_size - 1) // self.data_parallel_size + + def __iter__(self): + worker_info = torch.utils.data.get_worker_info() + if worker_info is not None: + worker_id, num_workers = worker_info.id, worker_info.num_workers + + self.consumed_urls = self.consumed_samples // self.data_parallel_size \ + // self.chunk_size * self.data_parallel_size + + if self.drop_last or self.remaining_urls == 0: + active_total_urls = self.total_urls - self.remaining_urls + else: + active_total_urls = self.total_urls + self.data_parallel_size - self.remaining_urls + + self.epoch.set_value(self.consumed_urls // active_total_urls) + current_epoch_urls = self.consumed_urls % active_total_urls + + # data sharding and random sampling + if self.data_sharding: + bucket_size = active_total_urls // self.data_parallel_size + bucket_offset = current_epoch_urls // self.data_parallel_size + start_idx = self.data_parallel_rank * bucket_size + + g = torch.Generator() + g.manual_seed(self.epoch.get_value()) + random_idx = torch.randperm(bucket_size, generator=g).tolist() + idx_range = [start_idx + x for x in random_idx[bucket_offset:]] + else: + full_bucket_size = active_total_urls + full_bucket_offset = current_epoch_urls + g = torch.Generator() + g.manual_seed(self.epoch.get_value()) + idx_range_total = \ + torch.randperm(full_bucket_size, generator=g).tolist() + idx_range_active = idx_range_total[full_bucket_offset:] + idx_range = idx_range_active[self.data_parallel_rank::self.data_parallel_size] + + # Use additional permutation to replace out-of-range indices when drop_last is False + additional_random_idx = torch.randperm(self.total_urls, generator=g).tolist() + for n, idx in enumerate(idx_range): + self.consumed_samples += self.data_parallel_size * self.chunk_size + if worker_info is not None and n % num_workers != worker_id: + continue + if idx < self.total_urls: + yield dict(url=self.urls[idx]) + else: + yield dict(url=self.urls[additional_random_idx[idx - self.total_urls]]) diff --git a/nemo/collections/multimodal/data/common/webdataset.py b/nemo/collections/multimodal/data/common/webdataset.py new file mode 100644 index 000000000000..aec2396d2b72 --- /dev/null +++ b/nemo/collections/multimodal/data/common/webdataset.py @@ -0,0 +1,241 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import io +import itertools +import os +import os +import pickle +import random +import re +import torch.distributed as dist +from PIL import Image +from PIL import Image +from botocore.config import Config +from typing import Callable +from typing import Callable, Union, List +from webdataset.filters import _shuffle +from webdataset.utils import pytorch_worker_info + +import webdataset as wds +from nemo.collections.multimodal.data.common.data_samplers import SharedEpoch, WDSUrlsRandomSampler +from nemo.core.classes import IterableDataset as NeMoIterableDataset +from nemo.utils import logging +from webdataset import WebDataset + +try: + from apex.transformer import parallel_state + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + +Image.MAX_IMAGE_PIXELS = 933120000 +_IMG_EXTENSIONS = "jpg jpeg png ppm pgm pbm pnm".split() +from webdataset import warn_and_continue + + +class detshuffle2(wds.PipelineStage): + def __init__( + self, + bufsize=1000, + initial=100, + seed=0, + epoch=-1, + ): + self.bufsize = bufsize + self.initial = initial + self.seed = seed + self.epoch = epoch + + def run(self, src): + if isinstance(self.epoch, SharedEpoch): + epoch = self.epoch.get_value() + else: + # NOTE: this is epoch tracking is problematic in a multiprocess (dataloader workers or train) + # situation as different workers may wrap at different times (or not at all). + self.epoch += 1 + epoch = self.epoch + rng = random.Random() + # This seed to be deterministic AND the same across all nodes/workers in each epoch + if parallel_state.is_unitialized(): + seed = self.seed + epoch + else: + seed = self.seed + epoch + (100 * parallel_state.get_data_parallel_rank()) + rng.seed(seed) + return _shuffle(src, self.bufsize, self.initial, rng) + + +def pil_loader(key, data): + r""" + Function to load an image. + If the image is corrupt, it returns a black image. + Args: + key: Image key. + data: Image data stream. + """ + extension = re.sub(r".*[.]", "", key) + if extension.lower() not in _IMG_EXTENSIONS: + return None + + with io.BytesIO(data) as stream: + img = Image.open(stream) + img.load() + img = img.convert("RGB") + + return img + + +def get_world_size(): + r"""Get world size. How many GPUs are available in this job.""" + world_size = 1 + if dist.is_available(): + if dist.is_initialized(): + world_size = dist.get_world_size() + return world_size + + +class WebDatasetCommon(NeMoIterableDataset): + + def __init__(self, + dataset_cfg, + map_fn: Callable, + compose_fn: Union[Callable, List[Callable]], + consumed_samples: int, + filter_fn: Callable = None, + gen_cfg=None, + decode_fn: Callable = None, + is_train=True): + + super().__init__() + self.dataset_cfg = dataset_cfg + self.num_workers = dataset_cfg.num_workers + self.world_size = get_world_size() + self.webdata_cfg = dataset_cfg.webdataset + self.infinite_sampler = self.webdata_cfg.get("infinite_sampler", False) + self.gen_cfg = gen_cfg + self.consumed_samples = consumed_samples + + self.local_root_path = self.webdata_cfg.local_root_path + logging.info(f'Read Webdataset locally. Data stores at {self.local_root_path}') + if is_train: + dataset_path = dataset_cfg.train.dataset_path + self.augmentations = dataset_cfg.train.get("augmentations", None) + self.filterings = dataset_cfg.train.get("filterings", None) + else: + dataset_path = dataset_cfg.validation.dataset_path + self.augmentations = dataset_cfg.validation.get("augmentations", None) + self.filterings = dataset_cfg.validation.get("filterings", None) + + # wdinfo in a dict containing webdata information + self.wdinfo = dict() + if dataset_path[0].endswith(".pkl"): + for dset_info_path in dataset_path: + with open(dset_info_path, 'rb') as fp: + dset_info = pickle.load(fp) + if 'tar_files' not in self.wdinfo: + self.wdinfo['tar_files'] = dset_info['tar_files'] + self.wdinfo['total_key_count'] = dset_info['total_key_count'] + self.wdinfo['chunk_size'] = dset_info['chunk_size'] + else: + self.wdinfo['tar_files'].extend(dset_info['tar_files']) + self.wdinfo['total_key_count'] += dset_info['total_key_count'] + train_info = self.wdinfo + else: + train_info = self.wdinfo + train_info['tar_files'] = map(wds.shardlists.expand_urls, dataset_path) + train_info['tar_files'] = list(itertools.chain.from_iterable(train_info['tar_files'])) + train_info['chunk_size'] = self.webdata_cfg.get("chunk_size", 1000) + train_info['total_key_count'] = train_info['chunk_size'] * len(train_info['tar_files']) + + decode_fn = pil_loader if decode_fn is None else decode_fn + shards_train_list = train_info["tar_files"] + num_shards = len(shards_train_list) + assert num_shards > 0, "Did not find any training data." + + # Shuffle buffer: + shuffle_buffer_size = train_info["chunk_size"] + + if self.filterings is not None: + # TODO : Not a good way of estimating filtering (We expect user to give estimated portion) + # We should estimate in someway. This is anyway used only in progress bar + logging.info(f'Estimated {self.filterings.estimated_portion} will be remaining after filtering') + train_info["total_key_count"] = int(train_info["total_key_count"] * self.filterings.estimated_portion) + + from webdataset import warn_and_continue + train_dataset, epoch = self._get_webdataset_and_epoch() + train_dataset = train_dataset.compose(detshuffle2(bufsize=shuffle_buffer_size, epoch=epoch)) + train_dataset = train_dataset.decode(decode_fn, handler=warn_and_continue) + + if self.filterings is not None: + if self.filterings.resolution is not None: + train_dataset = train_dataset.select(filter_fn) + + # traindataset.to_tuple("").map_tuple(fns) + train_dataset = train_dataset.map(map_fn) + if not isinstance(compose_fn, list): + compose_fn = [compose_fn] + for fn in compose_fn: + train_dataset = train_dataset.compose(fn) + train_dataset.total_images = train_info["total_key_count"] + + if train_info["total_key_count"] != train_info["chunk_size"] * len(train_info["tar_files"]): + logging.warning("Total image count is not equal to chunk_size * number of tar files.") + + if self.infinite_sampler: + rank, world_size, worker_id, num_workers = pytorch_worker_info() + nbatches = train_dataset.total_images // world_size // self.num_workers + logging.info(f'Setting nbatches={nbatches} for infinite sampler. world_size={world_size}') + train_dataset = train_dataset.with_epoch(nbatches=nbatches) + + logging.info("Total number of training shards: %d", num_shards) + logging.info("Total training key count: %d", train_dataset.total_images) + + self._dataset = train_dataset + + def _get_webdataset_and_epoch(self): + train_info = self.wdinfo + chunk_size = train_info["chunk_size"] + shards_train_list = train_info["tar_files"] + shards_train_list = [os.path.join(self.local_root_path, x) for x in shards_train_list] + epoch = 0 + + if not self.infinite_sampler: + logging.info(f'Initiating Webdataset Random Sampler..') + assert self.filterings is None, 'Webdataset Random Sampler should not be used with filters. Switch to infinite sampler' + shards_train_list = WDSUrlsRandomSampler( + urls=shards_train_list, + total_urls=len(shards_train_list), + chunk_size=chunk_size, + consumed_samples=self.consumed_samples, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=True, + data_sharding=self.dataset_cfg.train.get("data_sharding", True), + ) + epoch = shards_train_list.epoch + + train_dataset = WebDataset( + shards_train_list, + handler=warn_and_continue, + resampled=self.infinite_sampler or False, + ) + + return train_dataset, epoch + + def __iter__(self): + return self._dataset.__iter__() + + def __len__(self): + world_size = get_world_size() + return self._dataset.total_images // world_size diff --git a/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py b/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py new file mode 100644 index 000000000000..bd9a183ac356 --- /dev/null +++ b/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py @@ -0,0 +1,86 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pickle +import torch +from PIL import Image +from pathlib import Path +from torch.utils.data import Dataset +from torchvision import transforms + + +class DreamBoothDataset(Dataset): + """ + A dataset to prepare the instance and class images with the prompts for fine-tuning the model. + It pre-processes the images and the tokenizes prompts. + """ + + def __init__( + self, + instance_data_root, + instance_prompt, + reg_data_root=None, + reg_prompt=None, + size=512, + center_crop=False, + repeat=100, + ): + self.size = size + self.center_crop = center_crop + + self.instance_data_root = Path(instance_data_root) + if not self.instance_data_root.exists(): + raise ValueError("Instance images root doesn't exists.") + + self.instance_images_path = list(Path(instance_data_root).iterdir()) + self.num_instance_images = len(self.instance_images_path) + self.instance_prompt = instance_prompt + self._length = self.num_instance_images * repeat + + if reg_data_root is not None: + self.reg_data_root = Path(reg_data_root) + self.reg_images_path = list(self.reg_data_root.iterdir()) + self.num_reg_images = len(self.reg_images_path) + self._length = max(self.num_reg_images, self.num_instance_images) + self.reg_prompt = reg_prompt + else: + self.reg_data_root = None + + self.image_transforms = transforms.Compose( + [ + transforms.Resize(size, interpolation=transforms.InterpolationMode.BILINEAR), + transforms.CenterCrop(size) if center_crop else transforms.RandomCrop(size), + transforms.ToTensor(), + transforms.Normalize([0.5], [0.5]), + ] + ) + + def __len__(self): + return self._length + + def __getitem__(self, index): + example = {} + instance_image = Image.open(self.instance_images_path[index % self.num_instance_images]) + if not instance_image.mode == "RGB": + instance_image = instance_image.convert("RGB") + example["instance_images"] = self.image_transforms(instance_image) + example["instance_prompt"] = self.instance_prompt + + if self.reg_data_root: + reg_image = Image.open(self.reg_images_path[index % self.num_reg_images]) + if not reg_image.mode == "RGB": + reg_image = reg_image.convert("RGB") + example["reg_images"] = self.image_transforms(reg_image) + example["reg_prompt"] = self.reg_prompt + + return example diff --git a/nemo/collections/multimodal/data/instruct_pix2pix/__init__.py b/nemo/collections/multimodal/data/instruct_pix2pix/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/instruct_pix2pix/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py b/nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py new file mode 100644 index 000000000000..999ef62aaf3b --- /dev/null +++ b/nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py @@ -0,0 +1,135 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import json +import math +import numpy as np +import torch +import torchvision +from PIL import Image +from einops import rearrange +from pathlib import Path +from torch.utils.data import Dataset +from typing import Any + +from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import construct_image_augmentations + + +class EditDataset(Dataset): + def __init__( + self, + path: str, + split: str = "train", + splits: tuple[float, float, float] = (0.95, 0.04, 0.01), + min_resize_res: int = 256, + max_resize_res: int = 256, + crop_res: int = 256, + flip_prob: float = 0.0, + ): + assert split in ("train", "val", "test") + assert sum(splits) == 1 + self.path = path + self.min_resize_res = min_resize_res + self.max_resize_res = max_resize_res + self.crop_res = crop_res + self.flip_prob = flip_prob + + with open(Path(self.path, "seeds.json")) as f: + self.seeds = json.load(f) + + split_0, split_1 = { + "train": (0.0, splits[0]), + "val": (splits[0], splits[0] + splits[1]), + "test": (splits[0] + splits[1], 1.0), + }[split] + + idx_0 = math.floor(split_0 * len(self.seeds)) + idx_1 = math.floor(split_1 * len(self.seeds)) + self.seeds = self.seeds[idx_0:idx_1] + + def __len__(self) -> int: + return len(self.seeds) + + def __getitem__(self, i: int) -> dict[str, Any]: + name, seeds = self.seeds[i] + propt_dir = Path(self.path, name) + seed = seeds[torch.randint(0, len(seeds), ()).item()] + with open(propt_dir.joinpath("prompt.json")) as fp: + prompt = json.load(fp)["edit"] + + image_0 = Image.open(propt_dir.joinpath(f"{seed}_0.jpg")) + image_1 = Image.open(propt_dir.joinpath(f"{seed}_1.jpg")) + + resize_res = torch.randint(self.min_resize_res, self.max_resize_res + 1, ()).item() + image_0 = image_0.resize((resize_res, resize_res), Image.Resampling.LANCZOS) + image_1 = image_1.resize((resize_res, resize_res), Image.Resampling.LANCZOS) + + image_0 = rearrange(2 * torch.tensor(np.array(image_0)).float() / 255 - 1, "h w c -> c h w") + image_1 = rearrange(2 * torch.tensor(np.array(image_1)).float() / 255 - 1, "h w c -> c h w") + + crop = torchvision.transforms.RandomCrop(self.crop_res) + flip = torchvision.transforms.RandomHorizontalFlip(float(self.flip_prob)) + image_0, image_1 = flip(crop(torch.cat((image_0, image_1)))).chunk(2) + + return dict(edited=image_1, edit=dict(c_concat=image_0, c_crossattn=prompt)) + + +class EditDatasetEval(Dataset): + def __init__( + self, + path: str, + split: str = "train", + splits: tuple[float, float, float] = (0.9, 0.05, 0.05), + res: int = 256, + ): + assert split in ("train", "val", "test") + assert sum(splits) == 1 + self.path = path + self.res = res + + with open(Path(self.path, "seeds.json")) as f: + self.seeds = json.load(f) + + split_0, split_1 = { + "train": (0.0, splits[0]), + "val": (splits[0], splits[0] + splits[1]), + "test": (splits[0] + splits[1], 1.0), + }[split] + + idx_0 = math.floor(split_0 * len(self.seeds)) + idx_1 = math.floor(split_1 * len(self.seeds)) + self.seeds = self.seeds[idx_0:idx_1] + + def __len__(self) -> int: + return len(self.seeds) + + def __getitem__(self, i: int) -> dict[str, Any]: + name, seeds = self.seeds[i] + propt_dir = Path(self.path, name) + seed = seeds[torch.randint(0, len(seeds), ()).item()] + with open(propt_dir.joinpath("prompt.json")) as fp: + prompt = json.load(fp) + edit = prompt["edit"] + input_prompt = prompt["input"] + output_prompt = prompt["output"] + + image_0 = Image.open(propt_dir.joinpath(f"{seed}_0.jpg")) + + reize_res = torch.randint(self.res, self.res + 1, ()).item() + image_0 = image_0.resize((reize_res, reize_res), Image.Resampling.LANCZOS) + + image_0 = rearrange(2 * torch.tensor(np.array(image_0)).float() / 255 - 1, "h w c -> c h w") + + return dict(image_0=image_0, input_prompt=input_prompt, edit=edit, output_prompt=output_prompt) diff --git a/nemo/collections/multimodal/data/stable_diffusion/__init__.py b/nemo/collections/multimodal/data/stable_diffusion/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/stable_diffusion/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/stable_diffusion/augmentation/__init__.py b/nemo/collections/multimodal/data/stable_diffusion/augmentation/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/stable_diffusion/augmentation/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py b/nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py new file mode 100644 index 000000000000..3e83a5fd5c69 --- /dev/null +++ b/nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py @@ -0,0 +1,72 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import torch +import torchvision.transforms as transforms + + +def construct_clip_augmentations(n_px=224): + def _convert_image_to_rgb(image): + return image.convert("RGB") + + return transforms.Compose([ + transforms.Resize(n_px, interpolation=transforms.InterpolationMode.BICUBIC), + transforms.CenterCrop(n_px), + _convert_image_to_rgb, + transforms.ToTensor(), + transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), + ]) + + +def construct_image_augmentations(augmentation_dict, normalize=True): + train_img_transform = [] + for aug in augmentation_dict: + if aug == 'resize_smallest_side': + img_size = int(augmentation_dict[aug]) + train_img_transform.append(transforms.Resize( + img_size, interpolation=transforms.InterpolationMode.BICUBIC, antialias=True) + ) + + elif aug == 'center_crop_h_w': + img_w, img_h = augmentation_dict[aug].split(',') + img_w = int(img_w) + img_h = int(img_h) + train_img_transform.append(transforms.CenterCrop((img_w, img_h))) + + elif aug == 'random_crop_h_w': + img_w, img_h = augmentation_dict[aug].split(',') + img_w = int(img_w) + img_h = int(img_h) + train_img_transform.append(transforms.RandomCrop((img_w, img_h))) + + elif aug == 'horizontal_flip': + enabled = augmentation_dict[aug] + if enabled: + train_img_transform.append(transforms.RandomHorizontalFlip(p=0.5)) + else: + raise ValueError('Augmentation not supported') + + # Always need to convert data to tensor + train_img_transform.append(transforms.ToTensor()) + if normalize: + train_img_transform.append(transforms.Normalize( + (0.5, 0.5, 0.5), + (0.5, 0.5, 0.5) + )) + train_img_transform = transforms.Compose(train_img_transform) + return train_img_transform + + +def identical_transform(x): + return x diff --git a/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py b/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py new file mode 100644 index 000000000000..d68d52aac9b1 --- /dev/null +++ b/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py @@ -0,0 +1,100 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon +from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import ( + construct_image_augmentations, + identical_transform, +) + + +def build_train_valid_datasets( + model_cfg, + consumed_samples, +): + data_cfg = model_cfg.data + + # This function maps data that are tuples to dictionary. + def tuple_to_dict(inp): + for input in inp: + out_dict = dict() + out_dict['images'] = input[0].permute(1, 2, 0) + out_dict['captions'] = input[1] + yield out_dict + + def transform_fn(sample): + image, text = sample["jpg"], sample["txt"] + # TODO : If no agumentations just return the image ? + img_transform = construct_image_augmentations(data_cfg.train.get("augmentations", None)) + text_transform = identical_transform + return img_transform(image), text_transform(text) + + train_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=True, + ) + + val_data = None + if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): + val_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=False, + ) + + return train_data, val_data + + +def build_train_valid_precached_datasets( + model_cfg, + consumed_samples, +): + data_cfg = model_cfg.data + + # This function maps data that are tuples to dictionary. + def tuple_to_dict(inp): + for input in inp: + out_dict = dict() + out_dict[model_cfg.first_stage_key] = torch.tensor(input['autoencoderkl_image']) + out_dict[model_cfg.cond_stage_key] = torch.tensor(input['clip-vit-large-patch14_text']) + yield out_dict + + def transform_fn(sample): + return sample['pickle'] + + train_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=True, + ) + + val_data = None + if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): + val_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=False, + ) + + return train_data, val_data diff --git a/nemo/collections/multimodal/data/stable_diffusion/wds_sampler.py b/nemo/collections/multimodal/data/stable_diffusion/wds_sampler.py new file mode 100644 index 000000000000..dca19f6948f6 --- /dev/null +++ b/nemo/collections/multimodal/data/stable_diffusion/wds_sampler.py @@ -0,0 +1,70 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import torch +import torch.distributed as dist +from pytorch_lightning import Callback + + +class WDSSampler: + def __init__(self, mode): + self.mode = mode + assert self.mode in ['train', 'val'] + + def set_epoch(self, epoch, pseudo_epoch=None, start_index=0): + if self.mode == 'train': + world_size = dist.get_world_size() + num_samples_read_so_far = start_index * world_size + os.environ["WDS_EPOCH_NUM"] = str(epoch) + os.environ["WDS_START_INDEX"] = str(num_samples_read_so_far) + print(f'set WDS_EPOCH_NUM={epoch}; WDS_START_INDEX={num_samples_read_so_far}; start_index={start_index}') + else: + pass + + +class WebDataloaderSamplerCallback(Callback): + def __init__(self, batch_size, gradient_accumulation=1): + super().__init__() + self.batch_size = batch_size + self.train_sampler = WDSSampler(mode='train') + self.val_sampler = WDSSampler(mode='val') + self.resume_flag = False + self.ga = gradient_accumulation + + def on_train_epoch_start(self, trainer, pl_module): + # For most cases, epoch should start from 0 (start_index = 0), + # except for the case when we resume the checkpoint and start the epoch the first time + if self.resume_flag: + # We calculate the start_index by estimating the global steps / len(dataloader) + num_iters = trainer.global_step % trainer.num_training_batches + self.resume_flag = False + else: + num_iters = 0 + + # We assume that the batch size, # GPUs between different runs remain the same + # When ga is larger than 1, num_iters only records steps with back propagation + # The actual consumed samples needs to multiply with ga batches + consumed_samples_per_GPU = num_iters * self.batch_size * self.ga + # This part assume that when we resume, we are using the same num of gpus and also same batchsize as before + epoch = trainer.global_step * self.ga // trainer.num_training_batches + print( + f'WebdataLoaderSampler Calculated epoch={epoch}, num_iters={num_iters}, num_training_batches={trainer.num_training_batches}') + if pl_module.current_epoch != epoch: + print(f'Warning: Calculated Epoch={epoch} is not equal to pyt-lightning epoch={pl_module.current_epoch}') + + self.train_sampler.set_epoch(epoch, start_index=consumed_samples_per_GPU) + + def on_validation_epoch_start(self, trainer, pl_module): + # For validation, we don't care if we finish or not because we never go through a complete epoch of validation set for now + self.val_sampler.set_epoch(pl_module.current_epoch) diff --git a/nemo/collections/multimodal/data/stable_diffusion/webdataset.py b/nemo/collections/multimodal/data/stable_diffusion/webdataset.py new file mode 100644 index 000000000000..06411285a267 --- /dev/null +++ b/nemo/collections/multimodal/data/stable_diffusion/webdataset.py @@ -0,0 +1,389 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import boto3 +import io +import json +import os +import pickle +import random +import re +import torch.distributed as dist +from PIL import Image +from botocore.config import Config +from torch.utils.data import IterableDataset +from webdataset.utils import pytorch_worker_info + +from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import ( + construct_image_augmentations, + identical_transform, +) +from nemo.collections.multimodal.data.stable_diffusion.webdataset_utils import WebDataset +from nemo.core.classes import IterableDataset as NeMoIterableDataset + +Image.MAX_IMAGE_PIXELS = 933120000 +_IMG_EXTENSIONS = "jpg jpeg png ppm pgm pbm pnm".split() +from webdataset import warn_and_continue + + +def pil_loader(key, data): + r""" + Function to load an image. + If the image is corrupt, it returns a black image. + Args: + key: Image key. + data: Image data stream. + """ + + extension = re.sub(r".*[.]", "", key) + if extension.lower() not in _IMG_EXTENSIONS: + return None + + with io.BytesIO(data) as stream: + img = Image.open(stream) + img.load() + img = img.convert("RGB") + + return img + + +def get_world_size(): + r"""Get world size. How many GPUs are available in this job.""" + world_size = 1 + if dist.is_available(): + if dist.is_initialized(): + world_size = dist.get_world_size() + return world_size + + +def repeat_list(x, n): + r""" + Function to repeat the list to a fixed shape. + n is the desired length of the extended list. + Args: + x (list): Input list + n (int): Desired length + """ + if n == 0: + return [] + assert len(x) > 0 + + x_extended = [] + while len(x_extended) < n: + x_extended = x_extended + x + x_extended = x_extended[0:n] + + return x_extended + + +def build_resolution_filter(value=None, method='larger', image_idx=0): + assert method == 'larger' or method == 'smaller' + if method == 'larger': + print(f'Only Selecting images with resolution >= {value}') + return lambda x: x[image_idx].size[0] >= value and x[image_idx].size[1] >= value + + print(f'Only Selecting images with resolution <= {value}') + return lambda x: x[image_idx].size[0] <= value and x[image_idx].size[1] <= value + + +class ShardListWithResumes(IterableDataset): + r""" + An iterable dataset that is compatible with custom resets. + Can be restored from an iteration number and index number. + """ + + def __init__( + self, + urls, + epoch_shuffle=False, + shuffle=True, + split_by_node=True, + split_by_worker=True, + chunk_size=1, + resume_flag=True, + verbose=False + ): + r"""Create a ShardList. + Args: + urls (list): a list of URLs as a Python list or brace notation string + epoch_shuffle (bool): Shuffles the whole epoch. If disabled, each node will see the same set of urls. + shuffle (bool): shuffle samples before iterating. + split_by_node (bool): split shards by node if True + chunk_size (int): chunk size used in webdataset creation + resume_flag (bool): If enabled, resumes from a specific iteration and epoch number. + verbose (bool): Prints some logs if true + """ + super().__init__() + + self.verbose = verbose + self.epoch = 0 + self.start_index = 0 + self.epoch_shuffle = epoch_shuffle + self.shuffle = shuffle + self.split_by_node = split_by_node + self.split_by_worker = split_by_worker + self.chunk_size = chunk_size + self.resume_flag = resume_flag + self.urls = urls + + def set_epoch(self, epoch, start_index): + r"""Set the current epoch. Used for per-node shuffling. + Args: + epoch (int): Epoch number + start_index (int): iteraton number + """ + self.epoch = epoch + self.start_index = start_index + + def __iter__(self): + r"""Return an iterator over the shards.""" + + rank, world_size, worker_id, num_workers = pytorch_worker_info() + + # Setting epoch and start index + if self.resume_flag: + self.epoch = int(os.environ['WDS_EPOCH_NUM']) + + # This tells us number of chunks that have been seen by one GPU + self.start_index = int(os.environ['WDS_START_INDEX']) // self.chunk_size + + urls = self.urls + + # Shuffling the entire epoch before splitting among nodes and workers. + if self.epoch_shuffle: + if self.shuffle: + raise ValueError("If epoch_shuffle is used, do not use shuffle.") + + if self.verbose: + print("PytorchShardList epochshuffle {}".format(self.epoch)) + random.Random(self.epoch).shuffle(urls) + + num_urls = len(urls) + + # Splitting the shards by worker and node + + # Extending urls so that each workers receive the same number of batches. + # This serves the job of ddp_equalize. + nworkers_all = world_size * num_workers + if num_urls % nworkers_all > 0: + num_urls_per_process = (num_urls // nworkers_all) + 1 + else: + num_urls_per_process = num_urls // nworkers_all + extended_url_list_size = num_urls_per_process * nworkers_all + urls = repeat_list(urls, extended_url_list_size) + + # print(f'Total Number of URLS before spliting: {num_urls}') + if self.split_by_node: + urls = urls[rank::world_size] + + if self.split_by_worker: + urls = urls[worker_id::num_workers] + + if self.verbose: + print( + f'Number of URLs after splitting: {len(urls)}. rank/world_size={rank}/{world_size} worker_id/num_workers={worker_id}/{num_workers}') + + if self.shuffle: + random.Random(self.epoch + 17).shuffle(urls) + + # This tells us the number of chunks seen by one worker. + # Do not iterate over the seen chunks. + start_index_per_worker = self.start_index // (num_workers * world_size) + urls = urls[start_index_per_worker:] + + if self.verbose: + print( + f'Number of URLS after using start_index_per_worker: {len(urls)}. self.start_index={self.start_index} start_index_per_worker={start_index_per_worker}') + print( + f'PytorchShardList Rank=<{rank}/{world_size}> Worker=<{worker_id}/{num_workers}> receives {len(urls)} URLs (TARs)') + + for url in urls: + yield dict(url=url) + + +class WebDatasetBase(NeMoIterableDataset): + def __init__(self, cfg, is_train=True): + r""" + Webdataloader class + Args: + cfg: Dataset Config + is_train (bool): Is the dataset used in training mode? + """ + super().__init__() + + self.cfg = cfg + self.num_workers = self.cfg.num_workers + self.world_size = get_world_size() + self.webdata_cfg = self.cfg.webdataset + self.infinite_sampler = self.webdata_cfg.infinite_sampler + if is_train: + dataset_path = cfg.train.dataset_path + self.batch_size = self.cfg.train.batch_size + self.augmentations = self.cfg.train.augmentations + self.filterings = self.cfg.train.filterings + else: + dataset_path = cfg.val.dataset_path + self.batch_size = self.val.batch_size + self.augmentations = self.cfg.val.augmentations + self.filterings = self.cfg.val.filterings + + if getattr(self.webdata_cfg, 'object_store', False): + # Initializing PBSS + print(f'Init PBSS using credentials file at {self.webdata_cfg.pbss_credentials_file}') + self.use_object_store = True + assert self.webdata_cfg.pbss_credentials_file is not None + with open(self.webdata_cfg.pbss_credentials_file) as fin: + self.credentials = json.load(fin) + config = Config(connect_timeout=30, + signature_version="s3", + retries={"max_attempts": 999999}) + self.s3 = boto3.client('s3', **self.credentials, config=config) + self.bucket = self.webdata_cfg.bucket + self.local_root_path = None + else: + self.use_object_store = False + self.s3 = None + self.bucket = None + self.local_root_path = self.webdata_cfg.local_root_path + print(f'Read Webdataset locally. Data stores at {self.local_root_path}') + + # Concatenate all dataset infos + + # wdinfo in a dict containing webdata information + self.wdinfo = dict() + for dset_info_path in dataset_path: + with open(dset_info_path, 'rb') as fp: + dset_info = pickle.load(fp) + if 'tar_files' not in self.wdinfo: + self.wdinfo['tar_files'] = dset_info['tar_files'] + self.wdinfo['total_key_count'] = dset_info['total_key_count'] + self.wdinfo['chunk_size'] = dset_info['chunk_size'] + else: + self.wdinfo['tar_files'].extend(dset_info['tar_files']) + self.wdinfo['total_key_count'] += dset_info['total_key_count'] + + def build_dataset(self, **kwargs): + raise ValueError('build_dataset function not implemented') + + +class WebDatasetWithRawText(WebDatasetBase): + def __init__(self, dataset_cfg, is_train=True): + r""" + Webdataloader class + Args: + dataset_cfg: Dataset config + is_train (bool): Is the dataset used in training mode? + """ + super().__init__(dataset_cfg, is_train=is_train) + # For adding corruptions and obtaining image pyramid + # TODO Add this for SR256/SR1024 training + # self.corruption_gen = ImagePyramidWithCorruptions( + # cfg=cfg, is_inference=is_inference, is_test=is_test + # ) + + # Construct augmentations + self.img_transform = construct_image_augmentations(self.augmentations) + self.text_transform = identical_transform + self.verbose = dataset_cfg.get("verbose", False) + self.build_dataset() + + def build_dataset(self): + """See base class.""" + + train_info = self.wdinfo + + shards_train_list = train_info["tar_files"] + num_shards = len(shards_train_list) + assert num_shards > 0, "Did not find any training data." + + chunk_size = train_info["chunk_size"] + + # Shuffle buffer: + shuffle_buffer_size = train_info["chunk_size"] + + # This function maps data that are tuples to dictionary. + def tuple_to_dict(inp): + for input in inp: + out_dict = dict() + out_dict['images'] = input[0].permute(1, 2, 0) + + out_dict['captions'] = input[1] + yield out_dict + + # Train dataset object + from webdataset import warn_and_continue + if self.infinite_sampler: + rank, world_size, worker_id, num_workers = pytorch_worker_info() + epoch_length = train_info["total_key_count"] // self.batch_size // world_size + print(f'Using infinite sampler, world_size={world_size}. The epoch length will be set to: {epoch_length}') + else: + print(f'Initiating ShardListWithResumes..') + shards_train_list = ShardListWithResumes( + urls=shards_train_list, + epoch_shuffle=True, + shuffle=False, + split_by_node=True, + split_by_worker=True, + chunk_size=chunk_size, + resume_flag=True, + verbose=self.verbose, + ) + + train_dataset = ( + WebDataset( + shards_train_list, + load_from_object_store=self.use_object_store, + s3_client=self.s3, + s3_bucket_name=self.bucket, + local_root_path=self.local_root_path, + handler=warn_and_continue, + resampled=self.infinite_sampler, + ) + .shuffle(shuffle_buffer_size) # Shuffling the buffer + .decode(pil_loader, handler=warn_and_continue) # Decoding the data + .to_tuple("jpg txt") # Splitting into tuple + ) + if self.filterings is not None: + if self.filterings.resolution is not None: + train_dataset = train_dataset.select(build_resolution_filter(**self.filterings.resolution, image_idx=0)) + + # Add additional augmentation + train_dataset = (train_dataset + .map_tuple( + self.img_transform, + self.text_transform + ) # Augmentation + .compose(tuple_to_dict) # Converting tuple to data dict + ) + + train_dataset.total_images = train_info["total_key_count"] + # Set epoch length if using infinite sampler + if self.infinite_sampler: + rank, world_size, worker_id, num_workers = pytorch_worker_info() + nbatches = train_dataset.total_images // world_size // self.num_workers + print(f'Setting nbatches={nbatches} for infinite sampler. world_size={world_size}') + train_dataset = train_dataset.with_epoch(nbatches=nbatches) + + print("Total number of training shards: %d", num_shards) + print("Total training key count: %d", train_dataset.total_images) + + self._dataset = train_dataset + + def __iter__(self): + return self._dataset.__iter__() + + def __len__(self): + world_size = get_world_size() + # In Webdataset multi-gpu training settings, each GPU will be assigned with different portions of + # training data, therefore divde the dataset size by the number of GPUs. + return self._dataset.total_images // world_size diff --git a/nemo/collections/multimodal/data/stable_diffusion/webdataset_utils.py b/nemo/collections/multimodal/data/stable_diffusion/webdataset_utils.py new file mode 100644 index 000000000000..3fcc99b3ce26 --- /dev/null +++ b/nemo/collections/multimodal/data/stable_diffusion/webdataset_utils.py @@ -0,0 +1,280 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import io +import os +import sys +import webdataset.gopen as gopen_webdata +import yaml +from urllib.parse import urlparse +from webdataset.compat import FluidInterface +from webdataset.handlers import reraise_exception +from webdataset.pipeline import DataPipeline +from webdataset.pytorch import IterableDataset +from webdataset.tariterators import tar_file_expander, group_by_keys + +from webdataset import cache +from webdataset import filters +from webdataset import shardlists + +# Number of attempts to read aws objects. +_NUM_OBJECT_STORE_READ_ATTEMPTS = 10 + + +def gopen(url, mode="rb", bufsize=8192, **kw): + r"""Open the URL. + This uses the `gopen_schemes` dispatch table to dispatch based + on scheme. + Support for the following schemes is built-in: pipe, file, + http, https, sftp, ftps, scp. + When no scheme is given the url is treated as a file. + You can use the OPEN_VERBOSE argument to get info about + files being opened. + + This implementation is based on webdataset's gopen, + with the modification of supporting reading from s3 object_store: + https://webdataset.github.io/webdataset/api/webdataset/gopen.html#gopen + Args: + url (list[str]): the source URL + mode (str): the mode ("rb", "r") + bufsize (int): the buffer size + """ + global fallback_gopen + verbose = int(os.environ.get("GOPEN_VERBOSE", 0)) + if verbose: + print("GOPEN", url, gopen_webdata.info, file=sys.stderr) + + assert mode in ["rb", "wb"], mode + if url == "-": + if mode == "rb": + return sys.stdin.buffer + elif mode == "wb": + return sys.stdout.buffer + else: + raise ValueError(f"unknown mode {mode}") + + # If we specify 'object_store' in keyword arguments, + # then we would load from AWS. + # In this case, you also need to specify s3_client and s3_bucket_name + # in arguments. + if 'object_store' in kw and kw['object_store']: + # Load from object store + attempt = 0 + + while attempt < _NUM_OBJECT_STORE_READ_ATTEMPTS: + try: + s3_response_object = kw['s3_client'].get_object(Bucket=kw['s3_bucket_name'], Key=url) + object_content = s3_response_object['Body'].read() + + # This is a check to verify is the object is fully read. + full_read = (s3_response_object['ContentLength'] == len(object_content)) + if full_read: + return io.BytesIO(object_content) + else: + attempt += 1 + except Exception as e: # noqa + # If there is an exception (usually connectivity error or protocol error), read again + attempt += 1 + print(e) + print('Retrying tar file download, attempt {}'.format(attempt)) + continue + raise ConnectionError('Unable to read {} from PBSS. {} attempts tried.'.format(url, attempt)) + + # Append root path to the url if dataset is stored on local disk system + elif 'local_root_path' in kw and kw['local_root_path'] is not None: + url = os.path.join(kw['local_root_path'], url) + + # For all other gopen schemes, use the native webdataset gopen functions. + pr = urlparse(url) + if pr.scheme == "": + bufsize = int(os.environ.get("GOPEN_BUFFER", -1)) + return open(url, mode, buffering=bufsize) + if pr.scheme == "file": + bufsize = int(os.environ.get("GOPEN_BUFFER", -1)) + return open(pr.path, mode, buffering=bufsize) + handler = gopen_webdata.gopen_schemes["__default__"] + handler = gopen_webdata.gopen_schemes.get(pr.scheme, handler) + return handler(url, mode, bufsize, **kw) + + +def url_opener(data, handler=reraise_exception, **kw): + r"""Given a stream of url names (packaged in `dict(url=url)`), yield opened streams. + + Args: + data: Iterator of dictionaires containing url paths. + handler: Exception handler. + """ + for sample in data: + assert isinstance(sample, dict), sample + assert "url" in sample + url = sample["url"] + try: + stream = gopen(url, **kw) + sample.update(stream=stream) + yield sample + except Exception as exn: + exn.args = exn.args + (url,) + if handler(exn): + continue + else: + break + + +# Define a new tarfile_samples +def tarfile_samples(src, handler=reraise_exception, + load_from_object_store=False, + s3_client=None, + s3_bucket_name=None, + local_root_path=None): + r""" + Given an iterator of filenames, this function opens the URL streams + and groups data by keys. + + Args: + src: Iterator of data dictionaires containing URL names. + handler: Exception handler. + load_from_object_store (bool): A boolean flag to specify whether to load from + object store. + s3_client: If loading from object store, specify S3 client. + s3_bucket_name: If loading from object store, specify S3 bucket name. + local_root_path: If loading from local (or mounted) disk system, + specify the root path of the dataset. + """ + streams = url_opener( + src, + handler=handler, + object_store=load_from_object_store, + s3_client=s3_client, + s3_bucket_name=s3_bucket_name, + local_root_path=local_root_path, + ) + files = tar_file_expander(streams, handler=handler) + samples = group_by_keys(files, handler=handler) + return samples + + +tarfile_to_samples = filters.pipelinefilter(tarfile_samples) + + +class WebDataset(DataPipeline, FluidInterface): + r"""Webdataset class modified to support loading from object store.""" + + def __init__( + self, + urls, + handler=reraise_exception, + resampled=False, + shardshuffle=None, + cache_size=-1, + cache_dir=None, + detshuffle=False, + nodesplitter=shardlists.single_node_only, + verbose=False, + load_from_object_store=False, + s3_client=None, + s3_bucket_name=None, + local_root_path=None, + ): + r""" + Args: + urls: An iterator containing a list of url names. + handler: Exception handler. + resampled: If true, sample shards from shard list with replacement. + shardshuffle: If true, shuffles the entire shard list. + cache_size: Size of cache. + cache_dir: Path to store cache. + detshuffle: Whether to use deterministic shuffling when shardshuffle is True. + nodesplitter: Function for splitting urls among nodes. + verbose: If True, prints logs. + load_from_object_store (bool): A boolean flag to specify whether to load from + object store. + s3_client: If loading from object store, specify S3 client. + s3_bucket_name: If loading from object store, specify S3 bucket name. + local_root_path: If loading from local (or mounted) disk system, + specify the root path of the dataset. + """ + super().__init__() + if isinstance(urls, IterableDataset): + assert not resampled + self.append(urls) + elif isinstance(urls, str) and ( + urls.endswith(".yaml") or urls.endswith(".yml") + ): + with (open(urls)) as stream: + spec = yaml.safe_load(stream) + assert "datasets" in spec + self.append(shardlists.MultiShardSample(spec)) + elif isinstance(urls, dict): + assert "datasets" in urls + self.append(shardlists.MultiShardSample(urls)) + elif resampled: + self.append(shardlists.ResampledShards(urls)) + else: + self.append(shardlists.SimpleShardList(urls)) + self.append(nodesplitter) + self.append(shardlists.split_by_worker) + if shardshuffle is True: + shardshuffle = 100 + if shardshuffle is not None: + if detshuffle: + self.append(filters.detshuffle(shardshuffle)) + else: + self.append(filters.shuffle(shardshuffle)) + if cache_dir is None or cache_size == 0: + self.append(tarfile_to_samples( + handler=handler, + load_from_object_store=load_from_object_store, + s3_client=s3_client, + s3_bucket_name=s3_bucket_name, + local_root_path=local_root_path, + )) + else: + + # We dont use cache. + assert cache_size == -1 or cache_size > 0 + self.append( + cache.cached_tarfile_to_samples( + handler=handler, + verbose=verbose, + cache_size=cache_size, + cache_dir=cache_dir, + ) + ) + + +class WDSSampler: + r""" + A sampler function for setting the epoch number and iteration number. + In webdataset, information is propagated using environment flags. + In our case, + WDS_EPOCH_NUM: Epoch number + WDS_START_INDEX: Start index in this epoch. + """ + + def __init__(self, mode): + self.mode = mode + assert self.mode in ['train', 'val'] + + def set_epoch(self, epoch): + if self.mode == 'train': + os.environ["WDS_EPOCH_NUM"] = str(epoch) + else: + pass + + def set_iteration(self, start_index): + # start_index should be iters * batch_size + # It is the number of samples that have been seen by one GPU + if self.mode == 'train': + os.environ["WDS_START_INDEX"] = str(start_index) + else: + pass diff --git a/nemo/collections/multimodal/losses/__init__.py b/nemo/collections/multimodal/losses/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/losses/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/losses/clip_loss.py b/nemo/collections/multimodal/losses/clip_loss.py new file mode 100644 index 000000000000..e4ad3b3fae57 --- /dev/null +++ b/nemo/collections/multimodal/losses/clip_loss.py @@ -0,0 +1,122 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.distributed.nn +import torch.nn as nn +from torch import distributed as dist +from torch.nn import functional as F + +from nemo.collections.nlp.modules.common.megatron.utils import ( + average_losses_across_data_parallel_group, +) + +try: + from apex.transformer import parallel_state + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + + +def gather_features( + image_features, + text_features, + local_loss=False, + gather_with_grad=False, +): + data_parallel_world_size = parallel_state.get_data_parallel_world_size() + data_parallel_rank = parallel_state.get_data_parallel_rank() + data_parallel_group = parallel_state.get_data_parallel_group() + + if gather_with_grad: + # TODO (yuya): this is not working in current version of pytorch + # https://github.com/mlfoundations/open_clip/blob/main/src/open_clip/loss.py#L48 + all_image_features = torch.cat(torch.distributed.nn.all_gather(image_features), dim=0) + all_text_features = torch.cat(torch.distributed.nn.all_gather(text_features), dim=0) + + else: + gathered_image_features = [torch.zeros_like(image_features) for _ in range(data_parallel_world_size)] + gathered_text_features = [torch.zeros_like(text_features) for _ in range(data_parallel_world_size)] + dist.all_gather(gathered_image_features, image_features, group=data_parallel_group) + dist.all_gather(gathered_text_features, text_features, group=data_parallel_group) + # TODO (yuya): check what's this + if not local_loss: + # ensure grads for local rank when all_* features don't have a gradient + # https://amsword.medium.com/gradient-backpropagation-with-torch-distributed-all-gather-9f3941a381f8 + gathered_image_features[data_parallel_rank] = image_features + gathered_text_features[data_parallel_rank] = text_features + all_image_features = torch.cat(gathered_image_features, dim=0) + all_text_features = torch.cat(gathered_text_features, dim=0) + + return all_image_features, all_text_features + + +class ClipLoss(nn.Module): + + def __init__( + self, + local_loss=False, + gather_with_grad=False, + cache_labels=False, + ): + super().__init__() + self.local_loss = local_loss + self.gather_with_grad = gather_with_grad + self.cache_labels = cache_labels + + # cache state + self.prev_num_logits = 0 + self.labels = {} + + self.world_size = parallel_state.get_data_parallel_world_size() + self.rank = parallel_state.get_data_parallel_rank() + + def forward(self, output_tensor): + image_features, text_features, logit_scale = output_tensor + device = image_features.device + if self.world_size > 1: + all_image_features, all_text_features = gather_features( + image_features, text_features, + self.local_loss, self.gather_with_grad) + + if self.local_loss: + logits_per_image = logit_scale * image_features @ all_text_features.T + logits_per_text = logit_scale * text_features @ all_image_features.T + else: + logits_per_image = logit_scale * all_image_features @ all_text_features.T + logits_per_text = logits_per_image.T + else: + logits_per_image = logit_scale * image_features @ text_features.T + logits_per_text = logit_scale * text_features @ image_features.T + + # calculated ground-truth and cache if enabled + num_logits = logits_per_image.shape[0] + if self.prev_num_logits != num_logits or device not in self.labels: + labels = torch.arange(num_logits, device=device, dtype=torch.long) + if self.world_size > 1 and self.local_loss: + labels = labels + num_logits * self.rank + if self.cache_labels: + self.labels[device] = labels + self.prev_num_logits = num_logits + else: + labels = self.labels[device] + + total_loss = ( + F.cross_entropy(logits_per_image, labels) + + F.cross_entropy(logits_per_text, labels) + ) / 2 + + # TODO (yuya): this is not necessary; not necessary if global! + reduced_loss = average_losses_across_data_parallel_group([total_loss]) + return total_loss, {"loss": reduced_loss} diff --git a/nemo/collections/multimodal/models/__init__.py b/nemo/collections/multimodal/models/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/clip/__init__.py b/nemo/collections/multimodal/models/clip/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/clip/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/clip/megatron_clip_models.py b/nemo/collections/multimodal/models/clip/megatron_clip_models.py new file mode 100644 index 000000000000..c871d9361ef5 --- /dev/null +++ b/nemo/collections/multimodal/models/clip/megatron_clip_models.py @@ -0,0 +1,977 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools +import numpy as np +import torch +import torch.nn.functional as F +from functools import partial +from omegaconf.dictconfig import DictConfig +from pytorch_lightning.trainer.trainer import Trainer +from tqdm import tqdm +from typing import Any, List, Optional, Union + +from nemo.collections.multimodal.data.clip.clip_dataset import tokenize, build_train_valid_datasets, \ + build_imagenet_validation_dataloader +from nemo.collections.multimodal.losses.clip_loss import ClipLoss +from nemo.collections.multimodal.models.multimodal_base_model import MegatronMultimodalModel +from nemo.collections.nlp.modules.common.megatron.language_model import get_language_model +from nemo.collections.nlp.modules.common.megatron.module import ( + MegatronModule, + Float16Module, +) +from nemo.collections.nlp.modules.common.megatron.utils import ( + ApexGuardDefaults, + get_linear_layer, + init_method_normal, + parallel_lm_logits, + scaled_init_method_normal, + average_losses_across_data_parallel_group, + get_all_params_for_weight_decay_optimization, + get_params_for_weight_decay_optimization, +) +from nemo.collections.nlp.parts.utils_funcs import get_last_rank, is_last_rank +from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone, VitMlpHead +from nemo.core.classes.common import PretrainedModelInfo +from nemo.utils import logging + +try: + from apex.transformer import parallel_state + from apex.transformer.pipeline_parallel.schedules.common import build_model + from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_without_interleaving import ( + forward_backward_pipelining_without_interleaving, + ) + from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_with_interleaving import ( + _forward_backward_pipelining_with_interleaving, + ) + from apex.transformer.pipeline_parallel.schedules.fwd_bwd_no_pipelining import forward_backward_no_pipelining + from apex.transformer.enums import AttnMaskType + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + + +class CLIPVisionTransformer(MegatronModule): + """Vision Transformer Model.""" + + def __init__(self, model_cfg, + pre_process=True, post_process=True): + super(CLIPVisionTransformer, self).__init__() + + scaled_init_method = ( + scaled_init_method_normal(model_cfg.init_method_std, model_cfg.num_layers) + if model_cfg.use_scaled_init_method + else init_method_normal(model_cfg.init_method_std) + ) + + self.hidden_size = model_cfg.hidden_size + self.output_dim = model_cfg.output_dim + self.global_average_pool = model_cfg.global_average_pool + self.pre_process = pre_process + self.post_process = post_process + self.backbone = VitBackbone( + model_cfg, + init_method=init_method_normal(model_cfg.init_method_std), + scaled_init_method=scaled_init_method, + pre_process=self.pre_process, + post_process=self.post_process, + class_token=False, + single_token_output=False, + ) + + if self.post_process: + self.head = torch.nn.Linear( + self.hidden_size, + self.output_dim, + bias=False, + ) + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + self.backbone.set_input_tensor(input_tensor) + + def forward(self, input): + hidden_states = self.backbone(input) + + if self.post_process: + if self.global_average_pool: + hidden_states = hidden_states.mean(dim=1) + else: + hidden_states = hidden_states[:, 0] + hidden_states = self.head(hidden_states) + # print("vision_head", hidden_states.shape) + return hidden_states + + +class CLIPTextTransformer(MegatronModule): + """Text Transformer Model.""" + + def __init__(self, model_cfg, padded_vocab_size, + pre_process=True, post_process=True): + super(CLIPTextTransformer, self).__init__() + + self.output_dim = model_cfg.output_dim + self.pre_process = pre_process + self.post_process = post_process + self.fp16_lm_cross_entropy = model_cfg.fp16_lm_cross_entropy + self.sequence_parallel = model_cfg.sequence_parallel + self.gradient_accumulation_fusion = model_cfg.gradient_accumulation_fusion + + scaled_init_method = ( + scaled_init_method_normal(model_cfg.init_method_std, model_cfg.num_layers) + if model_cfg.use_scaled_init_method + else init_method_normal(model_cfg.init_method_std) + ) + self.language_model, self._language_model_key = get_language_model( + vocab_size=padded_vocab_size, + hidden_size=model_cfg.hidden_size, + hidden_dropout=model_cfg.hidden_dropout, + attention_dropout=model_cfg.attention_dropout, + num_tokentypes=0, + max_position_embeddings=model_cfg.max_position_embeddings, + num_layers=model_cfg.num_layers, + num_attention_heads=model_cfg.num_attention_heads, + apply_query_key_layer_scaling=model_cfg.apply_query_key_layer_scaling, + kv_channels=model_cfg.kv_channels, + ffn_hidden_size=model_cfg.ffn_hidden_size, + add_pooler=False, + encoder_attn_mask_type=AttnMaskType.causal, + position_embedding_type=model_cfg.get("position_embedding_type", "learned_absolute"), + init_method=init_method_normal(model_cfg.init_method_std), + scaled_init_method=scaled_init_method, + pre_process=self.pre_process, + post_process=self.post_process, + init_method_std=model_cfg.init_method_std, + use_cpu_initialization=model_cfg.use_cpu_initialization, + precision=model_cfg.precision, + fp32_residual_connection=model_cfg.fp32_residual_connection, + activations_checkpoint_granularity=model_cfg.activations_checkpoint_granularity, + activations_checkpoint_method=model_cfg.activations_checkpoint_method, + activations_checkpoint_num_layers=model_cfg.activations_checkpoint_num_layers, + activations_checkpoint_layers_per_pipeline=model_cfg.activations_checkpoint_layers_per_pipeline, + normalization=model_cfg.normalization, + layernorm_epsilon=model_cfg.layernorm_epsilon, + bias_activation_fusion=model_cfg.bias_activation_fusion, + bias_dropout_add_fusion=model_cfg.bias_dropout_add_fusion, + masked_softmax_fusion=model_cfg.masked_softmax_fusion, + gradient_accumulation_fusion=model_cfg.gradient_accumulation_fusion, + persist_layer_norm=model_cfg.persist_layer_norm, + openai_gelu=model_cfg.openai_gelu, + onnx_safe=model_cfg.onnx_safe, + megatron_legacy=model_cfg.megatron_legacy, + sequence_parallel=model_cfg.sequence_parallel, + transformer_engine=model_cfg.transformer_engine, + fp8=model_cfg.fp8, + fp8_e4m3=model_cfg.fp8_e4m3, + fp8_hybrid=model_cfg.fp8_hybrid, + fp8_margin=model_cfg.fp8_margin, + fp8_interval=model_cfg.fp8_interval, + fp8_amax_history_len=model_cfg.fp8_amax_history_len, + fp8_amax_compute_algo=model_cfg.fp8_amax_compute_algo, + reduce_amax=model_cfg.get('reduce_amax', True), + use_emha=model_cfg.use_emha, + ) + + self.initialize_word_embeddings( + init_method=init_method_normal(model_cfg.init_method_std), + vocab_size=padded_vocab_size, + hidden_size=model_cfg.hidden_size, + ) + + # TODO (yuya): check this position id + self.position_ids = None + if self.pre_process: + self.position_ids = torch.arange(model_cfg.max_position_embeddings).expand(1, -1).cuda() + + if self.post_process: + self.head = torch.nn.Linear( + model_cfg.hidden_size, + self.output_dim, + bias=False, + ) + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + self.language_model.set_input_tensor(input_tensor) + + def forward( + self, + input_ids, + ): + # input_ids: [b, s] + # position_ids: [b, s] + # attention_mask: [1, 1, s, s] + + hidden_states = self.language_model( + input_ids, + self.position_ids, + None, + token_type_ids=None, + layer_past=None, + get_key_value=False, + encoder_input=None, + set_inference_key_value_memory=False, + inference_max_sequence_len=None, + checkpoint_activations_all_layers=None, + ) + + if self.post_process: + # shape = [seq, bsz, hidden] + # take features from the eot embedding (eot_token is the highest number in each sequence) + hidden_states = hidden_states[input_ids.argmax(dim=-1), torch.arange(hidden_states.shape[1])] + return self.head(hidden_states) + + return hidden_states + + +class CLIPModel(MegatronModule): + """CLIP Model""" + + def __init__(self, model_cfg, padded_vocab_size, + pre_process=True, post_process=True): + super(CLIPModel, self).__init__() + + self.pre_process = pre_process + self.post_process = post_process + self.vision_encoder = CLIPVisionTransformer( + model_cfg.vision, + pre_process=self.pre_process, + post_process=self.post_process, + ) + self.text_encoder = CLIPTextTransformer( + model_cfg.text, + padded_vocab_size, + pre_process=self.pre_process, + post_process=self.post_process, + ) + + self.logit_scale = torch.nn.Parameter(torch.ones([]) * np.log(1 / 0.07)) + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + # TODO (yuya): fix this + pass + + def forward(self, images, captions): + image_features = self.vision_encoder(images) + text_features = self.text_encoder(captions) + + if self.post_process: + return F.normalize(image_features, dim=-1), \ + F.normalize(text_features, dim=-1), \ + self.logit_scale.exp() + + return image_features, text_features + + +class MegatronCLIPModel(MegatronMultimodalModel): + """Megatron CLIP Model.""" + + def __init__(self, cfg: DictConfig, trainer: Trainer): + if not HAVE_APEX: + raise ImportError( + "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + + # this prevents base constructor from initializing tokenizer + self.tokenizer = None + self.imagenet_val = None + super().__init__(cfg, trainer=trainer) + + self._validate_trainer() + + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) + + if not self.megatron_amp_O2 and self.cfg.get('virtual_pipeline_model_parallel_size', None): + raise ValueError('Virtual pipeline model parallel is only supported when using megatron_amp_O2') + + # build_model returns a list of modules which are used for interleaved pipeline parallelism + self.model = build_model( + model_provider_func=self.model_provider_func, + wrap_with_ddp=False, + virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), + ) + + # if we're not using interleaved, then self.model is a module. + if self.cfg.get('virtual_pipeline_model_parallel_size', None) is None: + self.model = self.model[0] + + if self.megatron_amp_O2: + + if not self.with_distributed_adam: + # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type + if isinstance(self.model, list): + for module in self.model: + module.cuda(torch.cuda.current_device()) + else: + self.model.cuda(torch.cuda.current_device()) + + # Model wrapper to convert both model and inputs to half precision + # TODO (yuya): check this; FP16 Module might not work; when self.model is a list? + if isinstance(self.model, list): + converted_model = [] + for module in self.model: + converted_model.append(Float16Module(module=module, precision=cfg.precision)) + self.model = converted_model + else: + self.model = Float16Module(module=self.model, precision=cfg.precision) + + if self.trainer.precision == 'bf16': + self.autocast_dtype = torch.bfloat16 + elif int(self.trainer.precision) == 32: + self.autocast_dtype = torch.float + elif int(self.trainer.precision) == 16: + self.autocast_dtype = torch.half + else: + raise ValueError('precision must be in [32, 16, "bf16"]') + + def model_provider_func(self, pre_process, post_process): + """Model depends on pipeline paralellism.""" + model = CLIPModel( + model_cfg=self.cfg, + padded_vocab_size=self.padded_vocab_size, + pre_process=pre_process, + post_process=post_process, + ) + return model + + def setup_optimizer_param_groups(self): + """ModelPT override. Optimizer will get self._optimizer_param_groups""" + if self.cfg.get('do_layer_norm_weight_decay', False): + if isinstance(self.model, list): + self._optimizer_param_groups = get_all_params_for_weight_decay_optimization(self.model) + else: + self._optimizer_param_groups = get_all_params_for_weight_decay_optimization([self.model]) + + else: + self._optimizer_param_groups = get_params_for_weight_decay_optimization(self.model) + + def configure_optimizers(self): + + if self.with_distributed_adam: + + # Disable overlapped grad sync for embedding grad when + # pipeline parallelism is enabled + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if parallel_state.is_pipeline_first_stage(ignore_virtual=True): + if isinstance(self.model, list): + module = self.model[0] # only the first virtual rank has the embeddings + else: + module = self.model + # if module.share_token_embeddings: + # param = module.word_embeddings_weight() + # param._disable_greedy_grad_copy = not self.megatron_amp_O2 + # param._disable_overlap_grad_sync = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + if isinstance(self.model, list): + module = self.model[-1] # only the last virtual rank has the embeddings + else: + module = self.model + # if module.share_token_embeddings: + # param = module.word_embeddings_weight() + # param._disable_greedy_grad_copy = not self.megatron_amp_O2 + # param._disable_overlap_grad_sync = True + + # Disable overlapped grad sync for layer norm grads when + # sequence parallelism is enabled + for param in self.parameters(): + if getattr(param, 'sequence_parallel_enabled', False): + param._disable_greedy_grad_copy = not self.megatron_amp_O2 + param._disable_overlap_grad_sync = True + + return super().configure_optimizers() + + def forward(self, image, text): + output_tensor = self.model(image, text) + return output_tensor + + def _get_fwd_bwd_function(self): + if self.cfg.get('pipeline_model_parallel_size', 1) > 1: + if self.cfg.get('virtual_pipeline_model_parallel_size', None) is not None: + fwd_bwd_function = _forward_backward_pipelining_with_interleaving + else: + fwd_bwd_function = forward_backward_pipelining_without_interleaving + else: + fwd_bwd_function = forward_backward_no_pipelining + return fwd_bwd_function + + def training_step(self, batch, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + Batch should be a list of microbatches and those microbatches should on CPU. + Microbatches are then moved to GPU during the pipeline. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + + # we zero grads here because we also call backward in the apex fwd/bwd functions + self._optimizer.zero_grad() + + if parallel_state.is_pipeline_first_stage(ignore_virtual=True) or parallel_state.is_pipeline_last_stage( + ignore_virtual=True + ): + # we prepare the micro batches for the apex fwd/bwd function + batch_for_pipeline = self.process_global_batch(batch) + else: + # The intermediate pipeline stages do not need any inputs from data loader + # GPT3 uses decoder with AttnMask:causal, thus doesn't need attention_mask + batch_for_pipeline = None + + # TODO (yuya): fix this shape + tensor_shape = None + + # handle asynchronous grad reduction + if self.with_distributed_adam: + if self.megatron_amp_O2: + # copy grads to main grad + custom_sync_context_handler = lambda: self._optimizer.no_sync(greedy_grad_copy=True) + else: + # keep grad tensors around + custom_sync_context_handler = lambda: self._optimizer.no_sync(greedy_grad_copy=False) + else: + if self.megatron_amp_O2 and not self.cfg.get('sequence_parallel', False): + custom_sync_context_handler = self._optimizer.no_sync + else: + # TODO: enable async grad all reduce for O1/autocast mixed precision training + custom_sync_context_handler = None + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + fwd_bwd_function = self._get_fwd_bwd_function() + + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + batch=batch_for_pipeline, + model=self.model, + forward_only=False, + tensor_shape=tensor_shape, + dtype=self.autocast_dtype, + grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + custom_sync_context_handler=custom_sync_context_handler, + sequence_parallel_enabled=self.cfg.get('sequence_parallel', False), + sync_batch_comm=self.cfg.get('sync_batch_comm', False), + ) + + # only the last stages of the pipeline return losses + if losses_reduced_per_micro_batch: + # average loss across micro batches + loss_tensors_list = [loss_reduced['loss'] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_mean = loss_tensor.mean() + else: + loss_mean = torch.tensor(0.0).cuda() + + # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced + if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): + self.allreduce_sequence_parallel_gradients() + + if self.with_distributed_adam: + # gradients are reduced internally in distributed optimizer + pass + elif self.megatron_amp_O2: + # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): + # # main grads are stored in the MainParamsOptimizer wrapper + # self._optimizer.allreduce_main_grads() + self._optimizer.allreduce_main_grads() + else: + # async grad allreduce is not currently implemented for O1/autocasting mixed precision training + # so we all-reduce gradients after the pipeline + self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) + + # TODO (yuya): check if this is needed in text transformer + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1: + # # when using pipeline parallelism the first and last stage must keep embeddings in sync + # self.allreduce_first_last_embeddings() + + ## logging + # we can only log on one rank if it is rank zero so we broadcast from last rank + # we can avoid this broadcast by updating the PTL log function to accept specific ranks + torch.distributed.broadcast(loss_mean, get_last_rank()) + + if self.cfg.precision == 16: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log('loss_scale', loss_scale) + + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, rank_zero_only=True) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True) + self.log( + 'consumed_samples', + self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), + prog_bar=True, + rank_zero_only=True, + ) + + return loss_mean + + def backward(self, *args, **kwargs): + """ LightningModule hook to do backward. + We want this to do nothing since we run backward in the fwd/bwd functions from apex. + No need to call it here. + """ + pass + + def optimizer_zero_grad(self, *args, **kwargs): + """ LightningModule hook to zero grad. + We want this to do nothing as we are zeroing grads during the training_step. + """ + pass + + def _append_module_grads(self, module, grads): + for param in module.parameters(): + if getattr(param, 'sequence_parallel_enabled', False): + if self.megatron_amp_O2: + grad = param.main_grad + else: + grad = param.grad + grads.append(grad.data) + + def allreduce_sequence_parallel_gradients(self): + """ All-reduce layernorm parameters across model parallel nodes when sequence parallelism is used. + Modified from megatron-lm: + https://gitlab-master.nvidia.com/ADLR/megatron-lm/-/blob/3f91f09bb2ab32f9904b47f46f19d2fc3f518ed8/megatron/training.py#L425 + """ + + grads = [] + if isinstance(self.model, list): + for module in self.model: + self._append_module_grads(module, grads) + else: + self._append_module_grads(self.model, grads) + + coalesced = torch._utils._flatten_dense_tensors(grads) + torch.distributed.all_reduce(coalesced, group=parallel_state.get_tensor_model_parallel_group()) + for buf, synced in zip(grads, torch._utils._unflatten_dense_tensors(coalesced, grads)): + buf.copy_(synced) + + def get_forward_output_and_loss_func(self): + loss_func = ClipLoss( + local_loss=self.cfg.local_loss, + gather_with_grad=self.cfg.gather_with_grad, + ) + + def fwd_output_and_loss_func(batch, model): + if parallel_state.get_pipeline_model_parallel_world_size() == 1: + batch = [x.cuda(non_blocking=True) for x in batch] + images, captions = batch + else: + # GPT3 uses only causal mask, which doesn't need attention mask + if parallel_state.is_pipeline_first_stage(): + # Fist pipeline stage needs only the tokens and position_ids + images = batch[0].cuda(non_blocking=True) + captions = batch[1].cuda(non_blocking=True) + else: + # Intermediate / Last pipeline stage doesn't need any inputs + images, captions = None, None + + output_tensor = model(images, captions) + return output_tensor, loss_func + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(batch, model): + raise NotImplementedError + + return fwd_output_only_func + + def zero_shot_classifier(self): + if self.cfg.get("megatron_amp_O2", False): + text_encoder = self.model.module.text_encoder + else: + text_encoder = self.model.text_encoder + + with torch.no_grad(): + zeroshot_weights = [] + for texts in self.imagenet_val["texts"]: + texts = texts.cuda(non_blocking=True) + # TODO (yuya): distributed not working + with torch.cuda.amp.autocast( + enabled=self.autocast_dtype in (torch.half, torch.bfloat16), + dtype=self.autocast_dtype, + ): + class_embeddings = text_encoder(texts) + class_embedding = F.normalize(class_embeddings, dim=-1).mean(dim=0) + class_embedding /= class_embedding.norm() + zeroshot_weights.append(class_embedding) + zeroshot_weights = torch.stack(zeroshot_weights, dim=1) + return zeroshot_weights + + def zero_shot_eval(self): + + def accuracy(output, target, topk=(1,)): + pred = output.topk(max(topk), 1, True, True)[1].t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + return [float(correct[:k].reshape(-1).float().sum(0, keepdim=True).cpu().numpy()) for k in topk] + + logging.info('Starting zero-shot imagenet.') + + logging.info('Building zero-shot classifier') + classifier = self.zero_shot_classifier() + + logging.info('Using classifier') + + if self.cfg.get("megatron_amp_O2", False): + vision_encoder = self.model.module.vision_encoder + else: + vision_encoder = self.model.vision_encoder + with torch.no_grad(): + top1, top5, n = 0., 0., 0. + for images, target in tqdm(self.imagenet_val["images"], desc="Imagenet Zero-shot Evaluation", leave=False): + images = images.cuda(non_blocking=True).to(self.autocast_dtype) + target = target.cuda(non_blocking=True) + # predict + with torch.cuda.amp.autocast( + enabled=self.autocast_dtype in (torch.half, torch.bfloat16), + dtype=self.autocast_dtype, + ): + image_features = vision_encoder(images) + image_features = F.normalize(image_features, dim=-1) + logits = 100. * image_features @ classifier + + # measure accuracy + acc1, acc5 = accuracy(logits, target, topk=(1, 5)) + top1 += acc1 + top5 += acc5 + n += images.size(0) + + logging.info('Finished zero-shot imagenet.') + top1 = (top1 / n) + top5 = (top5 / n) + return top1, top5 + + def validation_step(self, batch, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + + batch_for_pipeline = self.process_global_batch(batch, self.cfg.global_batch_size) + tensor_shape = None # Placeholder + + # run forward passes for an entire global batch + # we do this inside validation_step to support pipeline parallelism + fwd_bwd_function = self._get_fwd_bwd_function() + + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + batch=batch_for_pipeline, + model=self.model, + forward_only=True, + tensor_shape=tensor_shape, + dtype=self.autocast_dtype, + sequence_parallel_enabled=self.cfg.get('sequence_parallel', False), + ) + + def _get_metric_with_batch_size(metric_key): + # only the last stage of the pipeline returns losses + if losses_reduced_per_micro_batch: + loss_with_batch_size_list = [ + [loss_reduced[metric_key].item(), self.cfg.micro_batch_size] + for loss_reduced in losses_reduced_per_micro_batch + ] + # actual_batch_size = batch[0].shape[0] # Might be lesser than global_batch_size if drop_last=False + # expected_batch_size = self.cfg.global_batch_size // parallel_state.get_data_parallel_world_size() + # if actual_batch_size == expected_batch_size: + # loss_with_batch_size_list = [ + # [loss_reduced[metric_key].item(), self.cfg.micro_batch_size] + # for loss_reduced in losses_reduced_per_micro_batch + # ] + # else: + # loss_with_batch_size_list = [] + # total_samples_remaining = actual_batch_size + # for loss_reduced in losses_reduced_per_micro_batch: + # if total_samples_remaining <= 0: + # break + # if total_samples_remaining // self.cfg.micro_batch_size >= 1: + # loss_with_batch_size_list.append( + # [loss_reduced[metric_key].item(), self.cfg.micro_batch_size]) + # else: + # loss_with_batch_size_list.append([loss_reduced[metric_key].item(), total_samples_remaining]) + # total_samples_remaining = total_samples_remaining - self.cfg.micro_batch_size + else: + # we're not on the last pipeline stage so no losses + loss_with_batch_size_list = [] + return loss_with_batch_size_list + + return _get_metric_with_batch_size('loss') + + def validation_epoch_end(self, outputs): + # TODO (yuya): need fix later, check with Sean + if not outputs: + return + + # Run zero shot imagenet evaluation + if self.imagenet_val is not None: + imagenet_metric = torch.zeros(2).cuda() + imagenet_metric[0], imagenet_metric[1] = self.zero_shot_eval() + imagenet_metric = average_losses_across_data_parallel_group(imagenet_metric) + self.log('imagenet_top1', imagenet_metric[0], prog_bar=True, rank_zero_only=True) + self.log('imagenet_top5', imagenet_metric[1], prog_bar=True, rank_zero_only=True) + + if parallel_state.is_pipeline_last_stage(): + loss_outputs = [output[0] for output in outputs] + + def _get_average_metric(metric_outputs): + # only the last pipeline parallel stages return metric with their batch size + total_num_samples = 0 + total_metric = 0 + for metric_with_batch_size in metric_outputs: + metric_with_batch_size_array = np.array(metric_with_batch_size).flatten() + batch_metrices = metric_with_batch_size_array[0::2] + batch_sizes = metric_with_batch_size_array[1::2] + total_num_samples += sum(batch_sizes) + total_metric += np.dot(batch_metrices, batch_sizes) + + avg_metric = total_metric / total_num_samples + return avg_metric + + averaged_metrics = torch.tensor( + [_get_average_metric(loss_outputs)], + dtype=torch.float32).cuda() + else: + averaged_metrics = torch.tensor([0.0], dtype=torch.float32).cuda() + + # we can only log on one rank if it is rank zero so we broadcast from last rank + torch.distributed.broadcast(averaged_metrics, get_last_rank()) + averaged_loss = averaged_metrics + + self.log('global_step', self.trainer.global_step, prog_bar=True, rank_zero_only=True) + self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True) + + def test_step(self, batch, batch_idx): + return self.validation_step(batch, batch_idx) + + def test_epoch_end(self, outputs): + averaged_loss = average_losses_across_data_parallel_group(outputs) + logging.info(f'test_loss: {averaged_loss[0]}') + + def process_global_batch(self, global_batch, global_batch_size=None): + """ Prepares the global batch for apex fwd/bwd functions. + Global batch is a list of micro batches. + """ + images = global_batch["images"] # images + captions = global_batch["captions"] + + expected_batch_size = None + if global_batch_size is not None: + expected_batch_size = global_batch_size // parallel_state.get_data_parallel_world_size() + current_batch_size = images.shape[0] + if expected_batch_size is not None and expected_batch_size > current_batch_size: + raise NotImplementedError("Please turn on drop_last.") + + return [images, captions] + + def build_train_valid_test_datasets(self): + logging.info('Building datasets for CLIP...') + if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): + raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") + + self._train_ds, self._validation_ds = build_train_valid_datasets( + model_cfg=self.cfg, + consumed_samples=self.compute_consumed_samples(0), + tokenizer=self.tokenizer, + ) + self._test_ds = None + + if self._train_ds is not None: + logging.info(f'Length of train dataset: {len(self._train_ds)}') + if self._validation_ds is not None: + logging.info(f'Length of val dataset: {len(self._validation_ds)}') + if self._test_ds is not None: + logging.info(f'Length of test dataset: {len(self._test_ds)}') + logging.info(f'Finished building datasets for CLIP.') + + return self._train_ds, self._validation_ds, self._test_ds + + # def build_pretraining_data_loader(self, dataset, consumed_samples, dataset_type=None, drop_last=True): + # """Build dataloader given an input dataset.""" + # + # return torch.utils.data.DataLoader( + # dataset, num_workers=self.cfg.data.num_workers, pin_memory=True, + # ) + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + + # log number of parameters + if isinstance(self.model, list): + num_parameters_on_device = sum( + [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] + ) + # if parallel_state.get_pipeline_model_parallel_world_size() > 1 and parallel_state.is_pipeline_last_stage( + # ignore_virtual=True + # ): + # # substract the embedding weights on the last virtual stage + # num_word_embedding_parameters = sum([p.nelement() for p in self.model[-1].word_embeddings_weight()]) + # num_parameters_on_device -= num_word_embedding_parameters + else: + num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) + + # if parallel_state.get_pipeline_model_parallel_world_size() > 1 and parallel_state.is_pipeline_last_stage( + # ignore_virtual=True + # ): + # # substract the embedding weights on the last stage + # num_word_embedding_parameters = sum([p.nelement() for p in self.model.word_embeddings_weight()]) + # + # num_parameters_on_device -= num_word_embedding_parameters + + # to be summed across data parallel group + total_num_parameters = torch.tensor(num_parameters_on_device).cuda() + + torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer._checkpoint_connector.resume_from_checkpoint_fit_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + # allowing restored models to optionally setup datasets + self.build_train_valid_test_datasets() + + # Batch size need to be provided for webdatset + self._num_micro_batches = self.cfg.global_batch_size // ( + self.cfg.micro_batch_size * parallel_state.get_data_parallel_world_size()) + self._global_batch_size_on_this_data_parallel_rank = self._num_micro_batches * self.cfg.micro_batch_size + + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + + if self.cfg.data.get("imagenet_val") is not None: + self.imagenet_val = build_imagenet_validation_dataloader(self.cfg, self.tokenizer) + + # when using pipeline model parallel the final stage need to initialize word embeddings + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if isinstance(self.model, list): + for i, module in enumerate(self.model): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + # module.sync_initial_word_embeddings() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + else: + # self.model.sync_initial_word_embeddings() + pass + + def setup_training_data(self, cfg): + if hasattr(self, '_train_ds') and self._train_ds is not None: + consumed_samples = self.compute_consumed_samples(0) + logging.info( + f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' + ) + self._train_dl = torch.utils.data.DataLoader( + self._train_ds, + batch_size=self._global_batch_size_on_this_data_parallel_rank, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=cfg.train.get("drop_last", True), + persistent_workers=True, + ) + + def setup_validation_data(self, cfg): + if hasattr(self, '_validation_ds') and self._validation_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' + ) + self._validation_dl = torch.utils.data.DataLoader( + self._validation_ds, + batch_size=self._global_batch_size_on_this_data_parallel_rank, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=cfg.train.get("drop_last", True), + persistent_workers=True, + ) + + def setup_test_data(self, cfg): + if hasattr(self, '_test_ds') and self._test_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' + ) + self._test_dl = torch.utils.data.DataLoader( + self._test_ds, batch_size=self._global_batch_size_on_this_data_parallel_rank, + num_workers=cfg.num_workers, pin_memory=True, + ) + + def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any: + raise NotImplementedError + + def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: + """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device + When using pipeline parallelism, we need the global batch to remain on the CPU, + since the memory overhead will be too high when using a large number of microbatches. + Microbatches are transferred from CPU to GPU inside the pipeline. + """ + return batch + + def _validate_trainer(self): + """ Certain trainer configurations can break training. + Here we try to catch them and raise an error. + """ + if self.trainer.accumulate_grad_batches > 1: + raise ValueError( + f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' + ) + + @classmethod + def list_available_models(cls) -> Optional[PretrainedModelInfo]: + return None + + def on_save_checkpoint(self, checkpoint) -> None: + """LightningModule hook: + https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-save-checkpoint + """ + if isinstance(self.model, list): + for i in range(len(self.model)): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + checkpoint[f'model{i}'] = self.model[i].module.state_dict_for_save_checkpoint() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + + def on_load_checkpoint(self, checkpoint) -> None: + """LightningModule hook: + https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-load-checkpoint + """ + if isinstance(self.model, list): + for i in range(len(self.model)): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + self.model[i].module.load_state_dict(checkpoint[f'model{i}'], strict=True) + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + + def parameters(self): + if isinstance(self.model, list): + return itertools.chain.from_iterable(module.parameters() for module in self.model) + else: + return self.model.parameters() diff --git a/nemo/collections/multimodal/models/dreambooth/__init__.py b/nemo/collections/multimodal/models/dreambooth/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/dreambooth/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/dreambooth/dreambooth.py b/nemo/collections/multimodal/models/dreambooth/dreambooth.py new file mode 100644 index 000000000000..5c091421c238 --- /dev/null +++ b/nemo/collections/multimodal/models/dreambooth/dreambooth.py @@ -0,0 +1,503 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import os +import pytorch_lightning as pl +import torch +from abc import ABC +from apex import amp +from apex.contrib.clip_grad import clip_grad_norm_ +from functools import partial +from omegaconf import DictConfig, OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.utilities import GradClipAlgorithmType +from torch._dynamo import optimize +from torch._inductor import config as inductor_config +from torch.optim.lr_scheduler import LambdaLR +from typing import Any, Dict, Optional, Union + +from nemo.collections.multimodal.data.dreambooth.dreambooth_dataset import DreamBoothDataset +from nemo.collections.multimodal.models.multimodal_base_model import MegatronMultimodalModel +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import make_beta_schedule, \ + extract_into_tensor, noise_like +from nemo.collections.multimodal.parts.stable_diffusion.utils import default, exists +from nemo.collections.multimodal.parts.utils import randn_like +from nemo.collections.nlp.data.language_modeling.megatron.megatron_batch_samplers import ( + MegatronPretrainingRandomBatchSampler, +) +from nemo.collections.nlp.parts.utils_funcs import get_last_rank, is_last_rank +from nemo.core.classes import ModelPT +from nemo.core.classes.common import Serialization +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + +try: + from apex.contrib.clip_grad import clip_grad_norm_ + from apex import amp + from apex.transformer import parallel_state + from apex.transformer.pipeline_parallel.schedules.common import build_model + from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_without_interleaving import ( + forward_backward_pipelining_without_interleaving, + ) + from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_with_interleaving import ( + _forward_backward_pipelining_with_interleaving, + ) + from apex.transformer.pipeline_parallel.schedules.fwd_bwd_no_pipelining import forward_backward_no_pipelining + from apex.transformer.enums import AttnMaskType + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + + +def disabled_train(self, mode=True): + """Overwrite model.train with this function to make sure train/eval mode + does not change anymore.""" + return self + + +def _collate_fn(examples, with_prior_preservation=False): + if with_prior_preservation: + prompts = [ + [example["instance_prompt"], example["reg_prompt"]] + for example in examples + ] + images = [example["instance_images"] for example in examples] + \ + [example["reg_images"] for example in examples] + else: + prompts = [[example["instance_prompt"]] for example in examples] + images = [example["instance_images"] for example in examples] + + images = torch.stack(images) + images = images.to(memory_format=torch.contiguous_format).float() + + return prompts, images + + +class DreamBooth(torch.nn.Module, Serialization): + def __init__(self, cfg): + super().__init__() + self.cfg = cfg + self.with_prior_preservation = self.cfg.with_prior_preservation + self.num_reg_images = self.cfg.data.num_reg_images + self.pretrained_ckpt = self.cfg.pretrained_ckpt + self.prior_loss_weight = self.cfg.prior_loss_weight + self.num_images_per_prompt = self.cfg.data.num_images_per_prompt + + self.train_text_encoder = self.cfg.train_text_encoder + self.instantiate_text_encoder(self.cfg.cond_stage_config) + + self.inductor = self.cfg.inductor + self.inductor_cudagraphs = self.cfg.inductor_cudagraphs + + self.instantiate_vae(self.cfg.first_stage_config) + self.instantiate_unet(self.cfg.unet_config) + + self.scale_factor = self.cfg.scale_factor + self.num_timesteps = self.cfg.noise_scheduler.timesteps + self.parameterization = self.cfg.noise_scheduler.parameterization + self.get_noise_scheduler(self.cfg.noise_scheduler) + + self.rng = torch.Generator(device=torch.cuda.current_device(), ) + + def instantiate_unet(self, cfg): + self.unet = DreamBooth.from_config_dict(cfg) + self.unet.train() + if self.inductor: + # TorchInductor with CUDA graph can lead to OOM + inductor_config.triton.cudagraphs = cfg.inductor_cudagraphs + self.unet = optimize("inductor")(self.unet) + + def instantiate_vae(self, cfg): + model = DreamBooth.from_config_dict(cfg) + self.vae = model.eval() + self.vae.train = disabled_train + for param in self.vae.parameters(): + param.requires_grad = False + + def instantiate_text_encoder(self, cfg): + model = DreamBooth.from_config_dict(cfg) + if self.train_text_encoder: + self.text_encoder = model.train() + for param in self.text_encoder.parameters(): + param.requires_grad = True + else: + self.text_encoder = model.eval() + self.text_encoder.train = disabled_train + for param in self.text_encoder.parameters(): + param.requires_grad = False + + def get_noise_scheduler(self, cfg): + model = DreamBooth.from_config_dict(cfg) + self.noise_scheduler = model.eval() + + def forward(self, batch): + x, cond = batch + + latents = self.vae.encode(x).sample().detach() + latents = latents * self.scale_factor + + noise = randn_like(latents, generator=self.rng) + t = torch.randint(0, self.num_timesteps, (latents.shape[0],), generator=self.rng, device=latents.device).long() + x_noisy = self.noise_scheduler(x_start=latents, t=t, noise=noise) + + # cond = self.text_encoder([t[0] for t in batch["prompts"]]) + # if self.with_prior_preservation: + # cond_prior = self.text_encoder([t[1] for t in batch["prompts"]]) + # cond = torch.cat([cond, cond_prior], dim=0) + + model_output = self.unet(x_noisy, t, cond) + + if self.parameterization == "x0": + target = latents + elif self.parameterization == "eps": + target = noise + else: + raise NotImplementedError() + + if self.with_prior_preservation: + model_pred, model_pred_prior = torch.chunk(model_output, 2, dim=0) + target, target_prior = torch.chunk(target, 2, dim=0) + loss = torch.nn.functional.mse_loss(model_pred.float(), target.float(), reduction="mean") + prior_loss = torch.nn.functional.mse_loss(model_pred_prior.float(), target_prior.float(), reduction="mean") + loss = loss + prior_loss * self.prior_loss_weight + + else: + loss = torch.nn.functional.mse_loss(target.float(), model_output.float(), reduction="mean") + + return loss + + def parameters(self): + params = list(self.unet.parameters()) + if self.train_text_encoder: + # print(f"{self.__class__.__name__}: Also optimizing conditioner params!") + params = params + list(self.text_encoder.parameters()) + return params + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + pass + + +class MegatronDreamBooth(MegatronMultimodalModel): + + def __init__(self, cfg: DictConfig, trainer: Trainer): + if not HAVE_APEX: + raise ImportError( + "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + + # this prevents base constructor from initializing tokenizer + self.tokenizer = None + super().__init__(cfg, trainer=trainer) + + self._validate_trainer() + + # megatron_amp_O2 is not yet supported in diffusion models + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) + self.model = self.model_provider_func() + + if self.trainer.precision == 'bf16': + self.autocast_dtype = torch.bfloat16 + elif int(self.trainer.precision) == 32: + self.autocast_dtype = torch.float + elif int(self.trainer.precision) == 16: + self.autocast_dtype = torch.half + else: + raise ValueError('precision must be in [32, 16, "bf16"]') + + def model_provider_func(self, pre_process=True, post_process=True): + """Model depends on pipeline paralellism.""" + model = DreamBooth(cfg=self.cfg) + return model + + def forward(self, batch): + output_tensor = self.model(batch) + return output_tensor + + def training_step(self, batch, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + Batch should be a list of microbatches and those microbatches should on CPU. + Microbatches are then moved to GPU during the pipeline. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + + # we zero grads here because we also call backward in the apex fwd/bwd functions + self._optimizer.zero_grad() + + # we prepare the micro batches for the apex fwd/bwd function + batch_for_pipeline = self.process_global_batch(batch) + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + losses_reduced_per_micro_batch = forward_backward_no_pipelining( + forward_step_func=self.get_forward_output_and_loss_func(), + batch=batch_for_pipeline, + model=self.model, + forward_only=False, + tensor_shape=None, # required by pipeline parallelism + dtype=self.autocast_dtype, + grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + custom_sync_context_handler=None, + sequence_parallel_enabled=False, + sync_batch_comm=False, + ) + + # only the last stages of the pipeline return losses + loss_dict = {} + if losses_reduced_per_micro_batch: + # average loss across micro batches + loss_tensors_list = [loss_reduced['loss'] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_mean = loss_tensor.mean() + else: + loss_mean = torch.tensor(0.0, device=torch.cuda.current_device()) + + # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced + if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): + self.allreduce_sequence_parallel_gradients() + + if self.with_distributed_adam: + # gradients are reduced internally in distributed optimizer + pass + elif self.megatron_amp_O2: + # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): + # # main grads are stored in the MainParamsOptimizer wrapper + # self._optimizer.allreduce_main_grads() + self._optimizer.allreduce_main_grads() + else: + # async grad allreduce is not currently implemented for O1/autocasting mixed precision training + # so we all-reduce gradients after the pipeline + self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) + + torch.distributed.broadcast(loss_mean, get_last_rank()) + + if self.cfg.precision == 16: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log('loss_scale', loss_scale) + + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, prog_bar=True, rank_zero_only=True) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True) + self.log( + 'consumed_samples', + self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), + prog_bar=True, + rank_zero_only=True, + ) + return loss_mean + + @torch.no_grad() + def validation_step(self, batch, batch_idx): + batch_for_pipeline = self.process_global_batch(batch) + + losses_reduced_per_micro_batch = forward_backward_no_pipelining( + forward_step_func=self.get_forward_output_and_loss_func(), + batch=batch_for_pipeline, + model=self.model, + forward_only=True, + tensor_shape=None, # required by pipeline parallelism + dtype=self.autocast_dtype, + ) + # only the last stages of the pipeline return losses + if losses_reduced_per_micro_batch: + # average loss across micro batches + loss_tensors_list = [loss_reduced['loss'] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + val_loss_mean = loss_tensor.mean() + else: + val_loss_mean = torch.tensor(0.0, device=torch.cuda.current_device()) + + self.log(val_loss_mean, prog_bar=False, logger=True, on_step=False, on_epoch=True) + + def backward(self, *args, **kwargs): + """ LightningModule hook to do backward. + We want this to do nothing since we run backward in the fwd/bwd functions from apex. + No need to call it here. + """ + pass + + def optimizer_zero_grad(self, *args, **kwargs): + """ LightningModule hook to zero grad. + We want this to do nothing as we are zeroing grads during the training_step. + """ + pass + + def _append_module_grads(self, module, grads): + for param in module.parameters(): + if getattr(param, 'sequence_parallel_enabled', False): + if self.megatron_amp_O2: + grad = param.main_grad + else: + grad = param.grad + grads.append(grad.data) + + def process_global_batch(self, global_batch, global_batch_size=None): + """ Prepares the global batch for apex fwd/bwd functions. + Global batch is a list of micro batches. + """ + # noise_map, condition + prompts, images = global_batch + + # DB has more dedicated structure for encoding, so we enable autocasting here as well + with torch.cuda.amp.autocast( + self.autocast_dtype in (torch.half, torch.bfloat16), + dtype=self.autocast_dtype, + ): + images = images.cuda(non_blocking=True) + + cond = self.model.text_encoder([t[0] for t in prompts]) + if self.cfg.with_prior_preservation: + cond_prior = self.model.text_encoder([t[1] for t in prompts]) + cond = torch.cat([cond, cond_prior], dim=0) + + return images, cond + + def get_forward_output_and_loss_func(self): + def fwd_output_and_loss_func(batch, model): + batch = [x.cuda(non_blocking=True) for x in batch] + loss = model(batch) + + def dummy(output_tensor): + return loss, {'loss': loss} + + return loss, dummy + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(batch, model): + raise NotImplementedError + + return fwd_output_only_func + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + self.model.rng.manual_seed(self.cfg.seed + 100 * parallel_state.get_data_parallel_rank()) + + # log number of parameters + if isinstance(self.model, list): + num_parameters_on_device = sum( + [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] + ) + else: + num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) + + # to be summed across data parallel group + total_num_parameters = torch.tensor(num_parameters_on_device).cuda(non_blocking=True) + + torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer._checkpoint_connector.resume_from_checkpoint_fit_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + # Batch size need to be provided for webdatset + self._num_micro_batches = self.cfg.global_batch_size // ( + self.cfg.micro_batch_size * parallel_state.get_data_parallel_world_size()) + self._global_batch_size_on_this_data_parallel_rank = self._num_micro_batches * self.cfg.micro_batch_size + + self.setup_training_data(self.cfg.data) + + def setup_training_data(self, cfg): + if self.cfg.with_prior_preservation: + if cfg.regularization_dir is None: + raise ValueError("Regularization images must be provided to train with prior preservation loss") + if cfg.regularization_prompt is None: + raise ValueError("Regularization prompts must be provided to train with prior preservation loss") + + train_dataset = DreamBoothDataset( + instance_data_root=cfg.instance_dir, + instance_prompt=cfg.instance_prompt, + reg_data_root=cfg.regularization_dir if self.cfg.with_prior_preservation else None, + reg_prompt=cfg.regularization_prompt if self.cfg.with_prior_preservation else None, + size=cfg.resolution, + center_crop=cfg.center_crop, + ) + + batch_sampler = MegatronPretrainingRandomBatchSampler( + total_samples=len(train_dataset), + consumed_samples=self.compute_consumed_samples(0), + micro_batch_size=self.cfg.micro_batch_size, + global_batch_size=self.cfg.global_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=False, + ) + + self._train_dl = torch.utils.data.DataLoader( + train_dataset, + # batch_size=self._global_batch_size_on_this_data_parallel_rank, + batch_sampler=batch_sampler, + collate_fn=partial(_collate_fn, with_prior_preservation=self.cfg.with_prior_preservation), + num_workers=cfg.num_workers, + pin_memory=True, + persistent_workers=True, + ) + + def setup_validation_data(self, cfg): + pass + + def setup_test_data(self, cfg): + pass + + def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: + """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device + When using pipeline parallelism, we need the global batch to remain on the CPU, + since the memory overhead will be too high when using a large number of microbatches. + Microbatches are transferred from CPU to GPU inside the pipeline. + """ + return batch + + def _validate_trainer(self): + """ Certain trainer configurations can break training. + Here we try to catch them and raise an error. + """ + if self.trainer.accumulate_grad_batches > 1: + raise ValueError( + f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' + ) + + @classmethod + def list_available_models(cls): + return None + + def parameters(self): + if isinstance(self.model, list): + return itertools.chain.from_iterable(module.parameters() for module in self.model) + else: + return self.model.parameters() diff --git a/nemo/collections/multimodal/models/dreambooth/util.py b/nemo/collections/multimodal/models/dreambooth/util.py new file mode 100644 index 000000000000..be20d9447505 --- /dev/null +++ b/nemo/collections/multimodal/models/dreambooth/util.py @@ -0,0 +1,147 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytorch_lightning as pl +import torch +import torch.nn as nn +from functools import partial + +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import make_beta_schedule, \ + extract_into_tensor, noise_like +from nemo.collections.multimodal.parts.stable_diffusion.utils import default, exists +from nemo.core.classes.common import Serialization + + +class DiffusionWrapper(torch.nn.Module, Serialization): + def __init__(self, diff_model_config, conditioning_key): + super().__init__() + if isinstance(diff_model_config, nn.Module): + self.diffusion_model = diff_model_config + else: + self.diffusion_model = DiffusionWrapper.from_config_dict(diff_model_config) + self.conditioning_key = conditioning_key + assert self.conditioning_key in [None, 'concat', 'crossattn', 'hybrid', 'adm'] + + def forward(self, x_noisy, t, cond, return_ids=False): + if isinstance(cond, dict): + # hybrid case, cond is exptected to be a dict + pass + else: + if not isinstance(cond, list): + cond = [cond] + key = 'c_concat' if self.conditioning_key == 'concat' else 'c_crossattn' + cond = {key: cond} + x_recon = self.apply_step(x_noisy, t, **cond) + return x_recon + + def apply_step(self, x, t, c_concat: list = None, c_crossattn: list = None): + if self.conditioning_key is None: + out = self.diffusion_model(x, t) + elif self.conditioning_key == 'concat': + xc = torch.cat([x] + c_concat, dim=1) + out = self.diffusion_model(xc, t) + elif self.conditioning_key == 'crossattn': + cc = torch.cat(c_crossattn, 1) + out = self.diffusion_model(x, t, context=cc) + elif self.conditioning_key == 'hybrid': + xc = torch.cat([x] + c_concat, dim=1) + cc = torch.cat(c_crossattn, 1) + out = self.diffusion_model(xc, t, context=cc) + elif self.conditioning_key == 'adm': + cc = c_crossattn[0] + out = self.diffusion_model(x, t, y=cc) + else: + raise NotImplementedError() + + return out + + +class sd_noise_scheduler(nn.Module): + def __init__(self, + parameterization='eps', + v_posterior=0, + given_betas=None, + beta_schedule='linear', + timesteps=1000, + linear_start=0.00085, + linear_end=0.012, + cosine_s=8e-3): + super().__init__() + self.parameterization = parameterization + self.v_posterior = v_posterior + self.register_schedule(given_betas=given_betas, + beta_schedule=beta_schedule, + timesteps=timesteps, + linear_start=linear_start, + linear_end=linear_end, + cosine_s=cosine_s) + + def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, + linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + if exists(given_betas): + betas = given_betas + else: + betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, + cosine_s=cosine_s) + alphas = 1. - betas + alphas_cumprod = np.cumprod(alphas, axis=0) + alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) + + timesteps, = betas.shape + self.num_timesteps = int(timesteps) + self.linear_start = linear_start + self.linear_end = linear_end + assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' + + to_torch = partial(torch.tensor, dtype=torch.float32) + + self.register_buffer('betas', to_torch(betas)) + self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) + self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) + self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) + self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) + self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1))) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + posterior_variance = (1 - self.v_posterior) * betas * (1. - alphas_cumprod_prev) / ( + 1. - alphas_cumprod) + self.v_posterior * betas + # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) + self.register_buffer('posterior_variance', to_torch(posterior_variance)) + # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain + self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20)))) + self.register_buffer('posterior_mean_coef1', to_torch( + betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod))) + self.register_buffer('posterior_mean_coef2', to_torch( + (1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. - alphas_cumprod))) + + if self.parameterization == "eps": + lvlb_weights = self.betas ** 2 / ( + 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod)) + elif self.parameterization == "x0": + lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2. * 1 - torch.Tensor(alphas_cumprod)) + else: + raise NotImplementedError("mu not supported") + # TODO how to choose this term + lvlb_weights[0] = lvlb_weights[1] + self.register_buffer('lvlb_weights', lvlb_weights, persistent=False) + assert not torch.isnan(self.lvlb_weights).all() + + def forward(self, x_start, t, noise=None): + noise = default(noise, lambda: torch.randn_like(x_start)) + return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) diff --git a/nemo/collections/multimodal/models/instruct_pix2pix/__init__.py b/nemo/collections/multimodal/models/instruct_pix2pix/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/instruct_pix2pix/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/__init__.py b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py new file mode 100644 index 000000000000..3a98c756e414 --- /dev/null +++ b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py @@ -0,0 +1,259 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +https://github.com/timothybrooks/instruct-pix2pix/blob/2afcb7e45bd350765f21a58a0c135871e9dc5a78/stable_diffusion/ldm/models/diffusion/ddpm_edit.py +""" + +import numpy as np +import pytorch_lightning as pl +import torch +import torch.nn as nn +from contextlib import contextmanager +from einops import rearrange, repeat +from functools import partial +from pytorch_lightning.utilities.distributed import rank_zero_only +from torch.optim.lr_scheduler import LambdaLR +from torchvision.utils import make_grid +from tqdm import tqdm + +from nemo.collections.multimodal.data.instruct_pix2pix.edit_dataset import EditDataset +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion, LatentDiffusion +from nemo.collections.nlp.data.language_modeling.megatron.megatron_batch_samplers import ( + MegatronPretrainingBatchSampler, + MegatronPretrainingRandomBatchSampler, +) +from nemo.utils import logging + +try: + from apex.transformer import parallel_state + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + + +class LatentDiffusionEdit(LatentDiffusion): + + def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): + pl_sd = torch.load(path, map_location="cpu") + if "state_dict" in list(pl_sd.keys()): + pl_sd = pl_sd["state_dict"] + sd = {} + + first_key = list(pl_sd.keys())[0] + # State keys of model trained with TorchDynamo changed from + # "model.xxx" to "model._orig_mod.xxx" + for k, v in pl_sd.items(): + new_k = k.replace("._orig_mod", "") + # compatibility for stable diffusion old checkpoint + # remove megatron wrapper prefix + if first_key == "model.betas": + new_k = new_k.lstrip("model.") + sd[new_k] = v + keys = list(sd.keys()) + + # Our model adds additional channels to the first layer to condition on an input image. + # For the first layer, copy existing channel weights and initialize new channel weights to zero. + input_keys = [ + "model.diffusion_model.input_blocks.0.0.weight", + ] + + self_sd = self.state_dict() + for input_key in input_keys: + if input_key not in sd or input_key not in self_sd: + continue + + input_weight = self_sd[input_key] + if input_weight.size() != sd[input_key].size(): + print(f"Manual init: {input_key}") + input_weight.zero_() + input_weight[:, :4, :, :].copy_(sd[input_key]) + ignore_keys.append(input_key) + + for k in keys: + for ik in ignore_keys: + if k.startswith(ik): + print("Deleting key {} from state_dict.".format(k)) + del sd[k] + missing, unexpected = self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict( + sd, strict=False) + print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") + if len(missing) > 0: + print(f"Missing Keys: {missing}") + if len(unexpected) > 0: + print(f"Unexpected Keys: {unexpected}") + + @torch.no_grad() + def get_input(self, batch, k, return_first_stage_outputs=False, force_c_encode=False, + cond_key=None, return_original_cond=False, bs=None, uncond=0.05): + x = batch[k] + if bs is not None: + x = x[:bs] + + encoder_posterior = self.encode_first_stage(x) + z = self.get_first_stage_encoding(encoder_posterior).detach() + cond_key = cond_key or self.cond_stage_key + xc = batch[cond_key] + if bs is not None: + xc["c_crossattn"] = xc["c_crossattn"][:bs] + xc["c_concat"] = xc["c_concat"][:bs] + cond = {} + + # To support classifier-free guidance, randomly drop out only text conditioning 5%, only image conditioning 5%, and both 5%. + random = torch.rand(x.size(0), device=x.device) + prompt_mask = rearrange(random < 2 * uncond, "n -> n 1 1") + input_mask = 1 - rearrange((random >= uncond).float() * (random < 3 * uncond).float(), "n -> n 1 1 1") + + null_prompt = self.get_learned_conditioning([""]) + cond["c_crossattn"] = torch.where(prompt_mask, null_prompt, + self.get_learned_conditioning(xc["c_crossattn"]).detach()) + cond["c_concat"] = input_mask * self.encode_first_stage((xc["c_concat"].to(x.device))).mode().detach() + + out = [z, cond] + if return_first_stage_outputs: + xrec = self.decode_first_stage(z) + out.extend([x, xrec]) + if return_original_cond: + out.append(xc) + return out + + +class MegatronLatentDiffusionEdit(MegatronLatentDiffusion): + + def model_provider_func(self, pre_process=True, post_process=True): + """Model depends on pipeline paralellism.""" + model = LatentDiffusionEdit(cfg=self.cfg) + return model + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + self.model.rng.manual_seed(self.cfg.seed + 100 * parallel_state.get_data_parallel_rank()) + + # log number of parameters + if isinstance(self.model, list): + num_parameters_on_device = sum( + [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] + ) + else: + num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) + + # to be summed across data parallel group + total_num_parameters = torch.tensor(num_parameters_on_device).cuda(non_blocking=True) + + torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer._checkpoint_connector.resume_from_checkpoint_fit_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + self.build_train_valid_test_datasets() + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + + def build_train_valid_test_datasets(self): + # TODO (yuya): set up splits ratio and other params + if self.cfg.data.data_path is not None: + self._train_ds = EditDataset(path=self.cfg.data.data_path, split="train", flip_prob=0.5) + self._validation_ds = EditDataset(path=self.cfg.data.data_path, split="val") + self._test_ds = EditDataset(path=self.cfg.data.data_path, split="test") + + def setup_training_data(self, cfg): + if hasattr(self, '_train_ds') and self._train_ds is not None: + consumed_samples = self.compute_consumed_samples(0) + logging.info( + f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' + ) + self._train_dl = self.build_pretraining_data_loader(self._train_ds, consumed_samples) + + def setup_validation_data(self, cfg): + if hasattr(self, '_validation_ds') and self._validation_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' + ) + drop_last = True + if not self.cfg.get('validation_drop_last', True): + logging.info(f'Drop last in validation dataset is set to False') + drop_last = False + self._validation_dl = self.build_pretraining_data_loader( + self._validation_ds, consumed_samples, drop_last + ) + + def setup_test_data(self, cfg): + if hasattr(self, '_test_ds') and self._test_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' + ) + drop_last = True + if not self.cfg.get('validation_drop_last', True): + logging.info(f'Drop last in validation dataset is set to False') + drop_last = False + self._test_dl = self.build_pretraining_data_loader(self._test_ds, consumed_samples, drop_last) + + def build_pretraining_data_loader(self, dataset, consumed_samples, drop_last=True): + """Build dataloader given an input dataset.""" + + if dataset is None: + return None + logging.info(f'Building dataloader with consumed samples: {consumed_samples}') + # Megatron sampler + if hasattr(self._cfg.data, 'dataloader_type') and self._cfg.data.dataloader_type is not None: + # TODO (yuya): fix this + if self._cfg.data.dataloader_type == 'single': + batch_sampler = MegatronPretrainingBatchSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=self._cfg.micro_batch_size, + global_batch_size=self._cfg.global_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=drop_last, + ) + elif self._cfg.data.dataloader_type == 'cyclic': + batch_sampler = MegatronPretrainingRandomBatchSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=self._cfg.micro_batch_size, + global_batch_size=self._cfg.global_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=drop_last, + ) + else: + raise Exception(f'{self._cfg.dataloader_type} dataloader type is not supported.') + else: + raise ValueError('cfg.data.dataloader_type not found. Must be "single" or "cyclic"') + + # Torch dataloader. + return torch.utils.data.DataLoader( + dataset, batch_sampler=batch_sampler, num_workers=self._cfg.data.num_workers, pin_memory=True, + ) diff --git a/nemo/collections/multimodal/models/multimodal_base_model.py b/nemo/collections/multimodal/models/multimodal_base_model.py new file mode 100644 index 000000000000..c509162a1870 --- /dev/null +++ b/nemo/collections/multimodal/models/multimodal_base_model.py @@ -0,0 +1,604 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import hashlib +import json +import os +import re +import torch +from omegaconf import open_dict +from omegaconf.dictconfig import DictConfig +from pytorch_lightning.core.saving import _load_state as ptl_load_state +from pytorch_lightning.core.saving import load_hparams_from_tags_csv, load_hparams_from_yaml +from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import _FxValidator +from pytorch_lightning.trainer.trainer import Trainer +from pytorch_lightning.utilities import rank_zero_only +from pytorch_lightning.utilities.cloud_io import load as pl_load +from pytorch_lightning.utilities.migration import pl_legacy_patch +from transformers import TRANSFORMERS_CACHE +from typing import Any, Union, Dict, Optional + +from nemo.collections.nlp.modules.common.megatron.clip_grads import ( + clip_grad_norm_distributed_optimizer, + clip_grad_norm_fp32, +) +from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer, get_tokenizer +from nemo.collections.nlp.parts.nlp_overrides import GradScaler +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.core.classes import ModelPT +from nemo.core.classes.exportable import Exportable +from nemo.core.optim import MainParamsOptimizerWrapper, prepare_lr_scheduler +from nemo.utils import AppState, logging +from nemo.utils.get_rank import is_global_rank_zero + +try: + from apex.transformer import parallel_state + from apex.transformer.pipeline_parallel.utils import get_num_microbatches + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + +__all__ = ['MultimodalModel', 'MegatronMultimodalModel'] + +NEMO_MULTIMODAL_TMP = os.path.join(os.path.dirname(str(TRANSFORMERS_CACHE)), "nemo_multimodal_tmp") + +os.makedirs(NEMO_MULTIMODAL_TMP, exist_ok=True) + + +class MultimodalModel(ModelPT, Exportable): + """Base class for Multimodal Models. + """ + + def __init__(self, cfg: DictConfig, trainer: Trainer = None): + + super().__init__(cfg, trainer) + + # handles model parallel save and restore logic + self._save_restore_connector = NLPSaveRestoreConnector() + + # TODO(yuya): check below + @property + def input_module(self): + return self + + @property + def output_module(self): + return self + + @property + def is_model_parallel_initialized(self): + app_state = AppState() + if app_state.model_parallel_group is not None: + return True + else: + return False + + @classmethod + def load_from_checkpoint( + cls, + checkpoint_path: str, + map_location: Any = None, + hparams_file: Optional[str] = None, + strict: bool = True, + **kwargs, + ): + """ + Loads ModelPT from checkpoint, with some maintenance of restoration. + For documentation, please refer to LightningModule.load_from_checkpoin() documentation. + """ + checkpoint = None + try: + cls._set_model_restore_state(is_being_restored=True) + # TODO: replace with proper PTL API + with pl_legacy_patch(): + if map_location is not None: + checkpoint = pl_load(checkpoint_path, map_location=map_location) + else: + checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage) + + if hparams_file is not None: + extension = hparams_file.split(".")[-1] + if extension.lower() == "csv": + hparams = load_hparams_from_tags_csv(hparams_file) + elif extension.lower() in ("yml", "yaml"): + hparams = load_hparams_from_yaml(hparams_file) + else: + raise ValueError(".csv, .yml or .yaml is required for `hparams_file`") + + hparams["on_gpu"] = False + + # overwrite hparams by the given file + checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = hparams + + # for past checkpoint need to add the new key + if cls.CHECKPOINT_HYPER_PARAMS_KEY not in checkpoint: + checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = {} + # override the hparams with values that were passed in + cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].get('cfg', checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY]) + # TODO: can we do this without overriding? + config_kwargs = kwargs.copy() + if 'trainer' in config_kwargs: + config_kwargs.pop('trainer') + cfg.update(config_kwargs) + + # Disable individual unet/vae weights loading otherwise the model will look for these partial ckpts and raise error + if cfg: + if cfg.get('unet_config') and cfg.get('unet_config').get('from_pretrained'): + cfg.unet_config.from_pretrained = None + if cfg.get('first_stage_config') and cfg.get('first_stage_config').get('from_pretrained'): + cfg.first_stage_config.from_pretrained = None + + # compatibility for stable diffusion old checkpoint tweaks + first_key = list(checkpoint['state_dict'].keys())[0] + if first_key == "betas": + # insert "model." into for megatron wrapper + new_state_dict = {} + for key in checkpoint['state_dict'].keys(): + new_key = "model." + key + new_state_dict[new_key] = checkpoint['state_dict'][key] + checkpoint['state_dict'] = new_state_dict + elif first_key == 'model.text_encoder.transformer.text_model.embeddings.position_ids': + # remap state keys from dreambooth + new_state_dict = {} + for key in checkpoint['state_dict'].keys(): + new_key = key.replace('._orig_mod', "") + new_key = new_key.replace('unet', 'model.diffusion_model') + new_key = new_key.replace('vae', 'first_stage_model') + new_key = new_key.replace('text_encoder', 'cond_stage_model') + new_key = new_key.replace('.noise_scheduler', '') + new_state_dict[new_key] = checkpoint['state_dict'][key] + checkpoint['state_dict'] = new_state_dict + + # compatibility for inductor in inference + if not cfg.get('inductor', False): + new_state_dict = {} + for key in checkpoint['state_dict'].keys(): + new_key = key.replace('._orig_mod', '', 1) + new_state_dict[new_key] = checkpoint['state_dict'][key] + checkpoint['state_dict'] = new_state_dict + + if cfg.get('megatron_amp_O2', False): + new_state_dict = {} + for key in checkpoint['state_dict'].keys(): + new_key = key.replace('model.', 'model.module.', 1) + new_state_dict[new_key] = checkpoint['state_dict'][key] + checkpoint['state_dict'] = new_state_dict + + if 'cfg' in kwargs: + model = ptl_load_state(cls, checkpoint, strict=strict, **kwargs) + else: + model = ptl_load_state(cls, checkpoint, strict=strict, cfg=cfg, **kwargs) + # cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].cfg + + checkpoint = model + + finally: + cls._set_model_restore_state(is_being_restored=False) + return checkpoint + + +class MegatronMultimodalModel(MultimodalModel): + """ + Megatron multimodal base class + It does the following things: + 1. Initialize the model parallel for nemo given the model parallel parameters. + 2. Turn on all the nvidia optimizations. + 3. If using distributed optimizer, configure to be compatible with + O2-level optimizations and/or model parallelism. + 4. Perform gradient clipping: `grad_clip_pl_default` triggers the + PyTorch Lightning default implementation, `with_distributed_adam` + triggers the distributed optimizer's implementation, + `megatron_amp_O2` triggers gradient clipping on the main grads, + and otherwise gradient clipping is performed on the model grads. + """ + + def __init__(self, cfg: DictConfig, trainer: Trainer): + # FIXME: switch to self._cfg + if not HAVE_APEX: + raise ImportError( + "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + if trainer is None: + raise ValueError(f"Trainer cannot be None for Megatron-based models. Please provide a PTL trainer object.") + + # this prevents base constructor from initializing tokenizer + self.tokenizer = None + super().__init__(cfg, trainer=trainer) + + self._validate_config() + + self.with_distributed_adam = cfg.optim.get('name') == 'distributed_fused_adam' + + # used in NVIDIA NGC PyTorch containers + self._enable_nvidia_optimizations() + + if self._cfg.get('use_cpu_initialization', False) is False: + torch.cuda.set_device(trainer.local_rank) + + # buffer used during train_step for logging average loss over gradient accumulation steps + self._reduced_loss_buffer = [] + + initialize_model_parallel_for_nemo( + world_size=trainer.world_size, + global_rank=trainer.global_rank, + local_rank=trainer.local_rank, + tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1), + pipeline_model_parallel_size=cfg.get('pipeline_model_parallel_size', 1), + virtual_pipeline_model_parallel_size=cfg.get('virtual_pipeline_model_parallel_size', None), + pipeline_model_parallel_split_rank=cfg.get('pipeline_model_parallel_split_rank', 0), + micro_batch_size=cfg.get('micro_batch_size'), + global_batch_size=cfg.get('global_batch_size'), + seed=self.cfg.get('seed', 1234), + apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30), + ) + + self.grad_clip_pl_default = False # use pytorch default for gradient clipping. Default False + + if hasattr(self._cfg, "tokenizer") or ( + hasattr(self._cfg, "encoder_tokenizer") and hasattr(self._cfg, "decoder_tokenizer") + ): + # build tokenizer (defaults to nemo supported tokenizers) + self._build_tokenizer() + + # manipulate vocabulary (e.g., pad vocabulary for better efficiency) + self._build_vocab() + + # TODO: remove this when PTL 1.7.3 is released + _FxValidator.functions["configure_gradient_clipping"] = { + "allowed_on_step": (False, True), + "allowed_on_epoch": (False, True), + "default_on_step": True, + "default_on_epoch": False, + } + + def _enable_nvidia_optimizations(self): + "These optimizations are present in NVIDIA NGC PyTorch Containers" + + # NVIDIA container version check + nvidia_torch_version = os.getenv('NVIDIA_PYTORCH_VERSION', None) + if nvidia_torch_version is not None: + NVIDIA_TORCH_MAJOR = int(nvidia_torch_version.split('.')[0]) + try: + NVIDIA_TORCH_MINOR = int(nvidia_torch_version.split('.')[1]) + except Exception: + NVIDIA_TORCH_MINOR = 0 + + # Apex Persistent layer norm is supported from Nvidia PyTorch container v21.11 + if NVIDIA_TORCH_MAJOR < 21 or (NVIDIA_TORCH_MAJOR == 21 and NVIDIA_TORCH_MINOR < 11): + self.cfg.persist_layer_norm = False + + if NVIDIA_TORCH_MAJOR >= 21 or (NVIDIA_TORCH_MAJOR == 21 and NVIDIA_TORCH_MINOR >= 11): + # NVFUSER + torch._C._jit_set_profiling_executor(True) + torch._C._jit_set_profiling_mode(True) + torch._C._jit_override_can_fuse_on_cpu(False) + torch._C._jit_override_can_fuse_on_gpu(False) + torch._C._jit_set_texpr_fuser_enabled(False) + torch._C._jit_set_nvfuser_enabled(True) + torch._C._debug_set_autodiff_subgraph_inlining(False) + + else: + # Not a Nvidia container. NVFUSER Dependency check is on users + pass + + def _build_tokenizer(self): + """ + Default tokenizer is based on available nemo tokenizers. + Override this method to use an external tokenizer. + All tokenizers are expected to provide compatible interface. + Override default Encoder-decoder tokenizer to use legacy=True for sentencepiece. + """ + if hasattr(self._cfg.tokenizer, "sentencepiece_legacy"): + legacy = self._cfg.tokenizer.sentencepiece_legacy + else: + legacy = True if self._cfg.tokenizer.library == 'sentencepiece' else False + self.tokenizer = get_nmt_tokenizer( + library=self._cfg.tokenizer.library, + model_name=self._cfg.tokenizer.type, + tokenizer_model=self.register_artifact("tokenizer.model", self._cfg.tokenizer.model), + vocab_file=self.register_artifact("tokenizer.vocab_file", self._cfg.tokenizer.vocab_file), + merges_file=self.register_artifact("tokenizer.merge_file", self._cfg.tokenizer.merge_file), + delimiter=self.cfg.tokenizer.get('delimiter', None), + legacy=legacy, + ) + + def _build_vocab(self): + """ + Manipulate vocabulary (e.g., pad vocabulary for increased performance)/ + """ + # TODO: add config to allow to disable it? + self.padded_vocab_size = self._vocab_size_with_padding( + orig_vocab_size=self.tokenizer.vocab_size, + make_vocab_size_divisible_by=self._cfg.get('make_vocab_size_divisible_by', 128), + tensor_model_parallel_size=self._cfg.get('tensor_model_parallel_size', 1), + ) + + def _vocab_size_with_padding(self, orig_vocab_size, make_vocab_size_divisible_by, tensor_model_parallel_size): + """Pad vocab size so it is divisible by model parallel size and + still having GPU friendly size.""" + + after = orig_vocab_size + multiple = make_vocab_size_divisible_by * tensor_model_parallel_size + while (after % multiple) != 0: + after += 1 + logging.info( + f'Padded vocab_size: {after}, original vocab_size: {orig_vocab_size}, dummy tokens: {after - orig_vocab_size}.' + ) + return after + + def on_train_start(self) -> None: + super().on_train_start() + self.init_global_step = self.trainer.global_step + + def _get_parameters(self): + """ + private method to load all the trainable parameters from optimizer param groups + """ + params = [] + for param_group in self._optimizer_param_groups: + for param in param_group['params']: + params.append(param) + return params + + def configure_gradient_clipping(self, *args, **kwargs): + """PTL hook to configure gradients. + We use gradient clipping implementation from megatron-lm. + """ + clip_val = self.trainer.gradient_clip_val + if clip_val is None: + return + + clip_val = float(clip_val) + if clip_val <= 0: + return + + if self.grad_clip_pl_default: + # use the default behavior + return super().configure_gradient_clipping(*args, **kwargs) + + if self.with_distributed_adam: + grad_norm = clip_grad_norm_distributed_optimizer(self._optimizer, clip_val) + else: + if self.megatron_amp_O2: + # grep fp32 master parameters for gradient clipping + parameters = self._optimizer.get_parameters() + else: + parameters = self._get_parameters() + grad_norm = clip_grad_norm_fp32(parameters=parameters, max_norm=clip_val) + + self.log('grad_norm', grad_norm, rank_zero_only=True) + + def allreduce_gradients(self): + """Reduce gradients across data parallel ranks. + Modified from megatron-lm: https://github.com/NVIDIA/Megatron-LM/blob/d41696840ed0a7edb7e0499eb82a48ae112d9bb3/megatron/model/distributed.py#L188 + """ + # Bucketize and all-reduce + buckets = {} + for param in self.parameters(): + if param.requires_grad and param.grad is not None: + tp = param.data.type() + if tp not in buckets: + buckets[tp] = [] + buckets[tp].append(param) + # param.main_grad = param.grad + + # For each bucket, all-reduce and copy all-reduced grads. + for tp in buckets: + bucket = buckets[tp] + grads = [param.grad.data for param in bucket] + coalesced = torch._utils._flatten_dense_tensors(grads) + coalesced /= parallel_state.get_data_parallel_world_size() + torch.distributed.all_reduce(coalesced, group=parallel_state.get_data_parallel_group()) + for buf, synced in zip(grads, torch._utils._unflatten_dense_tensors(coalesced, grads)): + buf.copy_(synced) + + def reduce_overlap_gradients(self): + """Reduce grads if overlapped grad sync is enabled + + Used for pipeline parallelism with the distributed Adam + optimizer. In the first pipeline stage, the grad sync is + overlapped with the final backward pass. In other pipeline + stages, the grad sync is deferred until the bubble overhead. + + """ + if self.with_distributed_adam: + self._optimizer.try_grad_sync( + p for p in self._optimizer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False) + ) + + def on_train_batch_end(self, outputs, batch, batch_idx: int, unused: Optional[int] = 0) -> None: + super().on_train_batch_end(outputs, batch, batch_idx) + + # TODO: Replace with newer override for scheduler.step() instead of + # search for plugins for fp16 GradScalar + if self.trainer.precision_plugin is not None and isinstance( + self.trainer.precision_plugin, NativeMixedPrecisionPlugin + ): + precision_plugin = self.trainer.precision_plugin + + if ( + hasattr(precision_plugin, 'scaler') + and precision_plugin.scaler is not None + and isinstance(precision_plugin.scaler, GradScaler) + ): + grad_scaler = precision_plugin.scaler + + # If the grad scaler skipped its optimizer step due to infs/nans, + # decrement the step of all schedulers. + if grad_scaler.optimizer_update_skipped is not None and grad_scaler.optimizer_update_skipped is True: + scheduler_cfgs = self.trainer.lr_scheduler_configs + + if not scheduler_cfgs or not self.trainer.lightning_module.automatic_optimization: + return + + for scheduler_cfg in scheduler_cfgs: + # Decrement the counter by 2, then perform a scheduler.step() to perform a no-up + # as well as update the optimizer lr in all param groups + scheduler_cfg.scheduler.last_epoch -= 2 + scheduler_cfg.scheduler.step() + + # Increase the max step count by 1 + + # Reset the optimizer update skipped to `None` - this is to prevent scheduler no-ops during + # accumulated gradient updates. + grad_scaler.optimizer_update_skipped = None + + def setup_optimization( + self, optim_config: Optional[Union[DictConfig, Dict]] = None, optim_kwargs: Optional[Dict[str, Any]] = None, + ): + optim_kwargs = {} if optim_kwargs is None else optim_kwargs.copy() + if self.with_distributed_adam: + + # Allocate grads since we are storing between microbatches + optim_kwargs['contiguous_grad_buffer'] = True + + if self.megatron_amp_O2: + # Match param allgather with model dtype + if hasattr(self, 'autocast_dtype'): + optim_kwargs['param_sync_dtype'] = self.autocast_dtype + if self.autocast_dtype == torch.float: + optim_kwargs['store_params'] = False + elif self.autocast_dtype == torch.float16: + optim_kwargs['store_params'] = True + elif self.autocast_dtype == torch.bfloat16: + optim_kwargs['store_params'] = False + optim_kwargs['store_param_remainders'] = True + else: + # Assume FP32 params, so no need to store main params + optim_kwargs['store_params'] = False + + return super().setup_optimization(optim_config=optim_config, optim_kwargs=optim_kwargs) + + def configure_optimizers(self): + self.setup_optimization() + + # Wrap the baseline optimizer with the optimizer class with master parameters + if self.megatron_amp_O2 and not self.with_distributed_adam and self._optimizer is not None: + if self.cfg.precision == 'bf16': + fp32_grad_accum = True + contiguous_grad_bucket = True + elif self.cfg.precision == 16: + fp32_grad_accum = False + # TODO: contiguous grad bucket for fp16 is also planned to be supported + contiguous_grad_bucket = False + raise ValueError( + "fp16 training is not yet supported with O2. Please set megatron_amp_O2 to False in the model config." + ) + + # if using tensor parallel only, we automatically use async grad all-reduce + # if using pipeline parallel or sequence parallel or gradient accumulation fusion, then we disable it + # if self.cfg.get('pipeline_model_parallel_size', 1) == 1 and not ( + # self.cfg.get('sequence_parallel', False) or self.cfg.get('gradient_accumulation_fusion', False) + # ): + # async_grad_allreduce = True + # else: + # async_grad_allreduce = False + + async_grad_allreduce = False + + if async_grad_allreduce: + # we need this to be configurable until make_nccl_premul_sum is in public PyTorch. + # currently cannot be imported in PyTorch 1.12.0 + grad_div_ar_fusion = self.cfg.get('grad_div_ar_fusion', False) + else: + grad_div_ar_fusion = False + + self._optimizer = MainParamsOptimizerWrapper( + self._optimizer, + fp32_grad_accum=fp32_grad_accum, + contiguous_grad_bucket=contiguous_grad_bucket, + async_grad_allreduce=async_grad_allreduce, + grad_div_ar_fusion=grad_div_ar_fusion, + grad_allreduce_chunk_size_mb=self.cfg.get('grad_allreduce_chunk_size_mb', 125), + ) + + assert self._trainer.max_steps is not None, "'max_steps' is missing in trainer config." + if hasattr(self._cfg.optim, 'sched'): + sched_config = self._cfg.optim.sched + sched_config['max_steps'] = self._trainer.max_steps + self._scheduler = prepare_lr_scheduler( + optimizer=self._optimizer, scheduler_config=sched_config, train_dataloader=self._train_dl + ) + + # Configure distributed optimizer + if self.with_distributed_adam: + # Initialize params so that main grads are available + # Note: Consolidate grads without overlap + self._optimizer.init_params( + p for p in self.parameters() if getattr(p, '_disable_overlap_grad_sync', False) + ) + self._optimizer.init_params(self.parameters()) + + if self._scheduler is None: + return self._optimizer + else: + return [self._optimizer], [self._scheduler] + + def compute_consumed_samples(self, steps_since_resume=0): + app_state = AppState() + consumed_samples = ( + self.init_consumed_samples + + steps_since_resume * app_state.data_parallel_size * self.cfg.micro_batch_size * get_num_microbatches() + ) + return int(consumed_samples) + + def _extract_consumed_samples_from_ckpt(self, ckpt_path): + try: + init_consumed_samples = int(float(re.findall(r"consumed_samples\=([0-9]+.[0-9]+)", ckpt_path)[0])) + except (ValueError, TypeError, IndexError): + logging.warning("Cannot parse the checkpoint file to get the consumed samples. assume it is zero.") + init_consumed_samples = 0 + + return init_consumed_samples + + def _validate_config(self): + """ Certain configurations might be incompatible or discouraged. We can check for them here.""" + + if self.cfg.get('sequence_parallel', False) and self.cfg.get('tensor_model_parallel_size', 1) == 1: + logging.info( + "Sequence parallel should only be used with tensor parallel size > 1. Setting sequence parallel to False" + ) + with open_dict(self.cfg): + self.cfg.sequence_parallel = False + + if ( + self.cfg.get('gradient_accumulation_fusion', False) + and self.cfg.get('pipeline_model_parallel_size', 1) == 1 + ): + logging.info("Gradient accumulation fusion can only be used with pipeline parallel size > 1.") + with open_dict(self.cfg): + self.cfg.gradient_accumulation_fusion = False + + if self.cfg.get('gradient_accumulation_fusion', False) and not self.cfg.get('megatron_amp_O2', False): + logging.info("Gradient accumulation fusion can only be used with megatron amp O2 mixed precision.") + with open_dict(self.cfg): + self.cfg.gradient_accumulation_fusion = False + + def is_data_parallel_rank_zero(self): + if is_global_rank_zero(): + return True + else: + try: + data_parallel_rank = parallel_state.get_data_parallel_rank() + except: + data_parallel_rank = None + + if data_parallel_rank is not None and data_parallel_rank == 0: + return True + else: + return False diff --git a/nemo/collections/multimodal/models/stable_diffusion/__init__.py b/nemo/collections/multimodal/models/stable_diffusion/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py b/nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py new file mode 100644 index 000000000000..6bda186f137d --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py @@ -0,0 +1,78 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import re +import torch +from abc import ABC, abstractclassmethod +from typing import Any, Optional + +from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.core.classes import ModelPT +from nemo.utils import logging + + +class DiffusionModel(ModelPT, ABC): + @abstractclassmethod + def get_conditioning(self, c: Any) -> Any: + """ + Encode conditioning c. + For txt2img use-case, the input conditioning would be the plain text, + and output would be the encoded embedding for the corresponding text; + For img2img use-case, the input conditioning would be the raw image, + and output would be the corresponding image embedding + + Args: + c: conditioning + + Returns: + encoded conditioning + """ + pass + + @abstractclassmethod + def apply_model(self, x_t: torch.Tensor, t: torch.Tensor, c: Optional[torch.Tensor]) -> torch.Tensor: + """ + Apply Diffusion model. + If c is not given, the model acts as an unconditional diffusion model. + For diffusion model that applies on the pixel space, x_t should be in the pixel space; + for diffusion model that applies on the latent space, x_t is in latent space. + + Args: + x_t: noisy input x at timestamp t + t: timestamp + c: conditioning + + Returns: + Predicted result that has the same shape as x_t + """ + + def on_train_start(self) -> None: + super().on_train_start() + self.init_global_step = self.trainer.global_step + + def _extract_consumed_samples_from_ckpt(self, ckpt_path): + try: + init_consumed_samples = int(float(re.findall(r"consumed_samples\=([0-9]+.[0-9]+)", ckpt_path)[0])) + except (ValueError, TypeError, IndexError): + logging.warning("Cannot parse the checkpoint file to get the consumed samples. assume it is zero.") + init_consumed_samples = 0 + + return init_consumed_samples + + def compute_consumed_samples(self, steps_since_resume=0): + consumed_samples = ( + self.init_consumed_samples + + steps_since_resume * self.trainer.world_size + * self.cfg.micro_batch_size * self.trainer.accumulate_grad_batches + ) + return int(consumed_samples) diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/__init__.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py new file mode 100644 index 000000000000..9db8d2271329 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py @@ -0,0 +1,551 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytorch_lightning as pl +import torch +import torch.nn.functional as F +from contextlib import contextmanager +from taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer + +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.model import Encoder, Decoder +from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import \ + DiagonalGaussianDistribution +from nemo.collections.multimodal.parts.stable_diffusion.utils import instantiate_from_config + + +class VQModel(pl.LightningModule): + def __init__(self, + ddconfig, + lossconfig, + n_embed, + embed_dim, + ckpt_path=None, + ignore_keys=[], + image_key="image", + colorize_nlabels=None, + monitor=None, + batch_resize_range=None, + scheduler_config=None, + lr_g_factor=1.0, + remap=None, + sane_index_shape=False, # tell vector quantizer to return indices as bhw + ): + super().__init__() + self.embed_dim = embed_dim + self.n_embed = n_embed + self.image_key = image_key + self.encoder = Encoder(**ddconfig) + self.decoder = Decoder(**ddconfig) + self.loss = instantiate_from_config(lossconfig) + self.quantize = VectorQuantizer(n_embed, embed_dim, beta=0.25, + remap=remap, + sane_index_shape=sane_index_shape) + self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1) + self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1) + if colorize_nlabels is not None: + assert type(colorize_nlabels) == int + self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1)) + if monitor is not None: + self.monitor = monitor + self.batch_resize_range = batch_resize_range + if self.batch_resize_range is not None: + print(f"{self.__class__.__name__}: Using per-batch resizing in range {batch_resize_range}.") + + if ckpt_path is not None: + self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys) + self.scheduler_config = scheduler_config + self.lr_g_factor = lr_g_factor + + @contextmanager + def ema_scope(self, context=None): + if self.use_ema: + self.model_ema.store(self.parameters()) + self.model_ema.copy_to(self) + if context is not None: + print(f"{context}: Switched to EMA weights") + try: + yield None + finally: + if self.use_ema: + self.model_ema.restore(self.parameters()) + if context is not None: + print(f"{context}: Restored training weights") + + def init_from_ckpt(self, path, ignore_keys=list()): + sd = torch.load(path, map_location="cpu")["state_dict"] + keys = list(sd.keys()) + for k in keys: + for ik in ignore_keys: + if k.startswith(ik): + print("Deleting key {} from state_dict.".format(k)) + del sd[k] + missing, unexpected = self.load_state_dict(sd, strict=False) + print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") + if len(missing) > 0: + print(f"Missing Keys: {missing}") + print(f"Unexpected Keys: {unexpected}") + + def on_train_batch_end(self, *args, **kwargs): + if self.use_ema: + self.model_ema(self) + + def encode(self, x): + h = self.encoder(x) + h = self.quant_conv(h) + quant, emb_loss, info = self.quantize(h) + return quant, emb_loss, info + + def encode_to_prequant(self, x): + h = self.encoder(x) + h = self.quant_conv(h) + return h + + def decode(self, quant): + quant = self.post_quant_conv(quant) + dec = self.decoder(quant) + return dec + + def decode_code(self, code_b): + quant_b = self.quantize.embed_code(code_b) + dec = self.decode(quant_b) + return dec + + def forward(self, input, return_pred_indices=False): + quant, diff, (_, _, ind) = self.encode(input) + dec = self.decode(quant) + if return_pred_indices: + return dec, diff, ind + return dec, diff + + def get_input(self, batch, k): + x = batch[k] + if len(x.shape) == 3: + x = x[..., None] + x = x.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format).float() + if self.batch_resize_range is not None: + lower_size = self.batch_resize_range[0] + upper_size = self.batch_resize_range[1] + if self.global_step <= 4: + # do the first few batches with max size to avoid later oom + new_resize = upper_size + else: + new_resize = np.random.choice(np.arange(lower_size, upper_size + 16, 16)) + if new_resize != x.shape[2]: + x = F.interpolate(x, size=new_resize, mode="bicubic") + x = x.detach() + return x + + def training_step(self, batch, batch_idx, optimizer_idx): + # https://github.com/pytorch/pytorch/issues/37142 + # try not to fool the heuristics + x = self.get_input(batch, self.image_key) + xrec, qloss, ind = self(x, return_pred_indices=True) + + if optimizer_idx == 0: + # autoencode + aeloss, log_dict_ae = self.loss(qloss, x, xrec, optimizer_idx, self.global_step, + last_layer=self.get_last_layer(), split="train", + predicted_indices=ind) + + self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=True) + return aeloss + + if optimizer_idx == 1: + # discriminator + discloss, log_dict_disc = self.loss(qloss, x, xrec, optimizer_idx, self.global_step, + last_layer=self.get_last_layer(), split="train") + self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=True) + return discloss + + def validation_step(self, batch, batch_idx): + log_dict = self._validation_step(batch, batch_idx) + with self.ema_scope(): + log_dict_ema = self._validation_step(batch, batch_idx, suffix="_ema") + return log_dict + + def _validation_step(self, batch, batch_idx, suffix=""): + x = self.get_input(batch, self.image_key) + xrec, qloss, ind = self(x, return_pred_indices=True) + aeloss, log_dict_ae = self.loss(qloss, x, xrec, 0, + self.global_step, + last_layer=self.get_last_layer(), + split="val" + suffix, + predicted_indices=ind + ) + + discloss, log_dict_disc = self.loss(qloss, x, xrec, 1, + self.global_step, + last_layer=self.get_last_layer(), + split="val" + suffix, + predicted_indices=ind + ) + rec_loss = log_dict_ae[f"val{suffix}/rec_loss"] + self.log(f"val{suffix}/rec_loss", rec_loss, + prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True) + self.log(f"val{suffix}/aeloss", aeloss, + prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True) + if version.parse(pl.__version__) >= version.parse('1.4.0'): + del log_dict_ae[f"val{suffix}/rec_loss"] + self.log_dict(log_dict_ae) + self.log_dict(log_dict_disc) + return self.log_dict + + def configure_optimizers(self): + lr_d = self.learning_rate + lr_g = self.lr_g_factor * self.learning_rate + print("lr_d", lr_d) + print("lr_g", lr_g) + opt_ae = torch.optim.Adam(list(self.encoder.parameters()) + + list(self.decoder.parameters()) + + list(self.quantize.parameters()) + + list(self.quant_conv.parameters()) + + list(self.post_quant_conv.parameters()), + lr=lr_g, betas=(0.5, 0.9)) + opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(), + lr=lr_d, betas=(0.5, 0.9)) + + if self.scheduler_config is not None: + scheduler = instantiate_from_config(self.scheduler_config) + + print("Setting up LambdaLR scheduler...") + scheduler = [ + { + 'scheduler': LambdaLR(opt_ae, lr_lambda=scheduler.schedule), + 'interval': 'step', + 'frequency': 1 + }, + { + 'scheduler': LambdaLR(opt_disc, lr_lambda=scheduler.schedule), + 'interval': 'step', + 'frequency': 1 + }, + ] + return [opt_ae, opt_disc], scheduler + return [opt_ae, opt_disc], [] + + def get_last_layer(self): + return self.decoder.conv_out.weight + + def log_images(self, batch, only_inputs=False, plot_ema=False, **kwargs): + log = dict() + x = self.get_input(batch, self.image_key) + x = x.to(self.device) + if only_inputs: + log["inputs"] = x + return log + xrec, _ = self(x) + if x.shape[1] > 3: + # colorize with random projection + assert xrec.shape[1] > 3 + x = self.to_rgb(x) + xrec = self.to_rgb(xrec) + log["inputs"] = x + log["reconstructions"] = xrec + if plot_ema: + with self.ema_scope(): + xrec_ema, _ = self(x) + if x.shape[1] > 3: xrec_ema = self.to_rgb(xrec_ema) + log["reconstructions_ema"] = xrec_ema + return log + + def to_rgb(self, x): + assert self.image_key == "segmentation" + if not hasattr(self, "colorize"): + self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x)) + x = F.conv2d(x, weight=self.colorize) + x = 2. * (x - x.min()) / (x.max() - x.min()) - 1. + return x + + +class VQModelInterface(VQModel): + def __init__(self, embed_dim, *args, **kwargs): + super().__init__(embed_dim=embed_dim, *args, **kwargs) + self.embed_dim = embed_dim + + def encode(self, x): + h = self.encoder(x) + h = self.quant_conv(h) + return h + + def decode(self, h, force_not_quantize=False): + # also go through quantization layer + if not force_not_quantize: + quant, emb_loss, info = self.quantize(h) + else: + quant = h + quant = self.post_quant_conv(quant) + dec = self.decoder(quant) + return dec + + +class AutoencoderKL(pl.LightningModule): + def __init__(self, + ddconfig, + embed_dim, + lossconfig=None, # TODO make it configurable + ckpt_path=None, + ignore_keys=[], + image_key="image", + colorize_nlabels=None, + monitor=None, + from_pretrained: str = None + ): + super().__init__() + self.image_key = image_key + self.encoder = Encoder(**ddconfig) + self.decoder = Decoder(**ddconfig) + self.loss = torch.nn.Identity() # instantiate_from_config(lossconfig) + assert ddconfig["double_z"] + self.quant_conv = torch.nn.Conv2d(2 * ddconfig["z_channels"], 2 * embed_dim, 1) + self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1) + self.embed_dim = embed_dim + if colorize_nlabels is not None: + assert type(colorize_nlabels) == int + self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1)) + if monitor is not None: + self.monitor = monitor + if ckpt_path is not None: + self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys) + from diffusers.modeling_utils import load_state_dict + if from_pretrained is not None: + state_dict = load_state_dict(from_pretrained) + self._load_pretrained_model(state_dict) + + def _state_key_mapping(self, state_dict: dict): + import re + res_dict = {} + key_list = state_dict.keys() + key_str = " ".join(key_list) + up_block_pattern = re.compile('upsamplers') + p1 = re.compile('mid.block_[0-9]') + p2 = re.compile('decoder.up.[0-9]') + up_blocks_count = int(len(re.findall(up_block_pattern, key_str)) / 2 + 1) + for key_, val_ in state_dict.items(): + key_ = key_.replace("up_blocks", "up").replace("down_blocks", "down").replace('resnets', 'block') \ + .replace('mid_block', 'mid').replace("mid.block.", "mid.block_") \ + .replace('mid.attentions.0.key', 'mid.attn_1.k') \ + .replace('mid.attentions.0.query', 'mid.attn_1.q') \ + .replace('mid.attentions.0.value', 'mid.attn_1.v') \ + .replace('mid.attentions.0.group_norm', 'mid.attn_1.norm') \ + .replace('mid.attentions.0.proj_attn', 'mid.attn_1.proj_out') \ + .replace('upsamplers.0', 'upsample') \ + .replace('downsamplers.0', 'downsample') \ + .replace('conv_shortcut', 'nin_shortcut') \ + .replace('conv_norm_out', 'norm_out') + + mid_list = re.findall(p1, key_) + if len(mid_list) != 0: + mid_str = mid_list[0] + mid_id = int(mid_str[-1]) + 1 + key_ = key_.replace(mid_str, mid_str[:-1] + str(mid_id)) + + up_list = re.findall(p2, key_) + if len(up_list) != 0: + up_str = up_list[0] + up_id = up_blocks_count - 1 - int(up_str[-1]) + key_ = key_.replace(up_str, up_str[:-1] + str(up_id)) + res_dict[key_] = val_ + return res_dict + + def _load_pretrained_model(self, state_dict, ignore_mismatched_sizes=False): + state_dict = self._state_key_mapping(state_dict) + model_state_dict = self.state_dict() + loaded_keys = [k for k in state_dict.keys()] + expected_keys = list(model_state_dict.keys()) + original_loaded_keys = loaded_keys + missing_keys = list(set(expected_keys) - set(loaded_keys)) + unexpected_keys = list(set(loaded_keys) - set(expected_keys)) + + def _find_mismatched_keys( + state_dict, + model_state_dict, + loaded_keys, + ignore_mismatched_sizes, + ): + mismatched_keys = [] + if ignore_mismatched_sizes: + for checkpoint_key in loaded_keys: + model_key = checkpoint_key + + if ( + model_key in model_state_dict + and state_dict[checkpoint_key].shape != model_state_dict[model_key].shape + ): + mismatched_keys.append( + (checkpoint_key, state_dict[checkpoint_key].shape, model_state_dict[model_key].shape) + ) + del state_dict[checkpoint_key] + return mismatched_keys + + if state_dict is not None: + # Whole checkpoint + mismatched_keys = _find_mismatched_keys( + state_dict, + model_state_dict, + original_loaded_keys, + ignore_mismatched_sizes, + ) + error_msgs = self._load_state_dict_into_model(state_dict) + return missing_keys, unexpected_keys, mismatched_keys, error_msgs + + def _load_state_dict_into_model(self, state_dict): + # Convert old format to new format if needed from a PyTorch state_dict + # copy state_dict so _load_from_state_dict can modify it + state_dict = state_dict.copy() + error_msgs = [] + + # PyTorch's `_load_from_state_dict` does not copy parameters in a module's descendants + # so we need to apply the function recursively. + def load(module: torch.nn.Module, prefix=""): + args = (state_dict, prefix, {}, True, [], [], error_msgs) + module._load_from_state_dict(*args) + + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + ".") + + load(self) + + return error_msgs + + def init_from_ckpt(self, path, ignore_keys=list()): + sd = torch.load(path, map_location="cpu")["state_dict"] + keys = list(sd.keys()) + for k in keys: + for ik in ignore_keys: + if k.startswith(ik): + print("Deleting key {} from state_dict.".format(k)) + del sd[k] + self.load_state_dict(sd, strict=False) + print(f"Restored from {path}") + + def encode(self, x): + h = self.encoder(x) + moments = self.quant_conv(h) + posterior = DiagonalGaussianDistribution(moments) + return posterior + + def decode(self, z): + z = self.post_quant_conv(z) + dec = self.decoder(z) + return dec + + def forward(self, input, sample_posterior=True): + posterior = self.encode(input) + if sample_posterior: + z = posterior.sample() + else: + z = posterior.mode() + dec = self.decode(z) + return dec, posterior + + def get_input(self, batch, k): + x = batch[k] + if len(x.shape) == 3: + x = x[..., None] + x = x.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format).float() + return x + + def training_step(self, batch, batch_idx, optimizer_idx): + inputs = self.get_input(batch, self.image_key) + reconstructions, posterior = self(inputs) + + if optimizer_idx == 0: + # train encoder+decoder+logvar + aeloss, log_dict_ae = self.loss(inputs, reconstructions, posterior, optimizer_idx, self.global_step, + last_layer=self.get_last_layer(), split="train") + self.log("aeloss", aeloss, prog_bar=True, logger=True, on_step=True, on_epoch=True) + self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=False) + return aeloss + + if optimizer_idx == 1: + # train the discriminator + discloss, log_dict_disc = self.loss(inputs, reconstructions, posterior, optimizer_idx, self.global_step, + last_layer=self.get_last_layer(), split="train") + + self.log("discloss", discloss, prog_bar=True, logger=True, on_step=True, on_epoch=True) + self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=False) + return discloss + + def validation_step(self, batch, batch_idx): + inputs = self.get_input(batch, self.image_key) + reconstructions, posterior = self(inputs) + aeloss, log_dict_ae = self.loss(inputs, reconstructions, posterior, 0, self.global_step, + last_layer=self.get_last_layer(), split="val") + + discloss, log_dict_disc = self.loss(inputs, reconstructions, posterior, 1, self.global_step, + last_layer=self.get_last_layer(), split="val") + + self.log("val/rec_loss", log_dict_ae["val/rec_loss"]) + self.log_dict(log_dict_ae) + self.log_dict(log_dict_disc) + return self.log_dict + + def configure_optimizers(self): + lr = self.learning_rate + opt_ae = torch.optim.Adam(list(self.encoder.parameters()) + + list(self.decoder.parameters()) + + list(self.quant_conv.parameters()) + + list(self.post_quant_conv.parameters()), + lr=lr, betas=(0.5, 0.9)) + opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(), + lr=lr, betas=(0.5, 0.9)) + return [opt_ae, opt_disc], [] + + def get_last_layer(self): + return self.decoder.conv_out.weight + + @torch.no_grad() + def log_images(self, batch, only_inputs=False, **kwargs): + log = dict() + x = self.get_input(batch, self.image_key) + x = x.to(self.device) + if not only_inputs: + xrec, posterior = self(x) + if x.shape[1] > 3: + # colorize with random projection + assert xrec.shape[1] > 3 + x = self.to_rgb(x) + xrec = self.to_rgb(xrec) + log["samples"] = self.decode(torch.randn_like(posterior.sample())) + log["reconstructions"] = xrec + log["inputs"] = x + return log + + def to_rgb(self, x): + assert self.image_key == "segmentation" + if not hasattr(self, "colorize"): + self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x)) + x = F.conv2d(x, weight=self.colorize) + x = 2. * (x - x.min()) / (x.max() - x.min()) - 1. + return x + + +class IdentityFirstStage(torch.nn.Module): + def __init__(self, *args, vq_interface=False, **kwargs): + self.vq_interface = vq_interface # TODO: Should be true by default but check to not break older stuff + super().__init__() + + def encode(self, x, *args, **kwargs): + return x + + def decode(self, x, *args, **kwargs): + return x + + def quantize(self, x, *args, **kwargs): + if self.vq_interface: + return x, None, [None, None, None] + return x + + def forward(self, x, *args, **kwargs): + return x diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py new file mode 100644 index 000000000000..8227ed00f37b --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -0,0 +1,1769 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import itertools +import numpy as np +import pytorch_lightning as pl +import torch +import torch.nn as nn +from contextlib import contextmanager +from einops import rearrange, repeat +from functools import partial +from omegaconf import DictConfig, OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.utilities.distributed import rank_zero_only +from torch._dynamo import optimize +from torch._inductor import config as inductor_config +from torch.optim.lr_scheduler import LambdaLR +from torchvision.utils import make_grid +from tqdm import tqdm +from typing import Any, Dict, Optional, Union + +from nemo.collections.multimodal.data.stable_diffusion.stable_diffusion_dataset import build_train_valid_datasets, \ + build_train_valid_precached_datasets +from nemo.collections.multimodal.models.multimodal_base_model import MegatronMultimodalModel +from nemo.collections.multimodal.models.stable_diffusion.diffusion_model import DiffusionModel +from nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder import VQModelInterface, IdentityFirstStage, \ + AutoencoderKL +from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import make_beta_schedule, \ + extract_into_tensor, noise_like +from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import normal_kl, \ + DiagonalGaussianDistribution +from nemo.collections.multimodal.parts.stable_diffusion.utils import log_txt_as_img, exists, default, ismap, isimage, \ + mean_flat, count_params +from nemo.collections.multimodal.parts.utils import randn_like +from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.core.classes.common import Serialization +from nemo.utils import logging + +try: + from apex.contrib.clip_grad import clip_grad_norm_ + from apex import amp + from apex.transformer import parallel_state + from apex.transformer.pipeline_parallel.schedules.common import build_model + from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_without_interleaving import ( + forward_backward_pipelining_without_interleaving, + ) + from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_with_interleaving import ( + _forward_backward_pipelining_with_interleaving, + ) + from apex.transformer.pipeline_parallel.schedules.fwd_bwd_no_pipelining import forward_backward_no_pipelining + from apex.transformer.enums import AttnMaskType + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + +__conditioning_keys__ = {'concat': 'c_concat', + 'crossattn': 'c_crossattn', + 'adm': 'y'} + + +def random_dropout(embeddings, drop_rate): + r""" + Function to perform random dropout for embeddings. + When we drop embeddings, we zero them out. + Args: + embeddings (tensor): Input embeddings + drop_rate (float): Rate of dropping the embedding. + """ + nsamples = embeddings.shape[0] + zero_flag = torch.ones( + nsamples, 1, 1 + ).to(embeddings.dtype) * (1 - drop_rate) + zero_flag = torch.bernoulli(zero_flag).cuda(non_blocking=True) + embeddings = embeddings * zero_flag + return embeddings + + +def disabled_train(self, mode=True): + """Overwrite model.train with this function to make sure train/eval mode + does not change anymore.""" + return self + + +def uniform_on_device(r1, r2, shape, device): + return (r1 - r2) * torch.rand(*shape, device=device) + r2 + + +class DDPM(torch.nn.Module): + + def __init__(self, cfg): + super().__init__() + assert cfg.parameterization in ["eps", "x0"], 'currently only supporting "eps" and "x0"' + self.parameterization = cfg.parameterization + logging.info(f"{self.__class__.__name__}: Running in {self.parameterization}-prediction mode") + self.cond_stage_model = None + self.clip_denoised = cfg.clip_denoised + self.log_every_t = cfg.log_every_t + self.first_stage_key = cfg.first_stage_key + self.image_size = cfg.image_size # try conv? + self.channels = cfg.channels + self.use_positional_encodings = cfg.use_positional_encodings + self.model = DiffusionWrapper(cfg.unet_config, cfg.conditioning_key) + count_params(self.model, verbose=True) + + self.v_posterior = cfg.v_posterior + self.original_elbo_weight = cfg.original_elbo_weight + self.l_simple_weight = cfg.l_simple_weight + + self.register_schedule(given_betas=cfg.given_betas, beta_schedule=cfg.beta_schedule, timesteps=cfg.timesteps, + linear_start=cfg.linear_start, linear_end=cfg.linear_end, cosine_s=cfg.cosine_s) + + self.loss_type = cfg.loss_type + + self.learn_logvar = cfg.learn_logvar + self.logvar = torch.full(fill_value=cfg.logvar_init, size=(self.num_timesteps,)) + if self.learn_logvar: + self.logvar = nn.Parameter(self.logvar, requires_grad=True) + + self.rng = torch.Generator(device=torch.cuda.current_device(), ) + + def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, + linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + if exists(given_betas): + betas = given_betas + else: + betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, + cosine_s=cosine_s) + alphas = 1. - betas + alphas_cumprod = np.cumprod(alphas, axis=0) + alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) + + timesteps, = betas.shape + self.num_timesteps = int(timesteps) + self.linear_start = linear_start + self.linear_end = linear_end + assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' + + to_torch = partial(torch.tensor, dtype=torch.float32) + + self.register_buffer('betas', to_torch(betas)) + self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) + self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) + self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) + self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) + self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1))) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + posterior_variance = (1 - self.v_posterior) * betas * (1. - alphas_cumprod_prev) / ( + 1. - alphas_cumprod) + self.v_posterior * betas + # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) + self.register_buffer('posterior_variance', to_torch(posterior_variance)) + # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain + self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20)))) + self.register_buffer('posterior_mean_coef1', to_torch( + betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod))) + self.register_buffer('posterior_mean_coef2', to_torch( + (1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. - alphas_cumprod))) + + if self.parameterization == "eps": + lvlb_weights = self.betas ** 2 / ( + 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod)) + elif self.parameterization == "x0": + lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2. * 1 - torch.Tensor(alphas_cumprod)) + else: + raise NotImplementedError("mu not supported") + # TODO how to choose this term + lvlb_weights[0] = lvlb_weights[1] + self.register_buffer('lvlb_weights', lvlb_weights, persistent=False) + assert not torch.isnan(self.lvlb_weights).all() + + def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): + pl_sd = torch.load(path, map_location="cpu") + if "state_dict" in list(pl_sd.keys()): + pl_sd = pl_sd["state_dict"] + + sd = {} + first_key = list(pl_sd.keys())[0] + # State keys of model trained with TorchDynamo changed from + # "model.xxx" to "model._orig_mod.xxx" + for k, v in pl_sd.items(): + new_k = k.replace("._orig_mod", "") + # compatibility for stable diffusion old checkpoint + # remove megatron wrapper prefix + if first_key == "model.betas": + new_k = new_k.lstrip("model.") + sd[new_k] = v + + keys = list(sd.keys()) + for k in keys: + for ik in ignore_keys: + if k.startswith(ik): + logging.info("Deleting key {} from state_dict.".format(k)) + del sd[k] + missing, unexpected = self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict( + sd, strict=False) + logging.info(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") + if len(missing) > 0: + logging.info(f"Missing Keys: {missing}") + if len(unexpected) > 0: + logging.info(f"Unexpected Keys: {unexpected}") + + def q_mean_variance(self, x_start, t): + """ + Get the distribution q(x_t | x_0). + :param x_start: the [N x C x ...] tensor of noiseless inputs. + :param t: the number of diffusion steps (minus 1). Here, 0 means one step. + :return: A tuple (mean, variance, log_variance), all of x_start's shape. + """ + mean = (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start) + variance = extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape) + log_variance = extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape) + return mean, variance, log_variance + + def predict_start_from_noise(self, x_t, t, noise): + return ( + extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - + extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise + ) + + def q_posterior(self, x_start, x_t, t): + posterior_mean = ( + extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start + + extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t + ) + posterior_variance = extract_into_tensor(self.posterior_variance, t, x_t.shape) + posterior_log_variance_clipped = extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape) + return posterior_mean, posterior_variance, posterior_log_variance_clipped + + def p_mean_variance(self, x, t, clip_denoised: bool): + model_out = self.model(x, t) + if self.parameterization == "eps": + x_recon = self.predict_start_from_noise(x, t=t, noise=model_out) + elif self.parameterization == "x0": + x_recon = model_out + if clip_denoised: + x_recon.clamp_(-1., 1.) + + model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) + return model_mean, posterior_variance, posterior_log_variance + + @torch.no_grad() + def p_sample(self, x, t, clip_denoised=True, repeat_noise=False): + b, *_, device = *x.shape, x.device + model_mean, _, model_log_variance = self.p_mean_variance(x=x, t=t, clip_denoised=clip_denoised) + noise = noise_like(x.shape, device, repeat_noise) + # no noise when t == 0 + nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))) + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise + + @torch.no_grad() + def p_sample_loop(self, shape, return_intermediates=False): + device = self.betas.device + b = shape[0] + img = torch.randn(shape, generator=self.rng, device=device) + intermediates = [img] + for i in tqdm(reversed(range(0, self.num_timesteps)), desc='Sampling t', total=self.num_timesteps): + img = self.p_sample(img, torch.full((b,), i, device=device, dtype=torch.long), + clip_denoised=self.clip_denoised) + if i % self.log_every_t == 0 or i == self.num_timesteps - 1: + intermediates.append(img) + if return_intermediates: + return img, intermediates + return img + + @torch.no_grad() + def sample(self, batch_size=16, return_intermediates=False): + image_size = self.image_size + channels = self.channels + return self.p_sample_loop((batch_size, channels, image_size, image_size), + return_intermediates=return_intermediates) + + def q_sample(self, x_start, t, noise=None): + noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) + return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) + + def get_loss(self, pred, target, mean=True): + if self.loss_type == 'l1': + loss = (target - pred).abs() + if mean: + loss = loss.mean() + elif self.loss_type == 'l2': + if mean: + loss = torch.nn.functional.mse_loss(target, pred) + else: + loss = torch.nn.functional.mse_loss(target, pred, reduction='none') + else: + raise NotImplementedError("unknown loss type '{loss_type}'") + + return loss + + def p_losses(self, x_start, t, noise=None): + noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) + x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) + model_out = self.model(x_noisy, t) + + loss_dict = {} + if self.parameterization == "eps": + target = noise + elif self.parameterization == "x0": + target = x_start + else: + raise NotImplementedError(f"Paramterization {self.parameterization} not yet supported") + + loss = self.get_loss(model_out, target, mean=False).mean(dim=[1, 2, 3]) + + log_prefix = 'train' if self.training else 'val' + + loss_dict.update({f'{log_prefix}/loss_simple': loss.mean()}) + loss_simple = loss.mean() * self.l_simple_weight + + loss_vlb = (self.lvlb_weights[t] * loss).mean() + loss_dict.update({f'{log_prefix}/loss_vlb': loss_vlb}) + + loss = loss_simple + self.original_elbo_weight * loss_vlb + + loss_dict.update({f'{log_prefix}/loss': loss}) + + return loss, loss_dict + + def forward(self, x, *args, **kwargs): + # b, c, h, w, device, img_size, = *x.shape, x.device, self.image_size + # assert h == img_size and w == img_size, f'height and width of image must be {img_size}' + t = torch.randint(0, self.num_timesteps, (x.shape[0],), generator=self.rng, device=x.device).long() + return self.p_losses(x, t, *args, **kwargs) + + def get_input(self, batch, k): + x = batch[k] + if len(x.shape) == 3: + x = x[..., None] + x = rearrange(x, 'b h w c -> b c h w') + x = x.to(memory_format=torch.contiguous_format) + return x + + def shared_step(self, batch): + x = self.get_input(batch, self.first_stage_key) + loss, loss_dict = self(x) + return loss, loss_dict + + def _get_rows_from_list(self, samples): + n_imgs_per_row = len(samples) + denoise_grid = rearrange(samples, 'n b c h w -> b n c h w') + denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w') + denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row) + return denoise_grid + + @torch.no_grad() + def log_images(self, batch, N=8, n_row=2, sample=True, return_keys=None, **kwargs): + log = dict() + x = self.get_input(batch, self.first_stage_key) + N = min(x.shape[0], N) + n_row = min(x.shape[0], n_row) + x = x[:N] + log["inputs"] = x + + # get diffusion row + diffusion_row = list() + x_start = x[:n_row] + + for t in range(self.num_timesteps): + if t % self.log_every_t == 0 or t == self.num_timesteps - 1: + t = repeat(torch.tensor([t]), '1 -> b', b=n_row) + t = t.long() + noise = randn_like(x_start, generator=self.rng) + x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) + diffusion_row.append(x_noisy) + + log["diffusion_row"] = self._get_rows_from_list(diffusion_row) + + if sample: + # get denoise row + with self.ema_scope("Plotting"): + samples, denoise_row = self.sample(batch_size=N, return_intermediates=True) + + log["samples"] = samples + log["denoise_row"] = self._get_rows_from_list(denoise_row) + + if return_keys: + if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0: + return log + else: + return {key: log[key] for key in return_keys} + return log + + +class LatentDiffusion(DDPM, Serialization): + """main class""" + + def __init__(self, cfg): + self.num_timesteps_cond = default(cfg.num_timesteps_cond, 1) + self.scale_by_std = cfg.scale_by_std + assert self.num_timesteps_cond <= cfg.timesteps + # for backwards compatibility after implementation of DiffusionWrapper + if cfg.conditioning_key is None: + conditioning_key = 'concat' if cfg.concat_mode else 'crossattn' + else: + conditioning_key = cfg.conditioning_key + if cfg.cond_stage_config == '__is_unconditional__': + conditioning_key = None + ckpt_path = cfg.ckpt_path + ignore_keys = cfg.ignore_keys + cfg.conditioning_key = conditioning_key + super().__init__(cfg=cfg) + self.concat_mode = cfg.concat_mode + self.cond_stage_trainable = cfg.cond_stage_trainable + self.cond_stage_key = cfg.cond_stage_key + + self.num_downs = 0 + if "ddconfig" in cfg.first_stage_config and "ch_mult" in cfg.first_stage_config.ddconfig: + self.num_downs = len(cfg.first_stage_config.ddconfig.ch_mult) - 1 + if not cfg.scale_by_std: + self.scale_factor = cfg.scale_factor + else: + self.register_buffer('scale_factor', torch.tensor(cfg.scale_factor)) + self.instantiate_first_stage(cfg.first_stage_config) + self.instantiate_cond_stage(cfg.cond_stage_config) + self.cond_stage_forward = cfg.cond_stage_forward + self.clip_denoised = False + self.bbox_tokenizer = None + self.text_embedding_dropout_rate = cfg.text_embedding_dropout_rate + self.fused_opt = cfg.fused_opt + + self.restarted_from_ckpt = False + if ckpt_path is not None: + self.init_from_ckpt(ckpt_path, ignore_keys) + self.restarted_from_ckpt = True + + # Fusing VAE and CLIP doesn't give benefit + if cfg.get("inductor", False): + # TorchInductor with CUDA graph can lead to OOM + inductor_config.triton.cudagraphs = cfg.get("inductor_cudagraphs", False) + self.model = optimize("inductor")(self.model) + + def make_cond_schedule(self, ): + self.cond_ids = torch.full(size=(self.num_timesteps,), fill_value=self.num_timesteps - 1, dtype=torch.long) + ids = torch.round(torch.linspace(0, self.num_timesteps - 1, self.num_timesteps_cond)).long() + self.cond_ids[:self.num_timesteps_cond] = ids + + def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): + # only for very first batch + # set rescale weight to 1./std of encodings + logging.info("### USING STD-RESCALING ###") + x = super().get_input(batch, self.first_stage_key) + encoder_posterior = self.encode_first_stage(x) + z = self.get_first_stage_encoding(encoder_posterior).detach() + del self.scale_factor + self.register_buffer('scale_factor', 1. / z.flatten().std()) + logging.info(f"setting self.scale_factor to {self.scale_factor}") + logging.info("### USING STD-RESCALING ###") + + def register_schedule(self, + given_betas=None, beta_schedule="linear", timesteps=1000, + linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + super().register_schedule(given_betas, beta_schedule, timesteps, linear_start, linear_end, cosine_s) + + self.shorten_cond_schedule = self.num_timesteps_cond > 1 + if self.shorten_cond_schedule: + self.make_cond_schedule() + + def instantiate_first_stage(self, config): + model = LatentDiffusion.from_config_dict(config) + self.first_stage_model = model.eval() + self.first_stage_model.train = disabled_train + for param in self.first_stage_model.parameters(): + param.requires_grad = False + + def instantiate_cond_stage(self, config): + if not self.cond_stage_trainable: + if config == "__is_first_stage__": + logging.info("Using first stage also as cond stage.") + self.cond_stage_model = self.first_stage_model + elif config == "__is_unconditional__": + logging.info(f"Training {self.__class__.__name__} as an unconditional model.") + self.cond_stage_model = None + # self.be_unconditional = True + else: + model = LatentDiffusion.from_config_dict(config) + self.cond_stage_model = model.eval() + self.cond_stage_model.train = disabled_train + for param in self.cond_stage_model.parameters(): + param.requires_grad = False + else: + assert config != '__is_first_stage__' + assert config != '__is_unconditional__' + model = LatentDiffusion.from_config_dict(config) + self.cond_stage_model = model + + def _get_denoise_row_from_list(self, samples, desc='', force_no_decoder_quantization=False): + denoise_row = [] + for zd in tqdm(samples, desc=desc): + denoise_row.append(self.decode_first_stage(zd, + force_not_quantize=force_no_decoder_quantization)) + n_imgs_per_row = len(denoise_row) + denoise_row = torch.stack(denoise_row) # n_log_step, n_row, C, H, W + denoise_grid = rearrange(denoise_row, 'n b c h w -> b n c h w') + denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w') + denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row) + return denoise_grid + + def get_first_stage_encoding(self, encoder_posterior): + if isinstance(encoder_posterior, DiagonalGaussianDistribution): + z = encoder_posterior.sample() + elif isinstance(encoder_posterior, torch.Tensor): + z = encoder_posterior + else: + raise NotImplementedError(f"encoder_posterior of type '{type(encoder_posterior)}' not yet implemented") + return self.scale_factor * z + + def get_learned_conditioning(self, c): + if self.cond_stage_forward is None: + if hasattr(self.cond_stage_model, 'encode') and callable(self.cond_stage_model.encode): + c = self.cond_stage_model.encode(c) + if isinstance(c, DiagonalGaussianDistribution): + c = c.mode() + else: + c = self.cond_stage_model(c) + else: + assert hasattr(self.cond_stage_model, self.cond_stage_forward) + c = getattr(self.cond_stage_model, self.cond_stage_forward)(c) + return c + + def meshgrid(self, h, w): + y = torch.arange(0, h).view(h, 1, 1).repeat(1, w, 1) + x = torch.arange(0, w).view(1, w, 1).repeat(h, 1, 1) + + arr = torch.cat([y, x], dim=-1) + return arr + + def delta_border(self, h, w): + """ + :param h: height + :param w: width + :return: normalized distance to image border, + wtith min distance = 0 at border and max dist = 0.5 at image center + """ + lower_right_corner = torch.tensor([h - 1, w - 1]).view(1, 1, 2) + arr = self.meshgrid(h, w) / lower_right_corner + dist_left_up = torch.min(arr, dim=-1, keepdims=True)[0] + dist_right_down = torch.min(1 - arr, dim=-1, keepdims=True)[0] + edge_dist = torch.min(torch.cat([dist_left_up, dist_right_down], dim=-1), dim=-1)[0] + return edge_dist + + def get_weighting(self, h, w, Ly, Lx, device): + weighting = self.delta_border(h, w) + weighting = torch.clip(weighting, self.split_input_params["clip_min_weight"], + self.split_input_params["clip_max_weight"], ) + weighting = weighting.view(1, h * w, 1).repeat(1, 1, Ly * Lx).to(device) + + if self.split_input_params["tie_braker"]: + L_weighting = self.delta_border(Ly, Lx) + L_weighting = torch.clip(L_weighting, + self.split_input_params["clip_min_tie_weight"], + self.split_input_params["clip_max_tie_weight"]) + + L_weighting = L_weighting.view(1, 1, Ly * Lx).to(device) + weighting = weighting * L_weighting + return weighting + + def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo load once not every time, shorten code + """ + :param x: img of size (bs, c, h, w) + :return: n img crops of size (n, bs, c, kernel_size[0], kernel_size[1]) + """ + bs, nc, h, w = x.shape + + # number of crops in image + Ly = (h - kernel_size[0]) // stride[0] + 1 + Lx = (w - kernel_size[1]) // stride[1] + 1 + + if uf == 1 and df == 1: + fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) + unfold = torch.nn.Unfold(**fold_params) + + fold = torch.nn.Fold(output_size=x.shape[2:], **fold_params) + + weighting = self.get_weighting(kernel_size[0], kernel_size[1], Ly, Lx, x.device).to(x.dtype) + normalization = fold(weighting).view(1, 1, h, w) # normalizes the overlap + weighting = weighting.view((1, 1, kernel_size[0], kernel_size[1], Ly * Lx)) + + elif uf > 1 and df == 1: + fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) + unfold = torch.nn.Unfold(**fold_params) + + fold_params2 = dict(kernel_size=(kernel_size[0] * uf, kernel_size[0] * uf), + dilation=1, padding=0, + stride=(stride[0] * uf, stride[1] * uf)) + fold = torch.nn.Fold(output_size=(x.shape[2] * uf, x.shape[3] * uf), **fold_params2) + + weighting = self.get_weighting(kernel_size[0] * uf, kernel_size[1] * uf, Ly, Lx, x.device).to(x.dtype) + normalization = fold(weighting).view(1, 1, h * uf, w * uf) # normalizes the overlap + weighting = weighting.view((1, 1, kernel_size[0] * uf, kernel_size[1] * uf, Ly * Lx)) + + elif df > 1 and uf == 1: + fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) + unfold = torch.nn.Unfold(**fold_params) + + fold_params2 = dict(kernel_size=(kernel_size[0] // df, kernel_size[0] // df), + dilation=1, padding=0, + stride=(stride[0] // df, stride[1] // df)) + fold = torch.nn.Fold(output_size=(x.shape[2] // df, x.shape[3] // df), **fold_params2) + + weighting = self.get_weighting(kernel_size[0] // df, kernel_size[1] // df, Ly, Lx, x.device).to(x.dtype) + normalization = fold(weighting).view(1, 1, h // df, w // df) # normalizes the overlap + weighting = weighting.view((1, 1, kernel_size[0] // df, kernel_size[1] // df, Ly * Lx)) + + else: + raise NotImplementedError + + return fold, unfold, normalization, weighting + + @torch.no_grad() + def get_input(self, batch, k, return_first_stage_outputs=False, force_c_encode=False, + cond_key=None, return_original_cond=False, bs=None): + if self.first_stage_key.endswith('encoded'): + gaussian_parameters = batch[self.first_stage_key] + encoder_posterior = DiagonalGaussianDistribution(gaussian_parameters) + else: + x = super().get_input(batch, k) + if bs is not None: + x = x[:bs] + + encoder_posterior = self.encode_first_stage(x) + z = self.get_first_stage_encoding(encoder_posterior).detach() + + if self.model.conditioning_key is not None: + if cond_key is None: + cond_key = self.cond_stage_key + if cond_key != self.first_stage_key: + if cond_key in ['captions', 'coordinates_bbox', 'txt'] or cond_key.endswith("encoded"): + xc = batch[cond_key] + elif cond_key == 'class_label': + xc = batch + else: + xc = super().get_input(batch, cond_key) + else: + xc = x + if (not self.cond_stage_trainable or force_c_encode) and (not cond_key.endswith('encoded')): + if isinstance(xc, dict) or isinstance(xc, list): + # import pudb; pudb.set_trace() + c = self.get_learned_conditioning(xc) + else: + c = self.get_learned_conditioning(xc) + else: + c = xc + if bs is not None: + c = c[:bs] + + if self.use_positional_encodings: + pos_x, pos_y = self.compute_latent_shifts(batch) + ckey = __conditioning_keys__[self.model.conditioning_key] + c = {ckey: c, 'pos_x': pos_x, 'pos_y': pos_y} + + if self.text_embedding_dropout_rate > 0: + assert (self.text_embedding_dropout_rate < 1.) + c = random_dropout(c, drop_rate=self.text_embedding_dropout_rate) + + else: + c = None + xc = None + if self.use_positional_encodings: + pos_x, pos_y = self.compute_latent_shifts(batch) + c = {'pos_x': pos_x, 'pos_y': pos_y} + out = [z, c] + if return_first_stage_outputs: + xrec = self.decode_first_stage(z) + out.extend([x, xrec]) + if return_original_cond: + out.append(xc) + return out + + @torch.no_grad() + def decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): + if predict_cids: + if z.dim() == 4: + z = torch.argmax(z.exp(), dim=1).long() + z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) + z = rearrange(z, 'b h w c -> b c h w').contiguous() + + z = 1. / self.scale_factor * z + + if hasattr(self, "split_input_params"): + if self.split_input_params["patch_distributed_vq"]: + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. (64, 64) + uf = self.split_input_params["vqf"] + bs, nc, h, w = z.shape + if ks[0] > h or ks[1] > w: + ks = (min(ks[0], h), min(ks[1], w)) + logging.info("reducing Kernel") + + if stride[0] > h or stride[1] > w: + stride = (min(stride[0], h), min(stride[1], w)) + logging.info("reducing stride") + + fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf) + + z = unfold(z) # (bn, nc * prod(**ks), L) + # 1. Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + # 2. apply model loop over last dim + if isinstance(self.first_stage_model, VQModelInterface): + output_list = [self.first_stage_model.decode(z[:, :, :, :, i], + force_not_quantize=predict_cids or force_not_quantize) + for i in range(z.shape[-1])] + else: + + output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) + for i in range(z.shape[-1])] + + o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) + o = o * weighting + # Reverse 1. reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + decoded = fold(o) + decoded = decoded / normalization # norm is shape (1, 1, h, w) + return decoded + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + # same as above but without decorator + def differentiable_decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): + if predict_cids: + if z.dim() == 4: + z = torch.argmax(z.exp(), dim=1).long() + z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) + z = rearrange(z, 'b h w c -> b c h w').contiguous() + + z = 1. / self.scale_factor * z + + if hasattr(self, "split_input_params"): + if self.split_input_params["patch_distributed_vq"]: + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. (64, 64) + uf = self.split_input_params["vqf"] + bs, nc, h, w = z.shape + if ks[0] > h or ks[1] > w: + ks = (min(ks[0], h), min(ks[1], w)) + logging.info("reducing Kernel") + + if stride[0] > h or stride[1] > w: + stride = (min(stride[0], h), min(stride[1], w)) + logging.info("reducing stride") + + fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf) + + z = unfold(z) # (bn, nc * prod(**ks), L) + # 1. Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + # 2. apply model loop over last dim + if isinstance(self.first_stage_model, VQModelInterface): + output_list = [self.first_stage_model.decode(z[:, :, :, :, i], + force_not_quantize=predict_cids or force_not_quantize) + for i in range(z.shape[-1])] + else: + + output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) + for i in range(z.shape[-1])] + + o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) + o = o * weighting + # Reverse 1. reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + decoded = fold(o) + decoded = decoded / normalization # norm is shape (1, 1, h, w) + return decoded + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + @torch.no_grad() + def encode_first_stage(self, x): + if hasattr(self, "split_input_params"): + if self.split_input_params["patch_distributed_vq"]: + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. (64, 64) + df = self.split_input_params["vqf"] + self.split_input_params['original_image_size'] = x.shape[-2:] + bs, nc, h, w = x.shape + if ks[0] > h or ks[1] > w: + ks = (min(ks[0], h), min(ks[1], w)) + logging.info("reducing Kernel") + + if stride[0] > h or stride[1] > w: + stride = (min(stride[0], h), min(stride[1], w)) + logging.info("reducing stride") + + fold, unfold, normalization, weighting = self.get_fold_unfold(x, ks, stride, df=df) + z = unfold(x) # (bn, nc * prod(**ks), L) + # Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + output_list = [self.first_stage_model.encode(z[:, :, :, :, i]) + for i in range(z.shape[-1])] + + o = torch.stack(output_list, axis=-1) + o = o * weighting + + # Reverse reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + decoded = fold(o) + decoded = decoded / normalization + return decoded + + else: + return self.first_stage_model.encode(x) + else: + return self.first_stage_model.encode(x) + + def shared_step(self, batch, **kwargs): + x, c = self.get_input(batch, self.first_stage_key) + loss = self(x, c) + return loss + + def forward(self, x, c, *args, **kwargs): + t = torch.randint(0, self.num_timesteps, (x.shape[0],), generator=self.rng, device=x.device).long() + if self.model.conditioning_key is not None: + assert c is not None + if self.cond_stage_trainable: + c = self.get_learned_conditioning(c) + if self.shorten_cond_schedule: # TODO: drop this option + tc = self.cond_ids[t] + c = self.q_sample(x_start=c, t=tc, noise=randn_like(c.float(), generator=self.rng)) + return self.p_losses(x, c, t, *args, **kwargs) + + def _rescale_annotations(self, bboxes, crop_coordinates): # TODO: move to dataset + def rescale_bbox(bbox): + x0 = clamp((bbox[0] - crop_coordinates[0]) / crop_coordinates[2]) + y0 = clamp((bbox[1] - crop_coordinates[1]) / crop_coordinates[3]) + w = min(bbox[2] / crop_coordinates[2], 1 - x0) + h = min(bbox[3] / crop_coordinates[3], 1 - y0) + return x0, y0, w, h + + return [rescale_bbox(b) for b in bboxes] + + def apply_model(self, x_noisy, t, cond, return_ids=False): + + if isinstance(cond, dict): + # hybrid case, cond is exptected to be a dict + for key in cond: + if not isinstance(cond[key], list): + cond[key] = [cond[key]] + else: + if not isinstance(cond, list): + cond = [cond] + key = 'c_concat' if self.model.conditioning_key == 'concat' else 'c_crossattn' + cond = {key: cond} + + if hasattr(self, "split_input_params"): + assert len(cond) == 1 # todo can only deal with one conditioning atm + assert not return_ids + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. (64, 64) + + h, w = x_noisy.shape[-2:] + + fold, unfold, normalization, weighting = self.get_fold_unfold(x_noisy, ks, stride) + + z = unfold(x_noisy) # (bn, nc * prod(**ks), L) + # Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + z_list = [z[:, :, :, :, i] for i in range(z.shape[-1])] + + if self.cond_stage_key in ["image", "LR_image", "segmentation", + 'bbox_img'] and self.model.conditioning_key: # todo check for completeness + c_key = next(iter(cond.keys())) # get key + c = next(iter(cond.values())) # get value + assert (len(c) == 1) # todo extend to list with more than one elem + c = c[0] # get element + + c = unfold(c) + c = c.view((c.shape[0], -1, ks[0], ks[1], c.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + cond_list = [{c_key: [c[:, :, :, :, i]]} for i in range(c.shape[-1])] + + elif self.cond_stage_key == 'coordinates_bbox': + assert 'original_image_size' in self.split_input_params, 'BoudingBoxRescaling is missing original_image_size' + + # assuming padding of unfold is always 0 and its dilation is always 1 + n_patches_per_row = int((w - ks[0]) / stride[0] + 1) + full_img_h, full_img_w = self.split_input_params['original_image_size'] + # as we are operating on latents, we need the factor from the original image size to the + # spatial latent size to properly rescale the crops for regenerating the bbox annotations + num_downs = self.first_stage_model.encoder.num_resolutions - 1 + rescale_latent = 2 ** (num_downs) + + # get top left postions of patches as conforming for the bbbox tokenizer, therefore we + # need to rescale the tl patch coordinates to be in between (0,1) + tl_patch_coordinates = [(rescale_latent * stride[0] * (patch_nr % n_patches_per_row) / full_img_w, + rescale_latent * stride[1] * (patch_nr // n_patches_per_row) / full_img_h) + for patch_nr in range(z.shape[-1])] + + # patch_limits are tl_coord, width and height coordinates as (x_tl, y_tl, h, w) + patch_limits = [(x_tl, y_tl, + rescale_latent * ks[0] / full_img_w, + rescale_latent * ks[1] / full_img_h) for x_tl, y_tl in tl_patch_coordinates] + # patch_values = [(np.arange(x_tl,min(x_tl+ks, 1.)),np.arange(y_tl,min(y_tl+ks, 1.))) for x_tl, y_tl in tl_patch_coordinates] + + # tokenize crop coordinates for the bounding boxes of the respective patches + patch_limits_tknzd = [torch.LongTensor(self.bbox_tokenizer._crop_encoder(bbox))[None] + for bbox in patch_limits] # list of length l with tensors of shape (1, 2) + logging.info(patch_limits_tknzd[0].shape) + # cut tknzd crop position from conditioning + assert isinstance(cond, dict), 'cond must be dict to be fed into model' + cut_cond = cond['c_crossattn'][0][..., :-2] + logging.info(cut_cond.shape) + + adapted_cond = torch.stack([torch.cat([cut_cond, p], dim=1) for p in patch_limits_tknzd]) + adapted_cond = rearrange(adapted_cond, 'l b n -> (l b) n') + logging.info(adapted_cond.shape) + adapted_cond = self.get_learned_conditioning(adapted_cond) + logging.info(adapted_cond.shape) + adapted_cond = rearrange(adapted_cond, '(l b) n d -> l b n d', l=z.shape[-1]) + logging.info(adapted_cond.shape) + + cond_list = [{'c_crossattn': [e]} for e in adapted_cond] + + else: + cond_list = [cond for i in range(z.shape[-1])] # Todo make this more efficient + + # apply model by loop over crops + output_list = [self.model(z_list[i], t, **cond_list[i]) for i in range(z.shape[-1])] + assert not isinstance(output_list[0], + tuple) # todo cant deal with multiple model outputs check this never happens + + o = torch.stack(output_list, axis=-1) + o = o * weighting + # Reverse reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + x_recon = fold(o) / normalization + + else: + x_recon = self.model(x_noisy, t, **cond) + + if isinstance(x_recon, tuple) and not return_ids: + return x_recon[0] + else: + return x_recon + + def _predict_eps_from_xstart(self, x_t, t, pred_xstart): + return (extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart) / \ + extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) + + def _prior_bpd(self, x_start): + """ + Get the prior KL term for the variational lower-bound, measured in + bits-per-dim. + This term can't be optimized, as it only depends on the encoder. + :param x_start: the [N x C x ...] tensor of inputs. + :return: a batch of [N] KL values (in bits), one per batch element. + """ + batch_size = x_start.shape[0] + t = torch.tensor([self.num_timesteps - 1] * batch_size, device=x_start.device) + qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t) + kl_prior = normal_kl(mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0) + return mean_flat(kl_prior) / np.log(2.0) + + def p_losses(self, x_start, cond, t, noise=None): + noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) + x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) + model_output = self.apply_model(x_noisy, t, cond) + + loss_dict = {} + prefix = 'train' if self.training else 'val' + + if self.parameterization == "x0": + target = x_start + elif self.parameterization == "eps": + target = noise + else: + raise NotImplementedError() + + loss_simple = self.get_loss(model_output, target, mean=False).mean([1, 2, 3]) + loss_dict.update({f'{prefix}/loss_simple': loss_simple.mean()}) + self.logvar = self.logvar.cuda(non_blocking=True) + logvar_t = self.logvar[t].cuda(non_blocking=True) + loss = loss_simple / torch.exp(logvar_t) + logvar_t + # loss = loss_simple / torch.exp(self.logvar) + self.logvar + if self.learn_logvar: + loss_dict.update({f'{prefix}/loss_gamma': loss.mean()}) + loss_dict.update({'logvar': self.logvar.data.mean()}) + + loss = self.l_simple_weight * loss.mean() + + loss_vlb = self.get_loss(model_output, target, mean=False).mean(dim=(1, 2, 3)) + loss_vlb = (self.lvlb_weights[t] * loss_vlb).mean() + loss_dict.update({f'{prefix}/loss_vlb': loss_vlb}) + loss += (self.original_elbo_weight * loss_vlb) + loss_dict.update({f'{prefix}/loss': loss}) + + return loss, loss_dict + + def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codebook_ids=False, quantize_denoised=False, + return_x0=False, score_corrector=None, corrector_kwargs=None): + t_in = t + model_out = self.apply_model(x, t_in, c, return_ids=return_codebook_ids) + + if score_corrector is not None: + assert self.parameterization == "eps" + model_out = score_corrector.modify_score(self, model_out, x, t, c, **corrector_kwargs) + + if return_codebook_ids: + model_out, logits = model_out + + if self.parameterization == "eps": + x_recon = self.predict_start_from_noise(x, t=t, noise=model_out) + elif self.parameterization == "x0": + x_recon = model_out + else: + raise NotImplementedError() + + if clip_denoised: + x_recon.clamp_(-1., 1.) + if quantize_denoised: + x_recon, _, [_, _, indices] = self.first_stage_model.quantize(x_recon) + model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) + if return_codebook_ids: + return model_mean, posterior_variance, posterior_log_variance, logits + elif return_x0: + return model_mean, posterior_variance, posterior_log_variance, x_recon + else: + return model_mean, posterior_variance, posterior_log_variance + + @torch.no_grad() + def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False, + return_codebook_ids=False, quantize_denoised=False, return_x0=False, + temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None): + b, *_, device = *x.shape, x.device + outputs = self.p_mean_variance(x=x, c=c, t=t, clip_denoised=clip_denoised, + return_codebook_ids=return_codebook_ids, + quantize_denoised=quantize_denoised, + return_x0=return_x0, + score_corrector=score_corrector, corrector_kwargs=corrector_kwargs) + if return_codebook_ids: + raise DeprecationWarning("Support dropped.") + model_mean, _, model_log_variance, logits = outputs + elif return_x0: + model_mean, _, model_log_variance, x0 = outputs + else: + model_mean, _, model_log_variance = outputs + + noise = noise_like(x.shape, device, repeat_noise) * temperature + if noise_dropout > 0.: + noise = torch.nn.functional.dropout(noise, p=noise_dropout) + # no noise when t == 0 + nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))) + + if return_codebook_ids: + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, logits.argmax(dim=1) + if return_x0: + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, x0 + else: + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise + + @torch.no_grad() + def progressive_denoising(self, cond, shape, verbose=True, callback=None, quantize_denoised=False, + img_callback=None, mask=None, x0=None, temperature=1., noise_dropout=0., + score_corrector=None, corrector_kwargs=None, batch_size=None, x_T=None, start_T=None, + log_every_t=None): + if not log_every_t: + log_every_t = self.log_every_t + timesteps = self.num_timesteps + if batch_size is not None: + b = batch_size if batch_size is not None else shape[0] + shape = [batch_size] + list(shape) + else: + b = batch_size = shape[0] + if x_T is None: + img = torch.randn(shape, generator=self.rng, device=torch.cuda.current_device()) + else: + img = x_T + intermediates = [] + if cond is not None: + if isinstance(cond, dict): + cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else + list(map(lambda x: x[:batch_size], cond[key])) for key in cond} + else: + cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] + + if start_T is not None: + timesteps = min(timesteps, start_T) + iterator = tqdm(reversed(range(0, timesteps)), desc='Progressive Generation', + total=timesteps) if verbose else reversed( + range(0, timesteps)) + if type(temperature) == float: + temperature = [temperature] * timesteps + + for i in iterator: + ts = torch.full((b,), i, device=torch.cuda.current_device(), dtype=torch.long) + if self.shorten_cond_schedule: + assert self.model.conditioning_key != 'hybrid' + tc = self.cond_ids[ts].to(cond.device) + cond = self.q_sample(x_start=cond, t=tc, noise=randn_like(cond, generator=self.rng)) + + img, x0_partial = self.p_sample(img, cond, ts, + clip_denoised=self.clip_denoised, + quantize_denoised=quantize_denoised, return_x0=True, + temperature=temperature[i], noise_dropout=noise_dropout, + score_corrector=score_corrector, corrector_kwargs=corrector_kwargs) + if mask is not None: + assert x0 is not None + img_orig = self.q_sample(x0, ts) + img = img_orig * mask + (1. - mask) * img + + if i % log_every_t == 0 or i == timesteps - 1: + intermediates.append(x0_partial) + if callback: callback(i) + if img_callback: img_callback(img, i) + return img, intermediates + + @torch.no_grad() + def p_sample_loop(self, cond, shape, return_intermediates=False, + x_T=None, verbose=True, callback=None, timesteps=None, quantize_denoised=False, + mask=None, x0=None, img_callback=None, start_T=None, + log_every_t=None): + + if not log_every_t: + log_every_t = self.log_every_t + device = self.betas.device + b = shape[0] + if x_T is None: + img = torch.randn(shape, generator=self.rng, device=device) + else: + img = x_T + + intermediates = [img] + if timesteps is None: + timesteps = self.num_timesteps + + if start_T is not None: + timesteps = min(timesteps, start_T) + iterator = tqdm(reversed(range(0, timesteps)), desc='Sampling t', total=timesteps) if verbose else reversed( + range(0, timesteps)) + + if mask is not None: + assert x0 is not None + assert x0.shape[2:3] == mask.shape[2:3] # spatial size has to match + + for i in iterator: + ts = torch.full((b,), i, device=device, dtype=torch.long) + if self.shorten_cond_schedule: + assert self.model.conditioning_key != 'hybrid' + tc = self.cond_ids[ts].to(cond.device) + cond = self.q_sample(x_start=cond, t=tc, noise=randn_like(cond, generator=self.rng)) + + img = self.p_sample(img, cond, ts, + clip_denoised=self.clip_denoised, + quantize_denoised=quantize_denoised) + if mask is not None: + img_orig = self.q_sample(x0, ts) + img = img_orig * mask + (1. - mask) * img + + if i % log_every_t == 0 or i == timesteps - 1: + intermediates.append(img) + if callback: callback(i) + if img_callback: img_callback(img, i) + + if return_intermediates: + return img, intermediates + return img + + @torch.no_grad() + def sample(self, cond, batch_size=16, return_intermediates=False, x_T=None, + verbose=True, timesteps=None, quantize_denoised=False, + mask=None, x0=None, shape=None, **kwargs): + if shape is None: + shape = (batch_size, self.channels, self.image_size, self.image_size) + if cond is not None: + if isinstance(cond, dict): + cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else + list(map(lambda x: x[:batch_size], cond[key])) for key in cond} + else: + cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] + return self.p_sample_loop(cond, + shape, + return_intermediates=return_intermediates, x_T=x_T, + verbose=verbose, timesteps=timesteps, quantize_denoised=quantize_denoised, + mask=mask, x0=x0) + + @torch.no_grad() + def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): + + if ddim: + ddim_sampler = DDIMSampler(self) + shape = (self.channels, self.image_size, self.image_size) + samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, + shape, cond, verbose=False, **kwargs) + + else: + samples, intermediates = self.sample(cond=cond, batch_size=batch_size, + return_intermediates=True, **kwargs) + + return samples, intermediates + + @torch.no_grad() + def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200, ddim_eta=1., return_keys=None, + quantize_denoised=True, inpaint=True, plot_denoise_rows=False, plot_progressive_rows=True, + plot_diffusion_rows=True, **kwargs): + + use_ddim = ddim_steps is not None + + log = dict() + z, c, x, xrec, xc = self.get_input(batch, self.first_stage_key, + return_first_stage_outputs=True, + force_c_encode=True, + return_original_cond=True, + bs=N) + N = min(x.shape[0], N) + n_row = min(x.shape[0], n_row) + log["inputs"] = x + log["reconstruction"] = xrec + if self.model.conditioning_key is not None: + if hasattr(self.cond_stage_model, "decode"): + xc = self.cond_stage_model.decode(c) + log["conditioning"] = xc + elif self.cond_stage_key in ["caption"]: + xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["caption"]) + log["conditioning"] = xc + elif self.cond_stage_key == 'class_label': + xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["human_label"]) + log['conditioning'] = xc + elif isimage(xc): + log["conditioning"] = xc + if ismap(xc): + log["original_conditioning"] = self.to_rgb(xc) + + if plot_diffusion_rows: + # get diffusion row + diffusion_row = list() + z_start = z[:n_row] + for t in range(self.num_timesteps): + if t % self.log_every_t == 0 or t == self.num_timesteps - 1: + t = repeat(torch.tensor([t]), '1 -> b', b=n_row) + t = t.long() + noise = randn_like(z_start, generator=self.rng) + z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise) + diffusion_row.append(self.decode_first_stage(z_noisy)) + + diffusion_row = torch.stack(diffusion_row) # n_log_step, n_row, C, H, W + diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w') + diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w') + diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0]) + log["diffusion_row"] = diffusion_grid + + if sample: + # get denoise row + with self.ema_scope("Plotting"): + samples, z_denoise_row = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, + ddim_steps=ddim_steps, eta=ddim_eta) + # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True) + x_samples = self.decode_first_stage(samples) + log["samples"] = x_samples + if plot_denoise_rows: + denoise_grid = self._get_denoise_row_from_list(z_denoise_row) + log["denoise_row"] = denoise_grid + + if quantize_denoised and not isinstance(self.first_stage_model, AutoencoderKL) and not isinstance( + self.first_stage_model, IdentityFirstStage): + # also display when quantizing x0 while sampling + with self.ema_scope("Plotting Quantized Denoised"): + samples, z_denoise_row = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, + ddim_steps=ddim_steps, eta=ddim_eta, + quantize_denoised=True) + # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True, + # quantize_denoised=True) + x_samples = self.decode_first_stage(samples) + log["samples_x0_quantized"] = x_samples + + if inpaint: + # make a simple center square + b, h, w = z.shape[0], z.shape[2], z.shape[3] + mask = torch.ones(N, h, w) + # zeros will be filled in + mask[:, h // 4:3 * h // 4, w // 4:3 * w // 4] = 0. + mask = mask[:, None, ...] + with self.ema_scope("Plotting Inpaint"): + samples, _ = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, + ddim_steps=ddim_steps, x0=z[:N], mask=mask) + x_samples = self.decode_first_stage(samples) + log["samples_inpainting"] = x_samples + log["mask"] = mask + + # outpaint + with self.ema_scope("Plotting Outpaint"): + samples, _ = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, + ddim_steps=ddim_steps, x0=z[:N], mask=mask) + x_samples = self.decode_first_stage(samples) + log["samples_outpainting"] = x_samples + + if plot_progressive_rows: + with self.ema_scope("Plotting Progressives"): + img, progressives = self.progressive_denoising(c, + shape=(self.channels, self.image_size, self.image_size), + batch_size=N) + prog_row = self._get_denoise_row_from_list(progressives, desc="Progressive Generation") + log["progressive_row"] = prog_row + + if return_keys: + if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0: + return log + else: + return {key: log[key] for key in return_keys} + return log + + def parameters(self): + params = list(self.model.parameters()) + if self.cond_stage_trainable: + logging.info(f"{self.__class__.__name__}: Also optimizing conditioner params!") + params = params + list(self.cond_stage_model.parameters()) + if self.learn_logvar: + logging.info('Diffusion model optimizing logvar') + params.append(self.logvar) + return params + + @torch.no_grad() + def to_rgb(self, x): + x = x.float() + if not hasattr(self, "colorize"): + self.colorize = torch.randn(3, x.shape[1], 1, 1, generator=self.rng).to(x) + x = nn.functional.conv2d(x, weight=self.colorize) + x = 2. * (x - x.min()) / (x.max() - x.min()) - 1. + return x + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + # only required for pipeline parallelism + pass + + +class MegatronLatentDiffusion(MegatronMultimodalModel): + """Megatron LatentDiffusion Model.""" + + def __init__(self, cfg: DictConfig, trainer: Trainer): + if not HAVE_APEX: + raise ImportError( + "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + + # this prevents base constructor from initializing tokenizer + self.tokenizer = None + super().__init__(cfg, trainer=trainer) + + self._validate_trainer() + + # megatron_amp_O2 is not yet supported in diffusion models + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) + + self.model = self.model_provider_func() + + self.conditioning_keys = [] + + if self.trainer.precision == 'bf16': + self.autocast_dtype = torch.bfloat16 + elif int(self.trainer.precision) == 32: + self.autocast_dtype = torch.float + elif int(self.trainer.precision) == 16: + self.autocast_dtype = torch.half + else: + raise ValueError('precision must be in [32, 16, "bf16"]') + + def model_provider_func(self, pre_process=True, post_process=True): + """Model depends on pipeline paralellism.""" + model = LatentDiffusion(cfg=self.cfg) + return model + + def forward(self, x, c, *args, **kwargs): + output_tensor = self.model(x, c, *args, **kwargs) + return output_tensor + + @rank_zero_only + @torch.no_grad() + def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): + if self.cfg.scale_by_std and self.current_epoch == 0 and self.global_step == 0 and batch_idx == 0: + assert self.cfg.scale_factor == 1., 'rather not use custom rescaling and std-rescaling simultaneously' + batch[self.cfg.first_stage_key] = \ + batch[self.cfg.first_stage_key].cuda(non_blocking=True) + self.model.on_train_batch_start(batch, batch_idx) + + def training_step(self, batch, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + Batch should be a list of microbatches and those microbatches should on CPU. + Microbatches are then moved to GPU during the pipeline. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + + # we zero grads here because we also call backward in the apex fwd/bwd functions + self._optimizer.zero_grad() + + # we prepare the micro batches for the apex fwd/bwd function + batch_for_pipeline = self.process_global_batch(batch) + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + losses_reduced_per_micro_batch = forward_backward_no_pipelining( + forward_step_func=self.get_forward_output_and_loss_func(), + batch=batch_for_pipeline, + model=self.model, + forward_only=False, + tensor_shape=None, # required by pipeline parallelism + dtype=self.autocast_dtype, + grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + custom_sync_context_handler=None, + sequence_parallel_enabled=False, + sync_batch_comm=False, + ) + + # losses_reduced_per_micro_batch is a list of dictionaries + # [{"loss": 0.1}, {"loss": 0.2}, ...] which are from gradient accumulation steps + # only the last stages of the pipeline return losses + loss_dict = {} + if losses_reduced_per_micro_batch: + # average loss across micro batches + for key in losses_reduced_per_micro_batch[0]: + loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_dict[key] = loss_tensor.mean() + loss_mean = loss_dict["train/loss"] + else: + loss_mean = torch.tensor(0.0, device=torch.cuda.current_device()) + + torch.distributed.broadcast(loss_mean, get_last_rank()) + + # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced + if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): + self.allreduce_sequence_parallel_gradients() + + if self.with_distributed_adam: + # gradients are reduced internally in distributed optimizer + pass + elif self.megatron_amp_O2: + # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): + # # main grads are stored in the MainParamsOptimizer wrapper + # self._optimizer.allreduce_main_grads() + self._optimizer.allreduce_main_grads() + else: + # async grad allreduce is not currently implemented for O1/autocasting mixed precision training + # so we all-reduce gradients after the pipeline + self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) + + if self.cfg.precision == 16: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log('loss_scale', loss_scale) + + self.log_dict(loss_dict, prog_bar=False, + logger=True, on_step=True, rank_zero_only=True) + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, prog_bar=True, rank_zero_only=True) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True) + self.log( + 'consumed_samples', + self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), + prog_bar=True, + rank_zero_only=True, + ) + return loss_mean + + def backward(self, *args, **kwargs): + """ LightningModule hook to do backward. + We want this to do nothing since we run backward in the fwd/bwd functions from apex. + No need to call it here. + """ + pass + + def optimizer_zero_grad(self, *args, **kwargs): + """ LightningModule hook to zero grad. + We want this to do nothing as we are zeroing grads during the training_step. + """ + pass + + def _append_module_grads(self, module, grads): + for param in module.parameters(): + if getattr(param, 'sequence_parallel_enabled', False): + if self.megatron_amp_O2: + grad = param.main_grad + else: + grad = param.grad + grads.append(grad.data) + + def get_forward_output_and_loss_func(self): + + def fwd_output_and_loss_func(batch, model): + batch = [x.cuda(non_blocking=True) for x in batch] + if len(self.conditioning_keys) == 0: + x, c = batch + else: + x = batch[0] + c = {} + for idx, key in enumerate(self.conditioning_keys): + c[key] = batch[1 + idx] + loss, loss_dict = model(x, c) + + def dummy(output_tensor): + return loss, loss_dict + + # output_tensor, and a function to convert output_tensor to loss + loss_dict + return loss, dummy + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(batch, model): + raise NotImplementedError + + return fwd_output_only_func + + @torch.no_grad() + def validation_step(self, batch, batch_idx): + batch_for_pipeline = self.process_global_batch(batch) + + losses_reduced_per_micro_batch = forward_backward_no_pipelining( + forward_step_func=self.get_forward_output_and_loss_func(), + batch=batch_for_pipeline, + model=self.model, + forward_only=True, + tensor_shape=None, # required by pipeline parallelism + dtype=self.autocast_dtype, + ) + # only the last stages of the pipeline return losses + val_loss_dict = {} + if losses_reduced_per_micro_batch: + # average loss across micro batches + for key in losses_reduced_per_micro_batch[0]: + loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + val_loss_dict[key] = loss_tensor.mean() + + self.log_dict(val_loss_dict, prog_bar=False, logger=True, on_step=False, on_epoch=True) + + def process_global_batch(self, global_batch, global_batch_size=None): + """ Prepares the global batch for apex fwd/bwd functions. + Global batch is a list of micro batches. + """ + # noise_map, condition + global_batch[self.cfg.first_stage_key] = \ + global_batch[self.cfg.first_stage_key].cuda(non_blocking=True) + if isinstance(global_batch[self.cfg.cond_stage_key], torch.Tensor): + # in the case of precached text embeddings, cond_stage is also a tensor + global_batch[self.cfg.cond_stage_key] = global_batch[self.cfg.cond_stage_key].cuda(non_blocking=True) + + # SD has more dedicated structure for encoding, so we enable autocasting here as well + with torch.cuda.amp.autocast( + self.autocast_dtype in (torch.half, torch.bfloat16), + dtype=self.autocast_dtype, + ): + x, c = self.model.get_input(global_batch, self.cfg.first_stage_key) + + if not isinstance(c, dict): + return [x, c] + + if len(self.conditioning_keys) == 0: + self.conditioning_keys = list(c.keys()) + c_list = [c[key] for key in self.conditioning_keys] + return [x, *c_list] + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + self.model.rng.manual_seed(self.cfg.seed + 100 * parallel_state.get_data_parallel_rank()) + + # log number of parameters + if isinstance(self.model, list): + num_parameters_on_device = sum( + [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] + ) + else: + num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) + + # to be summed across data parallel group + total_num_parameters = torch.tensor(num_parameters_on_device).cuda(non_blocking=True) + + torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer._checkpoint_connector.resume_from_checkpoint_fit_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + # allowing restored models to optionally setup datasets + self.build_train_valid_test_datasets() + + # Batch size need to be provided for webdatset + self._num_micro_batches = self.cfg.global_batch_size // ( + self.cfg.micro_batch_size * parallel_state.get_data_parallel_world_size()) + self._global_batch_size_on_this_data_parallel_rank = self._num_micro_batches * self.cfg.micro_batch_size + + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + + def build_train_valid_test_datasets(self): + logging.info('Building datasets for Stable Diffusion...') + if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): + raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") + + if self.cfg.first_stage_key.endswith("encoded"): + self._train_ds, self._validation_ds = build_train_valid_precached_datasets( + model_cfg=self.cfg, + consumed_samples=self.compute_consumed_samples(0), + ) + else: + self._train_ds, self._validation_ds = build_train_valid_datasets( + model_cfg=self.cfg, + consumed_samples=self.compute_consumed_samples(0) + ) + self._test_ds = None + + if self._train_ds is not None: + logging.info(f'Length of train dataset: {len(self._train_ds)}') + if self._validation_ds is not None: + logging.info(f'Length of val dataset: {len(self._validation_ds)}') + if self._test_ds is not None: + logging.info(f'Length of test dataset: {len(self._test_ds)}') + logging.info(f'Finished building datasets for LatentDiffusion.') + return self._train_ds, self._validation_ds, self._test_ds + + def setup_training_data(self, cfg): + if hasattr(self, '_train_ds') and self._train_ds is not None: + consumed_samples = self.compute_consumed_samples(0) + logging.info( + f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' + ) + self._train_dl = torch.utils.data.DataLoader( + self._train_ds, + batch_size=self._global_batch_size_on_this_data_parallel_rank, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=True, + persistent_workers=True, + ) + + def setup_validation_data(self, cfg): + if hasattr(self, '_validation_ds') and self._validation_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' + ) + self._validation_dl = torch.utils.data.DataLoader( + self._validation_ds, + batch_size=self._global_batch_size_on_this_data_parallel_rank, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=False, + persistent_workers=True, + ) + + def setup_test_data(self, cfg): + if hasattr(self, '_test_ds') and self._test_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' + ) + self._test_dl = torch.utils.data.DataLoader( + self._test_ds, batch_size=self._global_batch_size_on_this_data_parallel_rank, + num_workers=cfg.num_workers, pin_memory=True, + ) + + def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: + """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device + When using pipeline parallelism, we need the global batch to remain on the CPU, + since the memory overhead will be too high when using a large number of microbatches. + Microbatches are transferred from CPU to GPU inside the pipeline. + """ + return batch + + def _validate_trainer(self): + """ Certain trainer configurations can break training. + Here we try to catch them and raise an error. + """ + if self.trainer.accumulate_grad_batches > 1: + raise ValueError( + f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' + ) + + @classmethod + def list_available_models(cls): + return None + + def parameters(self): + if isinstance(self.model, list): + return itertools.chain.from_iterable(module.parameters() for module in self.model) + else: + return self.model.parameters() + + +class DiffusionWrapper(pl.LightningModule, Serialization): + def __init__(self, diff_model_config, conditioning_key): + super().__init__() + self.diffusion_model = DiffusionWrapper.from_config_dict(diff_model_config) + self.conditioning_key = conditioning_key + assert self.conditioning_key in [None, 'concat', 'crossattn', 'hybrid', 'adm'] + + def forward(self, x, t, c_concat: list = None, c_crossattn: list = None): + if self.conditioning_key is None: + out = self.diffusion_model(x, t) + elif self.conditioning_key == 'concat': + xc = torch.cat([x] + c_concat, dim=1) + out = self.diffusion_model(xc, t) + elif self.conditioning_key == 'crossattn': + cc = torch.cat(c_crossattn, 1) + out = self.diffusion_model(x, t, context=cc) + elif self.conditioning_key == 'hybrid': + xc = torch.cat([x] + c_concat, dim=1) + cc = torch.cat(c_crossattn, 1) + out = self.diffusion_model(xc, t, context=cc) + elif self.conditioning_key == 'adm': + cc = c_crossattn[0] + out = self.diffusion_model(x, t, y=cc) + else: + raise NotImplementedError() + + return out diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm_legacy.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm_legacy.py new file mode 100644 index 000000000000..4f0f495aaf1b --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm_legacy.py @@ -0,0 +1,1482 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +wild mixture of +https://github.com/lucidrains/denoising-diffusion-pytorch/blob/7706bdfc6f527f58d33f84b7b522e61e6e3164b3/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py +https://github.com/openai/improved-diffusion/blob/e94489283bb876ac1477d5dd7709bbbd2d9902ce/improved_diffusion/gaussian_diffusion.py +https://github.com/CompVis/taming-transformers +-- merci +""" + +import numpy as np +import pytorch_lightning as pl +import torch +import torch.nn as nn +from apex import amp +from apex.contrib.clip_grad import clip_grad_norm_ +from contextlib import contextmanager +from einops import rearrange, repeat +from functools import partial +from omegaconf import open_dict +from pytorch_lightning.utilities import GradClipAlgorithmType +from pytorch_lightning.utilities.distributed import rank_zero_only +from torch._dynamo import optimize +from torch._inductor import config as inductor_config +from torch.optim.lr_scheduler import LambdaLR +from torchvision.utils import make_grid +from tqdm import tqdm + +from nemo.collections.multimodal.models.multimodal_base_model import MegatronMultimodalModel +from nemo.collections.multimodal.models.stable_diffusion.diffusion_model import DiffusionModel +from nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder import VQModelInterface, IdentityFirstStage, \ + AutoencoderKL +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import random_dropout +from nemo.collections.multimodal.models.stable_diffusion.ldm_config import DDPMDiffusionModelConfig, \ + LatentDiffusionModelConfig +from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import make_beta_schedule, \ + extract_into_tensor, noise_like +from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import normal_kl, \ + DiagonalGaussianDistribution +from nemo.collections.multimodal.parts.stable_diffusion.utils import log_txt_as_img, exists, default, ismap, isimage, \ + mean_flat, count_params +from nemo.core.classes.common import Serialization +from nemo.utils import logging + +__conditioning_keys__ = {'concat': 'c_concat', + 'crossattn': 'c_crossattn', + 'adm': 'y'} + + +def disabled_train(self, mode=True): + """Overwrite model.train with this function to make sure train/eval mode + does not change anymore.""" + return self + + +def uniform_on_device(r1, r2, shape, device): + return (r1 - r2) * torch.rand(*shape, device=device) + r2 + + +class DDPM(DiffusionModel): + # classic DDPM with Gaussian diffusion, in image space + def apply_model(self, x_t, t, c): + return None + + def get_conditioning(self, c): + return c + + def list_available_models(self): + return None + + def setup_training_data(self, cfg): + return None + + def setup_validation_data(self, cfg): + return None + + def __init__(self, cfg: DDPMDiffusionModelConfig, trainer=None): + super().__init__(cfg=cfg, trainer=trainer) + assert cfg.parameterization in ["eps", "x0"], 'currently only supporting "eps" and "x0"' + self.use_fp16 = (trainer is not None) and (trainer.precision == 16) + self.parameterization = cfg.parameterization + print(f"{self.__class__.__name__}: Running in {self.parameterization}-prediction mode") + self.cond_stage_model = None + self.clip_denoised = cfg.clip_denoised + self.log_every_t = cfg.log_every_t + self.first_stage_key = cfg.first_stage_key + self.image_size = cfg.image_size # try conv? + self.channels = cfg.channels + self.use_positional_encodings = cfg.use_positional_encodings + if self.use_fp16: + with open_dict(cfg.unet_config): + cfg.unet_config.use_fp16 = True + self.model = DiffusionWrapper(cfg.unet_config, cfg.conditioning_key) + count_params(self.model, verbose=True) + self.use_ema = cfg.use_ema + if self.use_ema: + self.model_ema = LitEma(self.model) + print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.") + + self.use_scheduler = cfg.scheduler_config is not None + if self.use_scheduler: + self.scheduler_config = cfg.scheduler_config + + self.v_posterior = cfg.v_posterior + self.original_elbo_weight = cfg.original_elbo_weight + self.l_simple_weight = cfg.l_simple_weight + + if cfg.monitor is not None: + self.monitor = cfg.monitor + # if cfg.ckpt_path is not None: + # self.init_from_ckpt(cfg.ckpt_path, ignore_keys=cfg.ignore_keys, only_model=cfg.load_only_unet) + + self.register_schedule(given_betas=cfg.given_betas, beta_schedule=cfg.beta_schedule, timesteps=cfg.timesteps, + linear_start=cfg.linear_start, linear_end=cfg.linear_end, cosine_s=cfg.cosine_s) + + self.loss_type = cfg.loss_type + + self.learn_logvar = cfg.learn_logvar + self.logvar = torch.full(fill_value=cfg.logvar_init, size=(self.num_timesteps,)) + if self.learn_logvar: + self.logvar = nn.Parameter(self.logvar, requires_grad=True) + self.learning_rate = cfg.learning_rate + + def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, + linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + if exists(given_betas): + betas = given_betas + else: + betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, + cosine_s=cosine_s) + alphas = 1. - betas + alphas_cumprod = np.cumprod(alphas, axis=0) + alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) + + timesteps, = betas.shape + self.num_timesteps = int(timesteps) + self.linear_start = linear_start + self.linear_end = linear_end + assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' + + to_torch = partial(torch.tensor, dtype=torch.float32) + + self.register_buffer('betas', to_torch(betas)) + self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) + self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) + self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) + self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) + self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1))) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + posterior_variance = (1 - self.v_posterior) * betas * (1. - alphas_cumprod_prev) / ( + 1. - alphas_cumprod) + self.v_posterior * betas + # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) + self.register_buffer('posterior_variance', to_torch(posterior_variance)) + # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain + self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20)))) + self.register_buffer('posterior_mean_coef1', to_torch( + betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod))) + self.register_buffer('posterior_mean_coef2', to_torch( + (1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. - alphas_cumprod))) + + if self.parameterization == "eps": + lvlb_weights = self.betas ** 2 / ( + 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod)) + elif self.parameterization == "x0": + lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2. * 1 - torch.Tensor(alphas_cumprod)) + else: + raise NotImplementedError("mu not supported") + # TODO how to choose this term + lvlb_weights[0] = lvlb_weights[1] + self.register_buffer('lvlb_weights', lvlb_weights, persistent=False) + assert not torch.isnan(self.lvlb_weights).all() + + @contextmanager + def ema_scope(self, context=None): + if self.use_ema: + self.model_ema.store(self.model.parameters()) + self.model_ema.copy_to(self.model) + if context is not None: + print(f"{context}: Switched to EMA weights") + try: + yield None + finally: + if self.use_ema: + self.model_ema.restore(self.model.parameters()) + if context is not None: + print(f"{context}: Restored training weights") + + def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): + sd = torch.load(path, map_location="cpu") + if "state_dict" in list(sd.keys()): + sd = sd["state_dict"] + keys = list(sd.keys()) + for k in keys: + for ik in ignore_keys: + if k.startswith(ik): + print("Deleting key {} from state_dict.".format(k)) + del sd[k] + missing, unexpected = self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict( + sd, strict=False) + print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") + if len(missing) > 0: + print(f"Missing Keys: {missing}") + if len(unexpected) > 0: + print(f"Unexpected Keys: {unexpected}") + + def q_mean_variance(self, x_start, t): + """ + Get the distribution q(x_t | x_0). + :param x_start: the [N x C x ...] tensor of noiseless inputs. + :param t: the number of diffusion steps (minus 1). Here, 0 means one step. + :return: A tuple (mean, variance, log_variance), all of x_start's shape. + """ + mean = (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start) + variance = extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape) + log_variance = extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape) + return mean, variance, log_variance + + def predict_start_from_noise(self, x_t, t, noise): + return ( + extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - + extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise + ) + + def q_posterior(self, x_start, x_t, t): + posterior_mean = ( + extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start + + extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t + ) + posterior_variance = extract_into_tensor(self.posterior_variance, t, x_t.shape) + posterior_log_variance_clipped = extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape) + return posterior_mean, posterior_variance, posterior_log_variance_clipped + + def p_mean_variance(self, x, t, clip_denoised: bool): + model_out = self.model(x, t) + if self.parameterization == "eps": + x_recon = self.predict_start_from_noise(x, t=t, noise=model_out) + elif self.parameterization == "x0": + x_recon = model_out + if clip_denoised: + x_recon.clamp_(-1., 1.) + + model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) + return model_mean, posterior_variance, posterior_log_variance + + @torch.no_grad() + def p_sample(self, x, t, clip_denoised=True, repeat_noise=False): + b, *_, device = *x.shape, x.device + model_mean, _, model_log_variance = self.p_mean_variance(x=x, t=t, clip_denoised=clip_denoised) + noise = noise_like(x.shape, device, repeat_noise) + # no noise when t == 0 + nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))) + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise + + @torch.no_grad() + def p_sample_loop(self, shape, return_intermediates=False): + device = self.betas.device + b = shape[0] + img = torch.randn(shape, device=device) + intermediates = [img] + for i in tqdm(reversed(range(0, self.num_timesteps)), desc='Sampling t', total=self.num_timesteps): + img = self.p_sample(img, torch.full((b,), i, device=device, dtype=torch.long), + clip_denoised=self.clip_denoised) + if i % self.log_every_t == 0 or i == self.num_timesteps - 1: + intermediates.append(img) + if return_intermediates: + return img, intermediates + return img + + @torch.no_grad() + def sample(self, batch_size=16, return_intermediates=False): + image_size = self.image_size + channels = self.channels + return self.p_sample_loop((batch_size, channels, image_size, image_size), + return_intermediates=return_intermediates) + + def q_sample(self, x_start, t, noise=None): + noise = default(noise, lambda: torch.randn_like(x_start)) + return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) + + def get_loss(self, pred, target, mean=True): + if self.use_fp16: + target = target.half() + + if self.loss_type == 'l1': + loss = (target - pred).abs() + if mean: + loss = loss.mean() + elif self.loss_type == 'l2': + if mean: + loss = torch.nn.functional.mse_loss(target, pred) + else: + loss = torch.nn.functional.mse_loss(target, pred, reduction='none') + else: + raise NotImplementedError("unknown loss type '{loss_type}'") + + return loss + + def p_losses(self, x_start, t, noise=None): + noise = default(noise, lambda: torch.randn_like(x_start)) + x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) + model_out = self.model(x_noisy, t) + + loss_dict = {} + if self.parameterization == "eps": + target = noise + elif self.parameterization == "x0": + target = x_start + else: + raise NotImplementedError(f"Paramterization {self.parameterization} not yet supported") + + loss = self.get_loss(model_out, target, mean=False).mean(dim=[1, 2, 3]) + + log_prefix = 'train' if self.training else 'val' + + loss_dict.update({f'{log_prefix}/loss_simple': loss.mean()}) + loss_simple = loss.mean() * self.l_simple_weight + + loss_vlb = (self.lvlb_weights[t] * loss).mean() + loss_dict.update({f'{log_prefix}/loss_vlb': loss_vlb}) + + loss = loss_simple + self.original_elbo_weight * loss_vlb + + loss_dict.update({f'{log_prefix}/loss': loss}) + + return loss, loss_dict + + def forward(self, x, *args, **kwargs): + # b, c, h, w, device, img_size, = *x.shape, x.device, self.image_size + # assert h == img_size and w == img_size, f'height and width of image must be {img_size}' + t = torch.randint(0, self.num_timesteps, (x.shape[0],), device=self.device).long() + return self.p_losses(x, t, *args, **kwargs) + + def get_input(self, batch, k): + x = batch[k] + if len(x.shape) == 3: + x = x[..., None] + x = rearrange(x, 'b h w c -> b c h w') + x = x.to(memory_format=torch.contiguous_format).float() + if self.use_fp16: + x = x.half() + return x + + def shared_step(self, batch): + x = self.get_input(batch, self.first_stage_key) + loss, loss_dict = self(x) + return loss, loss_dict + + def training_step(self, batch, batch_idx): + loss, loss_dict = self.shared_step(batch) + + self.log_dict(loss_dict, prog_bar=False, + logger=True, on_step=True, on_epoch=True) + + self.log("global_step", self.global_step, + prog_bar=True, logger=True, on_step=True, on_epoch=False) + + if self.use_scheduler: + lr = self.optimizers().param_groups[0]['lr'] + self.log('lr_abs', lr, prog_bar=True, logger=True, on_step=True, on_epoch=False) + + return loss + + @torch.no_grad() + def validation_step(self, batch, batch_idx): + _, loss_dict_no_ema = self.shared_step(batch) + with self.ema_scope(): + _, loss_dict_ema = self.shared_step(batch) + loss_dict_ema = {key + '_ema': loss_dict_ema[key] for key in loss_dict_ema} + self.log_dict(loss_dict_no_ema, prog_bar=False, logger=True, on_step=False, on_epoch=True) + self.log_dict(loss_dict_ema, prog_bar=False, logger=True, on_step=False, on_epoch=True) + + def on_train_batch_end(self, *args, **kwargs): + if self.use_ema: + self.model_ema(self.model) + + def _get_rows_from_list(self, samples): + n_imgs_per_row = len(samples) + denoise_grid = rearrange(samples, 'n b c h w -> b n c h w') + denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w') + denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row) + return denoise_grid + + @torch.no_grad() + def log_images(self, batch, N=8, n_row=2, sample=True, return_keys=None, **kwargs): + log = dict() + x = self.get_input(batch, self.first_stage_key) + N = min(x.shape[0], N) + n_row = min(x.shape[0], n_row) + x = x.to(self.device)[:N] + log["inputs"] = x + + # get diffusion row + diffusion_row = list() + x_start = x[:n_row] + + for t in range(self.num_timesteps): + if t % self.log_every_t == 0 or t == self.num_timesteps - 1: + t = repeat(torch.tensor([t]), '1 -> b', b=n_row) + t = t.to(self.device).long() + noise = torch.randn_like(x_start) + x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) + diffusion_row.append(x_noisy) + + log["diffusion_row"] = self._get_rows_from_list(diffusion_row) + + if sample: + # get denoise row + with self.ema_scope("Plotting"): + samples, denoise_row = self.sample(batch_size=N, return_intermediates=True) + + log["samples"] = samples + log["denoise_row"] = self._get_rows_from_list(denoise_row) + + if return_keys: + if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0: + return log + else: + return {key: log[key] for key in return_keys} + return log + + def configure_optimizers(self): + lr = self.learning_rate + params = list(self.model.parameters()) + if self.learn_logvar: + params = params + [self.logvar] + opt = torch.optim.AdamW(params, lr=lr) + return opt + + +class LatentDiffusion(DDPM): + """main class""" + + def __init__(self, cfg: LatentDiffusionModelConfig, trainer=None): + self.num_timesteps_cond = default(cfg.num_timesteps_cond, 1) + self.scale_by_std = cfg.scale_by_std + assert self.num_timesteps_cond <= cfg.timesteps + # for backwards compatibility after implementation of DiffusionWrapper + if cfg.conditioning_key is None: + conditioning_key = 'concat' if cfg.concat_mode else 'crossattn' + else: + conditioning_key = cfg.conditioning_key + if cfg.cond_stage_config == '__is_unconditional__': + conditioning_key = None + ckpt_path = cfg.ckpt_path + ignore_keys = cfg.ignore_keys + cfg.conditioning_key = conditioning_key + super().__init__(cfg=cfg, trainer=trainer) + self.concat_mode = cfg.concat_mode + self.cond_stage_trainable = cfg.cond_stage_trainable + self.cond_stage_key = cfg.cond_stage_key + try: + self.num_downs = len(cfg.first_stage_config.ddconfig.ch_mult) - 1 + except: + self.num_downs = 0 + if not cfg.scale_by_std: + self.scale_factor = cfg.scale_factor + else: + self.register_buffer('scale_factor', torch.tensor(cfg.scale_factor)) + if self.use_fp16: + with open_dict(cfg.cond_stage_config): + cfg.cond_stage_config.update({"use_fp16": True}) + else: + with open_dict(cfg.cond_stage_config): + cfg.cond_stage_config.update({"use_fp16": False}) + self.instantiate_first_stage(cfg.first_stage_config) + self.instantiate_cond_stage(cfg.cond_stage_config) + self.cond_stage_forward = cfg.cond_stage_forward + self.clip_denoised = False + self.bbox_tokenizer = None + self.text_embedding_dropout_rate = cfg.text_embedding_dropout_rate + self.fused_opt = cfg.fused_opt + + self.restarted_from_ckpt = False + if ckpt_path is not None: + self.init_from_ckpt(ckpt_path, ignore_keys) + self.restarted_from_ckpt = True + + # Fusing VAE and CLIP doesn't give benefit + if cfg.get("inductor", False): + # TorchInductor with CUDA graph can lead to OOM + inductor_config.triton.cudagraphs = cfg.get("inductor_cudagraphs", False) + self.model = optimize("inductor")(self.model) + + def make_cond_schedule(self, ): + self.cond_ids = torch.full(size=(self.num_timesteps,), fill_value=self.num_timesteps - 1, dtype=torch.long) + ids = torch.round(torch.linspace(0, self.num_timesteps - 1, self.num_timesteps_cond)).long() + self.cond_ids[:self.num_timesteps_cond] = ids + + @rank_zero_only + @torch.no_grad() + def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): + # only for very first batch + if self.scale_by_std and self.current_epoch == 0 and self.global_step == 0 and batch_idx == 0 and not self.restarted_from_ckpt: + assert self.scale_factor == 1., 'rather not use custom rescaling and std-rescaling simultaneously' + # set rescale weight to 1./std of encodings + print("### USING STD-RESCALING ###") + x = super().get_input(batch, self.first_stage_key) + x = x.to(self.device) + encoder_posterior = self.encode_first_stage(x) + z = self.get_first_stage_encoding(encoder_posterior).detach() + del self.scale_factor + self.register_buffer('scale_factor', 1. / z.flatten().std()) + print(f"setting self.scale_factor to {self.scale_factor}") + print("### USING STD-RESCALING ###") + + def register_schedule(self, + given_betas=None, beta_schedule="linear", timesteps=1000, + linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + super().register_schedule(given_betas, beta_schedule, timesteps, linear_start, linear_end, cosine_s) + + self.shorten_cond_schedule = self.num_timesteps_cond > 1 + if self.shorten_cond_schedule: + self.make_cond_schedule() + + def instantiate_first_stage(self, config): + model = LatentDiffusion.from_config_dict(config) + self.first_stage_model = model.eval() + self.first_stage_model.train = disabled_train + for param in self.first_stage_model.parameters(): + param.requires_grad = False + + def instantiate_cond_stage(self, config): + if not self.cond_stage_trainable: + if config == "__is_first_stage__": + print("Using first stage also as cond stage.") + self.cond_stage_model = self.first_stage_model + elif config == "__is_unconditional__": + print(f"Training {self.__class__.__name__} as an unconditional model.") + self.cond_stage_model = None + # self.be_unconditional = True + else: + model = LatentDiffusion.from_config_dict(config) + self.cond_stage_model = model.eval() + self.cond_stage_model.train = disabled_train + for param in self.cond_stage_model.parameters(): + param.requires_grad = False + else: + assert config != '__is_first_stage__' + assert config != '__is_unconditional__' + model = LatentDiffusion.from_config_dict(config) + self.cond_stage_model = model + + def _get_denoise_row_from_list(self, samples, desc='', force_no_decoder_quantization=False): + denoise_row = [] + for zd in tqdm(samples, desc=desc): + denoise_row.append(self.decode_first_stage(zd.to(self.device), + force_not_quantize=force_no_decoder_quantization)) + n_imgs_per_row = len(denoise_row) + denoise_row = torch.stack(denoise_row) # n_log_step, n_row, C, H, W + denoise_grid = rearrange(denoise_row, 'n b c h w -> b n c h w') + denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w') + denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row) + return denoise_grid + + def get_first_stage_encoding(self, encoder_posterior): + if isinstance(encoder_posterior, DiagonalGaussianDistribution): + z = encoder_posterior.sample() + elif isinstance(encoder_posterior, torch.Tensor): + z = encoder_posterior + else: + raise NotImplementedError(f"encoder_posterior of type '{type(encoder_posterior)}' not yet implemented") + return self.scale_factor * z + + def get_learned_conditioning(self, c): + if self.cond_stage_forward is None: + if hasattr(self.cond_stage_model, 'encode') and callable(self.cond_stage_model.encode): + c = self.cond_stage_model.encode(c) + if isinstance(c, DiagonalGaussianDistribution): + c = c.mode() + else: + c = self.cond_stage_model(c) + else: + assert hasattr(self.cond_stage_model, self.cond_stage_forward) + c = getattr(self.cond_stage_model, self.cond_stage_forward)(c) + return c + + def meshgrid(self, h, w): + y = torch.arange(0, h).view(h, 1, 1).repeat(1, w, 1) + x = torch.arange(0, w).view(1, w, 1).repeat(h, 1, 1) + + arr = torch.cat([y, x], dim=-1) + return arr + + def delta_border(self, h, w): + """ + :param h: height + :param w: width + :return: normalized distance to image border, + wtith min distance = 0 at border and max dist = 0.5 at image center + """ + lower_right_corner = torch.tensor([h - 1, w - 1]).view(1, 1, 2) + arr = self.meshgrid(h, w) / lower_right_corner + dist_left_up = torch.min(arr, dim=-1, keepdims=True)[0] + dist_right_down = torch.min(1 - arr, dim=-1, keepdims=True)[0] + edge_dist = torch.min(torch.cat([dist_left_up, dist_right_down], dim=-1), dim=-1)[0] + return edge_dist + + def get_weighting(self, h, w, Ly, Lx, device): + weighting = self.delta_border(h, w) + weighting = torch.clip(weighting, self.split_input_params["clip_min_weight"], + self.split_input_params["clip_max_weight"], ) + weighting = weighting.view(1, h * w, 1).repeat(1, 1, Ly * Lx).to(device) + + if self.split_input_params["tie_braker"]: + L_weighting = self.delta_border(Ly, Lx) + L_weighting = torch.clip(L_weighting, + self.split_input_params["clip_min_tie_weight"], + self.split_input_params["clip_max_tie_weight"]) + + L_weighting = L_weighting.view(1, 1, Ly * Lx).to(device) + weighting = weighting * L_weighting + return weighting + + def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo load once not every time, shorten code + """ + :param x: img of size (bs, c, h, w) + :return: n img crops of size (n, bs, c, kernel_size[0], kernel_size[1]) + """ + bs, nc, h, w = x.shape + + # number of crops in image + Ly = (h - kernel_size[0]) // stride[0] + 1 + Lx = (w - kernel_size[1]) // stride[1] + 1 + + if uf == 1 and df == 1: + fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) + unfold = torch.nn.Unfold(**fold_params) + + fold = torch.nn.Fold(output_size=x.shape[2:], **fold_params) + + weighting = self.get_weighting(kernel_size[0], kernel_size[1], Ly, Lx, x.device).to(x.dtype) + normalization = fold(weighting).view(1, 1, h, w) # normalizes the overlap + weighting = weighting.view((1, 1, kernel_size[0], kernel_size[1], Ly * Lx)) + + elif uf > 1 and df == 1: + fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) + unfold = torch.nn.Unfold(**fold_params) + + fold_params2 = dict(kernel_size=(kernel_size[0] * uf, kernel_size[0] * uf), + dilation=1, padding=0, + stride=(stride[0] * uf, stride[1] * uf)) + fold = torch.nn.Fold(output_size=(x.shape[2] * uf, x.shape[3] * uf), **fold_params2) + + weighting = self.get_weighting(kernel_size[0] * uf, kernel_size[1] * uf, Ly, Lx, x.device).to(x.dtype) + normalization = fold(weighting).view(1, 1, h * uf, w * uf) # normalizes the overlap + weighting = weighting.view((1, 1, kernel_size[0] * uf, kernel_size[1] * uf, Ly * Lx)) + + elif df > 1 and uf == 1: + fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) + unfold = torch.nn.Unfold(**fold_params) + + fold_params2 = dict(kernel_size=(kernel_size[0] // df, kernel_size[0] // df), + dilation=1, padding=0, + stride=(stride[0] // df, stride[1] // df)) + fold = torch.nn.Fold(output_size=(x.shape[2] // df, x.shape[3] // df), **fold_params2) + + weighting = self.get_weighting(kernel_size[0] // df, kernel_size[1] // df, Ly, Lx, x.device).to(x.dtype) + normalization = fold(weighting).view(1, 1, h // df, w // df) # normalizes the overlap + weighting = weighting.view((1, 1, kernel_size[0] // df, kernel_size[1] // df, Ly * Lx)) + + else: + raise NotImplementedError + + return fold, unfold, normalization, weighting + + @torch.no_grad() + def get_input(self, batch, k, return_first_stage_outputs=False, force_c_encode=False, + cond_key=None, return_original_cond=False, bs=None): + x = super().get_input(batch, k) + if bs is not None: + x = x[:bs] + x = x.to(self.device) + encoder_posterior = self.encode_first_stage(x) + z = self.get_first_stage_encoding(encoder_posterior).detach() + + if self.model.conditioning_key is not None: + if cond_key is None: + cond_key = self.cond_stage_key + if cond_key != self.first_stage_key: + if cond_key in ['captions', 'coordinates_bbox', 'txt']: + xc = batch[cond_key] + elif cond_key == 'class_label': + xc = batch + else: + xc = super().get_input(batch, cond_key).to(self.device) + else: + xc = x + if not self.cond_stage_trainable or force_c_encode: + if isinstance(xc, dict) or isinstance(xc, list): + # import pudb; pudb.set_trace() + c = self.get_learned_conditioning(xc) + else: + c = self.get_learned_conditioning(xc.to(self.device)) + else: + c = xc + if bs is not None: + c = c[:bs] + + if self.use_positional_encodings: + pos_x, pos_y = self.compute_latent_shifts(batch) + ckey = __conditioning_keys__[self.model.conditioning_key] + c = {ckey: c, 'pos_x': pos_x, 'pos_y': pos_y} + + if self.text_embedding_dropout_rate > 0: + assert (self.text_embedding_dropout_rate < 1.) + c = random_dropout(c, drop_rate=self.text_embedding_dropout_rate) + + else: + c = None + xc = None + if self.use_positional_encodings: + pos_x, pos_y = self.compute_latent_shifts(batch) + c = {'pos_x': pos_x, 'pos_y': pos_y} + out = [z, c] + if return_first_stage_outputs: + xrec = self.decode_first_stage(z) + out.extend([x, xrec]) + if return_original_cond: + out.append(xc) + return out + + @torch.no_grad() + def decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): + if predict_cids: + if z.dim() == 4: + z = torch.argmax(z.exp(), dim=1).long() + z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) + z = rearrange(z, 'b h w c -> b c h w').contiguous() + + z = 1. / self.scale_factor * z + + if hasattr(self, "split_input_params"): + if self.split_input_params["patch_distributed_vq"]: + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. (64, 64) + uf = self.split_input_params["vqf"] + bs, nc, h, w = z.shape + if ks[0] > h or ks[1] > w: + ks = (min(ks[0], h), min(ks[1], w)) + print("reducing Kernel") + + if stride[0] > h or stride[1] > w: + stride = (min(stride[0], h), min(stride[1], w)) + print("reducing stride") + + fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf) + + z = unfold(z) # (bn, nc * prod(**ks), L) + # 1. Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + # 2. apply model loop over last dim + if isinstance(self.first_stage_model, VQModelInterface): + output_list = [self.first_stage_model.decode(z[:, :, :, :, i], + force_not_quantize=predict_cids or force_not_quantize) + for i in range(z.shape[-1])] + else: + + output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) + for i in range(z.shape[-1])] + + o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) + o = o * weighting + # Reverse 1. reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + decoded = fold(o) + decoded = decoded / normalization # norm is shape (1, 1, h, w) + return decoded + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + # same as above but without decorator + def differentiable_decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): + if predict_cids: + if z.dim() == 4: + z = torch.argmax(z.exp(), dim=1).long() + z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) + z = rearrange(z, 'b h w c -> b c h w').contiguous() + + z = 1. / self.scale_factor * z + + if hasattr(self, "split_input_params"): + if self.split_input_params["patch_distributed_vq"]: + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. (64, 64) + uf = self.split_input_params["vqf"] + bs, nc, h, w = z.shape + if ks[0] > h or ks[1] > w: + ks = (min(ks[0], h), min(ks[1], w)) + print("reducing Kernel") + + if stride[0] > h or stride[1] > w: + stride = (min(stride[0], h), min(stride[1], w)) + print("reducing stride") + + fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf) + + z = unfold(z) # (bn, nc * prod(**ks), L) + # 1. Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + # 2. apply model loop over last dim + if isinstance(self.first_stage_model, VQModelInterface): + output_list = [self.first_stage_model.decode(z[:, :, :, :, i], + force_not_quantize=predict_cids or force_not_quantize) + for i in range(z.shape[-1])] + else: + + output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) + for i in range(z.shape[-1])] + + o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) + o = o * weighting + # Reverse 1. reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + decoded = fold(o) + decoded = decoded / normalization # norm is shape (1, 1, h, w) + return decoded + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + @torch.no_grad() + def encode_first_stage(self, x): + if hasattr(self, "split_input_params"): + if self.split_input_params["patch_distributed_vq"]: + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. (64, 64) + df = self.split_input_params["vqf"] + self.split_input_params['original_image_size'] = x.shape[-2:] + bs, nc, h, w = x.shape + if ks[0] > h or ks[1] > w: + ks = (min(ks[0], h), min(ks[1], w)) + print("reducing Kernel") + + if stride[0] > h or stride[1] > w: + stride = (min(stride[0], h), min(stride[1], w)) + print("reducing stride") + + fold, unfold, normalization, weighting = self.get_fold_unfold(x, ks, stride, df=df) + z = unfold(x) # (bn, nc * prod(**ks), L) + # Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + output_list = [self.first_stage_model.encode(z[:, :, :, :, i]) + for i in range(z.shape[-1])] + + o = torch.stack(output_list, axis=-1) + o = o * weighting + + # Reverse reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + decoded = fold(o) + decoded = decoded / normalization + return decoded + + else: + return self.first_stage_model.encode(x) + else: + return self.first_stage_model.encode(x) + + def shared_step(self, batch, **kwargs): + x, c = self.get_input(batch, self.first_stage_key) + loss = self(x, c) + return loss + + def forward(self, x, c, *args, **kwargs): + t = torch.randint(0, self.num_timesteps, (x.shape[0],), device=self.device).long() + if self.model.conditioning_key is not None: + assert c is not None + if self.cond_stage_trainable: + c = self.get_learned_conditioning(c) + if self.shorten_cond_schedule: # TODO: drop this option + tc = self.cond_ids[t].to(self.device) + c = self.q_sample(x_start=c, t=tc, noise=torch.randn_like(c.float())) + return self.p_losses(x, c, t, *args, **kwargs) + + def _rescale_annotations(self, bboxes, crop_coordinates): # TODO: move to dataset + def rescale_bbox(bbox): + x0 = clamp((bbox[0] - crop_coordinates[0]) / crop_coordinates[2]) + y0 = clamp((bbox[1] - crop_coordinates[1]) / crop_coordinates[3]) + w = min(bbox[2] / crop_coordinates[2], 1 - x0) + h = min(bbox[3] / crop_coordinates[3], 1 - y0) + return x0, y0, w, h + + return [rescale_bbox(b) for b in bboxes] + + def apply_model(self, x_noisy, t, cond, return_ids=False): + + if isinstance(cond, dict): + # hybrid case, cond is exptected to be a dict + pass + else: + if not isinstance(cond, list): + cond = [cond] + key = 'c_concat' if self.model.conditioning_key == 'concat' else 'c_crossattn' + cond = {key: cond} + + if hasattr(self, "split_input_params"): + assert len(cond) == 1 # todo can only deal with one conditioning atm + assert not return_ids + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. (64, 64) + + h, w = x_noisy.shape[-2:] + + fold, unfold, normalization, weighting = self.get_fold_unfold(x_noisy, ks, stride) + + z = unfold(x_noisy) # (bn, nc * prod(**ks), L) + # Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + z_list = [z[:, :, :, :, i] for i in range(z.shape[-1])] + + if self.cond_stage_key in ["image", "LR_image", "segmentation", + 'bbox_img'] and self.model.conditioning_key: # todo check for completeness + c_key = next(iter(cond.keys())) # get key + c = next(iter(cond.values())) # get value + assert (len(c) == 1) # todo extend to list with more than one elem + c = c[0] # get element + + c = unfold(c) + c = c.view((c.shape[0], -1, ks[0], ks[1], c.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + cond_list = [{c_key: [c[:, :, :, :, i]]} for i in range(c.shape[-1])] + + elif self.cond_stage_key == 'coordinates_bbox': + assert 'original_image_size' in self.split_input_params, 'BoudingBoxRescaling is missing original_image_size' + + # assuming padding of unfold is always 0 and its dilation is always 1 + n_patches_per_row = int((w - ks[0]) / stride[0] + 1) + full_img_h, full_img_w = self.split_input_params['original_image_size'] + # as we are operating on latents, we need the factor from the original image size to the + # spatial latent size to properly rescale the crops for regenerating the bbox annotations + num_downs = self.first_stage_model.encoder.num_resolutions - 1 + rescale_latent = 2 ** (num_downs) + + # get top left postions of patches as conforming for the bbbox tokenizer, therefore we + # need to rescale the tl patch coordinates to be in between (0,1) + tl_patch_coordinates = [(rescale_latent * stride[0] * (patch_nr % n_patches_per_row) / full_img_w, + rescale_latent * stride[1] * (patch_nr // n_patches_per_row) / full_img_h) + for patch_nr in range(z.shape[-1])] + + # patch_limits are tl_coord, width and height coordinates as (x_tl, y_tl, h, w) + patch_limits = [(x_tl, y_tl, + rescale_latent * ks[0] / full_img_w, + rescale_latent * ks[1] / full_img_h) for x_tl, y_tl in tl_patch_coordinates] + # patch_values = [(np.arange(x_tl,min(x_tl+ks, 1.)),np.arange(y_tl,min(y_tl+ks, 1.))) for x_tl, y_tl in tl_patch_coordinates] + + # tokenize crop coordinates for the bounding boxes of the respective patches + patch_limits_tknzd = [torch.LongTensor(self.bbox_tokenizer._crop_encoder(bbox))[None].to(self.device) + for bbox in patch_limits] # list of length l with tensors of shape (1, 2) + print(patch_limits_tknzd[0].shape) + # cut tknzd crop position from conditioning + assert isinstance(cond, dict), 'cond must be dict to be fed into model' + cut_cond = cond['c_crossattn'][0][..., :-2].to(self.device) + print(cut_cond.shape) + + adapted_cond = torch.stack([torch.cat([cut_cond, p], dim=1) for p in patch_limits_tknzd]) + adapted_cond = rearrange(adapted_cond, 'l b n -> (l b) n') + print(adapted_cond.shape) + adapted_cond = self.get_learned_conditioning(adapted_cond) + print(adapted_cond.shape) + adapted_cond = rearrange(adapted_cond, '(l b) n d -> l b n d', l=z.shape[-1]) + print(adapted_cond.shape) + + cond_list = [{'c_crossattn': [e]} for e in adapted_cond] + + else: + cond_list = [cond for i in range(z.shape[-1])] # Todo make this more efficient + + # apply model by loop over crops + output_list = [self.model(z_list[i], t, **cond_list[i]) for i in range(z.shape[-1])] + assert not isinstance(output_list[0], + tuple) # todo cant deal with multiple model outputs check this never happens + + o = torch.stack(output_list, axis=-1) + o = o * weighting + # Reverse reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + x_recon = fold(o) / normalization + + else: + x_recon = self.model(x_noisy, t, **cond) + + if isinstance(x_recon, tuple) and not return_ids: + return x_recon[0] + else: + return x_recon + + def _predict_eps_from_xstart(self, x_t, t, pred_xstart): + return (extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart) / \ + extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) + + def _prior_bpd(self, x_start): + """ + Get the prior KL term for the variational lower-bound, measured in + bits-per-dim. + This term can't be optimized, as it only depends on the encoder. + :param x_start: the [N x C x ...] tensor of inputs. + :return: a batch of [N] KL values (in bits), one per batch element. + """ + batch_size = x_start.shape[0] + t = torch.tensor([self.num_timesteps - 1] * batch_size, device=x_start.device) + qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t) + kl_prior = normal_kl(mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0) + return mean_flat(kl_prior) / np.log(2.0) + + def p_losses(self, x_start, cond, t, noise=None): + noise = default(noise, lambda: torch.randn_like(x_start)) + x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) + model_output = self.apply_model(x_noisy, t, cond) + + loss_dict = {} + prefix = 'train' if self.training else 'val' + + if self.parameterization == "x0": + target = x_start + elif self.parameterization == "eps": + target = noise + else: + raise NotImplementedError() + + loss_simple = self.get_loss(model_output, target, mean=False).mean([1, 2, 3]) + loss_dict.update({f'{prefix}/loss_simple': loss_simple.mean()}) + self.logvar = self.logvar.to(self.device) + logvar_t = self.logvar[t].to(self.device) + loss = loss_simple / torch.exp(logvar_t) + logvar_t + # loss = loss_simple / torch.exp(self.logvar) + self.logvar + if self.learn_logvar: + loss_dict.update({f'{prefix}/loss_gamma': loss.mean()}) + loss_dict.update({'logvar': self.logvar.data.mean()}) + + loss = self.l_simple_weight * loss.mean() + + loss_vlb = self.get_loss(model_output, target, mean=False).mean(dim=(1, 2, 3)) + loss_vlb = (self.lvlb_weights[t] * loss_vlb).mean() + loss_dict.update({f'{prefix}/loss_vlb': loss_vlb}) + loss += (self.original_elbo_weight * loss_vlb) + loss_dict.update({f'{prefix}/loss': loss}) + + return loss, loss_dict + + def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codebook_ids=False, quantize_denoised=False, + return_x0=False, score_corrector=None, corrector_kwargs=None): + t_in = t + model_out = self.apply_model(x, t_in, c, return_ids=return_codebook_ids) + + if score_corrector is not None: + assert self.parameterization == "eps" + model_out = score_corrector.modify_score(self, model_out, x, t, c, **corrector_kwargs) + + if return_codebook_ids: + model_out, logits = model_out + + if self.parameterization == "eps": + x_recon = self.predict_start_from_noise(x, t=t, noise=model_out) + elif self.parameterization == "x0": + x_recon = model_out + else: + raise NotImplementedError() + + if clip_denoised: + x_recon.clamp_(-1., 1.) + if quantize_denoised: + x_recon, _, [_, _, indices] = self.first_stage_model.quantize(x_recon) + model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) + if return_codebook_ids: + return model_mean, posterior_variance, posterior_log_variance, logits + elif return_x0: + return model_mean, posterior_variance, posterior_log_variance, x_recon + else: + return model_mean, posterior_variance, posterior_log_variance + + @torch.no_grad() + def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False, + return_codebook_ids=False, quantize_denoised=False, return_x0=False, + temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None): + b, *_, device = *x.shape, x.device + outputs = self.p_mean_variance(x=x, c=c, t=t, clip_denoised=clip_denoised, + return_codebook_ids=return_codebook_ids, + quantize_denoised=quantize_denoised, + return_x0=return_x0, + score_corrector=score_corrector, corrector_kwargs=corrector_kwargs) + if return_codebook_ids: + raise DeprecationWarning("Support dropped.") + model_mean, _, model_log_variance, logits = outputs + elif return_x0: + model_mean, _, model_log_variance, x0 = outputs + else: + model_mean, _, model_log_variance = outputs + + noise = noise_like(x.shape, device, repeat_noise) * temperature + if noise_dropout > 0.: + noise = torch.nn.functional.dropout(noise, p=noise_dropout) + # no noise when t == 0 + nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))) + + if return_codebook_ids: + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, logits.argmax(dim=1) + if return_x0: + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, x0 + else: + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise + + @torch.no_grad() + def progressive_denoising(self, cond, shape, verbose=True, callback=None, quantize_denoised=False, + img_callback=None, mask=None, x0=None, temperature=1., noise_dropout=0., + score_corrector=None, corrector_kwargs=None, batch_size=None, x_T=None, start_T=None, + log_every_t=None): + if not log_every_t: + log_every_t = self.log_every_t + timesteps = self.num_timesteps + if batch_size is not None: + b = batch_size if batch_size is not None else shape[0] + shape = [batch_size] + list(shape) + else: + b = batch_size = shape[0] + if x_T is None: + img = torch.randn(shape, device=self.device) + else: + img = x_T + intermediates = [] + if cond is not None: + if isinstance(cond, dict): + cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else + list(map(lambda x: x[:batch_size], cond[key])) for key in cond} + else: + cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] + + if start_T is not None: + timesteps = min(timesteps, start_T) + iterator = tqdm(reversed(range(0, timesteps)), desc='Progressive Generation', + total=timesteps) if verbose else reversed( + range(0, timesteps)) + if type(temperature) == float: + temperature = [temperature] * timesteps + + for i in iterator: + ts = torch.full((b,), i, device=self.device, dtype=torch.long) + if self.shorten_cond_schedule: + assert self.model.conditioning_key != 'hybrid' + tc = self.cond_ids[ts].to(cond.device) + cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond)) + + img, x0_partial = self.p_sample(img, cond, ts, + clip_denoised=self.clip_denoised, + quantize_denoised=quantize_denoised, return_x0=True, + temperature=temperature[i], noise_dropout=noise_dropout, + score_corrector=score_corrector, corrector_kwargs=corrector_kwargs) + if mask is not None: + assert x0 is not None + img_orig = self.q_sample(x0, ts) + img = img_orig * mask + (1. - mask) * img + + if i % log_every_t == 0 or i == timesteps - 1: + intermediates.append(x0_partial) + if callback: callback(i) + if img_callback: img_callback(img, i) + return img, intermediates + + @torch.no_grad() + def p_sample_loop(self, cond, shape, return_intermediates=False, + x_T=None, verbose=True, callback=None, timesteps=None, quantize_denoised=False, + mask=None, x0=None, img_callback=None, start_T=None, + log_every_t=None): + + if not log_every_t: + log_every_t = self.log_every_t + device = self.betas.device + b = shape[0] + if x_T is None: + img = torch.randn(shape, device=device) + else: + img = x_T + + intermediates = [img] + if timesteps is None: + timesteps = self.num_timesteps + + if start_T is not None: + timesteps = min(timesteps, start_T) + iterator = tqdm(reversed(range(0, timesteps)), desc='Sampling t', total=timesteps) if verbose else reversed( + range(0, timesteps)) + + if mask is not None: + assert x0 is not None + assert x0.shape[2:3] == mask.shape[2:3] # spatial size has to match + + for i in iterator: + ts = torch.full((b,), i, device=device, dtype=torch.long) + if self.shorten_cond_schedule: + assert self.model.conditioning_key != 'hybrid' + tc = self.cond_ids[ts].to(cond.device) + cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond)) + + img = self.p_sample(img, cond, ts, + clip_denoised=self.clip_denoised, + quantize_denoised=quantize_denoised) + if mask is not None: + img_orig = self.q_sample(x0, ts) + img = img_orig * mask + (1. - mask) * img + + if i % log_every_t == 0 or i == timesteps - 1: + intermediates.append(img) + if callback: callback(i) + if img_callback: img_callback(img, i) + + if return_intermediates: + return img, intermediates + return img + + @torch.no_grad() + def sample(self, cond, batch_size=16, return_intermediates=False, x_T=None, + verbose=True, timesteps=None, quantize_denoised=False, + mask=None, x0=None, shape=None, **kwargs): + if shape is None: + shape = (batch_size, self.channels, self.image_size, self.image_size) + if cond is not None: + if isinstance(cond, dict): + cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else + list(map(lambda x: x[:batch_size], cond[key])) for key in cond} + else: + cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] + return self.p_sample_loop(cond, + shape, + return_intermediates=return_intermediates, x_T=x_T, + verbose=verbose, timesteps=timesteps, quantize_denoised=quantize_denoised, + mask=mask, x0=x0) + + @torch.no_grad() + def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): + + if ddim: + ddim_sampler = DDIMSampler(self) + shape = (self.channels, self.image_size, self.image_size) + samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, + shape, cond, verbose=False, **kwargs) + + else: + samples, intermediates = self.sample(cond=cond, batch_size=batch_size, + return_intermediates=True, **kwargs) + + return samples, intermediates + + @torch.no_grad() + def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200, ddim_eta=1., return_keys=None, + quantize_denoised=True, inpaint=True, plot_denoise_rows=False, plot_progressive_rows=True, + plot_diffusion_rows=True, **kwargs): + + use_ddim = ddim_steps is not None + + log = dict() + z, c, x, xrec, xc = self.get_input(batch, self.first_stage_key, + return_first_stage_outputs=True, + force_c_encode=True, + return_original_cond=True, + bs=N) + N = min(x.shape[0], N) + n_row = min(x.shape[0], n_row) + log["inputs"] = x + log["reconstruction"] = xrec + if self.model.conditioning_key is not None: + if hasattr(self.cond_stage_model, "decode"): + xc = self.cond_stage_model.decode(c) + log["conditioning"] = xc + elif self.cond_stage_key in ["caption"]: + xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["caption"]) + log["conditioning"] = xc + elif self.cond_stage_key == 'class_label': + xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["human_label"]) + log['conditioning'] = xc + elif isimage(xc): + log["conditioning"] = xc + if ismap(xc): + log["original_conditioning"] = self.to_rgb(xc) + + if plot_diffusion_rows: + # get diffusion row + diffusion_row = list() + z_start = z[:n_row] + for t in range(self.num_timesteps): + if t % self.log_every_t == 0 or t == self.num_timesteps - 1: + t = repeat(torch.tensor([t]), '1 -> b', b=n_row) + t = t.to(self.device).long() + noise = torch.randn_like(z_start) + z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise) + diffusion_row.append(self.decode_first_stage(z_noisy)) + + diffusion_row = torch.stack(diffusion_row) # n_log_step, n_row, C, H, W + diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w') + diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w') + diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0]) + log["diffusion_row"] = diffusion_grid + + if sample: + # get denoise row + with self.ema_scope("Plotting"): + samples, z_denoise_row = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, + ddim_steps=ddim_steps, eta=ddim_eta) + # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True) + x_samples = self.decode_first_stage(samples) + log["samples"] = x_samples + if plot_denoise_rows: + denoise_grid = self._get_denoise_row_from_list(z_denoise_row) + log["denoise_row"] = denoise_grid + + if quantize_denoised and not isinstance(self.first_stage_model, AutoencoderKL) and not isinstance( + self.first_stage_model, IdentityFirstStage): + # also display when quantizing x0 while sampling + with self.ema_scope("Plotting Quantized Denoised"): + samples, z_denoise_row = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, + ddim_steps=ddim_steps, eta=ddim_eta, + quantize_denoised=True) + # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True, + # quantize_denoised=True) + x_samples = self.decode_first_stage(samples.to(self.device)) + log["samples_x0_quantized"] = x_samples + + if inpaint: + # make a simple center square + b, h, w = z.shape[0], z.shape[2], z.shape[3] + mask = torch.ones(N, h, w).to(self.device) + # zeros will be filled in + mask[:, h // 4:3 * h // 4, w // 4:3 * w // 4] = 0. + mask = mask[:, None, ...] + with self.ema_scope("Plotting Inpaint"): + samples, _ = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, + ddim_steps=ddim_steps, x0=z[:N], mask=mask) + x_samples = self.decode_first_stage(samples.to(self.device)) + log["samples_inpainting"] = x_samples + log["mask"] = mask + + # outpaint + with self.ema_scope("Plotting Outpaint"): + samples, _ = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, + ddim_steps=ddim_steps, x0=z[:N], mask=mask) + x_samples = self.decode_first_stage(samples.to(self.device)) + log["samples_outpainting"] = x_samples + + if plot_progressive_rows: + with self.ema_scope("Plotting Progressives"): + img, progressives = self.progressive_denoising(c, + shape=(self.channels, self.image_size, self.image_size), + batch_size=N) + prog_row = self._get_denoise_row_from_list(progressives, desc="Progressive Generation") + log["progressive_row"] = prog_row + + if return_keys: + if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0: + return log + else: + return {key: log[key] for key in return_keys} + return log + + def configure_optimizers(self): + lr = self.learning_rate + params = list(self.model.parameters()) + if self.cond_stage_trainable: + print(f"{self.__class__.__name__}: Also optimizing conditioner params!") + params = params + list(self.cond_stage_model.parameters()) + if self.learn_logvar: + print('Diffusion model optimizing logvar') + params.append(self.logvar) + fused_opt = self.fused_opt + try: + from apex.optimizers import FusedAdam + except: # noqa + fused_opt = False + if fused_opt: + opt = FusedAdam(params, lr=lr, adam_w_mode=True) + print("Using fused AdamW") + else: + opt = torch.optim.AdamW(params, lr=lr) + if self.use_scheduler: + scheduler = LatentDiffusion.from_config_dict(self.scheduler_config) + + print("Setting up LambdaLR scheduler...") + scheduler = [ + { + 'scheduler': LambdaLR(opt, lr_lambda=scheduler.schedule), + 'interval': 'step', + 'frequency': 1 + }] + return [opt], scheduler + return opt + + def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm): + if gradient_clip_val <= 0: + return + + if gradient_clip_algorithm is None: + gradient_clip_algorithm = self.trainer.gradient_clip_algorithm or "norm" + + gradient_clip_algorithm = GradClipAlgorithmType(gradient_clip_algorithm) + + if not hasattr(self, "grad_clip_logged"): + logging.info("Gradient clipping: val, %f; algo, %s", + gradient_clip_val, gradient_clip_algorithm) + self.grad_clip_logged = True + + parameters = amp.master_params(optimizer) + if gradient_clip_algorithm == GradClipAlgorithmType.VALUE: + torch.nn.utils.clip_grad_value_(parameters, clip_value=gradient_clip_val) + elif gradient_clip_algorithm == GradClipAlgorithmType.NORM: + clip_grad_norm_(parameters, gradient_clip_val) + + @torch.no_grad() + def to_rgb(self, x): + x = x.float() + if not hasattr(self, "colorize"): + self.colorize = torch.randn(3, x.shape[1], 1, 1).to(x) + x = nn.functional.conv2d(x, weight=self.colorize) + x = 2. * (x - x.min()) / (x.max() - x.min()) - 1. + return x + + +class DiffusionWrapper(pl.LightningModule, Serialization): + def __init__(self, diff_model_config, conditioning_key): + super().__init__() + self.diffusion_model = DiffusionWrapper.from_config_dict(diff_model_config) + self.conditioning_key = conditioning_key + assert self.conditioning_key in [None, 'concat', 'crossattn', 'hybrid', 'adm'] + + def forward(self, x, t, c_concat: list = None, c_crossattn: list = None): + if self.conditioning_key is None: + out = self.diffusion_model(x, t) + elif self.conditioning_key == 'concat': + xc = torch.cat([x] + c_concat, dim=1) + out = self.diffusion_model(xc, t) + elif self.conditioning_key == 'crossattn': + cc = torch.cat(c_crossattn, 1) + out = self.diffusion_model(x, t, context=cc) + elif self.conditioning_key == 'hybrid': + xc = torch.cat([x] + c_concat, dim=1) + cc = torch.cat(c_crossattn, 1) + out = self.diffusion_model(xc, t, context=cc) + elif self.conditioning_key == 'adm': + cc = c_crossattn[0] + out = self.diffusion_model(x, t, y=cc) + else: + raise NotImplementedError() + + return out diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm_config.py b/nemo/collections/multimodal/models/stable_diffusion/ldm_config.py new file mode 100644 index 000000000000..2b461915dbdd --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm_config.py @@ -0,0 +1,141 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from dataclasses import dataclass, field +from typing import Any, List, Optional + +from nemo.core.config import modelPT as model_cfg + + +@dataclass +class LDMUnetConfig: + cls: Optional[str] = 'nemo.collections.multimodal.modules.diffusionmodules.openaimodel.UNetModel' + image_size: Optional[int] = 32 # unused + in_channels: Optional[int] = 4 + out_channels: Optional[int] = 4 + model_channels: Optional[int] = 320 + attention_resolutions: Optional[List[int]] = field(default_factory=lambda: [4, 2, 1]) + num_res_blocks: Optional[int] = 2 + channel_mult: Optional[List[int]] = field(default_factory=lambda: [1, 2, 4, 4]) + num_heads: Optional[int] = 8 + use_spatial_transformer: Optional[bool] = True + transformer_depth: Optional[int] = 1 + context_dim: Optional[int] = 768 + use_checkpoint: Optional[bool] = True + legacy: Optional[bool] = False + use_flash_attention: Optional[bool] = False + + +@dataclass +class SchedulerConfig: + cls: Optional[str] = 'nemo.collections.multimodal.parts.lr_scheduler.LambdaLinearScheduler' + warm_up_steps: Optional[List[int]] = field(default_factory=lambda: [10000]) + cycle_lengths: Optional[List[int]] = field( + default_factory=lambda: [10000000000000]) # incredibly large number to prevent corner cases + f_start: Optional[List[float]] = field(default_factory=lambda: [1.e-6]) + f_max: Optional[List[float]] = field(default_factory=lambda: [1.]) + f_min: Optional[List[float]] = field(default_factory=lambda: [1.]) + + +@dataclass +class CLIPEmbedderConfig: + cls: Optional[str] = 'nemo.collections.multimodal.modules.encoders.modules.FrozenCLIPEmbedder' + version: Optional[str] = 'openai/clip-vit-large-patch14' + device: Optional[str] = 'cuda' + max_length: Optional[int] = 77 + + +@dataclass +class LDMEncoderConfig: + double_z: Optional[bool] = True + z_channels: Optional[int] = 4 + resolution: Optional[int] = 256 + in_channels: Optional[int] = 3 + out_ch: Optional[int] = 3 + ch: Optional[int] = 128 + ch_mult: Optional[List[int]] = field(default_factory=lambda: [1, 2, 4, 4]) + num_res_blocks: Optional[int] = 2 + attn_resolutions: Optional[List[int]] = field(default_factory=lambda: []) + dropout: Optional[float] = 0. + + +@dataclass +class LDMFirstStageConfig: # Autoencoder + cls: Optional[str] = 'nemo.collections.multimodal.models.ldm.autoencoder.AutoencoderKL' + embed_dim: Optional[int] = 4 + monitor: Optional[str] = 'val/rec_loss' + ddconfig: Optional[LDMEncoderConfig] = LDMEncoderConfig() + + +@dataclass +class DDPMDiffusionModelConfig(model_cfg.ModelConfig): + unet_config: Optional[LDMUnetConfig] = LDMUnetConfig() + timesteps: Optional[int] = 1000 + beta_schedule: Optional[str] = 'linear' + loss_type: Optional[str] = 'l2' + ckpt_path: Optional[str] = None + ignore_keys: Optional[List[str]] = field(default_factory=list) + load_only_unet: Optional[bool] = False + monitor: Optional[str] = 'val/loss' + use_ema: Optional[bool] = True + first_stage_key: Optional[str] = 'image' + image_size: Optional[int] = 256 + channels: Optional[int] = 3 + log_every_t: Optional[int] = 100 + clip_denoised: Optional[bool] = True + linear_start: Optional[float] = 1e-4 + linear_end: Optional[float] = 2e-2 + cosine_s: Optional[float] = 8e-3 + given_betas: Optional[float] = None + original_elbo_weight: Optional[float] = 0. + v_posterior: Optional[float] = 0. # weight for choosing posterior variance as sigma = (1-v) * beta_tilde + v * beta + l_simple_weight: Optional[float] = 1. + conditioning_key: Optional[str] = None + parameterization: Optional[str] = 'eps' # all assuming fixed variance schedules + scheduler_config: Optional[Any] = None + use_positional_encodings: Optional[bool] = False + learn_logvar: Optional[bool] = False + logvar_init: Optional[float] = 0. + learning_rate: Optional[float] = 1.0e-04 + + +@dataclass +class LatentDiffusionModelConfig(DDPMDiffusionModelConfig): + # Overrite Default values + linear_start: Optional[float] = 0.00085 + linear_end: Optional[float] = 0.0120 + num_timesteps_cond: Optional[int] = 1 + log_every_t: Optional[int] = 200 + timesteps: Optional[int] = 1000 + first_stage_key: Optional[str] = 'jpg' + cond_stage_key: Optional[str] = 'txt' + image_size: Optional[int] = 64 + channels: Optional[int] = 4 + cond_stage_trainable: Optional[bool] = False + conditioning_key: Optional[str] = 'crossattn' + monitor: Optional[str] = 'val/loss_simple_ema' + scale_factor: Optional[float] = 0.18215 + use_ema: Optional[bool] = False # TODO + unet_config: Optional[LDMUnetConfig] = LDMUnetConfig() + first_stage_config: Optional[LDMFirstStageConfig] = LDMFirstStageConfig() + scheduler_config: Optional[SchedulerConfig] = SchedulerConfig() + # New attributes in additon to DDPMDiffusionModel + concat_mode: Optional[bool] = True + trainable: Optional[bool] = False + cond_stage_config: Optional[CLIPEmbedderConfig] = CLIPEmbedderConfig() + cond_stage_forward: Optional[Any] = None + scale_by_std: Optional[bool] = False + text_embedding_dropout_rate: Optional[float] = 0 + fused_opt: Optional[bool] = False + inductor: Optional[bool] = False + inductor_cudagraphs: Optional[bool] = False diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py new file mode 100644 index 000000000000..e36354bb3b54 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from enum import Enum + +Sampler = Enum('Sampler', ['PLMS', 'DDIM']) diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py new file mode 100644 index 000000000000..edae8b8b17a8 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py @@ -0,0 +1,226 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import torch +from abc import ABC, abstractmethod +from tqdm import tqdm + +from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import make_ddim_sampling_parameters, \ + make_ddim_timesteps, noise_like + + +class AbstractBaseSampler(ABC): + + def __init__(self, model, sampler, schedule="linear", **kwargs): + super().__init__() + self.model = model + self.ddpm_num_timesteps = model.num_timesteps + self.schedule = schedule + assert isinstance(sampler, Sampler), "Sampler should be of ENUM type Sampler" + self.sampler = sampler + + def register_buffer(self, name, attr): + if type(attr) == torch.Tensor: + if attr.device != torch.device("cuda"): + attr = attr.to(torch.device("cuda")) + setattr(self, name, attr) + + def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True): + self.ddim_timesteps = make_ddim_timesteps(ddim_discr_method=ddim_discretize, num_ddim_timesteps=ddim_num_steps, + num_ddpm_timesteps=self.ddpm_num_timesteps, verbose=verbose) + alphas_cumprod = self.model.alphas_cumprod + assert alphas_cumprod.shape[0] == self.ddpm_num_timesteps, 'alphas have to be defined for each timestep' + to_torch = lambda x: x.clone().detach().to(torch.float32).to(torch.cuda.current_device()) + self.register_buffer('betas', to_torch(self.model.betas)) + self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) + self.register_buffer('alphas_cumprod_prev', to_torch(self.model.alphas_cumprod_prev)) + # calculations for diffusion q(x_t | x_{t-1}) and others + self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod.cpu()))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod.cpu()))) + self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod.cpu()))) + self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod.cpu()))) + self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod.cpu() - 1))) + # ddim sampling parameters + ddim_sigmas, ddim_alphas, ddim_alphas_prev = make_ddim_sampling_parameters(alphacums=alphas_cumprod.cpu(), + ddim_timesteps=self.ddim_timesteps, + eta=ddim_eta, verbose=verbose) + self.register_buffer('ddim_sigmas', ddim_sigmas) + self.register_buffer('ddim_alphas', ddim_alphas) + self.register_buffer('ddim_alphas_prev', ddim_alphas_prev) + self.register_buffer('ddim_sqrt_one_minus_alphas', np.sqrt(1. - ddim_alphas)) + sigmas_for_original_sampling_steps = ddim_eta * torch.sqrt( + (1 - self.alphas_cumprod_prev) / (1 - self.alphas_cumprod) * ( + 1 - self.alphas_cumprod / self.alphas_cumprod_prev)) + self.register_buffer('ddim_sigmas_for_original_num_steps', sigmas_for_original_sampling_steps) + + @abstractmethod + def p_sampling_fn(self): + pass + + @torch.no_grad() + def sample(self, + S, + batch_size, + shape, + conditioning=None, + callback=None, + normals_sequence=None, + img_callback=None, + quantize_x0=False, + eta=0., + mask=None, + x0=None, + temperature=1., + noise_dropout=0., + score_corrector=None, + corrector_kwargs=None, + verbose=True, + x_T=None, + log_every_t=100, + unconditional_guidance_scale=1., + unconditional_conditioning=None, + # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ... + **kwargs + ): + if conditioning is not None: + if isinstance(conditioning, dict): + cbs = conditioning[list(conditioning.keys())[0]][0].shape[0] + if cbs != batch_size: + print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}") + else: + if conditioning.shape[0] != batch_size: + print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}") + self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose) + # sampling + C, H, W = shape + size = (batch_size, C, H, W) + print(f'Data shape for sampling is {size}, eta {eta}') + samples, intermediates = self.sampling_fn(conditioning, size, + callback=callback, + img_callback=img_callback, + quantize_denoised=quantize_x0, + mask=mask, x0=x0, + ddim_use_original_steps=False, + noise_dropout=noise_dropout, + temperature=temperature, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + x_T=x_T, + log_every_t=log_every_t, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning, + ) + return samples, intermediates + + @torch.no_grad() + def sampling_fn(self, cond, shape, + x_T=None, ddim_use_original_steps=False, + callback=None, timesteps=None, quantize_denoised=False, + mask=None, x0=None, img_callback=None, log_every_t=100, + temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None, + unconditional_guidance_scale=1., unconditional_conditioning=None, ): + device = self.model.betas.device + b = shape[0] + if x_T is None: + img = torch.randn(shape, generator=self.model.rng, device=device) + else: + img = x_T + if timesteps is None: + timesteps = self.ddpm_num_timesteps if ddim_use_original_steps else self.ddim_timesteps + elif timesteps is not None and not ddim_use_original_steps: + subset_end = int(min(timesteps / self.ddim_timesteps.shape[0], 1) * self.ddim_timesteps.shape[0]) - 1 + timesteps = self.ddim_timesteps[:subset_end] + intermediates = {'x_inter': [img], 'pred_x0': [img]} + # TODO: Is this needed + if self.sampler is Sampler.PLMS: + time_range = list(reversed(range(0, timesteps))) if ddim_use_original_steps else np.flip(timesteps) + else: + time_range = reversed(range(0, timesteps)) if ddim_use_original_steps else np.flip(timesteps) + total_steps = timesteps if ddim_use_original_steps else timesteps.shape[0] + print(f"Running {self.sampler.name} Sampling with {total_steps} timesteps") + iterator = tqdm(time_range, desc=f'{self.sampler.name} Sampler', total=total_steps) + old_eps = [] + for i, step in enumerate(iterator): + index = total_steps - i - 1 + ts = torch.full((b,), step, device=device, dtype=torch.long) + if self.sampler is Sampler.PLMS: + ts_next = torch.full((b,), time_range[min(i + 1, len(time_range) - 1)], device=device, dtype=torch.long) + else: + old_eps = None + ts_next = None + if mask is not None: + assert x0 is not None + img_orig = self.model.q_sample(x0, ts) # TODO: deterministic forward pass? + img = img_orig * mask + (1. - mask) * img + outs = self.p_sampling_fn(img, cond, ts, index=index, use_original_steps=ddim_use_original_steps, + quantize_denoised=quantize_denoised, temperature=temperature, + noise_dropout=noise_dropout, score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning, + old_eps=old_eps, t_next=ts_next) + img, pred_x0 = outs[0], outs[1] + if self.sampler is Sampler.PLMS: + e_t = outs[2] + old_eps.append(e_t) + if len(old_eps) >= 4: + old_eps.pop(0) + if callback: callback(i) + if img_callback: img_callback(pred_x0, i) + if index % log_every_t == 0 or index == total_steps - 1: + intermediates['x_inter'].append(img) + intermediates['pred_x0'].append(pred_x0) + return img, intermediates + + def _get_model_output(self, x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, + corrector_kwargs): + if unconditional_conditioning is None or unconditional_guidance_scale == 1.: + e_t = self.model.apply_model(x, t, c) + elif isinstance(c, dict): + raise NotImplementedError + else: + x_in = torch.cat([x] * 2) + t_in = torch.cat([t] * 2) + c_in = torch.cat([unconditional_conditioning, c]) + e_t_uncond, e_t = self.model.apply_model(x_in, t_in, c_in).chunk(2) + e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond) + if score_corrector is not None: + assert self.model.parameterization == "eps" + e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs) + return e_t + + def _get_x_prev_and_pred_x0(self, use_original_steps, b, index, device, x, e_t, quantize_denoised, repeat_noise, + temperature, noise_dropout): + alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas + alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev + sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas + sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas + + # select parameters corresponding to the currently considered timestep + a_t = torch.full((b, 1, 1, 1), alphas[index], device=device) + a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device) + sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device) + sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index], device=device) + # current prediction for x_0 + pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt() + if quantize_denoised: + pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0) + # direction pointing to x_t + dir_xt = (1. - a_prev - sigma_t ** 2).sqrt() * e_t + noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature + if noise_dropout > 0.: + noise = torch.nn.functional.dropout(noise, p=noise_dropout) + x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise + return x_prev, pred_x0 diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py new file mode 100644 index 000000000000..37179d0a46ed --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py @@ -0,0 +1,77 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""SAMPLING ONLY.""" + +import numpy as np +import torch +from tqdm import tqdm + +from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler +from nemo.collections.multimodal.models.stable_diffusion.samplers.base_sampler import AbstractBaseSampler +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import extract_into_tensor +from nemo.collections.multimodal.parts.utils import randn_like + + +class DDIMSampler(AbstractBaseSampler): + + def __init__(self, model, schedule="linear", **kwargs): + super().__init__(model, sampler=Sampler.DDIM, schedule="linear", **kwargs) + + @torch.no_grad() + def p_sampling_fn(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False, + temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None, + unconditional_guidance_scale=1., unconditional_conditioning=None, old_eps=None, t_next=None): + b, *_, device = *x.shape, x.device + e_t = self._get_model_output(x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, + corrector_kwargs) + x_prev, pred_x0 = self._get_x_prev_and_pred_x0(use_original_steps, b, index, device, x, e_t, quantize_denoised, + repeat_noise, temperature, noise_dropout) + return x_prev, pred_x0 + + @torch.no_grad() + def stochastic_encode(self, x0, t, use_original_steps=False, noise=None): + # fast, but does not allow for exact reconstruction + # t serves as an index to gather the correct alphas + if use_original_steps: + sqrt_alphas_cumprod = self.sqrt_alphas_cumprod + sqrt_one_minus_alphas_cumprod = self.sqrt_one_minus_alphas_cumprod + else: + sqrt_alphas_cumprod = torch.sqrt(self.ddim_alphas) + sqrt_one_minus_alphas_cumprod = self.ddim_sqrt_one_minus_alphas + + if noise is None: + noise = randn_like(x0, generator=self.model.rng) + return (extract_into_tensor(sqrt_alphas_cumprod, t, x0.shape) * x0 + + extract_into_tensor(sqrt_one_minus_alphas_cumprod, t, x0.shape) * noise) + + @torch.no_grad() + def decode(self, x_latent, cond, t_start, unconditional_guidance_scale=1.0, unconditional_conditioning=None, + use_original_steps=False): + + timesteps = np.arange(self.ddpm_num_timesteps) if use_original_steps else self.ddim_timesteps + timesteps = timesteps[:t_start] + + time_range = np.flip(timesteps) + total_steps = timesteps.shape[0] + print(f"Running DDIM Sampling with {total_steps} timesteps") + + iterator = tqdm(time_range, desc='Decoding image', total=total_steps) + x_dec = x_latent + for i, step in enumerate(iterator): + index = total_steps - i - 1 + ts = torch.full((x_latent.shape[0],), step, device=x_latent.device, dtype=torch.long) + x_dec, _ = self.p_sample_ddim(x_dec, cond, ts, index=index, use_original_steps=use_original_steps, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning) + return x_dec diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py new file mode 100644 index 000000000000..e28fc98880d8 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py @@ -0,0 +1,725 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +import torch +import torchsde +from scipy import integrate +from torch import nn +from torchdiffeq import odeint +from tqdm.auto import trange, tqdm + + +def append_zero(x): + return torch.cat([x, x.new_zeros([1])]) + + +def append_dims(x, target_dims): + """Appends dimensions to the end of a tensor until it has target_dims dimensions.""" + dims_to_append = target_dims - x.ndim + if dims_to_append < 0: + raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less') + return x[(...,) + (None,) * dims_to_append] + + +def get_sigmas_karras(n, sigma_min, sigma_max, rho=7., device='cpu'): + """Constructs the noise schedule of Karras et al. (2022).""" + ramp = torch.linspace(0, 1, n) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return append_zero(sigmas).to(device) + + +def get_sigmas_exponential(n, sigma_min, sigma_max, device='cpu'): + """Constructs an exponential noise schedule.""" + sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), n, device=device).exp() + return append_zero(sigmas) + + +def get_sigmas_polyexponential(n, sigma_min, sigma_max, rho=1., device='cpu'): + """Constructs an polynomial in log sigma noise schedule.""" + ramp = torch.linspace(1, 0, n, device=device) ** rho + sigmas = torch.exp(ramp * (math.log(sigma_max) - math.log(sigma_min)) + math.log(sigma_min)) + return append_zero(sigmas) + + +def get_sigmas_vp(n, beta_d=19.9, beta_min=0.1, eps_s=1e-3, device='cpu'): + """Constructs a continuous VP noise schedule.""" + t = torch.linspace(1, eps_s, n, device=device) + sigmas = torch.sqrt(torch.exp(beta_d * t ** 2 / 2 + beta_min * t) - 1) + return append_zero(sigmas) + + +def to_d(x, sigma, denoised): + """Converts a denoiser output to a Karras ODE derivative.""" + return (x - denoised) / append_dims(sigma, x.ndim) + + +def get_ancestral_step(sigma_from, sigma_to, eta=1.): + """Calculates the noise level (sigma_down) to step down to and the amount + of noise to add (sigma_up) when doing an ancestral sampling step.""" + if not eta: + return sigma_to, 0. + sigma_up = min(sigma_to, eta * (sigma_to ** 2 * (sigma_from ** 2 - sigma_to ** 2) / sigma_from ** 2) ** 0.5) + sigma_down = (sigma_to ** 2 - sigma_up ** 2) ** 0.5 + return sigma_down, sigma_up + + +def default_noise_sampler(x): + return lambda sigma, sigma_next: torch.randn_like(x) + + +class BatchedBrownianTree: + """A wrapper around torchsde.BrownianTree that enables batches of entropy.""" + + def __init__(self, x, t0, t1, seed=None, **kwargs): + t0, t1, self.sign = self.sort(t0, t1) + w0 = kwargs.get('w0', torch.zeros_like(x)) + if seed is None: + seed = torch.randint(0, 2 ** 63 - 1, []).item() + self.batched = True + try: + assert len(seed) == x.shape[0] + w0 = w0[0] + except TypeError: + seed = [seed] + self.batched = False + self.trees = [torchsde.BrownianTree(t0, w0, t1, entropy=s, **kwargs) for s in seed] + + @staticmethod + def sort(a, b): + return (a, b, 1) if a < b else (b, a, -1) + + def __call__(self, t0, t1): + t0, t1, sign = self.sort(t0, t1) + w = torch.stack([tree(t0, t1) for tree in self.trees]) * (self.sign * sign) + return w if self.batched else w[0] + + +class BrownianTreeNoiseSampler: + """A noise sampler backed by a torchsde.BrownianTree. + + Args: + x (Tensor): The tensor whose shape, device and dtype to use to generate + random samples. + sigma_min (float): The low end of the valid interval. + sigma_max (float): The high end of the valid interval. + seed (int or List[int]): The random seed. If a list of seeds is + supplied instead of a single integer, then the noise sampler will + use one BrownianTree per batch item, each with its own seed. + transform (callable): A function that maps sigma to the sampler's + internal timestep. + """ + + def __init__(self, x, sigma_min, sigma_max, seed=None, transform=lambda x: x): + self.transform = transform + t0, t1 = self.transform(torch.as_tensor(sigma_min)), self.transform(torch.as_tensor(sigma_max)) + self.tree = BatchedBrownianTree(x, t0, t1, seed) + + def __call__(self, sigma, sigma_next): + t0, t1 = self.transform(torch.as_tensor(sigma)), self.transform(torch.as_tensor(sigma_next)) + return self.tree(t0, t1) / (t1 - t0).abs().sqrt() + + +@torch.no_grad() +def sample_euler(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., + s_tmax=float('inf'), s_noise=1.): + """Implements Algorithm 2 (Euler steps) from Karras et al. (2022).""" + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0. + eps = torch.randn_like(x) * s_noise + sigma_hat = sigmas[i] * (gamma + 1) + if gamma > 0: + x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5 + denoised = model(x, sigma_hat * s_in, **extra_args) + d = to_d(x, sigma_hat, denoised) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised}) + dt = sigmas[i + 1] - sigma_hat + # Euler method + x = x + d * dt + return x + + +@torch.no_grad() +def sample_euler_ancestral(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., + noise_sampler=None): + """Ancestral sampling with Euler method steps.""" + extra_args = {} if extra_args is None else extra_args + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + d = to_d(x, sigmas[i], denoised) + # Euler method + dt = sigma_down - sigmas[i] + x = x + d * dt + if sigmas[i + 1] > 0: + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up + return x + + +@torch.no_grad() +def sample_heun(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., + s_tmax=float('inf'), s_noise=1.): + """Implements Algorithm 2 (Heun steps) from Karras et al. (2022).""" + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0. + eps = torch.randn_like(x) * s_noise + sigma_hat = sigmas[i] * (gamma + 1) + if gamma > 0: + x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5 + denoised = model(x, sigma_hat * s_in, **extra_args) + d = to_d(x, sigma_hat, denoised) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised}) + dt = sigmas[i + 1] - sigma_hat + if sigmas[i + 1] == 0: + # Euler method + x = x + d * dt + else: + # Heun's method + x_2 = x + d * dt + denoised_2 = model(x_2, sigmas[i + 1] * s_in, **extra_args) + d_2 = to_d(x_2, sigmas[i + 1], denoised_2) + d_prime = (d + d_2) / 2 + x = x + d_prime * dt + return x + + +@torch.no_grad() +def sample_dpm_2(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., + s_tmax=float('inf'), s_noise=1.): + """A sampler inspired by DPM-Solver-2 and Algorithm 2 from Karras et al. (2022).""" + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0. + eps = torch.randn_like(x) * s_noise + sigma_hat = sigmas[i] * (gamma + 1) + if gamma > 0: + x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5 + denoised = model(x, sigma_hat * s_in, **extra_args) + d = to_d(x, sigma_hat, denoised) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised}) + if sigmas[i + 1] == 0: + # Euler method + dt = sigmas[i + 1] - sigma_hat + x = x + d * dt + else: + # DPM-Solver-2 + sigma_mid = sigma_hat.log().lerp(sigmas[i + 1].log(), 0.5).exp() + dt_1 = sigma_mid - sigma_hat + dt_2 = sigmas[i + 1] - sigma_hat + x_2 = x + d * dt_1 + denoised_2 = model(x_2, sigma_mid * s_in, **extra_args) + d_2 = to_d(x_2, sigma_mid, denoised_2) + x = x + d_2 * dt_2 + return x + + +@torch.no_grad() +def sample_dpm_2_ancestral(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., + noise_sampler=None): + """Ancestral sampling with DPM-Solver second-order steps.""" + extra_args = {} if extra_args is None else extra_args + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + d = to_d(x, sigmas[i], denoised) + if sigma_down == 0: + # Euler method + dt = sigma_down - sigmas[i] + x = x + d * dt + else: + # DPM-Solver-2 + sigma_mid = sigmas[i].log().lerp(sigma_down.log(), 0.5).exp() + dt_1 = sigma_mid - sigmas[i] + dt_2 = sigma_down - sigmas[i] + x_2 = x + d * dt_1 + denoised_2 = model(x_2, sigma_mid * s_in, **extra_args) + d_2 = to_d(x_2, sigma_mid, denoised_2) + x = x + d_2 * dt_2 + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up + return x + + +def linear_multistep_coeff(order, t, i, j): + if order - 1 > i: + raise ValueError(f'Order {order} too high for step {i}') + + def fn(tau): + prod = 1. + for k in range(order): + if j == k: + continue + prod *= (tau - t[i - k]) / (t[i - j] - t[i - k]) + return prod + + return integrate.quad(fn, t[i], t[i + 1], epsrel=1e-4)[0] + + +@torch.no_grad() +def sample_lms(model, x, sigmas, extra_args=None, callback=None, disable=None, order=4): + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + sigmas_cpu = sigmas.detach().cpu().numpy() + ds = [] + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + d = to_d(x, sigmas[i], denoised) + ds.append(d) + if len(ds) > order: + ds.pop(0) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + cur_order = min(i + 1, order) + coeffs = [linear_multistep_coeff(cur_order, sigmas_cpu, i, j) for j in range(cur_order)] + x = x + sum(coeff * d for coeff, d in zip(coeffs, reversed(ds))) + return x + + +@torch.no_grad() +def log_likelihood(model, x, sigma_min, sigma_max, extra_args=None, atol=1e-4, rtol=1e-4): + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + v = torch.randint_like(x, 2) * 2 - 1 + fevals = 0 + + def ode_fn(sigma, x): + nonlocal fevals + with torch.enable_grad(): + x = x[0].detach().requires_grad_() + denoised = model(x, sigma * s_in, **extra_args) + d = to_d(x, sigma, denoised) + fevals += 1 + grad = torch.autograd.grad((d * v).sum(), x)[0] + d_ll = (v * grad).flatten(1).sum(1) + return d.detach(), d_ll + + x_min = x, x.new_zeros([x.shape[0]]) + t = x.new_tensor([sigma_min, sigma_max]) + sol = odeint(ode_fn, x_min, t, atol=atol, rtol=rtol, method='dopri5') + latent, delta_ll = sol[0][-1], sol[1][-1] + ll_prior = torch.distributions.Normal(0, sigma_max).log_prob(latent).flatten(1).sum(1) + return ll_prior + delta_ll, {'fevals': fevals} + + +class PIDStepSizeController: + """A PID controller for ODE adaptive step size control.""" + + def __init__(self, h, pcoeff, icoeff, dcoeff, order=1, accept_safety=0.81, eps=1e-8): + self.h = h + self.b1 = (pcoeff + icoeff + dcoeff) / order + self.b2 = -(pcoeff + 2 * dcoeff) / order + self.b3 = dcoeff / order + self.accept_safety = accept_safety + self.eps = eps + self.errs = [] + + def limiter(self, x): + return 1 + math.atan(x - 1) + + def propose_step(self, error): + inv_error = 1 / (float(error) + self.eps) + if not self.errs: + self.errs = [inv_error, inv_error, inv_error] + self.errs[0] = inv_error + factor = self.errs[0] ** self.b1 * self.errs[1] ** self.b2 * self.errs[2] ** self.b3 + factor = self.limiter(factor) + accept = factor >= self.accept_safety + if accept: + self.errs[2] = self.errs[1] + self.errs[1] = self.errs[0] + self.h *= factor + return accept + + +class DPMSolver(nn.Module): + """DPM-Solver. See https://arxiv.org/abs/2206.00927.""" + + def __init__(self, model, extra_args=None, eps_callback=None, info_callback=None): + super().__init__() + self.model = model + self.extra_args = {} if extra_args is None else extra_args + self.eps_callback = eps_callback + self.info_callback = info_callback + + def t(self, sigma): + return -sigma.log() + + def sigma(self, t): + return t.neg().exp() + + def eps(self, eps_cache, key, x, t, *args, **kwargs): + if key in eps_cache: + return eps_cache[key], eps_cache + sigma = self.sigma(t) * x.new_ones([x.shape[0]]) + eps = (x - self.model(x, sigma, *args, **self.extra_args, **kwargs)) / self.sigma(t) + if self.eps_callback is not None: + self.eps_callback() + return eps, {key: eps, **eps_cache} + + def dpm_solver_1_step(self, x, t, t_next, eps_cache=None): + eps_cache = {} if eps_cache is None else eps_cache + h = t_next - t + eps, eps_cache = self.eps(eps_cache, 'eps', x, t) + x_1 = x - self.sigma(t_next) * h.expm1() * eps + return x_1, eps_cache + + def dpm_solver_2_step(self, x, t, t_next, r1=1 / 2, eps_cache=None): + eps_cache = {} if eps_cache is None else eps_cache + h = t_next - t + eps, eps_cache = self.eps(eps_cache, 'eps', x, t) + s1 = t + r1 * h + u1 = x - self.sigma(s1) * (r1 * h).expm1() * eps + eps_r1, eps_cache = self.eps(eps_cache, 'eps_r1', u1, s1) + x_2 = x - self.sigma(t_next) * h.expm1() * eps - self.sigma(t_next) / (2 * r1) * h.expm1() * (eps_r1 - eps) + return x_2, eps_cache + + def dpm_solver_3_step(self, x, t, t_next, r1=1 / 3, r2=2 / 3, eps_cache=None): + eps_cache = {} if eps_cache is None else eps_cache + h = t_next - t + eps, eps_cache = self.eps(eps_cache, 'eps', x, t) + s1 = t + r1 * h + s2 = t + r2 * h + u1 = x - self.sigma(s1) * (r1 * h).expm1() * eps + eps_r1, eps_cache = self.eps(eps_cache, 'eps_r1', u1, s1) + u2 = x - self.sigma(s2) * (r2 * h).expm1() * eps - self.sigma(s2) * (r2 / r1) * ( + (r2 * h).expm1() / (r2 * h) - 1) * (eps_r1 - eps) + eps_r2, eps_cache = self.eps(eps_cache, 'eps_r2', u2, s2) + x_3 = x - self.sigma(t_next) * h.expm1() * eps - self.sigma(t_next) / r2 * (h.expm1() / h - 1) * (eps_r2 - eps) + return x_3, eps_cache + + def dpm_solver_fast(self, x, t_start, t_end, nfe, eta=0., s_noise=1., noise_sampler=None): + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + if not t_end > t_start and eta: + raise ValueError('eta must be 0 for reverse sampling') + + m = math.floor(nfe / 3) + 1 + ts = torch.linspace(t_start, t_end, m + 1, device=x.device) + + if nfe % 3 == 0: + orders = [3] * (m - 2) + [2, 1] + else: + orders = [3] * (m - 1) + [nfe % 3] + + for i in range(len(orders)): + eps_cache = {} + t, t_next = ts[i], ts[i + 1] + if eta: + sd, su = get_ancestral_step(self.sigma(t), self.sigma(t_next), eta) + t_next_ = torch.minimum(t_end, self.t(sd)) + su = (self.sigma(t_next) ** 2 - self.sigma(t_next_) ** 2) ** 0.5 + else: + t_next_, su = t_next, 0. + + eps, eps_cache = self.eps(eps_cache, 'eps', x, t) + denoised = x - self.sigma(t) * eps + if self.info_callback is not None: + self.info_callback({'x': x, 'i': i, 't': ts[i], 't_up': t, 'denoised': denoised}) + + if orders[i] == 1: + x, eps_cache = self.dpm_solver_1_step(x, t, t_next_, eps_cache=eps_cache) + elif orders[i] == 2: + x, eps_cache = self.dpm_solver_2_step(x, t, t_next_, eps_cache=eps_cache) + else: + x, eps_cache = self.dpm_solver_3_step(x, t, t_next_, eps_cache=eps_cache) + + x = x + su * s_noise * noise_sampler(self.sigma(t), self.sigma(t_next)) + + return x + + def dpm_solver_adaptive(self, x, t_start, t_end, order=3, rtol=0.05, atol=0.0078, h_init=0.05, pcoeff=0., icoeff=1., + dcoeff=0., accept_safety=0.81, eta=0., s_noise=1., noise_sampler=None): + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + if order not in {2, 3}: + raise ValueError('order should be 2 or 3') + forward = t_end > t_start + if not forward and eta: + raise ValueError('eta must be 0 for reverse sampling') + h_init = abs(h_init) * (1 if forward else -1) + atol = torch.tensor(atol) + rtol = torch.tensor(rtol) + s = t_start + x_prev = x + accept = True + pid = PIDStepSizeController(h_init, pcoeff, icoeff, dcoeff, 1.5 if eta else order, accept_safety) + info = {'steps': 0, 'nfe': 0, 'n_accept': 0, 'n_reject': 0} + + while s < t_end - 1e-5 if forward else s > t_end + 1e-5: + eps_cache = {} + t = torch.minimum(t_end, s + pid.h) if forward else torch.maximum(t_end, s + pid.h) + if eta: + sd, su = get_ancestral_step(self.sigma(s), self.sigma(t), eta) + t_ = torch.minimum(t_end, self.t(sd)) + su = (self.sigma(t) ** 2 - self.sigma(t_) ** 2) ** 0.5 + else: + t_, su = t, 0. + + eps, eps_cache = self.eps(eps_cache, 'eps', x, s) + denoised = x - self.sigma(s) * eps + + if order == 2: + x_low, eps_cache = self.dpm_solver_1_step(x, s, t_, eps_cache=eps_cache) + x_high, eps_cache = self.dpm_solver_2_step(x, s, t_, eps_cache=eps_cache) + else: + x_low, eps_cache = self.dpm_solver_2_step(x, s, t_, r1=1 / 3, eps_cache=eps_cache) + x_high, eps_cache = self.dpm_solver_3_step(x, s, t_, eps_cache=eps_cache) + delta = torch.maximum(atol, rtol * torch.maximum(x_low.abs(), x_prev.abs())) + error = torch.linalg.norm((x_low - x_high) / delta) / x.numel() ** 0.5 + accept = pid.propose_step(error) + if accept: + x_prev = x_low + x = x_high + su * s_noise * noise_sampler(self.sigma(s), self.sigma(t)) + s = t + info['n_accept'] += 1 + else: + info['n_reject'] += 1 + info['nfe'] += order + info['steps'] += 1 + + if self.info_callback is not None: + self.info_callback( + {'x': x, 'i': info['steps'] - 1, 't': s, 't_up': s, 'denoised': denoised, 'error': error, + 'h': pid.h, **info}) + + return x, info + + +@torch.no_grad() +def sample_dpm_fast(model, x, sigma_min, sigma_max, n, extra_args=None, callback=None, disable=None, eta=0., s_noise=1., + noise_sampler=None): + """DPM-Solver-Fast (fixed step size). See https://arxiv.org/abs/2206.00927.""" + if sigma_min <= 0 or sigma_max <= 0: + raise ValueError('sigma_min and sigma_max must not be 0') + with tqdm(total=n, disable=disable) as pbar: + dpm_solver = DPMSolver(model, extra_args, eps_callback=pbar.update) + if callback is not None: + dpm_solver.info_callback = lambda info: callback( + {'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info}) + return dpm_solver.dpm_solver_fast(x, dpm_solver.t(torch.tensor(sigma_max)), + dpm_solver.t(torch.tensor(sigma_min)), n, eta, s_noise, noise_sampler) + + +@torch.no_grad() +def sample_dpm_adaptive(model, x, sigma_min, sigma_max, extra_args=None, callback=None, disable=None, order=3, + rtol=0.05, atol=0.0078, h_init=0.05, pcoeff=0., icoeff=1., dcoeff=0., accept_safety=0.81, + eta=0., s_noise=1., noise_sampler=None, return_info=False): + """DPM-Solver-12 and 23 (adaptive step size). See https://arxiv.org/abs/2206.00927.""" + if sigma_min <= 0 or sigma_max <= 0: + raise ValueError('sigma_min and sigma_max must not be 0') + with tqdm(disable=disable) as pbar: + dpm_solver = DPMSolver(model, extra_args, eps_callback=pbar.update) + if callback is not None: + dpm_solver.info_callback = lambda info: callback( + {'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info}) + x, info = dpm_solver.dpm_solver_adaptive(x, dpm_solver.t(torch.tensor(sigma_max)), + dpm_solver.t(torch.tensor(sigma_min)), order, rtol, atol, h_init, + pcoeff, icoeff, dcoeff, accept_safety, eta, s_noise, noise_sampler) + if return_info: + return x, info + return x + + +@torch.no_grad() +def sample_dpmpp_2s_ancestral(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., + noise_sampler=None): + """Ancestral sampling with DPM-Solver++(2S) second-order steps.""" + extra_args = {} if extra_args is None else extra_args + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + sigma_fn = lambda t: t.neg().exp() + t_fn = lambda sigma: sigma.log().neg() + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + if sigma_down == 0: + # Euler method + d = to_d(x, sigmas[i], denoised) + dt = sigma_down - sigmas[i] + x = x + d * dt + else: + # DPM-Solver++(2S) + t, t_next = t_fn(sigmas[i]), t_fn(sigma_down) + r = 1 / 2 + h = t_next - t + s = t + r * h + x_2 = (sigma_fn(s) / sigma_fn(t)) * x - (-h * r).expm1() * denoised + denoised_2 = model(x_2, sigma_fn(s) * s_in, **extra_args) + x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised_2 + # Noise addition + if sigmas[i + 1] > 0: + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up + return x + + +@torch.no_grad() +def sample_dpmpp_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., + noise_sampler=None, r=1 / 2): + """DPM-Solver++ (stochastic).""" + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() + noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max) if noise_sampler is None else noise_sampler + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + sigma_fn = lambda t: t.neg().exp() + t_fn = lambda sigma: sigma.log().neg() + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + if sigmas[i + 1] == 0: + # Euler method + d = to_d(x, sigmas[i], denoised) + dt = sigmas[i + 1] - sigmas[i] + x = x + d * dt + else: + # DPM-Solver++ + t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1]) + h = t_next - t + s = t + h * r + fac = 1 / (2 * r) + + # Step 1 + sd, su = get_ancestral_step(sigma_fn(t), sigma_fn(s), eta) + s_ = t_fn(sd) + x_2 = (sigma_fn(s_) / sigma_fn(t)) * x - (t - s_).expm1() * denoised + x_2 = x_2 + noise_sampler(sigma_fn(t), sigma_fn(s)) * s_noise * su + denoised_2 = model(x_2, sigma_fn(s) * s_in, **extra_args) + + # Step 2 + sd, su = get_ancestral_step(sigma_fn(t), sigma_fn(t_next), eta) + t_next_ = t_fn(sd) + denoised_d = (1 - fac) * denoised + fac * denoised_2 + x = (sigma_fn(t_next_) / sigma_fn(t)) * x - (t - t_next_).expm1() * denoised_d + x = x + noise_sampler(sigma_fn(t), sigma_fn(t_next)) * s_noise * su + return x + + +@torch.no_grad() +def sample_dpmpp_2m(model, x, sigmas, extra_args=None, callback=None, disable=None): + """DPM-Solver++(2M).""" + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + sigma_fn = lambda t: t.neg().exp() + t_fn = lambda sigma: sigma.log().neg() + old_denoised = None + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1]) + h = t_next - t + if old_denoised is None or sigmas[i + 1] == 0: + x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised + else: + h_last = t - t_fn(sigmas[i - 1]) + r = h_last / h + denoised_d = (1 + 1 / (2 * r)) * denoised - (1 / (2 * r)) * old_denoised + x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised_d + old_denoised = denoised + return x + + +class DiscreteSchedule(nn.Module): + """A mapping between continuous noise levels (sigmas) and a list of discrete noise + levels.""" + + def __init__(self, sigmas, quantize): + super().__init__() + self.register_buffer('sigmas', sigmas) + self.register_buffer('log_sigmas', sigmas.log()) + self.quantize = quantize + + @property + def sigma_min(self): + return self.sigmas[0] + + @property + def sigma_max(self): + return self.sigmas[-1] + + def get_sigmas(self, n=None): + if n is None: + return append_zero(self.sigmas.flip(0)) + t_max = len(self.sigmas) - 1 + t = torch.linspace(t_max, 0, n, device=self.sigmas.device) + return append_zero(self.t_to_sigma(t)) + + def sigma_to_t(self, sigma, quantize=None): + quantize = self.quantize if quantize is None else quantize + log_sigma = sigma.log() + dists = log_sigma - self.log_sigmas[:, None] + if quantize: + return dists.abs().argmin(dim=0).view(sigma.shape) + low_idx = dists.ge(0).cumsum(dim=0).argmax(dim=0).clamp(max=self.log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + low, high = self.log_sigmas[low_idx], self.log_sigmas[high_idx] + w = (low - log_sigma) / (low - high) + w = w.clamp(0, 1) + t = (1 - w) * low_idx + w * high_idx + return t.view(sigma.shape) + + def t_to_sigma(self, t): + t = t.float() + low_idx, high_idx, w = t.floor().long(), t.ceil().long(), t.frac() + log_sigma = (1 - w) * self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx] + return log_sigma.exp() + + +class DiscreteEpsDDPMDenoiser(DiscreteSchedule): + """A wrapper for discrete schedule DDPM models that output eps (the predicted + noise).""" + + def __init__(self, model, quantize=False): + alphas_cumprod = model.alphas_cumprod + super().__init__(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5, quantize) + self.inner_model = model + self.sigma_data = 1. + + def get_scalings(self, sigma): + c_out = -sigma + c_in = 1 / (sigma ** 2 + self.sigma_data ** 2) ** 0.5 + return c_out, c_in + + def get_eps(self, *args, **kwargs): + return self.inner_model.apply_model(*args, **kwargs) + + def loss(self, input, noise, sigma, **kwargs): + c_out, c_in = [append_dims(x, input.ndim) for x in self.get_scalings(sigma)] + noised_input = input + noise * append_dims(sigma, input.ndim) + eps = self.get_eps(noised_input * c_in, self.sigma_to_t(sigma), **kwargs) + return (eps - noise).pow(2).flatten(1).mean(1) + + def forward(self, input, sigma, **kwargs): + c_out, c_in = [append_dims(x, input.ndim) for x in self.get_scalings(sigma)] + eps = self.get_eps(input * c_in, self.sigma_to_t(sigma), **kwargs) + return input + eps * c_out diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py new file mode 100644 index 000000000000..c5c6ab49e0e3 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py @@ -0,0 +1,58 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""SAMPLING ONLY.""" + +import torch + +from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler +from nemo.collections.multimodal.models.stable_diffusion.samplers.base_sampler import AbstractBaseSampler + + +class PLMSSampler(AbstractBaseSampler): + def __init__(self, model, schedule="linear", **kwargs): + super().__init__(model, sampler=Sampler.PLMS, schedule="linear", **kwargs) + + def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=False): + if ddim_eta != 0: + raise ValueError('ddim_eta must be 0 for PLMS') + super().make_schedule(ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=False) + + @torch.no_grad() + def p_sampling_fn(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False, + temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None, + unconditional_guidance_scale=1., unconditional_conditioning=None, old_eps=None, t_next=None): + b, *_, device = *x.shape, x.device + e_t = self._get_model_output(x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, + corrector_kwargs) + if len(old_eps) == 0: + # Pseudo Improved Euler (2nd order) + x_prev, pred_x0 = self._get_x_prev_and_pred_x0(use_original_steps, b, index, device, x, e_t, + quantize_denoised, repeat_noise, temperature, noise_dropout) + e_t_next = self._get_model_output(x_prev, t_next, unconditional_conditioning, unconditional_guidance_scale, + score_corrector, c, corrector_kwargs) + e_t_prime = (e_t + e_t_next) / 2 + elif len(old_eps) == 1: + # 2nd order Pseudo Linear Multistep (Adams-Bashforth) + e_t_prime = (3 * e_t - old_eps[-1]) / 2 + elif len(old_eps) == 2: + # 3nd order Pseudo Linear Multistep (Adams-Bashforth) + e_t_prime = (23 * e_t - 16 * old_eps[-1] + 5 * old_eps[-2]) / 12 + elif len(old_eps) >= 3: + # 4nd order Pseudo Linear Multistep (Adams-Bashforth) + e_t_prime = (55 * e_t - 59 * old_eps[-1] + 37 * old_eps[-2] - 9 * old_eps[-3]) / 24 + + x_prev, pred_x0 = self._get_x_prev_and_pred_x0(use_original_steps, b, index, device, x, e_t_prime, + quantize_denoised, repeat_noise, temperature, noise_dropout) + + return x_prev, pred_x0, e_t diff --git a/nemo/collections/multimodal/modules/__init__.py b/nemo/collections/multimodal/modules/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/modules/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/modules/stable_diffusion/__init__.py b/nemo/collections/multimodal/modules/stable_diffusion/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/modules/stable_diffusion/attention.py b/nemo/collections/multimodal/modules/stable_diffusion/attention.py new file mode 100644 index 000000000000..5167f5810a3e --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/attention.py @@ -0,0 +1,376 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +import torch +import torch.nn.functional as F +from einops import rearrange, repeat +from inspect import isfunction +from torch import nn, einsum +from torch._dynamo import disable + +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import checkpoint + + +def check_cuda(): + if not torch.cuda.is_available(): + raise RuntimeError('CUDA is not available') + cur_device = torch.cuda.current_device() + dprops = torch.cuda.get_device_properties(cur_device) + + is_sm75 = dprops.major == 7 and dprops.minor == 5 + is_sm8x = dprops.major == 8 and dprops.minor >= 0 + is_sm90 = dprops.major == 9 and dprops.minor >= 0 + + return is_sm8x or is_sm75 or is_sm90 + + +try: + from flash_attn.flash_attn_interface import flash_attn_unpadded_kvpacked_func + from flash_attn.flash_attention import FlashAttention + + flash_attn_installed = check_cuda() + print("FlashAttention Installed") + + # Disable TorchDynamo on FlashAttention + flash_attn_unpadded_kvpacked_func = disable(flash_attn_unpadded_kvpacked_func) + FlashAttention.forward = disable(FlashAttention.forward) +except ImportError: + flash_attn_installed = False + + +def exists(val): + return val is not None + + +def uniq(arr): + return {el: True for el in arr}.keys() + + +def default(val, d): + if exists(val): + return val + if isinstance(d, (torch.Tensor, float, int)): + return d + return d() if isfunction(d) else d + + +def max_neg_value(t): + return -torch.finfo(t.dtype).max + + +def init_(tensor): + dim = tensor.shape[-1] + std = 1 / math.sqrt(dim) + tensor.uniform_(-std, std) + return tensor + + +# feedforward +class GEGLU(nn.Module): + def __init__(self, dim_in, dim_out): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out * 2) + + def forward(self, x): + x, gate = self.proj(x).chunk(2, dim=-1) + return x * F.gelu(gate) + + +class FeedForward(nn.Module): + def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.): + super().__init__() + inner_dim = int(dim * mult) + dim_out = default(dim_out, dim) + project_in = nn.Sequential( + nn.Linear(dim, inner_dim), + nn.GELU() + ) if not glu else GEGLU(dim, inner_dim) + + self.net = nn.Sequential( + project_in, + nn.Dropout(dropout), + nn.Linear(inner_dim, dim_out) + ) + + def forward(self, x): + return self.net(x) + + +def zero_module(module): + """ + Zero out the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().zero_() + return module + + +def Normalize(in_channels): + return torch.nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True) + + +class LinearAttention(nn.Module): + def __init__(self, dim, heads=4, dim_head=32): + super().__init__() + self.heads = heads + hidden_dim = dim_head * heads + self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias=False) + self.to_out = nn.Conv2d(hidden_dim, dim, 1) + + def forward(self, x): + b, c, h, w = x.shape + qkv = self.to_qkv(x) + q, k, v = rearrange(qkv, 'b (qkv heads c) h w -> qkv b heads c (h w)', heads=self.heads, qkv=3) + k = k.softmax(dim=-1) + context = torch.einsum('bhdn,bhen->bhde', k, v) + out = torch.einsum('bhde,bhdn->bhen', context, q) + out = rearrange(out, 'b heads c (h w) -> b (heads c) h w', heads=self.heads, h=h, w=w) + return self.to_out(out) + + +class SpatialSelfAttention(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.in_channels = in_channels + + self.norm = Normalize(in_channels) + self.q = torch.nn.Conv2d(in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0) + self.k = torch.nn.Conv2d(in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0) + self.v = torch.nn.Conv2d(in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0) + self.proj_out = torch.nn.Conv2d(in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0) + + def forward(self, x): + h_ = x + h_ = self.norm(h_) + q = self.q(h_) + k = self.k(h_) + v = self.v(h_) + + # compute attention + b, c, h, w = q.shape + q = rearrange(q, 'b c h w -> b (h w) c') + k = rearrange(k, 'b c h w -> b c (h w)') + w_ = torch.einsum('bij,bjk->bik', q, k) + + w_ = w_ * (int(c) ** (-0.5)) + w_ = torch.nn.functional.softmax(w_, dim=2) + + # attend to values + v = rearrange(v, 'b c h w -> b c (h w)') + w_ = rearrange(w_, 'b i j -> b j i') + h_ = torch.einsum('bij,bjk->bik', v, w_) + h_ = rearrange(h_, 'b c (h w) -> b c h w', h=h) + h_ = self.proj_out(h_) + + return x + h_ + + +# b n (h d) -> (b h) n d +def rearrange_heads_outer(t: torch.Tensor, h: int) -> torch.Tensor: + b, n, ch = t.shape + return t.view(b, n, h, -1).transpose(1, 2).reshape(b * h, n, -1) + + +# (b h) n d -> b n (h d) +def rearrange_heads_inner(t: torch.Tensor, h: int) -> torch.Tensor: + b = t.shape[0] // h + n = t.shape[1] + return t.view(b, h, n, -1).transpose(1, 2).reshape(b, n, -1) + + +class CrossAttention(nn.Module): + def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0., use_flash_attention=False): + super().__init__() + inner_dim = dim_head * heads + context_dim = default(context_dim, query_dim) + # make attention part be aware of self-attention/cross-attention + self.context_dim = context_dim + self.query_dim = query_dim + self.dim_head = dim_head + + self.scale = dim_head ** -0.5 + self.heads = heads + + self.to_q = nn.Linear(query_dim, inner_dim, bias=False) + self.to_k = nn.Linear(context_dim, inner_dim, bias=False) + self.to_v = nn.Linear(context_dim, inner_dim, bias=False) + + self.to_out = nn.Sequential( + nn.Linear(inner_dim, query_dim), + nn.Dropout(dropout) + ) + self.use_flash_attention = use_flash_attention + + if context_dim == query_dim and dim_head <= 128 and (dim_head % 8) == 0 and flash_attn_installed: + self.flash_attn = FlashAttention(self.scale) + + def forward(self, x, context=None, mask=None): + h = self.heads + + q = self.to_q(x) + context = default(context, x) + k = self.to_k(context) + v = self.to_v(context) + + out = self._attention(q, k, v, mask) + + return self.to_out(out) + + def _attention(self, q, k, v, mask=None): + h = self.heads + + if not flash_attn_installed or not self.use_flash_attention or q.dtype == torch.float32 or ( + self.dim_head > 128 or (self.dim_head % 8) != 0) or mask is not None: + # original implementation + # b n (h d) -> (b h) n d + q = rearrange_heads_outer(q, h) + k = rearrange_heads_outer(k, h) + v = rearrange_heads_outer(v, h) + + sim = einsum('b i d, b j d -> b i j', q, k) * self.scale + + if exists(mask): + # standard stable diffusion does not run into here + mask = mask.view(mask.shape[0], -1) + b, j = mask.shape + mask = mask.unsqueeze(1).expand(b, h, j).reshape(b * h, 1, j) # b j -> (b h) () j + sim.masked_fill_(~mask, self.max_neg[sim.dtype]) + + # attention, what we cannot get enough of + attn = sim.softmax(dim=-1) + + out = einsum('b i j, b j d -> b i d', attn, v) + + # (b h) n d -> b n (h d) + out = rearrange_heads_inner(out, h) + elif self.context_dim == self.query_dim: + # self-attention + qkv = torch.stack([q, k, v], dim=2) + b, s, t, hd = qkv.shape + d = hd // h + qkv = qkv.view(b, s, t, h, d) + + out, _ = self.flash_attn(qkv) + out = out.view(b, s, hd) + else: + # cross-attention + kv = torch.stack([k, v], dim=2) + + s_q = q.shape[1] + b, s_kv, t, hd = kv.shape + d = hd // h + + q = q.view(b * s_q, h, d) + kv = kv.view(b * s_kv, t, h, d) + + cu_seqlens_q = torch.arange(0, (b + 1) * s_q, step=s_q, dtype=torch.int32, device=q.device) + cu_seqlens_k = torch.arange(0, (b + 1) * s_kv, step=s_kv, dtype=torch.int32, device=kv.device) + + out = flash_attn_unpadded_kvpacked_func(q, kv, cu_seqlens_q, cu_seqlens_k, s_q, s_kv, 0.0, self.scale) + + out = out.view(b, s_q, hd) + + return out + + +class BasicTransformerBlock(nn.Module): + def __init__(self, dim, n_heads, d_head, dropout=0., context_dim=None, gated_ff=True, use_checkpoint=False, + use_flash_attention=False): + super().__init__() + self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, dropout=dropout, + use_flash_attention=use_flash_attention) # is a self-attention + self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff) + self.attn2 = CrossAttention(query_dim=dim, context_dim=context_dim, + heads=n_heads, dim_head=d_head, dropout=dropout, + use_flash_attention=use_flash_attention) # is self-attn if context is none + self.norm1 = nn.LayerNorm(dim) + self.norm2 = nn.LayerNorm(dim) + self.norm3 = nn.LayerNorm(dim) + self.use_checkpoint = use_checkpoint + + def forward(self, x, context=None): + return checkpoint(self._forward, (x, context), self.parameters(), self.use_checkpoint) + + def _forward(self, x, context=None): + x = self.attn1(self.norm1(x)) + x + x = self.attn2(self.norm2(x), context=context) + x + x = self.ff(self.norm3(x)) + x + return x + + +class SpatialTransformer(nn.Module): + """ + Transformer block for image-like data. + First, project the input (aka embedding) + and reshape to b, t, d. + Then apply standard transformer action. + Finally, reshape to image + """ + + def __init__(self, in_channels, n_heads, d_head, + depth=1, dropout=0., context_dim=None, use_checkpoint=False, + use_flash_attention=False): + super().__init__() + self.in_channels = in_channels + inner_dim = n_heads * d_head + self.norm = Normalize(in_channels) + + self.proj_in = nn.Conv2d(in_channels, + inner_dim, + kernel_size=1, + stride=1, + padding=0) + + self.transformer_blocks = nn.ModuleList( + [BasicTransformerBlock(inner_dim, n_heads, d_head, dropout=dropout, context_dim=context_dim, + use_checkpoint=use_checkpoint, use_flash_attention=use_flash_attention) + for d in range(depth)] + ) + + self.proj_out = zero_module(nn.Conv2d(inner_dim, + in_channels, + kernel_size=1, + stride=1, + padding=0)) + + def forward(self, x, context=None): + # note: if no context is given, cross-attention defaults to self-attention + b, c, h, w = x.shape + x_in = x + x = self.norm(x) + x = self.proj_in(x) + x = x.view(b, c, -1).transpose(1, 2) # b c h w -> b (h w) c + x = x.contiguous() # workaround for dynamo ddp bug + for block in self.transformer_blocks: + x = block(x, context=context) + x = x.transpose(1, 2).view(b, c, h, w) # b (h w) c -> b c h w + x = x.contiguous() # workaround for dynamo ddp bug + x = self.proj_out(x) + return x + x_in diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/__init__.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py new file mode 100644 index 000000000000..6f953f817336 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py @@ -0,0 +1,856 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pytorch_diffusion + derived encoder decoder +import math +import numpy as np +import torch +import torch.nn as nn +from einops import rearrange + +from nemo.collections.multimodal.modules.stable_diffusion.attention import LinearAttention +from nemo.collections.multimodal.parts.stable_diffusion.utils import instantiate_from_config + + +def get_timestep_embedding(timesteps, embedding_dim): + """ + This matches the implementation in Denoising Diffusion Probabilistic Models: + From Fairseq. + Build sinusoidal embeddings. + This matches the implementation in tensor2tensor, but differs slightly + from the description in Section 3.5 of "Attention Is All You Need". + """ + assert len(timesteps.shape) == 1 + + half_dim = embedding_dim // 2 + emb = math.log(10000) / (half_dim - 1) + emb = torch.exp(torch.arange(half_dim, dtype=torch.float32) * -emb) + emb = emb.to(device=timesteps.device) + emb = timesteps.float()[:, None] * emb[None, :] + emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1) + if embedding_dim % 2 == 1: # zero pad + emb = torch.nn.functional.pad(emb, (0, 1, 0, 0)) + return emb + + +def nonlinearity(x): + # swish + return torch.nn.functional.silu(x) + + +def Normalize(in_channels, num_groups=32): + return torch.nn.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True) + + +class Upsample(nn.Module): + def __init__(self, in_channels, with_conv): + super().__init__() + self.with_conv = with_conv + if self.with_conv: + self.conv = torch.nn.Conv2d(in_channels, + in_channels, + kernel_size=3, + stride=1, + padding=1) + + def forward(self, x): + # Cast to float32 to as 'upsample_nearest2d_out_frame' op does not support bfloat16 + # TODO(yuya): Remove this cast once the issue is fixed in PyTorch + # https://github.com/pytorch/pytorch/issues/86679 + dtype = x.dtype + if dtype == torch.bfloat16: + x = x.to(torch.float32) + x = torch.nn.functional.interpolate(x, scale_factor=2.0, mode="nearest") + if dtype == torch.bfloat16: + x = x.to(dtype) + if self.with_conv: + x = self.conv(x) + return x + + +class Downsample(nn.Module): + def __init__(self, in_channels, with_conv): + super().__init__() + self.with_conv = with_conv + if self.with_conv: + # no asymmetric padding in torch conv, must do it ourselves + self.conv = torch.nn.Conv2d(in_channels, + in_channels, + kernel_size=3, + stride=2, + padding=0) + + def forward(self, x): + if self.with_conv: + pad = (0, 1, 0, 1) + x = torch.nn.functional.pad(x, pad, mode="constant", value=0) + x = self.conv(x) + else: + x = torch.nn.functional.avg_pool2d(x, kernel_size=2, stride=2) + return x + + +class ResnetBlock(nn.Module): + def __init__(self, *, in_channels, out_channels=None, conv_shortcut=False, + dropout, temb_channels=512): + super().__init__() + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + self.use_conv_shortcut = conv_shortcut + + self.norm1 = Normalize(in_channels) + self.conv1 = torch.nn.Conv2d(in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1) + if temb_channels > 0: + self.temb_proj = torch.nn.Linear(temb_channels, + out_channels) + self.norm2 = Normalize(out_channels) + self.dropout = torch.nn.Dropout(dropout) + self.conv2 = torch.nn.Conv2d(out_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1) + if self.in_channels != self.out_channels: + if self.use_conv_shortcut: + self.conv_shortcut = torch.nn.Conv2d(in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1) + else: + self.nin_shortcut = torch.nn.Conv2d(in_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0) + + def forward(self, x, temb): + h = x + h = self.norm1(h) + h = nonlinearity(h) + h = self.conv1(h) + + if temb is not None: + h = h + self.temb_proj(nonlinearity(temb))[:, :, None, None] + + h = self.norm2(h) + h = nonlinearity(h) + h = self.dropout(h) + h = self.conv2(h) + + if self.in_channels != self.out_channels: + if self.use_conv_shortcut: + x = self.conv_shortcut(x) + else: + x = self.nin_shortcut(x) + + return x + h + + +class LinAttnBlock(LinearAttention): + """to match AttnBlock usage""" + + def __init__(self, in_channels): + super().__init__(dim=in_channels, heads=1, dim_head=in_channels) + + +class AttnBlock(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.in_channels = in_channels + + self.norm = Normalize(in_channels) + self.q = torch.nn.Conv2d(in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0) + self.k = torch.nn.Conv2d(in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0) + self.v = torch.nn.Conv2d(in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0) + self.proj_out = torch.nn.Conv2d(in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0) + + def forward(self, x): + h_ = x + h_ = self.norm(h_) + q = self.q(h_) + k = self.k(h_) + v = self.v(h_) + + # compute attention + b, c, h, w = q.shape + q = q.reshape(b, c, h * w) + q = q.permute(0, 2, 1) # b,hw,c + k = k.reshape(b, c, h * w) # b,c,hw + w_ = torch.bmm(q, k) # b,hw,hw w[b,i,j]=sum_c q[b,i,c]k[b,c,j] + w_ = w_ * (int(c) ** (-0.5)) + w_ = torch.nn.functional.softmax(w_, dim=2) + + # attend to values + v = v.reshape(b, c, h * w) + w_ = w_.permute(0, 2, 1) # b,hw,hw (first hw of k, second of q) + h_ = torch.bmm(v, w_) # b, c,hw (hw of q) h_[b,c,j] = sum_i v[b,c,i] w_[b,i,j] + h_ = h_.reshape(b, c, h, w) + + h_ = self.proj_out(h_) + + return x + h_ + + +def make_attn(in_channels, attn_type="vanilla"): + assert attn_type in ["vanilla", "linear", "none"], f'attn_type {attn_type} unknown' + print(f"making attention of type '{attn_type}' with {in_channels} in_channels") + if attn_type == "vanilla": + return AttnBlock(in_channels) + elif attn_type == "none": + return nn.Identity(in_channels) + else: + return LinAttnBlock(in_channels) + + +class Model(nn.Module): + def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, + attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels, + resolution, use_timestep=True, use_linear_attn=False, attn_type="vanilla"): + super().__init__() + if use_linear_attn: attn_type = "linear" + self.ch = ch + self.temb_ch = self.ch * 4 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + + self.use_timestep = use_timestep + if self.use_timestep: + # timestep embedding + self.temb = nn.Module() + self.temb.dense = nn.ModuleList([ + torch.nn.Linear(self.ch, + self.temb_ch), + torch.nn.Linear(self.temb_ch, + self.temb_ch), + ]) + + # downsampling + self.conv_in = torch.nn.Conv2d(in_channels, + self.ch, + kernel_size=3, + stride=1, + padding=1) + + curr_res = resolution + in_ch_mult = (1,) + tuple(ch_mult) + self.down = nn.ModuleList() + for i_level in range(self.num_resolutions): + block = nn.ModuleList() + attn = nn.ModuleList() + block_in = ch * in_ch_mult[i_level] + block_out = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks): + block.append(ResnetBlock(in_channels=block_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout)) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(make_attn(block_in, attn_type=attn_type)) + down = nn.Module() + down.block = block + down.attn = attn + if i_level != self.num_resolutions - 1: + down.downsample = Downsample(block_in, resamp_with_conv) + curr_res = curr_res // 2 + self.down.append(down) + + # middle + self.mid = nn.Module() + self.mid.block_1 = ResnetBlock(in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout) + self.mid.attn_1 = make_attn(block_in, attn_type=attn_type) + self.mid.block_2 = ResnetBlock(in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout) + + # upsampling + self.up = nn.ModuleList() + for i_level in reversed(range(self.num_resolutions)): + block = nn.ModuleList() + attn = nn.ModuleList() + block_out = ch * ch_mult[i_level] + skip_in = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks + 1): + if i_block == self.num_res_blocks: + skip_in = ch * in_ch_mult[i_level] + block.append(ResnetBlock(in_channels=block_in + skip_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout)) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(make_attn(block_in, attn_type=attn_type)) + up = nn.Module() + up.block = block + up.attn = attn + if i_level != 0: + up.upsample = Upsample(block_in, resamp_with_conv) + curr_res = curr_res * 2 + self.up.insert(0, up) # prepend to get consistent order + + # end + self.norm_out = Normalize(block_in) + self.conv_out = torch.nn.Conv2d(block_in, + out_ch, + kernel_size=3, + stride=1, + padding=1) + + def forward(self, x, t=None, context=None): + # assert x.shape[2] == x.shape[3] == self.resolution + if context is not None: + # assume aligned context, cat along channel axis + x = torch.cat((x, context), dim=1) + if self.use_timestep: + # timestep embedding + assert t is not None + temb = get_timestep_embedding(t, self.ch) + temb = self.temb.dense[0](temb) + temb = nonlinearity(temb) + temb = self.temb.dense[1](temb) + else: + temb = None + + # downsampling + hs = [self.conv_in(x)] + for i_level in range(self.num_resolutions): + for i_block in range(self.num_res_blocks): + h = self.down[i_level].block[i_block](hs[-1], temb) + if len(self.down[i_level].attn) > 0: + h = self.down[i_level].attn[i_block](h) + hs.append(h) + if i_level != self.num_resolutions - 1: + hs.append(self.down[i_level].downsample(hs[-1])) + + # middle + h = hs[-1] + h = self.mid.block_1(h, temb) + h = self.mid.attn_1(h) + h = self.mid.block_2(h, temb) + + # upsampling + for i_level in reversed(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks + 1): + h = self.up[i_level].block[i_block]( + torch.cat([h, hs.pop()], dim=1), temb) + if len(self.up[i_level].attn) > 0: + h = self.up[i_level].attn[i_block](h) + if i_level != 0: + h = self.up[i_level].upsample(h) + + # end + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h) + return h + + def get_last_layer(self): + return self.conv_out.weight + + +class Encoder(nn.Module): + def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, + attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels, + resolution, z_channels, double_z=True, use_linear_attn=False, attn_type="vanilla", + **ignore_kwargs): + super().__init__() + if use_linear_attn: attn_type = "linear" + self.ch = ch + self.temb_ch = 0 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + + # downsampling + self.conv_in = torch.nn.Conv2d(in_channels, + self.ch, + kernel_size=3, + stride=1, + padding=1) + + curr_res = resolution + in_ch_mult = (1,) + tuple(ch_mult) + self.in_ch_mult = in_ch_mult + self.down = nn.ModuleList() + for i_level in range(self.num_resolutions): + block = nn.ModuleList() + attn = nn.ModuleList() + block_in = ch * in_ch_mult[i_level] + block_out = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks): + block.append(ResnetBlock(in_channels=block_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout)) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(make_attn(block_in, attn_type=attn_type)) + down = nn.Module() + down.block = block + down.attn = attn + if i_level != self.num_resolutions - 1: + down.downsample = Downsample(block_in, resamp_with_conv) + curr_res = curr_res // 2 + self.down.append(down) + + # middle + self.mid = nn.Module() + self.mid.block_1 = ResnetBlock(in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout) + self.mid.attn_1 = make_attn(block_in, attn_type=attn_type) + self.mid.block_2 = ResnetBlock(in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout) + + # end + self.norm_out = Normalize(block_in) + self.conv_out = torch.nn.Conv2d(block_in, + 2 * z_channels if double_z else z_channels, + kernel_size=3, + stride=1, + padding=1) + + def forward(self, x): + # timestep embedding + temb = None + + # downsampling + hs = [self.conv_in(x)] + for i_level in range(self.num_resolutions): + for i_block in range(self.num_res_blocks): + h = self.down[i_level].block[i_block](hs[-1], temb) + if len(self.down[i_level].attn) > 0: + h = self.down[i_level].attn[i_block](h) + hs.append(h) + if i_level != self.num_resolutions - 1: + hs.append(self.down[i_level].downsample(hs[-1])) + + # middle + h = hs[-1] + h = self.mid.block_1(h, temb) + h = self.mid.attn_1(h) + h = self.mid.block_2(h, temb) + + # end + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h) + return h + + +class Decoder(nn.Module): + def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, + attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels, + resolution, z_channels, give_pre_end=False, tanh_out=False, use_linear_attn=False, + attn_type="vanilla", **ignorekwargs): + super().__init__() + if use_linear_attn: attn_type = "linear" + self.ch = ch + self.temb_ch = 0 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + self.give_pre_end = give_pre_end + self.tanh_out = tanh_out + + # compute in_ch_mult, block_in and curr_res at lowest res + in_ch_mult = (1,) + tuple(ch_mult) + block_in = ch * ch_mult[self.num_resolutions - 1] + curr_res = resolution // 2 ** (self.num_resolutions - 1) + self.z_shape = (1, z_channels, curr_res, curr_res) + print("Working with z of shape {} = {} dimensions.".format( + self.z_shape, np.prod(self.z_shape))) + + # z to block_in + self.conv_in = torch.nn.Conv2d(z_channels, + block_in, + kernel_size=3, + stride=1, + padding=1) + + # middle + self.mid = nn.Module() + self.mid.block_1 = ResnetBlock(in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout) + self.mid.attn_1 = make_attn(block_in, attn_type=attn_type) + self.mid.block_2 = ResnetBlock(in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout) + + # upsampling + self.up = nn.ModuleList() + for i_level in reversed(range(self.num_resolutions)): + block = nn.ModuleList() + attn = nn.ModuleList() + block_out = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks + 1): + block.append(ResnetBlock(in_channels=block_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout)) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(make_attn(block_in, attn_type=attn_type)) + up = nn.Module() + up.block = block + up.attn = attn + if i_level != 0: + up.upsample = Upsample(block_in, resamp_with_conv) + curr_res = curr_res * 2 + self.up.insert(0, up) # prepend to get consistent order + + # end + self.norm_out = Normalize(block_in) + self.conv_out = torch.nn.Conv2d(block_in, + out_ch, + kernel_size=3, + stride=1, + padding=1) + + def forward(self, z): + # assert z.shape[1:] == self.z_shape[1:] + self.last_z_shape = z.shape + + # timestep embedding + temb = None + + # z to block_in + h = self.conv_in(z) + + # middle + h = self.mid.block_1(h, temb) + h = self.mid.attn_1(h) + h = self.mid.block_2(h, temb) + + # upsampling + for i_level in reversed(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks + 1): + h = self.up[i_level].block[i_block](h, temb) + if len(self.up[i_level].attn) > 0: + h = self.up[i_level].attn[i_block](h) + if i_level != 0: + h = self.up[i_level].upsample(h) + + # end + if self.give_pre_end: + return h + + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h) + if self.tanh_out: + h = torch.tanh(h) + return h + + +class SimpleDecoder(nn.Module): + def __init__(self, in_channels, out_channels, *args, **kwargs): + super().__init__() + self.model = nn.ModuleList([nn.Conv2d(in_channels, in_channels, 1), + ResnetBlock(in_channels=in_channels, + out_channels=2 * in_channels, + temb_channels=0, dropout=0.0), + ResnetBlock(in_channels=2 * in_channels, + out_channels=4 * in_channels, + temb_channels=0, dropout=0.0), + ResnetBlock(in_channels=4 * in_channels, + out_channels=2 * in_channels, + temb_channels=0, dropout=0.0), + nn.Conv2d(2 * in_channels, in_channels, 1), + Upsample(in_channels, with_conv=True)]) + # end + self.norm_out = Normalize(in_channels) + self.conv_out = torch.nn.Conv2d(in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1) + + def forward(self, x): + for i, layer in enumerate(self.model): + if i in [1, 2, 3]: + x = layer(x, None) + else: + x = layer(x) + + h = self.norm_out(x) + h = nonlinearity(h) + x = self.conv_out(h) + return x + + +class UpsampleDecoder(nn.Module): + def __init__(self, in_channels, out_channels, ch, num_res_blocks, resolution, + ch_mult=(2, 2), dropout=0.0): + super().__init__() + # upsampling + self.temb_ch = 0 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + block_in = in_channels + curr_res = resolution // 2 ** (self.num_resolutions - 1) + self.res_blocks = nn.ModuleList() + self.upsample_blocks = nn.ModuleList() + for i_level in range(self.num_resolutions): + res_block = [] + block_out = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks + 1): + res_block.append(ResnetBlock(in_channels=block_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout)) + block_in = block_out + self.res_blocks.append(nn.ModuleList(res_block)) + if i_level != self.num_resolutions - 1: + self.upsample_blocks.append(Upsample(block_in, True)) + curr_res = curr_res * 2 + + # end + self.norm_out = Normalize(block_in) + self.conv_out = torch.nn.Conv2d(block_in, + out_channels, + kernel_size=3, + stride=1, + padding=1) + + def forward(self, x): + # upsampling + h = x + for k, i_level in enumerate(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks + 1): + h = self.res_blocks[i_level][i_block](h, None) + if i_level != self.num_resolutions - 1: + h = self.upsample_blocks[k](h) + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h) + return h + + +class LatentRescaler(nn.Module): + def __init__(self, factor, in_channels, mid_channels, out_channels, depth=2): + super().__init__() + # residual block, interpolate, residual block + self.factor = factor + self.conv_in = nn.Conv2d(in_channels, + mid_channels, + kernel_size=3, + stride=1, + padding=1) + self.res_block1 = nn.ModuleList([ResnetBlock(in_channels=mid_channels, + out_channels=mid_channels, + temb_channels=0, + dropout=0.0) for _ in range(depth)]) + self.attn = AttnBlock(mid_channels) + self.res_block2 = nn.ModuleList([ResnetBlock(in_channels=mid_channels, + out_channels=mid_channels, + temb_channels=0, + dropout=0.0) for _ in range(depth)]) + + self.conv_out = nn.Conv2d(mid_channels, + out_channels, + kernel_size=1, + ) + + def forward(self, x): + x = self.conv_in(x) + for block in self.res_block1: + x = block(x, None) + x = torch.nn.functional.interpolate(x, size=( + int(round(x.shape[2] * self.factor)), int(round(x.shape[3] * self.factor)))) + x = self.attn(x) + for block in self.res_block2: + x = block(x, None) + x = self.conv_out(x) + return x + + +class MergedRescaleEncoder(nn.Module): + def __init__(self, in_channels, ch, resolution, out_ch, num_res_blocks, + attn_resolutions, dropout=0.0, resamp_with_conv=True, + ch_mult=(1, 2, 4, 8), rescale_factor=1.0, rescale_module_depth=1): + super().__init__() + intermediate_chn = ch * ch_mult[-1] + self.encoder = Encoder(in_channels=in_channels, num_res_blocks=num_res_blocks, ch=ch, ch_mult=ch_mult, + z_channels=intermediate_chn, double_z=False, resolution=resolution, + attn_resolutions=attn_resolutions, dropout=dropout, resamp_with_conv=resamp_with_conv, + out_ch=None) + self.rescaler = LatentRescaler(factor=rescale_factor, in_channels=intermediate_chn, + mid_channels=intermediate_chn, out_channels=out_ch, depth=rescale_module_depth) + + def forward(self, x): + x = self.encoder(x) + x = self.rescaler(x) + return x + + +class MergedRescaleDecoder(nn.Module): + def __init__(self, z_channels, out_ch, resolution, num_res_blocks, attn_resolutions, ch, ch_mult=(1, 2, 4, 8), + dropout=0.0, resamp_with_conv=True, rescale_factor=1.0, rescale_module_depth=1): + super().__init__() + tmp_chn = z_channels * ch_mult[-1] + self.decoder = Decoder(out_ch=out_ch, z_channels=tmp_chn, attn_resolutions=attn_resolutions, dropout=dropout, + resamp_with_conv=resamp_with_conv, in_channels=None, num_res_blocks=num_res_blocks, + ch_mult=ch_mult, resolution=resolution, ch=ch) + self.rescaler = LatentRescaler(factor=rescale_factor, in_channels=z_channels, mid_channels=tmp_chn, + out_channels=tmp_chn, depth=rescale_module_depth) + + def forward(self, x): + x = self.rescaler(x) + x = self.decoder(x) + return x + + +class Upsampler(nn.Module): + def __init__(self, in_size, out_size, in_channels, out_channels, ch_mult=2): + super().__init__() + assert out_size >= in_size + num_blocks = int(np.log2(out_size // in_size)) + 1 + factor_up = 1. + (out_size % in_size) + print( + f"Building {self.__class__.__name__} with in_size: {in_size} --> out_size {out_size} and factor {factor_up}") + self.rescaler = LatentRescaler(factor=factor_up, in_channels=in_channels, mid_channels=2 * in_channels, + out_channels=in_channels) + self.decoder = Decoder(out_ch=out_channels, resolution=out_size, z_channels=in_channels, num_res_blocks=2, + attn_resolutions=[], in_channels=None, ch=in_channels, + ch_mult=[ch_mult for _ in range(num_blocks)]) + + def forward(self, x): + x = self.rescaler(x) + x = self.decoder(x) + return x + + +class Resize(nn.Module): + def __init__(self, in_channels=None, learned=False, mode="bilinear"): + super().__init__() + self.with_conv = learned + self.mode = mode + if self.with_conv: + print(f"Note: {self.__class__.__name} uses learned downsampling and will ignore the fixed {mode} mode") + raise NotImplementedError() + assert in_channels is not None + # no asymmetric padding in torch conv, must do it ourselves + self.conv = torch.nn.Conv2d(in_channels, + in_channels, + kernel_size=4, + stride=2, + padding=1) + + def forward(self, x, scale_factor=1.0): + if scale_factor == 1.0: + return x + else: + x = torch.nn.functional.interpolate(x, mode=self.mode, align_corners=False, scale_factor=scale_factor) + return x + + +class FirstStagePostProcessor(nn.Module): + + def __init__(self, ch_mult: list, in_channels, + pretrained_model: nn.Module = None, + reshape=False, + n_channels=None, + dropout=0., + pretrained_config=None): + super().__init__() + if pretrained_config is None: + assert pretrained_model is not None, 'Either "pretrained_model" or "pretrained_config" must not be None' + self.pretrained_model = pretrained_model + else: + assert pretrained_config is not None, 'Either "pretrained_model" or "pretrained_config" must not be None' + self.instantiate_pretrained(pretrained_config) + + self.do_reshape = reshape + + if n_channels is None: + n_channels = self.pretrained_model.encoder.ch + + self.proj_norm = Normalize(in_channels, num_groups=in_channels // 2) + self.proj = nn.Conv2d(in_channels, n_channels, kernel_size=3, + stride=1, padding=1) + + blocks = [] + downs = [] + ch_in = n_channels + for m in ch_mult: + blocks.append(ResnetBlock(in_channels=ch_in, out_channels=m * n_channels, dropout=dropout)) + ch_in = m * n_channels + downs.append(Downsample(ch_in, with_conv=False)) + + self.model = nn.ModuleList(blocks) + self.downsampler = nn.ModuleList(downs) + + def instantiate_pretrained(self, config): + model = instantiate_from_config(config) + self.pretrained_model = model.eval() + # self.pretrained_model.train = False + for param in self.pretrained_model.parameters(): + param.requires_grad = False + + @torch.no_grad() + def encode_with_pretrained(self, x): + c = self.pretrained_model.encode(x) + if isinstance(c, DiagonalGaussianDistribution): + c = c.mode() + return c + + def forward(self, x): + z_fs = self.encode_with_pretrained(x) + z = self.proj_norm(z_fs) + z = self.proj(z) + z = nonlinearity(z) + + for submodel, downmodel in zip(self.model, self.downsampler): + z = submodel(z, temb=None) + z = downmodel(z) + + if self.do_reshape: + z = rearrange(z, 'b c h w -> b (h w) c') + return z diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py new file mode 100644 index 000000000000..01090950d790 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py @@ -0,0 +1,1208 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +import numpy as np +import torch +import torch as th +import torch.nn as nn +import torch.nn.functional as F +from abc import abstractmethod +from functools import partial +from typing import Iterable + +from nemo.collections.multimodal.modules.stable_diffusion.attention import SpatialTransformer +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + checkpoint, + conv_nd, + linear, + avg_pool_nd, + zero_module, + normalization, + timestep_embedding, +) + + +# dummy replace +def convert_module_to_f16(x): + pass + + +def convert_module_to_f32(x): + pass + + +## go +class AttentionPool2d(nn.Module): + """ + Adapted from CLIP: https://github.com/openai/CLIP/blob/main/clip/model.py + """ + + def __init__( + self, + spacial_dim: int, + embed_dim: int, + num_heads_channels: int, + output_dim: int = None, + ): + super().__init__() + self.positional_embedding = nn.Parameter(th.randn(embed_dim, spacial_dim ** 2 + 1) / embed_dim ** 0.5) + self.qkv_proj = conv_nd(1, embed_dim, 3 * embed_dim, 1) + self.c_proj = conv_nd(1, embed_dim, output_dim or embed_dim, 1) + self.num_heads = embed_dim // num_heads_channels + self.attention = QKVAttention(self.num_heads) + + def forward(self, x): + b, c, *_spatial = x.shape + x = x.reshape(b, c, -1) # NC(HW) + x = th.cat([x.mean(dim=-1, keepdim=True), x], dim=-1) # NC(HW+1) + x = x + self.positional_embedding[None, :, :].to(x.dtype) # NC(HW+1) + x = self.qkv_proj(x) + x = self.attention(x) + x = self.c_proj(x) + return x[:, :, 0] + + +class TimestepBlock(nn.Module): + """ + Any module where forward() takes timestep embeddings as a second argument. + """ + + @abstractmethod + def forward(self, x, emb): + """ + Apply the module to `x` given `emb` timestep embeddings. + """ + + +class TimestepEmbedSequential(nn.Sequential, TimestepBlock): + """ + A sequential module that passes timestep embeddings to the children that + support it as an extra input. + """ + + def forward(self, x, emb, context=None): + for layer in self: + if isinstance(layer, TimestepBlock): + x = layer(x, emb) + elif isinstance(layer, SpatialTransformer): + x = layer(x, context) + else: + x = layer(x) + return x + + +class Upsample(nn.Module): + """ + An upsampling layer with an optional convolution. + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + if use_conv: + self.conv = conv_nd(dims, self.channels, self.out_channels, 3, padding=padding) + + def forward(self, x): + assert x.shape[1] == self.channels + + # Cast to float32 to as 'upsample_nearest2d_out_frame' op does not support bfloat16 + # TODO(yuya): Remove this cast once the issue is fixed in PyTorch + # https://github.com/pytorch/pytorch/issues/86679 + dtype = x.dtype + if dtype == torch.bfloat16: + x = x.to(torch.float32) + if self.dims == 3: + x = F.interpolate( + x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest" + ) + else: + x = F.interpolate(x, scale_factor=2, mode="nearest") + if dtype == torch.bfloat16: + x = x.to(dtype) + + if self.use_conv: + x = self.conv(x) + return x + + +class TransposedUpsample(nn.Module): + 'Learned 2x upsampling without padding' + + def __init__(self, channels, out_channels=None, ks=5): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + + self.up = nn.ConvTranspose2d(self.channels, self.out_channels, kernel_size=ks, stride=2) + + def forward(self, x): + return self.up(x) + + +class Downsample(nn.Module): + """ + A downsampling layer with an optional convolution. + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + downsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + stride = 2 if dims != 3 else (1, 2, 2) + if use_conv: + self.op = conv_nd( + dims, self.channels, self.out_channels, 3, stride=stride, padding=padding + ) + else: + assert self.channels == self.out_channels + self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride) + + def forward(self, x): + assert x.shape[1] == self.channels + return self.op(x) + + +class ResBlock(TimestepBlock): + """ + A residual block that can optionally change the number of channels. + :param channels: the number of input channels. + :param emb_channels: the number of timestep embedding channels. + :param dropout: the rate of dropout. + :param out_channels: if specified, the number of out channels. + :param use_conv: if True and out_channels is specified, use a spatial + convolution instead of a smaller 1x1 convolution to change the + channels in the skip connection. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param use_checkpoint: if True, use gradient checkpointing on this module. + :param up: if True, use this block for upsampling. + :param down: if True, use this block for downsampling. + """ + + def __init__( + self, + channels, + emb_channels, + dropout, + out_channels=None, + use_conv=False, + use_scale_shift_norm=False, + dims=2, + use_checkpoint=False, + up=False, + down=False, + ): + super().__init__() + self.channels = channels + self.emb_channels = emb_channels + self.dropout = dropout + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_checkpoint = use_checkpoint + self.use_scale_shift_norm = use_scale_shift_norm + + self.in_layers = nn.Sequential( + normalization(channels), + nn.SiLU(), + conv_nd(dims, channels, self.out_channels, 3, padding=1), + ) + + self.updown = up or down + + if up: + self.h_upd = Upsample(channels, False, dims) + self.x_upd = Upsample(channels, False, dims) + elif down: + self.h_upd = Downsample(channels, False, dims) + self.x_upd = Downsample(channels, False, dims) + else: + self.h_upd = self.x_upd = nn.Identity() + + self.emb_layers = nn.Sequential( + nn.SiLU(), + linear( + emb_channels, + 2 * self.out_channels if use_scale_shift_norm else self.out_channels, + ), + ) + self.out_layers = nn.Sequential( + normalization(self.out_channels), + nn.SiLU(), + nn.Dropout(p=dropout), + zero_module( + conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1) + ), + ) + + if self.out_channels == channels: + self.skip_connection = nn.Identity() + elif use_conv: + self.skip_connection = conv_nd( + dims, channels, self.out_channels, 3, padding=1 + ) + else: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 1) + + def forward(self, x, emb): + """ + Apply the block to a Tensor, conditioned on a timestep embedding. + :param x: an [N x C x ...] Tensor of features. + :param emb: an [N x emb_channels] Tensor of timestep embeddings. + :return: an [N x C x ...] Tensor of outputs. + """ + return checkpoint( + self._forward, (x, emb), self.parameters(), self.use_checkpoint + ) + + def _forward(self, x, emb): + if self.updown: + in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1] + h = in_rest(x) + h = self.h_upd(h) + x = self.x_upd(x) + h = in_conv(h) + else: + h = self.in_layers(x) + emb_out = self.emb_layers(emb).type(h.dtype) + while len(emb_out.shape) < len(h.shape): + emb_out = emb_out[..., None] + if self.use_scale_shift_norm: + out_norm, out_rest = self.out_layers[0], self.out_layers[1:] + scale, shift = th.chunk(emb_out, 2, dim=1) + h = out_norm(h) * (1 + scale) + shift + h = out_rest(h) + else: + h = h + emb_out + h = self.out_layers(h) + return self.skip_connection(x) + h + + +class AttentionBlock(nn.Module): + """ + An attention block that allows spatial positions to attend to each other. + Originally ported from here, but adapted to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. + """ + + def __init__( + self, + channels, + num_heads=1, + num_head_channels=-1, + use_checkpoint=False, + use_new_attention_order=False, + ): + super().__init__() + self.channels = channels + if num_head_channels == -1: + self.num_heads = num_heads + else: + assert ( + channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + self.use_checkpoint = use_checkpoint + self.norm = normalization(channels) + self.qkv = conv_nd(1, channels, channels * 3, 1) + if use_new_attention_order: + # split qkv before split heads + self.attention = QKVAttention(self.num_heads) + else: + # split heads before split qkv + self.attention = QKVAttentionLegacy(self.num_heads) + + self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) + + def forward(self, x): + return checkpoint(self._forward, (x,), self.parameters(), + True) # TODO: check checkpoint usage, is True # TODO: fix the .half call!!! + # return pt_checkpoint(self._forward, x) # pytorch + + def _forward(self, x): + b, c, *spatial = x.shape + x = x.reshape(b, c, -1) + qkv = self.qkv(self.norm(x)) + h = self.attention(qkv) + h = self.proj_out(h) + return (x + h).reshape(b, c, *spatial) + + +def count_flops_attn(model, _x, y): + """ + A counter for the `thop` package to count the operations in an + attention operation. + Meant to be used like: + macs, params = thop.profile( + model, + inputs=(inputs, timestamps), + custom_ops={QKVAttention: QKVAttention.count_flops}, + ) + """ + b, c, *spatial = y[0].shape + num_spatial = int(np.prod(spatial)) + # We perform two matmuls with the same number of ops. + # The first computes the weight matrix, the second computes + # the combination of the value vectors. + matmul_ops = 2 * b * (num_spatial ** 2) * c + model.total_ops += th.DoubleTensor([matmul_ops]) + + +class QKVAttentionLegacy(nn.Module): + """ + A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + :param qkv: an [N x (H * 3 * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split(ch, dim=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = th.einsum( + "bct,bcs->bts", q * scale, k * scale + ) # More stable with f16 than dividing afterwards + weight = th.softmax(weight.float(), dim=-1).type(weight.dtype) + a = th.einsum("bts,bcs->bct", weight, v) + return a.reshape(bs, -1, length) + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVAttention(nn.Module): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, dim=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = th.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length), + (k * scale).view(bs * self.n_heads, ch, length), + ) # More stable with f16 than dividing afterwards + weight = th.softmax(weight.float(), dim=-1).type(weight.dtype) + a = th.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) + return a.reshape(bs, -1, length) + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class UNetModel(nn.Module): + """ + The full UNet model with attention and timestep embedding. + :param in_channels: channels in the input Tensor. + :param model_channels: base channel count for the model. + :param out_channels: channels in the output Tensor. + :param num_res_blocks: number of residual blocks per downsample. + :param attention_resolutions: a collection of downsample rates at which + attention will take place. May be a set, list, or tuple. + For example, if this contains 4, then at 4x downsampling, attention + will be used. + :param dropout: the dropout probability. + :param channel_mult: channel multiplier for each level of the UNet. + :param conv_resample: if True, use learned convolutions for upsampling and + downsampling. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param num_classes: if specified (as an int), then this model will be + class-conditional with `num_classes` classes. + :param use_checkpoint: use gradient checkpointing to reduce memory usage. + :param num_heads: the number of attention heads in each attention layer. + :param num_heads_channels: if specified, ignore num_heads and instead use + a fixed channel width per attention head. + :param num_heads_upsample: works with num_heads to set a different number + of heads for upsampling. Deprecated. + :param use_scale_shift_norm: use a FiLM-like conditioning mechanism. + :param resblock_updown: use residual blocks for up/downsampling. + :param use_new_attention_order: use a different attention pattern for potentially + increased efficiency. + """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + num_classes=None, + use_checkpoint=False, + use_fp16=False, + num_heads=-1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + use_spatial_transformer=False, # custom transformer support + transformer_depth=1, # custom transformer support + context_dim=None, # custom transformer support + n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model + legacy=True, + from_pretrained: str = None, + from_NeMo=False, + # It must be specified when from pretrained is not None. It indicates loading unet from NeMo trained ckpt or HF + use_flash_attention: bool = False, + ): + super().__init__() + if use_spatial_transformer: + assert context_dim is not None, 'Fool!! You forgot to include the dimension of your cross-attention conditioning...' + + if context_dim is not None: + assert use_spatial_transformer, 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...' + from omegaconf.listconfig import ListConfig + if type(context_dim) == ListConfig: + context_dim = list(context_dim) + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + if num_heads == -1: + assert num_head_channels != -1, 'Either num_heads or num_head_channels has to be set' + + if num_head_channels == -1: + assert num_heads != -1, 'Either num_heads or num_head_channels has to be set' + + self.image_size = image_size + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.num_classes = num_classes + self.use_checkpoint = use_checkpoint + self.dtype = th.float16 if use_fp16 else th.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + self.predict_codebook_ids = n_embed is not None + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + nn.SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + if self.num_classes is not None: + self.label_emb = nn.Embedding(num_classes, time_embed_dim) + + self.input_blocks = nn.ModuleList( + [ + TimestepEmbedSequential( + conv_nd(dims, in_channels, model_channels, 3, padding=1) + ) + ] + ) + self._feature_size = model_channels + input_block_chans = [model_channels] + ch = model_channels + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=mult * model_channels, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = mult * model_channels + if ds in attention_resolutions: + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + # num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) if not use_spatial_transformer else SpatialTransformer( + ch, num_heads, dim_head, + depth=transformer_depth, + context_dim=context_dim, + use_checkpoint=use_checkpoint, + use_flash_attention=use_flash_attention, + ) + ) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) + if resblock_updown + else Downsample( + ch, conv_resample, dims=dims, out_channels=out_ch + ) + ) + ) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + # num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) if not use_spatial_transformer else SpatialTransformer( + ch, num_heads, dim_head, + depth=transformer_depth, + context_dim=context_dim, + use_checkpoint=use_checkpoint, + use_flash_attention=use_flash_attention, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + + self.output_blocks = nn.ModuleList([]) + for level, mult in list(enumerate(channel_mult))[::-1]: + for i in range(num_res_blocks + 1): + ich = input_block_chans.pop() + layers = [ + ResBlock( + ch + ich, + time_embed_dim, + dropout, + out_channels=model_channels * mult, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = model_channels * mult + if ds in attention_resolutions: + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + # num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads_upsample, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) if not use_spatial_transformer else SpatialTransformer( + ch, num_heads, dim_head, + depth=transformer_depth, + context_dim=context_dim, + use_checkpoint=use_checkpoint, + use_flash_attention=use_flash_attention, + ) + ) + if level and i == num_res_blocks: + out_ch = ch + layers.append( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + up=True, + ) + if resblock_updown + else Upsample(ch, conv_resample, dims=dims, out_channels=out_ch) + ) + ds //= 2 + self.output_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + + self.out = nn.Sequential( + normalization(ch), + nn.SiLU(), + zero_module(conv_nd(dims, model_channels, out_channels, 3, padding=1)), + ) + if self.predict_codebook_ids: + self.id_predictor = nn.Sequential( + normalization(ch), + conv_nd(dims, model_channels, n_embed, 1), + # nn.LogSoftmax(dim=1) # change to cross_entropy and produce non-normalized logits + ) + from diffusers.modeling_utils import load_state_dict + if from_pretrained is not None: + if from_NeMo: + state_dict = torch.load(from_pretrained, map_location='cpu') + self._load_pretrained_model(state_dict['state_dict'], from_NeMo=True) + else: + state_dict = load_state_dict(from_pretrained) + self._load_pretrained_model(state_dict) + + def _input_blocks_mapping(self, input_dict): + res_dict = {} + for key_, value_ in input_dict.items(): + id_0 = int(key_[13]) + if "resnets" in key_: + id_1 = int(key_[23]) + target_id = 3 * id_0 + 1 + id_1 + post_fix = key_[25:].replace('time_emb_proj', 'emb_layers.1') \ + .replace('norm1', 'in_layers.0') \ + .replace('norm2', 'out_layers.0') \ + .replace('conv1', 'in_layers.2') \ + .replace('conv2', 'out_layers.3') \ + .replace('conv_shortcut', 'skip_connection') + res_dict["input_blocks." + str(target_id) + '.0.' + post_fix] = value_ + elif "attentions" in key_: + id_1 = int(key_[26]) + target_id = 3 * id_0 + 1 + id_1 + post_fix = key_[28:] + res_dict["input_blocks." + str(target_id) + '.1.' + post_fix] = value_ + elif "downsamplers" in key_: + post_fix = key_[35:] + target_id = 3 * (id_0 + 1) + res_dict["input_blocks." + str(target_id) + '.0.op.' + post_fix] = value_ + return res_dict + + def _mid_blocks_mapping(self, mid_dict): + res_dict = {} + for key_, value_ in mid_dict.items(): + if "resnets" in key_: + temp_key_ = key_.replace('time_emb_proj', 'emb_layers.1') \ + .replace('norm1', 'in_layers.0') \ + .replace('norm2', 'out_layers.0') \ + .replace('conv1', 'in_layers.2') \ + .replace('conv2', 'out_layers.3') \ + .replace('conv_shortcut', 'skip_connection') \ + .replace('middle_block.resnets.0', 'middle_block.0') \ + .replace('middle_block.resnets.1', 'middle_block.2') + res_dict[temp_key_] = value_ + elif "attentions" in key_: + res_dict[key_.replace('attentions.0', '1')] = value_ + return res_dict + + def _other_blocks_mapping(self, other_dict): + res_dict = {} + for key_, value_ in other_dict.items(): + tmp_key = key_.replace('conv_in', 'input_blocks.0.0') \ + .replace('time_embedding.linear_1', 'time_embed.0') \ + .replace('time_embedding.linear_2', 'time_embed.2') \ + .replace('conv_norm_out', 'out.0') \ + .replace('conv_out', 'out.2') + res_dict[tmp_key] = value_ + return res_dict + + def _output_blocks_mapping(self, output_dict): + res_dict = {} + for key_, value_ in output_dict.items(): + id_0 = int(key_[14]) + if "resnets" in key_: + id_1 = int(key_[24]) + target_id = 3 * id_0 + id_1 + post_fix = key_[26:].replace('time_emb_proj', 'emb_layers.1') \ + .replace('norm1', 'in_layers.0') \ + .replace('norm2', 'out_layers.0') \ + .replace('conv1', 'in_layers.2') \ + .replace('conv2', 'out_layers.3') \ + .replace('conv_shortcut', 'skip_connection') + res_dict["output_blocks." + str(target_id) + '.0.' + post_fix] = value_ + elif "attentions" in key_: + id_1 = int(key_[27]) + target_id = 3 * id_0 + id_1 + post_fix = key_[29:] + res_dict["output_blocks." + str(target_id) + '.1.' + post_fix] = value_ + elif "upsamplers" in key_: + post_fix = key_[34:] + target_id = 3 * (id_0 + 1) - 1 + mid_str = '.2.conv.' if target_id != 2 else '.1.conv.' + res_dict["output_blocks." + str(target_id) + mid_str + post_fix] = value_ + return res_dict + + def _state_key_mapping(self, state_dict: dict): + import re + res_dict = {} + input_dict = {} + mid_dict = {} + output_dict = {} + other_dict = {} + for key_, value_ in state_dict.items(): + if "down_blocks" in key_: + input_dict[key_.replace('down_blocks', 'input_blocks')] = value_ + elif "up_blocks" in key_: + output_dict[key_.replace('up_blocks', 'output_blocks')] = value_ + elif "mid_block" in key_: + mid_dict[key_.replace('mid_block', 'middle_block')] = value_ + else: + other_dict[key_] = value_ + + input_dict = self._input_blocks_mapping(input_dict) + output_dict = self._output_blocks_mapping(output_dict) + mid_dict = self._mid_blocks_mapping(mid_dict) + other_dict = self._other_blocks_mapping(other_dict) + # key_list = state_dict.keys() + # key_str = " ".join(key_list) + + # for key_, val_ in state_dict.items(): + # key_ = key_.replace("down_blocks", "input_blocks")\ + # .replace("up_blocks", 'output_blocks') + # res_dict[key_] = val_ + res_dict.update(input_dict) + res_dict.update(output_dict) + res_dict.update(mid_dict) + res_dict.update(other_dict) + + return res_dict + + def _load_pretrained_model(self, state_dict, ignore_mismatched_sizes=False, from_NeMo=False): + if from_NeMo: + state_dict = self._strip_unet_key_prefix(state_dict) + else: + state_dict = self._state_key_mapping(state_dict) + model_state_dict = self.state_dict() + loaded_keys = [k for k in state_dict.keys()] + expected_keys = list(model_state_dict.keys()) + original_loaded_keys = loaded_keys + missing_keys = list(set(expected_keys) - set(loaded_keys)) + unexpected_keys = list(set(loaded_keys) - set(expected_keys)) + + def _find_mismatched_keys( + state_dict, + model_state_dict, + loaded_keys, + ignore_mismatched_sizes, + ): + mismatched_keys = [] + if ignore_mismatched_sizes: + for checkpoint_key in loaded_keys: + model_key = checkpoint_key + + if ( + model_key in model_state_dict + and state_dict[checkpoint_key].shape != model_state_dict[model_key].shape + ): + mismatched_keys.append( + (checkpoint_key, state_dict[checkpoint_key].shape, model_state_dict[model_key].shape) + ) + del state_dict[checkpoint_key] + return mismatched_keys + + if state_dict is not None: + # Whole checkpoint + mismatched_keys = _find_mismatched_keys( + state_dict, + model_state_dict, + original_loaded_keys, + ignore_mismatched_sizes, + ) + error_msgs = self._load_state_dict_into_model(state_dict) + return missing_keys, unexpected_keys, mismatched_keys, error_msgs + + # TODO MMY maybe combine these cases of key prefix + def _strip_unet_key_prefix(self, state_dict): + re_state_dict = {} + for key_, value_ in state_dict.items(): + if key_.startswith('model.diffusion_model'): + re_state_dict[key_.replace('model.diffusion_model.', '')] = value_ + if key_.startswith('model.model.diffusion_model'): + re_state_dict[key_.replace('model.model.diffusion_model.', '')] = value_ + if key_.startswith('model._orig_mod.diffusion_model.'): + re_state_dict[key_.replace('model._orig_mod.diffusion_model.', '')] = value_ + if key_.startswith('model.model._orig_mod.diffusion_model.'): + re_state_dict[key_.replace('model.model._orig_mod.diffusion_model.', '')] = value_ + return re_state_dict + + def _load_state_dict_into_model(self, state_dict): + # Convert old format to new format if needed from a PyTorch state_dict + # copy state_dict so _load_from_state_dict can modify it + state_dict = state_dict.copy() + error_msgs = [] + + # PyTorch's `_load_from_state_dict` does not copy parameters in a module's descendants + # so we need to apply the function recursively. + def load(module: torch.nn.Module, prefix=""): + args = (state_dict, prefix, {}, True, [], [], error_msgs) + module._load_from_state_dict(*args) + + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + ".") + + load(self) + + return error_msgs + + def convert_to_fp16(self): + """ + Convert the torso of the model to float16. + """ + self.input_blocks.apply(convert_module_to_f16) + self.middle_block.apply(convert_module_to_f16) + self.output_blocks.apply(convert_module_to_f16) + + def convert_to_fp32(self): + """ + Convert the torso of the model to float32. + """ + self.input_blocks.apply(convert_module_to_f32) + self.middle_block.apply(convert_module_to_f32) + self.output_blocks.apply(convert_module_to_f32) + + def forward(self, x, timesteps=None, context=None, y=None, **kwargs): + """ + Apply the model to an input batch. + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :param context: conditioning plugged in via crossattn + :param y: an [N] Tensor of labels, if class-conditional. + :return: an [N x C x ...] Tensor of outputs. + """ + assert (y is not None) == ( + self.num_classes is not None + ), "must specify y if and only if the model is class-conditional" + hs = [] + t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False) + emb = self.time_embed(t_emb) + + if self.num_classes is not None: + assert y.shape == (x.shape[0],) + emb = emb + self.label_emb(y) + + # future support + if self.dtype == th.float32: + self.dtype == x.dtype + + h = x.type(self.dtype) + for module in self.input_blocks: + h = module(h, emb, context) + hs.append(h) + h = self.middle_block(h, emb, context) + for module in self.output_blocks: + h = th.cat([h, hs.pop()], dim=1) + h = module(h, emb, context) + h = h.type(self.dtype) + if self.predict_codebook_ids: + return self.id_predictor(h) + else: + return self.out(h) + + +class EncoderUNetModel(nn.Module): + """ + The half UNet model with attention and timestep embedding. + For usage, see UNet. + """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + use_checkpoint=False, + use_fp16=False, + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + pool="adaptive", + *args, + **kwargs + ): + super().__init__() + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.use_checkpoint = use_checkpoint + self.dtype = th.float16 if use_fp16 else th.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + nn.SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + self.input_blocks = nn.ModuleList( + [ + TimestepEmbedSequential( + conv_nd(dims, in_channels, model_channels, 3, padding=1) + ) + ] + ) + self._feature_size = model_channels + input_block_chans = [model_channels] + ch = model_channels + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=mult * model_channels, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = mult * model_channels + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + ) + ) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) + if resblock_updown + else Downsample( + ch, conv_resample, dims=dims, out_channels=out_ch + ) + ) + ) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + self.pool = pool + if pool == "adaptive": + self.out = nn.Sequential( + normalization(ch), + nn.SiLU(), + nn.AdaptiveAvgPool2d((1, 1)), + zero_module(conv_nd(dims, ch, out_channels, 1)), + nn.Flatten(), + ) + elif pool == "attention": + assert num_head_channels != -1 + self.out = nn.Sequential( + normalization(ch), + nn.SiLU(), + AttentionPool2d( + (image_size // ds), ch, num_head_channels, out_channels + ), + ) + elif pool == "spatial": + self.out = nn.Sequential( + nn.Linear(self._feature_size, 2048), + nn.ReLU(), + nn.Linear(2048, self.out_channels), + ) + elif pool == "spatial_v2": + self.out = nn.Sequential( + nn.Linear(self._feature_size, 2048), + normalization(2048), + nn.SiLU(), + nn.Linear(2048, self.out_channels), + ) + else: + raise NotImplementedError(f"Unexpected {pool} pooling") + + def convert_to_fp16(self): + """ + Convert the torso of the model to float16. + """ + self.input_blocks.apply(convert_module_to_f16) + self.middle_block.apply(convert_module_to_f16) + + def convert_to_fp32(self): + """ + Convert the torso of the model to float32. + """ + self.input_blocks.apply(convert_module_to_f32) + self.middle_block.apply(convert_module_to_f32) + + def forward(self, x, timesteps): + """ + Apply the model to an input batch. + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :return: an [N x K] Tensor of outputs. + """ + emb = self.time_embed(timestep_embedding(timesteps, self.model_channels), use_fp16=self.use_fp16) + + # future support + if self.dtype == th.float32: + self.dtype == x.dtype + + results = [] + h = x.type(self.dtype) + for module in self.input_blocks: + h = module(h, emb) + if self.pool.startswith("spatial"): + results.append(h.type(x.dtype).mean(dim=(2, 3))) + h = self.middle_block(h, emb) + if self.pool.startswith("spatial"): + results.append(h.type(x.dtype).mean(dim=(2, 3))) + h = th.cat(results, axis=-1) + return self.out(h) + else: + h = h.type(x.dtype) + return self.out(h) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py new file mode 100644 index 000000000000..d50545106da5 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py @@ -0,0 +1,276 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# adopted from +# https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/gaussian_diffusion.py +# and +# https://github.com/lucidrains/denoising-diffusion-pytorch/blob/7706bdfc6f527f58d33f84b7b522e61e6e3164b3/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py +# and +# https://github.com/openai/guided-diffusion/blob/0ba878e517b276c45d1195eb29f6f5f72659a05b/guided_diffusion/nn.py +# +# thanks! + + +import math +import numpy as np +import os +import torch +import torch.nn as nn +from einops import repeat +from torch._dynamo import disable + + +def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + if schedule == "linear": + betas = ( + torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2 + ) + + elif schedule == "cosine": + timesteps = ( + torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s + ) + alphas = timesteps / (1 + cosine_s) * np.pi / 2 + alphas = torch.cos(alphas).pow(2) + alphas = alphas / alphas[0] + betas = 1 - alphas[1:] / alphas[:-1] + betas = np.clip(betas, a_min=0, a_max=0.999) + + elif schedule == "sqrt_linear": + betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) + elif schedule == "sqrt": + betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5 + else: + raise ValueError(f"schedule '{schedule}' unknown.") + return betas.numpy() + + +def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps, verbose=True): + if ddim_discr_method == 'uniform': + c = num_ddpm_timesteps // num_ddim_timesteps + ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c))) + elif ddim_discr_method == 'quad': + ddim_timesteps = ((np.linspace(0, np.sqrt(num_ddpm_timesteps * .8), num_ddim_timesteps)) ** 2).astype(int) + else: + raise NotImplementedError(f'There is no ddim discretization method called "{ddim_discr_method}"') + + # assert ddim_timesteps.shape[0] == num_ddim_timesteps + # add one to get the final alpha values right (the ones from first scale to data during sampling) + steps_out = ddim_timesteps + 1 + if verbose: + print(f'Selected timesteps for ddim sampler: {steps_out}') + return steps_out + + +def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbose=True): + # select alphas for computing the variance schedule + alphas = alphacums[ddim_timesteps] + alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_timesteps[:-1]].tolist()) + + # according the the formula provided in https://arxiv.org/abs/2010.02502 + sigmas = eta * np.sqrt((1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev)) + if verbose: + print(f'Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}') + print(f'For the chosen value of eta, which is {eta}, ' + f'this results in the following sigma_t schedule for ddim sampler {sigmas}') + return sigmas, alphas, alphas_prev + + +def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, + which defines the cumulative product of (1-beta) over time from t = [0,1]. + :param num_diffusion_timesteps: the number of betas to produce. + :param alpha_bar: a lambda that takes an argument t from 0 to 1 and + produces the cumulative product of (1-beta) up to that + part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. + """ + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas) + + +def extract_into_tensor(a, t, x_shape): + b, *_ = t.shape + out = a.gather(-1, t) + return out.reshape(b, *((1,) * (len(x_shape) - 1))) + + +def checkpoint(func, inputs, params, flag): + """ + Evaluate a function without caching intermediate activations, allowing for + reduced memory at the expense of extra compute in the backward pass. + :param func: the function to evaluate. + :param inputs: the argument sequence to pass to `func`. + :param params: a sequence of parameters `func` depends on but does not + explicitly take as arguments. + :param flag: if False, disable gradient checkpointing. + """ + if flag: + args = tuple(inputs) + tuple(params) + return CheckpointFunction.apply(func, len(inputs), *args) + else: + return func(*inputs) + + +class CheckpointFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, run_function, length, *args): + ctx.run_function = run_function + ctx.input_tensors = list(args[:length]) + ctx.input_params = list(args[length:]) + + with torch.no_grad(): + output_tensors = ctx.run_function(*ctx.input_tensors) + return output_tensors + + @staticmethod + def backward(ctx, *output_grads): + ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors] + with torch.enable_grad(): + # Fixes a bug where the first op in run_function modifies the + # Tensor storage in place, which is not allowed for detach()'d + # Tensors. + shallow_copies = [x.view_as(x) for x in ctx.input_tensors] + output_tensors = ctx.run_function(*shallow_copies) + input_grads = torch.autograd.grad( + output_tensors, + ctx.input_tensors + ctx.input_params, + output_grads, + allow_unused=True, + ) + del ctx.input_tensors + del ctx.input_params + del output_tensors + return (None, None) + input_grads + + +# Temporary hack to get rid of TorchDynamo issue with DDP +# TODO: remove this if https://github.com/pytorch/pytorch/issues/94574 fixed +@disable +def get_idx(end, device): + return torch.arange(start=0, end=end, dtype=torch.float32, device=device) + + +def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False, use_fp16=False): + """ + Create sinusoidal timestep embeddings. + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an [N x dim] Tensor of positional embeddings. + """ + if not repeat_only: + half = dim // 2 + idx = get_idx(half, timesteps.device) + freqs = torch.exp(-math.log(max_period) / half * idx) + args = timesteps[:, None].float() * freqs[None] + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + else: + embedding = repeat(timesteps, 'b -> b d', d=dim) + if use_fp16: + return embedding.half() + else: + return embedding + + +def zero_module(module): + """ + Zero out the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().zero_() + return module + + +def scale_module(module, scale): + """ + Scale the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().mul_(scale) + return module + + +def mean_flat(tensor): + """ + Take the mean over all non-batch dimensions. + """ + return tensor.mean(dim=list(range(1, len(tensor.shape)))) + + +def normalization(channels): + """ + Make a standard normalization layer. + :param channels: number of input channels. + :return: an nn.Module for normalization. + """ + return GroupNorm32(32, channels) + + +# PyTorch 1.7 has SiLU, but we support PyTorch 1.5. +class SiLU(nn.Module): + def forward(self, x): + return x * torch.sigmoid(x) + + +class GroupNorm32(nn.GroupNorm): + def forward(self, x): + return super().forward(x.float()).type(x.dtype) + + +def conv_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D convolution module. + """ + if dims == 1: + return nn.Conv1d(*args, **kwargs) + elif dims == 2: + return nn.Conv2d(*args, **kwargs) + elif dims == 3: + return nn.Conv3d(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def linear(*args, **kwargs): + """ + Create a linear module. + """ + return nn.Linear(*args, **kwargs) + + +def avg_pool_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D average pooling module. + """ + if dims == 1: + return nn.AvgPool1d(*args, **kwargs) + elif dims == 2: + return nn.AvgPool2d(*args, **kwargs) + elif dims == 3: + return nn.AvgPool3d(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def noise_like(shape, device, repeat=False): + repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1))) + noise = lambda: torch.randn(shape, device=device) + return repeat_noise() if repeat else noise() diff --git a/nemo/collections/multimodal/modules/stable_diffusion/distributions/__init__.py b/nemo/collections/multimodal/modules/stable_diffusion/distributions/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/distributions/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py b/nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py new file mode 100644 index 000000000000..096b3acf94f3 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py @@ -0,0 +1,105 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import torch + + +class AbstractDistribution: + def sample(self): + raise NotImplementedError() + + def mode(self): + raise NotImplementedError() + + +class DiracDistribution(AbstractDistribution): + def __init__(self, value): + self.value = value + + def sample(self): + return self.value + + def mode(self): + return self.value + + +class DiagonalGaussianDistribution(object): + def __init__(self, parameters, deterministic=False): + self.parameters = parameters + self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) + self.logvar = torch.clamp(self.logvar, -30.0, 20.0) + self.deterministic = deterministic + self.std = torch.exp(0.5 * self.logvar) + self.var = torch.exp(self.logvar) + if self.deterministic: + self.var = self.std = torch.zeros_like(self.mean, device=self.parameters.device) + + def sample(self): + x = self.mean + self.std * torch.randn(self.mean.shape, device=self.parameters.device) + return x + + def kl(self, other=None): + if self.deterministic: + return torch.Tensor([0.]) + else: + if other is None: + return 0.5 * torch.sum(torch.pow(self.mean, 2) + + self.var - 1.0 - self.logvar, + dim=[1, 2, 3]) + else: + return 0.5 * torch.sum( + torch.pow(self.mean - other.mean, 2) / other.var + + self.var / other.var - 1.0 - self.logvar + other.logvar, + dim=[1, 2, 3]) + + def nll(self, sample, dims=[1, 2, 3]): + if self.deterministic: + return torch.Tensor([0.]) + logtwopi = np.log(2.0 * np.pi) + return 0.5 * torch.sum( + logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, + dim=dims) + + def mode(self): + return self.mean + + +def normal_kl(mean1, logvar1, mean2, logvar2): + """ + source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 + Compute the KL divergence between two gaussians. + Shapes are automatically broadcasted, so batches can be compared to + scalars, among other use cases. + """ + tensor = None + for obj in (mean1, logvar1, mean2, logvar2): + if isinstance(obj, torch.Tensor): + tensor = obj + break + assert tensor is not None, "at least one argument must be a Tensor" + + # Force variances to be Tensors. Broadcasting helps convert scalars to + # Tensors, but it does not work for torch.exp(). + logvar1, logvar2 = [ + x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) + for x in (logvar1, logvar2) + ] + + return 0.5 * ( + -1.0 + + logvar2 + - logvar1 + + torch.exp(logvar1 - logvar2) + + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) + ) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/__init__.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py new file mode 100644 index 000000000000..10d9a03362f4 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py @@ -0,0 +1,212 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import kornia +import torch +import torch.nn as nn +from einops import rearrange, repeat +from functools import partial +from transformers import CLIPTokenizer, CLIPTextModel, CLIPTextConfig +from transformers.models.clip.modeling_clip import CLIPTextTransformer + +from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import Encoder, \ + TransformerWrapper # TODO: can we directly rely on lucidrains code and simply add this as a reuirement? --> test + + +class AbstractEncoder(nn.Module): + def __init__(self): + super().__init__() + + def encode(self, *args, **kwargs): + raise NotImplementedError + + +class ClassEmbedder(nn.Module): + def __init__(self, embed_dim, n_classes=1000, key='class'): + super().__init__() + self.key = key + self.embedding = nn.Embedding(n_classes, embed_dim) + + def forward(self, batch, key=None): + if key is None: + key = self.key + # this is for use in crossattn + c = batch[key][:, None] + c = self.embedding(c) + return c + + +class TransformerEmbedder(AbstractEncoder): + """Some transformer encoder layers""" + + def __init__(self, n_embed, n_layer, vocab_size, max_seq_len=77, device="cuda"): + super().__init__() + self.device = device + self.transformer = TransformerWrapper(num_tokens=vocab_size, max_seq_len=max_seq_len, + attn_layers=Encoder(dim=n_embed, depth=n_layer)) + + def forward(self, tokens): + tokens = tokens.to(self.device) # meh + z = self.transformer(tokens, return_embeddings=True) + return z + + def encode(self, x): + return self(x) + + +class BERTTokenizer(AbstractEncoder): + """ Uses a pretrained BERT tokenizer by huggingface. Vocab size: 30522 (?)""" + + def __init__(self, device="cuda", vq_interface=True, max_length=77): + super().__init__() + from transformers import BertTokenizerFast # TODO: add to reuquirements + self.tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased") + self.device = device + self.vq_interface = vq_interface + self.max_length = max_length + + def forward(self, text): + batch_encoding = self.tokenizer(text, truncation=True, max_length=self.max_length, return_length=True, + return_overflowing_tokens=False, padding="max_length", return_tensors="pt") + tokens = batch_encoding["input_ids"].to(self.device) + return tokens + + @torch.no_grad() + def encode(self, text): + tokens = self(text) + if not self.vq_interface: + return tokens + return None, None, [None, None, tokens] + + def decode(self, text): + return text + + +class BERTEmbedder(AbstractEncoder): + """Uses the BERT tokenizr model and add some transformer encoder layers""" + + def __init__(self, n_embed, n_layer, vocab_size=30522, max_seq_len=77, + device="cuda", use_tokenizer=True, embedding_dropout=0.0): + super().__init__() + self.use_tknz_fn = use_tokenizer + if self.use_tknz_fn: + self.tknz_fn = BERTTokenizer(vq_interface=False, max_length=max_seq_len) + self.device = device + self.transformer = TransformerWrapper(num_tokens=vocab_size, max_seq_len=max_seq_len, + attn_layers=Encoder(dim=n_embed, depth=n_layer), + emb_dropout=embedding_dropout) + + def forward(self, text): + if self.use_tknz_fn: + tokens = self.tknz_fn(text) # .to(self.device) + else: + tokens = text + z = self.transformer(tokens, return_embeddings=True) + return z + + def encode(self, text): + # output of length 77 + return self(text) + + +class SpatialRescaler(nn.Module): + def __init__(self, + n_stages=1, + method='bilinear', + multiplier=0.5, + in_channels=3, + out_channels=None, + bias=False): + super().__init__() + self.n_stages = n_stages + assert self.n_stages >= 0 + assert method in ['nearest', 'linear', 'bilinear', 'trilinear', 'bicubic', 'area'] + self.multiplier = multiplier + self.interpolator = partial(torch.nn.functional.interpolate, mode=method) + self.remap_output = out_channels is not None + if self.remap_output: + print(f'Spatial Rescaler mapping from {in_channels} to {out_channels} channels after resizing.') + self.channel_mapper = nn.Conv2d(in_channels, out_channels, 1, bias=bias) + + def forward(self, x): + for stage in range(self.n_stages): + x = self.interpolator(x, scale_factor=self.multiplier) + + if self.remap_output: + x = self.channel_mapper(x) + return x + + def encode(self, x): + return self(x) + + +class CLIPTextModelZero(CLIPTextModel): + config_class = CLIPTextConfig + + def __init__(self, config: CLIPTextConfig): + super().__init__(config) + self.text_model = CLIPTextTransformerZero(config) + + +class CLIPTextTransformerZero(CLIPTextTransformer): + def _build_causal_attention_mask(self, bsz, seq_len, dtype, device=None): # TODO mmy check dtype + # lazily create causal attention mask, with full attention between the vision tokens + # pytorch uses additive attention mask; fill with -inf + mask = torch.empty(bsz, seq_len, seq_len, device=device) + mask.fill_(float("-inf")) + mask.triu_(1) # zero out the lower diagonal + mask = mask.unsqueeze(1) # expand mask + return mask.half() + + +class FrozenCLIPEmbedder(AbstractEncoder): + """Uses the CLIP transformer encoder for text (from Hugging Face)""" + + def __init__(self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77, use_fp16=False): + super().__init__() + self.tokenizer = CLIPTokenizer.from_pretrained(version) + if use_fp16: + self.transformer = CLIPTextModelZero.from_pretrained(version) + else: + self.transformer = CLIPTextModel.from_pretrained(version) + self.device = device + self.max_length = max_length + self.freeze() + + def freeze(self): + self.transformer = self.transformer.eval() + for param in self.parameters(): + param.requires_grad = False + + def forward(self, text): + batch_encoding = self.tokenizer(text, truncation=True, max_length=self.max_length, return_length=True, + return_overflowing_tokens=False, padding="max_length", return_tensors="pt") + tokens = batch_encoding["input_ids"].to(self.device, non_blocking=True) + outputs = self.transformer(input_ids=tokens) + + z = outputs.last_hidden_state + + # # Pad the seq length to multiple of 8 + seq_len = (z.shape[1] + 8 - 1) // 8 * 8 + z = torch.nn.functional.pad(z, (0, 0, 0, seq_len - z.shape[1]), value=0.0) + return z + + def encode(self, text): + return self(text) + + +if __name__ == "__main__": + from ldm.util import count_params + + model = FrozenCLIPEmbedder() + count_params(model, verbose=True) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py new file mode 100644 index 000000000000..69fa52e2b352 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py @@ -0,0 +1,655 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""shout-out to https://github.com/lucidrains/x-transformers/tree/main/x_transformers""" +import torch +import torch.nn.functional as F +from collections import namedtuple +from einops import rearrange, repeat, reduce +from functools import partial +from inspect import isfunction +from torch import nn, einsum + +# constants + +DEFAULT_DIM_HEAD = 64 + +Intermediates = namedtuple('Intermediates', [ + 'pre_softmax_attn', + 'post_softmax_attn' +]) + +LayerIntermediates = namedtuple('Intermediates', [ + 'hiddens', + 'attn_intermediates' +]) + + +class AbsolutePositionalEmbedding(nn.Module): + def __init__(self, dim, max_seq_len): + super().__init__() + self.emb = nn.Embedding(max_seq_len, dim) + self.init_() + + def init_(self): + nn.init.normal_(self.emb.weight, std=0.02) + + def forward(self, x): + n = torch.arange(x.shape[1], device=x.device) + return self.emb(n)[None, :, :] + + +class FixedPositionalEmbedding(nn.Module): + def __init__(self, dim): + super().__init__() + inv_freq = 1. / (10000 ** (torch.arange(0, dim, 2).float() / dim)) + self.register_buffer('inv_freq', inv_freq) + + def forward(self, x, seq_dim=1, offset=0): + t = torch.arange(x.shape[seq_dim], device=x.device).type_as(self.inv_freq) + offset + sinusoid_inp = torch.einsum('i , j -> i j', t, self.inv_freq) + emb = torch.cat((sinusoid_inp.sin(), sinusoid_inp.cos()), dim=-1) + return emb[None, :, :] + + +# helpers + +def exists(val): + return val is not None + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +def always(val): + def inner(*args, **kwargs): + return val + + return inner + + +def not_equals(val): + def inner(x): + return x != val + + return inner + + +def equals(val): + def inner(x): + return x == val + + return inner + + +def max_neg_value(tensor): + return -torch.finfo(tensor.dtype).max + + +# keyword argument helpers + +def pick_and_pop(keys, d): + values = list(map(lambda key: d.pop(key), keys)) + return dict(zip(keys, values)) + + +def group_dict_by_key(cond, d): + return_val = [dict(), dict()] + for key in d.keys(): + match = bool(cond(key)) + ind = int(not match) + return_val[ind][key] = d[key] + return (*return_val,) + + +def string_begins_with(prefix, str): + return str.startswith(prefix) + + +def group_by_key_prefix(prefix, d): + return group_dict_by_key(partial(string_begins_with, prefix), d) + + +def groupby_prefix_and_trim(prefix, d): + kwargs_with_prefix, kwargs = group_dict_by_key(partial(string_begins_with, prefix), d) + kwargs_without_prefix = dict(map(lambda x: (x[0][len(prefix):], x[1]), tuple(kwargs_with_prefix.items()))) + return kwargs_without_prefix, kwargs + + +# classes +class Scale(nn.Module): + def __init__(self, value, fn): + super().__init__() + self.value = value + self.fn = fn + + def forward(self, x, **kwargs): + x, *rest = self.fn(x, **kwargs) + return (x * self.value, *rest) + + +class Rezero(nn.Module): + def __init__(self, fn): + super().__init__() + self.fn = fn + self.g = nn.Parameter(torch.zeros(1)) + + def forward(self, x, **kwargs): + x, *rest = self.fn(x, **kwargs) + return (x * self.g, *rest) + + +class ScaleNorm(nn.Module): + def __init__(self, dim, eps=1e-5): + super().__init__() + self.scale = dim ** -0.5 + self.eps = eps + self.g = nn.Parameter(torch.ones(1)) + + def forward(self, x): + norm = torch.norm(x, dim=-1, keepdim=True) * self.scale + return x / norm.clamp(min=self.eps) * self.g + + +class RMSNorm(nn.Module): + def __init__(self, dim, eps=1e-8): + super().__init__() + self.scale = dim ** -0.5 + self.eps = eps + self.g = nn.Parameter(torch.ones(dim)) + + def forward(self, x): + norm = torch.norm(x, dim=-1, keepdim=True) * self.scale + return x / norm.clamp(min=self.eps) * self.g + + +class Residual(nn.Module): + def forward(self, x, residual): + return x + residual + + +class GRUGating(nn.Module): + def __init__(self, dim): + super().__init__() + self.gru = nn.GRUCell(dim, dim) + + def forward(self, x, residual): + gated_output = self.gru( + rearrange(x, 'b n d -> (b n) d'), + rearrange(residual, 'b n d -> (b n) d') + ) + + return gated_output.reshape_as(x) + + +# feedforward + +class GEGLU(nn.Module): + def __init__(self, dim_in, dim_out): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out * 2) + + def forward(self, x): + x, gate = self.proj(x).chunk(2, dim=-1) + return x * F.gelu(gate) + + +class FeedForward(nn.Module): + def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.): + super().__init__() + inner_dim = int(dim * mult) + dim_out = default(dim_out, dim) + project_in = nn.Sequential( + nn.Linear(dim, inner_dim), + nn.GELU() + ) if not glu else GEGLU(dim, inner_dim) + + self.net = nn.Sequential( + project_in, + nn.Dropout(dropout), + nn.Linear(inner_dim, dim_out) + ) + + def forward(self, x): + return self.net(x) + + +# attention. +class Attention(nn.Module): + def __init__( + self, + dim, + dim_head=DEFAULT_DIM_HEAD, + heads=8, + causal=False, + mask=None, + talking_heads=False, + sparse_topk=None, + use_entmax15=False, + num_mem_kv=0, + dropout=0., + on_attn=False + ): + super().__init__() + if use_entmax15: + raise NotImplementedError("Check out entmax activation instead of softmax activation!") + self.scale = dim_head ** -0.5 + self.heads = heads + self.causal = causal + self.mask = mask + + inner_dim = dim_head * heads + + self.to_q = nn.Linear(dim, inner_dim, bias=False) + self.to_k = nn.Linear(dim, inner_dim, bias=False) + self.to_v = nn.Linear(dim, inner_dim, bias=False) + self.dropout = nn.Dropout(dropout) + + # talking heads + self.talking_heads = talking_heads + if talking_heads: + self.pre_softmax_proj = nn.Parameter(torch.randn(heads, heads)) + self.post_softmax_proj = nn.Parameter(torch.randn(heads, heads)) + + # explicit topk sparse attention + self.sparse_topk = sparse_topk + + # entmax + # self.attn_fn = entmax15 if use_entmax15 else F.softmax + self.attn_fn = F.softmax + + # add memory key / values + self.num_mem_kv = num_mem_kv + if num_mem_kv > 0: + self.mem_k = nn.Parameter(torch.randn(heads, num_mem_kv, dim_head)) + self.mem_v = nn.Parameter(torch.randn(heads, num_mem_kv, dim_head)) + + # attention on attention + self.attn_on_attn = on_attn + self.to_out = nn.Sequential(nn.Linear(inner_dim, dim * 2), nn.GLU()) if on_attn else nn.Linear(inner_dim, dim) + + def forward( + self, + x, + context=None, + mask=None, + context_mask=None, + rel_pos=None, + sinusoidal_emb=None, + prev_attn=None, + mem=None + ): + b, n, _, h, talking_heads, device = *x.shape, self.heads, self.talking_heads, x.device + kv_input = default(context, x) + + q_input = x + k_input = kv_input + v_input = kv_input + + if exists(mem): + k_input = torch.cat((mem, k_input), dim=-2) + v_input = torch.cat((mem, v_input), dim=-2) + + if exists(sinusoidal_emb): + # in shortformer, the query would start at a position offset depending on the past cached memory + offset = k_input.shape[-2] - q_input.shape[-2] + q_input = q_input + sinusoidal_emb(q_input, offset=offset) + k_input = k_input + sinusoidal_emb(k_input) + + q = self.to_q(q_input) + k = self.to_k(k_input) + v = self.to_v(v_input) + + q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=h), (q, k, v)) + + input_mask = None + if any(map(exists, (mask, context_mask))): + q_mask = default(mask, lambda: torch.ones((b, n), device=device).bool()) + k_mask = q_mask if not exists(context) else context_mask + k_mask = default(k_mask, lambda: torch.ones((b, k.shape[-2]), device=device).bool()) + q_mask = rearrange(q_mask, 'b i -> b () i ()') + k_mask = rearrange(k_mask, 'b j -> b () () j') + input_mask = q_mask * k_mask + + if self.num_mem_kv > 0: + mem_k, mem_v = map(lambda t: repeat(t, 'h n d -> b h n d', b=b), (self.mem_k, self.mem_v)) + k = torch.cat((mem_k, k), dim=-2) + v = torch.cat((mem_v, v), dim=-2) + if exists(input_mask): + input_mask = F.pad(input_mask, (self.num_mem_kv, 0), value=True) + + dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale + mask_value = max_neg_value(dots) + + if exists(prev_attn): + dots = dots + prev_attn + + pre_softmax_attn = dots + + if talking_heads: + dots = einsum('b h i j, h k -> b k i j', dots, self.pre_softmax_proj).contiguous() + + if exists(rel_pos): + dots = rel_pos(dots) + + if exists(input_mask): + dots.masked_fill_(~input_mask, mask_value) + del input_mask + + if self.causal: + i, j = dots.shape[-2:] + r = torch.arange(i, device=device) + mask = rearrange(r, 'i -> () () i ()') < rearrange(r, 'j -> () () () j') + mask = F.pad(mask, (j - i, 0), value=False) + dots.masked_fill_(mask, mask_value) + del mask + + if exists(self.sparse_topk) and self.sparse_topk < dots.shape[-1]: + top, _ = dots.topk(self.sparse_topk, dim=-1) + vk = top[..., -1].unsqueeze(-1).expand_as(dots) + mask = dots < vk + dots.masked_fill_(mask, mask_value) + del mask + + attn = self.attn_fn(dots, dim=-1) + post_softmax_attn = attn + + attn = self.dropout(attn) + + if talking_heads: + attn = einsum('b h i j, h k -> b k i j', attn, self.post_softmax_proj).contiguous() + + out = einsum('b h i j, b h j d -> b h i d', attn, v) + out = rearrange(out, 'b h n d -> b n (h d)') + + intermediates = Intermediates( + pre_softmax_attn=pre_softmax_attn, + post_softmax_attn=post_softmax_attn + ) + + return self.to_out(out), intermediates + + +class AttentionLayers(nn.Module): + def __init__( + self, + dim, + depth, + heads=8, + causal=False, + cross_attend=False, + only_cross=False, + use_scalenorm=False, + use_rmsnorm=False, + use_rezero=False, + rel_pos_num_buckets=32, + rel_pos_max_distance=128, + position_infused_attn=False, + custom_layers=None, + sandwich_coef=None, + par_ratio=None, + residual_attn=False, + cross_residual_attn=False, + macaron=False, + pre_norm=True, + gate_residual=False, + **kwargs + ): + super().__init__() + ff_kwargs, kwargs = groupby_prefix_and_trim('ff_', kwargs) + attn_kwargs, _ = groupby_prefix_and_trim('attn_', kwargs) + + dim_head = attn_kwargs.get('dim_head', DEFAULT_DIM_HEAD) + + self.dim = dim + self.depth = depth + self.layers = nn.ModuleList([]) + + self.has_pos_emb = position_infused_attn + self.pia_pos_emb = FixedPositionalEmbedding(dim) if position_infused_attn else None + self.rotary_pos_emb = always(None) + + assert rel_pos_num_buckets <= rel_pos_max_distance, 'number of relative position buckets must be less than the relative position max distance' + self.rel_pos = None + + self.pre_norm = pre_norm + + self.residual_attn = residual_attn + self.cross_residual_attn = cross_residual_attn + + norm_class = ScaleNorm if use_scalenorm else nn.LayerNorm + norm_class = RMSNorm if use_rmsnorm else norm_class + norm_fn = partial(norm_class, dim) + + norm_fn = nn.Identity if use_rezero else norm_fn + branch_fn = Rezero if use_rezero else None + + if cross_attend and not only_cross: + default_block = ('a', 'c', 'f') + elif cross_attend and only_cross: + default_block = ('c', 'f') + else: + default_block = ('a', 'f') + + if macaron: + default_block = ('f',) + default_block + + if exists(custom_layers): + layer_types = custom_layers + elif exists(par_ratio): + par_depth = depth * len(default_block) + assert 1 < par_ratio <= par_depth, 'par ratio out of range' + default_block = tuple(filter(not_equals('f'), default_block)) + par_attn = par_depth // par_ratio + depth_cut = par_depth * 2 // 3 # 2 / 3 attention layer cutoff suggested by PAR paper + par_width = (depth_cut + depth_cut // par_attn) // par_attn + assert len(default_block) <= par_width, 'default block is too large for par_ratio' + par_block = default_block + ('f',) * (par_width - len(default_block)) + par_head = par_block * par_attn + layer_types = par_head + ('f',) * (par_depth - len(par_head)) + elif exists(sandwich_coef): + assert sandwich_coef > 0 and sandwich_coef <= depth, 'sandwich coefficient should be less than the depth' + layer_types = ('a',) * sandwich_coef + default_block * (depth - sandwich_coef) + ('f',) * sandwich_coef + else: + layer_types = default_block * depth + + self.layer_types = layer_types + self.num_attn_layers = len(list(filter(equals('a'), layer_types))) + + for layer_type in self.layer_types: + if layer_type == 'a': + layer = Attention(dim, heads=heads, causal=causal, **attn_kwargs) + elif layer_type == 'c': + layer = Attention(dim, heads=heads, **attn_kwargs) + elif layer_type == 'f': + layer = FeedForward(dim, **ff_kwargs) + layer = layer if not macaron else Scale(0.5, layer) + else: + raise Exception(f'invalid layer type {layer_type}') + + if isinstance(layer, Attention) and exists(branch_fn): + layer = branch_fn(layer) + + if gate_residual: + residual_fn = GRUGating(dim) + else: + residual_fn = Residual() + + self.layers.append(nn.ModuleList([ + norm_fn(), + layer, + residual_fn + ])) + + def forward( + self, + x, + context=None, + mask=None, + context_mask=None, + mems=None, + return_hiddens=False + ): + hiddens = [] + intermediates = [] + prev_attn = None + prev_cross_attn = None + + mems = mems.copy() if exists(mems) else [None] * self.num_attn_layers + + for ind, (layer_type, (norm, block, residual_fn)) in enumerate(zip(self.layer_types, self.layers)): + is_last = ind == (len(self.layers) - 1) + + if layer_type == 'a': + hiddens.append(x) + layer_mem = mems.pop(0) + + residual = x + + if self.pre_norm: + x = norm(x) + + if layer_type == 'a': + out, inter = block(x, mask=mask, sinusoidal_emb=self.pia_pos_emb, rel_pos=self.rel_pos, + prev_attn=prev_attn, mem=layer_mem) + elif layer_type == 'c': + out, inter = block(x, context=context, mask=mask, context_mask=context_mask, prev_attn=prev_cross_attn) + elif layer_type == 'f': + out = block(x) + + x = residual_fn(out, residual) + + if layer_type in ('a', 'c'): + intermediates.append(inter) + + if layer_type == 'a' and self.residual_attn: + prev_attn = inter.pre_softmax_attn + elif layer_type == 'c' and self.cross_residual_attn: + prev_cross_attn = inter.pre_softmax_attn + + if not self.pre_norm and not is_last: + x = norm(x) + + if return_hiddens: + intermediates = LayerIntermediates( + hiddens=hiddens, + attn_intermediates=intermediates + ) + + return x, intermediates + + return x + + +class Encoder(AttentionLayers): + def __init__(self, **kwargs): + assert 'causal' not in kwargs, 'cannot set causality on encoder' + super().__init__(causal=False, **kwargs) + + +class TransformerWrapper(nn.Module): + def __init__( + self, + *, + num_tokens, + max_seq_len, + attn_layers, + emb_dim=None, + max_mem_len=0., + emb_dropout=0., + num_memory_tokens=None, + tie_embedding=False, + use_pos_emb=True + ): + super().__init__() + assert isinstance(attn_layers, AttentionLayers), 'attention layers must be one of Encoder or Decoder' + + dim = attn_layers.dim + emb_dim = default(emb_dim, dim) + + self.max_seq_len = max_seq_len + self.max_mem_len = max_mem_len + self.num_tokens = num_tokens + + self.token_emb = nn.Embedding(num_tokens, emb_dim) + self.pos_emb = AbsolutePositionalEmbedding(emb_dim, max_seq_len) if ( + use_pos_emb and not attn_layers.has_pos_emb) else always(0) + self.emb_dropout = nn.Dropout(emb_dropout) + + self.project_emb = nn.Linear(emb_dim, dim) if emb_dim != dim else nn.Identity() + self.attn_layers = attn_layers + self.norm = nn.LayerNorm(dim) + + self.init_() + + self.to_logits = nn.Linear(dim, num_tokens) if not tie_embedding else lambda t: t @ self.token_emb.weight.t() + + # memory tokens (like [cls]) from Memory Transformers paper + num_memory_tokens = default(num_memory_tokens, 0) + self.num_memory_tokens = num_memory_tokens + if num_memory_tokens > 0: + self.memory_tokens = nn.Parameter(torch.randn(num_memory_tokens, dim)) + + # let funnel encoder know number of memory tokens, if specified + if hasattr(attn_layers, 'num_memory_tokens'): + attn_layers.num_memory_tokens = num_memory_tokens + + def init_(self): + nn.init.normal_(self.token_emb.weight, std=0.02) + + def forward( + self, + x, + return_embeddings=False, + mask=None, + return_mems=False, + return_attn=False, + mems=None, + **kwargs + ): + b, n, device, num_mem = *x.shape, x.device, self.num_memory_tokens + x = self.token_emb(x) + x += self.pos_emb(x) + x = self.emb_dropout(x) + + x = self.project_emb(x) + + if num_mem > 0: + mem = repeat(self.memory_tokens, 'n d -> b n d', b=b) + x = torch.cat((mem, x), dim=1) + + # auto-handle masking after appending memory tokens + if exists(mask): + mask = F.pad(mask, (num_mem, 0), value=True) + + x, intermediates = self.attn_layers(x, mask=mask, mems=mems, return_hiddens=True, **kwargs) + x = self.norm(x) + + mem, x = x[:, :num_mem], x[:, num_mem:] + + out = self.to_logits(x) if not return_embeddings else x + + if return_mems: + hiddens = intermediates.hiddens + new_mems = list(map(lambda pair: torch.cat(pair, dim=-2), zip(mems, hiddens))) if exists(mems) else hiddens + new_mems = list(map(lambda t: t[..., -self.max_mem_len:, :].detach(), new_mems)) + return out, new_mems + + if return_attn: + attn_maps = list(map(lambda t: t.post_softmax_attn, intermediates.attn_intermediates)) + return out, attn_maps + + return out diff --git a/nemo/collections/multimodal/parts/__init__.py b/nemo/collections/multimodal/parts/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/parts/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/parts/stable_diffusion/__init__.py b/nemo/collections/multimodal/parts/stable_diffusion/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/parts/stable_diffusion/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py b/nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py new file mode 100644 index 000000000000..e72249b1ff0e --- /dev/null +++ b/nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py @@ -0,0 +1,113 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np + + +class LambdaWarmUpCosineScheduler: + """ + note: use with a base_lr of 1.0 + """ + + def __init__(self, warm_up_steps, lr_min, lr_max, lr_start, max_decay_steps, verbosity_interval=0): + self.lr_warm_up_steps = warm_up_steps + self.lr_start = lr_start + self.lr_min = lr_min + self.lr_max = lr_max + self.lr_max_decay_steps = max_decay_steps + self.last_lr = 0. + self.verbosity_interval = verbosity_interval + + def schedule(self, n, **kwargs): + if self.verbosity_interval > 0: + if n % self.verbosity_interval == 0: print(f"current step: {n}, recent lr-multiplier: {self.last_lr}") + if n < self.lr_warm_up_steps: + lr = (self.lr_max - self.lr_start) / self.lr_warm_up_steps * n + self.lr_start + self.last_lr = lr + return lr + else: + t = (n - self.lr_warm_up_steps) / (self.lr_max_decay_steps - self.lr_warm_up_steps) + t = min(t, 1.0) + lr = self.lr_min + 0.5 * (self.lr_max - self.lr_min) * ( + 1 + np.cos(t * np.pi)) + self.last_lr = lr + return lr + + def __call__(self, n, **kwargs): + return self.schedule(n, **kwargs) + + +class LambdaWarmUpCosineScheduler2: + """ + supports repeated iterations, configurable via lists + note: use with a base_lr of 1.0. + """ + + def __init__(self, warm_up_steps, f_min, f_max, f_start, cycle_lengths, verbosity_interval=0): + assert len(warm_up_steps) == len(f_min) == len(f_max) == len(f_start) == len(cycle_lengths) + self.lr_warm_up_steps = warm_up_steps + self.f_start = f_start + self.f_min = f_min + self.f_max = f_max + self.cycle_lengths = cycle_lengths + self.cum_cycles = np.cumsum([0] + list(self.cycle_lengths)) + self.last_f = 0. + self.verbosity_interval = verbosity_interval + + def find_in_interval(self, n): + interval = 0 + for cl in self.cum_cycles[1:]: + if n <= cl: + return interval + interval += 1 + + def schedule(self, n, **kwargs): + cycle = self.find_in_interval(n) + n = n - self.cum_cycles[cycle] + if self.verbosity_interval > 0: + if n % self.verbosity_interval == 0: print(f"current step: {n}, recent lr-multiplier: {self.last_f}, " + f"current cycle {cycle}") + if n < self.lr_warm_up_steps[cycle]: + f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle] + self.last_f = f + return f + else: + t = (n - self.lr_warm_up_steps[cycle]) / (self.cycle_lengths[cycle] - self.lr_warm_up_steps[cycle]) + t = min(t, 1.0) + f = self.f_min[cycle] + 0.5 * (self.f_max[cycle] - self.f_min[cycle]) * ( + 1 + np.cos(t * np.pi)) + self.last_f = f + return f + + def __call__(self, n, **kwargs): + return self.schedule(n, **kwargs) + + +class LambdaLinearScheduler(LambdaWarmUpCosineScheduler2): + + def schedule(self, n, **kwargs): + cycle = self.find_in_interval(n) + n = n - self.cum_cycles[cycle] + if self.verbosity_interval > 0: + if n % self.verbosity_interval == 0: print(f"current step: {n}, recent lr-multiplier: {self.last_f}, " + f"current cycle {cycle}") + + if n < self.lr_warm_up_steps[cycle]: + f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle] + self.last_f = f + return f + else: + f = self.f_min[cycle] + (self.f_max[cycle] - self.f_min[cycle]) * (self.cycle_lengths[cycle] - n) / ( + self.cycle_lengths[cycle]) + self.last_f = f + return f diff --git a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py new file mode 100644 index 000000000000..72d9bff4bacd --- /dev/null +++ b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py @@ -0,0 +1,185 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import pickle +import time +import torch +from PIL import Image +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector + +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler +from nemo.collections.multimodal.models.stable_diffusion.samplers.plms import PLMSSampler +from nemo.collections.nlp.parts.nlp_overrides import ( + NLPDDPStrategy, + NLPSaveRestoreConnector, +) + + +def encode_prompt(cond_stage_model, prompt, unconditional_guidance_scale, batch_size): + c = cond_stage_model.encode(batch_size * [prompt]) + if unconditional_guidance_scale != 1.: + uc = cond_stage_model.encode(batch_size * [""]) + else: + uc = None + return c, uc + + +def initialize_sampler(model, sampler_type): + if sampler_type == 'DDIM': + sampler = DDIMSampler(model) + elif sampler_type == 'PLMS': + sampler = PLMSSampler(model) + else: + raise ValueError(f'Sampler {sampler_type} is not supported for {cls.__name__}') + return sampler + + +def decode_images(model, samples): + images = model.decode_first_stage(samples) + + images = torch.clamp((images + 1.) / 2., min=0., max=1.) + + return images + + +def numpy_to_pil(images): + """ + Convert a numpy image or a batch of images to a PIL image. + """ + if images.ndim == 3: + images = images[None, ...] + images = (images * 255).round().astype("uint8") + pil_images = [Image.fromarray(image) for image in images] + + return pil_images + + +def torch_to_numpy(images): + numpy_images = [x.float().cpu().permute(0, 2, 3, 1).numpy() for x in images] + return numpy_images + + +def pipeline(model, cfg, verbose=True, rng=None): + # setup default values for inference configs + unconditional_guidance_scale = cfg.infer.get("unconditional_guidance_scale", 7.5) + batch_size = cfg.infer.get('num_images_per_prompt', 1) + prompts = cfg.infer.get('prompts', []) + height = cfg.infer.get('height', 512) + width = cfg.infer.get('width', 512) + downsampling_factor = cfg.infer.get('down_factor', 8) + sampler_type = cfg.infer.get('sampler_type', 'DDIM') + inference_steps = cfg.infer.get('inference_steps', 50) + output_type = cfg.infer.get('output_type', 'pil') + save_to_file = cfg.infer.get('save_to_file', True) + out_path = cfg.infer.get('out_path', '') + eta = cfg.infer.get('eta', 0) + + # get autocast_dtype + if cfg.trainer.precision == 'bf16': + autocast_dtype = torch.bfloat16 + elif int(cfg.trainer.precision) == 32: + autocast_dtype = torch.float + elif int(cfg.trainer.precision) == 16: + autocast_dtype = torch.half + else: + raise ValueError('precision must be in [32, 16, "bf16"]') + + with torch.no_grad(), torch.cuda.amp.autocast(enabled=autocast_dtype in (torch.half, torch.bfloat16), + dtype=autocast_dtype, ): + + in_channels = model.model.diffusion_model.in_channels + + sampler = initialize_sampler(model, sampler_type.upper()) + + output = [] + throughput = [] + + if isinstance(prompts, str): + prompts = [prompts] + + for prompt in prompts: + tic = time.perf_counter() + tic_total = tic + cond, u_cond = encode_prompt(model.cond_stage_model, prompt, unconditional_guidance_scale, batch_size) + toc = time.perf_counter() + conditioning_time = toc - tic + + latent_shape = [batch_size, height // downsampling_factor, width // downsampling_factor] + latents = torch.randn( + [batch_size, in_channels, height // downsampling_factor, width // downsampling_factor], + generator=rng).to( + torch.cuda.current_device()) + + tic = time.perf_counter() + samples, intermediates = sampler.sample( + S=inference_steps, + conditioning=cond, + batch_size=batch_size, + shape=latent_shape, + verbose=False, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=u_cond, + eta=eta, + x_T=latents + ) + toc = time.perf_counter() + sampling_time = toc - tic + + tic = time.perf_counter() + images = decode_images(model, samples) + toc = time.perf_counter() + decode_time = toc - tic + + toc_total = time.perf_counter() + total_time = toc_total - tic_total + output.append(images) + + throughput.append({ + 'text-conditioning-time': conditioning_time, + 'sampling-time': sampling_time, + 'decode-time': decode_time, + 'total-time': total_time, + 'sampling-steps': inference_steps, + }) + + # Convert output type and save to disk + if output_type == 'torch': + output = torch.cat(output, dim=0) + else: + output = torch_to_numpy(output) + if output_type == 'pil': + output = [numpy_to_pil(x) for x in output] + + if save_to_file: + os.makedirs(out_path, exist_ok=True) + if output_type == 'pil': + for text_prompt, pils in zip(prompts, output): + for idx, image in enumerate(pils): + image.save(os.path.join(out_path, f'{text_prompt[:50]}_{idx}.png')) + else: + with open(os.path.join(out_path, 'output.pkl'), 'wb') as f: + pickle.dump(output, f) + else: + return output + + ave_metrics = {} + for key in throughput[0].keys(): + ave_metrics[f'avg-{key}'] = sum([dicts[key] for dicts in throughput]) / len(throughput) + if verbose: + print(ave_metrics) diff --git a/nemo/collections/multimodal/parts/stable_diffusion/utils.py b/nemo/collections/multimodal/parts/stable_diffusion/utils.py new file mode 100644 index 000000000000..5c204f0af1fc --- /dev/null +++ b/nemo/collections/multimodal/parts/stable_diffusion/utils.py @@ -0,0 +1,214 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import importlib +import multiprocessing as mp +import numpy as np +import torch +from PIL import Image, ImageDraw, ImageFont +from collections import abc +from einops import rearrange +from functools import partial +from inspect import isfunction +from queue import Queue +from threading import Thread + + +def log_txt_as_img(wh, xc, size=10): + # wh a tuple of (width, height) + # xc a list of captions to plot + b = len(xc) + txts = list() + for bi in range(b): + txt = Image.new("RGB", wh, color="white") + draw = ImageDraw.Draw(txt) + font = ImageFont.truetype('data/DejaVuSans.ttf', size=size) + nc = int(40 * (wh[0] / 256)) + lines = "\n".join(xc[bi][start:start + nc] for start in range(0, len(xc[bi]), nc)) + + try: + draw.text((0, 0), lines, fill="black", font=font) + except UnicodeEncodeError: + print("Cant encode string for logging. Skipping.") + + txt = np.array(txt).transpose(2, 0, 1) / 127.5 - 1.0 + txts.append(txt) + txts = np.stack(txts) + txts = torch.tensor(txts) + return txts + + +def ismap(x): + if not isinstance(x, torch.Tensor): + return False + return (len(x.shape) == 4) and (x.shape[1] > 3) + + +def isimage(x): + if not isinstance(x, torch.Tensor): + return False + return (len(x.shape) == 4) and (x.shape[1] == 3 or x.shape[1] == 1) + + +def exists(x): + return x is not None + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +def mean_flat(tensor): + """ + https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/nn.py#L86 + Take the mean over all non-batch dimensions. + """ + return tensor.mean(dim=list(range(1, len(tensor.shape)))) + + +def count_params(model, verbose=False): + total_params = sum(p.numel() for p in model.parameters()) + if verbose: + print(f"{model.__class__.__name__} has {total_params * 1.e-6:.2f} M params.") + return total_params + + +def instantiate_from_config(config): + if not "target" in config: + if config == '__is_first_stage__': + return None + elif config == "__is_unconditional__": + return None + raise KeyError("Expected key `target` to instantiate.") + return get_obj_from_str(config["target"])(**config.get("params", dict())) + + +def get_obj_from_str(string, reload=False): + module, cls = string.rsplit(".", 1) + print(f'Getting module=<{module}>, cls=<{cls}>') + if reload: + module_imp = importlib.import_module(module) + importlib.reload(module_imp) + return getattr(importlib.import_module(module, package=None), cls) + + +def _do_parallel_data_prefetch(func, Q, data, idx, idx_to_fn=False): + # create dummy dataset instance + + # run prefetching + if idx_to_fn: + res = func(data, worker_id=idx) + else: + res = func(data) + Q.put([idx, res]) + Q.put("Done") + + +def parallel_data_prefetch( + func: callable, data, n_proc, target_data_type="ndarray", cpu_intensive=True, use_worker_id=False +): + # if target_data_type not in ["ndarray", "list"]: + # raise ValueError( + # "Data, which is passed to parallel_data_prefetch has to be either of type list or ndarray." + # ) + if isinstance(data, np.ndarray) and target_data_type == "list": + raise ValueError("list expected but function got ndarray.") + elif isinstance(data, abc.Iterable): + if isinstance(data, dict): + print( + f'WARNING:"data" argument passed to parallel_data_prefetch is a dict: Using only its values and disregarding keys.' + ) + data = list(data.values()) + if target_data_type == "ndarray": + data = np.asarray(data) + else: + data = list(data) + else: + raise TypeError( + f"The data, that shall be processed parallel has to be either an np.ndarray or an Iterable, but is actually {type(data)}." + ) + + if cpu_intensive: + Q = mp.Queue(1000) + proc = mp.Process + else: + Q = Queue(1000) + proc = Thread + # spawn processes + if target_data_type == "ndarray": + arguments = [ + [func, Q, part, i, use_worker_id] + for i, part in enumerate(np.array_split(data, n_proc)) + ] + else: + step = ( + int(len(data) / n_proc + 1) + if len(data) % n_proc != 0 + else int(len(data) / n_proc) + ) + arguments = [ + [func, Q, part, i, use_worker_id] + for i, part in enumerate( + [data[i: i + step] for i in range(0, len(data), step)] + ) + ] + processes = [] + for i in range(n_proc): + p = proc(target=_do_parallel_data_prefetch, args=arguments[i]) + processes += [p] + + # start processes + print(f"Start prefetching...") + import time + + start = time.time() + gather_res = [[] for _ in range(n_proc)] + try: + for p in processes: + p.start() + + k = 0 + while k < n_proc: + # get result + res = Q.get() + if res == "Done": + k += 1 + else: + gather_res[res[0]] = res[1] + + except Exception as e: + print("Exception: ", e) + for p in processes: + p.terminate() + + raise e + finally: + for p in processes: + p.join() + print(f"Prefetching complete. [{time.time() - start} sec.]") + + if target_data_type == 'ndarray': + if not isinstance(gather_res[0], np.ndarray): + return np.concatenate([np.asarray(r) for r in gather_res], axis=0) + + # order outputs + return np.concatenate(gather_res, axis=0) + elif target_data_type == 'list': + out = [] + for r in gather_res: + out.extend(r) + return out + else: + return gather_res diff --git a/nemo/collections/multimodal/parts/utils.py b/nemo/collections/multimodal/parts/utils.py new file mode 100644 index 000000000000..53b61023efba --- /dev/null +++ b/nemo/collections/multimodal/parts/utils.py @@ -0,0 +1,138 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import torch +from PIL import Image +from apex.transformer import parallel_state +from omegaconf import OmegaConf, DictConfig, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from typing import Dict, Tuple, Any, Callable + +from nemo.collections.nlp.parts.nlp_overrides import ( + NLPDDPStrategy, + NLPSaveRestoreConnector, +) +from nemo.utils import AppState, logging +from nemo.utils.distributed import initialize_distributed + + +def numpy_to_pil(images): + """ + Convert a numpy image or a batch of images to a PIL image. + """ + if images.ndim == 3: + images = images[None, ...] + images = (images * 255).round().astype("uint8") + pil_images = [Image.fromarray(image) for image in images] + + return pil_images + + +def randn_like(x, generator=None): + return torch.randn(x.shape, dtype=x.dtype, device=x.device, generator=generator) + + +def setup_trainer_and_model_for_inference( + model_provider: Any, + cfg: DictConfig, + model_cfg_modifier: Callable, +) -> Tuple[Trainer, Any]: + """ + Set up a trainer and NeMo model for inference. + + Args: + model_provider (Any): An object that provides the NeMo model. + cfg (DictConfig): The configuration dictionary, containing the + necessary settings for the trainer and the model. + model_cfg_modifier (Callable): A function that modifies the model + configuration for inference. + + Returns: + Tuple[Trainer, Any]: A tuple containing the trainer and the model. + """ + + # Check if we need to use the TorchElasticEnvironment plugin for the trainer. + plugins = [] + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + # Use the NLPDDPStrategy for the distributed data parallel strategy. + # We don't use DDP for async grad allreduce and don't find unused parameters. + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, + find_unused_parameters=False, + ) + + # Set up the trainer with the specified plugins and strategy. + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + # Create the NLPSaveRestoreConnector object for model saving and restoring. + save_restore_connector = NLPSaveRestoreConnector() + + if cfg.model.restore_from_path.endswith(".nemo"): + # Set the model_extracted_dir attribute if the restore path is a directory. + if os.path.isdir(cfg.model.restore_from_path): + save_restore_connector.model_extracted_dir = cfg.model.restore_from_path + + # Restore the model configuration from the specified path and modify it for inference. + model_cfg = model_provider.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + save_restore_connector=save_restore_connector, + return_config=True, + ) + with open_dict(model_cfg): + model_cfg_modifier(model_cfg) # modify the configuration for inference + + # assert ( + # cfg.trainer.devices * cfg.trainer.num_nodes + # == model_cfg.get("tensor_model_parallel_size", 1) * model_cfg.get("pipeline_model_parallel_size", 1) + # ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" + + # Restore the model from the specified path and configuration, and set it up for inference. + model = model_provider.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + override_config_path=model_cfg, + save_restore_connector=save_restore_connector, + strict=True, + ) + + elif cfg.model.restore_from_path.endswith(".ckpt"): + logging.warning( + "Loading from .ckpt checkpoint for inference is experimental! It doesn't support models with model parallelism!") + + model = model_provider.load_from_checkpoint( + cfg.model.restore_from_path, + hparams_file=cfg.model.get("hparams_file"), + trainer=trainer, + ) + + else: + raise ValueError(f"Unrecognized checkpoint type: {cfg.model.restore_from_path}") + + # initialize apex DDP strategy + def dummy(): + return + + if trainer.strategy.launcher is not None: + trainer.strategy.launcher.launch(dummy, trainer=trainer) + trainer.strategy.setup_environment() + + model = model.cuda() # move the model to the GPU + model.eval().requires_grad_(False) # set the model to evaluation mode and disable gradients + + # Return the trainer and model objects. + return trainer, model diff --git a/nemo/collections/nlp/data/language_modeling/megatron/megatron_batch_samplers.py b/nemo/collections/nlp/data/language_modeling/megatron/megatron_batch_samplers.py index c9791bc0147d..8b06ac951a66 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/megatron_batch_samplers.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/megatron_batch_samplers.py @@ -76,8 +76,6 @@ def __init__( # Sanity checks. if total_samples <= 0: raise RuntimeError("no sample to consume: {}".format(total_samples)) - if consumed_samples >= total_samples: - raise RuntimeError("no samples left to consume: {}, {}".format(consumed_samples, total_samples)) if micro_batch_size <= 0: raise RuntimeError(f"micro_batch_size size must be greater than 0, but {micro_batch_size}") if data_parallel_size <= 0: @@ -198,6 +196,13 @@ def __init__( ) self.last_batch_size = self.total_samples % self._global_batch_size + def __len__(self): + num_available_samples = self.total_samples + if self.drop_last: + return num_available_samples // self.global_batch_size + else: + return (num_available_samples + self.global_batch_size - 1) // self.global_batch_size + def __iter__(self): active_total_samples = self.total_samples - self.last_batch_size self.epoch = self.consumed_samples // active_total_samples diff --git a/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py b/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py index a9c659e48696..ad918b3a5e3a 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py @@ -83,9 +83,9 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): # TODO does not support PP yet self.model = self.model_provider_func(pre_process=True, post_process=True, add_encoder=True, add_decoder=True) - self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False) + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) - if self.megatron_amp_o2: + if self.megatron_amp_O2: if not self.with_distributed_adam: # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type @@ -273,7 +273,7 @@ def training_step(self, batch, batch_idx): if self.with_distributed_adam: # gradients are reduced internally in distributed optimizer pass - elif self.megatron_amp_o2: + elif self.megatron_amp_O2: # while async grad allreduce is enabled, bprop will keep moving forward without waiting for # the finish of async grad AR works. Hence, to guarantee the correctness of grads reduction, # we cannot start weight update until all async grad AR works are done. diff --git a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py index b3c08dff7ae8..629b4d8b7217 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py @@ -147,7 +147,7 @@ def forward( return output, encoder_input def load_frozen_model(self, cfg, trainer): - self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False) + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) # TODO: Fix this once apex patches FusedScaledMaskedSoftmax. # This is a workaround for the fact that `masked_softmax_fusion` has issues with certain input sizes that may be present while finetuning. @@ -159,7 +159,7 @@ def load_frozen_model(self, cfg, trainer): t5_cfg.decoder.masked_softmax_fusion = False else: t5_cfg.masked_softmax_fusion = False - t5_cfg.megatron_amp_O2 = self.megatron_amp_o2 + t5_cfg.megatron_amp_O2 = self.megatron_amp_O2 # hack to make the _GLOBAL_NUM_MICROBATCHES_CALCULATOR initialize t5_cfg.micro_batch_size = cfg.get('micro_batch_size', 4) t5_cfg.global_batch_size = cfg.get('global_batch_size', 4) diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py index e6480362bc85..979214d1971f 100644 --- a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py +++ b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py @@ -135,7 +135,7 @@ def __init__( raise NotImplementedError("row_init_method should be zero, normal or xavier") if norm_type == 'mixedfusedlayernorm': - self.layer_norm = MixedFusedLayerNorm(in_features, 1e-5, sequence_parallel_enbaled=False) + self.layer_norm = MixedFusedLayerNorm(in_features, 1e-5, sequence_parallel_enabled=False) elif norm_type == 'layernorm': self.layer_norm = nn.LayerNorm(in_features) else: diff --git a/nemo/collections/nlp/modules/common/megatron/attention.py b/nemo/collections/nlp/modules/common/megatron/attention.py index 5c2267a25e44..5b91a10eb264 100644 --- a/nemo/collections/nlp/modules/common/megatron/attention.py +++ b/nemo/collections/nlp/modules/common/megatron/attention.py @@ -673,7 +673,7 @@ def __init__( super(CoreAttention, self).__init__() self.precision = precision - self.fp16 = precision == 16 + self.fp16 = (precision == 16 or precision == '16') self.bf16 = precision == 'bf16' self.multi_query_attention = multi_query_attention diff --git a/nemo/collections/nlp/modules/common/megatron/fused_layer_norm.py b/nemo/collections/nlp/modules/common/megatron/fused_layer_norm.py index 0a5eebe42d11..6adab73dd8c6 100644 --- a/nemo/collections/nlp/modules/common/megatron/fused_layer_norm.py +++ b/nemo/collections/nlp/modules/common/megatron/fused_layer_norm.py @@ -59,4 +59,4 @@ def get_layer_norm(hidden_size, eps=1e-5, persist_layer_norm=False, sequence_par if persist_layer_norm: return FastLayerNorm(hidden_size, eps, sequence_parallel_enabled=sequence_parallel) else: - return MixedFusedLayerNorm(hidden_size, eps, sequence_parallel_enbaled=sequence_parallel) + return MixedFusedLayerNorm(hidden_size, eps, sequence_parallel_enabled=sequence_parallel) diff --git a/nemo/collections/nlp/modules/common/megatron/fused_softmax.py b/nemo/collections/nlp/modules/common/megatron/fused_softmax.py index 3dc0a00c55bd..2c914a67dd12 100644 --- a/nemo/collections/nlp/modules/common/megatron/fused_softmax.py +++ b/nemo/collections/nlp/modules/common/megatron/fused_softmax.py @@ -51,9 +51,10 @@ def forward_torch_softmax(self, input, mask): input = input * self.scale mask_output = self.mask_func(input, mask) if mask is not None else input probs = torch.nn.Softmax(dim=-1)(mask_output) - all_k_masked = mask.all(axis=-1) - zero_attention_mask = (1.0 - all_k_masked.float())[:, :, :, None] - probs = probs * zero_attention_mask + if mask is not None: + all_k_masked = mask.all(axis=-1) + zero_attention_mask = (1.0 - all_k_masked.float())[:, :, :, None] + probs = probs * zero_attention_mask if self.input_in_float16 and self.softmax_in_fp32: if self.input_in_fp16: diff --git a/nemo/collections/nlp/modules/common/megatron/language_model.py b/nemo/collections/nlp/modules/common/megatron/language_model.py index 0ab2ae79bed1..aa8a8a08da64 100755 --- a/nemo/collections/nlp/modules/common/megatron/language_model.py +++ b/nemo/collections/nlp/modules/common/megatron/language_model.py @@ -273,6 +273,13 @@ def __init__( # Initialize the position embeddings. self.init_method(self.position_embeddings.weight) + if self.position_embedding_type == 'learned_parameters': + # Position embedding (learn parameters directly). + self.position_embeddings = torch.nn.Parameter(torch.empty(max_sequence_length, self.hidden_size)) + self._position_embeddings_key = 'position_embeddings' + # Initialize the position embeddings. + self.init_method(self.position_embeddings) + # Token type embedding. # Add this as an optional field that can be added through # method call so we can load a pretrain model without @@ -323,6 +330,8 @@ def forward(self, input_ids, position_ids=None, token_type_ids=None): assert position_ids is not None position_embeddings = self.position_embeddings(position_ids) embeddings = words_embeddings + position_embeddings + elif self.position_embedding_type == 'learned_parameters': + embeddings = words_embeddings + self.position_embeddings else: embeddings = words_embeddings if token_type_ids is not None: diff --git a/nemo/collections/nlp/modules/common/megatron/module.py b/nemo/collections/nlp/modules/common/megatron/module.py index 52464b819c2f..7d36716f928d 100644 --- a/nemo/collections/nlp/modules/common/megatron/module.py +++ b/nemo/collections/nlp/modules/common/megatron/module.py @@ -262,7 +262,7 @@ def __init__(self, module, precision): super().__init__() self.precision = precision - if precision == 16: + if precision == 16 or precision == '16': self.add_module('module', module.half()) def float16_converter(val): diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index 3b11eb838a2f..391760ac54cb 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -111,7 +111,7 @@ def configure_ddp(self): Sets find_unused_parameters to False to use activation-checkpoint-recomputation. """ - if (hasattr(self.model, 'megatron_amp_o2') and self.model.megatron_amp_o2) or ( + if (hasattr(self.model, 'megatron_amp_O2') and self.model.megatron_amp_O2) or ( hasattr(self.model, 'with_distributed_adam') and self.model.with_distributed_adam ): # do not use DDP if using megatron amp O2 or distributed optimizer @@ -349,6 +349,26 @@ def modify_state_dict(self, conf, state_dict): new_state_dict[new_key] = state_dict[key] state_dict = new_state_dict + # compatibility for inductor in inference + if not conf.get('inductor', False): + new_state_dict = {} + for key in state_dict.keys(): + new_key = key.replace('._orig_mod', '', 1) + new_state_dict[new_key] = state_dict[key] + state_dict = new_state_dict + + # Modify state key for Dreambooth inference + if conf.get('target') == 'nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion': + new_state_dict = {} + for key in state_dict.keys(): + new_key = key.replace('unet', 'model.diffusion_model') + new_key = new_key.replace('vae', 'first_stage_model') + new_key = new_key.replace('text_encoder', 'cond_stage_model') + new_key = new_key.replace('.noise_scheduler', '') + new_state_dict[new_key] = state_dict[key] + state_dict = new_state_dict + + return state_dict def restore_from( diff --git a/nemo/collections/vision/__init__.py b/nemo/collections/vision/__init__.py new file mode 100644 index 000000000000..edf1849f216f --- /dev/null +++ b/nemo/collections/vision/__init__.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo.collections.vision import data, losses, models, modules +from nemo.package_info import __version__ + +# Set collection version equal to NeMo version. +__version = __version__ + +# Authorship. +__author__ = "NVIDIA Corporation" + +# Set collection name. +__description__ = "Computer Vision collection" +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/data/__init__.py b/nemo/collections/vision/data/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/data/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/data/imagenet_classnames.py b/nemo/collections/vision/data/imagenet_classnames.py new file mode 100644 index 000000000000..2b15b544df0f --- /dev/null +++ b/nemo/collections/vision/data/imagenet_classnames.py @@ -0,0 +1,179 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +imagenet_classnames = ["tench", "goldfish", "great white shark", "tiger shark", "hammerhead shark", "electric ray", + "stingray", "rooster", "hen", "ostrich", "brambling", "goldfinch", "house finch", "junco", + "indigo bunting", "American robin", "bulbul", "jay", "magpie", "chickadee", "American dipper", + "kite (bird of prey)", "bald eagle", "vulture", "great grey owl", "fire salamander", + "smooth newt", "newt", "spotted salamander", "axolotl", "American bullfrog", "tree frog", + "tailed frog", "loggerhead sea turtle", "leatherback sea turtle", "mud turtle", "terrapin", + "box turtle", "banded gecko", "green iguana", "Carolina anole", + "desert grassland whiptail lizard", "agama", "frilled-necked lizard", "alligator lizard", + "Gila monster", "European green lizard", "chameleon", "Komodo dragon", "Nile crocodile", + "American alligator", "triceratops", "worm snake", "ring-necked snake", + "eastern hog-nosed snake", "smooth green snake", "kingsnake", "garter snake", "water snake", + "vine snake", "night snake", "boa constrictor", "African rock python", "Indian cobra", + "green mamba", "sea snake", "Saharan horned viper", "eastern diamondback rattlesnake", + "sidewinder rattlesnake", "trilobite", "harvestman", "scorpion", "yellow garden spider", + "barn spider", "European garden spider", "southern black widow", "tarantula", "wolf spider", + "tick", "centipede", "black grouse", "ptarmigan", "ruffed grouse", "prairie grouse", "peafowl", + "quail", "partridge", "african grey parrot", "macaw", "sulphur-crested cockatoo", "lorikeet", + "coucal", "bee eater", "hornbill", "hummingbird", "jacamar", "toucan", "duck", + "red-breasted merganser", "goose", "black swan", "tusker", "echidna", "platypus", "wallaby", + "koala", "wombat", "jellyfish", "sea anemone", "brain coral", "flatworm", "nematode", "conch", + "snail", "slug", "sea slug", "chiton", "chambered nautilus", "Dungeness crab", "rock crab", + "fiddler crab", "red king crab", "American lobster", "spiny lobster", "crayfish", "hermit crab", + "isopod", "white stork", "black stork", "spoonbill", "flamingo", "little blue heron", + "great egret", "bittern bird", "crane bird", "limpkin", "common gallinule", "American coot", + "bustard", "ruddy turnstone", "dunlin", "common redshank", "dowitcher", "oystercatcher", + "pelican", "king penguin", "albatross", "grey whale", "killer whale", "dugong", "sea lion", + "Chihuahua", "Japanese Chin", "Maltese", "Pekingese", "Shih Tzu", "King Charles Spaniel", + "Papillon", "toy terrier", "Rhodesian Ridgeback", "Afghan Hound", "Basset Hound", "Beagle", + "Bloodhound", "Bluetick Coonhound", "Black and Tan Coonhound", "Treeing Walker Coonhound", + "English foxhound", "Redbone Coonhound", "borzoi", "Irish Wolfhound", "Italian Greyhound", + "Whippet", "Ibizan Hound", "Norwegian Elkhound", "Otterhound", "Saluki", "Scottish Deerhound", + "Weimaraner", "Staffordshire Bull Terrier", "American Staffordshire Terrier", + "Bedlington Terrier", "Border Terrier", "Kerry Blue Terrier", "Irish Terrier", + "Norfolk Terrier", "Norwich Terrier", "Yorkshire Terrier", "Wire Fox Terrier", + "Lakeland Terrier", "Sealyham Terrier", "Airedale Terrier", "Cairn Terrier", + "Australian Terrier", "Dandie Dinmont Terrier", "Boston Terrier", "Miniature Schnauzer", + "Giant Schnauzer", "Standard Schnauzer", "Scottish Terrier", "Tibetan Terrier", + "Australian Silky Terrier", "Soft-coated Wheaten Terrier", "West Highland White Terrier", + "Lhasa Apso", "Flat-Coated Retriever", "Curly-coated Retriever", "Golden Retriever", + "Labrador Retriever", "Chesapeake Bay Retriever", "German Shorthaired Pointer", "Vizsla", + "English Setter", "Irish Setter", "Gordon Setter", "Brittany dog", "Clumber Spaniel", + "English Springer Spaniel", "Welsh Springer Spaniel", "Cocker Spaniel", "Sussex Spaniel", + "Irish Water Spaniel", "Kuvasz", "Schipperke", "Groenendael dog", "Malinois", "Briard", + "Australian Kelpie", "Komondor", "Old English Sheepdog", "Shetland Sheepdog", "collie", + "Border Collie", "Bouvier des Flandres dog", "Rottweiler", "German Shepherd Dog", "Dobermann", + "Miniature Pinscher", "Greater Swiss Mountain Dog", "Bernese Mountain Dog", + "Appenzeller Sennenhund", "Entlebucher Sennenhund", "Boxer", "Bullmastiff", "Tibetan Mastiff", + "French Bulldog", "Great Dane", "St. Bernard", "husky", "Alaskan Malamute", "Siberian Husky", + "Dalmatian", "Affenpinscher", "Basenji", "pug", "Leonberger", "Newfoundland dog", + "Great Pyrenees dog", "Samoyed", "Pomeranian", "Chow Chow", "Keeshond", "brussels griffon", + "Pembroke Welsh Corgi", "Cardigan Welsh Corgi", "Toy Poodle", "Miniature Poodle", + "Standard Poodle", "Mexican hairless dog (xoloitzcuintli)", "grey wolf", "Alaskan tundra wolf", + "red wolf or maned wolf", "coyote", "dingo", "dhole", "African wild dog", "hyena", "red fox", + "kit fox", "Arctic fox", "grey fox", "tabby cat", "tiger cat", "Persian cat", "Siamese cat", + "Egyptian Mau", "cougar", "lynx", "leopard", "snow leopard", "jaguar", "lion", "tiger", + "cheetah", "brown bear", "American black bear", "polar bear", "sloth bear", "mongoose", + "meerkat", "tiger beetle", "ladybug", "ground beetle", "longhorn beetle", "leaf beetle", + "dung beetle", "rhinoceros beetle", "weevil", "fly", "bee", "ant", "grasshopper", + "cricket insect", "stick insect", "cockroach", "praying mantis", "cicada", "leafhopper", + "lacewing", "dragonfly", "damselfly", "red admiral butterfly", "ringlet butterfly", + "monarch butterfly", "small white butterfly", "sulphur butterfly", "gossamer-winged butterfly", + "starfish", "sea urchin", "sea cucumber", "cottontail rabbit", "hare", "Angora rabbit", + "hamster", "porcupine", "fox squirrel", "marmot", "beaver", "guinea pig", "common sorrel horse", + "zebra", "pig", "wild boar", "warthog", "hippopotamus", "ox", "water buffalo", "bison", + "ram (adult male sheep)", "bighorn sheep", "Alpine ibex", "hartebeest", "impala (antelope)", + "gazelle", "arabian camel", "llama", "weasel", "mink", "European polecat", + "black-footed ferret", "otter", "skunk", "badger", "armadillo", "three-toed sloth", "orangutan", + "gorilla", "chimpanzee", "gibbon", "siamang", "guenon", "patas monkey", "baboon", "macaque", + "langur", "black-and-white colobus", "proboscis monkey", "marmoset", "white-headed capuchin", + "howler monkey", "titi monkey", "Geoffroy's spider monkey", "common squirrel monkey", + "ring-tailed lemur", "indri", "Asian elephant", "African bush elephant", "red panda", + "giant panda", "snoek fish", "eel", "silver salmon", "rock beauty fish", "clownfish", + "sturgeon", "gar fish", "lionfish", "pufferfish", "abacus", "abaya", "academic gown", + "accordion", "acoustic guitar", "aircraft carrier", "airliner", "airship", "altar", "ambulance", + "amphibious vehicle", "analog clock", "apiary", "apron", "trash can", "assault rifle", + "backpack", "bakery", "balance beam", "balloon", "ballpoint pen", "Band-Aid", "banjo", + "baluster / handrail", "barbell", "barber chair", "barbershop", "barn", "barometer", "barrel", + "wheelbarrow", "baseball", "basketball", "bassinet", "bassoon", "swimming cap", "bath towel", + "bathtub", "station wagon", "lighthouse", "beaker", "military hat (bearskin or shako)", + "beer bottle", "beer glass", "bell tower", "baby bib", "tandem bicycle", "bikini", + "ring binder", "binoculars", "birdhouse", "boathouse", "bobsleigh", "bolo tie", "poke bonnet", + "bookcase", "bookstore", "bottle cap", "hunting bow", "bow tie", "brass memorial plaque", "bra", + "breakwater", "breastplate", "broom", "bucket", "buckle", "bulletproof vest", + "high-speed train", "butcher shop", "taxicab", "cauldron", "candle", "cannon", "canoe", + "can opener", "cardigan", "car mirror", "carousel", "tool kit", "cardboard box / carton", + "car wheel", "automated teller machine", "cassette", "cassette player", "castle", "catamaran", + "CD player", "cello", "mobile phone", "chain", "chain-link fence", "chain mail", "chainsaw", + "storage chest", "chiffonier", "bell or wind chime", "china cabinet", "Christmas stocking", + "church", "movie theater", "cleaver", "cliff dwelling", "cloak", "clogs", "cocktail shaker", + "coffee mug", "coffeemaker", "spiral or coil", "combination lock", "computer keyboard", + "candy store", "container ship", "convertible", "corkscrew", "cornet", "cowboy boot", + "cowboy hat", "cradle", "construction crane", "crash helmet", "crate", "infant bed", + "Crock Pot", "croquet ball", "crutch", "cuirass", "dam", "desk", "desktop computer", + "rotary dial telephone", "diaper", "digital clock", "digital watch", "dining table", + "dishcloth", "dishwasher", "disc brake", "dock", "dog sled", "dome", "doormat", "drilling rig", + "drum", "drumstick", "dumbbell", "Dutch oven", "electric fan", "electric guitar", + "electric locomotive", "entertainment center", "envelope", "espresso machine", "face powder", + "feather boa", "filing cabinet", "fireboat", "fire truck", "fire screen", "flagpole", "flute", + "folding chair", "football helmet", "forklift", "fountain", "fountain pen", "four-poster bed", + "freight car", "French horn", "frying pan", "fur coat", "garbage truck", + "gas mask or respirator", "gas pump", "goblet", "go-kart", "golf ball", "golf cart", "gondola", + "gong", "gown", "grand piano", "greenhouse", "radiator grille", "grocery store", "guillotine", + "hair clip", "hair spray", "half-track", "hammer", "hamper", "hair dryer", "hand-held computer", + "handkerchief", "hard disk drive", "harmonica", "harp", "combine harvester", "hatchet", + "holster", "home theater", "honeycomb", "hook", "hoop skirt", "gymnastic horizontal bar", + "horse-drawn vehicle", "hourglass", "iPod", "clothes iron", "carved pumpkin", "jeans", "jeep", + "T-shirt", "jigsaw puzzle", "rickshaw", "joystick", "kimono", "knee pad", "knot", "lab coat", + "ladle", "lampshade", "laptop computer", "lawn mower", "lens cap", "letter opener", "library", + "lifeboat", "lighter", "limousine", "ocean liner", "lipstick", "slip-on shoe", "lotion", + "music speaker", "loupe magnifying glass", "sawmill", "magnetic compass", "messenger bag", + "mailbox", "tights", "one-piece bathing suit", "manhole cover", "maraca", "marimba", "mask", + "matchstick", "maypole", "maze", "measuring cup", "medicine cabinet", "megalith", "microphone", + "microwave oven", "military uniform", "milk can", "minibus", "miniskirt", "minivan", "missile", + "mitten", "mixing bowl", "mobile home", "ford model t", "modem", "monastery", "monitor", + "moped", "mortar and pestle", "graduation cap", "mosque", "mosquito net", "vespa", + "mountain bike", "tent", "computer mouse", "mousetrap", "moving van", "muzzle", "metal nail", + "neck brace", "necklace", "baby pacifier", "notebook computer", "obelisk", "oboe", "ocarina", + "odometer", "oil filter", "pipe organ", "oscilloscope", "overskirt", "bullock cart", + "oxygen mask", "product packet / packaging", "paddle", "paddle wheel", "padlock", "paintbrush", + "pajamas", "palace", "pan flute", "paper towel", "parachute", "parallel bars", "park bench", + "parking meter", "railroad car", "patio", "payphone", "pedestal", "pencil case", + "pencil sharpener", "perfume", "Petri dish", "photocopier", "plectrum", "Pickelhaube", + "picket fence", "pickup truck", "pier", "piggy bank", "pill bottle", "pillow", "ping-pong ball", + "pinwheel", "pirate ship", "drink pitcher", "block plane", "planetarium", "plastic bag", + "plate rack", "farm plow", "plunger", "Polaroid camera", "pole", "police van", "poncho", + "pool table", "soda bottle", "plant pot", "potter's wheel", "power drill", "prayer rug", + "printer", "prison", "missile", "projector", "hockey puck", "punching bag", "purse", "quill", + "quilt", "race car", "racket", "radiator", "radio", "radio telescope", "rain barrel", + "recreational vehicle", "fishing casting reel", "reflex camera", "refrigerator", + "remote control", "restaurant", "revolver", "rifle", "rocking chair", "rotisserie", "eraser", + "rugby ball", "ruler measuring stick", "sneaker", "safe", "safety pin", "salt shaker", "sandal", + "sarong", "saxophone", "scabbard", "weighing scale", "school bus", "schooner", "scoreboard", + "CRT monitor", "screw", "screwdriver", "seat belt", "sewing machine", "shield", "shoe store", + "shoji screen / room divider", "shopping basket", "shopping cart", "shovel", "shower cap", + "shower curtain", "ski", "balaclava ski mask", "sleeping bag", "slide rule", "sliding door", + "slot machine", "snorkel", "snowmobile", "snowplow", "soap dispenser", "soccer ball", "sock", + "solar thermal collector", "sombrero", "soup bowl", "keyboard space bar", "space heater", + "space shuttle", "spatula", "motorboat", "spider web", "spindle", "sports car", "spotlight", + "stage", "steam locomotive", "through arch bridge", "steel drum", "stethoscope", "scarf", + "stone wall", "stopwatch", "stove", "strainer", "tram", "stretcher", "couch", "stupa", + "submarine", "suit", "sundial", "sunglasses", "sunglasses", "sunscreen", "suspension bridge", + "mop", "sweatshirt", "swim trunks / shorts", "swing", "electrical switch", "syringe", + "table lamp", "tank", "tape player", "teapot", "teddy bear", "television", "tennis ball", + "thatched roof", "front curtain", "thimble", "threshing machine", "throne", "tile roof", + "toaster", "tobacco shop", "toilet seat", "torch", "totem pole", "tow truck", "toy store", + "tractor", "semi-trailer truck", "tray", "trench coat", "tricycle", "trimaran", "tripod", + "triumphal arch", "trolleybus", "trombone", "hot tub", "turnstile", "typewriter keyboard", + "umbrella", "unicycle", "upright piano", "vacuum cleaner", "vase", "vaulted or arched ceiling", + "velvet fabric", "vending machine", "vestment", "viaduct", "violin", "volleyball", + "waffle iron", "wall clock", "wallet", "wardrobe", "military aircraft", "sink", + "washing machine", "water bottle", "water jug", "water tower", "whiskey jug", "whistle", + "hair wig", "window screen", "window shade", "Windsor tie", "wine bottle", "airplane wing", + "wok", "wooden spoon", "wool", "split-rail fence", "shipwreck", "sailboat", "yurt", "website", + "comic book", "crossword", "traffic or street sign", "traffic light", "dust jacket", "menu", + "plate", "guacamole", "consomme", "hot pot", "trifle", "ice cream", "popsicle", "baguette", + "bagel", "pretzel", "cheeseburger", "hot dog", "mashed potatoes", "cabbage", "broccoli", + "cauliflower", "zucchini", "spaghetti squash", "acorn squash", "butternut squash", "cucumber", + "artichoke", "bell pepper", "cardoon", "mushroom", "Granny Smith apple", "strawberry", "orange", + "lemon", "fig", "pineapple", "banana", "jackfruit", "cherimoya (custard apple)", "pomegranate", + "hay", "carbonara", "chocolate syrup", "dough", "meatloaf", "pizza", "pot pie", "burrito", + "red wine", "espresso", "tea cup", "eggnog", "mountain", "bubble", "cliff", "coral reef", + "geyser", "lakeshore", "promontory", "sandbar", "beach", "valley", "volcano", "baseball player", + "bridegroom", "scuba diver", "rapeseed", "daisy", "yellow lady's slipper", "corn", "acorn", + "rose hip", "horse chestnut seed", "coral fungus", "agaric", "gyromitra", "stinkhorn mushroom", + "earth star fungus", "hen of the woods mushroom", "bolete", "corn cob", "toilet paper"] diff --git a/nemo/collections/vision/data/megatron/__init__.py b/nemo/collections/vision/data/megatron/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/data/megatron/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/data/megatron/autoaugment.py b/nemo/collections/vision/data/megatron/autoaugment.py new file mode 100644 index 000000000000..7038a7434d51 --- /dev/null +++ b/nemo/collections/vision/data/megatron/autoaugment.py @@ -0,0 +1,305 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Code adapted from https://github.com/DeepVoltaire/AutoAugment. + +This module implements the fixed AutoAugment data augmentation policy for ImageNet provided in +Appendix A, Table 9 of reference [1]. It does not include any of the search code for augmentation +policies. + +Reference: +[1] https://arxiv.org/abs/1805.09501 +""" + +import numpy as np +import random +from PIL import Image +from PIL import ImageEnhance +from PIL import ImageOps + +_MAX_LEVEL = 10 # Maximum integer strength of an augmentation, if applicable. + + +class ImageNetPolicy: + """Definition of an ImageNetPolicy. + + Implements a fixed AutoAugment data augmentation policy targeted at + ImageNet training by randomly applying at runtime one of the 25 pre-defined + data augmentation sub-policies provided in Reference [1]. + + Usage example as a Pytorch Transform: + >>> transform=transforms.Compose([transforms.Resize(256), + >>> ImageNetPolicy(), + >>> transforms.ToTensor()]) + """ + + def __init__(self, fillcolor=(128, 128, 128)): + """Initialize an ImageNetPolicy. + + Args: + fillcolor (tuple): RGB color components of the color to be used for + filling when needed (default: (128, 128, 128), which + corresponds to gray). + """ + # Instantiate a list of sub-policies. + # Each entry of the list is a SubPolicy which consists of + # two augmentation operations, + # each of those parametrized as operation, probability, magnitude. + # Those two operations are applied sequentially on the image upon call. + self.policies = [ + SubPolicy("posterize", 0.4, 8, "rotate", 0.6, 9, fillcolor), + SubPolicy("solarize", 0.6, 5, "autocontrast", 0.6, 5, fillcolor), + SubPolicy("equalize", 0.8, 8, "equalize", 0.6, 3, fillcolor), + SubPolicy("posterize", 0.6, 7, "posterize", 0.6, 6, fillcolor), + SubPolicy("equalize", 0.4, 7, "solarize", 0.2, 4, fillcolor), + SubPolicy("equalize", 0.4, 4, "rotate", 0.8, 8, fillcolor), + SubPolicy("solarize", 0.6, 3, "equalize", 0.6, 7, fillcolor), + SubPolicy("posterize", 0.8, 5, "equalize", 1.0, 2, fillcolor), + SubPolicy("rotate", 0.2, 3, "solarize", 0.6, 8, fillcolor), + SubPolicy("equalize", 0.6, 8, "posterize", 0.4, 6, fillcolor), + SubPolicy("rotate", 0.8, 8, "color", 0.4, 0, fillcolor), + SubPolicy("rotate", 0.4, 9, "equalize", 0.6, 2, fillcolor), + SubPolicy("equalize", 0.0, 7, "equalize", 0.8, 8, fillcolor), + SubPolicy("invert", 0.6, 4, "equalize", 1.0, 8, fillcolor), + SubPolicy("color", 0.6, 4, "contrast", 1.0, 8, fillcolor), + SubPolicy("rotate", 0.8, 8, "color", 1.0, 2, fillcolor), + SubPolicy("color", 0.8, 8, "solarize", 0.8, 7, fillcolor), + SubPolicy("sharpness", 0.4, 7, "invert", 0.6, 8, fillcolor), + SubPolicy("shearX", 0.6, 5, "equalize", 1.0, 9, fillcolor), + SubPolicy("color", 0.4, 0, "equalize", 0.6, 3, fillcolor), + SubPolicy("equalize", 0.4, 7, "solarize", 0.2, 4, fillcolor), + SubPolicy("solarize", 0.6, 5, "autocontrast", 0.6, 5, fillcolor), + SubPolicy("invert", 0.6, 4, "equalize", 1.0, 8, fillcolor), + SubPolicy("color", 0.6, 4, "contrast", 1.0, 8, fillcolor), + SubPolicy("equalize", 0.8, 8, "equalize", 0.6, 3, fillcolor), + ] + + def __call__(self, img): + """Define call method for ImageNetPolicy class.""" + policy_idx = random.randint(0, len(self.policies) - 1) + return self.policies[policy_idx](img) + + def __repr__(self): + """Define repr method for ImageNetPolicy class.""" + return "ImageNetPolicy" + + +class SubPolicy: + """Definition of a SubPolicy. + + A SubPolicy consists of two augmentation operations, + each of those parametrized as operation, probability, magnitude. + The two operations are applied sequentially on the image upon call. + """ + + def __init__( + self, + operation1, + probability1, + magnitude_idx1, + operation2, + probability2, + magnitude_idx2, + fillcolor, + ): + """Initialize a SubPolicy. + + Args: + operation1 (str): Key specifying the first augmentation operation. + There are fourteen key values altogether (see supported_ops below + listing supported operations). probability1 (float): Probability + within [0., 1.] of applying the first augmentation operation. + magnitude_idx1 (int): Integer specifiying the strength of the first + operation as an index further used to derive the magnitude from a + range of possible values. + operation2 (str): Key specifying the second augmentation operation. + probability2 (float): Probability within [0., 1.] of applying the + second augmentation operation. + magnitude_idx2 (int): Integer specifiying the strength of the + second operation as an index further used to derive the magnitude + from a range of possible values. + fillcolor (tuple): RGB color components of the color to be used for + filling. + Returns: + """ + # List of supported operations for operation1 and operation2. + supported_ops = [ + "shearX", + "shearY", + "translateX", + "translateY", + "rotate", + "color", + "posterize", + "solarize", + "contrast", + "sharpness", + "brightness", + "autocontrast", + "equalize", + "invert", + ] + assert (operation1 in supported_ops) and ( + operation2 in supported_ops + ), "SubPolicy:one of oper1 or oper2 refers to an unsupported operation." + + assert ( + 0.0 <= probability1 <= 1.0 and 0.0 <= probability2 <= 1.0 + ), "SubPolicy: prob1 and prob2 should be within [0., 1.]." + + assert ( + isinstance(magnitude_idx1, int) and 0 <= magnitude_idx1 <= 10 + ), "SubPolicy: idx1 should be specified as an integer within [0, 10]." + + assert ( + isinstance(magnitude_idx2, int) and 0 <= magnitude_idx2 <= 10 + ), "SubPolicy: idx2 should be specified as an integer within [0, 10]." + + # Define a dictionary where each key refers to a specific type of + # augmentation and the corresponding value is a range of ten possible + # magnitude values for that augmentation. + num_levels = _MAX_LEVEL + 1 + ranges = { + "shearX": np.linspace(0, 0.3, num_levels), + "shearY": np.linspace(0, 0.3, num_levels), + "translateX": np.linspace(0, 150 / 331, num_levels), + "translateY": np.linspace(0, 150 / 331, num_levels), + "rotate": np.linspace(0, 30, num_levels), + "color": np.linspace(0.0, 0.9, num_levels), + "posterize": np.round(np.linspace(8, 4, num_levels), 0).astype( + np.int + ), + "solarize": np.linspace(256, 0, num_levels), # range [0, 256] + "contrast": np.linspace(0.0, 0.9, num_levels), + "sharpness": np.linspace(0.0, 0.9, num_levels), + "brightness": np.linspace(0.0, 0.9, num_levels), + "autocontrast": [0] + * num_levels, # This augmentation doesn't use magnitude parameter. + "equalize": [0] + * num_levels, # This augmentation doesn't use magnitude parameter. + "invert": [0] + * num_levels, # This augmentation doesn't use magnitude parameter. + } + + def rotate_with_fill(img, magnitude): + """Define rotation transformation with fill. + + The input image is first rotated, then it is blended together with + a gray mask of the same size. Note that fillcolor as defined + elsewhere in this module doesn't apply here. + + Args: + magnitude (float): rotation angle in degrees. + Returns: + rotated_filled (PIL Image): rotated image with gray filling for + disoccluded areas unveiled by the rotation. + """ + rotated = img.convert("RGBA").rotate(magnitude) + rotated_filled = Image.composite( + rotated, Image.new("RGBA", rotated.size, (128,) * 4), rotated + ) + return rotated_filled.convert(img.mode) + + # Define a dictionary of augmentation functions where each key refers + # to a specific type of augmentation and the corresponding value defines + # the augmentation itself using a lambda function. + # pylint: disable=unnecessary-lambda + func_dict = { + "shearX": lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + (1, magnitude * random.choice([-1, 1]), 0, 0, 1, 0), + Image.BICUBIC, + fillcolor=fillcolor, + ), + "shearY": lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + (1, 0, 0, magnitude * random.choice([-1, 1]), 1, 0), + Image.BICUBIC, + fillcolor=fillcolor, + ), + "translateX": lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + ( + 1, + 0, + magnitude * img.size[0] * random.choice([-1, 1]), + 0, + 1, + 0, + ), + fillcolor=fillcolor, + ), + "translateY": lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + ( + 1, + 0, + 0, + 0, + 1, + magnitude * img.size[1] * random.choice([-1, 1]), + ), + fillcolor=fillcolor, + ), + "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude), + "color": lambda img, magnitude: ImageEnhance.Color(img).enhance( + 1 + magnitude * random.choice([-1, 1]) + ), + "posterize": lambda img, magnitude: ImageOps.posterize( + img, magnitude + ), + "solarize": lambda img, magnitude: ImageOps.solarize( + img, magnitude + ), + "contrast": lambda img, magnitude: ImageEnhance.Contrast( + img + ).enhance(1 + magnitude * random.choice([-1, 1])), + "sharpness": lambda img, magnitude: ImageEnhance.Sharpness( + img + ).enhance(1 + magnitude * random.choice([-1, 1])), + "brightness": lambda img, magnitude: ImageEnhance.Brightness( + img + ).enhance(1 + magnitude * random.choice([-1, 1])), + "autocontrast": lambda img, magnitude: ImageOps.autocontrast(img), + "equalize": lambda img, magnitude: ImageOps.equalize(img), + "invert": lambda img, magnitude: ImageOps.invert(img), + } + + # Store probability, function and magnitude of the first augmentation + # for the sub-policy. + self.probability1 = probability1 + self.operation1 = func_dict[operation1] + self.magnitude1 = ranges[operation1][magnitude_idx1] + + # Store probability, function and magnitude of the second augmentation + # for the sub-policy. + self.probability2 = probability2 + self.operation2 = func_dict[operation2] + self.magnitude2 = ranges[operation2][magnitude_idx2] + + def __call__(self, img): + """Define call method for SubPolicy class.""" + # Randomly apply operation 1. + if random.random() < self.probability1: + img = self.operation1(img, self.magnitude1) + + # Randomly apply operation 2. + if random.random() < self.probability2: + img = self.operation2(img, self.magnitude2) + + return img diff --git a/nemo/collections/vision/data/megatron/data_samplers.py b/nemo/collections/vision/data/megatron/data_samplers.py new file mode 100644 index 000000000000..59f3d2de3fd4 --- /dev/null +++ b/nemo/collections/vision/data/megatron/data_samplers.py @@ -0,0 +1,96 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +from torch.utils.data import Dataset + +from nemo.collections.nlp.data.language_modeling.megatron.megatron_batch_samplers import BaseMegatronBatchSampler +from nemo.collections.vision.data.megatron.vit_dataset import RandomSeedDataset + + +class MegatronVisionPretrainingRandomBatchSampler(BaseMegatronBatchSampler): + + def __init__( + self, + dataset: Dataset, + total_samples: int, + consumed_samples: int, + micro_batch_size: int, + global_batch_size: int, + data_parallel_rank: int, + data_parallel_size: int, + drop_last: bool, + data_sharding: bool, + ) -> None: + super().__init__( + total_samples=total_samples, + consumed_samples=consumed_samples, + micro_batch_size=micro_batch_size, + global_batch_size=global_batch_size, + data_parallel_rank=data_parallel_rank, + data_parallel_size=data_parallel_size, + drop_last=drop_last, + ) + self.dataset = dataset + self.data_sharding = data_sharding + self.last_batch_size = self.total_samples % self.global_batch_size + + def __len__(self): + num_available_samples = self.total_samples + if self.drop_last: + return num_available_samples // self.global_batch_size + else: + return (num_available_samples + self.global_batch_size - 1) // self.global_batch_size + + def __iter__(self): + active_total_samples = self.total_samples - self.last_batch_size + self.epoch = self.consumed_samples // active_total_samples + current_epoch_samples = self.consumed_samples % active_total_samples + assert current_epoch_samples % (self.micro_batch_size * self.data_parallel_size) == 0 + + if isinstance(self.dataset, RandomSeedDataset): + self.dataset.set_epoch(self.epoch) + + # data sharding and random sampling + if self.data_sharding: + bucket_size = (self.total_samples // (self.micro_batch_size * self.data_parallel_size)) \ + * self.micro_batch_size + bucket_offset = current_epoch_samples // self.data_parallel_size + start_idx = self.data_parallel_rank * bucket_size + + g = torch.Generator() + g.manual_seed(self.epoch) + random_idx = torch.randperm(bucket_size, generator=g).tolist() + idx_range = [start_idx + x for x in random_idx[bucket_offset:]] + else: + full_bucket_size = (self.total_samples // self.micro_batch_size) \ + * self.micro_batch_size + full_bucket_offset = current_epoch_samples + g = torch.Generator() + g.manual_seed(self.epoch) + idx_range_total = \ + torch.randperm(full_bucket_size, generator=g).tolist() + idx_range_active = idx_range_total[full_bucket_offset:] + idx_range = idx_range_active[self.data_parallel_rank::self.data_parallel_size] + + batch = [] + # Last batch if not complete will be dropped. + for idx in idx_range: + batch.append(idx) + if len(batch) == self._global_batch_size_on_this_data_parallel_rank: + self.consumed_samples += self._global_batch_size + yield batch + batch = [] + # Check the last partial batch and see drop_last is set + if len(batch) > 0 and not self.drop_last: + yield batch diff --git a/nemo/collections/vision/data/megatron/image_folder.py b/nemo/collections/vision/data/megatron/image_folder.py new file mode 100644 index 000000000000..1ffa852408fa --- /dev/null +++ b/nemo/collections/vision/data/megatron/image_folder.py @@ -0,0 +1,284 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# code taken from +# https://github.com/pytorch/vision/blob/main/torchvision/datasets/folder.py +# added support for classes_fraction and data_per_class_fraction + +import numpy as np +import os +import os.path +from PIL import Image +from torchvision.datasets import VisionDataset +from typing import Any, Callable, cast, Dict, List, Optional, Tuple + + +def has_file_allowed_extension(filename: str, extensions: Tuple[str, ...]) -> bool: + """Checks if a file is an allowed extension. + Args: + filename (string): path to a file + extensions (tuple of strings): extensions to consider (lowercase) + Returns: + bool: True if the filename ends with one of given extensions + """ + return filename.lower().endswith(extensions) + + +def is_image_file(filename: str) -> bool: + """Checks if a file is an allowed image extension. + Args: + filename (string): path to a file + Returns: + bool: True if the filename ends with a known image extension + """ + return has_file_allowed_extension(filename, IMG_EXTENSIONS) + + +def make_dataset( + directory: str, + class_to_idx: Dict[str, int], + data_per_class_fraction: float, + extensions: Optional[Tuple[str, ...]] = None, + is_valid_file: Optional[Callable[[str], bool]] = None, +) -> List[Tuple[str, int]]: + """Generates a list of samples of a form (path_to_sample, class). + Args: + directory (str): root dataset directory + class_to_idx (Dict[str, int]): dictionary mapping class name to class index + extensions (optional): A list of allowed extensions. + Either extensions or is_valid_file should be passed. Defaults to None. + is_valid_file (optional): A function that takes path of a file + and checks if the file is a valid file + (used to check of corrupt files) both extensions and + is_valid_file should not be passed. Defaults to None. + Raises: + ValueError: In case ``extensions`` and ``is_valid_file`` are None or both are not None. + Returns: + List[Tuple[str, int]]: samples of a form (path_to_sample, class) + """ + instances = [] + directory = os.path.expanduser(directory) + both_none = extensions is None and is_valid_file is None + both_something = extensions is not None and is_valid_file is not None + if both_none or both_something: + raise ValueError("Both extensions and is_valid_file cannot be None or not None at the same time") + if extensions is not None: + def is_valid_file(x: str) -> bool: + return has_file_allowed_extension(x, cast(Tuple[str, ...], extensions)) + is_valid_file = cast(Callable[[str], bool], is_valid_file) + for target_class in sorted(class_to_idx.keys()): + class_index = class_to_idx[target_class] + target_dir = os.path.join(directory, target_class) + if not os.path.isdir(target_dir): + continue + local_instances = [] + for root, _, fnames in sorted(os.walk(target_dir, followlinks=True)): + for fname in sorted(fnames): + path = os.path.join(root, fname) + if is_valid_file(path): + item = path, class_index + local_instances.append(item) + + instances.extend(local_instances[0:int(len(local_instances) * data_per_class_fraction)]) + + return instances + + +class DatasetFolder(VisionDataset): + """A generic data loader where the samples are arranged in this way: :: + root/class_x/xxx.ext + root/class_x/xxy.ext + root/class_x/[...]/xxz.ext + root/class_y/123.ext + root/class_y/nsdf3.ext + root/class_y/[...]/asd932_.ext + Args: + root (string): Root directory path. + loader (callable): A function to load a sample given its path. + extensions (tuple[string]): A list of allowed extensions. + both extensions and is_valid_file should not be passed. + transform (callable, optional): A function/transform that takes in + a sample and returns a transformed version. + E.g, ``transforms.RandomCrop`` for images. + target_transform (callable, optional): A function/transform that takes + in the target and transforms it. + is_valid_file (callable, optional): A function that takes path of a file + and check if the file is a valid file (used to check of corrupt files) + both extensions and is_valid_file should not be passed. + Attributes: + classes (list): List of the class names sorted alphabetically. + class_to_idx (dict): Dict with items (class_name, class_index). + samples (list): List of (sample path, class_index) tuples + targets (list): The class_index value for each image in the dataset + """ + + def __init__( + self, + root: str, + loader: Callable[[str], Any], + extensions: Optional[Tuple[str, ...]] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + classes_fraction=1.0, + data_per_class_fraction=1.0, + is_valid_file: Optional[Callable[[str], bool]] = None, + ) -> None: + super(DatasetFolder, self).__init__(root, transform=transform, + target_transform=target_transform) + self.classes_fraction = classes_fraction + self.data_per_class_fraction = data_per_class_fraction + classes, class_to_idx = self._find_classes(self.root) + samples = self.make_dataset(self.root, + class_to_idx, + self.data_per_class_fraction, + extensions, + is_valid_file) + if len(samples) == 0: + msg = "Found 0 files in subfolders of: {}\n".format(self.root) + if extensions is not None: + msg += "Supported extensions are: {}".format(",".join(extensions)) + raise RuntimeError(msg) + + self.loader = loader + self.extensions = extensions + self.total = len(samples) + self.classes = classes + self.class_to_idx = class_to_idx + self.samples = samples + self.targets = [s[1] for s in samples] + + @staticmethod + def make_dataset( + directory: str, + class_to_idx: Dict[str, int], + data_per_class_fraction: float, + extensions: Optional[Tuple[str, ...]] = None, + is_valid_file: Optional[Callable[[str], bool]] = None, + ) -> List[Tuple[str, int]]: + return make_dataset(directory, + class_to_idx, + data_per_class_fraction, + extensions=extensions, + is_valid_file=is_valid_file) + + def _find_classes(self, dir: str) -> Tuple[List[str], Dict[str, int]]: + """ + Finds the class folders in a dataset. + Args: + dir (string): Root directory path. + Returns: + tuple: (classes, class_to_idx) where classes are relative to (dir), and class_to_idx is a dictionary. + Ensures: + No class is a subdirectory of another. + """ + all_classes = [d.name for d in os.scandir(dir) if d.is_dir()] + classes = all_classes[0:int(len(all_classes) * self.classes_fraction)] + classes.sort() + class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)} + return classes, class_to_idx + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + """ + Args: + index (int): Index + Returns: + tuple: (sample, target) where target is class_index of the target class. + """ + curr_index = index + for x in range(self.total): + try: + path, target = self.samples[curr_index] + sample = self.loader(path) + break + except Exception as e: + curr_index = np.random.randint(0, self.total) + + if self.transform is not None: + sample = self.transform(sample) + if self.target_transform is not None: + target = self.target_transform(target) + + return sample, target + + def __len__(self) -> int: + return len(self.samples) + + +IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp') + + +def pil_loader(path: str) -> Image.Image: + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, 'rb') as f: + img = Image.open(f) + return img.convert('RGB') + + +# TODO: specify the return type +def accimage_loader(path: str) -> Any: + import accimage + try: + return accimage.Image(path) + except IOError: + # Potentially a decoding problem, fall back to PIL.Image + return pil_loader(path) + + +def default_loader(path: str) -> Any: + from torchvision import get_image_backend + if get_image_backend() == 'accimage': + return accimage_loader(path) + else: + return pil_loader(path) + + +class ImageFolder(DatasetFolder): + """A generic data loader where the images are arranged in this way: :: + root/dog/xxx.png + root/dog/xxy.png + root/dog/[...]/xxz.png + root/cat/123.png + root/cat/nsdf3.png + root/cat/[...]/asd932_.png + Args: + root (string): Root directory path. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + loader (callable, optional): A function to load an image given its path. + is_valid_file (callable, optional): A function that takes path of an Image file + and check if the file is a valid file (used to check of corrupt files) + Attributes: + classes (list): List of the class names sorted alphabetically. + class_to_idx (dict): Dict with items (class_name, class_index). + imgs (list): List of (image path, class_index) tuples + """ + + def __init__( + self, + root: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + classes_fraction=1.0, + data_per_class_fraction=1.0, + loader: Callable[[str], Any] = default_loader, + is_valid_file: Optional[Callable[[str], bool]] = None, + ): + super(ImageFolder, self).__init__(root, loader, IMG_EXTENSIONS if is_valid_file is None else None, + transform=transform, + target_transform=target_transform, + classes_fraction=classes_fraction, + data_per_class_fraction=data_per_class_fraction, + is_valid_file=is_valid_file) + self.imgs = self.samples diff --git a/nemo/collections/vision/data/megatron/vit_dataset.py b/nemo/collections/vision/data/megatron/vit_dataset.py new file mode 100644 index 000000000000..057698153275 --- /dev/null +++ b/nemo/collections/vision/data/megatron/vit_dataset.py @@ -0,0 +1,285 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import random +import torch +import torchvision.transforms as T +from PIL import Image, ImageFilter, ImageOps +from torch.utils.data import Dataset + +from nemo.collections.vision.data.megatron.autoaugment import ImageNetPolicy +from nemo.collections.vision.data.megatron.image_folder import ImageFolder + + +def _to_torch_data_type(precision): + if precision == 'bf16': + return torch.bfloat16 + elif int(precision) == 16: + return torch.float16 + elif int(precision) == 32: + return torch.float32 + else: + raise ValueError(f"Cannot recognize precision {precision}") + + +class RandomSeedDataset(Dataset): + + def __init__(self, dataset, seed=1234): + self.base_seed = seed + self.curr_seed = seed + self.dataset = dataset + + def __len__(self): + return len(self.dataset) + + def set_epoch(self, epoch): + self.curr_seed = self.base_seed + epoch + + def __getitem__(self, idx): + seed = idx + self.curr_seed + torch.manual_seed(seed) + random.seed(seed) + np.random.seed(seed) + return self.dataset[idx] + + +class GaussianBlur(object): + """ + Apply Gaussian Blur to the PIL image. + """ + + def __init__(self, p=0.5, radius_min=0.1, radius_max=2.): + self.prob = p + self.radius_min = radius_min + self.radius_max = radius_max + + def __call__(self, img): + do_it = random.random() <= self.prob + if not do_it: + return img + + return img.filter( + ImageFilter.GaussianBlur( + radius=random.uniform(self.radius_min, self.radius_max) + ) + ) + + +class Solarization(object): + """ + Apply Solarization to the PIL image. + """ + + def __init__(self, p): + self.p = p + + def __call__(self, img): + if random.random() < self.p: + return ImageOps.solarize(img) + else: + return img + + +class ClassificationTransform(): + def __init__(self, model_cfg, image_size, train=True): + self.data_type = _to_torch_data_type(model_cfg.precision) + if train: + self.transform = T.Compose([ + T.RandomResizedCrop(image_size), + T.RandomHorizontalFlip(), + T.ColorJitter(0.4, 0.4, 0.4, 0.1), + ImageNetPolicy(), + T.ToTensor(), + T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + T.ConvertImageDtype(self.data_type) + ]) + else: + self.transform = T.Compose([ + T.Resize(image_size), + T.CenterCrop(image_size), + T.ToTensor(), + T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + T.ConvertImageDtype(self.data_type) + ]) + + def __call__(self, input): + output = self.transform(input) + return output + + +class InpaintingTransform(): + def __init__(self, model_cfg, image_size, train=True): + self.mask_factor = model_cfg.mask_factor + self.mask_type = model_cfg.mask_type + self.image_size = image_size + self.patch_size = model_cfg.patch_dim + self.mask_size = int(self.mask_factor * (image_size[0] / self.patch_size) * (image_size[1] / self.patch_size)) + self.train = train + self.data_type = _to_torch_data_type(model_cfg.precision) + + if self.train: + self.transform = T.Compose([ + T.RandomResizedCrop(self.image_size), + T.RandomHorizontalFlip(), + T.ColorJitter(0.4, 0.4, 0.4, 0.1), + ImageNetPolicy(), + T.ToTensor(), + T.ConvertImageDtype(self.data_type) + ]) + else: + self.transform = T.Compose([ + T.Resize(self.image_size, interpolation=2), + T.CenterCrop(self.image_size), + T.ToTensor(), + T.ConvertImageDtype(self.data_type) + ]) + + def gen_mask(self, image_size, mask_size, mask_type, patch_size): + # output: mask as a list with indices for missing patches + action_list = [[0, 1], [0, -1], [1, 0], [-1, 0]] + assert image_size[0] == image_size[1] + img_size_patch = image_size[0] // patch_size + + # drop masked patches + mask = torch.zeros((image_size[0], image_size[1]), dtype=torch.float) + + if mask_type == 'random': + x = torch.randint(0, img_size_patch, ()) + y = torch.randint(0, img_size_patch, ()) + for i in range(mask_size): + r = torch.randint(0, len(action_list), ()) + x = torch.clamp(x + action_list[r][0], min=0, max=img_size_patch - 1) + y = torch.clamp(y + action_list[r][1], min=0, max=img_size_patch - 1) + x_offset = x * patch_size + y_offset = y * patch_size + mask[x_offset:x_offset + patch_size, y_offset:y_offset + patch_size] = 1 + else: + assert mask_type == 'row' + count = 0 + for x in reversed(range(img_size_patch)): + for y in reversed(range(img_size_patch)): + if (count < mask_size): + count += 1 + x_offset = x * patch_size + y_offset = y * patch_size + mask[x_offset:x_offset + patch_size, y_offset:y_offset + patch_size] = 1 + return mask + + def __call__(self, input): + trans_input = self.transform(input) + mask = self.gen_mask(self.image_size, self.mask_size, + self.mask_type, self.patch_size) + mask = mask.unsqueeze(dim=0) + return trans_input, mask + + +class DinoTransform(object): + def __init__(self, model_cfg, image_size, train=True): + self.data_type = _to_torch_data_type(model_cfg.precision) + flip_and_color_jitter = T.Compose([ + T.RandomHorizontalFlip(p=0.5), + T.RandomApply( + [T.ColorJitter(brightness=0.4, contrast=0.4, + saturation=0.2, hue=0.1)], + p=0.8 + ), + T.RandomGrayscale(p=0.2), + ]) + + if model_cfg.precision in [16, "bf16"]: + normalize = T.Compose([ + T.ToTensor(), + T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + T.ConvertImageDtype(self.data_type) + ]) + else: + normalize = T.Compose([ + T.ToTensor(), + T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + ]) + + # first global crop + scale_const = 0.4 + self.global_transform1 = T.Compose([ + T.RandomResizedCrop(image_size, + scale=(scale_const, 1), + interpolation=Image.BICUBIC), + flip_and_color_jitter, + GaussianBlur(1.0), + normalize + ]) + # second global crop + self.global_transform2 = T.Compose([ + T.RandomResizedCrop(image_size, + scale=(scale_const, 1), + interpolation=Image.BICUBIC), + flip_and_color_jitter, + GaussianBlur(0.1), + Solarization(0.2), + normalize + ]) + # transformation for the local small crops + self.local_crops_number = model_cfg.dino_local_crops_number + self.local_transform = T.Compose([ + T.RandomResizedCrop(model_cfg.dino_local_img_size, + scale=(0.05, scale_const), + interpolation=Image.BICUBIC), + flip_and_color_jitter, + GaussianBlur(p=0.5), + normalize + ]) + + def __call__(self, image): + crops = [] + crops.append(self.global_transform1(image)) + crops.append(self.global_transform2(image)) + for _ in range(self.local_crops_number): + crops.append(self.local_transform(image)) + return crops + + +def build_train_valid_datasets(model_cfg, data_path, image_size=224): + if model_cfg.vision_pretraining_type == 'classify': + train_transform = ClassificationTransform(model_cfg, image_size) + val_transform = ClassificationTransform(model_cfg, image_size, train=False) + elif model_cfg.vision_pretraining_type == 'inpaint': + train_transform = InpaintingTransform(model_cfg, image_size, train=False) + val_transform = InpaintingTransform(model_cfg, image_size, train=False) + elif model_cfg.vision_pretraining_type == 'dino': + train_transform = DinoTransform(model_cfg, image_size, train=True) + val_transform = ClassificationTransform(model_cfg, image_size, train=False) + else: + raise Exception('{} vit pretraining type is not supported.'.format( + model_cfg.vit_pretraining_type)) + + # training dataset + train_data_path = data_path[0] if len(data_path) <= 2 else data_path[2] + train_data = ImageFolder( + root=train_data_path, + transform=train_transform, + classes_fraction=model_cfg.classes_fraction, + data_per_class_fraction=model_cfg.data_per_class_fraction + ) + train_data = RandomSeedDataset(train_data) + + # validation dataset + val_data_path = data_path[1] + val_data = ImageFolder( + root=val_data_path, + transform=val_transform + ) + val_data = RandomSeedDataset(val_data) + + return train_data, val_data diff --git a/nemo/collections/vision/losses/__init__.py b/nemo/collections/vision/losses/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/losses/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/metrics/__init__.py b/nemo/collections/vision/metrics/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/metrics/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/models/__init__.py b/nemo/collections/vision/models/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/models/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/models/megatron_vit_classification_models.py b/nemo/collections/vision/models/megatron_vit_classification_models.py new file mode 100644 index 000000000000..cedd9c388638 --- /dev/null +++ b/nemo/collections/vision/models/megatron_vit_classification_models.py @@ -0,0 +1,826 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools +import numpy as np +import torch +from functools import partial +from omegaconf.dictconfig import DictConfig +from pytorch_lightning.trainer.trainer import Trainer +from typing import Any, Optional, List, Dict + +from nemo.collections.nlp.data.language_modeling.megatron.megatron_batch_samplers import ( + MegatronPretrainingBatchSampler, +) +from nemo.collections.nlp.modules.common.megatron.module import ( + MegatronModule, + Float16Module, +) +from nemo.collections.nlp.modules.common.megatron.utils import ( + get_linear_layer, + init_method_normal, + scaled_init_method_normal, + average_losses_across_data_parallel_group, + get_all_params_for_weight_decay_optimization, + get_params_for_weight_decay_optimization, +) +from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.collections.vision.data.megatron.data_samplers import MegatronVisionPretrainingRandomBatchSampler +from nemo.collections.vision.data.megatron.vit_dataset import build_train_valid_datasets +from nemo.collections.vision.models.vision_base_model import MegatronVisionModel +from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone, VitMlpHead +from nemo.core.classes.common import PretrainedModelInfo +from nemo.core.neural_types import ChannelType, NeuralType +from nemo.utils import logging + +try: + from apex.transformer import parallel_state + from apex.transformer.pipeline_parallel.schedules.common import build_model + from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_without_interleaving import ( + forward_backward_pipelining_without_interleaving, + ) + from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_with_interleaving import ( + _forward_backward_pipelining_with_interleaving, + ) + from apex.transformer.pipeline_parallel.schedules.fwd_bwd_no_pipelining import forward_backward_no_pipelining + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + + +class VitClassificationModel(MegatronModule): + """Vision Transformer Model.""" + + def __init__(self, model_cfg, num_classes, finetune=False, + pre_process=True, post_process=True): + super(VitClassificationModel, self).__init__() + + scaled_init_method = ( + scaled_init_method_normal(model_cfg.init_method_std, model_cfg.num_layers) + if model_cfg.use_scaled_init_method + else init_method_normal(model_cfg.init_method_std) + ) + + self.hidden_size = model_cfg.hidden_size + self.num_classes = num_classes + self.finetune = finetune + self.pre_process = pre_process + self.post_process = post_process + self.backbone = VitBackbone( + model_cfg, + init_method=init_method_normal(model_cfg.init_method_std), + scaled_init_method=scaled_init_method, + pre_process=self.pre_process, + post_process=self.post_process, + single_token_output=True + ) + + if self.post_process: + if not self.finetune: + self.head = VitMlpHead(self.hidden_size, self.num_classes) + else: + self.head = get_linear_layer( + self.hidden_size, + self.num_classes, + torch.nn.init.zeros_ + ) + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + self.backbone.set_input_tensor(input_tensor) + + def forward(self, input): + hidden_states = self.backbone(input) + + if self.post_process: + hidden_states = self.head(hidden_states) + hidden_states = hidden_states.contiguous() + return hidden_states + + +class MegatronVitClassificationModel(MegatronVisionModel): + """Megatron Vision Transformer Model.""" + + def __init__(self, cfg: DictConfig, trainer: Trainer): + if not HAVE_APEX: + raise ImportError( + "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + super().__init__(cfg, trainer=trainer) + + self._validate_trainer() + + # TODO(yuya): clean up all default values + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) + + if not self.megatron_amp_O2 and self.cfg.get('virtual_pipeline_model_parallel_size', None): + raise ValueError('Virtual pipeline model parallel is only supported when using megatron_amp_O2') + + # build_model returns a list of modules which are used for interleaved pipeline parallelism + self.model = build_model( + model_provider_func=self.model_provider_func, + wrap_with_ddp=False, + virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), + ) + + # if we're not using interleaved, then self.model is a module. + if self.cfg.get('virtual_pipeline_model_parallel_size', None) is None: + self.model = self.model[0] + + if self.megatron_amp_O2: + + if not self.with_distributed_adam: + # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type + if isinstance(self.model, list): + for module in self.model: + module.cuda(torch.cuda.current_device()) + else: + self.model.cuda(torch.cuda.current_device()) + + # Model wrapper to convert both model and inputs to half precision + if isinstance(self.model, list): + converted_model = [] + for module in self.model: + converted_model.append(Float16Module(module=module, precision=cfg.precision)) + self.model = converted_model + else: + self.model = Float16Module(module=self.model, precision=cfg.precision) + + if self.trainer.precision == 'bf16': + self.autocast_dtype = torch.bfloat16 + elif int(self.trainer.precision) == 32: + self.autocast_dtype = torch.float + elif int(self.trainer.precision) == 16: + self.autocast_dtype = torch.half + else: + raise ValueError('precision must be in [32, 16, "bf16"]') + + def model_provider_func(self, pre_process, post_process): + """Model depends on pipeline paralellism.""" + model = VitClassificationModel( + model_cfg=self.cfg, + num_classes=self.cfg.get("num_classes"), # TODO(yuya): clean this up + finetune=self.cfg.get("finetune", False), + pre_process=pre_process, + post_process=post_process, + ) + return model + + def setup_optimizer_param_groups(self): + """ModelPT override. Optimizer will get self._optimizer_param_groups""" + if self.cfg.get('do_layer_norm_weight_decay', False): + if isinstance(self.model, list): + self._optimizer_param_groups = get_all_params_for_weight_decay_optimization(self.model) + else: + self._optimizer_param_groups = get_all_params_for_weight_decay_optimization([self.model]) + + else: + self._optimizer_param_groups = get_params_for_weight_decay_optimization(self.model) + + def configure_optimizers(self): + + if self.with_distributed_adam: + + # Disable overlapped grad sync for embedding grad when + # pipeline parallelism is enabled + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if parallel_state.is_pipeline_first_stage(ignore_virtual=True): + if isinstance(self.model, list): + module = self.model[0] # only the first virtual rank has the embeddings + else: + module = self.model + # if module.share_token_embeddings: + # param = module.word_embeddings_weight() + # param._disable_greedy_grad_copy = not self.megatron_amp_O2 + # param._disable_overlap_grad_sync = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + if isinstance(self.model, list): + module = self.model[-1] # only the last virtual rank has the embeddings + else: + module = self.model + # if module.share_token_embeddings: + # param = module.word_embeddings_weight() + # param._disable_greedy_grad_copy = not self.megatron_amp_O2 + # param._disable_overlap_grad_sync = True + + # Disable overlapped grad sync for layer norm grads when + # sequence parallelism is enabled + for param in self.parameters(): + if getattr(param, 'sequence_parallel_enabled', False): + param._disable_greedy_grad_copy = not self.megatron_amp_O2 + param._disable_overlap_grad_sync = True + + return super().configure_optimizers() + + def forward(self, tokens): + output_tensor = self.model(tokens) + return output_tensor + + def _get_fwd_bwd_function(self): + if self.cfg.get('pipeline_model_parallel_size', 1) > 1: + if self.cfg.get('virtual_pipeline_model_parallel_size', None) is not None: + fwd_bwd_function = _forward_backward_pipelining_with_interleaving + else: + fwd_bwd_function = forward_backward_pipelining_without_interleaving + else: + fwd_bwd_function = forward_backward_no_pipelining + return fwd_bwd_function + + def training_step(self, batch, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + Batch should be a list of microbatches and those microbatches should on CPU. + Microbatches are then moved to GPU during the pipeline. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + + # we zero grads here because we also call backward in the apex fwd/bwd functions + self._optimizer.zero_grad() + + if parallel_state.is_pipeline_first_stage(ignore_virtual=True) or parallel_state.is_pipeline_last_stage( + ignore_virtual=True + ): + # we prepare the micro batches for the apex fwd/bwd function + batch_for_pipeline = self.process_global_batch(batch) + else: + # The intermediate pipeline stages do not need any inputs from data loader + # GPT3 uses decoder with AttnMask:causal, thus doesn't need attention_mask + batch_for_pipeline = None + + # TODO (yuya): fix this shape + tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + + # handle asynchronous grad reduction + if self.with_distributed_adam: + if self.megatron_amp_O2: + # copy grads to main grad + custom_sync_context_handler = lambda: self._optimizer.no_sync(greedy_grad_copy=True) + else: + # keep grad tensors around + custom_sync_context_handler = lambda: self._optimizer.no_sync(greedy_grad_copy=False) + else: + if self.megatron_amp_O2 and not self.cfg.get('sequence_parallel', False): + custom_sync_context_handler = self._optimizer.no_sync + else: + # TODO: enable async grad all reduce for O1/autocast mixed precision training + custom_sync_context_handler = None + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + fwd_bwd_function = self._get_fwd_bwd_function() + + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + batch=batch_for_pipeline, + model=self.model, + forward_only=False, + tensor_shape=tensor_shape, + dtype=self.autocast_dtype, + grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + custom_sync_context_handler=custom_sync_context_handler, + sequence_parallel_enabled=self.cfg.get('sequence_parallel', False), + ) + + # only the last stages of the pipeline return losses + if losses_reduced_per_micro_batch: + # average loss across micro batches + loss_tensors_list = [loss_reduced['loss'] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_mean = loss_tensor.mean() + else: + loss_mean = torch.tensor(0.0).cuda() + + # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced + if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): + self.allreduce_sequence_parallel_gradients() + + if self.with_distributed_adam: + # launch grad reductions + # Note: grads in first pipeline stage have already been + # reduced + if not parallel_state.is_pipeline_first_stage(): + self.reduce_overlap_gradients() + elif self.megatron_amp_O2: + # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): + # # main grads are stored in the MainParamsOptimizer wrapper + # self._optimizer.allreduce_main_grads() + self._optimizer.allreduce_main_grads() + else: + # async grad allreduce is not currently implemented for O1/autocasting mixed precision training + # so we all-reduce gradients after the pipeline + self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) + + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1: + # # when using pipeline parallelism the first and last stage must keep embeddings in sync + # self.allreduce_first_last_embeddings() + + ## logging + # we can only log on one rank if it is rank zero so we broadcast from last rank + # we can avoid this broadcast by updating the PTL log function to accept specific ranks + torch.distributed.broadcast(loss_mean, get_last_rank()) + + if self.cfg.precision == 16: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log('loss_scale', loss_scale) + + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, rank_zero_only=True) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True) + self.log( + 'consumed_samples', + self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), + prog_bar=True, + rank_zero_only=True, + ) + + return loss_mean + + def backward(self, *args, **kwargs): + """ LightningModule hook to do backward. + We want this to do nothing since we run backward in the fwd/bwd functions from apex. + No need to call it here. + """ + pass + + def optimizer_zero_grad(self, *args, **kwargs): + """ LightningModule hook to zero grad. + We want this to do nothing as we are zeroing grads during the training_step. + """ + pass + + def _append_module_grads(self, module, grads): + for param in module.parameters(): + if getattr(param, 'sequence_parallel_enabled', False): + if self.megatron_amp_O2: + grad = param.main_grad + else: + grad = param.grad + grads.append(grad.data) + + def allreduce_sequence_parallel_gradients(self): + """ All-reduce layernorm parameters across model parallel nodes when sequence parallelism is used. + Modified from megatron-lm: + https://gitlab-master.nvidia.com/ADLR/megatron-lm/-/blob/3f91f09bb2ab32f9904b47f46f19d2fc3f518ed8/megatron/training.py#L425 + """ + + grads = [] + if isinstance(self.model, list): + for module in self.model: + self._append_module_grads(module, grads) + else: + self._append_module_grads(self.model, grads) + + coalesced = torch._utils._flatten_dense_tensors(grads) + torch.distributed.all_reduce(coalesced, group=parallel_state.get_tensor_model_parallel_group()) + for buf, synced in zip(grads, torch._utils._unflatten_dense_tensors(coalesced, grads)): + buf.copy_(synced) + + # def allreduce_first_last_embeddings(self): + # + # # Modified from megatron-lm: https://github.com/NVIDIA/Megatron-LM/blob/d41696840ed0a7edb7e0499eb82a48ae112d9bb3/megatron/training.py#L407 + # # All-reduce word_embeddings' grad across first and last stages to ensure + # # that word_embeddings parameters stay in sync. + # # This should only run for models that support pipelined model parallelism + # # (BERT and GPT-2). + # if parallel_state.get_pipeline_model_parallel_world_size() > 1 and ( + # parallel_state.is_pipeline_first_stage(ignore_virtual=True) + # or parallel_state.is_pipeline_last_stage(ignore_virtual=True) + # ): + # if parallel_state.is_pipeline_first_stage(ignore_virtual=True): + # if isinstance(self.model, list): + # module = self.model[0] # only the first virtual rank has the embeddings + # else: + # module = self.model + # if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + # if isinstance(self.model, list): + # module = self.model[-1] # only the last virtual rank has the embeddings + # else: + # module = self.model + # if module.share_token_embeddings: + # word_embeddings_weight = module.word_embeddings_weight() + # if self.megatron_amp_O2: + # # O2 recipe stores a "main" copy of weights and grads + # grad = word_embeddings_weight.main_grad + # else: + # grad = word_embeddings_weight.grad + # torch.distributed.all_reduce(grad, group=parallel_state.get_embedding_group()) + + def get_forward_output_and_loss_func(self): + + def loss_func(labels, output_tensor): + logits = output_tensor.contiguous().float() + loss = torch.nn.functional.cross_entropy(logits, labels) + + outputs = torch.argmax(logits, -1) + correct = (outputs == labels).float() + accuracy = torch.mean(correct) + + averaged_loss = average_losses_across_data_parallel_group([loss, accuracy]) + + return loss, {"loss": averaged_loss[0], "accuracy": averaged_loss[1]} + + def fwd_output_and_loss_func(batch, model): + if parallel_state.get_pipeline_model_parallel_world_size() == 1: + batch = [x.cuda(non_blocking=True) for x in batch] + tokens, labels = batch + else: + # Vision transformer doesn't need attention mask + if parallel_state.is_pipeline_first_stage(): + # Fist pipeline stage needs only the tokens and position_ids + tokens = batch[0].cuda(non_blocking=True) + labels = None + elif parallel_state.is_pipeline_last_stage(): + # Last pipeline stage needs only the labels and loss_mask + labels = batch[1].cuda(non_blocking=True) + tokens = None + else: + # Intermediate pipeline stage doesn't need any inputs + tokens, labels = None, None + + output_tensor = model(tokens) + return output_tensor, partial(loss_func, labels) + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(batch, model): + raise NotImplementedError + + return fwd_output_only_func + + def validation_step(self, batch, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + + batch_for_pipeline = self.process_global_batch(batch, self.cfg.global_batch_size) + tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + + # run forward passes for an entire global batch + # we do this inside validation_step to support pipeline parallelism + fwd_bwd_function = self._get_fwd_bwd_function() + + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + batch=batch_for_pipeline, + model=self.model, + forward_only=True, + tensor_shape=tensor_shape, + dtype=self.autocast_dtype, + sequence_parallel_enabled=self.cfg.get('sequence_parallel', False), + ) + + def _get_metric_with_batch_size(metric_key): + # only the last stage of the pipeline returns losses + if losses_reduced_per_micro_batch: + actual_batch_size = batch[0].shape[0] # Might be lesser than global_batch_size if drop_last=False + expected_batch_size = self.cfg.global_batch_size // parallel_state.get_data_parallel_world_size() + if actual_batch_size == expected_batch_size: + loss_with_batch_size_list = [ + [loss_reduced[metric_key].item(), self.cfg.micro_batch_size] + for loss_reduced in losses_reduced_per_micro_batch + ] + else: + loss_with_batch_size_list = [] + total_samples_remaining = actual_batch_size + for loss_reduced in losses_reduced_per_micro_batch: + if total_samples_remaining <= 0: + break + if total_samples_remaining // self.cfg.micro_batch_size >= 1: + loss_with_batch_size_list.append( + [loss_reduced[metric_key].item(), self.cfg.micro_batch_size]) + else: + loss_with_batch_size_list.append([loss_reduced[metric_key].item(), total_samples_remaining]) + total_samples_remaining = total_samples_remaining - self.cfg.micro_batch_size + else: + # we're not on the last pipeline stage so no losses + loss_with_batch_size_list = [] + return loss_with_batch_size_list + + return _get_metric_with_batch_size('loss'), _get_metric_with_batch_size('accuracy') + + def validation_epoch_end(self, outputs): + # TODO (yuya): need fix later, check with Sean + if not outputs: + return + + if parallel_state.is_pipeline_last_stage(): + loss_outputs = [output[0] for output in outputs] + acc_outputs = [output[1] for output in outputs] + + def _get_average_metric(metric_outputs): + # only the last pipeline parallel stages return metric with their batch size + total_num_samples = 0 + total_metric = 0 + for metric_with_batch_size in metric_outputs: + metric_with_batch_size_array = np.array(metric_with_batch_size).flatten() + batch_metrices = metric_with_batch_size_array[0::2] + batch_sizes = metric_with_batch_size_array[1::2] + total_num_samples += sum(batch_sizes) + total_metric += np.dot(batch_metrices, batch_sizes) + + avg_metric = total_metric / total_num_samples + return avg_metric + + averaged_metrics = torch.tensor( + [_get_average_metric(loss_outputs), _get_average_metric(acc_outputs)], + dtype=torch.float32).cuda() + else: + averaged_metrics = torch.tensor([0.0, 0.0], dtype=torch.float32).cuda() + + # we can only log on one rank if it is rank zero so we broadcast from last rank + torch.distributed.broadcast(averaged_metrics, get_last_rank()) + + averaged_loss, averaged_acc = averaged_metrics + + self.log('global_step', self.trainer.global_step, prog_bar=True, rank_zero_only=True) + self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True) + self.log('val_accuracy', averaged_acc, prog_bar=True, rank_zero_only=True) + + def test_step(self, batch, batch_idx): + return self.validation_step(batch, batch_idx) + + def test_epoch_end(self, outputs): + averaged_loss = average_losses_across_data_parallel_group(outputs) + logging.info(f'test_loss: {averaged_loss[0]}') + + def process_global_batch(self, global_batch, global_batch_size=None): + """ Prepares the global batch for apex fwd/bwd functions. + Global batch is a list of micro batches. + """ + tokens = global_batch[0] # images + labels = global_batch[1] + + expected_batch_size = None + if global_batch_size is not None: + expected_batch_size = global_batch_size // parallel_state.get_data_parallel_world_size() + current_batch_size = tokens.shape[0] + if expected_batch_size is not None and expected_batch_size > current_batch_size: + logging.info( + 'Got batch size of ' + + str(current_batch_size) + + ' , expected batch size :' + + str(expected_batch_size) + + '. Appending dummy data.' + ) + pad_length = expected_batch_size - current_batch_size + pad_dim = (int(pad_length), tokens.shape[1]) + tokens = torch.cat((tokens, torch.ones(pad_dim, dtype=tokens.dtype))) + labels = torch.cat((labels, torch.ones(pad_dim, dtype=labels.dtype))) + + return [tokens, labels] + + def build_train_valid_test_datasets(self): + logging.info('Building datasets for ViT...') + if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): + raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") + + self._train_ds, self._validation_ds = build_train_valid_datasets( + model_cfg=self.cfg, + data_path=self.cfg.data.data_path, + image_size=(self.cfg.img_h, self.cfg.img_w), + ) + self._test_ds = None + + if self._train_ds is not None: + logging.info(f'Length of train dataset: {len(self._train_ds)}') + if self._validation_ds is not None: + logging.info(f'Length of val dataset: {len(self._validation_ds)}') + if self._test_ds is not None: + logging.info(f'Length of test dataset: {len(self._test_ds)}') + logging.info(f'Finished building datasets for ViT.') + + return self._train_ds, self._validation_ds, self._test_ds + + def build_pretraining_data_loader(self, dataset, consumed_samples, dataset_type=None, drop_last=True): + """Buld dataloader given an input dataset.""" + + logging.info(f'Building dataloader with consumed samples: {consumed_samples}') + # Megatron sampler + if hasattr(self.cfg.data, 'dataloader_type') and self.cfg.data.dataloader_type is not None: + if self.cfg.data.dataloader_type == 'single': + batch_sampler = MegatronPretrainingBatchSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=self.cfg.micro_batch_size, + global_batch_size=self.cfg.global_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=drop_last, + ) + elif self.cfg.data.dataloader_type == 'cyclic': + batch_sampler = MegatronVisionPretrainingRandomBatchSampler( + dataset=dataset, + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=self.cfg.micro_batch_size, + global_batch_size=self.cfg.global_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=drop_last, + data_sharding=self.cfg.data.get("data_sharding", True), + ) + else: + raise ValueError('cfg.data.dataloader_type must be "single" or "cyclic"') + else: + raise ValueError('cfg.data.dataloader_type not found. Must be "single" or "cyclic"') + + return torch.utils.data.DataLoader( + dataset, batch_sampler=batch_sampler, num_workers=self.cfg.data.num_workers, pin_memory=True, + ) + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + + # log number of parameters + if isinstance(self.model, list): + num_parameters_on_device = sum( + [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] + ) + # if parallel_state.get_pipeline_model_parallel_world_size() > 1 and parallel_state.is_pipeline_last_stage( + # ignore_virtual=True + # ): + # # substract the embedding weights on the last virtual stage + # num_word_embedding_parameters = sum([p.nelement() for p in self.model[-1].word_embeddings_weight()]) + # num_parameters_on_device -= num_word_embedding_parameters + else: + num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) + + # if parallel_state.get_pipeline_model_parallel_world_size() > 1 and parallel_state.is_pipeline_last_stage( + # ignore_virtual=True + # ): + # # substract the embedding weights on the last stage + # num_word_embedding_parameters = sum([p.nelement() for p in self.model.word_embeddings_weight()]) + # + # num_parameters_on_device -= num_word_embedding_parameters + + # to be summed across data parallel group + total_num_parameters = torch.tensor(num_parameters_on_device).cuda() + + torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer._checkpoint_connector.resume_from_checkpoint_fit_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + # allowing restored models to optionally setup datasets + self.build_train_valid_test_datasets() + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + + # when using pipeline model parallel the final stage need to initialize word embeddings + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if isinstance(self.model, list): + for i, module in enumerate(self.model): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + # module.sync_initial_word_embeddings() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + else: + # self.model.sync_initial_word_embeddings() + pass + + def setup_training_data(self, cfg): + if hasattr(self, '_train_ds') and self._train_ds is not None: + consumed_samples = self.compute_consumed_samples(0) + logging.info( + f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' + ) + self._train_dl = self.build_pretraining_data_loader(self._train_ds, consumed_samples) + + def setup_validation_data(self, cfg): + if hasattr(self, '_validation_ds') and self._validation_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' + ) + drop_last = True + if not self.cfg.data.get('validation_drop_last', True): + logging.info(f'Drop last in validation dataset is set to False') + drop_last = False + self._validation_dl = self.build_pretraining_data_loader( + self._validation_ds, consumed_samples, "validation", drop_last + ) + + def setup_test_data(self, cfg): + if hasattr(self, '_test_ds') and self._test_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' + ) + self._test_dl = self.build_pretraining_data_loader(self._test_ds, consumed_samples) + + def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any: + raise NotImplementedError + + def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: + """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device + When using pipeline parallelism, we need the global batch to remain on the CPU, + since the memory overhead will be too high when using a large number of microbatches. + Microbatches are transferred from CPU to GPU inside the pipeline. + """ + return batch + + def _validate_trainer(self): + """ Certain trainer configurations can break training. + Here we try to catch them and raise an error. + """ + if self.trainer.accumulate_grad_batches > 1: + raise ValueError( + f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' + ) + + @classmethod + def list_available_models(cls) -> Optional[PretrainedModelInfo]: + return None + + def on_save_checkpoint(self, checkpoint) -> None: + """LightningModule hook: + https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-save-checkpoint + """ + if isinstance(self.model, list): + for i in range(len(self.model)): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + checkpoint[f'model{i}'] = self.model[i].module.state_dict_for_save_checkpoint() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + + def on_load_checkpoint(self, checkpoint) -> None: + """LightningModule hook: + https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-load-checkpoint + """ + if isinstance(self.model, list): + for i in range(len(self.model)): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + self.model[i].module.load_state_dict(checkpoint[f'model{i}'], strict=True) + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + + def parameters(self): + if isinstance(self.model, list): + return itertools.chain.from_iterable(module.parameters() for module in self.model) + else: + return self.model.parameters() + + # For onnx export + def input_example(self, max_batch=8, max_dim=384): + """ + Generates input examples for tracing etc. + Returns: + A tuple of input examples. + """ + sample = next(self.parameters()) + tokens = torch.randn(max_batch, 3, max_dim, max_dim, device=sample.device) + return (tokens,) + + @property + def input_types(self) -> Optional[Dict[str, NeuralType]]: + return { + "tokens": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), + } + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + return {"logits": NeuralType(('B', 'D'), ChannelType())} + + @property + def input_names(self) -> List[str]: + return ['tokens'] + + @property + def output_names(self) -> List[str]: + return ['logits'] diff --git a/nemo/collections/vision/models/vision_base_model.py b/nemo/collections/vision/models/vision_base_model.py new file mode 100644 index 000000000000..0d594afb8b50 --- /dev/null +++ b/nemo/collections/vision/models/vision_base_model.py @@ -0,0 +1,507 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +import torch +from omegaconf import open_dict +from omegaconf.dictconfig import DictConfig +from pytorch_lightning.core.saving import _load_state as ptl_load_state +from pytorch_lightning.core.saving import load_hparams_from_tags_csv, load_hparams_from_yaml +from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import _FxValidator +from pytorch_lightning.trainer.trainer import Trainer +from pytorch_lightning.utilities.cloud_io import load as pl_load +from pytorch_lightning.utilities.migration import pl_legacy_patch +from transformers import TRANSFORMERS_CACHE +from typing import Any, Union, Dict, Optional + +from nemo.collections.nlp.modules.common.megatron.clip_grads import ( + clip_grad_norm_distributed_optimizer, + clip_grad_norm_fp32, +) +from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.collections.nlp.parts.nlp_overrides import GradScaler +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.core.classes import ModelPT +from nemo.core.classes.exportable import Exportable +from nemo.core.optim import MainParamsOptimizerWrapper, prepare_lr_scheduler +from nemo.utils import AppState, logging +from nemo.utils.get_rank import is_global_rank_zero + +try: + from apex.transformer import parallel_state + from apex.transformer.pipeline_parallel.utils import get_num_microbatches + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + +__all__ = ['VisionModel', 'MegatronVisionModel'] + +NEMO_VISION_TMP = os.path.join(os.path.dirname(str(TRANSFORMERS_CACHE)), "nemo_vision_tmp") + +os.makedirs(NEMO_VISION_TMP, exist_ok=True) + + +class VisionModel(ModelPT, Exportable): + """Base class for Vision Models. + """ + + def __init__(self, cfg: DictConfig, trainer: Trainer = None): + + super().__init__(cfg, trainer) + + # handles model parallel save and restore logic + self._save_restore_connector = NLPSaveRestoreConnector() + + # TODO(yuya): check below + @property + def input_module(self): + return self + + @property + def output_module(self): + return self + + @property + def is_model_parallel_initialized(self): + app_state = AppState() + if app_state.model_parallel_group is not None: + return True + else: + return False + + @classmethod + def load_from_checkpoint( + cls, + checkpoint_path: str, + map_location: Any = None, + hparams_file: Optional[str] = None, + strict: bool = True, + **kwargs, + ): + """ + Loads ModelPT from checkpoint, with some maintenance of restoration. + For documentation, please refer to LightningModule.load_from_checkpoin() documentation. + """ + checkpoint = None + try: + cls._set_model_restore_state(is_being_restored=True) + # TODO: replace with proper PTL API + with pl_legacy_patch(): + if map_location is not None: + checkpoint = pl_load(checkpoint_path, map_location=map_location) + else: + checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage) + + if hparams_file is not None: + extension = hparams_file.split(".")[-1] + if extension.lower() == "csv": + hparams = load_hparams_from_tags_csv(hparams_file) + elif extension.lower() in ("yml", "yaml"): + hparams = load_hparams_from_yaml(hparams_file) + else: + raise ValueError(".csv, .yml or .yaml is required for `hparams_file`") + + hparams["on_gpu"] = False + + # overwrite hparams by the given file + checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = hparams + + # for past checkpoint need to add the new key + if cls.CHECKPOINT_HYPER_PARAMS_KEY not in checkpoint: + checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = {} + # override the hparams with values that were passed in + cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].get('cfg', checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY]) + # TODO: can we do this without overriding? + config_kwargs = kwargs.copy() + if 'trainer' in config_kwargs: + config_kwargs.pop('trainer') + cfg.update(config_kwargs) + + if cfg.get('megatron_amp_O2', False): + new_state_dict = {} + for key in checkpoint['state_dict'].keys(): + new_key = key.replace('model.', 'model.module.', 1) + new_state_dict[new_key] = checkpoint['state_dict'][key] + checkpoint['state_dict'] = new_state_dict + + if 'cfg' in kwargs: + model = ptl_load_state(cls, checkpoint, strict=strict, **kwargs) + else: + model = ptl_load_state(cls, checkpoint, strict=strict, cfg=cfg, **kwargs) + # cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].cfg + + checkpoint = model + + finally: + cls._set_model_restore_state(is_being_restored=False) + return checkpoint + + +class MegatronVisionModel(VisionModel): + """ + Megatron vision base class + It does the following things: + 1. Initialize the model parallel for nemo given the model parallel parameters. + 2. Turn on all the nvidia optimizations. + 3. If using distributed optimizer, configure to be compatible with + O2-level optimizations and/or model parallelism. + 4. Perform gradient clipping: `grad_clip_pl_default` triggers the + PyTorch Lightning default implementation, `with_distributed_adam` + triggers the distributed optimizer's implementation, + `megatron_amp_O2` triggers gradient clipping on the main grads, + and otherwise gradient clipping is performed on the model grads. + """ + + def __init__(self, cfg: DictConfig, trainer: Trainer): + # FIXME: switch to self._cfg + if not HAVE_APEX: + raise ImportError( + "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + if trainer is None: + raise ValueError(f"Trainer cannot be None for Megatron-based models. Please provide a PTL trainer object.") + + super().__init__(cfg, trainer=trainer) + + self._validate_config() + + self.with_distributed_adam = cfg.optim.get('name') == 'distributed_fused_adam' + + # used in NVIDIA NGC PyTorch containers + self._enable_nvidia_optimizations() + + if self._cfg.get('use_cpu_initialization', False) is False: + torch.cuda.set_device(trainer.local_rank) + + # buffer used during train_step for logging average loss over gradient accumulation steps + self._reduced_loss_buffer = [] + + initialize_model_parallel_for_nemo( + world_size=trainer.world_size, + global_rank=trainer.global_rank, + local_rank=trainer.local_rank, + tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1), + pipeline_model_parallel_size=cfg.get('pipeline_model_parallel_size', 1), + virtual_pipeline_model_parallel_size=cfg.get('virtual_pipeline_model_parallel_size', None), + pipeline_model_parallel_split_rank=cfg.get('pipeline_model_parallel_split_rank', 0), + micro_batch_size=cfg.get('micro_batch_size'), + global_batch_size=cfg.get('global_batch_size'), + seed=self.cfg.get('seed', 1234), + apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30), + ) + + self.grad_clip_pl_default = False # use pytorch default for gradient clipping. Default False + + # TODO: remove this when PTL 1.7.3 is released + _FxValidator.functions["configure_gradient_clipping"] = { + "allowed_on_step": (False, True), + "allowed_on_epoch": (False, True), + "default_on_step": True, + "default_on_epoch": False, + } + + def _enable_nvidia_optimizations(self): + "These optimizations are present in NVIDIA NGC PyTorch Containers" + + # NVIDIA container version check + nvidia_torch_version = os.getenv('NVIDIA_PYTORCH_VERSION', None) + if nvidia_torch_version is not None: + NVIDIA_TORCH_MAJOR = int(nvidia_torch_version.split('.')[0]) + try: + NVIDIA_TORCH_MINOR = int(nvidia_torch_version.split('.')[1]) + except Exception: + NVIDIA_TORCH_MINOR = 0 + + # Apex Persistent layer norm is supported from Nvidia PyTorch container v21.11 + if NVIDIA_TORCH_MAJOR < 21 or (NVIDIA_TORCH_MAJOR == 21 and NVIDIA_TORCH_MINOR < 11): + self.cfg.persist_layer_norm = False + + if NVIDIA_TORCH_MAJOR >= 21 or (NVIDIA_TORCH_MAJOR == 21 and NVIDIA_TORCH_MINOR >= 11): + # NVFUSER + torch._C._jit_set_profiling_executor(True) + torch._C._jit_set_profiling_mode(True) + torch._C._jit_override_can_fuse_on_cpu(False) + torch._C._jit_override_can_fuse_on_gpu(False) + torch._C._jit_set_texpr_fuser_enabled(False) + torch._C._jit_set_nvfuser_enabled(True) + torch._C._debug_set_autodiff_subgraph_inlining(False) + + else: + # Not a Nvidia container. NVFUSER Dependency check is on users + pass + + def on_train_start(self) -> None: + super().on_train_start() + self.init_global_step = self.trainer.global_step + + def _get_parameters(self): + """ + private method to load all the trainable parameters from optimizer param groups + """ + params = [] + for param_group in self._optimizer_param_groups: + for param in param_group['params']: + params.append(param) + return params + + def configure_gradient_clipping(self, *args, **kwargs): + """PTL hook to configure gradients. + We use gradient clipping implementation from megatron-lm. + """ + clip_val = self.trainer.gradient_clip_val + if clip_val is None: + return + + clip_val = float(clip_val) + if clip_val <= 0: + return + + if self.grad_clip_pl_default: + # use the default behavior + return super().configure_gradient_clipping(*args, **kwargs) + + if self.with_distributed_adam: + grad_norm = clip_grad_norm_distributed_optimizer(self._optimizer, clip_val) + else: + if self.megatron_amp_O2: + # grep fp32 master parameters for gradient clipping + parameters = self._optimizer.get_parameters() + else: + parameters = self._get_parameters() + grad_norm = clip_grad_norm_fp32(parameters=parameters, max_norm=clip_val) + + self.log('grad_norm', grad_norm, rank_zero_only=True) + + def allreduce_gradients(self): + """Reduce gradients across data parallel ranks. + Modified from megatron-lm: https://github.com/NVIDIA/Megatron-LM/blob/d41696840ed0a7edb7e0499eb82a48ae112d9bb3/megatron/model/distributed.py#L188 + """ + # Bucketize and all-reduce + buckets = {} + for param in self.parameters(): + if param.requires_grad and param.grad is not None: + tp = param.data.type() + if tp not in buckets: + buckets[tp] = [] + buckets[tp].append(param) + # param.main_grad = param.grad + + # For each bucket, all-reduce and copy all-reduced grads. + for tp in buckets: + bucket = buckets[tp] + grads = [param.grad.data for param in bucket] + coalesced = torch._utils._flatten_dense_tensors(grads) + coalesced /= parallel_state.get_data_parallel_world_size() + torch.distributed.all_reduce(coalesced, group=parallel_state.get_data_parallel_group()) + for buf, synced in zip(grads, torch._utils._unflatten_dense_tensors(coalesced, grads)): + buf.copy_(synced) + + def reduce_overlap_gradients(self): + """Reduce grads if overlapped grad sync is enabled + + Used for pipeline parallelism with the distributed Adam + optimizer. In the first pipeline stage, the grad sync is + overlapped with the final backward pass. In other pipeline + stages, the grad sync is deferred until the bubble overhead. + + """ + if self.with_distributed_adam: + self._optimizer.try_grad_sync( + p for p in self._optimizer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False) + ) + + def on_train_batch_end(self, outputs, batch, batch_idx: int, unused: Optional[int] = 0) -> None: + super().on_train_batch_end(outputs, batch, batch_idx) + + # TODO: Replace with newer override for scheduler.step() instead of + # search for plugins for fp16 GradScalar + if self.trainer.precision_plugin is not None and isinstance( + self.trainer.precision_plugin, NativeMixedPrecisionPlugin + ): + precision_plugin = self.trainer.precision_plugin + + if ( + hasattr(precision_plugin, 'scaler') + and precision_plugin.scaler is not None + and isinstance(precision_plugin.scaler, GradScaler) + ): + grad_scaler = precision_plugin.scaler + + # If the grad scaler skipped its optimizer step due to infs/nans, + # decrement the step of all schedulers. + if grad_scaler.optimizer_update_skipped is not None and grad_scaler.optimizer_update_skipped is True: + scheduler_cfgs = self.trainer.lr_scheduler_configs + + if not scheduler_cfgs or not self.trainer.lightning_module.automatic_optimization: + return + + for scheduler_cfg in scheduler_cfgs: + # Decrement the counter by 2, then perform a scheduler.step() to perform a no-up + # as well as update the optimizer lr in all param groups + scheduler_cfg.scheduler.last_epoch -= 2 + scheduler_cfg.scheduler.step() + + # Increase the max step count by 1 + + # Reset the optimizer update skipped to `None` - this is to prevent scheduler no-ops during + # accumulated gradient updates. + grad_scaler.optimizer_update_skipped = None + + def setup_optimization( + self, optim_config: Optional[Union[DictConfig, Dict]] = None, optim_kwargs: Optional[Dict[str, Any]] = None, + ): + optim_kwargs = {} if optim_kwargs is None else optim_kwargs.copy() + if self.with_distributed_adam: + + # Allocate grads since we are storing between microbatches + optim_kwargs['contiguous_grad_buffer'] = True + + if self.megatron_amp_O2: + # Match param allgather with model dtype + if hasattr(self, 'autocast_dtype'): + optim_kwargs['param_sync_dtype'] = self.autocast_dtype + if self.autocast_dtype == torch.float: + optim_kwargs['store_params'] = False + elif self.autocast_dtype == torch.float16: + optim_kwargs['store_params'] = True + elif self.autocast_dtype == torch.bfloat16: + optim_kwargs['store_params'] = False + optim_kwargs['store_param_remainders'] = True + else: + # Assume FP32 params, so no need to store main params + optim_kwargs['store_params'] = False + + return super().setup_optimization(optim_config=optim_config, optim_kwargs=optim_kwargs) + + def configure_optimizers(self): + self.setup_optimization() + + # Wrap the baseline optimizer with the optimizer class with master parameters + if self.megatron_amp_O2 and not self.with_distributed_adam and self._optimizer is not None: + if self.cfg.precision == 'bf16': + fp32_grad_accum = True + contiguous_grad_bucket = True + elif self.cfg.precision == 16: + fp32_grad_accum = False + # TODO: contiguous grad bucket for fp16 is also planned to be supported + contiguous_grad_bucket = False + raise ValueError( + "fp16 training is not yet supported with O2. Please set megatron_amp_O2 to False in the model config." + ) + + # if using tensor parallel only, we automatically use async grad all-reduce + # if using pipeline parallel or sequence parallel or gradient accumulation fusion, then we disable it + # if self.cfg.get('pipeline_model_parallel_size', 1) == 1 and not ( + # self.cfg.get('sequence_parallel', False) or self.cfg.get('gradient_accumulation_fusion', False) + # ): + # async_grad_allreduce = True + # else: + # async_grad_allreduce = False + + async_grad_allreduce = False + + if async_grad_allreduce: + # we need this to be configurable until make_nccl_premul_sum is in public PyTorch. + # currently cannot be imported in PyTorch 1.12.0 + grad_div_ar_fusion = self.cfg.get('grad_div_ar_fusion', False) + else: + grad_div_ar_fusion = False + + self._optimizer = MainParamsOptimizerWrapper( + self._optimizer, + fp32_grad_accum=fp32_grad_accum, + contiguous_grad_bucket=contiguous_grad_bucket, + async_grad_allreduce=async_grad_allreduce, + grad_div_ar_fusion=grad_div_ar_fusion, + grad_allreduce_chunk_size_mb=self.cfg.get('grad_allreduce_chunk_size_mb', 125), + ) + + assert self._trainer.max_steps is not None, "'max_steps' is missing in trainer config." + if hasattr(self._cfg.optim, 'sched'): + sched_config = self._cfg.optim.sched + sched_config['max_steps'] = self._trainer.max_steps + self._scheduler = prepare_lr_scheduler( + optimizer=self._optimizer, scheduler_config=sched_config, train_dataloader=self._train_dl + ) + + # Configure distributed optimizer + if self.with_distributed_adam: + # Initialize params so that main grads are available + # Note: Consolidate grads without overlap + self._optimizer.init_params( + p for p in self.parameters() if getattr(p, '_disable_overlap_grad_sync', False) + ) + self._optimizer.init_params(self.parameters()) + + if self._scheduler is None: + return self._optimizer + else: + return [self._optimizer], [self._scheduler] + + def compute_consumed_samples(self, steps_since_resume=0): + app_state = AppState() + consumed_samples = ( + self.init_consumed_samples + + steps_since_resume * app_state.data_parallel_size * self.cfg.micro_batch_size * get_num_microbatches() + ) + return int(consumed_samples) + + def _extract_consumed_samples_from_ckpt(self, ckpt_path): + try: + init_consumed_samples = int(float(re.findall(r"consumed_samples\=([0-9]+.[0-9]+)", ckpt_path)[0])) + except (ValueError, TypeError, IndexError): + logging.warning("Cannot parse the checkpoint file to get the consumed samples. assume it is zero.") + init_consumed_samples = 0 + + return init_consumed_samples + + def _validate_config(self): + """ Certain configurations might be incompatible or discouraged. We can check for them here.""" + + if self.cfg.get('sequence_parallel', False) and self.cfg.get('tensor_model_parallel_size', 1) == 1: + logging.info( + "Sequence parallel should only be used with tensor parallel size > 1. Setting sequence parallel to False" + ) + with open_dict(self.cfg): + self.cfg.sequence_parallel = False + + if ( + self.cfg.get('gradient_accumulation_fusion', False) + and self.cfg.get('pipeline_model_parallel_size', 1) == 1 + ): + logging.info("Gradient accumulation fusion can only be used with pipeline parallel size > 1.") + with open_dict(self.cfg): + self.cfg.gradient_accumulation_fusion = False + + if self.cfg.get('gradient_accumulation_fusion', False) and not self.cfg.get('megatron_amp_O2', False): + logging.info("Gradient accumulation fusion can only be used with megatron amp O2 mixed precision.") + with open_dict(self.cfg): + self.cfg.gradient_accumulation_fusion = False + + def is_data_parallel_rank_zero(self): + if is_global_rank_zero(): + return True + else: + try: + data_parallel_rank = parallel_state.get_data_parallel_rank() + except: + data_parallel_rank = None + + if data_parallel_rank is not None and data_parallel_rank == 0: + return True + else: + return False diff --git a/nemo/collections/vision/modules/__init__.py b/nemo/collections/vision/modules/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/modules/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/modules/common/__init__.py b/nemo/collections/vision/modules/common/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/modules/common/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/modules/common/megatron/__init__.py b/nemo/collections/vision/modules/common/megatron/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/modules/common/megatron/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/modules/common/megatron/vision_transformer.py b/nemo/collections/vision/modules/common/megatron/vision_transformer.py new file mode 100644 index 000000000000..42e3e3e52bdc --- /dev/null +++ b/nemo/collections/vision/modules/common/megatron/vision_transformer.py @@ -0,0 +1,490 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# coding=utf-8 + + +"""Transformer.""" + +import torch + +from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType +from nemo.collections.nlp.modules.common.megatron.module import MegatronModule +from nemo.collections.nlp.modules.common.megatron.transformer import ParallelTransformerLayer_, ParallelTransformer +from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults + +try: + from apex.transformer import parallel_state, tensor_parallel + from apex.transformer.enums import AttnMaskType, AttnType, ModelType + from apex.transformer.utils import divide as safe_divide + from apex.transformer.parallel_state import get_tensor_model_parallel_world_size + from apex.normalization import MixedFusedRMSNorm + + HAVE_APEX = True + +except (ImportError, ModuleNotFoundError): + + HAVE_APEX = False + + # fake missing classes with None attributes + ModelType = AttnMaskType = AttnType = LayerType = ApexGuardDefaults() + +""" We use the following notation throughout this file: + h: hidden size + n: number of attention heads + p: number of model parallel partitions + np: n/p + hp: h/p + hn: h/n + b: batch size + s: sequence length + l: number of layers + Transformer takes input of size [s, b, h] and returns a + tensor of the same size. We use the following arguments: + hyperparameters: transformer hyperparameters +""" + + +class DropPath(MegatronModule): + """Drop paths (Stochastic Depth) per sample + (when applied in main path of residual blocks). + """ + + def __init__(self, drop_prob=0.): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, hidden_state): + if self.drop_prob == 0. or not self.training: + return hidden_state + keep_prob = 1 - self.drop_prob + # work with diff dim tensors, not just 2D ConvNets + # hidden_state: [s, b, h] + shape = (1,) + (hidden_state.shape[1],) + (1,) * (hidden_state.ndim - 2) + random_tensor = keep_prob + \ + torch.rand(shape, dtype=hidden_state.dtype, device=hidden_state.device) + random_tensor.floor_() # binarize + output = hidden_state.div(keep_prob) * random_tensor + return output + + +class ParallelVisionTransformerLayer_(ParallelTransformerLayer_): + """A single transformer layer. + + Transformer layer takes input with size [s, b, h] and returns an + output of the same size. + """ + + def __init__( + self, + init_method, + output_layer_init_method, + layer_number, + hidden_size, + ffn_hidden_size, + num_attention_heads, + layer_type=LayerType.encoder, + self_attn_mask_type=AttnMaskType.padding, + fp32_residual_connection=False, + precision=16, + apply_query_key_layer_scaling=True, + kv_channels=None, + layernorm_epsilon=1e-5, + hidden_dropout=0.1, + bias_dropout_add_fusion=True, + persist_layer_norm=False, + use_cpu_initialization=False, + bias_activation_fusion=True, + openai_gelu=False, + onnx_safe=False, + masked_softmax_fusion=True, + attention_dropout=0.1, + ffn_dropout=0.0, + drop_path_rate=0.0, + activation='gelu', + megatron_legacy=False, + bias=True, + chunk_size=64, + normalization='layernorm', + transformer_block_type='pre_ln', + headscale=False, + activations_checkpoint_granularity=None, + sequence_parallel=False, + gradient_accumulation_fusion=False, + normalize_attention_scores=True, + ): + kwargs = locals() + for key in ["self", "__class__"]: + kwargs.pop(key) + drop_path_rate = kwargs.pop("drop_path_rate") + super(ParallelVisionTransformerLayer_, self).__init__(**kwargs) + + self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0.0 else None + + def forward( + self, + hidden_states, + attention_mask, + encoder_output=None, + enc_dec_attn_mask=None, + layer_past=None, + get_key_value=False, + set_inference_key_value_memory=False, + inference_max_sequence_len=None, + rotary_pos_emb=None, + # list of positional embedding tensors, first one self attention, second one and third one are for cross attention (q, k) + self_attention_relative_position_bias=None, + cross_attention_relative_position_bias=None, + checkpoint_core_attention=False, + ): + # Self attention. + if rotary_pos_emb is not None: + # self attention pos_emb is (q, q) + self_attention_pos_emb = (rotary_pos_emb[0], rotary_pos_emb[0]) + cross_attention_pos_emb = (rotary_pos_emb[1], rotary_pos_emb[2]) + else: + self_attention_pos_emb = None + cross_attention_pos_emb = None + + if self.layer_type != LayerType.retrieval_decoder_after_self_attn: + # hidden_states: [b, s, h] + + # Pre-LN: x -> LN -> MHA -> Residual -> LN -> MLP -> Residual + # Post-LN: x -> MHA -> Residual -> LN -> MLP -> Residual -> LN + # Normformer: x -> LN -> MHA -> LN -> Residual -> MLP (w/LN) -> Residual + + residual = hidden_states + # Layer norm at the beginning of the transformer layer. + if self.transformer_block_type in ['pre_ln', 'normformer']: + hidden_states = self.input_layernorm(hidden_states) + + attention_output, attention_bias = self.self_attention( + hidden_states, + attention_mask, + layer_past=layer_past, + get_key_value=get_key_value, + set_inference_key_value_memory=set_inference_key_value_memory, + inference_max_sequence_len=inference_max_sequence_len, + rotary_pos_emb=self_attention_pos_emb, + relative_position_bias=self_attention_relative_position_bias, + checkpoint_core_attention=checkpoint_core_attention, + ) + + if get_key_value: + attention_output, presents = attention_output + + # If normformer, apply norm on the output of the self attention. + if self.transformer_block_type == 'normformer': + # Normformer normalization + attention_output = ( + attention_output + attention_bias if attention_bias is not None else attention_output + ) + attention_output = self.post_attention_normformer_norm(attention_output) + attention_bias = None + + # jit scripting for a nn.module (with dropout) is not + # trigerring the fusion kernel. For now, we use two + # different nn.functional routines to account for varying + # dropout semantics during training and inference phases. + + if self.drop_path is None: + bias_dropout_add_func = self._get_bias_droput_add_func( + transformer_block_type=self.transformer_block_type, position_after='attention' + ) + if attention_bias is not None: + attention_bias = attention_bias.expand_as(residual) + + layernorm_input = bias_dropout_add_func(attention_output, attention_bias, residual, self.hidden_dropout) + else: + assert self.transformer_block_type != 'normformer', "Normfomer doesn't support drop_path" + out = torch.nn.functional.dropout(attention_output + attention_bias, + p=self.hidden_dropout, + training=self.training) + layernorm_input = residual + self.drop_path(out) + # print(f"Layer: {self.layer_number} Attention checksum {layernorm_input.sum()}") + + # Post-LN normalization after residual + if self.transformer_block_type == 'post_ln': + normalization_output = self.input_layernorm(layernorm_input) + layernorm_input = normalization_output + elif self.transformer_block_type in ['pre_ln', 'normformer']: + # Layer norm post the self attention. + normalization_output = self.post_attention_layernorm(layernorm_input) + else: + layernorm_input, normalization_output = hidden_states + + if self.layer_type == LayerType.decoder_pre_mlp: + return layernorm_input, normalization_output + + if ( + self.layer_type == LayerType.decoder + or self.layer_type == LayerType.retrieval_decoder + or self.layer_type == LayerType.retrieval_encoder + or self.layer_type == LayerType.retrieval_decoder_after_self_attn + ): + if ( + self.layer_type == LayerType.retrieval_decoder + or self.layer_type == LayerType.retrieval_decoder_after_self_attn + ): + attention_output, attention_bias = self.inter_attention( + normalization_output, + enc_dec_attn_mask, + encoder_output=encoder_output, + rotary_pos_emb=cross_attention_pos_emb, + set_inference_key_value_memory=set_inference_key_value_memory, + inference_max_sequence_len=inference_max_sequence_len, + checkpoint_core_attention=checkpoint_core_attention, + ) + else: + attention_output, attention_bias = self.inter_attention( + normalization_output, + enc_dec_attn_mask, + encoder_output=encoder_output, + rotary_pos_emb=cross_attention_pos_emb, + relative_position_bias=cross_attention_relative_position_bias, + checkpoint_core_attention=checkpoint_core_attention, + ) + + # If normformer, apply norm on the output of the self attention. + if self.transformer_block_type == 'normformer': + # Normformer normalization + attention_output = ( + attention_output + attention_bias if attention_bias is not None else attention_output + ) + attention_output = self.post_inter_attention_normformer_norm(attention_output) + attention_bias = None + + residual = layernorm_input + + bias_dropout_add_func = self._get_bias_droput_add_func( + transformer_block_type=self.transformer_block_type, position_after='attention' + ) + + layernorm_input = bias_dropout_add_func(attention_output, attention_bias, residual, self.hidden_dropout) + # print(f"Layer: {self.layer_number} Cross-Attention checksum {layernorm_input.sum()}") + normalization_output = self.post_inter_attention_layernorm(layernorm_input) + # Post-LN normalization after residual + if self.transformer_block_type == 'post_ln': + layernorm_input = normalization_output + # MLP. + mlp_output, mlp_bias = self.mlp(normalization_output) + + residual = layernorm_input + + if self.drop_path is None: + bias_dropout_add_func = self._get_bias_droput_add_func( + transformer_block_type=self.transformer_block_type, position_after='mlp' + ) + + output = bias_dropout_add_func(mlp_output, mlp_bias, residual, self.hidden_dropout) + + else: + out = torch.nn.functional.dropout(mlp_output + mlp_bias, + p=self.hidden_dropout, + training=self.training) + output = residual + self.drop_path(out) + # print(f"Layer: {self.layer_number} MLP + Dropout + Residual checksum {output.sum()}") + + if self.transformer_block_type == 'post_ln': + output = self.post_attention_layernorm(output) + + if get_key_value: + output = [output, presents] + + return output + + +class ParallelVisionTransformerLayer(ParallelVisionTransformerLayer_): + def __init__(self, **kwargs): + super(ParallelVisionTransformerLayer, self).__init__(**kwargs) + precision = kwargs['precision'] + if precision == 'bf16': + self.dtype = torch.bfloat16 + elif int(precision) == 16: + self.dtype = torch.float16 + elif int(precision) == 32: + self.dtype = torch.float32 + else: + raise ValueError(f"Cannot recognize precision {precision}") + + def forward( + self, + hidden_states, + attention_mask, + encoder_output=None, + enc_dec_attn_mask=None, + rotary_pos_emb=None, + layer_past=None, + get_key_value=False, + set_inference_key_value_memory=False, + inference_max_sequence_len=None, + self_attention_relative_position_bias=None, + cross_attention_relative_position_bias=None, + checkpoint_core_attention=False, + ): + kwargs = locals() + for key in ["self", "__class__"]: + kwargs.pop(key) + if self.dtype == torch.float32: + return super().forward( + **kwargs + ) + with torch.autocast(device_type="cuda", dtype=self.dtype): + return super().forward( + **kwargs + ) + + +class ParallelVisionTransformer(ParallelTransformer): + """Transformer class.""" + + def __init__( + self, + init_method, + output_layer_init_method, + num_layers, + hidden_size, + ffn_hidden_size, + num_attention_heads, + apply_query_key_layer_scaling=True, + kv_channels=None, + layer_type=LayerType.encoder, # it can be a list of types or single type + self_attn_mask_type=AttnMaskType.padding, + pre_process=True, + post_process=True, + precision=16, + fp32_residual_connection=False, + activations_checkpoint_method=None, + activations_checkpoint_num_layers=None, + layernorm_epsilon=1e-5, + hidden_dropout=0.1, + attention_dropout=0.1, + ffn_dropout=0.0, + drop_path_rate=0.0, + use_cpu_initialization=False, + bias_activation_fusion=True, + bias_dropout_add_fusion=True, + masked_softmax_fusion=True, + persist_layer_norm=False, + openai_gelu=False, + onnx_safe=False, + activation='gelu', + model_type=ModelType.encoder_or_decoder, + megatron_legacy=False, + bias=True, + chunk_size=64, + normalization='layernorm', + transformer_block_type='pre_ln', + headscale=False, + layer_number_offset=0, # this is use only for attention norm_factor scaling + activations_checkpoint_granularity=None, + sequence_parallel=False, + gradient_accumulation_fusion=False, + normalize_attention_scores=True, + ): + kwargs = locals() + for key in ["self", "__class__"]: + kwargs.pop(key) + self.drop_path_rate = kwargs.pop("drop_path_rate") + super(ParallelVisionTransformer, self).__init__(**kwargs) + + self.num_layers = self.get_num_layers(num_layers) + + self.drop_path_rates = [ + rate.item() for rate in + torch.linspace(0, self.drop_path_rate, + self.num_layers * parallel_state.get_pipeline_model_parallel_world_size()) + ] + + # Rebuild with vision transformer layers. + def build_layer(layer_number): + if isinstance(layer_type, list): + lt = layer_type[layer_number - 1] + else: + lt = layer_type + return ParallelVisionTransformerLayer( + init_method=init_method, + output_layer_init_method=output_layer_init_method, + layer_number=layer_number + layer_number_offset, + hidden_size=hidden_size, + ffn_hidden_size=ffn_hidden_size, + num_attention_heads=num_attention_heads, + apply_query_key_layer_scaling=apply_query_key_layer_scaling, + kv_channels=kv_channels, + layer_type=lt, + self_attn_mask_type=self_attn_mask_type, + precision=precision, + fp32_residual_connection=fp32_residual_connection, + layernorm_epsilon=layernorm_epsilon, + hidden_dropout=hidden_dropout, + attention_dropout=attention_dropout, + ffn_dropout=ffn_dropout, + drop_path_rate=self.drop_path_rates[layer_number - 1], + use_cpu_initialization=use_cpu_initialization, + bias_activation_fusion=bias_activation_fusion, + bias_dropout_add_fusion=bias_dropout_add_fusion, + masked_softmax_fusion=masked_softmax_fusion, + persist_layer_norm=persist_layer_norm, + openai_gelu=openai_gelu, + onnx_safe=onnx_safe, + activation=activation, + megatron_legacy=megatron_legacy, + bias=bias, + chunk_size=chunk_size, + normalization=normalization, + transformer_block_type=transformer_block_type, + headscale=headscale, + activations_checkpoint_granularity=activations_checkpoint_granularity, + sequence_parallel=sequence_parallel, + gradient_accumulation_fusion=gradient_accumulation_fusion, + normalize_attention_scores=normalize_attention_scores, + ) + + if parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None: + assert num_layers % parallel_state.get_virtual_pipeline_model_parallel_world_size() == 0, ( + 'num_layers_per_stage must be divisible by ' 'virtual_pipeline_model_parallel_size' + ) + + # self.model_type != ModelType.encoder_and_decoder + assert self.model_type.value != 2, f'virtual pipeline parallel currently only supported for GPT' + + # Number of layers in each model chunk is the number of layers in the stage, + # divided by the number of model chunks in a stage. + self.num_layers = self.num_layers // parallel_state.get_virtual_pipeline_model_parallel_world_size() + # With 8 layers, 2 stages, and 4 model chunks, we want an assignment of + # layers to stages like (each list is a model chunk): + # Stage 0: [0] [2] [4] [6] + # Stage 1: [1] [3] [5] [7] + # With 8 layers, 2 stages, and 2 virtual stages, we want an assignment of + # layers to stages like (each list is a model chunk): + # Stage 0: [0, 1] [4, 5] + # Stage 1: [2, 3] [6, 7] + offset = parallel_state.get_virtual_pipeline_model_parallel_rank() * ( + num_layers // parallel_state.get_virtual_pipeline_model_parallel_world_size() + ) + (parallel_state.get_pipeline_model_parallel_rank() * self.num_layers) + else: + # Each stage gets a contiguous set of layers. + if ( + self.model_type == ModelType.encoder_and_decoder + and parallel_state.get_pipeline_model_parallel_world_size() > 1 + ): + pipeline_rank = parallel_state.get_pipeline_model_parallel_rank() + if layer_type == LayerType.encoder: + offset = pipeline_rank * self.num_layers + else: + num_ranks_in_enc = parallel_state.get_pipeline_model_parallel_split_rank() + offset = (pipeline_rank - num_ranks_in_enc) * self.num_layers + else: + offset = parallel_state.get_pipeline_model_parallel_rank() * self.num_layers + + self.layers = torch.nn.ModuleList([build_layer(i + 1 + offset) for i in range(self.num_layers)]) diff --git a/nemo/collections/vision/modules/vit/__init__.py b/nemo/collections/vision/modules/vit/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/modules/vit/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/modules/vit/vit_backbone.py b/nemo/collections/vision/modules/vit/vit_backbone.py new file mode 100644 index 000000000000..e14fb23ffbb4 --- /dev/null +++ b/nemo/collections/vision/modules/vit/vit_backbone.py @@ -0,0 +1,382 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Vision Transformer(VIT) model.""" + +import einops +import math +import torch +import torch.nn.functional as F +from functools import partial + +from nemo.collections.nlp.modules.common.megatron.fused_layer_norm import get_layer_norm +from nemo.collections.nlp.modules.common.megatron.module import MegatronModule +from nemo.collections.nlp.modules.common.megatron.utils import ( + ApexGuardDefaults, + init_method_normal, + scaled_init_method_normal, +) +from nemo.collections.vision.modules.common.megatron.vision_transformer import ParallelVisionTransformer + +try: + import apex + from apex.transformer import tensor_parallel + from apex.transformer.enums import AttnMaskType + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + + # fake missing classes with None attributes + AttnMaskType = ApexGuardDefaults() + LayerType = ApexGuardDefaults() + + +class DropPatch(MegatronModule): + """ + https://arxiv.org/abs/2212.00794 + """ + + def __init__(self, prob, class_token_length=8, exclude_cls_tokens=True): + assert 0 <= prob < 1. + super(DropPatch, self).__init__() + self.prob = prob + self.class_token_length = class_token_length + self.exclude_cls_tokens = exclude_cls_tokens # exclude CLS token + + def __call__(self, x): + if self.prob == 0. or not self.training: + return x + + class_token_length = self.class_token_length + if self.exclude_cls_tokens: + cls_tokens, x = x[:, :class_token_length], x[:, class_token_length:] + + batch, num_tokens, _, device = *x.shape, x.device + + batch_indices = torch.arange(batch, device=device) + batch_indices = batch_indices[..., None] + + keep_prob = 1 - self.prob + num_patches_keep = max(1, int(num_tokens * keep_prob)) + + rand = torch.randn(batch, num_tokens, device=device) + patch_indices_keep = rand.topk(num_patches_keep, dim=-1).indices + + x = x[batch_indices, patch_indices_keep] + + if self.exclude_cls_tokens: + x = torch.cat((cls_tokens, x), dim=1) + + return x + + +class VitMlpHead(MegatronModule): + """Pooler layer. + + Pool hidden states of a specific token (for example start of the + sequence) and add a linear transformation followed by a tanh. + + Arguments: + hidden_size: hidden size + init_method: weight initialization method for the linear layer. + bias is set to zero. + """ + + def __init__(self, hidden_size, num_classes): + super(VitMlpHead, self).__init__() + self.dense_in = torch.nn.Linear(hidden_size, hidden_size) + self.relu = torch.nn.ReLU() + self.dense_out = torch.nn.Linear(hidden_size, num_classes) + torch.nn.init.constant_(self.dense_out.bias, -10) + + def forward(self, hidden_states): + # hidden_states: [b, 1, h] + # sequence_index: index of the token to pool. + dense_in_result = self.dense_in(hidden_states) + tanh_result = torch.tanh(dense_in_result) + dense_out_result = self.dense_out(tanh_result) + return dense_out_result + + +def isPerfectSquare(x): + if (x >= 0): + sr = math.sqrt(x) + return (int(sr) * int(sr) == x) + return False + + +def twod_interpolate_position_embeddings_hook( + model_cfg, + class_token_present, + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, +): + num_patches_per_dim_h = model_cfg.img_h // model_cfg.patch_dim + num_patches_per_dim_w = model_cfg.img_w // model_cfg.patch_dim + num_patches = num_patches_per_dim_h * num_patches_per_dim_w + hidden_size = model_cfg.hidden_size + class_token_length = model_cfg.get("class_token_length", 8) + + key = prefix + "weight" + + assert key in state_dict + if key in state_dict: + input_param = state_dict[key] + + input_seq_len = input_param.shape[0] + assert (isPerfectSquare(input_seq_len) or isPerfectSquare(input_seq_len - class_token_length)) + input_has_class_token = not isPerfectSquare(input_seq_len) + num_tok_input = input_seq_len - class_token_length if input_has_class_token else input_seq_len + num_tok_output = num_patches + output_has_class_token = class_token_present + + # update input_param and load it to state_dict[key] + if input_has_class_token: + input_param_tok = input_param[:class_token_length, :] + input_param_grid = input_param[class_token_length:, :] + else: + input_param_tok = torch.zeros(class_token_length, hidden_size) + input_param_grid = input_param + + assert input_param.shape[1] == hidden_size + + if num_tok_input != num_tok_output: + gs_input = int(math.sqrt(num_tok_input)) + gs_new = (num_patches_per_dim_h, num_patches_per_dim_w) + + input_param_grid = input_param_grid.transpose(0, 1).contiguous() + input_param_grid = input_param_grid.reshape( + (1, -1, gs_input, gs_input) + ) + input_param_grid = input_param_grid.float() + scale_factor = (gs_new[0] / gs_input, gs_new[1] / gs_input) + + input_param_grid = F.interpolate( + input_param_grid, scale_factor=scale_factor, mode="bilinear" + ) + + input_param_grid = input_param_grid.half() + input_param_grid = input_param_grid.reshape((-1, num_tok_output)) + input_param_grid = input_param_grid.transpose(0, 1).contiguous() + + assert input_param_grid.shape[1] == hidden_size + + input_param = input_param_grid + assert ( + input_param.shape[0] == num_tok_output + and input_param.shape[1] == hidden_size + ) + + if output_has_class_token: + input_param = torch.cat((input_param_tok, input_param), dim=0) + + state_dict[key] = input_param + + +class VitBackbone(MegatronModule): + """Vision Transformer Model.""" + + def __init__(self, + model_cfg, + init_method=None, + scaled_init_method=None, + pre_process=True, + post_process=True, + class_token=True, + single_token_output=False): + super(VitBackbone, self).__init__(share_token_embeddings=False) + + self.fp16_lm_cross_entropy = model_cfg.fp16_lm_cross_entropy + num_layers = model_cfg.num_layers + init_method_std = model_cfg.init_method_std + if init_method is None: + init_method = init_method_normal(init_method_std) + if scaled_init_method is None: + scaled_init_method = scaled_init_method_normal(init_method_std, num_layers) + + self.pre_process = pre_process + self.post_process = post_process + self.class_token = class_token + self.hidden_size = model_cfg.hidden_size + self.patch_dim = model_cfg.patch_dim + self.img_h = model_cfg.img_h + self.img_w = model_cfg.img_w + self.single_token_output = single_token_output + self.drop_patch_rate = model_cfg.get("drop_patch_rate", 0.) + self.drop_path_rate = model_cfg.get("drop_path_rate", 0.) + preprocess_layernorm = model_cfg.get("preprocess_layernorm", False) + + assert self.img_h % self.patch_dim == 0 + assert self.img_w % self.patch_dim == 0 + self.num_patches_per_dim_h = self.img_h // self.patch_dim + self.num_patches_per_dim_w = self.img_w // self.patch_dim + self.num_patches = self.num_patches_per_dim_h * self.num_patches_per_dim_w + class_token_length = model_cfg.get("class_token_length", 8) + self.seq_length = self.num_patches + (class_token_length if self.class_token else 0) + self.flatten_dim = self.patch_dim * self.patch_dim * model_cfg.num_channels + self.input_tensor = None + self.position_ids = None + self.preprocess_layernorm = None + + if self.pre_process: + # cls_token + if self.class_token: + self.cls_token = torch.nn.Parameter( + torch.randn(1, class_token_length, self.hidden_size) + ) + torch.nn.init.zeros_(self.cls_token) + self.position_ids = torch.arange(self.seq_length).expand(1, -1).cuda() + + # Linear encoder + self.linear_encoder = torch.nn.Linear( + self.flatten_dim, self.hidden_size + ) + + # embedding + self.position_embedding_type = model_cfg.get("position_embedding_type", "learned_absolute") + + if self.position_embedding_type == "learned_absolute": + self.position_embeddings = torch.nn.Embedding( + self.seq_length, self.hidden_size + ) + init_method_normal(model_cfg.init_method_std)( + self.position_embeddings.weight + ) + + class_token_present = self.class_token + self.position_embeddings._register_load_state_dict_pre_hook( + partial( + twod_interpolate_position_embeddings_hook, + model_cfg, + class_token_present + ) + ) + elif self.position_embedding_type == "learned_parameters": + self.position_embeddings = torch.nn.Parameter( + torch.empty(self.seq_length, self.hidden_size) + ) + init_method_normal(model_cfg.init_method_std)( + self.position_embeddings + ) + else: + raise ValueError(f"Unrecognized positional embedding type {self.position_embedding_type}!") + + self.embedding_dropout = torch.nn.Dropout(model_cfg.hidden_dropout) + self.drop_patch = DropPatch( + self.drop_patch_rate, + class_token_length=class_token_length, + exclude_cls_tokens=self.class_token + ) + + if preprocess_layernorm: + self.preprocess_layernorm = get_layer_norm( + model_cfg.hidden_size, model_cfg.layernorm_epsilon, model_cfg.persist_layer_norm, + sequence_parallel=model_cfg.sequence_parallel + ) + + self.transformer = ParallelVisionTransformer( + init_method=init_method, + output_layer_init_method=scaled_init_method, + num_layers=model_cfg.num_layers, + hidden_size=model_cfg.hidden_size, + num_attention_heads=model_cfg.num_attention_heads, + apply_query_key_layer_scaling=model_cfg.apply_query_key_layer_scaling, + kv_channels=model_cfg.kv_channels, + ffn_hidden_size=model_cfg.ffn_hidden_size, + # self_attn_mask_type=self.encoder_attn_mask_type, # TODO (yuya) + pre_process=self.pre_process, + post_process=self.post_process, + precision=model_cfg.precision, + fp32_residual_connection=model_cfg.fp32_residual_connection, + activations_checkpoint_method=model_cfg.activations_checkpoint_method, + activations_checkpoint_num_layers=model_cfg.activations_checkpoint_num_layers, + normalization=model_cfg.normalization, + layernorm_epsilon=model_cfg.layernorm_epsilon, + hidden_dropout=model_cfg.hidden_dropout, + attention_dropout=model_cfg.attention_dropout, + drop_path_rate=model_cfg.drop_path_rate, + use_cpu_initialization=model_cfg.use_cpu_initialization, + bias_activation_fusion=model_cfg.get("bias_activation_fusion", False), + persist_layer_norm=model_cfg.persist_layer_norm, + openai_gelu=model_cfg.openai_gelu, + onnx_safe=model_cfg.onnx_safe, + masked_softmax_fusion=model_cfg.masked_softmax_fusion, + megatron_legacy=model_cfg.megatron_legacy, + sequence_parallel=model_cfg.sequence_parallel, + activations_checkpoint_granularity=model_cfg.activations_checkpoint_granularity, + gradient_accumulation_fusion=model_cfg.gradient_accumulation_fusion, + ) + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + self.transformer.set_input_tensor(input_tensor) + + def forward(self, input): + + if self.pre_process: + rearranged_input = einops.rearrange( + input, + "b c (h p1) (w p2) -> b (h w) (p1 p2 c)", + p1=self.patch_dim, + p2=self.patch_dim, + ) + + # [b num_patch patch_dim*patch_dim*c] -> [b, s, h]; s:=num_patch, h:=hidden + encoder_output = self.linear_encoder(rearranged_input) + + concatenated_tokens = encoder_output + if self.class_token: + cls_tokens = self.cls_token.expand(encoder_output.shape[0], -1, -1) + concatenated_tokens = torch.cat((cls_tokens, encoder_output), dim=1) + + if self.position_embedding_type == "learned_absolute": + token_embeddings = concatenated_tokens + \ + self.position_embeddings(self.position_ids[:, :concatenated_tokens.shape[1]]) + elif self.position_embedding_type == "learned_parameters": + token_embeddings = concatenated_tokens + self.position_embeddings + + # a patch_dropout of 0. would mean it is disabled and this function would do nothing but return what was passed in + token_embeddings = self.drop_patch(token_embeddings) + + if self.preprocess_layernorm is not None: + token_embeddings = self.preprocess_layernorm(token_embeddings) + + # [b s h] => [s b h] + token_embeddings = token_embeddings.transpose(0, 1).contiguous() + hidden_states = self.embedding_dropout(token_embeddings) + else: + hidden_states = input + + # 0 represents masking, 1 represents not masking + # attention_mask = torch.zeros( + # [1, 1, hidden_states.shape[0], hidden_states.shape[0]], + # device=hidden_states.device, + # dtype=torch.bool, + # ) + hidden_states = self.transformer(hidden_states, None) + + if self.post_process: + # [s b h] => [b s h] + if self.single_token_output: + hidden_states = hidden_states[0] + else: + hidden_states = hidden_states.transpose(0, 1).contiguous() + + return hidden_states diff --git a/nemo/collections/vision/parts/__init__.py b/nemo/collections/vision/parts/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/parts/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/package_info.py b/nemo/package_info.py index 1655a7d860af..10bb89c73aff 100644 --- a/nemo/package_info.py +++ b/nemo/package_info.py @@ -16,7 +16,7 @@ MAJOR = 1 MINOR = 18 PATCH = 0 -PRE_RELEASE = 'rc0' +PRE_RELEASE = '' # Use the following formatting: (major, minor, patch, pre-release) VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE) diff --git a/nemo/utils/trt_utils.py b/nemo/utils/trt_utils.py new file mode 100644 index 000000000000..03ff5126b2a5 --- /dev/null +++ b/nemo/utils/trt_utils.py @@ -0,0 +1,59 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from polygraphy.backend.trt import CreateConfig, Profile +from polygraphy.backend.trt import ( + engine_from_network, + network_from_onnx_path, + save_engine, +) + +import tensorrt as trt + +def build_engine( + onnx_path, + output_path, + fp16, + input_profile=None, + enable_refit=False, + enable_preview=False, + timing_cache=None, + workspace_size=0, + ): + print(f"Building TensorRT engine for {onnx_path}: {output_path}") + p = Profile() + if input_profile: + for name, dims in input_profile.items(): + assert len(dims) == 3 + p.add(name, min=dims[0], opt=dims[1], max=dims[2]) + + preview_features = None + + config_kwargs = {} + if workspace_size > 0: + config_kwargs["memory_pool_limits"] = { + trt.MemoryPoolType.WORKSPACE: workspace_size + } + engine = engine_from_network( + network_from_onnx_path(onnx_path), + config=CreateConfig( + fp16=fp16, + refittable=enable_refit, + profiles=[p], + preview_features=preview_features, + load_timing_cache=timing_cache, + **config_kwargs, + ), + save_timing_cache=timing_cache, + ) + save_engine(engine, path=output_path) \ No newline at end of file diff --git a/requirements/requirements_vision.txt b/requirements/requirements_vision.txt new file mode 100644 index 000000000000..d3d4b29db16f --- /dev/null +++ b/requirements/requirements_vision.txt @@ -0,0 +1,8 @@ +boto3 +einops +flask_restful +ftfy +gdown +matplotlib>=3.3.2 +nltk>=3.6.5 +numpy diff --git a/scripts/fid-eval-text2img/TFinception_V3.py b/scripts/fid-eval-text2img/TFinception_V3.py new file mode 100644 index 000000000000..8fdcbff13117 --- /dev/null +++ b/scripts/fid-eval-text2img/TFinception_V3.py @@ -0,0 +1,246 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# share: outside-ok + +""" +Modified from +https://github.com/mseitzer/pytorch-fid + +Code adapted from https://github.com/bioinf-jku/TTUR to use PyTorch instead +of Tensorflow +Copyright 2018 Institute of Bioinformatics, JKU Linz +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import torch +import torch.nn.functional as F +from torch import nn + +# from imaginaire.utils.misc import apply_imagenet_normalization + + +try: + from torchvision.models.utils import load_state_dict_from_url +except ImportError: + from torch.utils.model_zoo import load_url as load_state_dict_from_url + +from torchvision.models import inception, inception_v3, vgg16 + +# Inception weights ported to Pytorch from +# http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz +FID_WEIGHTS_URL = 'https://github.com/mseitzer/pytorch-fid/releases' \ + '/download/fid_weights/pt_inception-2015-12-05-6726825d.pth' + + +class SwAV(nn.Module): + def __init__(self): + super().__init__() + self.model = torch.hub.load('facebookresearch/swav', 'resnet50', + pretrained=True) + self.model.fc = torch.nn.Sequential() + + def forward(self, x, align_corners=True): + y = self.model(F.interpolate( + x, size=(224, 224), mode='bicubic', align_corners=align_corners)) + return y + + +class Vgg16(nn.Module): + def __init__(self): + super().__init__() + self.model = vgg16(pretrained=True, init_weights=False) + self.model.classifier = torch.nn.Sequential( + *[self.model.classifier[i] for i in range(4)] + ) + + def forward(self, x, align_corners=True): + y = self.model(F.interpolate( + x, size=(224, 224), mode='bicubic', align_corners=align_corners)) + return y + + +class InceptionV3(nn.Module): + def __init__(self): + super().__init__() + self.model = inception_v3(transform_input=False, + pretrained=True, + init_weights=False) + self.model.fc = torch.nn.Sequential() + + def forward(self, x, align_corners=True): + y = self.model(F.interpolate( + x, size=(299, 299), mode='bicubic', align_corners=align_corners)) + return y + + +class TFInceptionV3(nn.Module): + def __init__(self): + super().__init__() + self.model = inception_v3(transform_input=False, + num_classes=1008, + aux_logits=False, + pretrained=False, + init_weights=False) + self.model.Mixed_5b = FIDInceptionA(192, pool_features=32) + self.model.Mixed_5c = FIDInceptionA(256, pool_features=64) + self.model.Mixed_5d = FIDInceptionA(288, pool_features=64) + self.model.Mixed_6b = FIDInceptionC(768, channels_7x7=128) + self.model.Mixed_6c = FIDInceptionC(768, channels_7x7=160) + self.model.Mixed_6d = FIDInceptionC(768, channels_7x7=160) + self.model.Mixed_6e = FIDInceptionC(768, channels_7x7=192) + self.model.Mixed_7b = FIDInceptionE_1(1280) + self.model.Mixed_7c = FIDInceptionE_2(2048) + + state_dict = load_state_dict_from_url( + FID_WEIGHTS_URL, progress=True, map_location='cpu' + ) + self.model.load_state_dict(state_dict) + self.model.fc = torch.nn.Sequential() + + def forward(self, x, align_corners=True): + # x = apply_imagenet_normalization(x) + y = self.model(F.interpolate( + x, size=(299, 299), mode='bicubic', align_corners=align_corners)) + return y + + +class FIDInceptionA(inception.InceptionA): + """InceptionA block patched for FID computation""" + + def __init__(self, in_channels, pool_features): + super(FIDInceptionA, self).__init__(in_channels, pool_features) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch5x5 = self.branch5x5_1(x) + branch5x5 = self.branch5x5_2(branch5x5) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + # Patch: Tensorflow's average pool does not use the padded zero's in + # its average calculation + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, + count_include_pad=False) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] + return torch.cat(outputs, 1) + + +class FIDInceptionC(inception.InceptionC): + """InceptionC block patched for FID computation""" + + def __init__(self, in_channels, channels_7x7): + super(FIDInceptionC, self).__init__(in_channels, channels_7x7) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch7x7 = self.branch7x7_1(x) + branch7x7 = self.branch7x7_2(branch7x7) + branch7x7 = self.branch7x7_3(branch7x7) + + branch7x7dbl = self.branch7x7dbl_1(x) + branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) + + # Patch: Tensorflow's average pool does not use the padded zero's in + # its average calculation + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, + count_include_pad=False) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool] + return torch.cat(outputs, 1) + + +class FIDInceptionE_1(inception.InceptionE): + """First InceptionE block patched for FID computation""" + + def __init__(self, in_channels): + super(FIDInceptionE_1, self).__init__(in_channels) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch3x3 = self.branch3x3_1(x) + branch3x3 = [ + self.branch3x3_2a(branch3x3), + self.branch3x3_2b(branch3x3), + ] + branch3x3 = torch.cat(branch3x3, 1) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = [ + self.branch3x3dbl_3a(branch3x3dbl), + self.branch3x3dbl_3b(branch3x3dbl), + ] + branch3x3dbl = torch.cat(branch3x3dbl, 1) + + # Patch: Tensorflow's average pool does not use the padded zero's in + # its average calculation + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, + count_include_pad=False) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] + return torch.cat(outputs, 1) + + +class FIDInceptionE_2(inception.InceptionE): + """Second InceptionE block patched for FID computation""" + + def __init__(self, in_channels): + super(FIDInceptionE_2, self).__init__(in_channels) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch3x3 = self.branch3x3_1(x) + branch3x3 = [ + self.branch3x3_2a(branch3x3), + self.branch3x3_2b(branch3x3), + ] + branch3x3 = torch.cat(branch3x3, 1) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = [ + self.branch3x3dbl_3a(branch3x3dbl), + self.branch3x3dbl_3b(branch3x3dbl), + ] + branch3x3dbl = torch.cat(branch3x3dbl, 1) + + # Patch: The FID Inception model uses max pooling instead of average + # pooling. This is likely an error in this specific Inception + # implementation, as other Inception models use average pooling here + # (which matches the description in the paper). + branch_pool = F.max_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] + return torch.cat(outputs, 1) diff --git a/scripts/fid-eval-text2img/compute_clip_score.py b/scripts/fid-eval-text2img/compute_clip_score.py new file mode 100644 index 000000000000..8a9e2e482bb2 --- /dev/null +++ b/scripts/fid-eval-text2img/compute_clip_score.py @@ -0,0 +1,115 @@ +""" +python clip_script.py --captions_path /path/to/coco2014_val/captions \ + --fid_images_path /path/to/synthetic_images \ + --output_path /path/to/output/clip_scores.csv + +1. `--captions_path`: The path to the real images captions directory. In this example, + it is set to `/path/to/coco2014_val/captions`. This path should point to the + directory containing the COCO 2014 validation dataset captions. + +2. `--fid_images_path`: The path to the directory containing subfolders with synthetic + images. In this example, it is set to `/path/to/synthetic_images`. Each subfolder + should contain a set of synthetic images for which you want to compute CLIP scores + against the captions from `--captions_path`. + +3. `--output_path`: The path to the output CSV file where the CLIP scores will be saved. + In this example, it is set to `/path/to/output/clip_scores.csv`. This file will + contain a table with two columns: `cfg` and `clip_score`. The `cfg` + column lists the names of the subfolders in `--fid_images_path`, and the + `clip_score` column lists the corresponding average CLIP scores between the synthetic + images in each subfolder and the captions from `--captions_path`. +""" + +import open_clip +import torch +import torch.nn as nn +from PIL import Image +from glob import glob +from tqdm import tqdm +import os +import argparse +import csv + + +class CLIPEncoder(nn.Module): + def __init__(self, clip_version='ViT-B/32', pretrained='', cache_dir=None, device='cuda'): + super().__init__() + + self.clip_version = clip_version + if not pretrained: + if self.clip_version == 'ViT-H-14': + self.pretrained = 'laion2b_s32b_b79k' + elif self.clip_version == 'ViT-g-14': + self.pretrained = 'laion2b_s12b_b42k' + else: + self.pretrained = 'openai' + + self.model, _, self.preprocess = open_clip.create_model_and_transforms( + self.clip_version, pretrained=self.pretrained, cache_dir=cache_dir) + + self.model.eval() + self.model.to(device) + + self.device = device + + @torch.no_grad() + def get_clip_score(self, text, image): + if isinstance(image, str): # filenmae + image = Image.open(image) + if isinstance(image, Image.Image): # PIL Image + image = self.preprocess(image).unsqueeze(0).to(self.device) + image_features = self.model.encode_image(image).float() + image_features /= image_features.norm(dim=-1, keepdim=True) + + if not isinstance(text, (list, tuple)): + text = [text] + text = open_clip.tokenize(text).to(self.device) + text_features = self.model.encode_text(text).float() + text_features /= text_features.norm(dim=-1, keepdim=True) + similarity = image_features @ text_features.T + + return similarity + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--captions_path', default='/coco2014/coco2014_val_sampled_30k/captions/', type=str) + parser.add_argument('--fid_images_path', default=None, type=str) + parser.add_argument('--output_path', default='./clip_scores.csv', type=str) + args = parser.parse_args() + + captions_path = args.captions_path + print('Init CLIP Encoder..') + encoder = CLIPEncoder(clip_version='ViT-L-14') + + # Create output CSV file + with open(args.output_path, 'w', newline='') as csvfile: + fieldnames = ['cfg', 'clip_score'] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + + # Iterate through subfolders in fid_images_path + for subfolder in os.listdir(args.fid_images_path): + subfolder_path = os.path.join(args.fid_images_path, subfolder) + if os.path.isdir(subfolder_path): + images = sorted(glob(f'{subfolder_path}/*.png'), key=lambda x: (int(x.split('/')[-1].strip('.png').split('_')[1]))) + texts = sorted(glob(f'{captions_path}/*.txt')) + print(images[:5], texts[:5]) + assert len(images) == len(texts) + print(f'Number of images text pairs: {len(images)}') + + ave_sim = 0. + count = 0 + for text, img in zip(tqdm(texts), images): + with open(text, 'r') as f: + text = f.read().strip() + sim = encoder.get_clip_score(text, img) + ave_sim += sim + count += 1 + if count % 2000 == 0: + print(ave_sim / count) + + ave_sim /= count + print(f'The CLIP similarity for CFG {subfolder}: {ave_sim}') + + # Write CLIP score to output CSV file + writer.writerow({'cfg': subfolder, 'clip_score': ave_sim}) \ No newline at end of file diff --git a/scripts/fid-eval-text2img/compute_fid.py b/scripts/fid-eval-text2img/compute_fid.py new file mode 100644 index 000000000000..e5b173c71944 --- /dev/null +++ b/scripts/fid-eval-text2img/compute_fid.py @@ -0,0 +1,350 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import collections +import numpy as np +import os +import torch +import torch.distributed as dist +import torch.nn.functional as F +from scipy import linalg +from torch import nn + +from TFinception_V3 import SwAV, TFInceptionV3, InceptionV3, Vgg16 + + +def network_init(network='inception'): + # inception = inception_v3(pretrained=True, transform_input=False) + # inception = inception.to('cuda') + # inception.eval() + # inception.fc = torch.nn.Sequential() + + if dist.is_initialized() and not is_local_master(): + # Make sure only the first process in distributed training downloads + # the model, and the others will use the cache + # noinspection PyUnresolvedReferences + torch.distributed.barrier() + + if network == 'tf_inception': + model = TFInceptionV3() + elif network == 'inception': + model = InceptionV3() + elif network == 'vgg16': + model = Vgg16() + elif network == 'swav': + model = SwAV() + elif network == 'clean_inception': + model = CleanInceptionV3() + else: + raise NotImplementedError(f'Network "{network}" is not supported!') + + if dist.is_initialized() and is_local_master(): + # Make sure only the first process in distributed training downloads + # the model, and the others will use the cache + # noinspection PyUnresolvedReferences + dist.barrier() + + model = model.to('cuda').eval() + return model + + +def _calculate_frechet_distance(act_1, act_2, eps=1e-6): + mu1 = np.mean(act_1.cpu().numpy(), axis=0) + sigma1 = np.cov(act_1.cpu().numpy(), rowvar=False) + mu2 = np.mean(act_2.cpu().numpy(), axis=0) + sigma2 = np.cov(act_2.cpu().numpy(), rowvar=False) + mu1 = np.atleast_1d(mu1) + mu2 = np.atleast_1d(mu2) + sigma1 = np.atleast_2d(sigma1) + sigma2 = np.atleast_2d(sigma2) + assert mu1.shape == mu2.shape, 'Training and test mean vectors have different lengths' + assert sigma1.shape == sigma2.shape, 'Training and test covariances have different dimensions' + diff = mu1 - mu2 + # Product might be almost singular + covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False) + if not np.isfinite(covmean).all(): + msg = ('fid calculation produces singular product; ' + 'adding %s to diagonal of cov estimates') % eps + print(msg) + offset = np.eye(sigma1.shape[0]) * eps + covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset)) + + # Numerical error might give slight imaginary component + if np.iscomplexobj(covmean): + if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3): + m = np.max(np.abs(covmean.imag)) + print('Imaginary component {}'.format(m)) + # raise ValueError('Imaginary component {}'.format(m)) + covmean = covmean.real + tr_covmean = np.trace(covmean) + return {"FID": (diff.dot(diff) + np.trace(sigma1) + np.trace( + sigma2) - 2 * tr_covmean)} + + +def is_master(): + r"""check if current process is the master""" + return get_rank() == 0 + + +def get_rank(): + r"""Get rank of the thread.""" + rank = 0 + if dist.is_available(): + if dist.is_initialized(): + rank = dist.get_rank() + return rank + + +def is_local_master(): + return torch.cuda.current_device() == 0 + + +def load_or_compute_activations(act_path, data_loader, key_real, key_fake, + generator=None, sample_size=None, + preprocess=None, + is_video=False, few_shot_video=False, + network='inception', + **kwargs): + r"""Load mean and covariance from saved npy file if exists. Otherwise, compute the mean and covariance. + + Args: + act_path (str or None): Location for the numpy file to store or to load the activations. + data_loader (obj): PyTorch dataloader object. + key_real (str): Dictionary key value for the real data. + key_fake (str): Dictionary key value for the fake data. + generator (obj): PyTorch trainer network. + sample_size (int): How many samples to be used for computing the KID. + preprocess (func): The preprocess function to be applied to the data. + is_video (bool): Whether we are handling video sequences. + few_shot_video (bool): If ``True``, uses few-shot video synthesis. + network (str): Which recognition backbone to use. + Returns: + (torch.Tensor) Feature activations. + """ + if act_path is not None and os.path.exists(act_path): + # Loading precomputed activations. + print('Load activations from {}'.format(act_path)) + act = torch.load(act_path, map_location='cpu').cuda() + else: + # Compute activations. + if is_video: + act = get_video_activations( + data_loader, key_real, key_fake, generator, + sample_size, preprocess, few_shot_video, network, **kwargs) + else: + act = get_activations( + data_loader, key_real, key_fake, generator, + sample_size, preprocess, True, network, **kwargs) + if act_path is not None and is_local_master(): + print('Save activations to {}'.format(act_path)) + if not os.path.exists(os.path.dirname(act_path)): + os.makedirs(os.path.dirname(act_path), exist_ok=True) + torch.save(act, act_path) + return act + + +@torch.no_grad() +def compute_fid(fid_path, data_loader, net_G, + key_real='images', key_fake='fake_images', + sample_size=None, preprocess=None, return_act=False, + is_video=False, few_shot_video=False, **kwargs): + r"""Compute the fid score. + + Args: + fid_path (str): Location for the numpy file to store or to load the statistics. + data_loader (obj): PyTorch dataloader object. + net_G (obj): For image generation modes, net_G is the generator network. + For video generation models, net_G is the trainer. + key_real (str): Dictionary key value for the real data. + key_fake (str): Dictionary key value for the fake data. + sample_size (int or tuple): How many samples to be used. + preprocess (func): The preprocess function to be applied to the data. + return_act (bool): If ``True``, also returns feature activations of + real and fake data. + is_video (bool): Whether we are handling video sequences. + few_shot_video (bool): If ``True``, uses few-shot video synthesis. + Returns: + (float): FID value. + """ + print('Computing FID.') + act_path = os.path.join(os.path.dirname(fid_path), + 'activations_real.npy') + # Get the fake mean and covariance. + fake_act = load_or_compute_activations( + None, data_loader, key_real, key_fake, net_G, + sample_size, preprocess, is_video=is_video, + few_shot_video=few_shot_video, **kwargs + ) + + # Get the ground truth mean and covariance. + real_act = load_or_compute_activations( + act_path, data_loader, key_real, key_fake, None, + sample_size, preprocess, is_video=is_video, + few_shot_video=few_shot_video, **kwargs + ) + + if is_master(): + fid = _calculate_frechet_distance(fake_act, real_act)["FID"] + if return_act: + return fid, real_act, fake_act + else: + return fid + elif return_act: + return None, None, None + else: + return None + + +def get_world_size(): + r"""Get world size. How many GPUs are available in this job.""" + world_size = 1 + if dist.is_available(): + if dist.is_initialized(): + world_size = dist.get_world_size() + return world_size + + +def dist_all_gather_tensor(tensor): + r""" gather to all ranks """ + world_size = get_world_size() + if world_size < 2: + return [tensor] + tensor_list = [ + torch.ones_like(tensor) for _ in range(dist.get_world_size())] + with torch.no_grad(): + dist.all_gather(tensor_list, tensor) + return tensor_list + + +def to_device(data, device): + r"""Move all tensors inside data to device. + + Args: + data (dict, list, or tensor): Input data. + device (str): 'cpu' or 'cuda'. + """ + assert device in ['cpu', 'cuda'] + string_classes = (str, bytes) + if isinstance(data, torch.Tensor): + data = data.to(torch.device(device)) + return data + elif isinstance(data, collections.abc.Mapping): + return type(data)({key: to_device(data[key], device) for key in data}) + elif isinstance(data, collections.abc.Sequence) and not isinstance(data, string_classes): + return type(data)([to_device(d, device) for d in data]) + else: + return data + + +def to_cuda(data): + r"""Move all tensors inside data to gpu. + + Args: + data (dict, list, or tensor): Input data. + """ + return to_device(data, 'cuda') + + +@torch.no_grad() +def get_activations( + data_loader, key_real, key_fake, + generator=None, sample_size=None, preprocess=None, align_corners=True, network='inception', **kwargs): + r"""Compute activation values and pack them in a list. + + Args: + data_loader (obj): PyTorch dataloader object. + key_real (str): Dictionary key value for the real data. + key_fake (str): Dictionary key value for the fake data. + generator (obj): PyTorch trainer network. + sample_size (int): How many samples to use for FID. + preprocess (func): Pre-processing function to use. + align_corners (bool): The ``'align_corners'`` parameter to be used for `torch.nn.functional.interpolate`. + network (str): Which recognition backbone to use. + Returns: + batch_y (tensor): Inception features of the current batch. Note that only the master gpu will get it. + """ + model = network_init(network) + batch_y = [] + world_size = get_world_size() + + # Iterate through the dataset to compute the activation. + for it, data in enumerate(data_loader): + data = to_cuda(data) + # Preprocess the data. + if preprocess is not None: + data = preprocess(data) + # Load real data if the generator is not specified. + if generator is None: + images = data[key_real] + if torch.max(images) > 1: + images = images / 255. # convert RGB to (0,1) + else: + # Compute the generated image. + text = data[1]['caption'] ### input is captions + net_G_output = generator(text, **kwargs) + images = net_G_output + # Clamp the image for models that do not set the output to between + # -1, 1. For models that employ tanh, this has no effect. + images.clamp_(-1, 1) + y = model(images, align_corners=align_corners) + # y = network_forward(model, images, align_corners=align_corners) + batch_y.append(y) + if sample_size is not None and data_loader.batch_size * world_size * (it + 1) >= sample_size: + # Reach the number of samples we need. + break + + batch_y = torch.cat(dist_all_gather_tensor(torch.cat(batch_y))) + if sample_size is not None: + batch_y = batch_y[:sample_size] + print(f"Computed feature activations of size {batch_y.shape}") + return batch_y + + +@torch.no_grad() +def compute_fid_data(folder_to_store_real_act, data_loader_a, data_loader_b, + key_a='images', key_b='images', sample_size=None, + is_video=False, few_shot_video=False, network='inception', **kwargs): + r"""Compute the fid score between two datasets. + + Args: + folder_to_store_real_act (str): Location to store the statistics or to load the statistics. + data_loader_a (obj): PyTorch dataloader object for dataset a. + data_loader_b (obj): PyTorch dataloader object for dataset b. + key_a (str): Dictionary key value for images in the dataset a. + key_b (str): Dictionary key value for images in the dataset b. + sample_size (int or None): How many samples to be used for computing the FID. + is_video (bool): Whether we are handling video sequences. + few_shot_video (bool): If ``True``, uses few-shot video synthesis. + network (str): Which recognition backbone to use. + Returns: + (float): FID value. + """ + print('Computing FID.') + if folder_to_store_real_act is None: + path_a = None + else: + path_a = os.path.join(os.path.dirname(folder_to_store_real_act), 'activations_a.npy') + # min_data_size = min(len(data_loader_a.dataset), len(data_loader_b.dataset)) + # sample_size = min_data_size if sample_size is None else min(sample_size, min_data_size) + + act_a = load_or_compute_activations( + path_a, data_loader_a, key_a, key_b, None, + sample_size=sample_size, is_video=is_video, + few_shot_video=few_shot_video, network=network, **kwargs) + act_b = load_or_compute_activations( + None, data_loader_b, key_a, key_b, None, + sample_size=sample_size, is_video=is_video, + few_shot_video=few_shot_video, network=network, **kwargs) + print(act_a.shape, act_b.shape) + if is_master(): + return _calculate_frechet_distance(act_a, act_b)["FID"] diff --git a/scripts/fid-eval-text2img/eval_fid.py b/scripts/fid-eval-text2img/eval_fid.py new file mode 100644 index 000000000000..a1565eebfbd3 --- /dev/null +++ b/scripts/fid-eval-text2img/eval_fid.py @@ -0,0 +1,98 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Example usage: + python eval_fid.py \ + --coco_images_path /path/to/coco2014_val \ + --fid_images_path /path/to/synthetic_images \ + --output_path /path/to/output/fid_scores.csv + +1. `--coco_images_path`: The path to the real images directory. In this example, + it is set to `/path/to/coco2014_val`. This path should point to the + directory containing the COCO 2014 validation dataset images, resized + to 256x256 pixels. + +2. `--fid_images_path`: The path to the directory containing subfolders + with synthetic images. In this example, it is set to + `/path/to/synthetic_images`. Each subfolder should contain a + set of synthetic images for which you want to compute FID scores + against the real images from `--coco_images_path`. + +3. `--output_path`: The path to the output CSV file where the FID scores + will be saved. In this example, it is set to + `/path/to/output/fid_scores.csv`. This file will contain a table with + two columns: `cfg` and `fid`. The `cfg` column lists the + names of the subfolders in `--fid_images_path`, and the `fid` column + lists the corresponding FID scores between the synthetic images in + each subfolder and the real images from `--coco_images_path`. +""" + +import argparse +import csv +import os +import torch + +from compute_fid import compute_fid_data +from fid_dataset import CustomDataset + +if __name__ == '__main__': + # Argument parsing + parser = argparse.ArgumentParser() + parser.add_argument('--coco_images_path', default='/coco2014/coco2014_val/images_256', type=str) + parser.add_argument('--fid_images_path', default=None, type=str) + parser.add_argument('--output_path', default='./fid_scores.csv', type=str) + args = parser.parse_args() + + # Set paths for synthetic images and real images + fid_images_path = args.fid_images_path + real_path = args.coco_images_path + + # Create dataset and data loader for real images + real_dataset = CustomDataset(real_path) + loader_real = torch.utils.data.DataLoader( + real_dataset, batch_size=32, num_workers=0, pin_memory=True, drop_last=False + ) + + # Create output CSV file + with open(args.output_path, 'w', newline='') as csvfile: + fieldnames = ['cfg', 'fid'] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + + # Iterate through subfolders in fid_images_path + for subfolder in os.listdir(fid_images_path): + subfolder_path = os.path.join(fid_images_path, subfolder) + if os.path.isdir(subfolder_path): + # Create dataset and data loader for synthetic images in subfolder + synthetic_dataset = CustomDataset(subfolder_path, target_size=256) + loader_synthetic = torch.utils.data.DataLoader( + synthetic_dataset, batch_size=32, num_workers=0, pin_memory=True, drop_last=False + ) + + # Compute FID score between synthetic images in subfolder and real images + fid = compute_fid_data( + './', loader_real, loader_synthetic, + key_a=0, + key_b=0, + sample_size=None, + is_video=False, + few_shot_video=False, + network='tf_inception', + interpolation_mode='bilinear' + ) + + print(f"The FID score between {subfolder_path} and {real_path} is {fid}") + + # Write FID score to output CSV file + writer.writerow({'cfg': subfolder, 'fid': fid}) diff --git a/scripts/fid-eval-text2img/fid_dataset.py b/scripts/fid-eval-text2img/fid_dataset.py new file mode 100644 index 000000000000..de1f07a8221a --- /dev/null +++ b/scripts/fid-eval-text2img/fid_dataset.py @@ -0,0 +1,129 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import os +import torch +import torch.utils.data as data +import torchvision.transforms as transforms +from PIL import Image +from pycocotools.coco import COCO +from torchvision.io import read_image, ImageReadMode + + +def _pil_interp(method): + if method == 'bicubic': + return Image.BICUBIC + elif method == 'lanczos': + return Image.LANCZOS + elif method == 'hamming': + return Image.HAMMING + else: + # default bilinear, do we want to allow nearest? + return Image.BILINEAR + + +def _size_tuple(size): + if isinstance(size, int): + return size, size + else: + assert len(size) == 2 + return size + + +class CenterCropResize: + + def __init__(self, target_size: int, interpolation: str = 'bilinear', fill_color: tuple = (0, 0, 0)): + self.target_size = _size_tuple(target_size) + self.interpolation = interpolation + self.fill_color = fill_color + + def __call__(self, img): + w, h = img.size + img = np.array(img).astype(np.uint8) + crop = min(w, h) + img = img[(h - crop) // 2:(h + crop) // 2, + (w - crop) // 2:(w + crop) // 2] + image = Image.fromarray(img) + if self.target_size is not None: + interp_method = _pil_interp(self.interpolation) + new_img = image.resize(self.target_size, resample=interp_method) + return new_img + + +class CustomDataset(data.Dataset): + def __init__(self, root, target_size=None): + self.root = root + self.files = [f for f in os.listdir(self.root) if os.path.isfile(os.path.join(self.root, f))] + self.transform = transforms.ToTensor() + self.target_size = target_size + + def __len__(self): + return len(self.files) + + def __getitem__(self, index): + file = self.files[index] + # image = read_image(os.path.join(self.root, file), mode=ImageReadMode.RGB).type(torch.float32) / 255 + image = Image.open(os.path.join(self.root, file)).convert('RGB') + if self.target_size is not None: + image = image.resize((self.target_size, self.target_size), resample=Image.BICUBIC) + image = self.transform(image) + image = 2 * image - 1 + return image, file + + +class CocoDataset(data.Dataset): + def __init__(self, root, ann_file, captions, transform=None, target_size=None): + self.root = root + self.coco = None + self.captions = captions + self.img_ids = [x['image_id'] for x in self.captions] + self.has_annotations = 'image_info' not in ann_file + self.transforms = [transforms.ToTensor()] + if transform is not None: + self.transforms.append(transform) + self.target_size = target_size + self.img_ids_invalid = [] + self.img_infos = [] + self._load_annotations(ann_file) + + def _load_annotations(self, ann_file): + assert self.coco is None + self.coco = COCO(ann_file) + img_ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values()) + for img_id in self.img_ids: + info = self.coco.loadImgs([img_id])[0] + valid_annotation = not self.has_annotations or img_id in img_ids_with_ann + if valid_annotation and min(info['width'], info['height']) >= 32: + self.img_infos.append(info) + else: + self.img_ids_invalid.append(img_id) + + def __len__(self): + return len(self.img_infos) + + def _compose(self, image): + for t in self.transforms[::-1]: + image = t(image) + return image + + def __getitem__(self, index): + img_id = self.img_ids[index] + img_info = self.img_infos[index] + cap = self.captions[index] + path = img_info['file_name'] + image = Image.open(os.path.join(self.root, path)).convert('RGB') + if self.target_size is not None: + image = image.resize((512, 512)) + image = self._compose(image) + return image, cap diff --git a/scripts/fid-eval-text2img/plot.py b/scripts/fid-eval-text2img/plot.py new file mode 100644 index 000000000000..6947db7b36ba --- /dev/null +++ b/scripts/fid-eval-text2img/plot.py @@ -0,0 +1,37 @@ +""" +python plot_fid_vs_clip.py \ + --fid_scores_csv path/to/fid_scores.csv \ + --clip_scores_csv path/to/clip_scores.csv +Replace path/to/fid_scores.csv and path/to/clip_scores.csv with the paths +to the respective CSV files. The script will display the plot with FID +scores against CLIP scores, with cfg values annotated on each point. +""" + +import argparse +import pandas as pd +import matplotlib.pyplot as plt + +def plot_fid_vs_clip(fid_scores_csv, clip_scores_csv): + fid_scores = pd.read_csv(fid_scores_csv) + clip_scores = pd.read_csv(clip_scores_csv) + merged_data = pd.merge(fid_scores, clip_scores, on='cfg') + + fig, ax = plt.subplots() + ax.plot(merged_data['clip_score'], merged_data['fid'], marker='o', linestyle='-') # Connect points with a line + + for i, txt in enumerate(merged_data['cfg']): + ax.annotate(txt, (merged_data['clip_score'][i], merged_data['fid'][i])) + + ax.set_xlabel('CLIP Score') + ax.set_ylabel('FID') + ax.set_title('FID vs CLIP Score') + + plt.show() + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--fid_scores_csv', required=True, type=str, help='Path to the FID scores CSV file') + parser.add_argument('--clip_scores_csv', required=True, type=str, help='Path to the CLIP scores CSV file') + args = parser.parse_args() + + plot_fid_vs_clip(args.fid_scores_csv, args.clip_scores_csv) diff --git a/tests/collections/multimodal/test_clip_model.py b/tests/collections/multimodal/test_clip_model.py new file mode 100644 index 000000000000..7838a68b7924 --- /dev/null +++ b/tests/collections/multimodal/test_clip_model.py @@ -0,0 +1,489 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest +import torch +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning import Trainer + +from nemo.collections.multimodal.models.clip.megatron_clip_models import ( + CLIPVisionTransformer, + CLIPTextTransformer, + CLIPModel, + MegatronCLIPModel, +) +from nemo.collections.multimodal.data.clip.clip_dataset import build_train_valid_datasets +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy +from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo + +DEVICE_CAPABILITY = None +if torch.cuda.is_available(): + DEVICE_CAPABILITY = torch.cuda.get_device_capability() + + +@pytest.fixture() +def model_cfg(): + + model_cfg_string = """ + precision: 16 + micro_batch_size: 2 # limited by GPU memory + global_batch_size: 2 # will use more micro batches to reach global batch size + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_pretrained: null # used in fine-tuning + # multimodal configs + output_dim: 64 + local_loss: False # calculate loss w/ local features @ global (instead of realizing full global @ global matrix) + gather_with_grad: True # enable full distributed gradient for feature gather, set this to False may cause convergence issue + + vision: + precision: 16 + # vision configs + patch_dim: 16 + img_h: 224 + img_w: 224 + image_mean: null + image_std: null + num_channels: 3 + drop_patch_rate: 0.0 + drop_path_rate: 0.0 + global_average_pool: False + output_dim: 64 + class_token_length: 8 + preprocess_layernorm: True # apply layer norm to embedded tokens + + # model architecture + encoder_seq_length: 196 + max_position_embeddings: 196 + position_embedding_type: learned_absolute + num_layers: 2 + hidden_size: 768 + ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 12 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + + text: + precision: 16 + # text configs + output_dim: 64 + + # model architecture + encoder_seq_length: 77 + max_position_embeddings: 77 + position_embedding_type: learned_absolute + num_layers: 2 + hidden_size: 512 + ffn_hidden_size: 2048 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 8 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + + # Megatron O2-style half-precision + megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'huggingface' + type: 'openai/clip-vit-large-patch14' + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + make_vocab_size_divisible_by: 128 # Pad the vocab size to be divisible by this value for computation efficiency. + + data: + num_workers: 1 + dataset_type: webdataset + + train: + data_path: # List of paths to pkl files or tar files + - /lustre/fsw/joc/multimodal/datasets/cc3m/00000-00008_{000000..000001}.tar + drop_last: True # drop_last = False is not implemented yet + validation: # List of paths to pkl files or tar files + data_path: + - /lustre/fsw/joc/multimodal/datasets/cc3m/00000-00008_000002.tar + drop_last: True # drop_last = False is not implemented yet + webdataset: + object_store: False + bucket: datasets + pbss_credentials_file: pbss_credential + local_root_path: / # tar files local root path + chunk_size: 1000 # if data path is list of tar files, chunk_size needs to be provided + + imagenet_val: null # Path to imagenet val set for conducting zero shot evaluation. + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 1e-3 + weight_decay: 0.2 + betas: + - 0.9 + - 0.98 + sched: + name: CosineAnnealing + warmup_steps: 2000 + constant_steps: 0 + min_lr: 1e-5 + """ + model_cfg = OmegaConf.create(model_cfg_string) + return model_cfg + + +@pytest.fixture() +def trainer_cfg(): + + trainer_cfg_string = """ + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False + enable_checkpointing: False + replace_sampler_ddp: False + max_epochs: -1 + max_steps: 4 + log_every_n_steps: 1 + val_check_interval: 4 + limit_val_batches: 2 + limit_test_batches: 2 + accumulate_grad_batches: 1 + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False + """ + trainer_cfg = OmegaConf.create(trainer_cfg_string) + + return trainer_cfg + + +@pytest.fixture() +def exp_manager_cfg(): + + exp_manager_cfg_string = """ + explicit_log_dir: null + exp_dir: null + name: megatron_clip + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: False + resume_ignore_no_checkpoint: True + create_checkpoint_callback: False + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_vit_classify--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: 1 + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + """ + exp_manager_cfg = OmegaConf.create(exp_manager_cfg_string) + + return exp_manager_cfg + + +@pytest.fixture() +def precision(): + return 32 + + +@pytest.fixture() +def clip_trainer_and_model(model_cfg, trainer_cfg, precision): + model_cfg['vision']['precision'] = precision + model_cfg['text']['precision'] = precision + trainer_cfg['precision'] = precision + + strategy = NLPDDPStrategy() + + trainer = Trainer(strategy=strategy, **trainer_cfg) + + cfg = DictConfig(model_cfg) + + model = MegatronCLIPModel(cfg=cfg, trainer=trainer) + + def dummy(): + return + if model.trainer.strategy.launcher is not None: + model.trainer.strategy.launcher.launch(dummy, trainer=model.trainer) + model.trainer.strategy.setup_environment() + + return trainer, model + +def build_datasets(cfg, tokenizer): + return build_train_valid_datasets( + model_cfg=cfg, + consumed_samples=0, + tokenizer=tokenizer, + ) + +@pytest.mark.run_only_on('GPU') +class TestMegatronCLIPModel: + @pytest.mark.unit + def test_constructor(self, clip_trainer_and_model): + clip_model = clip_trainer_and_model[1] + assert isinstance(clip_model, MegatronCLIPModel) + + num_weights = clip_model.num_weights + assert num_weights == 46643969 + + @pytest.mark.unit + def test_build_dataset(self, clip_trainer_and_model, test_data_dir): + clip_model = clip_trainer_and_model[1] + train_ds, validation_ds = build_train_valid_datasets( + model_cfg=clip_model.cfg, + consumed_samples=0, + tokenizer=clip_model.tokenizer, + ) + assert len(train_ds) == 2000 + assert len(validation_ds) == 1000 + sample = next(iter(train_ds)) + assert "captions" in sample + assert "images" in sample + + @pytest.mark.parametrize( + "precision", + [ + 32, + 16, + pytest.param( + "bf16", + marks=pytest.mark.skipif( + not DEVICE_CAPABILITY or DEVICE_CAPABILITY[0] < 8, + reason='bfloat16 is not supported on this device', + ), + ), + ], + ) + + @pytest.mark.unit + def test_forward(self, clip_trainer_and_model, test_data_dir, precision=None): + trainer, clip_model = clip_trainer_and_model + + dtype = None + if clip_model.cfg['precision'] == 32: + dtype = torch.float + elif clip_model.cfg['precision'] == 16: + dtype = torch.float16 + elif clip_model.cfg['precision'] == 'bf16': + dtype = torch.bfloat16 + else: + raise ValueError(f"precision: {clip_model.cfg['precision']} is not supported.") + + clip_model.eval() + _, validation_ds = build_datasets(clip_model.cfg, clip_model.tokenizer) + + val_loader = torch.utils.data.DataLoader(validation_ds, batch_size=4) + batch = next(iter(val_loader)) + + tokens = batch["images"] + texts = batch["captions"] + with torch.no_grad(): + B, C, H, W = tokens.shape + assert H == W + with torch.autocast('cuda', dtype=dtype): + output_tensor = clip_model( + image=tokens.cuda(), + text=texts.cuda(), + ) + # output is (B, #classes) + # assert output_tensor.shape == torch.Size([B, clip_model.cfg['num_classes']]) + # assert output_tensor.dtype == dtype + + # @pytest.mark.unit + # def test_vit_backbone(self, model_cfg, trainer_cfg, precision): + # initialize_model_parallel_for_nemo( + # world_size=1, + # global_rank=0, + # local_rank=0, + # tensor_model_parallel_size=model_cfg.get('tensor_model_parallel_size', 1), + # pipeline_model_parallel_size=model_cfg.get('pipeline_model_parallel_size', 1), + # virtual_pipeline_model_parallel_size=model_cfg.get('virtual_pipeline_model_parallel_size', None), + # pipeline_model_parallel_split_rank=model_cfg.get('pipeline_model_parallel_split_rank', 0), + # micro_batch_size=model_cfg.get('micro_batch_size'), + # global_batch_size=model_cfg.get('global_batch_size'), + # seed=model_cfg.get('seed', 1234), + # apex_transformer_log_level=model_cfg.get('apex_transformer_log_level', 30), + # ) + # + # dtype = None + # if trainer_cfg['precision'] == 32: + # dtype = torch.float + # elif trainer_cfg['precision'] == 16: + # dtype = torch.float16 + # elif trainer_cfg['precision'] == 'bf16': + # dtype = torch.bfloat16 + # else: + # raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") + # + # vit_backbone = VitBackbone( + # model_cfg, + # init_method=None, + # scaled_init_method=None, + # pre_process=True, + # post_process=True, + # single_token_output=True + # ).cuda() + # vit_backbone.eval() + # + # # shape: (B, C, H, W) + # tokens = torch.rand((6, 3, 224, 224)) + # + # with torch.no_grad(): + # B, C, H, W = tokens.shape + # assert H == W + # with torch.autocast('cuda', dtype=dtype): + # output_tensor = vit_backbone( + # tokens.cuda(), + # ) + # # output is (B, #classes) + # assert output_tensor.shape == torch.Size([B, model_cfg['hidden_size']]) + # assert output_tensor.dtype == dtype + # + # @pytest.mark.unit + # def test_vit_head(self, model_cfg, trainer_cfg, precision): + # dtype = None + # if trainer_cfg['precision'] == 32: + # dtype = torch.float + # elif trainer_cfg['precision'] == 16: + # dtype = torch.float16 + # elif trainer_cfg['precision'] == 'bf16': + # dtype = torch.bfloat16 + # else: + # raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") + # + # vit_head = VitMlpHead( + # 24, 50, + # ).cuda() + # vit_head.eval() + # + # hidden = torch.rand((6, 24)) + # + # with torch.no_grad(): + # with torch.autocast('cuda', dtype=dtype): + # output_tensor = vit_head( + # hidden.cuda(), + # ) + # # output is (B, #classes) + # assert output_tensor.shape == torch.Size([6, 50]) + # assert output_tensor.dtype == dtype \ No newline at end of file diff --git a/tests/collections/vision/test_vit_model.py b/tests/collections/vision/test_vit_model.py new file mode 100644 index 000000000000..2d65baf9c90c --- /dev/null +++ b/tests/collections/vision/test_vit_model.py @@ -0,0 +1,391 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest +import torch +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning import Trainer + +from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy +from nemo.collections.vision.data.megatron.vit_dataset import build_train_valid_datasets +from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel +from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone, VitMlpHead + +DEVICE_CAPABILITY = None +if torch.cuda.is_available(): + DEVICE_CAPABILITY = torch.cuda.get_device_capability() + + +@pytest.fixture() +def model_cfg(): + model_cfg_string = """ + precision: 16 + micro_batch_size: 2 # limited by GPU memory + global_batch_size: 4 # will use more micro batches to reach global batch size + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_pretrained: null # used in fine-tuning + + # vision configs + vision_pretraining_type: "classify" + num_classes: 1000 + patch_dim: 16 + img_h: 224 + img_w: 224 + classes_fraction: 1.0 + data_per_class_fraction: 1.0 + num_channels: 3 + drop_path_rate: 0.0 + + # model architecture + encoder_seq_length: 4 + max_position_embeddings: ${.encoder_seq_length} + num_layers: 12 + hidden_size: 768 + ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 12 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0.1 # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # Megatron O2-style half-precision + megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + ## Activation Checkpointing + # NeMo Megatron supports 'selective' activation checkpointing where only the memory intensive part of attention is checkpointed. + # These memory intensive activations are also less compute intensive which makes activation checkpointing more efficient for LLMs (20B+). + # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. + # 'full' will checkpoint the entire transformer layer. + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + # 'uniform' divides the total number of transformer layers and checkpoints the input activation + # of each chunk at the specified granularity + # 'block' checkpoints the specified number of layers per pipeline stage at the specified granularity + activations_checkpoint_num_layers: null # not used with 'selective' + # when using 'uniform' this creates groups of transformer layers to checkpoint. Usually set to 1. Increase to save more memory. + # when using 'block' this this will checkpoint the first activations_checkpoint_num_layers per pipeline stage. + + ## Sequence Parallelism + # Makes tensor parallelism more memory efficient for LLMs (20B+) by parallelizing layer norms and dropout sequentially + # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. + sequence_parallel: False + + data: + # Path to image dataset must be specified by the user. + # Supports List + # List: can override from the CLI: "model.data.data_prefix=[/path/to/train, /path/to/val]", + data_path: "dummy/path" + num_workers: 2 + dataloader_type: cyclic # cyclic + validation_drop_last: True # Set to false if the last partial validation samples is to be consumed + data_sharding: False + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [0] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.999 + sched: + name: CosineAnnealing + warmup_steps: 10000 + constant_steps: 0 + min_lr: 1e-5 + """ + model_cfg = OmegaConf.create(model_cfg_string) + return model_cfg + + +@pytest.fixture() +def trainer_cfg(): + trainer_cfg_string = """ + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False + enable_checkpointing: False + replace_sampler_ddp: False + max_epochs: -1 + max_steps: 4 + log_every_n_steps: 1 + val_check_interval: 4 + limit_val_batches: 2 + limit_test_batches: 2 + accumulate_grad_batches: 1 + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False + """ + trainer_cfg = OmegaConf.create(trainer_cfg_string) + + return trainer_cfg + + +@pytest.fixture() +def exp_manager_cfg(): + exp_manager_cfg_string = """ + explicit_log_dir: null + exp_dir: null + name: megatron_vit_classify + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: False + resume_ignore_no_checkpoint: True + create_checkpoint_callback: False + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_vit_classify--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: 1 + """ + exp_manager_cfg = OmegaConf.create(exp_manager_cfg_string) + + return exp_manager_cfg + + +@pytest.fixture() +def precision(): + return 32 + + +@pytest.fixture() +def vit_classification_trainer_and_model(model_cfg, trainer_cfg, precision): + model_cfg['precision'] = precision + trainer_cfg['precision'] = precision + + strategy = NLPDDPStrategy() + + trainer = Trainer(strategy=strategy, **trainer_cfg) + + cfg = DictConfig(model_cfg) + + model = MegatronVitClassificationModel(cfg=cfg, trainer=trainer) + + return trainer, model + + +def build_datasets(cfg, test_data_dir): + data_path = [ + os.path.join(test_data_dir, "vision/tiny_imagenet/train"), + os.path.join(test_data_dir, "vision/tiny_imagenet/val"), + ] + return build_train_valid_datasets( + model_cfg=cfg, + data_path=data_path, + image_size=(cfg.img_h, cfg.img_w), + ) + + +@pytest.mark.run_only_on('GPU') +class TestMegatronVitClassificationModel: + @pytest.mark.unit + def test_constructor(self, vit_classification_trainer_and_model): + vit_classification_model = vit_classification_trainer_and_model[1] + assert isinstance(vit_classification_model, MegatronVitClassificationModel) + + num_weights = vit_classification_model.num_weights + assert num_weights == 87169000 + + @pytest.mark.unit + def test_build_dataset(self, vit_classification_trainer_and_model, test_data_dir): + vit_classification_model = vit_classification_trainer_and_model[1] + data_path = [ + os.path.join(test_data_dir, "vision/tiny_imagenet/train"), + os.path.join(test_data_dir, "vision/tiny_imagenet/val"), + ] + train_ds, validation_ds = build_train_valid_datasets( + model_cfg=vit_classification_model.cfg, + data_path=data_path, + image_size=(vit_classification_model.cfg.img_h, vit_classification_model.cfg.img_w), + ) + assert len(train_ds) == 20 + assert len(validation_ds) == 20 + assert train_ds[0][0].shape == torch.Size([3, 224, 224]) + assert validation_ds[0][0].shape == torch.Size([3, 224, 224]) + + @pytest.mark.parametrize( + "precision", + [ + 32, + 16, + pytest.param( + "bf16", + marks=pytest.mark.skipif( + not DEVICE_CAPABILITY or DEVICE_CAPABILITY[0] < 8, + reason='bfloat16 is not supported on this device', + ), + ), + ], + ) + @pytest.mark.unit + def test_forward(self, vit_classification_trainer_and_model, test_data_dir): + trainer, vit_classification_model = vit_classification_trainer_and_model + + dtype = None + if vit_classification_model.cfg['precision'] == 32: + dtype = torch.float + elif vit_classification_model.cfg['precision'] == 16: + dtype = torch.float16 + elif vit_classification_model.cfg['precision'] == 'bf16': + dtype = torch.bfloat16 + else: + raise ValueError(f"precision: {vit_classification_model.cfg['precision']} is not supported.") + + vit_classification_model.eval() + _, validation_ds = build_datasets(vit_classification_model.cfg, test_data_dir) + + # shape: (B, C, H, W) + images = [validation_ds[i][0] for i in range(4)] + tokens = torch.stack(images, dim=0) + + with torch.no_grad(): + B, C, H, W = tokens.shape + assert H == W + with torch.autocast('cuda', dtype=dtype): + output_tensor = vit_classification_model.forward( + tokens=tokens.cuda(), + ) + # output is (B, #classes) + assert output_tensor.shape == torch.Size([B, vit_classification_model.cfg['num_classes']]) + assert output_tensor.dtype == dtype + + @pytest.mark.unit + def test_vit_backbone(self, model_cfg, trainer_cfg, precision): + initialize_model_parallel_for_nemo( + world_size=1, + global_rank=0, + local_rank=0, + tensor_model_parallel_size=model_cfg.get('tensor_model_parallel_size', 1), + pipeline_model_parallel_size=model_cfg.get('pipeline_model_parallel_size', 1), + virtual_pipeline_model_parallel_size=model_cfg.get('virtual_pipeline_model_parallel_size', None), + pipeline_model_parallel_split_rank=model_cfg.get('pipeline_model_parallel_split_rank', 0), + micro_batch_size=model_cfg.get('micro_batch_size'), + global_batch_size=model_cfg.get('global_batch_size'), + seed=model_cfg.get('seed', 1234), + apex_transformer_log_level=model_cfg.get('apex_transformer_log_level', 30), + ) + + dtype = None + if trainer_cfg['precision'] == 32: + dtype = torch.float + elif trainer_cfg['precision'] == 16: + dtype = torch.float16 + elif trainer_cfg['precision'] == 'bf16': + dtype = torch.bfloat16 + else: + raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") + + vit_backbone = VitBackbone( + model_cfg, + init_method=None, + scaled_init_method=None, + pre_process=True, + post_process=True, + single_token_output=True + ).cuda() + vit_backbone.eval() + + # shape: (B, C, H, W) + tokens = torch.rand((6, 3, 224, 224)) + + with torch.no_grad(): + B, C, H, W = tokens.shape + assert H == W + with torch.autocast('cuda', dtype=dtype): + output_tensor = vit_backbone( + tokens.cuda(), + ) + # output is (B, #classes) + assert output_tensor.shape == torch.Size([B, model_cfg['hidden_size']]) + assert output_tensor.dtype == dtype + + @pytest.mark.unit + def test_vit_head(self, model_cfg, trainer_cfg, precision): + dtype = None + if trainer_cfg['precision'] == 32: + dtype = torch.float + elif trainer_cfg['precision'] == 16: + dtype = torch.float16 + elif trainer_cfg['precision'] == 'bf16': + dtype = torch.bfloat16 + else: + raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") + + vit_head = VitMlpHead( + 24, 50, + ).cuda() + vit_head.eval() + + hidden = torch.rand((6, 24)) + + with torch.no_grad(): + with torch.autocast('cuda', dtype=dtype): + output_tensor = vit_head( + hidden.cuda(), + ) + # output is (B, #classes) + assert output_tensor.shape == torch.Size([6, 50]) + assert output_tensor.dtype == dtype diff --git a/tutorials/00_NeMo_Primer.ipynb b/tutorials/00_NeMo_Primer.ipynb index 50aa60260b35..4acf685ad5c3 100644 --- a/tutorials/00_NeMo_Primer.ipynb +++ b/tutorials/00_NeMo_Primer.ipynb @@ -42,7 +42,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/01_NeMo_Models.ipynb b/tutorials/01_NeMo_Models.ipynb index 6f230e62c1a3..d18e285b7de8 100644 --- a/tutorials/01_NeMo_Models.ipynb +++ b/tutorials/01_NeMo_Models.ipynb @@ -37,7 +37,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/02_NeMo_Adapters.ipynb b/tutorials/02_NeMo_Adapters.ipynb index 51a91a3c7053..40cf4753dbd4 100644 --- a/tutorials/02_NeMo_Adapters.ipynb +++ b/tutorials/02_NeMo_Adapters.ipynb @@ -25,7 +25,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/AudioTranslationSample.ipynb b/tutorials/AudioTranslationSample.ipynb index e8fb33aba11f..69603686be08 100644 --- a/tutorials/AudioTranslationSample.ipynb +++ b/tutorials/AudioTranslationSample.ipynb @@ -38,7 +38,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n" ] }, diff --git a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb index 1b951e7b9e8c..73c83788c295 100644 --- a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb +++ b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb @@ -41,7 +41,7 @@ "!pip install text-unidecode\n", "\n", "### Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb index ec65f1cc2bb3..268501558c77 100644 --- a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb +++ b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb @@ -39,7 +39,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/ASR_for_telephony_speech.ipynb b/tutorials/asr/ASR_for_telephony_speech.ipynb index f673809db0bb..5d214ae0e719 100644 --- a/tutorials/asr/ASR_for_telephony_speech.ipynb +++ b/tutorials/asr/ASR_for_telephony_speech.ipynb @@ -27,7 +27,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/ASR_with_NeMo.ipynb b/tutorials/asr/ASR_with_NeMo.ipynb index cf669f178845..5ff13b9e1847 100644 --- a/tutorials/asr/ASR_with_NeMo.ipynb +++ b/tutorials/asr/ASR_with_NeMo.ipynb @@ -53,7 +53,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -587,7 +587,7 @@ "\n", "if not os.path.exists(config_path):\n", " # Grab the config we'll use in this example\n", - " BRANCH = 'main'\n", + " BRANCH = 'r1.17.0'\n", " !mkdir configs\n", " !wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/config.yaml\n", "\n", diff --git a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb index 50e4f4536908..e5204549af1c 100644 --- a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb +++ b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb @@ -40,7 +40,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/ASR_with_Transducers.ipynb b/tutorials/asr/ASR_with_Transducers.ipynb index f0efdf1cb363..d2c59f02393a 100644 --- a/tutorials/asr/ASR_with_Transducers.ipynb +++ b/tutorials/asr/ASR_with_Transducers.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference.ipynb b/tutorials/asr/Buffered_Transducer_Inference.ipynb index 62f18e227510..822256cbfcbe 100644 --- a/tutorials/asr/Buffered_Transducer_Inference.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference.ipynb @@ -27,7 +27,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb index 04d72e4a9b37..ce513700cf09 100644 --- a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb @@ -45,7 +45,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Intro_to_Transducers.ipynb b/tutorials/asr/Intro_to_Transducers.ipynb index 8026e6ddf546..1d2fd0dde4d5 100644 --- a/tutorials/asr/Intro_to_Transducers.ipynb +++ b/tutorials/asr/Intro_to_Transducers.ipynb @@ -43,7 +43,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ], "execution_count": null, diff --git a/tutorials/asr/Multilang_ASR.ipynb b/tutorials/asr/Multilang_ASR.ipynb index 8320cc8a07c9..4e58b0607d32 100644 --- a/tutorials/asr/Multilang_ASR.ipynb +++ b/tutorials/asr/Multilang_ASR.ipynb @@ -101,7 +101,7 @@ "\n", "## Install NeMo\n", "## We are using the main branch but you might want to adjust that too\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb index 4c2a7b8d577b..22df23009392 100644 --- a/tutorials/asr/Offline_ASR.ipynb +++ b/tutorials/asr/Offline_ASR.ipynb @@ -51,7 +51,7 @@ "id": "I9eIxAyKHREB" }, "source": [ - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "try:\n", " # Import NeMo Speech Recognition collection\n", " import nemo.collections.asr as nemo_asr\n", diff --git a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb index 29913fe0fe73..a209f29ab7ee 100644 --- a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb +++ b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb @@ -22,7 +22,7 @@ "!pip install wget\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb index 6c99856f04ad..2579d9fa51ae 100644 --- a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb +++ b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb @@ -26,7 +26,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Online_Noise_Augmentation.ipynb b/tutorials/asr/Online_Noise_Augmentation.ipynb index 3af050ef9028..49b14402e09e 100644 --- a/tutorials/asr/Online_Noise_Augmentation.ipynb +++ b/tutorials/asr/Online_Noise_Augmentation.ipynb @@ -31,7 +31,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb index da15b5009b6a..93398903d4ff 100644 --- a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb @@ -26,7 +26,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb index 8bf717bd7b56..db65b35e7149 100644 --- a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb @@ -28,7 +28,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Self_Supervised_Pre_Training.ipynb b/tutorials/asr/Self_Supervised_Pre_Training.ipynb index 0f0270c1ad75..49a0dd9ed8c2 100644 --- a/tutorials/asr/Self_Supervised_Pre_Training.ipynb +++ b/tutorials/asr/Self_Supervised_Pre_Training.ipynb @@ -27,7 +27,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Speech_Commands.ipynb b/tutorials/asr/Speech_Commands.ipynb index 8aece9c0bcf8..34e0457d30f4 100644 --- a/tutorials/asr/Speech_Commands.ipynb +++ b/tutorials/asr/Speech_Commands.ipynb @@ -60,7 +60,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Streaming_ASR.ipynb b/tutorials/asr/Streaming_ASR.ipynb index 5d4d5b188e18..af625b7781a0 100644 --- a/tutorials/asr/Streaming_ASR.ipynb +++ b/tutorials/asr/Streaming_ASR.ipynb @@ -27,7 +27,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Voice_Activity_Detection.ipynb b/tutorials/asr/Voice_Activity_Detection.ipynb index 3813152e5d5f..85d9061c6b43 100644 --- a/tutorials/asr/Voice_Activity_Detection.ipynb +++ b/tutorials/asr/Voice_Activity_Detection.ipynb @@ -27,7 +27,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb index 16da898f8b55..143be330e082 100644 --- a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb +++ b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb @@ -49,7 +49,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb b/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb index c4406a4f04ee..4dc860b79471 100644 --- a/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb +++ b/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb @@ -70,7 +70,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb b/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb index 8cf540b27114..fdcc91d95ea0 100644 --- a/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb +++ b/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb @@ -55,7 +55,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb index faa93de12514..049908b3a9a5 100644 --- a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb +++ b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb @@ -26,7 +26,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/02_NLP_Tokenizers.ipynb b/tutorials/nlp/02_NLP_Tokenizers.ipynb index c63d2a8b1689..d78bf2377c51 100644 --- a/tutorials/nlp/02_NLP_Tokenizers.ipynb +++ b/tutorials/nlp/02_NLP_Tokenizers.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.17.0'" ] }, { @@ -35,7 +35,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb index 323bfa1c49b8..302b1dd85fc5 100644 --- a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb +++ b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb @@ -300,7 +300,7 @@ "\n", "## Install NeMo\n", "\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "!pip uninstall -y sacrebleu\n", diff --git a/tutorials/nlp/Dialogue.ipynb b/tutorials/nlp/Dialogue.ipynb index ddd3bdd4f929..0050b07fb343 100644 --- a/tutorials/nlp/Dialogue.ipynb +++ b/tutorials/nlp/Dialogue.ipynb @@ -27,7 +27,7 @@ "outputs": [], "source": [ "import os \n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!apt-get update && apt-get install -y libsndfile1 ffmpeg\n", "!git clone https://github.com/NVIDIA/NeMo --branch $BRANCH\n", "os.chdir('NeMo')\n", diff --git a/tutorials/nlp/Entity_Linking_Medical.ipynb b/tutorials/nlp/Entity_Linking_Medical.ipynb index 0d7a1d5c8de5..1f909eefdfd5 100644 --- a/tutorials/nlp/Entity_Linking_Medical.ipynb +++ b/tutorials/nlp/Entity_Linking_Medical.ipynb @@ -17,7 +17,7 @@ "\"\"\"\n", "\n", "## Install NeMo if using google collab or if its not installed locally\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/nlp/GLUE_Benchmark.ipynb b/tutorials/nlp/GLUE_Benchmark.ipynb index d8fe75940b09..0cc61712d7de 100644 --- a/tutorials/nlp/GLUE_Benchmark.ipynb +++ b/tutorials/nlp/GLUE_Benchmark.ipynb @@ -44,7 +44,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'r1.17.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb index 104d69df18e2..ba80f5a34892 100644 --- a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb +++ b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/MegatronBert_export.ipynb b/tutorials/nlp/MegatronBert_export.ipynb index f925d2bc59b0..669be8d0643e 100644 --- a/tutorials/nlp/MegatronBert_export.ipynb +++ b/tutorials/nlp/MegatronBert_export.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='main'" + "BRANCH='r1.17.0'" ] }, { diff --git a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb index 3dc3d6ce192e..0a3d9073e96e 100644 --- a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb +++ b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb @@ -62,7 +62,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "DATA_PATH='.'\n", "TRANSACTIONS=DATA_PATH+'/card_transaction.v1.csv'\n", "#CHECKPOINTS='/chk_points'\n", diff --git a/tutorials/nlp/Punctuation_and_Capitalization.ipynb b/tutorials/nlp/Punctuation_and_Capitalization.ipynb index 1519c234372b..82f64d5e692a 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.17.0'" ] }, { diff --git a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb index 5580bc4cf946..e030bf8a5ba6 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.17.0'" ] }, { diff --git a/tutorials/nlp/Question_Answering.ipynb b/tutorials/nlp/Question_Answering.ipynb index 7217b0fb6756..80f532ba0b8d 100644 --- a/tutorials/nlp/Question_Answering.ipynb +++ b/tutorials/nlp/Question_Answering.ipynb @@ -74,7 +74,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.17.0'" ] }, { diff --git a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb index b7c25cb416ef..88103290e8fd 100644 --- a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb +++ b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.17.0'" ] }, { diff --git a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb index 5b5b74e7bf11..5137f4c65c89 100644 --- a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb +++ b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb @@ -20,7 +20,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n", "\n" ] diff --git a/tutorials/nlp/Token_Classification-BioMegatron.ipynb b/tutorials/nlp/Token_Classification-BioMegatron.ipynb index 1cd13c429e92..915c034dff55 100644 --- a/tutorials/nlp/Token_Classification-BioMegatron.ipynb +++ b/tutorials/nlp/Token_Classification-BioMegatron.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='main'" + "BRANCH='r1.17.0'" ] }, { diff --git a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb index 0e8fadde8041..e61b5a76d3a2 100644 --- a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb +++ b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb @@ -30,7 +30,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.17.0'" ] }, { @@ -53,7 +53,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'r1.17.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb index 69df7b27b02d..faee7a4b6392 100644 --- a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb +++ b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb index ea943b35e0d0..192d46217cd3 100644 --- a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb +++ b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb @@ -30,7 +30,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb index 212419146967..d9282f6317f0 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb @@ -23,7 +23,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb index 3c56df2bbba0..896e4dce7e80 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "\n", "! git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", @@ -232,7 +232,7 @@ "source": [ "import os\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "\n", "# download scripts if not already there \n", "if not os.path.exists('NeMo/scripts'):\n", diff --git a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb index 8e3ae9c1f131..d7f89e3c25c0 100644 --- a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb +++ b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb @@ -27,7 +27,7 @@ "!pip install text-unidecode\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "# Install TorchAudio\n", diff --git a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb index d22258885db8..1a0c78173714 100644 --- a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb +++ b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb @@ -35,7 +35,7 @@ "id": "d4KCUoxSpdoZ" }, "source": [ - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "\n", "\"\"\"\n", "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", diff --git a/tutorials/tools/Multispeaker_Simulator.ipynb b/tutorials/tools/Multispeaker_Simulator.ipynb index c2a9caf1ea72..2fa7d912f51b 100644 --- a/tutorials/tools/Multispeaker_Simulator.ipynb +++ b/tutorials/tools/Multispeaker_Simulator.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "\n", "! git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", diff --git a/tutorials/tts/Aligner_Inference_Examples.ipynb b/tutorials/tts/Aligner_Inference_Examples.ipynb index 611e1e3b6e66..b4aacabe623c 100644 --- a/tutorials/tts/Aligner_Inference_Examples.ipynb +++ b/tutorials/tts/Aligner_Inference_Examples.ipynb @@ -39,7 +39,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb b/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb index 699f1b131408..24744eecbad0 100644 --- a/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb +++ b/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb @@ -57,7 +57,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "# If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !pip install librosa numpy matplotlib" ] diff --git a/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb b/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb index 53eec74d761b..4d5a9a35b2ec 100644 --- a/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb @@ -61,7 +61,7 @@ "# !pip install wget text-unidecode matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@${BRANCH}#egg=nemo_toolkit[all]\"\n", "\n", "## Install pynini\n", diff --git a/tutorials/tts/FastPitch_Finetuning.ipynb b/tutorials/tts/FastPitch_Finetuning.ipynb index 794d4b71ff44..baec811d9285 100755 --- a/tutorials/tts/FastPitch_Finetuning.ipynb +++ b/tutorials/tts/FastPitch_Finetuning.ipynb @@ -57,7 +57,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode \n", diff --git a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb index 0e4350f8b1ad..b7603d37e374 100644 --- a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb @@ -61,7 +61,7 @@ "# !pip install wget text-unidecode matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@${BRANCH}#egg=nemo_toolkit[all]\"\n", "\n", "## Install pynini\n", diff --git a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb index 558c0d95d30b..0d938127c00b 100644 --- a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb @@ -50,7 +50,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode scipy==1.7.3\n", diff --git a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb index eda5bba0aa1e..068ba166e8ec 100644 --- a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb +++ b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb @@ -94,7 +94,7 @@ "source": [ "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/tts/Inference_DurationPitchControl.ipynb b/tutorials/tts/Inference_DurationPitchControl.ipynb index bbf7e78339d9..cf7a4dc9f697 100644 --- a/tutorials/tts/Inference_DurationPitchControl.ipynb +++ b/tutorials/tts/Inference_DurationPitchControl.ipynb @@ -46,7 +46,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/Inference_ModelSelect.ipynb b/tutorials/tts/Inference_ModelSelect.ipynb index 195b773fb5ee..1b0aa7b43642 100644 --- a/tutorials/tts/Inference_ModelSelect.ipynb +++ b/tutorials/tts/Inference_ModelSelect.ipynb @@ -46,7 +46,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/NeMo_TTS_Primer.ipynb b/tutorials/tts/NeMo_TTS_Primer.ipynb index 99306744dd05..8a78f8dbb6e5 100644 --- a/tutorials/tts/NeMo_TTS_Primer.ipynb +++ b/tutorials/tts/NeMo_TTS_Primer.ipynb @@ -25,7 +25,7 @@ "source": [ "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/tts/Pronunciation_customization.ipynb b/tutorials/tts/Pronunciation_customization.ipynb index 6442fb403c80..78635ed6fc22 100644 --- a/tutorials/tts/Pronunciation_customization.ipynb +++ b/tutorials/tts/Pronunciation_customization.ipynb @@ -26,7 +26,7 @@ "4. Run this cell to set up dependencies.\n", "\"\"\"\n", "\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode \n", diff --git a/tutorials/tts/Tacotron2_Training.ipynb b/tutorials/tts/Tacotron2_Training.ipynb index e2ae5082e608..005a1b5bae1c 100644 --- a/tutorials/tts/Tacotron2_Training.ipynb +++ b/tutorials/tts/Tacotron2_Training.ipynb @@ -54,7 +54,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.17.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/utils/flash-attention.patch b/utils/flash-attention.patch new file mode 100644 index 000000000000..52a771dfdd9e --- /dev/null +++ b/utils/flash-attention.patch @@ -0,0 +1,73 @@ +diff --git a/csrc/flash_attn/fmha_api.cpp b/csrc/flash_attn/fmha_api.cpp +index 43b6f4c..064e8fe 100644 +--- a/csrc/flash_attn/fmha_api.cpp ++++ b/csrc/flash_attn/fmha_api.cpp +@@ -207,6 +207,11 @@ mha_fwd(const at::Tensor &q, // total_q x num_heads x head_size, total_q + bool is_sm75 = dprops->major == 7 && dprops->minor == 5; + bool is_sm80 = dprops->major == 8 && dprops->minor == 0; + bool is_sm8x = dprops->major == 8 && dprops->minor >= 0; ++ bool is_sm90 = dprops->major == 9 && dprops->minor == 0; ++ if (is_sm90){ ++ // use sm8x codes for h100 ++ is_sm8x = 1; ++ } + TORCH_CHECK(is_sm8x || is_sm75); + auto stream = at::cuda::getCurrentCUDAStream().stream(); + bool is_dropout = p_dropout > 0.0; +@@ -358,6 +363,11 @@ mha_bwd(const at::Tensor &dout, // total_q x num_heads, x head_size + bool is_sm75 = dprops->major == 7 && dprops->minor == 5; + bool is_sm80 = dprops->major == 8 && dprops->minor == 0; + bool is_sm8x = dprops->major == 8 && dprops->minor >= 0; ++ bool is_sm90 = dprops->major == 9 && dprops->minor == 0; ++ if (is_sm90){ ++ // use sm8x codes for h100 ++ is_sm8x = 1; ++ } + TORCH_CHECK(is_sm8x || is_sm75); + auto launch = &run_fmha_bwd; + +@@ -406,7 +416,7 @@ mha_bwd(const at::Tensor &dout, // total_q x num_heads, x head_size + TORCH_CHECK(batch_size > 0); + TORCH_CHECK((head_size % 8 == 0) && (head_size <= 128)); + if (head_size > 64) { // TODO: eventually we should support SM86 and SM70 with d=128 as well +- TORCH_CHECK(is_sm80); ++ TORCH_CHECK(is_sm80 || is_sm90); + } + + CHECK_SHAPE(q, total_q, num_heads, head_size); +@@ -648,7 +658,12 @@ mha_bwd_block(const at::Tensor &dout, // total x num_heads, x head_size + auto dprops = at::cuda::getCurrentDeviceProperties(); + bool is_sm80 = dprops->major == 8 && dprops->minor == 0; + bool is_sm8x = dprops->major == 8 && dprops->minor >= 0; +- TORCH_CHECK(dprops->major == 8 && dprops->minor >= 0); ++ bool is_sm90 = dprops->major == 9 && dprops->minor == 0; ++ if (is_sm90){ ++ // use sm8x codes for h100 ++ is_sm8x = 1; ++ } ++ TORCH_CHECK(is_sm8x); + auto launch = &run_fmha_block_dgrad_fp16_sm80; + + bool is_dropout = p_dropout > 0.0; +@@ -698,7 +713,7 @@ mha_bwd_block(const at::Tensor &dout, // total x num_heads, x head_size + TORCH_CHECK(batch_size > 0); + TORCH_CHECK(head_size == 16 || head_size == 32 || head_size == 64 || head_size == 128); + if (head_size == 128) { // TODO: eventually we should support SM86 and SM70 with d=128 as well +- TORCH_CHECK(is_sm80); ++ TORCH_CHECK(is_sm80 || is_sm90); + } + + CHECK_SHAPE(q, total_q, num_heads, head_size); +diff --git a/setup.py b/setup.py +index 1cd61e5..10eb2b5 100644 +--- a/setup.py ++++ b/setup.py +@@ -112,6 +112,8 @@ cc_flag.append("-gencode") + cc_flag.append("arch=compute_75,code=sm_75") + cc_flag.append("-gencode") + cc_flag.append("arch=compute_80,code=sm_80") ++cc_flag.append("-gencode") ++cc_flag.append("arch=compute_90,code=sm_90") + + subprocess.run(["git", "submodule", "update", "--init", "csrc/flash_attn/cutlass"]) + ext_modules.append( diff --git a/utils/triton.patch b/utils/triton.patch new file mode 100644 index 000000000000..66c2ac97609e --- /dev/null +++ b/utils/triton.patch @@ -0,0 +1,53 @@ +diff --git a/lib/driver/llvm.cc b/lib/driver/llvm.cc +index a73e6541d..01e38f825 100644 +--- a/lib/driver/llvm.cc ++++ b/lib/driver/llvm.cc +@@ -25,6 +25,7 @@ + #endif + #include + #include ++#include + #include "triton/driver/llvm.h" + #include "triton/driver/dispatch.h" + #include "triton/driver/error.h" +@@ -148,7 +149,9 @@ namespace triton + + int vptx(int version) + { +- if (version >= 11040) ++ if (version >= 12000) ++ return 80; ++ else if (version >= 11040) + return 74; + // if(version >= 11030) return 73; + // if(version >= 11020) return 72; +@@ -163,15 +166,15 @@ namespace triton + std::string llir_to_ptx(llvm::Module *module, int cc, int version) + { + // LLVM version in use may not officially support target hardware +- int max_nvvm_cc = 75; +- int max_nvvm_ptx = 74; ++ int max_nvvm_cc = 90; ++ int max_nvvm_ptx = 80; + // options + auto options = llvm::cl::getRegisteredOptions(); + auto *short_ptr = static_cast *>(options["nvptx-short-ptr"]); + assert(short_ptr); + short_ptr->setValue(true); + // compute capability +- std::string sm = "sm_" + std::to_string(cc); ++ std::string sm = cc == 90 ? "sm_90a" : "sm_" + std::to_string(cc); + // max PTX version + int ptx = vptx(version); + int ptx_major = ptx / 10; +@@ -244,7 +247,9 @@ namespace triton + ofs.close(); + std::string cmd; + int err; +- cmd = ptxas + " -v --gpu-name=sm_" + std::to_string(cc) + " " + fsrc + " -o " + fsrc + ".o 2> " + flog; ++ cmd = cc == 90 ? ++ ptxas + " -v --gpu-name=sm_90a " + fsrc + " -o " + fsrc + ".o 2> " + flog : ++ ptxas + " -v --gpu-name=sm_" + std::to_string(cc) + " " + fsrc + " -o " + fsrc + ".o 2> " + flog; + err = system(cmd.c_str()); + if (err != 0) + { From 0fd07337b5d3a76ffd6c04a687f9588083595662 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Thu, 27 Apr 2023 16:20:00 -0700 Subject: [PATCH 002/512] Update multimodal models to use megatron-core --- examples/multimodal/convert_ckpt_to_nemo.py | 9 +- .../clip/megatron_clip_imagenet_zeroshot.py | 12 +- .../generative/dreambooth/dreambooth.py | 11 +- examples/vision/convert_ckpt_to_nemo.py | 9 +- .../multimodal/data/clip/clip_dataset.py | 29 +- .../multimodal/data/common/webdataset.py | 8 +- .../multimodal/losses/clip_loss.py | 8 +- .../models/clip/megatron_clip_models.py | 253 ++++++---------- .../models/dreambooth/dreambooth.py | 147 +++++----- .../models/instruct_pix2pix/ldm/ddpm_edit.py | 18 +- .../models/multimodal_base_model.py | 43 ++- .../models/stable_diffusion/ldm/ddpm.py | 165 ++++++----- nemo/collections/multimodal/parts/utils.py | 10 +- .../megatron/data_samplers.py | 13 +- .../nlp/modules/common/megatron/mlp.py | 1 - .../vision/data/megatron/data_samplers.py | 30 +- .../megatron_vit_classification_models.py | 275 ++++++------------ .../vision/models/vision_base_model.py | 43 ++- .../common/megatron/vision_transformer.py | 12 +- .../vision/modules/vit/vit_backbone.py | 1 - 20 files changed, 548 insertions(+), 549 deletions(-) diff --git a/examples/multimodal/convert_ckpt_to_nemo.py b/examples/multimodal/convert_ckpt_to_nemo.py index e9ba1072f089..ca739537af33 100644 --- a/examples/multimodal/convert_ckpt_to_nemo.py +++ b/examples/multimodal/convert_ckpt_to_nemo.py @@ -26,7 +26,6 @@ import os import torch -from apex.transformer import parallel_state from argparse import ArgumentParser from pytorch_lightning.plugins.environments import TorchElasticEnvironment from pytorch_lightning.trainer.trainer import Trainer @@ -40,6 +39,14 @@ from nemo.utils.distributed import initialize_distributed from nemo.utils.model_utils import inject_model_parallel_rank +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False def get_args(): parser = ArgumentParser() diff --git a/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py b/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py index 7a7b7a86282b..cd0755c9d7ec 100644 --- a/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py +++ b/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py @@ -16,7 +16,6 @@ import torch import torch.nn.functional as F from PIL import Image -from apex.transformer import parallel_state from omegaconf.omegaconf import OmegaConf, open_dict from pytorch_lightning import Trainer from pytorch_lightning.plugins.environments import TorchElasticEnvironment @@ -36,6 +35,14 @@ from nemo.utils import logging from nemo.utils.get_rank import is_global_rank_zero +try: + from megatron.core import parallel_state, tensor_parallel + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False def accuracy(output, target, topk=(1,)): pred = output.topk(max(topk), 1, True, True)[1].t() @@ -101,6 +108,9 @@ def model_cfg_modifier(model_cfg): top1, top5, n = 0., 0., 0. for images, target in tqdm(imagenet_val["images"], desc="Imagenet Zero-shot Evaluation", leave=False): + if images is None or target is None: + continue + images = images.cuda(non_blocking=True) target = target.cuda(non_blocking=True) # predict diff --git a/examples/multimodal/generative/dreambooth/dreambooth.py b/examples/multimodal/generative/dreambooth/dreambooth.py index b712de22ad50..d6b636a3ee54 100644 --- a/examples/multimodal/generative/dreambooth/dreambooth.py +++ b/examples/multimodal/generative/dreambooth/dreambooth.py @@ -14,8 +14,7 @@ import os import pytorch_lightning as pl import torch -from apex.transformer import parallel_state -from apex.transformer.pipeline_parallel.utils import _GLOBAL_NUM_MICROBATCHES_CALCULATOR + from omegaconf import DictConfig, OmegaConf, open_dict from pytorch_lightning import Trainer from pytorch_lightning.plugins.environments import TorchElasticEnvironment @@ -36,6 +35,14 @@ from nemo.utils import logging from nemo.utils.exp_manager import exp_manager +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False def prepare_reg_data(cfg): reg_dir = cfg.model.data.regularization_dir diff --git a/examples/vision/convert_ckpt_to_nemo.py b/examples/vision/convert_ckpt_to_nemo.py index 599ec8e089c7..f12ee7555de4 100644 --- a/examples/vision/convert_ckpt_to_nemo.py +++ b/examples/vision/convert_ckpt_to_nemo.py @@ -26,7 +26,6 @@ import os import torch -from apex.transformer import parallel_state from argparse import ArgumentParser from pytorch_lightning.plugins.environments import TorchElasticEnvironment from pytorch_lightning.trainer.trainer import Trainer @@ -37,6 +36,14 @@ from nemo.utils.distributed import initialize_distributed from nemo.utils.model_utils import inject_model_parallel_rank +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False def get_args(): parser = ArgumentParser() diff --git a/nemo/collections/multimodal/data/clip/clip_dataset.py b/nemo/collections/multimodal/data/clip/clip_dataset.py index 927639a86ce1..61388c94270b 100644 --- a/nemo/collections/multimodal/data/clip/clip_dataset.py +++ b/nemo/collections/multimodal/data/clip/clip_dataset.py @@ -13,22 +13,28 @@ # limitations under the License. import torch from functools import partial -from torch.utils.data import Dataset +from torch.utils.data import default_collate, Dataset from typing import Any, List, Union, Dict, Optional from nemo.collections.multimodal.data.clip.augmentations.augmentations import image_transform from nemo.collections.multimodal.data.clip.imagenet_zeroshot_data import openai_imagenet_template, imagenet_classnames from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon -from nemo.collections.nlp.data.language_modeling.megatron.megatron_batch_samplers import MegatronPretrainingBatchSampler +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( + MegatronPretrainingRandomSampler, + MegatronPretrainingSampler, +) + from nemo.collections.vision.data.megatron.image_folder import ImageFolder from nemo.collections.vision.data.megatron.vit_dataset import RandomSeedDataset try: - from apex.transformer import parallel_state + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True - HAVE_APEX = True except (ImportError, ModuleNotFoundError): - HAVE_APEX = False + + HAVE_MEGATRON_CORE = False def tokenize(texts: Union[str, List[str]], tokenizer: Any, context_length: int = 77) -> torch.LongTensor: @@ -147,21 +153,26 @@ def build_imagenet_validation_dataloader(model_cfg, tokenizer=None): root=imagenet_path, transform=val_image_transform, ) - # image_dataset = RandomSeedDataset(val_data) - image_batch_sampler = MegatronPretrainingBatchSampler( + + image_batch_sampler = MegatronPretrainingSampler( total_samples=len(image_dataset), consumed_samples=0, micro_batch_size=model_cfg.micro_batch_size, - global_batch_size=model_cfg.global_batch_size, - # TODO (yuya): if grad acc is not 1, this might not work as expected. data_parallel_rank=parallel_state.get_data_parallel_rank(), data_parallel_size=parallel_state.get_data_parallel_world_size(), drop_last=False, ) + + def custom_collate(batch): + if len(batch) == 0: + return None, None + else: + return default_collate(batch) imagenet_val["images"] = torch.utils.data.DataLoader( image_dataset, batch_sampler=image_batch_sampler, num_workers=min(data_cfg.num_workers, 2), + collate_fn=custom_collate, pin_memory=True, persistent_workers=True, ) diff --git a/nemo/collections/multimodal/data/common/webdataset.py b/nemo/collections/multimodal/data/common/webdataset.py index aec2396d2b72..0dd47a5a9658 100644 --- a/nemo/collections/multimodal/data/common/webdataset.py +++ b/nemo/collections/multimodal/data/common/webdataset.py @@ -34,11 +34,13 @@ from webdataset import WebDataset try: - from apex.transformer import parallel_state + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True - HAVE_APEX = True except (ImportError, ModuleNotFoundError): - HAVE_APEX = False + + HAVE_MEGATRON_CORE = False Image.MAX_IMAGE_PIXELS = 933120000 _IMG_EXTENSIONS = "jpg jpeg png ppm pgm pbm pnm".split() diff --git a/nemo/collections/multimodal/losses/clip_loss.py b/nemo/collections/multimodal/losses/clip_loss.py index e4ad3b3fae57..73100eaf88bf 100644 --- a/nemo/collections/multimodal/losses/clip_loss.py +++ b/nemo/collections/multimodal/losses/clip_loss.py @@ -22,11 +22,13 @@ ) try: - from apex.transformer import parallel_state + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True - HAVE_APEX = True except (ImportError, ModuleNotFoundError): - HAVE_APEX = False + + HAVE_MEGATRON_CORE = False def gather_features( diff --git a/nemo/collections/multimodal/models/clip/megatron_clip_models.py b/nemo/collections/multimodal/models/clip/megatron_clip_models.py index c871d9361ef5..54a6af64c92c 100644 --- a/nemo/collections/multimodal/models/clip/megatron_clip_models.py +++ b/nemo/collections/multimodal/models/clip/megatron_clip_models.py @@ -18,6 +18,7 @@ import torch.nn.functional as F from functools import partial from omegaconf.dictconfig import DictConfig +from pytorch_lightning.accelerators import CPUAccelerator from pytorch_lightning.trainer.trainer import Trainer from tqdm import tqdm from typing import Any, List, Optional, Union @@ -26,6 +27,7 @@ build_imagenet_validation_dataloader from nemo.collections.multimodal.losses.clip_loss import ClipLoss from nemo.collections.multimodal.models.multimodal_base_model import MegatronMultimodalModel +from nemo.collections.nlp.modules.common.megatron.build_model import build_model from nemo.collections.nlp.modules.common.megatron.language_model import get_language_model from nemo.collections.nlp.modules.common.megatron.module import ( MegatronModule, @@ -47,21 +49,22 @@ from nemo.utils import logging try: - from apex.transformer import parallel_state - from apex.transformer.pipeline_parallel.schedules.common import build_model - from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_without_interleaving import ( - forward_backward_pipelining_without_interleaving, - ) - from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_with_interleaving import ( - _forward_backward_pipelining_with_interleaving, - ) - from apex.transformer.pipeline_parallel.schedules.fwd_bwd_no_pipelining import forward_backward_no_pipelining from apex.transformer.enums import AttnMaskType + from apex.transformer.pipeline_parallel.utils import get_num_microbatches HAVE_APEX = True except (ImportError, ModuleNotFoundError): HAVE_APEX = False +try: + from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False class CLIPVisionTransformer(MegatronModule): """Vision Transformer Model.""" @@ -284,6 +287,10 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): raise ImportError( "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." ) + if not HAVE_MEGATRON_CORE: + raise ImportError( + "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) # this prevents base constructor from initializing tokenizer self.tokenizer = None @@ -298,11 +305,19 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): raise ValueError('Virtual pipeline model parallel is only supported when using megatron_amp_O2') # build_model returns a list of modules which are used for interleaved pipeline parallelism - self.model = build_model( - model_provider_func=self.model_provider_func, - wrap_with_ddp=False, - virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), - ) + if isinstance(self.trainer.accelerator, CPUAccelerator): + self.model = build_model( + model_provider_func=self.model_provider_func, + wrap_with_ddp=False, + on_cpu=True, + virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), + ) + else: + self.model = build_model( + model_provider_func=self.model_provider_func, + wrap_with_ddp=False, + virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), + ) # if we're not using interleaved, then self.model is a module. if self.cfg.get('virtual_pipeline_model_parallel_size', None) is None: @@ -337,6 +352,14 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): else: raise ValueError('precision must be in [32, 16, "bf16"]') + def get_module_list(self): + if isinstance(self.model, list): + return [model.module if isinstance(model, Float16Module) else model for model in self.model] + elif isinstance(self.model, Float16Module): + return [self.model.module] + else: + return [self.model] + def model_provider_func(self, pre_process, post_process): """Model depends on pipeline paralellism.""" model = CLIPModel( @@ -397,17 +420,7 @@ def forward(self, image, text): output_tensor = self.model(image, text) return output_tensor - def _get_fwd_bwd_function(self): - if self.cfg.get('pipeline_model_parallel_size', 1) > 1: - if self.cfg.get('virtual_pipeline_model_parallel_size', None) is not None: - fwd_bwd_function = _forward_backward_pipelining_with_interleaving - else: - fwd_bwd_function = forward_backward_pipelining_without_interleaving - else: - fwd_bwd_function = forward_backward_no_pipelining - return fwd_bwd_function - - def training_step(self, batch, batch_idx): + def training_step(self, dataloader_iter, batch_idx): """ Our dataloaders produce a micro-batch and then we fetch a number of microbatches depending on the global batch size and model parallel size @@ -417,52 +430,27 @@ def training_step(self, batch, batch_idx): The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. """ - # we zero grads here because we also call backward in the apex fwd/bwd functions + # we zero grads here because we also call backward in the megatron-core fwd/bwd functions self._optimizer.zero_grad() - if parallel_state.is_pipeline_first_stage(ignore_virtual=True) or parallel_state.is_pipeline_last_stage( - ignore_virtual=True - ): - # we prepare the micro batches for the apex fwd/bwd function - batch_for_pipeline = self.process_global_batch(batch) - else: - # The intermediate pipeline stages do not need any inputs from data loader - # GPT3 uses decoder with AttnMask:causal, thus doesn't need attention_mask - batch_for_pipeline = None - # TODO (yuya): fix this shape tensor_shape = None - # handle asynchronous grad reduction - if self.with_distributed_adam: - if self.megatron_amp_O2: - # copy grads to main grad - custom_sync_context_handler = lambda: self._optimizer.no_sync(greedy_grad_copy=True) - else: - # keep grad tensors around - custom_sync_context_handler = lambda: self._optimizer.no_sync(greedy_grad_copy=False) - else: - if self.megatron_amp_O2 and not self.cfg.get('sequence_parallel', False): - custom_sync_context_handler = self._optimizer.no_sync - else: - # TODO: enable async grad all reduce for O1/autocast mixed precision training - custom_sync_context_handler = None - # run forward and backwards passes for an entire global batch # we do this inside training_step to support pipeline parallelism - fwd_bwd_function = self._get_fwd_bwd_function() + fwd_bwd_function = get_forward_backward_func() losses_reduced_per_micro_batch = fwd_bwd_function( forward_step_func=self.get_forward_output_and_loss_func(), - batch=batch_for_pipeline, - model=self.model, + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), forward_only=False, tensor_shape=tensor_shape, dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, - custom_sync_context_handler=custom_sync_context_handler, - sequence_parallel_enabled=self.cfg.get('sequence_parallel', False), - sync_batch_comm=self.cfg.get('sync_batch_comm', False), + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) # only the last stages of the pipeline return losses @@ -505,17 +493,18 @@ def training_step(self, batch, batch_idx): if self.cfg.precision == 16: loss_scale = self.trainer.precision_plugin.scaler._scale if loss_scale is not None: - self.log('loss_scale', loss_scale) + self.log('loss_scale', loss_scale, batch_size=1) - self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True) + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) lr = self._optimizer.param_groups[0]['lr'] - self.log('lr', lr, rank_zero_only=True) - self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True) + self.log('lr', lr, rank_zero_only=True, batch_size=1) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) self.log( 'consumed_samples', self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), prog_bar=True, rank_zero_only=True, + batch_size=1, ) return loss_mean @@ -533,9 +522,12 @@ def optimizer_zero_grad(self, *args, **kwargs): """ pass - def _append_module_grads(self, module, grads): + def _append_sequence_parallel_module_grads(self, module, grads): + """ Helper method for allreduce_sequence_parallel_gradients""" + for param in module.parameters(): - if getattr(param, 'sequence_parallel_enabled', False): + sequence_parallel_param = getattr(param, 'sequence_parallel', False) + if sequence_parallel_param and param.requires_grad: if self.megatron_amp_O2: grad = param.main_grad else: @@ -551,9 +543,9 @@ def allreduce_sequence_parallel_gradients(self): grads = [] if isinstance(self.model, list): for module in self.model: - self._append_module_grads(module, grads) + self._append_sequence_parallel_module_grads(module, grads) else: - self._append_module_grads(self.model, grads) + self._append_sequence_parallel_module_grads(self.model, grads) coalesced = torch._utils._flatten_dense_tensors(grads) torch.distributed.all_reduce(coalesced, group=parallel_state.get_tensor_model_parallel_group()) @@ -566,16 +558,17 @@ def get_forward_output_and_loss_func(self): gather_with_grad=self.cfg.gather_with_grad, ) - def fwd_output_and_loss_func(batch, model): + def fwd_output_and_loss_func(dataloader_iter, model): + batch = next(dataloader_iter) if parallel_state.get_pipeline_model_parallel_world_size() == 1: - batch = [x.cuda(non_blocking=True) for x in batch] - images, captions = batch + images = batch["images"].cuda(non_blocking=True) + captions = batch["captions"].cuda(non_blocking=True) else: # GPT3 uses only causal mask, which doesn't need attention mask if parallel_state.is_pipeline_first_stage(): # Fist pipeline stage needs only the tokens and position_ids - images = batch[0].cuda(non_blocking=True) - captions = batch[1].cuda(non_blocking=True) + images = batch["images"].cuda(non_blocking=True) + captions = batch["captions"].cuda(non_blocking=True) else: # Intermediate / Last pipeline stage doesn't need any inputs images, captions = None, None @@ -634,6 +627,9 @@ def accuracy(output, target, topk=(1,)): with torch.no_grad(): top1, top5, n = 0., 0., 0. for images, target in tqdm(self.imagenet_val["images"], desc="Imagenet Zero-shot Evaluation", leave=False): + if images is None or target is None: + continue + images = images.cuda(non_blocking=True).to(self.autocast_dtype) target = target.cuda(non_blocking=True) # predict @@ -656,63 +652,42 @@ def accuracy(output, target, topk=(1,)): top5 = (top5 / n) return top1, top5 - def validation_step(self, batch, batch_idx): + def validation_step(self, dataloader_iter, batch_idx): """ Our dataloaders produce a micro-batch and then we fetch a number of microbatches depending on the global batch size and model parallel size from the dataloader to produce a list of microbatches. - The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. - """ + The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. """ - batch_for_pipeline = self.process_global_batch(batch, self.cfg.global_batch_size) tensor_shape = None # Placeholder # run forward passes for an entire global batch # we do this inside validation_step to support pipeline parallelism - fwd_bwd_function = self._get_fwd_bwd_function() + fwd_bwd_function = get_forward_backward_func() losses_reduced_per_micro_batch = fwd_bwd_function( forward_step_func=self.get_forward_output_and_loss_func(), - batch=batch_for_pipeline, - model=self.model, + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), forward_only=True, tensor_shape=tensor_shape, dtype=self.autocast_dtype, - sequence_parallel_enabled=self.cfg.get('sequence_parallel', False), + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) - def _get_metric_with_batch_size(metric_key): + def _get_metric(metric_key): # only the last stage of the pipeline returns losses if losses_reduced_per_micro_batch: - loss_with_batch_size_list = [ - [loss_reduced[metric_key].item(), self.cfg.micro_batch_size] - for loss_reduced in losses_reduced_per_micro_batch - ] - # actual_batch_size = batch[0].shape[0] # Might be lesser than global_batch_size if drop_last=False - # expected_batch_size = self.cfg.global_batch_size // parallel_state.get_data_parallel_world_size() - # if actual_batch_size == expected_batch_size: - # loss_with_batch_size_list = [ - # [loss_reduced[metric_key].item(), self.cfg.micro_batch_size] - # for loss_reduced in losses_reduced_per_micro_batch - # ] - # else: - # loss_with_batch_size_list = [] - # total_samples_remaining = actual_batch_size - # for loss_reduced in losses_reduced_per_micro_batch: - # if total_samples_remaining <= 0: - # break - # if total_samples_remaining // self.cfg.micro_batch_size >= 1: - # loss_with_batch_size_list.append( - # [loss_reduced[metric_key].item(), self.cfg.micro_batch_size]) - # else: - # loss_with_batch_size_list.append([loss_reduced[metric_key].item(), total_samples_remaining]) - # total_samples_remaining = total_samples_remaining - self.cfg.micro_batch_size + loss_tensors_list = [loss_reduced[metric_key] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.vstack(loss_tensors_list) + loss_mean = loss_tensor.mean(axis=0) else: - # we're not on the last pipeline stage so no losses - loss_with_batch_size_list = [] - return loss_with_batch_size_list + loss_mean = torch.tensor([0.0]).cuda() + return loss_mean[0] - return _get_metric_with_batch_size('loss') + return _get_metric('loss') def validation_epoch_end(self, outputs): # TODO (yuya): need fix later, check with Sean @@ -724,38 +699,24 @@ def validation_epoch_end(self, outputs): imagenet_metric = torch.zeros(2).cuda() imagenet_metric[0], imagenet_metric[1] = self.zero_shot_eval() imagenet_metric = average_losses_across_data_parallel_group(imagenet_metric) - self.log('imagenet_top1', imagenet_metric[0], prog_bar=True, rank_zero_only=True) - self.log('imagenet_top5', imagenet_metric[1], prog_bar=True, rank_zero_only=True) + self.log('imagenet_top1', imagenet_metric[0], prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('imagenet_top5', imagenet_metric[1], prog_bar=True, rank_zero_only=True, batch_size=1) if parallel_state.is_pipeline_last_stage(): - loss_outputs = [output[0] for output in outputs] - - def _get_average_metric(metric_outputs): - # only the last pipeline parallel stages return metric with their batch size - total_num_samples = 0 - total_metric = 0 - for metric_with_batch_size in metric_outputs: - metric_with_batch_size_array = np.array(metric_with_batch_size).flatten() - batch_metrices = metric_with_batch_size_array[0::2] - batch_sizes = metric_with_batch_size_array[1::2] - total_num_samples += sum(batch_sizes) - total_metric += np.dot(batch_metrices, batch_sizes) - - avg_metric = total_metric / total_num_samples - return avg_metric - averaged_metrics = torch.tensor( - [_get_average_metric(loss_outputs)], - dtype=torch.float32).cuda() + [torch.stack(outputs).mean()], + dtype=torch.float32, device='cuda') else: - averaged_metrics = torch.tensor([0.0], dtype=torch.float32).cuda() + averaged_metrics = torch.tensor([0.0], dtype=torch.float32, device='cuda') # we can only log on one rank if it is rank zero so we broadcast from last rank torch.distributed.broadcast(averaged_metrics, get_last_rank()) averaged_loss = averaged_metrics - self.log('global_step', self.trainer.global_step, prog_bar=True, rank_zero_only=True) - self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True) + self.log('global_step', self.trainer.global_step, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True, batch_size=1) + + return averaged_loss def test_step(self, batch, batch_idx): return self.validation_step(batch, batch_idx) @@ -764,22 +725,6 @@ def test_epoch_end(self, outputs): averaged_loss = average_losses_across_data_parallel_group(outputs) logging.info(f'test_loss: {averaged_loss[0]}') - def process_global_batch(self, global_batch, global_batch_size=None): - """ Prepares the global batch for apex fwd/bwd functions. - Global batch is a list of micro batches. - """ - images = global_batch["images"] # images - captions = global_batch["captions"] - - expected_batch_size = None - if global_batch_size is not None: - expected_batch_size = global_batch_size // parallel_state.get_data_parallel_world_size() - current_batch_size = images.shape[0] - if expected_batch_size is not None and expected_batch_size > current_batch_size: - raise NotImplementedError("Please turn on drop_last.") - - return [images, captions] - def build_train_valid_test_datasets(self): logging.info('Building datasets for CLIP...') if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): @@ -802,13 +747,6 @@ def build_train_valid_test_datasets(self): return self._train_ds, self._validation_ds, self._test_ds - # def build_pretraining_data_loader(self, dataset, consumed_samples, dataset_type=None, drop_last=True): - # """Build dataloader given an input dataset.""" - # - # return torch.utils.data.DataLoader( - # dataset, num_workers=self.cfg.data.num_workers, pin_memory=True, - # ) - def setup(self, stage=None): """ PTL hook that is executed after DDP spawns. We setup datasets here as megatron datasets require DDP to instantiate. @@ -863,9 +801,8 @@ def setup(self, stage=None): self.build_train_valid_test_datasets() # Batch size need to be provided for webdatset - self._num_micro_batches = self.cfg.global_batch_size // ( - self.cfg.micro_batch_size * parallel_state.get_data_parallel_world_size()) - self._global_batch_size_on_this_data_parallel_rank = self._num_micro_batches * self.cfg.micro_batch_size + self._num_micro_batches = get_num_microbatches() + self._micro_batch_size = self.cfg.micro_batch_size self.setup_training_data(self.cfg.data) self.setup_validation_data(self.cfg.data) @@ -893,7 +830,7 @@ def setup_training_data(self, cfg): ) self._train_dl = torch.utils.data.DataLoader( self._train_ds, - batch_size=self._global_batch_size_on_this_data_parallel_rank, + batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, drop_last=cfg.train.get("drop_last", True), @@ -908,7 +845,7 @@ def setup_validation_data(self, cfg): ) self._validation_dl = torch.utils.data.DataLoader( self._validation_ds, - batch_size=self._global_batch_size_on_this_data_parallel_rank, + batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, drop_last=cfg.train.get("drop_last", True), @@ -922,7 +859,7 @@ def setup_test_data(self, cfg): f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' ) self._test_dl = torch.utils.data.DataLoader( - self._test_ds, batch_size=self._global_batch_size_on_this_data_parallel_rank, + self._test_ds, batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, ) diff --git a/nemo/collections/multimodal/models/dreambooth/dreambooth.py b/nemo/collections/multimodal/models/dreambooth/dreambooth.py index 5c091421c238..ff28f6c1e93d 100644 --- a/nemo/collections/multimodal/models/dreambooth/dreambooth.py +++ b/nemo/collections/multimodal/models/dreambooth/dreambooth.py @@ -16,8 +16,7 @@ import pytorch_lightning as pl import torch from abc import ABC -from apex import amp -from apex.contrib.clip_grad import clip_grad_norm_ + from functools import partial from omegaconf import DictConfig, OmegaConf, open_dict from pytorch_lightning import Trainer @@ -33,8 +32,8 @@ extract_into_tensor, noise_like from nemo.collections.multimodal.parts.stable_diffusion.utils import default, exists from nemo.collections.multimodal.parts.utils import randn_like -from nemo.collections.nlp.data.language_modeling.megatron.megatron_batch_samplers import ( - MegatronPretrainingRandomBatchSampler, +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( + MegatronPretrainingRandomSampler, ) from nemo.collections.nlp.parts.utils_funcs import get_last_rank, is_last_rank from nemo.core.classes import ModelPT @@ -44,23 +43,23 @@ from nemo.utils.exp_manager import exp_manager try: - from apex.contrib.clip_grad import clip_grad_norm_ + from apex.transformer.pipeline_parallel.utils import get_num_microbatches from apex import amp - from apex.transformer import parallel_state - from apex.transformer.pipeline_parallel.schedules.common import build_model - from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_without_interleaving import ( - forward_backward_pipelining_without_interleaving, - ) - from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_with_interleaving import ( - _forward_backward_pipelining_with_interleaving, - ) - from apex.transformer.pipeline_parallel.schedules.fwd_bwd_no_pipelining import forward_backward_no_pipelining from apex.transformer.enums import AttnMaskType HAVE_APEX = True except (ImportError, ModuleNotFoundError): HAVE_APEX = False +try: + from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False def disabled_train(self, mode=True): """Overwrite model.train with this function to make sure train/eval mode @@ -110,6 +109,7 @@ def __init__(self, cfg): self.parameterization = self.cfg.noise_scheduler.parameterization self.get_noise_scheduler(self.cfg.noise_scheduler) + self.model_type = None self.rng = torch.Generator(device=torch.cuda.current_device(), ) def instantiate_unet(self, cfg): @@ -198,6 +198,10 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): raise ImportError( "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." ) + if not HAVE_MEGATRON_CORE: + raise ImportError( + "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) # this prevents base constructor from initializing tokenizer self.tokenizer = None @@ -218,6 +222,14 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): else: raise ValueError('precision must be in [32, 16, "bf16"]') + def get_module_list(self): + if isinstance(self.model, list): + return [model.module if isinstance(model, Float16Module) else model for model in self.model] + elif isinstance(self.model, Float16Module): + return [self.model.module] + else: + return [self.model] + def model_provider_func(self, pre_process=True, post_process=True): """Model depends on pipeline paralellism.""" model = DreamBooth(cfg=self.cfg) @@ -227,7 +239,7 @@ def forward(self, batch): output_tensor = self.model(batch) return output_tensor - def training_step(self, batch, batch_idx): + def training_step(self, dataloader_iter, batch_idx): """ Our dataloaders produce a micro-batch and then we fetch a number of microbatches depending on the global batch size and model parallel size @@ -240,22 +252,21 @@ def training_step(self, batch, batch_idx): # we zero grads here because we also call backward in the apex fwd/bwd functions self._optimizer.zero_grad() - # we prepare the micro batches for the apex fwd/bwd function - batch_for_pipeline = self.process_global_batch(batch) - # run forward and backwards passes for an entire global batch # we do this inside training_step to support pipeline parallelism - losses_reduced_per_micro_batch = forward_backward_no_pipelining( + fwd_bwd_function = get_forward_backward_func() + + losses_reduced_per_micro_batch = fwd_bwd_function( forward_step_func=self.get_forward_output_and_loss_func(), - batch=batch_for_pipeline, - model=self.model, + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), forward_only=False, - tensor_shape=None, # required by pipeline parallelism + tensor_shape=None, dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, - custom_sync_context_handler=None, - sequence_parallel_enabled=False, - sync_batch_comm=False, + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) # only the last stages of the pipeline return losses @@ -291,32 +302,36 @@ def training_step(self, batch, batch_idx): if self.cfg.precision == 16: loss_scale = self.trainer.precision_plugin.scaler._scale if loss_scale is not None: - self.log('loss_scale', loss_scale) + self.log('loss_scale', loss_scale, batch_size=1) - self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True) + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) lr = self._optimizer.param_groups[0]['lr'] - self.log('lr', lr, prog_bar=True, rank_zero_only=True) - self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True) + self.log('lr', lr, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) self.log( 'consumed_samples', self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), prog_bar=True, rank_zero_only=True, + batch_size=1, ) return loss_mean - @torch.no_grad() - def validation_step(self, batch, batch_idx): - batch_for_pipeline = self.process_global_batch(batch) + def validation_step(self, dataloader_iter, batch_idx): + fwd_bwd_function = get_forward_backward_func() - losses_reduced_per_micro_batch = forward_backward_no_pipelining( + losses_reduced_per_micro_batch = fwd_bwd_function( forward_step_func=self.get_forward_output_and_loss_func(), - batch=batch_for_pipeline, - model=self.model, + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), forward_only=True, - tensor_shape=None, # required by pipeline parallelism + tensor_shape=None, dtype=self.autocast_dtype, + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) + # only the last stages of the pipeline return losses if losses_reduced_per_micro_batch: # average loss across micro batches @@ -326,7 +341,7 @@ def validation_step(self, batch, batch_idx): else: val_loss_mean = torch.tensor(0.0, device=torch.cuda.current_device()) - self.log(val_loss_mean, prog_bar=False, logger=True, on_step=False, on_epoch=True) + self.log(val_loss_mean, prog_bar=False, logger=True, on_step=False, on_epoch=True, batch_size=1) def backward(self, *args, **kwargs): """ LightningModule hook to do backward. @@ -341,38 +356,40 @@ def optimizer_zero_grad(self, *args, **kwargs): """ pass - def _append_module_grads(self, module, grads): + def _append_sequence_parallel_module_grads(self, module, grads): + """ Helper method for allreduce_sequence_parallel_gradients""" + for param in module.parameters(): - if getattr(param, 'sequence_parallel_enabled', False): + sequence_parallel_param = getattr(param, 'sequence_parallel', False) + if sequence_parallel_param and param.requires_grad: if self.megatron_amp_O2: grad = param.main_grad else: grad = param.grad grads.append(grad.data) - def process_global_batch(self, global_batch, global_batch_size=None): - """ Prepares the global batch for apex fwd/bwd functions. - Global batch is a list of micro batches. - """ - # noise_map, condition - prompts, images = global_batch - - # DB has more dedicated structure for encoding, so we enable autocasting here as well - with torch.cuda.amp.autocast( - self.autocast_dtype in (torch.half, torch.bfloat16), - dtype=self.autocast_dtype, - ): - images = images.cuda(non_blocking=True) - - cond = self.model.text_encoder([t[0] for t in prompts]) - if self.cfg.with_prior_preservation: - cond_prior = self.model.text_encoder([t[1] for t in prompts]) - cond = torch.cat([cond, cond_prior], dim=0) - - return images, cond - def get_forward_output_and_loss_func(self): - def fwd_output_and_loss_func(batch, model): + def process_batch(batch): + # noise_map, condition + prompts, images = batch + + # DB has more dedicated structure for encoding, so we enable autocasting here as well + with torch.cuda.amp.autocast( + self.autocast_dtype in (torch.half, torch.bfloat16), + dtype=self.autocast_dtype, + ): + images = images.cuda(non_blocking=True) + + cond = self.model.text_encoder([t[0] for t in prompts]) + if self.cfg.with_prior_preservation: + cond_prior = self.model.text_encoder([t[1] for t in prompts]) + cond = torch.cat([cond, cond_prior], dim=0) + + return images, cond + + def fwd_output_and_loss_func(dataloader_iter, model): + batch = next(dataloader_iter) + batch = process_batch(batch) batch = [x.cuda(non_blocking=True) for x in batch] loss = model(batch) @@ -427,9 +444,8 @@ def setup(self, stage=None): self.init_global_step = self.trainer.global_step # Batch size need to be provided for webdatset - self._num_micro_batches = self.cfg.global_batch_size // ( - self.cfg.micro_batch_size * parallel_state.get_data_parallel_world_size()) - self._global_batch_size_on_this_data_parallel_rank = self._num_micro_batches * self.cfg.micro_batch_size + self._num_micro_batches = get_num_microbatches() + self._micro_batch_size = self.cfg.micro_batch_size self.setup_training_data(self.cfg.data) @@ -449,7 +465,7 @@ def setup_training_data(self, cfg): center_crop=cfg.center_crop, ) - batch_sampler = MegatronPretrainingRandomBatchSampler( + batch_sampler = MegatronPretrainingRandomSampler( total_samples=len(train_dataset), consumed_samples=self.compute_consumed_samples(0), micro_batch_size=self.cfg.micro_batch_size, @@ -461,7 +477,6 @@ def setup_training_data(self, cfg): self._train_dl = torch.utils.data.DataLoader( train_dataset, - # batch_size=self._global_batch_size_on_this_data_parallel_rank, batch_sampler=batch_sampler, collate_fn=partial(_collate_fn, with_prior_preservation=self.cfg.with_prior_preservation), num_workers=cfg.num_workers, diff --git a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py index 3a98c756e414..eb35b638ff8c 100644 --- a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py +++ b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py @@ -29,18 +29,20 @@ from nemo.collections.multimodal.data.instruct_pix2pix.edit_dataset import EditDataset from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion, LatentDiffusion -from nemo.collections.nlp.data.language_modeling.megatron.megatron_batch_samplers import ( - MegatronPretrainingBatchSampler, - MegatronPretrainingRandomBatchSampler, +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( + MegatronPretrainingSampler, + MegatronPretrainingRandomSampler, ) from nemo.utils import logging try: - from apex.transformer import parallel_state + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True - HAVE_APEX = True except (ImportError, ModuleNotFoundError): - HAVE_APEX = False + + HAVE_MEGATRON_CORE = False class LatentDiffusionEdit(LatentDiffusion): @@ -229,7 +231,7 @@ def build_pretraining_data_loader(self, dataset, consumed_samples, drop_last=Tru if hasattr(self._cfg.data, 'dataloader_type') and self._cfg.data.dataloader_type is not None: # TODO (yuya): fix this if self._cfg.data.dataloader_type == 'single': - batch_sampler = MegatronPretrainingBatchSampler( + batch_sampler = MegatronPretrainingSampler( total_samples=len(dataset), consumed_samples=consumed_samples, micro_batch_size=self._cfg.micro_batch_size, @@ -239,7 +241,7 @@ def build_pretraining_data_loader(self, dataset, consumed_samples, drop_last=Tru drop_last=drop_last, ) elif self._cfg.data.dataloader_type == 'cyclic': - batch_sampler = MegatronPretrainingRandomBatchSampler( + batch_sampler = MegatronPretrainingRandomSampler( total_samples=len(dataset), consumed_samples=consumed_samples, micro_batch_size=self._cfg.micro_batch_size, diff --git a/nemo/collections/multimodal/models/multimodal_base_model.py b/nemo/collections/multimodal/models/multimodal_base_model.py index c509162a1870..d7119f0c120d 100644 --- a/nemo/collections/multimodal/models/multimodal_base_model.py +++ b/nemo/collections/multimodal/models/multimodal_base_model.py @@ -37,7 +37,7 @@ ) from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer, get_tokenizer -from nemo.collections.nlp.parts.nlp_overrides import GradScaler +from nemo.collections.nlp.parts.nlp_overrides import NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE, GradScaler from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector from nemo.core.classes import ModelPT from nemo.core.classes.exportable import Exportable @@ -46,13 +46,20 @@ from nemo.utils.get_rank import is_global_rank_zero try: - from apex.transformer import parallel_state from apex.transformer.pipeline_parallel.utils import get_num_microbatches HAVE_APEX = True except (ImportError, ModuleNotFoundError): HAVE_APEX = False +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True +except (ImportError, ModuleNotFoundError): + HAVE_MEGATRON_CORE = False + + __all__ = ['MultimodalModel', 'MegatronMultimodalModel'] NEMO_MULTIMODAL_TMP = os.path.join(os.path.dirname(str(TRANSFORMERS_CACHE)), "nemo_multimodal_tmp") @@ -208,11 +215,12 @@ class MegatronMultimodalModel(MultimodalModel): """ def __init__(self, cfg: DictConfig, trainer: Trainer): - # FIXME: switch to self._cfg - if not HAVE_APEX: + + if not HAVE_MEGATRON_CORE: raise ImportError( - "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." ) + if trainer is None: raise ValueError(f"Trainer cannot be None for Megatron-based models. Please provide a PTL trainer object.") @@ -233,16 +241,29 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): # buffer used during train_step for logging average loss over gradient accumulation steps self._reduced_loss_buffer = [] + # Overrides used when converting checkpoints + if os.environ.get(NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE, "false").lower() == "true": + app_state = AppState() + init_world_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size + init_global_rank = app_state.global_rank + init_local_rank = app_state.local_rank + else: + init_world_size = trainer.world_size + init_global_rank = trainer.global_rank + init_local_rank = trainer.local_rank + initialize_model_parallel_for_nemo( - world_size=trainer.world_size, - global_rank=trainer.global_rank, - local_rank=trainer.local_rank, + world_size=init_world_size, + global_rank=init_global_rank, + local_rank=init_local_rank, tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1), pipeline_model_parallel_size=cfg.get('pipeline_model_parallel_size', 1), virtual_pipeline_model_parallel_size=cfg.get('virtual_pipeline_model_parallel_size', None), pipeline_model_parallel_split_rank=cfg.get('pipeline_model_parallel_split_rank', 0), micro_batch_size=cfg.get('micro_batch_size'), global_batch_size=cfg.get('global_batch_size'), + rampup_batch_size=cfg.get('rampup_batch_size'), + use_fp8=cfg.get('fp8', False), seed=self.cfg.get('seed', 1234), apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30), ) @@ -381,7 +402,7 @@ def configure_gradient_clipping(self, *args, **kwargs): parameters = self._get_parameters() grad_norm = clip_grad_norm_fp32(parameters=parameters, max_norm=clip_val) - self.log('grad_norm', grad_norm, rank_zero_only=True) + self.log('grad_norm', grad_norm, rank_zero_only=True, batch_size=1) def allreduce_gradients(self): """Reduce gradients across data parallel ranks. @@ -421,8 +442,8 @@ def reduce_overlap_gradients(self): p for p in self._optimizer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False) ) - def on_train_batch_end(self, outputs, batch, batch_idx: int, unused: Optional[int] = 0) -> None: - super().on_train_batch_end(outputs, batch, batch_idx) + def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unused: Optional[int] = 0) -> None: + super().on_train_batch_end(outputs, dataloader_iter, batch_idx) # TODO: Replace with newer override for scheduler.step() instead of # search for plugins for fp16 GradScalar diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py index 8227ed00f37b..609a651f41fe 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -20,6 +20,7 @@ from einops import rearrange, repeat from functools import partial from omegaconf import DictConfig, OmegaConf, open_dict +from pytorch_lightning.accelerators import CPUAccelerator from pytorch_lightning import Trainer from pytorch_lightning.utilities.distributed import rank_zero_only from torch._dynamo import optimize @@ -43,28 +44,30 @@ from nemo.collections.multimodal.parts.stable_diffusion.utils import log_txt_as_img, exists, default, ismap, isimage, \ mean_flat, count_params from nemo.collections.multimodal.parts.utils import randn_like +from nemo.collections.nlp.modules.common.megatron.build_model import build_model from nemo.collections.nlp.parts.utils_funcs import get_last_rank from nemo.core.classes.common import Serialization from nemo.utils import logging try: - from apex.contrib.clip_grad import clip_grad_norm_ + from apex.transformer.pipeline_parallel.utils import get_num_microbatches from apex import amp - from apex.transformer import parallel_state - from apex.transformer.pipeline_parallel.schedules.common import build_model - from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_without_interleaving import ( - forward_backward_pipelining_without_interleaving, - ) - from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_with_interleaving import ( - _forward_backward_pipelining_with_interleaving, - ) - from apex.transformer.pipeline_parallel.schedules.fwd_bwd_no_pipelining import forward_backward_no_pipelining from apex.transformer.enums import AttnMaskType HAVE_APEX = True except (ImportError, ModuleNotFoundError): HAVE_APEX = False +try: + from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + __conditioning_keys__ = {'concat': 'c_concat', 'crossattn': 'c_crossattn', 'adm': 'y'} @@ -112,6 +115,7 @@ def __init__(self, cfg): self.channels = cfg.channels self.use_positional_encodings = cfg.use_positional_encodings self.model = DiffusionWrapper(cfg.unet_config, cfg.conditioning_key) + self.model_type = None count_params(self.model, verbose=True) self.v_posterior = cfg.v_posterior @@ -1373,6 +1377,10 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): raise ImportError( "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." ) + if not HAVE_MEGATRON_CORE: + raise ImportError( + "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) # this prevents base constructor from initializing tokenizer self.tokenizer = None @@ -1396,6 +1404,14 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): else: raise ValueError('precision must be in [32, 16, "bf16"]') + def get_module_list(self): + if isinstance(self.model, list): + return [model.module if isinstance(model, Float16Module) else model for model in self.model] + elif isinstance(self.model, Float16Module): + return [self.model.module] + else: + return [self.model] + def model_provider_func(self, pre_process=True, post_process=True): """Model depends on pipeline paralellism.""" model = LatentDiffusion(cfg=self.cfg) @@ -1414,7 +1430,7 @@ def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): batch[self.cfg.first_stage_key].cuda(non_blocking=True) self.model.on_train_batch_start(batch, batch_idx) - def training_step(self, batch, batch_idx): + def training_step(self, dataloader_iter, batch_idx): """ Our dataloaders produce a micro-batch and then we fetch a number of microbatches depending on the global batch size and model parallel size @@ -1423,26 +1439,26 @@ def training_step(self, batch, batch_idx): Microbatches are then moved to GPU during the pipeline. The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. """ + tensor_shape = None # Placeholder - # we zero grads here because we also call backward in the apex fwd/bwd functions + # we zero grads here because we also call backward in the megatron-core fwd/bwd functions self._optimizer.zero_grad() - # we prepare the micro batches for the apex fwd/bwd function - batch_for_pipeline = self.process_global_batch(batch) - # run forward and backwards passes for an entire global batch # we do this inside training_step to support pipeline parallelism - losses_reduced_per_micro_batch = forward_backward_no_pipelining( + fwd_bwd_function = get_forward_backward_func() + + losses_reduced_per_micro_batch = fwd_bwd_function( forward_step_func=self.get_forward_output_and_loss_func(), - batch=batch_for_pipeline, - model=self.model, + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), forward_only=False, - tensor_shape=None, # required by pipeline parallelism + tensor_shape=tensor_shape, dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, - custom_sync_context_handler=None, - sequence_parallel_enabled=False, - sync_batch_comm=False, + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) # losses_reduced_per_micro_batch is a list of dictionaries @@ -1482,19 +1498,20 @@ def training_step(self, batch, batch_idx): if self.cfg.precision == 16: loss_scale = self.trainer.precision_plugin.scaler._scale if loss_scale is not None: - self.log('loss_scale', loss_scale) + self.log('loss_scale', loss_scale, batch_size=1) self.log_dict(loss_dict, prog_bar=False, - logger=True, on_step=True, rank_zero_only=True) - self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True) + logger=True, on_step=True, rank_zero_only=True, batch_size=1) + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) lr = self._optimizer.param_groups[0]['lr'] - self.log('lr', lr, prog_bar=True, rank_zero_only=True) - self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True) + self.log('lr', lr, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) self.log( 'consumed_samples', self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), prog_bar=True, rank_zero_only=True, + batch_size=1, ) return loss_mean @@ -1511,9 +1528,12 @@ def optimizer_zero_grad(self, *args, **kwargs): """ pass - def _append_module_grads(self, module, grads): + def _append_sequence_parallel_module_grads(self, module, grads): + """ Helper method for allreduce_sequence_parallel_gradients""" + for param in module.parameters(): - if getattr(param, 'sequence_parallel_enabled', False): + sequence_parallel_param = getattr(param, 'sequence_parallel', False) + if sequence_parallel_param and param.requires_grad: if self.megatron_amp_O2: grad = param.main_grad else: @@ -1522,7 +1542,35 @@ def _append_module_grads(self, module, grads): def get_forward_output_and_loss_func(self): - def fwd_output_and_loss_func(batch, model): + def process_batch(batch): + """ Prepares the global batch for apex fwd/bwd functions. + Global batch is a list of micro batches. + """ + # noise_map, condition + batch[self.cfg.first_stage_key] = \ + batch[self.cfg.first_stage_key].cuda(non_blocking=True) + if isinstance(batch[self.cfg.cond_stage_key], torch.Tensor): + # in the case of precached text embeddings, cond_stage is also a tensor + batch[self.cfg.cond_stage_key] = batch[self.cfg.cond_stage_key].cuda(non_blocking=True) + + # SD has more dedicated structure for encoding, so we enable autocasting here as well + with torch.cuda.amp.autocast( + self.autocast_dtype in (torch.half, torch.bfloat16), + dtype=self.autocast_dtype, + ): + x, c = self.model.get_input(batch, self.cfg.first_stage_key) + + if not isinstance(c, dict): + return [x, c] + + if len(self.conditioning_keys) == 0: + self.conditioning_keys = list(c.keys()) + c_list = [c[key] for key in self.conditioning_keys] + return [x, *c_list] + + def fwd_output_and_loss_func(dataloader_iter, model): + batch = next(dataloader_iter) + batch = process_batch(batch) batch = [x.cuda(non_blocking=True) for x in batch] if len(self.conditioning_keys) == 0: x, c = batch @@ -1547,18 +1595,22 @@ def fwd_output_only_func(batch, model): return fwd_output_only_func - @torch.no_grad() - def validation_step(self, batch, batch_idx): - batch_for_pipeline = self.process_global_batch(batch) + def validation_step(self, dataloader_iter, batch_idx): + tensor_shape = None # Placeholder + fwd_bwd_function = get_forward_backward_func() - losses_reduced_per_micro_batch = forward_backward_no_pipelining( + losses_reduced_per_micro_batch = fwd_bwd_function( forward_step_func=self.get_forward_output_and_loss_func(), - batch=batch_for_pipeline, - model=self.model, + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), forward_only=True, - tensor_shape=None, # required by pipeline parallelism + tensor_shape=tensor_shape, dtype=self.autocast_dtype, + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) + # only the last stages of the pipeline return losses val_loss_dict = {} if losses_reduced_per_micro_batch: @@ -1568,33 +1620,7 @@ def validation_step(self, batch, batch_idx): loss_tensor = torch.stack(loss_tensors_list) val_loss_dict[key] = loss_tensor.mean() - self.log_dict(val_loss_dict, prog_bar=False, logger=True, on_step=False, on_epoch=True) - - def process_global_batch(self, global_batch, global_batch_size=None): - """ Prepares the global batch for apex fwd/bwd functions. - Global batch is a list of micro batches. - """ - # noise_map, condition - global_batch[self.cfg.first_stage_key] = \ - global_batch[self.cfg.first_stage_key].cuda(non_blocking=True) - if isinstance(global_batch[self.cfg.cond_stage_key], torch.Tensor): - # in the case of precached text embeddings, cond_stage is also a tensor - global_batch[self.cfg.cond_stage_key] = global_batch[self.cfg.cond_stage_key].cuda(non_blocking=True) - - # SD has more dedicated structure for encoding, so we enable autocasting here as well - with torch.cuda.amp.autocast( - self.autocast_dtype in (torch.half, torch.bfloat16), - dtype=self.autocast_dtype, - ): - x, c = self.model.get_input(global_batch, self.cfg.first_stage_key) - - if not isinstance(c, dict): - return [x, c] - - if len(self.conditioning_keys) == 0: - self.conditioning_keys = list(c.keys()) - c_list = [c[key] for key in self.conditioning_keys] - return [x, *c_list] + self.log_dict(val_loss_dict, prog_bar=False, logger=True, on_step=False, on_epoch=True, batch_size=1) def setup(self, stage=None): """ PTL hook that is executed after DDP spawns. @@ -1637,9 +1663,8 @@ def setup(self, stage=None): self.build_train_valid_test_datasets() # Batch size need to be provided for webdatset - self._num_micro_batches = self.cfg.global_batch_size // ( - self.cfg.micro_batch_size * parallel_state.get_data_parallel_world_size()) - self._global_batch_size_on_this_data_parallel_rank = self._num_micro_batches * self.cfg.micro_batch_size + self._num_micro_batches = get_num_microbatches() + self._micro_batch_size = self.cfg.micro_batch_size self.setup_training_data(self.cfg.data) self.setup_validation_data(self.cfg.data) @@ -1679,7 +1704,7 @@ def setup_training_data(self, cfg): ) self._train_dl = torch.utils.data.DataLoader( self._train_ds, - batch_size=self._global_batch_size_on_this_data_parallel_rank, + batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, drop_last=True, @@ -1694,7 +1719,7 @@ def setup_validation_data(self, cfg): ) self._validation_dl = torch.utils.data.DataLoader( self._validation_ds, - batch_size=self._global_batch_size_on_this_data_parallel_rank, + batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, drop_last=False, @@ -1708,7 +1733,7 @@ def setup_test_data(self, cfg): f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' ) self._test_dl = torch.utils.data.DataLoader( - self._test_ds, batch_size=self._global_batch_size_on_this_data_parallel_rank, + self._test_ds, batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, ) diff --git a/nemo/collections/multimodal/parts/utils.py b/nemo/collections/multimodal/parts/utils.py index 53b61023efba..346ef5571768 100644 --- a/nemo/collections/multimodal/parts/utils.py +++ b/nemo/collections/multimodal/parts/utils.py @@ -14,7 +14,7 @@ import os import torch from PIL import Image -from apex.transformer import parallel_state + from omegaconf import OmegaConf, DictConfig, open_dict from pytorch_lightning import Trainer from pytorch_lightning.plugins.environments import TorchElasticEnvironment @@ -27,6 +27,14 @@ from nemo.utils import AppState, logging from nemo.utils.distributed import initialize_distributed +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False def numpy_to_pil(images): """ diff --git a/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py b/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py index edc58ee999c2..fe763dd2bd02 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py @@ -38,8 +38,6 @@ def __init__( # Sanity checks. if total_samples <= 0: raise RuntimeError("no sample to consume: {}".format(total_samples)) - if consumed_samples >= total_samples: - raise RuntimeError("no samples left to consume: {}, {}".format(consumed_samples, total_samples)) if micro_batch_size <= 0: raise RuntimeError(f"micro_batch_size size must be greater than 0, but {micro_batch_size}") if data_parallel_size <= 0: @@ -68,6 +66,7 @@ def __init__( self.consumed_samples = consumed_samples self.micro_batch_size = micro_batch_size self.data_parallel_rank = data_parallel_rank + self.data_parallel_size = data_parallel_size self.micro_batch_times_data_parallel_size = self.micro_batch_size * data_parallel_size self.drop_last = drop_last self.global_batch_size = global_batch_size @@ -150,6 +149,16 @@ def __init__( ), "`MegatronPretrainingRandomSampler` does not support sample padding" self.last_batch_size = self.total_samples % self.micro_batch_times_data_parallel_size + def __len__(self): + num_available_samples: int = self.total_samples + if self.global_batch_size is not None: + if self.drop_last: + return num_available_samples // self.global_batch_size + else: + return (num_available_samples + self.global_batch_size - 1) // self.global_batch_size + else: + return (num_available_samples - 1) // self.micro_batch_times_data_parallel_size + 1 + def __iter__(self): active_total_samples = self.total_samples - self.last_batch_size self.epoch = self.consumed_samples // active_total_samples diff --git a/nemo/collections/nlp/modules/common/megatron/mlp.py b/nemo/collections/nlp/modules/common/megatron/mlp.py index 43e30784c63a..1deff2c1fdc4 100644 --- a/nemo/collections/nlp/modules/common/megatron/mlp.py +++ b/nemo/collections/nlp/modules/common/megatron/mlp.py @@ -31,7 +31,6 @@ try: from apex.normalization import MixedFusedRMSNorm - from apex.transformer import parallel_state, tensor_parallel HAVE_APEX = True diff --git a/nemo/collections/vision/data/megatron/data_samplers.py b/nemo/collections/vision/data/megatron/data_samplers.py index 59f3d2de3fd4..f42ee6672115 100644 --- a/nemo/collections/vision/data/megatron/data_samplers.py +++ b/nemo/collections/vision/data/megatron/data_samplers.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. import torch +from typing import Any, Optional, List, Dict from torch.utils.data import Dataset -from nemo.collections.nlp.data.language_modeling.megatron.megatron_batch_samplers import BaseMegatronBatchSampler +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import MegatronPretrainingRandomSampler from nemo.collections.vision.data.megatron.vit_dataset import RandomSeedDataset -class MegatronVisionPretrainingRandomBatchSampler(BaseMegatronBatchSampler): +class MegatronVisionPretrainingRandomSampler(MegatronPretrainingRandomSampler): def __init__( self, @@ -26,44 +27,38 @@ def __init__( total_samples: int, consumed_samples: int, micro_batch_size: int, - global_batch_size: int, data_parallel_rank: int, data_parallel_size: int, - drop_last: bool, data_sharding: bool, + drop_last: bool = True, + global_batch_size: Optional[int] = None, + pad_samples_to_global_batch_size: Optional[bool] = False, ) -> None: super().__init__( total_samples=total_samples, consumed_samples=consumed_samples, micro_batch_size=micro_batch_size, - global_batch_size=global_batch_size, data_parallel_rank=data_parallel_rank, data_parallel_size=data_parallel_size, drop_last=drop_last, + global_batch_size=global_batch_size, + pad_samples_to_global_batch_size=pad_samples_to_global_batch_size, ) self.dataset = dataset self.data_sharding = data_sharding - self.last_batch_size = self.total_samples % self.global_batch_size - - def __len__(self): - num_available_samples = self.total_samples - if self.drop_last: - return num_available_samples // self.global_batch_size - else: - return (num_available_samples + self.global_batch_size - 1) // self.global_batch_size def __iter__(self): active_total_samples = self.total_samples - self.last_batch_size self.epoch = self.consumed_samples // active_total_samples current_epoch_samples = self.consumed_samples % active_total_samples - assert current_epoch_samples % (self.micro_batch_size * self.data_parallel_size) == 0 + assert current_epoch_samples % self.micro_batch_times_data_parallel_size == 0 if isinstance(self.dataset, RandomSeedDataset): self.dataset.set_epoch(self.epoch) # data sharding and random sampling if self.data_sharding: - bucket_size = (self.total_samples // (self.micro_batch_size * self.data_parallel_size)) \ + bucket_size = (self.total_samples // self.micro_batch_times_data_parallel_size) \ * self.micro_batch_size bucket_offset = current_epoch_samples // self.data_parallel_size start_idx = self.data_parallel_rank * bucket_size @@ -87,10 +82,11 @@ def __iter__(self): # Last batch if not complete will be dropped. for idx in idx_range: batch.append(idx) - if len(batch) == self._global_batch_size_on_this_data_parallel_rank: - self.consumed_samples += self._global_batch_size + if len(batch) == self.micro_batch_size: + self.consumed_samples += self.micro_batch_times_data_parallel_size yield batch batch = [] + # Check the last partial batch and see drop_last is set if len(batch) > 0 and not self.drop_last: yield batch diff --git a/nemo/collections/vision/models/megatron_vit_classification_models.py b/nemo/collections/vision/models/megatron_vit_classification_models.py index cedd9c388638..cefa8478a200 100644 --- a/nemo/collections/vision/models/megatron_vit_classification_models.py +++ b/nemo/collections/vision/models/megatron_vit_classification_models.py @@ -17,12 +17,14 @@ import torch from functools import partial from omegaconf.dictconfig import DictConfig +from pytorch_lightning.accelerators import CPUAccelerator from pytorch_lightning.trainer.trainer import Trainer from typing import Any, Optional, List, Dict -from nemo.collections.nlp.data.language_modeling.megatron.megatron_batch_samplers import ( - MegatronPretrainingBatchSampler, +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( + MegatronPretrainingSampler, ) +from nemo.collections.nlp.modules.common.megatron.build_model import build_model from nemo.collections.nlp.modules.common.megatron.module import ( MegatronModule, Float16Module, @@ -36,7 +38,7 @@ get_params_for_weight_decay_optimization, ) from nemo.collections.nlp.parts.utils_funcs import get_last_rank -from nemo.collections.vision.data.megatron.data_samplers import MegatronVisionPretrainingRandomBatchSampler +from nemo.collections.vision.data.megatron.data_samplers import MegatronVisionPretrainingRandomSampler from nemo.collections.vision.data.megatron.vit_dataset import build_train_valid_datasets from nemo.collections.vision.models.vision_base_model import MegatronVisionModel from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone, VitMlpHead @@ -45,20 +47,25 @@ from nemo.utils import logging try: - from apex.transformer import parallel_state - from apex.transformer.pipeline_parallel.schedules.common import build_model - from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_without_interleaving import ( - forward_backward_pipelining_without_interleaving, - ) - from apex.transformer.pipeline_parallel.schedules.fwd_bwd_pipelining_with_interleaving import ( - _forward_backward_pipelining_with_interleaving, - ) - from apex.transformer.pipeline_parallel.schedules.fwd_bwd_no_pipelining import forward_backward_no_pipelining + import apex.transformer.pipeline_parallel.utils + from apex.transformer.pipeline_parallel.utils import get_num_microbatches HAVE_APEX = True + except (ImportError, ModuleNotFoundError): + HAVE_APEX = False +try: + from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + class VitClassificationModel(MegatronModule): """Vision Transformer Model.""" @@ -118,6 +125,11 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): raise ImportError( "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." ) + if not HAVE_MEGATRON_CORE: + raise ImportError( + "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + super().__init__(cfg, trainer=trainer) self._validate_trainer() @@ -129,11 +141,19 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): raise ValueError('Virtual pipeline model parallel is only supported when using megatron_amp_O2') # build_model returns a list of modules which are used for interleaved pipeline parallelism - self.model = build_model( - model_provider_func=self.model_provider_func, - wrap_with_ddp=False, - virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), - ) + if isinstance(self.trainer.accelerator, CPUAccelerator): + self.model = build_model( + model_provider_func=self.model_provider_func, + wrap_with_ddp=False, + on_cpu=True, + virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), + ) + else: + self.model = build_model( + model_provider_func=self.model_provider_func, + wrap_with_ddp=False, + virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), + ) # if we're not using interleaved, then self.model is a module. if self.cfg.get('virtual_pipeline_model_parallel_size', None) is None: @@ -167,6 +187,14 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): else: raise ValueError('precision must be in [32, 16, "bf16"]') + def get_module_list(self): + if isinstance(self.model, list): + return [model.module if isinstance(model, Float16Module) else model for model in self.model] + elif isinstance(self.model, Float16Module): + return [self.model.module] + else: + return [self.model] + def model_provider_func(self, pre_process, post_process): """Model depends on pipeline paralellism.""" model = VitClassificationModel( @@ -228,17 +256,7 @@ def forward(self, tokens): output_tensor = self.model(tokens) return output_tensor - def _get_fwd_bwd_function(self): - if self.cfg.get('pipeline_model_parallel_size', 1) > 1: - if self.cfg.get('virtual_pipeline_model_parallel_size', None) is not None: - fwd_bwd_function = _forward_backward_pipelining_with_interleaving - else: - fwd_bwd_function = forward_backward_pipelining_without_interleaving - else: - fwd_bwd_function = forward_backward_no_pipelining - return fwd_bwd_function - - def training_step(self, batch, batch_idx): + def training_step(self, dataloader_iter, batch_idx): """ Our dataloaders produce a micro-batch and then we fetch a number of microbatches depending on the global batch size and model parallel size @@ -248,51 +266,27 @@ def training_step(self, batch, batch_idx): The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. """ - # we zero grads here because we also call backward in the apex fwd/bwd functions + # we zero grads here because we also call backward in the megatron-core fwd/bwd functions self._optimizer.zero_grad() - if parallel_state.is_pipeline_first_stage(ignore_virtual=True) or parallel_state.is_pipeline_last_stage( - ignore_virtual=True - ): - # we prepare the micro batches for the apex fwd/bwd function - batch_for_pipeline = self.process_global_batch(batch) - else: - # The intermediate pipeline stages do not need any inputs from data loader - # GPT3 uses decoder with AttnMask:causal, thus doesn't need attention_mask - batch_for_pipeline = None - # TODO (yuya): fix this shape tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] - # handle asynchronous grad reduction - if self.with_distributed_adam: - if self.megatron_amp_O2: - # copy grads to main grad - custom_sync_context_handler = lambda: self._optimizer.no_sync(greedy_grad_copy=True) - else: - # keep grad tensors around - custom_sync_context_handler = lambda: self._optimizer.no_sync(greedy_grad_copy=False) - else: - if self.megatron_amp_O2 and not self.cfg.get('sequence_parallel', False): - custom_sync_context_handler = self._optimizer.no_sync - else: - # TODO: enable async grad all reduce for O1/autocast mixed precision training - custom_sync_context_handler = None - # run forward and backwards passes for an entire global batch # we do this inside training_step to support pipeline parallelism - fwd_bwd_function = self._get_fwd_bwd_function() + fwd_bwd_function = get_forward_backward_func() losses_reduced_per_micro_batch = fwd_bwd_function( forward_step_func=self.get_forward_output_and_loss_func(), - batch=batch_for_pipeline, - model=self.model, + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), forward_only=False, tensor_shape=tensor_shape, dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, - custom_sync_context_handler=custom_sync_context_handler, - sequence_parallel_enabled=self.cfg.get('sequence_parallel', False), + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) # only the last stages of the pipeline return losses @@ -337,17 +331,18 @@ def training_step(self, batch, batch_idx): if self.cfg.precision == 16: loss_scale = self.trainer.precision_plugin.scaler._scale if loss_scale is not None: - self.log('loss_scale', loss_scale) + self.log('loss_scale', loss_scale, batch_size=1) - self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True) + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) lr = self._optimizer.param_groups[0]['lr'] - self.log('lr', lr, rank_zero_only=True) - self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True) + self.log('lr', lr, rank_zero_only=True, batch_size=1) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) self.log( 'consumed_samples', self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), prog_bar=True, rank_zero_only=True, + batch_size=1, ) return loss_mean @@ -365,9 +360,12 @@ def optimizer_zero_grad(self, *args, **kwargs): """ pass - def _append_module_grads(self, module, grads): + def _append_sequence_parallel_module_grads(self, module, grads): + """ Helper method for allreduce_sequence_parallel_gradients""" + for param in module.parameters(): - if getattr(param, 'sequence_parallel_enabled', False): + sequence_parallel_param = getattr(param, 'sequence_parallel', False) + if sequence_parallel_param and param.requires_grad: if self.megatron_amp_O2: grad = param.main_grad else: @@ -383,45 +381,15 @@ def allreduce_sequence_parallel_gradients(self): grads = [] if isinstance(self.model, list): for module in self.model: - self._append_module_grads(module, grads) + self._append_sequence_parallel_module_grads(module, grads) else: - self._append_module_grads(self.model, grads) + self._append_sequence_parallel_module_grads(self.model, grads) coalesced = torch._utils._flatten_dense_tensors(grads) torch.distributed.all_reduce(coalesced, group=parallel_state.get_tensor_model_parallel_group()) for buf, synced in zip(grads, torch._utils._unflatten_dense_tensors(coalesced, grads)): buf.copy_(synced) - # def allreduce_first_last_embeddings(self): - # - # # Modified from megatron-lm: https://github.com/NVIDIA/Megatron-LM/blob/d41696840ed0a7edb7e0499eb82a48ae112d9bb3/megatron/training.py#L407 - # # All-reduce word_embeddings' grad across first and last stages to ensure - # # that word_embeddings parameters stay in sync. - # # This should only run for models that support pipelined model parallelism - # # (BERT and GPT-2). - # if parallel_state.get_pipeline_model_parallel_world_size() > 1 and ( - # parallel_state.is_pipeline_first_stage(ignore_virtual=True) - # or parallel_state.is_pipeline_last_stage(ignore_virtual=True) - # ): - # if parallel_state.is_pipeline_first_stage(ignore_virtual=True): - # if isinstance(self.model, list): - # module = self.model[0] # only the first virtual rank has the embeddings - # else: - # module = self.model - # if parallel_state.is_pipeline_last_stage(ignore_virtual=True): - # if isinstance(self.model, list): - # module = self.model[-1] # only the last virtual rank has the embeddings - # else: - # module = self.model - # if module.share_token_embeddings: - # word_embeddings_weight = module.word_embeddings_weight() - # if self.megatron_amp_O2: - # # O2 recipe stores a "main" copy of weights and grads - # grad = word_embeddings_weight.main_grad - # else: - # grad = word_embeddings_weight.grad - # torch.distributed.all_reduce(grad, group=parallel_state.get_embedding_group()) - def get_forward_output_and_loss_func(self): def loss_func(labels, output_tensor): @@ -436,7 +404,8 @@ def loss_func(labels, output_tensor): return loss, {"loss": averaged_loss[0], "accuracy": averaged_loss[1]} - def fwd_output_and_loss_func(batch, model): + def fwd_output_and_loss_func(dataloader_iter, model): + batch = next(dataloader_iter) if parallel_state.get_pipeline_model_parallel_world_size() == 1: batch = [x.cuda(non_blocking=True) for x in batch] tokens, labels = batch @@ -465,59 +434,43 @@ def fwd_output_only_func(batch, model): return fwd_output_only_func - def validation_step(self, batch, batch_idx): + def validation_step(self, dataloader_iter, batch_idx): """ Our dataloaders produce a micro-batch and then we fetch a number of microbatches depending on the global batch size and model parallel size from the dataloader to produce a list of microbatches. - The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. """ - batch_for_pipeline = self.process_global_batch(batch, self.cfg.global_batch_size) tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] # run forward passes for an entire global batch # we do this inside validation_step to support pipeline parallelism - fwd_bwd_function = self._get_fwd_bwd_function() + fwd_bwd_function = get_forward_backward_func() losses_reduced_per_micro_batch = fwd_bwd_function( forward_step_func=self.get_forward_output_and_loss_func(), - batch=batch_for_pipeline, - model=self.model, + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), forward_only=True, tensor_shape=tensor_shape, dtype=self.autocast_dtype, - sequence_parallel_enabled=self.cfg.get('sequence_parallel', False), + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) - def _get_metric_with_batch_size(metric_key): + def _get_metric(metric_key): # only the last stage of the pipeline returns losses if losses_reduced_per_micro_batch: - actual_batch_size = batch[0].shape[0] # Might be lesser than global_batch_size if drop_last=False - expected_batch_size = self.cfg.global_batch_size // parallel_state.get_data_parallel_world_size() - if actual_batch_size == expected_batch_size: - loss_with_batch_size_list = [ - [loss_reduced[metric_key].item(), self.cfg.micro_batch_size] - for loss_reduced in losses_reduced_per_micro_batch - ] - else: - loss_with_batch_size_list = [] - total_samples_remaining = actual_batch_size - for loss_reduced in losses_reduced_per_micro_batch: - if total_samples_remaining <= 0: - break - if total_samples_remaining // self.cfg.micro_batch_size >= 1: - loss_with_batch_size_list.append( - [loss_reduced[metric_key].item(), self.cfg.micro_batch_size]) - else: - loss_with_batch_size_list.append([loss_reduced[metric_key].item(), total_samples_remaining]) - total_samples_remaining = total_samples_remaining - self.cfg.micro_batch_size + loss_tensors_list = [loss_reduced[metric_key] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.vstack(loss_tensors_list) + loss_mean = loss_tensor.mean(axis=0) else: - # we're not on the last pipeline stage so no losses - loss_with_batch_size_list = [] - return loss_with_batch_size_list + loss_mean = torch.tensor([0.0]).cuda() + return loss_mean[0] - return _get_metric_with_batch_size('loss'), _get_metric_with_batch_size('accuracy') + return _get_metric('loss'), _get_metric('accuracy') def validation_epoch_end(self, outputs): # TODO (yuya): need fix later, check with Sean @@ -528,34 +481,22 @@ def validation_epoch_end(self, outputs): loss_outputs = [output[0] for output in outputs] acc_outputs = [output[1] for output in outputs] - def _get_average_metric(metric_outputs): - # only the last pipeline parallel stages return metric with their batch size - total_num_samples = 0 - total_metric = 0 - for metric_with_batch_size in metric_outputs: - metric_with_batch_size_array = np.array(metric_with_batch_size).flatten() - batch_metrices = metric_with_batch_size_array[0::2] - batch_sizes = metric_with_batch_size_array[1::2] - total_num_samples += sum(batch_sizes) - total_metric += np.dot(batch_metrices, batch_sizes) - - avg_metric = total_metric / total_num_samples - return avg_metric - averaged_metrics = torch.tensor( - [_get_average_metric(loss_outputs), _get_average_metric(acc_outputs)], - dtype=torch.float32).cuda() + [torch.stack(loss_outputs).mean(), torch.stack(acc_outputs).mean()], + dtype=torch.float32, device='cuda') else: - averaged_metrics = torch.tensor([0.0, 0.0], dtype=torch.float32).cuda() + averaged_metrics = torch.tensor([0.0, 0.0], dtype=torch.float32, device='cuda') # we can only log on one rank if it is rank zero so we broadcast from last rank torch.distributed.broadcast(averaged_metrics, get_last_rank()) averaged_loss, averaged_acc = averaged_metrics - self.log('global_step', self.trainer.global_step, prog_bar=True, rank_zero_only=True) - self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True) - self.log('val_accuracy', averaged_acc, prog_bar=True, rank_zero_only=True) + self.log('global_step', self.trainer.global_step, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('val_accuracy', averaged_acc, prog_bar=True, rank_zero_only=True, batch_size=1) + + return averaged_loss def test_step(self, batch, batch_idx): return self.validation_step(batch, batch_idx) @@ -564,32 +505,6 @@ def test_epoch_end(self, outputs): averaged_loss = average_losses_across_data_parallel_group(outputs) logging.info(f'test_loss: {averaged_loss[0]}') - def process_global_batch(self, global_batch, global_batch_size=None): - """ Prepares the global batch for apex fwd/bwd functions. - Global batch is a list of micro batches. - """ - tokens = global_batch[0] # images - labels = global_batch[1] - - expected_batch_size = None - if global_batch_size is not None: - expected_batch_size = global_batch_size // parallel_state.get_data_parallel_world_size() - current_batch_size = tokens.shape[0] - if expected_batch_size is not None and expected_batch_size > current_batch_size: - logging.info( - 'Got batch size of ' - + str(current_batch_size) - + ' , expected batch size :' - + str(expected_batch_size) - + '. Appending dummy data.' - ) - pad_length = expected_batch_size - current_batch_size - pad_dim = (int(pad_length), tokens.shape[1]) - tokens = torch.cat((tokens, torch.ones(pad_dim, dtype=tokens.dtype))) - labels = torch.cat((labels, torch.ones(pad_dim, dtype=labels.dtype))) - - return [tokens, labels] - def build_train_valid_test_datasets(self): logging.info('Building datasets for ViT...') if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): @@ -612,29 +527,27 @@ def build_train_valid_test_datasets(self): return self._train_ds, self._validation_ds, self._test_ds - def build_pretraining_data_loader(self, dataset, consumed_samples, dataset_type=None, drop_last=True): + def build_pretraining_data_loader(self, dataset, consumed_samples, drop_last=True): """Buld dataloader given an input dataset.""" logging.info(f'Building dataloader with consumed samples: {consumed_samples}') # Megatron sampler if hasattr(self.cfg.data, 'dataloader_type') and self.cfg.data.dataloader_type is not None: if self.cfg.data.dataloader_type == 'single': - batch_sampler = MegatronPretrainingBatchSampler( + batch_sampler = MegatronPretrainingSampler( total_samples=len(dataset), consumed_samples=consumed_samples, micro_batch_size=self.cfg.micro_batch_size, - global_batch_size=self.cfg.global_batch_size, data_parallel_rank=parallel_state.get_data_parallel_rank(), data_parallel_size=parallel_state.get_data_parallel_world_size(), drop_last=drop_last, ) elif self.cfg.data.dataloader_type == 'cyclic': - batch_sampler = MegatronVisionPretrainingRandomBatchSampler( + batch_sampler = MegatronVisionPretrainingRandomSampler( dataset=dataset, total_samples=len(dataset), consumed_samples=consumed_samples, micro_batch_size=self.cfg.micro_batch_size, - global_batch_size=self.cfg.global_batch_size, data_parallel_rank=parallel_state.get_data_parallel_rank(), data_parallel_size=parallel_state.get_data_parallel_world_size(), drop_last=drop_last, @@ -735,7 +648,7 @@ def setup_validation_data(self, cfg): logging.info(f'Drop last in validation dataset is set to False') drop_last = False self._validation_dl = self.build_pretraining_data_loader( - self._validation_ds, consumed_samples, "validation", drop_last + self._validation_ds, consumed_samples, ) def setup_test_data(self, cfg): diff --git a/nemo/collections/vision/models/vision_base_model.py b/nemo/collections/vision/models/vision_base_model.py index 0d594afb8b50..1f07bfeab6a9 100644 --- a/nemo/collections/vision/models/vision_base_model.py +++ b/nemo/collections/vision/models/vision_base_model.py @@ -32,7 +32,7 @@ clip_grad_norm_fp32, ) from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo -from nemo.collections.nlp.parts.nlp_overrides import GradScaler +from nemo.collections.nlp.parts.nlp_overrides import NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE, GradScaler from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector from nemo.core.classes import ModelPT from nemo.core.classes.exportable import Exportable @@ -41,13 +41,20 @@ from nemo.utils.get_rank import is_global_rank_zero try: - from apex.transformer import parallel_state from apex.transformer.pipeline_parallel.utils import get_num_microbatches HAVE_APEX = True except (ImportError, ModuleNotFoundError): HAVE_APEX = False +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True +except (ImportError, ModuleNotFoundError): + HAVE_MEGATRON_CORE = False + + __all__ = ['VisionModel', 'MegatronVisionModel'] NEMO_VISION_TMP = os.path.join(os.path.dirname(str(TRANSFORMERS_CACHE)), "nemo_vision_tmp") @@ -167,11 +174,12 @@ class MegatronVisionModel(VisionModel): """ def __init__(self, cfg: DictConfig, trainer: Trainer): - # FIXME: switch to self._cfg - if not HAVE_APEX: + + if not HAVE_MEGATRON_CORE: raise ImportError( - "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." ) + if trainer is None: raise ValueError(f"Trainer cannot be None for Megatron-based models. Please provide a PTL trainer object.") @@ -190,16 +198,29 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): # buffer used during train_step for logging average loss over gradient accumulation steps self._reduced_loss_buffer = [] + # Overrides used when converting checkpoints + if os.environ.get(NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE, "false").lower() == "true": + app_state = AppState() + init_world_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size + init_global_rank = app_state.global_rank + init_local_rank = app_state.local_rank + else: + init_world_size = trainer.world_size + init_global_rank = trainer.global_rank + init_local_rank = trainer.local_rank + initialize_model_parallel_for_nemo( - world_size=trainer.world_size, - global_rank=trainer.global_rank, - local_rank=trainer.local_rank, + world_size=init_world_size, + global_rank=init_global_rank, + local_rank=init_local_rank, tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1), pipeline_model_parallel_size=cfg.get('pipeline_model_parallel_size', 1), virtual_pipeline_model_parallel_size=cfg.get('virtual_pipeline_model_parallel_size', None), pipeline_model_parallel_split_rank=cfg.get('pipeline_model_parallel_split_rank', 0), micro_batch_size=cfg.get('micro_batch_size'), global_batch_size=cfg.get('global_batch_size'), + rampup_batch_size=cfg.get('rampup_batch_size'), + use_fp8=cfg.get('fp8', False), seed=self.cfg.get('seed', 1234), apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30), ) @@ -284,7 +305,7 @@ def configure_gradient_clipping(self, *args, **kwargs): parameters = self._get_parameters() grad_norm = clip_grad_norm_fp32(parameters=parameters, max_norm=clip_val) - self.log('grad_norm', grad_norm, rank_zero_only=True) + self.log('grad_norm', grad_norm, rank_zero_only=True, batch_size=1) def allreduce_gradients(self): """Reduce gradients across data parallel ranks. @@ -324,8 +345,8 @@ def reduce_overlap_gradients(self): p for p in self._optimizer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False) ) - def on_train_batch_end(self, outputs, batch, batch_idx: int, unused: Optional[int] = 0) -> None: - super().on_train_batch_end(outputs, batch, batch_idx) + def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unused: Optional[int] = 0) -> None: + super().on_train_batch_end(outputs, dataloader_iter, batch_idx) # TODO: Replace with newer override for scheduler.step() instead of # search for plugins for fp16 GradScalar diff --git a/nemo/collections/vision/modules/common/megatron/vision_transformer.py b/nemo/collections/vision/modules/common/megatron/vision_transformer.py index 42e3e3e52bdc..2a3c0f6e457c 100644 --- a/nemo/collections/vision/modules/common/megatron/vision_transformer.py +++ b/nemo/collections/vision/modules/common/megatron/vision_transformer.py @@ -24,10 +24,8 @@ from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults try: - from apex.transformer import parallel_state, tensor_parallel from apex.transformer.enums import AttnMaskType, AttnType, ModelType from apex.transformer.utils import divide as safe_divide - from apex.transformer.parallel_state import get_tensor_model_parallel_world_size from apex.normalization import MixedFusedRMSNorm HAVE_APEX = True @@ -39,6 +37,16 @@ # fake missing classes with None attributes ModelType = AttnMaskType = AttnType = LayerType = ApexGuardDefaults() +try: + from megatron.core import parallel_state, tensor_parallel + from megatron.core.parallel_state import get_tensor_model_parallel_world_size + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + """ We use the following notation throughout this file: h: hidden size n: number of attention heads diff --git a/nemo/collections/vision/modules/vit/vit_backbone.py b/nemo/collections/vision/modules/vit/vit_backbone.py index e14fb23ffbb4..53684775090c 100644 --- a/nemo/collections/vision/modules/vit/vit_backbone.py +++ b/nemo/collections/vision/modules/vit/vit_backbone.py @@ -31,7 +31,6 @@ try: import apex - from apex.transformer import tensor_parallel from apex.transformer.enums import AttnMaskType HAVE_APEX = True From f3bd8ce0c698464b2c0cb8f50a0e22974f244ee2 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Mon, 1 May 2023 11:52:35 -0700 Subject: [PATCH 003/512] Fix data sampler length issue --- nemo/collections/multimodal/data/clip/clip_dataset.py | 1 + .../nlp/data/language_modeling/megatron/data_samplers.py | 5 ++++- .../vision/models/megatron_vit_classification_models.py | 2 ++ nemo/utils/timers.py | 6 ++++++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/nemo/collections/multimodal/data/clip/clip_dataset.py b/nemo/collections/multimodal/data/clip/clip_dataset.py index 61388c94270b..4153e4f1f271 100644 --- a/nemo/collections/multimodal/data/clip/clip_dataset.py +++ b/nemo/collections/multimodal/data/clip/clip_dataset.py @@ -158,6 +158,7 @@ def build_imagenet_validation_dataloader(model_cfg, tokenizer=None): total_samples=len(image_dataset), consumed_samples=0, micro_batch_size=model_cfg.micro_batch_size, + global_batch_size=model_cfg.global_batch_size, data_parallel_rank=parallel_state.get_data_parallel_rank(), data_parallel_size=parallel_state.get_data_parallel_world_size(), drop_last=False, diff --git a/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py b/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py index fe763dd2bd02..b3d167419494 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py @@ -157,7 +157,10 @@ def __len__(self): else: return (num_available_samples + self.global_batch_size - 1) // self.global_batch_size else: - return (num_available_samples - 1) // self.micro_batch_times_data_parallel_size + 1 + if self.drop_last: + return num_available_samples // self.micro_batch_times_data_parallel_size + else: + return (num_available_samples - 1) // self.micro_batch_times_data_parallel_size def __iter__(self): active_total_samples = self.total_samples - self.last_batch_size diff --git a/nemo/collections/vision/models/megatron_vit_classification_models.py b/nemo/collections/vision/models/megatron_vit_classification_models.py index cefa8478a200..ef0e0d7fe66a 100644 --- a/nemo/collections/vision/models/megatron_vit_classification_models.py +++ b/nemo/collections/vision/models/megatron_vit_classification_models.py @@ -538,6 +538,7 @@ def build_pretraining_data_loader(self, dataset, consumed_samples, drop_last=Tru total_samples=len(dataset), consumed_samples=consumed_samples, micro_batch_size=self.cfg.micro_batch_size, + global_batch_size=self.cfg.global_batch_size, data_parallel_rank=parallel_state.get_data_parallel_rank(), data_parallel_size=parallel_state.get_data_parallel_world_size(), drop_last=drop_last, @@ -548,6 +549,7 @@ def build_pretraining_data_loader(self, dataset, consumed_samples, drop_last=Tru total_samples=len(dataset), consumed_samples=consumed_samples, micro_batch_size=self.cfg.micro_batch_size, + global_batch_size=self.cfg.global_batch_size, data_parallel_rank=parallel_state.get_data_parallel_rank(), data_parallel_size=parallel_state.get_data_parallel_world_size(), drop_last=drop_last, diff --git a/nemo/utils/timers.py b/nemo/utils/timers.py index ecd2a7e06319..6bb84d51c198 100644 --- a/nemo/utils/timers.py +++ b/nemo/utils/timers.py @@ -126,6 +126,12 @@ def stop(self, name=""): self.timers[name] = timer_data + def is_active(self, name=""): + timer_data = self.timers.get(name, {}) + if "start" in timer_data: + return True + return False + def active_timers(self): """ Return list of all active named timers From cc539c6bd199c2deda4fb537c267336a2aa84119 Mon Sep 17 00:00:00 2001 From: Maanu Grover Date: Tue, 2 May 2023 00:21:26 -0500 Subject: [PATCH 004/512] run formatting --- examples/multimodal/convert_ckpt_to_nemo.py | 28 +- .../foundation/clip/megatron_clip_export.py | 28 +- .../clip/megatron_clip_imagenet_zeroshot.py | 36 +- .../foundation/clip/megatron_clip_infer.py | 23 +- .../foundation/clip/megatron_clip_pretrain.py | 7 +- .../generative/dreambooth/dreambooth.py | 15 +- .../dreambooth/dreambooth_export.py | 100 +- .../generative/dreambooth/dreambooth_infer.py | 4 +- .../instruct_pix2pix/sd_edit_cli.py | 51 +- .../instruct_pix2pix/sd_edit_export.py | 137 +- .../stable_diffusion/generate_fid_images.py | 4 +- .../generative/stable_diffusion/sd_export.py | 100 +- .../generative/stable_diffusion/sd_infer.py | 4 +- .../generative/stable_diffusion/sd_train.py | 30 +- examples/tts/g2p/g2p_inference.py | 2 +- examples/tts/g2p/g2p_train_and_evaluate.py | 2 +- examples/vision/convert_ckpt_to_nemo.py | 13 +- .../megatron_vit_classification_evaluate.py | 39 +- .../megatron_vit_classification_export.py | 15 +- .../megatron_vit_classification_finetune.py | 2 +- .../megatron_vit_classification_infer.py | 31 +- .../data/clip/augmentations/augmentations.py | 53 +- .../multimodal/data/clip/clip_dataset.py | 33 +- .../data/clip/imagenet_zeroshot_data.py | 1167 ++++++++++++++--- .../multimodal/data/common/data_samplers.py | 32 +- .../multimodal/data/common/webdataset.py | 48 +- .../data/dreambooth/dreambooth_dataset.py | 19 +- .../data/instruct_pix2pix/edit_dataset.py | 29 +- .../augmentation/augmentations.py | 25 +- .../stable_diffusion_dataset.py | 8 +- .../data/stable_diffusion/wds_sampler.py | 3 +- .../data/stable_diffusion/webdataset.py | 51 +- .../data/stable_diffusion/webdataset_utils.py | 78 +- .../multimodal/losses/clip_loss.py | 24 +- .../models/clip/megatron_clip_models.py | 92 +- .../models/dreambooth/dreambooth.py | 38 +- .../multimodal/models/dreambooth/util.py | 102 +- .../models/instruct_pix2pix/ldm/ddpm_edit.py | 38 +- .../models/multimodal_base_model.py | 42 +- .../stable_diffusion/diffusion_model.py | 11 +- .../stable_diffusion/ldm/autoencoder.py | 258 ++-- .../models/stable_diffusion/ldm/ddpm.py | 598 ++++++--- .../stable_diffusion/ldm/ddpm_legacy.py | 588 ++++++--- .../models/stable_diffusion/ldm_config.py | 21 +- .../stable_diffusion/samplers/base_sampler.py | 206 +-- .../models/stable_diffusion/samplers/ddim.py | 61 +- .../stable_diffusion/samplers/k_diffusion.py | 203 ++- .../models/stable_diffusion/samplers/plms.py | 69 +- .../modules/stable_diffusion/attention.py | 132 +- .../diffusionmodules/model.py | 487 +++---- .../diffusionmodules/openaimodel.py | 334 +++-- .../stable_diffusion/diffusionmodules/util.py | 24 +- .../distributions/distributions.py | 31 +- .../stable_diffusion/encoders/modules.py | 68 +- .../encoders/x_transformer.py | 212 ++- .../parts/stable_diffusion/lr_scheduler.py | 25 +- .../parts/stable_diffusion/pipeline.py | 43 +- .../parts/stable_diffusion/utils.py | 28 +- nemo/collections/multimodal/parts/utils.py | 28 +- .../nlp/modules/common/megatron/attention.py | 2 +- nemo/collections/nlp/parts/nlp_overrides.py | 6 +- .../vision/data/imagenet_classnames.py | 1167 ++++++++++++++--- .../vision/data/megatron/autoaugment.py | 81 +- .../vision/data/megatron/data_samplers.py | 37 +- .../vision/data/megatron/image_folder.py | 100 +- .../vision/data/megatron/vit_dataset.py | 188 ++- .../megatron_vit_classification_models.py | 44 +- .../vision/models/vision_base_model.py | 40 +- .../common/megatron/vision_transformer.py | 268 ++-- .../vision/modules/vit/vit_backbone.py | 118 +- nemo/utils/trt_utils.py | 33 +- scripts/fid-eval-text2img/TFinception_V3.py | 49 +- .../fid-eval-text2img/compute_clip_score.py | 21 +- scripts/fid-eval-text2img/compute_fid.py | 145 +- scripts/fid-eval-text2img/eval_fid.py | 6 +- scripts/fid-eval-text2img/fid_dataset.py | 9 +- scripts/fid-eval-text2img/plot.py | 5 +- .../collections/multimodal/test_clip_model.py | 29 +- tests/collections/vision/test_vit_model.py | 24 +- tools/asr_evaluator/asr_evaluator.py | 3 +- .../scripts/run_ctc_segmentation.py | 2 +- 81 files changed, 5271 insertions(+), 3086 deletions(-) diff --git a/examples/multimodal/convert_ckpt_to_nemo.py b/examples/multimodal/convert_ckpt_to_nemo.py index ca739537af33..9aa9bf5876c6 100644 --- a/examples/multimodal/convert_ckpt_to_nemo.py +++ b/examples/multimodal/convert_ckpt_to_nemo.py @@ -25,8 +25,9 @@ """ import os -import torch from argparse import ArgumentParser + +import torch from pytorch_lightning.plugins.environments import TorchElasticEnvironment from pytorch_lightning.trainer.trainer import Trainer @@ -48,6 +49,7 @@ HAVE_MEGATRON_CORE = False + def get_args(): parser = ArgumentParser() parser.add_argument( @@ -83,9 +85,7 @@ def get_args(): default=None, help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", ) - parser.add_argument( - "--model_type", type=str, required=False, default="megatron_clip" - ) + parser.add_argument("--model_type", type=str, required=False, default="megatron_clip") parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") @@ -141,17 +141,21 @@ def convert(local_rank, rank, world_size, args): ) if args.model_type == 'megatron_clip': - model = MegatronCLIPModel.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, - trainer=trainer) + model = MegatronCLIPModel.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) elif args.model_type == 'stable_diffusion': - model = MegatronLatentDiffusion.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, - trainer=trainer) + model = MegatronLatentDiffusion.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) elif args.model_type == 'instruct_pix2pix': - model = MegatronLatentDiffusionEdit.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, - trainer=trainer) + model = MegatronLatentDiffusionEdit.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) elif args.model_type == 'dreambooth': - model = MegatronLatentDiffusion.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, - trainer=trainer) + model = MegatronLatentDiffusion.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) else: raise ValueError(f"Unrecognized model_type {args.model_type}.") diff --git a/examples/multimodal/foundation/clip/megatron_clip_export.py b/examples/multimodal/foundation/clip/megatron_clip_export.py index f11366ec9be1..3dbf0fe0fe1d 100644 --- a/examples/multimodal/foundation/clip/megatron_clip_export.py +++ b/examples/multimodal/foundation/clip/megatron_clip_export.py @@ -13,10 +13,11 @@ # limitations under the License. import os +from typing import Dict, List, Optional + import torch -from PIL import Image from omegaconf.omegaconf import OmegaConf, open_dict -from typing import List, Optional, Dict +from PIL import Image from nemo.collections.multimodal.data.clip.clip_dataset import get_preprocess_fns from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel @@ -101,9 +102,7 @@ def model_cfg_modifier(model_cfg): model_cfg.activations_checkpoint_method = None trainer, model = setup_trainer_and_model_for_inference( - model_provider=MegatronCLIPModel, - cfg=cfg, - model_cfg_modifier=model_cfg_modifier, + model_provider=MegatronCLIPModel, cfg=cfg, model_cfg_modifier=model_cfg_modifier, ) if model.cfg.get("megatron_amp_O2", False): @@ -113,11 +112,7 @@ def model_cfg_modifier(model_cfg): vision_encoder = model.model.vision_encoder text_encoder = model.model.text_encoder - val_image_transform, text_transform = get_preprocess_fns( - model.cfg, - model.tokenizer, - is_train=False, - ) + val_image_transform, text_transform = get_preprocess_fns(model.cfg, model.tokenizer, is_train=False,) os.makedirs(f"{output_dir}/onnx/", exist_ok=True) os.makedirs(f"{output_dir}/plan/", exist_ok=True) @@ -129,9 +124,16 @@ def model_cfg_modifier(model_cfg): input_profile = {} bs1_example = clip_model.input_example(max_batch=1, max_dim=max_dim, max_text=1) bsmax_example = clip_model.input_example(max_batch=max_batch_size, max_dim=max_dim, max_text=max_text) - input_profile['images'] = [tuple(bs1_example[0].shape), tuple(bsmax_example[0].shape), - tuple(bsmax_example[0].shape)] - input_profile['texts'] = [tuple(bs1_example[1].shape), tuple(bsmax_example[1].shape), tuple(bsmax_example[1].shape)] + input_profile['images'] = [ + tuple(bs1_example[0].shape), + tuple(bsmax_example[0].shape), + tuple(bsmax_example[0].shape), + ] + input_profile['texts'] = [ + tuple(bs1_example[1].shape), + tuple(bsmax_example[1].shape), + tuple(bsmax_example[1].shape), + ] build_engine( f"{output_dir}/onnx/clip.onnx", f"{output_dir}/plan/clip.plan", diff --git a/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py b/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py index cd0755c9d7ec..c58d71af93f5 100644 --- a/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py +++ b/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py @@ -13,24 +13,26 @@ # limitations under the License. import os + import torch import torch.nn.functional as F -from PIL import Image from omegaconf.omegaconf import OmegaConf, open_dict +from PIL import Image from pytorch_lightning import Trainer from pytorch_lightning.plugins.environments import TorchElasticEnvironment from tqdm import tqdm -from nemo.collections.multimodal.data.clip.clip_dataset import get_preprocess_fns, ImagenetClassnameDataset -from nemo.collections.multimodal.data.clip.clip_dataset import tokenize, build_imagenet_validation_dataloader -from nemo.collections.multimodal.data.clip.imagenet_zeroshot_data import openai_imagenet_template, imagenet_classnames +from nemo.collections.multimodal.data.clip.clip_dataset import ( + ImagenetClassnameDataset, + build_imagenet_validation_dataloader, + get_preprocess_fns, + tokenize, +) +from nemo.collections.multimodal.data.clip.imagenet_zeroshot_data import imagenet_classnames, openai_imagenet_template from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference from nemo.collections.nlp.modules.common.megatron.utils import average_losses_across_data_parallel_group -from nemo.collections.nlp.parts.nlp_overrides import ( - NLPDDPStrategy, - NLPSaveRestoreConnector, -) +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector from nemo.core.config import hydra_runner from nemo.utils import logging from nemo.utils.get_rank import is_global_rank_zero @@ -44,6 +46,7 @@ HAVE_MEGATRON_CORE = False + def accuracy(output, target, topk=(1,)): pred = output.topk(max(topk), 1, True, True)[1].t() correct = pred.eq(target.view(1, -1).expand_as(pred)) @@ -67,9 +70,7 @@ def model_cfg_modifier(model_cfg): model_cfg.activations_checkpoint_method = None trainer, model = setup_trainer_and_model_for_inference( - model_provider=MegatronCLIPModel, - cfg=cfg, - model_cfg_modifier=model_cfg_modifier, + model_provider=MegatronCLIPModel, cfg=cfg, model_cfg_modifier=model_cfg_modifier, ) if model.cfg.get("megatron_amp_O2", False): @@ -94,8 +95,9 @@ def model_cfg_modifier(model_cfg): cfg.model["text"] = model.cfg.text imagenet_val = build_imagenet_validation_dataloader(cfg.model, model.tokenizer) - with torch.no_grad(), torch.cuda.amp.autocast(enabled=autocast_dtype in (torch.half, torch.bfloat16), - dtype=autocast_dtype, ): + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): # build imagenet classification classifier classifier = [] for texts in imagenet_val["texts"]: @@ -106,7 +108,7 @@ def model_cfg_modifier(model_cfg): classifier.append(class_embedding) classifier = torch.stack(classifier, dim=1) - top1, top5, n = 0., 0., 0. + top1, top5, n = 0.0, 0.0, 0.0 for images, target in tqdm(imagenet_val["images"], desc="Imagenet Zero-shot Evaluation", leave=False): if images is None or target is None: continue @@ -116,7 +118,7 @@ def model_cfg_modifier(model_cfg): # predict image_features = vision_encoder(images) image_features = F.normalize(image_features, dim=-1) - logits = 100. * image_features @ classifier + logits = 100.0 * image_features @ classifier # measure accuracy acc1, acc5 = accuracy(logits, target, topk=(1, 5)) @@ -125,8 +127,8 @@ def model_cfg_modifier(model_cfg): n += images.size(0) logging.info('Finished zero-shot imagenet.') - top1 = (top1 / n) - top5 = (top5 / n) + top1 = top1 / n + top5 = top5 / n imagenet_metric = torch.zeros(2).cuda() imagenet_metric[0], imagenet_metric[1] = top1, top5 diff --git a/examples/multimodal/foundation/clip/megatron_clip_infer.py b/examples/multimodal/foundation/clip/megatron_clip_infer.py index d2ce890e616b..d6ba3c1d5938 100644 --- a/examples/multimodal/foundation/clip/megatron_clip_infer.py +++ b/examples/multimodal/foundation/clip/megatron_clip_infer.py @@ -13,19 +13,17 @@ # limitations under the License. import os + import torch -from PIL import Image from omegaconf.omegaconf import OmegaConf, open_dict +from PIL import Image from pytorch_lightning import Trainer from pytorch_lightning.plugins.environments import TorchElasticEnvironment from nemo.collections.multimodal.data.clip.clip_dataset import get_preprocess_fns from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.collections.nlp.parts.nlp_overrides import ( - NLPDDPStrategy, - NLPSaveRestoreConnector, -) +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector from nemo.core.config import hydra_runner from nemo.utils import logging from nemo.utils.get_rank import is_global_rank_zero @@ -48,9 +46,7 @@ def model_cfg_modifier(model_cfg): model_cfg.activations_checkpoint_method = None trainer, model = setup_trainer_and_model_for_inference( - model_provider=MegatronCLIPModel, - cfg=cfg, - model_cfg_modifier=model_cfg_modifier, + model_provider=MegatronCLIPModel, cfg=cfg, model_cfg_modifier=model_cfg_modifier, ) if model.cfg.get("megatron_amp_O2", False): @@ -60,11 +56,7 @@ def model_cfg_modifier(model_cfg): vision_encoder = model.model.vision_encoder text_encoder = model.model.text_encoder - val_image_transform, text_transform = get_preprocess_fns( - model.cfg, - model.tokenizer, - is_train=False, - ) + val_image_transform, text_transform = get_preprocess_fns(model.cfg, model.tokenizer, is_train=False,) # get autocast_dtype if trainer.precision == 'bf16': @@ -77,8 +69,9 @@ def model_cfg_modifier(model_cfg): raise ValueError('precision must be in [32, 16, "bf16"]') image = Image.open(cfg.image_path).convert('RGB') - with torch.no_grad(), torch.cuda.amp.autocast(enabled=autocast_dtype in (torch.half, torch.bfloat16), - dtype=autocast_dtype, ): + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): image = val_image_transform(image).unsqueeze(0).cuda() texts = text_transform(cfg.texts).cuda() image_features = vision_encoder(image) diff --git a/examples/multimodal/foundation/clip/megatron_clip_pretrain.py b/examples/multimodal/foundation/clip/megatron_clip_pretrain.py index 8d5b33f86c02..e7f81e17c2d1 100644 --- a/examples/multimodal/foundation/clip/megatron_clip_pretrain.py +++ b/examples/multimodal/foundation/clip/megatron_clip_pretrain.py @@ -35,8 +35,11 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - assert (cfg.trainer.devices * cfg.trainer.num_nodes) * cfg.model.micro_batch_size == \ - cfg.model.global_batch_size, "Gradient accumulation is not supported in CLIP yet." + assert ( + cfg.trainer.devices * cfg.trainer.num_nodes + ) * cfg.model.micro_batch_size == cfg.model.global_batch_size, ( + "Gradient accumulation is not supported in CLIP yet." + ) megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' diff --git a/examples/multimodal/generative/dreambooth/dreambooth.py b/examples/multimodal/generative/dreambooth/dreambooth.py index d6b636a3ee54..038dbb8b763c 100644 --- a/examples/multimodal/generative/dreambooth/dreambooth.py +++ b/examples/multimodal/generative/dreambooth/dreambooth.py @@ -44,6 +44,7 @@ HAVE_MEGATRON_CORE = False + def prepare_reg_data(cfg): reg_dir = cfg.model.data.regularization_dir num_reg_images = cfg.model.data.num_reg_images @@ -52,6 +53,7 @@ def prepare_reg_data(cfg): os.makedirs(reg_dir, exist_ok=True) NUM_REG_IMAGES = len(os.listdir(reg_dir)) if NUM_REG_IMAGES < num_reg_images: + def model_cfg_modifier(model_cfg): model_cfg.precision = cfg.trainer.precision model_cfg.ckpt_path = None @@ -64,9 +66,8 @@ def model_cfg_modifier(model_cfg): model_cfg.target = 'nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion' trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusion, - cfg=cfg, - model_cfg_modifier=model_cfg_modifier) + model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier + ) model = megatron_diffusion_model.model rng = torch.Generator() rng.manual_seed(trainer.global_rank * 100 + cfg.model.seed) @@ -79,8 +80,12 @@ def model_cfg_modifier(model_cfg): output = pipeline(model, cfg, verbose=False, rng=rng) for text_prompt, pils in zip(reg_prompt, output): for idx, image in enumerate(pils): - image.save(os.path.join(cfg.infer.out_path, - f'{reg_prompt}_{trainer.global_rank}_{NUM_REG_IMAGES + i * num_images_per_prompt + idx}.png')) + image.save( + os.path.join( + cfg.infer.out_path, + f'{reg_prompt}_{trainer.global_rank}_{NUM_REG_IMAGES + i * num_images_per_prompt + idx}.png', + ) + ) del model del trainer if torch.cuda.is_available(): diff --git a/examples/multimodal/generative/dreambooth/dreambooth_export.py b/examples/multimodal/generative/dreambooth/dreambooth_export.py index 8f396bd02cad..ae2b1ed7d227 100644 --- a/examples/multimodal/generative/dreambooth/dreambooth_export.py +++ b/examples/multimodal/generative/dreambooth/dreambooth_export.py @@ -14,12 +14,12 @@ import gc import os import time + import torch import torch.nn as nn from omegaconf.omegaconf import OmegaConf, open_dict -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion, MegatronLatentDiffusion from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference from nemo.core.config import hydra_runner from nemo.utils.trt_utils import build_engine @@ -45,9 +45,7 @@ def model_cfg_modifier(model_cfg): model_cfg.target = 'nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion' trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusion, - cfg=cfg, - model_cfg_modifier=model_cfg_modifier + model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier ) model = megatron_diffusion_model.model model.cuda().eval() @@ -61,17 +59,19 @@ def model_cfg_modifier(model_cfg): os.makedirs(f"{output_dir}/onnx/clip/", exist_ok=True) os.makedirs(f"{output_dir}/onnx/vae/", exist_ok=True) os.makedirs(f"{output_dir}/plan/", exist_ok=True) - deployment_conf = OmegaConf.create({ - 'clip': OmegaConf.create({}), - 'unet': OmegaConf.create({}), - 'vae': OmegaConf.create({}), - 'sampler': OmegaConf.create({}), - 'batch_size': batch_size, - 'downsampling_factor': downsampling_factor, - 'in_channels': in_channels, - 'height': height, - 'width': width, - }) + deployment_conf = OmegaConf.create( + { + 'clip': OmegaConf.create({}), + 'unet': OmegaConf.create({}), + 'vae': OmegaConf.create({}), + 'sampler': OmegaConf.create({}), + 'batch_size': batch_size, + 'downsampling_factor': downsampling_factor, + 'in_channels': in_channels, + 'height': height, + 'width': width, + } + ) deployment_conf.sampler.eta = cfg.infer.get('eta', 0) deployment_conf.sampler.inference_steps = cfg.infer.get('inference_steps', 50) deployment_conf.sampler.sampler_type = cfg.infer.get('sampler_type', "ddim") @@ -81,18 +81,16 @@ def model_cfg_modifier(model_cfg): cc = torch.randn(2, out.shape[1], out.shape[2], device="cuda") input_names = ["x", "t", "context"] output_names = ["logits"] - torch.onnx.export(model.model.diffusion_model, - (x, t, cc), - f"{output_dir}/onnx/unet/unet.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={ - "x": {0: 'B'}, - "t": {0: 'B'}, - "context": {0: 'B'} - }, - opset_version=17) + torch.onnx.export( + model.model.diffusion_model, + (x, t, cc), + f"{output_dir}/onnx/unet/unet.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"x": {0: 'B'}, "t": {0: 'B'}, "context": {0: 'B'}}, + opset_version=17, + ) input_profile_unet = {} input_profile_unet["x"] = [(2 * batch_size, *(x.shape[1:]))] * 3 input_profile_unet["t"] = [(2 * batch_size, *(t.shape[1:]))] * 3 @@ -115,17 +113,16 @@ def forward(self, z): input_names = ["z"] output_names = ["logits"] z = torch.randn(1, *shape_of_internal, device="cuda") - torch.onnx.export(VAEWrapper(model.first_stage_model), - (z,), - f"{output_dir}/onnx/vae/vae.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={ - "z": {0: 'B'}, - "logits": {0: 'B'} - }, - opset_version=17) + torch.onnx.export( + VAEWrapper(model.first_stage_model), + (z,), + f"{output_dir}/onnx/vae/vae.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"z": {0: 'B'}, "logits": {0: 'B'}}, + opset_version=17, + ) input_profile_vae = {} input_profile_vae["z"] = [(batch_size, *(z.shape[1:]))] * 3 deployment_conf.vae.z = input_profile_vae["z"][0] @@ -143,19 +140,18 @@ def forward(self, input_ids): input_names = ["tokens"] output_names = ["logits"] tokens = torch.randint(high=10, size=(1, model.cond_stage_model.max_length), device="cuda") - torch.onnx.export(CLIPWrapper(model.cond_stage_model.transformer), - (tokens,), - f"{output_dir}/onnx/clip/clip.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={ - "tokens": {0: 'B'}, - "logits": {0: 'B'} - }, - opset_version=17, - do_constant_folding=True, - export_params=True) + torch.onnx.export( + CLIPWrapper(model.cond_stage_model.transformer), + (tokens,), + f"{output_dir}/onnx/clip/clip.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"tokens": {0: 'B'}, "logits": {0: 'B'}}, + opset_version=17, + do_constant_folding=True, + export_params=True, + ) input_profile_clip = {} input_profile_clip["tokens"] = [(batch_size, *(tokens.shape[1:]))] * 3 deployment_conf.clip.tokens = input_profile_clip["tokens"][0] diff --git a/examples/multimodal/generative/dreambooth/dreambooth_infer.py b/examples/multimodal/generative/dreambooth/dreambooth_infer.py index 581637352c8e..e652fa68ddcd 100644 --- a/examples/multimodal/generative/dreambooth/dreambooth_infer.py +++ b/examples/multimodal/generative/dreambooth/dreambooth_infer.py @@ -31,9 +31,7 @@ def model_cfg_modifier(model_cfg): model_cfg.target = 'nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion' trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusion, - cfg=cfg, - model_cfg_modifier=model_cfg_modifier + model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier ) model = megatron_diffusion_model.model model.cuda().eval() diff --git a/examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py b/examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py index 47529feff757..a777eb8d7700 100644 --- a/examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py +++ b/examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py @@ -13,30 +13,30 @@ # limitations under the License. from __future__ import annotations -import einops import math -import numpy as np import os import random import sys +from argparse import ArgumentParser + +import einops +import numpy as np import torch import torch.nn as nn -from PIL import Image, ImageOps -from argparse import ArgumentParser from einops import rearrange, repeat from omegaconf import OmegaConf, open_dict +from PIL import Image, ImageOps from pytorch_lightning import Trainer from pytorch_lightning.plugins.environments import TorchElasticEnvironment from torch import autocast from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit -from nemo.collections.multimodal.models.stable_diffusion.samplers.k_diffusion import DiscreteEpsDDPMDenoiser -from nemo.collections.multimodal.models.stable_diffusion.samplers.k_diffusion import sample_euler_ancestral -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.collections.nlp.parts.nlp_overrides import ( - NLPDDPStrategy, - NLPSaveRestoreConnector, +from nemo.collections.multimodal.models.stable_diffusion.samplers.k_diffusion import ( + DiscreteEpsDDPMDenoiser, + sample_euler_ancestral, ) +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector from nemo.core.config import hydra_runner from nemo.utils import logging @@ -72,9 +72,7 @@ def model_cfg_modifier(model_cfg): model_cfg.inductor = False trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusionEdit, - cfg=cfg, - model_cfg_modifier=model_cfg_modifier, + model_provider=MegatronLatentDiffusionEdit, cfg=cfg, model_cfg_modifier=model_cfg_modifier, ) # inference use the latent diffusion part of megatron wrapper @@ -107,24 +105,21 @@ def model_cfg_modifier(model_cfg): raise ValueError('precision must be in [32, 16, "bf16"]') num_images_per_prompt = edit_cfg.num_images_per_prompt - with torch.no_grad(), torch.cuda.amp.autocast(enabled=autocast_dtype in (torch.half, torch.bfloat16), - dtype=autocast_dtype, ): + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): cond = {} cond["c_crossattn"] = [ - repeat(model.get_learned_conditioning([edit_cfg.prompt]), - "1 ... -> n ...", n=num_images_per_prompt) + repeat(model.get_learned_conditioning([edit_cfg.prompt]), "1 ... -> n ...", n=num_images_per_prompt) ] input_image = 2 * torch.tensor(np.array(input_image)).float() / 255 - 1 input_image = rearrange(input_image, "h w c -> 1 c h w").cuda(non_blocking=True) cond["c_concat"] = [ - repeat(model.encode_first_stage(input_image).mode(), - "1 ... -> n ...", n=num_images_per_prompt) + repeat(model.encode_first_stage(input_image).mode(), "1 ... -> n ...", n=num_images_per_prompt) ] uncond = {} - uncond["c_crossattn"] = [ - repeat(null_token, "1 ... -> n ...", n=num_images_per_prompt) - ] + uncond["c_crossattn"] = [repeat(null_token, "1 ... -> n ...", n=num_images_per_prompt)] uncond["c_concat"] = [torch.zeros_like(cond["c_concat"][0])] sigmas = model_wrap.get_sigmas(edit_cfg.steps) @@ -147,8 +142,10 @@ def model_cfg_modifier(model_cfg): if edit_cfg.get("combine_images") is None: for idx, image in enumerate(x): edited_image = Image.fromarray(image.type(torch.uint8).cpu().numpy()) - save_path = os.path.join(edit_cfg.outpath, - f'{edit_cfg.prompt.replace(" ", "_")}_{edit_cfg.cfg_text}_{edit_cfg.cfg_image}_{seed}_{idx}.jpg') + save_path = os.path.join( + edit_cfg.outpath, + f'{edit_cfg.prompt.replace(" ", "_")}_{edit_cfg.cfg_text}_{edit_cfg.cfg_image}_{seed}_{idx}.jpg', + ) edited_image.save(save_path) logging.info(f"Edited image saved to: {save_path}") else: @@ -165,8 +162,10 @@ def model_cfg_modifier(model_cfg): if (idx + 1) % column == 0: x_offset = 0 y_offset += height - save_path = os.path.join(edit_cfg.outpath, - f'{edit_cfg.prompt.replace(" ", "_")}_{edit_cfg.cfg_text}_{edit_cfg.cfg_image}_{seed}_combine.jpg') + save_path = os.path.join( + edit_cfg.outpath, + f'{edit_cfg.prompt.replace(" ", "_")}_{edit_cfg.cfg_text}_{edit_cfg.cfg_image}_{seed}_combine.jpg', + ) edited_image.save(save_path) logging.info(f"Edited image saved to: {save_path}") diff --git a/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py b/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py index 8ef4e9a73b9e..1d4d868e2859 100644 --- a/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py +++ b/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py @@ -11,27 +11,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import einops import gc import math -import numpy as np import os import random import sys +from argparse import ArgumentParser + +import einops +import numpy as np import torch import torch.nn as nn -from PIL import Image, ImageOps -from argparse import ArgumentParser from einops import rearrange, repeat from omegaconf import OmegaConf, open_dict +from PIL import Image, ImageOps from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit from nemo.collections.multimodal.models.stable_diffusion.samplers.k_diffusion import DiscreteEpsDDPMDenoiser from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.collections.nlp.parts.nlp_overrides import ( - NLPDDPStrategy, - NLPSaveRestoreConnector, -) +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector from nemo.core.config import hydra_runner from nemo.utils import logging from nemo.utils.trt_utils import build_engine @@ -71,9 +69,7 @@ def model_cfg_modifier(model_cfg): model_cfg.inductor = False trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusionEdit, - cfg=cfg, - model_cfg_modifier=model_cfg_modifier, + model_provider=MegatronLatentDiffusionEdit, cfg=cfg, model_cfg_modifier=model_cfg_modifier, ) # inference use the latent diffusion part of megatron wrapper @@ -98,20 +94,22 @@ def model_cfg_modifier(model_cfg): os.makedirs(f"{output_dir}/onnx/clip/", exist_ok=True) os.makedirs(f"{output_dir}/onnx/vae/", exist_ok=True) os.makedirs(f"{output_dir}/plan/", exist_ok=True) - deployment_conf = OmegaConf.create({ - 'clip': OmegaConf.create({}), - 'unet': OmegaConf.create({}), - 'vaee': OmegaConf.create({}), - 'vaed': OmegaConf.create({}), - 'sampler': OmegaConf.create({}), - 'batch_size': batch_size, - 'height': height, - 'width': width, - 'resolution': edit_cfg.resolution, - 'steps': edit_cfg.steps, - 'text_cfg_scale': edit_cfg.cfg_text, - 'image_cfg_scale': edit_cfg.cfg_image, - }) + deployment_conf = OmegaConf.create( + { + 'clip': OmegaConf.create({}), + 'unet': OmegaConf.create({}), + 'vaee': OmegaConf.create({}), + 'vaed': OmegaConf.create({}), + 'sampler': OmegaConf.create({}), + 'batch_size': batch_size, + 'height': height, + 'width': width, + 'resolution': edit_cfg.resolution, + 'steps': edit_cfg.steps, + 'text_cfg_scale': edit_cfg.cfg_text, + 'image_cfg_scale': edit_cfg.cfg_image, + } + ) fake_text = [""] out_cond = model.cond_stage_model(fake_text) @@ -131,17 +129,16 @@ def forward(self, x): output_names = ["logits"] x = torch.randn(1, 3, width, height, device="cuda") # z = torch.randn(1, *shape_of_internal, device="cuda") - torch.onnx.export(VAEEncodeWrapper(model.first_stage_model), - (x,), - f"{output_dir}/onnx/vae/vae_encode.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={ - "x": {0: 'B'}, - "logits": {0: 'B'} - }, - opset_version=17) + torch.onnx.export( + VAEEncodeWrapper(model.first_stage_model), + (x,), + f"{output_dir}/onnx/vae/vae_encode.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"x": {0: 'B'}, "logits": {0: 'B'}}, + opset_version=17, + ) input_profile_vaee = {} input_profile_vaee["x"] = [(1, *(x.shape[1:]))] * 3 with torch.no_grad(): @@ -156,18 +153,16 @@ def forward(self, x): # cc = torch.randn(2, out.shape[1], out.shape[2], device="cuda") input_names = ["x", "t", "context"] output_names = ["logits"] - torch.onnx.export(model.model.diffusion_model, - (x, t, cc), - f"{output_dir}/onnx/unet/unet.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={ - "x": {0: 'B'}, - "t": {0: 'B'}, - "context": {0: 'B'} - }, - opset_version=17) + torch.onnx.export( + model.model.diffusion_model, + (x, t, cc), + f"{output_dir}/onnx/unet/unet.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"x": {0: 'B'}, "t": {0: 'B'}, "context": {0: 'B'}}, + opset_version=17, + ) input_profile_unet = {} input_profile_unet["x"] = [(3 * batch_size, *(x.shape[1:]))] * 3 @@ -194,17 +189,16 @@ def forward(self, z): output_names = ["logits"] z = torch.randn(1, *(out_unet.shape[1:]), device="cuda") # z = torch.randn(1, *shape_of_internal, device="cuda") - torch.onnx.export(VAEDecodeWrapper(model.first_stage_model), - (z,), - f"{output_dir}/onnx/vae/vae_decode.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={ - "z": {0: 'B'}, - "logits": {0: 'B'} - }, - opset_version=17) + torch.onnx.export( + VAEDecodeWrapper(model.first_stage_model), + (z,), + f"{output_dir}/onnx/vae/vae_decode.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"z": {0: 'B'}, "logits": {0: 'B'}}, + opset_version=17, + ) input_profile_vaed = {} input_profile_vaed["z"] = [(batch_size, *(z.shape[1:]))] * 3 deployment_conf.vaed.z = input_profile_vaed["z"][0] @@ -223,19 +217,18 @@ def forward(self, input_ids): input_names = ["tokens"] output_names = ["logits"] tokens = torch.randint(high=10, size=(1, model.cond_stage_model.max_length), device="cuda") - torch.onnx.export(CLIPWrapper(model.cond_stage_model.transformer), - (tokens,), - f"{output_dir}/onnx/clip/clip.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={ - "tokens": {0: 'B'}, - "logits": {0: 'B'} - }, - opset_version=17, - do_constant_folding=True, - export_params=True) + torch.onnx.export( + CLIPWrapper(model.cond_stage_model.transformer), + (tokens,), + f"{output_dir}/onnx/clip/clip.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"tokens": {0: 'B'}, "logits": {0: 'B'}}, + opset_version=17, + do_constant_folding=True, + export_params=True, + ) input_profile_clip = {} input_profile_clip["tokens"] = [(1, *(tokens.shape[1:]))] * 3 deployment_conf.clip.tokens = input_profile_clip["tokens"][0] diff --git a/examples/multimodal/generative/stable_diffusion/generate_fid_images.py b/examples/multimodal/generative/stable_diffusion/generate_fid_images.py index e305f0cad773..c1a37cd953a6 100644 --- a/examples/multimodal/generative/stable_diffusion/generate_fid_images.py +++ b/examples/multimodal/generative/stable_diffusion/generate_fid_images.py @@ -64,9 +64,7 @@ def model_cfg_modifier(model_cfg): # Set up the trainer and model for inference trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusion, - cfg=cfg, - model_cfg_modifier=model_cfg_modifier + model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier ) model = megatron_diffusion_model.model model.cuda().eval() diff --git a/examples/multimodal/generative/stable_diffusion/sd_export.py b/examples/multimodal/generative/stable_diffusion/sd_export.py index 062c32a85bb8..b00d5e2c0b1d 100644 --- a/examples/multimodal/generative/stable_diffusion/sd_export.py +++ b/examples/multimodal/generative/stable_diffusion/sd_export.py @@ -14,12 +14,12 @@ import gc import os import time + import torch import torch.nn as nn from omegaconf.omegaconf import OmegaConf, open_dict -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion, MegatronLatentDiffusion from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference from nemo.core.config import hydra_runner from nemo.utils.trt_utils import build_engine @@ -44,9 +44,7 @@ def model_cfg_modifier(model_cfg): model_cfg.unet_config.use_flash_attention = False trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusion, - cfg=cfg, - model_cfg_modifier=model_cfg_modifier + model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier ) model = megatron_diffusion_model.model model.cuda().eval() @@ -61,17 +59,19 @@ def model_cfg_modifier(model_cfg): os.makedirs(f"{output_dir}/onnx/clip/", exist_ok=True) os.makedirs(f"{output_dir}/onnx/vae/", exist_ok=True) os.makedirs(f"{output_dir}/plan/", exist_ok=True) - deployment_conf = OmegaConf.create({ - 'clip': OmegaConf.create({}), - 'unet': OmegaConf.create({}), - 'vae': OmegaConf.create({}), - 'sampler': OmegaConf.create({}), - 'batch_size': batch_size, - 'downsampling_factor': downsampling_factor, - 'in_channels': in_channels, - 'height': height, - 'width': width, - }) + deployment_conf = OmegaConf.create( + { + 'clip': OmegaConf.create({}), + 'unet': OmegaConf.create({}), + 'vae': OmegaConf.create({}), + 'sampler': OmegaConf.create({}), + 'batch_size': batch_size, + 'downsampling_factor': downsampling_factor, + 'in_channels': in_channels, + 'height': height, + 'width': width, + } + ) deployment_conf.sampler.eta = cfg.infer.get('eta', 0) deployment_conf.sampler.inference_steps = cfg.infer.get('inference_steps', 50) deployment_conf.sampler.sampler_type = cfg.infer.get('sampler_type', "ddim") @@ -81,18 +81,16 @@ def model_cfg_modifier(model_cfg): cc = torch.randn(2, out.shape[1], out.shape[2], device="cuda") input_names = ["x", "t", "context"] output_names = ["logits"] - torch.onnx.export(model.model.diffusion_model, - (x, t, cc), - f"{output_dir}/onnx/unet/unet.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={ - "x": {0: 'B'}, - "t": {0: 'B'}, - "context": {0: 'B'} - }, - opset_version=17) + torch.onnx.export( + model.model.diffusion_model, + (x, t, cc), + f"{output_dir}/onnx/unet/unet.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"x": {0: 'B'}, "t": {0: 'B'}, "context": {0: 'B'}}, + opset_version=17, + ) input_profile_unet = {} input_profile_unet["x"] = [(2 * batch_size, *(x.shape[1:]))] * 3 input_profile_unet["t"] = [(2 * batch_size, *(t.shape[1:]))] * 3 @@ -115,17 +113,16 @@ def forward(self, z): input_names = ["z"] output_names = ["logits"] z = torch.randn(1, *shape_of_internal, device="cuda") - torch.onnx.export(VAEWrapper(model.first_stage_model), - (z,), - f"{output_dir}/onnx/vae/vae.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={ - "z": {0: 'B'}, - "logits": {0: 'B'} - }, - opset_version=17) + torch.onnx.export( + VAEWrapper(model.first_stage_model), + (z,), + f"{output_dir}/onnx/vae/vae.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"z": {0: 'B'}, "logits": {0: 'B'}}, + opset_version=17, + ) input_profile_vae = {} input_profile_vae["z"] = [(batch_size, *(z.shape[1:]))] * 3 deployment_conf.vae.z = input_profile_vae["z"][0] @@ -143,19 +140,18 @@ def forward(self, input_ids): input_names = ["tokens"] output_names = ["logits"] tokens = torch.randint(high=10, size=(1, model.cond_stage_model.max_length), device="cuda") - torch.onnx.export(CLIPWrapper(model.cond_stage_model.transformer), - (tokens,), - f"{output_dir}/onnx/clip/clip.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={ - "tokens": {0: 'B'}, - "logits": {0: 'B'} - }, - opset_version=17, - do_constant_folding=True, - export_params=True) + torch.onnx.export( + CLIPWrapper(model.cond_stage_model.transformer), + (tokens,), + f"{output_dir}/onnx/clip/clip.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"tokens": {0: 'B'}, "logits": {0: 'B'}}, + opset_version=17, + do_constant_folding=True, + export_params=True, + ) input_profile_clip = {} input_profile_clip["tokens"] = [(batch_size, *(tokens.shape[1:]))] * 3 deployment_conf.clip.tokens = input_profile_clip["tokens"][0] diff --git a/examples/multimodal/generative/stable_diffusion/sd_infer.py b/examples/multimodal/generative/stable_diffusion/sd_infer.py index 0de9d4ed32f7..0fe9a0064e47 100644 --- a/examples/multimodal/generative/stable_diffusion/sd_infer.py +++ b/examples/multimodal/generative/stable_diffusion/sd_infer.py @@ -31,9 +31,7 @@ def model_cfg_modifier(model_cfg): torch.backends.cuda.matmul.allow_tf32 = True trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusion, - cfg=cfg, - model_cfg_modifier=model_cfg_modifier + model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier ) model = megatron_diffusion_model.model model.cuda().eval() diff --git a/examples/multimodal/generative/stable_diffusion/sd_train.py b/examples/multimodal/generative/stable_diffusion/sd_train.py index 4339971cfa8a..f08a8e0e666d 100644 --- a/examples/multimodal/generative/stable_diffusion/sd_train.py +++ b/examples/multimodal/generative/stable_diffusion/sd_train.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. import argparse -import numpy as np import os +from datetime import timedelta + +import numpy as np import pytorch_lightning as pl import torch import torch.utils.data as data -from datetime import timedelta from hydra.utils import instantiate from omegaconf import OmegaConf from pytorch_lightning.strategies.ddp import DDPStrategy @@ -47,34 +48,29 @@ def main(cfg): del cfg.trainer.strategy batch_size = cfg.model.data.train.batch_size - dataset = WebDatasetWithRawText( - dataset_cfg=cfg.model.data, - is_train=True, + dataset = WebDatasetWithRawText(dataset_cfg=cfg.model.data, is_train=True,) + data = torch.utils.data.DataLoader( + dataset, batch_size=batch_size, num_workers=cfg.model.data.num_workers, pin_memory=True, drop_last=False ) - data = torch.utils.data.DataLoader(dataset, - batch_size=batch_size, - num_workers=cfg.model.data.num_workers, - pin_memory=True, - drop_last=False) global_bs = cfg.trainer.devices * cfg.trainer.num_nodes * batch_size callbacks = [] if not cfg.model.data.webdataset.infinite_sampler: wds_sampler = WebDataloaderSamplerCallback( - batch_size=batch_size, - gradient_accumulation=cfg.trainer.accumulate_grad_batches) + batch_size=batch_size, gradient_accumulation=cfg.trainer.accumulate_grad_batches + ) callbacks.append(wds_sampler) plugins = [] if cfg.get('cluster_type', None) == 'BCP': plugins.append(TorchElasticEnvironment()) - trainer = pl.Trainer(**cfg.trainer, - plugins=plugins, - callbacks=callbacks, - strategy=strategy) + trainer = pl.Trainer(**cfg.trainer, plugins=plugins, callbacks=callbacks, strategy=strategy) exp_manager(trainer, cfg.get("exp_manager", None)) - if not cfg.model.data.webdataset.infinite_sampler and trainer._checkpoint_connector.resume_from_checkpoint_fit_path is not None: + if ( + not cfg.model.data.webdataset.infinite_sampler + and trainer._checkpoint_connector.resume_from_checkpoint_fit_path is not None + ): # Reusming from previous training session wds_sampler.resume_flag = True diff --git a/examples/tts/g2p/g2p_inference.py b/examples/tts/g2p/g2p_inference.py index e7bffa888653..3bb241659e48 100644 --- a/examples/tts/g2p/g2p_inference.py +++ b/examples/tts/g2p/g2p_inference.py @@ -19,11 +19,11 @@ import pytorch_lightning as pl import torch from omegaconf import OmegaConf -from utils import get_metrics from nemo.collections.tts.models.base import G2PModel from nemo.core.config import hydra_runner from nemo.utils import logging +from utils import get_metrics """ python g2p_inference.py \ diff --git a/examples/tts/g2p/g2p_train_and_evaluate.py b/examples/tts/g2p/g2p_train_and_evaluate.py index ff7b2b0675ea..0f67aa11e09b 100644 --- a/examples/tts/g2p/g2p_train_and_evaluate.py +++ b/examples/tts/g2p/g2p_train_and_evaluate.py @@ -16,13 +16,13 @@ import pytorch_lightning as pl import torch -from utils import get_model from nemo.collections.common.callbacks import LogEpochTimeCallback from nemo.collections.tts.models.base import G2PModel from nemo.core.config import hydra_runner from nemo.utils import logging, model_utils from nemo.utils.exp_manager import exp_manager +from utils import get_model """ This script supports training of G2PModels diff --git a/examples/vision/convert_ckpt_to_nemo.py b/examples/vision/convert_ckpt_to_nemo.py index f12ee7555de4..686fe1ef8e63 100644 --- a/examples/vision/convert_ckpt_to_nemo.py +++ b/examples/vision/convert_ckpt_to_nemo.py @@ -25,8 +25,9 @@ """ import os -import torch from argparse import ArgumentParser + +import torch from pytorch_lightning.plugins.environments import TorchElasticEnvironment from pytorch_lightning.trainer.trainer import Trainer @@ -45,6 +46,7 @@ HAVE_MEGATRON_CORE = False + def get_args(): parser = ArgumentParser() parser.add_argument( @@ -80,9 +82,7 @@ def get_args(): default=None, help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", ) - parser.add_argument( - "--model_type", type=str, required=True, default="vit_classification" - ) + parser.add_argument("--model_type", type=str, required=True, default="vit_classification") parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") @@ -138,8 +138,9 @@ def convert(local_rank, rank, world_size, args): ) if args.model_type == 'vit_classification': - model = MegatronVitClassificationModel.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, - trainer=trainer) + model = MegatronVitClassificationModel.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) else: raise ValueError(f"Unrecognized model_type {args.model_type}.") diff --git a/examples/vision/vision_transformer/megatron_vit_classification_evaluate.py b/examples/vision/vision_transformer/megatron_vit_classification_evaluate.py index 037113ca6571..0d9f9671d98d 100644 --- a/examples/vision/vision_transformer/megatron_vit_classification_evaluate.py +++ b/examples/vision/vision_transformer/megatron_vit_classification_evaluate.py @@ -14,18 +14,16 @@ import glob import os + import torch -from PIL import Image from omegaconf.omegaconf import OmegaConf, open_dict +from PIL import Image from pytorch_lightning import Trainer from pytorch_lightning.plugins.environments import TorchElasticEnvironment from torch.utils.data import DataLoader, Dataset from tqdm import tqdm -from nemo.collections.nlp.parts.nlp_overrides import ( - NLPDDPStrategy, - NLPSaveRestoreConnector, -) +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector from nemo.collections.vision.data.imagenet_classnames import imagenet_classnames from nemo.collections.vision.data.megatron.image_folder import ImageFolder from nemo.collections.vision.data.megatron.vit_dataset import ClassificationTransform @@ -42,8 +40,7 @@ def main(cfg) -> None: plugins = [] strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce - find_unused_parameters=False, + no_ddp_communication_hook=True, find_unused_parameters=False, # we don't use DDP for async grad allreduce ) if cfg.get('cluster_type', None) == 'BCP': plugins.append(TorchElasticEnvironment()) @@ -63,8 +60,8 @@ def main(cfg) -> None: ) assert ( - cfg.trainer.devices * cfg.trainer.num_nodes - == model_cfg.tensor_model_parallel_size * model_cfg.pipeline_model_parallel_size + cfg.trainer.devices * cfg.trainer.num_nodes + == model_cfg.tensor_model_parallel_size * model_cfg.pipeline_model_parallel_size ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" # These configs are required to be off during inference. @@ -86,15 +83,8 @@ def main(cfg) -> None: model.eval() - val_transform = ClassificationTransform( - model.cfg, - (model.cfg.img_h, model.cfg.img_w), - train=False - ) - val_data = ImageFolder( - root=cfg.model.data.imagenet_val, - transform=val_transform, - ) + val_transform = ClassificationTransform(model.cfg, (model.cfg.img_h, model.cfg.img_w), train=False) + val_data = ImageFolder(root=cfg.model.data.imagenet_val, transform=val_transform,) # initialize apex DDP strategy def dummy(): @@ -104,11 +94,7 @@ def dummy(): trainer.strategy.launcher.launch(dummy, trainer=trainer) trainer.strategy.setup_environment() - test_loader = DataLoader( - val_data, - batch_size=cfg.model.micro_batch_size, - num_workers=cfg.model.data.num_workers, - ) + test_loader = DataLoader(val_data, batch_size=cfg.model.micro_batch_size, num_workers=cfg.model.data.num_workers,) # get autocast_dtype if trainer.precision == 'bf16': @@ -120,9 +106,10 @@ def dummy(): else: raise ValueError('precision must be in [32, 16, "bf16"]') - with torch.no_grad(), torch.cuda.amp.autocast(enabled=autocast_dtype in (torch.half, torch.bfloat16), - dtype=autocast_dtype, ): - total = correct = 0. + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): + total = correct = 0.0 for tokens, labels in tqdm(test_loader): logits = model(tokens.cuda()) class_indices = torch.argmax(logits, -1) diff --git a/examples/vision/vision_transformer/megatron_vit_classification_export.py b/examples/vision/vision_transformer/megatron_vit_classification_export.py index 76ef0d606b69..8a732e0a6558 100644 --- a/examples/vision/vision_transformer/megatron_vit_classification_export.py +++ b/examples/vision/vision_transformer/megatron_vit_classification_export.py @@ -14,17 +14,15 @@ import glob import os + import torch -from PIL import Image from omegaconf.omegaconf import OmegaConf, open_dict +from PIL import Image from pytorch_lightning import Trainer from pytorch_lightning.plugins.environments import TorchElasticEnvironment from torch.utils.data import DataLoader, Dataset -from nemo.collections.nlp.parts.nlp_overrides import ( - NLPDDPStrategy, - NLPSaveRestoreConnector, -) +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector from nemo.collections.vision.data.imagenet_classnames import imagenet_classnames from nemo.collections.vision.data.megatron.vit_dataset import ClassificationTransform from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel @@ -44,8 +42,7 @@ def main(cfg) -> None: max_dim = cfg.infer.max_dim plugins = [] strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce - find_unused_parameters=False, + no_ddp_communication_hook=True, find_unused_parameters=False, # we don't use DDP for async grad allreduce ) print(type(cfg.trainer.precision)) if cfg.get('cluster_type', None) == 'BCP': @@ -67,8 +64,8 @@ def main(cfg) -> None: ) assert ( - cfg.trainer.devices * cfg.trainer.num_nodes - == model_cfg.tensor_model_parallel_size * model_cfg.pipeline_model_parallel_size + cfg.trainer.devices * cfg.trainer.num_nodes + == model_cfg.tensor_model_parallel_size * model_cfg.pipeline_model_parallel_size ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" # These configs are required to be off during inference. diff --git a/examples/vision/vision_transformer/megatron_vit_classification_finetune.py b/examples/vision/vision_transformer/megatron_vit_classification_finetune.py index fd466128105c..32206927ee3d 100644 --- a/examples/vision/vision_transformer/megatron_vit_classification_finetune.py +++ b/examples/vision/vision_transformer/megatron_vit_classification_finetune.py @@ -22,8 +22,8 @@ GradScaler, MegatronHalfPrecisionPlugin, NLPDDPStrategy, - PipelineMixedPrecisionPlugin, NLPSaveRestoreConnector, + PipelineMixedPrecisionPlugin, ) from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel from nemo.core.config import hydra_runner diff --git a/examples/vision/vision_transformer/megatron_vit_classification_infer.py b/examples/vision/vision_transformer/megatron_vit_classification_infer.py index 8f65c1fedf53..70831629a1cc 100644 --- a/examples/vision/vision_transformer/megatron_vit_classification_infer.py +++ b/examples/vision/vision_transformer/megatron_vit_classification_infer.py @@ -14,17 +14,15 @@ import glob import os + import torch -from PIL import Image from omegaconf.omegaconf import OmegaConf, open_dict +from PIL import Image from pytorch_lightning import Trainer from pytorch_lightning.plugins.environments import TorchElasticEnvironment from torch.utils.data import DataLoader, Dataset -from nemo.collections.nlp.parts.nlp_overrides import ( - NLPDDPStrategy, - NLPSaveRestoreConnector, -) +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector from nemo.collections.vision.data.imagenet_classnames import imagenet_classnames from nemo.collections.vision.data.megatron.vit_dataset import ClassificationTransform from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel @@ -64,8 +62,7 @@ def main(cfg) -> None: plugins = [] strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce - find_unused_parameters=False, + no_ddp_communication_hook=True, find_unused_parameters=False, # we don't use DDP for async grad allreduce ) if cfg.get('cluster_type', None) == 'BCP': plugins.append(TorchElasticEnvironment()) @@ -85,8 +82,8 @@ def main(cfg) -> None: ) assert ( - cfg.trainer.devices * cfg.trainer.num_nodes - == model_cfg.tensor_model_parallel_size * model_cfg.pipeline_model_parallel_size + cfg.trainer.devices * cfg.trainer.num_nodes + == model_cfg.tensor_model_parallel_size * model_cfg.pipeline_model_parallel_size ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" # These configs are required to be off during inference. @@ -108,15 +105,8 @@ def main(cfg) -> None: model.eval() - test_transform = ClassificationTransform( - cfg.model, - (model_cfg.img_h, model_cfg.img_w), - train=False - ) - test_data = ImageFolderDataset( - folder_path=cfg.data_path, - transform=test_transform, - ) + test_transform = ClassificationTransform(cfg.model, (model_cfg.img_h, model_cfg.img_w), train=False) + test_data = ImageFolderDataset(folder_path=cfg.data_path, transform=test_transform,) test_loader = DataLoader(test_data, batch_size=8) # initialize apex DDP strategy @@ -137,8 +127,9 @@ def dummy(): else: raise ValueError('precision must be in [32, 16, "bf16"]') - with torch.no_grad(), torch.cuda.amp.autocast(enabled=autocast_dtype in (torch.half, torch.bfloat16), - dtype=autocast_dtype, ): + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): class_names = [] for tokens in test_loader: logits = model(tokens.cuda()) diff --git a/nemo/collections/multimodal/data/clip/augmentations/augmentations.py b/nemo/collections/multimodal/data/clip/augmentations/augmentations.py index 12f16793b4f1..2cf3dad64464 100644 --- a/nemo/collections/multimodal/data/clip/augmentations/augmentations.py +++ b/nemo/collections/multimodal/data/clip/augmentations/augmentations.py @@ -16,19 +16,26 @@ https://github.com/mlfoundations/open_clip/blob/28c994406e39a5babc749c76871d92f33e9c558d/src/open_clip/transform.py by @yaoyu-33 """ +from typing import Optional, Sequence, Tuple + import torch import torch.nn as nn import torchvision.transforms.functional as F -from torchvision.transforms import Normalize, Compose, RandomResizedCrop, InterpolationMode, ToTensor, Resize, \ - CenterCrop -from typing import Optional, Sequence, Tuple +from torchvision.transforms import ( + CenterCrop, + Compose, + InterpolationMode, + Normalize, + RandomResizedCrop, + Resize, + ToTensor, +) OPENAI_DATASET_MEAN = (0.48145466, 0.4578275, 0.40821073) OPENAI_DATASET_STD = (0.26862954, 0.26130258, 0.27577711) class ResizeMaxSize(nn.Module): - def __init__(self, max_size, interpolation=InterpolationMode.BICUBIC, fn='max', fill=0): super().__init__() if not isinstance(max_size, int): @@ -58,12 +65,12 @@ def _convert_to_rgb(image): def image_transform( - image_size: int, - is_train: bool, - mean: Optional[Tuple[float, ...]] = None, - std: Optional[Tuple[float, ...]] = None, - resize_longest_max: bool = False, - fill_color: int = 0, + image_size: int, + is_train: bool, + mean: Optional[Tuple[float, ...]] = None, + std: Optional[Tuple[float, ...]] = None, + resize_longest_max: bool = False, + fill_color: int = 0, ): mean = mean or OPENAI_DATASET_MEAN if not isinstance(mean, (list, tuple)): @@ -79,25 +86,23 @@ def image_transform( normalize = Normalize(mean=mean, std=std) if is_train: - return Compose([ - RandomResizedCrop(image_size, scale=(0.9, 1.0), interpolation=InterpolationMode.BICUBIC), - _convert_to_rgb, - ToTensor(), - normalize, - ]) + return Compose( + [ + RandomResizedCrop(image_size, scale=(0.9, 1.0), interpolation=InterpolationMode.BICUBIC), + _convert_to_rgb, + ToTensor(), + normalize, + ] + ) else: if resize_longest_max: - transforms = [ - ResizeMaxSize(image_size, fill=fill_color) - ] + transforms = [ResizeMaxSize(image_size, fill=fill_color)] else: transforms = [ Resize(image_size, interpolation=InterpolationMode.BICUBIC), CenterCrop(image_size), ] - transforms.extend([ - _convert_to_rgb, - ToTensor(), - normalize, - ]) + transforms.extend( + [_convert_to_rgb, ToTensor(), normalize,] + ) return Compose(transforms) diff --git a/nemo/collections/multimodal/data/clip/clip_dataset.py b/nemo/collections/multimodal/data/clip/clip_dataset.py index 4153e4f1f271..f63a86dc9174 100644 --- a/nemo/collections/multimodal/data/clip/clip_dataset.py +++ b/nemo/collections/multimodal/data/clip/clip_dataset.py @@ -11,19 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import torch from functools import partial -from torch.utils.data import default_collate, Dataset -from typing import Any, List, Union, Dict, Optional +from typing import Any, Dict, List, Optional, Union + +import torch +from torch.utils.data import Dataset, default_collate from nemo.collections.multimodal.data.clip.augmentations.augmentations import image_transform -from nemo.collections.multimodal.data.clip.imagenet_zeroshot_data import openai_imagenet_template, imagenet_classnames +from nemo.collections.multimodal.data.clip.imagenet_zeroshot_data import imagenet_classnames, openai_imagenet_template from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( MegatronPretrainingRandomSampler, MegatronPretrainingSampler, ) - from nemo.collections.vision.data.megatron.image_folder import ImageFolder from nemo.collections.vision.data.megatron.vit_dataset import RandomSeedDataset @@ -68,7 +68,7 @@ def tokenize(texts: Union[str, List[str]], tokenizer: Any, context_length: int = if len(tokens) > context_length: tokens = tokens[:context_length] # Truncate tokens[-1] = eos_id - result[i, :len(tokens)] = torch.tensor(tokens) + result[i, : len(tokens)] = torch.tensor(tokens) if texts_is_str: result = result[0] @@ -80,26 +80,17 @@ def get_preprocess_fns(model_cfg, tokenizer=None, is_train=True): img_size = (model_cfg.vision.get("img_h"), model_cfg.vision.get("img_w")) img_mean = model_cfg.vision.get("img_mean") img_std = model_cfg.vision.get("img_std") - img_transform = image_transform( - img_size, - is_train=is_train, - mean=img_mean, - std=img_std, - ) + img_transform = image_transform(img_size, is_train=is_train, mean=img_mean, std=img_std,) text_transform = lambda x: x if tokenizer is not None: text_transform = partial( - tokenize, - tokenizer=tokenizer, - context_length=model_cfg.text.get("max_position_embeddings"), + tokenize, tokenizer=tokenizer, context_length=model_cfg.text.get("max_position_embeddings"), ) return img_transform, text_transform def build_train_valid_datasets( - model_cfg, - consumed_samples, - tokenizer=None, + model_cfg, consumed_samples, tokenizer=None, ): data_cfg = model_cfg.data @@ -149,10 +140,7 @@ def build_imagenet_validation_dataloader(model_cfg, tokenizer=None): if imagenet_path is None: return None - image_dataset = ImageFolder( - root=imagenet_path, - transform=val_image_transform, - ) + image_dataset = ImageFolder(root=imagenet_path, transform=val_image_transform,) image_batch_sampler = MegatronPretrainingSampler( total_samples=len(image_dataset), @@ -169,6 +157,7 @@ def custom_collate(batch): return None, None else: return default_collate(batch) + imagenet_val["images"] = torch.utils.data.DataLoader( image_dataset, batch_sampler=image_batch_sampler, diff --git a/nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py b/nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py index 86e3bbec42a0..c7387d37eba7 100644 --- a/nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py +++ b/nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py @@ -13,171 +13,1008 @@ # limitations under the License. -imagenet_classnames = ["tench", "goldfish", "great white shark", "tiger shark", "hammerhead shark", "electric ray", - "stingray", "rooster", "hen", "ostrich", "brambling", "goldfinch", "house finch", "junco", - "indigo bunting", "American robin", "bulbul", "jay", "magpie", "chickadee", "American dipper", - "kite (bird of prey)", "bald eagle", "vulture", "great grey owl", "fire salamander", - "smooth newt", "newt", "spotted salamander", "axolotl", "American bullfrog", "tree frog", - "tailed frog", "loggerhead sea turtle", "leatherback sea turtle", "mud turtle", "terrapin", - "box turtle", "banded gecko", "green iguana", "Carolina anole", - "desert grassland whiptail lizard", "agama", "frilled-necked lizard", "alligator lizard", - "Gila monster", "European green lizard", "chameleon", "Komodo dragon", "Nile crocodile", - "American alligator", "triceratops", "worm snake", "ring-necked snake", - "eastern hog-nosed snake", "smooth green snake", "kingsnake", "garter snake", "water snake", - "vine snake", "night snake", "boa constrictor", "African rock python", "Indian cobra", - "green mamba", "sea snake", "Saharan horned viper", "eastern diamondback rattlesnake", - "sidewinder rattlesnake", "trilobite", "harvestman", "scorpion", "yellow garden spider", - "barn spider", "European garden spider", "southern black widow", "tarantula", "wolf spider", - "tick", "centipede", "black grouse", "ptarmigan", "ruffed grouse", "prairie grouse", "peafowl", - "quail", "partridge", "african grey parrot", "macaw", "sulphur-crested cockatoo", "lorikeet", - "coucal", "bee eater", "hornbill", "hummingbird", "jacamar", "toucan", "duck", - "red-breasted merganser", "goose", "black swan", "tusker", "echidna", "platypus", "wallaby", - "koala", "wombat", "jellyfish", "sea anemone", "brain coral", "flatworm", "nematode", "conch", - "snail", "slug", "sea slug", "chiton", "chambered nautilus", "Dungeness crab", "rock crab", - "fiddler crab", "red king crab", "American lobster", "spiny lobster", "crayfish", "hermit crab", - "isopod", "white stork", "black stork", "spoonbill", "flamingo", "little blue heron", - "great egret", "bittern bird", "crane bird", "limpkin", "common gallinule", "American coot", - "bustard", "ruddy turnstone", "dunlin", "common redshank", "dowitcher", "oystercatcher", - "pelican", "king penguin", "albatross", "grey whale", "killer whale", "dugong", "sea lion", - "Chihuahua", "Japanese Chin", "Maltese", "Pekingese", "Shih Tzu", "King Charles Spaniel", - "Papillon", "toy terrier", "Rhodesian Ridgeback", "Afghan Hound", "Basset Hound", "Beagle", - "Bloodhound", "Bluetick Coonhound", "Black and Tan Coonhound", "Treeing Walker Coonhound", - "English foxhound", "Redbone Coonhound", "borzoi", "Irish Wolfhound", "Italian Greyhound", - "Whippet", "Ibizan Hound", "Norwegian Elkhound", "Otterhound", "Saluki", "Scottish Deerhound", - "Weimaraner", "Staffordshire Bull Terrier", "American Staffordshire Terrier", - "Bedlington Terrier", "Border Terrier", "Kerry Blue Terrier", "Irish Terrier", - "Norfolk Terrier", "Norwich Terrier", "Yorkshire Terrier", "Wire Fox Terrier", - "Lakeland Terrier", "Sealyham Terrier", "Airedale Terrier", "Cairn Terrier", - "Australian Terrier", "Dandie Dinmont Terrier", "Boston Terrier", "Miniature Schnauzer", - "Giant Schnauzer", "Standard Schnauzer", "Scottish Terrier", "Tibetan Terrier", - "Australian Silky Terrier", "Soft-coated Wheaten Terrier", "West Highland White Terrier", - "Lhasa Apso", "Flat-Coated Retriever", "Curly-coated Retriever", "Golden Retriever", - "Labrador Retriever", "Chesapeake Bay Retriever", "German Shorthaired Pointer", "Vizsla", - "English Setter", "Irish Setter", "Gordon Setter", "Brittany dog", "Clumber Spaniel", - "English Springer Spaniel", "Welsh Springer Spaniel", "Cocker Spaniel", "Sussex Spaniel", - "Irish Water Spaniel", "Kuvasz", "Schipperke", "Groenendael dog", "Malinois", "Briard", - "Australian Kelpie", "Komondor", "Old English Sheepdog", "Shetland Sheepdog", "collie", - "Border Collie", "Bouvier des Flandres dog", "Rottweiler", "German Shepherd Dog", "Dobermann", - "Miniature Pinscher", "Greater Swiss Mountain Dog", "Bernese Mountain Dog", - "Appenzeller Sennenhund", "Entlebucher Sennenhund", "Boxer", "Bullmastiff", "Tibetan Mastiff", - "French Bulldog", "Great Dane", "St. Bernard", "husky", "Alaskan Malamute", "Siberian Husky", - "Dalmatian", "Affenpinscher", "Basenji", "pug", "Leonberger", "Newfoundland dog", - "Great Pyrenees dog", "Samoyed", "Pomeranian", "Chow Chow", "Keeshond", "brussels griffon", - "Pembroke Welsh Corgi", "Cardigan Welsh Corgi", "Toy Poodle", "Miniature Poodle", - "Standard Poodle", "Mexican hairless dog (xoloitzcuintli)", "grey wolf", "Alaskan tundra wolf", - "red wolf or maned wolf", "coyote", "dingo", "dhole", "African wild dog", "hyena", "red fox", - "kit fox", "Arctic fox", "grey fox", "tabby cat", "tiger cat", "Persian cat", "Siamese cat", - "Egyptian Mau", "cougar", "lynx", "leopard", "snow leopard", "jaguar", "lion", "tiger", - "cheetah", "brown bear", "American black bear", "polar bear", "sloth bear", "mongoose", - "meerkat", "tiger beetle", "ladybug", "ground beetle", "longhorn beetle", "leaf beetle", - "dung beetle", "rhinoceros beetle", "weevil", "fly", "bee", "ant", "grasshopper", - "cricket insect", "stick insect", "cockroach", "praying mantis", "cicada", "leafhopper", - "lacewing", "dragonfly", "damselfly", "red admiral butterfly", "ringlet butterfly", - "monarch butterfly", "small white butterfly", "sulphur butterfly", "gossamer-winged butterfly", - "starfish", "sea urchin", "sea cucumber", "cottontail rabbit", "hare", "Angora rabbit", - "hamster", "porcupine", "fox squirrel", "marmot", "beaver", "guinea pig", "common sorrel horse", - "zebra", "pig", "wild boar", "warthog", "hippopotamus", "ox", "water buffalo", "bison", - "ram (adult male sheep)", "bighorn sheep", "Alpine ibex", "hartebeest", "impala (antelope)", - "gazelle", "arabian camel", "llama", "weasel", "mink", "European polecat", - "black-footed ferret", "otter", "skunk", "badger", "armadillo", "three-toed sloth", "orangutan", - "gorilla", "chimpanzee", "gibbon", "siamang", "guenon", "patas monkey", "baboon", "macaque", - "langur", "black-and-white colobus", "proboscis monkey", "marmoset", "white-headed capuchin", - "howler monkey", "titi monkey", "Geoffroy's spider monkey", "common squirrel monkey", - "ring-tailed lemur", "indri", "Asian elephant", "African bush elephant", "red panda", - "giant panda", "snoek fish", "eel", "silver salmon", "rock beauty fish", "clownfish", - "sturgeon", "gar fish", "lionfish", "pufferfish", "abacus", "abaya", "academic gown", - "accordion", "acoustic guitar", "aircraft carrier", "airliner", "airship", "altar", "ambulance", - "amphibious vehicle", "analog clock", "apiary", "apron", "trash can", "assault rifle", - "backpack", "bakery", "balance beam", "balloon", "ballpoint pen", "Band-Aid", "banjo", - "baluster / handrail", "barbell", "barber chair", "barbershop", "barn", "barometer", "barrel", - "wheelbarrow", "baseball", "basketball", "bassinet", "bassoon", "swimming cap", "bath towel", - "bathtub", "station wagon", "lighthouse", "beaker", "military hat (bearskin or shako)", - "beer bottle", "beer glass", "bell tower", "baby bib", "tandem bicycle", "bikini", - "ring binder", "binoculars", "birdhouse", "boathouse", "bobsleigh", "bolo tie", "poke bonnet", - "bookcase", "bookstore", "bottle cap", "hunting bow", "bow tie", "brass memorial plaque", "bra", - "breakwater", "breastplate", "broom", "bucket", "buckle", "bulletproof vest", - "high-speed train", "butcher shop", "taxicab", "cauldron", "candle", "cannon", "canoe", - "can opener", "cardigan", "car mirror", "carousel", "tool kit", "cardboard box / carton", - "car wheel", "automated teller machine", "cassette", "cassette player", "castle", "catamaran", - "CD player", "cello", "mobile phone", "chain", "chain-link fence", "chain mail", "chainsaw", - "storage chest", "chiffonier", "bell or wind chime", "china cabinet", "Christmas stocking", - "church", "movie theater", "cleaver", "cliff dwelling", "cloak", "clogs", "cocktail shaker", - "coffee mug", "coffeemaker", "spiral or coil", "combination lock", "computer keyboard", - "candy store", "container ship", "convertible", "corkscrew", "cornet", "cowboy boot", - "cowboy hat", "cradle", "construction crane", "crash helmet", "crate", "infant bed", - "Crock Pot", "croquet ball", "crutch", "cuirass", "dam", "desk", "desktop computer", - "rotary dial telephone", "diaper", "digital clock", "digital watch", "dining table", - "dishcloth", "dishwasher", "disc brake", "dock", "dog sled", "dome", "doormat", "drilling rig", - "drum", "drumstick", "dumbbell", "Dutch oven", "electric fan", "electric guitar", - "electric locomotive", "entertainment center", "envelope", "espresso machine", "face powder", - "feather boa", "filing cabinet", "fireboat", "fire truck", "fire screen", "flagpole", "flute", - "folding chair", "football helmet", "forklift", "fountain", "fountain pen", "four-poster bed", - "freight car", "French horn", "frying pan", "fur coat", "garbage truck", - "gas mask or respirator", "gas pump", "goblet", "go-kart", "golf ball", "golf cart", "gondola", - "gong", "gown", "grand piano", "greenhouse", "radiator grille", "grocery store", "guillotine", - "hair clip", "hair spray", "half-track", "hammer", "hamper", "hair dryer", "hand-held computer", - "handkerchief", "hard disk drive", "harmonica", "harp", "combine harvester", "hatchet", - "holster", "home theater", "honeycomb", "hook", "hoop skirt", "gymnastic horizontal bar", - "horse-drawn vehicle", "hourglass", "iPod", "clothes iron", "carved pumpkin", "jeans", "jeep", - "T-shirt", "jigsaw puzzle", "rickshaw", "joystick", "kimono", "knee pad", "knot", "lab coat", - "ladle", "lampshade", "laptop computer", "lawn mower", "lens cap", "letter opener", "library", - "lifeboat", "lighter", "limousine", "ocean liner", "lipstick", "slip-on shoe", "lotion", - "music speaker", "loupe magnifying glass", "sawmill", "magnetic compass", "messenger bag", - "mailbox", "tights", "one-piece bathing suit", "manhole cover", "maraca", "marimba", "mask", - "matchstick", "maypole", "maze", "measuring cup", "medicine cabinet", "megalith", "microphone", - "microwave oven", "military uniform", "milk can", "minibus", "miniskirt", "minivan", "missile", - "mitten", "mixing bowl", "mobile home", "ford model t", "modem", "monastery", "monitor", - "moped", "mortar and pestle", "graduation cap", "mosque", "mosquito net", "vespa", - "mountain bike", "tent", "computer mouse", "mousetrap", "moving van", "muzzle", "metal nail", - "neck brace", "necklace", "baby pacifier", "notebook computer", "obelisk", "oboe", "ocarina", - "odometer", "oil filter", "pipe organ", "oscilloscope", "overskirt", "bullock cart", - "oxygen mask", "product packet / packaging", "paddle", "paddle wheel", "padlock", "paintbrush", - "pajamas", "palace", "pan flute", "paper towel", "parachute", "parallel bars", "park bench", - "parking meter", "railroad car", "patio", "payphone", "pedestal", "pencil case", - "pencil sharpener", "perfume", "Petri dish", "photocopier", "plectrum", "Pickelhaube", - "picket fence", "pickup truck", "pier", "piggy bank", "pill bottle", "pillow", "ping-pong ball", - "pinwheel", "pirate ship", "drink pitcher", "block plane", "planetarium", "plastic bag", - "plate rack", "farm plow", "plunger", "Polaroid camera", "pole", "police van", "poncho", - "pool table", "soda bottle", "plant pot", "potter's wheel", "power drill", "prayer rug", - "printer", "prison", "missile", "projector", "hockey puck", "punching bag", "purse", "quill", - "quilt", "race car", "racket", "radiator", "radio", "radio telescope", "rain barrel", - "recreational vehicle", "fishing casting reel", "reflex camera", "refrigerator", - "remote control", "restaurant", "revolver", "rifle", "rocking chair", "rotisserie", "eraser", - "rugby ball", "ruler measuring stick", "sneaker", "safe", "safety pin", "salt shaker", "sandal", - "sarong", "saxophone", "scabbard", "weighing scale", "school bus", "schooner", "scoreboard", - "CRT monitor", "screw", "screwdriver", "seat belt", "sewing machine", "shield", "shoe store", - "shoji screen / room divider", "shopping basket", "shopping cart", "shovel", "shower cap", - "shower curtain", "ski", "balaclava ski mask", "sleeping bag", "slide rule", "sliding door", - "slot machine", "snorkel", "snowmobile", "snowplow", "soap dispenser", "soccer ball", "sock", - "solar thermal collector", "sombrero", "soup bowl", "keyboard space bar", "space heater", - "space shuttle", "spatula", "motorboat", "spider web", "spindle", "sports car", "spotlight", - "stage", "steam locomotive", "through arch bridge", "steel drum", "stethoscope", "scarf", - "stone wall", "stopwatch", "stove", "strainer", "tram", "stretcher", "couch", "stupa", - "submarine", "suit", "sundial", "sunglasses", "sunglasses", "sunscreen", "suspension bridge", - "mop", "sweatshirt", "swim trunks / shorts", "swing", "electrical switch", "syringe", - "table lamp", "tank", "tape player", "teapot", "teddy bear", "television", "tennis ball", - "thatched roof", "front curtain", "thimble", "threshing machine", "throne", "tile roof", - "toaster", "tobacco shop", "toilet seat", "torch", "totem pole", "tow truck", "toy store", - "tractor", "semi-trailer truck", "tray", "trench coat", "tricycle", "trimaran", "tripod", - "triumphal arch", "trolleybus", "trombone", "hot tub", "turnstile", "typewriter keyboard", - "umbrella", "unicycle", "upright piano", "vacuum cleaner", "vase", "vaulted or arched ceiling", - "velvet fabric", "vending machine", "vestment", "viaduct", "violin", "volleyball", - "waffle iron", "wall clock", "wallet", "wardrobe", "military aircraft", "sink", - "washing machine", "water bottle", "water jug", "water tower", "whiskey jug", "whistle", - "hair wig", "window screen", "window shade", "Windsor tie", "wine bottle", "airplane wing", - "wok", "wooden spoon", "wool", "split-rail fence", "shipwreck", "sailboat", "yurt", "website", - "comic book", "crossword", "traffic or street sign", "traffic light", "dust jacket", "menu", - "plate", "guacamole", "consomme", "hot pot", "trifle", "ice cream", "popsicle", "baguette", - "bagel", "pretzel", "cheeseburger", "hot dog", "mashed potatoes", "cabbage", "broccoli", - "cauliflower", "zucchini", "spaghetti squash", "acorn squash", "butternut squash", "cucumber", - "artichoke", "bell pepper", "cardoon", "mushroom", "Granny Smith apple", "strawberry", "orange", - "lemon", "fig", "pineapple", "banana", "jackfruit", "cherimoya (custard apple)", "pomegranate", - "hay", "carbonara", "chocolate syrup", "dough", "meatloaf", "pizza", "pot pie", "burrito", - "red wine", "espresso", "tea cup", "eggnog", "mountain", "bubble", "cliff", "coral reef", - "geyser", "lakeshore", "promontory", "sandbar", "beach", "valley", "volcano", "baseball player", - "bridegroom", "scuba diver", "rapeseed", "daisy", "yellow lady's slipper", "corn", "acorn", - "rose hip", "horse chestnut seed", "coral fungus", "agaric", "gyromitra", "stinkhorn mushroom", - "earth star fungus", "hen of the woods mushroom", "bolete", "corn cob", "toilet paper"] +imagenet_classnames = [ + "tench", + "goldfish", + "great white shark", + "tiger shark", + "hammerhead shark", + "electric ray", + "stingray", + "rooster", + "hen", + "ostrich", + "brambling", + "goldfinch", + "house finch", + "junco", + "indigo bunting", + "American robin", + "bulbul", + "jay", + "magpie", + "chickadee", + "American dipper", + "kite (bird of prey)", + "bald eagle", + "vulture", + "great grey owl", + "fire salamander", + "smooth newt", + "newt", + "spotted salamander", + "axolotl", + "American bullfrog", + "tree frog", + "tailed frog", + "loggerhead sea turtle", + "leatherback sea turtle", + "mud turtle", + "terrapin", + "box turtle", + "banded gecko", + "green iguana", + "Carolina anole", + "desert grassland whiptail lizard", + "agama", + "frilled-necked lizard", + "alligator lizard", + "Gila monster", + "European green lizard", + "chameleon", + "Komodo dragon", + "Nile crocodile", + "American alligator", + "triceratops", + "worm snake", + "ring-necked snake", + "eastern hog-nosed snake", + "smooth green snake", + "kingsnake", + "garter snake", + "water snake", + "vine snake", + "night snake", + "boa constrictor", + "African rock python", + "Indian cobra", + "green mamba", + "sea snake", + "Saharan horned viper", + "eastern diamondback rattlesnake", + "sidewinder rattlesnake", + "trilobite", + "harvestman", + "scorpion", + "yellow garden spider", + "barn spider", + "European garden spider", + "southern black widow", + "tarantula", + "wolf spider", + "tick", + "centipede", + "black grouse", + "ptarmigan", + "ruffed grouse", + "prairie grouse", + "peafowl", + "quail", + "partridge", + "african grey parrot", + "macaw", + "sulphur-crested cockatoo", + "lorikeet", + "coucal", + "bee eater", + "hornbill", + "hummingbird", + "jacamar", + "toucan", + "duck", + "red-breasted merganser", + "goose", + "black swan", + "tusker", + "echidna", + "platypus", + "wallaby", + "koala", + "wombat", + "jellyfish", + "sea anemone", + "brain coral", + "flatworm", + "nematode", + "conch", + "snail", + "slug", + "sea slug", + "chiton", + "chambered nautilus", + "Dungeness crab", + "rock crab", + "fiddler crab", + "red king crab", + "American lobster", + "spiny lobster", + "crayfish", + "hermit crab", + "isopod", + "white stork", + "black stork", + "spoonbill", + "flamingo", + "little blue heron", + "great egret", + "bittern bird", + "crane bird", + "limpkin", + "common gallinule", + "American coot", + "bustard", + "ruddy turnstone", + "dunlin", + "common redshank", + "dowitcher", + "oystercatcher", + "pelican", + "king penguin", + "albatross", + "grey whale", + "killer whale", + "dugong", + "sea lion", + "Chihuahua", + "Japanese Chin", + "Maltese", + "Pekingese", + "Shih Tzu", + "King Charles Spaniel", + "Papillon", + "toy terrier", + "Rhodesian Ridgeback", + "Afghan Hound", + "Basset Hound", + "Beagle", + "Bloodhound", + "Bluetick Coonhound", + "Black and Tan Coonhound", + "Treeing Walker Coonhound", + "English foxhound", + "Redbone Coonhound", + "borzoi", + "Irish Wolfhound", + "Italian Greyhound", + "Whippet", + "Ibizan Hound", + "Norwegian Elkhound", + "Otterhound", + "Saluki", + "Scottish Deerhound", + "Weimaraner", + "Staffordshire Bull Terrier", + "American Staffordshire Terrier", + "Bedlington Terrier", + "Border Terrier", + "Kerry Blue Terrier", + "Irish Terrier", + "Norfolk Terrier", + "Norwich Terrier", + "Yorkshire Terrier", + "Wire Fox Terrier", + "Lakeland Terrier", + "Sealyham Terrier", + "Airedale Terrier", + "Cairn Terrier", + "Australian Terrier", + "Dandie Dinmont Terrier", + "Boston Terrier", + "Miniature Schnauzer", + "Giant Schnauzer", + "Standard Schnauzer", + "Scottish Terrier", + "Tibetan Terrier", + "Australian Silky Terrier", + "Soft-coated Wheaten Terrier", + "West Highland White Terrier", + "Lhasa Apso", + "Flat-Coated Retriever", + "Curly-coated Retriever", + "Golden Retriever", + "Labrador Retriever", + "Chesapeake Bay Retriever", + "German Shorthaired Pointer", + "Vizsla", + "English Setter", + "Irish Setter", + "Gordon Setter", + "Brittany dog", + "Clumber Spaniel", + "English Springer Spaniel", + "Welsh Springer Spaniel", + "Cocker Spaniel", + "Sussex Spaniel", + "Irish Water Spaniel", + "Kuvasz", + "Schipperke", + "Groenendael dog", + "Malinois", + "Briard", + "Australian Kelpie", + "Komondor", + "Old English Sheepdog", + "Shetland Sheepdog", + "collie", + "Border Collie", + "Bouvier des Flandres dog", + "Rottweiler", + "German Shepherd Dog", + "Dobermann", + "Miniature Pinscher", + "Greater Swiss Mountain Dog", + "Bernese Mountain Dog", + "Appenzeller Sennenhund", + "Entlebucher Sennenhund", + "Boxer", + "Bullmastiff", + "Tibetan Mastiff", + "French Bulldog", + "Great Dane", + "St. Bernard", + "husky", + "Alaskan Malamute", + "Siberian Husky", + "Dalmatian", + "Affenpinscher", + "Basenji", + "pug", + "Leonberger", + "Newfoundland dog", + "Great Pyrenees dog", + "Samoyed", + "Pomeranian", + "Chow Chow", + "Keeshond", + "brussels griffon", + "Pembroke Welsh Corgi", + "Cardigan Welsh Corgi", + "Toy Poodle", + "Miniature Poodle", + "Standard Poodle", + "Mexican hairless dog (xoloitzcuintli)", + "grey wolf", + "Alaskan tundra wolf", + "red wolf or maned wolf", + "coyote", + "dingo", + "dhole", + "African wild dog", + "hyena", + "red fox", + "kit fox", + "Arctic fox", + "grey fox", + "tabby cat", + "tiger cat", + "Persian cat", + "Siamese cat", + "Egyptian Mau", + "cougar", + "lynx", + "leopard", + "snow leopard", + "jaguar", + "lion", + "tiger", + "cheetah", + "brown bear", + "American black bear", + "polar bear", + "sloth bear", + "mongoose", + "meerkat", + "tiger beetle", + "ladybug", + "ground beetle", + "longhorn beetle", + "leaf beetle", + "dung beetle", + "rhinoceros beetle", + "weevil", + "fly", + "bee", + "ant", + "grasshopper", + "cricket insect", + "stick insect", + "cockroach", + "praying mantis", + "cicada", + "leafhopper", + "lacewing", + "dragonfly", + "damselfly", + "red admiral butterfly", + "ringlet butterfly", + "monarch butterfly", + "small white butterfly", + "sulphur butterfly", + "gossamer-winged butterfly", + "starfish", + "sea urchin", + "sea cucumber", + "cottontail rabbit", + "hare", + "Angora rabbit", + "hamster", + "porcupine", + "fox squirrel", + "marmot", + "beaver", + "guinea pig", + "common sorrel horse", + "zebra", + "pig", + "wild boar", + "warthog", + "hippopotamus", + "ox", + "water buffalo", + "bison", + "ram (adult male sheep)", + "bighorn sheep", + "Alpine ibex", + "hartebeest", + "impala (antelope)", + "gazelle", + "arabian camel", + "llama", + "weasel", + "mink", + "European polecat", + "black-footed ferret", + "otter", + "skunk", + "badger", + "armadillo", + "three-toed sloth", + "orangutan", + "gorilla", + "chimpanzee", + "gibbon", + "siamang", + "guenon", + "patas monkey", + "baboon", + "macaque", + "langur", + "black-and-white colobus", + "proboscis monkey", + "marmoset", + "white-headed capuchin", + "howler monkey", + "titi monkey", + "Geoffroy's spider monkey", + "common squirrel monkey", + "ring-tailed lemur", + "indri", + "Asian elephant", + "African bush elephant", + "red panda", + "giant panda", + "snoek fish", + "eel", + "silver salmon", + "rock beauty fish", + "clownfish", + "sturgeon", + "gar fish", + "lionfish", + "pufferfish", + "abacus", + "abaya", + "academic gown", + "accordion", + "acoustic guitar", + "aircraft carrier", + "airliner", + "airship", + "altar", + "ambulance", + "amphibious vehicle", + "analog clock", + "apiary", + "apron", + "trash can", + "assault rifle", + "backpack", + "bakery", + "balance beam", + "balloon", + "ballpoint pen", + "Band-Aid", + "banjo", + "baluster / handrail", + "barbell", + "barber chair", + "barbershop", + "barn", + "barometer", + "barrel", + "wheelbarrow", + "baseball", + "basketball", + "bassinet", + "bassoon", + "swimming cap", + "bath towel", + "bathtub", + "station wagon", + "lighthouse", + "beaker", + "military hat (bearskin or shako)", + "beer bottle", + "beer glass", + "bell tower", + "baby bib", + "tandem bicycle", + "bikini", + "ring binder", + "binoculars", + "birdhouse", + "boathouse", + "bobsleigh", + "bolo tie", + "poke bonnet", + "bookcase", + "bookstore", + "bottle cap", + "hunting bow", + "bow tie", + "brass memorial plaque", + "bra", + "breakwater", + "breastplate", + "broom", + "bucket", + "buckle", + "bulletproof vest", + "high-speed train", + "butcher shop", + "taxicab", + "cauldron", + "candle", + "cannon", + "canoe", + "can opener", + "cardigan", + "car mirror", + "carousel", + "tool kit", + "cardboard box / carton", + "car wheel", + "automated teller machine", + "cassette", + "cassette player", + "castle", + "catamaran", + "CD player", + "cello", + "mobile phone", + "chain", + "chain-link fence", + "chain mail", + "chainsaw", + "storage chest", + "chiffonier", + "bell or wind chime", + "china cabinet", + "Christmas stocking", + "church", + "movie theater", + "cleaver", + "cliff dwelling", + "cloak", + "clogs", + "cocktail shaker", + "coffee mug", + "coffeemaker", + "spiral or coil", + "combination lock", + "computer keyboard", + "candy store", + "container ship", + "convertible", + "corkscrew", + "cornet", + "cowboy boot", + "cowboy hat", + "cradle", + "construction crane", + "crash helmet", + "crate", + "infant bed", + "Crock Pot", + "croquet ball", + "crutch", + "cuirass", + "dam", + "desk", + "desktop computer", + "rotary dial telephone", + "diaper", + "digital clock", + "digital watch", + "dining table", + "dishcloth", + "dishwasher", + "disc brake", + "dock", + "dog sled", + "dome", + "doormat", + "drilling rig", + "drum", + "drumstick", + "dumbbell", + "Dutch oven", + "electric fan", + "electric guitar", + "electric locomotive", + "entertainment center", + "envelope", + "espresso machine", + "face powder", + "feather boa", + "filing cabinet", + "fireboat", + "fire truck", + "fire screen", + "flagpole", + "flute", + "folding chair", + "football helmet", + "forklift", + "fountain", + "fountain pen", + "four-poster bed", + "freight car", + "French horn", + "frying pan", + "fur coat", + "garbage truck", + "gas mask or respirator", + "gas pump", + "goblet", + "go-kart", + "golf ball", + "golf cart", + "gondola", + "gong", + "gown", + "grand piano", + "greenhouse", + "radiator grille", + "grocery store", + "guillotine", + "hair clip", + "hair spray", + "half-track", + "hammer", + "hamper", + "hair dryer", + "hand-held computer", + "handkerchief", + "hard disk drive", + "harmonica", + "harp", + "combine harvester", + "hatchet", + "holster", + "home theater", + "honeycomb", + "hook", + "hoop skirt", + "gymnastic horizontal bar", + "horse-drawn vehicle", + "hourglass", + "iPod", + "clothes iron", + "carved pumpkin", + "jeans", + "jeep", + "T-shirt", + "jigsaw puzzle", + "rickshaw", + "joystick", + "kimono", + "knee pad", + "knot", + "lab coat", + "ladle", + "lampshade", + "laptop computer", + "lawn mower", + "lens cap", + "letter opener", + "library", + "lifeboat", + "lighter", + "limousine", + "ocean liner", + "lipstick", + "slip-on shoe", + "lotion", + "music speaker", + "loupe magnifying glass", + "sawmill", + "magnetic compass", + "messenger bag", + "mailbox", + "tights", + "one-piece bathing suit", + "manhole cover", + "maraca", + "marimba", + "mask", + "matchstick", + "maypole", + "maze", + "measuring cup", + "medicine cabinet", + "megalith", + "microphone", + "microwave oven", + "military uniform", + "milk can", + "minibus", + "miniskirt", + "minivan", + "missile", + "mitten", + "mixing bowl", + "mobile home", + "ford model t", + "modem", + "monastery", + "monitor", + "moped", + "mortar and pestle", + "graduation cap", + "mosque", + "mosquito net", + "vespa", + "mountain bike", + "tent", + "computer mouse", + "mousetrap", + "moving van", + "muzzle", + "metal nail", + "neck brace", + "necklace", + "baby pacifier", + "notebook computer", + "obelisk", + "oboe", + "ocarina", + "odometer", + "oil filter", + "pipe organ", + "oscilloscope", + "overskirt", + "bullock cart", + "oxygen mask", + "product packet / packaging", + "paddle", + "paddle wheel", + "padlock", + "paintbrush", + "pajamas", + "palace", + "pan flute", + "paper towel", + "parachute", + "parallel bars", + "park bench", + "parking meter", + "railroad car", + "patio", + "payphone", + "pedestal", + "pencil case", + "pencil sharpener", + "perfume", + "Petri dish", + "photocopier", + "plectrum", + "Pickelhaube", + "picket fence", + "pickup truck", + "pier", + "piggy bank", + "pill bottle", + "pillow", + "ping-pong ball", + "pinwheel", + "pirate ship", + "drink pitcher", + "block plane", + "planetarium", + "plastic bag", + "plate rack", + "farm plow", + "plunger", + "Polaroid camera", + "pole", + "police van", + "poncho", + "pool table", + "soda bottle", + "plant pot", + "potter's wheel", + "power drill", + "prayer rug", + "printer", + "prison", + "missile", + "projector", + "hockey puck", + "punching bag", + "purse", + "quill", + "quilt", + "race car", + "racket", + "radiator", + "radio", + "radio telescope", + "rain barrel", + "recreational vehicle", + "fishing casting reel", + "reflex camera", + "refrigerator", + "remote control", + "restaurant", + "revolver", + "rifle", + "rocking chair", + "rotisserie", + "eraser", + "rugby ball", + "ruler measuring stick", + "sneaker", + "safe", + "safety pin", + "salt shaker", + "sandal", + "sarong", + "saxophone", + "scabbard", + "weighing scale", + "school bus", + "schooner", + "scoreboard", + "CRT monitor", + "screw", + "screwdriver", + "seat belt", + "sewing machine", + "shield", + "shoe store", + "shoji screen / room divider", + "shopping basket", + "shopping cart", + "shovel", + "shower cap", + "shower curtain", + "ski", + "balaclava ski mask", + "sleeping bag", + "slide rule", + "sliding door", + "slot machine", + "snorkel", + "snowmobile", + "snowplow", + "soap dispenser", + "soccer ball", + "sock", + "solar thermal collector", + "sombrero", + "soup bowl", + "keyboard space bar", + "space heater", + "space shuttle", + "spatula", + "motorboat", + "spider web", + "spindle", + "sports car", + "spotlight", + "stage", + "steam locomotive", + "through arch bridge", + "steel drum", + "stethoscope", + "scarf", + "stone wall", + "stopwatch", + "stove", + "strainer", + "tram", + "stretcher", + "couch", + "stupa", + "submarine", + "suit", + "sundial", + "sunglasses", + "sunglasses", + "sunscreen", + "suspension bridge", + "mop", + "sweatshirt", + "swim trunks / shorts", + "swing", + "electrical switch", + "syringe", + "table lamp", + "tank", + "tape player", + "teapot", + "teddy bear", + "television", + "tennis ball", + "thatched roof", + "front curtain", + "thimble", + "threshing machine", + "throne", + "tile roof", + "toaster", + "tobacco shop", + "toilet seat", + "torch", + "totem pole", + "tow truck", + "toy store", + "tractor", + "semi-trailer truck", + "tray", + "trench coat", + "tricycle", + "trimaran", + "tripod", + "triumphal arch", + "trolleybus", + "trombone", + "hot tub", + "turnstile", + "typewriter keyboard", + "umbrella", + "unicycle", + "upright piano", + "vacuum cleaner", + "vase", + "vaulted or arched ceiling", + "velvet fabric", + "vending machine", + "vestment", + "viaduct", + "violin", + "volleyball", + "waffle iron", + "wall clock", + "wallet", + "wardrobe", + "military aircraft", + "sink", + "washing machine", + "water bottle", + "water jug", + "water tower", + "whiskey jug", + "whistle", + "hair wig", + "window screen", + "window shade", + "Windsor tie", + "wine bottle", + "airplane wing", + "wok", + "wooden spoon", + "wool", + "split-rail fence", + "shipwreck", + "sailboat", + "yurt", + "website", + "comic book", + "crossword", + "traffic or street sign", + "traffic light", + "dust jacket", + "menu", + "plate", + "guacamole", + "consomme", + "hot pot", + "trifle", + "ice cream", + "popsicle", + "baguette", + "bagel", + "pretzel", + "cheeseburger", + "hot dog", + "mashed potatoes", + "cabbage", + "broccoli", + "cauliflower", + "zucchini", + "spaghetti squash", + "acorn squash", + "butternut squash", + "cucumber", + "artichoke", + "bell pepper", + "cardoon", + "mushroom", + "Granny Smith apple", + "strawberry", + "orange", + "lemon", + "fig", + "pineapple", + "banana", + "jackfruit", + "cherimoya (custard apple)", + "pomegranate", + "hay", + "carbonara", + "chocolate syrup", + "dough", + "meatloaf", + "pizza", + "pot pie", + "burrito", + "red wine", + "espresso", + "tea cup", + "eggnog", + "mountain", + "bubble", + "cliff", + "coral reef", + "geyser", + "lakeshore", + "promontory", + "sandbar", + "beach", + "valley", + "volcano", + "baseball player", + "bridegroom", + "scuba diver", + "rapeseed", + "daisy", + "yellow lady's slipper", + "corn", + "acorn", + "rose hip", + "horse chestnut seed", + "coral fungus", + "agaric", + "gyromitra", + "stinkhorn mushroom", + "earth star fungus", + "hen of the woods mushroom", + "bolete", + "corn cob", + "toilet paper", +] openai_imagenet_template = [ lambda c: f'a bad photo of a {c}.', diff --git a/nemo/collections/multimodal/data/common/data_samplers.py b/nemo/collections/multimodal/data/common/data_samplers.py index 5693ee617c1b..7944087fee72 100644 --- a/nemo/collections/multimodal/data/common/data_samplers.py +++ b/nemo/collections/multimodal/data/common/data_samplers.py @@ -11,8 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import torch from multiprocessing import Value + +import torch from webdataset.pytorch import IterableDataset @@ -28,17 +29,16 @@ def get_value(self): class WDSUrlsRandomSampler(IterableDataset): - def __init__( - self, - urls, - total_urls: int, - chunk_size: int, - consumed_samples: int, - data_parallel_rank: int, - data_parallel_size: int, - drop_last: bool, - data_sharding: bool, + self, + urls, + total_urls: int, + chunk_size: int, + consumed_samples: int, + data_parallel_rank: int, + data_parallel_size: int, + drop_last: bool, + data_sharding: bool, ): r"""Sampler for WebDataset Urls with data parallelism. Args: @@ -80,8 +80,9 @@ def __iter__(self): if worker_info is not None: worker_id, num_workers = worker_info.id, worker_info.num_workers - self.consumed_urls = self.consumed_samples // self.data_parallel_size \ - // self.chunk_size * self.data_parallel_size + self.consumed_urls = ( + self.consumed_samples // self.data_parallel_size // self.chunk_size * self.data_parallel_size + ) if self.drop_last or self.remaining_urls == 0: active_total_urls = self.total_urls - self.remaining_urls @@ -106,10 +107,9 @@ def __iter__(self): full_bucket_offset = current_epoch_urls g = torch.Generator() g.manual_seed(self.epoch.get_value()) - idx_range_total = \ - torch.randperm(full_bucket_size, generator=g).tolist() + idx_range_total = torch.randperm(full_bucket_size, generator=g).tolist() idx_range_active = idx_range_total[full_bucket_offset:] - idx_range = idx_range_active[self.data_parallel_rank::self.data_parallel_size] + idx_range = idx_range_active[self.data_parallel_rank :: self.data_parallel_size] # Use additional permutation to replace out-of-range indices when drop_last is False additional_random_idx = torch.randperm(self.total_urls, generator=g).tolist() diff --git a/nemo/collections/multimodal/data/common/webdataset.py b/nemo/collections/multimodal/data/common/webdataset.py index 0dd47a5a9658..5ce77d6f3ba7 100644 --- a/nemo/collections/multimodal/data/common/webdataset.py +++ b/nemo/collections/multimodal/data/common/webdataset.py @@ -14,24 +14,22 @@ import io import itertools import os -import os import pickle import random import re +from typing import Callable, List, Union + import torch.distributed as dist -from PIL import Image -from PIL import Image +import webdataset as wds from botocore.config import Config -from typing import Callable -from typing import Callable, Union, List +from PIL import Image +from webdataset import WebDataset from webdataset.filters import _shuffle from webdataset.utils import pytorch_worker_info -import webdataset as wds from nemo.collections.multimodal.data.common.data_samplers import SharedEpoch, WDSUrlsRandomSampler from nemo.core.classes import IterableDataset as NeMoIterableDataset from nemo.utils import logging -from webdataset import WebDataset try: from megatron.core import parallel_state @@ -49,11 +47,7 @@ class detshuffle2(wds.PipelineStage): def __init__( - self, - bufsize=1000, - initial=100, - seed=0, - epoch=-1, + self, bufsize=1000, initial=100, seed=0, epoch=-1, ): self.bufsize = bufsize self.initial = initial @@ -108,16 +102,17 @@ def get_world_size(): class WebDatasetCommon(NeMoIterableDataset): - - def __init__(self, - dataset_cfg, - map_fn: Callable, - compose_fn: Union[Callable, List[Callable]], - consumed_samples: int, - filter_fn: Callable = None, - gen_cfg=None, - decode_fn: Callable = None, - is_train=True): + def __init__( + self, + dataset_cfg, + map_fn: Callable, + compose_fn: Union[Callable, List[Callable]], + consumed_samples: int, + filter_fn: Callable = None, + gen_cfg=None, + decode_fn: Callable = None, + is_train=True, + ): super().__init__() self.dataset_cfg = dataset_cfg @@ -175,6 +170,7 @@ def __init__(self, train_info["total_key_count"] = int(train_info["total_key_count"] * self.filterings.estimated_portion) from webdataset import warn_and_continue + train_dataset, epoch = self._get_webdataset_and_epoch() train_dataset = train_dataset.compose(detshuffle2(bufsize=shuffle_buffer_size, epoch=epoch)) train_dataset = train_dataset.decode(decode_fn, handler=warn_and_continue) @@ -214,7 +210,9 @@ def _get_webdataset_and_epoch(self): if not self.infinite_sampler: logging.info(f'Initiating Webdataset Random Sampler..') - assert self.filterings is None, 'Webdataset Random Sampler should not be used with filters. Switch to infinite sampler' + assert ( + self.filterings is None + ), 'Webdataset Random Sampler should not be used with filters. Switch to infinite sampler' shards_train_list = WDSUrlsRandomSampler( urls=shards_train_list, total_urls=len(shards_train_list), @@ -228,9 +226,7 @@ def _get_webdataset_and_epoch(self): epoch = shards_train_list.epoch train_dataset = WebDataset( - shards_train_list, - handler=warn_and_continue, - resampled=self.infinite_sampler or False, + shards_train_list, handler=warn_and_continue, resampled=self.infinite_sampler or False, ) return train_dataset, epoch diff --git a/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py b/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py index bd9a183ac356..5392d4ed827d 100644 --- a/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py +++ b/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. import pickle +from pathlib import Path + import torch from PIL import Image -from pathlib import Path from torch.utils.data import Dataset from torchvision import transforms @@ -26,14 +27,14 @@ class DreamBoothDataset(Dataset): """ def __init__( - self, - instance_data_root, - instance_prompt, - reg_data_root=None, - reg_prompt=None, - size=512, - center_crop=False, - repeat=100, + self, + instance_data_root, + instance_prompt, + reg_data_root=None, + reg_prompt=None, + size=512, + center_crop=False, + repeat=100, ): self.size = size self.center_crop = center_crop diff --git a/nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py b/nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py index 999ef62aaf3b..37f4518528a9 100644 --- a/nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py +++ b/nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py @@ -15,28 +15,29 @@ import json import math +from pathlib import Path +from typing import Any + import numpy as np import torch import torchvision -from PIL import Image from einops import rearrange -from pathlib import Path +from PIL import Image from torch.utils.data import Dataset -from typing import Any from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import construct_image_augmentations class EditDataset(Dataset): def __init__( - self, - path: str, - split: str = "train", - splits: tuple[float, float, float] = (0.95, 0.04, 0.01), - min_resize_res: int = 256, - max_resize_res: int = 256, - crop_res: int = 256, - flip_prob: float = 0.0, + self, + path: str, + split: str = "train", + splits: tuple[float, float, float] = (0.95, 0.04, 0.01), + min_resize_res: int = 256, + max_resize_res: int = 256, + crop_res: int = 256, + flip_prob: float = 0.0, ): assert split in ("train", "val", "test") assert sum(splits) == 1 @@ -88,11 +89,7 @@ def __getitem__(self, i: int) -> dict[str, Any]: class EditDatasetEval(Dataset): def __init__( - self, - path: str, - split: str = "train", - splits: tuple[float, float, float] = (0.9, 0.05, 0.05), - res: int = 256, + self, path: str, split: str = "train", splits: tuple[float, float, float] = (0.9, 0.05, 0.05), res: int = 256, ): assert split in ("train", "val", "test") assert sum(splits) == 1 diff --git a/nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py b/nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py index 3e83a5fd5c69..eba00f96c0c2 100644 --- a/nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py +++ b/nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py @@ -20,13 +20,15 @@ def construct_clip_augmentations(n_px=224): def _convert_image_to_rgb(image): return image.convert("RGB") - return transforms.Compose([ - transforms.Resize(n_px, interpolation=transforms.InterpolationMode.BICUBIC), - transforms.CenterCrop(n_px), - _convert_image_to_rgb, - transforms.ToTensor(), - transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), - ]) + return transforms.Compose( + [ + transforms.Resize(n_px, interpolation=transforms.InterpolationMode.BICUBIC), + transforms.CenterCrop(n_px), + _convert_image_to_rgb, + transforms.ToTensor(), + transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), + ] + ) def construct_image_augmentations(augmentation_dict, normalize=True): @@ -34,8 +36,8 @@ def construct_image_augmentations(augmentation_dict, normalize=True): for aug in augmentation_dict: if aug == 'resize_smallest_side': img_size = int(augmentation_dict[aug]) - train_img_transform.append(transforms.Resize( - img_size, interpolation=transforms.InterpolationMode.BICUBIC, antialias=True) + train_img_transform.append( + transforms.Resize(img_size, interpolation=transforms.InterpolationMode.BICUBIC, antialias=True) ) elif aug == 'center_crop_h_w': @@ -60,10 +62,7 @@ def construct_image_augmentations(augmentation_dict, normalize=True): # Always need to convert data to tensor train_img_transform.append(transforms.ToTensor()) if normalize: - train_img_transform.append(transforms.Normalize( - (0.5, 0.5, 0.5), - (0.5, 0.5, 0.5) - )) + train_img_transform.append(transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))) train_img_transform = transforms.Compose(train_img_transform) return train_img_transform diff --git a/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py b/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py index d68d52aac9b1..9c98ec52b6d7 100644 --- a/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py +++ b/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py @@ -21,8 +21,7 @@ def build_train_valid_datasets( - model_cfg, - consumed_samples, + model_cfg, consumed_samples, ): data_cfg = model_cfg.data @@ -36,7 +35,7 @@ def tuple_to_dict(inp): def transform_fn(sample): image, text = sample["jpg"], sample["txt"] - # TODO : If no agumentations just return the image ? + # TODO : If no agumentations just return the image ? img_transform = construct_image_augmentations(data_cfg.train.get("augmentations", None)) text_transform = identical_transform return img_transform(image), text_transform(text) @@ -63,8 +62,7 @@ def transform_fn(sample): def build_train_valid_precached_datasets( - model_cfg, - consumed_samples, + model_cfg, consumed_samples, ): data_cfg = model_cfg.data diff --git a/nemo/collections/multimodal/data/stable_diffusion/wds_sampler.py b/nemo/collections/multimodal/data/stable_diffusion/wds_sampler.py index dca19f6948f6..c1664a028a3a 100644 --- a/nemo/collections/multimodal/data/stable_diffusion/wds_sampler.py +++ b/nemo/collections/multimodal/data/stable_diffusion/wds_sampler.py @@ -59,7 +59,8 @@ def on_train_epoch_start(self, trainer, pl_module): # This part assume that when we resume, we are using the same num of gpus and also same batchsize as before epoch = trainer.global_step * self.ga // trainer.num_training_batches print( - f'WebdataLoaderSampler Calculated epoch={epoch}, num_iters={num_iters}, num_training_batches={trainer.num_training_batches}') + f'WebdataLoaderSampler Calculated epoch={epoch}, num_iters={num_iters}, num_training_batches={trainer.num_training_batches}' + ) if pl_module.current_epoch != epoch: print(f'Warning: Calculated Epoch={epoch} is not equal to pyt-lightning epoch={pl_module.current_epoch}') diff --git a/nemo/collections/multimodal/data/stable_diffusion/webdataset.py b/nemo/collections/multimodal/data/stable_diffusion/webdataset.py index 06411285a267..ab49c7f89026 100644 --- a/nemo/collections/multimodal/data/stable_diffusion/webdataset.py +++ b/nemo/collections/multimodal/data/stable_diffusion/webdataset.py @@ -11,16 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import boto3 import io import json import os import pickle import random import re + +import boto3 import torch.distributed as dist -from PIL import Image from botocore.config import Config +from PIL import Image from torch.utils.data import IterableDataset from webdataset.utils import pytorch_worker_info @@ -103,15 +104,15 @@ class ShardListWithResumes(IterableDataset): """ def __init__( - self, - urls, - epoch_shuffle=False, - shuffle=True, - split_by_node=True, - split_by_worker=True, - chunk_size=1, - resume_flag=True, - verbose=False + self, + urls, + epoch_shuffle=False, + shuffle=True, + split_by_node=True, + split_by_worker=True, + chunk_size=1, + resume_flag=True, + verbose=False, ): r"""Create a ShardList. Args: @@ -191,7 +192,8 @@ def __iter__(self): if self.verbose: print( - f'Number of URLs after splitting: {len(urls)}. rank/world_size={rank}/{world_size} worker_id/num_workers={worker_id}/{num_workers}') + f'Number of URLs after splitting: {len(urls)}. rank/world_size={rank}/{world_size} worker_id/num_workers={worker_id}/{num_workers}' + ) if self.shuffle: random.Random(self.epoch + 17).shuffle(urls) @@ -203,9 +205,11 @@ def __iter__(self): if self.verbose: print( - f'Number of URLS after using start_index_per_worker: {len(urls)}. self.start_index={self.start_index} start_index_per_worker={start_index_per_worker}') + f'Number of URLS after using start_index_per_worker: {len(urls)}. self.start_index={self.start_index} start_index_per_worker={start_index_per_worker}' + ) print( - f'PytorchShardList Rank=<{rank}/{world_size}> Worker=<{worker_id}/{num_workers}> receives {len(urls)} URLs (TARs)') + f'PytorchShardList Rank=<{rank}/{world_size}> Worker=<{worker_id}/{num_workers}> receives {len(urls)} URLs (TARs)' + ) for url in urls: yield dict(url=url) @@ -244,9 +248,7 @@ def __init__(self, cfg, is_train=True): assert self.webdata_cfg.pbss_credentials_file is not None with open(self.webdata_cfg.pbss_credentials_file) as fin: self.credentials = json.load(fin) - config = Config(connect_timeout=30, - signature_version="s3", - retries={"max_attempts": 999999}) + config = Config(connect_timeout=30, signature_version="s3", retries={"max_attempts": 999999}) self.s3 = boto3.client('s3', **self.credentials, config=config) self.bucket = self.webdata_cfg.bucket self.local_root_path = None @@ -322,6 +324,7 @@ def tuple_to_dict(inp): # Train dataset object from webdataset import warn_and_continue + if self.infinite_sampler: rank, world_size, worker_id, num_workers = pytorch_worker_info() epoch_length = train_info["total_key_count"] // self.batch_size // world_size @@ -355,16 +358,14 @@ def tuple_to_dict(inp): ) if self.filterings is not None: if self.filterings.resolution is not None: - train_dataset = train_dataset.select(build_resolution_filter(**self.filterings.resolution, image_idx=0)) + train_dataset = train_dataset.select( + build_resolution_filter(**self.filterings.resolution, image_idx=0) + ) # Add additional augmentation - train_dataset = (train_dataset - .map_tuple( - self.img_transform, - self.text_transform - ) # Augmentation - .compose(tuple_to_dict) # Converting tuple to data dict - ) + train_dataset = train_dataset.map_tuple(self.img_transform, self.text_transform).compose( # Augmentation + tuple_to_dict + ) # Converting tuple to data dict train_dataset.total_images = train_info["total_key_count"] # Set epoch length if using infinite sampler diff --git a/nemo/collections/multimodal/data/stable_diffusion/webdataset_utils.py b/nemo/collections/multimodal/data/stable_diffusion/webdataset_utils.py index 3fcc99b3ce26..2e5b7d6505ab 100644 --- a/nemo/collections/multimodal/data/stable_diffusion/webdataset_utils.py +++ b/nemo/collections/multimodal/data/stable_diffusion/webdataset_utils.py @@ -14,18 +14,16 @@ import io import os import sys +from urllib.parse import urlparse + import webdataset.gopen as gopen_webdata import yaml -from urllib.parse import urlparse +from webdataset import cache, filters, shardlists from webdataset.compat import FluidInterface from webdataset.handlers import reraise_exception from webdataset.pipeline import DataPipeline from webdataset.pytorch import IterableDataset -from webdataset.tariterators import tar_file_expander, group_by_keys - -from webdataset import cache -from webdataset import filters -from webdataset import shardlists +from webdataset.tariterators import group_by_keys, tar_file_expander # Number of attempts to read aws objects. _NUM_OBJECT_STORE_READ_ATTEMPTS = 10 @@ -77,7 +75,7 @@ def gopen(url, mode="rb", bufsize=8192, **kw): object_content = s3_response_object['Body'].read() # This is a check to verify is the object is fully read. - full_read = (s3_response_object['ContentLength'] == len(object_content)) + full_read = s3_response_object['ContentLength'] == len(object_content) if full_read: return io.BytesIO(object_content) else: @@ -131,11 +129,14 @@ def url_opener(data, handler=reraise_exception, **kw): # Define a new tarfile_samples -def tarfile_samples(src, handler=reraise_exception, - load_from_object_store=False, - s3_client=None, - s3_bucket_name=None, - local_root_path=None): +def tarfile_samples( + src, + handler=reraise_exception, + load_from_object_store=False, + s3_client=None, + s3_bucket_name=None, + local_root_path=None, +): r""" Given an iterator of filenames, this function opens the URL streams and groups data by keys. @@ -170,20 +171,20 @@ class WebDataset(DataPipeline, FluidInterface): r"""Webdataset class modified to support loading from object store.""" def __init__( - self, - urls, - handler=reraise_exception, - resampled=False, - shardshuffle=None, - cache_size=-1, - cache_dir=None, - detshuffle=False, - nodesplitter=shardlists.single_node_only, - verbose=False, - load_from_object_store=False, - s3_client=None, - s3_bucket_name=None, - local_root_path=None, + self, + urls, + handler=reraise_exception, + resampled=False, + shardshuffle=None, + cache_size=-1, + cache_dir=None, + detshuffle=False, + nodesplitter=shardlists.single_node_only, + verbose=False, + load_from_object_store=False, + s3_client=None, + s3_bucket_name=None, + local_root_path=None, ): r""" Args: @@ -207,9 +208,7 @@ def __init__( if isinstance(urls, IterableDataset): assert not resampled self.append(urls) - elif isinstance(urls, str) and ( - urls.endswith(".yaml") or urls.endswith(".yml") - ): + elif isinstance(urls, str) and (urls.endswith(".yaml") or urls.endswith(".yml")): with (open(urls)) as stream: spec = yaml.safe_load(stream) assert "datasets" in spec @@ -231,23 +230,22 @@ def __init__( else: self.append(filters.shuffle(shardshuffle)) if cache_dir is None or cache_size == 0: - self.append(tarfile_to_samples( - handler=handler, - load_from_object_store=load_from_object_store, - s3_client=s3_client, - s3_bucket_name=s3_bucket_name, - local_root_path=local_root_path, - )) + self.append( + tarfile_to_samples( + handler=handler, + load_from_object_store=load_from_object_store, + s3_client=s3_client, + s3_bucket_name=s3_bucket_name, + local_root_path=local_root_path, + ) + ) else: # We dont use cache. assert cache_size == -1 or cache_size > 0 self.append( cache.cached_tarfile_to_samples( - handler=handler, - verbose=verbose, - cache_size=cache_size, - cache_dir=cache_dir, + handler=handler, verbose=verbose, cache_size=cache_size, cache_dir=cache_dir, ) ) diff --git a/nemo/collections/multimodal/losses/clip_loss.py b/nemo/collections/multimodal/losses/clip_loss.py index 73100eaf88bf..5eb84b020aed 100644 --- a/nemo/collections/multimodal/losses/clip_loss.py +++ b/nemo/collections/multimodal/losses/clip_loss.py @@ -17,9 +17,7 @@ from torch import distributed as dist from torch.nn import functional as F -from nemo.collections.nlp.modules.common.megatron.utils import ( - average_losses_across_data_parallel_group, -) +from nemo.collections.nlp.modules.common.megatron.utils import average_losses_across_data_parallel_group try: from megatron.core import parallel_state @@ -32,10 +30,7 @@ def gather_features( - image_features, - text_features, - local_loss=False, - gather_with_grad=False, + image_features, text_features, local_loss=False, gather_with_grad=False, ): data_parallel_world_size = parallel_state.get_data_parallel_world_size() data_parallel_rank = parallel_state.get_data_parallel_rank() @@ -65,12 +60,8 @@ def gather_features( class ClipLoss(nn.Module): - def __init__( - self, - local_loss=False, - gather_with_grad=False, - cache_labels=False, + self, local_loss=False, gather_with_grad=False, cache_labels=False, ): super().__init__() self.local_loss = local_loss @@ -89,8 +80,8 @@ def forward(self, output_tensor): device = image_features.device if self.world_size > 1: all_image_features, all_text_features = gather_features( - image_features, text_features, - self.local_loss, self.gather_with_grad) + image_features, text_features, self.local_loss, self.gather_with_grad + ) if self.local_loss: logits_per_image = logit_scale * image_features @ all_text_features.T @@ -114,10 +105,7 @@ def forward(self, output_tensor): else: labels = self.labels[device] - total_loss = ( - F.cross_entropy(logits_per_image, labels) + - F.cross_entropy(logits_per_text, labels) - ) / 2 + total_loss = (F.cross_entropy(logits_per_image, labels) + F.cross_entropy(logits_per_text, labels)) / 2 # TODO (yuya): this is not necessary; not necessary if global! reduced_loss = average_losses_across_data_parallel_group([total_loss]) diff --git a/nemo/collections/multimodal/models/clip/megatron_clip_models.py b/nemo/collections/multimodal/models/clip/megatron_clip_models.py index 54a6af64c92c..22707c6eaec6 100644 --- a/nemo/collections/multimodal/models/clip/megatron_clip_models.py +++ b/nemo/collections/multimodal/models/clip/megatron_clip_models.py @@ -13,35 +13,36 @@ # limitations under the License. import itertools +from functools import partial +from typing import Any, List, Optional, Union + import numpy as np import torch import torch.nn.functional as F -from functools import partial from omegaconf.dictconfig import DictConfig from pytorch_lightning.accelerators import CPUAccelerator from pytorch_lightning.trainer.trainer import Trainer from tqdm import tqdm -from typing import Any, List, Optional, Union -from nemo.collections.multimodal.data.clip.clip_dataset import tokenize, build_train_valid_datasets, \ - build_imagenet_validation_dataloader +from nemo.collections.multimodal.data.clip.clip_dataset import ( + build_imagenet_validation_dataloader, + build_train_valid_datasets, + tokenize, +) from nemo.collections.multimodal.losses.clip_loss import ClipLoss from nemo.collections.multimodal.models.multimodal_base_model import MegatronMultimodalModel from nemo.collections.nlp.modules.common.megatron.build_model import build_model from nemo.collections.nlp.modules.common.megatron.language_model import get_language_model -from nemo.collections.nlp.modules.common.megatron.module import ( - MegatronModule, - Float16Module, -) +from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule from nemo.collections.nlp.modules.common.megatron.utils import ( ApexGuardDefaults, + average_losses_across_data_parallel_group, + get_all_params_for_weight_decay_optimization, get_linear_layer, + get_params_for_weight_decay_optimization, init_method_normal, parallel_lm_logits, scaled_init_method_normal, - average_losses_across_data_parallel_group, - get_all_params_for_weight_decay_optimization, - get_params_for_weight_decay_optimization, ) from nemo.collections.nlp.parts.utils_funcs import get_last_rank, is_last_rank from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone, VitMlpHead @@ -66,11 +67,11 @@ HAVE_MEGATRON_CORE = False + class CLIPVisionTransformer(MegatronModule): """Vision Transformer Model.""" - def __init__(self, model_cfg, - pre_process=True, post_process=True): + def __init__(self, model_cfg, pre_process=True, post_process=True): super(CLIPVisionTransformer, self).__init__() scaled_init_method = ( @@ -95,11 +96,7 @@ def __init__(self, model_cfg, ) if self.post_process: - self.head = torch.nn.Linear( - self.hidden_size, - self.output_dim, - bias=False, - ) + self.head = torch.nn.Linear(self.hidden_size, self.output_dim, bias=False,) def set_input_tensor(self, input_tensor): """See megatron.model.transformer.set_input_tensor()""" @@ -121,8 +118,7 @@ def forward(self, input): class CLIPTextTransformer(MegatronModule): """Text Transformer Model.""" - def __init__(self, model_cfg, padded_vocab_size, - pre_process=True, post_process=True): + def __init__(self, model_cfg, padded_vocab_size, pre_process=True, post_process=True): super(CLIPTextTransformer, self).__init__() self.output_dim = model_cfg.output_dim @@ -199,19 +195,14 @@ def __init__(self, model_cfg, padded_vocab_size, self.position_ids = torch.arange(model_cfg.max_position_embeddings).expand(1, -1).cuda() if self.post_process: - self.head = torch.nn.Linear( - model_cfg.hidden_size, - self.output_dim, - bias=False, - ) + self.head = torch.nn.Linear(model_cfg.hidden_size, self.output_dim, bias=False,) def set_input_tensor(self, input_tensor): """See megatron.model.transformer.set_input_tensor()""" self.language_model.set_input_tensor(input_tensor) def forward( - self, - input_ids, + self, input_ids, ): # input_ids: [b, s] # position_ids: [b, s] @@ -242,22 +233,16 @@ def forward( class CLIPModel(MegatronModule): """CLIP Model""" - def __init__(self, model_cfg, padded_vocab_size, - pre_process=True, post_process=True): + def __init__(self, model_cfg, padded_vocab_size, pre_process=True, post_process=True): super(CLIPModel, self).__init__() self.pre_process = pre_process self.post_process = post_process self.vision_encoder = CLIPVisionTransformer( - model_cfg.vision, - pre_process=self.pre_process, - post_process=self.post_process, + model_cfg.vision, pre_process=self.pre_process, post_process=self.post_process, ) self.text_encoder = CLIPTextTransformer( - model_cfg.text, - padded_vocab_size, - pre_process=self.pre_process, - post_process=self.post_process, + model_cfg.text, padded_vocab_size, pre_process=self.pre_process, post_process=self.post_process, ) self.logit_scale = torch.nn.Parameter(torch.ones([]) * np.log(1 / 0.07)) @@ -272,9 +257,7 @@ def forward(self, images, captions): text_features = self.text_encoder(captions) if self.post_process: - return F.normalize(image_features, dim=-1), \ - F.normalize(text_features, dim=-1), \ - self.logit_scale.exp() + return F.normalize(image_features, dim=-1), F.normalize(text_features, dim=-1), self.logit_scale.exp() return image_features, text_features @@ -553,10 +536,7 @@ def allreduce_sequence_parallel_gradients(self): buf.copy_(synced) def get_forward_output_and_loss_func(self): - loss_func = ClipLoss( - local_loss=self.cfg.local_loss, - gather_with_grad=self.cfg.gather_with_grad, - ) + loss_func = ClipLoss(local_loss=self.cfg.local_loss, gather_with_grad=self.cfg.gather_with_grad,) def fwd_output_and_loss_func(dataloader_iter, model): batch = next(dataloader_iter) @@ -596,8 +576,7 @@ def zero_shot_classifier(self): texts = texts.cuda(non_blocking=True) # TODO (yuya): distributed not working with torch.cuda.amp.autocast( - enabled=self.autocast_dtype in (torch.half, torch.bfloat16), - dtype=self.autocast_dtype, + enabled=self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, ): class_embeddings = text_encoder(texts) class_embedding = F.normalize(class_embeddings, dim=-1).mean(dim=0) @@ -607,7 +586,6 @@ def zero_shot_classifier(self): return zeroshot_weights def zero_shot_eval(self): - def accuracy(output, target, topk=(1,)): pred = output.topk(max(topk), 1, True, True)[1].t() correct = pred.eq(target.view(1, -1).expand_as(pred)) @@ -625,7 +603,7 @@ def accuracy(output, target, topk=(1,)): else: vision_encoder = self.model.vision_encoder with torch.no_grad(): - top1, top5, n = 0., 0., 0. + top1, top5, n = 0.0, 0.0, 0.0 for images, target in tqdm(self.imagenet_val["images"], desc="Imagenet Zero-shot Evaluation", leave=False): if images is None or target is None: continue @@ -634,12 +612,11 @@ def accuracy(output, target, topk=(1,)): target = target.cuda(non_blocking=True) # predict with torch.cuda.amp.autocast( - enabled=self.autocast_dtype in (torch.half, torch.bfloat16), - dtype=self.autocast_dtype, + enabled=self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, ): image_features = vision_encoder(images) image_features = F.normalize(image_features, dim=-1) - logits = 100. * image_features @ classifier + logits = 100.0 * image_features @ classifier # measure accuracy acc1, acc5 = accuracy(logits, target, topk=(1, 5)) @@ -648,8 +625,8 @@ def accuracy(output, target, topk=(1,)): n += images.size(0) logging.info('Finished zero-shot imagenet.') - top1 = (top1 / n) - top5 = (top5 / n) + top1 = top1 / n + top5 = top5 / n return top1, top5 def validation_step(self, dataloader_iter, batch_idx): @@ -703,9 +680,7 @@ def validation_epoch_end(self, outputs): self.log('imagenet_top5', imagenet_metric[1], prog_bar=True, rank_zero_only=True, batch_size=1) if parallel_state.is_pipeline_last_stage(): - averaged_metrics = torch.tensor( - [torch.stack(outputs).mean()], - dtype=torch.float32, device='cuda') + averaged_metrics = torch.tensor([torch.stack(outputs).mean()], dtype=torch.float32, device='cuda') else: averaged_metrics = torch.tensor([0.0], dtype=torch.float32, device='cuda') @@ -731,9 +706,7 @@ def build_train_valid_test_datasets(self): raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") self._train_ds, self._validation_ds = build_train_valid_datasets( - model_cfg=self.cfg, - consumed_samples=self.compute_consumed_samples(0), - tokenizer=self.tokenizer, + model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0), tokenizer=self.tokenizer, ) self._test_ds = None @@ -859,8 +832,7 @@ def setup_test_data(self, cfg): f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' ) self._test_dl = torch.utils.data.DataLoader( - self._test_ds, batch_size=self._micro_batch_size, - num_workers=cfg.num_workers, pin_memory=True, + self._test_ds, batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, ) def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any: diff --git a/nemo/collections/multimodal/models/dreambooth/dreambooth.py b/nemo/collections/multimodal/models/dreambooth/dreambooth.py index ff28f6c1e93d..7e90d06035ef 100644 --- a/nemo/collections/multimodal/models/dreambooth/dreambooth.py +++ b/nemo/collections/multimodal/models/dreambooth/dreambooth.py @@ -11,30 +11,31 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import os -import pytorch_lightning as pl -import torch from abc import ABC - from functools import partial +from typing import Any, Dict, Optional, Union + +import numpy as np +import pytorch_lightning as pl +import torch from omegaconf import DictConfig, OmegaConf, open_dict from pytorch_lightning import Trainer from pytorch_lightning.utilities import GradClipAlgorithmType from torch._dynamo import optimize from torch._inductor import config as inductor_config from torch.optim.lr_scheduler import LambdaLR -from typing import Any, Dict, Optional, Union from nemo.collections.multimodal.data.dreambooth.dreambooth_dataset import DreamBoothDataset from nemo.collections.multimodal.models.multimodal_base_model import MegatronMultimodalModel -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import make_beta_schedule, \ - extract_into_tensor, noise_like +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + extract_into_tensor, + make_beta_schedule, + noise_like, +) from nemo.collections.multimodal.parts.stable_diffusion.utils import default, exists from nemo.collections.multimodal.parts.utils import randn_like -from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( - MegatronPretrainingRandomSampler, -) +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import MegatronPretrainingRandomSampler from nemo.collections.nlp.parts.utils_funcs import get_last_rank, is_last_rank from nemo.core.classes import ModelPT from nemo.core.classes.common import Serialization @@ -43,9 +44,9 @@ from nemo.utils.exp_manager import exp_manager try: - from apex.transformer.pipeline_parallel.utils import get_num_microbatches from apex import amp from apex.transformer.enums import AttnMaskType + from apex.transformer.pipeline_parallel.utils import get_num_microbatches HAVE_APEX = True except (ImportError, ModuleNotFoundError): @@ -61,6 +62,7 @@ HAVE_MEGATRON_CORE = False + def disabled_train(self, mode=True): """Overwrite model.train with this function to make sure train/eval mode does not change anymore.""" @@ -69,12 +71,8 @@ def disabled_train(self, mode=True): def _collate_fn(examples, with_prior_preservation=False): if with_prior_preservation: - prompts = [ - [example["instance_prompt"], example["reg_prompt"]] - for example in examples - ] - images = [example["instance_images"] for example in examples] + \ - [example["reg_images"] for example in examples] + prompts = [[example["instance_prompt"], example["reg_prompt"]] for example in examples] + images = [example["instance_images"] for example in examples] + [example["reg_images"] for example in examples] else: prompts = [[example["instance_prompt"]] for example in examples] images = [example["instance_images"] for example in examples] @@ -110,7 +108,7 @@ def __init__(self, cfg): self.get_noise_scheduler(self.cfg.noise_scheduler) self.model_type = None - self.rng = torch.Generator(device=torch.cuda.current_device(), ) + self.rng = torch.Generator(device=torch.cuda.current_device(),) def instantiate_unet(self, cfg): self.unet = DreamBooth.from_config_dict(cfg) @@ -192,7 +190,6 @@ def set_input_tensor(self, input_tensor): class MegatronDreamBooth(MegatronMultimodalModel): - def __init__(self, cfg: DictConfig, trainer: Trainer): if not HAVE_APEX: raise ImportError( @@ -375,8 +372,7 @@ def process_batch(batch): # DB has more dedicated structure for encoding, so we enable autocasting here as well with torch.cuda.amp.autocast( - self.autocast_dtype in (torch.half, torch.bfloat16), - dtype=self.autocast_dtype, + self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, ): images = images.cuda(non_blocking=True) diff --git a/nemo/collections/multimodal/models/dreambooth/util.py b/nemo/collections/multimodal/models/dreambooth/util.py index be20d9447505..8f8a142f99f3 100644 --- a/nemo/collections/multimodal/models/dreambooth/util.py +++ b/nemo/collections/multimodal/models/dreambooth/util.py @@ -11,14 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from functools import partial + import numpy as np import pytorch_lightning as pl import torch import torch.nn as nn -from functools import partial -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import make_beta_schedule, \ - extract_into_tensor, noise_like +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + extract_into_tensor, + make_beta_schedule, + noise_like, +) from nemo.collections.multimodal.parts.stable_diffusion.utils import default, exists from nemo.core.classes.common import Serialization @@ -68,37 +72,49 @@ def apply_step(self, x, t, c_concat: list = None, c_crossattn: list = None): class sd_noise_scheduler(nn.Module): - def __init__(self, - parameterization='eps', - v_posterior=0, - given_betas=None, - beta_schedule='linear', - timesteps=1000, - linear_start=0.00085, - linear_end=0.012, - cosine_s=8e-3): + def __init__( + self, + parameterization='eps', + v_posterior=0, + given_betas=None, + beta_schedule='linear', + timesteps=1000, + linear_start=0.00085, + linear_end=0.012, + cosine_s=8e-3, + ): super().__init__() self.parameterization = parameterization self.v_posterior = v_posterior - self.register_schedule(given_betas=given_betas, - beta_schedule=beta_schedule, - timesteps=timesteps, - linear_start=linear_start, - linear_end=linear_end, - cosine_s=cosine_s) - - def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, - linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + self.register_schedule( + given_betas=given_betas, + beta_schedule=beta_schedule, + timesteps=timesteps, + linear_start=linear_start, + linear_end=linear_end, + cosine_s=cosine_s, + ) + + def register_schedule( + self, + given_betas=None, + beta_schedule="linear", + timesteps=1000, + linear_start=1e-4, + linear_end=2e-2, + cosine_s=8e-3, + ): if exists(given_betas): betas = given_betas else: - betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, - cosine_s=cosine_s) - alphas = 1. - betas + betas = make_beta_schedule( + beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s + ) + alphas = 1.0 - betas alphas_cumprod = np.cumprod(alphas, axis=0) - alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) + alphas_cumprod_prev = np.append(1.0, alphas_cumprod[:-1]) - timesteps, = betas.shape + (timesteps,) = betas.shape self.num_timesteps = int(timesteps) self.linear_start = linear_start self.linear_end = linear_end @@ -112,28 +128,32 @@ def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps= # calculations for diffusion q(x_t | x_{t-1}) and others self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) - self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) - self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) - self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) - self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1.0 - alphas_cumprod))) + self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1.0 - alphas_cumprod))) + self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod))) + self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod - 1))) # calculations for posterior q(x_{t-1} | x_t, x_0) - posterior_variance = (1 - self.v_posterior) * betas * (1. - alphas_cumprod_prev) / ( - 1. - alphas_cumprod) + self.v_posterior * betas + posterior_variance = (1 - self.v_posterior) * betas * (1.0 - alphas_cumprod_prev) / ( + 1.0 - alphas_cumprod + ) + self.v_posterior * betas # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) self.register_buffer('posterior_variance', to_torch(posterior_variance)) # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20)))) - self.register_buffer('posterior_mean_coef1', to_torch( - betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod))) - self.register_buffer('posterior_mean_coef2', to_torch( - (1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. - alphas_cumprod))) + self.register_buffer( + 'posterior_mean_coef1', to_torch(betas * np.sqrt(alphas_cumprod_prev) / (1.0 - alphas_cumprod)) + ) + self.register_buffer( + 'posterior_mean_coef2', to_torch((1.0 - alphas_cumprod_prev) * np.sqrt(alphas) / (1.0 - alphas_cumprod)) + ) if self.parameterization == "eps": lvlb_weights = self.betas ** 2 / ( - 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod)) + 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod) + ) elif self.parameterization == "x0": - lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2. * 1 - torch.Tensor(alphas_cumprod)) + lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2.0 * 1 - torch.Tensor(alphas_cumprod)) else: raise NotImplementedError("mu not supported") # TODO how to choose this term @@ -143,5 +163,7 @@ def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps= def forward(self, x_start, t, noise=None): noise = default(noise, lambda: torch.randn_like(x_start)) - return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) + return ( + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise + ) diff --git a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py index eb35b638ff8c..f2ef6488c8db 100644 --- a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py +++ b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py @@ -15,23 +15,24 @@ https://github.com/timothybrooks/instruct-pix2pix/blob/2afcb7e45bd350765f21a58a0c135871e9dc5a78/stable_diffusion/ldm/models/diffusion/ddpm_edit.py """ +from contextlib import contextmanager +from functools import partial + import numpy as np import pytorch_lightning as pl import torch import torch.nn as nn -from contextlib import contextmanager from einops import rearrange, repeat -from functools import partial from pytorch_lightning.utilities.distributed import rank_zero_only from torch.optim.lr_scheduler import LambdaLR from torchvision.utils import make_grid from tqdm import tqdm from nemo.collections.multimodal.data.instruct_pix2pix.edit_dataset import EditDataset -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion, LatentDiffusion +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion, MegatronLatentDiffusion from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( - MegatronPretrainingSampler, MegatronPretrainingRandomSampler, + MegatronPretrainingSampler, ) from nemo.utils import logging @@ -46,7 +47,6 @@ class LatentDiffusionEdit(LatentDiffusion): - def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): pl_sd = torch.load(path, map_location="cpu") if "state_dict" in list(pl_sd.keys()): @@ -88,8 +88,9 @@ def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): if k.startswith(ik): print("Deleting key {} from state_dict.".format(k)) del sd[k] - missing, unexpected = self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict( - sd, strict=False) + missing, unexpected = ( + self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict(sd, strict=False) + ) print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") if len(missing) > 0: print(f"Missing Keys: {missing}") @@ -97,8 +98,17 @@ def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): print(f"Unexpected Keys: {unexpected}") @torch.no_grad() - def get_input(self, batch, k, return_first_stage_outputs=False, force_c_encode=False, - cond_key=None, return_original_cond=False, bs=None, uncond=0.05): + def get_input( + self, + batch, + k, + return_first_stage_outputs=False, + force_c_encode=False, + cond_key=None, + return_original_cond=False, + bs=None, + uncond=0.05, + ): x = batch[k] if bs is not None: x = x[:bs] @@ -118,8 +128,9 @@ def get_input(self, batch, k, return_first_stage_outputs=False, force_c_encode=F input_mask = 1 - rearrange((random >= uncond).float() * (random < 3 * uncond).float(), "n -> n 1 1 1") null_prompt = self.get_learned_conditioning([""]) - cond["c_crossattn"] = torch.where(prompt_mask, null_prompt, - self.get_learned_conditioning(xc["c_crossattn"]).detach()) + cond["c_crossattn"] = torch.where( + prompt_mask, null_prompt, self.get_learned_conditioning(xc["c_crossattn"]).detach() + ) cond["c_concat"] = input_mask * self.encode_first_stage((xc["c_concat"].to(x.device))).mode().detach() out = [z, cond] @@ -132,7 +143,6 @@ def get_input(self, batch, k, return_first_stage_outputs=False, force_c_encode=F class MegatronLatentDiffusionEdit(MegatronLatentDiffusion): - def model_provider_func(self, pre_process=True, post_process=True): """Model depends on pipeline paralellism.""" model = LatentDiffusionEdit(cfg=self.cfg) @@ -205,9 +215,7 @@ def setup_validation_data(self, cfg): if not self.cfg.get('validation_drop_last', True): logging.info(f'Drop last in validation dataset is set to False') drop_last = False - self._validation_dl = self.build_pretraining_data_loader( - self._validation_ds, consumed_samples, drop_last - ) + self._validation_dl = self.build_pretraining_data_loader(self._validation_ds, consumed_samples, drop_last) def setup_test_data(self, cfg): if hasattr(self, '_test_ds') and self._test_ds is not None: diff --git a/nemo/collections/multimodal/models/multimodal_base_model.py b/nemo/collections/multimodal/models/multimodal_base_model.py index d7119f0c120d..ad13eb6c9064 100644 --- a/nemo/collections/multimodal/models/multimodal_base_model.py +++ b/nemo/collections/multimodal/models/multimodal_base_model.py @@ -17,6 +17,8 @@ import json import os import re +from typing import Any, Dict, Optional, Union + import torch from omegaconf import open_dict from omegaconf.dictconfig import DictConfig @@ -29,7 +31,6 @@ from pytorch_lightning.utilities.cloud_io import load as pl_load from pytorch_lightning.utilities.migration import pl_legacy_patch from transformers import TRANSFORMERS_CACHE -from typing import Any, Union, Dict, Optional from nemo.collections.nlp.modules.common.megatron.clip_grads import ( clip_grad_norm_distributed_optimizer, @@ -37,8 +38,11 @@ ) from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer, get_tokenizer -from nemo.collections.nlp.parts.nlp_overrides import NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE, GradScaler -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.collections.nlp.parts.nlp_overrides import ( + NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE, + GradScaler, + NLPSaveRestoreConnector, +) from nemo.core.classes import ModelPT from nemo.core.classes.exportable import Exportable from nemo.core.optim import MainParamsOptimizerWrapper, prepare_lr_scheduler @@ -97,12 +101,12 @@ def is_model_parallel_initialized(self): @classmethod def load_from_checkpoint( - cls, - checkpoint_path: str, - map_location: Any = None, - hparams_file: Optional[str] = None, - strict: bool = True, - **kwargs, + cls, + checkpoint_path: str, + map_location: Any = None, + hparams_file: Optional[str] = None, + strict: bool = True, + **kwargs, ): """ Loads ModelPT from checkpoint, with some maintenance of restoration. @@ -271,7 +275,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self.grad_clip_pl_default = False # use pytorch default for gradient clipping. Default False if hasattr(self._cfg, "tokenizer") or ( - hasattr(self._cfg, "encoder_tokenizer") and hasattr(self._cfg, "decoder_tokenizer") + hasattr(self._cfg, "encoder_tokenizer") and hasattr(self._cfg, "decoder_tokenizer") ): # build tokenizer (defaults to nemo supported tokenizers) self._build_tokenizer() @@ -448,14 +452,14 @@ def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unus # TODO: Replace with newer override for scheduler.step() instead of # search for plugins for fp16 GradScalar if self.trainer.precision_plugin is not None and isinstance( - self.trainer.precision_plugin, NativeMixedPrecisionPlugin + self.trainer.precision_plugin, NativeMixedPrecisionPlugin ): precision_plugin = self.trainer.precision_plugin if ( - hasattr(precision_plugin, 'scaler') - and precision_plugin.scaler is not None - and isinstance(precision_plugin.scaler, GradScaler) + hasattr(precision_plugin, 'scaler') + and precision_plugin.scaler is not None + and isinstance(precision_plugin.scaler, GradScaler) ): grad_scaler = precision_plugin.scaler @@ -480,7 +484,7 @@ def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unus grad_scaler.optimizer_update_skipped = None def setup_optimization( - self, optim_config: Optional[Union[DictConfig, Dict]] = None, optim_kwargs: Optional[Dict[str, Any]] = None, + self, optim_config: Optional[Union[DictConfig, Dict]] = None, optim_kwargs: Optional[Dict[str, Any]] = None, ): optim_kwargs = {} if optim_kwargs is None else optim_kwargs.copy() if self.with_distributed_adam: @@ -573,8 +577,8 @@ def configure_optimizers(self): def compute_consumed_samples(self, steps_since_resume=0): app_state = AppState() consumed_samples = ( - self.init_consumed_samples - + steps_since_resume * app_state.data_parallel_size * self.cfg.micro_batch_size * get_num_microbatches() + self.init_consumed_samples + + steps_since_resume * app_state.data_parallel_size * self.cfg.micro_batch_size * get_num_microbatches() ) return int(consumed_samples) @@ -598,8 +602,8 @@ def _validate_config(self): self.cfg.sequence_parallel = False if ( - self.cfg.get('gradient_accumulation_fusion', False) - and self.cfg.get('pipeline_model_parallel_size', 1) == 1 + self.cfg.get('gradient_accumulation_fusion', False) + and self.cfg.get('pipeline_model_parallel_size', 1) == 1 ): logging.info("Gradient accumulation fusion can only be used with pipeline parallel size > 1.") with open_dict(self.cfg): diff --git a/nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py b/nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py index 6bda186f137d..c3ca34b35233 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py +++ b/nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. import re -import torch from abc import ABC, abstractclassmethod from typing import Any, Optional +import torch + from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo from nemo.core.classes import ModelPT from nemo.utils import logging @@ -71,8 +72,10 @@ def _extract_consumed_samples_from_ckpt(self, ckpt_path): def compute_consumed_samples(self, steps_since_resume=0): consumed_samples = ( - self.init_consumed_samples - + steps_since_resume * self.trainer.world_size - * self.cfg.micro_batch_size * self.trainer.accumulate_grad_batches + self.init_consumed_samples + + steps_since_resume + * self.trainer.world_size + * self.cfg.micro_batch_size + * self.trainer.accumulate_grad_batches ) return int(consumed_samples) diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py index 9db8d2271329..7ac641d4ec1f 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py @@ -11,35 +11,38 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from contextlib import contextmanager + import pytorch_lightning as pl import torch import torch.nn.functional as F -from contextlib import contextmanager from taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.model import Encoder, Decoder -from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import \ - DiagonalGaussianDistribution +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.model import Decoder, Encoder +from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import ( + DiagonalGaussianDistribution, +) from nemo.collections.multimodal.parts.stable_diffusion.utils import instantiate_from_config class VQModel(pl.LightningModule): - def __init__(self, - ddconfig, - lossconfig, - n_embed, - embed_dim, - ckpt_path=None, - ignore_keys=[], - image_key="image", - colorize_nlabels=None, - monitor=None, - batch_resize_range=None, - scheduler_config=None, - lr_g_factor=1.0, - remap=None, - sane_index_shape=False, # tell vector quantizer to return indices as bhw - ): + def __init__( + self, + ddconfig, + lossconfig, + n_embed, + embed_dim, + ckpt_path=None, + ignore_keys=[], + image_key="image", + colorize_nlabels=None, + monitor=None, + batch_resize_range=None, + scheduler_config=None, + lr_g_factor=1.0, + remap=None, + sane_index_shape=False, # tell vector quantizer to return indices as bhw + ): super().__init__() self.embed_dim = embed_dim self.n_embed = n_embed @@ -47,9 +50,7 @@ def __init__(self, self.encoder = Encoder(**ddconfig) self.decoder = Decoder(**ddconfig) self.loss = instantiate_from_config(lossconfig) - self.quantize = VectorQuantizer(n_embed, embed_dim, beta=0.25, - remap=remap, - sane_index_shape=sane_index_shape) + self.quantize = VectorQuantizer(n_embed, embed_dim, beta=0.25, remap=remap, sane_index_shape=sane_index_shape) self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1) self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1) if colorize_nlabels is not None: @@ -153,17 +154,25 @@ def training_step(self, batch, batch_idx, optimizer_idx): if optimizer_idx == 0: # autoencode - aeloss, log_dict_ae = self.loss(qloss, x, xrec, optimizer_idx, self.global_step, - last_layer=self.get_last_layer(), split="train", - predicted_indices=ind) + aeloss, log_dict_ae = self.loss( + qloss, + x, + xrec, + optimizer_idx, + self.global_step, + last_layer=self.get_last_layer(), + split="train", + predicted_indices=ind, + ) self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=True) return aeloss if optimizer_idx == 1: # discriminator - discloss, log_dict_disc = self.loss(qloss, x, xrec, optimizer_idx, self.global_step, - last_layer=self.get_last_layer(), split="train") + discloss, log_dict_disc = self.loss( + qloss, x, xrec, optimizer_idx, self.global_step, last_layer=self.get_last_layer(), split="train" + ) self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=True) return discloss @@ -176,24 +185,34 @@ def validation_step(self, batch, batch_idx): def _validation_step(self, batch, batch_idx, suffix=""): x = self.get_input(batch, self.image_key) xrec, qloss, ind = self(x, return_pred_indices=True) - aeloss, log_dict_ae = self.loss(qloss, x, xrec, 0, - self.global_step, - last_layer=self.get_last_layer(), - split="val" + suffix, - predicted_indices=ind - ) - - discloss, log_dict_disc = self.loss(qloss, x, xrec, 1, - self.global_step, - last_layer=self.get_last_layer(), - split="val" + suffix, - predicted_indices=ind - ) + aeloss, log_dict_ae = self.loss( + qloss, + x, + xrec, + 0, + self.global_step, + last_layer=self.get_last_layer(), + split="val" + suffix, + predicted_indices=ind, + ) + + discloss, log_dict_disc = self.loss( + qloss, + x, + xrec, + 1, + self.global_step, + last_layer=self.get_last_layer(), + split="val" + suffix, + predicted_indices=ind, + ) rec_loss = log_dict_ae[f"val{suffix}/rec_loss"] - self.log(f"val{suffix}/rec_loss", rec_loss, - prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True) - self.log(f"val{suffix}/aeloss", aeloss, - prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True) + self.log( + f"val{suffix}/rec_loss", rec_loss, prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True + ) + self.log( + f"val{suffix}/aeloss", aeloss, prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True + ) if version.parse(pl.__version__) >= version.parse('1.4.0'): del log_dict_ae[f"val{suffix}/rec_loss"] self.log_dict(log_dict_ae) @@ -205,30 +224,24 @@ def configure_optimizers(self): lr_g = self.lr_g_factor * self.learning_rate print("lr_d", lr_d) print("lr_g", lr_g) - opt_ae = torch.optim.Adam(list(self.encoder.parameters()) + - list(self.decoder.parameters()) + - list(self.quantize.parameters()) + - list(self.quant_conv.parameters()) + - list(self.post_quant_conv.parameters()), - lr=lr_g, betas=(0.5, 0.9)) - opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(), - lr=lr_d, betas=(0.5, 0.9)) + opt_ae = torch.optim.Adam( + list(self.encoder.parameters()) + + list(self.decoder.parameters()) + + list(self.quantize.parameters()) + + list(self.quant_conv.parameters()) + + list(self.post_quant_conv.parameters()), + lr=lr_g, + betas=(0.5, 0.9), + ) + opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(), lr=lr_d, betas=(0.5, 0.9)) if self.scheduler_config is not None: scheduler = instantiate_from_config(self.scheduler_config) print("Setting up LambdaLR scheduler...") scheduler = [ - { - 'scheduler': LambdaLR(opt_ae, lr_lambda=scheduler.schedule), - 'interval': 'step', - 'frequency': 1 - }, - { - 'scheduler': LambdaLR(opt_disc, lr_lambda=scheduler.schedule), - 'interval': 'step', - 'frequency': 1 - }, + {'scheduler': LambdaLR(opt_ae, lr_lambda=scheduler.schedule), 'interval': 'step', 'frequency': 1}, + {'scheduler': LambdaLR(opt_disc, lr_lambda=scheduler.schedule), 'interval': 'step', 'frequency': 1}, ] return [opt_ae, opt_disc], scheduler return [opt_ae, opt_disc], [] @@ -254,7 +267,8 @@ def log_images(self, batch, only_inputs=False, plot_ema=False, **kwargs): if plot_ema: with self.ema_scope(): xrec_ema, _ = self(x) - if x.shape[1] > 3: xrec_ema = self.to_rgb(xrec_ema) + if x.shape[1] > 3: + xrec_ema = self.to_rgb(xrec_ema) log["reconstructions_ema"] = xrec_ema return log @@ -263,7 +277,7 @@ def to_rgb(self, x): if not hasattr(self, "colorize"): self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x)) x = F.conv2d(x, weight=self.colorize) - x = 2. * (x - x.min()) / (x.max() - x.min()) - 1. + x = 2.0 * (x - x.min()) / (x.max() - x.min()) - 1.0 return x @@ -289,17 +303,18 @@ def decode(self, h, force_not_quantize=False): class AutoencoderKL(pl.LightningModule): - def __init__(self, - ddconfig, - embed_dim, - lossconfig=None, # TODO make it configurable - ckpt_path=None, - ignore_keys=[], - image_key="image", - colorize_nlabels=None, - monitor=None, - from_pretrained: str = None - ): + def __init__( + self, + ddconfig, + embed_dim, + lossconfig=None, # TODO make it configurable + ckpt_path=None, + ignore_keys=[], + image_key="image", + colorize_nlabels=None, + monitor=None, + from_pretrained: str = None, + ): super().__init__() self.image_key = image_key self.encoder = Encoder(**ddconfig) @@ -317,12 +332,14 @@ def __init__(self, if ckpt_path is not None: self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys) from diffusers.modeling_utils import load_state_dict + if from_pretrained is not None: state_dict = load_state_dict(from_pretrained) self._load_pretrained_model(state_dict) def _state_key_mapping(self, state_dict: dict): import re + res_dict = {} key_list = state_dict.keys() key_str = " ".join(key_list) @@ -331,17 +348,22 @@ def _state_key_mapping(self, state_dict: dict): p2 = re.compile('decoder.up.[0-9]') up_blocks_count = int(len(re.findall(up_block_pattern, key_str)) / 2 + 1) for key_, val_ in state_dict.items(): - key_ = key_.replace("up_blocks", "up").replace("down_blocks", "down").replace('resnets', 'block') \ - .replace('mid_block', 'mid').replace("mid.block.", "mid.block_") \ - .replace('mid.attentions.0.key', 'mid.attn_1.k') \ - .replace('mid.attentions.0.query', 'mid.attn_1.q') \ - .replace('mid.attentions.0.value', 'mid.attn_1.v') \ - .replace('mid.attentions.0.group_norm', 'mid.attn_1.norm') \ - .replace('mid.attentions.0.proj_attn', 'mid.attn_1.proj_out') \ - .replace('upsamplers.0', 'upsample') \ - .replace('downsamplers.0', 'downsample') \ - .replace('conv_shortcut', 'nin_shortcut') \ + key_ = ( + key_.replace("up_blocks", "up") + .replace("down_blocks", "down") + .replace('resnets', 'block') + .replace('mid_block', 'mid') + .replace("mid.block.", "mid.block_") + .replace('mid.attentions.0.key', 'mid.attn_1.k') + .replace('mid.attentions.0.query', 'mid.attn_1.q') + .replace('mid.attentions.0.value', 'mid.attn_1.v') + .replace('mid.attentions.0.group_norm', 'mid.attn_1.norm') + .replace('mid.attentions.0.proj_attn', 'mid.attn_1.proj_out') + .replace('upsamplers.0', 'upsample') + .replace('downsamplers.0', 'downsample') + .replace('conv_shortcut', 'nin_shortcut') .replace('conv_norm_out', 'norm_out') + ) mid_list = re.findall(p1, key_) if len(mid_list) != 0: @@ -367,10 +389,7 @@ def _load_pretrained_model(self, state_dict, ignore_mismatched_sizes=False): unexpected_keys = list(set(loaded_keys) - set(expected_keys)) def _find_mismatched_keys( - state_dict, - model_state_dict, - loaded_keys, - ignore_mismatched_sizes, + state_dict, model_state_dict, loaded_keys, ignore_mismatched_sizes, ): mismatched_keys = [] if ignore_mismatched_sizes: @@ -378,8 +397,8 @@ def _find_mismatched_keys( model_key = checkpoint_key if ( - model_key in model_state_dict - and state_dict[checkpoint_key].shape != model_state_dict[model_key].shape + model_key in model_state_dict + and state_dict[checkpoint_key].shape != model_state_dict[model_key].shape ): mismatched_keys.append( (checkpoint_key, state_dict[checkpoint_key].shape, model_state_dict[model_key].shape) @@ -390,10 +409,7 @@ def _find_mismatched_keys( if state_dict is not None: # Whole checkpoint mismatched_keys = _find_mismatched_keys( - state_dict, - model_state_dict, - original_loaded_keys, - ignore_mismatched_sizes, + state_dict, model_state_dict, original_loaded_keys, ignore_mismatched_sizes, ) error_msgs = self._load_state_dict_into_model(state_dict) return missing_keys, unexpected_keys, mismatched_keys, error_msgs @@ -462,16 +478,30 @@ def training_step(self, batch, batch_idx, optimizer_idx): if optimizer_idx == 0: # train encoder+decoder+logvar - aeloss, log_dict_ae = self.loss(inputs, reconstructions, posterior, optimizer_idx, self.global_step, - last_layer=self.get_last_layer(), split="train") + aeloss, log_dict_ae = self.loss( + inputs, + reconstructions, + posterior, + optimizer_idx, + self.global_step, + last_layer=self.get_last_layer(), + split="train", + ) self.log("aeloss", aeloss, prog_bar=True, logger=True, on_step=True, on_epoch=True) self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=False) return aeloss if optimizer_idx == 1: # train the discriminator - discloss, log_dict_disc = self.loss(inputs, reconstructions, posterior, optimizer_idx, self.global_step, - last_layer=self.get_last_layer(), split="train") + discloss, log_dict_disc = self.loss( + inputs, + reconstructions, + posterior, + optimizer_idx, + self.global_step, + last_layer=self.get_last_layer(), + split="train", + ) self.log("discloss", discloss, prog_bar=True, logger=True, on_step=True, on_epoch=True) self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=False) @@ -480,11 +510,13 @@ def training_step(self, batch, batch_idx, optimizer_idx): def validation_step(self, batch, batch_idx): inputs = self.get_input(batch, self.image_key) reconstructions, posterior = self(inputs) - aeloss, log_dict_ae = self.loss(inputs, reconstructions, posterior, 0, self.global_step, - last_layer=self.get_last_layer(), split="val") + aeloss, log_dict_ae = self.loss( + inputs, reconstructions, posterior, 0, self.global_step, last_layer=self.get_last_layer(), split="val" + ) - discloss, log_dict_disc = self.loss(inputs, reconstructions, posterior, 1, self.global_step, - last_layer=self.get_last_layer(), split="val") + discloss, log_dict_disc = self.loss( + inputs, reconstructions, posterior, 1, self.global_step, last_layer=self.get_last_layer(), split="val" + ) self.log("val/rec_loss", log_dict_ae["val/rec_loss"]) self.log_dict(log_dict_ae) @@ -493,13 +525,15 @@ def validation_step(self, batch, batch_idx): def configure_optimizers(self): lr = self.learning_rate - opt_ae = torch.optim.Adam(list(self.encoder.parameters()) + - list(self.decoder.parameters()) + - list(self.quant_conv.parameters()) + - list(self.post_quant_conv.parameters()), - lr=lr, betas=(0.5, 0.9)) - opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(), - lr=lr, betas=(0.5, 0.9)) + opt_ae = torch.optim.Adam( + list(self.encoder.parameters()) + + list(self.decoder.parameters()) + + list(self.quant_conv.parameters()) + + list(self.post_quant_conv.parameters()), + lr=lr, + betas=(0.5, 0.9), + ) + opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(), lr=lr, betas=(0.5, 0.9)) return [opt_ae, opt_disc], [] def get_last_layer(self): @@ -527,7 +561,7 @@ def to_rgb(self, x): if not hasattr(self, "colorize"): self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x)) x = F.conv2d(x, weight=self.colorize) - x = 2. * (x - x.min()) / (x.max() - x.min()) - 1. + x = 2.0 * (x - x.min()) / (x.max() - x.min()) - 1.0 return x diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py index 609a651f41fe..a535582850c2 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -12,37 +12,55 @@ # See the License for the specific language governing permissions and # limitations under the License. import itertools +from contextlib import contextmanager +from functools import partial +from typing import Any, Dict, Optional, Union + import numpy as np import pytorch_lightning as pl import torch import torch.nn as nn -from contextlib import contextmanager from einops import rearrange, repeat -from functools import partial from omegaconf import DictConfig, OmegaConf, open_dict -from pytorch_lightning.accelerators import CPUAccelerator from pytorch_lightning import Trainer +from pytorch_lightning.accelerators import CPUAccelerator from pytorch_lightning.utilities.distributed import rank_zero_only from torch._dynamo import optimize from torch._inductor import config as inductor_config from torch.optim.lr_scheduler import LambdaLR from torchvision.utils import make_grid from tqdm import tqdm -from typing import Any, Dict, Optional, Union -from nemo.collections.multimodal.data.stable_diffusion.stable_diffusion_dataset import build_train_valid_datasets, \ - build_train_valid_precached_datasets +from nemo.collections.multimodal.data.stable_diffusion.stable_diffusion_dataset import ( + build_train_valid_datasets, + build_train_valid_precached_datasets, +) from nemo.collections.multimodal.models.multimodal_base_model import MegatronMultimodalModel from nemo.collections.multimodal.models.stable_diffusion.diffusion_model import DiffusionModel -from nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder import VQModelInterface, IdentityFirstStage, \ - AutoencoderKL +from nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder import ( + AutoencoderKL, + IdentityFirstStage, + VQModelInterface, +) from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import make_beta_schedule, \ - extract_into_tensor, noise_like -from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import normal_kl, \ - DiagonalGaussianDistribution -from nemo.collections.multimodal.parts.stable_diffusion.utils import log_txt_as_img, exists, default, ismap, isimage, \ - mean_flat, count_params +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + extract_into_tensor, + make_beta_schedule, + noise_like, +) +from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import ( + DiagonalGaussianDistribution, + normal_kl, +) +from nemo.collections.multimodal.parts.stable_diffusion.utils import ( + count_params, + default, + exists, + isimage, + ismap, + log_txt_as_img, + mean_flat, +) from nemo.collections.multimodal.parts.utils import randn_like from nemo.collections.nlp.modules.common.megatron.build_model import build_model from nemo.collections.nlp.parts.utils_funcs import get_last_rank @@ -50,9 +68,9 @@ from nemo.utils import logging try: - from apex.transformer.pipeline_parallel.utils import get_num_microbatches from apex import amp from apex.transformer.enums import AttnMaskType + from apex.transformer.pipeline_parallel.utils import get_num_microbatches HAVE_APEX = True except (ImportError, ModuleNotFoundError): @@ -68,9 +86,7 @@ HAVE_MEGATRON_CORE = False -__conditioning_keys__ = {'concat': 'c_concat', - 'crossattn': 'c_crossattn', - 'adm': 'y'} +__conditioning_keys__ = {'concat': 'c_concat', 'crossattn': 'c_crossattn', 'adm': 'y'} def random_dropout(embeddings, drop_rate): @@ -82,9 +98,7 @@ def random_dropout(embeddings, drop_rate): drop_rate (float): Rate of dropping the embedding. """ nsamples = embeddings.shape[0] - zero_flag = torch.ones( - nsamples, 1, 1 - ).to(embeddings.dtype) * (1 - drop_rate) + zero_flag = torch.ones(nsamples, 1, 1).to(embeddings.dtype) * (1 - drop_rate) zero_flag = torch.bernoulli(zero_flag).cuda(non_blocking=True) embeddings = embeddings * zero_flag return embeddings @@ -101,7 +115,6 @@ def uniform_on_device(r1, r2, shape, device): class DDPM(torch.nn.Module): - def __init__(self, cfg): super().__init__() assert cfg.parameterization in ["eps", "x0"], 'currently only supporting "eps" and "x0"' @@ -122,8 +135,14 @@ def __init__(self, cfg): self.original_elbo_weight = cfg.original_elbo_weight self.l_simple_weight = cfg.l_simple_weight - self.register_schedule(given_betas=cfg.given_betas, beta_schedule=cfg.beta_schedule, timesteps=cfg.timesteps, - linear_start=cfg.linear_start, linear_end=cfg.linear_end, cosine_s=cfg.cosine_s) + self.register_schedule( + given_betas=cfg.given_betas, + beta_schedule=cfg.beta_schedule, + timesteps=cfg.timesteps, + linear_start=cfg.linear_start, + linear_end=cfg.linear_end, + cosine_s=cfg.cosine_s, + ) self.loss_type = cfg.loss_type @@ -132,20 +151,28 @@ def __init__(self, cfg): if self.learn_logvar: self.logvar = nn.Parameter(self.logvar, requires_grad=True) - self.rng = torch.Generator(device=torch.cuda.current_device(), ) - - def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, - linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + self.rng = torch.Generator(device=torch.cuda.current_device(),) + + def register_schedule( + self, + given_betas=None, + beta_schedule="linear", + timesteps=1000, + linear_start=1e-4, + linear_end=2e-2, + cosine_s=8e-3, + ): if exists(given_betas): betas = given_betas else: - betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, - cosine_s=cosine_s) - alphas = 1. - betas + betas = make_beta_schedule( + beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s + ) + alphas = 1.0 - betas alphas_cumprod = np.cumprod(alphas, axis=0) - alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) + alphas_cumprod_prev = np.append(1.0, alphas_cumprod[:-1]) - timesteps, = betas.shape + (timesteps,) = betas.shape self.num_timesteps = int(timesteps) self.linear_start = linear_start self.linear_end = linear_end @@ -159,28 +186,32 @@ def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps= # calculations for diffusion q(x_t | x_{t-1}) and others self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) - self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) - self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) - self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) - self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1.0 - alphas_cumprod))) + self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1.0 - alphas_cumprod))) + self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod))) + self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod - 1))) # calculations for posterior q(x_{t-1} | x_t, x_0) - posterior_variance = (1 - self.v_posterior) * betas * (1. - alphas_cumprod_prev) / ( - 1. - alphas_cumprod) + self.v_posterior * betas + posterior_variance = (1 - self.v_posterior) * betas * (1.0 - alphas_cumprod_prev) / ( + 1.0 - alphas_cumprod + ) + self.v_posterior * betas # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) self.register_buffer('posterior_variance', to_torch(posterior_variance)) # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20)))) - self.register_buffer('posterior_mean_coef1', to_torch( - betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod))) - self.register_buffer('posterior_mean_coef2', to_torch( - (1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. - alphas_cumprod))) + self.register_buffer( + 'posterior_mean_coef1', to_torch(betas * np.sqrt(alphas_cumprod_prev) / (1.0 - alphas_cumprod)) + ) + self.register_buffer( + 'posterior_mean_coef2', to_torch((1.0 - alphas_cumprod_prev) * np.sqrt(alphas) / (1.0 - alphas_cumprod)) + ) if self.parameterization == "eps": lvlb_weights = self.betas ** 2 / ( - 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod)) + 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod) + ) elif self.parameterization == "x0": - lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2. * 1 - torch.Tensor(alphas_cumprod)) + lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2.0 * 1 - torch.Tensor(alphas_cumprod)) else: raise NotImplementedError("mu not supported") # TODO how to choose this term @@ -211,8 +242,9 @@ def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): if k.startswith(ik): logging.info("Deleting key {} from state_dict.".format(k)) del sd[k] - missing, unexpected = self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict( - sd, strict=False) + missing, unexpected = ( + self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict(sd, strict=False) + ) logging.info(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") if len(missing) > 0: logging.info(f"Missing Keys: {missing}") @@ -226,21 +258,21 @@ def q_mean_variance(self, x_start, t): :param t: the number of diffusion steps (minus 1). Here, 0 means one step. :return: A tuple (mean, variance, log_variance), all of x_start's shape. """ - mean = (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start) + mean = extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start variance = extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape) log_variance = extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape) return mean, variance, log_variance def predict_start_from_noise(self, x_t, t, noise): return ( - extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - - extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise + extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t + - extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise ) def q_posterior(self, x_start, x_t, t): posterior_mean = ( - extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start + - extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t + extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start + + extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t ) posterior_variance = extract_into_tensor(self.posterior_variance, t, x_t.shape) posterior_log_variance_clipped = extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape) @@ -253,7 +285,7 @@ def p_mean_variance(self, x, t, clip_denoised: bool): elif self.parameterization == "x0": x_recon = model_out if clip_denoised: - x_recon.clamp_(-1., 1.) + x_recon.clamp_(-1.0, 1.0) model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) return model_mean, posterior_variance, posterior_log_variance @@ -274,8 +306,9 @@ def p_sample_loop(self, shape, return_intermediates=False): img = torch.randn(shape, generator=self.rng, device=device) intermediates = [img] for i in tqdm(reversed(range(0, self.num_timesteps)), desc='Sampling t', total=self.num_timesteps): - img = self.p_sample(img, torch.full((b,), i, device=device, dtype=torch.long), - clip_denoised=self.clip_denoised) + img = self.p_sample( + img, torch.full((b,), i, device=device, dtype=torch.long), clip_denoised=self.clip_denoised + ) if i % self.log_every_t == 0 or i == self.num_timesteps - 1: intermediates.append(img) if return_intermediates: @@ -286,13 +319,16 @@ def p_sample_loop(self, shape, return_intermediates=False): def sample(self, batch_size=16, return_intermediates=False): image_size = self.image_size channels = self.channels - return self.p_sample_loop((batch_size, channels, image_size, image_size), - return_intermediates=return_intermediates) + return self.p_sample_loop( + (batch_size, channels, image_size, image_size), return_intermediates=return_intermediates + ) def q_sample(self, x_start, t, noise=None): noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) - return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) + return ( + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise + ) def get_loss(self, pred, target, mean=True): if self.loss_type == 'l1': @@ -451,10 +487,10 @@ def __init__(self, cfg): inductor_config.triton.cudagraphs = cfg.get("inductor_cudagraphs", False) self.model = optimize("inductor")(self.model) - def make_cond_schedule(self, ): + def make_cond_schedule(self,): self.cond_ids = torch.full(size=(self.num_timesteps,), fill_value=self.num_timesteps - 1, dtype=torch.long) ids = torch.round(torch.linspace(0, self.num_timesteps - 1, self.num_timesteps_cond)).long() - self.cond_ids[:self.num_timesteps_cond] = ids + self.cond_ids[: self.num_timesteps_cond] = ids def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): # only for very first batch @@ -464,13 +500,19 @@ def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): encoder_posterior = self.encode_first_stage(x) z = self.get_first_stage_encoding(encoder_posterior).detach() del self.scale_factor - self.register_buffer('scale_factor', 1. / z.flatten().std()) + self.register_buffer('scale_factor', 1.0 / z.flatten().std()) logging.info(f"setting self.scale_factor to {self.scale_factor}") logging.info("### USING STD-RESCALING ###") - def register_schedule(self, - given_betas=None, beta_schedule="linear", timesteps=1000, - linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + def register_schedule( + self, + given_betas=None, + beta_schedule="linear", + timesteps=1000, + linear_start=1e-4, + linear_end=2e-2, + cosine_s=8e-3, + ): super().register_schedule(given_betas, beta_schedule, timesteps, linear_start, linear_end, cosine_s) self.shorten_cond_schedule = self.num_timesteps_cond > 1 @@ -508,8 +550,7 @@ def instantiate_cond_stage(self, config): def _get_denoise_row_from_list(self, samples, desc='', force_no_decoder_quantization=False): denoise_row = [] for zd in tqdm(samples, desc=desc): - denoise_row.append(self.decode_first_stage(zd, - force_not_quantize=force_no_decoder_quantization)) + denoise_row.append(self.decode_first_stage(zd, force_not_quantize=force_no_decoder_quantization)) n_imgs_per_row = len(denoise_row) denoise_row = torch.stack(denoise_row) # n_log_step, n_row, C, H, W denoise_grid = rearrange(denoise_row, 'n b c h w -> b n c h w') @@ -562,15 +603,18 @@ def delta_border(self, h, w): def get_weighting(self, h, w, Ly, Lx, device): weighting = self.delta_border(h, w) - weighting = torch.clip(weighting, self.split_input_params["clip_min_weight"], - self.split_input_params["clip_max_weight"], ) + weighting = torch.clip( + weighting, self.split_input_params["clip_min_weight"], self.split_input_params["clip_max_weight"], + ) weighting = weighting.view(1, h * w, 1).repeat(1, 1, Ly * Lx).to(device) if self.split_input_params["tie_braker"]: L_weighting = self.delta_border(Ly, Lx) - L_weighting = torch.clip(L_weighting, - self.split_input_params["clip_min_tie_weight"], - self.split_input_params["clip_max_tie_weight"]) + L_weighting = torch.clip( + L_weighting, + self.split_input_params["clip_min_tie_weight"], + self.split_input_params["clip_max_tie_weight"], + ) L_weighting = L_weighting.view(1, 1, Ly * Lx).to(device) weighting = weighting * L_weighting @@ -601,9 +645,12 @@ def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo load once fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) unfold = torch.nn.Unfold(**fold_params) - fold_params2 = dict(kernel_size=(kernel_size[0] * uf, kernel_size[0] * uf), - dilation=1, padding=0, - stride=(stride[0] * uf, stride[1] * uf)) + fold_params2 = dict( + kernel_size=(kernel_size[0] * uf, kernel_size[0] * uf), + dilation=1, + padding=0, + stride=(stride[0] * uf, stride[1] * uf), + ) fold = torch.nn.Fold(output_size=(x.shape[2] * uf, x.shape[3] * uf), **fold_params2) weighting = self.get_weighting(kernel_size[0] * uf, kernel_size[1] * uf, Ly, Lx, x.device).to(x.dtype) @@ -614,9 +661,12 @@ def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo load once fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) unfold = torch.nn.Unfold(**fold_params) - fold_params2 = dict(kernel_size=(kernel_size[0] // df, kernel_size[0] // df), - dilation=1, padding=0, - stride=(stride[0] // df, stride[1] // df)) + fold_params2 = dict( + kernel_size=(kernel_size[0] // df, kernel_size[0] // df), + dilation=1, + padding=0, + stride=(stride[0] // df, stride[1] // df), + ) fold = torch.nn.Fold(output_size=(x.shape[2] // df, x.shape[3] // df), **fold_params2) weighting = self.get_weighting(kernel_size[0] // df, kernel_size[1] // df, Ly, Lx, x.device).to(x.dtype) @@ -629,8 +679,16 @@ def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo load once return fold, unfold, normalization, weighting @torch.no_grad() - def get_input(self, batch, k, return_first_stage_outputs=False, force_c_encode=False, - cond_key=None, return_original_cond=False, bs=None): + def get_input( + self, + batch, + k, + return_first_stage_outputs=False, + force_c_encode=False, + cond_key=None, + return_original_cond=False, + bs=None, + ): if self.first_stage_key.endswith('encoded'): gaussian_parameters = batch[self.first_stage_key] encoder_posterior = DiagonalGaussianDistribution(gaussian_parameters) @@ -671,7 +729,7 @@ def get_input(self, batch, k, return_first_stage_outputs=False, force_c_encode=F c = {ckey: c, 'pos_x': pos_x, 'pos_y': pos_y} if self.text_embedding_dropout_rate > 0: - assert (self.text_embedding_dropout_rate < 1.) + assert self.text_embedding_dropout_rate < 1.0 c = random_dropout(c, drop_rate=self.text_embedding_dropout_rate) else: @@ -696,7 +754,7 @@ def decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) z = rearrange(z, 'b h w c -> b c h w').contiguous() - z = 1. / self.scale_factor * z + z = 1.0 / self.scale_factor * z if hasattr(self, "split_input_params"): if self.split_input_params["patch_distributed_vq"]: @@ -720,13 +778,15 @@ def decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): # 2. apply model loop over last dim if isinstance(self.first_stage_model, VQModelInterface): - output_list = [self.first_stage_model.decode(z[:, :, :, :, i], - force_not_quantize=predict_cids or force_not_quantize) - for i in range(z.shape[-1])] + output_list = [ + self.first_stage_model.decode( + z[:, :, :, :, i], force_not_quantize=predict_cids or force_not_quantize + ) + for i in range(z.shape[-1]) + ] else: - output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) - for i in range(z.shape[-1])] + output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) for i in range(z.shape[-1])] o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) o = o * weighting @@ -756,7 +816,7 @@ def differentiable_decode_first_stage(self, z, predict_cids=False, force_not_qua z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) z = rearrange(z, 'b h w c -> b c h w').contiguous() - z = 1. / self.scale_factor * z + z = 1.0 / self.scale_factor * z if hasattr(self, "split_input_params"): if self.split_input_params["patch_distributed_vq"]: @@ -780,13 +840,15 @@ def differentiable_decode_first_stage(self, z, predict_cids=False, force_not_qua # 2. apply model loop over last dim if isinstance(self.first_stage_model, VQModelInterface): - output_list = [self.first_stage_model.decode(z[:, :, :, :, i], - force_not_quantize=predict_cids or force_not_quantize) - for i in range(z.shape[-1])] + output_list = [ + self.first_stage_model.decode( + z[:, :, :, :, i], force_not_quantize=predict_cids or force_not_quantize + ) + for i in range(z.shape[-1]) + ] else: - output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) - for i in range(z.shape[-1])] + output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) for i in range(z.shape[-1])] o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) o = o * weighting @@ -830,8 +892,7 @@ def encode_first_stage(self, x): # Reshape to img shape z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) - output_list = [self.first_stage_model.encode(z[:, :, :, :, i]) - for i in range(z.shape[-1])] + output_list = [self.first_stage_model.encode(z[:, :, :, :, i]) for i in range(z.shape[-1])] o = torch.stack(output_list, axis=-1) o = o * weighting @@ -902,11 +963,13 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) z_list = [z[:, :, :, :, i] for i in range(z.shape[-1])] - if self.cond_stage_key in ["image", "LR_image", "segmentation", - 'bbox_img'] and self.model.conditioning_key: # todo check for completeness + if ( + self.cond_stage_key in ["image", "LR_image", "segmentation", 'bbox_img'] + and self.model.conditioning_key + ): # todo check for completeness c_key = next(iter(cond.keys())) # get key c = next(iter(cond.values())) # get value - assert (len(c) == 1) # todo extend to list with more than one elem + assert len(c) == 1 # todo extend to list with more than one elem c = c[0] # get element c = unfold(c) @@ -915,7 +978,9 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): cond_list = [{c_key: [c[:, :, :, :, i]]} for i in range(c.shape[-1])] elif self.cond_stage_key == 'coordinates_bbox': - assert 'original_image_size' in self.split_input_params, 'BoudingBoxRescaling is missing original_image_size' + assert ( + 'original_image_size' in self.split_input_params + ), 'BoudingBoxRescaling is missing original_image_size' # assuming padding of unfold is always 0 and its dilation is always 1 n_patches_per_row = int((w - ks[0]) / stride[0] + 1) @@ -927,19 +992,25 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): # get top left postions of patches as conforming for the bbbox tokenizer, therefore we # need to rescale the tl patch coordinates to be in between (0,1) - tl_patch_coordinates = [(rescale_latent * stride[0] * (patch_nr % n_patches_per_row) / full_img_w, - rescale_latent * stride[1] * (patch_nr // n_patches_per_row) / full_img_h) - for patch_nr in range(z.shape[-1])] + tl_patch_coordinates = [ + ( + rescale_latent * stride[0] * (patch_nr % n_patches_per_row) / full_img_w, + rescale_latent * stride[1] * (patch_nr // n_patches_per_row) / full_img_h, + ) + for patch_nr in range(z.shape[-1]) + ] # patch_limits are tl_coord, width and height coordinates as (x_tl, y_tl, h, w) - patch_limits = [(x_tl, y_tl, - rescale_latent * ks[0] / full_img_w, - rescale_latent * ks[1] / full_img_h) for x_tl, y_tl in tl_patch_coordinates] + patch_limits = [ + (x_tl, y_tl, rescale_latent * ks[0] / full_img_w, rescale_latent * ks[1] / full_img_h) + for x_tl, y_tl in tl_patch_coordinates + ] # patch_values = [(np.arange(x_tl,min(x_tl+ks, 1.)),np.arange(y_tl,min(y_tl+ks, 1.))) for x_tl, y_tl in tl_patch_coordinates] # tokenize crop coordinates for the bounding boxes of the respective patches - patch_limits_tknzd = [torch.LongTensor(self.bbox_tokenizer._crop_encoder(bbox))[None] - for bbox in patch_limits] # list of length l with tensors of shape (1, 2) + patch_limits_tknzd = [ + torch.LongTensor(self.bbox_tokenizer._crop_encoder(bbox))[None] for bbox in patch_limits + ] # list of length l with tensors of shape (1, 2) logging.info(patch_limits_tknzd[0].shape) # cut tknzd crop position from conditioning assert isinstance(cond, dict), 'cond must be dict to be fed into model' @@ -961,8 +1032,9 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): # apply model by loop over crops output_list = [self.model(z_list[i], t, **cond_list[i]) for i in range(z.shape[-1])] - assert not isinstance(output_list[0], - tuple) # todo cant deal with multiple model outputs check this never happens + assert not isinstance( + output_list[0], tuple + ) # todo cant deal with multiple model outputs check this never happens o = torch.stack(output_list, axis=-1) o = o * weighting @@ -980,8 +1052,9 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): return x_recon def _predict_eps_from_xstart(self, x_t, t, pred_xstart): - return (extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart) / \ - extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) + return ( + extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart + ) / extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) def _prior_bpd(self, x_start): """ @@ -1027,13 +1100,23 @@ def p_losses(self, x_start, cond, t, noise=None): loss_vlb = self.get_loss(model_output, target, mean=False).mean(dim=(1, 2, 3)) loss_vlb = (self.lvlb_weights[t] * loss_vlb).mean() loss_dict.update({f'{prefix}/loss_vlb': loss_vlb}) - loss += (self.original_elbo_weight * loss_vlb) + loss += self.original_elbo_weight * loss_vlb loss_dict.update({f'{prefix}/loss': loss}) return loss, loss_dict - def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codebook_ids=False, quantize_denoised=False, - return_x0=False, score_corrector=None, corrector_kwargs=None): + def p_mean_variance( + self, + x, + c, + t, + clip_denoised: bool, + return_codebook_ids=False, + quantize_denoised=False, + return_x0=False, + score_corrector=None, + corrector_kwargs=None, + ): t_in = t model_out = self.apply_model(x, t_in, c, return_ids=return_codebook_ids) @@ -1052,7 +1135,7 @@ def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codebook_ids=Fals raise NotImplementedError() if clip_denoised: - x_recon.clamp_(-1., 1.) + x_recon.clamp_(-1.0, 1.0) if quantize_denoised: x_recon, _, [_, _, indices] = self.first_stage_model.quantize(x_recon) model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) @@ -1064,15 +1147,33 @@ def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codebook_ids=Fals return model_mean, posterior_variance, posterior_log_variance @torch.no_grad() - def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False, - return_codebook_ids=False, quantize_denoised=False, return_x0=False, - temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None): + def p_sample( + self, + x, + c, + t, + clip_denoised=False, + repeat_noise=False, + return_codebook_ids=False, + quantize_denoised=False, + return_x0=False, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + ): b, *_, device = *x.shape, x.device - outputs = self.p_mean_variance(x=x, c=c, t=t, clip_denoised=clip_denoised, - return_codebook_ids=return_codebook_ids, - quantize_denoised=quantize_denoised, - return_x0=return_x0, - score_corrector=score_corrector, corrector_kwargs=corrector_kwargs) + outputs = self.p_mean_variance( + x=x, + c=c, + t=t, + clip_denoised=clip_denoised, + return_codebook_ids=return_codebook_ids, + quantize_denoised=quantize_denoised, + return_x0=return_x0, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + ) if return_codebook_ids: raise DeprecationWarning("Support dropped.") model_mean, _, model_log_variance, logits = outputs @@ -1082,7 +1183,7 @@ def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False, model_mean, _, model_log_variance = outputs noise = noise_like(x.shape, device, repeat_noise) * temperature - if noise_dropout > 0.: + if noise_dropout > 0.0: noise = torch.nn.functional.dropout(noise, p=noise_dropout) # no noise when t == 0 nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))) @@ -1095,10 +1196,25 @@ def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False, return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise @torch.no_grad() - def progressive_denoising(self, cond, shape, verbose=True, callback=None, quantize_denoised=False, - img_callback=None, mask=None, x0=None, temperature=1., noise_dropout=0., - score_corrector=None, corrector_kwargs=None, batch_size=None, x_T=None, start_T=None, - log_every_t=None): + def progressive_denoising( + self, + cond, + shape, + verbose=True, + callback=None, + quantize_denoised=False, + img_callback=None, + mask=None, + x0=None, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + batch_size=None, + x_T=None, + start_T=None, + log_every_t=None, + ): if not log_every_t: log_every_t = self.log_every_t timesteps = self.num_timesteps @@ -1114,16 +1230,22 @@ def progressive_denoising(self, cond, shape, verbose=True, callback=None, quanti intermediates = [] if cond is not None: if isinstance(cond, dict): - cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else - list(map(lambda x: x[:batch_size], cond[key])) for key in cond} + cond = { + key: cond[key][:batch_size] + if not isinstance(cond[key], list) + else list(map(lambda x: x[:batch_size], cond[key])) + for key in cond + } else: cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] if start_T is not None: timesteps = min(timesteps, start_T) - iterator = tqdm(reversed(range(0, timesteps)), desc='Progressive Generation', - total=timesteps) if verbose else reversed( - range(0, timesteps)) + iterator = ( + tqdm(reversed(range(0, timesteps)), desc='Progressive Generation', total=timesteps) + if verbose + else reversed(range(0, timesteps)) + ) if type(temperature) == float: temperature = [temperature] * timesteps @@ -1134,27 +1256,48 @@ def progressive_denoising(self, cond, shape, verbose=True, callback=None, quanti tc = self.cond_ids[ts].to(cond.device) cond = self.q_sample(x_start=cond, t=tc, noise=randn_like(cond, generator=self.rng)) - img, x0_partial = self.p_sample(img, cond, ts, - clip_denoised=self.clip_denoised, - quantize_denoised=quantize_denoised, return_x0=True, - temperature=temperature[i], noise_dropout=noise_dropout, - score_corrector=score_corrector, corrector_kwargs=corrector_kwargs) + img, x0_partial = self.p_sample( + img, + cond, + ts, + clip_denoised=self.clip_denoised, + quantize_denoised=quantize_denoised, + return_x0=True, + temperature=temperature[i], + noise_dropout=noise_dropout, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + ) if mask is not None: assert x0 is not None img_orig = self.q_sample(x0, ts) - img = img_orig * mask + (1. - mask) * img + img = img_orig * mask + (1.0 - mask) * img if i % log_every_t == 0 or i == timesteps - 1: intermediates.append(x0_partial) - if callback: callback(i) - if img_callback: img_callback(img, i) + if callback: + callback(i) + if img_callback: + img_callback(img, i) return img, intermediates @torch.no_grad() - def p_sample_loop(self, cond, shape, return_intermediates=False, - x_T=None, verbose=True, callback=None, timesteps=None, quantize_denoised=False, - mask=None, x0=None, img_callback=None, start_T=None, - log_every_t=None): + def p_sample_loop( + self, + cond, + shape, + return_intermediates=False, + x_T=None, + verbose=True, + callback=None, + timesteps=None, + quantize_denoised=False, + mask=None, + x0=None, + img_callback=None, + start_T=None, + log_every_t=None, + ): if not log_every_t: log_every_t = self.log_every_t @@ -1171,8 +1314,11 @@ def p_sample_loop(self, cond, shape, return_intermediates=False, if start_T is not None: timesteps = min(timesteps, start_T) - iterator = tqdm(reversed(range(0, timesteps)), desc='Sampling t', total=timesteps) if verbose else reversed( - range(0, timesteps)) + iterator = ( + tqdm(reversed(range(0, timesteps)), desc='Sampling t', total=timesteps) + if verbose + else reversed(range(0, timesteps)) + ) if mask is not None: assert x0 is not None @@ -1185,39 +1331,60 @@ def p_sample_loop(self, cond, shape, return_intermediates=False, tc = self.cond_ids[ts].to(cond.device) cond = self.q_sample(x_start=cond, t=tc, noise=randn_like(cond, generator=self.rng)) - img = self.p_sample(img, cond, ts, - clip_denoised=self.clip_denoised, - quantize_denoised=quantize_denoised) + img = self.p_sample(img, cond, ts, clip_denoised=self.clip_denoised, quantize_denoised=quantize_denoised) if mask is not None: img_orig = self.q_sample(x0, ts) - img = img_orig * mask + (1. - mask) * img + img = img_orig * mask + (1.0 - mask) * img if i % log_every_t == 0 or i == timesteps - 1: intermediates.append(img) - if callback: callback(i) - if img_callback: img_callback(img, i) + if callback: + callback(i) + if img_callback: + img_callback(img, i) if return_intermediates: return img, intermediates return img @torch.no_grad() - def sample(self, cond, batch_size=16, return_intermediates=False, x_T=None, - verbose=True, timesteps=None, quantize_denoised=False, - mask=None, x0=None, shape=None, **kwargs): + def sample( + self, + cond, + batch_size=16, + return_intermediates=False, + x_T=None, + verbose=True, + timesteps=None, + quantize_denoised=False, + mask=None, + x0=None, + shape=None, + **kwargs, + ): if shape is None: shape = (batch_size, self.channels, self.image_size, self.image_size) if cond is not None: if isinstance(cond, dict): - cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else - list(map(lambda x: x[:batch_size], cond[key])) for key in cond} + cond = { + key: cond[key][:batch_size] + if not isinstance(cond[key], list) + else list(map(lambda x: x[:batch_size], cond[key])) + for key in cond + } else: cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] - return self.p_sample_loop(cond, - shape, - return_intermediates=return_intermediates, x_T=x_T, - verbose=verbose, timesteps=timesteps, quantize_denoised=quantize_denoised, - mask=mask, x0=x0) + return self.p_sample_loop( + cond, + shape, + return_intermediates=return_intermediates, + x_T=x_T, + verbose=verbose, + timesteps=timesteps, + quantize_denoised=quantize_denoised, + mask=mask, + x0=x0, + ) @torch.no_grad() def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): @@ -1225,28 +1392,42 @@ def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): if ddim: ddim_sampler = DDIMSampler(self) shape = (self.channels, self.image_size, self.image_size) - samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, - shape, cond, verbose=False, **kwargs) + samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, shape, cond, verbose=False, **kwargs) else: - samples, intermediates = self.sample(cond=cond, batch_size=batch_size, - return_intermediates=True, **kwargs) + samples, intermediates = self.sample(cond=cond, batch_size=batch_size, return_intermediates=True, **kwargs) return samples, intermediates @torch.no_grad() - def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200, ddim_eta=1., return_keys=None, - quantize_denoised=True, inpaint=True, plot_denoise_rows=False, plot_progressive_rows=True, - plot_diffusion_rows=True, **kwargs): + def log_images( + self, + batch, + N=8, + n_row=4, + sample=True, + ddim_steps=200, + ddim_eta=1.0, + return_keys=None, + quantize_denoised=True, + inpaint=True, + plot_denoise_rows=False, + plot_progressive_rows=True, + plot_diffusion_rows=True, + **kwargs, + ): use_ddim = ddim_steps is not None log = dict() - z, c, x, xrec, xc = self.get_input(batch, self.first_stage_key, - return_first_stage_outputs=True, - force_c_encode=True, - return_original_cond=True, - bs=N) + z, c, x, xrec, xc = self.get_input( + batch, + self.first_stage_key, + return_first_stage_outputs=True, + force_c_encode=True, + return_original_cond=True, + bs=N, + ) N = min(x.shape[0], N) n_row = min(x.shape[0], n_row) log["inputs"] = x @@ -1287,8 +1468,9 @@ def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200, ddim_eta= if sample: # get denoise row with self.ema_scope("Plotting"): - samples, z_denoise_row = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, - ddim_steps=ddim_steps, eta=ddim_eta) + samples, z_denoise_row = self.sample_log( + cond=c, batch_size=N, ddim=use_ddim, ddim_steps=ddim_steps, eta=ddim_eta + ) # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True) x_samples = self.decode_first_stage(samples) log["samples"] = x_samples @@ -1296,13 +1478,21 @@ def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200, ddim_eta= denoise_grid = self._get_denoise_row_from_list(z_denoise_row) log["denoise_row"] = denoise_grid - if quantize_denoised and not isinstance(self.first_stage_model, AutoencoderKL) and not isinstance( - self.first_stage_model, IdentityFirstStage): + if ( + quantize_denoised + and not isinstance(self.first_stage_model, AutoencoderKL) + and not isinstance(self.first_stage_model, IdentityFirstStage) + ): # also display when quantizing x0 while sampling with self.ema_scope("Plotting Quantized Denoised"): - samples, z_denoise_row = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, - ddim_steps=ddim_steps, eta=ddim_eta, - quantize_denoised=True) + samples, z_denoise_row = self.sample_log( + cond=c, + batch_size=N, + ddim=use_ddim, + ddim_steps=ddim_steps, + eta=ddim_eta, + quantize_denoised=True, + ) # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True, # quantize_denoised=True) x_samples = self.decode_first_stage(samples) @@ -1313,27 +1503,29 @@ def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200, ddim_eta= b, h, w = z.shape[0], z.shape[2], z.shape[3] mask = torch.ones(N, h, w) # zeros will be filled in - mask[:, h // 4:3 * h // 4, w // 4:3 * w // 4] = 0. + mask[:, h // 4 : 3 * h // 4, w // 4 : 3 * w // 4] = 0.0 mask = mask[:, None, ...] with self.ema_scope("Plotting Inpaint"): - samples, _ = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, - ddim_steps=ddim_steps, x0=z[:N], mask=mask) + samples, _ = self.sample_log( + cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, ddim_steps=ddim_steps, x0=z[:N], mask=mask + ) x_samples = self.decode_first_stage(samples) log["samples_inpainting"] = x_samples log["mask"] = mask # outpaint with self.ema_scope("Plotting Outpaint"): - samples, _ = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, - ddim_steps=ddim_steps, x0=z[:N], mask=mask) + samples, _ = self.sample_log( + cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, ddim_steps=ddim_steps, x0=z[:N], mask=mask + ) x_samples = self.decode_first_stage(samples) log["samples_outpainting"] = x_samples if plot_progressive_rows: with self.ema_scope("Plotting Progressives"): - img, progressives = self.progressive_denoising(c, - shape=(self.channels, self.image_size, self.image_size), - batch_size=N) + img, progressives = self.progressive_denoising( + c, shape=(self.channels, self.image_size, self.image_size), batch_size=N + ) prog_row = self._get_denoise_row_from_list(progressives, desc="Progressive Generation") log["progressive_row"] = prog_row @@ -1360,7 +1552,7 @@ def to_rgb(self, x): if not hasattr(self, "colorize"): self.colorize = torch.randn(3, x.shape[1], 1, 1, generator=self.rng).to(x) x = nn.functional.conv2d(x, weight=self.colorize) - x = 2. * (x - x.min()) / (x.max() - x.min()) - 1. + x = 2.0 * (x - x.min()) / (x.max() - x.min()) - 1.0 return x def set_input_tensor(self, input_tensor): @@ -1425,9 +1617,8 @@ def forward(self, x, c, *args, **kwargs): @torch.no_grad() def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): if self.cfg.scale_by_std and self.current_epoch == 0 and self.global_step == 0 and batch_idx == 0: - assert self.cfg.scale_factor == 1., 'rather not use custom rescaling and std-rescaling simultaneously' - batch[self.cfg.first_stage_key] = \ - batch[self.cfg.first_stage_key].cuda(non_blocking=True) + assert self.cfg.scale_factor == 1.0, 'rather not use custom rescaling and std-rescaling simultaneously' + batch[self.cfg.first_stage_key] = batch[self.cfg.first_stage_key].cuda(non_blocking=True) self.model.on_train_batch_start(batch, batch_idx) def training_step(self, dataloader_iter, batch_idx): @@ -1500,8 +1691,7 @@ def training_step(self, dataloader_iter, batch_idx): if loss_scale is not None: self.log('loss_scale', loss_scale, batch_size=1) - self.log_dict(loss_dict, prog_bar=False, - logger=True, on_step=True, rank_zero_only=True, batch_size=1) + self.log_dict(loss_dict, prog_bar=False, logger=True, on_step=True, rank_zero_only=True, batch_size=1) self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) lr = self._optimizer.param_groups[0]['lr'] self.log('lr', lr, prog_bar=True, rank_zero_only=True, batch_size=1) @@ -1541,22 +1731,19 @@ def _append_sequence_parallel_module_grads(self, module, grads): grads.append(grad.data) def get_forward_output_and_loss_func(self): - def process_batch(batch): """ Prepares the global batch for apex fwd/bwd functions. Global batch is a list of micro batches. """ # noise_map, condition - batch[self.cfg.first_stage_key] = \ - batch[self.cfg.first_stage_key].cuda(non_blocking=True) + batch[self.cfg.first_stage_key] = batch[self.cfg.first_stage_key].cuda(non_blocking=True) if isinstance(batch[self.cfg.cond_stage_key], torch.Tensor): # in the case of precached text embeddings, cond_stage is also a tensor batch[self.cfg.cond_stage_key] = batch[self.cfg.cond_stage_key].cuda(non_blocking=True) # SD has more dedicated structure for encoding, so we enable autocasting here as well with torch.cuda.amp.autocast( - self.autocast_dtype in (torch.half, torch.bfloat16), - dtype=self.autocast_dtype, + self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, ): x, c = self.model.get_input(batch, self.cfg.first_stage_key) @@ -1677,13 +1864,11 @@ def build_train_valid_test_datasets(self): if self.cfg.first_stage_key.endswith("encoded"): self._train_ds, self._validation_ds = build_train_valid_precached_datasets( - model_cfg=self.cfg, - consumed_samples=self.compute_consumed_samples(0), + model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0), ) else: self._train_ds, self._validation_ds = build_train_valid_datasets( - model_cfg=self.cfg, - consumed_samples=self.compute_consumed_samples(0) + model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0) ) self._test_ds = None @@ -1733,8 +1918,7 @@ def setup_test_data(self, cfg): f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' ) self._test_dl = torch.utils.data.DataLoader( - self._test_ds, batch_size=self._micro_batch_size, - num_workers=cfg.num_workers, pin_memory=True, + self._test_ds, batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, ) def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm_legacy.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm_legacy.py index 4f0f495aaf1b..4e7460115206 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm_legacy.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm_legacy.py @@ -19,15 +19,16 @@ -- merci """ +from contextlib import contextmanager +from functools import partial + import numpy as np import pytorch_lightning as pl import torch import torch.nn as nn from apex import amp from apex.contrib.clip_grad import clip_grad_norm_ -from contextlib import contextmanager from einops import rearrange, repeat -from functools import partial from omegaconf import open_dict from pytorch_lightning.utilities import GradClipAlgorithmType from pytorch_lightning.utilities.distributed import rank_zero_only @@ -39,24 +40,39 @@ from nemo.collections.multimodal.models.multimodal_base_model import MegatronMultimodalModel from nemo.collections.multimodal.models.stable_diffusion.diffusion_model import DiffusionModel -from nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder import VQModelInterface, IdentityFirstStage, \ - AutoencoderKL +from nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder import ( + AutoencoderKL, + IdentityFirstStage, + VQModelInterface, +) from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import random_dropout -from nemo.collections.multimodal.models.stable_diffusion.ldm_config import DDPMDiffusionModelConfig, \ - LatentDiffusionModelConfig +from nemo.collections.multimodal.models.stable_diffusion.ldm_config import ( + DDPMDiffusionModelConfig, + LatentDiffusionModelConfig, +) from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import make_beta_schedule, \ - extract_into_tensor, noise_like -from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import normal_kl, \ - DiagonalGaussianDistribution -from nemo.collections.multimodal.parts.stable_diffusion.utils import log_txt_as_img, exists, default, ismap, isimage, \ - mean_flat, count_params +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + extract_into_tensor, + make_beta_schedule, + noise_like, +) +from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import ( + DiagonalGaussianDistribution, + normal_kl, +) +from nemo.collections.multimodal.parts.stable_diffusion.utils import ( + count_params, + default, + exists, + isimage, + ismap, + log_txt_as_img, + mean_flat, +) from nemo.core.classes.common import Serialization from nemo.utils import logging -__conditioning_keys__ = {'concat': 'c_concat', - 'crossattn': 'c_crossattn', - 'adm': 'y'} +__conditioning_keys__ = {'concat': 'c_concat', 'crossattn': 'c_crossattn', 'adm': 'y'} def disabled_train(self, mode=True): @@ -122,8 +138,14 @@ def __init__(self, cfg: DDPMDiffusionModelConfig, trainer=None): # if cfg.ckpt_path is not None: # self.init_from_ckpt(cfg.ckpt_path, ignore_keys=cfg.ignore_keys, only_model=cfg.load_only_unet) - self.register_schedule(given_betas=cfg.given_betas, beta_schedule=cfg.beta_schedule, timesteps=cfg.timesteps, - linear_start=cfg.linear_start, linear_end=cfg.linear_end, cosine_s=cfg.cosine_s) + self.register_schedule( + given_betas=cfg.given_betas, + beta_schedule=cfg.beta_schedule, + timesteps=cfg.timesteps, + linear_start=cfg.linear_start, + linear_end=cfg.linear_end, + cosine_s=cfg.cosine_s, + ) self.loss_type = cfg.loss_type @@ -133,18 +155,26 @@ def __init__(self, cfg: DDPMDiffusionModelConfig, trainer=None): self.logvar = nn.Parameter(self.logvar, requires_grad=True) self.learning_rate = cfg.learning_rate - def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, - linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + def register_schedule( + self, + given_betas=None, + beta_schedule="linear", + timesteps=1000, + linear_start=1e-4, + linear_end=2e-2, + cosine_s=8e-3, + ): if exists(given_betas): betas = given_betas else: - betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, - cosine_s=cosine_s) - alphas = 1. - betas + betas = make_beta_schedule( + beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s + ) + alphas = 1.0 - betas alphas_cumprod = np.cumprod(alphas, axis=0) - alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) + alphas_cumprod_prev = np.append(1.0, alphas_cumprod[:-1]) - timesteps, = betas.shape + (timesteps,) = betas.shape self.num_timesteps = int(timesteps) self.linear_start = linear_start self.linear_end = linear_end @@ -158,28 +188,32 @@ def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps= # calculations for diffusion q(x_t | x_{t-1}) and others self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) - self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) - self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) - self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) - self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1.0 - alphas_cumprod))) + self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1.0 - alphas_cumprod))) + self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod))) + self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod - 1))) # calculations for posterior q(x_{t-1} | x_t, x_0) - posterior_variance = (1 - self.v_posterior) * betas * (1. - alphas_cumprod_prev) / ( - 1. - alphas_cumprod) + self.v_posterior * betas + posterior_variance = (1 - self.v_posterior) * betas * (1.0 - alphas_cumprod_prev) / ( + 1.0 - alphas_cumprod + ) + self.v_posterior * betas # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) self.register_buffer('posterior_variance', to_torch(posterior_variance)) # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20)))) - self.register_buffer('posterior_mean_coef1', to_torch( - betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod))) - self.register_buffer('posterior_mean_coef2', to_torch( - (1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. - alphas_cumprod))) + self.register_buffer( + 'posterior_mean_coef1', to_torch(betas * np.sqrt(alphas_cumprod_prev) / (1.0 - alphas_cumprod)) + ) + self.register_buffer( + 'posterior_mean_coef2', to_torch((1.0 - alphas_cumprod_prev) * np.sqrt(alphas) / (1.0 - alphas_cumprod)) + ) if self.parameterization == "eps": lvlb_weights = self.betas ** 2 / ( - 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod)) + 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod) + ) elif self.parameterization == "x0": - lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2. * 1 - torch.Tensor(alphas_cumprod)) + lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2.0 * 1 - torch.Tensor(alphas_cumprod)) else: raise NotImplementedError("mu not supported") # TODO how to choose this term @@ -212,8 +246,9 @@ def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): if k.startswith(ik): print("Deleting key {} from state_dict.".format(k)) del sd[k] - missing, unexpected = self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict( - sd, strict=False) + missing, unexpected = ( + self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict(sd, strict=False) + ) print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") if len(missing) > 0: print(f"Missing Keys: {missing}") @@ -227,21 +262,21 @@ def q_mean_variance(self, x_start, t): :param t: the number of diffusion steps (minus 1). Here, 0 means one step. :return: A tuple (mean, variance, log_variance), all of x_start's shape. """ - mean = (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start) + mean = extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start variance = extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape) log_variance = extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape) return mean, variance, log_variance def predict_start_from_noise(self, x_t, t, noise): return ( - extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - - extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise + extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t + - extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise ) def q_posterior(self, x_start, x_t, t): posterior_mean = ( - extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start + - extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t + extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start + + extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t ) posterior_variance = extract_into_tensor(self.posterior_variance, t, x_t.shape) posterior_log_variance_clipped = extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape) @@ -254,7 +289,7 @@ def p_mean_variance(self, x, t, clip_denoised: bool): elif self.parameterization == "x0": x_recon = model_out if clip_denoised: - x_recon.clamp_(-1., 1.) + x_recon.clamp_(-1.0, 1.0) model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) return model_mean, posterior_variance, posterior_log_variance @@ -275,8 +310,9 @@ def p_sample_loop(self, shape, return_intermediates=False): img = torch.randn(shape, device=device) intermediates = [img] for i in tqdm(reversed(range(0, self.num_timesteps)), desc='Sampling t', total=self.num_timesteps): - img = self.p_sample(img, torch.full((b,), i, device=device, dtype=torch.long), - clip_denoised=self.clip_denoised) + img = self.p_sample( + img, torch.full((b,), i, device=device, dtype=torch.long), clip_denoised=self.clip_denoised + ) if i % self.log_every_t == 0 or i == self.num_timesteps - 1: intermediates.append(img) if return_intermediates: @@ -287,13 +323,16 @@ def p_sample_loop(self, shape, return_intermediates=False): def sample(self, batch_size=16, return_intermediates=False): image_size = self.image_size channels = self.channels - return self.p_sample_loop((batch_size, channels, image_size, image_size), - return_intermediates=return_intermediates) + return self.p_sample_loop( + (batch_size, channels, image_size, image_size), return_intermediates=return_intermediates + ) def q_sample(self, x_start, t, noise=None): noise = default(noise, lambda: torch.randn_like(x_start)) - return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) + return ( + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise + ) def get_loss(self, pred, target, mean=True): if self.use_fp16: @@ -366,11 +405,9 @@ def shared_step(self, batch): def training_step(self, batch, batch_idx): loss, loss_dict = self.shared_step(batch) - self.log_dict(loss_dict, prog_bar=False, - logger=True, on_step=True, on_epoch=True) + self.log_dict(loss_dict, prog_bar=False, logger=True, on_step=True, on_epoch=True) - self.log("global_step", self.global_step, - prog_bar=True, logger=True, on_step=True, on_epoch=False) + self.log("global_step", self.global_step, prog_bar=True, logger=True, on_step=True, on_epoch=False) if self.use_scheduler: lr = self.optimizers().param_groups[0]['lr'] @@ -499,17 +536,23 @@ def __init__(self, cfg: LatentDiffusionModelConfig, trainer=None): inductor_config.triton.cudagraphs = cfg.get("inductor_cudagraphs", False) self.model = optimize("inductor")(self.model) - def make_cond_schedule(self, ): + def make_cond_schedule(self,): self.cond_ids = torch.full(size=(self.num_timesteps,), fill_value=self.num_timesteps - 1, dtype=torch.long) ids = torch.round(torch.linspace(0, self.num_timesteps - 1, self.num_timesteps_cond)).long() - self.cond_ids[:self.num_timesteps_cond] = ids + self.cond_ids[: self.num_timesteps_cond] = ids @rank_zero_only @torch.no_grad() def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): # only for very first batch - if self.scale_by_std and self.current_epoch == 0 and self.global_step == 0 and batch_idx == 0 and not self.restarted_from_ckpt: - assert self.scale_factor == 1., 'rather not use custom rescaling and std-rescaling simultaneously' + if ( + self.scale_by_std + and self.current_epoch == 0 + and self.global_step == 0 + and batch_idx == 0 + and not self.restarted_from_ckpt + ): + assert self.scale_factor == 1.0, 'rather not use custom rescaling and std-rescaling simultaneously' # set rescale weight to 1./std of encodings print("### USING STD-RESCALING ###") x = super().get_input(batch, self.first_stage_key) @@ -517,13 +560,19 @@ def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): encoder_posterior = self.encode_first_stage(x) z = self.get_first_stage_encoding(encoder_posterior).detach() del self.scale_factor - self.register_buffer('scale_factor', 1. / z.flatten().std()) + self.register_buffer('scale_factor', 1.0 / z.flatten().std()) print(f"setting self.scale_factor to {self.scale_factor}") print("### USING STD-RESCALING ###") - def register_schedule(self, - given_betas=None, beta_schedule="linear", timesteps=1000, - linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + def register_schedule( + self, + given_betas=None, + beta_schedule="linear", + timesteps=1000, + linear_start=1e-4, + linear_end=2e-2, + cosine_s=8e-3, + ): super().register_schedule(given_betas, beta_schedule, timesteps, linear_start, linear_end, cosine_s) self.shorten_cond_schedule = self.num_timesteps_cond > 1 @@ -561,8 +610,9 @@ def instantiate_cond_stage(self, config): def _get_denoise_row_from_list(self, samples, desc='', force_no_decoder_quantization=False): denoise_row = [] for zd in tqdm(samples, desc=desc): - denoise_row.append(self.decode_first_stage(zd.to(self.device), - force_not_quantize=force_no_decoder_quantization)) + denoise_row.append( + self.decode_first_stage(zd.to(self.device), force_not_quantize=force_no_decoder_quantization) + ) n_imgs_per_row = len(denoise_row) denoise_row = torch.stack(denoise_row) # n_log_step, n_row, C, H, W denoise_grid = rearrange(denoise_row, 'n b c h w -> b n c h w') @@ -615,15 +665,18 @@ def delta_border(self, h, w): def get_weighting(self, h, w, Ly, Lx, device): weighting = self.delta_border(h, w) - weighting = torch.clip(weighting, self.split_input_params["clip_min_weight"], - self.split_input_params["clip_max_weight"], ) + weighting = torch.clip( + weighting, self.split_input_params["clip_min_weight"], self.split_input_params["clip_max_weight"], + ) weighting = weighting.view(1, h * w, 1).repeat(1, 1, Ly * Lx).to(device) if self.split_input_params["tie_braker"]: L_weighting = self.delta_border(Ly, Lx) - L_weighting = torch.clip(L_weighting, - self.split_input_params["clip_min_tie_weight"], - self.split_input_params["clip_max_tie_weight"]) + L_weighting = torch.clip( + L_weighting, + self.split_input_params["clip_min_tie_weight"], + self.split_input_params["clip_max_tie_weight"], + ) L_weighting = L_weighting.view(1, 1, Ly * Lx).to(device) weighting = weighting * L_weighting @@ -654,9 +707,12 @@ def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo load once fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) unfold = torch.nn.Unfold(**fold_params) - fold_params2 = dict(kernel_size=(kernel_size[0] * uf, kernel_size[0] * uf), - dilation=1, padding=0, - stride=(stride[0] * uf, stride[1] * uf)) + fold_params2 = dict( + kernel_size=(kernel_size[0] * uf, kernel_size[0] * uf), + dilation=1, + padding=0, + stride=(stride[0] * uf, stride[1] * uf), + ) fold = torch.nn.Fold(output_size=(x.shape[2] * uf, x.shape[3] * uf), **fold_params2) weighting = self.get_weighting(kernel_size[0] * uf, kernel_size[1] * uf, Ly, Lx, x.device).to(x.dtype) @@ -667,9 +723,12 @@ def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo load once fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) unfold = torch.nn.Unfold(**fold_params) - fold_params2 = dict(kernel_size=(kernel_size[0] // df, kernel_size[0] // df), - dilation=1, padding=0, - stride=(stride[0] // df, stride[1] // df)) + fold_params2 = dict( + kernel_size=(kernel_size[0] // df, kernel_size[0] // df), + dilation=1, + padding=0, + stride=(stride[0] // df, stride[1] // df), + ) fold = torch.nn.Fold(output_size=(x.shape[2] // df, x.shape[3] // df), **fold_params2) weighting = self.get_weighting(kernel_size[0] // df, kernel_size[1] // df, Ly, Lx, x.device).to(x.dtype) @@ -682,8 +741,16 @@ def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo load once return fold, unfold, normalization, weighting @torch.no_grad() - def get_input(self, batch, k, return_first_stage_outputs=False, force_c_encode=False, - cond_key=None, return_original_cond=False, bs=None): + def get_input( + self, + batch, + k, + return_first_stage_outputs=False, + force_c_encode=False, + cond_key=None, + return_original_cond=False, + bs=None, + ): x = super().get_input(batch, k) if bs is not None: x = x[:bs] @@ -720,7 +787,7 @@ def get_input(self, batch, k, return_first_stage_outputs=False, force_c_encode=F c = {ckey: c, 'pos_x': pos_x, 'pos_y': pos_y} if self.text_embedding_dropout_rate > 0: - assert (self.text_embedding_dropout_rate < 1.) + assert self.text_embedding_dropout_rate < 1.0 c = random_dropout(c, drop_rate=self.text_embedding_dropout_rate) else: @@ -745,7 +812,7 @@ def decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) z = rearrange(z, 'b h w c -> b c h w').contiguous() - z = 1. / self.scale_factor * z + z = 1.0 / self.scale_factor * z if hasattr(self, "split_input_params"): if self.split_input_params["patch_distributed_vq"]: @@ -769,13 +836,15 @@ def decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): # 2. apply model loop over last dim if isinstance(self.first_stage_model, VQModelInterface): - output_list = [self.first_stage_model.decode(z[:, :, :, :, i], - force_not_quantize=predict_cids or force_not_quantize) - for i in range(z.shape[-1])] + output_list = [ + self.first_stage_model.decode( + z[:, :, :, :, i], force_not_quantize=predict_cids or force_not_quantize + ) + for i in range(z.shape[-1]) + ] else: - output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) - for i in range(z.shape[-1])] + output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) for i in range(z.shape[-1])] o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) o = o * weighting @@ -805,7 +874,7 @@ def differentiable_decode_first_stage(self, z, predict_cids=False, force_not_qua z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) z = rearrange(z, 'b h w c -> b c h w').contiguous() - z = 1. / self.scale_factor * z + z = 1.0 / self.scale_factor * z if hasattr(self, "split_input_params"): if self.split_input_params["patch_distributed_vq"]: @@ -829,13 +898,15 @@ def differentiable_decode_first_stage(self, z, predict_cids=False, force_not_qua # 2. apply model loop over last dim if isinstance(self.first_stage_model, VQModelInterface): - output_list = [self.first_stage_model.decode(z[:, :, :, :, i], - force_not_quantize=predict_cids or force_not_quantize) - for i in range(z.shape[-1])] + output_list = [ + self.first_stage_model.decode( + z[:, :, :, :, i], force_not_quantize=predict_cids or force_not_quantize + ) + for i in range(z.shape[-1]) + ] else: - output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) - for i in range(z.shape[-1])] + output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) for i in range(z.shape[-1])] o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) o = o * weighting @@ -879,8 +950,7 @@ def encode_first_stage(self, x): # Reshape to img shape z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) - output_list = [self.first_stage_model.encode(z[:, :, :, :, i]) - for i in range(z.shape[-1])] + output_list = [self.first_stage_model.encode(z[:, :, :, :, i]) for i in range(z.shape[-1])] o = torch.stack(output_list, axis=-1) o = o * weighting @@ -949,11 +1019,13 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) z_list = [z[:, :, :, :, i] for i in range(z.shape[-1])] - if self.cond_stage_key in ["image", "LR_image", "segmentation", - 'bbox_img'] and self.model.conditioning_key: # todo check for completeness + if ( + self.cond_stage_key in ["image", "LR_image", "segmentation", 'bbox_img'] + and self.model.conditioning_key + ): # todo check for completeness c_key = next(iter(cond.keys())) # get key c = next(iter(cond.values())) # get value - assert (len(c) == 1) # todo extend to list with more than one elem + assert len(c) == 1 # todo extend to list with more than one elem c = c[0] # get element c = unfold(c) @@ -962,7 +1034,9 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): cond_list = [{c_key: [c[:, :, :, :, i]]} for i in range(c.shape[-1])] elif self.cond_stage_key == 'coordinates_bbox': - assert 'original_image_size' in self.split_input_params, 'BoudingBoxRescaling is missing original_image_size' + assert ( + 'original_image_size' in self.split_input_params + ), 'BoudingBoxRescaling is missing original_image_size' # assuming padding of unfold is always 0 and its dilation is always 1 n_patches_per_row = int((w - ks[0]) / stride[0] + 1) @@ -974,19 +1048,26 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): # get top left postions of patches as conforming for the bbbox tokenizer, therefore we # need to rescale the tl patch coordinates to be in between (0,1) - tl_patch_coordinates = [(rescale_latent * stride[0] * (patch_nr % n_patches_per_row) / full_img_w, - rescale_latent * stride[1] * (patch_nr // n_patches_per_row) / full_img_h) - for patch_nr in range(z.shape[-1])] + tl_patch_coordinates = [ + ( + rescale_latent * stride[0] * (patch_nr % n_patches_per_row) / full_img_w, + rescale_latent * stride[1] * (patch_nr // n_patches_per_row) / full_img_h, + ) + for patch_nr in range(z.shape[-1]) + ] # patch_limits are tl_coord, width and height coordinates as (x_tl, y_tl, h, w) - patch_limits = [(x_tl, y_tl, - rescale_latent * ks[0] / full_img_w, - rescale_latent * ks[1] / full_img_h) for x_tl, y_tl in tl_patch_coordinates] + patch_limits = [ + (x_tl, y_tl, rescale_latent * ks[0] / full_img_w, rescale_latent * ks[1] / full_img_h) + for x_tl, y_tl in tl_patch_coordinates + ] # patch_values = [(np.arange(x_tl,min(x_tl+ks, 1.)),np.arange(y_tl,min(y_tl+ks, 1.))) for x_tl, y_tl in tl_patch_coordinates] # tokenize crop coordinates for the bounding boxes of the respective patches - patch_limits_tknzd = [torch.LongTensor(self.bbox_tokenizer._crop_encoder(bbox))[None].to(self.device) - for bbox in patch_limits] # list of length l with tensors of shape (1, 2) + patch_limits_tknzd = [ + torch.LongTensor(self.bbox_tokenizer._crop_encoder(bbox))[None].to(self.device) + for bbox in patch_limits + ] # list of length l with tensors of shape (1, 2) print(patch_limits_tknzd[0].shape) # cut tknzd crop position from conditioning assert isinstance(cond, dict), 'cond must be dict to be fed into model' @@ -1008,8 +1089,9 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): # apply model by loop over crops output_list = [self.model(z_list[i], t, **cond_list[i]) for i in range(z.shape[-1])] - assert not isinstance(output_list[0], - tuple) # todo cant deal with multiple model outputs check this never happens + assert not isinstance( + output_list[0], tuple + ) # todo cant deal with multiple model outputs check this never happens o = torch.stack(output_list, axis=-1) o = o * weighting @@ -1027,8 +1109,9 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): return x_recon def _predict_eps_from_xstart(self, x_t, t, pred_xstart): - return (extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart) / \ - extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) + return ( + extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart + ) / extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) def _prior_bpd(self, x_start): """ @@ -1074,13 +1157,23 @@ def p_losses(self, x_start, cond, t, noise=None): loss_vlb = self.get_loss(model_output, target, mean=False).mean(dim=(1, 2, 3)) loss_vlb = (self.lvlb_weights[t] * loss_vlb).mean() loss_dict.update({f'{prefix}/loss_vlb': loss_vlb}) - loss += (self.original_elbo_weight * loss_vlb) + loss += self.original_elbo_weight * loss_vlb loss_dict.update({f'{prefix}/loss': loss}) return loss, loss_dict - def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codebook_ids=False, quantize_denoised=False, - return_x0=False, score_corrector=None, corrector_kwargs=None): + def p_mean_variance( + self, + x, + c, + t, + clip_denoised: bool, + return_codebook_ids=False, + quantize_denoised=False, + return_x0=False, + score_corrector=None, + corrector_kwargs=None, + ): t_in = t model_out = self.apply_model(x, t_in, c, return_ids=return_codebook_ids) @@ -1099,7 +1192,7 @@ def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codebook_ids=Fals raise NotImplementedError() if clip_denoised: - x_recon.clamp_(-1., 1.) + x_recon.clamp_(-1.0, 1.0) if quantize_denoised: x_recon, _, [_, _, indices] = self.first_stage_model.quantize(x_recon) model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) @@ -1111,15 +1204,33 @@ def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codebook_ids=Fals return model_mean, posterior_variance, posterior_log_variance @torch.no_grad() - def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False, - return_codebook_ids=False, quantize_denoised=False, return_x0=False, - temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None): + def p_sample( + self, + x, + c, + t, + clip_denoised=False, + repeat_noise=False, + return_codebook_ids=False, + quantize_denoised=False, + return_x0=False, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + ): b, *_, device = *x.shape, x.device - outputs = self.p_mean_variance(x=x, c=c, t=t, clip_denoised=clip_denoised, - return_codebook_ids=return_codebook_ids, - quantize_denoised=quantize_denoised, - return_x0=return_x0, - score_corrector=score_corrector, corrector_kwargs=corrector_kwargs) + outputs = self.p_mean_variance( + x=x, + c=c, + t=t, + clip_denoised=clip_denoised, + return_codebook_ids=return_codebook_ids, + quantize_denoised=quantize_denoised, + return_x0=return_x0, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + ) if return_codebook_ids: raise DeprecationWarning("Support dropped.") model_mean, _, model_log_variance, logits = outputs @@ -1129,7 +1240,7 @@ def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False, model_mean, _, model_log_variance = outputs noise = noise_like(x.shape, device, repeat_noise) * temperature - if noise_dropout > 0.: + if noise_dropout > 0.0: noise = torch.nn.functional.dropout(noise, p=noise_dropout) # no noise when t == 0 nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))) @@ -1142,10 +1253,25 @@ def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False, return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise @torch.no_grad() - def progressive_denoising(self, cond, shape, verbose=True, callback=None, quantize_denoised=False, - img_callback=None, mask=None, x0=None, temperature=1., noise_dropout=0., - score_corrector=None, corrector_kwargs=None, batch_size=None, x_T=None, start_T=None, - log_every_t=None): + def progressive_denoising( + self, + cond, + shape, + verbose=True, + callback=None, + quantize_denoised=False, + img_callback=None, + mask=None, + x0=None, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + batch_size=None, + x_T=None, + start_T=None, + log_every_t=None, + ): if not log_every_t: log_every_t = self.log_every_t timesteps = self.num_timesteps @@ -1161,16 +1287,22 @@ def progressive_denoising(self, cond, shape, verbose=True, callback=None, quanti intermediates = [] if cond is not None: if isinstance(cond, dict): - cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else - list(map(lambda x: x[:batch_size], cond[key])) for key in cond} + cond = { + key: cond[key][:batch_size] + if not isinstance(cond[key], list) + else list(map(lambda x: x[:batch_size], cond[key])) + for key in cond + } else: cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] if start_T is not None: timesteps = min(timesteps, start_T) - iterator = tqdm(reversed(range(0, timesteps)), desc='Progressive Generation', - total=timesteps) if verbose else reversed( - range(0, timesteps)) + iterator = ( + tqdm(reversed(range(0, timesteps)), desc='Progressive Generation', total=timesteps) + if verbose + else reversed(range(0, timesteps)) + ) if type(temperature) == float: temperature = [temperature] * timesteps @@ -1181,27 +1313,48 @@ def progressive_denoising(self, cond, shape, verbose=True, callback=None, quanti tc = self.cond_ids[ts].to(cond.device) cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond)) - img, x0_partial = self.p_sample(img, cond, ts, - clip_denoised=self.clip_denoised, - quantize_denoised=quantize_denoised, return_x0=True, - temperature=temperature[i], noise_dropout=noise_dropout, - score_corrector=score_corrector, corrector_kwargs=corrector_kwargs) + img, x0_partial = self.p_sample( + img, + cond, + ts, + clip_denoised=self.clip_denoised, + quantize_denoised=quantize_denoised, + return_x0=True, + temperature=temperature[i], + noise_dropout=noise_dropout, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + ) if mask is not None: assert x0 is not None img_orig = self.q_sample(x0, ts) - img = img_orig * mask + (1. - mask) * img + img = img_orig * mask + (1.0 - mask) * img if i % log_every_t == 0 or i == timesteps - 1: intermediates.append(x0_partial) - if callback: callback(i) - if img_callback: img_callback(img, i) + if callback: + callback(i) + if img_callback: + img_callback(img, i) return img, intermediates @torch.no_grad() - def p_sample_loop(self, cond, shape, return_intermediates=False, - x_T=None, verbose=True, callback=None, timesteps=None, quantize_denoised=False, - mask=None, x0=None, img_callback=None, start_T=None, - log_every_t=None): + def p_sample_loop( + self, + cond, + shape, + return_intermediates=False, + x_T=None, + verbose=True, + callback=None, + timesteps=None, + quantize_denoised=False, + mask=None, + x0=None, + img_callback=None, + start_T=None, + log_every_t=None, + ): if not log_every_t: log_every_t = self.log_every_t @@ -1218,8 +1371,11 @@ def p_sample_loop(self, cond, shape, return_intermediates=False, if start_T is not None: timesteps = min(timesteps, start_T) - iterator = tqdm(reversed(range(0, timesteps)), desc='Sampling t', total=timesteps) if verbose else reversed( - range(0, timesteps)) + iterator = ( + tqdm(reversed(range(0, timesteps)), desc='Sampling t', total=timesteps) + if verbose + else reversed(range(0, timesteps)) + ) if mask is not None: assert x0 is not None @@ -1232,39 +1388,60 @@ def p_sample_loop(self, cond, shape, return_intermediates=False, tc = self.cond_ids[ts].to(cond.device) cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond)) - img = self.p_sample(img, cond, ts, - clip_denoised=self.clip_denoised, - quantize_denoised=quantize_denoised) + img = self.p_sample(img, cond, ts, clip_denoised=self.clip_denoised, quantize_denoised=quantize_denoised) if mask is not None: img_orig = self.q_sample(x0, ts) - img = img_orig * mask + (1. - mask) * img + img = img_orig * mask + (1.0 - mask) * img if i % log_every_t == 0 or i == timesteps - 1: intermediates.append(img) - if callback: callback(i) - if img_callback: img_callback(img, i) + if callback: + callback(i) + if img_callback: + img_callback(img, i) if return_intermediates: return img, intermediates return img @torch.no_grad() - def sample(self, cond, batch_size=16, return_intermediates=False, x_T=None, - verbose=True, timesteps=None, quantize_denoised=False, - mask=None, x0=None, shape=None, **kwargs): + def sample( + self, + cond, + batch_size=16, + return_intermediates=False, + x_T=None, + verbose=True, + timesteps=None, + quantize_denoised=False, + mask=None, + x0=None, + shape=None, + **kwargs, + ): if shape is None: shape = (batch_size, self.channels, self.image_size, self.image_size) if cond is not None: if isinstance(cond, dict): - cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else - list(map(lambda x: x[:batch_size], cond[key])) for key in cond} + cond = { + key: cond[key][:batch_size] + if not isinstance(cond[key], list) + else list(map(lambda x: x[:batch_size], cond[key])) + for key in cond + } else: cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] - return self.p_sample_loop(cond, - shape, - return_intermediates=return_intermediates, x_T=x_T, - verbose=verbose, timesteps=timesteps, quantize_denoised=quantize_denoised, - mask=mask, x0=x0) + return self.p_sample_loop( + cond, + shape, + return_intermediates=return_intermediates, + x_T=x_T, + verbose=verbose, + timesteps=timesteps, + quantize_denoised=quantize_denoised, + mask=mask, + x0=x0, + ) @torch.no_grad() def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): @@ -1272,28 +1449,42 @@ def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): if ddim: ddim_sampler = DDIMSampler(self) shape = (self.channels, self.image_size, self.image_size) - samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, - shape, cond, verbose=False, **kwargs) + samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, shape, cond, verbose=False, **kwargs) else: - samples, intermediates = self.sample(cond=cond, batch_size=batch_size, - return_intermediates=True, **kwargs) + samples, intermediates = self.sample(cond=cond, batch_size=batch_size, return_intermediates=True, **kwargs) return samples, intermediates @torch.no_grad() - def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200, ddim_eta=1., return_keys=None, - quantize_denoised=True, inpaint=True, plot_denoise_rows=False, plot_progressive_rows=True, - plot_diffusion_rows=True, **kwargs): + def log_images( + self, + batch, + N=8, + n_row=4, + sample=True, + ddim_steps=200, + ddim_eta=1.0, + return_keys=None, + quantize_denoised=True, + inpaint=True, + plot_denoise_rows=False, + plot_progressive_rows=True, + plot_diffusion_rows=True, + **kwargs, + ): use_ddim = ddim_steps is not None log = dict() - z, c, x, xrec, xc = self.get_input(batch, self.first_stage_key, - return_first_stage_outputs=True, - force_c_encode=True, - return_original_cond=True, - bs=N) + z, c, x, xrec, xc = self.get_input( + batch, + self.first_stage_key, + return_first_stage_outputs=True, + force_c_encode=True, + return_original_cond=True, + bs=N, + ) N = min(x.shape[0], N) n_row = min(x.shape[0], n_row) log["inputs"] = x @@ -1334,8 +1525,9 @@ def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200, ddim_eta= if sample: # get denoise row with self.ema_scope("Plotting"): - samples, z_denoise_row = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, - ddim_steps=ddim_steps, eta=ddim_eta) + samples, z_denoise_row = self.sample_log( + cond=c, batch_size=N, ddim=use_ddim, ddim_steps=ddim_steps, eta=ddim_eta + ) # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True) x_samples = self.decode_first_stage(samples) log["samples"] = x_samples @@ -1343,13 +1535,21 @@ def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200, ddim_eta= denoise_grid = self._get_denoise_row_from_list(z_denoise_row) log["denoise_row"] = denoise_grid - if quantize_denoised and not isinstance(self.first_stage_model, AutoencoderKL) and not isinstance( - self.first_stage_model, IdentityFirstStage): + if ( + quantize_denoised + and not isinstance(self.first_stage_model, AutoencoderKL) + and not isinstance(self.first_stage_model, IdentityFirstStage) + ): # also display when quantizing x0 while sampling with self.ema_scope("Plotting Quantized Denoised"): - samples, z_denoise_row = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, - ddim_steps=ddim_steps, eta=ddim_eta, - quantize_denoised=True) + samples, z_denoise_row = self.sample_log( + cond=c, + batch_size=N, + ddim=use_ddim, + ddim_steps=ddim_steps, + eta=ddim_eta, + quantize_denoised=True, + ) # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True, # quantize_denoised=True) x_samples = self.decode_first_stage(samples.to(self.device)) @@ -1360,27 +1560,29 @@ def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200, ddim_eta= b, h, w = z.shape[0], z.shape[2], z.shape[3] mask = torch.ones(N, h, w).to(self.device) # zeros will be filled in - mask[:, h // 4:3 * h // 4, w // 4:3 * w // 4] = 0. + mask[:, h // 4 : 3 * h // 4, w // 4 : 3 * w // 4] = 0.0 mask = mask[:, None, ...] with self.ema_scope("Plotting Inpaint"): - samples, _ = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, - ddim_steps=ddim_steps, x0=z[:N], mask=mask) + samples, _ = self.sample_log( + cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, ddim_steps=ddim_steps, x0=z[:N], mask=mask + ) x_samples = self.decode_first_stage(samples.to(self.device)) log["samples_inpainting"] = x_samples log["mask"] = mask # outpaint with self.ema_scope("Plotting Outpaint"): - samples, _ = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, - ddim_steps=ddim_steps, x0=z[:N], mask=mask) + samples, _ = self.sample_log( + cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, ddim_steps=ddim_steps, x0=z[:N], mask=mask + ) x_samples = self.decode_first_stage(samples.to(self.device)) log["samples_outpainting"] = x_samples if plot_progressive_rows: with self.ema_scope("Plotting Progressives"): - img, progressives = self.progressive_denoising(c, - shape=(self.channels, self.image_size, self.image_size), - batch_size=N) + img, progressives = self.progressive_denoising( + c, shape=(self.channels, self.image_size, self.image_size), batch_size=N + ) prog_row = self._get_denoise_row_from_list(progressives, desc="Progressive Generation") log["progressive_row"] = prog_row @@ -1415,11 +1617,8 @@ def configure_optimizers(self): print("Setting up LambdaLR scheduler...") scheduler = [ - { - 'scheduler': LambdaLR(opt, lr_lambda=scheduler.schedule), - 'interval': 'step', - 'frequency': 1 - }] + {'scheduler': LambdaLR(opt, lr_lambda=scheduler.schedule), 'interval': 'step', 'frequency': 1} + ] return [opt], scheduler return opt @@ -1433,8 +1632,7 @@ def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_va gradient_clip_algorithm = GradClipAlgorithmType(gradient_clip_algorithm) if not hasattr(self, "grad_clip_logged"): - logging.info("Gradient clipping: val, %f; algo, %s", - gradient_clip_val, gradient_clip_algorithm) + logging.info("Gradient clipping: val, %f; algo, %s", gradient_clip_val, gradient_clip_algorithm) self.grad_clip_logged = True parameters = amp.master_params(optimizer) @@ -1449,7 +1647,7 @@ def to_rgb(self, x): if not hasattr(self, "colorize"): self.colorize = torch.randn(3, x.shape[1], 1, 1).to(x) x = nn.functional.conv2d(x, weight=self.colorize) - x = 2. * (x - x.min()) / (x.max() - x.min()) - 1. + x = 2.0 * (x - x.min()) / (x.max() - x.min()) - 1.0 return x diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm_config.py b/nemo/collections/multimodal/models/stable_diffusion/ldm_config.py index 2b461915dbdd..2f2acb40ed43 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm_config.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm_config.py @@ -41,10 +41,11 @@ class SchedulerConfig: cls: Optional[str] = 'nemo.collections.multimodal.parts.lr_scheduler.LambdaLinearScheduler' warm_up_steps: Optional[List[int]] = field(default_factory=lambda: [10000]) cycle_lengths: Optional[List[int]] = field( - default_factory=lambda: [10000000000000]) # incredibly large number to prevent corner cases - f_start: Optional[List[float]] = field(default_factory=lambda: [1.e-6]) - f_max: Optional[List[float]] = field(default_factory=lambda: [1.]) - f_min: Optional[List[float]] = field(default_factory=lambda: [1.]) + default_factory=lambda: [10000000000000] + ) # incredibly large number to prevent corner cases + f_start: Optional[List[float]] = field(default_factory=lambda: [1.0e-6]) + f_max: Optional[List[float]] = field(default_factory=lambda: [1.0]) + f_min: Optional[List[float]] = field(default_factory=lambda: [1.0]) @dataclass @@ -66,7 +67,7 @@ class LDMEncoderConfig: ch_mult: Optional[List[int]] = field(default_factory=lambda: [1, 2, 4, 4]) num_res_blocks: Optional[int] = 2 attn_resolutions: Optional[List[int]] = field(default_factory=lambda: []) - dropout: Optional[float] = 0. + dropout: Optional[float] = 0.0 @dataclass @@ -97,15 +98,17 @@ class DDPMDiffusionModelConfig(model_cfg.ModelConfig): linear_end: Optional[float] = 2e-2 cosine_s: Optional[float] = 8e-3 given_betas: Optional[float] = None - original_elbo_weight: Optional[float] = 0. - v_posterior: Optional[float] = 0. # weight for choosing posterior variance as sigma = (1-v) * beta_tilde + v * beta - l_simple_weight: Optional[float] = 1. + original_elbo_weight: Optional[float] = 0.0 + v_posterior: Optional[ + float + ] = 0.0 # weight for choosing posterior variance as sigma = (1-v) * beta_tilde + v * beta + l_simple_weight: Optional[float] = 1.0 conditioning_key: Optional[str] = None parameterization: Optional[str] = 'eps' # all assuming fixed variance schedules scheduler_config: Optional[Any] = None use_positional_encodings: Optional[bool] = False learn_logvar: Optional[bool] = False - logvar_init: Optional[float] = 0. + logvar_init: Optional[float] = 0.0 learning_rate: Optional[float] = 1.0e-04 diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py index edae8b8b17a8..c0f858ac3443 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py @@ -11,18 +11,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from abc import ABC, abstractmethod + import numpy as np import torch -from abc import ABC, abstractmethod from tqdm import tqdm from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import make_ddim_sampling_parameters, \ - make_ddim_timesteps, noise_like +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + make_ddim_sampling_parameters, + make_ddim_timesteps, + noise_like, +) class AbstractBaseSampler(ABC): - def __init__(self, model, sampler, schedule="linear", **kwargs): super().__init__() self.model = model @@ -37,9 +40,13 @@ def register_buffer(self, name, attr): attr = attr.to(torch.device("cuda")) setattr(self, name, attr) - def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True): - self.ddim_timesteps = make_ddim_timesteps(ddim_discr_method=ddim_discretize, num_ddim_timesteps=ddim_num_steps, - num_ddpm_timesteps=self.ddpm_num_timesteps, verbose=verbose) + def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, verbose=True): + self.ddim_timesteps = make_ddim_timesteps( + ddim_discr_method=ddim_discretize, + num_ddim_timesteps=ddim_num_steps, + num_ddpm_timesteps=self.ddpm_num_timesteps, + verbose=verbose, + ) alphas_cumprod = self.model.alphas_cumprod assert alphas_cumprod.shape[0] == self.ddpm_num_timesteps, 'alphas have to be defined for each timestep' to_torch = lambda x: x.clone().detach().to(torch.float32).to(torch.cuda.current_device()) @@ -48,21 +55,23 @@ def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., self.register_buffer('alphas_cumprod_prev', to_torch(self.model.alphas_cumprod_prev)) # calculations for diffusion q(x_t | x_{t-1}) and others self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod.cpu()))) - self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod.cpu()))) - self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod.cpu()))) - self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod.cpu()))) - self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod.cpu() - 1))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1.0 - alphas_cumprod.cpu()))) + self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1.0 - alphas_cumprod.cpu()))) + self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod.cpu()))) + self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod.cpu() - 1))) # ddim sampling parameters - ddim_sigmas, ddim_alphas, ddim_alphas_prev = make_ddim_sampling_parameters(alphacums=alphas_cumprod.cpu(), - ddim_timesteps=self.ddim_timesteps, - eta=ddim_eta, verbose=verbose) + ddim_sigmas, ddim_alphas, ddim_alphas_prev = make_ddim_sampling_parameters( + alphacums=alphas_cumprod.cpu(), ddim_timesteps=self.ddim_timesteps, eta=ddim_eta, verbose=verbose + ) self.register_buffer('ddim_sigmas', ddim_sigmas) self.register_buffer('ddim_alphas', ddim_alphas) self.register_buffer('ddim_alphas_prev', ddim_alphas_prev) - self.register_buffer('ddim_sqrt_one_minus_alphas', np.sqrt(1. - ddim_alphas)) + self.register_buffer('ddim_sqrt_one_minus_alphas', np.sqrt(1.0 - ddim_alphas)) sigmas_for_original_sampling_steps = ddim_eta * torch.sqrt( - (1 - self.alphas_cumprod_prev) / (1 - self.alphas_cumprod) * ( - 1 - self.alphas_cumprod / self.alphas_cumprod_prev)) + (1 - self.alphas_cumprod_prev) + / (1 - self.alphas_cumprod) + * (1 - self.alphas_cumprod / self.alphas_cumprod_prev) + ) self.register_buffer('ddim_sigmas_for_original_num_steps', sigmas_for_original_sampling_steps) @abstractmethod @@ -70,30 +79,31 @@ def p_sampling_fn(self): pass @torch.no_grad() - def sample(self, - S, - batch_size, - shape, - conditioning=None, - callback=None, - normals_sequence=None, - img_callback=None, - quantize_x0=False, - eta=0., - mask=None, - x0=None, - temperature=1., - noise_dropout=0., - score_corrector=None, - corrector_kwargs=None, - verbose=True, - x_T=None, - log_every_t=100, - unconditional_guidance_scale=1., - unconditional_conditioning=None, - # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ... - **kwargs - ): + def sample( + self, + S, + batch_size, + shape, + conditioning=None, + callback=None, + normals_sequence=None, + img_callback=None, + quantize_x0=False, + eta=0.0, + mask=None, + x0=None, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + verbose=True, + x_T=None, + log_every_t=100, + unconditional_guidance_scale=1.0, + unconditional_conditioning=None, + # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ... + **kwargs, + ): if conditioning is not None: if isinstance(conditioning, dict): cbs = conditioning[list(conditioning.keys())[0]][0].shape[0] @@ -107,30 +117,47 @@ def sample(self, C, H, W = shape size = (batch_size, C, H, W) print(f'Data shape for sampling is {size}, eta {eta}') - samples, intermediates = self.sampling_fn(conditioning, size, - callback=callback, - img_callback=img_callback, - quantize_denoised=quantize_x0, - mask=mask, x0=x0, - ddim_use_original_steps=False, - noise_dropout=noise_dropout, - temperature=temperature, - score_corrector=score_corrector, - corrector_kwargs=corrector_kwargs, - x_T=x_T, - log_every_t=log_every_t, - unconditional_guidance_scale=unconditional_guidance_scale, - unconditional_conditioning=unconditional_conditioning, - ) + samples, intermediates = self.sampling_fn( + conditioning, + size, + callback=callback, + img_callback=img_callback, + quantize_denoised=quantize_x0, + mask=mask, + x0=x0, + ddim_use_original_steps=False, + noise_dropout=noise_dropout, + temperature=temperature, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + x_T=x_T, + log_every_t=log_every_t, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning, + ) return samples, intermediates @torch.no_grad() - def sampling_fn(self, cond, shape, - x_T=None, ddim_use_original_steps=False, - callback=None, timesteps=None, quantize_denoised=False, - mask=None, x0=None, img_callback=None, log_every_t=100, - temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None, - unconditional_guidance_scale=1., unconditional_conditioning=None, ): + def sampling_fn( + self, + cond, + shape, + x_T=None, + ddim_use_original_steps=False, + callback=None, + timesteps=None, + quantize_denoised=False, + mask=None, + x0=None, + img_callback=None, + log_every_t=100, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + unconditional_guidance_scale=1.0, + unconditional_conditioning=None, + ): device = self.model.betas.device b = shape[0] if x_T is None: @@ -156,37 +183,51 @@ def sampling_fn(self, cond, shape, index = total_steps - i - 1 ts = torch.full((b,), step, device=device, dtype=torch.long) if self.sampler is Sampler.PLMS: - ts_next = torch.full((b,), time_range[min(i + 1, len(time_range) - 1)], device=device, dtype=torch.long) + ts_next = torch.full( + (b,), time_range[min(i + 1, len(time_range) - 1)], device=device, dtype=torch.long + ) else: old_eps = None ts_next = None if mask is not None: assert x0 is not None img_orig = self.model.q_sample(x0, ts) # TODO: deterministic forward pass? - img = img_orig * mask + (1. - mask) * img - outs = self.p_sampling_fn(img, cond, ts, index=index, use_original_steps=ddim_use_original_steps, - quantize_denoised=quantize_denoised, temperature=temperature, - noise_dropout=noise_dropout, score_corrector=score_corrector, - corrector_kwargs=corrector_kwargs, - unconditional_guidance_scale=unconditional_guidance_scale, - unconditional_conditioning=unconditional_conditioning, - old_eps=old_eps, t_next=ts_next) + img = img_orig * mask + (1.0 - mask) * img + outs = self.p_sampling_fn( + img, + cond, + ts, + index=index, + use_original_steps=ddim_use_original_steps, + quantize_denoised=quantize_denoised, + temperature=temperature, + noise_dropout=noise_dropout, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning, + old_eps=old_eps, + t_next=ts_next, + ) img, pred_x0 = outs[0], outs[1] if self.sampler is Sampler.PLMS: e_t = outs[2] old_eps.append(e_t) if len(old_eps) >= 4: old_eps.pop(0) - if callback: callback(i) - if img_callback: img_callback(pred_x0, i) + if callback: + callback(i) + if img_callback: + img_callback(pred_x0, i) if index % log_every_t == 0 or index == total_steps - 1: intermediates['x_inter'].append(img) intermediates['pred_x0'].append(pred_x0) return img, intermediates - def _get_model_output(self, x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, - corrector_kwargs): - if unconditional_conditioning is None or unconditional_guidance_scale == 1.: + def _get_model_output( + self, x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, corrector_kwargs + ): + if unconditional_conditioning is None or unconditional_guidance_scale == 1.0: e_t = self.model.apply_model(x, t, c) elif isinstance(c, dict): raise NotImplementedError @@ -201,11 +242,14 @@ def _get_model_output(self, x, t, unconditional_conditioning, unconditional_guid e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs) return e_t - def _get_x_prev_and_pred_x0(self, use_original_steps, b, index, device, x, e_t, quantize_denoised, repeat_noise, - temperature, noise_dropout): + def _get_x_prev_and_pred_x0( + self, use_original_steps, b, index, device, x, e_t, quantize_denoised, repeat_noise, temperature, noise_dropout + ): alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev - sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas + sqrt_one_minus_alphas = ( + self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas + ) sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas # select parameters corresponding to the currently considered timestep @@ -218,9 +262,9 @@ def _get_x_prev_and_pred_x0(self, use_original_steps, b, index, device, x, e_t, if quantize_denoised: pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0) # direction pointing to x_t - dir_xt = (1. - a_prev - sigma_t ** 2).sqrt() * e_t + dir_xt = (1.0 - a_prev - sigma_t ** 2).sqrt() * e_t noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature - if noise_dropout > 0.: + if noise_dropout > 0.0: noise = torch.nn.functional.dropout(noise, p=noise_dropout) x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise return x_prev, pred_x0 diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py index 37179d0a46ed..18863cee19fb 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py @@ -24,19 +24,35 @@ class DDIMSampler(AbstractBaseSampler): - def __init__(self, model, schedule="linear", **kwargs): super().__init__(model, sampler=Sampler.DDIM, schedule="linear", **kwargs) @torch.no_grad() - def p_sampling_fn(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False, - temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None, - unconditional_guidance_scale=1., unconditional_conditioning=None, old_eps=None, t_next=None): + def p_sampling_fn( + self, + x, + c, + t, + index, + repeat_noise=False, + use_original_steps=False, + quantize_denoised=False, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + unconditional_guidance_scale=1.0, + unconditional_conditioning=None, + old_eps=None, + t_next=None, + ): b, *_, device = *x.shape, x.device - e_t = self._get_model_output(x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, - corrector_kwargs) - x_prev, pred_x0 = self._get_x_prev_and_pred_x0(use_original_steps, b, index, device, x, e_t, quantize_denoised, - repeat_noise, temperature, noise_dropout) + e_t = self._get_model_output( + x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, corrector_kwargs + ) + x_prev, pred_x0 = self._get_x_prev_and_pred_x0( + use_original_steps, b, index, device, x, e_t, quantize_denoised, repeat_noise, temperature, noise_dropout + ) return x_prev, pred_x0 @torch.no_grad() @@ -52,12 +68,21 @@ def stochastic_encode(self, x0, t, use_original_steps=False, noise=None): if noise is None: noise = randn_like(x0, generator=self.model.rng) - return (extract_into_tensor(sqrt_alphas_cumprod, t, x0.shape) * x0 + - extract_into_tensor(sqrt_one_minus_alphas_cumprod, t, x0.shape) * noise) + return ( + extract_into_tensor(sqrt_alphas_cumprod, t, x0.shape) * x0 + + extract_into_tensor(sqrt_one_minus_alphas_cumprod, t, x0.shape) * noise + ) @torch.no_grad() - def decode(self, x_latent, cond, t_start, unconditional_guidance_scale=1.0, unconditional_conditioning=None, - use_original_steps=False): + def decode( + self, + x_latent, + cond, + t_start, + unconditional_guidance_scale=1.0, + unconditional_conditioning=None, + use_original_steps=False, + ): timesteps = np.arange(self.ddpm_num_timesteps) if use_original_steps else self.ddim_timesteps timesteps = timesteps[:t_start] @@ -71,7 +96,13 @@ def decode(self, x_latent, cond, t_start, unconditional_guidance_scale=1.0, unco for i, step in enumerate(iterator): index = total_steps - i - 1 ts = torch.full((x_latent.shape[0],), step, device=x_latent.device, dtype=torch.long) - x_dec, _ = self.p_sample_ddim(x_dec, cond, ts, index=index, use_original_steps=use_original_steps, - unconditional_guidance_scale=unconditional_guidance_scale, - unconditional_conditioning=unconditional_conditioning) + x_dec, _ = self.p_sample_ddim( + x_dec, + cond, + ts, + index=index, + use_original_steps=use_original_steps, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning, + ) return x_dec diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py index e28fc98880d8..ac4f8f7ad73d 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. import math + import torch import torchsde from scipy import integrate from torch import nn from torchdiffeq import odeint -from tqdm.auto import trange, tqdm +from tqdm.auto import tqdm, trange def append_zero(x): @@ -32,7 +33,7 @@ def append_dims(x, target_dims): return x[(...,) + (None,) * dims_to_append] -def get_sigmas_karras(n, sigma_min, sigma_max, rho=7., device='cpu'): +def get_sigmas_karras(n, sigma_min, sigma_max, rho=7.0, device='cpu'): """Constructs the noise schedule of Karras et al. (2022).""" ramp = torch.linspace(0, 1, n) min_inv_rho = sigma_min ** (1 / rho) @@ -47,7 +48,7 @@ def get_sigmas_exponential(n, sigma_min, sigma_max, device='cpu'): return append_zero(sigmas) -def get_sigmas_polyexponential(n, sigma_min, sigma_max, rho=1., device='cpu'): +def get_sigmas_polyexponential(n, sigma_min, sigma_max, rho=1.0, device='cpu'): """Constructs an polynomial in log sigma noise schedule.""" ramp = torch.linspace(1, 0, n, device=device) ** rho sigmas = torch.exp(ramp * (math.log(sigma_max) - math.log(sigma_min)) + math.log(sigma_min)) @@ -66,11 +67,11 @@ def to_d(x, sigma, denoised): return (x - denoised) / append_dims(sigma, x.ndim) -def get_ancestral_step(sigma_from, sigma_to, eta=1.): +def get_ancestral_step(sigma_from, sigma_to, eta=1.0): """Calculates the noise level (sigma_down) to step down to and the amount of noise to add (sigma_up) when doing an ancestral sampling step.""" if not eta: - return sigma_to, 0. + return sigma_to, 0.0 sigma_up = min(sigma_to, eta * (sigma_to ** 2 * (sigma_from ** 2 - sigma_to ** 2) / sigma_from ** 2) ** 0.5) sigma_down = (sigma_to ** 2 - sigma_up ** 2) ** 0.5 return sigma_down, sigma_up @@ -133,13 +134,23 @@ def __call__(self, sigma, sigma_next): @torch.no_grad() -def sample_euler(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., - s_tmax=float('inf'), s_noise=1.): +def sample_euler( + model, + x, + sigmas, + extra_args=None, + callback=None, + disable=None, + s_churn=0.0, + s_tmin=0.0, + s_tmax=float('inf'), + s_noise=1.0, +): """Implements Algorithm 2 (Euler steps) from Karras et al. (2022).""" extra_args = {} if extra_args is None else extra_args s_in = x.new_ones([x.shape[0]]) for i in trange(len(sigmas) - 1, disable=disable): - gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0. + gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.0 eps = torch.randn_like(x) * s_noise sigma_hat = sigmas[i] * (gamma + 1) if gamma > 0: @@ -155,8 +166,9 @@ def sample_euler(model, x, sigmas, extra_args=None, callback=None, disable=None, @torch.no_grad() -def sample_euler_ancestral(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., - noise_sampler=None): +def sample_euler_ancestral( + model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1.0, s_noise=1.0, noise_sampler=None +): """Ancestral sampling with Euler method steps.""" extra_args = {} if extra_args is None else extra_args noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler @@ -176,13 +188,23 @@ def sample_euler_ancestral(model, x, sigmas, extra_args=None, callback=None, dis @torch.no_grad() -def sample_heun(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., - s_tmax=float('inf'), s_noise=1.): +def sample_heun( + model, + x, + sigmas, + extra_args=None, + callback=None, + disable=None, + s_churn=0.0, + s_tmin=0.0, + s_tmax=float('inf'), + s_noise=1.0, +): """Implements Algorithm 2 (Heun steps) from Karras et al. (2022).""" extra_args = {} if extra_args is None else extra_args s_in = x.new_ones([x.shape[0]]) for i in trange(len(sigmas) - 1, disable=disable): - gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0. + gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.0 eps = torch.randn_like(x) * s_noise sigma_hat = sigmas[i] * (gamma + 1) if gamma > 0: @@ -206,13 +228,23 @@ def sample_heun(model, x, sigmas, extra_args=None, callback=None, disable=None, @torch.no_grad() -def sample_dpm_2(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., - s_tmax=float('inf'), s_noise=1.): +def sample_dpm_2( + model, + x, + sigmas, + extra_args=None, + callback=None, + disable=None, + s_churn=0.0, + s_tmin=0.0, + s_tmax=float('inf'), + s_noise=1.0, +): """A sampler inspired by DPM-Solver-2 and Algorithm 2 from Karras et al. (2022).""" extra_args = {} if extra_args is None else extra_args s_in = x.new_ones([x.shape[0]]) for i in trange(len(sigmas) - 1, disable=disable): - gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0. + gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.0 eps = torch.randn_like(x) * s_noise sigma_hat = sigmas[i] * (gamma + 1) if gamma > 0: @@ -238,8 +270,9 @@ def sample_dpm_2(model, x, sigmas, extra_args=None, callback=None, disable=None, @torch.no_grad() -def sample_dpm_2_ancestral(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., - noise_sampler=None): +def sample_dpm_2_ancestral( + model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1.0, s_noise=1.0, noise_sampler=None +): """Ancestral sampling with DPM-Solver second-order steps.""" extra_args = {} if extra_args is None else extra_args noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler @@ -272,7 +305,7 @@ def linear_multistep_coeff(order, t, i, j): raise ValueError(f'Order {order} too high for step {i}') def fn(tau): - prod = 1. + prod = 1.0 for k in range(order): if j == k: continue @@ -408,13 +441,16 @@ def dpm_solver_3_step(self, x, t, t_next, r1=1 / 3, r2=2 / 3, eps_cache=None): s2 = t + r2 * h u1 = x - self.sigma(s1) * (r1 * h).expm1() * eps eps_r1, eps_cache = self.eps(eps_cache, 'eps_r1', u1, s1) - u2 = x - self.sigma(s2) * (r2 * h).expm1() * eps - self.sigma(s2) * (r2 / r1) * ( - (r2 * h).expm1() / (r2 * h) - 1) * (eps_r1 - eps) + u2 = ( + x + - self.sigma(s2) * (r2 * h).expm1() * eps + - self.sigma(s2) * (r2 / r1) * ((r2 * h).expm1() / (r2 * h) - 1) * (eps_r1 - eps) + ) eps_r2, eps_cache = self.eps(eps_cache, 'eps_r2', u2, s2) x_3 = x - self.sigma(t_next) * h.expm1() * eps - self.sigma(t_next) / r2 * (h.expm1() / h - 1) * (eps_r2 - eps) return x_3, eps_cache - def dpm_solver_fast(self, x, t_start, t_end, nfe, eta=0., s_noise=1., noise_sampler=None): + def dpm_solver_fast(self, x, t_start, t_end, nfe, eta=0.0, s_noise=1.0, noise_sampler=None): noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler if not t_end > t_start and eta: raise ValueError('eta must be 0 for reverse sampling') @@ -435,7 +471,7 @@ def dpm_solver_fast(self, x, t_start, t_end, nfe, eta=0., s_noise=1., noise_samp t_next_ = torch.minimum(t_end, self.t(sd)) su = (self.sigma(t_next) ** 2 - self.sigma(t_next_) ** 2) ** 0.5 else: - t_next_, su = t_next, 0. + t_next_, su = t_next, 0.0 eps, eps_cache = self.eps(eps_cache, 'eps', x, t) denoised = x - self.sigma(t) * eps @@ -453,8 +489,23 @@ def dpm_solver_fast(self, x, t_start, t_end, nfe, eta=0., s_noise=1., noise_samp return x - def dpm_solver_adaptive(self, x, t_start, t_end, order=3, rtol=0.05, atol=0.0078, h_init=0.05, pcoeff=0., icoeff=1., - dcoeff=0., accept_safety=0.81, eta=0., s_noise=1., noise_sampler=None): + def dpm_solver_adaptive( + self, + x, + t_start, + t_end, + order=3, + rtol=0.05, + atol=0.0078, + h_init=0.05, + pcoeff=0.0, + icoeff=1.0, + dcoeff=0.0, + accept_safety=0.81, + eta=0.0, + s_noise=1.0, + noise_sampler=None, + ): noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler if order not in {2, 3}: raise ValueError('order should be 2 or 3') @@ -478,7 +529,7 @@ def dpm_solver_adaptive(self, x, t_start, t_end, order=3, rtol=0.05, atol=0.0078 t_ = torch.minimum(t_end, self.t(sd)) su = (self.sigma(t) ** 2 - self.sigma(t_) ** 2) ** 0.5 else: - t_, su = t, 0. + t_, su = t, 0.0 eps, eps_cache = self.eps(eps_cache, 'eps', x, s) denoised = x - self.sigma(s) * eps @@ -504,15 +555,35 @@ def dpm_solver_adaptive(self, x, t_start, t_end, order=3, rtol=0.05, atol=0.0078 if self.info_callback is not None: self.info_callback( - {'x': x, 'i': info['steps'] - 1, 't': s, 't_up': s, 'denoised': denoised, 'error': error, - 'h': pid.h, **info}) + { + 'x': x, + 'i': info['steps'] - 1, + 't': s, + 't_up': s, + 'denoised': denoised, + 'error': error, + 'h': pid.h, + **info, + } + ) return x, info @torch.no_grad() -def sample_dpm_fast(model, x, sigma_min, sigma_max, n, extra_args=None, callback=None, disable=None, eta=0., s_noise=1., - noise_sampler=None): +def sample_dpm_fast( + model, + x, + sigma_min, + sigma_max, + n, + extra_args=None, + callback=None, + disable=None, + eta=0.0, + s_noise=1.0, + noise_sampler=None, +): """DPM-Solver-Fast (fixed step size). See https://arxiv.org/abs/2206.00927.""" if sigma_min <= 0 or sigma_max <= 0: raise ValueError('sigma_min and sigma_max must not be 0') @@ -520,15 +591,41 @@ def sample_dpm_fast(model, x, sigma_min, sigma_max, n, extra_args=None, callback dpm_solver = DPMSolver(model, extra_args, eps_callback=pbar.update) if callback is not None: dpm_solver.info_callback = lambda info: callback( - {'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info}) - return dpm_solver.dpm_solver_fast(x, dpm_solver.t(torch.tensor(sigma_max)), - dpm_solver.t(torch.tensor(sigma_min)), n, eta, s_noise, noise_sampler) + {'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info} + ) + return dpm_solver.dpm_solver_fast( + x, + dpm_solver.t(torch.tensor(sigma_max)), + dpm_solver.t(torch.tensor(sigma_min)), + n, + eta, + s_noise, + noise_sampler, + ) @torch.no_grad() -def sample_dpm_adaptive(model, x, sigma_min, sigma_max, extra_args=None, callback=None, disable=None, order=3, - rtol=0.05, atol=0.0078, h_init=0.05, pcoeff=0., icoeff=1., dcoeff=0., accept_safety=0.81, - eta=0., s_noise=1., noise_sampler=None, return_info=False): +def sample_dpm_adaptive( + model, + x, + sigma_min, + sigma_max, + extra_args=None, + callback=None, + disable=None, + order=3, + rtol=0.05, + atol=0.0078, + h_init=0.05, + pcoeff=0.0, + icoeff=1.0, + dcoeff=0.0, + accept_safety=0.81, + eta=0.0, + s_noise=1.0, + noise_sampler=None, + return_info=False, +): """DPM-Solver-12 and 23 (adaptive step size). See https://arxiv.org/abs/2206.00927.""" if sigma_min <= 0 or sigma_max <= 0: raise ValueError('sigma_min and sigma_max must not be 0') @@ -536,18 +633,33 @@ def sample_dpm_adaptive(model, x, sigma_min, sigma_max, extra_args=None, callbac dpm_solver = DPMSolver(model, extra_args, eps_callback=pbar.update) if callback is not None: dpm_solver.info_callback = lambda info: callback( - {'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info}) - x, info = dpm_solver.dpm_solver_adaptive(x, dpm_solver.t(torch.tensor(sigma_max)), - dpm_solver.t(torch.tensor(sigma_min)), order, rtol, atol, h_init, - pcoeff, icoeff, dcoeff, accept_safety, eta, s_noise, noise_sampler) + {'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info} + ) + x, info = dpm_solver.dpm_solver_adaptive( + x, + dpm_solver.t(torch.tensor(sigma_max)), + dpm_solver.t(torch.tensor(sigma_min)), + order, + rtol, + atol, + h_init, + pcoeff, + icoeff, + dcoeff, + accept_safety, + eta, + s_noise, + noise_sampler, + ) if return_info: return x, info return x @torch.no_grad() -def sample_dpmpp_2s_ancestral(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., - noise_sampler=None): +def sample_dpmpp_2s_ancestral( + model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1.0, s_noise=1.0, noise_sampler=None +): """Ancestral sampling with DPM-Solver++(2S) second-order steps.""" extra_args = {} if extra_args is None else extra_args noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler @@ -581,8 +693,9 @@ def sample_dpmpp_2s_ancestral(model, x, sigmas, extra_args=None, callback=None, @torch.no_grad() -def sample_dpmpp_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., - noise_sampler=None, r=1 / 2): +def sample_dpmpp_sde( + model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1.0, s_noise=1.0, noise_sampler=None, r=1 / 2 +): """DPM-Solver++ (stochastic).""" sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max) if noise_sampler is None else noise_sampler @@ -703,7 +816,7 @@ def __init__(self, model, quantize=False): alphas_cumprod = model.alphas_cumprod super().__init__(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5, quantize) self.inner_model = model - self.sigma_data = 1. + self.sigma_data = 1.0 def get_scalings(self, sigma): c_out = -sigma diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py index c5c6ab49e0e3..165c27a3f924 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py @@ -23,24 +23,57 @@ class PLMSSampler(AbstractBaseSampler): def __init__(self, model, schedule="linear", **kwargs): super().__init__(model, sampler=Sampler.PLMS, schedule="linear", **kwargs) - def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=False): + def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, verbose=False): if ddim_eta != 0: raise ValueError('ddim_eta must be 0 for PLMS') - super().make_schedule(ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=False) + super().make_schedule(ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, verbose=False) @torch.no_grad() - def p_sampling_fn(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False, - temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None, - unconditional_guidance_scale=1., unconditional_conditioning=None, old_eps=None, t_next=None): + def p_sampling_fn( + self, + x, + c, + t, + index, + repeat_noise=False, + use_original_steps=False, + quantize_denoised=False, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + unconditional_guidance_scale=1.0, + unconditional_conditioning=None, + old_eps=None, + t_next=None, + ): b, *_, device = *x.shape, x.device - e_t = self._get_model_output(x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, - corrector_kwargs) + e_t = self._get_model_output( + x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, corrector_kwargs + ) if len(old_eps) == 0: # Pseudo Improved Euler (2nd order) - x_prev, pred_x0 = self._get_x_prev_and_pred_x0(use_original_steps, b, index, device, x, e_t, - quantize_denoised, repeat_noise, temperature, noise_dropout) - e_t_next = self._get_model_output(x_prev, t_next, unconditional_conditioning, unconditional_guidance_scale, - score_corrector, c, corrector_kwargs) + x_prev, pred_x0 = self._get_x_prev_and_pred_x0( + use_original_steps, + b, + index, + device, + x, + e_t, + quantize_denoised, + repeat_noise, + temperature, + noise_dropout, + ) + e_t_next = self._get_model_output( + x_prev, + t_next, + unconditional_conditioning, + unconditional_guidance_scale, + score_corrector, + c, + corrector_kwargs, + ) e_t_prime = (e_t + e_t_next) / 2 elif len(old_eps) == 1: # 2nd order Pseudo Linear Multistep (Adams-Bashforth) @@ -52,7 +85,17 @@ def p_sampling_fn(self, x, c, t, index, repeat_noise=False, use_original_steps=F # 4nd order Pseudo Linear Multistep (Adams-Bashforth) e_t_prime = (55 * e_t - 59 * old_eps[-1] + 37 * old_eps[-2] - 9 * old_eps[-3]) / 24 - x_prev, pred_x0 = self._get_x_prev_and_pred_x0(use_original_steps, b, index, device, x, e_t_prime, - quantize_denoised, repeat_noise, temperature, noise_dropout) + x_prev, pred_x0 = self._get_x_prev_and_pred_x0( + use_original_steps, + b, + index, + device, + x, + e_t_prime, + quantize_denoised, + repeat_noise, + temperature, + noise_dropout, + ) return x_prev, pred_x0, e_t diff --git a/nemo/collections/multimodal/modules/stable_diffusion/attention.py b/nemo/collections/multimodal/modules/stable_diffusion/attention.py index 5167f5810a3e..fd9fe4a5613f 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/attention.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/attention.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. import math +from inspect import isfunction + import torch import torch.nn.functional as F from einops import rearrange, repeat -from inspect import isfunction -from torch import nn, einsum +from torch import einsum, nn from torch._dynamo import disable from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import checkpoint @@ -36,8 +37,8 @@ def check_cuda(): try: - from flash_attn.flash_attn_interface import flash_attn_unpadded_kvpacked_func from flash_attn.flash_attention import FlashAttention + from flash_attn.flash_attn_interface import flash_attn_unpadded_kvpacked_func flash_attn_installed = check_cuda() print("FlashAttention Installed") @@ -88,20 +89,13 @@ def forward(self, x): class FeedForward(nn.Module): - def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.): + def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0): super().__init__() inner_dim = int(dim * mult) dim_out = default(dim_out, dim) - project_in = nn.Sequential( - nn.Linear(dim, inner_dim), - nn.GELU() - ) if not glu else GEGLU(dim, inner_dim) - - self.net = nn.Sequential( - project_in, - nn.Dropout(dropout), - nn.Linear(inner_dim, dim_out) - ) + project_in = nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU()) if not glu else GEGLU(dim, inner_dim) + + self.net = nn.Sequential(project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim_out)) def forward(self, x): return self.net(x) @@ -145,26 +139,10 @@ def __init__(self, in_channels): self.in_channels = in_channels self.norm = Normalize(in_channels) - self.q = torch.nn.Conv2d(in_channels, - in_channels, - kernel_size=1, - stride=1, - padding=0) - self.k = torch.nn.Conv2d(in_channels, - in_channels, - kernel_size=1, - stride=1, - padding=0) - self.v = torch.nn.Conv2d(in_channels, - in_channels, - kernel_size=1, - stride=1, - padding=0) - self.proj_out = torch.nn.Conv2d(in_channels, - in_channels, - kernel_size=1, - stride=1, - padding=0) + self.q = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.k = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.v = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.proj_out = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) def forward(self, x): h_ = x @@ -206,7 +184,7 @@ def rearrange_heads_inner(t: torch.Tensor, h: int) -> torch.Tensor: class CrossAttention(nn.Module): - def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0., use_flash_attention=False): + def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.0, use_flash_attention=False): super().__init__() inner_dim = dim_head * heads context_dim = default(context_dim, query_dim) @@ -222,10 +200,7 @@ def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0. self.to_k = nn.Linear(context_dim, inner_dim, bias=False) self.to_v = nn.Linear(context_dim, inner_dim, bias=False) - self.to_out = nn.Sequential( - nn.Linear(inner_dim, query_dim), - nn.Dropout(dropout) - ) + self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)) self.use_flash_attention = use_flash_attention if context_dim == query_dim and dim_head <= 128 and (dim_head % 8) == 0 and flash_attn_installed: @@ -246,8 +221,13 @@ def forward(self, x, context=None, mask=None): def _attention(self, q, k, v, mask=None): h = self.heads - if not flash_attn_installed or not self.use_flash_attention or q.dtype == torch.float32 or ( - self.dim_head > 128 or (self.dim_head % 8) != 0) or mask is not None: + if ( + not flash_attn_installed + or not self.use_flash_attention + or q.dtype == torch.float32 + or (self.dim_head > 128 or (self.dim_head % 8) != 0) + or mask is not None + ): # original implementation # b n (h d) -> (b h) n d q = rearrange_heads_outer(q, h) @@ -301,15 +281,30 @@ def _attention(self, q, k, v, mask=None): class BasicTransformerBlock(nn.Module): - def __init__(self, dim, n_heads, d_head, dropout=0., context_dim=None, gated_ff=True, use_checkpoint=False, - use_flash_attention=False): + def __init__( + self, + dim, + n_heads, + d_head, + dropout=0.0, + context_dim=None, + gated_ff=True, + use_checkpoint=False, + use_flash_attention=False, + ): super().__init__() - self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, dropout=dropout, - use_flash_attention=use_flash_attention) # is a self-attention + self.attn1 = CrossAttention( + query_dim=dim, heads=n_heads, dim_head=d_head, dropout=dropout, use_flash_attention=use_flash_attention + ) # is a self-attention self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff) - self.attn2 = CrossAttention(query_dim=dim, context_dim=context_dim, - heads=n_heads, dim_head=d_head, dropout=dropout, - use_flash_attention=use_flash_attention) # is self-attn if context is none + self.attn2 = CrossAttention( + query_dim=dim, + context_dim=context_dim, + heads=n_heads, + dim_head=d_head, + dropout=dropout, + use_flash_attention=use_flash_attention, + ) # is self-attn if context is none self.norm1 = nn.LayerNorm(dim) self.norm2 = nn.LayerNorm(dim) self.norm3 = nn.LayerNorm(dim) @@ -334,31 +329,40 @@ class SpatialTransformer(nn.Module): Finally, reshape to image """ - def __init__(self, in_channels, n_heads, d_head, - depth=1, dropout=0., context_dim=None, use_checkpoint=False, - use_flash_attention=False): + def __init__( + self, + in_channels, + n_heads, + d_head, + depth=1, + dropout=0.0, + context_dim=None, + use_checkpoint=False, + use_flash_attention=False, + ): super().__init__() self.in_channels = in_channels inner_dim = n_heads * d_head self.norm = Normalize(in_channels) - self.proj_in = nn.Conv2d(in_channels, - inner_dim, - kernel_size=1, - stride=1, - padding=0) + self.proj_in = nn.Conv2d(in_channels, inner_dim, kernel_size=1, stride=1, padding=0) self.transformer_blocks = nn.ModuleList( - [BasicTransformerBlock(inner_dim, n_heads, d_head, dropout=dropout, context_dim=context_dim, - use_checkpoint=use_checkpoint, use_flash_attention=use_flash_attention) - for d in range(depth)] + [ + BasicTransformerBlock( + inner_dim, + n_heads, + d_head, + dropout=dropout, + context_dim=context_dim, + use_checkpoint=use_checkpoint, + use_flash_attention=use_flash_attention, + ) + for d in range(depth) + ] ) - self.proj_out = zero_module(nn.Conv2d(inner_dim, - in_channels, - kernel_size=1, - stride=1, - padding=0)) + self.proj_out = zero_module(nn.Conv2d(inner_dim, in_channels, kernel_size=1, stride=1, padding=0)) def forward(self, x, context=None): # note: if no context is given, cross-attention defaults to self-attention diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py index 6f953f817336..6ee2ed7c9576 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py @@ -57,11 +57,7 @@ def __init__(self, in_channels, with_conv): super().__init__() self.with_conv = with_conv if self.with_conv: - self.conv = torch.nn.Conv2d(in_channels, - in_channels, - kernel_size=3, - stride=1, - padding=1) + self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) def forward(self, x): # Cast to float32 to as 'upsample_nearest2d_out_frame' op does not support bfloat16 @@ -84,11 +80,7 @@ def __init__(self, in_channels, with_conv): self.with_conv = with_conv if self.with_conv: # no asymmetric padding in torch conv, must do it ourselves - self.conv = torch.nn.Conv2d(in_channels, - in_channels, - kernel_size=3, - stride=2, - padding=0) + self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0) def forward(self, x): if self.with_conv: @@ -101,8 +93,7 @@ def forward(self, x): class ResnetBlock(nn.Module): - def __init__(self, *, in_channels, out_channels=None, conv_shortcut=False, - dropout, temb_channels=512): + def __init__(self, *, in_channels, out_channels=None, conv_shortcut=False, dropout, temb_channels=512): super().__init__() self.in_channels = in_channels out_channels = in_channels if out_channels is None else out_channels @@ -110,34 +101,17 @@ def __init__(self, *, in_channels, out_channels=None, conv_shortcut=False, self.use_conv_shortcut = conv_shortcut self.norm1 = Normalize(in_channels) - self.conv1 = torch.nn.Conv2d(in_channels, - out_channels, - kernel_size=3, - stride=1, - padding=1) + self.conv1 = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) if temb_channels > 0: - self.temb_proj = torch.nn.Linear(temb_channels, - out_channels) + self.temb_proj = torch.nn.Linear(temb_channels, out_channels) self.norm2 = Normalize(out_channels) self.dropout = torch.nn.Dropout(dropout) - self.conv2 = torch.nn.Conv2d(out_channels, - out_channels, - kernel_size=3, - stride=1, - padding=1) + self.conv2 = torch.nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1) if self.in_channels != self.out_channels: if self.use_conv_shortcut: - self.conv_shortcut = torch.nn.Conv2d(in_channels, - out_channels, - kernel_size=3, - stride=1, - padding=1) + self.conv_shortcut = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) else: - self.nin_shortcut = torch.nn.Conv2d(in_channels, - out_channels, - kernel_size=1, - stride=1, - padding=0) + self.nin_shortcut = torch.nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0) def forward(self, x, temb): h = x @@ -175,26 +149,10 @@ def __init__(self, in_channels): self.in_channels = in_channels self.norm = Normalize(in_channels) - self.q = torch.nn.Conv2d(in_channels, - in_channels, - kernel_size=1, - stride=1, - padding=0) - self.k = torch.nn.Conv2d(in_channels, - in_channels, - kernel_size=1, - stride=1, - padding=0) - self.v = torch.nn.Conv2d(in_channels, - in_channels, - kernel_size=1, - stride=1, - padding=0) - self.proj_out = torch.nn.Conv2d(in_channels, - in_channels, - kernel_size=1, - stride=1, - padding=0) + self.q = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.k = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.v = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.proj_out = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) def forward(self, x): h_ = x @@ -235,11 +193,25 @@ def make_attn(in_channels, attn_type="vanilla"): class Model(nn.Module): - def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, - attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels, - resolution, use_timestep=True, use_linear_attn=False, attn_type="vanilla"): + def __init__( + self, + *, + ch, + out_ch, + ch_mult=(1, 2, 4, 8), + num_res_blocks, + attn_resolutions, + dropout=0.0, + resamp_with_conv=True, + in_channels, + resolution, + use_timestep=True, + use_linear_attn=False, + attn_type="vanilla", + ): super().__init__() - if use_linear_attn: attn_type = "linear" + if use_linear_attn: + attn_type = "linear" self.ch = ch self.temb_ch = self.ch * 4 self.num_resolutions = len(ch_mult) @@ -251,19 +223,12 @@ def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, if self.use_timestep: # timestep embedding self.temb = nn.Module() - self.temb.dense = nn.ModuleList([ - torch.nn.Linear(self.ch, - self.temb_ch), - torch.nn.Linear(self.temb_ch, - self.temb_ch), - ]) + self.temb.dense = nn.ModuleList( + [torch.nn.Linear(self.ch, self.temb_ch), torch.nn.Linear(self.temb_ch, self.temb_ch),] + ) # downsampling - self.conv_in = torch.nn.Conv2d(in_channels, - self.ch, - kernel_size=3, - stride=1, - padding=1) + self.conv_in = torch.nn.Conv2d(in_channels, self.ch, kernel_size=3, stride=1, padding=1) curr_res = resolution in_ch_mult = (1,) + tuple(ch_mult) @@ -274,10 +239,11 @@ def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, block_in = ch * in_ch_mult[i_level] block_out = ch * ch_mult[i_level] for i_block in range(self.num_res_blocks): - block.append(ResnetBlock(in_channels=block_in, - out_channels=block_out, - temb_channels=self.temb_ch, - dropout=dropout)) + block.append( + ResnetBlock( + in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch, dropout=dropout + ) + ) block_in = block_out if curr_res in attn_resolutions: attn.append(make_attn(block_in, attn_type=attn_type)) @@ -291,15 +257,13 @@ def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, # middle self.mid = nn.Module() - self.mid.block_1 = ResnetBlock(in_channels=block_in, - out_channels=block_in, - temb_channels=self.temb_ch, - dropout=dropout) + self.mid.block_1 = ResnetBlock( + in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout + ) self.mid.attn_1 = make_attn(block_in, attn_type=attn_type) - self.mid.block_2 = ResnetBlock(in_channels=block_in, - out_channels=block_in, - temb_channels=self.temb_ch, - dropout=dropout) + self.mid.block_2 = ResnetBlock( + in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout + ) # upsampling self.up = nn.ModuleList() @@ -311,10 +275,14 @@ def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, for i_block in range(self.num_res_blocks + 1): if i_block == self.num_res_blocks: skip_in = ch * in_ch_mult[i_level] - block.append(ResnetBlock(in_channels=block_in + skip_in, - out_channels=block_out, - temb_channels=self.temb_ch, - dropout=dropout)) + block.append( + ResnetBlock( + in_channels=block_in + skip_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout, + ) + ) block_in = block_out if curr_res in attn_resolutions: attn.append(make_attn(block_in, attn_type=attn_type)) @@ -328,11 +296,7 @@ def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, # end self.norm_out = Normalize(block_in) - self.conv_out = torch.nn.Conv2d(block_in, - out_ch, - kernel_size=3, - stride=1, - padding=1) + self.conv_out = torch.nn.Conv2d(block_in, out_ch, kernel_size=3, stride=1, padding=1) def forward(self, x, t=None, context=None): # assert x.shape[2] == x.shape[3] == self.resolution @@ -369,8 +333,7 @@ def forward(self, x, t=None, context=None): # upsampling for i_level in reversed(range(self.num_resolutions)): for i_block in range(self.num_res_blocks + 1): - h = self.up[i_level].block[i_block]( - torch.cat([h, hs.pop()], dim=1), temb) + h = self.up[i_level].block[i_block](torch.cat([h, hs.pop()], dim=1), temb) if len(self.up[i_level].attn) > 0: h = self.up[i_level].attn[i_block](h) if i_level != 0: @@ -387,12 +350,27 @@ def get_last_layer(self): class Encoder(nn.Module): - def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, - attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels, - resolution, z_channels, double_z=True, use_linear_attn=False, attn_type="vanilla", - **ignore_kwargs): + def __init__( + self, + *, + ch, + out_ch, + ch_mult=(1, 2, 4, 8), + num_res_blocks, + attn_resolutions, + dropout=0.0, + resamp_with_conv=True, + in_channels, + resolution, + z_channels, + double_z=True, + use_linear_attn=False, + attn_type="vanilla", + **ignore_kwargs, + ): super().__init__() - if use_linear_attn: attn_type = "linear" + if use_linear_attn: + attn_type = "linear" self.ch = ch self.temb_ch = 0 self.num_resolutions = len(ch_mult) @@ -401,11 +379,7 @@ def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, self.in_channels = in_channels # downsampling - self.conv_in = torch.nn.Conv2d(in_channels, - self.ch, - kernel_size=3, - stride=1, - padding=1) + self.conv_in = torch.nn.Conv2d(in_channels, self.ch, kernel_size=3, stride=1, padding=1) curr_res = resolution in_ch_mult = (1,) + tuple(ch_mult) @@ -417,10 +391,11 @@ def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, block_in = ch * in_ch_mult[i_level] block_out = ch * ch_mult[i_level] for i_block in range(self.num_res_blocks): - block.append(ResnetBlock(in_channels=block_in, - out_channels=block_out, - temb_channels=self.temb_ch, - dropout=dropout)) + block.append( + ResnetBlock( + in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch, dropout=dropout + ) + ) block_in = block_out if curr_res in attn_resolutions: attn.append(make_attn(block_in, attn_type=attn_type)) @@ -434,23 +409,19 @@ def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, # middle self.mid = nn.Module() - self.mid.block_1 = ResnetBlock(in_channels=block_in, - out_channels=block_in, - temb_channels=self.temb_ch, - dropout=dropout) + self.mid.block_1 = ResnetBlock( + in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout + ) self.mid.attn_1 = make_attn(block_in, attn_type=attn_type) - self.mid.block_2 = ResnetBlock(in_channels=block_in, - out_channels=block_in, - temb_channels=self.temb_ch, - dropout=dropout) + self.mid.block_2 = ResnetBlock( + in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout + ) # end self.norm_out = Normalize(block_in) - self.conv_out = torch.nn.Conv2d(block_in, - 2 * z_channels if double_z else z_channels, - kernel_size=3, - stride=1, - padding=1) + self.conv_out = torch.nn.Conv2d( + block_in, 2 * z_channels if double_z else z_channels, kernel_size=3, stride=1, padding=1 + ) def forward(self, x): # timestep embedding @@ -481,12 +452,28 @@ def forward(self, x): class Decoder(nn.Module): - def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, - attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels, - resolution, z_channels, give_pre_end=False, tanh_out=False, use_linear_attn=False, - attn_type="vanilla", **ignorekwargs): + def __init__( + self, + *, + ch, + out_ch, + ch_mult=(1, 2, 4, 8), + num_res_blocks, + attn_resolutions, + dropout=0.0, + resamp_with_conv=True, + in_channels, + resolution, + z_channels, + give_pre_end=False, + tanh_out=False, + use_linear_attn=False, + attn_type="vanilla", + **ignorekwargs, + ): super().__init__() - if use_linear_attn: attn_type = "linear" + if use_linear_attn: + attn_type = "linear" self.ch = ch self.temb_ch = 0 self.num_resolutions = len(ch_mult) @@ -501,27 +488,20 @@ def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, block_in = ch * ch_mult[self.num_resolutions - 1] curr_res = resolution // 2 ** (self.num_resolutions - 1) self.z_shape = (1, z_channels, curr_res, curr_res) - print("Working with z of shape {} = {} dimensions.".format( - self.z_shape, np.prod(self.z_shape))) + print("Working with z of shape {} = {} dimensions.".format(self.z_shape, np.prod(self.z_shape))) # z to block_in - self.conv_in = torch.nn.Conv2d(z_channels, - block_in, - kernel_size=3, - stride=1, - padding=1) + self.conv_in = torch.nn.Conv2d(z_channels, block_in, kernel_size=3, stride=1, padding=1) # middle self.mid = nn.Module() - self.mid.block_1 = ResnetBlock(in_channels=block_in, - out_channels=block_in, - temb_channels=self.temb_ch, - dropout=dropout) + self.mid.block_1 = ResnetBlock( + in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout + ) self.mid.attn_1 = make_attn(block_in, attn_type=attn_type) - self.mid.block_2 = ResnetBlock(in_channels=block_in, - out_channels=block_in, - temb_channels=self.temb_ch, - dropout=dropout) + self.mid.block_2 = ResnetBlock( + in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout + ) # upsampling self.up = nn.ModuleList() @@ -530,10 +510,11 @@ def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, attn = nn.ModuleList() block_out = ch * ch_mult[i_level] for i_block in range(self.num_res_blocks + 1): - block.append(ResnetBlock(in_channels=block_in, - out_channels=block_out, - temb_channels=self.temb_ch, - dropout=dropout)) + block.append( + ResnetBlock( + in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch, dropout=dropout + ) + ) block_in = block_out if curr_res in attn_resolutions: attn.append(make_attn(block_in, attn_type=attn_type)) @@ -547,11 +528,7 @@ def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks, # end self.norm_out = Normalize(block_in) - self.conv_out = torch.nn.Conv2d(block_in, - out_ch, - kernel_size=3, - stride=1, - padding=1) + self.conv_out = torch.nn.Conv2d(block_in, out_ch, kernel_size=3, stride=1, padding=1) def forward(self, z): # assert z.shape[1:] == self.z_shape[1:] @@ -592,25 +569,19 @@ def forward(self, z): class SimpleDecoder(nn.Module): def __init__(self, in_channels, out_channels, *args, **kwargs): super().__init__() - self.model = nn.ModuleList([nn.Conv2d(in_channels, in_channels, 1), - ResnetBlock(in_channels=in_channels, - out_channels=2 * in_channels, - temb_channels=0, dropout=0.0), - ResnetBlock(in_channels=2 * in_channels, - out_channels=4 * in_channels, - temb_channels=0, dropout=0.0), - ResnetBlock(in_channels=4 * in_channels, - out_channels=2 * in_channels, - temb_channels=0, dropout=0.0), - nn.Conv2d(2 * in_channels, in_channels, 1), - Upsample(in_channels, with_conv=True)]) + self.model = nn.ModuleList( + [ + nn.Conv2d(in_channels, in_channels, 1), + ResnetBlock(in_channels=in_channels, out_channels=2 * in_channels, temb_channels=0, dropout=0.0), + ResnetBlock(in_channels=2 * in_channels, out_channels=4 * in_channels, temb_channels=0, dropout=0.0), + ResnetBlock(in_channels=4 * in_channels, out_channels=2 * in_channels, temb_channels=0, dropout=0.0), + nn.Conv2d(2 * in_channels, in_channels, 1), + Upsample(in_channels, with_conv=True), + ] + ) # end self.norm_out = Normalize(in_channels) - self.conv_out = torch.nn.Conv2d(in_channels, - out_channels, - kernel_size=3, - stride=1, - padding=1) + self.conv_out = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) def forward(self, x): for i, layer in enumerate(self.model): @@ -626,8 +597,7 @@ def forward(self, x): class UpsampleDecoder(nn.Module): - def __init__(self, in_channels, out_channels, ch, num_res_blocks, resolution, - ch_mult=(2, 2), dropout=0.0): + def __init__(self, in_channels, out_channels, ch, num_res_blocks, resolution, ch_mult=(2, 2), dropout=0.0): super().__init__() # upsampling self.temb_ch = 0 @@ -641,10 +611,11 @@ def __init__(self, in_channels, out_channels, ch, num_res_blocks, resolution, res_block = [] block_out = ch * ch_mult[i_level] for i_block in range(self.num_res_blocks + 1): - res_block.append(ResnetBlock(in_channels=block_in, - out_channels=block_out, - temb_channels=self.temb_ch, - dropout=dropout)) + res_block.append( + ResnetBlock( + in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch, dropout=dropout + ) + ) block_in = block_out self.res_blocks.append(nn.ModuleList(res_block)) if i_level != self.num_resolutions - 1: @@ -653,11 +624,7 @@ def __init__(self, in_channels, out_channels, ch, num_res_blocks, resolution, # end self.norm_out = Normalize(block_in) - self.conv_out = torch.nn.Conv2d(block_in, - out_channels, - kernel_size=3, - stride=1, - padding=1) + self.conv_out = torch.nn.Conv2d(block_in, out_channels, kernel_size=3, stride=1, padding=1) def forward(self, x): # upsampling @@ -678,32 +645,30 @@ def __init__(self, factor, in_channels, mid_channels, out_channels, depth=2): super().__init__() # residual block, interpolate, residual block self.factor = factor - self.conv_in = nn.Conv2d(in_channels, - mid_channels, - kernel_size=3, - stride=1, - padding=1) - self.res_block1 = nn.ModuleList([ResnetBlock(in_channels=mid_channels, - out_channels=mid_channels, - temb_channels=0, - dropout=0.0) for _ in range(depth)]) + self.conv_in = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1) + self.res_block1 = nn.ModuleList( + [ + ResnetBlock(in_channels=mid_channels, out_channels=mid_channels, temb_channels=0, dropout=0.0) + for _ in range(depth) + ] + ) self.attn = AttnBlock(mid_channels) - self.res_block2 = nn.ModuleList([ResnetBlock(in_channels=mid_channels, - out_channels=mid_channels, - temb_channels=0, - dropout=0.0) for _ in range(depth)]) + self.res_block2 = nn.ModuleList( + [ + ResnetBlock(in_channels=mid_channels, out_channels=mid_channels, temb_channels=0, dropout=0.0) + for _ in range(depth) + ] + ) - self.conv_out = nn.Conv2d(mid_channels, - out_channels, - kernel_size=1, - ) + self.conv_out = nn.Conv2d(mid_channels, out_channels, kernel_size=1,) def forward(self, x): x = self.conv_in(x) for block in self.res_block1: x = block(x, None) - x = torch.nn.functional.interpolate(x, size=( - int(round(x.shape[2] * self.factor)), int(round(x.shape[3] * self.factor)))) + x = torch.nn.functional.interpolate( + x, size=(int(round(x.shape[2] * self.factor)), int(round(x.shape[3] * self.factor))) + ) x = self.attn(x) for block in self.res_block2: x = block(x, None) @@ -712,17 +677,42 @@ def forward(self, x): class MergedRescaleEncoder(nn.Module): - def __init__(self, in_channels, ch, resolution, out_ch, num_res_blocks, - attn_resolutions, dropout=0.0, resamp_with_conv=True, - ch_mult=(1, 2, 4, 8), rescale_factor=1.0, rescale_module_depth=1): + def __init__( + self, + in_channels, + ch, + resolution, + out_ch, + num_res_blocks, + attn_resolutions, + dropout=0.0, + resamp_with_conv=True, + ch_mult=(1, 2, 4, 8), + rescale_factor=1.0, + rescale_module_depth=1, + ): super().__init__() intermediate_chn = ch * ch_mult[-1] - self.encoder = Encoder(in_channels=in_channels, num_res_blocks=num_res_blocks, ch=ch, ch_mult=ch_mult, - z_channels=intermediate_chn, double_z=False, resolution=resolution, - attn_resolutions=attn_resolutions, dropout=dropout, resamp_with_conv=resamp_with_conv, - out_ch=None) - self.rescaler = LatentRescaler(factor=rescale_factor, in_channels=intermediate_chn, - mid_channels=intermediate_chn, out_channels=out_ch, depth=rescale_module_depth) + self.encoder = Encoder( + in_channels=in_channels, + num_res_blocks=num_res_blocks, + ch=ch, + ch_mult=ch_mult, + z_channels=intermediate_chn, + double_z=False, + resolution=resolution, + attn_resolutions=attn_resolutions, + dropout=dropout, + resamp_with_conv=resamp_with_conv, + out_ch=None, + ) + self.rescaler = LatentRescaler( + factor=rescale_factor, + in_channels=intermediate_chn, + mid_channels=intermediate_chn, + out_channels=out_ch, + depth=rescale_module_depth, + ) def forward(self, x): x = self.encoder(x) @@ -731,15 +721,41 @@ def forward(self, x): class MergedRescaleDecoder(nn.Module): - def __init__(self, z_channels, out_ch, resolution, num_res_blocks, attn_resolutions, ch, ch_mult=(1, 2, 4, 8), - dropout=0.0, resamp_with_conv=True, rescale_factor=1.0, rescale_module_depth=1): + def __init__( + self, + z_channels, + out_ch, + resolution, + num_res_blocks, + attn_resolutions, + ch, + ch_mult=(1, 2, 4, 8), + dropout=0.0, + resamp_with_conv=True, + rescale_factor=1.0, + rescale_module_depth=1, + ): super().__init__() tmp_chn = z_channels * ch_mult[-1] - self.decoder = Decoder(out_ch=out_ch, z_channels=tmp_chn, attn_resolutions=attn_resolutions, dropout=dropout, - resamp_with_conv=resamp_with_conv, in_channels=None, num_res_blocks=num_res_blocks, - ch_mult=ch_mult, resolution=resolution, ch=ch) - self.rescaler = LatentRescaler(factor=rescale_factor, in_channels=z_channels, mid_channels=tmp_chn, - out_channels=tmp_chn, depth=rescale_module_depth) + self.decoder = Decoder( + out_ch=out_ch, + z_channels=tmp_chn, + attn_resolutions=attn_resolutions, + dropout=dropout, + resamp_with_conv=resamp_with_conv, + in_channels=None, + num_res_blocks=num_res_blocks, + ch_mult=ch_mult, + resolution=resolution, + ch=ch, + ) + self.rescaler = LatentRescaler( + factor=rescale_factor, + in_channels=z_channels, + mid_channels=tmp_chn, + out_channels=tmp_chn, + depth=rescale_module_depth, + ) def forward(self, x): x = self.rescaler(x) @@ -752,14 +768,23 @@ def __init__(self, in_size, out_size, in_channels, out_channels, ch_mult=2): super().__init__() assert out_size >= in_size num_blocks = int(np.log2(out_size // in_size)) + 1 - factor_up = 1. + (out_size % in_size) + factor_up = 1.0 + (out_size % in_size) print( - f"Building {self.__class__.__name__} with in_size: {in_size} --> out_size {out_size} and factor {factor_up}") - self.rescaler = LatentRescaler(factor=factor_up, in_channels=in_channels, mid_channels=2 * in_channels, - out_channels=in_channels) - self.decoder = Decoder(out_ch=out_channels, resolution=out_size, z_channels=in_channels, num_res_blocks=2, - attn_resolutions=[], in_channels=None, ch=in_channels, - ch_mult=[ch_mult for _ in range(num_blocks)]) + f"Building {self.__class__.__name__} with in_size: {in_size} --> out_size {out_size} and factor {factor_up}" + ) + self.rescaler = LatentRescaler( + factor=factor_up, in_channels=in_channels, mid_channels=2 * in_channels, out_channels=in_channels + ) + self.decoder = Decoder( + out_ch=out_channels, + resolution=out_size, + z_channels=in_channels, + num_res_blocks=2, + attn_resolutions=[], + in_channels=None, + ch=in_channels, + ch_mult=[ch_mult for _ in range(num_blocks)], + ) def forward(self, x): x = self.rescaler(x) @@ -777,11 +802,7 @@ def __init__(self, in_channels=None, learned=False, mode="bilinear"): raise NotImplementedError() assert in_channels is not None # no asymmetric padding in torch conv, must do it ourselves - self.conv = torch.nn.Conv2d(in_channels, - in_channels, - kernel_size=4, - stride=2, - padding=1) + self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=4, stride=2, padding=1) def forward(self, x, scale_factor=1.0): if scale_factor == 1.0: @@ -792,13 +813,16 @@ def forward(self, x, scale_factor=1.0): class FirstStagePostProcessor(nn.Module): - - def __init__(self, ch_mult: list, in_channels, - pretrained_model: nn.Module = None, - reshape=False, - n_channels=None, - dropout=0., - pretrained_config=None): + def __init__( + self, + ch_mult: list, + in_channels, + pretrained_model: nn.Module = None, + reshape=False, + n_channels=None, + dropout=0.0, + pretrained_config=None, + ): super().__init__() if pretrained_config is None: assert pretrained_model is not None, 'Either "pretrained_model" or "pretrained_config" must not be None' @@ -813,8 +837,7 @@ def __init__(self, ch_mult: list, in_channels, n_channels = self.pretrained_model.encoder.ch self.proj_norm = Normalize(in_channels, num_groups=in_channels // 2) - self.proj = nn.Conv2d(in_channels, n_channels, kernel_size=3, - stride=1, padding=1) + self.proj = nn.Conv2d(in_channels, n_channels, kernel_size=3, stride=1, padding=1) blocks = [] downs = [] diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py index 01090950d790..c1022fa6f715 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py @@ -12,24 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. import math +from abc import abstractmethod +from functools import partial +from typing import Iterable + import numpy as np import torch import torch as th import torch.nn as nn import torch.nn.functional as F -from abc import abstractmethod -from functools import partial -from typing import Iterable from nemo.collections.multimodal.modules.stable_diffusion.attention import SpatialTransformer from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + avg_pool_nd, checkpoint, conv_nd, linear, - avg_pool_nd, - zero_module, normalization, timestep_embedding, + zero_module, ) @@ -49,11 +50,7 @@ class AttentionPool2d(nn.Module): """ def __init__( - self, - spacial_dim: int, - embed_dim: int, - num_heads_channels: int, - output_dim: int = None, + self, spacial_dim: int, embed_dim: int, num_heads_channels: int, output_dim: int = None, ): super().__init__() self.positional_embedding = nn.Parameter(th.randn(embed_dim, spacial_dim ** 2 + 1) / embed_dim ** 0.5) @@ -130,9 +127,7 @@ def forward(self, x): if dtype == torch.bfloat16: x = x.to(torch.float32) if self.dims == 3: - x = F.interpolate( - x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest" - ) + x = F.interpolate(x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest") else: x = F.interpolate(x, scale_factor=2, mode="nearest") if dtype == torch.bfloat16: @@ -174,9 +169,7 @@ def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1): self.dims = dims stride = 2 if dims != 3 else (1, 2, 2) if use_conv: - self.op = conv_nd( - dims, self.channels, self.out_channels, 3, stride=stride, padding=padding - ) + self.op = conv_nd(dims, self.channels, self.out_channels, 3, stride=stride, padding=padding) else: assert self.channels == self.out_channels self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride) @@ -203,17 +196,17 @@ class ResBlock(TimestepBlock): """ def __init__( - self, - channels, - emb_channels, - dropout, - out_channels=None, - use_conv=False, - use_scale_shift_norm=False, - dims=2, - use_checkpoint=False, - up=False, - down=False, + self, + channels, + emb_channels, + dropout, + out_channels=None, + use_conv=False, + use_scale_shift_norm=False, + dims=2, + use_checkpoint=False, + up=False, + down=False, ): super().__init__() self.channels = channels @@ -225,9 +218,7 @@ def __init__( self.use_scale_shift_norm = use_scale_shift_norm self.in_layers = nn.Sequential( - normalization(channels), - nn.SiLU(), - conv_nd(dims, channels, self.out_channels, 3, padding=1), + normalization(channels), nn.SiLU(), conv_nd(dims, channels, self.out_channels, 3, padding=1), ) self.updown = up or down @@ -242,27 +233,19 @@ def __init__( self.h_upd = self.x_upd = nn.Identity() self.emb_layers = nn.Sequential( - nn.SiLU(), - linear( - emb_channels, - 2 * self.out_channels if use_scale_shift_norm else self.out_channels, - ), + nn.SiLU(), linear(emb_channels, 2 * self.out_channels if use_scale_shift_norm else self.out_channels,), ) self.out_layers = nn.Sequential( normalization(self.out_channels), nn.SiLU(), nn.Dropout(p=dropout), - zero_module( - conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1) - ), + zero_module(conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1)), ) if self.out_channels == channels: self.skip_connection = nn.Identity() elif use_conv: - self.skip_connection = conv_nd( - dims, channels, self.out_channels, 3, padding=1 - ) + self.skip_connection = conv_nd(dims, channels, self.out_channels, 3, padding=1) else: self.skip_connection = conv_nd(dims, channels, self.out_channels, 1) @@ -273,9 +256,7 @@ def forward(self, x, emb): :param emb: an [N x emb_channels] Tensor of timestep embeddings. :return: an [N x C x ...] Tensor of outputs. """ - return checkpoint( - self._forward, (x, emb), self.parameters(), self.use_checkpoint - ) + return checkpoint(self._forward, (x, emb), self.parameters(), self.use_checkpoint) def _forward(self, x, emb): if self.updown: @@ -308,12 +289,7 @@ class AttentionBlock(nn.Module): """ def __init__( - self, - channels, - num_heads=1, - num_head_channels=-1, - use_checkpoint=False, - use_new_attention_order=False, + self, channels, num_heads=1, num_head_channels=-1, use_checkpoint=False, use_new_attention_order=False, ): super().__init__() self.channels = channels @@ -321,7 +297,7 @@ def __init__( self.num_heads = num_heads else: assert ( - channels % num_head_channels == 0 + channels % num_head_channels == 0 ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" self.num_heads = channels // num_head_channels self.use_checkpoint = use_checkpoint @@ -337,8 +313,9 @@ def __init__( self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) def forward(self, x): - return checkpoint(self._forward, (x,), self.parameters(), - True) # TODO: check checkpoint usage, is True # TODO: fix the .half call!!! + return checkpoint( + self._forward, (x,), self.parameters(), True + ) # TODO: check checkpoint usage, is True # TODO: fix the .half call!!! # return pt_checkpoint(self._forward, x) # pytorch def _forward(self, x): @@ -390,9 +367,7 @@ def forward(self, qkv): ch = width // (3 * self.n_heads) q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split(ch, dim=1) scale = 1 / math.sqrt(math.sqrt(ch)) - weight = th.einsum( - "bct,bcs->bts", q * scale, k * scale - ) # More stable with f16 than dividing afterwards + weight = th.einsum("bct,bcs->bts", q * scale, k * scale) # More stable with f16 than dividing afterwards weight = th.softmax(weight.float(), dim=-1).type(weight.dtype) a = th.einsum("bts,bcs->bct", weight, v) return a.reshape(bs, -1, length) @@ -467,43 +442,48 @@ class UNetModel(nn.Module): """ def __init__( - self, - image_size, - in_channels, - model_channels, - out_channels, - num_res_blocks, - attention_resolutions, - dropout=0, - channel_mult=(1, 2, 4, 8), - conv_resample=True, - dims=2, - num_classes=None, - use_checkpoint=False, - use_fp16=False, - num_heads=-1, - num_head_channels=-1, - num_heads_upsample=-1, - use_scale_shift_norm=False, - resblock_updown=False, - use_new_attention_order=False, - use_spatial_transformer=False, # custom transformer support - transformer_depth=1, # custom transformer support - context_dim=None, # custom transformer support - n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model - legacy=True, - from_pretrained: str = None, - from_NeMo=False, - # It must be specified when from pretrained is not None. It indicates loading unet from NeMo trained ckpt or HF - use_flash_attention: bool = False, + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + num_classes=None, + use_checkpoint=False, + use_fp16=False, + num_heads=-1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + use_spatial_transformer=False, # custom transformer support + transformer_depth=1, # custom transformer support + context_dim=None, # custom transformer support + n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model + legacy=True, + from_pretrained: str = None, + from_NeMo=False, + # It must be specified when from pretrained is not None. It indicates loading unet from NeMo trained ckpt or HF + use_flash_attention: bool = False, ): super().__init__() if use_spatial_transformer: - assert context_dim is not None, 'Fool!! You forgot to include the dimension of your cross-attention conditioning...' + assert ( + context_dim is not None + ), 'Fool!! You forgot to include the dimension of your cross-attention conditioning...' if context_dim is not None: - assert use_spatial_transformer, 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...' + assert ( + use_spatial_transformer + ), 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...' from omegaconf.listconfig import ListConfig + if type(context_dim) == ListConfig: context_dim = list(context_dim) @@ -534,20 +514,14 @@ def __init__( self.predict_codebook_ids = n_embed is not None time_embed_dim = model_channels * 4 self.time_embed = nn.Sequential( - linear(model_channels, time_embed_dim), - nn.SiLU(), - linear(time_embed_dim, time_embed_dim), + linear(model_channels, time_embed_dim), nn.SiLU(), linear(time_embed_dim, time_embed_dim), ) if self.num_classes is not None: self.label_emb = nn.Embedding(num_classes, time_embed_dim) self.input_blocks = nn.ModuleList( - [ - TimestepEmbedSequential( - conv_nd(dims, in_channels, model_channels, 3, padding=1) - ) - ] + [TimestepEmbedSequential(conv_nd(dims, in_channels, model_channels, 3, padding=1))] ) self._feature_size = model_channels input_block_chans = [model_channels] @@ -583,8 +557,12 @@ def __init__( num_heads=num_heads, num_head_channels=dim_head, use_new_attention_order=use_new_attention_order, - ) if not use_spatial_transformer else SpatialTransformer( - ch, num_heads, dim_head, + ) + if not use_spatial_transformer + else SpatialTransformer( + ch, + num_heads, + dim_head, depth=transformer_depth, context_dim=context_dim, use_checkpoint=use_checkpoint, @@ -609,9 +587,7 @@ def __init__( down=True, ) if resblock_updown - else Downsample( - ch, conv_resample, dims=dims, out_channels=out_ch - ) + else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch) ) ) ch = out_ch @@ -642,8 +618,12 @@ def __init__( num_heads=num_heads, num_head_channels=dim_head, use_new_attention_order=use_new_attention_order, - ) if not use_spatial_transformer else SpatialTransformer( - ch, num_heads, dim_head, + ) + if not use_spatial_transformer + else SpatialTransformer( + ch, + num_heads, + dim_head, depth=transformer_depth, context_dim=context_dim, use_checkpoint=use_checkpoint, @@ -692,8 +672,12 @@ def __init__( num_heads=num_heads_upsample, num_head_channels=dim_head, use_new_attention_order=use_new_attention_order, - ) if not use_spatial_transformer else SpatialTransformer( - ch, num_heads, dim_head, + ) + if not use_spatial_transformer + else SpatialTransformer( + ch, + num_heads, + dim_head, depth=transformer_depth, context_dim=context_dim, use_checkpoint=use_checkpoint, @@ -721,9 +705,7 @@ def __init__( self._feature_size += ch self.out = nn.Sequential( - normalization(ch), - nn.SiLU(), - zero_module(conv_nd(dims, model_channels, out_channels, 3, padding=1)), + normalization(ch), nn.SiLU(), zero_module(conv_nd(dims, model_channels, out_channels, 3, padding=1)), ) if self.predict_codebook_ids: self.id_predictor = nn.Sequential( @@ -732,6 +714,7 @@ def __init__( # nn.LogSoftmax(dim=1) # change to cross_entropy and produce non-normalized logits ) from diffusers.modeling_utils import load_state_dict + if from_pretrained is not None: if from_NeMo: state_dict = torch.load(from_pretrained, map_location='cpu') @@ -747,12 +730,15 @@ def _input_blocks_mapping(self, input_dict): if "resnets" in key_: id_1 = int(key_[23]) target_id = 3 * id_0 + 1 + id_1 - post_fix = key_[25:].replace('time_emb_proj', 'emb_layers.1') \ - .replace('norm1', 'in_layers.0') \ - .replace('norm2', 'out_layers.0') \ - .replace('conv1', 'in_layers.2') \ - .replace('conv2', 'out_layers.3') \ + post_fix = ( + key_[25:] + .replace('time_emb_proj', 'emb_layers.1') + .replace('norm1', 'in_layers.0') + .replace('norm2', 'out_layers.0') + .replace('conv1', 'in_layers.2') + .replace('conv2', 'out_layers.3') .replace('conv_shortcut', 'skip_connection') + ) res_dict["input_blocks." + str(target_id) + '.0.' + post_fix] = value_ elif "attentions" in key_: id_1 = int(key_[26]) @@ -769,14 +755,16 @@ def _mid_blocks_mapping(self, mid_dict): res_dict = {} for key_, value_ in mid_dict.items(): if "resnets" in key_: - temp_key_ = key_.replace('time_emb_proj', 'emb_layers.1') \ - .replace('norm1', 'in_layers.0') \ - .replace('norm2', 'out_layers.0') \ - .replace('conv1', 'in_layers.2') \ - .replace('conv2', 'out_layers.3') \ - .replace('conv_shortcut', 'skip_connection') \ - .replace('middle_block.resnets.0', 'middle_block.0') \ + temp_key_ = ( + key_.replace('time_emb_proj', 'emb_layers.1') + .replace('norm1', 'in_layers.0') + .replace('norm2', 'out_layers.0') + .replace('conv1', 'in_layers.2') + .replace('conv2', 'out_layers.3') + .replace('conv_shortcut', 'skip_connection') + .replace('middle_block.resnets.0', 'middle_block.0') .replace('middle_block.resnets.1', 'middle_block.2') + ) res_dict[temp_key_] = value_ elif "attentions" in key_: res_dict[key_.replace('attentions.0', '1')] = value_ @@ -785,11 +773,13 @@ def _mid_blocks_mapping(self, mid_dict): def _other_blocks_mapping(self, other_dict): res_dict = {} for key_, value_ in other_dict.items(): - tmp_key = key_.replace('conv_in', 'input_blocks.0.0') \ - .replace('time_embedding.linear_1', 'time_embed.0') \ - .replace('time_embedding.linear_2', 'time_embed.2') \ - .replace('conv_norm_out', 'out.0') \ + tmp_key = ( + key_.replace('conv_in', 'input_blocks.0.0') + .replace('time_embedding.linear_1', 'time_embed.0') + .replace('time_embedding.linear_2', 'time_embed.2') + .replace('conv_norm_out', 'out.0') .replace('conv_out', 'out.2') + ) res_dict[tmp_key] = value_ return res_dict @@ -800,12 +790,15 @@ def _output_blocks_mapping(self, output_dict): if "resnets" in key_: id_1 = int(key_[24]) target_id = 3 * id_0 + id_1 - post_fix = key_[26:].replace('time_emb_proj', 'emb_layers.1') \ - .replace('norm1', 'in_layers.0') \ - .replace('norm2', 'out_layers.0') \ - .replace('conv1', 'in_layers.2') \ - .replace('conv2', 'out_layers.3') \ + post_fix = ( + key_[26:] + .replace('time_emb_proj', 'emb_layers.1') + .replace('norm1', 'in_layers.0') + .replace('norm2', 'out_layers.0') + .replace('conv1', 'in_layers.2') + .replace('conv2', 'out_layers.3') .replace('conv_shortcut', 'skip_connection') + ) res_dict["output_blocks." + str(target_id) + '.0.' + post_fix] = value_ elif "attentions" in key_: id_1 = int(key_[27]) @@ -821,6 +814,7 @@ def _output_blocks_mapping(self, output_dict): def _state_key_mapping(self, state_dict: dict): import re + res_dict = {} input_dict = {} mid_dict = {} @@ -867,10 +861,7 @@ def _load_pretrained_model(self, state_dict, ignore_mismatched_sizes=False, from unexpected_keys = list(set(loaded_keys) - set(expected_keys)) def _find_mismatched_keys( - state_dict, - model_state_dict, - loaded_keys, - ignore_mismatched_sizes, + state_dict, model_state_dict, loaded_keys, ignore_mismatched_sizes, ): mismatched_keys = [] if ignore_mismatched_sizes: @@ -878,8 +869,8 @@ def _find_mismatched_keys( model_key = checkpoint_key if ( - model_key in model_state_dict - and state_dict[checkpoint_key].shape != model_state_dict[model_key].shape + model_key in model_state_dict + and state_dict[checkpoint_key].shape != model_state_dict[model_key].shape ): mismatched_keys.append( (checkpoint_key, state_dict[checkpoint_key].shape, model_state_dict[model_key].shape) @@ -890,10 +881,7 @@ def _find_mismatched_keys( if state_dict is not None: # Whole checkpoint mismatched_keys = _find_mismatched_keys( - state_dict, - model_state_dict, - original_loaded_keys, - ignore_mismatched_sizes, + state_dict, model_state_dict, original_loaded_keys, ignore_mismatched_sizes, ) error_msgs = self._load_state_dict_into_model(state_dict) return missing_keys, unexpected_keys, mismatched_keys, error_msgs @@ -958,7 +946,7 @@ def forward(self, x, timesteps=None, context=None, y=None, **kwargs): :return: an [N x C x ...] Tensor of outputs. """ assert (y is not None) == ( - self.num_classes is not None + self.num_classes is not None ), "must specify y if and only if the model is class-conditional" hs = [] t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False) @@ -994,28 +982,28 @@ class EncoderUNetModel(nn.Module): """ def __init__( - self, - image_size, - in_channels, - model_channels, - out_channels, - num_res_blocks, - attention_resolutions, - dropout=0, - channel_mult=(1, 2, 4, 8), - conv_resample=True, - dims=2, - use_checkpoint=False, - use_fp16=False, - num_heads=1, - num_head_channels=-1, - num_heads_upsample=-1, - use_scale_shift_norm=False, - resblock_updown=False, - use_new_attention_order=False, - pool="adaptive", - *args, - **kwargs + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + use_checkpoint=False, + use_fp16=False, + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + pool="adaptive", + *args, + **kwargs, ): super().__init__() if num_heads_upsample == -1: @@ -1037,17 +1025,11 @@ def __init__( time_embed_dim = model_channels * 4 self.time_embed = nn.Sequential( - linear(model_channels, time_embed_dim), - nn.SiLU(), - linear(time_embed_dim, time_embed_dim), + linear(model_channels, time_embed_dim), nn.SiLU(), linear(time_embed_dim, time_embed_dim), ) self.input_blocks = nn.ModuleList( - [ - TimestepEmbedSequential( - conv_nd(dims, in_channels, model_channels, 3, padding=1) - ) - ] + [TimestepEmbedSequential(conv_nd(dims, in_channels, model_channels, 3, padding=1))] ) self._feature_size = model_channels input_block_chans = [model_channels] @@ -1095,9 +1077,7 @@ def __init__( down=True, ) if resblock_updown - else Downsample( - ch, conv_resample, dims=dims, out_channels=out_ch - ) + else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch) ) ) ch = out_ch @@ -1143,17 +1123,11 @@ def __init__( elif pool == "attention": assert num_head_channels != -1 self.out = nn.Sequential( - normalization(ch), - nn.SiLU(), - AttentionPool2d( - (image_size // ds), ch, num_head_channels, out_channels - ), + normalization(ch), nn.SiLU(), AttentionPool2d((image_size // ds), ch, num_head_channels, out_channels), ) elif pool == "spatial": self.out = nn.Sequential( - nn.Linear(self._feature_size, 2048), - nn.ReLU(), - nn.Linear(2048, self.out_channels), + nn.Linear(self._feature_size, 2048), nn.ReLU(), nn.Linear(2048, self.out_channels), ) elif pool == "spatial_v2": self.out = nn.Sequential( diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py index d50545106da5..2dbc4830142f 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py @@ -22,8 +22,9 @@ import math -import numpy as np import os + +import numpy as np import torch import torch.nn as nn from einops import repeat @@ -32,14 +33,10 @@ def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): if schedule == "linear": - betas = ( - torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2 - ) + betas = torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2 elif schedule == "cosine": - timesteps = ( - torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s - ) + timesteps = torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s alphas = timesteps / (1 + cosine_s) * np.pi / 2 alphas = torch.cos(alphas).pow(2) alphas = alphas / alphas[0] @@ -60,7 +57,7 @@ def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_timestep c = num_ddpm_timesteps // num_ddim_timesteps ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c))) elif ddim_discr_method == 'quad': - ddim_timesteps = ((np.linspace(0, np.sqrt(num_ddpm_timesteps * .8), num_ddim_timesteps)) ** 2).astype(int) + ddim_timesteps = ((np.linspace(0, np.sqrt(num_ddpm_timesteps * 0.8), num_ddim_timesteps)) ** 2).astype(int) else: raise NotImplementedError(f'There is no ddim discretization method called "{ddim_discr_method}"') @@ -81,8 +78,10 @@ def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbose=True): sigmas = eta * np.sqrt((1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev)) if verbose: print(f'Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}') - print(f'For the chosen value of eta, which is {eta}, ' - f'this results in the following sigma_t schedule for ddim sampler {sigmas}') + print( + f'For the chosen value of eta, which is {eta}, ' + f'this results in the following sigma_t schedule for ddim sampler {sigmas}' + ) return sigmas, alphas, alphas_prev @@ -149,10 +148,7 @@ def backward(ctx, *output_grads): shallow_copies = [x.view_as(x) for x in ctx.input_tensors] output_tensors = ctx.run_function(*shallow_copies) input_grads = torch.autograd.grad( - output_tensors, - ctx.input_tensors + ctx.input_params, - output_grads, - allow_unused=True, + output_tensors, ctx.input_tensors + ctx.input_params, output_grads, allow_unused=True, ) del ctx.input_tensors del ctx.input_params diff --git a/nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py b/nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py index 096b3acf94f3..81d79ac5801a 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py @@ -51,25 +51,25 @@ def sample(self): def kl(self, other=None): if self.deterministic: - return torch.Tensor([0.]) + return torch.Tensor([0.0]) else: if other is None: - return 0.5 * torch.sum(torch.pow(self.mean, 2) - + self.var - 1.0 - self.logvar, - dim=[1, 2, 3]) + return 0.5 * torch.sum(torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar, dim=[1, 2, 3]) else: return 0.5 * torch.sum( torch.pow(self.mean - other.mean, 2) / other.var - + self.var / other.var - 1.0 - self.logvar + other.logvar, - dim=[1, 2, 3]) + + self.var / other.var + - 1.0 + - self.logvar + + other.logvar, + dim=[1, 2, 3], + ) def nll(self, sample, dims=[1, 2, 3]): if self.deterministic: - return torch.Tensor([0.]) + return torch.Tensor([0.0]) logtwopi = np.log(2.0 * np.pi) - return 0.5 * torch.sum( - logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, - dim=dims) + return 0.5 * torch.sum(logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, dim=dims) def mode(self): return self.mean @@ -91,15 +91,8 @@ def normal_kl(mean1, logvar1, mean2, logvar2): # Force variances to be Tensors. Broadcasting helps convert scalars to # Tensors, but it does not work for torch.exp(). - logvar1, logvar2 = [ - x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) - for x in (logvar1, logvar2) - ] + logvar1, logvar2 = [x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) for x in (logvar1, logvar2)] return 0.5 * ( - -1.0 - + logvar2 - - logvar1 - + torch.exp(logvar1 - logvar2) - + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) + -1.0 + logvar2 - logvar1 + torch.exp(logvar1 - logvar2) + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) ) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py index 10d9a03362f4..d23bb20de392 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py @@ -11,16 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from functools import partial + import kornia import torch import torch.nn as nn from einops import rearrange, repeat -from functools import partial -from transformers import CLIPTokenizer, CLIPTextModel, CLIPTextConfig +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer from transformers.models.clip.modeling_clip import CLIPTextTransformer -from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import Encoder, \ - TransformerWrapper # TODO: can we directly rely on lucidrains code and simply add this as a reuirement? --> test +from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import ( + TransformerWrapper, # TODO: can we directly rely on lucidrains code and simply add this as a reuirement? --> test +) +from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import Encoder class AbstractEncoder(nn.Module): @@ -52,8 +55,9 @@ class TransformerEmbedder(AbstractEncoder): def __init__(self, n_embed, n_layer, vocab_size, max_seq_len=77, device="cuda"): super().__init__() self.device = device - self.transformer = TransformerWrapper(num_tokens=vocab_size, max_seq_len=max_seq_len, - attn_layers=Encoder(dim=n_embed, depth=n_layer)) + self.transformer = TransformerWrapper( + num_tokens=vocab_size, max_seq_len=max_seq_len, attn_layers=Encoder(dim=n_embed, depth=n_layer) + ) def forward(self, tokens): tokens = tokens.to(self.device) # meh @@ -70,14 +74,22 @@ class BERTTokenizer(AbstractEncoder): def __init__(self, device="cuda", vq_interface=True, max_length=77): super().__init__() from transformers import BertTokenizerFast # TODO: add to reuquirements + self.tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased") self.device = device self.vq_interface = vq_interface self.max_length = max_length def forward(self, text): - batch_encoding = self.tokenizer(text, truncation=True, max_length=self.max_length, return_length=True, - return_overflowing_tokens=False, padding="max_length", return_tensors="pt") + batch_encoding = self.tokenizer( + text, + truncation=True, + max_length=self.max_length, + return_length=True, + return_overflowing_tokens=False, + padding="max_length", + return_tensors="pt", + ) tokens = batch_encoding["input_ids"].to(self.device) return tokens @@ -95,16 +107,27 @@ def decode(self, text): class BERTEmbedder(AbstractEncoder): """Uses the BERT tokenizr model and add some transformer encoder layers""" - def __init__(self, n_embed, n_layer, vocab_size=30522, max_seq_len=77, - device="cuda", use_tokenizer=True, embedding_dropout=0.0): + def __init__( + self, + n_embed, + n_layer, + vocab_size=30522, + max_seq_len=77, + device="cuda", + use_tokenizer=True, + embedding_dropout=0.0, + ): super().__init__() self.use_tknz_fn = use_tokenizer if self.use_tknz_fn: self.tknz_fn = BERTTokenizer(vq_interface=False, max_length=max_seq_len) self.device = device - self.transformer = TransformerWrapper(num_tokens=vocab_size, max_seq_len=max_seq_len, - attn_layers=Encoder(dim=n_embed, depth=n_layer), - emb_dropout=embedding_dropout) + self.transformer = TransformerWrapper( + num_tokens=vocab_size, + max_seq_len=max_seq_len, + attn_layers=Encoder(dim=n_embed, depth=n_layer), + emb_dropout=embedding_dropout, + ) def forward(self, text): if self.use_tknz_fn: @@ -120,13 +143,7 @@ def encode(self, text): class SpatialRescaler(nn.Module): - def __init__(self, - n_stages=1, - method='bilinear', - multiplier=0.5, - in_channels=3, - out_channels=None, - bias=False): + def __init__(self, n_stages=1, method='bilinear', multiplier=0.5, in_channels=3, out_channels=None, bias=False): super().__init__() self.n_stages = n_stages assert self.n_stages >= 0 @@ -189,8 +206,15 @@ def freeze(self): param.requires_grad = False def forward(self, text): - batch_encoding = self.tokenizer(text, truncation=True, max_length=self.max_length, return_length=True, - return_overflowing_tokens=False, padding="max_length", return_tensors="pt") + batch_encoding = self.tokenizer( + text, + truncation=True, + max_length=self.max_length, + return_length=True, + return_overflowing_tokens=False, + padding="max_length", + return_tensors="pt", + ) tokens = batch_encoding["input_ids"].to(self.device, non_blocking=True) outputs = self.transformer(input_ids=tokens) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py index 69fa52e2b352..ff47596f9932 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py @@ -12,27 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. """shout-out to https://github.com/lucidrains/x-transformers/tree/main/x_transformers""" -import torch -import torch.nn.functional as F from collections import namedtuple -from einops import rearrange, repeat, reduce from functools import partial from inspect import isfunction -from torch import nn, einsum + +import torch +import torch.nn.functional as F +from einops import rearrange, reduce, repeat +from torch import einsum, nn # constants DEFAULT_DIM_HEAD = 64 -Intermediates = namedtuple('Intermediates', [ - 'pre_softmax_attn', - 'post_softmax_attn' -]) +Intermediates = namedtuple('Intermediates', ['pre_softmax_attn', 'post_softmax_attn']) -LayerIntermediates = namedtuple('Intermediates', [ - 'hiddens', - 'attn_intermediates' -]) +LayerIntermediates = namedtuple('Intermediates', ['hiddens', 'attn_intermediates']) class AbsolutePositionalEmbedding(nn.Module): @@ -52,7 +47,7 @@ def forward(self, x): class FixedPositionalEmbedding(nn.Module): def __init__(self, dim): super().__init__() - inv_freq = 1. / (10000 ** (torch.arange(0, dim, 2).float() / dim)) + inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim)) self.register_buffer('inv_freq', inv_freq) def forward(self, x, seq_dim=1, offset=0): @@ -64,6 +59,7 @@ def forward(self, x, seq_dim=1, offset=0): # helpers + def exists(val): return val is not None @@ -101,6 +97,7 @@ def max_neg_value(tensor): # keyword argument helpers + def pick_and_pop(keys, d): values = list(map(lambda key: d.pop(key), keys)) return dict(zip(keys, values)) @@ -125,7 +122,7 @@ def group_by_key_prefix(prefix, d): def groupby_prefix_and_trim(prefix, d): kwargs_with_prefix, kwargs = group_dict_by_key(partial(string_begins_with, prefix), d) - kwargs_without_prefix = dict(map(lambda x: (x[0][len(prefix):], x[1]), tuple(kwargs_with_prefix.items()))) + kwargs_without_prefix = dict(map(lambda x: (x[0][len(prefix) :], x[1]), tuple(kwargs_with_prefix.items()))) return kwargs_without_prefix, kwargs @@ -187,16 +184,14 @@ def __init__(self, dim): self.gru = nn.GRUCell(dim, dim) def forward(self, x, residual): - gated_output = self.gru( - rearrange(x, 'b n d -> (b n) d'), - rearrange(residual, 'b n d -> (b n) d') - ) + gated_output = self.gru(rearrange(x, 'b n d -> (b n) d'), rearrange(residual, 'b n d -> (b n) d')) return gated_output.reshape_as(x) # feedforward + class GEGLU(nn.Module): def __init__(self, dim_in, dim_out): super().__init__() @@ -208,20 +203,13 @@ def forward(self, x): class FeedForward(nn.Module): - def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.): + def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0): super().__init__() inner_dim = int(dim * mult) dim_out = default(dim_out, dim) - project_in = nn.Sequential( - nn.Linear(dim, inner_dim), - nn.GELU() - ) if not glu else GEGLU(dim, inner_dim) - - self.net = nn.Sequential( - project_in, - nn.Dropout(dropout), - nn.Linear(inner_dim, dim_out) - ) + project_in = nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU()) if not glu else GEGLU(dim, inner_dim) + + self.net = nn.Sequential(project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim_out)) def forward(self, x): return self.net(x) @@ -230,18 +218,18 @@ def forward(self, x): # attention. class Attention(nn.Module): def __init__( - self, - dim, - dim_head=DEFAULT_DIM_HEAD, - heads=8, - causal=False, - mask=None, - talking_heads=False, - sparse_topk=None, - use_entmax15=False, - num_mem_kv=0, - dropout=0., - on_attn=False + self, + dim, + dim_head=DEFAULT_DIM_HEAD, + heads=8, + causal=False, + mask=None, + talking_heads=False, + sparse_topk=None, + use_entmax15=False, + num_mem_kv=0, + dropout=0.0, + on_attn=False, ): super().__init__() if use_entmax15: @@ -282,15 +270,15 @@ def __init__( self.to_out = nn.Sequential(nn.Linear(inner_dim, dim * 2), nn.GLU()) if on_attn else nn.Linear(inner_dim, dim) def forward( - self, - x, - context=None, - mask=None, - context_mask=None, - rel_pos=None, - sinusoidal_emb=None, - prev_attn=None, - mem=None + self, + x, + context=None, + mask=None, + context_mask=None, + rel_pos=None, + sinusoidal_emb=None, + prev_attn=None, + mem=None, ): b, n, _, h, talking_heads, device = *x.shape, self.heads, self.talking_heads, x.device kv_input = default(context, x) @@ -375,38 +363,35 @@ def forward( out = einsum('b h i j, b h j d -> b h i d', attn, v) out = rearrange(out, 'b h n d -> b n (h d)') - intermediates = Intermediates( - pre_softmax_attn=pre_softmax_attn, - post_softmax_attn=post_softmax_attn - ) + intermediates = Intermediates(pre_softmax_attn=pre_softmax_attn, post_softmax_attn=post_softmax_attn) return self.to_out(out), intermediates class AttentionLayers(nn.Module): def __init__( - self, - dim, - depth, - heads=8, - causal=False, - cross_attend=False, - only_cross=False, - use_scalenorm=False, - use_rmsnorm=False, - use_rezero=False, - rel_pos_num_buckets=32, - rel_pos_max_distance=128, - position_infused_attn=False, - custom_layers=None, - sandwich_coef=None, - par_ratio=None, - residual_attn=False, - cross_residual_attn=False, - macaron=False, - pre_norm=True, - gate_residual=False, - **kwargs + self, + dim, + depth, + heads=8, + causal=False, + cross_attend=False, + only_cross=False, + use_scalenorm=False, + use_rmsnorm=False, + use_rezero=False, + rel_pos_num_buckets=32, + rel_pos_max_distance=128, + position_infused_attn=False, + custom_layers=None, + sandwich_coef=None, + par_ratio=None, + residual_attn=False, + cross_residual_attn=False, + macaron=False, + pre_norm=True, + gate_residual=False, + **kwargs, ): super().__init__() ff_kwargs, kwargs = groupby_prefix_and_trim('ff_', kwargs) @@ -422,7 +407,9 @@ def __init__( self.pia_pos_emb = FixedPositionalEmbedding(dim) if position_infused_attn else None self.rotary_pos_emb = always(None) - assert rel_pos_num_buckets <= rel_pos_max_distance, 'number of relative position buckets must be less than the relative position max distance' + assert ( + rel_pos_num_buckets <= rel_pos_max_distance + ), 'number of relative position buckets must be less than the relative position max distance' self.rel_pos = None self.pre_norm = pre_norm @@ -488,21 +475,9 @@ def __init__( else: residual_fn = Residual() - self.layers.append(nn.ModuleList([ - norm_fn(), - layer, - residual_fn - ])) + self.layers.append(nn.ModuleList([norm_fn(), layer, residual_fn])) - def forward( - self, - x, - context=None, - mask=None, - context_mask=None, - mems=None, - return_hiddens=False - ): + def forward(self, x, context=None, mask=None, context_mask=None, mems=None, return_hiddens=False): hiddens = [] intermediates = [] prev_attn = None @@ -523,8 +498,14 @@ def forward( x = norm(x) if layer_type == 'a': - out, inter = block(x, mask=mask, sinusoidal_emb=self.pia_pos_emb, rel_pos=self.rel_pos, - prev_attn=prev_attn, mem=layer_mem) + out, inter = block( + x, + mask=mask, + sinusoidal_emb=self.pia_pos_emb, + rel_pos=self.rel_pos, + prev_attn=prev_attn, + mem=layer_mem, + ) elif layer_type == 'c': out, inter = block(x, context=context, mask=mask, context_mask=context_mask, prev_attn=prev_cross_attn) elif layer_type == 'f': @@ -544,10 +525,7 @@ def forward( x = norm(x) if return_hiddens: - intermediates = LayerIntermediates( - hiddens=hiddens, - attn_intermediates=intermediates - ) + intermediates = LayerIntermediates(hiddens=hiddens, attn_intermediates=intermediates) return x, intermediates @@ -562,17 +540,17 @@ def __init__(self, **kwargs): class TransformerWrapper(nn.Module): def __init__( - self, - *, - num_tokens, - max_seq_len, - attn_layers, - emb_dim=None, - max_mem_len=0., - emb_dropout=0., - num_memory_tokens=None, - tie_embedding=False, - use_pos_emb=True + self, + *, + num_tokens, + max_seq_len, + attn_layers, + emb_dim=None, + max_mem_len=0.0, + emb_dropout=0.0, + num_memory_tokens=None, + tie_embedding=False, + use_pos_emb=True, ): super().__init__() assert isinstance(attn_layers, AttentionLayers), 'attention layers must be one of Encoder or Decoder' @@ -585,8 +563,11 @@ def __init__( self.num_tokens = num_tokens self.token_emb = nn.Embedding(num_tokens, emb_dim) - self.pos_emb = AbsolutePositionalEmbedding(emb_dim, max_seq_len) if ( - use_pos_emb and not attn_layers.has_pos_emb) else always(0) + self.pos_emb = ( + AbsolutePositionalEmbedding(emb_dim, max_seq_len) + if (use_pos_emb and not attn_layers.has_pos_emb) + else always(0) + ) self.emb_dropout = nn.Dropout(emb_dropout) self.project_emb = nn.Linear(emb_dim, dim) if emb_dim != dim else nn.Identity() @@ -611,14 +592,7 @@ def init_(self): nn.init.normal_(self.token_emb.weight, std=0.02) def forward( - self, - x, - return_embeddings=False, - mask=None, - return_mems=False, - return_attn=False, - mems=None, - **kwargs + self, x, return_embeddings=False, mask=None, return_mems=False, return_attn=False, mems=None, **kwargs ): b, n, device, num_mem = *x.shape, x.device, self.num_memory_tokens x = self.token_emb(x) @@ -645,7 +619,7 @@ def forward( if return_mems: hiddens = intermediates.hiddens new_mems = list(map(lambda pair: torch.cat(pair, dim=-2), zip(mems, hiddens))) if exists(mems) else hiddens - new_mems = list(map(lambda t: t[..., -self.max_mem_len:, :].detach(), new_mems)) + new_mems = list(map(lambda t: t[..., -self.max_mem_len :, :].detach(), new_mems)) return out, new_mems if return_attn: diff --git a/nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py b/nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py index e72249b1ff0e..620d1dcad41a 100644 --- a/nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py +++ b/nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py @@ -25,12 +25,13 @@ def __init__(self, warm_up_steps, lr_min, lr_max, lr_start, max_decay_steps, ver self.lr_min = lr_min self.lr_max = lr_max self.lr_max_decay_steps = max_decay_steps - self.last_lr = 0. + self.last_lr = 0.0 self.verbosity_interval = verbosity_interval def schedule(self, n, **kwargs): if self.verbosity_interval > 0: - if n % self.verbosity_interval == 0: print(f"current step: {n}, recent lr-multiplier: {self.last_lr}") + if n % self.verbosity_interval == 0: + print(f"current step: {n}, recent lr-multiplier: {self.last_lr}") if n < self.lr_warm_up_steps: lr = (self.lr_max - self.lr_start) / self.lr_warm_up_steps * n + self.lr_start self.last_lr = lr @@ -38,8 +39,7 @@ def schedule(self, n, **kwargs): else: t = (n - self.lr_warm_up_steps) / (self.lr_max_decay_steps - self.lr_warm_up_steps) t = min(t, 1.0) - lr = self.lr_min + 0.5 * (self.lr_max - self.lr_min) * ( - 1 + np.cos(t * np.pi)) + lr = self.lr_min + 0.5 * (self.lr_max - self.lr_min) * (1 + np.cos(t * np.pi)) self.last_lr = lr return lr @@ -61,7 +61,7 @@ def __init__(self, warm_up_steps, f_min, f_max, f_start, cycle_lengths, verbosit self.f_max = f_max self.cycle_lengths = cycle_lengths self.cum_cycles = np.cumsum([0] + list(self.cycle_lengths)) - self.last_f = 0. + self.last_f = 0.0 self.verbosity_interval = verbosity_interval def find_in_interval(self, n): @@ -75,8 +75,8 @@ def schedule(self, n, **kwargs): cycle = self.find_in_interval(n) n = n - self.cum_cycles[cycle] if self.verbosity_interval > 0: - if n % self.verbosity_interval == 0: print(f"current step: {n}, recent lr-multiplier: {self.last_f}, " - f"current cycle {cycle}") + if n % self.verbosity_interval == 0: + print(f"current step: {n}, recent lr-multiplier: {self.last_f}, " f"current cycle {cycle}") if n < self.lr_warm_up_steps[cycle]: f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle] self.last_f = f @@ -84,8 +84,7 @@ def schedule(self, n, **kwargs): else: t = (n - self.lr_warm_up_steps[cycle]) / (self.cycle_lengths[cycle] - self.lr_warm_up_steps[cycle]) t = min(t, 1.0) - f = self.f_min[cycle] + 0.5 * (self.f_max[cycle] - self.f_min[cycle]) * ( - 1 + np.cos(t * np.pi)) + f = self.f_min[cycle] + 0.5 * (self.f_max[cycle] - self.f_min[cycle]) * (1 + np.cos(t * np.pi)) self.last_f = f return f @@ -94,13 +93,12 @@ def __call__(self, n, **kwargs): class LambdaLinearScheduler(LambdaWarmUpCosineScheduler2): - def schedule(self, n, **kwargs): cycle = self.find_in_interval(n) n = n - self.cum_cycles[cycle] if self.verbosity_interval > 0: - if n % self.verbosity_interval == 0: print(f"current step: {n}, recent lr-multiplier: {self.last_f}, " - f"current cycle {cycle}") + if n % self.verbosity_interval == 0: + print(f"current step: {n}, recent lr-multiplier: {self.last_f}, " f"current cycle {cycle}") if n < self.lr_warm_up_steps[cycle]: f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle] @@ -108,6 +106,7 @@ def schedule(self, n, **kwargs): return f else: f = self.f_min[cycle] + (self.f_max[cycle] - self.f_min[cycle]) * (self.cycle_lengths[cycle] - n) / ( - self.cycle_lengths[cycle]) + self.cycle_lengths[cycle] + ) self.last_f = f return f diff --git a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py index 72d9bff4bacd..1c82e58543bf 100644 --- a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py +++ b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py @@ -14,26 +14,23 @@ import os import pickle import time + import torch -from PIL import Image from omegaconf.omegaconf import OmegaConf, open_dict +from PIL import Image from pytorch_lightning import Trainer from pytorch_lightning.plugins.environments import TorchElasticEnvironment from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion, MegatronLatentDiffusion from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler from nemo.collections.multimodal.models.stable_diffusion.samplers.plms import PLMSSampler -from nemo.collections.nlp.parts.nlp_overrides import ( - NLPDDPStrategy, - NLPSaveRestoreConnector, -) +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector def encode_prompt(cond_stage_model, prompt, unconditional_guidance_scale, batch_size): c = cond_stage_model.encode(batch_size * [prompt]) - if unconditional_guidance_scale != 1.: + if unconditional_guidance_scale != 1.0: uc = cond_stage_model.encode(batch_size * [""]) else: uc = None @@ -53,7 +50,7 @@ def initialize_sampler(model, sampler_type): def decode_images(model, samples): images = model.decode_first_stage(samples) - images = torch.clamp((images + 1.) / 2., min=0., max=1.) + images = torch.clamp((images + 1.0) / 2.0, min=0.0, max=1.0) return images @@ -100,8 +97,9 @@ def pipeline(model, cfg, verbose=True, rng=None): else: raise ValueError('precision must be in [32, 16, "bf16"]') - with torch.no_grad(), torch.cuda.amp.autocast(enabled=autocast_dtype in (torch.half, torch.bfloat16), - dtype=autocast_dtype, ): + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): in_channels = model.model.diffusion_model.in_channels @@ -122,9 +120,8 @@ def pipeline(model, cfg, verbose=True, rng=None): latent_shape = [batch_size, height // downsampling_factor, width // downsampling_factor] latents = torch.randn( - [batch_size, in_channels, height // downsampling_factor, width // downsampling_factor], - generator=rng).to( - torch.cuda.current_device()) + [batch_size, in_channels, height // downsampling_factor, width // downsampling_factor], generator=rng + ).to(torch.cuda.current_device()) tic = time.perf_counter() samples, intermediates = sampler.sample( @@ -136,7 +133,7 @@ def pipeline(model, cfg, verbose=True, rng=None): unconditional_guidance_scale=unconditional_guidance_scale, unconditional_conditioning=u_cond, eta=eta, - x_T=latents + x_T=latents, ) toc = time.perf_counter() sampling_time = toc - tic @@ -150,13 +147,15 @@ def pipeline(model, cfg, verbose=True, rng=None): total_time = toc_total - tic_total output.append(images) - throughput.append({ - 'text-conditioning-time': conditioning_time, - 'sampling-time': sampling_time, - 'decode-time': decode_time, - 'total-time': total_time, - 'sampling-steps': inference_steps, - }) + throughput.append( + { + 'text-conditioning-time': conditioning_time, + 'sampling-time': sampling_time, + 'decode-time': decode_time, + 'total-time': total_time, + 'sampling-steps': inference_steps, + } + ) # Convert output type and save to disk if output_type == 'torch': diff --git a/nemo/collections/multimodal/parts/stable_diffusion/utils.py b/nemo/collections/multimodal/parts/stable_diffusion/utils.py index 5c204f0af1fc..cc285038bc45 100644 --- a/nemo/collections/multimodal/parts/stable_diffusion/utils.py +++ b/nemo/collections/multimodal/parts/stable_diffusion/utils.py @@ -13,16 +13,17 @@ # limitations under the License. import importlib import multiprocessing as mp -import numpy as np -import torch -from PIL import Image, ImageDraw, ImageFont from collections import abc -from einops import rearrange from functools import partial from inspect import isfunction from queue import Queue from threading import Thread +import numpy as np +import torch +from einops import rearrange +from PIL import Image, ImageDraw, ImageFont + def log_txt_as_img(wh, xc, size=10): # wh a tuple of (width, height) @@ -34,7 +35,7 @@ def log_txt_as_img(wh, xc, size=10): draw = ImageDraw.Draw(txt) font = ImageFont.truetype('data/DejaVuSans.ttf', size=size) nc = int(40 * (wh[0] / 256)) - lines = "\n".join(xc[bi][start:start + nc] for start in range(0, len(xc[bi]), nc)) + lines = "\n".join(xc[bi][start : start + nc] for start in range(0, len(xc[bi]), nc)) try: draw.text((0, 0), lines, fill="black", font=font) @@ -117,7 +118,7 @@ def _do_parallel_data_prefetch(func, Q, data, idx, idx_to_fn=False): def parallel_data_prefetch( - func: callable, data, n_proc, target_data_type="ndarray", cpu_intensive=True, use_worker_id=False + func: callable, data, n_proc, target_data_type="ndarray", cpu_intensive=True, use_worker_id=False ): # if target_data_type not in ["ndarray", "list"]: # raise ValueError( @@ -148,21 +149,12 @@ def parallel_data_prefetch( proc = Thread # spawn processes if target_data_type == "ndarray": - arguments = [ - [func, Q, part, i, use_worker_id] - for i, part in enumerate(np.array_split(data, n_proc)) - ] + arguments = [[func, Q, part, i, use_worker_id] for i, part in enumerate(np.array_split(data, n_proc))] else: - step = ( - int(len(data) / n_proc + 1) - if len(data) % n_proc != 0 - else int(len(data) / n_proc) - ) + step = int(len(data) / n_proc + 1) if len(data) % n_proc != 0 else int(len(data) / n_proc) arguments = [ [func, Q, part, i, use_worker_id] - for i, part in enumerate( - [data[i: i + step] for i in range(0, len(data), step)] - ) + for i, part in enumerate([data[i : i + step] for i in range(0, len(data), step)]) ] processes = [] for i in range(n_proc): diff --git a/nemo/collections/multimodal/parts/utils.py b/nemo/collections/multimodal/parts/utils.py index 346ef5571768..686f92c5987b 100644 --- a/nemo/collections/multimodal/parts/utils.py +++ b/nemo/collections/multimodal/parts/utils.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. import os +from typing import Any, Callable, Dict, Tuple + import torch +from omegaconf import DictConfig, OmegaConf, open_dict from PIL import Image - -from omegaconf import OmegaConf, DictConfig, open_dict from pytorch_lightning import Trainer from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from typing import Dict, Tuple, Any, Callable -from nemo.collections.nlp.parts.nlp_overrides import ( - NLPDDPStrategy, - NLPSaveRestoreConnector, -) +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector from nemo.utils import AppState, logging from nemo.utils.distributed import initialize_distributed @@ -36,6 +33,7 @@ HAVE_MEGATRON_CORE = False + def numpy_to_pil(images): """ Convert a numpy image or a batch of images to a PIL image. @@ -53,9 +51,7 @@ def randn_like(x, generator=None): def setup_trainer_and_model_for_inference( - model_provider: Any, - cfg: DictConfig, - model_cfg_modifier: Callable, + model_provider: Any, cfg: DictConfig, model_cfg_modifier: Callable, ) -> Tuple[Trainer, Any]: """ Set up a trainer and NeMo model for inference. @@ -78,10 +74,7 @@ def setup_trainer_and_model_for_inference( # Use the NLPDDPStrategy for the distributed data parallel strategy. # We don't use DDP for async grad allreduce and don't find unused parameters. - strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, - find_unused_parameters=False, - ) + strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=False,) # Set up the trainer with the specified plugins and strategy. trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) @@ -120,12 +113,11 @@ def setup_trainer_and_model_for_inference( elif cfg.model.restore_from_path.endswith(".ckpt"): logging.warning( - "Loading from .ckpt checkpoint for inference is experimental! It doesn't support models with model parallelism!") + "Loading from .ckpt checkpoint for inference is experimental! It doesn't support models with model parallelism!" + ) model = model_provider.load_from_checkpoint( - cfg.model.restore_from_path, - hparams_file=cfg.model.get("hparams_file"), - trainer=trainer, + cfg.model.restore_from_path, hparams_file=cfg.model.get("hparams_file"), trainer=trainer, ) else: diff --git a/nemo/collections/nlp/modules/common/megatron/attention.py b/nemo/collections/nlp/modules/common/megatron/attention.py index 5b91a10eb264..d92e5306042d 100644 --- a/nemo/collections/nlp/modules/common/megatron/attention.py +++ b/nemo/collections/nlp/modules/common/megatron/attention.py @@ -673,7 +673,7 @@ def __init__( super(CoreAttention, self).__init__() self.precision = precision - self.fp16 = (precision == 16 or precision == '16') + self.fp16 = precision == 16 or precision == '16' self.bf16 = precision == 'bf16' self.multi_query_attention = multi_query_attention diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index 391760ac54cb..805ce5cfb7ac 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -358,7 +358,10 @@ def modify_state_dict(self, conf, state_dict): state_dict = new_state_dict # Modify state key for Dreambooth inference - if conf.get('target') == 'nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion': + if ( + conf.get('target') + == 'nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion' + ): new_state_dict = {} for key in state_dict.keys(): new_key = key.replace('unet', 'model.diffusion_model') @@ -368,7 +371,6 @@ def modify_state_dict(self, conf, state_dict): new_state_dict[new_key] = state_dict[key] state_dict = new_state_dict - return state_dict def restore_from( diff --git a/nemo/collections/vision/data/imagenet_classnames.py b/nemo/collections/vision/data/imagenet_classnames.py index 2b15b544df0f..4dcd3e595923 100644 --- a/nemo/collections/vision/data/imagenet_classnames.py +++ b/nemo/collections/vision/data/imagenet_classnames.py @@ -12,168 +12,1005 @@ # See the License for the specific language governing permissions and # limitations under the License. -imagenet_classnames = ["tench", "goldfish", "great white shark", "tiger shark", "hammerhead shark", "electric ray", - "stingray", "rooster", "hen", "ostrich", "brambling", "goldfinch", "house finch", "junco", - "indigo bunting", "American robin", "bulbul", "jay", "magpie", "chickadee", "American dipper", - "kite (bird of prey)", "bald eagle", "vulture", "great grey owl", "fire salamander", - "smooth newt", "newt", "spotted salamander", "axolotl", "American bullfrog", "tree frog", - "tailed frog", "loggerhead sea turtle", "leatherback sea turtle", "mud turtle", "terrapin", - "box turtle", "banded gecko", "green iguana", "Carolina anole", - "desert grassland whiptail lizard", "agama", "frilled-necked lizard", "alligator lizard", - "Gila monster", "European green lizard", "chameleon", "Komodo dragon", "Nile crocodile", - "American alligator", "triceratops", "worm snake", "ring-necked snake", - "eastern hog-nosed snake", "smooth green snake", "kingsnake", "garter snake", "water snake", - "vine snake", "night snake", "boa constrictor", "African rock python", "Indian cobra", - "green mamba", "sea snake", "Saharan horned viper", "eastern diamondback rattlesnake", - "sidewinder rattlesnake", "trilobite", "harvestman", "scorpion", "yellow garden spider", - "barn spider", "European garden spider", "southern black widow", "tarantula", "wolf spider", - "tick", "centipede", "black grouse", "ptarmigan", "ruffed grouse", "prairie grouse", "peafowl", - "quail", "partridge", "african grey parrot", "macaw", "sulphur-crested cockatoo", "lorikeet", - "coucal", "bee eater", "hornbill", "hummingbird", "jacamar", "toucan", "duck", - "red-breasted merganser", "goose", "black swan", "tusker", "echidna", "platypus", "wallaby", - "koala", "wombat", "jellyfish", "sea anemone", "brain coral", "flatworm", "nematode", "conch", - "snail", "slug", "sea slug", "chiton", "chambered nautilus", "Dungeness crab", "rock crab", - "fiddler crab", "red king crab", "American lobster", "spiny lobster", "crayfish", "hermit crab", - "isopod", "white stork", "black stork", "spoonbill", "flamingo", "little blue heron", - "great egret", "bittern bird", "crane bird", "limpkin", "common gallinule", "American coot", - "bustard", "ruddy turnstone", "dunlin", "common redshank", "dowitcher", "oystercatcher", - "pelican", "king penguin", "albatross", "grey whale", "killer whale", "dugong", "sea lion", - "Chihuahua", "Japanese Chin", "Maltese", "Pekingese", "Shih Tzu", "King Charles Spaniel", - "Papillon", "toy terrier", "Rhodesian Ridgeback", "Afghan Hound", "Basset Hound", "Beagle", - "Bloodhound", "Bluetick Coonhound", "Black and Tan Coonhound", "Treeing Walker Coonhound", - "English foxhound", "Redbone Coonhound", "borzoi", "Irish Wolfhound", "Italian Greyhound", - "Whippet", "Ibizan Hound", "Norwegian Elkhound", "Otterhound", "Saluki", "Scottish Deerhound", - "Weimaraner", "Staffordshire Bull Terrier", "American Staffordshire Terrier", - "Bedlington Terrier", "Border Terrier", "Kerry Blue Terrier", "Irish Terrier", - "Norfolk Terrier", "Norwich Terrier", "Yorkshire Terrier", "Wire Fox Terrier", - "Lakeland Terrier", "Sealyham Terrier", "Airedale Terrier", "Cairn Terrier", - "Australian Terrier", "Dandie Dinmont Terrier", "Boston Terrier", "Miniature Schnauzer", - "Giant Schnauzer", "Standard Schnauzer", "Scottish Terrier", "Tibetan Terrier", - "Australian Silky Terrier", "Soft-coated Wheaten Terrier", "West Highland White Terrier", - "Lhasa Apso", "Flat-Coated Retriever", "Curly-coated Retriever", "Golden Retriever", - "Labrador Retriever", "Chesapeake Bay Retriever", "German Shorthaired Pointer", "Vizsla", - "English Setter", "Irish Setter", "Gordon Setter", "Brittany dog", "Clumber Spaniel", - "English Springer Spaniel", "Welsh Springer Spaniel", "Cocker Spaniel", "Sussex Spaniel", - "Irish Water Spaniel", "Kuvasz", "Schipperke", "Groenendael dog", "Malinois", "Briard", - "Australian Kelpie", "Komondor", "Old English Sheepdog", "Shetland Sheepdog", "collie", - "Border Collie", "Bouvier des Flandres dog", "Rottweiler", "German Shepherd Dog", "Dobermann", - "Miniature Pinscher", "Greater Swiss Mountain Dog", "Bernese Mountain Dog", - "Appenzeller Sennenhund", "Entlebucher Sennenhund", "Boxer", "Bullmastiff", "Tibetan Mastiff", - "French Bulldog", "Great Dane", "St. Bernard", "husky", "Alaskan Malamute", "Siberian Husky", - "Dalmatian", "Affenpinscher", "Basenji", "pug", "Leonberger", "Newfoundland dog", - "Great Pyrenees dog", "Samoyed", "Pomeranian", "Chow Chow", "Keeshond", "brussels griffon", - "Pembroke Welsh Corgi", "Cardigan Welsh Corgi", "Toy Poodle", "Miniature Poodle", - "Standard Poodle", "Mexican hairless dog (xoloitzcuintli)", "grey wolf", "Alaskan tundra wolf", - "red wolf or maned wolf", "coyote", "dingo", "dhole", "African wild dog", "hyena", "red fox", - "kit fox", "Arctic fox", "grey fox", "tabby cat", "tiger cat", "Persian cat", "Siamese cat", - "Egyptian Mau", "cougar", "lynx", "leopard", "snow leopard", "jaguar", "lion", "tiger", - "cheetah", "brown bear", "American black bear", "polar bear", "sloth bear", "mongoose", - "meerkat", "tiger beetle", "ladybug", "ground beetle", "longhorn beetle", "leaf beetle", - "dung beetle", "rhinoceros beetle", "weevil", "fly", "bee", "ant", "grasshopper", - "cricket insect", "stick insect", "cockroach", "praying mantis", "cicada", "leafhopper", - "lacewing", "dragonfly", "damselfly", "red admiral butterfly", "ringlet butterfly", - "monarch butterfly", "small white butterfly", "sulphur butterfly", "gossamer-winged butterfly", - "starfish", "sea urchin", "sea cucumber", "cottontail rabbit", "hare", "Angora rabbit", - "hamster", "porcupine", "fox squirrel", "marmot", "beaver", "guinea pig", "common sorrel horse", - "zebra", "pig", "wild boar", "warthog", "hippopotamus", "ox", "water buffalo", "bison", - "ram (adult male sheep)", "bighorn sheep", "Alpine ibex", "hartebeest", "impala (antelope)", - "gazelle", "arabian camel", "llama", "weasel", "mink", "European polecat", - "black-footed ferret", "otter", "skunk", "badger", "armadillo", "three-toed sloth", "orangutan", - "gorilla", "chimpanzee", "gibbon", "siamang", "guenon", "patas monkey", "baboon", "macaque", - "langur", "black-and-white colobus", "proboscis monkey", "marmoset", "white-headed capuchin", - "howler monkey", "titi monkey", "Geoffroy's spider monkey", "common squirrel monkey", - "ring-tailed lemur", "indri", "Asian elephant", "African bush elephant", "red panda", - "giant panda", "snoek fish", "eel", "silver salmon", "rock beauty fish", "clownfish", - "sturgeon", "gar fish", "lionfish", "pufferfish", "abacus", "abaya", "academic gown", - "accordion", "acoustic guitar", "aircraft carrier", "airliner", "airship", "altar", "ambulance", - "amphibious vehicle", "analog clock", "apiary", "apron", "trash can", "assault rifle", - "backpack", "bakery", "balance beam", "balloon", "ballpoint pen", "Band-Aid", "banjo", - "baluster / handrail", "barbell", "barber chair", "barbershop", "barn", "barometer", "barrel", - "wheelbarrow", "baseball", "basketball", "bassinet", "bassoon", "swimming cap", "bath towel", - "bathtub", "station wagon", "lighthouse", "beaker", "military hat (bearskin or shako)", - "beer bottle", "beer glass", "bell tower", "baby bib", "tandem bicycle", "bikini", - "ring binder", "binoculars", "birdhouse", "boathouse", "bobsleigh", "bolo tie", "poke bonnet", - "bookcase", "bookstore", "bottle cap", "hunting bow", "bow tie", "brass memorial plaque", "bra", - "breakwater", "breastplate", "broom", "bucket", "buckle", "bulletproof vest", - "high-speed train", "butcher shop", "taxicab", "cauldron", "candle", "cannon", "canoe", - "can opener", "cardigan", "car mirror", "carousel", "tool kit", "cardboard box / carton", - "car wheel", "automated teller machine", "cassette", "cassette player", "castle", "catamaran", - "CD player", "cello", "mobile phone", "chain", "chain-link fence", "chain mail", "chainsaw", - "storage chest", "chiffonier", "bell or wind chime", "china cabinet", "Christmas stocking", - "church", "movie theater", "cleaver", "cliff dwelling", "cloak", "clogs", "cocktail shaker", - "coffee mug", "coffeemaker", "spiral or coil", "combination lock", "computer keyboard", - "candy store", "container ship", "convertible", "corkscrew", "cornet", "cowboy boot", - "cowboy hat", "cradle", "construction crane", "crash helmet", "crate", "infant bed", - "Crock Pot", "croquet ball", "crutch", "cuirass", "dam", "desk", "desktop computer", - "rotary dial telephone", "diaper", "digital clock", "digital watch", "dining table", - "dishcloth", "dishwasher", "disc brake", "dock", "dog sled", "dome", "doormat", "drilling rig", - "drum", "drumstick", "dumbbell", "Dutch oven", "electric fan", "electric guitar", - "electric locomotive", "entertainment center", "envelope", "espresso machine", "face powder", - "feather boa", "filing cabinet", "fireboat", "fire truck", "fire screen", "flagpole", "flute", - "folding chair", "football helmet", "forklift", "fountain", "fountain pen", "four-poster bed", - "freight car", "French horn", "frying pan", "fur coat", "garbage truck", - "gas mask or respirator", "gas pump", "goblet", "go-kart", "golf ball", "golf cart", "gondola", - "gong", "gown", "grand piano", "greenhouse", "radiator grille", "grocery store", "guillotine", - "hair clip", "hair spray", "half-track", "hammer", "hamper", "hair dryer", "hand-held computer", - "handkerchief", "hard disk drive", "harmonica", "harp", "combine harvester", "hatchet", - "holster", "home theater", "honeycomb", "hook", "hoop skirt", "gymnastic horizontal bar", - "horse-drawn vehicle", "hourglass", "iPod", "clothes iron", "carved pumpkin", "jeans", "jeep", - "T-shirt", "jigsaw puzzle", "rickshaw", "joystick", "kimono", "knee pad", "knot", "lab coat", - "ladle", "lampshade", "laptop computer", "lawn mower", "lens cap", "letter opener", "library", - "lifeboat", "lighter", "limousine", "ocean liner", "lipstick", "slip-on shoe", "lotion", - "music speaker", "loupe magnifying glass", "sawmill", "magnetic compass", "messenger bag", - "mailbox", "tights", "one-piece bathing suit", "manhole cover", "maraca", "marimba", "mask", - "matchstick", "maypole", "maze", "measuring cup", "medicine cabinet", "megalith", "microphone", - "microwave oven", "military uniform", "milk can", "minibus", "miniskirt", "minivan", "missile", - "mitten", "mixing bowl", "mobile home", "ford model t", "modem", "monastery", "monitor", - "moped", "mortar and pestle", "graduation cap", "mosque", "mosquito net", "vespa", - "mountain bike", "tent", "computer mouse", "mousetrap", "moving van", "muzzle", "metal nail", - "neck brace", "necklace", "baby pacifier", "notebook computer", "obelisk", "oboe", "ocarina", - "odometer", "oil filter", "pipe organ", "oscilloscope", "overskirt", "bullock cart", - "oxygen mask", "product packet / packaging", "paddle", "paddle wheel", "padlock", "paintbrush", - "pajamas", "palace", "pan flute", "paper towel", "parachute", "parallel bars", "park bench", - "parking meter", "railroad car", "patio", "payphone", "pedestal", "pencil case", - "pencil sharpener", "perfume", "Petri dish", "photocopier", "plectrum", "Pickelhaube", - "picket fence", "pickup truck", "pier", "piggy bank", "pill bottle", "pillow", "ping-pong ball", - "pinwheel", "pirate ship", "drink pitcher", "block plane", "planetarium", "plastic bag", - "plate rack", "farm plow", "plunger", "Polaroid camera", "pole", "police van", "poncho", - "pool table", "soda bottle", "plant pot", "potter's wheel", "power drill", "prayer rug", - "printer", "prison", "missile", "projector", "hockey puck", "punching bag", "purse", "quill", - "quilt", "race car", "racket", "radiator", "radio", "radio telescope", "rain barrel", - "recreational vehicle", "fishing casting reel", "reflex camera", "refrigerator", - "remote control", "restaurant", "revolver", "rifle", "rocking chair", "rotisserie", "eraser", - "rugby ball", "ruler measuring stick", "sneaker", "safe", "safety pin", "salt shaker", "sandal", - "sarong", "saxophone", "scabbard", "weighing scale", "school bus", "schooner", "scoreboard", - "CRT monitor", "screw", "screwdriver", "seat belt", "sewing machine", "shield", "shoe store", - "shoji screen / room divider", "shopping basket", "shopping cart", "shovel", "shower cap", - "shower curtain", "ski", "balaclava ski mask", "sleeping bag", "slide rule", "sliding door", - "slot machine", "snorkel", "snowmobile", "snowplow", "soap dispenser", "soccer ball", "sock", - "solar thermal collector", "sombrero", "soup bowl", "keyboard space bar", "space heater", - "space shuttle", "spatula", "motorboat", "spider web", "spindle", "sports car", "spotlight", - "stage", "steam locomotive", "through arch bridge", "steel drum", "stethoscope", "scarf", - "stone wall", "stopwatch", "stove", "strainer", "tram", "stretcher", "couch", "stupa", - "submarine", "suit", "sundial", "sunglasses", "sunglasses", "sunscreen", "suspension bridge", - "mop", "sweatshirt", "swim trunks / shorts", "swing", "electrical switch", "syringe", - "table lamp", "tank", "tape player", "teapot", "teddy bear", "television", "tennis ball", - "thatched roof", "front curtain", "thimble", "threshing machine", "throne", "tile roof", - "toaster", "tobacco shop", "toilet seat", "torch", "totem pole", "tow truck", "toy store", - "tractor", "semi-trailer truck", "tray", "trench coat", "tricycle", "trimaran", "tripod", - "triumphal arch", "trolleybus", "trombone", "hot tub", "turnstile", "typewriter keyboard", - "umbrella", "unicycle", "upright piano", "vacuum cleaner", "vase", "vaulted or arched ceiling", - "velvet fabric", "vending machine", "vestment", "viaduct", "violin", "volleyball", - "waffle iron", "wall clock", "wallet", "wardrobe", "military aircraft", "sink", - "washing machine", "water bottle", "water jug", "water tower", "whiskey jug", "whistle", - "hair wig", "window screen", "window shade", "Windsor tie", "wine bottle", "airplane wing", - "wok", "wooden spoon", "wool", "split-rail fence", "shipwreck", "sailboat", "yurt", "website", - "comic book", "crossword", "traffic or street sign", "traffic light", "dust jacket", "menu", - "plate", "guacamole", "consomme", "hot pot", "trifle", "ice cream", "popsicle", "baguette", - "bagel", "pretzel", "cheeseburger", "hot dog", "mashed potatoes", "cabbage", "broccoli", - "cauliflower", "zucchini", "spaghetti squash", "acorn squash", "butternut squash", "cucumber", - "artichoke", "bell pepper", "cardoon", "mushroom", "Granny Smith apple", "strawberry", "orange", - "lemon", "fig", "pineapple", "banana", "jackfruit", "cherimoya (custard apple)", "pomegranate", - "hay", "carbonara", "chocolate syrup", "dough", "meatloaf", "pizza", "pot pie", "burrito", - "red wine", "espresso", "tea cup", "eggnog", "mountain", "bubble", "cliff", "coral reef", - "geyser", "lakeshore", "promontory", "sandbar", "beach", "valley", "volcano", "baseball player", - "bridegroom", "scuba diver", "rapeseed", "daisy", "yellow lady's slipper", "corn", "acorn", - "rose hip", "horse chestnut seed", "coral fungus", "agaric", "gyromitra", "stinkhorn mushroom", - "earth star fungus", "hen of the woods mushroom", "bolete", "corn cob", "toilet paper"] +imagenet_classnames = [ + "tench", + "goldfish", + "great white shark", + "tiger shark", + "hammerhead shark", + "electric ray", + "stingray", + "rooster", + "hen", + "ostrich", + "brambling", + "goldfinch", + "house finch", + "junco", + "indigo bunting", + "American robin", + "bulbul", + "jay", + "magpie", + "chickadee", + "American dipper", + "kite (bird of prey)", + "bald eagle", + "vulture", + "great grey owl", + "fire salamander", + "smooth newt", + "newt", + "spotted salamander", + "axolotl", + "American bullfrog", + "tree frog", + "tailed frog", + "loggerhead sea turtle", + "leatherback sea turtle", + "mud turtle", + "terrapin", + "box turtle", + "banded gecko", + "green iguana", + "Carolina anole", + "desert grassland whiptail lizard", + "agama", + "frilled-necked lizard", + "alligator lizard", + "Gila monster", + "European green lizard", + "chameleon", + "Komodo dragon", + "Nile crocodile", + "American alligator", + "triceratops", + "worm snake", + "ring-necked snake", + "eastern hog-nosed snake", + "smooth green snake", + "kingsnake", + "garter snake", + "water snake", + "vine snake", + "night snake", + "boa constrictor", + "African rock python", + "Indian cobra", + "green mamba", + "sea snake", + "Saharan horned viper", + "eastern diamondback rattlesnake", + "sidewinder rattlesnake", + "trilobite", + "harvestman", + "scorpion", + "yellow garden spider", + "barn spider", + "European garden spider", + "southern black widow", + "tarantula", + "wolf spider", + "tick", + "centipede", + "black grouse", + "ptarmigan", + "ruffed grouse", + "prairie grouse", + "peafowl", + "quail", + "partridge", + "african grey parrot", + "macaw", + "sulphur-crested cockatoo", + "lorikeet", + "coucal", + "bee eater", + "hornbill", + "hummingbird", + "jacamar", + "toucan", + "duck", + "red-breasted merganser", + "goose", + "black swan", + "tusker", + "echidna", + "platypus", + "wallaby", + "koala", + "wombat", + "jellyfish", + "sea anemone", + "brain coral", + "flatworm", + "nematode", + "conch", + "snail", + "slug", + "sea slug", + "chiton", + "chambered nautilus", + "Dungeness crab", + "rock crab", + "fiddler crab", + "red king crab", + "American lobster", + "spiny lobster", + "crayfish", + "hermit crab", + "isopod", + "white stork", + "black stork", + "spoonbill", + "flamingo", + "little blue heron", + "great egret", + "bittern bird", + "crane bird", + "limpkin", + "common gallinule", + "American coot", + "bustard", + "ruddy turnstone", + "dunlin", + "common redshank", + "dowitcher", + "oystercatcher", + "pelican", + "king penguin", + "albatross", + "grey whale", + "killer whale", + "dugong", + "sea lion", + "Chihuahua", + "Japanese Chin", + "Maltese", + "Pekingese", + "Shih Tzu", + "King Charles Spaniel", + "Papillon", + "toy terrier", + "Rhodesian Ridgeback", + "Afghan Hound", + "Basset Hound", + "Beagle", + "Bloodhound", + "Bluetick Coonhound", + "Black and Tan Coonhound", + "Treeing Walker Coonhound", + "English foxhound", + "Redbone Coonhound", + "borzoi", + "Irish Wolfhound", + "Italian Greyhound", + "Whippet", + "Ibizan Hound", + "Norwegian Elkhound", + "Otterhound", + "Saluki", + "Scottish Deerhound", + "Weimaraner", + "Staffordshire Bull Terrier", + "American Staffordshire Terrier", + "Bedlington Terrier", + "Border Terrier", + "Kerry Blue Terrier", + "Irish Terrier", + "Norfolk Terrier", + "Norwich Terrier", + "Yorkshire Terrier", + "Wire Fox Terrier", + "Lakeland Terrier", + "Sealyham Terrier", + "Airedale Terrier", + "Cairn Terrier", + "Australian Terrier", + "Dandie Dinmont Terrier", + "Boston Terrier", + "Miniature Schnauzer", + "Giant Schnauzer", + "Standard Schnauzer", + "Scottish Terrier", + "Tibetan Terrier", + "Australian Silky Terrier", + "Soft-coated Wheaten Terrier", + "West Highland White Terrier", + "Lhasa Apso", + "Flat-Coated Retriever", + "Curly-coated Retriever", + "Golden Retriever", + "Labrador Retriever", + "Chesapeake Bay Retriever", + "German Shorthaired Pointer", + "Vizsla", + "English Setter", + "Irish Setter", + "Gordon Setter", + "Brittany dog", + "Clumber Spaniel", + "English Springer Spaniel", + "Welsh Springer Spaniel", + "Cocker Spaniel", + "Sussex Spaniel", + "Irish Water Spaniel", + "Kuvasz", + "Schipperke", + "Groenendael dog", + "Malinois", + "Briard", + "Australian Kelpie", + "Komondor", + "Old English Sheepdog", + "Shetland Sheepdog", + "collie", + "Border Collie", + "Bouvier des Flandres dog", + "Rottweiler", + "German Shepherd Dog", + "Dobermann", + "Miniature Pinscher", + "Greater Swiss Mountain Dog", + "Bernese Mountain Dog", + "Appenzeller Sennenhund", + "Entlebucher Sennenhund", + "Boxer", + "Bullmastiff", + "Tibetan Mastiff", + "French Bulldog", + "Great Dane", + "St. Bernard", + "husky", + "Alaskan Malamute", + "Siberian Husky", + "Dalmatian", + "Affenpinscher", + "Basenji", + "pug", + "Leonberger", + "Newfoundland dog", + "Great Pyrenees dog", + "Samoyed", + "Pomeranian", + "Chow Chow", + "Keeshond", + "brussels griffon", + "Pembroke Welsh Corgi", + "Cardigan Welsh Corgi", + "Toy Poodle", + "Miniature Poodle", + "Standard Poodle", + "Mexican hairless dog (xoloitzcuintli)", + "grey wolf", + "Alaskan tundra wolf", + "red wolf or maned wolf", + "coyote", + "dingo", + "dhole", + "African wild dog", + "hyena", + "red fox", + "kit fox", + "Arctic fox", + "grey fox", + "tabby cat", + "tiger cat", + "Persian cat", + "Siamese cat", + "Egyptian Mau", + "cougar", + "lynx", + "leopard", + "snow leopard", + "jaguar", + "lion", + "tiger", + "cheetah", + "brown bear", + "American black bear", + "polar bear", + "sloth bear", + "mongoose", + "meerkat", + "tiger beetle", + "ladybug", + "ground beetle", + "longhorn beetle", + "leaf beetle", + "dung beetle", + "rhinoceros beetle", + "weevil", + "fly", + "bee", + "ant", + "grasshopper", + "cricket insect", + "stick insect", + "cockroach", + "praying mantis", + "cicada", + "leafhopper", + "lacewing", + "dragonfly", + "damselfly", + "red admiral butterfly", + "ringlet butterfly", + "monarch butterfly", + "small white butterfly", + "sulphur butterfly", + "gossamer-winged butterfly", + "starfish", + "sea urchin", + "sea cucumber", + "cottontail rabbit", + "hare", + "Angora rabbit", + "hamster", + "porcupine", + "fox squirrel", + "marmot", + "beaver", + "guinea pig", + "common sorrel horse", + "zebra", + "pig", + "wild boar", + "warthog", + "hippopotamus", + "ox", + "water buffalo", + "bison", + "ram (adult male sheep)", + "bighorn sheep", + "Alpine ibex", + "hartebeest", + "impala (antelope)", + "gazelle", + "arabian camel", + "llama", + "weasel", + "mink", + "European polecat", + "black-footed ferret", + "otter", + "skunk", + "badger", + "armadillo", + "three-toed sloth", + "orangutan", + "gorilla", + "chimpanzee", + "gibbon", + "siamang", + "guenon", + "patas monkey", + "baboon", + "macaque", + "langur", + "black-and-white colobus", + "proboscis monkey", + "marmoset", + "white-headed capuchin", + "howler monkey", + "titi monkey", + "Geoffroy's spider monkey", + "common squirrel monkey", + "ring-tailed lemur", + "indri", + "Asian elephant", + "African bush elephant", + "red panda", + "giant panda", + "snoek fish", + "eel", + "silver salmon", + "rock beauty fish", + "clownfish", + "sturgeon", + "gar fish", + "lionfish", + "pufferfish", + "abacus", + "abaya", + "academic gown", + "accordion", + "acoustic guitar", + "aircraft carrier", + "airliner", + "airship", + "altar", + "ambulance", + "amphibious vehicle", + "analog clock", + "apiary", + "apron", + "trash can", + "assault rifle", + "backpack", + "bakery", + "balance beam", + "balloon", + "ballpoint pen", + "Band-Aid", + "banjo", + "baluster / handrail", + "barbell", + "barber chair", + "barbershop", + "barn", + "barometer", + "barrel", + "wheelbarrow", + "baseball", + "basketball", + "bassinet", + "bassoon", + "swimming cap", + "bath towel", + "bathtub", + "station wagon", + "lighthouse", + "beaker", + "military hat (bearskin or shako)", + "beer bottle", + "beer glass", + "bell tower", + "baby bib", + "tandem bicycle", + "bikini", + "ring binder", + "binoculars", + "birdhouse", + "boathouse", + "bobsleigh", + "bolo tie", + "poke bonnet", + "bookcase", + "bookstore", + "bottle cap", + "hunting bow", + "bow tie", + "brass memorial plaque", + "bra", + "breakwater", + "breastplate", + "broom", + "bucket", + "buckle", + "bulletproof vest", + "high-speed train", + "butcher shop", + "taxicab", + "cauldron", + "candle", + "cannon", + "canoe", + "can opener", + "cardigan", + "car mirror", + "carousel", + "tool kit", + "cardboard box / carton", + "car wheel", + "automated teller machine", + "cassette", + "cassette player", + "castle", + "catamaran", + "CD player", + "cello", + "mobile phone", + "chain", + "chain-link fence", + "chain mail", + "chainsaw", + "storage chest", + "chiffonier", + "bell or wind chime", + "china cabinet", + "Christmas stocking", + "church", + "movie theater", + "cleaver", + "cliff dwelling", + "cloak", + "clogs", + "cocktail shaker", + "coffee mug", + "coffeemaker", + "spiral or coil", + "combination lock", + "computer keyboard", + "candy store", + "container ship", + "convertible", + "corkscrew", + "cornet", + "cowboy boot", + "cowboy hat", + "cradle", + "construction crane", + "crash helmet", + "crate", + "infant bed", + "Crock Pot", + "croquet ball", + "crutch", + "cuirass", + "dam", + "desk", + "desktop computer", + "rotary dial telephone", + "diaper", + "digital clock", + "digital watch", + "dining table", + "dishcloth", + "dishwasher", + "disc brake", + "dock", + "dog sled", + "dome", + "doormat", + "drilling rig", + "drum", + "drumstick", + "dumbbell", + "Dutch oven", + "electric fan", + "electric guitar", + "electric locomotive", + "entertainment center", + "envelope", + "espresso machine", + "face powder", + "feather boa", + "filing cabinet", + "fireboat", + "fire truck", + "fire screen", + "flagpole", + "flute", + "folding chair", + "football helmet", + "forklift", + "fountain", + "fountain pen", + "four-poster bed", + "freight car", + "French horn", + "frying pan", + "fur coat", + "garbage truck", + "gas mask or respirator", + "gas pump", + "goblet", + "go-kart", + "golf ball", + "golf cart", + "gondola", + "gong", + "gown", + "grand piano", + "greenhouse", + "radiator grille", + "grocery store", + "guillotine", + "hair clip", + "hair spray", + "half-track", + "hammer", + "hamper", + "hair dryer", + "hand-held computer", + "handkerchief", + "hard disk drive", + "harmonica", + "harp", + "combine harvester", + "hatchet", + "holster", + "home theater", + "honeycomb", + "hook", + "hoop skirt", + "gymnastic horizontal bar", + "horse-drawn vehicle", + "hourglass", + "iPod", + "clothes iron", + "carved pumpkin", + "jeans", + "jeep", + "T-shirt", + "jigsaw puzzle", + "rickshaw", + "joystick", + "kimono", + "knee pad", + "knot", + "lab coat", + "ladle", + "lampshade", + "laptop computer", + "lawn mower", + "lens cap", + "letter opener", + "library", + "lifeboat", + "lighter", + "limousine", + "ocean liner", + "lipstick", + "slip-on shoe", + "lotion", + "music speaker", + "loupe magnifying glass", + "sawmill", + "magnetic compass", + "messenger bag", + "mailbox", + "tights", + "one-piece bathing suit", + "manhole cover", + "maraca", + "marimba", + "mask", + "matchstick", + "maypole", + "maze", + "measuring cup", + "medicine cabinet", + "megalith", + "microphone", + "microwave oven", + "military uniform", + "milk can", + "minibus", + "miniskirt", + "minivan", + "missile", + "mitten", + "mixing bowl", + "mobile home", + "ford model t", + "modem", + "monastery", + "monitor", + "moped", + "mortar and pestle", + "graduation cap", + "mosque", + "mosquito net", + "vespa", + "mountain bike", + "tent", + "computer mouse", + "mousetrap", + "moving van", + "muzzle", + "metal nail", + "neck brace", + "necklace", + "baby pacifier", + "notebook computer", + "obelisk", + "oboe", + "ocarina", + "odometer", + "oil filter", + "pipe organ", + "oscilloscope", + "overskirt", + "bullock cart", + "oxygen mask", + "product packet / packaging", + "paddle", + "paddle wheel", + "padlock", + "paintbrush", + "pajamas", + "palace", + "pan flute", + "paper towel", + "parachute", + "parallel bars", + "park bench", + "parking meter", + "railroad car", + "patio", + "payphone", + "pedestal", + "pencil case", + "pencil sharpener", + "perfume", + "Petri dish", + "photocopier", + "plectrum", + "Pickelhaube", + "picket fence", + "pickup truck", + "pier", + "piggy bank", + "pill bottle", + "pillow", + "ping-pong ball", + "pinwheel", + "pirate ship", + "drink pitcher", + "block plane", + "planetarium", + "plastic bag", + "plate rack", + "farm plow", + "plunger", + "Polaroid camera", + "pole", + "police van", + "poncho", + "pool table", + "soda bottle", + "plant pot", + "potter's wheel", + "power drill", + "prayer rug", + "printer", + "prison", + "missile", + "projector", + "hockey puck", + "punching bag", + "purse", + "quill", + "quilt", + "race car", + "racket", + "radiator", + "radio", + "radio telescope", + "rain barrel", + "recreational vehicle", + "fishing casting reel", + "reflex camera", + "refrigerator", + "remote control", + "restaurant", + "revolver", + "rifle", + "rocking chair", + "rotisserie", + "eraser", + "rugby ball", + "ruler measuring stick", + "sneaker", + "safe", + "safety pin", + "salt shaker", + "sandal", + "sarong", + "saxophone", + "scabbard", + "weighing scale", + "school bus", + "schooner", + "scoreboard", + "CRT monitor", + "screw", + "screwdriver", + "seat belt", + "sewing machine", + "shield", + "shoe store", + "shoji screen / room divider", + "shopping basket", + "shopping cart", + "shovel", + "shower cap", + "shower curtain", + "ski", + "balaclava ski mask", + "sleeping bag", + "slide rule", + "sliding door", + "slot machine", + "snorkel", + "snowmobile", + "snowplow", + "soap dispenser", + "soccer ball", + "sock", + "solar thermal collector", + "sombrero", + "soup bowl", + "keyboard space bar", + "space heater", + "space shuttle", + "spatula", + "motorboat", + "spider web", + "spindle", + "sports car", + "spotlight", + "stage", + "steam locomotive", + "through arch bridge", + "steel drum", + "stethoscope", + "scarf", + "stone wall", + "stopwatch", + "stove", + "strainer", + "tram", + "stretcher", + "couch", + "stupa", + "submarine", + "suit", + "sundial", + "sunglasses", + "sunglasses", + "sunscreen", + "suspension bridge", + "mop", + "sweatshirt", + "swim trunks / shorts", + "swing", + "electrical switch", + "syringe", + "table lamp", + "tank", + "tape player", + "teapot", + "teddy bear", + "television", + "tennis ball", + "thatched roof", + "front curtain", + "thimble", + "threshing machine", + "throne", + "tile roof", + "toaster", + "tobacco shop", + "toilet seat", + "torch", + "totem pole", + "tow truck", + "toy store", + "tractor", + "semi-trailer truck", + "tray", + "trench coat", + "tricycle", + "trimaran", + "tripod", + "triumphal arch", + "trolleybus", + "trombone", + "hot tub", + "turnstile", + "typewriter keyboard", + "umbrella", + "unicycle", + "upright piano", + "vacuum cleaner", + "vase", + "vaulted or arched ceiling", + "velvet fabric", + "vending machine", + "vestment", + "viaduct", + "violin", + "volleyball", + "waffle iron", + "wall clock", + "wallet", + "wardrobe", + "military aircraft", + "sink", + "washing machine", + "water bottle", + "water jug", + "water tower", + "whiskey jug", + "whistle", + "hair wig", + "window screen", + "window shade", + "Windsor tie", + "wine bottle", + "airplane wing", + "wok", + "wooden spoon", + "wool", + "split-rail fence", + "shipwreck", + "sailboat", + "yurt", + "website", + "comic book", + "crossword", + "traffic or street sign", + "traffic light", + "dust jacket", + "menu", + "plate", + "guacamole", + "consomme", + "hot pot", + "trifle", + "ice cream", + "popsicle", + "baguette", + "bagel", + "pretzel", + "cheeseburger", + "hot dog", + "mashed potatoes", + "cabbage", + "broccoli", + "cauliflower", + "zucchini", + "spaghetti squash", + "acorn squash", + "butternut squash", + "cucumber", + "artichoke", + "bell pepper", + "cardoon", + "mushroom", + "Granny Smith apple", + "strawberry", + "orange", + "lemon", + "fig", + "pineapple", + "banana", + "jackfruit", + "cherimoya (custard apple)", + "pomegranate", + "hay", + "carbonara", + "chocolate syrup", + "dough", + "meatloaf", + "pizza", + "pot pie", + "burrito", + "red wine", + "espresso", + "tea cup", + "eggnog", + "mountain", + "bubble", + "cliff", + "coral reef", + "geyser", + "lakeshore", + "promontory", + "sandbar", + "beach", + "valley", + "volcano", + "baseball player", + "bridegroom", + "scuba diver", + "rapeseed", + "daisy", + "yellow lady's slipper", + "corn", + "acorn", + "rose hip", + "horse chestnut seed", + "coral fungus", + "agaric", + "gyromitra", + "stinkhorn mushroom", + "earth star fungus", + "hen of the woods mushroom", + "bolete", + "corn cob", + "toilet paper", +] diff --git a/nemo/collections/vision/data/megatron/autoaugment.py b/nemo/collections/vision/data/megatron/autoaugment.py index 7038a7434d51..b55f395ed430 100644 --- a/nemo/collections/vision/data/megatron/autoaugment.py +++ b/nemo/collections/vision/data/megatron/autoaugment.py @@ -22,11 +22,10 @@ [1] https://arxiv.org/abs/1805.09501 """ -import numpy as np import random -from PIL import Image -from PIL import ImageEnhance -from PIL import ImageOps + +import numpy as np +from PIL import Image, ImageEnhance, ImageOps _MAX_LEVEL = 10 # Maximum integer strength of an augmentation, if applicable. @@ -104,14 +103,7 @@ class SubPolicy: """ def __init__( - self, - operation1, - probability1, - magnitude_idx1, - operation2, - probability2, - magnitude_idx2, - fillcolor, + self, operation1, probability1, magnitude_idx1, operation2, probability2, magnitude_idx2, fillcolor, ): """Initialize a SubPolicy. @@ -151,19 +143,19 @@ def __init__( "invert", ] assert (operation1 in supported_ops) and ( - operation2 in supported_ops + operation2 in supported_ops ), "SubPolicy:one of oper1 or oper2 refers to an unsupported operation." assert ( - 0.0 <= probability1 <= 1.0 and 0.0 <= probability2 <= 1.0 + 0.0 <= probability1 <= 1.0 and 0.0 <= probability2 <= 1.0 ), "SubPolicy: prob1 and prob2 should be within [0., 1.]." assert ( - isinstance(magnitude_idx1, int) and 0 <= magnitude_idx1 <= 10 + isinstance(magnitude_idx1, int) and 0 <= magnitude_idx1 <= 10 ), "SubPolicy: idx1 should be specified as an integer within [0, 10]." assert ( - isinstance(magnitude_idx2, int) and 0 <= magnitude_idx2 <= 10 + isinstance(magnitude_idx2, int) and 0 <= magnitude_idx2 <= 10 ), "SubPolicy: idx2 should be specified as an integer within [0, 10]." # Define a dictionary where each key refers to a specific type of @@ -177,19 +169,14 @@ def __init__( "translateY": np.linspace(0, 150 / 331, num_levels), "rotate": np.linspace(0, 30, num_levels), "color": np.linspace(0.0, 0.9, num_levels), - "posterize": np.round(np.linspace(8, 4, num_levels), 0).astype( - np.int - ), + "posterize": np.round(np.linspace(8, 4, num_levels), 0).astype(np.int), "solarize": np.linspace(256, 0, num_levels), # range [0, 256] "contrast": np.linspace(0.0, 0.9, num_levels), "sharpness": np.linspace(0.0, 0.9, num_levels), "brightness": np.linspace(0.0, 0.9, num_levels), - "autocontrast": [0] - * num_levels, # This augmentation doesn't use magnitude parameter. - "equalize": [0] - * num_levels, # This augmentation doesn't use magnitude parameter. - "invert": [0] - * num_levels, # This augmentation doesn't use magnitude parameter. + "autocontrast": [0] * num_levels, # This augmentation doesn't use magnitude parameter. + "equalize": [0] * num_levels, # This augmentation doesn't use magnitude parameter. + "invert": [0] * num_levels, # This augmentation doesn't use magnitude parameter. } def rotate_with_fill(img, magnitude): @@ -206,9 +193,7 @@ def rotate_with_fill(img, magnitude): disoccluded areas unveiled by the rotation. """ rotated = img.convert("RGBA").rotate(magnitude) - rotated_filled = Image.composite( - rotated, Image.new("RGBA", rotated.size, (128,) * 4), rotated - ) + rotated_filled = Image.composite(rotated, Image.new("RGBA", rotated.size, (128,) * 4), rotated) return rotated_filled.convert(img.mode) # Define a dictionary of augmentation functions where each key refers @@ -233,48 +218,28 @@ def rotate_with_fill(img, magnitude): "translateX": lambda img, magnitude: img.transform( img.size, Image.AFFINE, - ( - 1, - 0, - magnitude * img.size[0] * random.choice([-1, 1]), - 0, - 1, - 0, - ), + (1, 0, magnitude * img.size[0] * random.choice([-1, 1]), 0, 1, 0,), fillcolor=fillcolor, ), "translateY": lambda img, magnitude: img.transform( img.size, Image.AFFINE, - ( - 1, - 0, - 0, - 0, - 1, - magnitude * img.size[1] * random.choice([-1, 1]), - ), + (1, 0, 0, 0, 1, magnitude * img.size[1] * random.choice([-1, 1]),), fillcolor=fillcolor, ), "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude), - "color": lambda img, magnitude: ImageEnhance.Color(img).enhance( + "color": lambda img, magnitude: ImageEnhance.Color(img).enhance(1 + magnitude * random.choice([-1, 1])), + "posterize": lambda img, magnitude: ImageOps.posterize(img, magnitude), + "solarize": lambda img, magnitude: ImageOps.solarize(img, magnitude), + "contrast": lambda img, magnitude: ImageEnhance.Contrast(img).enhance( 1 + magnitude * random.choice([-1, 1]) ), - "posterize": lambda img, magnitude: ImageOps.posterize( - img, magnitude + "sharpness": lambda img, magnitude: ImageEnhance.Sharpness(img).enhance( + 1 + magnitude * random.choice([-1, 1]) ), - "solarize": lambda img, magnitude: ImageOps.solarize( - img, magnitude + "brightness": lambda img, magnitude: ImageEnhance.Brightness(img).enhance( + 1 + magnitude * random.choice([-1, 1]) ), - "contrast": lambda img, magnitude: ImageEnhance.Contrast( - img - ).enhance(1 + magnitude * random.choice([-1, 1])), - "sharpness": lambda img, magnitude: ImageEnhance.Sharpness( - img - ).enhance(1 + magnitude * random.choice([-1, 1])), - "brightness": lambda img, magnitude: ImageEnhance.Brightness( - img - ).enhance(1 + magnitude * random.choice([-1, 1])), "autocontrast": lambda img, magnitude: ImageOps.autocontrast(img), "equalize": lambda img, magnitude: ImageOps.equalize(img), "invert": lambda img, magnitude: ImageOps.invert(img), diff --git a/nemo/collections/vision/data/megatron/data_samplers.py b/nemo/collections/vision/data/megatron/data_samplers.py index f42ee6672115..44cd8fb14149 100644 --- a/nemo/collections/vision/data/megatron/data_samplers.py +++ b/nemo/collections/vision/data/megatron/data_samplers.py @@ -11,8 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Any, Dict, List, Optional + import torch -from typing import Any, Optional, List, Dict from torch.utils.data import Dataset from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import MegatronPretrainingRandomSampler @@ -20,19 +21,18 @@ class MegatronVisionPretrainingRandomSampler(MegatronPretrainingRandomSampler): - def __init__( - self, - dataset: Dataset, - total_samples: int, - consumed_samples: int, - micro_batch_size: int, - data_parallel_rank: int, - data_parallel_size: int, - data_sharding: bool, - drop_last: bool = True, - global_batch_size: Optional[int] = None, - pad_samples_to_global_batch_size: Optional[bool] = False, + self, + dataset: Dataset, + total_samples: int, + consumed_samples: int, + micro_batch_size: int, + data_parallel_rank: int, + data_parallel_size: int, + data_sharding: bool, + drop_last: bool = True, + global_batch_size: Optional[int] = None, + pad_samples_to_global_batch_size: Optional[bool] = False, ) -> None: super().__init__( total_samples=total_samples, @@ -58,8 +58,7 @@ def __iter__(self): # data sharding and random sampling if self.data_sharding: - bucket_size = (self.total_samples // self.micro_batch_times_data_parallel_size) \ - * self.micro_batch_size + bucket_size = (self.total_samples // self.micro_batch_times_data_parallel_size) * self.micro_batch_size bucket_offset = current_epoch_samples // self.data_parallel_size start_idx = self.data_parallel_rank * bucket_size @@ -68,15 +67,13 @@ def __iter__(self): random_idx = torch.randperm(bucket_size, generator=g).tolist() idx_range = [start_idx + x for x in random_idx[bucket_offset:]] else: - full_bucket_size = (self.total_samples // self.micro_batch_size) \ - * self.micro_batch_size + full_bucket_size = (self.total_samples // self.micro_batch_size) * self.micro_batch_size full_bucket_offset = current_epoch_samples g = torch.Generator() g.manual_seed(self.epoch) - idx_range_total = \ - torch.randperm(full_bucket_size, generator=g).tolist() + idx_range_total = torch.randperm(full_bucket_size, generator=g).tolist() idx_range_active = idx_range_total[full_bucket_offset:] - idx_range = idx_range_active[self.data_parallel_rank::self.data_parallel_size] + idx_range = idx_range_active[self.data_parallel_rank :: self.data_parallel_size] batch = [] # Last batch if not complete will be dropped. diff --git a/nemo/collections/vision/data/megatron/image_folder.py b/nemo/collections/vision/data/megatron/image_folder.py index 1ffa852408fa..44138dec3320 100644 --- a/nemo/collections/vision/data/megatron/image_folder.py +++ b/nemo/collections/vision/data/megatron/image_folder.py @@ -15,12 +15,13 @@ # https://github.com/pytorch/vision/blob/main/torchvision/datasets/folder.py # added support for classes_fraction and data_per_class_fraction -import numpy as np import os import os.path +from typing import Any, Callable, Dict, List, Optional, Tuple, cast + +import numpy as np from PIL import Image from torchvision.datasets import VisionDataset -from typing import Any, Callable, cast, Dict, List, Optional, Tuple def has_file_allowed_extension(filename: str, extensions: Tuple[str, ...]) -> bool: @@ -45,11 +46,11 @@ def is_image_file(filename: str) -> bool: def make_dataset( - directory: str, - class_to_idx: Dict[str, int], - data_per_class_fraction: float, - extensions: Optional[Tuple[str, ...]] = None, - is_valid_file: Optional[Callable[[str], bool]] = None, + directory: str, + class_to_idx: Dict[str, int], + data_per_class_fraction: float, + extensions: Optional[Tuple[str, ...]] = None, + is_valid_file: Optional[Callable[[str], bool]] = None, ) -> List[Tuple[str, int]]: """Generates a list of samples of a form (path_to_sample, class). Args: @@ -73,8 +74,10 @@ def make_dataset( if both_none or both_something: raise ValueError("Both extensions and is_valid_file cannot be None or not None at the same time") if extensions is not None: + def is_valid_file(x: str) -> bool: return has_file_allowed_extension(x, cast(Tuple[str, ...], extensions)) + is_valid_file = cast(Callable[[str], bool], is_valid_file) for target_class in sorted(class_to_idx.keys()): class_index = class_to_idx[target_class] @@ -89,7 +92,7 @@ def is_valid_file(x: str) -> bool: item = path, class_index local_instances.append(item) - instances.extend(local_instances[0:int(len(local_instances) * data_per_class_fraction)]) + instances.extend(local_instances[0 : int(len(local_instances) * data_per_class_fraction)]) return instances @@ -123,26 +126,21 @@ class DatasetFolder(VisionDataset): """ def __init__( - self, - root: str, - loader: Callable[[str], Any], - extensions: Optional[Tuple[str, ...]] = None, - transform: Optional[Callable] = None, - target_transform: Optional[Callable] = None, - classes_fraction=1.0, - data_per_class_fraction=1.0, - is_valid_file: Optional[Callable[[str], bool]] = None, + self, + root: str, + loader: Callable[[str], Any], + extensions: Optional[Tuple[str, ...]] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + classes_fraction=1.0, + data_per_class_fraction=1.0, + is_valid_file: Optional[Callable[[str], bool]] = None, ) -> None: - super(DatasetFolder, self).__init__(root, transform=transform, - target_transform=target_transform) + super(DatasetFolder, self).__init__(root, transform=transform, target_transform=target_transform) self.classes_fraction = classes_fraction self.data_per_class_fraction = data_per_class_fraction classes, class_to_idx = self._find_classes(self.root) - samples = self.make_dataset(self.root, - class_to_idx, - self.data_per_class_fraction, - extensions, - is_valid_file) + samples = self.make_dataset(self.root, class_to_idx, self.data_per_class_fraction, extensions, is_valid_file) if len(samples) == 0: msg = "Found 0 files in subfolders of: {}\n".format(self.root) if extensions is not None: @@ -159,17 +157,15 @@ def __init__( @staticmethod def make_dataset( - directory: str, - class_to_idx: Dict[str, int], - data_per_class_fraction: float, - extensions: Optional[Tuple[str, ...]] = None, - is_valid_file: Optional[Callable[[str], bool]] = None, + directory: str, + class_to_idx: Dict[str, int], + data_per_class_fraction: float, + extensions: Optional[Tuple[str, ...]] = None, + is_valid_file: Optional[Callable[[str], bool]] = None, ) -> List[Tuple[str, int]]: - return make_dataset(directory, - class_to_idx, - data_per_class_fraction, - extensions=extensions, - is_valid_file=is_valid_file) + return make_dataset( + directory, class_to_idx, data_per_class_fraction, extensions=extensions, is_valid_file=is_valid_file + ) def _find_classes(self, dir: str) -> Tuple[List[str], Dict[str, int]]: """ @@ -182,7 +178,7 @@ def _find_classes(self, dir: str) -> Tuple[List[str], Dict[str, int]]: No class is a subdirectory of another. """ all_classes = [d.name for d in os.scandir(dir) if d.is_dir()] - classes = all_classes[0:int(len(all_classes) * self.classes_fraction)] + classes = all_classes[0 : int(len(all_classes) * self.classes_fraction)] classes.sort() class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)} return classes, class_to_idx @@ -227,6 +223,7 @@ def pil_loader(path: str) -> Image.Image: # TODO: specify the return type def accimage_loader(path: str) -> Any: import accimage + try: return accimage.Image(path) except IOError: @@ -236,6 +233,7 @@ def accimage_loader(path: str) -> Any: def default_loader(path: str) -> Any: from torchvision import get_image_backend + if get_image_backend() == 'accimage': return accimage_loader(path) else: @@ -266,19 +264,23 @@ class ImageFolder(DatasetFolder): """ def __init__( - self, - root: str, - transform: Optional[Callable] = None, - target_transform: Optional[Callable] = None, - classes_fraction=1.0, - data_per_class_fraction=1.0, - loader: Callable[[str], Any] = default_loader, - is_valid_file: Optional[Callable[[str], bool]] = None, + self, + root: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + classes_fraction=1.0, + data_per_class_fraction=1.0, + loader: Callable[[str], Any] = default_loader, + is_valid_file: Optional[Callable[[str], bool]] = None, ): - super(ImageFolder, self).__init__(root, loader, IMG_EXTENSIONS if is_valid_file is None else None, - transform=transform, - target_transform=target_transform, - classes_fraction=classes_fraction, - data_per_class_fraction=data_per_class_fraction, - is_valid_file=is_valid_file) + super(ImageFolder, self).__init__( + root, + loader, + IMG_EXTENSIONS if is_valid_file is None else None, + transform=transform, + target_transform=target_transform, + classes_fraction=classes_fraction, + data_per_class_fraction=data_per_class_fraction, + is_valid_file=is_valid_file, + ) self.imgs = self.samples diff --git a/nemo/collections/vision/data/megatron/vit_dataset.py b/nemo/collections/vision/data/megatron/vit_dataset.py index 057698153275..ad4c8d47e781 100644 --- a/nemo/collections/vision/data/megatron/vit_dataset.py +++ b/nemo/collections/vision/data/megatron/vit_dataset.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import random + +import numpy as np import torch import torchvision.transforms as T from PIL import Image, ImageFilter, ImageOps @@ -35,7 +36,6 @@ def _to_torch_data_type(precision): class RandomSeedDataset(Dataset): - def __init__(self, dataset, seed=1234): self.base_seed = seed self.curr_seed = seed @@ -60,7 +60,7 @@ class GaussianBlur(object): Apply Gaussian Blur to the PIL image. """ - def __init__(self, p=0.5, radius_min=0.1, radius_max=2.): + def __init__(self, p=0.5, radius_min=0.1, radius_max=2.0): self.prob = p self.radius_min = radius_min self.radius_max = radius_max @@ -70,11 +70,7 @@ def __call__(self, img): if not do_it: return img - return img.filter( - ImageFilter.GaussianBlur( - radius=random.uniform(self.radius_min, self.radius_max) - ) - ) + return img.filter(ImageFilter.GaussianBlur(radius=random.uniform(self.radius_min, self.radius_max))) class Solarization(object): @@ -92,34 +88,38 @@ def __call__(self, img): return img -class ClassificationTransform(): +class ClassificationTransform: def __init__(self, model_cfg, image_size, train=True): self.data_type = _to_torch_data_type(model_cfg.precision) if train: - self.transform = T.Compose([ - T.RandomResizedCrop(image_size), - T.RandomHorizontalFlip(), - T.ColorJitter(0.4, 0.4, 0.4, 0.1), - ImageNetPolicy(), - T.ToTensor(), - T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), - T.ConvertImageDtype(self.data_type) - ]) + self.transform = T.Compose( + [ + T.RandomResizedCrop(image_size), + T.RandomHorizontalFlip(), + T.ColorJitter(0.4, 0.4, 0.4, 0.1), + ImageNetPolicy(), + T.ToTensor(), + T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + T.ConvertImageDtype(self.data_type), + ] + ) else: - self.transform = T.Compose([ - T.Resize(image_size), - T.CenterCrop(image_size), - T.ToTensor(), - T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), - T.ConvertImageDtype(self.data_type) - ]) + self.transform = T.Compose( + [ + T.Resize(image_size), + T.CenterCrop(image_size), + T.ToTensor(), + T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + T.ConvertImageDtype(self.data_type), + ] + ) def __call__(self, input): output = self.transform(input) return output -class InpaintingTransform(): +class InpaintingTransform: def __init__(self, model_cfg, image_size, train=True): self.mask_factor = model_cfg.mask_factor self.mask_type = model_cfg.mask_type @@ -130,21 +130,25 @@ def __init__(self, model_cfg, image_size, train=True): self.data_type = _to_torch_data_type(model_cfg.precision) if self.train: - self.transform = T.Compose([ - T.RandomResizedCrop(self.image_size), - T.RandomHorizontalFlip(), - T.ColorJitter(0.4, 0.4, 0.4, 0.1), - ImageNetPolicy(), - T.ToTensor(), - T.ConvertImageDtype(self.data_type) - ]) + self.transform = T.Compose( + [ + T.RandomResizedCrop(self.image_size), + T.RandomHorizontalFlip(), + T.ColorJitter(0.4, 0.4, 0.4, 0.1), + ImageNetPolicy(), + T.ToTensor(), + T.ConvertImageDtype(self.data_type), + ] + ) else: - self.transform = T.Compose([ - T.Resize(self.image_size, interpolation=2), - T.CenterCrop(self.image_size), - T.ToTensor(), - T.ConvertImageDtype(self.data_type) - ]) + self.transform = T.Compose( + [ + T.Resize(self.image_size, interpolation=2), + T.CenterCrop(self.image_size), + T.ToTensor(), + T.ConvertImageDtype(self.data_type), + ] + ) def gen_mask(self, image_size, mask_size, mask_type, patch_size): # output: mask as a list with indices for missing patches @@ -164,23 +168,22 @@ def gen_mask(self, image_size, mask_size, mask_type, patch_size): y = torch.clamp(y + action_list[r][1], min=0, max=img_size_patch - 1) x_offset = x * patch_size y_offset = y * patch_size - mask[x_offset:x_offset + patch_size, y_offset:y_offset + patch_size] = 1 + mask[x_offset : x_offset + patch_size, y_offset : y_offset + patch_size] = 1 else: assert mask_type == 'row' count = 0 for x in reversed(range(img_size_patch)): for y in reversed(range(img_size_patch)): - if (count < mask_size): + if count < mask_size: count += 1 x_offset = x * patch_size y_offset = y * patch_size - mask[x_offset:x_offset + patch_size, y_offset:y_offset + patch_size] = 1 + mask[x_offset : x_offset + patch_size, y_offset : y_offset + patch_size] = 1 return mask def __call__(self, input): trans_input = self.transform(input) - mask = self.gen_mask(self.image_size, self.mask_size, - self.mask_type, self.patch_size) + mask = self.gen_mask(self.image_size, self.mask_size, self.mask_type, self.patch_size) mask = mask.unsqueeze(dim=0) return trans_input, mask @@ -188,58 +191,57 @@ def __call__(self, input): class DinoTransform(object): def __init__(self, model_cfg, image_size, train=True): self.data_type = _to_torch_data_type(model_cfg.precision) - flip_and_color_jitter = T.Compose([ - T.RandomHorizontalFlip(p=0.5), - T.RandomApply( - [T.ColorJitter(brightness=0.4, contrast=0.4, - saturation=0.2, hue=0.1)], - p=0.8 - ), - T.RandomGrayscale(p=0.2), - ]) + flip_and_color_jitter = T.Compose( + [ + T.RandomHorizontalFlip(p=0.5), + T.RandomApply([T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.2, hue=0.1)], p=0.8), + T.RandomGrayscale(p=0.2), + ] + ) if model_cfg.precision in [16, "bf16"]: - normalize = T.Compose([ - T.ToTensor(), - T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), - T.ConvertImageDtype(self.data_type) - ]) + normalize = T.Compose( + [ + T.ToTensor(), + T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + T.ConvertImageDtype(self.data_type), + ] + ) else: - normalize = T.Compose([ - T.ToTensor(), - T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), - ]) + normalize = T.Compose([T.ToTensor(), T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),]) # first global crop scale_const = 0.4 - self.global_transform1 = T.Compose([ - T.RandomResizedCrop(image_size, - scale=(scale_const, 1), - interpolation=Image.BICUBIC), - flip_and_color_jitter, - GaussianBlur(1.0), - normalize - ]) + self.global_transform1 = T.Compose( + [ + T.RandomResizedCrop(image_size, scale=(scale_const, 1), interpolation=Image.BICUBIC), + flip_and_color_jitter, + GaussianBlur(1.0), + normalize, + ] + ) # second global crop - self.global_transform2 = T.Compose([ - T.RandomResizedCrop(image_size, - scale=(scale_const, 1), - interpolation=Image.BICUBIC), - flip_and_color_jitter, - GaussianBlur(0.1), - Solarization(0.2), - normalize - ]) + self.global_transform2 = T.Compose( + [ + T.RandomResizedCrop(image_size, scale=(scale_const, 1), interpolation=Image.BICUBIC), + flip_and_color_jitter, + GaussianBlur(0.1), + Solarization(0.2), + normalize, + ] + ) # transformation for the local small crops self.local_crops_number = model_cfg.dino_local_crops_number - self.local_transform = T.Compose([ - T.RandomResizedCrop(model_cfg.dino_local_img_size, - scale=(0.05, scale_const), - interpolation=Image.BICUBIC), - flip_and_color_jitter, - GaussianBlur(p=0.5), - normalize - ]) + self.local_transform = T.Compose( + [ + T.RandomResizedCrop( + model_cfg.dino_local_img_size, scale=(0.05, scale_const), interpolation=Image.BICUBIC + ), + flip_and_color_jitter, + GaussianBlur(p=0.5), + normalize, + ] + ) def __call__(self, image): crops = [] @@ -261,8 +263,7 @@ def build_train_valid_datasets(model_cfg, data_path, image_size=224): train_transform = DinoTransform(model_cfg, image_size, train=True) val_transform = ClassificationTransform(model_cfg, image_size, train=False) else: - raise Exception('{} vit pretraining type is not supported.'.format( - model_cfg.vit_pretraining_type)) + raise Exception('{} vit pretraining type is not supported.'.format(model_cfg.vit_pretraining_type)) # training dataset train_data_path = data_path[0] if len(data_path) <= 2 else data_path[2] @@ -270,16 +271,13 @@ def build_train_valid_datasets(model_cfg, data_path, image_size=224): root=train_data_path, transform=train_transform, classes_fraction=model_cfg.classes_fraction, - data_per_class_fraction=model_cfg.data_per_class_fraction + data_per_class_fraction=model_cfg.data_per_class_fraction, ) train_data = RandomSeedDataset(train_data) # validation dataset val_data_path = data_path[1] - val_data = ImageFolder( - root=val_data_path, - transform=val_transform - ) + val_data = ImageFolder(root=val_data_path, transform=val_transform) val_data = RandomSeedDataset(val_data) return train_data, val_data diff --git a/nemo/collections/vision/models/megatron_vit_classification_models.py b/nemo/collections/vision/models/megatron_vit_classification_models.py index ef0e0d7fe66a..183f7cdb5575 100644 --- a/nemo/collections/vision/models/megatron_vit_classification_models.py +++ b/nemo/collections/vision/models/megatron_vit_classification_models.py @@ -13,29 +13,25 @@ # limitations under the License. import itertools +from functools import partial +from typing import Any, Dict, List, Optional + import numpy as np import torch -from functools import partial from omegaconf.dictconfig import DictConfig from pytorch_lightning.accelerators import CPUAccelerator from pytorch_lightning.trainer.trainer import Trainer -from typing import Any, Optional, List, Dict -from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( - MegatronPretrainingSampler, -) +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import MegatronPretrainingSampler from nemo.collections.nlp.modules.common.megatron.build_model import build_model -from nemo.collections.nlp.modules.common.megatron.module import ( - MegatronModule, - Float16Module, -) +from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule from nemo.collections.nlp.modules.common.megatron.utils import ( - get_linear_layer, - init_method_normal, - scaled_init_method_normal, average_losses_across_data_parallel_group, get_all_params_for_weight_decay_optimization, + get_linear_layer, get_params_for_weight_decay_optimization, + init_method_normal, + scaled_init_method_normal, ) from nemo.collections.nlp.parts.utils_funcs import get_last_rank from nemo.collections.vision.data.megatron.data_samplers import MegatronVisionPretrainingRandomSampler @@ -70,8 +66,7 @@ class VitClassificationModel(MegatronModule): """Vision Transformer Model.""" - def __init__(self, model_cfg, num_classes, finetune=False, - pre_process=True, post_process=True): + def __init__(self, model_cfg, num_classes, finetune=False, pre_process=True, post_process=True): super(VitClassificationModel, self).__init__() scaled_init_method = ( @@ -91,18 +86,14 @@ def __init__(self, model_cfg, num_classes, finetune=False, scaled_init_method=scaled_init_method, pre_process=self.pre_process, post_process=self.post_process, - single_token_output=True + single_token_output=True, ) if self.post_process: if not self.finetune: self.head = VitMlpHead(self.hidden_size, self.num_classes) else: - self.head = get_linear_layer( - self.hidden_size, - self.num_classes, - torch.nn.init.zeros_ - ) + self.head = get_linear_layer(self.hidden_size, self.num_classes, torch.nn.init.zeros_) def set_input_tensor(self, input_tensor): """See megatron.model.transformer.set_input_tensor()""" @@ -391,7 +382,6 @@ def allreduce_sequence_parallel_gradients(self): buf.copy_(synced) def get_forward_output_and_loss_func(self): - def loss_func(labels, output_tensor): logits = output_tensor.contiguous().float() loss = torch.nn.functional.cross_entropy(logits, labels) @@ -482,8 +472,8 @@ def validation_epoch_end(self, outputs): acc_outputs = [output[1] for output in outputs] averaged_metrics = torch.tensor( - [torch.stack(loss_outputs).mean(), torch.stack(acc_outputs).mean()], - dtype=torch.float32, device='cuda') + [torch.stack(loss_outputs).mean(), torch.stack(acc_outputs).mean()], dtype=torch.float32, device='cuda' + ) else: averaged_metrics = torch.tensor([0.0, 0.0], dtype=torch.float32, device='cuda') @@ -511,9 +501,7 @@ def build_train_valid_test_datasets(self): raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") self._train_ds, self._validation_ds = build_train_valid_datasets( - model_cfg=self.cfg, - data_path=self.cfg.data.data_path, - image_size=(self.cfg.img_h, self.cfg.img_w), + model_cfg=self.cfg, data_path=self.cfg.data.data_path, image_size=(self.cfg.img_h, self.cfg.img_w), ) self._test_ds = None @@ -649,9 +637,7 @@ def setup_validation_data(self, cfg): if not self.cfg.data.get('validation_drop_last', True): logging.info(f'Drop last in validation dataset is set to False') drop_last = False - self._validation_dl = self.build_pretraining_data_loader( - self._validation_ds, consumed_samples, - ) + self._validation_dl = self.build_pretraining_data_loader(self._validation_ds, consumed_samples,) def setup_test_data(self, cfg): if hasattr(self, '_test_ds') and self._test_ds is not None: diff --git a/nemo/collections/vision/models/vision_base_model.py b/nemo/collections/vision/models/vision_base_model.py index 1f07bfeab6a9..0e444911626b 100644 --- a/nemo/collections/vision/models/vision_base_model.py +++ b/nemo/collections/vision/models/vision_base_model.py @@ -14,6 +14,8 @@ import os import re +from typing import Any, Dict, Optional, Union + import torch from omegaconf import open_dict from omegaconf.dictconfig import DictConfig @@ -25,15 +27,17 @@ from pytorch_lightning.utilities.cloud_io import load as pl_load from pytorch_lightning.utilities.migration import pl_legacy_patch from transformers import TRANSFORMERS_CACHE -from typing import Any, Union, Dict, Optional from nemo.collections.nlp.modules.common.megatron.clip_grads import ( clip_grad_norm_distributed_optimizer, clip_grad_norm_fp32, ) from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo -from nemo.collections.nlp.parts.nlp_overrides import NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE, GradScaler -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.collections.nlp.parts.nlp_overrides import ( + NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE, + GradScaler, + NLPSaveRestoreConnector, +) from nemo.core.classes import ModelPT from nemo.core.classes.exportable import Exportable from nemo.core.optim import MainParamsOptimizerWrapper, prepare_lr_scheduler @@ -92,12 +96,12 @@ def is_model_parallel_initialized(self): @classmethod def load_from_checkpoint( - cls, - checkpoint_path: str, - map_location: Any = None, - hparams_file: Optional[str] = None, - strict: bool = True, - **kwargs, + cls, + checkpoint_path: str, + map_location: Any = None, + hparams_file: Optional[str] = None, + strict: bool = True, + **kwargs, ): """ Loads ModelPT from checkpoint, with some maintenance of restoration. @@ -351,14 +355,14 @@ def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unus # TODO: Replace with newer override for scheduler.step() instead of # search for plugins for fp16 GradScalar if self.trainer.precision_plugin is not None and isinstance( - self.trainer.precision_plugin, NativeMixedPrecisionPlugin + self.trainer.precision_plugin, NativeMixedPrecisionPlugin ): precision_plugin = self.trainer.precision_plugin if ( - hasattr(precision_plugin, 'scaler') - and precision_plugin.scaler is not None - and isinstance(precision_plugin.scaler, GradScaler) + hasattr(precision_plugin, 'scaler') + and precision_plugin.scaler is not None + and isinstance(precision_plugin.scaler, GradScaler) ): grad_scaler = precision_plugin.scaler @@ -383,7 +387,7 @@ def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unus grad_scaler.optimizer_update_skipped = None def setup_optimization( - self, optim_config: Optional[Union[DictConfig, Dict]] = None, optim_kwargs: Optional[Dict[str, Any]] = None, + self, optim_config: Optional[Union[DictConfig, Dict]] = None, optim_kwargs: Optional[Dict[str, Any]] = None, ): optim_kwargs = {} if optim_kwargs is None else optim_kwargs.copy() if self.with_distributed_adam: @@ -476,8 +480,8 @@ def configure_optimizers(self): def compute_consumed_samples(self, steps_since_resume=0): app_state = AppState() consumed_samples = ( - self.init_consumed_samples - + steps_since_resume * app_state.data_parallel_size * self.cfg.micro_batch_size * get_num_microbatches() + self.init_consumed_samples + + steps_since_resume * app_state.data_parallel_size * self.cfg.micro_batch_size * get_num_microbatches() ) return int(consumed_samples) @@ -501,8 +505,8 @@ def _validate_config(self): self.cfg.sequence_parallel = False if ( - self.cfg.get('gradient_accumulation_fusion', False) - and self.cfg.get('pipeline_model_parallel_size', 1) == 1 + self.cfg.get('gradient_accumulation_fusion', False) + and self.cfg.get('pipeline_model_parallel_size', 1) == 1 ): logging.info("Gradient accumulation fusion can only be used with pipeline parallel size > 1.") with open_dict(self.cfg): diff --git a/nemo/collections/vision/modules/common/megatron/vision_transformer.py b/nemo/collections/vision/modules/common/megatron/vision_transformer.py index 2a3c0f6e457c..b1ab196ada78 100644 --- a/nemo/collections/vision/modules/common/megatron/vision_transformer.py +++ b/nemo/collections/vision/modules/common/megatron/vision_transformer.py @@ -20,13 +20,13 @@ from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType from nemo.collections.nlp.modules.common.megatron.module import MegatronModule -from nemo.collections.nlp.modules.common.megatron.transformer import ParallelTransformerLayer_, ParallelTransformer +from nemo.collections.nlp.modules.common.megatron.transformer import ParallelTransformer, ParallelTransformerLayer_ from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults try: + from apex.normalization import MixedFusedRMSNorm from apex.transformer.enums import AttnMaskType, AttnType, ModelType from apex.transformer.utils import divide as safe_divide - from apex.normalization import MixedFusedRMSNorm HAVE_APEX = True @@ -68,19 +68,18 @@ class DropPath(MegatronModule): (when applied in main path of residual blocks). """ - def __init__(self, drop_prob=0.): + def __init__(self, drop_prob=0.0): super(DropPath, self).__init__() self.drop_prob = drop_prob def forward(self, hidden_state): - if self.drop_prob == 0. or not self.training: + if self.drop_prob == 0.0 or not self.training: return hidden_state keep_prob = 1 - self.drop_prob # work with diff dim tensors, not just 2D ConvNets # hidden_state: [s, b, h] shape = (1,) + (hidden_state.shape[1],) + (1,) * (hidden_state.ndim - 2) - random_tensor = keep_prob + \ - torch.rand(shape, dtype=hidden_state.dtype, device=hidden_state.device) + random_tensor = keep_prob + torch.rand(shape, dtype=hidden_state.dtype, device=hidden_state.device) random_tensor.floor_() # binarize output = hidden_state.div(keep_prob) * random_tensor return output @@ -94,42 +93,42 @@ class ParallelVisionTransformerLayer_(ParallelTransformerLayer_): """ def __init__( - self, - init_method, - output_layer_init_method, - layer_number, - hidden_size, - ffn_hidden_size, - num_attention_heads, - layer_type=LayerType.encoder, - self_attn_mask_type=AttnMaskType.padding, - fp32_residual_connection=False, - precision=16, - apply_query_key_layer_scaling=True, - kv_channels=None, - layernorm_epsilon=1e-5, - hidden_dropout=0.1, - bias_dropout_add_fusion=True, - persist_layer_norm=False, - use_cpu_initialization=False, - bias_activation_fusion=True, - openai_gelu=False, - onnx_safe=False, - masked_softmax_fusion=True, - attention_dropout=0.1, - ffn_dropout=0.0, - drop_path_rate=0.0, - activation='gelu', - megatron_legacy=False, - bias=True, - chunk_size=64, - normalization='layernorm', - transformer_block_type='pre_ln', - headscale=False, - activations_checkpoint_granularity=None, - sequence_parallel=False, - gradient_accumulation_fusion=False, - normalize_attention_scores=True, + self, + init_method, + output_layer_init_method, + layer_number, + hidden_size, + ffn_hidden_size, + num_attention_heads, + layer_type=LayerType.encoder, + self_attn_mask_type=AttnMaskType.padding, + fp32_residual_connection=False, + precision=16, + apply_query_key_layer_scaling=True, + kv_channels=None, + layernorm_epsilon=1e-5, + hidden_dropout=0.1, + bias_dropout_add_fusion=True, + persist_layer_norm=False, + use_cpu_initialization=False, + bias_activation_fusion=True, + openai_gelu=False, + onnx_safe=False, + masked_softmax_fusion=True, + attention_dropout=0.1, + ffn_dropout=0.0, + drop_path_rate=0.0, + activation='gelu', + megatron_legacy=False, + bias=True, + chunk_size=64, + normalization='layernorm', + transformer_block_type='pre_ln', + headscale=False, + activations_checkpoint_granularity=None, + sequence_parallel=False, + gradient_accumulation_fusion=False, + normalize_attention_scores=True, ): kwargs = locals() for key in ["self", "__class__"]: @@ -140,20 +139,20 @@ def __init__( self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0.0 else None def forward( - self, - hidden_states, - attention_mask, - encoder_output=None, - enc_dec_attn_mask=None, - layer_past=None, - get_key_value=False, - set_inference_key_value_memory=False, - inference_max_sequence_len=None, - rotary_pos_emb=None, - # list of positional embedding tensors, first one self attention, second one and third one are for cross attention (q, k) - self_attention_relative_position_bias=None, - cross_attention_relative_position_bias=None, - checkpoint_core_attention=False, + self, + hidden_states, + attention_mask, + encoder_output=None, + enc_dec_attn_mask=None, + layer_past=None, + get_key_value=False, + set_inference_key_value_memory=False, + inference_max_sequence_len=None, + rotary_pos_emb=None, + # list of positional embedding tensors, first one self attention, second one and third one are for cross attention (q, k) + self_attention_relative_position_bias=None, + cross_attention_relative_position_bias=None, + checkpoint_core_attention=False, ): # Self attention. if rotary_pos_emb is not None: @@ -212,12 +211,14 @@ def forward( if attention_bias is not None: attention_bias = attention_bias.expand_as(residual) - layernorm_input = bias_dropout_add_func(attention_output, attention_bias, residual, self.hidden_dropout) + layernorm_input = bias_dropout_add_func( + attention_output, attention_bias, residual, self.hidden_dropout + ) else: assert self.transformer_block_type != 'normformer', "Normfomer doesn't support drop_path" - out = torch.nn.functional.dropout(attention_output + attention_bias, - p=self.hidden_dropout, - training=self.training) + out = torch.nn.functional.dropout( + attention_output + attention_bias, p=self.hidden_dropout, training=self.training + ) layernorm_input = residual + self.drop_path(out) # print(f"Layer: {self.layer_number} Attention checksum {layernorm_input.sum()}") @@ -235,14 +236,14 @@ def forward( return layernorm_input, normalization_output if ( - self.layer_type == LayerType.decoder - or self.layer_type == LayerType.retrieval_decoder - or self.layer_type == LayerType.retrieval_encoder - or self.layer_type == LayerType.retrieval_decoder_after_self_attn + self.layer_type == LayerType.decoder + or self.layer_type == LayerType.retrieval_decoder + or self.layer_type == LayerType.retrieval_encoder + or self.layer_type == LayerType.retrieval_decoder_after_self_attn ): if ( - self.layer_type == LayerType.retrieval_decoder - or self.layer_type == LayerType.retrieval_decoder_after_self_attn + self.layer_type == LayerType.retrieval_decoder + or self.layer_type == LayerType.retrieval_decoder_after_self_attn ): attention_output, attention_bias = self.inter_attention( normalization_output, @@ -297,9 +298,7 @@ def forward( output = bias_dropout_add_func(mlp_output, mlp_bias, residual, self.hidden_dropout) else: - out = torch.nn.functional.dropout(mlp_output + mlp_bias, - p=self.hidden_dropout, - training=self.training) + out = torch.nn.functional.dropout(mlp_output + mlp_bias, p=self.hidden_dropout, training=self.training) output = residual + self.drop_path(out) # print(f"Layer: {self.layer_number} MLP + Dropout + Residual checksum {output.sum()}") @@ -326,79 +325,75 @@ def __init__(self, **kwargs): raise ValueError(f"Cannot recognize precision {precision}") def forward( - self, - hidden_states, - attention_mask, - encoder_output=None, - enc_dec_attn_mask=None, - rotary_pos_emb=None, - layer_past=None, - get_key_value=False, - set_inference_key_value_memory=False, - inference_max_sequence_len=None, - self_attention_relative_position_bias=None, - cross_attention_relative_position_bias=None, - checkpoint_core_attention=False, + self, + hidden_states, + attention_mask, + encoder_output=None, + enc_dec_attn_mask=None, + rotary_pos_emb=None, + layer_past=None, + get_key_value=False, + set_inference_key_value_memory=False, + inference_max_sequence_len=None, + self_attention_relative_position_bias=None, + cross_attention_relative_position_bias=None, + checkpoint_core_attention=False, ): kwargs = locals() for key in ["self", "__class__"]: kwargs.pop(key) if self.dtype == torch.float32: - return super().forward( - **kwargs - ) + return super().forward(**kwargs) with torch.autocast(device_type="cuda", dtype=self.dtype): - return super().forward( - **kwargs - ) + return super().forward(**kwargs) class ParallelVisionTransformer(ParallelTransformer): """Transformer class.""" def __init__( - self, - init_method, - output_layer_init_method, - num_layers, - hidden_size, - ffn_hidden_size, - num_attention_heads, - apply_query_key_layer_scaling=True, - kv_channels=None, - layer_type=LayerType.encoder, # it can be a list of types or single type - self_attn_mask_type=AttnMaskType.padding, - pre_process=True, - post_process=True, - precision=16, - fp32_residual_connection=False, - activations_checkpoint_method=None, - activations_checkpoint_num_layers=None, - layernorm_epsilon=1e-5, - hidden_dropout=0.1, - attention_dropout=0.1, - ffn_dropout=0.0, - drop_path_rate=0.0, - use_cpu_initialization=False, - bias_activation_fusion=True, - bias_dropout_add_fusion=True, - masked_softmax_fusion=True, - persist_layer_norm=False, - openai_gelu=False, - onnx_safe=False, - activation='gelu', - model_type=ModelType.encoder_or_decoder, - megatron_legacy=False, - bias=True, - chunk_size=64, - normalization='layernorm', - transformer_block_type='pre_ln', - headscale=False, - layer_number_offset=0, # this is use only for attention norm_factor scaling - activations_checkpoint_granularity=None, - sequence_parallel=False, - gradient_accumulation_fusion=False, - normalize_attention_scores=True, + self, + init_method, + output_layer_init_method, + num_layers, + hidden_size, + ffn_hidden_size, + num_attention_heads, + apply_query_key_layer_scaling=True, + kv_channels=None, + layer_type=LayerType.encoder, # it can be a list of types or single type + self_attn_mask_type=AttnMaskType.padding, + pre_process=True, + post_process=True, + precision=16, + fp32_residual_connection=False, + activations_checkpoint_method=None, + activations_checkpoint_num_layers=None, + layernorm_epsilon=1e-5, + hidden_dropout=0.1, + attention_dropout=0.1, + ffn_dropout=0.0, + drop_path_rate=0.0, + use_cpu_initialization=False, + bias_activation_fusion=True, + bias_dropout_add_fusion=True, + masked_softmax_fusion=True, + persist_layer_norm=False, + openai_gelu=False, + onnx_safe=False, + activation='gelu', + model_type=ModelType.encoder_or_decoder, + megatron_legacy=False, + bias=True, + chunk_size=64, + normalization='layernorm', + transformer_block_type='pre_ln', + headscale=False, + layer_number_offset=0, # this is use only for attention norm_factor scaling + activations_checkpoint_granularity=None, + sequence_parallel=False, + gradient_accumulation_fusion=False, + normalize_attention_scores=True, ): kwargs = locals() for key in ["self", "__class__"]: @@ -409,9 +404,10 @@ def __init__( self.num_layers = self.get_num_layers(num_layers) self.drop_path_rates = [ - rate.item() for rate in - torch.linspace(0, self.drop_path_rate, - self.num_layers * parallel_state.get_pipeline_model_parallel_world_size()) + rate.item() + for rate in torch.linspace( + 0, self.drop_path_rate, self.num_layers * parallel_state.get_pipeline_model_parallel_world_size() + ) ] # Rebuild with vision transformer layers. @@ -478,13 +474,13 @@ def build_layer(layer_number): # Stage 0: [0, 1] [4, 5] # Stage 1: [2, 3] [6, 7] offset = parallel_state.get_virtual_pipeline_model_parallel_rank() * ( - num_layers // parallel_state.get_virtual_pipeline_model_parallel_world_size() + num_layers // parallel_state.get_virtual_pipeline_model_parallel_world_size() ) + (parallel_state.get_pipeline_model_parallel_rank() * self.num_layers) else: # Each stage gets a contiguous set of layers. if ( - self.model_type == ModelType.encoder_and_decoder - and parallel_state.get_pipeline_model_parallel_world_size() > 1 + self.model_type == ModelType.encoder_and_decoder + and parallel_state.get_pipeline_model_parallel_world_size() > 1 ): pipeline_rank = parallel_state.get_pipeline_model_parallel_rank() if layer_type == LayerType.encoder: diff --git a/nemo/collections/vision/modules/vit/vit_backbone.py b/nemo/collections/vision/modules/vit/vit_backbone.py index 53684775090c..cf874830d378 100644 --- a/nemo/collections/vision/modules/vit/vit_backbone.py +++ b/nemo/collections/vision/modules/vit/vit_backbone.py @@ -14,11 +14,12 @@ """Vision Transformer(VIT) model.""" -import einops import math +from functools import partial + +import einops import torch import torch.nn.functional as F -from functools import partial from nemo.collections.nlp.modules.common.megatron.fused_layer_norm import get_layer_norm from nemo.collections.nlp.modules.common.megatron.module import MegatronModule @@ -48,14 +49,14 @@ class DropPatch(MegatronModule): """ def __init__(self, prob, class_token_length=8, exclude_cls_tokens=True): - assert 0 <= prob < 1. + assert 0 <= prob < 1.0 super(DropPatch, self).__init__() self.prob = prob self.class_token_length = class_token_length self.exclude_cls_tokens = exclude_cls_tokens # exclude CLS token def __call__(self, x): - if self.prob == 0. or not self.training: + if self.prob == 0.0 or not self.training: return x class_token_length = self.class_token_length @@ -110,22 +111,22 @@ def forward(self, hidden_states): def isPerfectSquare(x): - if (x >= 0): + if x >= 0: sr = math.sqrt(x) - return (int(sr) * int(sr) == x) + return int(sr) * int(sr) == x return False def twod_interpolate_position_embeddings_hook( - model_cfg, - class_token_present, - state_dict, - prefix, - local_metadata, - strict, - missing_keys, - unexpected_keys, - error_msgs, + model_cfg, + class_token_present, + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, ): num_patches_per_dim_h = model_cfg.img_h // model_cfg.patch_dim num_patches_per_dim_w = model_cfg.img_w // model_cfg.patch_dim @@ -140,7 +141,7 @@ def twod_interpolate_position_embeddings_hook( input_param = state_dict[key] input_seq_len = input_param.shape[0] - assert (isPerfectSquare(input_seq_len) or isPerfectSquare(input_seq_len - class_token_length)) + assert isPerfectSquare(input_seq_len) or isPerfectSquare(input_seq_len - class_token_length) input_has_class_token = not isPerfectSquare(input_seq_len) num_tok_input = input_seq_len - class_token_length if input_has_class_token else input_seq_len num_tok_output = num_patches @@ -161,15 +162,11 @@ def twod_interpolate_position_embeddings_hook( gs_new = (num_patches_per_dim_h, num_patches_per_dim_w) input_param_grid = input_param_grid.transpose(0, 1).contiguous() - input_param_grid = input_param_grid.reshape( - (1, -1, gs_input, gs_input) - ) + input_param_grid = input_param_grid.reshape((1, -1, gs_input, gs_input)) input_param_grid = input_param_grid.float() scale_factor = (gs_new[0] / gs_input, gs_new[1] / gs_input) - input_param_grid = F.interpolate( - input_param_grid, scale_factor=scale_factor, mode="bilinear" - ) + input_param_grid = F.interpolate(input_param_grid, scale_factor=scale_factor, mode="bilinear") input_param_grid = input_param_grid.half() input_param_grid = input_param_grid.reshape((-1, num_tok_output)) @@ -178,10 +175,7 @@ def twod_interpolate_position_embeddings_hook( assert input_param_grid.shape[1] == hidden_size input_param = input_param_grid - assert ( - input_param.shape[0] == num_tok_output - and input_param.shape[1] == hidden_size - ) + assert input_param.shape[0] == num_tok_output and input_param.shape[1] == hidden_size if output_has_class_token: input_param = torch.cat((input_param_tok, input_param), dim=0) @@ -192,14 +186,16 @@ def twod_interpolate_position_embeddings_hook( class VitBackbone(MegatronModule): """Vision Transformer Model.""" - def __init__(self, - model_cfg, - init_method=None, - scaled_init_method=None, - pre_process=True, - post_process=True, - class_token=True, - single_token_output=False): + def __init__( + self, + model_cfg, + init_method=None, + scaled_init_method=None, + pre_process=True, + post_process=True, + class_token=True, + single_token_output=False, + ): super(VitBackbone, self).__init__(share_token_embeddings=False) self.fp16_lm_cross_entropy = model_cfg.fp16_lm_cross_entropy @@ -218,8 +214,8 @@ def __init__(self, self.img_h = model_cfg.img_h self.img_w = model_cfg.img_w self.single_token_output = single_token_output - self.drop_patch_rate = model_cfg.get("drop_patch_rate", 0.) - self.drop_path_rate = model_cfg.get("drop_path_rate", 0.) + self.drop_patch_rate = model_cfg.get("drop_patch_rate", 0.0) + self.drop_path_rate = model_cfg.get("drop_path_rate", 0.0) preprocess_layernorm = model_cfg.get("preprocess_layernorm", False) assert self.img_h % self.patch_dim == 0 @@ -237,57 +233,41 @@ def __init__(self, if self.pre_process: # cls_token if self.class_token: - self.cls_token = torch.nn.Parameter( - torch.randn(1, class_token_length, self.hidden_size) - ) + self.cls_token = torch.nn.Parameter(torch.randn(1, class_token_length, self.hidden_size)) torch.nn.init.zeros_(self.cls_token) self.position_ids = torch.arange(self.seq_length).expand(1, -1).cuda() # Linear encoder - self.linear_encoder = torch.nn.Linear( - self.flatten_dim, self.hidden_size - ) + self.linear_encoder = torch.nn.Linear(self.flatten_dim, self.hidden_size) # embedding self.position_embedding_type = model_cfg.get("position_embedding_type", "learned_absolute") if self.position_embedding_type == "learned_absolute": - self.position_embeddings = torch.nn.Embedding( - self.seq_length, self.hidden_size - ) - init_method_normal(model_cfg.init_method_std)( - self.position_embeddings.weight - ) + self.position_embeddings = torch.nn.Embedding(self.seq_length, self.hidden_size) + init_method_normal(model_cfg.init_method_std)(self.position_embeddings.weight) class_token_present = self.class_token self.position_embeddings._register_load_state_dict_pre_hook( - partial( - twod_interpolate_position_embeddings_hook, - model_cfg, - class_token_present - ) + partial(twod_interpolate_position_embeddings_hook, model_cfg, class_token_present) ) elif self.position_embedding_type == "learned_parameters": - self.position_embeddings = torch.nn.Parameter( - torch.empty(self.seq_length, self.hidden_size) - ) - init_method_normal(model_cfg.init_method_std)( - self.position_embeddings - ) + self.position_embeddings = torch.nn.Parameter(torch.empty(self.seq_length, self.hidden_size)) + init_method_normal(model_cfg.init_method_std)(self.position_embeddings) else: raise ValueError(f"Unrecognized positional embedding type {self.position_embedding_type}!") self.embedding_dropout = torch.nn.Dropout(model_cfg.hidden_dropout) self.drop_patch = DropPatch( - self.drop_patch_rate, - class_token_length=class_token_length, - exclude_cls_tokens=self.class_token + self.drop_patch_rate, class_token_length=class_token_length, exclude_cls_tokens=self.class_token ) if preprocess_layernorm: self.preprocess_layernorm = get_layer_norm( - model_cfg.hidden_size, model_cfg.layernorm_epsilon, model_cfg.persist_layer_norm, - sequence_parallel=model_cfg.sequence_parallel + model_cfg.hidden_size, + model_cfg.layernorm_epsilon, + model_cfg.persist_layer_norm, + sequence_parallel=model_cfg.sequence_parallel, ) self.transformer = ParallelVisionTransformer( @@ -331,10 +311,7 @@ def forward(self, input): if self.pre_process: rearranged_input = einops.rearrange( - input, - "b c (h p1) (w p2) -> b (h w) (p1 p2 c)", - p1=self.patch_dim, - p2=self.patch_dim, + input, "b c (h p1) (w p2) -> b (h w) (p1 p2 c)", p1=self.patch_dim, p2=self.patch_dim, ) # [b num_patch patch_dim*patch_dim*c] -> [b, s, h]; s:=num_patch, h:=hidden @@ -346,8 +323,9 @@ def forward(self, input): concatenated_tokens = torch.cat((cls_tokens, encoder_output), dim=1) if self.position_embedding_type == "learned_absolute": - token_embeddings = concatenated_tokens + \ - self.position_embeddings(self.position_ids[:, :concatenated_tokens.shape[1]]) + token_embeddings = concatenated_tokens + self.position_embeddings( + self.position_ids[:, : concatenated_tokens.shape[1]] + ) elif self.position_embedding_type == "learned_parameters": token_embeddings = concatenated_tokens + self.position_embeddings diff --git a/nemo/utils/trt_utils.py b/nemo/utils/trt_utils.py index 03ff5126b2a5..492ca3c2a6e0 100644 --- a/nemo/utils/trt_utils.py +++ b/nemo/utils/trt_utils.py @@ -11,25 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from polygraphy.backend.trt import CreateConfig, Profile -from polygraphy.backend.trt import ( - engine_from_network, - network_from_onnx_path, - save_engine, -) - import tensorrt as trt +from polygraphy.backend.trt import CreateConfig, Profile, engine_from_network, network_from_onnx_path, save_engine + def build_engine( - onnx_path, - output_path, - fp16, - input_profile=None, - enable_refit=False, - enable_preview=False, - timing_cache=None, - workspace_size=0, - ): + onnx_path, + output_path, + fp16, + input_profile=None, + enable_refit=False, + enable_preview=False, + timing_cache=None, + workspace_size=0, +): print(f"Building TensorRT engine for {onnx_path}: {output_path}") p = Profile() if input_profile: @@ -41,9 +36,7 @@ def build_engine( config_kwargs = {} if workspace_size > 0: - config_kwargs["memory_pool_limits"] = { - trt.MemoryPoolType.WORKSPACE: workspace_size - } + config_kwargs["memory_pool_limits"] = {trt.MemoryPoolType.WORKSPACE: workspace_size} engine = engine_from_network( network_from_onnx_path(onnx_path), config=CreateConfig( @@ -56,4 +49,4 @@ def build_engine( ), save_timing_cache=timing_cache, ) - save_engine(engine, path=output_path) \ No newline at end of file + save_engine(engine, path=output_path) diff --git a/scripts/fid-eval-text2img/TFinception_V3.py b/scripts/fid-eval-text2img/TFinception_V3.py index 8fdcbff13117..6cb212f73ab0 100644 --- a/scripts/fid-eval-text2img/TFinception_V3.py +++ b/scripts/fid-eval-text2img/TFinception_V3.py @@ -46,20 +46,19 @@ # Inception weights ported to Pytorch from # http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz -FID_WEIGHTS_URL = 'https://github.com/mseitzer/pytorch-fid/releases' \ - '/download/fid_weights/pt_inception-2015-12-05-6726825d.pth' +FID_WEIGHTS_URL = ( + 'https://github.com/mseitzer/pytorch-fid/releases' '/download/fid_weights/pt_inception-2015-12-05-6726825d.pth' +) class SwAV(nn.Module): def __init__(self): super().__init__() - self.model = torch.hub.load('facebookresearch/swav', 'resnet50', - pretrained=True) + self.model = torch.hub.load('facebookresearch/swav', 'resnet50', pretrained=True) self.model.fc = torch.nn.Sequential() def forward(self, x, align_corners=True): - y = self.model(F.interpolate( - x, size=(224, 224), mode='bicubic', align_corners=align_corners)) + y = self.model(F.interpolate(x, size=(224, 224), mode='bicubic', align_corners=align_corners)) return y @@ -67,38 +66,30 @@ class Vgg16(nn.Module): def __init__(self): super().__init__() self.model = vgg16(pretrained=True, init_weights=False) - self.model.classifier = torch.nn.Sequential( - *[self.model.classifier[i] for i in range(4)] - ) + self.model.classifier = torch.nn.Sequential(*[self.model.classifier[i] for i in range(4)]) def forward(self, x, align_corners=True): - y = self.model(F.interpolate( - x, size=(224, 224), mode='bicubic', align_corners=align_corners)) + y = self.model(F.interpolate(x, size=(224, 224), mode='bicubic', align_corners=align_corners)) return y class InceptionV3(nn.Module): def __init__(self): super().__init__() - self.model = inception_v3(transform_input=False, - pretrained=True, - init_weights=False) + self.model = inception_v3(transform_input=False, pretrained=True, init_weights=False) self.model.fc = torch.nn.Sequential() def forward(self, x, align_corners=True): - y = self.model(F.interpolate( - x, size=(299, 299), mode='bicubic', align_corners=align_corners)) + y = self.model(F.interpolate(x, size=(299, 299), mode='bicubic', align_corners=align_corners)) return y class TFInceptionV3(nn.Module): def __init__(self): super().__init__() - self.model = inception_v3(transform_input=False, - num_classes=1008, - aux_logits=False, - pretrained=False, - init_weights=False) + self.model = inception_v3( + transform_input=False, num_classes=1008, aux_logits=False, pretrained=False, init_weights=False + ) self.model.Mixed_5b = FIDInceptionA(192, pool_features=32) self.model.Mixed_5c = FIDInceptionA(256, pool_features=64) self.model.Mixed_5d = FIDInceptionA(288, pool_features=64) @@ -109,16 +100,13 @@ def __init__(self): self.model.Mixed_7b = FIDInceptionE_1(1280) self.model.Mixed_7c = FIDInceptionE_2(2048) - state_dict = load_state_dict_from_url( - FID_WEIGHTS_URL, progress=True, map_location='cpu' - ) + state_dict = load_state_dict_from_url(FID_WEIGHTS_URL, progress=True, map_location='cpu') self.model.load_state_dict(state_dict) self.model.fc = torch.nn.Sequential() def forward(self, x, align_corners=True): # x = apply_imagenet_normalization(x) - y = self.model(F.interpolate( - x, size=(299, 299), mode='bicubic', align_corners=align_corners)) + y = self.model(F.interpolate(x, size=(299, 299), mode='bicubic', align_corners=align_corners)) return y @@ -140,8 +128,7 @@ def forward(self, x): # Patch: Tensorflow's average pool does not use the padded zero's in # its average calculation - branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, - count_include_pad=False) + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, count_include_pad=False) branch_pool = self.branch_pool(branch_pool) outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] @@ -169,8 +156,7 @@ def forward(self, x): # Patch: Tensorflow's average pool does not use the padded zero's in # its average calculation - branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, - count_include_pad=False) + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, count_include_pad=False) branch_pool = self.branch_pool(branch_pool) outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool] @@ -203,8 +189,7 @@ def forward(self, x): # Patch: Tensorflow's average pool does not use the padded zero's in # its average calculation - branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, - count_include_pad=False) + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, count_include_pad=False) branch_pool = self.branch_pool(branch_pool) outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] diff --git a/scripts/fid-eval-text2img/compute_clip_score.py b/scripts/fid-eval-text2img/compute_clip_score.py index 8a9e2e482bb2..da587a9c3c32 100644 --- a/scripts/fid-eval-text2img/compute_clip_score.py +++ b/scripts/fid-eval-text2img/compute_clip_score.py @@ -20,15 +20,16 @@ images in each subfolder and the captions from `--captions_path`. """ +import argparse +import csv +import os +from glob import glob + import open_clip import torch import torch.nn as nn from PIL import Image -from glob import glob from tqdm import tqdm -import os -import argparse -import csv class CLIPEncoder(nn.Module): @@ -45,7 +46,8 @@ def __init__(self, clip_version='ViT-B/32', pretrained='', cache_dir=None, devic self.pretrained = 'openai' self.model, _, self.preprocess = open_clip.create_model_and_transforms( - self.clip_version, pretrained=self.pretrained, cache_dir=cache_dir) + self.clip_version, pretrained=self.pretrained, cache_dir=cache_dir + ) self.model.eval() self.model.to(device) @@ -70,6 +72,7 @@ def get_clip_score(self, text, image): return similarity + if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--captions_path', default='/coco2014/coco2014_val_sampled_30k/captions/', type=str) @@ -91,13 +94,15 @@ def get_clip_score(self, text, image): for subfolder in os.listdir(args.fid_images_path): subfolder_path = os.path.join(args.fid_images_path, subfolder) if os.path.isdir(subfolder_path): - images = sorted(glob(f'{subfolder_path}/*.png'), key=lambda x: (int(x.split('/')[-1].strip('.png').split('_')[1]))) + images = sorted( + glob(f'{subfolder_path}/*.png'), key=lambda x: (int(x.split('/')[-1].strip('.png').split('_')[1])) + ) texts = sorted(glob(f'{captions_path}/*.txt')) print(images[:5], texts[:5]) assert len(images) == len(texts) print(f'Number of images text pairs: {len(images)}') - ave_sim = 0. + ave_sim = 0.0 count = 0 for text, img in zip(tqdm(texts), images): with open(text, 'r') as f: @@ -112,4 +117,4 @@ def get_clip_score(self, text, image): print(f'The CLIP similarity for CFG {subfolder}: {ave_sim}') # Write CLIP score to output CSV file - writer.writerow({'cfg': subfolder, 'clip_score': ave_sim}) \ No newline at end of file + writer.writerow({'cfg': subfolder, 'clip_score': ave_sim}) diff --git a/scripts/fid-eval-text2img/compute_fid.py b/scripts/fid-eval-text2img/compute_fid.py index e5b173c71944..cbeb81e1e4a7 100644 --- a/scripts/fid-eval-text2img/compute_fid.py +++ b/scripts/fid-eval-text2img/compute_fid.py @@ -12,16 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. import collections -import numpy as np import os + +import numpy as np import torch import torch.distributed as dist import torch.nn.functional as F from scipy import linalg +from TFinception_V3 import InceptionV3, SwAV, TFInceptionV3, Vgg16 from torch import nn -from TFinception_V3 import SwAV, TFInceptionV3, InceptionV3, Vgg16 - def network_init(network='inception'): # inception = inception_v3(pretrained=True, transform_input=False) @@ -73,8 +73,7 @@ def _calculate_frechet_distance(act_1, act_2, eps=1e-6): # Product might be almost singular covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False) if not np.isfinite(covmean).all(): - msg = ('fid calculation produces singular product; ' - 'adding %s to diagonal of cov estimates') % eps + msg = ('fid calculation produces singular product; ' 'adding %s to diagonal of cov estimates') % eps print(msg) offset = np.eye(sigma1.shape[0]) * eps covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset)) @@ -87,8 +86,7 @@ def _calculate_frechet_distance(act_1, act_2, eps=1e-6): # raise ValueError('Imaginary component {}'.format(m)) covmean = covmean.real tr_covmean = np.trace(covmean) - return {"FID": (diff.dot(diff) + np.trace(sigma1) + np.trace( - sigma2) - 2 * tr_covmean)} + return {"FID": (diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean)} def is_master(): @@ -109,12 +107,19 @@ def is_local_master(): return torch.cuda.current_device() == 0 -def load_or_compute_activations(act_path, data_loader, key_real, key_fake, - generator=None, sample_size=None, - preprocess=None, - is_video=False, few_shot_video=False, - network='inception', - **kwargs): +def load_or_compute_activations( + act_path, + data_loader, + key_real, + key_fake, + generator=None, + sample_size=None, + preprocess=None, + is_video=False, + few_shot_video=False, + network='inception', + **kwargs, +): r"""Load mean and covariance from saved npy file if exists. Otherwise, compute the mean and covariance. Args: @@ -139,12 +144,12 @@ def load_or_compute_activations(act_path, data_loader, key_real, key_fake, # Compute activations. if is_video: act = get_video_activations( - data_loader, key_real, key_fake, generator, - sample_size, preprocess, few_shot_video, network, **kwargs) + data_loader, key_real, key_fake, generator, sample_size, preprocess, few_shot_video, network, **kwargs + ) else: act = get_activations( - data_loader, key_real, key_fake, generator, - sample_size, preprocess, True, network, **kwargs) + data_loader, key_real, key_fake, generator, sample_size, preprocess, True, network, **kwargs + ) if act_path is not None and is_local_master(): print('Save activations to {}'.format(act_path)) if not os.path.exists(os.path.dirname(act_path)): @@ -154,10 +159,19 @@ def load_or_compute_activations(act_path, data_loader, key_real, key_fake, @torch.no_grad() -def compute_fid(fid_path, data_loader, net_G, - key_real='images', key_fake='fake_images', - sample_size=None, preprocess=None, return_act=False, - is_video=False, few_shot_video=False, **kwargs): +def compute_fid( + fid_path, + data_loader, + net_G, + key_real='images', + key_fake='fake_images', + sample_size=None, + preprocess=None, + return_act=False, + is_video=False, + few_shot_video=False, + **kwargs, +): r"""Compute the fid score. Args: @@ -177,20 +191,33 @@ def compute_fid(fid_path, data_loader, net_G, (float): FID value. """ print('Computing FID.') - act_path = os.path.join(os.path.dirname(fid_path), - 'activations_real.npy') + act_path = os.path.join(os.path.dirname(fid_path), 'activations_real.npy') # Get the fake mean and covariance. fake_act = load_or_compute_activations( - None, data_loader, key_real, key_fake, net_G, - sample_size, preprocess, is_video=is_video, - few_shot_video=few_shot_video, **kwargs + None, + data_loader, + key_real, + key_fake, + net_G, + sample_size, + preprocess, + is_video=is_video, + few_shot_video=few_shot_video, + **kwargs, ) # Get the ground truth mean and covariance. real_act = load_or_compute_activations( - act_path, data_loader, key_real, key_fake, None, - sample_size, preprocess, is_video=is_video, - few_shot_video=few_shot_video, **kwargs + act_path, + data_loader, + key_real, + key_fake, + None, + sample_size, + preprocess, + is_video=is_video, + few_shot_video=few_shot_video, + **kwargs, ) if is_master(): @@ -219,8 +246,7 @@ def dist_all_gather_tensor(tensor): world_size = get_world_size() if world_size < 2: return [tensor] - tensor_list = [ - torch.ones_like(tensor) for _ in range(dist.get_world_size())] + tensor_list = [torch.ones_like(tensor) for _ in range(dist.get_world_size())] with torch.no_grad(): dist.all_gather(tensor_list, tensor) return tensor_list @@ -257,8 +283,16 @@ def to_cuda(data): @torch.no_grad() def get_activations( - data_loader, key_real, key_fake, - generator=None, sample_size=None, preprocess=None, align_corners=True, network='inception', **kwargs): + data_loader, + key_real, + key_fake, + generator=None, + sample_size=None, + preprocess=None, + align_corners=True, + network='inception', + **kwargs, +): r"""Compute activation values and pack them in a list. Args: @@ -287,7 +321,7 @@ def get_activations( if generator is None: images = data[key_real] if torch.max(images) > 1: - images = images / 255. # convert RGB to (0,1) + images = images / 255.0 # convert RGB to (0,1) else: # Compute the generated image. text = data[1]['caption'] ### input is captions @@ -311,9 +345,18 @@ def get_activations( @torch.no_grad() -def compute_fid_data(folder_to_store_real_act, data_loader_a, data_loader_b, - key_a='images', key_b='images', sample_size=None, - is_video=False, few_shot_video=False, network='inception', **kwargs): +def compute_fid_data( + folder_to_store_real_act, + data_loader_a, + data_loader_b, + key_a='images', + key_b='images', + sample_size=None, + is_video=False, + few_shot_video=False, + network='inception', + **kwargs, +): r"""Compute the fid score between two datasets. Args: @@ -338,13 +381,29 @@ def compute_fid_data(folder_to_store_real_act, data_loader_a, data_loader_b, # sample_size = min_data_size if sample_size is None else min(sample_size, min_data_size) act_a = load_or_compute_activations( - path_a, data_loader_a, key_a, key_b, None, - sample_size=sample_size, is_video=is_video, - few_shot_video=few_shot_video, network=network, **kwargs) + path_a, + data_loader_a, + key_a, + key_b, + None, + sample_size=sample_size, + is_video=is_video, + few_shot_video=few_shot_video, + network=network, + **kwargs, + ) act_b = load_or_compute_activations( - None, data_loader_b, key_a, key_b, None, - sample_size=sample_size, is_video=is_video, - few_shot_video=few_shot_video, network=network, **kwargs) + None, + data_loader_b, + key_a, + key_b, + None, + sample_size=sample_size, + is_video=is_video, + few_shot_video=few_shot_video, + network=network, + **kwargs, + ) print(act_a.shape, act_b.shape) if is_master(): return _calculate_frechet_distance(act_a, act_b)["FID"] diff --git a/scripts/fid-eval-text2img/eval_fid.py b/scripts/fid-eval-text2img/eval_fid.py index a1565eebfbd3..d6312fad843a 100644 --- a/scripts/fid-eval-text2img/eval_fid.py +++ b/scripts/fid-eval-text2img/eval_fid.py @@ -82,14 +82,16 @@ # Compute FID score between synthetic images in subfolder and real images fid = compute_fid_data( - './', loader_real, loader_synthetic, + './', + loader_real, + loader_synthetic, key_a=0, key_b=0, sample_size=None, is_video=False, few_shot_video=False, network='tf_inception', - interpolation_mode='bilinear' + interpolation_mode='bilinear', ) print(f"The FID score between {subfolder_path} and {real_path} is {fid}") diff --git a/scripts/fid-eval-text2img/fid_dataset.py b/scripts/fid-eval-text2img/fid_dataset.py index de1f07a8221a..6da1db7cd00c 100644 --- a/scripts/fid-eval-text2img/fid_dataset.py +++ b/scripts/fid-eval-text2img/fid_dataset.py @@ -11,14 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import os + +import numpy as np import torch import torch.utils.data as data import torchvision.transforms as transforms from PIL import Image from pycocotools.coco import COCO -from torchvision.io import read_image, ImageReadMode +from torchvision.io import ImageReadMode, read_image def _pil_interp(method): @@ -42,7 +43,6 @@ def _size_tuple(size): class CenterCropResize: - def __init__(self, target_size: int, interpolation: str = 'bilinear', fill_color: tuple = (0, 0, 0)): self.target_size = _size_tuple(target_size) self.interpolation = interpolation @@ -52,8 +52,7 @@ def __call__(self, img): w, h = img.size img = np.array(img).astype(np.uint8) crop = min(w, h) - img = img[(h - crop) // 2:(h + crop) // 2, - (w - crop) // 2:(w + crop) // 2] + img = img[(h - crop) // 2 : (h + crop) // 2, (w - crop) // 2 : (w + crop) // 2] image = Image.fromarray(img) if self.target_size is not None: interp_method = _pil_interp(self.interpolation) diff --git a/scripts/fid-eval-text2img/plot.py b/scripts/fid-eval-text2img/plot.py index 6947db7b36ba..e9217f4d6e72 100644 --- a/scripts/fid-eval-text2img/plot.py +++ b/scripts/fid-eval-text2img/plot.py @@ -8,8 +8,10 @@ """ import argparse -import pandas as pd + import matplotlib.pyplot as plt +import pandas as pd + def plot_fid_vs_clip(fid_scores_csv, clip_scores_csv): fid_scores = pd.read_csv(fid_scores_csv) @@ -28,6 +30,7 @@ def plot_fid_vs_clip(fid_scores_csv, clip_scores_csv): plt.show() + if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--fid_scores_csv', required=True, type=str, help='Path to the FID scores CSV file') diff --git a/tests/collections/multimodal/test_clip_model.py b/tests/collections/multimodal/test_clip_model.py index 7838a68b7924..8534ef06a127 100644 --- a/tests/collections/multimodal/test_clip_model.py +++ b/tests/collections/multimodal/test_clip_model.py @@ -19,15 +19,15 @@ from omegaconf import DictConfig, OmegaConf from pytorch_lightning import Trainer +from nemo.collections.multimodal.data.clip.clip_dataset import build_train_valid_datasets from nemo.collections.multimodal.models.clip.megatron_clip_models import ( - CLIPVisionTransformer, - CLIPTextTransformer, CLIPModel, + CLIPTextTransformer, + CLIPVisionTransformer, MegatronCLIPModel, ) -from nemo.collections.multimodal.data.clip.clip_dataset import build_train_valid_datasets -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy DEVICE_CAPABILITY = None if torch.cuda.is_available(): @@ -324,18 +324,17 @@ def clip_trainer_and_model(model_cfg, trainer_cfg, precision): def dummy(): return + if model.trainer.strategy.launcher is not None: model.trainer.strategy.launcher.launch(dummy, trainer=model.trainer) model.trainer.strategy.setup_environment() return trainer, model + def build_datasets(cfg, tokenizer): - return build_train_valid_datasets( - model_cfg=cfg, - consumed_samples=0, - tokenizer=tokenizer, - ) + return build_train_valid_datasets(model_cfg=cfg, consumed_samples=0, tokenizer=tokenizer,) + @pytest.mark.run_only_on('GPU') class TestMegatronCLIPModel: @@ -351,9 +350,7 @@ def test_constructor(self, clip_trainer_and_model): def test_build_dataset(self, clip_trainer_and_model, test_data_dir): clip_model = clip_trainer_and_model[1] train_ds, validation_ds = build_train_valid_datasets( - model_cfg=clip_model.cfg, - consumed_samples=0, - tokenizer=clip_model.tokenizer, + model_cfg=clip_model.cfg, consumed_samples=0, tokenizer=clip_model.tokenizer, ) assert len(train_ds) == 2000 assert len(validation_ds) == 1000 @@ -375,7 +372,6 @@ def test_build_dataset(self, clip_trainer_and_model, test_data_dir): ), ], ) - @pytest.mark.unit def test_forward(self, clip_trainer_and_model, test_data_dir, precision=None): trainer, clip_model = clip_trainer_and_model @@ -402,10 +398,7 @@ def test_forward(self, clip_trainer_and_model, test_data_dir, precision=None): B, C, H, W = tokens.shape assert H == W with torch.autocast('cuda', dtype=dtype): - output_tensor = clip_model( - image=tokens.cuda(), - text=texts.cuda(), - ) + output_tensor = clip_model(image=tokens.cuda(), text=texts.cuda(),) # output is (B, #classes) # assert output_tensor.shape == torch.Size([B, clip_model.cfg['num_classes']]) # assert output_tensor.dtype == dtype @@ -486,4 +479,4 @@ def test_forward(self, clip_trainer_and_model, test_data_dir, precision=None): # ) # # output is (B, #classes) # assert output_tensor.shape == torch.Size([6, 50]) - # assert output_tensor.dtype == dtype \ No newline at end of file + # assert output_tensor.dtype == dtype diff --git a/tests/collections/vision/test_vit_model.py b/tests/collections/vision/test_vit_model.py index 2d65baf9c90c..415b26f6ecca 100644 --- a/tests/collections/vision/test_vit_model.py +++ b/tests/collections/vision/test_vit_model.py @@ -232,11 +232,7 @@ def build_datasets(cfg, test_data_dir): os.path.join(test_data_dir, "vision/tiny_imagenet/train"), os.path.join(test_data_dir, "vision/tiny_imagenet/val"), ] - return build_train_valid_datasets( - model_cfg=cfg, - data_path=data_path, - image_size=(cfg.img_h, cfg.img_w), - ) + return build_train_valid_datasets(model_cfg=cfg, data_path=data_path, image_size=(cfg.img_h, cfg.img_w),) @pytest.mark.run_only_on('GPU') @@ -305,9 +301,7 @@ def test_forward(self, vit_classification_trainer_and_model, test_data_dir): B, C, H, W = tokens.shape assert H == W with torch.autocast('cuda', dtype=dtype): - output_tensor = vit_classification_model.forward( - tokens=tokens.cuda(), - ) + output_tensor = vit_classification_model.forward(tokens=tokens.cuda(),) # output is (B, #classes) assert output_tensor.shape == torch.Size([B, vit_classification_model.cfg['num_classes']]) assert output_tensor.dtype == dtype @@ -344,7 +338,7 @@ def test_vit_backbone(self, model_cfg, trainer_cfg, precision): scaled_init_method=None, pre_process=True, post_process=True, - single_token_output=True + single_token_output=True, ).cuda() vit_backbone.eval() @@ -355,9 +349,7 @@ def test_vit_backbone(self, model_cfg, trainer_cfg, precision): B, C, H, W = tokens.shape assert H == W with torch.autocast('cuda', dtype=dtype): - output_tensor = vit_backbone( - tokens.cuda(), - ) + output_tensor = vit_backbone(tokens.cuda(),) # output is (B, #classes) assert output_tensor.shape == torch.Size([B, model_cfg['hidden_size']]) assert output_tensor.dtype == dtype @@ -374,18 +366,14 @@ def test_vit_head(self, model_cfg, trainer_cfg, precision): else: raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") - vit_head = VitMlpHead( - 24, 50, - ).cuda() + vit_head = VitMlpHead(24, 50,).cuda() vit_head.eval() hidden = torch.rand((6, 24)) with torch.no_grad(): with torch.autocast('cuda', dtype=dtype): - output_tensor = vit_head( - hidden.cuda(), - ) + output_tensor = vit_head(hidden.cuda(),) # output is (B, #classes) assert output_tensor.shape == torch.Size([6, 50]) assert output_tensor.dtype == dtype diff --git a/tools/asr_evaluator/asr_evaluator.py b/tools/asr_evaluator/asr_evaluator.py index da81f33bc8b5..ff6e251a39f5 100644 --- a/tools/asr_evaluator/asr_evaluator.py +++ b/tools/asr_evaluator/asr_evaluator.py @@ -15,11 +15,10 @@ import git from omegaconf import OmegaConf -from utils import cal_target_metadata_wer, cal_write_wer, run_asr_inference from nemo.core.config import hydra_runner from nemo.utils import logging - +from utils import cal_target_metadata_wer, cal_write_wer, run_asr_inference """ This script serves as evaluator of ASR models diff --git a/tools/ctc_segmentation/scripts/run_ctc_segmentation.py b/tools/ctc_segmentation/scripts/run_ctc_segmentation.py index 90e67ab844c7..884b6186e249 100644 --- a/tools/ctc_segmentation/scripts/run_ctc_segmentation.py +++ b/tools/ctc_segmentation/scripts/run_ctc_segmentation.py @@ -24,9 +24,9 @@ import torch from joblib import Parallel, delayed from tqdm import tqdm -from utils import get_segments import nemo.collections.asr as nemo_asr +from utils import get_segments parser = argparse.ArgumentParser(description="CTC Segmentation") parser.add_argument("--output_dir", default="output", type=str, help="Path to output directory") From ff68b04c25d60616b15f9c30a7de08100d01611e Mon Sep 17 00:00:00 2001 From: Maanu Grover Date: Wed, 3 May 2023 17:53:52 -0700 Subject: [PATCH 005/512] Pre-commit style check --- .gitlab-ci.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 000000000000..d5fcccbd2d1c --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,13 @@ +stages: + - pre + +pre-commit-checks: + stage: pre + script: + - pre-commit run --all-files --verbose --show-diff-on-failure + image: gitlab-master.nvidia.com:5005/dl/ai-services/python-clients/codeformat:latest + tags: + - os/linux + - type/docker + only: + - merge_requests From dc867cd60327cb09979c04e7f099c1a34495e297 Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Thu, 4 May 2023 05:27:40 +0000 Subject: [PATCH 006/512] modify base sampler and pipeline to incorporate dpm-solver --- .../stable_diffusion/samplers/__init__.py | 2 +- .../stable_diffusion/samplers/base_sampler.py | 44 +++++++++++-------- .../parts/stable_diffusion/pipeline.py | 11 ++++- 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py index e36354bb3b54..b796274ae8f0 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py @@ -13,4 +13,4 @@ # limitations under the License. from enum import Enum -Sampler = Enum('Sampler', ['PLMS', 'DDIM']) +Sampler = Enum('Sampler', ['PLMS', 'DDIM', 'DPM']) diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py index c0f858ac3443..b5f08e60b1f5 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py @@ -77,7 +77,10 @@ def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, @abstractmethod def p_sampling_fn(self): pass - + + def dpm_sampling_fn(self): + pass + @torch.no_grad() def sample( self, @@ -117,24 +120,25 @@ def sample( C, H, W = shape size = (batch_size, C, H, W) print(f'Data shape for sampling is {size}, eta {eta}') - samples, intermediates = self.sampling_fn( - conditioning, - size, - callback=callback, - img_callback=img_callback, - quantize_denoised=quantize_x0, - mask=mask, - x0=x0, - ddim_use_original_steps=False, - noise_dropout=noise_dropout, - temperature=temperature, - score_corrector=score_corrector, - corrector_kwargs=corrector_kwargs, - x_T=x_T, - log_every_t=log_every_t, - unconditional_guidance_scale=unconditional_guidance_scale, - unconditional_conditioning=unconditional_conditioning, - ) + + if self.sampler is Sampler.DPM: + return self.dpm_sampling_fn(shape=shape, steps=S, conditioning=conditioning, unconditional_conditioning=unconditional_conditioning, unconditional_guidance_scale=unconditional_guidance_scale, x_T=x_T) + + samples, intermediates = self.sampling_fn(conditioning, size, + callback=callback, + img_callback=img_callback, + quantize_denoised=quantize_x0, + mask=mask, x0=x0, + ddim_use_original_steps=False, + noise_dropout=noise_dropout, + temperature=temperature, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + x_T=x_T, + log_every_t=log_every_t, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning, + ) return samples, intermediates @torch.no_grad() @@ -164,12 +168,14 @@ def sampling_fn( img = torch.randn(shape, generator=self.model.rng, device=device) else: img = x_T + if timesteps is None: timesteps = self.ddpm_num_timesteps if ddim_use_original_steps else self.ddim_timesteps elif timesteps is not None and not ddim_use_original_steps: subset_end = int(min(timesteps / self.ddim_timesteps.shape[0], 1) * self.ddim_timesteps.shape[0]) - 1 timesteps = self.ddim_timesteps[:subset_end] intermediates = {'x_inter': [img], 'pred_x0': [img]} + # TODO: Is this needed if self.sampler is Sampler.PLMS: time_range = list(reversed(range(0, timesteps))) if ddim_use_original_steps else np.flip(timesteps) diff --git a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py index 1c82e58543bf..82dd35dd044b 100644 --- a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py +++ b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py @@ -25,7 +25,11 @@ from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion, MegatronLatentDiffusion from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler from nemo.collections.multimodal.models.stable_diffusion.samplers.plms import PLMSSampler -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector +from nemo.collections.multimodal.models.stable_diffusion.samplers.sampler_dpm import DPMSolverSampler +from nemo.collections.nlp.parts.nlp_overrides import ( + NLPDDPStrategy, + NLPSaveRestoreConnector, +) def encode_prompt(cond_stage_model, prompt, unconditional_guidance_scale, batch_size): @@ -42,8 +46,10 @@ def initialize_sampler(model, sampler_type): sampler = DDIMSampler(model) elif sampler_type == 'PLMS': sampler = PLMSSampler(model) + elif sampler_type == 'DPM-SOLVER': + sampler = DPMSolverSampler(model) else: - raise ValueError(f'Sampler {sampler_type} is not supported for {cls.__name__}') + raise ValueError(f'Sampler {sampler_type} is not supported.') return sampler @@ -171,6 +177,7 @@ def pipeline(model, cfg, verbose=True, rng=None): for text_prompt, pils in zip(prompts, output): for idx, image in enumerate(pils): image.save(os.path.join(out_path, f'{text_prompt[:50]}_{idx}.png')) + #image.save(os.path.join(out_path, f'image_{img_idx}.png')) else: with open(os.path.join(out_path, 'output.pkl'), 'wb') as f: pickle.dump(output, f) From 9282f51a8899228969ba6431a7a36c36cca046e8 Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Thu, 4 May 2023 05:51:50 +0000 Subject: [PATCH 007/512] modify base sampler and pipeline to incorporate dpm-solver --- .../models/stable_diffusion/samplers/base_sampler.py | 8 ++++---- .../multimodal/parts/stable_diffusion/pipeline.py | 10 ---------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py index b5f08e60b1f5..52e86055065a 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py @@ -77,10 +77,10 @@ def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, @abstractmethod def p_sampling_fn(self): pass - + def dpm_sampling_fn(self): pass - + @torch.no_grad() def sample( self, @@ -120,7 +120,7 @@ def sample( C, H, W = shape size = (batch_size, C, H, W) print(f'Data shape for sampling is {size}, eta {eta}') - + if self.sampler is Sampler.DPM: return self.dpm_sampling_fn(shape=shape, steps=S, conditioning=conditioning, unconditional_conditioning=unconditional_conditioning, unconditional_guidance_scale=unconditional_guidance_scale, x_T=x_T) @@ -168,7 +168,7 @@ def sampling_fn( img = torch.randn(shape, generator=self.model.rng, device=device) else: img = x_T - + if timesteps is None: timesteps = self.ddpm_num_timesteps if ddim_use_original_steps else self.ddim_timesteps elif timesteps is not None and not ddim_use_original_steps: diff --git a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py index 82dd35dd044b..4cea9eb8691b 100644 --- a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py +++ b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py @@ -16,20 +16,11 @@ import time import torch -from omegaconf.omegaconf import OmegaConf, open_dict from PIL import Image -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion, MegatronLatentDiffusion from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler from nemo.collections.multimodal.models.stable_diffusion.samplers.plms import PLMSSampler from nemo.collections.multimodal.models.stable_diffusion.samplers.sampler_dpm import DPMSolverSampler -from nemo.collections.nlp.parts.nlp_overrides import ( - NLPDDPStrategy, - NLPSaveRestoreConnector, -) def encode_prompt(cond_stage_model, prompt, unconditional_guidance_scale, batch_size): @@ -177,7 +168,6 @@ def pipeline(model, cfg, verbose=True, rng=None): for text_prompt, pils in zip(prompts, output): for idx, image in enumerate(pils): image.save(os.path.join(out_path, f'{text_prompt[:50]}_{idx}.png')) - #image.save(os.path.join(out_path, f'image_{img_idx}.png')) else: with open(os.path.join(out_path, 'output.pkl'), 'wb') as f: pickle.dump(output, f) From c3efbe2328d053cb2d3ad65d1ac4dc3867fa494c Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Thu, 4 May 2023 08:32:03 +0000 Subject: [PATCH 008/512] add util functions to support dpm-solver --- .../stable_diffusion/diffusionmodules/util.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py index 2dbc4830142f..a80495c1f857 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py @@ -270,3 +270,43 @@ def noise_like(shape, device, repeat=False): repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1))) noise = lambda: torch.randn(shape, device=device) return repeat_noise() if repeat else noise() + + +def interpolate_fn(x, xp, yp): + """ + A piecewise linear function y = f(x), using xp and yp as keypoints. + """ + N, K = x.shape[0], xp.shape[1] + all_x = torch.cat([x.unsqueeze(2), xp.unsqueeze(0).repeat((N, 1, 1))], dim=2) + sorted_all_x, x_indices = torch.sort(all_x, dim=2) + x_idx = torch.argmin(x_indices, dim=2) + cand_start_idx = x_idx - 1 + start_idx = torch.where( + torch.eq(x_idx, 0), + torch.tensor(1, device=x.device), + torch.where( + torch.eq(x_idx, K), torch.tensor(K - 2, device=x.device), cand_start_idx, + ), + ) + end_idx = torch.where(torch.eq(start_idx, cand_start_idx), start_idx + 2, start_idx + 1) + start_x = torch.gather(sorted_all_x, dim=2, index=start_idx.unsqueeze(2)).squeeze(2) + end_x = torch.gather(sorted_all_x, dim=2, index=end_idx.unsqueeze(2)).squeeze(2) + start_idx2 = torch.where( + torch.eq(x_idx, 0), + torch.tensor(0, device=x.device), + torch.where( + torch.eq(x_idx, K), torch.tensor(K - 2, device=x.device), cand_start_idx, + ), + ) + y_positions_expanded = yp.unsqueeze(0).expand(N, -1, -1) + start_y = torch.gather(y_positions_expanded, dim=2, index=start_idx2.unsqueeze(2)).squeeze(2) + end_y = torch.gather(y_positions_expanded, dim=2, index=(start_idx2 + 1).unsqueeze(2)).squeeze(2) + cand = start_y + (x - start_x) * (end_y - start_y) / (end_x - start_x) + return cand + + +def expand_dims(v, dims): + """ + Expand the tensor `v` to the dim `dims`. + """ + return v[(...,) + (None,)*(dims - 1)] \ No newline at end of file From dc9f1d36454a8265778ca7abfa42d2127b102dcd Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Thu, 4 May 2023 08:34:29 +0000 Subject: [PATCH 009/512] add dpm-solver, its noise scheduler and model wrapper --- .../stable_diffusion/samplers/dpmsolver.py | 429 ++++++++++++++++++ .../stable_diffusion/samplers/sampler_dpm.py | 64 +++ 2 files changed, 493 insertions(+) create mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py new file mode 100644 index 000000000000..795d762bb2b6 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py @@ -0,0 +1,429 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import math + +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import interpolate_fn, expand_dims + + +class NoiseScheduleVP: + def __init__( + self, + schedule='discrete', + betas=None, + alphas_cumprod=None, + continuous_beta_0=0.1, + continuous_beta_1=20., + ): + """Create a wrapper class for the forward SDE. + + """ + + if schedule not in ['discrete', 'linear', 'cosine']: + raise ValueError("Unsupported noise schedule {}. The schedule needs to be 'discrete' or 'linear' or 'cosine'".format(schedule)) + + self.schedule = schedule + if schedule == 'discrete': + if betas is not None: + log_alphas = 0.5 * torch.log(1 - betas).cumsum(dim=0) + else: + assert alphas_cumprod is not None + log_alphas = 0.5 * torch.log(alphas_cumprod) + self.total_N = len(log_alphas) + self.T = 1. + self.t_array = torch.linspace(0., 1., self.total_N + 1)[1:].reshape((1, -1)) + self.log_alpha_array = log_alphas.reshape((1, -1,)) + else: + self.total_N = 1000 + self.beta_0 = continuous_beta_0 + self.beta_1 = continuous_beta_1 + self.cosine_s = 0.008 + self.cosine_beta_max = 999. + self.cosine_t_max = math.atan(self.cosine_beta_max * (1. + self.cosine_s) / math.pi) * 2. * (1. + self.cosine_s) / math.pi - self.cosine_s + self.cosine_log_alpha_0 = math.log(math.cos(self.cosine_s / (1. + self.cosine_s) * math.pi / 2.)) + self.schedule = schedule + if schedule == 'cosine': + self.T = 0.9946 + else: + self.T = 1. + + def marginal_log_mean_coeff(self, t): + """ + Compute log(alpha_t) of a given continuous-time label t in [0, T]. + """ + if self.schedule == 'discrete': + return interpolate_fn(t.reshape((-1, 1)), self.t_array.to(t.device), self.log_alpha_array.to(t.device)).reshape((-1)) + elif self.schedule == 'linear': + return -0.25 * t ** 2 * (self.beta_1 - self.beta_0) - 0.5 * t * self.beta_0 + elif self.schedule == 'cosine': + def log_alpha_fn(s): + return torch.log(torch.cos((s + self.cosine_s) / (1. + self.cosine_s) * math.pi / 2.)) + + log_alpha_t = log_alpha_fn(t) - self.cosine_log_alpha_0 + return log_alpha_t + + def marginal_alpha(self, t): + """ + Compute alpha_t of a given continuous-time label t in [0, T]. + """ + return torch.exp(self.marginal_log_mean_coeff(t)) + + def marginal_std(self, t): + """ + Compute sigma_t of a given continuous-time label t in [0, T]. + """ + return torch.sqrt(1. - torch.exp(2. * self.marginal_log_mean_coeff(t))) + + def marginal_lambda(self, t): + """ + Compute lambda_t = log(alpha_t) - log(sigma_t) of a given continuous-time label t in [0, T]. + """ + log_mean_coeff = self.marginal_log_mean_coeff(t) + log_std = 0.5 * torch.log(1. - torch.exp(2. * log_mean_coeff)) + return log_mean_coeff - log_std + + def inverse_lambda(self, lamb): + """ + Compute the continuous-time label t in [0, T] of a given half-logSNR lambda_t. + """ + if self.schedule == 'linear': + tmp = 2. * (self.beta_1 - self.beta_0) * torch.logaddexp(-2. * lamb, torch.zeros((1,)).to(lamb)) + Delta = self.beta_0**2 + tmp + return tmp / (torch.sqrt(Delta) + self.beta_0) / (self.beta_1 - self.beta_0) + elif self.schedule == 'discrete': + log_alpha = -0.5 * torch.logaddexp(torch.zeros((1,)).to(lamb.device), -2. * lamb) + t = interpolate_fn(log_alpha.reshape((-1, 1)), torch.flip(self.log_alpha_array.to(lamb.device), [1]), torch.flip(self.t_array.to(lamb.device), [1])) + return t.reshape((-1,)) + else: + log_alpha = -0.5 * torch.logaddexp(-2. * lamb, torch.zeros((1,)).to(lamb)) + + def t_fn(log_alpha_t): + return torch.arccos(torch.exp(log_alpha_t + self.cosine_log_alpha_0)) * 2. * (1. + self.cosine_s) / math.pi - self.cosine_s + t = t_fn(log_alpha) + return t + + +def model_wrapper( + model, + noise_schedule, + model_type="noise", + model_kwargs={}, + guidance_type="uncond", + condition=None, + unconditional_condition=None, + guidance_scale=1., + classifier_fn=None, + classifier_kwargs={}, +): + """Create a wrapper function for the noise prediction model. + + """ + + def get_model_input_time(t_continuous): + if noise_schedule.schedule == 'discrete': + return (t_continuous - 1. / noise_schedule.total_N) * 1000. + else: + return t_continuous + + def noise_pred_fn(x, t_continuous, cond=None): + if t_continuous.reshape((-1,)).shape[0] == 1: + t_continuous = t_continuous.expand((x.shape[0])) + t_input = get_model_input_time(t_continuous) + if cond is None: + output = model(x, t_input, **model_kwargs) + else: + output = model(x, t_input, cond, **model_kwargs) + if model_type == "noise": + return output + elif model_type == "x_start": + alpha_t, sigma_t = noise_schedule.marginal_alpha(t_continuous), noise_schedule.marginal_std(t_continuous) + dims = x.dim() + return (x - expand_dims(alpha_t, dims) * output) / expand_dims(sigma_t, dims) + + def cond_grad_fn(x, t_input): + """ + Compute the gradient of the classifier, i.e. nabla_{x} log p_t(cond | x_t). + """ + with torch.enable_grad(): + x_in = x.detach().requires_grad_(True) + log_prob = classifier_fn(x_in, t_input, condition, **classifier_kwargs) + return torch.autograd.grad(log_prob.sum(), x_in)[0] + + def model_fn(x, t_continuous): + """ + The noise predicition model function that is used for DPM-Solver. + """ + if t_continuous.reshape((-1,)).shape[0] == 1: + t_continuous = t_continuous.expand((x.shape[0])) + if guidance_type == "uncond": + return noise_pred_fn(x, t_continuous) + elif guidance_type == "classifier": + assert classifier_fn is not None + t_input = get_model_input_time(t_continuous) + cond_grad = cond_grad_fn(x, t_input) + sigma_t = noise_schedule.marginal_std(t_continuous) + noise = noise_pred_fn(x, t_continuous) + return noise - guidance_scale * expand_dims(sigma_t, dims=cond_grad.dim()) * cond_grad + elif guidance_type == "classifier-free": + if guidance_scale == 1. or unconditional_condition is None: + return noise_pred_fn(x, t_continuous, cond=condition) + else: + x_in = torch.cat([x] * 2) + t_in = torch.cat([t_continuous] * 2) + c_in = torch.cat([unconditional_condition, condition]) + noise_uncond, noise = noise_pred_fn(x_in, t_in, cond=c_in).chunk(2) + return noise_uncond + guidance_scale * (noise - noise_uncond) + + assert model_type in ["noise", "x_start"] + assert guidance_type in ["uncond", "classifier", "classifier-free"] + return model_fn + + +class DPMSolver: + def __init__(self, model_fn, noise_schedule, predict_x0=False, thresholding=False, max_val=1.): + """Construct a DPM-Solver. + """ + self.model = model_fn + self.noise_schedule = noise_schedule + self.predict_x0 = predict_x0 + self.thresholding = thresholding + self.max_val = max_val + + def noise_prediction_fn(self, x, t): + """ + Return the noise prediction model. + """ + return self.model(x, t) + + def data_prediction_fn(self, x, t): + """ + Return the data prediction model (with thresholding). + """ + noise = self.noise_prediction_fn(x, t) + dims = x.dim() + alpha_t, sigma_t = self.noise_schedule.marginal_alpha(t), self.noise_schedule.marginal_std(t) + x0 = (x - expand_dims(sigma_t, dims) * noise) / expand_dims(alpha_t, dims) + if self.thresholding: + p = 0.995 # A hyperparameter in the paper of "Imagen" [1]. + s = torch.quantile(torch.abs(x0).reshape((x0.shape[0], -1)), p, dim=1) + s = expand_dims(torch.maximum(s, self.max_val * torch.ones_like(s).to(s.device)), dims) + x0 = torch.clamp(x0, -s, s) / s + return x0 + + def model_fn(self, x, t): + """ + Convert the model to the noise prediction model or the data prediction model. + """ + if self.predict_x0: + return self.data_prediction_fn(x, t) + else: + return self.noise_prediction_fn(x, t) + + def get_time_steps(self, skip_type, t_T, t_0, N, device): + """Compute the intermediate time steps for sampling. + """ + if skip_type == 'logSNR': + lambda_T = self.noise_schedule.marginal_lambda(torch.tensor(t_T).to(device)) + lambda_0 = self.noise_schedule.marginal_lambda(torch.tensor(t_0).to(device)) + logSNR_steps = torch.linspace(lambda_T.cpu().item(), lambda_0.cpu().item(), N + 1).to(device) + return self.noise_schedule.inverse_lambda(logSNR_steps) + elif skip_type == 'time_uniform': + return torch.linspace(t_T, t_0, N + 1).to(device) + elif skip_type == 'time_quadratic': + t_order = 2 + t = torch.linspace(t_T**(1. / t_order), t_0**(1. / t_order), N + 1).pow(t_order).to(device) + return t + else: + raise ValueError("Unsupported skip_type {}, need to be 'logSNR' or 'time_uniform' or 'time_quadratic'".format(skip_type)) + + def denoise_to_zero_fn(self, x, s): + """ + Denoise at the final step, which is equivalent to solve the ODE from lambda_s to infty by first-order discretization. + """ + return self.data_prediction_fn(x, s) + + def dpm_solver_first_update(self, x, s, t, model_s=None, return_intermediate=False): + """ + DPM-Solver-1 (equivalent to DDIM) from time `s` to time `t`. + """ + ns = self.noise_schedule + dims = x.dim() + lambda_s, lambda_t = ns.marginal_lambda(s), ns.marginal_lambda(t) + h = lambda_t - lambda_s + log_alpha_s, log_alpha_t = ns.marginal_log_mean_coeff(s), ns.marginal_log_mean_coeff(t) + sigma_s, sigma_t = ns.marginal_std(s), ns.marginal_std(t) + alpha_t = torch.exp(log_alpha_t) + + if self.predict_x0: + phi_1 = torch.expm1(-h) + if model_s is None: + model_s = self.model_fn(x, s) + x_t = ( + expand_dims(sigma_t / sigma_s, dims) * x + - expand_dims(alpha_t * phi_1, dims) * model_s + ) + if return_intermediate: + return x_t, {'model_s': model_s} + else: + return x_t + else: + phi_1 = torch.expm1(h) + if model_s is None: + model_s = self.model_fn(x, s) + x_t = ( + expand_dims(torch.exp(log_alpha_t - log_alpha_s), dims) * x + - expand_dims(sigma_t * phi_1, dims) * model_s + ) + if return_intermediate: + return x_t, {'model_s': model_s} + else: + return x_t + + def multistep_dpm_solver_second_update(self, x, model_prev_list, t_prev_list, t, solver_type="dpm_solver"): + """ + Multistep solver DPM-Solver-2 from time `t_prev_list[-1]` to time `t`. + """ + if solver_type not in ['dpm_solver', 'taylor']: + raise ValueError("'solver_type' must be either 'dpm_solver' or 'taylor', got {}".format(solver_type)) + ns = self.noise_schedule + dims = x.dim() + model_prev_1, model_prev_0 = model_prev_list + t_prev_1, t_prev_0 = t_prev_list + lambda_prev_1, lambda_prev_0, lambda_t = ns.marginal_lambda(t_prev_1), ns.marginal_lambda(t_prev_0), ns.marginal_lambda(t) + log_alpha_prev_0, log_alpha_t = ns.marginal_log_mean_coeff(t_prev_0), ns.marginal_log_mean_coeff(t) + sigma_prev_0, sigma_t = ns.marginal_std(t_prev_0), ns.marginal_std(t) + alpha_t = torch.exp(log_alpha_t) + + h_0 = lambda_prev_0 - lambda_prev_1 + h = lambda_t - lambda_prev_0 + r0 = h_0 / h + D1_0 = expand_dims(1. / r0, dims) * (model_prev_0 - model_prev_1) + if self.predict_x0: + if solver_type == 'dpm_solver': + x_t = ( + expand_dims(sigma_t / sigma_prev_0, dims) * x + - expand_dims(alpha_t * (torch.exp(-h) - 1.), dims) * model_prev_0 + - 0.5 * expand_dims(alpha_t * (torch.exp(-h) - 1.), dims) * D1_0 + ) + elif solver_type == 'taylor': + x_t = ( + expand_dims(sigma_t / sigma_prev_0, dims) * x + - expand_dims(alpha_t * (torch.exp(-h) - 1.), dims) * model_prev_0 + + expand_dims(alpha_t * ((torch.exp(-h) - 1.) / h + 1.), dims) * D1_0 + ) + else: + if solver_type == 'dpm_solver': + x_t = ( + expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x + - expand_dims(sigma_t * (torch.exp(h) - 1.), dims) * model_prev_0 + - 0.5 * expand_dims(sigma_t * (torch.exp(h) - 1.), dims) * D1_0 + ) + elif solver_type == 'taylor': + x_t = ( + expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x + - expand_dims(sigma_t * (torch.exp(h) - 1.), dims) * model_prev_0 + - expand_dims(sigma_t * ((torch.exp(h) - 1.) / h - 1.), dims) * D1_0 + ) + return x_t + + def multistep_dpm_solver_third_update(self, x, model_prev_list, t_prev_list, t, solver_type='dpm_solver'): + """ + Multistep solver DPM-Solver-3 from time `t_prev_list[-1]` to time `t`. + """ + ns = self.noise_schedule + dims = x.dim() + model_prev_2, model_prev_1, model_prev_0 = model_prev_list + t_prev_2, t_prev_1, t_prev_0 = t_prev_list + lambda_prev_2, lambda_prev_1, lambda_prev_0, lambda_t = ns.marginal_lambda(t_prev_2), ns.marginal_lambda(t_prev_1), ns.marginal_lambda(t_prev_0), ns.marginal_lambda(t) + log_alpha_prev_0, log_alpha_t = ns.marginal_log_mean_coeff(t_prev_0), ns.marginal_log_mean_coeff(t) + sigma_prev_0, sigma_t = ns.marginal_std(t_prev_0), ns.marginal_std(t) + alpha_t = torch.exp(log_alpha_t) + + h_1 = lambda_prev_1 - lambda_prev_2 + h_0 = lambda_prev_0 - lambda_prev_1 + h = lambda_t - lambda_prev_0 + r0, r1 = h_0 / h, h_1 / h + D1_0 = expand_dims(1. / r0, dims) * (model_prev_0 - model_prev_1) + D1_1 = expand_dims(1. / r1, dims) * (model_prev_1 - model_prev_2) + D1 = D1_0 + expand_dims(r0 / (r0 + r1), dims) * (D1_0 - D1_1) + D2 = expand_dims(1. / (r0 + r1), dims) * (D1_0 - D1_1) + if self.predict_x0: + x_t = ( + expand_dims(sigma_t / sigma_prev_0, dims) * x + - expand_dims(alpha_t * (torch.exp(-h) - 1.), dims) * model_prev_0 + + expand_dims(alpha_t * ((torch.exp(-h) - 1.) / h + 1.), dims) * D1 + - expand_dims(alpha_t * ((torch.exp(-h) - 1. + h) / h**2 - 0.5), dims) * D2 + ) + else: + x_t = ( + expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x + - expand_dims(sigma_t * (torch.exp(h) - 1.), dims) * model_prev_0 + - expand_dims(sigma_t * ((torch.exp(h) - 1.) / h - 1.), dims) * D1 + - expand_dims(sigma_t * ((torch.exp(h) - 1. - h) / h**2 - 0.5), dims) * D2 + ) + return x_t + + def multistep_dpm_solver_update(self, x, model_prev_list, t_prev_list, t, order, solver_type='dpm_solver'): + """ + Multistep DPM-Solver with the order `order` from time `t_prev_list[-1]` to time `t`. + """ + if order == 1: + return self.dpm_solver_first_update(x, t_prev_list[-1], t, model_s=model_prev_list[-1]) + elif order == 2: + return self.multistep_dpm_solver_second_update(x, model_prev_list, t_prev_list, t, solver_type=solver_type) + elif order == 3: + return self.multistep_dpm_solver_third_update(x, model_prev_list, t_prev_list, t, solver_type=solver_type) + else: + raise ValueError("Solver order must be 1 or 2 or 3, got {}".format(order)) + + def sample(self, x, steps=20, t_start=None, t_end=None, order=3, skip_type='time_uniform', method='singlestep', lower_order_final=True, denoise_to_zero=False, solver_type='dpm_solver', atol=0.0078, rtol=0.05): + """ + Compute the sample at time `t_end` by DPM-Solver, given the initial `x` at time `t_start`. + """ + t_0 = 1. / self.noise_schedule.total_N if t_end is None else t_end + t_T = self.noise_schedule.T if t_start is None else t_start + device = x.device + + if method == 'multistep': + assert steps >= order + timesteps = self.get_time_steps(skip_type=skip_type, t_T=t_T, t_0=t_0, N=steps, device=device) + assert timesteps.shape[0] - 1 == steps + with torch.no_grad(): + vec_t = timesteps[0].expand((x.shape[0])) + model_prev_list = [self.model_fn(x, vec_t)] + t_prev_list = [vec_t] + # Init the first `order` values by lower order multistep DPM-Solver. + for init_order in range(1, order): + vec_t = timesteps[init_order].expand(x.shape[0]) + x = self.multistep_dpm_solver_update(x, model_prev_list, t_prev_list, vec_t, init_order, solver_type=solver_type) + model_prev_list.append(self.model_fn(x, vec_t)) + t_prev_list.append(vec_t) + # Compute the remaining values by `order`-th order multistep DPM-Solver. + for step in range(order, steps + 1): + vec_t = timesteps[step].expand(x.shape[0]) + if lower_order_final and steps < 15: + step_order = min(order, steps + 1 - step) + else: + step_order = order + x = self.multistep_dpm_solver_update(x, model_prev_list, t_prev_list, vec_t, step_order, solver_type=solver_type) + for i in range(order - 1): + t_prev_list[i] = t_prev_list[i + 1] + model_prev_list[i] = model_prev_list[i + 1] + t_prev_list[-1] = vec_t + # We do not need to evaluate the final model value. + if step < steps: + model_prev_list[-1] = self.model_fn(x, vec_t) + if denoise_to_zero: + x = self.denoise_to_zero_fn(x, torch.ones((x.shape[0],)).to(device) * t_0) + return x diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py new file mode 100644 index 000000000000..756aa4b6a991 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py @@ -0,0 +1,64 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""SAMPLING ONLY.""" + +import torch + +from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler +from nemo.collections.multimodal.models.stable_diffusion.samplers.base_sampler import AbstractBaseSampler +from .dpmsolver import NoiseScheduleVP, model_wrapper, DPMSolver + + +class DPMSolverSampler(AbstractBaseSampler): + def __init__(self, model, **kwargs): + + super().__init__(model, sampler=Sampler.DPM, **kwargs) + + def to_torch(x, model): + x_copy = x.clone() + x_detached = x_copy.detach() + x_float32 = x_detached.to(torch.float32) + x_device = x_float32.to(model.betas.device) + return x_device + + self.register_buffer('alphas_cumprod', to_torch(model.alphas_cumprod, model)) + + @torch.no_grad() + def p_sampling_fn(self): + pass + + @torch.no_grad() + def dpm_sampling_fn(self, shape, steps, conditioning=None, unconditional_conditioning=None, unconditional_guidance_scale=1., x_T=None): + + device = self.model.betas.device + if x_T is None: + img = torch.randn(shape, generator=self.model.rng, device=device) + else: + img = x_T + + ns = NoiseScheduleVP('discrete', alphas_cumprod=self.alphas_cumprod) + + model_fn = model_wrapper( + lambda x, t, c: self.model.apply_model(x, t, c), + ns, + model_type="noise", + guidance_type="classifier-free", + condition=conditioning, + unconditional_condition=unconditional_conditioning, + guidance_scale=unconditional_guidance_scale, + ) + dpm_solver = DPMSolver(model_fn, ns, predict_x0=True, thresholding=False) + x = dpm_solver.sample(img, steps=steps, skip_type="time_uniform", method="multistep", order=2, lower_order_final=True) + + return x.to(device), None From 15358d9baff0e528506587f505f8444f297577b2 Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Thu, 4 May 2023 17:46:55 +0000 Subject: [PATCH 010/512] minor fix --- .../modules/stable_diffusion/diffusionmodules/util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py index a80495c1f857..3d75b20d9bb1 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py @@ -309,4 +309,5 @@ def expand_dims(v, dims): """ Expand the tensor `v` to the dim `dims`. """ - return v[(...,) + (None,)*(dims - 1)] \ No newline at end of file + return v[(...,) + (None,)*(dims - 1)] + \ No newline at end of file From ba75277a96de36e9d0eac149db1f98074842a6da Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Thu, 4 May 2023 22:26:15 +0000 Subject: [PATCH 011/512] minor fix --- .../modules/stable_diffusion/diffusionmodules/util.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py index 3d75b20d9bb1..c196050a7be8 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py @@ -22,7 +22,6 @@ import math -import os import numpy as np import torch @@ -310,4 +309,3 @@ def expand_dims(v, dims): Expand the tensor `v` to the dim `dims`. """ return v[(...,) + (None,)*(dims - 1)] - \ No newline at end of file From c1691a88bd99c350938ed54881ce5104d10c2a34 Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Thu, 4 May 2023 23:04:13 +0000 Subject: [PATCH 012/512] minor fix --- .../modules/stable_diffusion/diffusionmodules/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py index c196050a7be8..f5897263b5e0 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py @@ -308,4 +308,4 @@ def expand_dims(v, dims): """ Expand the tensor `v` to the dim `dims`. """ - return v[(...,) + (None,)*(dims - 1)] + return v[(...,) + (None,) * (dims - 1)] From 634c3b2255a8bdd54dec28b2ff55a3f950b99fc5 Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Thu, 4 May 2023 23:18:36 +0000 Subject: [PATCH 013/512] minor fix --- .../stable_diffusion/diffusionmodules/util.py | 93 ++++++++++++++----- 1 file changed, 68 insertions(+), 25 deletions(-) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py index f5897263b5e0..cda12b668059 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py @@ -30,12 +30,21 @@ from torch._dynamo import disable -def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): +def make_beta_schedule( + schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3 +): if schedule == "linear": - betas = torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2 + betas = ( + torch.linspace( + linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64 + ) + ** 2 + ) elif schedule == "cosine": - timesteps = torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s + timesteps = ( + torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s + ) alphas = timesteps / (1 + cosine_s) * np.pi / 2 alphas = torch.cos(alphas).pow(2) alphas = alphas / alphas[0] @@ -43,28 +52,39 @@ def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, betas = np.clip(betas, a_min=0, a_max=0.999) elif schedule == "sqrt_linear": - betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) + betas = torch.linspace( + linear_start, linear_end, n_timestep, dtype=torch.float64 + ) elif schedule == "sqrt": - betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5 + betas = ( + torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) + ** 0.5 + ) else: raise ValueError(f"schedule '{schedule}' unknown.") return betas.numpy() -def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps, verbose=True): - if ddim_discr_method == 'uniform': +def make_ddim_timesteps( + ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps, verbose=True +): + if ddim_discr_method == "uniform": c = num_ddpm_timesteps // num_ddim_timesteps ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c))) - elif ddim_discr_method == 'quad': - ddim_timesteps = ((np.linspace(0, np.sqrt(num_ddpm_timesteps * 0.8), num_ddim_timesteps)) ** 2).astype(int) + elif ddim_discr_method == "quad": + ddim_timesteps = ( + (np.linspace(0, np.sqrt(num_ddpm_timesteps * 0.8), num_ddim_timesteps)) ** 2 + ).astype(int) else: - raise NotImplementedError(f'There is no ddim discretization method called "{ddim_discr_method}"') + raise NotImplementedError( + f'There is no ddim discretization method called "{ddim_discr_method}"' + ) # assert ddim_timesteps.shape[0] == num_ddim_timesteps # add one to get the final alpha values right (the ones from first scale to data during sampling) steps_out = ddim_timesteps + 1 if verbose: - print(f'Selected timesteps for ddim sampler: {steps_out}') + print(f"Selected timesteps for ddim sampler: {steps_out}") return steps_out @@ -74,12 +94,16 @@ def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbose=True): alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_timesteps[:-1]].tolist()) # according the the formula provided in https://arxiv.org/abs/2010.02502 - sigmas = eta * np.sqrt((1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev)) + sigmas = eta * np.sqrt( + (1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev) + ) if verbose: - print(f'Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}') print( - f'For the chosen value of eta, which is {eta}, ' - f'this results in the following sigma_t schedule for ddim sampler {sigmas}' + f"Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}" + ) + print( + f"For the chosen value of eta, which is {eta}, " + f"this results in the following sigma_t schedule for ddim sampler {sigmas}" ) return sigmas, alphas, alphas_prev @@ -147,7 +171,10 @@ def backward(ctx, *output_grads): shallow_copies = [x.view_as(x) for x in ctx.input_tensors] output_tensors = ctx.run_function(*shallow_copies) input_grads = torch.autograd.grad( - output_tensors, ctx.input_tensors + ctx.input_params, output_grads, allow_unused=True, + output_tensors, + ctx.input_tensors + ctx.input_params, + output_grads, + allow_unused=True, ) del ctx.input_tensors del ctx.input_params @@ -162,7 +189,9 @@ def get_idx(end, device): return torch.arange(start=0, end=end, dtype=torch.float32, device=device) -def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False, use_fp16=False): +def timestep_embedding( + timesteps, dim, max_period=10000, repeat_only=False, use_fp16=False +): """ Create sinusoidal timestep embeddings. :param timesteps: a 1-D Tensor of N indices, one per batch element. @@ -178,9 +207,11 @@ def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False, use_ args = timesteps[:, None].float() * freqs[None] embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) if dim % 2: - embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + embedding = torch.cat( + [embedding, torch.zeros_like(embedding[:, :1])], dim=-1 + ) else: - embedding = repeat(timesteps, 'b -> b d', d=dim) + embedding = repeat(timesteps, "b -> b d", d=dim) if use_fp16: return embedding.half() else: @@ -266,7 +297,9 @@ def avg_pool_nd(dims, *args, **kwargs): def noise_like(shape, device, repeat=False): - repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1))) + repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat( + shape[0], *((1,) * (len(shape) - 1)) + ) noise = lambda: torch.randn(shape, device=device) return repeat_noise() if repeat else noise() @@ -284,22 +317,32 @@ def interpolate_fn(x, xp, yp): torch.eq(x_idx, 0), torch.tensor(1, device=x.device), torch.where( - torch.eq(x_idx, K), torch.tensor(K - 2, device=x.device), cand_start_idx, + torch.eq(x_idx, K), + torch.tensor(K - 2, device=x.device), + cand_start_idx, ), ) - end_idx = torch.where(torch.eq(start_idx, cand_start_idx), start_idx + 2, start_idx + 1) + end_idx = torch.where( + torch.eq(start_idx, cand_start_idx), start_idx + 2, start_idx + 1 + ) start_x = torch.gather(sorted_all_x, dim=2, index=start_idx.unsqueeze(2)).squeeze(2) end_x = torch.gather(sorted_all_x, dim=2, index=end_idx.unsqueeze(2)).squeeze(2) start_idx2 = torch.where( torch.eq(x_idx, 0), torch.tensor(0, device=x.device), torch.where( - torch.eq(x_idx, K), torch.tensor(K - 2, device=x.device), cand_start_idx, + torch.eq(x_idx, K), + torch.tensor(K - 2, device=x.device), + cand_start_idx, ), ) y_positions_expanded = yp.unsqueeze(0).expand(N, -1, -1) - start_y = torch.gather(y_positions_expanded, dim=2, index=start_idx2.unsqueeze(2)).squeeze(2) - end_y = torch.gather(y_positions_expanded, dim=2, index=(start_idx2 + 1).unsqueeze(2)).squeeze(2) + start_y = torch.gather( + y_positions_expanded, dim=2, index=start_idx2.unsqueeze(2) + ).squeeze(2) + end_y = torch.gather( + y_positions_expanded, dim=2, index=(start_idx2 + 1).unsqueeze(2) + ).squeeze(2) cand = start_y + (x - start_x) * (end_y - start_y) / (end_x - start_x) return cand From ae0ee782731dc846fac0bacc67efaeef9ccfe01f Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Thu, 4 May 2023 23:27:10 +0000 Subject: [PATCH 014/512] minor fix --- .../stable_diffusion/diffusionmodules/util.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py index cda12b668059..bee430e23871 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py @@ -322,9 +322,7 @@ def interpolate_fn(x, xp, yp): cand_start_idx, ), ) - end_idx = torch.where( - torch.eq(start_idx, cand_start_idx), start_idx + 2, start_idx + 1 - ) + end_idx = torch.where(torch.eq(start_idx, cand_start_idx), start_idx + 2, start_idx + 1) start_x = torch.gather(sorted_all_x, dim=2, index=start_idx.unsqueeze(2)).squeeze(2) end_x = torch.gather(sorted_all_x, dim=2, index=end_idx.unsqueeze(2)).squeeze(2) start_idx2 = torch.where( @@ -337,12 +335,8 @@ def interpolate_fn(x, xp, yp): ), ) y_positions_expanded = yp.unsqueeze(0).expand(N, -1, -1) - start_y = torch.gather( - y_positions_expanded, dim=2, index=start_idx2.unsqueeze(2) - ).squeeze(2) - end_y = torch.gather( - y_positions_expanded, dim=2, index=(start_idx2 + 1).unsqueeze(2) - ).squeeze(2) + start_y = torch.gather(y_positions_expanded, dim=2, index=start_idx2.unsqueeze(2)).squeeze(2) + end_y = torch.gather(y_positions_expanded, dim=2, index=(start_idx2 + 1).unsqueeze(2)).squeeze(2) cand = start_y + (x - start_x) * (end_y - start_y) / (end_x - start_x) return cand From 4882e5fc21f2fc281331f7942e4e483486f69632 Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Thu, 4 May 2023 23:46:48 +0000 Subject: [PATCH 015/512] pre-commit fixes --- .../stable_diffusion/samplers/base_sampler.py | 204 +++++++--- .../stable_diffusion/samplers/dpmsolver.py | 357 ++++++++++++------ .../stable_diffusion/samplers/sampler_dpm.py | 30 +- .../stable_diffusion/diffusionmodules/util.py | 12 +- 4 files changed, 435 insertions(+), 168 deletions(-) diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py index 52e86055065a..29657439ac3e 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py @@ -40,7 +40,9 @@ def register_buffer(self, name, attr): attr = attr.to(torch.device("cuda")) setattr(self, name, attr) - def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, verbose=True): + def make_schedule( + self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, verbose=True + ): self.ddim_timesteps = make_ddim_timesteps( ddim_discr_method=ddim_discretize, num_ddim_timesteps=ddim_num_steps, @@ -48,31 +50,57 @@ def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, verbose=verbose, ) alphas_cumprod = self.model.alphas_cumprod - assert alphas_cumprod.shape[0] == self.ddpm_num_timesteps, 'alphas have to be defined for each timestep' - to_torch = lambda x: x.clone().detach().to(torch.float32).to(torch.cuda.current_device()) - self.register_buffer('betas', to_torch(self.model.betas)) - self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) - self.register_buffer('alphas_cumprod_prev', to_torch(self.model.alphas_cumprod_prev)) + assert ( + alphas_cumprod.shape[0] == self.ddpm_num_timesteps + ), "alphas have to be defined for each timestep" + to_torch = ( + lambda x: x.clone() + .detach() + .to(torch.float32) + .to(torch.cuda.current_device()) + ) + self.register_buffer("betas", to_torch(self.model.betas)) + self.register_buffer("alphas_cumprod", to_torch(alphas_cumprod)) + self.register_buffer( + "alphas_cumprod_prev", to_torch(self.model.alphas_cumprod_prev) + ) # calculations for diffusion q(x_t | x_{t-1}) and others - self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod.cpu()))) - self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1.0 - alphas_cumprod.cpu()))) - self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1.0 - alphas_cumprod.cpu()))) - self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod.cpu()))) - self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod.cpu() - 1))) + self.register_buffer( + "sqrt_alphas_cumprod", to_torch(np.sqrt(alphas_cumprod.cpu())) + ) + self.register_buffer( + "sqrt_one_minus_alphas_cumprod", + to_torch(np.sqrt(1.0 - alphas_cumprod.cpu())), + ) + self.register_buffer( + "log_one_minus_alphas_cumprod", to_torch(np.log(1.0 - alphas_cumprod.cpu())) + ) + self.register_buffer( + "sqrt_recip_alphas_cumprod", to_torch(np.sqrt(1.0 / alphas_cumprod.cpu())) + ) + self.register_buffer( + "sqrt_recipm1_alphas_cumprod", + to_torch(np.sqrt(1.0 / alphas_cumprod.cpu() - 1)), + ) # ddim sampling parameters ddim_sigmas, ddim_alphas, ddim_alphas_prev = make_ddim_sampling_parameters( - alphacums=alphas_cumprod.cpu(), ddim_timesteps=self.ddim_timesteps, eta=ddim_eta, verbose=verbose + alphacums=alphas_cumprod.cpu(), + ddim_timesteps=self.ddim_timesteps, + eta=ddim_eta, + verbose=verbose, ) - self.register_buffer('ddim_sigmas', ddim_sigmas) - self.register_buffer('ddim_alphas', ddim_alphas) - self.register_buffer('ddim_alphas_prev', ddim_alphas_prev) - self.register_buffer('ddim_sqrt_one_minus_alphas', np.sqrt(1.0 - ddim_alphas)) + self.register_buffer("ddim_sigmas", ddim_sigmas) + self.register_buffer("ddim_alphas", ddim_alphas) + self.register_buffer("ddim_alphas_prev", ddim_alphas_prev) + self.register_buffer("ddim_sqrt_one_minus_alphas", np.sqrt(1.0 - ddim_alphas)) sigmas_for_original_sampling_steps = ddim_eta * torch.sqrt( (1 - self.alphas_cumprod_prev) / (1 - self.alphas_cumprod) * (1 - self.alphas_cumprod / self.alphas_cumprod_prev) ) - self.register_buffer('ddim_sigmas_for_original_num_steps', sigmas_for_original_sampling_steps) + self.register_buffer( + "ddim_sigmas_for_original_num_steps", sigmas_for_original_sampling_steps + ) @abstractmethod def p_sampling_fn(self): @@ -111,34 +139,48 @@ def sample( if isinstance(conditioning, dict): cbs = conditioning[list(conditioning.keys())[0]][0].shape[0] if cbs != batch_size: - print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}") + print( + f"Warning: Got {cbs} conditionings but batch-size is {batch_size}" + ) else: if conditioning.shape[0] != batch_size: - print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}") + print( + f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}" + ) self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose) # sampling C, H, W = shape size = (batch_size, C, H, W) - print(f'Data shape for sampling is {size}, eta {eta}') + print(f"Data shape for sampling is {size}, eta {eta}") if self.sampler is Sampler.DPM: - return self.dpm_sampling_fn(shape=shape, steps=S, conditioning=conditioning, unconditional_conditioning=unconditional_conditioning, unconditional_guidance_scale=unconditional_guidance_scale, x_T=x_T) + return self.dpm_sampling_fn( + shape=shape, + steps=S, + conditioning=conditioning, + unconditional_conditioning=unconditional_conditioning, + unconditional_guidance_scale=unconditional_guidance_scale, + x_T=x_T, + ) - samples, intermediates = self.sampling_fn(conditioning, size, - callback=callback, - img_callback=img_callback, - quantize_denoised=quantize_x0, - mask=mask, x0=x0, - ddim_use_original_steps=False, - noise_dropout=noise_dropout, - temperature=temperature, - score_corrector=score_corrector, - corrector_kwargs=corrector_kwargs, - x_T=x_T, - log_every_t=log_every_t, - unconditional_guidance_scale=unconditional_guidance_scale, - unconditional_conditioning=unconditional_conditioning, - ) + samples, intermediates = self.sampling_fn( + conditioning, + size, + callback=callback, + img_callback=img_callback, + quantize_denoised=quantize_x0, + mask=mask, + x0=x0, + ddim_use_original_steps=False, + noise_dropout=noise_dropout, + temperature=temperature, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + x_T=x_T, + log_every_t=log_every_t, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning, + ) return samples, intermediates @torch.no_grad() @@ -170,34 +212,59 @@ def sampling_fn( img = x_T if timesteps is None: - timesteps = self.ddpm_num_timesteps if ddim_use_original_steps else self.ddim_timesteps + timesteps = ( + self.ddpm_num_timesteps + if ddim_use_original_steps + else self.ddim_timesteps + ) elif timesteps is not None and not ddim_use_original_steps: - subset_end = int(min(timesteps / self.ddim_timesteps.shape[0], 1) * self.ddim_timesteps.shape[0]) - 1 + subset_end = ( + int( + min(timesteps / self.ddim_timesteps.shape[0], 1) + * self.ddim_timesteps.shape[0] + ) + - 1 + ) timesteps = self.ddim_timesteps[:subset_end] - intermediates = {'x_inter': [img], 'pred_x0': [img]} + intermediates = {"x_inter": [img], "pred_x0": [img]} # TODO: Is this needed if self.sampler is Sampler.PLMS: - time_range = list(reversed(range(0, timesteps))) if ddim_use_original_steps else np.flip(timesteps) + time_range = ( + list(reversed(range(0, timesteps))) + if ddim_use_original_steps + else np.flip(timesteps) + ) else: - time_range = reversed(range(0, timesteps)) if ddim_use_original_steps else np.flip(timesteps) + time_range = ( + reversed(range(0, timesteps)) + if ddim_use_original_steps + else np.flip(timesteps) + ) total_steps = timesteps if ddim_use_original_steps else timesteps.shape[0] print(f"Running {self.sampler.name} Sampling with {total_steps} timesteps") - iterator = tqdm(time_range, desc=f'{self.sampler.name} Sampler', total=total_steps) + iterator = tqdm( + time_range, desc=f"{self.sampler.name} Sampler", total=total_steps + ) old_eps = [] for i, step in enumerate(iterator): index = total_steps - i - 1 ts = torch.full((b,), step, device=device, dtype=torch.long) if self.sampler is Sampler.PLMS: ts_next = torch.full( - (b,), time_range[min(i + 1, len(time_range) - 1)], device=device, dtype=torch.long + (b,), + time_range[min(i + 1, len(time_range) - 1)], + device=device, + dtype=torch.long, ) else: old_eps = None ts_next = None if mask is not None: assert x0 is not None - img_orig = self.model.q_sample(x0, ts) # TODO: deterministic forward pass? + img_orig = self.model.q_sample( + x0, ts + ) # TODO: deterministic forward pass? img = img_orig * mask + (1.0 - mask) * img outs = self.p_sampling_fn( img, @@ -226,12 +293,19 @@ def sampling_fn( if img_callback: img_callback(pred_x0, i) if index % log_every_t == 0 or index == total_steps - 1: - intermediates['x_inter'].append(img) - intermediates['pred_x0'].append(pred_x0) + intermediates["x_inter"].append(img) + intermediates["pred_x0"].append(pred_x0) return img, intermediates def _get_model_output( - self, x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, corrector_kwargs + self, + x, + t, + unconditional_conditioning, + unconditional_guidance_scale, + score_corrector, + c, + corrector_kwargs, ): if unconditional_conditioning is None or unconditional_guidance_scale == 1.0: e_t = self.model.apply_model(x, t, c) @@ -245,24 +319,48 @@ def _get_model_output( e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond) if score_corrector is not None: assert self.model.parameterization == "eps" - e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs) + e_t = score_corrector.modify_score( + self.model, e_t, x, t, c, **corrector_kwargs + ) return e_t def _get_x_prev_and_pred_x0( - self, use_original_steps, b, index, device, x, e_t, quantize_denoised, repeat_noise, temperature, noise_dropout + self, + use_original_steps, + b, + index, + device, + x, + e_t, + quantize_denoised, + repeat_noise, + temperature, + noise_dropout, ): alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas - alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev + alphas_prev = ( + self.model.alphas_cumprod_prev + if use_original_steps + else self.ddim_alphas_prev + ) sqrt_one_minus_alphas = ( - self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas + self.model.sqrt_one_minus_alphas_cumprod + if use_original_steps + else self.ddim_sqrt_one_minus_alphas + ) + sigmas = ( + self.model.ddim_sigmas_for_original_num_steps + if use_original_steps + else self.ddim_sigmas ) - sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas # select parameters corresponding to the currently considered timestep a_t = torch.full((b, 1, 1, 1), alphas[index], device=device) a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device) sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device) - sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index], device=device) + sqrt_one_minus_at = torch.full( + (b, 1, 1, 1), sqrt_one_minus_alphas[index], device=device + ) # current prediction for x_0 pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt() if quantize_denoised: diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py index 795d762bb2b6..6548e7abc8de 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py @@ -11,64 +11,94 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import torch import math -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import interpolate_fn, expand_dims +import torch + +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + expand_dims, + interpolate_fn, +) class NoiseScheduleVP: def __init__( - self, - schedule='discrete', - betas=None, - alphas_cumprod=None, - continuous_beta_0=0.1, - continuous_beta_1=20., - ): - """Create a wrapper class for the forward SDE. - - """ - - if schedule not in ['discrete', 'linear', 'cosine']: - raise ValueError("Unsupported noise schedule {}. The schedule needs to be 'discrete' or 'linear' or 'cosine'".format(schedule)) + self, + schedule="discrete", + betas=None, + alphas_cumprod=None, + continuous_beta_0=0.1, + continuous_beta_1=20.0, + ): + """Create a wrapper class for the forward SDE.""" + + if schedule not in ["discrete", "linear", "cosine"]: + raise ValueError( + "Unsupported noise schedule {}. The schedule needs to be 'discrete' or 'linear' or 'cosine'".format( + schedule + ) + ) self.schedule = schedule - if schedule == 'discrete': + if schedule == "discrete": if betas is not None: log_alphas = 0.5 * torch.log(1 - betas).cumsum(dim=0) else: assert alphas_cumprod is not None log_alphas = 0.5 * torch.log(alphas_cumprod) self.total_N = len(log_alphas) - self.T = 1. - self.t_array = torch.linspace(0., 1., self.total_N + 1)[1:].reshape((1, -1)) - self.log_alpha_array = log_alphas.reshape((1, -1,)) + self.T = 1.0 + self.t_array = torch.linspace(0.0, 1.0, self.total_N + 1)[1:].reshape( + (1, -1) + ) + self.log_alpha_array = log_alphas.reshape( + ( + 1, + -1, + ) + ) else: self.total_N = 1000 self.beta_0 = continuous_beta_0 self.beta_1 = continuous_beta_1 self.cosine_s = 0.008 - self.cosine_beta_max = 999. - self.cosine_t_max = math.atan(self.cosine_beta_max * (1. + self.cosine_s) / math.pi) * 2. * (1. + self.cosine_s) / math.pi - self.cosine_s - self.cosine_log_alpha_0 = math.log(math.cos(self.cosine_s / (1. + self.cosine_s) * math.pi / 2.)) + self.cosine_beta_max = 999.0 + self.cosine_t_max = ( + math.atan(self.cosine_beta_max * (1.0 + self.cosine_s) / math.pi) + * 2.0 + * (1.0 + self.cosine_s) + / math.pi + - self.cosine_s + ) + self.cosine_log_alpha_0 = math.log( + math.cos(self.cosine_s / (1.0 + self.cosine_s) * math.pi / 2.0) + ) self.schedule = schedule - if schedule == 'cosine': + if schedule == "cosine": self.T = 0.9946 else: - self.T = 1. + self.T = 1.0 def marginal_log_mean_coeff(self, t): """ Compute log(alpha_t) of a given continuous-time label t in [0, T]. """ - if self.schedule == 'discrete': - return interpolate_fn(t.reshape((-1, 1)), self.t_array.to(t.device), self.log_alpha_array.to(t.device)).reshape((-1)) - elif self.schedule == 'linear': + if self.schedule == "discrete": + return interpolate_fn( + t.reshape((-1, 1)), + self.t_array.to(t.device), + self.log_alpha_array.to(t.device), + ).reshape((-1)) + elif self.schedule == "linear": return -0.25 * t ** 2 * (self.beta_1 - self.beta_0) - 0.5 * t * self.beta_0 - elif self.schedule == 'cosine': + elif self.schedule == "cosine": + def log_alpha_fn(s): - return torch.log(torch.cos((s + self.cosine_s) / (1. + self.cosine_s) * math.pi / 2.)) + return torch.log( + torch.cos( + (s + self.cosine_s) / (1.0 + self.cosine_s) * math.pi / 2.0 + ) + ) log_alpha_t = log_alpha_fn(t) - self.cosine_log_alpha_0 return log_alpha_t @@ -83,33 +113,50 @@ def marginal_std(self, t): """ Compute sigma_t of a given continuous-time label t in [0, T]. """ - return torch.sqrt(1. - torch.exp(2. * self.marginal_log_mean_coeff(t))) + return torch.sqrt(1.0 - torch.exp(2.0 * self.marginal_log_mean_coeff(t))) def marginal_lambda(self, t): """ Compute lambda_t = log(alpha_t) - log(sigma_t) of a given continuous-time label t in [0, T]. """ log_mean_coeff = self.marginal_log_mean_coeff(t) - log_std = 0.5 * torch.log(1. - torch.exp(2. * log_mean_coeff)) + log_std = 0.5 * torch.log(1.0 - torch.exp(2.0 * log_mean_coeff)) return log_mean_coeff - log_std def inverse_lambda(self, lamb): """ Compute the continuous-time label t in [0, T] of a given half-logSNR lambda_t. """ - if self.schedule == 'linear': - tmp = 2. * (self.beta_1 - self.beta_0) * torch.logaddexp(-2. * lamb, torch.zeros((1,)).to(lamb)) - Delta = self.beta_0**2 + tmp + if self.schedule == "linear": + tmp = ( + 2.0 + * (self.beta_1 - self.beta_0) + * torch.logaddexp(-2.0 * lamb, torch.zeros((1,)).to(lamb)) + ) + Delta = self.beta_0 ** 2 + tmp return tmp / (torch.sqrt(Delta) + self.beta_0) / (self.beta_1 - self.beta_0) - elif self.schedule == 'discrete': - log_alpha = -0.5 * torch.logaddexp(torch.zeros((1,)).to(lamb.device), -2. * lamb) - t = interpolate_fn(log_alpha.reshape((-1, 1)), torch.flip(self.log_alpha_array.to(lamb.device), [1]), torch.flip(self.t_array.to(lamb.device), [1])) + elif self.schedule == "discrete": + log_alpha = -0.5 * torch.logaddexp( + torch.zeros((1,)).to(lamb.device), -2.0 * lamb + ) + t = interpolate_fn( + log_alpha.reshape((-1, 1)), + torch.flip(self.log_alpha_array.to(lamb.device), [1]), + torch.flip(self.t_array.to(lamb.device), [1]), + ) return t.reshape((-1,)) else: - log_alpha = -0.5 * torch.logaddexp(-2. * lamb, torch.zeros((1,)).to(lamb)) + log_alpha = -0.5 * torch.logaddexp(-2.0 * lamb, torch.zeros((1,)).to(lamb)) def t_fn(log_alpha_t): - return torch.arccos(torch.exp(log_alpha_t + self.cosine_log_alpha_0)) * 2. * (1. + self.cosine_s) / math.pi - self.cosine_s + return ( + torch.arccos(torch.exp(log_alpha_t + self.cosine_log_alpha_0)) + * 2.0 + * (1.0 + self.cosine_s) + / math.pi + - self.cosine_s + ) + t = t_fn(log_alpha) return t @@ -122,17 +169,15 @@ def model_wrapper( guidance_type="uncond", condition=None, unconditional_condition=None, - guidance_scale=1., + guidance_scale=1.0, classifier_fn=None, classifier_kwargs={}, ): - """Create a wrapper function for the noise prediction model. - - """ + """Create a wrapper function for the noise prediction model.""" def get_model_input_time(t_continuous): - if noise_schedule.schedule == 'discrete': - return (t_continuous - 1. / noise_schedule.total_N) * 1000. + if noise_schedule.schedule == "discrete": + return (t_continuous - 1.0 / noise_schedule.total_N) * 1000.0 else: return t_continuous @@ -147,9 +192,14 @@ def noise_pred_fn(x, t_continuous, cond=None): if model_type == "noise": return output elif model_type == "x_start": - alpha_t, sigma_t = noise_schedule.marginal_alpha(t_continuous), noise_schedule.marginal_std(t_continuous) + alpha_t, sigma_t = ( + noise_schedule.marginal_alpha(t_continuous), + noise_schedule.marginal_std(t_continuous), + ) dims = x.dim() - return (x - expand_dims(alpha_t, dims) * output) / expand_dims(sigma_t, dims) + return (x - expand_dims(alpha_t, dims) * output) / expand_dims( + sigma_t, dims + ) def cond_grad_fn(x, t_input): """ @@ -174,9 +224,14 @@ def model_fn(x, t_continuous): cond_grad = cond_grad_fn(x, t_input) sigma_t = noise_schedule.marginal_std(t_continuous) noise = noise_pred_fn(x, t_continuous) - return noise - guidance_scale * expand_dims(sigma_t, dims=cond_grad.dim()) * cond_grad + return ( + noise + - guidance_scale + * expand_dims(sigma_t, dims=cond_grad.dim()) + * cond_grad + ) elif guidance_type == "classifier-free": - if guidance_scale == 1. or unconditional_condition is None: + if guidance_scale == 1.0 or unconditional_condition is None: return noise_pred_fn(x, t_continuous, cond=condition) else: x_in = torch.cat([x] * 2) @@ -191,9 +246,15 @@ def model_fn(x, t_continuous): class DPMSolver: - def __init__(self, model_fn, noise_schedule, predict_x0=False, thresholding=False, max_val=1.): - """Construct a DPM-Solver. - """ + def __init__( + self, + model_fn, + noise_schedule, + predict_x0=False, + thresholding=False, + max_val=1.0, + ): + """Construct a DPM-Solver.""" self.model = model_fn self.noise_schedule = noise_schedule self.predict_x0 = predict_x0 @@ -212,12 +273,17 @@ def data_prediction_fn(self, x, t): """ noise = self.noise_prediction_fn(x, t) dims = x.dim() - alpha_t, sigma_t = self.noise_schedule.marginal_alpha(t), self.noise_schedule.marginal_std(t) + alpha_t, sigma_t = ( + self.noise_schedule.marginal_alpha(t), + self.noise_schedule.marginal_std(t), + ) x0 = (x - expand_dims(sigma_t, dims) * noise) / expand_dims(alpha_t, dims) if self.thresholding: - p = 0.995 # A hyperparameter in the paper of "Imagen" [1]. + p = 0.995 # A hyperparameter in the paper of "Imagen" [1]. s = torch.quantile(torch.abs(x0).reshape((x0.shape[0], -1)), p, dim=1) - s = expand_dims(torch.maximum(s, self.max_val * torch.ones_like(s).to(s.device)), dims) + s = expand_dims( + torch.maximum(s, self.max_val * torch.ones_like(s).to(s.device)), dims + ) x0 = torch.clamp(x0, -s, s) / s return x0 @@ -231,21 +297,30 @@ def model_fn(self, x, t): return self.noise_prediction_fn(x, t) def get_time_steps(self, skip_type, t_T, t_0, N, device): - """Compute the intermediate time steps for sampling. - """ - if skip_type == 'logSNR': + """Compute the intermediate time steps for sampling.""" + if skip_type == "logSNR": lambda_T = self.noise_schedule.marginal_lambda(torch.tensor(t_T).to(device)) lambda_0 = self.noise_schedule.marginal_lambda(torch.tensor(t_0).to(device)) - logSNR_steps = torch.linspace(lambda_T.cpu().item(), lambda_0.cpu().item(), N + 1).to(device) + logSNR_steps = torch.linspace( + lambda_T.cpu().item(), lambda_0.cpu().item(), N + 1 + ).to(device) return self.noise_schedule.inverse_lambda(logSNR_steps) - elif skip_type == 'time_uniform': + elif skip_type == "time_uniform": return torch.linspace(t_T, t_0, N + 1).to(device) - elif skip_type == 'time_quadratic': + elif skip_type == "time_quadratic": t_order = 2 - t = torch.linspace(t_T**(1. / t_order), t_0**(1. / t_order), N + 1).pow(t_order).to(device) + t = ( + torch.linspace(t_T ** (1.0 / t_order), t_0 ** (1.0 / t_order), N + 1) + .pow(t_order) + .to(device) + ) return t else: - raise ValueError("Unsupported skip_type {}, need to be 'logSNR' or 'time_uniform' or 'time_quadratic'".format(skip_type)) + raise ValueError( + "Unsupported skip_type {}, need to be 'logSNR' or 'time_uniform' or 'time_quadratic'".format( + skip_type + ) + ) def denoise_to_zero_fn(self, x, s): """ @@ -261,7 +336,10 @@ def dpm_solver_first_update(self, x, s, t, model_s=None, return_intermediate=Fal dims = x.dim() lambda_s, lambda_t = ns.marginal_lambda(s), ns.marginal_lambda(t) h = lambda_t - lambda_s - log_alpha_s, log_alpha_t = ns.marginal_log_mean_coeff(s), ns.marginal_log_mean_coeff(t) + log_alpha_s, log_alpha_t = ( + ns.marginal_log_mean_coeff(s), + ns.marginal_log_mean_coeff(t), + ) sigma_s, sigma_t = ns.marginal_std(s), ns.marginal_std(t) alpha_t = torch.exp(log_alpha_t) @@ -274,7 +352,7 @@ def dpm_solver_first_update(self, x, s, t, model_s=None, return_intermediate=Fal - expand_dims(alpha_t * phi_1, dims) * model_s ) if return_intermediate: - return x_t, {'model_s': model_s} + return x_t, {"model_s": model_s} else: return x_t else: @@ -286,58 +364,75 @@ def dpm_solver_first_update(self, x, s, t, model_s=None, return_intermediate=Fal - expand_dims(sigma_t * phi_1, dims) * model_s ) if return_intermediate: - return x_t, {'model_s': model_s} + return x_t, {"model_s": model_s} else: return x_t - def multistep_dpm_solver_second_update(self, x, model_prev_list, t_prev_list, t, solver_type="dpm_solver"): + def multistep_dpm_solver_second_update( + self, x, model_prev_list, t_prev_list, t, solver_type="dpm_solver" + ): """ Multistep solver DPM-Solver-2 from time `t_prev_list[-1]` to time `t`. """ - if solver_type not in ['dpm_solver', 'taylor']: - raise ValueError("'solver_type' must be either 'dpm_solver' or 'taylor', got {}".format(solver_type)) + if solver_type not in ["dpm_solver", "taylor"]: + raise ValueError( + "'solver_type' must be either 'dpm_solver' or 'taylor', got {}".format( + solver_type + ) + ) ns = self.noise_schedule dims = x.dim() model_prev_1, model_prev_0 = model_prev_list t_prev_1, t_prev_0 = t_prev_list - lambda_prev_1, lambda_prev_0, lambda_t = ns.marginal_lambda(t_prev_1), ns.marginal_lambda(t_prev_0), ns.marginal_lambda(t) - log_alpha_prev_0, log_alpha_t = ns.marginal_log_mean_coeff(t_prev_0), ns.marginal_log_mean_coeff(t) + lambda_prev_1, lambda_prev_0, lambda_t = ( + ns.marginal_lambda(t_prev_1), + ns.marginal_lambda(t_prev_0), + ns.marginal_lambda(t), + ) + log_alpha_prev_0, log_alpha_t = ( + ns.marginal_log_mean_coeff(t_prev_0), + ns.marginal_log_mean_coeff(t), + ) sigma_prev_0, sigma_t = ns.marginal_std(t_prev_0), ns.marginal_std(t) alpha_t = torch.exp(log_alpha_t) h_0 = lambda_prev_0 - lambda_prev_1 h = lambda_t - lambda_prev_0 r0 = h_0 / h - D1_0 = expand_dims(1. / r0, dims) * (model_prev_0 - model_prev_1) + D1_0 = expand_dims(1.0 / r0, dims) * (model_prev_0 - model_prev_1) if self.predict_x0: - if solver_type == 'dpm_solver': + if solver_type == "dpm_solver": x_t = ( expand_dims(sigma_t / sigma_prev_0, dims) * x - - expand_dims(alpha_t * (torch.exp(-h) - 1.), dims) * model_prev_0 - - 0.5 * expand_dims(alpha_t * (torch.exp(-h) - 1.), dims) * D1_0 + - expand_dims(alpha_t * (torch.exp(-h) - 1.0), dims) * model_prev_0 + - 0.5 * expand_dims(alpha_t * (torch.exp(-h) - 1.0), dims) * D1_0 ) - elif solver_type == 'taylor': + elif solver_type == "taylor": x_t = ( expand_dims(sigma_t / sigma_prev_0, dims) * x - - expand_dims(alpha_t * (torch.exp(-h) - 1.), dims) * model_prev_0 - + expand_dims(alpha_t * ((torch.exp(-h) - 1.) / h + 1.), dims) * D1_0 + - expand_dims(alpha_t * (torch.exp(-h) - 1.0), dims) * model_prev_0 + + expand_dims(alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0), dims) + * D1_0 ) else: - if solver_type == 'dpm_solver': + if solver_type == "dpm_solver": x_t = ( expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x - - expand_dims(sigma_t * (torch.exp(h) - 1.), dims) * model_prev_0 - - 0.5 * expand_dims(sigma_t * (torch.exp(h) - 1.), dims) * D1_0 + - expand_dims(sigma_t * (torch.exp(h) - 1.0), dims) * model_prev_0 + - 0.5 * expand_dims(sigma_t * (torch.exp(h) - 1.0), dims) * D1_0 ) - elif solver_type == 'taylor': + elif solver_type == "taylor": x_t = ( expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x - - expand_dims(sigma_t * (torch.exp(h) - 1.), dims) * model_prev_0 - - expand_dims(sigma_t * ((torch.exp(h) - 1.) / h - 1.), dims) * D1_0 + - expand_dims(sigma_t * (torch.exp(h) - 1.0), dims) * model_prev_0 + - expand_dims(sigma_t * ((torch.exp(h) - 1.0) / h - 1.0), dims) + * D1_0 ) return x_t - def multistep_dpm_solver_third_update(self, x, model_prev_list, t_prev_list, t, solver_type='dpm_solver'): + def multistep_dpm_solver_third_update( + self, x, model_prev_list, t_prev_list, t, solver_type="dpm_solver" + ): """ Multistep solver DPM-Solver-3 from time `t_prev_list[-1]` to time `t`. """ @@ -345,8 +440,16 @@ def multistep_dpm_solver_third_update(self, x, model_prev_list, t_prev_list, t, dims = x.dim() model_prev_2, model_prev_1, model_prev_0 = model_prev_list t_prev_2, t_prev_1, t_prev_0 = t_prev_list - lambda_prev_2, lambda_prev_1, lambda_prev_0, lambda_t = ns.marginal_lambda(t_prev_2), ns.marginal_lambda(t_prev_1), ns.marginal_lambda(t_prev_0), ns.marginal_lambda(t) - log_alpha_prev_0, log_alpha_t = ns.marginal_log_mean_coeff(t_prev_0), ns.marginal_log_mean_coeff(t) + lambda_prev_2, lambda_prev_1, lambda_prev_0, lambda_t = ( + ns.marginal_lambda(t_prev_2), + ns.marginal_lambda(t_prev_1), + ns.marginal_lambda(t_prev_0), + ns.marginal_lambda(t), + ) + log_alpha_prev_0, log_alpha_t = ( + ns.marginal_log_mean_coeff(t_prev_0), + ns.marginal_log_mean_coeff(t), + ) sigma_prev_0, sigma_t = ns.marginal_std(t_prev_0), ns.marginal_std(t) alpha_t = torch.exp(log_alpha_t) @@ -354,50 +457,78 @@ def multistep_dpm_solver_third_update(self, x, model_prev_list, t_prev_list, t, h_0 = lambda_prev_0 - lambda_prev_1 h = lambda_t - lambda_prev_0 r0, r1 = h_0 / h, h_1 / h - D1_0 = expand_dims(1. / r0, dims) * (model_prev_0 - model_prev_1) - D1_1 = expand_dims(1. / r1, dims) * (model_prev_1 - model_prev_2) + D1_0 = expand_dims(1.0 / r0, dims) * (model_prev_0 - model_prev_1) + D1_1 = expand_dims(1.0 / r1, dims) * (model_prev_1 - model_prev_2) D1 = D1_0 + expand_dims(r0 / (r0 + r1), dims) * (D1_0 - D1_1) - D2 = expand_dims(1. / (r0 + r1), dims) * (D1_0 - D1_1) + D2 = expand_dims(1.0 / (r0 + r1), dims) * (D1_0 - D1_1) if self.predict_x0: x_t = ( expand_dims(sigma_t / sigma_prev_0, dims) * x - - expand_dims(alpha_t * (torch.exp(-h) - 1.), dims) * model_prev_0 - + expand_dims(alpha_t * ((torch.exp(-h) - 1.) / h + 1.), dims) * D1 - - expand_dims(alpha_t * ((torch.exp(-h) - 1. + h) / h**2 - 0.5), dims) * D2 + - expand_dims(alpha_t * (torch.exp(-h) - 1.0), dims) * model_prev_0 + + expand_dims(alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0), dims) * D1 + - expand_dims( + alpha_t * ((torch.exp(-h) - 1.0 + h) / h ** 2 - 0.5), dims + ) + * D2 ) else: x_t = ( expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x - - expand_dims(sigma_t * (torch.exp(h) - 1.), dims) * model_prev_0 - - expand_dims(sigma_t * ((torch.exp(h) - 1.) / h - 1.), dims) * D1 - - expand_dims(sigma_t * ((torch.exp(h) - 1. - h) / h**2 - 0.5), dims) * D2 + - expand_dims(sigma_t * (torch.exp(h) - 1.0), dims) * model_prev_0 + - expand_dims(sigma_t * ((torch.exp(h) - 1.0) / h - 1.0), dims) * D1 + - expand_dims(sigma_t * ((torch.exp(h) - 1.0 - h) / h ** 2 - 0.5), dims) + * D2 ) return x_t - def multistep_dpm_solver_update(self, x, model_prev_list, t_prev_list, t, order, solver_type='dpm_solver'): + def multistep_dpm_solver_update( + self, x, model_prev_list, t_prev_list, t, order, solver_type="dpm_solver" + ): """ Multistep DPM-Solver with the order `order` from time `t_prev_list[-1]` to time `t`. """ if order == 1: - return self.dpm_solver_first_update(x, t_prev_list[-1], t, model_s=model_prev_list[-1]) + return self.dpm_solver_first_update( + x, t_prev_list[-1], t, model_s=model_prev_list[-1] + ) elif order == 2: - return self.multistep_dpm_solver_second_update(x, model_prev_list, t_prev_list, t, solver_type=solver_type) + return self.multistep_dpm_solver_second_update( + x, model_prev_list, t_prev_list, t, solver_type=solver_type + ) elif order == 3: - return self.multistep_dpm_solver_third_update(x, model_prev_list, t_prev_list, t, solver_type=solver_type) + return self.multistep_dpm_solver_third_update( + x, model_prev_list, t_prev_list, t, solver_type=solver_type + ) else: raise ValueError("Solver order must be 1 or 2 or 3, got {}".format(order)) - def sample(self, x, steps=20, t_start=None, t_end=None, order=3, skip_type='time_uniform', method='singlestep', lower_order_final=True, denoise_to_zero=False, solver_type='dpm_solver', atol=0.0078, rtol=0.05): + def sample( + self, + x, + steps=20, + t_start=None, + t_end=None, + order=3, + skip_type="time_uniform", + method="singlestep", + lower_order_final=True, + denoise_to_zero=False, + solver_type="dpm_solver", + atol=0.0078, + rtol=0.05, + ): """ Compute the sample at time `t_end` by DPM-Solver, given the initial `x` at time `t_start`. """ - t_0 = 1. / self.noise_schedule.total_N if t_end is None else t_end + t_0 = 1.0 / self.noise_schedule.total_N if t_end is None else t_end t_T = self.noise_schedule.T if t_start is None else t_start device = x.device - if method == 'multistep': + if method == "multistep": assert steps >= order - timesteps = self.get_time_steps(skip_type=skip_type, t_T=t_T, t_0=t_0, N=steps, device=device) + timesteps = self.get_time_steps( + skip_type=skip_type, t_T=t_T, t_0=t_0, N=steps, device=device + ) assert timesteps.shape[0] - 1 == steps with torch.no_grad(): vec_t = timesteps[0].expand((x.shape[0])) @@ -406,7 +537,14 @@ def sample(self, x, steps=20, t_start=None, t_end=None, order=3, skip_type='time # Init the first `order` values by lower order multistep DPM-Solver. for init_order in range(1, order): vec_t = timesteps[init_order].expand(x.shape[0]) - x = self.multistep_dpm_solver_update(x, model_prev_list, t_prev_list, vec_t, init_order, solver_type=solver_type) + x = self.multistep_dpm_solver_update( + x, + model_prev_list, + t_prev_list, + vec_t, + init_order, + solver_type=solver_type, + ) model_prev_list.append(self.model_fn(x, vec_t)) t_prev_list.append(vec_t) # Compute the remaining values by `order`-th order multistep DPM-Solver. @@ -416,7 +554,14 @@ def sample(self, x, steps=20, t_start=None, t_end=None, order=3, skip_type='time step_order = min(order, steps + 1 - step) else: step_order = order - x = self.multistep_dpm_solver_update(x, model_prev_list, t_prev_list, vec_t, step_order, solver_type=solver_type) + x = self.multistep_dpm_solver_update( + x, + model_prev_list, + t_prev_list, + vec_t, + step_order, + solver_type=solver_type, + ) for i in range(order - 1): t_prev_list[i] = t_prev_list[i + 1] model_prev_list[i] = model_prev_list[i + 1] diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py index 756aa4b6a991..639a7b1ebd40 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py @@ -16,8 +16,11 @@ import torch from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler -from nemo.collections.multimodal.models.stable_diffusion.samplers.base_sampler import AbstractBaseSampler -from .dpmsolver import NoiseScheduleVP, model_wrapper, DPMSolver +from nemo.collections.multimodal.models.stable_diffusion.samplers.base_sampler import ( + AbstractBaseSampler, +) + +from .dpmsolver import DPMSolver, NoiseScheduleVP, model_wrapper class DPMSolverSampler(AbstractBaseSampler): @@ -32,14 +35,22 @@ def to_torch(x, model): x_device = x_float32.to(model.betas.device) return x_device - self.register_buffer('alphas_cumprod', to_torch(model.alphas_cumprod, model)) + self.register_buffer("alphas_cumprod", to_torch(model.alphas_cumprod, model)) @torch.no_grad() def p_sampling_fn(self): pass @torch.no_grad() - def dpm_sampling_fn(self, shape, steps, conditioning=None, unconditional_conditioning=None, unconditional_guidance_scale=1., x_T=None): + def dpm_sampling_fn( + self, + shape, + steps, + conditioning=None, + unconditional_conditioning=None, + unconditional_guidance_scale=1.0, + x_T=None, + ): device = self.model.betas.device if x_T is None: @@ -47,7 +58,7 @@ def dpm_sampling_fn(self, shape, steps, conditioning=None, unconditional_conditi else: img = x_T - ns = NoiseScheduleVP('discrete', alphas_cumprod=self.alphas_cumprod) + ns = NoiseScheduleVP("discrete", alphas_cumprod=self.alphas_cumprod) model_fn = model_wrapper( lambda x, t, c: self.model.apply_model(x, t, c), @@ -59,6 +70,13 @@ def dpm_sampling_fn(self, shape, steps, conditioning=None, unconditional_conditi guidance_scale=unconditional_guidance_scale, ) dpm_solver = DPMSolver(model_fn, ns, predict_x0=True, thresholding=False) - x = dpm_solver.sample(img, steps=steps, skip_type="time_uniform", method="multistep", order=2, lower_order_final=True) + x = dpm_solver.sample( + img, + steps=steps, + skip_type="time_uniform", + method="multistep", + order=2, + lower_order_final=True, + ) return x.to(device), None diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py index bee430e23871..cda12b668059 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py @@ -322,7 +322,9 @@ def interpolate_fn(x, xp, yp): cand_start_idx, ), ) - end_idx = torch.where(torch.eq(start_idx, cand_start_idx), start_idx + 2, start_idx + 1) + end_idx = torch.where( + torch.eq(start_idx, cand_start_idx), start_idx + 2, start_idx + 1 + ) start_x = torch.gather(sorted_all_x, dim=2, index=start_idx.unsqueeze(2)).squeeze(2) end_x = torch.gather(sorted_all_x, dim=2, index=end_idx.unsqueeze(2)).squeeze(2) start_idx2 = torch.where( @@ -335,8 +337,12 @@ def interpolate_fn(x, xp, yp): ), ) y_positions_expanded = yp.unsqueeze(0).expand(N, -1, -1) - start_y = torch.gather(y_positions_expanded, dim=2, index=start_idx2.unsqueeze(2)).squeeze(2) - end_y = torch.gather(y_positions_expanded, dim=2, index=(start_idx2 + 1).unsqueeze(2)).squeeze(2) + start_y = torch.gather( + y_positions_expanded, dim=2, index=start_idx2.unsqueeze(2) + ).squeeze(2) + end_y = torch.gather( + y_positions_expanded, dim=2, index=(start_idx2 + 1).unsqueeze(2) + ).squeeze(2) cand = start_y + (x - start_x) * (end_y - start_y) / (end_x - start_x) return cand From 92b5bc715ca1f9dc1d5f7ea6745e4e5fd94253aa Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Fri, 5 May 2023 00:11:01 +0000 Subject: [PATCH 016/512] pre-commit fixes --- .../stable_diffusion/samplers/base_sampler.py | 126 +++------------ .../stable_diffusion/samplers/dpmsolver.py | 150 ++++-------------- .../stable_diffusion/samplers/sampler_dpm.py | 11 +- .../stable_diffusion/diffusionmodules/util.py | 85 +++------- 4 files changed, 78 insertions(+), 294 deletions(-) diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py index 29657439ac3e..8f43c4da6a12 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py @@ -40,9 +40,7 @@ def register_buffer(self, name, attr): attr = attr.to(torch.device("cuda")) setattr(self, name, attr) - def make_schedule( - self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, verbose=True - ): + def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, verbose=True): self.ddim_timesteps = make_ddim_timesteps( ddim_discr_method=ddim_discretize, num_ddim_timesteps=ddim_num_steps, @@ -50,44 +48,24 @@ def make_schedule( verbose=verbose, ) alphas_cumprod = self.model.alphas_cumprod - assert ( - alphas_cumprod.shape[0] == self.ddpm_num_timesteps - ), "alphas have to be defined for each timestep" - to_torch = ( - lambda x: x.clone() - .detach() - .to(torch.float32) - .to(torch.cuda.current_device()) - ) + assert alphas_cumprod.shape[0] == self.ddpm_num_timesteps, "alphas have to be defined for each timestep" + to_torch = lambda x: x.clone().detach().to(torch.float32).to(torch.cuda.current_device()) self.register_buffer("betas", to_torch(self.model.betas)) self.register_buffer("alphas_cumprod", to_torch(alphas_cumprod)) - self.register_buffer( - "alphas_cumprod_prev", to_torch(self.model.alphas_cumprod_prev) - ) + self.register_buffer("alphas_cumprod_prev", to_torch(self.model.alphas_cumprod_prev)) # calculations for diffusion q(x_t | x_{t-1}) and others + self.register_buffer("sqrt_alphas_cumprod", to_torch(np.sqrt(alphas_cumprod.cpu()))) self.register_buffer( - "sqrt_alphas_cumprod", to_torch(np.sqrt(alphas_cumprod.cpu())) + "sqrt_one_minus_alphas_cumprod", to_torch(np.sqrt(1.0 - alphas_cumprod.cpu())), ) + self.register_buffer("log_one_minus_alphas_cumprod", to_torch(np.log(1.0 - alphas_cumprod.cpu()))) + self.register_buffer("sqrt_recip_alphas_cumprod", to_torch(np.sqrt(1.0 / alphas_cumprod.cpu()))) self.register_buffer( - "sqrt_one_minus_alphas_cumprod", - to_torch(np.sqrt(1.0 - alphas_cumprod.cpu())), - ) - self.register_buffer( - "log_one_minus_alphas_cumprod", to_torch(np.log(1.0 - alphas_cumprod.cpu())) - ) - self.register_buffer( - "sqrt_recip_alphas_cumprod", to_torch(np.sqrt(1.0 / alphas_cumprod.cpu())) - ) - self.register_buffer( - "sqrt_recipm1_alphas_cumprod", - to_torch(np.sqrt(1.0 / alphas_cumprod.cpu() - 1)), + "sqrt_recipm1_alphas_cumprod", to_torch(np.sqrt(1.0 / alphas_cumprod.cpu() - 1)), ) # ddim sampling parameters ddim_sigmas, ddim_alphas, ddim_alphas_prev = make_ddim_sampling_parameters( - alphacums=alphas_cumprod.cpu(), - ddim_timesteps=self.ddim_timesteps, - eta=ddim_eta, - verbose=verbose, + alphacums=alphas_cumprod.cpu(), ddim_timesteps=self.ddim_timesteps, eta=ddim_eta, verbose=verbose, ) self.register_buffer("ddim_sigmas", ddim_sigmas) self.register_buffer("ddim_alphas", ddim_alphas) @@ -98,9 +76,7 @@ def make_schedule( / (1 - self.alphas_cumprod) * (1 - self.alphas_cumprod / self.alphas_cumprod_prev) ) - self.register_buffer( - "ddim_sigmas_for_original_num_steps", sigmas_for_original_sampling_steps - ) + self.register_buffer("ddim_sigmas_for_original_num_steps", sigmas_for_original_sampling_steps) @abstractmethod def p_sampling_fn(self): @@ -139,14 +115,10 @@ def sample( if isinstance(conditioning, dict): cbs = conditioning[list(conditioning.keys())[0]][0].shape[0] if cbs != batch_size: - print( - f"Warning: Got {cbs} conditionings but batch-size is {batch_size}" - ) + print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}") else: if conditioning.shape[0] != batch_size: - print( - f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}" - ) + print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}") self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose) # sampling C, H, W = shape @@ -212,59 +184,34 @@ def sampling_fn( img = x_T if timesteps is None: - timesteps = ( - self.ddpm_num_timesteps - if ddim_use_original_steps - else self.ddim_timesteps - ) + timesteps = self.ddpm_num_timesteps if ddim_use_original_steps else self.ddim_timesteps elif timesteps is not None and not ddim_use_original_steps: - subset_end = ( - int( - min(timesteps / self.ddim_timesteps.shape[0], 1) - * self.ddim_timesteps.shape[0] - ) - - 1 - ) + subset_end = int(min(timesteps / self.ddim_timesteps.shape[0], 1) * self.ddim_timesteps.shape[0]) - 1 timesteps = self.ddim_timesteps[:subset_end] intermediates = {"x_inter": [img], "pred_x0": [img]} # TODO: Is this needed if self.sampler is Sampler.PLMS: - time_range = ( - list(reversed(range(0, timesteps))) - if ddim_use_original_steps - else np.flip(timesteps) - ) + time_range = list(reversed(range(0, timesteps))) if ddim_use_original_steps else np.flip(timesteps) else: - time_range = ( - reversed(range(0, timesteps)) - if ddim_use_original_steps - else np.flip(timesteps) - ) + time_range = reversed(range(0, timesteps)) if ddim_use_original_steps else np.flip(timesteps) total_steps = timesteps if ddim_use_original_steps else timesteps.shape[0] print(f"Running {self.sampler.name} Sampling with {total_steps} timesteps") - iterator = tqdm( - time_range, desc=f"{self.sampler.name} Sampler", total=total_steps - ) + iterator = tqdm(time_range, desc=f"{self.sampler.name} Sampler", total=total_steps) old_eps = [] for i, step in enumerate(iterator): index = total_steps - i - 1 ts = torch.full((b,), step, device=device, dtype=torch.long) if self.sampler is Sampler.PLMS: ts_next = torch.full( - (b,), - time_range[min(i + 1, len(time_range) - 1)], - device=device, - dtype=torch.long, + (b,), time_range[min(i + 1, len(time_range) - 1)], device=device, dtype=torch.long, ) else: old_eps = None ts_next = None if mask is not None: assert x0 is not None - img_orig = self.model.q_sample( - x0, ts - ) # TODO: deterministic forward pass? + img_orig = self.model.q_sample(x0, ts) # TODO: deterministic forward pass? img = img_orig * mask + (1.0 - mask) * img outs = self.p_sampling_fn( img, @@ -298,14 +245,7 @@ def sampling_fn( return img, intermediates def _get_model_output( - self, - x, - t, - unconditional_conditioning, - unconditional_guidance_scale, - score_corrector, - c, - corrector_kwargs, + self, x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, corrector_kwargs, ): if unconditional_conditioning is None or unconditional_guidance_scale == 1.0: e_t = self.model.apply_model(x, t, c) @@ -319,9 +259,7 @@ def _get_model_output( e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond) if score_corrector is not None: assert self.model.parameterization == "eps" - e_t = score_corrector.modify_score( - self.model, e_t, x, t, c, **corrector_kwargs - ) + e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs) return e_t def _get_x_prev_and_pred_x0( @@ -338,29 +276,17 @@ def _get_x_prev_and_pred_x0( noise_dropout, ): alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas - alphas_prev = ( - self.model.alphas_cumprod_prev - if use_original_steps - else self.ddim_alphas_prev - ) + alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev sqrt_one_minus_alphas = ( - self.model.sqrt_one_minus_alphas_cumprod - if use_original_steps - else self.ddim_sqrt_one_minus_alphas - ) - sigmas = ( - self.model.ddim_sigmas_for_original_num_steps - if use_original_steps - else self.ddim_sigmas + self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas ) + sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas # select parameters corresponding to the currently considered timestep a_t = torch.full((b, 1, 1, 1), alphas[index], device=device) a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device) sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device) - sqrt_one_minus_at = torch.full( - (b, 1, 1, 1), sqrt_one_minus_alphas[index], device=device - ) + sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index], device=device) # current prediction for x_0 pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt() if quantize_denoised: diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py index 6548e7abc8de..0ea758ace26c 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py @@ -15,20 +15,12 @@ import torch -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( - expand_dims, - interpolate_fn, -) +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import expand_dims, interpolate_fn class NoiseScheduleVP: def __init__( - self, - schedule="discrete", - betas=None, - alphas_cumprod=None, - continuous_beta_0=0.1, - continuous_beta_1=20.0, + self, schedule="discrete", betas=None, alphas_cumprod=None, continuous_beta_0=0.1, continuous_beta_1=20.0, ): """Create a wrapper class for the forward SDE.""" @@ -48,15 +40,8 @@ def __init__( log_alphas = 0.5 * torch.log(alphas_cumprod) self.total_N = len(log_alphas) self.T = 1.0 - self.t_array = torch.linspace(0.0, 1.0, self.total_N + 1)[1:].reshape( - (1, -1) - ) - self.log_alpha_array = log_alphas.reshape( - ( - 1, - -1, - ) - ) + self.t_array = torch.linspace(0.0, 1.0, self.total_N + 1)[1:].reshape((1, -1)) + self.log_alpha_array = log_alphas.reshape((1, -1,)) else: self.total_N = 1000 self.beta_0 = continuous_beta_0 @@ -70,9 +55,7 @@ def __init__( / math.pi - self.cosine_s ) - self.cosine_log_alpha_0 = math.log( - math.cos(self.cosine_s / (1.0 + self.cosine_s) * math.pi / 2.0) - ) + self.cosine_log_alpha_0 = math.log(math.cos(self.cosine_s / (1.0 + self.cosine_s) * math.pi / 2.0)) self.schedule = schedule if schedule == "cosine": self.T = 0.9946 @@ -85,20 +68,14 @@ def marginal_log_mean_coeff(self, t): """ if self.schedule == "discrete": return interpolate_fn( - t.reshape((-1, 1)), - self.t_array.to(t.device), - self.log_alpha_array.to(t.device), + t.reshape((-1, 1)), self.t_array.to(t.device), self.log_alpha_array.to(t.device), ).reshape((-1)) elif self.schedule == "linear": return -0.25 * t ** 2 * (self.beta_1 - self.beta_0) - 0.5 * t * self.beta_0 elif self.schedule == "cosine": def log_alpha_fn(s): - return torch.log( - torch.cos( - (s + self.cosine_s) / (1.0 + self.cosine_s) * math.pi / 2.0 - ) - ) + return torch.log(torch.cos((s + self.cosine_s) / (1.0 + self.cosine_s) * math.pi / 2.0)) log_alpha_t = log_alpha_fn(t) - self.cosine_log_alpha_0 return log_alpha_t @@ -128,17 +105,11 @@ def inverse_lambda(self, lamb): Compute the continuous-time label t in [0, T] of a given half-logSNR lambda_t. """ if self.schedule == "linear": - tmp = ( - 2.0 - * (self.beta_1 - self.beta_0) - * torch.logaddexp(-2.0 * lamb, torch.zeros((1,)).to(lamb)) - ) + tmp = 2.0 * (self.beta_1 - self.beta_0) * torch.logaddexp(-2.0 * lamb, torch.zeros((1,)).to(lamb)) Delta = self.beta_0 ** 2 + tmp return tmp / (torch.sqrt(Delta) + self.beta_0) / (self.beta_1 - self.beta_0) elif self.schedule == "discrete": - log_alpha = -0.5 * torch.logaddexp( - torch.zeros((1,)).to(lamb.device), -2.0 * lamb - ) + log_alpha = -0.5 * torch.logaddexp(torch.zeros((1,)).to(lamb.device), -2.0 * lamb) t = interpolate_fn( log_alpha.reshape((-1, 1)), torch.flip(self.log_alpha_array.to(lamb.device), [1]), @@ -197,9 +168,7 @@ def noise_pred_fn(x, t_continuous, cond=None): noise_schedule.marginal_std(t_continuous), ) dims = x.dim() - return (x - expand_dims(alpha_t, dims) * output) / expand_dims( - sigma_t, dims - ) + return (x - expand_dims(alpha_t, dims) * output) / expand_dims(sigma_t, dims) def cond_grad_fn(x, t_input): """ @@ -224,12 +193,7 @@ def model_fn(x, t_continuous): cond_grad = cond_grad_fn(x, t_input) sigma_t = noise_schedule.marginal_std(t_continuous) noise = noise_pred_fn(x, t_continuous) - return ( - noise - - guidance_scale - * expand_dims(sigma_t, dims=cond_grad.dim()) - * cond_grad - ) + return noise - guidance_scale * expand_dims(sigma_t, dims=cond_grad.dim()) * cond_grad elif guidance_type == "classifier-free": if guidance_scale == 1.0 or unconditional_condition is None: return noise_pred_fn(x, t_continuous, cond=condition) @@ -247,12 +211,7 @@ def model_fn(x, t_continuous): class DPMSolver: def __init__( - self, - model_fn, - noise_schedule, - predict_x0=False, - thresholding=False, - max_val=1.0, + self, model_fn, noise_schedule, predict_x0=False, thresholding=False, max_val=1.0, ): """Construct a DPM-Solver.""" self.model = model_fn @@ -281,9 +240,7 @@ def data_prediction_fn(self, x, t): if self.thresholding: p = 0.995 # A hyperparameter in the paper of "Imagen" [1]. s = torch.quantile(torch.abs(x0).reshape((x0.shape[0], -1)), p, dim=1) - s = expand_dims( - torch.maximum(s, self.max_val * torch.ones_like(s).to(s.device)), dims - ) + s = expand_dims(torch.maximum(s, self.max_val * torch.ones_like(s).to(s.device)), dims) x0 = torch.clamp(x0, -s, s) / s return x0 @@ -301,25 +258,17 @@ def get_time_steps(self, skip_type, t_T, t_0, N, device): if skip_type == "logSNR": lambda_T = self.noise_schedule.marginal_lambda(torch.tensor(t_T).to(device)) lambda_0 = self.noise_schedule.marginal_lambda(torch.tensor(t_0).to(device)) - logSNR_steps = torch.linspace( - lambda_T.cpu().item(), lambda_0.cpu().item(), N + 1 - ).to(device) + logSNR_steps = torch.linspace(lambda_T.cpu().item(), lambda_0.cpu().item(), N + 1).to(device) return self.noise_schedule.inverse_lambda(logSNR_steps) elif skip_type == "time_uniform": return torch.linspace(t_T, t_0, N + 1).to(device) elif skip_type == "time_quadratic": t_order = 2 - t = ( - torch.linspace(t_T ** (1.0 / t_order), t_0 ** (1.0 / t_order), N + 1) - .pow(t_order) - .to(device) - ) + t = torch.linspace(t_T ** (1.0 / t_order), t_0 ** (1.0 / t_order), N + 1).pow(t_order).to(device) return t else: raise ValueError( - "Unsupported skip_type {}, need to be 'logSNR' or 'time_uniform' or 'time_quadratic'".format( - skip_type - ) + "Unsupported skip_type {}, need to be 'logSNR' or 'time_uniform' or 'time_quadratic'".format(skip_type) ) def denoise_to_zero_fn(self, x, s): @@ -347,10 +296,7 @@ def dpm_solver_first_update(self, x, s, t, model_s=None, return_intermediate=Fal phi_1 = torch.expm1(-h) if model_s is None: model_s = self.model_fn(x, s) - x_t = ( - expand_dims(sigma_t / sigma_s, dims) * x - - expand_dims(alpha_t * phi_1, dims) * model_s - ) + x_t = expand_dims(sigma_t / sigma_s, dims) * x - expand_dims(alpha_t * phi_1, dims) * model_s if return_intermediate: return x_t, {"model_s": model_s} else: @@ -368,18 +314,12 @@ def dpm_solver_first_update(self, x, s, t, model_s=None, return_intermediate=Fal else: return x_t - def multistep_dpm_solver_second_update( - self, x, model_prev_list, t_prev_list, t, solver_type="dpm_solver" - ): + def multistep_dpm_solver_second_update(self, x, model_prev_list, t_prev_list, t, solver_type="dpm_solver"): """ Multistep solver DPM-Solver-2 from time `t_prev_list[-1]` to time `t`. """ if solver_type not in ["dpm_solver", "taylor"]: - raise ValueError( - "'solver_type' must be either 'dpm_solver' or 'taylor', got {}".format( - solver_type - ) - ) + raise ValueError("'solver_type' must be either 'dpm_solver' or 'taylor', got {}".format(solver_type)) ns = self.noise_schedule dims = x.dim() model_prev_1, model_prev_0 = model_prev_list @@ -411,8 +351,7 @@ def multistep_dpm_solver_second_update( x_t = ( expand_dims(sigma_t / sigma_prev_0, dims) * x - expand_dims(alpha_t * (torch.exp(-h) - 1.0), dims) * model_prev_0 - + expand_dims(alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0), dims) - * D1_0 + + expand_dims(alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0), dims) * D1_0 ) else: if solver_type == "dpm_solver": @@ -425,14 +364,11 @@ def multistep_dpm_solver_second_update( x_t = ( expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x - expand_dims(sigma_t * (torch.exp(h) - 1.0), dims) * model_prev_0 - - expand_dims(sigma_t * ((torch.exp(h) - 1.0) / h - 1.0), dims) - * D1_0 + - expand_dims(sigma_t * ((torch.exp(h) - 1.0) / h - 1.0), dims) * D1_0 ) return x_t - def multistep_dpm_solver_third_update( - self, x, model_prev_list, t_prev_list, t, solver_type="dpm_solver" - ): + def multistep_dpm_solver_third_update(self, x, model_prev_list, t_prev_list, t, solver_type="dpm_solver"): """ Multistep solver DPM-Solver-3 from time `t_prev_list[-1]` to time `t`. """ @@ -466,39 +402,27 @@ def multistep_dpm_solver_third_update( expand_dims(sigma_t / sigma_prev_0, dims) * x - expand_dims(alpha_t * (torch.exp(-h) - 1.0), dims) * model_prev_0 + expand_dims(alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0), dims) * D1 - - expand_dims( - alpha_t * ((torch.exp(-h) - 1.0 + h) / h ** 2 - 0.5), dims - ) - * D2 + - expand_dims(alpha_t * ((torch.exp(-h) - 1.0 + h) / h ** 2 - 0.5), dims) * D2 ) else: x_t = ( expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x - expand_dims(sigma_t * (torch.exp(h) - 1.0), dims) * model_prev_0 - expand_dims(sigma_t * ((torch.exp(h) - 1.0) / h - 1.0), dims) * D1 - - expand_dims(sigma_t * ((torch.exp(h) - 1.0 - h) / h ** 2 - 0.5), dims) - * D2 + - expand_dims(sigma_t * ((torch.exp(h) - 1.0 - h) / h ** 2 - 0.5), dims) * D2 ) return x_t - def multistep_dpm_solver_update( - self, x, model_prev_list, t_prev_list, t, order, solver_type="dpm_solver" - ): + def multistep_dpm_solver_update(self, x, model_prev_list, t_prev_list, t, order, solver_type="dpm_solver"): """ Multistep DPM-Solver with the order `order` from time `t_prev_list[-1]` to time `t`. """ if order == 1: - return self.dpm_solver_first_update( - x, t_prev_list[-1], t, model_s=model_prev_list[-1] - ) + return self.dpm_solver_first_update(x, t_prev_list[-1], t, model_s=model_prev_list[-1]) elif order == 2: - return self.multistep_dpm_solver_second_update( - x, model_prev_list, t_prev_list, t, solver_type=solver_type - ) + return self.multistep_dpm_solver_second_update(x, model_prev_list, t_prev_list, t, solver_type=solver_type) elif order == 3: - return self.multistep_dpm_solver_third_update( - x, model_prev_list, t_prev_list, t, solver_type=solver_type - ) + return self.multistep_dpm_solver_third_update(x, model_prev_list, t_prev_list, t, solver_type=solver_type) else: raise ValueError("Solver order must be 1 or 2 or 3, got {}".format(order)) @@ -526,9 +450,7 @@ def sample( if method == "multistep": assert steps >= order - timesteps = self.get_time_steps( - skip_type=skip_type, t_T=t_T, t_0=t_0, N=steps, device=device - ) + timesteps = self.get_time_steps(skip_type=skip_type, t_T=t_T, t_0=t_0, N=steps, device=device) assert timesteps.shape[0] - 1 == steps with torch.no_grad(): vec_t = timesteps[0].expand((x.shape[0])) @@ -538,12 +460,7 @@ def sample( for init_order in range(1, order): vec_t = timesteps[init_order].expand(x.shape[0]) x = self.multistep_dpm_solver_update( - x, - model_prev_list, - t_prev_list, - vec_t, - init_order, - solver_type=solver_type, + x, model_prev_list, t_prev_list, vec_t, init_order, solver_type=solver_type, ) model_prev_list.append(self.model_fn(x, vec_t)) t_prev_list.append(vec_t) @@ -555,12 +472,7 @@ def sample( else: step_order = order x = self.multistep_dpm_solver_update( - x, - model_prev_list, - t_prev_list, - vec_t, - step_order, - solver_type=solver_type, + x, model_prev_list, t_prev_list, vec_t, step_order, solver_type=solver_type, ) for i in range(order - 1): t_prev_list[i] = t_prev_list[i + 1] diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py index 639a7b1ebd40..d53c6dee0562 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py @@ -16,9 +16,7 @@ import torch from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler -from nemo.collections.multimodal.models.stable_diffusion.samplers.base_sampler import ( - AbstractBaseSampler, -) +from nemo.collections.multimodal.models.stable_diffusion.samplers.base_sampler import AbstractBaseSampler from .dpmsolver import DPMSolver, NoiseScheduleVP, model_wrapper @@ -71,12 +69,7 @@ def dpm_sampling_fn( ) dpm_solver = DPMSolver(model_fn, ns, predict_x0=True, thresholding=False) x = dpm_solver.sample( - img, - steps=steps, - skip_type="time_uniform", - method="multistep", - order=2, - lower_order_final=True, + img, steps=steps, skip_type="time_uniform", method="multistep", order=2, lower_order_final=True, ) return x.to(device), None diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py index cda12b668059..d7ccb3a75549 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py @@ -30,21 +30,12 @@ from torch._dynamo import disable -def make_beta_schedule( - schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3 -): +def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): if schedule == "linear": - betas = ( - torch.linspace( - linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64 - ) - ** 2 - ) + betas = torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2 elif schedule == "cosine": - timesteps = ( - torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s - ) + timesteps = torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s alphas = timesteps / (1 + cosine_s) * np.pi / 2 alphas = torch.cos(alphas).pow(2) alphas = alphas / alphas[0] @@ -52,33 +43,22 @@ def make_beta_schedule( betas = np.clip(betas, a_min=0, a_max=0.999) elif schedule == "sqrt_linear": - betas = torch.linspace( - linear_start, linear_end, n_timestep, dtype=torch.float64 - ) + betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) elif schedule == "sqrt": - betas = ( - torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) - ** 0.5 - ) + betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5 else: raise ValueError(f"schedule '{schedule}' unknown.") return betas.numpy() -def make_ddim_timesteps( - ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps, verbose=True -): +def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps, verbose=True): if ddim_discr_method == "uniform": c = num_ddpm_timesteps // num_ddim_timesteps ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c))) elif ddim_discr_method == "quad": - ddim_timesteps = ( - (np.linspace(0, np.sqrt(num_ddpm_timesteps * 0.8), num_ddim_timesteps)) ** 2 - ).astype(int) + ddim_timesteps = ((np.linspace(0, np.sqrt(num_ddpm_timesteps * 0.8), num_ddim_timesteps)) ** 2).astype(int) else: - raise NotImplementedError( - f'There is no ddim discretization method called "{ddim_discr_method}"' - ) + raise NotImplementedError(f'There is no ddim discretization method called "{ddim_discr_method}"') # assert ddim_timesteps.shape[0] == num_ddim_timesteps # add one to get the final alpha values right (the ones from first scale to data during sampling) @@ -94,13 +74,9 @@ def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbose=True): alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_timesteps[:-1]].tolist()) # according the the formula provided in https://arxiv.org/abs/2010.02502 - sigmas = eta * np.sqrt( - (1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev) - ) + sigmas = eta * np.sqrt((1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev)) if verbose: - print( - f"Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}" - ) + print(f"Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}") print( f"For the chosen value of eta, which is {eta}, " f"this results in the following sigma_t schedule for ddim sampler {sigmas}" @@ -171,10 +147,7 @@ def backward(ctx, *output_grads): shallow_copies = [x.view_as(x) for x in ctx.input_tensors] output_tensors = ctx.run_function(*shallow_copies) input_grads = torch.autograd.grad( - output_tensors, - ctx.input_tensors + ctx.input_params, - output_grads, - allow_unused=True, + output_tensors, ctx.input_tensors + ctx.input_params, output_grads, allow_unused=True, ) del ctx.input_tensors del ctx.input_params @@ -189,9 +162,7 @@ def get_idx(end, device): return torch.arange(start=0, end=end, dtype=torch.float32, device=device) -def timestep_embedding( - timesteps, dim, max_period=10000, repeat_only=False, use_fp16=False -): +def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False, use_fp16=False): """ Create sinusoidal timestep embeddings. :param timesteps: a 1-D Tensor of N indices, one per batch element. @@ -207,9 +178,7 @@ def timestep_embedding( args = timesteps[:, None].float() * freqs[None] embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) if dim % 2: - embedding = torch.cat( - [embedding, torch.zeros_like(embedding[:, :1])], dim=-1 - ) + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) else: embedding = repeat(timesteps, "b -> b d", d=dim) if use_fp16: @@ -297,9 +266,7 @@ def avg_pool_nd(dims, *args, **kwargs): def noise_like(shape, device, repeat=False): - repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat( - shape[0], *((1,) * (len(shape) - 1)) - ) + repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1))) noise = lambda: torch.randn(shape, device=device) return repeat_noise() if repeat else noise() @@ -316,33 +283,19 @@ def interpolate_fn(x, xp, yp): start_idx = torch.where( torch.eq(x_idx, 0), torch.tensor(1, device=x.device), - torch.where( - torch.eq(x_idx, K), - torch.tensor(K - 2, device=x.device), - cand_start_idx, - ), - ) - end_idx = torch.where( - torch.eq(start_idx, cand_start_idx), start_idx + 2, start_idx + 1 + torch.where(torch.eq(x_idx, K), torch.tensor(K - 2, device=x.device), cand_start_idx,), ) + end_idx = torch.where(torch.eq(start_idx, cand_start_idx), start_idx + 2, start_idx + 1) start_x = torch.gather(sorted_all_x, dim=2, index=start_idx.unsqueeze(2)).squeeze(2) end_x = torch.gather(sorted_all_x, dim=2, index=end_idx.unsqueeze(2)).squeeze(2) start_idx2 = torch.where( torch.eq(x_idx, 0), torch.tensor(0, device=x.device), - torch.where( - torch.eq(x_idx, K), - torch.tensor(K - 2, device=x.device), - cand_start_idx, - ), + torch.where(torch.eq(x_idx, K), torch.tensor(K - 2, device=x.device), cand_start_idx,), ) y_positions_expanded = yp.unsqueeze(0).expand(N, -1, -1) - start_y = torch.gather( - y_positions_expanded, dim=2, index=start_idx2.unsqueeze(2) - ).squeeze(2) - end_y = torch.gather( - y_positions_expanded, dim=2, index=(start_idx2 + 1).unsqueeze(2) - ).squeeze(2) + start_y = torch.gather(y_positions_expanded, dim=2, index=start_idx2.unsqueeze(2)).squeeze(2) + end_y = torch.gather(y_positions_expanded, dim=2, index=(start_idx2 + 1).unsqueeze(2)).squeeze(2) cand = start_y + (x - start_x) * (end_y - start_y) / (end_x - start_x) return cand From e165c9e3cfc9c7fa3c50f230861ceffaa5bc367d Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Tue, 9 May 2023 10:47:00 -0700 Subject: [PATCH 017/512] Add conversion script from open clip checkpoint to nemo checkpoint --- .../clip/convert_openclip_to_nemo.py | 193 ++++++++++++++++++ .../models/clip/megatron_clip_models.py | 19 +- .../vision/modules/vit/vit_backbone.py | 2 +- 3 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 examples/multimodal/foundation/clip/convert_openclip_to_nemo.py diff --git a/examples/multimodal/foundation/clip/convert_openclip_to_nemo.py b/examples/multimodal/foundation/clip/convert_openclip_to_nemo.py new file mode 100644 index 000000000000..173a8d36046e --- /dev/null +++ b/examples/multimodal/foundation/clip/convert_openclip_to_nemo.py @@ -0,0 +1,193 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import torch +from apex.transformer import parallel_state +from argparse import ArgumentParser +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.utils import AppState, logging +from nemo.utils.distributed import initialize_distributed +from nemo.utils.model_utils import inject_model_parallel_rank +import open_clip +from omegaconf import OmegaConf +import einops + + +def get_args(): + parser = ArgumentParser() + parser.add_argument( + "--arch", + type=str, + default="ViT-H-14" + ) + + parser.add_argument( + "--version", + type=str, + default="laion2b_s32b_b79k" + ) + + parser.add_argument( + "--hparams_file", + type=str, + default=None, + required=False, + help="Path config for restoring. It's created during training and may need to be modified during restore if restore environment is different than training. Ex: /raid/nemo_experiments/megatron_gpt/hparams.yaml", + ) + parser.add_argument("--nemo_file_path", type=str, default=None, required=True, help="Path to output .nemo file.") + parser.add_argument("--gpus_per_node", type=int, required=False, default=1) + parser.add_argument("--tensor_model_parallel_size", type=int, required=False, default=1) + parser.add_argument("--pipeline_model_parallel_size", type=int, required=False, default=1) + parser.add_argument( + "--pipeline_model_parallel_split_rank", + type=int, + required=False, + default=None, + help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", + ) + parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) + parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") + + args = parser.parse_args() + return args + +def mapping_state_dict(open_model): + open_state_dict = open_model.state_dict() + key_mapping = { + "positional_embedding": "text_encoder.language_model.embedding.position_embeddings", + "token_embedding.weight": "text_encoder.language_model.embedding.word_embeddings.weight", + "ln_final.weight": "text_encoder.language_model.encoder.final_layernorm.weight", + "ln_final.bias": "text_encoder.language_model.encoder.final_layernorm.bias", + "text_projection": "text_encoder.head.weight", + } + layer_mapping = { + ".ln_1.weight": ".input_layernorm.weight", + ".ln_1.bias": ".input_layernorm.bias", + ".attn.in_proj_weight": ".self_attention.query_key_value.weight", + ".attn.in_proj_bias": ".self_attention.query_key_value.bias", + ".attn.out_proj.weight": ".self_attention.dense.weight", + ".attn.out_proj.bias": ".self_attention.dense.bias", + ".ln_2.weight": ".post_attention_layernorm.weight", + ".ln_2.bias": ".post_attention_layernorm.bias", + ".mlp.c_fc.weight": ".mlp.dense_h_to_4h.weight", + ".mlp.c_fc.bias": ".mlp.dense_h_to_4h.bias", + ".mlp.c_proj.weight": ".mlp.dense_4h_to_h.weight", + ".mlp.c_proj.bias": ".mlp.dense_4h_to_h.bias", + ".ln_pre.weight": ".preprocess_layernorm.weight", + ".ln_pre.bias": ".preprocess_layernorm.bias", + ".ln_post.weight": ".transformer.final_layernorm.weight", + ".ln_post.bias": ".transformer.final_layernorm.bias", + ".positional_embedding": ".position_embeddings", + ".backbone.proj": ".head.weight", + ".class_embedding": ".cls_token", + ".backbone.conv1.weight": ".backbone.linear_encoder.weight", + } + + nemo_state_dict = {} + for key in open_state_dict.keys(): + if key.startswith("transformer.resblocks."): + key_ = key.replace("transformer.resblocks.", "text_encoder.language_model.encoder.layers.") + elif key.startswith("visual.transformer.resblocks."): + key_ = key.replace("visual.transformer.resblocks.", "vision_encoder.backbone.transformer.layers.") + elif key.startswith('visual.'): + key_ = key.replace("visual.", "vision_encoder.backbone.") + else: + key_ = key + for pat in key_mapping: + if key_ == pat: + key_ = key_.replace(pat, key_mapping[pat]) + for pat in layer_mapping: + if key_.endswith(pat): + key_ = key_[:-len(pat)] + layer_mapping[pat] + break + nemo_state_dict[key_] = open_state_dict[key] + + nemo_state_dict["text_encoder.head.weight"] = nemo_state_dict["text_encoder.head.weight"].T + nemo_state_dict["vision_encoder.head.weight"] = nemo_state_dict["vision_encoder.head.weight"].T + nemo_state_dict["vision_encoder.backbone.cls_token"] = nemo_state_dict["vision_encoder.backbone.cls_token"].reshape(1, 1, -1) + w = nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] + nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] = einops.rearrange( + w, + "b c p1 p2 -> b (p1 p2 c)", + ) + nemo_state_dict["vision_encoder.backbone.linear_encoder.bias"] = torch.zeros(w.shape[0]) + + return nemo_state_dict + +def convert(local_rank, rank, world_size, args): + app_state = AppState() + app_state.data_parallel_rank = 0 + num_nodes = world_size // args.gpus_per_node + if args.bcp: + trainer = Trainer( + devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu', plugins=[TorchElasticEnvironment()] + ) + else: + trainer = Trainer(devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu') + + app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = args.tensor_model_parallel_size + + # no use atm, use to split ranks in encoder/decoder models. + if args.pipeline_model_parallel_size > 1 and args.model_type in []: + if args.pipeline_model_parallel_split_rank is not None: + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_split_rank + else: + if args.pipeline_model_parallel_size % 2 != 0: + raise ValueError( + f"Pipeline model parallel size {args.pipeline_model_parallel_size} must be even if split rank is not specified." + ) + else: + # If split rank is not set, then we set it to be pipeline_model_parallel_size // 2 - this is because in most cases we have the same number of enc/dec layers. + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_size // 2 + else: + app_state.pipeline_model_parallel_split_rank = None + + app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size + + parallel_state.initialize_model_parallel( + tensor_model_parallel_size_=app_state.tensor_model_parallel_size, + pipeline_model_parallel_size_=app_state.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank_=app_state.pipeline_model_parallel_split_rank, + ) + + app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() + app_state.tensor_model_parallel_rank = parallel_state.get_tensor_model_parallel_rank() + + cfg = OmegaConf.load(args.hparams_file) + model = MegatronCLIPModel(cfg.model, trainer) + + open_model, _, _ = open_clip.create_model_and_transforms(args.arch, pretrained=args.version) + state_dict = mapping_state_dict(open_model) + model.model.load_state_dict(state_dict) + + model._save_restore_connector = NLPSaveRestoreConnector() + + if torch.distributed.is_initialized(): + torch.distributed.barrier() + + model.save_to(args.nemo_file_path) + + logging.info(f'NeMo model saved to: {args.nemo_file_path}') + + +if __name__ == '__main__': + args = get_args() + local_rank, rank, world_size = initialize_distributed(args) + convert(local_rank, rank, world_size, args) diff --git a/nemo/collections/multimodal/models/clip/megatron_clip_models.py b/nemo/collections/multimodal/models/clip/megatron_clip_models.py index 22707c6eaec6..d97f55f6d8b2 100644 --- a/nemo/collections/multimodal/models/clip/megatron_clip_models.py +++ b/nemo/collections/multimodal/models/clip/megatron_clip_models.py @@ -85,13 +85,18 @@ def __init__(self, model_cfg, pre_process=True, post_process=True): self.global_average_pool = model_cfg.global_average_pool self.pre_process = pre_process self.post_process = post_process + + if model_cfg.get("class_token_length") is None or model_cfg.get("class_token_length") <= 0: + class_token = False + else: + class_token = True self.backbone = VitBackbone( model_cfg, init_method=init_method_normal(model_cfg.init_method_std), scaled_init_method=scaled_init_method, pre_process=self.pre_process, post_process=self.post_process, - class_token=False, + class_token=class_token, single_token_output=False, ) @@ -197,10 +202,20 @@ def __init__(self, model_cfg, padded_vocab_size, pre_process=True, post_process= if self.post_process: self.head = torch.nn.Linear(model_cfg.hidden_size, self.output_dim, bias=False,) + self.attn_mask = self.build_attention_mask(model_cfg.max_position_embeddings) + def set_input_tensor(self, input_tensor): """See megatron.model.transformer.set_input_tensor()""" self.language_model.set_input_tensor(input_tensor) + def build_attention_mask(self, max_position_embeddings): + # lazily create causal attention mask, with full attention between the tokens + mask = torch.empty(max_position_embeddings, max_position_embeddings, dtype=bool, device='cuda') + mask.fill_(True) + mask.triu_(1) # zero out the lower diagonal + mask = mask.reshape(1, 1, max_position_embeddings, max_position_embeddings) + return mask + def forward( self, input_ids, ): @@ -211,7 +226,7 @@ def forward( hidden_states = self.language_model( input_ids, self.position_ids, - None, + self.attn_mask, token_type_ids=None, layer_past=None, get_key_value=False, diff --git a/nemo/collections/vision/modules/vit/vit_backbone.py b/nemo/collections/vision/modules/vit/vit_backbone.py index cf874830d378..ea114619e0cb 100644 --- a/nemo/collections/vision/modules/vit/vit_backbone.py +++ b/nemo/collections/vision/modules/vit/vit_backbone.py @@ -152,7 +152,7 @@ def twod_interpolate_position_embeddings_hook( input_param_tok = input_param[:class_token_length, :] input_param_grid = input_param[class_token_length:, :] else: - input_param_tok = torch.zeros(class_token_length, hidden_size) + input_param_tok = torch.zeros(class_token_length, hidden_size, device=input_param.device) input_param_grid = input_param assert input_param.shape[1] == hidden_size From 2bcc7237a446360a94e2abe23c5d29b52e2196a1 Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Tue, 9 May 2023 23:35:27 +0000 Subject: [PATCH 018/512] change string DPM-SOLVER to DPM --- nemo/collections/multimodal/parts/stable_diffusion/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py index 4cea9eb8691b..cc4f1c62ffb6 100644 --- a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py +++ b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py @@ -37,7 +37,7 @@ def initialize_sampler(model, sampler_type): sampler = DDIMSampler(model) elif sampler_type == 'PLMS': sampler = PLMSSampler(model) - elif sampler_type == 'DPM-SOLVER': + elif sampler_type == 'DPM': sampler = DPMSolverSampler(model) else: raise ValueError(f'Sampler {sampler_type} is not supported.') From e81aa5492d0e2ab3bdc905b5974444790a4fba81 Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Wed, 10 May 2023 07:11:24 +0000 Subject: [PATCH 019/512] change config to use DPM-solver and 25 inference steps --- .../multimodal/generative/stable_diffusion/conf/sd_infer.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml index e6655891338a..dbe384dd2566 100644 --- a/examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml @@ -6,8 +6,8 @@ infer: height: 512 width: 512 down_factor: 8 - inference_steps: 50 - sampler_type: 'PLMS' + inference_steps: 25 + sampler_type: 'DPM' eta: 0 output_type: 'pil' save_to_file: True From d2c9fa1127297037d544984ccff2e44dd02fd261 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Wed, 10 May 2023 10:50:37 -0700 Subject: [PATCH 020/512] [Mingyuanm]Merging Fixed version of SD ddpm to internal/main --- .../clip/convert_openclip_to_nemo.py | 34 +- .../stable_diffusion/conf/sd_train.yaml | 101 +- .../generative/stable_diffusion/sd_train.py | 110 +- .../data/stable_diffusion/wds_sampler.py | 71 - .../data/stable_diffusion/webdataset.py | 390 ---- .../data/stable_diffusion/webdataset_utils.py | 278 --- .../stable_diffusion/ldm/ddpm_legacy.py | 1680 ----------------- .../diffusionmodules/openaimodel.py | 9 +- .../stable_diffusion/diffusionmodules/util.py | 7 +- .../stable_diffusion/encoders/modules.py | 24 +- 10 files changed, 136 insertions(+), 2568 deletions(-) delete mode 100644 nemo/collections/multimodal/data/stable_diffusion/wds_sampler.py delete mode 100644 nemo/collections/multimodal/data/stable_diffusion/webdataset.py delete mode 100644 nemo/collections/multimodal/data/stable_diffusion/webdataset_utils.py delete mode 100644 nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm_legacy.py diff --git a/examples/multimodal/foundation/clip/convert_openclip_to_nemo.py b/examples/multimodal/foundation/clip/convert_openclip_to_nemo.py index 173a8d36046e..b34831c138cb 100644 --- a/examples/multimodal/foundation/clip/convert_openclip_to_nemo.py +++ b/examples/multimodal/foundation/clip/convert_openclip_to_nemo.py @@ -13,9 +13,13 @@ # limitations under the License. import os +from argparse import ArgumentParser + +import einops +import open_clip import torch from apex.transformer import parallel_state -from argparse import ArgumentParser +from omegaconf import OmegaConf from pytorch_lightning.plugins.environments import TorchElasticEnvironment from pytorch_lightning.trainer.trainer import Trainer @@ -24,24 +28,13 @@ from nemo.utils import AppState, logging from nemo.utils.distributed import initialize_distributed from nemo.utils.model_utils import inject_model_parallel_rank -import open_clip -from omegaconf import OmegaConf -import einops def get_args(): parser = ArgumentParser() - parser.add_argument( - "--arch", - type=str, - default="ViT-H-14" - ) + parser.add_argument("--arch", type=str, default="ViT-H-14") - parser.add_argument( - "--version", - type=str, - default="laion2b_s32b_b79k" - ) + parser.add_argument("--version", type=str, default="laion2b_s32b_b79k") parser.add_argument( "--hparams_file", @@ -67,6 +60,7 @@ def get_args(): args = parser.parse_args() return args + def mapping_state_dict(open_model): open_state_dict = open_model.state_dict() key_mapping = { @@ -114,22 +108,22 @@ def mapping_state_dict(open_model): key_ = key_.replace(pat, key_mapping[pat]) for pat in layer_mapping: if key_.endswith(pat): - key_ = key_[:-len(pat)] + layer_mapping[pat] + key_ = key_[: -len(pat)] + layer_mapping[pat] break nemo_state_dict[key_] = open_state_dict[key] nemo_state_dict["text_encoder.head.weight"] = nemo_state_dict["text_encoder.head.weight"].T nemo_state_dict["vision_encoder.head.weight"] = nemo_state_dict["vision_encoder.head.weight"].T - nemo_state_dict["vision_encoder.backbone.cls_token"] = nemo_state_dict["vision_encoder.backbone.cls_token"].reshape(1, 1, -1) + nemo_state_dict["vision_encoder.backbone.cls_token"] = nemo_state_dict[ + "vision_encoder.backbone.cls_token" + ].reshape(1, 1, -1) w = nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] - nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] = einops.rearrange( - w, - "b c p1 p2 -> b (p1 p2 c)", - ) + nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] = einops.rearrange(w, "b c p1 p2 -> b (p1 p2 c)",) nemo_state_dict["vision_encoder.backbone.linear_encoder.bias"] = torch.zeros(w.shape[0]) return nemo_state_dict + def convert(local_rank, rank, world_size, args): app_state = AppState() app_state.data_parallel_rank = 0 diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml index 693d92604103..c4371b4e0f0f 100644 --- a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml @@ -7,7 +7,7 @@ trainer: precision: 16 logger: False # logger provided by exp_manager enable_checkpointing: False - replace_sampler_ddp: True + replace_sampler_ddp: False max_epochs: 2 # PTL default. In practice, max_steps will be reached first. max_steps: -1 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches log_every_n_steps: 10 @@ -15,11 +15,6 @@ trainer: gradient_clip_val: 1.0 benchmark: False enable_model_summary: True - strategy: - bucket_cap_mb: 256 - gradient_as_bucket_view: True - find_unused_parameters: False - allreduce_precision: 32 exp_manager: exp_dir: null @@ -48,7 +43,12 @@ exp_manager: model: - base_learning_rate: 1.0e-4 + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 1 # limited by GPU memory + global_batch_size: 1 # will use more micro batches to reach global batch size + linear_start: 0.00085 linear_end: 0.012 num_timesteps_cond: 1 @@ -65,7 +65,7 @@ model: use_ema: False scale_by_std: False ckpt_path: - ignore_keys: [ ] + ignore_keys: [] parameterization: eps clip_denoised: True load_only_unet: False @@ -79,7 +79,7 @@ model: logvar_init: 0 beta_schedule: linear loss_type: l2 - learning_rate: 1.0e-04 + concat_mode: True cond_stage_forward: text_embedding_dropout_rate: 0.1 @@ -89,22 +89,22 @@ model: unet_config: _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel - from_pretrained: - from_NeMo: #Must be specified when from pretrained is not None, False means loading unet from HF ckpt + from_pretrained: /ckpts/nemo-v1-2.ckpt + from_NeMo: True #Must be specified when from pretrained is not None, False means loading unet from HF ckpt image_size: 32 # unused in_channels: 4 out_channels: 4 model_channels: 320 attention_resolutions: - - 4 - - 2 - - 1 + - 4 + - 2 + - 1 num_res_blocks: 2 channel_mult: - - 1 - - 2 - - 4 - - 4 + - 1 + - 2 + - 4 + - 4 num_heads: 8 use_spatial_transformer: true transformer_depth: 1 @@ -126,12 +126,12 @@ model: out_ch: 3 ch: 128 ch_mult: - - 1 - - 2 - - 4 - - 4 + - 1 + - 2 + - 4 + - 4 num_res_blocks: 2 - attn_resolutions: [ ] + attn_resolutions: [] dropout: 0.0 lossconfig: target: torch.nn.Identity @@ -142,28 +142,43 @@ model: device: cuda max_length: 77 + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - scheduler_config: - cls: nemo.collections.multimodal.parts.stable_diffusion.lr_scheduler.LambdaLinearScheduler - warm_up_steps: [ 0 ] - cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases - f_start: [ 1.0e-6 ] - f_max: [ 1.e-4 ] - f_min: [ 1.e-10 ] + optim: + name: fused_adam + lr: 1e-4 + weight_decay: 0. + betas: + - 0.9 + - 0.999 + sched: + name: WarmupHoldPolicy + warmup_steps: 10000 + hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes data: - num_workers: 16 - train: - batch_size: 4 - dataset_path: - - /datasets/coyo/test.pkl - augmentations: - resize_smallest_side: 512 - center_crop_h_w: 512, 512 - horizontal_flip: False - filterings: + num_workers: 16 + train: + dataset_path: + - /datasets/coyo/test.pkl + augmentations: + resize_smallest_side: 512 + center_crop_h_w: 512, 512 + horizontal_flip: False + filterings: - webdataset: - infinite_sampler: True - local_root_path: /datasets/coyo + webdataset: + infinite_sampler: False + local_root_path: /datasets/coyo diff --git a/examples/multimodal/generative/stable_diffusion/sd_train.py b/examples/multimodal/generative/stable_diffusion/sd_train.py index f08a8e0e666d..f423a684d349 100644 --- a/examples/multimodal/generative/stable_diffusion/sd_train.py +++ b/examples/multimodal/generative/stable_diffusion/sd_train.py @@ -11,73 +11,83 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import argparse -import os + from datetime import timedelta -import numpy as np -import pytorch_lightning as pl import torch -import torch.utils.data as data -from hydra.utils import instantiate -from omegaconf import OmegaConf +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment from pytorch_lightning.strategies.ddp import DDPStrategy -from torch.distributed.algorithms.ddp_comm_hooks.default_hooks import fp16_compress_hook +from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector -from nemo.collections.multimodal.data.stable_diffusion.wds_sampler import WebDataloaderSamplerCallback -from nemo.collections.multimodal.data.stable_diffusion.webdataset import WebDatasetWithRawText -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm_legacy import LatentDiffusion -from nemo.collections.multimodal.models.stable_diffusion.ldm_config import LatentDiffusionModelConfig +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PipelineMixedPrecisionPlugin, +) from nemo.core.config import hydra_runner -from nemo.utils.exp_manager import StatelessTimer, exp_manager +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager -@hydra_runner(config_path='conf', config_name='sd_train.yaml') -def main(cfg): - pl.seed_everything(42) +@hydra_runner(config_path='conf', config_name='sd_train') +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - # Tune for DDP - if isinstance(cfg.trainer.strategy, str): - strategy = cfg.trainer.strategy - else: - ddp_config = dict(cfg.trainer.strategy) - if str(ddp_config.pop("allreduce_precision", "32")) == "16": # can be bf16 - ddp_config["ddp_comm_hook"] = fp16_compress_hook - ddp_config["timeout"] = timedelta(seconds=180) - strategy = DDPStrategy(**ddp_config) - del cfg.trainer.strategy - - batch_size = cfg.model.data.train.batch_size - dataset = WebDatasetWithRawText(dataset_cfg=cfg.model.data, is_train=True,) - data = torch.utils.data.DataLoader( - dataset, batch_size=batch_size, num_workers=cfg.model.data.num_workers, pin_memory=True, drop_last=False - ) - global_bs = cfg.trainer.devices * cfg.trainer.num_nodes * batch_size + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' - callbacks = [] - if not cfg.model.data.webdataset.infinite_sampler: - wds_sampler = WebDataloaderSamplerCallback( - batch_size=batch_size, gradient_accumulation=cfg.trainer.accumulate_grad_batches - ) - callbacks.append(wds_sampler) + torch.backends.cuda.matmul.allow_tf32 = True plugins = [] + + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 65536.0), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + ) + if megatron_amp_O2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + if cfg.get('cluster_type', None) == 'BCP': plugins.append(TorchElasticEnvironment()) - trainer = pl.Trainer(**cfg.trainer, plugins=plugins, callbacks=callbacks, strategy=strategy) - exp_manager(trainer, cfg.get("exp_manager", None)) - if ( - not cfg.model.data.webdataset.infinite_sampler - and trainer._checkpoint_connector.resume_from_checkpoint_fit_path is not None - ): - # Reusming from previous training session - wds_sampler.resume_flag = True + callbacks = [] + trainer = Trainer(plugins=plugins, strategy=strategy, callbacks=callbacks, **cfg.trainer) + + exp_manager(trainer, cfg.exp_manager) + # update resume from checkpoint found by exp_manager + if cfg.model.get("resume_from_checkpoint") is not None: + resume_from_checkpoint = cfg.model.resume_from_checkpoint + else: + resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path + + logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}') + + trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision - model = LatentDiffusion(cfg.model, trainer).cuda() - model.learning_rate = cfg.model.base_learning_rate * global_bs * cfg.trainer.accumulate_grad_batches + model = MegatronLatentDiffusion(cfg.model, trainer) - trainer.fit(model, data) + trainer.fit(model) if __name__ == '__main__': diff --git a/nemo/collections/multimodal/data/stable_diffusion/wds_sampler.py b/nemo/collections/multimodal/data/stable_diffusion/wds_sampler.py deleted file mode 100644 index c1664a028a3a..000000000000 --- a/nemo/collections/multimodal/data/stable_diffusion/wds_sampler.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import torch -import torch.distributed as dist -from pytorch_lightning import Callback - - -class WDSSampler: - def __init__(self, mode): - self.mode = mode - assert self.mode in ['train', 'val'] - - def set_epoch(self, epoch, pseudo_epoch=None, start_index=0): - if self.mode == 'train': - world_size = dist.get_world_size() - num_samples_read_so_far = start_index * world_size - os.environ["WDS_EPOCH_NUM"] = str(epoch) - os.environ["WDS_START_INDEX"] = str(num_samples_read_so_far) - print(f'set WDS_EPOCH_NUM={epoch}; WDS_START_INDEX={num_samples_read_so_far}; start_index={start_index}') - else: - pass - - -class WebDataloaderSamplerCallback(Callback): - def __init__(self, batch_size, gradient_accumulation=1): - super().__init__() - self.batch_size = batch_size - self.train_sampler = WDSSampler(mode='train') - self.val_sampler = WDSSampler(mode='val') - self.resume_flag = False - self.ga = gradient_accumulation - - def on_train_epoch_start(self, trainer, pl_module): - # For most cases, epoch should start from 0 (start_index = 0), - # except for the case when we resume the checkpoint and start the epoch the first time - if self.resume_flag: - # We calculate the start_index by estimating the global steps / len(dataloader) - num_iters = trainer.global_step % trainer.num_training_batches - self.resume_flag = False - else: - num_iters = 0 - - # We assume that the batch size, # GPUs between different runs remain the same - # When ga is larger than 1, num_iters only records steps with back propagation - # The actual consumed samples needs to multiply with ga batches - consumed_samples_per_GPU = num_iters * self.batch_size * self.ga - # This part assume that when we resume, we are using the same num of gpus and also same batchsize as before - epoch = trainer.global_step * self.ga // trainer.num_training_batches - print( - f'WebdataLoaderSampler Calculated epoch={epoch}, num_iters={num_iters}, num_training_batches={trainer.num_training_batches}' - ) - if pl_module.current_epoch != epoch: - print(f'Warning: Calculated Epoch={epoch} is not equal to pyt-lightning epoch={pl_module.current_epoch}') - - self.train_sampler.set_epoch(epoch, start_index=consumed_samples_per_GPU) - - def on_validation_epoch_start(self, trainer, pl_module): - # For validation, we don't care if we finish or not because we never go through a complete epoch of validation set for now - self.val_sampler.set_epoch(pl_module.current_epoch) diff --git a/nemo/collections/multimodal/data/stable_diffusion/webdataset.py b/nemo/collections/multimodal/data/stable_diffusion/webdataset.py deleted file mode 100644 index ab49c7f89026..000000000000 --- a/nemo/collections/multimodal/data/stable_diffusion/webdataset.py +++ /dev/null @@ -1,390 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import io -import json -import os -import pickle -import random -import re - -import boto3 -import torch.distributed as dist -from botocore.config import Config -from PIL import Image -from torch.utils.data import IterableDataset -from webdataset.utils import pytorch_worker_info - -from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import ( - construct_image_augmentations, - identical_transform, -) -from nemo.collections.multimodal.data.stable_diffusion.webdataset_utils import WebDataset -from nemo.core.classes import IterableDataset as NeMoIterableDataset - -Image.MAX_IMAGE_PIXELS = 933120000 -_IMG_EXTENSIONS = "jpg jpeg png ppm pgm pbm pnm".split() -from webdataset import warn_and_continue - - -def pil_loader(key, data): - r""" - Function to load an image. - If the image is corrupt, it returns a black image. - Args: - key: Image key. - data: Image data stream. - """ - - extension = re.sub(r".*[.]", "", key) - if extension.lower() not in _IMG_EXTENSIONS: - return None - - with io.BytesIO(data) as stream: - img = Image.open(stream) - img.load() - img = img.convert("RGB") - - return img - - -def get_world_size(): - r"""Get world size. How many GPUs are available in this job.""" - world_size = 1 - if dist.is_available(): - if dist.is_initialized(): - world_size = dist.get_world_size() - return world_size - - -def repeat_list(x, n): - r""" - Function to repeat the list to a fixed shape. - n is the desired length of the extended list. - Args: - x (list): Input list - n (int): Desired length - """ - if n == 0: - return [] - assert len(x) > 0 - - x_extended = [] - while len(x_extended) < n: - x_extended = x_extended + x - x_extended = x_extended[0:n] - - return x_extended - - -def build_resolution_filter(value=None, method='larger', image_idx=0): - assert method == 'larger' or method == 'smaller' - if method == 'larger': - print(f'Only Selecting images with resolution >= {value}') - return lambda x: x[image_idx].size[0] >= value and x[image_idx].size[1] >= value - - print(f'Only Selecting images with resolution <= {value}') - return lambda x: x[image_idx].size[0] <= value and x[image_idx].size[1] <= value - - -class ShardListWithResumes(IterableDataset): - r""" - An iterable dataset that is compatible with custom resets. - Can be restored from an iteration number and index number. - """ - - def __init__( - self, - urls, - epoch_shuffle=False, - shuffle=True, - split_by_node=True, - split_by_worker=True, - chunk_size=1, - resume_flag=True, - verbose=False, - ): - r"""Create a ShardList. - Args: - urls (list): a list of URLs as a Python list or brace notation string - epoch_shuffle (bool): Shuffles the whole epoch. If disabled, each node will see the same set of urls. - shuffle (bool): shuffle samples before iterating. - split_by_node (bool): split shards by node if True - chunk_size (int): chunk size used in webdataset creation - resume_flag (bool): If enabled, resumes from a specific iteration and epoch number. - verbose (bool): Prints some logs if true - """ - super().__init__() - - self.verbose = verbose - self.epoch = 0 - self.start_index = 0 - self.epoch_shuffle = epoch_shuffle - self.shuffle = shuffle - self.split_by_node = split_by_node - self.split_by_worker = split_by_worker - self.chunk_size = chunk_size - self.resume_flag = resume_flag - self.urls = urls - - def set_epoch(self, epoch, start_index): - r"""Set the current epoch. Used for per-node shuffling. - Args: - epoch (int): Epoch number - start_index (int): iteraton number - """ - self.epoch = epoch - self.start_index = start_index - - def __iter__(self): - r"""Return an iterator over the shards.""" - - rank, world_size, worker_id, num_workers = pytorch_worker_info() - - # Setting epoch and start index - if self.resume_flag: - self.epoch = int(os.environ['WDS_EPOCH_NUM']) - - # This tells us number of chunks that have been seen by one GPU - self.start_index = int(os.environ['WDS_START_INDEX']) // self.chunk_size - - urls = self.urls - - # Shuffling the entire epoch before splitting among nodes and workers. - if self.epoch_shuffle: - if self.shuffle: - raise ValueError("If epoch_shuffle is used, do not use shuffle.") - - if self.verbose: - print("PytorchShardList epochshuffle {}".format(self.epoch)) - random.Random(self.epoch).shuffle(urls) - - num_urls = len(urls) - - # Splitting the shards by worker and node - - # Extending urls so that each workers receive the same number of batches. - # This serves the job of ddp_equalize. - nworkers_all = world_size * num_workers - if num_urls % nworkers_all > 0: - num_urls_per_process = (num_urls // nworkers_all) + 1 - else: - num_urls_per_process = num_urls // nworkers_all - extended_url_list_size = num_urls_per_process * nworkers_all - urls = repeat_list(urls, extended_url_list_size) - - # print(f'Total Number of URLS before spliting: {num_urls}') - if self.split_by_node: - urls = urls[rank::world_size] - - if self.split_by_worker: - urls = urls[worker_id::num_workers] - - if self.verbose: - print( - f'Number of URLs after splitting: {len(urls)}. rank/world_size={rank}/{world_size} worker_id/num_workers={worker_id}/{num_workers}' - ) - - if self.shuffle: - random.Random(self.epoch + 17).shuffle(urls) - - # This tells us the number of chunks seen by one worker. - # Do not iterate over the seen chunks. - start_index_per_worker = self.start_index // (num_workers * world_size) - urls = urls[start_index_per_worker:] - - if self.verbose: - print( - f'Number of URLS after using start_index_per_worker: {len(urls)}. self.start_index={self.start_index} start_index_per_worker={start_index_per_worker}' - ) - print( - f'PytorchShardList Rank=<{rank}/{world_size}> Worker=<{worker_id}/{num_workers}> receives {len(urls)} URLs (TARs)' - ) - - for url in urls: - yield dict(url=url) - - -class WebDatasetBase(NeMoIterableDataset): - def __init__(self, cfg, is_train=True): - r""" - Webdataloader class - Args: - cfg: Dataset Config - is_train (bool): Is the dataset used in training mode? - """ - super().__init__() - - self.cfg = cfg - self.num_workers = self.cfg.num_workers - self.world_size = get_world_size() - self.webdata_cfg = self.cfg.webdataset - self.infinite_sampler = self.webdata_cfg.infinite_sampler - if is_train: - dataset_path = cfg.train.dataset_path - self.batch_size = self.cfg.train.batch_size - self.augmentations = self.cfg.train.augmentations - self.filterings = self.cfg.train.filterings - else: - dataset_path = cfg.val.dataset_path - self.batch_size = self.val.batch_size - self.augmentations = self.cfg.val.augmentations - self.filterings = self.cfg.val.filterings - - if getattr(self.webdata_cfg, 'object_store', False): - # Initializing PBSS - print(f'Init PBSS using credentials file at {self.webdata_cfg.pbss_credentials_file}') - self.use_object_store = True - assert self.webdata_cfg.pbss_credentials_file is not None - with open(self.webdata_cfg.pbss_credentials_file) as fin: - self.credentials = json.load(fin) - config = Config(connect_timeout=30, signature_version="s3", retries={"max_attempts": 999999}) - self.s3 = boto3.client('s3', **self.credentials, config=config) - self.bucket = self.webdata_cfg.bucket - self.local_root_path = None - else: - self.use_object_store = False - self.s3 = None - self.bucket = None - self.local_root_path = self.webdata_cfg.local_root_path - print(f'Read Webdataset locally. Data stores at {self.local_root_path}') - - # Concatenate all dataset infos - - # wdinfo in a dict containing webdata information - self.wdinfo = dict() - for dset_info_path in dataset_path: - with open(dset_info_path, 'rb') as fp: - dset_info = pickle.load(fp) - if 'tar_files' not in self.wdinfo: - self.wdinfo['tar_files'] = dset_info['tar_files'] - self.wdinfo['total_key_count'] = dset_info['total_key_count'] - self.wdinfo['chunk_size'] = dset_info['chunk_size'] - else: - self.wdinfo['tar_files'].extend(dset_info['tar_files']) - self.wdinfo['total_key_count'] += dset_info['total_key_count'] - - def build_dataset(self, **kwargs): - raise ValueError('build_dataset function not implemented') - - -class WebDatasetWithRawText(WebDatasetBase): - def __init__(self, dataset_cfg, is_train=True): - r""" - Webdataloader class - Args: - dataset_cfg: Dataset config - is_train (bool): Is the dataset used in training mode? - """ - super().__init__(dataset_cfg, is_train=is_train) - # For adding corruptions and obtaining image pyramid - # TODO Add this for SR256/SR1024 training - # self.corruption_gen = ImagePyramidWithCorruptions( - # cfg=cfg, is_inference=is_inference, is_test=is_test - # ) - - # Construct augmentations - self.img_transform = construct_image_augmentations(self.augmentations) - self.text_transform = identical_transform - self.verbose = dataset_cfg.get("verbose", False) - self.build_dataset() - - def build_dataset(self): - """See base class.""" - - train_info = self.wdinfo - - shards_train_list = train_info["tar_files"] - num_shards = len(shards_train_list) - assert num_shards > 0, "Did not find any training data." - - chunk_size = train_info["chunk_size"] - - # Shuffle buffer: - shuffle_buffer_size = train_info["chunk_size"] - - # This function maps data that are tuples to dictionary. - def tuple_to_dict(inp): - for input in inp: - out_dict = dict() - out_dict['images'] = input[0].permute(1, 2, 0) - - out_dict['captions'] = input[1] - yield out_dict - - # Train dataset object - from webdataset import warn_and_continue - - if self.infinite_sampler: - rank, world_size, worker_id, num_workers = pytorch_worker_info() - epoch_length = train_info["total_key_count"] // self.batch_size // world_size - print(f'Using infinite sampler, world_size={world_size}. The epoch length will be set to: {epoch_length}') - else: - print(f'Initiating ShardListWithResumes..') - shards_train_list = ShardListWithResumes( - urls=shards_train_list, - epoch_shuffle=True, - shuffle=False, - split_by_node=True, - split_by_worker=True, - chunk_size=chunk_size, - resume_flag=True, - verbose=self.verbose, - ) - - train_dataset = ( - WebDataset( - shards_train_list, - load_from_object_store=self.use_object_store, - s3_client=self.s3, - s3_bucket_name=self.bucket, - local_root_path=self.local_root_path, - handler=warn_and_continue, - resampled=self.infinite_sampler, - ) - .shuffle(shuffle_buffer_size) # Shuffling the buffer - .decode(pil_loader, handler=warn_and_continue) # Decoding the data - .to_tuple("jpg txt") # Splitting into tuple - ) - if self.filterings is not None: - if self.filterings.resolution is not None: - train_dataset = train_dataset.select( - build_resolution_filter(**self.filterings.resolution, image_idx=0) - ) - - # Add additional augmentation - train_dataset = train_dataset.map_tuple(self.img_transform, self.text_transform).compose( # Augmentation - tuple_to_dict - ) # Converting tuple to data dict - - train_dataset.total_images = train_info["total_key_count"] - # Set epoch length if using infinite sampler - if self.infinite_sampler: - rank, world_size, worker_id, num_workers = pytorch_worker_info() - nbatches = train_dataset.total_images // world_size // self.num_workers - print(f'Setting nbatches={nbatches} for infinite sampler. world_size={world_size}') - train_dataset = train_dataset.with_epoch(nbatches=nbatches) - - print("Total number of training shards: %d", num_shards) - print("Total training key count: %d", train_dataset.total_images) - - self._dataset = train_dataset - - def __iter__(self): - return self._dataset.__iter__() - - def __len__(self): - world_size = get_world_size() - # In Webdataset multi-gpu training settings, each GPU will be assigned with different portions of - # training data, therefore divde the dataset size by the number of GPUs. - return self._dataset.total_images // world_size diff --git a/nemo/collections/multimodal/data/stable_diffusion/webdataset_utils.py b/nemo/collections/multimodal/data/stable_diffusion/webdataset_utils.py deleted file mode 100644 index 2e5b7d6505ab..000000000000 --- a/nemo/collections/multimodal/data/stable_diffusion/webdataset_utils.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import io -import os -import sys -from urllib.parse import urlparse - -import webdataset.gopen as gopen_webdata -import yaml -from webdataset import cache, filters, shardlists -from webdataset.compat import FluidInterface -from webdataset.handlers import reraise_exception -from webdataset.pipeline import DataPipeline -from webdataset.pytorch import IterableDataset -from webdataset.tariterators import group_by_keys, tar_file_expander - -# Number of attempts to read aws objects. -_NUM_OBJECT_STORE_READ_ATTEMPTS = 10 - - -def gopen(url, mode="rb", bufsize=8192, **kw): - r"""Open the URL. - This uses the `gopen_schemes` dispatch table to dispatch based - on scheme. - Support for the following schemes is built-in: pipe, file, - http, https, sftp, ftps, scp. - When no scheme is given the url is treated as a file. - You can use the OPEN_VERBOSE argument to get info about - files being opened. - - This implementation is based on webdataset's gopen, - with the modification of supporting reading from s3 object_store: - https://webdataset.github.io/webdataset/api/webdataset/gopen.html#gopen - Args: - url (list[str]): the source URL - mode (str): the mode ("rb", "r") - bufsize (int): the buffer size - """ - global fallback_gopen - verbose = int(os.environ.get("GOPEN_VERBOSE", 0)) - if verbose: - print("GOPEN", url, gopen_webdata.info, file=sys.stderr) - - assert mode in ["rb", "wb"], mode - if url == "-": - if mode == "rb": - return sys.stdin.buffer - elif mode == "wb": - return sys.stdout.buffer - else: - raise ValueError(f"unknown mode {mode}") - - # If we specify 'object_store' in keyword arguments, - # then we would load from AWS. - # In this case, you also need to specify s3_client and s3_bucket_name - # in arguments. - if 'object_store' in kw and kw['object_store']: - # Load from object store - attempt = 0 - - while attempt < _NUM_OBJECT_STORE_READ_ATTEMPTS: - try: - s3_response_object = kw['s3_client'].get_object(Bucket=kw['s3_bucket_name'], Key=url) - object_content = s3_response_object['Body'].read() - - # This is a check to verify is the object is fully read. - full_read = s3_response_object['ContentLength'] == len(object_content) - if full_read: - return io.BytesIO(object_content) - else: - attempt += 1 - except Exception as e: # noqa - # If there is an exception (usually connectivity error or protocol error), read again - attempt += 1 - print(e) - print('Retrying tar file download, attempt {}'.format(attempt)) - continue - raise ConnectionError('Unable to read {} from PBSS. {} attempts tried.'.format(url, attempt)) - - # Append root path to the url if dataset is stored on local disk system - elif 'local_root_path' in kw and kw['local_root_path'] is not None: - url = os.path.join(kw['local_root_path'], url) - - # For all other gopen schemes, use the native webdataset gopen functions. - pr = urlparse(url) - if pr.scheme == "": - bufsize = int(os.environ.get("GOPEN_BUFFER", -1)) - return open(url, mode, buffering=bufsize) - if pr.scheme == "file": - bufsize = int(os.environ.get("GOPEN_BUFFER", -1)) - return open(pr.path, mode, buffering=bufsize) - handler = gopen_webdata.gopen_schemes["__default__"] - handler = gopen_webdata.gopen_schemes.get(pr.scheme, handler) - return handler(url, mode, bufsize, **kw) - - -def url_opener(data, handler=reraise_exception, **kw): - r"""Given a stream of url names (packaged in `dict(url=url)`), yield opened streams. - - Args: - data: Iterator of dictionaires containing url paths. - handler: Exception handler. - """ - for sample in data: - assert isinstance(sample, dict), sample - assert "url" in sample - url = sample["url"] - try: - stream = gopen(url, **kw) - sample.update(stream=stream) - yield sample - except Exception as exn: - exn.args = exn.args + (url,) - if handler(exn): - continue - else: - break - - -# Define a new tarfile_samples -def tarfile_samples( - src, - handler=reraise_exception, - load_from_object_store=False, - s3_client=None, - s3_bucket_name=None, - local_root_path=None, -): - r""" - Given an iterator of filenames, this function opens the URL streams - and groups data by keys. - - Args: - src: Iterator of data dictionaires containing URL names. - handler: Exception handler. - load_from_object_store (bool): A boolean flag to specify whether to load from - object store. - s3_client: If loading from object store, specify S3 client. - s3_bucket_name: If loading from object store, specify S3 bucket name. - local_root_path: If loading from local (or mounted) disk system, - specify the root path of the dataset. - """ - streams = url_opener( - src, - handler=handler, - object_store=load_from_object_store, - s3_client=s3_client, - s3_bucket_name=s3_bucket_name, - local_root_path=local_root_path, - ) - files = tar_file_expander(streams, handler=handler) - samples = group_by_keys(files, handler=handler) - return samples - - -tarfile_to_samples = filters.pipelinefilter(tarfile_samples) - - -class WebDataset(DataPipeline, FluidInterface): - r"""Webdataset class modified to support loading from object store.""" - - def __init__( - self, - urls, - handler=reraise_exception, - resampled=False, - shardshuffle=None, - cache_size=-1, - cache_dir=None, - detshuffle=False, - nodesplitter=shardlists.single_node_only, - verbose=False, - load_from_object_store=False, - s3_client=None, - s3_bucket_name=None, - local_root_path=None, - ): - r""" - Args: - urls: An iterator containing a list of url names. - handler: Exception handler. - resampled: If true, sample shards from shard list with replacement. - shardshuffle: If true, shuffles the entire shard list. - cache_size: Size of cache. - cache_dir: Path to store cache. - detshuffle: Whether to use deterministic shuffling when shardshuffle is True. - nodesplitter: Function for splitting urls among nodes. - verbose: If True, prints logs. - load_from_object_store (bool): A boolean flag to specify whether to load from - object store. - s3_client: If loading from object store, specify S3 client. - s3_bucket_name: If loading from object store, specify S3 bucket name. - local_root_path: If loading from local (or mounted) disk system, - specify the root path of the dataset. - """ - super().__init__() - if isinstance(urls, IterableDataset): - assert not resampled - self.append(urls) - elif isinstance(urls, str) and (urls.endswith(".yaml") or urls.endswith(".yml")): - with (open(urls)) as stream: - spec = yaml.safe_load(stream) - assert "datasets" in spec - self.append(shardlists.MultiShardSample(spec)) - elif isinstance(urls, dict): - assert "datasets" in urls - self.append(shardlists.MultiShardSample(urls)) - elif resampled: - self.append(shardlists.ResampledShards(urls)) - else: - self.append(shardlists.SimpleShardList(urls)) - self.append(nodesplitter) - self.append(shardlists.split_by_worker) - if shardshuffle is True: - shardshuffle = 100 - if shardshuffle is not None: - if detshuffle: - self.append(filters.detshuffle(shardshuffle)) - else: - self.append(filters.shuffle(shardshuffle)) - if cache_dir is None or cache_size == 0: - self.append( - tarfile_to_samples( - handler=handler, - load_from_object_store=load_from_object_store, - s3_client=s3_client, - s3_bucket_name=s3_bucket_name, - local_root_path=local_root_path, - ) - ) - else: - - # We dont use cache. - assert cache_size == -1 or cache_size > 0 - self.append( - cache.cached_tarfile_to_samples( - handler=handler, verbose=verbose, cache_size=cache_size, cache_dir=cache_dir, - ) - ) - - -class WDSSampler: - r""" - A sampler function for setting the epoch number and iteration number. - In webdataset, information is propagated using environment flags. - In our case, - WDS_EPOCH_NUM: Epoch number - WDS_START_INDEX: Start index in this epoch. - """ - - def __init__(self, mode): - self.mode = mode - assert self.mode in ['train', 'val'] - - def set_epoch(self, epoch): - if self.mode == 'train': - os.environ["WDS_EPOCH_NUM"] = str(epoch) - else: - pass - - def set_iteration(self, start_index): - # start_index should be iters * batch_size - # It is the number of samples that have been seen by one GPU - if self.mode == 'train': - os.environ["WDS_START_INDEX"] = str(start_index) - else: - pass diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm_legacy.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm_legacy.py deleted file mode 100644 index 4e7460115206..000000000000 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm_legacy.py +++ /dev/null @@ -1,1680 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -wild mixture of -https://github.com/lucidrains/denoising-diffusion-pytorch/blob/7706bdfc6f527f58d33f84b7b522e61e6e3164b3/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py -https://github.com/openai/improved-diffusion/blob/e94489283bb876ac1477d5dd7709bbbd2d9902ce/improved_diffusion/gaussian_diffusion.py -https://github.com/CompVis/taming-transformers --- merci -""" - -from contextlib import contextmanager -from functools import partial - -import numpy as np -import pytorch_lightning as pl -import torch -import torch.nn as nn -from apex import amp -from apex.contrib.clip_grad import clip_grad_norm_ -from einops import rearrange, repeat -from omegaconf import open_dict -from pytorch_lightning.utilities import GradClipAlgorithmType -from pytorch_lightning.utilities.distributed import rank_zero_only -from torch._dynamo import optimize -from torch._inductor import config as inductor_config -from torch.optim.lr_scheduler import LambdaLR -from torchvision.utils import make_grid -from tqdm import tqdm - -from nemo.collections.multimodal.models.multimodal_base_model import MegatronMultimodalModel -from nemo.collections.multimodal.models.stable_diffusion.diffusion_model import DiffusionModel -from nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder import ( - AutoencoderKL, - IdentityFirstStage, - VQModelInterface, -) -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import random_dropout -from nemo.collections.multimodal.models.stable_diffusion.ldm_config import ( - DDPMDiffusionModelConfig, - LatentDiffusionModelConfig, -) -from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( - extract_into_tensor, - make_beta_schedule, - noise_like, -) -from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import ( - DiagonalGaussianDistribution, - normal_kl, -) -from nemo.collections.multimodal.parts.stable_diffusion.utils import ( - count_params, - default, - exists, - isimage, - ismap, - log_txt_as_img, - mean_flat, -) -from nemo.core.classes.common import Serialization -from nemo.utils import logging - -__conditioning_keys__ = {'concat': 'c_concat', 'crossattn': 'c_crossattn', 'adm': 'y'} - - -def disabled_train(self, mode=True): - """Overwrite model.train with this function to make sure train/eval mode - does not change anymore.""" - return self - - -def uniform_on_device(r1, r2, shape, device): - return (r1 - r2) * torch.rand(*shape, device=device) + r2 - - -class DDPM(DiffusionModel): - # classic DDPM with Gaussian diffusion, in image space - def apply_model(self, x_t, t, c): - return None - - def get_conditioning(self, c): - return c - - def list_available_models(self): - return None - - def setup_training_data(self, cfg): - return None - - def setup_validation_data(self, cfg): - return None - - def __init__(self, cfg: DDPMDiffusionModelConfig, trainer=None): - super().__init__(cfg=cfg, trainer=trainer) - assert cfg.parameterization in ["eps", "x0"], 'currently only supporting "eps" and "x0"' - self.use_fp16 = (trainer is not None) and (trainer.precision == 16) - self.parameterization = cfg.parameterization - print(f"{self.__class__.__name__}: Running in {self.parameterization}-prediction mode") - self.cond_stage_model = None - self.clip_denoised = cfg.clip_denoised - self.log_every_t = cfg.log_every_t - self.first_stage_key = cfg.first_stage_key - self.image_size = cfg.image_size # try conv? - self.channels = cfg.channels - self.use_positional_encodings = cfg.use_positional_encodings - if self.use_fp16: - with open_dict(cfg.unet_config): - cfg.unet_config.use_fp16 = True - self.model = DiffusionWrapper(cfg.unet_config, cfg.conditioning_key) - count_params(self.model, verbose=True) - self.use_ema = cfg.use_ema - if self.use_ema: - self.model_ema = LitEma(self.model) - print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.") - - self.use_scheduler = cfg.scheduler_config is not None - if self.use_scheduler: - self.scheduler_config = cfg.scheduler_config - - self.v_posterior = cfg.v_posterior - self.original_elbo_weight = cfg.original_elbo_weight - self.l_simple_weight = cfg.l_simple_weight - - if cfg.monitor is not None: - self.monitor = cfg.monitor - # if cfg.ckpt_path is not None: - # self.init_from_ckpt(cfg.ckpt_path, ignore_keys=cfg.ignore_keys, only_model=cfg.load_only_unet) - - self.register_schedule( - given_betas=cfg.given_betas, - beta_schedule=cfg.beta_schedule, - timesteps=cfg.timesteps, - linear_start=cfg.linear_start, - linear_end=cfg.linear_end, - cosine_s=cfg.cosine_s, - ) - - self.loss_type = cfg.loss_type - - self.learn_logvar = cfg.learn_logvar - self.logvar = torch.full(fill_value=cfg.logvar_init, size=(self.num_timesteps,)) - if self.learn_logvar: - self.logvar = nn.Parameter(self.logvar, requires_grad=True) - self.learning_rate = cfg.learning_rate - - def register_schedule( - self, - given_betas=None, - beta_schedule="linear", - timesteps=1000, - linear_start=1e-4, - linear_end=2e-2, - cosine_s=8e-3, - ): - if exists(given_betas): - betas = given_betas - else: - betas = make_beta_schedule( - beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s - ) - alphas = 1.0 - betas - alphas_cumprod = np.cumprod(alphas, axis=0) - alphas_cumprod_prev = np.append(1.0, alphas_cumprod[:-1]) - - (timesteps,) = betas.shape - self.num_timesteps = int(timesteps) - self.linear_start = linear_start - self.linear_end = linear_end - assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' - - to_torch = partial(torch.tensor, dtype=torch.float32) - - self.register_buffer('betas', to_torch(betas)) - self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) - self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) - - # calculations for diffusion q(x_t | x_{t-1}) and others - self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) - self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1.0 - alphas_cumprod))) - self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1.0 - alphas_cumprod))) - self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod))) - self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod - 1))) - - # calculations for posterior q(x_{t-1} | x_t, x_0) - posterior_variance = (1 - self.v_posterior) * betas * (1.0 - alphas_cumprod_prev) / ( - 1.0 - alphas_cumprod - ) + self.v_posterior * betas - # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) - self.register_buffer('posterior_variance', to_torch(posterior_variance)) - # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain - self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20)))) - self.register_buffer( - 'posterior_mean_coef1', to_torch(betas * np.sqrt(alphas_cumprod_prev) / (1.0 - alphas_cumprod)) - ) - self.register_buffer( - 'posterior_mean_coef2', to_torch((1.0 - alphas_cumprod_prev) * np.sqrt(alphas) / (1.0 - alphas_cumprod)) - ) - - if self.parameterization == "eps": - lvlb_weights = self.betas ** 2 / ( - 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod) - ) - elif self.parameterization == "x0": - lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2.0 * 1 - torch.Tensor(alphas_cumprod)) - else: - raise NotImplementedError("mu not supported") - # TODO how to choose this term - lvlb_weights[0] = lvlb_weights[1] - self.register_buffer('lvlb_weights', lvlb_weights, persistent=False) - assert not torch.isnan(self.lvlb_weights).all() - - @contextmanager - def ema_scope(self, context=None): - if self.use_ema: - self.model_ema.store(self.model.parameters()) - self.model_ema.copy_to(self.model) - if context is not None: - print(f"{context}: Switched to EMA weights") - try: - yield None - finally: - if self.use_ema: - self.model_ema.restore(self.model.parameters()) - if context is not None: - print(f"{context}: Restored training weights") - - def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): - sd = torch.load(path, map_location="cpu") - if "state_dict" in list(sd.keys()): - sd = sd["state_dict"] - keys = list(sd.keys()) - for k in keys: - for ik in ignore_keys: - if k.startswith(ik): - print("Deleting key {} from state_dict.".format(k)) - del sd[k] - missing, unexpected = ( - self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict(sd, strict=False) - ) - print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") - if len(missing) > 0: - print(f"Missing Keys: {missing}") - if len(unexpected) > 0: - print(f"Unexpected Keys: {unexpected}") - - def q_mean_variance(self, x_start, t): - """ - Get the distribution q(x_t | x_0). - :param x_start: the [N x C x ...] tensor of noiseless inputs. - :param t: the number of diffusion steps (minus 1). Here, 0 means one step. - :return: A tuple (mean, variance, log_variance), all of x_start's shape. - """ - mean = extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start - variance = extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape) - log_variance = extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape) - return mean, variance, log_variance - - def predict_start_from_noise(self, x_t, t, noise): - return ( - extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - - extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise - ) - - def q_posterior(self, x_start, x_t, t): - posterior_mean = ( - extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start - + extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t - ) - posterior_variance = extract_into_tensor(self.posterior_variance, t, x_t.shape) - posterior_log_variance_clipped = extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape) - return posterior_mean, posterior_variance, posterior_log_variance_clipped - - def p_mean_variance(self, x, t, clip_denoised: bool): - model_out = self.model(x, t) - if self.parameterization == "eps": - x_recon = self.predict_start_from_noise(x, t=t, noise=model_out) - elif self.parameterization == "x0": - x_recon = model_out - if clip_denoised: - x_recon.clamp_(-1.0, 1.0) - - model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) - return model_mean, posterior_variance, posterior_log_variance - - @torch.no_grad() - def p_sample(self, x, t, clip_denoised=True, repeat_noise=False): - b, *_, device = *x.shape, x.device - model_mean, _, model_log_variance = self.p_mean_variance(x=x, t=t, clip_denoised=clip_denoised) - noise = noise_like(x.shape, device, repeat_noise) - # no noise when t == 0 - nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))) - return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise - - @torch.no_grad() - def p_sample_loop(self, shape, return_intermediates=False): - device = self.betas.device - b = shape[0] - img = torch.randn(shape, device=device) - intermediates = [img] - for i in tqdm(reversed(range(0, self.num_timesteps)), desc='Sampling t', total=self.num_timesteps): - img = self.p_sample( - img, torch.full((b,), i, device=device, dtype=torch.long), clip_denoised=self.clip_denoised - ) - if i % self.log_every_t == 0 or i == self.num_timesteps - 1: - intermediates.append(img) - if return_intermediates: - return img, intermediates - return img - - @torch.no_grad() - def sample(self, batch_size=16, return_intermediates=False): - image_size = self.image_size - channels = self.channels - return self.p_sample_loop( - (batch_size, channels, image_size, image_size), return_intermediates=return_intermediates - ) - - def q_sample(self, x_start, t, noise=None): - noise = default(noise, lambda: torch.randn_like(x_start)) - return ( - extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start - + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise - ) - - def get_loss(self, pred, target, mean=True): - if self.use_fp16: - target = target.half() - - if self.loss_type == 'l1': - loss = (target - pred).abs() - if mean: - loss = loss.mean() - elif self.loss_type == 'l2': - if mean: - loss = torch.nn.functional.mse_loss(target, pred) - else: - loss = torch.nn.functional.mse_loss(target, pred, reduction='none') - else: - raise NotImplementedError("unknown loss type '{loss_type}'") - - return loss - - def p_losses(self, x_start, t, noise=None): - noise = default(noise, lambda: torch.randn_like(x_start)) - x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) - model_out = self.model(x_noisy, t) - - loss_dict = {} - if self.parameterization == "eps": - target = noise - elif self.parameterization == "x0": - target = x_start - else: - raise NotImplementedError(f"Paramterization {self.parameterization} not yet supported") - - loss = self.get_loss(model_out, target, mean=False).mean(dim=[1, 2, 3]) - - log_prefix = 'train' if self.training else 'val' - - loss_dict.update({f'{log_prefix}/loss_simple': loss.mean()}) - loss_simple = loss.mean() * self.l_simple_weight - - loss_vlb = (self.lvlb_weights[t] * loss).mean() - loss_dict.update({f'{log_prefix}/loss_vlb': loss_vlb}) - - loss = loss_simple + self.original_elbo_weight * loss_vlb - - loss_dict.update({f'{log_prefix}/loss': loss}) - - return loss, loss_dict - - def forward(self, x, *args, **kwargs): - # b, c, h, w, device, img_size, = *x.shape, x.device, self.image_size - # assert h == img_size and w == img_size, f'height and width of image must be {img_size}' - t = torch.randint(0, self.num_timesteps, (x.shape[0],), device=self.device).long() - return self.p_losses(x, t, *args, **kwargs) - - def get_input(self, batch, k): - x = batch[k] - if len(x.shape) == 3: - x = x[..., None] - x = rearrange(x, 'b h w c -> b c h w') - x = x.to(memory_format=torch.contiguous_format).float() - if self.use_fp16: - x = x.half() - return x - - def shared_step(self, batch): - x = self.get_input(batch, self.first_stage_key) - loss, loss_dict = self(x) - return loss, loss_dict - - def training_step(self, batch, batch_idx): - loss, loss_dict = self.shared_step(batch) - - self.log_dict(loss_dict, prog_bar=False, logger=True, on_step=True, on_epoch=True) - - self.log("global_step", self.global_step, prog_bar=True, logger=True, on_step=True, on_epoch=False) - - if self.use_scheduler: - lr = self.optimizers().param_groups[0]['lr'] - self.log('lr_abs', lr, prog_bar=True, logger=True, on_step=True, on_epoch=False) - - return loss - - @torch.no_grad() - def validation_step(self, batch, batch_idx): - _, loss_dict_no_ema = self.shared_step(batch) - with self.ema_scope(): - _, loss_dict_ema = self.shared_step(batch) - loss_dict_ema = {key + '_ema': loss_dict_ema[key] for key in loss_dict_ema} - self.log_dict(loss_dict_no_ema, prog_bar=False, logger=True, on_step=False, on_epoch=True) - self.log_dict(loss_dict_ema, prog_bar=False, logger=True, on_step=False, on_epoch=True) - - def on_train_batch_end(self, *args, **kwargs): - if self.use_ema: - self.model_ema(self.model) - - def _get_rows_from_list(self, samples): - n_imgs_per_row = len(samples) - denoise_grid = rearrange(samples, 'n b c h w -> b n c h w') - denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w') - denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row) - return denoise_grid - - @torch.no_grad() - def log_images(self, batch, N=8, n_row=2, sample=True, return_keys=None, **kwargs): - log = dict() - x = self.get_input(batch, self.first_stage_key) - N = min(x.shape[0], N) - n_row = min(x.shape[0], n_row) - x = x.to(self.device)[:N] - log["inputs"] = x - - # get diffusion row - diffusion_row = list() - x_start = x[:n_row] - - for t in range(self.num_timesteps): - if t % self.log_every_t == 0 or t == self.num_timesteps - 1: - t = repeat(torch.tensor([t]), '1 -> b', b=n_row) - t = t.to(self.device).long() - noise = torch.randn_like(x_start) - x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) - diffusion_row.append(x_noisy) - - log["diffusion_row"] = self._get_rows_from_list(diffusion_row) - - if sample: - # get denoise row - with self.ema_scope("Plotting"): - samples, denoise_row = self.sample(batch_size=N, return_intermediates=True) - - log["samples"] = samples - log["denoise_row"] = self._get_rows_from_list(denoise_row) - - if return_keys: - if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0: - return log - else: - return {key: log[key] for key in return_keys} - return log - - def configure_optimizers(self): - lr = self.learning_rate - params = list(self.model.parameters()) - if self.learn_logvar: - params = params + [self.logvar] - opt = torch.optim.AdamW(params, lr=lr) - return opt - - -class LatentDiffusion(DDPM): - """main class""" - - def __init__(self, cfg: LatentDiffusionModelConfig, trainer=None): - self.num_timesteps_cond = default(cfg.num_timesteps_cond, 1) - self.scale_by_std = cfg.scale_by_std - assert self.num_timesteps_cond <= cfg.timesteps - # for backwards compatibility after implementation of DiffusionWrapper - if cfg.conditioning_key is None: - conditioning_key = 'concat' if cfg.concat_mode else 'crossattn' - else: - conditioning_key = cfg.conditioning_key - if cfg.cond_stage_config == '__is_unconditional__': - conditioning_key = None - ckpt_path = cfg.ckpt_path - ignore_keys = cfg.ignore_keys - cfg.conditioning_key = conditioning_key - super().__init__(cfg=cfg, trainer=trainer) - self.concat_mode = cfg.concat_mode - self.cond_stage_trainable = cfg.cond_stage_trainable - self.cond_stage_key = cfg.cond_stage_key - try: - self.num_downs = len(cfg.first_stage_config.ddconfig.ch_mult) - 1 - except: - self.num_downs = 0 - if not cfg.scale_by_std: - self.scale_factor = cfg.scale_factor - else: - self.register_buffer('scale_factor', torch.tensor(cfg.scale_factor)) - if self.use_fp16: - with open_dict(cfg.cond_stage_config): - cfg.cond_stage_config.update({"use_fp16": True}) - else: - with open_dict(cfg.cond_stage_config): - cfg.cond_stage_config.update({"use_fp16": False}) - self.instantiate_first_stage(cfg.first_stage_config) - self.instantiate_cond_stage(cfg.cond_stage_config) - self.cond_stage_forward = cfg.cond_stage_forward - self.clip_denoised = False - self.bbox_tokenizer = None - self.text_embedding_dropout_rate = cfg.text_embedding_dropout_rate - self.fused_opt = cfg.fused_opt - - self.restarted_from_ckpt = False - if ckpt_path is not None: - self.init_from_ckpt(ckpt_path, ignore_keys) - self.restarted_from_ckpt = True - - # Fusing VAE and CLIP doesn't give benefit - if cfg.get("inductor", False): - # TorchInductor with CUDA graph can lead to OOM - inductor_config.triton.cudagraphs = cfg.get("inductor_cudagraphs", False) - self.model = optimize("inductor")(self.model) - - def make_cond_schedule(self,): - self.cond_ids = torch.full(size=(self.num_timesteps,), fill_value=self.num_timesteps - 1, dtype=torch.long) - ids = torch.round(torch.linspace(0, self.num_timesteps - 1, self.num_timesteps_cond)).long() - self.cond_ids[: self.num_timesteps_cond] = ids - - @rank_zero_only - @torch.no_grad() - def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): - # only for very first batch - if ( - self.scale_by_std - and self.current_epoch == 0 - and self.global_step == 0 - and batch_idx == 0 - and not self.restarted_from_ckpt - ): - assert self.scale_factor == 1.0, 'rather not use custom rescaling and std-rescaling simultaneously' - # set rescale weight to 1./std of encodings - print("### USING STD-RESCALING ###") - x = super().get_input(batch, self.first_stage_key) - x = x.to(self.device) - encoder_posterior = self.encode_first_stage(x) - z = self.get_first_stage_encoding(encoder_posterior).detach() - del self.scale_factor - self.register_buffer('scale_factor', 1.0 / z.flatten().std()) - print(f"setting self.scale_factor to {self.scale_factor}") - print("### USING STD-RESCALING ###") - - def register_schedule( - self, - given_betas=None, - beta_schedule="linear", - timesteps=1000, - linear_start=1e-4, - linear_end=2e-2, - cosine_s=8e-3, - ): - super().register_schedule(given_betas, beta_schedule, timesteps, linear_start, linear_end, cosine_s) - - self.shorten_cond_schedule = self.num_timesteps_cond > 1 - if self.shorten_cond_schedule: - self.make_cond_schedule() - - def instantiate_first_stage(self, config): - model = LatentDiffusion.from_config_dict(config) - self.first_stage_model = model.eval() - self.first_stage_model.train = disabled_train - for param in self.first_stage_model.parameters(): - param.requires_grad = False - - def instantiate_cond_stage(self, config): - if not self.cond_stage_trainable: - if config == "__is_first_stage__": - print("Using first stage also as cond stage.") - self.cond_stage_model = self.first_stage_model - elif config == "__is_unconditional__": - print(f"Training {self.__class__.__name__} as an unconditional model.") - self.cond_stage_model = None - # self.be_unconditional = True - else: - model = LatentDiffusion.from_config_dict(config) - self.cond_stage_model = model.eval() - self.cond_stage_model.train = disabled_train - for param in self.cond_stage_model.parameters(): - param.requires_grad = False - else: - assert config != '__is_first_stage__' - assert config != '__is_unconditional__' - model = LatentDiffusion.from_config_dict(config) - self.cond_stage_model = model - - def _get_denoise_row_from_list(self, samples, desc='', force_no_decoder_quantization=False): - denoise_row = [] - for zd in tqdm(samples, desc=desc): - denoise_row.append( - self.decode_first_stage(zd.to(self.device), force_not_quantize=force_no_decoder_quantization) - ) - n_imgs_per_row = len(denoise_row) - denoise_row = torch.stack(denoise_row) # n_log_step, n_row, C, H, W - denoise_grid = rearrange(denoise_row, 'n b c h w -> b n c h w') - denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w') - denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row) - return denoise_grid - - def get_first_stage_encoding(self, encoder_posterior): - if isinstance(encoder_posterior, DiagonalGaussianDistribution): - z = encoder_posterior.sample() - elif isinstance(encoder_posterior, torch.Tensor): - z = encoder_posterior - else: - raise NotImplementedError(f"encoder_posterior of type '{type(encoder_posterior)}' not yet implemented") - return self.scale_factor * z - - def get_learned_conditioning(self, c): - if self.cond_stage_forward is None: - if hasattr(self.cond_stage_model, 'encode') and callable(self.cond_stage_model.encode): - c = self.cond_stage_model.encode(c) - if isinstance(c, DiagonalGaussianDistribution): - c = c.mode() - else: - c = self.cond_stage_model(c) - else: - assert hasattr(self.cond_stage_model, self.cond_stage_forward) - c = getattr(self.cond_stage_model, self.cond_stage_forward)(c) - return c - - def meshgrid(self, h, w): - y = torch.arange(0, h).view(h, 1, 1).repeat(1, w, 1) - x = torch.arange(0, w).view(1, w, 1).repeat(h, 1, 1) - - arr = torch.cat([y, x], dim=-1) - return arr - - def delta_border(self, h, w): - """ - :param h: height - :param w: width - :return: normalized distance to image border, - wtith min distance = 0 at border and max dist = 0.5 at image center - """ - lower_right_corner = torch.tensor([h - 1, w - 1]).view(1, 1, 2) - arr = self.meshgrid(h, w) / lower_right_corner - dist_left_up = torch.min(arr, dim=-1, keepdims=True)[0] - dist_right_down = torch.min(1 - arr, dim=-1, keepdims=True)[0] - edge_dist = torch.min(torch.cat([dist_left_up, dist_right_down], dim=-1), dim=-1)[0] - return edge_dist - - def get_weighting(self, h, w, Ly, Lx, device): - weighting = self.delta_border(h, w) - weighting = torch.clip( - weighting, self.split_input_params["clip_min_weight"], self.split_input_params["clip_max_weight"], - ) - weighting = weighting.view(1, h * w, 1).repeat(1, 1, Ly * Lx).to(device) - - if self.split_input_params["tie_braker"]: - L_weighting = self.delta_border(Ly, Lx) - L_weighting = torch.clip( - L_weighting, - self.split_input_params["clip_min_tie_weight"], - self.split_input_params["clip_max_tie_weight"], - ) - - L_weighting = L_weighting.view(1, 1, Ly * Lx).to(device) - weighting = weighting * L_weighting - return weighting - - def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo load once not every time, shorten code - """ - :param x: img of size (bs, c, h, w) - :return: n img crops of size (n, bs, c, kernel_size[0], kernel_size[1]) - """ - bs, nc, h, w = x.shape - - # number of crops in image - Ly = (h - kernel_size[0]) // stride[0] + 1 - Lx = (w - kernel_size[1]) // stride[1] + 1 - - if uf == 1 and df == 1: - fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) - unfold = torch.nn.Unfold(**fold_params) - - fold = torch.nn.Fold(output_size=x.shape[2:], **fold_params) - - weighting = self.get_weighting(kernel_size[0], kernel_size[1], Ly, Lx, x.device).to(x.dtype) - normalization = fold(weighting).view(1, 1, h, w) # normalizes the overlap - weighting = weighting.view((1, 1, kernel_size[0], kernel_size[1], Ly * Lx)) - - elif uf > 1 and df == 1: - fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) - unfold = torch.nn.Unfold(**fold_params) - - fold_params2 = dict( - kernel_size=(kernel_size[0] * uf, kernel_size[0] * uf), - dilation=1, - padding=0, - stride=(stride[0] * uf, stride[1] * uf), - ) - fold = torch.nn.Fold(output_size=(x.shape[2] * uf, x.shape[3] * uf), **fold_params2) - - weighting = self.get_weighting(kernel_size[0] * uf, kernel_size[1] * uf, Ly, Lx, x.device).to(x.dtype) - normalization = fold(weighting).view(1, 1, h * uf, w * uf) # normalizes the overlap - weighting = weighting.view((1, 1, kernel_size[0] * uf, kernel_size[1] * uf, Ly * Lx)) - - elif df > 1 and uf == 1: - fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) - unfold = torch.nn.Unfold(**fold_params) - - fold_params2 = dict( - kernel_size=(kernel_size[0] // df, kernel_size[0] // df), - dilation=1, - padding=0, - stride=(stride[0] // df, stride[1] // df), - ) - fold = torch.nn.Fold(output_size=(x.shape[2] // df, x.shape[3] // df), **fold_params2) - - weighting = self.get_weighting(kernel_size[0] // df, kernel_size[1] // df, Ly, Lx, x.device).to(x.dtype) - normalization = fold(weighting).view(1, 1, h // df, w // df) # normalizes the overlap - weighting = weighting.view((1, 1, kernel_size[0] // df, kernel_size[1] // df, Ly * Lx)) - - else: - raise NotImplementedError - - return fold, unfold, normalization, weighting - - @torch.no_grad() - def get_input( - self, - batch, - k, - return_first_stage_outputs=False, - force_c_encode=False, - cond_key=None, - return_original_cond=False, - bs=None, - ): - x = super().get_input(batch, k) - if bs is not None: - x = x[:bs] - x = x.to(self.device) - encoder_posterior = self.encode_first_stage(x) - z = self.get_first_stage_encoding(encoder_posterior).detach() - - if self.model.conditioning_key is not None: - if cond_key is None: - cond_key = self.cond_stage_key - if cond_key != self.first_stage_key: - if cond_key in ['captions', 'coordinates_bbox', 'txt']: - xc = batch[cond_key] - elif cond_key == 'class_label': - xc = batch - else: - xc = super().get_input(batch, cond_key).to(self.device) - else: - xc = x - if not self.cond_stage_trainable or force_c_encode: - if isinstance(xc, dict) or isinstance(xc, list): - # import pudb; pudb.set_trace() - c = self.get_learned_conditioning(xc) - else: - c = self.get_learned_conditioning(xc.to(self.device)) - else: - c = xc - if bs is not None: - c = c[:bs] - - if self.use_positional_encodings: - pos_x, pos_y = self.compute_latent_shifts(batch) - ckey = __conditioning_keys__[self.model.conditioning_key] - c = {ckey: c, 'pos_x': pos_x, 'pos_y': pos_y} - - if self.text_embedding_dropout_rate > 0: - assert self.text_embedding_dropout_rate < 1.0 - c = random_dropout(c, drop_rate=self.text_embedding_dropout_rate) - - else: - c = None - xc = None - if self.use_positional_encodings: - pos_x, pos_y = self.compute_latent_shifts(batch) - c = {'pos_x': pos_x, 'pos_y': pos_y} - out = [z, c] - if return_first_stage_outputs: - xrec = self.decode_first_stage(z) - out.extend([x, xrec]) - if return_original_cond: - out.append(xc) - return out - - @torch.no_grad() - def decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): - if predict_cids: - if z.dim() == 4: - z = torch.argmax(z.exp(), dim=1).long() - z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) - z = rearrange(z, 'b h w c -> b c h w').contiguous() - - z = 1.0 / self.scale_factor * z - - if hasattr(self, "split_input_params"): - if self.split_input_params["patch_distributed_vq"]: - ks = self.split_input_params["ks"] # eg. (128, 128) - stride = self.split_input_params["stride"] # eg. (64, 64) - uf = self.split_input_params["vqf"] - bs, nc, h, w = z.shape - if ks[0] > h or ks[1] > w: - ks = (min(ks[0], h), min(ks[1], w)) - print("reducing Kernel") - - if stride[0] > h or stride[1] > w: - stride = (min(stride[0], h), min(stride[1], w)) - print("reducing stride") - - fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf) - - z = unfold(z) # (bn, nc * prod(**ks), L) - # 1. Reshape to img shape - z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) - - # 2. apply model loop over last dim - if isinstance(self.first_stage_model, VQModelInterface): - output_list = [ - self.first_stage_model.decode( - z[:, :, :, :, i], force_not_quantize=predict_cids or force_not_quantize - ) - for i in range(z.shape[-1]) - ] - else: - - output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) for i in range(z.shape[-1])] - - o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) - o = o * weighting - # Reverse 1. reshape to img shape - o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) - # stitch crops together - decoded = fold(o) - decoded = decoded / normalization # norm is shape (1, 1, h, w) - return decoded - else: - if isinstance(self.first_stage_model, VQModelInterface): - return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) - else: - return self.first_stage_model.decode(z) - - else: - if isinstance(self.first_stage_model, VQModelInterface): - return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) - else: - return self.first_stage_model.decode(z) - - # same as above but without decorator - def differentiable_decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): - if predict_cids: - if z.dim() == 4: - z = torch.argmax(z.exp(), dim=1).long() - z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) - z = rearrange(z, 'b h w c -> b c h w').contiguous() - - z = 1.0 / self.scale_factor * z - - if hasattr(self, "split_input_params"): - if self.split_input_params["patch_distributed_vq"]: - ks = self.split_input_params["ks"] # eg. (128, 128) - stride = self.split_input_params["stride"] # eg. (64, 64) - uf = self.split_input_params["vqf"] - bs, nc, h, w = z.shape - if ks[0] > h or ks[1] > w: - ks = (min(ks[0], h), min(ks[1], w)) - print("reducing Kernel") - - if stride[0] > h or stride[1] > w: - stride = (min(stride[0], h), min(stride[1], w)) - print("reducing stride") - - fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf) - - z = unfold(z) # (bn, nc * prod(**ks), L) - # 1. Reshape to img shape - z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) - - # 2. apply model loop over last dim - if isinstance(self.first_stage_model, VQModelInterface): - output_list = [ - self.first_stage_model.decode( - z[:, :, :, :, i], force_not_quantize=predict_cids or force_not_quantize - ) - for i in range(z.shape[-1]) - ] - else: - - output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) for i in range(z.shape[-1])] - - o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) - o = o * weighting - # Reverse 1. reshape to img shape - o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) - # stitch crops together - decoded = fold(o) - decoded = decoded / normalization # norm is shape (1, 1, h, w) - return decoded - else: - if isinstance(self.first_stage_model, VQModelInterface): - return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) - else: - return self.first_stage_model.decode(z) - - else: - if isinstance(self.first_stage_model, VQModelInterface): - return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) - else: - return self.first_stage_model.decode(z) - - @torch.no_grad() - def encode_first_stage(self, x): - if hasattr(self, "split_input_params"): - if self.split_input_params["patch_distributed_vq"]: - ks = self.split_input_params["ks"] # eg. (128, 128) - stride = self.split_input_params["stride"] # eg. (64, 64) - df = self.split_input_params["vqf"] - self.split_input_params['original_image_size'] = x.shape[-2:] - bs, nc, h, w = x.shape - if ks[0] > h or ks[1] > w: - ks = (min(ks[0], h), min(ks[1], w)) - print("reducing Kernel") - - if stride[0] > h or stride[1] > w: - stride = (min(stride[0], h), min(stride[1], w)) - print("reducing stride") - - fold, unfold, normalization, weighting = self.get_fold_unfold(x, ks, stride, df=df) - z = unfold(x) # (bn, nc * prod(**ks), L) - # Reshape to img shape - z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) - - output_list = [self.first_stage_model.encode(z[:, :, :, :, i]) for i in range(z.shape[-1])] - - o = torch.stack(output_list, axis=-1) - o = o * weighting - - # Reverse reshape to img shape - o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) - # stitch crops together - decoded = fold(o) - decoded = decoded / normalization - return decoded - - else: - return self.first_stage_model.encode(x) - else: - return self.first_stage_model.encode(x) - - def shared_step(self, batch, **kwargs): - x, c = self.get_input(batch, self.first_stage_key) - loss = self(x, c) - return loss - - def forward(self, x, c, *args, **kwargs): - t = torch.randint(0, self.num_timesteps, (x.shape[0],), device=self.device).long() - if self.model.conditioning_key is not None: - assert c is not None - if self.cond_stage_trainable: - c = self.get_learned_conditioning(c) - if self.shorten_cond_schedule: # TODO: drop this option - tc = self.cond_ids[t].to(self.device) - c = self.q_sample(x_start=c, t=tc, noise=torch.randn_like(c.float())) - return self.p_losses(x, c, t, *args, **kwargs) - - def _rescale_annotations(self, bboxes, crop_coordinates): # TODO: move to dataset - def rescale_bbox(bbox): - x0 = clamp((bbox[0] - crop_coordinates[0]) / crop_coordinates[2]) - y0 = clamp((bbox[1] - crop_coordinates[1]) / crop_coordinates[3]) - w = min(bbox[2] / crop_coordinates[2], 1 - x0) - h = min(bbox[3] / crop_coordinates[3], 1 - y0) - return x0, y0, w, h - - return [rescale_bbox(b) for b in bboxes] - - def apply_model(self, x_noisy, t, cond, return_ids=False): - - if isinstance(cond, dict): - # hybrid case, cond is exptected to be a dict - pass - else: - if not isinstance(cond, list): - cond = [cond] - key = 'c_concat' if self.model.conditioning_key == 'concat' else 'c_crossattn' - cond = {key: cond} - - if hasattr(self, "split_input_params"): - assert len(cond) == 1 # todo can only deal with one conditioning atm - assert not return_ids - ks = self.split_input_params["ks"] # eg. (128, 128) - stride = self.split_input_params["stride"] # eg. (64, 64) - - h, w = x_noisy.shape[-2:] - - fold, unfold, normalization, weighting = self.get_fold_unfold(x_noisy, ks, stride) - - z = unfold(x_noisy) # (bn, nc * prod(**ks), L) - # Reshape to img shape - z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) - z_list = [z[:, :, :, :, i] for i in range(z.shape[-1])] - - if ( - self.cond_stage_key in ["image", "LR_image", "segmentation", 'bbox_img'] - and self.model.conditioning_key - ): # todo check for completeness - c_key = next(iter(cond.keys())) # get key - c = next(iter(cond.values())) # get value - assert len(c) == 1 # todo extend to list with more than one elem - c = c[0] # get element - - c = unfold(c) - c = c.view((c.shape[0], -1, ks[0], ks[1], c.shape[-1])) # (bn, nc, ks[0], ks[1], L ) - - cond_list = [{c_key: [c[:, :, :, :, i]]} for i in range(c.shape[-1])] - - elif self.cond_stage_key == 'coordinates_bbox': - assert ( - 'original_image_size' in self.split_input_params - ), 'BoudingBoxRescaling is missing original_image_size' - - # assuming padding of unfold is always 0 and its dilation is always 1 - n_patches_per_row = int((w - ks[0]) / stride[0] + 1) - full_img_h, full_img_w = self.split_input_params['original_image_size'] - # as we are operating on latents, we need the factor from the original image size to the - # spatial latent size to properly rescale the crops for regenerating the bbox annotations - num_downs = self.first_stage_model.encoder.num_resolutions - 1 - rescale_latent = 2 ** (num_downs) - - # get top left postions of patches as conforming for the bbbox tokenizer, therefore we - # need to rescale the tl patch coordinates to be in between (0,1) - tl_patch_coordinates = [ - ( - rescale_latent * stride[0] * (patch_nr % n_patches_per_row) / full_img_w, - rescale_latent * stride[1] * (patch_nr // n_patches_per_row) / full_img_h, - ) - for patch_nr in range(z.shape[-1]) - ] - - # patch_limits are tl_coord, width and height coordinates as (x_tl, y_tl, h, w) - patch_limits = [ - (x_tl, y_tl, rescale_latent * ks[0] / full_img_w, rescale_latent * ks[1] / full_img_h) - for x_tl, y_tl in tl_patch_coordinates - ] - # patch_values = [(np.arange(x_tl,min(x_tl+ks, 1.)),np.arange(y_tl,min(y_tl+ks, 1.))) for x_tl, y_tl in tl_patch_coordinates] - - # tokenize crop coordinates for the bounding boxes of the respective patches - patch_limits_tknzd = [ - torch.LongTensor(self.bbox_tokenizer._crop_encoder(bbox))[None].to(self.device) - for bbox in patch_limits - ] # list of length l with tensors of shape (1, 2) - print(patch_limits_tknzd[0].shape) - # cut tknzd crop position from conditioning - assert isinstance(cond, dict), 'cond must be dict to be fed into model' - cut_cond = cond['c_crossattn'][0][..., :-2].to(self.device) - print(cut_cond.shape) - - adapted_cond = torch.stack([torch.cat([cut_cond, p], dim=1) for p in patch_limits_tknzd]) - adapted_cond = rearrange(adapted_cond, 'l b n -> (l b) n') - print(adapted_cond.shape) - adapted_cond = self.get_learned_conditioning(adapted_cond) - print(adapted_cond.shape) - adapted_cond = rearrange(adapted_cond, '(l b) n d -> l b n d', l=z.shape[-1]) - print(adapted_cond.shape) - - cond_list = [{'c_crossattn': [e]} for e in adapted_cond] - - else: - cond_list = [cond for i in range(z.shape[-1])] # Todo make this more efficient - - # apply model by loop over crops - output_list = [self.model(z_list[i], t, **cond_list[i]) for i in range(z.shape[-1])] - assert not isinstance( - output_list[0], tuple - ) # todo cant deal with multiple model outputs check this never happens - - o = torch.stack(output_list, axis=-1) - o = o * weighting - # Reverse reshape to img shape - o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) - # stitch crops together - x_recon = fold(o) / normalization - - else: - x_recon = self.model(x_noisy, t, **cond) - - if isinstance(x_recon, tuple) and not return_ids: - return x_recon[0] - else: - return x_recon - - def _predict_eps_from_xstart(self, x_t, t, pred_xstart): - return ( - extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart - ) / extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) - - def _prior_bpd(self, x_start): - """ - Get the prior KL term for the variational lower-bound, measured in - bits-per-dim. - This term can't be optimized, as it only depends on the encoder. - :param x_start: the [N x C x ...] tensor of inputs. - :return: a batch of [N] KL values (in bits), one per batch element. - """ - batch_size = x_start.shape[0] - t = torch.tensor([self.num_timesteps - 1] * batch_size, device=x_start.device) - qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t) - kl_prior = normal_kl(mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0) - return mean_flat(kl_prior) / np.log(2.0) - - def p_losses(self, x_start, cond, t, noise=None): - noise = default(noise, lambda: torch.randn_like(x_start)) - x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) - model_output = self.apply_model(x_noisy, t, cond) - - loss_dict = {} - prefix = 'train' if self.training else 'val' - - if self.parameterization == "x0": - target = x_start - elif self.parameterization == "eps": - target = noise - else: - raise NotImplementedError() - - loss_simple = self.get_loss(model_output, target, mean=False).mean([1, 2, 3]) - loss_dict.update({f'{prefix}/loss_simple': loss_simple.mean()}) - self.logvar = self.logvar.to(self.device) - logvar_t = self.logvar[t].to(self.device) - loss = loss_simple / torch.exp(logvar_t) + logvar_t - # loss = loss_simple / torch.exp(self.logvar) + self.logvar - if self.learn_logvar: - loss_dict.update({f'{prefix}/loss_gamma': loss.mean()}) - loss_dict.update({'logvar': self.logvar.data.mean()}) - - loss = self.l_simple_weight * loss.mean() - - loss_vlb = self.get_loss(model_output, target, mean=False).mean(dim=(1, 2, 3)) - loss_vlb = (self.lvlb_weights[t] * loss_vlb).mean() - loss_dict.update({f'{prefix}/loss_vlb': loss_vlb}) - loss += self.original_elbo_weight * loss_vlb - loss_dict.update({f'{prefix}/loss': loss}) - - return loss, loss_dict - - def p_mean_variance( - self, - x, - c, - t, - clip_denoised: bool, - return_codebook_ids=False, - quantize_denoised=False, - return_x0=False, - score_corrector=None, - corrector_kwargs=None, - ): - t_in = t - model_out = self.apply_model(x, t_in, c, return_ids=return_codebook_ids) - - if score_corrector is not None: - assert self.parameterization == "eps" - model_out = score_corrector.modify_score(self, model_out, x, t, c, **corrector_kwargs) - - if return_codebook_ids: - model_out, logits = model_out - - if self.parameterization == "eps": - x_recon = self.predict_start_from_noise(x, t=t, noise=model_out) - elif self.parameterization == "x0": - x_recon = model_out - else: - raise NotImplementedError() - - if clip_denoised: - x_recon.clamp_(-1.0, 1.0) - if quantize_denoised: - x_recon, _, [_, _, indices] = self.first_stage_model.quantize(x_recon) - model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) - if return_codebook_ids: - return model_mean, posterior_variance, posterior_log_variance, logits - elif return_x0: - return model_mean, posterior_variance, posterior_log_variance, x_recon - else: - return model_mean, posterior_variance, posterior_log_variance - - @torch.no_grad() - def p_sample( - self, - x, - c, - t, - clip_denoised=False, - repeat_noise=False, - return_codebook_ids=False, - quantize_denoised=False, - return_x0=False, - temperature=1.0, - noise_dropout=0.0, - score_corrector=None, - corrector_kwargs=None, - ): - b, *_, device = *x.shape, x.device - outputs = self.p_mean_variance( - x=x, - c=c, - t=t, - clip_denoised=clip_denoised, - return_codebook_ids=return_codebook_ids, - quantize_denoised=quantize_denoised, - return_x0=return_x0, - score_corrector=score_corrector, - corrector_kwargs=corrector_kwargs, - ) - if return_codebook_ids: - raise DeprecationWarning("Support dropped.") - model_mean, _, model_log_variance, logits = outputs - elif return_x0: - model_mean, _, model_log_variance, x0 = outputs - else: - model_mean, _, model_log_variance = outputs - - noise = noise_like(x.shape, device, repeat_noise) * temperature - if noise_dropout > 0.0: - noise = torch.nn.functional.dropout(noise, p=noise_dropout) - # no noise when t == 0 - nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))) - - if return_codebook_ids: - return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, logits.argmax(dim=1) - if return_x0: - return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, x0 - else: - return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise - - @torch.no_grad() - def progressive_denoising( - self, - cond, - shape, - verbose=True, - callback=None, - quantize_denoised=False, - img_callback=None, - mask=None, - x0=None, - temperature=1.0, - noise_dropout=0.0, - score_corrector=None, - corrector_kwargs=None, - batch_size=None, - x_T=None, - start_T=None, - log_every_t=None, - ): - if not log_every_t: - log_every_t = self.log_every_t - timesteps = self.num_timesteps - if batch_size is not None: - b = batch_size if batch_size is not None else shape[0] - shape = [batch_size] + list(shape) - else: - b = batch_size = shape[0] - if x_T is None: - img = torch.randn(shape, device=self.device) - else: - img = x_T - intermediates = [] - if cond is not None: - if isinstance(cond, dict): - cond = { - key: cond[key][:batch_size] - if not isinstance(cond[key], list) - else list(map(lambda x: x[:batch_size], cond[key])) - for key in cond - } - else: - cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] - - if start_T is not None: - timesteps = min(timesteps, start_T) - iterator = ( - tqdm(reversed(range(0, timesteps)), desc='Progressive Generation', total=timesteps) - if verbose - else reversed(range(0, timesteps)) - ) - if type(temperature) == float: - temperature = [temperature] * timesteps - - for i in iterator: - ts = torch.full((b,), i, device=self.device, dtype=torch.long) - if self.shorten_cond_schedule: - assert self.model.conditioning_key != 'hybrid' - tc = self.cond_ids[ts].to(cond.device) - cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond)) - - img, x0_partial = self.p_sample( - img, - cond, - ts, - clip_denoised=self.clip_denoised, - quantize_denoised=quantize_denoised, - return_x0=True, - temperature=temperature[i], - noise_dropout=noise_dropout, - score_corrector=score_corrector, - corrector_kwargs=corrector_kwargs, - ) - if mask is not None: - assert x0 is not None - img_orig = self.q_sample(x0, ts) - img = img_orig * mask + (1.0 - mask) * img - - if i % log_every_t == 0 or i == timesteps - 1: - intermediates.append(x0_partial) - if callback: - callback(i) - if img_callback: - img_callback(img, i) - return img, intermediates - - @torch.no_grad() - def p_sample_loop( - self, - cond, - shape, - return_intermediates=False, - x_T=None, - verbose=True, - callback=None, - timesteps=None, - quantize_denoised=False, - mask=None, - x0=None, - img_callback=None, - start_T=None, - log_every_t=None, - ): - - if not log_every_t: - log_every_t = self.log_every_t - device = self.betas.device - b = shape[0] - if x_T is None: - img = torch.randn(shape, device=device) - else: - img = x_T - - intermediates = [img] - if timesteps is None: - timesteps = self.num_timesteps - - if start_T is not None: - timesteps = min(timesteps, start_T) - iterator = ( - tqdm(reversed(range(0, timesteps)), desc='Sampling t', total=timesteps) - if verbose - else reversed(range(0, timesteps)) - ) - - if mask is not None: - assert x0 is not None - assert x0.shape[2:3] == mask.shape[2:3] # spatial size has to match - - for i in iterator: - ts = torch.full((b,), i, device=device, dtype=torch.long) - if self.shorten_cond_schedule: - assert self.model.conditioning_key != 'hybrid' - tc = self.cond_ids[ts].to(cond.device) - cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond)) - - img = self.p_sample(img, cond, ts, clip_denoised=self.clip_denoised, quantize_denoised=quantize_denoised) - if mask is not None: - img_orig = self.q_sample(x0, ts) - img = img_orig * mask + (1.0 - mask) * img - - if i % log_every_t == 0 or i == timesteps - 1: - intermediates.append(img) - if callback: - callback(i) - if img_callback: - img_callback(img, i) - - if return_intermediates: - return img, intermediates - return img - - @torch.no_grad() - def sample( - self, - cond, - batch_size=16, - return_intermediates=False, - x_T=None, - verbose=True, - timesteps=None, - quantize_denoised=False, - mask=None, - x0=None, - shape=None, - **kwargs, - ): - if shape is None: - shape = (batch_size, self.channels, self.image_size, self.image_size) - if cond is not None: - if isinstance(cond, dict): - cond = { - key: cond[key][:batch_size] - if not isinstance(cond[key], list) - else list(map(lambda x: x[:batch_size], cond[key])) - for key in cond - } - else: - cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] - return self.p_sample_loop( - cond, - shape, - return_intermediates=return_intermediates, - x_T=x_T, - verbose=verbose, - timesteps=timesteps, - quantize_denoised=quantize_denoised, - mask=mask, - x0=x0, - ) - - @torch.no_grad() - def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): - - if ddim: - ddim_sampler = DDIMSampler(self) - shape = (self.channels, self.image_size, self.image_size) - samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, shape, cond, verbose=False, **kwargs) - - else: - samples, intermediates = self.sample(cond=cond, batch_size=batch_size, return_intermediates=True, **kwargs) - - return samples, intermediates - - @torch.no_grad() - def log_images( - self, - batch, - N=8, - n_row=4, - sample=True, - ddim_steps=200, - ddim_eta=1.0, - return_keys=None, - quantize_denoised=True, - inpaint=True, - plot_denoise_rows=False, - plot_progressive_rows=True, - plot_diffusion_rows=True, - **kwargs, - ): - - use_ddim = ddim_steps is not None - - log = dict() - z, c, x, xrec, xc = self.get_input( - batch, - self.first_stage_key, - return_first_stage_outputs=True, - force_c_encode=True, - return_original_cond=True, - bs=N, - ) - N = min(x.shape[0], N) - n_row = min(x.shape[0], n_row) - log["inputs"] = x - log["reconstruction"] = xrec - if self.model.conditioning_key is not None: - if hasattr(self.cond_stage_model, "decode"): - xc = self.cond_stage_model.decode(c) - log["conditioning"] = xc - elif self.cond_stage_key in ["caption"]: - xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["caption"]) - log["conditioning"] = xc - elif self.cond_stage_key == 'class_label': - xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["human_label"]) - log['conditioning'] = xc - elif isimage(xc): - log["conditioning"] = xc - if ismap(xc): - log["original_conditioning"] = self.to_rgb(xc) - - if plot_diffusion_rows: - # get diffusion row - diffusion_row = list() - z_start = z[:n_row] - for t in range(self.num_timesteps): - if t % self.log_every_t == 0 or t == self.num_timesteps - 1: - t = repeat(torch.tensor([t]), '1 -> b', b=n_row) - t = t.to(self.device).long() - noise = torch.randn_like(z_start) - z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise) - diffusion_row.append(self.decode_first_stage(z_noisy)) - - diffusion_row = torch.stack(diffusion_row) # n_log_step, n_row, C, H, W - diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w') - diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w') - diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0]) - log["diffusion_row"] = diffusion_grid - - if sample: - # get denoise row - with self.ema_scope("Plotting"): - samples, z_denoise_row = self.sample_log( - cond=c, batch_size=N, ddim=use_ddim, ddim_steps=ddim_steps, eta=ddim_eta - ) - # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True) - x_samples = self.decode_first_stage(samples) - log["samples"] = x_samples - if plot_denoise_rows: - denoise_grid = self._get_denoise_row_from_list(z_denoise_row) - log["denoise_row"] = denoise_grid - - if ( - quantize_denoised - and not isinstance(self.first_stage_model, AutoencoderKL) - and not isinstance(self.first_stage_model, IdentityFirstStage) - ): - # also display when quantizing x0 while sampling - with self.ema_scope("Plotting Quantized Denoised"): - samples, z_denoise_row = self.sample_log( - cond=c, - batch_size=N, - ddim=use_ddim, - ddim_steps=ddim_steps, - eta=ddim_eta, - quantize_denoised=True, - ) - # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True, - # quantize_denoised=True) - x_samples = self.decode_first_stage(samples.to(self.device)) - log["samples_x0_quantized"] = x_samples - - if inpaint: - # make a simple center square - b, h, w = z.shape[0], z.shape[2], z.shape[3] - mask = torch.ones(N, h, w).to(self.device) - # zeros will be filled in - mask[:, h // 4 : 3 * h // 4, w // 4 : 3 * w // 4] = 0.0 - mask = mask[:, None, ...] - with self.ema_scope("Plotting Inpaint"): - samples, _ = self.sample_log( - cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, ddim_steps=ddim_steps, x0=z[:N], mask=mask - ) - x_samples = self.decode_first_stage(samples.to(self.device)) - log["samples_inpainting"] = x_samples - log["mask"] = mask - - # outpaint - with self.ema_scope("Plotting Outpaint"): - samples, _ = self.sample_log( - cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, ddim_steps=ddim_steps, x0=z[:N], mask=mask - ) - x_samples = self.decode_first_stage(samples.to(self.device)) - log["samples_outpainting"] = x_samples - - if plot_progressive_rows: - with self.ema_scope("Plotting Progressives"): - img, progressives = self.progressive_denoising( - c, shape=(self.channels, self.image_size, self.image_size), batch_size=N - ) - prog_row = self._get_denoise_row_from_list(progressives, desc="Progressive Generation") - log["progressive_row"] = prog_row - - if return_keys: - if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0: - return log - else: - return {key: log[key] for key in return_keys} - return log - - def configure_optimizers(self): - lr = self.learning_rate - params = list(self.model.parameters()) - if self.cond_stage_trainable: - print(f"{self.__class__.__name__}: Also optimizing conditioner params!") - params = params + list(self.cond_stage_model.parameters()) - if self.learn_logvar: - print('Diffusion model optimizing logvar') - params.append(self.logvar) - fused_opt = self.fused_opt - try: - from apex.optimizers import FusedAdam - except: # noqa - fused_opt = False - if fused_opt: - opt = FusedAdam(params, lr=lr, adam_w_mode=True) - print("Using fused AdamW") - else: - opt = torch.optim.AdamW(params, lr=lr) - if self.use_scheduler: - scheduler = LatentDiffusion.from_config_dict(self.scheduler_config) - - print("Setting up LambdaLR scheduler...") - scheduler = [ - {'scheduler': LambdaLR(opt, lr_lambda=scheduler.schedule), 'interval': 'step', 'frequency': 1} - ] - return [opt], scheduler - return opt - - def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm): - if gradient_clip_val <= 0: - return - - if gradient_clip_algorithm is None: - gradient_clip_algorithm = self.trainer.gradient_clip_algorithm or "norm" - - gradient_clip_algorithm = GradClipAlgorithmType(gradient_clip_algorithm) - - if not hasattr(self, "grad_clip_logged"): - logging.info("Gradient clipping: val, %f; algo, %s", gradient_clip_val, gradient_clip_algorithm) - self.grad_clip_logged = True - - parameters = amp.master_params(optimizer) - if gradient_clip_algorithm == GradClipAlgorithmType.VALUE: - torch.nn.utils.clip_grad_value_(parameters, clip_value=gradient_clip_val) - elif gradient_clip_algorithm == GradClipAlgorithmType.NORM: - clip_grad_norm_(parameters, gradient_clip_val) - - @torch.no_grad() - def to_rgb(self, x): - x = x.float() - if not hasattr(self, "colorize"): - self.colorize = torch.randn(3, x.shape[1], 1, 1).to(x) - x = nn.functional.conv2d(x, weight=self.colorize) - x = 2.0 * (x - x.min()) / (x.max() - x.min()) - 1.0 - return x - - -class DiffusionWrapper(pl.LightningModule, Serialization): - def __init__(self, diff_model_config, conditioning_key): - super().__init__() - self.diffusion_model = DiffusionWrapper.from_config_dict(diff_model_config) - self.conditioning_key = conditioning_key - assert self.conditioning_key in [None, 'concat', 'crossattn', 'hybrid', 'adm'] - - def forward(self, x, t, c_concat: list = None, c_crossattn: list = None): - if self.conditioning_key is None: - out = self.diffusion_model(x, t) - elif self.conditioning_key == 'concat': - xc = torch.cat([x] + c_concat, dim=1) - out = self.diffusion_model(xc, t) - elif self.conditioning_key == 'crossattn': - cc = torch.cat(c_crossattn, 1) - out = self.diffusion_model(x, t, context=cc) - elif self.conditioning_key == 'hybrid': - xc = torch.cat([x] + c_concat, dim=1) - cc = torch.cat(c_crossattn, 1) - out = self.diffusion_model(xc, t, context=cc) - elif self.conditioning_key == 'adm': - cc = c_crossattn[0] - out = self.diffusion_model(x, t, y=cc) - else: - raise NotImplementedError() - - return out diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py index c1022fa6f715..1f4715e0ba9b 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py @@ -455,7 +455,6 @@ def __init__( dims=2, num_classes=None, use_checkpoint=False, - use_fp16=False, num_heads=-1, num_head_channels=-1, num_heads_upsample=-1, @@ -507,7 +506,6 @@ def __init__( self.conv_resample = conv_resample self.num_classes = num_classes self.use_checkpoint = use_checkpoint - self.dtype = th.float16 if use_fp16 else th.float32 self.num_heads = num_heads self.num_head_channels = num_head_channels self.num_heads_upsample = num_heads_upsample @@ -956,11 +954,7 @@ def forward(self, x, timesteps=None, context=None, y=None, **kwargs): assert y.shape == (x.shape[0],) emb = emb + self.label_emb(y) - # future support - if self.dtype == th.float32: - self.dtype == x.dtype - - h = x.type(self.dtype) + h = x.type(emb.dtype) for module in self.input_blocks: h = module(h, emb, context) hs.append(h) @@ -968,7 +962,6 @@ def forward(self, x, timesteps=None, context=None, y=None, **kwargs): for module in self.output_blocks: h = th.cat([h, hs.pop()], dim=1) h = module(h, emb, context) - h = h.type(self.dtype) if self.predict_codebook_ids: return self.id_predictor(h) else: diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py index d7ccb3a75549..0174fc90a0a2 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py @@ -162,7 +162,7 @@ def get_idx(end, device): return torch.arange(start=0, end=end, dtype=torch.float32, device=device) -def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False, use_fp16=False): +def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False): """ Create sinusoidal timestep embeddings. :param timesteps: a 1-D Tensor of N indices, one per batch element. @@ -181,10 +181,7 @@ def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False, use_ embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) else: embedding = repeat(timesteps, "b -> b d", d=dim) - if use_fp16: - return embedding.half() - else: - return embedding + return embedding def zero_module(module): diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py index d23bb20de392..4a64ac6bd144 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py @@ -167,35 +167,13 @@ def encode(self, x): return self(x) -class CLIPTextModelZero(CLIPTextModel): - config_class = CLIPTextConfig - - def __init__(self, config: CLIPTextConfig): - super().__init__(config) - self.text_model = CLIPTextTransformerZero(config) - - -class CLIPTextTransformerZero(CLIPTextTransformer): - def _build_causal_attention_mask(self, bsz, seq_len, dtype, device=None): # TODO mmy check dtype - # lazily create causal attention mask, with full attention between the vision tokens - # pytorch uses additive attention mask; fill with -inf - mask = torch.empty(bsz, seq_len, seq_len, device=device) - mask.fill_(float("-inf")) - mask.triu_(1) # zero out the lower diagonal - mask = mask.unsqueeze(1) # expand mask - return mask.half() - - class FrozenCLIPEmbedder(AbstractEncoder): """Uses the CLIP transformer encoder for text (from Hugging Face)""" def __init__(self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77, use_fp16=False): super().__init__() self.tokenizer = CLIPTokenizer.from_pretrained(version) - if use_fp16: - self.transformer = CLIPTextModelZero.from_pretrained(version) - else: - self.transformer = CLIPTextModel.from_pretrained(version) + self.transformer = CLIPTextModel.from_pretrained(version) self.device = device self.max_length = max_length self.freeze() From 40c04c0d09b3d73458d196f340e1d5e241ff8865 Mon Sep 17 00:00:00 2001 From: kjanik Date: Thu, 11 May 2023 16:30:43 -0700 Subject: [PATCH 021/512] Merge the distributed optimizer to internal/main latest. --- .../megatron_vit_classification_models.py | 62 ++++++++++++++++--- 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/nemo/collections/vision/models/megatron_vit_classification_models.py b/nemo/collections/vision/models/megatron_vit_classification_models.py index 183f7cdb5575..ea664c0da022 100644 --- a/nemo/collections/vision/models/megatron_vit_classification_models.py +++ b/nemo/collections/vision/models/megatron_vit_classification_models.py @@ -220,19 +220,19 @@ def configure_optimizers(self): module = self.model[0] # only the first virtual rank has the embeddings else: module = self.model - # if module.share_token_embeddings: - # param = module.word_embeddings_weight() - # param._disable_greedy_grad_copy = not self.megatron_amp_O2 - # param._disable_overlap_grad_sync = True + if module.share_token_embeddings: + param = module.word_embeddings_weight() + param._disable_greedy_grad_copy = not self.megatron_amp_O2 + param._disable_overlap_grad_sync = True if parallel_state.is_pipeline_last_stage(ignore_virtual=True): if isinstance(self.model, list): module = self.model[-1] # only the last virtual rank has the embeddings else: module = self.model - # if module.share_token_embeddings: - # param = module.word_embeddings_weight() - # param._disable_greedy_grad_copy = not self.megatron_amp_O2 - # param._disable_overlap_grad_sync = True + if module.share_token_embeddings: + param = module.word_embeddings_weight() + param._disable_greedy_grad_copy = not self.megatron_amp_O2 + param._disable_overlap_grad_sync = True # Disable overlapped grad sync for layer norm grads when # sequence parallelism is enabled @@ -241,6 +241,43 @@ def configure_optimizers(self): param._disable_greedy_grad_copy = not self.megatron_amp_O2 param._disable_overlap_grad_sync = True + # KJJ - Copied this entire block, up to "return" here blindly from megatron_gpt_model.py + + # Initialize parameter buckets for overlapped grad and param syncs + # Note: Params with disabled overlapping are put in the + # last param bucket + buckets = [] + if self.cfg.get('virtual_pipeline_model_parallel_size', None) is not None: + # Initialize a bucket for each virtual pipeline stage + for module in self.model: + if isinstance(module, Float16Module): + module = module.module + stage_bucket = [] + #for layer in module.language_model.encoder.layers: + for layer in module.backbone.transformer.layers: + stage_bucket.extend( + p for p in layer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False) + ) + buckets.append(stage_bucket) + else: + # Initialize a bucket for each Transformer layer + modules = self.model if isinstance(self.model, list) else [self.model] + for module in modules: + if isinstance(module, Float16Module): + module = module.module + #for layer in module.language_model.encoder.layers: + for layer in module.backbone.transformer.layers: + + buckets.append( + [p for p in layer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False)] + ) + buckets.reverse() + used_params = set() + for bucket in buckets: + used_params.update(bucket) + buckets[-1].extend(p for p in self.parameters() if p not in used_params) + self.distributed_adam_buckets = buckets + return super().configure_optimizers() def forward(self, tokens): @@ -294,6 +331,15 @@ def training_step(self, dataloader_iter, batch_idx): self.allreduce_sequence_parallel_gradients() if self.with_distributed_adam: + # KJJ - Added this block from megatron_gpt_model. It says it's not necessary + # and it's not clear if the remaining "if not" logic is still needed. + # keeping it for now, but might need to delete one or both of these. + + # synchronize asynchronous grad reductions + # note: not necessary, but reduces performance degradation + # from multiple simultaneous NCCL calls + self._optimizer._finish_bucket_grad_sync() + # launch grad reductions # Note: grads in first pipeline stage have already been # reduced From 9dd00c5b304c4594458c8a1e225e9d171f5d5bda Mon Sep 17 00:00:00 2001 From: Yu Yao Date: Fri, 12 May 2023 13:12:50 -0700 Subject: [PATCH 022/512] Fix few issues regarding megatron core update and per micro batch dataloader --- examples/multimodal/convert_ckpt_to_nemo.py | 6 +++--- .../foundation/clip/convert_openclip_to_nemo.py | 16 ++++++++++++---- examples/vision/convert_ckpt_to_nemo.py | 6 +++--- nemo/utils/exp_manager.py | 7 +++++++ 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/examples/multimodal/convert_ckpt_to_nemo.py b/examples/multimodal/convert_ckpt_to_nemo.py index 9aa9bf5876c6..4a1b1db4c794 100644 --- a/examples/multimodal/convert_ckpt_to_nemo.py +++ b/examples/multimodal/convert_ckpt_to_nemo.py @@ -125,9 +125,9 @@ def convert(local_rank, rank, world_size, args): app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size parallel_state.initialize_model_parallel( - tensor_model_parallel_size_=app_state.tensor_model_parallel_size, - pipeline_model_parallel_size_=app_state.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank_=app_state.pipeline_model_parallel_split_rank, + tensor_model_parallel_size=app_state.tensor_model_parallel_size, + pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, ) app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() diff --git a/examples/multimodal/foundation/clip/convert_openclip_to_nemo.py b/examples/multimodal/foundation/clip/convert_openclip_to_nemo.py index b34831c138cb..98cd732336a8 100644 --- a/examples/multimodal/foundation/clip/convert_openclip_to_nemo.py +++ b/examples/multimodal/foundation/clip/convert_openclip_to_nemo.py @@ -18,7 +18,6 @@ import einops import open_clip import torch -from apex.transformer import parallel_state from omegaconf import OmegaConf from pytorch_lightning.plugins.environments import TorchElasticEnvironment from pytorch_lightning.trainer.trainer import Trainer @@ -29,6 +28,15 @@ from nemo.utils.distributed import initialize_distributed from nemo.utils.model_utils import inject_model_parallel_rank +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + def get_args(): parser = ArgumentParser() @@ -156,9 +164,9 @@ def convert(local_rank, rank, world_size, args): app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size parallel_state.initialize_model_parallel( - tensor_model_parallel_size_=app_state.tensor_model_parallel_size, - pipeline_model_parallel_size_=app_state.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank_=app_state.pipeline_model_parallel_split_rank, + tensor_model_parallel_size=app_state.tensor_model_parallel_size, + pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, ) app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() diff --git a/examples/vision/convert_ckpt_to_nemo.py b/examples/vision/convert_ckpt_to_nemo.py index 686fe1ef8e63..27782f34d0bb 100644 --- a/examples/vision/convert_ckpt_to_nemo.py +++ b/examples/vision/convert_ckpt_to_nemo.py @@ -122,9 +122,9 @@ def convert(local_rank, rank, world_size, args): app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size parallel_state.initialize_model_parallel( - tensor_model_parallel_size_=app_state.tensor_model_parallel_size, - pipeline_model_parallel_size_=app_state.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank_=app_state.pipeline_model_parallel_split_rank, + tensor_model_parallel_size=app_state.tensor_model_parallel_size, + pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, ) app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() diff --git a/nemo/utils/exp_manager.py b/nemo/utils/exp_manager.py index af3b25eb73bd..184b65ae33bb 100644 --- a/nemo/utils/exp_manager.py +++ b/nemo/utils/exp_manager.py @@ -184,6 +184,13 @@ def _on_batch_start(self, name): if self.timer.buffer_size <= 0: self.timer.reset(name) + if self.timer.is_active(name): + logging.warning( + f"Timer `{name}` was not correctly stopped, suggesting a " + "possible issue. The timer will be reset for now." + ) + self.timer.reset(name) + self.timer.start(name) def _on_batch_end(self, name, pl_module): From 1603663eca1d3875fe1b1609d158bf0199a08aee Mon Sep 17 00:00:00 2001 From: Ken Janik Date: Mon, 15 May 2023 10:38:30 -0700 Subject: [PATCH 023/512] Update megatron_vit_classification_models.py --- .../vision/models/megatron_vit_classification_models.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/nemo/collections/vision/models/megatron_vit_classification_models.py b/nemo/collections/vision/models/megatron_vit_classification_models.py index ea664c0da022..ff40fee36151 100644 --- a/nemo/collections/vision/models/megatron_vit_classification_models.py +++ b/nemo/collections/vision/models/megatron_vit_classification_models.py @@ -220,19 +220,12 @@ def configure_optimizers(self): module = self.model[0] # only the first virtual rank has the embeddings else: module = self.model - if module.share_token_embeddings: - param = module.word_embeddings_weight() - param._disable_greedy_grad_copy = not self.megatron_amp_O2 - param._disable_overlap_grad_sync = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): if isinstance(self.model, list): module = self.model[-1] # only the last virtual rank has the embeddings else: module = self.model - if module.share_token_embeddings: - param = module.word_embeddings_weight() - param._disable_greedy_grad_copy = not self.megatron_amp_O2 - param._disable_overlap_grad_sync = True # Disable overlapped grad sync for layer norm grads when # sequence parallelism is enabled From f37867aec5ebe2f7d964dde0f9b8f89742652cda Mon Sep 17 00:00:00 2001 From: kjanik Date: Mon, 15 May 2023 14:49:04 -0700 Subject: [PATCH 024/512] Updated vision_base_model with distrib_adam changes from megatron_base_model. --- .../vision/models/vision_base_model.py | 125 +++++++++++++----- 1 file changed, 89 insertions(+), 36 deletions(-) diff --git a/nemo/collections/vision/models/vision_base_model.py b/nemo/collections/vision/models/vision_base_model.py index 0e444911626b..dc9726d53940 100644 --- a/nemo/collections/vision/models/vision_base_model.py +++ b/nemo/collections/vision/models/vision_base_model.py @@ -168,12 +168,13 @@ class MegatronVisionModel(VisionModel): It does the following things: 1. Initialize the model parallel for nemo given the model parallel parameters. 2. Turn on all the nvidia optimizations. - 3. If using distributed optimizer, configure to be compatible with + 3. If `cfg.tokenizer` is available, it loads the tokenizer and pad the vocab to the correct size for tensor model parallelism. + 4. If using distributed optimizer, configure to be compatible with O2-level optimizations and/or model parallelism. - 4. Perform gradient clipping: `grad_clip_pl_default` triggers the + 5. Perform gradient clipping: `grad_clip_pl_default` triggers the PyTorch Lightning default implementation, `with_distributed_adam` triggers the distributed optimizer's implementation, - `megatron_amp_O2` triggers gradient clipping on the main grads, + `megatron_amp_o2` triggers gradient clipping on the main grads, and otherwise gradient clipping is performed on the model grads. """ @@ -189,8 +190,6 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): super().__init__(cfg, trainer=trainer) - self._validate_config() - self.with_distributed_adam = cfg.optim.get('name') == 'distributed_fused_adam' # used in NVIDIA NGC PyTorch containers @@ -229,6 +228,9 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30), ) + # This must be called after initialize model parallel since it needs to know the data parallel size + self._validate_and_override_config() + self.grad_clip_pl_default = False # use pytorch default for gradient clipping. Default False # TODO: remove this when PTL 1.7.3 is released @@ -344,10 +346,14 @@ def reduce_overlap_gradients(self): stages, the grad sync is deferred until the bubble overhead. """ + if self.with_distributed_adam and self._optimizer.overlap_grad_sync: + if params is None: + params = self._optimizer.parameters() + self._optimizer.try_grad_sync(params) + + def sync_overlap_parameters(self, params=None): if self.with_distributed_adam: - self._optimizer.try_grad_sync( - p for p in self._optimizer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False) - ) + self._optimizer._try_start_bucket_param_sync(params) def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unused: Optional[int] = 0) -> None: super().on_train_batch_end(outputs, dataloader_iter, batch_idx) @@ -392,23 +398,33 @@ def setup_optimization( optim_kwargs = {} if optim_kwargs is None else optim_kwargs.copy() if self.with_distributed_adam: - # Allocate grads since we are storing between microbatches + # Allocate contiguous buffers to avoid extra copies optim_kwargs['contiguous_grad_buffer'] = True + optim_kwargs['contiguous_param_buffer'] = True - if self.megatron_amp_O2: - # Match param allgather with model dtype - if hasattr(self, 'autocast_dtype'): - optim_kwargs['param_sync_dtype'] = self.autocast_dtype - if self.autocast_dtype == torch.float: - optim_kwargs['store_params'] = False - elif self.autocast_dtype == torch.float16: - optim_kwargs['store_params'] = True - elif self.autocast_dtype == torch.bfloat16: - optim_kwargs['store_params'] = False - optim_kwargs['store_param_remainders'] = True - else: - # Assume FP32 params, so no need to store main params + # Make sure optimizer state is in FP32 + optim_dtype = torch.float32 + optim_kwargs['dtype'] = optim_dtype + + # Make sure embedding grad reductions are in FP32 + for name, param in self.named_parameters(): + if 'word_embedding' in name or 'position_embedding' in name: + param._with_fp32_optimizer = True + + # Match param allgather with model dtype + model_dtype = torch.float32 + if self.megatron_amp_O2 and hasattr(self, 'autocast_dtype'): + model_dtype = self.autocast_dtype + optim_kwargs['param_sync_dtype'] = model_dtype + + # Determine whether to store master params in optimizer + if optim_dtype == model_dtype: + optim_kwargs['store_params'] = False + elif optim_dtype == torch.float32 and model_dtype == torch.bfloat16: optim_kwargs['store_params'] = False + optim_kwargs['store_param_remainders'] = True + else: + optim_kwargs['store_params'] = True return super().setup_optimization(optim_config=optim_config, optim_kwargs=optim_kwargs) @@ -465,12 +481,28 @@ def configure_optimizers(self): # Configure distributed optimizer if self.with_distributed_adam: - # Initialize params so that main grads are available + + # Initialize param buckets if explicitly provided + if hasattr(self, 'distributed_adam_buckets'): + for bucket in self.distributed_adam_buckets: + self._optimizer.init_params_bucket(bucket) + del self.distributed_adam_buckets + + # Make sure all params are initialized so main grads are + # available # Note: Consolidate grads without overlap - self._optimizer.init_params( - p for p in self.parameters() if getattr(p, '_disable_overlap_grad_sync', False) - ) - self._optimizer.init_params(self.parameters()) + overlap_params = [] + no_overlap_params = [] + for p in self.parameters(): + if getattr(p, '_disable_overlap_grad_sync', False): + no_overlap_params.append(p) + else: + overlap_params.append(p) + self._optimizer.init_params(reversed(overlap_params)) + self._optimizer.init_params(reversed(no_overlap_params)) + + # Initialize contiguous parameter buffer + self._optimizer.init_param_buffer() if self._scheduler is None: return self._optimizer @@ -494,8 +526,11 @@ def _extract_consumed_samples_from_ckpt(self, ckpt_path): return init_consumed_samples - def _validate_config(self): - """ Certain configurations might be incompatible or discouraged. We can check for them here.""" + def _validate_and_override_config(self): + """ Certain configurations might be incompatible or discouraged. + We can check for them here and override if necessary. + """ + app_state = AppState() if self.cfg.get('sequence_parallel', False) and self.cfg.get('tensor_model_parallel_size', 1) == 1: logging.info( @@ -504,19 +539,37 @@ def _validate_config(self): with open_dict(self.cfg): self.cfg.sequence_parallel = False - if ( - self.cfg.get('gradient_accumulation_fusion', False) - and self.cfg.get('pipeline_model_parallel_size', 1) == 1 - ): - logging.info("Gradient accumulation fusion can only be used with pipeline parallel size > 1.") - with open_dict(self.cfg): - self.cfg.gradient_accumulation_fusion = False + # Gradient accumulation fusion does not work with our baseline implementaiton of + # async grad allreduce. This should be fixed! + # For now we must disable it whenever using the baseline implementaion. + # The distributed adam from apex does work with gradient accumulation fusion. + distributed_fused_adam = self.cfg.optim.get('name', 'fused_adam') == 'distributed_fused_adam' + pipeline_model_parallel_size = self.cfg.get('pipeline_model_parallel_size', 1) + data_parallel_size = app_state.data_parallel_size + + if self.cfg.get('gradient_accumulation_fusion', False): + if data_parallel_size > 1 and pipeline_model_parallel_size == 1 and not distributed_fused_adam: + logging.info( + "When not using pipeline model parallel, gradient accumulation fusion can only be used with distributed_fused_adam." + ) + with open_dict(self.cfg): + self.cfg.gradient_accumulation_fusion = False if self.cfg.get('gradient_accumulation_fusion', False) and not self.cfg.get('megatron_amp_O2', False): logging.info("Gradient accumulation fusion can only be used with megatron amp O2 mixed precision.") with open_dict(self.cfg): self.cfg.gradient_accumulation_fusion = False + if self.cfg.get('use_emha', False): + raise ValueError('use_emha is not yet supported please set to False') + + if self.cfg.get('virtual_pipeline_model_parallel_size', None) is not None: + assert ( + self.cfg.num_layers // self.cfg.pipeline_model_parallel_size + ) % self.cfg.virtual_pipeline_model_parallel_size == 0, ( + 'Make sure the number of model chunks is the same across all pipeline stages.' + ) + def is_data_parallel_rank_zero(self): if is_global_rank_zero(): return True From 8afe1a971ac200f67294a9766ae21c569eeacd89 Mon Sep 17 00:00:00 2001 From: Ken Janik Date: Wed, 17 May 2023 13:26:54 -0700 Subject: [PATCH 025/512] Brought in changes from gpt to hopefully resolve dataloader problems --- .../megatron_vit_classification_pretrain.py | 3 +++ .../models/megatron_vit_classification_models.py | 11 ++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py b/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py index 5be257e917f8..9dc384529dad 100644 --- a/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py +++ b/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py @@ -13,6 +13,7 @@ # limitations under the License. +import torch.multiprocessing as mp from omegaconf.omegaconf import OmegaConf, open_dict from pytorch_lightning import Trainer from pytorch_lightning.plugins.environments import TorchElasticEnvironment @@ -29,6 +30,8 @@ from nemo.utils import logging from nemo.utils.exp_manager import exp_manager +mp.set_start_method("spawn", force=True) + @hydra_runner(config_path="conf", config_name="megatron_vit_classification_config") def main(cfg) -> None: diff --git a/nemo/collections/vision/models/megatron_vit_classification_models.py b/nemo/collections/vision/models/megatron_vit_classification_models.py index 183f7cdb5575..2fa1b66e2e7e 100644 --- a/nemo/collections/vision/models/megatron_vit_classification_models.py +++ b/nemo/collections/vision/models/megatron_vit_classification_models.py @@ -548,8 +548,17 @@ def build_pretraining_data_loader(self, dataset, consumed_samples, drop_last=Tru else: raise ValueError('cfg.data.dataloader_type not found. Must be "single" or "cyclic"') + # KJJ + # return torch.utils.data.DataLoader( + # dataset, batch_sampler=batch_sampler, num_workers=self.cfg.data.num_workers, pin_memory=True, + # ) + # return torch.utils.data.DataLoader( - dataset, batch_sampler=batch_sampler, num_workers=self.cfg.data.num_workers, pin_memory=True, + dataset, + batch_sampler=batch_sampler, + num_workers=self.cfg.data.num_workers, + pin_memory=True, + persistent_workers=True if self.cfg.data.num_workers > 0 else False, ) def setup(self, stage=None): From fcbb4e34c367de4d5ba39d558b8349991c661e78 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Mon, 22 May 2023 08:43:16 -0700 Subject: [PATCH 026/512] Remove mp.set_start_method in ViT --- .../vision_transformer/megatron_vit_classification_pretrain.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py b/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py index 9dc384529dad..0c2d56351a2c 100644 --- a/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py +++ b/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py @@ -30,8 +30,6 @@ from nemo.utils import logging from nemo.utils.exp_manager import exp_manager -mp.set_start_method("spawn", force=True) - @hydra_runner(config_path="conf", config_name="megatron_vit_classification_config") def main(cfg) -> None: From 91ec1b376eb685dad8f46d5cc07b4f0ffe1023ef Mon Sep 17 00:00:00 2001 From: Shanmugam Ramasamy Date: Tue, 23 May 2023 11:00:49 -0700 Subject: [PATCH 027/512] Add filtering support to webdataset --- nemo/collections/multimodal/data/common/webdataset.py | 2 +- .../data/stable_diffusion/stable_diffusion_dataset.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/nemo/collections/multimodal/data/common/webdataset.py b/nemo/collections/multimodal/data/common/webdataset.py index 5ce77d6f3ba7..84841f0e5e6e 100644 --- a/nemo/collections/multimodal/data/common/webdataset.py +++ b/nemo/collections/multimodal/data/common/webdataset.py @@ -177,7 +177,7 @@ def __init__( if self.filterings is not None: if self.filterings.resolution is not None: - train_dataset = train_dataset.select(filter_fn) + train_dataset = train_dataset.select(filter_fn(**self.filterings.resolution)) # traindataset.to_tuple("").map_tuple(fns) train_dataset = train_dataset.map(map_fn) diff --git a/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py b/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py index 9c98ec52b6d7..4a62bed1ddeb 100644 --- a/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py +++ b/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py @@ -25,6 +25,14 @@ def build_train_valid_datasets( ): data_cfg = model_cfg.data + def build_resolution_filter(value=None, method='larger'): + assert method == 'larger' or method == 'smaller' + if method == 'larger': + print(f'Only Selecting images with resolution >= {value}') + return lambda x: x['jpg'].size[0] >= value and x['jpg'].size[1] >= value + print(f'Only Selecting images with resolution <= {value}') + return lambda x: x['jpg'].size[0] <= value and x['jpg'].size[1] <= value + # This function maps data that are tuples to dictionary. def tuple_to_dict(inp): for input in inp: @@ -45,6 +53,7 @@ def transform_fn(sample): consumed_samples=consumed_samples, map_fn=transform_fn, compose_fn=tuple_to_dict, + filter_fn=build_resolution_filter, is_train=True, ) @@ -55,6 +64,7 @@ def transform_fn(sample): consumed_samples=consumed_samples, map_fn=transform_fn, compose_fn=tuple_to_dict, + filter_fn=build_resolution_filter, is_train=False, ) From 8e41778fc8994066104ef309160635befba10b53 Mon Sep 17 00:00:00 2001 From: Yu Yao Date: Fri, 26 May 2023 10:44:22 -0700 Subject: [PATCH 028/512] Add dist opt in clip --- .../models/clip/megatron_clip_models.py | 70 ++++++++++++++++-- .../models/multimodal_base_model.py | 74 +++++++++++++------ .../megatron_vit_classification_models.py | 4 +- 3 files changed, 118 insertions(+), 30 deletions(-) diff --git a/nemo/collections/multimodal/models/clip/megatron_clip_models.py b/nemo/collections/multimodal/models/clip/megatron_clip_models.py index d97f55f6d8b2..45df3aeebdb8 100644 --- a/nemo/collections/multimodal/models/clip/megatron_clip_models.py +++ b/nemo/collections/multimodal/models/clip/megatron_clip_models.py @@ -391,6 +391,7 @@ def configure_optimizers(self): module = self.model[0] # only the first virtual rank has the embeddings else: module = self.model + # TODO (yuya): text transformer's embedding needs to be taken care of when PP>1 # if module.share_token_embeddings: # param = module.word_embeddings_weight() # param._disable_greedy_grad_copy = not self.megatron_amp_O2 @@ -408,10 +409,48 @@ def configure_optimizers(self): # Disable overlapped grad sync for layer norm grads when # sequence parallelism is enabled for param in self.parameters(): - if getattr(param, 'sequence_parallel_enabled', False): + if getattr(param, 'sequence_parallel', False): param._disable_greedy_grad_copy = not self.megatron_amp_O2 param._disable_overlap_grad_sync = True + # Initialize parameter buckets for overlapped grad and param syncs + # Note: Params with disabled overlapping are put in the + # last param bucket + buckets = [] + if self.cfg.get('virtual_pipeline_model_parallel_size', None) is not None: + # Initialize a bucket for each virtual pipeline stage + for module in self.model: + if isinstance(module, Float16Module): + module = module.module + stage_bucket = [] + for layer in itertools.chain( + module.vision_encoder.backbone.transformer.layers, + module.text_encoder.language_model.encoder.layers, + ): + stage_bucket.extend( + p for p in layer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False) + ) + buckets.append(stage_bucket) + else: + # Initialize a bucket for each Transformer layer + modules = self.model if isinstance(self.model, list) else [self.model] + for module in modules: + if isinstance(module, Float16Module): + module = module.module + for layer in itertools.chain( + module.vision_encoder.backbone.transformer.layers, + module.text_encoder.language_model.encoder.layers, + ): + buckets.append( + [p for p in layer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False)] + ) + buckets.reverse() + used_params = set() + for bucket in buckets: + used_params.update(bucket) + buckets[-1].extend(p for p in self.parameters() if p not in used_params) + self.distributed_adam_buckets = buckets + return super().configure_optimizers() def forward(self, image, text): @@ -431,6 +470,24 @@ def training_step(self, dataloader_iter, batch_idx): # we zero grads here because we also call backward in the megatron-core fwd/bwd functions self._optimizer.zero_grad() + if self.with_distributed_adam: + # hack to enable overlapping param sync and forward compute + # note: the distributed optimizer monkey-patches each + # parameter's __getattribute__ function so that it can + # launch parameter all-gathers the first time the + # parameter is accessed after the optimizer step. However, + # PyTorch directly passes embedding parameters into a C++, + # bypassing this process. A quick-and-dirty hack is to + # manually interact with the parameter. + modules = self.model if isinstance(self.model, list) else [self.model] + for module in modules: + if isinstance(module, Float16Module): + module = module.module + module = module.text_encoder.language_model + if hasattr(module, 'embedding'): + for param in module.embedding.parameters(): + param.data_ptr() + # TODO (yuya): fix this shape tensor_shape = None @@ -465,20 +522,21 @@ def training_step(self, dataloader_iter, batch_idx): self.allreduce_sequence_parallel_gradients() if self.with_distributed_adam: - # gradients are reduced internally in distributed optimizer - pass + # synchronize asynchronous grad reductions + # note: not necessary, but reduces performance degradation + # from multiple simultaneous NCCL calls + self._optimizer._finish_bucket_grad_sync() elif self.megatron_amp_O2: - # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) + # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): # # main grads are stored in the MainParamsOptimizer wrapper - # self._optimizer.allreduce_main_grads() self._optimizer.allreduce_main_grads() else: # async grad allreduce is not currently implemented for O1/autocasting mixed precision training # so we all-reduce gradients after the pipeline self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) - # TODO (yuya): check if this is needed in text transformer + # TODO (yuya): check if this is needed in text transformer when PP>1 # if self.cfg.get('pipeline_model_parallel_size', 1) > 1: # # when using pipeline parallelism the first and last stage must keep embeddings in sync # self.allreduce_first_last_embeddings() diff --git a/nemo/collections/multimodal/models/multimodal_base_model.py b/nemo/collections/multimodal/models/multimodal_base_model.py index ad13eb6c9064..c2ca22cf2ba7 100644 --- a/nemo/collections/multimodal/models/multimodal_base_model.py +++ b/nemo/collections/multimodal/models/multimodal_base_model.py @@ -441,10 +441,14 @@ def reduce_overlap_gradients(self): stages, the grad sync is deferred until the bubble overhead. """ + if self.with_distributed_adam and self._optimizer.overlap_grad_sync: + if params is None: + params = self._optimizer.parameters() + self._optimizer.try_grad_sync(params) + + def sync_overlap_parameters(self, params=None): if self.with_distributed_adam: - self._optimizer.try_grad_sync( - p for p in self._optimizer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False) - ) + self._optimizer._try_start_bucket_param_sync(params) def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unused: Optional[int] = 0) -> None: super().on_train_batch_end(outputs, dataloader_iter, batch_idx) @@ -489,23 +493,33 @@ def setup_optimization( optim_kwargs = {} if optim_kwargs is None else optim_kwargs.copy() if self.with_distributed_adam: - # Allocate grads since we are storing between microbatches + # Allocate contiguous buffers to avoid extra copies optim_kwargs['contiguous_grad_buffer'] = True + optim_kwargs['contiguous_param_buffer'] = True - if self.megatron_amp_O2: - # Match param allgather with model dtype - if hasattr(self, 'autocast_dtype'): - optim_kwargs['param_sync_dtype'] = self.autocast_dtype - if self.autocast_dtype == torch.float: - optim_kwargs['store_params'] = False - elif self.autocast_dtype == torch.float16: - optim_kwargs['store_params'] = True - elif self.autocast_dtype == torch.bfloat16: - optim_kwargs['store_params'] = False - optim_kwargs['store_param_remainders'] = True - else: - # Assume FP32 params, so no need to store main params + # Make sure optimizer state is in FP32 + optim_dtype = torch.float32 + optim_kwargs['dtype'] = optim_dtype + + # Make sure embedding grad reductions are in FP32 + for name, param in self.named_parameters(): + if 'word_embedding' in name or 'position_embedding' in name: + param._with_fp32_optimizer = True + + # Match param allgather with model dtype + model_dtype = torch.float32 + if self.megatron_amp_O2 and hasattr(self, 'autocast_dtype'): + model_dtype = self.autocast_dtype + optim_kwargs['param_sync_dtype'] = model_dtype + + # Determine whether to store master params in optimizer + if optim_dtype == model_dtype: optim_kwargs['store_params'] = False + elif optim_dtype == torch.float32 and model_dtype == torch.bfloat16: + optim_kwargs['store_params'] = False + optim_kwargs['store_param_remainders'] = True + else: + optim_kwargs['store_params'] = True return super().setup_optimization(optim_config=optim_config, optim_kwargs=optim_kwargs) @@ -562,12 +576,28 @@ def configure_optimizers(self): # Configure distributed optimizer if self.with_distributed_adam: - # Initialize params so that main grads are available + + # Initialize param buckets if explicitly provided + if hasattr(self, 'distributed_adam_buckets'): + for bucket in self.distributed_adam_buckets: + self._optimizer.init_params_bucket(bucket) + del self.distributed_adam_buckets + + # Make sure all params are initialized so main grads are + # available # Note: Consolidate grads without overlap - self._optimizer.init_params( - p for p in self.parameters() if getattr(p, '_disable_overlap_grad_sync', False) - ) - self._optimizer.init_params(self.parameters()) + overlap_params = [] + no_overlap_params = [] + for p in self.parameters(): + if getattr(p, '_disable_overlap_grad_sync', False): + no_overlap_params.append(p) + else: + overlap_params.append(p) + self._optimizer.init_params(reversed(overlap_params)) + self._optimizer.init_params(reversed(no_overlap_params)) + + # Initialize contiguous parameter buffer + self._optimizer.init_param_buffer() if self._scheduler is None: return self._optimizer diff --git a/nemo/collections/vision/models/megatron_vit_classification_models.py b/nemo/collections/vision/models/megatron_vit_classification_models.py index f0a0f4476a3a..574b307b1bc4 100644 --- a/nemo/collections/vision/models/megatron_vit_classification_models.py +++ b/nemo/collections/vision/models/megatron_vit_classification_models.py @@ -246,7 +246,7 @@ def configure_optimizers(self): if isinstance(module, Float16Module): module = module.module stage_bucket = [] - #for layer in module.language_model.encoder.layers: + # for layer in module.language_model.encoder.layers: for layer in module.backbone.transformer.layers: stage_bucket.extend( p for p in layer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False) @@ -258,7 +258,7 @@ def configure_optimizers(self): for module in modules: if isinstance(module, Float16Module): module = module.module - #for layer in module.language_model.encoder.layers: + # for layer in module.language_model.encoder.layers: for layer in module.backbone.transformer.layers: buckets.append( From e1f95d507ae34c8ccc8248058f82cbb517758a72 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 24 Apr 2023 09:35:18 -0700 Subject: [PATCH 029/512] move to nvidia megatron repo (#6465) (#6475) Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar --- Dockerfile | 3 ++- Jenkinsfile | 3 ++- .../nlp/models/language_modeling/megatron_bert_model.py | 2 ++ .../nlp/models/language_modeling/megatron_finetune_model.py | 1 + .../nlp/models/language_modeling/megatron_gpt_model.py | 2 ++ .../language_modeling/megatron_gpt_prompt_learning_model.py | 1 + .../language_modeling/megatron_lm_encoder_decoder_model.py | 3 +++ .../language_modeling/megatron_t5_prompt_learning_model.py | 1 + .../nlp/models/machine_translation/megatron_nmt_model.py | 1 + .../collections/nlp/modules/common/text_generation_strategy.py | 1 + 10 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0c3d56a0a29d..e8402189a474 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,8 +44,9 @@ RUN apt-get update && \ WORKDIR /workspace/ # Install Megatron-core -RUN git clone https://github.com/aklife97/Megatron-LM.git && \ +RUN git clone https://github.com/NVIDIA/Megatron-LM.git && \ cd Megatron-LM && \ + git checkout 3db2063b1ff992a971ba18f7101eecc9c4e90f03 && \ pip install -e . WORKDIR /tmp/ diff --git a/Jenkinsfile b/Jenkinsfile index f8c0605670df..e209b44f051c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -60,8 +60,9 @@ pipeline { // TODO: remove when pip package is available stage('Megatron Core installation') { steps { - sh 'git clone https://github.com/aklife97/Megatron-LM.git && \ + sh 'git clone https://github.com/NVIDIA/Megatron-LM.git && \ cd Megatron-LM && \ + git checkout 3db2063b1ff992a971ba18f7101eecc9c4e90f03 && \ pip install -e .' } } diff --git a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py index 90053f3052c8..a7a22bb18150 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py @@ -311,6 +311,7 @@ def training_step(self, dataloader_iter, batch_idx): dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) if losses_reduced_per_micro_batch: @@ -411,6 +412,7 @@ def validation_step(self, dataloader_iter, batch_idx): tensor_shape=tensor_shape, dtype=self.autocast_dtype, sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) if losses_reduced_per_micro_batch: diff --git a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py b/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py index 7fc48856453f..c4cfcfdad1ff 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py @@ -300,6 +300,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) # only the last stages of the pipeline return losses diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index d4132c4c7e80..d8f90c500182 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -375,6 +375,7 @@ def training_step(self, dataloader_iter, batch_idx): dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) # only the last stages of the pipeline return losses @@ -656,6 +657,7 @@ def validation_step(self, dataloader_iter, batch_idx): tensor_shape=tensor_shape, dtype=self.autocast_dtype, sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) # only the last stage of the pipeline returns losses diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py index 331136c64a46..49cb078cd462 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py @@ -309,6 +309,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) # only the last stages of the pipeline return losses diff --git a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py index 94b2d348a61d..4f4bc0d709a8 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py @@ -328,6 +328,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): decoder_seq_length=self.max_decoder_seq_length, dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + enable_autocast=True, ) # only the last stages of the pipeline return losses @@ -991,6 +992,7 @@ def dummy(): num_microbatches=1, decoder_seq_length=encoder_seq_length, dtype=self.autocast_dtype, + enable_autocast=True, ) if output_tensor: @@ -1154,6 +1156,7 @@ def dummy(): num_microbatches=1, decoder_seq_length=encoder_seq_length, dtype=self.autocast_dtype, + enable_autocast=True, ) # get output tensor if parallel_state.is_pipeline_last_stage(): diff --git a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py index 629b4d8b7217..edec760ec7e7 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py @@ -197,6 +197,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) # only the last stages of the pipeline return losses diff --git a/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py b/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py index efa059419eda..ff1888c1c9ea 100644 --- a/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py +++ b/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py @@ -316,6 +316,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, ) # only the last stages of the pipeline return losses diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py index a2e7f351ae09..07607d3840d8 100644 --- a/nemo/collections/nlp/modules/common/text_generation_strategy.py +++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py @@ -62,6 +62,7 @@ def forward_step(self, batch, tensor_shape): forward_only=True, tensor_shape=tensor_shape, dtype=self.model.autocast_dtype, + enable_autocast=True, ) return output_tensor From 543cf55a9b742ab5250b62777106898ddfdf3d04 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 24 Apr 2023 16:03:25 -0400 Subject: [PATCH 030/512] Megatron KERPLE positional embeddings (#6478) (#6480) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [TTS] FastPitch adapter fine-tune and conditional layer normalization (#6416) [TTS] FastPitch adapter fine-tune and conditional layer normalization (#6416) --------- * [TTS] whitelist broken path fix. (#6412) * [TTS] whitelist broken path fix. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- * [TTS] FastPitch speaker encoder (#6417) * Add initial codes * Remove wemb * Fix import * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Restore aligner loss * Add ConditionalInput * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix error and support pre-trained config * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Follow comments * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Rename config * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Change copyright and random weight test * Add initial codes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix import error * Add initial codes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix dataset error * Remove reference speaker embedding * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove SV encoder * Follow comments * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix length type * Fix append * Move error msg * Add look-up into speaker encoder * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add valueerror msg * Move lookup * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove unused * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix error * Rebase and Fix error * Fix spk encoder * Rename n_speakers * Follow comments * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix n_speakers None error --------- * Sharded manifests for tarred datasets (#6395) * testing sharded manifests * compatibility * proper fixes * adding flag tot convert_to_tarred_audio_dataset * shard_manifests conf param * propagating the shard_manifests param * propagating the shard_manifests param * distributed checks * typo * typo * fixes * fixes * fixes * fixes * fixes * fixes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixes based on PR comments and tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixes to convert_to_tarred_audio_dataset.py * reversing manifest shards flag * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * tests * excluding manifests from webdataset url expansion * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * expand manifest paths before attempting to cache from datastore * explicit use of UTF-8 for manifest i/o --------- * Update wfst_text_normalization.rst (#6374) Add Hungarian (incoming in NeMo-text-processing) * Support Swiglu in TP PP Conversion (#6437) (#6451) * Support Swiglu in TP PP Conversion * Guard activation * Guard activation --------- * Update NeMo_TTS_Primer.ipynb (#6436) * Update NeMo_TTS_Primer.ipynb Changed a mistake in line 782. Instead of frequency band (ie. pitch) we should write frequency bin. Note that frequency bins in FFT are not related to pitch. * Update NeMo_TTS_Primer.ipynb Corrected the description of spectrogram and mel spectrogram calculations in lines 782 & 783 and added a fourth point to the description and added a reference for more mathematical details at the end of this point. --------- * add rampup batch size support for Megatron GPT (#6424) * added rampup batch size support * added tests for rampup batch size * fixed the typos * added assertions * changed assertion rules * deleted unused imports * changed tests for rampup batch size * updated rampup batch size tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixed styling * rampup batch size tests changes --------- * Meagtron encoder decoder fix for empty validation outputs (#6459) (#6461) * 1. Meagtron encoder decoder fix for empty validation outputs. * 1. Debugging. --------- * Code-Switching dataset creation - upgrading to aggregate tokenizer manifest format (#6448) * added functionality to create agg tokenizer compatible manifest for CS, flag to use this mode by default * updated README with the new agg_tokenizer_manifest flag * fixed typo in scripts/speech_recognition/code_switching/README.md * changed agg_tokenizer_manifest to is_lid_manifest --------- * Added/updated new Conformer configs (#6426) (#6467) * Update script for ngram rnnt and hat beam search decoding (#6370) * add rnnt ngram beamsearch script * add return encoding embedding option * update script * add rnnt and hat ngram decoding script * add some parameters * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add return_encoder_embeddings parameter to RNNTDecodingConfig * replace return_encoder_embeddings parameter * generalization of scipt behavior * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove return_encoder_embeddings parameter * remove return_encoder_embeddings parameter * add manual encoder_embeddings calculation * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix beam_width value to 8 * fix rescoring description --------- * BERT pre-training mp fork to spawn (#6442) (#6454) * change bert fork to spawn * num_workers=0 fix --------- * fix replace_bos_with_pad not found (#6443) (#6450) * reduce workers on NMT CI (#6472) (#6474) * 1. Added KERPLE positional embeddings to encoder-decoder. * 1. Added a missing file. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * 1. Fixing commits. * 1. Debugging. * 1. Debugging. * 1. Debugging. * 1. Debugging. --------- Signed-off-by: hsiehjackson Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Dima Rekesh Signed-off-by: Jim O’Regan Signed-off-by: smajumdar Signed-off-by: Mostafa Ghorbandoost Signed-off-by: Dmytro Pykhtar Signed-off-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Signed-off-by: Micha Livne Signed-off-by: Kunal Dhawan Signed-off-by: andrusenkoau Signed-off-by: Andrei Andrusenko <52885736+andrusenkoau@users.noreply.github.com> Signed-off-by: Abhinav Khattar Co-authored-by: Micha Livne Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Dima Rekesh Co-authored-by: Jim O’Regan Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Somshubra Majumdar Co-authored-by: Mostafa Ghorbandoost Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Co-authored-by: Dmytro Pykhtar Co-authored-by: Eric Harper Co-authored-by: Micha Livne Co-authored-by: Kunal Dhawan Co-authored-by: Andrei Andrusenko <52885736+andrusenkoau@users.noreply.github.com> Co-authored-by: Abhinav Khattar --- Jenkinsfile | 96 +++++++++++++++++++ .../conf/megatron_model_base_config.yaml | 2 +- .../kerple_relative_position_embedding.py | 88 +++++++++++++++++ .../megatron/token_level_encoder_decoder.py | 25 ++++- 4 files changed, 208 insertions(+), 3 deletions(-) create mode 100644 nemo/collections/nlp/modules/common/megatron/kerple_relative_position_embedding.py diff --git a/Jenkinsfile b/Jenkinsfile index e209b44f051c..a8adc89ab65c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -3806,6 +3806,102 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' sh "rm -rf examples/nlp/language_modeling/t5_index_mappings" } } + stage('L2: Megatron T5 with KERPLE Pretraining and Resume Training TP=2') { + when { + anyOf { + branch 'r1.18.0' + changeRequest target: 'r1.18.0' + } + } + failFast true + steps { + sh "python examples/nlp/language_modeling/megatron_t5_pretraining.py \ + trainer.devices=2 \ + trainer.accelerator=gpu \ + trainer.log_every_n_steps=1 \ + trainer.val_check_interval=10 \ + trainer.limit_val_batches=2 \ + trainer.accumulate_grad_batches=1 \ + trainer.max_steps=10 \ + trainer.precision=16 \ + trainer.gradient_clip_val=1.0 \ + exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \ + model.tensor_model_parallel_size=2 \ + model.seq_length=128 \ + model.encoder.num_layers=4 \ + model.encoder.hidden_size=64 \ + model.encoder.num_attention_heads=8 \ + model.encoder.activation='swiglu' \ + model.encoder.masked_softmax_fusion=False \ + model.encoder.bias_activation_fusion=False \ + model.encoder.activations_checkpoint_method='block' \ + model.encoder.activations_checkpoint_num_layers=1 \ + model.encoder.position_embedding_type=kerple \ + model.decoder.num_layers=2 \ + model.decoder.hidden_size=64 \ + model.decoder.num_attention_heads=8 \ + model.decoder.activation='swiglu' \ + model.decoder.masked_softmax_fusion=False \ + model.decoder.bias_activation_fusion=False \ + model.decoder.activations_checkpoint_method='block' \ + model.decoder.activations_checkpoint_num_layers=1 \ + model.encoder.transformer_block_type='pre_ln' \ + model.decoder.transformer_block_type='pre_ln' \ + model.data.data_prefix=[.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src,.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref] \ + model.data.index_mapping_dir=examples/nlp/language_modeling/t5_index_mappings \ + model.data.data_impl=text_mmap \ + +model.data.data_impl_kwargs.newline_int=10 \ + +model.data.data_impl_kwargs.header_lines=0 \ + +model.data.data_impl_kwargs.workers=null \ + +model.data.data_impl_kwargs.sort_dataset_paths=False \ + model.share_token_embeddings=False \ + model.share_decoder_tokens_head_embeddings=False" + sh "python examples/nlp/language_modeling/megatron_t5_pretraining.py \ + trainer.devices=2 \ + trainer.accelerator=gpu \ + trainer.log_every_n_steps=1 \ + trainer.val_check_interval=10 \ + trainer.limit_val_batches=2 \ + trainer.accumulate_grad_batches=1 \ + trainer.max_steps=10 \ + trainer.precision=16 \ + trainer.gradient_clip_val=1.0 \ + exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \ + exp_manager.resume_if_exists=True \ + model.tensor_model_parallel_size=2 \ + model.seq_length=128 \ + model.encoder.num_layers=4 \ + model.encoder.hidden_size=64 \ + model.encoder.num_attention_heads=8 \ + model.encoder.activation='swiglu' \ + model.encoder.masked_softmax_fusion=False \ + model.encoder.bias_activation_fusion=False \ + model.encoder.activations_checkpoint_method='block' \ + model.encoder.activations_checkpoint_num_layers=1 \ + model.encoder.position_embedding_type=kerple \ + model.decoder.num_layers=2 \ + model.decoder.hidden_size=64 \ + model.decoder.num_attention_heads=8 \ + model.decoder.activation='swiglu' \ + model.decoder.masked_softmax_fusion=False \ + model.decoder.bias_activation_fusion=False \ + model.decoder.activations_checkpoint_method='block' \ + model.decoder.activations_checkpoint_num_layers=1 \ + model.encoder.transformer_block_type='pre_ln' \ + model.decoder.transformer_block_type='pre_ln' \ + model.data.data_prefix=[.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src,.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref] \ + model.data.index_mapping_dir=examples/nlp/language_modeling/t5_index_mappings \ + model.data.data_impl=text_mmap \ + +model.data.data_impl_kwargs.newline_int=10 \ + +model.data.data_impl_kwargs.header_lines=0 \ + +model.data.data_impl_kwargs.workers=null \ + +model.data.data_impl_kwargs.sort_dataset_paths=False \ + model.share_token_embeddings=False \ + model.share_decoder_tokens_head_embeddings=False" + sh "rm -rf examples/nlp/language_modeling/t5_pretrain_results" + sh "rm -rf examples/nlp/language_modeling/t5_index_mappings" + } + } stage('L2: Megatron T5 Pretraining and Resume Training PP=2') { when { anyOf { diff --git a/examples/nlp/language_modeling/conf/megatron_model_base_config.yaml b/examples/nlp/language_modeling/conf/megatron_model_base_config.yaml index b623d08e4e8b..d3feb97ea9b4 100644 --- a/examples/nlp/language_modeling/conf/megatron_model_base_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_model_base_config.yaml @@ -6,7 +6,7 @@ init_method_std: 0.02 # Standard deviation of the zero mean normal distribution hidden_dropout: 0.1 # Dropout probability for hidden state transformer. attention_dropout: 0.1 # Dropout probability in the attention layer. ffn_dropout: 0.0 # Dropout probability in the feed-forward layer. -position_embedding_type: 'learned_absolute' # Position embedding type. Options ['learned_absolute', 'relative', 'alibi'] +position_embedding_type: 'learned_absolute' # Position embedding type. Options ['learned_absolute', 'relative', 'alibi', 'kerple'] relative_attention_num_buckets: 32 # Relative position number of buckets for computing the bias relative_attention_max_distance: 128 # max_distance to keep relative distance in the attention_num_buckets. relative_position_bias_self_attention_only: True # whether to only use relative position bias for self attention only. diff --git a/nemo/collections/nlp/modules/common/megatron/kerple_relative_position_embedding.py b/nemo/collections/nlp/modules/common/megatron/kerple_relative_position_embedding.py new file mode 100644 index 000000000000..54276d6fa21e --- /dev/null +++ b/nemo/collections/nlp/modules/common/megatron/kerple_relative_position_embedding.py @@ -0,0 +1,88 @@ +# coding=utf-8 +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import torch + +from nemo.collections.nlp.modules.common.megatron.alibi_relative_position_embedding import ( + build_relative_position, + build_slopes, +) + +__all__ = ['KERPLERelativePositionEmbedding'] + + +class KERPLERelativePositionEmbedding(torch.nn.Module): + """ + kerple (Attention with Linear Biases) relative position embedding for auto-regressive decoder + and joint encoder (symmetric for forward and backward distance). + Based on https://arxiv.org/bas/2108.12409 + """ + + def __init__( + self, bidirectional, num_attention_heads, layer_type, num_attention_heads_kerple=None, max_seq_len=512 + ): + """ + Args: + bidirectional: Whether to use bidirectional relative position embedding + num_attention_heads: Number of attention heads + layer_type: Layer type. Can be one of [LayerType.encoder or LayerType.decoder]. Willdetermine the bias construction + num_attention_heads_kerple: Number of attention heads for which kerple bias will be used + max_seq_len: Maximum sequence length for precomputed relative positions. Larger sizes will result in more memory usage by computing kerple mask on-the-fly. + """ + super().__init__() + + if (num_attention_heads_kerple is None) or (num_attention_heads_kerple <= 0): + num_attention_heads_kerple = num_attention_heads + + if num_attention_heads_kerple > num_attention_heads: + raise ValueError( + f"num_attention_heads_kerple ({num_attention_heads_kerple}) cannot be larger than num_attention_heads ({num_attention_heads})" + ) + + self.bidirectional = bidirectional + self.num_attention_heads = num_attention_heads + # LayerType.encoder or LayerType.decoder. Is only needed to determine the group for the all_reduce + self.layer_type = layer_type + # define the size of pre-computed relative position slopes. + # define the number of attention heads for which kerple mask will be pre-computed (the rest are disabled). + self.num_attention_heads_kerple = num_attention_heads_kerple + # Larger sizes will result in more memory usage by computing kerple mask on-the-fly. + self.max_seq_len = max_seq_len + + # initialize the slopes + self.kerple_b = torch.nn.Parameter(build_slopes(num_attention_heads, num_attention_heads_kerple)) + self.kerple_a = torch.zeros_like(self.kerple_b) + self.kerple_p = torch.ones_like(self.kerple_b) + + # cache the relative position bias. shape (num_attention_heads, max_seq_len, max_seq_len) + self.relative_position = build_relative_position(max_seq_len, max_seq_len, num_attention_heads) + + def forward(self, query_seq_length, key_seq_length): + # used cached relative position if possible + max_seq_len = max(query_seq_length, key_seq_length) + if max_seq_len > self.max_seq_len: + relative_position = build_relative_position(max_seq_len, max_seq_len, self.num_attention_heads) + else: + relative_position = self.relative_position + # shape (num_attention_heads, query_seq_length, key_seq_length) + relative_position = relative_position[:, :query_seq_length, :key_seq_length] + # if not bidirectional, mask out the future positions + if not self.bidirectional: + relative_position = torch.tril(relative_position) + + # shape (1, num_heads, query_length, key_length) + return -self.kerple_b * torch.log(1 + self.kerple_a * relative_position.unsqueeze(0).pow(self.kerple_p)) diff --git a/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py b/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py index a02fb5300912..dcf41a696b6e 100644 --- a/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py +++ b/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py @@ -18,6 +18,9 @@ from nemo.collections.nlp.modules.common.megatron.alibi_relative_position_embedding import ( ALiBiRelativePositionEmbedding, ) +from nemo.collections.nlp.modules.common.megatron.kerple_relative_position_embedding import ( + KERPLERelativePositionEmbedding, +) from nemo.collections.nlp.modules.common.megatron.language_model import Embedding from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType from nemo.collections.nlp.modules.common.megatron.megatron_decoders import get_decoder_model @@ -176,7 +179,16 @@ def __init__( num_attention_heads_alibi=None, max_seq_len=max_position_embeddings, ) - self._encoder_relative_position_embedding_key = "encoder_relative_position_embedding" + self._encoder_relative_position_embedding_key = "encoder_alibi_position_embedding" + elif self.encoder_cfg.get('position_embedding_type', 'learned_absolute') == 'kerple': + self.encoder_relative_position_embedding = KERPLERelativePositionEmbedding( + bidirectional=True, + num_attention_heads=encoder_cfg.num_attention_heads, + layer_type=LayerType.encoder, + num_attention_heads_kerple=None, + max_seq_len=max_position_embeddings, + ) + self._encoder_relative_position_embedding_key = "encoder_kerple_position_embedding" else: self.encoder_relative_position_embedding = None @@ -296,7 +308,16 @@ def __init__( num_attention_heads_alibi=None, max_seq_len=max_position_embeddings, ) - self._decoder_relative_position_embedding_key = "decoder_relative_position_embedding" + self._decoder_relative_position_embedding_key = "decoder_alibi_position_embedding" + elif self.decoder_cfg.get('position_embedding_type', 'learned_absolute') == 'kerple': + self.decoder_relative_position_embedding = KERPLERelativePositionEmbedding( + bidirectional=False, + num_attention_heads=decoder_cfg.num_attention_heads, + layer_type=LayerType.decoder, + num_attention_heads_kerple=None, + max_seq_len=max_position_embeddings, + ) + self._decoder_relative_position_embedding_key = "decoder_kerple_position_embedding" else: self.decoder_relative_position_embedding = None From 1d8ffac1d3ee4e2acb91d659384da525c3d770f7 Mon Sep 17 00:00:00 2001 From: Mostafa Ghorbandoost Date: Mon, 24 Apr 2023 16:23:16 -0700 Subject: [PATCH 031/512] Fix an invalid link in get_data.py of ljspeech (#6456) Usage of the link in line 63 leads to downloading a html file not a tsv file, so we need to change it to a raw link. Signed-off-by: Mostafa Ghorbandoost --- scripts/dataset_processing/tts/ljspeech/get_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/dataset_processing/tts/ljspeech/get_data.py b/scripts/dataset_processing/tts/ljspeech/get_data.py index 7c28fb8ef903..d8a0b1c2834c 100644 --- a/scripts/dataset_processing/tts/ljspeech/get_data.py +++ b/scripts/dataset_processing/tts/ljspeech/get_data.py @@ -60,7 +60,7 @@ def __extract_file(filepath, data_dir): def __process_data(data_root, whitelist_path): if whitelist_path is None: wget.download( - "https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv", + "https://raw.githubusercontent.com/NVIDIA/NeMo-text-processing/main/nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv", out=str(data_root), ) whitelist_path = data_root / "lj_speech.tsv" From 7ec5686dd39c69ac2efeb6276e5bb488bb22f070 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 25 Apr 2023 09:44:21 -0400 Subject: [PATCH 032/512] 1. Added external index sample. (#6462) (#6483) Signed-off-by: Micha Livne Co-authored-by: Micha Livne --- .../megatron/dataset_utils.py | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/dataset_utils.py b/nemo/collections/nlp/data/language_modeling/megatron/dataset_utils.py index d1f0718a6abd..775ac271d5b2 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/dataset_utils.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/dataset_utils.py @@ -34,6 +34,7 @@ import os import subprocess import time +from typing import Any import numpy as np import torch @@ -1255,6 +1256,7 @@ def get_samples_mapping( name, binary_head, index_mapping_dir: str = None, + samples_mapping: Any = None, ): """Get a list that maps a sample index to a starting sentence index, end sentence index, and length""" @@ -1280,8 +1282,8 @@ def get_samples_mapping( indexmap_filename += '_{}s'.format(seed) indexmap_filename += '.npy' - # Build the indexed mapping if not exist. - if torch.distributed.get_rank() == 0 and not os.path.isfile(indexmap_filename): + # Build the indexed mapping if not exist and not provided externally. + if samples_mapping is None and torch.distributed.get_rank() == 0 and not os.path.isfile(indexmap_filename): # Fake index mapping if missing if (getattr(indexed_dataset, 'doc_idx', None) is None) and (getattr(indexed_dataset, 'sizes', None) is None): make_indexed_dataset_compatibility(indexed_dataset) @@ -1334,15 +1336,16 @@ def get_samples_mapping( torch.distributed.get_world_size() // torch.distributed.get_world_size(group=parallel_state.get_tensor_model_parallel_group()) ) - # Load indexed dataset. - logging.info(' > loading indexed mapping from {}'.format(indexmap_filename)) - start_time = time.time() - samples_mapping = np.load(indexmap_filename, allow_pickle=True, mmap_mode='r') - logging.info(' loaded indexed file in {:3.3f} seconds'.format(time.time() - start_time)) - logging.info(' total number of samples: {}'.format(samples_mapping.shape[0])) + # Load indexed dataset if not given externally. + if samples_mapping is None: + logging.info(' > loading indexed mapping from {}'.format(indexmap_filename)) + start_time = time.time() + samples_mapping = np.load(indexmap_filename, allow_pickle=True, mmap_mode='r') + logging.info(' loaded indexed file in {:3.3f} seconds'.format(time.time() - start_time)) + logging.info(' total number of samples: {}'.format(samples_mapping.shape[0])) - # Deallocate temporary numpy arrays that were created for `get_samples_mapping()` when needed - if hasattr(indexed_dataset, 'doc_idx') and hasattr(indexed_dataset, 'sizes'): - deallocate_indexed_dataset_memory(indexed_dataset) + # Deallocate temporary numpy arrays that were created for `get_samples_mapping()` when needed + if hasattr(indexed_dataset, 'doc_idx') and hasattr(indexed_dataset, 'sizes'): + deallocate_indexed_dataset_memory(indexed_dataset) return samples_mapping From 7cc5b67964b780f9e327c04744fac5f37c4d83dc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 25 Apr 2023 12:10:32 -0600 Subject: [PATCH 033/512] Update README to add core installation (#6488) (#6489) * update README for megatron-core * fix --------- Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar --- README.rst | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index fbf803b4e289..614445bc4951 100644 --- a/README.rst +++ b/README.rst @@ -235,8 +235,10 @@ Note that RNNT requires numba to be installed from conda. NeMo Megatron ~~~~~~~~~~~~~ -NeMo Megatron training requires NVIDIA Apex to be installed. -Install it manually if not using the NVIDIA PyTorch container. +NeMo Megatron training requires NVIDIA Apex and Megatron-core to be installed. +Install them manually if not using the NVIDIA PyTorch container. + +To install Apex, run .. code-block:: bash @@ -245,6 +247,15 @@ Install it manually if not using the NVIDIA PyTorch container. git checkout 57057e2fcf1c084c0fcc818f55c0ff6ea1b24ae2 pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_layer_norm" --global-option="--distributed_adam" --global-option="--deprecated_fused_adam" ./ +To install Megatron-core, run + +.. code-block:: bash + + git clone https://github.com/NVIDIA/Megatron-LM.git + cd Megatron-LM + git checkout 3db2063b1ff992a971ba18f7101eecc9c4e90f03 + pip install -e . + It is highly recommended to use the NVIDIA PyTorch or NeMo container if having issues installing Apex or any other dependencies. While installing Apex, it may raise an error if the CUDA version on your system does not match the CUDA version torch was compiled with. From c34f647dab65b39da6062470a6793cdc97ea02e3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 25 Apr 2023 12:42:29 -0700 Subject: [PATCH 034/512] Fix cache aware hybrid bugs (#6466) (#6484) --- ...ech_to_text_cache_aware_streaming_infer.py | 25 ++++++- .../asr/models/hybrid_rnnt_ctc_bpe_models.py | 6 +- .../asr/models/hybrid_rnnt_ctc_models.py | 14 ++-- .../asr/modules/conformer_encoder.py | 2 +- nemo/collections/asr/parts/mixins/mixins.py | 15 +++- .../asr/parts/submodules/subsampling.py | 74 +++++++++++++------ .../asr/parts/utils/streaming_utils.py | 6 +- .../asr/test_asr_hybrid_rnnt_ctc_model_bpe.py | 2 +- 8 files changed, 104 insertions(+), 40 deletions(-) diff --git a/examples/asr/asr_cache_aware_streaming/speech_to_text_cache_aware_streaming_infer.py b/examples/asr/asr_cache_aware_streaming/speech_to_text_cache_aware_streaming_infer.py index 589d4c7ec3ee..75912f1c03c1 100644 --- a/examples/asr/asr_cache_aware_streaming/speech_to_text_cache_aware_streaming_infer.py +++ b/examples/asr/asr_cache_aware_streaming/speech_to_text_cache_aware_streaming_infer.py @@ -42,8 +42,14 @@ You may drop the '--debug_mode' and '--compare_vs_offline' to speedup the streaming evaluation. If compare_vs_offline is not used, then significantly larger batch_size can be used. +Setting `--pad_and_drop_preencoded` would perform the caching for all steps including the first step. +It may result in slightly different outputs from the sub-sampling module compared to offline mode for some techniques like striding and sw_striding. +Enabling it would make it easier to export the model to ONNX. + +# Hybrid ASR models +For Hybrid ASR models which have two decoders, you may select the decoder by --set_decoder DECODER_TYPE, where DECODER_TYPE can be "ctc" or "rnnt". +If decoder is not set, then the default decoder would be used which is the RNNT decoder for Hybrid ASR models. -To best compare output with offline output (i.e. `--compare_vs_offline` is set) `--pad-and-drop-preencoded` should also be set. ## Evaluate a model trained with full context for offline mode @@ -126,6 +132,7 @@ def perform_streaming( transcribed_texts, cache_last_channel_next, cache_last_time_next, + cache_last_channel_len, best_hyp, ) = asr_model.conformer_stream_step( processed_signal=processed_signal, @@ -254,9 +261,16 @@ def main(): "--output_path", type=str, help="path to output file when manifest is used as input", default=None ) parser.add_argument( - "--pad-and-drop-preencoded", + "--pad_and_drop_preencoded", action="store_true", - help="Enables padding the audio input and then dropping the extra steps after the pre-encoding for the first step. It makes the outputs of the downsampling exactly as the offline mode for some techniques like striding.", + help="Enables padding the audio input and then dropping the extra steps after the pre-encoding for all the steps including the the first step. It may make the outputs of the downsampling slightly different from offline mode for some techniques like striding or sw_striding.", + ) + + parser.add_argument( + "--set_decoder", + choices=["ctc", "rnnt"], + default=None, + help="Selects the decoder for Hybrid ASR models which has both the CTC and RNNT decoder. Supported decoders are ['ctc', 'rnnt']", ) args = parser.parse_args() @@ -273,6 +287,11 @@ def main(): asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name=args.asr_model) logging.info(asr_model.encoder.streaming_cfg) + if args.set_decoder is not None: + if hasattr(asr_model, "cur_decoder"): + asr_model.change_decoding_strategy(decoder_type=args.set_decoder) + else: + raise ValueError("Decoder cannot get changed for non-Hybrid ASR models.") global autocast if ( diff --git a/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py b/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py index 170aa3f8001a..104b2eb95524 100644 --- a/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py +++ b/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py @@ -124,7 +124,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): ) # setting the RNNT decoder as the default one - self.use_rnnt_decoder = True + self.cur_decoder = "rnnt" def _setup_dataloader_from_config(self, config: Optional[Dict]): dataset = audio_to_text_dataset.get_audio_to_text_bpe_dataset_from_config( @@ -375,7 +375,7 @@ def change_vocabulary( logging.info(f"Changed tokenizer of the CTC decoder to {self.ctc_decoder.vocabulary} vocabulary.") - def change_decoding_strategy(self, decoding_cfg: DictConfig, decoder_type: str = None): + def change_decoding_strategy(self, decoding_cfg: DictConfig = None, decoder_type: str = None): """ Changes decoding strategy used during RNNT decoding process. Args: @@ -446,7 +446,7 @@ def change_decoding_strategy(self, decoding_cfg: DictConfig, decoder_type: str = with open_dict(self.cfg.aux_ctc.decoding): self.cfg.aux_ctc.decoding = decoding_cfg - self.use_rnnt_decoder = False + self.cur_decoder = "ctc" logging.info( f"Changed decoding strategy of the CTC decoder to \n{OmegaConf.to_yaml(self.cfg.aux_ctc.decoding)}" ) diff --git a/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py b/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py index e3acec2c7420..a413eaeed6fa 100644 --- a/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py +++ b/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py @@ -86,7 +86,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): ) # setting the RNNT decoder as the default one - self.use_rnnt_decoder = True + self.cur_decoder = "rnnt" # setting up interCTC loss (from InterCTCMixin) self.setup_interctc(decoder_name='ctc_decoder', loss_name='ctc_loss', wer_name='ctc_wer') @@ -125,7 +125,11 @@ def transcribe( * A list of greedy transcript texts / Hypothesis * An optional list of beam search transcript texts / Hypothesis / NBestHypothesis. """ - if self.use_rnnt_decoder: + if self.cur_decoder not in ["ctc", "rnnt"]: + raise ValueError( + f"{self.cur_decoder} is not supported for cur_decoder. Supported values are ['ctc', 'rnnt']" + ) + if self.cur_decoder == "rnnt": return super().transcribe( paths2audio_files=paths2audio_files, batch_size=batch_size, @@ -307,7 +311,7 @@ def change_vocabulary( logging.info(f"Changed the tokenizer of the CTC decoder to {self.ctc_decoder.vocabulary} vocabulary.") - def change_decoding_strategy(self, decoding_cfg: DictConfig, decoder_type: str = None): + def change_decoding_strategy(self, decoding_cfg: DictConfig = None, decoder_type: str = None): """ Changes decoding strategy used during RNNT decoding process. @@ -319,7 +323,7 @@ def change_decoding_strategy(self, decoding_cfg: DictConfig, decoder_type: str = used. If set to 'ctc', it raises error if 'ctc_decoder' is not an attribute of the model. """ if decoder_type is None or decoder_type == 'rnnt': - self.use_rnnt_decoder = True + self.cur_decoder = "rnnt" return super().change_decoding_strategy(decoding_cfg=decoding_cfg) assert decoder_type == 'ctc' and hasattr(self, 'ctc_decoder') @@ -346,7 +350,7 @@ def change_decoding_strategy(self, decoding_cfg: DictConfig, decoder_type: str = with open_dict(self.cfg.aux_ctc): self.cfg.aux_ctc.decoding = decoding_cfg - self.use_rnnt_decoder = False + self.cur_decoder = "ctc" logging.info(f"Changed decoding strategy to \n{OmegaConf.to_yaml(self.cfg.aux_ctc.decoding)}") # PTL-specific methods diff --git a/nemo/collections/asr/modules/conformer_encoder.py b/nemo/collections/asr/modules/conformer_encoder.py index 032a16d537ea..0fc0912a8921 100644 --- a/nemo/collections/asr/modules/conformer_encoder.py +++ b/nemo/collections/asr/modules/conformer_encoder.py @@ -500,7 +500,7 @@ def forward_for_export( def streaming_post_process(self, rets, keep_all_outputs=True): if len(rets) == 2: - return rets + return rets[0], rets[1], None, None, None (encoded, encoded_len, cache_last_channel_next, cache_last_time_next, cache_last_channel_next_len) = rets diff --git a/nemo/collections/asr/parts/mixins/mixins.py b/nemo/collections/asr/parts/mixins/mixins.py index 92977314d08f..f350dbcd5df0 100644 --- a/nemo/collections/asr/parts/mixins/mixins.py +++ b/nemo/collections/asr/parts/mixins/mixins.py @@ -491,8 +491,17 @@ def conformer_stream_step( drop_extra_pre_encoded=drop_extra_pre_encoded, ) - if isinstance(self, asr_models.EncDecCTCModel): - log_probs = self.decoder(encoder_output=encoded) + if isinstance(self, asr_models.EncDecCTCModel) or ( + isinstance(self, asr_models.EncDecHybridRNNTCTCModel) and self.cur_decoder == "ctc" + ): + if hasattr(self, "ctc_decoder"): + decoding = self.ctc_decoding + decoder = self.ctc_decoder + else: + decoding = self.decoding + decoder = self.decoder + + log_probs = decoder(encoder_output=encoded) predictions_tensor = log_probs.argmax(dim=-1, keepdim=False) # Concatenate the previous predictions with the current one to have the full predictions. @@ -517,7 +526,7 @@ def conformer_stream_step( # TODO: make decoding more efficient by avoiding the decoding process from the beginning if return_transcription: - decoded_out = self.decoding.ctc_decoder_predictions_tensor( + decoded_out = decoding.ctc_decoder_predictions_tensor( decoder_outputs=greedy_predictions_concat.unsqueeze(0), decoder_lengths=encoded_len[preds_idx : preds_idx + 1], return_hypotheses=False, diff --git a/nemo/collections/asr/parts/submodules/subsampling.py b/nemo/collections/asr/parts/submodules/subsampling.py index 5c0e937e0d24..c10f85403b25 100644 --- a/nemo/collections/asr/parts/submodules/subsampling.py +++ b/nemo/collections/asr/parts/submodules/subsampling.py @@ -126,42 +126,72 @@ def __init__( self._kernel_size = 3 self._ceil_mode = False - self._left_padding = (self._kernel_size - 1) // 2 - self._right_padding = (self._kernel_size - 1) // 2 + if self.is_causal: + self._left_padding = self._kernel_size - 1 + self._right_padding = self._stride - 1 + self._max_cache_len = subsampling_factor + 1 + else: + self._left_padding = (self._kernel_size - 1) // 2 + self._right_padding = (self._kernel_size - 1) // 2 + self._max_cache_len = 0 # Layer 1 - layers.append( - torch.nn.Conv2d( - in_channels=in_channels, - out_channels=conv_channels, - kernel_size=self._kernel_size, - stride=self._stride, - padding=self._left_padding, + if self.is_causal: + layers.append( + CausalConv2D( + in_channels=in_channels, + out_channels=conv_channels, + kernel_size=self._kernel_size, + stride=self._stride, + padding=None, + ) + ) + else: + layers.append( + torch.nn.Conv2d( + in_channels=in_channels, + out_channels=conv_channels, + kernel_size=self._kernel_size, + stride=self._stride, + padding=self._left_padding, + ) ) - ) in_channels = conv_channels layers.append(activation) for i in range(self._sampling_num - 1): - layers.extend( - [ - torch.nn.Conv2d( + if self.is_causal: + layers.append( + CausalConv2D( in_channels=in_channels, out_channels=in_channels, kernel_size=self._kernel_size, stride=self._stride, - padding=self._left_padding, + padding=None, groups=in_channels, - ), + ) + ) + else: + layers.append( torch.nn.Conv2d( in_channels=in_channels, - out_channels=conv_channels, - kernel_size=1, - stride=1, - padding=0, - groups=1, - ), - ] + out_channels=in_channels, + kernel_size=self._kernel_size, + stride=self._stride, + padding=self._left_padding, + groups=in_channels, + ) + ) + + layers.append( + torch.nn.Conv2d( + in_channels=in_channels, + out_channels=conv_channels, + kernel_size=1, + stride=1, + padding=0, + groups=1, + ) ) layers.append(activation) in_channels = conv_channels diff --git a/nemo/collections/asr/parts/utils/streaming_utils.py b/nemo/collections/asr/parts/utils/streaming_utils.py index 622b4fe57478..b824bc18e770 100644 --- a/nemo/collections/asr/parts/utils/streaming_utils.py +++ b/nemo/collections/asr/parts/utils/streaming_utils.py @@ -1367,9 +1367,10 @@ def __iter__(self): ) if self.buffer_idx == 0 and isinstance(self.streaming_cfg.shift_size, list): - shift_size = self.streaming_cfg.shift_size[0] if self.pad_and_drop_preencoded: shift_size = self.streaming_cfg.shift_size[1] + else: + shift_size = self.streaming_cfg.shift_size[0] else: shift_size = ( self.streaming_cfg.shift_size[1] @@ -1394,9 +1395,10 @@ def __iter__(self): # if there is not enough frames to be used as the pre-encoding cache, zeros would be added zeros_pads = None if self.buffer_idx == 0 and isinstance(self.streaming_cfg.pre_encode_cache_size, list): - cache_pre_encode_num_frames = self.streaming_cfg.pre_encode_cache_size[0] if self.pad_and_drop_preencoded: cache_pre_encode_num_frames = self.streaming_cfg.pre_encode_cache_size[1] + else: + cache_pre_encode_num_frames = self.streaming_cfg.pre_encode_cache_size[0] cache_pre_encode = torch.zeros( (audio_chunk.size(0), self.input_features, cache_pre_encode_num_frames), device=audio_chunk.device, diff --git a/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_bpe.py b/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_bpe.py index e59353102c39..0f3611f95153 100644 --- a/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_bpe.py +++ b/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_bpe.py @@ -306,4 +306,4 @@ def test_decoding_change(self, hybrid_asr_model): assert hybrid_asr_model.ctc_decoding.preserve_alignments is True assert hybrid_asr_model.ctc_decoding.compute_timestamps is True - assert hybrid_asr_model.use_rnnt_decoder is False + assert hybrid_asr_model.cur_decoder == "ctc" From 26eae0a50cd39e81cb26b91119d8c6d09a81636f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 25 Apr 2023 17:03:27 -0700 Subject: [PATCH 035/512] Fix typos (#6494) (#6495) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar --- tutorials/VoiceSwapSample.ipynb | 2 +- ...red_Transducer_Inference_with_LCS_Merge.ipynb | 16 ++++++++-------- tutorials/nlp/Entity_Linking_Medical.ipynb | 2 +- tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb | 2 +- ...atron_Synthetic_Tabular_Data_Generation.ipynb | 4 ++-- tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb | 2 +- .../nlp/Token_Classification-BioMegatron.ipynb | 4 ++-- ...Classification_Named_Entity_Recognition.ipynb | 2 +- .../Speaker_Diarization_Inference.ipynb | 8 ++++---- .../tts/FastPitch_ChineseTTS_Training.ipynb | 4 ++-- tutorials/tts/FastPitch_GermanTTS_Training.ipynb | 6 +++--- .../tts/Inference_DurationPitchControl.ipynb | 2 +- 12 files changed, 27 insertions(+), 27 deletions(-) diff --git a/tutorials/VoiceSwapSample.ipynb b/tutorials/VoiceSwapSample.ipynb index e00e80d56a6a..934071faa768 100644 --- a/tutorials/VoiceSwapSample.ipynb +++ b/tutorials/VoiceSwapSample.ipynb @@ -39,7 +39,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.18.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n" ] }, diff --git a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb index ce513700cf09..e409dcb90b35 100644 --- a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb @@ -321,7 +321,7 @@ " \"\"\"\n", " Longest Common Subsequence merge algorithm for aligning two consecutive buffers.\n", "\n", - " Base alignment construction algorithm is Longest Common Subsequence (reffered to as LCS hear after)\n", + " Base alignment construction algorithm is Longest Common Subsequence (referred to as LCS hear after)\n", "\n", " LCS Merge algorithm looks at two chunks i-1 and i, determines the aligned overlap at the\n", " end of i-1 and beginning of ith chunk, and then clips the subsegment of the ith chunk.\n", @@ -467,7 +467,7 @@ " j_temp = j_partial + 1 # diagonal next j\n", "\n", " j_exp = 0 # number of tokens to expand along the diagonal\n", - " j_skip = 0 # how many diagonals didnt have the token. Incremented by 1 for every row i\n", + " j_skip = 0 # how many diagonals didn't have the token. Incremented by 1 for every row i\n", "\n", " for i_idx in range(i_temp, m + 1): # walk from i_partial + 1 => m + 1\n", " j_any_skip = 0 # If the diagonal element at this location is not found, set to 1\n", @@ -496,13 +496,13 @@ " j_partial += j_exp\n", "\n", " # (3) Given new leftmost j_partial with expansions, backtrack the partial alignments\n", - " # counting how many diagonal skips occured to compute slice length\n", + " # counting how many diagonal skips occurred to compute slice length\n", " # as well as starting point of slice.\n", "\n", " # Partial backward trace to find start of slice\n", " while i_partial > 0 and j_partial > 0:\n", " if LCSuff[i_partial][j_partial] == 0:\n", - " # diagonal skip occured, move j to left 1 extra time\n", + " # diagonal skip occurred, move j to left 1 extra time\n", " j_partial -= 1\n", " j_skip += 1\n", "\n", @@ -531,7 +531,7 @@ " \"slice_idx\": result_idx,\n", " }\n", " write_lcs_alignment_to_pickle(LCSuff, filepath=filepath, extras=extras)\n", - " print(\"Wrote alignemnt to :\", filepath)\n", + " print(\"Wrote alignment to :\", filepath)\n", "\n", " return result_idx, LCSuff\n", "\n", @@ -664,7 +664,7 @@ " ):\n", " if self.lcs_delay < 0:\n", " raise ValueError(\n", - " \"Please set LCS Delay valus as `(buffer_duration - chunk_duration) / model_stride_in_secs`\"\n", + " \"Please set LCS Delay values as `(buffer_duration - chunk_duration) / model_stride_in_secs`\"\n", " )\n", "\n", " self.infer_logits()\n", @@ -1216,7 +1216,7 @@ "worse_idx = find_first_sample_with_alignment(lcs_alignments_path, lcs_worse, start_idx=0)\n", "worse_sample = lcs_worse[worse_idx]\n", "\n", - "print(\"A sample where LCS did worse than Middle Token merge algoritm :\")\n", + "print(\"A sample where LCS did worse than Middle Token merge algorithm :\")\n", "print(\"The texts are structured as (Ground Truth, Middle Token, LCS Merge)\")\n", "worse_sample" ], @@ -1256,7 +1256,7 @@ "better_idx = find_first_sample_with_alignment(lcs_alignments_path, lcs_better, start_idx=0)\n", "better_sample = lcs_better[better_idx]\n", "\n", - "print(\"A sample where LCS did better than Middle Token merge algoritm :\")\n", + "print(\"A sample where LCS did better than Middle Token merge algorithm :\")\n", "print(\"The texts are structured as (Ground Truth, Middle Token, LCS Merge)\")\n", "better_sample" ], diff --git a/tutorials/nlp/Entity_Linking_Medical.ipynb b/tutorials/nlp/Entity_Linking_Medical.ipynb index 1f909eefdfd5..e7cd74a21d77 100644 --- a/tutorials/nlp/Entity_Linking_Medical.ipynb +++ b/tutorials/nlp/Entity_Linking_Medical.ipynb @@ -321,7 +321,7 @@ " topk_idxs = np.argpartition(query_scores, -k)[-k:]\n", " topk_cids = [test_kb_cids[idx] for idx in topk_idxs]\n", " \n", - " # If the correct query ID is amoung the top k closest kb IDs\n", + " # If the correct query ID is among the top k closest kb IDs\n", " # the model correctly linked the entity\n", " match = int(query_cid in topk_cids)\n", " accs[k] += match\n", diff --git a/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb b/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb index 3e8f90511780..8b2474597819 100644 --- a/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb +++ b/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb @@ -21,7 +21,7 @@ "import os\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.18.0'\n", "\n", "GITHUB_ACCOUNT = 'NVIDIA' # change this if using a fork\n", "\n", diff --git a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb index 0a3d9073e96e..74cc70e1db80 100644 --- a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb +++ b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb @@ -311,7 +311,7 @@ " full_msgs.append(delimiter.join(str_items)+eod_str)\n", " else:\n", " full_msgs.append(delimiter.join(str_items))\n", - " # use end of line to seperate rows\n", + " # use end of line to separate rows\n", " text = '\\n'.join(full_msgs)\n", " text_doc = {'text': text}\n", " doc = json.dumps(text_doc)+'\\n'\n", @@ -739,7 +739,7 @@ " '\\n')[history_rows:]]) for s in sentences]\n", " return extra_text\n", "\n", - "# generate the inital transactions \n", + "# generate the initial transactions \n", "data = {\n", " \"sentences\": [\"\"] * batch_size,\n", " \"tokens_to_generate\": num_of_rows * token_per_rows,\n", diff --git a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb index bd5e09e7c1f9..004014ebdeeb 100644 --- a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb +++ b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb @@ -311,7 +311,7 @@ "### Prompt Formatting\n", "Now that we have our dataset, lets define what we want the prompt to look like. \n", "\n", - "The squad dataset json files contain fields named \"context\", \"question\" and \"answer\". The prompt formatting template allows us to arrange these fields and decide where to insert virtual prompts. We can add the `<|VIRTUAL_PROMPT_0|>` token anywere between the fields (although we recommend simply adding it in the leftmost position will be sufficient).\n", + "The squad dataset json files contain fields named \"context\", \"question\" and \"answer\". The prompt formatting template allows us to arrange these fields and decide where to insert virtual prompts. We can add the `<|VIRTUAL_PROMPT_0|>` token anywhere between the fields (although we recommend simply adding it in the leftmost position will be sufficient).\n", "\n", "For example, given a data jsonl file with examples like this: \n", "\n", diff --git a/tutorials/nlp/Token_Classification-BioMegatron.ipynb b/tutorials/nlp/Token_Classification-BioMegatron.ipynb index 915c034dff55..d60f98aebd40 100644 --- a/tutorials/nlp/Token_Classification-BioMegatron.ipynb +++ b/tutorials/nlp/Token_Classification-BioMegatron.ipynb @@ -327,7 +327,7 @@ "metadata": {}, "outputs": [], "source": [ - "# in this tutorial train and dev datasets are located in the same folder, so it is enought to add the path of the data directory to the config\n", + "# in this tutorial train and dev datasets are located in the same folder, so it is enough to add the path of the data directory to the config\n", "config.model.dataset.data_dir = os.path.join(DATA_DIR, 'NER')\n", "\n", "# if you want to decrease the size of your datasets, uncomment the lines below:\n", @@ -385,7 +385,7 @@ "metadata": {}, "outputs": [], "source": [ - "# in this tutorial train and dev datasets are located in the same folder, so it is enought to add the path of the data directory to the config\n", + "# in this tutorial train and dev datasets are located in the same folder, so it is enough to add the path of the data directory to the config\n", "config.model.dataset.data_dir = os.path.join(DATA_DIR, 'NER')\n", "\n", "# if you want to decrease the size of your datasets, uncomment the lines below:\n", diff --git a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb index e61b5a76d3a2..b95ea27f478f 100644 --- a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb +++ b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb @@ -474,7 +474,7 @@ "colab": {} }, "source": [ - "# in this tutorial train and dev datasets are located in the same folder, so it is enought to add the path of the data directory to the config\n", + "# in this tutorial train and dev datasets are located in the same folder, so it is enough to add the path of the data directory to the config\n", "config.model.dataset.data_dir = DATA_DIR\n", "\n", "# if you want to use the full dataset, set NUM_SAMPLES to -1\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb index d9282f6317f0..20cc1f9ff37d 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb @@ -49,7 +49,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This tutorial covers speaker diarization inference. We will cover how to setup configurations and launch NeMo speaker diarization system with a few different settings. NeMo speaker diarization pipline includes the following steps as described in the above figure: VAD, Segmentation, Speaker Embedding Extraction, Clustering and Neural Diarizer. We will explain what each module does and we will run NeMo speaker diarization system on a small toy example. " + "This tutorial covers speaker diarization inference. We will cover how to setup configurations and launch NeMo speaker diarization system with a few different settings. NeMo speaker diarization pipeline includes the following steps as described in the above figure: VAD, Segmentation, Speaker Embedding Extraction, Clustering and Neural Diarizer. We will explain what each module does and we will run NeMo speaker diarization system on a small toy example. " ] }, { @@ -112,7 +112,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The information from each scale is combined by calculating the weighted sum of affininty matrix. An affinity matrix is calculated by cosine similarity value between all the segments (and corresponding embedding vectors) in that scale. Once affinity matrix for each sacle is calculated, we calculate a weighted sum on all the affinity matrices calculated as in the below figure." + "The information from each scale is combined by calculating the weighted sum of affininty matrix. An affinity matrix is calculated by cosine similarity value between all the segments (and corresponding embedding vectors) in that scale. Once affinity matrix for each scale is calculated, we calculate a weighted sum on all the affinity matrices calculated as in the below figure." ] }, { @@ -171,7 +171,7 @@ "metadata": {}, "source": [ "#### Neural Diarizer\n", - "In NeMo speaker diarization pipeline, the term **neural diarizer** referes to trainable neural modules that estimate speaker labels from the given feature or audio input. Neural diarizer contrasts with **clustering diarizer** in a way that clustering diarizer is not a trainable module. Neural diarizer is needed to enable overlap-aware diarization, more improved accucy and joint training with speaker embedding models using multispeaker datasets (diarization training datasets).\n", + "In NeMo speaker diarization pipeline, the term **neural diarizer** refers to trainable neural modules that estimate speaker labels from the given feature or audio input. Neural diarizer contrasts with **clustering diarizer** in a way that clustering diarizer is not a trainable module. Neural diarizer is needed to enable overlap-aware diarization, more improved accucy and joint training with speaker embedding models using multispeaker datasets (diarization training datasets).\n", "\n", "#### Multi-scale Diarization Decoder (MSDD)\n", "Currently, you can use Multi-scale Diarization Decoder (MSDD) model as a neural diarizer. MSDD models use clustering diarizer for obtaining the estimated speaker profile of each speaker and the estimated number of speakers. The below figure shows training and inference of MSDD model." @@ -375,7 +375,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Using **oracle VAD** for speaker diarization can be regarded as performing a diarization inference based on ground-truth speech/non-speech labels. The motivation behind using oracle-VAD is to factor out the influence of VAD performane when we evaluate a speaker diarization system. Speaker diarization with oracle-VAD can also be used to run speaker diarization with rttms generated from any external VAD, not just VAD model from NeMo.\n", + "Using **oracle VAD** for speaker diarization can be regarded as performing a diarization inference based on ground-truth speech/non-speech labels. The motivation behind using oracle-VAD is to factor out the influence of VAD performance when we evaluate a speaker diarization system. Speaker diarization with oracle-VAD can also be used to run speaker diarization with rttms generated from any external VAD, not just VAD model from NeMo.\n", "\n", "The first step is to start converting reference audio RTTM file (containing VAD output) timestamps to oracle manifest file. This manifest file would be sent to our speaker diarizer to extract embeddings.\n", "\n", diff --git a/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb b/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb index 4d5a9a35b2ec..38e6c2c9d6ff 100644 --- a/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb @@ -294,7 +294,7 @@ "1. `audio_filepath`: location of the wav file;\n", "2. `duration`: duration of the wav file;\n", "3. `text`: original text;\n", - "4. `normalized_text`: normalized text through our text normalization pipline.\n", + "4. `normalized_text`: normalized text through our text normalization pipeline.\n", " \n", "Please refer to [sfspeech-chinese-english-bilingual-speech](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tts/datasets.html#sfspeech-chinese-english-bilingual-speech) for more details about the SFSpeech dataset. \n", "\n", @@ -440,7 +440,7 @@ "id": "35f2f667", "metadata": {}, "source": [ - "Now we are ready for training our model! Let's try to train FastPitch. Copy and Paste the `PITCH_MEAN` and `PITCH_STD` from previous steps to overide `pitch_mean` and `pitch_std` configs below." + "Now we are ready for training our model! Let's try to train FastPitch. Copy and Paste the `PITCH_MEAN` and `PITCH_STD` from previous steps to override `pitch_mean` and `pitch_std` configs below." ] }, { diff --git a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb index b7603d37e374..b6c5463bb8c0 100644 --- a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb @@ -170,11 +170,11 @@ "1. `audio_filepath`: location of the wav file;\n", "2. `duration`: duration of the wav file;\n", "3. `text`: original text;\n", - "4. `normalized_text`: normalized text through our text normalization pipline.\n", + "4. `normalized_text`: normalized text through our text normalization pipeline.\n", " \n", "This script supports processing either of Thorsten's Neutral Datasets 21.02 or 22.10. In this tutorial, we only focus on the latest 22.10 version dataset. Please refer [thorsten-muller-s-german-neutral-tts-datasets](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tts/datasets.html#thorsten-muller-s-german-neutral-tts-datasets) for more details about Thorsten's datasets. \n", "\n", - "You can run the below command to obtain the final manifests, `train_manifest_text_normed.json`, `val_manifest_text_normed.json` and `test_manifest_text_normed.json`. **Note** that this script would take sometime (~2 hours) to dowload and normalize the entire dataset." + "You can run the below command to obtain the final manifests, `train_manifest_text_normed.json`, `val_manifest_text_normed.json` and `test_manifest_text_normed.json`. **Note** that this script would take sometime (~2 hours) to download and normalize the entire dataset." ] }, { @@ -316,7 +316,7 @@ "id": "d4364261", "metadata": {}, "source": [ - "Now we are ready for training our model! Let's try to train FastPitch. Copy and Paste the `PITCH_MEAN` and `PITCH_STD` from previous steps to overide `pitch_mean` and `pitch_std` configs below." + "Now we are ready for training our model! Let's try to train FastPitch. Copy and Paste the `PITCH_MEAN` and `PITCH_STD` from previous steps to override `pitch_mean` and `pitch_std` configs below." ] }, { diff --git a/tutorials/tts/Inference_DurationPitchControl.ipynb b/tutorials/tts/Inference_DurationPitchControl.ipynb index cf7a4dc9f697..ab1fc6ce12a8 100644 --- a/tutorials/tts/Inference_DurationPitchControl.ipynb +++ b/tutorials/tts/Inference_DurationPitchControl.ipynb @@ -290,7 +290,7 @@ " spec_shift, audio_shift, durs_shift_pred, _ = str_to_audio(input_string, pitch=pitch_shift)\n", " # NOTE: We do not plot the pitch returned from str_to_audio.\n", " # When we override the pitch, we want to plot the pitch that override the model with.\n", - " # In thise case, it is `pitch_shift`\n", + " # In these case, it is `pitch_shift`\n", "\n", "# Let's see both results\n", "print(\"The first unshifted sample\")\n", From 354c9cb66303e85ac33a4e3ec38e84c986d4065c Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Tue, 25 Apr 2023 17:19:25 -0700 Subject: [PATCH 036/512] Add disclaimer about dataset for ASR (#6496) Signed-off-by: smajumdar --- tutorials/asr/ASR_CTC_Language_Finetuning.ipynb | 1 + tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb | 2 ++ tutorials/asr/ASR_for_telephony_speech.ipynb | 1 + tutorials/asr/ASR_with_NeMo.ipynb | 1 + tutorials/asr/ASR_with_Subword_Tokenization.ipynb | 1 + tutorials/asr/ASR_with_Transducers.ipynb | 1 + tutorials/asr/Buffered_Transducer_Inference.ipynb | 1 + .../asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb | 1 + tutorials/asr/Intro_to_Transducers.ipynb | 1 + tutorials/asr/Multilang_ASR.ipynb | 3 +++ tutorials/asr/Offline_ASR.ipynb | 1 + tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb | 1 + tutorials/asr/Online_ASR_Microphone_Demo.ipynb | 1 + tutorials/asr/Online_Noise_Augmentation.ipynb | 1 + tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb | 1 + tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb | 1 + tutorials/asr/Self_Supervised_Pre_Training.ipynb | 1 + tutorials/asr/Speech_Commands.ipynb | 1 + tutorials/asr/Streaming_ASR.ipynb | 1 + tutorials/asr/Voice_Activity_Detection.ipynb | 1 + tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb | 1 + 21 files changed, 24 insertions(+) diff --git a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb index 268501558c77..cea5956f10ed 100644 --- a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb +++ b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb @@ -30,6 +30,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "\n", "# Install dependencies\n", diff --git a/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb b/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb index 473fa9cfdc71..5293f85044fc 100644 --- a/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb +++ b/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb @@ -7,6 +7,8 @@ "source": [ "# Example: Training Esperanto ASR model using Mozilla Common Voice Dataset\n", "\n", + "NOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", + "\n", "\n", "Training an ASR model for a new language can be challenging, especially for low-resource languages (see [example](https://github.com/NVIDIA/NeMo/blob/main/docs/source/asr/examples/kinyarwanda_asr.rst) for Kinyarwanda CommonVoice ASR model).\n", "\n", diff --git a/tutorials/asr/ASR_for_telephony_speech.ipynb b/tutorials/asr/ASR_for_telephony_speech.ipynb index 5d214ae0e719..1db6a631d6bc 100644 --- a/tutorials/asr/ASR_for_telephony_speech.ipynb +++ b/tutorials/asr/ASR_for_telephony_speech.ipynb @@ -17,6 +17,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", diff --git a/tutorials/asr/ASR_with_NeMo.ipynb b/tutorials/asr/ASR_with_NeMo.ipynb index 5ff13b9e1847..e843e93ec599 100644 --- a/tutorials/asr/ASR_with_NeMo.ipynb +++ b/tutorials/asr/ASR_with_NeMo.ipynb @@ -43,6 +43,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", diff --git a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb index e5204549af1c..2d540930733a 100644 --- a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb +++ b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb @@ -31,6 +31,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "\n", "# Install dependencies\n", diff --git a/tutorials/asr/ASR_with_Transducers.ipynb b/tutorials/asr/ASR_with_Transducers.ipynb index d2c59f02393a..59ef8c17d40b 100644 --- a/tutorials/asr/ASR_with_Transducers.ipynb +++ b/tutorials/asr/ASR_with_Transducers.ipynb @@ -17,6 +17,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "import os\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference.ipynb b/tutorials/asr/Buffered_Transducer_Inference.ipynb index 822256cbfcbe..7ae01cbace8b 100644 --- a/tutorials/asr/Buffered_Transducer_Inference.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference.ipynb @@ -17,6 +17,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb index e409dcb90b35..d4caac73899e 100644 --- a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb @@ -35,6 +35,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", diff --git a/tutorials/asr/Intro_to_Transducers.ipynb b/tutorials/asr/Intro_to_Transducers.ipynb index 1d2fd0dde4d5..3af72397866a 100644 --- a/tutorials/asr/Intro_to_Transducers.ipynb +++ b/tutorials/asr/Intro_to_Transducers.ipynb @@ -32,6 +32,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "import os\n", diff --git a/tutorials/asr/Multilang_ASR.ipynb b/tutorials/asr/Multilang_ASR.ipynb index 4e58b0607d32..eba666bdf808 100644 --- a/tutorials/asr/Multilang_ASR.ipynb +++ b/tutorials/asr/Multilang_ASR.ipynb @@ -10,6 +10,9 @@ "This notebook helps you get started with NeMo multilingual ASR models; i.e. models that can transcribe audio in more than one language. You will learn how to use an existing pre-trained multilingual model for transcription, as well as how to create and train a new one.\n", "\n", "\n", + "NOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", + "\n", + "\n", "Please note that the current NeMo implementation is limited to models with subword tokenization. \n", "\n", "\n", diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb index 22df23009392..9d134c99354b 100644 --- a/tutorials/asr/Offline_ASR.ipynb +++ b/tutorials/asr/Offline_ASR.ipynb @@ -31,6 +31,7 @@ "\n", "You may find more info on how to train and use language models for ASR models here:\n", "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/asr/asr_language_modeling.html\n" + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", ] }, { diff --git a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb index a209f29ab7ee..04125a030006 100644 --- a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb +++ b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb @@ -15,6 +15,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", diff --git a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb index 2579d9fa51ae..da38a15ebf8f 100644 --- a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb +++ b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb @@ -16,6 +16,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Set up microphone for Colab\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", diff --git a/tutorials/asr/Online_Noise_Augmentation.ipynb b/tutorials/asr/Online_Noise_Augmentation.ipynb index 49b14402e09e..bf21c7992089 100644 --- a/tutorials/asr/Online_Noise_Augmentation.ipynb +++ b/tutorials/asr/Online_Noise_Augmentation.ipynb @@ -22,6 +22,7 @@ "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", diff --git a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb index 93398903d4ff..940b19605a7d 100644 --- a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb @@ -16,6 +16,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Set up microphone for Colab\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", diff --git a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb index db65b35e7149..c956f3979227 100644 --- a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb @@ -18,6 +18,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Set up microphone for Colab\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", diff --git a/tutorials/asr/Self_Supervised_Pre_Training.ipynb b/tutorials/asr/Self_Supervised_Pre_Training.ipynb index 49a0dd9ed8c2..c6f50f5595d7 100644 --- a/tutorials/asr/Self_Supervised_Pre_Training.ipynb +++ b/tutorials/asr/Self_Supervised_Pre_Training.ipynb @@ -17,6 +17,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", diff --git a/tutorials/asr/Speech_Commands.ipynb b/tutorials/asr/Speech_Commands.ipynb index 34e0457d30f4..245d64c1f45a 100644 --- a/tutorials/asr/Speech_Commands.ipynb +++ b/tutorials/asr/Speech_Commands.ipynb @@ -51,6 +51,7 @@ "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", diff --git a/tutorials/asr/Streaming_ASR.ipynb b/tutorials/asr/Streaming_ASR.ipynb index af625b7781a0..0bd437e06cf0 100644 --- a/tutorials/asr/Streaming_ASR.ipynb +++ b/tutorials/asr/Streaming_ASR.ipynb @@ -17,6 +17,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", diff --git a/tutorials/asr/Voice_Activity_Detection.ipynb b/tutorials/asr/Voice_Activity_Detection.ipynb index 85d9061c6b43..030f2066a6ab 100644 --- a/tutorials/asr/Voice_Activity_Detection.ipynb +++ b/tutorials/asr/Voice_Activity_Detection.ipynb @@ -18,6 +18,7 @@ "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", diff --git a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb index 143be330e082..95f3aaec1d87 100644 --- a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb +++ b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb @@ -38,6 +38,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "import os\n", From 00f5e9b5b7949fbff598479980f51b8230e65e19 Mon Sep 17 00:00:00 2001 From: George <37293288+Jorjeous@users.noreply.github.com> Date: Wed, 26 Apr 2023 20:33:01 +0400 Subject: [PATCH 037/512] fix (#6502) datastore_path_to_webdataset_url(p) if is_datastore_path(p) and is_tarred_path(p) else p NameError: name 'is_tarred_path' is not defined Co-authored-by: George --- nemo/collections/asr/data/audio_to_text.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nemo/collections/asr/data/audio_to_text.py b/nemo/collections/asr/data/audio_to_text.py index 756c05631627..d61f0e1f69ef 100644 --- a/nemo/collections/asr/data/audio_to_text.py +++ b/nemo/collections/asr/data/audio_to_text.py @@ -38,6 +38,7 @@ datastore_path_to_webdataset_url, is_datastore_cache_shared, is_datastore_path, + is_tarred_path, ) from nemo.utils.get_rank import is_global_rank_zero From 44b8481c5aaaad62ee05c912e7b3d83d23639f90 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 26 Apr 2023 13:52:48 -0700 Subject: [PATCH 038/512] fix broken links r1.18.0 (#6501) (#6504) * fix broken links * fix broken links --------- Signed-off-by: Evelina Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> --- tutorials/tools/CTC_Segmentation_Tutorial.ipynb | 4 ++-- tutorials/tts/Pronunciation_customization.ipynb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb index 1a0c78173714..e2419273c5e4 100644 --- a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb +++ b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb @@ -280,7 +280,7 @@ "* `max_length` argument - max number of words in a segment for alignment (used only if there are no punctuation marks present in the original text. Long non-speech segments are better for segments split and are more likely to co-occur with punctuation marks. Random text split could deteriorate the quality of the alignment.\n", "* out-of-vocabulary words will be removed based on pre-trained ASR model vocabulary, and the text will be changed to lowercase \n", "* sentences for alignment with the original punctuation and capitalization will be stored under `$OUTPUT_DIR/processed/*_with_punct.txt`\n", - "* numbers will be converted from written to their spoken form with `num2words` package. For English, it's recommended to use NeMo normalization tool use `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`tutorials/text_processing/Text_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/text_processing/Text_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. Note, the audio-based normalization of long audio samples is not supported due to multiple normalization options. See [NeMo/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo/blob/stable/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n", + "* numbers will be converted from written to their spoken form with `num2words` package. For English, it's recommended to use NeMo normalization tool use `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`https://github.com/NVIDIA/NeMo-text-processing/blob/main/tutorials/Text_(Inverse)_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo-text-processing/blob/main/tutorials/Text_(Inverse)_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. See [https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n", "\n", "### Audio preprocessing:\n", "* non '.wav' audio files will be converted to `.wav` format\n", @@ -714,4 +714,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/tutorials/tts/Pronunciation_customization.ipynb b/tutorials/tts/Pronunciation_customization.ipynb index 78635ed6fc22..1e762b86cf9d 100644 --- a/tutorials/tts/Pronunciation_customization.ipynb +++ b/tutorials/tts/Pronunciation_customization.ipynb @@ -58,7 +58,7 @@ "* *[heteronyms](https://en.wikipedia.org/wiki/Heteronym_(linguistics))* - words with the same spelling but different pronunciations and/or meanings, e.g., *bass* (the fish) and *bass* (the musical instrument).\n", "\n", "#### Important NeMo flags:\n", - "* `your_spec_generator_model.vocab.g2p.phoneme_dict` - phoneme dictionary that maps words to their phonetic transcriptions, e.g., [ARPABET-based CMU Dictionary](https://github.com/NVIDIA/NeMo/blob/r1.14.0/scripts/tts_dataset_files/cmudict-0.7b_nv22.10) or [IPA-based CMU Dictionary](https://github.com/NVIDIA/NeMo/blob/r1.14.0/scripts/tts_dataset_files/ipa_cmudict-0.7b_nv23.01.txt)\n", + "* `your_spec_generator_model.vocab.g2p.phoneme_dict` - phoneme dictionary that maps words to their phonetic transcriptions, e.g., [ARPABET-based CMU Dictionary](https://raw.githubusercontent.com/NVIDIA/NeMo/stable/scripts/tts_dataset_files/cmudict-0.7b_nv22.10) or [IPA-based CMU Dictionary](https://github.com/NVIDIA/NeMo/blob/stable/scripts/tts_dataset_files/ipa_cmudict-0.7b_nv23.01.txt)\n", "* `your_spec_generator_model.vocab.g2p.heteronyms` - list of the model's heteronyms, grapheme form of these words will be used even if the word is present in the phoneme dictionary.\n", "* `your_spec_generator_model.vocab.g2p.ignore_ambiguous_words`: if is set to **True**, words with more than one phonetic representation in the pronunciation dictionary are ignored. This flag is relevant to the words with multiple valid phonetic transcriptions in the dictionary that are not in `your_spec_generator_model.vocab.g2p.heteronyms` list.\n", "* `your_spec_generator_model.vocab.phoneme_probability` - phoneme probability flag in the Tokenizer and the same from in the G2P module: `your_spec_generator_model.vocab.g2p.phoneme_probability` ([0, 1]). If a word is present in the phoneme dictionary, we still want our TTS model to see graphemes and phonemes during training to handle OOV words during inference. The `phoneme_probability` determines the probability of an unambiguous dictionary word appearing in phonetic form during model training, `(1 - phoneme_probability)` is the probability of the graphemes. This flag is set to `1` in the parse() method during inference.\n", From e5d1923cb0925c5baceef774e177843acb397cff Mon Sep 17 00:00:00 2001 From: Ryan Langman Date: Thu, 27 Apr 2023 09:49:09 -0700 Subject: [PATCH 039/512] [TTS] Create functions for TTS preprocessing without dataloader (#6317) * [TTS] Create functions for TTS preprocessing without dataloader Signed-off-by: Ryan --- examples/tts/conf/features/feature_22050.yaml | 28 ++ examples/tts/conf/features/feature_44100.yaml | 28 ++ .../tts/parts/preprocessing/features.py | 335 ++++++++++++++++++ .../tts/parts/utils/tts_dataset_utils.py | 41 ++- .../tts/audio_processing/preprocess_audio.py | 4 +- .../tts/compute_features.py | 90 +++++ .../tts/compute_speaker_stats.py | 26 +- .../tts/parts/preprocessing/test_features.py | 183 ++++++++++ .../tts/parts/utils/test_tts_dataset_utils.py | 55 +++ 9 files changed, 774 insertions(+), 16 deletions(-) create mode 100644 examples/tts/conf/features/feature_22050.yaml create mode 100644 examples/tts/conf/features/feature_44100.yaml create mode 100644 nemo/collections/tts/parts/preprocessing/features.py create mode 100644 scripts/dataset_processing/tts/compute_features.py create mode 100644 tests/collections/tts/parts/preprocessing/test_features.py create mode 100644 tests/collections/tts/parts/utils/test_tts_dataset_utils.py diff --git a/examples/tts/conf/features/feature_22050.yaml b/examples/tts/conf/features/feature_22050.yaml new file mode 100644 index 000000000000..c5779500bc3c --- /dev/null +++ b/examples/tts/conf/features/feature_22050.yaml @@ -0,0 +1,28 @@ +sample_rate: 22050 +win_length: 1024 +hop_length: 256 + +mel_feature: + _target_: nemo.collections.tts.parts.preprocessing.features.MelSpectrogramFeaturizer + sample_rate: ${sample_rate} + win_length: ${win_length} + hop_length: ${hop_length} + mel_dim: 80 + lowfreq: 0 + highfreq: 8000 + +pitch_feature: + _target_: nemo.collections.tts.parts.preprocessing.features.PitchFeaturizer + sample_rate: ${sample_rate} + win_length: ${win_length} + hop_length: ${hop_length} + pitch_fmin: 60 + pitch_fmax: 640 + +energy_feature: + _target_: nemo.collections.tts.parts.preprocessing.features.EnergyFeaturizer + spec_featurizer: ${mel_feature} + +featurizers: + pitch: ${pitch_feature} + energy: ${energy_feature} diff --git a/examples/tts/conf/features/feature_44100.yaml b/examples/tts/conf/features/feature_44100.yaml new file mode 100644 index 000000000000..0cfc27f4dab3 --- /dev/null +++ b/examples/tts/conf/features/feature_44100.yaml @@ -0,0 +1,28 @@ +sample_rate: 44100 +win_length: 2048 +hop_length: 512 + +mel_feature: + _target_: nemo.collections.tts.parts.preprocessing.features.MelSpectrogramFeaturizer + sample_rate: ${sample_rate} + win_length: ${win_length} + hop_length: ${hop_length} + mel_dim: 80 + lowfreq: 0 + highfreq: null + +pitch_feature: + _target_: nemo.collections.tts.parts.preprocessing.features.PitchFeaturizer + sample_rate: ${sample_rate} + win_length: ${win_length} + hop_length: ${hop_length} + pitch_fmin: 60 + pitch_fmax: 640 + +energy_feature: + _target_: nemo.collections.tts.parts.preprocessing.features.EnergyFeaturizer + spec_featurizer: ${mel_feature} + +featurizers: + pitch: ${pitch_feature} + energy: ${energy_feature} diff --git a/nemo/collections/tts/parts/preprocessing/features.py b/nemo/collections/tts/parts/preprocessing/features.py new file mode 100644 index 000000000000..675d61adeebe --- /dev/null +++ b/nemo/collections/tts/parts/preprocessing/features.py @@ -0,0 +1,335 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Dict, Optional, Tuple, Union + +import librosa +import numpy as np +import torch +from torch import Tensor + +from nemo.collections.asr.modules import AudioToMelSpectrogramPreprocessor +from nemo.collections.tts.parts.utils.tts_dataset_utils import get_audio_filepaths +from nemo.utils.decorators import experimental + + +@experimental +class Featurizer(ABC): + @abstractmethod + def save(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> None: + """ + Save feature value to disk for given manifest entry. + + Args: + manifest_entry: Manifest entry dictionary. + audio_dir: base directory where audio is stored. + feature_dir: base directory where features will be stored. + """ + + @abstractmethod + def load(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> Dict[str, Tensor]: + """ + Read saved feature value for given manifest entry. + + Args: + manifest_entry: Manifest entry dictionary. + audio_dir: base directory where audio is stored. + feature_dir: base directory where features were stored by save(). + + Returns: + Dictionary of feature names to Tensors + """ + + +def _get_feature_filepath(manifest_entry: dict, audio_dir: Path, feature_dir: Path, feature_name: str) -> Path: + """ + Get the absolute path for the feature file corresponding to the input manifest entry + + Example: audio_filepath "/speaker1/audio1.wav" becomes + feature_filepath "//speaker1/audio1.pt" + """ + _, audio_filepath_rel = get_audio_filepaths(manifest_entry=manifest_entry, audio_dir=audio_dir) + feature_filepath = feature_dir / feature_name / audio_filepath_rel.with_suffix(".pt") + return feature_filepath + + +def _save_pt_feature( + feature_name: Optional[str], feature_tensor: Tensor, manifest_entry: Dict, audio_dir: Path, feature_dir: Path, +) -> None: + """ + If feature_name is provided, save feature as .pt file. + """ + if feature_name is None: + return + + feature_filepath = _get_feature_filepath( + manifest_entry=manifest_entry, audio_dir=audio_dir, feature_dir=feature_dir, feature_name=feature_name + ) + feature_filepath.parent.mkdir(exist_ok=True, parents=True) + torch.save(feature_tensor, feature_filepath) + + +def _load_pt_feature( + feature_dict: Dict, feature_name: Optional[str], manifest_entry: Dict, audio_dir: Path, feature_dir: Path, +) -> None: + """ + If feature_name is provided, load feature into feature_dict from .pt file. + """ + + if feature_name is None: + return + + feature_filepath = _get_feature_filepath( + manifest_entry=manifest_entry, audio_dir=audio_dir, feature_dir=feature_dir, feature_name=feature_name + ) + feature_tensor = torch.load(feature_filepath) + feature_dict[feature_name] = feature_tensor + + +class MelSpectrogramFeaturizer: + def __init__( + self, + feature_name: str = "mel_spec", + sample_rate: int = 22050, + mel_dim: int = 80, + win_length: int = 1024, + hop_length: int = 256, + lowfreq: int = 0, + highfreq: int = 8000, + log: bool = True, + log_zero_guard_type: str = "add", + log_zero_guard_value: float = 1.0, + mel_norm: Optional[Union[str, int]] = None, + ) -> None: + self.feature_name = feature_name + self.sample_rate = sample_rate + self.win_length = win_length + self.hop_length = hop_length + + self.preprocessor = AudioToMelSpectrogramPreprocessor( + sample_rate=sample_rate, + features=mel_dim, + pad_to=1, + n_window_size=win_length, + n_window_stride=hop_length, + window_size=False, + window_stride=False, + n_fft=win_length, + lowfreq=lowfreq, + highfreq=highfreq, + log=log, + log_zero_guard_type=log_zero_guard_type, + log_zero_guard_value=log_zero_guard_value, + mel_norm=mel_norm, + ) + + def compute_mel_spec(self, manifest_entry: dict, audio_dir: Path) -> Tensor: + """ + Computes mel spectrogram for the input manifest entry. + + Args: + manifest_entry: Manifest entry dictionary. + audio_dir: base directory where audio is store + + Returns: + [spec_dim, T_spec] float tensor containing spectrogram features. + """ + + audio_filepath, _ = get_audio_filepaths(manifest_entry=manifest_entry, audio_dir=audio_dir) + audio, _ = librosa.load(path=audio_filepath, sr=self.sample_rate) + + # [1, T_audio] + audio_tensor = torch.tensor(audio[np.newaxis, :], dtype=torch.float32) + # [1] + audio_len_tensor = torch.tensor([audio.shape[0]], dtype=torch.int32) + + # [1, spec_dim, T_spec] + spec_tensor, _ = self.preprocessor(input_signal=audio_tensor, length=audio_len_tensor) + # [spec_dim, T_spec] + spec_tensor = spec_tensor.detach()[0] + + return spec_tensor + + def save(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> None: + spec_tensor = self.compute_mel_spec(manifest_entry=manifest_entry, audio_dir=audio_dir) + _save_pt_feature( + feature_name=self.feature_name, + feature_tensor=spec_tensor, + manifest_entry=manifest_entry, + audio_dir=audio_dir, + feature_dir=feature_dir, + ) + + def load(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> Dict[str, Tensor]: + feature_dict = {} + _load_pt_feature( + feature_dict=feature_dict, + feature_name=self.feature_name, + manifest_entry=manifest_entry, + audio_dir=audio_dir, + feature_dir=feature_dir, + ) + return feature_dict + + +class EnergyFeaturizer: + def __init__(self, spec_featurizer: MelSpectrogramFeaturizer, feature_name: str = "energy") -> None: + self.feature_name = feature_name + self.spec_featurizer = spec_featurizer + + def compute_energy(self, manifest_entry: dict, audio_dir: Path) -> Tensor: + """ + Computes energy for the input manifest entry. + + Args: + manifest_entry: Manifest entry dictionary. + audio_dir: base directory where audio is store + + Returns: + [T_spec] float tensor containing energy features. + """ + # [1, T_audio] + spec = self.spec_featurizer.compute_mel_spec(manifest_entry=manifest_entry, audio_dir=audio_dir) + # [T_audio] + energy = torch.linalg.norm(spec, axis=0) + + return energy + + def save(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> None: + energy_tensor = self.compute_energy(manifest_entry=manifest_entry, audio_dir=audio_dir) + _save_pt_feature( + feature_name=self.feature_name, + feature_tensor=energy_tensor, + manifest_entry=manifest_entry, + audio_dir=audio_dir, + feature_dir=feature_dir, + ) + + def load(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> Dict[str, Tensor]: + feature_dict = {} + _load_pt_feature( + feature_dict=feature_dict, + feature_name=self.feature_name, + manifest_entry=manifest_entry, + audio_dir=audio_dir, + feature_dir=feature_dir, + ) + return feature_dict + + +class PitchFeaturizer: + def __init__( + self, + pitch_name: Optional[str] = "pitch", + voiced_mask_name: Optional[str] = "voiced_mask", + voiced_prob_name: Optional[str] = None, + sample_rate: int = 22050, + win_length: int = 1024, + hop_length: int = 256, + pitch_fmin: int = librosa.note_to_hz('C2'), + pitch_fmax: int = librosa.note_to_hz('C7'), + ) -> None: + self.pitch_name = pitch_name + self.voiced_mask_name = voiced_mask_name + self.voiced_prob_name = voiced_prob_name + self.sample_rate = sample_rate + self.win_length = win_length + self.hop_length = hop_length + self.pitch_fmin = pitch_fmin + self.pitch_fmax = pitch_fmax + + def compute_pitch(self, manifest_entry: dict, audio_dir: Path) -> Tuple[Tensor, Tensor, Tensor]: + """ + Computes pitch and optional voiced mask for the input manifest entry. + + Args: + manifest_entry: Manifest entry dictionary. + audio_dir: base directory where audio is store + + Returns: + pitch: [T_spec] float tensor containing pitch for each audio frame. + voiced_mask: [T_spec] bool tensor indicating whether each audio frame is voiced. + voiced_prob: [T_spec] float array with [0, 1] probability that each audio frame is voiced. + """ + audio_filepath, _ = get_audio_filepaths(manifest_entry=manifest_entry, audio_dir=audio_dir) + audio, _ = librosa.load(path=audio_filepath, sr=self.sample_rate) + + pitch, voiced_mask, voiced_prob = librosa.pyin( + audio, + fmin=self.pitch_fmin, + fmax=self.pitch_fmax, + frame_length=self.win_length, + hop_length=self.hop_length, + sr=self.sample_rate, + fill_na=0.0, + ) + pitch_tensor = torch.tensor(pitch, dtype=torch.float32) + voiced_mask_tensor = torch.tensor(voiced_mask, dtype=torch.bool) + voiced_prob_tensor = torch.tensor(voiced_prob, dtype=torch.float32) + + return pitch_tensor, voiced_mask_tensor, voiced_prob_tensor + + def save(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> None: + pitch_tensor, voiced_mask_tensor, voiced_prob_tensor = self.compute_pitch( + manifest_entry=manifest_entry, audio_dir=audio_dir + ) + _save_pt_feature( + feature_name=self.pitch_name, + feature_tensor=pitch_tensor, + manifest_entry=manifest_entry, + audio_dir=audio_dir, + feature_dir=feature_dir, + ) + _save_pt_feature( + feature_name=self.voiced_mask_name, + feature_tensor=voiced_mask_tensor, + manifest_entry=manifest_entry, + audio_dir=audio_dir, + feature_dir=feature_dir, + ) + _save_pt_feature( + feature_name=self.voiced_prob_name, + feature_tensor=voiced_prob_tensor, + manifest_entry=manifest_entry, + audio_dir=audio_dir, + feature_dir=feature_dir, + ) + + def load(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> Dict[str, Tensor]: + feature_dict = {} + _load_pt_feature( + feature_dict=feature_dict, + feature_name=self.pitch_name, + manifest_entry=manifest_entry, + audio_dir=audio_dir, + feature_dir=feature_dir, + ) + _load_pt_feature( + feature_dict=feature_dict, + feature_name=self.voiced_mask_name, + manifest_entry=manifest_entry, + audio_dir=audio_dir, + feature_dir=feature_dir, + ) + _load_pt_feature( + feature_dict=feature_dict, + feature_name=self.voiced_prob_name, + manifest_entry=manifest_entry, + audio_dir=audio_dir, + feature_dir=feature_dir, + ) + return feature_dict diff --git a/nemo/collections/tts/parts/utils/tts_dataset_utils.py b/nemo/collections/tts/parts/utils/tts_dataset_utils.py index 039bc7398d3a..f07b2a9a5b74 100644 --- a/nemo/collections/tts/parts/utils/tts_dataset_utils.py +++ b/nemo/collections/tts/parts/utils/tts_dataset_utils.py @@ -24,24 +24,41 @@ from torch.special import gammaln -def get_audio_paths(audio_path: Path, base_path: Path) -> Tuple[Path, Path]: - if os.path.isabs(audio_path): - abs_path = audio_path - rel_path = audio_path.relative_to(base_path) +def get_abs_rel_paths(input_path: Path, base_path: Path) -> Tuple[Path, Path]: + """ + Get the absolute and relative paths of input file path. + + Args: + input_path: An absolute or relative path. + base_path: base directory the input is relative to. + + Returns: + The absolute and relative paths of the file. + """ + if os.path.isabs(input_path): + abs_path = input_path + rel_path = input_path.relative_to(base_path) else: - rel_path = audio_path + rel_path = input_path abs_path = base_path / rel_path return abs_path, rel_path -def get_sup_data_file_path(entry: dict, base_audio_path: Path, sup_data_path: Path) -> Path: - audio_path = Path(entry["audio_filepath"]) - rel_audio_path = audio_path.relative_to(base_audio_path).with_suffix("") - audio_id = str(rel_audio_path).replace(os.sep, "_") - file_name = f"{audio_id}.pt" - file_path = Path(os.path.join(sup_data_path, file_name)) - return file_path +def get_audio_filepaths(manifest_entry: dict, audio_dir: Path) -> Tuple[Path, Path]: + """ + Get the absolute and relative paths of audio from a manifest entry. + + Args: + manifest_entry: Manifest entry dictionary. + audio_dir: base directory where audio is stored. + + Returns: + The absolute and relative paths of the audio. + """ + audio_filepath = Path(manifest_entry["audio_filepath"]) + audio_filepath_abs, audio_filepath_rel = get_abs_rel_paths(input_path=audio_filepath, base_path=audio_dir) + return audio_filepath_abs, audio_filepath_rel def normalize_volume(audio: np.array, volume_level: float) -> np.array: diff --git a/scripts/dataset_processing/tts/audio_processing/preprocess_audio.py b/scripts/dataset_processing/tts/audio_processing/preprocess_audio.py index 515e81551542..b0a4be54da33 100644 --- a/scripts/dataset_processing/tts/audio_processing/preprocess_audio.py +++ b/scripts/dataset_processing/tts/audio_processing/preprocess_audio.py @@ -52,7 +52,7 @@ from nemo.collections.asr.parts.utils.manifest_utils import read_manifest, write_manifest from nemo.collections.tts.parts.preprocessing.audio_trimming import AudioTrimmer -from nemo.collections.tts.parts.utils.tts_dataset_utils import get_audio_paths, normalize_volume +from nemo.collections.tts.parts.utils.tts_dataset_utils import get_abs_rel_paths, normalize_volume from nemo.utils import logging @@ -116,7 +116,7 @@ def _process_entry( ) -> Tuple[dict, float, float]: audio_filepath = Path(entry["audio_filepath"]) - audio_path, audio_path_rel = get_audio_paths(audio_path=audio_filepath, base_path=input_audio_dir) + audio_path, audio_path_rel = get_abs_rel_paths(input_path=audio_filepath, base_path=input_audio_dir) output_path = output_audio_dir / audio_path_rel output_path.parent.mkdir(exist_ok=True, parents=True) diff --git a/scripts/dataset_processing/tts/compute_features.py b/scripts/dataset_processing/tts/compute_features.py new file mode 100644 index 000000000000..7b847eb571be --- /dev/null +++ b/scripts/dataset_processing/tts/compute_features.py @@ -0,0 +1,90 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This script computes features for TTS models prior to training, such as pitch and energy. +The resulting features will be stored in the provided 'feature_dir'. + +$ python /scripts/dataset_processing/tts/compute_features.py \ + --feature_config_path=/examples/tts/conf/features/feature_22050.yaml \ + --manifest_path=/manifest.json \ + --audio_dir=/audio \ + --feature_dir=/features \ + --num_workers=1 +""" + +import argparse +from pathlib import Path + +from hydra.utils import instantiate +from joblib import Parallel, delayed +from omegaconf import OmegaConf +from tqdm import tqdm + +from nemo.collections.asr.parts.utils.manifest_utils import read_manifest + + +def get_args(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Compute TTS features.", + ) + parser.add_argument( + "--feature_config_path", required=True, type=Path, help="Path to feature config file.", + ) + parser.add_argument( + "--manifest_path", required=True, type=Path, help="Path to training manifest.", + ) + parser.add_argument( + "--audio_dir", required=True, type=Path, help="Path to base directory with audio data.", + ) + parser.add_argument( + "--feature_dir", required=True, type=Path, help="Path to directory where feature data will be stored.", + ) + parser.add_argument( + "--num_workers", default=1, type=int, help="Number of parallel threads to use. If -1 all CPUs are used." + ) + args = parser.parse_args() + return args + + +def main(): + args = get_args() + feature_config_path = args.feature_config_path + manifest_path = args.manifest_path + audio_dir = args.audio_dir + feature_dir = args.feature_dir + num_workers = args.num_workers + + if not manifest_path.exists(): + raise ValueError(f"Manifest {manifest_path} does not exist.") + + if not audio_dir.exists(): + raise ValueError(f"Audio directory {audio_dir} does not exist.") + + feature_config = OmegaConf.load(feature_config_path) + feature_config = instantiate(feature_config) + featurizers = feature_config.featurizers + + entries = read_manifest(manifest_path) + + for feature_name, featurizer in featurizers.items(): + print(f"Computing: {feature_name}") + Parallel(n_jobs=num_workers)( + delayed(featurizer.save)(manifest_entry=entry, audio_dir=audio_dir, feature_dir=feature_dir,) + for entry in tqdm(entries) + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/dataset_processing/tts/compute_speaker_stats.py b/scripts/dataset_processing/tts/compute_speaker_stats.py index 253b6a6444c5..5061edb216c9 100644 --- a/scripts/dataset_processing/tts/compute_speaker_stats.py +++ b/scripts/dataset_processing/tts/compute_speaker_stats.py @@ -35,7 +35,7 @@ from tqdm import tqdm from nemo.collections.asr.parts.utils.manifest_utils import read_manifest -from nemo.collections.tts.parts.utils.tts_dataset_utils import get_base_dir, get_sup_data_file_path +from nemo.collections.tts.parts.utils.tts_dataset_utils import get_base_dir from nemo.collections.tts.torch.tts_data_types import Pitch from nemo.utils import logging @@ -67,6 +67,28 @@ def _compute_stats(values: List[torch.Tensor]) -> Tuple[float, float]: return mean, std +def _get_sup_data_filepath(manifest_entry: dict, audio_dir: Path, sup_data_dir: Path) -> Path: + """ + Get the absolute path of a supplementary data type for the input manifest entry. + + Example: audio_filepath "/speaker1/audio1.wav" becomes "/speaker1_audio1.pt" + + Args: + manifest_entry: Manifest entry dictionary. + audio_dir: base directory where audio is stored. + sup_data_dir: base directory where supplementary data is stored. + + Returns: + Path to the supplementary data file. + """ + audio_path = Path(manifest_entry["audio_filepath"]) + rel_audio_path = audio_path.relative_to(audio_dir) + rel_sup_data_path = rel_audio_path.with_suffix(".pt") + sup_data_filename = str(rel_sup_data_path).replace(os.sep, "_") + sup_data_filepath = sup_data_dir / sup_data_filename + return sup_data_filepath + + def main(): args = get_args() manifest_path = args.manifest_path @@ -88,7 +110,7 @@ def main(): global_pitch_values = [] speaker_pitch_values = defaultdict(list) for entry in tqdm(entries): - pitch_path = get_sup_data_file_path(entry, base_dir, pitch_data_path) + pitch_path = _get_sup_data_filepath(manifest_entry=entry, audio_dir=base_dir, sup_data_dir=pitch_data_path) if not os.path.exists(pitch_path): logging.warning(f"Unable to find pitch file for {entry}") continue diff --git a/tests/collections/tts/parts/preprocessing/test_features.py b/tests/collections/tts/parts/preprocessing/test_features.py new file mode 100644 index 000000000000..873e112b2158 --- /dev/null +++ b/tests/collections/tts/parts/preprocessing/test_features.py @@ -0,0 +1,183 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import contextlib +import tempfile +from pathlib import Path + +import numpy as np +import pytest +import soundfile as sf +import torch + +from nemo.collections.tts.parts.preprocessing.features import ( + EnergyFeaturizer, + MelSpectrogramFeaturizer, + PitchFeaturizer, +) + + +class TestTTSFeatures: + def setup_class(self): + self.audio_filename = "test.wav" + self.spec_dim = 80 + self.hop_len = 100 + self.audio_len = 10000 + self.sample_rate = 20000 + self.spec_len = 1 + (self.audio_len // self.hop_len) + self.manifest_entry = {"audio_filepath": self.audio_filename} + + @contextlib.contextmanager + def _create_test_dir(self): + test_audio = np.random.uniform(size=[self.audio_len]) + temp_dir = tempfile.TemporaryDirectory() + try: + test_dir = Path(temp_dir.name) + audio_path = test_dir / self.audio_filename + sf.write(audio_path, test_audio, self.sample_rate) + yield test_dir + finally: + temp_dir.cleanup() + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_compute_mel_spectrogram(self): + mel_featurizer = MelSpectrogramFeaturizer( + mel_dim=self.spec_dim, hop_length=self.hop_len, sample_rate=self.sample_rate + ) + + with self._create_test_dir() as test_dir: + spec = mel_featurizer.compute_mel_spec(manifest_entry=self.manifest_entry, audio_dir=test_dir) + + assert len(spec.shape) == 2 + assert spec.dtype == torch.float32 + assert spec.shape[0] == self.spec_dim + assert spec.shape[1] == self.spec_len + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_save_and_load_mel_spectrogram(self): + mel_name = "mel_test" + mel_featurizer = MelSpectrogramFeaturizer( + feature_name=mel_name, mel_dim=self.spec_dim, hop_length=self.hop_len, sample_rate=self.sample_rate + ) + + with self._create_test_dir() as test_dir: + feature_dir = test_dir / "feature" + mel_featurizer.save(manifest_entry=self.manifest_entry, audio_dir=test_dir, feature_dir=feature_dir) + mel_dict = mel_featurizer.load( + manifest_entry=self.manifest_entry, audio_dir=test_dir, feature_dir=feature_dir + ) + + mel_spec = mel_dict[mel_name] + assert len(mel_spec.shape) == 2 + assert mel_spec.dtype == torch.float32 + assert mel_spec.shape[0] == self.spec_dim + assert mel_spec.shape[1] == self.spec_len + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_compute_pitch(self): + pitch_featurizer = PitchFeaturizer(hop_length=self.hop_len, sample_rate=self.sample_rate) + + with self._create_test_dir() as test_dir: + pitch, voiced, voiced_prob = pitch_featurizer.compute_pitch( + manifest_entry=self.manifest_entry, audio_dir=test_dir + ) + + assert len(pitch.shape) == 1 + assert pitch.shape[0] == self.spec_len + assert pitch.dtype == torch.float32 + + assert len(voiced.shape) == 1 + assert voiced.shape[0] == self.spec_len + assert voiced.dtype == torch.bool + + assert len(voiced_prob.shape) == 1 + assert voiced_prob.shape[0] == self.spec_len + assert voiced_prob.dtype == torch.float32 + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_save_and_load_pitch(self): + pitch_name = "pitch_test" + voiced_mask_name = "voiced_mask_test" + voiced_prob_name = "voiced_prob_test" + pitch_featurizer = PitchFeaturizer( + pitch_name=pitch_name, + voiced_mask_name=voiced_mask_name, + voiced_prob_name=voiced_prob_name, + hop_length=self.hop_len, + sample_rate=self.sample_rate, + ) + + with self._create_test_dir() as test_dir: + feature_dir = test_dir / "feature" + pitch_featurizer.save(manifest_entry=self.manifest_entry, audio_dir=test_dir, feature_dir=feature_dir) + pitch_dict = pitch_featurizer.load( + manifest_entry=self.manifest_entry, audio_dir=test_dir, feature_dir=feature_dir + ) + + pitch = pitch_dict[pitch_name] + voiced_mask = pitch_dict[voiced_mask_name] + voiced_prob = pitch_dict[voiced_prob_name] + + assert len(pitch.shape) == 1 + assert pitch.shape[0] == self.spec_len + assert pitch.dtype == torch.float32 + + assert len(voiced_mask.shape) == 1 + assert voiced_mask.shape[0] == self.spec_len + assert voiced_mask.dtype == torch.bool + + assert len(voiced_prob.shape) == 1 + assert voiced_prob.shape[0] == self.spec_len + assert voiced_prob.dtype == torch.float32 + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_compute_energy(self): + mel_featurizer = MelSpectrogramFeaturizer( + mel_dim=self.spec_dim, hop_length=self.hop_len, sample_rate=self.sample_rate + ) + energy_featurizer = EnergyFeaturizer(spec_featurizer=mel_featurizer) + + with self._create_test_dir() as test_dir: + energy = energy_featurizer.compute_energy(manifest_entry=self.manifest_entry, audio_dir=test_dir) + + assert len(energy.shape) == 1 + assert energy.shape[0] == self.spec_len + assert energy.dtype == torch.float32 + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_save_and_load_energy(self): + energy_name = "energy_test" + mel_featurizer = MelSpectrogramFeaturizer( + mel_dim=self.spec_dim, hop_length=self.hop_len, sample_rate=self.sample_rate + ) + energy_featurizer = EnergyFeaturizer(feature_name=energy_name, spec_featurizer=mel_featurizer) + + with self._create_test_dir() as test_dir: + feature_dir = test_dir / "feature" + energy_featurizer.save(manifest_entry=self.manifest_entry, audio_dir=test_dir, feature_dir=feature_dir) + energy_dict = energy_featurizer.load( + manifest_entry=self.manifest_entry, audio_dir=test_dir, feature_dir=feature_dir + ) + + energy = energy_dict[energy_name] + assert len(energy.shape) == 1 + assert energy.shape[0] == self.spec_len + assert energy.dtype == torch.float32 diff --git a/tests/collections/tts/parts/utils/test_tts_dataset_utils.py b/tests/collections/tts/parts/utils/test_tts_dataset_utils.py new file mode 100644 index 000000000000..180c3ca9f7fc --- /dev/null +++ b/tests/collections/tts/parts/utils/test_tts_dataset_utils.py @@ -0,0 +1,55 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pathlib import Path + +import pytest + +from nemo.collections.tts.parts.utils.tts_dataset_utils import get_abs_rel_paths, get_audio_filepaths + + +class TestTTSDatasetUtils: + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_get_abs_rel_paths_input_abs(self): + input_path = Path("/home/data/audio/test") + base_path = Path("/home/data") + + abs_path, rel_path = get_abs_rel_paths(input_path=input_path, base_path=base_path) + + assert abs_path == input_path + assert rel_path == Path("audio/test") + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_get_abs_rel_paths_input_rel(self): + input_path = Path("audio/test") + base_path = Path("/home/data") + + abs_path, rel_path = get_abs_rel_paths(input_path=input_path, base_path=base_path) + + assert abs_path == Path("/home/data/audio/test") + assert rel_path == input_path + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_get_audio_paths(self): + audio_dir = Path("/home/audio") + audio_rel_path = Path("examples/example.wav") + manifest_entry = {"audio_filepath": str(audio_rel_path)} + + abs_path, rel_path = get_audio_filepaths(manifest_entry=manifest_entry, audio_dir=audio_dir) + + assert abs_path == Path("/home/audio/examples/example.wav") + assert rel_path == audio_rel_path From 9ea2bde9daae4e9a4b942736b895d9b3f6838da0 Mon Sep 17 00:00:00 2001 From: Slyne Deng Date: Thu, 27 Apr 2023 13:44:43 -0700 Subject: [PATCH 040/512] Cache aware streaming nfa (#6209) * add cache aware streaming to nemo aligner Signed-off-by: Slyne Deng --- nemo/collections/asr/parts/mixins/mixins.py | 157 ++++++++++++++++++- tools/nemo_forced_aligner/align.py | 6 + tools/nemo_forced_aligner/utils/data_prep.py | 16 +- 3 files changed, 171 insertions(+), 8 deletions(-) diff --git a/nemo/collections/asr/parts/mixins/mixins.py b/nemo/collections/asr/parts/mixins/mixins.py index f350dbcd5df0..a963850341f9 100644 --- a/nemo/collections/asr/parts/mixins/mixins.py +++ b/nemo/collections/asr/parts/mixins/mixins.py @@ -440,6 +440,7 @@ def conformer_stream_step( previous_pred_out: torch.Tensor = None, drop_extra_pre_encoded: int = None, return_transcription: bool = True, + return_log_probs: bool = False, ): """ It simulates a forward step with caching for streaming purposes. @@ -455,6 +456,7 @@ def conformer_stream_step( previous_pred_out: the predicted outputs from the previous step for CTC models drop_extra_pre_encoded: number of steps to drop from the beginning of the outputs after the downsampling module. This can be used if extra paddings are added on the left side of the input. return_transcription: whether to decode and return the transcriptions. It can not get disabled for Transducer models. + return_log_probs: whether to return the log probs, only valid for ctc model Returns: greedy_predictions: the greedy predictions from the decoder @@ -463,6 +465,9 @@ def conformer_stream_step( cache_last_time_next: the updated tensor cache for last time layers to be used for next streaming step cache_last_channel_next_len: the updated lengths for cache_last_channel best_hyp: the best hypotheses for the Transducer models + + log_probs: the logits tensor of current streaming chunk, only returned when return_log_probs=True + encoded_len: the length of the output log_probs + history chunk log_probs, only returned when return_log_probs=True """ if not isinstance(self, asr_models.EncDecRNNTModel) and not isinstance(self, asr_models.EncDecCTCModel): raise NotImplementedError(f"stream_step does not support {type(self)}!") @@ -475,6 +480,9 @@ def conformer_stream_step( "return_transcription can not be False for Transducer models as decoder returns the transcriptions too." ) + if not isinstance(self, asr_models.EncDecCTCModel) and return_log_probs is True: + logging.info("return_log_probs can only be True for CTC models.") + ( encoded, encoded_len, @@ -545,14 +553,159 @@ def conformer_stream_step( if all_hyp_or_transcribed_texts is None: all_hyp_or_transcribed_texts = best_hyp - return ( + result = [ greedy_predictions, all_hyp_or_transcribed_texts, cache_last_channel_next, cache_last_time_next, cache_last_channel_next_len, best_hyp, - ) + ] + if return_log_probs: + result.append(log_probs) + result.append(encoded_len) + + return tuple(result) + + @torch.no_grad() + def transcribe_simulate_cache_aware_streaming( + self, + paths2audio_files: List[str], + batch_size: int = 4, + logprobs: bool = False, + return_hypotheses: bool = False, + online_normalization: bool = False, + ): + """ + Args: + paths2audio_files: (a list) of paths to audio files. + batch_size: (int) batch size to use during inference. + Bigger will result in better throughput performance but would use more memory. + logprobs: (bool) pass True to get log probabilities instead of transcripts. + return_hypotheses: (bool) Either return hypotheses or text + With hypotheses can do some postprocessing like getting timestamp or rescoring + online_normalization: (bool) Perform normalization on the run per chunk. + Returns: + A list of transcriptions (or raw log probabilities if logprobs is True) in the same order as paths2audio_files + """ + if paths2audio_files is None or len(paths2audio_files) == 0: + return {} + + if return_hypotheses and logprobs: + raise ValueError( + "Either `return_hypotheses` or `logprobs` can be True at any given time." + "Returned hypotheses will contain the logprobs." + ) + + if not isinstance(self, asr_models.EncDecCTCModel): + raise NotImplementedError(f"simulate streaming does not support {type(self)}!") + + if not isinstance(self.encoder, StreamingEncoder): + raise NotImplementedError(f"Encoder of this model does not support streaming!") + + data_loader = self._setup_streaming_transcribe_dataloader(paths2audio_files, batch_size, online_normalization) + + total_log_probs = [] + total_texts = [] + + for streaming_buffer in data_loader: + streaming_buffer_iter = iter(streaming_buffer) + batch_size = len(streaming_buffer.streams_length) + cache_last_channel, cache_last_time, cache_last_channel_len = self.encoder.get_initial_cache_state( + batch_size=batch_size + ) + previous_hypotheses = None + pred_out_stream = None + encoded_len = None + transcribed_texts = None + batch_log_probs = [] + + for step_num, (chunk_audio, chunk_lengths) in enumerate(streaming_buffer_iter): + drop_extra_pre_encoded = self.encoder.streaming_cfg.drop_extra_pre_encoded if step_num != 0 else 0 + with torch.inference_mode(): + result = self.conformer_stream_step( + processed_signal=chunk_audio, + processed_signal_length=chunk_lengths, + cache_last_channel=cache_last_channel, + cache_last_time=cache_last_time, + cache_last_channel_len=cache_last_channel_len, + keep_all_outputs=streaming_buffer.is_buffer_empty(), + previous_hypotheses=previous_hypotheses, + previous_pred_out=pred_out_stream, + drop_extra_pre_encoded=drop_extra_pre_encoded, + return_transcription=True, + return_log_probs=logprobs or return_hypotheses, + ) + if logprobs or return_hypotheses: + ( + pred_out_stream, + transcribed_texts, + cache_last_channel, + cache_last_time, + cache_last_channel_len, + previous_hypotheses, + cur_chunk_log_probs, + encoded_len, + ) = result + batch_log_probs.append(cur_chunk_log_probs.cpu()) + else: + ( + pred_out_stream, + transcribed_texts, + cache_last_channel, + cache_last_time, + cache_last_channel_len, + previous_hypotheses, + ) = result + + if logprobs or return_hypotheses: + # concatenate chunk log probs on T dim + batch_log_probs = torch.cat(batch_log_probs, axis=1) + for log_probs, log_prob_len in zip(batch_log_probs, encoded_len): + total_log_probs.append(log_probs[0:log_prob_len]) + + if transcribed_texts is None: + total_texts += [''] * batch_size + else: + total_texts += transcribed_texts + + if logprobs: + return total_log_probs + + if not return_hypotheses: + return total_texts + + hyps = [] + for log_probs, text in zip(total_log_probs, total_texts): + hyps.append(Hypothesis(y_sequence=log_probs, text=text, score=0.0, dec_state=None)) + return hyps + + def _setup_streaming_transcribe_dataloader( + self, paths2audio_files: List[str], batch_size: int, online_normalization=False + ): + """ + Setup function for a temporary data loader which wraps the provided audio file. + + Args: + paths2audio_files: (a list) of paths to audio files. + batch_size: (int) batch size to use during inference. \ + Bigger will result in better throughput performance but would use more memory. + online_normalization: whether to do online normalization + Returns: + a new batch streaming buffer + """ + from nemo.collections.asr.parts.utils.streaming_utils import CacheAwareStreamingAudioBuffer + + streaming_buffer = CacheAwareStreamingAudioBuffer(model=self, online_normalization=online_normalization) + for sample_idx, sample in enumerate(paths2audio_files): + processed_signal, processed_signal_length, stream_id = streaming_buffer.append_audio_file( + sample, stream_id=-1 + ) + logging.info(f'Added this sample to the buffer: {sample}') + if (sample_idx + 1) % batch_size == 0 or sample_idx == len(paths2audio_files) - 1: + logging.info(f"Starting to stream samples {sample_idx - len(streaming_buffer) + 1} to {sample_idx}...") + yield streaming_buffer + streaming_buffer.reset_buffer() class DiarizationMixin(ABC): diff --git a/tools/nemo_forced_aligner/align.py b/tools/nemo_forced_aligner/align.py index e688060f529d..ed3ca3e45b5b 100644 --- a/tools/nemo_forced_aligner/align.py +++ b/tools/nemo_forced_aligner/align.py @@ -94,6 +94,8 @@ total_buffer_in_secs: float Length of buffer (chunk + left and right padding) in seconds chunk_batch_size: int batch size for buffered chunk inference, which will cut one audio into segments and do inference on chunk_batch_size segments at a time + + simulate_cache_aware_streaming: False, if set True, using cache aware streaming to do get the logits for alignment """ @@ -122,6 +124,9 @@ class AlignmentConfig: total_buffer_in_secs: float = 4.0 chunk_batch_size: int = 32 + # Cache aware streaming configs + simulate_cache_aware_streaming: Optional[bool] = False + @hydra_runner(config_name="AlignmentConfig", schema=AlignmentConfig) def main(cfg: AlignmentConfig): @@ -274,6 +279,7 @@ def main(cfg: AlignmentConfig): model, cfg.additional_ctm_grouping_separator, cfg.align_using_pred_text, + cfg.simulate_cache_aware_streaming, cfg.use_buffered_chunked_streaming, buffered_chunk_params, ) diff --git a/tools/nemo_forced_aligner/utils/data_prep.py b/tools/nemo_forced_aligner/utils/data_prep.py index c506bee0d818..852be91d78c4 100644 --- a/tools/nemo_forced_aligner/utils/data_prep.py +++ b/tools/nemo_forced_aligner/utils/data_prep.py @@ -72,14 +72,11 @@ def get_manifest_lines_batch(manifest_filepath, start, end): manifest_lines_batch = [] with open(manifest_filepath, "r") as f: for line_i, line in enumerate(f): - if line_i == start and line_i == end: + if line_i >= start and line_i <= end: manifest_lines_batch.append(json.loads(line)) - break if line_i == end: break - if line_i >= start: - manifest_lines_batch.append(json.loads(line)) return manifest_lines_batch @@ -291,6 +288,7 @@ def get_batch_tensors_and_boundary_info( model, separator, align_using_pred_text, + simulate_cache_aware_streaming=False, use_buffered_chunked_streaming=False, buffered_chunk_params={}, ): @@ -314,8 +312,14 @@ def get_batch_tensors_and_boundary_info( pred_text_batch = [] if not use_buffered_chunked_streaming: - with torch.no_grad(): - hypotheses = model.transcribe(audio_filepaths_batch, return_hypotheses=True, batch_size=B) + if not simulate_cache_aware_streaming: + with torch.no_grad(): + hypotheses = model.transcribe(audio_filepaths_batch, return_hypotheses=True, batch_size=B) + else: + with torch.no_grad(): + hypotheses = model.transcribe_simulate_cache_aware_streaming( + audio_filepaths_batch, return_hypotheses=True, batch_size=B + ) for hypothesis in hypotheses: log_probs_list_batch.append(hypothesis.y_sequence) T_list_batch.append(hypothesis.y_sequence.shape[0]) From 897703a82a087c05a892a7b940033de51284b40a Mon Sep 17 00:00:00 2001 From: Taejin Park Date: Fri, 28 Apr 2023 09:39:24 -0700 Subject: [PATCH 041/512] [BugFix] Force _get_batch_preds() to keep logits in decoder timestamps generator (#6499) * [BugFix] _get_batch_preds() is forced to keep logits in decoder timestamps generators Signed-off-by: Taejin Park * Ingnore keep_logits boolean in FrameASRBatchLogits Signed-off-by: Taejin Park --------- Signed-off-by: Taejin Park Co-authored-by: Jagadeesh Balam <4916480+jbalam-nv@users.noreply.github.com> --- .../asr/parts/utils/decoder_timestamps_utils.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/nemo/collections/asr/parts/utils/decoder_timestamps_utils.py b/nemo/collections/asr/parts/utils/decoder_timestamps_utils.py index 8e81d49939cb..f26b0c6b701a 100644 --- a/nemo/collections/asr/parts/utils/decoder_timestamps_utils.py +++ b/nemo/collections/asr/parts/utils/decoder_timestamps_utils.py @@ -232,7 +232,7 @@ def get_wer_feat_logit(audio_file_path, asr, frame_len, tokens_per_chunk, delay, return hyp, tokens, log_prob -class FrameBatchASR_Logits(FrameBatchASR): +class FrameBatchASRLogits(FrameBatchASR): """ A class for streaming frame-based ASR. Inherits from FrameBatchASR and adds new capability of returning the logit output. @@ -260,10 +260,9 @@ def read_audio_file_and_return(self, audio_filepath: str, delay: float, model_st self.set_frame_reader(frame_reader) @torch.no_grad() - def _get_batch_preds(self): + def _get_batch_preds(self, keep_logits): device = self.asr_model.device for batch in iter(self.data_loader): - feat_signal, feat_signal_len = batch feat_signal, feat_signal_len = feat_signal.to(device), feat_signal_len.to(device) log_probs, encoded_len, predictions = self.asr_model( @@ -272,9 +271,12 @@ def _get_batch_preds(self): preds = torch.unbind(predictions) for pred in preds: self.all_preds.append(pred.cpu().numpy()) + # Always keep logits in FrameBatchASRLogits + _ = keep_logits log_probs_tup = torch.unbind(log_probs) for log_prob in log_probs_tup: self.all_logprobs.append(log_prob) + del log_probs, log_probs_tup del encoded_len del predictions @@ -635,7 +637,7 @@ def run_ASR_BPE_CTC(self, asr_model: Type[EncDecCTCModelBPE]) -> Tuple[Dict, Dic log_prediction=asr_model._cfg.get("log_prediction", False), ) - frame_asr = FrameBatchASR_Logits( + frame_asr = FrameBatchASRLogits( asr_model=asr_model, frame_len=self.chunk_len_in_sec, total_buffer=self.total_buffer_in_secs, From 2072bb22f21fad704950b6b61901adbe555ed5a7 Mon Sep 17 00:00:00 2001 From: Ryan Langman Date: Fri, 28 Apr 2023 09:40:17 -0700 Subject: [PATCH 042/512] [TTS] Fix FastPitch energy code (#6511) Signed-off-by: Ryan --- nemo/collections/tts/modules/fastpitch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/collections/tts/modules/fastpitch.py b/nemo/collections/tts/modules/fastpitch.py index e2da672cf9c7..5f2227a999db 100644 --- a/nemo/collections/tts/modules/fastpitch.py +++ b/nemo/collections/tts/modules/fastpitch.py @@ -317,7 +317,7 @@ def forward( # Predict energy if self.energy_predictor is not None: - energy_pred = self.energy_predictor(prosody_input, enc_mask).squeeze(-1) + energy_pred = self.energy_predictor(enc_out, enc_mask, conditioning=spk_emb).squeeze(-1) if energy is not None: # Average energy over characters @@ -402,7 +402,7 @@ def infer( assert energy.shape[-1] == text.shape[-1], f"energy.shape[-1]: {energy.shape[-1]} != len(text)" energy_emb = self.energy_emb(energy) else: - energy_pred = self.energy_predictor(prosody_input, enc_mask).squeeze(-1) + energy_pred = self.energy_predictor(enc_out, enc_mask, conditioning=spk_emb).squeeze(-1) energy_emb = self.energy_emb(energy_pred.unsqueeze(1)) enc_out = enc_out + energy_emb.transpose(1, 2) From 525f994ce80697e9a4d13bdd7bed2c5c4dfcc919 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 28 Apr 2023 14:08:42 -0700 Subject: [PATCH 043/512] [TTS] fixed broken path. (#6514) (#6518) Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --- tutorials/tts/Vits_Training.ipynb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tutorials/tts/Vits_Training.ipynb b/tutorials/tts/Vits_Training.ipynb index e4c088d66a5e..37e55e0d7572 100644 --- a/tutorials/tts/Vits_Training.ipynb +++ b/tutorials/tts/Vits_Training.ipynb @@ -305,6 +305,8 @@ " model.sample_rate=22050 \\\n", " train_dataset=tests/data/asr/an4_train.json \\\n", " validation_datasets=tests/data/asr/an4_val.json \\\n", + " phoneme_dict_path=tts_dataset_files/ipa_cmudict-0.7b_nv23.01.txt \\\n", + " heteronyms_path=tts_dataset_files/heteronyms-052722 \\\n", " trainer.max_epochs=3 \\\n", " trainer.accelerator=null \\\n", " trainer.check_val_every_n_epoch=1 \\\n", From 8b735d3aedafaf0ea47719aafddade97d9143b80 Mon Sep 17 00:00:00 2001 From: anteju <108555623+anteju@users.noreply.github.com> Date: Fri, 28 Apr 2023 14:33:59 -0700 Subject: [PATCH 044/512] Fix normalization of impulse response in ImpulsePerturbation (#6505) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ante Jukić --- .../asr/parts/preprocessing/perturb.py | 47 +++++++++++++------ 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/nemo/collections/asr/parts/preprocessing/perturb.py b/nemo/collections/asr/parts/preprocessing/perturb.py index 801305d90b7a..d4b1944ec6a2 100644 --- a/nemo/collections/asr/parts/preprocessing/perturb.py +++ b/nemo/collections/asr/parts/preprocessing/perturb.py @@ -344,16 +344,24 @@ class ImpulsePerturbation(Perturbation): manifest_path (list): Manifest file for RIRs audio_tar_filepaths (list): Tar files, if RIR audio files are tarred shuffle_n (int): Shuffle parameter for shuffling buffered files from the tar files + normalize_impulse (bool): Normalize impulse response to zero mean and amplitude 1 shift_impulse (bool): Shift impulse response to adjust for delay at the beginning rng (int): Random seed. Default is None """ def __init__( - self, manifest_path=None, audio_tar_filepaths=None, shuffle_n=128, shift_impulse=False, rng=None, + self, + manifest_path=None, + audio_tar_filepaths=None, + shuffle_n=128, + normalize_impulse=False, + shift_impulse=False, + rng=None, ): self._manifest = collections.ASRAudioText(manifest_path, parser=parsers.make_parser([]), index_by_file_id=True) self._audiodataset = None self._tarred_audio = False + self._normalize_impulse = normalize_impulse self._shift_impulse = shift_impulse self._data_iterator = None @@ -373,23 +381,32 @@ def perturb(self, data): tarred_audio=self._tarred_audio, audio_dataset=self._data_iterator, ) - if not self._shift_impulse: - impulse_norm = (impulse.samples - min(impulse.samples)) / (max(impulse.samples) - min(impulse.samples)) - data._samples = signal.fftconvolve(data._samples, impulse_norm, "same") - data._samples = data._samples / max( - abs(data._samples) - ) # normalize data samples to [-1,1] after rir convolution to avoid nans with fp16 training + + # normalize if necessary + if self._normalize_impulse: + # normalize the impulse response to zero mean and amplitude 1 + impulse_norm = impulse.samples - np.mean(impulse.samples) + impulse_norm /= max(abs(impulse_norm)) else: - # Find peak and shift peak to left - impulse_norm = (impulse.samples - min(impulse.samples)) / (max(impulse.samples) - min(impulse.samples)) + impulse_norm = impulse.samples + + # len of input data samples + len_data = len(data._samples) + + # convolve with the full impulse response + data._samples = signal.fftconvolve(data._samples, impulse_norm, "full") + + # compensate the dominant path propagation delay + if self._shift_impulse: + # Find the peak of the IR and shift the output to the left max_ind = np.argmax(np.abs(impulse_norm)) + data._samples = data._samples[max_ind:] + + # trim to match the input data length + data._samples = data._samples[:len_data] - impulse_resp = impulse_norm[max_ind:] - delay_after = len(impulse_resp) - data._samples = signal.fftconvolve(data._samples, impulse_resp, "full")[:-delay_after] - data._samples = data._samples / max( - abs(data._samples) - ) # normalize data samples to [-1,1] after rir convolution to avoid nans with fp16 training + # normalize data samples to [-1,1] after rir convolution to avoid nans with fp16 training + data._samples = data._samples / max(abs(data._samples)) class ShiftPerturbation(Perturbation): From e5218b374c7f5fd1b01a12c8db59fa88bde913fd Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Fri, 28 Apr 2023 16:30:42 -0700 Subject: [PATCH 045/512] Add interleaved pp support (#6498) * Add support for Virtual Pipeline Parallel conversion Signed-off-by: smajumdar * Add support for Virtual Pipeline Parallel conversion Signed-off-by: smajumdar * Switch to megatron core Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: smajumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../megatron_change_num_partitions.py | 385 ++++++++++++++---- 1 file changed, 313 insertions(+), 72 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_change_num_partitions.py b/examples/nlp/language_modeling/megatron_change_num_partitions.py index 944565d8bd43..a4b28fa4d761 100644 --- a/examples/nlp/language_modeling/megatron_change_num_partitions.py +++ b/examples/nlp/language_modeling/megatron_change_num_partitions.py @@ -13,11 +13,13 @@ # limitations under the License. import os +import tempfile from argparse import ArgumentParser from typing import Dict, List import torch -from omegaconf import open_dict +import torch.nn as nn +from omegaconf import OmegaConf, open_dict from pytorch_lightning import Trainer from nemo.collections.nlp.parts.nlp_overrides import ( @@ -54,6 +56,20 @@ --target_pipeline_model_parallel_size=1 \ --target_pipeline_model_parallel_split_rank=0 \ --precision=bf16 + +# Megatron GPT + Virtual Pipeline parallelism + +python megatron_change_num_partitions.py \ + --model_extracted_dir="" \ + --target_file="" \ + --ckpt_name="" \ + --tensor_model_parallel_size= \ + --target_tensor_model_parallel_size= \ + --pipeline_model_parallel_size= \ + --target_pipeline_model_parallel_size= \ + --virtual_pipeline_model_parallel_size= \ + --hparams_file="" \ + --precision=bf16 ### Only Tensor Parallelism conversion ### @@ -100,6 +116,43 @@ """ +def set_virtual_parallel_rank_safely(rank: int): + AppState().virtual_pipeline_model_parallel_rank = rank + + try: + from megatron.core import parallel_state + + parallel_state.set_virtual_pipeline_model_parallel_rank(rank) + + if rank is None: + parallel_state.set_virtual_pipeline_model_parallel_world_size(0) + + except (ImportError, ModuleNotFoundError): + logging.warning("`megatron-core` not installed, cannot set virtual parallel rank !") + + +################# +### Utilities ### +################# + + +def force_cpu_model(cfg): + with open_dict(cfg): + # temporarily + original_cpu_init = cfg.get('use_cpu_initialization', False) + original_amp_o2 = cfg.get('megatron_amp_O2', False) + cfg.use_cpu_initialization = True + cfg.megatron_amp_O2 = False + return cfg, {'original_cpu_init': original_cpu_init, 'original_amp_o2': original_amp_o2} + + +def restore_model_config(cfg, original_dict): + with open_dict(cfg): + for key, val in original_dict.items(): + cfg[key] = val + return cfg + + ################# ### Utilities ### ################# @@ -732,6 +785,12 @@ def main(): parser.add_argument( '--target_pipeline_model_parallel_split_rank', type=int, default=0, help='PP rank to split for Enc-Dec models' ) + parser.add_argument( + '--virtual_pipeline_model_parallel_size', type=int, default=None, help='Virtual Pipeline parallelism size' + ) + parser.add_argument( + '--ckpt_name', type=str, default=None, help='Checkpoint name to load from for Virtual Parallel' + ) parser.add_argument( "--model_class", type=str, @@ -759,6 +818,7 @@ def main(): default=None, help="Path to the tokenizer model path if your model uses a tokenizer model as an artifact. This is needed if your model uses a sentencepiece tokenizer.", ) + parser.add_argument('--hparams_file', type=str, default=None, help='Path to hparams file from PTL training') parser.add_argument('--tp_conversion_only', action='store_true', help='Only convert TP model to TP model') parser.add_argument('--model_extracted_dir', type=str, default=None, help='Path to pre-extracted model directory') @@ -795,6 +855,25 @@ def main(): pp_size = args.pipeline_model_parallel_size tgt_pp_size = args.target_pipeline_model_parallel_size pipeline_model_parallel_split_rank = args.target_pipeline_model_parallel_split_rank + vp_size = args.virtual_pipeline_model_parallel_size + if vp_size is None: + vp_size = 1 + + convert_vp = vp_size > 1 + if convert_vp: + hparams_filepath = args.hparams_file + if hparams_filepath is None: + logging.warning( + '\n\n\n!!!!!!!!!\n' + 'You are converting a model with virtual pipeline parallelism enabled, \n' + 'but have not passed `hparams_file` argument. \n' + 'This will cause each ckpt file to be temporarily laoded onto GPU memory!\n\n' + 'It is highly recommended to pass `hparams_file` argument to avoid this.\n' + ) + else: + hparams_filepath = None + + # Import the class of the model cls = model_utils.import_class_by_path(args.model_class) if args.model_file is None and args.model_extracted_dir is None: @@ -830,10 +909,16 @@ def main(): tgt_pp_size = 1 pipeline_model_parallel_split_rank = 0 + if vp_size is None or vp_size < 0: + vp_size = 1 + app_state = AppState() app_state.data_parallel_rank = 0 app_state.pipeline_model_parallel_size = pp_size app_state.tensor_model_parallel_size = tp_size + + if vp_size > 1: + app_state.virtual_pipeline_model_parallel_size = vp_size app_state.model_parallel_size = app_state.pipeline_model_parallel_size * app_state.tensor_model_parallel_size world_size = pp_size * tp_size # pseudo world size for simulating load of a specific rank on a single gpu @@ -841,87 +926,198 @@ def main(): app_state.tensor_model_parallel_rank = 0 app_state.pipeline_model_parallel_rank = 0 + if vp_size > 1: + set_virtual_parallel_rank_safely(0) + # If input model has TP > 1 or PP > 1 # Reconstruct the model to have TP = 1 and PP = 1 # Note that this is a forward loop that will process PP [0..N] TP [0..M] in sequential order. if tp_size > 1 or pp_size > 1: - partitions = {} + partitions = {} # 3d list of VP x PP x TP model = None - for pp_rank in range(pp_size): - app_state.pipeline_model_parallel_rank = pp_rank - partitions[pp_rank] = [] - - for tp_rank in range(tp_size): - app_state.tensor_model_parallel_rank = tp_rank - - logging.info(f"Loading ------------ PP Rank: {pp_rank} TP Rank: {tp_rank}") - - # Override flag that forces Model to use AppState instead of Trainer - # to determine the world size, global and local rank - # Used for simulating load of a specific rank on a single gpu - os.environ[NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE] = "true" - - # Compute the global rank to load the correct subset of parameters - global_rank = pp_rank * tp_size + tp_rank - - # Update AppState - app_state.world_size = world_size - app_state.global_rank = global_rank - app_state.local_rank = global_rank % num_gpu_per_node - app_state.pipeline_model_parallel_size = pp_size - app_state.tensor_model_parallel_size = tp_size - app_state.pipeline_model_parallel_split_rank = pipeline_model_parallel_split_rank - app_state.model_parallel_size = ( - app_state.pipeline_model_parallel_size * app_state.tensor_model_parallel_size - ) - save_restore_connector = NLPSaveRestoreConnector() + # Build partitions structure + for vp_idx in range(vp_size): + partitions[vp_idx] = [] # Build first layer - VP - if args.model_extracted_dir is not None: - logging.info(f"Using extracted model directory: {args.model_extracted_dir}") - save_restore_connector.model_extracted_dir = args.model_extracted_dir + for pp_idx in range(pp_size): + # For each VP, build PP x TP holder + partitions[vp_idx].append({}) + partitions[vp_idx][pp_idx] = [] - if args.model_file is not None: - model_filepath = args.model_file - else: - model_filepath = args.model_extracted_dir + for vp_rank in range(vp_size): + if vp_size > 1: + set_virtual_parallel_rank_safely(vp_rank) - model = cls.restore_from( - restore_path=model_filepath, - trainer=trainer, - map_location=torch.device("cpu"), - save_restore_connector=save_restore_connector, - ) - model.to(dtype=dtype) + for pp_rank in range(pp_size): + app_state.pipeline_model_parallel_rank = pp_rank - # Reset env flag - os.environ.pop(NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE, None) + for tp_rank in range(tp_size): + app_state.tensor_model_parallel_rank = tp_rank - logging.info( - f"<<<<<<<< LOADED MODEL PP={pp_rank + 1} TP={tp_rank + 1} | " - f"GLOBAL RANK = {global_rank} >>>>>>>>>" - ) - params = [p for _, p in model.named_parameters()] - partitions[pp_rank].append(params) + logging.info(f"Loading ------------ PP Rank: {pp_rank} TP Rank: {tp_rank}") - # app_state is being updated incorrectly during restore - app_state.data_parallel_rank = 0 - app_state.pipeline_model_parallel_rank = pp_rank - app_state.tensor_model_parallel_rank = tp_rank - app_state.pipeline_model_parallel_size = pp_size - app_state.tensor_model_parallel_size = tp_size - app_state.model_parallel_size = ( - app_state.pipeline_model_parallel_size * app_state.tensor_model_parallel_size - ) + # Override flag that forces Model to use AppState instead of Trainer + # to determine the world size, global and local rank + # Used for simulating load of a specific rank on a single gpu + os.environ[NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE] = "true" + + # Compute the global rank to load the correct subset of parameters + global_rank = pp_rank * tp_size + tp_rank + + # Update AppState + app_state.world_size = world_size + app_state.global_rank = global_rank + app_state.local_rank = global_rank % num_gpu_per_node + app_state.pipeline_model_parallel_size = pp_size + app_state.tensor_model_parallel_size = tp_size + app_state.pipeline_model_parallel_split_rank = pipeline_model_parallel_split_rank + app_state.model_parallel_size = ( + app_state.pipeline_model_parallel_size * app_state.tensor_model_parallel_size + ) + + if vp_size > 1: + set_virtual_parallel_rank_safely(vp_rank) + + if vp_rank == 0: + save_restore_connector = NLPSaveRestoreConnector() + + if args.model_extracted_dir is not None: + logging.info(f"Using extracted model directory: {args.model_extracted_dir}") + save_restore_connector.model_extracted_dir = args.model_extracted_dir + + if args.model_file is not None: + model_filepath = args.model_file + else: + model_filepath = args.model_extracted_dir + + if vp_size == 1: + + # Get model config + tmp_cfg = cls.restore_from( + restore_path=model_filepath, + trainer=trainer, + map_location=torch.device("cpu"), + save_restore_connector=save_restore_connector, + return_config=True, + ) + + # Force model onto CPU + tmp_cfg, restore_dict = force_cpu_model(tmp_cfg) + + # Restore model + model = cls.restore_from( + restore_path=model_filepath, + trainer=trainer, + map_location=torch.device("cpu"), + save_restore_connector=save_restore_connector, + override_config_path=tmp_cfg, + ) + model.freeze() + + # Restore model config + restore_model_config(model.cfg, restore_dict) + + else: + if args.ckpt_name is None: + raise ValueError( + "For Virtual Parallel, ckpt name is required.\n" + "Please provide `--ckpt_name` argument." + ) + + # inject model parallel rank + checkpoint_path = model_utils.inject_model_parallel_rank( + os.path.join(model_filepath, args.ckpt_name) + ) + + if hparams_filepath is not None: + # Force the model onto CPU + tmp_cfg = OmegaConf.load(hparams_filepath) + tmp_cfg, restore_dict = force_cpu_model(tmp_cfg) + + with tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', suffix='.yml') as tmp: + OmegaConf.save(tmp_cfg, tmp, resolve=True) + tmp.seek(0) + + model = cls.load_from_checkpoint( + checkpoint_path=checkpoint_path, + trainer=trainer, + map_location=torch.device("cpu"), + hparams_file=tmp.name, + ) + model.freeze() + + restore_model_config(model.cfg, restore_dict) + + else: + model = cls.load_from_checkpoint( + checkpoint_path=checkpoint_path, trainer=trainer, map_location=torch.device("cpu"), + ) + model.freeze() + + model.to(dtype=dtype) + + # Reset env flag + os.environ.pop(NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE, None) + + logging.info( + f"<<<<<<<< LOADED MODEL PP={pp_rank + 1} TP={tp_rank + 1} | " + f"GLOBAL RANK = {global_rank} >>>>>>>>>" + ) + + # Save the parameters + if vp_size == 1: + params = [p for p in model.parameters()] + partitions[vp_rank][pp_rank].append(params) # vp_rank = 0 + + else: + vp_params_tmp = [] + for vp_idx in range(vp_size): + set_virtual_parallel_rank_safely(vp_idx) + params = [p for p in model.model[vp_idx].parameters()] + # params = model.model[vp_idx].module.state_dict_for_save_checkpoint() + # params = [p for p in params.values()] + vp_params_tmp.append(params) + # partitions[pp_rank][vp_idx].append(params) + + for vp_idx in range(vp_size): + partitions[vp_idx][pp_rank].append(vp_params_tmp[vp_idx]) + + del vp_params_tmp + set_virtual_parallel_rank_safely(0) + + # app_state is being updated incorrectly during restore + app_state.data_parallel_rank = 0 + app_state.pipeline_model_parallel_rank = pp_rank + app_state.tensor_model_parallel_rank = tp_rank + app_state.pipeline_model_parallel_size = pp_size + app_state.tensor_model_parallel_size = tp_size + app_state.model_parallel_size = ( + app_state.pipeline_model_parallel_size * app_state.tensor_model_parallel_size + ) + + if vp_size > 1: + app_state.virtual_pipeline_model_parallel_size = vp_size + set_virtual_parallel_rank_safely(vp_rank) # Build a unified model with PP 1 TP 1 with open_dict(model.cfg): model.cfg.tensor_model_parallel_size = 1 model.cfg.pipeline_model_parallel_size = 1 + model.cfg.virtual_pipeline_model_parallel_size = None + + app_state.global_rank = 0 + app_state.local_rank = 0 + app_state.data_parallel_rank = 0 + app_state.pipeline_model_parallel_rank = 0 app_state.tensor_model_parallel_rank = 0 - app_state.pipeline_model_parallel_size = 0 + app_state.pipeline_model_parallel_size = 1 + app_state.tensor_model_parallel_size = 1 app_state.model_parallel_size = 1 + if vp_size > 1: + set_virtual_parallel_rank_safely(None) + trainer = Trainer(devices=1, strategy=NLPDDPStrategy(), accelerator="cpu", precision=precision) with open_dict(model.cfg): @@ -930,25 +1126,52 @@ def main(): if args.tokenizer_vocab_file is not None: model.cfg.tokenizer.vocab_file = args.tokenizer_vocab_file - # temporarily - original_cpu_init = model.cfg.get('use_cpu_initialization', False) - original_amp_o2 = model.cfg.get('megatron_amp_O2', False) - model.cfg.use_cpu_initialization = True - model.cfg.megatron_amp_O2 = False + model.cfg, restore_dict = force_cpu_model(model.cfg) - model = cls(model.cfg, trainer) + # Remove Virtual Parallelism + model.cfg.virtual_pipeline_model_parallel_size = None + + logging.info(f"<<<<<<<< Building TP 1 PP 1 base model >>>>>>>>>") + model = cls(model.cfg, trainer) # type: nn.Module + model.freeze() model = model.to('cpu') model._save_restore_connector = NLPSaveRestoreConnector() + vp_param_count = 0 + for vp in range(vp_size): + for pp in range(pp_size): + for tp in range(tp_size): + vp_param_count += len(partitions[vp][pp][tp]) + + if vp_size > 1: + logging.debug(f"Total params in TP PP VP = 1 : {len(list(model.parameters()))}") + logging.debug(f"Total params in VP PP TP (og): {vp_param_count}") + + # Flatten Virtual Pipeline + if vp_size == 1: + # unpack vp container, pack pp tp container + partitions = partitions[0] + partitions = {idx: val for idx, val in enumerate(partitions)} + else: + flat_partitions = {idx: [] for idx in range(pp_size)} + + for pp in range(pp_size): + for tp in range(tp_size): + vp_cache = [] + for vp in range(vp_size): + vp_cache.extend(partitions[vp][pp][tp]) + + flat_partitions[pp].append(vp_cache) + + partitions = flat_partitions + if tgt_tp_size > 1 or tgt_pp_size > 1: merge_partition(model, partitions) else: # Write out the PP 1 TP 1 model to disk merge_partition(model, partitions, args.target_file) - with open_dict(model.cfg): - model.cfg.use_cpu_initialization = original_cpu_init - model.cfg.megatron_amp_O2 = original_amp_o2 + restore_model_config(model.cfg, restore_dict) # Empty cache memory of all parameters from all PP TP partitions partitions.clear() @@ -968,6 +1191,16 @@ def main(): else: model_filepath = args.model_extracted_dir + tmp_cfg = cls.restore_from( + restore_path=model_filepath, + trainer=trainer, + map_location=torch.device("cpu"), + save_restore_connector=save_restore_connector, + return_config=True, + ) + + tmp_cfg, restore_dict = force_cpu_model(tmp_cfg) + model = cls.restore_from( restore_path=model_filepath, trainer=trainer, @@ -976,6 +1209,8 @@ def main(): ) model.to(dtype=dtype) + restore_model_config(model.cfg, restore_dict) + # If target model has TP > 1 or PP > 1 if tgt_pp_size > 1 or tgt_tp_size > 1: @@ -1046,10 +1281,16 @@ def main(): with open_dict(model.cfg): model.cfg.tokenizer.model = args.tokenizer_model_path - model = cls(model.cfg, trainer).to('cpu') + model.cfg, restore_dict = force_cpu_model(model.cfg) + + model = cls(model.cfg, trainer) + model = model.to('cpu') model._save_restore_connector = NLPSaveRestoreConnector() + model.freeze() model.to(dtype=dtype) + restore_model_config(model.cfg, restore_dict) + # Update global batch size if old_global_batch_size % new_global_batch_size != 0 or old_global_batch_size < new_global_batch_size: logging.info( From e3155f684376977644b1dc6612a9300bd9a3cddd Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Mon, 1 May 2023 09:49:25 -0700 Subject: [PATCH 046/512] Fix typos (#6523) * Fix typos Signed-off-by: smajumdar * Fix typos Signed-off-by: smajumdar --------- Signed-off-by: smajumdar --- tutorials/asr/ASR_CTC_Language_Finetuning.ipynb | 10 +++++----- tutorials/asr/ASR_with_Subword_Tokenization.ipynb | 4 ++-- tutorials/asr/Buffered_Transducer_Inference.ipynb | 2 +- .../asr/Online_Offline_Speech_Commands_Demo.ipynb | 2 +- tutorials/asr/Streaming_ASR.ipynb | 2 +- tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb index cea5956f10ed..cbad6b79b858 100644 --- a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb +++ b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb @@ -540,8 +540,8 @@ "import matplotlib.pyplot as plt\n", "\n", "plt.bar(x=TOKEN_COUNT_X, height=NUM_TOKENS_Y)\n", - "plt.title(\"Occurance of unique tokens in train+dev set\")\n", - "plt.xlabel(\"# of occurances\")\n", + "plt.title(\"Occurrences of unique tokens in train+dev set\")\n", + "plt.xlabel(\"# of occurrences\")\n", "plt.ylabel(\"# of tokens\")\n", "plt.xlim(0, MAX_COUNT);" ], @@ -565,13 +565,13 @@ "source": [ "UNCOMMON_TOKENS_COUNT = 5\n", "\n", - "chars_with_infrequent_occurance = set()\n", + "chars_with_infrequent_occurrence = set()\n", "for count in range(1, UNCOMMON_TOKENS_COUNT + 1):\n", " if count in train_counts:\n", " token_list = train_counts[count]\n", - " chars_with_infrequent_occurance.update(set(token_list))\n", + " chars_with_infrequent_occurrence.update(set(token_list))\n", "\n", - "print(f\"Number of tokens with <= {UNCOMMON_TOKENS_COUNT} occurances : {len(chars_with_infrequent_occurance)}\")" + "print(f\"Number of tokens with <= {UNCOMMON_TOKENS_COUNT} occurrences : {len(chars_with_infrequent_occurrence)}\")" ], "execution_count": null, "outputs": [] diff --git a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb index 2d540930733a..3adca1a8d0ac 100644 --- a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb +++ b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb @@ -312,7 +312,7 @@ "\r\n", " - Sophisticated subword tokenization algorithms build their vocabularies based on large text corpora. To accurately tokenize such large volumes of text with minimal vocabulary size, the subwords that are learned inherently model the interdependency between tokens of that language to some degree. \r\n", " \r\n", - "Looking at the previous example, the token `hel##` is a single token that represents the relationship `h` => `e` => `l`. When the model predicts the singe token `hel##`, it implicitly predicts this relationship - even though the subsequent token can be either `l` (for `hell`) or `##lo` (for `hello`) and is predicted independently of the previous token!\r\n", + "Looking at the previous example, the token `hel##` is a single token that represents the relationship `h` => `e` => `l`. When the model predicts the single token `hel##`, it implicitly predicts this relationship - even though the subsequent token can be either `l` (for `hell`) or `##lo` (for `hello`) and is predicted independently of the previous token!\r\n", "\r\n", " - By reducing the target sentence length by subword tokenization (target sentence here being the characters/subwords transcribed from the audio signal), we entirely sidestep the sequence length limitation of CTC loss!\r\n", "\r\n", @@ -554,7 +554,7 @@ "\r\n", " - `--spe_sample_size`: If the dataset is too large, consider using a sampled dataset indicated by a positive integer. By default, any negative value (default = -1) will use the entire dataset.\r\n", "\r\n", - " - `--spe_train_extremely_large_corpus`: When training a sentencepiece tokenizer on very large amounts of text, sometimes the tokenizer will run out of memory or wont be able to process so much data on RAM. At some point you might receive the following error - \"Input corpus too large, try with train_extremely_large_corpus=true\". If your machine has large amounts of RAM, it might still be possible to build the tokenizer using the above flag. Will silently fail if it runs out of RAM.\r\n", + " - `--spe_train_extremely_large_corpus`: When training a sentencepiece tokenizer on very large amounts of text, sometimes the tokenizer will run out of memory or won't be able to process so much data on RAM. At some point you might receive the following error - \"Input corpus too large, try with train_extremely_large_corpus=true\". If your machine has large amounts of RAM, it might still be possible to build the tokenizer using the above flag. Will silently fail if it runs out of RAM.\r\n", "\r\n", " - `--log`: Whether the script should display log messages" ] diff --git a/tutorials/asr/Buffered_Transducer_Inference.ipynb b/tutorials/asr/Buffered_Transducer_Inference.ipynb index 7ae01cbace8b..6661634332a5 100644 --- a/tutorials/asr/Buffered_Transducer_Inference.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference.ipynb @@ -806,7 +806,7 @@ " print(\"\\nGreedy labels collected from this buffer\")\n", " print(tok[len(tok) - 1 - delay:len(tok) - 1 - delay + tokens_per_chunk]) \n", " self.toks_unmerged += tok[len(tok) - 1 - delay:len(tok) - 1 - delay + tokens_per_chunk]\n", - " print(\"\\nTokens collected from succesive buffers before RNNT merge\")\n", + " print(\"\\nTokens collected from successive buffers before RNNT merge\")\n", " print(self.toks_unmerged)\n", "\n", " output = []\n", diff --git a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb index c956f3979227..8632e5e2926b 100644 --- a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb @@ -440,7 +440,7 @@ " Arg:\n", " wav_file: wave file to be performed inference on.\n", " STEP: infer every STEP seconds \n", - " WINDOW_SIZE : lenght of audio to be sent to NN.\n", + " WINDOW_SIZE : length of audio to be sent to NN.\n", " \"\"\"\n", " \n", " FRAME_LEN = STEP \n", diff --git a/tutorials/asr/Streaming_ASR.ipynb b/tutorials/asr/Streaming_ASR.ipynb index 0bd437e06cf0..1ba7415630d5 100644 --- a/tutorials/asr/Streaming_ASR.ipynb +++ b/tutorials/asr/Streaming_ASR.ipynb @@ -538,7 +538,7 @@ " print(\"\\nGreedy labels collected from this buffer\")\n", " print(tok[len(tok) - 1 - delay:len(tok) - 1 - delay + self.n_tokens_per_chunk]) \n", " self.toks_unmerged += tok[len(tok) - 1 - delay:len(tok) - 1 - delay + self.n_tokens_per_chunk]\n", - " print(\"\\nTokens collected from succesive buffers before CTC merge\")\n", + " print(\"\\nTokens collected from successive buffers before CTC merge\")\n", " print(self.toks_unmerged)\n", "\n", "\n", diff --git a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb index 95f3aaec1d87..2f15181b8160 100644 --- a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb +++ b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb @@ -665,7 +665,7 @@ "\n", "For this experiment we will continue to use the original spec augmentation config in the base model, however you may find better results by modifying the strength of this augmentation.\n", "\n", - "**Note**: The script inside ASR examples **disables spec augment entirely**. This is done in order to provide a stable default to measure the best possible adaptation case, but may severely degrade the performance on general speech. Please be careful when copying the hyper parameters from the tutorial to the script for large scale experimentatin." + "**Note**: The script inside ASR examples **disables spec augment entirely**. This is done in order to provide a stable default to measure the best possible adaptation case, but may severely degrade the performance on general speech. Please be careful when copying the hyper parameters from the tutorial to the script for large scale experimentation." ], "metadata": { "id": "T3VuqcGTNuIJ" @@ -804,7 +804,7 @@ "source": [ "-----\n", "\n", - "As you can see, a single component of the model may support one or more adapter types (or none at all)! Below, we will experiment with the simple Linear Adapters, but as an excercise, you might try to use other adapter types present here." + "As you can see, a single component of the model may support one or more adapter types (or none at all)! Below, we will experiment with the simple Linear Adapters, but as an exercise, you might try to use other adapter types present here." ], "metadata": { "id": "YXTC4LiSnB2O" From a6260687115b2da6da3056e1dbbdd256a7c1c2b0 Mon Sep 17 00:00:00 2001 From: trias702 <25867060+trias702@users.noreply.github.com> Date: Tue, 2 May 2023 11:06:59 -0500 Subject: [PATCH 047/512] New noise_norm perturbation based on Riva work (#6445) * Initial commit for new noise_norm perturbation Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Minor fix to random seed in perturb Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updated code to reflect feedback Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updates for feedback given by code reviewers Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updates in response to PR feedback Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added comment about ref_mic being None Signed-off-by: Daniel Egert * Updated perturb to use inspect module Signed-off-by: Daniel Egert --------- Signed-off-by: Daniel Egert Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- nemo/collections/asr/data/audio_to_text.py | 10 +- .../asr/data/audio_to_text_dataset.py | 4 +- .../asr/parts/preprocessing/__init__.py | 1 + .../asr/parts/preprocessing/perturb.py | 344 +++++++++++++++--- nemo/utils/model_utils.py | 2 +- 5 files changed, 304 insertions(+), 57 deletions(-) diff --git a/nemo/collections/asr/data/audio_to_text.py b/nemo/collections/asr/data/audio_to_text.py index d61f0e1f69ef..3b2e2a767a97 100644 --- a/nemo/collections/asr/data/audio_to_text.py +++ b/nemo/collections/asr/data/audio_to_text.py @@ -191,8 +191,8 @@ def expand_sharded_filepaths(sharded_filepaths, shard_strategy: str, world_size: sharded_filepaths = sharded_filepaths.replace(bkey, "}") if isinstance(sharded_filepaths, str): - # Brace expand - sharded_filepaths = list(braceexpand.braceexpand(sharded_filepaths)) + # Brace expand, set escape=False for Windows compatibility + sharded_filepaths = list(braceexpand.braceexpand(sharded_filepaths, escape=False)) # Expand store paths into WebDataset URLs sharded_filepaths = [ @@ -1359,5 +1359,9 @@ def __iter__(self): for dataset_idx in shuffled_order: d = self.datasets[dataset_idx] assert isinstance(d, IterableDataset), "ChainDataset only supports IterableDataset" - for x in d: + for idx, x in enumerate(d): yield x + # in case d is an infinite dataset, we want to break the loop + # so that the other datasets get a chance to yield too + if idx >= len(d) - 1: + break diff --git a/nemo/collections/asr/data/audio_to_text_dataset.py b/nemo/collections/asr/data/audio_to_text_dataset.py index 325857e81323..14e8dea19651 100644 --- a/nemo/collections/asr/data/audio_to_text_dataset.py +++ b/nemo/collections/asr/data/audio_to_text_dataset.py @@ -512,7 +512,7 @@ def get_audio_to_text_char_dataset_from_config( constructed dataset or None if dataset config is invalid or nothing to load """ if 'augmentor' in config: - augmentor = process_augmentations(config['augmentor']) + augmentor = process_augmentations(config['augmentor'], global_rank=global_rank, world_size=world_size) else: augmentor = None @@ -609,7 +609,7 @@ def get_audio_to_text_bpe_dataset_from_config( constructed dataset or None if dataset config is invalid or nothing to load """ if 'augmentor' in config: - augmentor = process_augmentations(config['augmentor']) + augmentor = process_augmentations(config['augmentor'], global_rank=global_rank, world_size=world_size) else: augmentor = None diff --git a/nemo/collections/asr/parts/preprocessing/__init__.py b/nemo/collections/asr/parts/preprocessing/__init__.py index b25f0ff25e42..a0785c56bf2a 100644 --- a/nemo/collections/asr/parts/preprocessing/__init__.py +++ b/nemo/collections/asr/parts/preprocessing/__init__.py @@ -20,6 +20,7 @@ GainPerturbation, ImpulsePerturbation, NoisePerturbation, + NoisePerturbationWithNormalization, Perturbation, RirAndNoisePerturbation, ShiftPerturbation, diff --git a/nemo/collections/asr/parts/preprocessing/perturb.py b/nemo/collections/asr/parts/preprocessing/perturb.py index d4b1944ec6a2..d882bc83772b 100644 --- a/nemo/collections/asr/parts/preprocessing/perturb.py +++ b/nemo/collections/asr/parts/preprocessing/perturb.py @@ -33,6 +33,7 @@ # SOFTWARE. # This file contains code artifacts adapted from https://github.com/ryanleary/patter import copy +import inspect import io import os import random @@ -44,10 +45,10 @@ import numpy as np import soundfile as sf from scipy import signal -from torch.utils.data import IterableDataset from nemo.collections.asr.parts.preprocessing.segment import AudioSegment from nemo.collections.common.parts.preprocessing import collections, parsers +from nemo.core.classes import IterableDataset from nemo.utils import logging # TODO @blisc: Perhaps refactor instead of import guarding @@ -69,16 +70,11 @@ HAVE_NUMBA = False -def read_one_audiosegment(manifest, target_sr, rng=None, tarred_audio=False, audio_dataset=None): - - random.seed(rng) if rng else None - +def read_one_audiosegment(manifest, target_sr, tarred_audio=False, audio_dataset=None): if tarred_audio: if audio_dataset is None: raise TypeError("Expected augmentation dataset but got None") - audio_file, file_id = next(audio_dataset) - manifest_idx = manifest.mapping[file_id] - manifest_entry = manifest[manifest_idx] + audio_file, file_id, manifest_entry = next(audio_dataset) offset = 0 if manifest_entry.offset is None else manifest_entry.offset duration = 0 if manifest_entry.duration is None else manifest_entry.duration @@ -375,11 +371,7 @@ def __init__( def perturb(self, data): impulse = read_one_audiosegment( - self._manifest, - data.sample_rate, - self._rng, - tarred_audio=self._tarred_audio, - audio_dataset=self._data_iterator, + self._manifest, data.sample_rate, tarred_audio=self._tarred_audio, audio_dataset=self._data_iterator, ) # normalize if necessary @@ -491,7 +483,7 @@ def orig_sr(self): def get_one_noise_sample(self, target_sr): return read_one_audiosegment( - self._manifest, target_sr, self._rng, tarred_audio=self._tarred_audio, audio_dataset=self._data_iterator + self._manifest, target_sr, tarred_audio=self._tarred_audio, audio_dataset=self._data_iterator ) def perturb(self, data, ref_mic=0): @@ -501,11 +493,7 @@ def perturb(self, data, ref_mic=0): ref_mic (int): reference mic index for scaling multi-channel audios """ noise = read_one_audiosegment( - self._manifest, - data.sample_rate, - self._rng, - tarred_audio=self._tarred_audio, - audio_dataset=self._data_iterator, + self._manifest, data.sample_rate, tarred_audio=self._tarred_audio, audio_dataset=self._data_iterator, ) self.perturb_with_input_noise(data, noise, ref_mic=ref_mic) @@ -600,6 +588,223 @@ def perturb_with_foreground_noise(self, data, noise, data_rms=None, max_noise_du data._samples[noise_idx : noise_idx + noise_samples.shape[0]] += noise_samples +class NoisePerturbationWithNormalization(Perturbation): + """ + Perturbation that adds noise to input audio, with normalisation to specific decibel level. + Also tiles shorter noise samples up to their corresponding clean audio length. + + Args: + manifest_path (str or list): Manifest file with paths to noise files, can be list if using multiple noise sources + min_snr_db (float): Minimum SNR of audio after noise is added + max_snr_db (float): Maximum SNR of audio after noise is added + snr_samples (list): A discrete list of SNRs DBs to sample from when mixing, will be used instead of [min_snr_db,max_snr_db] + norm_to_db (float): Will normalise clean, noise, and mixed samples to this DB + audio_tar_filepaths (str or list) : Tar files, if noise audio files are tarred, can be list for multiple sources + shuffle_n (int): Shuffle parameter for shuffling buffered files from the tar files + orig_sr (int): Original sampling rate of the noise files + rng (int): Random seed. Default is None + shard_strategy (str): if you're using tarred audio and wish to scatter instead of replicate, set this to 'scatter' + epsilon (float): minimum value for RMS DB normalisation to avoid divide by zero + """ + + def __init__( + self, + manifest_path=None, + min_snr_db=10, + max_snr_db=50, + snr_samples=None, + norm_to_db=None, + rng=None, + audio_tar_filepaths=None, + shuffle_n=128, + orig_sr=16000, + global_rank=0, + world_size=1, + shard_strategy='replicate', + epsilon=0.01, + ): + # import here to avoid circular import error + from nemo.collections.asr.data.audio_to_text import RandomizedChainDataset + + self._manifest = collections.ASRAudioText(manifest_path, parser=parsers.make_parser([]), index_by_file_id=True) + self._audiodataset = None + self._tarred_audio = False + self._orig_sr = orig_sr + self._data_iterator = None + + random.seed(rng) if rng else None + self._rng = rng + + if audio_tar_filepaths: + self._tarred_audio = True + if isinstance(manifest_path, str): + manifest_path = [manifest_path] + if isinstance(audio_tar_filepaths, str): + audio_tar_filepaths = [audio_tar_filepaths] + datasets = [] + for tarred_audio_filepath, manifest_filepath in zip(audio_tar_filepaths, manifest_path): + dataset = AugmentationDataset( + manifest_filepath, + tarred_audio_filepath, + shuffle_n, + rank=global_rank, + world_size=world_size, + shard_strategy=shard_strategy, + ) + datasets.append(dataset) + self._audiodataset = RandomizedChainDataset( + datasets, rnd_seed=(rng if rng else random.randint(0, 30000)) + global_rank + ) + if len(self._audiodataset) == 0: + raise RuntimeError( + "NoisePerturbationWithNormalization detected a zero length RandomizedChainDataset, should never happen" + ) + self._data_iterator = iter(self._audiodataset) + + self._min_snr_db = min_snr_db + self._max_snr_db = max_snr_db + self._norm_to_db = norm_to_db + self._snr_samples = snr_samples if isinstance(snr_samples, list) and len(snr_samples) > 0 else None + self._epsilon = epsilon + + @property + def orig_sr(self): + return self._orig_sr + + def read_one_audiosegment(self, target_sr): + if self._tarred_audio: + if self._data_iterator is None: + raise TypeError("Expected valid iterator but got None") + try: + audio_file, file_id, manifest_entry = next(self._data_iterator) + except StopIteration: + self._data_iterator = iter(self._audiodataset) + audio_file, file_id, manifest_entry = next(self._data_iterator) + + offset = 0 if manifest_entry.offset is None else manifest_entry.offset + duration = 0 if manifest_entry.duration is None else manifest_entry.duration + + else: + audio_record = random.sample(self._manifest.data, 1)[0] + audio_file = audio_record.audio_file + offset = 0 if audio_record.offset is None else audio_record.offset + duration = 0 if audio_record.duration is None else audio_record.duration + + return AudioSegment.from_file(audio_file, target_sr=target_sr, offset=offset, duration=duration) + + def perturb(self, data, ref_mic=0): + """ + Args: + data (AudioSegment): audio data + ref_mic (int): reference mic index for scaling multi-channel audios + """ + + noise = self.read_one_audiosegment(data.sample_rate) + + # noise samples need to be at least 1 second long to avoid strange oddities + # in the RMS SNR mixing, so we have a fail-safe here to ensure at least 1 sec duration + while noise.duration < 1: + noise = self.read_one_audiosegment(data.sample_rate) + + self.perturb_with_input_noise(data, noise, ref_mic=ref_mic, norm_to_db=self._norm_to_db) + + def snr_mixer(self, clean, noise, snr, norm_to_db=-25.0): + """ + Mixes the clean audio with the noise + Args: + clean (numpy array): the clean audio data + noise (numpy array): the noise audio data + snr (float): the SNR value for the mixing + norm_to_db (float): the DB value to normalise to before mixing + """ + clean = self.norm_audio_to_db(clean, norm_to_db) + noise = self.norm_audio_to_db(noise, norm_to_db) + + # Set the noise level for a given SNR + # note that if your noise doesn't overlap with your audio then your target SNR + # may not be achievable. Consider using an rms-threshold in the future + noisescalar = 10 ** (-snr / 20.0) + noisenewlevel = noise * noisescalar + noisyspeech = clean + noisenewlevel + + return clean, noisenewlevel, noisyspeech + + def norm_audio_to_db(self, x, norm_to_db): + """ + Normalises audio signal to particular db, with some epsilon in-case of divide by zero + Args: + x (numpy array): input audio signal + norm_to_db (float): the db to normalise to + """ + rms = (x ** 2).mean(axis=0) ** 0.5 + rms = np.where(np.isclose(rms, 0), self._epsilon, rms) + scalar = 10 ** (norm_to_db / 20.0) / rms + return x * scalar + + def concatenate_noise_sample(self, clean, noise, fs, silence_length=0.25): + """ + Tiles the noise array to match the clean audio array, with small silence between the joins + Args: + clean (numpy array): clean audio data + noise (numpy array): noise audio data + fs (int): sample rate used by both clean and noise audio data + silence_length (float): the amount of silence (in secs) to insert before tiling + """ + while len(noise) < len(clean): + if noise.ndim > 1: + zeros = np.zeros((int(fs * silence_length), noise.shape[-1])) + else: + zeros = np.zeros((int(fs * silence_length),)) + noiseconcat = np.append(noise, zeros, axis=0) + noise = np.append(noiseconcat, noise, axis=0) + + return noise + + def perturb_with_input_noise(self, data, noise, data_rms=None, ref_mic=0, norm_to_db=-25.0): + """ + Args: + data (AudioSegment): audio data + noise (AudioSegment): noise data + data_rms (Union[float, List[float]): rms_db for data input + ref_mic (int): reference mic index for scaling multi-channel audio, if set to None then + each channel will be scaled independently + norm_to_db (float): will normalise all audio to this DB + """ + if data.num_channels != noise.num_channels: + raise ValueError( + f"Found mismatched channels for data ({data.num_channels}) and noise ({noise.num_channels})." + ) + + if not (0 <= ref_mic < data.num_channels): + raise ValueError( + f" reference mic ID must be an integer in [0, {data.num_channels}), got {ref_mic} instead." + ) + + if self._snr_samples: + snr_db = random.sample(self._snr_samples, 1)[0] + else: + snr_db = random.uniform(self._min_snr_db, self._max_snr_db) + if data_rms is None: + data_rms = data.rms_db if ref_mic is None else data.rms_db[ref_mic] + + if norm_to_db is None: + norm_to_db = data_rms + + data_norm = data._samples + noise_norm = noise._samples + + if len(data_norm) == 0: + return + + if len(noise_norm) < len(data_norm): + noise_norm = self.concatenate_noise_sample(data_norm, noise_norm, data.sample_rate) + noise_norm = noise_norm[0 : len(data_norm)] + + _, _, noisy_snr = self.snr_mixer(clean=data_norm, noise=noise_norm, snr=snr_db, norm_to_db=norm_to_db) + + data._samples = noisy_snr + + class WhiteNoisePerturbation(Perturbation): """ Perturbation that adds white noise to an audio file in the training dataset. @@ -857,6 +1062,7 @@ def perturb(self, data): "impulse": ImpulsePerturbation, "shift": ShiftPerturbation, "noise": NoisePerturbation, + "noise_norm": NoisePerturbationWithNormalization, "white_noise": WhiteNoisePerturbation, "rir_noise_aug": RirAndNoisePerturbation, "transcode_aug": TranscodePerturbation, @@ -902,7 +1108,7 @@ def from_config(cls, config): return cls(perturbations=ptbs) -def process_augmentations(augmenter) -> Optional[AudioAugmentor]: +def process_augmentations(augmenter, global_rank=0, world_size=1) -> Optional[AudioAugmentor]: """Process list of online data augmentations. Accepts either an AudioAugmentor object with pre-defined augmentations, or a dictionary that points to augmentations that have been defined. @@ -1016,7 +1222,12 @@ class CustomPerturbation(perturb.Perturbation): raise ValueError("`prob` must be a float value between 0 and 1.") try: - augmentation = perturbation_types[augment_name](**augment_kwargs) + augmentation_class = perturbation_types[augment_name] + if 'global_rank' in inspect.signature(augmentation_class).parameters: + augment_kwargs['global_rank'] = global_rank + if 'world_size' in inspect.signature(augmentation_class).parameters: + augment_kwargs['world_size'] = world_size + augmentation = augmentation_class(**augment_kwargs) augmentations.append([prob, augmentation]) except KeyError: raise KeyError(f"Invalid perturbation name. Allowed values : {perturbation_types.keys()}") @@ -1028,40 +1239,38 @@ class CustomPerturbation(perturb.Perturbation): class AugmentationDataset(IterableDataset): """ A class that loads tarred audio files and cycles over the files in the dataset. - Accepts a single comma-separated JSON manifest file (in the same style as for the AudioToCharDataset/AudioToBPEDataset), as well as the path(s) to the tarball(s) containing the wav files. Each line of the manifest should contain the information for one audio file, including at least the transcript and name of the audio file within the tarball. - Valid formats for the audio_tar_filepaths argument include: (1) a single string that can be brace-expanded, e.g. 'path/to/audio.tar' or 'path/to/audio_{1..100}.tar.gz', or (2) a list of file paths that will not be brace-expanded, e.g. ['audio_1.tar', 'audio_2.tar', ...]. - Note: For brace expansion in (1), there may be cases where `{x..y}` syntax cannot be used due to shell interference. This occurs most commonly inside SLURM scripts. Therefore we provide a few equivalent replacements. Supported opening braces - { <=> (, [, < and the special tag _OP_. Supported closing braces - } <=> ), ], > and the special tag _CL_. For SLURM based tasks, we suggest the use of the special tags for ease of use. - See the WebDataset documentation for more information about accepted data and input formats. """ - def __init__(self, manifest_path: str, tar_filepaths: Union[str, List[str]], shuffle_n: int = 128): - self._manifest = collections.ASRAudioText(manifest_path, parser=parsers.make_parser([]), index_by_file_id=True) + def __init__( + self, + manifest_path: str, + tar_filepaths: Union[str, List[str]], + shuffle_n: int = 128, + rank: int = 0, + world_size: int = 1, + shard_strategy: str = "replicate", + ): + # import here to avoid circular import error + from nemo.collections.asr.data.audio_to_text import expand_sharded_filepaths - if isinstance(tar_filepaths, str): - # Replace '(' and '[' with '{' - brace_keys_open = ['(', '[', '<', '_OP_'] - for bkey in brace_keys_open: - if bkey in tar_filepaths: - tar_filepaths = tar_filepaths.replace(bkey, "{") + self._manifest = collections.ASRAudioText(manifest_path, parser=parsers.make_parser([]), index_by_file_id=True) - # Replace ')' and ']' with '}' - brace_keys_close = [')', ']', '>', '_CL_'] - for bkey in brace_keys_close: - if bkey in tar_filepaths: - tar_filepaths = tar_filepaths.replace(bkey, "}") + tar_filepaths = expand_sharded_filepaths( + tar_filepaths, shard_strategy=shard_strategy, world_size=world_size, global_rank=rank + ) if not HAVE_OMEGACONG_WEBDATASET: raise LightningNotInstalledException(self) @@ -1072,25 +1281,58 @@ def __init__(self, manifest_path: str, tar_filepaths: Union[str, List[str]], shu else: logging.info("WebDataset will not shuffle files within the tar files.") - self.audio_dataset = self.audio_dataset.rename(audio='wav', key='__key__').to_tuple('audio', 'key') - self.audio_iter = iter(self.audio_dataset) + self.audio_dataset = ( + self.audio_dataset.rename(audio='wav;ogg;flac', key='__key__') + .to_tuple('audio', 'key') + .pipe(self._loop_offsets) + ) def __len__(self): return len(self._manifest) + def _loop_offsets(self, iterator): + """This function is used to iterate through utterances with different offsets for each file. + """ + + class TarredAudioLoopOffsets: + def __init__(self, collection): + self.iterator = iterator + self.collection = collection + self.current_fn = None + self.current_bytes = None + self.offset_id = 0 + + def __iter__(self): + return self + + def __next__(self): + if self.current_fn is None: + self.current_bytes, self.current_fn = next(self.iterator) + self.offset_id = 0 + else: + offset_list = self.collection.mapping[self.current_fn] + if len(offset_list) == self.offset_id + 1: + self.current_bytes, self.current_fn = next(self.iterator) + self.offset_id = 0 + else: + self.offset_id += 1 + + return self.current_bytes, self.current_fn, self.offset_id + + return TarredAudioLoopOffsets(self._manifest) + def __iter__(self): - return self + audio_iter = iter(self.audio_dataset) - def __next__(self): while True: try: - audio_bytes, audio_filename = next(self.audio_iter) - + audio_bytes, audio_filename, offset_id = next(audio_iter) + file_id, _ = os.path.splitext(os.path.basename(audio_filename)) + manifest_idx = self._manifest.mapping[file_id][offset_id] + manifest_entry = self._manifest[manifest_idx] + + # Convert audio bytes to IO stream for processing (for SoundFile to read) + audio_file = io.BytesIO(audio_bytes) + yield audio_file, file_id, manifest_entry except StopIteration: - self.audio_iter = iter(self.audio_dataset) - audio_bytes, audio_filename = next(self.audio_iter) - file_id, _ = os.path.splitext(os.path.basename(audio_filename)) - - # Convert audio bytes to IO stream for processing (for SoundFile to read) - audio_file = io.BytesIO(audio_bytes) - return audio_file, file_id + audio_iter = iter(self.audio_dataset) diff --git a/nemo/utils/model_utils.py b/nemo/utils/model_utils.py index 45fabceb4a91..211ffdcdf11e 100644 --- a/nemo/utils/model_utils.py +++ b/nemo/utils/model_utils.py @@ -256,7 +256,7 @@ def resolve_validation_dataloaders(model: 'ModelPT'): ds_key = resolve_dataset_name_from_cfg(cfg.validation_ds) - if ds_key is None: + if ds_key is None or val_dl_idx < 0: logging.debug( "Could not resolve file path from provided config - {}. " "Disabling support for multi-dataloaders.".format(cfg.validation_ds) From 4d97f925b143ab19602c88ed814f3a99be666bd6 Mon Sep 17 00:00:00 2001 From: Ryan Langman Date: Tue, 2 May 2023 09:10:03 -0700 Subject: [PATCH 048/512] [TTS] Add script for computing feature stats (#6508) * [TTS] Add script for computing feature stats Signed-off-by: Ryan * [TTS] Add overwrite config Signed-off-by: Ryan --------- Signed-off-by: Ryan --- .../tts/compute_feature_stats.py | 196 ++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 scripts/dataset_processing/tts/compute_feature_stats.py diff --git a/scripts/dataset_processing/tts/compute_feature_stats.py b/scripts/dataset_processing/tts/compute_feature_stats.py new file mode 100644 index 000000000000..6774563810d9 --- /dev/null +++ b/scripts/dataset_processing/tts/compute_feature_stats.py @@ -0,0 +1,196 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This script is to compute global and speaker-level feature statistics for a given TTS training manifest. + +This script should be run after compute_features.py as it loads the precomputed feature data. + +$ python /scripts/dataset_processing/tts/compute_feature_stats.py \ + --feature_config_path=/examples/tts/conf/features/feature_22050.yaml + --manifest_path=/manifest.json \ + --audio_dir=/audio \ + --feature_dir=/features \ + --stats_path=/feature_stats.json + +The output dictionary will contain the feature statistics for every speaker, as well as a "default" entry +with the global statistics. + +For example: + +{ + "default": { + "pitch_mean": 100.0, + "pitch_std": 50.0, + "energy_mean": 7.5, + "energy_std": 4.5 + }, + "speaker1": { + "pitch_mean": 105.0, + "pitch_std": 45.0, + "energy_mean": 7.0, + "energy_std": 5.0 + }, + "speaker2": { + "pitch_mean": 110.0, + "pitch_std": 30.0, + "energy_mean": 5.0, + "energy_std": 2.5 + } +} + +""" + +import argparse +import json +from collections import defaultdict +from pathlib import Path +from typing import List, Tuple + +import torch +from hydra.utils import instantiate +from omegaconf import OmegaConf +from tqdm import tqdm + +from nemo.collections.asr.parts.utils.manifest_utils import read_manifest + + +def get_args(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Compute TTS feature statistics.", + ) + parser.add_argument( + "--feature_config_path", required=True, type=Path, help="Path to feature config file.", + ) + parser.add_argument( + "--manifest_path", required=True, type=Path, help="Path to training manifest.", + ) + parser.add_argument( + "--audio_dir", required=True, type=Path, help="Path to base directory with audio data.", + ) + parser.add_argument( + "--feature_dir", required=True, type=Path, help="Path to directory where feature data was stored.", + ) + parser.add_argument( + "--feature_names", default="pitch,energy", type=str, help="Comma separated list of features to process.", + ) + parser.add_argument( + "--mask_field", + default="voiced_mask", + type=str, + help="If provided, stat computation will ignore non-masked frames.", + ) + parser.add_argument( + "--stats_path", + default=Path("feature_stats.json"), + type=Path, + help="Path to output JSON file with dataset feature statistics.", + ) + parser.add_argument( + "--overwrite", default=False, type=bool, help="Whether to overwrite the output stats file if it exists.", + ) + + args = parser.parse_args() + return args + + +def _compute_stats(values: List[torch.Tensor]) -> Tuple[float, float]: + values_tensor = torch.cat(values, dim=0) + mean = values_tensor.mean().item() + std = values_tensor.std(dim=0).item() + return mean, std + + +def main(): + args = get_args() + + feature_config_path = args.feature_config_path + manifest_path = args.manifest_path + audio_dir = args.audio_dir + feature_dir = args.feature_dir + feature_name_str = args.feature_names + mask_field = args.mask_field + stats_path = args.stats_path + overwrite = args.overwrite + + if not manifest_path.exists(): + raise ValueError(f"Manifest {manifest_path} does not exist.") + + if not audio_dir.exists(): + raise ValueError(f"Audio directory {audio_dir} does not exist.") + + if not feature_dir.exists(): + raise ValueError( + f"Feature directory {audio_dir} does not exist. " + f"Please check that the path is correct and that you ran compute_features.py" + ) + + if stats_path.exists(): + if overwrite: + print(f"Will overwrite existing stats path: {stats_path}") + else: + raise ValueError(f"Stats path already exists: {stats_path}") + + feature_config = OmegaConf.load(feature_config_path) + feature_config = instantiate(feature_config) + featurizer_dict = feature_config.featurizers + + print(f"Found featurizers for {list(featurizer_dict.keys())}.") + featurizers = featurizer_dict.values() + + feature_names = feature_name_str.split(",") + # For each feature, we have a dictionary mapping speaker IDs to a list containing all features + # for that speaker + feature_stats = {name: defaultdict(list) for name in feature_names} + + entries = read_manifest(manifest_path) + + for entry in tqdm(entries): + speaker = entry["speaker"] + + entry_dict = {} + for featurizer in featurizers: + feature_dict = featurizer.load(manifest_entry=entry, audio_dir=audio_dir, feature_dir=feature_dir) + entry_dict.update(feature_dict) + + if mask_field: + mask = entry_dict[mask_field] + else: + mask = None + + for feature_name in feature_names: + values = entry_dict[feature_name] + if mask is not None: + values = values[mask] + + feature_stat_dict = feature_stats[feature_name] + feature_stat_dict["default"].append(values) + feature_stat_dict[speaker].append(values) + + stat_dict = defaultdict(dict) + for feature_name in feature_names: + mean_key = f"{feature_name}_mean" + std_key = f"{feature_name}_std" + feature_stat_dict = feature_stats[feature_name] + for speaker_id, values in feature_stat_dict.items(): + speaker_mean, speaker_std = _compute_stats(values) + stat_dict[speaker_id][mean_key] = speaker_mean + stat_dict[speaker_id][std_key] = speaker_std + + with open(stats_path, 'w', encoding="utf-8") as stats_f: + json.dump(stat_dict, stats_f, indent=4) + + +if __name__ == "__main__": + main() From df94a0dedc0965ab82e6482a3cee85d0cc24f5c2 Mon Sep 17 00:00:00 2001 From: "He Huang (Steve)" <105218074+stevehuang52@users.noreply.github.com> Date: Tue, 2 May 2023 16:29:43 -0400 Subject: [PATCH 049/512] Add Frame-VAD model and datasets (#6441) * add model, dataset, necessary utils and tests Signed-off-by: stevehuang52 * fix tarred data Signed-off-by: stevehuang52 * fix typo Signed-off-by: stevehuang52 * update docstring Signed-off-by: stevehuang52 * update doc Signed-off-by: stevehuang52 * update doc Signed-off-by: stevehuang52 * update pretrained model info Signed-off-by: stevehuang52 --------- Signed-off-by: stevehuang52 --- nemo/collections/asr/data/audio_to_label.py | 452 +++++++++++++++++- .../asr/data/audio_to_label_dataset.py | 87 ++++ nemo/collections/asr/models/__init__.py | 2 +- .../asr/models/classification_models.py | 312 +++++++++++- .../asr/parts/preprocessing/features.py | 4 + .../asr/parts/preprocessing/segment.py | 22 +- .../common/parts/preprocessing/collections.py | 75 ++- .../asr/test_asr_classification_model.py | 146 +++++- tests/collections/asr/test_label_datasets.py | 30 +- 9 files changed, 1100 insertions(+), 30 deletions(-) diff --git a/nemo/collections/asr/data/audio_to_label.py b/nemo/collections/asr/data/audio_to_label.py index 4317642a8fff..7585e4d7ea4f 100644 --- a/nemo/collections/asr/data/audio_to_label.py +++ b/nemo/collections/asr/data/audio_to_label.py @@ -15,11 +15,11 @@ import os from typing import Dict, List, Optional, Union -import braceexpand import torch import webdataset as wd from nemo.collections.asr.data.audio_to_text import cache_datastore_manifests, expand_sharded_filepaths +from nemo.collections.asr.parts.preprocessing.features import WaveformFeaturizer from nemo.collections.asr.parts.preprocessing.segment import available_formats as valid_sf_formats from nemo.collections.common.parts.preprocessing import collections from nemo.core.classes import Dataset, IterableDataset @@ -399,7 +399,7 @@ class AudioToSpeechLabelDataset(_AudioLabelDataset): Defaults to False. is_regression_task (bool): Whether the dataset is for a regression task instead of classification. Defaults to False. - cal_labels_occurrence (bool): Wether to calculate occurrence of labels + cal_labels_occurrence (bool): Whether to calculate occurrence of labels Defaults to False. """ @@ -737,9 +737,6 @@ class TarredAudioToClassificationLabelDataset(_TarredAudioLabelDataset): is_regression_task (bool): Whether it is a regression task. Defualts to False. """ - # self.labels = labels if labels else self.collection.uniq_labels - # self.num_commands = len(self.labels) - def _collate_fn(self, batch): return _speech_collate_fn(batch, pad_id=0) @@ -856,7 +853,450 @@ def fixed_seq_collate_fn(self, batch): return _fixed_seq_collate_fn(self, batch) def sliced_seq_collate_fn(self, batch): - return _sliced_seq_collate_fn(self, batch) + raise NotImplementedError def vad_frame_seq_collate_fn(self, batch): return _vad_frame_seq_collate_fn(self, batch) + + +class AudioToMultiLabelDataset(Dataset): + """ + Dataset that loads a json file containing paths to audio files, durations (in seconds), and a sequence of labels. + Each new line is a different sample. Example below: + {"audio_filepath": "/path/to/audio_wav_0.wav", "duration": time_in_sec_0, "label": \ + "0 1 1 0 1", "offset": offset_in_sec_0} + ... + {"audio_filepath": "/path/to/audio_wav_n.wav", "duration": time_in_sec_n, "label": \ + "0 1 0 0 1", "offset": offset_in_sec_n} + Args: + manifest_filepath (Union[str, List[str]]): Path to manifest json as described above. Can + be comma-separated paths. + labels (Optional[list]): String containing all the possible labels to map to + if None then automatically picks from ASRSpeechLabel collection. + min_duration (float): Dataset parameter. + All training files which have a duration less than min_duration + are dropped. Note: Duration is read from the manifest JSON. + Defaults to 0.1. + max_duration (float): Dataset parameter. + All training files which have a duration more than max_duration + are dropped. Note: Duration is read from the manifest JSON. + Defaults to None. + trim (bool): Whether to use trim silence from beginning and end + of audio signal using librosa.effects.trim(). + Defaults to False. + window_length_in_sec (float): length of window/slice (in seconds) + Use this for speaker recognition and VAD tasks. + shift_length_in_sec (float): amount of shift of window for generating the frame for VAD task in a batch + Use this for VAD task during inference. + normalize_audio (bool): Whether to normalize audio signal. + Defaults to False. + is_regression_task (bool): Whether the dataset is for a regression task instead of classification. + Defaults to False. + cal_labels_occurrence (bool): Whether to calculate occurrence of labels + Defaults to False. + delimiter (Optional[str]): Delimiter to use when spliting the label string, default to None. + normalize_audio_db (bool): Whether to normalize audio signal to a target db, default to False. + normalize_audio_db_target (float): Target db to normalize audio signal, default to -20. + """ + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + """Returns definitions of module output ports. + """ + + output_types = { + 'audio_signal': NeuralType( + ('B', 'T'), + AudioSignal(freq=self._sample_rate) + if self is not None and hasattr(self, '_sample_rate') + else AudioSignal(), + ), + 'a_sig_length': NeuralType(tuple('B'), LengthsType()), + } + + if self.is_regression_task: + output_types.update( + { + 'targets': NeuralType(tuple('B, T'), RegressionValuesType()), + 'targets_length': NeuralType(tuple('B'), LengthsType()), + } + ) + else: + output_types.update( + {'label': NeuralType(('B', 'T'), LabelsType()), 'label_length': NeuralType(tuple('B'), LengthsType()),} + ) + + return output_types + + def __init__( + self, + *, + manifest_filepath: Union[str, List[str]], + sample_rate: int, + labels: Optional[List[str]] = None, + int_values: bool = False, + augmentor: 'nemo.collections.asr.parts.perturb.AudioAugmentor' = None, + min_duration: Optional[float] = 0.1, + max_duration: Optional[float] = None, + trim_silence: bool = False, + is_regression_task: bool = False, + cal_labels_occurrence: Optional[bool] = False, + delimiter: Optional[str] = None, + normalize_audio_db: bool = False, + normalize_audio_db_target: float = -20.0, + ): + super().__init__() + if isinstance(manifest_filepath, str): + manifest_filepath = manifest_filepath.split(',') + + self.delimiter = delimiter + self.normalize_audio_db = normalize_audio_db + self.normalize_audio_db_target = normalize_audio_db_target + + self.collection = collections.ASRSpeechLabel( + manifests_files=manifest_filepath, + min_duration=min_duration, + max_duration=max_duration, + is_regression_task=is_regression_task, + cal_labels_occurrence=cal_labels_occurrence, + delimiter=delimiter, + ) + + self.featurizer = WaveformFeaturizer(sample_rate=sample_rate, int_values=int_values, augmentor=augmentor) + self.trim = trim_silence + self.is_regression_task = is_regression_task + self.id2occurrence = {} + self.labels_occurrence = None + + if not is_regression_task: + self.labels = labels if labels else self._get_label_set() + self.num_classes = len(self.labels) if self.labels is not None else 1 + self.label2id, self.id2label = {}, {} + for label_id, label in enumerate(self.labels): + self.label2id[label] = label_id + self.id2label[label_id] = label + if cal_labels_occurrence: + self.id2occurrence[label_id] = self.collection.labels_occurrence[label] + self.labels_occurrence.append(self.id2occurrence[label_id]) + + for idx in range(len(self.labels[:5])): + logging.debug(" label id {} and its mapped label {}".format(idx, self.id2label[idx])) + else: + self.labels = [] + self.num_classes = 1 + + def _get_label_set(self): + labels = [] + for sample in self.collection: + label_str = sample.label + if label_str: + label_str_list = label_str.split(self.delimiter) if self.delimiter else label_str.split() + labels.extend(label_str_list) + return sorted(set(labels)) + + def _label_str_to_tensor(self, label_str: str): + labels = label_str.split(self.delimiter) if self.delimiter else label_str.split() + + if self.is_regression_task: + labels = [float(s) for s in labels] + labels = torch.tensor(labels).float() + else: + labels = [self.label2id[s] for s in labels] + labels = torch.tensor(labels).long() + return labels + + def __len__(self): + return len(self.collection) + + def __getitem__(self, index): + sample = self.collection[index] + + offset = sample.offset + + if offset is None: + offset = 0 + + features = self.featurizer.process( + sample.audio_file, + offset=offset, + duration=sample.duration, + trim=self.trim, + normalize_db=self.normalize_audio_db, + normalize_db_target=self.normalize_audio_db_target, + ) + + f, fl = features, torch.tensor(features.size(0)).long() + + t = self._label_str_to_tensor(sample.label) + + tl = torch.tensor(t.size(0)).long() + + return f, fl, t, tl + + def _collate_fn(self, batch): + return _speech_collate_fn(batch, pad_id=0) + + +class TarredAudioToMultiLabelDataset(IterableDataset): + """ + A similar Dataset to the AudioToMultiLabelDataset, but which loads tarred audio files. + + Accepts a single comma-separated JSON manifest file (in the same style as for the AudioToSpeechLabelDataset), + as well as the path(s) to the tarball(s) containing the wav files. Each line of the manifest should + contain the information for one audio file, including at least the transcript and name of the audio + file within the tarball. + + Valid formats for the audio_tar_filepaths argument include: + (1) a single string that can be brace-expanded, e.g. 'path/to/audio.tar' or 'path/to/audio_{1..100}.tar.gz', or + (2) a list of file paths that will not be brace-expanded, e.g. ['audio_1.tar', 'audio_2.tar', ...]. + + See the WebDataset documentation for more information about accepted data and input formats. + + If using multiple processes the number of shards should be divisible by the number of workers to ensure an + even split among workers. If it is not divisible, logging will give a warning but training will proceed. + In addition, if using mutiprocessing, each shard MUST HAVE THE SAME NUMBER OF ENTRIES after filtering + is applied. We currently do not check for this, but your program may hang if the shards are uneven! + + Notice that a few arguments are different from the AudioToBPEDataset; for example, shuffle (bool) has been + replaced by shuffle_n (int). + + Additionally, please note that the len() of this DataLayer is assumed to be the length of the manifest + after filtering. An incorrect manifest length may lead to some DataLoader issues down the line. + + Args: + audio_tar_filepaths: Either a list of audio tarball filepaths, or a + string (can be brace-expandable). + manifest_filepath (str): Path to the manifest. + labels (list): Dataset parameter. + List of target classes that can be output by the speaker recognition model. + shuffle_n (int): How many samples to look ahead and load to be shuffled. + See WebDataset documentation for more details. + Defaults to 0. + min_duration (float): Dataset parameter. + All training files which have a duration less than min_duration + are dropped. Note: Duration is read from the manifest JSON. + Defaults to 0.1. + max_duration (float): Dataset parameter. + All training files which have a duration more than max_duration + are dropped. Note: Duration is read from the manifest JSON. + Defaults to None. + trim(bool): Whether to use trim silence from beginning and end + of audio signal using librosa.effects.trim(). + Defaults to False. + window_length_in_sec (float): time length of window/slice (in seconds) # Pass this only for speaker recognition and VAD task + shift_length_in_sec (float): amount of shift of window for generating the frame for VAD task. in a batch # Pass this only for VAD task during inference. + normalize_audio (bool): Whether to normalize audio signal. Defaults to False. + shard_strategy (str): Tarred dataset shard distribution strategy chosen as a str value during ddp. + - `scatter`: The default shard strategy applied by WebDataset, where each node gets + a unique set of shards, which are permanently pre-allocated and never changed at runtime. + - `replicate`: Optional shard strategy, where each node gets all of the set of shards + available in the tarred dataset, which are permanently pre-allocated and never changed at runtime. + The benefit of replication is that it allows each node to sample data points from the entire + dataset independently of other nodes, and reduces dependence on value of `shuffle_n`. + + .. warning:: + Replicated strategy allows every node to sample the entire set of available tarfiles, + and therefore more than one node may sample the same tarfile, and even sample the same + data points! As such, there is no assured guarantee that all samples in the dataset will be + sampled at least once during 1 epoch. Scattered strategy, on the other hand, on specific + occasions (when the number of shards is not divisible with ``world_size``), will not sample + the entire dataset. For these reasons it is not advisable to use tarred datasets as validation + or test datasets. + global_rank (int): Worker rank, used for partitioning shards. Defaults to 0. + world_size (int): Total number of processes, used for partitioning shards. Defaults to 0. + delimiter (Optional[str]): Delimiter to use when spliting the label string, default to None. + normalize_audio_db (bool): Whether to normalize audio signal to a target db, default to False. + normalize_audio_db_target (float): Target db to normalize audio signal, default to -20. + """ + + def __init__( + self, + *, + audio_tar_filepaths: Union[str, List[str]], + manifest_filepath: Union[str, List[str]], + sample_rate: int, + labels: Optional[List[str]] = None, + shuffle_n: int = 0, + int_values: bool = False, + augmentor: 'nemo.collections.asr.parts.perturb.AudioAugmentor' = None, + min_duration: Optional[float] = 0.1, + max_duration: Optional[float] = None, + trim_silence: bool = False, + is_regression_task: bool = False, + shard_strategy: str = "scatter", + global_rank: int = 0, + world_size: int = 0, + delimiter: Optional[str] = None, + normalize_audio_db: bool = False, + normalize_audio_db_target: float = -20.0, + ): + super().__init__() + if isinstance(manifest_filepath, str): + manifest_filepath = manifest_filepath.split(',') + + self.trim = trim_silence + self.is_regression_task = is_regression_task + self.delimiter = delimiter + self.normalize_audio_db = normalize_audio_db + self.normalize_audio_db_target = normalize_audio_db_target + + self.collection = collections.ASRSpeechLabel( + manifests_files=manifest_filepath, + min_duration=min_duration, + max_duration=max_duration, + is_regression_task=is_regression_task, + index_by_file_id=True, + ) + self.file_occurence = count_occurence(self.collection.mapping) + + self.featurizer = WaveformFeaturizer(sample_rate=sample_rate, int_values=int_values, augmentor=augmentor) + + if not is_regression_task: + self.labels = labels if labels else self._get_label_set() + self.num_classes = len(self.labels) if self.labels is not None else 1 + self.label2id, self.id2label = {}, {} + for label_id, label in enumerate(self.labels): + self.label2id[label] = label_id + self.id2label[label_id] = label + for idx in range(len(self.labels[:5])): + logging.debug(" label id {} and its mapped label {}".format(idx, self.id2label[idx])) + else: + self.labels = [] + self.num_classes = 1 + + audio_tar_filepaths = expand_sharded_filepaths( + sharded_filepaths=audio_tar_filepaths, + shard_strategy=shard_strategy, + world_size=world_size, + global_rank=global_rank, + ) + # Put together WebDataset + self._dataset = wd.WebDataset(urls=audio_tar_filepaths, nodesplitter=None) + + if shuffle_n > 0: + self._dataset = self._dataset.shuffle(shuffle_n) + else: + logging.info("WebDataset will not shuffle files within the tar files.") + + self._dataset = ( + self._dataset.rename(audio=VALID_FILE_FORMATS, key='__key__') + .to_tuple('audio', 'key') + .pipe(self._filter) + .map(f=self._build_sample) + ) + + def _get_label_set(self): + labels = [] + for sample in self.collection: + label_str = sample.label + if label_str: + label_str_list = label_str.split(self.delimiter) if self.delimiter else label_str.split() + labels.extend(label_str_list) + return sorted(set(labels)) + + def _label_str_to_tensor(self, label_str: str): + labels = label_str.split(self.delimiter) if self.delimiter else label_str.split() + + if self.is_regression_task: + labels = [float(s) for s in labels] + labels = torch.tensor(labels).float() + else: + labels = [self.label2id[s] for s in labels] + labels = torch.tensor(labels).long() + return labels + + def _filter(self, iterator): + """This function is used to remove samples that have been filtered out by ASRSpeechLabel already. + Otherwise, we would get a KeyError as _build_sample attempts to find the manifest entry for a sample + that was filtered out (e.g. for duration). + Note that if using multi-GPU training, filtering may lead to an imbalance in samples in each shard, + which may make your code hang as one process will finish before the other. + """ + + class TarredAudioFilter: + def __init__(self, collection, file_occurence): + self.iterator = iterator + self.collection = collection + self.file_occurence = file_occurence + self._iterable = self._internal_generator() + + def __iter__(self): + self._iterable = self._internal_generator() + return self + + def __next__(self): + try: + values = next(self._iterable) + except StopIteration: + # reset generator + self._iterable = self._internal_generator() + values = next(self._iterable) + + return values + + def _internal_generator(self): + """ + WebDataset requires an Iterator, but we require an iterable that yields 1-or-more + values per value inside self.iterator. + + Therefore wrap the iterator with a generator function that will yield 1-or-more + values per sample in the iterator. + """ + for _, tup in enumerate(self.iterator): + audio_bytes, audio_filename = tup + + file_id, _ = os.path.splitext(os.path.basename(audio_filename)) + if audio_filename in self.file_occurence: + for j in range(0, self.file_occurence[file_id]): + if j == 0: + audio_filename = file_id + else: + audio_filename = file_id + "-sub" + str(j) + yield audio_bytes, audio_filename + + return TarredAudioFilter(self.collection, self.file_occurence) + + def _build_sample(self, tup): + """Builds the training sample by combining the data from the WebDataset with the manifest info. + """ + audio_bytes, audio_filename = tup + # Grab manifest entry from self.collection + file_id, _ = os.path.splitext(os.path.basename(audio_filename)) + + manifest_idx = self.collection.mapping[file_id] + manifest_entry = self.collection[manifest_idx] + + offset = manifest_entry.offset + if offset is None: + offset = 0 + + # Convert audio bytes to IO stream for processing (for SoundFile to read) + audio_filestream = io.BytesIO(audio_bytes) + features = self.featurizer.process( + audio_filestream, + offset=offset, + duration=manifest_entry.duration, + trim=self.trim, + normalize_db=self.normalize_audio_db, + normalize_db_target=self.normalize_audio_db_target, + ) + + audio_filestream.close() + + # Audio features + f, fl = features, torch.tensor(features.shape[0]).long() + + t = self._label_str_to_tensor(manifest_entry.label) + + tl = torch.tensor(t.size(0)).long() + + return f, fl, t, tl + + def __iter__(self): + return self._dataset.__iter__() + + def __len__(self): + return len(self.collection) + + def _collate_fn(self, batch): + return _speech_collate_fn(batch, pad_id=0) diff --git a/nemo/collections/asr/data/audio_to_label_dataset.py b/nemo/collections/asr/data/audio_to_label_dataset.py index 10aec0a049a6..a242308d4042 100644 --- a/nemo/collections/asr/data/audio_to_label_dataset.py +++ b/nemo/collections/asr/data/audio_to_label_dataset.py @@ -13,8 +13,11 @@ # limitations under the License. import copy +from omegaconf import DictConfig + from nemo.collections.asr.data import audio_to_label from nemo.collections.asr.data.audio_to_text_dataset import convert_to_config_list, get_chain_dataset +from nemo.collections.asr.parts.preprocessing.perturb import process_augmentations from nemo.collections.common.data.dataset import ConcatDataset @@ -217,3 +220,87 @@ def get_tarred_speech_label_dataset( datasets.append(dataset) return get_chain_dataset(datasets=datasets, ds_config=config, rank=global_rank) + + +def get_audio_multi_label_dataset(cfg: DictConfig) -> audio_to_label.AudioToMultiLabelDataset: + if "augmentor" in cfg: + augmentor = process_augmentations(cfg.augmentor) + else: + augmentor = None + + dataset = audio_to_label.AudioToMultiLabelDataset( + manifest_filepath=cfg.get("manifest_filepath"), + sample_rate=cfg.get("sample_rate"), + labels=cfg.get("labels", None), + int_values=cfg.get("int_values", False), + augmentor=augmentor, + min_duration=cfg.get("min_duration", None), + max_duration=cfg.get("max_duration", None), + trim_silence=cfg.get("trim_silence", False), + is_regression_task=cfg.get("is_regression_task", False), + cal_labels_occurrence=cfg.get("cal_labels_occurrence", False), + delimiter=cfg.get("delimiter", None), + normalize_audio_db=cfg.get("normalize_audio_db", False), + normalize_audio_db_target=cfg.get("normalize_audio_db_target", -20), + ) + return dataset + + +def get_tarred_audio_multi_label_dataset( + cfg: DictConfig, shuffle_n: int, global_rank: int, world_size: int +) -> audio_to_label.TarredAudioToMultiLabelDataset: + + if "augmentor" in cfg: + augmentor = process_augmentations(cfg.augmentor) + else: + augmentor = None + + tarred_audio_filepaths = cfg['tarred_audio_filepaths'] + manifest_filepaths = cfg['manifest_filepath'] + datasets = [] + tarred_audio_filepaths = convert_to_config_list(tarred_audio_filepaths) + manifest_filepaths = convert_to_config_list(manifest_filepaths) + + bucketing_weights = cfg.get('bucketing_weights', None) # For upsampling buckets + if bucketing_weights: + for idx, weight in enumerate(bucketing_weights): + if not isinstance(weight, int) or weight <= 0: + raise ValueError(f"bucket weights must be positive integers") + + if len(manifest_filepaths) != len(tarred_audio_filepaths): + raise ValueError( + f"manifest_filepaths (length={len(manifest_filepaths)}) and tarred_audio_filepaths (length={len(tarred_audio_filepaths)}) need to have the same number of buckets." + ) + + for dataset_idx, (tarred_audio_filepath, manifest_filepath) in enumerate( + zip(tarred_audio_filepaths, manifest_filepaths) + ): + if len(tarred_audio_filepath) == 1: + tarred_audio_filepath = tarred_audio_filepath[0] + + dataset = audio_to_label.TarredAudioToMultiLabelDataset( + audio_tar_filepaths=tarred_audio_filepath, + manifest_filepath=manifest_filepath, + sample_rate=cfg["sample_rate"], + labels=cfg['labels'], + shuffle_n=shuffle_n, + int_values=cfg.get("int_values", False), + augmentor=augmentor, + min_duration=cfg.get('min_duration', None), + max_duration=cfg.get('max_duration', None), + trim_silence=cfg.get('trim_silence', False), + is_regression_task=cfg.get('is_regression_task', False), + delimiter=cfg.get("delimiter", None), + shard_strategy=cfg.get('tarred_shard_strategy', 'scatter'), + global_rank=global_rank, + world_size=world_size, + normalize_audio_db=cfg.get("normalize_audio_db", False), + normalize_audio_db_target=cfg.get("normalize_audio_db_target", -20), + ) + + if bucketing_weights: + [datasets.append(dataset) for _ in range(bucketing_weights[dataset_idx])] + else: + datasets.append(dataset) + + return get_chain_dataset(datasets=datasets, ds_config=cfg, rank=global_rank) diff --git a/nemo/collections/asr/models/__init__.py b/nemo/collections/asr/models/__init__.py index b261a38436bd..a7275faea3d0 100644 --- a/nemo/collections/asr/models/__init__.py +++ b/nemo/collections/asr/models/__init__.py @@ -14,7 +14,7 @@ from nemo.collections.asr.models.asr_model import ASRModel from nemo.collections.asr.models.audio_to_audio_model import AudioToAudioModel -from nemo.collections.asr.models.classification_models import EncDecClassificationModel +from nemo.collections.asr.models.classification_models import EncDecClassificationModel, EncDecFrameClassificationModel from nemo.collections.asr.models.clustering_diarizer import ClusteringDiarizer from nemo.collections.asr.models.ctc_bpe_models import EncDecCTCModelBPE from nemo.collections.asr.models.ctc_models import EncDecCTCModel diff --git a/nemo/collections/asr/models/classification_models.py b/nemo/collections/asr/models/classification_models.py index f1bc248b6afa..a7b55e49d754 100644 --- a/nemo/collections/asr/models/classification_models.py +++ b/nemo/collections/asr/models/classification_models.py @@ -17,12 +17,13 @@ import os import tempfile from abc import abstractmethod -from math import ceil +from math import ceil, floor from typing import Dict, List, Optional, Union import torch from omegaconf import DictConfig, ListConfig, OmegaConf from pytorch_lightning import Trainer +from torchmetrics import Accuracy from torchmetrics.regression import MeanAbsoluteError, MeanSquaredError from nemo.collections.asr.data import audio_to_label_dataset, feature_to_label_dataset @@ -199,7 +200,7 @@ def setup_test_data(self, test_data_config: Optional[Union[DictConfig, Dict]], u # preserve config self._update_dataset_config(dataset_name='test', config=test_data_config) - if use_feat: + if use_feat and hasattr(self, '_setup_feature_label_dataloader'): self._test_dl = self._setup_feature_label_dataloader(config=DictConfig(test_data_config)) else: self._test_dl = self._setup_dataloader_from_config(config=DictConfig(test_data_config)) @@ -834,3 +835,310 @@ def _update_decoder_config(self, labels, cfg): cfg.num_classes = 1 OmegaConf.set_struct(cfg, True) + + +class EncDecFrameClassificationModel(EncDecClassificationModel): + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + return {"outputs": NeuralType(('B', 'T', 'C'), LogitsType())} + + def __init__(self, cfg: DictConfig, trainer: Trainer = None): + self.num_classes = len(cfg.labels) + self.eval_loop_cnt = 0 + super().__init__(cfg=cfg, trainer=trainer) + + @classmethod + def list_available_models(cls) -> Optional[List[PretrainedModelInfo]]: + results = [] + model = PretrainedModelInfo( + pretrained_model_name="vad_multilingual_frame_marblenet", + description="For details about this model, please visit https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/vad_multilingual_frame_marblenet", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/vad_multilingual_frame_marblenet/versions/1.20.0/files/vad_multilingual_frame_marblenet.nemo", + ) + results.append(model) + return results + + def _setup_metrics(self): + self._accuracy = TopKClassificationAccuracy(dist_sync_on_step=True) + self._macro_accuracy = Accuracy(num_classes=self.num_classes, average='macro', task="multiclass") + + def _setup_loss(self): + if "loss" in self.cfg: + weight = self.cfg.loss.get("weight", None) + if weight in [None, "none", "None"]: + weight = [1.0] * self.num_classes + logging.info(f"Using cross-entropy with weights: {weight}") + else: + weight = [1.0] * self.num_classes + return CrossEntropyLoss(logits_ndim=3, weight=weight) + + def _setup_dataloader_from_config(self, config: DictConfig): + OmegaConf.set_struct(config, False) + config.is_regression_task = self.is_regression_task + OmegaConf.set_struct(config, True) + shuffle = config.get('shuffle', False) + + if config.get('is_tarred', False): + if ('tarred_audio_filepaths' in config and config['tarred_audio_filepaths'] is None) or ( + 'manifest_filepath' in config and config['manifest_filepath'] is None + ): + raise ValueError( + "Could not load dataset as `manifest_filepath` is None or " + f"`tarred_audio_filepaths` is None. Provided cfg : {config}" + ) + + shuffle_n = config.get('shuffle_n', 4 * config['batch_size']) if shuffle else 0 + dataset = audio_to_label_dataset.get_tarred_audio_multi_label_dataset( + cfg=config, shuffle_n=shuffle_n, global_rank=self.global_rank, world_size=self.world_size, + ) + shuffle = False + if hasattr(dataset, 'collate_fn'): + collate_func = dataset.collate_fn + else: + collate_func = dataset.datasets[0].collate_fn + else: + if 'manifest_filepath' in config and config['manifest_filepath'] is None: + raise ValueError(f"Could not load dataset as `manifest_filepath` is None. Provided cfg : {config}") + dataset = audio_to_label_dataset.get_audio_multi_label_dataset(config) + collate_func = dataset.collate_fn + + return torch.utils.data.DataLoader( + dataset=dataset, + batch_size=config.get("batch_size", 1), + collate_fn=collate_func, + drop_last=config.get('drop_last', False), + shuffle=shuffle, + num_workers=config.get('num_workers', 0), + pin_memory=config.get('pin_memory', False), + ) + + def _setup_feature_label_dataloader(self, config: DictConfig) -> torch.utils.data.DataLoader: + """ + setup dataloader for VAD inference with audio features as input + """ + + OmegaConf.set_struct(config, False) + config.is_regression_task = self.is_regression_task + OmegaConf.set_struct(config, True) + + if 'augmentor' in config: + augmentor = process_augmentations(config['augmentor']) + else: + augmentor = None + if 'manifest_filepath' in config and config['manifest_filepath'] is None: + logging.warning(f"Could not load dataset as `manifest_filepath` is None. Provided config : {config}") + return None + + dataset = feature_to_label_dataset.get_feature_multi_label_dataset(config=config, augmentor=augmentor) + + return torch.utils.data.DataLoader( + dataset=dataset, + batch_size=config.get("batch_size", 1), + collate_fn=dataset.collate_fn, + drop_last=config.get('drop_last', False), + shuffle=config.get('shuffle', False), + num_workers=config.get('num_workers', 0), + pin_memory=config.get('pin_memory', False), + ) + + def get_label_masks(self, labels, labels_len): + mask = torch.arange(labels.size(1))[None, :].to(labels.device) < labels_len[:, None] + return mask.to(labels.device, dtype=bool) + + @typecheck() + def forward( + self, input_signal=None, input_signal_length=None, processed_signal=None, processed_signal_length=None + ): + has_input_signal = input_signal is not None and input_signal_length is not None + has_processed_signal = processed_signal is not None and processed_signal_length is not None + if (has_input_signal ^ has_processed_signal) == False: + raise ValueError( + f"{self} Arguments ``input_signal`` and ``input_signal_length`` are mutually exclusive " + " with ``processed_signal`` and ``processed_signal_length`` arguments." + ) + + if not has_processed_signal: + processed_signal, processed_signal_length = self.preprocessor( + input_signal=input_signal, length=input_signal_length, + ) + + # Crop or pad is always applied + if self.crop_or_pad is not None: + processed_signal, processed_signal_length = self.crop_or_pad( + input_signal=processed_signal, length=processed_signal_length + ) + # Spec augment is not applied during evaluation/testing + if self.spec_augmentation is not None and self.training: + processed_signal = self.spec_augmentation(input_spec=processed_signal, length=processed_signal_length) + encoded, encoded_len = self.encoder(audio_signal=processed_signal, length=processed_signal_length) + logits = self.decoder(encoded.transpose(1, 2)) + return logits + + # PTL-specific methods + def training_step(self, batch, batch_idx): + audio_signal, audio_signal_len, labels, labels_len = batch + logits = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) + labels, labels_len = self.reshape_labels(logits, labels, audio_signal_len, labels_len) + masks = self.get_label_masks(labels, labels_len) + + loss_value = self.loss(logits=logits, labels=labels, loss_mask=masks) + + tensorboard_logs = { + 'train_loss': loss_value, + 'learning_rate': self._optimizer.param_groups[0]['lr'], + 'global_step': torch.tensor(self.trainer.global_step, dtype=torch.float32), + } + + metric_logits, metric_labels = self.get_metric_logits_labels(logits, labels, masks) + self._accuracy(logits=metric_logits, labels=metric_labels) + topk_scores = self._accuracy.compute() + self._accuracy.reset() + + for top_k, score in zip(self._accuracy.top_k, topk_scores): + tensorboard_logs[f'training_batch_accuracy_top@{top_k}'] = score + + return {'loss': loss_value, 'log': tensorboard_logs} + + def validation_step(self, batch, batch_idx, dataloader_idx: int = 0, tag: str = 'val'): + audio_signal, audio_signal_len, labels, labels_len = batch + logits = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) + labels, labels_len = self.reshape_labels(logits, labels, audio_signal_len, labels_len) + masks = self.get_label_masks(labels, labels_len) + + loss_value = self.loss(logits=logits, labels=labels, loss_mask=masks) + + metric_logits, metric_labels = self.get_metric_logits_labels(logits, labels, masks) + + acc = self._accuracy(logits=metric_logits, labels=metric_labels) + correct_counts, total_counts = self._accuracy.correct_counts_k, self._accuracy.total_counts_k + + self._macro_accuracy.update(preds=metric_logits, target=metric_labels) + stats = self._macro_accuracy._final_state() + + return { + f'{tag}_loss': loss_value, + f'{tag}_correct_counts': correct_counts, + f'{tag}_total_counts': total_counts, + f'{tag}_acc_micro': acc, + f'{tag}_acc_stats': stats, + } + + def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0, tag: str = 'val'): + val_loss_mean = torch.stack([x[f'{tag}_loss'] for x in outputs]).mean() + correct_counts = torch.stack([x[f'{tag}_correct_counts'] for x in outputs]).sum(axis=0) + total_counts = torch.stack([x[f'{tag}_total_counts'] for x in outputs]).sum(axis=0) + + self._accuracy.correct_counts_k = correct_counts + self._accuracy.total_counts_k = total_counts + topk_scores = self._accuracy.compute() + + self._macro_accuracy.tp = torch.stack([x[f'{tag}_acc_stats'][0] for x in outputs]).sum(axis=0) + self._macro_accuracy.fp = torch.stack([x[f'{tag}_acc_stats'][1] for x in outputs]).sum(axis=0) + self._macro_accuracy.tn = torch.stack([x[f'{tag}_acc_stats'][2] for x in outputs]).sum(axis=0) + self._macro_accuracy.fn = torch.stack([x[f'{tag}_acc_stats'][3] for x in outputs]).sum(axis=0) + macro_accuracy_score = self._macro_accuracy.compute() + + self._accuracy.reset() + self._macro_accuracy.reset() + + tensorboard_log = { + f'{tag}_loss': val_loss_mean, + f'{tag}_acc_macro': macro_accuracy_score, + } + + for top_k, score in zip(self._accuracy.top_k, topk_scores): + tensorboard_log[f'{tag}_acc_micro_top@{top_k}'] = score + + self.log_dict(tensorboard_log, sync_dist=True) + return tensorboard_log + + def test_step(self, batch, batch_idx, dataloader_idx=0): + return self.validation_step(batch, batch_idx, dataloader_idx, tag='test') + + def multi_test_epoch_end(self, outputs, dataloader_idx: int = 0): + return self.multi_validation_epoch_end(outputs, dataloader_idx, tag='test') + + def reshape_labels(self, logits, labels, logits_len, labels_len): + """ + Reshape labels to match logits shape. For example, each label is expected to cover a 40ms frame, while each frme prediction from the + model covers 20ms. If labels are shorter than logits, labels are repeated, otherwise labels are folded and argmax is applied to obtain + the label of each frame. When lengths of labels and logits are not factors of each other, labels are truncated or padded with zeros. + The threshold 0.2 is used to determine whether to pad or truncate labels, where the value 0.2 is not important as in real cases the ratio + is very close to either ceil(ratio) or floor(ratio). We use 0.2 here for easier unit-testing. + Args: + logits: logits tensor with shape [B, T1, C] + labels: labels tensor with shape [B, T2] + logits_len: logits length tensor with shape [B] + labels_len: labels length tensor with shape [B] + Returns: + labels: labels tensor with shape [B, T1] + labels_len: labels length tensor with shape [B] + """ + logits_max_len = logits.size(1) + labels_max_len = labels.size(1) + batch_size = logits.size(0) + if logits_max_len < labels_max_len: + ratio = labels_max_len // logits_max_len + res = labels_max_len % logits_max_len + if ceil(ratio) - ratio < 0.2: # e.g., ratio is 1.99 + # pad labels with zeros until labels_max_len is a multiple of logits_max_len + labels = labels.cpu().tolist() + if len(labels) % ceil(ratio) != 0: + labels += [0] * (ceil(ratio) - len(labels) % ceil(ratio)) + labels = torch.tensor(labels).long().to(logits.device) + labels = labels.view(-1, ceil(ratio)).amax(1) + return self.reshape_labels(logits, labels, logits_len, labels_len) + else: + # truncate additional labels until labels_max_len is a multiple of logits_max_len + if res > 0: + labels = labels[:, :-res] + mask = labels_len > (labels_max_len - res) + labels_len = labels_len - mask * (labels_len - (labels_max_len - res)) + labels = labels.view(batch_size, ratio, -1).amax(1) + labels_len = torch.div(labels_len, ratio, rounding_mode="floor") + labels_len = torch.min(torch.cat([logits_len[:, None], labels_len[:, None]], dim=1), dim=1)[0] + return labels.contiguous(), labels_len.contiguous() + elif logits_max_len > labels_max_len: + ratio = logits_max_len / labels_max_len + res = logits_max_len % labels_max_len + if ceil(ratio) - ratio < 0.2: # e.g., ratio is 1.99 + # repeat labels for ceil(ratio) times, and DROP additional labels based on logits_max_len + labels = labels.repeat_interleave(ceil(ratio), dim=1).long() + labels = labels[:, :logits_max_len] + labels_len = labels_len * ceil(ratio) + mask = labels_len > logits_max_len + labels_len = labels_len - mask * (labels_len - logits_max_len) + else: # e.g., ratio is 2.01 + # repeat labels for floor(ratio) times, and ADD padding labels based on logits_max_len + labels = labels.repeat_interleave(floor(ratio), dim=1).long() + labels_len = labels_len * floor(ratio) + if res > 0: + labels = torch.cat([labels, labels[:, -res:]], dim=1) + # no need to update `labels_len` since we ignore additional "res" padded labels + labels_len = torch.min(torch.cat([logits_len[:, None], labels_len[:, None]], dim=1), dim=1)[0] + return labels.contiguous(), labels_len.contiguous() + else: + labels_len = torch.min(torch.cat([logits_len[:, None], labels_len[:, None]], dim=1), dim=1)[0] + return labels, labels_len + + def get_metric_logits_labels(self, logits, labels, masks): + """ + Computes valid logits and labels for metric computation. + Args: + logits: tensor of shape [B, T, C] + labels: tensor of shape [B, T] + masks: tensor of shape [B, T] + Returns: + logits of shape [N, C] + labels of shape [N,] + """ + C = logits.size(2) + logits = logits.view(-1, C) # [BxT, C] + labels = labels.view(-1).contiguous() # [BxT,] + masks = masks.view(-1) # [BxT,] + idx = masks.nonzero() # [BxT, 1] + + logits = logits.gather(dim=0, index=idx.repeat(1, 2)) + labels = labels.gather(dim=0, index=idx.view(-1)) + + return logits, labels diff --git a/nemo/collections/asr/parts/preprocessing/features.py b/nemo/collections/asr/parts/preprocessing/features.py index 302c1a40dd74..c2e84b04e981 100644 --- a/nemo/collections/asr/parts/preprocessing/features.py +++ b/nemo/collections/asr/parts/preprocessing/features.py @@ -181,6 +181,8 @@ def process( trim_hop_length=512, orig_sr=None, channel_selector=None, + normalize_db=False, + normalize_db_target=-20.0, ): audio = AudioSegment.from_file( file_path, @@ -195,6 +197,8 @@ def process( trim_hop_length=trim_hop_length, orig_sr=orig_sr, channel_selector=channel_selector, + normalize_db=normalize_db, + normalize_db_target=normalize_db_target, ) return self.process_segment(audio) diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py index 8a4f14ee6036..af6034f9af3a 100644 --- a/nemo/collections/asr/parts/preprocessing/segment.py +++ b/nemo/collections/asr/parts/preprocessing/segment.py @@ -78,6 +78,8 @@ def __init__( trim_hop_length=512, orig_sr=None, channel_selector=None, + normalize_db=False, + normalize_db_target=-20.0, ): """Create audio segment from samples. Samples are convert float32 internally, with int scaled to [-1, 1]. @@ -112,6 +114,8 @@ def __init__( self._samples = samples self._sample_rate = sample_rate self._orig_sr = orig_sr if orig_sr is not None else sample_rate + if normalize_db: + self.normalize_db(normalize_db_target) def __eq__(self, other): """Return whether two objects are equal.""" @@ -181,6 +185,8 @@ def from_file( trim_hop_length=512, orig_sr=None, channel_selector=None, + normalize_db=False, + normalize_db_target=-20.0, ): """ Load a file supported by librosa and return as an AudioSegment. @@ -201,6 +207,8 @@ def from_file( :param channel selector: string denoting the downmix mode, an integer denoting the channel to be selected, or an iterable of integers denoting a subset of channels. Channel selector is using zero-based indexing. If set to `None`, the original signal will be used. + :param normalize_db (bool): if true, normalize the audio signal to a target RMS value + :param normalize_db_target (float): the target RMS value in decibels :return: AudioSegment instance """ samples = None @@ -274,6 +282,8 @@ def from_file( trim_hop_length=trim_hop_length, orig_sr=orig_sr, channel_selector=channel_selector, + normalize_db=normalize_db, + normalize_db_target=normalize_db_target, ) @classmethod @@ -342,9 +352,8 @@ def from_file_list( else: # Check the dimensions match if len(a_samples) != len(samples): - # import ipdb; ipdb.set_trace() raise RuntimeError( - f'Loaded samples need to have identical length: {a_samples.shape} != {sample.shape}' + f'Loaded samples need to have identical length: {a_samples.shape} != {samples.shape}' ) # Concatenate along channel dimension @@ -394,7 +403,7 @@ def segment_from_file( audio_start = math.floor(offset * sample_rate) if audio_start > max_audio_start: raise RuntimeError( - f'Provided audio start ({audio_start_seconds} seconds = {audio_start} samples) is larger than the maximum possible ({max_audio_start})' + f'Provided audio start ({audio_start}) is larger than the maximum possible ({max_audio_start})' ) f.seek(audio_start) samples = f.read(n_segments_at_original_sr, dtype='float32') @@ -455,6 +464,13 @@ def orig_sr(self): def gain_db(self, gain): self._samples *= 10.0 ** (gain / 20.0) + def normalize_db(self, target_db=-20): + """Normalize the signal to a target RMS value in decibels. + """ + rms_db = self.rms_db + gain = target_db - rms_db + self.gain_db(gain) + def pad(self, pad_size, symmetric=False): """Add zero padding to the sample. The pad size is given in number of samples. diff --git a/nemo/collections/common/parts/preprocessing/collections.py b/nemo/collections/common/parts/preprocessing/collections.py index fb495110c22f..4616f95e1a4f 100644 --- a/nemo/collections/common/parts/preprocessing/collections.py +++ b/nemo/collections/common/parts/preprocessing/collections.py @@ -295,9 +295,7 @@ def __init__( else: data.sort(key=lambda entity: entity.duration) - logging.info( - "Filtered duration for loading collection is %f.", duration_filtered, - ) + logging.info(f"Filtered duration for loading collection is {duration_filtered / 3600: .2f} hours.") logging.info(f"Dataset loaded with {len(data)} items, total duration of {total_duration / 3600: .2f} hours.") self.uniq_labels = sorted(set(map(lambda x: x.label, data))) logging.info("# {} files loaded accounting to # {} labels".format(len(data), len(self.uniq_labels))) @@ -313,6 +311,7 @@ def __init__( manifests_files: Union[str, List[str]], is_regression_task=False, cal_labels_occurrence=False, + delimiter=None, *args, **kwargs, ): @@ -323,24 +322,27 @@ def __init__( manifests to yield items from. is_regression_task: It's a regression task. cal_labels_occurrence: whether to calculate occurence of labels. + delimiter: separator for labels strings. *args: Args to pass to `SpeechLabel` constructor. **kwargs: Kwargs to pass to `SpeechLabel` constructor. """ audio_files, durations, labels, offsets = [], [], [], [] - + all_labels = [] for item in manifest.item_iter(manifests_files, parse_func=self.__parse_item): audio_files.append(item['audio_file']) durations.append(item['duration']) if not is_regression_task: label = item['label'] + label_list = label.split() if not delimiter else label.split(delimiter) else: label = float(item['label']) + label_list = [label] labels.append(label) offsets.append(item['offset']) - + all_labels.extend(label_list) if cal_labels_occurrence: - self.labels_occurrence = collections.Counter(labels) + self.labels_occurrence = collections.Counter(all_labels) super().__init__(audio_files, durations, labels, offsets, *args, **kwargs) @@ -968,13 +970,17 @@ def get_audio_file(item: Dict, manifest_key: Union[str, List[str]]): class FeatureLabel(_Collection): """List of feature sequence and their label correspondence with preprocessing.""" - OUTPUT_TYPE = collections.namedtuple(typename='FeatureLabelEntity', field_names='feature_file label',) + OUTPUT_TYPE = collections.namedtuple(typename='FeatureLabelEntity', field_names='feature_file label duration',) def __init__( self, feature_files: List[str], labels: List[str], + durations: List[float], + min_duration: Optional[float] = None, + max_duration: Optional[float] = None, max_number: Optional[int] = None, + do_sort_by_duration: bool = False, index_by_file_id: bool = False, ): """Instantiates feature-SequenceLabel manifest with filters and preprocessing. @@ -988,16 +994,26 @@ def __init__( output_type = self.OUTPUT_TYPE data = [] - + duration_filtered = 0.0 + total_duration = 0.0 self.uniq_labels = set() if index_by_file_id: self.mapping = {} - for feature_file, label in zip(feature_files, labels): + for feature_file, label, duration in zip(feature_files, labels, durations): + # Duration filters. + if min_duration is not None and duration < min_duration: + duration_filtered += duration + continue + + if max_duration is not None and duration > max_duration: + duration_filtered += duration + continue - data.append(output_type(feature_file, label)) + data.append(output_type(feature_file, label, duration)) self.uniq_labels |= set(label) + total_duration += duration if index_by_file_id: file_id, _ = os.path.splitext(os.path.basename(feature_file)) @@ -1007,6 +1023,14 @@ def __init__( if len(data) == max_number: break + if do_sort_by_duration: + if index_by_file_id: + logging.warning("Tried to sort dataset by duration, but cannot since index_by_file_id is set.") + else: + data.sort(key=lambda entity: entity.duration) + + logging.info(f"Filtered duration for loading collection is {duration_filtered / 2600:.2f} hours.") + logging.info(f"Dataset loaded with {len(data)} items, total duration of {total_duration / 3600: .2f} hours.") logging.info("# {} files loaded including # {} unique labels".format(len(data), len(self.uniq_labels))) super().__init__(data) @@ -1015,7 +1039,13 @@ class ASRFeatureLabel(FeatureLabel): """`FeatureLabel` collector from asr structured json files.""" def __init__( - self, manifests_files: Union[str, List[str]], max_number: Optional[int] = None, index_by_file_id: bool = False, + self, + manifests_files: Union[str, List[str]], + is_regression_task: bool = False, + cal_labels_occurrence: bool = False, + delimiter: Optional[str] = None, + *args, + **kwargs, ): """Parse lists of feature files and sequences of labels. @@ -1027,12 +1057,25 @@ def __init__( index_by_file_id: If True, saves a mapping from filename base (ID) to index in data; pass to `FeatureSequenceLabel` constructor. """ - feature_files, labels = [], [] + feature_files, labels, durations = [], [], [] + all_labels = [] for item in manifest.item_iter(manifests_files, parse_func=self._parse_item): feature_files.append(item['feature_file']) - labels.append(item['label']) + durations.append(item['duration']) + + if not is_regression_task: + label = item['label'] + label_list = label.split() if not delimiter else label.split(delimiter) + else: + label = float(item['label']) + label_list = [label] - super().__init__(feature_files, labels, max_number, index_by_file_id) + labels.append(label) + all_labels.extend(label_list) + if cal_labels_occurrence: + self.labels_occurrence = collections.Counter(all_labels) + + super().__init__(feature_files, labels, durations, *args, **kwargs) def _parse_item(self, line: str, manifest_file: str) -> Dict[str, Any]: item = json.loads(line) @@ -1046,7 +1089,7 @@ def _parse_item(self, line: str, manifest_file: str) -> Dict[str, Any]: raise ValueError( f"Manifest file has invalid json line " f"structure: {line} without proper 'feature_file' key." ) - item['feature_file'] = os.path.expanduser(item['feature_file']) + item['feature_file'] = manifest.get_full_path(audio_file=item['feature_file'], manifest_file=manifest_file) # Label. if 'label' in item: @@ -1054,7 +1097,7 @@ def _parse_item(self, line: str, manifest_file: str) -> Dict[str, Any]: else: raise ValueError(f"Manifest file has invalid json line structure: {line} without proper 'label' key.") - item = dict(feature_file=item['feature_file'], label=item['label'],) + item = dict(feature_file=item['feature_file'], label=item['label'], duration=item['duration']) return item diff --git a/tests/collections/asr/test_asr_classification_model.py b/tests/collections/asr/test_asr_classification_model.py index a543003f50f1..44125de92b3d 100644 --- a/tests/collections/asr/test_asr_classification_model.py +++ b/tests/collections/asr/test_asr_classification_model.py @@ -20,7 +20,7 @@ from omegaconf import DictConfig, ListConfig from nemo.collections.asr.data import audio_to_label -from nemo.collections.asr.models import EncDecClassificationModel, configs +from nemo.collections.asr.models import EncDecClassificationModel, EncDecFrameClassificationModel, configs from nemo.utils.config_utils import assert_dataclass_signature_match @@ -67,6 +67,49 @@ def speech_classification_model(): return model +@pytest.fixture() +def frame_classification_model(): + preprocessor = {'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor', 'params': dict({})} + encoder = { + 'cls': 'nemo.collections.asr.modules.ConvASREncoder', + 'params': { + 'feat_in': 64, + 'activation': 'relu', + 'conv_mask': True, + 'jasper': [ + { + 'filters': 32, + 'repeat': 1, + 'kernel': [1], + 'stride': [1], + 'dilation': [1], + 'dropout': 0.0, + 'residual': False, + 'separable': True, + 'se': True, + 'se_context_size': -1, + } + ], + }, + } + + decoder = { + 'cls': 'nemo.collections.asr.modules.ConvASRDecoderClassification', + 'params': {'feat_in': 32, 'num_classes': 5,}, + } + + modelConfig = DictConfig( + { + 'preprocessor': DictConfig(preprocessor), + 'encoder': DictConfig(encoder), + 'decoder': DictConfig(decoder), + 'labels': ListConfig(["dummy_cls_{}".format(i + 1) for i in range(5)]), + } + ) + model = EncDecFrameClassificationModel(cfg=modelConfig) + return model + + class TestEncDecClassificationModel: @pytest.mark.unit def test_constructor(self, speech_classification_model): @@ -209,3 +252,104 @@ def test_EncDecClassificationDatasetConfig_for_AudioToSpeechLabelDataset(self): assert signatures_match assert cls_subset is None assert dataclass_subset is None + + +class TestEncDecFrameClassificationModel(TestEncDecClassificationModel): + @pytest.mark.unit + def test_reshape_labels(self, frame_classification_model): + model = frame_classification_model.eval() + + logits = torch.ones(4, 20, 2) + labels = torch.ones(4, 10) + logits_len = torch.tensor([6, 7, 8, 9]) + labels_len = torch.tensor([5, 6, 7, 8]) + labels_new, labels_len_new = model.reshape_labels( + logits=logits, labels=labels, logits_len=logits_len, labels_len=labels_len + ) + assert labels_new.size(1) == logits.size(1) + assert torch.equal(labels_len_new, torch.tensor([6, 7, 8, 9])) + + logits = torch.ones(4, 21, 2) + labels = torch.ones(4, 10) + logits_len = torch.tensor([6, 7, 8, 9]) + labels_len = torch.tensor([5, 6, 7, 8]) + labels_new, labels_len_new = model.reshape_labels( + logits=logits, labels=labels, logits_len=logits_len, labels_len=labels_len + ) + assert labels_new.size(1) == logits.size(1) + assert torch.equal(labels_len_new, torch.tensor([6, 7, 8, 9])) + + logits = torch.ones(4, 19, 2) + labels = torch.ones(4, 10) + logits_len = torch.tensor([6, 7, 8, 9]) + labels_len = torch.tensor([5, 6, 7, 8]) + labels_new, labels_len_new = model.reshape_labels( + logits=logits, labels=labels, logits_len=logits_len, labels_len=labels_len + ) + assert labels_new.size(1) == logits.size(1) + assert torch.equal(labels_len_new, torch.tensor([6, 7, 8, 9])) + + logits = torch.ones(4, 20, 2) + labels = torch.ones(4, 9) + logits_len = torch.tensor([6, 7, 8, 9]) + labels_len = torch.tensor([5, 6, 7, 8]) + labels_new, labels_len_new = model.reshape_labels( + logits=logits, labels=labels, logits_len=logits_len, labels_len=labels_len + ) + assert labels_new.size(1) == logits.size(1) + assert torch.equal(labels_len_new, torch.tensor([6, 7, 8, 9])) + + logits = torch.ones(4, 20, 2) + labels = torch.ones(4, 11) + logits_len = torch.tensor([6, 7, 8, 9]) + labels_len = torch.tensor([5, 6, 7, 8]) + labels_new, labels_len_new = model.reshape_labels( + logits=logits, labels=labels, logits_len=logits_len, labels_len=labels_len + ) + assert labels_new.size(1) == logits.size(1) + assert torch.equal(labels_len_new, torch.tensor([6, 7, 8, 9])) + + @pytest.mark.unit + def test_EncDecClassificationDatasetConfig_for_AudioToMultiSpeechLabelDataset(self): + # ignore some additional arguments as dataclass is generic + IGNORE_ARGS = [ + 'is_tarred', + 'num_workers', + 'batch_size', + 'tarred_audio_filepaths', + 'shuffle', + 'pin_memory', + 'drop_last', + 'tarred_shard_strategy', + 'shuffle_n', + # `featurizer` is supplied at runtime + 'featurizer', + # additional ignored arguments + 'vad_stream', + 'int_values', + 'sample_rate', + 'normalize_audio', + 'augmentor', + 'bucketing_batch_size', + 'bucketing_strategy', + 'bucketing_weights', + 'delimiter', + 'normalize_audio_db', + 'normalize_audio_db_target', + 'window_length_in_sec', + 'shift_length_in_sec', + ] + + REMAP_ARGS = {'trim_silence': 'trim'} + + result = assert_dataclass_signature_match( + audio_to_label.AudioToMultiLabelDataset, + configs.EncDecClassificationDatasetConfig, + ignore_args=IGNORE_ARGS, + remap_args=REMAP_ARGS, + ) + signatures_match, cls_subset, dataclass_subset = result + + assert signatures_match + assert cls_subset is None + assert dataclass_subset is None diff --git a/tests/collections/asr/test_label_datasets.py b/tests/collections/asr/test_label_datasets.py index 0c2b54dc24e2..4cf8a1d27f3c 100644 --- a/tests/collections/asr/test_label_datasets.py +++ b/tests/collections/asr/test_label_datasets.py @@ -15,10 +15,12 @@ import os import tempfile +import numpy as np import pytest +import soundfile as sf import torch -from nemo.collections.asr.data.audio_to_label import TarredAudioToClassificationLabelDataset +from nemo.collections.asr.data.audio_to_label import AudioToMultiLabelDataset, TarredAudioToClassificationLabelDataset from nemo.collections.asr.data.feature_to_label import FeatureToLabelDataset, FeatureToSeqSpeakerLabelDataset from nemo.collections.asr.parts.preprocessing.feature_loader import ExternalFeatureLoader from nemo.collections.asr.parts.preprocessing.features import WaveformFeaturizer @@ -132,3 +134,29 @@ def test_feat_label_dataset(self): for _ in dataset: count += 1 assert count == 2 + + @pytest.mark.unit + def test_audio_multilabel_dataset(self): + with tempfile.TemporaryDirectory() as tmpdir: + manifest_path = os.path.join(tmpdir, 'manifest_input.json') + with open(manifest_path, 'w', encoding='utf-8') as fp: + for i in range(2): + audio_file = os.path.join(tmpdir, f"audio_{i}.wav") + data = np.random.normal(0, 1, 16000 * 10) + sf.write(audio_file, data, 16000) + entry = {'audio_filepath': audio_file, 'duration': 10, 'label': '0 1 0 1'} + fp.write(json.dumps(entry) + '\n') + + dataset = AudioToMultiLabelDataset(manifest_filepath=manifest_path, sample_rate=16000, labels=['0', '1']) + + correct_label = torch.tensor([0, 1, 0, 1]) + correct_label_length = torch.tensor(4) + + assert dataset[0][0].shape == torch.tensor([0.1] * 160000).shape + assert torch.equal(dataset[0][2], correct_label) + assert torch.equal(dataset[0][3], correct_label_length) + + count = 0 + for _ in dataset: + count += 1 + assert count == 2 From bed428875f93931c12a6287827484d8133a3b6a6 Mon Sep 17 00:00:00 2001 From: Abhinav Khattar Date: Tue, 2 May 2023 13:57:38 -0700 Subject: [PATCH 050/512] Support dynamic length batches with GPT SFT (#6510) * Support synamic length with GPT SFT Signed-off-by: Abhinav Khattar * make branch functional Signed-off-by: Abhinav Khattar --------- Signed-off-by: Abhinav Khattar --- .../language_modeling/megatron_gpt_model.py | 120 ++++++++---------- .../megatron_gpt_sft_model.py | 65 +++++++++- .../nlp/modules/common/megatron/utils.py | 16 ++- 3 files changed, 126 insertions(+), 75 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index d8f90c500182..8defb94fd3c1 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -330,6 +330,56 @@ def forward(self, tokens, text_position_ids, attention_mask, labels): output_tensor = self.model(tokens, text_position_ids, attention_mask, labels=labels) return output_tensor + def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): + tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + fwd_bwd_function = get_forward_backward_func() + + # TODO @akhattar: remove sync related stuff from config, add num_micro_batches_with_partial_activation_checkpoints when ready + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), + forward_only=forward_only, + tensor_shape=tensor_shape, + dtype=self.autocast_dtype, + grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, + ) + + # only the last stages of the pipeline return losses + if losses_reduced_per_micro_batch: + if (not forward_only) or self.cfg.data.get('validation_drop_last', True): + # average loss across micro batches + loss_tensors_list = [loss_reduced['avg'] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.concat(loss_tensors_list) + loss_mean = loss_tensor.mean() + else: + # Get the total loss since micro batches sizes are not uniform + loss_sum_tensors_list = [ + loss_sum['loss_sum_and_ub_size'] + for loss_sum in losses_reduced_per_micro_batch + if loss_sum['loss_sum_and_ub_size'][1] > 0 + ] + loss_sum = ( + torch.vstack(loss_sum_tensors_list).sum(axis=0) + if len(loss_sum_tensors_list) > 0 + else torch.tensor([0.0, 0.0]).cuda() + ) + return loss_sum + else: + # we're not on the last pipeline stage so no losses + if forward_only: + loss_mean = [] + else: + loss_mean = torch.tensor(0.0).cuda() + + return loss_mean + def training_step(self, dataloader_iter, batch_idx): """ We pass the dataloader iterator function to the micro-batch scheduler. @@ -358,34 +408,7 @@ def training_step(self, dataloader_iter, batch_idx): for param in module.embedding.parameters(): param.data_ptr() - tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] - - # run forward and backwards passes for an entire global batch - # we do this inside training_step to support pipeline parallelism - fwd_bwd_function = get_forward_backward_func() - - # TODO @akhattar: remove sync related stuff from config, add num_micro_batches_with_partial_activation_checkpoints when ready - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(), - data_iterator=dataloader_iter, - model=[self.model], - num_microbatches=get_num_microbatches(), - forward_only=False, - tensor_shape=tensor_shape, - dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, - sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, - ) - - # only the last stages of the pipeline return losses - if losses_reduced_per_micro_batch: - # average loss across micro batches - loss_tensors_list = [loss_reduced['avg'] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.concat(loss_tensors_list) - loss_mean = loss_tensor.mean() - else: - loss_mean = torch.tensor(0.0).cuda() + loss_mean = self.fwd_bwd_step(dataloader_iter, batch_idx, False) # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): @@ -642,46 +665,7 @@ def validation_step(self, dataloader_iter, batch_idx): The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. """ - tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] - - # run forward passes for an entire global batch - # we do this inside validation_step to support pipeline parallelism - fwd_bwd_function = get_forward_backward_func() - - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(validation_step=True), - data_iterator=dataloader_iter, - model=[self.model], - num_microbatches=get_num_microbatches(), - forward_only=True, - tensor_shape=tensor_shape, - dtype=self.autocast_dtype, - sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, - ) - - # only the last stage of the pipeline returns losses - if losses_reduced_per_micro_batch: - if self.cfg.data.get('validation_drop_last', True): - # average loss across micro batches - loss_tensors_list = [loss_reduced['avg'] for loss_reduced in losses_reduced_per_micro_batch] - return torch.concat(loss_tensors_list).mean() - else: - # Get the total loss since micro batches sizes are not uniform - loss_sum_tensors_list = [ - loss_sum['loss_sum_and_ub_size'] - for loss_sum in losses_reduced_per_micro_batch - if loss_sum['loss_sum_and_ub_size'][1] > 0 - ] - loss_sum = ( - torch.vstack(loss_sum_tensors_list).sum(axis=0) - if len(loss_sum_tensors_list) > 0 - else torch.tensor([0.0, 0.0]).cuda() - ) - return loss_sum - else: - # we're not on the last pipeline stage so no losses - return [] + return self.fwd_bwd_step(dataloader_iter, batch_idx, True) def validation_epoch_end(self, outputs): if parallel_state.is_pipeline_last_stage(): diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index d77256c0d813..56a4496b800b 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -23,14 +23,21 @@ get_datasets_weights_and_num_samples, ) from nemo.collections.nlp.data.language_modeling.megatron.blendable_dataset import BlendableDataset -from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import MegatronPretrainingSampler from nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset import GPTSFTDataset +from nemo.collections.nlp.data.language_modeling.megatron.megatron_batch_samplers import ( + MegatronPretrainingBatchSampler, +) from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.modules.common.megatron.utils import get_iterator_k_split from nemo.collections.nlp.modules.common.text_generation_utils import LengthParam, SamplingParam, megatron_gpt_generate from nemo.utils import AppState, logging try: - from apex.transformer.pipeline_parallel.utils import _reconfigure_microbatch_calculator + from apex.transformer.pipeline_parallel.utils import ( + _reconfigure_microbatch_calculator, + get_micro_batch_size, + get_num_microbatches, + ) HAVE_APEX = True except (ImportError, ModuleNotFoundError): @@ -38,6 +45,7 @@ try: from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func HAVE_MEGATRON_CORE = True @@ -237,6 +245,7 @@ def _build_dataset(self, data_cfg, is_train=True): ), answer_only_loss=self.cfg.get('answer_only_loss', True), truncation_field=data_cfg.get('truncation_field', 'context'), + pad_to_max_length=False, index_mapping_dir=data_cfg.get('index_mapping_dir', None), prompt_template=data_cfg.get('prompt_template', None), ) @@ -264,6 +273,56 @@ def _determine_log_key(self, data_config, dataloader_idx, metric_name, mode): else: return base_key + f"dataloader{dataloader_idx}" + def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): + batch = next(dataloader_iter) + _, seq_length = batch['tokens'].shape + tensor_shape = [seq_length, get_micro_batch_size(), self.cfg.hidden_size] + data_iter = get_iterator_k_split(batch, get_num_microbatches()) + + fwd_bwd_function = get_forward_backward_func() + + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=data_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), + forward_only=forward_only, + tensor_shape=tensor_shape, + dtype=self.autocast_dtype, + grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, + ) + + # only the last stages of the pipeline return losses + if losses_reduced_per_micro_batch: + if (not forward_only) or self.cfg.data.get('validation_drop_last', True): + # average loss across micro batches + loss_tensors_list = [loss_reduced['avg'] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.concat(loss_tensors_list) + loss_mean = loss_tensor.mean() + else: + # Get the total loss since micro batches sizes are not uniform + loss_sum_tensors_list = [ + loss_sum['loss_sum_and_ub_size'] + for loss_sum in losses_reduced_per_micro_batch + if loss_sum['loss_sum_and_ub_size'][1] > 0 + ] + loss_sum = ( + torch.vstack(loss_sum_tensors_list).sum(axis=0) + if len(loss_sum_tensors_list) > 0 + else torch.tensor([0.0, 0.0]).cuda() + ) + return loss_sum + else: + # we're not on the last pipeline stage so no losses + if forward_only: + loss_mean = [] + else: + loss_mean = torch.tensor(0.0).cuda() + + return loss_mean + def validation_step(self, dataloader_iter, batch_idx, dataloader_idx=0): return self.inference_step(dataloader_iter, batch_idx, 'validation', dataloader_idx) @@ -561,7 +620,7 @@ def build_data_loader(self, dataset, data_cfg, consumed_samples=0): else: collate_fn = dataset.collate_fn - batch_sampler = MegatronPretrainingSampler( + batch_sampler = MegatronPretrainingBatchSampler( total_samples=len(dataset), consumed_samples=consumed_samples, micro_batch_size=data_cfg.micro_batch_size, diff --git a/nemo/collections/nlp/modules/common/megatron/utils.py b/nemo/collections/nlp/modules/common/megatron/utils.py index 696f4c257822..8ef46c10d49b 100644 --- a/nemo/collections/nlp/modules/common/megatron/utils.py +++ b/nemo/collections/nlp/modules/common/megatron/utils.py @@ -368,8 +368,16 @@ def get_all_params_for_weight_decay_optimization( return ({'params': weight_decay_params},) -def get_iterator_k_split(batch: List[torch.Tensor], microbatches: int) -> Iterator: - assert batch[0].shape[0] % microbatches == 0, "Issue with batch size configuration!" - split_batch = [torch.tensor_split(item, microbatches, dim=0) for item in batch] - microbatches = [[elem[i] for elem in split_batch] for i in range(microbatches)] +def get_iterator_k_split(batch: List[torch.Tensor], num_microbatches: int) -> Iterator: + if isinstance(batch, dict): + items = list(batch.items()) + assert items[0][1].shape[0] % num_microbatches == 0, "Issue with batch size configuration!" + split_batch = [torch.tensor_split(item[1], num_microbatches, dim=0) for item in items] + microbatches = [[(items[i][0], split_batch[i][j]) for i in range(len(items))] for j in range(num_microbatches)] + microbatches = [dict(elem) for elem in microbatches] + else: + assert batch[0].shape[0] % num_microbatches == 0, "Issue with batch size configuration!" + split_batch = [torch.tensor_split(item, num_microbatches, dim=0) for item in batch] + microbatches = [[elem[i] for elem in split_batch] for i in range(num_microbatches)] + return itertools.chain(microbatches) From 025ba9ee85e61d152f96e743fdb5dc901f844451 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 2 May 2023 17:53:35 -0700 Subject: [PATCH 051/512] added back the fast emit section to the configs. (#6540) (#6542) * added back the fast emit section to the configs. * added back the fast emit section to the configs. --------- Signed-off-by: Vahid Co-authored-by: Vahid Noroozi --- .../fastconformer_transducer_bpe_streaming.yaml | 6 ++++++ .../fastconformer_transducer_char_streaming.yaml | 6 ++++++ .../fastconformer_hybrid_transducer_ctc_bpe_streaming.yaml | 6 ++++++ .../fastconformer_hybrid_transducer_ctc_char_streaming.yaml | 6 ++++++ .../fastconformer_hybrid_transducer_ctc_bpe.yaml | 6 ++++++ .../fastconformer_hybrid_transducer_ctc_char.yaml | 6 ++++++ 6 files changed, 36 insertions(+) diff --git a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_bpe_streaming.yaml b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_bpe_streaming.yaml index 94e8d56d6e5b..5f223061a420 100644 --- a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_bpe_streaming.yaml +++ b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_bpe_streaming.yaml @@ -189,6 +189,12 @@ model: loss: loss_name: "default" + warprnnt_numba_kwargs: + # FastEmit regularization: https://arxiv.org/abs/2010.11148 + # You may enable FastEmit to reduce the latency of the model for streaming + # It also helps to improve the accuracy of the model in streaming mode + fastemit_lambda: 1e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. optim: name: adamw diff --git a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_char_streaming.yaml b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_char_streaming.yaml index 71fbad88aeb2..68a78ba60aac 100644 --- a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_char_streaming.yaml +++ b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_char_streaming.yaml @@ -194,6 +194,12 @@ model: loss: loss_name: "default" + warprnnt_numba_kwargs: + # FastEmit regularization: https://arxiv.org/abs/2010.11148 + # You may enable FastEmit to reduce the latency of the model for streaming + # It also helps to improve the accuracy of the model in streaming mode + fastemit_lambda: 1e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. optim: name: adamw diff --git a/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_bpe_streaming.yaml b/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_bpe_streaming.yaml index 619a608fa86f..8b7a2ce7b39d 100644 --- a/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_bpe_streaming.yaml +++ b/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_bpe_streaming.yaml @@ -204,6 +204,12 @@ model: loss: loss_name: "default" + warprnnt_numba_kwargs: + # FastEmit regularization: https://arxiv.org/abs/2010.11148 + # You may enable FastEmit to reduce the latency of the model for streaming + # It also helps to improve the accuracy of the model in streaming mode + fastemit_lambda: 1e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. optim: name: adamw diff --git a/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_char_streaming.yaml b/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_char_streaming.yaml index fc3176485b34..a24829b50788 100644 --- a/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_char_streaming.yaml +++ b/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_char_streaming.yaml @@ -209,6 +209,12 @@ model: loss: loss_name: "default" + warprnnt_numba_kwargs: + # FastEmit regularization: https://arxiv.org/abs/2010.11148 + # You may enable FastEmit to reduce the latency of the model for streaming + # It also helps to improve the accuracy of the model in streaming mode + fastemit_lambda: 1e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. optim: name: adamw diff --git a/examples/asr/conf/fastconformer/hybrid_transducer_ctc/fastconformer_hybrid_transducer_ctc_bpe.yaml b/examples/asr/conf/fastconformer/hybrid_transducer_ctc/fastconformer_hybrid_transducer_ctc_bpe.yaml index 3b7d37a39bc6..ba68eb34513b 100644 --- a/examples/asr/conf/fastconformer/hybrid_transducer_ctc/fastconformer_hybrid_transducer_ctc_bpe.yaml +++ b/examples/asr/conf/fastconformer/hybrid_transducer_ctc/fastconformer_hybrid_transducer_ctc_bpe.yaml @@ -192,6 +192,12 @@ model: loss: loss_name: "default" + warprnnt_numba_kwargs: + # FastEmit regularization: https://arxiv.org/abs/2010.11148 + # You may enable FastEmit to reduce the latency of the model for streaming + # It also helps to improve the accuracy of the model in streaming mode + fastemit_lambda: 0.0 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. optim: name: adamw diff --git a/examples/asr/conf/fastconformer/hybrid_transducer_ctc/fastconformer_hybrid_transducer_ctc_char.yaml b/examples/asr/conf/fastconformer/hybrid_transducer_ctc/fastconformer_hybrid_transducer_ctc_char.yaml index c87a66227043..5e41d73b9261 100644 --- a/examples/asr/conf/fastconformer/hybrid_transducer_ctc/fastconformer_hybrid_transducer_ctc_char.yaml +++ b/examples/asr/conf/fastconformer/hybrid_transducer_ctc/fastconformer_hybrid_transducer_ctc_char.yaml @@ -197,6 +197,12 @@ model: loss: loss_name: "default" + warprnnt_numba_kwargs: + # FastEmit regularization: https://arxiv.org/abs/2010.11148 + # You may enable FastEmit to reduce the latency of the model for streaming + # It also helps to improve the accuracy of the model in streaming mode + fastemit_lambda: 0.0 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. optim: name: adamw From 044a44c92ef8de0540950c38b78bc6992b3201cd Mon Sep 17 00:00:00 2001 From: Dima Rekesh Date: Tue, 2 May 2023 22:41:58 -0700 Subject: [PATCH 052/512] removing unnessary avoid_bfloat16_autocast_context (#6481) Signed-off-by: Dima Rekesh --- nemo/collections/asr/parts/submodules/subsampling.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/nemo/collections/asr/parts/submodules/subsampling.py b/nemo/collections/asr/parts/submodules/subsampling.py index c10f85403b25..4358d09977fe 100644 --- a/nemo/collections/asr/parts/submodules/subsampling.py +++ b/nemo/collections/asr/parts/submodules/subsampling.py @@ -19,7 +19,6 @@ from torch.nn import LayerNorm from nemo.collections.asr.parts.submodules.causal_convs import CausalConv2D -from nemo.utils import avoid_bfloat16_autocast_context class StackingSubsampling(torch.nn.Module): @@ -265,13 +264,7 @@ def forward(self, x, lengths): ) x = x.unsqueeze(1) - if self._subsampling in ['striding', 'dw_striding']: - # added in order to prevent slowdown in torch.nn.Conv2d with bfloat16 / CUDNN v8 API - # to be removed once the above is fixed in cudnn - with avoid_bfloat16_autocast_context(): - x = self.conv(x) - else: - x = self.conv(x) + x = self.conv(x) b, c, t, f = x.size() x = self.out(x.transpose(1, 2).reshape(b, t, -1)) From 50a28fb8286729f66663c31c2f440cd7ce343e3c Mon Sep 17 00:00:00 2001 From: Dima Rekesh Date: Wed, 3 May 2023 07:53:59 -0700 Subject: [PATCH 053/512] FC models in menu (#6473) * FC models in menu Signed-off-by: Dima Rekesh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Dima Rekesh Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- nemo/collections/asr/models/ctc_bpe_models.py | 14 ++++++++++++++ nemo/collections/asr/models/rnnt_bpe_models.py | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/nemo/collections/asr/models/ctc_bpe_models.py b/nemo/collections/asr/models/ctc_bpe_models.py index 335035225850..a82f218d1d69 100644 --- a/nemo/collections/asr/models/ctc_bpe_models.py +++ b/nemo/collections/asr/models/ctc_bpe_models.py @@ -597,4 +597,18 @@ def list_available_models(cls) -> List[PretrainedModelInfo]: ) results.append(model) + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_ctc_large", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_large", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_ctc_large/versions/1.0.0/files/stt_en_fastconformer_ctc_large.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_ctc_large_ls", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_large_ls", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_ctc_large_ls/versions/1.0.0/files/stt_en_fastconformer_ctc_large_ls.nemo", + ) + results.append(model) + return results diff --git a/nemo/collections/asr/models/rnnt_bpe_models.py b/nemo/collections/asr/models/rnnt_bpe_models.py index fd891bdb2911..b162f2411450 100644 --- a/nemo/collections/asr/models/rnnt_bpe_models.py +++ b/nemo/collections/asr/models/rnnt_bpe_models.py @@ -246,6 +246,20 @@ def list_available_models(cls) -> List[PretrainedModelInfo]: ) results.append(model) + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_transducer_large", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_large", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_transducer_large/versions/1.0.0/files/stt_en_fastconformer_transducer_large.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_transducer_large_ls", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_large_ls", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_transducer_large_ls/versions/1.0.0/files/stt_en_fastconformer_transducer_large_ls.nemo", + ) + results.append(model) + return results def __init__(self, cfg: DictConfig, trainer: Trainer = None): From 4d3d58a8ba8e2407b7676471d9eb3639fc7b532a Mon Sep 17 00:00:00 2001 From: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Date: Wed, 3 May 2023 08:27:38 -0700 Subject: [PATCH 054/512] [TTS] Add tutorials for FastPitch TTS speaker adaptation with adapters (#6431) * Add tts adapter tutorial Signed-off-by: hsiehjackson * Update main tutorial Signed-off-by: hsiehjackson * Add tts adapter tutorial Signed-off-by: hsiehjackson * Update main tutorial Signed-off-by: hsiehjackson * Update tutorial Signed-off-by: hsiehjackson * Follow comments Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Follow comments Signed-off-by: hsiehjackson * Fix load .nemo error Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Support multi-speaker fine-tune Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Follow comments Signed-off-by: hsiehjackson * Use .nemo Signed-off-by: hsiehjackson * Follow Comments Signed-off-by: hsiehjackson * Fix bug Signed-off-by: hsiehjackson * Fix bug Signed-off-by: hsiehjackson * Fix bug Signed-off-by: hsiehjackson * Add precomputed speaker emb Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix space Signed-off-by: hsiehjackson * Remove repeated argument Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * optional batch size Signed-off-by: hsiehjackson * Fix comments in notebook Signed-off-by: hsiehjackson --------- Signed-off-by: hsiehjackson Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../conf/fastpitch_align_44100_adapter.yaml | 11 +- nemo/collections/tts/models/fastpitch.py | 3 + nemo/collections/tts/modules/fastpitch.py | 16 +- nemo/collections/tts/modules/submodules.py | 22 +- .../tts/extract_sup_data.py | 2 +- .../tts/resynthesize_dataset.py | 11 + .../tts/FastPitch_Adapter_Finetuning.ipynb | 827 ++++++++++++++++++ .../FastPitch_MultiSpeaker_Pretraining.ipynb | 735 ++++++++++++++++ 8 files changed, 1613 insertions(+), 14 deletions(-) create mode 100644 tutorials/tts/FastPitch_Adapter_Finetuning.ipynb create mode 100644 tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb diff --git a/examples/tts/conf/fastpitch_align_44100_adapter.yaml b/examples/tts/conf/fastpitch_align_44100_adapter.yaml index bac6a64b06e9..b2957b057d28 100644 --- a/examples/tts/conf/fastpitch_align_44100_adapter.yaml +++ b/examples/tts/conf/fastpitch_align_44100_adapter.yaml @@ -208,7 +208,7 @@ model: dropatt: 0.1 dropemb: 0.0 d_embed: ${model.symbols_embedding_dim} - condition_types: [ "add", "layernorm" ] # options: [ "add", "cat", "layernorm" ] + condition_types: [ "add", "layernorm" ] # options: [ "add", "concat", "layernorm" ] output_fft: _target_: nemo.collections.tts.modules.transformer.FFTransformerDecoder @@ -221,12 +221,12 @@ model: dropout: 0.1 dropatt: 0.1 dropemb: 0.0 - condition_types: [ "add", "layernorm" ] # options: [ "add", "cat", "layernorm" ] + condition_types: [ "add", "layernorm" ] # options: [ "add", "concat", "layernorm" ] alignment_module: _target_: nemo.collections.tts.modules.aligner.AlignmentEncoder n_text_channels: ${model.symbols_embedding_dim} - condition_types: [ "add" ] # options: [ "add", "cat" ] + condition_types: [ "add" ] # options: [ "add", "concat" ] duration_predictor: _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor @@ -235,7 +235,7 @@ model: filter_size: 256 dropout: 0.1 n_layers: 2 - condition_types: [ "add", "layernorm" ] # options: [ "add", "cat", "layernorm" ] + condition_types: [ "add", "layernorm" ] # options: [ "add", "concat", "layernorm" ] pitch_predictor: _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor @@ -244,10 +244,11 @@ model: filter_size: 256 dropout: 0.1 n_layers: 2 - condition_types: [ "add", "layernorm" ] # options: [ "add", "cat", "layernorm" ] + condition_types: [ "add", "layernorm" ] # options: [ "add", "concat", "layernorm" ] speaker_encoder: _target_: nemo.collections.tts.modules.submodules.SpeakerEncoder + precomputed_embedding_dim: null lookup_module: _target_: nemo.collections.tts.modules.submodules.SpeakerLookupTable n_speakers: ??? diff --git a/nemo/collections/tts/models/fastpitch.py b/nemo/collections/tts/models/fastpitch.py index 5502e69a3111..28185c8f8622 100644 --- a/nemo/collections/tts/models/fastpitch.py +++ b/nemo/collections/tts/models/fastpitch.py @@ -183,6 +183,9 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): if self.fastpitch.speaker_emb is not None: self.export_config["num_speakers"] = cfg.n_speakers + # Adapter modules setup (from FastPitchAdapterModelMixin) + self.setup_adapters() + def _get_default_text_tokenizer_conf(self): text_tokenizer: TextTokenizerConfig = TextTokenizerConfig() return OmegaConf.create(OmegaConf.to_yaml(text_tokenizer)) diff --git a/nemo/collections/tts/modules/fastpitch.py b/nemo/collections/tts/modules/fastpitch.py index 5f2227a999db..77dff7bc85ed 100644 --- a/nemo/collections/tts/modules/fastpitch.py +++ b/nemo/collections/tts/modules/fastpitch.py @@ -177,7 +177,6 @@ def __init__( self.learn_alignment = aligner is not None self.use_duration_predictor = True self.binarize = False - # TODO: combine self.speaker_emb with self.speaker_encoder # cfg: remove `n_speakers`, create `speaker_encoder.lookup_module` # state_dict: move `speaker_emb.weight` to `speaker_encoder.lookup_module.table.weight` @@ -244,10 +243,10 @@ def output_types(self): "energy_tgt": NeuralType(('B', 'T_audio'), RegressionValuesType()), } - def get_speaker_embedding(self, speaker, reference_spec, reference_spec_lens): + def get_speaker_embedding(self, batch_size, speaker, reference_spec, reference_spec_lens): """spk_emb: Bx1xD""" if self.speaker_encoder is not None: - spk_emb = self.speaker_encoder(speaker, reference_spec, reference_spec_lens).unsqueeze(1) + spk_emb = self.speaker_encoder(batch_size, speaker, reference_spec, reference_spec_lens).unsqueeze(1) elif self.speaker_emb is not None: if speaker is None: raise ValueError('Please give speaker id to get lookup speaker embedding.') @@ -281,7 +280,10 @@ def forward( # Calculate speaker embedding spk_emb = self.get_speaker_embedding( - speaker=speaker, reference_spec=reference_spec, reference_spec_lens=reference_spec_lens, + batch_size=text.shape[0], + speaker=speaker, + reference_spec=reference_spec, + reference_spec_lens=reference_spec_lens, ) # Input FFT @@ -379,10 +381,12 @@ def infer( reference_spec=None, reference_spec_lens=None, ): - # Calculate speaker embedding spk_emb = self.get_speaker_embedding( - speaker=speaker, reference_spec=reference_spec, reference_spec_lens=reference_spec_lens, + batch_size=text.shape[0], + speaker=speaker, + reference_spec=reference_spec, + reference_spec_lens=reference_spec_lens, ) # Input FFT diff --git a/nemo/collections/tts/modules/submodules.py b/nemo/collections/tts/modules/submodules.py index dbf26f1ceeee..6efccf18eeea 100644 --- a/nemo/collections/tts/modules/submodules.py +++ b/nemo/collections/tts/modules/submodules.py @@ -709,18 +709,29 @@ class SpeakerEncoder represents speakers representation. This module can combine GST (global style token) based speaker embeddings and lookup table speaker embeddings. """ - def __init__(self, lookup_module=None, gst_module=None): + def __init__(self, lookup_module=None, gst_module=None, precomputed_embedding_dim=None): """ lookup_module: Torch module to get lookup based speaker embedding gst_module: Neural module to get GST based speaker embedding + precomputed_embedding_dim: Give precomputed speaker embedding dimension to use precompute speaker embedding """ super(SpeakerEncoder, self).__init__() + + # Multi-speaker embedding self.lookup_module = lookup_module + + # Reference speaker embedding self.gst_module = gst_module + if precomputed_embedding_dim is not None: + self.precomputed_emb = torch.nn.Parameter(torch.empty(precomputed_embedding_dim)) + else: + self.precomputed_emb = None + @property def input_types(self): return { + "batch_size": NeuralType(optional=True), "speaker": NeuralType(('B'), Index(), optional=True), "reference_spec": NeuralType(('B', 'D', 'T_spec'), MelSpectrogramType(), optional=True), "reference_spec_lens": NeuralType(('B'), LengthsType(), optional=True), @@ -732,9 +743,16 @@ def output_types(self): "embs": NeuralType(('B', 'D'), EncodedRepresentation()), } - def forward(self, speaker=None, reference_spec=None, reference_spec_lens=None): + def overwrite_precomputed_emb(self, emb): + self.precomputed_emb = torch.nn.Parameter(emb) + + def forward(self, batch_size=None, speaker=None, reference_spec=None, reference_spec_lens=None): embs = None + # Get Precomputed speaker embedding + if self.precomputed_emb is not None: + return self.precomputed_emb.unsqueeze(0).repeat(batch_size, 1) + # Get Lookup table speaker embedding if self.lookup_module is not None and speaker is not None: embs = self.lookup_module(speaker) diff --git a/scripts/dataset_processing/tts/extract_sup_data.py b/scripts/dataset_processing/tts/extract_sup_data.py index 57fa220a733c..9a5dcc223444 100644 --- a/scripts/dataset_processing/tts/extract_sup_data.py +++ b/scripts/dataset_processing/tts/extract_sup_data.py @@ -31,7 +31,7 @@ def get_pitch_stats(pitch_list): def preprocess_ds_for_fastpitch_align(dataloader): pitch_list = [] for batch in tqdm(dataloader, total=len(dataloader)): - audios, audio_lengths, tokens, tokens_lengths, align_prior_matrices, pitches, pitches_lengths = batch + audios, audio_lengths, tokens, tokens_lengths, align_prior_matrices, pitches, pitches_lengths, *_ = batch pitch = pitches.squeeze(0) pitch_list.append(pitch[pitch != 0]) diff --git a/scripts/dataset_processing/tts/resynthesize_dataset.py b/scripts/dataset_processing/tts/resynthesize_dataset.py index cacd41e93109..652fde299572 100644 --- a/scripts/dataset_processing/tts/resynthesize_dataset.py +++ b/scripts/dataset_processing/tts/resynthesize_dataset.py @@ -117,6 +117,15 @@ def resynthesize_batch(self, batch: Dict[str, Any]) -> Dict[str, Any]: batch = to_device_recursive(batch, self.device) mels, mel_lens = self.model.preprocessor(input_signal=batch["audio"], length=batch["audio_lens"]) + + reference_audio = batch.get("reference_audio", None) + reference_audio_len = batch.get("reference_audio_lens", None) + reference_spec, reference_spec_len = None, None + if reference_audio is not None: + reference_spec, reference_spec_len = self.model.preprocessor( + input_signal=reference_audio, length=reference_audio_len + ) + outputs_tuple = self.model.forward( text=batch["text"], durs=None, @@ -127,6 +136,8 @@ def resynthesize_batch(self, batch: Dict[str, Any]) -> Dict[str, Any]: attn_prior=batch.get("attn_prior"), mel_lens=mel_lens, input_lens=batch["text_lens"], + reference_spec=reference_spec, + reference_spec_lens=reference_spec_len, ) names = self.model.fastpitch.output_types.keys() return {"spec": mels, "mel_lens": mel_lens, **dict(zip(names, outputs_tuple))} diff --git a/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb new file mode 100644 index 000000000000..fa1b1bdc90c8 --- /dev/null +++ b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb @@ -0,0 +1,827 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ea49c0e5", + "metadata": {}, + "source": [ + "# FastPitch Adapter Finetuning\n", + "\n", + "This notebook is designed to provide a guide on how to run FastPitch Adapter Finetuning Pipeline. It contains the following sections:\n", + "1. **Fine-tune FastPitch on adaptation data**: fine-tune pre-trained multi-speaker FastPitch for a new speaker\n", + "* Dataset Preparation: download dataset and extract manifest files. (duration more than 15 mins)\n", + "* Preprocessing: add absolute audio paths in manifest and extract Supplementary Data.\n", + "* **Model Setting: transform pre-trained checkpoint to adapter-compatible checkpoint and precompute speaker embedding**\n", + "* Training: fine-tune frozen multispeaker FastPitch with trainable adapters.\n", + "2. **Fine-tune HiFiGAN on adaptation data**: fine-tune a vocoder for the fine-tuned multi-speaker FastPitch\n", + "* Dataset Preparation: extract mel-spectrograms from fine-tuned FastPitch.\n", + "* Training: fine-tune HiFiGAN with fine-tuned adaptation data.\n", + "3. **Inference**: generate speech from adpated FastPitch\n", + "* Load Model: load pre-trained multi-speaker FastPitch with **fine-tuned adapters**.\n", + "* Output Audio: generate audio files." + ] + }, + { + "cell_type": "markdown", + "id": "37259555", + "metadata": {}, + "source": [ + "# License\n", + "\n", + "> Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n", + "> \n", + "> Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "> you may not use this file except in compliance with the License.\n", + "> You may obtain a copy of the License at\n", + "> \n", + "> http://www.apache.org/licenses/LICENSE-2.0\n", + "> \n", + "> Unless required by applicable law or agreed to in writing, software\n", + "> distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "> See the License for the specific language governing permissions and\n", + "> limitations under the License." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d61cbea5", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "You can either run this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", + "Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. Run this cell to set up dependencies# .\n", + "\"\"\"\n", + "# # If you're using Colab and not running locally, uncomment and run this cell.\n", + "# BRANCH = 'main'\n", + "# !apt-get install sox libsndfile1 ffmpeg\n", + "# !pip install wget unidecode pynini==2.1.4 scipy==1.7.3\n", + "# !python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", + "\n", + "# # Download local version of NeMo scripts. If you are running locally and want to use your own local NeMo code,\n", + "# # comment out the below lines and set `code_dir` to your local path.\n", + "code_dir = 'NeMoTTS' \n", + "!git clone https://github.com/NVIDIA/NeMo.git {code_dir}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fef9aba9", + "metadata": {}, + "outputs": [], + "source": [ + "!wandb login #PASTE_WANDB_APIKEY_HERE" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "49bc38ab", + "metadata": {}, + "outputs": [], + "source": [ + "# .nemo files for your pre-trained FastPitch and HiFiGAN\n", + "pretrained_fastpitch_checkpoint = \"\"\n", + "finetuned_hifigan_on_multispeaker_checkpoint = \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9459f9dc", + "metadata": {}, + "outputs": [], + "source": [ + "sample_rate = 44100\n", + "# Store all manifest and audios\n", + "data_dir = 'NeMoTTS_dataset'\n", + "# Store all supplementary files\n", + "supp_dir = \"NeMoTTS_sup_data\"\n", + "# Store all training logs\n", + "logs_dir = \"NeMoTTS_logs\"\n", + "# Store all mel-spectrograms for vocoder training\n", + "mels_dir = \"NeMoTTS_mels\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb26f54d", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "import shutil\n", + "import nemo\n", + "import torch\n", + "import numpy as np\n", + "\n", + "from pathlib import Path\n", + "from tqdm import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12b28329", + "metadata": {}, + "outputs": [], + "source": [ + "os.makedirs(code_dir, exist_ok=True)\n", + "code_dir = os.path.abspath(code_dir)\n", + "os.makedirs(data_dir, exist_ok=True)\n", + "data_dir = os.path.abspath(data_dir)\n", + "os.makedirs(supp_dir, exist_ok=True)\n", + "supp_dir = os.path.abspath(supp_dir)\n", + "os.makedirs(logs_dir, exist_ok=True)\n", + "logs_dir = os.path.abspath(logs_dir)\n", + "os.makedirs(mels_dir, exist_ok=True)\n", + "mels_dir = os.path.abspath(mels_dir)" + ] + }, + { + "cell_type": "markdown", + "id": "30996769", + "metadata": {}, + "source": [ + "# 1. Fine-tune FastPitch on adaptation data" + ] + }, + { + "cell_type": "markdown", + "id": "2f5f5945", + "metadata": {}, + "source": [ + "## a. Data Preparation\n", + "For our tutorial, we use small part of VCTK dataset with a new target speaker (p267). Usually, the audios should have total duration more than 15 mintues." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8047f988", + "metadata": {}, + "outputs": [], + "source": [ + "!cd {data_dir} && wget https://vctk-subset.s3.amazonaws.com/vctk_subset.tar.gz && tar zxf vctk_subset.tar.gz" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8242769", + "metadata": {}, + "outputs": [], + "source": [ + "manidir = f\"{data_dir}/vctk_subset\"\n", + "!ls {manidir}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79cf8539", + "metadata": {}, + "outputs": [], + "source": [ + "train_manifest = os.path.abspath(os.path.join(manidir, 'train.json'))\n", + "valid_manifest = os.path.abspath(os.path.join(manidir, 'dev.json'))" + ] + }, + { + "cell_type": "markdown", + "id": "35c3b97b", + "metadata": {}, + "source": [ + "## b. Preprocessing" + ] + }, + { + "cell_type": "markdown", + "id": "ba3a7c3a", + "metadata": {}, + "source": [ + "### Add absolute file path in manifest\n", + "We use absoluate path for audio_filepath to get the audio during training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8bc485b5", + "metadata": {}, + "outputs": [], + "source": [ + "from nemo.collections.asr.parts.utils.manifest_utils import read_manifest, write_manifest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9cb8ef5", + "metadata": {}, + "outputs": [], + "source": [ + "train_data = read_manifest(train_manifest)\n", + "for m in train_data: m['audio_filepath'] = os.path.abspath(os.path.join(manidir, m['audio_filepath']))\n", + "write_manifest(train_manifest, train_data)\n", + "\n", + "valid_data = read_manifest(valid_manifest)\n", + "for m in valid_data: m['audio_filepath'] = os.path.abspath(os.path.join(manidir, m['audio_filepath']))\n", + "write_manifest(valid_manifest, valid_data)" + ] + }, + { + "cell_type": "markdown", + "id": "f92054d5", + "metadata": {}, + "source": [ + "### Extract Supplementary Data\n", + "\n", + "As mentioned in the [FastPitch and MixerTTS training tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) - To accelerate and stabilize our training, we also need to extract pitch for every audio, estimate pitch statistics (mean, std, min, and max). To do this, all we need to do is iterate over our data one time, via `extract_sup_data.py` script." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0adc618b", + "metadata": {}, + "outputs": [], + "source": [ + "!cd {code_dir} && python scripts/dataset_processing/tts/extract_sup_data.py \\\n", + " manifest_filepath={train_manifest} \\\n", + " sup_data_path={supp_dir} \\\n", + " dataset.sample_rate={sample_rate} \\\n", + " dataset.n_fft=2048 \\\n", + " dataset.win_length=2048 \\\n", + " dataset.hop_length=512" + ] + }, + { + "cell_type": "markdown", + "id": "96dd5fe1", + "metadata": {}, + "source": [ + "After running the above command line, you will observe a new folder NeMoTTS_sup_data/pitch and printouts of pitch statistics like below. Specify these values to the FastPitch training configurations. We will be there in the following section.\n", + "```bash\n", + "PITCH_MEAN=175.48513793945312, PITCH_STD=42.3786735534668\n", + "PITCH_MIN=65.4063949584961, PITCH_MAX=270.8517761230469\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23703c76", + "metadata": {}, + "outputs": [], + "source": [ + "!cd {code_dir} && python scripts/dataset_processing/tts/extract_sup_data.py \\\n", + " manifest_filepath={valid_manifest} \\\n", + " sup_data_path={supp_dir} \\\n", + " dataset.sample_rate={sample_rate} \\\n", + " dataset.n_fft=2048 \\\n", + " dataset.win_length=2048 \\\n", + " dataset.hop_length=512" + ] + }, + { + "cell_type": "markdown", + "id": "7c70e5db", + "metadata": {}, + "source": [ + "## c. Model Setting\n", + "### Transform pre-trained checkpoint to adapter-compatible checkpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "439f2f82", + "metadata": {}, + "outputs": [], + "source": [ + "from nemo.collections.tts.models import FastPitchModel\n", + "from nemo.collections.asr.parts.preprocessing.features import WaveformFeaturizer\n", + "from nemo.core import adapter_mixins\n", + "from omegaconf import DictConfig, OmegaConf, open_dict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30f865cb", + "metadata": {}, + "outputs": [], + "source": [ + "def update_model_config_to_support_adapter(config) -> DictConfig:\n", + " with open_dict(config):\n", + " enc_adapter_metadata = adapter_mixins.get_registered_adapter(config.input_fft._target_)\n", + " if enc_adapter_metadata is not None:\n", + " config.input_fft._target_ = enc_adapter_metadata.adapter_class_path\n", + "\n", + " dec_adapter_metadata = adapter_mixins.get_registered_adapter(config.output_fft._target_)\n", + " if dec_adapter_metadata is not None:\n", + " config.output_fft._target_ = dec_adapter_metadata.adapter_class_path\n", + "\n", + " pitch_predictor_adapter_metadata = adapter_mixins.get_registered_adapter(config.pitch_predictor._target_)\n", + " if pitch_predictor_adapter_metadata is not None:\n", + " config.pitch_predictor._target_ = pitch_predictor_adapter_metadata.adapter_class_path\n", + "\n", + " duration_predictor_adapter_metadata = adapter_mixins.get_registered_adapter(config.duration_predictor._target_)\n", + " if duration_predictor_adapter_metadata is not None:\n", + " config.duration_predictor._target_ = duration_predictor_adapter_metadata.adapter_class_path\n", + "\n", + " aligner_adapter_metadata = adapter_mixins.get_registered_adapter(config.alignment_module._target_)\n", + " if aligner_adapter_metadata is not None:\n", + " config.alignment_module._target_ = aligner_adapter_metadata.adapter_class_path\n", + "\n", + " return config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e92910b5", + "metadata": {}, + "outputs": [], + "source": [ + "spec_model = FastPitchModel.restore_from(pretrained_fastpitch_checkpoint).eval().cuda()\n", + "spec_model.cfg = update_model_config_to_support_adapter(spec_model.cfg)" + ] + }, + { + "cell_type": "markdown", + "id": "7f03219f", + "metadata": {}, + "source": [ + "### Precompute Speaker Embedding\n", + "Get all GST speaker embeddings from training data, take average, and save as `precomputed_emb` in FastPitch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2a35241", + "metadata": {}, + "outputs": [], + "source": [ + "wave_model = WaveformFeaturizer(sample_rate=sample_rate)\n", + "train_data = read_manifest(train_manifest)\n", + "\n", + "spk_embs = [] \n", + "for data in train_data:\n", + " with torch.no_grad():\n", + " audio = wave_model.process(data['audio_filepath'])\n", + " audio_length = torch.tensor(audio.shape[0]).long()\n", + " audio = audio.unsqueeze(0).to(device=spec_model.device)\n", + " audio_length = audio_length.unsqueeze(0).to(device=spec_model.device)\n", + " spec_ref, spec_ref_lens = spec_model.preprocessor(input_signal=audio, length=audio_length)\n", + " spk_emb = spec_model.fastpitch.get_speaker_embedding(batch_size=spec_ref.shape[0],\n", + " speaker=None,\n", + " reference_spec=spec_ref,\n", + " reference_spec_lens=spec_ref_lens)\n", + "\n", + " spk_embs.append(spk_emb.squeeze().cpu())\n", + "\n", + "spk_embs = torch.stack(spk_embs, dim=0)\n", + "spk_emb = torch.mean(spk_embs, dim=0)\n", + "spk_emb_dim = spk_emb.shape[0]\n", + "\n", + "with open_dict(spec_model.cfg):\n", + " spec_model.cfg.speaker_encoder.precomputed_embedding_dim = spec_model.cfg.symbols_embedding_dim\n", + "\n", + "spec_model.fastpitch.speaker_encoder.overwrite_precomputed_emb(spk_emb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5fa1b309", + "metadata": {}, + "outputs": [], + "source": [ + "spec_model.save_to('Pretrained-FastPitch.nemo')\n", + "shutil.copyfile(finetuned_hifigan_on_multispeaker_checkpoint, \"Pretrained-HifiGan.nemo\")\n", + "pretrained_fastpitch_checkpoint = os.path.abspath(\"Pretrained-FastPitch.nemo\")\n", + "finetuned_hifigan_on_multispeaker_checkpoint = os.path.abspath(\"Pretrained-HifiGan.nemo\")" + ] + }, + { + "cell_type": "markdown", + "id": "3b77e95f", + "metadata": {}, + "source": [ + "## d. Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e8c3740", + "metadata": {}, + "outputs": [], + "source": [ + "phoneme_dict_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"cmudict-0.7b_nv22.10\"))\n", + "heteronyms_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"heteronyms-052722\"))\n", + "\n", + "# Copy and Paste the PITCH_MEAN and PITCH_STD from previous steps (train_manifest) to overide pitch_mean and pitch_std configs below.\n", + "PITCH_MEAN=175.48513793945312\n", + "PITCH_STD=42.3786735534668" + ] + }, + { + "cell_type": "markdown", + "id": "19bb6d8b", + "metadata": {}, + "source": [ + "### Important notes\n", + "* `+init_from_nemo_model`: initialize with a multi-speaker FastPitch checkpoint\n", + "* `model.speaker_encoder.precomputed_embedding_dim={spk_emb_dim}`: use precomputed speaker embedding\n", + "* `~model.speaker_encoder.lookup_module`: we use precomputed speaker embedding, so we remove the pre-trained looked-up speaker embedding\n", + "* `~model.speaker_encoder.gst_module`: we use precomputed speaker embedding, so we remove the pre-trained gst speaker embedding\n", + "* Other optional arguments based on your preference:\n", + " * batch_size\n", + " * exp_manager\n", + " * trainer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c8cbea2", + "metadata": {}, + "outputs": [], + "source": [ + "# Normally 100 epochs\n", + "!cd {code_dir} && python examples/tts/fastpitch_finetune_adapters.py \\\n", + "--config-name=fastpitch_align_44100_adapter.yaml \\\n", + "+init_from_nemo_model={pretrained_fastpitch_checkpoint} \\\n", + "train_dataset={train_manifest} \\\n", + "validation_datasets={valid_manifest} \\\n", + "sup_data_types=\"['align_prior_matrix', 'pitch']\" \\\n", + "sup_data_path={supp_dir} \\\n", + "pitch_mean={PITCH_MEAN} \\\n", + "pitch_std={PITCH_STD} \\\n", + "model.speaker_encoder.precomputed_embedding_dim={spk_emb_dim} \\\n", + "~model.speaker_encoder.lookup_module \\\n", + "~model.speaker_encoder.gst_module \\\n", + "model.train_ds.dataloader_params.batch_size=8 \\\n", + "model.validation_ds.dataloader_params.batch_size=8 \\\n", + "model.optim.name=adam \\\n", + "model.optim.lr=2e-4 \\\n", + "~model.optim.sched \\\n", + "exp_manager.exp_dir={logs_dir} \\\n", + "+exp_manager.create_wandb_logger=True \\\n", + "+exp_manager.wandb_logger_kwargs.name=\"tutorial-FastPitch-finetune-adaptation\" \\\n", + "+exp_manager.wandb_logger_kwargs.project=\"NeMo\" \\\n", + "+exp_manager.checkpoint_callback_params.save_top_k=-1 \\\n", + "trainer.max_epochs=10 \\\n", + "trainer.check_val_every_n_epoch=10 \\\n", + "trainer.log_every_n_steps=1 \\\n", + "trainer.devices=1 \\\n", + "trainer.strategy=ddp \\\n", + "trainer.precision=32" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe5c7b2f", + "metadata": {}, + "outputs": [], + "source": [ + "# e.g. NeMoTTS_logs/FastPitch/Y-M-D_H-M-S/checkpoints/FastPitch.nemo\n", + "# e.g. NeMoTTS_logs/FastPitch/Y-M-D_H-M-S/checkpoints/adapters.pt\n", + "last_checkpoint_dir = sorted(list([i for i in (Path(logs_dir) / \"FastPitch\").iterdir() if i.is_dir()]))[-1] / \"checkpoints\"\n", + "finetuned_fastpitch_checkpoint = list(last_checkpoint_dir.glob('*.nemo'))[0]\n", + "finetuned_adapter_checkpoint = list(last_checkpoint_dir.glob('adapters.pt'))[0]\n", + "print(finetuned_fastpitch_checkpoint)\n", + "print(finetuned_adapter_checkpoint)" + ] + }, + { + "cell_type": "markdown", + "id": "75856d0e", + "metadata": {}, + "source": [ + "# 3. Fine-tune HiFiGAN on adaptation data" + ] + }, + { + "cell_type": "markdown", + "id": "3444698f", + "metadata": {}, + "source": [ + "## a. Dataset Preparation\n", + "Generate mel-spectrograms for HiFiGAN training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb2fd64d", + "metadata": {}, + "outputs": [], + "source": [ + "!cd {code_dir} \\\n", + "&& python scripts/dataset_processing/tts/resynthesize_dataset.py \\\n", + "--model-path={finetuned_fastpitch_checkpoint} \\\n", + "--input-json-manifest={train_manifest} \\\n", + "--input-sup-data-path={supp_dir} \\\n", + "--output-folder={mels_dir} \\\n", + "--device=\"cuda:0\" \\\n", + "--batch-size=1 \\\n", + "--num-workers=1 \\\n", + "&& python scripts/dataset_processing/tts/resynthesize_dataset.py \\\n", + "--model-path={finetuned_fastpitch_checkpoint} \\\n", + "--input-json-manifest={valid_manifest} \\\n", + "--input-sup-data-path={supp_dir} \\\n", + "--output-folder={mels_dir} \\\n", + "--device=\"cuda:0\" \\\n", + "--batch-size=1 \\\n", + "--num-workers=1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da69cb66", + "metadata": {}, + "outputs": [], + "source": [ + "train_manifest_mel = f\"{mels_dir}/train_mel.json\"\n", + "valid_manifest_mel = f\"{mels_dir}/dev_mel.json\"" + ] + }, + { + "cell_type": "markdown", + "id": "fa2cbb02", + "metadata": {}, + "source": [ + "## b. Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffdce5d5", + "metadata": {}, + "outputs": [], + "source": [ + "# Normally 500 epochs\n", + "!cd {code_dir} && python examples/tts/hifigan_finetune.py \\\n", + "--config-name=hifigan_44100.yaml \\\n", + "train_dataset={train_manifest_mel} \\\n", + "validation_datasets={valid_manifest_mel} \\\n", + "+init_from_nemo_model={finetuned_hifigan_on_multispeaker_checkpoint} \\\n", + "model.train_ds.dataloader_params.batch_size=32 \\\n", + "model.optim.lr=0.0001 \\\n", + "model/train_ds=train_ds_finetune \\\n", + "model/validation_ds=val_ds_finetune \\\n", + "+trainer.max_epochs=5 \\\n", + "trainer.check_val_every_n_epoch=5 \\\n", + "trainer.devices=-1 \\\n", + "trainer.strategy='ddp' \\\n", + "trainer.precision=16 \\\n", + "exp_manager.exp_dir={logs_dir} \\\n", + "exp_manager.create_wandb_logger=True \\\n", + "exp_manager.wandb_logger_kwargs.name=\"tutorial-HiFiGAN-finetune-multispeaker\" \\\n", + "exp_manager.wandb_logger_kwargs.project=\"NeMo\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e6376cf", + "metadata": {}, + "outputs": [], + "source": [ + "# e.g. NeMoTTS_logs/HifiGan/Y-M-D_H-M-S/checkpoints/HifiGan.nemo\n", + "last_checkpoint_dir = sorted(list([i for i in (Path(logs_dir) / \"HifiGan\").iterdir() if i.is_dir()]))[-1] / \"checkpoints\"\n", + "finetuned_hifigan_on_adaptation_checkpoint = list(last_checkpoint_dir.glob('*.nemo'))[0]\n", + "finetuned_hifigan_on_adaptation_checkpoint" + ] + }, + { + "cell_type": "markdown", + "id": "e5076e51", + "metadata": {}, + "source": [ + "# 4. Inference" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52358549", + "metadata": {}, + "outputs": [], + "source": [ + "from nemo.collections.tts.models import HifiGanModel\n", + "import IPython.display as ipd\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "id": "9e96ee13", + "metadata": {}, + "source": [ + "## a. Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cb5d524", + "metadata": {}, + "outputs": [], + "source": [ + "wave_model = WaveformFeaturizer(sample_rate=sample_rate)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32dbd30c", + "metadata": {}, + "outputs": [], + "source": [ + "# Load from pretrained FastPitch and finetuned adapter\n", + "# spec_model = FastPitchModel.restore_from(pretrained_fastpitch_checkpoint)\n", + "# spec_model.load_adapters(finetuned_adapter_checkpoint)\n", + "\n", + "# Load from finetuned FastPitch\n", + "spec_model = FastPitchModel.restore_from(finetuned_fastpitch_checkpoint)\n", + "spec_model = spec_model.eval().cuda()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74a7ad03", + "metadata": {}, + "outputs": [], + "source": [ + "# HiFiGAN\n", + "vocoder_model = HifiGanModel.restore_from(finetuned_hifigan_on_adaptation_checkpoint).eval().cuda()" + ] + }, + { + "cell_type": "markdown", + "id": "4f882975", + "metadata": {}, + "source": [ + "## b. Output Audio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2178a8ef", + "metadata": {}, + "outputs": [], + "source": [ + "def gt_spectrogram(audio_path, wave_model, spec_gen_model):\n", + " features = wave_model.process(audio_path, trim=False)\n", + " audio, audio_length = features, torch.tensor(features.shape[0]).long()\n", + " audio = audio.unsqueeze(0).to(device=spec_gen_model.device)\n", + " audio_length = audio_length.unsqueeze(0).to(device=spec_gen_model.device)\n", + " with torch.no_grad():\n", + " spectrogram, spec_len = spec_gen_model.preprocessor(input_signal=audio, length=audio_length)\n", + " return spectrogram, spec_len\n", + "\n", + "def gen_spectrogram(text, spec_gen_model, reference_spec, reference_spec_lens):\n", + " parsed = spec_gen_model.parse(text)\n", + " with torch.no_grad(): \n", + " spectrogram = spec_gen_model.generate_spectrogram(tokens=parsed, \n", + " reference_spec=reference_spec, \n", + " reference_spec_lens=reference_spec_lens)\n", + "\n", + " return spectrogram\n", + " \n", + "def synth_audio(vocoder_model, spectrogram): \n", + " with torch.no_grad(): \n", + " audio = vocoder_model.convert_spectrogram_to_audio(spec=spectrogram)\n", + " if isinstance(audio, torch.Tensor):\n", + " audio = audio.to('cpu').numpy()\n", + " return audio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "766154e3", + "metadata": {}, + "outputs": [], + "source": [ + "# Reference Audio\n", + "with open(train_manifest, \"r\") as f:\n", + " for i, line in enumerate(f):\n", + " reference_record = json.loads(line)\n", + " break\n", + " \n", + "# Validatation Audio\n", + "num_val = 3\n", + "val_records = []\n", + "with open(valid_manifest, \"r\") as f:\n", + " for i, line in enumerate(f):\n", + " val_records.append(json.loads(line))\n", + " if len(val_records) >= num_val:\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dfa71ca6", + "metadata": {}, + "outputs": [], + "source": [ + "for i, val_record in enumerate(val_records):\n", + " reference_spec, reference_spec_lens = gt_spectrogram(reference_record['audio_filepath'], wave_model, spec_model)\n", + " reference_spec = reference_spec.to(spec_model.device)\n", + " spec_pred = gen_spectrogram(val_record['text'], spec_model,\n", + " reference_spec=reference_spec, \n", + " reference_spec_lens=reference_spec_lens)\n", + "\n", + " audio_gen = synth_audio(vocoder_model, spec_pred)\n", + " \n", + " audio_ref = ipd.Audio(reference_record['audio_filepath'], rate=sample_rate)\n", + " audio_gt = ipd.Audio(val_record['audio_filepath'], rate=sample_rate)\n", + " audio_gen = ipd.Audio(audio_gen, rate=sample_rate)\n", + " \n", + " print(\"------\")\n", + " print(f\"Text: {val_record['text']}\")\n", + " print('Reference Audio')\n", + " ipd.display(audio_ref)\n", + " print('Ground Truth Audio')\n", + " ipd.display(audio_gt)\n", + " print('Synthesized Audio')\n", + " ipd.display(audio_gen)\n", + " plt.imshow(spec_pred[0].to('cpu').numpy(), origin=\"lower\", aspect=\"auto\")\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51d9d176", + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Pretraind FastPitch: {pretrained_fastpitch_checkpoint}\")\n", + "print(f\"Finetuned Adapter: {finetuned_adapter_checkpoint}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6180a7d2", + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Finetuned FastPitch: {finetuned_fastpitch_checkpoint}\")\n", + "print(f\"Finetuned HiFi-Gan: {finetuned_hifigan_on_adaptation_checkpoint}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b33263b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb b/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb new file mode 100644 index 000000000000..defd0272d89d --- /dev/null +++ b/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb @@ -0,0 +1,735 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "afd8cdc9", + "metadata": {}, + "source": [ + "# FastPitch MultiSpeaker Pretraining\n", + "\n", + "This notebook is designed to provide a guide on how to run FastPitch MultiSpeaker Pretraining Pipeline. It contains the following sections:\n", + "1. **Pre-train FastPitch on multi-speaker data**: pre-train a multi-speaker FastPitch\n", + "* Dataset Preparation: download dataset and extract manifest files.\n", + "* Preprocessing: add absolute audio paths in manifest, calibrate speaker id to start from 0, and extract Supplementary Data.\n", + "* Training: pre-train multispeaker FastPitch\n", + "2. **Fine-tune HiFiGAN on multi-speaker data**: fine-tune a vocoder for the pre-trained multi-speaker FastPitch\n", + "* Dataset Preparation: extract mel-spectrograms from pre-trained FastPitch.\n", + "* Training: fine-tune HiFiGAN with pre-trained multi-speaker data.\n", + "3. **Inference**: generate speech from pre-trained multi-speaker FastPitch\n", + "* Load Model: load pre-trained multi-speaker FastPitch.\n", + "* Output Audio: generate audio files." + ] + }, + { + "cell_type": "markdown", + "id": "4fc9c6b9", + "metadata": {}, + "source": [ + "# License\n", + "> Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n", + "> \n", + "> Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "> you may not use this file except in compliance with the License.\n", + "> You may obtain a copy of the License at\n", + "> \n", + "> http://www.apache.org/licenses/LICENSE-2.0\n", + "> \n", + "> Unless required by applicable law or agreed to in writing, software\n", + "> distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "> See the License for the specific language governing permissions and\n", + "> limitations under the License." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b81f6c14", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "You can either run this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", + "Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. Run this cell to set up dependencies# .\n", + "\"\"\"\n", + "# BRANCH = 'main'\n", + "# # If you're using Colab and not running locally, uncomment and run this cell.\n", + "# !apt-get install sox libsndfile1 ffmpeg\n", + "# !pip install wget unidecode pynini==2.1.4 scipy==1.7.3\n", + "# !python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", + "\n", + "# # Download local version of NeMo scripts. If you are running locally and want to use your own local NeMo code,\n", + "# # comment out the below lines and set `code_dir` to your local path.\n", + "code_dir = 'NeMoTTS' \n", + "!git clone https://github.com/NVIDIA/NeMo.git {code_dir}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2f1e3ac", + "metadata": {}, + "outputs": [], + "source": [ + "!wandb login #PASTE_WANDB_APIKEY_HERE" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1acd141d", + "metadata": {}, + "outputs": [], + "source": [ + "sample_rate = 44100\n", + "# Store all manifest and audios\n", + "data_dir = 'NeMoTTS_dataset'\n", + "# Store all supplementary files\n", + "supp_dir = \"NeMoTTS_sup_data\"\n", + "# Store all training logs\n", + "logs_dir = \"NeMoTTS_logs\"\n", + "# Store all mel-spectrograms for vocoder training\n", + "mels_dir = \"NeMoTTS_mels\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b54c45e", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "import nemo\n", + "import torch\n", + "import numpy as np\n", + "\n", + "from pathlib import Path\n", + "from tqdm import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a119994b", + "metadata": {}, + "outputs": [], + "source": [ + "os.makedirs(code_dir, exist_ok=True)\n", + "code_dir = os.path.abspath(code_dir)\n", + "os.makedirs(data_dir, exist_ok=True)\n", + "data_dir = os.path.abspath(data_dir)\n", + "os.makedirs(supp_dir, exist_ok=True)\n", + "supp_dir = os.path.abspath(supp_dir)\n", + "os.makedirs(logs_dir, exist_ok=True)\n", + "logs_dir = os.path.abspath(logs_dir)\n", + "os.makedirs(mels_dir, exist_ok=True)\n", + "mels_dir = os.path.abspath(mels_dir)" + ] + }, + { + "cell_type": "markdown", + "id": "dbb3ac0e", + "metadata": {}, + "source": [ + "# 1. Pre-train FastPitch on multi-speaker data" + ] + }, + { + "cell_type": "markdown", + "id": "095a1fca", + "metadata": {}, + "source": [ + "## a. Dataset Preparation\n", + "For our tutorial, we use the subset of VCTK dataset with 5 speakers (p225-p229). The audios have 48 kHz sampling rate, we downsample to 44.1 kHz in this tutorial. \n", + "You can read more about dataset [here](https://datashare.ed.ac.uk/handle/10283/2950)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69b17b07", + "metadata": {}, + "outputs": [], + "source": [ + "!cd {data_dir} && wget https://vctk-subset.s3.amazonaws.com/vctk_subset_multispeaker.tar.gz && tar zxf vctk_subset_multispeaker.tar.gz" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a65e7938", + "metadata": {}, + "outputs": [], + "source": [ + "manidir = f\"{data_dir}/vctk_subset_multispeaker\"\n", + "!ls {manidir}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08b27b92", + "metadata": {}, + "outputs": [], + "source": [ + "train_manifest = os.path.abspath(os.path.join(manidir, 'train.json'))\n", + "valid_manifest = os.path.abspath(os.path.join(manidir, 'dev.json'))" + ] + }, + { + "cell_type": "markdown", + "id": "7cbf24d6", + "metadata": {}, + "source": [ + "## b. Preprocessing" + ] + }, + { + "cell_type": "markdown", + "id": "cae8567d", + "metadata": {}, + "source": [ + "### Add absoluate audio path in manifest\n", + "We use absoluate path for `audio_filepath` to get the audio during training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71d2fe63", + "metadata": {}, + "outputs": [], + "source": [ + "from nemo.collections.asr.parts.utils.manifest_utils import read_manifest, write_manifest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc51398c", + "metadata": {}, + "outputs": [], + "source": [ + "train_data = read_manifest(train_manifest)\n", + "for m in train_data: m['audio_filepath'] = os.path.abspath(os.path.join(manidir, m['audio_filepath']))\n", + "write_manifest(train_manifest, train_data)\n", + "\n", + "valid_data = read_manifest(valid_manifest)\n", + "for m in valid_data: m['audio_filepath'] = os.path.abspath(os.path.join(manidir, m['audio_filepath']))\n", + "write_manifest(valid_manifest, valid_data)" + ] + }, + { + "cell_type": "markdown", + "id": "678bb37c", + "metadata": {}, + "source": [ + "### Calibrate speaker id to start from 0\n", + "We use speaker id start from 0, so we can create a speaker look-up table with speaker size." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "594c6f2d", + "metadata": {}, + "outputs": [], + "source": [ + "train_data = read_manifest(train_manifest)\n", + "speaker2id = {s: _id for _id, s in enumerate(set([m['speaker'] for m in train_data]))}\n", + "for m in train_data: m['old_speaker'], m['speaker'] = m['speaker'], speaker2id[m['speaker']]\n", + "write_manifest(train_manifest, train_data)\n", + "\n", + "valid_data = read_manifest(valid_manifest)\n", + "for m in valid_data: m['old_speaker'], m['speaker'] = m['speaker'], speaker2id[m['speaker']]\n", + "write_manifest(valid_manifest, valid_data)" + ] + }, + { + "cell_type": "markdown", + "id": "15b6cc65", + "metadata": {}, + "source": [ + "### Extract Supplementary Data\n", + "\n", + "As mentioned in the [FastPitch and MixerTTS training tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) - To accelerate and stabilize our training, we also need to extract pitch for every audio, estimate pitch statistics (mean, std, min, and max). To do this, all we need to do is iterate over our data one time, via `extract_sup_data.py` script." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3728ac9", + "metadata": {}, + "outputs": [], + "source": [ + "!cd {code_dir} && python scripts/dataset_processing/tts/extract_sup_data.py \\\n", + " manifest_filepath={train_manifest} \\\n", + " sup_data_path={supp_dir} \\\n", + " dataset.sample_rate={sample_rate} \\\n", + " dataset.n_fft=2048 \\\n", + " dataset.win_length=2048 \\\n", + " dataset.hop_length=512" + ] + }, + { + "cell_type": "markdown", + "id": "effd9182", + "metadata": {}, + "source": [ + "After running the above command line, you will observe a new folder NeMoTTS_sup_data/pitch and printouts of pitch statistics like below. Specify these values to the FastPitch training configurations. We will be there in the following section.\n", + "```bash\n", + "PITCH_MEAN=140.84278869628906, PITCH_STD=50.97673034667969\n", + "PITCH_MIN=65.4063949584961, PITCH_MAX=285.3046875\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37e54cd4", + "metadata": {}, + "outputs": [], + "source": [ + "!cd {code_dir} && python scripts/dataset_processing/tts/extract_sup_data.py \\\n", + " manifest_filepath={valid_manifest} \\\n", + " sup_data_path={supp_dir} \\\n", + " dataset.sample_rate={sample_rate} \\\n", + " dataset.n_fft=2048 \\\n", + " dataset.win_length=2048 \\\n", + " dataset.hop_length=512" + ] + }, + { + "cell_type": "markdown", + "id": "82d2c99d", + "metadata": {}, + "source": [ + "* If you want to compute pitch mean and std for each speaker, you can use the script `compute_speaker_stats.py`\n", + "```bash\n", + "!cd {code_dir} && python scripts/dataset_processing/tts/compute_speaker_stats.py \\\n", + " --manifest_path={train_manifest} \\\n", + " --sup_data_path={supp_dir} \\\n", + " --pitch_stats_path={data_dir}/pitch_stats.json\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a7c8dfb6", + "metadata": {}, + "source": [ + "## c. Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e378a792", + "metadata": {}, + "outputs": [], + "source": [ + "phoneme_dict_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"cmudict-0.7b_nv22.10\"))\n", + "heteronyms_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"heteronyms-052722\"))\n", + "\n", + "# Copy and Paste the PITCH_MEAN and PITCH_STD from previous steps (train_manifest) to overide pitch_mean and pitch_std configs below.\n", + "PITCH_MEAN=140.84278869628906\n", + "PITCH_STD=65.4063949584961" + ] + }, + { + "cell_type": "markdown", + "id": "a90ddfb3", + "metadata": {}, + "source": [ + "### Important notes\n", + "* `sup_data_types=\"['align_prior_matrix', 'pitch', 'speaker_id', 'reference_audio']\" `\n", + " * **speaker_id**: each data has an unique speaker index (start from 0) in the input.\n", + " * **reference_audio**: each data has a reference audio (from the same speaker) in the input.\n", + " \n", + "* `model.speaker_encoder.lookup_module.n_speakers`\n", + " * if use **model.speaker_encoder.lookup_module**, please give n_speakers to create the lookup table\n", + "\n", + "* `condition_types=\"['add', 'concat', layernorm']`\n", + " * use different operation type to condition module (e.g. input_fft/output_fft/duration_predictor/pitch_predictor/alignment_module)\n", + " * **add**: add conditions to module input\n", + " * **concat**: concat conditions to module input\n", + " * **layernorm**: scale and shift layernorm outputs based on conditions\n", + " \n", + "* Other default arguments in config:\n", + " * `model.speaker_encoder.lookup_module`: model creates lookup table to get speaker embedding from speaker id.\n", + " * `model.speaker_encoder.gst_module`: model creates global style token to extract speaker information from reference audio.\n", + "\n", + "* Other optional arguments based on your preference:\n", + " * batch_size\n", + " * max_duration\n", + " * min_duration\n", + " * exp_manager\n", + " * trainer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac22f3a8", + "metadata": {}, + "outputs": [], + "source": [ + "# Normally 200 epochs\n", + "!(cd {code_dir} && python examples/tts/fastpitch.py \\\n", + "--config-name=fastpitch_align_44100_adapter.yaml \\\n", + "+init_from_pretrained_model=\"tts_en_fastpitch\" \\\n", + "train_dataset={train_manifest} \\\n", + "validation_datasets={valid_manifest} \\\n", + "sup_data_types=\"['align_prior_matrix', 'pitch', 'speaker_id', 'reference_audio']\" \\\n", + "sup_data_path={supp_dir} \\\n", + "pitch_mean={PITCH_MEAN} \\\n", + "pitch_std={PITCH_STD} \\\n", + "phoneme_dict_path={phoneme_dict_path} \\\n", + "heteronyms_path={heteronyms_path} \\\n", + "model.speaker_encoder.lookup_module.n_speakers=5 \\\n", + "model.input_fft.condition_types=\"['add', 'layernorm']\" \\\n", + "model.output_fft.condition_types=\"['add', 'layernorm']\" \\\n", + "model.duration_predictor.condition_types=\"['add', 'layernorm']\" \\\n", + "model.pitch_predictor.condition_types=\"['add', 'layernorm']\" \\\n", + "model.alignment_module.condition_types=\"['add']\" \\\n", + "model.train_ds.dataloader_params.batch_size=8 \\\n", + "model.validation_ds.dataloader_params.batch_size=8 \\\n", + "model.train_ds.dataset.max_duration=20 \\\n", + "model.validation_ds.dataset.max_duration=20 \\\n", + "model.validation_ds.dataset.min_duration=0.1 \\\n", + "exp_manager.exp_dir={logs_dir} \\\n", + "+exp_manager.create_wandb_logger=True \\\n", + "+exp_manager.wandb_logger_kwargs.name=\"tutorial-FastPitch-pretrain-multispeaker\" \\\n", + "+exp_manager.wandb_logger_kwargs.project=\"NeMo\" \\\n", + "trainer.max_epochs=20 \\\n", + "trainer.check_val_every_n_epoch=20 \\\n", + "trainer.log_every_n_steps=1 \\\n", + "trainer.devices=-1 \\\n", + "trainer.strategy=ddp \\\n", + "trainer.precision=32 \\\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6fc98a5", + "metadata": {}, + "outputs": [], + "source": [ + "# e.g. NeMoTTS_logs/FastPitch/Y-M-D_H-M-S/checkpoints/FastPitch.nemo\n", + "last_checkpoint_dir = sorted(list([i for i in (Path(logs_dir) / \"FastPitch\").iterdir() if i.is_dir()]))[-1] / \"checkpoints\"\n", + "pretrained_fastpitch_checkpoint = os.path.abspath(list(last_checkpoint_dir.glob('*.nemo'))[0])\n", + "print(pretrained_fastpitch_checkpoint)" + ] + }, + { + "cell_type": "markdown", + "id": "b175f755", + "metadata": {}, + "source": [ + "# 2. Fine-tune HiFiGAN on multi-speaker data" + ] + }, + { + "cell_type": "markdown", + "id": "5749a0b8", + "metadata": {}, + "source": [ + "## a. Dataset Preparation\n", + "Generate mel-spectrograms for HiFiGAN training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d77bda9", + "metadata": {}, + "outputs": [], + "source": [ + "!cd {code_dir} \\\n", + "&& python scripts/dataset_processing/tts/resynthesize_dataset.py \\\n", + "--model-path={pretrained_fastpitch_checkpoint} \\\n", + "--input-json-manifest={train_manifest} \\\n", + "--input-sup-data-path={supp_dir} \\\n", + "--output-folder={mels_dir} \\\n", + "--device=\"cuda:0\" \\\n", + "--batch-size=1 \\\n", + "--num-workers=1 \\\n", + "&& python scripts/dataset_processing/tts/resynthesize_dataset.py \\\n", + "--model-path={pretrained_fastpitch_checkpoint} \\\n", + "--input-json-manifest={valid_manifest} \\\n", + "--input-sup-data-path={supp_dir} \\\n", + "--output-folder={mels_dir} \\\n", + "--device=\"cuda:0\" \\\n", + "--batch-size=1 \\\n", + "--num-workers=1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c9159a1", + "metadata": {}, + "outputs": [], + "source": [ + "train_manifest_mel = f\"{mels_dir}/train_mel.json\"\n", + "valid_manifest_mel = f\"{mels_dir}/dev_mel.json\"" + ] + }, + { + "cell_type": "markdown", + "id": "24653f24", + "metadata": {}, + "source": [ + "## b. Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fadc0410", + "metadata": {}, + "outputs": [], + "source": [ + "# Normally 100 epochs\n", + "!cd {code_dir} && python examples/tts/hifigan_finetune.py \\\n", + "--config-name=hifigan_44100.yaml \\\n", + "train_dataset={train_manifest_mel} \\\n", + "validation_datasets={valid_manifest_mel} \\\n", + "+init_from_pretrained_model=\"tts_en_hifitts_hifigan_ft_fastpitch\" \\\n", + "model.train_ds.dataloader_params.batch_size=32 \\\n", + "model.optim.lr=0.0001 \\\n", + "model/train_ds=train_ds_finetune \\\n", + "model/validation_ds=val_ds_finetune \\\n", + "+trainer.max_epochs=5 \\\n", + "trainer.check_val_every_n_epoch=5 \\\n", + "trainer.devices=1 \\\n", + "trainer.strategy='ddp' \\\n", + "trainer.precision=16 \\\n", + "exp_manager.exp_dir={logs_dir} \\\n", + "exp_manager.create_wandb_logger=True \\\n", + "exp_manager.wandb_logger_kwargs.name=\"tutorial-HiFiGAN-finetune-multispeaker\" \\\n", + "exp_manager.wandb_logger_kwargs.project=\"NeMo\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "864fe5ba", + "metadata": {}, + "outputs": [], + "source": [ + "# e.g. NeMoTTS_logs/HifiGan/Y-M-D_H-M-S/checkpoints/HifiGan.nemo\n", + "last_checkpoint_dir = sorted(list([i for i in (Path(logs_dir) / \"HifiGan\").iterdir() if i.is_dir()]))[-1] / \"checkpoints\"\n", + "finetuned_hifigan_on_multispeaker_checkpoint = os.path.abspath(list(last_checkpoint_dir.glob('*.nemo'))[0])\n", + "finetuned_hifigan_on_multispeaker_checkpoint" + ] + }, + { + "cell_type": "markdown", + "id": "e04540b6", + "metadata": {}, + "source": [ + "# 3. Inference" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fdf662f7", + "metadata": {}, + "outputs": [], + "source": [ + "from nemo.collections.asr.parts.preprocessing.features import WaveformFeaturizer\n", + "from nemo.collections.tts.models import FastPitchModel\n", + "from nemo.collections.tts.models import HifiGanModel\n", + "from collections import defaultdict\n", + "import IPython.display as ipd\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "id": "270a3264", + "metadata": {}, + "source": [ + "## a. Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01315a66", + "metadata": {}, + "outputs": [], + "source": [ + "wave_model = WaveformFeaturizer(sample_rate=sample_rate)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "536c8fdc", + "metadata": {}, + "outputs": [], + "source": [ + "# FastPitch\n", + "spec_model = FastPitchModel.restore_from(pretrained_fastpitch_checkpoint).eval().cuda()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2ace7c4", + "metadata": {}, + "outputs": [], + "source": [ + "# HiFiGAN\n", + "vocoder_model = HifiGanModel.restore_from(finetuned_hifigan_on_multispeaker_checkpoint).eval().cuda()" + ] + }, + { + "cell_type": "markdown", + "id": "cf4a42fa", + "metadata": {}, + "source": [ + "## b. Output Audio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b376468", + "metadata": {}, + "outputs": [], + "source": [ + "def gt_spectrogram(audio_path, wave_model, spec_gen_model):\n", + " features = wave_model.process(audio_path, trim=False)\n", + " audio, audio_length = features, torch.tensor(features.shape[0]).long()\n", + " audio = audio.unsqueeze(0).to(device=spec_gen_model.device)\n", + " audio_length = audio_length.unsqueeze(0).to(device=spec_gen_model.device)\n", + " with torch.no_grad():\n", + " spectrogram, spec_len = spec_gen_model.preprocessor(input_signal=audio, length=audio_length)\n", + " return spectrogram, spec_len\n", + "\n", + "def gen_spectrogram(text, spec_gen_model, speaker, reference_spec, reference_spec_lens):\n", + " parsed = spec_gen_model.parse(text)\n", + " speaker = torch.tensor([speaker]).long().to(device=spec_gen_model.device)\n", + " with torch.no_grad(): \n", + " spectrogram = spec_gen_model.generate_spectrogram(tokens=parsed, \n", + " speaker=speaker, \n", + " reference_spec=reference_spec, \n", + " reference_spec_lens=reference_spec_lens)\n", + "\n", + " return spectrogram\n", + " \n", + "def synth_audio(vocoder_model, spectrogram): \n", + " with torch.no_grad(): \n", + " audio = vocoder_model.convert_spectrogram_to_audio(spec=spectrogram)\n", + " if isinstance(audio, torch.Tensor):\n", + " audio = audio.to('cpu').numpy()\n", + " return audio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f93f73a6", + "metadata": {}, + "outputs": [], + "source": [ + "# Reference Audio\n", + "reference_records = []\n", + "with open(train_manifest, \"r\") as f:\n", + " for i, line in enumerate(f):\n", + " reference_records.append(json.loads(line))\n", + "\n", + "speaker_to_index = defaultdict(list)\n", + "for i, d in enumerate(reference_records): speaker_to_index[d.get('speaker', None)].append(i)\n", + " \n", + "# Validatation Audio\n", + "num_val = 3\n", + "val_records = []\n", + "with open(valid_manifest, \"r\") as f:\n", + " for i, line in enumerate(f):\n", + " val_records.append(json.loads(line))\n", + " if len(val_records) >= num_val:\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77590752", + "metadata": {}, + "outputs": [], + "source": [ + "for i, val_record in enumerate(val_records):\n", + " reference_record = reference_records[speaker_to_index[val_record['speaker']][0]]\n", + " reference_spec, reference_spec_lens = gt_spectrogram(reference_record['audio_filepath'], wave_model, spec_model)\n", + " reference_spec = reference_spec.to(spec_model.device)\n", + " spec_pred = gen_spectrogram(val_record['text'], \n", + " spec_model,\n", + " speaker=val_record['speaker'], \n", + " reference_spec=reference_spec, \n", + " reference_spec_lens=reference_spec_lens)\n", + "\n", + " audio_gen = synth_audio(vocoder_model, spec_pred)\n", + " \n", + " audio_ref = ipd.Audio(reference_record['audio_filepath'], rate=sample_rate)\n", + " audio_gt = ipd.Audio(val_record['audio_filepath'], rate=sample_rate)\n", + " audio_gen = ipd.Audio(audio_gen, rate=sample_rate)\n", + " \n", + " print(\"------\")\n", + " print(f\"Text: {val_record['text']}\")\n", + " print('Reference Audio')\n", + " ipd.display(audio_ref)\n", + " print('Ground Truth Audio')\n", + " ipd.display(audio_gt)\n", + " print('Synthesized Audio')\n", + " ipd.display(audio_gen)\n", + " plt.imshow(spec_pred[0].to('cpu').numpy(), origin=\"lower\", aspect=\"auto\")\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8cd156e4", + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"FastPitch checkpoint: {pretrained_fastpitch_checkpoint}\")\n", + "print(f\"HiFi-Gan checkpoint: {finetuned_hifigan_on_multispeaker_checkpoint}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 5ad3d97b776fc3a43b3b2bf25b943c1e55998955 Mon Sep 17 00:00:00 2001 From: Ryan Langman Date: Wed, 3 May 2023 14:23:49 -0700 Subject: [PATCH 055/512] [TTS] Create initial TTS dataset feature processors (#6507) Signed-off-by: Ryan --- .../parts/preprocessing/feature_processors.py | 216 ++++++++++++++++++ .../preprocessing/test_feature_processors.py | 182 +++++++++++++++ 2 files changed, 398 insertions(+) create mode 100644 nemo/collections/tts/parts/preprocessing/feature_processors.py create mode 100644 tests/collections/tts/parts/preprocessing/test_feature_processors.py diff --git a/nemo/collections/tts/parts/preprocessing/feature_processors.py b/nemo/collections/tts/parts/preprocessing/feature_processors.py new file mode 100644 index 000000000000..19ed8139ae65 --- /dev/null +++ b/nemo/collections/tts/parts/preprocessing/feature_processors.py @@ -0,0 +1,216 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Optional + +import torch + +from nemo.utils.decorators import experimental + + +@experimental +class FeatureProcessor(ABC): + @abstractmethod + def process(self, training_example: dict) -> None: + """ + Process the input training example dictionary, modifying necessary fields in place. + + Args: + training_example: training example dictionary. + """ + + +class FeatureScaler(FeatureProcessor): + def __init__(self, field: str, add_value: float = 0.0, div_value: float = 1.0): + """ + Scales a field by constant factors. For example, for mean-variance normalization. + + Specifically: input[field] = (input[field] + add_value) / div_value + + Args: + field: Field to scale + add_value: Constant float value to add to feature. + div_value: Constant float value to divide feature by. + """ + self.field = field + self.add_value = add_value + self.div_value = div_value + + def process(self, training_example: dict) -> None: + feature = training_example[self.field] + feature = (feature + self.add_value) / self.div_value + training_example[self.field] = feature + + +class LogCompression(FeatureProcessor): + def __init__(self, field: str, log_zero_guard_type: str = "add", log_zero_guard_value: float = 1.0): + """ + Apply log compression to a field. + + By default: input[field] = log(1.0 + input[field]) + For clamp mode: input[field] = log(max(log_zero_guard_value, input[field])) + + Args: + field: Field to apply log compression to. + log_zero_guard_type: Method to avoid logarithm approaching -inf, either "add" or "clamp". + log_zero_guard_value: Value to add or clamp input with. + """ + + self.field = field + + if log_zero_guard_type == "add": + self.guard_fn = self._add_guard + elif log_zero_guard_type == "clamp": + self.guard_fn = self._clamp_guard + else: + raise ValueError(f"Unsupported log zero guard type: '{log_zero_guard_type}'") + + self.guard_type = log_zero_guard_type + self.guard_value = log_zero_guard_value + + def _add_guard(self, feature: torch.Tensor): + return feature + self.guard_value + + def _clamp_guard(self, feature: torch.Tensor): + return torch.clamp(feature, min=self.guard_value) + + def process(self, training_example: dict) -> None: + feature = training_example[self.field] + + feature = self.guard_fn(feature) + feature = torch.log(feature) + + training_example[self.field] = feature + + +class MeanVarianceNormalization(FeatureProcessor): + def __init__(self, field: str, stats_path: Path, mask_field: Optional[str] = "voiced_mask"): + """ + Apply mean and variance to the input field. Statistics are provided in JSON format, and can be + computed using scripts.dataset_processing.tts.compute_feature_stats.py + + Specifically: input[field] = (input[field] + mean) / standard_deviation + + Stats file format example for field 'pitch': + + { + "default": { + "pitch_mean": 100.0, + "pitch_std": 50.0, + } + } + + Args: + field: Field to apply normalization to. + stats_path: JSON file with feature mean and variance. + mask_field: Optional, field in example dictionary with boolean array indicating which values to + mask to 0. Defaults to 'voiced_mask', expected to be computed by pyin pitch estimator. + """ + + self.field = field + self.mask_field = mask_field + + with open(stats_path, 'r', encoding="utf-8") as stats_f: + stats_dict = json.load(stats_f) + self.mean = stats_dict["default"][f"{self.field}_mean"] + self.std = stats_dict["default"][f"{self.field}_std"] + + def process(self, training_example: dict) -> None: + feature = training_example[self.field] + + feature = (feature - self.mean) / self.std + if self.mask_field: + voiced_mask = training_example[self.mask_field] + feature[~voiced_mask] = 0.0 + + training_example[self.field] = feature + + +class MeanVarianceSpeakerNormalization(FeatureProcessor): + def __init__( + self, + field: str, + stats_path: Path, + speaker_field: str = "speaker", + mask_field: Optional[str] = "voiced_mask", + fallback_to_default: bool = False, + ): + """ + Apply speaker level mean and variance to the input field. Statistics are provided in JSON format, and can be + computed using scripts.dataset_processing.tts.compute_feature_stats.py + + Specifically: input[field] = (input[field] + speaker_mean) / speaker_standard_deviation + + Stats file format example for field 'pitch': + + { + "default": { + "pitch_mean": 100.0, + "pitch_std": 50.0, + }, + "speaker1": { + "pitch_mean": 110.0, + "pitch_std": 45.0, + }, + "speaker2": { + "pitch_mean": 105.0, + "pitch_std": 30.0, + }, + ... + } + + Args: + field: Field to apply normalization to. + stats_path: JSON file with feature mean and variance. + speaker_field: field containing speaker ID string. + mask_field: Optional, field in example dictionary with boolean array indicating which values to + mask to 0. Defaults to 'voiced_mask', expected to be computed by pyin pitch estimator. + fallback_to_default: Whether to use 'default' feature statistics when speaker is not found in + the statistics dictionary. + """ + + self.field = field + self.key_mean = f"{self.field}_mean" + self.key_std = f"{self.field}_std" + self.speaker_field = speaker_field + self.mask_field = mask_field + self.fallback_to_default = fallback_to_default + + with open(stats_path, 'r', encoding="utf-8") as stats_f: + self.stats_dict = json.load(stats_f) + + def process(self, training_example: dict) -> None: + feature = training_example[self.field] + + speaker = training_example[self.speaker_field] + if speaker in self.stats_dict: + stats = self.stats_dict[speaker] + elif self.fallback_to_default: + stats = self.stats_dict["default"] + else: + raise ValueError(f"Statistics not found for speaker: {speaker}") + + feature_mean = stats[self.key_mean] + feature_std = stats[self.key_std] + + feature = (feature - feature_mean) / feature_std + + if self.mask_field: + mask = training_example[self.mask_field] + feature[~mask] = 0.0 + + training_example[self.field] = feature diff --git a/tests/collections/tts/parts/preprocessing/test_feature_processors.py b/tests/collections/tts/parts/preprocessing/test_feature_processors.py new file mode 100644 index 000000000000..82739d7137be --- /dev/null +++ b/tests/collections/tts/parts/preprocessing/test_feature_processors.py @@ -0,0 +1,182 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import contextlib +import json +import tempfile +from pathlib import Path + +import numpy as np +import pytest +import torch + +from nemo.collections.tts.parts.preprocessing.feature_processors import ( + FeatureScaler, + LogCompression, + MeanVarianceNormalization, + MeanVarianceSpeakerNormalization, +) + + +class TestTTSFeatureProcessors: + @contextlib.contextmanager + def _write_test_dict(self, test_dict, filename): + temp_dir = tempfile.TemporaryDirectory() + try: + test_dir = Path(temp_dir.name) + test_dict_filepath = test_dir / filename + with open(test_dict_filepath, 'w', encoding="utf-8") as stats_f: + json.dump(test_dict, stats_f, indent=4) + + yield test_dict_filepath + finally: + temp_dir.cleanup() + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_feature_scalar(self): + field = "test_feat" + input_tensor = torch.tensor([-2.5, 0.0, 1.0], dtype=torch.float32) + expected_tensor = torch.tensor([0.0, 2.0, 2.8], dtype=torch.float32) + processor = FeatureScaler(field, add_value=2.5, div_value=1.25) + + training_example = {field: input_tensor} + processor.process(training_example) + output_tensor = training_example[field] + + torch.testing.assert_close(output_tensor, expected_tensor) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_log_compression(self): + field = "test_feat" + + input_tensor = torch.tensor([-0.5, 0.0, 2.0], dtype=torch.float32) + expected_tensor = torch.tensor([np.log(0.5), 0.0, np.log(3.0)], dtype=torch.float32) + processor = LogCompression(field) + + training_example = {field: input_tensor} + processor.process(training_example) + output_tensor = training_example[field] + + torch.testing.assert_close(output_tensor, expected_tensor) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_log_compression_clamp(self): + field = "test_feat" + + input_tensor = torch.tensor([0.1, 1.0, 2.0], dtype=torch.float32) + expected_tensor = torch.tensor([np.log(0.5), 0.0, np.log(2.0)], dtype=torch.float32) + processor = LogCompression(field, log_zero_guard_type="clamp", log_zero_guard_value=0.5) + + training_example = {field: input_tensor} + processor.process(training_example) + output_tensor = training_example[field] + + torch.testing.assert_close(output_tensor, expected_tensor) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_mean_variance_normalization(self): + field = "test_feat" + filename = "stats.json" + stat_dict = {"default": {"test_feat_mean": 1.5, "test_feat_std": 0.5}} + + input_tensor = torch.tensor([0.0, 1.5, 2.0], dtype=torch.float32) + expected_tensor = torch.tensor([-3.0, 0.0, 1.0], dtype=torch.float32) + training_example = {field: input_tensor} + + with self._write_test_dict(stat_dict, filename=filename) as stat_dict_filepath: + processor = MeanVarianceNormalization(field, stats_path=stat_dict_filepath, mask_field=None) + processor.process(training_example) + + output_tensor = training_example[field] + torch.testing.assert_close(output_tensor, expected_tensor) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_mean_variance_normalization_masked(self): + field = "test_feat" + mask_field = "mask" + filename = "stats.json" + stat_dict = {"default": {"test_feat_mean": 1.0, "test_feat_std": 0.5}} + + input_tensor = torch.tensor([2.0, 3.0, 4.0, 5.0], dtype=torch.float32) + input_mask = torch.tensor([True, False, False, True], dtype=torch.bool) + expected_tensor = torch.tensor([2.0, 0.0, 0.0, 8.0], dtype=torch.float32) + training_example = {field: input_tensor, mask_field: input_mask} + + with self._write_test_dict(stat_dict, filename=filename) as stat_dict_filepath: + processor = MeanVarianceNormalization(field, stats_path=stat_dict_filepath, mask_field=mask_field) + processor.process(training_example) + + output_tensor = training_example[field] + torch.testing.assert_close(output_tensor, expected_tensor) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_mean_variance_speaker_normalization(self): + field = "pitch" + filename = "stats.json" + stat_dict = { + "default": {"pitch_mean": 1.5, "pitch_std": 0.5}, + "speaker1": {"pitch_mean": 0.5, "pitch_std": 1.0}, + "speaker2": {"pitch_mean": 0.0, "pitch_std": 2.0}, + } + + input_tensor = torch.tensor([0.0, 1.0], dtype=torch.float32) + + training_example1 = {field: input_tensor, "speaker": "speaker1"} + training_example2 = {field: input_tensor, "speaker": "speaker2"} + training_example3 = {field: input_tensor, "speaker": "unknown"} + expected_tensor1 = torch.tensor([-0.5, 0.5], dtype=torch.float32) + expected_tensor2 = torch.tensor([0.0, 0.5], dtype=torch.float32) + expected_tensor3 = torch.tensor([-3.0, -1.0], dtype=torch.float32) + + with self._write_test_dict(stat_dict, filename=filename) as stat_dict_filepath: + processor = MeanVarianceSpeakerNormalization( + field, stats_path=stat_dict_filepath, mask_field=None, fallback_to_default=True + ) + processor.process(training_example1) + processor.process(training_example2) + processor.process(training_example3) + + output_tensor1 = training_example1[field] + output_tensor2 = training_example2[field] + output_tensor3 = training_example3[field] + torch.testing.assert_close(output_tensor1, expected_tensor1) + torch.testing.assert_close(output_tensor2, expected_tensor2) + torch.testing.assert_close(output_tensor3, expected_tensor3) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_mean_variance_speaker_normalization_masked(self): + field = "test_feat" + mask_field = "test_mask" + filename = "stats.json" + stat_dict = {"steve": {"test_feat_mean": -1.0, "test_feat_std": 2.0}} + + input_tensor = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32) + input_mask = torch.tensor([False, True, False, True], dtype=torch.bool) + expected_tensor = torch.tensor([0.0, 1.5, 0.0, 2.5], dtype=torch.float32) + + training_example = {field: input_tensor, "speaker": "steve", mask_field: input_mask} + + with self._write_test_dict(stat_dict, filename=filename) as stat_dict_filepath: + processor = MeanVarianceSpeakerNormalization(field, stats_path=stat_dict_filepath, mask_field=mask_field) + processor.process(training_example) + + output_tensor = training_example[field] + torch.testing.assert_close(output_tensor, expected_tensor) From e4971caf7c3a3ccaeb9747261854b34a6cb7e022 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 3 May 2023 15:58:50 -0700 Subject: [PATCH 056/512] fix (#6529) (#6546) Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar --- .../nlp/models/language_modeling/megatron_finetune_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py b/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py index c4cfcfdad1ff..fb58ec6a843b 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py @@ -284,7 +284,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): _, seq_length = batch[0].shape _, dec_seq_length = batch[1].shape - tensor_shape = [seq_length, get_micro_batch_size(), self.hidden_size] + tensor_shape = [seq_length, get_micro_batch_size(), self.cfg.encoder.hidden_size] data_iter = get_iterator_k_split(batch, get_num_microbatches()) fwd_bwd_function = get_forward_backward_func() From f1bf31cffb6e7dc9659114cecd02c7cf095318c9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 3 May 2023 21:41:02 -0700 Subject: [PATCH 057/512] Add FastConformer Hybrid ASR models for EN, ES, IT, DE, PL, HR, UA, BY (#6549) (#6553) * Added fastconfomer hybrid asr models for en, es, it, de, pl, hr, ua, by * updated ASR docs with the fastconformer hybrid checkpoints * added the fastconformer RNNT and CTC models --------- Signed-off-by: KunalDhawan Co-authored-by: Kunal Dhawan --- docs/source/asr/data/benchmark_by.csv | 2 + docs/source/asr/data/benchmark_de.csv | 1 + docs/source/asr/data/benchmark_en.csv | 5 +- docs/source/asr/data/benchmark_es.csv | 3 +- docs/source/asr/data/benchmark_hr.csv | 1 + docs/source/asr/data/benchmark_it.csv | 2 +- docs/source/asr/data/benchmark_pl.csv | 1 + docs/source/asr/data/benchmark_ua.csv | 2 + docs/source/asr/results.rst | 19 ++++++ nemo/collections/asr/models/ctc_bpe_models.py | 7 --- .../asr/models/hybrid_rnnt_ctc_bpe_models.py | 61 ++++++++++++++++++- .../collections/asr/models/rnnt_bpe_models.py | 7 --- 12 files changed, 92 insertions(+), 19 deletions(-) create mode 100644 docs/source/asr/data/benchmark_by.csv create mode 100644 docs/source/asr/data/benchmark_ua.csv diff --git a/docs/source/asr/data/benchmark_by.csv b/docs/source/asr/data/benchmark_by.csv new file mode 100644 index 000000000000..750dfd82ff94 --- /dev/null +++ b/docs/source/asr/data/benchmark_by.csv @@ -0,0 +1,2 @@ +Model,Model Base Class,Model Card +stt_by_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_by_fastconformer_hybrid_large_pc" \ No newline at end of file diff --git a/docs/source/asr/data/benchmark_de.csv b/docs/source/asr/data/benchmark_de.csv index 99e221a6b835..6084e95c37c0 100644 --- a/docs/source/asr/data/benchmark_de.csv +++ b/docs/source/asr/data/benchmark_de.csv @@ -4,3 +4,4 @@ stt_de_citrinet_1024,EncDecCTCModel,"https://ngc.nvidia.com/catalog/models/nvidi stt_de_contextnet_1024,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_de_contextnet_1024" stt_de_conformer_ctc_large,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_de_conformer_ctc_large" stt_de_conformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_de_conformer_transducer_large" +stt_de_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_de_fastconformer_hybrid_large_pc" diff --git a/docs/source/asr/data/benchmark_en.csv b/docs/source/asr/data/benchmark_en.csv index 0f03452d034d..5f68e9ca22ce 100644 --- a/docs/source/asr/data/benchmark_en.csv +++ b/docs/source/asr/data/benchmark_en.csv @@ -25,4 +25,7 @@ stt_en_conformer_transducer_small,EncDecRNNTBPEModel,"https://ngc.nvidia.com/cat stt_en_conformer_transducer_medium,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_medium" stt_en_conformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_large" stt_en_conformer_transducer_xlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_xlarge" -stt_en_conformer_transducer_xxlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_xxlarge" \ No newline at end of file +stt_en_conformer_transducer_xxlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_xxlarge" +stt_en_fastconformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_large" +stt_en_fastconformer_ctc_large,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_large" +stt_en_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_pc" \ No newline at end of file diff --git a/docs/source/asr/data/benchmark_es.csv b/docs/source/asr/data/benchmark_es.csv index 1e1ade3a739c..0fa8b0ecedf1 100644 --- a/docs/source/asr/data/benchmark_es.csv +++ b/docs/source/asr/data/benchmark_es.csv @@ -4,4 +4,5 @@ stt_es_citrinet_512,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvi stt_es_citrinet_1024_gamma_0_25,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_es_citrinet_1024_gamma_0_25" stt_es_conformer_ctc_large,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_es_conformer_ctc_large" stt_es_conformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_es_conformer_transducer_large" -stt_es_contextnet_1024,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_es_contextnet_1024" \ No newline at end of file +stt_es_contextnet_1024,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_es_contextnet_1024" +stt_es_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_es_fastconformer_hybrid_large_pc" \ No newline at end of file diff --git a/docs/source/asr/data/benchmark_hr.csv b/docs/source/asr/data/benchmark_hr.csv index ea506eed3432..35a5b5f04f39 100644 --- a/docs/source/asr/data/benchmark_hr.csv +++ b/docs/source/asr/data/benchmark_hr.csv @@ -1,3 +1,4 @@ Model,Model Base Class,Model Card stt_hr_conformer_ctc_large,EncDecCTCModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_hr_conformer_ctc_large" stt_hr_conformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_hr_conformer_transducer_large" +stt_hr_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_hr_fastconformer_hybrid_large_pc" \ No newline at end of file diff --git a/docs/source/asr/data/benchmark_it.csv b/docs/source/asr/data/benchmark_it.csv index d605b68809eb..230194966573 100644 --- a/docs/source/asr/data/benchmark_it.csv +++ b/docs/source/asr/data/benchmark_it.csv @@ -1,3 +1,3 @@ Model,Model Base Class,Model Card stt_it_quartznet15x5,EncDecCTCModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_it_quartznet15x5" - +stt_it_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_it_fastconformer_hybrid_large_pc" diff --git a/docs/source/asr/data/benchmark_pl.csv b/docs/source/asr/data/benchmark_pl.csv index bf646e107306..e3ad9bdb50b7 100644 --- a/docs/source/asr/data/benchmark_pl.csv +++ b/docs/source/asr/data/benchmark_pl.csv @@ -1,2 +1,3 @@ Model,Model Base Class,Model Card stt_pl_quartznet15x5,EncDecCTCModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_pl_quartznet15x5" +stt_pl_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_pl_fastconformer_hybrid_large_pc" \ No newline at end of file diff --git a/docs/source/asr/data/benchmark_ua.csv b/docs/source/asr/data/benchmark_ua.csv new file mode 100644 index 000000000000..df1b6c383d3b --- /dev/null +++ b/docs/source/asr/data/benchmark_ua.csv @@ -0,0 +1,2 @@ +Model,Model Base Class,Model Card +stt_ua_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_ua_fastconformer_hybrid_large_pc" \ No newline at end of file diff --git a/docs/source/asr/results.rst b/docs/source/asr/results.rst index e6ffe3deac2a..a1c96c7e1727 100644 --- a/docs/source/asr/results.rst +++ b/docs/source/asr/results.rst @@ -268,3 +268,22 @@ Kinyarwanda :widths: 40, 10, 50 :header-rows: 1 +----------------------------- + +Belarusian +^^^^^^^^^^^ +.. csv-table:: + :file: data/benchmark_by.csv + :align: left + :widths: 40, 10, 50 + :header-rows: 1 + +----------------------------- + +Ukrainian +^^^^^^^^^^^ +.. csv-table:: + :file: data/benchmark_ua.csv + :align: left + :widths: 40, 10, 50 + :header-rows: 1 \ No newline at end of file diff --git a/nemo/collections/asr/models/ctc_bpe_models.py b/nemo/collections/asr/models/ctc_bpe_models.py index a82f218d1d69..b97bf769132c 100644 --- a/nemo/collections/asr/models/ctc_bpe_models.py +++ b/nemo/collections/asr/models/ctc_bpe_models.py @@ -604,11 +604,4 @@ def list_available_models(cls) -> List[PretrainedModelInfo]: ) results.append(model) - model = PretrainedModelInfo( - pretrained_model_name="stt_en_fastconformer_ctc_large_ls", - description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_large_ls", - location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_ctc_large_ls/versions/1.0.0/files/stt_en_fastconformer_ctc_large_ls.nemo", - ) - results.append(model) - return results diff --git a/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py b/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py index 104b2eb95524..d10d3364ea29 100644 --- a/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py +++ b/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py @@ -14,7 +14,7 @@ import copy import os -from typing import Dict, Optional, Union +from typing import Dict, List, Optional, Union import torch from omegaconf import DictConfig, ListConfig, OmegaConf, open_dict @@ -454,7 +454,7 @@ def change_decoding_strategy(self, decoding_cfg: DictConfig = None, decoder_type raise ValueError(f"decoder_type={decoder_type} is not supported. Supported values: [ctc,rnnt]") @classmethod - def list_available_models(cls) -> Optional[PretrainedModelInfo]: + def list_available_models(cls) -> List[PretrainedModelInfo]: """ This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud. @@ -462,4 +462,61 @@ def list_available_models(cls) -> Optional[PretrainedModelInfo]: List of available pre-trained models. """ results = [] + + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_hybrid_large_pc", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_pc", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_hybrid_large_pc/versions/1.18.0/files/stt_en_fastconformer_hybrid_large_pc.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_de_fastconformer_hybrid_large_pc", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_de_fastconformer_hybrid_large_pc", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_de_fastconformer_hybrid_large_pc/versions/1.18.0/files/stt_de_fastconformer_hybrid_large_pc.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_it_fastconformer_hybrid_large_pc", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_it_fastconformer_hybrid_large_pc", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_it_fastconformer_hybrid_large_pc/versions/1.18/files/stt_it_fastconformer_hybrid_large_pc.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_es_fastconformer_hybrid_large_pc", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_es_fastconformer_hybrid_large_pc", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_es_fastconformer_hybrid_large_pc/versions/1.18.0/files/stt_es_fastconformer_hybrid_large_pc.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_hr_fastconformer_hybrid_large_pc", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_hr_fastconformer_hybrid_large_pc", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_hr_fastconformer_hybrid_large_pc/versions/1.18.0/files/FastConformer-Hybrid-Transducer-CTC-BPE-v256-averaged.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_ua_fastconformer_hybrid_large_pc", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_ua_fastconformer_hybrid_large_pc", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_ua_fastconformer_hybrid_large_pc/versions/1.18.0/files/stt_ua_fastconformer_hybrid_large_pc.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_pl_fastconformer_hybrid_large_pc", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_pl_fastconformer_hybrid_large_pc", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_pl_fastconformer_hybrid_large_pc/versions/1.18.0/files/stt_pl_fastconformer_hybrid_large_pc.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_by_fastconformer_hybrid_large_pc", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_by_fastconformer_hybrid_large_pc", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_by_fastconformer_hybrid_large_pc/versions/1.18.0/files/stt_by_fastconformer_hybrid_large_pc.nemo", + ) + results.append(model) + return results diff --git a/nemo/collections/asr/models/rnnt_bpe_models.py b/nemo/collections/asr/models/rnnt_bpe_models.py index b162f2411450..5ee5824b9d27 100644 --- a/nemo/collections/asr/models/rnnt_bpe_models.py +++ b/nemo/collections/asr/models/rnnt_bpe_models.py @@ -253,13 +253,6 @@ def list_available_models(cls) -> List[PretrainedModelInfo]: ) results.append(model) - model = PretrainedModelInfo( - pretrained_model_name="stt_en_fastconformer_transducer_large_ls", - description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_large_ls", - location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_transducer_large_ls/versions/1.0.0/files/stt_en_fastconformer_transducer_large_ls.nemo", - ) - results.append(model) - return results def __init__(self, cfg: DictConfig, trainer: Trainer = None): From e482892f19fbe083e55b1bc293a35d995c25b28c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 3 May 2023 21:41:55 -0700 Subject: [PATCH 058/512] Add scores for FastConformer models (#6557) (#6558) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar --- .../asr/data/scores/by/fastconformer_by.csv | 2 + .../source/asr/data/scores/de/citrinet_de.csv | 4 +- .../asr/data/scores/de/conformer_de.csv | 6 +- .../asr/data/scores/de/contextnet_de.csv | 4 +- .../asr/data/scores/de/fastconformer_de.csv | 2 + .../asr/data/scores/de/quartznet15x5_de.csv | 4 +- .../source/asr/data/scores/en/citrinet_en.csv | 14 +- .../asr/data/scores/en/conformer_en.csv | 28 ++-- .../asr/data/scores/en/contextnet_en.csv | 14 +- .../asr/data/scores/en/fastconformer_en.csv | 4 + .../asr/data/scores/en/jasper10x5dr_en.csv | 4 +- .../asr/data/scores/en/quartznet15x5_en.csv | 4 +- .../asr/data/scores/en/squeezeformer_en.csv | 14 +- .../source/asr/data/scores/es/citrinet_es.csv | 6 +- .../asr/data/scores/es/conformer_es.csv | 6 +- .../asr/data/scores/es/contextnet_es.csv | 4 +- .../asr/data/scores/es/fastconformer_es.csv | 2 + .../asr/data/scores/es/quartznet15x5_es.csv | 4 +- .../asr/data/scores/hr/conformer_hr.csv | 6 +- .../asr/data/scores/hr/fastconformer_hr.csv | 2 + .../asr/data/scores/it/conformer_it.csv | 6 +- .../asr/data/scores/it/fastconformer_it.csv | 2 + .../asr/data/scores/it/quartznet15x5_it.csv | 4 +- .../asr/data/scores/pl/fastconformer_pl.csv | 2 + .../asr/data/scores/pl/quartznet15x5_pl.csv | 4 +- .../asr/data/scores/ua/fastconformer_ua.csv | 2 + .../data/scores_pc/by/fastconformer_by.csv | 2 + .../data/scores_pc/de/fastconformer_de.csv | 2 + .../data/scores_pc/en/fastconformer_en.csv | 2 + .../data/scores_pc/es/fastconformer_es.csv | 2 + .../data/scores_pc/hr/fastconformer_hr.csv | 2 + .../data/scores_pc/it/fastconformer_it.csv | 2 + .../data/scores_pc/pl/fastconformer_pl.csv | 2 + .../data/scores_pc/ua/fastconformer_ua.csv | 2 + docs/source/asr/scores.rst | 147 ++++++++++++++++++ 35 files changed, 249 insertions(+), 68 deletions(-) create mode 100644 docs/source/asr/data/scores/by/fastconformer_by.csv create mode 100644 docs/source/asr/data/scores/de/fastconformer_de.csv create mode 100644 docs/source/asr/data/scores/en/fastconformer_en.csv create mode 100644 docs/source/asr/data/scores/es/fastconformer_es.csv create mode 100644 docs/source/asr/data/scores/hr/fastconformer_hr.csv create mode 100644 docs/source/asr/data/scores/it/fastconformer_it.csv create mode 100644 docs/source/asr/data/scores/pl/fastconformer_pl.csv create mode 100644 docs/source/asr/data/scores/ua/fastconformer_ua.csv create mode 100644 docs/source/asr/data/scores_pc/by/fastconformer_by.csv create mode 100644 docs/source/asr/data/scores_pc/de/fastconformer_de.csv create mode 100644 docs/source/asr/data/scores_pc/en/fastconformer_en.csv create mode 100644 docs/source/asr/data/scores_pc/es/fastconformer_es.csv create mode 100644 docs/source/asr/data/scores_pc/hr/fastconformer_hr.csv create mode 100644 docs/source/asr/data/scores_pc/it/fastconformer_it.csv create mode 100644 docs/source/asr/data/scores_pc/pl/fastconformer_pl.csv create mode 100644 docs/source/asr/data/scores_pc/ua/fastconformer_ua.csv diff --git a/docs/source/asr/data/scores/by/fastconformer_by.csv b/docs/source/asr/data/scores/by/fastconformer_by.csv new file mode 100644 index 000000000000..c03cc945d99d --- /dev/null +++ b/docs/source/asr/data/scores/by/fastconformer_by.csv @@ -0,0 +1,2 @@ +Model Name,Language,MCV Dev-Set v12.0 (be),MCV Test-Set v12.0 (be) +stt_by_fastconformer_hybrid_large_pc,by,2.7 %,2.7 % diff --git a/docs/source/asr/data/scores/de/citrinet_de.csv b/docs/source/asr/data/scores/de/citrinet_de.csv index 1b3e7db093a2..1768373077b9 100644 --- a/docs/source/asr/data/scores/de/citrinet_de.csv +++ b/docs/source/asr/data/scores/de/citrinet_de.csv @@ -1,2 +1,2 @@ -Model Name,Language,MCV Dev-Set (v??) (de),MCV Dev-Set v7.0 (de),MCV Test-Set v7.0 (de),MLS Dev (en),MLS Test (en),VoxPopuli Dev (de),VoxPopuli Test (de) -stt_de_citrinet_1024,de,,6.63,7.59,4.06,5.07,12.33,10.02 +Model Name,Language,MCV Dev-Set (v??) (de),MCV Dev-Set v12.0 (de),MCV Dev-Set v7.0 (de),MCV Test-Set v12.0 (de),MCV Test-Set v7.0 (de),MLS Dev (en),MLS Test (en),VoxPopuli Dev (de),VoxPopuli Test (de) +stt_de_citrinet_1024,de,,,6.63,,7.59,4.06,5.07,12.33,10.02 diff --git a/docs/source/asr/data/scores/de/conformer_de.csv b/docs/source/asr/data/scores/de/conformer_de.csv index 3d0a9e18d452..1bd1443de00e 100644 --- a/docs/source/asr/data/scores/de/conformer_de.csv +++ b/docs/source/asr/data/scores/de/conformer_de.csv @@ -1,3 +1,3 @@ -Model Name,Language,MCV Dev-Set (v??) (de),MCV Dev-Set v7.0 (de),MCV Test-Set v7.0 (de),MLS Dev (en),MLS Test (en),VoxPopuli Dev (de),VoxPopuli Test (de) -stt_de_conformer_ctc_large,de,,5.84,6.68,3.85,4.63,12.56,10.51 -stt_de_conformer_transducer_large,de,,4.75,5.36,3.46,4.19,11.21,9.14 +Model Name,Language,MCV Dev-Set (v??) (de),MCV Dev-Set v12.0 (de),MCV Dev-Set v7.0 (de),MCV Test-Set v12.0 (de),MCV Test-Set v7.0 (de),MLS Dev (en),MLS Test (en),VoxPopuli Dev (de),VoxPopuli Test (de) +stt_de_conformer_ctc_large,de,,,5.84,,6.68,3.85,4.63,12.56,10.51 +stt_de_conformer_transducer_large,de,,,4.75,,5.36,3.46,4.19,11.21,9.14 diff --git a/docs/source/asr/data/scores/de/contextnet_de.csv b/docs/source/asr/data/scores/de/contextnet_de.csv index b7d52d649e73..40be2181a77f 100644 --- a/docs/source/asr/data/scores/de/contextnet_de.csv +++ b/docs/source/asr/data/scores/de/contextnet_de.csv @@ -1,2 +1,2 @@ -Model Name,Language,MCV Dev-Set (v??) (de),MCV Dev-Set v7.0 (de),MCV Test-Set v7.0 (de),MLS Dev (en),MLS Test (en),VoxPopuli Dev (de),VoxPopuli Test (de) -stt_de_contextnet_1024,de,,4.76,5.5,3.53,4.2,11.32,9.4 +Model Name,Language,MCV Dev-Set (v??) (de),MCV Dev-Set v12.0 (de),MCV Dev-Set v7.0 (de),MCV Test-Set v12.0 (de),MCV Test-Set v7.0 (de),MLS Dev (en),MLS Test (en),VoxPopuli Dev (de),VoxPopuli Test (de) +stt_de_contextnet_1024,de,,,4.76,,5.5,3.53,4.2,11.32,9.4 diff --git a/docs/source/asr/data/scores/de/fastconformer_de.csv b/docs/source/asr/data/scores/de/fastconformer_de.csv new file mode 100644 index 000000000000..fe6e6491f443 --- /dev/null +++ b/docs/source/asr/data/scores/de/fastconformer_de.csv @@ -0,0 +1,2 @@ +Model Name,Language,MCV Dev-Set (v??) (de),MCV Dev-Set v12.0 (de),MCV Dev-Set v7.0 (de),MCV Test-Set v12.0 (de),MCV Test-Set v7.0 (de),MLS Dev (en),MLS Test (en),VoxPopuli Dev (de),VoxPopuli Test (de) +stt_de_fastconformer_hybrid_large_pc,de,,4.2 %,,4.9 %,,3.3 %,3.8 %,10.8 %,8.7 % diff --git a/docs/source/asr/data/scores/de/quartznet15x5_de.csv b/docs/source/asr/data/scores/de/quartznet15x5_de.csv index 17540903f41e..22da250a97f3 100644 --- a/docs/source/asr/data/scores/de/quartznet15x5_de.csv +++ b/docs/source/asr/data/scores/de/quartznet15x5_de.csv @@ -1,2 +1,2 @@ -Model Name,Language,MCV Dev-Set (v??) (de),MCV Dev-Set v7.0 (de),MCV Test-Set v7.0 (de),MLS Dev (en),MLS Test (en),VoxPopuli Dev (de),VoxPopuli Test (de) -stt_de_quartznet15x5,de,11.78,,,,,, +Model Name,Language,MCV Dev-Set (v??) (de),MCV Dev-Set v12.0 (de),MCV Dev-Set v7.0 (de),MCV Test-Set v12.0 (de),MCV Test-Set v7.0 (de),MLS Dev (en),MLS Test (en),VoxPopuli Dev (de),VoxPopuli Test (de) +stt_de_quartznet15x5,de,11.78,,,,,,,, diff --git a/docs/source/asr/data/scores/en/citrinet_en.csv b/docs/source/asr/data/scores/en/citrinet_en.csv index 42d8cff2cb9b..47f180e7298e 100644 --- a/docs/source/asr/data/scores/en/citrinet_en.csv +++ b/docs/source/asr/data/scores/en/citrinet_en.csv @@ -1,7 +1,7 @@ -Model Name,Language,Librispeech Dev-Clean,Librispeech Dev-Other,Librispeech Test-Clean,Librispeech Test-Other,MCV Test-Set v8.0 (en),MLS Dev (en),MLS Test (en),NSC Part1,NSC Part6,Peoples Speech Test v1,SLR 83 Test,WSJ Dev 93,WSJ Eval 92 -stt_en_citrinet_256,en,4.2 % WER,10.7 % WER,4.4 % WER,10.7 % WER,,,,,,,,, -stt_en_citrinet_512,en,3.7 % WER,8.9 % WER,3.7 % WER,8.9 % WER,,,,,,,,, -stt_en_citrinet_1024,en,3.7 % WER,8.3 % WER,3.6 % WER,7.9 % WER,,,,,,,,, -stt_en_citrinet_256_gamma_0_25,en,4.7 %,10.6 %,4.8 %,10.7 %,,,,8.3 %,,,,5.8 %,3.6 % -stt_en_citrinet_512_gamma_0_25,en,4.0 %,9.0 %,3.9 %,9.0 %,,,,6.9 %,,,,4.4 %,3.6 % -stt_en_citrinet_1024_gamma_0_25,en,3.4 %,7.7 %,3.4 %,7.6 %,,,,6.2 %,,,,4.0 %,2.5 % +Model Name,Language,EuroParl Test Set (en),Fisher Test Set (en),Librispeech Dev-Clean,Librispeech Dev-Other,Librispeech Test-Clean,Librispeech Test-Other,MCV Test-Set v11.0 (en),MCV Test-Set v8.0 (en),MLS Dev (en),MLS Test (en),NSC Part1,NSC Part6,Peoples Speech Test v1,SLR 83 Test,SPGI Test,VoxPopuli Test (en),WSJ Dev 93,WSJ Eval 92 +stt_en_citrinet_256,en,,,4.2 % WER,10.7 % WER,4.4 % WER,10.7 % WER,,,,,,,,,,,, +stt_en_citrinet_512,en,,,3.7 % WER,8.9 % WER,3.7 % WER,8.9 % WER,,,,,,,,,,,, +stt_en_citrinet_1024,en,,,3.7 % WER,8.3 % WER,3.6 % WER,7.9 % WER,,,,,,,,,,,, +stt_en_citrinet_256_gamma_0_25,en,,,4.7 %,10.6 %,4.8 %,10.7 %,,,,,8.3 %,,,,,,5.8 %,3.6 % +stt_en_citrinet_512_gamma_0_25,en,,,4.0 %,9.0 %,3.9 %,9.0 %,,,,,6.9 %,,,,,,4.4 %,3.6 % +stt_en_citrinet_1024_gamma_0_25,en,,,3.4 %,7.7 %,3.4 %,7.6 %,,,,,6.2 %,,,,,,4.0 %,2.5 % diff --git a/docs/source/asr/data/scores/en/conformer_en.csv b/docs/source/asr/data/scores/en/conformer_en.csv index 23ec44382578..905bdf2ebedc 100644 --- a/docs/source/asr/data/scores/en/conformer_en.csv +++ b/docs/source/asr/data/scores/en/conformer_en.csv @@ -1,14 +1,14 @@ -Model Name,Language,Librispeech Dev-Clean,Librispeech Dev-Other,Librispeech Test-Clean,Librispeech Test-Other,MCV Test-Set v8.0 (en),MLS Dev (en),MLS Test (en),NSC Part1,NSC Part6,Peoples Speech Test v1,SLR 83 Test,WSJ Dev 93,WSJ Eval 92 -stt_en_conformer_ctc_small,en,3.6,8.1,3.7,8.1,,,,,,,,, -stt_en_conformer_ctc_medium,en,2.5,5.8,2.6,5.9,,,,,,,,, -stt_en_conformer_ctc_large,en,1.9,4.4,2.1,4.5,,,,,,,,, -stt_en_conformer_ctc_xlarge,en,1.77 %,3.79 %,2.00 %,3.74 %,7.88 %,,5.99 %,,6.44 %,22.90 %,5.50 %,2.36 %, -stt_en_conformer_ctc_small_ls,en,3.3,8.8,3.4,8.8,,,,,,,,, -stt_en_conformer_ctc_medium_ls,en,2.7,7.4,3.0,7.3,,,,,,,,, -stt_en_conformer_ctc_large_ls,en,2.4,6.2,2.7,6.0,,,,,,,,, -stt_en_conformer_transducer_small,en,2.8,6.6,2.5,6.6,,,,,,,,, -stt_en_conformer_transducer_medium,en,2.0,4.6,2.1,4.7,,,,,,,,, -stt_en_conformer_transducer_large,en,1.6,3.5,1.7,3.7,,,,,,,,, -stt_en_conformer_transducer_large_ls,en,2.1,5.0,2.3,5.1,,,,,,,,, -stt_en_conformer_transducer_xlarge,en,1.48 %,2.95 %,1.62 %,3.01 %,6.46 %,4.59 %,5.32 %,5.70 %,6.47 %,21.32 %,,2.05 %,1.17 % -stt_en_conformer_transducer_xxlarge,en,1.52 %,3.09 %,1.72 %,3.14 %,,5.29 %,5.85 %,6.64 %,,,,2.42 %,1.49 % +Model Name,Language,EuroParl Test Set (en),Fisher Test Set (en),Librispeech Dev-Clean,Librispeech Dev-Other,Librispeech Test-Clean,Librispeech Test-Other,MCV Test-Set v11.0 (en),MCV Test-Set v8.0 (en),MLS Dev (en),MLS Test (en),NSC Part1,NSC Part6,Peoples Speech Test v1,SLR 83 Test,SPGI Test,VoxPopuli Test (en),WSJ Dev 93,WSJ Eval 92 +stt_en_conformer_ctc_small,en,,,3.6,8.1,3.7,8.1,,,,,,,,,,,, +stt_en_conformer_ctc_medium,en,,,2.5,5.8,2.6,5.9,,,,,,,,,,,, +stt_en_conformer_ctc_large,en,,,1.9,4.4,2.1,4.5,,,,,,,,,,,, +stt_en_conformer_ctc_xlarge,en,,,1.77 %,3.79 %,2.00 %,3.74 %,,7.88 %,,5.99 %,,6.44 %,22.90 %,5.50 %,,,2.36 %, +stt_en_conformer_ctc_small_ls,en,,,3.3,8.8,3.4,8.8,,,,,,,,,,,, +stt_en_conformer_ctc_medium_ls,en,,,2.7,7.4,3.0,7.3,,,,,,,,,,,, +stt_en_conformer_ctc_large_ls,en,,,2.4,6.2,2.7,6.0,,,,,,,,,,,, +stt_en_conformer_transducer_small,en,,,2.8,6.6,2.5,6.6,,,,,,,,,,,, +stt_en_conformer_transducer_medium,en,,,2.0,4.6,2.1,4.7,,,,,,,,,,,, +stt_en_conformer_transducer_large,en,,,1.6,3.5,1.7,3.7,,,,,,,,,,,, +stt_en_conformer_transducer_large_ls,en,,,2.1,5.0,2.3,5.1,,,,,,,,,,,, +stt_en_conformer_transducer_xlarge,en,,,1.48 %,2.95 %,1.62 %,3.01 %,,6.46 %,4.59 %,5.32 %,5.70 %,6.47 %,21.32 %,,,,2.05 %,1.17 % +stt_en_conformer_transducer_xxlarge,en,,,1.52 %,3.09 %,1.72 %,3.14 %,,,5.29 %,5.85 %,6.64 %,,,,,,2.42 %,1.49 % diff --git a/docs/source/asr/data/scores/en/contextnet_en.csv b/docs/source/asr/data/scores/en/contextnet_en.csv index 4a065dd299f8..6f986e28039a 100644 --- a/docs/source/asr/data/scores/en/contextnet_en.csv +++ b/docs/source/asr/data/scores/en/contextnet_en.csv @@ -1,7 +1,7 @@ -Model Name,Language,Librispeech Dev-Clean,Librispeech Dev-Other,Librispeech Test-Clean,Librispeech Test-Other,MCV Test-Set v8.0 (en),MLS Dev (en),MLS Test (en),NSC Part1,NSC Part6,Peoples Speech Test v1,SLR 83 Test,WSJ Dev 93,WSJ Eval 92 -stt_en_contextnet_256,en,3.3 %,7.9 %,3.3 %,8.0 %,,9.7 %,11.0 %,7.1 %,,,,4.6 %,3.2 % -stt_en_contextnet_512,en,2.0 %,4.8 %,2.2 %,5.0 %,,6.6 %,7.3 %,5.9 %,,,,2.8 %,1.4 % -stt_en_contextnet_1024,en,1.7 %,3.8 %,1.9 %,4.0 %,7.9 %,,5.9 %,5.2 %,6.5 %,21.7 %,4.7 %,2.3 %,1.3 % -stt_en_contextnet_256_mls,en,,9.0 %,,9.2 %,,9.4 %,10.9 %,,,,,, -stt_en_contextnet_512_mls,en,,5.2 %,,5.2 %,,5.6 %,6.6 %,,,,,, -stt_en_contextnet_1024_mls,en,,4.1 %,,4.2 %,,4.6 %,5.6 %,,,,,, +Model Name,Language,EuroParl Test Set (en),Fisher Test Set (en),Librispeech Dev-Clean,Librispeech Dev-Other,Librispeech Test-Clean,Librispeech Test-Other,MCV Test-Set v11.0 (en),MCV Test-Set v8.0 (en),MLS Dev (en),MLS Test (en),NSC Part1,NSC Part6,Peoples Speech Test v1,SLR 83 Test,SPGI Test,VoxPopuli Test (en),WSJ Dev 93,WSJ Eval 92 +stt_en_contextnet_256,en,,,3.3 %,7.9 %,3.3 %,8.0 %,,,9.7 %,11.0 %,7.1 %,,,,,,4.6 %,3.2 % +stt_en_contextnet_512,en,,,2.0 %,4.8 %,2.2 %,5.0 %,,,6.6 %,7.3 %,5.9 %,,,,,,2.8 %,1.4 % +stt_en_contextnet_1024,en,,,1.7 %,3.8 %,1.9 %,4.0 %,,7.9 %,,5.9 %,5.2 %,6.5 %,21.7 %,4.7 %,,,2.3 %,1.3 % +stt_en_contextnet_256_mls,en,,,,9.0 %,,9.2 %,,,9.4 %,10.9 %,,,,,,,, +stt_en_contextnet_512_mls,en,,,,5.2 %,,5.2 %,,,5.6 %,6.6 %,,,,,,,, +stt_en_contextnet_1024_mls,en,,,,4.1 %,,4.2 %,,,4.6 %,5.6 %,,,,,,,, diff --git a/docs/source/asr/data/scores/en/fastconformer_en.csv b/docs/source/asr/data/scores/en/fastconformer_en.csv new file mode 100644 index 000000000000..e993273dfbf4 --- /dev/null +++ b/docs/source/asr/data/scores/en/fastconformer_en.csv @@ -0,0 +1,4 @@ +Model Name,Language,EuroParl Test Set (en),Fisher Test Set (en),Librispeech Dev-Clean,Librispeech Dev-Other,Librispeech Test-Clean,Librispeech Test-Other,MCV Test-Set v11.0 (en),MCV Test-Set v8.0 (en),MLS Dev (en),MLS Test (en),NSC Part1,NSC Part6,Peoples Speech Test v1,SLR 83 Test,SPGI Test,VoxPopuli Test (en),WSJ Dev 93,WSJ Eval 92 +stt_en_fastconformer_ctc_large,en,,,1.9,4.2,2.1,4.2,,,,,,,,,,,, +stt_en_fastconformer_transducer_large,en,,,2.0,3.8,1.8,3.8,,,,,,,,,,,, +stt_en_fastconformer_hybrid_large_pc,en,8.0 %,10.3 %,,,2.0 %,4.1 %,8.2 %,,,4.5 %,4.6 %,,,,2.3 %,4.5 %,, diff --git a/docs/source/asr/data/scores/en/jasper10x5dr_en.csv b/docs/source/asr/data/scores/en/jasper10x5dr_en.csv index ac9b260c5bb3..a812337ac0eb 100644 --- a/docs/source/asr/data/scores/en/jasper10x5dr_en.csv +++ b/docs/source/asr/data/scores/en/jasper10x5dr_en.csv @@ -1,2 +1,2 @@ -Model Name,Language,Librispeech Dev-Clean,Librispeech Dev-Other,Librispeech Test-Clean,Librispeech Test-Other,MCV Test-Set v8.0 (en),MLS Dev (en),MLS Test (en),NSC Part1,NSC Part6,Peoples Speech Test v1,SLR 83 Test,WSJ Dev 93,WSJ Eval 92 -stt_en_jasper10x5dr,en,3.74,10.21,,,,,,,,,,, +Model Name,Language,EuroParl Test Set (en),Fisher Test Set (en),Librispeech Dev-Clean,Librispeech Dev-Other,Librispeech Test-Clean,Librispeech Test-Other,MCV Test-Set v11.0 (en),MCV Test-Set v8.0 (en),MLS Dev (en),MLS Test (en),NSC Part1,NSC Part6,Peoples Speech Test v1,SLR 83 Test,SPGI Test,VoxPopuli Test (en),WSJ Dev 93,WSJ Eval 92 +stt_en_jasper10x5dr,en,,,3.74,10.21,,,,,,,,,,,,,, diff --git a/docs/source/asr/data/scores/en/quartznet15x5_en.csv b/docs/source/asr/data/scores/en/quartznet15x5_en.csv index 04aef4aa49dd..67b52bc9a0da 100644 --- a/docs/source/asr/data/scores/en/quartznet15x5_en.csv +++ b/docs/source/asr/data/scores/en/quartznet15x5_en.csv @@ -1,2 +1,2 @@ -Model Name,Language,Librispeech Dev-Clean,Librispeech Dev-Other,Librispeech Test-Clean,Librispeech Test-Other,MCV Test-Set v8.0 (en),MLS Dev (en),MLS Test (en),NSC Part1,NSC Part6,Peoples Speech Test v1,SLR 83 Test,WSJ Dev 93,WSJ Eval 92 -stt_en_quartznet15x5,en,4.38,11.3,,,,,,,,,,, +Model Name,Language,EuroParl Test Set (en),Fisher Test Set (en),Librispeech Dev-Clean,Librispeech Dev-Other,Librispeech Test-Clean,Librispeech Test-Other,MCV Test-Set v11.0 (en),MCV Test-Set v8.0 (en),MLS Dev (en),MLS Test (en),NSC Part1,NSC Part6,Peoples Speech Test v1,SLR 83 Test,SPGI Test,VoxPopuli Test (en),WSJ Dev 93,WSJ Eval 92 +stt_en_quartznet15x5,en,,,4.38,11.3,,,,,,,,,,,,,, diff --git a/docs/source/asr/data/scores/en/squeezeformer_en.csv b/docs/source/asr/data/scores/en/squeezeformer_en.csv index fdbd9bd99665..ecd18cc40b97 100644 --- a/docs/source/asr/data/scores/en/squeezeformer_en.csv +++ b/docs/source/asr/data/scores/en/squeezeformer_en.csv @@ -1,7 +1,7 @@ -Model Name,Language,Librispeech Dev-Clean,Librispeech Dev-Other,Librispeech Test-Clean,Librispeech Test-Other,MCV Test-Set v8.0 (en),MLS Dev (en),MLS Test (en),NSC Part1,NSC Part6,Peoples Speech Test v1,SLR 83 Test,WSJ Dev 93,WSJ Eval 92 -stt_en_squeezeformer_ctc_xsmall_ls,en,3.6 %,9.7 %,3.8 %,9.4 %,,,,,,,,, -stt_en_squeezeformer_ctc_small_ls,en,2.9 %,7.4 %,3.1 %,7.4 %,,,,,,,,, -stt_en_squeezeformer_ctc_small_medium_ls,en,2.7 %,7.0 %,2.8 %,7.1 %,,,,,,,,, -stt_en_squeezeformer_ctc_medium_ls,en,2.4 %,6.2 %,2.6 %,6.3 %,,,,,,,,, -stt_en_squeezeformer_ctc_medium_large_ls,en,2.3 %,6.0 %,2.5 %,5.9 %,,,,,,,,, -stt_en_squeezeformer_ctc_large_ls,en,2.3 %,5.7 %,2.4 %,5.7 %,,,,,,,,, +Model Name,Language,EuroParl Test Set (en),Fisher Test Set (en),Librispeech Dev-Clean,Librispeech Dev-Other,Librispeech Test-Clean,Librispeech Test-Other,MCV Test-Set v11.0 (en),MCV Test-Set v8.0 (en),MLS Dev (en),MLS Test (en),NSC Part1,NSC Part6,Peoples Speech Test v1,SLR 83 Test,SPGI Test,VoxPopuli Test (en),WSJ Dev 93,WSJ Eval 92 +stt_en_squeezeformer_ctc_xsmall_ls,en,,,3.6 %,9.7 %,3.8 %,9.4 %,,,,,,,,,,,, +stt_en_squeezeformer_ctc_small_ls,en,,,2.9 %,7.4 %,3.1 %,7.4 %,,,,,,,,,,,, +stt_en_squeezeformer_ctc_small_medium_ls,en,,,2.7 %,7.0 %,2.8 %,7.1 %,,,,,,,,,,,, +stt_en_squeezeformer_ctc_medium_ls,en,,,2.4 %,6.2 %,2.6 %,6.3 %,,,,,,,,,,,, +stt_en_squeezeformer_ctc_medium_large_ls,en,,,2.3 %,6.0 %,2.5 %,5.9 %,,,,,,,,,,,, +stt_en_squeezeformer_ctc_large_ls,en,,,2.3 %,5.7 %,2.4 %,5.7 %,,,,,,,,,,,, diff --git a/docs/source/asr/data/scores/es/citrinet_es.csv b/docs/source/asr/data/scores/es/citrinet_es.csv index 9311fb2b04fd..9471293dd227 100644 --- a/docs/source/asr/data/scores/es/citrinet_es.csv +++ b/docs/source/asr/data/scores/es/citrinet_es.csv @@ -1,3 +1,3 @@ -Model Name,Language,Call Home Dev Test (es),Call Home Eval Test (es),Call Home Train (es),Fisher Dev Set (es),Fisher Test Set (es),MCV Dev-Set (v??) (es),MCV Dev-Set v7.0 (es),MCV Test-Set (v??) (es),MCV Test-Set v7.0 (es),MLS Dev (en),MLS Test (en),VoxPopuli Dev (es),VoxPopuli Test (es) -stt_es_citrinet_512,es,,,,,,9.1 % WER,,10.3 % WER,,4.9 % WER,5.2 % WER,, -stt_es_citrinet_1024_gamma_0_25,es,19.9 %,21.3 %,19.1 %,15.8 %,15.9 %,,6.1 %,,6.8 %,3.5 %,4.1 %,5.6 %,7.0 % +Model Name,Language,Call Home Dev Test (es),Call Home Eval Test (es),Call Home Train (es),Fisher Dev Set (es),Fisher Test Set (es),MCV Dev-Set (v??) (es),MCV Dev-Set v12.0 (es),MCV Dev-Set v7.0 (es),MCV Test-Set (v??) (es),MCV Test-Set v12.0 (es),MCV Test-Set v7.0 (es),MLS Dev (en),MLS Test (en),VoxPopuli Dev (es),VoxPopuli Test (es) +stt_es_citrinet_512,es,,,,,,9.1 % WER,,,10.3 % WER,,,4.9 % WER,5.2 % WER,, +stt_es_citrinet_1024_gamma_0_25,es,19.9 %,21.3 %,19.1 %,15.8 %,15.9 %,,,6.1 %,,,6.8 %,3.5 %,4.1 %,5.6 %,7.0 % diff --git a/docs/source/asr/data/scores/es/conformer_es.csv b/docs/source/asr/data/scores/es/conformer_es.csv index 10b28dc49f4e..e7e47cbdc068 100644 --- a/docs/source/asr/data/scores/es/conformer_es.csv +++ b/docs/source/asr/data/scores/es/conformer_es.csv @@ -1,3 +1,3 @@ -Model Name,Language,Call Home Dev Test (es),Call Home Eval Test (es),Call Home Train (es),Fisher Dev Set (es),Fisher Test Set (es),MCV Dev-Set (v??) (es),MCV Dev-Set v7.0 (es),MCV Test-Set (v??) (es),MCV Test-Set v7.0 (es),MLS Dev (en),MLS Test (en),VoxPopuli Dev (es),VoxPopuli Test (es) -stt_es_conformer_ctc_large,es,23.7 %,25.3 %,22.4 %,18.3 %,18.5 %,,6.3 %,,6.9 %,4.3 %,4.2 %,6.1 %,7.5 % -stt_es_conformer_transducer_large,es,18.0 %,19.4 %,17.2 %,14.7 %,14.8 %,,4.6 %,,5.2 %,2.7 %,3.2 %,4.7 %,6.0 % +Model Name,Language,Call Home Dev Test (es),Call Home Eval Test (es),Call Home Train (es),Fisher Dev Set (es),Fisher Test Set (es),MCV Dev-Set (v??) (es),MCV Dev-Set v12.0 (es),MCV Dev-Set v7.0 (es),MCV Test-Set (v??) (es),MCV Test-Set v12.0 (es),MCV Test-Set v7.0 (es),MLS Dev (en),MLS Test (en),VoxPopuli Dev (es),VoxPopuli Test (es) +stt_es_conformer_ctc_large,es,23.7 %,25.3 %,22.4 %,18.3 %,18.5 %,,,6.3 %,,,6.9 %,4.3 %,4.2 %,6.1 %,7.5 % +stt_es_conformer_transducer_large,es,18.0 %,19.4 %,17.2 %,14.7 %,14.8 %,,,4.6 %,,,5.2 %,2.7 %,3.2 %,4.7 %,6.0 % diff --git a/docs/source/asr/data/scores/es/contextnet_es.csv b/docs/source/asr/data/scores/es/contextnet_es.csv index ec20b5708d93..9f75e2a70bce 100644 --- a/docs/source/asr/data/scores/es/contextnet_es.csv +++ b/docs/source/asr/data/scores/es/contextnet_es.csv @@ -1,2 +1,2 @@ -Model Name,Language,Call Home Dev Test (es),Call Home Eval Test (es),Call Home Train (es),Fisher Dev Set (es),Fisher Test Set (es),MCV Dev-Set (v??) (es),MCV Dev-Set v7.0 (es),MCV Test-Set (v??) (es),MCV Test-Set v7.0 (es),MLS Dev (en),MLS Test (en),VoxPopuli Dev (es),VoxPopuli Test (es) -stt_es_contextnet_1024,es,19.1 %,20.7 %,18.2 %,15.3 %,15.1 %,,4.8 %,,5.2 %,3.1 %,3.5 %,5.1 %,6.2 % +Model Name,Language,Call Home Dev Test (es),Call Home Eval Test (es),Call Home Train (es),Fisher Dev Set (es),Fisher Test Set (es),MCV Dev-Set (v??) (es),MCV Dev-Set v12.0 (es),MCV Dev-Set v7.0 (es),MCV Test-Set (v??) (es),MCV Test-Set v12.0 (es),MCV Test-Set v7.0 (es),MLS Dev (en),MLS Test (en),VoxPopuli Dev (es),VoxPopuli Test (es) +stt_es_contextnet_1024,es,19.1 %,20.7 %,18.2 %,15.3 %,15.1 %,,,4.8 %,,,5.2 %,3.1 %,3.5 %,5.1 %,6.2 % diff --git a/docs/source/asr/data/scores/es/fastconformer_es.csv b/docs/source/asr/data/scores/es/fastconformer_es.csv new file mode 100644 index 000000000000..a6c12afe95e1 --- /dev/null +++ b/docs/source/asr/data/scores/es/fastconformer_es.csv @@ -0,0 +1,2 @@ +Model Name,Language,Call Home Dev Test (es),Call Home Eval Test (es),Call Home Train (es),Fisher Dev Set (es),Fisher Test Set (es),MCV Dev-Set (v??) (es),MCV Dev-Set v12.0 (es),MCV Dev-Set v7.0 (es),MCV Test-Set (v??) (es),MCV Test-Set v12.0 (es),MCV Test-Set v7.0 (es),MLS Dev (en),MLS Test (en),VoxPopuli Dev (es),VoxPopuli Test (es) +stt_es_fastconformer_hybrid_large_pc,es,,,,29.4 %,28.9 %,,7.1 %,,,7.5 %,,10.6 %,11.8 %,8.6 %,9.8 % diff --git a/docs/source/asr/data/scores/es/quartznet15x5_es.csv b/docs/source/asr/data/scores/es/quartznet15x5_es.csv index 79de5ce952d8..54de5e94025b 100644 --- a/docs/source/asr/data/scores/es/quartznet15x5_es.csv +++ b/docs/source/asr/data/scores/es/quartznet15x5_es.csv @@ -1,2 +1,2 @@ -Model Name,Language,Call Home Dev Test (es),Call Home Eval Test (es),Call Home Train (es),Fisher Dev Set (es),Fisher Test Set (es),MCV Dev-Set (v??) (es),MCV Dev-Set v7.0 (es),MCV Test-Set (v??) (es),MCV Test-Set v7.0 (es),MLS Dev (en),MLS Test (en),VoxPopuli Dev (es),VoxPopuli Test (es) -stt_es_quartznet15x5,es,,,,,,12.97,,,,,,, +Model Name,Language,Call Home Dev Test (es),Call Home Eval Test (es),Call Home Train (es),Fisher Dev Set (es),Fisher Test Set (es),MCV Dev-Set (v??) (es),MCV Dev-Set v12.0 (es),MCV Dev-Set v7.0 (es),MCV Test-Set (v??) (es),MCV Test-Set v12.0 (es),MCV Test-Set v7.0 (es),MLS Dev (en),MLS Test (en),VoxPopuli Dev (es),VoxPopuli Test (es) +stt_es_quartznet15x5,es,,,,,,12.97,,,,,,,,, diff --git a/docs/source/asr/data/scores/hr/conformer_hr.csv b/docs/source/asr/data/scores/hr/conformer_hr.csv index 04383a14e888..4cfd3f79a89f 100644 --- a/docs/source/asr/data/scores/hr/conformer_hr.csv +++ b/docs/source/asr/data/scores/hr/conformer_hr.csv @@ -1,3 +1,3 @@ -Model Name,Language,ParlaSpeech Dev-Set v1.0 (hr),ParlaSpeech Test-Set v1.0 (hr) -stt_hr_conformer_ctc_large,hr,4.43,4.70 -stt_hr_conformer_transducer_large,hr,4.56,4.69 +Model Name,Language,ParlaSpeech Dev-Set v1.0 (hr),ParlaSpeech Test-Set v1.0 (hr),Parlaspeech Dev-Set (v??) (hr),Parlaspeech Test-Set (v??) (hr) +stt_hr_conformer_ctc_large,hr,4.43,4.70,, +stt_hr_conformer_transducer_large,hr,4.56,4.69,, diff --git a/docs/source/asr/data/scores/hr/fastconformer_hr.csv b/docs/source/asr/data/scores/hr/fastconformer_hr.csv new file mode 100644 index 000000000000..ee54e981e7aa --- /dev/null +++ b/docs/source/asr/data/scores/hr/fastconformer_hr.csv @@ -0,0 +1,2 @@ +Model Name,Language,ParlaSpeech Dev-Set v1.0 (hr),ParlaSpeech Test-Set v1.0 (hr),Parlaspeech Dev-Set (v??) (hr),Parlaspeech Test-Set (v??) (hr) +stt_hr_fastconformer_hybrid_large_pc,hr,,,4.5 %,4.2 % diff --git a/docs/source/asr/data/scores/it/conformer_it.csv b/docs/source/asr/data/scores/it/conformer_it.csv index 3e3854eb862a..c86a906e982c 100644 --- a/docs/source/asr/data/scores/it/conformer_it.csv +++ b/docs/source/asr/data/scores/it/conformer_it.csv @@ -1,3 +1,3 @@ -Model Name,Language,MCV Dev-Set (v??) (it),MCV Dev-Set v11.0 (it),MCV Test-Set v11.0 (it),MLS Dev (en),MLS Test (en),VoxPopuli Dev (it),VoxPopuli Test (it) -stt_it_conformer_ctc_large,it,,5.38,5.92,13.16,10.62,13.43,16.75 -stt_it_conformer_transducer_large,it,,4.80,5.24,14.62,12.18,12.00,15.15 +Model Name,Language,MCV Dev-Set (v??) (it),MCV Dev-Set v11.0 (it),MCV Dev-Set v12.0 (it),MCV Test-Set v11.0 (it),MCV Test-Set v12.0 (it),MLS Dev (en),MLS Test (en),VoxPopuli Dev (it),VoxPopuli Test (it) +stt_it_conformer_ctc_large,it,,5.38,,5.92,,13.16,10.62,13.43,16.75 +stt_it_conformer_transducer_large,it,,4.80,,5.24,,14.62,12.18,12.00,15.15 diff --git a/docs/source/asr/data/scores/it/fastconformer_it.csv b/docs/source/asr/data/scores/it/fastconformer_it.csv new file mode 100644 index 000000000000..3a684662295e --- /dev/null +++ b/docs/source/asr/data/scores/it/fastconformer_it.csv @@ -0,0 +1,2 @@ +Model Name,Language,MCV Dev-Set (v??) (it),MCV Dev-Set v11.0 (it),MCV Dev-Set v12.0 (it),MCV Test-Set v11.0 (it),MCV Test-Set v12.0 (it),MLS Dev (en),MLS Test (en),VoxPopuli Dev (it),VoxPopuli Test (it) +stt_it_fastconformer_hybrid_large_pc,it,,,5.2 %,,5.8 %,13.6 %,11.5 %,12.7 %,15.6 % diff --git a/docs/source/asr/data/scores/it/quartznet15x5_it.csv b/docs/source/asr/data/scores/it/quartznet15x5_it.csv index 475058e38bc0..f22cfda089dc 100644 --- a/docs/source/asr/data/scores/it/quartznet15x5_it.csv +++ b/docs/source/asr/data/scores/it/quartznet15x5_it.csv @@ -1,2 +1,2 @@ -Model Name,Language,MCV Dev-Set (v??) (it),MCV Dev-Set v11.0 (it),MCV Test-Set v11.0 (it),MLS Dev (en),MLS Test (en),VoxPopuli Dev (it),VoxPopuli Test (it) -stt_it_quartznet15x5,it,15.22,,,,,, +Model Name,Language,MCV Dev-Set (v??) (it),MCV Dev-Set v11.0 (it),MCV Dev-Set v12.0 (it),MCV Test-Set v11.0 (it),MCV Test-Set v12.0 (it),MLS Dev (en),MLS Test (en),VoxPopuli Dev (it),VoxPopuli Test (it) +stt_it_quartznet15x5,it,15.22,,,,,,,, diff --git a/docs/source/asr/data/scores/pl/fastconformer_pl.csv b/docs/source/asr/data/scores/pl/fastconformer_pl.csv new file mode 100644 index 000000000000..8cf9a506b704 --- /dev/null +++ b/docs/source/asr/data/scores/pl/fastconformer_pl.csv @@ -0,0 +1,2 @@ +Model Name,Language,MCV Dev-Set (v??) (pl),MCV Dev-Set v12.0 (pl),MCV Test-Set v12.0 (pl),MLS Dev (en),MLS Test (en),VoxPopuli Dev (pl),VoxPopuli Test (pl) +stt_pl_fastconformer_hybrid_large_pc,pl,,6.0 %,8.7 %,7.1 %,5.8 %,11.3 %,8.5 % diff --git a/docs/source/asr/data/scores/pl/quartznet15x5_pl.csv b/docs/source/asr/data/scores/pl/quartznet15x5_pl.csv index 5692e36037ac..98c80fdd5401 100644 --- a/docs/source/asr/data/scores/pl/quartznet15x5_pl.csv +++ b/docs/source/asr/data/scores/pl/quartznet15x5_pl.csv @@ -1,2 +1,2 @@ -Model Name,Language,MCV Dev-Set (v??) (pl) -stt_pl_quartznet15x5,pl,14 +Model Name,Language,MCV Dev-Set (v??) (pl),MCV Dev-Set v12.0 (pl),MCV Test-Set v12.0 (pl),MLS Dev (en),MLS Test (en),VoxPopuli Dev (pl),VoxPopuli Test (pl) +stt_pl_quartznet15x5,pl,14,,,,,, diff --git a/docs/source/asr/data/scores/ua/fastconformer_ua.csv b/docs/source/asr/data/scores/ua/fastconformer_ua.csv new file mode 100644 index 000000000000..c325a73c5f53 --- /dev/null +++ b/docs/source/asr/data/scores/ua/fastconformer_ua.csv @@ -0,0 +1,2 @@ +Model Name,Language,MCV Test-Set v12.0 (ua) +stt_ua_fastconformer_hybrid_large_pc,ua,5.2 % diff --git a/docs/source/asr/data/scores_pc/by/fastconformer_by.csv b/docs/source/asr/data/scores_pc/by/fastconformer_by.csv new file mode 100644 index 000000000000..88f5e320f088 --- /dev/null +++ b/docs/source/asr/data/scores_pc/by/fastconformer_by.csv @@ -0,0 +1,2 @@ +Model Name,Language,MCV Dev-Set v12.0 (be),MCV Test-Set v12.0 (be) +stt_by_fastconformer_hybrid_large_pc,by,3.8 %,3.9 % diff --git a/docs/source/asr/data/scores_pc/de/fastconformer_de.csv b/docs/source/asr/data/scores_pc/de/fastconformer_de.csv new file mode 100644 index 000000000000..f86228918460 --- /dev/null +++ b/docs/source/asr/data/scores_pc/de/fastconformer_de.csv @@ -0,0 +1,2 @@ +Model Name,Language,MCV Dev-Set v12.0 (de),MCV Test-Set v12.0 (de),MLS Dev (en),MLS Test (en),VoxPopuli Dev (de),VoxPopuli Test (de) +stt_de_fastconformer_hybrid_large_pc,de,4.7 %,5.4 %,10.1 %,11.1 %,12.6 %,10.4 % diff --git a/docs/source/asr/data/scores_pc/en/fastconformer_en.csv b/docs/source/asr/data/scores_pc/en/fastconformer_en.csv new file mode 100644 index 000000000000..9495643af30d --- /dev/null +++ b/docs/source/asr/data/scores_pc/en/fastconformer_en.csv @@ -0,0 +1,2 @@ +Model Name,Language,EuroParl Test Set (en),Fisher Test Set (en),Librispeech Test-Clean,Librispeech Test-Other,MCV Test-Set v11.0 (en),MLS Test (en),NSC Part1,SPGI Test,VoxPopuli Test (en) +stt_en_fastconformer_hybrid_large_pc,en,12.5 %,19.0 %,7.3 %,9.2 %,10.1 %,12.7 %,7.2 %,5.1 %,6.7 % diff --git a/docs/source/asr/data/scores_pc/es/fastconformer_es.csv b/docs/source/asr/data/scores_pc/es/fastconformer_es.csv new file mode 100644 index 000000000000..501771865ed8 --- /dev/null +++ b/docs/source/asr/data/scores_pc/es/fastconformer_es.csv @@ -0,0 +1,2 @@ +Model Name,Language,Fisher Dev Set (es),Fisher Test Set (es),MCV Dev-Set v12.0 (es),MCV Test-Set v12.0 (es),MLS Dev (en),MLS Test (en),VoxPopuli Dev (es),VoxPopuli Test (es) +stt_es_fastconformer_hybrid_large_pc,es,14.7 %,14.6 %,4.5 %,5.0 %,3.1 %,3.9 %,4.4 %,5.6 % diff --git a/docs/source/asr/data/scores_pc/hr/fastconformer_hr.csv b/docs/source/asr/data/scores_pc/hr/fastconformer_hr.csv new file mode 100644 index 000000000000..3c024c09f329 --- /dev/null +++ b/docs/source/asr/data/scores_pc/hr/fastconformer_hr.csv @@ -0,0 +1,2 @@ +Model Name,Language,Parlaspeech Dev-Set (v??) (hr),Parlaspeech Test-Set (v??) (hr) +stt_hr_fastconformer_hybrid_large_pc,hr,10.4 %,8.7 % diff --git a/docs/source/asr/data/scores_pc/it/fastconformer_it.csv b/docs/source/asr/data/scores_pc/it/fastconformer_it.csv new file mode 100644 index 000000000000..6bcf2c0b4400 --- /dev/null +++ b/docs/source/asr/data/scores_pc/it/fastconformer_it.csv @@ -0,0 +1,2 @@ +Model Name,Language,MCV Dev-Set v12.0 (it),MCV Test-Set v12.0 (it),MLS Dev (en),MLS Test (en),VoxPopuli Dev (it),VoxPopuli Test (it) +stt_it_fastconformer_hybrid_large_pc,it,7.8 %,8.2 %,26.4 %,22.5 %,16.8 %,19.6 % diff --git a/docs/source/asr/data/scores_pc/pl/fastconformer_pl.csv b/docs/source/asr/data/scores_pc/pl/fastconformer_pl.csv new file mode 100644 index 000000000000..5cbadae40b59 --- /dev/null +++ b/docs/source/asr/data/scores_pc/pl/fastconformer_pl.csv @@ -0,0 +1,2 @@ +Model Name,Language,MCV Dev-Set v12.0 (pl),MCV Test-Set v12.0 (pl),MLS Dev (en),MLS Test (en),VoxPopuli Dev (pl),VoxPopuli Test (pl) +stt_pl_fastconformer_hybrid_large_pc,pl,8.9 %,11.0 %,16.0 %,11.0 %,14.0 %,11.4 % diff --git a/docs/source/asr/data/scores_pc/ua/fastconformer_ua.csv b/docs/source/asr/data/scores_pc/ua/fastconformer_ua.csv new file mode 100644 index 000000000000..b486fa23aeb3 --- /dev/null +++ b/docs/source/asr/data/scores_pc/ua/fastconformer_ua.csv @@ -0,0 +1,2 @@ +Model Name,Language,MCV Test-Set v12.0 (ua) +stt_ua_fastconformer_hybrid_large_pc,ua,7.3 % diff --git a/docs/source/asr/scores.rst b/docs/source/asr/scores.rst index bcb083bd917e..d008a26700ec 100644 --- a/docs/source/asr/scores.rst +++ b/docs/source/asr/scores.rst @@ -28,6 +28,13 @@ EN -------------------- +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores/en/fastconformer_en.csv + +-------------------- + .. csv-table:: :header-rows: 1 :align: left @@ -59,6 +66,16 @@ BE -------------------- +BY +^^ + +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores/by/fastconformer_by.csv + +-------------------- + CA ^^ @@ -100,6 +117,13 @@ DE -------------------- +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores/de/fastconformer_de.csv + +-------------------- + .. csv-table:: :header-rows: 1 :align: left @@ -158,6 +182,13 @@ ES -------------------- +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores/es/fastconformer_es.csv + +-------------------- + .. csv-table:: :header-rows: 1 :align: left @@ -206,6 +237,13 @@ HR -------------------- +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores/hr/fastconformer_hr.csv + +-------------------- + IT ^^ @@ -216,6 +254,13 @@ IT -------------------- +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores/it/fastconformer_it.csv + +-------------------- + .. csv-table:: :header-rows: 1 :align: left @@ -236,6 +281,13 @@ KAB PL ^^ +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores/pl/fastconformer_pl.csv + +-------------------- + .. csv-table:: :header-rows: 1 :align: left @@ -270,6 +322,16 @@ RW -------------------- +UA +^^ + +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores/ua/fastconformer_ua.csv + +-------------------- + ZH ^^ @@ -287,3 +349,88 @@ ZH -------------------- + + +Scores with Punctuation and Capitalization +------------------------------------------ + +EN with P&C +^^^^^^^^^^^ + +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores_pc/en/fastconformer_en.csv + +-------------------- + +BY with P&C +^^^^^^^^^^^ + +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores_pc/by/fastconformer_by.csv + +-------------------- + +DE with P&C +^^^^^^^^^^^ + +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores_pc/de/fastconformer_de.csv + +-------------------- + +ES with P&C +^^^^^^^^^^^ + +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores_pc/es/fastconformer_es.csv + +-------------------- + +HR with P&C +^^^^^^^^^^^ + +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores_pc/hr/fastconformer_hr.csv + +-------------------- + +IT with P&C +^^^^^^^^^^^ + +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores_pc/it/fastconformer_it.csv + +-------------------- + +PL with P&C +^^^^^^^^^^^ + +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores_pc/pl/fastconformer_pl.csv + +-------------------- + +UA with P&C +^^^^^^^^^^^ + +.. csv-table:: + :header-rows: 1 + :align: left + :file: data/scores_pc/ua/fastconformer_ua.csv + +-------------------- + From 415d02bf2e043254ecc4c13304aa94ccb7b8958a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 3 May 2023 21:42:28 -0700 Subject: [PATCH 059/512] Fix fp16 (#6543) (#6544) Signed-off-by: MaximumEntropy Co-authored-by: Sandeep Subramanian --- examples/nlp/language_modeling/megatron_gpt_eval.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/nlp/language_modeling/megatron_gpt_eval.py b/examples/nlp/language_modeling/megatron_gpt_eval.py index d797937850e0..00b53a9f6f8f 100644 --- a/examples/nlp/language_modeling/megatron_gpt_eval.py +++ b/examples/nlp/language_modeling/megatron_gpt_eval.py @@ -196,6 +196,8 @@ def main(cfg) -> None: pretrained_cfg.activations_checkpoint_granularity = None pretrained_cfg.activations_checkpoint_method = None pretrained_cfg.precision = trainer.precision + if trainer.precision == "16": + pretrained_cfg.megatron_amp_O2 = False model = MegatronGPTModel.restore_from( restore_path=cfg.gpt_model_file, trainer=trainer, From bccb46e10310a213ef924c87d6a7deef5a1703e1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 4 May 2023 02:36:27 -0700 Subject: [PATCH 060/512] Patch transcribe and support offline transcribe for hybrid model (#6550) (#6559) Signed-off-by: fayejf Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> --- examples/asr/transcribe_speech.py | 17 ++++++++++++++--- .../asr/parts/utils/transcribe_utils.py | 4 ++-- tools/asr_evaluator/conf/eval.yaml | 2 +- tools/asr_evaluator/utils.py | 3 ++- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/examples/asr/transcribe_speech.py b/examples/asr/transcribe_speech.py index 3493fb28d81d..30700153e340 100644 --- a/examples/asr/transcribe_speech.py +++ b/examples/asr/transcribe_speech.py @@ -19,11 +19,11 @@ import pytorch_lightning as pl import torch -from omegaconf import OmegaConf +from omegaconf import OmegaConf, open_dict from nemo.collections.asr.metrics.rnnt_wer import RNNTDecodingConfig from nemo.collections.asr.metrics.wer import CTCDecodingConfig -from nemo.collections.asr.models.ctc_models import EncDecCTCModel +from nemo.collections.asr.models import EncDecCTCModel, EncDecHybridRNNTCTCModel from nemo.collections.asr.modules.conformer_encoder import ConformerChangeConfig from nemo.collections.asr.parts.utils.transcribe_utils import ( compute_output_filename, @@ -154,6 +154,9 @@ class TranscriptionConfig: def main(cfg: TranscriptionConfig) -> TranscriptionConfig: logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}') + for key in cfg: + cfg[key] = None if cfg[key] == 'None' else cfg[key] + if is_dataclass(cfg): cfg = OmegaConf.structured(cfg) @@ -223,7 +226,6 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig: decoding_cfg.preserve_alignments = cfg.compute_timestamps if 'compute_langs' in decoding_cfg: decoding_cfg.compute_langs = cfg.compute_langs - asr_model.change_decoding_strategy(decoding_cfg, decoder_type=cfg.decoder_type) # Check if ctc or rnnt model @@ -243,6 +245,15 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig: asr_model.change_decoding_strategy(cfg.ctc_decoding) + # Setup decoding config based on model type and decoder_type + with open_dict(cfg): + if isinstance(asr_model, EncDecCTCModel) or ( + isinstance(asr_model, EncDecHybridRNNTCTCModel) and cfg.decoder_type == "ctc" + ): + cfg.decoding = cfg.ctc_decoding + else: + cfg.decoding = cfg.rnnt_decoding + # prepare audio filepaths and decide wether it's partical audio filepaths, partial_audio = prepare_audio_data(cfg) diff --git a/nemo/collections/asr/parts/utils/transcribe_utils.py b/nemo/collections/asr/parts/utils/transcribe_utils.py index d59d453ba972..8cfe58523751 100644 --- a/nemo/collections/asr/parts/utils/transcribe_utils.py +++ b/nemo/collections/asr/parts/utils/transcribe_utils.py @@ -289,14 +289,14 @@ def write_transcription( if isinstance(transcriptions[0], rnnt_utils.Hypothesis): # List[rnnt_utils.Hypothesis] best_hyps = transcriptions - assert cfg.ctc_decoding.beam.return_best_hypothesis, "Works only with return_best_hypothesis=true" + assert cfg.decoding.beam.return_best_hypothesis, "Works only with return_best_hypothesis=true" elif isinstance(transcriptions[0], list) and isinstance( transcriptions[0][0], rnnt_utils.Hypothesis ): # List[List[rnnt_utils.Hypothesis]] NBestHypothesis best_hyps, beams = [], [] for hyps in transcriptions: best_hyps.append(hyps[0]) - if not cfg.ctc_decoding.beam.return_best_hypothesis: + if not cfg.decoding.beam.return_best_hypothesis: beam = [] for hyp in hyps: beam.append((hyp.text, hyp.score)) diff --git a/tools/asr_evaluator/conf/eval.yaml b/tools/asr_evaluator/conf/eval.yaml index 95e7c94b5b43..9129eddc49f1 100644 --- a/tools/asr_evaluator/conf/eval.yaml +++ b/tools/asr_evaluator/conf/eval.yaml @@ -13,7 +13,7 @@ engine: chunk_len_in_secs: 1.6 #null # Need to specify if use buffered inference (default for offline_by_chunked is 20) total_buffer_in_secs: 4 #null # Need to specify if use buffered inference (default for offline_by_chunked is 22) model_stride: 4 # Model downsampling factor, 8 for Citrinet models and 4 for Conformer models - + decoder_type: null # Used for hybrid CTC RNNT model only. Specify decoder_type *ctc* or *rnnt* for hybrid CTC RNNT model. test_ds: manifest_filepath: null sample_rate: 16000 diff --git a/tools/asr_evaluator/utils.py b/tools/asr_evaluator/utils.py index ad69b249f5db..c233376eb13a 100644 --- a/tools/asr_evaluator/utils.py +++ b/tools/asr_evaluator/utils.py @@ -154,7 +154,8 @@ def run_offline_inference(cfg: DictConfig) -> DictConfig: f"output_filename={cfg.output_filename} " f"batch_size={cfg.test_ds.batch_size} " f"random_seed={cfg.random_seed} " - f"eval_config_yaml={f.name} ", + f"eval_config_yaml={f.name} " + f"decoder_type={cfg.inference.decoder_type} ", shell=True, check=True, ) From 8285c9613e30a33899717c3d8e6a41ed6683ef41 Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Thu, 4 May 2023 09:20:49 -0700 Subject: [PATCH 061/512] Fix notebook bad json (#6561) Signed-off-by: smajumdar --- tutorials/asr/Offline_ASR.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb index 9d134c99354b..c692277d1257 100644 --- a/tutorials/asr/Offline_ASR.ipynb +++ b/tutorials/asr/Offline_ASR.ipynb @@ -30,8 +30,8 @@ "* use beam search decoder with N-gram language model re-scoring\n", "\n", "You may find more info on how to train and use language models for ASR models here:\n", - "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/asr/asr_language_modeling.html\n" - "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", + "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/asr/asr_language_modeling.html\n", + "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n" ] }, { From 856afbdf0d08e6abc5819b5330c8ae2aea90d4d0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 4 May 2023 09:26:29 -0700 Subject: [PATCH 062/512] Change Megatron Enc Dec model to use persistent_workers (#6548) (#6552) * persistent workers * fix --------- Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Co-authored-by: Eric Harper --- .../language_modeling/megatron_lm_encoder_decoder_model.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py index 4f4bc0d709a8..b3ecc1b150ac 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py @@ -822,7 +822,11 @@ def build_pretraining_data_loader(self, dataset, consumed_samples, num_workers): # Torch dataloader. return torch.utils.data.DataLoader( - dataset, batch_sampler=batch_sampler, num_workers=num_workers, pin_memory=True, + dataset, + batch_sampler=batch_sampler, + num_workers=num_workers, + pin_memory=True, + persistent_workers=True if num_workers > 0 else False, ) def setup(self, stage=None): From 544d69d655b7d2a1b03c82ab6bbb462bfa9e4c20 Mon Sep 17 00:00:00 2001 From: Nikolay Karpov Date: Thu, 4 May 2023 20:57:47 +0400 Subject: [PATCH 063/512] Make KenLM with PC for AggregateTokenizer and merge it (#6081) * do_lowercase, rm_punctuation Signed-off-by: Nikolay Karpov * support beam_strategy = beam Signed-off-by: Nikolay Karpov * black Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix config and^Cunctuation capitalization Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rm math Signed-off-by: Nikolay Karpov * update kenlm Signed-off-by: Nikolay Karpov * black Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add opengrm Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * mv install_beamsearch_decoders Signed-off-by: Nikolay Karpov * punctuation_to_preserve Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Only tikenizer opion Signed-off-by: Nikolay Karpov * Black Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * DEFAULT_TOKEN_OFFSET Signed-off-by: Nikolay Karpov * aggregate_tokenizer Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * install kenlm with more than 5gram Signed-off-by: Nikolay Karpov * install_beamsearch_decoders Signed-off-by: Nikolay Karpov * ngram_bin_path kenlm_bin_path Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * black Signed-off-by: Nikolay Karpov * fix greedy PC bug Signed-off-by: Nikolay Karpov * move global params Signed-off-by: Nikolay Karpov * fix description and perplexity Signed-off-by: Nikolay Karpov * fix description Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * NEMO_PATH Signed-off-by: Nikolay Karpov * nemo:23.01 Signed-off-by: Nikolay Karpov * License Signed-off-by: Nikolay Karpov * description Signed-off-by: Nikolay Karpov * isinstance Signed-off-by: Nikolay Karpov * refactor kenlm stdin Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * black Signed-off-by: Nikolay Karpov * add cmd arg Signed-off-by: Nikolay Karpov * use new iter_files Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * EncDecHybridRNNTCTCModel Signed-off-by: Nikolay Karpov * punctuation Signed-off-by: Nikolay Karpov * train_kenlm args Signed-off-by: Nikolay Karpov * add docstrings Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add ngram_merge docs Signed-off-by: Nikolay Karpov * ngram_prune Signed-off-by: Nikolay Karpov * rename to ngram_merge Signed-off-by: Nikolay Karpov * rename to ngram Signed-off-by: Nikolay Karpov * add comments Signed-off-by: Nikolay Karpov * Ngram Signed-off-by: Nikolay Karpov * nemo_model_file Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * install_opengrm_ngram Signed-off-by: Nikolay Karpov * install opengrm Signed-off-by: Nikolay Karpov * rename to install_opengrm.sh Signed-off-by: Nikolay Karpov * rm extra import Signed-off-by: Nikolay Karpov * train_paths Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * text_processing Signed-off-by: Nikolay Karpov * fix ngram_bin_path Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * DECODERS_PATH Signed-off-by: Nikolay Karpov * farcompile Signed-off-by: Nikolay Karpov * rm text processing Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * text_processing Signed-off-by: Nikolay Karpov * AggregateTokenizer.DummyTokenizer Signed-off-by: Nikolay Karpov * comments Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * TextProcessingConfig Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * typo Signed-off-by: Nikolay Karpov * doc Signed-off-by: Nikolay Karpov * types Signed-off-by: Nikolay Karpov * nemo_model_file Signed-off-by: Nikolay Karpov * rm assert Signed-off-by: Nikolay Karpov * import kenlm_utils Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * return None Signed-off-by: Nikolay Karpov * Copyright Signed-off-by: Nikolay Karpov * 2022 Signed-off-by: Nikolay Karpov * 2023 Signed-off-by: Nikolay Karpov --------- Signed-off-by: Nikolay Karpov Signed-off-by: Nikolay Karpov Co-authored-by: Nikolay Karpov Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/source/asr/asr_language_modeling.rst | 118 ++++- examples/asr/speech_to_text_eval.py | 18 +- nemo/collections/asr/metrics/wer_bpe.py | 6 +- .../asr/parts/utils/transcribe_utils.py | 49 +- .../ngram_lm/eval_beamsearch_ngram.py | 42 +- .../ngram_lm/install_beamsearch_decoders.sh | 47 +- .../ngram_lm/kenlm_utils.py | 189 ++++++-- .../ngram_lm/ngram_merge.py | 448 ++++++++++++++++++ .../ngram_lm/train_kenlm.py | 201 ++++---- scripts/installers/Dockerfile.ngramtools | 30 ++ scripts/installers/install_opengrm.sh | 32 ++ scripts/installers/setup_os2s_decoders.py | 138 ++++++ 12 files changed, 1132 insertions(+), 186 deletions(-) create mode 100644 scripts/asr_language_modeling/ngram_lm/ngram_merge.py create mode 100644 scripts/installers/Dockerfile.ngramtools create mode 100755 scripts/installers/install_opengrm.sh create mode 100644 scripts/installers/setup_os2s_decoders.py diff --git a/docs/source/asr/asr_language_modeling.rst b/docs/source/asr/asr_language_modeling.rst index a0e578092f50..a0d46ca795b1 100644 --- a/docs/source/asr/asr_language_modeling.rst +++ b/docs/source/asr/asr_language_modeling.rst @@ -21,7 +21,9 @@ best candidates. The beam search decoders in NeMo support language models traine `https://github.com/kpu/kenlm `__). The beam search decoders and KenLM library are not installed by default in NeMo, and you need to install them to be able to use beam search decoding and N-gram LM. -Please refer to `scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh` on how to install them. +Please refer to `scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh `__ +on how to install them. Alternatively, you can build Docker image +`scripts/installers/Dockerfile.ngramtools `__ with all the necessary dependencies. NeMo supports both character-based and BPE-based models for N-gram LMs. An N-gram LM can be used with beam search decoders on top of the ASR models to produce more accurate candidates. The beam search decoder would incorporate @@ -45,7 +47,7 @@ The script to train an N-gram language model with KenLM can be found at `scripts/asr_language_modeling/ngram_lm/train_kenlm.py `__. This script would train an N-gram language model with KenLM library which can be used with the beam search decoders -on top of the ASR models. This script supports both character level and BPE level encodings and models which is +on top of the ASR models. This script supports both character level and BPE level encodings and models which are detected automatically from the type of the model. @@ -53,15 +55,15 @@ You may train the N-gram model as the following: .. code-block:: - python train_kenlm.py --nemo_model_file \ - --train_file \ - --kenlm_model_file \ - --ngram_length \ - --preserve_arpa + python train_kenlm.py nemo_model_file= \ + train_paths= \ + kenlm_bin_path= \ + kenlm_model_file= \ + ngram_length= \ + preserve_arpa=true -The train file specified by `--train_file` can be a text file or JSON manifest. If the file's extension is anything -other than `.json`, it assumes that data format is plain text. For plain text format, each line should contain one +The `train_paths` parameter allows for various input types, such as a list of text files, JSON manifests, or directories, to be used as the training data. +If the file's extension is anything other than `.json`, it assumes that data format is plain text. For plain text format, each line should contain one sample. For JSON manifest file, the file need to contain json formatted samples per each line like this: .. code-block:: @@ -69,16 +71,16 @@ sample. For JSON manifest file, the file need to contain json formatted samples {"audio_filepath": "/data_path/file1.wav", "text": "The transcript of the audio file."} It just extracts the `text` field from each line to create the training text file. After the N-gram model is trained, -it is stored at the path specified by `--kenlm_model_file`. +it is stored at the path specified by `kenlm_model_file`. The following is the list of the arguments for the training script: +------------------+----------+-------------+-------------------------------------------------------------------------------------------------+ | **Argument** | **Type** | **Default** | **Description** | +------------------+----------+-------------+-------------------------------------------------------------------------------------------------+ -| nemo_model_file | str | Required | The path of the `.nemo` file of the ASR model. It is needed to extract the tokenizer. | +| nemo_model_file | str | Required | The path to `.nemo` file of the ASR model, or name of a pretrained NeMo model to extract a tokenizer. | +------------------+----------+-------------+-------------------------------------------------------------------------------------------------+ -| train_file | str | Required | Path to the training file, it can be a text file or JSON manifest. | +| train_paths | List[str] | Required | List of training files or folders. Files can be a plain text file or ".json" manifest or ".json.gz". | +------------------+----------+-------------+-------------------------------------------------------------------------------------------------+ | kenlm_model_file | str | Required | The path to store the KenLM binary model file. | +------------------+----------+-------------+-------------------------------------------------------------------------------------------------+ @@ -86,10 +88,14 @@ The following is the list of the arguments for the training script: +------------------+----------+-------------+-------------------------------------------------------------------------------------------------+ | ngram_length** | int | Required | Specifies order of N-gram LM. | +------------------+----------+-------------+-------------------------------------------------------------------------------------------------+ -| do_lower_case | bool | ``False`` | Whether to make the training text all lower case. | +| ngram_prune | List[int] | [0] | List of thresholds to prune N-grams. Example: [0,0,1]. See Pruning section on the https://kheafield.com/code/kenlm/estimation | ++------------------+----------+-------------+-------------------------------------------------------------------------------------------------+ +| cache_path | str | "" | Cache path to save tokenized files. | +------------------+----------+-------------+-------------------------------------------------------------------------------------------------+ | preserve_arpa | bool | ``False`` | Whether to preserve the intermediate ARPA file after construction of the BIN file. | +------------------+----------+-------------+-------------------------------------------------------------------------------------------------+ +| verbose | int | 1 | Verbose level. | ++------------------+----------+-------------+-------------------------------------------------------------------------------------------------+ ** Note: Recommend to use 6 as the order of the N-gram model for BPE-based models. Higher orders may need the re-compilation of KenLM to support it. @@ -175,6 +181,14 @@ The following is the list of the important arguments for the evaluation script: | decoding | Dict | BeamCTC | Subdict of beam search configs. Values found via | | | Config | InferConfig | python eval_beamsearch_ngram.py --help | +---------------------+----------+------------------+-------------------------------------------------------------------------+ +| text_processing.do_lowercase | bool | ``False`` | Whether to make the training text all lower case. | ++---------------------+----------+------------------+-------------------------------------------------------------------------+ +| text_processing.punctuation_marks | str | "" | String with punctuation marks to process. Example: ".\,?" | ++---------------------+----------+------------------+-------------------------------------------------------------------------+ +| text_processing.rm_punctuation | bool | ``False``| Whether to remove punctuation marks from text. | ++---------------------+----------+------------------+-------------------------------------------------------------------------+ +| text_processing.separate_punctuation | bool |``True``| Whether to separate punctuation with the previous word by space. | ++---------------------+----------+------------------+-------------------------------------------------------------------------+ Width of the beam search (`--beam_width`) specifies the number of top candidates/predictions the beam search decoder would search for. Larger beams result in more accurate but slower predictions. @@ -334,7 +348,7 @@ Given a trained TransformerLMModel `.nemo` file or a pretrained HF model, the sc `scripts/asr_language_modeling/neural_rescorer/eval_neural_rescorer.py `__ can be used to re-score beams obtained with ASR model. You need the `.tsv` file containing the candidates produced by the acoustic model and the beam search decoding to use this script. The candidates can be the result of just the beam -search decoding or the result of fusion with an N-gram LM. You may generate this file by specifying `--preds_output_folder' for +search decoding or the result of fusion with an N-gram LM. You may generate this file by specifying `--preds_output_folder` for `scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram.py `__. The neural rescorer would rescore the beams/candidates by using two parameters of `rescorer_alpha` and `rescorer_beta` as the following: @@ -457,3 +471,77 @@ You can then pass this file to your flashlight config object during decoding: decoding.beam.flashlight_cfg.boost_path='/path/to/my_boost_file.boost' \ decoding.beam.flashlight_cfg.beam_size_token = 32 \ decoding.beam.flashlight_cfg.beam_threshold = 25.0 + +Combine N-gram Language Models +============================== + +Before combining N-gram LMs install required OpenGrm NGram library using `scripts/installers/install_opengrm.sh `__. +Alternatively, you can use Docker image `scripts/installers/Dockerfile.ngramtools `__ with all the necessary dependencies. + +To combine two N-gram language models, you can use the script ngram_merge.py located at +`scripts/asr_language_modeling/ngram_lm/ngram_merge.py `__. + +This script interpolate two ARPA N-gram language models and creates a KenLM binary file that can be used with the beam search decoders on top of ASR models. +You can specify weights (`--alpha` and `--beta`) for each of the models (`--ngram_a` and `--ngram_b`) correspondingly: `alpha` * `ngram_a` + `beta` * `ngram_b`. +This script supports both character level and BPE level encodings and models which are detected automatically from the type of the model. + +To combine two N-gram models, you can use the following command: + +.. code-block:: + + python ngram_merge.py --kenlm_bin_path \ + --ngram_bin_path \ + --arpa_a \ + --alpha \ + --arpa_b \ + --beta \ + --out_path + + + +If you provide `--test_file` and `--nemo_model_file`, the script will calculate the perplexity of the resulting N-gram model on the test set. +Note, the result of each step during the process is cached in the temporary file in the `--out_path`, to speed up further run. +You can use the `--force` flag to discard the cache and recalculate everything from scratch. + +.. code-block:: + + python ngram_merge.py --kenlm_bin_path \ + --ngram_bin_path \ + --arpa_a \ + --alpha \ + --arpa_b \ + --beta \ + --out_path + --nemo_model_file \ + --test_file \ + --symbols \ + --force + + +The following is the list of the arguments for the opengrm script: + ++----------------------+--------+------------------+-------------------------------------------------------------------------+ +| **Argument** |**Type**| **Default** | **Description** | ++----------------------+--------+------------------+-------------------------------------------------------------------------+ +| kenlm_bin_path | str | Required | The path to the bin folder of KenLM library. It is a folder named `bin` under where KenLM is installed. | ++----------------------+--------+------------------+-------------------------------------------------------------------------+ +| ngram_bin_path | str | Required | The path to the bin folder of OpenGrm Ngram. It is a folder named `bin` under where OpenGrm Ngram is installed. | ++----------------------+--------+------------------+-------------------------------------------------------------------------+ +| arpa_a | str | Required | Path to the ARPA N-gram model file A | ++----------------------+--------+------------------+-------------------------------------------------------------------------+ +| alpha | float | Required | Weight of N-gram model A | ++----------------------+--------+------------------+-------------------------------------------------------------------------+ +| arpa_b | int | Required | Path to the ARPA N-gram model file B | ++----------------------+--------+------------------+-------------------------------------------------------------------------+ +| beta | float | Required | Weight of N-gram model B | ++----------------------+--------+------------------+-------------------------------------------------------------------------+ +| out_path | str | Required | Path for writing temporary and resulting files. | ++----------------------+--------+------------------+-------------------------------------------------------------------------+ +| test_file | str | None | Path to test file to count perplexity if provided. | ++----------------------+--------+------------------+-------------------------------------------------------------------------+ +| symbols | str | None | Path to symbols (.syms) file. Could be calculated if it is not provided.| ++----------------------+--------+------------------+-------------------------------------------------------------------------+ +| nemo_model_file | str | None | The path to '.nemo' file of the ASR model, or name of a pretrained NeMo model. | ++----------------------+--------+------------------+-------------------------------------------------------------------------+ +| force | bool | ``False`` | Whether to recompile and rewrite all files | ++----------------------+--------+------------------+-------------------------------------------------------------------------+ diff --git a/examples/asr/speech_to_text_eval.py b/examples/asr/speech_to_text_eval.py index d846157b6513..f8dcbcf81bbd 100644 --- a/examples/asr/speech_to_text_eval.py +++ b/examples/asr/speech_to_text_eval.py @@ -66,7 +66,7 @@ from omegaconf import MISSING, OmegaConf, open_dict from nemo.collections.asr.metrics.wer import word_error_rate -from nemo.collections.asr.parts.utils.transcribe_utils import PunctuationCapitalization +from nemo.collections.asr.parts.utils.transcribe_utils import PunctuationCapitalization, TextProcessingConfig from nemo.core.config import hydra_runner from nemo.utils import logging @@ -81,9 +81,9 @@ class EvaluationConfig(transcribe_speech.TranscriptionConfig): only_score_manifest: bool = False - separate_punctuation: bool = True - do_lowercase: bool = False - rm_punctuation: bool = False + text_processing: Optional[TextProcessingConfig] = TextProcessingConfig( + punctuation_marks=".,?", separate_punctuation=True, do_lowercase=False, rm_punctuation=False, + ) @hydra_runner(config_name="EvaluationConfig", schema=EvaluationConfig) @@ -131,13 +131,13 @@ def main(cfg: EvaluationConfig): predicted_text.append(data['pred_text']) - pc = PunctuationCapitalization('.,?') - if cfg.separate_punctuation: + pc = PunctuationCapitalization(cfg.text_processing.punctuation_marks) + if cfg.text_processing.separate_punctuation: ground_truth_text = pc.separate_punctuation(ground_truth_text) - if cfg.do_lowercase: + if cfg.text_processing.do_lowercase: ground_truth_text = pc.do_lowercase(ground_truth_text) predicted_text = pc.do_lowercase(predicted_text) - if cfg.rm_punctuation: + if cfg.text_processing.rm_punctuation: ground_truth_text = pc.rm_punctuation(ground_truth_text) predicted_text = pc.rm_punctuation(predicted_text) @@ -164,8 +164,6 @@ def main(cfg: EvaluationConfig): raise ValueError(f"Got {metric_name} of {metric_value}, which was higher than tolerance={cfg.tolerance}") logging.info(f'Got {metric_name} of {metric_value}. Tolerance was {cfg.tolerance}') - else: - logging.info(f'Got {metric_name} of {metric_value}') logging.info(f'Dataset WER/CER ' + str(round(100 * wer, 2)) + "%/" + str(round(100 * cer, 2)) + "%") diff --git a/nemo/collections/asr/metrics/wer_bpe.py b/nemo/collections/asr/metrics/wer_bpe.py index 3dbecbb39628..762acf172a16 100644 --- a/nemo/collections/asr/metrics/wer_bpe.py +++ b/nemo/collections/asr/metrics/wer_bpe.py @@ -22,6 +22,7 @@ from nemo.collections.asr.metrics.wer import AbstractCTCDecoding, CTCDecodingConfig from nemo.collections.asr.parts.submodules import ctc_beam_decoding from nemo.collections.asr.parts.utils.rnnt_utils import Hypothesis +from nemo.collections.common.tokenizers.aggregate_tokenizer import DummyTokenizer from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec from nemo.utils import logging @@ -147,7 +148,10 @@ def __init__(self, decoding_cfg, tokenizer: TokenizerSpec): if isinstance(self.decoding, ctc_beam_decoding.AbstractBeamCTCInfer): if hasattr(self.tokenizer.tokenizer, 'get_vocab'): vocab_dict = self.tokenizer.tokenizer.get_vocab() - vocab = list(vocab_dict.keys()) + if isinstance(self.tokenizer.tokenizer, DummyTokenizer): # AggregateTokenizer.DummyTokenizer + vocab = vocab_dict + else: + vocab = list(vocab_dict.keys()) self.decoding.set_vocabulary(vocab) self.decoding.set_tokenizer(tokenizer) else: diff --git a/nemo/collections/asr/parts/utils/transcribe_utils.py b/nemo/collections/asr/parts/utils/transcribe_utils.py index 8cfe58523751..0e72ed8fa16d 100644 --- a/nemo/collections/asr/parts/utils/transcribe_utils.py +++ b/nemo/collections/asr/parts/utils/transcribe_utils.py @@ -15,6 +15,7 @@ import json import os import re +from dataclasses import dataclass from typing import List, Optional, Tuple, Union import torch @@ -442,14 +443,48 @@ def transcribe_partial_audio( class PunctuationCapitalization: - def __init__(self, punctuation_marks='.,?'): - self.regex_punctuation = re.compile(fr"([{''.join(punctuation_marks)}])") + def __init__(self, punctuation_marks: str): + """ + Class for text processing with punctuation and capitalization. Can be used with class TextProcessingConfig. + + Args: + punctuation_marks (str): String with punctuation marks to process. + Example: punctuation_marks = '.,?' + """ + if punctuation_marks: + self.regex_punctuation = re.compile(fr"([{''.join(punctuation_marks)}])") + self.regex_extra_space = re.compile('\s{2,}') + else: + self.regex_punctuation = None - def separate_punctuation(self, lines): - return [self.regex_punctuation.sub(r' \1 ', line) for line in lines] + def separate_punctuation(self, lines: List[str]) -> List[str]: + if self.regex_punctuation is not None: + return [ + self.regex_extra_space.sub('', self.regex_punctuation.sub(r' \1 ', line)).strip() for line in lines + ] + else: + return lines - def do_lowercase(self, lines): + def do_lowercase(self, lines: List[str]) -> List[str]: return [line.lower() for line in lines] - def rm_punctuation(self, lines): - return [self.regex_punctuation.sub(' ', line).strip() for line in lines] + def rm_punctuation(self, lines: List[str]) -> List[str]: + if self.regex_punctuation is not None: + return [self.regex_extra_space.sub('', self.regex_punctuation.sub(' ', line)).strip() for line in lines] + else: + return lines + + +@dataclass +class TextProcessingConfig: + # Punctuation marks to process. Example: ".,?" + punctuation_marks: str = "" + + # Whether to apply lower case conversion on the training text. + do_lowercase: bool = False + + # Whether to remove punctuation marks from text. + rm_punctuation: bool = False + + # Whether to separate punctuation with the previouse word by space. + separate_punctuation: bool = True diff --git a/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram.py b/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram.py index e994a29426cc..1f62da6bb168 100644 --- a/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram.py +++ b/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram.py @@ -71,7 +71,7 @@ import nemo.collections.asr as nemo_asr from nemo.collections.asr.parts.submodules import ctc_beam_decoding -from nemo.collections.asr.parts.utils.transcribe_utils import PunctuationCapitalization +from nemo.collections.asr.parts.utils.transcribe_utils import PunctuationCapitalization, TextProcessingConfig from nemo.core.config import hydra_runner from nemo.utils import logging @@ -111,10 +111,12 @@ class EvalBeamSearchNGramConfig: decoding_strategy: str = "beam" decoding: ctc_beam_decoding.BeamCTCInferConfig = ctc_beam_decoding.BeamCTCInferConfig(beam_size=128) - separate_punctuation: bool = True - do_lowercase: bool = False - rm_punctuation: bool = False - + text_processing: Optional[TextProcessingConfig] = TextProcessingConfig( + punctuation_marks = ".,?", + separate_punctuation = True, + do_lowercase = False, + rm_punctuation = False, + ) # fmt: on @@ -130,6 +132,7 @@ def beam_search_eval( beam_width: int = 128, beam_batch_size: int = 128, progress_bar: bool = True, + punctuation_capitalization: PunctuationCapitalization = None, ): level = logging.getEffectiveLevel() logging.setLevel(logging.CRITICAL) @@ -182,15 +185,9 @@ def beam_search_eval( _, beams_batch = model.decoding.ctc_decoder_predictions_tensor( packed_batch, decoder_lengths=probs_lens, return_hypotheses=True, ) - pc = PunctuationCapitalization(',.?') + for beams_idx, beams in enumerate(beams_batch): target = target_transcripts[sample_idx + beams_idx] - if cfg.separate_punctuation: - target = pc.separate_punctuation([target])[0] - if cfg.do_lowercase: - target = pc.do_lowercase([target])[0] - if cfg.rm_punctuation: - target = pc.rm_punctuation([target])[0] target_split_w = target.split() target_split_c = list(target) words_count += len(target_split_w) @@ -198,10 +195,10 @@ def beam_search_eval( wer_dist_min = cer_dist_min = 10000 for candidate_idx, candidate in enumerate(beams): # type: (int, ctc_beam_decoding.rnnt_utils.Hypothesis) pred_text = candidate.text - if cfg.do_lowercase: - pred_text = pc.do_lowercase([pred_text])[0] - if cfg.rm_punctuation: - pred_text = pc.rm_punctuation([pred_text])[0] + if cfg.text_processing.do_lowercase: + pred_text = punctuation_capitalization.do_lowercase([pred_text])[0] + if cfg.text_processing.rm_punctuation: + pred_text = punctuation_capitalization.rm_punctuation([pred_text])[0] pred_split_w = pred_text.split() wer_dist = editdistance.eval(target_split_w, pred_split_w) pred_split_c = list(pred_text) @@ -281,6 +278,14 @@ def main(cfg: EvalBeamSearchNGramConfig): target_transcripts.append(data['text']) audio_file_paths.append(str(audio_file.absolute())) + punctuation_capitalization = PunctuationCapitalization(cfg.text_processing.punctuation_marks) + if cfg.text_processing.separate_punctuation: + target_transcripts = punctuation_capitalization.separate_punctuation(target_transcripts) + if cfg.text_processing.do_lowercase: + target_transcripts = punctuation_capitalization.do_lowercase(target_transcripts) + if cfg.text_processing.rm_punctuation: + target_transcripts = punctuation_capitalization.rm_punctuation(target_transcripts) + if cfg.probs_cache_file and os.path.exists(cfg.probs_cache_file): logging.info(f"Found a pickle file of probabilities at '{cfg.probs_cache_file}'.") logging.info(f"Loading the cached pickle file of probabilities from '{cfg.probs_cache_file}' ...") @@ -327,6 +332,10 @@ def default_autocast(): preds = np.argmax(probs, axis=1) preds_tensor = torch.tensor(preds, device='cpu').unsqueeze(0) pred_text = asr_model._wer.decoding.ctc_decoder_predictions_tensor(preds_tensor)[0][0] + if cfg.text_processing.do_lowercase: + pred_text = punctuation_capitalization.do_lowercase([pred_text])[0] + if cfg.text_processing.rm_punctuation: + pred_text = punctuation_capitalization.rm_punctuation([pred_text])[0] pred_split_w = pred_text.split() target_split_w = target_transcripts[batch_idx].split() @@ -393,6 +402,7 @@ def default_autocast(): beam_beta=hp["beam_beta"], beam_batch_size=cfg.beam_batch_size, progress_bar=True, + punctuation_capitalization=punctuation_capitalization, ) if candidate_cer < best_cer: diff --git a/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh b/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh index e0fd1a2fdd0b..c1a94df53a41 100644 --- a/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh +++ b/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh @@ -1,7 +1,29 @@ #!/usr/bin/env bash -# install Boost package -sudo apt-get update -sudo apt-get install swig build-essential libboost-all-dev cmake zlib1g-dev libbz2-dev liblzma-dev +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Use this script to install KenLM, OpenSeq2Seq decoder, Flashlight decoder +NEMO_PATH=/workspace/nemo # Path to NeMo folder: /workspace/nemo if you use NeMo/Dockerfile +if [ "$#" -eq 1 ] +then + NEMO_PATH=$1 +fi +KENLM_MAX_ORDER=10 # Maximum order of KenLM model, also specified in the setup_os2s_decoders.py + +cd $NEMO_PATH +apt-get update && apt-get upgrade -y && apt-get install -y liblzma-dev && rm -rf /var/lib/apt/lists/* # needed for flashlight decoder + git clone https://github.com/NVIDIA/OpenSeq2Seq cd OpenSeq2Seq git checkout ctc-decoders @@ -11,5 +33,24 @@ rm -rf OpenSeq2Seq cd decoders # patch setup code to support the recent distutils sed -i 's/, distutils/, distutils\nimport distutils.ccompiler/g' setup.py + +cp $NEMO_PATH/scripts/installers/setup_os2s_decoders.py ./setup.py ./setup.sh + +# install Boost package for KenLM +wget https://boostorg.jfrog.io/artifactory/main/release/1.80.0/source/boost_1_80_0.tar.bz2 --no-check-certificate && tar --bzip2 -xf $NEMO_PATH/decoders/boost_1_80_0.tar.bz2 && cd boost_1_80_0 && ./bootstrap.sh && ./b2 --layout=tagged link=static,shared threading=multi,single install -j4 || echo FAILURE +export BOOST_ROOT=$NEMO_PATH/decoders/boost_1_80_0 + +# install KenLM +cd $NEMO_PATH/decoders/kenlm/build && cmake -DKENLM_MAX_ORDER=$KENLM_MAX_ORDER .. && make -j2 +cd $NEMO_PATH/decoders/kenlm +python setup.py install --max_order=$KENLM_MAX_ORDER +export KENLM_LIB=$NEMO_PATH/decoders/kenlm/build/bin +export KENLM_ROOT=$NEMO_PATH/decoders/kenlm +cd .. + +# install Flashlight +git clone https://github.com/flashlight/text && cd text +python setup.py bdist_wheel +pip install dist/*.whl cd .. diff --git a/scripts/asr_language_modeling/ngram_lm/kenlm_utils.py b/scripts/asr_language_modeling/ngram_lm/kenlm_utils.py index 27bfa7c25c09..9e255ddc50ca 100644 --- a/scripts/asr_language_modeling/ngram_lm/kenlm_utils.py +++ b/scripts/asr_language_modeling/ngram_lm/kenlm_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,21 +15,36 @@ """ Utility methods to be used for training N-gram LM with KenLM in train_kenlm.py + +The BPE sub-words are encoded using the Unicode table. +This encoding scheme reduces the required memory significantly, and the LM and its binary blob format require less storage space. +The value DEFAULT_TOKEN_OFFSET from nemo.collections.asr.parts.submodules.ctc_beam_decoding is utilized as the offset value. """ +CHUNK_SIZE = 8192 +CHUNK_BUFFER_SIZE = 512 + +import gzip import json import os import numpy as np +import torch from joblib import Parallel, delayed from tqdm.auto import tqdm +import nemo.collections.asr as nemo_asr +from nemo.collections.asr.parts.submodules.ctc_beam_decoding import DEFAULT_TOKEN_OFFSET +from nemo.utils import logging + # List of the supported models to be used with N-gram LM and beam search decoding SUPPORTED_MODELS = { 'EncDecCTCModelBPE': 'subword', 'EncDecCTCModel': 'char', 'EncDecRNNTBPEModel': 'subword', 'EncDecRNNTModel': 'char', + 'EncDecHybridRNNTCTCBPEModel': 'subword', + 'EncDecHybridRNNTCTCModel': 'char', } @@ -38,77 +53,169 @@ def softmax(x): return e / e.sum(axis=-1).reshape([x.shape[0], 1]) -def read_train_file(path, lowercase: bool = False): +def get_train_list(args_train_path): + + train_path = [] + for train_item in args_train_path: + if os.path.isdir(train_item): + file_list = os.listdir(train_item) + train_path.extend([os.path.join(train_item, file) for file in file_list]) + + elif os.path.isfile(train_item): + train_path.append(train_item) + return sorted(train_path) + + +def setup_tokenizer(nemo_model_file): + """ TOKENIZER SETUP + nemo_model_file (str): The path to the NeMo model file (.nemo). + """ + logging.info(f"Loading nemo model '{nemo_model_file}' ...") + if nemo_model_file.endswith('.nemo'): + model = nemo_asr.models.ASRModel.restore_from(nemo_model_file, map_location=torch.device('cpu')) + else: + logging.warning( + "tokenizer_model_file does not end with .model or .nemo, therefore trying to load a pretrained model with this name." + ) + model = nemo_asr.models.ASRModel.from_pretrained(nemo_model_file, map_location=torch.device('cpu')) + + if type(model.tokenizer).__name__ == 'AggregateTokenizer': + is_aggregate_tokenizer = True + else: + is_aggregate_tokenizer = False + + encoding_level = SUPPORTED_MODELS.get(type(model).__name__, None) + if not encoding_level: + logging.warning( + f"Model type '{type(model).__name__}' may not be supported. Would try to train a char-level LM." + ) + encoding_level = 'char' + + tokenizer_nemo = model.tokenizer + del model + + return tokenizer_nemo, encoding_level, is_aggregate_tokenizer + + +def iter_files(source_path, dest_path, tokenizer, encoding_level, is_aggregate_tokenizer, verbose): + if isinstance(dest_path, list): + paths = zip(dest_path, source_path) + else: # dest_path is stdin of KenLM + paths = [(dest_path, path) for path in source_path] + + for dest_path, input_path in paths: + dataset = read_train_file(input_path, is_aggregate_tokenizer=is_aggregate_tokenizer, verbose=verbose) + if encoding_level == "subword": + tokenize_text( + data=dataset, + tokenizer=tokenizer, + path=dest_path, + chunk_size=CHUNK_SIZE, + buffer_size=CHUNK_BUFFER_SIZE, + ) + else: # encoding_level == "char" + if isinstance(dest_path, str): + with open(dest_path, 'w', encoding='utf-8') as f: + for line in dataset: + f.write(line + "\n") + else: # write to stdin of KenLM + for line in dataset: + dest_path.write((line + '\n').encode()) + + +def read_train_file( + path, is_aggregate_tokenizer: bool = False, verbose: int = 0, +): lines_read = 0 - text_dataset = [] - - with open(path, 'r', encoding='utf-8') as f: - reader = tqdm(iter(lambda: f.readline(), ''), desc="Read 0 lines", unit=' lines') - for i, line in enumerate(reader): - if path.endswith('.json'): - line = json.loads(line)['text'] - - line = line.replace("\n", "").strip() - if lowercase: - line = line.lower() - + text_dataset, lang_dataset = [], [] + if path[-8:] == '.json.gz': # for Common Crawl dataset + fin = gzip.open(path, 'r') + else: + fin = open(path, 'r', encoding='utf-8') + + if verbose > 0: + reader = tqdm(iter(lambda: fin.readline(), ''), desc="Read 0 lines", unit=' lines') + else: + reader = fin + + for line in reader: + lang = None + if line: + if path[-8:] == '.json.gz': # for Common Crawl dataset + line = json.loads(line.decode('utf-8'))['text'] + elif path.endswith('.json'): + jline = json.loads(line) + line = jline['text'] + if is_aggregate_tokenizer: + lang = jline['lang'] + + line_list = line.split("\n") + + line = " ".join(line_list) if line: text_dataset.append(line) - + if lang: + lang_dataset.append(lang) lines_read += 1 - if lines_read % 100000 == 0: + if verbose > 0 and lines_read % 100000 == 0: reader.set_description(f"Read {lines_read} lines") - - return text_dataset - - -def tokenize_str(texts, tokenizer, offset): + else: + break + fin.close() + if is_aggregate_tokenizer: + assert len(text_dataset) == len( + lang_dataset + ), f"text_dataset length {len(text_dataset)} and lang_dataset length {len(lang_dataset)} must be the same!" + return list(zip(text_dataset, lang_dataset)) + else: + return [[text] for text in text_dataset] + + +def tokenize_str(texts, tokenizer): tokenized_text = [] for text in texts: - tok_text = tokenizer.text_to_ids(text) - tok_text = [chr(token + offset) for token in tok_text] + tok_text = tokenizer.text_to_ids(*text) + tok_text = [chr(token + DEFAULT_TOKEN_OFFSET) for token in tok_text] tokenized_text.append(tok_text) return tokenized_text -def tokenize_text(data, tokenizer, path, chunk_size=8192, buffer_size=32, token_offset=100): +def tokenize_text(data, tokenizer, path, chunk_size=8192, buffer_size=32): dataset_len = len(data) - print( - f"Chunking {dataset_len} rows into {dataset_len / float(chunk_size):0.4f} tasks (each chunk contains {chunk_size} elements)" - ) - current_step = 0 - if os.path.exists(path): - print(f"Deleting previous file : {path}") + if isinstance(path, str) and os.path.exists(path): os.remove(path) - with Parallel(n_jobs=-2, verbose=10) as parallel: + with Parallel(n_jobs=-2, verbose=0) as parallel: while True: start = current_step * chunk_size end = min((current_step + buffer_size) * chunk_size, dataset_len) tokenized_data = parallel( - delayed(tokenize_str)(data[start : start + chunk_size], tokenizer, token_offset) + delayed(tokenize_str)(data[start : start + chunk_size], tokenizer) for start in range(start, end, chunk_size) ) # Write dataset write_dataset(tokenized_data, path) current_step += len(tokenized_data) - print(f"Finished writing {len(tokenized_data)} chunks to {path}. Current chunk index = {current_step}") + logging.info( + f"Finished writing {len(tokenized_data)} chunks to {path}. Current chunk index = {current_step}" + ) del tokenized_data if end >= dataset_len: break def write_dataset(chunks, path): - basedir = os.path.dirname(path) - - if not os.path.exists(basedir): - os.makedirs(basedir, exist_ok=True) - - with open(path, 'a+', encoding='utf-8') as f: - for chunk_idx in tqdm(range(len(chunks)), desc='Chunk ', total=len(chunks), unit=' chunks'): + if isinstance(path, str): + with open(path, 'a+', encoding='utf-8') as f: + for chunk_idx in tqdm(range(len(chunks)), desc='Chunk ', total=len(chunks), unit=' chunks'): + for text in chunks[chunk_idx]: + line = ' '.join(text) + f.write(f"{line}\n") + else: # write to stdin of KenLM + for chunk_idx in range(len(chunks)): for text in chunks[chunk_idx]: line = ' '.join(text) - f.write(f"{line}\n") + path.write((line + '\n').encode()) diff --git a/scripts/asr_language_modeling/ngram_lm/ngram_merge.py b/scripts/asr_language_modeling/ngram_lm/ngram_merge.py new file mode 100644 index 000000000000..abffc6372518 --- /dev/null +++ b/scripts/asr_language_modeling/ngram_lm/ngram_merge.py @@ -0,0 +1,448 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This script would interpolate two arpa N-gram language models (LMs), +culculate perplexity of resulted LM, and make binary KenLM from it. + +Minimun usage example to interpolate two N-gram language models with weights: +alpha * ngram_a + beta * ngram_b = 2 * ngram_a + 1 * ngram_b + +python3 ngram_merge.py --kenlm_bin_path /workspace/nemo/decoders/kenlm/build/bin \ + --arpa_a /path/ngram_a.kenlm.tmp.arpa \ + --alpha 2 \ + --arpa_b /path/ngram_b.kenlm.tmp.arpa \ + --beta 1 \ + --out_path /path/out + + +Merge two N-gram language models and calculate its perplexity with test_file. +python3 ngram_merge.py --kenlm_bin_path /workspace/nemo/decoders/kenlm/build/bin \ + --ngram_bin_path /workspace/nemo/decoders/ngram-1.3.14/src/bin \ + --arpa_a /path/ngram_a.kenlm.tmp.arpa \ + --alpha 0.5 \ + --arpa_b /path/ngram_b.kenlm.tmp.arpa \ + --beta 0.5 \ + --out_path /path/out \ + --nemo_model_file /path/to/model_tokenizer.nemo \ + --test_file /path/to/test_manifest.json \ + --force +""" + +import argparse +import os +import subprocess +import sys +from typing import Tuple + +import kenlm_utils +import torch + +import nemo.collections.asr as nemo_asr +from nemo.collections.asr.parts.submodules.ctc_beam_decoding import DEFAULT_TOKEN_OFFSET +from nemo.utils import logging + + +class NgramMerge: + def __init__(self, ngram_bin_path): + self.ngram_bin_path = ngram_bin_path + + def ngrammerge(self, arpa_a: str, alpha: float, arpa_b: str, beta: float, arpa_c: str, force: bool) -> str: + """ + Merge two ARPA n-gram language models using the ngrammerge command-line tool and output the result in ARPA format. + + Args: + arpa_a (str): Path to the first input ARPA file. + alpha (float): Interpolation weight for the first model. + arpa_b (str): Path to the second input ARPA file. + beta (float): Interpolation weight for the second model. + arpa_c (str): Path to the output ARPA file. + force (bool): Whether to overwrite existing output files. + + Returns: + str: Path to the output ARPA file in mod format. + """ + mod_a = arpa_a + ".mod" + mod_b = arpa_b + ".mod" + mod_c = arpa_c + ".mod" + if os.path.isfile(mod_c) and not force: + logging.info("File " + mod_c + " exists. Skipping.") + else: + sh_args = [ + os.path.join(self.ngram_bin_path, "ngrammerge"), + "--alpha=" + str(alpha), + "--beta=" + str(beta), + "--normalize", + # "--use_smoothing", + mod_a, + mod_b, + mod_c, + ] + logging.info( + "\n" + + str(subprocess.run(sh_args, capture_output=False, text=True, stdout=sys.stdout, stderr=sys.stderr,)) + + "\n", + ) + return mod_c + + def arpa2mod(self, arpa_path: str, force: bool): + """ + This function reads an ARPA n-gram model and converts it to a binary format. The binary model is saved to the same directory as the ARPA model with a ".mod" extension. If the binary model file already exists and force argument is False, then the function skips conversion and returns a message. Otherwise, it executes the command to create a binary model using the subprocess.run method. + + Parameters: + arpa_path (string): The file path to the ARPA n-gram model. + force (bool): If True, the function will convert the ARPA model to binary even if the binary file already exists. If False and the binary file exists, the function will skip the conversion. + Returns: + If the binary model file already exists and force argument is False, returns a message indicating that the file exists and the conversion is skipped. + Otherwise, returns a subprocess.CompletedProcess object, which contains information about the executed command. The subprocess's output and error streams are redirected to stdout and stderr, respectively. + """ + mod_path = arpa_path + ".mod" + if os.path.isfile(mod_path) and not force: + return "File " + mod_path + " exists. Skipping." + else: + sh_args = [ + os.path.join(self.ngram_bin_path, "ngramread"), + "--ARPA", + arpa_path, + mod_path, + ] + return subprocess.run(sh_args, capture_output=False, text=True, stdout=sys.stdout, stderr=sys.stderr,) + + def merge( + self, arpa_a: str, alpha: float, arpa_b: str, beta: float, out_path: str, force: bool + ) -> Tuple[str, str]: + """ + Merges two ARPA language models using the ngrammerge tool. + + Args: + arpa_a (str): Path to the first ARPA language model file. + alpha (float): Interpolation weight for the first model. + arpa_b (str): Path to the second ARPA language model file. + beta (float): Interpolation weight for the second model. + out_path (str): Path to the output directory for the merged ARPA model. + force (bool): Whether to force overwrite of existing files. + + Returns: + Tuple[str, str]: A tuple containing the path to the merged binary language model file and the path to the + merged ARPA language model file. + """ + logging.info("\n" + str(self.arpa2mod(arpa_a, force)) + "\n") + + logging.info("\n" + str(self.arpa2mod(arpa_b, force)) + "\n") + arpa_c = os.path.join(out_path, f"{os.path.split(arpa_a)[1]}-{alpha}-{os.path.split(arpa_b)[1]}-{beta}.arpa",) + mod_c = self.ngrammerge(arpa_a, alpha, arpa_b, beta, arpa_c, force) + return mod_c, arpa_c + + def perplexity(self, ngram_mod: str, test_far: str) -> str: + """ + Calculates perplexity of a given ngram model on a test file. + + Args: + ngram_mod (str): The path to the ngram model file. + test_far (str): The path to the test file. + + Returns: + str: A string representation of the perplexity calculated. + + Raises: + AssertionError: If the subprocess to calculate perplexity returns a non-zero exit code. + + Example: + >>> perplexity("/path/to/ngram_model", "/path/to/test_file") + 'Perplexity: 123.45' + """ + sh_args = [ + os.path.join(self.ngram_bin_path, "ngramperplexity"), + "--v=1", + ngram_mod, + test_far, + ] + ps = subprocess.Popen(sh_args, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = ps.communicate() + exit_code = ps.wait() + command = " ".join(sh_args) + assert ( + exit_code == 0 + ), f"Exit_code must be 0.\n bash command: {command} \n stdout: {stdout} \n stderr: {stderr}" + perplexity_out = "\n".join(stdout.split("\n")[-6:-1]) + return perplexity_out + + def make_arpa(self, ngram_mod: str, ngram_arpa: str, force: bool): + """ + Converts an ngram model in binary format to ARPA format. + + Args: + - ngram_mod (str): The path to the ngram model in binary format. + - ngram_arpa (str): The desired path for the ARPA format output file. + - force (bool): If True, the ARPA format file will be generated even if it already exists. + + Returns: + - Tuple[bytes, bytes] + + Raises: + - AssertionError: If the shell command execution returns a non-zero exit code. + - FileNotFoundError: If the binary ngram model file does not exist. + """ + if os.path.isfile(ngram_arpa) and not force: + logging.info("File " + ngram_arpa + " exists. Skipping.") + return None + else: + sh_args = [ + os.path.join(self.ngram_bin_path, "ngramprint"), + "--ARPA", + ngram_mod, + ngram_arpa, + ] + return subprocess.run(sh_args, capture_output=False, text=True, stdout=sys.stdout, stderr=sys.stderr,) + + def test_perplexity( + self, mod_c: str, symbols: str, test_txt: str, nemo_model_file: str, tmp_path: str, force: bool + ) -> str: + """ + Tests the perplexity of a given ngram model on a test file. + + Args: + mod_c (str): The path to the ngram model file. + symbols (str): The path to the symbol table file. + test_txt (str): The path to the test text file. + nemo_model_file (str): The path to the NeMo model file. + tmp_path (str): The path to the temporary directory where the test far file will be created. + force (bool): If True, overwrites any existing far file. + + Returns: + str: A string representation of the perplexity calculated. + + Example: + >>> test_perplexity("/path/to/ngram_model", "/path/to/symbol_table", "/path/to/test_file", "/path/to/tokenizer_model", "/path/to/tmp_dir", True) + 'Perplexity: 123.45' + """ + + test_far = farcompile(symbols, test_txt, tmp_path, nemo_model_file, force) + res_p = self.perplexity(mod_c, test_far) + return res_p + + +def farcompile(symbols: str, text_file: str, tmp_path: str, nemo_model_file: str, force: bool,) -> str: + """ + Compiles a text file into a FAR file using the given symbol table or tokenizer. + + Args: + symbols (str): The path to the symbol table file. + text_file (str): The path to the text file to compile. + tmp_path (str): The path to the temporary directory where the test far file will be created. + nemo_model_file (str): The path to the NeMo model file (.nemo). + force (bool): If True, overwrites any existing FAR file. + + Returns: + test_far (str): The path to the resulting FAR file. + + Example: + >>> farcompile("/path/to/symbol_table", "/path/to/text_file", "/path/to/far_file", "/path/to/tokenizer_model", "/path/to/nemo_model", True) + """ + test_far = os.path.join(tmp_path, os.path.split(text_file)[1] + ".far") + + if os.path.isfile(test_far) and not force: + logging.info("File " + test_far + " exists. Skipping.") + return None + else: + sh_args = [ + "farcompilestrings", + "--generate_keys=10", + "--fst_type=compact", + "--symbols=" + symbols, + "--keep_symbols", + ">", + test_far, + ] + + tokenizer, encoding_level, is_aggregate_tokenizer = kenlm_utils.setup_tokenizer(nemo_model_file) + + ps = subprocess.Popen( + " ".join(sh_args), shell=True, stdin=subprocess.PIPE, stdout=sys.stdout, stderr=sys.stderr, + ) + + kenlm_utils.iter_files( + source_path=[text_file], + dest_path=ps.stdin, + tokenizer=tokenizer, + encoding_level=encoding_level, + is_aggregate_tokenizer=is_aggregate_tokenizer, + verbose=1, + ) + stdout, stderr = ps.communicate() + + exit_code = ps.returncode + + command = " ".join(sh_args) + assert ( + exit_code == 0 + ), f"Exit_code must be 0.\n bash command: {command} \n stdout: {stdout} \n stderr: {stderr}" + return test_far + + +def make_kenlm(kenlm_bin_path: str, ngram_arpa: str, force: bool): + """ + Builds a language model from an ARPA format file using the KenLM toolkit. + + Args: + - kenlm_bin_path (str): The path to the KenLM toolkit binary. + - ngram_arpa (str): The path to the ARPA format file. + - force (bool): If True, the KenLM language model will be generated even if it already exists. + + Raises: + - AssertionError: If the shell command execution returns a non-zero exit code. + - FileNotFoundError: If the KenLM binary or ARPA format file does not exist. + """ + ngram_kenlm = ngram_arpa + ".kenlm" + if os.path.isfile(ngram_kenlm) and not force: + logging.info("File " + ngram_kenlm + " exists. Skipping.") + return None + else: + sh_args = [kenlm_bin_path, "trie", "-i", ngram_arpa, ngram_kenlm] + return subprocess.run(sh_args, capture_output=False, text=True, stdout=sys.stdout, stderr=sys.stderr,) + + +def make_symbol_list(nemo_model_file, symbols, force): + """ + Function: make_symbol_list + + Create a symbol table for the input tokenizer model file. + + Args: + nemo_model_file (str): Path to the NeMo model file. + symbols (str): Path to the file where symbol list will be saved. + force (bool): Flag to force creation of symbol list even if it already exists. + + Returns: + None + + Raises: + None + """ + if os.path.isfile(symbols) and not force: + logging.info("File " + symbols + " exists. Skipping.") + else: + if nemo_model_file.endswith('.nemo'): + asr_model = nemo_asr.models.ASRModel.restore_from(nemo_model_file, map_location=torch.device('cpu')) + vocab_size = len(asr_model.decoder.vocabulary) + else: + logging.warning( + "nemo_model_file does not end with .nemo, therefore trying to load a pretrained model with this name." + ) + asr_model = nemo_asr.models.ASRModel.from_pretrained(nemo_model_file, map_location=torch.device('cpu')) + vocab_size = len(asr_model.decoder.vocabulary) + + vocab = [chr(idx + DEFAULT_TOKEN_OFFSET) for idx in range(vocab_size)] + with open(symbols, "w", encoding="utf-8") as f: + for i, v in enumerate(vocab): + f.write(v + " " + str(i) + "\n") + + +def main( + kenlm_bin_path: str, + ngram_bin_path: str, + arpa_a: str, + alpha: float, + arpa_b: str, + beta: float, + out_path: str, + test_file: str, + symbols: str, + nemo_model_file: str, + force: bool, +) -> None: + """ + Entry point function for merging ARPA format language models, testing perplexity, creating symbol list, + and making ARPA and Kenlm models. + + Args: + - kenlm_bin_path (str): The path to the Kenlm binary. + - arpa_a (str): The path to the first ARPA format language model. + - alpha (float): The weight given to the first language model during merging. + - arpa_b (str): The path to the second ARPA format language model. + - beta (float): The weight given to the second language model during merging. + - out_path (str): The path where the output files will be saved. + - test_file (str): The path to the file on which perplexity needs to be calculated. + - symbols (str): The path to the file where symbol list for the tokenizer model will be saved. + - nemo_model_file (str): The path to the NeMo model file. + - force (bool): If True, overwrite existing files, otherwise skip the operations. + + Returns: + - None + """ + nm = NgramMerge(ngram_bin_path) + mod_c, arpa_c = nm.merge(arpa_a, alpha, arpa_b, beta, out_path, force) + + if test_file and nemo_model_file: + if not symbols: + symbols = os.path.join(out_path, os.path.split(nemo_model_file)[1] + ".syms") + make_symbol_list(nemo_model_file, symbols, force) + test_p = nm.test_perplexity(mod_c, symbols, test_file, nemo_model_file, out_path, force) + logging.info("Perplexity summary:" + test_p) + + logging.info("Making ARPA and Kenlm model " + arpa_c) + out = nm.make_arpa(mod_c, arpa_c, force) + if out: + logging.info("\n" + str(out) + "\n") + + out = make_kenlm(kenlm_bin_path, arpa_c, force) + if out: + logging.info("\n" + str(out) + "\n") + + +def _parse_args(): + parser = argparse.ArgumentParser( + description="Interpolate ARPA N-gram language models and make KenLM binary model to be used with beam search decoder of ASR models." + ) + parser.add_argument( + "--kenlm_bin_path", required=True, type=str, help="The path to the bin folder of KenLM library.", + ) # Use /workspace/nemo/decoders/kenlm/build/bin if installed it with scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh + parser.add_argument( + "--ngram_bin_path", required=True, type=str, help="The path to the bin folder of OpenGrm Ngram library.", + ) # Use /workspace/nemo/decoders/ngram-1.3.14/src/bin if installed it with scripts/installers/install_opengrm.sh + parser.add_argument("--arpa_a", required=True, type=str, help="Path to the arpa_a") + parser.add_argument("--alpha", required=True, type=float, help="Weight of arpa_a") + parser.add_argument("--arpa_b", required=True, type=str, help="Path to the arpa_b") + parser.add_argument("--beta", required=True, type=float, help="Weight of arpa_b") + parser.add_argument( + "--out_path", required=True, type=str, help="Path to write tmp and resulted files.", + ) + parser.add_argument( + "--test_file", + required=False, + type=str, + default=None, + help="Path to test file to count perplexity if provided.", + ) + parser.add_argument( + "--symbols", + required=False, + type=str, + default=None, + help="Path to symbols (.syms) file . Could be calculated if it is not provided. Use as: --symbols /path/to/earnest.syms", + ) + parser.add_argument( + "--nemo_model_file", + required=False, + type=str, + default=None, + help="The path to '.nemo' file of the ASR model, or name of a pretrained NeMo model", + ) + parser.add_argument("--force", "-f", action="store_true", help="Whether to recompile and rewrite all files") + return parser.parse_args() + + +if __name__ == "__main__": + main(**vars(_parse_args())) diff --git a/scripts/asr_language_modeling/ngram_lm/train_kenlm.py b/scripts/asr_language_modeling/ngram_lm/train_kenlm.py index 6536a7f5eadd..d23141722653 100644 --- a/scripts/asr_language_modeling/ngram_lm/train_kenlm.py +++ b/scripts/asr_language_modeling/ngram_lm/train_kenlm.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -23,144 +23,159 @@ # You need to install the KenLM library and also the beam search decoders to use this feature. Please refer # to 'scripts/ngram_lm/install_beamsearch_decoders.sh' on how to install them. # -# USAGE: python train_kenlm.py --nemo_model_file \ -# --train_file \ -# --kenlm_model_file \ -# --ngram_length \ -# --preserve_arpa +# USAGE: python train_kenlm.py nemo_model_file= \ +# train_paths= \ +# kenlm_bin_path= \ +# kenlm_model_file= \ +# ngram_length= \ # # After training is done, the binary LM model is stored at the path specified by '--kenlm_model_file'. # You may find more info on how to use this script at: # https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/asr/asr_language_modeling.html -import argparse import logging import os import subprocess import sys +from dataclasses import dataclass, field +from glob import glob +from typing import List -import kenlm_utils -import torch +from omegaconf import MISSING +from scripts.asr_language_modeling.ngram_lm import kenlm_utils -import nemo.collections.asr as nemo_asr -from nemo.collections.asr.parts.submodules.ctc_beam_decoding import DEFAULT_TOKEN_OFFSET +from nemo.core.config import hydra_runner from nemo.utils import logging """ NeMo's beam search decoders only support char-level encodings. In order to make it work with BPE-level encodings, we use a trick to encode the sub-word tokens of the training data as unicode characters and train a char-level KenLM. -DEFAULT_TOKEN_OFFSET is the offset in the unicode table to be used to encode the BPE sub-words. This encoding scheme reduces -the required memory significantly, and the LM and its binary blob format require less storage space. """ -CHUNK_SIZE = 8192 -CHUNK_BUFFER_SIZE = 512 - - -def main(): - parser = argparse.ArgumentParser( - description='Train an N-gram language model with KenLM to be used with beam search decoder of ASR models.' - ) - parser.add_argument( - "--train_file", - required=True, - type=str, - help="Path to the training file, it can be a text file or JSON manifest", - ) - parser.add_argument( - "--nemo_model_file", - required=True, - type=str, - help="The path of the '.nemo' file of the ASR model or name of a pretrained model", - ) - parser.add_argument( - "--kenlm_model_file", required=True, type=str, help="The path to store the KenLM binary model file" - ) - parser.add_argument("--ngram_length", required=True, type=int, help="The order of N-gram LM") - parser.add_argument("--kenlm_bin_path", required=True, type=str, help="The path to the bin folder of KenLM") - parser.add_argument( - "--do_lowercase", action='store_true', help="Whether to apply lower case conversion on the training text" - ) - parser.add_argument( - '--preserve_arpa', required=False, action='store_true', help='Whether to preserve the intermediate ARPA file.' - ) - args = parser.parse_args() - - """ TOKENIZER SETUP """ - logging.info(f"Loading nemo model '{args.nemo_model_file}' ...") - - if args.nemo_model_file.endswith('.nemo'): - model = nemo_asr.models.ASRModel.restore_from(args.nemo_model_file, map_location=torch.device('cpu')) - else: - logging.warning( - "nemo_model_file does not end with .nemo, therefore trying to load a pretrained model with this name." - ) - model = nemo_asr.models.ASRModel.from_pretrained(args.nemo_model_file, map_location=torch.device('cpu')) - encoding_level = kenlm_utils.SUPPORTED_MODELS.get(type(model).__name__, None) - if not encoding_level: - logging.warning( - f"Model type '{type(model).__name__}' may not be supported. Would try to train a char-level LM." - ) - encoding_level = 'char' +@dataclass +class TrainKenlmConfig: + """ + Train an N-gram language model with KenLM to be used with beam search decoder of ASR models. + """ + + train_paths: List[ + str + ] = MISSING # List of training files or folders. Files can be a plain text file or ".json" manifest or ".json.gz". Example: [/path/to/manifest/file,/path/to/folder] + + nemo_model_file: str = MISSING # The path to '.nemo' file of the ASR model, or name of a pretrained NeMo model + kenlm_model_file: str = MISSING # The path to store the KenLM binary model file + ngram_length: int = MISSING # The order of N-gram LM + kenlm_bin_path: str = MISSING # The path to the bin folder of KenLM. + + preserve_arpa: bool = False # Whether to preserve the intermediate ARPA file. + ngram_prune: List[int] = field( + default_factory=lambda: [0] + ) # List of digits to prune Ngram. Example: [0,0,1]. See Pruning section on the https://kheafield.com/code/kenlm/estimation + cache_path: str = "" # Cache path to save tokenized files. + verbose: int = 1 # Verbose level, default is 1. + + +@hydra_runner(config_path=None, config_name='TrainKenlmConfig', schema=TrainKenlmConfig) +def main(args: TrainKenlmConfig): + train_paths = kenlm_utils.get_train_list(args.train_paths) + + if isinstance(args.ngram_prune, str): + args.ngram_prune = [args.ngram_prune] + + tokenizer, encoding_level, is_aggregate_tokenizer = kenlm_utils.setup_tokenizer(args.nemo_model_file) - """ DATASET SETUP """ - logging.info(f"Encoding the train file '{args.train_file}' ...") - dataset = kenlm_utils.read_train_file(args.train_file, lowercase=args.do_lowercase) - encoded_train_file = f"{args.kenlm_model_file}.tmp.txt" if encoding_level == "subword": - kenlm_utils.tokenize_text( - dataset, - model.tokenizer, - path=encoded_train_file, - chunk_size=CHUNK_SIZE, - buffer_size=CHUNK_BUFFER_SIZE, - token_offset=DEFAULT_TOKEN_OFFSET, - ) - # --discount_fallback is needed for training KenLM for BPE-based models - discount_arg = "--discount_fallback" + discount_arg = "--discount_fallback" # --discount_fallback is needed for training KenLM for BPE-based models else: - with open(encoded_train_file, 'w', encoding='utf-8') as f: - for line in dataset: - f.write(f"{line}\n") - discount_arg = "" - del model - arpa_file = f"{args.kenlm_model_file}.tmp.arpa" """ LMPLZ ARGUMENT SETUP """ kenlm_args = [ os.path.join(args.kenlm_bin_path, 'lmplz'), "-o", - f"{args.ngram_length}", - "--text", - encoded_train_file, + str(args.ngram_length), "--arpa", arpa_file, discount_arg, - ] + "--prune", + ] + [str(n) for n in args.ngram_prune] + + if args.cache_path: + if not os.path.exists(args.cache_path): + os.makedirs(args.cache_path, exist_ok=True) + + """ DATASET SETUP """ + encoded_train_files = [] + for file_num, train_file in enumerate(train_paths): + logging.info(f"Encoding the train file '{train_file}' number {file_num+1} out of {len(train_paths)} ...") + + cached_files = glob(os.path.join(args.cache_path, os.path.split(train_file)[1]) + "*") + encoded_train_file = os.path.join(args.cache_path, os.path.split(train_file)[1] + f"_{file_num}.tmp.txt") + if ( + cached_files and cached_files[0] != encoded_train_file + ): # cached_files exists but has another file name: f"_{file_num}.tmp.txt" + os.rename(cached_files[0], encoded_train_file) + logging.info("Rename", cached_files[0], "to", encoded_train_file) + + encoded_train_files.append(encoded_train_file) + + kenlm_utils.iter_files( + source_path=train_paths, + dest_path=encoded_train_files, + tokenizer=tokenizer, + encoding_level=encoding_level, + is_aggregate_tokenizer=is_aggregate_tokenizer, + verbose=args.verbose, + ) - ret = subprocess.run(kenlm_args, capture_output=False, text=True, stdout=sys.stdout, stderr=sys.stderr) - if ret.returncode != 0: + first_process_args = ["cat"] + encoded_train_files + first_process = subprocess.Popen(first_process_args, stdout=subprocess.PIPE, stderr=sys.stderr) + + logging.info(f"Running lmplz command \n\n{' '.join(kenlm_args)}\n\n") + kenlm_p = subprocess.run( + kenlm_args, + stdin=first_process.stdout, + capture_output=False, + text=True, + stdout=sys.stdout, + stderr=sys.stderr, + ) + first_process.wait() + + else: + logging.info(f"Running lmplz command \n\n{' '.join(kenlm_args)}\n\n") + kenlm_p = subprocess.Popen(kenlm_args, stdout=sys.stdout, stdin=subprocess.PIPE, stderr=sys.stderr) + + kenlm_utils.iter_files( + source_path=train_paths, + dest_path=kenlm_p.stdin, + tokenizer=tokenizer, + encoding_level=encoding_level, + is_aggregate_tokenizer=is_aggregate_tokenizer, + verbose=args.verbose, + ) + + kenlm_p.communicate() + + if kenlm_p.returncode != 0: raise RuntimeError("Training KenLM was not successful!") + """ BINARY BUILD """ - logging.info(f"Running binary_build command \n\n{' '.join(kenlm_args)}\n\n") + kenlm_args = [ os.path.join(args.kenlm_bin_path, "build_binary"), "trie", arpa_file, args.kenlm_model_file, ] + logging.info(f"Running binary_build command \n\n{' '.join(kenlm_args)}\n\n") ret = subprocess.run(kenlm_args, capture_output=False, text=True, stdout=sys.stdout, stderr=sys.stderr) if ret.returncode != 0: raise RuntimeError("Training KenLM was not successful!") - os.remove(encoded_train_file) - logging.info(f"Deleted the temporary encoded training file '{encoded_train_file}'.") - if not args.preserve_arpa: os.remove(arpa_file) logging.info(f"Deleted the arpa file '{arpa_file}'.") diff --git a/scripts/installers/Dockerfile.ngramtools b/scripts/installers/Dockerfile.ngramtools new file mode 100644 index 000000000000..49d3c12b3529 --- /dev/null +++ b/scripts/installers/Dockerfile.ngramtools @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Use this script to install KenLM, OpenSeq2Seq decoder, Flashlight decoder, OpenGRM Ngram tool to contaner + +# How to use? Build it from NeMo root folder: +# 1. git clone https://github.com/NVIDIA/NeMo.git && cd NeMo +# 2. DOCKER_BUILDKIT=1 docker build -t nemo:23.01.1 -f ./scripts/installers/Dockerfile.ngramtools . + +from nvcr.io/nvidia/nemo:23.01 + +WORKDIR /workspace/nemo + +COPY scripts/. /workspace/nemo/scripts/ + +RUN /bin/bash scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh + +RUN /bin/bash scripts/installers/install_opengrm.sh diff --git a/scripts/installers/install_opengrm.sh b/scripts/installers/install_opengrm.sh new file mode 100755 index 000000000000..e3e11e8d1db9 --- /dev/null +++ b/scripts/installers/install_opengrm.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script install OpenFST and Ngram tools from OpenGRM library +# Optionally, you can specify a path where to install it as a first positional argument: scripts/installers/install_opengrm.sh /path/to/install/openfst . +# Alternatively, in the Linux Debian you can use: sudo apt install libngram-tools + +DECODERS_PATH=/workspace/nemo/decoders # Path to decoders folder: /workspace/nemo/decoders if you use NeMo/Dockerfile +if [ "$#" -eq 1 ] +then + DECODERS_PATH=$1 +fi +cd $DECODERS_PATH + +# Install OpenGrm OpenFST +wget https://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.8.2.tar.gz --no-check-certificate && tar xvzf openfst-1.8.2.tar.gz && cd openfst-1.8.2 && ./configure --enable-grm && make -j4 && make -j4 install && cd .. + +# Install OpenGrm Ngram +OPENFSTPREFIX=$DECODERS_PATH/openfst-1.8.2/src && wget https://www.opengrm.org/twiki/pub/GRM/NGramDownload/ngram-1.3.14.tar.gz --no-check-certificate && tar xvzf ngram-1.3.14.tar.gz && cd ngram-1.3.14 && LDFLAGS="-L${OPENFSTPREFIX}/lib" CXXFLAGS="-I${OPENFSTPREFIX}/include" ./configure --prefix ${OPENFSTPREFIX} && make -j4 && make -j4 install && cd .. diff --git a/scripts/installers/setup_os2s_decoders.py b/scripts/installers/setup_os2s_decoders.py new file mode 100644 index 000000000000..6dfe1bef54e8 --- /dev/null +++ b/scripts/installers/setup_os2s_decoders.py @@ -0,0 +1,138 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Script to build and install decoder package. + +It is used by scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh to install +KenLM and OpenSeq2Seq decoder. + +You can set the order of KenLM model by changing -DKENLM_MAX_ORDER=10 argument. +""" +from __future__ import absolute_import, division, print_function + +import argparse +import glob +import multiprocessing.pool +import os +import platform +import sys + +from setuptools import Extension, distutils, setup + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + "--num_processes", default=1, type=int, help="Number of cpu processes to build package. (default: %(default)d)" +) +args = parser.parse_known_args() + +# reconstruct sys.argv to pass to setup below +sys.argv = [sys.argv[0]] + args[1] + + +# monkey-patch for parallel compilation +# See: https://stackoverflow.com/a/13176803 +def parallelCCompile( + self, + sources, + output_dir=None, + macros=None, + include_dirs=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + depends=None, +): + # those lines are copied from distutils.ccompiler.CCompiler directly + macros, objects, extra_postargs, pp_opts, build = self._setup_compile( + output_dir, macros, include_dirs, sources, depends, extra_postargs + ) + cc_args = self._get_cc_args(pp_opts, debug, extra_preargs) + + # parallel code + def _single_compile(obj): + try: + src, ext = build[obj] + except KeyError: + return + self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts) + + # convert to list, imap is evaluated on-demand + thread_pool = multiprocessing.pool.ThreadPool(args[0].num_processes) + list(thread_pool.imap(_single_compile, objects)) + return objects + + +def compile_test(header, library): + dummy_path = os.path.join(os.path.dirname(__file__), "dummy") + command = ( + "bash -c \"g++ -include " + + header + + " -l" + + library + + " -x c++ - <<<'int main() {}' -o " + + dummy_path + + " >/dev/null 2>/dev/null && rm " + + dummy_path + + " 2>/dev/null\"" + ) + return os.system(command) == 0 + + +# hack compile to support parallel compiling +distutils.ccompiler.CCompiler.compile = parallelCCompile + +FILES = glob.glob('kenlm/util/*.cc') + glob.glob('kenlm/lm/*.cc') + glob.glob('kenlm/util/double-conversion/*.cc') + +FILES += glob.glob('openfst-1.6.3/src/lib/*.cc') + +FILES = [fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc') or fn.endswith('unittest.cc'))] + +LIBS = ['stdc++'] +if platform.system() != 'Darwin': + LIBS.append('rt') + +ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=10', '-std=c++11'] + +if compile_test('zlib.h', 'z'): + ARGS.append('-DHAVE_ZLIB') + LIBS.append('z') + +if compile_test('bzlib.h', 'bz2'): + ARGS.append('-DHAVE_BZLIB') + LIBS.append('bz2') + +if compile_test('lzma.h', 'lzma'): + ARGS.append('-DHAVE_XZLIB') + LIBS.append('lzma') + +os.system('swig -python -c++ ./decoders.i') + +decoders_module = [ + Extension( + name='_swig_decoders', + sources=FILES + glob.glob('*.cxx') + glob.glob('*.cpp'), + language='c++', + include_dirs=['.', 'kenlm', 'openfst-1.6.3/src/include', 'ThreadPool',], + libraries=LIBS, + extra_compile_args=ARGS, + ) +] + +setup( + name='ctc_decoders', + version='1.1', + description="""CTC decoders""", + ext_modules=decoders_module, + py_modules=['ctc_decoders', 'swig_decoders'], +) From 33669a20e98b21c635e3535396bae0542a9a5170 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 4 May 2023 14:54:22 -0700 Subject: [PATCH 064/512] temp rtd fix (#6568) (#6569) Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar --- requirements/requirements_docs.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/requirements_docs.txt b/requirements/requirements_docs.txt index b7bfd6956905..34406bd2a366 100644 --- a/requirements/requirements_docs.txt +++ b/requirements/requirements_docs.txt @@ -9,4 +9,5 @@ sphinx-book-theme sphinx-copybutton sphinxcontrib-bibtex sphinxext-opengraph +urllib3<2.0.0 wrapt From e9ae9ba54c8dfd2a9ffd0ad9ce2d89e5eb9fbfa2 Mon Sep 17 00:00:00 2001 From: Ryan Langman Date: Fri, 5 May 2023 08:32:27 -0700 Subject: [PATCH 065/512] [TTS] Add script for mapping speaker names to indices (#6509) Signed-off-by: Ryan --- .../tts/create_speaker_map.py | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 scripts/dataset_processing/tts/create_speaker_map.py diff --git a/scripts/dataset_processing/tts/create_speaker_map.py b/scripts/dataset_processing/tts/create_speaker_map.py new file mode 100644 index 000000000000..027a5c6e3e35 --- /dev/null +++ b/scripts/dataset_processing/tts/create_speaker_map.py @@ -0,0 +1,95 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This script takes a list of TTS manifests and creates a JSON mapping the input speaker names to +unique indices for multi-speaker TTS training. + +To ensure that speaker names are unique across datasets, it is recommended that you prepend the speaker +names in your manifest with the name of the dataset. + +$ python /scripts/dataset_processing/tts/create_speaker_map.py \ + --manifest_path=manifest1.json \ + --manifest_path=manifest2.json \ + --speaker_map_path=speakers.json + +Example output: + +{ + "vctk_p225": 0, + "vctk_p226": 1, + "vctk_p227": 2, + ... +} + +""" + +import argparse +import json +from pathlib import Path + +from nemo.collections.asr.parts.utils.manifest_utils import read_manifest + + +def get_args(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Create mapping from speaker names to numerical speaker indices.", + ) + parser.add_argument( + "--manifest_path", required=True, type=Path, action="append", help="Path to training manifest(s).", + ) + parser.add_argument( + "--speaker_map_path", required=True, type=Path, help="Path for output speaker index JSON", + ) + parser.add_argument( + "--overwrite", default=False, type=bool, help="Whether to overwrite the output speaker file if it exists.", + ) + args = parser.parse_args() + return args + + +def main(): + args = get_args() + manifest_paths = args.manifest_path + speaker_map_path = args.speaker_map_path + overwrite = args.overwrite + + for manifest_path in manifest_paths: + if not manifest_path.exists(): + raise ValueError(f"Manifest {manifest_path} does not exist.") + + if speaker_map_path.exists(): + if overwrite: + print(f"Will overwrite existing speaker path: {speaker_map_path}") + else: + raise ValueError(f"Speaker path already exists: {speaker_map_path}") + + speaker_set = set() + for manifest_path in manifest_paths: + entries = read_manifest(manifest_path) + for entry in entries: + speaker = str(entry["speaker"]) + speaker_set.add(speaker) + + speaker_list = list(speaker_set) + speaker_list.sort() + speaker_index_map = {speaker_list[i]: i for i in range(len(speaker_list))} + + with open(speaker_map_path, 'w', encoding="utf-8") as stats_f: + json.dump(speaker_index_map, stats_f, indent=4) + + +if __name__ == "__main__": + main() From 615a2567583330d4ba291ca7e3b00bd1408e6ec0 Mon Sep 17 00:00:00 2001 From: Nikolay Karpov Date: Fri, 5 May 2023 22:17:02 +0400 Subject: [PATCH 066/512] whitespace (#6574) Signed-off-by: Nikolay Karpov --- nemo/collections/asr/parts/utils/transcribe_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/collections/asr/parts/utils/transcribe_utils.py b/nemo/collections/asr/parts/utils/transcribe_utils.py index 0e72ed8fa16d..8101bee96723 100644 --- a/nemo/collections/asr/parts/utils/transcribe_utils.py +++ b/nemo/collections/asr/parts/utils/transcribe_utils.py @@ -460,7 +460,7 @@ def __init__(self, punctuation_marks: str): def separate_punctuation(self, lines: List[str]) -> List[str]: if self.regex_punctuation is not None: return [ - self.regex_extra_space.sub('', self.regex_punctuation.sub(r' \1 ', line)).strip() for line in lines + self.regex_extra_space.sub(' ', self.regex_punctuation.sub(r' \1 ', line)).strip() for line in lines ] else: return lines @@ -470,7 +470,7 @@ def do_lowercase(self, lines: List[str]) -> List[str]: def rm_punctuation(self, lines: List[str]) -> List[str]: if self.regex_punctuation is not None: - return [self.regex_extra_space.sub('', self.regex_punctuation.sub(' ', line)).strip() for line in lines] + return [self.regex_extra_space.sub(' ', self.regex_punctuation.sub(' ', line)).strip() for line in lines] else: return lines From b1aac0ba62697e5fa33472344bb261a56b841cb0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 5 May 2023 11:21:53 -0700 Subject: [PATCH 067/512] Update manifest.py for speedup (#6565) (#6573) * Update manifest.py Re-order the checks for faster processing audio filepaths that are already absolute paths * Update manifest.py --------- Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: Vahid Noroozi --- nemo/collections/common/parts/preprocessing/manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/common/parts/preprocessing/manifest.py b/nemo/collections/common/parts/preprocessing/manifest.py index c1da97c63bcb..9fd69801ec0d 100644 --- a/nemo/collections/common/parts/preprocessing/manifest.py +++ b/nemo/collections/common/parts/preprocessing/manifest.py @@ -198,7 +198,7 @@ def get_full_path( # If input is a string, get the corresponding full path audio_file = Path(audio_file) - if (len(str(audio_file)) < audio_file_len_limit) and not audio_file.is_file() and not audio_file.is_absolute(): + if (len(str(audio_file)) < audio_file_len_limit) and not audio_file.is_absolute() and not audio_file.is_file(): # If audio_file is not available and the path is not absolute, the full path is assumed # to be relative to the manifest file parent directory or data directory. if manifest_file is None and data_dir is None: From 17e477f1f44b931c68c8664c9d32f3e82ac78675 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 5 May 2023 13:00:41 -0700 Subject: [PATCH 068/512] More streaming conformer export fixes (#6567) (#6578) Signed-off-by: Greg Clark Co-authored-by: Greg Clark Co-authored-by: Vahid Noroozi --- .../asr/modules/conformer_encoder.py | 29 +++++++++++++++++++ nemo/core/classes/exportable.py | 16 +++++++--- scripts/export.py | 1 + 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/nemo/collections/asr/modules/conformer_encoder.py b/nemo/collections/asr/modules/conformer_encoder.py index 0fc0912a8921..9955e35444f4 100644 --- a/nemo/collections/asr/modules/conformer_encoder.py +++ b/nemo/collections/asr/modules/conformer_encoder.py @@ -183,6 +183,19 @@ def input_types(self): } ) + @property + def input_types_for_export(self): + """Returns definitions of module input ports.""" + return OrderedDict( + { + "audio_signal": NeuralType(('B', 'D', 'T'), SpectrogramType()), + "length": NeuralType(tuple('B'), LengthsType()), + "cache_last_channel": NeuralType(('B', 'D', 'T', 'D'), ChannelType(), optional=True), + "cache_last_time": NeuralType(('B', 'D', 'D', 'T'), ChannelType(), optional=True), + "cache_last_channel_len": NeuralType(tuple('B'), LengthsType(), optional=True), + } + ) + @property def output_types(self): """Returns definitions of module output ports.""" @@ -196,6 +209,19 @@ def output_types(self): } ) + @property + def output_types_for_export(self): + """Returns definitions of module output ports.""" + return OrderedDict( + { + "outputs": NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()), + "encoded_lengths": NeuralType(tuple('B'), LengthsType()), + "cache_last_channel_next": NeuralType(('B', 'D', 'T', 'D'), ChannelType(), optional=True), + "cache_last_time_next": NeuralType(('B', 'D', 'D', 'T'), ChannelType(), optional=True), + "cache_last_channel_next_len": NeuralType(tuple('B'), LengthsType(), optional=True), + } + ) + @property def disabled_deployment_input_names(self): if not self.export_cache_support: @@ -489,6 +515,8 @@ def forward_for_export( rets = self.streaming_post_process(rets, keep_all_outputs=False) if len(rets) == 2: return rets + elif rets[2] is None and rets[3] is None and rets[4] is None: + return (rets[0], rets[1]) else: return ( rets[0], @@ -549,6 +577,7 @@ def forward_internal( audio_signal = self.pre_encode(audio_signal) else: audio_signal, length = self.pre_encode(x=audio_signal, lengths=length) + length = length.to(torch.int64) # self.streaming_cfg is set by setup_streaming_cfg(), called in the init if self.streaming_cfg.drop_extra_pre_encoded > 0 and cache_last_channel is not None: audio_signal = audio_signal[:, self.streaming_cfg.drop_extra_pre_encoded :, :] diff --git a/nemo/core/classes/exportable.py b/nemo/core/classes/exportable.py index eb399b1c1d1d..38b8e1c1e31b 100644 --- a/nemo/core/classes/exportable.py +++ b/nemo/core/classes/exportable.py @@ -215,8 +215,8 @@ def _export( elif format == ExportFormat.ONNX: # dynamic axis is a mapping from input/output_name => list of "dynamic" indices if dynamic_axes is None: - dynamic_axes = get_dynamic_axes(self.input_module.input_types, input_names) - dynamic_axes.update(get_dynamic_axes(self.output_module.output_types, output_names)) + dynamic_axes = get_dynamic_axes(self.input_module.input_types_for_export, input_names) + dynamic_axes.update(get_dynamic_axes(self.output_module.output_types_for_export, output_names)) torch.onnx.export( jitted_model, input_example, @@ -273,11 +273,19 @@ def _export_teardown(self): @property def input_names(self): - return get_io_names(self.input_module.input_types, self.disabled_deployment_input_names) + return get_io_names(self.input_module.input_types_for_export, self.disabled_deployment_input_names) @property def output_names(self): - return get_io_names(self.output_module.output_types, self.disabled_deployment_output_names) + return get_io_names(self.output_module.output_types_for_export, self.disabled_deployment_output_names) + + @property + def input_types_for_export(self): + return self.input_types + + @property + def output_types_for_export(self): + return self.output_types def get_export_subnet(self, subnet=None): """ diff --git a/scripts/export.py b/scripts/export.py index efb257d00447..80cbcf3dc666 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -32,6 +32,7 @@ import torch from pytorch_lightning import Trainer +import nemo from nemo.core import ModelPT from nemo.core.classes import Exportable from nemo.core.config.pytorch_lightning import TrainerConfig From 9b71b35a6919f91c1e68dacb29b448f16e4735da Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 5 May 2023 15:43:42 -0600 Subject: [PATCH 069/512] user selected max_seq_len should be less than model's max_seq_len (#6333) (#6386) * user selection should not break model max limit * eval max seq length --------- Signed-off-by: arendu Signed-off-by: Adi Renduchintala <108822655+arendu@users.noreply.github.com> Co-authored-by: Adi Renduchintala <108822655+arendu@users.noreply.github.com> Co-authored-by: Sandeep Subramanian Co-authored-by: Eric Harper --- .../language_modeling/megatron_gpt_prompt_learning_eval.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_gpt_prompt_learning_eval.py b/examples/nlp/language_modeling/megatron_gpt_prompt_learning_eval.py index d66bac0bfecc..3a490b3532f1 100644 --- a/examples/nlp/language_modeling/megatron_gpt_prompt_learning_eval.py +++ b/examples/nlp/language_modeling/megatron_gpt_prompt_learning_eval.py @@ -151,12 +151,13 @@ def placeholder(): "compute_logprob": cfg.inference.compute_logprob, } - max_input_length = model.frozen_model.cfg.encoder_seq_length - length_params["max_length"] + max_seq_length = model.frozen_model.cfg.encoder_seq_length - length_params["max_length"] + max_seq_length = min(max_seq_length, cfg.get("max_seq_length", 8192)) _, dataloader = model.build_virtual_prompt_dataset( data=cfg.data_paths, batch_size=cfg.inference.get('batch_size', 1), - max_seq_length=max_input_length, + max_seq_length=max_seq_length, min_seq_length=model.cfg.data.get('min_seq_length', 1), add_bos=sampling_params["add_BOS"], add_eos=False, From c453e5207a03602f9bcd88ca35c134be4bd9f5d5 Mon Sep 17 00:00:00 2001 From: Adi Renduchintala <108822655+arendu@users.noreply.github.com> Date: Fri, 5 May 2023 16:26:03 -0700 Subject: [PATCH 070/512] Framework for PEFT via mixins (#6391) * init commit ptuning via mixin Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates Signed-off-by: arendu * gpt ptuning places virtual tokens on the left only Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * encoder input modified when pre_process is true Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * optimizer group and state dict updates Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * adapter ptuning working for pp>1 Signed-off-by: arendu * adapter defaults Signed-off-by: arendu * adapter ptuining config defaults Signed-off-by: arendu * training works Signed-off-by: arendu * loading and saving adapter only params during training Signed-off-by: arendu * added checks and comments Signed-off-by: arendu * clean up Signed-off-by: arendu * checks for grad is None before calling all_reduce Signed-off-by: arendu * load adapter .nemo file working Signed-off-by: arendu * resume training for adapters Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * peft tuning Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * minor Signed-off-by: arendu * file not needed Signed-off-by: arendu * undo prompt learning dataset changes Signed-off-by: arendu * undo updates to gpt prompt learning model Signed-off-by: arendu * naming updates Signed-off-by: arendu * decoding Signed-off-by: arendu * predict_step in gpt_sft_model Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removed inference from tuning config Signed-off-by: arendu * no test in peft training Signed-off-by: arendu * answer only loss and correct defaults for val_loss Signed-off-by: arendu * hybrid adapters and ptuning Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * eval working.. Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * prepending tokens for ptuning Signed-off-by: arendu * cleaned up eval config Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * clean up Signed-off-by: arendu * update Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * default prompt template Signed-off-by: arendu * Lora added Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Support synamic length with GPT SFT Signed-off-by: Abhinav Khattar * make branch functional Signed-off-by: Abhinav Khattar * defaults to max_pad_length=False in GPT SFT dataset Signed-off-by: arendu * adapter parallel_adapters to support Lora Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added early stopping by default Signed-off-by: arendu * eval script for peft and eval config. bug fixes in predict step and added out_features to t5 adapter config Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * docs Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * better defaults Signed-off-by: arendu * updates Signed-off-by: arendu * update Signed-off-by: arendu * docs Signed-off-by: arendu --------- Signed-off-by: arendu Signed-off-by: Abhinav Khattar Signed-off-by: Adi Renduchintala <108822655+arendu@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Abhinav Khattar --- .../conf/megatron_gpt_peft_eval_config.yaml | 130 +++++++ .../conf/megatron_gpt_peft_tuning_config.yaml | 207 ++++++++++ .../tuning/megatron_gpt_peft_eval.py | 157 ++++++++ .../tuning/megatron_gpt_peft_tuning.py | 244 ++++++++++++ .../megatron/gpt_sft_dataset.py | 22 +- .../language_modeling/megatron_base_model.py | 3 +- .../megatron_gpt_adapter_model.py | 1 + .../language_modeling/megatron_gpt_model.py | 3 +- .../megatron_gpt_peft_models.py | 361 ++++++++++++++++++ .../megatron_gpt_sft_model.py | 35 +- .../megatron_t5_adapter_model.py | 1 + .../megatron/adapters/parallel_adapters.py | 167 ++++++-- .../nlp/modules/common/megatron/attention.py | 13 +- .../nlp/modules/common/megatron/clip_grads.py | 25 +- .../modules/common/megatron/language_model.py | 23 +- nemo/collections/nlp/parts/nlp_overrides.py | 95 +++++ nemo/core/optim/optimizer_with_main_params.py | 3 +- 17 files changed, 1443 insertions(+), 47 deletions(-) create mode 100755 examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml create mode 100755 examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml create mode 100644 examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py create mode 100644 examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py create mode 100644 nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml new file mode 100755 index 000000000000..d7ebd69f31be --- /dev/null +++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml @@ -0,0 +1,130 @@ +name: megatron_gpt_peft_${model.peft.peft_scheme}_tuning + +trainer: + devices: 1 + accelerator: gpu + num_nodes: 1 + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + replace_sampler_ddp: False + max_epochs: 9999 + max_steps: 20000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 # frequency with which training steps are logged + val_check_interval: 200 # If is an int n > 1, will run val every n training steps, if a float 0.0 - 1.0 will run val every epoch fraction, e.g. 0.25 will run val every quarter epoch + gradient_clip_val: 1.0 + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: validation_${model.data.validation_ds.metric.name} + save_top_k: 1 + mode: max + save_nemo_on_train_end: True + filename: '${name}--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}-{consumed_samples}' + model_parallel_size: ${model.tensor_model_parallel_size} + always_save_nemo: True + save_best_model: False + +model: + seed: 1234 + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + + global_batch_size: 1 + micro_batch_size: 1 + restore_from_path: ??? # Path to an existing .nemo model you wish to add new tasks to or run inference with + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + save_nemo_on_validation_end: True # Saves an inference ready .nemo file every time a checkpoint is saved during training. + sync_batch_comm: False + megatron_amp_O2: False + + ## Sequence Parallelism + # Makes tensor parallelism more memory efficient for LLMs (20B+) by parallelizing layer norms and dropout sequentially + # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. + sequence_parallel: False + + ## Activation Checkpoint + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + # 'uniform' divides the total number of transformer layers and checkpoints the input activation + # of each chunk at the specified granularity + # 'block' checkpoints the specified number of layers per pipeline stage at the specified granularity + activations_checkpoint_num_layers: null # not used with 'selective' + answer_only_loss: False # not used right now + gradient_as_bucket_view: False + + hidden_dropout: 0.0 + attention_dropout: 0.0 + ffn_dropout: 0.0 + + peft: + peft_scheme: "adapter" # can be either adapter,ia3, or ptuning + restore_from_path: null + + # Used for adapter peft training + adapter_tuning: + type: 'parallel_adapter' # this should be either 'parallel_adapter' or 'linear_adapter' + adapter_dim: 32 + adapter_dropout: 0.0 + norm_position: 'pre' # This can be set to 'pre' or 'post', 'pre' is normally what is used. + column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal + row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal + norm_type: 'mixedfusedlayernorm' # IGNORED if layer_adapter is used, options are ['layernorm', 'mixedfusedlayernorm'] + + # Used for p-tuning peft training + p_tuning: + virtual_tokens: 10 # The number of virtual tokens the prompt encoder should add at the start of the sequence + bottleneck_dim: 1024 # the size of the prompt encoder mlp bottleneck + embedding_dim: 1024 # the size of the prompt encoder embeddings + init_std: 0.023 + + data: + test_ds: + file_names: ??? # Path to a list of JSONL files corresponding to the source data. Data format is identical to train_ds. + names: ??? # Names of the corresponding datasets used to log metrics. + global_batch_size: ??? + micro_batch_size: ??? + shuffle: False + num_workers: 0 + pin_memory: True + max_seq_length: 2048 + min_seq_length: 1 + drop_last: False + context_key: ${data.train_ds.context_key} + label_key: ${data.train_ds.label_key} + add_eos: ${data.train_ds.add_eos} + add_sep: ${data.train_ds.add_sep} + add_bos: ${data.train_ds.add_bos} + separate_prompt_and_response_with_newline: ${data.train_ds.separate_prompt_and_response_with_newline} + write_predictions_to_file: False + output_file_path_prefix: null # Prefix of the file to write predictions to. + truncation_field: ${data.train_ds.truncation_field} # Options: ['context', 'answer'] + index_mapping_dir: null # Path to a directory to write index mapping files. + prompt_template: ${data.train_ds.prompt_template} + tokens_to_generate: ??? + + metric: + name: "loss" # Name of the evaluation metric to use. Options: ['exact_string_match', 'loss'] + average: null # Average the metric over the dataset. Options: ['macro', 'micro']. Works only for 'F1', 'accuracy' etc. Refer to torchmetrics for metrics where this is supported. + num_classes: null + +inference: + greedy: True # Whether or not to use sampling ; use greedy decoding otherwise + top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. + top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. + temperature: 1.0 # sampling temperature + all_probs: False # whether return the log prob for all the tokens in vocab + repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. + min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. + compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False + outfile_path: /home/adithyare/exp/foo.txt \ No newline at end of file diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml new file mode 100755 index 000000000000..799d105aae7c --- /dev/null +++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml @@ -0,0 +1,207 @@ +name: megatron_gpt_peft_tuning + +trainer: + devices: 1 + accelerator: gpu + num_nodes: 1 + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + replace_sampler_ddp: False + max_epochs: 9999 + max_steps: 20000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 # frequency with which training steps are logged + val_check_interval: 200 # If is an int n > 1, will run val every n training steps, if a float 0.0 - 1.0 will run val every epoch fraction, e.g. 0.25 will run val every quarter epoch + gradient_clip_val: 1.0 + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: validation_${model.data.validation_ds.metric.name} + save_top_k: 1 + mode: min + save_nemo_on_train_end: True + filename: '${name}--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}-{consumed_samples}' + model_parallel_size: ${model.tensor_model_parallel_size} + always_save_nemo: False + save_best_model: True + create_early_stopping_callback: True + early_stopping_callback_params: + monitor: "val_loss" + mode: "min" + min_delta: 0.001 + patience: 10 + verbose: True + strict: False # Should be False to avoid a runtime error where EarlyStopping says monitor is unavailable, which sometimes happens with resumed training. + +model: + seed: 1234 + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + + global_batch_size: 128 + micro_batch_size: 4 + restore_from_path: ??? # Path to an existing .nemo model you wish to add new tasks to or run inference with + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + save_nemo_on_validation_end: False # Saves an inference ready .nemo file every time a checkpoint is saved during training. + sync_batch_comm: False + megatron_amp_O2: False + + ## Sequence Parallelism + # Makes tensor parallelism more memory efficient for LLMs (20B+) by parallelizing layer norms and dropout sequentially + # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. + sequence_parallel: False + + ## Activation Checkpoint + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + # 'uniform' divides the total number of transformer layers and checkpoints the input activation + # of each chunk at the specified granularity + # 'block' checkpoints the specified number of layers per pipeline stage at the specified granularity + activations_checkpoint_num_layers: null # not used with 'selective' + answer_only_loss: True + gradient_as_bucket_view: False + + hidden_dropout: 0.0 + attention_dropout: 0.0 + ffn_dropout: 0.0 + + peft: + peft_scheme: "adapter" # can be either adapter,ia3, or ptuning + restore_from_path: null + + # Used for adapter peft training + adapter_tuning: + type: 'parallel_adapter' # this should be either 'parallel_adapter' or 'linear_adapter' + adapter_dim: 32 + adapter_dropout: 0.0 + norm_position: 'pre' # This can be set to 'pre' or 'post', 'pre' is normally what is used. + column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal + row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal + norm_type: 'mixedfusedlayernorm' # IGNORED if layer_adapter is used, options are ['layernorm', 'mixedfusedlayernorm'] + + lora_tuning: + adapter_dim: 32 + adapter_dropout: 0.0 + column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal + row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal + + # Used for p-tuning peft training + p_tuning: + virtual_tokens: 10 # The number of virtual tokens the prompt encoder should add at the start of the sequence + bottleneck_dim: 1024 # the size of the prompt encoder mlp bottleneck + embedding_dim: 1024 # the size of the prompt encoder embeddings + init_std: 0.023 + + data: + train_ds: + # Example of how to specify paths to multiple datasets + # file_names: + # - /path/to/squad.jsonl + # - /path/to/mnli.jsonl + # - /path/to/boolq.jsonl + # Example of how each dataset is formatted + # {'input': 'John von Neumann\nVon Neumann made fundamental contributions .... Q: What did the math of artificial viscosity do?', 'output': 'smoothed the shock transition without sacrificing basic physics'} + file_names: ??? # Path to a list of JSONL files corresponding to the source data. + global_batch_size: ${model.global_batch_size} + micro_batch_size: ${model.micro_batch_size} + shuffle: True + num_workers: 0 + pin_memory: True + max_seq_length: 2048 + min_seq_length: 1 + drop_last: True + # Example of how to specify concat_sampling_probabilities + # concat_sampling_probabilities: + # - 0.5 + # - 0.25 + # - 0.25 + concat_sampling_probabilities: null # When providing a list of datasets, this arg defines the sampling probabilities from each dataset when strategy='random' + context_key: 'input' + label_key: 'output' + add_eos: True + add_sep: False + add_bos: False + separate_prompt_and_response_with_newline: False + truncation_field: "context" # Options: ['context', 'answer'] + index_mapping_dir: null # Path to a directory to write index mapping files. + prompt_template: "{input} {output}" # fstring to use for assistant prompt. Example: "Q: {input}\nA: {output}" + + validation_ds: + file_names: ??? # Path to a list of JSONL files corresponding to the source data. Data format is identical to train_ds. + names: null # Names of the corresponding datasets used to log metrics. + global_batch_size: ${model.global_batch_size} + micro_batch_size: ${model.micro_batch_size} + shuffle: False + num_workers: 0 + pin_memory: True + max_seq_length: 2048 + min_seq_length: 1 + drop_last: False + context_key: 'input' + label_key: 'output' + add_eos: ${model.data.train_ds.add_eos} + add_sep: ${model.data.train_ds.add_sep} + add_bos: ${model.data.train_ds.add_bos} + separate_prompt_and_response_with_newline: ${model.data.train_ds.separate_prompt_and_response_with_newline} + write_predictions_to_file: False + output_file_path_prefix: null # Prefix of the file to write predictions to. + truncation_field: "context" # Options: ['context', 'answer'] + index_mapping_dir: null # Path to a directory to write index mapping files. + prompt_template: ${model.data.train_ds.prompt_template} # fstring to use for assistant prompt. Example: "Q: {input}\nA: {output}" + + metric: + name: "loss" # Name of the evaluation metric to use. Options: ['exact_string_match', 'loss'] + average: null # Average the metric over the dataset. Options: ['macro', 'micro']. Works only for 'F1', 'accuracy' etc. Refer to torchmetrics for metrics where this is supported. + num_classes: null + test_ds: + file_names: null # Path to a list of JSONL files corresponding to the source data. Data format is identical to train_ds. + names: null # Names of the corresponding datasets used to log metrics. + global_batch_size: ${model.global_batch_size} + micro_batch_size: ${model.micro_batch_size} + shuffle: False + num_workers: 4 + pin_memory: True + max_seq_length: 2048 + min_seq_length: 1 + drop_last: False + context_key: 'input' + label_key: 'output' + add_eos: ${model.data.train_ds.add_eos} + add_sep: ${model.data.train_ds.add_sep} + add_bos: ${model.data.train_ds.add_bos} + separate_prompt_and_response_with_newline: ${model.data.train_ds.separate_prompt_and_response_with_newline} + write_predictions_to_file: False + output_file_path_prefix: null # Prefix of the file to write predictions to. + truncation_field: "context" # Options: ['context', 'answer'] + index_mapping_dir: null # Path to a directory to write index mapping files. + prompt_template: ${model.data.train_ds.prompt_template} + + metric: + name: "loss" # Name of the evaluation metric to use. Options: ['exact_string_match', 'loss'] + average: null # Average the metric over the dataset. Options: ['macro', 'micro']. Works only for 'F1', 'accuracy' etc. Refer to torchmetrics for metrics where this is supported. + num_classes: null + + optim: + name: fused_adam + lr: 1e-4 + weight_decay: 0.01 + betas: + - 0.9 + - 0.98 + sched: + name: CosineAnnealing + warmup_steps: 50 + min_lr: 0.0 # min_lr must be 0.0 for prompt learning when pipeline parallel > 1 + constant_steps: 0 # Constant steps should also be 0 when min_lr=0 + monitor: val_loss + reduce_on_plateau: false \ No newline at end of file diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py new file mode 100644 index 000000000000..a9f6a110c210 --- /dev/null +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py @@ -0,0 +1,157 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import os + +import torch.multiprocessing as mp +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from torch.utils.data import DataLoader + +from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import ( + MegatronGPTAdapterModel, + MegatronGPTAdapterPTuningModel, + MegatronGPTIA3Model, + MegatronGPTLoRAModel, + MegatronGPTPEFTModel, + MegatronGPTPTuningModel, +) +from nemo.collections.nlp.models.nlp_model import NLPModel +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PEFTSaveRestoreConnector, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging + +mp.set_start_method("spawn", force=True) + +""" +This is the script to train an Adapter infused GPT Model for text generation. +A base GPT Model is required as a starting point. This script will then insert +Adapters into each Transformer layer and will train/update only these adapters +during training. The base GPT Model weights will remain frozen. + +During training this script will only save the newly trained Adapter weights +in checkpoints. At the end of training a .nemo file of Adapter weights will +be saved. + +Usage: + Assuming the base model is a 125m GPT Model, with TP=1, PP=1: + a. run a training run for a base gpt nemo file: + python megatron_gpt_adapter_tuning.py \ + "model.data.train_ds=[PATH TO TRAINING JSONL FILE]", + "model.data.validation_ds=[PATH TO VALIDATION JSONL FILE]", + model.language_model_path="PATH TO BASE GPT MODEL .nemo FILE" + name="NAME OF TRAINING RUN" + exp_manager.exp_dir="DIR TO SAVE CHECKPOINTS and .nemo FILE", + trainer.max_epochs=2 +""" + + +@hydra_runner(config_path="conf", config_name="megatron_gpt_peft_eval_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") + assert cfg.model.restore_from_path is not None + assert cfg.model.peft.restore_from_path is not None + megatron_amp_o2 = cfg.model.get("megatron_amp_O2", False) + with_distributed_adam = False + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + if cfg.trainer.precision in [16, "bf16"]: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get("native_amp_init_scale", 2 ** 32), + growth_interval=cfg.model.get("native_amp_growth_interval", 1000), + hysteresis=cfg.model.get("hysteresis", 2), + enabled=False + if cfg.model.pipeline_model_parallel_size > 1 + else True, # turn off the grad scale for pipeline parallel LM model + ) + if megatron_amp_o2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device="cuda", scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device="cuda", scaler=scaler)) + + if cfg.get("cluster_type", None) == "BCP": + plugins.append(TorchElasticEnvironment()) + + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + peft_model_cfg = MegatronGPTPEFTModel.restore_from( + restore_path=cfg.model.peft.restore_from_path, trainer=trainer, return_config=True, + ) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(peft_model_cfg): + # update the model config of the trained model with params we want to set at inference time. + peft_model_cfg.precision = cfg.trainer.precision + peft_model_cfg.data.test_ds = cfg.model.data.test_ds + + with open_dict(cfg): + # update the config with the trained model config + # required for hydra interpolation to work inside cfg.inference + cfg.inference.add_BOS = peft_model_cfg.data.test_ds.add_bos + cfg.inference.tokens_to_generate = peft_model_cfg.data.test_ds.tokens_to_generate + + save_restore_connector = PEFTSaveRestoreConnector( + peft_model_nemo_path=cfg.model.peft.restore_from_path, peft_model_ckpt_path=None, + ) + if os.path.isdir(peft_model_cfg.restore_from_path): + save_restore_connector.model_extracted_dir = cfg.model.restore_from_path + # peft_cls = _get_peft_scheme(peft_model_cfg) + model = NLPModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + override_config_path=peft_model_cfg, + save_restore_connector=save_restore_connector, + ) + + model.freeze() + _test_ds = model._build_dataset(peft_model_cfg.data.test_ds, is_train=False) + request_dl = DataLoader( + dataset=_test_ds[0], + batch_size=peft_model_cfg.data.test_ds.global_batch_size, + collate_fn=_test_ds[0].collate_fn, + ) + config = OmegaConf.to_container(cfg.inference, resolve=True) + model.set_inference_config(config) + response = trainer.predict(model, request_dl) + if model.global_rank == 0: + print("***************************") + if cfg.inference.outfile_path is not None: + with open(cfg.inference.outfile_path, "w", encoding="utf-8") as f: + for batch in response: + for sentence in batch["sentences"]: + s = " ".join(sentence.split("\n")) + f.write(s + "\n") + print("predictions saved to {}".format(cfg.inference.outfile_path)) + else: + print(response) + print("***************************") + + +if __name__ == "__main__": + main() diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py new file mode 100644 index 000000000000..d0f95b371a13 --- /dev/null +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py @@ -0,0 +1,244 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import os +import tempfile + +import torch.multiprocessing as mp +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector +from torch.utils.data import DataLoader, Dataset + +from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import ( + MegatronGPTAdapterModel, + MegatronGPTAdapterPTuningModel, + MegatronGPTIA3Model, + MegatronGPTLoRAModel, + MegatronGPTPTuningModel, +) +from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTModel +from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + NLPSaveRestoreConnector, + PEFTSaveRestoreConnector, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import AppState, logging +from nemo.utils.exp_manager import exp_manager +from nemo.utils.model_utils import inject_model_parallel_rank + +mp.set_start_method("spawn", force=True) + +""" +This is the script to train an Adapter infused GPT Model for text generation. +A base GPT Model is required as a starting point. This script will then insert +Adapters into each Transformer layer and will train/update only these adapters +during training. The base GPT Model weights will remain frozen. + +During training this script will only save the newly trained Adapter weights +in checkpoints. At the end of training a .nemo file of Adapter weights will +be saved. + +Usage: + Assuming the base model is a 125m GPT Model, with TP=1, PP=1: + a. run a training run for a base gpt nemo file: + python megatron_gpt_adapter_tuning.py \ + "model.data.train_ds=[PATH TO TRAINING JSONL FILE]", + "model.data.validation_ds=[PATH TO VALIDATION JSONL FILE]", + model.language_model_path="PATH TO BASE GPT MODEL .nemo FILE" + name="NAME OF TRAINING RUN" + exp_manager.exp_dir="DIR TO SAVE CHECKPOINTS and .nemo FILE", + trainer.max_epochs=2 +""" + + +def _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False): + """ + This function modifies the original gpt pre-training config (gpt_cfg) with attributes from the finetuning config (cfg). + The `add_cfg_to_tree` arg adds `cfg` to the top of the yaml tree which is needed for all `hparams.yaml` files when passed as an arg to `load_from_checkpoint()`. + """ + OmegaConf.set_struct(gpt_cfg, True) + OmegaConf.resolve(cfg) + with open_dict(gpt_cfg): + gpt_cfg.megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) + gpt_cfg.micro_batch_size = cfg.model.data.train_ds.micro_batch_size + gpt_cfg.global_batch_size = cfg.model.data.train_ds.global_batch_size + gpt_cfg.sequence_parallel = cfg.model.get("sequence_parallel", False) + gpt_cfg.activations_checkpoint_granularity = cfg.model.get("activations_checkpoint_granularity", None) + gpt_cfg.activations_checkpoint_num_layers = cfg.model.get("activations_checkpoint_num_layers", None) + gpt_cfg.activations_checkpoint_method = cfg.model.get("activations_checkpoint_method", None) + gpt_cfg.data = cfg.model.data + gpt_cfg.optim = cfg.model.optim + gpt_cfg.precision = cfg.trainer.precision + gpt_cfg.answer_only_loss = cfg.model.answer_only_loss + gpt_cfg.restore_from_path = cfg.model.restore_from_path + gpt_cfg.resume_from_checkpoint = cfg.model.resume_from_checkpoint + gpt_cfg.save_nemo_on_validation_end = cfg.model.save_nemo_on_validation_end + gpt_cfg.gradient_as_bucket_view = cfg.model.gradient_as_bucket_view + gpt_cfg.hidden_dropout = cfg.model.get('hidden_dropout', 0.0) + gpt_cfg.attention_dropout = cfg.model.get('attention_dropout', 0.0) + gpt_cfg.ffn_dropout = cfg.model.ffn_dropout + gpt_cfg.peft = cfg.model.peft + + # This is needed when modifying a hparam file directly to load `.ckpt` files. + # This is not needed to modify the cfg in `.nemo` files. + if add_cfg_to_tree: + OmegaConf.resolve(gpt_cfg) + gpt_cfg.cfg = gpt_cfg + + return gpt_cfg + + +def _get_peft_scheme(cfg): + if cfg.peft.peft_scheme == "adapter": + peft_cls = MegatronGPTAdapterModel + elif cfg.peft.peft_scheme == "ia3": + peft_cls = MegatronGPTIA3Model + elif cfg.peft.peft_scheme == "ptuning": + peft_cls = MegatronGPTPTuningModel + elif cfg.peft.peft_scheme == "adapter_and_ptuning": + peft_cls = MegatronGPTAdapterPTuningModel + elif cfg.peft.peft_scheme == "lora": + peft_cls = MegatronGPTLoRAModel + else: + raise RuntimeError("Invalid Peft scheme") + return peft_cls + + +def load_from_checkpoint_dir(cls, cfg, trainer, modify_confg_fn): + app_state = AppState() + if cfg.model.tensor_model_parallel_size > 1 or cfg.model.pipeline_model_parallel_size > 1: + app_state.model_parallel_size = cfg.model.tensor_model_parallel_size * cfg.model.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = cfg.model.tensor_model_parallel_size + app_state.pipeline_model_parallel_size = cfg.model.pipeline_model_parallel_size + ( + app_state.tensor_model_parallel_rank, + app_state.pipeline_model_parallel_rank, + app_state.model_parallel_size, + app_state.data_parallel_size, + app_state.pipeline_model_parallel_split_rank, + app_state.virtual_pipeline_model_parallel_rank, + ) = fake_initialize_model_parallel( + world_size=app_state.model_parallel_size, + rank=trainer.global_rank, + tensor_model_parallel_size_=cfg.model.tensor_model_parallel_size, + pipeline_model_parallel_size_=cfg.model.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank_=cfg.model.pipeline_model_parallel_split_rank, + ) + checkpoint_path = inject_model_parallel_rank( + os.path.join(cfg.model.pretrained_checkpoint.checkpoint_dir, cfg.model.pretrained_checkpoint.checkpoint_name) + ) + hparams_file = OmegaConf.load(cfg.model.pretrained_checkpoint.hparams_file) + gpt_cfg = modify_confg_fn(hparams_file.cfg, cfg, add_cfg_to_tree=True) + with tempfile.NamedTemporaryFile(suffix='.yaml') as f: + OmegaConf.save(config=gpt_cfg, f=f.name) + model = cls.load_from_checkpoint(checkpoint_path=checkpoint_path, trainer=trainer, hparams_file=f.name,) + return model + + +def validate_checkpoint_loading_args(cfg): + if cfg.checkpoint_dir is None or not os.path.isdir(cfg.checkpoint_dir): + raise ValueError(f'Checkpoint directory {cfg.checkpoint_dir} does not exist or is not a directory.') + if cfg.checkpoint_name is None: + raise ValueError(f'Checkpoint name {cfg.checkpoint_name} is not valid.') + if cfg.hparams_file is None or not os.path.isfile(cfg.hparams_file): + raise ValueError(f'Hparams file {cfg.hparams_file} does not exist or is not a file.') + + +@hydra_runner(config_path="conf", config_name="megatron_gpt_peft_tuning_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + enabled=False + if cfg.model.pipeline_model_parallel_size > 1 + else True, # turn off the grad scale for pipeline parallel LM model + ) + if megatron_amp_o2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + exp_manager(trainer, cfg.exp_manager) + # update resume from checkpoint found by exp_manager + if cfg.model.resume_from_checkpoint is not None: + resume_from_checkpoint = cfg.model.resume_from_checkpoint + else: + resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path + logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}') + + trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + if cfg.model.restore_from_path: + base_model_save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.model.restore_from_path): + base_model_save_restore_connector.model_extracted_dir = cfg.model.restore_from_path + base_model_cfg = MegatronGPTModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + return_config=True, + save_restore_connector=base_model_save_restore_connector, + ) + base_model_cfg = _modify_config(base_model_cfg, cfg, add_cfg_to_tree=False) + save_restore_connector = PEFTSaveRestoreConnector( + peft_model_nemo_path=cfg.model.peft.restore_from_path, peft_model_ckpt_path=resume_from_checkpoint + ) + if os.path.isdir(cfg.model.restore_from_path): + save_restore_connector.model_extracted_dir = cfg.model.restore_from_path + peft_cls = _get_peft_scheme(cfg.model) + model = peft_cls.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + override_config_path=base_model_cfg, + save_restore_connector=save_restore_connector, + ) + else: + raise RuntimeError("PEFT training needs a trained base model present.") + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py index 4df6a1bb577b..24b7fe8d3d6d 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py @@ -41,9 +41,11 @@ def __init__( separate_prompt_and_response_with_newline: bool = False, answer_only_loss: bool = True, truncation_field: str = "answer", - pad_to_max_length: bool = True, + pad_to_max_length: bool = False, # (@adithyare) allows for much faster training especially in PEFT settings. index_mapping_dir: str = None, prompt_template: str = None, + virtual_tokens: int = 0, + tokens_to_generate: int = 0, ): """ file_path: Path to a JSONL GPT supervised fine-tuning dataset. Data is formatted as multiple JSON lines with each line formatted as follows. {'input': 'John von Neumann\nVon Neumann made fundamental contributions .... Q: What did the math of artificial viscosity do?', 'output': 'smoothed the shock transition without sacrificing basic physics'} @@ -84,6 +86,8 @@ def __init__( self.pad_to_max_length = pad_to_max_length self.index_mapping_dir = index_mapping_dir self.prompt_template = prompt_template + self.virtual_tokens = virtual_tokens + self.tokens_to_generate = tokens_to_generate if self.prompt_template is not None: # When providing things like newlines in the prompt template via the CLI, they are escaped. This line unescapes them. self.prompt_template = self.prompt_template.encode('utf-8').decode('unicode_escape') @@ -156,8 +160,14 @@ def _process_example(self, example): elif not self.separate_prompt_and_response_with_newline and self.prompt_template is None: text = context + ' ' + output - tokenized_text = self.tokenizer.text_to_ids(text) - context_ids = self.tokenizer.text_to_ids(context) + if self.virtual_tokens: + # (@adithyare) we are going to insert "pad/eos" tokens in the beginning of the text and context + # these pad/eos tokens are placeholders for virtual tokens + pre_pad = [self.tokenizer.eos_id] * self.virtual_tokens + else: + pre_pad = [] + tokenized_text = pre_pad + self.tokenizer.text_to_ids(text) + context_ids = pre_pad + self.tokenizer.text_to_ids(context) answer_ids = tokenized_text[len(context_ids) :] total_ids = len(context_ids) + len(answer_ids) if self.add_bos: @@ -212,7 +222,7 @@ def _maybe_cast_to_list(self, x): return [item.tolist() for item in x] return x - def _round_to_nearest(self, n, m): + def _ceil_to_nearest(self, n, m): return (n + m - 1) // m * m def _collate_item(self, item, max_length, pad_id): @@ -252,12 +262,12 @@ def collate_fn(self, batch): context_lengths = torch.LongTensor([item['context_length'] for item in batch]) loss_mask = [self._build_loss_mask(item)[1:] for item in batch] - max_length = max([len(x) for x in input_ids]) + max_length = max([len(x) for x in input_ids]) + self.tokens_to_generate # increase max length to nearest multiple of 4 or 8 if self.pad_to_max_length: max_length = self.max_seq_length else: - max_length = min(self.max_seq_length, self._round_to_nearest(max_length, 8)) + max_length = min(self.max_seq_length, self._ceil_to_nearest(max_length, 8)) assert max_length <= self.max_seq_length attention_mask = [self._create_attention_mask(max_length) for _ in batch] diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index 5e5c177737fa..3899c75675db 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -239,7 +239,8 @@ def _get_parameters(self): params = [] for param_group in self._optimizer_param_groups: for param in param_group['params']: - params.append(param) + if param.requires_grad: # (@adithyare) adapter training with pp>1 can result in params with no grads + params.append(param) return params def configure_gradient_clipping(self, *args, **kwargs): diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_adapter_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_adapter_model.py index aa7cd4652b0a..cb38ad863a52 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_adapter_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_adapter_model.py @@ -272,6 +272,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): if cfg.adapter_tuning.type == "parallel_adapter": adapter_cfg = ParallelLinearAdapterConfig( in_features=self.frozen_model_cfg.hidden_size, + out_features=self.frozen_model_cfg.hidden_size, dim=cfg.adapter_tuning.adapter_dim, norm_position=cfg.adapter_tuning.get('norm_position', 'pre'), norm_type=cfg.adapter_tuning.get('norm_type', 'mixedfusedlayernorm'), diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 8defb94fd3c1..7159190fdec7 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -440,7 +440,8 @@ def training_step(self, dataloader_iter, batch_idx): # we can avoid this broadcast by updating the PTL log function to accept specific ranks torch.distributed.broadcast(loss_mean, get_last_rank()) - if self.cfg.precision == 16: + # (@adithyare) we need to check for the _scaler attribute to enable pp>1 for adapter training + if self.cfg.precision == 16 and hasattr(self.trainer.precision_plugin.scaler, "_scale"): loss_scale = self.trainer.precision_plugin.scaler._scale if loss_scale is not None: self.log('loss_scale', loss_scale, batch_size=1) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py new file mode 100644 index 000000000000..930bfbc8cf25 --- /dev/null +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py @@ -0,0 +1,361 @@ +# coding=utf-8 +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from omegaconf.dictconfig import DictConfig +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTSFTModel +from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import ( + AdapterName, + InfusedAdapterConfig, + LoraKQVAdapterConfig, + MLPInfusedAdapterConfig, + ParallelLinearAdapterConfig, + PromptEncoderAdapterConfig, +) +from nemo.core.classes.mixins import adapter_mixins +from nemo.utils import logging, model_utils + + +class MegatronGPTPEFTModel(MegatronGPTSFTModel): + """ + base class for all mixin based adapter models + """ + + def __init__(self, cfg: DictConfig, trainer: Trainer): + super().__init__(cfg, trainer) + self.setup_complete = False + self.base_keys = self.get_all_keys() + self.init_peft_modules() + self.adapter_keys = self.get_all_keys() - self.base_keys + + def first_stage_of_pipeline(self): + if hasattr(self, "model") and hasattr(self.model, "pre_process"): + return self.model.pre_process + logging.warning("no attribute named model or no model.pre_process found. Can not detect stage of pipeline...") + return False + + def init_peft_modules(self): + """ + Randomly initialize the peft params and add them to the appropriate modules. + """ + assert len(self.peft_name_keys) > 0, "peft_name_keys have not been set no PEFT modules will be added" + assert len(self.name_key_to_cfg) > 0, "name_key_to_cfg has not been set no PEFT modules will be added" + logging.info(f"Before adding PEFT params:\n{self.summarize()}") + for _, module in self.named_modules(): + if isinstance(module, adapter_mixins.AdapterModuleMixin): + for peft_key in self.peft_name_keys: + peft_cfg = self.name_key_to_cfg[peft_key] + if model_utils.import_class_by_path(peft_cfg._target_) in module.get_accepted_adapter_types(): + module.add_adapter( + name=peft_key, cfg=peft_cfg, + ) + logging.info(f"After adding PEFT params:\n{self.summarize()}") + return True + + def setup(self, stage=None): + super().setup(stage) + self.setup_complete = True + + def get_all_keys(self,): + """ + Returns all the keys in the model + """ + k = [n for n, p in self.named_parameters()] + return set(k) + + def get_peft_state_dict(self,): + """ + Gets the keys associated with the adapters only. + """ + state_dict = self.model.state_dict(prefix="model.") + peft_state_dict = {} + for k in self.adapter_keys: + peft_state_dict[k] = state_dict[k] + return peft_state_dict + + def state_dict(self, destination=None, prefix=None, keep_vars=False): + if self.setup_complete: + # Once setup is complete we no longer need to track the frozen part of the model. Only there adapter state dict keeps changing so state_dict only track these. + return self.get_peft_state_dict() + else: + # we want all the params with the same keys as calling self.state_dict() + # but we can't call self.state_dict() here as it would be a recursive call. + # so we call self.model.state_dict(prefix="model.") which will return all the keys and params same as calling self.state_dict() + return self.model.state_dict(prefix="model.") + + def load_state_dict(self, state_dict, strict: bool = True): + if self.setup_complete: + # at this stage only PEFT params will appear in the state_dict arg + # so we only update those while the rest of the model is frozen. + # setting strict=False will ignore the missing keys (which are not being updated anyway) + # explicitly check if state_dict.keys matches all the expected self.adapter_keys since we don't have the + # safety in strict=True anymore. + assert set(state_dict.keys()) == self.adapter_keys + super().load_state_dict(state_dict, strict=False) + else: + super().load_state_dict(state_dict, strict=True) + + def setup_optimizer_param_groups(self): + """ + ModelPT override. Optimizer will get self._optimizer_param_groups. + Makes two optimizer param groups, one for the frozen model params + and one for the prompt-table/prompt-encoder params. The learning + rate for the frozen model's params will always be zero effectively + freezing the model's params but still allowing for the needed gradients + to be passed around in pipeline parallel models. The prompt-encoder + and/or prompt table will use the learning rate set by the user. + """ + self.freeze() # Freeze the entire model + opt_params = [] + for _, module in self.named_modules(): + if isinstance(module, adapter_mixins.AdapterModuleMixin) and module.is_adapter_available(): + module.set_enabled_adapters(enabled=True) + module.unfreeze_enabled_adapters() # selectively unfreeze the adapter modules. + opt_params += [p for p in module.parameters()] + + self._optimizer_param_groups = ({"params": opt_params},) + logging.info(f"Optimizer groups set:\n{self.summarize()}") + + +class MegatronGPTAdapterModel(MegatronGPTPEFTModel): + """ + MegatronGPTAdapterLearningModel is a model that combines a base model (GPTSFTModel) with a adapters. + This class only supports the canonical Adapter training described in Houlsby et al. (https://arxiv.org/pdf/1902.00751.pdf) + + Two adapter's are inserted into each Transformer layer in the base GPT Model. + + It is assumed that these set of adapters will then be trained for a specific task. + Once trained, the adapter weights will be saved and can be re-loaded + and infused into the same GPT Model for inference. + """ + + def __init__( + self, cfg: DictConfig, trainer: Trainer, + ): + self.peft_name_keys = [ + AdapterName.PRE_ATTN_ADAPTER, + AdapterName.POST_ATTN_ADAPTER, + ] + adapter_tuning_cfg = cfg.peft.adapter_tuning + + adapter_cfg = ParallelLinearAdapterConfig( + in_features=cfg.hidden_size, + out_features=cfg.hidden_size, + dim=adapter_tuning_cfg.adapter_dim, + norm_position=adapter_tuning_cfg.get("norm_position", "pre"), + norm_type=adapter_tuning_cfg.get("norm_type", "mixedfusedlayernorm"), + column_init_method=adapter_tuning_cfg.get("column_init_method", "xavier"), + row_init_method=adapter_tuning_cfg.get("row_init_method", "zero"), + dropout=adapter_tuning_cfg.adapter_dropout, + ) + + self.name_key_to_cfg = {} + for k in self.peft_name_keys: + self.name_key_to_cfg[k] = adapter_cfg + + super().__init__(cfg, trainer) + + +class MegatronGPTIA3Model(MegatronGPTPEFTModel): + """ + MegatronGPTInfusedAdapterModel is a model that combines a base model (GPTSFTModel) with a "Infused Adapter that can Inhibiting and Amplify Inner Activations", known as IA3. + This class supports the addition of IA3 into a transformer based LM as described in Liu et al. (https://arxiv.org/pdf/2205.05638.pdf) + + Three adapter's are inserted into each Transformer layer in the base GPT Model. Each adapter is basically a vector that simply scales the key, value or ffn hidden representations. + + It is assumed that these set of adapters will then be trained for a specific task. + Once trained, the adapter weights will be saved and can be re-loaded + and infused into the same GPT Model for inference. + """ + + def __init__(self, cfg: DictConfig, trainer: Trainer): + self.peft_name_keys = [AdapterName.KEY_INFUSED, AdapterName.VALUE_INFUSED, AdapterName.MLP_INFUSED] + + mlp_infused_adapter_cfg = MLPInfusedAdapterConfig( + in_features=cfg.ffn_hidden_size // cfg.tensor_model_parallel_size + ) + infused_adapter_cfg = InfusedAdapterConfig(in_features=cfg.hidden_size // cfg.tensor_model_parallel_size) + + self.name_key_to_cfg = {} + for k in self.peft_name_keys: + if k == AdapterName.MLP_INFUSED: + self.name_key_to_cfg[k] = mlp_infused_adapter_cfg + elif k in [ + AdapterName.KEY_INFUSED, + AdapterName.VALUE_INFUSED, + ]: + self.name_key_to_cfg[k] = infused_adapter_cfg + else: + raise ValueError(f"PEFT Key {k} is unknown.") + super().__init__(cfg, trainer) + + +class MegatronGPTPTuningModel(MegatronGPTPEFTModel): + """ + MegatronGPTPTuningModel is a model that combines a base model (GPTSFTModel) with a p-tuning prefix in the + input word embedding representations using a prompt-encoder as descripted in Liu et al. https://arxiv.org/pdf/2103.10385.pdf + + The mixin framework adds the output of prompt-encoder (i.e. the virtual embeddings) inside + nemo/collections/nlp/modules/common/megatron/language_model.py + """ + + def __init__(self, cfg: DictConfig, trainer: Trainer): + self.peft_name_keys = [AdapterName.PTUNING_ADAPTER] + + adapter_cfg = PromptEncoderAdapterConfig( + cfg.peft.p_tuning.virtual_tokens, + cfg.peft.p_tuning.bottleneck_dim, + cfg.peft.p_tuning.embedding_dim, + cfg.peft.p_tuning.init_std, + cfg.hidden_size, + ) + self.name_key_to_cfg = {AdapterName.PTUNING_ADAPTER: adapter_cfg} + super().__init__(cfg, trainer) + self.virtual_tokens = cfg.peft.p_tuning.virtual_tokens + + def init_peft_modules(self,): + """ + Initialize the p-tuning prompt encoder in the mixin. + This should only happen in the first stage of the pipeline unlike other PEFT methods like Lora or Adapters + because p-tuning only adds params at input to the encoder layer. + """ + if not self.first_stage_of_pipeline(): + # There are no params to add if we are not in the first state of the pipeline + return True + super().init_peft_modules() + return True + + def state_dict(self, destination=None, prefix=None, keep_vars=False): + """ + Reimplement state_dict for ptuning because we also need to check the stage of the pipeline. + The check is required to make pp>1 to work. + """ + if self.setup_complete: + if self.first_stage_of_pipeline(): + return self.get_peft_state_dict() + # if we are not in the first state of pipeline after setup is done + # there should be no params in the state_dict + return {} + else: + return self.model.state_dict(prefix="model.") + + def load_state_dict(self, state_dict, strict: bool = True): + """ + Reimplement load_state_dict for ptuning because we also need to check the stage of the pipeline. + The check is required to make pp>1 to work. + """ + if self.setup_complete: + if self.first_stage_of_pipeline(): + # if we are not in the first state of pipeline after setup is done + # there should be no params to load... + assert set(state_dict.keys()) == self.adapter_keys + super().load_state_dict(state_dict, strict=False) + else: + super().load_state_dict(state_dict, strict=True) + + def setup_optimizer_param_groups(self): + if self.first_stage_of_pipeline(): + super().setup_optimizer_param_groups() + else: + self.freeze() # Freeze the entire model + self._optimizer_param_groups = ({"params": []},) + logging.info(f"Optimizer groups set:\n{self.summarize()}") + + +class MegatronGPTAdapterPTuningModel(MegatronGPTPEFTModel): + """ + Want to combine adapters and p-tuning? Why not? they are orthogonal methods. + This class includes both sets of params. + """ + + def __init__(self, cfg: DictConfig, trainer: Trainer): + self.peft_name_keys = [ + AdapterName.PRE_ATTN_ADAPTER, + AdapterName.POST_ATTN_ADAPTER, + AdapterName.PTUNING_ADAPTER, + ] + ptuning_cfg = PromptEncoderAdapterConfig( + cfg.peft.p_tuning.virtual_tokens, + cfg.peft.p_tuning.bottleneck_dim, + cfg.peft.p_tuning.embedding_dim, + cfg.peft.p_tuning.init_std, + cfg.hidden_size, + ) + adapter_tuning_cfg = cfg.peft.adapter_tuning + adapter_cfg = ParallelLinearAdapterConfig( + in_features=cfg.hidden_size, + out_features=cfg.hidden_size, + dim=adapter_tuning_cfg.adapter_dim, + norm_position=adapter_tuning_cfg.get("norm_position", "pre"), + norm_type=adapter_tuning_cfg.get("norm_type", "mixedfusedlayernorm"), + column_init_method=adapter_tuning_cfg.get("column_init_method", "xavier"), + row_init_method=adapter_tuning_cfg.get("row_init_method", "zero"), + dropout=adapter_tuning_cfg.adapter_dropout, + ) + + self.name_key_to_cfg = { + AdapterName.PRE_ATTN_ADAPTER: adapter_cfg, + AdapterName.POST_ATTN_ADAPTER: adapter_cfg, + AdapterName.PTUNING_ADAPTER: ptuning_cfg, + } + super().__init__(cfg, trainer) + self.virtual_tokens = cfg.peft.p_tuning.virtual_tokens + + +class MegatronGPTLoRAModel(MegatronGPTPEFTModel): + """ + MegatronGPTLoRAModel is a model that combines a base model (GPTSFTModel) with a low-rank adapters. + The lora adapters will be added in `nemo/collections/nlp/modules/common/megatron/attention.py` + The implementation is based on Hu et al. nemo/collections/nlp/modules/common/megatron/attention.py + + A single low-rank feedfowrad layer is used in parallel with the KQV projection layer. + TODO: Add support to also include an option to adda low-rank adapter in the output projection layer. + """ + + def __init__( + self, cfg: DictConfig, trainer: Trainer, + ): + self.peft_name_keys = [ + AdapterName.LORA_KQV_ADAPTER, + ] + lora_cfg = cfg.peft.lora_tuning + if cfg.kv_channels is None: + assert ( + cfg.hidden_size % cfg.num_attention_heads == 0 + ), 'hidden_size must be divisible by num_attention_heads if kv_channels is None' + kv_channels = cfg.hidden_size // cfg.num_attention_heads + else: + kv_channels = cfg.kv_channels + projection_size = kv_channels * cfg.num_attention_heads + + adapter_cfg = LoraKQVAdapterConfig( + in_features=cfg.hidden_size, + out_features=3 * projection_size, + dim=lora_cfg.adapter_dim, + norm_position="none", + norm_type="none", + activation="identity", + column_init_method=lora_cfg.get("column_init_method", "normal"), + row_init_method=lora_cfg.get("row_init_method", "zero"), + gather_output=False, + dropout=lora_cfg.adapter_dropout, + ) + + self.name_key_to_cfg = {} + for k in self.peft_name_keys: + self.name_key_to_cfg[k] = adapter_cfg + + super().__init__(cfg, trainer) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index 56a4496b800b..a28b8216c207 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -13,6 +13,7 @@ # limitations under the License. import json +from typing import Any, Optional import torch from omegaconf import DictConfig, ListConfig @@ -29,7 +30,12 @@ ) from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel from nemo.collections.nlp.modules.common.megatron.utils import get_iterator_k_split -from nemo.collections.nlp.modules.common.text_generation_utils import LengthParam, SamplingParam, megatron_gpt_generate +from nemo.collections.nlp.modules.common.text_generation_utils import ( + LengthParam, + SamplingParam, + generate, + megatron_gpt_generate, +) from nemo.utils import AppState, logging try: @@ -83,6 +89,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self.original_checkpointing_granularity = base_module.language_model.encoder.activations_checkpoint_granularity self.original_checkpointing_num_layers = base_module.language_model.encoder.activations_checkpoint_num_layers self.original_checkpointing_method = base_module.language_model.encoder.activations_checkpoint_method + self.virtual_tokens = 0 def setup_metric(self, data_cfg): metric_name = "exact_string_match" @@ -248,6 +255,10 @@ def _build_dataset(self, data_cfg, is_train=True): pad_to_max_length=False, index_mapping_dir=data_cfg.get('index_mapping_dir', None), prompt_template=data_cfg.get('prompt_template', None), + virtual_tokens=self.virtual_tokens, + tokens_to_generate=data_cfg.get( + 'tokens_to_generate', 0 + ), # used at inference time to allocate tensor positions for tokens that will be generated by inf procedure. ) datasets.append(dataset) @@ -515,6 +526,28 @@ def inference_epoch_end(self, outputs, mode, data_cfg): return averaged_loss, averaged_metric + def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any: + inference_config = self.get_inference_config() + if inference_config is None: + return None + # need to overwrite some configuration, make it immutable + inference_config = inference_config.copy() + compute_logprob = inference_config['compute_logprob'] + if compute_logprob: + del inference_config['compute_logprob'] + inference_config['inputs'] = batch + inference_config['tokens_to_generate'] = 1 + inference_config['all_probs'] = True + inference_config["add_BOS"] = False + inference_config['greedy'] = True + response = generate(self, **inference_config) + compute_prob_response = get_computeprob_response(self.tokenizer, response, batch) + return compute_prob_response + else: + del inference_config['compute_logprob'] + inference_config['inputs'] = (batch['contexts'].cuda(), batch['context_lengths'].cuda()) + return generate(self, **inference_config) + def write_predictions_to_file(self, outputs, output_file_path_prefix): with open(output_file_path_prefix + "_inputs_preds_labels.jsonl", "w") as f_json: assert len(outputs['inputs']) == len(outputs['preds']) == len(outputs['labels']) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py index 71b3d5537efd..32345e829be8 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py @@ -397,6 +397,7 @@ def _get_adapter_cfg(self, component_cfg): if component_cfg.adapter_tuning.type == "parallel_adapter": adapter_cfg = ParallelLinearAdapterConfig( in_features=component_cfg.hidden_size, + out_features=component_cfg.hidden_size, dim=component_cfg.adapter_tuning.adapter_dim, norm_position=component_cfg.adapter_tuning.get('norm_position', 'pre'), norm_type=component_cfg.adapter_tuning.get('norm_type', 'mixedfusedlayernorm'), diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py index 979214d1971f..b26b971a38ba 100644 --- a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py +++ b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py @@ -21,9 +21,11 @@ import torch import torch.nn as nn +import torch.nn.init as init from nemo.collections.common.parts.adapter_modules import AdapterModuleUtil from nemo.collections.common.parts.utils import activation_registry +from nemo.collections.nlp.modules.common.megatron.fused_bias_gelu import fused_bias_gelu from nemo.collections.nlp.modules.common.megatron.utils import init_method_const, init_method_normal from nemo.core.classes.mixins import adapter_mixin_strategies @@ -56,6 +58,10 @@ class AdapterName(str, enum.Enum): VALUE_INFUSED = "value_infused_adapter" PRE_ATTN_ADAPTER = 'adapter_1' POST_ATTN_ADAPTER = 'adapter_2' + PTUNING_ADAPTER = "ptuning_adapter" + LORA_KQV_ADAPTER = "lora_kqv_adapter" + LORA_KV_ADAPTER = "lora_kv_adapter" + LORA_Q_ADAPTER = "lora_q_adapter" class InfusedAdapter(nn.Module, AdapterModuleUtil): @@ -97,12 +103,14 @@ class ParallelLinearAdapter(nn.Module, AdapterModuleUtil): def __init__( self, in_features: int, + out_features: int, dim: int, activation: str = 'swish', norm_position: str = 'post', norm_type: str = 'mixedfusedlayernorm', - column_init_method: str = 'xavier', - row_init_method: str = 'zero', + column_init_method: str = 'xavier', # TODO: (@adithyare) should rename this to input_init_method to be more precise. + row_init_method: str = 'zero', # TODO: (@adithyare) should rename this to output_init_method to be more precise. + gather_output: bool = True, dropout: float = 0.0, adapter_strategy: adapter_mixin_strategies.ResidualAddAdapterStrategyConfig = None, ): @@ -116,30 +124,28 @@ def __init__( self.activation = activation_registry[activation]() self.norm_position = norm_position - if column_init_method == 'xavier': - self.linear_in = ColumnParallelLinear(in_features, dim, bias=False) - elif column_init_method == 'normal': - self.linear_in = ColumnParallelLinear(in_features, dim, bias=False, init_method=init_method_normal(0.2)) - elif column_init_method == 'zero': - self.linear_in = ColumnParallelLinear(in_features, dim, bias=False, init_method=init_method_const(0.0)) + self.linear_in = ColumnParallelLinear( + in_features, dim, bias=False, gather_output=True, init_method=self._get_init_fn(column_init_method) + ) + if gather_output: + self.linear_out = RowParallelLinear( + dim, out_features, bias=False, init_method=self._get_init_fn(row_init_method) + ) else: - raise NotImplementedError("column_init_method should be zero, normal or xavier") - - if row_init_method == 'xavier': - self.linear_out = RowParallelLinear(dim, in_features, bias=False) - elif row_init_method == 'normal': - self.linear_out = RowParallelLinear(dim, in_features, bias=False, init_method=init_method_normal(0.2)) - elif row_init_method == 'zero': - self.linear_out = RowParallelLinear(dim, in_features, bias=False, init_method=init_method_const(0.0)) - else: - raise NotImplementedError("row_init_method should be zero, normal or xavier") - - if norm_type == 'mixedfusedlayernorm': - self.layer_norm = MixedFusedLayerNorm(in_features, 1e-5, sequence_parallel_enabled=False) - elif norm_type == 'layernorm': - self.layer_norm = nn.LayerNorm(in_features) - else: - raise NotImplementedError("norm_type should be either mixedfusedlayernorm or layernorm") + # (@adithyare) we use this option to mirror the behavior a column parallel layer with two low-rank column parallel layers + # if the original column parallel layer uses gather_output=False, then we will use the self.liner_out layer defined below. + self.linear_out = ColumnParallelLinear( + dim, out_features, bias=False, gather_output=False, init_method=self._get_init_fn(row_init_method) + ) + + if self.norm_position in ["pre", "post"]: + ln_features = in_features if self.norm_position == "pre" else out_features + if norm_type == 'mixedfusedlayernorm': + self.layer_norm = MixedFusedLayerNorm(ln_features, 1e-5, sequence_parallel_enbaled=False) + elif norm_type == 'layernorm': + self.layer_norm = nn.LayerNorm(ln_features) + else: + raise NotImplementedError("norm_type should be either mixedfusedlayernorm or layernorm") if dropout > 0.0: self.dropout = nn.Dropout(dropout) @@ -149,6 +155,17 @@ def __init__( # Setup adapter strategy self.setup_adapter_strategy(adapter_strategy) + def _get_init_fn(self, init_method: str): + if init_method == 'xavier': + init_fn = init.xavier_normal_ + elif init_method == 'normal': + init_fn = init_method_normal(0.2) + elif init_method == "zero": + init_fn = init_method_const(0.0) + else: + raise NotImplementedError("out_init_method should be zero, normal or xavier") + return init_fn + def forward(self, x): if self.norm_position == 'pre': @@ -157,7 +174,6 @@ def forward(self, x): x, _ = self.linear_in(x) # (@adithyare) ColumnLinear returns output and bias, we are ignoring the bias term. x = self.activation(x) x, _ = self.linear_out(x) - if self.norm_position == 'post': x = self.layer_norm(x) @@ -171,12 +187,111 @@ def forward(self, x): @dataclass class ParallelLinearAdapterConfig: in_features: int + out_features: int dim: int activation: str = 'swish' norm_position: str = 'post' norm_type: str = 'mixedfusedlayernorm' column_init_method: str = 'xavier' row_init_method: str = 'zero' + gather_output: bool = True dropout: float = 0.0 adapter_strategy: Optional[Any] = adapter_mixin_strategies.ResidualAddAdapterStrategyConfig() _target_: str = "{0}.{1}".format(ParallelLinearAdapter.__module__, ParallelLinearAdapter.__name__) + + +class LoraKQVAdapter(ParallelLinearAdapter): + """ + Lora Adapters are the same arch as regualr adapters but with potentially different input and output feature sizes + and they do not use an bottleneck activation function + """ + + pass + + +@dataclass +class LoraKQVAdapterConfig(ParallelLinearAdapterConfig): + _target_: str = "{0}.{1}".format(LoraKQVAdapter.__module__, LoraKQVAdapter.__name__) + + +class PromptEncoderAdapter(nn.Module, AdapterModuleUtil): + """ + The Tensor Parallel MLP prompt encoder network that is used to generate the virtual + token embeddings for p-tuning. It only have two layers. + TODO: (@adithyare) Need to add all the functionality from the PromptEncoder class + """ + + def __init__( + self, + virtual_tokens: int, + bottleneck_dim: int, + embedding_dim: int, + init_std: float, + output_dim: int, + adapter_strategy: adapter_mixin_strategies.ResidualAddAdapterStrategyConfig = None, + ): + """ + Initializes the Tensor Model parallel MLP PromptEncoderMLP module. + Args: + virtual_tokens: the number of vitural tokens + hidden_size: hidden dimension + output_size: the output dimension + init_std: the MLP init std value + """ + super().__init__() + self.bottleneck_dim = bottleneck_dim + self.embedding_dim = embedding_dim + self.output_dim = output_dim + self.virtual_tokens = virtual_tokens + self.activation = "gelu" + + sequence_parallel = False + gradient_accumulation_fusion = False + # (@adithyare) the persistent=False will not pollute the indices into the state_dict of this module. + self.register_buffer("indices", torch.LongTensor(list(range(self.virtual_tokens))), persistent=False) + self.embedding = torch.nn.Embedding(self.virtual_tokens, self.embedding_dim) + self.first = ColumnParallelLinear( + self.embedding_dim, + self.bottleneck_dim, + gather_output=False, + init_method=init_method_normal(init_std), + skip_bias_add=True, + use_cpu_initialization=False, + bias=True, + sequence_parallel_enabled=sequence_parallel, + gradient_accumulation_fusion=gradient_accumulation_fusion, + ) + self.second = RowParallelLinear( + self.bottleneck_dim, + self.output_dim, + input_is_parallel=True, + init_method=init_method_normal(init_std), + skip_bias_add=True, + use_cpu_initialization=False, + bias=True, + sequence_parallel_enabled=sequence_parallel, + gradient_accumulation_fusion=gradient_accumulation_fusion, + ) + # Setup adapter strategy + self.setup_adapter_strategy(adapter_strategy) + + def forward(self, batch_size): + input_embeds = self.embedding(self.indices).unsqueeze(0) + intermediate_parallel, bias_parallel = self.first(input_embeds) + intermediate_parallel = fused_bias_gelu(intermediate_parallel, bias_parallel) + output_embeds, bias_parallel = self.second(intermediate_parallel) + output_embeds = output_embeds + bias_parallel + output_embeds = output_embeds.transpose(0, 1) + output_embeds = output_embeds.expand(self.virtual_tokens, batch_size, self.output_dim) + return output_embeds + + +@dataclass +class PromptEncoderAdapterConfig: + virtual_tokens: int + bottleneck_dim: int + embedding_dim: int + init_std: float + output_dim: int + adapter_strategy: Optional[Any] = adapter_mixin_strategies.ResidualAddAdapterStrategyConfig() + _target_: str = "{0}.{1}".format(PromptEncoderAdapter.__module__, PromptEncoderAdapter.__name__) diff --git a/nemo/collections/nlp/modules/common/megatron/attention.py b/nemo/collections/nlp/modules/common/megatron/attention.py index d92e5306042d..f6768a5ba4a0 100644 --- a/nemo/collections/nlp/modules/common/megatron/attention.py +++ b/nemo/collections/nlp/modules/common/megatron/attention.py @@ -18,7 +18,11 @@ import torch.nn.functional as F from einops import rearrange, repeat -from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import AdapterName, InfusedAdapterConfig +from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import ( + AdapterName, + InfusedAdapterConfig, + LoraKQVAdapterConfig, +) from nemo.collections.nlp.modules.common.megatron.fused_softmax import MatchedScaleMaskSoftmax from nemo.collections.nlp.modules.common.megatron.module import MegatronModule from nemo.collections.nlp.modules.common.megatron.rotary_pos_embedding import apply_rotary_pos_emb @@ -108,7 +112,7 @@ def __init__( self.megatron_legacy = megatron_legacy - self.set_accepted_adapter_types([InfusedAdapterConfig._target_]) + self.set_accepted_adapter_types([InfusedAdapterConfig._target_, LoraKQVAdapterConfig._target_]) if kv_channels is None: assert ( @@ -360,6 +364,11 @@ def forward( if self.attention_type == AttnType.self_attn: # Attention heads [sq, b, h] --> [sq, b, (np * 3 * hn)] mixed_x_layer, _ = self.query_key_value(hidden_states) + if self.is_adapter_available(): + lora_kqv_adapter = self.get_adapter_module(AdapterName.LORA_KQV_ADAPTER) + if lora_kqv_adapter: + lora_mixed_x_layer = lora_kqv_adapter(hidden_states) + mixed_x_layer = mixed_x_layer + lora_mixed_x_layer # [sq, b, (np * 3 * hn)] --> [sq, b, np, 3 * hn] new_tensor_shape = mixed_x_layer.size()[:-1] + ( diff --git a/nemo/collections/nlp/modules/common/megatron/clip_grads.py b/nemo/collections/nlp/modules/common/megatron/clip_grads.py index 68a97485edf6..a1620931a695 100644 --- a/nemo/collections/nlp/modules/common/megatron/clip_grads.py +++ b/nemo/collections/nlp/modules/common/megatron/clip_grads.py @@ -20,6 +20,7 @@ from torch import inf from nemo.collections.nlp.modules.common.megatron.module import param_is_not_shared +from nemo.utils import logging try: import amp_C @@ -91,7 +92,7 @@ def clip_grad_norm_fp32(parameters, max_norm, norm_type=2): grads_for_norm.append(grad) if not grads_for_norm: - raise ValueError("No grads found, please disable gradient clipping") + logging.warning("No grads found, consider disabling gradient clipping") # Norm parameters. max_norm = float(max_norm) @@ -100,7 +101,8 @@ def clip_grad_norm_fp32(parameters, max_norm, norm_type=2): # Calculate norm. if norm_type == inf: - total_norm = max(grad.abs().max() for grad in grads_for_norm) + if grads_for_norm: # (@adithyare) grads_for_norm can be empty for adapter training with pp>1 + total_norm = max(grad.abs().max() for grad in grads_for_norm) total_norm_cuda = torch.cuda.FloatTensor([float(total_norm)]) # Take max across all model-parallel GPUs. torch.distributed.all_reduce( @@ -114,9 +116,12 @@ def clip_grad_norm_fp32(parameters, max_norm, norm_type=2): # Use apex's multi-tensor applier for efficiency reasons. # Multi-tensor applier takes a function and a list of list # and performs the operation on that list all in one kernel. - grad_norm, _ = multi_tensor_applier( - amp_C.multi_tensor_l2norm, dummy_overflow_buf, [grads_for_norm], False # no per-parameter norm - ) + if grads_for_norm: # (@adithyare) grads_for_norm can be empty for adapter training with pp>1 + grad_norm, _ = multi_tensor_applier( + amp_C.multi_tensor_l2norm, dummy_overflow_buf, [grads_for_norm], False # no per-parameter norm + ) + else: + grad_norm = 0.0 # Since we will be summing across data parallel groups, # we need the pow(norm-type). total_norm = grad_norm ** norm_type @@ -127,14 +132,18 @@ def clip_grad_norm_fp32(parameters, max_norm, norm_type=2): total_norm += grad_norm ** norm_type # Sum across all model-parallel GPUs. + total_norm_cuda = torch.cuda.FloatTensor( + [float(total_norm)] + ) # (@adithyare) total_norm can be a float at this point so we convert it to cuda.FloatTensor torch.distributed.all_reduce( - total_norm, op=torch.distributed.ReduceOp.SUM, group=parallel_state.get_model_parallel_group() + total_norm_cuda, op=torch.distributed.ReduceOp.SUM, group=parallel_state.get_model_parallel_group() ) - total_norm = total_norm.item() ** (1.0 / norm_type) + total_norm = total_norm_cuda[0].item() + total_norm = total_norm ** (1.0 / norm_type) # Scale. clip_coeff = max_norm / (total_norm + 1.0e-6) - if clip_coeff < 1.0: + if clip_coeff < 1.0 and grads: # (@adithyare) grads can be empty for adapter training. dummy_overflow_buf = torch.cuda.IntTensor([0]) multi_tensor_applier(amp_C.multi_tensor_scale, dummy_overflow_buf, [grads, grads], clip_coeff) diff --git a/nemo/collections/nlp/modules/common/megatron/language_model.py b/nemo/collections/nlp/modules/common/megatron/language_model.py index aa8a8a08da64..ddbed5813d95 100755 --- a/nemo/collections/nlp/modules/common/megatron/language_model.py +++ b/nemo/collections/nlp/modules/common/megatron/language_model.py @@ -15,6 +15,10 @@ """Transformer based language model.""" import torch +from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import ( + AdapterName, + PromptEncoderAdapterConfig, +) from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType from nemo.collections.nlp.modules.common.megatron.module import MegatronModule from nemo.collections.nlp.modules.common.megatron.rotary_pos_embedding import RotaryEmbedding @@ -25,6 +29,7 @@ init_method_normal, scaled_init_method_normal, ) +from nemo.core import adapter_mixins try: from apex.transformer.enums import AttnMaskType @@ -419,7 +424,7 @@ def load_state_dict(self, state_dict, strict=True): ) -class TransformerLanguageModel(MegatronModule): +class TransformerLanguageModel(MegatronModule, adapter_mixins.AdapterModuleMixin): """Transformer language model. Arguments: @@ -648,6 +653,7 @@ def __init__( init_method=self.init_method, ) self._output_layer_key = 'output_layer' + self.set_accepted_adapter_types([PromptEncoderAdapterConfig._target_]) def set_input_tensor(self, input_tensor): """ See megatron.model.transformer.set_input_tensor()""" @@ -680,7 +686,21 @@ def forward( ): # Embeddings. if self.pre_process and encoder_input is None: + encoder_input = self.embedding(enc_input_ids, enc_position_ids, token_type_ids=token_type_ids) + if self.is_adapter_available(): + _sq, _bs, _hs = encoder_input.size() + ptuning_adapter = self.get_adapter_module(AdapterName.PTUNING_ADAPTER) + v = ptuning_adapter.virtual_tokens + if ptuning_adapter and _sq >= v: # The sequence should be longer the v to insert virtual embeddings. + strategy = ptuning_adapter.adapter_strategy + virtual_embeddings = self.forward_single_enabled_adapter_( + _bs, ptuning_adapter, adapter_name=AdapterName.PTUNING_ADAPTER, adapter_strategy=strategy, + ) + encoder_input = encoder_input[ + v:, :, : + ] # the first v tokens are pads so that they can be swapped out with virtual embeddings. + encoder_input = torch.concat([virtual_embeddings, encoder_input], dim=0) else: pass @@ -705,6 +725,7 @@ def forward( rotary_pos_emb = self.rotary_pos_emb(encoder_input.size(0)) else: rotary_pos_emb = None + # encoder. if enc_hidden_states is None: encoder_output = self.encoder( diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index 805ce5cfb7ac..e1c0246bd8da 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -419,6 +419,101 @@ def restore_from( return instance +class PEFTSaveRestoreConnector(NLPSaveRestoreConnector): + """ + PEFT models require the ability to load/save a small subset of the full model (once PEFT params have been infused into the base model.) + The PEFTSaveRestoreConnector is used to allow loading and saving only the PEFT params while not saving the entire model. + + Args: + peft_model_nemo_path: Used to provide the .nemo file corresponding to a PEFT model (which will only contain a small set of params) + peft_model_ckpt_path: Used to provide the path to .ckpt files of a PEFt model. This is required when no .nemo is available (yet) such as during resumed training. + If both are provided the peft_model_ckpt_path takes precedence. + If neither are provided, PEFT params are initialized at random (not loaded from any external source). + """ + + def __init__(self, peft_model_nemo_path: Optional[str] = None, peft_model_ckpt_path: Optional[str] = None) -> None: + super().__init__() + self.peft_model_ckpt_name = "model_weights.ckpt" + if peft_model_ckpt_path: + # First we will try to load a adapter ckpt path + # this is given priority over loading from nemo path to make resumption of training possible + ckpt_name = os.path.basename(peft_model_ckpt_path) + if not ckpt_name.strip() == '': + # update the weights file name inside the ckpt path rank folders + self.peft_model_ckpt_name = ckpt_name + self.peft_model_ckpt_dir = os.path.dirname(peft_model_ckpt_path) + assert os.path.isdir(self.peft_model_ckpt_dir) + self.peft_model_nemo_path = None + elif peft_model_nemo_path: + # If resumption is not possible we will try to load a adapter nemo path + self.peft_model_nemo_path = peft_model_nemo_path + assert os.path.exists(self.peft_model_nemo_path) + self.peft_model_ckpt_dir = None + else: + # We are not resuming training from a nemo file or a ckpt + # We are training the adapter from randomly initialization + self.peft_model_nemo_path = None + self.peft_model_ckpt_dir = None + + def _load_state_dict_from_disk(self, model_weights, map_location=None): + """ + Infuse the state_dict of the base model with PEFT params from either a peft_model_nemo_path or peft_model_ckpt_path + """ + # first load based model weights + base_model_state_dict = super()._load_state_dict_from_disk(model_weights, map_location) + # Next, We want to load PEFT model's weights + if self.peft_model_nemo_path: + # if the PEFT weights are provided in a .nemo file + # we need to untar the .nemo if its still tarred + with tempfile.TemporaryDirectory() as tmpdir: + self._unpack_nemo_file(self.peft_model_nemo_path, tmpdir) + model_weights_path = self._inject_model_parallel_rank_for_ckpt(tmpdir, self.peft_model_ckpt_name) + peft_state_dict = torch.load(model_weights_path, map_location) + elif self.peft_model_ckpt_dir: + # if the PEFT weights are provided in a ckpt path file + # we don't need to untar + model_weights_path = self._inject_model_parallel_rank_for_ckpt( + self.peft_model_ckpt_dir, self.peft_model_ckpt_name + ) + peft_state_dict = torch.load(model_weights_path, map_location)['state_dict'] + else: + peft_state_dict = {} + base_model_state_dict.update(peft_state_dict) # add the PEFT state_dict into the base model's state_dict + return base_model_state_dict + + def restore_from( + self, + calling_cls, + restore_path: str, + override_config_path: Optional[Union[OmegaConf, str]] = None, + map_location: Optional[torch.device] = None, + strict: bool = True, + return_config: bool = False, + trainer: Trainer = None, + ): + """ + Extends the restore_from method of the `NLPSaveRestoreConnector` so that PEFT params are inserted into the state_dict which is required when training a PEFT model from scratch. + """ + # Get path where the command is executed - the artifacts will be "retrieved" there + # (original .nemo behavior) + loaded_params = super().load_config_and_state_dict( + calling_cls, restore_path, override_config_path, map_location, strict, return_config, trainer, + ) + if not isinstance(loaded_params, tuple) or return_config is True: + return loaded_params + conf, instance, state_dict = loaded_params + state_dict = self.modify_state_dict(conf, state_dict) + + if ( + self.peft_model_nemo_path is None and self.peft_model_ckpt_dir is None + ): # we have this check only for training PEFT from scratch + peft_state_dict = instance.get_peft_state_dict() + state_dict.update(peft_state_dict) + self.load_instance_with_state_dict(instance, state_dict, strict) + logging.info(f'Model {instance.__class__.__name__} was successfully restored from {restore_path}.') + return instance + + class PipelineMixedPrecisionPlugin(NativeMixedPrecisionPlugin): """ Overrides PTL autocasting to not wrap training/val/test_step. We do this because we have the megatron-core fwd/bwd functions in training_step. diff --git a/nemo/core/optim/optimizer_with_main_params.py b/nemo/core/optim/optimizer_with_main_params.py index cab5e84fda2f..c9790ee2a139 100644 --- a/nemo/core/optim/optimizer_with_main_params.py +++ b/nemo/core/optim/optimizer_with_main_params.py @@ -492,7 +492,8 @@ def get_parameters(self): params = [] for param_group in self.optimizer.param_groups: for param in param_group['params']: - params.append(param) + if param.requires_grad: # (@adithyare) added to enable pp>1 training for adapters + params.append(param) return params # Promote state so it can be retrieved or set via From 79fa33c2e44f4313ed55a3243dc8b16b6c29ad79 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 7 May 2023 16:38:13 -0600 Subject: [PATCH 071/512] cache and reuse inputs (#6422) (#6452) Co-authored-by: Sangkug Lym Co-authored-by: Eric Harper --- .../language_modeling/megatron/gpt_dataset.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py index a17ba6be3cb4..d2aa5182b716 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py @@ -321,6 +321,8 @@ def __init__( self.reset_position_ids = cfg.data.get('reset_position_ids', False) self.reset_attention_mask = cfg.data.get('reset_attention_mask', False) self.eod_mask_loss = cfg.data.get('eod_mask_loss', False) + self.create_inputs = any([self.reset_position_ids, self.reset_attention_mask, self.eod_mask_loss]) + self.cached_inputs = False self.eos_id = tokenizer.eos_id self.no_seqlen_plus_one_input_tokens = cfg.data.get('no_seqlen_plus_one_input_tokens', False) self.add_extra_token = 1 @@ -406,9 +408,19 @@ def __getitem__(self, idx): tokens = text labels = torch.roll(text, shifts=-1, dims=0) labels[-1] = -1 - attention_mask, loss_mask, position_ids = _create_ltor_masks_and_position_ids( - tokens, self.eos_id, self.reset_position_ids, self.reset_attention_mask, self.eod_mask_loss, - ) + if self.create_inputs or not self.cached_inputs: + attention_mask, loss_mask, position_ids = _create_ltor_masks_and_position_ids( + tokens, self.eos_id, self.reset_position_ids, self.reset_attention_mask, self.eod_mask_loss, + ) + if not self.create_inputs: + self.cached_attention_mask = attention_mask + self.cached_loss_mask = loss_mask + self.cached_position_ids = position_ids + self.cached_inputs = True + else: + attention_mask = self.cached_attention_mask + loss_mask = self.cached_loss_mask + position_ids = self.cached_position_ids loss_mask[labels == -1] = 0.0 tokens[tokens == -1] = 0 labels[labels == -1] = 0 From 4aa4aedc1a2cd0a3640413992eea65d41f2ec0e6 Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Mon, 8 May 2023 09:19:37 -0700 Subject: [PATCH 072/512] Add patches for Virtual Parallel conversion (#6589) * Add patches for Virtual Parllel conversion Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: smajumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../nlp/language_modeling/megatron_change_num_partitions.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/nlp/language_modeling/megatron_change_num_partitions.py b/examples/nlp/language_modeling/megatron_change_num_partitions.py index a4b28fa4d761..558986e3da36 100644 --- a/examples/nlp/language_modeling/megatron_change_num_partitions.py +++ b/examples/nlp/language_modeling/megatron_change_num_partitions.py @@ -125,7 +125,7 @@ def set_virtual_parallel_rank_safely(rank: int): parallel_state.set_virtual_pipeline_model_parallel_rank(rank) if rank is None: - parallel_state.set_virtual_pipeline_model_parallel_world_size(0) + parallel_state.set_virtual_pipeline_model_parallel_world_size(None) except (ImportError, ModuleNotFoundError): logging.warning("`megatron-core` not installed, cannot set virtual parallel rank !") @@ -861,6 +861,10 @@ def main(): convert_vp = vp_size > 1 if convert_vp: + from megatron.core import parallel_state + + parallel_state.set_virtual_pipeline_model_parallel_world_size(vp_size) + hparams_filepath = args.hparams_file if hparams_filepath is None: logging.warning( From 4260e9f55ce434eb9b2a570200b304fd04f6e2ee Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 8 May 2023 11:56:27 -0700 Subject: [PATCH 073/512] Pass `.scale` instead of scaler object to core (#6551) * pass .scale instead of scaler object to core (#6545) Signed-off-by: Abhinav Khattar Co-authored-by: Eric Harper * Update megatron_gpt_model.py Signed-off-by: Abhinav Khattar * scale changes for main Signed-off-by: Abhinav Khattar --------- Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Co-authored-by: Eric Harper --- .../nlp/models/language_modeling/megatron_bert_model.py | 2 +- .../nlp/models/language_modeling/megatron_finetune_model.py | 2 +- .../nlp/models/language_modeling/megatron_gpt_model.py | 2 +- .../language_modeling/megatron_gpt_prompt_learning_model.py | 2 +- .../nlp/models/language_modeling/megatron_gpt_sft_model.py | 2 +- .../language_modeling/megatron_lm_encoder_decoder_model.py | 2 +- .../language_modeling/megatron_t5_prompt_learning_model.py | 2 +- .../nlp/models/machine_translation/megatron_nmt_model.py | 2 +- nemo/collections/nlp/parts/nlp_overrides.py | 3 --- 9 files changed, 8 insertions(+), 11 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py index a7a22bb18150..cd50f8414470 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py @@ -309,7 +309,7 @@ def training_step(self, dataloader_iter, batch_idx): forward_only=False, tensor_shape=tensor_shape, dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), enable_autocast=True, ) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py b/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py index fb58ec6a843b..b7a9fb476409 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py @@ -298,7 +298,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): tensor_shape=tensor_shape, decoder_seq_length=dec_seq_length, dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), enable_autocast=True, ) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 7159190fdec7..5cab67a71441 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -346,7 +346,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): forward_only=forward_only, tensor_shape=tensor_shape, dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), enable_autocast=True, ) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py index 49cb078cd462..dd0d9168c16a 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py @@ -307,7 +307,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): forward_only=forward_only, tensor_shape=tensor_shape, dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), enable_autocast=True, ) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index a28b8216c207..7c3bddc9a08c 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -300,7 +300,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): forward_only=forward_only, tensor_shape=tensor_shape, dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), enable_autocast=True, ) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py index b3ecc1b150ac..365b1870a2d5 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py @@ -327,7 +327,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): tensor_shape=tensor_shape, decoder_seq_length=self.max_decoder_seq_length, dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, enable_autocast=True, ) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py index edec760ec7e7..cef0f6ba8e0e 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py @@ -195,7 +195,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): tensor_shape=tensor_shape, decoder_seq_length=dec_seq_length, dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), enable_autocast=True, ) diff --git a/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py b/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py index ff1888c1c9ea..248a3c8e2ec0 100644 --- a/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py +++ b/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py @@ -314,7 +314,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): tensor_shape=tensor_shape, decoder_seq_length=decoder_seq_length, dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), enable_autocast=True, ) diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index e1c0246bd8da..c1938e7dca41 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -568,9 +568,6 @@ def __init__( self.hysteresis = hysteresis self._hysteresis_tracker = self.hysteresis - def __call__(self, outputs): - return self.scale(outputs) - def _unscale_grads_(self, optimizer, *args): if getattr(optimizer, "_custom_amp_unscale_grads", False): return optimizer.unscale_grads(*args) From bbf0f7d17e2ba229da298a3fa355369a70bb7e90 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 8 May 2023 13:19:23 -0700 Subject: [PATCH 074/512] Documentation for ASR-TTS models (#6594) (#6595) * Add docs about hybrid ASR-TTS models * Add docs about text-only datasets * Add docs about ASR-TTS checkpoints * Add docs about ASR-TTS configs and training * Clean up * ASR-TTS docs: add to api, fix imports * Clean up * Wrap optional import * Revert general ASR import --------- Signed-off-by: Vladimir Bataev Co-authored-by: Vladimir Bataev --- docs/source/asr/api.rst | 18 +++ docs/source/asr/configs.rst | 107 ++++++++++++++++++ docs/source/asr/datasets.rst | 22 +++- .../asr/images/hybrid_asr_tts_model.png | Bin 0 -> 112870 bytes docs/source/asr/models.rst | 27 +++++ docs/source/asr/results.rst | 11 ++ docs/source/tts/models.rst | 2 + nemo/collections/asr/data/text_to_text.py | 13 ++- .../asr/models/hybrid_asr_tts_models.py | 4 +- 9 files changed, 198 insertions(+), 6 deletions(-) create mode 100644 docs/source/asr/images/hybrid_asr_tts_model.png diff --git a/docs/source/asr/api.rst b/docs/source/asr/api.rst index 5735990dc82a..1e2073798d64 100644 --- a/docs/source/asr/api.rst +++ b/docs/source/asr/api.rst @@ -35,6 +35,11 @@ Model Classes :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact +.. autoclass:: nemo.collections.asr.models.hybrid_asr_tts_models.ASRWithTTSModel + :show-inheritance: + :members: from_asr_config, from_pretrained_models, save_asr_model_to, setup_training_data + + Modules ------- @@ -131,6 +136,19 @@ Character Encoding Datasets :show-inheritance: :members: + +Text-to-Text Datasets for Hybrid ASR-TTS models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: nemo.collections.asr.data.text_to_text.TextToTextDataset + :show-inheritance: + :members: + +.. autoclass:: nemo.collections.asr.data.text_to_text.TextToTextIterableDataset + :show-inheritance: + :members: + + Subword Encoding Datasets ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/asr/configs.rst b/docs/source/asr/configs.rst index bd42ac45f9f2..fc48bc06b3ca 100644 --- a/docs/source/asr/configs.rst +++ b/docs/source/asr/configs.rst @@ -878,6 +878,113 @@ FastEmit Regularization is supported for the default Numba based WarpRNNT loss. Refer to the above paper for results and recommendations of ``fastemit_lambda``. +.. _Hybrid-ASR-TTS_model__Config: + +Hybrid ASR-TTS Model Configuration +---------------------------------- + +:ref:`Hybrid ASR-TTS model ` consists of three parts: + +* ASR model (``EncDecCTCModelBPE`` or ``EncDecRNNTBPEModel``) +* TTS Mel Spectrogram Generator (currently, only :ref:`FastPitch ` model is supported) +* Enhancer model (optional) + +Also, the config allows to specify :ref:`text-only dataset `. + +Main parts of the config: + +* ASR model + * ``asr_model_path``: path to the ASR model checkpoint (`.nemo`) file, loaded only once, then the config of the ASR model is stored in the ``asr_model`` field + * ``asr_model_type``: needed only when training from scratch, ``rnnt_bpe`` corresponds to ``EncDecRNNTBPEModel``, ``ctc_bpe`` to ``EncDecCTCModelBPE`` + * ``asr_model_fuse_bn``: fusing BatchNorm in the pretrained ASR model, can improve quality in finetuning scenario +* TTS model + * ``tts_model_path``: path to the pretrained TTS model checkpoint (`.nemo`) file, loaded only once, then the config of the model is stored in the ``tts_model`` field +* Enhancer model + * ``enhancer_model_path``: optional path to the enhancer model. Loaded only once, the config is stored in the ``enhancer_model`` field +* ``train_ds`` + * ``text_data``: properties related to text-only data + * ``manifest_filepath``: path (or paths) to :ref:`text-only dataset ` manifests + * ``speakers_filepath``: path (or paths) to the text file containing speaker ids for the multi-speaker TTS model (speakers are sampled randomly during training) + * ``min_words`` and ``max_words``: parameters to filter text-only manifests by the number of words + * ``tokenizer_workers``: number of workers for initial tokenization (when loading the data). ``num_CPUs / num_GPUs`` is a recommended value. + * ``asr_tts_sampling_technique``, ``asr_tts_sampling_temperature``, ``asr_tts_sampling_probabilities``: sampling parameters for text-only and audio-text data (if both specified). See parameters for ``nemo.collections.common.data.ConcatDataset`` + * all other components are similar to conventional ASR models +* ``validation_ds`` and ``test_ds`` correspond to the underlying ASR model + + +.. code-block:: yaml + + model: + sample_rate: 16000 + + # asr model + asr_model_path: ??? + asr_model: null + asr_model_type: null # rnnt_bpe or ctc_bpe, needed only if instantiating from config, otherwise type is auto inferred + asr_model_fuse_bn: false # only ConformerEncoder supported now, use false for other models + + # tts model + tts_model_path: ??? + tts_model: null + + # enhancer model + enhancer_model_path: null + enhancer_model: null + + train_ds: + text_data: + manifest_filepath: ??? + speakers_filepath: ??? + min_words: 1 + max_words: 45 # 45 - recommended value, ~16.7 sec for LibriSpeech + tokenizer_workers: 1 + asr_tts_sampling_technique: round-robin # random, round-robin, temperature + asr_tts_sampling_temperature: null + asr_tts_sampling_probabilities: null # [0.5,0.5] – ASR,TTS + manifest_filepath: ??? + batch_size: 16 # you may increase batch_size if your memory allows + # other params + +Finetuning +~~~~~~~~~~~ + +To finetune existing ASR model using text-only data use ``/examples/asr/asr_with_tts/speech_to_text_bpe_with_text_finetune.py`` script with the corresponding config ``/examples/asr/conf/asr_tts/hybrid_asr_tts.yaml``. + +Please specify paths to all the required models (ASR, TTS, and Enhancer checkpoints), along with ``train_ds.text_data.manifest_filepath`` and ``train_ds.text_data.speakers_filepath``. + +.. code-block:: shell + + python speech_to_text_bpe_with_text_finetune.py \ + model.asr_model_path= \ + model.tts_model_path= \ + model.enhancer_model_path= \ + model.asr_model_fuse_bn= \ + model.train_ds.manifest_filepath= \ + model.train_ds.text_data.manifest_filepath= \ + model.train_ds.text_data.speakers_filepath= \ + model.train_ds.text_data.tokenizer_workers=4 \ + model.validation_ds.manifest_filepath= \ + model.train_ds.batch_size= + +Training from Scratch +~~~~~~~~~~~~~~~~~~~~~ + +To train ASR model from scratch using text-only data use ``/examples/asr/asr_with_tts/speech_to_text_bpe_with_text.py`` script with conventional ASR model config, e.g. ``/examples/asr/conf/conformer/conformer_ctc_bpe.yaml`` or ``/examples/asr/conf/conformer/conformer_transducer_bpe.yaml`` + +Please specify the ASR model type, paths to the TTS model, and (optional) enhancer, along with text-only data-related fields. + +.. code-block:: shell + + python speech_to_text_bpe_with_text.py \ + ++asr_model_type= \ + ++tts_model_path= \ + ++enhancer_model_path= \ + ++model.train_ds.text_data.manifest_filepath= \ + ++model.train_ds.text_data.speakers_filepath= \ + ++model.train_ds.text_data.min_words=1 \ + ++model.train_ds.text_data.max_words=45 \ + ++model.train_ds.text_data.tokenizer_workers=4 + Fine-tuning Configurations -------------------------- diff --git a/docs/source/asr/datasets.rst b/docs/source/asr/datasets.rst index b55e49ad1c8f..5f74510bd054 100644 --- a/docs/source/asr/datasets.rst +++ b/docs/source/asr/datasets.rst @@ -481,4 +481,24 @@ An example using an AIS cluster at ``hostname:port`` with a tarred dataset for t model.train_ds.tarred_audio_filepaths=ais://train_bucket/audio__OP_0..511_CL_.tar \ ++model.train_ds.defer_setup=true \ mode.validation_ds.manifest_filepath=ais://validation_bucket/validation_manifest.json \ - ++model.validation_ds.defer_setup=true \ No newline at end of file + ++model.validation_ds.defer_setup=true + + +.. _Hybrid-ASR-TTS_model__Text-Only-Data: + +Preparing Text-Only Data for Hybrid ASR-TTS Models +-------------------------------------------------- + +:ref:`Hybrid ASR-TTS models ` require a text-only dataset for training the ASR model. +Each record in the dataset (in ``.json`` file) should contain the following fields: + +* ``text``: text to use as a target for the ASR model +* ``tts_text`` or/and ``tts_text_normalized``: text to use as a source for TTS model. ``tts_text_normalized`` should contain normalized text for TTS model. If there is no such field, ``tts_text`` will be used after normalization using the normalizer from the TTS model. It is highly recommended to normalize the text and create ``tts_text_normalized`` field manually, since current normalizers are unsuitable for processing a large amount of text on the fly. + +**Example record:** + +.. code-block:: json + + {"text": "target for one hundred billion parameters asr model", + "tts_text": "Target for 100B parameters ASR model.", + "tts_text_normalized": "Target for one hundred billion parameters ASR model."} diff --git a/docs/source/asr/images/hybrid_asr_tts_model.png b/docs/source/asr/images/hybrid_asr_tts_model.png new file mode 100644 index 0000000000000000000000000000000000000000..458c47b8b7723aeb1244511d2949a7c8110c131a GIT binary patch literal 112870 zcmeGEc{r5s8$OOdGseE}`@V$ivSyj6$ex6d#$FM!@6A}E63R|OMI^gKwvjbES(7EZ ztRXvn?@9H3{eGW+zsL8l&v8s0bBt%^d7kUOmh(EV^L`!}=xLCXu#i9yM1D#0ydeZ3 zU=W1YO+)}bdDq+rK7gINrLJz^sIH;z;^g9X#nsBj?wqZQwVk1cnxuk)JOl|ou|c5> zHAMtlt!PlF*3Q#HByQe@5fPDwsC!-5Hf%jt2evsQHOazal8I`v0YXTy&<9jCV~Jjf zF9mmejB6pSFf@D=i`nAj>e3Sv6t+U4I-rvHc5M>mG6$DApAc(@C1lMAV<)n8ffaw| zUZzFKP;IjprNnSKa2?>qbS5I~r7_D>+Tlg`NT-|{F0O^7r|EHTppBaQnORw30uO?? zh_nn(Yaz?rgU;Dqw)eaEAedL;l_9;-=F^`Zbdh9{k<3iUZxol06sXvqM_M~jSrehD zQhDpKlXYVEm7tItNL@lhP z&%N#(1}rCUXL`wAR~Hfn?};E-_$>$l-T})(EZ`pm!8720KS97U@czEX>&E>Nwy84+ zK}hJ*`Ey1-u;nzuXLRpITR7W#{nvD{TJ%&!21W)cf;p|6)cB+*mtj&bBPY~3#APS( z3+FF9;SsEPqe&ez?!PvhcB6}IWO;dfd}Vwr;l|;ltyHqkM(0<(=cn|a+bXOethit_ z|K}^hAAW7RHq42OI0pJZUTB_}G&Vzx$^P6cYSRz;C2gE(xG;?ZG5p#LpHku#=Np4a zhxZMLpDFoD^}p@Vfb${Ye3Mj@^1MF_&wV@%+g=$xrJW%1M8^_UpVdSoG?9TB?lZ02yNRbw#Dpv=is(emjE+uUeG_N& z#^{o;@ms<8(4em7^HDJ(B%(||c;vpdtT7ubxSrR{VqWfaw}rXNd(C*7 zEK*XwE8>xu126b#DGWphW3rj)NWSt|S|X}vIj z9s(QgHTd?Ll>3~{wIUQo#H>tnFyEX}xu!nl#6E&>ySBI|q+%7w@BO*Fk6GmsEAxn&rDW_jJMR(A9Kfyp;+q^KIr+?0~$@RO5pZx7oHL zlID_Rr;(Dds{}3rzymPn`CP%DibTq}=j&;Hw4E+AnvN9EP0@U=0N}j!%w^sROzuieEG|J7fx?u z_}xqdHp6XsxLC@zl?o$py!&JNdEmpYGrB6b2jAwZwDo`RpR(q`g@P>!%+$&1XzRwk zBmPo{ZsWND_xY}?oSuawR@+-8?P9U!E;UyJ-%ek1UmFO@&QgVu2})KPd7(Q!39^+UHp55;Hw-TEc3f0x!g~2yhwUa%{ZuY8))$Ur$yj)9s zLX5LL5`I}1`VuIJ2-=%vwDTj+LJw`gMW`40b7Qww$IOXQm&XK+#l91x&=Gf=hi=>x zl{k8jM`u8_Uwj^anvCf9S?7D9AJ1?;`XU{ogFe63_ikancf9v{ek0hr>EWeBvFrWv zcnp@sEY^m^XwT!LgPlaqpxt^GDx!K$y;mrnFPZ9uc5aM$X$^j{zY~cMEgDZY$=1ee z8=Q{5@Xi3Ld1o?&vTeZRMrX3AOQgZW9g!|JrqS8PJ?fgOBCx3=@j3V z5uaR)bvl$F9K|A|C3+NGYBE~L7Kgp_rR?%F*)COyYUO;~$bIq;TJged zbYFEj$4`)4tu%5c-P@ z&ZamPTvC#7x@!QR2;50i+U$_N9hj3$y@a&7=F_dbLqy<9cCw-mw4pZZ)~vPKgMxrx zM>|0bi^E;JS&5d0Znw&=JiN^hRm-Uyt~azTrW4axdVr)Swln6P?zZe;Y4iKy!D7lf zf6a-5aR#hoCd^i|byM4b%ksZ-91_SuoGJnt%C1epu8oe9W?i(m)Ip#BFj3TeaGpgU zd=vMAY8S_S=h^pWV)gqAGz6&SDFdb-%k{6i+xwkqdI|g>KNIzVSoD!XY{<&tYW1d` z?{aa=E2CV)-o_`M+`p?$gc^KUmO;gKU0IU7s{Sci_E=%fb_(i|?EOX_bpFfzt;LrK zF-a5x+=mSxsA~C07{#8M8VsBLE>svJ+TjwrN7eh^^EP@lI3HB5aEW|cSN1*Lo5weA zx@aDu$d9eKJN?R}(DHWoO>%9@K~(d#4<5JDI(Yw7ve5I88mG<{0T~n5u(5GC4Zl|F zs7sxw+?YqK-jrk)alp?36E|Sm_VPQUUlZn9pQx|a9yq_-iqYa`B;`F>1#X(*YTr^b zH$>F5GpeQQhDI&+Skg`FK*=qnQAJF$(}UyTF~33r7!RD-rlGipT&+aGvfcbR%!w0 z9F?(yMa9Ks>A&)>Z6~s_Ks-vaG4OLwwD;G$;onX2A>_T66TQETpvI%qSc0b@F(9+4 zuu|%+r7@14=xWkR#treA$quq)dt!88+F8%r?W|3NF5GdbAfAF@8AZ25!Cje%KusxDhmOL6)PHejsP+~v+}M)->shvEV6Bxg-rPIi z?IL2rQ)TZ6$3#mj+Ag?3%L8PE9e$$-d8^>hp{ENfcwUZn&ObSUcj!~Fo3m%VmIapv z^W6(=^!H3GeJ_;Qewxabq`8jJOIOJsyjHvY#nHJ**eE9>!LvWlr2Buyl#0bRKA??X zJNwy$+$$GTml8?BoQ=RjZ*J5DIahTl%(LzNXyMEPso0t2k5BSZk^e65ps>Y)JF6I2 z9L!Jsa!~?6HE>MtIfx>`Z)<@Ymt@T|3CLMmhl_2pY(G8y|M>wBZdnP0SlXpOFrF0^l&aP0;dK>y$p-lJ8ED#fp)vJ*z@u;R;^8nsE=%J(lhg06pL>VNW2f; zYX#fi2FNiFW&v2pa{cyE@qxx4C>@~)2QUbSTQs|$E02Bn#s(LlhlxGad7QzCI}Ll! z4S9`Ul7Fx@vWy0;L{`bPQA`0)PHl;kw#xo6;Nwk z$W5}GB=p(ah8S*jgqw%oeWz{(Ze$+)nkw(;u9_XE~Rh$J)Pt^Xyfj| z_KF=y603vdu7&}->sIx;6vQ*M%9Yqm$hKl1Pq^+ZnrS_fugub8z zyjdM~Kc1`)>3ebEQIezfTN5VfV*oBPN6F-`ynthy>WNOGF_Id)s-AEDK4BowgtVEF zh{*-zI10e@g=!h<33lj!&H8%CI04n&B>A3Hx5k4gd|bi-I1UbzU%`B;q`=j|ufQMn z<61lfpl=gfpL_C5#F^z?QHrYZO(%&r2Dwj*HGHp^rueD#3GYAyiXXlEZpf0gH(bMr z7%B||NI55&roSY2dWEWsVgZlqMPT#ClX@xj1k&ksss3?sh?za^t7&4;z*Q4F>wKE& zUe#ZRr67Sd|7>x^fnT|a0Cnw##)Y8rr#M%o3C4mx=oPTu?L^iyOi|9oY{fxG`_zv2 z8snhR_b!PbKegQ_XEmC|1=zb4m^UoK^hPn)Ns$!fcAaUZuHQgkE49yMp%*sNoa;;#{N@L6G_4xHpSHj~-`pi%_A2NL=~XBxTg-Nd>QE(_D+n zk~b`f&mb39B)Ik(PnZ@`KI6p{8bj!_yH{XTOUmv0e{dl+_PpZEt=0jen9^b$$jzHi z?R*@hulzGk=eg3g0m3DQ+WqgRa}Be!_`bJDom`+MmoPDsRn>3nrU_LTFQbQ6vR}}> zqo0X!;x|;Xzfe7nZ`y@Vlhu%A`0uRVZvS@k?dhfSe1>*BCUdRAHFKsjRW8!d_w*ll)X0GWsP)og78!&_Xjj_Vc3_<+bN65Q zD&TOH1wQmVikV6n>ibGr{p2L4-X}&%G_}b4Z!f>0r?LnqW2R{kZ99Auk`C5+Y5iM6 zF4TJHY(;)@VW8=rak_IiqQ!=XW5dTxJp~}<9}5F{i3c5{&Q-cAqe}u3YJcvV_+rMn z5Q4tf`ke0vaA?VC?8AVv1D^+ck|0PCeR@~w?=rlgx%oN!F>-Yfwjn>WQ5Jeel+b7Q zTlj3J+=Nkgh%x+t(V7*H;7{h%w1sO#b6j;CP87dUAQ9?VkeDMjJU|UiA3So;U+Hsf4ADzwCPs+ zdkQYtWlR558OOO7Nh*OK&NzP^2A3_5VV1UUZDPE!Y`_0==tkD9&)KUAf9qjPE27A; zOIL2}6%`*p|0e)O@Pph9X$@=GAY;f32DNAyL9YnpHcygQ<5?KdTi0iK4S1AFT&`{{ zF;XyBxHvoZ2??Yd^0x|6@!wOY&_G*I>?ytDzM25_G%pd_ccxrr5HaO?cY8or8V}!a~2RmZ)ROzIht{2hw_3G>8tF$GUs?kV*s0+Lj6Tt7VzPQq5ck4a1ytT znM*s0yvGQ=XY$;jxq0RfJIsMx@BUkNF<9%j>{#R#J#T6J2cb^H5Z}UV=pG;S}-CV;}g%V(83{7hk8h<-Z?) z<3@klSjV`~Qgm-)R{NWnzQG%#T%?^;+8^Ur{#N8gfd^hq)9vDQ9?JVPJ|h`DQAAPJDdu&CGM%acW(dBx7yq-&qFwk3Z=9;~GV{%Q0_hk8DG(|+qU*2g?3akc?kbpTtoIOn^WXN|HQ#AW`rAXkno(x?nLoms^W>oz|f~1xUX7;k$!q5?;f;rq2;R37Nb+74y&ERZjy>!|VM3?+z zpP#w)^o+~6QkInO|E$YZmo!ZT?l*mua-70+z3PRqakR6j%8qHF*8?|MxkxL&JUg7upA zOz|bFU_Cfa5r;#xEFpLK@OXtH9W*w22DOPRPPCyC*) zKhPB;4n^z!%|H<}60=@cMx0d}-4!4m`r-?e&wio;VTthJVV~bb3-GzzQ?Bci=KyA@ zU{eV&9jgWiDzvOD|7-08_wE&fj<}%I44*#94xWay7k| zuxt)V6XvDm&$ZCRv;q~rW@R_O{l`W`(dfOq&*ksF*qCCqd{FEuSIwZGa^_Qh*{23w z+rPP2lS>z&vo{vE)cNr+0FH9R0!aPl9PuFa@>rFLt1WKTe!n8fFj97HtpF-swZc*K zfK`{1!f!zefie)k*>zTbpv+^juLKk>rW~2Pzd*nE9MlexFo-Pd?~GQuzn(D01SLQK z#=AN2e;>@(2IH758OKY%OFET0f<@-`%%B%2vcNNV?EQ5q9`tyXp8yFoqAwK^Sm$ox z#qLvV)UY4ML5K3A5&stR34g}Uj|;id_8k#U-!5CGj9A91$C;!K{K-j}ByN=Oi0+=H z99LD;!gMe`sfQ7ZtkXjCv}o{3a*@nGnw18f>vMd#_p;Q@YOw9FHf3T=;?d&=qn+C+ zY}PL}a&u+?kvYcCnOpJr+DSf;ih#=BG|9gE!PK#qw>LI$=jRnTZU}HAHbutq!w;WF zNZ9&Zs!OwZ=`hD{u*nKn_S=ft|GCtn>)&y|Kj(6Tauds5>?wV%a;K56Wj>EH(ry+~ z2oR<*h_`@R^9i&$(^(2mggBRIWXFVa_-O(#!r{H57NVmt9=GOc6PSRvJo;WAj2&JV zfWX#%W1RnH^T*#M;G^P(`+Md715)w8kc;>7aed2z}=TX2}-6pXZ9Y4Qj zmF-|sF<1JvM~vc8qa#GIZxp&B98N=}oe}fxiTa?uw>!0Yl5V&{h@FOu>aSjOSbr(K zbvI_3yIb6%!a$kP*oX1D#}jixPJZn7fxI!&fj3v(r+;u@;2GX)<3GlA%A;{b@x^m2 zz#(>EjV|Bl)->ul>`IxK(2Qfxw@X^l;d5*#?N%}W;IT-PKlafNR}+yRsc`iKD=bP4 zZ)P?lMz0m-BG#`_6nU+fC2D{cWla7r&cfZjL9QYHd*tT9l0dVSEkNtec^%tT49i`?h+-WOkZ#NavopNmRB7F#!cg?q1jnQfj{u#S@M*lux89g+PLyRBsKIq$*4 z;}>E^aNCQ~>&Yi8NUWcs`GiLd?_he_GNLphcX`rQ{h6m&s`^PhUPW}Qy;bg7zYJfW zX1jXS+;(!kjglDPPnz4ypHjDQIYRHb5_3yF{eVgtEw_-+rH%P+&Y=~%>A$r|nm8$z z9Nf9$o*;~ZQ)PKjTK~C{^S@?_3)4y~QTGmv_Z}SLy~sIV8VJ~ts23^4!hcdO)jdRq z1mV9WskN0q`wyk9joDEq{h~{%Lc=}FEz%@l2*KE|5s@um|!9b zp%&4oXh{oxZ0?oUxOyiXYxy!qZ&&q?fw?dq+~{{d6xmdREHtJ!e5M+S-(R<{8asdS zU$W=XLbm^m+2meZniPhW!I3v6|0cG7hv43D)e`Yl@kbTPHr;Uqt?`UQ8>abVgU8_8 z0+mzFqnwW9A+OiKC?(wDPDf-tCR}!uc&{1JNmMFusGGz+c+4;Wfe4@?MK^y>FQ-DG&1^*|5%l#s)READ&fv)2yY z&)pixVp^D=t>a-G0n?#X7s=~*_ zNixSh1)I614p`>$?~I#T7tX)ctI(P+}b%;2wx&6_6>q9UQ{XHn%z z-13-YX@@p27&_JSfyb6kx)7tWp5k|dUFLQi*6rID$HJdoF{oS#=0h+WQ!)oc&F${3 zn>u9^lm|6sgy*+^-gKfpAqRTTqitT7?OePVown>^{l)Qlz^Me|=YiWJ-xz3n528UO z7_0JTtWG34EYP46C)MVM3BwGDMM_Hu_wUY7YEE)sdMLkfa!Rom$-V^>z5O%SqqC_9 zrSB!ZaimPMq-_4;k`!Sh>kkmBN-1_CVDoXAZo@sJhyPmNmA3dK3n9wz(asF>j9RS0C>O6{vEnH^@5rQtsF2ZH`B4`@N@-kz0jB;bf)lRJWNAiLtf; zpil=fN_{6BS}p9Skbg*jS|HKF?ScMLCXwcrM8K{qMkA2@OW7@6)6v|9=Eto34BKYm z!L7^P{k5DQYLre|eS`q&-lHe6Hm|YOQvS1c3S%BNJp=m1udN^^ESyK)8dn#PlfWN@*m1ILC=yH*1R%;x!QVxuYDhvbe?I zDJX7Q^*M#H`=?*|^;He!4#%n?ta#l`-B_C#2QUZT_7D7|^XkBMZ_4xyD&Ok~6z-z7_JVsjnrd=YuK_i&9XtnA8 z;!5XQP3`j$n^V{24sM?A$~>gBi_C4W_hzyoIt+nx>`bv<19F9(s<)e8G&(Q7PW|eU z;#;<9a~Lwuo0I?*QH9}iZD`znW;+;imqzme6b}heKMY=?;+xcIq_7Oed)6{SftUcD zG(Q^*ya_t|ZsX}uLu2=aTGkv_GT#yV?BPf?;KI;XBXtRmWe^za@ogVNK z#|+wL>H@mjE6nY>N_oT&sgUMqZ!k#rklon@vJ1_I`{V-()xTcWVm@k%*YNFzbN(o- z-Ph8J6U;2J&Iifs=t3T&SYD?r-kdHQY zChq0&339C$Bz4Pd-e>s*3IHOA)?;C~_$8f~g(>l{BDsbLvF0UWs66K-#jtY`Aece; z;#d>X;~=VxnqT`%h4I!+T@kY+yc$tPznrqj!VXPFFEt4PKs-{YRA zNycHoICJB@bK1lN&G;scL@#FmR5fGE@7&R-42w#8S|y**tcS7GI!IlVIsW;9vF&aE zrzAnWTptC~FH84M8813pRu&9r#krdnOM zS)k`KJ=coCLfl^{wxh)hUWy#{oSP(}TW@&8*$DcCN6LAR{ZV5mN1Z2t7{AMQ7;K^( zb7?*tV$gx17>Lew(Rr$HvCYri~q+Sp)1Ye3L=n1!W!I!KW+fdhNG)`ZCDhQG1V#L`ttqG5?# zHEiutCV_|vTH#9fK65k1xjYZqOFUP*1)O|Y4;79yCfr4SM;}*yyU)6HDZ8+ z|Cy*XkFn~gIXUW~l^=zHlmd7tUoc!=&u=-e8BW1I^F6O{o0+rT3_j>>s%A|n&)Ha^ zMd)d;G^`pV*mwvV3M1H`TwJOkRJAvyoPC$B7}Qvl&7^8!zrK%NSfa9ApT_4V{COrV zZBZ#!K-aLHe6j&cW;y%O8cUaC*yciFo8zBT&#>_WCJ_=iR-It@b+s@~#rT8L+u87P z=D9IH%4-%4;Y;;|oc$yH)G78NArgrXuur(&O6&Zhpvtg%g2|$Y;6>Rpou%X&{>V>9 z<%LxB?UeNwf=x!=-CiDXrZyd<&QTrH5ju~PHF2ght=qshmDvqc=-^+ZxZi0(;u9<% zoY+>%~H@7+{zLQ_*XLYI0+-IFPeU28DP7mC^_U!kt0ck;7RM&J6Qb!M?Yt1PwB;-U9D(1HFkJ;h|vk7@p@WgERQ1_!v6)|z1&QupfkP#l}gAvsG}7s znRVvf?ETE9ysGRwlD_f2eW^n#$R@$s$=3BysF#VK zbdsembVl`T_3$;x1G1o+dU9B(Qp4ufGTj1uBjQ2smL6C@my%mB^qoZRh|#Dr?g$%V z&V>B$E_~(OTZV8{38^unDEmBCnaMq48FYvHrrJ~HWO?vA`8lI@QRFqv>x!Jwd@+|1 z6>MBDHDCWwV8S}WIz>J*oQEIvOmh&$bgTZV?k1CDYxKoG&>&^N%SOIftB*cq|Dw}x zl8_-dN`Wc~b9Vq&FLUxeBJ!$MmFcRNUPR z=de3h4CfEoqf}FRMs1yN@O~m_(+J8?4&2|mOtH7DcrGb4SHXVcM?J>o?3IPG%~VcR zq9aA|sccGUlm_zDIP#Eqx8OH-{YD549;Z5cMx12(;9{=Al#TNs@t1%?Z$>SMef>R^ zW@fNbc~j&b#iCS)P0=9=x>HMGpoAQGwfEg4aLdi%y^f{bvl0h*N0g^2&0!ho z;rn0R+C}$`*nh14=01>0v^Z;f|F(9Z&LwJ>aYDO#hxEWXfqFwKW7r&1Yt6!W30fDi zYmLLOSR%{J)LCg#J@hx1PtQC)CXvM2l=C{6e){-OpK!CP;Ij`OCQ<;YjgAy}-Znp? z8no|qK*$%vXwfmKZW`X>d>-OpWA!Af$USDVB(-0kzSf^Le)6X_;k>pA;B}#D`|Zo5CSnu3;teU5-hS9p2e^ zhUj|)pzY!XS+!ZeBf*z94>Wp?F}=}sdvmGE_n;b`rOf)WusGzbU$XY1!*_ndeKw6z z6Z8-v%Q3!FsrU8!Cd#Njm(!v#6Q>J1wG&wvU*2gW@BTTrLDFGpb4OOEJ0nvL525Jf zPZ)OLHe3ZMNI5_ zQ}~chU2PQaMp8Z9y61;$s{Z1(pFvY|gd3f^kouH~x;uqgU|D^CThuxZ&<>y29fI^d zOfF6jUmEIMy1A2e#4fvr@yudI@ymhvkMt*TXR*SD3z4bIl3}&PwXiZ8=vKD$oq*_K z-S;L%2i!fT%}bWqPmq4p>+Damhpw19rx5!Pw{JV~k-Wc$iXp&=#&FsG%HdHB+H#5& zDs+>KWpmOkPK?1Qrq<_KC~*cqHUZAak38opGvvQi5*&EO=gp73cHzk@9>AlEFO2vUdm9&$~E^Z#Hb*$Z(JJU_2?q9&7jX; zC(005j%u?53cEg>LZDD1{2764Hu^EE)hEon4}Q&a{Trm^x*2<7S!H6Bvq@)_lh)Yi z6xUTV@y3f$_ZV9p#IF<)!VMOZJmn_L%X-WD(T03IQqc8NY%6sd2QIxG?I4c=rWy}+ zU>+%Z7l4)0Id7dEPy6UJ59MWEx*u~sGD%Y$6Z$%SN#8;!RV4J)9}=32k#Isxuail9 z=}2xMhX;fVBqlTjt(Nm2x5i0$mHKPM807!Vo)|+FOe$JEmile4I&7Y@E>FwTHfdBJT#rOAS|rjH{*-;stQ(S zSS%)u~-bD9%(14j&pZZvi(fq=tBw-R$_ka;RIQBUyF{ziFg=;bGsTQZJXD~ zwX_-wUYwzJG(+RO&IE@&2Q{45c6Urll)uyOmF?x5b6|ca^%R6D^QE)#-6U5t!m#!Q zdxO!KA=g6a$|?7Ym**w%DPE1hA9T&z1s4{co7^&^bs-Tc51|VzOmxE=DpF1pVp=@R zMrMC@;$~Y+N~S35Ku$2`zB~#jR7~ksO9i7jen7ci{gmYU3rKq7+!dtLnOjsg%i+-I zT>UK$`@Q03Hi60gh&xkT4Dy1fl4zhcnqw>Tb1RJm?PrQ~VD(1Ai=M7IkDiMXF2(6g zT|Ia1nDDc*c`7Gg)jU5@hY#aQbjg_N_E{H3*?Y$U>3q_4<&!$CSQkK?iAa z4y3N>d&f8NAB$1qJuwJA+PNuCNt8u4hvUXErrg-#D{lHteZxXMRq-h<%c;6O9$vKsfQjc&uvH%ABe6{ zonxt<{8Cz@IW+o>+xfnMYRq6?@J37@8vzQPL`K@zci^AKR%btPd@md+)6n0o8H_+ooyn()rEbF_Aa!Dl(JzZNr zQZ-H1W_4K7`3wiw5qt+mrY2ssVxd{`{8de`|k6I=kCM@|qzOL+I%R~H18kn$;*;T$N z6^^+Ub;QfG5aKZZ+0JJ++!f{g>wdN6Ud*apO0);UX>pEThjMnigKxuzVDD{d*G3gR zS+$C-ss9SU$A}sSNk37cWG&#=v#?ys%*iYavN4rcG=O9km!Sb#r$*s zezQR7&dAhY$-Wu{(%;?(Y@1POM>+%5&3jES=%v%|dFYr&V@ek4h>sp4(z&A&M=4&j z^)y{w`Mugj1OwfiSD*xMSv)jXMfi)h{;k>i<(8*R>`q~rJ5o9M+#W#(M30C|?|AvP zx*T_o_;XLYG?HcO6G)FmHrV!VJ-J*L6v%lYDmn4)??E0R&T{$@jcvn1!)F~%!-5~r z7mY?ScdWfnc5W`dF5Rreq;D;PXbGCL4exriwh*hYvZy!6#s1Jo-ufbX0AGYqSEJ{9 z@TO`Nd1e&(&m*O$ze~l{lTtyH1jmSB9lb`5G3P7`13r)y9#Y9?5J{+KP%Z)$lCtix zRkO0=phzhZeT%X8+;Ztk4p}mn#TPwd$m!4J#)j9&M? zkRaR{VO2Uw@Dvgfi}wfcwZgskR;t}zicGC9E~>J2m!UIN)rxYz#}BHT z)hzeUoGv_MJ9|sc=!-_GjM5(^F7MC`OOBG9>moCAzny&!N;&IYkQ&PA z;$!xcW+vv-qY#DZMTVL4MHHism57ZD(fxE^eY2^Q@YFUw%^Qn{z2P5GGwVg?XYXxI8|n=9OYqtnyf zv@1A^D3ieb6Y;8K&GM3z4sFm@l?mDvH;_5l%PcyeEu|2UBjc)O^PL)7ny9PmHdgZ~ zIFd{3I|!LieaIs+LS##hF0VP>U%I`dId)Odqv+y^dVeY&l$c^UFOn^>k8YGU%Yhd}6?ozVV$X^wh&>GgBJHitc70UIY(HJ@YYZl&qcep*^yN~LvliKUFg zaj^x~LM>wc+kuO)l+p2S6wGHUqBfb6K;y;q&pphKaSmu`%n14POl%Z*3=@4EA@ zS5Qljnz~bxi+8O4Ob9IB^8v(0-0Psa@USG5w06FQ+YF8110gtEMBf?B37{nS87w zr4Ggd9iQo@bHsJ#-qBtX$EW#TFhQ$C8<;)8Uq(DAg4;J^hjBvv^?{MsNc4!L^=q!2G zZC>GdiAWsnAy`f?9fIU?kp>qEw_Z za*#w*Bgg!Gf!U4t0I#`UUaVX69hTPDuWG@9BTbmM_gzZae8yT-KU1fUUEJig8fVpF zW9wMC#TE0VbWSKfLs*IY$KZ;$l$UU!LR0i8SQwmp#1r_TpFpw*6jv=hAIq(CO>7MaLa`sS8CB<43M#h_uM{ z(H&nV5Rkxa*5K4W`^v`mg@OXcMX20UHh#4TG=peMKlE`6le&IJ(u8%~0~gst3RD34 zAR`0Ch3VkI&O*r-RKn)Js0Jxk9dVP!iN_$9;yFJ?h;sSNh8P5MA`hvu=`z??fGj!v z7X7MY->U~SPYs3fo?myVT9K`DqG~Pbz*0~J138k-se@;!r7$nnPDd5FFjwn)yXt$U zzVxJykTl9s^-4xKv0_J02Ri?Ih6p-@mFShCYJen;;fFm)Ee8W&>m^hfaw;_l#uYHCeV(gqbWL0Yz788AflRfr zLNE8C@cra~>VqG*ttY(%ks#vQmO~G@CfywDTsi{a9!v}DAKX+Jq_t-F=z&H&f=mey zCw?8Jn;F_rD8^87Jo+Km@_U!!4?XN5yUghAdoSU$)5D)$c?mcN;ob3?dnfH3%BkJ% zo`DQU-7!<18l}Kj4s|z&_2gXUwNr42c3jtGSMn*+=SAr{)M>Q3wu`>soe6P)lyfmZ zgaw>C&?||O&M#e{Sd!`Fb2t_)gX5$0Pp9nU0Y@)|{(ui`g@?Nf$)#DXOS7hg!K6VF z#TQ7z=K|GkABc5;roAQyul_kuZrcLAuw^d@eowCm< zPonK8n@?m3C8%4E5AUS>gTwi-SSq7913N=@` zy5xE`*)MjJRgd!TkG^x9f7v0oX*ES^bIlY=((;_apq*XtiAeDxyQ_nhso^&WidxmN zJ{0YUm15!oUm>~}z+5t4;Cl2j-*CKmo$z%SWk=lNNMqz1&5Npp6rEcPPE;0>D0@u- zsGj&^4EtnGVc@~Gv((^OuwTdCWET^H!V`4@w85}x)#Dw??ET%#XB<#ht|TNgs?oEN zzp$uo?`c$%=+lCvUEVhL$`*y=DG8FUe+zMI_Tpg5ThZ?Jv$@Z~d0T`TY5|+NPtO|4 zFRx#Dc#L%+jk^ZM{M(YOI?>yx2c*x#vjj~^g1y;DC3wQ`uGlVY|M7gzF@7=++Dn zmCsB|^KdYKOZ%LV%e47@XSE~FMyWjIjutIU{4_*|E|R7F`z!$4Zc8=O7BkyJZAUEH zfRg01+5Mrwq>*TE{ROHGiI`^9`~B4D7qn}oM|!Ud=qA2W9e!R4+z#wZmKGG9>^b3* zG`ZY~!Gxjmu;zKxY=Nm+$_Px2k&bG+Z?vO;F+F`4M`IQrOqBlIRur)fj@OhQpHSv( zJpWe*12$9O0a)C0?o9>ySMD;R-Q=|O& zTIIw_+{g*KiK#;UhTGJGGX8AUK$&Y#L97 zH7a)!)j_dkeVg{1!e7}WL>%F|-1r`+rx_$NAf|FuPU!kMR&n>`JT1>6vU3ymSCEP; z!AyLhizM@6$!GXg_;yKAa&?zqUX;4sFL-vuh4X41na&>}7w&ij?s&MDVyX^Y zQV(PfrZwzVP7oNK_PsPsIMFix&Rlmd0sTeAva#$##8d)6MvZrdO052U+@Ed(?o*sm;gagyXuiQ?(-%Na=6NR_vECXP%af(fohajF*FCiPWy9GnS*EGVdW*;{X) z$Uy#QFr4180(b>cBG(#jKl}gR3&X?>+$qw7F7H2X`p=TzaQM(7xccwLkN@h@tV03M zB-wZQt-$|6?B8iafdivmi8q4&j}wA)K#g-mO^xD|f9Lt%8!9mn#UJ@w5B^E(|94KX zy$}R0FMHGfy~+Q(lK%(S9va>{&qkW(dZt zeSda*;4%;=oQV_l4E;mxc07#gSW=~49)Fr1u#cF9Qy?8}?NB>v!g0@tSsbsa0>T{e zK2Qc?;$UXGh1aXf`#Rp=88r@^d~mW!`}gjC9k>AmelR&8l2z?8!Od@+(=?I#5_^1d6dz^2&i1ByWA3^t!Iu!^EYFL7ar zw!RN&tL#qV)iF4!6-5@8SFNRxr6{Poew;D;Ho~@_FfYQo>mll z`{{A?7$^c~wM~nyF94;K?!IcAKFHXb^sq?ATA&p{?;u!Qww6b(jNkipgx*<%t^!K;pH2nQp30V}K}gLHAEq%%wQ?@7MN!ZK(dY_~2nAwJ-^1P2CJ} zbbpVvB~1hcP7kG_TExIdC51r_LYDa82Op9A-FSpXz^OGqv0$sCjUfT@=1W{&xEy{q zuqg#BMGT80*sQpo^}j7V`25Y1X>I@!w01xec$! z9UF7|S@mB)p^4?9>R>?|Lj-~sFL4pzJSQZrSvVFVLX?8vfF}NK`sKp7o_vIqxAhe# zwp9UFAXP_cibtar6~xvoF+Dd@%5j^Ha81_qo>MjM&34ZP1=$7 zWb3l#Ua#`9hES*f-Cy5hbfOHD5A~7DkqThQctK>`&zuX<2eq|*ao=bu(3S!PN{bXIZlzG%ic4{Khf>@%NDHO7yQEO8Sb!kK z-QC??13>~IfhWE9{@$7QnJ055exU0>w;+xXAN15nu@pRlar7fC`$V~t&=n|Fzm*bub z4I5qywLU2nB7f++o6@4O-hXeuor&nL%f=}XZ_RRbo--GSr!(jkPE(-?e*RdTM)l9I zw0g~QL4uIU!t+y8QX)BFj4%Rw6ufFB)0(wI?EGAqtL%Lpo}=k_8cX_S2xk8SarD^=Uu4r zPdZ-Y8M39cfp1ktF1$rqw4O+f!bEt}90$;VnqQspEWkyZfTFxjDxO#v=f4LwyxT^W=_R(Hj}yHS=gbxbDDZ+1%5{jlZ&})f_crgX zM^fJU;3s)l*C@!1E@LOlE2NYyyp6p0mpb$IeK0DV#jltjBfy>Rw|&+@>FNx5w1+5; z#QHkA>VRJ$RY0nKeSAc3$ID^lvhglK6-zl=nBnyUInB~6VzXw+D}g4Hn>i7rYG#ua zzow_3xI}JanacL?beRZ`=(XbCwF_zR6e?mCzQvg3(rYGA69`1rjjo7dgI6F671FU8 zJY~PgpVo88cb~C)t4P||4t`#u!GbPaOyTtiR7(2Hc^1hDYshGGrf1e~{Jx$xLkRy* z&Hj}*NaCtJ=c?Lmh<2gLVka9!@PUydH}`4DYjQ9f`5|;A`RPKjTagZ+_1I$0oVVouV|GKcL+~ z{hn63z<%a1fmOx$e^8I$B;5Aq7Sk%CLuxim-m)9nXD3Ugj@==r)#PJ}wU1&2m9Z3y z40SPfegZ-!7M$ za}e11`q{qfg$f^BhJS04RCVEP3-$Xvu|e7UGwR61J^~?woMw)`SO&Y8%G0*N+miyR z27KEDuU&vy)L%p0$)K7)eXG!rA;MhGg1-bTPuoJ=3o2Hjdde7g9odU2osy<$D}?;p zOkTlIw_V}_$KS<8zcYref>xe{bb;YW2D{HK@8446U@sF;hWl(Sv+^23b2=NCmfLBs zg=+YloPU=n`Iy1NF%~o6FP`ocIXjvKK>j)%ZRB}tdl(ufZue{S<~i@S^BlVN36q({ zD+<;wjPqH|y0E{GnfDdVzqEZ&K}f5I6!Kp=>&`(q11p&}O%SP2uz}#}!|!yj;D6=m z2jXvZa`MykXEMF+t zAA3mQcf`|`J$txKKDU8yuUUp)Fd@+oH4JYcA3OEIY^!F^;FmiwKGcu@7!P+k)nadX zX8pn}1JP~K{=jf5B4d=CY>Q`((DXWASiNzwC;VQy68U%fkIThUNGfF%a3j+mLqY!H z>|VfAVWdXl||fBn(--^J5ewCMd}C=Sh>Up&6$oRCEE z9P7&O7XUa4NgzmtipT#nRdPBQdezo$W$3RO#^zhCzh;Sgbc&$#T;|DWs}{{;f=Lzb zDNUkUMgJ)n<65!S!+JE^9SuZ80n{%s3m8#}Hhx!B0)O;8G-mAu3O@hFH~%@LEB&{c zKg2Mwv#oRmTp5JqyRL2h>Nz9YWU8fcWzis2tIyuaRv4;RdWVNtv_JaG+z{tME$DFpt=KRY#|d^$5&Blxz4$aHKLo8v~Jrv$GK`#%wEzcHJ0=Y+D+X^cT982F?@zkaNM>8tw4am%rsMXm%v9>IB9=Jz|+ zn19>rO$u&WTi2ZzNAyl%q%* zvA!1@STC-}H~Jq)rI)xr8x#UI7Hsl8tLj?X**@$2@GX_Fk zQy-Izng82K>hfp?j}A=-g9X{+V{l7R>Kn^?ut+!8%-lkvtKxOOVfZw}{vWly6lE}* zUbSEoR@SXJwLlI1;bA{sym&EI%^1!0*v>It5IyIAa+q}4H-75RWOo&f@1F`hhnnVn z57ENN;;&mfxyYxuUH+fAGF`SEA;KG2#0`@1e~+6V9pw4=M^mD!a~_Su3m>w-=29v} zTz!X=3}v|+Xm3BVErefdmk#!y9Jv^WRE5dp5?xM{s{d787j$w zT1xz#9fuiB@2N7|rqWHoQRN>ySPN zwr@;Z+YIsI9tAc3)9{nQLAQR#bOeQ6;Jk)@R5hgBiYgud-ui!Caa1UWOOBxAKXBAP zYm$Fk81oV~IQflb=>KyR|33P^S1{AtI1#*~ZWin}&g<CLvBIec2O z{93Rkj`Mpf(KRm|Y`_i_z(5@b9ub`_`bi5bTd z@Q<<>L-EPvG_maY{(sHX{o${x{L@c1CMWI-efws?1DqM{Jzh^M0EZS{{PcCb!rZCK zAcUK=Q1?`=54y2uQdkgj*EI+f5>B_nrP>4>!UsT|9Mt`M{h68u_COwr+0c7JOx z_D?b5SdX(r0s7jY1rCF{)8y=?XL7C00tHg%80xwHY6C!EiVvaqv0nFTOPJ+JBV~-% zwkQf#6!L;I$HWDsS@p*zfB_YAdrSJyL$=Arq%)0)t*~mblXD%WYVO3&4ukhN66z=+ z9>e!AA8jrm_{I8oE}iRP(nG*jR~uXnrGRBI34lkSq{V+ux$X^X3^`ghwZo*LL{C-X z{tHU?^I%AZAsJMJ?*a*Z0`^|N9>U&Bj{NN7)H=HWa(vBun$XG_8}B0aKx0sMeASf0 zZlK-pN9{vHYnSxo)+|Fp(u>u&nyzR_f7Al2f6j#^&~E!QM+cr?;68cRYW5!9)Zkdx z?BhB}?Pi?df9u*{8y|s!o0@!?_A?|&O%iGP1ccTkaq94tZHm1XRr>xUlZD&5+__h@ z2sy}NR1#+h&|+@@dEpDJ`4A9sp%kwY_j;|;9QEWck=9Xi+4(LX0;Pg4ri1*252GjIdaW#FHhM zZ2suoHMJ)KU%F1X$$Sl2Dek-cVg6Q>x`wl155+T1SSv*Sx_^pr{_1}{V1&YT(ZhVd z&^5s*E-iQvLkGXrF;KVdCnD$ z;&q8{DhKxJ!D1*V;luY|#ZuspJF6l#>ksfPPZ2$iS5+~9Q74mrJI+;kH1!uZZ=wH_Kj(#rqm~S``iXL>Hrf>s#Wg(Z z9S3aj(JXoODhV!T{pmIGv+6+Jp_#en)8SD^-bg$lQKybCdSFk#{vE{SkclW>P@L27 zcGS@u;?i7hh^C+~i;8i5=?Q&*zElnPNq9+>Wh(IS==kxd8Z0}vtLJ3u@v;@S^Q^$ zx<-Ue&np|%+F2LR9ee2mdl9`{f95Nyf|`?pTYqo>%G$==;-~aKvbn{BgZvxRC--Tc zDweXXDBLKdUqa38#=xb2QQZ~Y?C(4nQCvb7sW-7;j+TLfJuG`Q7n?eru;!Sp7E>RO zR#Trzb*e$+=QB&7$9|XC$${>gQ8D!am0I6u_N$=}qqQAlKWW3BGw~}Z#S-V10SM7Z)3--01P;QV z4(P*Pmt>yE=WO zfU>9HkcfyAo;>+;`IzTSV&uFsj{xu0?{AK%!TOHPGg_$%CVf8n1AF(f@PWH;2udX* zw=U!<0MzOX6m~b!`R)7B0M2%Vp8!rta2dX1j#f&z}{XTS_;>RK3_6y zEkGj2X9GZfwlPj?)E_y%Q1Wg1Z*VCL< zUF$@kTB`eq_}w2ygpKZj5&`M_8e*Ln{HJ?slmsJfLw%UQWJkXGi(%p~n*?G=hHqlX z**-xun1uKv9Ln`5LIq-7d@MX~o893%gA?WPy$(m{w1pQ|&6A)V`O@na>*=XGnQ`UOjZs zMd|<{LVDTT$WX6cte2|Ks4;1JuyJrWd4z#`Jrc8(vVS>>KmhV6@-)Mks86a5)n6AY z4wVZ#f9l;dmz&nN7zh{Zbk7mrpu$-I$vO|15b7P;~a68}%oZ^`kqc7+xnlL%R0 zb>)W<+8E;DXOI#H^%i~R`d23YAB;R90^3nrQod^*1;?&+URnG;Rn^mJkQ@CVkH1Mx z6gz%KwmZkX&%gY@V)eJ#Scfb@c3FW0^8qMY7)H!yOTMA+z=G?_hb#YHU}At#83GN# zBdT{0+-Ppw9z>A9Zc&<}rC#wxP2+>vV$=?_4``=_cwfZE6f1gXo?$^>t=gjT!A>(y z$dAXXH`At)I&m$k0URVxtVZMC?&I2#CJK} z5qkVqraBRk6h|>KWLj=@P8A!U#E)H5CfQVD*|bW2x;NJaM{EVo>ek737>?-e8nC5nqll&!P9U|VY6A3P-_2m({{93 z>%G+>Q8KJQeoK9&O1q)E3hQvPp~}wZi%3inA?^CzBdKCl(nJ?5LBUxxeQ!Bid9>kX z*#M!5a>Jq+ord!yT0XD9!Sn)QH%_R7rgg7U8^wPuD87?Kt>bv=ev9pj*NLLtBY21;2cYoHxgzh)9P97Hcfyy7KpSA zv-l2@9J@147)&1IFU-5jPfLWb*)+7npBiKn^re%>P8LK6P{RxlNrxqt%*Nx?JQD=_ z-lIEU2YHoWu*V84eaOCDr_~^oJ<#m?y`Cf$Bh-qmUw5$QzvZ8NmRQ6V-yvDZWYyOXI`tT2I!!T?rR_N!YnvT- zXl&thW;h^uGcS1y-3T(EJep=<8Jdd$;n;(1A2nyf#Dop$J>0Pj%du{*n|z?7L6&QJ zpl0W9QB>S2xYuxFT_yi_pA(K35xHAIf<+3wLb27ASv^lKvM;$}Aoy5pJwY>y-+XP- z_53$YZ_l<&27)mMEa(7)gJdC|0Wb&{LhWr2`tFqCf{(0)e2jyw$y+GAYQ&_}D;HQ*{wPj9}rFtd_ zfBtht1@1gjsSav8RRG#V^UU7e89FN$-iw;x4$6%svlban6*A{Be>W)_Jy$~$7Z16; zUJMLKpq6Q2+-jwKV=(tArQRe^({#05@&P95^#~26c<+wI#@{95$E?8|9ux8m&{fd1 z=TI_mPi+PHZ`S~FfVY1DCQN&KIYxT-St1g(cPo5%{SacYWHpo4gHx%X_dg%EN7Yc$ zju7sirb*AC-@jaS`!vp1o>2_99OiQxg0}U-hY=RL$PQ7bVuKt{>y=dGd3JmA+LKCB zal!kQ+1eJL0mf(Oh;+tN%_c4$J9U$jMzo&i5^JeqP*fdEWg5Iy0`5-%w;l-7g$A<{ z;bXQB@48ag5Zkr_Xaz*p-5yLjb@{_LSKNBSfi)!$gKdFv=N4kBibC!=(^TJTZQ5~6 zZeMsQrq)$^E~vae?~C1z%mo$0(92{4Zoeln8F!B~c`Pa8&JZ%bz+1maZg;cDd_tT9qkM&O4MB_sLlSmtOJiXbq}?mUe4Ba;uZ$?--Ka?H%%W zzt-$0xe-NOtKIz1?|1X@770q3%UE>gcy#62A2x3(Y|NRlhxbL#K39(yU&J6bpElf8 zCVxO7p9Njf4ZFFzHCN5@>$h8s>q&6ACp)q8iPw3dt1dBPQaC!^((?_ z?B_IA-LM3w>{pr@b(_sYC*M0v7RA`6fpB$~W|b^2by`olC(*g3GUx{Vey^Ec5XQWL zS1ixa>48f*P4zm%Afg4<_a3hh0{Pa3hHExdLn+KOQ;biEnTRTnw^AWu?sF@W=il@I zl{$GzfNWn>vr*iU0rS$aUc3? zLyUXAyJuJFb${9KE=ga|O6PqfmK{L|WULK7(SkjO+;1sL7Q^K`Z|ncykim?do_;^K zHZ-C!#-GHf`o?gwIA(E@DE+J+nes~)jG6N=l@>gjqEfJ;sHz<8e9tiwmxITDA@C3A zBZy&qM9=P7q6KB0p!MSX*9z|j~jWDd|fg`h7H<;NVU^y66J5 zIX=9k5hYn9}9J{ny5!CYWUhkQ;Fc?@YE7&X$z_86b~wG8K6XJz!v zlAE#z@gV#b($8UdaJ>bvja%=_J?7G9+g*;7j}5YTy_%48)E}Er4wBU(NM-1hoqDjD z9Iz8EdXmNU1v-5DTjODD)u&xn&EEwvfq@(HEH^Y{)eofpLW&s!gO=${3sU-dTukws zmT=lhLhSgwH(#+u^*1tJfh8bBm z0aLj9mNy@j>Aps5VH#!56rp+~~jajFHAg z98DpJKj_t(R^4;uo}?@hGV=)m3Y8M=iq(>c2|xBjJ6nVB+9NM=hA2wW&k|14AMQVn zuZ_)l)8<=VEuLgZ3MB6xZ>`@`_Cz3rOVnzkMoJcHe2?k?uJpo{X9k_yUuZ+|aJ;Su z*U_`f968ra;$iI^3}jEXVHern9v+4gqJ|jUkfW=ymwE0J=`r2K7EerXA93ik{*go7hspK2y zYQiqOe9>Prputal0Q#*!{rkv}){k~3KD~jL;}C}oZg67Z_D1+lr+|^wjjRX0n1wO< zTqIaGnv8YFw_vD7o>~s<$_ujDT?Wc;E%wXTe*wbw+_uA?kgG?okstffwlu!YJIqh; z$#SwA!V_x)_c4z5iana#0GOzqkynkV*cX_Z6@;xac70ti`n-Gj%-n^e?dJ441$8RS zcfM3uiE8w43ycxJHo$okoU2%1A)^ddw^=HrnFfs+&>nm64kpl--xwKbv!u;`=~CO+ z>%~j>zzWiD^lKcu;PO4y_kGBSpW)?*J%e&KlfM4$;+DpNR(2+wYFu)7ayN zVIyrn68M6wNE)6ND)tHTY#hz1Id1U!ls+*)PiCfklf-{z+iF@IQm$I&aHneJ93_Cx zgl;}MYISPah6YCpq|{;AFIMYt7wyVRfAfvkAKrh|L9=o>dAPElj5CrkJ+YSOGtXx! zcpIYhyS@UbBnx|BVaF`dX0e*jFkPhp<*Q_$6O(YR8GbM0ZgULCq4Oh!pCP~RE_}au z2;WEZU8uCJ_dK+ZdCi+~xGOU}a#;J_gnl0PEr^GD85|-3dXToSC4BvxLA$Jcph5)a zY3lbU$M*buVjEtm?0A>13dbOwYGtOpq%+u6G^=jnC5UH8N> z#|APm&`Dk#_v<{ZpCV+eQoi&Z+*R0JDbAneSC!3_TqC?xa1B&633FeT%cl$%QuCXE z{H=6ZBmJMFp+EnDkI@scOSINwk$=Hux4P>S>UsjFbA6vwq-yeI)-_^xWGx-=1x2l; z4}v+A{y>NGdM9U5CROnS7Ja=riTf0|VD8(U%39;^Ioq4)tk}8q`uh|6JR`W-DKD2T zkAuS$2R}_m;-*Q!p@Umb8e?~}$kvF=7SqplM%T4k5p8mTP*515WQP9n&R$o>0Pe5G ze&MT4Hf>h-D`?0>!sNFlO5vaJxnj+IV!ZFI{0#<^r)owxZDOLJe2-Y`cgLT6sfsFnK#Z9ZtOu&Ql`eNHRh& zI2I;bU}M67R*AadHemoZLod*&TCyB8@1{r#f9ip5w2ASB{%i<48$+xU@s4Y$7wc=N z?!wu?!z$1Ax%^b`PQj#%(I4lHLWRwJON@bRL+TPmGz{U7<;Dd(C(8i2x={m+R*Hq8 z@x+25zR!Ed{9~(W*EVCu)2CbAjnPf{;Vm>%#~~y2SFXFu(&X=zfMhxcowVA529M76 zHoD%Ck2+ScS>gMi>8n$>UIug!<9ddh;b!WyB=&=O=KPN5s)rcs+qD^nN<>o>Gw7 zUB2lBlQvwB&{R=(pobLQKH4l+ijPVQhn4k`_2l?dJn%dn+|iloSgGlGLb{|90~7{1 z#>>uHE+zQ;xavj!XonRjpOmlUEQw`)?D%Qqovb>Gw|mQvgZ>;X_~vc<=rcnyG`*Q2{GzihrA@&g2G0aCuiZh-W(d>T6&GoGx30r0`ZEXj`vKb zQhqJYj~s1tol!xk0~gtK!hLq;GA@s%#4DZ|G>#JYKck+V&N?VxI|FGC>y_#H{K}!x zS$#;XujL&k{;lpaF^2CqXrF_1R9}>qTDymcZtP(vwGboz(u79#l6RXjaz~TFX(5fv z4Qw3{D8XduZ3QGjYgQ5;2x^_}>rG)eu5rzdscIaXj87(*C6yNa^6=_B3^;+g(e|xD zkJzf`X9XCeJEO7)k5$ZqL&~I?2u3i@+P2uB#)>F}p9S zb#R}rcwdsZC82o|-OS~5nK#MC>5BMPv{C%xk;SakskW|ae%iTur}pgc6uFs*<_CwY z{K`QH1K^)30-!AWyj=1-$*Q#&V}Q-06QMDp-3wbDm6OqZRwvw>!=p7Ttv0;S5M$i- zyJ!JRN_^0@Hb`H!&;jYTYnU(bG1)Bf%XOopqHcKEi%y%+3t_*-DlK5S-7=|4S++{^eB{a@q>@odk4LTbm<2L!#V^j=N(*XU&H}|px-5k|=YAs-#M3#qiJfY1h8EoT zYTh?H1k>@(6FuYm3?d4uCzfIi2srp5#uiH*7I|-&I(AaCPaTH842G2rPt94_IsB5N zIciJQh^$Odz%{kQf8+!SLU9h6&^L2jnrqt4`r((dm9DF zc!_YBePp?AA z6fK3_ZICT?6T|sfssMYQrf{_ z`p_S6SdX>;3#0M@4H>Yr|EOnonMQExf~Hd-=a+h(H@d7-cpW0P)8H=ami3^wT*&=m z)(Ea~gUO`*c5=zqo$o>_5S|}ggQy?L0Al6XSKIRSW!1k`>FQ#)dJT14X|+qMtnQQ! z2MfRtwC|nP8aUhLw~k`1iK;~jcSQ5aa$kd5)G>s8ErH}@DwnqcECWn0Ub6{*-2IGJ z50MIxUu|<*B9Cr-IIPWNAwQXz^}exQO?!;!ijBP5@GY zL_rmP{NV}YD~r>?OOvq?^`(AHlvPK!AzH5t;_J@w%6WdiJ(Y4c_!-9G@w@h{#6+e0 z=N_)$P+%V?ahSJ-Mu3eRioDsqwoj2E>zRY7CUYn4(hef+6&adcYz?4Omv=bvb-%55 z%uKqONUq%zG|+IK4$j={GN2w-s=x9$gFS`t*|^=-ud%VSb536Y16CchaN`6LK`hqi zpH~a<>i5~ScWD@=@~c|d$ITfn;?n8CS1zTBXm0{QAU;j zTG^?g;N69e0$MHoPx02}8qY3{@^WoY8EjR)7yUqk;{vfVrG-jXi}qg0}rW?=jK1uwz8F&tdsaNf31dy zNbu)uouI>!hb2bH^SsJKQ({#54_Gk&@#LJDN~5@Fpo6ptgsfw>rI}u`Z4>xGQKsBG z7;*3JXOd48)Mpwyzf>XO!LC;)8i5f+hvh$CNSMr`ChmVLD>$p>QAH9Iz;>@PJLI0pF8qfpcC^B1}j~F-;?09rFWg1CZ?cC#j&# zkGe(MyMPaH=+PRFhLZs)p8Y2EOVbs+GpN;634eY1RWfc1Sa*k4gUN{aZr-O<-Y~K} z{7an zcE%F06LFg_`;ZsvranN_(9>t)z27EL!1APOfs~_>8f99SA?bQ2E>kidKP<{har+~* z42pFp{4;DlE3yJK%Ux|tVFrz=@7mOe$66oF8#JyC+Y#Eo^#wmqG3<|N8tXoz42^Pp;Wu1>Vc2@{0 zsa=w{LO1xzW3Q?qga6KbUY_wbbW>j7_x21DwF!^k^UWcVrzJ1EDPAPg7W;RQHxBGn zh{dljhGOB|Sgi``sN}sSn(CHfzvB3wY)6nW_DH3rvBjp=_hQ_?>O(nr@i|8j+T)-g ze8RJ#SGFBJMw^w!6n9xV8z=D*BvFA=K2Fu#p-RYPqV@3&Q*wH7r)Oii!^g=Oouv)N z{1pOW>57jx!i$Y3hAmK&c{`Hbgd?^PtFkAHF{dsG4Ia*Xv921=HfnC=m=Y+@L}|~A z&X2X(_*C}C2qcnZgqikC3N9O$?K&xjs+1gV<}@lTL-#wjrcX*tMNM!HZalL!ZG)^+#*4;>6Ipd+GeqR*D;&h%yn-D1L@^`1N2^9;0EQ~I08rc{{ zWk44ve+mN?8yTP9k>;MSmSwwniy7!EmH*`EV4e3R=QgXjpEifvsqG@KzwJkVUny4o z&A<~dC=t11o~S1sg0|hVr@&t)>_k|pGjF?gosCnoNx@F@?oE@YW6pYbf#^2y)`OQE z_|79ZWrV@ViLW>}TPTwbRCi=G7}W!xVmhFcRZ+x7Gcl!Co9Ha0X4{2?sMAXS?jDW8 z=oB{QG>OhG7mH5VrGEFTZ-i@mF0iG<)3{!no_!>ovn|I8wR|K00Qb8n)6E7Gi_;?v zh5W~*vh{^INK!C;%@m(LevIn3x1Jb1(x1g!5H}YlaE;6Z_P~3aKRIK)#`cmQg;elJ zhB0!uXr;kDH%F&t{De70NFN3#!h7o<7!T4kar6a=*rW#cdr)m`iY z8>C-4$Ns3^sEmYoZKjszt@}Q#dX1_ALLBkGZD1%bN1!F4 zIbQwAgV+yQU)4wyNzYfW&8FR_RT8zU*&;mIC4bbT>d*dZe@?%A_pGgG|4v}2_Ne)A zSvgHf$Y37lt4|AMQ*6FV;+`GVZh39P7U)}d=_Y)@aS66sclbhe=HhlgUHizo@9VIE z{K(#kt*#Gyd;yN$GTuo93(P!hg14K4_%jf{*(}{gUYC@n)41L@^FAYV&?XS=cuhk7D8)&A<7@DtCQDrzvp*@~rpajZ zu4eF9F8{fKY*J;ltbmB74}7aRv~9pk6$sl7chjl4^q_nC*Ac3a_~|o+_x)y-xTx$G zcBH4wk_fM2&bx)051asB>Z+N+KWKyw^qw!)+0hjkKItij#Tq6RQOso}V_oBgb+Z}` zPPbJN&&d%WW>2a4Fd5sm{>K71AUJ@S_El)qd_FfZKMFb!>RA+t+4efnHDDd3IeNvL z%>=;uoK8yAoFL@B4f-yJK56zVv^uLN_oR%OOdBL3Wp9+boh?%car!$Fh-XS(s49(5 zlfT}MXh_LbJmV#J7GI3|pW#Rgxe5Syi)qcqTipBWg%*_B+VvMtlv;-1+j%^BSiW51 zNAc1wV;=AHtLu7C&4tlydnGd7rC4sKvOe9tOBtNc^!jL{rdk^&XtLpxvU(G$VrSab_KOe#MYo>%Hw)SQ z_RGC|Uops!to^oAr7f-=TGV>_ zp~g*_Kao+lso;qZ!ZJ%xK+DE>Z4gBQ!{rP5rD#XGy^(f ze&Dn*e-6&rlG2*Zp>tAvd96oQ&ctf)Wdes}zFL zjLZ$byLe_47U>fGf|bQ@Ut-*f(8jC&Y*p-2j27#DzEt{T=+h4#2UnptL;?2w7Gh}N zU5=uPmbQ1>7xMexWYwT?=zqspcr;pOFbuhtxmbDQe2C&!X_*8rFXA4g}4g@Jr=8^-4YAlf<{?gy79zUxPAN$l9SA4 z{)R5=#XhHlh}?D^e#vVn;XTvlpDuGmD08*-OKjf}1v`ymy{lRwt(9A!rQp|b0Yxy`7{l9(Ir#CQGEr@TqdYDNrL_01)-sd^;@ z7b#!#gLQTXT-&NK3<>)Wd|T4h1F756?*<)p;z%43 z*gR$pF4u3EXSg&Y)d_!>n(~66Ox0(ryh<%e0R{={sqTT^-cB+A zm{fh2$W{L_iOYF|^Ncd~z1rzi>C;juPZ5}HGTDs>p(0E0E zqB*p*#_KDPfR*mivup+7-vKOnI212VgeODQw&Axc$Ul2`Y8ji))3O)&xI;WF@iFeD z80EXUW1?#uC)(G=p)j$!a0d>ETwg+&_=eZ!`bOfTAtHdE&FG^(yURy_CAn`Xmqgs3 z$P{$f+*!d#uouo0s5}2lQ00XWK?8l_=Ajn;qzy!BL*&eDAfc8kc*h{mdTa zQ_mJ_|In{5kr3ltp#$^hp=Toa0-B=S3wsA+qBCP(F$4(HWb;%+mvq|78aq6k%&BZ! zDxDnd$-e@9VjW1Q!weORd`>Kq2$eM2(mKF)Ya|l-TtnXk%gL?Q`mRRD)w@iKpa$K8 zNrKdm&ssDXYon_&lh!1DENW1_q-hlfSEfgVTrBnm()g1;bQne<>1 z{-dX;iPX=mHG9gOhBe(D(5}X>J5OVlJ}4R5_KWXFmrc&|(uZavQ_%dQvrDZPn1ByD zpw%fL?MWflPb}vt9GlXmX8Jb~L~kA$pkF$a=?kVmdJ2?T3lDEgz%dWifaIF&Tug}R z@ajJ-d39UgxiC}2I2<+hCwu2K3(^X!-Fpt2R(Z8q*VLbfWQ=qQXi|bq!CFD|A@zf=RUY&mx<-Kth+M;)$nXtG~sva8#opSh;6(~liVAX-5E+$P)y*m_yYI*;+NZ8i9~kao$J}UNruQ~aXa}XmCPFgea`Ue z>0L-mnL9D)Xg9HuZyD~`nk6c3GV6yn7=;2Y7g;*y7Vy5UDHS}~fL$!s-qqYez`ONJ zfcvfH)9La#;or|UGUpd1qzB%CLVj5zs{hn?;E5-j^@z7N>m8uBRYI}FxAUGD5bk%; zRnxxJ8I7v>a)s`b>Dpd==)BEuE~la2bpNBKf8#C<(}}R^ZJECSeHP#zsgmy$v?g^& zVT7@{ie+9ewYzHJ)wEhf$~AShxs|98SW@u1E%g+ov7*b9HUVo4z_^LuG^ z)>vYxlAZOS12%>ESi)I;AY#AuaxHkLbHezUqI6bHU+F1Il$A$q9rgUKq(pc5S*}x! z_XJ|H88Lsq$F$(Rh6EDiB&EIEwW;P={<^KH|@JZagWc4gZe4SP9w9$xF!-E6x1 ziTv|GXH|`^uKt;VD=Lp?tZ@rid zc+Crl#~X~d}Ck_WmJA0r01Z8Yfv!;yMfnRC+Un2jYQ;PAV45#Zhp2un-i&Y@-&dzs-Fol-6 z$SL4!9F3ybFM5qyy8@9Ub6{0nb~e3fjS5|ZFO_l|6g8)?dmaEZOpU{aWP^OJ;*c{S z(URNH!U_XL&YftJUF~$G7d$5>e08&u%~#6p!h@mMN$i#5{?KyaHkrARnYv?B?a^g= zYk%Vd<$b!!d*=B~oo6>Pr3P^E+^oSgqBDN<7$woc)b3|Gugg{4a=B+-JX|>O_%^wNL_;C*K_=Kj1oVe#=Yw z*e5_PxWVm*Sb#ui>f-UDUy`bvci^?H*;l~Mpi-mjN#UhhJ1ckr@qR$d`Lswxz08n% zWMS>dgJr5wi+kIlnc9Et-oh1ePkTwpstes6uu@4@8$`K|`tMZU*o-vjtd5Fc1}$u5bK!hfFeN z55qZt>4l?)()gJ#rr4jRESj;#U664o?#TUk3`jF#qKP0I;v_MiO`=FS{Fvi`(ZM)3 z(UMtY87y4jr`k)w^$_paLNeDt^zx-#M@L7sBn=G>3OO-+pYmt)M;UAz#XQ@FKsfQS zu&`8BRpYKY!`BD1S^W+=H=K6l-dm2ypi4(GepM3?#o{FsbGy!XOy-wbfd}3#QlVq| zrWr&5y+GN*|TT(>Hmt76&_{C3p?gCW0Yg?Ag==0blB^K8OCT63L` zef;9mqYXT6xUM)0MQuxR!kOOWneLO|@v=ID zBW(z%e}=(m%PsJMj@gkTT(SDtRpr3E_`kU3j ziWjEGmL?{ivFnC~ZdYww)_A0(iivrw0oN^60ggqeYa5>7{1X~~^IQZyN3Igb{r;`> zhQpHrMeP1q)~H1GclLT_-TAS?KBjx$5#iDNkt7C>G)vW011L@i!nCKublfik>~;Wv zmhk%Yvpy=b9))0YBs19C7&|1iv+^TEH(I-c6o1JxXT0PCK}WHX^Sh3SKdx5L{@8Ez zd4FF2U2jEGQ#C%r$Po`3VP5E<3Do76TKf#%$B*a&5Uy70tZ^M`QXS3mW(291DKK2AoCKKEhW`&=Zvj-twzLgzELhMW!7aE3 zcMI(#4Q_w)1; zwC0Zsd~#m>xd3~SM!oaBc1P|5*y}#&Gyl2NF0`cJRraHy4z1`lEv(e|Fpqqf^?Y^cBX@ruL+2YHtn$m5)^A?h0dgDz4c%2r%%SqMfZ@m9r?!SLBoEQ zVW3ylif6;!i}-heAcT5M-_g$Pr8GY7Y=`%*T^QhE(uX4reCJei;@t3iT!zHCp~5a> zO?-lz)y5~wYQfYm)fa@&OkTbm$egc};!N(m4;R{hk#ZX(6#Gg%c>T%9E>uOGBQ(o2^>o~$aueHQLjvP8PgzM}d=yFAIeF2>n8l&^3J9D~3p#!I$pj6=wn!g@^+JSqCKlDIbhODIg#QTKKX*cM zpDhsTim1VpY4<;u9T-{(y8f{0&9KOCDgEc4o_@5G0S~$Tkpup(XI_ZHh|8_Rr~*%c z>i@k05rBv2OjytU_W|(gB_&Y#TS2}v!l!BRKNlf6@bFE&(#!vS0K7LJ2YA2^ZudqU z{@()8?LG_J2g+ozAk>3=lj;t5b8rh(+#}0uE%GJ;Wd5fR;dY{@`k^W!dQBh{>$Jq^ zRBxB1^iqvh=&L63c$rxhmhUbO>0YWdu!_^Z`Bd#3%@5 z{fcm$yZVraS%*a6a5cP~C0fxa9R(+{1S`uM!WI(6Z_XqBEBu`$Zh=<~zLjLf2^f^P^&ZH#uepmnlRoC>vR1 zN7D!6>~?)B+7?3tkTa8(ucPz6gQZd+8IYzY-B#2B?9NoF7)iB_nTwlO2VKnSLHHEo zN?rR4EYSDysBeYd-Vsl-;@d6pZt)}0*>PS74a4^f86XGMu-SO5(MeZS^-nfF<;ej! zOajz6qG3PyaPx^Rb$07-5;~V2-i4|~`GXf6jbY~x?=#du<>cmjnumD7LhfhLHjAF` z`W)8R@2&~Luq3tl5YKiVo_TRuBE!KgWgfI_j*h|@(a0yNIWLA9mWn>r*9~(^@(V)q zwFHO7>7wrIMqDmHY5mc%5$uPDlvGB@IQ!L~{k)e?gV`RpOl6^3k_8Akn# zlugs-w)r~}V$f)qpzAm65K{Mlm=JF&ym#6VG}rhM(BxPwAAr13zAK0&qXlh+cy=CF zveklSqSTke$8I*m{A5xh&e2vMumH-0P~sE8f25c67`;3jH zrmhYb<-Sxx+}`#ekBcmg+I+%3SuA(-$_*iwPvP9r@%FoG(v?GBI zU8G}-;hHX6ii?DP5O9x_iS64LggH7&T`j8)Yy-W7yz-?&u~a>Ed7HOUBnOt!clQzr z?CD;VEG`BhC=WIJEPd&qt;GgPZ01BT`1{IPq7$Fl#a+y^T~Mh%_1$#o(%6n}y(`E0 zz12s%>{F$|s_5mInjgH zdLPT_1IstIaw0A#JB+oW4UZ@>9?aKJ4c&B9E!!wOwyf)yb z?L;xb9q?Dlu~#kl+OW;IGL&*jpc~aA+HZIHHeQMKRTkjW>N{Kk&|VoW-f1}k-f0;? zED9;4a$c=g&gY07oM``k_+1GdW141G-Q9KC-f*NhGZN*U5#8JQG?|cxiQ>2M!8rW| zF&v6}m~-UZ%|ytOvIx)nM3He1GdakU4P9ORlhHp2gidXda21ue7OQFsq5zsx29iH zSLRy5@}7!wLOECF{lu$56tWeWJZn9;!dBlfRUw4e@2(5I*SU$VdnK*Huk!CLacqNz zZg=(`g{(Set3U80UUzByoDL#Se1gt0UJxXdO13{dR+LV=oFTv_%Ph~)`t^i=rs(|n zLtgtZAbpVCYNC$)co*`?`35ajjymAF@$1)4f3LeO#dUL&3X3$w_~AgkMkWY|Usb$2 zJo&q50=oHF8rd5iMsQj@WibK+;$tL0ZdFWf6b9mG>{x3h(9MPiaU5CIuiiO84w}9K z=@T$78?>VuS9@b#e?iv;v0E+bDm1FQ309tqY^wxp#N902hK(^A)Rtx6_JWmD+X>gV zwA^I7Z}$bW7Cu(IWi^qLG#1lUO!FV_prH2G+>y#tov(ZS;+KkDpSsRbSKN(mX7;`T z?grKSr{%~e@P*dN;M-7;Vlg%3zSY^r_9Ic_ed{mM82Thhg$GMD`uE2gsp&2+G~nrb zvM2R4LZsP{6(w+o_Nk~&luM>vF{ew%8MMPLBToJ65?I7da{S(29Pm*q{^aUt*4EF@ z&?>2>+KVssIJ_3cv#D%dt1Jx5mX7Lc+3|zt%mp(1iWit{*xs7&a1p$Jh-cOr3s%3~ zPJ`L3vFCG6`FfjksFxxQFG2!)rC#=}cl_^=#qnLZza8 z`Y;IH-AnLZ8LXDucE4Ed+Mx`6CgVfB>ftmtaej90KdDSESbpLkQ1a{+az_=4zsMg9 z{i{F9R3{0KEjQP3ySeA`%4PPV{~n%s1gT?rK7inCa+C4B*To&1`7CeAOp`pl#df`1 z_j`tljmujScn(O|7e>peYMdBQg&TNn^${&R$P0#svVF|h`^BSBx#aaP1hnW-0>Jw1 zM$w$WSA;FqYY_Rz`^kll$`s@>$* zic57W<7(LE`tZ`)UO=u`=hSpC&3ima-aX&Y{=E5tK)uF6HweI-Znfap4z%3wa@~vL zjpgN7K@v6?mp(si<>=2zus;;Fc49;+R~*eH3KjJBW^I2P;;XzmC#W;uE+g3L;tV~j zLw0DUd@m%etW(Hkc34bokA*+Qy=OYyh`DKnp&s#FkR`<5%^zJNzyfFw@tg+f z9fdx-#244k6Iq-xriRmEOS4c(*ee=2-q6!v*3N7XlKqairYiV!m6a^KQyDmc?0!!O zT?)BiIXOSi-A@XfWpG~}fwMR{c<(pqn?n$G?-E2e`I6SK$tCkrN25Yb?AI~pvoJ>Q zBc+H*R{jd_yq@OBHZq8xckcqC(pOwooAu1BU@ zra99*Cp}1Owl>ZDa)2f9o#$;zkmKI+OCDuKoX|A#Q|B}xZxIxHMbg^la!M-#~6yWMUb1c3^F7mjzs`yKCVnABp)&zMk~a zpMpb^towAUJn<(XLstQNyGe;A3Iue;$fYBIwPbwl9iEc$sF0jxD1L%>Q0Ht9Fnb#c z%Ss*6>THuK7(r^pBtJ>oU#_rayz>>_8MBObE|{hUk7u;V`XlB$u_^2BRR>4>*|0^x zGQloHQ|%)4CY{R^W~+uiI!0x5UO296sVpv0VfEZS$^NZ}X!gV|&aY`dEQm z2CS85A#-XnG;qRi3@(<$0RGK{GfeO#zJ4T zKDTJm@b?^deL>GC5q_^<#d>v{=3vO-6{^Qr_e_!&G-K#f_iVl%Po{X{s$Aa5?Fj*g zhXrcBH&`z>QL9mbYCu^&iD{}9*Mcf%T^ZYuf_o?!*9+P@SYt= z&JIP5&G-1(uz|RcJEE^#ia3nk6I(IQfK)GpcxLlkm-#Xq(482zNEINv)K6i0RJVOx z$KB13%aql23YsOTr~Z|1-%T~kf2@0B*AhcFbE)gIJJ+}`1aL&$s=f_sfJ#L4$r~%U z{YW0ubZQvJRMe)cW8d!*#C{v1#-JyP_PiITkBRQBcLf`rK1wb%07+(pOXzbrHZ%8V zuw+02WYh9*5SJT=p9zG#fxe^YO7!HYtkddWlI#sjixHnW^PXS7?(9Ak^3&!oo9cD6 zZLX=!`*K(Yu=@s&UI{7+qc);@ig!CgME3&Qzd{QyXleq!nUiIa_y@JpfhCMl*yv{E zIvFfDpT=+dXNThos?g=v%e6$e`x?-DGfpGr?nk46@|iRsHC@m5rAx!wRFKc&4-soy z@J}qC_(Yw$-0~g;FxS{0_d~WkJ9da?=dQP2-~2USck6YC&ZH`JZBFXK;>ZtV?C%P9 zEBBY@_+c?c)t3R=w@1pIqX*R&Iy|z7VTLT};5Yl2w$x@93UGzDzh3h6Zy;MvKhk?{ z`6H@2lW2!wuip!?X-{JJ9ZGuN^US}K0N>VRRtfni02)^9xssB@g|dln@v!5gPzJJ|Qn_m4S)o}f1HL?&Z}laadY5S_{--r$z4 z+U6s)t;^ca#G@)xLv1-Wi%65R71YO@oKYV?#el-30EpfmT`E%vumk30*MX0j`nJjd zA=HTyn)1g@T(rF`5rJY^ulm+4h!fqtaf=`IC{zf(F@h{Otnc&iGd-jre6W7v+%R655sMV=39t|x27b}KVmJ^2ee8~YVbF<`&x}p!s(MPO)=+i zt@<4*ZaSk}-X5E}QOWI}R~oF@+|zp9&>8xYsk)rFr^uY`D!!nxP3?>DEl|H}liZ2GR*8ZZ26((V#-Ppx}sDVj9t=}_Ez zQ%h;30a+2v9wvNy#PH5Fc>33^a9GP!c28VLBX{|(BQ>h4>S z_#JO1je)A>M6umL`$VFF^Ly^XF5L@U7DF>O0~sNa#m@UCa>8m_N)X>PGE!@fI+x}Z z%E)1sCa!CM!`RGaPLz;yYF|`E|M=ASL=Y9SfVu`z-WS^*!I#^rg4Hv`Rfe)3Lv_O}1Yuf<+hz+f z)9o_$P7?AT%GrePU#hp3tP!ttD@&K^eH-bbTbe!o{&RPVNxUBo&D#bz3)GU~L(gz? zl+}Hu)wLjF(?(N_S+=XoIi}{JrQGA=)@|-sal1;V0`qW68(G)qLViO1D8M&WdaXA? z&j)0{x-Csvtf{Il(2AAAlcUB_p$x_*twq=pa#e{gBD^s>ZY~QNT2G-hAJ2_Mz0NF( zqV5}z1jQM{`*+|5xU2wdkwH6_3ZwP~547>)D5^~{J%xLvHTJZ$L*BNgW5JR3@8Zz@)UQ&=e`O)}8g|Dil;41LB z4!!Fr_b5|aIwh(1CR0;pE6U+rH{eo-=e(8t&@^&tj$qQGL3b%mGJ7O-xv*wq`N`s@ zUV1!=M`xLG=RDyQAet3>b-E4BJms)Gm~`nb`?LE!sl&~imOW*pkmnpWveRKC#PryR z%_wiQk+kS~GXlN5RL$e8w3#ddzGGVDmaXqU%gNM2%D41fFF{}6oyJCmo$j~7HWvm< zvRWUnUq*Nt#FwS_ zPTt~=KMLKv&aZA@-AuglP7?}a2vJMo{+0?1M}_HUe9Q>ppJU?rI>x)H-bUjb7bxY< zT?*!(O3O@vt?(*x&1b_O=Vc*hr)ai?#*q&mXPi)BP`3O;6d( z?bTbW!uAw}rPF7|-i5FCrHX%grlyRe)nOn{Xkc1``qy3%V_D#rdjpf`29NHKWVX_I z`}d#=8e3Uc%wO0HTSg@he=!@*c4fvFMJr+DrfRU8GFrbUfjb)&R@EE_A4?|cO(bL;IS85XP4L}jLX3wRnhCDi7XK_>n zrqp;uqWxA&Qth4`vz{J4JW2CP*B@5Y!Y&t*ke^r%gJs^kidphW^cZJcS940x z<+N=aql)W4c?b{B>oi4~7BH5KS8%8;FG@Wt$2-l*i6x*J>^c_B66B(Fd_aZu9`sM)#6XN~Y@%X){a0 z01&FL?}g`&Ikg$EA4f=By*t?G<}vWrifNBfg}Ip$w0iSoLEfAC6H}o>p`LPiw3;(# z?R?WTGFA0jNiQ|LmU57cA(=D6lrgMAx+tFqNCYeTt`5=m&0oP%{x}RO+{`g_+7X3n z47APXvFT?&EzJa>zT2LRm)tGkphCM<+Sq(DfhfxTX!voq?YTWmPuZR&sq9UoQy5o5 zV6)g*q`!(DHuZ|vdFtSe;FI^#l@97Orrn8L?_GFlnQqT(Pbt;aa7yzLSN;!Dd=5b+ zP8}t4Q`9e-3yEI@S*IO*9`WO;l+>>n!jyLvM@^f`Ja^pQU(#s%DNQbk^SR(-`yhS7 z9h2r1)&NXP38!jOfS@M;4AR0xg~KHn)#bJmz#HRp(aoXos^W~TU+;}%!RJVmB20}6 zh*1YZIh0xNwRS!JNS9GT+8)72M#<=t{yQGsMtCTmljr1rvVPtF-dMf&)n+05&59oqDEl^pTqtT>27KIs`r6ZTRYgV8$QVvRL+ zs-#e<>>yVP?qb5!x{@RXvn2!}s&D1ttnSCRY{6iz!_%eH9&@M(IUxejb1ayrWIMhdyyBovp7TCbK@@#s6Q*(@$zl4 zH~HUf1602Ori=Do;J}xnOZCaaFBG;JN=Ts6;X59_ER=~A&!?zO8m)yKENr9qUCa>` z;Wk`CU>Di0tNKm26W_+xk5uFgMR@tl!(5c0x>1`2n-kK(PzB=YGMKPH0G z1U|0;(_el*M2F&eu=zSoDQY-?di;c9F<=JuNuoG;eK<}xUQpuU${;L`35**SX_bUc z%$2kcQ@XFM}@mqp(<;-^N1g30gl-lb9ld1DA_LS>1 z(%4Rrg9T|Rh0V<3-@HCBntf1T@p!iq-c&uu1KoZ?`Iz;U(%#bTraPr6H|Q2w|IXD5 zPj~z7y3)b`&+nsrn{n}i!|QG=M&Cg#RcNiIrNz5qa%ebu*@?_d3)Yjbn2eKYE<#SE_2g2I-bZTLaaF285!okhi3^@p)SJQ zoYNV5gyuiG_UmQv5zh19;C3IMQ-o?Wjs9iO0RhI~L%37!A-;z$5^~iZeO`J3MT67s zV~sZDA!RARlAkOPO5}A+F#ZtYJA|}XpYMWD=W(bV()#>tQZBwZc0e6ZVn|0JL-B@r zimLpEbu|ldc_K21y|`b_rV$#z>8*;wOQq)%*nj!zO^V4iY-mBo$E)X z^r4hvii4;085F>Dq#;ApLoeDSxg;^&>ilm0CSVN*j5lSo8V~tonc@K{mH%>%3mpGc zAcXw^QgUd?j#d5PtLWaXX6FM`be2F&p3o27D}vA@esCI)kY$?<@TrrE*2iN(6^{V> z$gCT3okZ%5Un`hN?1dC%b8J&+I&Ibn1>Clyw(ncV$~dSnks*5Wfno6w_6i}NX^ZG_ zToL@=@kP>DSl0Ja@@!(y-CCOG+!LSU=4n`-Pk$c=VsX^Hd!vhA%g3w6a^#8@jHpX7 z$gD$)e%#!C3yLs@?}2swFVwh)BAAJDTc-?r)MnG`@{Qy}VM-EpseY3r_=`4W7t+H+ zFWGj>lHRd3Bs}Nz6qme?B;Qip5jgTGmDm2Wn6p;?tIm&U^mxv@`qv7q^Bym{5j3sJt2_V!C_Q|x zhgfD)hZ}ZzMtnY3V=V?SU~_9a=Aa?k70Sg5V&7z1ooF_Yc@x8-DbAjc4v16RE*>HIUHl+X->zS;N^WDcuKiU@ms#(?CY}* z3dSMCiy0xnu{CklqF8+gK*C2a#{+>h7>g4p2a6~ds0Q5=oJwv|%x=1UCrYi2N*HT3ULN->xh(d^fZn!F0Z}X0(MRj-^#e+|6LTpgA=F)=0{4a4PefRavIQo>eXH;f@-YFp z_x8r*9KVdHhw>J@Nk(^8xzB&mKYAYDZoesb--DA_`EKEpKD)yO>&{d`fV1sld-5kQ z+vY-jT{(AccV}yH)o*Ac1G7nH3z<;&)s86|cR(aeIYZ@82__?ZIf%P#m4x4OY*|i` zo&gg8J4*(EMvh7nJ89)9SEUgChGnn6Pw5?=BXR9{c<>sBlsRsXjf{y?St5G$&KBv? zC)U};#YR6+ZJ90_=@nSoeDw!?LyC&< zJ$dmh&8MhQ0GJA~tw-dP-c&qP#2H{Q*^}B|>B@UQ$ln40|IRl0Tu=mxl}jQV+b>VY zq&BVrFO+Q5<2T#5NUGyvvE}}Jd!;_Es=8w!)yK=755M|>TnI`DGH*7tW>$SQExq*{ z!Y$e~qPc(Z+&69hYbNB3UyGxQ0*Wa{8s8*{M ze>*K~Y0$%Xfc+jYR00sq<_@{WGt-*CiPimwMU89AHMcq>X~G%y-S+ozyY8izBk z4OMhc|X1*b{Qqi@zIUW&cLM+Kke!cjsW7^#SZ9D&T)AObPQzE@QT{xmBq{H~}Cl zBi}iIqua1+t6hp|RkG7dcD>OxkiZ0)g!8Qq z;heS$mW$E+rfu-5)X!Op?X(y^%RMOpknhVE%Q5{wmz*-WcChje9+PZYBsXOkOC)v) zJb>W&ji;+a;oF*YL0#G5_%Q_q{+O?1{QRPFjgOHeB_l;*W7*PX#~%+2k+lp?Wfl0C zRk-2rFLtRXa(W4Yv_7T9aWOCMDqaCxAM}iQ_ZPQoK|$qjnr*4PX^K(g+t@VjYQa|| zvm`mZLT!!{HAdFk*^$gAu_0N#qWNMU#xGi2lGNFYSpce^m|Sk!XX$fsfE#-2+T%4k zZ!^(!C@ODIE-N>6WyI9nW6)#X&CAkDoj&d!~$qJ*XP zPOHJVl%r zq{&kNGOwt&K5(1wT6q zNF_SF+82X!eCp8`-`VyIOd)7L{veRP!0OcG%aNJpjWuhh6le@d(eoG~j^iEfAB>bz z0S>t@0Gi>wI#jz$w)`$*%yTQGA%kPrOk1Yp>0BpuUgslAi&^d)uWdoW$fc5B=SMt; z37eC8x++gNfWwE;Gv(W9e+qt*5b>WOkHvT-{l~{LO$}S8&8j&pZC2d2Ph?y#H*nGG zm4wZ7sL-t$-`DmSNZ{u%7$JTD$ihu=D? zju};9htNNB6jG`frswkQw$OHX>^8kLsY>NKHmnG;RA!C1>QZl&AS~MxvH219D*{`7 z+&uGQR?)@$NwB`FYdx~_w41)W-R7&5MLr#kcwT*`0q~ev>#ycmrKr3Y2*@4`7fPuw zgg&aM$}lry;4>Fp`5NB#iaLJRGWhV&X>3sWQk0B*{;hMxW~*z(9oEZ;uAOe)M&3!M z$)z>wA0f>8%yfD*XxNx9oIR-4cHVmQ4s3*{HIr(9e+)q25!w)o3HwqRLv~QZh$Tp` z)_EIU*v6#QQ^y2ojKT(j5{2KrBZY&PEY0-y_g4ZOaV=dz=0|Z%`sCh^eUsmPd_YJ@ zNHTI=A*``{nipdMT}uOZ&$X+Rg-2JF2kPCQC0RH;cY6R(N}4n^s_12E4tw@3)Ao|R z$LrGEDf~|Ao~03S`E20S;%F|#SXd6^Hu7&Sdu_6g_cvb0k1Sc`uBR`Srtvc6z7P== zxw$#-4VpYQZ}ZCO3U#$<%G~>gOhpLsxKbOenpuFX(_k<*p$%wPpQTG{ZuJ(df zcfCSUIVDe&!YJM7M@;Vt#9d@Rf`vS;KmyIU=1^+oC+|*%Ub<*VA_{7aLNUL-biH7( zzeNP_wVAJKVd^`_MMy9?ph!q;3$+#-gdVgEdZUaVxOLUMjmAu!%38cuPc{Z|Vn{p= zMR@+G!7H+M)K&lnKr8gaDiZtqZ;&k8Zm7IIA%3{f2gW-X{xUW9_n}Uz-$xf?G&NAm z4gG`=8Jn&B>pZNM`XN54W6W(2jEt<9jo{i@7fQ(d@Rd8*`ZW)LcFqMYGZG6lw%QbN zmo~hFnyXb|n$mWJ|2J%}y#;YAi(&@JU~4f;ZoVar&AW-S{V}KyuX!JSRq)9{l4cr^ z`tfgc)$gG8+>3nOrkCp2kw{t(Ja(xMS}rsDz3b92%hqg5L$l<8-!(vwyUN2+pVDZ) zAyEovx_)^H`xiRv&yKSm&G#^1TpSg?pA|q;k?XW0Uv4)CUPgCWoM@oN_LC=@etKRY zko@@;g+vB)>0z?<`rrS~0fp+tCF39ao(HnZ%BWR6Skk*jnB(L^lc61C<7kb@&PVO+>Q zkSQx12SIFhS*t!`rs`{iphU@+ukka1Uyh%UWW5xz1Ic%n6CyyQTP6zfKkP( zBZm32OD!RiFUB=CE=|C9RI&%8q`&4ypfQNjK&48?zXi6Y!CNd(ef|m>+A8z%>{#FW z6|L7@H9IEhvJLFn;o*3%K{l6&Q(Z-R+tX(wULx>^e@_2=WXFS*3V-4`qS|8=1Csvw zip+LNZ`Q#Z%U%5MI-)$wNUfpk8#s>^3rt!PK zY(o0L^?T5TixK!6y%n35=BCqJ1-A-TD`Y^BQ6P-&L$f@Lzm=PjoS)#r*;0N5`mZH702a7!PH-MK^DI z1DX3^)Q9wUHy+C^ADl<&gxs_Z>rep=HUS!ZoU*IAP<*+?^t)4tp_#$W=}lavmM}79 zN$?g4#L-7)l){P6;ra*psyye1(*$z)xD;GRvU-)Cmc@1_pzH?HJE-|cB17Aj?W)A+ z>e6x9Ggw$ciuS{b6Ty3ShA;0>mR|}*|Ayn5V!RXIJ2j2OSaE%b)AFrF?vnFqSXlmrN-&{eEE%CLwCO{6~ z*{*STCEUJ%cy#;yu;wfM<`0CNX&9&A-&OY!Cd72LyF^{6f0e>+o&DZZ<3hI8jgK*~S?e73N8R|GBKiw?3FAK8%EwDe@*1B@69_}xmh2&K@-bjB z<3VlHcaW-bhr&}yG|M6t#CZ<(-|YyB1qGU*+#G_fJ^7c`0POl4)H+|ghbXkHQx+C2 zg=Si+B!7;BWSr^@{i#|r&7a2npgcM|7T~=+EIVW1XLWY3TwjdNlVdjEOqp*$$>2n{ zgMz+$dwM%`$TL`#r?|bh0@I0S5|4a_s{h*>?v^Co6Uo2g^N2{56ua?Qb0Z$U&%7nthM-6 zP(0}d{dAy8&P9k@PW=m5y<||^(p~E5?GMa{_17taS7_wV>aQH$Tmpfo`SA9I-RbO1 zj#cgYJ%0LkZmGSe0#dI4q@<(mN5$EBwe(exOe}fmcdz4BJbnI})Q@3h7~rXl#;;O; z76opZWo_4&`ur`*Hc+m_7~AQAEqSNieOy2;@S-DPF|&wZ>(B28owUCdhYtsIILSgL-*nih z^|4z4a@S4_-HQ#zxg?dmVat7ubwA-uiU=lIX*w-)mJ}gweFhe>8Fc^xty_%C~&vmi%PExY6UjknFi-;RKKW>ba3j43!6A_0FO?6~=J_ z_yj!_*&47FBYAzXXjPQz11+<`{N6HTz22F+WZn{Poo_p%U&*_BpD0AdsJQ3F5mUvu zfulS7e#1S`$d68X94a0U3fRv9x?%2@+LN>61scEuRwmXEQkS<*t0LL(OB&DD*B6VB zkRgx|6H~OoenY0ZtIcs6^ZNF7@EGnl6-p_Ss7L@N1pR@9>7mZ&;O#x1%yG1!;Hry+ z#4#TQN{-wHBg8ZteGcJWGaeik>r((StR4B9kK$uzfMN^_hV%JK3*Rc2WVTZVvBa-= zr@l5RO9Qu!pOK*E=H{)0a$cjAYsi@95@k8;NE>4#dm9TBCGThMuN>Pa{ot*Rlozv; zPs(KT##`*p_s^O?Bi~M~|8_t-Kj3qp%J%~gC3IL!3g7j&<}|hmsaQMU5T-O834_aF z%PvoR9ErdGi(8_d%LoJGp(UrB_50>bD6s~JcOsX?xlpa_3-RUrhyXwA zi){%d&DdeU`a68jJUJD;8d?qrlyn(xy-E3y^j3_Y>PMiDuqgUxbh0VnMCM!${YFP~ z#!y&n3b#&WN?=BKkQxr7TVGAF>)>zH0omTDk-`t zY$3axWJX3L1_QC6o=kzN$A#c7H%w^o(?UcVhFISDoy&;qI%q9dNdRF+eEC}_fm&f0 zt^e*|oaJ64S^&@=dU2rU@AU^&BEx_B=U?~sf+wSpPC@P2Z`A`V{{MBA$^z*8X)n$# zyx&vszpX|P0(T4$@m1&v8u-`uevbgKLOyX-o`(b z=zv($XWb{9AtQL=HwfdOa+u_w4&sg$^m0$+{$Tj!hUa}B`pNNe#{OJg?-pbp7Mc{j z4d|@T;6*c(e-)t@-2Od*ZF{9F@N7FTO~$w#z-;dAq$S>k6AL7MeI+fLB@tQ)kPn~% zp0^fI^eYDM>VFrFDC47fyyX4p^)vAH@=&!6a&JWka~Ad38%{EKdqoUHCwx56rA|~3 zT+XjgH>3I8?0@b)=_6pr>G1{B0uZXw0k@s9nOXj;j>Nl(0#!ex-&QTc^XJ*i%kO@U zM?|0x#L&kPobj6IH~*zvl3<{rX@EZy>=h=!gTqaRj)NlsAS7ZOpuJ#PTU*bTJ75?G z=jM_oy1~Ykm@A`Mk~>(Ez*ypvEpKdy1CHLPmM6WD9mxqQhqa4@_qX|@8~(Zri(-uE z!ff59Yl7@r8=!qoZ!Qmq;h)-)gboORJTX=y;=jc0gS3wq%q!X0?-^8DZY=ieQh^5| z20+dHfsqyNJdxb}XQ-ZjEMvkJYs1X!5Eg^1Mmspr?c{;3t^eg9QuZ%_Bwa-0Hbeo@ z`?z}Zo1w!dKM_vy-icBE`&$pNgcZ+xuzhZYY`?U#Jqd5ZFj3a;1??Z<4T0VJ{f@XJ zz+kH*1*A=vpUM&QU!t%Y=p!%%Du@1@X6maUOv(hXp3rgCl&SwUf9hXD={~(=w+0_l z>gNM|3G^1`pJM*}fcf(ZOL`|FLOl^MC*}_#GrIm&Y4l5225p#jB+`S1EI9PfmQO;> zVKDK>c>8-6+erCP-!bY`vz#>pC%k`hcnT8M|CL}Yqxaw6V}=E0iiw%&M85LhFZ!+@ zOdFs57dLD^seLexnda`8(fwkbSRfq1;jkn9GAOYkpB3 zN<=w4ZOPr~pZ8I}w){}@eA5@8Mt*m4;W zk2qudgWf?IZD-XhM$J!c1dg#y6=;y<@~ayu37MmjZQqTqvZI$t#2N=f(9_Wm%P8>w zA|*nhAH9OMx)?6X3n$TIea;u4DRXjik^8>08~+arc9{@*n1X!Ee1;;}^+HbL>SV*g zz_-SictJ?6=PaWUs18u zH?>c3#lo2jFzT|6@jkLZkPE^?iT?9SXv48y_7F zmUZ(iFZmkDf^h+1QB1LilTi_e28#cB+i35f8hwyh{uXeqvw&A=l69s8GUJcC7w<@*q@%=ddc5(SQ^-3>W?IQGzyX+~7(#l_|54?n1_=Bf?uwS&Id$RxY(otY z${nVnMQMv(-Sz9ooYhlAgNQHtUcdWWe0}Swj_$Oxu|aiF3~s^oHmK)+YBVE4s(m6Q z3t}oPuGy<+9L4sFOH>0>L}mf z|M|aT`Q`p7JDs-ee5+cM8qw(2Mu}Gk(6bIc?JXYRW^+HHPQp%%PnAlO@rddP#Py#H zgwmV$Bg}vI7O?bELvz`FQt3zXGg_FvBWYB*=aT7eBtAt-6$nY6hr2QRNgsQWK^Jpk zxhVy>m4@4N#Mh|r$@rR0dlCFFNhMPk&j6R(OB5>evFvJta}eEI2F))Az4Eb$uM|_i zxZl|dE&vR&$6@Owe*bum?v^J_zBr85@*m2quht9K<5kLq=4;ZabN`TjcRhq*m8Os#WDLSq0&=h*tV9R~P?h;k3o`g3&o+ zBH|CnnJhidFB1djSqx5ecux++d1epvcfNX;%5h3Q7|iVeQCpNGu?8GFqx7unQm z?0v6xK|1zyP*(8&hkcn&&EnGG+~~qAXTss$^!)ldXB_AWc?8EDCE4BWY}&|HqUz_b zfE0WTQ1d$#;^D^nRM4f?h8n4FabMD8XM ziX3;&)En*3nXX;)UU@&tsWli1S}ayWZ}g>hkRR2*B)C&v z+!)-Iu)oemvr7=W4 zo9f}!FvCAs?<$nU$)wmKr2UG5`>ysEX^MbL%E{*GxC@>+7a3s9B=OTYzW9&e&WE;t zByky=L9~2d*Y;3}4X58MZawEsO&xtxQH&EM7yIi_EuExwSIT_ceV18Mda3m^0-qyE zjNolgk|rELf0j_-_VR3APu;K(#d|0Hzy}Eu+-{4nwc)ebW{qTVdfRBlj6zbA{0%Z0 z%4LX?l(QxF`d48vr(PwouIIxVa-I zlfs)bQ0unN>(}F8y0Uqqaj(KT$hT`K_pNgj`Ns1mi`{AZa?Pi26H!n$jZSe+l`EOqe_A`N_7;YI-?ja}WGas`gYCd)!LeUrJ%7GP^%?YWql&r1rFQgOgNkRsea1W-VNVx%+Z>f{NOh|<2_7Xz1zXE(F6;L zdcuGdo>ir*ilE}(6(oCfu$lPO`5oxv#in`wdZ7qv|6$j0J@gS3wDmeoPT=OGcyND? zBQ8_?-dlY7enqBZKIB=nPRhO5CyJF z;g^So@_438@sOcu-1B#10oLJh(}4>#-a{k|OOJ=j+wb18?q@R}d68U=ymf+r2~Tof zd>+X?zbD=zMSgUBdtvQ8chgz2F~+mE&`&(^qh+WiImu(L!69vacGsH6ZaKn1h6vC5 zB0Ri5wZyjn#2Xyz;hr}<*lIoZc06%ap1O?5lhvuU`Thsm56*kT+r_!M!9vP1txNW` zBFNaUd8!|f{b$GX6*%}mB+}rT_-755?@kkW!G%p=msrIg-! zFmYlg;5vv-vLP_{TBr@NvkX67e{3b8;p1$jJvBwE)KQ#a-n}6dI@@y9e&2{(Kki|B z8Ldctaem9?bmGOje;T;{0Ms7swLXKz0EcF-knKwT;5>g?YG)$$Wic7>ZB}3UGGANB z6osnK0VqMdI(n>m^x2m8|tc>R#yroJrL)MLsIQpW#vzR-4}30CL962HB`&RXOeGYVwp{waepQya&Nji` z-Ih7A;PlLRK%whxtH{Bgn{NwU&G#!<<~F;OXc}d)XF@`gv!(ZwxJ){x=Rl^^qX_=1 zBW`MMznd82smpM;f!Sa4xl^v7mP4;$M4acZVVcfaPTMTo9)+c0ICa1rg>hAt#ky_! zbm|P)G#;P!XZJ;KYvu~xw#Zx<#Re5YLYzq9W zG!A!Yex_8~lv`7^c$YnE^8RjCgC*N*lPvRBL4kR$_np@Duid~DPyFZshM`C6fM&rH zU254Fx*Tyo7@fMrPVG1zFTs>fV%>c0tmIczIL^@KvpKmRvX*;49|@4`UQ-Cm`rLik zEJPsn0YL*d;cle8r8Gkh`sv(_jt3$ZW(n;a+-Kl@M9|E&6N8UNUhZYZ+o+yp3*Lxj zJ__4&SkRus!Zh{0Y|(}8RLA)q`&ts5LXqECPDsCg%NV$Jpa0@eU9-9pw`3`$>8rqQ zD?Lw39c?bMf67_wJQT>I=|S*DNs>A~V|R8_e?0!uG*JY$PG|5&=jXC<-_45vNM^E!C)>+}XXkF}#h<7&~uQmSb?Q<>5I`;V>{?rl3gMcr$Q z%Qkvul)l!~I|k)x=d9SPSW81erg%aJ_^%C%*agDo$NaMb2Gl95SQ?8rVI)Ndi(F0K z)dxXL)l%Y;;*u5GCvOaupI$XNw%5==!SZlh)ITYd@Y)Mxr~^SRI>d{1a9&WI5HR51 zxoI<>2t9u0r+ffQexg3pLLApg)?WfewOGCL?LC}gKg8AC({wgjY2=)RL0oE5(o*em znjkLBu1K6pyyI=Qi)j4xBzS#m zK1C0eM+7yk(~-WPgUFA%e-$R}g{+R@>fh<%*Wk8G*8?uKmiG1EymU&BB9-3k^0h*y z@kOJoCdlmK4lc&j*ZI7uwMQLd$o{K|YLpv8%2YR{cMmC_$sm%NX*OX_sLry!0u;JE z&=bxl!ki+qklm9vEZ>au_~&;+;uvPuk-ZX0x*VXvZ$T3rnWRqPXCc;e71p6cvoZcs&a~ zDJ@|rV4^|3vGB1Cm`HJus|C;d?9;BT@VN}SaWS%F@30zJo;wzni0u=Y@j!}jC*vxFO^sGFEY4T67X7Lk-JU{GFn3$?$f&6=x~$l9<>$of zU9ff#Yqzmfm4$Fc*6!EhfV5VuJRAITdY8`3Of;y-$)Vj}5$_=LXj6QeW;vuZrU#nF z$ghTWSJPzjzd<6ELUz6U-HF>g`1F!%FU}$TFN&sF(cr!Z^H<4?U3H5fOTuk~LxLb-|yYe<@vCvpN zt=M_lbj+?>&a!Ei5UZnN%|1Bw0VI6G$UD`M`2E&9Jy$NRZjum3?{;p^e7JLSh2Zy&} zhhxQvvPMAP{D55bQ1>RHW>W?^Z&;`?CB@`c9(gE?ZH>PfJJp$C&UwYK!z%p~>2aga zM21zmaNx7L_T+xIvjN+y$#LhS?EPDeod?6II7G7<6;{J@kvo?DQ&OL8WbTV5Bp~x9 zg%kud?1kf22CcEhmP6s^cjdpnuh$iG0Q9@hbW9Ocd+f(P{{%o6!ZNLbU=vl2B1k2_ z2ozN)r$4sAE~8?lVg0SX0z)XVdmT?9L6^?k@TlkkKi1fw8CveWacNQa@IB-7`}S$n zMh)fX)?2byd9 z3I%d4!SA3)OX#8c>qI^-c}oR|i=%iZ#Esn{Ex40+{nI&zDahTq&(Mu8uuO}g!}_ER z$N=_OicBIwS*3nuxF?#*cO3B0E)z*)`myN@6k;o;@EkS_%W#x@n}zqtB$8AvB&4Vz zVOAFE-L!hb!P?T(yJ6pMtX{6Mvhz)~o79xvvZ9})<@19@1-#zHwCXFY&;)m6!90)=p?(!8l@H^w=(k$DpN17tiLu`yNr`?;j zph|W&wrik)ZI;GNmSomrggZ|lgr<&Zo+Q*rp4`PDnq1@v(Dq}t%t}a3XxLE{9JqZF zb7Ex#7ptS#c<$sy4fjNe*T zeGr{0Rn_?%ydE+l?6duNp_t)DeUw4M=c9!$SgsJhwZk-M>3sctI<|5%{rtgr(`S?X zLTUHm-H&B)AsZ&g&HZ;AV5Ks&$#O{G| zY8B$XX)SqthzaDPmKLl6RJ8&b6B+Xp@m*BHsAwlOSuF~3y6Tlj=5@H^+G?D+Q7RP!MGPM zUyHMmLpB76WMQ5^(278#r4`XSE@u@M#nHWQcSaG!o&wb&jxna@R~UU9B32b)wzQgts?-xN%83;=U#!AqItW zt<)|=PU?0V?bz#MQDtuP-1SVE3dncKJ-lx_+qi-KFi|wdwSPj(={CrL zK5n5e&~VaPXCK|$SM^S#f#UMyoy62Aa&6NwQ^ztzH&qg8llW?FXk}!qD&#Fb6&phF339(_v9;PV@8Vy zlZtO^ILRriXlwJTwtZ0{>m<9!p>y*CsfXN2JGnB~G#y6yuLKLNfyo3_=b{Q@IZ%e) zN#pne<8E8voaKUARGo`W=w+M|;^&r)WkKb3oAWCOcXf~xrQv}bqaH2anoHvi;=&8K z4^(8`eXiPNOiIwH0;<4ZN(j-vuYr|yKefSND2ZVHEW4mt7WDZJ`*m>;}FW?2; za3vkej`22DY1|_B+fs~nr)RlxuKK-u?@lXt!xJN$a1+HLi!8BA_dn|Y)�BvlIRD z69)3NC0ksMggkLtIH-|~mNqW5Fu{Y48&aMr%BzQ7%8l7?)>M!O3B10k5@L73yy8Q; zBZKT0#J)Z-pZD$ii#LHTNgiM5snS2f_;>l1vycCTAV2*hzde1Qj6lbP(HWpEjCF2W z2}kzg@vlumrMkP`O~wz78cq$bJYKA$)?PWre`x009s1u0=HT`#;uqHkB>IFi1NYdS zIcXhenU8_iCw0J=VfK^vbCbl3o|fbt13RXih~-xkyu5nl5ByQ13Y%oMdKfI_;2W`v zD2{y*Kg9T^n=~eW^)kJ;)2DClJ!KLDAMP6a?S!#>YdDSRqXm*0exW_*|C#!8NEo2@ zm}sMmGcUC4n=3jp3?-*mx|=BN40^KRpYu{ZP9N@2wBLSmRt3iWb$93go)`gU@qwdX za9%)jQj9|+Az{iMPg;`VytdZ(NvFjpHxd_OW;;CA%Ice7lE=#hs^%RcbgoievTmF_ z{adAhAmX(^KXx|WcCO&tYa@Q`xcpESni~J5e%;M7sXe*}Ol(0u*I)btMt*F2$7=vr zoi6ua`)8aVj$%Lv0JwY0(Z>aTg|APTPl#B3625D3GCWiMKe2-tZTCWKQqqD|N#WvJR3B5J7b z^JR1XLu^7Ey{>0}J!hKfgzzon!zXmhBp%&C*=Wh%;OEETYmWmHjJuyBJ$3 za3&vqLN?@k>LHp>&b?l!-!Y?X*yD(}?AsCceY@2~=4)cGAEU++XwpVYJS8mZ20IP2 z)TkYlr#XQeEnj*ee%%-=M8n|>p0RRRsgtR2CrCIz=?|notbzS&rEB5pWJ71|owjjT zDEg5VFyENUKIPy0@|9lOROyy>lJh-lP2#(f9ioBOG)!@I6#a8YmuLcfU6PHNDZa7^ zg}zTuh#F6r$F$QwruuHSGAZ?I*XLiTLF4_kzkM4-yNx<4m&pBGQ$rrpS< zsX0V*=u@$plXhg0Wt~|PX{-LUima%bVR@G!%$!goQ|{hS*4PaCbA)OS=#NJw#qWvZ z;&V2i-EivSmG|5!xRO(q-cX_)IN-z?)kpLyC=Vo2~DH5z}08%s2F)Pv$?f0Xv4VAjyB#KQ5eJw@+_ zc}#RgY$QD$J(dX>`Aaml95*hJ3K)<4cL$n#!|)h4{!7+mM%>)7LxpPJKDM2IdxLuj zUC_iGpL5Nul%rTk=nz}6jOK5I%kM;gJg)0TN|%tP>#p#l{X?r0)dtg}JGSe{n$DZG!wr6}k5Q(n(#|r#Vr*#g%Hi>@xlf(Ea(^NEidu{@2VM+yn*;P}A9>U*h*m(|~NrdtjID!SB_I z|FPP??hFKi?48K6SAQ=3rD)NI+35av5r4M#|C=XqVYNQY2T%!>FcOiV7}11Ivdn&X zmptIBW~Rpfn7+zwYB>R$5rBPtuPsu3rMxOOJ$-dDZkn()3dR*37{Aw|v8kyiYa`y< z0`^lNj7C#;P+(|qFrDk!lP7dqT3Ve0O!9}Q={9V#)WT@|LT;5j|0O3L=WiC=79Oi~ zHnp0Pl8aK4W&ZX#e z-P2D2-*P^yIO5rXY!&0(~5 zt6$3v>%5m-=xJAzDYI=+-DP)M=gCA|jEArr>mQ>JST0F+{zI`TEJUE*H|!vslgFeB z>>2XuUTh|yRnMQ-I4S2Q<^c6Ec`|vd?T3&5^($;2cw#KyvK4#S=|hX%2xwq9^|mi* z4TV5_1(UbXD1G}$+laipU0;0MqLi%5!Pw0PKR6+;$>n#-^%B>e9sYHmRCWBz)~{bL=BVXN?i zD^zZ?C13k%*a2Syq(>g2Z%WhAFwsq%4CSV#VXENyyOCi#Cx8RU-0HK|tW~>MO#S8< zf0t;=>_0GtjR9E9T{U#Vq4@|klVa)z^s!u(%`*Sk+vgI%6$8fL1WsyAe%J^VoaW&h zvw!!TI+;qHii;uY(6uzRtb@;=SEXh*{DYn5UjqBv;ISZb=!r%%*qO?L`bk7;{IAgB zV7+YY2QcZ~$=Vl>IM-c|ssXj7!6$1V|H!K(7jUe#tO3WKKca>ljBqW`=a$+(7i<`3 zJ#4a3J`Bo;^H&>j2MeC60wwbV$aP{#6X($V)LM$^CnF$NU*FfrYF!iRGLQ<1>#NQb z#05)TG;xa}KED!ng`QdG-nVnYx8Ugs;_;o6Fe;ifNU_KEi3>Mx zI+?nh)pLdlw;Rjf@`gV7bJDS@!15~|f*b8#_gL)YyNor;My0P_=MSORS1Al8bA}cQ zu&K>-yl$=V+*BO_r_3hKM}OwYQLp5r_JsYJm>PdNFbvHi?+vXapz)=Uj_iKY8O{8A z9NB^SCk+eCG=%L{S_Dr>oTX>x?23&!3LKDH1Jh}hbisT_#`C)~Ze!2}<0_nukRrj= z^FLqggcz8jFb*$DJoJhytE`u8rV@$Ul<+yBpfy4Ip8osyJiUb*d6Y`pX+ z-@H8k1V+_@^r4&yxcCOw1=pq>Q*j8~r;8jOV7oS=QPJg{r(jD%mulj^Yxqajvci82n7CE!Dc_=4 zB&P9hY6WjamEZ~OIQA85{=}{?zUX_$evB+c6Ty65JKmx=M|;eFqkV(hvzd;Xg-u?4>QEOYU0V%6X&)cE zU|CU+K9F4~^D=8;G>jHO-f|tQ3JALRES~Z2#&QAkJCZ?WT!e#BuW6rcnuNmUh&%tfWT{QNTF=V7g1pZ%399Y9nW)=Lwm z>{?%;Idl5JLE7Ou#kVM4j@{pW=x55Ib15Ds7P2m&h{J+rwZ5xU=MZ8kNu9_3Nyc;( zt4%Uev+$0CyvW2!50{&3w}p+~Rlxda4s58}4xM*ZVs0hUS7Lpl!rjhb!}nK|{$oX9 z{f9pVvCWhwszx%>5{6#TWm`C@;o*A+#to%rQ{Ioew^!ZD| zshq1Nl7bi^=BZjtZP`w>dvD{>x<5WwXHEbaTWfQQyQ4>2sFPjyr55jZ{>ImUEowM; zu89k|{dIvwN!Vgm=+F7?ANyd^^w#SeM;hnl?*H*Ll>ex!OkE0h! z%3rhkLc%#X2>hJ$kEhpiKA+$ZeZ^Q7gk)eW>nT^yPe1x|wE29s7iaBq9=^S{lG@1# z=?2tg6TfG59zvlVEWhI0kHyok-5cc z#oA=`n9V%b?>p51nfEwFclqIku`{~T;WTkStosI7jnT1kNPQHMx8}P4*Ba03^Sgyp z<(Dkw0luLA+4J`Y0(Xc3Z=%BExmpHZX~@i}yV z7m|q|w9|gJnx1~)z1roK{-*K!B!M+I`Ow!HM@6M z1-tetPo;0}Nt_cr-^J}v8J7V;35r`*u1~F*Q}#BO1~J=2<%!4U!Zz<}5-tH>C&HfE zVUDRxDlvt;zQ#WPFZhTxqOPNzzx`_NouL|d*`Kf|AO{Yr-`|};-ST5Sp z7DdT%zo?TY=la?`F9bKnx!U!4>r_raHSb23SF7Yvs>NMHmfVgQR9}L#A|&}4*6+5V zAq#y&bdr>iBWgOR!fnkz<@;9kMh%m%_k$sG@~-n;ef_pe|D2r}V8tG>&0fDVpi1dF zkMV}ve3)Ixy}y4QvG0=l078Y5#x{O$N~^G<%vB?@?@x52ljRY6y@gY^4s+rurMj*! z)|;(K^XuPQgJX^k18FzYPS;UF+|d=9>YIm#D=)MD5fn9-!}CRrE-n@iO)p851x>IZ zK+%_iCoTmA#H*yP63F&Q06KoO%Cm7r5J|3qb(A|pKo3!v4tTE=8=T|aSP*r)(A+qZ zLsW3w_|DMNUP*@$CBM1h*A-4}PC^tok|H!`X^`OT*3dP^`9QtC+RDeeTCLs=>{;P* zvDm~!3@ps|a@S1Ob#irn_&ZaE->M(CivI`L%O9!AUn=;RW1V6y`t^P1fpMOXZ;qoR zb!8&*AWJpH5LN>%mEe%a4$81ZKUHKhf^0TpD{P&c5V(POF^DT?%np7(S40(x{SvlU zF#JIz89c0&+q-WiQHQ6&{Ac~h`IDkaRx@k>sa>ofQ@pucOCmwNH<&!N+M}{fGbhk# znTRB}Q7f!`x7<)uypQupKvnraVYAvr;GiDgIt%RQwe;qBpr=CD?JPCw+O0m`cc4+< zK@ubDX}GBriL_Y_^fl%?{Scf~d`OyqeXjzCVAn^wuU!;~ zt|!W^=p>QHw20>@jt{dR<~=T>UAlU+Z{@9ubkMpjT%#*4fGR;BIe0 zWrnrTn@#$7?KcC&ttir>+~><<$@|s1kCmt8FGSIk999~b`x0}alI7)tx>r~4`~^!? zVY&d6w|&gsTfcxL_2N$j_odD$$;b?Qi7NP{Hf1Y)pQKu$pN0)p4r0^0ZQCjmN0Njx zPhGIqi=TJk3$cLoiaDx8@8vyl<7V^m_2-`CWU5DVsNB^bx=BOUZUyT{7E( z7*kwrN=lt;vs6r^EQ&=l64;#;1$-qb_;XA}(1fqU<`^c^P8_m4KEOn?_%x=6Dy)T$ z+FL{gV&P;nFH^&zt3Y`tD~UQHXjbRq$o~9-D2Hew>q40eMoVXt?3KO@%UEOg{A=0O ztPYYi{gr8cYD1`zzEX$qlfoZD6C5(Sf?f8L%!OaQN4`Pee$cZYLK}b;+uX7 zX$VR7jmq#^Ai(A;s;uq4T7tSA9pLyh_wNXu`6Oq5`$K2Hrc}F536`?*2zU2z5!BGt zn@=|(b5Hd)`ln*dXq)NlUQNTcwGaiZKK3O$mN|r=#w#O|{BbLnjxRqPtihL1S*Rxa zHhe0`9rtn8+BDLSQ`Hsa&mzB0k%6LIfgkrj59!yGBLNYSn3R(qJJD+>!KPTkb;$DSC(idd>XzUAt&49os3=%yen6`GSW1X5EnpL-Ab1# zHJuJv2`W^KB-Xu5ll3gEi2mp3Zys2idy%hnRVgT|0mrV@Y)}DhM({OzG8x)H`j)oQ zSMyrZ{hJu~gVa)IBVWar!CukBAe0Sqm9P_^+W7>e8du9~-<7NyD6&a`ipQwz#$wpk z6ie;C>s@7F=J=)G?~6msqXxs8s~xoXZhLq@UX^xI;4TBni~=ZWgjkxn=6@O>%duR}*Ck?x>~X11&s#C(C z>2}FK?v08jg((}sWScT=J0 zHq+m|09aC5XftJcGg1F-rdIMUhTx-i+cO(++*D=PI*)q}txaiQB=}?k;a-cZsQn8L z>BC*-qf0%xZ!S>*hhlA%8w8M^CS`Kw2rfuXhi;3BUY)i7%RHHIA*26H@i{t(X};5I^3o(*kU3+p3v>Up4a6g2cGN1)W0J_Quq^n^E_B1^LODk z!B9f|meAEtU)=8w9nH#tUjoe~+xWiVW^5*&CT2`PNJodzcH@|#AUVqwpicOmIBQkv z$RWk5wT{@lN8j&_xKMNZmi=pw`46a9IzwsO-EV1`*D!k3@qMBty%gH>Km}BNRUa0B zMIeRQ5c_-p*kw?KD&k&NPtDjQx>^n~x(@N2Jn>NI=(QaNs#Vf>gJqfnMsu&NhYQCW zH52I!Lb6dkOx8)ua9%m2C=RZ>vasVwe;0>bmiu96ZZc|H)dnWEJ|efdDp^~t@&NvP z%myCZ{t`N4&I0-ktd;4He4{M1S(PqMGl+nVJ+Uag%+G%aGmKZ7%;R zkg6~~fgP^(iZ8%uaQr z1zQ}m*YV$R169}?c7W7~zMW58`7k{ggCd_pq%3r&oJVmb880w9J4>xY5w|;!j3*nj zLtLL0>y^snqzLw6=QD<^7&*io$dWoQQ)S@{Jb2fn;`YUkU0CqGJggNOq`-f#<#9`U z-2zgNm1Z&Yv2lTkZy(2lx1HaoOK7T`^}Dz`xBlx=IAWVJ=DRjZ+4_PP#B+!wCBtap zz5@z4PYogGTqa_=g7u7WUS*jnY=4+qw9+n=ytMkC!qJ&Y^G(}V-0U-%2u^>C=*3Q1 z_wloGHhzV=6Ospx4=+D~g}Ox(Bs%p=f;Xt|y1AE&KE>oRonzIrJ96PA>M)DpWnr-|vspoZW#!&=L6h|KQjR{H7tT@JHq-=GpC!f!4q6mwxYRYm+xuCDLu z7*qw|*bNN@<%b}~(ndRIyTNj*$)Q7>;wEArx}B3lCG7_DY`(OhJyQXbvsn+A9O60J z!lp&Wz0KYt_0-Nesf<=R@WmB{j5ZlLoAW~yoLBAgo?FIM!9ZzqLn9*~&axIZu1jFL zjfYwA)x@Oie;1M*r#tpo5cR)p@Yft5@a(Uq0?l zT?@?7PL>U)cZ0o}2r24TGZA8v1)E@)d=X7O$&xTm7x{<%3g_hI?#t&Zy+H2rH8eP3se<2;x zBEKyd{7*W&qY8+AW|9@Arq@OZ6?^y1MubbY{_aTTAq{ zrDWxcJw;+4*;Qhczr^gjrMoxjE)BBZ(~!B@<&yhYt(^|m=sPt&0(VU{MI3gPuSk96 z+5)CL(zos$etD)I9X;=V#D~YfU($RacPmqWzbAffAw$l7Z@)&Wx_b>5w3asU8aXXFXw%;T&v{AesF;0)EeQVG?oIJ(9O_f6InR!mah3Klhk^DTA zdZr#DsQO1q)bMcS)F>*rZ;XUgNBt!TWBEJcsPt zF?UvNf}G~$ksYIwAXNSP&;A?F?zK(PSs{~szbb}@xfKSa6PQ2$C22*>j&UUTMPYZ_ZSW-wg0k zE!rB7nyQlFue(Z?R_!TxSe{>n`}YIH{TXnIE)uraw^^kzb@9Sn%&s66bFA_z#@uHPKkU7pe+UL(}(`N}_Xp;DTHXg!NM_a>h>iRtXC*p5NW zVJFhB!e0=Kc}kOU4*N4ML-axUb+d!c%66N!9c_`gBaFD{K;$xWNy~FNH7k9 z3d!^0;KAck`=)w7Y)q>xS&)-6{4{u%6#F|1nH%@;J;Tg*&W+v={_-~1jsgIm)Ayl` zZ>`&-pj&si>TIUk;(cbHO&{CG)nI&uElZ-~HEHQB5L}f_xA(UEZchggX$#Xj0CQL5 zfPa69ue&v9uluOA`}UEJ6k8g->aC_Wd4mg_ktf8@hTA;q@e)}xlv*5c_#X%UUs!)o z$>9a3d8R@&qW+rgK9OBdmEt$z1JB>PA?2{khXYNlX^3aoeHo;Zy0~8f&1u_QVXnUzG%b; zw;)CuW}NHZwpU8_ANgc2j`A$lJuNrpF<8#TdkOp${2x9}TKR2$_GzUDl`J>5Q_wT3 zvc>4!3HP$T*J+d&^9OTAh_&O2htYVc(aaJr2@@BdW7a za3?3<;$?y425D4p^bK!D`Gt|vDgI2|H%=rwpWesx%w7F*;5ZOuVC!@O&o90XD)E^^z;R|rZ!Q>x1Oh=vMNrEGap?&w-V#CcsQikd(d$hx6N zIF9Aj&bI`@eB3KdFxwiBVihjIT(z&N=@5~Ds?OT0Hu^u3QwM`{)Q|8#-KFk+jSsSR@y@f5qub z5te`rOhXcT=f}-@qV3)C)Y_fmSIXHnyTfVsB4ry1dJ4tgI7WRFM^QWrju~rdpu80% zVlUciq%wSQ#x)hcQk-inYHEqtrX)y#EL*QPij_9(zT*}v({D}=;30)%Gc_Cu_KST0 zTf3Qaqolo|pp~QBBvM&9qi*>TGbTM#r4sC09K?7iUD@l^>ZAw!)bpSc_EbKQQSQrR z2e6+u+>O2B%{}0Is~83`Z|mn*QZjYYn2|68_9UNX}2W zYScs0<4&iFgEH5+u9J?Z#>E{^g9NUYjJR5!jdx(J9O+6dk|;h2iR>f}f2V@KXxt7! z)jMSReJizR;4ZwIs%q-BtcY+^Lf?rrE=d$MG1n{GqAyZ^zte`k|S`bkk8@h{FmMKh~QL|=hyZx|b(jj}~e(=cMi*B{OFn4f8k#x&* zy;E{22yqirHl9=Jv}TpMFiL8r)BN`F(>Q^3Oa*sEFmXWrD#+0TCS5c=D5U99Jd$su zX;Q2(e$=TqRmSlB^;H!k=h2cxpdQr!d)x6fz&c#^p7oDy4gb659QkZL9A~unz z6E0DoEqZsw`gBnFFqbna3Z4x@wP$|m+3Q}ZaMX#2*H<~S*zFX2)Nm0ToPDFXeZQ!+ z;lneaT0jfNo@No^=b>Ek4^N<0Lk|=Wx!vviKge|er>Q+T;0|IVv;O;wFMs~^#1eqg zH4*ZEo$4>mJ9ZYJIU-2lPJs+uK z_&WP$x`-*}av3G`98RaI@7CqFWeFnIz`wdz!U;k+F49m!>>K{`I*C2T$3E`5>~gx` zPu;g2?Xxy4_tdBbEPqqI`9HOA&Byr6b2O_ZQ303 z+x2ecNdznta_gff`2f3xe%3QZ7SlV&6bGdTI_`Xs%>OTjSzt(7jB zsZw}kgX5gNUvR(9!0o`U$Uh^+Y669T9Rd#|*ugZo81hZ>&X&PS?iPA=qfkMSO58wI z!zOUe++hm!kv&F9oVd+P5-GlaLP@0tfpF=oTF6VE7sFt#9Fq#Z3UrDqKARwHDySND zJb4s5MPNyh#~6WLtk&$$n^@u z5&gAV9xuIga~df)vzKh{zx{#>%CB}zY6hI=#v|3udxWS6! z_hLcEW7x?!Tcdm7U2l0%yuKy@g9|bD^AVAE%?P#%_cw%p9k8Eh+NCYV{om;fc~fOA zX`_5QxwYv^OR^>Wnn~KN-Ck?0D5EqTsDC9a;G|GU^-uIfT645_&xOwbYEE7sY8%2tj|G5 z+}Z{cD+0%~*?Yb^ewMr-Vq9pN$ezZRiN?RcuB_~?N`A%o;vlNXx{%hoqZ{>0SKdwm zU4NX#j-HZZgH~Ls7nVb_dW%E%9=3p)?Y_TjGi-&=Uj9DMo~=7J7*>IC3UJFrix`V` zcP^mlx5bXW*DDN7Dz=y}&^`S(fy}0OU_2WnCZJv^MHfN$Q!|2>m} zO~B#O&h;eKobH4;B5&;|_i0@)JkxJges?y$v`1+x-&c0U6r&pQV20$0_OXUGKNbpi zw%bdVns3VkCk>0n=)cE|+r7L#z4}B@awbI+eQxt!g7F}A_jI67!s4iBP`PK`21l3W z(Bziiq6A?mxW}~6w)FzTU1w)MJ>M}7u*rL)}tBjz1zg_!r z$UEdyMMo0!7hRNNh>kqyDst607{%@!^(q)Z?majPsCCo0P+!ma^E1&>=H4&SZv&II zYGL72D1FNG9^?kVbnHy?{!;|+aX}3ddZ7Rh1RR|&O()NSgSUNv)+5uOOQ7{=Evc-@ z)f6vV()iKA-H)R4@k~ZibX3x%`h2qMEkd?mbp*VJ-N$S0#(U#Nu40=iOM&-nd9y*& zb0738V45vt?#d&a^O zcP9KEk8(4!`xDTzjlCb?JS4A9ZYqZg@bpXtRm%Vlwt zl?D4gnKN(t3Rm;U`_8B9{Vx;3)=mSbd;YE~p_c>|? z^Dp)iX+%0?K2pbPND1|Bt+6{oYm9(MA?Gnu`eJu={X{5(I4@4_MibewK(W5^bl*lj z&fHR6+G+4+!5cTZC{8iY3}f+62~3$O&0Q?qTP-WxU0gj+A1ZZ^2VJ#Xco3>Tw3@j< zXLMrYFMRtH2H>*`MYg0EVsMP=gFRBlxroCY6-|36k2&jH)%}o>sRvP1q@nEI>P){S zt$Z6;YGnyXui9#2$WgKc&Vkgh~yZJN*dqMq05v-)q%-sO{U^fIY=9dWUDok4TE# zC@_xj5*8OyY8A36lcBPRLu|fwbRs|AL^?{!Nqb~QVUx|kr< zx1kS7=0cZRS0^=GT99#$W~x`>Yf|ji)DNfE)`^{hds=ZUpPMuEx27eYtx&I?y`@qc>xYS2bn_YxGnj_H^_Ow8@5M) zC<`6{6z>cnN>(;EdSZnRY}#hT+IXtN>RqJre*HQa>921B2REu%`ckPOZ@sf+s$JT~ znuG)pg~a#z0Us!nHA6mvGx*tuIIqk2-O)+&3M&=@tf-iJ(h=yx zgEjuA%y{-T5Tqm+2WF`c>MOp_S?r)-@$2lYyFqV9K&9KscX|=_#{Nd`)|56p!k5sA z_!y|{?!-W0PJsJ_W(?_fCzr4hn9Fh9X-(;-A0FX#;6c%(LfYNRl}F``sozW)OJIx{ zL(r+7W={v`sKQm1fN<;(mzm-czoWw8pv!}RahQ{LmB~!Li0o6c;C}G>o_5E%IAI+fK zsK;06WP1!<*MGf0+K)<+zaIh17`{@IX@|n#aH_(E~(o@BD|!qj^N> zXTDtEJO)^NaLy=}CM@n1n|mIqleng_LI5g;{6}!B0ndQG1sX&hl&ekBq*h)sY^aCN zek^LlonSeHxkOmKTy?-?k!39>;N~^IiLfz9tTbe7pEtJkr9#u z@xZQ5G;nLqVFObEwjIe-z41&UnW3jpk%doxla9&IXvi|a*8ff@bZ^p@vw1)*QQzP} zUP1!CgtJb35P_>N~uVWR;k3SX%HYb+bTFV3NMX(KQ}2H z#dLbooFri?d7X(VEYm^x2JXzRS*(b-OfSywqcQpE%NUhs!61K^`{CE8-DsRRT@Djq zs}skYNr4QO*EjxBk)JXSjBtU`nhu|f2;Zk}aF$oGbz|R{@!Z$Y$9+$!nD++L?$DeV z$Jq=WHibQaNMY~L9Ny!w25Xb_PE`+L&O7&fAkZdUSYJpKZ3uhPEDx*J)GV)#$C;W1 z1lW+5Lw#EWgIY$_w}YX?wBc-PV&OAIkvn5YwC(l%21US3OqGqkB^rHDE;l2erP zizD9OuIKs20r|0k^VFlSaUi&1PHSm}S@K^_>ABL@#X(}A_CEY^GPn0s_5Ah;?pLGc z)T>m_u|*GE@T4Q$n-9Yo>?^b_u#C9iYfa=pe-rZ@fT6B%2V>i*%RmWG%$Uk&f?PTp zbk82zA(AML;c-O;f9qSmJeA1>Onhina_tgKBKB{Xc<_pa22xEQybAM*@_%WV+t!kF zUp;hwe#T}G-(6l`>jAXYO7Bh*u^vyc=!5K23&kU^WCe~Z-)*gXsgG48ZhI$%TR)gQ z7j&`uPE4l=z$$0auJU{hY3@or{V^F`RKoKh!~_}TTm5JT`%HOAd>k`4@}ELw)~iSq z024-bjsq#dbG<8>f_?+q%@UgSh;I^zS7EWn72!6%O(!c0CZf#A3vIB~V3we~pq`3Q z+dmO5+jA`-uX4$}xVAoDVarJojQ8&uzvNWAU(h#LWB&dK zX{(YWRFP^XL;Y-x=u=tKnr0^TN_ z;qKtSS?}iRkhU!_U86$ku7|Zc+-(6*FSSpCPGh;=H|A=2nuO6eW{@uJ?g6>wp_-X- z1@^a$kInQTpf9KkuOF*x&tas-KR;I5kg^oEr?Ip3RzTUW&&?|d7EIP~O}DfDZ+F}> z!v~cJ(R{fzHD5GZY}#%<5+P>;)D)&-dhfoUpUz;tL2#??pwFw0ufE>!KJ99Th)Kyz zy4OmQYa9yP1^q~8w&OGH-j@L+wzaFB>npJ z-QBB0@}eoXImwM3Hi?pUI%)Eg20kMbHh~#3U$Yw3k<~k>^VuSXP2V@D2P+Rk9{v>344MKQ;_#>;Z)IN*n;p98T=`d*? zojGn<@w%k_S&L)C?jm9&gsH-6EwK+CLz~ji_4HoV3La~ldbCR>?k+PSc*Y=(P9l22 z^?m_yMgjq_IQ@72fd-qhchsV-blffuhd4n2fR_n*gf|_DK`?N-UK51bT;gHLzj#)f zpxBjknpb+y_r}WPdE_de=m7m@+4ALRADPBJI4Drd{R=_=?uKThBd90=PWf}_O z$sJxhw1&%a#1ow)eut2z2_ok!h_Y_Is(L;pW_?R1D&fXT)X`)nWPe4#>N1NolHz_M z5oYL-v_{~rOotb`t{WW)T565!*woWb$icey@@rK6VtIC;pTpbSKTLdv70)nvexboM zI2$~iAAGD~LDp%*l{jA{a5`?g3_^uHrQI06*;X$Qvd4ee zA%L?D9K~uS841^3C?Tixmz4**DfrHgZ9MiXt+~qLHNpkz zrDA4-*&&WZJ+q7j+BGU>!}+G#l6OrP|&@=76iS|XAi~7&_v)up-VPQON4TrI}uEk!V$U{5NEl;&D1kgf$qa_L- zM{#Z+{CH{p0kbac#zc9<;~oKihZljsv;OT*qxphY2S2pIMXp8SNjls8a-H|J)lIUU zw`=T*w?nbfqP(!ixlJc&hnMXg0)$xj2n(gHb^dr7bnwtpbA))`ea-LbVRFF}E{7j6 zs)4rK88Bv)HI3oSxhOg=3k2M5M<4BP1z~P%evrl);uM|v1B`{4<+g}@~mt6_z?)(smQ*Dh|2_a|;RgxhhMm~d znppnq<^{^*NGR_W#U;z1%KTNq_muF=CMrT%n_T+!Eehip6l7ug;OJ=bL;il+@T@s4 zv?GG5*~E#n7eN&Prl$0=A$T{4tRt_EOaNrk!-zgstqEr`sbu7L64oX1#&+%vAa9)iqh$3u$JFzCUNEXod`Bp*I4KtH@_9aw%TOqr-}=IdCbfSm|K%sCWMizhjp*zO|H|YfN!hxzLl$ zNH=Q!CPXGJ_Gf~8NBO~nmMlX%V!mqf0Jc?=9xa3f&UpZ>cBV}h8l_%c;3l%rDGI=6 z!als*Y}@{m+<=w=nY>h0Nn3d!ikjSSN4K8Lq`}V^ULXJSj1=-4-WjVNA?Nh2`C2^g ztG@Q#Z9mW){=`J*l*l(;BY=$5P}`t}rftoJbwTx-ecj&&{cNFIjFoFdTAfW*Lsl~r zVh9@huDrPpo|qV3=8%!upFbQExbZ4Nuyt75Jl>x0Cms=INmfLKEYI%Hl8Rm}xj9uY>nOkSa2#eh8vMk(7|~| zQ+g3YuIX50Us3#XA_lvACxG1Mcu~bU7<;Lxrj{wg)ASEo9t(Aeo|{IOo%jBPqp|ZL zGtVA>ldcaH?Pw<`*o<0kXrRsGxYVCIm_DDFCmOIQm)UMt7}!%U(5gtJijKVYE8npb z#qOk`i&v#J_Q4IqiA4LvPUWr!>4^jz|8P2uZ>0$YeY^8j&0|I zSQlYE)YSW45$22a&=T8b8ugDqT!7PKgp_{1Y z4wm6d3Vf7yptXw7UVz2bYv*DRFfnCuRHlaYCH$ilg-nB2qCG2kt;k;Fn#h*z zQgT*e@ae47rQ)&I5Dqt`3Z(I@<-xCE~E&2+J#r- zpvUk-xg<{YKq9hp9mg>2^OMuAJ#R*Xcf@mMmBcLM&H3gHs<`0nl!wookh$%amXyp@ zwe2JDnL7o?kClnGJ+v)XinP=B40Tb}3luU7@u7NymrkC{Ri3{M$UVeqd>eTpNp#T+ zFxiK7v9K1gk%gIG8pa8fxE{V(t+l#&qU1-!Lb_xA^6yi7;l|{>2Vy4Lho9%P{Mp8S zenNxGiREPs{$yPL`U3e0h^dNrlGvdB>7rkskm0Tj9S!RrUgsCh4$gu>MoewMs=oI3 zjQE}P24e01*JE8gfY;xr`)(5P?=AALfU*#LSp9r|CF)+OfJg(_{+5cKZ2Y|;{?#0C zbPO62aWwo8Cgh@UGEAm8zaSQa3DL^8?8nc@Ds z2vAx!7U;uV{df~L@K%MfcRlf0vS+gU+T&-S~m{u{;vneo@e3R0QO3W2)+=M<6D9_6haM z)cW=DB{H~=^Pltb*P0-v1${VsP3G{*H2UjfAh_pD{1xNXVg31pd=?aD;O4bV`kzOQ zV2Xa1c<|mFP3VvsE>7}}*GhXjfz9cJPYjQL1*=|&%}XJL))7FygCSvkY~0OcF1-+ME1M?xtNVX<7F_^nR=-n7O(gQg z#i`Zqx4Tx+w24kle3xgtaKnTmEk*Hhe^@h)#9{PvC&z>Z$}YQTld7(}p-pwXy4!Yh z+>dR{i_ULLu;@qnojBt6o(|XFv+ETy`rhbmSWp@t0gYj8{&0`G{~j(JpWhRwu=~dwK$(Mh5m! zo6FP9hPb#FpW4>SM|2jZ9ATEB9Bgw>?v{Fzc^o|-t10PZ?Fg^ywo#%hU@s)OeD)?>$wonMxJCQ3{?$mVX}!HTKaH~=(H@yv z(AKQ5P!I8-`W8gCNq}I+CgoL|av$l>97;w4#wgoMs8tvbSP@IwO`ntgOz7Rx&)4sc z)`2Udx8K?0XSW>yBeCOWBD$aI&&;}*;Haac@kA^hY=<%tqP4z#H66*HS`Qz==qle0*rSPqrZC_MP)<+Sr6W@*-2|DiaQ zn%d@0 zfdYy4-1`1`p^FpN8TUpm`XNQ9(U)_?Y2I_xo3dz6hW5BoZ{ z(*u(RAGzsz92@G`vLTKXhn=ohCWWvvKm&OOxovrKpKW~! za8uz5A?PUW5ip;-ai3n{6Bo+xLY&o$K<;@0O`oE^ zU6V}m1xj`R?z8bS(*n(8ECB<_U+rqRRRhe?!{(%}<2;8y+dZmuPrMc9TP^+eM6VqNHUtUsDt%D@-c`z&UnI7_i(oLS7fCF{7?YNc>iJhwd0N8i>jG3Y zj&3G-|6@~(eUC7K7>njcNXoeAu3;(bN26Lkb{#u%Ae|ms*5evu7`ZLk)|P|IwygQg z8^ta0VQ?hO`3!Cjn({@z%usO^$N~XF1N*X~E8Azx!eh#aD@eL4V^1Aud>ix z(Z_y0som_ZDBbhRFGnYDzX+JK78TP|#RjjwdHqrt;2_7EW=aG4RUDGX$4hBJtp4l6CS1h_01O;GmdBoT91<>GQmsqN<{DB>++5T|!_OSTLcVS|x z;wk94Gy$fTb}H9)9wei~zzd^X?xh@jI9%%cB4BiJyjZCk6O%QRBLW>|7qZJ68>dWm zpAF(ah<>noabMhKC+*|q#VKg_17n!dbM2GGJA1uPZdE^ft(_&CU`9gw3Dty>1%}}x zB0wyB1A#u44P7p>kLnDxx#ILFK@gebNpvQ1aFdt{GW>i;_xyv4AQIkThT<}qC%}#M z+#35)%0jWPTVAfkhqe(>{lm||plo#T!npIn%-oOfdzPj$E}^hn zS;R~Ail=>R12#ZS5a`!OKo!6_8g<<^&SH&yhupB?qGj$GEGwCNN>^_lF*vT5oTSw<D$v z0*%6lDx4my$9>?Cq=snRRm!e+S0pjDCt69`;f$nGCtS%J-<6qjrBte4qC#8DE|zzV z(z20EqB0KMX$ht&IZ(4KA9CvlUYunm+M5=gc2r1oi*X=Q#@>A<@!NkkW&|Xt5CFYU zA^!V4GKxqhJ$5;iVe7q}6A>dKL+f=aS~l})*5j|qPHxJ_ht18&Y_qP8w0qE*9%Dcy z26l1_*%WOLKgfs7{0NP_g*n8@&r;CkbFw%-a(g7*CRHtt6m0EkVFs~+E_+LAT02H3 zd&|Yyz@`ts@$-K8v>tKVX%>?#0}nCYqI^ijsL=~?gJUyxPP3Jjx`-U-Dr^Pd9$*&) z7edmU^9{kvGg}?i$?4kpsVPIzZFzL$lg&w&O3!OC}H_82|q5 zE$Ict*xK$al55$s*Zd`8?|%*?;r-^vCIZ{@QaB+pm%j#E$`;(VQ|OGD2EcN*WN0~!z9zAKSS>g-^)81d!SiS zmU#N)oym&qI90V8C%jLh71^UIrK6a|JY_?>N3`5*3}MX;ryDgnQhw<<4&!s07=dLE zuE%y}(F=r5+|asfH-wI2s!0NZh;yavNtQMkLu`Yorr3P_M=f*UPAkI>u~VUnY?u_ndWOdj zU)oNwyOv7&w+w%{Z`K`VE({2)$=eOP#^T5F3G5FlnvOvgOoJ;Fm>-|9Exv8@&2#mV z8N1TW)Ahs*^YMynQH7=J10oAR9*}F5 z+Yo+HeZcmuJiKl9)eYv`<_c8-}i=C(m-;xt$dL z@)c=jMnNv)`FxbFEc!Gql10Jf+iMMO#LZv&wKTR$G7L0jE=D{^o^287EeE4fyTV~t~vyM-$ z+J@kcnI_nqarH2mx9~pptt-B`_cu7>RxDw>#U0yf)*nt2`<}#QDPi`QMHnVew(;f7aCuU)@KkzQlC4{g~w4FeY$)r5M=OL6Q1R04Rv>m6|d#aNXJ=^KCZm%yprs5$5 za{WLv*DhJf&HH5H%K8~tt%Xf($m~6a#~AcMS|0!K;IomDj*zq8Jk9>WanI?y_X6;E zSYxSZMP>JJgJI{}o9j>L~ceoGAZTuXoWF^)^tRNTf7n=^Q9xl^#i zQ}Aps`>Dge6Yw0_6lAK0Q>OPF+pz*N=ive zZ*&WW^*?w#8X7RbzNpFLWUx>6=C;7QiJ1)&6zGwBW_h|$fshZl2NZw&SUR&IjZ;t| z!Ur02+$iDK8iiTShndm@_vLV7-QO98ic_0We+teoX=i|MD$Z?~eVqL1;TQCtB|$UD zn#=Q{>A2)M@sh6Hvb^gC=X|YFPbj6sUf&WJP5eQg;djRBwm+m=oVr6~-Oast`@CK_ zMAZZloB2t?5k)Ll7Rzw ztc(B&f(r4lD&Xj+%C=OP>bZ>>M#TU$PO&^vl(Y=+Vu0Now+S;aZP$(_K%9zbnS(?tOSSH z<>3O&zQiSoI^YIQwtjV4>P^YP`)Gio(Kga<1}NZANioydU~@w?qv;tB+ooa|iE*_Z>DS$F#sc4W}fXF-TO!hHu@urH()) zN6CyDrHq*h_+?hG z>5Ycrua$+Y);~m6&*U1^V^+K+c1HviJkoz>Cz~_lNSz8OzNoJu2~N zWIj6YUwS^FgP9Eli?ya-${7gV5BX6Mj5i`?f)VtWYr%E{>4ambvP5L}-l4mnoR`s~ zvjCW6!N!W5Yg)&HllOLHq4T+#jIh;$N*yzMQx$~v=*bRd(XSvRVRWg$m@8`VWDgqj zV7NhpE$>H3C;Nz&n5H0u&|Xju6Sm*+v+Oqx{n!8SGl*#Kfk=dy2E;f>z2`_B41;bA16*Xm;np_&61EsLt*X8a;y9EcQpqJDpFidgZ`@z$XVL1mSin1v!#U zOh_g&-JA)w*}qC-hykTZk&=uL&mqrcuL5Jyt3UOqA4MQsEhJL3AW;8 zG?_#`?)@3bEU%oH_iz5;aFWYEN9H0)Zo2+vL5c7u3l7ISMNiuvFYU7at3*gL5(sBx zHwvXlMuT13WTA z^kw>2lK=(Bgl!VzBRg1m3y)To?a4c=0xS0Lckn*+cEODnaChN1Pv-i=k3t@Sf+%@P zjk;LFXrK(=eA)intRXyLSBGvpuYyizrEzhCnQha1F!iDwbSQukjT-w8CD592?*ir{Sl=keFKGaUb0o*dxu#s3>2 zArH2Mj}m|V9@c-#`wphx7TteZ38n)SjCn;%)bxML;{y|q6y-k?(hE`n3Tr($>gWAW zd0rqc(wKIYL;Pz(w{n2O#z=Kt{u^Z30{!WP|DVnTfhvFs$R}o{H~(8+A_%hlZ*cxM zIR9HY|64i#Gi(2U>^Wtf4u?zH8MsHsi%DH@E-6zcM$hAPopYet*7$TJS2<70l6C!G zJGT5j@<-(C6N>yqtTXVtg|~NKJOpvv9CecvpH2+M{AV1a&#sk%$W=6;IHo)K4<|Fh z$w>=8{8=~vi7CEsmrq=_2ajfdd5ifNgbSaTSpl`6a%n52|7ZiweX;a~HjTM__0mpU zX{MRe^QIdt1_l0mR|GiZui&u4=ZCP}*c{&l5M|VZ z(*y$rnw;=3^ujzv>Na}q(GP#Iurs)S3Xg$1V@7jW#ySJ^-3gdL{mb<4rdT`EuA4DH zestmLA$<%XkJ0k5H6w*qpj+U;*fz?m#wVE)*Md86UdBdlY7?e&P7e!$>$lhwEQD4_@I6L;wy zV_(-^oUDR73c`btkpJmy<2B+Y_lsxyO=R)+x7@S+u<1=BnU(003wM2ihh>4Mh(Mi;X&nEG1&?6cYZv!pWm|%deZHPR&{ClYF zb0NVijz;b~fa6w(^K#8{?zMu*nygqiLPEU#u=)b{>>qPpQ?}*OMjaImc$;x?{*$FN z0h#1&G`GDzaifLKm<#W&=1!Pnl>|iVYYV!?Kh+x%vfnQ)iW%!N%8GK)v^2}AUkUA@hryMYM{Idza9z(sqO4&HV7Wa>0#t_Ke3Cu02+Wecm#=*mkvc3 zF0=s?-DDWJ#N%E4Toc??OCW5&dEt#p`Y_4;ASOmXkbBL;ikn@g@TlF` zdnK5uw3F2+49bC?*K?!ecm_Cevk~`gB1bE&U}^#}Bj%#ok@!g-Cpr7AMdu4EVHNUA zN__T z%(0BfQZ!q;J%Wpra?2Nodqtk-jECDTWKl{zKow%s_w-dYSj~?e*NR&ffTajk6^PAW z0ItAn(yT|r#vMz+R#DP*+rC^&9d)AOKC!#UJC)Ne=FB^9lKg3Q1-gea@>cHh*kv=C z74;olPvRUWB9oS`sXiDtclQ_~PKd?sAS6kN)&z1sJ?AElGP5BDN5Fb5P>KrWYdI;x z1Rhk+g`y-CRjh${yEB&$pz&PAgFlobbkA)sgK}VKzp*l=G-TK1a3g^5 z{`r@=y$%d`rPl7dd(UM6v1@3rT)ilgM1ECiq+QSXh^}^V6@s@$+#DcQcJLE(c zOWKBgQZJZo^cHu?><6E6Lk+auO;yi>@EO>|KHQ$!tu!C$R!(qf6UAEp2;_F`w%D>}Qixj~oTlid# ze_!bZ;7JcG9o+k^T715k+vSSNUUJYFg2?>gapkyKo&oD2b~ZcQ&lMB>Tf-qmB`<;Q zCFix|Pe#w;ZDP->^4zi#@hb*FbsvX!uljeJjt9nFi{N-kxV!^eQ#q=4L-z|jUemFO zJ7>2A6g!TUG#c}qtRtI1Ia#m?rb=bzkLRa`xtE=fdyZIBOzUN44G(lAx&<%Y9^0=d z#W4-oPVsVsxQhOv^n>znz>m3hg2`%-35_$VR1v|Gi<%N4L|FiqWBZU<9*l9-_g4)B$04_)Pn$Y7&IAWF;12&6!uzgf#`&3U~Ck zNA-io^cgMRE(ro!r^?=7d1ItkNbLcBw`>Wg_ZUw?DjlOzDZ_Otbgp-L1przRP$HH=s~hH0p{a$dkLY zVk-WME3j$ZLb5mzp_(~cSS!;tV6ZdSR!p$8^j;)s9qcr%ZJn?o9B8Iu*NXD9^Zk*- zW6H~G^;kr7;z>#)aOyfK&A}a%xq)-Rz`Wb}IH-r{m}kp0fVamn0(WaRDv^vjeRvwXb6hS zvns%UbIA^cHUx%Dv^Qe2CmG?hpWR?16CDnMC9$ojUO`w75E(4vJP`QOA?N+qOp+ zKdtWia!YW{pVay*P7dw$s+%DZUi(KI8)*bOL}eo;g(Qv2`Kf4CSX}4QMvQF?OUb0YvyjWec>sv8|7iL7+uN%UYBSaM!wHig<1! z_;jjCO)hqN!grz`2Fa}2yLhMLwn(n~Gcm6n1huMq`^H1YD>Bc;{V**_UZxfhr@WCu zI47hO#DC8E{HjB*#D*A%mcKNUQ~%BM(uJ-K-8NdqOig@&+xOl{Osi{64YW=y3pd(_ zNM}@pG{^D;C@bCSTf?W8)P!ikMZBCPOcHp|9}zG~E*#bPa0GxTlAyARNcY_L`YyPt zBUCinfc8S5kO7bTX}GPdXVJ%lQzE zDMdm%QMDv^qP_IKfh9E!asG>t2SutP!Us30B~HqDv9g4(hMWmJ_B+o}gy+R6IEs-b zud-apui&g$^5<4z#Zbm?8r~10-Y_29gmP}y?h+Q99Kt*~`}$r9$juqT-Wf_25j>{_ z!nc4M3)){KlV~RUa9$PjH8UZmp8r_zrA^#AW~o2Y<3+c}0a35~8{>HunBkNPZ@K7I zlTsCL%4Bx}0K-0cUvDH>Iyy_C#q{iGn}(;qj}Hhb#N6`(BxXwX`lVY+-nQQgM|i|` zwVJ=#$DzTmKzyP(U|so%IxxSpH3ezTh=l(;QBc75%{628}$B(HwwZNTpiQYFncAsbs zpFl(TgjpshNDB21SBCPkm4%`?>^K}uFzx}br=))xC)1$Pssx$X0 zvkjV$0*}y^l)PhQW;2e0U(J9heL1xuX0q?xa#qrYc|SS=J3TX@*2~YM;NFETLBwOk z-h=D&Agp^F+V-eue}7rga`cV!AgM7c3(Hxzu8_{P8zO9bTy5KWcKILx2?8z_2c!D8 zJjt%7msvMxH`7KSJZ~^WR9M1)_ffX#w`qmX{A?2rl4h-|SAv*|g9>RR}PCKKviYxR$aLq9VCY+D-;)(|WC)MSVlt#S_Qn<%a z$qJQpUw%fRF^x@H%L+#Uj+<$xfu`)vq=!X}S;yCD$^N{0{}E-KX#fEn$*32dQ^Y-D zN;)j2ByCa`f7oNvy8Fn?MpyxL=t#3)Al})g=jG~)fI#G%&PQ+PdP|MvJY7Ky{I)t@ zdK-x3n1it2y(B8zgI@Y))hFrqNxwY6@adfTLa-gIOC6|KRYG?0mao$jsdCary3<8O zDr?IC;oJxwnbl=ls|@;=j0rHI$xW!)cjB_zcIdO>f5b84Z39oCWy3kD#)8$^1(VKs*(XYL>`D|2^8&6*CuzdskFr;VLr%0aUv&can|u>H zW(%KB>xI<=)#kbN!eXolP)AG`eyUvsLCv|@xw02Pn(4f-2h_T(tA#a%Qz|K)JOmWZ zevr}blfAa%hc5##s;jxw$y=9G!{+>)G6EF2IP_wSL|NUAUh>MOJQ1-Z>#O&q9Vf(P zFbzF-IR5uC|Bja&DxT#x0g@nr-#AozIPF?Nq#-&)dJIIgBPI$owg*6b*&I7h8)QBK z#-$772852d(xbqAJ1gavgHOd{vXVT;_cmd$)|wiXEZ|+AjP`lftd;mbbx*<9XvW&D zdKq0gn$8k$@~PyL)KiKqZyy5?&@}=gERGVSV3URu-pyM5({%LgKq6rDh}^CBcLB`y zR~MxNCgG8xD63V8+(4GxVUGfYkbnnANI!!k7YbZpF$;f@9cO@6?pl_Gz=gk-4FGmO zw=4NdsA=*1lpTOT*ExS7>nW6$pN+)=Pi(-FDNuIr zGjnFv80*SowCP>!5MTVZm{IfI#Q#-K)`6IKOLcm!d3tN>>LPIIlA>51P}-zN`QsB8 z8)t0$svOz&ZTp%k&$SQH?mw9gcJr94X%tk|pX*NxBX@JVon2-<4y4)caoQ7MBdxEf zDuL^M>=r@|8O_j2pWHI-TfR}u_JUkq73n0Q8m3ofpHVoZB+9+tv(J8fsj6(}CH9w1 z&;8)Fu)i^Nx4RgF<*8UT4%*;5ulTRp4r7K6lcJI{}z94npFad4uOy z7;kP>C-dnLIDLaB8Wk3%26{yi+i(>p+3CVb%I>=To20NgsVe^wc0K}#>r^?Vj)XyK zk?lf)NrqEvFvUj2lCmW!w@teJt7r$mSAj%>2WZE4f?2%M0TNhwmro?@dCf&<^e|Yu zAEU9np#PgBA!UgOPKSK_t!wJf;K~^za0&K7+m0g6i(5xqUv9Z;6vF~~ME*o!fI%dX zNcIUQHjC;kXD1G6rBJcitbl$O#Lh-VJRh9n);;YmzpMj#s9`ou0T@Z^Z zSy;}MYrzv^9J9|Uq--}ae{(`*>|o%WH}*sB7!X0~Qud$WQ*4LQ5%b9&oP(;f_R-VsY41@8SN{E_!X#-K+8;xpeg z(YULo^jGKw6CUro2FZ}RitT=)A_z6e$pXI;{@<@aybSb)iMnq<7P11cf1 z1t3S^067!Y>R>zSqV#9d9aMD_4F;*tH3Ac}0c7381EZ~CnNANAH?gXflJY#^J?p^~7QYJ_b*w4+=@+oE2 zfrmj8cpW)BFOIWBf98t-E~Zo~I{O#5$SV9DD010B20N3q)kfW+jx8{bk>&LuJ%x9; z0z|f_Z{FqT1eq1zJnq89s>ORCJEz0q({iR!5C0(>u-x`c1BXn_r7 zi3Y$WQ)em|H#vs@VWknyHl4BpN%}~h9Kgt_Kr-=UP1Rk7`?7)`rD_Q5`7c8F=P8WDGjb1n3uFOw= z@e3C3O=?J55W+1BI3@i(i22#&k#$2 z@-4DlprBFOfV&DHg&4qFa}TRL2xTfCR2Yuk0)6!uAg{oT4vZIIhg$&nkOTbBm8qbD zbLO9T4fmWu?d55bd!V#rZfU3;aJ6d2TMa4G&_8=BlE{++fq?@2=`lQu4`J< zDj2!N(~m%T6*nNu+XWlp_}D9aV~E52_547`J^(qp`y-3Z)*#*JEanX#ZuA>o7qPrzM-D<_>uUZv#*wbyC6 z_0P0AZz{xsX`)cTZJ$yVR26+%$-StTPoCUg=H{Gy`PwsH=G4%v;VZ865f@RgC&hyb zlYLL%IP7{@7uN1b@jkjmaJ^6lIsz~k<4K8=1fKg{Nud&ukD%^}a8geU%zRhwI(h{8 zOeMHxnDB0%lnIL^$TmghqWp-jF=r`{G~@r|R&F}FRSwB5-Fv_ndDwphwwLb@?Ftno z@doohM5zc1?{-V+X+^_LTfAGS5;sHaiDoSW?4TasvZ= zkjghBSYeYsQs;W;A}F_YU7Bo_(bX_FEd^coq;&h+-8jIAjS*tfw%R{6;sY+lFftTtRXBv>60To_ zi$TYLq3)X85hbbXEx(+PvgxhVlluPHF?mQhKNp(fc$AaIXMQ0Pmm*M z_GTx@o*T2o-l^LI*`K7YziFem6ckRVk-fPX-(Kyb9h%cl@`3E^RxN1kx zDq!HdIh5(eYnm67D|FtTp9$H181VG58kyH8CE)EM|aGb7Mv@w)b5Aa-brRZj(1iFsCW@_DO zHP{x|4*|n4m+JFDYYxBY8*AH5V5iGH3vUlDR2K%qU6d&3>7fwJw(kHVspVV~S0^nj z#Re$OtYl##rnnw>xrMD{ug^DbtY$;i=CRR~ZXkB8PHpno+*yeWpoWN`n7SbKC%#)S z&oBw{FBi}%*ci+SgGZdSahR3Fuv`KKPE2eS?L?poWlhYs<$B_`6Rp1yi)lDISJ^A9 zm_hb-{J2xC_uq_c-E`&N_SHFZHY^*F6(F2zl%pIFDDdbw-Hgae)9_*B3>Gt}asSeK_vWL$A@nNY(=v=uUKzSP8LR|>fEc(+T0mD5 zV}IRtNLe+;1o1Q1qIcW3WrKJV%ES-M%K{dqDg%Z-YIj3+>%hTFC;5`P7(8Ouc?A`I z?wa(aC%9cEZq+721Ekj3CzOU_o#?5oKCm|$ZJQI7R@3O{Jl>Ld7e47=qm3t5Lc`it z7A2n=&dmJ;(JzH0r^u9iAqthLGr%P^^;;Lw6<;)IItOfVjA4}auzq-srvlq{6neEH z!cz17o@&++Rg&>L-{Uv5Y}%;nt2#I5Z)f4dAxZI=Mdy!*k}u$^}Pq`!tH@t)qz zx)CYg6h4^eW!*mQL*VW`2HC9>xqBspk*FlDiex!^+o5S`@`%Z41m(@bI)3Gso?xG- zoFaQ{^npKZte7rpr){X^SNd=)s`txR#Om!suBY1WD`C|xOKM{zby;Ah`Pu3A#rx_pV6aT>QHglIU&d!mXlQdr!W{tj{4o-{Fk81TMFf}H*vZU!O zah4BS2K3m{BTzP+Aj@mLyOis$I`!jXA}orRGD8G)bi00Hr(&fG&Ui+UbTgALHZ+_9}3;;xX!nzkdWw+4GwZ)i5H_- z!AS4vfTGa5b%mK|9eQ}lvI#c$eKULRm4SoiLhYIpj%rV1yP?Wn5S~u*&p%li>i+n$ z%q4N)FvL}M&g7v?_l>-uJ`p~m2X05K40oYl^_}l$xHFx9S}pX?sJZKYU-Sl>z$OZj z*7wje_G9R6b-}qrw;jnO((9DcLb@XWjozlgc?(g-L2K|G!@@pid=1o!@~-)jZ8-s& z_C=P`_JdG5o?LM(ganiQ3y}7B(0!^^Td3jgNJ;xRh0|00<|Od)n8ym!H$gz;Vj3m& zd~N|S7OX(fXa98HhLy-HAJkq$=oWVA)@^cr$ARNt@;M0NUIP1;_bEgL034NWE4g$QL_YDQ;`Zk}4Q&Idv-rfpmtvE#!E?GAFQyMCP zCzL2&eU2h?sBJH-ZziCa*`G)22R;gsyLSprJ2cw4n;U+?SX3cn#+Zo5y?G;sPUldg zdgL%>=Uo;Vz4maJs+P6SD-}44zYC!1Xhlx^y?Y%oy0J`v#Sw!&?`CB+^$9+sQ6f1J z=vDD$LvQ%}5m)BEh*mjXBQ2yn=$Y6!*@eF52s8C%NaUCb9qVgdV5h&(kwvOk;W(~2@8oizUjXE2o~3O1UQ3va{4ei43S6FgEBrFkaHwaNLfIHs_rMnVY7R?>rnjv_Qz+-@epD-%=Fl038ld%acSXqWhsqYnN5n?teW@_M$?j!AhfqkYBIv+ z4m120F}%%ajx5d z$wF{VKvrka&{JpGy?dNE4%TgXXCz#f&eTC8AfeT?Kw{8?-ePY?QKo5Iij`R<@GoVk z*M02zqWul@hWlEH3Wf89aw>>BVfSxT`7?iN)%fg_w-vj!a{2`8Xl14Mp@psGY3})v zUIzLxNb{a=@a%72ZH2h!nZ7Dic0cA1#eJfZc9I!_OIhX+94Z+8(cR@mnGc&xpih|d zYCDw*JtubdL*OkiSU_U+ZJRtThCH3iF-gV;BP6pUBEu5(?B80VL7Q3vNzCoKYw@^e z7-?n>7trdR9f2T!$J7g^h>}qPQ=&>HZPm>52CO?}F-mR1WtR=u4Pl`8RKPwZ#5wkCyzK+E{E=iNZ-4IyX z_mq=eQtjLPh{LONUo8#(ncY0Y+EC5Mq35Cdl2y8o*7R9f4*y#FY)Gt6Zo!K7mFY&x z1?qb0FHX$UnvtDL9btH;mXTys(0oU5@=Pg6jdC@IBqZ;GGXT`A&Q0ernsedIp6D?CEq-LaJafNwe^ z1KcO}ARV!BQdx!!l0aFMY{wop=Ok7`zasIxAs;HYdYP}nr7R?5vm&mmO$2sCWIEum zA8pUO^;~Q-`EARAnx?8K&*RFnrbq;r7@~Y$;jO3TNv6xAt=hTH|DW2vGOEgL4R?Vc zCDJ0GfKpPTQWC;KP-zh9RA~v3Mp^*{rAv`Ux{;Jdk?xcbK}w`k`o5pwKKqO_#{G4# zKOBy+_gd?VIp-Tsyar%Uc!IZxb6!SWfhkZ;PP;C@J3~wkKQ`)Y=;=QxZCfC^`-1N8 zsw@KkkNpFgYFy6d`LV1l^@zO2KX8EL3rn=sGhcb`WhX!!eujjj#@}mD^r?{~3cgn# zy#JgK1SJ~!bF)>nH=e4SVY+e&x=ZcQKGBj1ts)E=^9r7Ezj0Azy^`FLg!7J`5}gDi zR@56{u`tu3Xdgdh@7i#65{w-ukeW6g-_E}rSnd<5SxGz<)9=e2+EZy$H`5(5fLL>Q z>27_ZYyhm!WL-@QZ^{ z3J-N!>14)l>;7pyHPU)6)7-4gTyoCO*)S#D13*>1X59HB^8_SZuCjQuk$|G=3x*JDWF!wS*Xgv^Mo;6=E^{B@> zP^q^_{Ygb1e`DQxdE&kH;h+x}<9Qlx=)siTnbi3dDZaX$pid=NK_K@gnLdc`M%a!E zuGfM>CX03%Q8q>0OnKfApFtzBgPBGnhN8~6d2QW{yxWub2fTL7l~Gwm9JnO??{k)WsURdmCzc!J#)AGUl{MwMac39@?;6h#C2U{Vyv&-*1<=>UyvLr09sn`UCtI$J*qkX|0D+~y29G3zp1G*5Iv-U8!%c_w?C!b+yvvV&2Pf=l{^Vj5#QFWz zIBuf}ht9>yp9SxCt1r4yMXG*~+3!px;V({AW7Z!`pQE4@O7Xl#7yX`En>+YRwl z8gkO}&estDENj7a95aAa+BA;*lDq+v`n>wh$Ut_ij9^%8kIogHDiUz{nywW!@+d@g zE>71G6sL=Rl|h9ix*gUb(H_!x^f-d@h`#1cE=K(r*q|7JitqBPO6Fg_O#N02ABq*b zg$J9{{e`3zouIj3wFY>%cYT(=SPa2AH{3=oOGi3(#aHj7>2ia15nCA3)R;5B!pWJ3 z^xqhqi+Dxx{aDYW=z(lE*Dzw2dQGV0M&` zS3r(}Mv$BCw}&Kxu?h|A0_j{Q)B=l*1rh#|)Q6{tI{zztOg|uOiu~?DOmx_)T-vqq zEwQ1k$jnH%`52T4Musa^X2pJjy{gU=-Cr^bs~-=dU+m@<~}Na>T!G#a%# z`IJh(U7SL~KfF~Wcj%U4apEe8l9;P!eEhLn*34U{3KgO>|F)^3Y1hixtJ@yBhe@F) zfhR_30}$3=1pHQv@D*!;NO;UaymV+A1efJ?Ppf zv(08vagvA@V(O0o%`ghDEdEJZ$tOg@%jnnwiBxtkI-2?&1?nT7Q4NXYVkKAc?DuB! z^8Qp24LWGj#B2`gmRzxho*vbH(~N%=VIgt+(%FUGfyo=>l^qD#@Jfd2_~ZoIwaUE? zr#a!nTJrnsH$y||*u<_xKV5Si%ER`hq~u3J$Gmv`@BStPI5SY=mS2|8q>xxDD?M@J zDNjjpBc^WB=|uUj)s10aZrOOFpOd_lEwbtI>OEW>S<1aUFneo!nCh+1wqVi^;Ctl@ zh;UL}2kEBEx1xSc_P6FsDk9gKwzj*6O5pgINu>tm4d|pT1P3OKAUVn!)1d`k zH6FvS_QHnG6G;D3PwjUrxS=pd1kyld?P(QD%!=#!0my!`=ZU_jVDJAm>M7vv=q<6f71YaDzD<3ow5=27f%Sb8C8f^E64( zBN9PX4xYsa8pqhPCnIvwvv-2HM$(A&XVtllrlr6={9BWuuPGILsr{HvlR3p!x zs_oCFOb(d==vWxo&he}3GGC-uyoIGHc|Lx#*|t%jI_RE1Qt0}96uWn!^Ey_d%2T0) zh)>k##qRZJtZ$fxyHGT~mOFerH+WNSmN%G_adnw}X@BTlDrL3q7q(DkZF07_BFFt6 zW?I4GyIL0t2lCHWXKlnCDC1xVg%mz68N{QtON+e- zPy#;2HWgT1AGVvYqX8z%T0&VoCvc@rCeMCzDaMwbPX1}?uSpC6*~_T~7qXKkrd-a? zj%7c@4=-H(EEcbXYjU+mpt(P~u8jbo@Mheu(|_Wpi9Pf`dCD)Oemc&TZZiNuj=03a zvT%Bt{(sON6c#n2I{L!gzV^SMkQ|uRp~*aF^8X9$_#@5wf@pE`ebM4-@kV;s13I>y$t)o5Ycp;9n5x(LYVhk^cL;>mC0qi{c>xHlQ3< zY{S1vCPz@-|A*T$`6|&dl~0_8i#g`Dn@t7%j%@&ZS`nOQsO0hEL$<^xJpU!Lhj;uI z?)Vb7VusmSOoRbM1(MFJRfL*r`?H(rj9O3IAJ72^5o@sVq5viDZ~^eb=~je}y~YP< zcfYHBjx&%(>uk|N(@If7kHE0nwsAk_R&ztKdK3{>K;(;1mL*N|%aD@5K9jlIe+ye9kY z&lf$Rf!K#MH>cs}t|^FYzX!Fmc<@MlXe?k`6?_PQpJFr9UQfWR6bM^gI9amXeu1=% z`cM_0z5x-~TLzt!yujnMYM|#ZTx{X`=D##~1rqUv$#onX(N)TXoPs1Uf z0KehZvM5q9->B@Bem^4Fe2308>^0j{cZj7?zz4b^svKfG)HT10X+P1i`qO6Jg|5=d;`8FJu_51 zCP4^@mHo9swR_BI=KLDaRfjDw&9d@+I#_xfHHgyAOWoDHE{1Tp+JH2wzKn>Exe*d~ z8Qify^Pv1UC`#5KdVL%Y$$6?9Whl-n?Ex=vbX7{k5x@+Y+q^rKj=t|gt<1Hx1#-r( z=KuC4>BSn;Qeq8aV482=8)ksnue}h zZKxk$8{F3QYhJp|zE=@PrR;0zzpXl?bP;Mb11;(K;IpfoDZQOG>nwVN z))}c}c(xOs%yWV(^vdy!aa7*`FZnSIdzZgpu2cFDQ3mjVf5!*lVGWx?q-II6^+vNrzQF@l1*k_iv zg7rT=n+l8PlW3-h6OH27T?XY%uqNLSb?`8>rCda|9^ZEKFBYDReL9!kyLwqkpVXrA zl6uDj|zrJ7Z&?eaUF%1;Q2ccV-R_m8(hqzH91;KWv#@c)!~(^3T&JK7$tP z?0~7Qc1#Xw_BEtz=Q}6YE$b+&ZLx=m0PYfW(Zv-9Nj)b)ox+V>Wtv9;(SDV7t*% z@>s7tQnWQGd}jIcvxk|z4+OlY46Fct%b)!?Zx<(Mm*Fdt;d-M$8<+FkO)`f#R$PQq z%e>Q2^CfM6VxG~0SG$U0{WT)7G5nymEr#2+ZWwdH-dkQJ2)kDBSJsbgj)&p%xmbV7@Dua~Sd>CXIUX0Yq?E8`v~nc=5aR3bCbtqzJ=59mykV zi3X4!=n0bET!3}f`Nu+)d4fXL<- zBS}9-AjniJRUo>0K0fKxRBEj+^Up`mqZuCQRYtP_fq#!rgUpsy(01bHB`Em)dA9~s zE*=s<=OXb&jM(Y3MZEL{H}g!vEX%iYPIO2&i07v$+^w8}*!WuG&j}EU=!bLkq{ty) zJRh_pDix|h>5<)Yhi*lIe#9DXQ|g8D-`5Rvk1P`C4)@)Rbbx?_O13UHvYYyUGi;6@@;~fmnzEdo*RqOfduoxVt-nt zQkIwM#jZ0LW<`0mCuz>ce2Kp*fLIGQ6R|SXU6o=J-8=E0#{=0NJv&w+kv{uxEGx!R zJ|<9as5ztd^d^%dF_x4i%;2*WUH|8UWaewVj=BUVJCG+?FP*ZrVM&Ssf$0rFH41zFpGKev+?MC(p zvp}wsbAzvav^+Dwjo?b%O~6Fb(d)nF!%XR18o{a96R(sWtm#gd*ThMv(b6o6X) zq5LV=I#JKOHb4Af+hL0mr)010i+wp=Zif#OcY06yl4G@Mis84HdviE7Tzr~bnN}nNbR}oN@FRyDUGsI|5iICHN0SWT5nL}#7%d$S2aL5 zB^$n%+kXSd6`NdNyWf8h*v1}&TqgAEql$ryiuRy^xL76}y9Du@teAyelyh0o{? zh?sC35gXJu^*%+ZI%lvgSDhg-c?a$5TzLyQRoa~U>}?B%kB5u1H(uwuZVl}o0t0Ah zf1^2R835mucnX1jMAnzBgvLL?l5=@9^YuI7Uv9aTo88H262k;8AJ@9Ml6}{(<0S;B zIG7VY+}>cdbv%3i>sfZTj>BY0+t-O5wAK=M-}8x0BrsRyCK!biKGhrkq~25*qy9&< zLU;$gM$z_*XZ*9H5=BKXju4-9HaY^qn{Ehe_d8zr_O19L6xcyXBmUh z{E+l=!$*RoPe4H-;%?m{;ds-tL%GF+Cm!6AK9nVAS*eovrTyerYPkcX7WX1^AYmO{B;VO_d9zsdy~MS%XIum+4;Jc@Bm>fE3f8@ATW?xtO}!aOHW zTO^%*tzM$xTQ*b*h~01D*GY0k=U-F5^DbZPp|y~*3W;WX;f67SUlF}*x>R8RJCGN^ zew%uI=L6^US(1B>Uh?ZMo)W69uzUJ9Q@U4SX6!b8$QA z*-CRII~@ZrNlv4Nc7EDL{r@RS!)AgHW{gHMrN&X_IlUn!Z*e) zk-42JnQIApUJt=Tu0|9r2#wZfyNU%_H4y!vUJ>(Tf)aya>x2T)o`C6&L zrH6heBjebyn&s%%L+^&#$(F^9DsOo2pV}Q$UKlSs?*C+w;*Nb7-&Nc9XT_-doTsQ- z+nJlf6pv?DdZ=41x}cTo@8$eGT$D#^pi(umu5Xv-aYdI``%)N-95fOxGl-((pKi<< zY8LN_K7R|vhEqtWk=kXazAmo^2>)}yi+QPOuYNJqLQ?*|xC}~e#IE^SD3B*PzT9$N z`h@2+csm?V;bCVr-XGM#@Ph4-!*VT-b0+5bYRYD4pU{5W$t!-?_~?@_`IV*~@~4~g zKSSEiJB*B%T-k1P$DGLI6A0$MIYXYuY=*Dq12n1-^zT^0v1-EY)|4EfdE>#u6idpE zKYl?NxKMADrqNZo_jSAh5NI&Rw2#XyXjb6|S*P2gmEmUCJ^7W;!YECKwFO1+_)BiW ziz9P2Z!Y+rGTT_)Nfj+KAfb+rd;a{{sLVL13jkK==fMaxlpjHC_VV9=YE;l(Z}?pv zVZk>k`Me^cF@ZzJ)g~7I1IAU~AmswD15_AGx?<33om_Od8&tu=3nchL!E~tX(id5U zeHSX!XvDeSi=P(w9QCtY(6(>z6gAJK*M6+>+I5I3w@Q|fBZoXuK7?+0!!-ChEw8~va`%LZw(N71)`MK+OKuv~q-5G{ex9!r+ zKXl0Xg$k)_!a;!V|9xc+aRS1xQu5@W^Tyg#Y?4~ z+cga=lW50)Ce&jZU0@(Dthg7P?R>lsZ>e}!*$0{PnYa0xWT~i5+9`Q{LkUw=xl~PKNAc6Eg)X`U^ru@#)AbF(` zfd3^87yUv5HML`}t{scy&|koKx-5SKo| zAC4}RRU871aVsLU%tqO|L(hrk{8iS&6LaNrPf{5!>EH)L5357u&Q?fX<(xi<+z;I` z553V0q*kXTHzX~zOiz<;a?iWl%nG?4J9Y6eds>OmwXgO0ydm|7w9!8;v*y;{b%)NP zBj14>ey1WN=Ao&K$309&mWm8D5sXP;MUj)y_e8dY1zz>&MVTn2sr;DyW;;cwHr~UW zX1Y;!U2ZcO7s^H&^Dq}GNfN6X44hI9UfroFo7Oj1Mf4IV%q{S6-o9)97p=>JLHrRx0{sbEz(}Hh@oJ=0MRYI4+9cNto2a0c40<_?_@*VqfHqU%UHr~xOiyU| zu|!;Z`1$3as~o?z7iClxrILh~8-}v6BV)X?Pw@>e7IyLsnQa&p;VdLT6i8&dF`L1O zE4ZnsyWc~OYEGhX%I<8LV6YvUZPt7GGa2dC#OAM(ukLY)5(=&=YH=9U29XP(!;0!k zpZs|3g2Bi%@1XPLss#R)t+^9_F=WXtIEDKR4l#UoSEh2vL_z#<~ZL2+=VOM-qz zFtM)${3C|77EYW#?^7nMeL=Ae%a(B*O8JE|C>`FeURo6k&LAh}t4w0NTjkLfk=$Z2 zv-I9C4=Ow7s>4uCzOMPk&G`1dp{J%aaNY=Th zv*yTRD(xZM;L*&%g1NZ2F+Gt#C^m5{*n$KIa*C^hA@64 z(17-j#*zkG6Ly4xrK(J0dg=(11IPr`z&b9J}9BmekeJnjmh7ny&Uk*_!m0{;`T9qplS3^L9XMSy-Q^Q*J+vXrDPX3$FmVt*BCgkGqc)jSYQ!yLW07h-gwK;s@>VU3-$^ z2}9>Z7b6dv>3lNS9zjwo2?sLoiAgB`4teq1A#sFEAR7~1l$HO5IvpRCoXmz1mIuk+ zuZL4P<%`y0|M(pA2BUwAi~=)sPF;r2&wHP!%pl3(ud=BV3}LEvEe|`bM3xv1@!nhF zC=P+N#Zu5=(fmV`^9tgF1BAsYcfTipHy0QT4B_Vips&><9Nx3yXxgwDrew@J;|gV$ zztCkxEn*gh*wzS-N3@DVNAw#x>HFZ#VgybCo2g4CB4g?IQhWI*3bE85r6;!{hA9L; z&&DB6KdNZPzql7KnIm|*RAQpT&+L5=*SbiZ%IMRHtxN z5IhP>Wz0)YkjkyT88j?Q&;FTqbqO8;?I=vZ$odpZn%SsqFTg>H!*YR+@|bE1?WQ3T zB<8q$v7%jKc#f`JKs9sge>sH=L~!!2gI?2pRrLqn@z?)pK0mq`&uOQ*aR94xe$pKZ zrxXyi@lv$7>;MSy=%f9@BL#*i*a&S5qWohK1o0elRw@0_?6wx3Ol~j<@Y1RtnUoh&T(OMrip? zFCnD-F8o)Tm}v&!N?Y+9t@1je>XHKK@eLp5RtjNkmhBu~KPZ-nnD;T@GHp~ZVE0Zu z``+YbwPx|t zf9Dt~F<&ukCKpA;dw3l3q2exE|UjZz-Vk?TTW^Rpaw=XkK!+H$NR4KTwLLySMeP+^{>r0)H>-t=Od8TqZm#P!XeJuKV&*8O9^ly+ZG?xpZFGF|T@YdHmD9-2Q*AlNA26#s3<}rTc(7m|D8| z0b`P!(KYG9oxg)l8P9Z*dcaBWJ9I8JvpTjrYyvJY51>ocqDLwY3&4*33M`^~PwBqB z`1AD9Be=*As?)T>v(1?n&Pvp2ES4Sz3Gvn;ks*7NlE^CN53^-;# z*p|HEd|XPv2PeZX5LJ|$=V+Ii!!QYZNTgrv@@W(OnFH#XMm|_*8rqkwHPFzmQRPy` z3yVv|PzqR(IvV^YqaTaPeV@^?cCT!&&D^uQ_wDu3tz_Zb{_1ZdLcwsX23mj{)Hxr0 z43p8dzSrGMESp<~pnPH??R0Sr?k7 zJ6doP8tB@2c)8QK7M=*Z1`&qedtFudzHq;PO6aFg)aDx@2TKP%!=t2UM+;8`((8cCoN7n65qH?lTr&8^Ggu^G&+$Fn%S--i>}05PkoODpQ5#KP0oyfa&W#y5Ds$wd+l-G>Dw?j zehu(Dos?={Wj=a~#9WwjdYN(O8SAkXMfc!IN1pKdRtOnKvDNrZhYYkQX0g+nQSllc z_QY9aMbR^3pQaNr=Y7JZGWMT8}vlVUigF%kXTI zu0*P6GRxyF@&(^SO1mwJcj5QSkHs&@j0%`Ot!=|xcMaESe(-8JV9Dk%7U;4#{wyeB zsarx>I(UtP2#yE4`eM#Svz@MK4rkZd^do1T5KK*$4)^;!f7>p0b7^epq!z@BiRP}f zy9|_c^n+sb83cExNzlj!lDtiS;X*AnkI8q36PMI-qAk7_)Z?Gwc+*;b&tWbq;sjQS z(rs`VZ9mgAF1W65>A|G+z0BV zT->`SDo0EFng&|zcVFD;;uFfz=Qc_{T}8zw8kSeI)tHT6(qEaVA3cM;Hr-|uIb38G zY3c*5M`7ZC78}B&4_~}}|M&8^Z~u%xfNO}t;AN8@htYeN?Nxi=DeR(NYmx?W8_So2 zdiY^}Y1q+tNE80w2&|Dasd~LMNR8#L1qjXVcS9H_i0a z@oD-{3{B$XyfHWRs=LTcjRqw6?(Jebcxg9QV6l`MQhsdlqhOcqVdy)cKVBjPn1IR^ zz-u0*Gz6Arf2soAq3}7J2flb#hPPolhyk}z>vAfL`!X3Vci8F>`18oo^wijpY3W1c zE+L@2Tsl7n7;&*d6DcN!(S1XBka^o&2Kr6$=LJ*6gAwOcg(vIv4xM$9Es!_ zXj~fTe6-Dn{Hbj3*Q=8jL~lTIofpZ%;fayk8s0zK7AHDp@iS>anV7-^4l%W=@R{Qe zS9m;ILF|6qQ1RAN&705(xrPdo;!X9W+|Hy8jfI$!0bk~2%nj#$!;+ic_?NHi zjMsT5UD&<)?#?!sR&lE`opc6WU+CPcsV`1GH_Y-msyXlW z+y;hDwVP|)A&mAL2EFDi!B4&Ke;yvn6PFC3XxM?zegi5^IXwjOt>!|8)2wUQ%0wAN zeXl|u@}^ZOKR#Yc7d6l^ZX2)qMBzwnn-fjlQu<`;i$_d;(NR`XG7q-@@aj~{^Lv*) z0&o8+8YMc)eLN*e6^#pwJ7W2O;}e%n&HJd7b@rAY`O3`om#2^+?WNhFg%r_3l z&2cWPz7%ZAjd4GYA5ANHr)r6x{)Z<&ZpZl}VC({B&|STt)L+&py^Izdw$Ayl9~J;Z zSWX4;+5H_Oru7rU)s4qfwy8McABRwEh}xpiJuMVo0T>>qa$YNclix^ss^p3-)u9rX zz>W26z)|985hr2yE~y;y!~eanzY)s)2!%Fm&l5w9%4%T;Q1dp+<@VZ(NW;yRo9;db z+RbqNE6P!Uzl$XaoBVAFNL-5@mUJD=T#fc;L=Uf8d?j-+GPI2?)tkdoR>s)u~sL^B9 zZ!mDE1j*ekXS(wEn=qtF(uo)zP^t|>!tolzV_m7^ z&996D<-m*%dQSDcn!b{`nB!gYlpswn^T1*jRRgSmA3L>gc~SA#jECqwEOoQguWIE# z^sWRSQn!p`%w1sm)x+UoUhc5eKYW~Wda0nroDiEy%(ZjiEw7_>Zv|oTL0VY?r-1i{ zn>L3HmU7^){%o5KYS|)XS9bc)IX!or;|)~D8Sj?0^I>iv!#(r;&%?Q`M-9Ro&$+n` zhpau=2`VA-@>?u{1bdx}_4+eRCZy3qRiN^$`s&e&zRGH~Ij`XiY#9q^sM)VhO6h$; z0xx`GUL+5a-G|a>J;6&R-#t&@xRezVt-%=T1t`KkoD;DAy>{*>tOlL&^zy<&;y@*E z3oRWgzEe}Mt0Ii9()&fVWvwo)y>C$o*tr@XQjm=1I%#IU!JkwrQ`<{|w_ zsII$uG}4{*OeguWbU2#^oG$w)kE-*F-zv*pL0EbU^b~}ZDj6&5LnBPDXXay7qtJY& z;`ioS-&-gdv&^m9$uOm>|9(Pv-7erL(w8cshd=-PAU@pf!|a+;0WD3h-Jqoau-o(w!jr6Sgq8;)$k!WB2yi;s~f| zeNUnwPk9Iji*3^CDU@SDsi0yd(&=7YLc0@*b)$n1bk`k{s_BgAPhJzgh*WW2_-S+* z2WdOWb8$YWQE7*QPzVmh`G3Yhg)>|<#`MC3s&)t&uL|Q$R>UZt4akJE86xvGL0hWn ziHZHJ=jeMrr`JGtXDa1~KaApo95h_=f?AF?5>IKaTg6Wy1(}(!uO0@rIW?d88yaH8 zcMLqHn#S<$+8my6e!i&s{O66L&*6jP#)t*4klUW)FzBr=Via+b-&b#G?VWx(Nxt$N zg%P{C?Fv4{=AdkF<1}853M!I7?+Tl>h(+LI9o^N+g*-SlonaM>rn#8Zh=%Km8I{}p znts*_v07LJeaSEb8?kn#Y&8XgO862h5^Q34+m&XQrEZ)5nAA)muFQ5^*1XKf+z zdxyE_uJjvAK$A8bS_`Uq_iNLLeWS!Y&})zf^MMa1|HDII7@bVr*WX^98Vx%o*YfiH zcLI^;N2}iJ7Fz6^QAex=u(i_nfsZGZqmia~fl}{w@ED?JV9d3_^qj=%E$%!`s@agv z*AOlxhSm3_Q(>w(JlEyzQP977G0(8&%vk#%m^6earCTIW`QTrt;fy(qC%(&%OKhM$ zENpuI3j{H9;2PEbln5N6D}}|;Uub&ItT!!qs_h+r*Dv8eA^s&T#;qZp%BMw~Iv;zH z1V+eVQI>CQ?#)kejp5p>-a;A|Y9naNvN%)FC)Crs+g|42jcv{7|XVvoc@!W zuVRCqMM0~~I@!$l(&bhdITYGv3n4rUqOGNmdZ8s3P0@2y0&n6d77w8^##tZ=26jg7 z z!=iS_BvPGr}6F5{c{$Mi4JUAL<7F9z9t2QcthC?BZZwGY)= zrHDFVw+EApf2J6+Czkgggc3_CV6@%d2AZ~cd$a&j=Io|oc+wL^v1Rc2xLKA>MMx~1 z%D#{sLn68I64{Y@=!H}D;RF`O^WQ3mi@4XB3aZRYHI*9vfHlPy!e1+jw8O1wI5uER|h7)*`qvSKvX$5Si3n5W>@y$p7Ps}C9ojo+* z{LvkVIcJhxJ_#Qo-0~Ow+YAEJF{}s_eBMa za4>SR^!6`c-Mg`#w(nU~f8wZ+xQ0#MkPN*w%cpi%>OZRIKMa1WVNJ1x`Qr3>Ydw)P z|AaU=```9QhWyE3FWy6IwL)iqlu0?~X;>g^U#j zO(A!+Kbo~X)h5E!_Lo0_in^Idg{67otSpz*LT_K@=<@+2!*L|(wEcMh$j z!$s?UUT9%_6Te4SbW|2z-!D70@Y-?DEWQ<&-YOeY3?3#{U%MiWxCJ30R!|8372Kd+ zFYcpk^eA>BC-&#}C~=0X8rkGWmCI2a`15YxW1hlfyeOLct%YTk;Y)H}60DKglQqtN;K2 literal 0 HcmV?d00001 diff --git a/docs/source/asr/models.rst b/docs/source/asr/models.rst index b08dd0cf0a8a..2323e1636fcc 100644 --- a/docs/source/asr/models.rst +++ b/docs/source/asr/models.rst @@ -316,6 +316,33 @@ By default, the decoding for HAT model works in the same way as for Conformer-Tr In the case of external ngram LM fusion you can use ``/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram_transducer.py``. To enable HAT internal LM subtraction set ``hat_subtract_ilm=True`` and find more appropriate couple of ``beam_alpha`` and ``hat_ilm_weight`` values in terms of the best recognition accuracy. + +.. _Hybrid-ASR-TTS_model: + +Hybrid ASR-TTS Model +-------------------- + +Hybrid ASR-TTS Model (``ASRWithTTSModel``) is a transparent wrapper for the ASR model with a frozen pretrained text-to-spectrogram model. The approach is described in the paper +`Text-only domain adaptation for end-to-end ASR using integrated text-to-mel-spectrogram generator `_. +This allows using text-only data for training and finetuning, mixing it with audio-text pairs if necessary. + +The model consists of three models: + +* ASR model (``EncDecCTCModelBPE`` or ``EncDecRNNTBPEModel``) +* Frozen TTS Mel Spectrogram Generator (currently, only :ref:`FastPitch ` model is supported) +* Optional frozen Enhancer model trained to mitigate mismatch between real and generated mel spectrogram + + .. image:: images/hybrid_asr_tts_model.png + :align: center + :alt: Hybrid ASR-TTS Model + :scale: 50% + +For the detailed information see: + +* :ref:`Text-only dataset ` preparation +* :ref:`Configs and training ` + + References ---------- diff --git a/docs/source/asr/results.rst b/docs/source/asr/results.rst index a1c96c7e1727..358114e75a40 100644 --- a/docs/source/asr/results.rst +++ b/docs/source/asr/results.rst @@ -26,6 +26,17 @@ If there is a local ``.nemo`` checkpoint that you'd like to load, use the :code: Where the model base class is the ASR model class of the original checkpoint, or the general ``ASRModel`` class. + +Hybrid ASR-TTS Models Checkpoints +--------------------------------- + +:ref:`Hybrid ASR-TTS model ` is a transparent wrapper for the ASR model, text-to-mel-spectrogram generator, and optional enhancer. +The model is saved as a solid ``.nemo`` checkpoint containing all these parts. +Due to transparency, the ASR model can be extracted after training/finetuning separately by using the ``asr_model`` attribute (NeMo submodel) +:code:`hybrid_model.asr_model.save_to(.nemo)` or by using a wrapper +made for convenience purpose :code:`hybrid_model.save_asr_model_to(.nemo)` + + NGC Pretrained Checkpoints -------------------------- diff --git a/docs/source/tts/models.rst b/docs/source/tts/models.rst index a9c7cdf50042..8b283529a706 100644 --- a/docs/source/tts/models.rst +++ b/docs/source/tts/models.rst @@ -12,6 +12,8 @@ This section provides a brief overview of TTS models that NeMo's TTS collection Mel-Spectrogram Generators -------------------------- +.. _FastPitch_model: + FastPitch ~~~~~~~~~ FastPitch is a fully-parallel text-to-speech synthesis model based on FastSpeech, conditioned on fundamental frequency contours. The model predicts pitch contours during inference. By altering these predictions, the generated speech can be more expressive, better match the semantic of the utterance, and in the end more engaging to the listener. Uniformly increasing or decreasing pitch with FastPitch generates speech that resembles the voluntary modulation of voice. Conditioning on frequency contours improves the overall quality of synthesized speech, making it comparable to the state of the art. It does not introduce an overhead, and FastPitch retains the favorable, fully-parallel Transformers architecture, with over 900x real-time factor for mel-spectrogram synthesis of a typical utterance. The architecture of FastPitch is shown below. It is based on FastSpeech and consists of two feed-forward Transformer (FFTr) stacks. The first FFTr operates in the resolution of input tokens, and the other one in the resolution of the output frames. Please refer to :cite:`tts-models-lancucki2021fastpitch` for details. diff --git a/nemo/collections/asr/data/text_to_text.py b/nemo/collections/asr/data/text_to_text.py index 6ed06aa10dde..23ccd3d7a2ef 100644 --- a/nemo/collections/asr/data/text_to_text.py +++ b/nemo/collections/asr/data/text_to_text.py @@ -26,7 +26,6 @@ import numpy as np import torch import torch.utils.data -from nemo_text_processing.text_normalization.normalize import Normalizer from torch.nn.utils.rnn import pad_sequence from tqdm.auto import tqdm @@ -35,6 +34,12 @@ from nemo.core.classes import Dataset, IterableDataset from nemo.utils import logging +try: + from nemo_text_processing.text_normalization.normalize import Normalizer +except Exception as e: + logging.warning(e) + logging.warning("nemo_text_processing is not installed") + AnyPath = Union[Path, str] @@ -176,7 +181,7 @@ def __init__( asr_use_start_end_token: bool, tts_parser: Callable, tts_text_pad_id: int, - tts_text_normalizer: Normalizer, + tts_text_normalizer: "Normalizer", tts_text_normalizer_call_kwargs: Dict, min_words: int = 1, max_words: int = 1_000_000, @@ -379,7 +384,7 @@ def __init__( asr_use_start_end_token: bool, tts_parser: Callable, tts_text_pad_id: int, - tts_text_normalizer: Normalizer, + tts_text_normalizer: "Normalizer", tts_text_normalizer_call_kwargs: Dict, min_words: int = 1, max_words: int = 1_000_000, @@ -426,7 +431,7 @@ def __init__( asr_use_start_end_token: bool, tts_parser: Callable, tts_text_pad_id: int, - tts_text_normalizer: Normalizer, + tts_text_normalizer: "Normalizer", tts_text_normalizer_call_kwargs: Dict, min_words: int = 1, max_words: int = 1_000_000, diff --git a/nemo/collections/asr/models/hybrid_asr_tts_models.py b/nemo/collections/asr/models/hybrid_asr_tts_models.py index 23a98d13c404..1f15e49e0b0d 100644 --- a/nemo/collections/asr/models/hybrid_asr_tts_models.py +++ b/nemo/collections/asr/models/hybrid_asr_tts_models.py @@ -31,7 +31,9 @@ TextToTextDataset, TextToTextIterableDataset, ) -from nemo.collections.asr.models import ASRModel, EncDecCTCModelBPE, EncDecRNNTBPEModel +from nemo.collections.asr.models.asr_model import ASRModel +from nemo.collections.asr.models.ctc_bpe_models import EncDecCTCModelBPE +from nemo.collections.asr.models.rnnt_bpe_models import EncDecRNNTBPEModel from nemo.collections.asr.modules.conformer_encoder import ConformerEncoder from nemo.collections.asr.parts.preprocessing.features import clean_spectrogram_batch, normalize_batch from nemo.collections.asr.parts.submodules.batchnorm import replace_bn_with_fused_bn_all From 95650df468e5f8758a2688230a0f98c9a5817383 Mon Sep 17 00:00:00 2001 From: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Date: Mon, 8 May 2023 14:03:27 -0700 Subject: [PATCH 075/512] [TTS] Fix aligner nan loss in fp32 (#6435) * Fix nan loss in fp32 Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: hsiehjackson Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- nemo/collections/tts/losses/aligner_loss.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/nemo/collections/tts/losses/aligner_loss.py b/nemo/collections/tts/losses/aligner_loss.py index 05d8f28e70fe..1a666d750521 100644 --- a/nemo/collections/tts/losses/aligner_loss.py +++ b/nemo/collections/tts/losses/aligner_loss.py @@ -58,9 +58,7 @@ def forward(self, attn_logprob, in_lens, out_lens): # Convert to log probabilities # Note: Mask out probs beyond key_len key_inds = torch.arange(max_key_len + 1, device=attn_logprob.device, dtype=torch.long) - attn_logprob.masked_fill_( - key_inds.view(1, 1, -1) > key_lens.view(1, -1, 1), -float("inf") # key_inds >= key_lens+1 - ) + attn_logprob.masked_fill_(key_inds.view(1, 1, -1) > key_lens.view(1, -1, 1), -1e15) # key_inds >= key_lens+1 attn_logprob = self.log_softmax(attn_logprob) # Target sequences From 4ca2797dbeb3971da9c17da3f2d74958ec407e93 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 8 May 2023 16:52:59 -0600 Subject: [PATCH 076/512] Update SDP docs (#6485) (#6596) * add info about SDP e.g. processor classes in docs * add link to SDP docs in README * address code review comments and add SDP overview diagram * Fix spelling typo --------- Signed-off-by: Elena Rastorgueva Co-authored-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> --- README.rst | 1 + docs/source/tools/speech_data_processor.rst | 162 +++++++++++++++++++- 2 files changed, 161 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 614445bc4951..700b4edfdf16 100644 --- a/README.rst +++ b/README.rst @@ -124,6 +124,7 @@ Key Features * `Text Processing (text normalization and inverse text normalization) `_ * `CTC-Segmentation tool `_ * `Speech Data Explorer `_: a dash-based tool for interactive exploration of ASR/TTS datasets + * `Speech Data Processor `_ Built for speed, NeMo can utilize NVIDIA's Tensor Cores and scale out training to multiple GPUs and multiple nodes. diff --git a/docs/source/tools/speech_data_processor.rst b/docs/source/tools/speech_data_processor.rst index 49c3d7a81117..29bc4abb82bd 100644 --- a/docs/source/tools/speech_data_processor.rst +++ b/docs/source/tools/speech_data_processor.rst @@ -3,6 +3,164 @@ Speech Data Processor Speech Data Processor (SDP) is a toolkit to make it easy to: 1. write code to process a new dataset, minimizing the amount of boilerplate code required. - 2. share the steps for processing a speech dataset. Sharing processing steps can be as easy as sharing a YAML file. + 2. share the steps for processing a speech dataset. -SDP is hosted here: https://github.com/NVIDIA/NeMo-speech-data-processor. \ No newline at end of file +SDP is hosted here: https://github.com/NVIDIA/NeMo-speech-data-processor. + +SDP's philosophy is to represent processing operations as 'processor' classes, which take in a path to a NeMo-style data manifest as input (or a path to the raw data directory if you do not have a NeMo-style manifest to start with), apply some processing to it, and then save the output manifest file. + +You specifiy which processors you want to run using a YAML config file. Many common processing operations are provided, and it is easy to add your own. If you do not need to add your own processors, then all that is needed to process a new dataset is to write a single YAML file containing the parameters needed to process your dataset. + +.. image:: https://github.com/NVIDIA/NeMo/releases/download/v1.17.0/sdp_overview_diagram.png + :alt: Overview diagram of Speech Data Processor + +Overview of how SDP processes a dataset +--------------------------------------- + +1. You call the ``main.py`` script, passing in a YAML config file, possibly with some overrides. +2. ``main.py`` script calls ``run_processors.py``, passing in your config. +3. ``run_processors.py`` does the following: + + a. picks out the processors that you specified to be run (you can specify a subset of the processors in the config override, e.g. to avoid re-running time-consuming steps). + b. if some of the processors have not had "output_manifest_file" or "input_manfiest_file" entries specified, SDP will automatically create temporary files for those. + c. instantiates the processor classes using ``hydra.utils.instantiate`` + d. runs the run-time processor tests by calling the ``processor.test()`` method (more details about testing :ref:`here`). + e. runs the processing method (``processor.process()``) of each processor in order. + + +Layout of config YAML files +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The YAML config file for processing a dataset must contain a key ``processors``, the value of which is a list. Each item in that list is expected to be a dictionary specifying a processor class, i.e. it must have a key ``_target_``, the value of which is a path to a "processor" class, and the remaining keys must be the kwargs necessary to instantiate that class with ``hydra.utils.instantiate()`` (c.f. https://hydra.cc/docs/advanced/instantiate_objects/overview/). + +SDP will run the processors specified in the ``processors`` list in the config file. It will also check for a ``processors_to_run`` key in the config file, which can be either the string ``"all"``, or any Python "slice" object like ``3:4``, ``2:`` etc. (if there is no ``processors_to_run`` key, then all of the processors will be run). + +.. note:: + SDP will run the processors in the order in which they are listed in the config YAML file. Make sure to list the processors in an order which makes sense, e.g. create an initial manifest first; make sure to run asr inference before doing any processing which looks at ``pred_text`` fields in the manifest. + +Processor classes +----------------- + +**BaseProcessor** +~~~~~~~~~~~~~~~~~ + +All processor classes inherit from the ``BaseProcessor`` class. This is a simple abstract class which has 2 empty methods: ``process()`` and ``test()``. +These serve to remind us that SDP essentially just runs ``test()`` on all processors, and then ``process()`` on all processors (more details about testing :ref:`here`). + +``ASRInference`` is a child class of ``BaseProcessor``. It has a simple ``process()`` method which runs transcription on every utterance in the input_manifest. + +``WriteManifest`` is also a child class of ``BaseProcessor``. It has a simple ``process()`` method which saves a copy of the input manifest containing only the fields specified in ``fields_to_save``. + +**BaseParallelProcessor** +~~~~~~~~~~~~~~~~~~~~~~~~~ +``BaseParallelProcessor`` inherits from the ``BaseProcessor`` class. Within the ``BaseParallelProcessor.process()`` method, it calls other methods and functions, which allow it to do more complex processing. +Most importantly, it calls its ``BaseParallelProcessor.process_dataset_entry(data_entry)`` method on every utterance in the manifest, and it does this in parallel, allowing for more efficient processing. + +What is a **DataEntry**? +~~~~~~~~~~~~~~~~~~~~~~~~ +As mentioned above, ``BaseParallelProcessor.process_dataset_entry(data_entry)`` is called on a variable called ``data_entry`` which represents an utterance in our dataset. +Most often, ``data_entry`` will be a dictionary containing items which represent the JSON manifest entry. +Sometimes, such as in ``CreateInitialManifestMLS``, it will be a string containing a line for that utterance from the original raw MLS transcript. + +``BaseParallelProcessor.process_dataset_entry`` will process ``data_entry`` and output a ``DataEntry`` object. + +The ``DataEntry`` class is a dataclass which contains 2 attributes: + +1. ``data`` is an Optional dictionary containing items which represent the JSON manifest entry. ``data`` can also be ``None``. If a ``.process_dataset_entry(data_entry)`` method returns a ``DataEntry`` class where ``data is None``, then that utterance will be dropped from the output manifest. +2. ``metrics``, which can be of any type, and are ``None`` by default. This variable is used by some variables to record summary statistics about the changes made to the dataset, these metrics are aggregated and can be displayed once every utterance has been processed by the processor. + +What happens in **BaseParallelProcessor.process()**? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We outline the ``BaseParallelProcessor.process()`` method below: + +.. raw:: html + +
+ +
+ + +**ModifyManifestTextProcessor** +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``ModifyManifestTextProcessor`` inherits from the ``BaseParallelProcessor`` class. + +The ``ModifyManifestTextProcessor`` constructor takes in the following arguments: +* ``text_key`` (string) and ``pred_text_key`` (string): these parameters specify which keys in ``data_entry.data`` will be used for processing. (default: ``text_key="text"``, ``pred_text_key="pred_text"``, ie. by default the processor will refer to and modify the ``"text"`` and/or ``"pred_text"`` attributes of the input manifest). +* ``test_cases`` (optional, list of dicts) - test cases for checking that the processor makes the changes that we are expecting. + +``ModifyManifestTextProcessor`` has the following methods: +* ``ModifyManifestTextProcessor.test()``: this method makes sure that the output from the processor matches the expected output specified in the ``test_cases`` parameter. +* ``ModifyManifestTextProcessor.process_dataset_entry(data_entry)``: this method applies processing to a ``data_entry``. First, spaces are added to the start and end of the 'text' and 'pred_text' entries (if they exist), then the abstract method ``ModifyManifestTextProcessor._process_dataset_entry(data_entry)`` is called. Then, any extra spaces (e.g. two spaces next to each other ' ') are removed from 'text' and 'pred_text' entries. +* ``ModifyManifestTextProcessor._process_dataset_entry(data_entry)``: this is an abstract method which will be over-written by children of ``ModifyManifestTextProcessor``. + +How to make your own processor classes +-------------------------------------- + +We will describe how to make your own processor classes by referring to SDP's existing classes. + +Creating an initial manifest +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +One of the child classes of ``BaseParallelProcessor`` provided in SDP is ``CreateInitialManifestMLS``. It downloads raw MLS data for a specified language, and creates an initial manifest (in the format expected by NeMo) which can be cleaned by subsequent processors. + +The ``CreateInitialManifestMLS.prepare()`` method downloads and extracts the raw data. + +The ``CreateInitialManifestMLS.read_manifest()`` method reads the lines in the raw MLS transcript file. + +The ``CreateInitialManifestMLS.process_dataset_entry()`` method takes in the lines from the raw MLS transcript file, and outputs ``DataEntry`` objects containing entries that will be saved into the manifest (i.e. ``"audio_filepath"``, ``"duration"``, ``"text"``) for each utterance. + + +A **ModifyManifestTextProcessor** subclass that cleans the reference text +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One of the classes provided in SDP is ``SubRegex``. At initialization, it takes in ``regex_params_list``, a list of dictionaries which must contain the keys ``"pattern"``, ``"repl"``, and, optionally, ``"count"``. These keys will be used to apply regex substitutions using these parameters fed into ``re.sub``. The substitutions will be applied to the data at ``text_key`` (i.e. ``data_entry.data[self.text_key]``). By default, ``text_key="text"``, i.e. the substitutions will be applied to the ``"text"`` attribute of the manifest. + +In its ``_process_dataset_entry(data_entry)`` method, the ``SubRegex`` processor does the string to string conversion upon the ``data_entry`` that is input. Its output is a ``data_entry`` with the changes applied to ``data``, and the the metrics of which regex patterns caused a substitution to be made. These metrics will be aggregated over all utterances by the ``BaseParallelProcessor`` class. ``SubRegex`` also has a ``finalize(metrics)`` method which will log information about the aggregated metrics after all of the utterances in the manifest have been processed. + +A **ModifyManifestTextProcessor** subclass that drops incorrectly transcribed utterances +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One of the classes provided in SDP is ``DropHighLowCharrate``. At initialization, it takes in ``high_charrate_threshold`` and ``low_charrate_threshold``, for which the utterance will be dropped if it is above or below each value respectively. This is helpful for automatically filtering out incorrectly transcribed utterances. + +In its ``_process_dataset_entry(data_entry)`` method it evaluates the character rate of the utterance(by dividing the length of ``data_entry.data[self.text_key]`` by the value of ``data_entry.data["duration"]``). If the character rate is within bounds, it will return the same ``data_entry`` that was input. If the character rate is out of bounds, it will return a ``data_entry`` with ``data=None`` and ``metrics`` which reflect the applied changes. +Similar to the ``SubSubstringToSpace`` class, it has a ``finalize(metrics)`` method which will log information about the aggregated metrics after all of the utterances in the manifest have been processed. + +Class diagram +------------- +A diagram of the classes mentioned above is included here. Arrows represent inheritance. + +We omit the details of the ``CreateInitialManifestMLS`` class in the diagram in order to save space. + + +.. raw:: html + +
+ +
+ +SDP Tests +--------- +It is important to make sure that your data processing code has the effect you intend, so SDP has a few different types of tests: + +1. Runtime tests + +* Before running the specified processors, SDP runs ``processor.test()`` on all specified processors. +* Currently, the only provided processor classes with a test method are subclasses of ``ModifyManifestTextProcessor``. + + * ``ModifyManifestTextProcessor.test()`` runs any ``test_cases`` that were provided in the object constructor. + * This means you can provided test cases in the YAML config file, and the dataset will only be processed if the test cases pass. + * This is helpful to (a) make sure that the rules you wrote have the effect you desired, and (b) demonstrate why you wrote those rules. + * An example of test cases we could include in the YAML config file:: + + - _target_: sdp.processors.DropIfRegexMatch + regex_patterns: + - "(\\D ){5,20}" # looks for between 4 and 19 characters surrounded by spaces + test_cases: + - {input: {text: "some s p a c e d out letters"}, output: null} + - {input: {text: "normal words only"}, output: {text: "normal words only"}} + +2. ``pytest`` tests which can be run locally with ``python -m pytest tests/`` and will be run during the GitHub CI process. There are 2 sub-types: + + a. "End to end" tests (link) which run SDP on a mini version of the raw initial dataset, and make sure the final manifest matches the reference final manifest. + b. "Unit tests" for processors and utils (link). From cc258bb8b40ff21fbba9b527d62de589384dd368 Mon Sep 17 00:00:00 2001 From: Igor Gitman Date: Tue, 9 May 2023 12:04:46 -0700 Subject: [PATCH 077/512] Bug/typo fixes (#6599) Signed-off-by: Igor Gitman --- examples/asr/transcribe_speech.py | 10 +++++----- nemo/collections/asr/models/hybrid_rnnt_ctc_models.py | 1 + nemo/collections/asr/models/rnnt_models.py | 2 +- nemo/collections/asr/parts/utils/transcribe_utils.py | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/examples/asr/transcribe_speech.py b/examples/asr/transcribe_speech.py index 30700153e340..0ab50dba016b 100644 --- a/examples/asr/transcribe_speech.py +++ b/examples/asr/transcribe_speech.py @@ -47,7 +47,7 @@ compute_timestamps: Bool to request greedy time stamp information (if the model supports it) compute_langs: Bool to request language ID information (if the model supports it) - + (Optionally: You can limit the type of timestamp computations using below overrides) ctc_decoding.ctc_timestamp_type="all" # (default all, can be [all, char, word]) rnnt_decoding.rnnt_timestamp_type="all" # (default all, can be [all, char, word]) @@ -60,12 +60,12 @@ batch_size: batch size during inference cuda: Optional int to enable or disable execution of model on certain CUDA device. - allow_mps: Bool to allow using MPS (Apple Silicon M-series GPU) device if available + allow_mps: Bool to allow using MPS (Apple Silicon M-series GPU) device if available amp: Bool to decide if Automatic Mixed Precision should be used during inference audio_type: Str filetype of the audio. Supported = wav, flac, mp3 overwrite_transcripts: Bool which when set allows repeated transcriptions to overwrite previous results. - + ctc_decoding: Decoding sub-config for CTC. Refer to documentation for specific values. rnnt_decoding: Decoding sub-config for RNNT. Refer to documentation for specific values. @@ -209,7 +209,7 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig: # collect additional transcription information return_hypotheses = True - # we will adjust this flag is the model does not support it + # we will adjust this flag if the model does not support it compute_timestamps = cfg.compute_timestamps compute_langs = cfg.compute_langs @@ -254,7 +254,7 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig: else: cfg.decoding = cfg.rnnt_decoding - # prepare audio filepaths and decide wether it's partical audio + # prepare audio filepaths and decide wether it's partial audio filepaths, partial_audio = prepare_audio_data(cfg) # setup AMP (optional) diff --git a/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py b/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py index a413eaeed6fa..9ba5533dbe64 100644 --- a/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py +++ b/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py @@ -138,6 +138,7 @@ def transcribe( num_workers=num_workers, channel_selector=channel_selector, augmentor=augmentor, + verbose=verbose, ) if paths2audio_files is None or len(paths2audio_files) == 0: diff --git a/nemo/collections/asr/models/rnnt_models.py b/nemo/collections/asr/models/rnnt_models.py index f4e227f510af..7c91aed99cda 100644 --- a/nemo/collections/asr/models/rnnt_models.py +++ b/nemo/collections/asr/models/rnnt_models.py @@ -286,7 +286,7 @@ def transcribe( config['augmentor'] = augmentor temporary_datalayer = self._setup_transcribe_dataloader(config) - for test_batch in tqdm(temporary_datalayer, desc="Transcribing", disable=True): + for test_batch in tqdm(temporary_datalayer, desc="Transcribing", disable=(not verbose)): encoded, encoded_len = self.forward( input_signal=test_batch[0].to(device), input_signal_length=test_batch[1].to(device) ) diff --git a/nemo/collections/asr/parts/utils/transcribe_utils.py b/nemo/collections/asr/parts/utils/transcribe_utils.py index 8101bee96723..69abf09e8cab 100644 --- a/nemo/collections/asr/parts/utils/transcribe_utils.py +++ b/nemo/collections/asr/parts/utils/transcribe_utils.py @@ -325,7 +325,7 @@ def write_transcription( item['beams'] = beams[idx] f.write(json.dumps(item) + "\n") else: - with open(cfg.dataset_manifest, 'r', encoding='utf_8') as fr: + with open(cfg.dataset_manifest, 'r', encoding='utf-8') as fr: for idx, line in enumerate(fr): item = json.loads(line) item[pred_text_attr_name] = best_hyps[idx].text From 13d0895a3c3a9ec4549661cd265a11064bdb9aef Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 9 May 2023 16:07:33 -0600 Subject: [PATCH 078/512] Manual garbage collection with an interval (#6469) (#6482) * Manual garbage collection with an interval * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * use trainer.global_step for tracking the interval of GC --------- Signed-off-by: Sangkug Lym Co-authored-by: Sangkug Lym Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Harper --- .../language_modeling/conf/megatron_gpt_config.yaml | 4 ++++ .../models/language_modeling/megatron_base_model.py | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml index 09b30c08dd47..67999548e8da 100755 --- a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml @@ -216,3 +216,7 @@ model: warmup_steps: 500 constant_steps: 50000 min_lr: 2e-5 + + gc_interval: 0 + # Interval of the host memory garbage collection. When it is zero, collectiion relies on the automatic garbage collector. + # If an interger value larger than zero is set, collection is done manually by the batch step interval of `gc_interval`. diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index 3899c75675db..1237491fa39c 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import gc import os import re from typing import Any, Dict, Optional, Union @@ -148,6 +149,13 @@ def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True): "default_on_epoch": False, } + self.gc_interval = cfg.get('gc_interval', 0) + assert self.gc_interval >= 0, "gc_interval should be an integer value larger than or equal to 0." + # If gc_interval > 0, memory garbage collection is manually controlled. + # The automatic garbage collector sould be disabled before training starts. + if self.gc_interval > 0: + gc.disable() + def _enable_nvidia_optimizations(self): "These optimizations are present in NVIDIA NGC PyTorch Containers" @@ -351,6 +359,9 @@ def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unus # accumulated gradient updates. grad_scaler.optimizer_update_skipped = None + if self.gc_interval > 0 and (self.trainer.global_step % self.gc_interval == 0): + gc.collect() + def setup_optimization( self, optim_config: Optional[Union[DictConfig, Dict]] = None, optim_kwargs: Optional[Dict[str, Any]] = None, ): From c85c30c611e95deaf4fa26670a4d70ae55a640b3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 9 May 2023 15:21:06 -0700 Subject: [PATCH 079/512] Make tensor split contiguous (#6580) (#6593) Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar --- nemo/collections/nlp/modules/common/megatron/attention.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/nemo/collections/nlp/modules/common/megatron/attention.py b/nemo/collections/nlp/modules/common/megatron/attention.py index f6768a5ba4a0..85ef427535b0 100644 --- a/nemo/collections/nlp/modules/common/megatron/attention.py +++ b/nemo/collections/nlp/modules/common/megatron/attention.py @@ -380,7 +380,9 @@ def forward( mixed_x_layer = mixed_x_layer.view(*new_tensor_shape) # [sq, b, np, 3 * hn] --> 3 [sq, b, np, hn] - (query_layer, key_layer, value_layer) = tensor_parallel.split_tensor_along_last_dim(mixed_x_layer, 3) + (query_layer, key_layer, value_layer) = tensor_parallel.split_tensor_along_last_dim( + mixed_x_layer, 3, contiguous_split_chunks=True + ) else: # Attention heads [sk, b, h] --> [sk, b, (np * 2 * hn)] mixed_kv_layer, _ = self.key_value(encoder_output) @@ -395,7 +397,9 @@ def forward( mixed_kv_layer = mixed_kv_layer.view(*new_tensor_shape) # [sk, b, np, 2 * hn] --> 2 [sk, b, np, hn] - (key_layer, value_layer) = tensor_parallel.split_tensor_along_last_dim(mixed_kv_layer, 2) + (key_layer, value_layer) = tensor_parallel.split_tensor_along_last_dim( + mixed_kv_layer, 2, contiguous_split_chunks=True + ) # Attention head [sq, b, h] --> [sq, b, hp] query_layer, _ = self.query(hidden_states) From 690946d889dee6639e79b6ce222afa63f507c73e Mon Sep 17 00:00:00 2001 From: Samuel Kriman Date: Tue, 9 May 2023 17:25:02 -0700 Subject: [PATCH 080/512] [ASR] Fix for old models in change_attention_model (#6608) * fixes Signed-off-by: sam1373 * done already Signed-off-by: sam1373 --------- Signed-off-by: sam1373 --- nemo/collections/asr/modules/conformer_encoder.py | 11 +++++++---- nemo/collections/asr/parts/mixins/mixins.py | 8 ++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/nemo/collections/asr/modules/conformer_encoder.py b/nemo/collections/asr/modules/conformer_encoder.py index 9955e35444f4..df5b8f5c69ed 100644 --- a/nemo/collections/asr/modules/conformer_encoder.py +++ b/nemo/collections/asr/modules/conformer_encoder.py @@ -20,7 +20,7 @@ import torch import torch.distributed import torch.nn as nn -from omegaconf import DictConfig, ListConfig +from omegaconf import DictConfig, ListConfig, open_dict from nemo.collections.asr.models.configs import CacheAwareStreamingConfig from nemo.collections.asr.parts.mixins.streaming import StreamingEncoder @@ -884,8 +884,10 @@ def change_attention_model( if att_context_size: att_context_size = list(att_context_size) - else: + elif hasattr(self._cfg, "att_context_size"): att_context_size = self._cfg.att_context_size + else: + att_context_size = self.att_context_size if self_attention_model is None: self_attention_model = self._cfg.self_attention_model @@ -971,8 +973,9 @@ def change_attention_model( m.self_attention_model = self_attention_model if update_config: - self._cfg.self_attention_model = self_attention_model - self._cfg.att_context_size = att_context_size + with open_dict(self._cfg): + self._cfg.self_attention_model = self_attention_model + self._cfg.att_context_size = att_context_size class ConformerEncoderAdapter(ConformerEncoder, adapter_mixins.AdapterModuleMixin): diff --git a/nemo/collections/asr/parts/mixins/mixins.py b/nemo/collections/asr/parts/mixins/mixins.py index a963850341f9..eba896d0478d 100644 --- a/nemo/collections/asr/parts/mixins/mixins.py +++ b/nemo/collections/asr/parts/mixins/mixins.py @@ -412,6 +412,9 @@ def change_attention_model( update_config (bool): Whether to update the config or not with the new attention model. Defaults to True. """ + if self_attention_model is None and att_context_size is None: + return + if not hasattr(self, 'encoder'): logging.info( "Could not change the self_attention_model in encoder " @@ -425,8 +428,9 @@ def change_attention_model( self.encoder.change_attention_model(self_attention_model, att_context_size, update_config, self.device) if update_config: - self.cfg.encoder.self_attention_model = self_attention_model - self.cfg.encoder.att_context_size = att_context_size + with open_dict(self.cfg): + self.cfg.encoder.self_attention_model = self_attention_model + self.cfg.encoder.att_context_size = att_context_size def conformer_stream_step( self, From d94d09d0142e35cc74373d250e5523ad628051a5 Mon Sep 17 00:00:00 2001 From: "He Huang (Steve)" <105218074+stevehuang52@users.noreply.github.com> Date: Wed, 10 May 2023 10:09:12 -0400 Subject: [PATCH 081/512] Update manifest.py to use os.path for get_full_path (#6598) * Update manifest.py to use os.path for get_full_path Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update manifest.py to get rid of pathlib Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update manifest.py Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * Update manifest.py Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Vahid Noroozi --- .../common/parts/preprocessing/manifest.py | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/nemo/collections/common/parts/preprocessing/manifest.py b/nemo/collections/common/parts/preprocessing/manifest.py index 9fd69801ec0d..98194505c589 100644 --- a/nemo/collections/common/parts/preprocessing/manifest.py +++ b/nemo/collections/common/parts/preprocessing/manifest.py @@ -15,7 +15,6 @@ import json import os from os.path import expanduser -from pathlib import Path from typing import Any, Callable, Dict, Iterator, List, Optional, Union from nemo.utils import logging @@ -196,9 +195,11 @@ def get_full_path( ] elif isinstance(audio_file, str): # If input is a string, get the corresponding full path - audio_file = Path(audio_file) - - if (len(str(audio_file)) < audio_file_len_limit) and not audio_file.is_absolute() and not audio_file.is_file(): + if ( + (len(audio_file) < audio_file_len_limit) + and not os.path.isabs(audio_file) + and not os.path.isfile(audio_file) + ): # If audio_file is not available and the path is not absolute, the full path is assumed # to be relative to the manifest file parent directory or data directory. if manifest_file is None and data_dir is None: @@ -210,23 +211,17 @@ def get_full_path( # resolve the data directory if data_dir is None: - if is_datastore_path(manifest_file): - # WORKAROUND: pathlib does not support URIs, so use os.path - data_dir = os.path.dirname(manifest_file) - else: - data_dir = Path(manifest_file).parent.as_posix() + data_dir = os.path.dirname(manifest_file) # assume audio_file path is relative to data_dir - audio_file_path = os.path.join(data_dir, audio_file.as_posix()) + audio_file_path = os.path.join(data_dir, audio_file) if is_datastore_path(audio_file_path): # If audio was originally on an object store, use locally-cached path audio_file_path = datastore_path_to_local_path(audio_file_path) - audio_file_path = Path(audio_file_path) - - if audio_file_path.is_file(): - audio_file = str(audio_file_path.absolute()) + if os.path.isfile(audio_file_path): + audio_file = os.path.abspath(audio_file_path) else: audio_file = expanduser(audio_file) else: From 32f58f62f365f9fdec49324cfe3b991b0e8f06bc Mon Sep 17 00:00:00 2001 From: fayejf <36722593+fayejf@users.noreply.github.com> Date: Wed, 10 May 2023 08:14:38 -0700 Subject: [PATCH 082/512] Cherry pick commits in #6601 to main (#6611) * fix write Signed-off-by: fayejf * decoding ctc Signed-off-by: fayejf * temp set rnnt decoding return_best_hypothesis to true Signed-off-by: fayejf * add wer cal back to transcribe_speech as requested Signed-off-by: fayejf * add wer cal back to speech_to_text_buffered_infer_rnnt as requested Signed-off-by: fayejf * add wer cal back to speech_to_text_buffered_infer_ctc as requested Signed-off-by: fayejf * style fix Signed-off-by: fayejf * reflect change in asr_evaluator Signed-off-by: fayejf * reflect som and vahid comment Signed-off-by: fayejf * remove return_best_hy=true in transcribe_speech Signed-off-by: fayejf * no text skip Signed-off-by: fayejf * revert partial Signed-off-by: fayejf --------- Signed-off-by: fayejf --- .../ctc/speech_to_text_buffered_infer_ctc.py | 30 +++- .../speech_to_text_buffered_infer_rnnt.py | 32 +++- examples/asr/transcribe_speech.py | 29 +++- .../collections/asr/parts/utils/eval_utils.py | 153 ++++++++++++++++++ .../asr/parts/utils/transcribe_utils.py | 8 +- tools/asr_evaluator/asr_evaluator.py | 42 +++-- tools/asr_evaluator/conf/eval.yaml | 3 +- tools/asr_evaluator/utils.py | 138 +--------------- 8 files changed, 276 insertions(+), 159 deletions(-) create mode 100644 nemo/collections/asr/parts/utils/eval_utils.py diff --git a/examples/asr/asr_chunked_inference/ctc/speech_to_text_buffered_infer_ctc.py b/examples/asr/asr_chunked_inference/ctc/speech_to_text_buffered_infer_ctc.py index 5755297d1600..69ea139d2ed6 100644 --- a/examples/asr/asr_chunked_inference/ctc/speech_to_text_buffered_infer_ctc.py +++ b/examples/asr/asr_chunked_inference/ctc/speech_to_text_buffered_infer_ctc.py @@ -27,7 +27,9 @@ total_buffer_in_secs=4.0 \ chunk_len_in_secs=1.6 \ model_stride=4 \ - batch_size=32 + batch_size=32 \ + clean_groundtruth_text=True \ + langid='en' # NOTE: You can use `DEBUG=1 python speech_to_text_buffered_infer_ctc.py ...` to print out the @@ -45,6 +47,8 @@ import torch from omegaconf import OmegaConf +from nemo.collections.asr.metrics.wer import CTCDecodingConfig +from nemo.collections.asr.parts.utils.eval_utils import cal_write_wer from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchASR from nemo.collections.asr.parts.utils.transcribe_utils import ( compute_output_filename, @@ -79,6 +83,9 @@ class TranscriptionConfig: total_buffer_in_secs: float = 4.0 # Length of buffer (chunk + left and right padding) in seconds model_stride: int = 8 # Model downsampling factor, 8 for Citrinet models and 4 for Conformer models", + # Decoding strategy for CTC models + decoding: CTCDecodingConfig = CTCDecodingConfig() + # Set `cuda` to int to define CUDA device. If 'None', will look for CUDA # device anyway, and do inference on CPU only if CUDA device is not found. # If `cuda` is a negative number, inference will be on CPU only. @@ -89,6 +96,12 @@ class TranscriptionConfig: # Recompute model transcription, even if the output folder exists with scores. overwrite_transcripts: bool = True + # Config for word / character error rate calculation + calculate_wer: bool = True + clean_groundtruth_text: bool = False + langid: str = "en" # specify this for convert_num_to_words step in groundtruth cleaning + use_cer: bool = False + @hydra_runner(config_name="TranscriptionConfig", schema=TranscriptionConfig) def main(cfg: TranscriptionConfig) -> TranscriptionConfig: @@ -188,11 +201,24 @@ def autocast(): manifest, filepaths, ) - output_filename = write_transcription( + output_filename, pred_text_attr_name = write_transcription( hyps, cfg, model_name, filepaths=filepaths, compute_langs=False, compute_timestamps=False ) logging.info(f"Finished writing predictions to {output_filename}!") + if cfg.calculate_wer: + output_manifest_w_wer, total_res, _ = cal_write_wer( + pred_manifest=output_filename, + pred_text_attr_name=pred_text_attr_name, + clean_groundtruth_text=cfg.clean_groundtruth_text, + langid=cfg.langid, + use_cer=cfg.use_cer, + output_filename=None, + ) + if output_manifest_w_wer: + logging.info(f"Writing prediction and error rate of each sample to {output_manifest_w_wer}!") + logging.info(f"{total_res}") + return cfg diff --git a/examples/asr/asr_chunked_inference/rnnt/speech_to_text_buffered_infer_rnnt.py b/examples/asr/asr_chunked_inference/rnnt/speech_to_text_buffered_infer_rnnt.py index f2b6d143bdb2..385a29b8f417 100644 --- a/examples/asr/asr_chunked_inference/rnnt/speech_to_text_buffered_infer_rnnt.py +++ b/examples/asr/asr_chunked_inference/rnnt/speech_to_text_buffered_infer_rnnt.py @@ -34,7 +34,9 @@ total_buffer_in_secs=4.0 \ chunk_len_in_secs=1.6 \ model_stride=4 \ - batch_size=32 + batch_size=32 \ + clean_groundtruth_text=True \ + langid='en' # Longer Common Subsequence (LCS) Merge algorithm @@ -66,6 +68,7 @@ import torch from omegaconf import OmegaConf, open_dict +from nemo.collections.asr.parts.utils.eval_utils import cal_write_wer from nemo.collections.asr.parts.utils.streaming_utils import ( BatchedFrameASRRNNT, LongestCommonSubsequenceBatchedFrameASRRNNT, @@ -101,7 +104,7 @@ class TranscriptionConfig: # Chunked configs chunk_len_in_secs: float = 1.6 # Chunk length in seconds total_buffer_in_secs: float = 4.0 # Length of buffer (chunk + left and right padding) in seconds - model_stride: int = 8 # Model downsampling factor, 8 for Citrinet models and 4 for Conformer models", + model_stride: int = 8 # Model downsampling factor, 8 for Citrinet models and 4 for Conformer models # Set `cuda` to int to define CUDA device. If 'None', will look for CUDA # device anyway, and do inference on CPU only if CUDA device is not found. @@ -120,6 +123,12 @@ class TranscriptionConfig: merge_algo: Optional[str] = 'middle' # choices=['middle', 'lcs'], choice of algorithm to apply during inference. lcs_alignment_dir: Optional[str] = None # Path to a directory to store LCS algo alignments + # Config for word / character error rate calculation + calculate_wer: bool = True + clean_groundtruth_text: bool = False + langid: str = "en" # specify this for convert_num_to_words step in groundtruth cleaning + use_cer: bool = False + @hydra_runner(config_name="TranscriptionConfig", schema=TranscriptionConfig) def main(cfg: TranscriptionConfig) -> TranscriptionConfig: @@ -194,9 +203,13 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig: decoding_cfg.strategy = "greedy_batch" decoding_cfg.preserve_alignments = True # required to compute the middle token for transducers. decoding_cfg.fused_batch_size = -1 # temporarily stop fused batch during inference. + decoding_cfg.beam.return_best_hypothesis = True asr_model.change_decoding_strategy(decoding_cfg) + with open_dict(cfg): + cfg.decoding = decoding_cfg + feature_stride = model_cfg.preprocessor['window_stride'] model_stride_in_secs = feature_stride * cfg.model_stride total_buffer = cfg.total_buffer_in_secs @@ -242,11 +255,24 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig: filepaths=filepaths, ) - output_filename = write_transcription( + output_filename, pred_text_attr_name = write_transcription( hyps, cfg, model_name, filepaths=filepaths, compute_langs=False, compute_timestamps=False ) logging.info(f"Finished writing predictions to {output_filename}!") + if cfg.calculate_wer: + output_manifest_w_wer, total_res, _ = cal_write_wer( + pred_manifest=output_filename, + pred_text_attr_name=pred_text_attr_name, + clean_groundtruth_text=cfg.clean_groundtruth_text, + langid=cfg.langid, + use_cer=cfg.use_cer, + output_filename=None, + ) + if output_manifest_w_wer: + logging.info(f"Writing prediction and error rate of each sample to {output_manifest_w_wer}!") + logging.info(f"{total_res}") + return cfg diff --git a/examples/asr/transcribe_speech.py b/examples/asr/transcribe_speech.py index 0ab50dba016b..531b5c56aa4e 100644 --- a/examples/asr/transcribe_speech.py +++ b/examples/asr/transcribe_speech.py @@ -25,6 +25,7 @@ from nemo.collections.asr.metrics.wer import CTCDecodingConfig from nemo.collections.asr.models import EncDecCTCModel, EncDecHybridRNNTCTCModel from nemo.collections.asr.modules.conformer_encoder import ConformerChangeConfig +from nemo.collections.asr.parts.utils.eval_utils import cal_write_wer from nemo.collections.asr.parts.utils.transcribe_utils import ( compute_output_filename, prepare_audio_data, @@ -69,6 +70,11 @@ ctc_decoding: Decoding sub-config for CTC. Refer to documentation for specific values. rnnt_decoding: Decoding sub-config for RNNT. Refer to documentation for specific values. + calculate_wer: Bool to decide whether to calculate wer/cer at end of this script + clean_groundtruth_text: Bool to clean groundtruth text + langid: Str used for convert_num_to_words during groundtruth cleaning + use_cer: Bool to use Character Error Rate (CER) or Word Error Rate (WER) + # Usage ASR model can be specified by either "model_path" or "pretrained_name". Data for transcription can be defined with either "audio_dir" or "dataset_manifest". @@ -82,6 +88,8 @@ audio_dir="" \ dataset_manifest="" \ output_filename="" \ + clean_groundtruth_text=True \ + langid='en' \ batch_size=32 \ compute_timestamps=False \ compute_langs=False \ @@ -149,6 +157,12 @@ class TranscriptionConfig: # Use this for model-specific changes before transcription model_change: ModelChangeConfig = ModelChangeConfig() + # Config for word / character error rate calculation + calculate_wer: bool = True + clean_groundtruth_text: bool = False + langid: str = "en" # specify this for convert_num_to_words step in groundtruth cleaning + use_cer: bool = False + @hydra_runner(config_name="TranscriptionConfig", schema=TranscriptionConfig) def main(cfg: TranscriptionConfig) -> TranscriptionConfig: @@ -322,7 +336,7 @@ def autocast(): transcriptions = transcriptions[0] # write audio transcriptions - output_filename = write_transcription( + output_filename, pred_text_attr_name = write_transcription( transcriptions, cfg, model_name, @@ -332,6 +346,19 @@ def autocast(): ) logging.info(f"Finished writing predictions to {output_filename}!") + if cfg.calculate_wer: + output_manifest_w_wer, total_res, _ = cal_write_wer( + pred_manifest=output_filename, + pred_text_attr_name=pred_text_attr_name, + clean_groundtruth_text=cfg.clean_groundtruth_text, + langid=cfg.langid, + use_cer=cfg.use_cer, + output_filename=None, + ) + if output_manifest_w_wer: + logging.info(f"Writing prediction and error rate of each sample to {output_manifest_w_wer}!") + logging.info(f"{total_res}") + return cfg diff --git a/nemo/collections/asr/parts/utils/eval_utils.py b/nemo/collections/asr/parts/utils/eval_utils.py new file mode 100644 index 000000000000..5838f3b4035d --- /dev/null +++ b/nemo/collections/asr/parts/utils/eval_utils.py @@ -0,0 +1,153 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from typing import Tuple + +from nemo.collections.asr.metrics.wer import word_error_rate_detail +from nemo.utils import logging + + +def clean_label(_str: str, num_to_words: bool = True, langid="en") -> str: + """ + Remove unauthorized characters in a string, lower it and remove unneeded spaces + """ + replace_with_space = [char for char in '/?*\",.:=?_{|}~¨«·»¡¿„…‧‹›≪≫!:;ː→'] + replace_with_blank = [char for char in '`¨´‘’“”`ʻ‘’“"‘”'] + replace_with_apos = [char for char in '‘’ʻ‘’‘'] + _str = _str.strip() + _str = _str.lower() + for i in replace_with_blank: + _str = _str.replace(i, "") + for i in replace_with_space: + _str = _str.replace(i, " ") + for i in replace_with_apos: + _str = _str.replace(i, "'") + if num_to_words: + if langid == "en": + _str = convert_num_to_words(_str, langid="en") + else: + logging.info( + "Currently support basic num_to_words in English only. Please use Text Normalization to convert other languages! Skipping!" + ) + + ret = " ".join(_str.split()) + return ret + + +def convert_num_to_words(_str: str, langid: str = "en") -> str: + """ + Convert digits to corresponding words. Note this is a naive approach and could be replaced with text normalization. + """ + if langid == "en": + num_to_words = ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"] + _str = _str.strip() + words = _str.split() + out_str = "" + num_word = [] + for word in words: + if word.isdigit(): + num = int(word) + while num: + digit = num % 10 + digit_word = num_to_words[digit] + num_word.append(digit_word) + num = int(num / 10) + if not (num): + num_str = "" + num_word = num_word[::-1] + for ele in num_word: + num_str += ele + " " + out_str += num_str + " " + num_word.clear() + else: + out_str += word + " " + out_str = out_str.strip() + else: + raise ValueError( + "Currently support basic num_to_words in English only. Please use Text Normalization to convert other languages!" + ) + return out_str + + +def cal_write_wer( + pred_manifest: str = None, + pred_text_attr_name: str = "pred_text", + clean_groundtruth_text: bool = False, + langid: str = 'en', + use_cer: bool = False, + output_filename: str = None, +) -> Tuple[str, dict, str]: + """ + Calculate wer, inserion, deletion and substitution rate based on groundtruth text and pred_text_attr_name (pred_text) + We use WER in function name as a convention, but Error Rate (ER) currently support Word Error Rate (WER) and Character Error Rate (CER) + """ + samples = [] + hyps = [] + refs = [] + eval_metric = "cer" if use_cer else "wer" + + with open(pred_manifest, 'r') as fp: + for line in fp: + sample = json.loads(line) + + if 'text' not in sample: + logging.info( + "ground-truth text is not present in manifest! Cannot calculate Word Error Rate. Returning!" + ) + return None, None, eval_metric + + hyp = sample[pred_text_attr_name] + ref = sample['text'] + + if clean_groundtruth_text: + ref = clean_label(ref, langid=langid) + + wer, tokens, ins_rate, del_rate, sub_rate = word_error_rate_detail( + hypotheses=[hyp], references=[ref], use_cer=use_cer + ) + sample[eval_metric] = wer # evaluatin metric, could be word error rate of character error rate + sample['tokens'] = tokens # number of word/characters/tokens + sample['ins_rate'] = ins_rate # insertion error rate + sample['del_rate'] = del_rate # deletion error rate + sample['sub_rate'] = sub_rate # substitution error rate + + samples.append(sample) + hyps.append(hyp) + refs.append(ref) + + total_wer, total_tokens, total_ins_rate, total_del_rate, total_sub_rate = word_error_rate_detail( + hypotheses=hyps, references=refs, use_cer=use_cer + ) + + if not output_filename: + output_manifest_w_wer = pred_manifest + else: + output_manifest_w_wer = output_filename + + with open(output_manifest_w_wer, 'w') as fout: + for sample in samples: + json.dump(sample, fout) + fout.write('\n') + fout.flush() + + total_res = { + "samples": len(samples), + "tokens": total_tokens, + eval_metric: total_wer, + "ins_rate": total_ins_rate, + "del_rate": total_del_rate, + "sub_rate": total_sub_rate, + } + return output_manifest_w_wer, total_res, eval_metric diff --git a/nemo/collections/asr/parts/utils/transcribe_utils.py b/nemo/collections/asr/parts/utils/transcribe_utils.py index 69abf09e8cab..d7946aa2842b 100644 --- a/nemo/collections/asr/parts/utils/transcribe_utils.py +++ b/nemo/collections/asr/parts/utils/transcribe_utils.py @@ -276,7 +276,7 @@ def write_transcription( filepaths: List[str] = None, compute_langs: bool = False, compute_timestamps: bool = False, -) -> str: +) -> Tuple[str, str]: """ Write generated transcription to output file. """ if cfg.append_pred: logging.info(f'Transcripts will be written in "{cfg.output_filename}" file') @@ -321,7 +321,7 @@ def write_transcription( if compute_langs: item['pred_lang'] = transcription.langs item['pred_lang_chars'] = transcription.langs_chars - if not cfg.ctc_decoding.beam.return_best_hypothesis: + if not cfg.decoding.beam.return_best_hypothesis: item['beams'] = beams[idx] f.write(json.dumps(item) + "\n") else: @@ -344,11 +344,11 @@ def write_transcription( item['pred_lang'] = best_hyps[idx].langs item['pred_lang_chars'] = best_hyps[idx].langs_chars - if not cfg.ctc_decoding.beam.return_best_hypothesis: + if not cfg.decoding.beam.return_best_hypothesis: item['beams'] = beams[idx] f.write(json.dumps(item) + "\n") - return cfg.output_filename + return cfg.output_filename, pred_text_attr_name def transcribe_partial_audio( diff --git a/tools/asr_evaluator/asr_evaluator.py b/tools/asr_evaluator/asr_evaluator.py index ff6e251a39f5..9540d3429138 100644 --- a/tools/asr_evaluator/asr_evaluator.py +++ b/tools/asr_evaluator/asr_evaluator.py @@ -12,18 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. import json - import git -from omegaconf import OmegaConf - +from omegaconf import OmegaConf, open_dict +from utils import cal_target_metadata_wer, run_asr_inference +from nemo.collections.asr.parts.utils.eval_utils import cal_write_wer from nemo.core.config import hydra_runner from nemo.utils import logging -from utils import cal_target_metadata_wer, cal_write_wer, run_asr_inference """ This script serves as evaluator of ASR models Usage: - python python asr_evaluator.py \ +python asr_evaluator.py \ engine.pretrained_name="stt_en_conformer_transducer_large" \ engine.inference.mode="offline" \ engine.test_ds.augmentor.noise.manifest_path= \ @@ -44,15 +43,34 @@ def main(cfg): report['git_hash'] = repo.head.object.hexsha ## Engine - # Could skip next line to use generated manifest - - # If need to change more parameters for ASR inference, change it in - # 1) shell script in eval_utils.py in nemo/collections/asr/parts/utils or - # 2) TranscriptionConfig on top of the executed scripts such as transcribe_speech.py in examples/asr - cfg.engine = run_asr_inference(cfg=cfg.engine) + # Could skip run_asr_inference and use the generated manifest by + # specifying analyst.metric_calculator.exist_pred_manifest + if cfg.analyst.metric_calculator.exist_pred_manifest is None: + # If need to change more parameters for ASR inference, change it in + # 1) shell script in utils.py + # 2) TranscriptionConfig on top of the executed scripts such as transcribe_speech.py in examples/asr + # Note we SKIP calculating wer during asr_inference stage with calculate_wer=False and calculate wer for each sample below + # for more flexibility and reducing possible redundant inference cost. + cfg.engine = run_asr_inference(cfg=cfg.engine) + + else: + logging.info( + f"Use generated prediction manifest {cfg.analyst.metric_calculator.exist_pred_manifest} and skip enigneer" + ) + with open_dict(cfg): + cfg.engine.output_filename = cfg.analyst.metric_calculator.exist_pred_manifest ## Analyst - cfg, total_res, eval_metric = cal_write_wer(cfg) + output_manifest_w_wer, total_res, eval_metric = cal_write_wer( + pred_manifest=cfg.engine.output_filename, + clean_groundtruth_text=cfg.analyst.metric_calculator.clean_groundtruth_text, + langid=cfg.analyst.metric_calculator.langid, + use_cer=cfg.analyst.metric_calculator.use_cer, + output_filename=cfg.analyst.metric_calculator.output_filename, + ) + with open_dict(cfg): + cfg.analyst.metric_calculator.output_filename = output_manifest_w_wer + report.update({"res": total_res}) for target in cfg.analyst.metadata: diff --git a/tools/asr_evaluator/conf/eval.yaml b/tools/asr_evaluator/conf/eval.yaml index 9129eddc49f1..176392b9c070 100644 --- a/tools/asr_evaluator/conf/eval.yaml +++ b/tools/asr_evaluator/conf/eval.yaml @@ -38,9 +38,10 @@ engine: analyst: metric_calculator: + exist_pred_manifest: null # specify the previously generated manifest will skip engine clean_groundtruth_text: True langid: "en" # speciify language to clean text. Note use text normalization in NeMo for better performancce - output_filename: null # specify it if wanna skip engine and use previously generated manifest + output_filename: null use_cer: False metadata: diff --git a/tools/asr_evaluator/utils.py b/tools/asr_evaluator/utils.py index c233376eb13a..84f4bdb62364 100644 --- a/tools/asr_evaluator/utils.py +++ b/tools/asr_evaluator/utils.py @@ -18,8 +18,6 @@ from typing import Tuple from omegaconf import DictConfig, OmegaConf, open_dict - -from nemo.collections.asr.metrics.wer import word_error_rate_detail from nemo.utils import logging @@ -110,6 +108,7 @@ def run_chunked_inference(cfg: DictConfig) -> DictConfig: subprocess.run( f"python {script_path} " + f"calculate_wer=False " f"model_path={cfg.model_path} " f"pretrained_name={cfg.pretrained_name} " f"dataset_manifest={cfg.test_ds.manifest_filepath} " @@ -148,6 +147,7 @@ def run_offline_inference(cfg: DictConfig) -> DictConfig: # 2) add command as "rnnt_decoding.strategy=greedy_batch " to below script subprocess.run( f"python {script_path} " + f"calculate_wer=False " f"model_path={cfg.model_path} " f"pretrained_name={cfg.pretrained_name} " f"dataset_manifest={cfg.test_ds.manifest_filepath} " @@ -163,139 +163,6 @@ def run_offline_inference(cfg: DictConfig) -> DictConfig: return cfg -def clean_label(_str: str, num_to_words: bool = True, langid="en") -> str: - """ - Remove unauthorized characters in a string, lower it and remove unneeded spaces - """ - replace_with_space = [char for char in '/?*\",.:=?_{|}~¨«·»¡¿„…‧‹›≪≫!:;ː→'] - replace_with_blank = [char for char in '`¨´‘’“”`ʻ‘’“"‘”'] - replace_with_apos = [char for char in '‘’ʻ‘’‘'] - _str = _str.strip() - _str = _str.lower() - for i in replace_with_blank: - _str = _str.replace(i, "") - for i in replace_with_space: - _str = _str.replace(i, " ") - for i in replace_with_apos: - _str = _str.replace(i, "'") - if num_to_words: - if langid == "en": - _str = convert_num_to_words(_str, langid="en") - else: - logging.info( - "Currently support basic num_to_words in English only. Please use Text Normalization to convert other languages! Skipping!" - ) - - ret = " ".join(_str.split()) - return ret - - -def convert_num_to_words(_str: str, langid: str = "en") -> str: - """ - Convert digits to corresponding words. Note this is a naive approach and could be replaced with text normalization. - """ - if langid == "en": - num_to_words = ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"] - _str = _str.strip() - words = _str.split() - out_str = "" - num_word = [] - for word in words: - if word.isdigit(): - num = int(word) - while num: - digit = num % 10 - digit_word = num_to_words[digit] - num_word.append(digit_word) - num = int(num / 10) - if not (num): - num_str = "" - num_word = num_word[::-1] - for ele in num_word: - num_str += ele + " " - out_str += num_str + " " - num_word.clear() - else: - out_str += word + " " - out_str = out_str.strip() - else: - raise ValueError( - "Currently support basic num_to_words in English only. Please use Text Normalization to convert other languages!" - ) - return out_str - - -def cal_write_wer(cfg: DictConfig, pred_text_attr_name: str = None) -> Tuple[DictConfig, dict]: - """ - Calculate wer, inserion, deletion and substitution rate based on groundtruth text and pred_text_attr_name (pred_text) - We use WER in function name as a convention, but it currently Error Rate (ER) support Word Error Rate (WER) and Character Error Rate (CER) - """ - samples = [] - hyps = [] - refs = [] - - with open(cfg.engine.output_filename, 'r') as fp: - for line in fp: - sample = json.loads(line) - - if 'text' not in sample: - raise ValueError( - "ground-truth text does not present in manifest! Cannot calculate Word Error Rate. Exiting!" - ) - - if not pred_text_attr_name: - pred_text_attr_name = "pred_text" - - hyp = sample[pred_text_attr_name] - ref = sample['text'] - - if cfg.analyst.metric_calculator.clean_groundtruth_text: - ref = clean_label(ref, langid=cfg.analyst.metric_calculator.langid) - - wer, tokens, ins_rate, del_rate, sub_rate = word_error_rate_detail( - hypotheses=[hyp], references=[ref], use_cer=cfg.analyst.metric_calculator.use_cer - ) - eval_metric = "wer" - if cfg.analyst.metric_calculator.use_cer: - eval_metric = "cer" - - sample[eval_metric] = wer # evaluatin metric, could be word error rate of character error rate - sample['tokens'] = tokens # number of word/characters/tokens - sample['ins_rate'] = ins_rate # insertion error rate - sample['del_rate'] = del_rate # deletion error rate - sample['sub_rate'] = sub_rate # substitution error rate - - samples.append(sample) - hyps.append(hyp) - refs.append(ref) - - total_wer, total_tokens, total_ins_rate, total_del_rate, total_sub_rate = word_error_rate_detail( - hypotheses=hyps, references=refs, use_cer=cfg.analyst.metric_calculator.use_cer - ) - - if "output_filename" not in cfg.analyst.metric_calculator or not cfg.analyst.metric_calculator.output_filename: - # overwrite the current generated manifest - OmegaConf.set_struct(cfg, True) - with open_dict(cfg): - cfg.analyst.metric_calculator.output_filename = cfg.engine.output_filename - - with open(cfg.analyst.metric_calculator.output_filename, 'w') as fout: - for sample in samples: - json.dump(sample, fout) - fout.write('\n') - fout.flush() - - total_res = { - "samples": len(samples), - "tokens": total_tokens, - eval_metric: total_wer, - "ins_rate": total_ins_rate, - "del_rate": total_del_rate, - "sub_rate": total_sub_rate, - } - return cfg, total_res, eval_metric - - def cal_target_metadata_wer(manifest: str, target: str, meta_cfg: DictConfig, eval_metric: str = "wer",) -> dict: """ Caculating number of samples (samples), number of words/characters/tokens (tokens), @@ -314,7 +181,6 @@ def cal_target_metadata_wer(manifest: str, target: str, meta_cfg: DictConfig, ev Return: ret (dict): Generated dictionary containing all results regarding the target metadata. """ - if eval_metric not in ['wer', 'cer']: raise ValueError( "Currently support wer and cer as eval_metric. Please implement it in cal_target_metadata_wer if using different eval_metric" From af3225b5f9b6cd51052e6551cb636f65723e7608 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 08:45:34 -0700 Subject: [PATCH 083/512] Create dummy iters to satisy len checks (#6600) (#6603) Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Co-authored-by: Eric Harper --- Dockerfile | 2 +- Jenkinsfile | 2 +- .../nlp/models/language_modeling/megatron_gpt_adapter_model.py | 3 ++- .../nlp/models/language_modeling/megatron_gpt_model.py | 3 ++- .../language_modeling/megatron_gpt_prompt_learning_model.py | 3 ++- .../nlp/models/language_modeling/megatron_retrieval_model.py | 3 ++- .../nlp/models/language_modeling/megatron_t5_adapter_model.py | 3 ++- .../collections/nlp/modules/common/text_generation_strategy.py | 2 +- 8 files changed, 13 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index e8402189a474..4cbbf14314c9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,7 +46,7 @@ WORKDIR /workspace/ # Install Megatron-core RUN git clone https://github.com/NVIDIA/Megatron-LM.git && \ cd Megatron-LM && \ - git checkout 3db2063b1ff992a971ba18f7101eecc9c4e90f03 && \ + git checkout 9f8bdeb4814ed61fbc9c7d5b39c7710e77b99754 && \ pip install -e . WORKDIR /tmp/ diff --git a/Jenkinsfile b/Jenkinsfile index a8adc89ab65c..59a6fe7c416e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -62,7 +62,7 @@ pipeline { steps { sh 'git clone https://github.com/NVIDIA/Megatron-LM.git && \ cd Megatron-LM && \ - git checkout 3db2063b1ff992a971ba18f7101eecc9c4e90f03 && \ + git checkout 9f8bdeb4814ed61fbc9c7d5b39c7710e77b99754 && \ pip install -e .' } } diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_adapter_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_adapter_model.py index cb38ad863a52..2985ab4df3bb 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_adapter_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_adapter_model.py @@ -114,7 +114,8 @@ def get_forward_output_only_func(self): Used for generate method only for now. """ - def fwd_output_only_func(batch, model): + def fwd_output_only_func(dataloader_iter, model): + batch = next(dataloader_iter) extra_arg = {} ( tokens, diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 5cab67a71441..967f6a6cf85f 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -629,7 +629,8 @@ def loss_func(output_tensor): return fwd_output_and_loss_func def get_forward_output_only_func(self): - def fwd_output_only_func(batch, model): + def fwd_output_only_func(dataloader_iter, model): + batch = next(dataloader_iter) extra_arg = {} if len(batch) == 3: batch = [x.cuda() for x in batch] diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py index dd0d9168c16a..cca46b54e8a8 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py @@ -646,7 +646,8 @@ def get_forward_output_only_func(self): Used for generate method only for now. """ - def fwd_output_only_func(batch, model): + def fwd_output_only_func(dataloader_iter, model): + batch = next(dataloader_iter) extra_arg = {} ( tokens, diff --git a/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py b/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py index ad918b3a5e3a..55ec336749a5 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py @@ -509,7 +509,8 @@ def get_forward_output_only_func(self): Used for generate method only. """ - def fwd_output_only_func(batch, model): + def fwd_output_only_func(dataloader_iter, model): + batch = next(dataloader_iter) extra_arg = {} ( tokens, diff --git a/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py index 32345e829be8..31c147022486 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py @@ -215,7 +215,8 @@ def get_forward_output_only_func(self): Used for generate method only for now. """ - def fwd_output_only_func(batch, model): + def fwd_output_only_func(dataloader_iter, model): + batch = next(dataloader_iter) extra_arg = {} ( tokens, diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py index 07607d3840d8..b23f77645d3a 100644 --- a/nemo/collections/nlp/modules/common/text_generation_strategy.py +++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py @@ -56,7 +56,7 @@ def forward_step(self, batch, tensor_shape): output_tensor = fwd_bwd_function( forward_step_func=self.model.get_forward_output_only_func(), - data_iterator=batch, + data_iterator=iter([batch,]), model=[self.forward_model], num_microbatches=get_num_microbatches(), forward_only=True, From 2148d57ddda5909491d439220271c5d707b72d6e Mon Sep 17 00:00:00 2001 From: Abhinav Khattar Date: Wed, 10 May 2023 08:46:11 -0700 Subject: [PATCH 084/512] add GPT eval mode fix for interleaved to main (#6610) Signed-off-by: Abhinav Khattar --- .../models/language_modeling/megatron_gpt_model.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 967f6a6cf85f..0222eedd54ce 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -666,8 +666,17 @@ def validation_step(self, dataloader_iter, batch_idx): from the dataloader to produce a list of microbatches. The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. """ + if isinstance(self.model, list): + for model_module in self.model: + model_module.eval() + + loss = self.fwd_bwd_step(dataloader_iter, batch_idx, True) - return self.fwd_bwd_step(dataloader_iter, batch_idx, True) + if isinstance(self.model, list): + for model_module in self.model: + model_module.train() + + return loss def validation_epoch_end(self, outputs): if parallel_state.is_pipeline_last_stage(): From d5f2de2710e05a7b5566ca7f21a5c703a8d3318f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 08:48:01 -0700 Subject: [PATCH 085/512] Fix batch size reconf for T5 FT for multi-validation (#6582) (#6588) Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Co-authored-by: Eric Harper --- .../megatron_finetune_model.py | 36 ++++++++++++++----- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py b/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py index b7a9fb476409..8e59b1e4ce62 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py @@ -30,6 +30,7 @@ try: from apex.transformer.pipeline_parallel.utils import ( _reconfigure_microbatch_calculator, + get_current_global_batch_size, get_micro_batch_size, get_num_microbatches, ) @@ -260,16 +261,33 @@ def cast_for_metric(self, pred, label, metric_name, class_labels=None, labels_ar def _reconfigure_and_process_inference_batch(self, batch, ds_config): global_batch_size_per_gpu = batch['text_enc'].size(0) # This should happen only on the last batch of the dataset. - if global_batch_size_per_gpu != ds_config.global_batch_size // parallel_state.get_data_parallel_world_size(): + if ( + global_batch_size_per_gpu + != get_current_global_batch_size() // parallel_state.get_data_parallel_world_size() + ): # NOTE: This is reconfiguring to make sure there is no grad-acc for validation batches. - app_state = AppState() - _reconfigure_microbatch_calculator( - rank=app_state.global_rank, - rampup_batch_size=None, - global_batch_size=global_batch_size_per_gpu * parallel_state.get_data_parallel_world_size(), - micro_batch_size=global_batch_size_per_gpu, - data_parallel_size=parallel_state.get_data_parallel_world_size(), - ) + if ( + global_batch_size_per_gpu + != ds_config.global_batch_size // parallel_state.get_data_parallel_world_size() + ): + app_state = AppState() + _reconfigure_microbatch_calculator( + rank=app_state.global_rank, + rampup_batch_size=None, + global_batch_size=global_batch_size_per_gpu * parallel_state.get_data_parallel_world_size(), + micro_batch_size=global_batch_size_per_gpu, + data_parallel_size=parallel_state.get_data_parallel_world_size(), + ) + # NOTE: need to explicitly handle resetting for multi-validation + else: + app_state = AppState() + _reconfigure_microbatch_calculator( + rank=app_state.global_rank, + rampup_batch_size=None, + global_batch_size=ds_config.global_batch_size, + micro_batch_size=ds_config.micro_batch_size, + data_parallel_size=parallel_state.get_data_parallel_world_size(), + ) def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): """ From e23a5dbb5b29785b077ae9ffc5483157ed34e6d1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 10:02:52 -0600 Subject: [PATCH 086/512] Not doing CastToFloat by default (#6524) (#6563) * Not doing CastToFloat by default * Added docustring * Dummy commit --------- Signed-off-by: Boris Fomitchev Co-authored-by: Boris Fomitchev Co-authored-by: Eric Harper --- nemo/utils/cast_utils.py | 18 ++++++++++++------ nemo/utils/export_utils.py | 24 +++++++++++++++++------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/nemo/utils/cast_utils.py b/nemo/utils/cast_utils.py index eeb48f35ffa7..21e977ec494d 100644 --- a/nemo/utils/cast_utils.py +++ b/nemo/utils/cast_utils.py @@ -70,8 +70,11 @@ def __init__(self, mod): self.mod = mod def forward(self, x): - with torch.cuda.amp.autocast(enabled=False): - ret = self.mod.forward(x.to(torch.float32)).to(x.dtype) + if torch.is_autocast_enabled() and x.dtype != torch.float32: + with torch.cuda.amp.autocast(enabled=False): + ret = self.mod.forward(x.to(torch.float32)).to(x.dtype) + else: + ret = self.mod.forward(x) return ret @@ -81,7 +84,10 @@ def __init__(self, mod): self.mod = mod def forward(self, *args): - from_dtype = args[0].dtype - with torch.cuda.amp.autocast(enabled=False): - ret = self.mod.forward(*cast_all(args, from_dtype=from_dtype, to_dtype=torch.float32)) - return cast_all(ret, from_dtype=torch.float32, to_dtype=from_dtype) + if torch.is_autocast_enabled(): + from_dtype = args[0].dtype + with torch.cuda.amp.autocast(enabled=False): + ret = self.mod.forward(*cast_all(args, from_dtype=from_dtype, to_dtype=torch.float32)) + return cast_all(ret, from_dtype=torch.float32, to_dtype=from_dtype) + else: + return self.mod.forward(*args) diff --git a/nemo/utils/export_utils.py b/nemo/utils/export_utils.py index cc0ce744a9a6..9fa2bc239eb8 100644 --- a/nemo/utils/export_utils.py +++ b/nemo/utils/export_utils.py @@ -440,22 +440,16 @@ def script_module(m: nn.Module): def replace_for_export(model: nn.Module) -> nn.Module: """ - Top-level function to replace default set of modules in model + Top-level function to replace 'default set' of modules in model, called from _prepare_for_export. NOTE: This occurs in place, if you want to preserve model then make sure to copy it first. Args: model : top level module - replace_1D_2D : include 1D -> 2D replacements Returns: model, possibly modified in-place """ from nemo.collections.tts.modules.submodules import MaskedInstanceNorm1d default_replacements = { - "BatchNorm1d": wrap_module(nn.BatchNorm1d, CastToFloat), - "BatchNorm2d": wrap_module(nn.BatchNorm2d, CastToFloat), - "LayerNorm": wrap_module(nn.LayerNorm, CastToFloat), - "InstanceNorm1d": wrap_module(nn.InstanceNorm1d, CastToFloat), - "MaskedInstanceNorm1d": wrap_module(MaskedInstanceNorm1d, CastToFloatAll), "MatchedScaleMaskSoftmax": wrap_module(None, replace_MatchedScaleMaskSoftmax), } @@ -463,3 +457,19 @@ def replace_for_export(model: nn.Module) -> nn.Module: replace_modules(model, default_replacements) # This one has to be the last replace_modules(model, script_replacements) + + +def add_casts_around_norms(model: nn.Module): + """ + Function to put additional to/from float32 casts around operations known to require full precision. + It was used with an extra post-parse script to have TRT preserve extra precision when --fp16 needed. + Should not be needed with TRT 8.6.1 or later. + """ + default_cast_replacements = { + "BatchNorm1d": wrap_module(nn.BatchNorm1d, CastToFloat), + "BatchNorm2d": wrap_module(nn.BatchNorm2d, CastToFloat), + "LayerNorm": wrap_module(nn.LayerNorm, CastToFloat), + "InstanceNorm1d": wrap_module(nn.InstanceNorm1d, CastToFloat), + "MaskedInstanceNorm1d": wrap_module(MaskedInstanceNorm1d, CastToFloatAll), + } + replace_modules(model, default_cast_replacements) From 1e17524d0cb2d1cc1ebd8e88477ac2b6901d9de5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 14:58:30 -0700 Subject: [PATCH 087/512] Turn autocast off when precision is fp32 (#6576) * Turn autocast off when precision is fp32 (#6554) * Turn autocast off when precision is fp32 Signed-off-by: Abhinav Khattar * address review Signed-off-by: Abhinav Khattar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixes Signed-off-by: Abhinav Khattar * merge Signed-off-by: Abhinav Khattar --------- Signed-off-by: Abhinav Khattar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Harper * correct auto-merge Signed-off-by: Abhinav Khattar * correct auto-merge Signed-off-by: Abhinav Khattar * add to GPT SFT Signed-off-by: Abhinav Khattar --------- Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Harper --- .../megatron_base_prompt_learning_model.py | 4 ++++ .../models/language_modeling/megatron_bert_model.py | 8 ++++++-- .../language_modeling/megatron_finetune_model.py | 2 +- .../nlp/models/language_modeling/megatron_gpt_model.py | 6 +++++- .../megatron_gpt_prompt_learning_model.py | 6 +++++- .../models/language_modeling/megatron_gpt_sft_model.py | 2 +- .../megatron_lm_encoder_decoder_model.py | 10 +++++++--- .../language_modeling/megatron_retrieval_model.py | 4 ++++ .../megatron_t5_prompt_learning_model.py | 2 +- .../models/machine_translation/megatron_nmt_model.py | 2 +- .../nlp/modules/common/text_generation_strategy.py | 2 +- 11 files changed, 36 insertions(+), 12 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_prompt_learning_model.py index 9e79cb4a41e7..88da586832df 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_prompt_learning_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_prompt_learning_model.py @@ -146,6 +146,10 @@ def init_model(self, cfg: DictConfig, trainer: Trainer): self.lowest_val_loss = None self.prompt_encoder = None + self.enable_autocast = ( + True if (not self.megatron_amp_o2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + ) + # define validation metric if self.cfg.get('report_validation_metric', False): validation_metric = self.cfg.get('validation_metric', 'accuracy') diff --git a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py index cd50f8414470..bda1a595655a 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py @@ -90,6 +90,10 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): else: raise ValueError('precision must be in [32, 16, "bf16"]') + self.enable_autocast = ( + True if (not self.megatron_amp_o2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + ) + # used in NVIDIA NGC PyTorch containers # buffer used during train_step for logging average loss over gradient accumulation steps self._reduced_lm_loss_buffer = [] @@ -311,7 +315,7 @@ def training_step(self, dataloader_iter, batch_idx): dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, + enable_autocast=self.enable_autocast, ) if losses_reduced_per_micro_batch: @@ -412,7 +416,7 @@ def validation_step(self, dataloader_iter, batch_idx): tensor_shape=tensor_shape, dtype=self.autocast_dtype, sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, + enable_autocast=self.enable_autocast, ) if losses_reduced_per_micro_batch: diff --git a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py b/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py index 8e59b1e4ce62..4ed71756e60e 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py @@ -318,7 +318,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, + enable_autocast=self.enable_autocast, ) # only the last stages of the pipeline return losses diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 0222eedd54ce..9cb4efca57fc 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -156,6 +156,10 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): else: raise ValueError('precision must be in [32, 16, "bf16"]') + self.enable_autocast = ( + True if (not self.megatron_amp_o2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + ) + self.transformer_engine = cfg.get('transformer_engine', False) # configuration used for inference @@ -348,7 +352,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, + enable_autocast=self.enable_autocast, ) # only the last stages of the pipeline return losses diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py index cca46b54e8a8..95448e67bd11 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py @@ -150,6 +150,10 @@ def init_model(self, cfg: DictConfig, trainer: Trainer): self.virtual_prompt_style = VirtualPromptStyle(cfg.virtual_prompt_style) self.model_type = ModelType.encoder_or_decoder + self.enable_autocast = ( + True if (not self.megatron_amp_o2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + ) + if self.pipeline_parallel: assert ( self.cfg.optim.sched.get("min_lr", 0.0) == 0.0 @@ -309,7 +313,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, + enable_autocast=self.enable_autocast, ) # only the last stages of the pipeline return losses diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index 7c3bddc9a08c..a52a7d22e219 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -302,7 +302,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, + enable_autocast=self.enable_autocast, ) # only the last stages of the pipeline return losses diff --git a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py index 365b1870a2d5..80d980858f1c 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py @@ -135,6 +135,10 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): else: raise ValueError('precision must be in [32, 16, "bf16"]') + self.enable_autocast = ( + True if (not self.megatron_amp_o2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + ) + self.enc_dec_model.model_type = ModelType.encoder_and_decoder def setup_optimizer_param_groups(self): @@ -328,7 +332,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): decoder_seq_length=self.max_decoder_seq_length, dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, - enable_autocast=True, + enable_autocast=self.enable_autocast, ) # only the last stages of the pipeline return losses @@ -996,7 +1000,7 @@ def dummy(): num_microbatches=1, decoder_seq_length=encoder_seq_length, dtype=self.autocast_dtype, - enable_autocast=True, + enable_autocast=self.enable_autocast, ) if output_tensor: @@ -1160,7 +1164,7 @@ def dummy(): num_microbatches=1, decoder_seq_length=encoder_seq_length, dtype=self.autocast_dtype, - enable_autocast=True, + enable_autocast=self.enable_autocast, ) # get output tensor if parallel_state.is_pipeline_last_stage(): diff --git a/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py b/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py index 55ec336749a5..31c361b29d44 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py @@ -105,6 +105,10 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): raise ValueError('precision must be in [32, 16, "bf16"]') self.model.model_type = ModelType.encoder_and_decoder + self.enable_autocast = ( + True if (not self.megatron_amp_o2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + ) + if hasattr(self.cfg, "shape_file"): set_base_shapes(self, self.register_artifact("shape_file", self.cfg.shape_file), rescale_params=False) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py index cef0f6ba8e0e..034c91e009f3 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py @@ -197,7 +197,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, + enable_autocast=self.enable_autocast, ) # only the last stages of the pipeline return losses diff --git a/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py b/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py index 248a3c8e2ec0..05fb492828aa 100644 --- a/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py +++ b/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py @@ -316,7 +316,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): dtype=self.autocast_dtype, grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, + enable_autocast=self.enable_autocast, ) # only the last stages of the pipeline return losses diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py index b23f77645d3a..16935be1cc2d 100644 --- a/nemo/collections/nlp/modules/common/text_generation_strategy.py +++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py @@ -62,7 +62,7 @@ def forward_step(self, batch, tensor_shape): forward_only=True, tensor_shape=tensor_shape, dtype=self.model.autocast_dtype, - enable_autocast=True, + enable_autocast=self.model.enable_autocast, ) return output_tensor From c97e380af7fb7b3b86a9204f877351e36f842e09 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 16:13:58 -0600 Subject: [PATCH 088/512] update core commit hash in readme (#6622) (#6623) Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 700b4edfdf16..929cc7f86abc 100644 --- a/README.rst +++ b/README.rst @@ -254,7 +254,7 @@ To install Megatron-core, run git clone https://github.com/NVIDIA/Megatron-LM.git cd Megatron-LM - git checkout 3db2063b1ff992a971ba18f7101eecc9c4e90f03 + git checkout 9f8bdeb4814ed61fbc9c7d5b39c7710e77b99754 pip install -e . It is highly recommended to use the NVIDIA PyTorch or NeMo container if having issues installing Apex or any other dependencies. From b45de484df350906ca04e6d075d0bee6c2eca63e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 17:27:00 -0700 Subject: [PATCH 089/512] add hat image to docs (#6619) (#6621) Signed-off-by: andrusenkoau Co-authored-by: Andrei Andrusenko <52885736+andrusenkoau@users.noreply.github.com> --- docs/source/asr/images/hat.png | Bin 0 -> 110874 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 docs/source/asr/images/hat.png diff --git a/docs/source/asr/images/hat.png b/docs/source/asr/images/hat.png new file mode 100644 index 0000000000000000000000000000000000000000..4631fe89211d20a9afe5cd038bb2c8618b3f1d4b GIT binary patch literal 110874 zcmeFZWm}YO+cpe1C@DjCcQ*o3!VuCSAstfEA>GoA(n>gh(jiEfw9<`$Qqs~T>9db| zUUmP0_ruFFFm>#;<+{Z|l)9=sHU=365)u;jLj{;764EU=5)uRr?Kb#?c*ZIiJR!Sj z%1a}a4^eD_H>s9-53N*Gkl4U$G$aTz84@bu67Va5O#ZLea>y)5w|;+)f`k-div;<5 zjVgFX{3L>3#F#(NkQd=dcfdP5@Ee+o{C6byJ{RSmk+ACBvD2rM+IdGXhaWc2$ z@^WxS%!4H6B??|TSh}0hcsbZRx`}#;)BV0e6ud@!%uPr0`x19MaXLK}bs8BbS4$cp zE*>r(ItdIK8X7UzCsv}GFuA|O!8>s}8+UhSQEqNePfspSel90hYi?c<5fN@4K5jlf zPH+XMo42F8nHQ&{8~vY2{+S16>1N?->+Ejp{ruUdrI+o$ zS8{awdn|B3Zp1g-yj(oo|C$>N6+?U~s&4CLY5y2z>i}p5){x-k;pY?kJ>dU*_3tJB zW2oN0hl=p?{O8F3_~w5{YP(sw$~ZZIHQgot&9A?M|MSbg1I4%zNB<8}{Gs#jPXVDN zFvPh3#WV?waLG9@z()#OnA#)o3?vTmgMyGbmOsyk*W00&3(yuMBq^kaum_L4kbh*M z&J%P~_uN6(ke0f}$BU+6iFWs7qft~UO3tTaQ>Y6HN=^=@djqF|hq}|VM&i*iqG6Dr zu@s#xSUab-G_}k$wfH*iWuN<*T3a)>RqySJU%paU8+)BgkBmq2KaXG*Tm?B=GR85E zaELEyHjGk>DgBXsphIl_CG|M!I82;%>!T%Sy(Eb=QRr+|Px}&wc%&2L~n#^!NWylH6Vp#g9=*6%mn@;J*5gAu{fgtA5 zD|GG`e}q5^1veL}k~?Ij^)Lv6mk^Y(!5^LxCAV_YMNiZ~&K*|S><@BQ;u zuWB6^o@82W3mrL+_X$DRG`3icwd+M^)d{NqYE*C`G#46&VMQd3(3fCzl5YB|B!aZ; zB{Wva^ODcpghULfiCQ;?_2x2^DG>w~VL9wd|)MBN%4_`Q(8m! z39`24O?FZ$zeDu{lf5{T&`D(3h#p~B)pK{-(k<}X-$z&()M=*AQuXXt@-nu(PQRDnH?GVEV!~LUoS7oB6#Ht0Y{Q|$7g?t!jQ}H`Xh##V9rbU%d@-a!fEZ~T5_L0 z-^uiR`r7I9QuEM`)6{FQdKF+UlTKju2Zlu!l#hC1DI@tG#x;!kKfjPtfFw@}=$Ufp%Go;nb$eD3|f+86f!$_6wqnu1Bj#09v4J5q-3@g z9>$E|4%7`8eT#A>JzBg-Clu)G;5Bb)XV*dpzlA&Yl<2VMLGRNq$UiID*$ViCSRy1d z=pHz`6B8CKGG1kd6wLwp(x>h)`|zP3U|-E>koT#Y$v(6ezaKU_}+)1(nIJ z1|-bKblVExeA|fgwM6Ji;zeqpK(iEsd~}kQ$6uli17f^ZY)@keBPr9Xf-k1>sO~(T zdIeZj1e_7mGaRrrXA)#a2^Rs;Xf=B53vm5%5?J6%El=eO&BPvyglZMAxg-X#`G<`I zuOQ2jNT@ax&zN2T69OedHaU9O(5|7Umyr(U{D@$`UPN^RgJ}v<>tTt%q_)1kBq2H# zm=!N8C?h4g(*ysy-1HK>4?!3aEXE$%U=Qo`^gDHOb1B$akm z>z7~&I=S{VnpLqj(M7T7}L-nviD<0rv`V_X+Yziep6bg~t#alA;5+L#4~u`qe`zdi4fZOJuu3 zSN?Z{#89|9LgE0)=oeWP8t&MB*Wd$u$6T=T6mW3f&EOcJNN+G3r^Wo>{}-1flSvTG+}cVu|! zajkjM&$ETV`HmlA4JT6qaOs9(TI;Aw*O0~yk16BjQzQPjTa_3pw$Z%XX&RR3`?Uvbb z5{B@*6!c`1PP;@_tNH$EMCOByt3EHzzFokQS+yTXMy!8?f9akq(ftax|0!d|vC3~W zHL=?7?3#7?!}Oi4%?KqSf^uT+37| zMUp8!%#nD$aXyALnrqB&QCYO06-hUyH8>X~QGq2GN6o@xNQ=R~M|?CDZU1-=<5BYF zGYkza4lD*_ZWPYB34DV(hZjq)?`?%eAOjfzW=>k*?)?-MI%Im}Xm$Fy_8SE>4ehNB zT&*sP4_hT#<9Qj-md&@Xx&}PgW6Wh0Gu|E$laxbczga5j6702}SGAe@8RtPyqQZ$4 z+GCKq3yJY=1N+s*i=h9mp->!zMY?R^R<`tU)hu=wrqO&vN#^Rsu;g@C(Rf>APM`$s zT`CD0iHj%U?N`cqov%l|sX2+g_Xe%8JOF^vfc2#uQB|4lK6O#rGMHb!BU&XB36K4F zEg!>#ZE%}yXxoQJ`Zz}Rs73_-%ctN3t9@k2sL6BVqhRqQ(IlH>an9vo{==6#k4o@_ zvn!4%GPh%}I_Vz(VaI!v!3>0*11M2kx3O84Li=qLf;Vp-923A*$R+7-pZX8FTCH=x z#$RD>WwHv`k2jdTNRVU=$06M#vJyLBgyAwL(>8jbYeY7Lk-%SZHdQquUw=d!4a&G8 zUMkQPjSN#-+aAr?n~-te-G324#flSVli$Z@Vgg~2^G`Ap25XA}P!{*@+Yu=?Qj;o!Cw++;s120Y=%Y_Qz4%1i*d3GVp|rBs z#YG#X}$wlR0gaO>t> zOQ_a{@#CQ<9OSE5KkjP^I9%1V(S0q~h-_S1L`R3HAYl?ei0ya~DjoaBIQ{!Ec{xHLu?9^eY-A5c1~P#We~54w z=pIChC`q)Y1aE4>!+V7Bq-#7{4wo0--0}&lB*a|v%@k3iZS)_iV?OvFiHJ5qfxjM40$1Q_z;tao9aVGS+xH;PP6RSLsz?v+JNHa>-?POklsTo*Ajm zk+0hV^Px?Q`Q!wVdq+U+$42VAyTp5g>1?%uEwWQ*vmdrEo22Kzxp`70$@kxL`gSSX z(ikVZ_RA7m$e;)Fc@P;~slrx3QT(i$}D_zNTJ(2xs+;<46 z&+n3pshXK?1@N znjXZ)z3x=`Fqnn~T|7{*D#{=xKa$L$_c`_dR+gw@clxs?DC03~yg-GUhK0PY zkh4-H))g884ZV3yDNS(h8L&iJWj=}5!h$v0ZK#cA9>_Hs$`W4fvG?U6$HgIkpq_HY zq<2j=Np9K5Yl*K8Z>Mq5Ft-=YT9hl^DI{r<%lUo{*(62Q8GXFxZ`F8>Q(E)Yr-oQ5 zm1|#_Y501Td_deLll2xSZm#wO9+w;wz^-V(dPleulsrTUMk{?>Eoa(KRG%@vc1O^j zUp+g@>#HhlqtA}(Xr3Bz-8GCBZOy-af?3~0ngG40h1XEO^O3oqm)EZyZ}^j__iTOJ zHfhjq3K$k|j~Hx~M}R~obn*_A&{2#j6S^b$(~{SESD7BgB}XXv!}-gyZW$b+vD3ss z?)YEFc|SddRe1wHQdD_w->$2lsq;AA|Kw=4_2hE;m2Cgi$+5Vu8Mafof2`M4l^mVj zR1-u(&vY)AULNO@!*imisrD;3rQFYU=(8PGZZsg?7Bgov==atrtj7qv>UGe5$fPm5RvIRaZCqQAHRQ%(gM zdlZUPav|xAV89-?TgdUdX~Q=}^4`pE#UOLHe_pqAp$6hksXR*)bHKj;PTwe$lt9r)!{Iibh7$s_H){VxGM~E{ewub zeEx@i`y1o?4of*{CiS#f1AHgUFkEV#BDq1k2JF#p54IVd?tw3?u%y5=$Fi=&l$aB^ zX-X&5h$GQr4No?5nW}KE^8K8A$RF3#P}P@AN*&sA^bN-Q^au3%u%+QBdU(B@hy>5h z{e(;x-}lUf_{G_Jpt|4bf#Qm#R^TW&H8d#h((;!t2#$-Gj zaHM@I3ad*Of_Y}1L%+gs;Rdyi1Zp43D$X7jKPu`P;}*iia>M?Lf zig`fDk*#HtyAeOPN3g9EfbXo7(rv;8UD)|r;LlQT5WwM8Z;C4ts!Ld7cXoF)UzYP3 z8?kaGk5y*CZvQQ%UEm{O@f4mhuV>pj!lLh###A(uP*DW?q4M`f62Lzmm~$XNGa_cX z^zR~jA*BvUh8D;Quoz0v1CH?4l`C&Xqs)DP=ugONvyjk6(>7vfPC*x_XG&f~*?)N- z6LvJ^v!iAUTwXBr-d7*7k~D0!h3tyx)WO-tuq6hK(8BdABm3d!0XS3shnx1Jo{KR) z$IMiA9LByU*w++Pk=lCmDKeyAn@)(KEpHmN-USj77Jr@jhU$&IE2%%1UGT8*F`rQ= zpuHR#M>R-=DDZ(`>dT-O$i#@@_s?Yp($?hVP?eWVWj3QVaV%QJAuO7Op_WO| zSg7pw>5n;1GC|)7he$4t^5++Eu6BM5p)43)H{C)V^^?7vMNYUmb~5f}l>Ci=BPuxo z$|S@G85cbq4pli@as&a_GZrAR1GaU>tX{KR~t0`3mmRp0~#6)&)3{u0+tB8ifklek9I`9IoNi=Q3_A>R7Y=>ju93J+^fcA)ZubMbz2D>Rz+9vort!lAtZ_p z#;(KtL#T!bQ(<-RrHM6u-*X2ODz;3mUzI_ehP4i2i>X*=2G#GjqjOX#+%R>S>#Q4P zlGzO&<0oj#HjJgf!m&sv8QXd*YDYJ~>xF6h*D;I%-1dZ$Q9rQ9f zuPSzY6A=?Nn;=H})b>|pZlYuafCYzV>!#;i25(<@mFoBpE*rdrPf&A^SFqCR7ilsE zi~I>Qp*SG$UwK3PVNV)QfCHuJa7pP70+K zmuCpSKwChd^bLkavd7_fwdDgj?eD21#qJz8eAZLu!RNjog-fIfyxHuccz@Yytw*|M zuCk3uPuNE~j&?AvLXGrR@g78m_r^xx+Y62(CU2dMDq zVCnK6M>-ot@81|#3kSnW?IBEv96ncaX?mdCe~L~It3t)7mDX@PhjSWZ-Yc@Rl1Ia` z9}a2g{p?@8BOjcS_S{9GZSQig%V#hX9r^sE+~KXxV0o4aZr1FAUe-PJZ-vA0Sq3#? zx>3*cze!TeJ7qpOJw|nnuiTWq9j@HYB5jL}O-lbemS+KOm1KuRCk7(TXlhs!=^L&8 z4iSS!iC;=^ui0UekRlv@SII4>AJ?|cR>u)^u%rOszhRwL3H8{cvKcDm;*>75(T7c~ z^QwLgKis?uo(Em{eS3<4tyKWN5^|i~?>1L`Q=ewMNkhbyQNirrs)aM!(5ZCcoiCksasUJ=`<|(bz0l#Ow2snrJ)%0#Po-FliLyYc3}Xm!tgof zdW~$21kudi(V1kxxRzP@hOV8H{bimB+Sv1@ChDOKu5Yh+wM~N=SE-6LbYrQnYeiQ_ znSKZ8dc4H(Q|hmB5t-cJbVQPh-{@m>Vvs*1S(`w_HL_f=+yiaYZT;7TPI4KIYR-9g z*}iGl!$N5SR5NWAJ@wdfyR>X`;9`B$893m56y|qIHppbvmZZC7X9YI%1y!^cTk0f| z4CV^m*n9Pe;84F6=C67aijPB|J1jI}v&%NA4jylyQhDk5U5xSpM}(GSqI%77m-_qx zL4BL4ddadmZ3#1(F5$16%Zlu@bvlJ#S8gg4_X8g7YUme9I~xoQTsUWv@HN+ETkrZF zMl)gxZLA-Mss&Ar?341{`L9eANXo|QXgmQq4>7s>p)869EglUU`>&fOv3%_V-Z*S8 zIn`SVOA`PmL>Tt$QA0ATR&xSZXjv99a8c{?+A$pm=c9ow?KGV&}F1)RIf-#j!6kL39&T@dIH%-)a$2 ziN~)>4@7k&dURyu3nNU!r;chk8h{Yz5`{Ty(U@KCQ%_9(>`;1G63$wbd268yp^?Q+ z5&@j$PRDIGIsXPPLU<*c#8mDJVfUYRS~e@11=MQ-8uI`^srIC z8+(a`>Bf3z`%Ipns91}tlLo}NDQqrHsXXd@tN@M2A~Jmkcg;@}Xedw!@}@Qct_i%} ziS;@@aBW1Ba;PlmpT}uCZisfQmnN`O&b(+$=kaD}Hs%_WCP?MO8a??yZP|>W5p?sk z-{(NNDO}XeeueMp&nFrM=Gfu`+2#%p~IIkuN^}p@mRsvZ& z^M^-`e!s`)iP;+kqKzr_ADq$Ra^gn$VJx2w8qMDz5Kp#j|B)9F+=ga0?vy=^<1ydp z+NLU8rDO;N6P5K0K=$t;I(m>#^_sg`o>oLA0|xYbR(<*Kt=_r?f0TJQ z1GdU24`^pxU#d473}w*QoLntgCmpFZAqHukV}#DeRrKNwQ3! zcm$D5z3RwX2N`i($G57I)Lm!090O!A^k^hm8})ROC?IU{2;;6{{hpO!F(&98$?DU_ zRt0p2l<9!OJI&F--d^bm#PR&)enp3y2g^vKYZr3Tqs^=cQUjC^j-<^~fF;4`Qra)`|w6bv4 zfQ?ZKC%LdU%p%#;%vWGVq*`y3lIX&$jIJM2NCr&U{Tlw0wQfhB_PW7gw@sUbvS&{We4ph~I z0ep?$=F>?Bl_y{kDC$5a@~XY}>LbV6it_Lt{8!eIOJ! zp<-CP=O!OdMNk=Q?we|A)L64AyI@=q4FEWWJzmcdO;Fim8jvDK(`N8O6apY%b5SGx z_Vnl^Xjl}&>R4m~3OH|`J(8ytPlCexX52JLLd_&q(D?JJwIa=tDX~VsRy)-h!)gGc zHU#-=uifq`pD+6iAy94X9uyh+-n*$KB%N@ZHAMpMyq$-$nqvAZytZEiVl0SmBlDk^ z@dlu>N_Zr5YCUv+`U^h4+s#33L9&}ROg#9q(>vkDtIzAxV4&~Y`>?^SgiqpzRr*{; zHjr2um!!@h2(f@C)93QN`nd=8_1u=cYmEl#yhfR;jHAQ=i((xZ*?~ zZ}Qu88db;m?lo}dq6(-_R?-lKFW;12cZSI8Rv6>f*)8+fr8ecC%q{3VXg%!7&TF&j zbXL0w@l&&raaD^1HW@u~=(WfmE)F5f_wn~z>}uf)G(ibyPRf*w&Y{l<3FrvqMG@sT z1qK8SC+6(g(Y>I#G$%+MTEDAsP-HdaXmcCXIHAZl@$+v68!Wr&P z$5A+~DWfnXdO^e@mc}qLz0Qyf!23lNp!u)RWfW+Z&`fFRulR>3Q6U&(p-qG{reJDv zbT16`Co1fPaX}fNfEknrOtxs_rJgrlWm=FvODBs0t=v-f_RQCB9qAxD0RnhdWQ-Bv z9X+8KP|?u=+o)ZBV#NTZhR`TXmm9H{(l$M zWKkHLvw|cK9#@&m+M zr6EcOn|+R!1;BK^NCzw6c_PIyBfrDnw`7!U5_X^7Z0WyvDYcBZ&nWQ#((w{Sz-14+ z78F<8sQ^Vzou6=lM23NpYkCCW3Xu#^H%Rt-NP?R9EeIF6~dKTRJ>PVfXYBZa0?2#){}eG-&L_O zKzkuHVs-WlETFM~Ve!oU=?rHFtAzOGKG;qW-3GNb-!I#TJX!QdBI9#Pgm{k-xb{4$^XS>U{J-JlG-0EJjTpWfVPjsYEhfOE#Q> zd7X&n8xzQcvw@+m^aMXw_q#u3_(Umput~YJdrE*y`w!X|wwlv(bqGP6Z)# zy=_~njl8w=sYT*J$Zt-VKL@n6KfYxLItEN>a!U))4nHj7mTRPkpirVS5FuMGb*UQ& zKihqX+<_lISE_(@mGHY%9uI~MJVR#(r&@d{^;qc|dqtK^u$Lg1n;0n|H+gKc}J!nlg>7v%f={byK^iXxrAU6GzSq= zR-1fb1FO=Ylb{y25&Fb^Aw^~DVAYIs-%x{~C>rYHzFvgGn28A#Lz_Y!fQhC-{so3z zA1!HvQ4j`Gf_ESF)d-pL6vCv!WJoB{h2x|O&GC00h2RH7tE&I)^4bSw<*kRI>xg9+ zpGyg&5I$ympbwvUW~TUN>z2TCdR3_?Wr8O)PnYMtu-9*1ZbUr9)!D@W&tE#mnMhV5>TWlvK|d7g-rPaWh%QSvXRjjq{7mG=dt zP-GNnk@FT>(o?ZCW};vG_@#Xz@JTv_JF@ri(+Y2$2L1pl$gaI@7C+$pw?2FD4TpZsg{wSvS@nEt|x1b=y42fElE}>OyMTb{SLeM+C z&<~m-d4&_5wO^r`Yy(@PEKIBdWn&hgM*v}g6cA-8g2!vX6icB9;i}VVQ)<_G=;>-3 zx5is_Oel|#jr&F$JR9WTe#WU2pDjIFZ7jEB(`>~==OV5i&?8p*Gj17glZIz0c1fDk;!}BQT7*btT(x7IPsp>P(KN=irs2@i#ffZbRDeYLoDMN6k%{NCrb0w=9V6 z>7P6iL0k(Srp!QX@7wZmBev+wf~;n?bHRg!x9w`{;#O+iweb z^mfOR@;@m7P>1%t)gasZC!dx*u(W@MWpbfIs}+6|2yB1??56h_DLZgMQYZ%x7v~$? z%)xl`?WwX0+>Y^aMx|8c9#);w^kxCe&jh1ITFk>XhZM^-&PyG}zcdRJEy;3%|DH?= zWfT$OQ4Vo{paRi1^1OX0>;AKk=*W$}XO0OZYwz}2FUxHvi&N&h9lth4xvcawgq`1K zeN>p3CJYjLacvCE06hIA6QX;7+>DR?&qm!)dgCeGs@bh0>VwLk500FaL#?!Gy;Jn}|HVnwe33DXX^%Q6Nt(6{hgKoMH39(fx* zdi0Mll;WpL2DTQOw6&guQn6MFO~+spt? zn1mn{Y573PZ$g2Zshapna=M6{m7W+8mmv$!u@FgwNWQ~t4LVctuyBb^8D0vz?x}Yc z@m~oHf^-0fK}IF{7Sw0J#?YjY3@W}4g9u78P(n)isJ}A;hgjVARG@8xp5Va5cSn#~ z_xetq(}J;bmgr<4gYoZnv)?0QfQ{DsnGRY`0Ly4dP+l^t5oO)?yvtN<@Y-rHGwMsV zbyFCsuV#g@_`w(J98_|np8j;+yq`Chd)--{-)vba=|Qi_A2Xnb@M3Yq7JohnRmhc# zBMTrVKOCqv{TO22pUO%1Y-=JrEkv}pHD9mUX75|egKqi!D17E{hV!#~8`LGJ+mENbAN@uq zB3e0vuOaLx^zTFp?ZyaCfB3}l&tz1PfU5&}k=~=c1c}4?+d=U;#^Ws5Q*tAVsZ5w2$KLCEI&iytLkO1ppam~%kUF<1p1jV!1|lye^o#k_x8c?1LM zRY8ZVC(3UMf^HTer*9mn8=%&E(oB13?7d1DT&73m`-4|}expdIEVIBnzo#bnN=@QC zKb_w?Ubp7{ZoLeJu(Oou8#tguRIC4Gv}_E~=0Udat_Z8b_&c?Vh|zy30h9_j*8QKn zf`3*E4(eZhaU>OI@J{TY3x8}Zo!dl0+zMzvYWM<~5fJ0m<`KgNSJLMWM<%U)kGYI% z+}2g=#NH`q(22TlEE0wLtI;YXP~Kml!KxzYVPFEP;P^+#9G;(%<~%%=l4 z{>L0#ogWnt5=_a~gDRpT^2LOfns*n$^0Y}{if_!#>73Jrpq?Vox3V1hj%r{sSmi)o z#wo*p^>a#hv37}aOrz^+G(ldTT-;cNZkfI|#uzX_n`Xy;8ClpdE%{p&u z)Y?C8E7+N-lEE0`ZoASq9%J+! ztq=FJ%SICN@)`bf}xrgzU$)tyhQrFZu`XEtLL;5Sapjop_BY z^Uujmd1>_?v|aAr_ePz|Hg#myX;*o4Hgtba;epTA1ZxxxPz}x4OwN(y!q^$dzx3ll z7z(*Cvp>5KMJ{!otF~6F+tHzsFhgy*KAk)9irV@JHNl$B;xP+}&($f@*GvSKQ4c3C z;vdaM^qP(S(`$yePZN~lV1}waS1NWqBBL23Z8O?{KU43L_nN-R=j7>i2YW>`9ja|K z0qZf3&tD=3K|(>ZxX~YnvnY!cgZ2@l(d)qcnrtoH`~jWlFUVWfEH=)3#pVUKVR6k^ zA-k!8KtvkY2=01gt(>`#WUotCrI8TniI^; zvFO%08nG&vX}|s3upSHpvYNT!i8QB%@JPRK^=j=iYC?AB8??gn)pCq6Mu0(kp%Dq0 zDl=e4-Ml#2KB2fc-lRt@qLRY-Yi=2600kgF+=YI3I>$n8u8!1LrFionHN49S(2pMB zc>KPQ2IR_kY{wW_6^66L+1)<9WKJN+Q^^!Sr`Kir#|f;1&Y@?H3sq|fc0h%KL1NJ) zdc-8)Ty`x5m;_d-9Ixf5$|I}WyATi^(|2zeIJFd*6zW!F6XfHF?FUiG{cA@kU`MbG z%i7|jQiQ7L6q4@@+ zv&R>w+8?o##Ro{{)t-Cbd~ARLC_v(T{63#bPVGB7!u(>4VG?uobsVk_bC=~|lL`Ww7DS8!{yhO$Qm?CY(CGvEw_mga zN0*NTymYy7Tbi-Y!{`wLR?Q8w<1VV;oPdjm`tnuqDp!?PLMb5x)Oi1NV*j^EF{ME& zAzKSEZt_qGPvtbw!5DE}{Y+{;O1=$A<#-GX0I(gXc>s+8+O z(Spps52WA)+%kF~CKN%}adfHNcE#IDg|67-LdkT;z&dI_5ED&r2in8Zy#buXu}+Q6 zs5HhH@J9c(_d!~~Mv4}^(^Er`h=+Fn6LXFN`^Hjd#Hw~_2qrs=PIp0!B#MdOL73sd z#w(}Rq&TGvz5;?gai8PJwx0Kh259hb0ht$pyNzcN)B#X{dhurx%GGitTt?s<0?4oW zo*Mw*d>mN!&8LI~I6-%|c1;BuECd>TsiR&RT|#ksixN1Jc7J8!1xPqHITL3-SDKO# z*e+Z%QR1+V{^0Arunpy!;~voLQj<30Cxe;VrnGE-(nv5A7g`wa6WjTE7xN!8Rc2>P zKy%slZ|&_i?GaTq8{Po7*ELb3wHkwkLQt-dL}v~bHsg<6#rhU->5wGu>v6D=mk@3= z*y4MJVIODePO|KAyrE%{s3r;f0mPi+=3<*W*q^NGE6R;ggRAVk-_f8Z7HA`sn-93$ zp>z7yOsAGDMhAeV#1ZB?O2CNyLaUMc_7vx9dw6y&(7ol&cR57ohpRC@UsSoAJ3`UN zFp6t`vS`3VYeIC&4ZnOEH2=zp*Z#TB|MHZO+W$By`!K^gJ1OM2ylHo=b0Z2H#i{lD zZTdZ@?4MgD6%dKgj+URxF^)1@7I|+=Q4w|w78P_VI63MF;0D)DZ|gQtK+GmenQ*L+ z?<~*NE{iEV0+I%Deb07%jI^@^X~{m(S-MG;Q}huB#>o-OX%O!8D(U_&g1 zkbN!1_#~_%kEz};Q_sE3xJ7is^-A_?Jtxo*;G<`@I!4pg!xG!vPnC_wb=;oW!oL=v zm!y}PlxjacS{vk2v2Xn9nTaYeRs87!meCFXi_xsS;B{;LYN~~1@y zphPGbt?QsrZIDB+h9;YLKk%km?IU?KR9jNQnM1oc*={gXuxFX$T(G~SqM$%@OAHm%{92KCwl$5+-oae&-NYSAcEiuH?nEk4iE zgcg_zUF#gbZjFV1YD8eqG@&b}$m zfUOJ;Hj5AigdAHfG)8xr4YKwr+6+Qjg9c z;R%@{_d^TWWDJ~pO3`x63iA|WEI?r9*AusjS!rYE_T{*C-hEu99Dw;Q*OWY+duVrt z*R>+Lt3NFLn~hc@?h18r?SyS9%jX9iK1ft`WdpMxyKib`L6pP%y)W zv+l2t)iRHmlzV-OrxKs#BWTY5AnP=LsKISd!3yJ~kJCrNDw?e27`eQAM4!fGv{iQn zLSj|w9)}<6pvQIdTg5Q-fBxpW@>F!P1)yaggLYI7ystj|-iJ?Y<8;p+E_H<6`nICF6F7DM zJ!GFk^9(oM@^i9_ZOm8MTJ<$YMS$f9FmC}YhnF6-5?WZRn>nX+0?^yBNW^fd2@Zwu zVlluo22?$1HHTV}nWHgTJXnHS{$ccMXcj~H+|$k8ZgoFJ z_F?izL?oMufyQVxT7?@HBRBBf1_oPGW!r?q50PzN))rS9ysAl&%$k7AZC!wf-d8+_ z$1fuuG>r#^eG!v?VG2`Vp^xe)l83(&^u2Mbg2UR_X95s+g~a2*D?>>mzAxWt&KxH$ zBCqW4DYdjHHbABqhVu&t=jtLiABzJAjcPAoBNUy zQp?+qBe`t5p8>F5{{hn>=A`XGQ#!YPP2b6g49@0G@5mE}Hp8a())EYha!A9t110w) zNMgASiNR?2;^VJ0oLV%sTm%gulLadjP0B``|D)wL{gj zz#O3@TFy@S*v#+B`$tvp{H|oF1j6Rse=n|DSd#(mF68n%7SJ!Ayu@mA0quBny3T6H z-bbn%CQ|^hwu>V7qG{-%O>{n{G^nw`b&nC}gwLej!U;tC0w!5t-_xt>Nk`Y{W*ouA z{A&6^ZzVhP!SmQRkK7pee{Fnr#O5|rX@;yRa>SQy z?K$rf@$K6kYQK>W&P&qPLg61g1cDz|TQ%(e&={tlPXmm5=Kjk3!=aV*BNrU^fn?T~ zcBenSRLu%Zf!a?HM4E09wDhIih*3|_%g19ijw(i)?|#uz6JHhj!8jUPxEx8{rbp3T zv>RDF{qAXuXh*cij~U)^rt70&$uYXpIoP7*@Y2S2#N8rM~3o^!hk3i&b@(mYVep;5qLPI8&Qmt_uN| zWti8c7dY|*2?g}Q`3ylgjx#1-yi5>S;sZzs=9lWsr%=X2wf&!K&s?jJM%}luhl1$J z5Z^fzdJS?;UyT#Df}6b#mUHjlk(}Yh^1Qw{F}B6ijtsFt_?f_~g9u`LW{4C)fy%3? z?QbNa?uE7|X5x>|;tzg;K)&y%9!DQZ`BmlJ%m($a4FE80^zW_}T)66K=uXO0(!Q=D zF6XwcWzjs@n)H{Q?lfcVo%Ym@7d7Y2QV_)=)wLu|0B|?*NtH&wFUamV?#P$`+(!R& zwVDPG4TeO`RHcx5u(x-RvX{n`v-%9=tFHlF!t3rc8hVd z|Io7Mq@wNSP$WknJCtQ@wDovQsXT+2XXcqb&K2ox?Qbf2Os4QRO)AI*P;?0hsN z=fAVGC6X@^cyq-yAQ|3ChuhN~!xMV`VcPh}78Of2vV(2g?lPB@-zr_(AUuUl`}4LF z4QPLrSXt>f-W+cklWL}t@SAZGt)?ATh>)$I+uXm05mS3{sUtt|%Kg0oh(t>O*7Ru_ zD^Oa`6tI2q@>N%3uL!c({n(=!e(RwD)EfH@H_+gnr?QVsXyC_b(mEgJA9@fxA^b#N z9A^?&rg#$wo0zRAL*|n}~F9Q2{2M1pqw~ z)>m1|As1y-p^+BGmY*1RIvMpeSjYAX0|$@$)Tg!N?U#f7X^5F|F_!V z-LoXR=eKsKFF?%NG$Kc6Ldh#RlVdkuU!_aN5k!dXxz}>$laj@0P-{cr;7j_meMy9x zbR0!BTg=w?@jxQv2U8GX-+_jf_&w)P!ax9_Jcv_x&NI+Q0r0-45nfrLko&xtS6D^8 z*!wz=!%OaW2&Brih(;S77A8jB5nS6?ImUWG2Qzzy{yi0V^lm(ROd<{v7F%WGh{D6> zd(QD57j83Z!yI?R3(PHNfC3DfxOxvNM}7k-g>qJ{xAj%dA>e;c{P|x1iIS~)7EW4M z=^*8sxk>Rz+f(n_U&OvMZ<#dVaGh=z0?TNL+0Kci8iH z5aP@ioK6haQ#2NLkCV6OG)n_Ff|#3;44h!BXn3->+M4c<|T=jikDn#`#tQ$UdjPJ^3IW_r=+Gpfhf& zC3v!Ua5ma|S=IUSY|qn-7uSNk5)BI!v$EPj{5>yGKL2dm$aO1Xe2#GNB7zNea3vTp zqd836{QP$W+x`h+ZX9WT067^Eq+-`DenNf*Q1%y$!k$L5Yp;yCr4C$p3=t=v{qqEz6gV>>H@?nR5xjLwn!-RhJ%-fN`Q zg}CT;6NO2-=YCUz0UN|dFW%oaqjzjFgab%a0-a-gv9pzCdiW7VvWHss z^8z@q>5B+LW(wL&K1)U7@F{cDuTn$yUYxV%7uywg6xk{)A}g2S@p{RdKMz(MH>=DV zU86^ssvNl-K>`B~G0Sjgt}adKa3zBDlrHS#2;|dK zx5LC&24-!#L*f#!vb}T5XH?}xs!zSZZSMCp<*R+*-+<$dd6Y!Edin7Vbze#fp%#ax zp!Lw}#6xpBJfI}i*29ee{V$*TeH{4(Vo8yP$GAnlprtQHu4p~(;3p`K$mW+1fculc zO{foO)_`y>vwERTSC}jaNq}f+;G-`{{XD<0p8lL+btD^NREoylL1yygb3#l1>otk1 zeVOsv36McwR^i2W1R1aD7Q=zjNB&R5RMUBAH{SE$Os4Ccw_P17Wkh^&cHl52bl9fq zYASwpApCTqFn5mZQzXgl^@bMgF8&Axxfh~G9w@;l+tW>mf1_}E7L(xGlVGH3BhRo+ z5)aJ+@i@7`iQjyarxQcTKr#}Mqg<bgF((}9K}n#N8+8|}Tm z^(8MpeSA2jt+X zpeA&K+^N}Ywg(hL^q7M<}B44>!EOlU&`I`^bSpzYUA9O(upAm(Q)4D4}*u5{I_JlYd>c88tKJT^M5Giu? zyP&uD1cjFy=8sMP!R@Xo*c5+%x>i~+wUB-KbU!Kg2FTCLUG4GdOHWKk+$uR@I#`{r zuZ+aYBUvlI+DZqw7vR@xLKkqa!$hG*X9-uKRxYQb*iqHW)z)N5IoR`;G6|3tbH4^z zG><34VhS|7=%%up?=AZqudxAxxp(JwN`l`2`RW`5bvbf*F=D&P;?)wCa6}a@h#sx0 z8H(BsvR_}ek^L6@&%WFi2KN@#E~}e&df88vW;lrDWoYC`_$$xh$E85>7-3b!1yvQv z)N^fm&p@J#XHaRiDBfYN_Cw|e&I9qln&J!4r8$&(^?dOAOEn3XYCI4le+_eRMi}W* z{B}PNzj@8TVXr`d#CoUrUA>~!!&pvZb$-P5f*e}^5=_q@q(NcK2oJi%+!MilmHym3 zErysjlV1MrH1P9&<{uNbLFU@4Dpc?oWWx?KxKg=)>d&u%3~9XA2e3ijC$mR4z1}RC z9+YBUBVn?U1Bd+K4)gWA%U1&!eQDe|x?lHi3F+^GB1rk#;H#!dWSM0y!+K~LTO|cv zq!J1+53Xjrtp_5{MYb+acVhZaGX*~@N1Cg-$F}mg0Wao)P0H7GY~eQwmu-M1lL~}` zwiMxL1Z-O$%KB<-yd~=a0x<*A-WVtUg0vss?0^Tr1DRihj$N2w&!{Lx8cCNkVTyw2we7hm){5+{1q_azlck_YBMCVgPIW%>n z4uuN->__bB0r|=0`$P%+j)rn&jfo)QTt~@c#Ug4T-5~?u{lfu3Lukrh#j2$6d=E=L zplENN&3RoOPP~=%r~H@PsQq+hmuBuWg1X9&p%S#S-R;n;NIO?3kHuC0&jy(U4KyA46hiCdLU!ap`ZsrhuF=z)Gf{{Z~wBF zO?=GnzK-AnQ{!NleMl+ry6ufVS1bZcYoTy>PpU&B)Au0j4o}Bo%s=N@q~B#emJelB z^f$U}Ju3<@&sPFOu5of32qYVn=+(TrBC{LKQJ7;#vjB&`=?9%==$(pb*&j?Hqj>7W zp_)dU{4%Vt`_!QmYSc90VujZifa2a2^f=7npp8W1*Y9Mo{ZXt=l7J}=n#4F{m-OR3 z3g+wYl}FVGgMI4br(l%6Lqk`M1_ z5kYKEEeL9#l${1O61-opr46NAW1@@ao|Ux#fu&ll`(B?#kNUYC$Salzinrj_OFWEbP`@4;-V1q1QJH?rVq(ldN*u~L$VzvlCRLjhO>z(CR5Q%c?v?>R|sHQKsH(p0d zB+ua7ITn5N1D8n(SFXLA>oX&#vw1g6dldDS(i9XmtD+W+0gTVVbfI$k6ECb-I(pY^ zB=uetX(>Bif~N=83Kss!d-xM4rRIed2e*2TJWkdC%lHb))^v?+;&6~MhB6vTDlPK? zJZN6+@zufH8dCrYy4e)i)fd9|Ks&SSpe~bqu*RfmlV>B_;u>!zimdK@M$|>JDmD!6 z4+YAtNR#=p?2UyHacp*(e~Xs*A$K(l3l3ZvIZYs#Q8VywMX#3(#0bH8EQ>aPL9SA( z1av~ij}e*yZH=rwWM&$IVnp%H6sKgDkVyVuq1(U^zu?VlsVclWaTkGeQt92XrJ z!NU&4J8#$eor*9w11BH(+VXBOsG=^Ju#yuJ5-1&L&?`ct#1~$aOr*P3X+NCOhRE zA!{>eWv6a&_p7MO!7h(&9Sd-U9PIZNTU&E2KpJ|2 zDb^J6k*n?X=)L0vm7o=vas!Rr2zO`vP06TmscigKf2p7O;rW7*0@)j}xaXd5mI#pQgwBoER#z1;H)DMtkk&CPzaWCdGe zN;MqtRt8ZP?diFg_a#gmkqfyNTfb=*(A2f)C^Kf_E-i#RCPZHn7*alzKViR)ia|`} zcRc;d368h4yyZ=g&IAOld60EFf7m6gq#FFRySuXg^j;v{Eb;x0UCH@$V?gY)uUy|N zx*#(0{;A6bFaIR)$#1u4A|L4#-EALS&h)*RzrcXZAyHg|t#kQ6#N{Lqq4jB&OF_Tl7HtNgkn|%*uZBM5 zX~!1(v(%d=R5y5UHrieA9moqC<$BfaPLIAhS4znUeABV5sH=N~%0(c!_T}wF!E?Ex z`)6{%^(nUKIENcEn#>ZK-LJ0Zn?8Q8001{&Q2F#)jb*!tc_!kF`-h^M5->k`tyNAB z!^U-;Tt(JP;=b2|`$A;eDkSme1A=pA@b*FYO0hUSOwx8I{jp6jB(SYbk&u!ilKd$y z3P#0CKFnm1OAExJRP(ohn^{wqg*AkZNnqlI%S|n4@)emVNLOl~*Lt***I`aTe@$`b z|MA*EhP&yat&CtiqUw5 zdOl}ETMWdR06WZe-#RM ziob?S;@RqG!hYG-MKs8iKLY!ETx>@@{sZk8F*M{7DeKwZb}IaZ;PWUPRlH_&TB3}-Olb)))yu?r{M{ow^>-WANh>0>J;WYepk zsdL=xjd>i5$%4=9_UGJ*s90+g1s;PgRJ_DVwJzEckPQChb0iLI0JY!6euE5Apor?`-yFa1IJ(>jJ`TLW-^-`C3?w7KRfsg4h|CA z0Ow%<#El~vyzZhdzel@VPPcW}`8Hcq$_yGtOI`wCj^=tR(BJi#YO;ttKK?``+AF_^|r4M}YPjE$~uGB1XTj-H832MqU|I@2&5 zA7+94ip35dp`?)s-S7$e0vC;v8-`6VX&NnC(Mxm1&04u|09mXp5b#z^$p3&R{yLx= z0MBBg#mlYEl3-2C_hdpl(*6kU)<=n)Ch4c{KllPXvAG%NjbUnSej%64n6c51B!NJShh9 z21&B?E!TPX_kVb(Sz;Mf*$-Fy7^3P+eFnOEa7YLyH$F*-la0~?wwmz+rTNK3W# zQa|QgMO+sQDB{i-uw@M}Buv&|uu+XLn_+!j6B-*c?rC z0YR}0;L?E`=>gYiW!b-@TP$$rU-a5*K^oG_zS^!Dl5X)lF&hTRUK&InA`<8SXk{EZ zgm~Q5m!t>sZ{3W*P> zrsw-V;rfn$+P`Rg&Vnze&=E``CoU35s$^p$M#)m;e{?;axqMe$;o7iruJnNGR_uDlk^&%~{ z;PFor;DCfO0Q+_@o~kgF04NE?L9!B{G{F7wUZ#iCAFQ=!ER1V}9Rw^|N_rACUE-!C zg~fOBwj|uPYP^0*5tq%l=2Ckj`mX%i-ePNS2IQ|ap;6s4u6B~M%zfyZ7LPz@mZL#u z$X%IJrI8erjltm!WHUR~dD9qV<3Lc7A@Ui*JS zONcNypq6yG=z*DoAj6e^KwR{>XS^0CT1(9yTlb$9^)(G*@j1xUt`&(l^y#k&pz1PE zykqtMABE~42(WYQC)&Cm4;%xD z?H8IGjz~L5;S`cME-(PfH2bUx$jgCkz@X5>{b7#%3KaXI2{af zaX$R@-Lr}l3Hg0B2=xAs_;d}d;DH8Cc>N|cB%K@#mgVL9By%0uVnFW6X!u4Sv$H~;3 zn3kLbm?}`WFbM>$@N)n&6<;z06pz)Bd_V$;XkGptZqOj`Ofh5G@VjItP7n~_0+3k* z`G;yhN+9pw)HB>Q)xh+57-n2;Pt&iKx;_Fh*a)tO3YrI z^*ZVHDPXA^uwSZ?aN@!*qD4b2F2s}w)pU*D0*Gl=PMBcu?rvIUFs;%%9<`~+?`i!m zG9Z~r*oEX;Ujqs7Syt}qjUk{X;Sr1ZHg5pkL7QmKYaqV*_+tc2ehP!}U3MNIQ-JDo z(~d75xfmCeR)b{ekPu->^%=+kiRDr2_70QMwHXc8;1B-XFfx zlA}uljgh}#sxrubLwi8Rr)0c^xLT-H!d#LG6pY#u@H{g3*)DGk)bJY7hRyEkYib}l zj+>4_ z|0Lif89D-1-xhJY_n`ulJXnH;|26^C62775n(WBXktLSC24cd%q?v-m7|(6s3drN6 z0G-_bXt%x40l6rRH85ZCD!BjrbE-q^TRAR*zj9r|>H3NgTLP zumdxy;m2n++@{KG_*aiv{;};_%;Vg^W5Cfq(ckV9cHU4^2LeZ-FcZVOCH5RF_E&{M zIwZJIEPqPha|u0(^?}qpqA75;F-d|Pi_-tu=#P*94VI0ceJ-9)E}JJFz1ADAh;nc@ z2ToE{-vc&kDfys|kq%WTzg~5vLiP@3xP#g9xD|&hHRPP$_KsU5{t5D_ zJP(YB&gdQm2@$4a?HK=@3&R_Up93GBz2?A7(xvgNTdiy)00 z?Ubvx!mPb(Wy=H{tW&B|%E-gwoz03aOf$u|Y!;epXmUT`B^O1d!JQdTc z_OEAot}=EEr1-H~!|!gdwGGSEvfkro0%jkl=*vr1tlPsDYr!J_0p@DFx`j_aY@Qta zUQ1p+J8IQaSx=_Y41KX&(3>Xm;e+5iw^jG7bL99oxt!bi1etNRk?|hQ#rE}T%db5% zRqa@AMz!~)1Ce;yEM2xIJ4~ksQ4+?I=@?_`ni<{QxT+L=J6tK}*=ujg7aTtsbNpa@ zq5Z3Q>~gNj#Vax79>XcJe)yt$L|-lLZ^wZp{>bUBp*mkimD`K3X*xv{;?wCy8*N4F z7r#h~7(#$JD1R(EDtUCCdNa|+`swvzA_o-vM~nT_G~M@ex`!dm*b`5he(`U2?)7xW zKH?W$xQHNMGL7+4454XIOzqaIf1QP&1DKD9=5!F%W0Lhc-k?6ESfu0IF|tn?v@wo< zUn8^If2ONU&$nwtclxN+kiRcP1b3WcUc>B}SFoJb(pZ40zMJP?T$~oDOx(ZJD-k=cIit(ZY|Z?lztxHQlYnDn}HPkEo&V6&B3399wR7FqNpkGg@D?zJe`d}>D z_m^|Vf}Y(}-PZ&`)<@++Ih4c^vRV+aYge9aK{F+$l2rdug+81gntU z3sh_zul=3(*L`5C0$XBn1Dk%70`ptCi>78>{61`D2X>1Hgt*I8e-BHE()Aeah3F~`>rIt?Bqrq zLKPwx#X7MS?tY`?&bLfr9lUjH#o-_2qV)(qI8#4mMMae3-eoqf7635s@hs&U2bySvujvCoBRe4$N&xcH#_hD! zi$z5y69gYv^C`UWI_K*1h)7wK^X07mG)&vxPm~ZUth>kGuq>8;l&Xv@x-j$e2euHa z2Cb^e)1n!vP&99O?8$i-FLtiF;1g_zb&eY*n+M*EbIazJm1o)n*%=i#2S>A%Zcs~x z$*Nqe0Tx%tOPiD0Pm@HmqPQJvr{$jpStT-Lvt`muBL>*HSS5kEVgf%kVx^#RL?VY# z0+RFg6d6q&(4dZDcn*o|iiI*smx)<3NeBLsm!oDz4%|3tMaNL5hOtRY+yLKJBitA8 zev?9ySGR-r$Drh^j(7#5bdw#D2Wn8mJlVf@c==26#4#wiTmO4oYZj;)0@%zLnqy#2 z4;A19vLa%rJyJh;;D!q`J`5kWx%WZxSP&g!BnJn!Df$O^$vIT>RqL0W2dNsgA5D2| z51g4pS097R&F(Ib3I=2WbRkAE4*a~h@(Sapp4l4VTJGRqM_ZtL%p~vFG<){;Wz_c1EJzDh z0Q{lX)@o2@+z}!AWJrV|o-QuPfc6Jr0I-g86%)-mz`4bSV$~H&fQN~!f0udcBo7?s zd!vGv-x>JV*z2svWZA9a=)z7sE}WoHP_Fj>t+40~P>+Tmz>`|C${^j%ARLW_?Vo4R zNrY(E8rT(ny4U)pULSC5xx@fH*?RNFl~DT=Q~Slwm0f%Jlo`EXvfn&j7A>tL)7q1M z2{qlwaEh<^Pho=%5B2$nYm)EW?Q+f*2erg|lmzo=#env(oYexwRQ+n#UI z){|QwYUO%Y4fcpBNZnip5H&n2rIML;$p8sd+|4qO>C#Vg_%rad&RjwJEm#h(hGG?{ z%#avV)8n*;0lePya0Uhb^d!b|F?|SRM7aOul#t6Iu;jj~-);YJ-+Ufui9u$CyVx+m z>;ZDXhY|L*C}5Hv$ma}4Hce^+y7(M|W1#u2OaM7_@mj+mEwt-(i7xNoA+f^oBlZE4 zbFvJClN>14ffxjTbBT)K)!w>UlBPhdJvCDhV?;y(d|tHHFaQx7HSNNgqCS_PZV((% z!T+FnU+vU^)c{0dTjGK)o3tp_-QgrBcWFuQ;Rz??3@jCl(j~*D2DtJxNa;p(TjK?f zOEOU~@a18o6*_mVL1cg=iFnn*y+naGzo5mtkKE>{*CSmwhTG96EFURQS%Bn-nMDGS z{K#z{No!07j|%_?3uxud(si8Scom-aSR3CS4#xm+VzCVfPjg(3fr@O5Ob=AyW|9ZA zT+yAMvXlz+t&V(IhFOq@@#JGM;>hTLn~=pAc3bq(=738901)VnQ~2NXtoL-~AX$(} zo`}P+7$Dw@_0`ue*`4zqX9XiGf%K6SQ*@SSNgh^juoh48+`)k4qFrB~Et;w7HO)g}l67=ijGjIh-wZ~x9>=VRbfn={GEMNyRSQxuiG!fB4BQ{24E zB82<)Hn(ZuFR|y~-3jOqD1>z$fC_7>fM-*-MqvY~%Kw|}G^K0|TA>lmu3xJ()ChDr z8qo^?Ji`G`^ae1fJFWM?UzWY(3RG4|z$$Ea82oQ0A<=ChC07P$OW}zjC-v^ruVf7R zC+>hZeB)Z-or6eu4)G&?hfjvb09b*nHxS5%FE1*byC-e^d(u*38&Cu05*&l)JXrvP z&ry;I$dUR*=jOlT4idVl$~}%O0JttSC?J}I`>yiOfhr%qCPDxP7{6Kg45`UrdP|G= z90{P@k6L%RUXvySkAVEaSLMI7uucrb;^oMnjUZDtL9;iRI}bk_DDuMNnJX!1A-uqO zKh6H{IUGnyXVXBIBGL>Z(W4l8FlqGvaousDyh!hEF2LP*3@YPAT1~F@0GVZ`M+T%L zQ-Mi)$M76FGZgnU)=$NFqEK@b1MrkuBncpM6Qr<#d{_~z|MDO~nGZg@DJqoT@I>1} z(-%Y0)Kby_w%(1B;oT@z{Tn5!@}+x*!;Z}dK`PlG-%=xB?OTl0|E}{-(#9YLr4bFM z>nmpfO_K0q1h_MT1ti5R{|cn&wjnF9OYik*i8n90D71w>K|&V@Yy+wLxoAMQQeXTg zUDzWB!5&Z*&r{kw|3ar|KUE}1R+12gdLcRC92roty2H~mAyKptZDfm*G#d7o4LNm8 zia~ZTl>_?Ary@VlfuYwcqj?UmVM;FSu7)uT8na2@d$F3Bi>4ey6ZpRi7Zi7WFcwJF zRsM4IHJFt<6$=CkpbwQ~GpNuGAr|qh#?Jy$`HvMK0KgmV3Cu8 zMnU$fe_+JY;o1O8Np=JozpF#G4%lM;PQBMy`epi497u4pCI`sQ=c zMYPf|4Pe?|Q&^AFM#xbfWZFlxO3GseEb|mc>RqH>7|eht?$4)iI0vdLYxsdwf7z%A zj#hwm>;3;0hXD{brVacO0MurWs)axP_wI*+Z*}}pP%mzGb=kDYNK>gQGV}dSIS#TCc&GJ>j6H|YHN9e84y(-hKq7QhX zwCoHQj>Jku?$>BPeIR557`S>sp@J!KdX@+_E3RyW!DCKBPVOe#$tcO@S9qx7Qm9n1PwaRCiel^)Hs4DY1VSiA{8 z?N6X`Q^Z?7YDg4%R{sndq?|$mA67ble&D?9qzKrAE{HA1 zPr*NBBkft&KMhObbEqh#Xse*Bg{sN~0QbY+82#KcWK@upRigVXGiZO>%-dpq^iB%! z&sSxtG`iUV1h8|bd_o$erQ{=U#AU`bPbBf9)1o3aON*2MC>=6#7Bcnhiy~gdY)o#EAwChj>ctB8E z_)QA@e1|Fm)SDN}0so2;(MYtN%OrxL`vJ+88aQAs3cgi=V_F!g;*dS9#x=Cwc{O$c zQTK@%o=YhI$k6W!6{=0i`!JU1&0Q-R<8^JX=wY{caZf?zH7?tHPW2sVa{V^f#+-t}2qBe#zm62V zE>w5KO8jSnJ-BPe)oPd%0wqSOu)e!18QkTw9K!J8%{DdBU`YQhPL?bL0dWxxYA%yR za1ufEY&s0VuArs+UJ0VVJRL;87GHAwkCDL?rn+}K@2%Vi_*;-3LsE_2+_U9MNAmAM zHdZ`PL_ma;VwCf%nL-oCpRK`?Rj5esm3Mx9)0>(fs^wkxjQ0OK;h_@Tit( zz+jsC6QlWwZ{tB9>SIvb+|&oRKnN5=s6gKiGU1K7MYn4GWm=Yh`)6Nz8{)?n@o3kb z1w6+Rslo(wMkGpxv%!Q-x<)bNwSgk17@jaD5|uH%Khp28!U+jziUNU4U?_r}b^bS) zAp`-iY&!6P`#{1rG$b1U3H(Si^dZUs{9=k4LZ%pMnwRHpD*%B=;@9$!%i{SU`0rC2 z*RTabcyH_Qet`1*di}TU@MeLs1>t18{@qb2`C7*tBiTzV(hsR1l1Syb)B$0I@X?{a zyYHcUezPd?-JQ*mLJociXq_~OI}3zpBRb@0%`*((22m49YS7Ox^t+Y~P%c3sFmG2z z4kk%3o(PCSkn$P#nXDnJ4{y=Ys4?_6=eVT%B&f*$U4BadIyr<7UoU>*)yUP|cvxXD znZQ&&DgdVEN7c?eu&Q)kb(Z0sP^6wm<#K>LqSf)DXc?en&1oJ+*vur2aAh zKETobviuTlS;)oJxe(aetvr|u%h12;N)}0rbcmSj0}|CzAVNSs5$88lFy|}v{~R=RQ8mbkcv9}_H%|s0 z{n~?6^yk*U{G!Y}_j@Qu71@lUt)+_@jB^jFzqq--s)zVNYI@{v9NT)1PXd?_d4tZ* zWeGTxCgQLGX?%m5JCwAL=Q&fiA+1QnmS{fhP<4$}w?8|C`Fp;0-n>rGWlub<$t})W zdRw{e+1>JYY=1*NGVEHV0F;T0|g_3V7Fl!{Y6n&2SR?7u74 z%z#d-ob>6?GBVa$`VWI~1LUMN#9st)g@m|HO&Eom6r=EF0-bu3l7rfxJBuEQv?7LJ>zPV?9zX-u#ZLT>9acboMw{B)J1}K8+}u~_XzEoV zs793GmirIXr8dyTp+V^t9FCkA>Y`-Vf!gV3=Nm0eXB!F2V@p2<^qr6Qxn{n21P{s~ zh;{b4h}}8}Pvq+lyn6Hrw1nWH!xR))qX^W;exJpms0a_E&gBp!$Hfe@7$3sh+dhH5 zLP9PkZ2u4uP2E}(9?z+~7xFJEBzY_kSd;l= zsiY&MqiY`nODq%_x@Z3$a*{^y3{@UMl1KbCy(lWR4UN*r!uJO}=(L6COo(VB(aR?d zhhU7|zsCkD7DyGb(-at5-dwW9v+7GGGT=gB?~zMg=@Im8)<;4b>>)5NZpjcT&~a4s z;eZ}$X=OjFHw2vN^!H?3sHqXCNpXvPwQYhRKIi+?@fBWHaa=txRavWB zwmTMR>b}~EKjt^0F2a7<8aXb8dSbtM^peJr5r;cqV}OkR_hBwe;iE`OG0|N4WCM{| zo`sJG)nyohgI6X)p9B@XeZe;&lCKM~d+N>u`&}Q+Prkp--nf1oT^lGt6DH}N`5fmoyFSsgt;lZ}F95XX42eR47YDe3B zf}v;2ub1w1Zize(Hng35|K?Zku9lG!$)8ovUF;*Pl^Nw~uO5e)x+{eF7B6j|@Cdss z8kB<9Ba)kmc1F@10ai+#C9T*i)*^44NB%BM%q~0&^pPmksI0PXpU0`^Dtm+DKj&+o zltduYNMEe%ef4+;nZ0%?njHK6(Xv{z(1|do>lr~-VQ<6dvgU>GSe9j;#{Hd~J?oQ0 zUYnQ)y(IbfWKXnLh&T*j$I_^h)W1gZX)Uq>salm*oH(oql_021t#vg5@ z;&Bg4tw&UGWSUsZWF5;YTeRF7wdq5!0&N9?H`%~+(1dx?Ar=K}oY+oUTP@;*s_SLG z{YFK$OsCP{DMNa+nr~&vC4#Qb7uj(YMa$Q`fRr)KXSYpNcx(64QD*K?l|oELMCe34 z{2aB;tNVdyl6Pl4b*B%MT^uSa$Kbl#?%s{%);u8w-=qf^n$}f7h|Rx{E7QuJFhI`l z5q>%H!orH?29%8YAh_HSH9jGfVgrkyS4as!jlA?D<@1@bZ4lk_7O!f}C|Veb=VxFu z5Xng3FqH6VJk2{=XLt9BbS&TSX}uNdmHU}s*muDq9ehlpVs|S8CF93>csgWY z?dt^mpd|c?)m}s;Ermd!xe30cCe87wRldZ?My@GtT1wTwRV#G*nd(`eCbQJyjqvms z8?P#hG#;(Ictk314&~?T92QCj74qd|CDoq;k!2%S*9*Dz2g{y~Y)5NMG@fc@S4ZQ*(6QkvFi2$!rr%cL03 zGS9OTLn31_&6>*bc>Jn1HcUO_m5|~Q+@X$*j1Rma+GRCRe`C1=RUcj8WWW)`#2e#^l-He7_r;hwG z7usp89T6=JE4#U(^jtx?MqEj*viPjBSC>}q9GPeit5*_dm2 z?6SEakxI7$f#C+)({QHOl3?)Al7dAu(pIsrRW2E~vwwdN`^A}hsKwc%(T{pH_SCTs z+D3sJt+Q_GV~qnYG}SrgmCJ`Ge67KjG)F}Jigss{Wz*ueTE&`{N;Q-jZ`ZzO4dzr1 z@Sz$7<~u!b7h^8_TzKVd|0-TyUmjRW$th}gf&eSDOxQCKe>F^X!>==y2V{r2=y>#M zmi*J=9+=JHXEmSgt$GsDD$zJHu8&hkz_xeCg@t^i{j81`^gJ!k#yYIfi7&1r&b*0L zUdH%dqge;#ukY%(iHO?gjZ)_Y)u(mlYHY#to1^dt!>oh)_ zuKWE)trPoTn`#JGmdKj`#+LE!;}0fL66vMdh_vIF%Pg&5BxOgIx3YU~8wGvot=?Fx z9C50NJ@C2b*G2XffN`|!+u%j*U6^jul8QJNGnN|aZd5Muq8bHk^hk+jafn^(iS{b` zdg#85Xq+&kpsA2TsO533Pd(8Br?fnbq6}G*P5P>Z-_Q4tL#_9_Gwj7|KF6kjf@Tw_ z-E@6KipWV}|7uw+OKpRN=ITSbWd*3pA6N2;LL({tezpbmr&T)~=8NHI!mqw_Cw8}8 zDZ4L*pqVqrX>{>$^hB`l8&WPDJx1cdUgoI&{~aGF>ZJqm>WayyOHAx=e54#~JQ z98C^&IX(u>%@@^neHopU0k1GhQfuBY@IpDWMC%&*K$lymg2 z(yNUJ9iQ~1o<}M`eV=%fE0Z7P76*%6`tt3J5&Lj|yAJ^e`qXIj@1E_?6j$Zm%-R$GO0)4xC1pZu&y5$HfQEqK34vnn`i0WHKY2nw4v9}FbDzm zQM$4rYClWZXpS;lWp>NqY3#zw%kNJX{*V&~GII1h612$l9@Os?(AUqA#vd@`87ryh zrxyIV!U2u4#_<=URZV#s^DKdly7F}mBET!Z-TD|tcbe+V%eX)HD-9*s)5DQ_8}Po} z-BASJUK~)~%*}ZxXItq_E?zr&JWy?O??(9cbrx)ycTF9ZZye_9U%vd|`FnYVqrsr3 zHLn)8$&??p1hTqXIUDJ)znS&pym%z(A(@blNb8HZRIb(gq{AIG+Ig?)T*qrH8JvW= zS%!iRi}TUg^jej!gU@gjPB5qoJ50g?;=9KPAIUx@SAfq}hXpXccAu%3`8hX_o|Sc7j69fAh$7mBjqgr^w@K+)TdhJ zz^*T7hWJLLp=;#}NeN^i>FGVT#mcGk{K@KrBF9=-e0*c0qm5grp8+N(S)E{B#(*hV+yTM8NQy%P(Q(`{g)VIVn=Z#Ljwz@U( zEPIJBb({p;p`ABPful-&y{#DaNHs|iXgT7~;o8^Ts z4j0DAtq)&+9=DLkmGaO2(&;{LltboO!#SA`>d>`s@$)OX`RQ6@A$-fEUjNW<6gMC; zl+?5R{o*B$+dl*!_xzEmb{=_iqBN2n9H~%N8D!z1bV(zvn8+csIFzw@;Gvql{p`l2 zOR5a*B)uTpa~97!yMpM#-_XPH_wvc5CMpExKl)()Ex^&kCrG@(LIAUPg}(-es#m)& z{tf^BtNCEX!S&V8nzju?rKI*_uy=%Nk#kSaams$4kKh#^iJ#8CHr|sEbX)s0S=mkY z1vqQ#zjyl8?bG}*)a@V52naD<$|S3sXN4M5{=>XIk0i&vKj)8#18<|(OZp-oU)1Wd zlWTI+QR=x_?AZrGt<%^;ZBxT9wC_12tJ~jhr`)_ckPn}{_QD$G)+FnDW6qNBdCxbQ zW=vJnHxg@Ch<2=9*4_Fb5-(L@%ZW&mN2hR2%rWQvp!PA!f^VpC$`wTL68VkE!O$UQTJ{QqJtSuIGqwD!Sft?!{dW>;xk!O z%x`~LU2W5&Q5Sj1EbXy&v>H@0ex2_S&q(q|okxGr?uFWP_IJLdF7h(g6n-znlg&9* z(d8`)b(R3D#s|NxLYH}>TYib^?|7J6D$$f{8=qo@8)N@BW90v4jA~rc`Kwh|)jV3F zrI4=(N7~QiK3wxqmLLu>^TCwq;liHpz3RLhe;P0k(>b_d>Rp&)pTt_i40Wg9#dwIZ zW$r%m>a)wvMp@Cd6xQ#p`fP!Urp|4h@;nh(nNNh-3G%e?eyQ_6hmwj#zQN{y^heIE zn0WnyMjDRn1~RO(+%v?wqVoNt)QeuW(rcEghhPveiCJ~sSe^ML#FAPjN-exF@a zKh$C{CFFB|I&I<-smNH7qI2R6{>l^cX)bBWXaC3zHOV`wiWDtC16rxw{N{pXD*=hu z`zOgnjn{i*F3S9uw6;txi{Vy>D&`6-Wa_Vm1zAkho1DieyJ-x^ydiv)P`0mfcybyY zpH;(XNrsrVgmM^aY-S}F_#Y!t+n~z4-6|h-Pnmr)PN;xPCB4HnV*&VvtE#ar_F#{> z4)MT^eE)3gQOl2XIdKIVqxFumctR4POx`!&wRtS2@+x(%+!kPfJc+T!;SyxQI0?7P z+kFI$ipB9yqxdqq&pT7?Q1wkC$~aZjG1LJwyg3A#`_D{Zv;8v@;*5bChg~fBJ1p!U zVz^bYE?*5GNnr{zTqru!EZ4a3*HJFYZ`aNfS4+kvlCUiv<~3GN>I^<_v`uxWe!W2B zvDFi}_`E_x#@zE}dvW6$OT>Jp-dTHLAJ?QTU>iLK)@C71cgpz`t2$`Kboh0fzKn&rOm4a^gx&pWAZP`QnjA-T~iWdlmR z)rZ(IzIzYV9+8FU%SIj2h}8x%)_u4Z;3>R7Iv8=JIVPP-Ti6(WM(Y$o_is_&C;4wt z#z~H!^PZlZmgJvh?gkOMtK6K=+nGw`Kh(!6JE|9Z&LZ6)-1=dW;JE;doUCII8gPVa zI9jDK9o7+KgiIJSH0i5AJ0Ze^sXe4EliMFof7mspaM;WZgguSw_egn7>URNu^#5@L zR0V8Ydz}?Hl@NubVTLRX>`5p|#+@lA3}`FN@VmyH4V4mfMnX+7g!^==UWf>~ELXG6 zG_GuozY^z9Cd3g!Q+Eh+a(NDl?`^f=e~U*>VLV?cxdj zGTe*_>j$nlFxKVNAGW1O$NH6a=qCdFsw^?#*W!Xa{~cn~|JUl=CJ}p{Q3^g!Iz4eI zy!wgbM&mczz7yL#{;OuQtfs`aRei$e-TfqU6ucPf>}mi|ru@m!7OA@*_rSH2Gk;xe zIon(yXDBKCoSaXU!pUP=wS!a{`Y;Vmc&j)4qI$uxOXSNTZdNUa8?9*G3uM>Ia`$V= zWsb2K1nR^yt(&+1!Y2Zay? z=o=B?q5_u}JZ8rTo)t3BcUb0@SrSWObw*pyKmqQNd9R-n^o8&4zx*6q9tK%;-I=QV zXy~~~@D#J(L*#z5s-vrSL^fK5_&?)=Aob7q0Cy3yB2=@F?Ab;7c8&nf(qkQg{im6# z+SasP6zdZ{jeHM@Co)DqD|RT5=s-$!Dnn(l&gDg{^cRMur>NGoh5wLzV4&_b?*-Jv z-pS6hu4jaB>Y*^|VEfn9&lOGT!V=5#JgIzzBiR|&mvi;~Nse0T%~-yim1V~Z3>3zB z=-H?dBTrS0+XdJ9)Q?|eU&4}_C85voYeRIbh3&_zc{-i$td|-1o+jm^3r2iLe@n4F zCZyw_%FFdO;BK89YSzL2w7)rwGJ+EkbGuGK04`mVm1o?;nfmgM#|e4DqcGuHtoCUi z=T6_=Mrz1&7e0ObrR=_LWs1kKhc=-35lCRL=fVRW?+id( z3Gn?7k;5g&U(fFS%1OqHe=A<}ZGGx7+ur;zs_0eGdsBldiy=v;<|Or}ris|nF0n|)6_b$_O@MVKt&Ch}Kk9G(dj;EiP=dd0L zNGfkO(_;Q8H2OgAyCrBNGdGN(p6+BnS@W>Qdcog4O3B$j5$%lZhfsI!GZha}j zIo7BNmsp0@zWsuxUb?i`s-=xI!5PVQ2Ek+a8liZEvTXAWOs{$~ztsF@AVt~lNe%YEn>KFDT} zPS{PMx3@X5;}+#fZT<0PX;m>ELtDFb>P<>3Zka~&%;AF^K9F&B&Xh4cJ!WaGR%Q~4 z^OCvsFw|1VkMQ{ykOEB30d))U`eHCWyIX#93+H-02%4IaHz)X0#!?+yRp#n8kOHin zolV!%TC?|YXTOS@ zPP;!P2yxh`7R+b^wGo~RFZMZ$~JJ}?A-aK(qsP2J3+jutz<^PGFV z+S;J{_BKVv<8VE}0~(O-7k?M;JP3gBW-!@=%OD90_4b*cFUCm8{f>Fkkp#Y1*u2M0 zUqNV9HH}cF-{P;5Caequo|@r4ILvB{t#jO@RxQg<4UwAN&U|O;Y%@0N8Eqqr<3ud^ zmTjF?k~@#;XFmD?OPG#4%*+5{NG@L)r}HA3rO)=;vo|#iL{kEX>e$r;C|3tl6|aYF zGZJ8QKPCU!u`<-VQy2#s1olj6K_n`?f{PIW$q={zo`@gpK*UfV0-`;@)e`&lH`E1! z&s31dh%6D7;SmyukbOyDdNGO3J|Lh?z5mx$>N*4HP5Gg1qif%)tg!vzgYo)5ET7LF zbdNdb6~c-&ih1*69e@klaXXJd|FUQi z^huvJW|fgi@*IaBpn_3=ync+Z@;eBbgb#kcc1TEk1ozScu07;y3=W#Q92ObrLlDMv zM&e9Yp=|$VB^F?17S8R7j-&!O+RgD8uQ>-ab;~G9J#2858n_DUQ9YV1Ux3wrod;Ky zBXaW)Nsc2RE5Kh^{vI@+AnqX27#{UegbaZ$1;m2kV*DNS#Q|+(n7f+{v_ZTr0$fLi zYc>@PLZ}b#qbMUGLZU&4jSfQ&1f?E5wX@;@LD@0%Hg`cEtQ@k-MrvuPA!5h7STjGk&OtufL<)>7A2~h`Ina+~bc+~i5%{!If%X4D=1Set3{D`0 zJIQiLv=zX;o`!XH*!XGGbdw46W>rxWk9QOG2pAGbx z3iCWxjeIMK-WZl9r7$$R0xJI^vA^!tO?bGYIkmLUN`MBA{1R~Ct${0VHnh%`s*HlM zo@k9jGVH@SkR+%``+K%wG2irsg%0skmt~$bF|YK{1Xk0Z`Ui#LXuuV?XHA)oh)9n> z9l9t{j%?v`g?F;Yr=~ULX2oH1@}uYp8%=pGh&kT;-R_s%Prk$lLQW#^p2<)lGN6xm zx$TYo8p<097%@_d0+g=yf)$1T#Ns!l}koluM!_Aq`WRGUv>CoN&{PqO`!#aPcXD$55@1Alsh>NPHVugQ5S;dDx zB9k)ybHQDl{h|9u7z9S|FU|W?u8j$R9h9S3dK@Zg6m&165}x!^djB8O$zb`X$@P%f zzYT>1vbl8ceV=-E=Z(ks?pvdcERn#1IO&#(AT3VUs33PrUqe3m2}+mOlXRtEL8KKD zo3B{~i_ca_QYbQi$OCvzik?kABMPv%iK3Jc)=#~rzr8WS`L8$on&9J{Zq(ARypWN+ zTEWp$PPohj@vEZwgZiEUyyxMKa{C0ZBjl}9o-=>L@r-|+#vpVM|K%i+Pj&48PWy9Y ztAu-K#p-R1LC(?9F;JF0(XC7GYG z$h`{^1BF8*(6Yhv3OfI?sz#wJsYrI0p`9G>K2Ywa5nYhyt%fl8}!*sv7SD z(3zBH#w}i~PYD7dEiw>%8B-9<)?zS~hCLJB@~aMZ!}^>**U0wr)>IRBg{l5nzK)y9By9> zjy;>Sj=hi88GZkpQxxe}nX<9z*2e)yUU|D~cQd}{q|%AyMULNLS>~bl<6dpfvYz$q zzGj~*tbt^%L<^;4nfqsyu&cA(zGZLO4WP|X6CUKt7I0dQE|YZbj-%WF&QP!C!p>OW zAyF^G*aD4Rz2(q(@*2}0Y)WC6t>VnFHIwSi+_C2@;S0r|)(`HVQsjWpu=@X@>Mz5h z?B4HjSP*6Cp;KZI=@5`EX-VmlP#Woy4h89wZt0Nj1{GAgQ@Xo5|9kNM{GRV~9K53D z+Hsw^&b2Bp%J!QZPdj1PyYSwvBPasUgAzD^44g**wOhgE8v{zuzEp+V#Z46MT3V$t}X4?#Q|z zg_bZVa|GNcGKcJ!Z5qJjvM873un;gc#NS-hx>-)t$p0P1YFM?Bu~ zkTu9MLL&{IRConfB4PE+Mc+t0yfC9L)9wY9Rw|RaqO9szv;9p^)kke zuG{;j%K;Shumfb>mOP}83G17Nq4zu5mjdpM_8SbFv*opyqhl(mu+l!2>h-LCDdoE} zq3nxFZ`?^vn@P(EvTH%VGQC}yyh&?NIZJ2`8oKr7L0-gTZ!_)hUaGMr*F%C}~_(W-ISMwnH#5|xly z+=1XuWUuL@g+a7>MZBpJ7Q^qfNBIRCUK5c-S9Kh5gl}keSx$u3{_p<~JW|IYV5%M#e-zGcC9(2J3 zt!vNoTS}n6RkHn%^?OeO{ljwK1=iH5NKgp41PvC|!Z)w1YFGf{lfc8vt?4-@V{w>l zP0#W8XMa9~dc$r7w^l<|SAtx+ZjExei3ye#jRNgQmvg2+M~L+l+|iD_FJs-DcJh*) zj@9FiLgJW20Yo%s)ccM)B#}zIRIf#5uV(8dQ?THpG|yFAT#IBr$(^5HG`gM}LX>C5dFZD2Ukqr+iFB){=QZZ2$-WK|!DJOq4aTpL~ z$y?#p1T?2X9olv^u*E+P7~>FDOZvgxRQG()&#{k<4hdf)Ds^5^6BPF1)*)a6G%;~9 zDHcdIF=jv@M>R_lLy#z11sM5Ec=C;VAp0QN_^RF#zFCRqm{yq0^X6sXCkUY+u|BDUWu%8B;g=4Ba_g1=8jpg{wyk zD`Vjh6#v+jN~S+9bDMMC#hHpy;p5dbrfFoyOM7$QWK$;~HrlNc#ikdF_{TTPi4}eE z`Zkg;Hr1my-sJ>?SfNCapW9|Sn;=XtFZHi;Cp+1KFhQF6v`=tcU}k7n+N2==+0ZF7 zzu95C`R0T<8WypjkDleAD(J9lgL=G8seU7i>6=N-)!uZe)F9L6KpNKkayOp-a8f!L z05^ur_-jj{nI+{RjyheZje7p3qgLD>QU9LR=<29+6ylGl@#iU~G4N;LEfD7d#L6Gaaf+2S+nWpz}+)+4T*H zS5B8?%PP+DYW2y;Mem1=n0Nc)=b*K&dNA#yy5qx#BAe*X!B}&Uy}Ix)HhrwkJ41> zLJjX!e2g9LFS(u8J`@ujcPJ2Yv19E1bk$jw@$zadrzJn^O*`pi_MXA0K#)c4OM6KK$2PE(L|Muv{>Xo6 z7=!hFPMUIiBGQWfuccv~!y4v^`v;%Rhu>XTH9?evs#AC)uLW;NR4?}ifdJv>;8?W_ zd)m9bG|kh^f`?6Vwcw1Fg(O+J-}OXOlPiiI8ycstRLZBFs#f0=PWEy4p>$5x4OzXSvjD7kRs>?Qo!9rRDypTWh=EjahV5LH5 zBv$ABXv~C(SK~~JSSYY_YVpbEgXc=P9a%A@B6>-+9yOaZnkH^RWyprRpU%zND4wmd zrZ25FPy9(SO}l$@#Ym&@WkR+$D$xog(sDo6tBFDBO&5=PGLB?NPQv(aPt0WW8{4LK zmqhEMJe~F3$J8Lad@JiD{EX)9eYtE1O9&*Z0%Cv14LH-9SRMP)pS$O-gg;Sg+B&?I z$8HjQkC&Myo{=y1#x>pIVB0vSY{Afwi~Rg|9<^v4rqJW%$-@4S&HUZr>QQ4+*r(U64VyMil~ymzyggyly$35a{|i?j;UJmR{Msl|n_)*UX7y;!jG= zAj)z|TB(uxrCYWCVacntSj0_B62!-CgoOydG)8l|0z#YFnXnIKy8Z`_*uW*6x^$W9 zUXpYz7w0>0ReH@?$ixX0?Am2PZ!9-87Y$!$qInRk4y(P97424~lNer;58K6_5uA0< zX3*eWtbEB^7M=abyV1nPm30H{ME8okImk>mC&|z5l@goih*@Ck?Sez0a%IJ;m9G@& zBK8zGrlIone^17|6$3@GgNqdhlUvc|#^dM}026uEJa?xH-VUX}8s$FG9scaR5_Fs~ zuZ~AHP)H0&oI~DZPboip4KwAP(aK21Dn4J)2JT5Kh(Qx9k@xxIOu*WV#Z6Ur0HOF-&*M7mwe9MnU?G(?JDko1?DdcIV%s0)>mi>+}# zvC;>`Ja=(Qf$r8BW(6fDALfwY*QYIsY_Xt({wLvys@vqMaE`_p!0(Au zH0{~v{(KiG5moy3$8Y@s%@qhf{C*i9X*?6Mm+)f_7?;>$dlhrEviS|VVrZ#nxgT7_ zF2l=W(zmF^z9Z|`On>yn7+Zi>ksVcu!_>RO{pSMe5;0~L(XA(_sD;^OPYnSq=Ifi_ zei`Vtj*l?c70JYJgeyKM6`%;!*_`|B_FW*Qv@EBOTo+#GNmnf926wfg3-QNzo9uJf zSGkb1Q+x~m{8D{cWJii3HxK-iB)gl_wa!DH$bR9A;9uwty*`HrR36UxrEgC|(+d>G zZ!28Bf?O9$*r)nt?5AmkwZQzWIo^= z12CGSFBCzhjo8OX1_CDnkrlKcPaCfjFJzu6O3|9RPK?75&#}M{JPW)enRqDdo=Hgc z`rZ#Qc!^ChS{B58K3ark_K|Y5>R587ZRZD`$zGC#y3@2?8KcG)YigglP>RNwin*$* z`YEr-e5tJzlSoV`v1+W!ZF+Q=-_i1~G%6=_X!3hJf6eF2m@gONj%HH&9frgpe&M>K zF%5F6fm1_LPv@?uvwJ(>JZ5Tj>~>XqSEdUbFO-jOJP5sQPTn8&rwPfbh192z8uzC8 z-%gdi)@iqjp!3VejF4+{k--a`NB}|Rdt2CO9Envx>n#DmuPAsd2SWgG!Hc{GkHQK5 za=n#(gklQN<*B^ZUe!}(0)o9}k{VPYgk0jV>v9u>m7Zv^r=#yFMMvr!$KoAcII)IN z9RN_qNKW+@`rswT)?l8#H`z~>lE2$4ae~J|P|#t@d$ld_3Q4G&(_yR!Mm#dALDkh4 zEVE1}8$m9pJQx4m;jwm$ak>5;SHmK!dLi9S-M+i-lLYM1h5aO-gX7@G+?L{?Wcq?A z%0iY^w@DlyRXv*WTwvGRxv^&J52+q(`+0mSI{cG>M}*?HQrm(2>}iUtKh$Zp?qbv; z>%a&rce`_B{57AV#Nv{i`2Mo_3n)N21RU~a!d&bYnlvQB4(lD#BZW7rVr0x!C;=u5iv1lC6@Rq5VR+{Rw?LJ&OANbgkbcI~yD~ z+s#j^eS3QBDe72ZBSjjiLU-0pAaH{BQcF>Y6idx)e@epr+b8UTniyd{f=+FjH9ODl zsFN5vB~ii@vv&^O=l*U^&aOIGk|5d_4J7flD9e$A^~s0@^1W|Dv?v~QIV6tu7|Ui| zSU!jjc1XIc$md4p_Jt@dvG@0AaSRU0hb%XG1eHz50SK+Ja#|RLaLaB~GPiZ1zTMum zt49-BIz@xU8hfpMLjWfVLHV0jVA7PF-P-#i#EEV~hKkIsp7?weqggr8YcC|nX{05bwe@gW;*s}6@^`;pFuvJK~^0?6i85j_J-CeJ%F*&A#x zmx)iNU+IxB{3vDgitF%F4Uj5;e zTD__AY51_sd^BV%Efq3;rvR|Gk*;YlNrY}=VAw4~NmV_K=o zK6h$a{mXH6#E`mAHsf}(QhD6VkYbVww_nu_b(?c>qbp3BKev*wHyaEH3E+>fgpu$h zJ@e$!T0J0F5n>4tMl^c}2UDK$gywzVsxzbjk$g#cppj0O)Q3?9pbwR@ayPez|Kho; zI5(eLyh02zUg$eD>=F);g%Sl$2a}hxzuYUKL*tU zW1-56^P?#C-Y9_0SN|);d4d7pY|yeR ztPJ!xjfa3~tZe`HtJv<{M|g<%nQzAbfnT0DjaKlGvYSvIU{g;kU@MUE7r0cUN_+gu zyn;Y0l!W3+cyuh}D3i2M=-o?6o7Ha-R@5_X}HWUMm+ z-~I)%>w^dHq~xb)I;5~DvU~*tHcdP@j60LTe%#RHX1eU@ic=@vyX(|CYK%Lm6 zKT*Ev3lJ-nS5&j0OC>nKxMll(O(LM7&rRwHn!_aA7KjIj`d0bMSvc~S3l6oDuUQ^a z71FE-VlHi@V(d-?zY zw-`&S&sogjHa*3=9cB<+*7s%HNDfu8wmN)Ei_ctTjY(I|lb2{JIv()-*_L!mfwq$6xG@Z$trK?dN)Iy|zrznq4KMBlrlSq^LT5WFRn; zNcr0f?q3WMbz}-3ww^NcVcU=neg@@grGuf4z*MB&sd61!k;%QEIANuaIsE8A@Zx3k z`>kxM;4O$?$c59jZ9o>q9TTy8ZgkE=-_)eme)XXB+6xi?wV#C}6@Ughm1?(qzHB-2 zwa#T}_)wu~IdM$vL|o|1HTajKDJAdsUr)y?YQfkQx4FH$aE6|%ma7b=cfUgEG=44G zi1br6(9|B%ki(j`&D-uSRZjTS1gAYyIyl;U`sk`IcHuQ-6J6aKXzad*mEOl5j-i&v zdZ$<416vvd3xAnXfr&AJyte{^4lhfZ;rY_9hc(B~(FlC4l4{S+GlnF#>_VSxIo-_* zT6`_~f&CpFJnr{6)XXSP(B3#b*ZE(f)Yy`pBcGN=67N0g7|fQ&k}bJ(@!urUCSlii z&(DmQjtcdtcZ9O(&jp*uPSq^NHfa`DvYtNzTh{t1{?>!@99M$uW~W^D&1e62H=mt0 zcC2?swZ!|5UhEe!PzbY|{$eG6WZr`4S};Dp+b*lN+m9#b0MeTJmzS7M^&q( z$MUoCyA@yaD=WTM;^KYWlY{3hOOjragnjyWTAS6D9Ye=6tSXMKQ|;S(YeUOH!|U@1 z{X6rM6K?%uN?vHJ%AiDC72$S2Vz?NG^1oiI&L#=ogfbzT9`6aLm9_T;hH?eS9#~}6 z=xTraD?YJ1#(Y-gF~LN|z~Bd>B-oKf4zm?o8INwg>#L6}ah2$vgKxv-s27D1y8X?T z7Vy`5jN{h^?t$1E@f-wqjq?{Ek|89t`4ug@f7l=Po_qMV8F{@chjH_ZoY3PO+bGcn z!H|-vqL6aBncB4nJAbPcwH`ahR+#k5Oxn}k#oUB@JTqhSjeoW`TONQ(AyCMoVq(+a zAB?xZCD&^(Tk08iHo)eshw6U2?YcsgL>9yCO}^-v+GIJE-qszb^;(a|-FJ~TP%*K> z2zHzQb;fk7SjhUfuP3s3>5r?Cpy4iu4Z4uUjs(-6p1hmCb(;U2$-$e5^^g8M@=XA< z*bPMA;~F#riT03hK3rMz74ncRM@-SHV=!uzht8eKju5Zo*KoeJ@AK-D(XqhG z{I7@U#MP#0E$C;)gyn-@8qw#Xec;vf5OjA_;ZTB`oj(X*fE!#&9{B&Ke2nN!^&g;T zYj&IQnKd|lsVJnHiFmN3-xX*l1dr77Z;dU0M{e`^7k-G60Wer`gm5L-l0 zI`@81mV!E?Lj8FfhiRG69&n>R`8_>Qkq5+fC@c!^j-45R(+lOD*pg(Nzp8tu74cdDuZNMk9 zc*ESp6d%7PY2gD68;O!?-H~atSTfhZG>hi|_yHku+6iUTYrVm)KUAP;e_j3=>U2x= zYvLhMKM{7u1Bj-K&so6XmrOu=9{CuZ$Y7)KfE~IY|G;nE@LgHX?XCT5awJh%Ffvo+ zQJgPWuaIHkQK0KE$Kp|v*8m)gwC2TX~X$k0N6X3Kd!_eL?spVq( zFw=ZwezCLW7c2rOZ|(=(Kaq3QqW%EFU9Y14CQyTOrzAjuEOkoN><=oj*ix=J<5nL3f+}`k0vN``LU=8h8?VC+ zWCpSDrVzpRRljDavB>!s((QgEID8@DrO3AFcguWYPNjfR(6TRn7JlXnmN)LojL!pA=6LvAmzc zmg0x`(HY|E+w+olhKJPMLBR6IaOShLCwc{tnL4ZE1ZPcH09M1nHIoZKE9OwzYd~ss z;#uUuO#ECl#<$G}O!bQJf**09w2!gFIIeseL_`1v0RO4&Bk(z2L@fWpw?K+N(=Xpy zO9_7I6P1plGU1E@P+M#$vb6)Idnc&3fUPrvZymW{)f1$YS8bU0(4RR4aTKLrrkr0> z=k`RHWU%jH8*+$EerD5BxIv|4WlzWD8uA%y!2A5`6f%E92_;6_HUcH#-n(9j6sWP^ z!^h}f0g;MKlC8m0B}zmei;9?3dWon37V8ZmYMnl;oWR}0dKu%yfKZdu)D>6?Yl}n0 zsRL(!4*zQk?LJo0K~Yuo^n(zVwMuMe{Q{=b2vlkKFVtZheCoUs;kA8(aFALaU%s$XP%K`oQ74$RvAYU_1MW87sk+Dx})%3_P@QHc>56 zCf54$hyf9DE;A33m-f4oZ{x`pZV|aFg45Iva!*NoNpUW3Q^ z^dlhPCFM56Z0DJ!`2A@f%ksseG|O`qtPYH6I-g zVgg^+G-Vc}428-$!L1PzIxT=GjT4A($wa{LW>(%aTsZ5n>dVXh z1u+#1z=6gIL|DGZG`vQE0741yg(OBPpTy~Su-IWx_&VJa(89xo(18Y8;<78myx(hf!0=qDQ3z!G4s3eh#DF=#mxJd-v9NS9Y{U+buevI(ns?1Sw^x9U3$Brdpcc!jFXTiJvqOvsU%fbB1K{N2 zt3yWg!r^h=PGFFP^Dhghj}Vpu9$yCO$w)$GZB=hvdic8!zSw-*z{0Zk@$w!bsq6rS zvy9+@r>|^t&Dh8en7|t`KXSt8cs)hKkW8N^95I>6u+TxwK$Jko@{XLrEa2EPVG zg9Qqyd{sb$_7bo#PInP;fG%maTf65`FLzii+)hFee6NDDdftBX&Y}OSKAh3LwoxD; zv0iJUE4H6IppHUCqTP}&_z|TnW6e+xJOClHKOpoSW-M*WjGJurZe@_ZJ72;&al1OT znyIu~Y*&ik3#MrTG1R*Mw>;l3XJeA~r<(!7MCA}5HS?NVhlEv6 z;Q#GdAQ>W%M~UM|6S%sV|NgMHPor2{4b*RA8R};Ui9M{jSSvk`*Q5Rn=^|s2Ny9}L zSg=r_5Dmot;9i}(_}yBcTh`g6a*G3}-Oqi3Jzw}Z>+=-5KLJ(9fHU#?fN~|eLZqcf ziL-cXk39?m@uT(yg^B?nDX}y5;?@vAXz!f6&tl5m&#ZZMx)w*ye@k4SA2?O!Y_lQ$ zyXBNkDEq`CO^8xV^>E%XodDO6fP{l(<659X>d3gn{K-_H-ph*aOjoU&|x zNm+zKN zen7p|_<{=@yRu8g0}K<=-+&Snu6}#?1gOucNzi#;i$Cy{hC%WA43pdwr7sa0$JB$io+Yu#&SqFk&GB*xT+ z^EqnTN`*5IjhI3JXb~7ApJvGZ82W-i;G=cGWj0Onr$)TTEs0m9-fl^fK_wTRMltQp z4BAwMv25)J5XHcJMJ&+_8c7Dlnt@?Ms8zp2QJA znYXy4*MoVDj2fa;UV99fQmsqr08p-oI?+7E+z)xE=iv@&;4Y4mD1Jk(J-@qsJpZ2R`C<6)cn7h0T=u`>r8RAzE_Q@icL0&r!sL_o z7CCNlU~#(PACOw(iP6k;M>;w8ZliK&jO#L|;V@SW1~7ZXbey;VerfRYGlo4%DeYdf zM^UJ=jd`wIqUa#{2nziAyjAq+_aECmT5i7i70G3#ox0xxbU(@=L*QrS;*>T1>mP6a z<}Vc4Rh)I=KFwsD5sEdZ1&GyD9t$%Lh@VdzAjVv_q~Bv*p+*P=D(0y6HJlBVYoYyA z0BHLFFkSgPxhXrxBexbz{jCK&DZS9eogqB5+I|K9=jGoB>95Yy_;}{H-9q%j8S=Uv zlo3ToUS;{$eDv{00?d~bygI;dS=4e@8lvx5ngc$q=`KnT2|oZaNn2+$t<#U_;a_d0 z0%&_R9>z9R@SyUP7Jki9R_>ZucN$&Pe{>}=;WNJMU zaIEe_$i&M)Ox+X46P?@2aP{lDgM5>4L{%Ccp{_(2hViSoFJA-_eQ?@Cqz;zahhtw* zM?F(gmPvn_NUR1z6nY{h6o@zhoVdMfQdtOXl&Hd}6J{zqd7uOs5yK?l9^nCk6wXAw zL!Nc%8i*^VFO`679la3H*Oex9edECQi2&)Fx(S! z1~3XAv15HimT^EM$IrSZroWZr@vWGg%OW$-Ks?}rA26kX4&rvNdSicd^9+d?kg>?w zUtUrh%2TxYx5S?)V2SKMicKSwQum~WFOtU%{ptM7iVS|sg3!T(s?2hd9UPDO@UvQ} z3AAizj>n4ed-w@Z8qgwogy(76cdxmVhzS^XCOfB_f=K1xx*!~gV0o#Oi=4 zsn+J(N(_}avs5@C2tJWS=Yj1~j;WxX-5Q^1ojss%5KSOpRA-7DpC^XDc#vSg2aN34 zZN;kFu&hoIB42!N^gI}RD3ljz`>FV&6ZyP|q3s#cpa|!Sv+Gw4cl#N2_!VFR^Upx$ zI}lyG#+hHSVv@d?gL`qkSRideFQb+6OfF%d7@Zv+4^e6&a@)*~p!5T3_VHpFJv~N0 zTGrZ?m0I}HqGa2`MOFDp&d7znev0oK%wSyGm`eTYC^o6sBAO6*~*vRnHZiN)-x9YLf zt{B&*A?}$HE=E7Wq`J4Slm;KD7^XI|0f#`eBoGPoWuw`DttA6Pq7NActf=)TF%be# zS(l4oV>|1>NCZU6uT-``wufH8^~Em`B)s@QA$4)5;kUoccv{p*B?@d)n>ReEzoT#e z2rZU2Wl4_|q>ltlmm7!4a?~Jmd4TACx;@w%H{0s1k4yugn)Wp@67&?%2>3QnBAtIC zKvP&)7>ll~TJDVA@}s?NrLOSiSQiA5)Yso5V~#hcFD|6hLLU|AWW5CRUUx|UhkYRK zANv3Ri3i>^T@88GAaLrUB{{x9Gf9sA#*PeW3g?c@c={3D!szw$6IZu#qs(|_o4j1hKv z%nHnew|*`7#_nM5+q!RlL9CxNFenYH+g(3Z?kZ3&P+Q zUlAnSI=7QT5X!!H+n#vNO#+Cx)&1#1h5&(c{fyMTBSU%9>ZMyd?R>U7W6jzN#E7QX zIxEvb*5LG?Hw?r>bU3xljQ73`-W3|)cRJ6J{;RHad zkvR0=i8w7Hpi&;`jZ5Qm>MC~iND%CgIc&WTG2!d;*+X)X_~=}}1N{CpISb$^1`c(!U1*2eI+3B2`k^J6#es`E!)j@z3OPd?+J;0%&>Fsf9F8q@0>j>J4Sx1T{@>(Jp| z&p+&`pmsOwX%HT*D?y}ro@J+Fp*JxV;RVh7r6d9tNx2Bw-$sXjKDfkqP z+C7r_RPousio5KzM&Fy;g}D=Wz)`57s}%ldJMC1APN|7jLj4Z_6vBB=;KvXIr$Gi= z4_A?U?*&wA^YxnD3VE0{N>cuM8u^Gk#W65_yZuxSG5|zW>)+0gKKk*~3a|pt?82*1 zz(%(5`~J}P5^3yfRJS`RJ!HZt9Q$+iaBy17^ZLQ)?X6)$aZg1sTcbI$h4R-J>Ce}w zR3!H8E^y+(Xq2Ki^82ou`)5SpHp!>$zmVv#6Z=K7a(^&}5nm*Pby)tu&a}aG zujJBY@L0=CCmF$46@P#Onu+lCrx^V9inM#jgiH>}Cv(+Pb*2e6^67M8M!3-10LOnH z(rn8a0DjxTeLRkIz#i=Io`vF5dnU*tg)&^;i~q|m+2Y%{bu^T>^z*u8T)C>8`#>k$ zEc@4oEVGNj=FO#z=%-m6x>Cs6v2*>PI2neH1Y8OhQxAfeI~$hmJ`93v!7XZ&}@>?AV_E*meF? zAd(acBi@hgU=DeMiGnXBQR^lLsble`@PnSj6Lo>|p^pz1(s_jIbDajOzg)GnK$pBY znOQ!DE((5ueemyNy83Bnpv*(AUx1$`@DT30!5b!N@OJ^j@h+pgTye;+liqukuA5D6 zt~^e=C;90dce4_y8%<%895M+#w<}$-u#={zfWuIXV`g{0$hX5QUf=;*H=Y2tjiZW; z$>G&pJ$b1=oUnVNVU2`I#zQFqTqR%`-)Q(FOWbj&+d{zdM)Ly&3Y0CnEs9zydw)4m zQ86teD>HLMOR;>JfI_B+3}FQ1zw_e%C7QY|RKY7uV)~8=hFThMGqmZkr_J@Z+!5ol z==R&o=DXRS$T60xv<*k|E!v8m=#%CuTemMqH~N46pNS^YBYJ(DT&Ek|TzClj0GMbv zTVd)?AifSljYdFHA1UkNO1~GasQn=~UdJ$U!37a0t&K9*o^Xow&szGgFHb6_l}r7* z4z}2AsDqijGQ;stPE6ezWRfWwozJ*(h;J~#6&dxRK)VmaHS z1rh|~MJ#fG+n*BM0sTMmWvzjpeNnSN&%gW+p3o^-E6fmQnIB~6Rv2qV=VXqSo@82f z-UyR~Ji!}PGtu-fTaj67o(LjYWn|gHazYa29Pv{UhX4J!w2uTGzW~mUk?1TUJ$8CB z@&kNw8Bagb)sDUt-Ygw!ci@(4$=5ogrBD^qZrT%T{{=Z<01_Uc-g+4z3*P+H)FpM* zHyyXHF0cx+*kmf$%?)UuBpqK5>FqW@ZV=u$+)1tw-;LQY(`bB&Cu}W{NVo~{Gs{QG z^12TyBis5?`E%%u?Yg?%s(sIP4lD~2o_E;GFsJTEuma5^mU>+vtfdnG$kU7dt_3y! z{@*hQ;g)@ZuKW^u;lRW!Q1`QV=2e=U2rE8=*rQ>SY|Bn%m9galy4VVjv!1YTj)#}V ze6BmFR{ZWMULXzKy{)BH)#=3QuT@DOU-jq1q$08Skxv1)uyT}bnBi5we*!l0f#NZc z`*lw$ryYnwSP>hYcZQg2o9&i9>mYZ>&}Su|I5PVDmy-HGCL{v&>YP#w zQ>2k^s^$`eeLJhj)1m791jV4&Q^kb8aT!!)fZ-jKJJJ-xsI)4KfDHz;>ZBo*Rt|_j z#AFPm|K{qfv6@FVjtQjJrXDc3F#+uX#Wof|T;mLiWflkBPZvGYzWdcFwr409TMVu9 z_0okeexKU})0Z#ony~H{^33*RQ;lB66=iQQOKN7e%~UwJIS2T@4~-G%?ohZY==4@S zJcX9*2*Q=Bovs0%;D3IX(m)7-A5f4sUR>2E_vciak6)<3y4T)kUDKGkggsNGv)cA- z1E$Bd?lqWLeSnnm(qQRVm~sM(e#><6;C8lTV=bq{YMX9rqgpt6D};>CQF`~tXs-t7 z8rr(I07>PBEH#{`((~$%;RzLdQ)mpNsJaBb_SdQ8mBZgAwiq(+v^vHq=(a9miwmrX znVc@0k1XlAshWwXRjiz7_I1fYlo-vIbm4z)@Z1+Db;^K3K@P2AL!TXiJiUHPQwnp( zbMlWeW+ecl059c4l-kc8hB3DP{G8-Nt9$DK1yytBVgZ;5@XXdWvgtz2csWfm@x$fB zam+fB*NXg=-R6e*N?D&-wt!Nc>hLcQU8)eAWG<^O-Bzx`dqC-OW)^6^zo=iX0UYes zXHoaD4kfe^DFr5O82`oFCIEU8K_~;a>LL?R9{1BCkVt}bnh5D3xO4CX1@0tag8$!?^1L*n zfLNDQ`#z3d8t;q8v$70uSGCfdYvKGRbv$an?}W0_#p3UQbeLUJm33ToXfx(zXY zX80U=Fsq`_Efjycm77$7zn$Y4q+CS$vLJ37WUd7j&9DD2W}1i>;XxgA$D$Z$2{CGEl(5#IBy>O z-wXB{*)TxUl9YTEIbGt)#R5%d3AKtBWo0teBU3JpEOc?X&5${deyb2;A*Vl)XW z0|W*Wsp^YgJi@f?soYeBo}asqEZ=|KYXoxF*7Z9e6RsNlb^x-1PS3P$BRWv6{(tYz zyd3Dwu11jF&q4wkZZMhr0s#3D-^o7q$r+}V=Geo>W1B#j=gOPGj+ z1YiR_Akz=qJLBSSuwRi;^EmvX6HY4DJHZxm6EE|4Y2VU;CiI6&-?=3G&iT73}Lp9?=PR z1DJ+IkyP=-zCP%G4>Q;Hf}&Rj(wBcWAWEbS6R^&|weiQR2IPO_Lru6>17cyUp9?VN zvehLRlZL`I2{SJ^qy|SyT9b!}RqqpKNUTu0vkoAbwX>8?DQrVlp$5XEbkl~h!5*)8~l<8-PF{%lgQc0zLs~0B_(hGkq z2;{dv-huh@`yc@qMUz>aJDpc1r-i&|>-$7QZ-MOOR6~;p;KcU*^3&nplAq&1{2?px zpG5=ho2NXpOoB{G2>;jJ`A9RcQ>yh{9ed}f+8*y|+QG=@54S0=^sTgB*6z+^J$B7B zlU%BafxU!TMfm{!P=u_XpZ@*VL2O7YWaYzWnLzs)XqFtp|NbL6Zk@-~GhOA|6`8~_ z)u+(R+(mKYe}5uJIr5GLPz_CZ>G#oh1UdT+X$?*e*w}1-vnT8eu+cQ{bi-mg@+ZH8jAT+Ts8RHfkwBC{ z&)5Z+IC1>C3Z1Eom;h5g`Ms&2M2B$RU35pDW-7Jd9NVkbN4ALR@B*ac-xIQ(a(Si4%HXU4lXq zeAK$_>wVQxm3A807)kc~A7cHhpdh^h%`|Br7qWkkApkh%E&MfygT)w4=*tDT1-vWQ z@!{vd69lNP%vOqA&5eq4+cmx+nur&}!p`n5asn>&9rH-HJ#p5k=!i_zlT~3{$9w-T zLMeh6=0!#^AAR|oD^*D5cd4#2N4II8#L`x)s4@G>2w?@~BPt67qo(^V6=)9dpedCr za*ik@z!pAy6oNmZ4=#%3e@y1dX`FBEyTEI8 z>2hRKdMYw}q1EUw^pN0Ja6y(**^kad{O1{WXc{O6`G`2eeTiPZLkN4=j%4kwu%7+3 zd2@V$L!$QJTecH&zx)CoC1UtVi9bUqhGIyWN=#6bB(}akzV}5)zP5k^rPe+=VCYgM zfLG>)170>wsk6$G`Zb|nJ|MBcgkRS{@WIWVZB|IORsea77=*6?nc7JVrMCwon zr*YljP0dkk>1!j8hQS+-Zb@LJ*+*Qk;lJa@jlkHEWrDa{9Z4NNzQ^(YH}*)JRko_~uQ>@DKix;xi%DIfGu!Q>7eeF=yOyJI;2O z_|^S>zoiiu=*0CCIHFAUW}p7fvb+7ldFCI^{^b9L|AA0MbyTjMMp0(B4msA-0LedC z#53?W8vzUH$E&~+UOOKlt=a^WovE)|=64MJLtibz99=WyKnnGRWfNYj_k~a!jN+&> z`sc6X0=|%&gh5^S#tlG0cHsHp04s^$g1q@^UMRKMF0605PUd%sUiME7mO4Q)Rt2)l zwG$y#K10k}_lz#kZO7Ld0fH6}5V57(3Rmjk7R9}ci5t;-fB>F0cUJ*zFqt?eVq-u` zKa~B-aN+L(19*T0ctb+y`F3$a67l}c!i(hBrNN4iu01HRL<1j-V!+F9;LGLdSA6S? zW43ReigfX-@3cyOzvU8i0E5SlJ5w)on?aF23*It|CsbKHoAb^CYGBIYWW4#}K{(Ix z@GPDSj`wN#FXmNa`qkPUXH@C-ysDfKyk-b9kBd&>!!2@U-!basl15=j$9Q!Pq?MKNqfU2!8pr z41n_k{xFbWpYxx*s#gqRKx5=2z}|$L?$6ufeV}c(QNNbvu!W7*PA=$Hx8mMK#BKyR z0V-*;Q!SqS`1Ff^DKtPa4K9H4$CURAkKH>y?^^+2XQbn8B;SG9X9VN`MBhk+JnE!; z$(B9=4j)PbJ_LTqAb2SsPqPY9bGjdL#t2GStRy964jJcmX&pU&lTHjyFzF?>)4vm3 z2lfkBjB2?`^f{iaL-|t;?i4D;=@!rAp&VJM@lyTq=8#8=g=*g%U1xCsV1b$-9acv7 zgwrfi+O0WF$TNxEv~Sya`E~R&c{9-R(Mv}={sr(2YQ7$z7-;rwKBp5@-g4v;*(G!^ zxq;g(q5@qe3wgz2cQn6zr=o*r_s2ztn1Pp-ftM-jcWB^mVQwdW1NGRpJX-DD2%tD7 zr&^-ZBU4Nhcty-^6j&1n>idm5E6c_WDHJN_o8LeY*SS#{T*>p1m#JQ|&#y_dC_zrs^p#xQ4fl6xsRQJx^pgc>KoADOI1Zecu9E%g}xl|-j;x#P^b_qL7_{tE9ofc;=V`y27nXLn8{RTfe!8LoZ@tfN7?+I%*fRaM0q#)|$@1R6+zBn~~fYlMHeoUu6g z4*Zjm5eK2WnN)$2hH02Y*+fXb;7v$etp_S_>e`~8cx+|`{MSm_2=NLOr~Lw}`h_Vb z@=$ZzfPyq_ZQ{07?TjR7729LQFI|E@NC6P@?&1Mj@|i2$@r(im5X6rn*Hn}r^Y7+f z^*kD+9d;iYj1Ji0r`K{wGdL+Vm=;EEJK!cne9myEdBl^*Y z5;_Oz(H4s8OA$0wtQes&&R3gp-*L+%s3Gh6DtuwXoi8p!#h@~vFB`dS8&pr3G-I@! zLP-gMFLRfQ&9sk9R8K7LfjP<-1wCiuT9eL(6^7a#= z4I~tV)q9qJvNb9%Pq@RZub(?*- z1aJ-wt4Z&tEh}%^9%4Z~UZ&y|>K9fj=C6R9XBf!j#jZ*(A>L_H-S4XXLtpl#$FEmA znU01&*_bI%-%UPq9-bGThcEU1seOV(fT72@MO)rNHauMXOP21hXlyw`O@#?Fiokv4 zeT>2xuc7DKLH<{fX@ZGksNk7=D}bF5Gc~&pdee9CWCp5)%`Vm=!8bdJ$g? zpI;kh11Ut!r6bW#BwUvH2mVx$c2Yz|!4o{V>V1nmS`wXE{Sq9QDqOTojE9-lTctsC3Bi*9_YJm&;W>8`mYGT1rLRv8xbO}q)n-x z-roy0r0_H$i8+c{DPr32_vZN@PdYBrLWVca6X$wzKV*+(UVYOob!1d4W4oyo#rI#+ zkU&yujf}BdMDzt6Ra}q3X{2o=&oz1gds;WCg{}{ zfZ8pm^R_0p&gMsvX)2#bSFswG(esyaI@VO;4>9muM~KP(AR;D9LecGd!c)*Zu1!8+ z_i(>dK)`cirNZ+V2*ziWdyAK>%$oyh48s7n7B&3#OXeNWoD|GjhD&GSl8MdmTwHFI z!r^78UO)i$#Dmq4_dbcs^8TX9%}tl;qfEGYv01fdVOS5=m9iz9*XGxm8_t*191| zrmvs!8seNRw8Pay#Cid@X&dQvO0C)N@F$LUH)pul=nkSl<}(MhWLJ#>vp3w=2dEfy z8#qBSt=}amd^%ys2}p+gHF9L|fv;`+!T9>C&&Jxe5vc7)N`{fF7!BfUym={ncU}48 z(_`VhxoQWIHHXb*WX2Fm5_BqFR>)IU#5sy7sU6`Y(PXS!91iHL*{DC?c+=q7RuJVT ze>S^;HT45#bOKKW(GdwIYtiQ+pH6Xpgq~S;_0OUL`YcTx2OB1R6N%LcY8qjn{1g+AaMWXpgTJ&s82qQ)fw`gh) zoSgXqBBOvWFISUol6LXPLZy9edy`(Mmg)&cpY?F`CurN{AZW}x zkkH|WHd;MB0r1I$h~Nv#*~7;1x{^?a)R7iXcNs;+4;5AHoauVcs`mH%ayj`)1l>5lUw6xc<_9g( z925r$2LT~`%-n)R-{U}n&vsr!Qw4M71J0O_H}_{Y3zY1*G2Fa12h{k!IhON2hp0E> z-y-7omqz<~Wo2cvFrV-Z=zf@45u$OzLISa9g+glAR*l#-OXK_^ZUMgpL-sj^M|;%D`ri+ z^Uh4h_Y7JYx%k%y4St(L$yHw1eflCK#t+zWyzcHx6K z{VAeykW&P2U*i#;zDRZGmiqM=*)qAado$VQD#WPUKp)4dMPqF^JZ|)C^g&hoWl^Q# z5|yucjPUQRTboywlR+9_3hn$$xV(--q%kIVEO)U=oWnezQ8>&$i9NqAuVnz##l~9< z2M$UxiZm>5LK&XWp4kGY(m?n!jSN%Ep= zdAkAWF{pL~kwEYL(JJkGLxZ~))}6)YYu+rdx7{}gR*~ozhT-QX8?JU?F!sEw^_^|@ zbJc&#^k4fS!iFC{cvLLaRszhQuWfMqtqs5=)2>0)FYt7G-dmx(3*&c={u%9vv0jZk zj`e;&ck8t86@PHl`lpxQN^`yS&y)h#KB9B~#8R0-`kh8i6k(;G&NV4dZzh_K|@$_4;g| zj+T8?u(!vo?eA)~+gzl*wXI;l8d-S1^l}YM6n+)Yto<<_%4@#G2B}LPEoy5$*PUFX zTiIE61{yjz>qSctw5k6UT&ZC}dS+GmN}rFeY|Fvs{v{tw*iQ?N7#zvAf9k~=gekiW z6w)Nu>OTV&9x)WDP4wK4d*sNGPxq!jeeV90>rtgi@mXgi>GTR%4Ps}FRKZxY;gmwr zz#&|xWWYRskwUDq=3ouux4<9!Fb3q22t&=}tr!JA#Mx?4M+%~_v^wIdh%mP9?77Zr z6xIPWb~y`gg*m#U7IG5-%do)KGmOcp4#k#gmVxWjtydLH5E>o*H0 zQxK;FcD={pfrsEP52{;`FVMk z`(uYq|H`;1=xJWV!k0T?uoJPr_ofq^;p^PQ+ftsYFj_;L)D4N_azZ!nh z0s7;8tQZ0;o)3Y6vzV^`dyXp4d)HdZX}-gPpKY(x_Dd1be>=>!zxn63JIU}4r{eWt zi>lQ>0{A)Ih`k^C?xiknq?8TzA;j%@)N9=waHDfy4i^{RiKw>|PT>hz8#e~q0FIXV#MmIJBPL5s7(Hfx1eZvhM}?WIe1$94=m zZG@HSnfiS0nh)#sDwW@%_r2Fx)Q#80tl7bVGI(i3cv>b(ViXLJnba?mSdvQ*o~dk? zEWQP7>;n|O-XSnDs%LuM=%fN_Hos~C&Bmqf3!5`vOu?T^KTP4HJ+!5A^HGoy6e{0J zv&s3mDZgkm4Uc&ylL0jj3`-_3L4W=R*K#_?YC2m6 zlbGrG5B!V!H8-EHbN7FH@?aNrMedFyW{h6CKAW#5#w-_L7HKJP*cukQ1?(I_(?ba? z$<{X76%@E&pN`*b4@PgbPh#$`jTc^Xq+XGe4AP7oeY5#6$wod_b7{zxlo!b%qS~g7 zHQ^T07`d(A^^frUSzNV9s(}UB1aW*QEddBt?A1TQxa%m`lYqC6+4lamEq?7O)Je*I#9Kjz~UcYEpfue6?sWOhdMbFFda`#BKKbHEJgsQAz&NI(hly;ZlJ%+pqO-rg?`5YcuO+xgCoz=DXpgEm1rX2!41# zb};3h0>w4{G|=*LuN!dKt6oL^13agW2WOei1a1u)U-Ocv_P=?M=gJc}m6eya+$^2|D6ZjA~$@N%l zn?XD%1J@u-QUQ2OFrplzVVfYYb?oV$l?(u9v|+5hXUd< z*wAS~W6fpPu2SbiMnd0}6IaQrT z``mFPUzV-sw9j8=9V!3(XaVY!0KgCif~7<>#VtNfLk}L0%LOOJc$LK#7cP)TmtV>3 zjP?w62(xtm03@6wwij2l(r!5i+Li4n{h$ua1P+JTq2PFHgPi$>2 zsz@{=fIz7u;7Sz|Ju(;G)h7YOcss1GkKOEUpsKXPXKN z!CZwAQ4## zWY>t1x|b3TgbI9i-c%;0S}mecuR&PZY1rb-VHbjs{EUmyT#>&ca!A6*@4-CJU^Ge` zjSvI)v_TGhjcoi*2a2xnI9siBtVe?_Rjh3YYS13|ExjIi52**|u$# zyEOoYkp|h{59U^J4sHi{6tQKWDJ%R{8t7@@&iDT&@X>!4QolH)d9G$W?T5#xS!VqS zS7(1_DT$BFTOH#zD4gn!6h#VBheRMYDJ~x2E4m7BF>~BDu0@kD23fMPADt`&x5Tgp z{y+P|5R&hSTVM$;>$jf^ za628$S8NJ`RPOB}zoMw5a^UQ054!1=JHgszsG2N5-=7PRqiEpbQdv+SceB7@m;8Dx zm*&n=h8&pux{~nr639bxSZaGj95_kZ0JM*{Dey|K5xHa${86Nl8P_(x6P6ejY`IWM zk)UW=J!jbh;x7Zx%$LJVvY>Qk%r`+R6*2=4E9`HN35)RNCs#m*mZIfMMW&4>6Ok1i zr^*Em?3H?FOs7r!09LOJ3&T9&YB*m)l|`!katACUAYE9*>t=}hMCCUE{@?2Y0^iz3 zk8(c@g!)h<45A;?7?b#7anfNgu(r83B&i3Kt5NWA0N0%It`r*^xX?3H6hz98$M{cB zXg8k}j4}${+aFQ;d-1ILasaiRnlm;r!8&wJ7Ek+nVk`YW#K%YRw)2|{d)D&Fu$$!C zPs7++WUTWzMScmG(3u^Qb9^{+Y3V?r7kBd&{jd{%B~SlW+lr&21jF2IvKWr<|E*`b zgn5+xaq!WPDJ@8K;>V$OgY-N&mk;kS9v%VU%Ijst{_c4k1RYhJ-d{8nQb=TEC%_zP zLY9q%{+u`nFnBY6|9(4hXt=nBx(;xGkG@dt-Ng(Z_m6=fyO55pbT36vtck++dQVNl zC}fh=0D}lW`Vx-JeQ4olkb+CsE?u}oAPg9nBDyt{ zoR99oH+j_JTw6+YGqO&SN;FqFtE4&rCP@+WQ_h;lB{AC0CTw{pEbHh8-lQRu-rKChT8`GO3B z%nG;E=8=7Myf!hB|E_c~3&087Uv|}sEl3(cVF`H3@k3`IH>>0WhiIKgocQ@*o_`>L zjl9+CQFzqS>t=^;_fIJ?#tU}u&Tp9H=AOpL0zROUa6E%#++Gjv*A7=l9~7!l@tNNM+nx_ZQwj9L;Neuftz(h?nJo3f~$~mXM(ld;*eO&#~#b z*qK^gAH`z_Qn=}c%$w!>SL{+m>5-NDGBf>eu|fQa2yl%-%V%E?=4CxSJvAoyP5=`N zO5BImZh8x%VAEs(v_-i9;72)xq(9;E0YXlMHSlp_dOG1sh6ZgP%$`>T^>G{TKsnk* z@z}no2P{g7L8}gahP!&Rpvws|ED?hW%@^!DiuXEjz`r8MIs<8%N<%tiqi-CK*?@05DRlGEn~gZ zmI~69g@y4_!=N^f9#9RFoDuXSyv+n%HoqG}j@}lmUiGix6*e8;+Pz-PfS+H*?=0SP4kdX z;j6rZk50fijg;myLOycxK`hw7ZY1Hi2glE2sb=&kPzO{rAUO9IRykgRJV27h@e*Ac z?S@Iw;yQD^jrHHUp^C(ho?-*l!-8BzzvWJ=wn3H^rCZ4LEb4JH2-z9z!8ZWLzu*`Q zN)89pc@z{g09<~8$?$~>EI|QCqcYfm*Y%0iqS?*ewL>%+wKTdbpr$b+NEx%cgy6Ht z0nKVx^wog(cUCRn2gQOy&ivh@3-Ojao!MI#knr{Kn`LwpaS2-%?I1=zLdC()jW3pT zW^1c*oSu06b2O;n`7^&H5F`&?fb!3-qm@1~<*w)~amZ(ASHG@ba4dk>LQiFU#0}Tt zRSxP!vUFS8Pr=(jbJQ+V$pHR5U|+taeFHL2F+IUS$kLd^q(nf_qYfba_QtENh87cf z7%D=GYR(`Pck<#C+q`x#iKiHn0iZB7x}TVE1`^94qxCWV42XP^Ctz)AF)x5;m-TW8 z2M#=%d{DIzT=w}9#E>kV(k@`SJ9OkkhYR4n4OdsIk8gcGZh9bQ*%I=kQ>qNs;yXboo5u>$y@iX$9EX}kRgaW2mB$q9=m=-Z%th8rEAC}yHl zOYjOv2%;VGC_pMCgT?`?78Ml~eBo5*T!eK7!+ly_X`i+J5o9Jci?}5(3tD(xiDv|p zy<6TZT{}C!B^Vp|wUVtuv~z~Zw}sN67bLKx7K;G8hJ3`q+*1B+q0Qs=7nTv{QoC2e z<(8oHk?5k|^?E-}2!v8Ril~n(7A)kY!O0+%8GyQY|E&p0fG3s(j&qQ!u|@9(X{cP< z!ujzrWm@W5HWJ|J(H;`lY7joYH&#Z=r|r0H*79c6Yf$-ZL6#z* zCq#jv7DVIJ-nZY7eYnhqM;BY&HZ~fp3x16mX*%2D9%31ffmRFW6z4$3F;3oY|C=t~ zaD~Ik+D$&@=&MxUqS!F-~#MfuzgXh)oqB^8?YR}`D=awAc=c8#1D ztW;ITB{f}208MF-a2piy1h=LO4x&TP2_4q+xR)!pB}-!e>I$a^VBkL1rhQCWCtD+I zm@7JX3$w5EctO7ul>OhB<9ho(b5~v@~MDlFGWIb zIb!pbu1~%a4|d?z9WQ}Vow4$ALjSUu!Q&?$!hCecPrqjY^d6_1;oESJLTl|#%yJ$c z9#ZaaT$`K@S=0ZP)c8E&pPt+=e+8`LoCwm_1NFgpO!4lDH6N3`AJYOrEZUIleFLQc zc6D~|m{A0e)qR2`c=7r(ZnXOq$}6}dJi z0`giSc-xOJumY^xd}s&AjhiSJku>Jk2m{*o$DL1s=!CJ`?;CZ7-|RL9M~lZE_Eo0? z-asC34lO>7u+1=+Vp(y5e;oY01e1*{$9=@ATSC3bD2pH;e)enyMG*1G2{%av*E=bs z3k!YO9u*V*EToe2CaL6rRZ`m#Si9M%@2l8!ip&?DItMQ5C(D5Dxh#xug_r`2-uUPX z;v?nwgQ9w(hIfZ&^=H6cJWeKO3yf*;Cx=Bi0^E(uok6JRkb4^h^7UMYs zoSKdPL|>oG-o3G(!e?+0i(=k}atyw)muQ+taW~~;TYz$vUy?nM zqJRdc1{&Oc&&~{hGhnI@>dK1HduCCW$1Tt{M@n$Sx?+VAKjWuOhU7dh891YQstQoJ zpY+O>tW>X2W#SX>UN-YW&n^na|A_G$2zsoNBTM|U>Fu7_q~R}EIdkQIX#20g!rIb6 zV!%!fjuJBl60=)=hkF$iE&wN$9#D#?82Lu9QeQijF^kU1<<7-{Cbsw)g2hnN1Ow-a zT#>77vUAYru*EF}oHtrYhOm3{QSck^!%DeqPpM<#EbMxA(zdNttSg(s`w|AO#Yl3J`24DA8WW@OAV`l*yhv_lqLq@u3`2ZTLF^bMM_ z!RCOtk{$ES)t&-=kRVt8)=FWjvbf_P!o=~bE@RNE$)(1Rw0nnKYs_iIQ zByAU(q`;1{-=*Ir3>Ck=1__7$z7~2rF{~`WpN{FFJG@_afW_;p#vWK40eC;t*i&J( zXayFu-~*Bi%DzVDp`KP1J+wabKXEvuQ8In6;d=fMuN@1d0fD$>LtjmtIzu@@zYc0g*n{nGeuXKug z2cqCe&%;9{2lAQL2aiePS5_gH7228( ze7K1m$|tEZrS5QBkGsz?Y;(sN3MJ&q`$5|?$nAqj201c+S}5E97V1n3jZQ;a(*eGN zg$_biw)n+Ps@x7ZsLw%+wY-lULa_k)_~dN>))-zEH*n;WxS`}$L1zSj$Fi(xe1q1jb1zIq-p_>0z;jl zp@g6x^lR1twfpI7fR_o9(jUPbOn=6R(to_s5xyV(Z}Q z#dwhRIH>iv8h^ThLq@YqZ|tld&>5=eO96uTpCnxZa=hi8vls|2ad~{bWw72`)w^%fp^bv#l;E~w zPHvMO8&H2MDJTei3d=)aklWvv@duud5BeEFICK$OP03tliddM?V|0V6I?ogk2Y_%% ztb-c)$R(Xw`1v%S^uNV*69N4ua&}~Z&u0Rw)8cHWAPChe;O7E`M8s1sXdIC2f`7M^ zr-6{weAHVmmY#AIC^HB`eSf&cp})I0e*^;OvCK19J;SeUTwY z)^MOXRIHf@pn(?en1e$&yT-t=O=2>;0;kRjj54nS8hQEqzw6K+UhwJ(y*RJPh= zFz>%(tA!rh7RLVsERXtVwFj;8^7L$HBIyE(tm9{F48%=zQ$HOV#j+0EUA;~sEeLlD zHa9`G0zZozYNwxM-Zvd8tYQDfy<#2QPbwCn;O2!7&;vmI24mB5D!7e7puz~_Lyn*o z#F!O}ShNc47acmu_4PHwlM+MVf*u%bvtAJE;;Ya_Pj_lp?AMlhMKSel$X)Po6cBDW z+r<_v;5Dc4WfqnIDCcC4?oh<{KN|37OgQi)t4u=kenLZ?>Ky0704_E5~cy&j`r@~#xo~EAt$Js?r24>P^OlOt*WH1E!L?m z0$4@>^^+hrK!=<ac&nwuvnBl>%E_sf?gCcqXdP?BQJK34mAU&1(u!5*@_v3N{ zDe^Yu(f;38m1kc;H@GA!fGtHMnx_hU;XZzP(BKrjX??&#L$$|2q+t~U+T>7<#8xT5 z{ujmYfOFQw>AMG|T~Hz|3hd3vhI};Fr}>7eE9>zb6wq8xtQ#s4Pr~VnQUcT-J_w*$ zr*MH)2?k1?OWN;==l>pz79|S%tjvSFO$()#YI`35KU*~iU^i7T8y-SW78`uadjucA zW)0RJ2XEA>h#8;?kX$G|?Ip&OJH;LYZY-KxnE;tj6iTuEc(?^5NV5O`?_NMRHimEk z;snVZK*pCm2T5yuqzC|9(m!bz7}#tOuM$aNpL=^N!U2YJ6yi3}kleI0zlGUaZ2Arv;oc$q#=`3 zpf4Pb8$|K{SDT0eJFHP|z!EwRjJ$ZT?FMe z1G|blv;}%TZnT0vhutH>e2^m=IH67M3G*MZ#}mrH`4TJ)Wjzc9y$GK0Zuv*;tVv-) zi%id=!5D&p04_W{6mpp+DB zW0pve1i9h04<~%}c=08hubVyI6P1F zV0dtM6af^qa7fCZ6ktYF>dAz;$q?ovMBj*jeyrh-1J0=s6W9UFcRd@w{xP{ZtRLs5 zj6Zvhu!nVZb_^eYM*hhn5p>E`4K|gDa1;7%V08C7@le#O4hi5(ux9@(X35|cbY$NxKK<+nV@ZXNCh6PZ@83}G-}XB5-@OYy#uy?%Sv$j#`e&Qyjc~s3nxG5JE};5m z_IkJ|;eZS9gtO!zijfaJtAD`{ZeH}BIRJU8N(>p8Egpo)kCRUyTkk%V5l^koEi)vF@IU*>mBHQqkYpp-S?lTb%7!xz8>hT3iGj-Vd;<|y_$NX?(@npfCLBJv!>yWvYPXM_m9WxO$7 zRK76TDgu{MaJ&Q$_H0e)wA)hJJ0TGcRq@LRuqGOlQ@6S)=8PfU-dgd4@peu&Kb})T zfB7C2ZlI)~`04O5n`O|HO&G8)0*M}b#Gs!Y#!&*(9F~h{rH1SAXb-DA zy&&uauu?(x#d^n2`@Z|1`vUa$ zzYl&0{QrIsxPu|cgl|QGmxQ6CA|;+I0E>nUs4hi<^G4r0+}nP1UipLV*d{* z7`5|nMPf~?V-3W+O_vY}@V~3fbMUcGa$xZa&xQxVOz~Zlrb;Fh7*J;l+l9n9`tHVb?G6fD314G9}X(diF9puk6O zjtw>hz!XNMhTLc3lPupb!hvV-e}K}(F;8UW!xv^}*uc1e!)T6}kNgVOwsS=~U!5r2 z;+0=~4(b`QMn7~a%~Y(=TQ`7?KN@-=hu?otNrz+=x6E=0H8>@+rU|hoB{J%p$|VVg zBfBMUoo>CWvF&pm+ke5arWDcA2EuKAC{k@iGKzYOAgCYoY2MX)vGw*ndFNyPNj0#v zqqrbng39Ir?Yk$7-Iq@vO})!#d-1mZU_Rf7jy1&8v_Cd!JWr9ab8++fY=^6Kp+VE} zwc_3jHK6&h0{ck7c855g57&W?jN0~;dSxMoIv-&-OVCIKxHkqgV+S;?-gxE~DK#8n zrIa#u50>f*e+xXbq@5ef>|?haZ7D`}OWNA&?}}x&vk$?eOPeYa^1Askfd7VS+R}qws(r=QNR*wE zp5&s96$Kb=Z>2B%>nVGMJVwh@r;^M{z1=f7ebQmT*91)Gad|@_^zB&kk$ggjChUGI zcc=vMT91XRCyIHvl#3&CF&Y<|55I1^U;=YLo@eIm%q8RO z2ic!!q^x|QPU2r6-su69>A~x-i<9=YI9Wmhud?0}s?3tC9xk=x9FpE09pfxC-bQ`h zf3285tcMci^=7GVve|6{=GZji2#Hc>E5V@qqnaHg3l-(ERXnKaaK>(sQgXrZjyYhv z+lyBz&9{eicW80{Ux96X@7Zyen{osOQR%#0@M`xdmYwNki`QkK^K8*-mQ70A&t$kn_^^Y5a*fur2T>e@a|J>sQo`Z8q9xRWrQX{TTP0dvY~E_u=G zihiI{ESuO&LsdMzfoga!?Un5k4x6sJUdfNKpaf#K7zfFqlj^hniG-}%4_yzE99c4W z|5s9%t(idkQ(A(~jW9^oe1L>oy~@^R9Cz!^)!)xWOC_N%pZi47E(bv{X5&{6T!CuZ zr44YW1Qqsb4L3gy6jwlm((E$uhSyQMJD!}G3i}Y$anT^`_P}Aj4$ifAF8fOQo7acOqVZh z-wZXMRML-S6M7l&KP^T%TQW-$Wli9W83ToKK=9LI6CImF1i`Dk)G;UF;PhPUjmz0n z%~&cI@sJmkEvIpf+Wk?uS&82_a|mrT8woiac!15-TgzI6@vCBrWGvq2J1gPYXi1X~$QsMXb@RynG%=c3z+w&F$ zRjjOt%oFEkD=DV)iE6T?=6aRu=ZwUN(S4nd;NGcLnM2nuH0|QW0P~GY{7d;5?U<_7vCJiEpHURnk+E!ijHTHkR$rjo{IeUw8Wr28K3p3KD&J=%}NE3FGRrq@&(&e>3N85Mf$bWq-CWq!Zu(apv&b9~8S zvuhknZ6&~{PendetlgWP4g!&rispS0fDZ{cN~4b15XY;evp#&&07WEueooxRpB7rg zYX^<-?2PhQ6S`&fESGXfJe(gjx|!0Dv8cxKx}0*lTJIk_9Oir$lFjcb4yt}BpUgG& z1`#HE67xmh$!XTcNkusITC$G3eZDsqt4OwBR{u#cjPw=kIXV9zrIFIBGJ^~Q(tlVE zXumnK1r!971|vLpRv*&~sm*~AZ zjx&(twm3~*a4;hmx{74PJFXYzH@?$jRLTiE6VJx6F=+lZOzKIKJM<&9S5bFGBd{87 zKb8?RC_qJO$pd-DA}@1%wk5eQ$rsp&TDU?jZR-BB4%fRZa_Pl^LAuq>DKywdO8s*C z+xxG*x_smU-(XRXW+&?gms{3<81&p7cSxbJuNkrYLe@i0_Z2-!;K8ZLV79HmvDfk- z^^r{OV=4Lmtd~k->QM1JUp{WPIhb!@VyW%UewMs93Y(_YtB>#AOQ-&w%Jpg0vFd&t z9rIF6|EUVc#G3Er?5>1gPx5(_L>WIV)svXIm4uII`~J!~U3ei`x!f>$PRQxvz$=no zHt-~?P09;1+ddafMR(^#5~4i3q4FJx3PZv5KCIUejBC^Ncg^ zck!I5Qy=5SW7{$R*|KoTR!!D??g|!7)Mc`|;6qlfj)cth0qb_|h3Sf! zB;M6~b%?@Zz>R5RSd1qcFYb6O(p50s(SI_oVC7_~SMJNa7+VAQZ>oOy9zjy&*=3)? z$GJ`1-&NJ>{ZXbA@amVgT(ZG!btOGAscAK{==<}mD|Z( z7&F?~B@bs(w(gV<*eI{VH9G1*=MLjld=6=pF_5lM`fiE1Gq+KlwNy)7pI_G67sT$y z9AG}cow(gQbxMUvo`|hFr+b}V(-)KApqSVkV86*kHU90=My5EIw#c&Y%cS*a5~uO7 zY`>~m#_hOgcj%VlXkgqacklOxw~2D&y2Big1)*r|z@?H+oX^b zvXK<32FmQF_k1v+Jhs`KTEO9;Z^@cp)cPO#^ zqEY`Kzf)Pdv}aqRq-(Ka*E($xmH5eL(tVxDta*MerCvt{V`aC!bt#v|k~wGWghIn& z=Am>REq%Q!Qmib&HO(q}UFCQoSsc0yPa$2U0}?`d{Uqa)u+_@%1VIUf!}>L1-<`Z` zY_i)e{{_BOMm@q_nwKKBlq5Yw*_iEubE#IkD6!bs7ZfxSu)ibGsr{6Y7b0uqs?;1` zyUw>*?z%B&GdtpPNh(NtqDVOXJ%%QNt+NGP$0c5XlX_tAGcosJu}l03r?G)fDw~t( zKr{;}AY8@&=<6+gX8tE-YBg<7Hjb*28P}z-GPfExOEyQSLl9>u;7$oB))9|eRet~QZ z7^)fMT_5aQxkP@L5*P}Ag&IU>FtXYNT66D3B6j-@CFvebJKs30A5sPXt-Kyhh0!D7(8$pY681QW zD#CjQC{v!0%aK?XOpMeDQEGdsto4;@vuCm*+|J}ht6D#*Me7~o#noiD?n^zsjYMJS zGT05A&wiJgtTCkwe{rtHGz`faC|I*J1^)YubRRee`BFXhq#}*83AOkCodYp%1}Wa@ z5DVLc`~4;{wq*OSoug!O+3-){xQH< ziy=)Qd2dU5sl_uq_|kzfB4BHrpW66mUUy-+(L;*N!tEVpRs(6|Cu_8h9xNj%*MM!m zZ|@jP{tS#W7LfD4Y`|g`PWVlz$V>znP$3FBm=ufOiumw1nMYYtb0O;EE?)nZRBr|Y zOJn&qZjQ#d5;JweT#N9`kd;j)7f!)Yv9vX55Mx`@Y#%aEujf|nK2uAB>oH&53x_Sx z!pUl>w|kh&=9R+N?BudJXWIPpJ58hDPt;As55i@#c&t;?h+Nrhw@`BfxTg0D96OYy zwKN5qL9D5-0(zexFzyb<7gH#twkx#UFa8$j$C|^wC=a(2~K#a+oVuTFb-p} zRJb~Gap+!Qa0}J5L9#CT;=>Uz2+s*v3@X*BFXXN^GHtB#5a_KHf=(8DC!P_^;cDxlh{#vt z7ZfK7*{HNr-E-A?dAn&zhb6Hmc>cT0589;7FneC*)0{3z;&z5Q7Ce!l)OV0AHQR|; zHE|F=0q+rCrq}$v=+!pmz*Lz*h_gjiu2GkG%b}WGOYGI3fG<^3+O)%Swf3oHHDA{Z zqPxGG3Qe-|`J3Mw%8acOKKzYvCfvKGmWMx5ulKvR&R{gGd`Tq$%Dz<^X<^%*{qZ-i zDTv+~=#SF~=VHJ+v9?oOLh4k)?&Fe4YrLR&M!CC~jY7Gbw+297mqcn^lG+Vtlmndp zT}It@4|>vSds!X0V*Cuf2*e(7i$s%E;|CYcx^THrX7@-F z-I2d3b~uEzn7XQ^2AtXRrnGHtk0Lp_#ig+s?o-r~E=c7?+_#sTQ)rFoBi&WbazFSQ z^H_C#Hfuvi8&zc_Pehn5twVP!thVOb6Y=*uTQbAuUU0DIWbpjBI_($(^hw;&{xmOVH< zR=WEmv-#Az$5&33O*cKE+VRaW z;w@zoT~&qDR@BNKgB}I4TjE-qOpld9nV=JNgOJ~GI{?YCR~N(?%wm$N@|Tev*E^)D3oHU&Q#b|svtuAQK!xI+naLjTL1sEcY#mNn!Dp=Kz*pU(;f;X zn<9rcYoIlJpz$vtex6j+56NPDaj;&CnU=&Cp%ZKlNLe%_uVnUY~S8 z16zpE2Tev>@l`J6+2gj+FK*?7nzWInOpif{Y@|M2S$q~D+_8KXMehxhtor>TixQh} zDzxBCi%aDM5A{n`d6TbTdv8Ui_uBGk1~sk39TygfPY8>W!ZhF{a>>c85{B>@)=W#X ze5D3MD(IZStNnm0)Q-9MQqoT#>yG(el`}Sb?_EvFND@S-&Wv?qAnhk-@yp*DhO^rd ztIQZD^v88&Ep(Mq-zcdXl^0I9&2B`oOe4uCEgN4={`p;=_fg&3?70h)vYoYB&}|kU z{@2Rycyz)zaioW6t!^&a?&>EqO()fkXXK~G%D>rguB5y-Vob6F3TQEfi#FHW5S!9- zTby_#4w6>22jpFB4bKcIdpv4iN*)@_(Whrxh6Z3SwcNkIb|#>0@tj3qG;E5(J_`a) zq8ldlllhGn=l*o(Kj|<^(=2jUiZZs}M?P0$T>RcfyDxG$MKA}!V(%3=hrBd{}HFQ7)}=@}Dzrtl18(X0;gcP3@91ExoR< zjefg7@~OdKd;AR>7B-5aYgigqt-F{~yM@{A%*11WPnxO5g}emj;X-tEmHNs;emd4+ zrg};)WP^6cIWQGv)LZ)dfsiSY#bE5-Tnil6 z+51deg`l>E=D|E#-Hn3)Ii76zZzJ?#I`xi%>hx>vQsOi-NApb%=`m(SG@!tBK=PZ@ zF#d;&ZLBO+GnVNTzX7f|L8HRyV{7`6afY&1V!cUTBc7%BfMhQ zDGbB$v*Ov1o4%Nf8U^08*Zi4SHYQ{m2ME!6$sM!D?Pv01GMyf6RAmE|`{RBw{gLsq z_Q>qx?*xqM+6(o}*b9^JKpDbcJ`RD=q7%nxMG%EpngGFM?UJ|Zj4D+z3#Sx>(YcUt zGHQGqetR}0S3D0@%lJPh<4ds?oJcx~{T)c&s3svVBk)%|5}3WG6eA(ExV&w&j4JlT zeGvu!H0}f^8jTN(Y2RF%sI1#?!$;>yiiV)hF0;@n?Lw$Ij=_AhLya$hPMDpuFTN3Q;g z{n>;iXcbopn{i}!g2q(Sau|bH@${}c^X#vwcq2)br%SJv+8^I|De9IW6gnQ?)woPa zVi{2F*P2uo@ENQQ$m@sOOQVeivfN;i8)ft1PcfH~+Iep1jvaW=Nk&gR!&mw9`$4yq zIe?zcugy-w%FYnC?nn!D!^A;GC7iK`#lF!AB0f3P_ihg6QAG1=<`Xix;bAxUKV1J~{_EZsK#1yONH zv!W2CF23c|rRr@lzP!xkV>4|QFKJ6Q$JcC`ps!Zy;TMM|URpr|DQ8+CjQE9XfsQz} z$BWvF5>~NXIzeA83=*FoEKnuAGMz(Z)GKL0{&$QXUvgV&$f0Rm%Uxy6AX&Y|D%mov z8tDg_l0b10rMN;VNYAY+DMj*P@;!VmDF3TEAcUB|fO;b~VyFG(AYm zG{uJ%{j0*way7f7>SZdZ9Jsl8Fi8GywW`Ktl8=iQhyS|%>GJe`Izt`{scjm)8Jbg< z-F4&Jt;bboR(XAXBqAJfIebEPSj zCwgl9S4=MCoaKj~b9p}5Gs>cGF%N+x`NdoEmF-;p z&G$LAH?;M0-zt@7cKK(*6D$Vha&SGopdo*iJy|?W9~mQ3Q4Cfqm73*vvHrnMcaeNR zW{7WW&{sV0iZd8#l^8GyF4I$c6L4c z;h1^h-^L~@fy2YxTI5|dpY&YBdq&NF2*nX_WL3yY#*XJ?4>8igbI8X zrp+nmswMR2YI`187IXNM>76{=JCZVG$rtlkN)2{x1^vR`RB|x%hy^jylo(cWPYV!q zCxwEPWaZjQ7-R##0yf=j#IQen#f;$)t47e1wjw^S-ypnf;i|Gc{j^G%kdls91*99KL8QAIzS;P`=bZ05zdzx-F7(pPe%7;M*37I~Gxxm_tY^qgwY*5C zT*DcG2db2vX#dk)Q~Q81iFpoQM4GWGM@qsy-N~ zdTrwVp*@)&=D`BD{!Ax#4rGeoRcM$jRTTFU;)6w@*FBNOzbzS9vmY}lqLw!z2A2># z90fv>`zI{rq=Kf{cQ=`s0z~|8+)?F6-o}yl=|Nm5m=t07&glGiL(h!vf|zb$in?sA zpi0rKp|Nk}H2`|+PWyQxdHrt>oMm15)8@{{c!C6O>CFyZRj!wy?UkrA%nU&Fas)T4 z0Vht3s<{*2A&EVg1#Sk_p$F1x#?Lh!j*H|+D}O&n#+m7Ez`gN{Jt2RMrr#V$pORI1 z)UP@aRCzkRxkjBl+hp&@itOiPibvk9Vn-ObR5C;ADFKgMU`Kzv{l4enQhFsse&pAh z*P>sA6**XFr^jbL33Mh=_iKB1Ugs|LWa>QF6_1)~Jv|dNlExF$(R~M#!K#Ki^EJP! zhH=ZLPIV{w(&vP*7`kVWS&@YGrW1%_2Yzk6RrTjE83~eEqET({=;PovJZvPqZ#o~jId+O@%&j?aJYy7QqsV44fhY0ND zFRw|PihExFlWrfAW&&ZtfjT8BJA92wGuA=NA;}-=yrIeuG`4D8F1+NDszi`cAzuM2 zR6ZFeK{F6Oy~nLhkWVga?h8tAMGB_a|Gb>=g|caUUr!kWWQ2Y4+Eq3 zG-6QeLRYYlrbDch_qbt>H6ZbLW(ix>rqk%Ne~UHTYVNwUIKO$J{-Yo&X2B0T+w%E` zOogPc6X*)Fs{CSZtG!fWMOu8a!Fh+oF4O8Q`kGyBX^Ki8gBuf<(v8}Fxmz=ASTU*B zqhsGI;2-JUOqLa>_{f)PJSx#B$rDp;b38tf3T&<=*A1QtdN!GrP;xEik+~c zKM7t8k$uCaVA%Y-yS$%9;zCkT-9ILUgu!>W)ZQE|lPZ_?TK*mj@1bSZ7Oabh9DK&# zt4-AA zHIq!E$!8a_gIdgXU-N9w=Cu{x>By87=`>`%kP5`aZTE}&MIJe-&dem>p+~R(EJY+q z{P6z2i3u~5@2q>)gU?ICSdqr~gk583{Tw!q0g9D}&Un`={CuK73*7K2@JJ`kkXgS*0&&94d7(32T|! z=QV>ol3B&HzY_QU17gJ8kXf86;yz~t$CTx~&&TRNp5P=?t7;XcD&<bUEw|NJ}U6 zUA&`ifn#T4k@B2cjgUF|>vfJSCsnU?(gl4V2O1WFtI-|BCgIun1rkLTR4;QVI`avw-n)F7pCg-OUv9s`o9 z&@+oeqEwJ~>7*2f{6(cAQKr21Mg--STB^z&EtNM(c0wm?7+F#Tr5ehMWN|i58t#^t zyDAqU<^%>}#U8VytB|nVou`t#6X+&gYdx2ltxZiOHh#USxM@;_m-+VnnKXO8^pQ{2 zI@MNtwf+{6+re&pme6@z^pab$L7C-2Or>>F;84mZJ^KC+Jil1&->PZwSO)l)mdP{X zJqU#$?6-I(yqzbSOn=R@_hsQdxK=UvCP-Oh6jOgu zD*i^tAfIcJwr#C^@1Z_!Tf48r8h_!P)E^&yx0;9GtC-TFiYSwpECU#SK$AHn{!AF; zlwsS&@~27XGlgQko>6O1Cvak+-T0tP(k)=|f<5J&q*uwGRW6W}R)8t&hurSH;;7HR z^VIyUQIehh5I!xR)z9QVz7YNpk)5EDqA31);KA&B02rRq?1LK@F?akfrFo0g z)hetHTuoOg^1{BjL)Jvswc>eXCC;iN8l*7FJPgH}G3+ZMC6;#KCT??~j2_xvolShV zvNSw-+m~y$3~fude?&Bp9!`kTs#b%HVpUhV`L=InjBb_mwa&VyE7RlUr!O_muA#+;URPIZc2$V|CW==P&y1}3jpHC&S`~9S1 zK>dmPk7nnG+GUH6KO7kPMHpN?8t=P)V4-P{`tl?=J05!!&0kG2h`2BKP)k9k(n_S zQQ}fdd28%4w$b<{D4!rXtFKe@V6m-y3?(@eQV=o+1Dh3RP)DLEFGsUha}cqWE_XQ^ z$bO>Jj48XLftLX31DTo*>oA@VaKCgCQ;zy0pNePi9#slsUWl$>;F8(L{+@O<%cXCb z#Jk7A7aX{9MH;=KsvYwTe*BxT|@^KaljA*t^GYm~zIQ zf4sjr$4KyEa~P4QRvM%=1SQDhL58X7LZveg_?g{nUz9CwyhdhmCX7&snWu%{CKp@V zdF;aAn=>N$Q{rS?!Jk2!+hzNaEYTjXcAc%N?xuv)0wY!;LwUU7R>|Opf`L(+5J*mB zPTnIC{LGsq=;jM2)#2hy|wpQmBg_Gto+hb7UgNcj7sqDKj6qpYVXZSz1v{JdS`F-S)wUUa=PHihs zP;6Qb_BYI11alyg@`gLwxs7r1y_S4sF?EY9=|GW>CK%md^5p8Q6SpH+_HS=DwD z)L)ot?z1-zU5(6U`D~z{+KC#4O?++MalhoOi0c39>{MOLz}!J`nadPr@M{`5C9Tl` zsX42oOkwKZA5C0ti42gDE?pBF(6aUe8fFE+%3d>+$Xi0DSohL1X2RuS&Z{O;Kh-Jn3K!rW(ROz9TD8-j_o?b2t(CQO)PrkB52Ph# zzDdd(+O=Kmwu)A}A0@ita18x$)AJFlJsnhNX%F0ZN_iZuXXgL0X-A4Og@4(Ie}9`5 zha=<1v%{bsv_5`38YwyYe!UX4o5r^wx!Udueu>{bl;;F#6zD^Z#5<+C%?%f=&2ElT z%o{Gx;vdwT<-CtwS+?uCp&^SO3lU{#YEQ-%q1fWF{dGeLc zuj8cjjbIJ`arshA!y)nGzRvk_&g@*%aN$&DcK6G+H(|lFTO;WbJoBk#vE3MRTd!z7 zvYHfJ5Ia4$%*PTLsFNgtzgln#Vllq-rr@59V&Q8$2gfkY1OVtw<9}J!iht9t9nH{) z&x|LW;ufb4am_G?FG${oU%|nM^M^y%2lVeBvY^-*1zWsB{MBSfF`IGOMODI8i8ERK z{OK9vLIn}ihmeA)fhYVc2u}N*Tnzv8XDQsCZyPd?9$o0VCx*T9iz6cY&-c4%B2bWJ z^t2D>xQ;^1rwhUE`^c+R_qb=AP*;qxxm!c@nyXCUBR=w15^&3FKytUN6lVXmDQMm9&1n z#uJVUC|JVK9tFnziXmA@5#?m4=#CpMdQ8f1CA>TPE4eC)(FOf~Y!9%Yt3!Xi!bv%( zay@6``Qj33q#hJkZ2a9zv&4%O7ZYYE3_6H;{hI8JzYbaGhhL=^4~qXu?&&MF-44nu zgT$m>PL5HVi)DjW6SsS~*WsCPc!p3?+#>-}W=cimA`wRYU+yIFl?-uMkI6gtMl5XMF8)Ok?6)su<| z%&JhQ|F+2`2gkO*S+#R+y?q0H3BFZ;(RKI%?J6Hs2t=3~|6A{qEIJJdp}U)tqygYx zihnRzMHxsI#pHj?==C=y^gRS8{ry!G*4y&QnP(oBQdcu@5*HAT2j^EO>UTWxeuTQ4TJqtQVPK+KKE zC*S>BXU>YO)%Z?_R)9^$BnHL&fAqx>X~HKFpV*)*q8ROtk0uh#9Jm|rX>yxfMe)I; zLzW)-{FBB5^%dl}@;5E2mem6Yd&#k52fslRgD#q1o)0V^QsF_JOcUC`iKzD8rmYo$ z6)z6eC-;~)DLL=4nX9itRfkZW;D8FUvDt=)@E~FAHVr{36i7MA1G}N;ELzlvJo8cj zKT}8rf?9~29A!>DK)u(~3;|`G)6S@EW^_{{e;;odnSGaYEXy2(Pk3=n1X&%gf( zk^Z#-fo%OzvZscx^;)5Zgr4hj`Sgn0mk-ySjd`B?c$#A7h(oXct-%^I_04fs5BJ9P zR;KU*0zsj-h*#gmSZT2->KVH3zPDgJ_;wd9i00Lr`VC#P~$h==T9VBu>~> zMe8Xa6vy2?Zq}}o@MrirU}?i05`1s&=PV=b!s$hR1c4f|n)+>2BjlkhW$eDr#{X2P zBl5?n22dJ1zDgCh;fQgX1q2pn&xo((X5OX zQkEj242e}dm`s}7e6(zLzLU5b3QgbdBPRakQ1qIn4ij*5c0Sss=6BpAceL~!6>fY5 z3K0Ejbe_*;?bNOis)WI@TmR@S)|!U}0(TaC&8yCps} zfGQO4P7#b{#3AJ=JADYqY$luIH_N^MS6c+XMc9I$OT_mZM{e;$wcIO&7t}#3LTMy> zd1oSbBmlY;en7$`{QYU<@dB7Pg#wLowhu_wEg*I``VAv5#fxh&WNvjL{l z6xlFhlMJSQl67b?_cj_5q{V8sxm;r#0QQiuQu)UIm$YrjFCGd{Lj?a002$_U0m_p^ zu3~0Myx&v!RX4J#d}KCo@>+N!k{$`*qg71_d2Hzb>aE>`h!R;IcKPFuh~slA>9i4j zf0Iz8&gB3dh}KY8phj%@olgk!3avcg6#_AY=O8Ko!lVMM*$Y|G2N=krG5Bf3K?MLA zNGBD@=K|8LXMhh;{{MZyS0mI#(*f_Gy(t&S)?9bWlm*!XWQ{BQcvj$5I&M+wcf|Et z92~u~1R%`Sd0|TU#R8PG?bSXO54}m*Q(^h?H?`=#77*_mee=cKFCoYNxA(O%$l_Wg z=CS=rx8|$!!&#gfr!c6WoOS}i;kSI&OXY(BRK$M2{Jr@J1l?Vop^Cch;nTnH2T*HA z9mcH90}QYM6Xz{~1fgUCu11_VcrZJBp&-Bu!Sv30SG)j>2QMLdc=lfjABw!|^q*{) zVysTqC7hm~+U~`MO64nN1{Bu*1a8A=Lgf2(NqVt-Fw0B^0}el#^A`dHT^V3v@T-Ua zMNPPZR_iAOWW`3KWyaOVn%Q!s8UT&)e>Wf&N|pp@FPZs60OrvTWVUoB13A^(R8ZV{ zIgO52z2YA0wslMLyJ6pDh)U#yW}Hi?ka!{2!IbTJ3X2g0QGWqoh2QFOk|f>9UyXXd zg;V~-y4-wMP|0Lo0ep^fjlKE%V@|hYTa%HJOi}$XDBf&zLslMc{7RksN9+@zAO1KI zAhoJZK>`_s+!4Oik4SlafeIDZ*|7O}1{z@d^#Vc}34>1uZ0&WV&*Ro|iwOrE70Od+ zzD_O#>0;G@pjv0qwf!qbo-GK+?LU}J!NkM4=THeBt(YyhrmTJk3E02_TVb7D$H@HA z?Kmzg$}J~@E-nF^GJuEdx+6}A+yuxD+(Sho{{ajd*I)`@sVSuhxFs?KijX4`(YpS1 zqAL6^ov2p2|G_P}(zRA51-MLJVGsV3cO}`MiQKG7fajiDLI(?asi=}gJ^_$Vna72D zc6B7Z5)F^TdZLoP0tT9Qq|jwi(FwtC{C_WwKRn|5Tii(~?MUYZ047Q>#|8WAfcp2N z-F^g^z~8%{9~tH!#jw1T1Ja0!zZ>UrIJT$&1o;VY)zsIrCJpI{d61~XgFFaKR0`xx zmgDxn`uwTqfkze)ZLg02nJ_(2T9EycvBms%Zvd1D0KYb!cc)8+Dgr>F&xSz`A=*t) z3?`4{b9uln0QL(Ga;JOw1TguH9&{DRQ7v0%A}V7IO*5%K#UrN#bf`< zy%w_kG_%r*)d$3b=%#WWoD-3tsaN5@&eNa%)5gqDR-pj{nQzAkxUVau%T&6{#v@kfI16vIdX9A@}i;hSC@$ppG*+{1=Br`QLFjX2QnP z4j|@a{m%l4X!(yW_pMW?(T{h}m{X4h6|p_U;dk2WF{qyOJ$#JUmzVs0*MQ(?(!*J% z=|mqf^T}~LlSz6uq06o^Akg8tQmIrl3A15=P|~-~T%~vJLlLXZ=6uD8=qzg{oS~CH zk_RQ_=lJ%JO=Ou7$?e!bzY9$`M<)qIpXRlza9O*`gmQNpO*NSdSC!M%R9z4T;6L`m zSlJsO()L{!I8(EFoS5aC z@YZRsE6SXd_OBU+V`ekSvF^_{m5Jr3rp~0PmA!Z`WJ-vK--;MlpHD|j@HyjA`38&e zUguSNKBGgwxKdH%Y@y5f{8&tVY2#MNPVK=e$KbuW!$Wn`XCsM&xi6Vo8q}^cuYG!H z$a~KJb;yrH-$gM!&AfnmBv|u{A(8ATSBC{z-3Kb-Zpx{fL*3nFYArqb4SmP6GHd$1 zNnbNgETr*x->siYH{(a<_Z;!kMt|xb?nxEt_rk3(i>aU$liBZnYAIM>tgdP)0_mVe z!ozFMrGc)`r}(l0M6Ex(mFH`E5}TfioyD!yw6$_Q=X*!>t=Ll4PsSg*2GTNS8@|Wf zPn6Xi<~pzG)*wb7Oc0|kwdwZ@uPAI}m-WY4-&h(%-oCYjU*bzV)(g zGFoLRC1rM5YG56(eItIk%oT63mS3b6l|QJlN{}ne5SHN?>rCg|I#{O4RU~VfLd3t8 z*sb91G~Q?`aIP`ooEkz|pp*7Bvfp_j^O?VzZoM>9RU&(iWr3A*iOF`{8wuMD+xW8AIa%6W=n|Yq2{$rXX5Mb4w%7C@>&hTE zS!3V23!Rl&i`u8MM8)W?34zxI((h+)1=s4927NRXOQ~}bx$F0*Y{Xs?FTgNYNObwR zRfd0Jtl_jHO&7-SXE6d3osz)sDj#1<*sdORx=hG%f%jk0v!AF|>(+rT|O-DAC8 zz+*W1`>lB3erJZm{V9!t{_vl4xM_-OBAq1kl-oq**BPx=^^|#BR1}JIa5loVZEV9Q zSM7K7<>Gy|v*_P&hL(#ZoBy(L4=jK1e)fhTos{uy1MAB1V$+kQjsjhzxRdL4j4P85 zhn^{itm01C8!%0{yMEV*zSYA1aI`tst-t-&sc6w;gYBVv%r#2iV4=N0F3xnrfr}4l zkMR_awAh&v@Qn_e#ti+|IHgkPu}P)&_*s;s zG`P9C3kR1ZhJ5Bbv>JTb`#lXun*lMa7n<79{pV}~gg-fTjo-KLNCy{W4{>RAX`m!0 zb;m!fKuSIzmgApgof@ZInwU1#4-r44aye^$w&WWWPS%ivb{ly3>j%9i>PiSxmL$u4 zw7DVc5l|qwSmc^rlRu`67HYOw3 zpELI2ft^=FA~xJL9&af?TDHB-)UCf>1`V>D-&nG|0)lpuD6PJeWCMZ@|X!7fj8VRT1eE_V;_>HckvsCEzQ#>U!gU{4ZMf_t4n` z(VKNrB!Pwf>MvoSN6LT#uz+1ahYBw$V%b}tNU)yop#QDCwwztfm2;y z{CKz#%E3fOk^SrZeUX|5wn%00o~xbr@;do|W+ltx;w%=R)Zef^e@g>!AGF#7*r@Bs zj+`~$WV(d{1E9D87q4MTz8YRlSRC(0K09v4*m6pq4^{M28Kl z;3CorDT7b`kHr!N$HoW)=ZXc+-?Ou4Bb@o^3_9X=kM~SvHGqW17!Yvdxmb~l4PeVx zG2)Q;icZa_OplR0xQ%lw9v%6pj`TEOCq_|-qRVciiwDvWKM263@}rr&314CjLLI$= zIuB4JCdtJ@GN=?^fS@{nfQ1Z+7#5b;Oe^{q&G7R4ffn2dcv`t|yor}a;`0KP+u)Cn z;QX1ygbVQsHU0~;p)+;k>u$@N(1T*J5E69($Ar#y$q#tq2H!2+uC52wWx?ZL7wS~EHKtjbZQ_WGVodTjQ zJ&4Wj@*rcj3vg#@|P>z_-a$Dmlg$sUCaP(dUv9^TU{rf?q#vkW4QO~0+X?#E?Pa z-XJ}59n@N)r&d5rM9wjvZEvPt58}K`2NXKD;9#$Cn*a~xVJ9}`awRQBTr5%&TbgGz z?C09<=1UeuF{*_)#ze-B=aCZABKJ|9(~bnGETA7oU=yg%59>YQx;(EaU!vcVBlJ+s zIzodFaF zSklJf9HUREO{q;wETz;-1|s#CwgnruQmVS2EQ~LVAAGdwPCV*Znd4?1?$BC@Kd@^^ zUUD&*C-TnGEmIDvKhT0K0BFk^`{V z#V4It3c}J0bn2=fYJPo~@zC)#NLRB>`ak+Sg2sIhj6oSV7VEa7DYO{n-a|n{2$2V( zqV+!fR!oeFQ7Pg{z8qk&DM^KX^B#Gqz_HQKf9Bg`pYN|vFt(7nm_4nk`s7s#w|?_! zdjQlXoeQ{*<*U0lg$ImZFH2s5)}AVRlE>%V8>8~{OhCg(vseyhvy>?_6x(Ak2K+tq z(0T~yZFli*_>xT9|jf(17qT!@v&bWt9TCB zqPUN56voH`cco*4(IcOb<7`@Ww>6o~j3P&fL`l63pWT6BeKgOL) zl5qRBFdOZ_k=*5|U%+C%;Q1^HP}K0~#H6V4uX+d;N`a~q5y|oEgnjryhv^KMHZ$c@ zMXZO^4GAxptN!54veMFHq;cnVJ5V9J@Cy?mwx&!6%@koE#OnH#jQhF(!BPrP4Mjt; zAo8@mUb7($G9c16Q3fKcWZ8p{d_T%va{p}yXh8>`+~2wCfbZ8nu&Mk+QaAQ(L$I^$ z_H#Z8K&p-b2GySs_MMA;=^hRgM^b*@}Uj?Wd2z)`FAZ{XBto3V?fqydQRudYo>RKp3u9!B&YKS4@ez^WbtJ zelQ9lg4F`n5JHW?sAvkD`_e=e#=!8F*6q7oP%-imVr~>j^huzf(aVmnJfQY(mTmI8 zd?P&`xqlW$e39@K6iE>1kS~hH54`}>y=|vkkmWYbvoP={+JQ90)T}VI1h4}Y^OWi7 z6*K%vcx<0?Sd1I-ahj-2)jI72$(vR%kLaWI>IEv^zuX`8*Gh=RVjrqgLVPO%JR5@w zdaJbt0BX)v&W$Ku1F+^yFuPW3EfSzn1_4gy517I*;JzU&GV5Is`koDe zVi}0R>qEShA|EIUsDRrIi{1t%eL3pk9wkx^GBjQRey#Q1pju#kzoy6G;LckH8gC

5%02J^iy`Kh6gO1|vE*6O zbinDoIU;Ad2AnLqPd_SQp($kR0 zN7H(va`u+-RDIn5;LqQy6ZVI!pQVd;g6G!3cQPZMmfb#SgxS!W= z)j~=OmkTBFg&V769Aw=&r20%4|7wtqIf_Sc8h1A z*2(5*qv4`t@Fx;#7sw5RdhN=AneQvt2qAJN8zoVl$#hB$g3U(DTDeTla9?!i&kvLt zD>==)Bj{+#aK{siY?IQ*aMS^Xdc?fjg+jvTC=V({|K_7IFklbawcfj>aNA_v6)DH_ zHj1Ge!MtoX8utI3!M9E^o3C2H0B%IgN@0y0 zpuFDuCWgSCd4+;}_=;B?3?Yv1vlwVY*v3j|ox@@`PCdxd(i9npAtjb*;9D?VZkpv6tqNWh{sT-^Z(e|pii(sD{0tLUH9@_0E3gfA#!UP z{L*G#;ZBE;`n@5|KG1fEIG~bv3b+HGpG;DjC_M5(8kK2QuW9nUjhP{Zwe;ZcVb6$k zrXrrnE{imtfU4OzppZl$dKB#J8mW?aY!}HvSd$+YU6k4^$QeUvRJl$&Mp|B@?`1=h z$lE9>5WLT#DS)FoP@p-iF?8T|-(|IU$hyP;Bds+egAA=t6s!u_+2Toh<%}P(-gu=| zj8F2bu`**S=aM7=w`#bx7}LmRMe9Y}!oRd&t`ELISesrsC+rlIR)0ZTH#KXW37%Z7s*>Vd^hsdz z`B6$dcKHVKX%iIEMN7B_EP=BQ_tuZw!B^tJEtVX8hOqOEmd4#gN>r(~`AQSiPINk?bF1SCu6<3u(ys;EM?zShzex3}v(0zU%HlEwS2nhqSeNQTQtK^H^J1E`oBK|QuHoj;W zu(HXH_ND1tqJz0;(V2@w{H%st7)?2?4j&uVg!pAE z!hogC>#>jie9|9Ye^mV2p8XuVFJ+tB7 zO=akZQf{XM#ihwghnJGNMC*!>uRymxj}>Q!f&~^>HoJ8f*?uD~yp`&+a)75HQaILl zh?1pYzzpR_%*e&@vPI5?Zjtpf3_We=mL2c@ZaB=hWKw79rAg$X1b^rMU;N9(XI-lP z$Z8mHp1^JzQ=ZApO5y3*0Lb~qZ%E$GHU}46z5hhdpS}AgDux)rv=-5XA3MgOl&4K@ ziZkzyF15cO;YSJ^IEcOJdT;)X5=842=Ax9!cly);%Gc;9E2phhQG)1b2!BaW8)H7{ zr3!3eww&jCf)DmG2n7ZOD#BHiYjm1xRenPIhZ3*0PDh%HnUfuMSq~epg2r4Jefkv54O7X>@>A1`G~ z)_9;9qg46A1bP5NF!mPN+tQnur@F{Zh=Z8afT zOaWXWa$eLKFXit!$xlBbo=SRc+T=Q2i^@+~`ZWf-CigZ5PDY}Pl3_}l<+eToyTQ#r z|5NU1TD z$ymC#+I-^P8*VwE3~NT^iBsriX8KI^XILxH`y(3yI@}eg?-r)fJ$A`ob7;$mCxcm9rys>H656baLJ$=ZjZt_C z*`s+E1z9t9hGf+2@5v&t-DBPvicIWDJ~c%=bwK%MRDJNpBZTEptBp1Fe90l?4hkm* zLSmu^tJU1c(mThA7?*;r%0pl_oGY-nhG_5` z?%4Stk0%Z6Yd%JsA0qQpTTlB_LAyLvCO^d2c5jtc4qdZRgc)cX>`7e3tS3f6^S11T z>0fKO{_>hap`Y_fX!H zNCay$AH+Kq&yMVmd6oK(F7)-S1Q>xJQwCGn6><}Sei4N1T*@7!`TU59HkI67%Jk8= zSRnfvtcEaKwu|?M1dE`TP^==bpvjH@K2VNM=3-uqV zi-23m>?W?G1%62uksgs03fKz|xAi6}8h_{FVIx-5bC?6?nj*}!$#p@EP)M&p z+WoRw*C8|SoNk`P@8XTRU{gcwUc9iuOnXiK5NT*~5nzWzK5qOh1bbHFy$X-Xft^bl zu&cN5&e>^py$Tu=0r8_?^3=w6@Hm0}4O!3~GM?ln3Xax(Am1*f)9~1?9y|8 zjn@A-J%w8ut(F1orb#!cIj8Wz*qP)Cu25q-^7ZzK-SM)BJ@xlCl^BlCV10^jVVN=u zv)%L+N2;y6NFz_TLplPTH&6>ZgU6(x>B#D?VOfOTWSk|^x1a=5i~HbCr93?TO)gam zsnii-OgEl)fvYfL&PIfSRtrwao-@vI`Wc#n$1bbn2TwIH#5!wL>1!}wH5W$ws2GkA zM_TR;*-9$=-J>v@(?;LKGRC*okLp_ECG?KVdU#Kggweq z$d^Oi=jm`a^I!q<`5nq8&=f2W?iWC4D-`MKj9LkpatIF?l6KAfCua{oSE2PjLg0gA z<)sXc2^>Bh^wgkZ+QR$r7%em{x6JbrX4*-&9(kbghh>!rlb;rWUSRRQqXCmi4Tn}b zK5kLOo?b<-5y%g?O#>_Zi=q3s#m;)WNvKhEkn&Q#5b`yFe0l9WeGgVu0iKuS?K147 z@jw3jom&fyj|Qgo#rt>cvE)SPm*zC=2GXyIItp2Z&X+R&Jcq|JplK=Xg%uiq9?K3+ zq?d~evxk9qLkqk%yVu!056}=_U=8u#(Heqxw8}vg48a`m!<_4`pY^(X=LjBuOc!g+ z2YD&lAMdrlEmk+-QAUP>#)lLtqn4x3BWMaKc3kfRJzJm|vXw7i=JCY73%x-4{)!;2 zvhSWug=<5{?5~a7heH*3TRgcdt}xRJ!AD^2g~(x9iG)SGuwd$#SMS8b87BrhowoT0 z;*60K66qj!;Mge?9effTeU8|Z6ts!LaD@a7g zw-(;wZpJ|~x}^9+THZ$YFanXn!2n&%m7nJ=0Z+2Cko*K5i-e|W?UIV%soLWF`qyJI zFZG-Hu342Y{n8DM=5yMSnshy)_Bi)eOj9Q0t}@!H+}BSZz*wECkgQMGd$*_GBpnq~ z+9fcoVbEL$Z2{gdQ#-7=!3*DRQi0QyXQz73j#i2>?Xis13yE$9ipyyczfc7)N>uQf zb&Snz?K>G1i@zBUJz-sk5L$JO#}iXh2sos&X$m^^X>L#RPn5VSA7PQO7!I4$5xg8y zAbM&0aq)>#7pB>0rB0!l!i0%XzvmbSeAC$Y#pXnE1d116i@;WUlzCQ>Pu0m+K6U|1Qii1CyfZUn9XEIQ()Os z)zidG3#ZXxpzwFjt%&Fu+9cPgUR~;28#*yJ6*!%G-D`+ASeu}#xj0rD`g};S>ALi- zaeE4PtiobgoXpYonHcxAF4zT0dv+!ENS!s11Zu#@R;q^lcvtr<L*!yZas_{Z(Lk5TaGLTel*>#RTo+$u^=-eVWh z@i?*qFm4};lZm(O{C%YxJz-Dq6Jgi#tgPs*Atfr8R~>yR3&IjfMh{`TSBTvQtMpI=&4 zpO-;YJjS*INgT8+=rwAiR6I%yfa{U6!9t5xJTN*6_1g2Ns^!Ur@5z^!)^}uQG1Gc$ zMhk>@At^d~>gxsbsDRu3d8-!vM98d>bu}$pf5~Zi`|M)ZT3=dfeY}-8<8A-^+V)1O zsA9cYZbph^h53c3{rP!z)I+O)L~HBRFdtb}1i$U#m(NpQq@?c%s^3V8I^7xfx_a@G zCBdE56)sgtt#7*PAdUM0(zrMHYNo7`Ln^gS)t}Rn=1&p^x)#-Y0#!%*hm#&zGkV)U zD)iR!?(0u1XO`HUsHVre9$`IgVi?S{9*pn%V$5jV^9Qf=*jHe0Xr`McB(hu3U6QTL z_yt>bWFS7L1y%k0@uanI>5zeyR?5oo!$=2I3fJ$W(Ir;jiA=B1x)m)iMR&7#JDy1p z$fP&csFps-ZA#FUXsflX$HQKjj}r6{uKF@}w=dI0kp5+Hp4BsTx6noJDptL&;S|Ds zXfU?HVdF`Cmi$a4_JK6YiA+%?$6LO^j-t>b?hFsLk zn-5OwZ4!zLwcasUcLuv;XuTJMMzH2`37)L; z5OJFL-(j|WGu@RTx_vyB(w1;L>nEp8>^)%xO(dIfu0lb?K%t$V)=^n{wGFBqn?L#Zy^uXIIEA`cK3>c1CAr@EW7PLY}bGHo?@-p9@nlM_6hW+ z%f(`AOJ1Wvmfgw9pTb678TQ2{)+Nk01O1{`U$0AXyB^AC)hOqys~zLGOLXOsojf=j zF1)z5OiCF!>85hd>&n&doS@yObByDp$~NA_O3L@;C0T7n&9j7U;f~I|ytGMFw`Jow z>R$4^$>qZHkFrE}T=s}*)TVlP`(Ng;R;4=U_l{#_KY$3(}?up|n9aU*N~g z$4)W0>`R=b21 zJ!H|sJ$3eK;G{v&VXPoTL$&q=F5%9&P}ORL1abg^%$kg`WgAot<|q8~_Vd!%ov{@r z4~l|Lem;!o@Lts1kP9C>=4bX;o77#??GiiLAmDE>sdRIFkC|iQX|g}coucHg_SKy>kh}2L`R>PoDM6lB%)L3&wuOm%GVQQ zKJLiD|ItmyTRpmn6 z@K_47KX=+@(!`jAuT34h>UGaNofR!^5=VQ3B^;aJ{LJ3MxaJ#aqtDeiiVP;a^Mm^} zKUr30HYD2U^fwmt45C&r-;YpfCuqmnSwIVUt@o4Bc{y?Tg^uei)=JzrLvNp)$*zr3GN&gy7fz6Pp&oy3BD>gm<25qTDM-OOXDxG!ODrI0KF*cTFOwH zw5T~^$W^F|uxelZc3-t^R~>6^ww1MDOvjEFJDfPi?TuM_`%L6yvqr(XO7o8YJP)>F zh5ZJ3Y?%?corwy&1^eIv#h(4Ex;oVqQg+%p@1ymVP_fgV$|rAEOupk!h!vH^M}Ez8 z2+hw_kF2)t8Zndx!f3r-ocy3MZArczgP%<0$9&77Q*~4-`B9t>I7b7TE{rK|?-qAH z%|`**a#h6!cHd_!?OYrCX{(q{K2f4{6L2}| zN#(oF8Cg^p=LDgZ;z02E>C}d`?g@I@)x9{SP+oMIVV$cT;ot=SN_Lio;@+0WIT^8~A^RFgRWp~E_#^+A}S*wyNZ-Sx_a zxh2Vs|EIk(kB9ne<9-pMvLuA;Nm;WcvZQPgvW{J4CrifIhomeaBxKK)7>s>vgOYuj zMwYS-*@=zWYA@WkawHvjCUEYXm| zDdew3C7bh;MJr(K@sT15ZyY5T)x-6=-lgoWf$7-gvOTIuuaxbTWKL_2vVN0lQf9P~ z_R}akNMQKH-AUAJ@_@q$xb?hq76uZc?Yn0mQH2V5(Q-2AmK8g@zthf#*0r?l*JodJ z{T{5%%DX^4>a+gdZ1k5uLeP751k@&-F~yzNJxUd^5KXrNCt4qAeS|C8VePK<+@qlO zwfoiFSN+Jj{OUAxAUOjZ?!FyHZUl97S7e)dg{Zl-?}K@7H}gABoSnXpz}Fymrx)R8 zxN6&4Prd&Qz3qlYq)h5na25p94LG_*6&l53(~Hf86Z1$>JQ;5OIdy{wMf6-+bXY)! z7TMZd+(3l<+i&VjQVGFFe6nH5OA__AJ3)srE<-nnLd%H74 zOO<|{<@n1|v0@3&skk7*6leVuhdt}B<-iF~9*0I?-mr~`xorBZRCeeh70Y9mKQnDG zV^~L|-bxB2UPNrDPn&XJP;5$h=H&&g22$nJ=*15Z++$5A5{M|Wj(h49B6y+x)~2dc z;({Ju=QH`VLK`;YtuaiZ68h!Ps2e_gtaUwQ78++FZ~eu|{a%Vww`L&*VVcHv-ozE{ zCX7(OWJ zJzNG0)&GdSa{26%bC)l|tuWtU;>I|pkqUiRz=Rg=(qGp%w?bzKW9nD9^-!Gu8{Hva zfSCEPoH-fXZ zo5HnGkSApPk#puJ*EK2DyIN*4i!N;ml10uHV;S9MteecH(B^}^t@fEHfl%3U7NM|3 z@p^(FQkwTXt);(vOeE`Obq!*pW{z%(s@!1pNPJSUzc~8y_V0Bu#eNJLpJv4ee1HJ( z0q`qN54)eXE|+0V`CEmGU#d${j@Ed1#K@t4`mS!*aLsX#St4I7GUz~Sue)h14P`V~ z3#%OTk37~$9lx`+tnlzjHY#@UwmQpAn4^D3kJ#SG`y-Wk&s@{M<*( znddm+79LB(`CQI{y8|>)0@1bxJ<0uX4jJ|K$CvY1vh0_%Mw`=)Z4TjdCkt;3I=9HM z{9g0H-+Sqv7xIhMS+VhMV^``d#=p5be#mT4yN*-l@y@b4Bt!U9iYNz2j-4a@{hgbb z0ZJH~m0?h-vm8?bE8=m-^fB>gQkG*9)|}~BdRw)gVP*(-QCfEQJoMu%;=V+ors=9_ zdjeAaC?+z%^PRyUdi(YQ;|>!h2X(mUTwx=i&xq#sRiA(-$}AX+M)Y2zyU21h?=0!d z`+(vH))YFGv;h-kGfCw&m2$1PF?6sA&z{}b`kpuQLlrN{G}D^C-6`ZnU7BFk^Ma$? zob~`@5&EiC9xXAg)bFl!ZM58qoHMisV?xk{U(_*<>_@D@RJVKqdP} z4KqUfP2CXs{7(DdhC1hSCM#AQG>Z`NgiG@wxfvJn8xq@5oB7BO`MPy!^Y+!VIhg9j zJ(Zq?GQ`ST%!gi4rWr)E<=cql!ScVGelGBa{?Z^m%!`WVnD}!mbt(o35O1YO}7bWb3(Z4Ol9wSQki@e$nv%6 zPi;tT1niS`vlZA_OWFLM@objt`eL5@I9X58OksETkxT7H1J@KICmh{X9iEB$ZEc7T z-u9q}N#*5P?kah#UWQU}MW?y1i|T(9U4oI{-z9sl95va$Qc?r!93k*A97 zO)7y$H%7L6jG;#p+VEoi3=v7bx|p3l0ge~-Q|m!ZJ_n!nTt&S)HpKTk5%S;mM(LSR zOO9loiW?YB@tm*s_^eGDGo=1KMS<6+MOQrX*@dgNGIR#fM8D}Ot{->IkQecJwETFm zjQTOOWkAZf>vy+5$LUOhY@RflF#anBQZv6?V%r!t(n;+6sZ7?K=+S7+tO7VfIIk$wF zE>360U-?!W99&Z4T8N3pu{xPY4RX-fiX4dl^vPM}B=k@ zXwycG8~l?)i-ekQpv$YS$c8OGltwmOzgpbMQdND=;~^Jxcz3-b#tXU#eaFjhyRTM& zyz%4kTjK1j@uB}_RCMl$b93Rr2&(3Rytnw2p{bK{WKw2yWCB-E;OquS2u^RM4_%D# z9VxVUDw!9jC2Qg7obDiOOOQT1$Pa4M6kAC@b_fW03MVaRVdqP*4GU<>1O~nbPDK~d zE_vlKk(VErUY-C0O-pxo=fcWAeg&$FqL!z&Do^R4wjaD1nNtE@^+j^OthC!mk0PQ{ zyM80c?5p5Zk`_EbKZ8eeR$}^hsr$VVEyH-G0U_o!-}oejLptw7V?`&pq3vgiQ+9WQ%zW=PSl`Csyj;o)_8*TkUen*Pc9I^9q?XU&VRYh{ zw`Njmc0GtPu&iH_PpY(dl_)A|boB}Nxq2r>+4Nr^hW9fr0B6fA*{qd`2=akC1yK*nY4A``SnQ|K$>Qr>Uzow~;$IY;Yeb{4%84CeOgPk{z1uA1Ai~u{Y1|cZ z=v)+G_dzpGz07&(ij^Kn1b?3x7yF$i`dgo2`-TX5)8Grj>3Q({Z@$#amLkLKUhsg? z7|z8ui|uhy5ZP`gu|A()rSR=I@Cv1_qAzMmBEv3}Gpdz$`I^*Oju*uG%Rb7fOdUwt zv8xH+s`zZo z93A_e3|lZ~=3rJDiT(1zaA8mHXm@Ha=49e$u%#?|MZX4fgBV6O|H(Sym1yBdMTyhl z6ygD6{a(RZyIYAdRd8{uR~t*%UVZ`dEf49XUy*b!MGTbP5-y&`?YD%2kztE!&!GuN z4BO9Su?;u&zi%955z?_rA4KHSID-je510FD4`6GzzY3`cEyllT^gO_rC_p>#ZyK9^ z54zimCrTCDqnEx3hb`V#^EK#C^?3GC*G9}`PO^)BxBhUpF4w=MfUAyH>F!N2bFcxT z{)JqaxcP;D;c+Hw5~8%ZdAU5#_DHpRjx>9Gi<$*25GSZJACZcaxOz-RN=~9M!(u>5>pI;yrEB$NuBH;gBqldTzd87DDI zIoh}L2!q<6tFJ)Pes(qi4NB1$!Y@=dhjR|^wQATLqL_nm-{n+yms(-1^QE(o2Bndu z^*gOhvyXM zdBS6R9nM|~*1{}2eT(fCvv1qW<9#(;niu)PE$9Y;iwM|QL9DftqLK$RWbuZTH?$sx za3-HHe5m)_{;|cAG;!jlz{&!n()P-Th|GMnw6fpO#~S%@j7USefDp31I!+CDvH(w_ zY3)1RF*Mzf$&B<{3T%U|eGi65@&_DQC0-B;j5RRN!Wb@I{UqaFEd7HFt7VQ)^pbDi zaZNDJh)z&BC#Di7wcXRprzqj%3woud#zy4%M^j&J@o{QxGgfECaTR4Gg%Eq1ZW7h_JbRy`i$@SQbPdHe9w~=P)EV z)oIxyvuU$&>st{%%ueO*`w&H7c=6eNHA`%>S)-l@yk!qp&N}Vpi*npMXca1&F%loC z+UIvpHmtf)3IglH2k_{R(hF);!>S#ihCV#}yb}HT<0WF{fQpwkinBn~q`+Nc*Leir zstCj=rqsXog{KS5Jsu3JN5cFq{jXy%5k+I=NeE-OGf0hnOP#db-yvMyL!Q ztkRbC2;EqM7nK$wBRl*$cY}rs+1LaW&uqIKU0K0Kv_%~Wu8yyBshATKJQejR)ca1x zbzk!?xm+$-mjU{V^89}cBAy?L9=9{J#Ndvt9hq*ideh~u0aa+;txm=Pn{Ta~gRvov z+TTp*Y-|sbZCWWyr}J%cZ-1iau3|ap4i3VSa#VG4czZ-vxD2(Jyq3&HzQhhS&*vZ$ z7A~BAg(k?^n>o}^6y6yWPn8LaVb5@($LAm+*rCFAdOJ(TDsplXZV_0XS;r~Id<)4a zyS@_$#lh5H&+T{GYkAevu5e(?>{iYX%|%Bv#BuX6#r+I|uTOV4)zK*SC-6D~Ndx~m z0W+}I7B$Yia8U{Nm7T4=Ej7U6Fnm59>wN|NI3FSmlbPHhS=&N1&y;DfkOe$D)Lvsk zxRNQK%QqeLXTO;^{W{OYDg~NiA+pX?R8n=cfzHc;bwwgjOlG5+4M}xhOcY#OZxVCf zm6x1mgC<2fR@=k_1gpTDN0D24_2g!u23v0}4A{b9T0g$2ldBjAJ05&zt5;BzFycA% zd2YXN=ra0-bFtOpX^AG-Ymzzfn)(I3?B>c*iP=bzt6Quj5=o1NgunYHDYX@Qi~Nh! zi@ig*Uyf#6=V19;0wIxUd`EU#l21>;N0l50%Qi+|UhBoI&RvCF=VH@W%X1Pg68ePd z-ST^wV>XbRxG7g^yOCgnhW$wOpv}$4xl&O=ogfES&4;aJMGz2cJ~*1cn9dGgQ<>ep z&u=v?SI1d^8!Oa$`eS|D(pLPkzrK~z%bxBsj@f&t*HKZDGAt_&HUgbH9TD80Zz}ea zi>S~kN{~|{%s{n#)Hnm^^}20#VPd{hW?o69`dq+kU5Y6Xnn2t^o++}nAj2=d(5UiL z!S)Y2FAY39GX`cN@Wi$uSybDkqQkp>`U^c(=j(Hp_r~j)6;3N0{PeC_ACDazC!C!N zb+tLfF;P(O^W+UZu`=tC^a6d(!o4Y1IP2m6iKNUIOYyX^fNy8;=V(oI-0Ni%rP1{J zw30CTy~|-&W@oC3D6?Fa=t4*0A~|ba@wl~Njp&Y@Qj8IVs`5u(JRl(XTU8)lDhQ_w zfYnmJ%x5d;%o`~Mf(5@J0C`5-5O?VhYl0@w7$LNgW}2;mRqZB&NrBX$@Lb3#Y`oY} zOB=mb+VAR7T4TSryS-wr@_n=hjwqG4JS17OnPA}eS)Y+gXr^H0cUKopx_J84?|POi z`;PWqUC7u>Z6$fD;RYYcANyU}hI^lJl8!~Ela0&W!_5L)cC%2tNyLN=&(Y%PCa_HD zZMX#z6s15WBVI-*ER=>-SLKaL&R>IK370|f@Y5bzrK!h{voC?|brt+G4=}=3wTsAy zE1ZyQE>7q*tt`ScED7& zF=@9ZGQjrZeks&XO+cGJ2XZw}Jy+cE#oJJkvkN!_`6K&!gF-HkFB*7OjIGJbN8(Vs zz-wM(ee#z##<#y}^nJM7I-m_rbkLKJ*~8x?jD{t|3t1YgyiUI?qwZSaFp)My+z==? zMA?t&ghXAk6I!|-t<_`Q@!7RV4BqR$I)x0gn5;gKP-r=T?vqNK1L z=Vo8`Kzu~5?gz8nFi$!q9<|3{i`{Ciq>q>A6WL;4E9ysAL2#-wxe)mhYBNiotWVlV zl3`8X5r-l|n)N|QHj?g}vx*!vW7OZORc+UVF<``XWKc75#B`G?Z_H!<>!TXK3Gu8) z*wojzgcJSu28>2VT$)GB*9$w>I_h|G?=PI6UE#`LNf$w{dG1aHGJBm_Ajg6|s}`0y zOXso2g(I*1l`NH^n*DS3H0=AT!pm-<{&LU;7nl3P>F2uS>Cldsz@AXBw+ z0NOVvw@Ee8k_bCn!)Ac;*^Xi&!T$!wWfKU6ta8IL44BcHdqp@wL5;mpkgk!X!|Sy2%4f1_$Gplvbb?KakY8CR4-P{uZXA+tP*qAFRLwB?%`M?_)-ZsEASXaZz%y3L5v9&p&D(iHSDf>;uvvW+^E)svDgu)Fp{L6rl2Wlz!z zH?F+F063MztzejxUF7HV?(MK-TsCBdf1vh`=Ry3_bop#|X|PEX47LXh?%qixFErhg zyW%u>@OVWyQ35HFfj;v10*xVVFTq&Moy?sF*tA3}W!yps9!fV_L^XXsuZmIhWjC)O z%r%|HH!I`lSY%sd#>WEj3~*c?b+WZq%PU687h}W?K_A1_ZzE4F1NZc54$}FiK{ zRHb}2na49){Gt^KrE|+IS-XjnbZ`5=mF(#FO7ed8D1*RHk9B%X7! zUYd={`qT?tS$bldb~$k9BlXg=_k5mCzmK_IFynj1HD26_JRQ;jT-!dUFT!=Fzutht z=ek?`z@;cDf3&dES3EtaJ*AtNd^&@N;<3&^N(1Xg^O=1Y)4j=@OXXm*jqf zt*F{>?V&4gyxQbkmtA}3g=&54pp)FnQL)bh4}8)jKbO|@C>Irwy3~MJypp{)LWZ3;{`KG@wPRE#BeaO&W{4H(Bj}iRiZw_@_ZSS$LA5s$M_d{@O{mkk!pNPzD1c-H zt~1K?ywccr0qUx2#$e|$f7xwW$o=;W#1yi0-l1xon=pYMQ&zQ#@{(CEgid4U_`)nI zyPpvDkP0)UT&U1dcUC73wtHLYnHU?w@LRAw8j~56`1ELPeb{Kgcu8V^amnN-+o&4F zh1xm@1r}0l4cuCdNTZyl%M;vfiRz`d#M*!8=VJR?(-dxp={n~-~+w& z+Zkq7*(kFnxl0o%vr6GcOVq?%e1O^<2&sz{*N0bVE@q=uAniuqChC)eVwWiH#jD$h zQ#sFWBzy;Jce6wop0b@qUAXQ~>rMCy^9sw(GkX%0O%;h(WF^!)1Sf;jm4)2BywjZVIfpmNG5ASVnp%q>5K2h_2S^|UJ-cNYT zIUNnFR`M*PVtpGeO9X#P>p@x3=SL8J6g3D;WfqFEE%|g-sYe!)CrkzP`!f)rGx>H< zIQM_@2smeE(?6n%az2ox_1BKq(-siPBTsLZ&HZ?An>4&+e-E$FTAl(5f@oy(YI`nD zqQFAF6`DS~o)+G3=Yj}Y$*UAl#yu@qmtk@4(6z}`uQU-wvM2Ag5&Gw>fpLAT(|IrfCFJEqe)HZk1+dh_dS=U}fbM%jQs$=) zGOO1fZGB0c7k{Ubh!j6O@SBg(FVeLrFccPHQQ8dcD%3JTTD0`n!Ww=#@a8;8dZSx6 z%S?%O>R;KDTsPPVXY__>_TY4q<;N&G?5_TYZ z^(}1voU8%%&mPf%0(P~Rpum@*(_hsS6j=XYunq)b4xry#%qd=n(JQ4%&9V|zbfJgGlIlIEKi3={yjj`4A`Dmv;uiId181$8xp>M2POZyoY$ef+z!4D zG*c-z6F;074MM7oa26Q0U4lj`ZuO;Ds)LdV=_b{9Vo~@{l23{oK*JjjX<|Q}^Poxg zSm=~&_JHptt{Xzs7-ETnnYq4sEf)(=uMEY1y#cyKBS;i;E)ha< z`-#+rxTkO*T=yC4sSsH)lMwR5g)cSz=j7gav-Zh^0Zps^Mpf3o$-|n?fsT^eAaM?Z zfLnvGnZfGTKfT@?5DHRv;|U1VoEWhsF0|K!KgzQfU@EV3P1v3k2QIb7*H4%pNJA#b z!GGO_VRv1vi_ST)DFss)ze8=^9(?hgD6>>6dnWyOW#Ut?O?wR2z2daRWA#wYM*GUk zj-u3I;wu!SddD*h6jt2t&v>qD(%Lj}GwoMmfxd-y-SMOD>isi;v=(&NGw8tw$c>B3 zN}Oo2ZbYW5=)t4v_ zQ|K(8T2C}9e&su<{pJRDOaqD8IZa%{W^-)PoNn0eyuqtP}4H`8e`I2wEn_uvJO zJ9@ln1pIGvT7#bx^jA>}9}c5myD9;M@968LP>Zc?QSuCp8qFbG;F(KuBwCbiw~F=3 zRz40TD(Wu;Yrn7U9?&s(2wdeas`>KwR_PqQ;+a2QS_f*MegVEFvl#A@9ecXk;&uiZ z8)4o)oe~Duwm}9poXhHD3;=Dj#Ycyu=~l~cd!H~kL$0^FZuuhcwcNg$`o&Fc95mF@5|p~@kmeiJ&5fN?PXdP0 zl4N!Xvhk6rCqpP=6L-}3hS%u1rAx5nyX}$ox__=W_k)seAkerFq=xTFJP{{Kflg}N4y<$@tuCEE3cL>FXVcjt?}%EP zBueml?_WI_q*t=q$Ujt_DKsvR9PzSx(#j8vZ{{(UZRW^ePT}6k4c^G_Qd?I z9x`ANn2f+cx>MLwduAEIkATeUh{`Q%!xaae?x3GAOP1Aeywemx0>sw9ul9WbyHTx8j~M=^hgk!DtwSnTFw@bS4O~>S&$( z>oo?@?JT2k(<82p@j#h!W!e(Z8*%~hfl{>46s2WYY{p!w4LCQs`uVzaGZ%ncz6A!@ z$~=Y1BuUWL@<DMC|g;;cZ zN&ejM{P)Jd#?+^+%xeJe6mrQE;EQHbTpL6`JG8&Ldd6>iB_8nq+C$E<-l+n(x90%M zbstb@Xpb*Y4MawZ=?(9Br3K&fm5O6z^{ClpFZR7TBV2vk7L(4VoABfFJT^O5HJY6+ zCF-K+ot4_9;xKorzXtefO4BWTJphEjUfujPWZ|dj0?-~|?sd-rDXtxgkz*=T4^&FO z;cSwCx2MXFBm1V%!fO51?LxSqnsv5Fp^Fyd7cq^`l&s->~rN2+Ebl z7*1HoV?|NVbj0!Hq=GLKNBurO;HZ)A0{YV(>PA3jEVJ!l0f>+XYCb!kQ>CFffHei6 zO0h8pV|89k2a4}N@HGkwA7EZbUu!R%0gMq-en{~pI_}CR0BiRKKz;6;ZZwh7w69J! z^HgbE?pFDA|IdqbxOzSC&|ex~zs~)6+8H&*_%v61XLWPX0)9$`!;3OqJyFr_X;;w#%|T$wDtiYM%{c}UM#)!DiLXu1Wq?_ zqg(%DZD?t$LC;S2Na4N%pe24m1O3-ko(u5*&_~` zC3=&$%_IOGmH}v!stJO|=lkC1X-O*&A}vK^CLUDW%K$l97qj+7t7thxa7Ms|O>34i z(KCPDa_9c-z%NCB4nN;?_mf^lP{eF+ngD3oKot=Iv6NpXpN*qzOwl<8+QVO05Y=U zy<#x8>}LvLBC|ggzV)NbcXKhud9y|r{<5osFXPf*0}*Ie7sneiZKMSsZp$Qsr8^7h z)$eGE3P9JQL*W+v)HZkH0n%}96}t-Lr$i%M4Y|$XfJI~hTs@V8^-hyAmYl&KaB6Y@ z8I5bN4E)I-1*5$=HIxCxs8T zG$?7}Gyj!q%4Qg_gs2=~V00;WZSeCGLnh*-t%cuXUM`Bi{}e}U26%A`faxU4?Xp_? z`Q;f_)O5mC9q#IPps!b-fxae9Q=*4z5nBQ#?&WgZ9x;}!wJs42fIQ@3**)CDcf~_2 znE$ajr@x;8Pp1J0W<2ELeJr0dxGItHy5PM55J`U&`c8@&FYPi+m|*b$E?fav_+{Ya zg_s+bZlB1+0G<)(1pjy|PUy+Hp_=c@Fo4JE0wZeKPzTTn*nyMseIIB4xR=<68h~(< z3wUW_;LwX0^2MS$!lj4?l8|G}5}2Lm>BF{U5+_VNt~UWHltz-6&GQIir_m6_)BVO0 zl5?lB(7i3i$^>B@v_aV4Un{$^IuT6M304EyEBl|%h^PTtTvo9i;3HK77ruBXpY>9h zt_0lO``c@ULvQ0c&!V@0n&{8a_4j+rd)CmK3(#V8(GBw}=U60Sq@lB6<3V3QTsltF zE~D`FKCj5Yy?Lvdn?q?5_wbDh$AkOWld3&<*ztz9l}DK1eSaqhHTBOT#+x`Q#~QL{ zq&dN;WGZL&eGCo*sGJ5TMhO0-|Kt11IKNguoq891I0n#%Yk*Z`muc$>xN9g7!i@FN zS2WX;0GTFAzd-+9l`D=NYXPU*9)MV~{p$sO5qXbPs9P*RJYIQ^nps?~@l7Evp}h6L z-FpJE2c)mQ$DbYW3MJR}S(}axrl28b5(O6L_~9u*@S6!RM)8&byS(4zFgK^pVokjP zCog58>IrcUfCY3dQrkYZi)_R5J(H!sXimKEd0DrK+iJ{zOaW)iyQY#y&DOxO(%oVZ zGx~G;7_pdcOXFZ0}Dd@CbLlQBM|gZd!4X6zIRLlNs5$f0>42)djov$&TAy# z`L(WRfx!{4aJa)`Z4MA(HsEwNBIEDn+j3%4K&V&S7;T@~1+16w5^eG^hbo4V1UQiM zY_44XVmM}*fPL?U(HiKEW4xI66@#2sKcO99b{*6nXCu^Eth!yMYa3Le`T43$4% zAXbsm3;-iIi^&iHPNnea69D$B@dplst_={$wABH4lc)0T#~=cN;UBMsA@mW3nx4zUHt`Gh!C4^Ifb*pb4vLC# z2h*nMZ-F&5A567ew!7uWQ%UyPb)8Wzi#fSa&Tfq8Ng>=11Hs}NrOQ!ArBg}M|i*o}X z_UZe^0fH4AKmnZwR29l=G?rhze3zu9{T%_=&VTh2#bk>hP_O^}W~Ty(SKJF@JPvuQ zEt_0UnZe1rfC0}1xV9wrXVmUFc*ZnIC&MpQ^n6-xi6-{1r-}woB?nj3kOi2dO*K{Q z{o|(^qMBb%Bc$5xc(k#3wd-liF<4xJ86yiE?Z+Ae1KV%#Y9vUwV1RAvyzS=SPyUw zL{AoeH4m7yQ6eE#RpBeuAEyR}5CxgX!)zLb{Ra}OH(bX@Ke=W(9#euiS)$MX0e2Bi zzPI5+KpI>7RgUQd6C@Hdj`s80GP1u(vH%`UGckSo<{!fg?(&4JSK-K?XxpP;ENYZl zRrfD&$LNCTre3ov_RpPz^uZcMPpEgBE2aj7{ZO7NK$8jrp{6-FvBRamh?KJuP{w}x zZJ67G^O(dsIeez+@*zlzFE>D-d2BD9p`X3(-2)_n4&ZofwpYelRlpf32>`lmdaE9c zF=B+`l*A13Gg06MlD+R#@4rjN3K=lm>mD@%`KxXToHB(SEnEy{jOX>cyV8)C)J5$lh z6#T;g+qPMZ1%pH*mRq?{Mqy`yRC>KzyvW8DBse6&`ltWqy3pUb#$(G(q-mwo9pkXt z0i{90)c;rHKh&_l`t3ZZ;<}R6Qgu29AYYPM>>9}aP7WTAiPQh^wEnMNASgk|v=dn4 zM{m8B1~mcvlD@15;5Y@8BN*cWExPOKoLm-{tl*!p1a%t(^)|quwr&9icU%xDMJrl_ zmHXK(@1r#{IHVbKuKt0FwHR07Teo7X-F_`FealATTQQ1zZg> z@xLDX^5w4$aKEIRPqRw3LCB(n$)DXI3gt`N*qwyeKyKq7%pj-P#O9D?BCBgXmVfZN zZ;#|qJ2=@XQXto~>PhL~0>Dwe2>uhD|75%fczE;i51<8xMadN|VM#gr_^DQO)l~c#g?;40pKAcEG-sVhhBH}Xv@qKMFl;aiiO@PW40Xni8tGq(8gALKO9yd~lEMRY0>PN|=;AucA2AFN(T#dxn zwL${GJxBv91g+{nhA#j8_W(!(DzPdRGf)3#>fpbB{4Z&;##)fTOgxiS|Bya91 pDeC`sQ2ovz^eCOfw{{h*7Ey(}? literal 0 HcmV?d00001 From ef2019c1a4782440d45ed5d4332ba23d533d8c64 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 18:58:24 -0600 Subject: [PATCH 090/512] Allow indices exchange via distributed (#6618) (#6624) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mikołaj Błaż Co-authored-by: mikolajblaz --- .../conf/megatron_gpt_config.yaml | 1 + .../language_modeling/megatron/gpt_dataset.py | 37 +++++++++++++------ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml index 67999548e8da..d502f255bd8e 100755 --- a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml @@ -195,6 +195,7 @@ model: no_seqlen_plus_one_input_tokens: False # Set to True to disable fetching (sequence length + 1) input tokens, instead get (sequence length) input tokens and mask the last token pad_samples_to_global_batch_size: False # Set to True if you want to pad the last partial batch with -1's to equal global batch size shuffle_documents: True # Set to False to disable documents shuffling. Sample index will still be shuffled + exchange_indices_distributed: False # Set to True to exchange indices via torch.distributed instead of filesystem # Nsys profiling options nsys_profile: diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py index d2aa5182b716..cf1de245d0e7 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py @@ -329,6 +329,7 @@ def __init__( if self.no_seqlen_plus_one_input_tokens: self.add_extra_token = 0 self.shuffle_documents = cfg.data.get('shuffle_documents', True) + self.exchange_indices_distributed = cfg.data.get('exchange_indices_distributed', False) # save index mappings to a configurable dir self.index_mapping_dir = cfg.data.get('index_mapping_dir', None) @@ -353,6 +354,7 @@ def __init__( drop_last=drop_last, add_extra_token=self.add_extra_token, shuffle_documents=self.shuffle_documents, + exchange_indices_distributed=self.exchange_indices_distributed, ) deallocate_indexed_dataset_memory(self.indexed_dataset) @@ -544,6 +546,7 @@ def _build_index_mappings( drop_last: bool = True, add_extra_token: int = 1, shuffle_documents: bool = True, + exchange_indices_distributed: bool = False, ): """Build doc-idx, sample-idx, and shuffle-idx. doc-idx: is an array (ordered) of documents to be used in training. @@ -572,12 +575,13 @@ def _build_index_mappings( # Build the indexed mapping if not exist. if torch.distributed.get_rank() == 0: + using_cached_indices = True if ( (not os.path.isfile(doc_idx_filename)) or (not os.path.isfile(sample_idx_filename)) or (not os.path.isfile(shuffle_idx_filename)) ): - + using_cached_indices = False logging.info(' > WARNING: could not find index map files, building ' 'the indices on rank 0 ...') # For the last epoch, decide whether include the entire epoch @@ -677,17 +681,26 @@ def _build_index_mappings( // torch.distributed.get_world_size(group=parallel_state.get_tensor_model_parallel_group()) ) - # Load mappings. - start_time = time.time() - logging.info(' > loading doc-idx mapping from {}'.format(doc_idx_filename)) - doc_idx = np.load(doc_idx_filename, allow_pickle=True, mmap_mode='r') - logging.info(' > loading sample-idx mapping from {}'.format(sample_idx_filename)) - sample_idx = np.load(sample_idx_filename, allow_pickle=True, mmap_mode='r') - logging.info(' > loading shuffle-idx mapping from {}'.format(shuffle_idx_filename)) - shuffle_idx = np.load(shuffle_idx_filename, allow_pickle=True, mmap_mode='r') - logging.info(' loaded indexed file in {:3.3f} seconds'.format(time.time() - start_time)) - logging.info(' total number of samples: {}'.format(sample_idx.shape[0])) - logging.info(' total number of epochs: {}'.format(num_epochs)) + if not exchange_indices_distributed or (torch.distributed.get_rank() == 0 and using_cached_indices): + # Load mappings. + start_time = time.time() + logging.info(' > loading doc-idx mapping from {}'.format(doc_idx_filename)) + doc_idx = np.load(doc_idx_filename, allow_pickle=True, mmap_mode='r') + logging.info(' > loading sample-idx mapping from {}'.format(sample_idx_filename)) + sample_idx = np.load(sample_idx_filename, allow_pickle=True, mmap_mode='r') + logging.info(' > loading shuffle-idx mapping from {}'.format(shuffle_idx_filename)) + shuffle_idx = np.load(shuffle_idx_filename, allow_pickle=True, mmap_mode='r') + logging.info(' loaded indexed file in {:3.3f} seconds'.format(time.time() - start_time)) + logging.info(' total number of samples: {}'.format(sample_idx.shape[0])) + logging.info(' total number of epochs: {}'.format(num_epochs)) + + if exchange_indices_distributed: + if torch.distributed.get_rank() == 0: + indices = [(doc_idx, sample_idx, shuffle_idx)] + else: + indices = [None] + torch.distributed.broadcast_object_list(indices) + doc_idx, sample_idx, shuffle_idx = indices[0] return doc_idx, sample_idx, shuffle_idx From 4b363ce24e8ded40c26c93ff305e8b7210b0f7f4 Mon Sep 17 00:00:00 2001 From: fayejf <36722593+fayejf@users.noreply.github.com> Date: Wed, 10 May 2023 18:31:03 -0700 Subject: [PATCH 091/512] Offline and streaming inference support for hybrid model (#6570) * streaming buffered for hybrid + ctc Signed-off-by: fayejf * change default model_stride in eval.yaml Signed-off-by: fayejf * add fc model_stride Signed-off-by: fayejf * small fix Signed-off-by: fayejf * check whether model and decoding match Signed-off-by: fayejf * small fix Signed-off-by: fayejf * streaming buffered for hybrid + rnnt Signed-off-by: fayejf * style fix Signed-off-by: fayejf * fix yaml Signed-off-by: fayejf * reflect comment wip Signed-off-by: fayejf * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix Signed-off-by: fayejf * refactor and verified Signed-off-by: fayejf * add get_full_path to buffered Signed-off-by: fayejf * small fix Signed-off-by: fayejf * add RNNTDecodingConfig Signed-off-by: fayejf * model name & instruction of changing decoding Signed-off-by: fayejf --------- Signed-off-by: fayejf Signed-off-by: fayejf <36722593+fayejf@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../ctc/speech_to_text_buffered_infer_ctc.py | 29 ++++++- .../speech_to_text_buffered_infer_rnnt.py | 46 ++++++++---- examples/asr/transcribe_speech.py | 16 +++- .../asr/parts/utils/streaming_utils.py | 12 ++- .../asr/parts/utils/transcribe_utils.py | 6 +- tools/asr_evaluator/conf/eval.yaml | 4 +- tools/asr_evaluator/utils.py | 75 +++++++++++++++---- 7 files changed, 151 insertions(+), 37 deletions(-) diff --git a/examples/asr/asr_chunked_inference/ctc/speech_to_text_buffered_infer_ctc.py b/examples/asr/asr_chunked_inference/ctc/speech_to_text_buffered_infer_ctc.py index 69ea139d2ed6..dd801ddb37f2 100644 --- a/examples/asr/asr_chunked_inference/ctc/speech_to_text_buffered_infer_ctc.py +++ b/examples/asr/asr_chunked_inference/ctc/speech_to_text_buffered_infer_ctc.py @@ -48,6 +48,7 @@ from omegaconf import OmegaConf from nemo.collections.asr.metrics.wer import CTCDecodingConfig +from nemo.collections.asr.models import EncDecCTCModel, EncDecHybridRNNTCTCModel from nemo.collections.asr.parts.utils.eval_utils import cal_write_wer from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchASR from nemo.collections.asr.parts.utils.transcribe_utils import ( @@ -78,10 +79,16 @@ class TranscriptionConfig: pred_name_postfix: Optional[str] = None # If you need to use another model name, rather than standard one. random_seed: Optional[int] = None # seed number going to be used in seed_everything() + # Set to True to output greedy timestamp information (only supported models) + compute_timestamps: bool = False + + # Set to True to output language ID information + compute_langs: bool = False + # Chunked configs chunk_len_in_secs: float = 1.6 # Chunk length in seconds total_buffer_in_secs: float = 4.0 # Length of buffer (chunk + left and right padding) in seconds - model_stride: int = 8 # Model downsampling factor, 8 for Citrinet models and 4 for Conformer models", + model_stride: int = 8 # Model downsampling factor, 8 for Citrinet and FasConformer models and 4 for Conformer models. # Decoding strategy for CTC models decoding: CTCDecodingConfig = CTCDecodingConfig() @@ -108,6 +115,9 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig: logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}') torch.set_grad_enabled(False) + for key in cfg: + cfg[key] = None if cfg[key] == 'None' else cfg[key] + if is_dataclass(cfg): cfg = OmegaConf.structured(cfg) @@ -174,6 +184,23 @@ def autocast(): ) return cfg + # Setup decoding strategy + if hasattr(asr_model, 'change_decoding_strategy'): + if not isinstance(asr_model, EncDecCTCModel) and not isinstance(asr_model, EncDecHybridRNNTCTCModel): + raise ValueError("The script supports ctc model and hybrid model with ctc decodng!") + + else: + if cfg.compute_langs: + raise ValueError("CTC models do not support `compute_langs` at the moment.") + + if hasattr( + asr_model, 'cur_decoder' + ): # hybrid model with ctc decoding or potential other models containing decoding switch feature + asr_model.change_decoding_strategy(cfg.decoding, decoder_type='ctc') + + else: # ctc model + asr_model.change_decoding_strategy(cfg.decoding) + asr_model.eval() asr_model = asr_model.to(asr_model.device) diff --git a/examples/asr/asr_chunked_inference/rnnt/speech_to_text_buffered_infer_rnnt.py b/examples/asr/asr_chunked_inference/rnnt/speech_to_text_buffered_infer_rnnt.py index 385a29b8f417..07f7effb85f8 100644 --- a/examples/asr/asr_chunked_inference/rnnt/speech_to_text_buffered_infer_rnnt.py +++ b/examples/asr/asr_chunked_inference/rnnt/speech_to_text_buffered_infer_rnnt.py @@ -67,7 +67,8 @@ import pytorch_lightning as pl import torch from omegaconf import OmegaConf, open_dict - +from nemo.collections.asr.metrics.rnnt_wer import RNNTDecodingConfig +from nemo.collections.asr.models import EncDecHybridRNNTCTCModel, EncDecRNNTModel from nemo.collections.asr.parts.utils.eval_utils import cal_write_wer from nemo.collections.asr.parts.utils.streaming_utils import ( BatchedFrameASRRNNT, @@ -101,10 +102,16 @@ class TranscriptionConfig: pred_name_postfix: Optional[str] = None # If you need to use another model name, rather than standard one. random_seed: Optional[int] = None # seed number going to be used in seed_everything() + # Set to True to output greedy timestamp information (only supported models) + compute_timestamps: bool = False + + # Set to True to output language ID information + compute_langs: bool = False + # Chunked configs chunk_len_in_secs: float = 1.6 # Chunk length in seconds total_buffer_in_secs: float = 4.0 # Length of buffer (chunk + left and right padding) in seconds - model_stride: int = 8 # Model downsampling factor, 8 for Citrinet models and 4 for Conformer models + model_stride: int = 8 # Model downsampling factor, 8 for Citrinet and FastConformer models and 4 for Conformer models. # Set `cuda` to int to define CUDA device. If 'None', will look for CUDA # device anyway, and do inference on CPU only if CUDA device is not found. @@ -115,6 +122,9 @@ class TranscriptionConfig: # Recompute model transcription, even if the output folder exists with scores. overwrite_transcripts: bool = True + # Decoding strategy for RNNT models + decoding: RNNTDecodingConfig = RNNTDecodingConfig() + # Decoding configs max_steps_per_timestep: int = 5 #'Maximum number of tokens decoded per acoustic timestep' stateful_decoding: bool = False # Whether to perform stateful decoding @@ -135,6 +145,9 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig: logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}') torch.set_grad_enabled(False) + for key in cfg: + cfg[key] = None if cfg[key] == 'None' else cfg[key] + if is_dataclass(cfg): cfg = OmegaConf.structured(cfg) @@ -195,20 +208,27 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig: asr_model = asr_model.to(asr_model.device) # Change Decoding Config - decoding_cfg = asr_model.cfg.decoding - with open_dict(decoding_cfg): + with open_dict(cfg.decoding): if cfg.stateful_decoding: - decoding_cfg.strategy = "greedy" + cfg.decoding.strategy = "greedy" else: - decoding_cfg.strategy = "greedy_batch" - decoding_cfg.preserve_alignments = True # required to compute the middle token for transducers. - decoding_cfg.fused_batch_size = -1 # temporarily stop fused batch during inference. - decoding_cfg.beam.return_best_hypothesis = True - - asr_model.change_decoding_strategy(decoding_cfg) + cfg.decoding.strategy = "greedy_batch" + cfg.decoding.preserve_alignments = True # required to compute the middle token for transducers. + cfg.decoding.fused_batch_size = -1 # temporarily stop fused batch during inference. + cfg.decoding.beam.return_best_hypothesis = True # return and write the best hypothsis only + + # Setup decoding strategy + if hasattr(asr_model, 'change_decoding_strategy'): + if not isinstance(asr_model, EncDecRNNTModel) and not isinstance(asr_model, EncDecHybridRNNTCTCModel): + raise ValueError("The script supports rnnt model and hybrid model with rnnt decodng!") + else: + # rnnt model + if isinstance(asr_model, EncDecRNNTModel): + asr_model.change_decoding_strategy(cfg.decoding) - with open_dict(cfg): - cfg.decoding = decoding_cfg + # hybrid ctc rnnt model with decoder_type = rnnt + if hasattr(asr_model, 'cur_decoder'): + asr_model.change_decoding_strategy(cfg.decoding, decoder_type='rnnt') feature_stride = model_cfg.preprocessor['window_stride'] model_stride_in_secs = feature_stride * cfg.model_stride diff --git a/examples/asr/transcribe_speech.py b/examples/asr/transcribe_speech.py index 531b5c56aa4e..8c8d11132183 100644 --- a/examples/asr/transcribe_speech.py +++ b/examples/asr/transcribe_speech.py @@ -227,6 +227,17 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig: compute_timestamps = cfg.compute_timestamps compute_langs = cfg.compute_langs + # Check whether model and decoder type match + if isinstance(asr_model, EncDecCTCModel): + if cfg.decoder_type and cfg.decoder_type != 'ctc': + raise ValueError('CTC model only support ctc decoding!') + elif isinstance(asr_model, EncDecHybridRNNTCTCModel): + if cfg.decoder_type and cfg.decoder_type not in ['ctc', 'rnnt']: + raise ValueError('Hybrid model only support ctc or rnnt decoding!') + else: # rnnt model, there could be other models needs to be addressed. + if cfg.decoder_type and cfg.decoder_type != 'rnnt': + raise ValueError('RNNT model only support rnnt decoding!') + # Setup decoding strategy if hasattr(asr_model, 'change_decoding_strategy'): if cfg.decoder_type is not None: @@ -240,7 +251,10 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig: decoding_cfg.preserve_alignments = cfg.compute_timestamps if 'compute_langs' in decoding_cfg: decoding_cfg.compute_langs = cfg.compute_langs - asr_model.change_decoding_strategy(decoding_cfg, decoder_type=cfg.decoder_type) + if hasattr(asr_model, 'cur_decoder'): + asr_model.change_decoding_strategy(decoding_cfg, decoder_type=cfg.decoder_type) + else: + asr_model.change_decoding_strategy(decoding_cfg) # Check if ctc or rnnt model elif hasattr(asr_model, 'joint'): # RNNT model diff --git a/nemo/collections/asr/parts/utils/streaming_utils.py b/nemo/collections/asr/parts/utils/streaming_utils.py index b824bc18e770..9efb675b6175 100644 --- a/nemo/collections/asr/parts/utils/streaming_utils.py +++ b/nemo/collections/asr/parts/utils/streaming_utils.py @@ -769,9 +769,15 @@ def _get_batch_preds(self, keep_logits=False): feat_signal, feat_signal_len = batch feat_signal, feat_signal_len = feat_signal.to(device), feat_signal_len.to(device) - log_probs, encoded_len, predictions = self.asr_model( - processed_signal=feat_signal, processed_signal_length=feat_signal_len - ) + forward_outs = self.asr_model(processed_signal=feat_signal, processed_signal_length=feat_signal_len) + + if len(forward_outs) == 2: # hybrid ctc rnnt model + encoded, encoded_len = forward_outs + log_probs = self.asr_model.ctc_decoder(encoder_output=encoded) + predictions = log_probs.argmax(dim=-1, keepdim=False) + else: + log_probs, encoded_len, predictions = forward_outs + preds = torch.unbind(predictions) for pred in preds: self.all_preds.append(pred.cpu().numpy()) diff --git a/nemo/collections/asr/parts/utils/transcribe_utils.py b/nemo/collections/asr/parts/utils/transcribe_utils.py index d7946aa2842b..990e3b96b0fc 100644 --- a/nemo/collections/asr/parts/utils/transcribe_utils.py +++ b/nemo/collections/asr/parts/utils/transcribe_utils.py @@ -58,7 +58,8 @@ def get_buffered_pred_feat_rnnt( print("Parsing manifest files...") for l in mfst_f: row = json.loads(l.strip()) - filepaths.append(row['audio_filepath']) + audio_file = get_full_path(audio_file=row['audio_filepath'], manifest_file=manifest) + filepaths.append(audio_file) if 'text' in row: refs.append(row['text']) @@ -149,8 +150,9 @@ def get_buffered_pred_feat( row = json.loads(l.strip()) if 'text' in row: refs.append(row['text']) + audio_file = get_full_path(audio_file=row['audio_filepath'], manifest_file=manifest) # do not support partial audio - asr.read_audio_file(row['audio_filepath'], delay, model_stride_in_secs) + asr.read_audio_file(audio_file, delay, model_stride_in_secs) hyp = asr.transcribe(tokens_per_chunk, delay) hyps.append(hyp) diff --git a/tools/asr_evaluator/conf/eval.yaml b/tools/asr_evaluator/conf/eval.yaml index 176392b9c070..fe2d74507903 100644 --- a/tools/asr_evaluator/conf/eval.yaml +++ b/tools/asr_evaluator/conf/eval.yaml @@ -12,12 +12,14 @@ engine: mode: offline # choose from offline, chunked or offline_by_chunked chunk_len_in_secs: 1.6 #null # Need to specify if use buffered inference (default for offline_by_chunked is 20) total_buffer_in_secs: 4 #null # Need to specify if use buffered inference (default for offline_by_chunked is 22) - model_stride: 4 # Model downsampling factor, 8 for Citrinet models and 4 for Conformer models + model_stride: 8 # Model downsampling factor, 8 for Citrinet and FastConformer models, and 4 for Conformer models decoder_type: null # Used for hybrid CTC RNNT model only. Specify decoder_type *ctc* or *rnnt* for hybrid CTC RNNT model. + test_ds: manifest_filepath: null sample_rate: 16000 batch_size: 32 + num_workers: 4 augmentor: silence: diff --git a/tools/asr_evaluator/utils.py b/tools/asr_evaluator/utils.py index 84f4bdb62364..8fd2ebb224c3 100644 --- a/tools/asr_evaluator/utils.py +++ b/tools/asr_evaluator/utils.py @@ -28,6 +28,9 @@ def run_asr_inference(cfg: DictConfig) -> DictConfig: if (cfg.model_path and cfg.pretrained_name) or (not cfg.model_path and not cfg.pretrained_name): raise ValueError("Please specify either cfg.model_path or cfg.pretrained_name!") + if cfg.inference.decoder_type not in [None, 'ctc', 'rnnt']: + raise ValueError("decoder_type could only be null, ctc or rnnt") + if cfg.inference.mode == "offline": cfg = run_offline_inference(cfg) @@ -67,6 +70,7 @@ def run_asr_inference(cfg: DictConfig) -> DictConfig: def run_chunked_inference(cfg: DictConfig) -> DictConfig: + if "output_filename" not in cfg or not cfg.output_filename: if cfg.model_path: model_name = Path(cfg.model_path).stem @@ -93,10 +97,43 @@ def run_chunked_inference(cfg: DictConfig) -> DictConfig: / "ctc" / "speech_to_text_buffered_infer_ctc.py" ) + use_rnnt_scrpit = False + # hybrid model + if (cfg.pretrained_name and 'hybrid' in cfg.pretrained_name.lower()) or ( + cfg.model_path and 'hybrid' in cfg.model_path.lower() + ): + if cfg.inference.decoder_type != 'ctc': + use_rnnt_scrpit = True + # rnnt model + elif ( + (cfg.pretrained_name and 'rnnt' in cfg.pretrained_name.lower()) + or (cfg.pretrained_name and 'transducer' in cfg.pretrained_name.lower()) + or (cfg.model_path and 'rnnt' in cfg.model_path.lower()) + or (cfg.model_path and 'transducer' in cfg.model_path.lower()) + ): + if cfg.inference.decoder_type and cfg.inference.decoder_type != 'rnnt': + raise ValueError( + f"rnnt models only support rnnt deocoding! Current decoder_type: {cfg.inference.decoder_type}! Change it to null or rnnt for rnnt models" + ) + use_rnnt_scrpit = True - if (cfg.pretrained_name and 'transducer' in cfg.pretrained_name) or ( - cfg.model_path and 'transducer' in cfg.model_path + # ctc model + elif (cfg.pretrained_name and 'ctc' in cfg.pretrained_name.lower()) or ( + cfg.pretrained_name and 'ctc' in cfg.pretrained_name.lower() ): + if cfg.inference.decoder_type and cfg.inference.decoder_type != 'ctc': + raise ValueError( + f"ctc models only support ctc deocoding! Current decoder_type: {cfg.inference.decoder_type}! Change it to null or ctc for ctc models" + ) + else: + raise ValueError( + "Please make sure your pretrained_name or model_path contains \n\ + 'hybrid' for EncDecHybridRNNTCTCModel model, \n\ + 'transducer/rnnt' for EncDecRNNTModel model or \n\ + 'ctc' for EncDecCTCModel." + ) + + if use_rnnt_scrpit: script_path = ( Path(__file__).parents[2] / "examples" @@ -106,20 +143,25 @@ def run_chunked_inference(cfg: DictConfig) -> DictConfig: / "speech_to_text_buffered_infer_rnnt.py" ) + # If need to change other config such as decoding strategy, could either: + # 1) change TranscriptionConfig on top of the executed scripts such as speech_to_text_buffered_infer_rnnt.py, or + # 2) add command as "decoding.strategy=greedy_batch " to below script + + base_cmd = f"python {script_path} \ + calculate_wer=False \ + model_path={cfg.model_path} \ + pretrained_name={cfg.pretrained_name} \ + dataset_manifest={cfg.test_ds.manifest_filepath} \ + output_filename={cfg.output_filename} \ + random_seed={cfg.random_seed} \ + batch_size={cfg.test_ds.batch_size} \ + num_workers={cfg.test_ds.num_workers} \ + chunk_len_in_secs={cfg.inference.chunk_len_in_secs} \ + total_buffer_in_secs={cfg.inference.total_buffer_in_secs} \ + model_stride={cfg.inference.model_stride} " + subprocess.run( - f"python {script_path} " - f"calculate_wer=False " - f"model_path={cfg.model_path} " - f"pretrained_name={cfg.pretrained_name} " - f"dataset_manifest={cfg.test_ds.manifest_filepath} " - f"output_filename={cfg.output_filename} " - f"random_seed={cfg.random_seed} " - f"batch_size={cfg.test_ds.batch_size} " - f"chunk_len_in_secs={cfg.inference.chunk_len_in_secs} " - f"total_buffer_in_secs={cfg.inference.total_buffer_in_secs} " - f"model_stride={cfg.inference.model_stride} ", - shell=True, - check=True, + base_cmd, shell=True, check=True, ) return cfg @@ -142,7 +184,7 @@ def run_offline_inference(cfg: DictConfig) -> DictConfig: f.seek(0) # reset file pointer script_path = Path(__file__).parents[2] / "examples" / "asr" / "transcribe_speech.py" - # If need to move other config such as decoding strategy, could either: + # If need to change other config such as decoding strategy, could either: # 1) change TranscriptionConfig on top of the executed scripts such as transcribe_speech.py in examples/asr, or # 2) add command as "rnnt_decoding.strategy=greedy_batch " to below script subprocess.run( @@ -153,6 +195,7 @@ def run_offline_inference(cfg: DictConfig) -> DictConfig: f"dataset_manifest={cfg.test_ds.manifest_filepath} " f"output_filename={cfg.output_filename} " f"batch_size={cfg.test_ds.batch_size} " + f"num_workers={cfg.test_ds.num_workers} " f"random_seed={cfg.random_seed} " f"eval_config_yaml={f.name} " f"decoder_type={cfg.inference.decoder_type} ", From 67b19062183907cca03119ceda09ad05294bf547 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 20:23:25 -0700 Subject: [PATCH 092/512] Patch decoding for PC models (#6630) (#6631) * Patch decoding logic for PC models * Patch decoding logic for PC models --------- Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar --- nemo/collections/asr/metrics/rnnt_wer.py | 5 +++++ nemo/collections/asr/metrics/wer.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/nemo/collections/asr/metrics/rnnt_wer.py b/nemo/collections/asr/metrics/rnnt_wer.py index 00cacbf863d4..0634a45f6a23 100644 --- a/nemo/collections/asr/metrics/rnnt_wer.py +++ b/nemo/collections/asr/metrics/rnnt_wer.py @@ -13,6 +13,7 @@ # limitations under the License. import copy +import re from abc import abstractmethod from dataclasses import dataclass, is_dataclass from typing import Callable, Dict, List, Optional, Tuple, Union @@ -499,6 +500,10 @@ def decode_hypothesis(self, hypotheses_list: List[Hypothesis]) -> List[Union[Hyp else: hypothesis = self.decode_tokens_to_str(prediction) + # TODO: remove + # collapse leading spaces before . , ? for PC models + hypothesis = re.sub(r'(\s+)([\.\,\?])', r'\2', hypothesis) + if self.compute_hypothesis_token_set: hypotheses_list[ind].tokens = self.decode_ids_to_tokens(prediction) diff --git a/nemo/collections/asr/metrics/wer.py b/nemo/collections/asr/metrics/wer.py index faeef5d3d477..4bbeba7624ae 100644 --- a/nemo/collections/asr/metrics/wer.py +++ b/nemo/collections/asr/metrics/wer.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re from abc import abstractmethod from dataclasses import dataclass, is_dataclass from typing import Callable, Dict, List, Optional, Tuple, Union @@ -540,6 +541,10 @@ def decode_hypothesis( else: hypothesis = self.decode_tokens_to_str(decoded_prediction) + # TODO: remove + # collapse leading spaces before . , ? for PC models + hypothesis = re.sub(r'(\s+)([\.\,\?])', r'\2', hypothesis) + # Preserve this wrapped hypothesis or decoded text tokens. hypotheses_list[ind].text = hypothesis From 32054d7a3eeb48f0fc5af5c6bbe8a703c0963d03 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 11 May 2023 09:02:07 -0700 Subject: [PATCH 093/512] Fix wer.py where 'errors' variable was not set (#6633) (#6634) Fix wer.py where 'errors' variable was not set when both reference and hypothesis are empty strings Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> --- nemo/collections/asr/metrics/wer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nemo/collections/asr/metrics/wer.py b/nemo/collections/asr/metrics/wer.py index 4bbeba7624ae..d9b745cbc940 100644 --- a/nemo/collections/asr/metrics/wer.py +++ b/nemo/collections/asr/metrics/wer.py @@ -114,6 +114,8 @@ def word_error_rate_detail( if len(h_list) != 0: errors = len(h_list) ops_count['insertions'] += errors + else: + errors = 0 else: if use_cer: measures = jiwer.cer(r, h, return_dict=True) From a4bfbb37a2ce7c489ca95c9a602762badf04fd6f Mon Sep 17 00:00:00 2001 From: Tim Moon <4406448+timmoon10@users.noreply.github.com> Date: Thu, 11 May 2023 10:00:28 -0700 Subject: [PATCH 094/512] Restore GPT support for interleaved pipeline parallelism (#6528) (#6613) * Restore logic for data-parallel communication with pipeline parallelism in GPT * Support dynamic attention masks in GPT * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Debug typos * Debug data iterator caching with interleaved pipeline parallelism Each model chunk accesses the data iterator multiple times, so we need to cache multiple samples. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update Megatron-LM commit * Distinguish between list of data iterators and data iterator that is a list * Create dummy iters to satisy len checks * Kludge while waiting for Megatron-LM update * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * set transformers offline to avoid rate limiting --------- Signed-off-by: Tim Moon Signed-off-by: Eric Harper Signed-off-by: Abhinav Khattar Signed-off-by: ericharper Signed-off-by: Tim Moon <4406448+timmoon10@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Harper Co-authored-by: Abhinav Khattar --- Jenkinsfile | 42 ++--- .../language_modeling/megatron_gpt_model.py | 151 ++++++++++++------ .../modules/common/megatron/build_model.py | 49 +++--- 3 files changed, 150 insertions(+), 92 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 59a6fe7c416e..5b3f6d5d4186 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,7 +2,7 @@ pipeline { agent { docker { image 'pytorch_23.03:apex_57057e2fcf1c084c0fcc818f55c0ff6ea1b24ae2' - args '--device=/dev/nvidia0 --gpus all --user 0:128 -v /home/TestData:/home/TestData -v $HOME/.cache:/root/.cache --shm-size=8g' + args '--device=/dev/nvidia0 --gpus all --user 0:128 -v /home/TestData:/home/TestData -v $HOME/.cache:/root/.cache --shm-size=8g --env TRANSFORMERS_OFFLINE=1' } } options { @@ -1014,7 +1014,7 @@ pipeline { // TODO: pleasefixme @redoctopus // stage('ByT5G2P training, evaluation and inference') { // steps { - // sh 'TRANSFORMERS_OFFLINE=0 && cd examples/tts/g2p && \ + // sh 'TRANSFORMERS_OFFLINE=1 && cd examples/tts/g2p && \ // TIME=`date +"%Y-%m-%d-%T"` && OUTPUT_DIR_T5=output_byt5_${TIME} && \ // python g2p_train_and_evaluate.py \ // train_manifest=/home/TestData/g2p/g2p.json \ @@ -1158,7 +1158,7 @@ pipeline { parallel { stage('Dialogue: Intent and slot classification using GPT') { steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/dialogue && \ python dialogue.py \ model.dataset.data_dir=/home/TestData/nlp/sgd_small \ model.language_model.lm_checkpoint=/home/TestData/nlp/gpt2/pytorch_model.bin\ @@ -1185,7 +1185,7 @@ pipeline { } stage('Intent and slot classification using SGDQA') { steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/dialogue && \ python dialogue.py \ model.dataset.data_dir=/home/TestData/nlp/sgd_small \ model.dataset.dialogues_example_dir=sgd_gen_bert_outputs \ @@ -1208,7 +1208,7 @@ pipeline { } stage('Intent and slot classification using IntentSlotClassificationModel') { steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/dialogue && \ python dialogue.py \ model.dataset.data_dir=/home/TestData/nlp/processed_assistant \ model.dataset.dialogues_example_dir=sgd_gen_bert_intent_classification_outputs \ @@ -1230,7 +1230,7 @@ pipeline { } stage('Intent classification using ZeroShotIntentModel') { steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/dialogue && \ python dialogue.py \ do_training=False \ model.dataset.data_dir=/home/TestData/nlp/drive_thru_revised \ @@ -1255,7 +1255,7 @@ pipeline { } stage('Design Intent classification using ZeroShotIntentModel') { steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/dialogue && \ python dialogue.py \ do_training=False \ model.dataset.data_dir=/home/TestData/nlp/design_dataset \ @@ -1281,7 +1281,7 @@ pipeline { } stage('Design Intent classification using ZeroShotIntentModel BART Classifier') { steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/dialogue && \ python dialogue.py \ do_training=False \ model.dataset.data_dir=/home/TestData/nlp/design_dataset \ @@ -1300,7 +1300,7 @@ pipeline { } stage('Design Intent classification using DialogueNearestNeighbourModel') { steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/dialogue && \ python dialogue.py \ do_training=False \ model.dataset.data_dir=/home/TestData/nlp/design_dataset \ @@ -1329,7 +1329,7 @@ pipeline { parallel { stage('Dialogue: Answer Extender using DialogueS2SGenerationModel') { steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/dialogue && \ python dialogue.py \ do_training=False \ model.dataset.data_dir=/home/TestData/nlp/ms-marco-qa \ @@ -1354,7 +1354,7 @@ pipeline { } stage('Dialogue: SGD Based Answer Extender using DialogueS2SGenerationModel') { steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/dialogue && \ python dialogue.py \ do_training=False \ model.dataset.data_dir=/home/TestData/nlp/sgd_small \ @@ -1395,7 +1395,7 @@ pipeline { // parallel { // stage('Dialogue: Answer Extender using DialogueGPTGenerationModel') { // steps { -// sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue && \ +// sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/dialogue && \ // python dialogue.py \ // do_training=False \ // model.dataset.data_dir=/home/TestData/nlp/ms-marco-qa \ @@ -1425,7 +1425,7 @@ pipeline { parallel { stage('Dialogue: Answer Extender using DialogueGPTGenerationModel') { steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/dialogue && \ python dialogue.py \ do_training=False \ model.dataset.data_dir=/home/TestData/nlp/ms-marco-qa \ @@ -1549,7 +1549,7 @@ pipeline { stage('BERT SQUAD 1.1') { // Cannot do fast_dev_run because squad needs whole dev dataset steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/question_answering && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/question_answering && \ python question_answering.py \ model.train_ds.file=/home/TestData/nlp/squad_mini/v1.1/train-v1.1.json \ model.dataset.use_cache=false \ @@ -1574,7 +1574,7 @@ pipeline { stage('BERT SQUAD 2.0') { // Cannot do fast_dev_run because squad needs whole dev dataset steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/question_answering && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/question_answering && \ python question_answering.py \ model.train_ds.file=/home/TestData/nlp/squad_mini/v2.0/train-v2.0.json \ model.dataset.use_cache=false \ @@ -1608,7 +1608,7 @@ pipeline { stage('BART SQUAD 1.1') { // Cannot do fast_dev_run because squad needs whole dev dataset steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/question_answering && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/question_answering && \ python question_answering.py \ model.train_ds.file=/home/TestData/nlp/squad_mini/v1.1/train-v1.1.json \ model.dataset.use_cache=false \ @@ -1634,7 +1634,7 @@ pipeline { stage('BART SQUAD 2.0') { // Cannot do fast_dev_run because squad needs whole dev dataset steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/question_answering && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/question_answering && \ python question_answering.py \ model.train_ds.file=/home/TestData/nlp/squad_mini/v2.0/train-v2.0.json \ model.dataset.use_cache=false \ @@ -1669,7 +1669,7 @@ pipeline { stage('GPT2 SQUAD 1.1') { // Cannot do fast_dev_run because squad needs whole dev dataset steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/question_answering && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/question_answering && \ python question_answering.py \ model.train_ds.file=/home/TestData/nlp/squad_mini/v1.1/train-v1.1.json \ model.dataset.use_cache=false \ @@ -1695,7 +1695,7 @@ pipeline { stage('GPT2 SQUAD 2.0') { // Cannot do fast_dev_run because squad needs whole dev dataset steps { - sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/question_answering && \ + sh 'TRANSFORMERS_OFFLINE=1 && cd examples/nlp/question_answering && \ python question_answering.py \ model.train_ds.file=/home/TestData/nlp/squad_mini/v2.0/train-v2.0.json \ model.dataset.use_cache=false \ @@ -4016,7 +4016,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' sh "rm -rf examples/nlp/language_modeling/t5_index_mappings" } } - + stage('L2: Megatron T5 Prompt Learning TP1 PP1') { when { anyOf { @@ -4101,7 +4101,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' } } } - + // TODO: add when https://github.com/NVIDIA/apex/pull/1596 is merged // stage('L2: Megatron T5 Prompt Learning TP1 PP2') { // when { diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 9cb4efca57fc..b5f8b2b18f69 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -13,7 +13,9 @@ # limitations under the License. import itertools -from typing import Any, List, Optional, Union +import queue +from functools import partial +from typing import Any, Iterator, List, Optional, Union import numpy as np import torch @@ -68,6 +70,9 @@ from megatron.core import parallel_state from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + # TODO @tmoon: Use once available in Megatron-LM + # from megatron.core.pipeline_parallel.schedules import DataIteratorList + HAVE_MEGATRON_CORE = True except (ImportError, ModuleNotFoundError): @@ -337,15 +342,24 @@ def forward(self, tokens, text_position_ids, attention_mask, labels): def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + # handle asynchronous grad reduction + no_sync_func = None + grad_sync_func = None + param_sync_func = None + if not forward_only and self.with_distributed_adam: + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_o2,) + grad_sync_func = self.reduce_overlap_gradients + param_sync_func = self.sync_overlap_parameters + # run forward and backwards passes for an entire global batch # we do this inside training_step to support pipeline parallelism fwd_bwd_function = get_forward_backward_func() - # TODO @akhattar: remove sync related stuff from config, add num_micro_batches_with_partial_activation_checkpoints when ready + # TODO @akhattar: add num_micro_batches_with_partial_activation_checkpoints when ready losses_reduced_per_micro_batch = fwd_bwd_function( forward_step_func=self.get_forward_output_and_loss_func(), - data_iterator=dataloader_iter, - model=[self.model], + data_iterator=self._make_data_iterator_list(dataloader_iter), + model=self.model, num_microbatches=get_num_microbatches(), forward_only=forward_only, tensor_shape=tensor_shape, @@ -353,6 +367,9 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), enable_autocast=self.enable_autocast, + no_sync_func=no_sync_func, + grad_sync_func=grad_sync_func, + param_sync_func=param_sync_func, ) # only the last stages of the pipeline return losses @@ -556,44 +573,88 @@ def allreduce_first_last_embeddings(self): grad = word_embeddings_weight.grad torch.distributed.all_reduce(grad, group=parallel_state.get_embedding_group()) + def _make_data_iterator_list(self, data_iterator: Iterator) -> List[Iterator]: + """ Convert data iterator into form expected by Megatron + + With interleaved pipeline parallelism, Megatron expects a + list of one data iterator per model chunk. Each model + chunk independently gets data from its data iterator, so + we need to interact with the data iterator multiple times + for each microbatch step. Instead of incorporating this + logic into the data loader, we cache the iterator's output + to the first model chunk and reuse it in the other model + chunks. + """ + + if not isinstance(self.model, list) or len(self.model) == 1: + return data_iterator # TODO @tmoon: Remove + # TODO @tmoon: Use once available in Megatron-LM + # return DataIteratorList([data_iterator]) + + class CachingIterator: + """Iterator wrapper that caches values""" + + class Proxy: + """Returns values from caching iterator wrapper + + Assumed to never advance past the caching iterator. + """ + + def __init__(self): + self.cache = queue.Queue() + + def __iter__(self): + return self + + def __next__(self): + return self.cache.get_nowait() + + def __init__(self, iterator: Iterator): + self.iterator = iterator + self.proxies = [] + + def make_proxy(self): + self.proxies.append(CachingIterator.Proxy()) + return self.proxies[-1] + + def __iter__(self): + return self + + def __next__(self): + val = next(self.iterator) + for proxy in self.proxies: + proxy.cache.put(val) + return val + + # Make list of iterator wrappers + iters = [CachingIterator(data_iterator)] + while len(iters) < len(self.model): + iters.append(iters[0].make_proxy()) + return iters # TODO @tmoon: Remove + # TODO @tmoon: Use once available in Megatron-LM + # return DataIteratorList(iters) + def get_forward_output_and_loss_func(self, validation_step=False): def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_layers=None): + + # Get data batch + batch = next(dataloader_iter) + + # Transfer needed data to GPU + required_keys = set() if parallel_state.get_pipeline_model_parallel_world_size() == 1: - batch = next(dataloader_iter) - for k in batch.keys(): - if self.get_attention_mask_from_fusion: - batch[k] = batch[k].cuda(non_blocking=True) if k not in ['attention_mask'] else None - else: - batch[k] = batch[k].cuda(non_blocking=True) + required_keys.update(batch.keys()) else: + required_keys.add('attention_mask') if parallel_state.is_pipeline_first_stage(): - batch = next(dataloader_iter) - # First pipeline stage needs tokens, position_ids, and attention_mask - for k in batch.keys(): - if self.get_attention_mask_from_fusion: - batch[k] = batch[k].cuda(non_blocking=True) if k in ['tokens', 'position_ids'] else None - else: - batch[k] = ( - batch[k].cuda(non_blocking=True) - if k in ['tokens', 'position_ids', 'attention_mask'] - else None - ) - elif parallel_state.is_pipeline_last_stage(): - batch = next(dataloader_iter) - # Last pipeline stage needs the labels, loss_mask, and attention_mask - for k in batch.keys(): - if self.get_attention_mask_from_fusion: - batch[k] = batch[k].cuda(non_blocking=True) if k in ['labels', 'loss_mask'] else None - else: - batch[k] = ( - batch[k].cuda(non_blocking=True) - if k in ['labels', 'loss_mask', 'attention_mask'] - else None - ) - else: - # Intermediate pipeline stage doesn't need any inputs - batch = {k: None for k in ['tokens', 'position_ids', 'attention_mask', 'labels']} - + required_keys.update(('tokens', 'position_ids')) + if parallel_state.is_pipeline_last_stage(): + required_keys.update(('labels', 'loss_mask')) + if self.get_attention_mask_from_fusion: + required_keys.remove('attention_mask') + batch = {key: val.cuda(non_blocking=True) if key in required_keys else None for key, val in batch.items()} + + # Model forward pass output_tensor = model( batch['tokens'], batch['position_ids'], @@ -1052,8 +1113,8 @@ def parameters(self): return self.model.parameters() def _reset_activation_checkpointing_args(self): - """ Disables activation checkpointing completely and saves the values so that - _restore_activation_checkpointing_args can restore them later. This function must always be + """ Disables activation checkpointing completely and saves the values so that + _restore_activation_checkpointing_args can restore them later. This function must always be called before _restore_activation_checkpointing_args. """ # Store values to restore them later. @@ -1076,8 +1137,8 @@ def _reset_activation_checkpointing_args(self): module.language_model.encoder.activations_checkpoint_layers_per_pipeline = None def _restore_activation_checkpointing_args(self): - """ Restores the activation checkpointing parameters using the values saved by - _reset_activation_checkpointing_args. This function must never be called before + """ Restores the activation checkpointing parameters using the values saved by + _reset_activation_checkpointing_args. This function must never be called before _reset_activation_checkpointing_args. """ # Restore config values. @@ -1096,8 +1157,8 @@ def _restore_activation_checkpointing_args(self): ) def _reset_sequence_parallelism_args(self): - """ Disables sequence parallelism completely and saves the values so that - _restore_sequence_parallelism_args can restore them later. This function must always be + """ Disables sequence parallelism completely and saves the values so that + _restore_sequence_parallelism_args can restore them later. This function must always be called before _restore_sequence_parallelism_args. """ # Store values to restore them later. @@ -1112,8 +1173,8 @@ def _reset_sequence_parallelism_args(self): module.language_model.encoder.sequence_parallel = None def _restore_sequence_parallelism_args(self): - """ Restores the sequence parallelism parameters using the values saved by - _reset_sequence_parallelism_args. This function must never be called before + """ Restores the sequence parallelism parameters using the values saved by + _reset_sequence_parallelism_args. This function must never be called before _reset_sequence_parallelism_args. """ # Restore config values. diff --git a/nemo/collections/nlp/modules/common/megatron/build_model.py b/nemo/collections/nlp/modules/common/megatron/build_model.py index 4c7790773d5b..929093405fce 100644 --- a/nemo/collections/nlp/modules/common/megatron/build_model.py +++ b/nemo/collections/nlp/modules/common/megatron/build_model.py @@ -74,28 +74,25 @@ def build_model( and virtual_pipeline_model_parallel_size is not None ): model = [] + parallel_state.set_virtual_pipeline_model_parallel_world_size(virtual_pipeline_model_parallel_size) for i in range(virtual_pipeline_model_parallel_size): - cur_args = args - cur_kwargs = kwargs parallel_state.set_virtual_pipeline_model_parallel_rank(i) - # Set pre_process and post_process only after virtual rank is set. - pre_process = parallel_state.is_pipeline_first_stage() - post_process = parallel_state.is_pipeline_last_stage() - cur_kwargs.update( - {"pre_process": pre_process, "post_process": post_process,} + model.append( + model_provider_func( + *args, + **kwargs, + pre_process=parallel_state.is_pipeline_first_stage(), + post_process=parallel_state.is_pipeline_last_stage(), + ) ) - this_model = model_provider_func(*cur_args, **cur_kwargs) - model.append(this_model) else: - cur_args = args - cur_kwargs = kwargs if model_type == ModelType.encoder_or_decoder: - pre_process = parallel_state.is_pipeline_first_stage() - post_process = parallel_state.is_pipeline_last_stage() - cur_kwargs.update( - {"pre_process": pre_process, "post_process": post_process,} + model = model_provider_func( + *args, + **kwargs, + pre_process=parallel_state.is_pipeline_first_stage(), + post_process=parallel_state.is_pipeline_last_stage(), ) - model = model_provider_func(*cur_args, **cur_kwargs) elif model_type == ModelType.encoder_and_decoder: pre_process = parallel_state.is_pipeline_first_stage() post_process = parallel_state.is_pipeline_last_stage() @@ -111,23 +108,23 @@ def build_model( post_process = rank == (split_rank - 1) or rank == (world_size - 1) add_encoder = parallel_state.is_pipeline_stage_before_split() add_decoder = parallel_state.is_pipeline_stage_after_split() - cur_kwargs.update( - { - "pre_process": pre_process, - "post_process": post_process, - "add_encoder": add_encoder, - "add_decoder": add_decoder, - } + model = model_provider_func( + *args, + **kwargs, + pre_process=pre_process, + post_process=post_process, + add_encoder=add_encoder, + add_decoder=add_decoder, ) - model = model_provider_func(*cur_args, **cur_kwargs) else: raise ValueError(f"Unrecognized ModelType '{model_type}'") - model.model_type = model_type - if not isinstance(model, list): model = [model] + for model_module in model: + model_module.model_type = model_type + # Set tensor model parallel attributes if not set. # Only parameters that are already tensor model parallel have these # attributes set for them. We should make sure the default attributes From d1a5ad50401318508bdf310f150ead01d3470944 Mon Sep 17 00:00:00 2001 From: fayejf <36722593+fayejf@users.noreply.github.com> Date: Thu, 11 May 2023 14:30:19 -0700 Subject: [PATCH 095/512] bugfix (#6636) Signed-off-by: fayejf --- tools/asr_evaluator/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/asr_evaluator/utils.py b/tools/asr_evaluator/utils.py index 8fd2ebb224c3..1702dc3caf53 100644 --- a/tools/asr_evaluator/utils.py +++ b/tools/asr_evaluator/utils.py @@ -119,7 +119,7 @@ def run_chunked_inference(cfg: DictConfig) -> DictConfig: # ctc model elif (cfg.pretrained_name and 'ctc' in cfg.pretrained_name.lower()) or ( - cfg.pretrained_name and 'ctc' in cfg.pretrained_name.lower() + cfg.model_path and 'ctc' in cfg.model_path.lower() ): if cfg.inference.decoder_type and cfg.inference.decoder_type != 'ctc': raise ValueError( From 3ddebd91ef2885c790d5b40a853772fc560f2fb4 Mon Sep 17 00:00:00 2001 From: Igor Gitman Date: Thu, 11 May 2023 14:38:14 -0700 Subject: [PATCH 096/512] Disable interctc tests (#6638) Signed-off-by: Igor Gitman --- tests/collections/asr/test_asr_interctc_models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/collections/asr/test_asr_interctc_models.py b/tests/collections/asr/test_asr_interctc_models.py index c4022cdb49c7..ebed951d1d92 100644 --- a/tests/collections/asr/test_asr_interctc_models.py +++ b/tests/collections/asr/test_asr_interctc_models.py @@ -66,6 +66,7 @@ def squeezeformer_encoder_config() -> Dict: class TestInterCTCLoss: + @pytest.mark.pleasefixme @pytest.mark.unit @pytest.mark.parametrize( "model_class", [EncDecCTCModel, EncDecHybridRNNTCTCModel], From 1f08bd3f0600a061a62e111e3a077c753cdbc1cb Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 11 May 2023 17:15:31 -0600 Subject: [PATCH 097/512] Add megatron_core to requirements (#6639) (#6640) * add megatron_core to requirements * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: ericharper Co-authored-by: Eric Harper Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- Dockerfile | 5 ----- README.rst | 13 ++----------- requirements/requirements_nlp.txt | 1 + 3 files changed, 3 insertions(+), 16 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4cbbf14314c9..d27ed857a88a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,11 +43,6 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* WORKDIR /workspace/ -# Install Megatron-core -RUN git clone https://github.com/NVIDIA/Megatron-LM.git && \ - cd Megatron-LM && \ - git checkout 9f8bdeb4814ed61fbc9c7d5b39c7710e77b99754 && \ - pip install -e . WORKDIR /tmp/ # TODO: Remove once this Apex commit (2/24/23) is included in PyTorch diff --git a/README.rst b/README.rst index 929cc7f86abc..da24655d008f 100644 --- a/README.rst +++ b/README.rst @@ -236,8 +236,8 @@ Note that RNNT requires numba to be installed from conda. NeMo Megatron ~~~~~~~~~~~~~ -NeMo Megatron training requires NVIDIA Apex and Megatron-core to be installed. -Install them manually if not using the NVIDIA PyTorch container. +NeMo Megatron training requires NVIDIA Apex to be installed. +Install it manually if not using the NVIDIA PyTorch container. To install Apex, run @@ -248,15 +248,6 @@ To install Apex, run git checkout 57057e2fcf1c084c0fcc818f55c0ff6ea1b24ae2 pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_layer_norm" --global-option="--distributed_adam" --global-option="--deprecated_fused_adam" ./ -To install Megatron-core, run - -.. code-block:: bash - - git clone https://github.com/NVIDIA/Megatron-LM.git - cd Megatron-LM - git checkout 9f8bdeb4814ed61fbc9c7d5b39c7710e77b99754 - pip install -e . - It is highly recommended to use the NVIDIA PyTorch or NeMo container if having issues installing Apex or any other dependencies. While installing Apex, it may raise an error if the CUDA version on your system does not match the CUDA version torch was compiled with. diff --git a/requirements/requirements_nlp.txt b/requirements/requirements_nlp.txt index 7283c3092000..0c3c42ba583f 100644 --- a/requirements/requirements_nlp.txt +++ b/requirements/requirements_nlp.txt @@ -11,6 +11,7 @@ ijson inflect jieba matplotlib>=3.3.2 +megatron_core==0.1.0 nltk>=3.6.5 numpy opencc From fbf5d425a3ca12741644a9cf59e17f2ca0d4c6de Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 11 May 2023 17:19:52 -0600 Subject: [PATCH 098/512] Remove from jenkins (#6642) * Remove from jenkins (#6641) * add megatron_core to requirements Signed-off-by: ericharper * remove from jenkins Signed-off-by: ericharper --------- Signed-off-by: ericharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove dup Signed-off-by: ericharper --------- Signed-off-by: ericharper Co-authored-by: Eric Harper Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- Jenkinsfile | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 5b3f6d5d4186..deba8fcbfd70 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -57,16 +57,6 @@ pipeline { } } - // TODO: remove when pip package is available - stage('Megatron Core installation') { - steps { - sh 'git clone https://github.com/NVIDIA/Megatron-LM.git && \ - cd Megatron-LM && \ - git checkout 9f8bdeb4814ed61fbc9c7d5b39c7710e77b99754 && \ - pip install -e .' - } - } - stage('PyTorch Lightning version') { steps { sh 'python -c "import pytorch_lightning; print(pytorch_lightning.__version__)"' From cf4a8cff07d228e655890f6f741cfbea928e1e4b Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Thu, 11 May 2023 18:18:10 -0700 Subject: [PATCH 099/512] sft model can use this script for eval (#6637) * sft model can use this script for eval Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * please fix me Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * minor Signed-off-by: arendu --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../tuning/megatron_gpt_peft_eval.py | 23 +++++++++++++------ .../asr/test_asr_interctc_models.py | 1 + 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py index a9f6a110c210..8cccaa024396 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py @@ -34,6 +34,7 @@ GradScaler, MegatronHalfPrecisionPlugin, NLPDDPStrategy, + NLPSaveRestoreConnector, PEFTSaveRestoreConnector, PipelineMixedPrecisionPlugin, ) @@ -70,7 +71,6 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f"\n{OmegaConf.to_yaml(cfg)}") assert cfg.model.restore_from_path is not None - assert cfg.model.peft.restore_from_path is not None megatron_amp_o2 = cfg.model.get("megatron_amp_O2", False) with_distributed_adam = False @@ -100,9 +100,14 @@ def main(cfg) -> None: plugins.append(TorchElasticEnvironment()) trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) - peft_model_cfg = MegatronGPTPEFTModel.restore_from( - restore_path=cfg.model.peft.restore_from_path, trainer=trainer, return_config=True, - ) + if cfg.model.peft.restore_from_path: + peft_model_cfg = MegatronGPTPEFTModel.restore_from( + restore_path=cfg.model.peft.restore_from_path, trainer=trainer, return_config=True, + ) + else: + peft_model_cfg = MegatronGPTPEFTModel.restore_from( + restore_path=cfg.model.restore_from_path, trainer=trainer, return_config=True, + ) # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams with open_dict(peft_model_cfg): @@ -116,9 +121,13 @@ def main(cfg) -> None: cfg.inference.add_BOS = peft_model_cfg.data.test_ds.add_bos cfg.inference.tokens_to_generate = peft_model_cfg.data.test_ds.tokens_to_generate - save_restore_connector = PEFTSaveRestoreConnector( - peft_model_nemo_path=cfg.model.peft.restore_from_path, peft_model_ckpt_path=None, - ) + if cfg.model.peft.restore_from_path: + save_restore_connector = PEFTSaveRestoreConnector( + peft_model_nemo_path=cfg.model.peft.restore_from_path, peft_model_ckpt_path=None, + ) + else: + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(peft_model_cfg.restore_from_path): save_restore_connector.model_extracted_dir = cfg.model.restore_from_path # peft_cls = _get_peft_scheme(peft_model_cfg) diff --git a/tests/collections/asr/test_asr_interctc_models.py b/tests/collections/asr/test_asr_interctc_models.py index ebed951d1d92..6225eecf9660 100644 --- a/tests/collections/asr/test_asr_interctc_models.py +++ b/tests/collections/asr/test_asr_interctc_models.py @@ -87,6 +87,7 @@ class TestInterCTCLoss: ([], [0.3]), ], ) + @pytest.mark.pleasefixme def test_forward(self, model_class, encoder_config, apply_at_layers, loss_weights): preprocessor_config = {'_target_': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor'} vocabulary = [ From 8dbc5b37da020503c5b47b9e9ec6a0f23478ccb9 Mon Sep 17 00:00:00 2001 From: Ryan Langman Date: Thu, 11 May 2023 21:10:57 -0700 Subject: [PATCH 100/512] [TTS] Fix TTS audio preprocessing bugs (#6628) Signed-off-by: Ryan --- .../tts/parts/preprocessing/features.py | 4 + .../tts/parts/utils/tts_dataset_utils.py | 3 + .../tts/audio_processing/preprocess_audio.py | 4 +- tests/collections/tts/data/test_data_utils.py | 76 ------------------- .../tts/parts/utils/test_tts_dataset_utils.py | 68 ++++++++++++++++- 5 files changed, 76 insertions(+), 79 deletions(-) delete mode 100644 tests/collections/tts/data/test_data_utils.py diff --git a/nemo/collections/tts/parts/preprocessing/features.py b/nemo/collections/tts/parts/preprocessing/features.py index 675d61adeebe..7d7150a7050f 100644 --- a/nemo/collections/tts/parts/preprocessing/features.py +++ b/nemo/collections/tts/parts/preprocessing/features.py @@ -131,10 +131,14 @@ def __init__( n_fft=win_length, lowfreq=lowfreq, highfreq=highfreq, + mag_power=1.0, log=log, log_zero_guard_type=log_zero_guard_type, log_zero_guard_value=log_zero_guard_value, mel_norm=mel_norm, + normalize=None, + preemph=None, + dither=0.0, ) def compute_mel_spec(self, manifest_entry: dict, audio_dir: Path) -> Tensor: diff --git a/nemo/collections/tts/parts/utils/tts_dataset_utils.py b/nemo/collections/tts/parts/utils/tts_dataset_utils.py index f07b2a9a5b74..06befcb6ec02 100644 --- a/nemo/collections/tts/parts/utils/tts_dataset_utils.py +++ b/nemo/collections/tts/parts/utils/tts_dataset_utils.py @@ -67,6 +67,9 @@ def normalize_volume(audio: np.array, volume_level: float) -> np.array: if not (0.0 <= volume_level <= 1.0): raise ValueError(f"Volume must be in range [0.0, 1.0], received {volume_level}") + if audio.size == 0: + return audio + max_sample = np.max(np.abs(audio)) if max_sample == 0: return audio diff --git a/scripts/dataset_processing/tts/audio_processing/preprocess_audio.py b/scripts/dataset_processing/tts/audio_processing/preprocess_audio.py index b0a4be54da33..c1121dae7f71 100644 --- a/scripts/dataset_processing/tts/audio_processing/preprocess_audio.py +++ b/scripts/dataset_processing/tts/audio_processing/preprocess_audio.py @@ -128,7 +128,7 @@ def _process_entry( if audio_trimmer is not None: audio, start_i, end_i = audio_trimmer.trim_audio(audio=audio, sample_rate=sample_rate, audio_id=audio_path) - if output_sample_rate is not None: + if output_sample_rate: audio = librosa.resample(y=audio, orig_sr=sample_rate, target_sr=output_sample_rate) sample_rate = output_sample_rate @@ -140,7 +140,7 @@ def _process_entry( original_duration = librosa.get_duration(filename=audio_path) output_duration = librosa.get_duration(filename=output_path) - entry["duration"] = output_duration + entry["duration"] = round(output_duration, 2) if os.path.isabs(audio_filepath): entry["audio_filepath"] = output_path diff --git a/tests/collections/tts/data/test_data_utils.py b/tests/collections/tts/data/test_data_utils.py deleted file mode 100644 index 0ce77a35945f..000000000000 --- a/tests/collections/tts/data/test_data_utils.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import pytest - -from nemo.collections.tts.parts.utils.tts_dataset_utils import normalize_volume - - -class TestDataUtils: - @pytest.mark.run_only_on('CPU') - @pytest.mark.unit - def test_normalize_volume(self): - input_audio = np.array([0.0, 0.1, 0.3, 0.5]) - expected_output = np.array([0.0, 0.18, 0.54, 0.9]) - - output_audio = normalize_volume(audio=input_audio, volume_level=0.9) - - np.testing.assert_array_almost_equal(output_audio, expected_output) - - @pytest.mark.run_only_on('CPU') - @pytest.mark.unit - def test_normalize_volume_negative_peak(self): - input_audio = np.array([0.0, 0.1, -0.3, -1.0, 0.5]) - expected_output = np.array([0.0, 0.05, -0.15, -0.5, 0.25]) - - output_audio = normalize_volume(audio=input_audio, volume_level=0.5) - - np.testing.assert_array_almost_equal(output_audio, expected_output) - - @pytest.mark.run_only_on('CPU') - @pytest.mark.unit - def test_normalize_volume_zero(self): - input_audio = np.array([0.0, 0.1, 0.3, 0.5]) - expected_output = np.array([0.0, 0.0, 0.0, 0.0]) - - output_audio = normalize_volume(audio=input_audio, volume_level=0.0) - - np.testing.assert_array_almost_equal(output_audio, expected_output) - - @pytest.mark.run_only_on('CPU') - @pytest.mark.unit - def test_normalize_volume_max(self): - input_audio = np.array([0.0, 0.1, 0.3, 0.5]) - expected_output = np.array([0.0, 0.2, 0.6, 1.0]) - - output_audio = normalize_volume(audio=input_audio, volume_level=1.0) - - np.testing.assert_array_almost_equal(output_audio, expected_output) - - @pytest.mark.run_only_on('CPU') - @pytest.mark.unit - def test_normalize_volume_zeros(self): - input_audio = np.array([0.0, 0.0, 0.0]) - - output_audio = normalize_volume(audio=input_audio, volume_level=0.5) - - np.testing.assert_array_almost_equal(input_audio, input_audio) - - @pytest.mark.run_only_on('CPU') - @pytest.mark.unit - def test_normalize_volume_out_of_range(self): - input_audio = np.array([0.0, 0.1, 0.3, 0.5]) - with pytest.raises(ValueError, match="Volume must be in range"): - normalize_volume(audio=input_audio, volume_level=2.0) diff --git a/tests/collections/tts/parts/utils/test_tts_dataset_utils.py b/tests/collections/tts/parts/utils/test_tts_dataset_utils.py index 180c3ca9f7fc..dadb1844eca6 100644 --- a/tests/collections/tts/parts/utils/test_tts_dataset_utils.py +++ b/tests/collections/tts/parts/utils/test_tts_dataset_utils.py @@ -14,9 +14,10 @@ from pathlib import Path +import numpy as np import pytest -from nemo.collections.tts.parts.utils.tts_dataset_utils import get_abs_rel_paths, get_audio_filepaths +from nemo.collections.tts.parts.utils.tts_dataset_utils import get_abs_rel_paths, get_audio_filepaths, normalize_volume class TestTTSDatasetUtils: @@ -53,3 +54,68 @@ def test_get_audio_paths(self): assert abs_path == Path("/home/audio/examples/example.wav") assert rel_path == audio_rel_path + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_normalize_volume(self): + input_audio = np.array([0.0, 0.1, 0.3, 0.5]) + expected_output = np.array([0.0, 0.18, 0.54, 0.9]) + + output_audio = normalize_volume(audio=input_audio, volume_level=0.9) + + np.testing.assert_array_almost_equal(output_audio, expected_output) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_normalize_volume_negative_peak(self): + input_audio = np.array([0.0, 0.1, -0.3, -1.0, 0.5]) + expected_output = np.array([0.0, 0.05, -0.15, -0.5, 0.25]) + + output_audio = normalize_volume(audio=input_audio, volume_level=0.5) + + np.testing.assert_array_almost_equal(output_audio, expected_output) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_normalize_volume_zero(self): + input_audio = np.array([0.0, 0.1, 0.3, 0.5]) + expected_output = np.array([0.0, 0.0, 0.0, 0.0]) + + output_audio = normalize_volume(audio=input_audio, volume_level=0.0) + + np.testing.assert_array_almost_equal(output_audio, expected_output) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_normalize_volume_max(self): + input_audio = np.array([0.0, 0.1, 0.3, 0.5]) + expected_output = np.array([0.0, 0.2, 0.6, 1.0]) + + output_audio = normalize_volume(audio=input_audio, volume_level=1.0) + + np.testing.assert_array_almost_equal(output_audio, expected_output) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_normalize_volume_zeros(self): + input_audio = np.array([0.0, 0.0, 0.0]) + + output_audio = normalize_volume(audio=input_audio, volume_level=0.5) + + np.testing.assert_array_almost_equal(output_audio, input_audio) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_normalize_volume_empty(self): + input_audio = np.array([]) + + output_audio = normalize_volume(audio=input_audio, volume_level=1.0) + + np.testing.assert_array_almost_equal(output_audio, input_audio) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_normalize_volume_out_of_range(self): + input_audio = np.array([0.0, 0.1, 0.3, 0.5]) + with pytest.raises(ValueError, match="Volume must be in range"): + normalize_volume(audio=input_audio, volume_level=2.0) From e6efafc038eebc30bf8aedde7e83270acac7d175 Mon Sep 17 00:00:00 2001 From: Vladimir Bataev Date: Fri, 12 May 2023 23:21:55 +0400 Subject: [PATCH 101/512] Move black parameters to pyproject.toml (#6647) Signed-off-by: Vladimir Bataev --- .pre-commit-config.yaml | 1 - pyproject.toml | 6 ++++++ setup.py | 5 ++--- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fd89d3983cc5..75d1a6c51a1e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,5 +43,4 @@ repos: hooks: - id: black name: Format code - args: [--skip-string-normalization, --line-length=119] additional_dependencies: ['click==8.0.2'] diff --git a/pyproject.toml b/pyproject.toml index 32490f886e1a..f2e74dab4eb9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,12 @@ default_section = "THIRDPARTY" extend_skip = ["setup.py", "docs/source/conf.py"] +[tool.black] +line_length = 119 +skip_string_normalization = true +required_version = "19.10b0" # recongized by future versions, disallows to reformat code with incompatible versions + + [tool.pytest.ini_options] # durations=0 will display all tests execution time, sorted in ascending order starting from from the slowest one. # -vv will also display tests with durration = 0.00s diff --git a/setup.py b/setup.py index 315fed8b0caf..c58fa794de03 100644 --- a/setup.py +++ b/setup.py @@ -111,9 +111,8 @@ def req_file(filename, folder="requirements"): class StyleCommand(distutils_cmd.Command): - __LINE_WIDTH = 119 - __ISORT_BASE = 'isort ' - __BLACK_BASE = f'black --skip-string-normalization --line-length={__LINE_WIDTH}' + __ISORT_BASE = 'isort' + __BLACK_BASE = 'black' description = 'Checks overall project code style.' user_options = [ ('scope=', None, 'Folder of file to operate within.'), From 83b77e65e9b4af25b3e27360e819822d9fb61d13 Mon Sep 17 00:00:00 2001 From: Vladimir Bataev Date: Fri, 12 May 2023 23:52:33 +0400 Subject: [PATCH 102/512] ASR-TTS Models: Support hybrid RNNT-CTC, improve docs. (#6620) * ASR-TTS: support hybrid RNNT-CTC models * Do not warn on optional import * Explain adding options to config * Fix import guard docs * Add docs for ConcatDataset * Add explanation for sampling parameters * Initial docs for the enhancer model * Fix use_start_end_token parameter usage --------- Signed-off-by: Vladimir Bataev --- docs/source/asr/configs.rst | 11 ++++++----- docs/source/asr/models.rst | 2 +- docs/source/common/data.rst | 13 +++++++++++++ docs/source/common/intro.rst | 1 + docs/source/tts/api.rst | 5 +++++ docs/source/tts/models.rst | 13 ++++++++++++- .../asr_with_tts/speech_to_text_bpe_with_text.py | 2 +- examples/asr/conf/asr_tts/hybrid_asr_tts.yaml | 2 +- nemo/collections/asr/data/text_to_text.py | 3 +-- .../collections/asr/models/hybrid_asr_tts_models.py | 12 +++++++++--- 10 files changed, 50 insertions(+), 14 deletions(-) create mode 100644 docs/source/common/data.rst diff --git a/docs/source/asr/configs.rst b/docs/source/asr/configs.rst index fc48bc06b3ca..120969ee9dfa 100644 --- a/docs/source/asr/configs.rst +++ b/docs/source/asr/configs.rst @@ -885,9 +885,9 @@ Hybrid ASR-TTS Model Configuration :ref:`Hybrid ASR-TTS model ` consists of three parts: -* ASR model (``EncDecCTCModelBPE`` or ``EncDecRNNTBPEModel``) +* ASR model (``EncDecCTCModelBPE``, ``EncDecRNNTBPEModel`` or ``EncDecHybridRNNTCTCBPEModel``) * TTS Mel Spectrogram Generator (currently, only :ref:`FastPitch ` model is supported) -* Enhancer model (optional) +* :ref:`Enhancer model ` (optional) Also, the config allows to specify :ref:`text-only dataset `. @@ -895,7 +895,7 @@ Main parts of the config: * ASR model * ``asr_model_path``: path to the ASR model checkpoint (`.nemo`) file, loaded only once, then the config of the ASR model is stored in the ``asr_model`` field - * ``asr_model_type``: needed only when training from scratch, ``rnnt_bpe`` corresponds to ``EncDecRNNTBPEModel``, ``ctc_bpe`` to ``EncDecCTCModelBPE`` + * ``asr_model_type``: needed only when training from scratch. ``rnnt_bpe`` corresponds to ``EncDecRNNTBPEModel``, ``ctc_bpe`` to ``EncDecCTCModelBPE``, ``hybrid_rnnt_ctc_bpe`` to ``EncDecHybridRNNTCTCBPEModel`` * ``asr_model_fuse_bn``: fusing BatchNorm in the pretrained ASR model, can improve quality in finetuning scenario * TTS model * ``tts_model_path``: path to the pretrained TTS model checkpoint (`.nemo`) file, loaded only once, then the config of the model is stored in the ``tts_model`` field @@ -907,7 +907,7 @@ Main parts of the config: * ``speakers_filepath``: path (or paths) to the text file containing speaker ids for the multi-speaker TTS model (speakers are sampled randomly during training) * ``min_words`` and ``max_words``: parameters to filter text-only manifests by the number of words * ``tokenizer_workers``: number of workers for initial tokenization (when loading the data). ``num_CPUs / num_GPUs`` is a recommended value. - * ``asr_tts_sampling_technique``, ``asr_tts_sampling_temperature``, ``asr_tts_sampling_probabilities``: sampling parameters for text-only and audio-text data (if both specified). See parameters for ``nemo.collections.common.data.ConcatDataset`` + * ``asr_tts_sampling_technique``, ``asr_tts_sampling_temperature``, ``asr_tts_sampling_probabilities``: sampling parameters for text-only and audio-text data (if both specified). Correspond to ``sampling_technique``, ``sampling_temperature``, and ``sampling_probabilities`` parameters of the :mod:`ConcatDataset `. * all other components are similar to conventional ASR models * ``validation_ds`` and ``test_ds`` correspond to the underlying ASR model @@ -920,7 +920,7 @@ Main parts of the config: # asr model asr_model_path: ??? asr_model: null - asr_model_type: null # rnnt_bpe or ctc_bpe, needed only if instantiating from config, otherwise type is auto inferred + asr_model_type: null # rnnt_bpe, ctc_bpe or hybrid_rnnt_ctc_bpe; needed only if instantiating from config, otherwise type is auto inferred asr_model_fuse_bn: false # only ConformerEncoder supported now, use false for other models # tts model @@ -972,6 +972,7 @@ Training from Scratch To train ASR model from scratch using text-only data use ``/examples/asr/asr_with_tts/speech_to_text_bpe_with_text.py`` script with conventional ASR model config, e.g. ``/examples/asr/conf/conformer/conformer_ctc_bpe.yaml`` or ``/examples/asr/conf/conformer/conformer_transducer_bpe.yaml`` Please specify the ASR model type, paths to the TTS model, and (optional) enhancer, along with text-only data-related fields. +Use ``++`` or ``+`` markers for these options, since the options are not present in the original ASR model config. .. code-block:: shell diff --git a/docs/source/asr/models.rst b/docs/source/asr/models.rst index 2323e1636fcc..80a0fd90f0fb 100644 --- a/docs/source/asr/models.rst +++ b/docs/source/asr/models.rst @@ -330,7 +330,7 @@ The model consists of three models: * ASR model (``EncDecCTCModelBPE`` or ``EncDecRNNTBPEModel``) * Frozen TTS Mel Spectrogram Generator (currently, only :ref:`FastPitch ` model is supported) -* Optional frozen Enhancer model trained to mitigate mismatch between real and generated mel spectrogram +* Optional frozen :ref:`Spectrogram Enhancer model ` model trained to mitigate mismatch between real and generated mel spectrogram .. image:: images/hybrid_asr_tts_model.png :align: center diff --git a/docs/source/common/data.rst b/docs/source/common/data.rst new file mode 100644 index 000000000000..4c2f38cbba83 --- /dev/null +++ b/docs/source/common/data.rst @@ -0,0 +1,13 @@ +Data +---- + +.. autoclass:: nemo.collections.common.data.dataset.ConcatDataset + :show-inheritance: + :members: + :undoc-members: + + +.. autoclass:: nemo.collections.common.data.dataset.ConcatMapDataset + :show-inheritance: + :members: + :undoc-members: diff --git a/docs/source/common/intro.rst b/docs/source/common/intro.rst index dbe8d5d17930..fadbd9528485 100644 --- a/docs/source/common/intro.rst +++ b/docs/source/common/intro.rst @@ -10,3 +10,4 @@ The common collection contains things that could be used across all collections. losses metrics tokenizers + data diff --git a/docs/source/tts/api.rst b/docs/source/tts/api.rst index 2b706132fc0d..e291a995d3cb 100644 --- a/docs/source/tts/api.rst +++ b/docs/source/tts/api.rst @@ -25,6 +25,11 @@ Mel-Spectrogram Generators :members: :exclude-members: setup_training_data, setup_validation_data, training_step, validation_epoch_end, validation_step, setup_test_data, on_train_epoch_start +.. autoclass:: nemo.collections.tts.models.SpectrogramEnhancerModel + :show-inheritance: + :members: + :exclude-members: setup_training_data, setup_validation_data, training_step, validation_epoch_end, validation_step, setup_test_data, on_train_epoch_start + Speech-to-Text Aligner Models ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/tts/models.rst b/docs/source/tts/models.rst index 8b283529a706..fedfd157c307 100644 --- a/docs/source/tts/models.rst +++ b/docs/source/tts/models.rst @@ -112,7 +112,7 @@ Speech-to-text alignment is a critical component of neural TTS models. Autoregre End2End Models --------- +-------------- VITS ~~~~~~~~~~~~~~~ @@ -123,6 +123,17 @@ VITS is an end-to-end speech synthesis model, which generates raw waveform audio :alt: vits model :scale: 25% + +Enhancers +--------- + +.. _SpectrogramEnhancer_model: + +Spectrogram Enhancer +~~~~~~~~~~~~~~~~~~~~ +GAN-based model to add details to blurry spectrograms from TTS models like Tacotron or FastPitch. + + References ---------- diff --git a/examples/asr/asr_with_tts/speech_to_text_bpe_with_text.py b/examples/asr/asr_with_tts/speech_to_text_bpe_with_text.py index 386a567cf2dc..946202364c53 100644 --- a/examples/asr/asr_with_tts/speech_to_text_bpe_with_text.py +++ b/examples/asr/asr_with_tts/speech_to_text_bpe_with_text.py @@ -19,7 +19,7 @@ ```shell python speech_to_text_bpe_with_text.py \ # (Optional: --config-path= --config-name=) \ - ++asr_model_type= \ + ++asr_model_type= \ ++tts_model_path= \ ++enhancer_model_path= \ model.tokenizer.dir= \ diff --git a/examples/asr/conf/asr_tts/hybrid_asr_tts.yaml b/examples/asr/conf/asr_tts/hybrid_asr_tts.yaml index e933fc59b40f..bdd483215632 100644 --- a/examples/asr/conf/asr_tts/hybrid_asr_tts.yaml +++ b/examples/asr/conf/asr_tts/hybrid_asr_tts.yaml @@ -8,7 +8,7 @@ model: # asr model asr_model_path: ??? asr_model: null - asr_model_type: null # rnnt_bpe or ctc_bpe, needed only if instantiating from config, otherwise type is auto inferred + asr_model_type: null # rnnt_bpe, ctc_bpe or hybrid_rnnt_ctc_bpe; needed only if instantiating from config, otherwise type is auto inferred asr_model_fuse_bn: false # only ConformerEncoder supported now, use false for other models # tts model diff --git a/nemo/collections/asr/data/text_to_text.py b/nemo/collections/asr/data/text_to_text.py index 23ccd3d7a2ef..88b417ea21bc 100644 --- a/nemo/collections/asr/data/text_to_text.py +++ b/nemo/collections/asr/data/text_to_text.py @@ -37,8 +37,7 @@ try: from nemo_text_processing.text_normalization.normalize import Normalizer except Exception as e: - logging.warning(e) - logging.warning("nemo_text_processing is not installed") + pass # Normalizer imported only for annotation purposes, error can be ignored AnyPath = Union[Path, str] diff --git a/nemo/collections/asr/models/hybrid_asr_tts_models.py b/nemo/collections/asr/models/hybrid_asr_tts_models.py index 1f15e49e0b0d..8486f956c3b7 100644 --- a/nemo/collections/asr/models/hybrid_asr_tts_models.py +++ b/nemo/collections/asr/models/hybrid_asr_tts_models.py @@ -33,6 +33,7 @@ ) from nemo.collections.asr.models.asr_model import ASRModel from nemo.collections.asr.models.ctc_bpe_models import EncDecCTCModelBPE +from nemo.collections.asr.models.hybrid_rnnt_ctc_bpe_models import EncDecHybridRNNTCTCBPEModel from nemo.collections.asr.models.rnnt_bpe_models import EncDecRNNTBPEModel from nemo.collections.asr.modules.conformer_encoder import ConformerEncoder from nemo.collections.asr.parts.preprocessing.features import clean_spectrogram_batch, normalize_batch @@ -89,7 +90,7 @@ class ASRWithTTSModel(ASRModel): Text-only data can be mixed with audio-text pairs """ - asr_model: Union[EncDecRNNTBPEModel, EncDecCTCModelBPE] + asr_model: Union[EncDecRNNTBPEModel, EncDecCTCModelBPE, EncDecHybridRNNTCTCBPEModel] tts_model: FastPitchModel enhancer_model: Optional[SpectrogramEnhancerModel] @@ -100,6 +101,7 @@ class ASRModelTypes(PrettyStrEnum): RNNT_BPE = "rnnt_bpe" CTC_BPE = "ctc_bpe" + HYBRID_RNNT_CTC_BPE = "hybrid_rnnt_ctc_bpe" @classmethod def from_asr_model(cls, model: Any): @@ -107,6 +109,8 @@ def from_asr_model(cls, model: Any): return cls.RNNT_BPE if isinstance(model, EncDecCTCModelBPE): return cls.CTC_BPE + if isinstance(model, EncDecHybridRNNTCTCBPEModel): + return cls.HYBRID_RNNT_CTC_BPE raise ValueError(f"Unsupported model type: {type(model)}") def get_asr_cls(self): @@ -114,6 +118,8 @@ def get_asr_cls(self): return EncDecRNNTBPEModel if self == self.CTC_BPE: return EncDecCTCModelBPE + if self == self.HYBRID_RNNT_CTC_BPE: + return EncDecHybridRNNTCTCBPEModel raise NotImplementedError(f"Not implemented for value {self.value}") @classmethod @@ -540,7 +546,7 @@ def _setup_text_dataset_from_config( manifest_filepath=text_data_config.manifest_filepath, speakers_filepath=text_data_config.speakers_filepath, asr_tokenizer=self.asr_model.tokenizer, - asr_use_start_end_token=train_data_config.use_start_end_token, + asr_use_start_end_token=train_data_config.get("use_start_end_token", False), tts_parser=self.tts_model.parser, tts_text_pad_id=self.tts_model.vocab.pad, tts_text_normalizer=self.tts_model.normalizer, @@ -556,7 +562,7 @@ def _setup_text_dataset_from_config( manifest_filepath=text_data_config.manifest_filepath, speakers_filepath=text_data_config.speakers_filepath, asr_tokenizer=self.asr_model.tokenizer, - asr_use_start_end_token=train_data_config.use_start_end_token, + asr_use_start_end_token=train_data_config.get("use_start_end_token", False), tts_parser=self.tts_model.parser, tts_text_pad_id=self.tts_model.vocab.pad, tts_text_normalizer=self.tts_model.normalizer, From cd15d978bdc425bf0e7b68bf1b1d22168f52416d Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Fri, 12 May 2023 19:50:57 -0700 Subject: [PATCH 103/512] fix conversion and eval (#6648) * fix conversion and eval Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../megatron_ckpt_to_nemo.py | 17 +++- .../tuning/megatron_gpt_peft_eval.py | 84 +++++++++++-------- 2 files changed, 66 insertions(+), 35 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_ckpt_to_nemo.py b/examples/nlp/language_modeling/megatron_ckpt_to_nemo.py index 5ec767c34a10..e2fd1d4bbcd1 100644 --- a/examples/nlp/language_modeling/megatron_ckpt_to_nemo.py +++ b/examples/nlp/language_modeling/megatron_ckpt_to_nemo.py @@ -29,12 +29,14 @@ import torch from megatron.core import parallel_state +from omegaconf import open_dict from pytorch_lightning.plugins.environments import TorchElasticEnvironment from pytorch_lightning.trainer.trainer import Trainer from nemo.collections.nlp.models.language_modeling.megatron_bart_model import MegatronBARTModel from nemo.collections.nlp.models.language_modeling.megatron_bert_model import MegatronBertModel from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTSFTModel from nemo.collections.nlp.models.language_modeling.megatron_retrieval_model import MegatronRetrievalModel from nemo.collections.nlp.models.language_modeling.megatron_t5_model import MegatronT5Model from nemo.collections.nlp.models.machine_translation.megatron_nmt_model import MegatronNMTModel @@ -80,7 +82,11 @@ def get_args(): help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", ) parser.add_argument( - "--model_type", type=str, required=True, default="gpt", choices=["gpt", "t5", "bert", "nmt", "bart", "retro"] + "--model_type", + type=str, + required=True, + default="gpt", + choices=["gpt", "sft", "t5", "bert", "nmt", "bart", "retro"], ) parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") @@ -138,6 +144,15 @@ def convert(local_rank, rank, world_size, args): if args.model_type == 'gpt': model = MegatronGPTModel.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, trainer=trainer) + elif args.model_type == 'sft': + model = MegatronGPTSFTModel.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) + # we force the target for the loaded model to have the correct target + # because the hparams.yaml sometimes contains MegatronGPTModel as the target. + with open_dict(model.cfg): + model.cfg.target = f"{MegatronGPTSFTModel.__module__}.{MegatronGPTSFTModel.__name__}" + elif args.model_type == 'bert': model = MegatronBertModel.load_from_checkpoint( checkpoint_path, hparams_file=args.hparams_file, trainer=trainer diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py index 8cccaa024396..338b66a80cfa 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py @@ -13,6 +13,7 @@ # limitations under the License. +import json import os import torch.multiprocessing as mp @@ -21,14 +22,9 @@ from pytorch_lightning.plugins.environments import TorchElasticEnvironment from torch.utils.data import DataLoader -from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import ( - MegatronGPTAdapterModel, - MegatronGPTAdapterPTuningModel, - MegatronGPTIA3Model, - MegatronGPTLoRAModel, - MegatronGPTPEFTModel, - MegatronGPTPTuningModel, -) +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import MegatronGPTPEFTModel +from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTSFTModel from nemo.collections.nlp.models.nlp_model import NLPModel from nemo.collections.nlp.parts.nlp_overrides import ( GradScaler, @@ -42,27 +38,35 @@ from nemo.utils import logging mp.set_start_method("spawn", force=True) - """ -This is the script to train an Adapter infused GPT Model for text generation. -A base GPT Model is required as a starting point. This script will then insert -Adapters into each Transformer layer and will train/update only these adapters -during training. The base GPT Model weights will remain frozen. - -During training this script will only save the newly trained Adapter weights -in checkpoints. At the end of training a .nemo file of Adapter weights will -be saved. - -Usage: - Assuming the base model is a 125m GPT Model, with TP=1, PP=1: - a. run a training run for a base gpt nemo file: - python megatron_gpt_adapter_tuning.py \ - "model.data.train_ds=[PATH TO TRAINING JSONL FILE]", - "model.data.validation_ds=[PATH TO VALIDATION JSONL FILE]", - model.language_model_path="PATH TO BASE GPT MODEL .nemo FILE" - name="NAME OF TRAINING RUN" - exp_manager.exp_dir="DIR TO SAVE CHECKPOINTS and .nemo FILE", - trainer.max_epochs=2 +This is the script to run inference with a PEFT model or an SFT Model. + +If you want to evaluate an SFT .nemo file: + +python examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py \ + model.restore_from_path= \ + model.peft.restore_from_path=null \ + trainer.devices=1 model.data.test_ds.file_names=\[, ] \ + model.data.test_ds.names=\['name_for_test_file1', 'name_for_test_file2'] \ # this is not the filename just some identifier + model.data.test_ds.global_batch_size=4 \ # or some other value + model.data.test_ds.micro_batch_size=4 \ + model.data.test_ds.tokens_to_generate=30 \ + inference.greedy=True \ + inference.outfile_path=\'' + +If you want to evaluate a PEFT Model, you should provide a base GPT model and a PEFT model .nemo file + +python examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py \ + model.restore_from_path= \ + model.peft.restore_from_path= \ # this will be created if you use `megatron_gpt_peft_tuning.py` + trainer.devices=1 model.data.test_ds.file_names=\[, ] \ + model.data.test_ds.names=\['name_for_test_file1', 'name_for_test_file2'] \ # this is not the filename just some identifier + model.data.test_ds.global_batch_size=4 \ # or some other value + model.data.test_ds.micro_batch_size=4 \ + model.data.test_ds.tokens_to_generate=30 \ + inference.greedy=True \ + inference.outfile_path=\'' + """ @@ -105,7 +109,7 @@ def main(cfg) -> None: restore_path=cfg.model.peft.restore_from_path, trainer=trainer, return_config=True, ) else: - peft_model_cfg = MegatronGPTPEFTModel.restore_from( + peft_model_cfg = MegatronGPTSFTModel.restore_from( restore_path=cfg.model.restore_from_path, trainer=trainer, return_config=True, ) @@ -114,6 +118,8 @@ def main(cfg) -> None: # update the model config of the trained model with params we want to set at inference time. peft_model_cfg.precision = cfg.trainer.precision peft_model_cfg.data.test_ds = cfg.model.data.test_ds + peft_model_cfg.activations_checkpoint_granularity = None + peft_model_cfg.activations_checkpoint_method = None with open_dict(cfg): # update the config with the trained model config @@ -128,9 +134,8 @@ def main(cfg) -> None: else: save_restore_connector = NLPSaveRestoreConnector() - if os.path.isdir(peft_model_cfg.restore_from_path): + if os.path.isdir(cfg.model.restore_from_path): save_restore_connector.model_extracted_dir = cfg.model.restore_from_path - # peft_cls = _get_peft_scheme(peft_model_cfg) model = NLPModel.restore_from( restore_path=cfg.model.restore_from_path, trainer=trainer, @@ -148,14 +153,25 @@ def main(cfg) -> None: config = OmegaConf.to_container(cfg.inference, resolve=True) model.set_inference_config(config) response = trainer.predict(model, request_dl) + if model.global_rank == 0: print("***************************") if cfg.inference.outfile_path is not None: with open(cfg.inference.outfile_path, "w", encoding="utf-8") as f: for batch in response: - for sentence in batch["sentences"]: - s = " ".join(sentence.split("\n")) - f.write(s + "\n") + batch_sentences = [s for s in batch['sentences']] + batch_tokens = [s for s in batch['tokens']] + batch_logprob = [s.tolist() for s in batch['logprob']] + for s, t, l in zip(batch_sentences, batch_tokens, batch_logprob): + if cfg.inference.get("verbose", False): + d = { + 'sentence': s, + 'tokens_with_logprobs': ', '.join([f"{_t} {_l:.4f}" for _t, _l in zip(t, l)]), + } + f.write(json.dumps(d, sort_keys=True, indent=2) + '\n') + else: + d = {'sentence': s} + f.write(json.dumps(d) + '\n') print("predictions saved to {}".format(cfg.inference.outfile_path)) else: print(response) From 33cada8f75d2103bd7d64816bef84054924c36c5 Mon Sep 17 00:00:00 2001 From: Igor Gitman Date: Mon, 15 May 2023 13:05:16 -0700 Subject: [PATCH 104/512] Confidence ensembles implementation (#6614) * Working version to train conf model + save ensemble class Signed-off-by: Igor Gitman * Working version Signed-off-by: Igor Gitman * Remove copy of transcribe_speech.py Signed-off-by: Igor Gitman * Move models parameter to config Signed-off-by: Igor Gitman * Add explicit parameters to transcribe Signed-off-by: Igor Gitman * Small cleanups Signed-off-by: Igor Gitman * Add temperature and integration tests Signed-off-by: Igor Gitman * Add more tests Signed-off-by: Igor Gitman * Add pc removal config Signed-off-by: Igor Gitman * Cleanup Signed-off-by: Igor Gitman * Fix typo Signed-off-by: Igor Gitman * Address review comments Signed-off-by: Igor Gitman --------- Signed-off-by: Igor Gitman --- examples/asr/transcribe_speech.py | 14 +- nemo/collections/asr/metrics/rnnt_wer.py | 3 + nemo/collections/asr/metrics/rnnt_wer_bpe.py | 2 +- nemo/collections/asr/metrics/wer.py | 11 +- .../asr/models/confidence_ensemble.py | 203 ++++++++++++++ nemo/collections/asr/models/ctc_bpe_models.py | 2 + nemo/collections/asr/models/ctc_models.py | 2 + .../asr/models/hybrid_rnnt_ctc_bpe_models.py | 4 + .../asr/models/hybrid_rnnt_ctc_models.py | 2 + .../collections/asr/models/rnnt_bpe_models.py | 2 + nemo/collections/asr/models/rnnt_models.py | 2 + nemo/collections/asr/modules/conv_asr.py | 7 + nemo/collections/asr/modules/rnnt.py | 13 +- .../asr/parts/utils/asr_confidence_utils.py | 4 +- nemo/core/classes/modelPT.py | 1 - .../confidence_ensembles/build_ensemble.py | 251 ++++++++++++++++++ .../confidence_ensembles/ensemble_config.yaml | 23 ++ .../test_confidence_ensembles.py | 100 +++++++ 18 files changed, 633 insertions(+), 13 deletions(-) create mode 100644 nemo/collections/asr/models/confidence_ensemble.py create mode 100644 scripts/confidence_ensembles/build_ensemble.py create mode 100644 scripts/confidence_ensembles/ensemble_config.yaml create mode 100644 scripts/confidence_ensembles/test_confidence_ensembles.py diff --git a/examples/asr/transcribe_speech.py b/examples/asr/transcribe_speech.py index 8c8d11132183..1c1d5c08199c 100644 --- a/examples/asr/transcribe_speech.py +++ b/examples/asr/transcribe_speech.py @@ -15,7 +15,7 @@ import contextlib import os from dataclasses import dataclass, is_dataclass -from typing import Optional, Union +from typing import List, Optional, Union import pytorch_lightning as pl import torch @@ -163,9 +163,14 @@ class TranscriptionConfig: langid: str = "en" # specify this for convert_num_to_words step in groundtruth cleaning use_cer: bool = False + # can be set to True to return list of transcriptions instead of the config + # if True, will also skip writing anything to the output file + return_transcriptions: bool = False + @hydra_runner(config_name="TranscriptionConfig", schema=TranscriptionConfig) -def main(cfg: TranscriptionConfig) -> TranscriptionConfig: +# just specifying List in the return type as otherwise it's too many things +def main(cfg: TranscriptionConfig) -> Union[TranscriptionConfig, List]: logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}') for key in cfg: @@ -299,7 +304,7 @@ def autocast(): cfg = compute_output_filename(cfg, model_name) # if transcripts should not be overwritten, and already exists, skip re-transcription step and return - if not cfg.overwrite_transcripts and os.path.exists(cfg.output_filename): + if not cfg.return_transcriptions and not cfg.overwrite_transcripts and os.path.exists(cfg.output_filename): logging.info( f"Previous transcripts found at {cfg.output_filename}, and flag `overwrite_transcripts`" f"is {cfg.overwrite_transcripts}. Returning without re-transcribing text." @@ -349,6 +354,9 @@ def autocast(): if type(transcriptions) == tuple and len(transcriptions) == 2: transcriptions = transcriptions[0] + if cfg.return_transcriptions: + return transcriptions + # write audio transcriptions output_filename, pred_text_attr_name = write_transcription( transcriptions, diff --git a/nemo/collections/asr/metrics/rnnt_wer.py b/nemo/collections/asr/metrics/rnnt_wer.py index 0634a45f6a23..1ccc2d0ac6fc 100644 --- a/nemo/collections/asr/metrics/rnnt_wer.py +++ b/nemo/collections/asr/metrics/rnnt_wer.py @@ -1268,3 +1268,6 @@ class RNNTDecodingConfig: # beam decoding config beam: beam_decode.BeamRNNTInferConfig = beam_decode.BeamRNNTInferConfig(beam_size=4) + + # can be used to change temperature for decoding + temperature: float = 1.0 diff --git a/nemo/collections/asr/metrics/rnnt_wer_bpe.py b/nemo/collections/asr/metrics/rnnt_wer_bpe.py index c59b65552842..99c71daebaa9 100644 --- a/nemo/collections/asr/metrics/rnnt_wer_bpe.py +++ b/nemo/collections/asr/metrics/rnnt_wer_bpe.py @@ -62,7 +62,7 @@ class RNNTBPEDecoding(AbstractRNNTDecoding): The timestamps will be available in the returned Hypothesis.timestep as a dictionary. compute_langs: a bool flag, which allows to compute language id (LID) information per token, - word, and the entire sample (most likely language id). The LIDS will be available + word, and the entire sample (most likely language id). The LIDS will be available in the returned Hypothesis object as a dictionary rnnt_timestamp_type: A str value, which represents the types of timestamps that should be calculated. diff --git a/nemo/collections/asr/metrics/wer.py b/nemo/collections/asr/metrics/wer.py index d9b745cbc940..7f7f853d307d 100644 --- a/nemo/collections/asr/metrics/wer.py +++ b/nemo/collections/asr/metrics/wer.py @@ -75,8 +75,8 @@ def word_error_rate_detail( ) -> Tuple[float, int, float, float, float]: """ Computes Average Word Error Rate with details (insertion rate, deletion rate, substitution rate) - between two texts represented as corresponding lists of string. - + between two texts represented as corresponding lists of string. + Hypotheses and references must have same length. Args: hypotheses (list): list of hypotheses @@ -88,7 +88,7 @@ def word_error_rate_detail( ins_rate (float): average insertion error rate del_rate (float): average deletion error rate sub_rate (float): average substitution error rate - + """ scores = 0 words = 0 @@ -1222,5 +1222,8 @@ class CTCDecodingConfig: # beam decoding config beam: ctc_beam_decoding.BeamCTCInferConfig = ctc_beam_decoding.BeamCTCInferConfig(beam_size=4) - # confidence config + # confidence config confidence_cfg: ConfidenceConfig = ConfidenceConfig() + + # can be used to change temperature for decoding + temperature: float = 1.0 diff --git a/nemo/collections/asr/models/confidence_ensemble.py b/nemo/collections/asr/models/confidence_ensemble.py new file mode 100644 index 000000000000..34fe037e30b5 --- /dev/null +++ b/nemo/collections/asr/models/confidence_ensemble.py @@ -0,0 +1,203 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, List, Optional, Union + +import joblib +import numpy as np +import torch +from omegaconf import DictConfig, OmegaConf, open_dict +from pytorch_lightning import Trainer + +from nemo.collections.asr.models.asr_model import ASRModel +from nemo.collections.asr.models.hybrid_rnnt_ctc_models import EncDecHybridRNNTCTCModel +from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceConfig, get_confidence_aggregation_bank +from nemo.collections.asr.parts.utils.audio_utils import ChannelSelectorType +from nemo.core.classes import ModelPT +from nemo.utils import model_utils + +__all__ = ['ConfidenceEnsembleModel'] + + +class ConfidenceEnsembleModel(ModelPT): + def __init__( + self, cfg: DictConfig, trainer: 'Trainer' = None, + ): + super().__init__(cfg=cfg, trainer=trainer) + + # either we load all models from ``load_models`` cfg parameter + # or all of them are specified in the config as modelX alongside the num_models key + # + # ideally, we'd like to directly store all models in a list, but that + # is not currently supported by the submodule logic + # so to access all the models, we do something like + # + # for model_idx in range(self.num_models): + # model = getattr(self, f"model{model_idx}") + + if 'num_models' in self.cfg: + self.num_models = self.cfg.num_models + for idx in range(self.num_models): + cfg_field = f"model{idx}" + model_cfg = self.cfg[cfg_field] + model_class = model_utils.import_class_by_path(model_cfg['target']) + self.register_nemo_submodule( + name=cfg_field, config_field=cfg_field, model=model_class(model_cfg, trainer=trainer), + ) + else: + self.num_models = len(cfg.load_models) + with open_dict(self.cfg): + self.cfg.num_models = self.num_models + for idx, model in enumerate(cfg.load_models): + cfg_field = f"model{idx}" + if model.endswith(".nemo"): + self.register_nemo_submodule( + name=cfg_field, + config_field=cfg_field, + model=ASRModel.restore_from(model, trainer=trainer, map_location="cpu"), + ) + else: + self.register_nemo_submodule( + cfg_field, config_field=cfg_field, model=ASRModel.from_pretrained(model, map_location="cpu"), + ) + + # registering model selection block - this is expected to be a joblib-saved + # pretrained sklearn pipeline containing standardization + logistic regression + # trained to predict "most-confident" model index from the confidence scores of all models + model_selection_block_path = self.register_artifact("model_selection_block", cfg.model_selection_block) + self.model_selection_block = joblib.load(model_selection_block_path) + self.confidence_cfg = ConfidenceConfig(**self.cfg.confidence) + + # making sure each model has correct confidence settings in the decoder strategy + for model_idx in range(self.num_models): + model = getattr(self, f"model{model_idx}") + # for now we assume users are direclty responsible for matching + # decoder type when building ensemlbe with inference type + # TODO: add automatic checks for errors + if isinstance(model, EncDecHybridRNNTCTCModel): + self.update_decoding_parameters(model.cfg.decoding) + model.change_decoding_strategy(model.cfg.decoding, decoder_type="rnnt") + self.update_decoding_parameters(model.cfg.aux_ctc.decoding) + model.change_decoding_strategy(model.cfg.aux_ctc.decoding, decoder_type="ctc") + else: + self.update_decoding_parameters(model.cfg.decoding) + model.change_decoding_strategy(model.cfg.decoding) + + def update_decoding_parameters(self, decoding_cfg): + """Updating confidence/temperature parameters of the config.""" + with open_dict(decoding_cfg): + decoding_cfg.confidence_cfg = self.confidence_cfg + decoding_cfg.temperature = self.cfg.temperature + + def setup_training_data(self, train_data_config: Union[DictConfig, Dict]): + """Pass-through to the ensemble models. + + Note that training is not actually supported for this class! + """ + for model_idx in range(self.num_models): + getattr(self, f"model{model_idx}").setup_training_data(train_data_config) + + def setup_validation_data(self, val_data_config: Union[DictConfig, Dict]): + """Pass-through to the ensemble models.""" + for model_idx in range(self.num_models): + getattr(self, f"model{model_idx}").setup_validation_data(val_data_config) + + def change_attention_model( + self, self_attention_model: str = None, att_context_size: List[int] = None, update_config: bool = True + ): + """Pass-through to the ensemble models.""" + for model_idx in range(self.num_models): + getattr(self, f"model{model_idx}").change_attention_model( + self_attention_model, att_context_size, update_config + ) + + def change_decoding_strategy(self, decoding_cfg: DictConfig = None, decoder_type: str = None): + """Pass-through to the ensemble models. + + The only change here is that we always require frame-confidence to + be returned. + """ + decoding_cfg.confidence_cfg = self.confidence_cfg + for model_idx in range(self.num_models): + model = getattr(self, f"model{model_idx}") + if isinstance(model, EncDecHybridRNNTCTCModel): + model.change_decoding_strategy(decoding_cfg, decoder_type=decoder_type) + else: + model.change_decoding_strategy(decoding_cfg) + + @torch.no_grad() + def transcribe( + self, + paths2audio_files: List[str], + batch_size: int = 4, + return_hypotheses: bool = False, + num_workers: int = 0, + channel_selector: Optional[ChannelSelectorType] = None, + augmentor: DictConfig = None, + verbose: bool = True, + **kwargs, # any other model specific parameters are passed directly + ) -> List[str]: + """Confidence-ensemble transcribe method. + + Consists of the following steps: + + 1. Run all models (TODO: in parallel) + 2. Compute confidence for each model + 3. Use logistic regression to pick the "most confident" model + 4. Return the output of that model + """ + # TODO: lots of duplicate code with building ensemble script + aggr_func = get_confidence_aggregation_bank()[self.confidence_cfg.aggregation] + confidences = [] + all_transcriptions = [] + # always requiring to return hypothesis + # TODO: make sure to return text only if was False originally + return_hypotheses = True + for model_idx in range(self.num_models): + model = getattr(self, f"model{model_idx}") + transcriptions = model.transcribe( + paths2audio_files=paths2audio_files, + batch_size=batch_size, + return_hypotheses=return_hypotheses, + num_workers=num_workers, + channel_selector=channel_selector, + augmentor=augmentor, + verbose=verbose, + **kwargs, + ) + if isinstance(transcriptions, tuple): # transducers return a tuple + transcriptions = transcriptions[0] + + model_confidences = [] + for transcription in transcriptions: + if isinstance(transcription.frame_confidence[0], list): + # NeMo Transducer API returns list of lists for confidences + conf_values = [conf_value for confs in transcription.frame_confidence for conf_value in confs] + else: + conf_values = transcription.frame_confidence + model_confidences.append(aggr_func(conf_values)) + confidences.append(model_confidences) + all_transcriptions.append(transcriptions) + + # transposing with zip(*list) + features = np.array(list(zip(*confidences))) + model_indices = self.model_selection_block.predict(features) + final_transcriptions = [] + for transcrption_idx in range(len(all_transcriptions[0])): + final_transcriptions.append(all_transcriptions[model_indices[transcrption_idx]][transcrption_idx]) + + return final_transcriptions + + def list_available_models(self): + return [] diff --git a/nemo/collections/asr/models/ctc_bpe_models.py b/nemo/collections/asr/models/ctc_bpe_models.py index b97bf769132c..a74c7f3de5c2 100644 --- a/nemo/collections/asr/models/ctc_bpe_models.py +++ b/nemo/collections/asr/models/ctc_bpe_models.py @@ -305,6 +305,8 @@ def change_decoding_strategy(self, decoding_cfg: DictConfig): dist_sync_on_step=True, ) + self.decoder.temperature = decoding_cfg.get('temperature', 1.0) + # Update config with open_dict(self.cfg.decoding): self.cfg.decoding = decoding_cfg diff --git a/nemo/collections/asr/models/ctc_models.py b/nemo/collections/asr/models/ctc_models.py index b7816ec5040d..1446e1ce871f 100644 --- a/nemo/collections/asr/models/ctc_models.py +++ b/nemo/collections/asr/models/ctc_models.py @@ -337,6 +337,8 @@ def change_decoding_strategy(self, decoding_cfg: DictConfig): dist_sync_on_step=True, ) + self.decoder.temperature = decoding_cfg.get('temperature', 1.0) + # Update config with open_dict(self.cfg.decoding): self.cfg.decoding = decoding_cfg diff --git a/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py b/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py index d10d3364ea29..b88669a1fbc0 100644 --- a/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py +++ b/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py @@ -415,6 +415,8 @@ def change_decoding_strategy(self, decoding_cfg: DictConfig = None, decoder_type self.joint.set_loss(self.loss) self.joint.set_wer(self.wer) + self.joint.temperature = decoding_cfg.get('temperature', 1.0) + # Update config with open_dict(self.cfg.decoding): self.cfg.decoding = decoding_cfg @@ -442,6 +444,8 @@ def change_decoding_strategy(self, decoding_cfg: DictConfig = None, decoder_type dist_sync_on_step=True, ) + self.ctc_decoder.temperature = decoding_cfg.get('temperature', 1.0) + # Update config with open_dict(self.cfg.aux_ctc.decoding): self.cfg.aux_ctc.decoding = decoding_cfg diff --git a/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py b/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py index 9ba5533dbe64..447caa3f5de6 100644 --- a/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py +++ b/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py @@ -347,6 +347,8 @@ def change_decoding_strategy(self, decoding_cfg: DictConfig = None, decoder_type dist_sync_on_step=True, ) + self.ctc_decoder.temperature = decoding_cfg.get('temperature', 1.0) + # Update config with open_dict(self.cfg.aux_ctc): self.cfg.aux_ctc.decoding = decoding_cfg diff --git a/nemo/collections/asr/models/rnnt_bpe_models.py b/nemo/collections/asr/models/rnnt_bpe_models.py index 5ee5824b9d27..6fed8be9d410 100644 --- a/nemo/collections/asr/models/rnnt_bpe_models.py +++ b/nemo/collections/asr/models/rnnt_bpe_models.py @@ -454,6 +454,8 @@ def change_decoding_strategy(self, decoding_cfg: DictConfig): self.joint.set_loss(self.loss) self.joint.set_wer(self.wer) + self.joint.temperature = decoding_cfg.get('temperature', 1.0) + # Update config with open_dict(self.cfg.decoding): self.cfg.decoding = decoding_cfg diff --git a/nemo/collections/asr/models/rnnt_models.py b/nemo/collections/asr/models/rnnt_models.py index 7c91aed99cda..84e08635834d 100644 --- a/nemo/collections/asr/models/rnnt_models.py +++ b/nemo/collections/asr/models/rnnt_models.py @@ -442,6 +442,8 @@ def change_decoding_strategy(self, decoding_cfg: DictConfig): self.joint.set_loss(self.loss) self.joint.set_wer(self.wer) + self.joint.temperature = decoding_cfg.get('temperature', 1.0) + # Update config with open_dict(self.cfg.decoding): self.cfg.decoding = decoding_cfg diff --git a/nemo/collections/asr/modules/conv_asr.py b/nemo/collections/asr/modules/conv_asr.py index a45ee47d0de2..a05ee894f050 100644 --- a/nemo/collections/asr/modules/conv_asr.py +++ b/nemo/collections/asr/modules/conv_asr.py @@ -445,6 +445,9 @@ def __init__(self, feat_in, num_classes, init_mode="xavier_uniform", vocabulary= accepted_adapters = [adapter_utils.LINEAR_ADAPTER_CLASSPATH] self.set_accepted_adapter_types(accepted_adapters) + # to change, requires running ``model.temperature = T`` explicitly + self.temperature = 1.0 + @typecheck() def forward(self, encoder_output): # Adapter module forward step @@ -453,6 +456,10 @@ def forward(self, encoder_output): encoder_output = self.forward_enabled_adapters(encoder_output) encoder_output = encoder_output.transpose(1, 2) # [B, C, T] + if self.temperature != 1.0: + return torch.nn.functional.log_softmax( + self.decoder_layers(encoder_output).transpose(1, 2) / self.temperature, dim=-1 + ) return torch.nn.functional.log_softmax(self.decoder_layers(encoder_output).transpose(1, 2), dim=-1) def input_example(self, max_batch=1, max_dim=256): diff --git a/nemo/collections/asr/modules/rnnt.py b/nemo/collections/asr/modules/rnnt.py index a07b03731aee..04bdd25ac351 100644 --- a/nemo/collections/asr/modules/rnnt.py +++ b/nemo/collections/asr/modules/rnnt.py @@ -1235,6 +1235,9 @@ def __init__( # Flag needed for RNNT export support self._rnnt_export = False + # to change, requires running ``model.temperature = T`` explicitly + self.temperature = 1.0 + @typecheck() def forward( self, @@ -1430,10 +1433,16 @@ def joint(self, f: torch.Tensor, g: torch.Tensor) -> torch.Tensor: # If log_softmax is automatic if self.log_softmax is None: if not res.is_cuda: # Use log softmax only if on CPU - res = res.log_softmax(dim=-1) + if self.temperature != 1.0: + res = (res / self.temperature).log_softmax(dim=-1) + else: + res = res.log_softmax(dim=-1) else: if self.log_softmax: - res = res.log_softmax(dim=-1) + if self.temperature != 1.0: + res = (res / self.temperature).log_softmax(dim=-1) + else: + res = res.log_softmax(dim=-1) return res diff --git a/nemo/collections/asr/parts/utils/asr_confidence_utils.py b/nemo/collections/asr/parts/utils/asr_confidence_utils.py index 0891ea7312d0..a15428ee52df 100644 --- a/nemo/collections/asr/parts/utils/asr_confidence_utils.py +++ b/nemo/collections/asr/parts/utils/asr_confidence_utils.py @@ -156,7 +156,7 @@ def get_confidence_aggregation_bank(): class ConfidenceMeasureMixin(ABC): """Confidence Measure Mixin class. - + It initializes per-frame confidence measure. """ @@ -193,7 +193,7 @@ def _init_confidence_measure(self, confidence_method_cfg: Optional[DictConfig] = class ConfidenceMixin(ABC): """Confidence Mixin class. - + It initializes per-frame confidence measure. """ diff --git a/nemo/core/classes/modelPT.py b/nemo/core/classes/modelPT.py index c7221dfef0f1..01cf1611f7a4 100644 --- a/nemo/core/classes/modelPT.py +++ b/nemo/core/classes/modelPT.py @@ -220,7 +220,6 @@ def register_artifact( src (str): Path to artifact. verify_src_exists (bool): If set to False, then the artifact is optional and register_artifact will return None even if src is not found. Defaults to True. - save_restore_connector (SaveRestoreConnector): Can be overridden to add custom save and restore logic. Returns: str: If src is not None or empty it always returns absolute path which is guaranteed to exist during model instance life diff --git a/scripts/confidence_ensembles/build_ensemble.py b/scripts/confidence_ensembles/build_ensemble.py new file mode 100644 index 000000000000..9620b73aac87 --- /dev/null +++ b/scripts/confidence_ensembles/build_ensemble.py @@ -0,0 +1,251 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Run ``python build_ensemble.py --help`` for usage examples. + +import atexit + +# using default logging to be able to silence unnecessary messages from nemo +import logging +import os +import random +import sys +import tempfile +from dataclasses import dataclass, is_dataclass +from pathlib import Path +from typing import List + +import joblib +import numpy as np +import pytorch_lightning as pl +from omegaconf import DictConfig, OmegaConf +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import confusion_matrix +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler + +from nemo.collections.asr.models.confidence_ensemble import ConfidenceEnsembleModel +from nemo.collections.asr.parts.utils.asr_confidence_utils import ( + ConfidenceConfig, + ConfidenceMethodConfig, + get_confidence_aggregation_bank, +) +from nemo.core.config import hydra_runner + +LOG = logging.getLogger(__file__) + +# adding Python path. If not found, asking user to get the file +try: + sys.path.append(str(Path(__file__).parents[2] / "examples" / "asr")) + import transcribe_speech +except ImportError: + # if users run script normally from nemo repo, this shouldn't be triggered as + # we modify the path above. But if they downloaded the build_ensemble.py as + # an isolated script, we'd ask them to also download corresponding version + # of the transcribe_speech.py + print( + "Current script depends on 'examples/asr/transcribe_speech.py', but can't find it. " + "If it's not present, download it from the NeMo github manually and put inside this folder." + ) + + +@dataclass +class EnsembleConfig: + # .nemo path or pretrained name + model: str + # path to the training data manifest (non-tarred) + training_manifest: str + # specify to limit the number of training samples + # 100 is most likely enough, but setting higher default just in case + max_training_samples: int = 1000 + # specify to provide dev data manifest for HP tuning + # dev_manifest: Optional[str] = None + + +@dataclass +class BuildEnsembleConfig: + # where to save the resulting ensemble model + output_path: str + + # each model specification + ensemble: List[EnsembleConfig] + + random_seed: int = 0 # for reproducibility + + # default confidence, can override + confidence: ConfidenceConfig = ConfidenceConfig( + # we keep frame confidences and apply aggregation manually to get full-utterance confidence + preserve_frame_confidence=True, + exclude_blank=True, + aggregation="mean", + method_cfg=ConfidenceMethodConfig( + name="entropy", + entropy_type="renui", + temperature=0.25, # this is not really temperature, but alpha, see https://arxiv.org/abs/2212.08703 + entropy_norm="lin", + ), + ) + temperature: float = 1.0 # this is a real temperature that will be applied to logits + + # this is optional, but can be used to change any aspect of the transcription + # config, such as batch size or amp usage. Note that model, data and confidence + # will be overriden by this script + transcription: transcribe_speech.TranscriptionConfig = transcribe_speech.TranscriptionConfig() + + +def calculate_score(features, labels, pipe): + """Score is always calculated as mean of the per-class scores. + + This is done to account for possible class imbalances. + """ + predictions = pipe.predict(features) + conf_m = confusion_matrix(labels, predictions) + score = np.diag(conf_m).sum() / conf_m.sum() + return score, conf_m + + +def train_model_selection( + training_features, + training_labels, + multi_class="multinomial", + C=10000.0, # disabling regularization by default as overfitting is likely not an issue + class_weight="balanced", # in case training data is imbalanced + max_iter=1000, +): + pipe = make_pipeline( + StandardScaler(), + LogisticRegression(multi_class=multi_class, C=C, max_iter=max_iter, class_weight=class_weight), + ) + pipe.fit(training_features, training_labels) + + accuracy, confusion = calculate_score(training_features, training_labels, pipe) + + LOG.info("Training fit accuracy: %.4f", accuracy * 100.0) + LOG.info("Training confusion matrix:\n%s", str(confusion)) + return pipe + + +def subsample_manifest(manifest_file, max_samples): + """Will save a subsampled version of the manifest to the same folder. + + Have to save to the same folder to support relative paths. + """ + with open(manifest_file, "rt", encoding="utf-8") as fin: + lines = fin.readlines() + if max_samples < len(lines): + lines = random.sample(lines, max_samples) + output_file = manifest_file + "-subsampled" + with open(output_file, "wt", encoding="utf-8") as fout: + fout.write("".join(lines)) + return output_file + + +def cleanup_subsampled_manifests(subsampled_manifests): + for manifest in subsampled_manifests: + os.remove(manifest) + + +@hydra_runner(schema=BuildEnsembleConfig) +def main(cfg: BuildEnsembleConfig): + # silencing all messages from nemo/ptl to avoid dumping tons of configs to the stdout + logging.getLogger('pytorch_lightning').setLevel(logging.CRITICAL) + logging.getLogger('nemo_logger').setLevel(logging.CRITICAL) + LOG.info(f'Build ensemble config:\n{OmegaConf.to_yaml(cfg)}') + + if is_dataclass(cfg): + cfg = OmegaConf.structured(cfg) + + # no matter what's in the config, frame confidence is required + cfg.confidence.preserve_frame_confidence = True + + pl.seed_everything(cfg.random_seed) + cfg.transcription.random_seed = None # seed is already applied + cfg.transcription.return_transcriptions = True + cfg.transcription.ctc_decoding.confidence_cfg = cfg.confidence + cfg.transcription.rnnt_decoding.confidence_cfg = cfg.confidence + cfg.transcription.ctc_decoding.temperature = cfg.temperature + cfg.transcription.rnnt_decoding.temperature = cfg.temperature + + aggregations = get_confidence_aggregation_bank() + aggr_func = aggregations[cfg.confidence.aggregation] + + confidences = [] + labels = [] + + # registering clean-up function that will hold on to this list and + # should clean up even if there is partial error in some of the transcribe + # calls + subsampled_manifests = [] + atexit.register(cleanup_subsampled_manifests, subsampled_manifests) + + # note that we loop over the same config. + # This is intentional, as we need to run all models on all datasets + for model_idx, model_cfg in enumerate(cfg.ensemble): + model_confidences = [] + for data_idx, data_cfg in enumerate(cfg.ensemble): + if model_idx == 0: # generating subsampled manifests only one time + subsampled_manifests.append( + subsample_manifest(data_cfg.training_manifest, data_cfg.max_training_samples) + ) + subsampled_manifest = subsampled_manifests[data_idx] + + if model_cfg.model.endswith(".nemo"): + cfg.transcription.model_path = model_cfg.model + else: # assuming pretrained model + cfg.transcription.pretrained_name = model_cfg.model + + cfg.transcription.dataset_manifest = subsampled_manifest + + with tempfile.NamedTemporaryFile() as output_file: + cfg.transcription.output_filename = output_file.name + LOG.info("Transcribing dataset %d with model %d", data_idx, model_idx) + transcriptions = transcribe_speech.main(cfg.transcription.copy()) + + for transcription in transcriptions: + if isinstance(transcription.frame_confidence[0], list): + # NeMo Transducer API returns list of lists for confidences + conf_values = [conf_value for confs in transcription.frame_confidence for conf_value in confs] + else: + conf_values = transcription.frame_confidence + model_confidences.append(aggr_func(conf_values)) + if model_idx == 0: # labels are the same for all models + labels.append(data_idx) + + confidences.append(model_confidences) + + # transposing with zip(*list) + training_features = np.array(list(zip(*confidences))) + training_labels = np.array(labels) + model_selection_block = train_model_selection(training_features, training_labels) + with tempfile.TemporaryDirectory() as tmpdir: + model_selection_block_path = os.path.join(tmpdir, 'model_selection_block.pkl') + joblib.dump(model_selection_block, model_selection_block_path) + + # creating ensemble checkpoint + ensemble_model = ConfidenceEnsembleModel( + cfg=DictConfig( + { + 'model_selection_block': model_selection_block_path, + 'confidence': cfg.confidence, + 'temperature': cfg.temperature, + 'load_models': [model_cfg.model for model_cfg in cfg.ensemble], + } + ), + trainer=None, + ) + ensemble_model.save_to(cfg.output_path) + + +if __name__ == '__main__': + main() diff --git a/scripts/confidence_ensembles/ensemble_config.yaml b/scripts/confidence_ensembles/ensemble_config.yaml new file mode 100644 index 000000000000..954876a0c3cc --- /dev/null +++ b/scripts/confidence_ensembles/ensemble_config.yaml @@ -0,0 +1,23 @@ +# an example of it-es ctc model ensemble +# see test_confidence_ensembles.py for expected data structure +# and additional usage examples +ensemble: + - model: stt_es_conformer_ctc_large + training_manifest: ${oc.env:TEST_DATA_PATH}/es/train_manifest.json + - model: stt_it_conformer_ctc_large + training_manifest: ${oc.env:TEST_DATA_PATH}/it/train_manifest.json + +output_path: confidence-ensemble.nemo + +# this is default +temperature: 1.0 + +# this is default +confidence: + exclude_blank: True + aggregation: mean + method_cfg: + name: entropy + entropy_type: renui + temperature: 0.25 # this is not really temperature, but alpha, see https://arxiv.org/abs/2212.08703 + entropy_norm: lin diff --git a/scripts/confidence_ensembles/test_confidence_ensembles.py b/scripts/confidence_ensembles/test_confidence_ensembles.py new file mode 100644 index 000000000000..3e225384de92 --- /dev/null +++ b/scripts/confidence_ensembles/test_confidence_ensembles.py @@ -0,0 +1,100 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# these tests are not included in CI, since they take moderate amount of time +# they are supposed to be run in the nightly pipeline instead + +import os +import subprocess +import sys +from pathlib import Path + +import pytest + +from nemo.collections.asr.parts.utils.transcribe_utils import TextProcessingConfig + +sys.path.append(str(Path(__file__).parents[2] / 'examples' / 'asr')) +import speech_to_text_eval + + +@pytest.mark.parametrize( + 'build_args', + [ + "ensemble.0.model=stt_es_conformer_ctc_large ensemble.1.model=stt_it_conformer_ctc_large", + "ensemble.0.model=stt_es_conformer_transducer_large ensemble.1.model=stt_it_conformer_transducer_large", + "ensemble.0.model=stt_es_fastconformer_hybrid_large_pc ensemble.1.model=stt_it_fastconformer_hybrid_large_pc", + ( + "ensemble.0.model=stt_es_fastconformer_hybrid_large_pc " + "ensemble.1.model=stt_it_fastconformer_hybrid_large_pc " + "transcription.decoder_type=ctc" + ), + "ensemble.0.model=stt_es_conformer_ctc_large ensemble.1.model=stt_it_conformer_transducer_large", + ], + ids=( + [ + "CTC models", + "Transducer models", + "Hybrid models (Transducer mode)", + "Hybrid models (CTC mode)", + "CTC + Transducer", + ] + ), +) +def test_confidence_ensemble(tmp_path, build_args): + """Integration tests for confidence-ensembles. + + Tests building ensemble and running inference with the model. + To use, make sure to define TEST_DATA_PATH env variable with path to + the test data. The following structure is assumed: + + $TEST_DATA_PATH + ├── es + │ ├── dev + │ ├── dev_manifest.json + │ ├── test + │ ├── train + │ └── train_manifest.json + ├── it + │ ├── dev + │ ├── dev_manifest.json + │ ├── test + │ ├── test_manifest.json + │ ├── train + │ └── train_manifest.json + └── test_manifest.json + + """ + # checking for test data and failing right away if not defined + if not os.getenv("TEST_DATA_PATH"): + raise ValueError("TEST_DATA_PATH env variable has to be defined!") + + test_data_path = Path(os.environ['TEST_DATA_PATH']) + + build_ensemble_cmd = f""" + python {Path(__file__).parent / 'build_ensemble.py'} \ + --config-name=ensemble_config.yaml \ + output_path={tmp_path / 'ensemble.nemo'} \ + {build_args} + """ + subprocess.run(build_ensemble_cmd, check=True, shell=True) + + eval_cfg = speech_to_text_eval.EvaluationConfig( + dataset_manifest=str(test_data_path / 'test_manifest.json'), + output_filename=str(tmp_path / 'output.json'), + model_path=str(tmp_path / 'ensemble.nemo'), + text_processing=TextProcessingConfig(punctuation_marks=".,?", do_lowercase=True, rm_punctuation=True), + ) + + results = speech_to_text_eval.main(eval_cfg) + assert results.metric_value < 0.15 # relaxed check for better than 15% WER From 78a377ea99c252dd6c31d1eb1ee80c6a9ac0e627 Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Mon, 15 May 2023 14:59:46 -0700 Subject: [PATCH 105/512] Patch memory used for NeMo Megatron models (#6615) * Patch memory used for NeMo Megatron models Signed-off-by: smajumdar * Cleanup the dtype of embeddings Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Refactor util function for parsing precision Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Refactor util function for parsing precision Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Try patch for Megatron O2 Signed-off-by: smajumdar * Refactor to incorporate megatron amp 02 state Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Refactor to incorporate megatron amp 02 state Signed-off-by: smajumdar * Correct indent Signed-off-by: smajumdar * Correct utils import Signed-off-by: smajumdar --------- Signed-off-by: smajumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../language_modeling/megatron/bert_model.py | 2 ++ .../language_modeling/megatron/gpt_model.py | 9 ++++- .../language_modeling/megatron_bert_model.py | 1 + .../language_modeling/megatron_gpt_model.py | 3 +- .../megatron_lm_encoder_decoder_model.py | 1 + .../nlp/modules/common/megatron/attention.py | 11 +++++++ .../modules/common/megatron/language_model.py | 23 +++++++++++-- .../common/megatron/megatron_decoders.py | 3 ++ .../common/megatron/megatron_encoders.py | 4 +++ .../megatron/megatron_perceiver_encoders.py | 4 +++ .../megatron/megatron_transformer_decoder.py | 2 ++ .../megatron/megatron_transformer_encoder.py | 2 ++ .../nlp/modules/common/megatron/mlp.py | 8 +++++ .../nlp/modules/common/megatron/module.py | 4 +-- .../retrieval_token_level_encoder_decoder.py | 8 +++++ .../common/megatron/retrieval_transformer.py | 4 +++ .../megatron/token_level_encoder_decoder.py | 9 +++++ .../modules/common/megatron/transformer.py | 33 ++++++++++--------- nemo/collections/nlp/parts/utils_funcs.py | 16 ++++++++- .../core/connectors/save_restore_connector.py | 2 +- 20 files changed, 124 insertions(+), 25 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py b/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py index 464d69c72043..132f900298a6 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py @@ -172,6 +172,7 @@ def __init__( init_method_std=0.02, fp16_lm_cross_entropy=False, use_cpu_initialization=False, + megatron_amp_O2=False, hidden_dropout=0.1, precision=16, fp32_residual_connection=False, @@ -219,6 +220,7 @@ def __init__( post_process=self.post_process, init_method_std=init_method_std, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, precision=precision, fp32_residual_connection=fp32_residual_connection, activations_checkpoint_granularity=activations_checkpoint_granularity, diff --git a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py index d6af1960eae9..e890e6ae4807 100755 --- a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py @@ -24,6 +24,7 @@ parallel_lm_logits, scaled_init_method_normal, ) +from nemo.collections.nlp.parts import utils_funcs try: from apex.transformer.enums import AttnMaskType @@ -123,6 +124,7 @@ def __init__( use_scaled_init_method=True, fp16_lm_cross_entropy=False, use_cpu_initialization=False, + megatron_amp_O2=False, hidden_dropout=0.1, attention_dropout=0.1, ffn_dropout=0.0, @@ -171,6 +173,7 @@ def __init__( self.sequence_parallel = sequence_parallel self.gradient_accumulation_fusion = gradient_accumulation_fusion self.share_embeddings_and_output_weights = share_embeddings_and_output_weights + self.dtype = utils_funcs.dtype_from_precision(precision, megatron_amp_O2) if kv_channels is None: assert ( @@ -204,6 +207,7 @@ def __init__( post_process=self.post_process, init_method_std=init_method_std, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, precision=precision, fp32_residual_connection=fp32_residual_connection, activations_checkpoint_granularity=activations_checkpoint_granularity, @@ -243,7 +247,10 @@ def __init__( if self.share_embeddings_and_output_weights: self.initialize_word_embeddings( - init_method=init_method_normal(init_method_std), vocab_size=vocab_size, hidden_size=hidden_size + init_method=init_method_normal(init_method_std), + vocab_size=vocab_size, + hidden_size=hidden_size, + param_dtype=self.dtype, ) def set_input_tensor(self, input_tensor): diff --git a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py index bda1a595655a..64430a669269 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py @@ -156,6 +156,7 @@ def model_provider_func(self, pre_process, post_process): init_method_std=cfg.get('init_method_std', 0.02), fp16_lm_cross_entropy=cfg.get('fp16_lm_cross_entropy', False), use_cpu_initialization=cfg.get('use_cpu_initialization', False), + megatron_amp_O2=self.cfg.get('megatron_amp_O2', False), hidden_dropout=cfg.get('hidden_dropout', 0.1), precision=cfg.get('precision', 16), fp32_residual_connection=cfg.get('fp32_residual_connection', False), diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index b5f8b2b18f69..e9545361b88d 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -148,7 +148,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): converted_model = [] for module in self.model: converted_model.append(Float16Module(module=module, precision=cfg.precision)) - self.model = converted_model + self.model = converted_model else: self.model = Float16Module(module=self.model, precision=cfg.precision) @@ -213,6 +213,7 @@ def model_provider_func(self, pre_process, post_process): use_scaled_init_method=self.cfg.get('use_scaled_init_method', True), fp16_lm_cross_entropy=self.cfg.get('fp16_lm_cross_entropy', False), use_cpu_initialization=self.cfg.get('use_cpu_initialization', False), + megatron_amp_O2=self.cfg.get('megatron_amp_O2', False), hidden_dropout=self.cfg.get('hidden_dropout', 0.1), attention_dropout=self.cfg.get('attention_dropout', 0.1), ffn_dropout=self.cfg.get('ffn_dropout', 0.0), diff --git a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py index 80d980858f1c..217b707f5014 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py @@ -272,6 +272,7 @@ def model_provider_func(self, pre_process, post_process, add_encoder, add_decode post_process=post_process, fp16_cross_entropy=self.cfg.get('fp16_lm_cross_entropy', False), use_cpu_initialization=self.cfg.get('use_cpu_initialization', False), + megatron_amp_O2=self.cfg.get('megatron_amp_O2', False), precision=self.cfg.get('precision', 16), embedding_init_method_std=embedding_init_method_std, embedding_dropout=embedding_dropout, diff --git a/nemo/collections/nlp/modules/common/megatron/attention.py b/nemo/collections/nlp/modules/common/megatron/attention.py index 85ef427535b0..bb28ba630e34 100644 --- a/nemo/collections/nlp/modules/common/megatron/attention.py +++ b/nemo/collections/nlp/modules/common/megatron/attention.py @@ -27,6 +27,7 @@ from nemo.collections.nlp.modules.common.megatron.module import MegatronModule from nemo.collections.nlp.modules.common.megatron.rotary_pos_embedding import apply_rotary_pos_emb from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults, attention_mask_func +from nemo.collections.nlp.parts import utils_funcs from nemo.core import adapter_mixins try: @@ -88,6 +89,7 @@ def __init__( apply_query_key_layer_scaling=True, kv_channels=None, use_cpu_initialization=False, + megatron_amp_O2=False, masked_softmax_fusion=True, attention_dropout=0.1, layer_type=None, @@ -111,6 +113,7 @@ def __init__( self.multi_query_attention = multi_query_attention self.megatron_legacy = megatron_legacy + self.dtype = utils_funcs.dtype_from_precision(precision, megatron_amp_O2) self.set_accepted_adapter_types([InfusedAdapterConfig._target_, LoraKQVAdapterConfig._target_]) @@ -141,6 +144,7 @@ def __init__( gather_output=False, init_method=init_method, use_cpu_initialization=use_cpu_initialization, + params_dtype=self.dtype, bias=bias, sequence_parallel_enabled=sequence_parallel, async_tensor_model_parallel_allreduce=async_tensor_model_parallel_allreduce, @@ -153,6 +157,8 @@ def __init__( projection_size, gather_output=False, init_method=init_method, + use_cpu_initialization=use_cpu_initialization, + params_dtype=self.dtype, bias=bias, sequence_parallel_enabled=sequence_parallel, async_tensor_model_parallel_allreduce=async_tensor_model_parallel_allreduce, @@ -164,6 +170,8 @@ def __init__( 2 * projection_size, gather_output=False, init_method=init_method, + use_cpu_initialization=use_cpu_initialization, + params_dtype=self.dtype, bias=bias, sequence_parallel_enabled=sequence_parallel, async_tensor_model_parallel_allreduce=async_tensor_model_parallel_allreduce, @@ -194,6 +202,7 @@ def __init__( init_method=output_layer_init_method, skip_bias_add=True, use_cpu_initialization=use_cpu_initialization, + params_dtype=self.dtype, bias=bias, sequence_parallel_enabled=sequence_parallel, gradient_accumulation_fusion=gradient_accumulation_fusion, @@ -515,6 +524,7 @@ def __init__( apply_query_key_layer_scaling=True, kv_channels=None, use_cpu_initialization=False, + megatron_amp_O2=False, masked_softmax_fusion=True, attention_dropout=0.1, megatron_legacy=False, @@ -537,6 +547,7 @@ def __init__( apply_query_key_layer_scaling=apply_query_key_layer_scaling, kv_channels=kv_channels, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, masked_softmax_fusion=masked_softmax_fusion, attention_dropout=attention_dropout, megatron_legacy=megatron_legacy, diff --git a/nemo/collections/nlp/modules/common/megatron/language_model.py b/nemo/collections/nlp/modules/common/megatron/language_model.py index ddbed5813d95..36eacc43327a 100755 --- a/nemo/collections/nlp/modules/common/megatron/language_model.py +++ b/nemo/collections/nlp/modules/common/megatron/language_model.py @@ -29,6 +29,7 @@ init_method_normal, scaled_init_method_normal, ) +from nemo.collections.nlp.parts import utils_funcs from nemo.core import adapter_mixins try: @@ -74,6 +75,7 @@ def get_language_model( post_process=True, init_method_std=0.02, use_cpu_initialization=False, + megatron_amp_O2=False, hidden_dropout=0.1, attention_dropout=0.1, ffn_dropout=0.0, @@ -149,6 +151,7 @@ def get_language_model( pre_process=pre_process, post_process=post_process, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, hidden_dropout=hidden_dropout, attention_dropout=attention_dropout, ffn_dropout=ffn_dropout, @@ -252,6 +255,8 @@ def __init__( init_method, num_tokentypes=0, use_cpu_initialization=False, + megatron_amp_O2=False, + dtype=torch.float32, fp32_residual_connection=False, sequence_parallel=False, position_embedding_type='learned_absolute', @@ -267,13 +272,17 @@ def __init__( # Word embeddings (parallel). self.word_embeddings = tensor_parallel.VocabParallelEmbedding( - vocab_size, self.hidden_size, init_method=self.init_method, use_cpu_initialization=use_cpu_initialization, + vocab_size, + self.hidden_size, + init_method=self.init_method, + use_cpu_initialization=use_cpu_initialization, + params_dtype=dtype, ) self._word_embeddings_key = 'word_embeddings' if self.position_embedding_type == 'learned_absolute': # Position embedding (serial). - self.position_embeddings = torch.nn.Embedding(max_sequence_length, self.hidden_size) + self.position_embeddings = torch.nn.Embedding(max_sequence_length, self.hidden_size, dtype=dtype) self._position_embeddings_key = 'position_embeddings' # Initialize the position embeddings. self.init_method(self.position_embeddings.weight) @@ -291,7 +300,7 @@ def __init__( # token types and add them as needed. self._tokentype_embeddings_key = 'tokentype_embeddings' if self.num_tokentypes > 0: - self.tokentype_embeddings = torch.nn.Embedding(self.num_tokentypes, self.hidden_size) + self.tokentype_embeddings = torch.nn.Embedding(self.num_tokentypes, self.hidden_size, dtype=dtype) # Initialize the token-type embeddings. self.init_method(self.tokentype_embeddings.weight) else: @@ -457,6 +466,7 @@ def __init__( pre_process=True, post_process=True, use_cpu_initialization=False, + megatron_amp_O2=False, hidden_dropout=0.1, attention_dropout=0.1, ffn_dropout=0.0, @@ -516,6 +526,7 @@ def __init__( self.position_embedding_type = position_embedding_type self.share_embeddings_and_output_weights = share_embeddings_and_output_weights self.sequence_parallel = sequence_parallel + self.dtype = utils_funcs.dtype_from_precision(precision, megatron_amp_O2) if kv_channels is None: @@ -533,10 +544,12 @@ def __init__( init_method=self.init_method, num_tokentypes=self.num_tokentypes, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, embedding_dropout_prob=self.hidden_dropout, sequence_parallel=sequence_parallel, position_embedding_type=position_embedding_type, fp32_residual_connection=fp32_residual_connection, + dtype=self.dtype, ) self._embedding_key = 'embedding' @@ -570,6 +583,7 @@ def __init__( attention_dropout=attention_dropout, ffn_dropout=ffn_dropout, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, persist_layer_norm=persist_layer_norm, openai_gelu=openai_gelu, onnx_safe=onnx_safe, @@ -624,6 +638,7 @@ def __init__( hidden_dropout=hidden_dropout, attention_dropout=attention_dropout, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, bias_activation_fusion=bias_activation_fusion, bias_dropout_add_fusion=bias_dropout_add_fusion, masked_softmax_fusion=masked_softmax_fusion, @@ -649,6 +664,8 @@ def __init__( self.output_layer = tensor_parallel.ColumnParallelLinear( self.hidden_size, self.vocab_size, + use_cpu_initialization=use_cpu_initialization, + params_dtype=self.dtype, bias=False, # Setting bias to False always to keep it consistent with embedding tying that also does not have a bias. init_method=self.init_method, ) diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py b/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py index 3e82537b6b71..28eb39e630fc 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py @@ -53,6 +53,7 @@ def get_decoder_model( post_process=True, init_method_std=0.02, use_cpu_initialization=False, + megatron_amp_O2=False, hidden_dropout=0.1, attention_dropout=0.1, ffn_dropout=0.0, @@ -117,6 +118,7 @@ def get_decoder_model( pre_process=pre_process, post_process=post_process, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, hidden_dropout=hidden_dropout, attention_dropout=attention_dropout, ffn_dropout=ffn_dropout, @@ -158,6 +160,7 @@ def get_decoder_model( pre_process=pre_process, post_process=post_process, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, hidden_dropout=hidden_dropout, attention_dropout=attention_dropout, precision=precision, diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py b/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py index 998b16240347..4005ffbd879e 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py @@ -54,6 +54,7 @@ def get_encoder_model( post_process=True, init_method_std=0.02, use_cpu_initialization=False, + megatron_amp_O2=False, hidden_dropout=0.1, attention_dropout=0.1, ffn_dropout=0.0, @@ -119,6 +120,7 @@ def get_encoder_model( pre_process=pre_process, post_process=post_process, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, hidden_dropout=hidden_dropout, attention_dropout=attention_dropout, ffn_dropout=ffn_dropout, @@ -160,6 +162,7 @@ def get_encoder_model( pre_process=pre_process, post_process=post_process, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, hidden_dropout=hidden_dropout, attention_dropout=attention_dropout, precision=precision, @@ -202,6 +205,7 @@ def get_encoder_model( pre_process=pre_process, post_process=post_process, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, hidden_dropout=hidden_dropout, attention_dropout=attention_dropout, ffn_dropout=ffn_dropout, diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_perceiver_encoders.py b/nemo/collections/nlp/modules/common/megatron/megatron_perceiver_encoders.py index 73774573596d..150c6466bcde 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_perceiver_encoders.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_perceiver_encoders.py @@ -56,6 +56,7 @@ def __init__( pre_process=True, post_process=True, use_cpu_initialization=False, + megatron_amp_O2=False, encoder_attn_mask_type=AttnMaskType.padding, hidden_dropout=0.1, attention_dropout=0.1, @@ -124,6 +125,7 @@ def __init__( self.ffn_dropout = ffn_dropout self.normalize_attention_scores = normalize_attention_scores self.megatron_legacy = megatron_legacy + self.megatron_amp_O2 = megatron_amp_O2 assert self.num_self_attention_per_cross_attention >= 1 assert self.hidden_steps >= 1 @@ -165,6 +167,7 @@ def _build_cross_attn_layer(self): attention_dropout=self.attention_dropout, ffn_dropout=self.ffn_dropout, use_cpu_initialization=self.use_cpu_initialization, + megatron_amp_O2=self.megatron_amp_O2, bias_activation_fusion=self.bias_activation_fusion, bias_dropout_add_fusion=self.bias_dropout_add_fusion, masked_softmax_fusion=self.masked_softmax_fusion, @@ -204,6 +207,7 @@ def _build_self_attn_layer(self): attention_dropout=self.attention_dropout, ffn_dropout=self.ffn_dropout, use_cpu_initialization=self.use_cpu_initialization, + megatron_amp_O2=self.megatron_amp_O2, bias_activation_fusion=self.bias_activation_fusion, bias_dropout_add_fusion=self.bias_dropout_add_fusion, masked_softmax_fusion=self.masked_softmax_fusion, diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py index 77f8e2c3fa25..c3cb1fd05c3b 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py @@ -57,6 +57,7 @@ def __init__( pre_process=True, post_process=True, use_cpu_initialization=False, + megatron_amp_O2=False, decoder_attn_mask_type=AttnMaskType.causal, hidden_dropout=0.1, attention_dropout=0.1, @@ -129,6 +130,7 @@ def __init__( attention_dropout=attention_dropout, ffn_dropout=ffn_dropout, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, bias_activation_fusion=bias_activation_fusion, bias_dropout_add_fusion=bias_dropout_add_fusion, masked_softmax_fusion=masked_softmax_fusion, diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py index 667d000f7a9f..2eacf8aad672 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py @@ -54,6 +54,7 @@ def __init__( pre_process=True, post_process=True, use_cpu_initialization=False, + megatron_amp_O2=False, encoder_attn_mask_type=AttnMaskType.padding, hidden_dropout=0.1, attention_dropout=0.1, @@ -127,6 +128,7 @@ def __init__( attention_dropout=attention_dropout, ffn_dropout=ffn_dropout, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, bias_activation_fusion=bias_activation_fusion, bias_dropout_add_fusion=bias_dropout_add_fusion, masked_softmax_fusion=masked_softmax_fusion, diff --git a/nemo/collections/nlp/modules/common/megatron/mlp.py b/nemo/collections/nlp/modules/common/megatron/mlp.py index 1deff2c1fdc4..a5515cae0dac 100644 --- a/nemo/collections/nlp/modules/common/megatron/mlp.py +++ b/nemo/collections/nlp/modules/common/megatron/mlp.py @@ -68,6 +68,7 @@ def __init__( hidden_size, ffn_hidden_size, use_cpu_initialization=False, + dtype=torch.float32, bias_activation_fusion=True, openai_gelu=False, onnx_safe=False, @@ -90,6 +91,7 @@ def __init__( self.persist_layer_norm = persist_layer_norm self.activation = activation self.dropout = dropout + self.dtype = dtype self.set_accepted_adapter_types([MLPInfusedAdapterConfig._target_]) supported_activations = [ @@ -122,6 +124,7 @@ def __init__( init_method=init_method, skip_bias_add=True, use_cpu_initialization=use_cpu_initialization, + params_dtype=dtype, bias=bias, sequence_parallel_enabled=sequence_parallel, async_tensor_model_parallel_allreduce=async_tensor_model_parallel_allreduce, @@ -138,6 +141,7 @@ def __init__( init_method=init_method, skip_bias_add=True, use_cpu_initialization=use_cpu_initialization, + params_dtype=dtype, bias=bias, sequence_parallel_enabled=sequence_parallel, async_tensor_model_parallel_allreduce=async_tensor_model_parallel_allreduce, @@ -194,6 +198,7 @@ def __init__( init_method=output_layer_init_method, skip_bias_add=True, use_cpu_initialization=use_cpu_initialization, + params_dtype=dtype, bias=bias, sequence_parallel_enabled=sequence_parallel, gradient_accumulation_fusion=gradient_accumulation_fusion, @@ -279,6 +284,7 @@ def __init__( hidden_size, ffn_hidden_size, use_cpu_initialization=False, + dtype=torch.float32, bias_activation_fusion=True, openai_gelu=False, onnx_safe=False, @@ -303,6 +309,7 @@ def __init__( init_method=init_method, skip_bias_add=False, use_cpu_initialization=use_cpu_initialization, + params_dtype=dtype, bias=bias, sequence_parallel_enabled=sequence_parallel, gradient_accumulation_fusion=gradient_accumulation_fusion, @@ -314,6 +321,7 @@ def __init__( 'hidden_size': hidden_size, 'ffn_hidden_size': ffn_hidden_size, 'use_cpu_initialization': use_cpu_initialization, + 'dtype': dtype, 'bias_activation_fusion': bias_activation_fusion, 'openai_gelu': openai_gelu, 'onnx_safe': onnx_safe, diff --git a/nemo/collections/nlp/modules/common/megatron/module.py b/nemo/collections/nlp/modules/common/megatron/module.py index 7d36716f928d..49759bfa7a91 100644 --- a/nemo/collections/nlp/modules/common/megatron/module.py +++ b/nemo/collections/nlp/modules/common/megatron/module.py @@ -111,7 +111,7 @@ def decoder_cross_attention_relative_position_embeddings_weight(self): f"No decoder_cross_attention_relative_position_embedding found on this rank. Looking for decoder_cross_attention_relative_position_embedding.relative_position_embedding.weight" ) - def initialize_word_embeddings(self, init_method, vocab_size, hidden_size): + def initialize_word_embeddings(self, init_method, vocab_size, hidden_size, param_dtype=torch.float32): if not self.share_token_embeddings: raise Exception('initialize_word_embeddings() was called but ' 'share_token_embeddings is false') @@ -140,7 +140,7 @@ def initialize_word_embeddings(self, init_method, vocab_size, hidden_size): # set word_embeddings weights to 0 here, then copy first # stage's weights using all_reduce below. self.word_embeddings = tensor_parallel.VocabParallelEmbedding( - vocab_size, hidden_size, init_method=init_method + vocab_size, hidden_size, init_method=init_method, params_dtype=param_dtype ) self.word_embeddings.weight.data.fill_(0) self.word_embeddings.weight.shared = True diff --git a/nemo/collections/nlp/modules/common/megatron/retrieval_token_level_encoder_decoder.py b/nemo/collections/nlp/modules/common/megatron/retrieval_token_level_encoder_decoder.py index 0b164a80e0e4..cbec4c754840 100644 --- a/nemo/collections/nlp/modules/common/megatron/retrieval_token_level_encoder_decoder.py +++ b/nemo/collections/nlp/modules/common/megatron/retrieval_token_level_encoder_decoder.py @@ -26,6 +26,7 @@ init_method_normal, scaled_init_method_normal, ) +from nemo.collections.nlp.parts import utils_funcs try: from apex.transformer.enums import ModelType @@ -68,6 +69,7 @@ def __init__( init_method_std=0.02, fp16_cross_entropy=False, use_cpu_initialization=False, + megatron_amp_O2=False, hidden_dropout=0.1, attention_dropout=0.1, precision=16, @@ -121,6 +123,8 @@ def __init__( self.num_chunked_cross_attention = len(dec_cross_attention) self.megatron_lm_compatible = megatron_lm_compatible + self.dtype = utils_funcs.dtype_from_precision(precision, megatron_amp_O2) + if kv_channels is None: assert ( hidden_size % num_attention_heads == 0 @@ -138,6 +142,7 @@ def __init__( embedding_dropout_prob=hidden_dropout, position_embedding_type='learned_absolute' if add_position_embedding else '', transpose_batch_sequence=False, + dtype=self.dtype, ) self._embedding_key = "embedding" @@ -172,6 +177,7 @@ def __init__( else post_process, # megatron lm model has no final layer_norm init_method_std=init_method_std, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, hidden_dropout=hidden_dropout, attention_dropout=attention_dropout, precision=precision, @@ -236,6 +242,7 @@ def __init__( post_process=False, # no need for post process init_method_std=init_method_std, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, hidden_dropout=hidden_dropout, attention_dropout=attention_dropout, precision=precision, @@ -280,6 +287,7 @@ def __init__( post_process=post_process, init_method_std=init_method_std, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, hidden_dropout=hidden_dropout, attention_dropout=attention_dropout, precision=precision, diff --git a/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py b/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py index 76d171eb55da..73c41cee6c6f 100644 --- a/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py @@ -54,6 +54,7 @@ def __init__( pre_process=True, post_process=True, use_cpu_initialization=False, + megatron_amp_O2=False, hidden_dropout=0.1, attention_dropout=0.1, precision=16, @@ -126,6 +127,7 @@ def __init__( hidden_dropout=hidden_dropout, attention_dropout=attention_dropout, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, bias_activation_fusion=bias_activation_fusion, bias_dropout_add_fusion=bias_dropout_add_fusion, masked_softmax_fusion=masked_softmax_fusion, @@ -337,6 +339,7 @@ def __init__( pre_process=True, post_process=True, use_cpu_initialization=False, + megatron_amp_O2=False, hidden_dropout=0.1, attention_dropout=0.1, precision=16, @@ -408,6 +411,7 @@ def __init__( hidden_dropout=hidden_dropout, attention_dropout=attention_dropout, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, bias_activation_fusion=bias_activation_fusion, bias_dropout_add_fusion=bias_dropout_add_fusion, masked_softmax_fusion=masked_softmax_fusion, diff --git a/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py b/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py index dcf41a696b6e..229a9af48048 100644 --- a/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py +++ b/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py @@ -38,6 +38,7 @@ scaled_init_method_normal, ) from nemo.collections.nlp.modules.common.megatron.vocab_parallel_cross_entropy import vocab_parallel_cross_entropy +from nemo.collections.nlp.parts import utils_funcs try: from apex.transformer.enums import AttnMaskType, ModelType @@ -115,6 +116,7 @@ def __init__( post_process=True, fp16_cross_entropy=False, use_cpu_initialization=False, + megatron_amp_O2=False, precision=16, embedding_init_method_std=0.02, embedding_dropout=0.1, @@ -143,6 +145,8 @@ def __init__( encoder_kv_channels, decoder_kv_channels = self._validate_config() + self.dtype = utils_funcs.dtype_from_precision(precision, megatron_amp_O2) + encoder, decoder = None, None if add_encoder: if pre_process: @@ -153,6 +157,7 @@ def __init__( init_method=init_method_normal(embedding_init_method_std), num_tokentypes=num_tokentypes, use_cpu_initialization=use_cpu_initialization, + dtype=self.dtype, embedding_dropout_prob=embedding_dropout, position_embedding_type=encoder_cfg.get('position_embedding_type', 'learned_absolute'), ) @@ -209,6 +214,7 @@ def __init__( post_process=post_process, init_method_std=encoder_cfg.get('init_method_std', 0.02), use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, hidden_dropout=encoder_cfg.get('hidden_dropout', 0.1), attention_dropout=encoder_cfg.get('attention_dropout', 0.1), ffn_dropout=encoder_cfg.get('ffn_dropout', 0.0), @@ -254,6 +260,7 @@ def __init__( init_method=init_method_normal(embedding_init_method_std), num_tokentypes=num_tokentypes, use_cpu_initialization=use_cpu_initialization, + dtype=self.dtype, embedding_dropout_prob=embedding_dropout, position_embedding_type=decoder_cfg.get('position_embedding_type', 'learned_absolute'), ) @@ -338,6 +345,7 @@ def __init__( post_process=post_process, init_method_std=decoder_cfg.get('init_method_std', 0.02), use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, hidden_dropout=decoder_cfg.get('hidden_dropout', 0.1), attention_dropout=decoder_cfg.get('attention_dropout', 0.1), ffn_dropout=decoder_cfg.get('ffn_dropout', 0.0), @@ -393,6 +401,7 @@ def __init__( gather_output=not self.parallel_output, init_method=init_method_normal(decoder_cfg.init_method_std), use_cpu_initialization=use_cpu_initialization, + params_dtype=self.dtype, ) self._tokens_head_key = 'tokens_head' diff --git a/nemo/collections/nlp/modules/common/megatron/transformer.py b/nemo/collections/nlp/modules/common/megatron/transformer.py index 85d055f70e37..0f6112e08036 100644 --- a/nemo/collections/nlp/modules/common/megatron/transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/transformer.py @@ -38,6 +38,7 @@ from nemo.collections.nlp.modules.common.megatron.mlp import ParallelMLP, SwitchMLP from nemo.collections.nlp.modules.common.megatron.module import MegatronModule from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults +from nemo.collections.nlp.parts import utils_funcs from nemo.core import adapter_mixins from nemo.utils import logging @@ -139,6 +140,7 @@ def __init__( hidden_dropout=0.1, persist_layer_norm=False, use_cpu_initialization=False, + megatron_amp_O2=False, bias_activation_fusion=True, bias_dropout_add_fusion=True, masked_softmax_fusion=True, @@ -176,6 +178,8 @@ def __init__( self.bias = bias self.transformer_block_type = transformer_block_type self.position_embedding_type = position_embedding_type + self.param_dtype = utils_funcs.dtype_from_precision(precision, megatron_amp_O2) + self.set_accepted_adapter_types([LinearAdapterConfig._target_, ParallelLinearAdapterConfig._target_]) if not bias and bias_dropout_add_fusion: @@ -223,6 +227,7 @@ def __init__( apply_query_key_layer_scaling=apply_query_key_layer_scaling, kv_channels=kv_channels, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, masked_softmax_fusion=masked_softmax_fusion, attention_dropout=attention_dropout, multi_query_attention=multi_query_attention, @@ -292,6 +297,7 @@ def __init__( kv_channels=kv_channels, multi_query_attention=multi_query_attention, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, masked_softmax_fusion=masked_softmax_fusion, attention_dropout=attention_dropout, megatron_legacy=megatron_legacy, @@ -339,6 +345,7 @@ def __init__( apply_query_key_layer_scaling=apply_query_key_layer_scaling, kv_channels=kv_channels, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, masked_softmax_fusion=masked_softmax_fusion, attention_dropout=attention_dropout, megatron_legacy=megatron_legacy, @@ -381,6 +388,7 @@ def __init__( hidden_size=hidden_size, ffn_hidden_size=ffn_hidden_size, use_cpu_initialization=use_cpu_initialization, + dtype=self.param_dtype, bias_activation_fusion=bias_activation_fusion, openai_gelu=openai_gelu, onnx_safe=onnx_safe, @@ -401,6 +409,7 @@ def __init__( hidden_size=hidden_size, ffn_hidden_size=ffn_hidden_size, use_cpu_initialization=use_cpu_initialization, + dtype=self.param_dtype, bias_activation_fusion=bias_activation_fusion, openai_gelu=openai_gelu, onnx_safe=onnx_safe, @@ -637,6 +646,7 @@ def __init__( bias_dropout_add_fusion=True, persist_layer_norm=False, use_cpu_initialization=False, + megatron_amp_O2=False, bias_activation_fusion=True, openai_gelu=False, onnx_safe=False, @@ -678,6 +688,7 @@ def __init__( bias_dropout_add_fusion=bias_dropout_add_fusion, persist_layer_norm=persist_layer_norm, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, bias_activation_fusion=bias_activation_fusion, openai_gelu=openai_gelu, onnx_safe=onnx_safe, @@ -702,14 +713,8 @@ def __init__( moe_dropout=moe_dropout, ) - if precision == 'bf16': - self.dtype = torch.bfloat16 - elif int(precision) == 16: - self.dtype = torch.float16 - elif int(precision) == 32: - self.dtype = torch.float32 - else: - raise ValueError + # Dtype for forward pass - ignore amp O2 + self.dtype = utils_funcs.dtype_from_precision(precision, megatron_amp_O2=None) def forward( self, @@ -822,14 +827,8 @@ def __init__( ) # use_emha=use_emha, - if autocast_dtype == 32: - self.dtype = torch.float32 - elif autocast_dtype == 16: - self.dtype = torch.float16 - elif autocast_dtype == 'bf16': - self.dtype = torch.bfloat16 - else: - raise ValueError + # Dtype for forward pass - ignore amp O2 + self.dtype = utils_funcs.dtype_from_precision(autocast_dtype, megatron_amp_O2=None) def forward( self, @@ -889,6 +888,7 @@ def __init__( attention_dropout=0.1, ffn_dropout=0.0, use_cpu_initialization=False, + megatron_amp_O2=False, bias_activation_fusion=True, bias_dropout_add_fusion=True, masked_softmax_fusion=True, @@ -1079,6 +1079,7 @@ def build_layer(layer_number): attention_dropout=attention_dropout, ffn_dropout=ffn_dropout, use_cpu_initialization=use_cpu_initialization, + megatron_amp_O2=megatron_amp_O2, bias_activation_fusion=bias_activation_fusion, bias_dropout_add_fusion=bias_dropout_add_fusion, masked_softmax_fusion=masked_softmax_fusion, diff --git a/nemo/collections/nlp/parts/utils_funcs.py b/nemo/collections/nlp/parts/utils_funcs.py index 58872c6b4670..cd76840c8db8 100644 --- a/nemo/collections/nlp/parts/utils_funcs.py +++ b/nemo/collections/nlp/parts/utils_funcs.py @@ -16,7 +16,7 @@ import os import time -from typing import Dict, List, Union +from typing import Dict, List, Optional, Union import numpy as np import torch @@ -27,6 +27,20 @@ from nemo.utils import logging +def dtype_from_precision(precision: Union[int, str], megatron_amp_O2: Optional[bool]) -> torch.dtype: + if megatron_amp_O2 is not None and megatron_amp_O2 is False: + return torch.float32 + + if precision == 'bf16': + return torch.bfloat16 + elif int(precision) == 16: + return torch.float16 + elif int(precision) == 32: + return torch.float32 + else: + raise ValueError(f"Could not parse the precision of `{precision}` to a valid torch.dtype") + + def list2str(l: List[int]) -> str: """ Converts list to a string""" return ' '.join([str(x) for x in l]) diff --git a/nemo/core/connectors/save_restore_connector.py b/nemo/core/connectors/save_restore_connector.py index e5eb4930e224..998de3e91059 100644 --- a/nemo/core/connectors/save_restore_connector.py +++ b/nemo/core/connectors/save_restore_connector.py @@ -562,7 +562,7 @@ def _save_state_dict_to_disk(state_dict, filepath): @staticmethod def _load_state_dict_from_disk(model_weights, map_location=None): - return torch.load(model_weights, map_location=map_location) + return torch.load(model_weights, map_location='cpu') @property def model_config_yaml(self) -> str: From 97462e2de1bd9096139aed513d284af69b4ed23d Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Mon, 15 May 2023 18:44:48 -0700 Subject: [PATCH 106/512] handle artifacts when path is dir (#6658) Signed-off-by: arendu --- nemo/core/connectors/save_restore_connector.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nemo/core/connectors/save_restore_connector.py b/nemo/core/connectors/save_restore_connector.py index 998de3e91059..473118744594 100644 --- a/nemo/core/connectors/save_restore_connector.py +++ b/nemo/core/connectors/save_restore_connector.py @@ -470,7 +470,10 @@ def _handle_artifacts(self, model, nemo_file_folder): # unpack all restorations paths (nemo checkpoints) # in nemo checkpoints all resources contain hash in name, so there should be no collisions for path in restoration_paths: - self._unpack_nemo_file(path2file=path, out_folder=archive_dir) + if self.model_extracted_dir: + shutil.copytree(src=path, dst=archive_dir, dirs_exist_ok=True) + else: + self._unpack_nemo_file(path2file=path, out_folder=archive_dir) os.chdir(archive_dir) for conf_path, artiitem in tarfile_artifacts: # Get basename and copy it to nemo_file_folder From ceea9143e677b48d534ee58ea8a02333f12899d8 Mon Sep 17 00:00:00 2001 From: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Date: Mon, 15 May 2023 21:19:07 -0700 Subject: [PATCH 107/512] remove upgrading setuptools in reinstall.sh (#6659) Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> --- reinstall.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reinstall.sh b/reinstall.sh index 06f9f0f284c0..7a533ff146d9 100755 --- a/reinstall.sh +++ b/reinstall.sh @@ -5,6 +5,8 @@ INSTALL_OPTION=${1:-"dev"} PIP=pip +${PIP} install -U ${PIP} + echo 'Uninstalling stuff' ${PIP} uninstall -y nemo_toolkit ${PIP} uninstall -y sacrebleu @@ -14,8 +16,6 @@ ${PIP} uninstall -y nemo_asr ${PIP} uninstall -y nemo_nlp ${PIP} uninstall -y nemo_tts -${PIP} install -U setuptools - if [ -n "${NVIDIA_PYTORCH_VERSION}" ]; then echo 'Installing NeMo in NVIDIA PyTorch container:' "${NVIDIA_PYTORCH_VERSION}" 'so will not install numba' else From 4a47336b4d57be486cec2204123360a51ae07f21 Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Mon, 15 May 2023 22:25:05 -0700 Subject: [PATCH 108/512] merge lora weights into base model (#6597) * merge lora weights into base model Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * typo fix Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * minor update Signed-off-by: arendu * update copyright Signed-off-by: arendu * eval needs to know the PEFT class Signed-off-by: arendu * add target class in training script so that we can use it in eval Signed-off-by: arendu * update Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update to work for tp1 Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * set restore model path Signed-off-by: arendu * peft can be none Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated merge script so that eval works easily Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * eval with peft or sft model Signed-off-by: arendu * keep sentences in jsonl format Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * convert sft using correct classpath Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated to force sft yaml to have the correct target Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated docs Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix conversion and eval Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../tuning/megatron_gpt_peft_eval.py | 1 - .../tuning/megatron_gpt_peft_tuning.py | 2 + .../conf/merge_lora_weights.yaml | 16 ++ .../merge_lora_weights/merge.py | 223 ++++++++++++++++++ 4 files changed, 241 insertions(+), 1 deletion(-) create mode 100644 scripts/nlp_language_modeling/merge_lora_weights/conf/merge_lora_weights.yaml create mode 100644 scripts/nlp_language_modeling/merge_lora_weights/merge.py diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py index 338b66a80cfa..b45f5da69e89 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py @@ -22,7 +22,6 @@ from pytorch_lightning.plugins.environments import TorchElasticEnvironment from torch.utils.data import DataLoader -from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import MegatronGPTPEFTModel from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTSFTModel from nemo.collections.nlp.models.nlp_model import NLPModel diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py index d0f95b371a13..bf2705aa99e1 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py @@ -97,6 +97,8 @@ def _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False): gpt_cfg.attention_dropout = cfg.model.get('attention_dropout', 0.0) gpt_cfg.ffn_dropout = cfg.model.ffn_dropout gpt_cfg.peft = cfg.model.peft + peft_cls = _get_peft_scheme(cfg.model) + gpt_cfg.target = f"{peft_cls.__module__}.{peft_cls.__name__}" # This is needed when modifying a hparam file directly to load `.ckpt` files. # This is not needed to modify the cfg in `.nemo` files. diff --git a/scripts/nlp_language_modeling/merge_lora_weights/conf/merge_lora_weights.yaml b/scripts/nlp_language_modeling/merge_lora_weights/conf/merge_lora_weights.yaml new file mode 100644 index 000000000000..891509c15996 --- /dev/null +++ b/scripts/nlp_language_modeling/merge_lora_weights/conf/merge_lora_weights.yaml @@ -0,0 +1,16 @@ +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +tensor_model_parallel_size: -1 +pipeline_model_parallel_size: -1 +pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) +gpt_model_file: null, # GPT nemo file path +checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the GPT training +checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading +hparams_file: null # model configuration file, only used for PTL checkpoint loading +lora_model_path: ??? +merged_model_path: ??? \ No newline at end of file diff --git a/scripts/nlp_language_modeling/merge_lora_weights/merge.py b/scripts/nlp_language_modeling/merge_lora_weights/merge.py new file mode 100644 index 000000000000..9989574cbf5b --- /dev/null +++ b/scripts/nlp_language_modeling/merge_lora_weights/merge.py @@ -0,0 +1,223 @@ +#!/usr/bin/env +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Merge lora weights into a base GPT LM. Only PP=1 supported so far. +""" + + +import os +import tempfile +from typing import Any, Dict + +import torch +from omegaconf import OmegaConf, open_dict +from pytorch_lightning.trainer.trainer import Trainer +from torch.utils.data import DataLoader, Dataset + +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import MegatronGPTLoRAModel +from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTSFTModel +from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.app_state import AppState +from nemo.utils.model_utils import inject_model_parallel_rank + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +class RequestDataSet(Dataset): + def __init__(self, sentences): + super().__init__() + self.sentences = sentences + + def __len__(self,): + return len(self.sentences) + + def __getitem__(self, idx): + return self.sentences[idx] + + +def load_lora(lora_nemo, tp): + lora_state_dict = {} + with tempfile.TemporaryDirectory() as tmpdir: + NLPSaveRestoreConnector._unpack_nemo_file(lora_nemo, tmpdir) + # assert os.path.isdir(lora_extracted_dir), "requires the untar'ed the lora .nemo file" + for i in range(tp): + if tp == 1: + ckpt_file = f"{tmpdir}/model_weights.ckpt" + else: + ckpt_file = f"{tmpdir}/mp_rank_0{i}/model_weights.ckpt" + + l = torch.load(ckpt_file, map_location=torch.device('cpu')) + lora_state_dict[i] = l + return lora_state_dict + + +def merge( + base_model_state_dict: Dict[str, Any], lora_state_dict: Dict[int, Any], tp: int, num_layers: int, curr_rank: int +): + """ + Iterate through all the self_attention.query_key_value projection feedforward weights in all the layers. + Collect the corresponding lora weights for each layer and across tp ranks. + Computes the "full rank" weight from the two low-rank weights and add it to the self_attention.query_key_value weight. + Args: + base_model_state_dict: A state_dict for the base model for the current rank. + lora_state_dict: A complete set of weights for the lora model across all tp ranks. They key for this dict is an int tp rank. + tp: the tensor_model_parallel_size for the base_model (and the lora model) + num_layers: the number of layers in the base_model to iterate over. + curr_rank: current tp rank of the base model which is being merged with Lora. + """ + + for nl in range(num_layers): + key_self_attn_kqv = f'model.language_model.encoder.layers.{nl}.self_attention.query_key_value.weight' + key_lora_in = ( + f'model.language_model.encoder.layers.{nl}.self_attention.adapter_layer.lora_kqv_adapter.linear_in.weight' + ) + key_lora_out = ( + f'model.language_model.encoder.layers.{nl}.self_attention.adapter_layer.lora_kqv_adapter.linear_out.weight' + ) + wt_lora_in = torch.cat([lora_state_dict[_tp][key_lora_in] for _tp in range(tp)], dim=0) + wt_lora_out = lora_state_dict[curr_rank][key_lora_out] + wt_self_attn = base_model_state_dict[key_self_attn_kqv] + wt_lora = wt_lora_out @ wt_lora_in + base_model_state_dict[key_self_attn_kqv] = wt_self_attn + wt_lora.type_as(wt_self_attn) + return base_model_state_dict + + +@hydra_runner(config_path="conf", config_name="merge_lora_weights") +def main(cfg) -> None: + + # trainer required for restoring model parallel models + trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer) + + if ( + cfg.tensor_model_parallel_size < 0 + or cfg.pipeline_model_parallel_size < 0 + or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 + ): + model_config = MegatronGPTModel.restore_from( + restore_path=cfg.gpt_model_file, trainer=trainer, return_config=True, + ) + + with open_dict(cfg): + cfg.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) + cfg.pipeline_model_parallel_size = model_config.get('pipeline_model_parallel_size', 1) + cfg.pipeline_model_parallel_split_rank = model_config.get('pipeline_model_parallel_split_rank', 0) + + assert ( + cfg.trainer.devices * cfg.trainer.num_nodes + == cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size + ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" + + if cfg.gpt_model_file: + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.gpt_model_file): + save_restore_connector.model_extracted_dir = cfg.gpt_model_file + + pretrained_cfg = MegatronGPTModel.restore_from( + restore_path=cfg.gpt_model_file, + trainer=trainer, + return_config=True, + save_restore_connector=save_restore_connector, + ) + OmegaConf.set_struct(pretrained_cfg, True) + with open_dict(pretrained_cfg): + pretrained_cfg.sequence_parallel = False + pretrained_cfg.activations_checkpoint_granularity = None + pretrained_cfg.activations_checkpoint_method = None + pretrained_cfg.precision = trainer.precision + if trainer.precision == "16": + pretrained_cfg.megatron_amp_O2 = False + model = MegatronGPTModel.restore_from( + restore_path=cfg.gpt_model_file, + trainer=trainer, + override_config_path=pretrained_cfg, + map_location=torch.device("cpu"), + save_restore_connector=save_restore_connector, + ) + elif cfg.checkpoint_dir: + app_state = AppState() + if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: + app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = cfg.tensor_model_parallel_size + app_state.pipeline_model_parallel_size = cfg.pipeline_model_parallel_size + ( + app_state.tensor_model_parallel_rank, + app_state.pipeline_model_parallel_rank, + app_state.model_parallel_size, + app_state.data_parallel_size, + app_state.pipeline_model_parallel_split_rank, + app_state.virtual_pipeline_model_parallel_rank, + ) = fake_initialize_model_parallel( + world_size=app_state.model_parallel_size, + rank=trainer.global_rank, + tensor_model_parallel_size_=cfg.tensor_model_parallel_size, + pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank, + ) + checkpoint_path = inject_model_parallel_rank(os.path.join(cfg.checkpoint_dir, cfg.checkpoint_name)) + model = MegatronGPTModel.load_from_checkpoint(checkpoint_path, hparams_file=cfg.hparams_file, trainer=trainer) + else: + raise ValueError("need at least a nemo file or checkpoint dir") + + lora_model_cfg = MegatronGPTLoRAModel.restore_from( + restore_path=cfg.lora_model_path, trainer=trainer, return_config=True, + ) + + # load the lora weights on cpu for all ranks of the lora model + lora_weights = load_lora(cfg.lora_model_path, model.cfg.tensor_model_parallel_size) + + # merge the lora weights with the base model, for this current rank. + merged_weights = merge( + model.state_dict(), + lora_weights, + tp=model.cfg.tensor_model_parallel_size, + num_layers=model.cfg.num_layers, + curr_rank=model.global_rank, + ) + + # load the merged_weights back into the base model, for this current rank. + model.load_state_dict(merged_weights) + + # Going to go through the motions of inference to force PTL to run subprocess for loading all base model's ranks. + input = "Context: In 2004, philosopher and psychologist Michel ter Hark (Groningen, The Netherlands) published a book, called Popper, Otto Selz and the rise of evolutionary epistemology, in which he claimed that Popper took some of his ideas from his tutor, the German psychologist Otto Selz. Selz never published his ideas, partly because of the rise of Nazism, which forced him to quit his work in 1933, and the prohibition of referring to Selz' work. Popper, the historian of ideas and his scholarship, is criticised in some academic quarters for his rejection of Plato, Hegel and Marx. Question: Who claimed Otto Selz deserved credit for ideas published by Popper? Answer:" + ds = RequestDataSet([input]) + request_dl = DataLoader(dataset=ds, batch_size=1) + config = {'greedy': True, 'compute_logprob': False, 'tokens_to_generate': 5, 'add_BOS': False} + model.set_inference_config(config) + response = trainer.predict(model, request_dl) + print(response) + + with open_dict(model.cfg): + model.cfg.restore_from_path = cfg.merged_model_path + model.cfg.data = lora_model_cfg.data + model.cfg.target = f"{MegatronGPTSFTModel.__module__}.{MegatronGPTSFTModel.__name__}" + + model.save_to(cfg.merged_model_path) + logging.info(f"saved merged model to {cfg.merged_model_path}") + + +if __name__ == '__main__': + main() # noqa pylint: disable=no-value-for-parameter From 5c2e9c8ca65efc4cf6762f51cce29ab96c7d6ff7 Mon Sep 17 00:00:00 2001 From: Eric Harper Date: Tue, 16 May 2023 10:42:12 -0600 Subject: [PATCH 109/512] upgrade to 23.04 (#6660) Signed-off-by: ericharper --- Dockerfile | 2 +- Jenkinsfile | 2 +- README.rst | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index d27ed857a88a..a2b3eacf664c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.03-py3 +ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.04-py3 # build an image that includes only the nemo dependencies, ensures that dependencies # are included first for optimal caching, and useful for building a development diff --git a/Jenkinsfile b/Jenkinsfile index deba8fcbfd70..b9899704e147 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,7 +1,7 @@ pipeline { agent { docker { - image 'pytorch_23.03:apex_57057e2fcf1c084c0fcc818f55c0ff6ea1b24ae2' + image 'nvcr.io/nvidia/pytorch:23.04-py3' args '--device=/dev/nvidia0 --gpus all --user 0:128 -v /home/TestData:/home/TestData -v $HOME/.cache:/root/.cache --shm-size=8g --env TRANSFORMERS_OFFLINE=1' } } diff --git a/README.rst b/README.rst index da24655d008f..1335620ead25 100644 --- a/README.rst +++ b/README.rst @@ -301,13 +301,13 @@ To build a nemo container with Dockerfile from a branch, please run DOCKER_BUILDKIT=1 docker build -f Dockerfile -t nemo:latest . -If you chose to work with main branch, we recommend using NVIDIA's PyTorch container version 23.03-py3 and then installing from GitHub. +If you chose to work with main branch, we recommend using NVIDIA's PyTorch container version 23.04-py3 and then installing from GitHub. .. code-block:: bash docker run --gpus all -it --rm -v :/NeMo --shm-size=8g \ -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit \ - stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:23.03-py3 + stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:23.04-py3 Examples -------- From 503755ae72ae9bb939ef5b1133d44609d989772f Mon Sep 17 00:00:00 2001 From: Eric Harper Date: Tue, 16 May 2023 10:48:01 -0600 Subject: [PATCH 110/512] Merge r1.18.0 bugfixes and doc updates to main (#6655) * update branch Signed-off-by: ericharper * Remove from jenkins (#6641) * add megatron_core to requirements Signed-off-by: ericharper * remove from jenkins Signed-off-by: ericharper --------- Signed-off-by: ericharper * remove dup Signed-off-by: ericharper * update branch Signed-off-by: ericharper * [TTS] reformat NeMo versions in the tts logging messages to avoid batch process them when upgrading NeMo versions. Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --------- Signed-off-by: ericharper Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --- Dockerfile | 2 +- Jenkinsfile | 4 ++-- nemo/collections/tts/g2p/modules.py | 6 +++--- nemo/collections/tts/models/aligner.py | 2 +- nemo/collections/tts/models/fastpitch.py | 2 +- nemo/collections/tts/models/mixer_tts.py | 2 +- nemo/collections/tts/models/radtts.py | 2 +- nemo/collections/tts/models/tacotron2.py | 2 +- nemo/collections/tts/models/vits.py | 2 +- nemo/collections/tts/torch/g2ps.py | 6 +++--- nemo/package_info.py | 2 +- tutorials/VoiceSwapSample.ipynb | 2 +- tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb | 2 +- 13 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index a2b3eacf664c..82d16a561886 100644 --- a/Dockerfile +++ b/Dockerfile @@ -89,7 +89,7 @@ COPY . . # start building the final container FROM nemo-deps as nemo -ARG NEMO_VERSION=1.18.0 +ARG NEMO_VERSION=1.19.0 # Check that NEMO_VERSION is set. Build will fail without this. Expose NEMO and base container # version information as runtime environment variable for introspection purposes diff --git a/Jenkinsfile b/Jenkinsfile index b9899704e147..2d42debb53d7 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -3799,8 +3799,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 with KERPLE Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.18.0' - changeRequest target: 'r1.18.0' + branch 'main' + changeRequest target: 'main' } } failFast true diff --git a/nemo/collections/tts/g2p/modules.py b/nemo/collections/tts/g2p/modules.py index b8124489f3b1..cff81345a52d 100644 --- a/nemo/collections/tts/g2p/modules.py +++ b/nemo/collections/tts/g2p/modules.py @@ -15,7 +15,7 @@ from nemo.collections.tts.g2p.models.en_us_arpabet import EnglishG2p from nemo.collections.tts.g2p.models.i18n_ipa import IpaG2p as IPAG2P -# TODO @xueyang: This file is kept for backward-compatibility purposes since all older NGC models (<= r1.16.0) used this -# import path. We will remove this file soon; `IPAG2P` will be also renamed as `IpaG2p`. Please start using new import -# path and the new `IpaG2p` name from r1.16.0. +# TODO @xueyang: This file is kept for backward-compatibility purposes since all older NGC models that were trained on +# and before NeMo 1.16.0 used this import path. We will remove this file soon; `IPAG2P` will be also renamed as +# `IpaG2p`. Please start using new import path and the new `IpaG2p` name from NeMo 1.16.0. from nemo.collections.tts.g2p.models.zh_cn_pinyin import ChineseG2p diff --git a/nemo/collections/tts/models/aligner.py b/nemo/collections/tts/models/aligner.py index 05c32cf09b36..49301afc1591 100644 --- a/nemo/collections/tts/models/aligner.py +++ b/nemo/collections/tts/models/aligner.py @@ -103,7 +103,7 @@ def _setup_tokenizer(self, cfg): cfg.text_tokenizer.g2p['_target_'] = cfg.text_tokenizer.g2p['_target_'].replace( "nemo_text_processing.g2p", "nemo.collections.tts.g2p" ) - logging.warning("This checkpoint support will be dropped after r1.18.0.") + logging.warning("This checkpoint support will be dropped after NeMo 1.18.0.") g2p_kwargs = {} diff --git a/nemo/collections/tts/models/fastpitch.py b/nemo/collections/tts/models/fastpitch.py index 28185c8f8622..d44de8ce0075 100644 --- a/nemo/collections/tts/models/fastpitch.py +++ b/nemo/collections/tts/models/fastpitch.py @@ -222,7 +222,7 @@ def _setup_tokenizer(self, cfg): cfg.text_tokenizer.g2p['_target_'] = cfg.text_tokenizer.g2p['_target_'].replace( "nemo_text_processing.g2p", "nemo.collections.tts.g2p" ) - logging.warning("This checkpoint support will be dropped after r1.18.0.") + logging.warning("This checkpoint support will be dropped after NeMo 1.18.0.") g2p_kwargs = {} diff --git a/nemo/collections/tts/models/mixer_tts.py b/nemo/collections/tts/models/mixer_tts.py index 9623de698f8e..38efd5a147a0 100644 --- a/nemo/collections/tts/models/mixer_tts.py +++ b/nemo/collections/tts/models/mixer_tts.py @@ -153,7 +153,7 @@ def _setup_tokenizer(self, cfg): cfg.text_tokenizer.g2p['_target_'] = cfg.text_tokenizer.g2p['_target_'].replace( "nemo_text_processing.g2p", "nemo.collections.tts.g2p" ) - logging.warning("This checkpoint support will be dropped after r1.18.0.") + logging.warning("This checkpoint support will be dropped after NeMo 1.18.0.") g2p_kwargs = {} diff --git a/nemo/collections/tts/models/radtts.py b/nemo/collections/tts/models/radtts.py index cf2ca3c73590..98bfbb4c2a18 100644 --- a/nemo/collections/tts/models/radtts.py +++ b/nemo/collections/tts/models/radtts.py @@ -337,7 +337,7 @@ def _setup_tokenizer(self, cfg): cfg.text_tokenizer.g2p['_target_'] = cfg.text_tokenizer.g2p['_target_'].replace( "nemo_text_processing.g2p", "nemo.collections.tts.g2p" ) - logging.warning("This checkpoint support will be dropped after r1.18.0.") + logging.warning("This checkpoint support will be dropped after NeMo 1.18.0.") g2p_kwargs = {} diff --git a/nemo/collections/tts/models/tacotron2.py b/nemo/collections/tts/models/tacotron2.py index 27462f97149d..37880a0eae6f 100644 --- a/nemo/collections/tts/models/tacotron2.py +++ b/nemo/collections/tts/models/tacotron2.py @@ -337,7 +337,7 @@ def _setup_tokenizer(self, cfg): cfg.text_tokenizer.g2p['_target_'] = cfg.text_tokenizer.g2p['_target_'].replace( "nemo_text_processing.g2p", "nemo.collections.tts.g2p" ) - logging.warning("This checkpoint support will be dropped after r1.18.0.") + logging.warning("This checkpoint support will be dropped after NeMo 1.18.0.") g2p_kwargs = {} diff --git a/nemo/collections/tts/models/vits.py b/nemo/collections/tts/models/vits.py index 8f1dd96a56a0..78614fa6264b 100644 --- a/nemo/collections/tts/models/vits.py +++ b/nemo/collections/tts/models/vits.py @@ -117,7 +117,7 @@ def _setup_tokenizer(self, cfg): cfg.text_tokenizer.g2p['_target_'] = cfg.text_tokenizer.g2p['_target_'].replace( "nemo_text_processing.g2p", "nemo.collections.tts.g2p" ) - logging.warning("This checkpoint support will be dropped after r1.18.0.") + logging.warning("This checkpoint support will be dropped after NeMo 1.18.0.") g2p_kwargs = {} diff --git a/nemo/collections/tts/torch/g2ps.py b/nemo/collections/tts/torch/g2ps.py index 90c2798c8baa..084a4c9d7699 100644 --- a/nemo/collections/tts/torch/g2ps.py +++ b/nemo/collections/tts/torch/g2ps.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO @xueyang: This file is kept for backward-compatibility purposes since all older NGC models (<= r1.16.0) used this -# import path. We will remove this file soon; `IPAG2P` will be also renamed as `IpaG2p`. Please start using new import -# path and the new `IpaG2p` name from r1.16.0. +# TODO @xueyang: This file is kept for backward-compatibility purposes since all older NGC models that were trained on +# and before NeMo 1.16.0 used this import path. We will remove this file soon; `IPAG2P` will be also renamed as +# `IpaG2p`. Please start using new import path and the new `IpaG2p` name from NeMo 1.16.0. from nemo.collections.tts.g2p.models.en_us_arpabet import EnglishG2p from nemo.collections.tts.g2p.models.i18n_ipa import IpaG2p as IPAG2P from nemo.collections.tts.g2p.models.zh_cn_pinyin import ChineseG2p diff --git a/nemo/package_info.py b/nemo/package_info.py index 10bb89c73aff..d77e3046359b 100644 --- a/nemo/package_info.py +++ b/nemo/package_info.py @@ -14,7 +14,7 @@ MAJOR = 1 -MINOR = 18 +MINOR = 19 PATCH = 0 PRE_RELEASE = '' diff --git a/tutorials/VoiceSwapSample.ipynb b/tutorials/VoiceSwapSample.ipynb index 934071faa768..addf19f3b236 100644 --- a/tutorials/VoiceSwapSample.ipynb +++ b/tutorials/VoiceSwapSample.ipynb @@ -39,7 +39,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.18.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n" ] }, diff --git a/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb b/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb index 8b2474597819..6204bf2516bb 100644 --- a/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb +++ b/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb @@ -21,7 +21,7 @@ "import os\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.18.0'\n", + "BRANCH = 'main'\n", "\n", "GITHUB_ACCOUNT = 'NVIDIA' # change this if using a fork\n", "\n", From 20c9e0f57210c55d6037f14048bd2831d86150d7 Mon Sep 17 00:00:00 2001 From: Igor Gitman Date: Tue, 16 May 2023 09:50:01 -0700 Subject: [PATCH 111/512] Confidence ensembles: fix issues and add tuning functionality (#6657) * Implement compute confidence to properly handle blanks Signed-off-by: Igor Gitman * Implement proper confidence for transducers Signed-off-by: Igor Gitman * Implement tuning logic Signed-off-by: Igor Gitman * Add tests for confidence tuning Signed-off-by: Igor Gitman * Remove unused imports Signed-off-by: Igor Gitman * Add types/docs Signed-off-by: Igor Gitman * Add comment about the main conf compute loop Signed-off-by: Igor Gitman --------- Signed-off-by: Igor Gitman --- examples/asr/transcribe_speech.py | 5 +- .../asr/models/confidence_ensemble.py | 149 +++++- .../asr/parts/utils/asr_confidence_utils.py | 2 +- .../confidence_ensembles/build_ensemble.py | 475 +++++++++++++++--- .../test_confidence_ensembles.py | 18 +- 5 files changed, 567 insertions(+), 82 deletions(-) diff --git a/examples/asr/transcribe_speech.py b/examples/asr/transcribe_speech.py index 1c1d5c08199c..4a93e630876c 100644 --- a/examples/asr/transcribe_speech.py +++ b/examples/asr/transcribe_speech.py @@ -26,6 +26,7 @@ from nemo.collections.asr.models import EncDecCTCModel, EncDecHybridRNNTCTCModel from nemo.collections.asr.modules.conformer_encoder import ConformerChangeConfig from nemo.collections.asr.parts.utils.eval_utils import cal_write_wer +from nemo.collections.asr.parts.utils.rnnt_utils import Hypothesis from nemo.collections.asr.parts.utils.transcribe_utils import ( compute_output_filename, prepare_audio_data, @@ -33,7 +34,6 @@ transcribe_partial_audio, write_transcription, ) -from nemo.collections.common.tokenizers.aggregate_tokenizer import AggregateTokenizer from nemo.core.config import hydra_runner from nemo.utils import logging @@ -169,8 +169,7 @@ class TranscriptionConfig: @hydra_runner(config_name="TranscriptionConfig", schema=TranscriptionConfig) -# just specifying List in the return type as otherwise it's too many things -def main(cfg: TranscriptionConfig) -> Union[TranscriptionConfig, List]: +def main(cfg: TranscriptionConfig) -> Union[TranscriptionConfig, List[Hypothesis]]: logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}') for key in cfg: diff --git a/nemo/collections/asr/models/confidence_ensemble.py b/nemo/collections/asr/models/confidence_ensemble.py index 34fe037e30b5..0a5441a1cd52 100644 --- a/nemo/collections/asr/models/confidence_ensemble.py +++ b/nemo/collections/asr/models/confidence_ensemble.py @@ -12,25 +12,139 @@ # See the License for the specific language governing permissions and # limitations under the License. +from dataclasses import dataclass from typing import Dict, List, Optional, Union import joblib import numpy as np import torch -from omegaconf import DictConfig, OmegaConf, open_dict +from omegaconf import DictConfig, open_dict from pytorch_lightning import Trainer from nemo.collections.asr.models.asr_model import ASRModel from nemo.collections.asr.models.hybrid_rnnt_ctc_models import EncDecHybridRNNTCTCModel -from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceConfig, get_confidence_aggregation_bank +from nemo.collections.asr.parts.utils.asr_confidence_utils import ( + ConfidenceConfig, + ConfidenceMethodConfig, + get_confidence_aggregation_bank, + get_confidence_measure_bank, +) from nemo.collections.asr.parts.utils.audio_utils import ChannelSelectorType +from nemo.collections.asr.parts.utils.rnnt_utils import Hypothesis from nemo.core.classes import ModelPT from nemo.utils import model_utils -__all__ = ['ConfidenceEnsembleModel'] + +# frozen is required to allow hashing of this class and use it +# as a dictionary key when running confidence tuning +@dataclass(frozen=True) +class ConfidenceSpec: + exclude_blank: bool + aggregation: str + confidence_type: str + alpha: float + + def to_confidence_config(self) -> ConfidenceConfig: + """Converts confidence spec to the confidence config. + + Internally, the tuning procedure uses this "spec" objects as they + are more aligned with how things are implemented. But when it's time + to save the models or call transcribe, we need to use the proper + object of type ``ConfidenceConfig``. + """ + if self.confidence_type == 'max_prob': + name = 'max_prob' + entropy_type = 'tsallis' # can be any + entropy_norm = 'lin' # can be any + else: + name, entropy_type, entropy_norm = self.confidence_type.split("_") + return ConfidenceConfig( + exclude_blank=self.exclude_blank, + aggregation=self.aggregation, + method_cfg=ConfidenceMethodConfig( + name=name, entropy_type=entropy_type, temperature=self.alpha, entropy_norm=entropy_norm, + ), + ) + + +def get_filtered_logprobs(hypothesis: Hypothesis, exclude_blank: bool) -> torch.Tensor: + """Returns logprobs from the hypothesis object with optional blanks filter. + + This function supports both CTC and Transducer hypotheses. Will place the + logprobs on GPU if it's available. + + Args: + hypothesis: generated hypothesis as returned from the transcribe + method of the ASR model. + exclude_blank: whether to filter out all ```` tokens. + + Returns: + torch.Tensor: of shape [S, V], where S is (filtered) sequence length and + V is the vocabulary size. + """ + if isinstance(hypothesis.alignments, list): # Transducer + filtered_logprobs = [] + for alignment in hypothesis.alignments: + for align_elem in alignment: + if exclude_blank and align_elem[1].item() != align_elem[0].shape[-1] - 1: + filtered_logprobs.append(align_elem[0]) + filtered_logprobs.append(align_elem[0]) + if not filtered_logprobs: # for the edge-case of all blanks + filtered_logprobs.append(align_elem[0]) + filtered_logprobs = torch.stack(filtered_logprobs) + if torch.cuda.is_available(): # by default logprobs are placed on cpu in nemo + filtered_logprobs = filtered_logprobs.cuda() + else: # CTC + logprobs = hypothesis.y_sequence + if torch.cuda.is_available(): # by default logprobs are placed on cpu in nemo + logprobs = logprobs.cuda() + if exclude_blank: # filtering blanks + labels = logprobs.argmax(dim=-1) + filtered_logprobs = logprobs[labels != logprobs.shape[1] - 1] + else: + filtered_logprobs = logprobs + return filtered_logprobs + + +def compute_confidence(hypothesis: Hypothesis, confidence_cfg: ConfidenceConfig) -> float: + """Computes confidence score of the full utterance from a given hypothesis. + + This is essentially a re-implementation of the built-in confidence + computation in NeMo. The difference is that we aggregate full-utterance + scores, while core functionality only supports word and token level + aggregations. + + Args: + hypothesis: generated hypothesis as returned from the transcribe + method of the ASR model. + confidence_cfg: confidence config specifying what kind of + measure/aggregation should be used. + + Returns: + float: confidence score. + + """ + filtered_logprobs = get_filtered_logprobs(hypothesis, confidence_cfg.exclude_blank) + vocab_size = filtered_logprobs.shape[1] + aggr_func = get_confidence_aggregation_bank()[confidence_cfg.aggregation] + if confidence_cfg.method_cfg.name == "max_prob": + conf_type = "max_prob" + alpha = 1.0 + else: + conf_type = f"entropy_{confidence_cfg.method_cfg.entropy_type}_{confidence_cfg.method_cfg.entropy_norm}" + alpha = confidence_cfg.method_cfg.temperature + conf_func = get_confidence_measure_bank()[conf_type] + + conf_value = aggr_func(conf_func(filtered_logprobs, v=vocab_size, t=alpha)).cpu().item() + return conf_value class ConfidenceEnsembleModel(ModelPT): + """Implementation of the confidence ensemble model. + + See for details. + """ + def __init__( self, cfg: DictConfig, trainer: 'Trainer' = None, ): @@ -79,7 +193,7 @@ def __init__( self.model_selection_block = joblib.load(model_selection_block_path) self.confidence_cfg = ConfidenceConfig(**self.cfg.confidence) - # making sure each model has correct confidence settings in the decoder strategy + # making sure each model has correct temperature setting in the decoder strategy for model_idx in range(self.num_models): model = getattr(self, f"model{model_idx}") # for now we assume users are direclty responsible for matching @@ -94,11 +208,15 @@ def __init__( self.update_decoding_parameters(model.cfg.decoding) model.change_decoding_strategy(model.cfg.decoding) - def update_decoding_parameters(self, decoding_cfg): - """Updating confidence/temperature parameters of the config.""" + def update_decoding_parameters(self, decoding_cfg: DictConfig): + """Updating temperature/preserve_alignment/preserve_frame_confidence parameters of the config.""" with open_dict(decoding_cfg): - decoding_cfg.confidence_cfg = self.confidence_cfg decoding_cfg.temperature = self.cfg.temperature + decoding_cfg.preserve_alignments = True + if 'confidence_cfg' in decoding_cfg: + decoding_cfg.confidence_cfg.preserve_frame_confidence = True + else: + decoding_cfg.confidence_cfg = ConfidenceConfig(preserve_frame_confidence=True) def setup_training_data(self, train_data_config: Union[DictConfig, Dict]): """Pass-through to the ensemble models. @@ -122,13 +240,13 @@ def change_attention_model( self_attention_model, att_context_size, update_config ) - def change_decoding_strategy(self, decoding_cfg: DictConfig = None, decoder_type: str = None): + def change_decoding_strategy(self, decoding_cfg: Optional[DictConfig] = None, decoder_type: str = None): """Pass-through to the ensemble models. - The only change here is that we always require frame-confidence to - be returned. + The only change here is that we always require expected temperature + to be set as well as ``decoding_cfg.preserve_alignments = True`` """ - decoding_cfg.confidence_cfg = self.confidence_cfg + self.update_decoding_parameters(decoding_cfg) for model_idx in range(self.num_models): model = getattr(self, f"model{model_idx}") if isinstance(model, EncDecHybridRNNTCTCModel): @@ -157,8 +275,6 @@ def transcribe( 3. Use logistic regression to pick the "most confident" model 4. Return the output of that model """ - # TODO: lots of duplicate code with building ensemble script - aggr_func = get_confidence_aggregation_bank()[self.confidence_cfg.aggregation] confidences = [] all_transcriptions = [] # always requiring to return hypothesis @@ -181,12 +297,7 @@ def transcribe( model_confidences = [] for transcription in transcriptions: - if isinstance(transcription.frame_confidence[0], list): - # NeMo Transducer API returns list of lists for confidences - conf_values = [conf_value for confs in transcription.frame_confidence for conf_value in confs] - else: - conf_values = transcription.frame_confidence - model_confidences.append(aggr_func(conf_values)) + model_confidences.append(compute_confidence(transcription, self.confidence_cfg)) confidences.append(model_confidences) all_transcriptions.append(transcriptions) diff --git a/nemo/collections/asr/parts/utils/asr_confidence_utils.py b/nemo/collections/asr/parts/utils/asr_confidence_utils.py index a15428ee52df..1387f6940b38 100644 --- a/nemo/collections/asr/parts/utils/asr_confidence_utils.py +++ b/nemo/collections/asr/parts/utils/asr_confidence_utils.py @@ -312,7 +312,7 @@ def _aggregate_token_confidence_subwords_sentencepiece( raise RuntimeError( f"""Something went wrong with word-level confidence aggregation.\n Please check these values for debugging:\n - len(words): {len(hypothesis.words)},\n + len(words): {len(words)},\n len(word_confidence): {len(word_confidence)},\n recognized text: `{' '.join(words)}`""" ) diff --git a/scripts/confidence_ensembles/build_ensemble.py b/scripts/confidence_ensembles/build_ensemble.py index 9620b73aac87..07ceccb8b3d5 100644 --- a/scripts/confidence_ensembles/build_ensemble.py +++ b/scripts/confidence_ensembles/build_ensemble.py @@ -13,6 +13,7 @@ # limitations under the License. # # Run ``python build_ensemble.py --help`` for usage examples. +# TODO: write usage. Mention that neither train nor dev requires transcriptions import atexit @@ -22,9 +23,10 @@ import random import sys import tempfile -from dataclasses import dataclass, is_dataclass +from copy import deepcopy +from dataclasses import dataclass from pathlib import Path -from typing import List +from typing import Dict, List, Optional, Tuple import joblib import numpy as np @@ -32,15 +34,23 @@ from omegaconf import DictConfig, OmegaConf from sklearn.linear_model import LogisticRegression from sklearn.metrics import confusion_matrix -from sklearn.pipeline import make_pipeline +from sklearn.pipeline import Pipeline, make_pipeline from sklearn.preprocessing import StandardScaler +from tqdm import tqdm -from nemo.collections.asr.models.confidence_ensemble import ConfidenceEnsembleModel +from nemo.collections.asr.models.confidence_ensemble import ( + ConfidenceEnsembleModel, + ConfidenceSpec, + compute_confidence, + get_filtered_logprobs, +) from nemo.collections.asr.parts.utils.asr_confidence_utils import ( ConfidenceConfig, ConfidenceMethodConfig, get_confidence_aggregation_bank, + get_confidence_measure_bank, ) +from nemo.collections.asr.parts.utils.rnnt_utils import Hypothesis from nemo.core.config import hydra_runner LOG = logging.getLogger(__file__) @@ -70,7 +80,71 @@ class EnsembleConfig: # 100 is most likely enough, but setting higher default just in case max_training_samples: int = 1000 # specify to provide dev data manifest for HP tuning - # dev_manifest: Optional[str] = None + dev_manifest: Optional[str] = None + + +@dataclass +class TuneConfidenceConfig: + # important parameter, so should always be tuned + exclude_blank: Tuple[bool] = (True, False) + # prod is pretty much always worse, so not including by default + aggregation: Tuple[str] = ("mean", "min", "max") + # not including max prob, as there is always an entropy-based metric + # that's better but otherwise including everything + confidence_type: Tuple[str] = ( + "entropy_renui_exp", + "entropy_renui_lin", + "entropy_tsallis_exp", + "entropy_tsallis_lin", + "entropy_gibbs_lin", + "entropy_gibbs_exp", + ) + + # TODO: currently it's not possible to efficiently tune temperature, as we always + # apply log-softmax in the decoder, so to try different values it will be required + # to rerun the decoding, which is very slow. To support this for one-off experiments + # it's possible to modify the code of CTC decoder / Transducer joint to + # remove log-softmax and then apply it directly in this script with the temperature + # + # Alternatively, one can run this script multiple times with different values of + # temperature and pick the best performing ensemble. Note that this will increase + # tuning time by the number of temperature values tried. On the other hand, + # the above approach is a lot more efficient and will only slightly increase + # the total tuning runtime. + + # very important to tune for max prob, but for entropy metrics 1.0 is almost always best + # temperature: Tuple[float] = (1.0,) + + # not that important, but can sometimes make a small difference + alpha: Tuple[float] = (0.25, 0.33, 0.5, 1.0) + + def get_grid_size(self) -> int: + """Returns the total number of points in the search space.""" + if "max_prob" in self.confidence_type: + return ( + len(self.exclude_blank) + * len(self.aggregation) + * ((len(self.confidence_type) - 1) * len(self.alpha) + 1) + ) + return len(self.exclude_blank) * len(self.aggregation) * len(self.confidence_type) * len(self.alpha) + + +@dataclass +class TuneLogisticRegressionConfig: + # will have log-uniform grid over this range with that many points + # note that a value of 10000.0 (not regularization) is always added + C_num_points: int = 10 + C_min: float = 0.0001 + C_max: float = 10.0 + + # not too important + multi_class: Tuple[str] = ("ovr", "multinomial") + + # should try to include weights directly if the data is too imbalanced + class_weight: Tuple = (None, "balanced") + + # increase if getting many warnings that algorithm didn't converge + max_iter: int = 1000 @dataclass @@ -103,11 +177,60 @@ class BuildEnsembleConfig: # will be overriden by this script transcription: transcribe_speech.TranscriptionConfig = transcribe_speech.TranscriptionConfig() - -def calculate_score(features, labels, pipe): + # set to True to tune the confidence. + # requires dev manifests to be specified for each model + tune_confidence: bool = False + # used to specify what to tune over. By default runs tuning over some + # reasonalbe grid, so that it does not take forever. + # Can be changed as needed + tune_confidence_config: TuneConfidenceConfig = TuneConfidenceConfig() + + # very fast to tune and can be important in case of imbalanced datasets + # will automatically set to False if dev data is not available + tune_logistic_regression: bool = True + tune_logistic_regression_config: TuneLogisticRegressionConfig = TuneLogisticRegressionConfig() + + def __post_init__(self): + """Checking that if any dev data is provided, all are provided. + + Will also auto-set tune_logistic_regression to False if no dev data + is available. + + If tune_confidence is set to True (user choice) and no dev data is + provided, will raise an error. + """ + num_dev_data = 0 + for ensemble_cfg in self.ensemble: + num_dev_data += ensemble_cfg.dev_manifest is not None + if num_dev_data == 0: + if self.tune_confidence: + raise ValueError("tune_confidence is set to True, but no dev data is provided") + LOG.info("Setting tune_logistic_regression = False since no dev data is provided") + self.tune_logistic_regression = False + return + + if num_dev_data < len(self.ensemble): + raise ValueError( + "Some ensemble configs specify dev data, but some don't. Either all have to specify it or none!" + ) + + +def calculate_score(features: np.ndarray, labels: np.ndarray, pipe: Pipeline) -> Tuple[float, np.ndarray]: """Score is always calculated as mean of the per-class scores. This is done to account for possible class imbalances. + + Args: + features: numpy array of features of shape [N x D], where N is the + number of objects (typically a total number of utterances in + all datasets) and D is the total number of confidence scores + used to train the model (typically = number of models). + labels: numpy array of shape [N] contatining ground-truth model indices. + pipe: classification pipeline (currently, standardization + logistic + regression). + + Returns: + tuple: score value in [0, 1] and full classification confusion matrix. """ predictions = pipe.predict(features) conf_m = confusion_matrix(labels, predictions) @@ -116,30 +239,105 @@ def calculate_score(features, labels, pipe): def train_model_selection( - training_features, - training_labels, - multi_class="multinomial", - C=10000.0, # disabling regularization by default as overfitting is likely not an issue - class_weight="balanced", # in case training data is imbalanced - max_iter=1000, -): - pipe = make_pipeline( - StandardScaler(), - LogisticRegression(multi_class=multi_class, C=C, max_iter=max_iter, class_weight=class_weight), - ) - pipe.fit(training_features, training_labels) - - accuracy, confusion = calculate_score(training_features, training_labels, pipe) - - LOG.info("Training fit accuracy: %.4f", accuracy * 100.0) - LOG.info("Training confusion matrix:\n%s", str(confusion)) - return pipe - + training_features: np.ndarray, + training_labels: np.ndarray, + dev_features: Optional[np.ndarray] = None, + dev_labels: Optional[np.ndarray] = None, + tune_lr: bool = False, + tune_lr_cfg: Optional[TuneLogisticRegressionConfig] = None, + verbose: bool = False, +) -> Tuple[Pipeline, float]: + """Trains model selection block with an (optional) tuning of the parameters. + + Returns a pipeline consisting of feature standardization and logistic + regression. If tune_lr is set to True, dev features/labels will be used + to tune the hyperparameters of the logistic regression with the grid + search that's defined via ``tune_lr_cfg``. + + If no tuning is requested, uses the following parameters:: + + best_pipe = make_pipeline( + StandardScaler(), + LogisticRegression( + multi_class="multinomial", + C=10000.0, + max_iter=1000, + class_weight="balanced", + ), + ) -def subsample_manifest(manifest_file, max_samples): + Args: + training_features: numpy array of features of shape [N x D], where N is + the number of objects (typically a total number of utterances in + all training datasets) and D is the total number of confidence + scores used to train the model (typically = number of models). + training_labels: numpy array of shape [N] contatining ground-truth + model indices. + dev_features: same as training, but for the validation subset. + dev_labels: same as training, but for the validation subset. + tune_lr: controls whether tuning of LR hyperparameters is performed. + If set to True, it's required to also provide dev features/labels. + tune_lr_cfg: specifies what values of LR hyperparameters to try. + verbose: if True, will output final training/dev scores. + + Returns: + tuple: trained model selection pipeline, best score (or -1 if no tuning + was done). + """ + if not tune_lr: + # default parameters: C=10000.0 disables regularization + best_pipe = make_pipeline( + StandardScaler(), + LogisticRegression(multi_class="multinomial", C=10000.0, max_iter=1000, class_weight="balanced"), + ) + max_score = -1 + else: + C_pms = np.append( + np.exp(np.linspace(np.log(tune_lr_cfg.C_min), np.log(tune_lr_cfg.C_max), tune_lr_cfg.C_num_points)), + 10000.0, + ) + max_score = 0 + best_pipe = None + for class_weight in tune_lr_cfg.class_weight: + for multi_class in tune_lr_cfg.multi_class: + for C in C_pms: + pipe = make_pipeline( + StandardScaler(), + LogisticRegression( + multi_class=multi_class, C=C, max_iter=tune_lr_cfg.max_iter, class_weight=class_weight + ), + ) + pipe.fit(training_features, training_labels) + score, confusion = calculate_score(dev_features, dev_labels, pipe) + if score > max_score: + max_score = score + best_pipe = pipe + + best_pipe.fit(training_features, training_labels) + if verbose: + accuracy, confusion = calculate_score(training_features, training_labels, best_pipe) + LOG.info("Training fit accuracy: %.4f", accuracy * 100.0) + LOG.info("Training confusion matrix:\n%s", str(confusion)) + if dev_features is not None and verbose: + accuracy, confusion = calculate_score(dev_features, dev_labels, best_pipe) + LOG.info("Dev fit accuracy: %.4f", accuracy * 100.0) + LOG.info("Dev confusion matrix:\n%s", str(confusion)) + + return best_pipe, max_score + + +def subsample_manifest(manifest_file: str, max_samples: int) -> str: """Will save a subsampled version of the manifest to the same folder. Have to save to the same folder to support relative paths. + + Args: + manifest_file: path to the manifest file that needs subsampling. + max_samples: how many samples to retain. Will randomly select that + many lines from the manifest. + + Returns: + str: the path to the subsampled manifest file. """ with open(manifest_file, "rt", encoding="utf-8") as fin: lines = fin.readlines() @@ -151,11 +349,115 @@ def subsample_manifest(manifest_file, max_samples): return output_file -def cleanup_subsampled_manifests(subsampled_manifests): +def cleanup_subsampled_manifests(subsampled_manifests: List[str]): + """Removes all generated subsamples manifests.""" for manifest in subsampled_manifests: os.remove(manifest) +def compute_all_confidences( + hypothesis: Hypothesis, tune_confidence_cfg: TuneConfidenceConfig +) -> Dict[ConfidenceSpec, float]: + """Computes a set of confidence scores from a given hypothesis. + + Works with the output of both CTC and Transducer decoding. + + Args: + hypothesis: generated hypothesis as returned from the transcribe + method of the ASR model. + tune_confidence_cfg: config specifying what confidence scores to + compute. + + Returns: + dict: dictionary with confidenct spec -> confidence score mapping. + """ + conf_values = {} + + for exclude_blank in tune_confidence_cfg.exclude_blank: + filtered_logprobs = get_filtered_logprobs(hypothesis, exclude_blank) + vocab_size = filtered_logprobs.shape[1] + for aggregation in tune_confidence_cfg.aggregation: + aggr_func = get_confidence_aggregation_bank()[aggregation] + for conf_type in tune_confidence_cfg.confidence_type: + conf_func = get_confidence_measure_bank()[conf_type] + if conf_type == "max_prob": # skipping alpha in this case + conf_value = aggr_func(conf_func(filtered_logprobs, v=vocab_size, t=1.0)).cpu().item() + conf_values[ConfidenceSpec(exclude_blank, aggregation, conf_type, 1.0)] = conf_value + else: + for alpha in tune_confidence_cfg.alpha: + conf_value = aggr_func(conf_func(filtered_logprobs, v=vocab_size, t=alpha)).cpu().item() + conf_values[ConfidenceSpec(exclude_blank, aggregation, conf_type, alpha)] = conf_value + + return conf_values + + +def find_best_confidence( + train_confidences: List[List[Dict[ConfidenceSpec, float]]], + train_labels: List[int], + dev_confidences: List[List[Dict[ConfidenceSpec, float]]], + dev_labels: List[int], + tune_lr: bool, + tune_lr_config: TuneConfidenceConfig, +) -> Tuple[ConfidenceConfig, Pipeline]: + """Finds the best confidence configuration for model selection. + + Will loop over all values in the confidence dictionary and fit the LR + model (optionally tuning its HPs). The best performing confidence (on the + dev set) will be used for the final LR model. + + Args: + train_confidences: this is an object of type + ``List[List[Dict[ConfidenceSpec, float]]]``. The shape of this + object is [M, N, S], where + M: number of models + N: number of utterances in all training sets + S: number of confidence scores to try + + This argument will be used to construct np.array objects for each + of the confidence scores with the shape [M, N] + + train_labels: ground-truth labels of the correct model for each data + points. This is a list of size [N] + dev_confidences: same as training, but for the validation subset. + dev_labels: same as training, but for the validation subset. + tune_lr: controls whether tuning of LR hyperparameters is performed. + tune_lr_cfg: specifies what values of LR hyperparameters to try. + + Returns: + tuple: best confidence config, best model selection pipeline + """ + max_score = 0 + best_pipe = None + best_conf_spec = None + LOG.info("Evaluation all confidences. Total grid size: %d", len(train_confidences[0][0].keys())) + for conf_spec in tqdm(train_confidences[0][0].keys()): + cur_train_confidences = [] + for model_confs in train_confidences: + cur_train_confidences.append([]) + for model_conf in model_confs: + cur_train_confidences[-1].append(model_conf[conf_spec]) + cur_dev_confidences = [] + for model_confs in dev_confidences: + cur_dev_confidences.append([]) + for model_conf in model_confs: + cur_dev_confidences[-1].append(model_conf[conf_spec]) + # transposing with zip(*list) + training_features = np.array(list(zip(*cur_train_confidences))) + training_labels = np.array(train_labels) + dev_features = np.array(list(zip(*cur_dev_confidences))) + dev_labels = np.array(dev_labels) + pipe, score = train_model_selection( + training_features, training_labels, dev_features, dev_labels, tune_lr, tune_lr_config, + ) + if max_score < score: + max_score = score + best_pipe = pipe + best_conf_spec = conf_spec + LOG.info("Found better parameters: %s. New score: %.4f", str(conf_spec), max_score) + + return best_conf_spec.to_confidence_config(), best_pipe + + @hydra_runner(schema=BuildEnsembleConfig) def main(cfg: BuildEnsembleConfig): # silencing all messages from nemo/ptl to avoid dumping tons of configs to the stdout @@ -163,25 +465,23 @@ def main(cfg: BuildEnsembleConfig): logging.getLogger('nemo_logger').setLevel(logging.CRITICAL) LOG.info(f'Build ensemble config:\n{OmegaConf.to_yaml(cfg)}') - if is_dataclass(cfg): - cfg = OmegaConf.structured(cfg) - - # no matter what's in the config, frame confidence is required - cfg.confidence.preserve_frame_confidence = True + # to ensure post init is called + cfg = BuildEnsembleConfig(**cfg) pl.seed_everything(cfg.random_seed) cfg.transcription.random_seed = None # seed is already applied cfg.transcription.return_transcriptions = True - cfg.transcription.ctc_decoding.confidence_cfg = cfg.confidence - cfg.transcription.rnnt_decoding.confidence_cfg = cfg.confidence + # that sets preserve_alignment to True + cfg.transcription.compute_timestamps = True cfg.transcription.ctc_decoding.temperature = cfg.temperature cfg.transcription.rnnt_decoding.temperature = cfg.temperature + # this ensures that generated output is after log-softmax for consistency with CTC + cfg.transcription.rnnt_decoding.confidence_cfg.preserve_frame_confidence = True - aggregations = get_confidence_aggregation_bank() - aggr_func = aggregations[cfg.confidence.aggregation] - - confidences = [] - labels = [] + train_confidences = [] + dev_confidences = [] + train_labels = [] + dev_labels = [] # registering clean-up function that will hold on to this list and # should clean up even if there is partial error in some of the transcribe @@ -191,8 +491,19 @@ def main(cfg: BuildEnsembleConfig): # note that we loop over the same config. # This is intentional, as we need to run all models on all datasets + # this loop will do the following things: + # 1. Goes through each model X each training dataset + # 2. Computes predictions by directly calling transcribe_speech.main + # 3. Converts transcription to the confidence score(s) as specified in the config + # 4. If dev sets are provided, computes the same for them + # 5. Creates a list of ground-truth model indices by mapping each model + # to its own training dataset as specified in the config. + # 6. After the loop, we either run tuning over all confidence scores or + # directly use a single score to fit logistic regression and save the + # final ensemble model. for model_idx, model_cfg in enumerate(cfg.ensemble): - model_confidences = [] + train_model_confidences = [] + dev_model_confidences = [] for data_idx, data_cfg in enumerate(cfg.ensemble): if model_idx == 0: # generating subsampled manifests only one time subsampled_manifests.append( @@ -207,27 +518,75 @@ def main(cfg: BuildEnsembleConfig): cfg.transcription.dataset_manifest = subsampled_manifest + # training with tempfile.NamedTemporaryFile() as output_file: cfg.transcription.output_filename = output_file.name - LOG.info("Transcribing dataset %d with model %d", data_idx, model_idx) - transcriptions = transcribe_speech.main(cfg.transcription.copy()) - - for transcription in transcriptions: - if isinstance(transcription.frame_confidence[0], list): - # NeMo Transducer API returns list of lists for confidences - conf_values = [conf_value for confs in transcription.frame_confidence for conf_value in confs] + LOG.info("Transcribing training dataset %d with model %d", data_idx, model_idx) + transcriptions = transcribe_speech.main(deepcopy(cfg.transcription)) + LOG.info("Generating confidence scores") + # TODO: parallelize this loop? + for transcription in tqdm(transcriptions): + if cfg.tune_confidence: + train_model_confidences.append( + compute_all_confidences(transcription, cfg.tune_confidence_config) + ) else: - conf_values = transcription.frame_confidence - model_confidences.append(aggr_func(conf_values)) + train_model_confidences.append(compute_confidence(transcription, cfg.confidence)) if model_idx == 0: # labels are the same for all models - labels.append(data_idx) - - confidences.append(model_confidences) + train_labels.append(data_idx) + + # optional dev + if data_cfg.dev_manifest is not None: + cfg.transcription.dataset_manifest = data_cfg.dev_manifest + with tempfile.NamedTemporaryFile() as output_file: + cfg.transcription.output_filename = output_file.name + LOG.info("Transcribing dev dataset %d with model %d", data_idx, model_idx) + transcriptions = transcribe_speech.main(deepcopy(cfg.transcription)) + LOG.info("Generating confidence scores") + for transcription in tqdm(transcriptions): + if cfg.tune_confidence: + dev_model_confidences.append( + compute_all_confidences(transcription, cfg.tune_confidence_config) + ) + else: + dev_model_confidences.append(compute_confidence(transcription, cfg.confidence)) + if model_idx == 0: # labels are the same for all models + dev_labels.append(data_idx) + + train_confidences.append(train_model_confidences) + if dev_model_confidences: + dev_confidences.append(dev_model_confidences) + + if cfg.tune_confidence: + best_confidence, model_selection_block = find_best_confidence( + train_confidences, + train_labels, + dev_confidences, + dev_labels, + cfg.tune_logistic_regression, + cfg.tune_logistic_regression_config, + ) + else: + best_confidence = cfg.confidence + # transposing with zip(*list) + training_features = np.array(list(zip(*train_confidences))) + training_labels = np.array(train_labels) + if dev_confidences: + dev_features = np.array(list(zip(*dev_confidences))) + dev_labels = np.array(dev_labels) + else: + dev_features = None + dev_labels = None + model_selection_block, _ = train_model_selection( + training_features, + training_labels, + dev_features, + dev_labels, + cfg.tune_logistic_regression, + cfg.tune_logistic_regression_config, + verbose=True, + ) - # transposing with zip(*list) - training_features = np.array(list(zip(*confidences))) - training_labels = np.array(labels) - model_selection_block = train_model_selection(training_features, training_labels) with tempfile.TemporaryDirectory() as tmpdir: model_selection_block_path = os.path.join(tmpdir, 'model_selection_block.pkl') joblib.dump(model_selection_block, model_selection_block_path) @@ -237,7 +596,7 @@ def main(cfg: BuildEnsembleConfig): cfg=DictConfig( { 'model_selection_block': model_selection_block_path, - 'confidence': cfg.confidence, + 'confidence': best_confidence, 'temperature': cfg.temperature, 'load_models': [model_cfg.model for model_cfg in cfg.ensemble], } diff --git a/scripts/confidence_ensembles/test_confidence_ensembles.py b/scripts/confidence_ensembles/test_confidence_ensembles.py index 3e225384de92..b665375c0c33 100644 --- a/scripts/confidence_ensembles/test_confidence_ensembles.py +++ b/scripts/confidence_ensembles/test_confidence_ensembles.py @@ -37,9 +37,23 @@ ( "ensemble.0.model=stt_es_fastconformer_hybrid_large_pc " "ensemble.1.model=stt_it_fastconformer_hybrid_large_pc " - "transcription.decoder_type=ctc" + "transcription.decoder_type=ctc " ), "ensemble.0.model=stt_es_conformer_ctc_large ensemble.1.model=stt_it_conformer_transducer_large", + ( + "ensemble.0.model=stt_es_conformer_ctc_large " + "ensemble.1.model=stt_it_conformer_ctc_large " + f"ensemble.0.dev_manifest={Path(os.getenv('TEST_DATA_PATH', '')) / 'es' / 'dev_manifest.json'} " + f"ensemble.1.dev_manifest={Path(os.getenv('TEST_DATA_PATH', '')) / 'it' / 'dev_manifest.json'} " + "tune_confidence=True " + ), + ( + "ensemble.0.model=stt_es_conformer_transducer_large " + "ensemble.1.model=stt_it_conformer_transducer_large " + f"ensemble.0.dev_manifest={Path(os.getenv('TEST_DATA_PATH', '')) / 'es' / 'dev_manifest.json'} " + f"ensemble.1.dev_manifest={Path(os.getenv('TEST_DATA_PATH', '')) / 'it' / 'dev_manifest.json'} " + "tune_confidence=True " + ), ], ids=( [ @@ -48,6 +62,8 @@ "Hybrid models (Transducer mode)", "Hybrid models (CTC mode)", "CTC + Transducer", + "CTC models + confidence tuning", + "Transducer models + confidence tuning", ] ), ) From 9a6f4cd3b121ff1c89249c44d355bf144e3a8f20 Mon Sep 17 00:00:00 2001 From: Ryan Langman Date: Tue, 16 May 2023 09:50:13 -0700 Subject: [PATCH 112/512] [TTS] Implement new TextToSpeech dataset (#6575) * [TTS] Implement new TextToSpeech dataset Signed-off-by: Ryan * [TTS] Add unit tests Signed-off-by: Ryan * [TTS] Fix defaulting of use_log_energy Signed-off-by: Ryan * [TTS] Fix TTS export test Signed-off-by: Ryan --------- Signed-off-by: Ryan --- .../tts/conf/fastpitch/fastpitch_22050.yaml | 220 +++++++++++++ .../{features => feature}/feature_22050.yaml | 18 +- .../{features => feature}/feature_44100.yaml | 18 +- .../tts/data/text_to_speech_dataset.py | 297 ++++++++++++++++++ nemo/collections/tts/models/fastpitch.py | 77 +++-- nemo/collections/tts/modules/fastpitch.py | 6 +- .../tts/parts/preprocessing/features.py | 86 ++++- .../tts/parts/utils/tts_dataset_utils.py | 81 ++++- .../tts/parts/utils/test_tts_dataset_utils.py | 59 +++- 9 files changed, 802 insertions(+), 60 deletions(-) create mode 100644 examples/tts/conf/fastpitch/fastpitch_22050.yaml rename examples/tts/conf/{features => feature}/feature_22050.yaml (61%) rename examples/tts/conf/{features => feature}/feature_44100.yaml (61%) create mode 100644 nemo/collections/tts/data/text_to_speech_dataset.py diff --git a/examples/tts/conf/fastpitch/fastpitch_22050.yaml b/examples/tts/conf/fastpitch/fastpitch_22050.yaml new file mode 100644 index 000000000000..016e157ce39f --- /dev/null +++ b/examples/tts/conf/fastpitch/fastpitch_22050.yaml @@ -0,0 +1,220 @@ +# This config contains the default values for training a FastPitch model with aligner. +# If you want to train a model on other dataset, you can change config values according to your dataset. +# Most dataset-specific arguments are in the head of the config file, see below. + +name: FastPitch + +max_epochs: ??? +batch_size: 32 +weighted_sample_steps: null + +n_speakers: ??? +speaker_path: null +feature_stats_path: null + +train_ds_meta: ??? +val_ds_meta: ??? + +phoneme_dict_path: ??? +heteronyms_path: ??? + +defaults: + - feature: feature_22050 + +model: + learn_alignment: true + bin_loss_warmup_epochs: 100 + + n_speakers: ${n_speakers} + n_mel_channels: ${feature.mel_feature.mel_dim} + max_token_duration: 75 + symbols_embedding_dim: 384 + pitch_embedding_kernel_size: 3 + energy_embedding_kernel_size: 3 + speaker_emb_condition_prosody: true + speaker_emb_condition_aligner: true + use_log_energy: false + dur_loss_scale: 0.1 + pitch_loss_scale: 0.1 + energy_loss_scale: 0.1 + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + features: ${feature.mel_feature.mel_dim} + lowfreq: ${feature.mel_feature.lowfreq} + highfreq: ${feature.mel_feature.highfreq} + n_fft: ${feature.win_length} + n_window_size: ${feature.win_length} + window_size: false + n_window_stride: ${feature.hop_length} + window_stride: false + pad_to: 1 + pad_value: 0 + sample_rate: ${feature.sample_rate} + window: hann + normalize: null + preemph: null + dither: 0.0 + frame_splicing: 1 + log: true + log_zero_guard_type: add + log_zero_guard_value: 1.0 + mag_power: 1.0 + mel_norm: null + + text_tokenizer: + _target_: nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers.IPATokenizer + punct: true + apostrophe: true + pad_with_space: true + g2p: + _target_: nemo.collections.tts.g2p.models.i18n_ipa.IpaG2p + phoneme_dict: ${phoneme_dict_path} + heteronyms: ${heteronyms_path} + phoneme_probability: 0.8 + # Relies on the heteronyms list for anything that needs to be disambiguated + ignore_ambiguous_words: false + use_chars: true + use_stresses: true + + pitch_processor: + _target_: nemo.collections.tts.parts.preprocessing.feature_processors.MeanVarianceSpeakerNormalization + field: pitch + stats_path: ${feature_stats_path} + + energy_processor: + _target_: nemo.collections.tts.parts.preprocessing.feature_processors.MeanVarianceSpeakerNormalization + field: energy + stats_path: ${feature_stats_path} + + align_prior_config: + _target_: nemo.collections.tts.data.text_to_speech_dataset.AlignPriorConfig + hop_length: ${feature.hop_length} + use_beta_binomial_interpolator: false + + train_ds: + dataset: + _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset + dataset_meta: ${train_ds_meta} + weighted_sample_steps: ${weighted_sample_steps} + sample_rate: ${feature.sample_rate} + speaker_path: ${speaker_path} + featurizers: ${feature.featurizers} + feature_processors: + pitch: ${model.pitch_processor} + energy: ${model.energy_processor} + align_prior_config: ${model.align_prior_config} + min_duration: 0.1 + max_duration: 10.0 + + dataloader_params: + batch_size: ${batch_size} + drop_last: true + num_workers: 8 + + validation_ds: + dataset: + _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset + dataset_meta: ${val_ds_meta} + sample_rate: ${feature.sample_rate} + speaker_path: ${speaker_path} + featurizers: ${feature.featurizers} + feature_processors: + pitch: ${model.pitch_processor} + energy: ${model.energy_processor} + align_prior_config: ${model.align_prior_config} + + dataloader_params: + batch_size: ${batch_size} + drop_last: false + num_workers: 2 + + input_fft: + _target_: nemo.collections.tts.modules.transformer.FFTransformerEncoder + n_layer: 6 + n_head: 2 + d_model: ${model.symbols_embedding_dim} + d_head: 64 + d_inner: 1536 + kernel_size: 3 + dropout: 0.1 + dropatt: 0.1 + dropemb: 0.0 + d_embed: ${model.symbols_embedding_dim} + + output_fft: + _target_: nemo.collections.tts.modules.transformer.FFTransformerDecoder + n_layer: 6 + n_head: 1 + d_model: ${model.symbols_embedding_dim} + d_head: 64 + d_inner: 1536 + kernel_size: 3 + dropout: 0.1 + dropatt: 0.1 + dropemb: 0.0 + + alignment_module: + _target_: nemo.collections.tts.modules.aligner.AlignmentEncoder + n_text_channels: ${model.symbols_embedding_dim} + + duration_predictor: + _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor + input_size: ${model.symbols_embedding_dim} + kernel_size: 3 + filter_size: 256 + dropout: 0.1 + n_layers: 2 + + pitch_predictor: + _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor + input_size: ${model.symbols_embedding_dim} + kernel_size: 3 + filter_size: 256 + dropout: 0.1 + n_layers: 2 + + energy_predictor: + _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor + input_size: ${model.symbols_embedding_dim} + kernel_size: 3 + filter_size: 256 + dropout: 0.1 + n_layers: 2 + + optim: + name: adamw + lr: 1e-3 + betas: [0.9, 0.999] + weight_decay: 1e-6 + + sched: + name: NoamAnnealing + warmup_steps: 1000 + last_epoch: -1 + d_model: 1 # Disable scaling based on model dim + +trainer: + num_nodes: 1 + devices: 1 + accelerator: gpu + strategy: ddp + precision: 16 + max_epochs: ${max_epochs} + accumulate_grad_batches: 1 + gradient_clip_val: 10.0 + enable_checkpointing: false # Provided by exp_manager + logger: false # Provided by exp_manager + log_every_n_steps: 100 + check_val_every_n_epoch: 10 + benchmark: false + +exp_manager: + exp_dir: null + name: ${name} + create_tensorboard_logger: true + create_checkpoint_callback: true + checkpoint_callback_params: + monitor: val_loss + resume_if_exists: false + resume_ignore_no_checkpoint: false diff --git a/examples/tts/conf/features/feature_22050.yaml b/examples/tts/conf/feature/feature_22050.yaml similarity index 61% rename from examples/tts/conf/features/feature_22050.yaml rename to examples/tts/conf/feature/feature_22050.yaml index c5779500bc3c..1b159bc66ddf 100644 --- a/examples/tts/conf/features/feature_22050.yaml +++ b/examples/tts/conf/feature/feature_22050.yaml @@ -4,25 +4,25 @@ hop_length: 256 mel_feature: _target_: nemo.collections.tts.parts.preprocessing.features.MelSpectrogramFeaturizer - sample_rate: ${sample_rate} - win_length: ${win_length} - hop_length: ${hop_length} + sample_rate: ${..sample_rate} + win_length: ${..win_length} + hop_length: ${..hop_length} mel_dim: 80 lowfreq: 0 highfreq: 8000 pitch_feature: _target_: nemo.collections.tts.parts.preprocessing.features.PitchFeaturizer - sample_rate: ${sample_rate} - win_length: ${win_length} - hop_length: ${hop_length} + sample_rate: ${..sample_rate} + win_length: ${..win_length} + hop_length: ${..hop_length} pitch_fmin: 60 pitch_fmax: 640 energy_feature: _target_: nemo.collections.tts.parts.preprocessing.features.EnergyFeaturizer - spec_featurizer: ${mel_feature} + spec_featurizer: ${..mel_feature} featurizers: - pitch: ${pitch_feature} - energy: ${energy_feature} + pitch: ${..pitch_feature} + energy: ${..energy_feature} diff --git a/examples/tts/conf/features/feature_44100.yaml b/examples/tts/conf/feature/feature_44100.yaml similarity index 61% rename from examples/tts/conf/features/feature_44100.yaml rename to examples/tts/conf/feature/feature_44100.yaml index 0cfc27f4dab3..e852a93a2d6c 100644 --- a/examples/tts/conf/features/feature_44100.yaml +++ b/examples/tts/conf/feature/feature_44100.yaml @@ -4,25 +4,25 @@ hop_length: 512 mel_feature: _target_: nemo.collections.tts.parts.preprocessing.features.MelSpectrogramFeaturizer - sample_rate: ${sample_rate} - win_length: ${win_length} - hop_length: ${hop_length} + sample_rate: ${..sample_rate} + win_length: ${..win_length} + hop_length: ${..hop_length} mel_dim: 80 lowfreq: 0 highfreq: null pitch_feature: _target_: nemo.collections.tts.parts.preprocessing.features.PitchFeaturizer - sample_rate: ${sample_rate} - win_length: ${win_length} - hop_length: ${hop_length} + sample_rate: ${..sample_rate} + win_length: ${..win_length} + hop_length: ${..hop_length} pitch_fmin: 60 pitch_fmax: 640 energy_feature: _target_: nemo.collections.tts.parts.preprocessing.features.EnergyFeaturizer - spec_featurizer: ${mel_feature} + spec_featurizer: ${..mel_feature} featurizers: - pitch: ${pitch_feature} - energy: ${energy_feature} + pitch: ${..pitch_feature} + energy: ${..energy_feature} diff --git a/nemo/collections/tts/data/text_to_speech_dataset.py b/nemo/collections/tts/data/text_to_speech_dataset.py new file mode 100644 index 000000000000..f6230fa3493a --- /dev/null +++ b/nemo/collections/tts/data/text_to_speech_dataset.py @@ -0,0 +1,297 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, List, Optional + +import librosa +import torch.utils.data + +from nemo.collections.asr.parts.utils.manifest_utils import read_manifest +from nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers import BaseTokenizer +from nemo.collections.tts.parts.preprocessing.feature_processors import FeatureProcessor +from nemo.collections.tts.parts.preprocessing.features import Featurizer +from nemo.collections.tts.parts.utils.tts_dataset_utils import ( + BetaBinomialInterpolator, + beta_binomial_prior_distribution, + filter_dataset_by_duration, + get_abs_rel_paths, + get_weighted_sampler, + stack_tensors, +) +from nemo.core.classes import Dataset +from nemo.utils import logging +from nemo.utils.decorators import experimental + + +@dataclass +class DatasetMeta: + manifest_path: Path + audio_dir: Path + feature_dir: Path + sample_weight: float = 1.0 + + +@dataclass +class DatasetSample: + manifest_entry: Dict[str, Any] + audio_dir: Path + feature_dir: Path + text: str + speaker: str + speaker_index: int = None + + +@dataclass +class AlignPriorConfig: + hop_length: int + use_beta_binomial_interpolator: bool = False + + +@experimental +class TextToSpeechDataset(Dataset): + """ + Class for processing and loading text to speech training examples. + + Args: + dataset_meta: Dict of dataset names (string) to dataset metadata. + sample_rate: Sample rate to load audio as. If the audio is stored at a different sample rate, then it will + be resampled. + text_tokenizer: Tokenizer to apply to the text field. + weighted_sample_steps: Optional int, If provided, then data will be sampled (with replacement) based on + the sample weights provided in the dataset metadata. If None, then sample weights will be ignored. + speaker_path: Optional, path to JSON file with speaker indices, for multi-speaker training. Can be created with + scripts.dataset_processing.tts.create_speaker_map.py + featurizers: Optional, list of featurizers to load feature data from. Should be the same config provided + when running scripts.dataset_processing.tts.compute_features.py before training. + feature_processors: Optional, list of feature processors to run on training examples. + align_prior_config: Optional, if provided alignment prior will be calculated and included in + batch output. + min_duration: Optional float, if provided audio files in the training manifest shorter than 'min_duration' + will be ignored. + max_duration: Optional float, if provided audio files in the training manifest longer than 'max_duration' + will be ignored. + """ + + def __init__( + self, + dataset_meta: Dict[str, DatasetMeta], + sample_rate: int, + text_tokenizer: BaseTokenizer, + weighted_sample_steps: Optional[int] = None, + speaker_path: Optional[Path] = None, + featurizers: Optional[Dict[str, Featurizer]] = None, + feature_processors: Optional[Dict[str, FeatureProcessor]] = None, + align_prior_config: Optional[AlignPriorConfig] = None, + min_duration: Optional[float] = None, + max_duration: Optional[float] = None, + ): + super().__init__() + + self.sample_rate = sample_rate + self.text_tokenizer = text_tokenizer + self.weighted_sample_steps = weighted_sample_steps + + if speaker_path: + self.include_speaker = True + with open(speaker_path, 'r', encoding="utf-8") as speaker_f: + speaker_index_map = json.load(speaker_f) + else: + self.include_speaker = False + speaker_index_map = None + + if featurizers: + logging.info(f"Found featurizers {featurizers.keys()}") + self.featurizers = featurizers.values() + else: + self.featurizers = [] + + if feature_processors: + logging.info(f"Found featurize processors {feature_processors.keys()}") + self.feature_processors = feature_processors.values() + else: + self.feature_processors = [] + + self.align_prior_config = align_prior_config + if self.align_prior_config.use_beta_binomial_interpolator: + self.beta_binomial_interpolator = BetaBinomialInterpolator() + else: + self.beta_binomial_interpolator = None + + self.data_samples = [] + self.sample_weights = [] + for dataset_name, dataset in dataset_meta.items(): + samples, weights = self._process_dataset( + dataset_name=dataset_name, + dataset=dataset, + min_duration=min_duration, + max_duration=max_duration, + speaker_index_map=speaker_index_map, + ) + self.data_samples += samples + self.sample_weights += weights + + def get_sampler(self, batch_size: int) -> Optional[torch.utils.data.Sampler]: + if not self.weighted_sample_steps: + return None + + sampler = get_weighted_sampler( + sample_weights=self.sample_weights, batch_size=batch_size, num_steps=self.weighted_sample_steps + ) + return sampler + + def _process_dataset( + self, + dataset_name: str, + dataset: DatasetMeta, + min_duration: float, + max_duration: float, + speaker_index_map: Dict[str, int], + ): + entries = read_manifest(dataset.manifest_path) + filtered_entries, total_hours, filtered_hours = filter_dataset_by_duration( + entries=entries, min_duration=min_duration, max_duration=max_duration + ) + + logging.info(dataset_name) + logging.info(f"Original # of files: {len(entries)}") + logging.info(f"Filtered # of files: {len(filtered_entries)}") + logging.info(f"Original duration: {total_hours} hours") + logging.info(f"Filtered duration: {filtered_hours} hours") + + samples = [] + sample_weights = [] + for entry in filtered_entries: + + if "normalized_text" in entry: + text = entry["normalized_text"] + else: + text = entry["text"] + + if self.include_speaker: + speaker = entry["speaker"] + speaker_index = speaker_index_map[speaker] + else: + speaker = None + speaker_index = 0 + + sample = DatasetSample( + manifest_entry=entry, + audio_dir=dataset.audio_dir, + feature_dir=dataset.feature_dir, + text=text, + speaker=speaker, + speaker_index=speaker_index, + ) + samples.append(sample) + sample_weights.append(dataset.sample_weight) + + return samples, sample_weights + + def __len__(self): + return len(self.data_samples) + + def __getitem__(self, index): + data = self.data_samples[index] + + audio_filepath = Path(data.manifest_entry["audio_filepath"]) + audio_path, _ = get_abs_rel_paths(input_path=audio_filepath, base_path=data.audio_dir) + + audio, _ = librosa.load(audio_path, sr=self.sample_rate) + tokens = self.text_tokenizer(data.text) + + example = {"audio": audio, "tokens": tokens} + + if data.speaker is not None: + example["speaker"] = data.speaker + example["speaker_index"] = data.speaker_index + + if self.align_prior_config: + text_len = len(tokens) + spec_len = 1 + librosa.core.samples_to_frames( + audio.shape[0], hop_length=self.align_prior_config.hop_length + ) + if self.beta_binomial_interpolator: + align_prior = self.beta_binomial_interpolator(w=spec_len, h=text_len) + else: + align_prior = beta_binomial_prior_distribution(phoneme_count=text_len, mel_count=spec_len) + align_prior = torch.tensor(align_prior, dtype=torch.float32) + example["align_prior"] = align_prior + + for featurizer in self.featurizers: + feature_dict = featurizer.load( + manifest_entry=data.manifest_entry, audio_dir=data.audio_dir, feature_dir=data.feature_dir + ) + example.update(feature_dict) + + for processor in self.feature_processors: + processor.process(example) + + return example + + def collate_fn(self, batch: List[dict]): + + audio_list = [] + audio_len_list = [] + token_list = [] + token_len_list = [] + speaker_list = [] + prior_list = [] + + for example in batch: + audio_tensor = torch.tensor(example["audio"], dtype=torch.float32) + audio_list.append(audio_tensor) + audio_len_list.append(audio_tensor.shape[0]) + + token_tensor = torch.tensor(example["tokens"], dtype=torch.int32) + token_list.append(token_tensor) + token_len_list.append(token_tensor.shape[0]) + + if self.include_speaker: + speaker_list.append(example["speaker_index"]) + + if self.align_prior_config: + prior_list.append(example["align_prior"]) + + batch_audio_len = torch.IntTensor(audio_len_list) + audio_max_len = int(batch_audio_len.max().item()) + + batch_token_len = torch.IntTensor(token_len_list) + token_max_len = int(batch_token_len.max().item()) + + batch_audio = stack_tensors(audio_list, max_lens=[audio_max_len]) + batch_tokens = stack_tensors(token_list, max_lens=[token_max_len], pad_value=self.text_tokenizer.pad) + + batch_dict = { + "audio": batch_audio, + "audio_lens": batch_audio_len, + "text": batch_tokens, + "text_lens": batch_token_len, + } + + if self.include_speaker: + batch_dict["speaker_id"] = torch.IntTensor(speaker_list) + + if self.align_prior_config: + spec_max_len = max([prior.shape[0] for prior in prior_list]) + text_max_len = max([prior.shape[1] for prior in prior_list]) + batch_dict["align_prior_matrix"] = stack_tensors(prior_list, max_lens=[text_max_len, spec_max_len],) + + for featurizer in self.featurizers: + feature_dict = featurizer.collate_fn(batch) + batch_dict.update(feature_dict) + + return batch_dict diff --git a/nemo/collections/tts/models/fastpitch.py b/nemo/collections/tts/models/fastpitch.py index d44de8ce0075..281a7c2891b3 100644 --- a/nemo/collections/tts/models/fastpitch.py +++ b/nemo/collections/tts/models/fastpitch.py @@ -95,15 +95,20 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): input_fft_kwargs = {} if self.learn_alignment: self.vocab = None - self.ds_class_name = cfg.train_ds.dataset._target_.split(".")[-1] - if self.ds_class_name == "TTSDataset": - self._setup_tokenizer(cfg) - assert self.vocab is not None - input_fft_kwargs["n_embed"] = len(self.vocab.tokens) - input_fft_kwargs["padding_idx"] = self.vocab.pad - else: - raise ValueError(f"Unknown dataset class: {self.ds_class_name}.") + self.ds_class = cfg.train_ds.dataset._target_ + self.ds_class_name = self.ds_class.split(".")[-1] + if not self.ds_class in [ + "nemo.collections.tts.data.dataset.TTSDataset", + "nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset", + "nemo.collections.tts.torch.data.TTSDataset", + ]: + raise ValueError(f"Unknown dataset class: {self.ds_class}.") + + self._setup_tokenizer(cfg) + assert self.vocab is not None + input_fft_kwargs["n_embed"] = len(self.vocab.tokens) + input_fft_kwargs["padding_idx"] = self.vocab.pad self._parser = None self._tb_logger = None @@ -149,6 +154,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): speaker_emb_condition_prosody = cfg.get("speaker_emb_condition_prosody", False) speaker_emb_condition_decoder = cfg.get("speaker_emb_condition_decoder", False) speaker_emb_condition_aligner = cfg.get("speaker_emb_condition_aligner", False) + use_log_energy = cfg.get("use_log_energy", True) if n_speakers > 1 and "add" not in input_fft.cond_input.condition_types: input_fft.cond_input.condition_types.append("add") if speaker_emb_condition_prosody: @@ -173,6 +179,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): energy_embedding_kernel_size, cfg.n_mel_channels, cfg.max_token_duration, + use_log_energy, ) self._input_types = self._output_types = None self.export_config = { @@ -261,12 +268,7 @@ def parser(self): return self._parser if self.learn_alignment: - ds_class_name = self._cfg.train_ds.dataset._target_.split(".")[-1] - - if ds_class_name == "TTSDataset": - self._parser = self.vocab.encode - else: - raise ValueError(f"Unknown dataset class: {ds_class_name}") + self._parser = self.vocab.encode else: self._parser = parsers.make_parser( labels=self._cfg.labels, @@ -382,8 +384,10 @@ def training_step(self, batch, batch_idx): None, ) if self.learn_alignment: - assert self.ds_class_name == "TTSDataset", f"Unknown dataset class: {self.ds_class_name}" - batch_dict = process_batch(batch, self._train_dl.dataset.sup_data_types_set) + if self.ds_class == "nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset": + batch_dict = batch + else: + batch_dict = process_batch(batch, self._train_dl.dataset.sup_data_types_set) audio = batch_dict.get("audio") audio_lens = batch_dict.get("audio_lens") text = batch_dict.get("text") @@ -493,8 +497,10 @@ def validation_step(self, batch, batch_idx): None, ) if self.learn_alignment: - assert self.ds_class_name == "TTSDataset", f"Unknown dataset class: {self.ds_class_name}" - batch_dict = process_batch(batch, self._train_dl.dataset.sup_data_types_set) + if self.ds_class == "nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset": + batch_dict = batch + else: + batch_dict = process_batch(batch, self._train_dl.dataset.sup_data_types_set) audio = batch_dict.get("audio") audio_lens = batch_dict.get("audio_lens") text = batch_dict.get("text") @@ -578,6 +584,29 @@ def validation_epoch_end(self, outputs): ) self.log_train_images = True + def _setup_train_dataloader(self, cfg): + phon_mode = contextlib.nullcontext() + if hasattr(self.vocab, "set_phone_prob"): + phon_mode = self.vocab.set_phone_prob(self.vocab.phoneme_probability) + + with phon_mode: + dataset = instantiate(cfg.dataset, text_tokenizer=self.vocab,) + + sampler = dataset.get_sampler(cfg.dataloader_params.batch_size) + return torch.utils.data.DataLoader( + dataset, collate_fn=dataset.collate_fn, sampler=sampler, **cfg.dataloader_params + ) + + def _setup_test_dataloader(self, cfg): + phon_mode = contextlib.nullcontext() + if hasattr(self.vocab, "set_phone_prob"): + phon_mode = self.vocab.set_phone_prob(0.0) + + with phon_mode: + dataset = instantiate(cfg.dataset, text_tokenizer=self.vocab,) + + return torch.utils.data.DataLoader(dataset, collate_fn=dataset.collate_fn, **cfg.dataloader_params) + def __setup_dataloader_from_config(self, cfg, shuffle_should_be: bool = True, name: str = "train"): if "dataset" not in cfg or not isinstance(cfg.dataset, DictConfig): raise ValueError(f"No dataset for {name}") @@ -596,7 +625,7 @@ def __setup_dataloader_from_config(self, cfg, shuffle_should_be: bool = True, na elif cfg.dataloader_params.shuffle: logging.error(f"The {name} dataloader for {self} has shuffle set to True!!!") - if cfg.dataset._target_ == "nemo.collections.tts.data.dataset.TTSDataset": + if self.ds_class == "nemo.collections.tts.data.dataset.TTSDataset": phon_mode = contextlib.nullcontext() if hasattr(self.vocab, "set_phone_prob"): phon_mode = self.vocab.set_phone_prob(prob=None if name == "val" else self.vocab.phoneme_probability) @@ -614,10 +643,16 @@ def __setup_dataloader_from_config(self, cfg, shuffle_should_be: bool = True, na return torch.utils.data.DataLoader(dataset, collate_fn=dataset.collate_fn, **cfg.dataloader_params) def setup_training_data(self, cfg): - self._train_dl = self.__setup_dataloader_from_config(cfg) + if self.ds_class == "nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset": + self._train_dl = self._setup_train_dataloader(cfg) + else: + self._train_dl = self.__setup_dataloader_from_config(cfg) def setup_validation_data(self, cfg): - self._validation_dl = self.__setup_dataloader_from_config(cfg, shuffle_should_be=False, name="val") + if self.ds_class == "nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset": + self._validation_dl = self._setup_test_dataloader(cfg) + else: + self._validation_dl = self.__setup_dataloader_from_config(cfg, shuffle_should_be=False, name="val") def setup_test_data(self, cfg): """Omitted.""" diff --git a/nemo/collections/tts/modules/fastpitch.py b/nemo/collections/tts/modules/fastpitch.py index 77dff7bc85ed..b26aafa72e32 100644 --- a/nemo/collections/tts/modules/fastpitch.py +++ b/nemo/collections/tts/modules/fastpitch.py @@ -164,6 +164,7 @@ def __init__( energy_embedding_kernel_size: int, n_mel_channels: int = 80, max_token_duration: int = 75, + use_log_energy: bool = True, ): super().__init__() @@ -177,6 +178,8 @@ def __init__( self.learn_alignment = aligner is not None self.use_duration_predictor = True self.binarize = False + self.use_log_energy = use_log_energy + # TODO: combine self.speaker_emb with self.speaker_encoder # cfg: remove `n_speakers`, create `speaker_encoder.lookup_module` # state_dict: move `speaker_emb.weight` to `speaker_encoder.lookup_module.table.weight` @@ -327,7 +330,8 @@ def forward( energy_tgt = average_features(energy.unsqueeze(1), attn_hard_dur) else: energy_tgt = average_features(energy.unsqueeze(1), durs_predicted) - energy_tgt = torch.log(1.0 + energy_tgt) + if self.use_log_energy: + energy_tgt = torch.log(1.0 + energy_tgt) energy_emb = self.energy_emb(energy_tgt) energy_tgt = energy_tgt.squeeze(1) else: diff --git a/nemo/collections/tts/parts/preprocessing/features.py b/nemo/collections/tts/parts/preprocessing/features.py index 7d7150a7050f..2972279339b5 100644 --- a/nemo/collections/tts/parts/preprocessing/features.py +++ b/nemo/collections/tts/parts/preprocessing/features.py @@ -15,7 +15,7 @@ from abc import ABC, abstractmethod from pathlib import Path -from typing import Dict, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union import librosa import numpy as np @@ -23,14 +23,17 @@ from torch import Tensor from nemo.collections.asr.modules import AudioToMelSpectrogramPreprocessor -from nemo.collections.tts.parts.utils.tts_dataset_utils import get_audio_filepaths +from nemo.collections.tts.parts.utils.tts_dataset_utils import get_audio_filepaths, stack_tensors from nemo.utils.decorators import experimental @experimental class Featurizer(ABC): + def __init__(self, feature_names: List[str]) -> None: + self.feature_names = feature_names + @abstractmethod - def save(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> None: + def save(self, manifest_entry: Dict[str, Any], audio_dir: Path, feature_dir: Path) -> None: """ Save feature value to disk for given manifest entry. @@ -41,7 +44,7 @@ def save(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> None """ @abstractmethod - def load(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> Dict[str, Tensor]: + def load(self, manifest_entry: Dict[str, Any], audio_dir: Path, feature_dir: Path) -> Dict[str, Tensor]: """ Read saved feature value for given manifest entry. @@ -54,8 +57,17 @@ def load(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> Dict Dictionary of feature names to Tensors """ + @abstractmethod + def collate_fn(self, train_batch: List[Dict[str, Tensor]]) -> Dict[str, Tensor]: + """ + Combine list/batch of features into a feature dictionary. + """ + raise NotImplementedError + -def _get_feature_filepath(manifest_entry: dict, audio_dir: Path, feature_dir: Path, feature_name: str) -> Path: +def _get_feature_filepath( + manifest_entry: Dict[str, Any], audio_dir: Path, feature_dir: Path, feature_name: str +) -> Path: """ Get the absolute path for the feature file corresponding to the input manifest entry @@ -68,7 +80,11 @@ def _get_feature_filepath(manifest_entry: dict, audio_dir: Path, feature_dir: Pa def _save_pt_feature( - feature_name: Optional[str], feature_tensor: Tensor, manifest_entry: Dict, audio_dir: Path, feature_dir: Path, + feature_name: Optional[str], + feature_tensor: Tensor, + manifest_entry: Dict[str, Any], + audio_dir: Path, + feature_dir: Path, ) -> None: """ If feature_name is provided, save feature as .pt file. @@ -84,12 +100,15 @@ def _save_pt_feature( def _load_pt_feature( - feature_dict: Dict, feature_name: Optional[str], manifest_entry: Dict, audio_dir: Path, feature_dir: Path, + feature_dict: Dict[str, Tensor], + feature_name: Optional[str], + manifest_entry: Dict[str, Any], + audio_dir: Path, + feature_dir: Path, ) -> None: """ If feature_name is provided, load feature into feature_dict from .pt file. """ - if feature_name is None: return @@ -100,6 +119,22 @@ def _load_pt_feature( feature_dict[feature_name] = feature_tensor +def _collate_feature( + feature_dict: Dict[str, Tensor], feature_name: Optional[str], train_batch: List[Dict[str, Tensor]] +) -> None: + if feature_name is None: + return + + feature_tensors = [] + for example in train_batch: + feature_tensor = example[feature_name] + feature_tensors.append(feature_tensor) + + max_len = max([f.shape[0] for f in feature_tensors]) + stacked_features = stack_tensors(feature_tensors, max_lens=[max_len]) + feature_dict[feature_name] = stacked_features + + class MelSpectrogramFeaturizer: def __init__( self, @@ -141,7 +176,7 @@ def __init__( dither=0.0, ) - def compute_mel_spec(self, manifest_entry: dict, audio_dir: Path) -> Tensor: + def compute_mel_spec(self, manifest_entry: Dict[str, Any], audio_dir: Path) -> Tensor: """ Computes mel spectrogram for the input manifest entry. @@ -168,7 +203,7 @@ def compute_mel_spec(self, manifest_entry: dict, audio_dir: Path) -> Tensor: return spec_tensor - def save(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> None: + def save(self, manifest_entry: Dict[str, Any], audio_dir: Path, feature_dir: Path) -> None: spec_tensor = self.compute_mel_spec(manifest_entry=manifest_entry, audio_dir=audio_dir) _save_pt_feature( feature_name=self.feature_name, @@ -178,7 +213,7 @@ def save(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> None feature_dir=feature_dir, ) - def load(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> Dict[str, Tensor]: + def load(self, manifest_entry: Dict[str, Any], audio_dir: Path, feature_dir: Path) -> Dict[str, Tensor]: feature_dict = {} _load_pt_feature( feature_dict=feature_dict, @@ -189,13 +224,18 @@ def load(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> Dict ) return feature_dict + def collate_fn(self, train_batch: List[Dict[str, Tensor]]) -> Dict[str, Tensor]: + feature_dict = {} + _collate_feature(feature_dict=feature_dict, feature_name=self.feature_name, train_batch=train_batch) + return feature_dict + class EnergyFeaturizer: def __init__(self, spec_featurizer: MelSpectrogramFeaturizer, feature_name: str = "energy") -> None: self.feature_name = feature_name self.spec_featurizer = spec_featurizer - def compute_energy(self, manifest_entry: dict, audio_dir: Path) -> Tensor: + def compute_energy(self, manifest_entry: Dict[str, Any], audio_dir: Path) -> Tensor: """ Computes energy for the input manifest entry. @@ -213,7 +253,7 @@ def compute_energy(self, manifest_entry: dict, audio_dir: Path) -> Tensor: return energy - def save(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> None: + def save(self, manifest_entry: Dict[str, Any], audio_dir: Path, feature_dir: Path) -> None: energy_tensor = self.compute_energy(manifest_entry=manifest_entry, audio_dir=audio_dir) _save_pt_feature( feature_name=self.feature_name, @@ -223,7 +263,7 @@ def save(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> None feature_dir=feature_dir, ) - def load(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> Dict[str, Tensor]: + def load(self, manifest_entry: Dict[str, Any], audio_dir: Path, feature_dir: Path) -> Dict[str, Tensor]: feature_dict = {} _load_pt_feature( feature_dict=feature_dict, @@ -234,6 +274,11 @@ def load(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> Dict ) return feature_dict + def collate_fn(self, train_batch: List[Dict[str, Tensor]]) -> Dict[str, Tensor]: + feature_dict = {} + _collate_feature(feature_dict=feature_dict, feature_name=self.feature_name, train_batch=train_batch) + return feature_dict + class PitchFeaturizer: def __init__( @@ -256,7 +301,7 @@ def __init__( self.pitch_fmin = pitch_fmin self.pitch_fmax = pitch_fmax - def compute_pitch(self, manifest_entry: dict, audio_dir: Path) -> Tuple[Tensor, Tensor, Tensor]: + def compute_pitch(self, manifest_entry: Dict[str, Any], audio_dir: Path) -> Tuple[Tensor, Tensor, Tensor]: """ Computes pitch and optional voiced mask for the input manifest entry. @@ -287,7 +332,7 @@ def compute_pitch(self, manifest_entry: dict, audio_dir: Path) -> Tuple[Tensor, return pitch_tensor, voiced_mask_tensor, voiced_prob_tensor - def save(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> None: + def save(self, manifest_entry: Dict[str, Any], audio_dir: Path, feature_dir: Path) -> None: pitch_tensor, voiced_mask_tensor, voiced_prob_tensor = self.compute_pitch( manifest_entry=manifest_entry, audio_dir=audio_dir ) @@ -313,7 +358,7 @@ def save(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> None feature_dir=feature_dir, ) - def load(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> Dict[str, Tensor]: + def load(self, manifest_entry: Dict[str, Any], audio_dir: Path, feature_dir: Path) -> Dict[str, Tensor]: feature_dict = {} _load_pt_feature( feature_dict=feature_dict, @@ -337,3 +382,10 @@ def load(self, manifest_entry: dict, audio_dir: Path, feature_dir: Path) -> Dict feature_dir=feature_dir, ) return feature_dict + + def collate_fn(self, train_batch: List[Dict[str, Tensor]]) -> Dict[str, Tensor]: + feature_dict = {} + _collate_feature(feature_dict=feature_dict, feature_name=self.pitch_name, train_batch=train_batch) + _collate_feature(feature_dict=feature_dict, feature_name=self.voiced_mask_name, train_batch=train_batch) + _collate_feature(feature_dict=feature_dict, feature_name=self.voiced_prob_name, train_batch=train_batch) + return feature_dict diff --git a/nemo/collections/tts/parts/utils/tts_dataset_utils.py b/nemo/collections/tts/parts/utils/tts_dataset_utils.py index 06befcb6ec02..47c7b8cd78da 100644 --- a/nemo/collections/tts/parts/utils/tts_dataset_utils.py +++ b/nemo/collections/tts/parts/utils/tts_dataset_utils.py @@ -15,7 +15,7 @@ import functools import os from pathlib import Path -from typing import Tuple +from typing import Any, Dict, List, Tuple import numpy as np import torch @@ -45,7 +45,7 @@ def get_abs_rel_paths(input_path: Path, base_path: Path) -> Tuple[Path, Path]: return abs_path, rel_path -def get_audio_filepaths(manifest_entry: dict, audio_dir: Path) -> Tuple[Path, Path]: +def get_audio_filepaths(manifest_entry: Dict[str, Any], audio_dir: Path) -> Tuple[Path, Path]: """ Get the absolute and relative paths of audio from a manifest entry. @@ -107,6 +107,31 @@ def general_padding(item, item_len, max_len, pad_value=0): return item +def stack_tensors(tensors: List[torch.Tensor], max_lens: List[int], pad_value: float = 0.0) -> torch.Tensor: + """ + Create batch by stacking input tensor list along the time axes. + + Args: + tensors: List of tensors to pad and stack + max_lens: List of lengths to pad each axis to, starting with the last axis + pad_value: Value for padding + + Returns: + Padded and stacked tensor. + """ + padded_tensors = [] + for tensor in tensors: + padding = [] + for i, max_len in enumerate(max_lens, 1): + padding += [0, max_len - tensor.shape[-i]] + + padded_tensor = torch.nn.functional.pad(tensor, pad=padding, value=pad_value) + padded_tensors.append(padded_tensor) + + stacked_tensor = torch.stack(padded_tensors) + return stacked_tensor + + def logbeta(x, y): return gammaln(x) + gammaln(y) - gammaln(x + y) @@ -153,3 +178,55 @@ def common_path(path1, path2): base_dir = common_path(base_dir, audio_dir) return base_dir + + +def filter_dataset_by_duration(entries: List[Dict[str, Any]], min_duration: float, max_duration: float): + """ + Filter out manifest entries based on duration. + + Args: + entries: List of manifest entry dictionaries. + min_duration: Minimum duration below which entries are removed. + max_duration: Maximum duration above which entries are removed. + + Returns: + filtered_entries: List of manifest entries after filtering. + total_hours: Total duration of original dataset, in hours + filtered_hours: Total duration of dataset after filtering, in hours + """ + filtered_entries = [] + total_duration = 0.0 + filtered_duration = 0.0 + for entry in entries: + duration = entry["duration"] + total_duration += duration + if (min_duration and duration < min_duration) or (max_duration and duration > max_duration): + continue + + filtered_duration += duration + filtered_entries.append(entry) + + total_hours = total_duration / 3600.0 + filtered_hours = filtered_duration / 3600.0 + + return filtered_entries, total_hours, filtered_hours + + +def get_weighted_sampler( + sample_weights: List[float], batch_size: int, num_steps: int +) -> torch.utils.data.WeightedRandomSampler: + """ + Create pytorch sampler for doing weighted random sampling. + + Args: + sample_weights: List of sampling weights for all elements in the dataset. + batch_size: Batch size to sample. + num_steps: Number of steps to be considered an epoch. + + Returns: + Pytorch sampler + """ + weights = torch.tensor(sample_weights, dtype=torch.float64) + num_samples = batch_size * num_steps + sampler = torch.utils.data.WeightedRandomSampler(weights=weights, num_samples=num_samples) + return sampler diff --git a/tests/collections/tts/parts/utils/test_tts_dataset_utils.py b/tests/collections/tts/parts/utils/test_tts_dataset_utils.py index dadb1844eca6..0730934d46dc 100644 --- a/tests/collections/tts/parts/utils/test_tts_dataset_utils.py +++ b/tests/collections/tts/parts/utils/test_tts_dataset_utils.py @@ -16,8 +16,15 @@ import numpy as np import pytest +import torch -from nemo.collections.tts.parts.utils.tts_dataset_utils import get_abs_rel_paths, get_audio_filepaths, normalize_volume +from nemo.collections.tts.parts.utils.tts_dataset_utils import ( + filter_dataset_by_duration, + get_abs_rel_paths, + get_audio_filepaths, + normalize_volume, + stack_tensors, +) class TestTTSDatasetUtils: @@ -119,3 +126,53 @@ def test_normalize_volume_out_of_range(self): input_audio = np.array([0.0, 0.1, 0.3, 0.5]) with pytest.raises(ValueError, match="Volume must be in range"): normalize_volume(audio=input_audio, volume_level=2.0) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_stack_tensors(self): + tensors = [torch.ones([2]), torch.ones([4]), torch.ones([3])] + max_lens = [6] + expected_output = torch.tensor( + [[1, 1, 0, 0, 0, 0], [1, 1, 1, 1, 0, 0], [1, 1, 1, 0, 0, 0]], dtype=torch.float32 + ) + + stacked_tensor = stack_tensors(tensors=tensors, max_lens=max_lens) + + torch.testing.assert_close(stacked_tensor, expected_output) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_stack_tensors_3d(self): + tensors = [torch.ones([2, 2]), torch.ones([1, 3])] + max_lens = [4, 2] + expected_output = torch.tensor( + [[[1, 1, 0, 0], [1, 1, 0, 0]], [[1, 1, 1, 0], [0, 0, 0, 0]]], dtype=torch.float32 + ) + + stacked_tensor = stack_tensors(tensors=tensors, max_lens=max_lens) + + torch.testing.assert_close(stacked_tensor, expected_output) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_filter_dataset_by_duration(self): + min_duration = 1.0 + max_duration = 10.0 + entries = [ + {"duration": 0.5}, + {"duration": 10.0}, + {"duration": 20.0}, + {"duration": 0.1}, + {"duration": 100.0}, + {"duration": 5.0}, + ] + + filtered_entries, total_hours, filtered_hours = filter_dataset_by_duration( + entries=entries, min_duration=min_duration, max_duration=max_duration + ) + + assert len(filtered_entries) == 2 + assert filtered_entries[0]["duration"] == 10.0 + assert filtered_entries[1]["duration"] == 5.0 + assert total_hours == (135.6 / 3600.0) + assert filtered_hours == (15.0 / 3600.0) From 9fec81099f0a61ccaab6cc0c2da26db4dd247449 Mon Sep 17 00:00:00 2001 From: Yi Dong <43824965+yidong72@users.noreply.github.com> Date: Tue, 16 May 2023 15:00:46 -0400 Subject: [PATCH 113/512] Dialogue dataset (#6654) * chatbot interface Signed-off-by: Yi Dong * latest gradio Signed-off-by: Yi Dong * default greedy Signed-off-by: Yi Dong * better chatbot Signed-off-by: Yi Dong * handle preamble Signed-off-by: Yi Dong * added chatbot training capablity Signed-off-by: Yi Dong * added chatbot ui Signed-off-by: Yi Dong * remove debug code Signed-off-by: Yi Dong * default human Signed-off-by: Yi Dong * use special token for roles Signed-off-by: Yi Dong * special tokens Signed-off-by: Yi Dong * fix name Signed-off-by: Yi Dong * new chat dataset Signed-off-by: Yi Dong * fix the system token Signed-off-by: Yi Dong * upgrade gradio Signed-off-by: Yi Dong * save the chat history Signed-off-by: Yi Dong * update ui Signed-off-by: root * update chat interface Signed-off-by: Yi Dong * handles canonical form Signed-off-by: Yi Dong * new sft chatbot Signed-off-by: Yi Dong * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * change format Signed-off-by: Yi Dong * check extra_id in the tokenizer Signed-off-by: Yi Dong * added vocab property check Signed-off-by: Yi Dong * added missing file Signed-off-by: Yi Dong --------- Signed-off-by: Yi Dong Signed-off-by: root Co-authored-by: root Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Sandeep Subramanian --- .../conf/megatron_gpt_inference.yaml | 3 +- .../language_modeling/megatron_gpt_eval.py | 8 +- .../tuning/conf/megatron_gpt_sft.yaml | 1 + .../megatron/gpt_sft_chat_dataset.py | 207 ++++++++++++++++++ .../megatron_gpt_sft_model.py | 7 +- .../nlp/modules/common/chat_css.py | 84 +++++++ .../nlp/modules/common/chatbot_component.py | 173 +++++++++++++++ .../nlp/modules/common/megatron_web_server.py | 184 ++++++++++++++-- requirements/requirements_nlp.txt | 3 +- 9 files changed, 646 insertions(+), 24 deletions(-) create mode 100644 nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py create mode 100644 nemo/collections/nlp/modules/common/chat_css.py create mode 100644 nemo/collections/nlp/modules/common/chatbot_component.py diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml index f61f5342041e..6bd1be905a97 100644 --- a/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml @@ -34,4 +34,5 @@ web_server: False # whether launch the web inference server share: False # whether create a public URL username: test # user name for web client password: test2 # password for web client -web_port: 9889 # the port number of the web server \ No newline at end of file +web_port: 9889 # the port number of the web server +chat: False # use the chat interface \ No newline at end of file diff --git a/examples/nlp/language_modeling/megatron_gpt_eval.py b/examples/nlp/language_modeling/megatron_gpt_eval.py index 00b53a9f6f8f..0ac155374512 100644 --- a/examples/nlp/language_modeling/megatron_gpt_eval.py +++ b/examples/nlp/language_modeling/megatron_gpt_eval.py @@ -23,7 +23,7 @@ from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel -from nemo.collections.nlp.modules.common.megatron_web_server import get_demo +from nemo.collections.nlp.modules.common.megatron_web_server import get_chatbot_demo, get_demo from nemo.collections.nlp.modules.common.text_generation_server import MegatronServer from nemo.collections.nlp.modules.common.text_generation_utils import generate from nemo.collections.nlp.modules.common.transformer.text_generation import LengthParam, SamplingParam @@ -277,9 +277,13 @@ def main(cfg) -> None: if cfg.server: if parallel_state.is_pipeline_first_stage() and parallel_state.get_tensor_model_parallel_rank() == 0: if cfg.web_server: + if cfg.chat: + web_ui = get_chatbot_demo + else: + web_ui = get_demo loop = asyncio.new_event_loop() thread = threading.Thread( - target=get_demo, + target=web_ui, daemon=True, args=(cfg.share, cfg.username, cfg.password, cfg.port, cfg.web_port, loop), ) diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml index 12db9133104a..678851db3b01 100644 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml +++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml @@ -66,6 +66,7 @@ model: ffn_dropout: 0.0 data: + chat: False # whether use chatbot data or not train_ds: # Example of how to specify paths to multiple datasets # file_names: diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py new file mode 100644 index 000000000000..deb6e77cdb92 --- /dev/null +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py @@ -0,0 +1,207 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy + +import torch + +from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec +from nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset import GPTSFTDataset +from nemo.utils import logging + +__all__ = ['GPTSFTChatDataset'] + +IGNORE_INDEX = -100 +END_SIGNAL = "\n" +END_NAME_SIGNAL = "\n" + +SYSTEM_TOKEN = "System\n" +TURN_TOKEN = "" + +GUARD_RAIL_INSTRUCTION = { + "TEXT_TO_CANONICAL_FORM": "Given a dialogue, for each turn you need to generate a short summary called a canonical form. Generate the canonical form for the last turn in the dialogue.", + "CANONICAL_FORM_TO_TEXT": "Given a dialogue, for each turn we also have a short summary called a canonical form. Generate the canonical form given the last turn message and canonical form. Then generate the message.", +} + + +def _mask_targets(target, tokenized_lens, speakers, header_len, s_ids, tokenizer, mask_role): + cur_idx = header_len + tgt_len = target.shape[0] + for i, (tokenized_len, speaker, s_id) in enumerate(zip(tokenized_lens, speakers, s_ids)): + # note, sentence piece will add extra empty token in front. s_id has that extra token too + skip_name_len = len(tokenizer.text_to_ids(TURN_TOKEN + speaker + END_NAME_SIGNAL)) + if cur_idx >= tgt_len: + break + elif cur_idx + tokenized_len < tgt_len: + # Check whether the mask is applied to the correct position, the first token is turn token: + # s_id[2:] skips the artifact empty token and the turn token + # target[cur_idx + 1:cur_idx + tokenized_len] skip the turn token + if not torch.equal(target[cur_idx + 1 : cur_idx + tokenized_len], s_id[2:]): + logging.warning("a sentence mismatches the corresponding piece " "in the conversation") + if i == 0: + # mask the first turn completely to provide at least one turn as context + target[cur_idx : cur_idx + tokenized_len] = IGNORE_INDEX + elif speaker == mask_role: + # leave the first human tag unmasked + target[cur_idx + 1 : cur_idx + tokenized_len] = IGNORE_INDEX + else: + # mask up to the name end, need to remove one as skip name has an extra artifact empty token + target[cur_idx : cur_idx + skip_name_len - 1] = IGNORE_INDEX + cur_idx += tokenized_len + + +def cannonical_form_formater(cannoical_form): + return f'{cannoical_form}\n' + + +def _add_speaker_and_signal(header, source, mask_role, gtype): + """Add speaker and start/end signal on each round.""" + BEGIN_SIGNAL = "" + conversation = header + for i, sentence in enumerate(source): + sentence_from = sentence["from"] + role_token = TURN_TOKEN + if gtype is None: + sentence["value"] = ( + BEGIN_SIGNAL + role_token + sentence_from + END_NAME_SIGNAL + sentence["value"] + END_SIGNAL + ) + elif gtype == "TEXT_TO_CANONICAL_FORM": + sentence["value"] = ( + BEGIN_SIGNAL + + role_token + + sentence_from + + END_NAME_SIGNAL + + sentence["value"] + + END_SIGNAL + + cannonical_form_formater(sentence['canonical_form']) + ) + elif gtype == "CANONICAL_FORM_TO_TEXT": + sentence["value"] = ( + BEGIN_SIGNAL + + role_token + + sentence_from + + END_NAME_SIGNAL + + cannonical_form_formater(sentence['canonical_form']) + + sentence["value"] + + END_SIGNAL + ) + else: + raise ValueError(f"source type {gtype} not supported") + conversation += sentence["value"] + # if the last turn is not masked, add next token start token to the end, which will be included for loss calculation + if sentence_from != mask_role and i == len(source) - 1: + conversation += TURN_TOKEN + return conversation + + +def preprocess( + source: dict, tokenizer: TokenizerSpec, +): + """ + Given a conversation list. This transform: + 1. Add signal '### ' at the beginning each sentence, with end signal '\n'; + 2. Concatenate conversations together; + 3. Tokenize the concatenated conversation; + 4. Make a deepcopy as the target. Mask human words with IGNORE_INDEX. + """ + canonical_type = None + if 'type' in source: + canonical_type = source['type'] + assert canonical_type in GUARD_RAIL_INSTRUCTION, f"source type {canonical_type} not supported" + # add end signal and concatenate together + conversation = source['system'] + if canonical_type is not None: + conversation = conversation + '\n' + GUARD_RAIL_INSTRUCTION[canonical_type] + mask_role = source.get('mask', 'User') + header = f"{SYSTEM_TOKEN}{conversation}\n\n" + conversation = _add_speaker_and_signal(header, source['conversations'], mask_role, canonical_type) + # tokenize conversations + input_ids = tokenizer.text_to_ids(conversation) + target = copy.deepcopy(input_ids) + header_len = len(tokenizer.text_to_ids(header)) + + ids = [] + tokenized_lens = [] + for s in source['conversations']: + tokenized_sentence = tokenizer.text_to_ids(s["value"]) + ids.append(torch.tensor(tokenized_sentence)) + # remove one token as it adds an empty token in front + tokenized_lens.append(len(tokenized_sentence) - 1) + speakers = [sentence["from"] for sentence in source['conversations']] + assert mask_role in speakers, "mask role not in the conversation" + target = torch.LongTensor(target) + # not going to train on the header + target[:header_len] = IGNORE_INDEX + input_ids = torch.LongTensor(input_ids) + + _mask_targets(target, tokenized_lens, speakers, header_len, ids, tokenizer, mask_role) + mask = (target != IGNORE_INDEX).bool() + assert mask.sum().item() != 0, "mask is empty" + return dict(input_ids=input_ids, mask=mask) + + +class GPTSFTChatDataset(GPTSFTDataset): + def _build_samples_mapping(self): + super()._build_samples_mapping() + assert hasattr(self.tokenizer, "vocab"), "tokenizer should have vocab property, not supported" + assert '' in self.tokenizer.vocab, " not in the tokenizer vocab. not supported" + assert '' in self.tokenizer.vocab, " not in the tokenizer vocab. not supported" + + def _process_example(self, example): + """ + Create an example by concatenating text and answer. + Truncation is carried out when needed, but it is performed only on the prompt side. + BOS, EOS, and SEP, are added if specified. + """ + result = preprocess(example, self.tokenizer) + + return result + + def collate_fn(self, batch): + input_ids = [item['input_ids'][:-1].tolist() for item in batch] + labels = [item['input_ids'][1:].tolist() for item in batch] + loss_mask = [item['mask'][1:].tolist() for item in batch] + + max_length = max([len(x) for x in input_ids]) + if max_length > self.max_seq_length: + # truncate the sequences if it is longer than max_seq_length + input_ids = [x[: self.max_seq_length] for x in input_ids] + labels = [x[: self.max_seq_length] for x in labels] + loss_mask = [x[: self.max_seq_length] for x in loss_mask] + # increase max length to nearest multiple of 4 or 8 + if self.pad_to_max_length: + max_length = self.max_seq_length + else: + max_length = min(self.max_seq_length, self._round_to_nearest(max_length, 8)) + assert max_length <= self.max_seq_length + + attention_mask = [self._create_attention_mask(max_length) for _ in batch] + attention_mask = torch.stack(attention_mask) + position_ids = [list(range(max_length)) for _ in batch] + position_ids = torch.LongTensor(position_ids) + input_ids = torch.LongTensor( + self._collate_item(input_ids, max_length=max_length, pad_id=self.tokenizer.eos_id) + ) + labels = torch.LongTensor(self._collate_item(labels, max_length=max_length, pad_id=self.tokenizer.eos_id)) + loss_mask = torch.LongTensor(self._collate_item(loss_mask, max_length=max_length, pad_id=0)) + + processed_batch = { + 'tokens': input_ids, + 'labels': labels, + 'attention_mask': attention_mask, + 'loss_mask': loss_mask, + 'position_ids': position_ids, + } + + return processed_batch diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index a52a7d22e219..61b491d4af1d 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -24,6 +24,7 @@ get_datasets_weights_and_num_samples, ) from nemo.collections.nlp.data.language_modeling.megatron.blendable_dataset import BlendableDataset +from nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_chat_dataset import GPTSFTChatDataset from nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset import GPTSFTDataset from nemo.collections.nlp.data.language_modeling.megatron.megatron_batch_samplers import ( MegatronPretrainingBatchSampler, @@ -234,7 +235,11 @@ def _build_dataset(self, data_cfg, is_train=True): num_train_samples_per_dataset = [[None]] * len(data_cfg.file_names) for file_path, num_samples in zip(data_cfg.file_names, num_train_samples_per_dataset): - dataset = GPTSFTDataset( + if self.cfg.data.chat: + dataset_cls = GPTSFTChatDataset + else: + dataset_cls = GPTSFTDataset + dataset = dataset_cls( file_path=file_path, tokenizer=self.tokenizer, max_seq_length=data_cfg.max_seq_length, diff --git a/nemo/collections/nlp/modules/common/chat_css.py b/nemo/collections/nlp/modules/common/chat_css.py new file mode 100644 index 000000000000..e6b9a79c4bfe --- /dev/null +++ b/nemo/collections/nlp/modules/common/chat_css.py @@ -0,0 +1,84 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +CSS = """ +#chatbot .hll { background-color: #ffffcc } +#chatbot .c { color: #408080; font-style: italic } +#chatbot .err { border: 1px solid #FF0000 } +#chatbot .k { color: #008000; font-weight: bold } +#chatbot .o { color: #666666 } +#chatbot .ch { color: #408080; font-style: italic } +#chatbot .cm { color: #408080; font-style: italic } +#chatbot .cp { color: #BC7A00 } +#chatbot .cpf { color: #408080; font-style: italic } +#chatbot .c1 { color: #408080; font-style: italic } +#chatbot .cs { color: #408080; font-style: italic } +#chatbot .gd { color: #A00000 } +#chatbot .ge { font-style: italic } +#chatbot .gr { color: #FF0000 } +#chatbot .gh { color: #000080; font-weight: bold } +#chatbot .gi { color: #00A000 } +#chatbot .go { color: #888888 } +#chatbot .gp { color: #000080; font-weight: bold } +#chatbot .gs { font-weight: bold } +#chatbot .gu { color: #800080; font-weight: bold } +#chatbot .gt { color: #0044DD } +#chatbot .kc { color: #008000; font-weight: bold } +#chatbot .kd { color: #008000; font-weight: bold } +#chatbot .kn { color: #008000; font-weight: bold } +#chatbot .kp { color: #008000 } +#chatbot .kr { color: #008000; font-weight: bold } +#chatbot .kt { color: #B00040 } +#chatbot .m { color: #666666 } +#chatbot .s { color: #BA2121 } +#chatbot .na { color: #7D9029 } +#chatbot .nb { color: #008000 } +#chatbot .nc { color: #0000FF; font-weight: bold } +#chatbot .no { color: #880000 } +#chatbot .nd { color: #AA22FF } +#chatbot .ni { color: #999999; font-weight: bold } +#chatbot .ne { color: #D2413A; font-weight: bold } +#chatbot .nf { color: #0000FF } +#chatbot .nl { color: #A0A000 } +#chatbot .nn { color: #0000FF; font-weight: bold } +#chatbot .nt { color: #008000; font-weight: bold } +#chatbot .nv { color: #19177C } +#chatbot .ow { color: #AA22FF; font-weight: bold } +#chatbot .w { color: #bbbbbb } +#chatbot .mb { color: #666666 } +#chatbot .mf { color: #666666 } +#chatbot .mh { color: #666666 } +#chatbot .mi { color: #666666 } +#chatbot .mo { color: #666666 } +#chatbot .sa { color: #BA2121 } +#chatbot .sb { color: #BA2121 } +#chatbot .sc { color: #BA2121 } +#chatbot .dl { color: #BA2121 } +#chatbot .sd { color: #BA2121; font-style: italic } +#chatbot .s2 { color: #BA2121 } +#chatbot .se { color: #BB6622; font-weight: bold } +#chatbot .sh { color: #BA2121 } +#chatbot .si { color: #BB6688; font-weight: bold } +#chatbot .sx { color: #008000 } +#chatbot .sr { color: #BB6688 } +#chatbot .s1 { color: #BA2121 } +#chatbot .ss { color: #19177C } +#chatbot .bp { color: #008000 } +#chatbot .fm { color: #0000FF } +#chatbot .vc { color: #19177C } +#chatbot .vg { color: #19177C } +#chatbot .vi { color: #19177C } +#chatbot .vm { color: #19177C } +#chatbot .il { color: #666666 } +""" diff --git a/nemo/collections/nlp/modules/common/chatbot_component.py b/nemo/collections/nlp/modules/common/chatbot_component.py new file mode 100644 index 000000000000..548458df7e29 --- /dev/null +++ b/nemo/collections/nlp/modules/common/chatbot_component.py @@ -0,0 +1,173 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" + +Adopted from https://github.com/gradio-app/gradio/blob/main/gradio/components.py +Fix a markdown render problem. +""" +from __future__ import annotations + +from gradio.components import * +from markdown2 import Markdown + + +class _Keywords(Enum): + NO_VALUE = "NO_VALUE" # Used as a sentinel to determine if nothing is provided as a argument for `value` in `Component.update()` + FINISHED_ITERATING = ( + "FINISHED_ITERATING" # Used to skip processing of a component's value (needed for generators + state) + ) + + +@document("style") +class Chatbot(Changeable, Selectable, IOComponent, JSONSerializable): + """ + Displays a chatbot output showing both user submitted messages and responses. Supports a subset of Markdown including bold, italics, code, and images. + Preprocessing: this component does *not* accept input. + Postprocessing: expects function to return a {List[Tuple[str | None | Tuple, str | None | Tuple]]}, a list of tuples with user message and response messages. Messages should be strings, tuples, or Nones. If the message is a string, it can include Markdown. If it is a tuple, it should consist of (string filepath to image/video/audio, [optional string alt text]). Messages that are `None` are not displayed. + + Demos: chatbot_simple, chatbot_multimodal + """ + + def __init__( + self, + value: List[Tuple[str | None, str | None]] | Callable | None = None, + color_map: Dict[str, str] | None = None, # Parameter moved to Chatbot.style() + *, + label: str | None = None, + every: float | None = None, + show_label: bool = True, + visible: bool = True, + elem_id: str | None = None, + elem_classes: List[str] | str | None = None, + **kwargs, + ): + """ + Parameters: + value: Default value to show in chatbot. If callable, the function will be called whenever the app loads to set the initial value of the component. + label: component name in interface. + every: If `value` is a callable, run the function 'every' number of seconds while the client connection is open. Has no effect otherwise. Queue must be enabled. The event can be accessed (e.g. to cancel it) via this component's .load_event attribute. + show_label: if True, will display label. + visible: If False, component will be hidden. + elem_id: An optional string that is assigned as the id of this component in the HTML DOM. Can be used for targeting CSS styles. + elem_classes: An optional list of strings that are assigned as the classes of this component in the HTML DOM. Can be used for targeting CSS styles. + """ + if color_map is not None: + warnings.warn("The 'color_map' parameter has been deprecated.",) + # self.md = utils.get_markdown_parser() + self.md = Markdown(extras=["fenced-code-blocks", "tables", "break-on-newline"]) + self.select: EventListenerMethod + """ + Event listener for when the user selects message from Chatbot. + Uses event data gradio.SelectData to carry `value` referring to text of selected message, and `index` tuple to refer to [message, participant] index. + See EventData documentation on how to use this event data. + """ + + IOComponent.__init__( + self, + label=label, + every=every, + show_label=show_label, + visible=visible, + elem_id=elem_id, + elem_classes=elem_classes, + value=value, + **kwargs, + ) + + def get_config(self): + return { + "value": self.value, + "selectable": self.selectable, + **IOComponent.get_config(self), + } + + @staticmethod + def update( + value: Any | Literal[_Keywords.NO_VALUE] | None = _Keywords.NO_VALUE, + label: str | None = None, + show_label: bool | None = None, + visible: bool | None = None, + ): + updated_config = { + "label": label, + "show_label": show_label, + "visible": visible, + "value": value, + "__type__": "update", + } + return updated_config + + def _process_chat_messages(self, chat_message: str | Tuple | List | Dict | None) -> str | Dict | None: + if chat_message is None: + return None + elif isinstance(chat_message, (tuple, list)): + mime_type = processing_utils.get_mimetype(chat_message[0]) + return { + "name": chat_message[0], + "mime_type": mime_type, + "alt_text": chat_message[1] if len(chat_message) > 1 else None, + "data": None, # These last two fields are filled in by the frontend + "is_file": True, + } + elif isinstance(chat_message, dict): # This happens for previously processed messages + return chat_message + elif isinstance(chat_message, str): + # return self.md.render(chat_message) + return str(self.md.convert(chat_message)) + else: + raise ValueError(f"Invalid message for Chatbot component: {chat_message}") + + def postprocess( + self, y: List[Tuple[str | Tuple | List | Dict | None, str | Tuple | List | Dict | None]], + ) -> List[Tuple[str | Dict | None, str | Dict | None]]: + """ + Parameters: + y: List of tuples representing the message and response pairs. Each message and response should be a string, which may be in Markdown format. It can also be a tuple whose first element is a string filepath or URL to an image/video/audio, and second (optional) element is the alt text, in which case the media file is displayed. It can also be None, in which case that message is not displayed. + Returns: + List of tuples representing the message and response. Each message and response will be a string of HTML, or a dictionary with media information. + """ + if y is None: + return [] + processed_messages = [] + for message_pair in y: + assert isinstance( + message_pair, (tuple, list) + ), f"Expected a list of lists or list of tuples. Received: {message_pair}" + assert ( + len(message_pair) == 2 + ), f"Expected a list of lists of length 2 or list of tuples of length 2. Received: {message_pair}" + processed_messages.append( + ( + # '

' +
+                    #                    message_pair[0] + "
", + message_pair[0], + self._process_chat_messages(message_pair[1]), + ) + ) + return processed_messages + + def style(self, height: int | None = None, **kwargs): + """ + This method can be used to change the appearance of the Chatbot component. + """ + if height is not None: + self._style["height"] = height + if kwargs.get("color_map") is not None: + warnings.warn("The 'color_map' parameter has been deprecated.") + + Component.style( + self, **kwargs, + ) + return self diff --git a/nemo/collections/nlp/modules/common/megatron_web_server.py b/nemo/collections/nlp/modules/common/megatron_web_server.py index 84ceeb286ea3..884f7abe5f01 100644 --- a/nemo/collections/nlp/modules/common/megatron_web_server.py +++ b/nemo/collections/nlp/modules/common/megatron_web_server.py @@ -16,6 +16,8 @@ import gradio as gr +from nemo.collections.nlp.modules.common.chat_css import CSS +from nemo.collections.nlp.modules.common.chatbot_component import Chatbot from nemo.collections.nlp.modules.common.megatron.retrieval_services.util import ( convert_retrieved_to_md, request_data, @@ -24,25 +26,64 @@ __all__ = ['RetroDemoWebApp', 'get_demo'] +TURN_TOKEN = '' -def create_gen_function(port=5555): - def get_generation(prompt, greedy, add_BOS, token_to_gen, min_tokens, temp, top_p, top_k, repetition, end_strings): - data = { - "sentences": [prompt], - "tokens_to_generate": int(token_to_gen), - "temperature": temp, - "add_BOS": add_BOS, - "top_k": top_k, - "top_p": top_p, - "greedy": greedy, - "all_probs": False, - "repetition_penalty": repetition, - "min_tokens_to_generate": int(min_tokens), - "end_strings": [i.strip() for i in end_strings.split(',') if len(i) != 0], - } - response = text_generation(data, port=port) - sentences = response['sentences'] - return sentences[0] +DEFAULT_SYSTEM = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n" +SYSTEM_TOKEN = 'System\n' +# HUMAN_TOKEN = 'Human:' +# ASSITANT_TOKEN = 'Assistant:' + + +def create_gen_function(port=5555, chat=False): + if chat: + + def get_generation( + prompt, preamble, greedy, add_BOS, token_to_gen, min_tokens, temp, top_p, top_k, repetition, end_strings + ): + if preamble is not None and preamble != '': + prompt = SYSTEM_TOKEN + preamble + prompt + data = { + "sentences": [prompt], + "tokens_to_generate": int(token_to_gen), + "temperature": temp, + "add_BOS": add_BOS, + "top_k": top_k, + "top_p": top_p, + "greedy": greedy, + "all_probs": False, + "repetition_penalty": repetition, + "min_tokens_to_generate": int(min_tokens), + "end_strings": [i.strip() for i in end_strings.split(',') if len(i) != 0], + } + response = text_generation(data, port=port) + sentences = response['sentences'] + bot_message = sentences[0] + bot_message = bot_message[len(prompt) :] + return bot_message + + else: + + def get_generation( + prompt, greedy, add_BOS, token_to_gen, min_tokens, temp, top_p, top_k, repetition, end_strings + ): + data = { + "sentences": [prompt], + "tokens_to_generate": int(token_to_gen), + "temperature": temp, + "add_BOS": add_BOS, + "top_k": top_k, + "top_p": top_p, + "greedy": greedy, + "all_probs": False, + "repetition_penalty": repetition, + "min_tokens_to_generate": int(min_tokens), + "end_strings": [i.strip() for i in end_strings.split(',') if len(i) != 0], + } + response = text_generation(data, port=port) + sentences = response['sentences'] + bot_message = sentences[0] + bot_message = bot_message[len(prompt) :] + return bot_message return get_generation @@ -72,7 +113,7 @@ def get_demo(share, username, password, server_port=5555, web_port=9889, loop=No output_box = gr.Textbox(value="", label="Output") btn = gr.Button(value="Submit") btn.click( - create_gen_function(server_port), + create_gen_function(server_port, chat=False), inputs=[ input_prompt, greedy_flag, @@ -90,6 +131,111 @@ def get_demo(share, username, password, server_port=5555, web_port=9889, loop=No demo.launch(share=share, server_port=web_port, server_name='0.0.0.0', auth=(username, password)) +def get_chatbot_demo(share, username, password, server_port=5555, web_port=9889, loop=None): + asyncio.set_event_loop(loop) + with gr.Blocks(css=CSS) as demo: + # store the mutliple turn conversation + with gr.Row(): + with gr.Column(scale=2, width=200): + # store the mutliple turn conversation + session_state = gr.State(value=[]) + greedy_flag = gr.Checkbox(label="Greedy", value=True) + add_BOS = gr.Checkbox(label="Add BOS token", value=False) + token_to_gen = gr.Number(label='Number of Tokens to generate', value=300, type=int) + min_token_to_gen = gr.Number(label='Min number of Tokens to generate', value=1, type=int) + temperature = gr.Slider(minimum=0.0, maximum=10.0, value=1.0, label='Temperature', step=0.1) + top_p = gr.Slider(minimum=0.0, maximum=1.0, step=0.02, value=0.9, label='Top P') + top_k = gr.Slider(minimum=0, maximum=10000, step=2, value=0, label='Top K') + repetition_penality = gr.Slider( + minimum=1.0, maximum=5.0, step=0.02, value=1.2, label='Repetition penalty' + ) + end_strings = gr.Textbox( + label="End strings (comma separated)", value=f"<|endoftext|>,,", lines=1, + ) + gr.HTML("
") + human_name = gr.Textbox(label="Human Name", value="User", line=1,) + assistant_name = gr.Textbox(label="Assistant Name", value="Assistant", line=1,) + preamble = gr.Textbox(label="System", value=DEFAULT_SYSTEM, lines=2,) + with gr.Column(scale=1, min_width=800): + chatbot = Chatbot(elem_id="chatbot").style(height=800) + msg = gr.Textbox(label="User", value="", lines=1,) + clear = gr.Button("Clear") + + def user(user_message, history, session_state): + session_state.append(user_message) + user_message = user_message.replace('\n', '
') + return "", history + [[user_message, None]] + + def bot( + history, + preamble, + greedy_flag, + add_BOS, + token_to_gen, + min_token_to_gen, + temperature, + top_p, + top_k, + repetition_penality, + end_strings, + human_name, + assistant_name, + session_state, + ): + prompt_text = '' + names = [human_name, assistant_name] + for i, meg in enumerate(session_state): + name = names[i % 2] + prompt_text += TURN_TOKEN + name + '\n' + meg + '\n' + prompt_text += TURN_TOKEN + assistant_name + '\n' + bot_message = create_gen_function(server_port, chat=True)( + prompt_text, + preamble, + greedy_flag, + add_BOS, + token_to_gen, + min_token_to_gen, + temperature, + top_p, + top_k, + repetition_penality, + end_strings, + ) + if bot_message.endswith(TURN_TOKEN): + bot_message = bot_message[: -len(TURN_TOKEN)] + history[-1][1] = bot_message + session_state.append(bot_message.strip()) + return history + + msg.submit(user, [msg, chatbot, session_state], [msg, chatbot], queue=False).then( + bot, + [ + chatbot, + preamble, + greedy_flag, + add_BOS, + token_to_gen, + min_token_to_gen, + temperature, + top_p, + top_k, + repetition_penality, + end_strings, + human_name, + assistant_name, + session_state, + ], + chatbot, + ) + + def clear_fun(session_state): + session_state.clear() + return None + + clear.click(clear_fun, [session_state], chatbot, queue=False) + demo.launch(share=share, server_port=web_port, server_name='0.0.0.0', auth=(username, password)) + + class RetroDemoWebApp: def __init__(self, text_service_ip, text_service_port, combo_service_ip, combo_service_port): self.text_service_ip = text_service_ip diff --git a/requirements/requirements_nlp.txt b/requirements/requirements_nlp.txt index 0c3c42ba583f..d88280b363c2 100644 --- a/requirements/requirements_nlp.txt +++ b/requirements/requirements_nlp.txt @@ -5,11 +5,12 @@ fasttext flask_restful ftfy gdown -gradio +gradio==3.28.3 h5py ijson inflect jieba +markdown2 matplotlib>=3.3.2 megatron_core==0.1.0 nltk>=3.6.5 From 18cad416f428d880950f964383299732b8d951ee Mon Sep 17 00:00:00 2001 From: "He Huang (Steve)" <105218074+stevehuang52@users.noreply.github.com> Date: Tue, 16 May 2023 18:42:08 -0400 Subject: [PATCH 114/512] Add support for RNNT/hybrid models to partial transcribe (#6609) * Add support for RNNT/hybrid models to partial transcribe Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * Update transcribe_utils.py Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * Update transcribe_speech.py Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * Update transcribe_utils.py Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- examples/asr/transcribe_speech.py | 32 ++++++------------- .../asr/parts/utils/transcribe_utils.py | 28 +++++++++++----- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/examples/asr/transcribe_speech.py b/examples/asr/transcribe_speech.py index 4a93e630876c..4ed3d92a6305 100644 --- a/examples/asr/transcribe_speech.py +++ b/examples/asr/transcribe_speech.py @@ -314,28 +314,16 @@ def autocast(): with autocast(): with torch.no_grad(): if partial_audio: - if isinstance(asr_model, EncDecCTCModel): - transcriptions = transcribe_partial_audio( - asr_model=asr_model, - path2manifest=cfg.dataset_manifest, - batch_size=cfg.batch_size, - num_workers=cfg.num_workers, - return_hypotheses=return_hypotheses, - channel_selector=cfg.channel_selector, - augmentor=augmentor, - ) - else: - logging.warning( - "RNNT models do not support transcribe partial audio for now. Transcribing full audio." - ) - transcriptions = asr_model.transcribe( - paths2audio_files=filepaths, - batch_size=cfg.batch_size, - num_workers=cfg.num_workers, - return_hypotheses=return_hypotheses, - channel_selector=cfg.channel_selector, - augmentor=augmentor, - ) + transcriptions = transcribe_partial_audio( + asr_model=asr_model, + path2manifest=cfg.dataset_manifest, + batch_size=cfg.batch_size, + num_workers=cfg.num_workers, + return_hypotheses=return_hypotheses, + channel_selector=cfg.channel_selector, + augmentor=augmentor, + decoder_type=cfg.decoder_type, + ) else: transcriptions = asr_model.transcribe( paths2audio_files=filepaths, diff --git a/nemo/collections/asr/parts/utils/transcribe_utils.py b/nemo/collections/asr/parts/utils/transcribe_utils.py index 990e3b96b0fc..60f936306d05 100644 --- a/nemo/collections/asr/parts/utils/transcribe_utils.py +++ b/nemo/collections/asr/parts/utils/transcribe_utils.py @@ -362,11 +362,11 @@ def transcribe_partial_audio( num_workers: int = 0, channel_selector: Optional[int] = None, augmentor: DictConfig = None, + decoder_type: Optional[str] = None, ) -> List[str]: """ - See description of this function in trancribe() in nemo/collections/asr/models/ctc_models.py """ - - assert isinstance(asr_model, EncDecCTCModel), "Currently support CTC model only." + See description of this function in trancribe() in nemo/collections/asr/models/ctc_models.py and nemo/collections/asr/models/rnnt_models.py + """ if return_hypotheses and logprobs: raise ValueError( @@ -384,6 +384,17 @@ def transcribe_partial_audio( dither_value = asr_model.preprocessor.featurizer.dither pad_to_value = asr_model.preprocessor.featurizer.pad_to + if decoder_type is not None: # Hybrid model + decode_function = ( + asr_model.decoding.rnnt_decoder_predictions_tensor + if decoder_type == 'rnnt' + else asr_model.decoding.ctc_decoder_predictions_tensor + ) + elif hasattr(asr_model, 'joint'): # RNNT model + decode_function = asr_model.decoding.rnnt_decoder_predictions_tensor + else: # CTC model + decode_function = asr_model.decoding.ctc_decoder_predictions_tensor + try: asr_model.preprocessor.featurizer.dither = 0.0 asr_model.preprocessor.featurizer.pad_to = 0 @@ -406,18 +417,20 @@ def transcribe_partial_audio( temporary_datalayer = asr_model._setup_transcribe_dataloader(config) for test_batch in tqdm(temporary_datalayer, desc="Transcribing"): - logits, logits_len, greedy_predictions = asr_model.forward( + outputs = asr_model.forward( input_signal=test_batch[0].to(device), input_signal_length=test_batch[1].to(device) ) + logits, logits_len = outputs[0], outputs[1] if logprobs: # dump log probs per file for idx in range(logits.shape[0]): lg = logits[idx][: logits_len[idx]] hypotheses.append(lg.cpu().numpy()) else: - current_hypotheses, all_hyp = asr_model.decoding.ctc_decoder_predictions_tensor( - logits, decoder_lengths=logits_len, return_hypotheses=return_hypotheses, - ) + current_hypotheses, all_hyp = decode_function(logits, logits_len, return_hypotheses=return_hypotheses,) + + if isinstance(current_hypotheses, tuple) and len(current_hypotheses) == 2: + current_hypotheses = current_hypotheses[0] if return_hypotheses: # dump log probs per file @@ -428,7 +441,6 @@ def transcribe_partial_audio( hypotheses += current_hypotheses - del greedy_predictions del logits del test_batch From cf42ddb0abc47b122793dba8fc5b00f66c54fb1e Mon Sep 17 00:00:00 2001 From: Nikolay Karpov Date: Wed, 17 May 2023 12:25:27 +0400 Subject: [PATCH 115/512] eval_beamsearch_ngram.py with hybrid ctc (#6656) * separate_punctuation = false * ctc decoding strategy = model.decoding * transcribe(files, logprobs=True) returns logprobs --------- Signed-off-by: Nikolay Karpov Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- examples/asr/speech_to_text_eval.py | 3 +- .../asr/models/hybrid_rnnt_ctc_models.py | 11 +++++- .../ngram_lm/eval_beamsearch_ngram.py | 39 ++++++++++++++----- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/examples/asr/speech_to_text_eval.py b/examples/asr/speech_to_text_eval.py index f8dcbcf81bbd..f4d2a66ffec0 100644 --- a/examples/asr/speech_to_text_eval.py +++ b/examples/asr/speech_to_text_eval.py @@ -82,7 +82,7 @@ class EvaluationConfig(transcribe_speech.TranscriptionConfig): only_score_manifest: bool = False text_processing: Optional[TextProcessingConfig] = TextProcessingConfig( - punctuation_marks=".,?", separate_punctuation=True, do_lowercase=False, rm_punctuation=False, + punctuation_marks=".,?", separate_punctuation=False, do_lowercase=False, rm_punctuation=False, ) @@ -134,6 +134,7 @@ def main(cfg: EvaluationConfig): pc = PunctuationCapitalization(cfg.text_processing.punctuation_marks) if cfg.text_processing.separate_punctuation: ground_truth_text = pc.separate_punctuation(ground_truth_text) + predicted_text = pc.separate_punctuation(predicted_text) if cfg.text_processing.do_lowercase: ground_truth_text = pc.do_lowercase(ground_truth_text) predicted_text = pc.do_lowercase(predicted_text) diff --git a/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py b/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py index 447caa3f5de6..5ca6124ecfd7 100644 --- a/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py +++ b/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py @@ -102,6 +102,7 @@ def transcribe( channel_selector: Optional[ChannelSelectorType] = None, augmentor: DictConfig = None, verbose: bool = True, + logprobs: bool = False, ) -> (List[str], Optional[List['Hypothesis']]): """ Uses greedy decoding to transcribe audio files. Use this method for debugging and prototyping. @@ -119,6 +120,7 @@ def transcribe( channel_selector (int | Iterable[int] | str): select a single channel or a subset of channels from multi-channel audio. If set to `'average'`, it performs averaging across channels. Disabled if set to `None`. Defaults to `None`. Uses zero-based indexing. augmentor: (DictConfig): Augment audio samples during transcription if augmentor is applied. verbose: (bool) whether to display tqdm progress bar + logprobs: (bool) whether to return ctc logits insted of hypotheses Returns: Returns a tuple of 2 items - @@ -189,6 +191,7 @@ def transcribe( config['augmentor'] = augmentor temporary_datalayer = self._setup_transcribe_dataloader(config) + logits_list = [] for test_batch in tqdm(temporary_datalayer, desc="Transcribing", disable=not verbose): encoded, encoded_len = self.forward( input_signal=test_batch[0].to(device), input_signal_length=test_batch[1].to(device) @@ -206,6 +209,9 @@ def transcribe( best_hyp[idx].y_sequence = logits[idx][: encoded_len[idx]] if best_hyp[idx].alignments is None: best_hyp[idx].alignments = best_hyp[idx].y_sequence + if logprobs: + for logit, elen in zip(logits, encoded_len): + logits_list.append(logit[:elen]) del logits hypotheses += best_hyp @@ -229,7 +235,10 @@ def transcribe( self.joint.unfreeze() if hasattr(self, 'ctc_decoder'): self.ctc_decoder.unfreeze() - return hypotheses, all_hypotheses + if logprobs: + return logits_list + else: + return hypotheses, all_hypotheses def change_vocabulary( self, diff --git a/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram.py b/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram.py index 1f62da6bb168..1846a986bf6e 100644 --- a/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram.py +++ b/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram.py @@ -15,10 +15,12 @@ """ # This script would evaluate an N-gram language model trained with KenLM library (https://github.com/kpu/kenlm) in -# fusion with beam search decoders on top of a trained ASR model. NeMo's beam search decoders are capable of using the -# KenLM's N-gram models to find the best candidates. This script supports both character level and BPE level +# fusion with beam search decoders on top of a trained ASR model with CTC decoder. To evaluate a model with +# Transducer (RNN-T) decoder use another script 'scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram_transducer.py'. +# NeMo's beam search decoders are capable of using the KenLM's N-gram models +# to find the best candidates. This script supports both character level and BPE level # encodings and models which is detected automatically from the type of the model. -# You may train the LM model with 'scripts/ngram_lm/train_kenlm.py'. +# You may train the LM model with 'scripts/asr_language_modeling/ngram_lm/train_kenlm.py'. # Config Help @@ -29,7 +31,7 @@ # USAGE python eval_beamsearch_ngram.py nemo_model_file= \ - input_manifest= \ kenlm_model_file= \ beam_width=[] \ beam_alpha=[] \ @@ -70,6 +72,7 @@ from tqdm.auto import tqdm import nemo.collections.asr as nemo_asr +from nemo.collections.asr.models import EncDecHybridRNNTCTCModel from nemo.collections.asr.parts.submodules import ctc_beam_decoding from nemo.collections.asr.parts.utils.transcribe_utils import PunctuationCapitalization, TextProcessingConfig from nemo.core.config import hydra_runner @@ -113,7 +116,7 @@ class EvalBeamSearchNGramConfig: text_processing: Optional[TextProcessingConfig] = TextProcessingConfig( punctuation_marks = ".,?", - separate_punctuation = True, + separate_punctuation = False, do_lowercase = False, rm_punctuation = False, ) @@ -151,7 +154,12 @@ def beam_search_eval( model.cfg.decoding.beam = cfg.decoding # Update model's decoding strategy - model.change_decoding_strategy(model.cfg.decoding) + if isinstance(model, EncDecHybridRNNTCTCModel): + model.change_decoding_strategy(model.cfg.decoding, decoder_type='ctc') + decoding = model.ctc_decoding + else: + model.change_decoding_strategy(model.cfg.decoding) + decoding = model.decoding logging.setLevel(level) wer_dist_first = cer_dist_first = 0 @@ -182,7 +190,7 @@ def beam_search_eval( probs_batch[prob_index], device=packed_batch.device, dtype=packed_batch.dtype ) - _, beams_batch = model.decoding.ctc_decoder_predictions_tensor( + _, beams_batch = decoding.ctc_decoder_predictions_tensor( packed_batch, decoder_lengths=probs_lens, return_hypotheses=True, ) @@ -199,6 +207,8 @@ def beam_search_eval( pred_text = punctuation_capitalization.do_lowercase([pred_text])[0] if cfg.text_processing.rm_punctuation: pred_text = punctuation_capitalization.rm_punctuation([pred_text])[0] + if cfg.text_processing.separate_punctuation: + pred_text = punctuation_capitalization.separate_punctuation([pred_text])[0] pred_split_w = pred_text.split() wer_dist = editdistance.eval(target_split_w, pred_split_w) pred_split_c = list(pred_text) @@ -247,6 +257,7 @@ def beam_search_eval( @hydra_runner(config_path=None, config_name='EvalBeamSearchNGramConfig', schema=EvalBeamSearchNGramConfig) def main(cfg: EvalBeamSearchNGramConfig): + logging.warning("This file will be renamed to eval_beamsearch_ngram_ctc.py in the future NeMo (1.21) release.") if is_dataclass(cfg): cfg = OmegaConf.structured(cfg) # type: EvalBeamSearchNGramConfig @@ -279,12 +290,12 @@ def main(cfg: EvalBeamSearchNGramConfig): audio_file_paths.append(str(audio_file.absolute())) punctuation_capitalization = PunctuationCapitalization(cfg.text_processing.punctuation_marks) - if cfg.text_processing.separate_punctuation: - target_transcripts = punctuation_capitalization.separate_punctuation(target_transcripts) if cfg.text_processing.do_lowercase: target_transcripts = punctuation_capitalization.do_lowercase(target_transcripts) if cfg.text_processing.rm_punctuation: target_transcripts = punctuation_capitalization.rm_punctuation(target_transcripts) + if cfg.text_processing.separate_punctuation: + target_transcripts = punctuation_capitalization.separate_punctuation(target_transcripts) if cfg.probs_cache_file and os.path.exists(cfg.probs_cache_file): logging.info(f"Found a pickle file of probabilities at '{cfg.probs_cache_file}'.") @@ -316,6 +327,8 @@ def default_autocast(): with autocast(): with torch.no_grad(): + if isinstance(asr_model, EncDecHybridRNNTCTCModel): + asr_model.cur_decoder = 'ctc' all_logits = asr_model.transcribe(audio_file_paths, batch_size=cfg.acoustic_batch_size, logprobs=True) all_probs = all_logits @@ -331,11 +344,17 @@ def default_autocast(): for batch_idx, probs in enumerate(all_probs): preds = np.argmax(probs, axis=1) preds_tensor = torch.tensor(preds, device='cpu').unsqueeze(0) - pred_text = asr_model._wer.decoding.ctc_decoder_predictions_tensor(preds_tensor)[0][0] + if isinstance(asr_model, EncDecHybridRNNTCTCModel): + pred_text = asr_model.ctc_decoding.ctc_decoder_predictions_tensor(preds_tensor)[0][0] + else: + pred_text = asr_model._wer.decoding.ctc_decoder_predictions_tensor(preds_tensor)[0][0] + if cfg.text_processing.do_lowercase: pred_text = punctuation_capitalization.do_lowercase([pred_text])[0] if cfg.text_processing.rm_punctuation: pred_text = punctuation_capitalization.rm_punctuation([pred_text])[0] + if cfg.text_processing.separate_punctuation: + pred_text = punctuation_capitalization.separate_punctuation([pred_text])[0] pred_split_w = pred_text.split() target_split_w = target_transcripts[batch_idx].split() From e6a3985cb9317f47998f98760e9f9faca6bc9977 Mon Sep 17 00:00:00 2001 From: Nithin Rao Date: Wed, 17 May 2023 10:35:11 -0700 Subject: [PATCH 116/512] fix bucketing bug issue for picking new bucket (#6663) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Koluguri --- nemo/collections/asr/data/audio_to_text.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nemo/collections/asr/data/audio_to_text.py b/nemo/collections/asr/data/audio_to_text.py index 3b2e2a767a97..58cd3630e322 100644 --- a/nemo/collections/asr/data/audio_to_text.py +++ b/nemo/collections/asr/data/audio_to_text.py @@ -1341,8 +1341,7 @@ def __next__(self): try: sample = next(self.wrapped_iter) except StopIteration: - self.wrapped_iter = iter(self.wrapped_ds) - sample = next(self.wrapped_iter) + break batches.append(sample) if len(batches) == 0: raise StopIteration From 8a4e9d21ac4db3949125fd2a1288444938dc5a70 Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Wed, 17 May 2023 15:13:07 -0700 Subject: [PATCH 117/512] Cut branch r1.19.0 Signed-off-by: smajumdar --- Jenkinsfile | 330 +++++++++--------- README.rst | 60 ++-- tutorials/00_NeMo_Primer.ipynb | 8 +- tutorials/01_NeMo_Models.ipynb | 2 +- tutorials/02_NeMo_Adapters.ipynb | 2 +- tutorials/AudioTranslationSample.ipynb | 4 +- ...blish_NeMo_Model_On_Hugging_Face_Hub.ipynb | 2 +- tutorials/VoiceSwapSample.ipynb | 4 +- .../asr/ASR_CTC_Language_Finetuning.ipynb | 4 +- .../ASR_Example_CommonVoice_Finetuning.ipynb | 10 +- tutorials/asr/ASR_for_telephony_speech.ipynb | 2 +- tutorials/asr/ASR_with_NeMo.ipynb | 4 +- .../asr/ASR_with_Subword_Tokenization.ipynb | 2 +- tutorials/asr/ASR_with_Transducers.ipynb | 2 +- .../asr/Buffered_Transducer_Inference.ipynb | 2 +- ..._Transducer_Inference_with_LCS_Merge.ipynb | 2 +- tutorials/asr/Intro_to_Transducers.ipynb | 4 +- tutorials/asr/Multilang_ASR.ipynb | 8 +- tutorials/asr/Offline_ASR.ipynb | 4 +- .../Offline_ASR_with_VAD_for_CTC_models.ipynb | 2 +- .../asr/Online_ASR_Microphone_Demo.ipynb | 2 +- tutorials/asr/Online_Noise_Augmentation.ipynb | 2 +- .../Online_Offline_Microphone_VAD_Demo.ipynb | 4 +- .../Online_Offline_Speech_Commands_Demo.ipynb | 4 +- .../asr/Self_Supervised_Pre_Training.ipynb | 10 +- tutorials/asr/Speech_Commands.ipynb | 2 +- tutorials/asr/Streaming_ASR.ipynb | 4 +- tutorials/asr/Voice_Activity_Detection.ipynb | 2 +- .../asr/asr_adapters/ASR_with_Adapters.ipynb | 4 +- ...netuning_at_Scale_with_AWS_SageMaker.ipynb | 12 +- .../cloud/aws/SageMaker_ASR_Training.ipynb | 6 +- ...Language_Models_for_Downstream_Tasks.ipynb | 12 +- tutorials/nlp/02_NLP_Tokenizers.ipynb | 4 +- ...a_Preprocessing_and_Cleaning_for_NMT.ipynb | 6 +- tutorials/nlp/Dialogue.ipynb | 2 +- tutorials/nlp/Entity_Linking_Medical.ipynb | 4 +- tutorials/nlp/GLUE_Benchmark.ipynb | 2 +- tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb | 12 +- ...Joint_Intent_and_Slot_Classification.ipynb | 2 +- tutorials/nlp/MegatronBert_export.ipynb | 4 +- ...on_Synthetic_Tabular_Data_Generation.ipynb | 2 +- .../nlp/Multitask_Prompt_and_PTuning.ipynb | 8 +- .../nlp/Punctuation_and_Capitalization.ipynb | 8 +- ...ion_and_Capitalization_Lexical_Audio.ipynb | 8 +- tutorials/nlp/Question_Answering.ipynb | 2 +- .../nlp/Relation_Extraction-BioMegatron.ipynb | 2 +- ...xt_Classification_Sentiment_Analysis.ipynb | 2 +- .../Token_Classification-BioMegatron.ipynb | 2 +- ...ssification_Named_Entity_Recognition.ipynb | 4 +- .../nlp/Zero_Shot_Intent_Recognition.ipynb | 4 +- .../ASR_with_SpeakerDiarization.ipynb | 6 +- .../Speaker_Diarization_Inference.ipynb | 12 +- .../Speaker_Diarization_Training.ipynb | 8 +- .../Speaker_Identification_Verification.ipynb | 8 +- .../tools/CTC_Segmentation_Tutorial.ipynb | 8 +- tutorials/tools/Multispeaker_Simulator.ipynb | 4 +- .../tts/Aligner_Inference_Examples.ipynb | 4 +- .../Evaluation_MelCepstralDistortion.ipynb | 6 +- .../tts/FastPitch_Adapter_Finetuning.ipynb | 4 +- .../tts/FastPitch_ChineseTTS_Training.ipynb | 8 +- tutorials/tts/FastPitch_Finetuning.ipynb | 4 +- .../tts/FastPitch_GermanTTS_Training.ipynb | 10 +- .../tts/FastPitch_MixerTTS_Training.ipynb | 2 +- .../FastPitch_MultiSpeaker_Pretraining.ipynb | 4 +- .../tts/FastPitch_Speaker_Interpolation.ipynb | 2 +- .../tts/Inference_DurationPitchControl.ipynb | 4 +- tutorials/tts/Inference_ModelSelect.ipynb | 2 +- tutorials/tts/NeMo_TTS_Primer.ipynb | 2 +- .../tts/Pronunciation_customization.ipynb | 12 +- tutorials/tts/Tacotron2_Training.ipynb | 2 +- tutorials/tts/Vits_Training.ipynb | 2 +- 71 files changed, 357 insertions(+), 357 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 2d42debb53d7..27fbf11148f6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -85,8 +85,8 @@ pipeline { stage('L0: Unit Tests CPU') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } steps { @@ -97,8 +97,8 @@ pipeline { stage('L2: ASR dev run') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -183,8 +183,8 @@ pipeline { stage('L2: ASR dev run - part two') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -213,8 +213,8 @@ pipeline { stage('L2: Speech to Text EMA') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } steps { @@ -234,8 +234,8 @@ pipeline { stage('L2: Speaker dev run') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -357,8 +357,8 @@ pipeline { // stage('L2: ASR DALI dev run') { // when { // anyOf { - // branch 'r1.17.0' - // changeRequest target: 'r1.17.0' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -425,8 +425,8 @@ pipeline { // stage('L2: ASR RNNT dev run') { // when { // anyOf { - // branch 'r1.17.0' - // changeRequest target: 'r1.17.0' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -487,8 +487,8 @@ pipeline { // stage('L2: Hybrid ASR RNNT-CTC dev run') { // when { // anyOf { - // branch 'r1.17.0' - // changeRequest target: 'r1.17.0' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -517,8 +517,8 @@ pipeline { stage('L2: ASR Multi-dataloader dev run') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -565,8 +565,8 @@ pipeline { stage('L2: ASR Adapters') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -610,8 +610,8 @@ pipeline { stage('L2: Megatron T5 Adapter PP=2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -656,8 +656,8 @@ pipeline { stage('L2: Megatron T5 Adapter TP=2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -700,8 +700,8 @@ pipeline { stage('L2: Megatron T5 IA3 PP=2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -746,8 +746,8 @@ pipeline { stage('L2: Megatron T5 IA3 TP=2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -790,8 +790,8 @@ pipeline { stage('L2: Megatron GPT Adapter TP=2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -833,8 +833,8 @@ pipeline { stage('L2: Megatron GPT Adapter PP=2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -877,8 +877,8 @@ pipeline { stage('L2: Speech Transcription') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -898,8 +898,8 @@ pipeline { stage('L2: Transducer alignment') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -915,8 +915,8 @@ pipeline { stage('L2: Segmentation Tool') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } stages { @@ -971,8 +971,8 @@ pipeline { stage('L2: G2P Models') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1053,8 +1053,8 @@ pipeline { // stage('L2: Multi-GPU Megatron finetuning') { // when { // anyOf { - // branch 'r1.17.0' - // changeRequest target: 'r1.17.0' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -1080,8 +1080,8 @@ pipeline { stage('L2: STS-b') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1140,8 +1140,8 @@ pipeline { stage('L2: Dialogue Classification') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1311,8 +1311,8 @@ pipeline { stage('L2: Dialogue Generation') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1377,8 +1377,8 @@ pipeline { // stage('L2: Dialogue Generation Part 2') { // when { // anyOf { -// branch 'r1.17.0' -// changeRequest target: 'r1.17.0' +// branch 'r1.19.0' +// changeRequest target: 'r1.19.0' // } // } // failFast true @@ -1407,8 +1407,8 @@ pipeline { stage('L2: COPY') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1437,8 +1437,8 @@ pipeline { stage('L2: Duplex Text Normalization') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1470,13 +1470,13 @@ pipeline { } } } - // Runs out of memory on the 12G TITAN V (GPU 0 on main CI) + // Runs out of memory on the 12G TITAN V (GPU 0 on r1.19.0 CI) // TODO: add when megatron bert is supported again in NeMo // stage('L2: MegaBERT Token Classification') { // when { // anyOf { - // branch 'r1.17.0' - // changeRequest target: 'r1.17.0' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -1501,8 +1501,8 @@ pipeline { stage('L2: BERT Text Classification') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1530,8 +1530,8 @@ pipeline { stage('L2: Parallel BERT Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1589,8 +1589,8 @@ pipeline { stage('L2: Parallel BART Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1650,8 +1650,8 @@ pipeline { stage('L2: Parallel GPT2 Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1711,8 +1711,8 @@ pipeline { stage('L2: Intent and Slot Classification Tasks') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1751,8 +1751,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Text Classification') { // when { // anyOf{ - // branch 'r1.17.0' - // changeRequest target: 'r1.17.0' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -1780,8 +1780,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Autoresume') { // when { // anyOf{ - // branch 'r1.17.0' - // changeRequest target: 'r1.17.0' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -1811,8 +1811,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Evaluation from .nemo') { // when { // anyOf{ - // branch 'r1.17.0' - // changeRequest target: 'r1.17.0' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -1832,8 +1832,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Train from .nemo') { // when { // anyOf{ - // branch 'r1.17.0' - // changeRequest target: 'r1.17.0' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -1855,8 +1855,8 @@ pipeline { stage('L2: Parallel NLP Examples 2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1980,8 +1980,8 @@ pipeline { stage('Punctuation & Capitalization tarred dataset') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2039,8 +2039,8 @@ pipeline { stage('Punctuation & Capitalization, Different ways of passing labels to model') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2147,8 +2147,8 @@ pipeline { stage('Punctuation & Capitalization inference') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2173,8 +2173,8 @@ pipeline { stage('L2: Parallel Pretraining BERT pretraining from Text/Preprocessed') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2235,8 +2235,8 @@ pipeline { stage('L2: Entity Linking') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2263,8 +2263,8 @@ pipeline { stage('L2: NMT Attention is All You Need Training') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2386,8 +2386,8 @@ pipeline { stage('L2: NMT Attention is All You Need Inference') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2422,8 +2422,8 @@ pipeline { stage('L2: NMT Attention is All You Need Finetuning') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2457,8 +2457,8 @@ pipeline { stage('L2: NMT Tarred Dataset Creation') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2511,8 +2511,8 @@ pipeline { stage('L2: Megatron NMT Training TP=2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2610,8 +2610,8 @@ pipeline { // stage('L2: NMT Bottleneck Fallback') { // when { // anyOf { - // branch 'r1.17.0' - // changeRequest target: 'r1.17.0' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -2657,8 +2657,8 @@ pipeline { // stage('L2: NMT Bottleneck Architecture') { // when { // anyOf { - // branch 'r1.17.0' - // changeRequest target: 'r1.17.0' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -2740,8 +2740,8 @@ pipeline { // stage('L2: NMT Bottleneck LVM') { // when { // anyOf { - // branch 'r1.17.0' - // changeRequest target: 'r1.17.0' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -2823,8 +2823,8 @@ pipeline { stage('L2: Megatron Bert Pretraining and Resume Training with Pipeline Paralleism') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2893,8 +2893,8 @@ pipeline { stage('L2: Megatron Bert Pretraining and Resume Training') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2964,8 +2964,8 @@ pipeline { stage('L2: Megatron RETRO Pretraining and Resume Training') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -3036,8 +3036,8 @@ pipeline { stage('L2: Megatron RETRO muTransfer Pretraining Performance') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -3119,8 +3119,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: BioMegatron Bert NER Task') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -3137,8 +3137,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -3221,8 +3221,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Pretraining and Resume Training PP=2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -3305,8 +3305,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Finetuning PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -3373,8 +3373,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Eval') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -3390,8 +3390,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Eval PP2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -3409,8 +3409,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Prompt Tuning TP1 PP1') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -3444,8 +3444,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Prompt Tuning TP2 PP1') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -3488,8 +3488,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' // stage('L2: Megatron GPT Prompt Tuning TP1 PP2') { // when { // anyOf { - // branch 'r1.17.0' - // changeRequest target: 'r1.17.0' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -3533,8 +3533,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' // stage('L2: Megatron GPT Convert from Megatron-LM checkpoing and Eval') { // when { // anyOf { - // branch 'r1.17.0' - // changeRequest target: 'r1.17.0' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -3560,8 +3560,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron Change Partitions') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -3607,8 +3607,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -3703,8 +3703,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 with ALiBi Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -3799,8 +3799,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 with KERPLE Pretraining and Resume Training TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -3895,8 +3895,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Pretraining and Resume Training PP=2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -3965,8 +3965,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 w/ Mixture of Expert Pretraining') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -4010,8 +4010,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Prompt Learning TP1 PP1') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -4051,8 +4051,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Prompt Learning TP2 PP1') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -4096,8 +4096,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' // stage('L2: Megatron T5 Prompt Learning TP1 PP2') { // when { // anyOf { - // branch 'r1.17.0' - // changeRequest target: 'r1.17.0' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -4140,8 +4140,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron UL2 Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -4220,8 +4220,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Eval') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -4237,8 +4237,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron BART Pretraining and Resume Training, TP=2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -4306,8 +4306,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron BART Pretraining and Resume Training, PP=2') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -4379,8 +4379,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 GLUE/XNLI Finetuning') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -4452,8 +4452,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron Mock Data Generation') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -4489,8 +4489,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: TTS Fast dev runs 1') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } parallel { @@ -4635,8 +4635,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L??: Speech Checkpoints tests') { when { anyOf { - branch 'r1.17.0' - changeRequest target: 'r1.17.0' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true diff --git a/README.rst b/README.rst index 1335620ead25..841509dfec5f 100644 --- a/README.rst +++ b/README.rst @@ -5,9 +5,9 @@ :target: http://www.repostatus.org/#active :alt: Project Status: Active – The project has reached a stable, usable state and is being actively developed. -.. |documentation| image:: https://readthedocs.com/projects/nvidia-nemo/badge/?version=main +.. |documentation| image:: https://readthedocs.com/projects/nvidia-nemo/badge/?version=r1.19.0 :alt: Documentation - :target: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/ + :target: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/ .. |license| image:: https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg :target: https://github.com/NVIDIA/NeMo/blob/master/LICENSE @@ -25,7 +25,7 @@ :target: https://pepy.tech/project/nemo-toolkit :alt: PyPi total downloads -.. |codeql| image:: https://github.com/nvidia/nemo/actions/workflows/codeql.yml/badge.svg?branch=main&event=push +.. |codeql| image:: https://github.com/nvidia/nemo/actions/workflows/codeql.yml/badge.svg?branch=r1.19.0&event=push :target: https://github.com/nvidia/nemo/actions/workflows/codeql.yml :alt: CodeQL @@ -33,7 +33,7 @@ :target: https://github.com/psf/black :alt: Code style: black -.. _main-readme: +.. _r1.19.0-readme: **NVIDIA NeMo** =============== @@ -61,7 +61,7 @@ We have extensive `tutorials `_. For advanced users that want to train NeMo models from scratch or finetune existing NeMo models -we have a full suite of `example scripts `_ that support multi-GPU/multi-node training. +we have a full suite of `example scripts `_ that support multi-GPU/multi-node training. For scaling NeMo LLM training on Slurm clusters or public clouds, please see the `NVIDIA NeMo Megatron Launcher `_. The NM launcher has extensive recipes, scripts, utilities, and documentation for training NeMo LLMs and also has an `Autoconfigurator `_ @@ -74,7 +74,7 @@ Key Features * Speech processing * `HuggingFace Space for Audio Transcription (File, Microphone and YouTube) `_ - * `Automatic Speech Recognition (ASR) `_ + * `Automatic Speech Recognition (ASR) `_ * Supported ASR models: ``_ * Jasper, QuartzNet, CitriNet, ContextNet * Conformer-CTC, Conformer-Transducer, FastConformer-CTC, FastConformer-Transducer @@ -88,42 +88,42 @@ Key Features * Streaming/Buffered ASR (CTC/Transducer) - `Chunked Inference Examples `_ * Cache-aware Streaming Conformer - ``_ * Beam Search decoding - * `Language Modelling for ASR `_: N-gram LM in fusion with Beam Search decoding, Neural Rescoring with Transformer - * `Support of long audios for Conformer with memory efficient local attention `_ - * `Speech Classification, Speech Command Recognition and Language Identification `_: MatchboxNet (Command Recognition), AmberNet (LangID) + * `Language Modelling for ASR `_: N-gram LM in fusion with Beam Search decoding, Neural Rescoring with Transformer + * `Support of long audios for Conformer with memory efficient local attention `_ + * `Speech Classification, Speech Command Recognition and Language Identification `_: MatchboxNet (Command Recognition), AmberNet (LangID) * `Voice activity Detection (VAD) `_: MarbleNet * ASR with VAD Inference - `Example `_ - * `Speaker Recognition `_: TitaNet, ECAPA_TDNN, SpeakerNet - * `Speaker Diarization `_ + * `Speaker Recognition `_: TitaNet, ECAPA_TDNN, SpeakerNet + * `Speaker Diarization `_ * Clustering Diarizer: TitaNet, ECAPA_TDNN, SpeakerNet * Neural Diarizer: MSDD (Multi-scale Diarization Decoder) - * `Speech Intent Detection and Slot Filling `_: Conformer-Transformer + * `Speech Intent Detection and Slot Filling `_: Conformer-Transformer * `Pretrained models on different languages. `_: English, Spanish, German, Russian, Chinese, French, Italian, Polish, ... * `NGC collection of pre-trained speech processing models. `_ * Natural Language Processing * `NeMo Megatron pre-training of Large Language Models `_ - * `Neural Machine Translation (NMT) `_ - * `Punctuation and Capitalization `_ - * `Token classification (named entity recognition) `_ - * `Text classification `_ - * `Joint Intent and Slot Classification `_ - * `Question answering `_ - * `GLUE benchmark `_ - * `Information retrieval `_ - * `Entity Linking `_ - * `Dialogue State Tracking `_ - * `Prompt Learning `_ + * `Neural Machine Translation (NMT) `_ + * `Punctuation and Capitalization `_ + * `Token classification (named entity recognition) `_ + * `Text classification `_ + * `Joint Intent and Slot Classification `_ + * `Question answering `_ + * `GLUE benchmark `_ + * `Information retrieval `_ + * `Entity Linking `_ + * `Dialogue State Tracking `_ + * `Prompt Learning `_ * `NGC collection of pre-trained NLP models. `_ * `Synthetic Tabular Data Generation `_ -* `Speech synthesis (TTS) `_ +* `Speech synthesis (TTS) `_ * Spectrogram generation: Tacotron2, GlowTTS, TalkNet, FastPitch, FastSpeech2, Mixer-TTS, Mixer-TTS-X * Vocoders: WaveGlow, SqueezeWave, UniGlow, MelGAN, HiFiGAN, UnivNet * End-to-end speech generation: FastPitch_HifiGan_E2E, FastSpeech2_HifiGan_E2E, VITS * `NGC collection of pre-trained TTS models. `_ * `Tools `_ - * `Text Processing (text normalization and inverse text normalization) `_ - * `CTC-Segmentation tool `_ - * `Speech Data Explorer `_: a dash-based tool for interactive exploration of ASR/TTS datasets + * `Text Processing (text normalization and inverse text normalization) `_ + * `CTC-Segmentation tool `_ + * `Speech Data Explorer `_: a dash-based tool for interactive exploration of ASR/TTS datasets * `Speech Data Processor `_ @@ -139,10 +139,10 @@ Requirements Documentation ------------- -.. |main| image:: https://readthedocs.com/projects/nvidia-nemo/badge/?version=main +.. |r1.19.0| image:: https://readthedocs.com/projects/nvidia-nemo/badge/?version=r1.19.0 :alt: Documentation Status :scale: 100% - :target: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/ + :target: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/ .. |stable| image:: https://readthedocs.com/projects/nvidia-nemo/badge/?version=stable :alt: Documentation Status @@ -152,7 +152,7 @@ Documentation +---------+-------------+------------------------------------------------------------------------------------------------------------------------------------------+ | Version | Status | Description | +=========+=============+==========================================================================================================================================+ -| Latest | |main| | `Documentation of the latest (i.e. main) branch. `_ | +| Latest | |r1.19.0| | `Documentation of the latest (i.e. main) branch. `_ | +---------+-------------+------------------------------------------------------------------------------------------------------------------------------------------+ | Stable | |stable| | `Documentation of the stable (i.e. most recent release) branch. `_ | +---------+-------------+------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/tutorials/00_NeMo_Primer.ipynb b/tutorials/00_NeMo_Primer.ipynb index 4acf685ad5c3..193680f6d06d 100644 --- a/tutorials/00_NeMo_Primer.ipynb +++ b/tutorials/00_NeMo_Primer.ipynb @@ -14,7 +14,7 @@ "\n", "The toolkit comes with extendable collections of pre-built modules and ready-to-use models for automatic speech recognition (ASR), natural language processing (NLP) and text synthesis (TTS). Built for speed, NeMo can utilize NVIDIA's Tensor Cores and scale out training to multiple GPUs and multiple nodes.\n", "\n", - "For more information, please visit https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/#" + "For more information, please visit https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/#" ] }, { @@ -42,7 +42,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", @@ -1146,7 +1146,7 @@ "\n", "NeMo constantly adds new models and new tasks to these examples, such that these examples serve as the basis to train and evaluate models from scratch with the provided config files.\n", "\n", - "NeMo Examples directory can be found here - https://github.com/NVIDIA/NeMo/tree/main/examples" + "NeMo Examples directory can be found here - https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples" ] }, { @@ -1251,7 +1251,7 @@ "\n", "While the tutorials are a great example of the simplicity of NeMo, please note for the best performance when training on real datasets, we advice the use of the example scripts instead of the tutorial notebooks. \n", "\n", - "NeMo Tutorials directory can be found here - https://github.com/NVIDIA/NeMo/tree/main/tutorials" + "NeMo Tutorials directory can be found here - https://github.com/NVIDIA/NeMo/tree/r1.19.0/tutorials" ] } ], diff --git a/tutorials/01_NeMo_Models.ipynb b/tutorials/01_NeMo_Models.ipynb index d18e285b7de8..2a65509bd8cd 100644 --- a/tutorials/01_NeMo_Models.ipynb +++ b/tutorials/01_NeMo_Models.ipynb @@ -37,7 +37,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/02_NeMo_Adapters.ipynb b/tutorials/02_NeMo_Adapters.ipynb index 40cf4753dbd4..e6874d14169f 100644 --- a/tutorials/02_NeMo_Adapters.ipynb +++ b/tutorials/02_NeMo_Adapters.ipynb @@ -25,7 +25,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/AudioTranslationSample.ipynb b/tutorials/AudioTranslationSample.ipynb index 69603686be08..ac79ca3b204d 100644 --- a/tutorials/AudioTranslationSample.ipynb +++ b/tutorials/AudioTranslationSample.ipynb @@ -38,7 +38,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n" ] }, @@ -249,7 +249,7 @@ "* [Speech Synthesis](https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/tts/Inference_ModelSelect.ipynb)\n", "\n", "\n", - "You can find scripts for training and fine-tuning ASR, NLP and TTS models [here](https://github.com/NVIDIA/NeMo/tree/main/examples). " + "You can find scripts for training and fine-tuning ASR, NLP and TTS models [here](https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples). " ] } ], diff --git a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb index 73c83788c295..da2e53fd94eb 100644 --- a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb +++ b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb @@ -41,7 +41,7 @@ "!pip install text-unidecode\n", "\n", "### Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/VoiceSwapSample.ipynb b/tutorials/VoiceSwapSample.ipynb index addf19f3b236..ea8356981908 100644 --- a/tutorials/VoiceSwapSample.ipynb +++ b/tutorials/VoiceSwapSample.ipynb @@ -39,7 +39,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n" ] }, @@ -283,7 +283,7 @@ "* [Speech Synthesis](https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/tts/Inference_ModelSelect.ipynb)\n", "\n", "\n", - "You can find scripts for training and fine-tuning ASR, NLP and TTS models [here](https://github.com/NVIDIA/NeMo/tree/main/examples). " + "You can find scripts for training and fine-tuning ASR, NLP and TTS models [here](https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples). " ] }, { diff --git a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb index cbad6b79b858..fac120e1b699 100644 --- a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb +++ b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb @@ -40,7 +40,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -71,7 +71,7 @@ "\n", "For this tutorial (and limited by the compute and storage available on Colab environments), we will attempt to fine-tune an English ASR model onto the [Mozilla Common Voice](https://commonvoice.mozilla.org/en) dataset for Japanese. This dataset will also allow us to discuss a few details for fine-tuning low-resource languages. The methods discussed here can also be applied to languages with several thousand hours of data!\n", "\n", - "**Note**: It is advised to review the execution flow diagram for ASR models in order to correctly setup the model prior to fine-tuning - [ASR CTC Examples](https://github.com/NVIDIA/NeMo/blob/main/examples/asr/asr_ctc/README.md)\n" + "**Note**: It is advised to review the execution flow diagram for ASR models in order to correctly setup the model prior to fine-tuning - [ASR CTC Examples](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/asr/asr_ctc/README.md)\n" ] }, { diff --git a/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb b/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb index 5293f85044fc..c0af01bd27c2 100644 --- a/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb +++ b/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb @@ -10,7 +10,7 @@ "NOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\n", "\n", - "Training an ASR model for a new language can be challenging, especially for low-resource languages (see [example](https://github.com/NVIDIA/NeMo/blob/main/docs/source/asr/examples/kinyarwanda_asr.rst) for Kinyarwanda CommonVoice ASR model).\n", + "Training an ASR model for a new language can be challenging, especially for low-resource languages (see [example](https://github.com/NVIDIA/NeMo/blob/r1.19.0/docs/source/asr/examples/kinyarwanda_asr.rst) for Kinyarwanda CommonVoice ASR model).\n", "\n", "This example describes all basic steps required to build ASR model for Esperanto:\n", "\n", @@ -160,7 +160,7 @@ "\n", "The tarred dataset allows storing the dataset as large *.tar files instead of small separate audio files. It may speed up the training and minimizes the load when data is moved from storage to GPU nodes.\n", "\n", - "The NeMo toolkit provides a [script]( https://github.com/NVIDIA/NeMo/blob/main/scripts/speech_recognition/convert_to_tarred_audio_dataset.py) to get tarred dataset.\n", + "The NeMo toolkit provides a [script]( https://github.com/NVIDIA/NeMo/blob/r1.19.0/scripts/speech_recognition/convert_to_tarred_audio_dataset.py) to get tarred dataset.\n", "\n", "```bash\n", "\n", @@ -207,11 +207,11 @@ "source": [ "## Training hyper-parameters\n", "\n", - "The training parameters are defined in the [config file](https://github.com/NVIDIA/NeMo/blob/main/examples/asr/conf/conformer/conformer_ctc_bpe.yaml) (general description of the [ASR configuration file](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/configs.html)). As an encoder, the [Conformer model](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/models.html#conformer-ctc) is used here, the training parameters for which are already well configured based on the training English models. However, the set of optimal parameters may differ for a new language. In this section, we will look at the set of simple parameters that can improve recognition quality for a new language without digging into the details of the Conformer model too much.\n", + "The training parameters are defined in the [config file](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/asr/conf/conformer/conformer_ctc_bpe.yaml) (general description of the [ASR configuration file](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/configs.html)). As an encoder, the [Conformer model](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/models.html#conformer-ctc) is used here, the training parameters for which are already well configured based on the training English models. However, the set of optimal parameters may differ for a new language. In this section, we will look at the set of simple parameters that can improve recognition quality for a new language without digging into the details of the Conformer model too much.\n", "\n", "### Select Training Batch Size\n", "\n", - "We trained model on server with 16 V100 GPUs with 32 GB. We use a local batch size = 32 per GPU V100), so global batch size is 32x16=512. In general, we observed, that global batch between 512 and 2048 works well for Conformer-CTC-Large model. One can use the [accumulate_grad_batches](https://github.com/NVIDIA/NeMo/blob/main/examples/asr/conf/conformer/conformer_ctc_bpe.yaml#L173) parameter to increase the size of the global batch, which is equal to *local_batch * num_gpu * accumulate_grad_batches*.\n", + "We trained model on server with 16 V100 GPUs with 32 GB. We use a local batch size = 32 per GPU V100), so global batch size is 32x16=512. In general, we observed, that global batch between 512 and 2048 works well for Conformer-CTC-Large model. One can use the [accumulate_grad_batches](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/asr/conf/conformer/conformer_ctc_bpe.yaml#L173) parameter to increase the size of the global batch, which is equal to *local_batch * num_gpu * accumulate_grad_batches*.\n", "\n", "### Selecting Optimizer and Learning Rate Scheduler\n", "\n", @@ -327,7 +327,7 @@ "+init_from_pretrained_model=${PRETRAINED_MODEL_NAME}\n", "```\n", "\n", - "If the size of the vocabulary differs from the one presented in the pretrained model, you need to change the vocabulary manually as done in the [finetuning tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb).\n", + "If the size of the vocabulary differs from the one presented in the pretrained model, you need to change the vocabulary manually as done in the [finetuning tutorial](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb).\n", "\n", "```python\n", "model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(f\"nvidia/{PRETRAINED_MODEL_NAME}\", map_location='cpu')\n", diff --git a/tutorials/asr/ASR_for_telephony_speech.ipynb b/tutorials/asr/ASR_for_telephony_speech.ipynb index 1db6a631d6bc..48be4b4db737 100644 --- a/tutorials/asr/ASR_for_telephony_speech.ipynb +++ b/tutorials/asr/ASR_for_telephony_speech.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/ASR_with_NeMo.ipynb b/tutorials/asr/ASR_with_NeMo.ipynb index e843e93ec599..c1f62a871a91 100644 --- a/tutorials/asr/ASR_with_NeMo.ipynb +++ b/tutorials/asr/ASR_with_NeMo.ipynb @@ -54,7 +54,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -588,7 +588,7 @@ "\n", "if not os.path.exists(config_path):\n", " # Grab the config we'll use in this example\n", - " BRANCH = 'r1.17.0'\n", + " BRANCH = 'r1.19.0'\n", " !mkdir configs\n", " !wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/config.yaml\n", "\n", diff --git a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb index 3adca1a8d0ac..cf4d8442fe0f 100644 --- a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb +++ b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb @@ -41,7 +41,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/ASR_with_Transducers.ipynb b/tutorials/asr/ASR_with_Transducers.ipynb index 59ef8c17d40b..7846a1468d98 100644 --- a/tutorials/asr/ASR_with_Transducers.ipynb +++ b/tutorials/asr/ASR_with_Transducers.ipynb @@ -29,7 +29,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference.ipynb b/tutorials/asr/Buffered_Transducer_Inference.ipynb index 6661634332a5..bc1209a80410 100644 --- a/tutorials/asr/Buffered_Transducer_Inference.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb index d4caac73899e..fad96a6097b0 100644 --- a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb @@ -46,7 +46,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Intro_to_Transducers.ipynb b/tutorials/asr/Intro_to_Transducers.ipynb index 3af72397866a..c82d7ed86dcd 100644 --- a/tutorials/asr/Intro_to_Transducers.ipynb +++ b/tutorials/asr/Intro_to_Transducers.ipynb @@ -44,7 +44,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ], "execution_count": null, @@ -225,7 +225,7 @@ "id": "0W12xF_CqcVF" }, "source": [ - "![](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/images/transducer.png?raw=true)" + "![](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/asr/images/transducer.png?raw=true)" ] }, { diff --git a/tutorials/asr/Multilang_ASR.ipynb b/tutorials/asr/Multilang_ASR.ipynb index eba666bdf808..431dc515a459 100644 --- a/tutorials/asr/Multilang_ASR.ipynb +++ b/tutorials/asr/Multilang_ASR.ipynb @@ -104,7 +104,7 @@ "\n", "## Install NeMo\n", "## We are using the main branch but you might want to adjust that too\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -204,7 +204,7 @@ "outputs": [], "source": [ "if not os.path.exists(\"get_librispeech_data.py\"):\n", - " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/dataset_processing/get_librispeech_data.py" + " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/dataset_processing/get_librispeech_data.py" ] }, { @@ -296,7 +296,7 @@ "outputs": [], "source": [ "if not os.path.exists(\"get_commonvoice_data.py\"):\n", - " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/dataset_processing/get_commonvoice_data.py" + " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/dataset_processing/get_commonvoice_data.py" ] }, { @@ -800,7 +800,7 @@ "outputs": [], "source": [ "if not os.path.exists(\"process_asr_text_tokenizer.py\"):\n", - " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/tokenizers/process_asr_text_tokenizer.py" + " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/tokenizers/process_asr_text_tokenizer.py" ] }, { diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb index c692277d1257..685d3ef6f37a 100644 --- a/tutorials/asr/Offline_ASR.ipynb +++ b/tutorials/asr/Offline_ASR.ipynb @@ -30,7 +30,7 @@ "* use beam search decoder with N-gram language model re-scoring\n", "\n", "You may find more info on how to train and use language models for ASR models here:\n", - "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/asr/asr_language_modeling.html\n", + "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/asr/asr_language_modeling.html\n", "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n" ] }, @@ -52,7 +52,7 @@ "id": "I9eIxAyKHREB" }, "source": [ - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "try:\n", " # Import NeMo Speech Recognition collection\n", " import nemo.collections.asr as nemo_asr\n", diff --git a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb index 04125a030006..9d4f66b82599 100644 --- a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb +++ b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb @@ -23,7 +23,7 @@ "!pip install wget\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb index da38a15ebf8f..6a1ac0bb1079 100644 --- a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb +++ b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb @@ -27,7 +27,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Online_Noise_Augmentation.ipynb b/tutorials/asr/Online_Noise_Augmentation.ipynb index bf21c7992089..b2fbf1a2b17d 100644 --- a/tutorials/asr/Online_Noise_Augmentation.ipynb +++ b/tutorials/asr/Online_Noise_Augmentation.ipynb @@ -32,7 +32,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb index 940b19605a7d..e642fd4f6961 100644 --- a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb @@ -27,7 +27,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -67,7 +67,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This notebook requires the `torchaudio` library to be installed for MarbleNet. Please follow the instructions available at the [torchaudio installer](https://github.com/NVIDIA/NeMo/blob/main/scripts/installers/install_torchaudio_latest.sh) and [torchaudio Github page](https://github.com/pytorch/audio#installation) to install the appropriate version of torchaudio.\n" + "This notebook requires the `torchaudio` library to be installed for MarbleNet. Please follow the instructions available at the [torchaudio installer](https://github.com/NVIDIA/NeMo/blob/r1.19.0/scripts/installers/install_torchaudio_latest.sh) and [torchaudio Github page](https://github.com/pytorch/audio#installation) to install the appropriate version of torchaudio.\n" ] }, { diff --git a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb index 8632e5e2926b..23e31e5b0da0 100644 --- a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb @@ -29,7 +29,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -59,7 +59,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This notebook requires the `torchaudio` library to be installed for MatchboxNet. Please follow the instructions available at the [torchaudio installer](https://github.com/NVIDIA/NeMo/blob/main/scripts/installers/install_torchaudio_latest.sh) and [torchaudio Github page](https://github.com/pytorch/audio#installation) to install the appropriate version of torchaudio.\n" + "This notebook requires the `torchaudio` library to be installed for MatchboxNet. Please follow the instructions available at the [torchaudio installer](https://github.com/NVIDIA/NeMo/blob/r1.19.0/scripts/installers/install_torchaudio_latest.sh) and [torchaudio Github page](https://github.com/pytorch/audio#installation) to install the appropriate version of torchaudio.\n" ] }, { diff --git a/tutorials/asr/Self_Supervised_Pre_Training.ipynb b/tutorials/asr/Self_Supervised_Pre_Training.ipynb index c6f50f5595d7..fe47a62e2f27 100644 --- a/tutorials/asr/Self_Supervised_Pre_Training.ipynb +++ b/tutorials/asr/Self_Supervised_Pre_Training.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -51,7 +51,7 @@ "\n", "The approach we will use for pre-training our models is represented in the following diagram:\n", "\n", - " ![SSL diagram](https://raw.githubusercontent.com/NVIDIA/NeMo/main/tutorials/asr/images/contrastive_ssl.png)\n", + " ![SSL diagram](https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/tutorials/asr/images/contrastive_ssl.png)\n", "\n", "We first mask parts of our input using SpecAugment. The model is then trained to solve a contrastive task of distinguishing the latent representation of the masked time steps from several sampled distractors. Since our encoders also contain stride blocks which reduce the length of the inputs, in order to obtain target representations we combine several consecutive time steps. They are then passed through a quantizer, which has been found to help with contrastive pre-training." ] @@ -272,8 +272,8 @@ "source": [ "## Grab the configs we'll use in this example\n", "!mkdir configs\n", - "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/ssl/citrinet/citrinet_ssl_1024.yaml\n", - "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/citrinet/citrinet_1024.yaml\n" + "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/conf/ssl/citrinet/citrinet_ssl_1024.yaml\n", + "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/conf/citrinet/citrinet_1024.yaml\n" ] }, { @@ -482,7 +482,7 @@ "outputs": [], "source": [ "!mkdir scripts\n", - "!wget -P scripts/ https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/tokenizers/process_asr_text_tokenizer.py\n", + "!wget -P scripts/ https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/tokenizers/process_asr_text_tokenizer.py\n", "\n", "!python ./scripts/process_asr_text_tokenizer.py \\\n", " --manifest=\"{data_dir}/an4/train_manifest.json\" \\\n", diff --git a/tutorials/asr/Speech_Commands.ipynb b/tutorials/asr/Speech_Commands.ipynb index 245d64c1f45a..b26cba7da0b3 100644 --- a/tutorials/asr/Speech_Commands.ipynb +++ b/tutorials/asr/Speech_Commands.ipynb @@ -61,7 +61,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Streaming_ASR.ipynb b/tutorials/asr/Streaming_ASR.ipynb index 1ba7415630d5..d90bf363370a 100644 --- a/tutorials/asr/Streaming_ASR.ipynb +++ b/tutorials/asr/Streaming_ASR.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", @@ -62,7 +62,7 @@ "* Real-time or close to real-time inference for live transcriptions\n", "* Offline transcriptions of very long audio\n", "\n", - "In this tutorial, we will mainly focus on streaming for handling long form audio and close to real-time inference with CTC based models. For training ASR models we usually use short segments of audio (<20s) that may be smaller chunks of a long audio that is aligned with the transcriptions and segmented into smaller chunks (see [tools/](https://github.com/NVIDIA/NeMo/tree/main/tools) for some great tools to do this). For running inference on long audio files we are restricted by the available GPU memory that dictates the maximum length of audio that can be transcribed in one inference call. We will take a look at one of the ways to overcome this restriction using NeMo's Conformer-CTC ASR model." + "In this tutorial, we will mainly focus on streaming for handling long form audio and close to real-time inference with CTC based models. For training ASR models we usually use short segments of audio (<20s) that may be smaller chunks of a long audio that is aligned with the transcriptions and segmented into smaller chunks (see [tools/](https://github.com/NVIDIA/NeMo/tree/r1.19.0/tools) for some great tools to do this). For running inference on long audio files we are restricted by the available GPU memory that dictates the maximum length of audio that can be transcribed in one inference call. We will take a look at one of the ways to overcome this restriction using NeMo's Conformer-CTC ASR model." ] }, { diff --git a/tutorials/asr/Voice_Activity_Detection.ipynb b/tutorials/asr/Voice_Activity_Detection.ipynb index 030f2066a6ab..b4c7e33f7bab 100644 --- a/tutorials/asr/Voice_Activity_Detection.ipynb +++ b/tutorials/asr/Voice_Activity_Detection.ipynb @@ -28,7 +28,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb index 2f15181b8160..80cf4ecacc1d 100644 --- a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb +++ b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb @@ -50,7 +50,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", @@ -1297,7 +1297,7 @@ "source": [ "# Further reading\n", "\n", - "For efficient scripts to add, train, and evaluate adapter augmented models, please refer to the [Adapters example section](https://github.com/NVIDIA/NeMo/tree/main/examples/asr/asr_adapters).\n", + "For efficient scripts to add, train, and evaluate adapter augmented models, please refer to the [Adapters example section](https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples/asr/asr_adapters).\n", "\n", "Please follow the following articles that discuss the use of adapters in ASR - \n", "- [Exploiting Adapters for Cross-lingual Low-resource Speech Recognition](https://arxiv.org/abs/2105.11905)\n", diff --git a/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb b/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb index 4dc860b79471..97697781dddd 100644 --- a/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb +++ b/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb @@ -70,7 +70,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -193,17 +193,17 @@ "config_path = str(config_dir / \"config.yaml\")\n", "\n", "# download scripts to format the data source.\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/speech_recognition/convert_hf_dataset_to_nemo.py\", str(code_dir))\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/speech_recognition/convert_to_tarred_audio_dataset.py\",\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/speech_recognition/convert_hf_dataset_to_nemo.py\", str(code_dir))\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/speech_recognition/convert_to_tarred_audio_dataset.py\",\n", " str(code_dir))\n", "\n", "# download scripts to run training\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/conformer/conformer_ctc_bpe.yaml\", config_path)\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/asr_ctc/speech_to_text_ctc_bpe.py\",\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/conf/conformer/conformer_ctc_bpe.yaml\", config_path)\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/asr_ctc/speech_to_text_ctc_bpe.py\",\n", " str(code_dir))\n", "\n", "# download script to create tokenizer\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/tokenizers/process_asr_text_tokenizer.py\",\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/tokenizers/process_asr_text_tokenizer.py\",\n", " str(code_dir))" ] }, diff --git a/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb b/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb index fdcc91d95ea0..078e76d55ba7 100644 --- a/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb +++ b/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb @@ -55,7 +55,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -173,8 +173,8 @@ "outputs": [], "source": [ "config_path = str(config_dir / \"config.yaml\")\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/conformer/conformer_ctc_char.yaml\", config_path)\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/asr_ctc/speech_to_text_ctc.py\", str(code_dir))" + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/conf/conformer/conformer_ctc_char.yaml\", config_path)\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/asr_ctc/speech_to_text_ctc.py\", str(code_dir))" ] }, { diff --git a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb index 049908b3a9a5..c18ebbac596b 100644 --- a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb +++ b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb @@ -26,7 +26,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, @@ -152,7 +152,7 @@ "id": "jEgEo0aPj3Ws" }, "source": [ - "All NeMo [NLP models](https://github.com/NVIDIA/NeMo/tree/main/examples/nlp) have an associated config file. As an example, let's examine the config file for the Named Entity Recognition (NER) model (more details about the model and the NER task could be found [here](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb))." + "All NeMo [NLP models](https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples/nlp) have an associated config file. As an example, let's examine the config file for the Named Entity Recognition (NER) model (more details about the model and the NER task could be found [here](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb))." ] }, { @@ -261,7 +261,7 @@ "id": "EVp4zvxPatga" }, "source": [ - "and then start the training as usual (please see [tutorials/nlp](https://github.com/NVIDIA/NeMo/tree/main/tutorials/nlp) for more details about training of a particular model). \n", + "and then start the training as usual (please see [tutorials/nlp](https://github.com/NVIDIA/NeMo/tree/r1.19.0/tutorials/nlp) for more details about training of a particular model). \n", "\n", "You can also provide a pretrained language model checkpoint and a configuration file if available.\n", "\n", @@ -349,7 +349,7 @@ "model.language_model.lm_checkpoint= \\\n", "model.language_model.config_file=`\n", "\n", - "The general Megatron-LM model names are used to download the correct vocabulary file needed to setup the model correctly. Note, the data preprocessing and model training is done in NeMo. Megatron-LM has its own set of training arguments (including tokenizer) that are ignored during finetuning in NeMo. Please see downstream task [config files and training scripts](https://github.com/NVIDIA/NeMo/tree/main/examples/nlp) for all NeMo supported arguments.\n", + "The general Megatron-LM model names are used to download the correct vocabulary file needed to setup the model correctly. Note, the data preprocessing and model training is done in NeMo. Megatron-LM has its own set of training arguments (including tokenizer) that are ignored during finetuning in NeMo. Please see downstream task [config files and training scripts](https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples/nlp) for all NeMo supported arguments.\n", "\n", "## Download pretrained model\n", "\n", @@ -373,7 +373,7 @@ "source": [ "# Using any HuggingFace Pretrained Model\n", "\n", - "Currently, there are 4 HuggingFace language models that have the most extensive support in [NeMo](https://github.com/NVIDIA/NeMo/tree/main/nemo/collections/nlp/modules/common/huggingface): \n", + "Currently, there are 4 HuggingFace language models that have the most extensive support in [NeMo](https://github.com/NVIDIA/NeMo/tree/r1.19.0/nemo/collections/nlp/modules/common/huggingface): \n", "\n", "* BERT\n", "* RoBERTa\n", @@ -383,7 +383,7 @@ "As was mentioned before, just set `model.language_model.pretrained_model_name` to the desired model name in your config and get_lm_model() will take care of the rest.\n", "\n", "If you want to use another language model from [https://huggingface.co/models](https://huggingface.co/models), use HuggingFace API directly in NeMo.\n", - "More details on model training could be found at [tutorials](https://github.com/NVIDIA/NeMo/tree/main/tutorials)." + "More details on model training could be found at [tutorials](https://github.com/NVIDIA/NeMo/tree/r1.19.0/tutorials)." ] } ], diff --git a/tutorials/nlp/02_NLP_Tokenizers.ipynb b/tutorials/nlp/02_NLP_Tokenizers.ipynb index d78bf2377c51..5c909fe73432 100644 --- a/tutorials/nlp/02_NLP_Tokenizers.ipynb +++ b/tutorials/nlp/02_NLP_Tokenizers.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.17.0'" + "BRANCH = 'r1.19.0'" ] }, { @@ -35,7 +35,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb index 302b1dd85fc5..28d5330ac3b2 100644 --- a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb +++ b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb @@ -217,7 +217,7 @@ "print()\n", "\n", "\n", - "!wget https://raw.github.com/NVIDIA/NeMo/main/scripts/neural_machine_translation/filter_langs_nmt.py \\\n", + "!wget https://raw.github.com/NVIDIA/NeMo/r1.19.0/scripts/neural_machine_translation/filter_langs_nmt.py \\\n", " -O filter_langs_nmt.py\n", "\n", "!python filter_langs_nmt.py \\\n", @@ -300,7 +300,7 @@ "\n", "## Install NeMo\n", "\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "!pip uninstall -y sacrebleu\n", @@ -760,7 +760,7 @@ "metadata": {}, "outputs": [], "source": [ - "!wget https://raw.github.com/NVIDIA/NeMo/main/examples/nlp/machine_translation/create_tarred_parallel_dataset.py \\\n", + "!wget https://raw.github.com/NVIDIA/NeMo/r1.19.0/examples/nlp/machine_translation/create_tarred_parallel_dataset.py \\\n", " -O create_tarred_parallel_dataset.py\n", "\n", "!python create_tarred_parallel_dataset.py \\\n", diff --git a/tutorials/nlp/Dialogue.ipynb b/tutorials/nlp/Dialogue.ipynb index 0050b07fb343..cf0392da9c36 100644 --- a/tutorials/nlp/Dialogue.ipynb +++ b/tutorials/nlp/Dialogue.ipynb @@ -27,7 +27,7 @@ "outputs": [], "source": [ "import os \n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!apt-get update && apt-get install -y libsndfile1 ffmpeg\n", "!git clone https://github.com/NVIDIA/NeMo --branch $BRANCH\n", "os.chdir('NeMo')\n", diff --git a/tutorials/nlp/Entity_Linking_Medical.ipynb b/tutorials/nlp/Entity_Linking_Medical.ipynb index e7cd74a21d77..892eb881b528 100644 --- a/tutorials/nlp/Entity_Linking_Medical.ipynb +++ b/tutorials/nlp/Entity_Linking_Medical.ipynb @@ -17,7 +17,7 @@ "\"\"\"\n", "\n", "## Install NeMo if using google collab or if its not installed locally\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, @@ -68,7 +68,7 @@ "#### Task Description\n", "[Entity linking](https://en.wikipedia.org/wiki/Entity_linking) is the process of connecting concepts mentioned in natural language to their canonical forms stored in a knowledge base. For example, say a knowledge base contained the entity 'ID3452 influenza' and we wanted to process some natural language containing the sentence \"The patient has flu like symptoms\". An entity linking model would match the word 'flu' to the knowledge base entity 'ID3452 influenza', allowing for disambiguation and normalization of concepts referenced in text. Entity linking applications range from helping automate data ingestion to assisting in real time dialogue concept normalization. We will be focusing on entity linking in the medical domain for this demo, but the entity linking model, dataset, and training code within NVIDIA NeMo can be applied to other domains like finance and retail.\n", "\n", - "Within NeMo and this tutorial we use the entity linking approach described in Liu et. al's NAACL 2021 \"[Self-alignment Pre-training for Biomedical Entity Representations](https://arxiv.org/abs/2010.11784v2)\". The main idea behind this approach is to reshape an initial concept embedding space such that synonyms of the same concept are pulled closer together and unrelated concepts are pushed further apart. The concept embeddings from this reshaped space can then be used to build a knowledge base embedding index. This index stores concept IDs mapped to their respective concept embeddings in a format conducive to efficient nearest neighbor search. We can link query concepts to their canonical forms in the knowledge base by performing a nearest neighbor search- matching concept query embeddings to the most similar concepts embeddings in the knowledge base index. \n", + "Within NeMo and this tutorial we use the entity linking approach described in Liu et. al's NAACL 2021 \"[Self-alignment Pre-training for Biomedical Entity Representations](https://arxiv.org/abs/2010.11784v2)\". The r1.19.0 idea behind this approach is to reshape an initial concept embedding space such that synonyms of the same concept are pulled closer together and unrelated concepts are pushed further apart. The concept embeddings from this reshaped space can then be used to build a knowledge base embedding index. This index stores concept IDs mapped to their respective concept embeddings in a format conducive to efficient nearest neighbor search. We can link query concepts to their canonical forms in the knowledge base by performing a nearest neighbor search- matching concept query embeddings to the most similar concepts embeddings in the knowledge base index. \n", "\n", "In this tutorial we will be using the [faiss](https://github.com/facebookresearch/faiss) library to build our concept index." ] diff --git a/tutorials/nlp/GLUE_Benchmark.ipynb b/tutorials/nlp/GLUE_Benchmark.ipynb index 0cc61712d7de..1c60b95bcc8c 100644 --- a/tutorials/nlp/GLUE_Benchmark.ipynb +++ b/tutorials/nlp/GLUE_Benchmark.ipynb @@ -44,7 +44,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.17.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'r1.19.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb b/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb index 6204bf2516bb..50ec879b7761 100644 --- a/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb +++ b/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb @@ -21,7 +21,7 @@ "import os\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "\n", "GITHUB_ACCOUNT = 'NVIDIA' # change this if using a fork\n", "\n", @@ -284,7 +284,7 @@ "id": "miXYxOv_mNVo" }, "source": [ - "The script [prepare_corpora_for_alignment.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/text_normalization_as_tagging/dataset_preparation/prepare_corpora_for_alignment.py) prepares the described parallel corpora. It extracts all unique ITN phrase-pairs from the Google TN dataset, tokenizes them as described above and stores in separate folders for each semiotic class. It also generates a bash script for running the alignment. At the end it prints how many examples it has found:\n", + "The script [prepare_corpora_for_alignment.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/text_normalization_as_tagging/dataset_preparation/prepare_corpora_for_alignment.py) prepares the described parallel corpora. It extracts all unique ITN phrase-pairs from the Google TN dataset, tokenizes them as described above and stores in separate folders for each semiotic class. It also generates a bash script for running the alignment. At the end it prints how many examples it has found:\n", "```\n", "content/alignment/punct has 920953 instances\n", "content/alignment/date has 150499 instances\n", @@ -405,7 +405,7 @@ { "cell_type": "markdown", "source": [ - "GIZA++ will generate many files in our class folders, but we need only two files with final alignments, those with suffixes `A3.final`. The two files correspond to the alignments produced by two GIZA++ runs - direct and reverse (switching source and target corpus). This is a common practice, it allows us to find safer alignment points - tokens that were aligned to one another in both runs. The script [extract_giza_alignments.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/text_normalization_as_tagging/dataset_preparation/extract_giza_alignments.py) heuristically combines these two GIZA++ alignments. It also applies a bunch of regular expressions to correct some alignment mistakes." + "GIZA++ will generate many files in our class folders, but we need only two files with final alignments, those with suffixes `A3.final`. The two files correspond to the alignments produced by two GIZA++ runs - direct and reverse (switching source and target corpus). This is a common practice, it allows us to find safer alignment points - tokens that were aligned to one another in both runs. The script [extract_giza_alignments.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/text_normalization_as_tagging/dataset_preparation/extract_giza_alignments.py) heuristically combines these two GIZA++ alignments. It also applies a bunch of regular expressions to correct some alignment mistakes." ], "metadata": { "id": "ueJYVF0cU3ic" @@ -1016,11 +1016,11 @@ "\n", "See also the scripts for the whole pipeline:\n", "\n", - "> [prepare_dataset_en.sh](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/text_normalization_as_tagging/prepare_dataset_en.sh)\n", + "> [prepare_dataset_en.sh](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/text_normalization_as_tagging/prepare_dataset_en.sh)\n", "\n", - "> [normalization_as_tagging_train.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/text_normalization_as_tagging/normalization_as_tagging_train.py)\n", + "> [normalization_as_tagging_train.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/text_normalization_as_tagging/normalization_as_tagging_train.py)\n", "\n", - "> [run_infer.sh](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/text_normalization_as_tagging/run_infer.sh)\n", + "> [run_infer.sh](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/text_normalization_as_tagging/run_infer.sh)\n", "\n" ], "metadata": { diff --git a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb index ba80f5a34892..c656fdd7088a 100644 --- a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb +++ b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/MegatronBert_export.ipynb b/tutorials/nlp/MegatronBert_export.ipynb index 669be8d0643e..5bec75028064 100644 --- a/tutorials/nlp/MegatronBert_export.ipynb +++ b/tutorials/nlp/MegatronBert_export.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='r1.17.0'" + "BRANCH='r1.19.0'" ] }, { @@ -64,7 +64,7 @@ "\n", "If you prefer to use the Huggingface BERT models, please skip this section and refer to `Setting up a NeMo Experiment` section to load a model from `nemo_nlp.modules.get_pretrained_lm_models_list()`\n", "\n", - "NeMo Megatron BERT can [load from a pretrained model](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/core/core.html?highlight=nemo%20file#restore) using `.nemo` file. We can convert the Megatron-LM checkpoint to the `.nemo` file. Let's first download the pretrained model weights and vocabulary file." + "NeMo Megatron BERT can [load from a pretrained model](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/core/core.html?highlight=nemo%20file#restore) using `.nemo` file. We can convert the Megatron-LM checkpoint to the `.nemo` file. Let's first download the pretrained model weights and vocabulary file." ] }, { diff --git a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb index 74cc70e1db80..a92317b17320 100644 --- a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb +++ b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb @@ -62,7 +62,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "DATA_PATH='.'\n", "TRANSACTIONS=DATA_PATH+'/card_transaction.v1.csv'\n", "#CHECKPOINTS='/chk_points'\n", diff --git a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb index 004014ebdeeb..02d533e59ad4 100644 --- a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb +++ b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='main'" + "BRANCH='r1.19.0'" ] }, { @@ -45,7 +45,7 @@ "\n", "- Our p-tuning implementation is based off Liu et al's paper [GPT Understands, Too](https://arxiv.org/abs/2103.10385).\n", "\n", - "- Command line usage examples and API documentation can be found in [our user docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/nemo_megatron/prompt_learning.html). \n", + "- Command line usage examples and API documentation can be found in [our user docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/nemo_megatron/prompt_learning.html). \n", "\n", "\"Prompt\n", "\n", @@ -88,7 +88,7 @@ "# The Best of Both\n", "A single pretrained GPT model can use both p-tuning and prompt-tuning. While you must decide to use either p-tuning or prompt-tuning for each task you want your model to perform, you can p-tune your model on a set of tasks A, then prompt tune your same model on a different set of tasks B, then finally run inference on tasks from both A and B at the same time. During prompt-tuning or p-tuning, tasks tuned at the same time must use the same number of virtual tokens. During inference, tasks using differing amounts of virtual tokens can be run at the same time.\n", "\n", - "Please see our [docs for more comparisons between prompt and p-tuning](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/nemo_megatron/prompt_learning.html). \n", + "Please see our [docs for more comparisons between prompt and p-tuning](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/nemo_megatron/prompt_learning.html). \n", "\n", "With all that covered, let's get started!\n" ] @@ -723,7 +723,7 @@ "- `length_params`\n", "- `sampling_params`\n", "\n", - "as arguments. More information about the [text generation API can be found here](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/nlp/modules/common/transformer/text_generation.py).\n", + "as arguments. More information about the [text generation API can be found here](https://github.com/NVIDIA/NeMo/blob/r1.19.0/nemo/collections/nlp/modules/common/transformer/text_generation.py).\n", "\n", "If `length_params` and `sampling_params` are set to `None`, the model generates output with a greedy decoding strategy and generates up to `30` new tokens. Most predictive downstream tasks (not text generation tasks), use greedy sampling. To see other ways to run inference with your prompt learning model and more details on how to define various inference parameters, visit `examples/nlp/language_modeling/megatron_gpt_eval.py`.\n", "\n", diff --git a/tutorials/nlp/Punctuation_and_Capitalization.ipynb b/tutorials/nlp/Punctuation_and_Capitalization.ipynb index 82f64d5e692a..54e424c831c2 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.17.0'" + "BRANCH = 'r1.19.0'" ] }, { @@ -293,7 +293,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As you see, `get_tatoeba_data.py` script provides not only downloads Tatoeba but also creates labels. If you wish to preprocess your own data, use [examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py) script:\n", + "As you see, `get_tatoeba_data.py` script provides not only downloads Tatoeba but also creates labels. If you wish to preprocess your own data, use [examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py) script:\n", "\n", "```\n", "NEMO_ROOT = \"\"\n", @@ -421,7 +421,7 @@ "\n", "- **trainer**: Any argument to be passed to PyTorch Lightning\n", "\n", - "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." + "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." ] }, { @@ -950,7 +950,7 @@ "source": [ "## Training Script\n", "\n", - "If you have NeMo installed locally, you can also train the model with [nlp/token_classification/punctuation_capitalization_train_evaluate.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/punctuation_capitalization_train_evaluate.py).\n", + "If you have NeMo installed locally, you can also train the model with [nlp/token_classification/punctuation_capitalization_train_evaluate.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/token_classification/punctuation_capitalization_train_evaluate.py).\n", "\n", "To run training script, use:\n", "\n", diff --git a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb index e030bf8a5ba6..3ce2ef9fb6de 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.17.0'" + "BRANCH = 'r1.19.0'" ] }, { @@ -369,7 +369,7 @@ } }, "source": [ - "As you see, `get_libritts_data.py` script provides not only downloads LibriTTS but also creates labels. If you wish to preprocess your own data, use [examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py) script." + "As you see, `get_libritts_data.py` script provides not only downloads LibriTTS but also creates labels. If you wish to preprocess your own data, use [examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py) script." ] }, { @@ -514,7 +514,7 @@ "\n", "- **trainer**: Any argument to be passed to PyTorch Lightning\n", "\n", - "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." + "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." ] }, { @@ -913,7 +913,7 @@ "source": [ "## Training Script\n", "\n", - "If you have NeMo installed locally, you can also train the model with [nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py).\n", + "If you have NeMo installed locally, you can also train the model with [nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py).\n", "\n", "To run training script, use:\n", "\n", diff --git a/tutorials/nlp/Question_Answering.ipynb b/tutorials/nlp/Question_Answering.ipynb index 80f532ba0b8d..b337c569425d 100644 --- a/tutorials/nlp/Question_Answering.ipynb +++ b/tutorials/nlp/Question_Answering.ipynb @@ -74,7 +74,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.17.0'" + "BRANCH = 'r1.19.0'" ] }, { diff --git a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb index 88103290e8fd..0cd718e71754 100644 --- a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb +++ b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.17.0'" + "BRANCH = 'r1.19.0'" ] }, { diff --git a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb index 5137f4c65c89..3296acd05919 100644 --- a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb +++ b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb @@ -20,7 +20,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n", "\n" ] diff --git a/tutorials/nlp/Token_Classification-BioMegatron.ipynb b/tutorials/nlp/Token_Classification-BioMegatron.ipynb index d60f98aebd40..c3f95bff841a 100644 --- a/tutorials/nlp/Token_Classification-BioMegatron.ipynb +++ b/tutorials/nlp/Token_Classification-BioMegatron.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='r1.17.0'" + "BRANCH='r1.19.0'" ] }, { diff --git a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb index b95ea27f478f..9b8007751f55 100644 --- a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb +++ b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb @@ -30,7 +30,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.17.0'" + "BRANCH = 'r1.19.0'" ] }, { @@ -53,7 +53,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.17.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'r1.19.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb index faee7a4b6392..9ec4482f30b2 100644 --- a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb +++ b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, @@ -630,7 +630,7 @@ "source": [ "## Training Script\n", "\n", - "If you have NeMo installed locally, you can also train the model with [examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py](https://github.com/carolmanderson/NeMo/blob/main/examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py).\n", + "If you have NeMo installed locally, you can also train the model with [examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py](https://github.com/carolmanderson/NeMo/blob/r1.19.0/examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py).\n", "\n", "To run training script, use:\n", "\n", diff --git a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb index 192d46217cd3..0c0b8163622c 100644 --- a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb +++ b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb @@ -30,7 +30,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -58,7 +58,7 @@ "For detailed parameter setting and execution of speaker diarization, refer to this [Diarization Inference](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb) tutorial.\n", "\n", "\n", - "An example script that runs ASR and speaker diarization together can be found at [ASR with Diarization](https://github.com/NVIDIA/NeMo/blob/main/examples/speaker_tasks/diarization/clustering_diarizer/offline_diar_with_asr_infer.py).\n", + "An example script that runs ASR and speaker diarization together can be found at [ASR with Diarization](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/speaker_tasks/diarization/clustering_diarizer/offline_diar_with_asr_infer.py).\n", "\n", "### Speaker diarization in ASR pipeline\n", "\n", @@ -196,7 +196,7 @@ "DOMAIN_TYPE = \"meeting\" # Can be meeting or telephonic based on domain type of the audio file\n", "CONFIG_FILE_NAME = f\"diar_infer_{DOMAIN_TYPE}.yaml\"\n", "\n", - "CONFIG_URL = f\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/speaker_tasks/diarization/conf/inference/{CONFIG_FILE_NAME}\"\n", + "CONFIG_URL = f\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/speaker_tasks/diarization/conf/inference/{CONFIG_FILE_NAME}\"\n", "\n", "if not os.path.exists(os.path.join(data_dir,CONFIG_FILE_NAME)):\n", " CONFIG = wget.download(CONFIG_URL, data_dir)\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb index 20cc1f9ff37d..93ff3ed97b2e 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb @@ -23,7 +23,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -62,9 +62,9 @@ "* **with oracle VAD**: use ground-truth speech/non-speech labels. \n", "* **with system VAD**: use speech/non-speech labels generated by an actual VAD model. \n", "\n", - "We will first demonstrate how to perform diarization with a oracle VAD timestamps (we assume we already have speech timestamps) and pretrained speaker embedding extractor model which can be found in tutorial for [Speaker Identification and Verification in NeMo](https://github.com/NVIDIA/NeMo/blob/main/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb).\n", + "We will first demonstrate how to perform diarization with a oracle VAD timestamps (we assume we already have speech timestamps) and pretrained speaker embedding extractor model which can be found in tutorial for [Speaker Identification and Verification in NeMo](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb).\n", "\n", - "In the following section, we will also show how to perform VAD and then diarization if ground truth timestamp speech were not available (non-oracle VAD). We also have tutorials for [VAD training in NeMo](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/Voice_Activity_Detection.ipynb) and [online offline microphone inference](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb), where you can custom your model and training/finetuning on your own data.\n", + "In the following section, we will also show how to perform VAD and then diarization if ground truth timestamp speech were not available (non-oracle VAD). We also have tutorials for [VAD training in NeMo](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/asr/Voice_Activity_Detection.ipynb) and [online offline microphone inference](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb), where you can custom your model and training/finetuning on your own data.\n", "\n", "For demonstration purposes we would be using simulated audio from [an4 dataset](http://www.speech.cs.cmu.edu/databases/an4/)." ] @@ -140,7 +140,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - " We use a default multi-scale setting in [diar_infer_telephonic.yaml](https://github.com/NVIDIA/NeMo/blob/main/examples/speaker_tasks/diarization/conf/inference/diar_infer_telephonic.yaml) which has 5 scales from 1.5 s to 0.5 s, 50% overlap and equal weights. Note that only the ratio between numbers in `multiscale_weights` since the fused affinity matrix is normalized. For example, \\[1,1,1,1,1\\] and \\[0.5,0.5,0.5,0.5,0.5\\] will lead to the exactly same result." + " We use a default multi-scale setting in [diar_infer_telephonic.yaml](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/speaker_tasks/diarization/conf/inference/diar_infer_telephonic.yaml) which has 5 scales from 1.5 s to 0.5 s, 50% overlap and equal weights. Note that only the ratio between numbers in `multiscale_weights` since the fused affinity matrix is normalized. For example, \\[1,1,1,1,1\\] and \\[0.5,0.5,0.5,0.5,0.5\\] will lead to the exactly same result." ] }, { @@ -191,7 +191,7 @@ "MSDD models employ pairwise (two-speaker) unit-model for both training and inference. While training, pairwise model is trained on data samples with two speakers or two-speaker subset from data samples with more than two speakers. \n", "In inference mode, we retrieve all possible pairs from the estimated number of speakers and average the results. For example, if there are four speakers `(A, B, C, D)`, we extract 6 pairs: `(A,B)`, `(A,C)`, `(A,D)`, `(B,C)`, `(B,D)`, `(C,D)`. Finally, the sigmoid outputs are averaged. In this way, MSDD can deal with flexible number of speakers using a pairwise model. \n", "\n", - "The detailed information on MSDD model and model training can be found in tutorial on [Speaker Diarization Training](https://github.com/NVIDIA/NeMo/blob/main/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb). " + "The detailed information on MSDD model and model training can be found in tutorial on [Speaker Diarization Training](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb). " ] }, { @@ -399,7 +399,7 @@ "from omegaconf import OmegaConf\n", "MODEL_CONFIG = os.path.join(data_dir,'diar_infer_telephonic.yaml')\n", "if not os.path.exists(MODEL_CONFIG):\n", - " config_url = \"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/speaker_tasks/diarization/conf/inference/diar_infer_telephonic.yaml\"\n", + " config_url = \"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/speaker_tasks/diarization/conf/inference/diar_infer_telephonic.yaml\"\n", " MODEL_CONFIG = wget.download(config_url,data_dir)\n", "\n", "config = OmegaConf.load(MODEL_CONFIG)\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb index 896e4dce7e80..ab5cab58bc69 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "\n", "! git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", @@ -197,9 +197,9 @@ "\n", "- Please skip this section and go directly to [Prepare Training data for MSDD](#Prepare-Training-data-for-MSDD) section if you have your own speaker diarization dataset. \n", "\n", - "In this tutorial, we use [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tools/Multispeaker_Simulator.ipynb) and the Librispeech corpus to generate a toy training dataset for demonstration purpose. You can replace the simulated dataset with your own datasets if you have proper speaker annotations (RTTM files) for the dataset. If you do not have access to any speaker diarization datasets, you can use [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tools/Multispeaker_Simulator.ipynb) by generating a good amount of data samples to meet your needs. \n", + "In this tutorial, we use [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tools/Multispeaker_Simulator.ipynb) and the Librispeech corpus to generate a toy training dataset for demonstration purpose. You can replace the simulated dataset with your own datasets if you have proper speaker annotations (RTTM files) for the dataset. If you do not have access to any speaker diarization datasets, you can use [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tools/Multispeaker_Simulator.ipynb) by generating a good amount of data samples to meet your needs. \n", "\n", - "For more details regarding data simulator, please follow the descriptions in [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tools/Multispeaker_Simulator.ipynb) and we will not cover configurations and detailed process of data simulation in this tutorial. \n" + "For more details regarding data simulator, please follow the descriptions in [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tools/Multispeaker_Simulator.ipynb) and we will not cover configurations and detailed process of data simulation in this tutorial. \n" ] }, { @@ -232,7 +232,7 @@ "source": [ "import os\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "\n", "# download scripts if not already there \n", "if not os.path.exists('NeMo/scripts'):\n", diff --git a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb index d7f89e3c25c0..f956334b892c 100644 --- a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb +++ b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb @@ -27,7 +27,7 @@ "!pip install text-unidecode\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "# Install TorchAudio\n", @@ -58,7 +58,7 @@ "source": [ "In this tutorial, we shall first train these embeddings on speaker-related datasets, and then get speaker embeddings from a pretrained network for a new dataset. Since Google Colab has very slow read-write speeds, I'll be demonstrating this tutorial using [an4](http://www.speech.cs.cmu.edu/databases/an4/). \n", "\n", - "Instead, if you'd like to try on a bigger dataset like [hi-mia](https://arxiv.org/abs/1912.01231) use the [get_hi-mia-data.py](https://github.com/NVIDIA/NeMo/tree/main/scripts/dataset_processing/speaker_tasks/get_hi-mia_data.py) script to download the necessary files, extract them, and resample to 16Khz if any of these samples are not at 16Khz. " + "Instead, if you'd like to try on a bigger dataset like [hi-mia](https://arxiv.org/abs/1912.01231) use the [get_hi-mia-data.py](https://github.com/NVIDIA/NeMo/tree/r1.19.0/scripts/dataset_processing/speaker_tasks/get_hi-mia_data.py) script to download the necessary files, extract them, and resample to 16Khz if any of these samples are not at 16Khz. " ] }, { @@ -276,7 +276,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note: All the following steps are just for explanation of each section, but one can use the provided [training script](https://github.com/NVIDIA/NeMo/blob/main/examples/speaker_tasks/recognition/speaker_reco.py) to launch training in the command line." + "Note: All the following steps are just for explanation of each section, but one can use the provided [training script](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/speaker_tasks/recognition/speaker_reco.py) to launch training in the command line." ] }, { @@ -760,7 +760,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note: You may use [finetune-script](https://github.com/NVIDIA/NeMo/blob/main/examples/speaker_tasks/recognition/speaker_reco_finetune.py) to launch training in the command line. Following is just a demonstration of the script" + "Note: You may use [finetune-script](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/speaker_tasks/recognition/speaker_reco_finetune.py) to launch training in the command line. Following is just a demonstration of the script" ] }, { diff --git a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb index e2419273c5e4..5f5641d1fb85 100644 --- a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb +++ b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb @@ -35,7 +35,7 @@ "id": "d4KCUoxSpdoZ" }, "source": [ - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "\n", "\"\"\"\n", "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", @@ -126,7 +126,7 @@ "id": "S1DZk-inQGTI" }, "source": [ - "`TOOLS_DIR` contains scripts that we are going to need during the next steps, all necessary scripts could be found [here](https://github.com/NVIDIA/NeMo/tree/main/tools/ctc_segmentation/scripts)." + "`TOOLS_DIR` contains scripts that we are going to need during the next steps, all necessary scripts could be found [here](https://github.com/NVIDIA/NeMo/tree/r1.19.0/tools/ctc_segmentation/scripts)." ] }, { @@ -280,7 +280,7 @@ "* `max_length` argument - max number of words in a segment for alignment (used only if there are no punctuation marks present in the original text. Long non-speech segments are better for segments split and are more likely to co-occur with punctuation marks. Random text split could deteriorate the quality of the alignment.\n", "* out-of-vocabulary words will be removed based on pre-trained ASR model vocabulary, and the text will be changed to lowercase \n", "* sentences for alignment with the original punctuation and capitalization will be stored under `$OUTPUT_DIR/processed/*_with_punct.txt`\n", - "* numbers will be converted from written to their spoken form with `num2words` package. For English, it's recommended to use NeMo normalization tool use `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`https://github.com/NVIDIA/NeMo-text-processing/blob/main/tutorials/Text_(Inverse)_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo-text-processing/blob/main/tutorials/Text_(Inverse)_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. See [https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n", + "* numbers will be converted from written to their spoken form with `num2words` package. For English, it's recommended to use NeMo normalization tool use `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. See [https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n", "\n", "### Audio preprocessing:\n", "* non '.wav' audio files will be converted to `.wav` format\n", @@ -699,7 +699,7 @@ "source": [ "# Next Steps\n", "\n", - "- Check out [NeMo Speech Data Explorer tool](https://github.com/NVIDIA/NeMo/tree/main/tools/speech_data_explorer#speech-data-explorer) to interactively evaluate the aligned segments.\n", + "- Check out [NeMo Speech Data Explorer tool](https://github.com/NVIDIA/NeMo/tree/r1.19.0/tools/speech_data_explorer#speech-data-explorer) to interactively evaluate the aligned segments.\n", "- Try Audio-based normalization tool." ] }, diff --git a/tutorials/tools/Multispeaker_Simulator.ipynb b/tutorials/tools/Multispeaker_Simulator.ipynb index 2fa7d912f51b..8264854dfd59 100644 --- a/tutorials/tools/Multispeaker_Simulator.ipynb +++ b/tutorials/tools/Multispeaker_Simulator.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "\n", "! git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", @@ -326,7 +326,7 @@ "outputs": [], "source": [ "if not os.path.exists(\"multispeaker_data_analysis.py\"):\n", - " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/speaker_tasks/multispeaker_data_analysis.py\n", + " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/speaker_tasks/multispeaker_data_analysis.py\n", "\n", "from multispeaker_data_analysis import run_multispeaker_data_analysis\n", "\n", diff --git a/tutorials/tts/Aligner_Inference_Examples.ipynb b/tutorials/tts/Aligner_Inference_Examples.ipynb index b4aacabe623c..e113af5565ae 100644 --- a/tutorials/tts/Aligner_Inference_Examples.ipynb +++ b/tutorials/tts/Aligner_Inference_Examples.ipynb @@ -39,7 +39,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", @@ -700,7 +700,7 @@ "## Resources\n", "\n", "- For more information about the Aligner architecture, check out the [RAD-TTS Aligner paper](https://arxiv.org/abs/2108.10447).\n", - "- If you would like to run disambiguation on a large batch of sentences, try out the [Aligner disambiguation example script](https://github.com/NVIDIA/NeMo/blob/main/examples/tts/aligner_heteronym_disambiguation.py)." + "- If you would like to run disambiguation on a large batch of sentences, try out the [Aligner disambiguation example script](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/tts/aligner_heteronym_disambiguation.py)." ] }, { diff --git a/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb b/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb index 24744eecbad0..0f501f89a90e 100644 --- a/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb +++ b/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb @@ -57,7 +57,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "# If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !pip install librosa numpy matplotlib" ] @@ -601,9 +601,9 @@ "source": [ "## Additional NeMo Resources\n", "\n", - "If you are unsure where to begin for training a TTS model, you may want to start with the [FastPitch and Mixer-TTS Training notebook](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) or the [NeMo TTS Primer notebook](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/NeMo_TTS_Primer.ipynb). For fine-tuning, there is also the [FastPitch Fine-Tuning notebook](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_Finetuning.ipynb).\n", + "If you are unsure where to begin for training a TTS model, you may want to start with the [FastPitch and Mixer-TTS Training notebook](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) or the [NeMo TTS Primer notebook](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/NeMo_TTS_Primer.ipynb). For fine-tuning, there is also the [FastPitch Fine-Tuning notebook](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_Finetuning.ipynb).\n", "\n", - "For some guidance on how to load a trained model and perform inference to generate mels or waveforms, check out how it's done in the [Inference notebook](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/Inference_ModelSelect.ipynb). Important functions to know are include `from_pretrained()` (if loading from an NGC model) and `restore_from()` (if loading a `.nemo` file). See the [NeMo Primer notebook](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/00_NeMo_Primer.ipynb) for more general information about model training, saving, and loading." + "For some guidance on how to load a trained model and perform inference to generate mels or waveforms, check out how it's done in the [Inference notebook](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/Inference_ModelSelect.ipynb). Important functions to know are include `from_pretrained()` (if loading from an NGC model) and `restore_from()` (if loading a `.nemo` file). See the [NeMo Primer notebook](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/00_NeMo_Primer.ipynb) for more general information about model training, saving, and loading." ] } ], diff --git a/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb index fa1b1bdc90c8..95bc3805030c 100644 --- a/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb +++ b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb @@ -59,7 +59,7 @@ "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", - "# BRANCH = 'main'\n", + "# BRANCH = 'r1.19.0'\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget unidecode pynini==2.1.4 scipy==1.7.3\n", "# !python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", @@ -246,7 +246,7 @@ "source": [ "### Extract Supplementary Data\n", "\n", - "As mentioned in the [FastPitch and MixerTTS training tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) - To accelerate and stabilize our training, we also need to extract pitch for every audio, estimate pitch statistics (mean, std, min, and max). To do this, all we need to do is iterate over our data one time, via `extract_sup_data.py` script." + "As mentioned in the [FastPitch and MixerTTS training tutorial](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) - To accelerate and stabilize our training, we also need to extract pitch for every audio, estimate pitch statistics (mean, std, min, and max). To do this, all we need to do is iterate over our data one time, via `extract_sup_data.py` script." ] }, { diff --git a/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb b/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb index 38e6c2c9d6ff..6685eca56251 100644 --- a/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb @@ -61,7 +61,7 @@ "# !pip install wget text-unidecode matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@${BRANCH}#egg=nemo_toolkit[all]\"\n", "\n", "## Install pynini\n", @@ -134,10 +134,10 @@ "\n", "FastPitch is non-autoregressive model for mel-spectrogram generation based on FastSpeech, conditioned on fundamental frequency contours. For more details about model, please refer to the original [paper](https://ieeexplore.ieee.org/abstract/document/9413889). Original [FastPitch model](https://ieeexplore.ieee.org/abstract/document/9413889) uses an external Tacotron 2 model trained on LJSpeech-1.1 to extract training alignments and estimate durations of input symbols. This implementation of FastPitch is based on [Deep Learning Examples](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/FastPitch), which uses an alignment mechanism proposed in [RAD-TTS](https://openreview.net/pdf?id=0NQwnnwAORi) and extended in [TTS Aligner](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9747707).\n", "\n", - "For more information on training a basic FastPitch model, please refer to [FastPitch_MixerTTS_Training.ipynb](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) tutorial.\n", + "For more information on training a basic FastPitch model, please refer to [FastPitch_MixerTTS_Training.ipynb](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) tutorial.\n", "\n", "### HiFi-GAN\n", - "HiFi-GAN is a generative adversarial network (GAN) model that generates audio from mel spectrograms. The generator uses transposed convolutions to upsample mel spectrograms to audio. For more details about the model, please refer to the original [paper](https://arxiv.org/abs/2010.05646). NeMo re-implementation of HiFi-GAN can be found [here](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/tts/models/hifigan.py)." + "HiFi-GAN is a generative adversarial network (GAN) model that generates audio from mel spectrograms. The generator uses transposed convolutions to upsample mel spectrograms to audio. For more details about the model, please refer to the original [paper](https://arxiv.org/abs/2010.05646). NeMo re-implementation of HiFi-GAN can be found [here](https://github.com/NVIDIA/NeMo/blob/r1.19.0/nemo/collections/tts/models/hifigan.py)." ] }, { @@ -780,7 +780,7 @@ "- Finetuning with #1 has artifacts from the original audio (noise) that get passed on as input to the vocoder resulting in artifacts in vocoder output in the form of noise.\n", "- On the other hand, #2.1 (i.e. `Mel spectrogram predicted from FastPitch with groundtruth alignment and duration`) gives the best results because it enables HiFi-GAN to learn mel spectrograms generated by FastPitch as well as duration distributions closer to the real world (i.e. ground truth) durations. \n", "\n", - "From implementation perspective - we follow the same process described in [Finetuning FastPitch for a new speaker](FastPitch_Finetuning.ipynb) - i.e. take the latest checkpoint from FastPitch training and predict spectrograms for each of the input records in `train_manifest.json`, `test_manifest.json` and `val_manifest.json`. NeMo provides an efficient script, [scripts/dataset_processing/tts/generate_mels.py](https://raw.githubusercontent.com/nvidia/NeMo/main/scripts/dataset_processing/tts/generate_mels.py), to generate Mel-spectrograms in the directory `NeMoChineseTTS/mels` and also create new JSON manifests with a suffix `_mel` by adding a new key `\"mel_filepath\"`. For example, `train_manifest.json` corresponds to `train_manifest_mel.json` saved in the same directory. You can run the following CLI to obtain the new JSON manifests." + "From implementation perspective - we follow the same process described in [Finetuning FastPitch for a new speaker](FastPitch_Finetuning.ipynb) - i.e. take the latest checkpoint from FastPitch training and predict spectrograms for each of the input records in `train_manifest.json`, `test_manifest.json` and `val_manifest.json`. NeMo provides an efficient script, [scripts/dataset_processing/tts/generate_mels.py](https://raw.githubusercontent.com/nvidia/NeMo/r1.19.0/scripts/dataset_processing/tts/generate_mels.py), to generate Mel-spectrograms in the directory `NeMoChineseTTS/mels` and also create new JSON manifests with a suffix `_mel` by adding a new key `\"mel_filepath\"`. For example, `train_manifest.json` corresponds to `train_manifest_mel.json` saved in the same directory. You can run the following CLI to obtain the new JSON manifests." ] }, { diff --git a/tutorials/tts/FastPitch_Finetuning.ipynb b/tutorials/tts/FastPitch_Finetuning.ipynb index baec811d9285..cf94862263cd 100755 --- a/tutorials/tts/FastPitch_Finetuning.ipynb +++ b/tutorials/tts/FastPitch_Finetuning.ipynb @@ -57,7 +57,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode \n", @@ -627,7 +627,7 @@ "id": "843674e7", "metadata": {}, "source": [ - "We can then finetune hifigan similarly to fastpitch using NeMo's [hifigan_finetune.py](https://github.com/NVIDIA/NeMo/blob/main/examples/tts/hifigan_finetune.py) and [hifigan.yaml](https://github.com/NVIDIA/NeMo/blob/main/examples/tts/conf/hifigan/hifigan.yaml):\n", + "We can then finetune hifigan similarly to fastpitch using NeMo's [hifigan_finetune.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/tts/hifigan_finetune.py) and [hifigan.yaml](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/tts/conf/hifigan/hifigan.yaml):\n", "\n", "```bash\n", "python examples/tts/hifigan_finetune.py \\\n", diff --git a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb index b6c5463bb8c0..7d1ce265856a 100644 --- a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb @@ -61,7 +61,7 @@ "# !pip install wget text-unidecode matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@${BRANCH}#egg=nemo_toolkit[all]\"\n", "\n", "## Install pynini\n", @@ -133,10 +133,10 @@ "\n", "FastPitch is non-autoregressive model for mel-spectrogram generation based on FastSpeech, conditioned on fundamental frequency contours. For more details about model, please refer to the original [paper](https://ieeexplore.ieee.org/abstract/document/9413889). Original [FastPitch model](https://ieeexplore.ieee.org/abstract/document/9413889) uses an external Tacotron 2 model trained on LJSpeech-1.1 to extract training alignments and estimate durations of input symbols. This implementation of FastPitch is based on [Deep Learning Examples](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/FastPitch), which uses an alignment mechanism proposed in [RAD-TTS](https://openreview.net/pdf?id=0NQwnnwAORi) and extended in [TTS Aligner](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9747707).\n", "\n", - "For more information on training a basic FastPitch model, please refer to [FastPitch_MixerTTS_Training.ipynb](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) tutorial.\n", + "For more information on training a basic FastPitch model, please refer to [FastPitch_MixerTTS_Training.ipynb](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) tutorial.\n", "\n", "### HiFiGAN\n", - "HiFiGAN is a generative adversarial network (GAN) model that generates audio from mel spectrograms. The generator uses transposed convolutions to upsample mel spectrograms to audio. For more details about the model, please refer to the original [paper](https://arxiv.org/abs/2010.05646). NeMo re-implementation of HiFi-GAN can be found [here](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/tts/models/hifigan.py)." + "HiFiGAN is a generative adversarial network (GAN) model that generates audio from mel spectrograms. The generator uses transposed convolutions to upsample mel spectrograms to audio. For more details about the model, please refer to the original [paper](https://arxiv.org/abs/2010.05646). NeMo re-implementation of HiFi-GAN can be found [here](https://github.com/NVIDIA/NeMo/blob/r1.19.0/nemo/collections/tts/models/hifigan.py)." ] }, { @@ -172,7 +172,7 @@ "3. `text`: original text;\n", "4. `normalized_text`: normalized text through our text normalization pipeline.\n", " \n", - "This script supports processing either of Thorsten's Neutral Datasets 21.02 or 22.10. In this tutorial, we only focus on the latest 22.10 version dataset. Please refer [thorsten-muller-s-german-neutral-tts-datasets](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tts/datasets.html#thorsten-muller-s-german-neutral-tts-datasets) for more details about Thorsten's datasets. \n", + "This script supports processing either of Thorsten's Neutral Datasets 21.02 or 22.10. In this tutorial, we only focus on the latest 22.10 version dataset. Please refer [thorsten-muller-s-german-neutral-tts-datasets](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/tts/datasets.html#thorsten-muller-s-german-neutral-tts-datasets) for more details about Thorsten's datasets. \n", "\n", "You can run the below command to obtain the final manifests, `train_manifest_text_normed.json`, `val_manifest_text_normed.json` and `test_manifest_text_normed.json`. **Note** that this script would take sometime (~2 hours) to download and normalize the entire dataset." ] @@ -649,7 +649,7 @@ "- Finetuning with #1 has artifacts from the original audio (noise) that get passed on as input to the vocoder resulting in artifacts in vocoder output in the form of noise.\n", "- On the other hand, #2.1 (i.e. `Mel spectrogram predicted from FastPitch with groundtruth alignment and duration`) gives the best results because it enables HiFi-GAN to learn mel spectrograms generated by FastPitch as well as duration distributions closer to the real world (i.e. ground truth) durations. \n", "\n", - "From implementation perspective - we follow the same process described in [Finetuning FastPitch for a new speaker](FastPitch_Finetuning.ipynb) - i.e. take the latest checkpoint from FastPitch training and predict spectrograms for each of the input records in `train_manifest_text_normed.json`, `test_manifest_text_normed.json` and `val_manifest_text_normed.json`. NeMo provides an efficient script, [scripts/dataset_processing/tts/generate_mels.py](https://raw.githubusercontent.com/nvidia/NeMo/main/scripts/dataset_processing/tts/generate_mels.py), to generate Mel-spectrograms in the directory `NeMoGermanTTS/mels` and also create new JSON manifests with a suffix `_mel` by adding a new key `\"mel_filepath\"`. For example, `train_manifest_text_normed.json` corresponds to `train_manifest_text_normed_mel.json` saved in the same directory. You can run the following CLI to obtain the new JSON manifests." + "From implementation perspective - we follow the same process described in [Finetuning FastPitch for a new speaker](FastPitch_Finetuning.ipynb) - i.e. take the latest checkpoint from FastPitch training and predict spectrograms for each of the input records in `train_manifest_text_normed.json`, `test_manifest_text_normed.json` and `val_manifest_text_normed.json`. NeMo provides an efficient script, [scripts/dataset_processing/tts/generate_mels.py](https://raw.githubusercontent.com/nvidia/NeMo/r1.19.0/scripts/dataset_processing/tts/generate_mels.py), to generate Mel-spectrograms in the directory `NeMoGermanTTS/mels` and also create new JSON manifests with a suffix `_mel` by adding a new key `\"mel_filepath\"`. For example, `train_manifest_text_normed.json` corresponds to `train_manifest_text_normed_mel.json` saved in the same directory. You can run the following CLI to obtain the new JSON manifests." ] }, { diff --git a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb index 0d938127c00b..403faa965534 100644 --- a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb @@ -50,7 +50,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode scipy==1.7.3\n", diff --git a/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb b/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb index defd0272d89d..a67744ef0f58 100644 --- a/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb +++ b/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb @@ -56,7 +56,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", - "# BRANCH = 'main'\n", + "# BRANCH = 'r1.19.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget unidecode pynini==2.1.4 scipy==1.7.3\n", @@ -258,7 +258,7 @@ "source": [ "### Extract Supplementary Data\n", "\n", - "As mentioned in the [FastPitch and MixerTTS training tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) - To accelerate and stabilize our training, we also need to extract pitch for every audio, estimate pitch statistics (mean, std, min, and max). To do this, all we need to do is iterate over our data one time, via `extract_sup_data.py` script." + "As mentioned in the [FastPitch and MixerTTS training tutorial](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) - To accelerate and stabilize our training, we also need to extract pitch for every audio, estimate pitch statistics (mean, std, min, and max). To do this, all we need to do is iterate over our data one time, via `extract_sup_data.py` script." ] }, { diff --git a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb index 068ba166e8ec..5a7f56dc201d 100644 --- a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb +++ b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb @@ -94,7 +94,7 @@ "source": [ "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/tts/Inference_DurationPitchControl.ipynb b/tutorials/tts/Inference_DurationPitchControl.ipynb index ab1fc6ce12a8..d4e1b1ba0678 100644 --- a/tutorials/tts/Inference_DurationPitchControl.ipynb +++ b/tutorials/tts/Inference_DurationPitchControl.ipynb @@ -46,7 +46,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", @@ -202,7 +202,7 @@ "\n", "Let's see the `pitch_predicted` for a sample text. You can run the below cell. You should get an image that looks like the following for the input `Hey, what is my pitch?`:\n", "\n", - "\n", + "\n", "\n", "Notice that the last word `pitch` has an increase in pitch to stress that it is a question." ] diff --git a/tutorials/tts/Inference_ModelSelect.ipynb b/tutorials/tts/Inference_ModelSelect.ipynb index 1b0aa7b43642..abdda3e16747 100644 --- a/tutorials/tts/Inference_ModelSelect.ipynb +++ b/tutorials/tts/Inference_ModelSelect.ipynb @@ -46,7 +46,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/NeMo_TTS_Primer.ipynb b/tutorials/tts/NeMo_TTS_Primer.ipynb index 8a78f8dbb6e5..497552a9ac43 100644 --- a/tutorials/tts/NeMo_TTS_Primer.ipynb +++ b/tutorials/tts/NeMo_TTS_Primer.ipynb @@ -25,7 +25,7 @@ "source": [ "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/tts/Pronunciation_customization.ipynb b/tutorials/tts/Pronunciation_customization.ipynb index 1e762b86cf9d..6185610fe4ab 100644 --- a/tutorials/tts/Pronunciation_customization.ipynb +++ b/tutorials/tts/Pronunciation_customization.ipynb @@ -26,7 +26,7 @@ "4. Run this cell to set up dependencies.\n", "\"\"\"\n", "\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode \n", @@ -128,7 +128,7 @@ "metadata": {}, "source": [ "#### Expected results if you run the tutorial:\n", - " \n", + " \n", "\n", "\n", "During preprocessing, unambiguous dictionary words are converted to phonemes, while OOV and words with multiple entries are kept as graphemes. For example, **paracetamol** is missing from the phoneme dictionary, and **can** has 2 forms." @@ -186,7 +186,7 @@ "metadata": {}, "source": [ "#### Expected results if you run the tutorial:\n", - " \n", + " \n", "\n", "\n", "## Dictionary customization\n", @@ -212,7 +212,7 @@ "if os.path.exists(ipa_cmu_dict):\n", " ! rm $ipa_cmu_dict\n", "\n", - "! wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/tts_dataset_files/$ipa_cmu_dict\n", + "! wget https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/tts_dataset_files/$ipa_cmu_dict\n", "\n", "with open(ipa_cmu_dict, \"a\") as f:\n", " f.write(f\"PARACETAMOL {new_pronunciation}\\n\")\n", @@ -267,7 +267,7 @@ "metadata": {}, "source": [ "#### Expected results if you run the tutorial:\n", - " " + " " ] }, { @@ -276,7 +276,7 @@ "source": [ "# Resources\n", "* [TTS pipeline customization](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tts/tts-custom.html#tts-pipeline-configuration)\n", - "* [Overview of TTS in NeMo](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/NeMo_TTS_Primer.ipynb)\n", + "* [Overview of TTS in NeMo](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/NeMo_TTS_Primer.ipynb)\n", "* [G2P models in NeMo](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tts/g2p.html)\n", "* [Riva TTS documentation](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tts/tts-overview.html)" ] diff --git a/tutorials/tts/Tacotron2_Training.ipynb b/tutorials/tts/Tacotron2_Training.ipynb index 005a1b5bae1c..a696ee26e8a7 100644 --- a/tutorials/tts/Tacotron2_Training.ipynb +++ b/tutorials/tts/Tacotron2_Training.ipynb @@ -54,7 +54,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", - "BRANCH = 'r1.17.0'\n", + "BRANCH = 'r1.19.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/Vits_Training.ipynb b/tutorials/tts/Vits_Training.ipynb index 37e55e0d7572..dbe4e9362cc1 100644 --- a/tutorials/tts/Vits_Training.ipynb +++ b/tutorials/tts/Vits_Training.ipynb @@ -63,7 +63,7 @@ "# !pip install wget text-unidecode matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@${BRANCH}#egg=nemo_toolkit[all]\"\n", "\n", "## Install pynini\n", From 3d58990436e9161d7f366388030fefbe86907838 Mon Sep 17 00:00:00 2001 From: Li Tao Date: Fri, 19 May 2023 21:04:59 +0800 Subject: [PATCH 118/512] Fix a bug, use _ceil_to_nearest instead as _round_to_nearest is not defined (#6681) --- .../nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py index deb6e77cdb92..2c896c2e61af 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py @@ -183,7 +183,7 @@ def collate_fn(self, batch): if self.pad_to_max_length: max_length = self.max_seq_length else: - max_length = min(self.max_seq_length, self._round_to_nearest(max_length, 8)) + max_length = min(self.max_seq_length, self._ceil_to_nearest(max_length, 8)) assert max_length <= self.max_seq_length attention_mask = [self._create_attention_mask(max_length) for _ in batch] From 57dc5d525b1b2b905ed62cd0e18107b19341aa89 Mon Sep 17 00:00:00 2001 From: Vladimir Bataev Date: Tue, 23 May 2023 21:19:09 +0400 Subject: [PATCH 119/512] Fix k2 installation in Docker with CUDA 12 (#6707) Signed-off-by: Vladimir Bataev --- scripts/speech_recognition/k2/setup.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/speech_recognition/k2/setup.sh b/scripts/speech_recognition/k2/setup.sh index 64d9a3c122e4..48ca31dab803 100755 --- a/scripts/speech_recognition/k2/setup.sh +++ b/scripts/speech_recognition/k2/setup.sh @@ -15,10 +15,12 @@ # limitations under the License. K2_REPO=https://github.com/k2-fsa/k2 -LATEST_RELEASE=$(git -c 'versionsort.suffix=-' \ - ls-remote --exit-code --refs --sort='version:refname' --tags ${K2_REPO} '*.*' \ - | tail --lines=1 \ - | cut -d '/' -f 3) +LATEST_RELEASE=e5671de # Temporary fix for CUDA 12 +# uncomment the following line after the next k2 version is released (>1.24.3) +#LATEST_RELEASE=$(git -c 'versionsort.suffix=-' \ +# ls-remote --exit-code --refs --sort='version:refname' --tags ${K2_REPO} '*.*' \ +# | tail --lines=1 \ +# | cut -d '/' -f 3) # "cut --delimiter '/' --fields 3" doesn't work on macOS, use "-d ... -f ..." instead K2_MAKE_ARGS="-j" pip install -v "git+${K2_REPO}@${LATEST_RELEASE}#egg=k2" || { echo "k2 could not be installed!"; exit 1; } From cf678db134c20701e8f9d05ccc478a79af6e7544 Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Wed, 24 May 2023 15:21:36 -0700 Subject: [PATCH 120/512] Tutorial fixes (#6717) Signed-off-by: smajumdar --- tutorials/00_NeMo_Primer.ipynb | 2 +- tutorials/AudioTranslationSample.ipynb | 2 +- tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb | 2 +- tutorials/asr/Offline_ASR.ipynb | 2 +- tutorials/nlp/MegatronBert_export.ipynb | 2 +- tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb | 4 ++-- tutorials/nlp/Punctuation_and_Capitalization.ipynb | 2 +- .../nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb | 2 +- tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb | 2 +- tutorials/tools/CTC_Segmentation_Tutorial.ipynb | 2 +- tutorials/tts/FastPitch_GermanTTS_Training.ipynb | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tutorials/00_NeMo_Primer.ipynb b/tutorials/00_NeMo_Primer.ipynb index 193680f6d06d..2eff9c596b7f 100644 --- a/tutorials/00_NeMo_Primer.ipynb +++ b/tutorials/00_NeMo_Primer.ipynb @@ -14,7 +14,7 @@ "\n", "The toolkit comes with extendable collections of pre-built modules and ready-to-use models for automatic speech recognition (ASR), natural language processing (NLP) and text synthesis (TTS). Built for speed, NeMo can utilize NVIDIA's Tensor Cores and scale out training to multiple GPUs and multiple nodes.\n", "\n", - "For more information, please visit https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/#" + "For more information, please visit https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/#" ] }, { diff --git a/tutorials/AudioTranslationSample.ipynb b/tutorials/AudioTranslationSample.ipynb index ac79ca3b204d..b5c9d13a5c6d 100644 --- a/tutorials/AudioTranslationSample.ipynb +++ b/tutorials/AudioTranslationSample.ipynb @@ -63,7 +63,7 @@ "import nemo\n", "# Import Speech Recognition collection\n", "import nemo.collections.asr as nemo_asr\n", - "# Import Natural Language Processing colleciton\n", + "# Import Natural Language Processing collection\n", "import nemo.collections.nlp as nemo_nlp\n", "# Import Speech Synthesis collection\n", "import nemo.collections.tts as nemo_tts\n", diff --git a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb index da2e53fd94eb..ede417d3583c 100644 --- a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb +++ b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb @@ -627,7 +627,7 @@ "\n", "\n", "Eg: \n", - "Since this model was trained on publically available speech datasets, the performance of this model might degrade for speech which includes technical terms, or vernacular that the model has not been trained on. The model might also perform worse for accented speech.\n", + "Since this model was trained on publicly available speech datasets, the performance of this model might degrade for speech which includes technical terms, or vernacular that the model has not been trained on. The model might also perform worse for accented speech.\n", "\n", "\n", "## References\n", diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb index 685d3ef6f37a..2d963a6b77d3 100644 --- a/tutorials/asr/Offline_ASR.ipynb +++ b/tutorials/asr/Offline_ASR.ipynb @@ -30,7 +30,7 @@ "* use beam search decoder with N-gram language model re-scoring\n", "\n", "You may find more info on how to train and use language models for ASR models here:\n", - "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/asr/asr_language_modeling.html\n", + "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/asr/asr_language_modeling.html\n", "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n" ] }, diff --git a/tutorials/nlp/MegatronBert_export.ipynb b/tutorials/nlp/MegatronBert_export.ipynb index 5bec75028064..c435d6e76d54 100644 --- a/tutorials/nlp/MegatronBert_export.ipynb +++ b/tutorials/nlp/MegatronBert_export.ipynb @@ -64,7 +64,7 @@ "\n", "If you prefer to use the Huggingface BERT models, please skip this section and refer to `Setting up a NeMo Experiment` section to load a model from `nemo_nlp.modules.get_pretrained_lm_models_list()`\n", "\n", - "NeMo Megatron BERT can [load from a pretrained model](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/core/core.html?highlight=nemo%20file#restore) using `.nemo` file. We can convert the Megatron-LM checkpoint to the `.nemo` file. Let's first download the pretrained model weights and vocabulary file." + "NeMo Megatron BERT can [load from a pretrained model](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/core/core.html?highlight=nemo%20file#restore) using `.nemo` file. We can convert the Megatron-LM checkpoint to the `.nemo` file. Let's first download the pretrained model weights and vocabulary file." ] }, { diff --git a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb index 02d533e59ad4..7ccf33826157 100644 --- a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb +++ b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb @@ -45,7 +45,7 @@ "\n", "- Our p-tuning implementation is based off Liu et al's paper [GPT Understands, Too](https://arxiv.org/abs/2103.10385).\n", "\n", - "- Command line usage examples and API documentation can be found in [our user docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/nemo_megatron/prompt_learning.html). \n", + "- Command line usage examples and API documentation can be found in [our user docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/nemo_megatron/prompt_learning.html). \n", "\n", "\"Prompt\n", "\n", @@ -88,7 +88,7 @@ "# The Best of Both\n", "A single pretrained GPT model can use both p-tuning and prompt-tuning. While you must decide to use either p-tuning or prompt-tuning for each task you want your model to perform, you can p-tune your model on a set of tasks A, then prompt tune your same model on a different set of tasks B, then finally run inference on tasks from both A and B at the same time. During prompt-tuning or p-tuning, tasks tuned at the same time must use the same number of virtual tokens. During inference, tasks using differing amounts of virtual tokens can be run at the same time.\n", "\n", - "Please see our [docs for more comparisons between prompt and p-tuning](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/nemo_megatron/prompt_learning.html). \n", + "Please see our [docs for more comparisons between prompt and p-tuning](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/nemo_megatron/prompt_learning.html). \n", "\n", "With all that covered, let's get started!\n" ] diff --git a/tutorials/nlp/Punctuation_and_Capitalization.ipynb b/tutorials/nlp/Punctuation_and_Capitalization.ipynb index 54e424c831c2..ea6dc45ef273 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization.ipynb @@ -421,7 +421,7 @@ "\n", "- **trainer**: Any argument to be passed to PyTorch Lightning\n", "\n", - "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." + "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." ] }, { diff --git a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb index 3ce2ef9fb6de..62b3255d119b 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb @@ -514,7 +514,7 @@ "\n", "- **trainer**: Any argument to be passed to PyTorch Lightning\n", "\n", - "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." + "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." ] }, { diff --git a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb index 9ec4482f30b2..a1b0c4fd8561 100644 --- a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb +++ b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb @@ -630,7 +630,7 @@ "source": [ "## Training Script\n", "\n", - "If you have NeMo installed locally, you can also train the model with [examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py](https://github.com/carolmanderson/NeMo/blob/r1.19.0/examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py).\n", + "If you have NeMo installed locally, you can also train the model with [examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py](https://github.com/carolmanderson/NeMo/blob/main/examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py).\n", "\n", "To run training script, use:\n", "\n", diff --git a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb index 5f5641d1fb85..5f1ffd27ea05 100644 --- a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb +++ b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb @@ -280,7 +280,7 @@ "* `max_length` argument - max number of words in a segment for alignment (used only if there are no punctuation marks present in the original text. Long non-speech segments are better for segments split and are more likely to co-occur with punctuation marks. Random text split could deteriorate the quality of the alignment.\n", "* out-of-vocabulary words will be removed based on pre-trained ASR model vocabulary, and the text will be changed to lowercase \n", "* sentences for alignment with the original punctuation and capitalization will be stored under `$OUTPUT_DIR/processed/*_with_punct.txt`\n", - "* numbers will be converted from written to their spoken form with `num2words` package. For English, it's recommended to use NeMo normalization tool use `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. See [https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n", + "* numbers will be converted from written to their spoken form with `num2words` package. For English, it's recommended to use NeMo normalization tool use `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. See [https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n", "\n", "### Audio preprocessing:\n", "* non '.wav' audio files will be converted to `.wav` format\n", diff --git a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb index 7d1ce265856a..512ec8249694 100644 --- a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb @@ -172,7 +172,7 @@ "3. `text`: original text;\n", "4. `normalized_text`: normalized text through our text normalization pipeline.\n", " \n", - "This script supports processing either of Thorsten's Neutral Datasets 21.02 or 22.10. In this tutorial, we only focus on the latest 22.10 version dataset. Please refer [thorsten-muller-s-german-neutral-tts-datasets](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/tts/datasets.html#thorsten-muller-s-german-neutral-tts-datasets) for more details about Thorsten's datasets. \n", + "This script supports processing either of Thorsten's Neutral Datasets 21.02 or 22.10. In this tutorial, we only focus on the latest 22.10 version dataset. Please refer [thorsten-muller-s-german-neutral-tts-datasets](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tts/datasets.html#thorsten-muller-s-german-neutral-tts-datasets) for more details about Thorsten's datasets. \n", "\n", "You can run the below command to obtain the final manifests, `train_manifest_text_normed.json`, `val_manifest_text_normed.json` and `test_manifest_text_normed.json`. **Note** that this script would take sometime (~2 hours) to download and normalize the entire dataset." ] From 0972fc89ccc0101971666a5930f2b61e6f606329 Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Fri, 26 May 2023 09:13:49 -0700 Subject: [PATCH 121/512] VP Fixes for converter + Config management (#6698) (#6738) * [Temp] VP Fixes Signed-off-by: smajumdar * Revert logging Signed-off-by: smajumdar --------- Signed-off-by: smajumdar (cherry picked from commit b6f46a0f36659024bae04f24323a16aa8b09f45a) --- .../megatron_change_num_partitions.py | 99 ++++++++++++++++--- 1 file changed, 83 insertions(+), 16 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_change_num_partitions.py b/examples/nlp/language_modeling/megatron_change_num_partitions.py index 558986e3da36..2938a16098a1 100644 --- a/examples/nlp/language_modeling/megatron_change_num_partitions.py +++ b/examples/nlp/language_modeling/megatron_change_num_partitions.py @@ -56,7 +56,7 @@ --target_pipeline_model_parallel_size=1 \ --target_pipeline_model_parallel_split_rank=0 \ --precision=bf16 - + # Megatron GPT + Virtual Pipeline parallelism python megatron_change_num_partitions.py \ @@ -138,17 +138,34 @@ def set_virtual_parallel_rank_safely(rank: int): def force_cpu_model(cfg): with open_dict(cfg): - # temporarily + # temporarily set to cpu original_cpu_init = cfg.get('use_cpu_initialization', False) - original_amp_o2 = cfg.get('megatron_amp_O2', False) + if 'megatron_amp_O2' in cfg: + key = 'megatron_amp_O2' + original_amp_o2 = cfg.megatron_amp_O2 + elif 'megatron_amp_02' in cfg: + key = 'megatron_amp_02' + original_amp_o2 = cfg.megatron_amp_02 + else: + key, original_amp_o2 = None, None + + # Set new values cfg.use_cpu_initialization = True - cfg.megatron_amp_O2 = False - return cfg, {'original_cpu_init': original_cpu_init, 'original_amp_o2': original_amp_o2} + if key is not None: + cfg[key] = False + + # Setup restore dict + restore_dict = {'use_cpu_initialization': original_cpu_init} # 'megatron_amp_O2': original_amp_o2 + if key is not None: + restore_dict[key] = original_amp_o2 + + return cfg, restore_dict def restore_model_config(cfg, original_dict): with open_dict(cfg): for key, val in original_dict.items(): + logging.info(f"Restoring model config key ({key}) from {cfg[key]} to original value of {val}") cfg[key] = val return cfg @@ -1034,6 +1051,8 @@ def main(): os.path.join(model_filepath, args.ckpt_name) ) + vp_state_dict = torch.load(checkpoint_path, map_location="cpu") + if hparams_filepath is not None: # Force the model onto CPU tmp_cfg = OmegaConf.load(hparams_filepath) @@ -1078,9 +1097,10 @@ def main(): vp_params_tmp = [] for vp_idx in range(vp_size): set_virtual_parallel_rank_safely(vp_idx) - params = [p for p in model.model[vp_idx].parameters()] - # params = model.model[vp_idx].module.state_dict_for_save_checkpoint() - # params = [p for p in params.values()] + vp_params = vp_state_dict[f'model{vp_idx}'] + model.model[vp_idx].module.load_state_dict(vp_params, strict=True) + model.model[vp_idx].module.to('cpu') + params = [p for p in model.model[vp_idx].module.parameters()] vp_params_tmp.append(params) # partitions[pp_rank][vp_idx].append(params) @@ -1141,6 +1161,8 @@ def main(): model = model.to('cpu') model._save_restore_connector = NLPSaveRestoreConnector() + restore_model_config(model.cfg, restore_dict) + vp_param_count = 0 for vp in range(vp_size): for pp in range(pp_size): @@ -1159,15 +1181,62 @@ def main(): else: flat_partitions = {idx: [] for idx in range(pp_size)} - for pp in range(pp_size): - for tp in range(tp_size): - vp_cache = [] - for vp in range(vp_size): - vp_cache.extend(partitions[vp][pp][tp]) + """ + Under VP convention + Notation : + Stage = PP rank + Number = GPT model / layer index + Ignore TP - every PP has all TP corresponding to that PP + chunk_index = the physical index of any [] in the list. Ex idx = 2 in below map corresponds to [2: PP 0 VP 1]] + + + For a PP 2 VP 4 model with 8 GPT layers- - flat_partitions[pp].append(vp_cache) + Indices + # Stage 0: [0:PP 0 VP 0] [2:PP 0 VP 1] [4:PP 0 VP 2] [6:PP 0 VP 3] + # Stage 1: [1:PP 1 VP 0] [3:PP 1 VP 1] [5:PP 1 VP 2] [7:PP 1 VP 3] + + after conversion will become + + # Stage 0: [0,1,2,3:PP 0] + # Stage 1: [4,5,6,7:PP 1] + + """ + pp_index = 0 + chunk_counter = 0 + tp_cache = [[] for _ in range(tp_size)] + + for vp in range(vp_size): + for pp in range(pp_size): + # Gather all TP under this VP PP combination. + # We will accumulate TP parameters from multiple layers in this cache. + for tp in range(tp_size): + tp_cache[tp].extend(partitions[vp][pp][tp]) + + # This counter indexes the global selection of a VP PP combination in the above map + chunk_counter += 1 + + # Log the mapping from old VP x PP to new PP index + logging.info(f"VP Conversion - vp: {vp} pp: {pp} -> pp_idx: {pp_index}") + + # Every vp_size chunks, we can fill a new PP index in the flat_partitions + if chunk_counter % vp_size == 0: + flat_partitions[pp_index].extend(tp_cache) + tp_cache = [[] for _ in range(tp_size)] + pp_index += 1 + + logging.debug( + f"VP merge step: \n" + f"vp: {vp} pp: {pp} pp_idx: {pp_index - 1} " + f"len(flat_partitions): {len(flat_partitions[pp_index - 1])}" + ) + + logging.debug(f"PP Size len(flat partitions) : {len(flat_partitions)}") + logging.debug(f"TP Size len(flat partitions[0]): {len(flat_partitions[0])}") + logging.debug(f"Layers len(flat partitions[0][0]) : {len(flat_partitions[0][0])}") partitions = flat_partitions + del tp_cache if tgt_tp_size > 1 or tgt_pp_size > 1: merge_partition(model, partitions) @@ -1175,8 +1244,6 @@ def main(): # Write out the PP 1 TP 1 model to disk merge_partition(model, partitions, args.target_file) - restore_model_config(model.cfg, restore_dict) - # Empty cache memory of all parameters from all PP TP partitions partitions.clear() From 338709b305e4b717f71ea5813d5abc63e787a09f Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Fri, 26 May 2023 11:28:26 -0700 Subject: [PATCH 122/512] fix format --- tools/asr_evaluator/asr_evaluator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/asr_evaluator/asr_evaluator.py b/tools/asr_evaluator/asr_evaluator.py index 9540d3429138..82b61290e66f 100644 --- a/tools/asr_evaluator/asr_evaluator.py +++ b/tools/asr_evaluator/asr_evaluator.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. import json + import git from omegaconf import OmegaConf, open_dict -from utils import cal_target_metadata_wer, run_asr_inference + from nemo.collections.asr.parts.utils.eval_utils import cal_write_wer from nemo.core.config import hydra_runner from nemo.utils import logging +from utils import cal_target_metadata_wer, run_asr_inference """ This script serves as evaluator of ASR models From 90874257b9360ffef65f49cc50a2dbc33f7ab37c Mon Sep 17 00:00:00 2001 From: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Date: Sat, 27 May 2023 06:29:54 +0800 Subject: [PATCH 123/512] Fix fastpitch test nightly (#6742) Signed-off-by: hsiehjackson --- .../collections/tts/models/test_fastpitch.py | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tests/collections/tts/models/test_fastpitch.py b/tests/collections/tts/models/test_fastpitch.py index c77d70cbc44a..2502b1f7cb8a 100644 --- a/tests/collections/tts/models/test_fastpitch.py +++ b/tests/collections/tts/models/test_fastpitch.py @@ -16,7 +16,10 @@ This file implemented unit tests for loading all pretrained FastPitch NGC checkpoints and generating Mel-spectrograms. The test duration breakdowns are shown below. In general, each test for a single model is ~25 seconds on an NVIDIA RTX A6000. """ +import random + import pytest +import torch from nemo.collections.tts.models import FastPitchModel @@ -38,4 +41,23 @@ def test_inference(pretrained_model, language_specific_text_example): model, language_id = pretrained_model text = language_specific_text_example[language_id] parsed_text = model.parse(text) - _ = model.generate_spectrogram(tokens=parsed_text) + + # Multi-Speaker + speaker_id = None + reference_spec = None + reference_spec_lens = None + + if hasattr(model.fastpitch, 'speaker_emb'): + speaker_id = 0 + + if hasattr(model.fastpitch, 'speaker_encoder'): + if hasattr(model.fastpitch.speaker_encoder, 'lookup_module'): + speaker_id = 0 + if hasattr(model.fastpitch.speaker_encoder, 'gst_module'): + bs, lens, t_spec = parsed_text.shape[0], random.randint(50, 100), model.cfg.n_mel_channels + reference_spec = torch.rand(bs, lens, t_spec) + reference_spec_lens = torch.tensor([lens]).long().expand(bs) + + _ = model.generate_spectrogram( + tokens=parsed_text, speaker=speaker_id, reference_spec=reference_spec, reference_spec_lens=reference_spec_lens + ) From ff17f64c9aaf04de96f2c8a780bb4bcdb5e0c834 Mon Sep 17 00:00:00 2001 From: Eric Harper Date: Fri, 26 May 2023 16:41:46 -0600 Subject: [PATCH 124/512] check for first or last stage (#6708) * check for first or last stage Signed-off-by: ericharper * remove redundant check Signed-off-by: ericharper * fix typo Signed-off-by: ericharper * add map_location Signed-off-by: ericharper --------- Signed-off-by: ericharper --- .../language_modeling/megatron_gpt_eval.py | 1 + .../modules/common/text_generation_utils.py | 65 ++++++++++--------- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_gpt_eval.py b/examples/nlp/language_modeling/megatron_gpt_eval.py index 0ac155374512..14cdbf8a760c 100644 --- a/examples/nlp/language_modeling/megatron_gpt_eval.py +++ b/examples/nlp/language_modeling/megatron_gpt_eval.py @@ -203,6 +203,7 @@ def main(cfg) -> None: trainer=trainer, override_config_path=pretrained_cfg, save_restore_connector=save_restore_connector, + map_location=f'cuda:{trainer.local_rank}', # map_location is needed for converted models ) elif cfg.checkpoint_dir: app_state = AppState() diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py index b39ac406d4a4..8cfb02c5e321 100644 --- a/nemo/collections/nlp/modules/common/text_generation_utils.py +++ b/nemo/collections/nlp/modules/common/text_generation_utils.py @@ -135,36 +135,41 @@ def megatron_gpt_generate(model, inputs, tokenizer, length_params, sampling_para def get_computeprob_response(tokenizer, response, inputs): - compute_prob_response = {} - new_token_ids = [] - new_tokens = [] - new_texts = [] - log_probs = [] - full_logprobs = [] - offsets = [] - for batch_id in range(len(response['tokens'])): - if isinstance(inputs, (list, tuple)): - if isinstance(inputs[0], str): - new_token_id = tokenizer.text_to_ids(inputs[batch_id]) - new_text = inputs[batch_id] - token_len = len(new_token_id) - elif isinstance(inputs[0], torch.Tensor): - token_len = int(inputs[1][batch_id].item()) - new_token_id = inputs[0][batch_id][:token_len].tolist() - new_text = tokenizer.ids_to_text(new_token_id) - new_token_ids.append(new_token_id) - new_tokens.append(response['tokens'][batch_id][:token_len]) - new_texts.append(new_text) - log_probs.append(response['logprob'][batch_id][:token_len]) - full_logprobs.append(response['full_logprob'][batch_id][:token_len]) - offsets.append(response['offsets'][batch_id][:-1]) - compute_prob_response['sentences'] = new_texts - compute_prob_response['tokens'] = new_tokens - compute_prob_response['token_ids'] = new_token_ids - compute_prob_response['logprob'] = log_probs - compute_prob_response['full_logprob'] = full_logprobs - compute_prob_response['offsets'] = offsets - return compute_prob_response + if parallel_state.is_pipeline_first_stage() or parallel_state.is_pipeline_last_stage(): + # we only have a response on the first and last pipeline stages + compute_prob_response = {} + new_token_ids = [] + new_tokens = [] + new_texts = [] + log_probs = [] + full_logprobs = [] + offsets = [] + for batch_id in range(len(response['tokens'])): + if isinstance(inputs, (list, tuple)): + if isinstance(inputs[0], str): + new_token_id = tokenizer.text_to_ids(inputs[batch_id]) + new_text = inputs[batch_id] + token_len = len(new_token_id) + elif isinstance(inputs[0], torch.Tensor): + token_len = int(inputs[1][batch_id].item()) + new_token_id = inputs[0][batch_id][:token_len].tolist() + new_text = tokenizer.ids_to_text(new_token_id) + new_token_ids.append(new_token_id) + new_tokens.append(response['tokens'][batch_id][:token_len]) + new_texts.append(new_text) + log_probs.append(response['logprob'][batch_id][:token_len]) + full_logprobs.append(response['full_logprob'][batch_id][:token_len]) + offsets.append(response['offsets'][batch_id][:-1]) + compute_prob_response['sentences'] = new_texts + compute_prob_response['tokens'] = new_tokens + compute_prob_response['token_ids'] = new_token_ids + compute_prob_response['logprob'] = log_probs + compute_prob_response['full_logprob'] = full_logprobs + compute_prob_response['offsets'] = offsets + return compute_prob_response + else: + # intermediate stages + return None def get_batch(model, tokenizer, context_tokens): From 50e7a98cf277b2aa129c1250e6ffd9a1c6fad82e Mon Sep 17 00:00:00 2001 From: Markel Sanz Ausin Date: Mon, 29 May 2023 16:01:52 -0700 Subject: [PATCH 125/512] Bug fix to restore act ckpt (#6753) * Bug fix to restore act ckpt Signed-off-by: Markel Sanz Ausin * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Markel Sanz Ausin Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../language_modeling/megatron_gpt_model.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index e9545361b88d..809825752cab 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1143,16 +1143,20 @@ def _restore_activation_checkpointing_args(self): _reset_activation_checkpointing_args. """ # Restore config values. - self.cfg.activations_checkpoint_granularity = self.last_checkpointing_granularity - self.cfg.activations_checkpoint_method = self.last_checkpointing_method - self.cfg.activations_checkpoint_num_layers = self.last_checkpointing_num_layers + self.cfg.activations_checkpoint_granularity = self.last_activations_checkpoint_granularity + self.cfg.activations_checkpoint_method = self.last_activations_checkpoint_method + self.cfg.activations_checkpoint_num_layers = self.last_activations_checkpoint_num_layers self.cfg.activations_checkpoint_layers_per_pipeline = self.last_activations_checkpoint_layers_per_pipeline # Restore model parameters. for module in self.get_gpt_module_list(): - module.language_model.encoder.activations_checkpoint_granularity = self.last_checkpointing_granularity - module.language_model.encoder.activations_checkpoint_method = self.last_checkpointing_method - module.language_model.encoder.activations_checkpoint_num_layers = self.last_checkpointing_num_layers + module.language_model.encoder.activations_checkpoint_granularity = ( + self.last_activations_checkpoint_granularity + ) + module.language_model.encoder.activations_checkpoint_method = self.last_activations_checkpoint_method + module.language_model.encoder.activations_checkpoint_num_layers = ( + self.last_activations_checkpoint_num_layers + ) module.language_model.encoder.activations_checkpoint_layers_per_pipeline = ( self.last_activations_checkpoint_layers_per_pipeline ) From 029cf74fe991d6b771053afed79787eb7c6906da Mon Sep 17 00:00:00 2001 From: Izzy Putterman Date: Tue, 30 May 2023 17:09:25 -0700 Subject: [PATCH 126/512] drop image from ip2p export --- .../instruct_pix2pix/conf/sd_export.yaml | 1 - .../instruct_pix2pix/sd_edit_export.py | 18 +++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/examples/multimodal/generative/instruct_pix2pix/conf/sd_export.yaml b/examples/multimodal/generative/instruct_pix2pix/conf/sd_export.yaml index 8acfbc201874..2af156df6c60 100644 --- a/examples/multimodal/generative/instruct_pix2pix/conf/sd_export.yaml +++ b/examples/multimodal/generative/instruct_pix2pix/conf/sd_export.yaml @@ -1,7 +1,6 @@ edit: resolution: 256 steps: 100 - input: path/to/input/picture out_path: "instruct_pix2pix" cfg_text: 7.5 cfg_image: 1.2 diff --git a/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py b/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py index 1d4d868e2859..6125f5e32723 100644 --- a/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py +++ b/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py @@ -79,14 +79,18 @@ def model_cfg_modifier(model_cfg): model_wrap_cfg = CFGDenoiser(model_wrap) null_token = model.get_learned_conditioning([""]) - input_image = Image.open(edit_cfg.input).convert("RGB") - width, height = input_image.size - factor = edit_cfg.resolution / max(width, height) - factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height) - width = int((width * factor) // 64) * 64 - height = int((height * factor) // 64) * 64 - input_image = ImageOps.fit(input_image, (width, height), method=Image.Resampling.LANCZOS) + # input_image = Image.open(edit_cfg.input).convert("RGB") + # width, height = input_image.size + # factor = edit_cfg.resolution / max(width, height) + # factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height) + # width = int((width * factor) // 64) * 64 + # height = int((height * factor) // 64) * 64 + # input_image = ImageOps.fit(input_image, (width, height), method=Image.Resampling.LANCZOS) + input_image = np.random.rand(edit_cfg.resolution, edit_cfg.resolution, 3) * 255 + input_image = Image.fromarray(input_image.astype('uint8')).convert('RGB') batch_size = edit_cfg.get("num_images_per_prompt", 1) + height = edit_cfg.resolution + width = edit_cfg.resolution output_dir = edit_cfg.out_path From f1fb07291647d4ef6df4c5fc5f1a40bc5f75da92 Mon Sep 17 00:00:00 2001 From: Yu Yao Date: Wed, 31 May 2023 12:56:01 -0700 Subject: [PATCH 127/512] Update multimodal code to fit NeMo r1.19.0 style --- .../models/clip/megatron_clip_models.py | 126 ++++++++++-------- .../models/dreambooth/dreambooth.py | 2 +- .../models/multimodal_base_model.py | 5 +- .../models/stable_diffusion/ldm/ddpm.py | 2 +- .../megatron_vit_classification_models.py | 117 +++++++++------- .../vision/models/vision_base_model.py | 5 +- nemo/core/optim/optimizer_with_main_params.py | 2 +- 7 files changed, 147 insertions(+), 112 deletions(-) diff --git a/nemo/collections/multimodal/models/clip/megatron_clip_models.py b/nemo/collections/multimodal/models/clip/megatron_clip_models.py index 45df3aeebdb8..bcabf5402184 100644 --- a/nemo/collections/multimodal/models/clip/megatron_clip_models.py +++ b/nemo/collections/multimodal/models/clip/megatron_clip_models.py @@ -350,6 +350,20 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): else: raise ValueError('precision must be in [32, 16, "bf16"]') + self.enable_autocast = ( + True if (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + ) + + self.transformer_engine = cfg.get('transformer_engine', False) + + # Convert the global-batch-based profile index to micro-batch index + if hasattr(self, '_nsys_profile_enabled'): + mp_size = cfg.get('tensor_model_parallel_size', 1) * cfg.get('pipeline_model_parallel_size', 1) + data_parallel_world_size = trainer.world_size // mp_size + grad_accum_steps = cfg.get('global_batch_size') // (cfg.get('micro_batch_size') * data_parallel_world_size) + self._nsys_profile_start_step *= grad_accum_steps + self._nsys_profile_end_step *= grad_accum_steps + def get_module_list(self): if isinstance(self.model, list): return [model.module if isinstance(model, Float16Module) else model for model in self.model] @@ -457,6 +471,58 @@ def forward(self, image, text): output_tensor = self.model(image, text) return output_tensor + def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): + tensor_shape = None + + # handle asynchronous grad reduction + no_sync_func = None + grad_sync_func = None + param_sync_func = None + if not forward_only and self.with_distributed_adam: + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) + grad_sync_func = self.reduce_overlap_gradients + param_sync_func = self.sync_overlap_parameters + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + fwd_bwd_function = get_forward_backward_func() + + # TODO @akhattar: add num_micro_batches_with_partial_activation_checkpoints when ready + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=self.model, + num_microbatches=get_num_microbatches(), + forward_only=forward_only, + tensor_shape=tensor_shape, + dtype=self.autocast_dtype, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=self.enable_autocast, + no_sync_func=no_sync_func, + grad_sync_func=grad_sync_func, + param_sync_func=param_sync_func, + ) + + # only the last stages of the pipeline return losses + if losses_reduced_per_micro_batch: + if (not forward_only) or self.cfg.data.get('validation_drop_last', True): + # average loss across micro batches + loss_tensors_list = [loss_reduced['loss'] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_mean = loss_tensor.mean() + else: + # Get the total loss since micro batches sizes are not uniform + raise NotImplementedError("Losses of micro batches sizes must be uniform!") + else: + # we're not on the last pipeline stage so no losses + if forward_only: + loss_mean = [] + else: + loss_mean = torch.tensor(0.0).cuda() + + return loss_mean + def training_step(self, dataloader_iter, batch_idx): """ Our dataloaders produce a micro-batch and then we fetch @@ -488,34 +554,7 @@ def training_step(self, dataloader_iter, batch_idx): for param in module.embedding.parameters(): param.data_ptr() - # TODO (yuya): fix this shape - tensor_shape = None - - # run forward and backwards passes for an entire global batch - # we do this inside training_step to support pipeline parallelism - fwd_bwd_function = get_forward_backward_func() - - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(), - data_iterator=dataloader_iter, - model=[self.model], - num_microbatches=get_num_microbatches(), - forward_only=False, - tensor_shape=tensor_shape, - dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, - sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, - ) - - # only the last stages of the pipeline return losses - if losses_reduced_per_micro_batch: - # average loss across micro batches - loss_tensors_list = [loss_reduced['loss'] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.stack(loss_tensors_list) - loss_mean = loss_tensor.mean() - else: - loss_mean = torch.tensor(0.0).cuda() + loss_mean = self.fwd_bwd_step(dataloader_iter, batch_idx, False) # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): @@ -709,35 +748,8 @@ def validation_step(self, dataloader_iter, batch_idx): from the dataloader to produce a list of microbatches. The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. """ - tensor_shape = None # Placeholder - - # run forward passes for an entire global batch - # we do this inside validation_step to support pipeline parallelism - fwd_bwd_function = get_forward_backward_func() - - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(), - data_iterator=dataloader_iter, - model=[self.model], - num_microbatches=get_num_microbatches(), - forward_only=True, - tensor_shape=tensor_shape, - dtype=self.autocast_dtype, - sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, - ) - - def _get_metric(metric_key): - # only the last stage of the pipeline returns losses - if losses_reduced_per_micro_batch: - loss_tensors_list = [loss_reduced[metric_key] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.vstack(loss_tensors_list) - loss_mean = loss_tensor.mean(axis=0) - else: - loss_mean = torch.tensor([0.0]).cuda() - return loss_mean[0] - - return _get_metric('loss') + loss = self.fwd_bwd_step(dataloader_iter, batch_idx, True) + return loss def validation_epoch_end(self, outputs): # TODO (yuya): need fix later, check with Sean diff --git a/nemo/collections/multimodal/models/dreambooth/dreambooth.py b/nemo/collections/multimodal/models/dreambooth/dreambooth.py index 7e90d06035ef..ce7e1a7a7591 100644 --- a/nemo/collections/multimodal/models/dreambooth/dreambooth.py +++ b/nemo/collections/multimodal/models/dreambooth/dreambooth.py @@ -261,7 +261,7 @@ def training_step(self, dataloader_iter, batch_idx): forward_only=False, tensor_shape=None, dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), enable_autocast=True, ) diff --git a/nemo/collections/multimodal/models/multimodal_base_model.py b/nemo/collections/multimodal/models/multimodal_base_model.py index c2ca22cf2ba7..f495864bd855 100644 --- a/nemo/collections/multimodal/models/multimodal_base_model.py +++ b/nemo/collections/multimodal/models/multimodal_base_model.py @@ -297,7 +297,10 @@ def _enable_nvidia_optimizations(self): # NVIDIA container version check nvidia_torch_version = os.getenv('NVIDIA_PYTORCH_VERSION', None) if nvidia_torch_version is not None: - NVIDIA_TORCH_MAJOR = int(nvidia_torch_version.split('.')[0]) + try: + NVIDIA_TORCH_MAJOR = int(nvidia_torch_version.split('.')[0]) + except Exception: + NVIDIA_TORCH_MAJOR = 0 try: NVIDIA_TORCH_MINOR = int(nvidia_torch_version.split('.')[1]) except Exception: diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py index a535582850c2..1e7d88567397 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -1647,7 +1647,7 @@ def training_step(self, dataloader_iter, batch_idx): forward_only=False, tensor_shape=tensor_shape, dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), enable_autocast=True, ) diff --git a/nemo/collections/vision/models/megatron_vit_classification_models.py b/nemo/collections/vision/models/megatron_vit_classification_models.py index 574b307b1bc4..aa9ada81cf62 100644 --- a/nemo/collections/vision/models/megatron_vit_classification_models.py +++ b/nemo/collections/vision/models/megatron_vit_classification_models.py @@ -178,6 +178,20 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): else: raise ValueError('precision must be in [32, 16, "bf16"]') + self.enable_autocast = ( + True if (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + ) + + self.transformer_engine = cfg.get('transformer_engine', False) + + # Convert the global-batch-based profile index to micro-batch index + if hasattr(self, '_nsys_profile_enabled'): + mp_size = cfg.get('tensor_model_parallel_size', 1) * cfg.get('pipeline_model_parallel_size', 1) + data_parallel_world_size = trainer.world_size // mp_size + grad_accum_steps = cfg.get('global_batch_size') // (cfg.get('micro_batch_size') * data_parallel_world_size) + self._nsys_profile_start_step *= grad_accum_steps + self._nsys_profile_end_step *= grad_accum_steps + def get_module_list(self): if isinstance(self.model, list): return [model.module if isinstance(model, Float16Module) else model for model in self.model] @@ -277,22 +291,20 @@ def forward(self, tokens): output_tensor = self.model(tokens) return output_tensor - def training_step(self, dataloader_iter, batch_idx): - """ - Our dataloaders produce a micro-batch and then we fetch - a number of microbatches depending on the global batch size and model parallel size - from the dataloader to produce a list of microbatches. - Batch should be a list of microbatches and those microbatches should on CPU. - Microbatches are then moved to GPU during the pipeline. - The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. - """ - - # we zero grads here because we also call backward in the megatron-core fwd/bwd functions - self._optimizer.zero_grad() + def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): # TODO (yuya): fix this shape tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + # handle asynchronous grad reduction + no_sync_func = None + grad_sync_func = None + param_sync_func = None + if not forward_only and self.with_distributed_adam: + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) + grad_sync_func = self.reduce_overlap_gradients + param_sync_func = self.sync_overlap_parameters + # run forward and backwards passes for an entire global batch # we do this inside training_step to support pipeline parallelism fwd_bwd_function = get_forward_backward_func() @@ -302,22 +314,55 @@ def training_step(self, dataloader_iter, batch_idx): data_iterator=dataloader_iter, model=[self.model], num_microbatches=get_num_microbatches(), - forward_only=False, + forward_only=forward_only, tensor_shape=tensor_shape, dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, + enable_autocast=self.enable_autocast, + no_sync_func=no_sync_func, + grad_sync_func=grad_sync_func, + param_sync_func=param_sync_func, ) # only the last stages of the pipeline return losses if losses_reduced_per_micro_batch: - # average loss across micro batches - loss_tensors_list = [loss_reduced['loss'] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.stack(loss_tensors_list) - loss_mean = loss_tensor.mean() + if (not forward_only) or self.cfg.data.get('validation_drop_last', True): + # average loss across micro batches + loss_tensors_list = [loss_reduced['loss'] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_mean = loss_tensor.mean() + acc_tensors_list = [loss_reduced['accuracy'] for loss_reduced in losses_reduced_per_micro_batch] + acc_tensor = torch.stack(acc_tensors_list) + accuracy_mean = acc_tensor.mean() + else: + # Get the total loss since micro batches sizes are not uniform + raise NotImplementedError("Losses of micro batches sizes must be uniform!") else: - loss_mean = torch.tensor(0.0).cuda() + # we're not on the last pipeline stage so no losses + if forward_only: + loss_mean = [] + accuracy_mean = [] + else: + loss_mean = torch.tensor(0.0).cuda() + accuracy_mean = loss_mean.copy() + + return loss_mean, accuracy_mean + + def training_step(self, dataloader_iter, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + Batch should be a list of microbatches and those microbatches should on CPU. + Microbatches are then moved to GPU during the pipeline. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + + # we zero grads here because we also call backward in the megatron-core fwd/bwd functions + self._optimizer.zero_grad() + + loss_mean, _ = self.fwd_bwd_step(dataloader_iter, batch_idx, False) # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): @@ -470,36 +515,8 @@ def validation_step(self, dataloader_iter, batch_idx): from the dataloader to produce a list of microbatches. The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. """ - - tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] - - # run forward passes for an entire global batch - # we do this inside validation_step to support pipeline parallelism - fwd_bwd_function = get_forward_backward_func() - - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(), - data_iterator=dataloader_iter, - model=[self.model], - num_microbatches=get_num_microbatches(), - forward_only=True, - tensor_shape=tensor_shape, - dtype=self.autocast_dtype, - sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, - ) - - def _get_metric(metric_key): - # only the last stage of the pipeline returns losses - if losses_reduced_per_micro_batch: - loss_tensors_list = [loss_reduced[metric_key] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.vstack(loss_tensors_list) - loss_mean = loss_tensor.mean(axis=0) - else: - loss_mean = torch.tensor([0.0]).cuda() - return loss_mean[0] - - return _get_metric('loss'), _get_metric('accuracy') + loss, accuracy = self.fwd_bwd_step(dataloader_iter, batch_idx, True) + return loss, accuracy def validation_epoch_end(self, outputs): # TODO (yuya): need fix later, check with Sean diff --git a/nemo/collections/vision/models/vision_base_model.py b/nemo/collections/vision/models/vision_base_model.py index dc9726d53940..aefa5287bf83 100644 --- a/nemo/collections/vision/models/vision_base_model.py +++ b/nemo/collections/vision/models/vision_base_model.py @@ -247,7 +247,10 @@ def _enable_nvidia_optimizations(self): # NVIDIA container version check nvidia_torch_version = os.getenv('NVIDIA_PYTORCH_VERSION', None) if nvidia_torch_version is not None: - NVIDIA_TORCH_MAJOR = int(nvidia_torch_version.split('.')[0]) + try: + NVIDIA_TORCH_MAJOR = int(nvidia_torch_version.split('.')[0]) + except Exception: + NVIDIA_TORCH_MAJOR = 0 try: NVIDIA_TORCH_MINOR = int(nvidia_torch_version.split('.')[1]) except Exception: diff --git a/nemo/core/optim/optimizer_with_main_params.py b/nemo/core/optim/optimizer_with_main_params.py index c9790ee2a139..6f0a9b513337 100644 --- a/nemo/core/optim/optimizer_with_main_params.py +++ b/nemo/core/optim/optimizer_with_main_params.py @@ -492,7 +492,7 @@ def get_parameters(self): params = [] for param_group in self.optimizer.param_groups: for param in param_group['params']: - if param.requires_grad: # (@adithyare) added to enable pp>1 training for adapters + if param is not None: params.append(param) return params From 766dd1eaf925b91f8474161d5e71ac9320a9acd1 Mon Sep 17 00:00:00 2001 From: Markel Sanz Ausin Date: Wed, 31 May 2023 10:22:23 -0700 Subject: [PATCH 128/512] Bug fix to reset sequence parallelism (#6756) * Bug fix to reset sequence parallelism Signed-off-by: Markel Sanz Ausin * Update seq par reset/restore Signed-off-by: Markel Sanz Ausin * Add nested loop Signed-off-by: Markel Sanz Ausin --------- Signed-off-by: Markel Sanz Ausin --- .../models/language_modeling/megatron_gpt_model.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 809825752cab..66fa0ed2716e 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1170,12 +1170,13 @@ def _reset_sequence_parallelism_args(self): self.last_sequence_parallel = self.cfg.sequence_parallel # Reset config values. Needed for calling generate. - self.cfg.sequence_parallel = None + self.cfg.sequence_parallel = False # Reset model parameters. - for module in self.get_gpt_module_list(): - module.language_model.encoder.sequence_parallel = None + for mod in module.modules(): + if hasattr(mod, "sequence_parallel"): + mod.sequence_parallel = self.last_sequence_parallel def _restore_sequence_parallelism_args(self): """ Restores the sequence parallelism parameters using the values saved by @@ -1187,4 +1188,6 @@ def _restore_sequence_parallelism_args(self): # Restore model parameters. for module in self.get_gpt_module_list(): - module.language_model.encoder.sequence_parallel = self.last_sequence_parallel + for mod in module.modules(): + if hasattr(mod, "sequence_parallel"): + mod.sequence_parallel = self.last_sequence_parallel From 1ea66a408f93357f0242d0d4e6465a703d4ce067 Mon Sep 17 00:00:00 2001 From: Abhinav Khattar Date: Wed, 31 May 2023 10:46:19 -0700 Subject: [PATCH 129/512] Fix checkpointed forward and add test for full activation checkpointing (#6744) * fix checkpointed forward and add test for full activation checkpointing Signed-off-by: Abhinav Khattar * add method Signed-off-by: Abhinav Khattar * add method Signed-off-by: Abhinav Khattar --------- Signed-off-by: Abhinav Khattar --- Jenkinsfile | 2 ++ nemo/collections/nlp/modules/common/megatron/transformer.py | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 27fbf11148f6..780e3e4b43c4 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -3175,6 +3175,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.hidden_size=256 \ model.num_attention_heads=8 \ model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ model.activations_checkpoint_num_layers=1 \ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" @@ -3211,6 +3212,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.hidden_size=256 \ model.num_attention_heads=8 \ model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ model.activations_checkpoint_num_layers=1 \ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" diff --git a/nemo/collections/nlp/modules/common/megatron/transformer.py b/nemo/collections/nlp/modules/common/megatron/transformer.py index 0f6112e08036..9a09a9f9aa0b 100644 --- a/nemo/collections/nlp/modules/common/megatron/transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/transformer.py @@ -1268,9 +1268,6 @@ def custom_forward(*inputs): return custom_forward - # Make sure memory is freed. - tensor_parallel.reset_checkpointed_activations_memory_buffer() - if self.activations_checkpoint_method == 'uniform': # Uniformly divide the total number of Transformer layers and checkpoint # the input activation of each divided chunk. From f627715177879567b2994e7410edd35d1a26b6f5 Mon Sep 17 00:00:00 2001 From: Yu Yao Date: Tue, 6 Jun 2023 16:10:05 -0700 Subject: [PATCH 130/512] Fix set epoch for multithreading workers --- nemo/collections/vision/data/megatron/vit_dataset.py | 7 ++++--- .../vision/models/megatron_vit_classification_models.py | 5 ----- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/nemo/collections/vision/data/megatron/vit_dataset.py b/nemo/collections/vision/data/megatron/vit_dataset.py index ad4c8d47e781..29852e70446b 100644 --- a/nemo/collections/vision/data/megatron/vit_dataset.py +++ b/nemo/collections/vision/data/megatron/vit_dataset.py @@ -20,6 +20,7 @@ from PIL import Image, ImageFilter, ImageOps from torch.utils.data import Dataset +from nemo.collections.multimodal.data.common.data_samplers import SharedEpoch from nemo.collections.vision.data.megatron.autoaugment import ImageNetPolicy from nemo.collections.vision.data.megatron.image_folder import ImageFolder @@ -38,17 +39,17 @@ def _to_torch_data_type(precision): class RandomSeedDataset(Dataset): def __init__(self, dataset, seed=1234): self.base_seed = seed - self.curr_seed = seed self.dataset = dataset + self.epoch = SharedEpoch() def __len__(self): return len(self.dataset) def set_epoch(self, epoch): - self.curr_seed = self.base_seed + epoch + self.epoch.set_value(epoch) def __getitem__(self, idx): - seed = idx + self.curr_seed + seed = idx + self.base_seed + self.epoch.get_value() * 32768 torch.manual_seed(seed) random.seed(seed) np.random.seed(seed) diff --git a/nemo/collections/vision/models/megatron_vit_classification_models.py b/nemo/collections/vision/models/megatron_vit_classification_models.py index aa9ada81cf62..5c6da1508062 100644 --- a/nemo/collections/vision/models/megatron_vit_classification_models.py +++ b/nemo/collections/vision/models/megatron_vit_classification_models.py @@ -604,11 +604,6 @@ def build_pretraining_data_loader(self, dataset, consumed_samples, drop_last=Tru else: raise ValueError('cfg.data.dataloader_type not found. Must be "single" or "cyclic"') - # KJJ - # return torch.utils.data.DataLoader( - # dataset, batch_sampler=batch_sampler, num_workers=self.cfg.data.num_workers, pin_memory=True, - # ) - # return torch.utils.data.DataLoader( dataset, batch_sampler=batch_sampler, From ab3377f4b4fe600999dca61ff31f5c0be9b183a8 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Thu, 8 Jun 2023 15:29:53 -0700 Subject: [PATCH 131/512] Add support of using NeMo clip in SD 1.5 --- .../clip/conf/megatron_clip_VIT-L-14.yaml | 203 ++++++++++++++++++ .../dreambooth/conf/dreambooth.yaml | 20 +- .../generative/dreambooth/dreambooth.py | 3 +- .../stable_diffusion/conf/sd_train.yaml | 13 +- .../models/clip/megatron_clip_models.py | 1 + .../models/stable_diffusion/ldm/ddpm.py | 2 +- .../stable_diffusion/encoders/modules.py | 202 +++++++++++++++++ .../nlp/modules/common/megatron/mlp.py | 6 +- .../nlp/modules/common/megatron/utils.py | 10 + .../vision/modules/vit/vit_backbone.py | 1 + 10 files changed, 447 insertions(+), 14 deletions(-) create mode 100644 examples/multimodal/foundation/clip/conf/megatron_clip_VIT-L-14.yaml diff --git a/examples/multimodal/foundation/clip/conf/megatron_clip_VIT-L-14.yaml b/examples/multimodal/foundation/clip/conf/megatron_clip_VIT-L-14.yaml new file mode 100644 index 000000000000..8a21fccd0874 --- /dev/null +++ b/examples/multimodal/foundation/clip/conf/megatron_clip_VIT-L-14.yaml @@ -0,0 +1,203 @@ +model: + precision: 32 + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 64 # limited by GPU memory + global_batch_size: 2048 # will use more micro batches to reach global batch size + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_pretrained: null # used in fine-tuning + # multimodal configs + output_dim: 768 + # As the number of devices used to train increases, so does the space complexity of + # the logit matrix. Using a naïve all-gather scheme, space complexity will be + # `O(n^2)`. Instead, complexity may become effectively linear if the flags + # `--gather-with-grad` and `--local-loss` are used. This alteration results in one-to-one + # numerical results as the naïve method. + local_loss: False # calculate loss w/ local features @ global (instead of realizing full global @ global matrix) + gather_with_grad: True # enable full distributed gradient for feature gather, set this to False may cause convergence issue + + vision: + precision: 32 + # vision configs + patch_dim: 14 + img_h: 224 + img_w: 224 + image_mean: null + image_std: null + num_channels: 3 + drop_patch_rate: 0.0 + drop_path_rate: 0.0 + global_average_pool: False + output_dim: ${model.output_dim} + class_token_length: 1 + preprocess_layernorm: True # apply layer norm to embedded tokens + + # model architecture + encoder_seq_length: 196 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: learned_parameters + num_layers: 24 + hidden_size: 1024 + ffn_hidden_size: 4096 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 16 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: True + activation: quick-gelu + + + + text: + precision: 32 + # text configs + output_dim: ${model.output_dim} + + # model architecture + encoder_seq_length: 77 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: learned_parameters + num_layers: 12 + hidden_size: 768 + ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 12 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: True + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + activation: quick-gelu + + # Megatron O2-style half-precision + megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'huggingface' + type: 'openai/clip-vit-large-patch14' + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + make_vocab_size_divisible_by: 128 # Pad the vocab size to be divisible by this value for computation efficiency. + + data: + num_workers: 8 + train: + dataset_path: # List of paths to pkl files or tar files + - /datasets/coyo/test.pkl + validation: # List of paths to pkl files or tar files + dataset_path: + - /datasets/coyo/test.pkl + webdataset: + infinite_sampler: False + local_root_path: /datasets/coyo + + imagenet_val: null # Path to imagenet val set for conducting zero shot evaluation. + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 1e-3 + weight_decay: 0.2 + betas: + - 0.9 + - 0.98 + sched: + name: CosineAnnealing + warmup_steps: 2000 + constant_steps: 0 + min_lr: 1e-5 \ No newline at end of file diff --git a/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml b/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml index cea0fe4995f1..756ce433db32 100644 --- a/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml +++ b/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml @@ -46,11 +46,11 @@ model: micro_batch_size: 2 # limited by GPU memory global_batch_size: 4 # will use more micro batches to reach global batch size - with_prior_preservation: True + with_prior_preservation: False pretrained_ckpt: prior_loss_weight: 0.5 train_text_encoder: False - restore_from_path: /ckpts/v1-5-pruned.ckpt #This ckpt is only used to generate regularization images, thus .nemo ckpt is needed + restore_from_path: /ckpts/nemo-v1-5-188000-ema.nemo #This ckpt is only used to generate regularization images, thus .nemo ckpt is needed @@ -143,10 +143,16 @@ model: target: torch.nn.Identity cond_stage_config: - _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder - version: openai/clip-vit-large-patch14 + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder + restore_from_path: /ckpts/openai.nemo device: cuda - max_length: 77 + freeze: True + layer: "last" + # For compatibility of history version that uses HF clip model + # _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder + # version: openai/clip-vit-large-patch14 + # device: cuda + # max_length: 77 noise_scheduler: _target_: nemo.collections.multimodal.models.dreambooth.util.sd_noise_scheduler @@ -188,9 +194,9 @@ model: data: name: pbss num_workers: 4 - instance_dir: /datasets/instance_dir + instance_dir: /dataset/instance_dir instance_prompt: a photo of a sks dog - regularization_dir: /datasets/nemo_dogs + regularization_dir: /dataset/nemo_dogs regularization_prompt: a photo of a dog num_reg_images: 200 num_images_per_prompt: 4 diff --git a/examples/multimodal/generative/dreambooth/dreambooth.py b/examples/multimodal/generative/dreambooth/dreambooth.py index 038dbb8b763c..5a6208e31fc7 100644 --- a/examples/multimodal/generative/dreambooth/dreambooth.py +++ b/examples/multimodal/generative/dreambooth/dreambooth.py @@ -125,7 +125,8 @@ def main(cfg): if cfg.get('cluster_type', None) == 'BCP': plugins.append(TorchElasticEnvironment()) - prepare_reg_data(cfg) + if cfg.model.with_prior_preservation: + prepare_reg_data(cfg) parallel_state.destroy_model_parallel() callbacks = [] diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml index c4371b4e0f0f..5dc8865dcb8a 100644 --- a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml @@ -137,10 +137,17 @@ model: target: torch.nn.Identity cond_stage_config: - _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder - version: openai/clip-vit-large-patch14 + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder + restore_from_path: /ckpts/openai.nemo device: cuda - max_length: 77 + freeze: True + layer: "last" + # For compatibility of history version that uses HF clip model + # _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder + # version: openai/clip-vit-large-patch14 + # device: cuda + # max_length: 77 + # miscellaneous seed: 1234 diff --git a/nemo/collections/multimodal/models/clip/megatron_clip_models.py b/nemo/collections/multimodal/models/clip/megatron_clip_models.py index bcabf5402184..96c0f86b5600 100644 --- a/nemo/collections/multimodal/models/clip/megatron_clip_models.py +++ b/nemo/collections/multimodal/models/clip/megatron_clip_models.py @@ -186,6 +186,7 @@ def __init__(self, model_cfg, padded_vocab_size, pre_process=True, post_process= fp8_amax_compute_algo=model_cfg.fp8_amax_compute_algo, reduce_amax=model_cfg.get('reduce_amax', True), use_emha=model_cfg.use_emha, + activation=model_cfg.activation, ) self.initialize_word_embeddings( diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py index 1e7d88567397..695cf3ba9ddf 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -98,7 +98,7 @@ def random_dropout(embeddings, drop_rate): drop_rate (float): Rate of dropping the embedding. """ nsamples = embeddings.shape[0] - zero_flag = torch.ones(nsamples, 1, 1).to(embeddings.dtype) * (1 - drop_rate) + zero_flag = torch.ones(nsamples, 1, 1, device=torch.cuda.current_device()).to(embeddings.dtype) * (1 - drop_rate) zero_flag = torch.bernoulli(zero_flag).cuda(non_blocking=True) embeddings = embeddings * zero_flag return embeddings diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py index 4a64ac6bd144..81aaec23d868 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py @@ -11,19 +11,29 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os +import tempfile from functools import partial import kornia +import open_clip import torch import torch.nn as nn from einops import rearrange, repeat +from omegaconf import OmegaConf +from torch.utils.checkpoint import checkpoint from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer from transformers.models.clip.modeling_clip import CLIPTextTransformer +from nemo.collections.multimodal.data.clip.clip_dataset import get_preprocess_fns +from nemo.collections.multimodal.models.clip.megatron_clip_models import CLIPModel from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import ( TransformerWrapper, # TODO: can we directly rely on lucidrains code and simply add this as a reuirement? --> test ) from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import Encoder +from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector class AbstractEncoder(nn.Module): @@ -207,6 +217,198 @@ def encode(self, text): return self(text) +class FrozenOpenCLIPEmbedder(AbstractEncoder): + """ + Uses the OpenCLIP transformer encoder for text + """ + + LAYERS = [ + # "pooled", + "last", + "penultimate", + ] + + def __init__( + self, + arch="ViT-H-14", + version="laion2b_s32b_b79k", + device="cuda", + max_length=77, + freeze=True, + layer="last", + use_fp16=False, + ): + super().__init__() + assert layer in self.LAYERS + model, _, _ = open_clip.create_model_and_transforms(arch, device=torch.device('cpu'), pretrained=version) + del model.visual + self.model = model + + self.device = device + self.max_length = max_length + if freeze: + self.freeze() + self.layer = layer + if self.layer == "last": + self.layer_idx = 0 + elif self.layer == "penultimate": + self.layer_idx = 1 + else: + raise NotImplementedError() + + def freeze(self): + self.model = self.model.eval() + for param in self.parameters(): + param.requires_grad = False + + def forward(self, text): + tokens = open_clip.tokenize(text) + z = self.encode_with_transformer(tokens.to(self.device)) + return z + + def encode_with_transformer(self, text): + x = self.model.token_embedding(text) # [batch_size, n_ctx, d_model] + x = x + self.model.positional_embedding + x = x.permute(1, 0, 2) # NLD -> LND + x = self.text_transformer_forward(x, attn_mask=self.model.attn_mask) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.model.ln_final(x) + return x + + def text_transformer_forward(self, x: torch.Tensor, attn_mask=None): + for i, r in enumerate(self.model.transformer.resblocks): + if i == len(self.model.transformer.resblocks) - self.layer_idx: + break + if self.model.transformer.grad_checkpointing and not torch.jit.is_scripting(): + x = checkpoint(r, x, attn_mask) + else: + x = r(x, attn_mask=attn_mask) + return x + + def encode(self, text): + return self(text) + + +class FrozenMegatronCLIPEmbedder(AbstractEncoder): + def __init__(self, restore_from_path, device="cuda", layer="last", freeze=True, use_fp16=False): + super().__init__() + cfg, state_dict = self.load_config_and_state_from_nemo(restore_from_path) + self.build_tokenizer(cfg) + self.load_model(cfg, state_dict) + + self.device = device + if freeze: + self.freeze() + self.layer = layer + if self.layer == "last": + self.layer_idx = 0 + elif self.layer == "penultimate": + self.layer_idx = 1 + else: + raise NotImplementedError() + + def freeze(self): + self.model = self.model.eval() + for param in self.parameters(): + param.requires_grad = False + + def load_config_and_state_from_nemo(self, nemo_path): + if torch.cuda.is_available(): + map_location = torch.device('cuda') + else: + map_location = torch.device('cpu') + save_restore_connector = NLPSaveRestoreConnector() + cwd = os.getcwd() + + with tempfile.TemporaryDirectory() as tmpdir: + try: + save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) + + # Change current working directory to + os.chdir(tmpdir) + config_yaml = os.path.join(tmpdir, save_restore_connector.model_config_yaml) + cfg = OmegaConf.load(config_yaml) + + model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) + state_dict = save_restore_connector._load_state_dict_from_disk( + model_weights, map_location=map_location + ) + finally: + os.chdir(cwd) + + return cfg, state_dict + + def build_tokenizer(self, cfg): + legacy = cfg.tokenizer.sentencepiece_legacy + self.tokenizer = get_nmt_tokenizer( + library=cfg.tokenizer.library, + model_name=cfg.tokenizer.type, + tokenizer_model=cfg.tokenizer.model, + vocab_file=cfg.tokenizer.vocab_file, + merges_file=cfg.tokenizer.merge_file, + delimiter=cfg.tokenizer.get('delimiter', None), + legacy=legacy, + ) + + _, self.text_transform = get_preprocess_fns(cfg, self.tokenizer, is_train=False,) + + def load_model(self, cfg, state_dict): + padded_vocab_size = self._vocab_size_with_padding( + orig_vocab_size=self.tokenizer.vocab_size, + make_vocab_size_divisible_by=cfg.get('make_vocab_size_divisible_by', 128), + tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1), + ) + model = CLIPModel( + model_cfg=cfg, + padded_vocab_size=padded_vocab_size, + pre_process=cfg.text.pre_process, + post_process=cfg.text.post_process, + ) + + clip_state_dict = {} + for key, value in state_dict.items(): + key = key[6:] + clip_state_dict[key] = value + model.load_state_dict(clip_state_dict) + + del model.vision_encoder + self.model = model.text_encoder + + def _vocab_size_with_padding(self, orig_vocab_size, make_vocab_size_divisible_by, tensor_model_parallel_size): + after = orig_vocab_size + multiple = make_vocab_size_divisible_by * tensor_model_parallel_size + while (after % multiple) != 0: + after += 1 + return after + + def forward(self, text): + texts = self.text_transform(text) + z = self.encode_with_transformer(texts.to(self.device)) + # # Pad the seq length to multiple of 8 + seq_len = (z.shape[1] + 8 - 1) // 8 * 8 + z = torch.nn.functional.pad(z, (0, 0, 0, seq_len - z.shape[1]), value=0.0) + return z + + def encode_with_transformer(self, text): + x = self.model.language_model.embedding.word_embeddings(text) + x += self.model.language_model.embedding.position_embeddings + x = x.permute(1, 0, 2) # NLD -> LND + x = self.text_transformer_forward(x, attn_mask=self.model.attn_mask) + x = self.model.language_model.encoder.final_layernorm(x) + x = x.permute(1, 0, 2) # LND -> NLD + return x + + def text_transformer_forward(self, x: torch.Tensor, attn_mask=None): + for i, r in enumerate(self.model.language_model.encoder.layers): + if i == len(self.model.language_model.encoder.layers) - self.layer_idx: + break + x = r(x, attn_mask) + return x + + def encode(self, text): + return self(text) + + if __name__ == "__main__": from ldm.util import count_params diff --git a/nemo/collections/nlp/modules/common/megatron/mlp.py b/nemo/collections/nlp/modules/common/megatron/mlp.py index a5515cae0dac..4f90775460c6 100644 --- a/nemo/collections/nlp/modules/common/megatron/mlp.py +++ b/nemo/collections/nlp/modules/common/megatron/mlp.py @@ -24,7 +24,7 @@ from nemo.collections.nlp.modules.common.megatron.fused_layer_norm import get_layer_norm from nemo.collections.nlp.modules.common.megatron.layer_norm_1p import LayerNorm1P from nemo.collections.nlp.modules.common.megatron.module import MegatronModule -from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults, erf_gelu +from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults, QuickGELUActivation, erf_gelu from nemo.collections.nlp.modules.common.megatron.utils import openai_gelu as openai_gelu_func from nemo.collections.nlp.modules.common.megatron.utils import squared_relu from nemo.core import adapter_mixins @@ -93,7 +93,6 @@ def __init__( self.dropout = dropout self.dtype = dtype self.set_accepted_adapter_types([MLPInfusedAdapterConfig._target_]) - supported_activations = [ 'gelu', 'geglu', @@ -103,6 +102,7 @@ def __init__( 'fast-geglu', 'fast-swiglu', 'fast-reglu', + 'quick-gelu', ] if activation not in supported_activations: @@ -180,6 +180,8 @@ def __init__( self.activation_func = openai_gelu_func elif activation in ["gelu", "geglu", "fast-geglu"]: self.activation_func = F.gelu + elif activation == 'quick-gelu': + self.activation_func = QuickGELUActivation elif onnx_safe: self.activation_func = erf_gelu elif activation in ["reglu", "fast-reglu"]: diff --git a/nemo/collections/nlp/modules/common/megatron/utils.py b/nemo/collections/nlp/modules/common/megatron/utils.py index 8ef46c10d49b..919990065057 100644 --- a/nemo/collections/nlp/modules/common/megatron/utils.py +++ b/nemo/collections/nlp/modules/common/megatron/utils.py @@ -18,6 +18,9 @@ from typing import Dict, Iterator, List, Tuple, Union import torch +import torch.nn as nn + +from torch import Tensor try: from apex.normalization import MixedFusedRMSNorm @@ -43,6 +46,13 @@ HAVE_MEGATRON_CORE = False +def QuickGELUActivation(input: Tensor): + """ + Applies GELU approximation that is fast but somewhat inaccurate. See: https://github.com/hendrycks/GELUs + """ + return input * torch.sigmoid(1.702 * input) + + class ApexGuardDefaults(object): """ This class can be used to replace missing classes when apex is missing. diff --git a/nemo/collections/vision/modules/vit/vit_backbone.py b/nemo/collections/vision/modules/vit/vit_backbone.py index ea114619e0cb..46347980370d 100644 --- a/nemo/collections/vision/modules/vit/vit_backbone.py +++ b/nemo/collections/vision/modules/vit/vit_backbone.py @@ -301,6 +301,7 @@ def __init__( sequence_parallel=model_cfg.sequence_parallel, activations_checkpoint_granularity=model_cfg.activations_checkpoint_granularity, gradient_accumulation_fusion=model_cfg.gradient_accumulation_fusion, + activation=model_cfg.activation, ) def set_input_tensor(self, input_tensor): From 410bcb70ccfd3aedb5a786c67d71fab3830088bb Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Thu, 8 Jun 2023 15:58:44 -0700 Subject: [PATCH 132/512] Fix clip activation default value --- nemo/collections/multimodal/models/clip/megatron_clip_models.py | 2 +- nemo/collections/vision/modules/vit/vit_backbone.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/collections/multimodal/models/clip/megatron_clip_models.py b/nemo/collections/multimodal/models/clip/megatron_clip_models.py index 96c0f86b5600..1653faaf6900 100644 --- a/nemo/collections/multimodal/models/clip/megatron_clip_models.py +++ b/nemo/collections/multimodal/models/clip/megatron_clip_models.py @@ -186,7 +186,7 @@ def __init__(self, model_cfg, padded_vocab_size, pre_process=True, post_process= fp8_amax_compute_algo=model_cfg.fp8_amax_compute_algo, reduce_amax=model_cfg.get('reduce_amax', True), use_emha=model_cfg.use_emha, - activation=model_cfg.activation, + activation=model_cfg.get('activation', 'gelu'), ) self.initialize_word_embeddings( diff --git a/nemo/collections/vision/modules/vit/vit_backbone.py b/nemo/collections/vision/modules/vit/vit_backbone.py index 46347980370d..661eeec8b3f3 100644 --- a/nemo/collections/vision/modules/vit/vit_backbone.py +++ b/nemo/collections/vision/modules/vit/vit_backbone.py @@ -301,7 +301,7 @@ def __init__( sequence_parallel=model_cfg.sequence_parallel, activations_checkpoint_granularity=model_cfg.activations_checkpoint_granularity, gradient_accumulation_fusion=model_cfg.gradient_accumulation_fusion, - activation=model_cfg.activation, + activation=model_cfg.get('activation', 'gelu'), ) def set_input_tensor(self, input_tensor): From daca448d431f9c1f6aecca16b89194da2463ec7e Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Fri, 9 Jun 2023 12:32:49 -0700 Subject: [PATCH 133/512] Add HF CLIP converter --- .../foundation/clip/convert_hfclip_to_nemo.py | 191 ++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 examples/multimodal/foundation/clip/convert_hfclip_to_nemo.py diff --git a/examples/multimodal/foundation/clip/convert_hfclip_to_nemo.py b/examples/multimodal/foundation/clip/convert_hfclip_to_nemo.py new file mode 100644 index 000000000000..e48736d2dbba --- /dev/null +++ b/examples/multimodal/foundation/clip/convert_hfclip_to_nemo.py @@ -0,0 +1,191 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from argparse import ArgumentParser + +import einops +import torch +from apex.transformer import parallel_state +from omegaconf import OmegaConf +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.trainer import Trainer +from transformers import CLIPModel + +from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.utils import AppState, logging +from nemo.utils.distributed import initialize_distributed +from nemo.utils.model_utils import inject_model_parallel_rank + + +def get_args(): + parser = ArgumentParser() + parser.add_argument("--hf_name", type=str, default="yuvalkirstain/PickScore_v1") + + parser.add_argument( + "--hparams_file", + type=str, + default=None, + required=False, + help="Path config for restoring. It's created during training and may need to be modified during restore if restore environment is different than training. Ex: /raid/nemo_experiments/megatron_gpt/hparams.yaml", + ) + parser.add_argument("--nemo_file_path", type=str, default=None, required=True, help="Path to output .nemo file.") + parser.add_argument("--gpus_per_node", type=int, required=False, default=1) + parser.add_argument("--tensor_model_parallel_size", type=int, required=False, default=1) + parser.add_argument("--pipeline_model_parallel_size", type=int, required=False, default=1) + parser.add_argument( + "--pipeline_model_parallel_split_rank", + type=int, + required=False, + default=None, + help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", + ) + parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) + parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") + + args = parser.parse_args() + return args + + +def mapping_state_dict(open_model): + open_state_dict = open_model.state_dict() + key_mapping = { + "text_projection.weight": "text_encoder.head.weight", + "visual_projection.weight": "vision_encoder.head.weight", + } + + layer_mapping = { + ".layer_norm1.weight": ".input_layernorm.weight", + ".layer_norm1.bias": ".input_layernorm.bias", + ".self_attn.out_proj.weight": ".self_attention.dense.weight", + ".self_attn.out_proj.bias": ".self_attention.dense.bias", + ".layer_norm2.weight": ".post_attention_layernorm.weight", + ".layer_norm2.bias": ".post_attention_layernorm.bias", + ".mlp.fc1.weight": ".mlp.dense_h_to_4h.weight", + ".mlp.fc1.bias": ".mlp.dense_h_to_4h.bias", + ".mlp.fc2.weight": ".mlp.dense_4h_to_h.weight", + ".mlp.fc2.bias": ".mlp.dense_4h_to_h.bias", + ".pre_layrnorm.weight": ".preprocess_layernorm.weight", + ".pre_layrnorm.bias": ".preprocess_layernorm.bias", + ".post_layernorm.weight": ".transformer.final_layernorm.weight", + ".post_layernorm.bias": ".transformer.final_layernorm.bias", + ".backbone.embeddings.position_embedding.weight": ".backbone.position_embeddings", + ".language_model.embeddings.position_embedding.weight": ".language_model.embedding.position_embeddings", + ".embeddings.class_embedding": ".cls_token", + ".backbone.embeddings.patch_embedding.weight": ".backbone.linear_encoder.weight", + ".final_layer_norm.weight": ".encoder.final_layernorm.weight", + ".final_layer_norm.bias": ".encoder.final_layernorm.bias", + ".embeddings.token_embedding.weight": ".embedding.word_embeddings.weight", + } + + nemo_state_dict = {} + for key in open_state_dict.keys(): + if key.startswith("text_model.encoder.layers"): + key_ = key.replace("text_model.encoder.layers", "text_encoder.language_model.encoder.layers") + elif key.startswith("vision_model.encoder.layers"): + key_ = key.replace("vision_model.encoder.layers", "vision_encoder.backbone.transformer.layers") + elif key.startswith('vision_model.'): + key_ = key.replace("vision_model.", "vision_encoder.backbone.") + elif key.startswith('text_model.'): + key_ = key.replace('text_model.', 'text_encoder.language_model.') + else: + key_ = key + for pat in key_mapping: + if key_ == pat: + key_ = key_.replace(pat, key_mapping[pat]) + for pat in layer_mapping: + if key_.endswith(pat): + key_ = key_[: -len(pat)] + layer_mapping[pat] + break + if 'q_proj' in key_: + key_k = key.replace('q_proj', 'k_proj') + key_v = key.replace('q_proj', 'v_proj') + key_new = key_.replace('self_attn.q_proj', 'self_attention.query_key_value') + value_new = torch.concat((open_state_dict[key], open_state_dict[key_k], open_state_dict[key_v]), dim=0) + nemo_state_dict[key_new] = value_new + elif not ('k_proj' in key_ or 'v_proj' in key_ or 'position_ids' in key_): + nemo_state_dict[key_] = open_state_dict[key] + + nemo_state_dict["vision_encoder.backbone.cls_token"] = nemo_state_dict[ + "vision_encoder.backbone.cls_token" + ].reshape(1, 1, -1) + w = nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] + nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] = einops.rearrange(w, "b c p1 p2 -> b (p1 p2 c)",) + nemo_state_dict["vision_encoder.backbone.linear_encoder.bias"] = torch.zeros(w.shape[0]) + + return nemo_state_dict + + +def convert(local_rank, rank, world_size, args): + app_state = AppState() + app_state.data_parallel_rank = 0 + num_nodes = world_size // args.gpus_per_node + if args.bcp: + trainer = Trainer( + devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu', plugins=[TorchElasticEnvironment()] + ) + else: + trainer = Trainer(devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu') + + app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = args.tensor_model_parallel_size + + # no use atm, use to split ranks in encoder/decoder models. + if args.pipeline_model_parallel_size > 1 and args.model_type in []: + if args.pipeline_model_parallel_split_rank is not None: + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_split_rank + else: + if args.pipeline_model_parallel_size % 2 != 0: + raise ValueError( + f"Pipeline model parallel size {args.pipeline_model_parallel_size} must be even if split rank is not specified." + ) + else: + # If split rank is not set, then we set it to be pipeline_model_parallel_size // 2 - this is because in most cases we have the same number of enc/dec layers. + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_size // 2 + else: + app_state.pipeline_model_parallel_split_rank = None + + app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size + + parallel_state.initialize_model_parallel( + tensor_model_parallel_size_=app_state.tensor_model_parallel_size, + pipeline_model_parallel_size_=app_state.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank_=app_state.pipeline_model_parallel_split_rank, + ) + + app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() + app_state.tensor_model_parallel_rank = parallel_state.get_tensor_model_parallel_rank() + + cfg = OmegaConf.load(args.hparams_file) + model = MegatronCLIPModel(cfg.model, trainer) + + hf_model = CLIPModel.from_pretrained(args.hf_name) + state_dict = mapping_state_dict(hf_model) + model.model.load_state_dict(state_dict) + + model._save_restore_connector = NLPSaveRestoreConnector() + + if torch.distributed.is_initialized(): + torch.distributed.barrier() + + model.save_to(args.nemo_file_path) + + logging.info(f'NeMo model saved to: {args.nemo_file_path}') + + +if __name__ == '__main__': + args = get_args() + local_rank, rank, world_size = initialize_distributed(args) + convert(local_rank, rank, world_size, args) From e0939a569b52313f76f4e1975a0c94f97e29c1d4 Mon Sep 17 00:00:00 2001 From: Yu Yao Date: Fri, 9 Jun 2023 13:04:25 -0700 Subject: [PATCH 134/512] Add imagen support --- examples/multimodal/convert_ckpt_to_nemo.py | 3 + .../multimodal/generative/imagen/README.md | 104 ++ .../generative/imagen/conf/base64-2b.yaml | 138 +++ .../imagen/conf/base64-500m-edm.yaml | 135 +++ .../generative/imagen/conf/base64-500m.yaml | 143 +++ .../conf/base64-500m_online_encoding.yaml | 136 +++ .../generative/imagen/conf/fid_inference.yaml | 26 + .../imagen/conf/inference_pipeline.yaml | 42 + .../generative/imagen/conf/sr1024-600m.yaml | 141 +++ .../imagen/conf/sr256-400m-edm.yaml | 221 +++++ .../generative/imagen/conf/sr256-400m.yaml | 147 +++ .../imagen/conf/sr256-450m-edm.yaml | 221 +++++ .../imagen/conf/sr256-600m-edm-noise.yaml | 141 +++ .../imagen/conf/sr256-600m-edm.yaml | 218 +++++ .../generative/imagen/conf/sr256-600m.yaml | 143 +++ .../imagen/imagen_generate_images.py | 62 ++ .../generative/imagen/imagen_infer.py | 45 + .../generative/imagen/imagen_training.py | 119 +++ .../multimodal/data/common/webdataset.py | 2 +- .../multimodal/data/imagen/__init__.py | 13 + .../data/imagen/augmentations/__init__.py | 13 + .../imagen/augmentations/augmentations.py | 76 ++ .../data/imagen/augmentations/corruption.py | 33 + .../multimodal/data/imagen/imagen_dataset.py | 111 +++ .../stable_diffusion_dataset.py | 6 +- .../multimodal/models/imagen/imagen.py | 535 +++++++++++ .../models/imagen/imagen_pipeline.py | 324 +++++++ .../multimodal/models/imagen/precond.py | 191 ++++ .../imagen/diffusionmodules/attention.py | 317 +++++++ .../modules/imagen/diffusionmodules/blocks.py | 897 ++++++++++++++++++ .../modules/imagen/diffusionmodules/embs.py | 69 ++ .../modules/imagen/diffusionmodules/layers.py | 240 +++++ .../modules/imagen/diffusionmodules/nets.py | 624 ++++++++++++ .../modules/imagen/encoder/t5encoder.json | 51 + .../modules/imagen/encoder/t5encoder.py | 61 ++ .../modules/imagen/sampler/batch_ops.py | 57 ++ .../modules/imagen/sampler/continuous_ddpm.py | 169 ++++ .../modules/imagen/sampler/sampler.py | 250 +++++ .../multimodal/parts/imagen/__init__.py | 13 + .../multimodal/parts/imagen/utils.py | 29 + 40 files changed, 6263 insertions(+), 3 deletions(-) create mode 100644 examples/multimodal/generative/imagen/README.md create mode 100644 examples/multimodal/generative/imagen/conf/base64-2b.yaml create mode 100644 examples/multimodal/generative/imagen/conf/base64-500m-edm.yaml create mode 100644 examples/multimodal/generative/imagen/conf/base64-500m.yaml create mode 100644 examples/multimodal/generative/imagen/conf/base64-500m_online_encoding.yaml create mode 100644 examples/multimodal/generative/imagen/conf/fid_inference.yaml create mode 100644 examples/multimodal/generative/imagen/conf/inference_pipeline.yaml create mode 100644 examples/multimodal/generative/imagen/conf/sr1024-600m.yaml create mode 100644 examples/multimodal/generative/imagen/conf/sr256-400m-edm.yaml create mode 100644 examples/multimodal/generative/imagen/conf/sr256-400m.yaml create mode 100644 examples/multimodal/generative/imagen/conf/sr256-450m-edm.yaml create mode 100644 examples/multimodal/generative/imagen/conf/sr256-600m-edm-noise.yaml create mode 100644 examples/multimodal/generative/imagen/conf/sr256-600m-edm.yaml create mode 100644 examples/multimodal/generative/imagen/conf/sr256-600m.yaml create mode 100644 examples/multimodal/generative/imagen/imagen_generate_images.py create mode 100644 examples/multimodal/generative/imagen/imagen_infer.py create mode 100644 examples/multimodal/generative/imagen/imagen_training.py create mode 100644 nemo/collections/multimodal/data/imagen/__init__.py create mode 100644 nemo/collections/multimodal/data/imagen/augmentations/__init__.py create mode 100644 nemo/collections/multimodal/data/imagen/augmentations/augmentations.py create mode 100644 nemo/collections/multimodal/data/imagen/augmentations/corruption.py create mode 100644 nemo/collections/multimodal/data/imagen/imagen_dataset.py create mode 100644 nemo/collections/multimodal/models/imagen/imagen.py create mode 100644 nemo/collections/multimodal/models/imagen/imagen_pipeline.py create mode 100644 nemo/collections/multimodal/models/imagen/precond.py create mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/attention.py create mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py create mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/embs.py create mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py create mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py create mode 100644 nemo/collections/multimodal/modules/imagen/encoder/t5encoder.json create mode 100644 nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py create mode 100644 nemo/collections/multimodal/modules/imagen/sampler/batch_ops.py create mode 100644 nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py create mode 100644 nemo/collections/multimodal/modules/imagen/sampler/sampler.py create mode 100644 nemo/collections/multimodal/parts/imagen/__init__.py create mode 100644 nemo/collections/multimodal/parts/imagen/utils.py diff --git a/examples/multimodal/convert_ckpt_to_nemo.py b/examples/multimodal/convert_ckpt_to_nemo.py index 4a1b1db4c794..393617deed7d 100644 --- a/examples/multimodal/convert_ckpt_to_nemo.py +++ b/examples/multimodal/convert_ckpt_to_nemo.py @@ -33,6 +33,7 @@ from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel from nemo.collections.multimodal.models.dreambooth.dreambooth import MegatronDreamBooth +from nemo.collections.multimodal.models.imagen.imagen import MegatronImagen from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector @@ -156,6 +157,8 @@ def convert(local_rank, rank, world_size, args): model = MegatronLatentDiffusion.load_from_checkpoint( checkpoint_path, hparams_file=args.hparams_file, trainer=trainer ) + elif args.model_type == 'imagen': + model = MegatronImagen.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, trainer=trainer) else: raise ValueError(f"Unrecognized model_type {args.model_type}.") diff --git a/examples/multimodal/generative/imagen/README.md b/examples/multimodal/generative/imagen/README.md new file mode 100644 index 000000000000..ba33b649cb35 --- /dev/null +++ b/examples/multimodal/generative/imagen/README.md @@ -0,0 +1,104 @@ +# Imagen +## A. Overview + +Imagen is a multi-stage text-to-image diffusion model with an unprecedented degree of photorealism and a deep level of language understanding. Given a text prompt, Imagen first generates an image at a 64x64 resolution and then upsamples the generated image to 256x256 and 1024x1024 resolutions, all using diffusion models. + +**Table of Contents:** +- [Imagen](#imagen) + - [A. Overview](#a-overview) + - [B. Imagen Pipeline](#b-imagen-pipeline) + - [C. Files in this folder](#c-files-in-this-folder) + - [D. Imagen Training](#d-imagen-training) + - [D.1 Training Dataset](#d1-training-dataset) + - [D.2 Training configs](#d2-training-configs) + - [E. Imagen Inference](#e-imagen-inference) + - [E.1 Inference Settings](#e1-inference-settings) + - [E.2 Running the sample inference code](#e2-running-the-sample-inference-code) + - [E.3 Inference GPU Memory Usage](#e3-inference-gpu-memory-usage) + - [E.3.1 FP16 Inference](#e31-fp16-inference) + - [E.3.2 FP32 Inference](#e32-fp32-inference) + - [E.3.3 AMP Inference (Autocast Enabled)](#e33-amp-inference-autocast-enabled) + - [F. UNet Architecture](#f-unet-architecture) + - [F.1 U-Net (used for base model)](#f1-u-net-used-for-base-model) + - [F.2 Efficient U-Net (used for SR models)](#f2-efficient-u-net-used-for-sr-models) + +## B. Imagen Pipeline + +Imagen comprises a frozen text encoder (e.g. T5-XXL) to map input text into a sequence of embeddings, and a 64x64 image diffusion model, followed by two super-resolution diffusion models for generating 256x256 and 1024x1024 images. All diffusion models are conditioned on the text embedding sequence and use classifier-free guidance. + +## C. Files in this folder + +- [imagen_training.py](imagen_training.py): Script for running inference +- [imagen_generate_images.py](imagen_generate_images.py): Script for generating images for FID-CLIP analysis +- [imagen_infer.py](imagen_infer.py): Script for running inference + +## D. Imagen Training + +All three diffusion models (64x64, 256x256, 1024x1024) can be trained independently. + +### D.1 Training Dataset + +### D.2 Training configs +| configs | Description | +|---|---| +| base64-2b.yaml | 2b-parameter base 64x64 model as described in Imagen paper | +| base64-500m.yaml | 500m-parameter base 64x64 model with decreased number of embedding channels| +|sr256-400m.yaml| 400m-parameter sr 256x256 model as described in Imagen paper | +|sr1024-400m.yaml| 400m-parameter sr 1024x1024 model as described in Imagen paper | + +## E. Imagen Inference + +### E.1 Inference Settings + +[inference_pipeline.yaml](conf/inference_pipeline.yaml) specifies every config for running the sample inference code. Specifically: +- num_images_per_promt: The number of images you want to generate for each text prompt +- model_name: Different pre-defined configs (not used for now) +- run_ema_model: Either run reg/ema model for pretrained models +- customized_model: Instead of loading pre-defined models, load specified checkpoint. .ckpt checkpoint (generated during in-the-middle of training) and .nemo checkpoint (generated once training completed) are both acceptable +- target_resolution: should be one of [64, 256, 1024] +- inference_precision: Running inference in one of [16, 32, AMP] mode +- dynamic_thresholding: Whether to use dynamic thresholding when generating images +- texts: List of text prompts that are used to generate images +- output_path: The path to save generate images +- encoder_path: If not set (null), it will download text encoder first time running the inference code (and will be saved to HF_HOME), you can also load it offline by setting it to the prepared folder +- samplers: List of sampler settings that are used for each model. `step` (the number of iterations to denoise the image, ideally the larger the better, but also consume more time) and `cfg` for classifier free guidance value. You can tweak these values for better visual quality. + +### E.2 Running the sample inference code +``` +(inside NeMo root folder) +python examples/multimodal/generative/imagen/imagen_infer.py +``` + +### E.3 Inference GPU Memory Usage + +#### E.3.1 FP16 Inference +| Output\Batch size | 1 | 8 | +|-------------------|-------|-------| +| 64x64 | 11.7G | 11.9G | +| 256x256 | 12.5G | 13.0G | +| 1024x1024 | 14.1G | 21.6G | + +#### E.3.2 FP32 Inference +| Output\Batch size | 1 | 8 | +|-------------------|-------|-------| +| 64x64 | 21.7G | 22.6G | +| 256x256 | 23.4G | 24.5G | +| 1024x1024 | 26.6G | 40.6G | + +#### E.3.3 AMP Inference (Autocast Enabled) +| Output\Batch size | 1 | 8 | +|-------------------|-------|-------| +| 64x64 | 22.4G | 23.4G | +| 256x256 | 24.0G | 25.1G | +| 1024x1024 | 26.4G | 33.7G | + +## F. UNet Architecture + +We have prepared two types of UNet for Imagen according to the paper. Base model (64x64) and SR models (256x256, 1024x1024) are using different UNet models. + +### F.1 U-Net (used for base model) + + + +### F.2 Efficient U-Net (used for SR models) + diff --git a/examples/multimodal/generative/imagen/conf/base64-2b.yaml b/examples/multimodal/generative/imagen/conf/base64-2b.yaml new file mode 100644 index 000000000000..23773b0bf4b4 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/base64-2b.yaml @@ -0,0 +1,138 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-base64 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-base64-nf512 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 16 # limited by GPU memory + global_batch_size: 16 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + unet_type: base + unet: + embed_dim: 512 + image_size: 64 + channels: 3 + num_res_blocks: 3 + channel_mult: [ 1, 2, 3, 4 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 512 + attention_type: fused + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [ 8, 16, 32 ] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: True + resblock_updown: False + resample_with_conv: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + # If want to switch to continuous DDPM training, + # use the following config: + # preconditioning_type: DDPM + # preconditioning: + # loss_type: l2 + # pred_objective: noise + # noise_schedule: cosine + # timesteps: 1000 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 64 + center_crop_h_w: 64, 64 + horizontal_flip: False + filterings: null + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: False + local_root_path: /datasets + verbose: False + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/base64-500m-edm.yaml b/examples/multimodal/generative/imagen/conf/base64-500m-edm.yaml new file mode 100644 index 000000000000..68b1ea57384e --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/base64-500m-edm.yaml @@ -0,0 +1,135 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-base64 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-base64-nf256 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 100 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 48 # limited by GPU memory + global_batch_size: 48 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + unet_type: base + + unet: + embed_dim: 256 + image_size: 64 + channels: 3 + num_res_blocks: 3 + channel_mult: [ 1, 2, 3, 4 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 512 + attention_type: fused + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [ 8, 16, 32 ] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: False + resblock_updown: False + resample_with_conv: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 64 + center_crop_h_w: 64, 64 + horizontal_flip: False + filterings: null + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: False + local_root_path: /datasets + verbose: False + pbss_checkpoint_saving: + enable: False + pbss_credentials_file: pbss_credentials_joc.secret + save_frequency: 1000 + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null diff --git a/examples/multimodal/generative/imagen/conf/base64-500m.yaml b/examples/multimodal/generative/imagen/conf/base64-500m.yaml new file mode 100644 index 000000000000..9c2b8436c572 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/base64-500m.yaml @@ -0,0 +1,143 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-base64 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-base64-nf256 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 64 # limited by GPU memory + global_batch_size: 64 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + unet_type: base + + unet: + embed_dim: 256 + image_size: 64 + channels: 3 + num_res_blocks: 3 + channel_mult: [ 1, 2, 3, 4 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 512 + attention_type: fused + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [ 8, 16, 32 ] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: True + resblock_updown: False + resample_with_conv: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + # If want to switch to continuous DDPM training, + # use the following config: + # preconditioning_type: DDPM + # preconditioning: + # loss_type: l2 + # pred_objective: noise + # noise_schedule: cosine + # timesteps: 1000 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 64 + center_crop_h_w: 64, 64 + horizontal_flip: False + filterings: null + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: False + local_root_path: /datasets + verbose: False + pbss_checkpoint_saving: + enable: False + pbss_credentials_file: pbss_credentials_joc.secret + save_frequency: 1000 + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null diff --git a/examples/multimodal/generative/imagen/conf/base64-500m_online_encoding.yaml b/examples/multimodal/generative/imagen/conf/base64-500m_online_encoding.yaml new file mode 100644 index 000000000000..637c954719a2 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/base64-500m_online_encoding.yaml @@ -0,0 +1,136 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-base64 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-base64-nf256 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 100 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 48 # limited by GPU memory + global_batch_size: 48 # will use more micro batches to reach global batch size + + unet_type: base + unet: + embed_dim: 256 + image_size: 64 + channels: 3 + num_res_blocks: 3 + channel_mult: [ 1, 2, 3, 4 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 512 + attention_type: fused + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [ 8, 16, 32 ] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: True + flash_attention: False + resblock_updown: False + resample_with_conv: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + preconditioning_type: DDPM + preconditioning: + loss_type: l2 + pred_objective: noise + noise_schedule: cosine + timesteps: 1000 + + conditioning: + online_encoding: True # defaults to False (use precached encodings) if not specified + # Online encoding increases training time by about 3-4x, and is only for users who want to do a quick dev run of + # Imagen, and/or those who do not have the disk space to store precached embeddings. + # Optionally specify encoder_path if online_encoding; else, specify precached_key and out_key + encoder_path: # folder path to t5xxl-encoder.bin, or leave empty to download (and cache) t5-11b weights + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 64 + center_crop_h_w: 64, 64 + horizontal_flip: False + filterings: null + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: False + local_root_path: /datasets + verbose: False + pbss_checkpoint_saving: + enable: False + pbss_credentials_file: pbss_credentials_joc.secret + save_frequency: 1000 + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/fid_inference.yaml b/examples/multimodal/generative/imagen/conf/fid_inference.yaml new file mode 100644 index 000000000000..413da2b8eeac --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/fid_inference.yaml @@ -0,0 +1,26 @@ +num_images_per_promt: 8 # The number of images generated for each promt text +model_name: null # Avaliable model_name defined in pretrained_models.yaml +run_ema_model: True # Whether load the reg/ema model when using pretrained models +customized_model: # Mutually exclusive with model_name + base_ckpt: /aot/exp/nemo-megatron-stacked-ddpm-16n/imagen-nemo/checkpoints/imagen-nemo--reduced_train_loss=0.03-step=100000-consumed_samples=512000000.0.ckpt # Either .ckpt or .nemo is accepatable + base_cfg: examples/multimodal/generative/imagen/conf/base64-500m.yaml # Must provided if loading .ckpt checkpoint + sr256_ckpt: null + sr256_cfg: examples/multimodal/generative/imagen/conf/sr256-400m.yaml + sr1024_ckpt: null + sr1024_cfg: null +target_resolution: 64 # in [64, 256, 1024] +inference_precision: '32' # [16, 32, AMP] +thresholding_method: 'dynamic' +output_path: 'output/imagen-megatron-pipeline-fid' # Save location +record_time: True # Whether to record inference time meta +encoder_path: '/ckpts/encoders' # Set to null if you wish to download encoders on the fly +samplings: + - + step: 250 + cfg: 7.5 + - + step: 20 + cfg: 7.5 + + + diff --git a/examples/multimodal/generative/imagen/conf/inference_pipeline.yaml b/examples/multimodal/generative/imagen/conf/inference_pipeline.yaml new file mode 100644 index 000000000000..1b4bbd9e5a17 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/inference_pipeline.yaml @@ -0,0 +1,42 @@ +num_images_per_promt: 4 # The number of images generated for each promt text +model_name: null # Avaliable model_name defined in pretrained_models.yaml +run_ema_model: True # Whether load the reg/ema model when using pretrained models +customized_model: # Mutually exclusive with model_name + base_ckpt: null # Either .ckpt or .nemo is accepatable + base_cfg: examples/multimodal/generative/imagen/conf/base64-500m.yaml # Must provided if loading .ckpt checkpoint + sr256_ckpt: null + sr256_cfg: examples/multimodal/generative/imagen/conf/sr256-400m.yaml + sr1024_ckpt: null + sr1024_cfg: examples/multimodal/generative/imagen/conf/sr1024-400m.yaml +target_resolution: 64 # in [64, 256, 1024] +inference_precision: 32 # [16, 32, AMP] +thresholding_method: dynamic +texts: + - 'a photograph of an astronaut riding a horse' + - 'a highly detailed digital painting of a portal in a mystic forest with many beautiful trees. A person is standing in front of the portal' + - A photo of a Shiba Inu dog with a backpack riding a bike. It is wearing sunglasses and a beach hat. + - A cute corgi lives in a house made out of sushi. + - A high contrast portrait of a very happy fuzzy panda dressed as a chef in a high end kitchen making dough. There is a painting of flowers on the wall behind him. + - A brain riding a rocketship heading towards the moon. + - One cat and two dogs sitting on the grass. + - A wine glass on top of a dog. + - A blue coloured pizza. + - A transparent sculpture of a duck made out of glass. There is a painting on the wall behind it. + - A raccoon wearing cowboy hat and black leather jacket is behind the backyard window. Rain droplets on the window. + +output_path: 'output/imagen_output' # Save location +record_time: True # Whether to record inference time meta +encoder_path: '/ckpts/encoders' # Set to null if you wish to download encoders on the fly +samplings: + - # Base64 + step: 30 + cfg: 7.5 + - # SR256 + step: 20 + cfg: 8 + - # SR1024 + step: 20 + cfg: 7.5 + + + diff --git a/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml b/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml new file mode 100644 index 000000000000..5f40d2a076af --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml @@ -0,0 +1,141 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-1024 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-sr1024-nf128 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False +model: + + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 64 # limited by GPU memory + global_batch_size: 64 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + + unet_type: sr + unet: + embed_dim: 128 + image_size: 1024 + channels: 3 + channel_mult: [ 1, 2, 4, 8, 8 ] + num_attn_heads: 8 + per_head_channels: 64 + attention_type: cross + atnn_enabled_at: [ 0, 0, 0, 1, 1 ] + feature_pooling_type: attention + stride: 2 + num_resblocks: [ 2, 4, 8, 8, 8 ] + learned_sinu_pos_emb_dim: 0 + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: True + flash_attention: False + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + noise_cond_aug: True + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + # If want to switch to continuous DDPM training, + # use the following config: + # preconditioning_type: DDPM + # preconditioning: + # loss_type: l2 + # pred_objective: noise + # noise_schedule: cosine + # timesteps: 1000 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 1024 + center_crop_h_w: 256, 256 + horizontal_flip: False + filterings: + resolution: + method: larger + value: 1024 + estimated_portion: 0.2 # Estimated % of examples left after filtering. This is use to estimate # epoch + target_resolutions: [64, 256] + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: True + local_root_path: /datasets + verbose: False + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-400m-edm.yaml b/examples/multimodal/generative/imagen/conf/sr256-400m-edm.yaml new file mode 100644 index 000000000000..843cc5189a8c --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/sr256-400m-edm.yaml @@ -0,0 +1,221 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-sr256-nf128 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 16 # limited by GPU memory + global_batch_size: 16 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + + unet_type: sr-unet + unet: + embed_dim: 128 + image_size: 256 + channels: 3 + num_res_blocks: [2, 2, 3, 4, 3] + channel_mult: [ 1, 2, 4, 6, 6 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 512 + attention_type: fused + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [32, 16] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: True + resblock_updown: False + resample_with_conv: True + low_res_cond: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + # - datasets/improved-aesthetic/wdinfo-selene.pkl + augmentations: + resize_smallest_side: 256 + center_crop_h_w: 256, 256 + horizontal_flip: False + filterings: + resolution: + method: larger + value: 256 + estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch + corruption_aug: + target_resolution: [ 64, 256 ] + kernel_radius_dict: # used for blurring & resizing, otherwise, not necessary. + 8: 1 + 16: 2 + 32: 3 + 64: 6 + 128: 11 + 256: 22 + 512: 44 + 1024: 88 + 2048: 176 + 4096: 352 + + blur: + add_random_blur: True + blur_prob1: 0.2 + blur_prob2: 0.2 + + blur_sigma_dict: + 8: 0.25 + 16: 0.5 + 32: 0.75 + 64: 1.5 + 128: 3 + 256: 6 + 512: 12 + 1024: 24 + 2048: 48 + 4096: 96 + + resize: + add_random_resize: True + + resize_prob1: + up: 0.2 + down: 0.2 + keep: 0.6 + resize_prob2: + up: 0.2 + down: 0.2 + keep: 0.6 + + resize_range1: + - 0.8 + - 1.2 + resize_range2: + - 0.8 + - 1.2 + + noise: + add_random_noise: True + gaussian_noise_prob1: 1.0 # 0.5 + gaussian_noise_prob2: 1.0 # 0.5 + gray_noise_prob1: 0.0 # 0.4 + gray_noise_prob2: 0.0 # 0.4 + + gaussian_sigma_range1: + - 0 + - 3 + gaussian_sigma_range2: + - 0 + - 2.5 + + poisson_scale_range1: + - 0.005 + - 3 + poisson_scale_range2: + - 0.005 + - 2.5 + + jpeg: + add_random_compression: False + jpeg_range1: + - 75 + - 95 + jpeg_range2: + - 75 + - 95 + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: True + local_root_path: /datasets + verbose: False + pbss_checkpoint_saving: + enable: False + pbss_credentials_file: pbss_credentials_joc.secret + save_frequency: 1000 + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-400m.yaml b/examples/multimodal/generative/imagen/conf/sr256-400m.yaml new file mode 100644 index 000000000000..ce0dc88f2abc --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/sr256-400m.yaml @@ -0,0 +1,147 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-sr256-nf128 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 16 # limited by GPU memory + global_batch_size: 16 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + + unet_type: sr-unet + unet: + embed_dim: 128 + image_size: 256 + channels: 3 + num_res_blocks: [2, 2, 3, 4, 3] + channel_mult: [ 1, 2, 4, 6, 6 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 512 + attention_type: fused + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [32, 16] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: True + resblock_updown: False + resample_with_conv: True + low_res_cond: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + noise_cond_aug: True + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + # If want to switch to continuous DDPM training, + # use the following config: + # preconditioning_type: DDPM + # preconditioning: + # loss_type: l2 + # pred_objective: noise + # noise_schedule: cosine + # timesteps: 1000 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 256 + center_crop_h_w: 256, 256 + horizontal_flip: False + filterings: + resolution: + method: larger + value: 256 + estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch + target_resolutions: [ 64, 256 ] + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: True + local_root_path: /datasets + verbose: False + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-450m-edm.yaml b/examples/multimodal/generative/imagen/conf/sr256-450m-edm.yaml new file mode 100644 index 000000000000..961357014288 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/sr256-450m-edm.yaml @@ -0,0 +1,221 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-sr256-nf128 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 16 # limited by GPU memory + global_batch_size: 16 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + + unet_type: sr-unet + unet: + embed_dim: 128 + image_size: 256 + channels: 3 + num_res_blocks: [2, 2, 3, 4, 3] + channel_mult: [ 1, 2, 4, 6, 6 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 512 + attention_type: stacked + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [32, 16] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: True + resblock_updown: False + resample_with_conv: True + low_res_cond: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + # - datasets/improved-aesthetic/wdinfo-selene.pkl + augmentations: + resize_smallest_side: 256 + center_crop_h_w: 256, 256 + horizontal_flip: False + filterings: + resolution: + method: larger + value: 256 + estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch + corruption_aug: + target_resolution: [ 64, 256 ] + kernel_radius_dict: # used for blurring & resizing, otherwise, not necessary. + 8: 1 + 16: 2 + 32: 3 + 64: 6 + 128: 11 + 256: 22 + 512: 44 + 1024: 88 + 2048: 176 + 4096: 352 + + blur: + add_random_blur: True + blur_prob1: 0.2 + blur_prob2: 0.2 + + blur_sigma_dict: + 8: 0.25 + 16: 0.5 + 32: 0.75 + 64: 1.5 + 128: 3 + 256: 6 + 512: 12 + 1024: 24 + 2048: 48 + 4096: 96 + + resize: + add_random_resize: True + + resize_prob1: + up: 0.2 + down: 0.2 + keep: 0.6 + resize_prob2: + up: 0.2 + down: 0.2 + keep: 0.6 + + resize_range1: + - 0.8 + - 1.2 + resize_range2: + - 0.8 + - 1.2 + + noise: + add_random_noise: True + gaussian_noise_prob1: 1.0 # 0.5 + gaussian_noise_prob2: 1.0 # 0.5 + gray_noise_prob1: 0.0 # 0.4 + gray_noise_prob2: 0.0 # 0.4 + + gaussian_sigma_range1: + - 0 + - 3 + gaussian_sigma_range2: + - 0 + - 2.5 + + poisson_scale_range1: + - 0.005 + - 3 + poisson_scale_range2: + - 0.005 + - 2.5 + + jpeg: + add_random_compression: False + jpeg_range1: + - 75 + - 95 + jpeg_range2: + - 75 + - 95 + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: True + local_root_path: /datasets + verbose: False + pbss_checkpoint_saving: + enable: False + pbss_credentials_file: pbss_credentials_joc.secret + save_frequency: 1000 + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-600m-edm-noise.yaml b/examples/multimodal/generative/imagen/conf/sr256-600m-edm-noise.yaml new file mode 100644 index 000000000000..8c54d64b3281 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/sr256-600m-edm-noise.yaml @@ -0,0 +1,141 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-sr256-nf128 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 32 # limited by GPU memory + global_batch_size: 32 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + + unet_type: sr + unet: + embed_dim: 128 + image_size: 256 + channels: 3 + channel_mult: [ 1, 2, 4, 8, 8 ] + num_attn_heads: 8 + per_head_channels: 64 + attention_type: stacked + atnn_enabled_at: [ 0, 0, 0, 1, 1 ] + feature_pooling_type: attention + stride: 2 + num_resblocks: [ 2, 4, 8, 8, 8 ] + learned_sinu_pos_emb_dim: 0 + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: False + skip_connection_scaling: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + noise_cond_aug: True + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 256 + center_crop_h_w: 256, 256 + horizontal_flip: False + filterings: + resolution: + method: larger + value: 256 + estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch + corruption_aug: + target_resolution: [ 64, 256 ] + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: True + local_root_path: /datasets + verbose: False + pbss_checkpoint_saving: + enable: False + pbss_credentials_file: pbss_credentials_joc.secret + save_frequency: 1000 + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-600m-edm.yaml b/examples/multimodal/generative/imagen/conf/sr256-600m-edm.yaml new file mode 100644 index 000000000000..2eb0ac8f93b4 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/sr256-600m-edm.yaml @@ -0,0 +1,218 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-sr256-nf128 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 32 # limited by GPU memory + global_batch_size: 32 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + + unet_type: sr + unet: + embed_dim: 128 + image_size: 256 + channels: 3 + channel_mult: [ 1, 2, 4, 8, 8 ] + num_attn_heads: 8 + per_head_channels: 64 + attention_type: stacked + atnn_enabled_at: [ 0, 0, 0, 1, 1 ] + feature_pooling_type: attention + stride: 2 + num_resblocks: [ 2, 4, 8, 8, 8 ] + learned_sinu_pos_emb_dim: 0 + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: False + skip_connection_scaling: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + # - datasets/improved-aesthetic/wdinfo-selene.pkl + augmentations: + resize_smallest_side: 256 + center_crop_h_w: 256, 256 + horizontal_flip: False + filterings: + resolution: + method: larger + value: 256 + estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch + corruption_aug: + target_resolution: [ 64, 256 ] + kernel_radius_dict: # used for blurring & resizing, otherwise, not necessary. + 8: 1 + 16: 2 + 32: 3 + 64: 6 + 128: 11 + 256: 22 + 512: 44 + 1024: 88 + 2048: 176 + 4096: 352 + + blur: + add_random_blur: True + blur_prob1: 0.2 + blur_prob2: 0.2 + + blur_sigma_dict: + 8: 0.25 + 16: 0.5 + 32: 0.75 + 64: 1.5 + 128: 3 + 256: 6 + 512: 12 + 1024: 24 + 2048: 48 + 4096: 96 + + resize: + add_random_resize: True + + resize_prob1: + up: 0.2 + down: 0.2 + keep: 0.6 + resize_prob2: + up: 0.2 + down: 0.2 + keep: 0.6 + + resize_range1: + - 0.8 + - 1.2 + resize_range2: + - 0.8 + - 1.2 + + noise: + add_random_noise: True + gaussian_noise_prob1: 1.0 # 0.5 + gaussian_noise_prob2: 1.0 # 0.5 + gray_noise_prob1: 0.0 # 0.4 + gray_noise_prob2: 0.0 # 0.4 + + gaussian_sigma_range1: + - 0 + - 3 + gaussian_sigma_range2: + - 0 + - 2.5 + + poisson_scale_range1: + - 0.005 + - 3 + poisson_scale_range2: + - 0.005 + - 2.5 + + jpeg: + add_random_compression: False + jpeg_range1: + - 75 + - 95 + jpeg_range2: + - 75 + - 95 + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: True + local_root_path: /datasets + verbose: False + pbss_checkpoint_saving: + enable: False + pbss_credentials_file: pbss_credentials_joc.secret + save_frequency: 1000 + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-600m.yaml b/examples/multimodal/generative/imagen/conf/sr256-600m.yaml new file mode 100644 index 000000000000..59141a595cd1 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/sr256-600m.yaml @@ -0,0 +1,143 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-sr256-nf128 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 64 # limited by GPU memory + global_batch_size: 64 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + + unet_type: sr + unet: + embed_dim: 128 + image_size: 256 + channels: 3 + channel_mult: [ 1, 2, 4, 8, 8 ] + num_attn_heads: 8 + per_head_channels: 64 + attention_type: fused + atnn_enabled_at: [ 0, 0, 0, 1, 1 ] + feature_pooling_type: attention + stride: 2 + num_resblocks: [ 2, 4, 8, 8, 8 ] + learned_sinu_pos_emb_dim: 0 + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: True + skip_connection_scaling: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + noise_cond_aug: True + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + # If want to switch to continuous DDPM training, + # use the following config: + # preconditioning_type: DDPM + # preconditioning: + # loss_type: l2 + # pred_objective: noise + # noise_schedule: cosine + # timesteps: 1000 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 256 + center_crop_h_w: 256, 256 + horizontal_flip: False + filterings: + resolution: + method: larger + value: 256 + estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch + target_resolutions: [64, 256] + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: True + local_root_path: /datasets + verbose: False + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/imagen_generate_images.py b/examples/multimodal/generative/imagen/imagen_generate_images.py new file mode 100644 index 000000000000..a2497425b70e --- /dev/null +++ b/examples/multimodal/generative/imagen/imagen_generate_images.py @@ -0,0 +1,62 @@ +import os +import pickle + +import torch +from omegaconf import OmegaConf +from pytorch_lightning import Trainer + +from nemo.collections.multimodal.models.imagen.imagen_pipeline import ImagenPipeline, ImagenPipelineConfig +from nemo.core.config import hydra_runner + + +@hydra_runner(config_path='conf', config_name='fid_inference.yaml') +def main(inference_config): + inference_config: ImagenPipelineConfig = OmegaConf.merge(ImagenPipelineConfig(), inference_config) + captions = pickle.load(open('coco_captions5k.pkl', 'rb')) + ntasks = 8 + if os.environ.get('CUDA_VISIBLE_DEVICES'): + # Multi-GPU + task_id = int(os.environ.get("CUDA_VISIBLE_DEVICES", 0)) + else: + # Single GPU + task_id = 0 + chuncksize = int(len(captions) // ntasks) + if task_id != ntasks - 1: + input = captions[task_id * chuncksize : (task_id + 1) * chuncksize] + else: + input = captions[task_id * chuncksize :] + captions = input + + trainer = Trainer() + pipeline = ImagenPipeline.from_pretrained(cfg=inference_config, trainer=trainer) + batch_size = 16 + batch_idx = 0 + + possible_res = [64, 256] # [64, 256] + outpaths = [] + for res in possible_res: + outpath = f'{inference_config.output_path}_RES{res}' + os.makedirs(outpath, exist_ok=True) + outpaths.append(outpath) + while True: + if batch_idx * batch_size >= len(captions): + break + batch_captions = captions[batch_idx * batch_size : (batch_idx + 1) * batch_size] + + # Different seed for every image + seeds = [task_id * chuncksize + batch_idx * batch_size + idx for idx in range(len(batch_captions))] + seed = batch_idx + chuncksize + + with torch.no_grad(): + images, all_res_images, throughput = pipeline(prompts=batch_captions, seed=seeds, single_batch_mode=True,) + + for outpath, one_res in zip(outpaths, all_res_images): + for idx, (caption, image) in enumerate(zip(batch_captions, one_res[0])): + image.save(os.path.join(outpath, f'image_{task_id*chuncksize+batch_idx*batch_size+idx}.png')) + with open(os.path.join(outpath, f'image_{task_id*chuncksize+batch_idx*batch_size+idx}.txt'), 'w') as f: + f.writelines(caption) + batch_idx += 1 + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/generative/imagen/imagen_infer.py b/examples/multimodal/generative/imagen/imagen_infer.py new file mode 100644 index 000000000000..5d2e58c395f9 --- /dev/null +++ b/examples/multimodal/generative/imagen/imagen_infer.py @@ -0,0 +1,45 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from omegaconf import OmegaConf +from pytorch_lightning import Trainer + +from nemo.collections.multimodal.models.imagen.imagen_pipeline import ImagenPipeline, ImagenPipelineConfig +from nemo.core.config import hydra_runner + + +@hydra_runner(config_path='conf', config_name='inference_pipeline.yaml') +def main(inference_config): + if inference_config.get('infer'): + # invoking from launcher + trainer = Trainer(inference_config.trainer) + inference_config = inference_config.infer + else: + trainer = Trainer() + inference_config: ImagenPipelineConfig = OmegaConf.merge(ImagenPipelineConfig(), inference_config) + pipeline = ImagenPipeline.from_pretrained(cfg=inference_config, trainer=trainer) + + # Texts are passed in the config files + images, all_res, throughput = pipeline() + + # Save images + outpath = inference_config.output_path + os.makedirs(outpath, exist_ok=True) + for text, pils in zip(inference_config.texts, images): + for idx, image in enumerate(pils): + image.save(os.path.join(outpath, f'{text}_{idx}.png')) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/generative/imagen/imagen_training.py b/examples/multimodal/generative/imagen/imagen_training.py new file mode 100644 index 000000000000..641dde746590 --- /dev/null +++ b/examples/multimodal/generative/imagen/imagen_training.py @@ -0,0 +1,119 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytorch_lightning as pl +import torch +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector +from torch._dynamo import disable, optimize +from torch._inductor import config as inductor_config + +from nemo.collections.multimodal.models.imagen.imagen import MegatronImagen +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path='conf', config_name='base64-500m') +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' + + allow_tf32 = cfg.get('allow_tf32', True) + if allow_tf32: + logging.info('Allow TensorFloat32 operations on supported devices') + else: + logging.info('Disable TensorFloat32 operations.') + torch.backends.cuda.matmul.allow_tf32 = allow_tf32 + torch.backends.cudnn.allow_tf32 = allow_tf32 + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + ) + if megatron_amp_O2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + callbacks = [] + trainer = Trainer(plugins=plugins, strategy=strategy, callbacks=callbacks, **cfg.trainer) + + exp_manager(trainer, cfg.exp_manager) + # update resume from checkpoint found by exp_manager + if cfg.model.get("resume_from_checkpoint") is not None: + resume_from_checkpoint = cfg.model.resume_from_checkpoint + else: + resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path + + logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}') + + trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + + cfg.model.precision = cfg.trainer.precision + + model = MegatronImagen(cfg.model, trainer) + + assert not cfg.model.get( + "inductor", False + ), 'Inductor is currently under investigation of its impact on convergence.' + if cfg.model.get("inductor", False): + # Temporary hack to get rid of TorchDynamo issue with DDP + # TODO: remove these if https://github.com/pytorch/pytorch/issues/94574 fixed + torch.arange = disable(torch.arange) + torch.ones = disable(torch.ones) + torch.zeros = disable(torch.zeros) + + # TODO: remove this if latest TorchDynamo fixed `t.uniform_(0, 1)` failure + torch.Tensor.uniform_ = disable(torch.Tensor.uniform_) + + # Disable TorchDynamo for unsupported function + pl.core.LightningModule.log = disable(pl.core.LightningModule.log) + + # TorchInductor with CUDA graph can lead to OOM + inductor_config.triton.cudagraphs = cfg.model.inductor_cudagraphs + model.model.unet = torch.compile(model.model.unet) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/nemo/collections/multimodal/data/common/webdataset.py b/nemo/collections/multimodal/data/common/webdataset.py index 84841f0e5e6e..5ce77d6f3ba7 100644 --- a/nemo/collections/multimodal/data/common/webdataset.py +++ b/nemo/collections/multimodal/data/common/webdataset.py @@ -177,7 +177,7 @@ def __init__( if self.filterings is not None: if self.filterings.resolution is not None: - train_dataset = train_dataset.select(filter_fn(**self.filterings.resolution)) + train_dataset = train_dataset.select(filter_fn) # traindataset.to_tuple("").map_tuple(fns) train_dataset = train_dataset.map(map_fn) diff --git a/nemo/collections/multimodal/data/imagen/__init__.py b/nemo/collections/multimodal/data/imagen/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/imagen/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/imagen/augmentations/__init__.py b/nemo/collections/multimodal/data/imagen/augmentations/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/imagen/augmentations/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/imagen/augmentations/augmentations.py b/nemo/collections/multimodal/data/imagen/augmentations/augmentations.py new file mode 100644 index 000000000000..23f481bc8720 --- /dev/null +++ b/nemo/collections/multimodal/data/imagen/augmentations/augmentations.py @@ -0,0 +1,76 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import List, Optional + +import torch + +from nemo.utils import logging + + +def build_resolution_filter(value=None, method='larger', image_idx=0): + """ + Filter image based on its resolution. + value: filter threshold + method: Either larger or smaller + image_idx: idx of the image in the tuple input + """ + assert method == 'larger' or method == 'smaller' + if method == 'larger': + logging.info(f'Only Selecting images with resolution >= {value}') + return lambda x: x[image_idx].size[0] >= value and x[image_idx].size[1] >= value + + logging.info(f'Only Selecting images with resolution <= {value}') + return lambda x: x[image_idx].size[0] <= value and x[image_idx].size[1] <= value + + +class PickleTransform: + """ + Convert encodings stored in the pickle file to encoding and mask. + Transform the pad and resize the embedding to match the generator config. + """ + + def __init__(self, encoding_lengths: List[int], encoding_keys: List[str], out_keys: Optional[List[str]] = None): + assert len(encoding_keys) == len(encoding_lengths) + self.encoding_lengths = encoding_lengths + self.encoding_keys = encoding_keys + self.out_keys = out_keys if out_keys is not None else encoding_keys + + def _pad_and_resize(self, arr, ntokens): + # Function for padding and resizing a numpy array + + arr = torch.tensor(arr) + embed_dim = arr.shape[1] + + arr_padded = torch.zeros(ntokens, embed_dim, device=arr.device, dtype=torch.float32) + + # If the input text is larger than num_text_tokens, clip it. + if arr.shape[0] > ntokens: + arr = arr[0:ntokens] + + mask = torch.LongTensor(ntokens).zero_() + if len(arr.shape) > 1: + mask[0 : arr.shape[0]] = 1 + + if len(arr.shape) > 1: + arr_padded[0 : arr.shape[0]] = arr + + return arr_padded, mask + + def __call__(self, data): + out_dict = dict() + for token_length, encoding_key, out_key in zip(self.encoding_lengths, self.encoding_keys, self.out_keys): + embed, mask = self._pad_and_resize(data[encoding_key]['encodings'], token_length) + out_dict[f'{out_key}_embeddings'] = embed + out_dict[f'{out_key}_mask'] = mask + return out_dict diff --git a/nemo/collections/multimodal/data/imagen/augmentations/corruption.py b/nemo/collections/multimodal/data/imagen/augmentations/corruption.py new file mode 100644 index 000000000000..6c17066fd285 --- /dev/null +++ b/nemo/collections/multimodal/data/imagen/augmentations/corruption.py @@ -0,0 +1,33 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torchvision.transforms.functional as torchvision_F + + +class ImagePyramidNoCorruptions: + r""" + Only downsample image without any additional corruption. + """ + + def __init__(self, target_resolutions): + self.resolutions = target_resolutions + + def obtain_image_pyramid(self, image): + # Downsampling + data_dict = dict() + for res in self.resolutions: + image_downsampled = torchvision_F.resize( + image, res, interpolation=torchvision_F.InterpolationMode.BICUBIC, antialias=True + ) + data_dict[f'images_{res}'] = image_downsampled + return data_dict diff --git a/nemo/collections/multimodal/data/imagen/imagen_dataset.py b/nemo/collections/multimodal/data/imagen/imagen_dataset.py new file mode 100644 index 000000000000..2a0365bfc311 --- /dev/null +++ b/nemo/collections/multimodal/data/imagen/imagen_dataset.py @@ -0,0 +1,111 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon +from nemo.collections.multimodal.data.imagen.augmentations.augmentations import ( + PickleTransform, + build_resolution_filter, +) +from nemo.collections.multimodal.data.imagen.augmentations.corruption import ImagePyramidNoCorruptions +from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import ( + construct_image_augmentations, + identical_transform, +) +from nemo.utils import logging + + +def _build_functions_with_pickles(data_cfg, condition_cfg): + def tuple_to_dict(inp): + for input in inp: + out_dict = dict() + out_dict['images'] = input[0] + + # Output from pickle transform is already a dictionary + out_dict.update(input[1]) + + out_dict['raw_text'] = input[2] + yield out_dict + + def transform_fn(sample): + image, encodings, text = sample['jpg'], sample['pickle'], sample['txt'] + img_transform = construct_image_augmentations(data_cfg.train.get('augmentations'), normalize=True) + pickle_transform = PickleTransform( + encoding_keys=[condition_cfg.precached_key], + encoding_lengths=[condition_cfg.token_length], + out_keys=[condition_cfg.out_key], + ) + text_transform = identical_transform + return img_transform(image), pickle_transform(encodings), text_transform(text) + + return tuple_to_dict, transform_fn + + +def _build_functions_no_pickles(data_cfg): + def tuple_to_dict(inp): + for input in inp: + out_dict = dict() + out_dict['images'] = input[0] + out_dict['raw_text'] = input[1] + yield out_dict + + def transform_fn(sample): + image, text = sample['jpg'], sample['txt'] + img_transform = construct_image_augmentations(data_cfg.train.get('augmentations'), normalize=True) + text_transform = identical_transform + return img_transform(image), text_transform(text) + + return tuple_to_dict, transform_fn + + +def build_train_valid_datasets( + model_cfg, consumed_samples, +): + data_cfg = model_cfg.data + condition_cfg = model_cfg.conditioning + # This function maps data that are tuples to dictionary. + if condition_cfg.get("online_encoding", False): + tuple_to_dict, transform_fn = _build_functions_no_pickles(data_cfg) + else: + tuple_to_dict, transform_fn = _build_functions_with_pickles(data_cfg, condition_cfg) + + filter_cfg = data_cfg.train.get('filterings', None) + + # For adding corruptions and obtaining image pyramid + if model_cfg.unet_type.startswith('sr'): + assert data_cfg.train.get('target_resolutions'), 'SR model requires multiple resolution for training' + logging.info(f'Resizing input images into the follow resolutions: {data_cfg.train.target_resolutions}') + corruption_gen = ImagePyramidNoCorruptions(target_resolutions=data_cfg.train.target_resolutions) + else: + corruption_gen = None + + # This function is used for obtaining image pyramid + # in SR models for Imagen, we need to use low-res image as conditioning. + def obtain_image_pyramid(inp): + for data_dict in inp: + data_pyramid = corruption_gen.obtain_image_pyramid(data_dict['images']) + data_dict.update(data_pyramid) + yield data_dict + + compose_fn = [tuple_to_dict] + if corruption_gen: + compose_fn.append(obtain_image_pyramid) + + train_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=compose_fn, + filter_fn=build_resolution_filter(**filter_cfg.resolution, image_idx='jpg') if filter_cfg else None, + is_train=True, + ) + return train_data, None diff --git a/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py b/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py index 4a62bed1ddeb..018735377c4d 100644 --- a/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py +++ b/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py @@ -48,12 +48,14 @@ def transform_fn(sample): text_transform = identical_transform return img_transform(image), text_transform(text) + filter_cfg = data_cfg.train.get('filterings', None) + filter_fn = build_resolution_filter(**filter_cfg.resolution) if filter_cfg else None train_data = WebDatasetCommon( dataset_cfg=data_cfg, consumed_samples=consumed_samples, map_fn=transform_fn, compose_fn=tuple_to_dict, - filter_fn=build_resolution_filter, + filter_fn=filter_fn, is_train=True, ) @@ -64,7 +66,7 @@ def transform_fn(sample): consumed_samples=consumed_samples, map_fn=transform_fn, compose_fn=tuple_to_dict, - filter_fn=build_resolution_filter, + filter_fn=filter_fn, is_train=False, ) diff --git a/nemo/collections/multimodal/models/imagen/imagen.py b/nemo/collections/multimodal/models/imagen/imagen.py new file mode 100644 index 000000000000..076e5a96971c --- /dev/null +++ b/nemo/collections/multimodal/models/imagen/imagen.py @@ -0,0 +1,535 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import itertools +from datetime import datetime +from typing import Any + +import numpy as np +import torch +import torch.nn.functional as F +from einops import rearrange +from omegaconf import DictConfig +from pytorch_lightning import Trainer +from tqdm import tqdm + +from nemo.collections.multimodal.data.imagen.imagen_dataset import build_train_valid_datasets +from nemo.collections.multimodal.models.imagen.precond import ContinousDDPMPrecond, EDMPrecond +from nemo.collections.multimodal.models.multimodal_base_model import MegatronMultimodalModel +from nemo.collections.multimodal.modules.imagen.diffusionmodules.nets import EfficientUNetModel, UNetModel +from nemo.collections.multimodal.modules.imagen.encoder.t5encoder import T5Encoder +from nemo.collections.multimodal.modules.imagen.sampler.sampler import DDPMSampler, EDMSampler +from nemo.collections.multimodal.parts.imagen.utils import random_dropout +from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.core.classes.common import Serialization +from nemo.utils import logging + +try: + from apex import amp + from apex.transformer.pipeline_parallel.utils import get_num_microbatches + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + +try: + from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + +DUMMY_TENSOR = torch.tensor([1.0]) + + +class Imagen(torch.nn.Module, Serialization): + def __init__(self, cfg): + super().__init__() + self.cfg = cfg + # Make sure the initialization on different GPUs are the same + self.unet_type = cfg.get('unet_type', 'base') + self.noise_cond_aug = cfg.get('noise_cond_aug', False) + if self.unet_type == 'base': + logging.info('Initializing UNet.') + unet = UNetModel(**cfg.unet, text_embed_dim=cfg.conditioning.embed_dim) + elif self.unet_type == 'sr': + logging.info('Initializing Efficient-UNet.') + unet = EfficientUNetModel( + **cfg.unet, text_embed_dim=cfg.conditioning.embed_dim, noise_cond_aug=self.noise_cond_aug + ) + elif self.unet_type == 'sr-unet': + logging.info('Initializing UNet for SR model.') + unet = UNetModel(**cfg.unet, text_embed_dim=cfg.conditioning.embed_dim, noise_cond_aug=self.noise_cond_aug) + else: + raise NotImplemented(f'{self.unet_type} UNet is not implemented.') + + # Preconditioning + self.preconditioning_type = cfg.get('preconditioning_type', 'DDPM') + if self.preconditioning_type == 'DDPM': + logging.info('Preconditioned with Continous DDPM') + self.model = ContinousDDPMPrecond(unet=unet, **cfg.preconditioning, noise_cond_aug=self.noise_cond_aug) + self.sampler = DDPMSampler(unet_type=self.unet_type, denoiser=self.model.scheduler) + elif self.preconditioning_type == 'EDM': + logging.info('Preconditioned with EDM') + self.model = EDMPrecond(unet=unet, **cfg.preconditioning, noise_cond_aug=self.noise_cond_aug) + self.sampler = EDMSampler(unet_type=self.unet_type) + else: + raise NotImplemented(f'{self.preconditioning_type} preconditioning is not implemented.') + + self.rng = None + self.conditioning = cfg.conditioning + self.text_drop_rate = cfg.conditioning.drop_rate + self.model_type = None + self.image_size = cfg.unet.image_size + + def setup_rng(self): + # We need to set different rng seed for different GPUs/ different runs; + # otherwise, the noise map and time will be exactly the same. + self.rng = torch.Generator(device=torch.cuda.current_device()) + self.rng_seed = int(datetime.now().timestamp()) + self.cfg.seed + parallel_state.get_data_parallel_rank() + logging.info(f'RNG seed set as {self.rng_seed} for rank {parallel_state.get_data_parallel_rank()}') + self.rng.manual_seed(self.rng_seed) + self.model.set_rng(self.rng) + + @property + def unet(self): + return self.model.unet + + def get_text_encoder(self, encoder_path=None): + # TODO Assume using T5 for all + return T5Encoder(max_seq_len=self.conditioning.token_length, encoder_path=encoder_path) + + def forward(self, x_start, text_embed, text_mask, x_lowres=None): + if self.unet_type == 'base': + assert x_lowres[0].item() == DUMMY_TENSOR.item(), 'Base model should have no low-resolution conditioning' + x_lowres = None + else: + assert x_lowres[0].dim() not in [0, 1], 'SR model should have low-resolution conditioning' + + # Apply random dropout to text embedding + text_embed = random_dropout(text_embed, drop_rate=self.text_drop_rate) + # UNet Forward Pass + low_res_cond = {'x_low_res': x_lowres} if x_lowres is not None else {} + # UNet Forward Pass and compute loss + loss = self.model.compute_loss( + x0=x_start, + text_embed=text_embed, + text_mask=text_mask, + time=None, # Randomly Sample + noise=None, # Randomly Sample + **low_res_cond, + ) + return loss, {'train/loss': loss} + + @torch.no_grad() + def sample_image( + self, + noise_map, + text_encoding, + text_mask, + x_low_res=None, + cond_scale=1.0, + sampling_steps=None, + thresholding_method='dynamic', + ): + return self.sampler( + self.model, noise_map, text_encoding, text_mask, x_low_res, cond_scale, sampling_steps, thresholding_method + ) + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + # only required for pipeline parallelism + pass + + +class MegatronImagen(MegatronMultimodalModel): + def __init__(self, cfg: DictConfig, trainer: Trainer): + if not HAVE_APEX: + raise ImportError( + "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + + # this prevents base constructor from initializing tokenizer + self.tokenizer = None + super().__init__(cfg, trainer=trainer) + + self._validate_trainer() + + # megatron_amp_O2 is not yet supported in diffusion models + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) + + self.model = self.model_provider_func() + + if self.trainer.precision == 'bf16': + self.autocast_dtype = torch.bfloat16 + elif int(self.trainer.precision) == 32: + self.autocast_dtype = torch.float + elif int(self.trainer.precision) == 16: + self.autocast_dtype = torch.half + else: + raise ValueError('precision must be in [32, 16, "bf16"]') + + self.online_encoding = cfg.conditioning.get("online_encoding", False) + self.text_encoder_path = cfg.conditioning.get("encoder_path", None) + + def model_provider_func(self, pre_process=True, post_process=True): + """Model depends on pipeline paralellism.""" + model = Imagen(cfg=self.cfg) + return model + + def get_forward_output_and_loss_func(self): + def process_batch(batch): + """ Prepares the batch for megatron fwd/bwd functions. + Global batch is a list of micro batches. + """ + # Base model and SR models have slightly different batch input: + # Base model would only require images (64x64), + # while SR models (both SR256 and SR1024) require low-res image (64x64) and + # actual (cropped) image (256x256) + if self.cfg.unet_type == 'base': + x_start = batch['images'] + # Pass in DUMMY_TENSOR because megatron requires each input to be + # tensor (not None) with same batch size (first dim) + x_lowres = DUMMY_TENSOR.repeat(x_start.shape[0]) + elif self.cfg.unet_type == 'sr' or self.cfg.unet_type == 'sr-unet': + x_start = batch['images_256'] + x_lowres = batch['images_64'] + else: + raise NotImplemented(f'Unknown UNet type: {self.cfg.unet_type}') + + if self.cfg.conditioning.get("online_encoding", False): + input_text = batch["raw_text"] + # Encode the text embeddings using text encoder. + with torch.no_grad(): + text_embed, text_mask = self.text_encoder.encode(input_text) + else: + text_conditioning_key = self.cfg.conditioning.out_key + text_embed = batch[f'{text_conditioning_key}_embeddings'] + text_mask = batch[f'{text_conditioning_key}_mask'] + return [x_start, text_embed, text_mask, x_lowres] + + def fwd_output_and_loss_func(dataloader_iter, model): + batch = next(dataloader_iter) + batch = process_batch(batch) + batch = [x.cuda(non_blocking=True) for x in batch] + loss, loss_dict = model(*batch) + + def dummy(output_tensor): + return loss, loss_dict + + # output_tensor, and a function to convert output_tensor to loss + loss_dict + return loss, dummy + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(batch, model): + raise NotImplementedError + + return fwd_output_only_func + + def build_train_valid_test_datasets(self): + logging.info('Building datasets for Imagen...') + if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): + raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") + self._train_ds, self._validation_ds = build_train_valid_datasets( + model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0) + ) + # We do not have test dataset + self._test_ds = None + + if self._train_ds is not None: + logging.info(f'Length of train dataset: {len(self._train_ds)}') + if self._validation_ds is not None: + logging.info(f'Length of val dataset: {len(self._validation_ds)}') + if self._test_ds is not None: + logging.info(f'Length of test dataset: {len(self._test_ds)}') + logging.info(f'Finished building datasets for LatentDiffusion.') + return self._train_ds, self._validation_ds, self._test_ds + + def setup_training_data(self, cfg): + if hasattr(self, '_train_ds') and self._train_ds is not None: + consumed_samples = self.compute_consumed_samples(0) + logging.info( + f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' + ) + self._train_dl = torch.utils.data.DataLoader( + self._train_ds, + batch_size=self._micro_batch_size, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=True, + persistent_workers=True, + ) + + def setup_validation_data(self, cfg): + if hasattr(self, '_validation_ds') and self._validation_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' + ) + self._validation_dl = torch.utils.data.DataLoader( + self._validation_ds, + batch_size=self._micro_batch_size, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=False, + persistent_workers=True, + ) + + def setup_test_data(self, cfg): + if hasattr(self, '_test_ds') and self._test_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' + ) + self._test_dl = torch.utils.data.DataLoader( + self._test_ds, batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, + ) + + def training_step(self, dataloader_iter, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + Batch should be a list of microbatches and those microbatches should on CPU. + Microbatches are then moved to GPU during the pipeline. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + tensor_shape = None # Placeholder + + # we zero grads here because we also call backward in the megatron-core fwd/bwd functions + self._optimizer.zero_grad() + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + fwd_bwd_function = get_forward_backward_func() + + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), + forward_only=False, + tensor_shape=tensor_shape, + dtype=self.autocast_dtype, + grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, + ) + + # losses_reduced_per_micro_batch is a list of dictionaries + # [{"loss": 0.1}, {"loss": 0.2}, ...] which are from gradient accumulation steps + # only the last stages of the pipeline return losses + loss_dict = {} + if losses_reduced_per_micro_batch: + # average loss across micro batches + for key in losses_reduced_per_micro_batch[0]: + loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_dict[key] = loss_tensor.mean() + loss_mean = loss_dict["train/loss"] + else: + loss_mean = torch.tensor(0.0, device=torch.cuda.current_device()) + + torch.distributed.broadcast(loss_mean, get_last_rank()) + + # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced + if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): + self.allreduce_sequence_parallel_gradients() + + if self.with_distributed_adam: + # gradients are reduced internally in distributed optimizer + pass + elif self.megatron_amp_O2: + # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): + # # main grads are stored in the MainParamsOptimizer wrapper + # self._optimizer.allreduce_main_grads() + self._optimizer.allreduce_main_grads() + else: + # async grad allreduce is not currently implemented for O1/autocasting mixed precision training + # so we all-reduce gradients after the pipeline + self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) + + if self.cfg.precision == 16: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log('loss_scale', loss_scale, batch_size=1) + + self.log_dict(loss_dict, prog_bar=False, logger=True, on_step=True, rank_zero_only=True, batch_size=1) + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log( + 'consumed_samples', + self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), + prog_bar=True, + rank_zero_only=True, + batch_size=1, + ) + return loss_mean + + def backward(self, *args, **kwargs): + """ LightningModule hook to do backward. + We want this to do nothing since we run backward in the fwd/bwd functions from apex. + No need to call it here. + """ + pass + + def optimizer_zero_grad(self, *args, **kwargs): + """ LightningModule hook to zero grad. + We want this to do nothing as we are zeroing grads during the training_step. + """ + pass + + def _append_sequence_parallel_module_grads(self, module, grads): + """ Helper method for allreduce_sequence_parallel_gradients""" + + for param in module.parameters(): + sequence_parallel_param = getattr(param, 'sequence_parallel', False) + if sequence_parallel_param and param.requires_grad: + if self.megatron_amp_O2: + grad = param.main_grad + else: + grad = param.grad + grads.append(grad.data) + + def validation_step(self, dataloader_iter, batch_idx): + tensor_shape = None # Placeholder + fwd_bwd_function = get_forward_backward_func() + + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), + forward_only=True, + tensor_shape=tensor_shape, + dtype=self.autocast_dtype, + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, + ) + + # only the last stages of the pipeline return losses + val_loss_dict = {} + if losses_reduced_per_micro_batch: + # average loss across micro batches + for key in losses_reduced_per_micro_batch[0]: + loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + val_loss_dict[key] = loss_tensor.mean() + + self.log_dict(val_loss_dict, prog_bar=False, logger=True, on_step=False, on_epoch=True, batch_size=1) + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + + # log number of parameters + if isinstance(self.model, list): + num_parameters_on_device = sum( + [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] + ) + else: + num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) + + # to be summed across data parallel group + total_num_parameters = torch.tensor(num_parameters_on_device).cuda(non_blocking=True) + + torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer._checkpoint_connector.resume_from_checkpoint_fit_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + # allowing restored models to optionally setup datasets + self.build_train_valid_test_datasets() + + # Batch size need to be provided for webdatset + self._num_micro_batches = get_num_microbatches() + self._micro_batch_size = self.cfg.micro_batch_size + + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + # Setup RNG seed in model + self.model.setup_rng() + + def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: + """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device + When using pipeline parallelism, we need the global batch to remain on the CPU, + since the memory overhead will be too high when using a large number of microbatches. + Microbatches are transferred from CPU to GPU inside the pipeline. + """ + return batch + + def _validate_trainer(self): + """ Certain trainer configurations can break training. + Here we try to catch them and raise an error. + """ + if self.trainer.accumulate_grad_batches > 1: + raise ValueError( + f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' + ) + + @classmethod + def list_available_models(cls): + return None + + def parameters(self): + if isinstance(self.model, list): + return itertools.chain.from_iterable(module.parameters() for module in self.model) + else: + return self.model.parameters() + + def on_save_checkpoint(self, checkpoint) -> None: + if self.online_encoding: + # Removing the weights relating to Text encoder when saving the checkpoints + frozen_weights_keys = [k for k in checkpoint['state_dict'].keys() if k.startswith("text_encoder")] + for k in frozen_weights_keys: + del checkpoint['state_dict'][k] + + def on_fit_start(self) -> None: + if self.online_encoding: + # if encoding text online, set up text_encoder here (after loading checkpoints) instead of in __init__. + # This is because text encoder weights are not saved, so the encoder must be loaded after other weights + # are loaded. + logging.info( + f'Setting up pretrained text encoder: {self.text_encoder_path or "download or use cached t5-11b"}' + ) + self.text_encoder = self.model.get_text_encoder(encoder_path=self.text_encoder_path).to( + torch.cuda.current_device() + ) + self.text_encoder.eval() + for param in self.text_encoder.parameters(): + param.requires_grad = False diff --git a/nemo/collections/multimodal/models/imagen/imagen_pipeline.py b/nemo/collections/multimodal/models/imagen/imagen_pipeline.py new file mode 100644 index 000000000000..fb4932765b41 --- /dev/null +++ b/nemo/collections/multimodal/models/imagen/imagen_pipeline.py @@ -0,0 +1,324 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import time +from dataclasses import dataclass, field +from typing import Callable, Dict, List, Literal, Optional, Union + +import torch +from omegaconf.omegaconf import OmegaConf +from pytorch_lightning import Trainer +from torch.cuda.amp import autocast + +from nemo.collections.multimodal.models.imagen.imagen import Imagen, MegatronImagen +from nemo.collections.multimodal.parts.utils import numpy_to_pil + + +@dataclass +class ImagenCustomizedModelConfig: + base_ckpt: Optional[str] = None + base_cfg: Optional[str] = None + sr256_ckpt: Optional[str] = None + sr256_cfg: Optional[str] = None + sr1024_ckpt: Optional[str] = None + sr1024_cfg: Optional[str] = None + + +@dataclass +class ImagenSamplingConfig: + step: Optional[int] = None + cfg: Optional[float] = 1 + + +@dataclass +class ImagenPipelineConfig: + model_name: Optional[str] = None + run_ema_model: Optional[bool] = True + customized_model: Optional[ImagenCustomizedModelConfig] = None + num_images_per_promt: Optional[int] = 8 + texts: Optional[List[str]] = field(default_factory=lambda: []) + output_path: Optional[str] = 'output/imagen_inference' + record_time: Optional[bool] = False + encoder_path: Optional[str] = None + target_resolution: Optional[int] = 256 + inference_precision: Optional[str] = '32' + thresholding_method: Optional[str] = 'dynamic' + samplings: Optional[List[ImagenSamplingConfig]] = field(default_factory=lambda: list()) + part: Optional[int] = 0 + + +class ImagenPipeline(Callable): + def __init__(self, models: List[Imagen], text_encoder, cfg, device): + self.models = [model.to(device) for model in models] + self.text_encoder = text_encoder.to(device) + self.cfg = cfg + self.device = device + + def _load_model(model_ckpt: str, model_cfg: str, eval_mode: bool = True, trainer: Trainer = None): + assert model_ckpt is not None, 'model ckpt cannot be None' + if model_ckpt.endswith('.nemo'): + model_cfg = MegatronImagen.restore_from(restore_path=model_ckpt, trainer=trainer, return_config=True) + model_cfg.unet.flash_attention = False + model_cfg.micro_batch_size = 1 + model_cfg.global_batch_size = 1 + model = MegatronImagen.restore_from( + restore_path=model_ckpt, override_config_path=model_cfg, trainer=trainer, + ) + elif model_ckpt.endswith('.ckpt'): + model_cfg = OmegaConf.load(model_cfg) + model_cfg.model.unet.flash_attention = False + model_cfg.model.micro_batch_size = 1 + model_cfg.model.global_batch_size = 1 + model = MegatronImagen(cfg=model_cfg.model, trainer=trainer) + checkpoint = torch.load(model_ckpt, map_location=lambda storage, loc: storage) + model.load_state_dict(checkpoint['state_dict'], strict=True) + else: + raise Exception('Invalid ckpt type. Should be either .nemo or .ckpt with cfg') + + model = model.model # We do not need Megatron Instance for inference + model.model.set_inference_mode(True) # Used for adding the least noise for EDM inference for SR model. + if eval_mode: + model.unet.cuda().eval() + return model + + def _load_customized_model(cfg: ImagenPipelineConfig, trainer=None): + customized_models = cfg.customized_model + models = [] + + print('Load base model.') + model = ImagenPipeline._load_model( + model_ckpt=customized_models.base_ckpt, model_cfg=customized_models.base_cfg, trainer=trainer, + ) + models.append(model) + + if cfg.target_resolution >= 256: + print('Load SR256 model.') + model = ImagenPipeline._load_model( + model_ckpt=customized_models.sr256_ckpt, model_cfg=customized_models.sr256_cfg, trainer=trainer + ) + models.append(model) + + if cfg.target_resolution >= 1024: + print('Load SR1024 model.') + model = ImagenPipeline._load_model( + model_ckpt=customized_models.sr1024_ckpt, model_cfg=customized_models.sr1024_cfg, trainer=trainer + ) + models.append(model) + return models + + @classmethod + def from_pretrained(cls, cfg: ImagenPipelineConfig, trainer=None, device='cuda'): + target_resolution = cfg.target_resolution + assert target_resolution in [64, 256, 1024] + + # Set encoder_path which will be used when inst the model + if cfg.encoder_path is not None: + os.environ['ENCODER_PATH'] = cfg.encoder_path + + assert cfg.model_name is None, 'No predefined model for now' + assert cfg.customized_model is not None, 'Need to provide customized models for inference' + models = ImagenPipeline._load_customized_model(cfg, trainer) + assert len(models) >= 1, 'Need to load at least one model' + if cfg.inference_precision == '16': + print('Running Inference in FP16.') + print('Converting all difussion models to FP16..') + for model in models: + model.half() + + print('Loading text encoder') + text_encoder = models[0].get_text_encoder(encoder_path=cfg.encoder_path) + if cfg.inference_precision == '16': + print('Converting text encoders to FP16..') + text_encoder.half() + return ImagenPipeline(models=models, text_encoder=text_encoder, cfg=cfg, device=device) + + @torch.no_grad() + def get_text_encodings(self, input_text, repeat=1): + # Repeat the inputs so that we generate multiple samples per query + if isinstance(input_text, str): + inp_text_batch = [input_text] + else: + inp_text_batch = input_text + # Encode the text embeddings using text encoder. + text_encodings, text_mask = self.text_encoder.encode(inp_text_batch) + if repeat != 1: + assert len(inp_text_batch) == 1, 'Repeat should only be applied if we feed single text to encoder.' + text_encodings = text_encodings.repeat(repeat, 1, 1) + text_mask = text_mask.repeat(repeat, 1) + return text_encodings, text_mask + + @torch.no_grad() + def __call__( + self, + prompts: Union[str, List[str]] = None, + inference_steps: Union[int, List[int]] = None, + classifier_free_guidance: Union[float, List[float]] = None, + num_images_per_promt: Optional[int] = 0, + thresholding_method: bool = None, + output_type: Optional[str] = 'pil', + seed: Union[int, List[int]] = 2000, + single_batch_mode: bool = False, + output_res: Optional[int] = None, + low_res_input: Optional[torch.Tensor] = None, + ): + if prompts is None: + prompts = OmegaConf.to_object(self.cfg.texts) + if num_images_per_promt == 0: + num_images_per_promt = self.cfg.num_images_per_promt + if thresholding_method is None: + thresholding_method = self.cfg.thresholding_method + device = self.device + inference_precision = self.cfg.inference_precision + assert inference_precision in ['16', '32', 'AMP'], "Inference Precision should be one of ['16', '32', 'AMP']" + print(f'Running inference in {inference_precision} mode.') + amp_enabled = inference_precision == 'AMP' + + # Based on output_res and low_res_input, determine which models to run + if output_res is not None or low_res_input is not None: + models = [] + if output_res is not None: + for model in self.models: + models.append(model) + if model.image_size == output_res: + break + else: + models = self.models + if low_res_input is not None: + print(f'Low-res input shape: {low_res_input.shape}') + low_res_dim = low_res_input.shape[-1] + num_images_per_promt = low_res_input.shape[0] + for idx, model in enumerate(models): + if model.image_size == low_res_dim: + models = models[idx + 1 :] + break + print(f'Running inference on {len(models)} models.') + else: + models = self.models + + if classifier_free_guidance is None: + cfgs = [each.cfg for each in self.cfg.samplings] + cfgs = cfgs[: len(models)] + else: + cfgs = classifier_free_guidance + if isinstance(cfgs, int): + cfgs = [cfgs] * len(models) + + if inference_steps is None: + steps = [each.step for each in self.cfg.samplings] + steps = steps[: len(models)] + else: + steps = inference_steps + if isinstance(steps, int): + steps = [steps] * len(models) + + assert len(steps) == len(cfgs) == len(models) + + output = [] + all_res_output = [[] for _ in range(len(models))] + if single_batch_mode: + num_images_per_promt = len(prompts) + + throughputs = {'text-encoding': []} + for idx in range(len(models)): + throughputs[f'stage-{idx+1}'] = [] + for prompt in prompts: + if single_batch_mode: + text_input = prompts + else: + text_input = prompt.strip('\n') + print('Input caption: {}'.format(text_input)) + tic = time.perf_counter() + text_encondings, text_mask = self.get_text_encodings( + text_input, repeat=num_images_per_promt if not single_batch_mode else 1 + ) + throughputs['text-encoding'].append(time.perf_counter() - tic) + + # Set seed + noise_maps = [] + if isinstance(seed, int): + # Single seed for the batch + torch.random.manual_seed(seed) + # Generate noise maps + for model in models: + noise_map = torch.randn( + (num_images_per_promt, 3, model.unet.image_size, model.unet.image_size), device=device + ) + noise_map = noise_map.half() if inference_precision == '16' else noise_map + noise_maps.append(noise_map) + elif isinstance(seed, list): + assert len(seed) == num_images_per_promt + for model in models: + noise_map_batch = [] + for single_seed in seed: + torch.random.manual_seed(single_seed) + noise_map_single = torch.randn( + (1, 3, model.unet.image_size, model.unet.image_size), device=device + ) + noise_map_batch.append(noise_map_single) + noise_map_batch = torch.cat(noise_map_batch, dim=0) + noise_map_batch = noise_map_batch.half() if inference_precision == '16' else noise_map_batch + noise_maps.append(noise_map_batch) + else: + raise RuntimeError('Seed type incorrect.') + + x_low_res = low_res_input + all_res = [] + for idx, (model, noise_map, cfg, step) in enumerate(zip(models, noise_maps, cfgs, steps)): + tic = time.perf_counter() + with autocast(enabled=amp_enabled): + generated_images = model.sample_image( + noise_map=noise_map, + text_encoding=text_encondings, + text_mask=text_mask, + x_low_res=x_low_res, + cond_scale=cfg, + sampling_steps=step, + thresholding_method=thresholding_method, + ) + x_low_res = generated_images + all_res.append(generated_images) + throughputs[f'stage-{idx+1}'].append(time.perf_counter() - tic) + # recenter from [-1, 1] to [0, 1] + assert generated_images is not None + generated_images = ((generated_images + 1) / 2).clamp_(0, 1) + all_res = [((each + 1) / 2).clamp_(0, 1) for each in all_res] + output.append(generated_images) + for idx, each in enumerate(all_res): + all_res_output[idx].append(each) + if single_batch_mode: + break + + if output_type == 'torch': + return torch.cat(output, dim=0), [torch.cat(each, dim=0) for each in all_res_output] + output_new = [] + for x_samples_image in output: + # Convert to numpy + x_samples_image = x_samples_image.cpu().permute(0, 2, 3, 1).numpy() + if output_type == 'pil': + x_samples_image = numpy_to_pil(x_samples_image) + output_new.append(x_samples_image) + + all_res_output_new = [[] for each in range(len(models))] + for idx, res_output in enumerate(all_res_output): + for x_samples_image in res_output: + # Convert to numpy + x_samples_image = x_samples_image.cpu().permute(0, 2, 3, 1).numpy() + if output_type == 'pil': + x_samples_image = numpy_to_pil(x_samples_image) + all_res_output_new[idx].append(x_samples_image) + + for item in throughputs: + throughputs[item] = sum(throughputs[item]) / len(throughputs[item]) + + return output_new, all_res_output_new, throughputs diff --git a/nemo/collections/multimodal/models/imagen/precond.py b/nemo/collections/multimodal/models/imagen/precond.py new file mode 100644 index 000000000000..77717ac8db72 --- /dev/null +++ b/nemo/collections/multimodal/models/imagen/precond.py @@ -0,0 +1,191 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn.functional as F + +from nemo.collections.multimodal.modules.imagen.sampler.batch_ops import batch_mul +from nemo.collections.multimodal.modules.imagen.sampler.continuous_ddpm import GaussianDiffusionContinuousTimes +from nemo.collections.multimodal.parts.utils import randn_like + + +class PrecondModel(torch.nn.Module): + def __init__(self, unet, loss_type): + super().__init__() + self.unet = unet + self.rng = None + self.inference = False + if loss_type == 'l1': + self.loss_fn = F.l1_loss + elif loss_type == 'l2': + self.loss_fn = F.mse_loss + elif loss_type == 'huber': + self.loss_fn = F.smooth_l1_loss + else: + raise NotImplementedError(f'{loss_type} loss is not supported') + + def set_inference_mode(self, value): + self.inference = value + + def forward(self, **model_kwargs): + return self.unet(**model_kwargs) + + def forward_with_cond_scale(self, *args, text_embed=None, cond_scale=1.0, **kwargs): + logits = self.forward(*args, text_embed=text_embed, **kwargs) + if cond_scale == 1.0: + return logits + null_logits = self.forward(*args, text_embed=torch.zeros_like(text_embed), **kwargs) + return null_logits + (logits - null_logits) * cond_scale + + def set_rng(self, generator): + self.rng = generator + + +class ContinousDDPMPrecond(PrecondModel): + def __init__( + self, + unet, + loss_type='l2', + pred_objective='noise', + noise_schedule='cosine', + timesteps=1000, + noise_cond_aug=False, + ): + super().__init__(unet, loss_type) + self.scheduler = GaussianDiffusionContinuousTimes(noise_schedule=noise_schedule, timesteps=timesteps) + self.pred_objective = pred_objective + assert noise_cond_aug == False, 'noise cond aug currently not supported for DDPM' + + def sample_time(self, batch_size, device=None): + return self.scheduler.sample_random_times(batch_size=batch_size, device=device) + + def get_xt(self, x0, t=None, epsilon=None): + if epsilon is None: + epsilon = randn_like(x0, generator=self.rng) + if t is None: + t = self.sample_time(batch_size=x0.shape[0], device=x0.device) + x_noisy, log_snr, alpha, sigma = self.scheduler.q_sample(x_start=x0, t=t, noise=epsilon,) + return x_noisy, t, epsilon + + def forward(self, x, time, text_embed, text_mask, **model_kwargs): + # Convert time to FP32 for calculating time embedding due to FP16 overflow + time = time.float() + time = self.scheduler.get_condition(time) + time = time.type_as(x) + + return self.unet(x=x, time=time, text_embed=text_embed, text_mask=text_mask, **model_kwargs) + + def compute_loss(self, x0, text_embed, text_mask, time=None, noise=None, **model_kwargs): + x_noisy, time, noise = self.get_xt(x0=x0, t=time, epsilon=noise) + pred = self.forward(x_noisy, time, text_embed, text_mask, **model_kwargs) + # Determine target + if self.pred_objective == 'noise': + target = noise + elif self.pred_objective == 'x_start': + target = x0 + else: + raise ValueError(f'unknown objective {self.pred_objective}') + return self.loss_fn(pred, target) + + def set_rng(self, generator): + self.scheduler.rng = generator + self.rng = generator + + +class EDMPrecond(PrecondModel): + def __init__( + self, + unet, # Underlying model. + loss_type='l2', + sigma_data=0.5, # Expected standard deviation of the training data. + p_mean=-1.2, + p_std=1.2, + noise_cond_aug=False, + ): + super().__init__(unet, loss_type) + self.sigma_data = sigma_data + self.p_mean = p_mean + self.p_std = p_std + self.noise_cond_aug = noise_cond_aug + + def forward(self, x, time, text_embed, text_mask, **model_kwargs): + bs = x.shape[0] + assert time.ndim <= 1, 'time should be in shape of either [bs] or scalar' + sigma = time + c_skip = self.sigma_data ** 2 / (sigma ** 2 + self.sigma_data ** 2) + c_out = sigma * self.sigma_data / (sigma ** 2 + self.sigma_data ** 2).sqrt() + c_in = 1 / (self.sigma_data ** 2 + sigma ** 2).sqrt() + c_noise = sigma.log() / 4 + + if c_noise.ndim < 1: + c_noise = c_noise.repeat(bs,) + + def save_image_tensor(tensor, dir=None): + if tensor.ndim == 4: + print(f'Saving {tensor.shape[0]} images') + elif tensor.ndim == 3: + print('Saving single image') + tensor = tensor.unsqueeze(0) + import os + + import torchvision.transforms as T + + os.makedirs(dir, exist_ok=True) + transform = T.ToPILImage() + for idx, image_tensor in enumerate(tensor): + image_tensor = ((image_tensor + 1) / 2).clamp_(0, 1) + img = transform(image_tensor) + img.save(f'{dir}/{idx}.png') + + if self.noise_cond_aug: + # Applying noise conditioning augmentation + assert 'x_low_res' in model_kwargs, 'x_low_res does not exist when attemping to apply noise augmentation' + x_low_res = model_kwargs['x_low_res'] + if self.inference: + batch_size = x_low_res.shape[0] + time_low_res = torch.ones(batch_size, device=x_low_res.device) * 0.002 + x_low_res_noisy, time_low_res = self.get_xt(x0=x_low_res, t=time_low_res, epsilon=None) + else: + x_low_res_noisy, time_low_res = self.get_xt(x0=x_low_res, t=None, epsilon=None) + c_in_noise = 1 / (self.sigma_data ** 2 + time_low_res ** 2).sqrt() + c_noise_noise = time_low_res.log() / 4 + model_kwargs['x_low_res'] = batch_mul(c_in_noise, x_low_res_noisy) + model_kwargs['time_low_res'] = c_noise_noise + + F_x = self.unet(batch_mul(c_in, x), c_noise, text_embed, text_mask, **model_kwargs) + D_x = batch_mul(c_skip, x) + batch_mul(c_out, F_x) + return D_x + + def sample_time(self, batch_size, device=None): + return (torch.randn(batch_size, device=device, generator=self.rng) * self.p_std + self.p_mean).exp() + + def get_xt(self, x0, t=None, epsilon=None): + if epsilon is None: + epsilon = randn_like(x0, generator=self.rng) + assert epsilon.shape == x0.shape + if t is None: + t = self.sample_time(batch_size=x0.shape[0], device=x0.device) + sigma = t + noise = batch_mul(epsilon, sigma) + return x0 + noise, sigma + + def compute_loss(self, x0, text_embed, text_mask, time=None, noise=None, **model_kwargs): + x_noisy, time = self.get_xt(x0=x0, t=None, epsilon=noise) + pred = self.forward(x_noisy, time, text_embed, text_mask, **model_kwargs) + sigma = time + weight = ((sigma ** 2 + self.sigma_data ** 2) / (sigma * self.sigma_data) ** 2).sqrt() + target = x0 + return self.loss_fn(batch_mul(weight, target), batch_mul(weight, pred),) + + def set_rng(self, generator): + self.rng = generator diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention.py new file mode 100644 index 000000000000..de301e0bc038 --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention.py @@ -0,0 +1,317 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Adapted from: +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py +""" +import math + +import numpy as np +import torch +import torch.nn as nn +from torch.cuda.amp import custom_bwd, custom_fwd + + +def count_flops_attn(model, _x, y): + """ + A counter for the `thop` package to count the operations in an + attention operation. + Meant to be used like: + macs, params = thop.profile( + model, + inputs=(inputs, timestamps), + custom_ops={QKVAttention: QKVAttention.count_flops}, + ) + """ + b, c, *spatial = y[0].shape + num_spatial = int(np.prod(spatial)) + # We perform two matmuls with the same number of ops. + # The first computes the weight matrix, the second computes + # the combination of the value vectors. + matmul_ops = 2 * b * (num_spatial ** 2) * c + model.total_ops += torch.DoubleTensor([matmul_ops]) + + +# Stable attention +class StableAttentionOp(torch.autograd.Function): + # This function defines the attention weight computation in a stable way + # The idea is to scale the gradients of weight matrix by the maximum absolute value. + # In case of overflow, this will prevent weight gradients from exploding. + # In case of underflow, since we clipped the scale to 1e-4, this will prevent underflow. + + @staticmethod + def forward(ctx, q, k): + w = torch.einsum('ncq,nck->nqk', q, k / math.sqrt(k.shape[1])).softmax(dim=2) + ctx.save_for_backward(q, k, w) + return w + + @staticmethod + def backward(ctx, dw): + q, k, w = ctx.saved_tensors + + s = dw.detach().norm(float('inf'), dim=[1, 2], keepdim=True).clip(min=1e-4) + dw = dw / s + + # Due to softmax, w is fp32, making db fp32. + # Type casting is required for amp to work. + db = torch._softmax_backward_data(grad_output=dw, output=w, dim=2, input_dtype=dw.dtype).to(q.dtype) + s = s / math.sqrt(k.shape[1]) + + dq = torch.einsum('nck,nqk->ncq', k, db) * s + dk = torch.einsum('ncq,nqk->nck', q, db) * s + + return dq, dk + + +class QKVStableAttention(nn.Module): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, dim=1) + + # Reshaping q and k + # try: + # q = q.view(bs * self.n_heads, ch, length) + # k = k.view(bs * self.n_heads, ch, length) + # except Exception: + q = q.reshape(bs * self.n_heads, ch, length) + k = k.reshape(bs * self.n_heads, ch, length) + + weight = StableAttentionOp.apply(q, k) + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) + return a.reshape(bs, -1, length), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVAttention(nn.Module): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, dim=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = torch.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length), + (k * scale).view(bs * self.n_heads, ch, length), + ) # More stable with f16 than dividing afterwards + weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) + return a.reshape(bs, -1, length), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class StableMaskedAttentionOp(torch.autograd.Function): + # Robust attention operation in case of masked attention + @staticmethod + @custom_fwd + def forward(ctx, q, k, mask): + max_neg_value = -float('inf') + w = torch.einsum('ncq,nck->nqk', q, k / math.sqrt(k.shape[1])) + w = w.masked_fill(mask, max_neg_value) + w = w.softmax(dim=2) + + # When we use an arbitrary mask, there is a possibility that we get nans in softmax. + # In this case, use nan_to_num to make it a stable number. + w = w.nan_to_num_() + ctx.save_for_backward(q, k, w, mask) + return w + + @staticmethod + @custom_bwd + def backward(ctx, dw): + q, k, w, mask = ctx.saved_tensors + max_neg_value = -torch.finfo(q.dtype).max + s = dw.detach().norm(float('inf'), dim=[1, 2], keepdim=True).clip(min=1e-4) + dw = dw / s + db = torch._softmax_backward_data(grad_output=dw, output=w, dim=2, input_dtype=dw.dtype) + + # Masking db + db_in = db.clone().masked_fill_(mask, 0) + + s = s / math.sqrt(k.shape[1]) + dq = torch.einsum('nck,nqk->ncq', k, db_in) * s + dk = torch.einsum('ncq,nqk->nck', q, db_in) * s + + # These are dummy derivatives since mask is a constant + dmask = (max_neg_value - w) * db.clone() * s + + return dq, dk, dmask + + +class QKVMaskedAttention(nn.Module): + """ + A module which performs QKV attention. + Attention mask is accepted as input. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, q, k, v, mask): + r""" + Apply QKV attention with attention mask. + + Args: + q: an [N x d x n_seq1] of queries. + k: an [N x d x n_seq2] of keys. + v: an [N x d x n_seq2] of values. + mask: Attention mask of size N x n_seq1 x n_seq2 + + Returns: an [N x d x n_seq1] tensor after attention. + """ + + bs, width, length_q = q.shape + _, _, length_k = k.shape + + assert width % self.n_heads == 0 + ch = width // self.n_heads + + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = torch.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length_q), + (k * scale).view(bs * self.n_heads, ch, length_k), + ) # More stable with f16 than dividing afterwards + + # Duplicate mask n_heads times + mask = mask.repeat_interleave(self.n_heads, dim=0) + assert mask.shape == weight.shape + max_neg_value = -float('inf') + weight = weight.masked_fill(~mask, max_neg_value) + + weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) + + # When we use an arbitrary mask, there is a possibility that we get nans in softmax. + # In this case, use nan_to_num to make it a non-nan number. + weight = weight.nan_to_num_() + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length_k)) + # We also return weight here for attention visualization. + return a.reshape(bs, -1, length_q), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVStableMaskedAttention(nn.Module): + """ + A module which performs QKV attention. + Attention mask is accepted as input. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, q, k, v, mask): + r""" + Apply QKV attention with attention mask. + + Args: + q: an [N x d x n_seq1] of queries. + k: an [N x d x n_seq2] of keys. + v: an [N x d x n_seq2] of values. + mask: Attention mask of size N x n_seq1 x n_seq2 + + Returns: an [N x d x n_seq1] tensor after attention. + """ + + bs, width, length_q = q.shape + _, _, length_k = k.shape + + assert width % self.n_heads == 0 + ch = width // self.n_heads + + q = q.view(bs * self.n_heads, ch, length_q) + k = k.view(bs * self.n_heads, ch, length_k) + + # Forming attention mask + mask = mask.repeat_interleave(self.n_heads, dim=0) + + weight = StableMaskedAttentionOp.apply(q, k, ~mask) + + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length_k)) + # We also return weight here for attention visualization. + return a.reshape(bs, -1, length_q), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class SelfAttentionPooling(nn.Module): + """ + Implementation of SelfAttentionPooling + Original Paper: Self-Attention Encoding and Pooling for Speaker Recognition + https://arxiv.org/pdf/2008.01077v1.pdf + Taken from: https://gist.github.com/pohanchi/c77f6dbfbcbc21c5215acde4f62e4362 + """ + + def __init__(self, input_dim): + super(SelfAttentionPooling, self).__init__() + self.W = nn.Linear(input_dim, 1) + + def forward(self, batch_rep): + """ + input: + batch_rep : size (N, T, H), N: batch size, T: sequence length, H: Hidden dimension + + attention_weight: + att_w : size (N, T, 1) + + return: + utter_rep: size (N, H) + """ + softmax = nn.functional.softmax + att_w = softmax(self.W(batch_rep).squeeze(-1), dim=1).unsqueeze(-1) + utter_rep = torch.sum(batch_rep * att_w, dim=1) + + return utter_rep diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py new file mode 100644 index 000000000000..2c57cfee8628 --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py @@ -0,0 +1,897 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Adapted from: +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py +""" +import math +from abc import abstractmethod + +import torch as th +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint +from einops import rearrange + +from nemo.collections.multimodal.modules.imagen.diffusionmodules.attention import ( + QKVAttention, + QKVMaskedAttention, + QKVStableAttention, + QKVStableMaskedAttention, +) +from nemo.collections.multimodal.modules.imagen.diffusionmodules.layers import ( + Downsample, + Upsample, + UpsampleLearnable, + conv_nd, + linear, + normalization, + zero_module, +) + + +def check_cuda(): + if not th.cuda.is_available(): + raise RuntimeError('CUDA is not available') + cur_device = th.cuda.current_device() + dprops = th.cuda.get_device_properties(cur_device) + + is_sm75 = dprops.major == 7 and dprops.minor == 5 + is_sm8x = dprops.major == 8 and dprops.minor >= 0 + is_sm90 = dprops.major == 9 and dprops.minor >= 0 + + return is_sm8x or is_sm75 or is_sm90 + + +try: + from flash_attn.flash_attention import FlashAttention + from flash_attn.flash_attn_interface import flash_attn_unpadded_func, flash_attn_unpadded_kvpacked_func + + flash_attn_installed = check_cuda() +except ImportError: + flash_attn_installed = False + + +class TextConditionedBlock(nn.Module): + r""" + Any module where forward() takes text embeddings as arguments. + """ + + @abstractmethod + def forward(self, x, text_emb, text_mask): + """ + Apply the module to `x` given `text_emb` text embedding and 'text_mask' text valid mask. + """ + + +class TimestepBlock(nn.Module): + """ + Any module where forward() takes timestep embeddings as a second argument. + """ + + @abstractmethod + def forward(self, x, emb): + """ + Apply the module to `x` given `emb` timestep embeddings. + """ + + +class ConditionalSequential(nn.Sequential, TimestepBlock, TextConditionedBlock): + r""" + A sequential module that accepts timestep embeddings, text embedding and text mask in addition to the input x. + Depending on the type of block, we either pass timestep embedding or text embeddings as inputs. + """ + + def forward(self, x, emb, text_emb, text_mask): + for layer in self: + if isinstance(layer, TimestepBlock): + x = layer(x, emb) + elif isinstance(layer, TextConditionedBlock): + x = layer(x, text_emb, text_mask) + else: + x = layer(x) + return x + + +class ResBlock(TimestepBlock): + """ + A residual block that can optionally change the number of channels. + + :param channels: the number of input channels. + :param emb_channels: the number of timestep embedding channels. + :param dropout: the rate of dropout. + :param out_channels: if specified, the number of out channels. + :param use_conv: if True and out_channels is specified, use a spatial + convolution instead of a smaller 1x1 convolution to change the + channels in the skip connection. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param use_checkpoint: if True, use gradient checkpointing on this module. + :param up: if True, use this block for upsampling. + :param down: if True, use this block for downsampling. + """ + + def __init__( + self, + channels, + emb_channels, + dropout, + out_channels=None, + use_conv=False, + use_scale_shift_norm=False, + dims=2, + use_checkpoint=False, + up=False, + down=False, + learnable_upsampling=False, + ): + super().__init__() + self.channels = channels + self.emb_channels = emb_channels + self.dropout = dropout + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_checkpoint = use_checkpoint + self.use_scale_shift_norm = use_scale_shift_norm + + self.in_layers = nn.Sequential( + normalization(channels), nn.SiLU(), conv_nd(dims, channels, self.out_channels, 3, padding=1), + ) + + self.updown = up or down + if learnable_upsampling: + upsample_fn = UpsampleLearnable + else: + upsample_fn = Upsample + + if up: + self.h_upd = upsample_fn(channels, False, dims) + self.x_upd = upsample_fn(channels, False, dims) + elif down: + self.h_upd = Downsample(channels, False, dims) + self.x_upd = Downsample(channels, False, dims) + else: + self.h_upd = self.x_upd = nn.Identity() + + self.emb_layers = nn.Sequential( + nn.SiLU(), linear(emb_channels, 2 * self.out_channels if use_scale_shift_norm else self.out_channels,), + ) + self.out_layers = nn.Sequential( + normalization(self.out_channels), + nn.SiLU(), + nn.Dropout(p=dropout), + zero_module(conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1)), + ) + + if self.out_channels == channels: + self.skip_connection = nn.Identity() + elif use_conv: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 3, padding=1) + else: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 1) + + def forward(self, x, emb): + """ + Apply the block to a Tensor, conditioned on a timestep embedding. + + :param x: an [N x C x ...] Tensor of features. + :param emb: an [N x emb_channels] Tensor of timestep embeddings. + :return: an [N x C x ...] Tensor of outputs. + """ + if self.use_checkpoint: + return checkpoint.checkpoint(self._forward, x, emb) + else: + return self._forward(x, emb) + + def _forward(self, x, emb): + if self.updown: + in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1] + h = in_rest(x) + h = self.h_upd(h) + x = self.x_upd(x) + h = in_conv(h) + else: + h = self.in_layers(x) + emb_out = self.emb_layers(emb) + while len(emb_out.shape) < len(h.shape): + emb_out = emb_out[..., None] + if self.use_scale_shift_norm: + out_norm, out_rest = self.out_layers[0], self.out_layers[1:] + scale, shift = th.chunk(emb_out, 2, dim=1) + h = out_norm(h) * (1 + scale) + shift + h = out_rest(h) + else: + h = h + emb_out + h = self.out_layers(h) + return self.skip_connection(x) + h + + +class EfficientResBlock(TimestepBlock): + """ + A residual block that can optionally change the number of channels. + Follow Figure A.27 in Imagen Paper. + :param channels: the number of input channels. + :param emb_channels: the number of timestep embedding channels. + :param out_channels: if specified, the number of out channels. + :param use_conv: if True and out_channels is specified, use a spatial + convolution instead of a smaller 1x1 convolution to change the + channels in the skip connection. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param use_checkpoint: if True, use gradient checkpointing on this module. + :param up: if True, use this block for upsampling. + :param down: if True, use this block for downsampling. + """ + + def __init__( + self, + channels, + emb_channels, + out_channels=None, + use_scale_shift_norm=False, + dims=2, + use_checkpoint=False, + skip_connection_scaling=False, + ): + super().__init__() + + out_channels = out_channels or channels + + self.use_scale_shift_norm = use_scale_shift_norm + self.use_checkpoint = use_checkpoint + + self.in_layers = nn.Sequential( + normalization(channels), nn.SiLU(), conv_nd(dims, channels, out_channels, 3, padding=1) + ) + + self.emb_layers = nn.Sequential( + nn.SiLU(), nn.Linear(emb_channels, 2 * out_channels if use_scale_shift_norm else out_channels,), + ) + + self.out_layers = nn.Sequential( + normalization(out_channels), + nn.SiLU(), + zero_module(conv_nd(dims, out_channels, out_channels, 3, padding=1)), + ) + + self.shortcut = conv_nd(dims, channels, out_channels, 1) + self.shortcut_scale = 1 / math.sqrt(2) if skip_connection_scaling else 1 + + def forward(self, x, emb): + """ + Apply the block to a Tensor, conditioned on a timestep embedding. + + :param x: an [N x C x ...] Tensor of features. + :param emb: an [N x emb_channels] Tensor of timestep embeddings. + :return: an [N x C x ...] Tensor of outputs. + """ + if self.use_checkpoint: + return checkpoint.checkpoint(self._forward, x, emb) + else: + return self._forward(x, emb) + + def _forward(self, x, emb): + h = self.in_layers(x) + emb_out = self.emb_layers(emb) + while len(emb_out.shape) < len(h.shape): + emb_out = emb_out[..., None] + if self.use_scale_shift_norm: + out_norm, out_rest = self.out_layers[0], self.out_layers[1:] + scale, shift = th.chunk(emb_out, 2, dim=1) + h = out_norm(h) * (1 + scale) + shift + h = out_rest(h) + else: + h = h + emb_out + h = self.out_layers(h) + + return h + self.shortcut(x) * self.shortcut_scale + + +class Block(nn.Module): + def __init__( + self, + channels, + emb_channels, + out_channels=None, + use_scale_shift_norm=True, + num_resblocks=2, + attention_type=None, + text_embed_dim=0, + stable_attention=True, + flash_attention=False, + num_head_channels=-1, + num_heads=8, + dims=2, + use_checkpoint=False, + skip_connection_scaling=False, + ): + super().__init__() + + out_channels = out_channels or channels + + self.attention_type = attention_type + self.text_embed_dim = text_embed_dim + + blocks = [ + EfficientResBlock( + channels, + emb_channels, + out_channels=out_channels, + use_scale_shift_norm=use_scale_shift_norm, + dims=dims, + use_checkpoint=use_checkpoint, + skip_connection_scaling=skip_connection_scaling, + ) + ] + + blocks += [ + EfficientResBlock( + out_channels, + emb_channels, + out_channels=out_channels, + use_scale_shift_norm=use_scale_shift_norm, + dims=dims, + use_checkpoint=use_checkpoint, + skip_connection_scaling=skip_connection_scaling, + ) + for _ in range(num_resblocks - 1) + ] + + self.blocks = nn.ModuleList(blocks) + + # Attention blocks + # Self - Self-attention blocks + # fused - Single attention layer for fusing self and cross attention. + if self.attention_type is not None: + assert self.attention_type in ('self', 'cross', 'fused', 'stacked') + attention_kwargs = dict() + + if self.attention_type == 'self': + attention_fn = SelfAttentionBlock + elif self.attention_type == 'cross': + attention_fn = CrossAttentionBlock + attention_kwargs['context_dim'] = self.text_embed_dim + elif self.attention_type == 'stacked': + attention_fn = StackedCrossAttentionBlock + attention_kwargs['context_dim'] = self.text_embed_dim + else: + attention_fn = FusedCrossAttentionBlock + attention_kwargs['context_dim'] = self.text_embed_dim + + self.attention_layer = attention_fn( + out_channels, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_checkpoint=use_checkpoint, + stable_attention=stable_attention, + flash_attention=flash_attention, + **attention_kwargs, + ) + + @abstractmethod + def forward(self, x, emb, text_embed=None, text_mask=None): + pass + + +class DBlock(Block): + def __init__( + self, + channels, + emb_channels, + out_channels=None, + use_scale_shift_norm=True, + conv_down=True, + stride=2, + num_resblocks=2, + attention_type=None, + text_embed_dim=0, + stable_attention=True, + flash_attention=False, + num_head_channels=-1, + num_heads=8, + dims=2, + use_checkpoint=False, + skip_connection_scaling=False, + ): + super().__init__( + channels, + emb_channels, + out_channels=out_channels, + use_scale_shift_norm=use_scale_shift_norm, + num_resblocks=num_resblocks, + attention_type=attention_type, + text_embed_dim=text_embed_dim, + stable_attention=stable_attention, + flash_attention=flash_attention, + num_head_channels=num_head_channels, + num_heads=num_heads, + dims=dims, + use_checkpoint=use_checkpoint, + skip_connection_scaling=skip_connection_scaling, + ) + + self.conv_down = conv_down + if self.conv_down: + # self.conv = nn.Conv2d(channels, channels, 3, stride=stride, padding=1) + self.conv = nn.Conv2d(channels, channels, 4, stride=stride, padding=1) + + def forward(self, x, emb, text_embed=None, text_mask=None): + if self.conv_down: + x = self.conv(x) + + for block in self.blocks: + x = block(x, emb) + + if self.attention_type in ('cross', 'fused', 'stacked'): + x = self.attention_layer(x, text_embed, text_mask) + elif self.attention_type == 'self': + x = self.attention_layer(x) + + return x + + +class UBlock(Block): + def __init__( + self, + channels, + emb_channels, + out_channels=None, + use_scale_shift_norm=True, + conv_up=True, + stride=2, + num_resblocks=2, + attention_type=None, + text_embed_dim=0, + stable_attention=True, + flash_attention=False, + num_head_channels=-1, + num_heads=8, + dims=2, + use_checkpoint=False, + skip_connection_scaling=False, + ): + super().__init__( + channels, + emb_channels, + out_channels=out_channels, + use_scale_shift_norm=use_scale_shift_norm, + num_resblocks=num_resblocks, + attention_type=attention_type, + text_embed_dim=text_embed_dim, + stable_attention=stable_attention, + flash_attention=flash_attention, + num_head_channels=num_head_channels, + num_heads=num_heads, + dims=dims, + use_checkpoint=use_checkpoint, + skip_connection_scaling=skip_connection_scaling, + ) + + self.conv_up = conv_up + if self.conv_up: + self.conv = nn.ConvTranspose2d(out_channels, out_channels, 4, stride, 1) + + def forward(self, x, emb, text_embed=None, text_mask=None): + for block in self.blocks: + x = block(x, emb) + + if self.attention_type in ('cross', 'fused', 'stacked'): + x = self.attention_layer(x, text_embed, text_mask) + elif self.attention_type == 'self': + x = self.attention_layer(x) + + if self.conv_up: + x = self.conv(x) + + return x + + +class FusedCrossAttentionBlock(TextConditionedBlock): + """ + An attention block that fuses self-attention and cross-attention + in a single block. + """ + + def __init__( + self, + channels, + context_dim, + num_heads=1, + num_head_channels=-1, + use_checkpoint=False, + stable_attention=True, + flash_attention=False, + ): + super().__init__() + self.channels = channels + if num_head_channels == -1: + self.num_heads = num_heads + else: + assert ( + channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + self.use_checkpoint = use_checkpoint + self.flash_attention = flash_attention + self.norm = normalization(channels) + self.norm_context = normalization(context_dim) + self.norm_self = normalization(channels) + + # For image features + self.q = conv_nd(1, channels, channels, 1) + + # For context + self.kv_context = conv_nd(1, context_dim, channels * 2, 1) + + # For spatial + self.kv_self = conv_nd(1, channels, channels * 2, 1) + + if flash_attention: + assert flash_attn_installed, "FlashAttention is not installed." + assert not stable_attention, "FlashAttention doesn't support the stable form." + + elif stable_attention: + self.attention = QKVStableMaskedAttention(self.num_heads) + else: + self.attention = QKVMaskedAttention(self.num_heads) + + self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) + + def forward(self, x, context, mask): + if self.use_checkpoint: + return checkpoint.checkpoint(self._forward, x, context, mask) + else: + return self._forward(x, context, mask) + + def _forward(self, x, context, mask): + + b, c, *spatial = x.shape + x = x.reshape(b, c, -1) + + q = self.q(self.norm(x)) + + # Key-value pairs for self-attention + kv_self = self.kv_self(self.norm_self(x)) + k_self, v_self = kv_self.chunk(2, dim=1) + k_self = k_self.contiguous() + v_self = v_self.contiguous() + + # Key-value pairs for cross-attention + context = th.permute(context, (0, 2, 1)) + context_n = self.norm_context(context) + kv_context = self.kv_context(context_n) + k_context, v_context = kv_context.chunk(2, dim=1) + k_context = k_context.contiguous() + v_context = v_context.contiguous() + + # Appending key-value pairs + k_full = th.cat([k_self, k_context], dim=2) + v_full = th.cat([v_self, v_context], dim=2) + + if self.flash_attention: + # q: b (h d) s, k_context: b (h d) s + batch_size = q.shape[0] + max_seqlen_q, max_seqlen_k = q.shape[2], q.shape[2] + k_context.shape[2] + q = rearrange(q, 'b (h d) s -> (b s) h d', h=self.num_heads) + + mask_self = th.ones((batch_size, max_seqlen_q), device=q.device, dtype=th.bool) + mask_context = mask.bool() + mask_full = th.cat([mask_self, mask_context], dim=1) + + k_full_unpadded = k_full.transpose(1, 2)[mask_full] + total_k = k_full_unpadded.shape[0] + k_full_unpadded = k_full_unpadded.view(total_k, self.num_heads, -1) + + v_full_unpadded = v_full.transpose(1, 2)[mask_full] + v_full_unpadded = v_full_unpadded.view(total_k, self.num_heads, -1) + + # (b s) t h d + kv_full_unpadded = th.stack([k_full_unpadded, v_full_unpadded], dim=1) + + cu_seqlens_q = th.arange( + 0, (batch_size + 1) * max_seqlen_q, step=max_seqlen_q, dtype=th.int32, device=q.device + ) + cu_seqlens_k = th.zeros((batch_size + 1), dtype=th.int32, device=k_full.device) + cu_seqlens_k[1:] = th.cumsum(mask.sum(dim=1), dim=0) + cu_seqlens_k += cu_seqlens_q + + out = flash_attn_unpadded_kvpacked_func( + q, kv_full_unpadded, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, 0.0 + ) + h = rearrange(out, '(b s) h d -> b (h d) s', b=batch_size, h=self.num_heads) + else: + # Computing mask for self attention + mask_self = th.ones(k_self.shape[0], q.shape[2], k_self.shape[2], device=mask.device) + + # Mask for cross attention + mask_context = mask.view(mask.shape[0], 1, mask.shape[1]) + mask_context = mask_context.repeat(1, q.shape[2], 1) + + # Fused mask + mask_full = th.cat([mask_self, mask_context], dim=2) + mask_full = mask_full.to(th.bool) + + h, _ = self.attention(q, k_full, v_full, mask_full) + + h = self.proj_out(h) + return (x + h).reshape(b, c, *spatial) + + +class SelfAttentionBlock(nn.Module): + """ + An attention block that allows spatial positions to attend to each other. + + Originally ported from here, but adapted to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. + """ + + def __init__( + self, + channels, + num_heads=1, + num_head_channels=-1, + use_checkpoint=False, + stable_attention=False, + flash_attention=False, + ): + super().__init__() + self.channels = channels + if num_head_channels == -1: + self.num_heads = num_heads + else: + assert ( + channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + self.use_checkpoint = use_checkpoint + self.norm = normalization(channels) + self.qkv = conv_nd(1, channels, channels * 3, 1) + self.flash_attention = flash_attention + if flash_attention: + assert flash_attn_installed, "FlashAttention is not installed." + assert not stable_attention, "FlashAttention doesn't support the stable form." + elif stable_attention: + self.attention = QKVStableAttention(self.num_heads) + else: + self.attention = QKVAttention(self.num_heads) + + self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) + + def forward(self, x): + if self.use_checkpoint: + return checkpoint.checkpoint(self._forward, x) + else: + return self._forward(x) + + def _forward(self, x): + + b, c, *spatial = x.shape + x = x.reshape(b, c, -1) + qkv = self.qkv(self.norm(x)) + + if self.flash_attention: + # qkv shape: (b, (3 h d) s), need to reshape to (b, s, h, d) for each q, k, v + b, _, _ = qkv.shape + h = self.num_heads + q, k, v = qkv.chunk(3, dim=1) + max_seqlen_q, max_seqlen_k = q.shape[2], k.shape[2] + q = rearrange(q, 'b (h d) s -> (b s) h d', h=self.num_heads) + k = rearrange(k, 'b (h d) s -> (b s) h d', h=self.num_heads) + v = rearrange(v, 'b (h d) s -> (b s) h d', h=self.num_heads) + cu_seqlens_q = th.arange(0, (b + 1) * max_seqlen_q, step=max_seqlen_q, dtype=th.int32, device=q.device) + cu_seqlens_k = th.arange(0, (b + 1) * max_seqlen_k, step=max_seqlen_k, dtype=th.int32, device=k.device) + h = flash_attn_unpadded_func(q, k, v, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, 0.0) + h = rearrange(h, '(b s) h d -> b (h d) s', b=b, h=self.num_heads) + else: + h, _ = self.attention(qkv) + h = self.proj_out(h) + return (x + h).reshape(b, c, *spatial) + + +######################################################################### +# These are the attention blocks as implemented by Stable Diffusion +# https://github.com/CompVis/stable-diffusion/blob/69ae4b35e0a0f6ee1af8bb9a5d0016ccb27e36dc/ldm/modules/attention.py#L196 + + +class CrossAttentionBlock(TextConditionedBlock): + """ + An attention block that allows spatial positions to attend to context. + In our case, context is the token-wise text embeddings. + """ + + def __init__( + self, + channels, + context_dim, + num_heads=1, + num_head_channels=-1, + use_checkpoint=False, + stable_attention=True, + flash_attention=False, + ): + super().__init__() + self.channels = channels + if num_head_channels == -1: + self.num_heads = num_heads + else: + assert ( + channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + self.use_checkpoint = use_checkpoint + self.norm = normalization(channels) + self.norm_context = normalization(context_dim) + self.flash_attention = flash_attention + # For image features + self.q = conv_nd(1, channels, channels, 1) + + # For context + self.kv = conv_nd(1, context_dim, channels * 2, 1) + + if flash_attention: + assert flash_attn_installed, "FlashAttention is not installed." + assert not stable_attention, "FlashAttention doesn't support the stable form." + elif stable_attention: + self.attention = QKVStableMaskedAttention(self.num_heads) + else: + self.attention = QKVMaskedAttention(self.num_heads) + + self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) + + def forward(self, x, context, mask): + if self.use_checkpoint: + return checkpoint.checkpoint(self._forward, x, context, mask) + else: + return self._forward(x, context, mask) + + def _forward(self, x, context, mask): + + b, c, *spatial = x.shape + x = x.reshape(b, c, -1) + + q = self.q(self.norm(x)) + context = th.permute(context, (0, 2, 1)) + context_n = self.norm_context(context) + kv = self.kv(context_n) + k, v = kv.chunk(2, dim=1) + k = k.contiguous() + v = v.contiguous() + + if self.flash_attention: + batch_size = q.shape[0] + max_seqlen_q, max_seqlen_k = q.shape[2], k.shape[2] + q = rearrange(q, 'b (h d) s -> (b s) h d', h=self.num_heads) + mask = mask.to(th.bool) + k_unpadded = k.transpose(1, 2)[mask] + total_k = k_unpadded.shape[0] + k_unpadded = k_unpadded.view(total_k, self.num_heads, -1) + v_unpadded = v.transpose(1, 2)[mask] + v_unpadded = v_unpadded.view(total_k, self.num_heads, -1) + kv_unpadded = th.stack([k_unpadded, v_unpadded], dim=1) + cu_seqlens_q = th.arange( + 0, (batch_size + 1) * max_seqlen_q, step=max_seqlen_q, dtype=th.int32, device=q.device + ) + cu_seqlens_k = th.zeros((batch_size + 1), dtype=th.int32, device=q.device) + cu_seqlens_k[1:] = th.cumsum(mask.sum(dim=1), dim=0) + + out = flash_attn_unpadded_kvpacked_func( + q, kv_unpadded, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, 0.0 + ) + h = rearrange(out, '(b s) h d -> b (h d) s', b=batch_size, h=self.num_heads) + else: + # Computing mask for cross attention + mask = mask.view(mask.shape[0], 1, mask.shape[1]) + mask = mask.repeat(1, q.shape[-1], 1) + mask = mask.to(th.bool) + + h, _ = self.attention(q, k, v, mask) + h = self.proj_out(h) + return (x + h).reshape(b, c, *spatial) + + +class GEGLU(nn.Module): + def __init__(self, dim_in, dim_out): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out * 2) + + def forward(self, x): + x, gate = self.proj(x).chunk(2, dim=-1) + return x * F.gelu(gate) + + +class FeedForward(nn.Module): + def __init__(self, dim, mult=4, glu=False, dropout=0.0): + super().__init__() + inner_dim = int(dim * mult) + project_in = nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU()) if not glu else GEGLU(dim, inner_dim) + + self.norm = normalization(dim) + self.net = nn.Sequential(project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim)) + + def forward(self, x): + b, c, *spatial = x.shape + x = x.reshape(b, c, -1) + + h = self.norm(x) + + # Reshape so that the channel dim moves to last + # Linear function operates on the last dimension + h = th.permute(h, (0, 2, 1)) + + h = self.net(h) + + # Permute it back + h = th.permute(h, (0, 2, 1)) + + return (x + h).reshape(b, c, *spatial) + + +class StackedCrossAttentionBlock(TextConditionedBlock): + """ + An attention block that stacks self-attention and cross-attention layers + in a single block. + """ + + def __init__( + self, + channels, + context_dim, + num_heads=1, + num_head_channels=-1, + use_checkpoint=False, + stable_attention=True, + flash_attention=False, + ): + super().__init__() + self.proj_in = conv_nd(2, channels, channels, 1) + self.norm = normalization(channels) + self.use_checkpoint = use_checkpoint + + self.self_attention_block = SelfAttentionBlock( + channels=channels, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_checkpoint=use_checkpoint, + stable_attention=stable_attention, + flash_attention=flash_attention, + ) + + self.cross_attention_block = CrossAttentionBlock( + channels=channels, + context_dim=context_dim, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_checkpoint=use_checkpoint, + stable_attention=stable_attention, + flash_attention=flash_attention, + ) + + self.ff = FeedForward(dim=channels, glu=True) + self.proj_out = zero_module(conv_nd(2, channels, channels, 1)) + + def forward(self, x, context, mask): + if self.use_checkpoint: + return checkpoint.checkpoint(self._forward, x, context, mask) + else: + return self._forward(x, context, mask) + + def _forward(self, x, context, mask): + + h = self.norm(x) + h = self.proj_in(h) + + h = self.self_attention_block(h) + h = self.cross_attention_block(h, context, mask) + h = self.ff(h) + + h = self.proj_out(h) + return h + x diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/embs.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/embs.py new file mode 100644 index 000000000000..6d5f50023166 --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/embs.py @@ -0,0 +1,69 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +import torch +import torch.nn as nn +from einops import rearrange + + +class LearnedSinusoidalPosEmb(nn.Module): + """ following @crowsonkb 's lead with learned sinusoidal pos emb """ + + """ https://github.com/crowsonkb/v-diffusion-jax/blob/master/diffusion/models/danbooru_128.py#L8 """ + + def __init__(self, dim): + super().__init__() + assert (dim % 2) == 0 + half_dim = dim // 2 + self.weights = nn.Parameter(torch.randn(half_dim)) + + def forward(self, x): + x = rearrange(x, 'b -> b 1') + freqs = x * rearrange(self.weights, 'd -> 1 d') * 2 * math.pi + fouriered = torch.cat((freqs.sin(), freqs.cos()), dim=-1) + fouriered = torch.cat((x, fouriered), dim=-1) + return fouriered + + +class UnLearnedSinusoidalPosEmb(nn.Module): + def __init__(self, dim, max_period=10000): + """ + Create sinusoidal timestep embeddings. + + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an [N x dim] Tensor of positional embeddings. + """ + super().__init__() + self.dim = dim + self.max_period = max_period + print(f'Unlearned Timestep Embedding Schedule: dim={dim}, max_period={max_period}') + + def forward(self, timesteps): + dim = self.dim + half = dim // 2 + max_period = self.max_period + dtype = timesteps.dtype + freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half).to( + device=timesteps.device + ) + args = timesteps[:, None].float() * freqs[None] + freqs = freqs.to(dtype=dtype) + args = args.to(dtype=dtype) + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + return embedding diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py new file mode 100644 index 000000000000..3d9cac44e820 --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py @@ -0,0 +1,240 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Copyright (c) 2021 OpenAI +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +Brought from: +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/nn.py + +Various utilities for neural networks. +""" + +import math + +import torch as th +import torch.nn as nn +import torch.nn.functional as F + + +def conv_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D convolution module. + """ + if dims == 1: + return nn.Conv1d(*args, **kwargs) + elif dims == 2: + return nn.Conv2d(*args, **kwargs) + elif dims == 3: + return nn.Conv3d(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def linear(*args, **kwargs): + """ + Create a linear module. + """ + return nn.Linear(*args, **kwargs) + + +def avg_pool_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D average pooling module. + """ + if dims == 1: + return nn.AvgPool1d(*args, **kwargs) + elif dims == 2: + return nn.AvgPool2d(*args, **kwargs) + elif dims == 3: + return nn.AvgPool3d(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def update_ema(target_params, source_params, rate=0.99): + """ + Update target parameters to be closer to those of source parameters using + an exponential moving average. + + :param target_params: the target parameter sequence. + :param source_params: the source parameter sequence. + :param rate: the EMA rate (closer to 1 means slower). + """ + for targ, src in zip(target_params, source_params): + targ.detach().mul_(rate).add_(src, alpha=1 - rate) + + +def zero_module(module): + """ + Zero out the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().zero_() + return module + + +def scale_module(module, scale): + """ + Scale the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().mul_(scale) + return module + + +def mean_flat(tensor): + """ + Take the mean over all non-batch dimensions. + """ + return tensor.mean(dim=list(range(1, len(tensor.shape)))) + + +def normalization(channels): + """ + Make a standard normalization layer. + + :param channels: number of input channels. + :return: an nn.Module for normalization. + """ + return nn.GroupNorm(32, channels) + + +def timestep_embedding(timesteps, dim, max_period=10000, dtype=th.float32): + """ + Create sinusoidal timestep embeddings. + + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an [N x dim] Tensor of positional embeddings. + """ + half = dim // 2 + freqs = th.exp(-math.log(max_period) * th.arange(start=0, end=half, dtype=th.float32) / half).to( + device=timesteps.device + ) + args = timesteps[:, None].float() * freqs[None] + freqs = freqs.to(dtype=dtype) + args = args.to(dtype=dtype) + embedding = th.cat([th.cos(args), th.sin(args)], dim=-1) + if dim % 2: + embedding = th.cat([embedding, th.zeros_like(embedding[:, :1])], dim=-1) + return embedding + + +# Native ADM nearest neighbor upsampling +class Upsample(nn.Module): + """ + An upsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + if use_conv: + self.conv = conv_nd(dims, self.channels, self.out_channels, 3, padding=1) + + def forward(self, x): + assert x.shape[1] == self.channels + if self.dims == 3: + x = F.interpolate(x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest") + else: + x = F.interpolate(x, scale_factor=2, mode="nearest") + if self.use_conv: + x = self.conv(x) + return x + + +class UpsampleLearnable(nn.Module): + """ + Upsampling based on ConvTranspose2d. This is needed for bfloat support. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + + if self.dims == 2: + self.conv = nn.ConvTranspose2d(self.channels, self.out_channels, 4, 2, 1) + elif self.dims == 3: + self.conv = nn.ConvTranspose3d( + self.channels, self.out_channels, kernel_size=(1, 4, 4), stride=(1, 2, 2), padding=(0, 1, 1) + ) + else: + raise ValueError('Upsampling support only for 2D and 3D') + + def forward(self, x): + assert x.shape[1] == self.channels + x = self.conv(x) + return x + + +class Downsample(nn.Module): + """ + A downsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + downsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + stride = 2 if dims != 3 else (1, 2, 2) + if use_conv: + self.op = conv_nd(dims, self.channels, self.out_channels, 3, stride=stride, padding=1) + else: + assert self.channels == self.out_channels + self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride) + + def forward(self, x): + assert x.shape[1] == self.channels + return self.op(x) diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py new file mode 100644 index 000000000000..e8e52e7e9ddd --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py @@ -0,0 +1,624 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from nemo.collections.multimodal.modules.imagen.diffusionmodules.attention import SelfAttentionPooling +from nemo.collections.multimodal.modules.imagen.diffusionmodules.blocks import ( + ConditionalSequential, + DBlock, + FusedCrossAttentionBlock, + ResBlock, + StackedCrossAttentionBlock, + UBlock, +) +from nemo.collections.multimodal.modules.imagen.diffusionmodules.embs import ( + LearnedSinusoidalPosEmb, + UnLearnedSinusoidalPosEmb, +) +from nemo.collections.multimodal.modules.imagen.diffusionmodules.layers import Downsample +from nemo.collections.multimodal.modules.imagen.diffusionmodules.layers import UpsampleLearnable as Upsample +from nemo.collections.multimodal.modules.imagen.diffusionmodules.layers import linear, normalization, zero_module + + +class UNetModel(nn.Module): + def __init__( + self, + embed_dim, # Dimension of embeddings. Also used to calculate the number of channels in ResBlock + image_size, # Input image size. Used to calculate where to inject attention layers in UNet + channels=3, # Input channel number + text_embed_dim=512, # Dimension of conditioned text embedding. Different text encoders and different model versions have different values + num_res_blocks=3, # Number of ResBlock in each level of UNet + channel_mult=[1, 2, 3, 4], # Used with embed_dim to calculate the number of channels for each level of UNet + num_attn_heads=4, # The number of heads in the attention layer + per_head_channels=64, # The number of channels per attention head + cond_dim=512, # Dimension of Conditioning projections + attention_type='fused', # Type of attention layer + feature_pooling_type='attention', # Type of pooling + learned_sinu_pos_emb_dim=16, # Dimension of learned time positional embedding. 0 for unlearned timestep embeddings. + attention_resolutions=[8, 16, 32], # List of resolutions to inject attention layers + dropout=False, # The rate of dropout + use_null_token=False, # Whether to create a learned null token for attention + init_conv_kernel_size=3, # Initial Conv kernel size. imagen_pytorch uses 7 + gradient_checkpointing=False, # Whether to use gradient checkpointing + scale_shift_norm=True, # Whether to use scale shift norm + stable_attention=True, # Whether to use numerically-stable attention calculation + flash_attention=False, # Whether to use flash attention calculation + resblock_updown=False, # Whether to use ResBlock or Downsample/Upsample + resample_with_conv=True, # When resblock_updown=False, whether to use conv in addition to Pooling&ConvTranspose + low_res_cond=False, + noise_cond_aug=False, + ): + super().__init__() + + # Attention Class + if attention_type == 'stacked': + attention_fn = StackedCrossAttentionBlock + elif attention_type == 'fused': + attention_fn = FusedCrossAttentionBlock + else: + raise ValueError('Attention {} not defined'.format(attention_type)) + + # Time embedding for log(snr) noise from continous version + time_embed_dim = embed_dim * 4 + assert learned_sinu_pos_emb_dim >= 0 + if learned_sinu_pos_emb_dim > 0: + sinu_pos_emb = LearnedSinusoidalPosEmb(learned_sinu_pos_emb_dim) + sinu_pos_emb_input_dim = learned_sinu_pos_emb_dim + 1 + self.time_embed = nn.Sequential( + sinu_pos_emb, + nn.Linear(sinu_pos_emb_input_dim, time_embed_dim), + nn.SiLU(), + nn.Linear(time_embed_dim, time_embed_dim), + ) + else: + # Unlearned Time Embedding + sinu_pos_emb = UnLearnedSinusoidalPosEmb(embed_dim) + self.time_embed = nn.Sequential( + sinu_pos_emb, linear(embed_dim, time_embed_dim), nn.SiLU(), linear(time_embed_dim, time_embed_dim) + ) + + # Pooling + assert feature_pooling_type == 'attention' or feature_pooling_type == 'mean' + self.feature_pooling_type = feature_pooling_type + if feature_pooling_type == 'attention': + self.attention_pooling = nn.Sequential( + SelfAttentionPooling(input_dim=text_embed_dim), + nn.LayerNorm(text_embed_dim), + nn.Linear(text_embed_dim, cond_dim), + ) + + # Context Projections + self.text_to_cond = linear(text_embed_dim, cond_dim) + self.to_text_non_attn_cond = nn.Sequential( + nn.LayerNorm(cond_dim), + nn.Linear(cond_dim, time_embed_dim), + nn.SiLU(), + nn.Linear(time_embed_dim, time_embed_dim), + ) + + # Register for Null Token + if use_null_token: + self.null_text_embedding = nn.Parameter(torch.randn(1, 1, cond_dim, dtype=self.text_to_cond.weight.dtype)) + self.use_null_token = use_null_token + + # Converting attention resolutions to downsampling factor + attention_ds = [] + attention_resolutions = sorted(attention_resolutions) + self.image_size = image_size + for res in attention_resolutions: + attention_ds.append(image_size // int(res)) + + self.low_res_cond = low_res_cond + # Low res noise conditioning augmentation + self.noise_cond_aug = noise_cond_aug + if self.noise_cond_aug: + assert ( + self.low_res_cond + ), 'noise conditioning augmentation should only be enabled when training with low-res cond' + if learned_sinu_pos_emb_dim > 0: + lowres_sinu_pos_emb = LearnedSinusoidalPosEmb(learned_sinu_pos_emb_dim) + lowres_sinu_pos_emb_dim = learned_sinu_pos_emb_dim + 1 + else: + lowres_sinu_pos_emb = UnLearnedSinusoidalPosEmb(embed_dim) + lowres_sinu_pos_emb_dim = embed_dim + self.lowres_time_embed = nn.Sequential( + lowres_sinu_pos_emb, + nn.Linear(lowres_sinu_pos_emb_dim, time_embed_dim), + nn.SiLU(), + nn.Linear(time_embed_dim, time_embed_dim), + ) + + # Initial Convolution + in_channels = 2 * channels if low_res_cond else channels + init_dim = embed_dim * channel_mult[0] + self.init_conv = ConditionalSequential( + nn.Conv2d(in_channels, init_dim, init_conv_kernel_size, padding=init_conv_kernel_size // 2) + ) + + if isinstance(num_res_blocks, int): + res_blocks_list = [num_res_blocks] * len(channel_mult) + else: + res_blocks_list = num_res_blocks + # UNet Init + # Downsampling Layers + # We use Conv2D for UNet + CONV_DIM = 2 + ch = init_dim + ds = 1 + self.input_blocks = nn.ModuleList([self.init_conv]) + num_input_block_channels = [ch] + for level, mult in enumerate(channel_mult): + num_res_blocks = res_blocks_list[level] + for _ in range(num_res_blocks): + out_channels = mult * embed_dim + layers = [ + ResBlock( + channels=ch, + emb_channels=time_embed_dim, + dropout=dropout, + out_channels=out_channels, + dims=CONV_DIM, + use_checkpoint=gradient_checkpointing, + use_scale_shift_norm=scale_shift_norm, + learnable_upsampling=True, + ) + ] + ch = out_channels + if ds in attention_ds: + layers.append( + attention_fn( + channels=ch, + num_heads=num_attn_heads, + num_head_channels=per_head_channels, + use_checkpoint=gradient_checkpointing, + stable_attention=stable_attention, + flash_attention=flash_attention, + context_dim=cond_dim, + ) + ) + self.input_blocks.append(ConditionalSequential(*layers)) + num_input_block_channels.append(ch) + is_last_level = level == len(channel_mult) - 1 + if not is_last_level: + # DownSampling + self.input_blocks.append( + ConditionalSequential( + ResBlock( + channels=ch, + emb_channels=time_embed_dim, + dropout=dropout, + out_channels=ch, + dims=CONV_DIM, + use_checkpoint=gradient_checkpointing, + use_scale_shift_norm=scale_shift_norm, + down=True, + learnable_upsampling=True, + ) + if resblock_updown + else Downsample(channels=ch, use_conv=resample_with_conv, dims=CONV_DIM, out_channels=ch,) + ) + ) + num_input_block_channels.append(ch) + ds *= 2 + + # Middle Layers + self.middle_block = ConditionalSequential( + # Mid Block 1 + ResBlock( + channels=ch, + emb_channels=time_embed_dim, + dropout=dropout, + dims=CONV_DIM, + use_checkpoint=gradient_checkpointing, + use_scale_shift_norm=scale_shift_norm, + learnable_upsampling=True, + ), + # Attention Layer + attention_fn( + channels=ch, + num_heads=num_attn_heads, + num_head_channels=per_head_channels, + use_checkpoint=gradient_checkpointing, + stable_attention=stable_attention, + flash_attention=flash_attention, + context_dim=cond_dim, + ), + # Mid Block 2 + ResBlock( + channels=ch, + emb_channels=time_embed_dim, + dropout=dropout, + dims=CONV_DIM, + use_checkpoint=gradient_checkpointing, + use_scale_shift_norm=scale_shift_norm, + learnable_upsampling=True, + ), + ) + + # Upsampling Layers + self.output_blocks = nn.ModuleList([]) + for level, mult in list(enumerate(channel_mult))[::-1]: + num_res_blocks = res_blocks_list[level] + for i in range(num_res_blocks + 1): + ich = num_input_block_channels.pop() + out_channels = embed_dim * mult + layers = [ + ResBlock( + channels=ch + ich, + emb_channels=time_embed_dim, + dropout=dropout, + out_channels=out_channels, + dims=CONV_DIM, + use_checkpoint=gradient_checkpointing, + use_scale_shift_norm=scale_shift_norm, + learnable_upsampling=True, + ) + ] + ch = out_channels + + if ds in attention_ds: + layers.append( + attention_fn( + channels=ch, + num_heads=-1, # TODO + num_head_channels=per_head_channels, + use_checkpoint=gradient_checkpointing, + stable_attention=stable_attention, + flash_attention=flash_attention, + context_dim=cond_dim, + ) + ) + is_last_block = i == num_res_blocks + if level and is_last_block: + layers.append( + ResBlock( + channels=ch, + emb_channels=time_embed_dim, + dropout=dropout, + out_channels=ch, + dims=CONV_DIM, + use_checkpoint=gradient_checkpointing, + use_scale_shift_norm=scale_shift_norm, + up=True, + learnable_upsampling=True, + ) + if resblock_updown + else Upsample(channels=ch, use_conv=resample_with_conv, dims=CONV_DIM, out_channels=ch) + ) + ds //= 2 + self.output_blocks.append(ConditionalSequential(*layers)) + + self.out = nn.Sequential( + normalization(ch), + nn.SiLU(), + zero_module(nn.Conv2d(init_dim, channels, init_conv_kernel_size, padding=init_conv_kernel_size // 2)), + ) + + def forward( + self, x, time, text_embed=None, text_mask=None, x_low_res=None, time_low_res=None, + ): + if self.low_res_cond: + assert x_low_res is not None, 'x_low_res cannot be None' + else: + assert x_low_res is None, 'x_low_res cannot be presented' + if self.noise_cond_aug: + assert time_low_res is not None, 'time_low_res cannot be None when training with noise conditioning aug' + else: + assert time_low_res is None, 'time_low_res cannot be presented' + # Concatenating low resolution images + if x_low_res is not None: + if x_low_res.shape != x.shape: + # Upscale if not done in the trainer + _, _, new_height, new_width = x.shape + x_low_res = F.interpolate(x_low_res, (new_height, new_width), mode="bicubic") + x = torch.cat([x, x_low_res], dim=1) + batch_size, device = x.shape[0], x.device + # Time Conditioning + t = self.time_embed(time) + # Add lowres time conditioning + if self.noise_cond_aug: + lowres_t = self.lowres_time_embed(time_low_res) + t += lowres_t + # Text Conditioning + text_cond = self.text_to_cond(text_embed) + + # Context Embedding + # TODO We may want to concat time token here + if self.use_null_token: + # Null Context (Helpful when text_embed is drop) + null_context = self.null_text_embedding.repeat(batch_size, 1, 1) + context_emb = torch.cat([text_cond, null_context], dim=1) + context_mask = torch.cat([text_mask, torch.ones(batch_size, 1).to(device)], dim=1) + else: + context_emb = text_cond + context_mask = text_mask + + # Add pooled text embeddings to the diffusion timestep + # TODO We may only want to calculated the pooled feature based on text token length + if self.feature_pooling_type == 'mean': + pooled_text_cond = text_cond.mean(dim=-2) + elif self.feature_pooling_type == 'attention': + pooled_text_cond = self.attention_pooling(text_embed) + text_hiddens = self.to_text_non_attn_cond(pooled_text_cond) + t += text_hiddens + + h = x + hs = [] + # UNet Forward + for module in self.input_blocks: + h = module(h, t, context_emb, context_mask) + hs.append(h) + h = self.middle_block(h, t, context_emb, context_mask) + for module in self.output_blocks: + h_prev = hs.pop() + h = torch.cat([h, h_prev], dim=1) + h = module(h, t, context_emb, context_mask) + return self.out(h) + + def forward_with_cond_scale(self, *args, text_embed=None, cond_scale=1.0, **kwargs): + logits = self.forward(*args, text_embed=text_embed, **kwargs) + if cond_scale == 1.0: + return logits + null_logits = self.forward(*args, text_embed=torch.zeros_like(text_embed), **kwargs) + return null_logits + (logits - null_logits) * cond_scale + + +class EfficientUNetModel(nn.Module): + def __init__( + self, + embed_dim, + image_size, + channels=3, + text_embed_dim=512, # Dimension of conditioned text embedding. Different text encoders and different model versions have different values + channel_mult=[ + 1, + 1, + 2, + 4, + 8, + ], # Used with embed_dim to calculate the number of channels for each level of Efficient-UNet + num_attn_heads=8, # The number of heads in the attention layer + per_head_channels=64, # The number of channels per attention head + attention_type='fused', # Type of attention layer + atnn_enabled_at=[0, 0, 0, 0, 1], # Whether to enable attention at each level + feature_pooling_type='attention', # Type of pooling + stride=2, # Stride in ResBlock + num_resblocks=[ + 1, + 2, + 4, + 8, + 8, + ], # Used with num_res_blocks to calculate the number of residual blocks at each level of Efficient-UNet + learned_sinu_pos_emb_dim=16, # Dimension of learned time positional embedding. 0 for unlearned timestep embeddings. + use_null_token=False, # Whether to create a learned null token for attention + init_conv_kernel_size=3, # Initial Conv kernel size. imagen_pytorch uses 7 + gradient_checkpointing=False, # Whether to use gradient checkpointing + scale_shift_norm=True, # Whether to use scale shift norm + stable_attention=True, # Whether to use numerically-stable attention calculation + flash_attention=False, # Whether to use flash attention calculation + skip_connection_scaling=False, # Whether to use 1/sqrt(2) scaling for ResBlock skip connection + noise_cond_aug=False, + ): + super().__init__() + + self.n_levels = len(channel_mult) + self.image_size = image_size + # Time embedding for log(snr) noise from continous version + time_embed_dim = embed_dim * 4 + assert learned_sinu_pos_emb_dim >= 0 + if learned_sinu_pos_emb_dim > 0: + sinu_pos_emb = LearnedSinusoidalPosEmb(learned_sinu_pos_emb_dim) + sinu_pos_emb_input_dim = learned_sinu_pos_emb_dim + 1 + self.time_embed = nn.Sequential( + sinu_pos_emb, + nn.Linear(sinu_pos_emb_input_dim, time_embed_dim), + nn.SiLU(), + nn.Linear(time_embed_dim, time_embed_dim), + ) + else: + # Unlearned Time Embedding + sinu_pos_emb = UnLearnedSinusoidalPosEmb(embed_dim) + self.time_embed = nn.Sequential( + sinu_pos_emb, linear(embed_dim, time_embed_dim), nn.SiLU(), linear(time_embed_dim, time_embed_dim) + ) + + self.noise_cond_aug = noise_cond_aug + if self.noise_cond_aug: + if learned_sinu_pos_emb_dim > 0: + lowres_sinu_pos_emb = LearnedSinusoidalPosEmb(learned_sinu_pos_emb_dim) + lowres_sinu_pos_emb_dim = learned_sinu_pos_emb_dim + 1 + else: + lowres_sinu_pos_emb = UnLearnedSinusoidalPosEmb(embed_dim) + lowres_sinu_pos_emb_dim = embed_dim + self.lowres_time_embed = nn.Sequential( + lowres_sinu_pos_emb, + nn.Linear(lowres_sinu_pos_emb_dim, time_embed_dim), + nn.SiLU(), + nn.Linear(time_embed_dim, time_embed_dim), + ) + cond_dim = text_embed_dim # time_embed_dim + # Pooling + assert feature_pooling_type == 'attention' or feature_pooling_type == 'mean' + self.feature_pooling_type = feature_pooling_type + if feature_pooling_type == 'attention': + self.attention_pooling = nn.Sequential( + SelfAttentionPooling(input_dim=text_embed_dim), + nn.LayerNorm(text_embed_dim), + nn.Linear(text_embed_dim, cond_dim), + ) + + # Context Projections + self.text_to_cond = linear(text_embed_dim, cond_dim) + self.to_text_non_attn_cond = nn.Sequential( + nn.LayerNorm(cond_dim), + nn.Linear(cond_dim, time_embed_dim), + nn.SiLU(), + nn.Linear(time_embed_dim, time_embed_dim), + ) + # Register for Null Token + if use_null_token: + self.null_text_embedding = nn.Parameter(torch.randn(1, 1, cond_dim, dtype=self.text_to_cond.weight.dtype)) + self.use_null_token = use_null_token + + # Initial Convolution + # Multiply in_channels by 2 because we concatenate with low res inputs. + in_channels = channels * 2 + init_dim = embed_dim * channel_mult[0] + self.init_conv = nn.Conv2d(in_channels, init_dim, init_conv_kernel_size, padding=init_conv_kernel_size // 2) + # Efficient-UNet Init + self.DBlocks = nn.ModuleDict() + self.UBlocks = nn.ModuleDict() + ch = init_dim + for level, mult in enumerate(channel_mult): + # Different level has different num of res blocks + num_resblock = num_resblocks[level] + # Only perform upsample/downsample if it is not the last (deepest) level + is_last_level = level == len(channel_mult) - 1 + level_attention_type = attention_type if atnn_enabled_at[level] else None + + level_key = str(level) # TODO Change to more meaningful naming + self.DBlocks[level_key] = DBlock( + channels=ch, + emb_channels=time_embed_dim, + out_channels=int(mult * embed_dim), + use_scale_shift_norm=scale_shift_norm, + conv_down=not is_last_level, + stride=stride, + num_resblocks=num_resblock, + attention_type=level_attention_type, + text_embed_dim=cond_dim, + num_heads=num_attn_heads, + num_head_channels=per_head_channels, + use_checkpoint=gradient_checkpointing, + stable_attention=stable_attention, + flash_attention=flash_attention, + skip_connection_scaling=skip_connection_scaling, + ) + self.UBlocks[level_key] = UBlock( + channels=int(mult * embed_dim), + emb_channels=time_embed_dim, + out_channels=ch, + use_scale_shift_norm=scale_shift_norm, + conv_up=not is_last_level, + stride=stride, + num_resblocks=num_resblock, + attention_type=level_attention_type, + text_embed_dim=cond_dim, + num_heads=num_attn_heads, + num_head_channels=per_head_channels, + use_checkpoint=gradient_checkpointing, + stable_attention=stable_attention, + flash_attention=flash_attention, + skip_connection_scaling=skip_connection_scaling, + ) + ch = int(mult * embed_dim) + self.out = nn.Conv2d(channel_mult[0] * embed_dim, channels, 1) + + def forward( + self, x, time, text_embed, text_mask, x_low_res, time_low_res=None, + ): + if self.noise_cond_aug: + assert time_low_res is not None, 'time_low_res cannot be None when training with noise conditioning aug' + else: + assert time_low_res is None, 'time_low_res cannot be presented' + + batch_size, device = x.shape[0], x.device + # Time Conditioning + t = self.time_embed(time) + # Text Conditioning + text_cond = self.text_to_cond(text_embed) + # Concatenating low resolution images + if x_low_res.shape != x.shape: + # Upscale if not done in the trainer + _, _, new_height, new_width = x.shape + x_low_res = F.interpolate(x_low_res, (new_height, new_width), mode="bicubic") + x = torch.cat([x, x_low_res], dim=1) + + # Add lowres time conditioning + if self.noise_cond_aug: + lowres_t = self.lowres_time_embed(time_low_res) + t += lowres_t + # Context Embedding + # TODO We may want to concat time token here + if self.use_null_token: + # Null Context (Helpful when text_embed is drop) + null_context = self.null_text_embedding.repeat(batch_size, 1, 1) + context_emb = torch.cat([text_cond, null_context], dim=1) + context_mask = torch.cat([text_mask, torch.ones(batch_size, 1).to(device)], dim=1) + else: + context_emb = text_cond + context_mask = text_mask + + # Add pooled text embeddings to the diffusion timestep + # TODO We may only want to calculated the pooled feature based on text token length + if self.feature_pooling_type == 'mean': + pooled_text_cond = text_cond.mean(dim=-2) + elif self.feature_pooling_type == 'attention': + pooled_text_cond = self.attention_pooling(text_embed) + text_hiddens = self.to_text_non_attn_cond(pooled_text_cond) + t += text_hiddens + + # UNet forward + x = self.init_conv(x) + feats = dict() + for level in range(self.n_levels): + level_key = str(level) + x = self.DBlocks[level_key](x, t, context_emb, context_mask) + # Save feats for UBlocks + if level < self.n_levels - 1: + feats[level_key] = x + for level in range(self.n_levels - 1, -1, -1): + level_key = str(level) + if level < self.n_levels - 1: + x += feats[level_key] + x = self.UBlocks[level_key](x, t, context_emb, context_mask) + return self.out(x) + + def forward_with_cond_scale(self, *args, text_embed=None, cond_scale=1.0, **kwargs): + logits = self.forward(*args, text_embed=text_embed, **kwargs) + if cond_scale == 1.0: + return logits + null_logits = self.forward(*args, text_embed=torch.zeros_like(text_embed), **kwargs) + return null_logits + (logits - null_logits) * cond_scale + + +if __name__ == '__main__': + model = UNetModel(embed_dim=512, image_size=64,) + + pytorch_total_params = sum(p.numel() for p in model.parameters()) + print(pytorch_total_params) + + image_batch = torch.rand(4, 3, 64, 64) + text_cond = torch.rand(4, 88, 512) + text_mask = torch.ones(4, 88) + time = torch.ones(4) + + output = model(image_batch, time, text_cond, text_mask,) + + print(output.shape) + + model_sr = EfficientUNetModel(embed_dim=128, image_size=256) + pytorch_total_params = sum(p.numel() for p in model_sr.parameters()) + print(pytorch_total_params) + output = model_sr( + torch.randn(4, 3, 256, 256), + torch.randn(4, 3, 256, 256), + torch.ones(4), + torch.randn(4, 88, 512), + torch.ones(4, 88), + ) + print(output.shape) diff --git a/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.json b/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.json new file mode 100644 index 000000000000..3fb4ffdac7f1 --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.json @@ -0,0 +1,51 @@ +{ + "architectures": [ + "T5WithLMHeadModel" + ], + "d_ff": 65536, + "d_kv": 128, + "d_model": 1024, + "decoder_start_token_id": 0, + "dropout_rate": 0.1, + "eos_token_id": 1, + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "n_positions": 512, + "num_heads": 128, + "num_layers": 24, + "output_past": true, + "pad_token_id": 0, + "relative_attention_num_buckets": 32, + "task_specific_params": { + "summarization": { + "early_stopping": true, + "length_penalty": 2.0, + "max_length": 200, + "min_length": 30, + "no_repeat_ngram_size": 3, + "num_beams": 4, + "prefix": "summarize: " + }, + "translation_en_to_de": { + "early_stopping": true, + "max_length": 300, + "num_beams": 4, + "prefix": "translate English to German: " + }, + "translation_en_to_fr": { + "early_stopping": true, + "max_length": 300, + "num_beams": 4, + "prefix": "translate English to French: " + }, + "translation_en_to_ro": { + "early_stopping": true, + "max_length": 300, + "num_beams": 4, + "prefix": "translate English to Romanian: " + } + }, + "vocab_size": 32128 +} diff --git a/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py b/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py new file mode 100644 index 000000000000..03e6f75253ec --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py @@ -0,0 +1,61 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +import torch +from transformers import T5Config, T5EncoderModel, T5Tokenizer + + +class T5Encoder(torch.nn.Module): + def __init__(self, max_seq_len=512, encoder_path=None): + super().__init__() + self.max_seq_len = max_seq_len + + self.model_seq_len = 512 + # Initializing T5 model + self.tokenizer = T5Tokenizer.from_pretrained("t5-11b", model_max_length=self.model_seq_len) + + if encoder_path is None: + self.model = T5EncoderModel.from_pretrained("t5-11b", low_cpu_mem_usage=True) + else: + print(f'Load T5 encoder from {encoder_path}') + hard_coded_encoder_weight_location = os.path.join(encoder_path, "t5xxl-encoder.bin") + hard_coded_encoder_config_location = os.path.join( + "nemo/collections/multimodal/modules/imagen/encoder/t5encoder.json" + ) + self.model = T5EncoderModel.from_pretrained( + hard_coded_encoder_weight_location, + config=T5Config.from_json_file(hard_coded_encoder_config_location), + low_cpu_mem_usage=True, + ) + + def encode(self, text_batch): + encoded = self.tokenizer.batch_encode_plus( + text_batch, return_tensors="pt", padding="max_length", max_length=self.model_seq_len, truncation=True + ) + # We expect all the processing is done in GPU. + input_ids = encoded.input_ids.cuda() + attn_mask = encoded.attention_mask.cuda() + + with torch.no_grad(): + output = self.model(input_ids=input_ids, attention_mask=attn_mask) + encoded_text = output.last_hidden_state.detach() + + encoded_text = encoded_text[:, 0 : self.max_seq_len] + attn_mask = attn_mask[:, 0 : self.max_seq_len] + for bnum in range(encoded_text.shape[0]): + nvalid_elem = attn_mask[bnum].sum().item() + encoded_text[bnum][nvalid_elem:] = 0 + + return encoded_text, attn_mask diff --git a/nemo/collections/multimodal/modules/imagen/sampler/batch_ops.py b/nemo/collections/multimodal/modules/imagen/sampler/batch_ops.py new file mode 100644 index 000000000000..029bbf60ffbc --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/sampler/batch_ops.py @@ -0,0 +1,57 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Functions for performing operations with broadcasting to the right axis +# +# Example +# input1: tensor of size (N1, N2) +# input2: tensor of size (N1, N2, N3, N4) +# batch_mul(input1, input2) = input1[:, :, None, None] * input2 +# +# If the common dimensions don't match, we raise an assertion error. + + +def common_broadcast(x, y): + ndims1 = x.ndim + ndims2 = y.ndim + + common_ndims = min(ndims1, ndims2) + for axis in range(common_ndims): + assert x.shape[axis] == y.shape[axis], 'Dimensions not equal at axis {}'.format(axis) + + if ndims1 < ndims2: + x = x.reshape(x.shape + (1,) * (ndims2 - ndims1)) + elif ndims2 < ndims1: + y = y.reshape(y.shape + (1,) * (ndims1 - ndims2)) + + return x, y + + +def batch_add(x, y): + x, y = common_broadcast(x, y) + return x + y + + +def batch_mul(x, y): + x, y = common_broadcast(x, y) + return x * y + + +def batch_sub(x, y): + x, y = common_broadcast(x, y) + return x - y + + +def batch_div(x, y): + x, y = common_broadcast(x, y) + return x / y diff --git a/nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py b/nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py new file mode 100644 index 000000000000..27b1732f1e28 --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py @@ -0,0 +1,169 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +from functools import partial, wraps + +import torch +import torch._dynamo +import torch.nn as nn +from einops import repeat +from torch.special import expm1 + +from nemo.collections.multimodal.parts.utils import randn_like + + +def exists(val): + return val is not None + + +def default(val, d): + if exists(val): + return val + return d() if callable(d) else d + + +def maybe(fn): + @wraps(fn) + def inner(x): + if not exists(x): + return x + return fn(x) + + return inner + + +def log(t, eps: float = 1e-12): + return torch.log(t.clamp(min=eps)) + + +def right_pad_dims_to(x, t): + padding_dims = x.ndim - t.ndim + if padding_dims <= 0: + return t + return t.view(*t.shape, *((1,) * padding_dims)) + + +@torch.jit.script +def beta_linear_log_snr(t): + return -torch.log(expm1(1e-4 + 10 * (t ** 2))) + + +@torch.jit.script +def alpha_cosine_log_snr(t, s: float = 0.008): + return -log( + (torch.cos((t + s) / (1 + s) * math.pi * 0.5) ** -2) - 1, eps=1e-5 + ) # not sure if this accounts for beta being clipped to 0.999 in discrete version + + +def log_snr_to_alpha_sigma(log_snr): + return torch.sqrt(torch.sigmoid(log_snr)), torch.sqrt(torch.sigmoid(-log_snr)) + + +class GaussianDiffusionContinuousTimes(nn.Module): + def __init__(self, *, noise_schedule, timesteps=1000, rng=None): + super().__init__() + + if noise_schedule == "linear": + self.log_snr = beta_linear_log_snr + elif noise_schedule == "cosine": + self.log_snr = alpha_cosine_log_snr + else: + raise ValueError(f'invalid noise schedule {noise_schedule}') + + self.num_timesteps = timesteps + self.rng = rng + + def get_times(self, batch_size, noise_level, *, device): + return torch.full((batch_size,), noise_level, device=device, dtype=torch.float32) + + def sample_random_times(self, batch_size, *, device): + return torch.rand((batch_size,), device=device, generator=self.rng, dtype=torch.float32) + + def get_condition(self, times): + return maybe(self.log_snr)(times) + + def get_sampling_timesteps(self, batch, *, device): + times = torch.linspace(1.0, 0.0, self.num_timesteps + 1, device=device) + times = repeat(times, 't -> b t', b=batch) + times = torch.stack((times[:, :-1], times[:, 1:]), dim=0) + times = times.unbind(dim=-1) + return times + + def q_posterior(self, x_start, x_t, t, *, t_next=None): + t_next = default(t_next, lambda: (t - 1.0 / self.num_timesteps).clamp(min=0.0)) + + """ https://openreview.net/attachment?id=2LdBqxc1Yv&name=supplementary_material """ + log_snr = self.log_snr(t) + log_snr_next = self.log_snr(t_next) + log_snr, log_snr_next = map(partial(right_pad_dims_to, x_t), (log_snr, log_snr_next)) + + alpha, sigma = log_snr_to_alpha_sigma(log_snr) + alpha_next, sigma_next = log_snr_to_alpha_sigma(log_snr_next) + + # c - as defined near eq 33 + c = -expm1(log_snr - log_snr_next) + posterior_mean = alpha_next * (x_t * (1 - c) / alpha + c * x_start) + + # following (eq. 33) + posterior_variance = (sigma_next ** 2) * c + posterior_log_variance_clipped = log(posterior_variance, eps=1e-20) + return posterior_mean, posterior_variance, posterior_log_variance_clipped + + def q_sample(self, x_start, t, noise=None): + dtype = x_start.dtype + + if isinstance(t, float): + batch = x_start.shape[0] + t = torch.full((batch,), t, device=x_start.device, dtype=dtype) + + noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) + log_snr = self.log_snr(t).type(dtype) + log_snr_padded_dim = right_pad_dims_to(x_start, log_snr) + alpha, sigma = log_snr_to_alpha_sigma(log_snr_padded_dim) + + return alpha * x_start + sigma * noise, log_snr, alpha, sigma + + def q_sample_from_to(self, x_from, from_t, to_t, noise=None): + shape, device, dtype = x_from.shape, x_from.device, x_from.dtype + batch = shape[0] + + if isinstance(from_t, float): + from_t = torch.full((batch,), from_t, device=device, dtype=dtype) + + if isinstance(to_t, float): + to_t = torch.full((batch,), to_t, device=device, dtype=dtype) + + noise = default(noise, lambda: randn_like(x_from, generator=self.rng)) + + log_snr = self.log_snr(from_t) + log_snr_padded_dim = right_pad_dims_to(x_from, log_snr) + alpha, sigma = log_snr_to_alpha_sigma(log_snr_padded_dim) + + log_snr_to = self.log_snr(to_t) + log_snr_padded_dim_to = right_pad_dims_to(x_from, log_snr_to) + alpha_to, sigma_to = log_snr_to_alpha_sigma(log_snr_padded_dim_to) + + return x_from * (alpha_to / alpha) + noise * (sigma_to * alpha - sigma * alpha_to) / alpha + + def predict_start_from_v(self, x_t, t, v): + log_snr = self.log_snr(t) + log_snr = right_pad_dims_to(x_t, log_snr) + alpha, sigma = log_snr_to_alpha_sigma(log_snr) + return alpha * x_t - sigma * v + + def predict_start_from_noise(self, x_t, t, noise): + log_snr = self.log_snr(t) + log_snr = right_pad_dims_to(x_t, log_snr) + alpha, sigma = log_snr_to_alpha_sigma(log_snr) + return (x_t - sigma * noise) / alpha.clamp(min=1e-8) diff --git a/nemo/collections/multimodal/modules/imagen/sampler/sampler.py b/nemo/collections/multimodal/modules/imagen/sampler/sampler.py new file mode 100644 index 000000000000..2fd05faf814d --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/sampler/sampler.py @@ -0,0 +1,250 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import torch +from einops import rearrange +from tqdm import tqdm + +from nemo.collections.multimodal.modules.imagen.sampler.batch_ops import batch_div, batch_mul +from nemo.collections.multimodal.modules.imagen.sampler.continuous_ddpm import GaussianDiffusionContinuousTimes + + +def right_pad_dims_to(x, t): + padding_dims = x.ndim - t.ndim + if padding_dims <= 0: + return t + return t.view(*t.shape, *((1,) * padding_dims)) + + +def thresholding_x0(x0, method='dynamic', th=0.995): + if method is None: + return x0 + elif method == 'static': + return x0.clamp(-1.0, 1.0) + elif method == 'dynamic': + # torch.quantile only suppoprt either float or double dtype + # we need to manual cast it if running in FP16/AMP mode + original_dtype = x0.dtype + if original_dtype not in [torch.float, torch.double]: + x0 = x0.float() + s = torch.quantile(rearrange(x0, 'b ... -> b (...)').abs(), th, dim=-1) # From Figure A.10 (b) + s.clamp_(min=1.0) + s = right_pad_dims_to(x0, s) + x0 = x0.clamp(-s, s) / s + return x0.type(original_dtype) + else: + raise RuntimeError(f'Thresholding method: {method} not supported.') + + +def thresholding_derivative(x, t, d, thresholding_method='dynamic'): + x0 = x - batch_mul(d, t) + corrected_x0 = thresholding_x0(x0, thresholding_method) + corrected_d = batch_div(x - corrected_x0, t) + return corrected_d + + +class Sampler(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, model, model_kwargs, shape, z=None): + pass + + +class DDPMSampler(Sampler): + def __init__(self, unet_type, denoiser): + super().__init__() + self.unet_type = unet_type + self.noise_scheduler = denoiser + self.pred_objective = 'noise' + + def p_mean_variance( + self, unet, x, t, t_next, text_embeds, text_mask, x_low_res=None, cond_scale=1.0, thresholding_method='dynamic' + ): + + if self.unet_type == 'base': + pred = unet.forward_with_cond_scale( + x=x, time=t, text_embed=text_embeds, text_mask=text_mask, cond_scale=cond_scale + ) + elif self.unet_type == 'sr': + pred = unet.forward_with_cond_scale( + x=x, x_low_res=x_low_res, time=t, text_embed=text_embeds, text_mask=text_mask, cond_scale=cond_scale + ) + + if self.pred_objective == 'noise': + x_start = self.noise_scheduler.predict_start_from_noise(x, t=t, noise=pred) + elif self.pred_objective == 'x_start': + x_start = pred + elif self.pred_objective == 'v': + x_start = self.noise_scheduler.predict_start_from_v(x, t=t, v=pred) + else: + raise ValueError(f'unknown objective {self.pred_objective}') + + x_start = thresholding_x0(x_start, method=thresholding_method) + mean_and_variance = self.noise_scheduler.q_posterior(x_start=x_start, x_t=x, t=t, t_next=t_next) + return mean_and_variance, x_start + + @torch.no_grad() + def p_sample( + self, unet, x, t, t_next, text_embeds, text_mask, x_low_res=None, cond_scale=1.0, thresholding_method='dynamic' + ): + (model_mean, _, model_log_variance), x_start = self.p_mean_variance( + unet=unet, + x=x, + t=t, + t_next=t_next, + text_embeds=text_embeds, + text_mask=text_mask, + cond_scale=cond_scale, + x_low_res=x_low_res, + thresholding_method=thresholding_method, + ) + noise = torch.randn_like(x) + # no noise when t == 0 + b = x.shape[0] + is_last_sampling_timestep = ( + (t_next == 0) if isinstance(self.noise_scheduler, GaussianDiffusionContinuousTimes) else (t == 0) + ) + nonzero_mask = (1 - is_last_sampling_timestep.type_as(x)).reshape(b, *((1,) * (len(x.shape) - 1))) + pred = model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise + return pred, x_start + + def forward( + self, + model, + noise_map, + text_encoding, + text_mask, + x_low_res=None, + cond_scale=1.0, + sampling_steps=None, + thresholding_method='dynamic', + ): + batch = noise_map.shape[0] + device = noise_map.device + dtype = noise_map.dtype + original_steps = self.noise_scheduler.num_timesteps + if sampling_steps: + self.noise_scheduler.num_timesteps = sampling_steps + timesteps = self.noise_scheduler.get_sampling_timesteps(batch, device=device) + img = noise_map + for times, times_next in tqdm(timesteps, total=len(timesteps)): + img, x_start = self.p_sample( + unet=model, + x=img.type(dtype), + t=times.type(dtype), + t_next=times_next.type(dtype), + text_embeds=text_encoding, + text_mask=text_mask, + cond_scale=cond_scale, + x_low_res=x_low_res.type(dtype) if x_low_res is not None else None, + thresholding_method=thresholding_method, + ) + self.noise_scheduler.num_timesteps = original_steps + return img + + +class EDMSampler(Sampler): + def __init__( + self, + unet_type, + num_steps=50, + sigma_min=0.002, + sigma_max=80, + rho=7, + S_churn=0, + S_min=0, + S_max=float('inf'), + S_noise=1, + ): + super().__init__() + self.unet_type = unet_type + self.sigma_min = sigma_min + self.sigma_max = sigma_max + self.rho = rho + self.S_churn = S_churn + self.S_min = S_min + self.S_max = S_max + self.S_noise = S_noise + self.num_steps = num_steps + + def forward( + self, + unet, + noise_map, + text_encoding, + text_mask, + x_low_res=None, + cond_scale=1.0, + sampling_steps=None, + thresholding_method='dynamic', + ): + if self.unet_type == 'base': + assert x_low_res is None + elif self.unet_type == 'sr': + assert x_low_res is not None + low_res_cond = {'x_low_res': x_low_res} if x_low_res is not None else {} + thresholding_method = 'dynamic' + sigma_min = self.sigma_min + sigma_max = self.sigma_max + print(f'Sampling with sigma in [{sigma_min}, {sigma_max}], cfg={cond_scale}') + # Time step discretization + num_steps = sampling_steps if sampling_steps else self.num_steps + step_indices = torch.arange(num_steps, device=noise_map.device) + # Table 1: Sampling - Time steps + t_steps = ( + sigma_max ** (1 / self.rho) + + step_indices / (num_steps - 1) * (sigma_min ** (1 / self.rho) - sigma_max ** (1 / self.rho)) + ) ** self.rho + t_steps = torch.cat([t_steps, torch.zeros_like(t_steps[:1])]) # t_N = 0 + + # Main sampling loop. + x_next = noise_map * t_steps[0] + for i, (t_cur, t_next) in tqdm( + enumerate(zip(t_steps[:-1], t_steps[1:])), total=len(t_steps[:-1]) + ): # 0, ..., N-1 + x_cur = x_next + + # Increase noise temporarily. + gamma = min(self.S_churn / num_steps, np.sqrt(2) - 1) if self.S_min <= t_cur <= self.S_max else 0 + t_hat = (t_cur + gamma * t_cur).to(x_cur.device) + x_hat = x_cur + (t_hat ** 2 - t_cur ** 2).sqrt() * self.S_noise * torch.randn_like(x_cur) + + # Euler step. + denoised = unet.forward_with_cond_scale( + x=x_hat.to(torch.float32), + time=t_hat.to(torch.float32), + text_embed=text_encoding, + text_mask=text_mask, + cond_scale=cond_scale, + **low_res_cond, + ) + d_cur = (x_hat - denoised) / t_hat + d_cur = thresholding_derivative(x_hat, t_hat, d_cur, thresholding_method=thresholding_method) + x_next = x_hat + (t_next - t_hat) * d_cur + + # Apply 2nd order correction. + if i < num_steps - 1: + denoised = unet.forward_with_cond_scale( + x=x_next.to(torch.float32), + time=t_next.to(torch.float32), + text_embed=text_encoding, + text_mask=text_mask, + cond_scale=cond_scale, + **low_res_cond, + ) + d_prime = (x_next - denoised) / t_next + d_prime = thresholding_derivative(x_next, t_next, d_prime, thresholding_method=thresholding_method) + x_next = x_hat + (t_next - t_hat) * (0.5 * d_cur + 0.5 * d_prime) + return x_next diff --git a/nemo/collections/multimodal/parts/imagen/__init__.py b/nemo/collections/multimodal/parts/imagen/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/parts/imagen/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/parts/imagen/utils.py b/nemo/collections/multimodal/parts/imagen/utils.py new file mode 100644 index 000000000000..565b1ed6a2b4 --- /dev/null +++ b/nemo/collections/multimodal/parts/imagen/utils.py @@ -0,0 +1,29 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + + +def random_dropout(embeddings, drop_rate): + r""" + Function to perform random dropout for embeddings. + When we drop embeddings, we zero them out. + Args: + embeddings (tensor): Input embeddings + drop_rate (float): Rate of dropping the embedding. + """ + nsamples = embeddings.shape[0] + zero_flag = torch.ones(nsamples, 1, 1).to(embeddings.dtype) * (1 - drop_rate) + zero_flag = torch.bernoulli(zero_flag).cuda() + embeddings = embeddings * zero_flag + return embeddings From f4ba6181a973a04de73c35769482861a9eba91bc Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Wed, 21 Jun 2023 15:13:34 -0700 Subject: [PATCH 135/512] [SD/Mingyuan Ma] Merging NHWC+GN into internal/Main --- .../stable_diffusion/conf/sd_train.yaml | 6 +- .../generative/stable_diffusion/sd_train.py | 18 ++++ .../stable_diffusion/ldm/autoencoder.py | 98 +++++++++++++++++-- .../models/stable_diffusion/ldm/ddpm.py | 49 ++++++++-- .../modules/stable_diffusion/attention.py | 4 +- .../diffusionmodules/model.py | 11 +-- .../diffusionmodules/openaimodel.py | 5 +- .../stable_diffusion/diffusionmodules/util.py | 5 +- .../stable_diffusion/encoders/modules.py | 39 +++++++- utils/flash-attention.patch | 36 ++++--- 10 files changed, 224 insertions(+), 47 deletions(-) diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml index 5dc8865dcb8a..01b5ed38f62a 100644 --- a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml @@ -84,8 +84,10 @@ model: cond_stage_forward: text_embedding_dropout_rate: 0.1 fused_opt: True - inductor: False + inductor: True inductor_cudagraphs: False + capture_cudagraph_iters: -1 # -1 to disable + channels_last: True unet_config: _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel @@ -135,6 +137,7 @@ model: dropout: 0.0 lossconfig: target: torch.nn.Identity + capture_cudagraph_iters: ${model.capture_cudagraph_iters} cond_stage_config: _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder @@ -147,6 +150,7 @@ model: # version: openai/clip-vit-large-patch14 # device: cuda # max_length: 77 + # capture_cudagraph_iters: {$model.capture_cudagraph_iters} # miscellaneous diff --git a/examples/multimodal/generative/stable_diffusion/sd_train.py b/examples/multimodal/generative/stable_diffusion/sd_train.py index f423a684d349..71a0fcb44163 100644 --- a/examples/multimodal/generative/stable_diffusion/sd_train.py +++ b/examples/multimodal/generative/stable_diffusion/sd_train.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from datetime import timedelta import torch @@ -43,6 +44,23 @@ def main(cfg) -> None: torch.backends.cuda.matmul.allow_tf32 = True + if cfg.model.capture_cudagraph_iters >= 0: + # Required by CUDA graph with DDP + os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "0" + + # Hack to avoid CUDA graph issue with AMP, PyTorch Lightning doesn't support + # changing autocast arguments for now. + # https://github.com/pytorch/pytorch/blob/v1.13.1/torch/cuda/graphs.py#L234 + def amp_autocast_init(self, *args, **kwargs): + if "cache_enabled" not in kwargs: + kwargs["cache_enabled"] = False + return self.__orig_init__(*args, **kwargs) + + torch.cuda.amp.autocast.__orig_init__ = torch.cuda.amp.autocast.__init__ + torch.cuda.amp.autocast.__init__ = amp_autocast_init + torch.autocast.__orig_init__ = torch.autocast.__init__ + torch.autocast.__init__ = amp_autocast_init + plugins = [] strategy = NLPDDPStrategy( diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py index 7ac641d4ec1f..7443dc014f94 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py @@ -314,6 +314,7 @@ def __init__( colorize_nlabels=None, monitor=None, from_pretrained: str = None, + capture_cudagraph_iters=-1, ): super().__init__() self.image_key = image_key @@ -337,6 +338,16 @@ def __init__( state_dict = load_state_dict(from_pretrained) self._load_pretrained_model(state_dict) + # CUDA graph captured sub-modules + self.capture_cudagraph_iters = capture_cudagraph_iters + self.stream = torch.cuda.Stream() + self.encoder_iterations = self.decoder_iterations = 0 + self.encoder_graph = torch.cuda.CUDAGraph() # eval + self.decoder_graph = torch.cuda.CUDAGraph() # eval + self.graphed_encoder = self.graphed_decoder = None # train + self.static_x = self.static_moments = None + self.static_z = self.static_dec = None + def _state_key_mapping(self, state_dict: dict): import re @@ -446,15 +457,88 @@ def init_from_ckpt(self, path, ignore_keys=list()): print(f"Restored from {path}") def encode(self, x): - h = self.encoder(x) - moments = self.quant_conv(h) - posterior = DiagonalGaussianDistribution(moments) - return posterior + if self.training: + if self.encoder_iterations == self.capture_cudagraph_iters: + logging.info("Capturing CUDA graph for module: %s", self.encoder.__class__.__name__) + self.graphed_encoder = torch.cuda.make_graphed_callables(self.encoder, (x,)) + + if 0 <= self.capture_cudagraph_iters <= self.encoder_iterations: + h = self.graphed_encoder(x) + else: + h = self.encoder(x) + self.encoder_iterations += 1 + + moments = self.quant_conv(h) + posterior = DiagonalGaussianDistribution(moments) + return posterior + else: + # create static input and copy input to static buffer + if self.static_x is None: + self.static_x = torch.randn_like(x) + self.static_x.copy_(x) + + if self.encoder_iterations == self.capture_cudagraph_iters: + # cuda graph capture + logging.info("Capturing CUDA graph for module: %s", self.encoder.__class__.__name__) + with torch.cuda.graph(self.encoder_graph): + h = self.encoder(self.static_x) + self.static_moments = self.quant_conv(h) + + if 0 <= self.capture_cudagraph_iters <= self.encoder_iterations: + # cuda graph replay + self.encoder_graph.replay() + else: + # warmup + self.stream.wait_stream(torch.cuda.current_stream()) + with torch.cuda.stream(self.stream): + h = self.encoder(self.static_x) + self.static_moments = self.quant_conv(h) + torch.cuda.current_stream().wait_stream(self.stream) + self.encoder_iterations += 1 + + posterior = DiagonalGaussianDistribution(self.static_moments) + return posterior def decode(self, z): - z = self.post_quant_conv(z) - dec = self.decoder(z) - return dec + if self.training: + if self.decoder_iterations == self.capture_cudagraph_iters: + logging.info("Capturing CUDA graph for module: %s", self.decoder.__class__.__name__) + self.graphed_decoder = torch.cuda.make_graphed_callables(self.decoder, (z,)) + + h = self.post_quant_conv(z) + if 0 <= self.capture_cudagraph_iters <= self.decoder_iterations: + dec = self.graphed_decoder(h) + else: + dec = self.decoder(h) + self.decoder_iterations += 1 + + return dec + else: + # create static input and copy input to static buffer + if self.static_z is None: + self.static_z = torch.randn_like(z) + self.static_z.copy_(z) + + if self.decoder_iterations == self.capture_cudagraph_iters: + # cuda graph capture + logging.info("Capturing CUDA graph for module: %s", self.decoder.__class__.__name__) + with torch.cuda.graph(self.decoder_graph): + h = self.post_quant_conv(self.static_z) + self.static_dec = self.decoder(h) + + if 0 <= self.capture_cudagraph_iters <= self.decoder_iterations: + # cuda graph replay + self.decoder_graph.replay() + else: + # warmup + self.stream.wait_stream(torch.cuda.current_stream()) + with torch.cuda.stream(self.stream): + h = self.post_quant_conv(self.static_z) + self.static_dec = self.decoder(h) + torch.cuda.current_stream().wait_stream(self.stream) + self.decoder_iterations += 1 + + return self.static_dec def forward(self, input, sample_posterior=True): posterior = self.encode(input) diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py index 695cf3ba9ddf..fd2286791047 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -126,8 +126,11 @@ def __init__(self, cfg): self.first_stage_key = cfg.first_stage_key self.image_size = cfg.image_size # try conv? self.channels = cfg.channels + self.channels_last = cfg.channels_last self.use_positional_encodings = cfg.use_positional_encodings - self.model = DiffusionWrapper(cfg.unet_config, cfg.conditioning_key) + self.model = DiffusionWrapper( + cfg.unet_config, cfg.conditioning_key, cfg.inductor, cfg.inductor_cudagraphs, cfg.capture_cudagraph_iters + ) self.model_type = None count_params(self.model, verbose=True) @@ -384,8 +387,11 @@ def get_input(self, batch, k): x = batch[k] if len(x.shape) == 3: x = x[..., None] - x = rearrange(x, 'b h w c -> b c h w') - x = x.to(memory_format=torch.contiguous_format) + if self.channels_last: + x = x.permute(0, 3, 1, 2).to(non_blocking=True) + else: + x = rearrange(x, "b h w c -> b c h w") + x = x.to(memory_format=torch.contiguous_format, non_blocking=True) return x def shared_step(self, batch): @@ -481,11 +487,9 @@ def __init__(self, cfg): self.init_from_ckpt(ckpt_path, ignore_keys) self.restarted_from_ckpt = True - # Fusing VAE and CLIP doesn't give benefit - if cfg.get("inductor", False): - # TorchInductor with CUDA graph can lead to OOM - inductor_config.triton.cudagraphs = cfg.get("inductor_cudagraphs", False) - self.model = optimize("inductor")(self.model) + if self.channels_last: + self.first_stage_model = self.first_stage_model.to(memory_format=torch.channels_last) + self.model = self.model.to(memory_format=torch.channels_last) def make_cond_schedule(self,): self.cond_ids = torch.full(size=(self.num_timesteps,), fill_value=self.num_timesteps - 1, dtype=torch.long) @@ -1950,12 +1954,29 @@ def parameters(self): class DiffusionWrapper(pl.LightningModule, Serialization): - def __init__(self, diff_model_config, conditioning_key): + def __init__( + self, + diff_model_config, + conditioning_key, + inductor: bool = False, + inductor_cudagraphs: bool = False, + capture_cudagraph_iters: int = -1, + ): super().__init__() self.diffusion_model = DiffusionWrapper.from_config_dict(diff_model_config) self.conditioning_key = conditioning_key assert self.conditioning_key in [None, 'concat', 'crossattn', 'hybrid', 'adm'] + # Fusing VAE and CLIP doesn't give benefit + if inductor: + # TorchInductor with CUDA graph can lead to OOM + inductor_config.triton.cudagraphs = inductor_cudagraphs + self.diffusion_model = optimize("inductor")(self.diffusion_model) + # CUDA graph + self.capture_cudagraph_iters = capture_cudagraph_iters + self.iterations = 0 + self.graphed_diffusion_model = None + def forward(self, x, t, c_concat: list = None, c_crossattn: list = None): if self.conditioning_key is None: out = self.diffusion_model(x, t) @@ -1964,7 +1985,15 @@ def forward(self, x, t, c_concat: list = None, c_crossattn: list = None): out = self.diffusion_model(xc, t) elif self.conditioning_key == 'crossattn': cc = torch.cat(c_crossattn, 1) - out = self.diffusion_model(x, t, context=cc) + if self.iterations == self.capture_cudagraph_iters: + logging.info("Capturing CUDA graph for module: %s", self.diffusion_model.__class__.__name__) + self.graphed_diffusion_model = torch.cuda.make_graphed_callables(self.diffusion_model, (x, t, cc)) + + if 0 <= self.capture_cudagraph_iters <= self.iterations: + out = self.graphed_diffusion_model(x, t, cc) + else: + out = self.diffusion_model(x, t, context=cc) + self.iterations += 1 elif self.conditioning_key == 'hybrid': xc = torch.cat([x] + c_concat, dim=1) cc = torch.cat(c_crossattn, 1) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/attention.py b/nemo/collections/multimodal/modules/stable_diffusion/attention.py index fd9fe4a5613f..d8317cafb85e 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/attention.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/attention.py @@ -17,6 +17,7 @@ import torch import torch.nn.functional as F from einops import rearrange, repeat +from group_norm import GroupNormOpt from torch import einsum, nn from torch._dynamo import disable @@ -111,7 +112,7 @@ def zero_module(module): def Normalize(in_channels): - return torch.nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True) + return GroupNormOpt(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True) class LinearAttention(nn.Module): @@ -375,6 +376,5 @@ def forward(self, x, context=None): for block in self.transformer_blocks: x = block(x, context=context) x = x.transpose(1, 2).view(b, c, h, w) # b (h w) c -> b c h w - x = x.contiguous() # workaround for dynamo ddp bug x = self.proj_out(x) return x + x_in diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py index 6ee2ed7c9576..dbfab3ab4b07 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py @@ -17,6 +17,7 @@ import torch import torch.nn as nn from einops import rearrange +from group_norm import GroupNormOpt from nemo.collections.multimodal.modules.stable_diffusion.attention import LinearAttention from nemo.collections.multimodal.parts.stable_diffusion.utils import instantiate_from_config @@ -48,8 +49,8 @@ def nonlinearity(x): return torch.nn.functional.silu(x) -def Normalize(in_channels, num_groups=32): - return torch.nn.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True) +def Normalize(in_channels, num_groups=32, act=""): + return GroupNormOpt(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True, act=act) class Upsample(nn.Module): @@ -100,11 +101,11 @@ def __init__(self, *, in_channels, out_channels=None, conv_shortcut=False, dropo self.out_channels = out_channels self.use_conv_shortcut = conv_shortcut - self.norm1 = Normalize(in_channels) + self.norm1 = Normalize(in_channels, act="silu") self.conv1 = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) if temb_channels > 0: self.temb_proj = torch.nn.Linear(temb_channels, out_channels) - self.norm2 = Normalize(out_channels) + self.norm2 = Normalize(out_channels, act="silu") self.dropout = torch.nn.Dropout(dropout) self.conv2 = torch.nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1) if self.in_channels != self.out_channels: @@ -116,14 +117,12 @@ def __init__(self, *, in_channels, out_channels=None, conv_shortcut=False, dropo def forward(self, x, temb): h = x h = self.norm1(h) - h = nonlinearity(h) h = self.conv1(h) if temb is not None: h = h + self.temb_proj(nonlinearity(temb))[:, :, None, None] h = self.norm2(h) - h = nonlinearity(h) h = self.dropout(h) h = self.conv2(h) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py index 1f4715e0ba9b..2bd026a9e0ed 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py @@ -218,7 +218,7 @@ def __init__( self.use_scale_shift_norm = use_scale_shift_norm self.in_layers = nn.Sequential( - normalization(channels), nn.SiLU(), conv_nd(dims, channels, self.out_channels, 3, padding=1), + normalization(channels, act="silu"), conv_nd(dims, channels, self.out_channels, 3, padding=1), ) self.updown = up or down @@ -236,8 +236,7 @@ def __init__( nn.SiLU(), linear(emb_channels, 2 * self.out_channels if use_scale_shift_norm else self.out_channels,), ) self.out_layers = nn.Sequential( - normalization(self.out_channels), - nn.SiLU(), + normalization(self.out_channels, act="silu"), nn.Dropout(p=dropout), zero_module(conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1)), ) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py index 0174fc90a0a2..e9b9d4cdc58c 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py @@ -27,6 +27,7 @@ import torch import torch.nn as nn from einops import repeat +from group_norm import GroupNormOpt from torch._dynamo import disable @@ -209,13 +210,13 @@ def mean_flat(tensor): return tensor.mean(dim=list(range(1, len(tensor.shape)))) -def normalization(channels): +def normalization(channels, act=""): """ Make a standard normalization layer. :param channels: number of input channels. :return: an nn.Module for normalization. """ - return GroupNorm32(32, channels) + return GroupNormOpt(32, channels, act=act) # PyTorch 1.7 has SiLU, but we support PyTorch 1.5. diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py index 81aaec23d868..1c81723fdda9 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py @@ -34,6 +34,7 @@ from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.utils import logging class AbstractEncoder(nn.Module): @@ -180,7 +181,9 @@ def encode(self, x): class FrozenCLIPEmbedder(AbstractEncoder): """Uses the CLIP transformer encoder for text (from Hugging Face)""" - def __init__(self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77, use_fp16=False): + def __init__( + self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77, capture_cudagraph_iters: int = -1 + ): super().__init__() self.tokenizer = CLIPTokenizer.from_pretrained(version) self.transformer = CLIPTextModel.from_pretrained(version) @@ -188,6 +191,14 @@ def __init__(self, version="openai/clip-vit-large-patch14", device="cuda", max_l self.max_length = max_length self.freeze() + # CUDA graph captured sub-modules + self.capture_cudagraph_iters = capture_cudagraph_iters + self.iterations = 0 + self.stream = torch.cuda.Stream() + self.transformer_graph = torch.cuda.CUDAGraph() + self.static_tokens = None + self.static_outputs = None + def freeze(self): self.transformer = self.transformer.eval() for param in self.parameters(): @@ -203,10 +214,28 @@ def forward(self, text): padding="max_length", return_tensors="pt", ) - tokens = batch_encoding["input_ids"].to(self.device, non_blocking=True) - outputs = self.transformer(input_ids=tokens) - - z = outputs.last_hidden_state + if self.static_tokens is None: + self.static_tokens = batch_encoding["input_ids"].to(device=self.device, non_blocking=True) + self.static_tokens.copy_(batch_encoding["input_ids"], non_blocking=True) + + if self.iterations == self.capture_cudagraph_iters: + # cuda graph capture + logging.info("Capturing CUDA graph for module: %s", self.transformer.__class__.__name__) + with torch.cuda.graph(self.transformer_graph): + self.static_outputs = self.transformer(input_ids=self.static_tokens) + + if 0 <= self.capture_cudagraph_iters <= self.iterations: + # cuda graph replay + self.transformer_graph.replay() + else: + # warmup + self.stream.wait_stream(torch.cuda.current_stream()) + with torch.cuda.stream(self.stream): + self.static_outputs = self.transformer(input_ids=self.static_tokens) + torch.cuda.current_stream().wait_stream(self.stream) + self.iterations += 1 + + z = self.static_outputs.last_hidden_state # # Pad the seq length to multiple of 8 seq_len = (z.shape[1] + 8 - 1) // 8 * 8 diff --git a/utils/flash-attention.patch b/utils/flash-attention.patch index 52a771dfdd9e..3587ffd57257 100644 --- a/utils/flash-attention.patch +++ b/utils/flash-attention.patch @@ -1,5 +1,5 @@ diff --git a/csrc/flash_attn/fmha_api.cpp b/csrc/flash_attn/fmha_api.cpp -index 43b6f4c..064e8fe 100644 +index 6602a6c..19d1551 100644 --- a/csrc/flash_attn/fmha_api.cpp +++ b/csrc/flash_attn/fmha_api.cpp @@ -207,6 +207,11 @@ mha_fwd(const at::Tensor &q, // total_q x num_heads x head_size, total_q @@ -14,7 +14,7 @@ index 43b6f4c..064e8fe 100644 TORCH_CHECK(is_sm8x || is_sm75); auto stream = at::cuda::getCurrentCUDAStream().stream(); bool is_dropout = p_dropout > 0.0; -@@ -358,6 +363,11 @@ mha_bwd(const at::Tensor &dout, // total_q x num_heads, x head_size +@@ -359,6 +364,11 @@ mha_bwd(const at::Tensor &dout, // total_q x num_heads, x head_size bool is_sm75 = dprops->major == 7 && dprops->minor == 5; bool is_sm80 = dprops->major == 8 && dprops->minor == 0; bool is_sm8x = dprops->major == 8 && dprops->minor >= 0; @@ -25,17 +25,17 @@ index 43b6f4c..064e8fe 100644 + } TORCH_CHECK(is_sm8x || is_sm75); auto launch = &run_fmha_bwd; - -@@ -406,7 +416,7 @@ mha_bwd(const at::Tensor &dout, // total_q x num_heads, x head_size + +@@ -407,7 +417,7 @@ mha_bwd(const at::Tensor &dout, // total_q x num_heads, x head_size TORCH_CHECK(batch_size > 0); TORCH_CHECK((head_size % 8 == 0) && (head_size <= 128)); if (head_size > 64) { // TODO: eventually we should support SM86 and SM70 with d=128 as well - TORCH_CHECK(is_sm80); + TORCH_CHECK(is_sm80 || is_sm90); } - + CHECK_SHAPE(q, total_q, num_heads, head_size); -@@ -648,7 +658,12 @@ mha_bwd_block(const at::Tensor &dout, // total x num_heads, x head_size +@@ -650,7 +660,12 @@ mha_bwd_block(const at::Tensor &dout, // total x num_heads, x head_size auto dprops = at::cuda::getCurrentDeviceProperties(); bool is_sm80 = dprops->major == 8 && dprops->minor == 0; bool is_sm8x = dprops->major == 8 && dprops->minor >= 0; @@ -47,19 +47,33 @@ index 43b6f4c..064e8fe 100644 + } + TORCH_CHECK(is_sm8x); auto launch = &run_fmha_block_dgrad_fp16_sm80; - + bool is_dropout = p_dropout > 0.0; -@@ -698,7 +713,7 @@ mha_bwd_block(const at::Tensor &dout, // total x num_heads, x head_size +@@ -700,7 +715,7 @@ mha_bwd_block(const at::Tensor &dout, // total x num_heads, x head_size TORCH_CHECK(batch_size > 0); TORCH_CHECK(head_size == 16 || head_size == 32 || head_size == 64 || head_size == 128); if (head_size == 128) { // TODO: eventually we should support SM86 and SM70 with d=128 as well - TORCH_CHECK(is_sm80); + TORCH_CHECK(is_sm80 || is_sm90); } - + CHECK_SHAPE(q, total_q, num_heads, head_size); +diff --git a/csrc/flash_attn/src/fmha_bwd_hdim64.cu b/csrc/flash_attn/src/fmha_bwd_hdim64.cu +index 7dd8650..d039726 100644 +--- a/csrc/flash_attn/src/fmha_bwd_hdim64.cu ++++ b/csrc/flash_attn/src/fmha_bwd_hdim64.cu +@@ -24,6 +24,9 @@ void run_fmha_bwd_hdim64(FMHA_dgrad_params ¶ms, cudaStream_t stream, const b + } else if (dprops->major == 7 && dprops->minor == 5) { + using Kernel_traits = FMHA_kernel_traits<128, 64, 16, 1, 8, 0x08u, elem_type>; + run_fmha_bwd_loop(params, stream, configure); ++ } else if (dprops->major == 9 && dprops->minor == 0) { ++ using Kernel_traits = FMHA_kernel_traits<256, 64, 16, 1, 8, 0x100u, elem_type>; ++ run_fmha_bwd_loop(params, stream, configure); + } + } + })); diff --git a/setup.py b/setup.py -index 1cd61e5..10eb2b5 100644 +index 5516804..a21a903 100644 --- a/setup.py +++ b/setup.py @@ -112,6 +112,8 @@ cc_flag.append("-gencode") @@ -68,6 +82,6 @@ index 1cd61e5..10eb2b5 100644 cc_flag.append("arch=compute_80,code=sm_80") +cc_flag.append("-gencode") +cc_flag.append("arch=compute_90,code=sm_90") - + subprocess.run(["git", "submodule", "update", "--init", "csrc/flash_attn/cutlass"]) ext_modules.append( From 9ab8f8753e61b9e2f375e60282f96a67146ac8a0 Mon Sep 17 00:00:00 2001 From: Yu Yao Date: Wed, 21 Jun 2023 15:34:51 -0700 Subject: [PATCH 136/512] Update clip conversion script --- .../clip/conf/megatron_clip_VIT-L-14.yaml | 203 ------------------ ...mo.py => convert_external_clip_to_nemo.py} | 99 ++++++++- .../foundation/clip/convert_hfclip_to_nemo.py | 191 ---------------- 3 files changed, 94 insertions(+), 399 deletions(-) delete mode 100644 examples/multimodal/foundation/clip/conf/megatron_clip_VIT-L-14.yaml rename examples/multimodal/foundation/clip/{convert_openclip_to_nemo.py => convert_external_clip_to_nemo.py} (63%) delete mode 100644 examples/multimodal/foundation/clip/convert_hfclip_to_nemo.py diff --git a/examples/multimodal/foundation/clip/conf/megatron_clip_VIT-L-14.yaml b/examples/multimodal/foundation/clip/conf/megatron_clip_VIT-L-14.yaml deleted file mode 100644 index 8a21fccd0874..000000000000 --- a/examples/multimodal/foundation/clip/conf/megatron_clip_VIT-L-14.yaml +++ /dev/null @@ -1,203 +0,0 @@ -model: - precision: 32 - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 64 # limited by GPU memory - global_batch_size: 2048 # will use more micro batches to reach global batch size - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - virtual_pipeline_model_parallel_size: null # interleaved pipeline - - restore_from_pretrained: null # used in fine-tuning - # multimodal configs - output_dim: 768 - # As the number of devices used to train increases, so does the space complexity of - # the logit matrix. Using a naïve all-gather scheme, space complexity will be - # `O(n^2)`. Instead, complexity may become effectively linear if the flags - # `--gather-with-grad` and `--local-loss` are used. This alteration results in one-to-one - # numerical results as the naïve method. - local_loss: False # calculate loss w/ local features @ global (instead of realizing full global @ global matrix) - gather_with_grad: True # enable full distributed gradient for feature gather, set this to False may cause convergence issue - - vision: - precision: 32 - # vision configs - patch_dim: 14 - img_h: 224 - img_w: 224 - image_mean: null - image_std: null - num_channels: 3 - drop_patch_rate: 0.0 - drop_path_rate: 0.0 - global_average_pool: False - output_dim: ${model.output_dim} - class_token_length: 1 - preprocess_layernorm: True # apply layer norm to embedded tokens - - # model architecture - encoder_seq_length: 196 - max_position_embeddings: ${.encoder_seq_length} - position_embedding_type: learned_parameters - num_layers: 24 - hidden_size: 1024 - ffn_hidden_size: 4096 # Transformer FFN hidden size. Usually 4 * hidden_size. - num_attention_heads: 16 - init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') - use_scaled_init_method: True # use scaled residuals initialization - hidden_dropout: 0. # Dropout probability for hidden state transformer. - attention_dropout: 0. - kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. - normalization: layernorm # Type of normalization layers - layernorm_epsilon: 1e-5 - do_layer_norm_weight_decay: False # True means weight decay on all params - pre_process: True # add embedding - post_process: True # add pooler - persist_layer_norm: True # Use of persistent fused layer norm kernel. - - ## Activation Checkpointing - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - activations_checkpoint_num_layers: null # not used with 'selective' - sequence_parallel: False - - # precision - native_amp_init_scale: 4294967296 # 2 ** 32 - native_amp_growth_interval: 1000 - hysteresis: 2 # Gradient scale hysteresis - fp32_residual_connection: False # Move residual connections to fp32 - fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 - - # model fusions - masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. - bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. - - use_cpu_initialization: False # Init weights on the CPU (slow for large models) - onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. - gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. - openai_gelu: False - bias_activation_fusion: False - megatron_legacy: True - activation: quick-gelu - - - - text: - precision: 32 - # text configs - output_dim: ${model.output_dim} - - # model architecture - encoder_seq_length: 77 - max_position_embeddings: ${.encoder_seq_length} - position_embedding_type: learned_parameters - num_layers: 12 - hidden_size: 768 - ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. - num_attention_heads: 12 - init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') - use_scaled_init_method: True # use scaled residuals initialization - hidden_dropout: 0. # Dropout probability for hidden state transformer. - attention_dropout: 0. - kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. - normalization: layernorm # Type of normalization layers - layernorm_epsilon: 1e-5 - do_layer_norm_weight_decay: False # True means weight decay on all params - pre_process: True # add embedding - post_process: True # add pooler - persist_layer_norm: True # Use of persistent fused layer norm kernel. - - ## Activation Checkpointing - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - activations_checkpoint_num_layers: null # not used with 'selective' - num_micro_batches_with_partial_activation_checkpoints: null - activations_checkpoint_layers_per_pipeline: null - sequence_parallel: False - - # precision - native_amp_init_scale: 4294967296 # 2 ** 32 - native_amp_growth_interval: 1000 - hysteresis: 2 # Gradient scale hysteresis - fp32_residual_connection: False # Move residual connections to fp32 - fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 - - # model fusions - masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. - bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. - - use_cpu_initialization: False # Init weights on the CPU (slow for large models) - onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. - gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. - openai_gelu: False - bias_activation_fusion: False - megatron_legacy: True - - transformer_engine: False - fp8: False # enables fp8 in TransformerLayer forward - fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 - fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID - fp8_margin: 0 # scaling margin - fp8_interval: 1 # scaling update interval - fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor - fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history - use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. - activation: quick-gelu - - # Megatron O2-style half-precision - megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters - grad_allreduce_chunk_size_mb: 125 - grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - tokenizer: - library: 'huggingface' - type: 'openai/clip-vit-large-patch14' - model: null - vocab_file: null - merge_file: null - delimiter: null # only used for tabular tokenizer - sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. - make_vocab_size_divisible_by: 128 # Pad the vocab size to be divisible by this value for computation efficiency. - - data: - num_workers: 8 - train: - dataset_path: # List of paths to pkl files or tar files - - /datasets/coyo/test.pkl - validation: # List of paths to pkl files or tar files - dataset_path: - - /datasets/coyo/test.pkl - webdataset: - infinite_sampler: False - local_root_path: /datasets/coyo - - imagenet_val: null # Path to imagenet val set for conducting zero shot evaluation. - - # Nsys profiling options - nsys_profile: - enabled: False - start_step: 10 # Global batch to start profiling - end_step: 10 # Global batch to end profiling - ranks: [ 0 ] # Global rank IDs to profile - gen_shape: False # Generate model and kernel details including input shapes - - optim: - name: fused_adam - lr: 1e-3 - weight_decay: 0.2 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 2000 - constant_steps: 0 - min_lr: 1e-5 \ No newline at end of file diff --git a/examples/multimodal/foundation/clip/convert_openclip_to_nemo.py b/examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py similarity index 63% rename from examples/multimodal/foundation/clip/convert_openclip_to_nemo.py rename to examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py index 98cd732336a8..67151d95e971 100644 --- a/examples/multimodal/foundation/clip/convert_openclip_to_nemo.py +++ b/examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,6 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +""" +Usage example: + python /opt/NeMo/examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py + --arch=ViT-H-14 + --version=laion2b_s32b_b79k + --hparams_file=path/to/saved.yaml + --nemo_file_path=open_clip.nemo + +If converting from OpenCLIP, specify the architecture (`arch`) and version (`version`) from the OpenCLIP model list (https://github.com/mlfoundations/open_clip#usage). + +If converting from Hugging Face, set the version to `huggingface` and the architecture (`arch`) to the Hugging Face model name (e.g., `yuvalkirstain/PickScore_v1`). + +Additionally, provide a NeMo hparams file with the correct model architecture arguments. Refer to examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml. +""" + import os from argparse import ArgumentParser @@ -21,6 +36,7 @@ from omegaconf import OmegaConf from pytorch_lightning.plugins.environments import TorchElasticEnvironment from pytorch_lightning.trainer.trainer import Trainer +from transformers import CLIPModel from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector @@ -41,7 +57,6 @@ def get_args(): parser = ArgumentParser() parser.add_argument("--arch", type=str, default="ViT-H-14") - parser.add_argument("--version", type=str, default="laion2b_s32b_b79k") parser.add_argument( @@ -69,7 +84,7 @@ def get_args(): return args -def mapping_state_dict(open_model): +def mapping_openclip_state_dict(open_model): open_state_dict = open_model.state_dict() key_mapping = { "positional_embedding": "text_encoder.language_model.embedding.position_embeddings", @@ -132,6 +147,75 @@ def mapping_state_dict(open_model): return nemo_state_dict +def mapping_hf_state_dict(hf_model): + hf_state_dict = hf_model.state_dict() + key_mapping = { + "text_projection.weight": "text_encoder.head.weight", + "visual_projection.weight": "vision_encoder.head.weight", + } + + layer_mapping = { + ".layer_norm1.weight": ".input_layernorm.weight", + ".layer_norm1.bias": ".input_layernorm.bias", + ".self_attn.out_proj.weight": ".self_attention.dense.weight", + ".self_attn.out_proj.bias": ".self_attention.dense.bias", + ".layer_norm2.weight": ".post_attention_layernorm.weight", + ".layer_norm2.bias": ".post_attention_layernorm.bias", + ".mlp.fc1.weight": ".mlp.dense_h_to_4h.weight", + ".mlp.fc1.bias": ".mlp.dense_h_to_4h.bias", + ".mlp.fc2.weight": ".mlp.dense_4h_to_h.weight", + ".mlp.fc2.bias": ".mlp.dense_4h_to_h.bias", + ".pre_layrnorm.weight": ".preprocess_layernorm.weight", + ".pre_layrnorm.bias": ".preprocess_layernorm.bias", + ".post_layernorm.weight": ".transformer.final_layernorm.weight", + ".post_layernorm.bias": ".transformer.final_layernorm.bias", + ".backbone.embeddings.position_embedding.weight": ".backbone.position_embeddings", + ".language_model.embeddings.position_embedding.weight": ".language_model.embedding.position_embeddings", + ".embeddings.class_embedding": ".cls_token", + ".backbone.embeddings.patch_embedding.weight": ".backbone.linear_encoder.weight", + ".final_layer_norm.weight": ".encoder.final_layernorm.weight", + ".final_layer_norm.bias": ".encoder.final_layernorm.bias", + ".embeddings.token_embedding.weight": ".embedding.word_embeddings.weight", + } + + nemo_state_dict = {} + for key in hf_state_dict.keys(): + if key.startswith("text_model.encoder.layers"): + key_ = key.replace("text_model.encoder.layers", "text_encoder.language_model.encoder.layers") + elif key.startswith("vision_model.encoder.layers"): + key_ = key.replace("vision_model.encoder.layers", "vision_encoder.backbone.transformer.layers") + elif key.startswith('vision_model.'): + key_ = key.replace("vision_model.", "vision_encoder.backbone.") + elif key.startswith('text_model.'): + key_ = key.replace('text_model.', 'text_encoder.language_model.') + else: + key_ = key + for pat in key_mapping: + if key_ == pat: + key_ = key_.replace(pat, key_mapping[pat]) + for pat in layer_mapping: + if key_.endswith(pat): + key_ = key_[: -len(pat)] + layer_mapping[pat] + break + if 'q_proj' in key_: + key_k = key.replace('q_proj', 'k_proj') + key_v = key.replace('q_proj', 'v_proj') + key_new = key_.replace('self_attn.q_proj', 'self_attention.query_key_value') + value_new = torch.concat((hf_state_dict[key], hf_state_dict[key_k], hf_state_dict[key_v]), dim=0) + nemo_state_dict[key_new] = value_new + elif not ('k_proj' in key_ or 'v_proj' in key_ or 'position_ids' in key_): + nemo_state_dict[key_] = hf_state_dict[key] + + nemo_state_dict["vision_encoder.backbone.cls_token"] = nemo_state_dict[ + "vision_encoder.backbone.cls_token" + ].reshape(1, 1, -1) + w = nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] + nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] = einops.rearrange(w, "b c p1 p2 -> b (p1 p2 c)",) + nemo_state_dict["vision_encoder.backbone.linear_encoder.bias"] = torch.zeros(w.shape[0]) + + return nemo_state_dict + + def convert(local_rank, rank, world_size, args): app_state = AppState() app_state.data_parallel_rank = 0 @@ -175,8 +259,13 @@ def convert(local_rank, rank, world_size, args): cfg = OmegaConf.load(args.hparams_file) model = MegatronCLIPModel(cfg.model, trainer) - open_model, _, _ = open_clip.create_model_and_transforms(args.arch, pretrained=args.version) - state_dict = mapping_state_dict(open_model) + if args.version == "huggingface": + hf_model = CLIPModel.from_pretrained(args.arch) + state_dict = mapping_hf_state_dict(hf_model) + else: + open_model, _, _ = open_clip.create_model_and_transforms(args.arch, pretrained=args.version) + state_dict = mapping_openclip_state_dict(open_model) + model.model.load_state_dict(state_dict) model._save_restore_connector = NLPSaveRestoreConnector() diff --git a/examples/multimodal/foundation/clip/convert_hfclip_to_nemo.py b/examples/multimodal/foundation/clip/convert_hfclip_to_nemo.py deleted file mode 100644 index e48736d2dbba..000000000000 --- a/examples/multimodal/foundation/clip/convert_hfclip_to_nemo.py +++ /dev/null @@ -1,191 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from argparse import ArgumentParser - -import einops -import torch -from apex.transformer import parallel_state -from omegaconf import OmegaConf -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from pytorch_lightning.trainer.trainer import Trainer -from transformers import CLIPModel - -from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector -from nemo.utils import AppState, logging -from nemo.utils.distributed import initialize_distributed -from nemo.utils.model_utils import inject_model_parallel_rank - - -def get_args(): - parser = ArgumentParser() - parser.add_argument("--hf_name", type=str, default="yuvalkirstain/PickScore_v1") - - parser.add_argument( - "--hparams_file", - type=str, - default=None, - required=False, - help="Path config for restoring. It's created during training and may need to be modified during restore if restore environment is different than training. Ex: /raid/nemo_experiments/megatron_gpt/hparams.yaml", - ) - parser.add_argument("--nemo_file_path", type=str, default=None, required=True, help="Path to output .nemo file.") - parser.add_argument("--gpus_per_node", type=int, required=False, default=1) - parser.add_argument("--tensor_model_parallel_size", type=int, required=False, default=1) - parser.add_argument("--pipeline_model_parallel_size", type=int, required=False, default=1) - parser.add_argument( - "--pipeline_model_parallel_split_rank", - type=int, - required=False, - default=None, - help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", - ) - parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) - parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") - - args = parser.parse_args() - return args - - -def mapping_state_dict(open_model): - open_state_dict = open_model.state_dict() - key_mapping = { - "text_projection.weight": "text_encoder.head.weight", - "visual_projection.weight": "vision_encoder.head.weight", - } - - layer_mapping = { - ".layer_norm1.weight": ".input_layernorm.weight", - ".layer_norm1.bias": ".input_layernorm.bias", - ".self_attn.out_proj.weight": ".self_attention.dense.weight", - ".self_attn.out_proj.bias": ".self_attention.dense.bias", - ".layer_norm2.weight": ".post_attention_layernorm.weight", - ".layer_norm2.bias": ".post_attention_layernorm.bias", - ".mlp.fc1.weight": ".mlp.dense_h_to_4h.weight", - ".mlp.fc1.bias": ".mlp.dense_h_to_4h.bias", - ".mlp.fc2.weight": ".mlp.dense_4h_to_h.weight", - ".mlp.fc2.bias": ".mlp.dense_4h_to_h.bias", - ".pre_layrnorm.weight": ".preprocess_layernorm.weight", - ".pre_layrnorm.bias": ".preprocess_layernorm.bias", - ".post_layernorm.weight": ".transformer.final_layernorm.weight", - ".post_layernorm.bias": ".transformer.final_layernorm.bias", - ".backbone.embeddings.position_embedding.weight": ".backbone.position_embeddings", - ".language_model.embeddings.position_embedding.weight": ".language_model.embedding.position_embeddings", - ".embeddings.class_embedding": ".cls_token", - ".backbone.embeddings.patch_embedding.weight": ".backbone.linear_encoder.weight", - ".final_layer_norm.weight": ".encoder.final_layernorm.weight", - ".final_layer_norm.bias": ".encoder.final_layernorm.bias", - ".embeddings.token_embedding.weight": ".embedding.word_embeddings.weight", - } - - nemo_state_dict = {} - for key in open_state_dict.keys(): - if key.startswith("text_model.encoder.layers"): - key_ = key.replace("text_model.encoder.layers", "text_encoder.language_model.encoder.layers") - elif key.startswith("vision_model.encoder.layers"): - key_ = key.replace("vision_model.encoder.layers", "vision_encoder.backbone.transformer.layers") - elif key.startswith('vision_model.'): - key_ = key.replace("vision_model.", "vision_encoder.backbone.") - elif key.startswith('text_model.'): - key_ = key.replace('text_model.', 'text_encoder.language_model.') - else: - key_ = key - for pat in key_mapping: - if key_ == pat: - key_ = key_.replace(pat, key_mapping[pat]) - for pat in layer_mapping: - if key_.endswith(pat): - key_ = key_[: -len(pat)] + layer_mapping[pat] - break - if 'q_proj' in key_: - key_k = key.replace('q_proj', 'k_proj') - key_v = key.replace('q_proj', 'v_proj') - key_new = key_.replace('self_attn.q_proj', 'self_attention.query_key_value') - value_new = torch.concat((open_state_dict[key], open_state_dict[key_k], open_state_dict[key_v]), dim=0) - nemo_state_dict[key_new] = value_new - elif not ('k_proj' in key_ or 'v_proj' in key_ or 'position_ids' in key_): - nemo_state_dict[key_] = open_state_dict[key] - - nemo_state_dict["vision_encoder.backbone.cls_token"] = nemo_state_dict[ - "vision_encoder.backbone.cls_token" - ].reshape(1, 1, -1) - w = nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] - nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] = einops.rearrange(w, "b c p1 p2 -> b (p1 p2 c)",) - nemo_state_dict["vision_encoder.backbone.linear_encoder.bias"] = torch.zeros(w.shape[0]) - - return nemo_state_dict - - -def convert(local_rank, rank, world_size, args): - app_state = AppState() - app_state.data_parallel_rank = 0 - num_nodes = world_size // args.gpus_per_node - if args.bcp: - trainer = Trainer( - devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu', plugins=[TorchElasticEnvironment()] - ) - else: - trainer = Trainer(devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu') - - app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size - app_state.tensor_model_parallel_size = args.tensor_model_parallel_size - - # no use atm, use to split ranks in encoder/decoder models. - if args.pipeline_model_parallel_size > 1 and args.model_type in []: - if args.pipeline_model_parallel_split_rank is not None: - app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_split_rank - else: - if args.pipeline_model_parallel_size % 2 != 0: - raise ValueError( - f"Pipeline model parallel size {args.pipeline_model_parallel_size} must be even if split rank is not specified." - ) - else: - # If split rank is not set, then we set it to be pipeline_model_parallel_size // 2 - this is because in most cases we have the same number of enc/dec layers. - app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_size // 2 - else: - app_state.pipeline_model_parallel_split_rank = None - - app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size - - parallel_state.initialize_model_parallel( - tensor_model_parallel_size_=app_state.tensor_model_parallel_size, - pipeline_model_parallel_size_=app_state.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank_=app_state.pipeline_model_parallel_split_rank, - ) - - app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() - app_state.tensor_model_parallel_rank = parallel_state.get_tensor_model_parallel_rank() - - cfg = OmegaConf.load(args.hparams_file) - model = MegatronCLIPModel(cfg.model, trainer) - - hf_model = CLIPModel.from_pretrained(args.hf_name) - state_dict = mapping_state_dict(hf_model) - model.model.load_state_dict(state_dict) - - model._save_restore_connector = NLPSaveRestoreConnector() - - if torch.distributed.is_initialized(): - torch.distributed.barrier() - - model.save_to(args.nemo_file_path) - - logging.info(f'NeMo model saved to: {args.nemo_file_path}') - - -if __name__ == '__main__': - args = get_args() - local_rank, rank, world_size = initialize_distributed(args) - convert(local_rank, rank, world_size, args) From 331fe0f2cbe26e6c317dd342b67cc1b27ec3d507 Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Fri, 23 Jun 2023 11:05:39 -0700 Subject: [PATCH 137/512] Add SD2.0 support --- .../stable_diffusion/conf/sd2_train.yaml | 186 ++++++++++++++++++ .../generative/stable_diffusion/sd_export.py | 72 +++++-- .../models/stable_diffusion/ldm/ddpm.py | 16 +- .../modules/stable_diffusion/attention.py | 22 ++- .../diffusionmodules/openaimodel.py | 4 + .../stable_diffusion/encoders/modules.py | 1 + 6 files changed, 282 insertions(+), 19 deletions(-) create mode 100644 examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml new file mode 100644 index 000000000000..7ba8c27c2971 --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml @@ -0,0 +1,186 @@ +name: stable-diffusion2-train + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + replace_sampler_ddp: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 140000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + +exp_manager: + exp_dir: null + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: + project: stable-diffusion + group: nemo-sd + name: ${name} + resume: True + create_checkpoint_callback: True + create_tensorboard_logger: True + checkpoint_callback_params: + every_n_train_steps: 1000 + every_n_epochs: 0 + monitor: reduced_train_loss + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 4 # limited by GPU memory + global_batch_size: 16 # will use more micro batches to reach global batch size + + linear_start: 0.00085 + linear_end: 0.012 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: images + cond_stage_key: captions # txt for cifar, caption for pbss + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn # check + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + scale_by_std: False + ckpt_path: + ignore_keys: [] + parameterization: eps + clip_denoised: True + load_only_unet: False + cosine_s: 8e-3 + given_betas: + original_elbo_weight: 0 + v_posterior: 0 + l_simple_weight: 1 + use_positional_encodings: False + learn_logvar: False + logvar_init: 0 + beta_schedule: linear + loss_type: l2 + + concat_mode: True + cond_stage_forward: + text_embedding_dropout_rate: 0.1 + fused_opt: True + inductor: False + inductor_cudagraphs: False + + unet_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel + from_pretrained: + from_NeMo: #Must be specified when from pretrained is not None, False means loading unet from HF ckpt + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: + - 4 + - 2 + - 1 + num_res_blocks: 2 + channel_mult: + - 1 + - 2 + - 4 + - 4 + num_heads: 8 + use_spatial_transformer: true + use_linear_in_transformer: true + transformer_depth: 1 + context_dim: 1024 + use_checkpoint: False + legacy: False + use_flash_attention: False + + first_stage_config: + _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL + from_pretrained: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 #Never used + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder + restore_from_path: /path/to/clip.nemo + device: cuda + freeze: True + layer: "penultimate" + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + optim: + name: fused_adam + lr: 1e-4 + weight_decay: 0. + betas: + - 0.9 + - 0.999 + sched: + name: WarmupHoldPolicy + warmup_steps: 10000 + hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + data: + num_workers: 16 + train: + dataset_path: + - /datasets/coyo/test.pkl + augmentations: + resize_smallest_side: 512 + center_crop_h_w: 512, 512 + horizontal_flip: False + filterings: + + webdataset: + infinite_sampler: False + local_root_path: /datasets/coyo diff --git a/examples/multimodal/generative/stable_diffusion/sd_export.py b/examples/multimodal/generative/stable_diffusion/sd_export.py index b00d5e2c0b1d..434161d7558c 100644 --- a/examples/multimodal/generative/stable_diffusion/sd_export.py +++ b/examples/multimodal/generative/stable_diffusion/sd_export.py @@ -14,14 +14,18 @@ import gc import os import time +from typing import Dict, List, Optional import torch import torch.nn as nn from omegaconf.omegaconf import OmegaConf, open_dict from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion, MegatronLatentDiffusion +from nemo.collections.multimodal.modules.stable_diffusion.encoders.modules import FrozenCLIPEmbedder from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.classes.exportable import Exportable from nemo.core.config import hydra_runner +from nemo.core.neural_types import ChannelType, NeuralType from nemo.utils.trt_utils import build_engine @@ -137,27 +141,67 @@ def forward(self, input_ids): outputs = self.model(input_ids=input_ids) return outputs.last_hidden_state - input_names = ["tokens"] - output_names = ["logits"] + class OpenCLIPWrapper(nn.Module, Exportable): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, input_ids): + outputs = self.model.encode_with_transformer(input_ids) + return outputs + + def input_example(self, max_text=64): + sample = next(self.parameters()) + tokens = torch.randint(high=10, size=(1, self.model.max_length)).to(sample.device) + return (tokens,) + + @property + def input_types(self) -> Optional[Dict[str, NeuralType]]: + return { + "tokens": NeuralType(('H', 'D'), ChannelType()), + } + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + return {"logits": NeuralType(('B', 'H'), ChannelType())} + + @property + def input_names(self) -> List[str]: + return ['tokens'] + + @property + def output_names(self) -> List[str]: + return ['logits'] + + openai_clip = isinstance(model.cond_stage_model, FrozenCLIPEmbedder) tokens = torch.randint(high=10, size=(1, model.cond_stage_model.max_length), device="cuda") - torch.onnx.export( - CLIPWrapper(model.cond_stage_model.transformer), - (tokens,), - f"{output_dir}/onnx/clip/clip.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"tokens": {0: 'B'}, "logits": {0: 'B'}}, - opset_version=17, - do_constant_folding=True, - export_params=True, - ) + + if openai_clip: + input_names = ["tokens"] + output_names = ["logits"] + torch.onnx.export( + CLIPWrapper(model.cond_stage_model.transformer), + (tokens,), + f"{output_dir}/onnx/clip/clip.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"tokens": {0: 'B'}, "logits": {0: 'B'}}, + opset_version=17, + do_constant_folding=True, + export_params=True, + ) + else: + clip_model = OpenCLIPWrapper(model.cond_stage_model) + clip_model.export("stable-diffusion/onnx/clip/clip.onnx") + input_profile_clip = {} input_profile_clip["tokens"] = [(batch_size, *(tokens.shape[1:]))] * 3 deployment_conf.clip.tokens = input_profile_clip["tokens"][0] deployment_conf.clip.logits = (batch_size, model.cond_stage_model.max_length, out.shape[2]) deployment_conf.clip.unconditional_guidance_scale = cfg.infer.get("unconditional_guidance_scale", 7.5) deployment_conf.clip.max_length = model.cond_stage_model.max_length + deployment_conf.clip.openai_clip = openai_clip with open(f"{output_dir}/plan/conf.yaml", "wb") as f: OmegaConf.save(config=deployment_conf, f=f.name) del model, trainer, megatron_diffusion_model, x, t, cc, z, tokens, out diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py index fd2286791047..6de6ed07f341 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -117,7 +117,7 @@ def uniform_on_device(r1, r2, shape, device): class DDPM(torch.nn.Module): def __init__(self, cfg): super().__init__() - assert cfg.parameterization in ["eps", "x0"], 'currently only supporting "eps" and "x0"' + assert cfg.parameterization in ["eps", "x0", "v"], 'currently only supporting "eps" and "x0" and "v"' self.parameterization = cfg.parameterization logging.info(f"{self.__class__.__name__}: Running in {self.parameterization}-prediction mode") self.cond_stage_model = None @@ -215,6 +215,10 @@ def register_schedule( ) elif self.parameterization == "x0": lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2.0 * 1 - torch.Tensor(alphas_cumprod)) + elif self.parameterization == "v": + lvlb_weights = torch.ones_like( + self.betas ** 2 / (2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod)) + ) else: raise NotImplementedError("mu not supported") # TODO how to choose this term @@ -333,6 +337,12 @@ def q_sample(self, x_start, t, noise=None): + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise ) + def get_v(self, x, noise, t): + return ( + extract_into_tensor(self.sqrt_alphas_cumprod, t, x.shape) * noise + - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x.shape) * x + ) + def get_loss(self, pred, target, mean=True): if self.loss_type == 'l1': loss = (target - pred).abs() @@ -358,6 +368,8 @@ def p_losses(self, x_start, t, noise=None): target = noise elif self.parameterization == "x0": target = x_start + elif self.parameterization == "v": + target = self.get_v(x_start, noise, t) else: raise NotImplementedError(f"Paramterization {self.parameterization} not yet supported") @@ -1086,6 +1098,8 @@ def p_losses(self, x_start, cond, t, noise=None): target = x_start elif self.parameterization == "eps": target = noise + elif self.parameterization == "v": + target = self.get_v(x_start, noise, t) else: raise NotImplementedError() diff --git a/nemo/collections/multimodal/modules/stable_diffusion/attention.py b/nemo/collections/multimodal/modules/stable_diffusion/attention.py index d8317cafb85e..a7832382f2c7 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/attention.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/attention.py @@ -338,6 +338,7 @@ def __init__( depth=1, dropout=0.0, context_dim=None, + use_linear=False, use_checkpoint=False, use_flash_attention=False, ): @@ -346,7 +347,10 @@ def __init__( inner_dim = n_heads * d_head self.norm = Normalize(in_channels) - self.proj_in = nn.Conv2d(in_channels, inner_dim, kernel_size=1, stride=1, padding=0) + if not use_linear: + self.proj_in = nn.Conv2d(in_channels, inner_dim, kernel_size=1, stride=1, padding=0) + else: + self.proj_in = nn.Linear(in_channels, inner_dim) self.transformer_blocks = nn.ModuleList( [ @@ -363,18 +367,28 @@ def __init__( ] ) - self.proj_out = zero_module(nn.Conv2d(inner_dim, in_channels, kernel_size=1, stride=1, padding=0)) + if not use_linear: + self.proj_out = zero_module(nn.Conv2d(inner_dim, in_channels, kernel_size=1, stride=1, padding=0)) + else: + self.proj_out = zero_module(nn.Linear(in_channels, inner_dim)) + self.use_linear = use_linear def forward(self, x, context=None): # note: if no context is given, cross-attention defaults to self-attention b, c, h, w = x.shape x_in = x x = self.norm(x) - x = self.proj_in(x) + if not self.use_linear: + x = self.proj_in(x) x = x.view(b, c, -1).transpose(1, 2) # b c h w -> b (h w) c x = x.contiguous() # workaround for dynamo ddp bug + if self.use_linear: + x = self.proj_in(x) for block in self.transformer_blocks: x = block(x, context=context) + if self.use_linear: + x = self.proj_out(x) x = x.transpose(1, 2).view(b, c, h, w) # b (h w) c -> b c h w - x = self.proj_out(x) + if not self.use_linear: + x = self.proj_out(x) return x + x_in diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py index 2bd026a9e0ed..aab1fd3dbc50 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py @@ -465,6 +465,7 @@ def __init__( context_dim=None, # custom transformer support n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model legacy=True, + use_linear_in_transformer=False, from_pretrained: str = None, from_NeMo=False, # It must be specified when from pretrained is not None. It indicates loading unet from NeMo trained ckpt or HF @@ -562,6 +563,7 @@ def __init__( dim_head, depth=transformer_depth, context_dim=context_dim, + use_linear=use_linear_in_transformer, use_checkpoint=use_checkpoint, use_flash_attention=use_flash_attention, ) @@ -623,6 +625,7 @@ def __init__( dim_head, depth=transformer_depth, context_dim=context_dim, + use_linear=use_linear_in_transformer, use_checkpoint=use_checkpoint, use_flash_attention=use_flash_attention, ), @@ -677,6 +680,7 @@ def __init__( dim_head, depth=transformer_depth, context_dim=context_dim, + use_linear=use_linear_in_transformer, use_checkpoint=use_checkpoint, use_flash_attention=use_flash_attention, ) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py index 1c81723fdda9..02fbef2b0f0f 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py @@ -380,6 +380,7 @@ def build_tokenizer(self, cfg): ) _, self.text_transform = get_preprocess_fns(cfg, self.tokenizer, is_train=False,) + self.max_length = cfg.text.get("max_position_embeddings") def load_model(self, cfg, state_dict): padded_vocab_size = self._vocab_size_with_padding( From 7931051f500504cad441514047c9f386b53c50f8 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Tue, 27 Jun 2023 12:34:17 -0700 Subject: [PATCH 138/512] Fix dreambooth conversion --- .../multimodal/models/multimodal_base_model.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/nemo/collections/multimodal/models/multimodal_base_model.py b/nemo/collections/multimodal/models/multimodal_base_model.py index f495864bd855..fe41a74acd41 100644 --- a/nemo/collections/multimodal/models/multimodal_base_model.py +++ b/nemo/collections/multimodal/models/multimodal_base_model.py @@ -153,6 +153,11 @@ def load_from_checkpoint( cfg.unet_config.from_pretrained = None if cfg.get('first_stage_config') and cfg.get('first_stage_config').get('from_pretrained'): cfg.first_stage_config.from_pretrained = None + ## Append some dummy configs that DB didn't support + if not cfg.get('channels_last'): + cfg.channels_last = True + if not cfg.get('capture_cudagraph_iters'): + cfg.capture_cudagraph_iters = -1 # compatibility for stable diffusion old checkpoint tweaks first_key = list(checkpoint['state_dict'].keys())[0] @@ -163,8 +168,11 @@ def load_from_checkpoint( new_key = "model." + key new_state_dict[new_key] = checkpoint['state_dict'][key] checkpoint['state_dict'] = new_state_dict - elif first_key == 'model.text_encoder.transformer.text_model.embeddings.position_ids': - # remap state keys from dreambooth + elif ( + first_key == 'model.text_encoder.transformer.text_model.embeddings.position_ids' + or first_key == 'model.text_encoder.model.language_model.embedding.position_embeddings' + ): + # remap state keys from dreambooth when using HF clip new_state_dict = {} for key in checkpoint['state_dict'].keys(): new_key = key.replace('._orig_mod', "") From bda5e7339f7e2d071bc4d18c3313d408ac8ebd03 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Thu, 27 Apr 2023 15:15:44 -0700 Subject: [PATCH 139/512] Clone model and config files for contronet from https://github.com/lllyasviel/ControlNet --- .../controlnet/conf/controlnet_v1-5.yaml | 79 ++++ .../models/ControlNet/controlnet.py | 417 ++++++++++++++++++ 2 files changed, 496 insertions(+) create mode 100644 examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml create mode 100644 nemo/collections/multimodal/models/ControlNet/controlnet.py diff --git a/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml b/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml new file mode 100644 index 000000000000..fde1825577ac --- /dev/null +++ b/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml @@ -0,0 +1,79 @@ +model: + target: cldm.cldm.ControlLDM + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + control_key: "hint" + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + only_mid_control: False + + control_stage_config: + target: cldm.cldm.ControlNet + params: + image_size: 32 # unused + in_channels: 4 + hint_channels: 3 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: True + legacy: False + + unet_config: + target: cldm.cldm.ControlledUnetModel + params: + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/nemo/collections/multimodal/models/ControlNet/controlnet.py b/nemo/collections/multimodal/models/ControlNet/controlnet.py new file mode 100644 index 000000000000..b2bf884bd666 --- /dev/null +++ b/nemo/collections/multimodal/models/ControlNet/controlnet.py @@ -0,0 +1,417 @@ +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel import UNetModel +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm_legacy import LatentDiffusion + +class ControlledUnetModel(UNetModel): + def forward(self, x, timesteps=None, context=None, control=None, only_mid_control=False, **kwargs): + hs = [] + with torch.no_grad(): + t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False) + emb = self.time_embed(t_emb) + h = x.type(self.dtype) + for module in self.input_blocks: + h = module(h, emb, context) + hs.append(h) + h = self.middle_block(h, emb, context) + + if control is not None: + h += control.pop() + + for i, module in enumerate(self.output_blocks): + if only_mid_control or control is None: + h = torch.cat([h, hs.pop()], dim=1) + else: + h = torch.cat([h, hs.pop() + control.pop()], dim=1) + h = module(h, emb, context) + + h = h.type(x.dtype) + return self.out(h) + + + +class ControlLDM(LatentDiffusion): + + def __init__(self, control_stage_config, control_key, only_mid_control, *args, **kwargs): + super().__init__(*args, **kwargs) + self.control_model = instantiate_from_config(control_stage_config) + self.control_key = control_key + self.only_mid_control = only_mid_control + self.control_scales = [1.0] * 13 + + @torch.no_grad() + def get_input(self, batch, k, bs=None, *args, **kwargs): + x, c = super().get_input(batch, self.first_stage_key, *args, **kwargs) + control = batch[self.control_key] + if bs is not None: + control = control[:bs] + control = control.to(self.device) + control = einops.rearrange(control, 'b h w c -> b c h w') + control = control.to(memory_format=torch.contiguous_format).float() + return x, dict(c_crossattn=[c], c_concat=[control]) + + def apply_model(self, x_noisy, t, cond, *args, **kwargs): + assert isinstance(cond, dict) + diffusion_model = self.model.diffusion_model + + cond_txt = torch.cat(cond['c_crossattn'], 1) + + if cond['c_concat'] is None: + eps = diffusion_model(x=x_noisy, timesteps=t, context=cond_txt, control=None, only_mid_control=self.only_mid_control) + else: + control = self.control_model(x=x_noisy, hint=torch.cat(cond['c_concat'], 1), timesteps=t, context=cond_txt) + control = [c * scale for c, scale in zip(control, self.control_scales)] + eps = diffusion_model(x=x_noisy, timesteps=t, context=cond_txt, control=control, only_mid_control=self.only_mid_control) + + return eps + + @torch.no_grad() + def get_unconditional_conditioning(self, N): + return self.get_learned_conditioning([""] * N) + + @torch.no_grad() + def log_images(self, batch, N=4, n_row=2, sample=False, ddim_steps=50, ddim_eta=0.0, return_keys=None, + quantize_denoised=True, inpaint=True, plot_denoise_rows=False, plot_progressive_rows=True, + plot_diffusion_rows=False, unconditional_guidance_scale=9.0, unconditional_guidance_label=None, + use_ema_scope=True, + **kwargs): + use_ddim = ddim_steps is not None + + log = dict() + z, c = self.get_input(batch, self.first_stage_key, bs=N) + c_cat, c = c["c_concat"][0][:N], c["c_crossattn"][0][:N] + N = min(z.shape[0], N) + n_row = min(z.shape[0], n_row) + log["reconstruction"] = self.decode_first_stage(z) + log["control"] = c_cat * 2.0 - 1.0 + log["conditioning"] = log_txt_as_img((512, 512), batch[self.cond_stage_key], size=16) + + if plot_diffusion_rows: + # get diffusion row + diffusion_row = list() + z_start = z[:n_row] + for t in range(self.num_timesteps): + if t % self.log_every_t == 0 or t == self.num_timesteps - 1: + t = repeat(torch.tensor([t]), '1 -> b', b=n_row) + t = t.to(self.device).long() + noise = torch.randn_like(z_start) + z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise) + diffusion_row.append(self.decode_first_stage(z_noisy)) + + diffusion_row = torch.stack(diffusion_row) # n_log_step, n_row, C, H, W + diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w') + diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w') + diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0]) + log["diffusion_row"] = diffusion_grid + + if sample: + # get denoise row + samples, z_denoise_row = self.sample_log(cond={"c_concat": [c_cat], "c_crossattn": [c]}, + batch_size=N, ddim=use_ddim, + ddim_steps=ddim_steps, eta=ddim_eta) + x_samples = self.decode_first_stage(samples) + log["samples"] = x_samples + if plot_denoise_rows: + denoise_grid = self._get_denoise_row_from_list(z_denoise_row) + log["denoise_row"] = denoise_grid + + if unconditional_guidance_scale > 1.0: + uc_cross = self.get_unconditional_conditioning(N) + uc_cat = c_cat # torch.zeros_like(c_cat) + uc_full = {"c_concat": [uc_cat], "c_crossattn": [uc_cross]} + samples_cfg, _ = self.sample_log(cond={"c_concat": [c_cat], "c_crossattn": [c]}, + batch_size=N, ddim=use_ddim, + ddim_steps=ddim_steps, eta=ddim_eta, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=uc_full, + ) + x_samples_cfg = self.decode_first_stage(samples_cfg) + log[f"samples_cfg_scale_{unconditional_guidance_scale:.2f}"] = x_samples_cfg + + return log + + @torch.no_grad() + def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): + ddim_sampler = DDIMSampler(self) + b, c, h, w = cond["c_concat"][0].shape + shape = (self.channels, h // 8, w // 8) + samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, shape, cond, verbose=False, **kwargs) + return samples, intermediates + + def configure_optimizers(self): + lr = self.learning_rate + params = list(self.control_model.parameters()) + if not self.sd_locked: + params += list(self.model.diffusion_model.output_blocks.parameters()) + params += list(self.model.diffusion_model.out.parameters()) + opt = torch.optim.AdamW(params, lr=lr) + return opt + + def low_vram_shift(self, is_diffusing): + if is_diffusing: + self.model = self.model.cuda() + self.control_model = self.control_model.cuda() + self.first_stage_model = self.first_stage_model.cpu() + self.cond_stage_model = self.cond_stage_model.cpu() + else: + self.model = self.model.cpu() + self.control_model = self.control_model.cpu() + self.first_stage_model = self.first_stage_model.cuda() + self.cond_stage_model = self.cond_stage_model.cuda() + +class ControlNet(nn.Module): + def __init__( + self, + image_size, + in_channels, + model_channels, + hint_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + use_checkpoint=False, + use_fp16=False, + num_heads=-1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + use_spatial_transformer=False, # custom transformer support + transformer_depth=1, # custom transformer support + context_dim=None, # custom transformer support + n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model + legacy=True, + disable_self_attentions=None, ###TODO MMY these are new + num_attention_blocks=None, + disable_middle_self_attn=False, + use_linear_in_transformer=False, + ): + super().__init__() + if use_spatial_transformer: + assert context_dim is not None, 'Fool!! You forgot to include the dimension of your cross-attention conditioning...' + + if context_dim is not None: + assert use_spatial_transformer, 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...' + from omegaconf.listconfig import ListConfig + if type(context_dim) == ListConfig: + context_dim = list(context_dim) + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + if num_heads == -1: + assert num_head_channels != -1, 'Either num_heads or num_head_channels has to be set' + + if num_head_channels == -1: + assert num_heads != -1, 'Either num_heads or num_head_channels has to be set' + + self.dims = dims + self.image_size = image_size + self.in_channels = in_channels + self.model_channels = model_channels + if isinstance(num_res_blocks, int): + self.num_res_blocks = len(channel_mult) * [num_res_blocks] + else: + if len(num_res_blocks) != len(channel_mult): + raise ValueError("provide num_res_blocks either as an int (globally constant) or " + "as a list/tuple (per-level) with the same length as channel_mult") + self.num_res_blocks = num_res_blocks + if disable_self_attentions is not None: + # should be a list of booleans, indicating whether to disable self-attention in TransformerBlocks or not + assert len(disable_self_attentions) == len(channel_mult) + if num_attention_blocks is not None: + assert len(num_attention_blocks) == len(self.num_res_blocks) + assert all(map(lambda i: self.num_res_blocks[i] >= num_attention_blocks[i], range(len(num_attention_blocks)))) + print(f"Constructor of UNetModel received num_attention_blocks={num_attention_blocks}. " + f"This option has LESS priority than attention_resolutions {attention_resolutions}, " + f"i.e., in cases where num_attention_blocks[i] > 0 but 2**i not in attention_resolutions, " + f"attention will still not be set.") + + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.use_checkpoint = use_checkpoint + self.dtype = th.float16 if use_fp16 else th.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + self.predict_codebook_ids = n_embed is not None + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + nn.SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + self.input_blocks = nn.ModuleList( + [ + TimestepEmbedSequential( + conv_nd(dims, in_channels, model_channels, 3, padding=1) + ) + ] + ) + self.zero_convs = nn.ModuleList([self.make_zero_conv(model_channels)]) + + self.input_hint_block = TimestepEmbedSequential( + conv_nd(dims, hint_channels, 16, 3, padding=1), + nn.SiLU(), + conv_nd(dims, 16, 16, 3, padding=1), + nn.SiLU(), + conv_nd(dims, 16, 32, 3, padding=1, stride=2), + nn.SiLU(), + conv_nd(dims, 32, 32, 3, padding=1), + nn.SiLU(), + conv_nd(dims, 32, 96, 3, padding=1, stride=2), + nn.SiLU(), + conv_nd(dims, 96, 96, 3, padding=1), + nn.SiLU(), + conv_nd(dims, 96, 256, 3, padding=1, stride=2), + nn.SiLU(), + zero_module(conv_nd(dims, 256, model_channels, 3, padding=1)) + ) + + self._feature_size = model_channels + input_block_chans = [model_channels] + ch = model_channels + ds = 1 + for level, mult in enumerate(channel_mult): + for nr in range(self.num_res_blocks[level]): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=mult * model_channels, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = mult * model_channels + if ds in attention_resolutions: + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + # num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + if exists(disable_self_attentions): + disabled_sa = disable_self_attentions[level] + else: + disabled_sa = False + + if not exists(num_attention_blocks) or nr < num_attention_blocks[level]: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) if not use_spatial_transformer else SpatialTransformer( + ch, num_heads, dim_head, depth=transformer_depth, context_dim=context_dim, + disable_self_attn=disabled_sa, use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint + ) + ) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self.zero_convs.append(self.make_zero_conv(ch)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) + if resblock_updown + else Downsample( + ch, conv_resample, dims=dims, out_channels=out_ch + ) + ) + ) + ch = out_ch + input_block_chans.append(ch) + self.zero_convs.append(self.make_zero_conv(ch)) + ds *= 2 + self._feature_size += ch + + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + # num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) if not use_spatial_transformer else SpatialTransformer( # always uses a self-attn + ch, num_heads, dim_head, depth=transformer_depth, context_dim=context_dim, + disable_self_attn=disable_middle_self_attn, use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self.middle_block_out = self.make_zero_conv(ch) + self._feature_size += ch + + def make_zero_conv(self, channels): + return TimestepEmbedSequential(zero_module(conv_nd(self.dims, channels, channels, 1, padding=0))) + + def forward(self, x, hint, timesteps, context, **kwargs): + t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False) + emb = self.time_embed(t_emb) + + guided_hint = self.input_hint_block(hint, emb, context) + + outs = [] + + h = x.type(self.dtype) + for module, zero_conv in zip(self.input_blocks, self.zero_convs): + if guided_hint is not None: + h = module(h, emb, context) + h += guided_hint + guided_hint = None + else: + h = module(h, emb, context) + outs.append(zero_conv(h, emb, context)) + + h = self.middle_block(h, emb, context) + outs.append(self.middle_block_out(h, emb, context)) + + return outs From fe01b0ebe6e1768a601eaa1d3458d8630ab908ca Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Mon, 26 Jun 2023 15:51:42 -0700 Subject: [PATCH 140/512] Add conversion Support controlnet into internal/main --- examples/multimodal/convert_ckpt_to_nemo.py | 4 + .../controlnet/conf/controlnet_infer.yaml | 36 + .../controlnet/conf/controlnet_v1-5.yaml | 295 +++-- .../generative/controlnet/controlnet_infer.py | 242 ++++ .../generative/controlnet/train_controlnet.py | 77 ++ .../data/controlnet/controlnet_dataset.py | 102 ++ .../models/ControlNet/controlnet.py | 417 ------ .../multimodal/models/controlnet/__init__.py | 13 + .../models/controlnet/controlnet.py | 935 ++++++++++++++ .../models/controlnet/uniformer/LICENSE | 203 +++ .../models/controlnet/uniformer/__init__.py | 27 + .../configs/_base_/datasets/ade20k.py | 54 + .../configs/_base_/datasets/chase_db1.py | 59 + .../configs/_base_/datasets/cityscapes.py | 54 + .../_base_/datasets/cityscapes_769x769.py | 35 + .../configs/_base_/datasets/drive.py | 59 + .../uniformer/configs/_base_/datasets/hrf.py | 59 + .../configs/_base_/datasets/pascal_context.py | 60 + .../_base_/datasets/pascal_context_59.py | 60 + .../configs/_base_/datasets/pascal_voc12.py | 57 + .../_base_/datasets/pascal_voc12_aug.py | 9 + .../configs/_base_/datasets/stare.py | 59 + .../configs/_base_/default_runtime.py | 14 + .../configs/_base_/models/ann_r50-d8.py | 46 + .../configs/_base_/models/apcnet_r50-d8.py | 44 + .../configs/_base_/models/ccnet_r50-d8.py | 44 + .../uniformer/configs/_base_/models/cgnet.py | 35 + .../configs/_base_/models/danet_r50-d8.py | 44 + .../configs/_base_/models/deeplabv3_r50-d8.py | 44 + .../_base_/models/deeplabv3_unet_s5-d16.py | 50 + .../_base_/models/deeplabv3plus_r50-d8.py | 46 + .../configs/_base_/models/dmnet_r50-d8.py | 44 + .../configs/_base_/models/dnl_r50-d8.py | 46 + .../configs/_base_/models/emanet_r50-d8.py | 47 + .../configs/_base_/models/encnet_r50-d8.py | 48 + .../configs/_base_/models/fast_scnn.py | 57 + .../configs/_base_/models/fcn_hr18.py | 52 + .../configs/_base_/models/fcn_r50-d8.py | 45 + .../configs/_base_/models/fcn_unet_s5-d16.py | 51 + .../configs/_base_/models/fpn_r50.py | 36 + .../configs/_base_/models/fpn_uniformer.py | 35 + .../configs/_base_/models/gcnet_r50-d8.py | 46 + .../configs/_base_/models/lraspp_m-v3-d8.py | 25 + .../configs/_base_/models/nonlocal_r50-d8.py | 46 + .../configs/_base_/models/ocrnet_hr18.py | 68 + .../configs/_base_/models/ocrnet_r50-d8.py | 47 + .../configs/_base_/models/pointrend_r50.py | 56 + .../configs/_base_/models/psanet_r50-d8.py | 49 + .../configs/_base_/models/pspnet_r50-d8.py | 44 + .../_base_/models/pspnet_unet_s5-d16.py | 50 + .../configs/_base_/models/upernet_r50.py | 44 + .../_base_/models/upernet_uniformer.py | 43 + .../configs/_base_/schedules/schedule_160k.py | 9 + .../configs/_base_/schedules/schedule_20k.py | 9 + .../configs/_base_/schedules/schedule_40k.py | 9 + .../configs/_base_/schedules/schedule_80k.py | 9 + .../exp/upernet_global_small/config.py | 38 + .../uniformer/exp/upernet_global_small/run.sh | 10 + .../exp/upernet_global_small/test.sh | 10 + .../exp/upernet_global_small/test_config_g.py | 38 + .../upernet_global_small/test_config_h32.py | 39 + .../upernet_global_small/test_config_w32.py | 39 + .../controlnet/uniformer/mmcv/__init__.py | 15 + .../uniformer/mmcv/arraymisc/__init__.py | 4 + .../uniformer/mmcv/arraymisc/quantization.py | 55 + .../controlnet/uniformer/mmcv/cnn/__init__.py | 41 + .../controlnet/uniformer/mmcv/cnn/alexnet.py | 61 + .../uniformer/mmcv/cnn/bricks/__init__.py | 35 + .../uniformer/mmcv/cnn/bricks/activation.py | 92 ++ .../mmcv/cnn/bricks/context_block.py | 125 ++ .../uniformer/mmcv/cnn/bricks/conv.py | 44 + .../cnn/bricks/conv2d_adaptive_padding.py | 62 + .../uniformer/mmcv/cnn/bricks/conv_module.py | 206 +++ .../uniformer/mmcv/cnn/bricks/conv_ws.py | 148 +++ .../bricks/depthwise_separable_conv_module.py | 96 ++ .../uniformer/mmcv/cnn/bricks/drop.py | 65 + .../mmcv/cnn/bricks/generalized_attention.py | 412 ++++++ .../uniformer/mmcv/cnn/bricks/hsigmoid.py | 34 + .../uniformer/mmcv/cnn/bricks/hswish.py | 29 + .../uniformer/mmcv/cnn/bricks/non_local.py | 306 +++++ .../uniformer/mmcv/cnn/bricks/norm.py | 144 +++ .../uniformer/mmcv/cnn/bricks/padding.py | 36 + .../uniformer/mmcv/cnn/bricks/plugin.py | 88 ++ .../uniformer/mmcv/cnn/bricks/registry.py | 16 + .../uniformer/mmcv/cnn/bricks/scale.py | 21 + .../uniformer/mmcv/cnn/bricks/swish.py | 25 + .../uniformer/mmcv/cnn/bricks/transformer.py | 595 +++++++++ .../uniformer/mmcv/cnn/bricks/upsample.py | 84 ++ .../uniformer/mmcv/cnn/bricks/wrappers.py | 180 +++ .../controlnet/uniformer/mmcv/cnn/builder.py | 30 + .../controlnet/uniformer/mmcv/cnn/resnet.py | 316 +++++ .../uniformer/mmcv/cnn/utils/__init__.py | 19 + .../uniformer/mmcv/cnn/utils/flops_counter.py | 599 +++++++++ .../uniformer/mmcv/cnn/utils/fuse_conv_bn.py | 59 + .../uniformer/mmcv/cnn/utils/sync_bn.py | 59 + .../uniformer/mmcv/cnn/utils/weight_init.py | 684 ++++++++++ .../controlnet/uniformer/mmcv/cnn/vgg.py | 175 +++ .../uniformer/mmcv/engine/__init__.py | 8 + .../controlnet/uniformer/mmcv/engine/test.py | 202 +++ .../uniformer/mmcv/fileio/__init__.py | 11 + .../uniformer/mmcv/fileio/file_client.py | 1148 +++++++++++++++++ .../mmcv/fileio/handlers/__init__.py | 7 + .../uniformer/mmcv/fileio/handlers/base.py | 30 + .../mmcv/fileio/handlers/json_handler.py | 36 + .../mmcv/fileio/handlers/pickle_handler.py | 28 + .../mmcv/fileio/handlers/yaml_handler.py | 24 + .../controlnet/uniformer/mmcv/fileio/io.py | 151 +++ .../controlnet/uniformer/mmcv/fileio/parse.py | 97 ++ .../uniformer/mmcv/image/__init__.py | 28 + .../uniformer/mmcv/image/colorspace.py | 306 +++++ .../uniformer/mmcv/image/geometric.py | 728 +++++++++++ .../controlnet/uniformer/mmcv/image/io.py | 258 ++++ .../controlnet/uniformer/mmcv/image/misc.py | 44 + .../uniformer/mmcv/image/photometric.py | 428 ++++++ .../uniformer/mmcv/model_zoo/deprecated.json | 6 + .../uniformer/mmcv/model_zoo/mmcls.json | 31 + .../uniformer/mmcv/model_zoo/open_mmlab.json | 50 + .../controlnet/uniformer/mmcv/ops/__init__.py | 81 ++ .../uniformer/mmcv/ops/assign_score_withk.py | 123 ++ .../uniformer/mmcv/ops/ball_query.py | 55 + .../controlnet/uniformer/mmcv/ops/bbox.py | 72 ++ .../uniformer/mmcv/ops/border_align.py | 109 ++ .../uniformer/mmcv/ops/box_iou_rotated.py | 45 + .../controlnet/uniformer/mmcv/ops/carafe.py | 287 +++++ .../uniformer/mmcv/ops/cc_attention.py | 83 ++ .../uniformer/mmcv/ops/contour_expand.py | 49 + .../uniformer/mmcv/ops/corner_pool.py | 161 +++ .../uniformer/mmcv/ops/correlation.py | 196 +++ .../uniformer/mmcv/ops/deform_conv.py | 405 ++++++ .../uniformer/mmcv/ops/deform_roi_pool.py | 204 +++ .../uniformer/mmcv/ops/deprecated_wrappers.py | 43 + .../uniformer/mmcv/ops/focal_loss.py | 212 +++ .../mmcv/ops/furthest_point_sample.py | 83 ++ .../mmcv/ops/fused_bias_leakyrelu.py | 268 ++++ .../uniformer/mmcv/ops/gather_points.py | 57 + .../uniformer/mmcv/ops/group_points.py | 224 ++++ .../controlnet/uniformer/mmcv/ops/info.py | 36 + .../controlnet/uniformer/mmcv/ops/iou3d.py | 85 ++ .../controlnet/uniformer/mmcv/ops/knn.py | 77 ++ .../uniformer/mmcv/ops/masked_conv.py | 111 ++ .../uniformer/mmcv/ops/merge_cells.py | 149 +++ .../mmcv/ops/modulated_deform_conv.py | 282 ++++ .../mmcv/ops/multi_scale_deform_attn.py | 358 +++++ .../controlnet/uniformer/mmcv/ops/nms.py | 417 ++++++ .../uniformer/mmcv/ops/pixel_group.py | 75 ++ .../uniformer/mmcv/ops/point_sample.py | 336 +++++ .../uniformer/mmcv/ops/points_in_boxes.py | 133 ++ .../uniformer/mmcv/ops/points_sampler.py | 177 +++ .../controlnet/uniformer/mmcv/ops/psa_mask.py | 92 ++ .../uniformer/mmcv/ops/roi_align.py | 223 ++++ .../uniformer/mmcv/ops/roi_align_rotated.py | 177 +++ .../controlnet/uniformer/mmcv/ops/roi_pool.py | 86 ++ .../uniformer/mmcv/ops/roiaware_pool3d.py | 114 ++ .../uniformer/mmcv/ops/roipoint_pool3d.py | 77 ++ .../controlnet/uniformer/mmcv/ops/saconv.py | 145 +++ .../uniformer/mmcv/ops/scatter_points.py | 135 ++ .../controlnet/uniformer/mmcv/ops/sync_bn.py | 279 ++++ .../uniformer/mmcv/ops/three_interpolate.py | 68 + .../controlnet/uniformer/mmcv/ops/three_nn.py | 51 + .../uniformer/mmcv/ops/tin_shift.py | 68 + .../uniformer/mmcv/ops/upfirdn2d.py | 330 +++++ .../controlnet/uniformer/mmcv/ops/voxelize.py | 132 ++ .../uniformer/mmcv/parallel/__init__.py | 13 + .../uniformer/mmcv/parallel/_functions.py | 79 ++ .../uniformer/mmcv/parallel/collate.py | 84 ++ .../uniformer/mmcv/parallel/data_container.py | 89 ++ .../uniformer/mmcv/parallel/data_parallel.py | 89 ++ .../uniformer/mmcv/parallel/distributed.py | 112 ++ .../mmcv/parallel/distributed_deprecated.py | 70 + .../uniformer/mmcv/parallel/registry.py | 8 + .../uniformer/mmcv/parallel/scatter_gather.py | 59 + .../uniformer/mmcv/parallel/utils.py | 20 + .../uniformer/mmcv/runner/__init__.py | 47 + .../uniformer/mmcv/runner/base_module.py | 195 +++ .../uniformer/mmcv/runner/base_runner.py | 542 ++++++++ .../uniformer/mmcv/runner/builder.py | 24 + .../uniformer/mmcv/runner/checkpoint.py | 707 ++++++++++ .../mmcv/runner/default_constructor.py | 44 + .../uniformer/mmcv/runner/dist_utils.py | 164 +++ .../mmcv/runner/epoch_based_runner.py | 187 +++ .../uniformer/mmcv/runner/fp16_utils.py | 410 ++++++ .../uniformer/mmcv/runner/hooks/__init__.py | 29 + .../uniformer/mmcv/runner/hooks/checkpoint.py | 167 +++ .../uniformer/mmcv/runner/hooks/closure.py | 11 + .../uniformer/mmcv/runner/hooks/ema.py | 89 ++ .../uniformer/mmcv/runner/hooks/evaluation.py | 509 ++++++++ .../uniformer/mmcv/runner/hooks/hook.py | 92 ++ .../uniformer/mmcv/runner/hooks/iter_timer.py | 18 + .../mmcv/runner/hooks/logger/__init__.py | 15 + .../mmcv/runner/hooks/logger/base.py | 166 +++ .../mmcv/runner/hooks/logger/dvclive.py | 58 + .../mmcv/runner/hooks/logger/mlflow.py | 78 ++ .../mmcv/runner/hooks/logger/neptune.py | 82 ++ .../mmcv/runner/hooks/logger/pavi.py | 117 ++ .../mmcv/runner/hooks/logger/tensorboard.py | 57 + .../mmcv/runner/hooks/logger/text.py | 256 ++++ .../mmcv/runner/hooks/logger/wandb.py | 56 + .../uniformer/mmcv/runner/hooks/lr_updater.py | 670 ++++++++++ .../uniformer/mmcv/runner/hooks/memory.py | 25 + .../mmcv/runner/hooks/momentum_updater.py | 493 +++++++ .../uniformer/mmcv/runner/hooks/optimizer.py | 508 ++++++++ .../uniformer/mmcv/runner/hooks/profiler.py | 180 +++ .../mmcv/runner/hooks/sampler_seed.py | 20 + .../mmcv/runner/hooks/sync_buffer.py | 22 + .../mmcv/runner/iter_based_runner.py | 273 ++++ .../uniformer/mmcv/runner/log_buffer.py | 41 + .../mmcv/runner/optimizer/__init__.py | 9 + .../mmcv/runner/optimizer/builder.py | 44 + .../runner/optimizer/default_constructor.py | 249 ++++ .../uniformer/mmcv/runner/priority.py | 60 + .../controlnet/uniformer/mmcv/runner/utils.py | 93 ++ .../uniformer/mmcv/utils/__init__.py | 69 + .../controlnet/uniformer/mmcv/utils/config.py | 688 ++++++++++ .../controlnet/uniformer/mmcv/utils/env.py | 95 ++ .../uniformer/mmcv/utils/ext_loader.py | 71 + .../uniformer/mmcv/utils/logging.py | 110 ++ .../controlnet/uniformer/mmcv/utils/misc.py | 377 ++++++ .../uniformer/mmcv/utils/parrots_jit.py | 41 + .../uniformer/mmcv/utils/parrots_wrapper.py | 107 ++ .../controlnet/uniformer/mmcv/utils/path.py | 101 ++ .../uniformer/mmcv/utils/progressbar.py | 208 +++ .../uniformer/mmcv/utils/registry.py | 315 +++++ .../uniformer/mmcv/utils/testing.py | 140 ++ .../controlnet/uniformer/mmcv/utils/timer.py | 118 ++ .../controlnet/uniformer/mmcv/utils/trace.py | 23 + .../uniformer/mmcv/utils/version_utils.py | 90 ++ .../controlnet/uniformer/mmcv/version.py | 35 + .../uniformer/mmcv/video/__init__.py | 11 + .../controlnet/uniformer/mmcv/video/io.py | 318 +++++ .../uniformer/mmcv/video/optflow.py | 254 ++++ .../uniformer/mmcv/video/processing.py | 160 +++ .../uniformer/mmcv/visualization/__init__.py | 9 + .../uniformer/mmcv/visualization/color.py | 51 + .../uniformer/mmcv/visualization/image.py | 152 +++ .../uniformer/mmcv/visualization/optflow.py | 112 ++ .../uniformer/mmcv_custom/__init__.py | 5 + .../uniformer/mmcv_custom/checkpoint.py | 500 +++++++ .../uniformer/mmseg/apis/__init__.py | 9 + .../uniformer/mmseg/apis/inference.py | 136 ++ .../controlnet/uniformer/mmseg/apis/test.py | 238 ++++ .../controlnet/uniformer/mmseg/apis/train.py | 116 ++ .../uniformer/mmseg/core/__init__.py | 3 + .../mmseg/core/evaluation/__init__.py | 8 + .../mmseg/core/evaluation/class_names.py | 152 +++ .../mmseg/core/evaluation/eval_hooks.py | 109 ++ .../mmseg/core/evaluation/metrics.py | 326 +++++ .../uniformer/mmseg/core/seg/__init__.py | 4 + .../uniformer/mmseg/core/seg/builder.py | 8 + .../mmseg/core/seg/sampler/__init__.py | 4 + .../core/seg/sampler/base_pixel_sampler.py | 12 + .../core/seg/sampler/ohem_pixel_sampler.py | 76 ++ .../uniformer/mmseg/core/utils/__init__.py | 3 + .../uniformer/mmseg/core/utils/misc.py | 17 + .../uniformer/mmseg/datasets/__init__.py | 19 + .../uniformer/mmseg/datasets/ade.py | 84 ++ .../uniformer/mmseg/datasets/builder.py | 169 +++ .../uniformer/mmseg/datasets/chase_db1.py | 27 + .../uniformer/mmseg/datasets/cityscapes.py | 217 ++++ .../uniformer/mmseg/datasets/custom.py | 400 ++++++ .../mmseg/datasets/dataset_wrappers.py | 50 + .../uniformer/mmseg/datasets/drive.py | 27 + .../uniformer/mmseg/datasets/hrf.py | 27 + .../mmseg/datasets/pascal_context.py | 103 ++ .../mmseg/datasets/pipelines/__init__.py | 16 + .../mmseg/datasets/pipelines/compose.py | 51 + .../mmseg/datasets/pipelines/formating.py | 288 +++++ .../mmseg/datasets/pipelines/loading.py | 153 +++ .../mmseg/datasets/pipelines/test_time_aug.py | 133 ++ .../mmseg/datasets/pipelines/transforms.py | 889 +++++++++++++ .../uniformer/mmseg/datasets/stare.py | 27 + .../uniformer/mmseg/datasets/voc.py | 29 + .../uniformer/mmseg/models/__init__.py | 12 + .../mmseg/models/backbones/__init__.py | 17 + .../uniformer/mmseg/models/backbones/cgnet.py | 367 ++++++ .../mmseg/models/backbones/fast_scnn.py | 375 ++++++ .../uniformer/mmseg/models/backbones/hrnet.py | 555 ++++++++ .../mmseg/models/backbones/mobilenet_v2.py | 180 +++ .../mmseg/models/backbones/mobilenet_v3.py | 255 ++++ .../mmseg/models/backbones/resnest.py | 314 +++++ .../mmseg/models/backbones/resnet.py | 688 ++++++++++ .../mmseg/models/backbones/resnext.py | 145 +++ .../uniformer/mmseg/models/backbones/unet.py | 429 ++++++ .../mmseg/models/backbones/uniformer.py | 422 ++++++ .../uniformer/mmseg/models/backbones/vit.py | 459 +++++++ .../uniformer/mmseg/models/builder.py | 46 + .../mmseg/models/decode_heads/__init__.py | 28 + .../mmseg/models/decode_heads/ann_head.py | 245 ++++ .../mmseg/models/decode_heads/apc_head.py | 158 +++ .../mmseg/models/decode_heads/aspp_head.py | 107 ++ .../decode_heads/cascade_decode_head.py | 57 + .../mmseg/models/decode_heads/cc_head.py | 42 + .../mmseg/models/decode_heads/da_head.py | 178 +++ .../mmseg/models/decode_heads/decode_head.py | 234 ++++ .../mmseg/models/decode_heads/dm_head.py | 140 ++ .../mmseg/models/decode_heads/dnl_head.py | 131 ++ .../mmseg/models/decode_heads/ema_head.py | 168 +++ .../mmseg/models/decode_heads/enc_head.py | 187 +++ .../mmseg/models/decode_heads/fcn_head.py | 81 ++ .../mmseg/models/decode_heads/fpn_head.py | 68 + .../mmseg/models/decode_heads/gc_head.py | 47 + .../mmseg/models/decode_heads/lraspp_head.py | 90 ++ .../mmseg/models/decode_heads/nl_head.py | 49 + .../mmseg/models/decode_heads/ocr_head.py | 127 ++ .../mmseg/models/decode_heads/point_head.py | 349 +++++ .../mmseg/models/decode_heads/psa_head.py | 196 +++ .../mmseg/models/decode_heads/psp_head.py | 101 ++ .../models/decode_heads/sep_aspp_head.py | 101 ++ .../mmseg/models/decode_heads/sep_fcn_head.py | 51 + .../mmseg/models/decode_heads/uper_head.py | 126 ++ .../uniformer/mmseg/models/losses/__init__.py | 12 + .../uniformer/mmseg/models/losses/accuracy.py | 78 ++ .../mmseg/models/losses/cross_entropy_loss.py | 198 +++ .../mmseg/models/losses/dice_loss.py | 119 ++ .../mmseg/models/losses/lovasz_loss.py | 303 +++++ .../uniformer/mmseg/models/losses/utils.py | 121 ++ .../uniformer/mmseg/models/necks/__init__.py | 4 + .../uniformer/mmseg/models/necks/fpn.py | 212 +++ .../mmseg/models/necks/multilevel_neck.py | 70 + .../mmseg/models/segmentors/__init__.py | 5 + .../uniformer/mmseg/models/segmentors/base.py | 273 ++++ .../segmentors/cascade_encoder_decoder.py | 98 ++ .../models/segmentors/encoder_decoder.py | 298 +++++ .../uniformer/mmseg/models/utils/__init__.py | 13 + .../uniformer/mmseg/models/utils/drop.py | 31 + .../mmseg/models/utils/inverted_residual.py | 208 +++ .../mmseg/models/utils/make_divisible.py | 27 + .../uniformer/mmseg/models/utils/res_layer.py | 94 ++ .../uniformer/mmseg/models/utils/se_layer.py | 57 + .../models/utils/self_attention_block.py | 159 +++ .../mmseg/models/utils/up_conv_block.py | 101 ++ .../mmseg/models/utils/weight_init.py | 62 + .../uniformer/mmseg/ops/__init__.py | 4 + .../uniformer/mmseg/ops/encoding.py | 74 ++ .../uniformer/mmseg/ops/wrappers.py | 50 + .../uniformer/mmseg/utils/__init__.py | 4 + .../uniformer/mmseg/utils/collect_env.py | 17 + .../uniformer/mmseg/utils/logger.py | 27 + .../multimodal/models/controlnet/util.py | 87 ++ .../models/multimodal_base_model.py | 8 + .../stable_diffusion/samplers/base_sampler.py | 9 +- .../modules/stable_diffusion/attention.py | 24 +- .../diffusionmodules/openaimodel.py | 14 + .../parts/stable_diffusion/utils.py | 3 +- nemo/collections/multimodal/parts/utils.py | 3 + 344 files changed, 45228 insertions(+), 507 deletions(-) create mode 100644 examples/multimodal/generative/controlnet/conf/controlnet_infer.yaml create mode 100644 examples/multimodal/generative/controlnet/controlnet_infer.py create mode 100644 examples/multimodal/generative/controlnet/train_controlnet.py create mode 100644 nemo/collections/multimodal/data/controlnet/controlnet_dataset.py delete mode 100644 nemo/collections/multimodal/models/ControlNet/controlnet.py create mode 100644 nemo/collections/multimodal/models/controlnet/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/controlnet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/LICENSE create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_160k.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_20k.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_40k.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_80k.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/run.sh create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test.sh create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hsigmoid.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hswish.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/padding.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/registry.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/scale.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/swish.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/base.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/deprecated.json create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/mmcls.json create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/open_mmlab.json create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/registry.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/scatter_gather.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/utils.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sampler_seed.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sync_buffer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/priority.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/builder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/misc.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/dataset_wrappers.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/compose.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/make_divisible.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/logger.py create mode 100644 nemo/collections/multimodal/models/controlnet/util.py diff --git a/examples/multimodal/convert_ckpt_to_nemo.py b/examples/multimodal/convert_ckpt_to_nemo.py index 393617deed7d..508682166cec 100644 --- a/examples/multimodal/convert_ckpt_to_nemo.py +++ b/examples/multimodal/convert_ckpt_to_nemo.py @@ -37,6 +37,7 @@ from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet from nemo.utils import AppState, logging from nemo.utils.distributed import initialize_distributed from nemo.utils.model_utils import inject_model_parallel_rank @@ -159,6 +160,9 @@ def convert(local_rank, rank, world_size, args): ) elif args.model_type == 'imagen': model = MegatronImagen.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, trainer=trainer) + elif args.model_type == 'controlnet': + model = MegatronControlNet.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, + trainer=trainer) else: raise ValueError(f"Unrecognized model_type {args.model_type}.") diff --git a/examples/multimodal/generative/controlnet/conf/controlnet_infer.yaml b/examples/multimodal/generative/controlnet/conf/controlnet_infer.yaml new file mode 100644 index 000000000000..0012e272aac4 --- /dev/null +++ b/examples/multimodal/generative/controlnet/conf/controlnet_infer.yaml @@ -0,0 +1,36 @@ +name: stable-diffusion-train + +infer: + unconditional_guidance_scale: 3 + num_images_per_prompt: 4 + hint_image_size: 512 + height: 512 + width: 512 + down_factor: 8 + inference_steps: 50 + sampler_type: 'DDIM' + eta: 0 + output_type: 'pil' + save_to_file: True + out_path: 'controlnet' + seed: 355 + prompts: + - high quality picture of a house in oil painting style + control: + - /datasets/coco-stuff/house.png #images/val2017/000000001584.jpg + # Depending on the input control, if the input control is already the conditioning image, null should be passed here + # If a reconstruction target is used as control, then preprocessing function that turns it into a conditioning image needs to be specified + control_image_preprocess: seg2img + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + +model: + restore_from_path: /ckpts/controlnet/30k.nemo + precision: ${trainer.precision} + strength: 2.0 + guess_mode: False \ No newline at end of file diff --git a/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml b/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml index fde1825577ac..f6e802653657 100644 --- a/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml +++ b/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml @@ -1,79 +1,218 @@ +trainer: + devices: 2 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + replace_sampler_ddp: True + max_epochs: 3 # PTL default. In practice, max_steps will be reached first. + max_steps: -1 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: controlnet + create_wandb_logger: False + wandb_logger_kwargs: + project: stable-diffusion + group: controlnet + name: controlnet-v1.5 + resume: True + create_checkpoint_callback: True + create_tensorboard_logger: True + checkpoint_callback_params: + save_top_k: -1 + every_n_train_steps: 5000 + every_n_epochs: 0 + monitor: reduced_train_loss + filename: 'controlnet--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + + + model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 4 # limited by GPU memory + global_batch_size: 8 + + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: images + cond_stage_key: captions + control_key: hint + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + scale_by_std: False + ckpt_path: + ignore_keys: [ ] + parameterization: eps + clip_denoised: True + load_only_unet: False + cosine_s: 8e-3 + given_betas: + original_elbo_weight: 0 + v_posterior: 0 + l_simple_weight: 1 + use_positional_encodings: False + learn_logvar: False + logvar_init: 0 + beta_schedule: linear + loss_type: l2 + learning_rate: 1.0e-04 + concat_mode: True + cond_stage_forward: + text_embedding_dropout_rate: 0.0 + fused_opt: True + inductor: False + inductor_cudagraphs: False + capture_cudagraph_iters: -1 # -1 to disable + channels_last: True + only_mid_control: False + sd_locked: True + + control_stage_config: + _target_: nemo.collections.multimodal.models.controlnet.controlnet.ControlNet + params: + from_pretrained_unet: /ckpts/v1-5-pruned.ckpt + from_NeMo: True + image_size: 32 # unused + in_channels: 4 + hint_channels: 3 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + use_linear_in_transformer: False + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: False + + unet_config: + _target_: nemo.collections.multimodal.models.controlnet.controlnet.ControlledUnetModel + from_pretrained: /ckpts/v1-5-pruned.ckpt + from_NeMo: True + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: + - 4 + - 2 + - 1 + num_res_blocks: 2 + channel_mult: + - 1 + - 2 + - 4 + - 4 + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: False + + first_stage_config: + _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL + from_pretrained: /ckpts/vae.bin + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder + version: openai/clip-vit-large-patch14 + device: cuda + max_length: 77 + + data: + num_workers: 16 + train: + dataset_path: + #- /datasets/tarfiles/fill50k.pkl + - /datasets/coco-stuff/coco-stuff-tarfiles/wdinfo-coco-stuff.pkl + augmentations: + resize_smallest_side: 512 + center_crop_h_w: 512, 512 + horizontal_flip: False + filterings: + + webdataset: + infinite_sampler: False + local_root_path: /datasets/coco-stuff/coco-stuff-tarfiles + + optim: + name: fused_adam + lr: 2e-5 + weight_decay: 0. + betas: + - 0.9 + - 0.999 + sched: + name: WarmupHoldPolicy + warmup_steps: 0 + hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + image_logger: + batch_frequency: 1000 + max_images: 4 + + #miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) diff --git a/examples/multimodal/generative/controlnet/controlnet_infer.py b/examples/multimodal/generative/controlnet/controlnet_infer.py new file mode 100644 index 000000000000..de0360adecce --- /dev/null +++ b/examples/multimodal/generative/controlnet/controlnet_infer.py @@ -0,0 +1,242 @@ +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import cv2 +import os +import time +import einops + +from PIL import Image +from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.config import hydra_runner + +from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler +from nemo.collections.multimodal.models.stable_diffusion.samplers.plms import PLMSSampler +from nemo.collections.multimodal.models.controlnet.util import get_preprocessing_function + +def get_control_input(image_path, batch_size, hint_image_size, control_image_preprocess=None): + image = cv2.imread(image_path) + if control_image_preprocess: + # More applications can be supported here + process = get_preprocessing_function(control_image_preprocess) + image = process(image) + image = cv2.resize(image, (hint_image_size, hint_image_size)) + control = torch.from_numpy(image).float() / 255.0 + control = torch.stack([control for _ in range(batch_size)], dim=0) + control = einops.rearrange(control, 'b h w c -> b c h w') + return control + +def encode_prompt(cond_stage_model, prompt, unconditional_guidance_scale, batch_size): + c = cond_stage_model.encode(batch_size * [prompt]) + if unconditional_guidance_scale != 1.: + uc = cond_stage_model.encode(batch_size * [""]) + else: + uc = None + return c, uc + + +def initialize_sampler(model, sampler_type): + if sampler_type == 'DDIM': + sampler = DDIMSampler(model) + elif sampler_type == 'PLMS': + sampler = PLMSSampler(model) + else: + raise ValueError(f'Sampler {sampler_type} is not supported for {cls.__name__}') + return sampler + + +def decode_images(model, samples): + images = model.decode_first_stage(samples) + + images = torch.clamp((images + 1.) / 2., min=0., max=1.) + + return images + +def torch_to_numpy(images): + numpy_images = [x.float().cpu().permute(0, 2, 3, 1).numpy() for x in images] + return numpy_images + +def numpy_to_pil(images): + """ + Convert a numpy image or a batch of images to a PIL image. + """ + if images.ndim == 3: + images = images[None, ...] + images = (images * 255).round().astype("uint8") + pil_images = [Image.fromarray(image) for image in images] + + return pil_images + + +def pipeline(model, cfg, rng=None, verbose=True): + # setup default values for inference configs + unconditional_guidance_scale = cfg.infer.get("unconditional_guidance_scale", 7.5) + batch_size = cfg.infer.get('num_images_per_prompt', 1) + prompts = cfg.infer.get('prompts', []) + control = cfg.infer.get('control', []) + height = cfg.infer.get('height', 512) + width = cfg.infer.get('width', 512) + downsampling_factor = cfg.infer.get('down_factor', 8) + sampler_type = cfg.infer.get('sampler_type', 'DDIM') + inference_steps = cfg.infer.get('inference_steps', 50) + output_type = cfg.infer.get('output_type', 'pil') + save_to_file = cfg.infer.get('save_to_file', True) + out_path = cfg.infer.get('out_path', '') + eta = cfg.infer.get('eta', 0) + guess_mode = cfg.model.get('guess_mode', False) + hint_image_size = cfg.infer.get('hint_image_size', 512) + control_image_preprocess = cfg.infer.get('control_image_preprocess', None) + + # get autocast_dtype + if cfg.trainer.precision == 'bf16': + autocast_dtype = torch.bfloat16 + elif int(cfg.trainer.precision) == 32: + autocast_dtype = torch.float + elif int(cfg.trainer.precision) == 16: + autocast_dtype = torch.half + else: + raise ValueError('precision must be in [32, 16, "bf16"]') + + with torch.no_grad(), torch.cuda.amp.autocast(enabled=autocast_dtype in (torch.half, torch.bfloat16), + dtype=autocast_dtype, ): + + in_channels = model.model.diffusion_model.in_channels + + + sampler = initialize_sampler(model, sampler_type.upper()) + + output = [] + throughput = [] + + if isinstance(prompts, str): + prompts = [prompts] + + + assert (len(prompts) == len(control)) + + + for control, prompt in zip(control,prompts): + tic = time.perf_counter() + tic_total = tic + txt_cond, txt_u_cond = encode_prompt(model.cond_stage_model, prompt, unconditional_guidance_scale, batch_size) + + control = get_control_input(control, batch_size, hint_image_size, control_image_preprocess).to(torch.cuda.current_device(), dtype=autocast_dtype) + + + + cond = {"c_concat": control, "c_crossattn": txt_cond} + u_cond = {"c_concat": None if guess_mode else control,"c_crossattn": txt_u_cond} + + toc = time.perf_counter() + conditioning_time = toc - tic + + latent_shape = [batch_size, height // downsampling_factor, width // downsampling_factor] + latents = torch.randn( + [batch_size, in_channels, height // downsampling_factor, width // downsampling_factor], + generator=rng).to( + torch.cuda.current_device()) + + + tic = time.perf_counter() + samples, intermediates = sampler.sample( + S=inference_steps, + conditioning=cond, + batch_size=batch_size, + shape=latent_shape, + verbose=False, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=u_cond, + eta=eta, + x_T=latents + ) + toc = time.perf_counter() + sampling_time = toc - tic + + tic = time.perf_counter() + images = decode_images(model, samples) + toc = time.perf_counter() + decode_time = toc - tic + + toc_total = time.perf_counter() + total_time = toc_total - tic_total + output.append(images) + + throughput.append({ + 'text-conditioning-time': conditioning_time, + 'sampling-time': sampling_time, + 'decode-time': decode_time, + 'total-time': total_time, + 'sampling-steps': inference_steps, + }) + + # Convert output type and save to disk + if output_type == 'torch': + output = torch.cat(output, dim=0) + else: + output = torch_to_numpy(output) + if output_type == 'pil': + output = [numpy_to_pil(x) for x in output] + + if save_to_file: + os.makedirs(out_path, exist_ok=True) + # Saving control map + control_image = control[0].float().cpu().permute(1,2,0).numpy() + control_image = Image.fromarray((control_image * 255).round().astype("uint8")) + control_image.save(os.path.join(out_path, f'{prompt[:50]}_control.png')) + if output_type == 'pil': + for text_prompt, pils in zip(prompts, output): + for idx, image in enumerate(pils): + image.save(os.path.join(out_path, f'{text_prompt[:50]}_{idx}.png')) + else: + with open(os.path.join(out_path, 'output.pkl'), 'wb') as f: + pickle.dump(output, f) + else: + return output + + ave_metrics = {} + for key in throughput[0].keys(): + ave_metrics[f'avg-{key}'] = sum([dicts[key] for dicts in throughput]) / len(throughput) + if verbose: + print(ave_metrics) + + + +@hydra_runner(config_path='conf', config_name='controlnet_infer') +def main(cfg): + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + model_cfg.unet_config.from_pretrained = None + model_cfg.first_stage_config.from_pretrained = None + model_cfg.control_stage_config.from_pretrained_unet = None + model_cfg.channels_last = True + model_cfg.capture_cudagraph_iters = -1 + + torch.backends.cuda.matmul.allow_tf32 = True + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronControlNet, + cfg=cfg, + model_cfg_modifier=model_cfg_modifier + ) + model = megatron_diffusion_model.model + model.cuda().eval() + + guess_mode = cfg.model.guess_mode + model.contol_scales = [cfg.model.strength * (0.825 ** float(12 - i)) for i in range(13)] if guess_mode else ([cfg.model.strength] * 13) + + rng = torch.Generator().manual_seed(cfg.infer.seed) + pipeline(model, cfg, rng=rng) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/multimodal/generative/controlnet/train_controlnet.py b/examples/multimodal/generative/controlnet/train_controlnet.py new file mode 100644 index 000000000000..e12ccdd48467 --- /dev/null +++ b/examples/multimodal/generative/controlnet/train_controlnet.py @@ -0,0 +1,77 @@ +from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet +from nemo.core.config import hydra_runner +from nemo.collections.multimodal.parts.stable_diffusion.utils import instantiate_from_config +from datetime import timedelta +from pytorch_lightning.strategies.ddp import DDPStrategy +import torch +import pytorch_lightning as pl +from nemo.utils.exp_manager import StatelessTimer, exp_manager +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PipelineMixedPrecisionPlugin, +) + + + +from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon +from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import ( + construct_image_augmentations, + identical_transform, +) + +from nemo.collections.multimodal.models.controlnet.util import ImageLogger +from pytorch_lightning.plugins.environments import TorchElasticEnvironment + + +@hydra_runner(config_path='conf', config_name='controlnet_v1-5.yaml') +def main(cfg): + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' + + plugins=[] + callbacks = [] + + + # Tune for DDP + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + + # dataset, _ = build_train_valid_datasets(cfg.model, 0) + + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 65536.0), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + ) + if megatron_amp_O2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + + if cfg.model.get('image_logger', None): + callbacks.append(ImageLogger(**cfg.model.image_logger)) + + trainer = pl.Trainer(**cfg.trainer, + plugins=plugins, + callbacks=callbacks, + strategy=strategy) + + exp_manager(trainer, cfg.get("exp_manager", None)) + + model = MegatronControlNet(cfg.model, trainer) + trainer.fit(model) + +if __name__ == '__main__': + main() diff --git a/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py b/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py new file mode 100644 index 000000000000..8b5a349b03de --- /dev/null +++ b/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py @@ -0,0 +1,102 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon +from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import ( + construct_image_augmentations, + identical_transform, +) + + +def build_train_valid_datasets( + model_cfg, + consumed_samples, +): + data_cfg = model_cfg.data + + # This function maps data that are tuples to dictionary. + def tuple_to_dict(inp): + for input in inp: + out_dict = dict() + out_dict['images'] = input[0].permute(1, 2, 0) + out_dict['captions'] = input[1] + out_dict['hint'] = input[2].permute(1, 2, 0) + yield out_dict + + def transform_fn(sample): + + image, text, hint = sample["jpg"], sample["txt"], sample["png"] + # TODO : If no agumentations just return the image ? + img_transform = construct_image_augmentations(data_cfg.train.get("augmentations", None)) + text_transform = identical_transform + return img_transform(image), text_transform(text), img_transform(hint) + + train_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=True, + ) + + val_data = None + if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): + val_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=False, + ) + + return train_data, val_data + + +def build_train_valid_precached_datasets( + model_cfg, + consumed_samples, +): + data_cfg = model_cfg.data + + # This function maps data that are tuples to dictionary. + def tuple_to_dict(inp): + for input in inp: + out_dict = dict() + out_dict[model_cfg.first_stage_key] = torch.tensor(input['autoencoderkl_image']) + out_dict[model_cfg.cond_stage_key] = torch.tensor(input['clip-vit-large-patch14_text']) + yield out_dict + + def transform_fn(sample): + return sample['pickle'] + + train_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=True, + ) + + val_data = None + if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): + val_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=False, + ) + + return train_data, val_data diff --git a/nemo/collections/multimodal/models/ControlNet/controlnet.py b/nemo/collections/multimodal/models/ControlNet/controlnet.py deleted file mode 100644 index b2bf884bd666..000000000000 --- a/nemo/collections/multimodal/models/ControlNet/controlnet.py +++ /dev/null @@ -1,417 +0,0 @@ -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel import UNetModel -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm_legacy import LatentDiffusion - -class ControlledUnetModel(UNetModel): - def forward(self, x, timesteps=None, context=None, control=None, only_mid_control=False, **kwargs): - hs = [] - with torch.no_grad(): - t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False) - emb = self.time_embed(t_emb) - h = x.type(self.dtype) - for module in self.input_blocks: - h = module(h, emb, context) - hs.append(h) - h = self.middle_block(h, emb, context) - - if control is not None: - h += control.pop() - - for i, module in enumerate(self.output_blocks): - if only_mid_control or control is None: - h = torch.cat([h, hs.pop()], dim=1) - else: - h = torch.cat([h, hs.pop() + control.pop()], dim=1) - h = module(h, emb, context) - - h = h.type(x.dtype) - return self.out(h) - - - -class ControlLDM(LatentDiffusion): - - def __init__(self, control_stage_config, control_key, only_mid_control, *args, **kwargs): - super().__init__(*args, **kwargs) - self.control_model = instantiate_from_config(control_stage_config) - self.control_key = control_key - self.only_mid_control = only_mid_control - self.control_scales = [1.0] * 13 - - @torch.no_grad() - def get_input(self, batch, k, bs=None, *args, **kwargs): - x, c = super().get_input(batch, self.first_stage_key, *args, **kwargs) - control = batch[self.control_key] - if bs is not None: - control = control[:bs] - control = control.to(self.device) - control = einops.rearrange(control, 'b h w c -> b c h w') - control = control.to(memory_format=torch.contiguous_format).float() - return x, dict(c_crossattn=[c], c_concat=[control]) - - def apply_model(self, x_noisy, t, cond, *args, **kwargs): - assert isinstance(cond, dict) - diffusion_model = self.model.diffusion_model - - cond_txt = torch.cat(cond['c_crossattn'], 1) - - if cond['c_concat'] is None: - eps = diffusion_model(x=x_noisy, timesteps=t, context=cond_txt, control=None, only_mid_control=self.only_mid_control) - else: - control = self.control_model(x=x_noisy, hint=torch.cat(cond['c_concat'], 1), timesteps=t, context=cond_txt) - control = [c * scale for c, scale in zip(control, self.control_scales)] - eps = diffusion_model(x=x_noisy, timesteps=t, context=cond_txt, control=control, only_mid_control=self.only_mid_control) - - return eps - - @torch.no_grad() - def get_unconditional_conditioning(self, N): - return self.get_learned_conditioning([""] * N) - - @torch.no_grad() - def log_images(self, batch, N=4, n_row=2, sample=False, ddim_steps=50, ddim_eta=0.0, return_keys=None, - quantize_denoised=True, inpaint=True, plot_denoise_rows=False, plot_progressive_rows=True, - plot_diffusion_rows=False, unconditional_guidance_scale=9.0, unconditional_guidance_label=None, - use_ema_scope=True, - **kwargs): - use_ddim = ddim_steps is not None - - log = dict() - z, c = self.get_input(batch, self.first_stage_key, bs=N) - c_cat, c = c["c_concat"][0][:N], c["c_crossattn"][0][:N] - N = min(z.shape[0], N) - n_row = min(z.shape[0], n_row) - log["reconstruction"] = self.decode_first_stage(z) - log["control"] = c_cat * 2.0 - 1.0 - log["conditioning"] = log_txt_as_img((512, 512), batch[self.cond_stage_key], size=16) - - if plot_diffusion_rows: - # get diffusion row - diffusion_row = list() - z_start = z[:n_row] - for t in range(self.num_timesteps): - if t % self.log_every_t == 0 or t == self.num_timesteps - 1: - t = repeat(torch.tensor([t]), '1 -> b', b=n_row) - t = t.to(self.device).long() - noise = torch.randn_like(z_start) - z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise) - diffusion_row.append(self.decode_first_stage(z_noisy)) - - diffusion_row = torch.stack(diffusion_row) # n_log_step, n_row, C, H, W - diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w') - diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w') - diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0]) - log["diffusion_row"] = diffusion_grid - - if sample: - # get denoise row - samples, z_denoise_row = self.sample_log(cond={"c_concat": [c_cat], "c_crossattn": [c]}, - batch_size=N, ddim=use_ddim, - ddim_steps=ddim_steps, eta=ddim_eta) - x_samples = self.decode_first_stage(samples) - log["samples"] = x_samples - if plot_denoise_rows: - denoise_grid = self._get_denoise_row_from_list(z_denoise_row) - log["denoise_row"] = denoise_grid - - if unconditional_guidance_scale > 1.0: - uc_cross = self.get_unconditional_conditioning(N) - uc_cat = c_cat # torch.zeros_like(c_cat) - uc_full = {"c_concat": [uc_cat], "c_crossattn": [uc_cross]} - samples_cfg, _ = self.sample_log(cond={"c_concat": [c_cat], "c_crossattn": [c]}, - batch_size=N, ddim=use_ddim, - ddim_steps=ddim_steps, eta=ddim_eta, - unconditional_guidance_scale=unconditional_guidance_scale, - unconditional_conditioning=uc_full, - ) - x_samples_cfg = self.decode_first_stage(samples_cfg) - log[f"samples_cfg_scale_{unconditional_guidance_scale:.2f}"] = x_samples_cfg - - return log - - @torch.no_grad() - def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): - ddim_sampler = DDIMSampler(self) - b, c, h, w = cond["c_concat"][0].shape - shape = (self.channels, h // 8, w // 8) - samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, shape, cond, verbose=False, **kwargs) - return samples, intermediates - - def configure_optimizers(self): - lr = self.learning_rate - params = list(self.control_model.parameters()) - if not self.sd_locked: - params += list(self.model.diffusion_model.output_blocks.parameters()) - params += list(self.model.diffusion_model.out.parameters()) - opt = torch.optim.AdamW(params, lr=lr) - return opt - - def low_vram_shift(self, is_diffusing): - if is_diffusing: - self.model = self.model.cuda() - self.control_model = self.control_model.cuda() - self.first_stage_model = self.first_stage_model.cpu() - self.cond_stage_model = self.cond_stage_model.cpu() - else: - self.model = self.model.cpu() - self.control_model = self.control_model.cpu() - self.first_stage_model = self.first_stage_model.cuda() - self.cond_stage_model = self.cond_stage_model.cuda() - -class ControlNet(nn.Module): - def __init__( - self, - image_size, - in_channels, - model_channels, - hint_channels, - num_res_blocks, - attention_resolutions, - dropout=0, - channel_mult=(1, 2, 4, 8), - conv_resample=True, - dims=2, - use_checkpoint=False, - use_fp16=False, - num_heads=-1, - num_head_channels=-1, - num_heads_upsample=-1, - use_scale_shift_norm=False, - resblock_updown=False, - use_new_attention_order=False, - use_spatial_transformer=False, # custom transformer support - transformer_depth=1, # custom transformer support - context_dim=None, # custom transformer support - n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model - legacy=True, - disable_self_attentions=None, ###TODO MMY these are new - num_attention_blocks=None, - disable_middle_self_attn=False, - use_linear_in_transformer=False, - ): - super().__init__() - if use_spatial_transformer: - assert context_dim is not None, 'Fool!! You forgot to include the dimension of your cross-attention conditioning...' - - if context_dim is not None: - assert use_spatial_transformer, 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...' - from omegaconf.listconfig import ListConfig - if type(context_dim) == ListConfig: - context_dim = list(context_dim) - - if num_heads_upsample == -1: - num_heads_upsample = num_heads - - if num_heads == -1: - assert num_head_channels != -1, 'Either num_heads or num_head_channels has to be set' - - if num_head_channels == -1: - assert num_heads != -1, 'Either num_heads or num_head_channels has to be set' - - self.dims = dims - self.image_size = image_size - self.in_channels = in_channels - self.model_channels = model_channels - if isinstance(num_res_blocks, int): - self.num_res_blocks = len(channel_mult) * [num_res_blocks] - else: - if len(num_res_blocks) != len(channel_mult): - raise ValueError("provide num_res_blocks either as an int (globally constant) or " - "as a list/tuple (per-level) with the same length as channel_mult") - self.num_res_blocks = num_res_blocks - if disable_self_attentions is not None: - # should be a list of booleans, indicating whether to disable self-attention in TransformerBlocks or not - assert len(disable_self_attentions) == len(channel_mult) - if num_attention_blocks is not None: - assert len(num_attention_blocks) == len(self.num_res_blocks) - assert all(map(lambda i: self.num_res_blocks[i] >= num_attention_blocks[i], range(len(num_attention_blocks)))) - print(f"Constructor of UNetModel received num_attention_blocks={num_attention_blocks}. " - f"This option has LESS priority than attention_resolutions {attention_resolutions}, " - f"i.e., in cases where num_attention_blocks[i] > 0 but 2**i not in attention_resolutions, " - f"attention will still not be set.") - - self.attention_resolutions = attention_resolutions - self.dropout = dropout - self.channel_mult = channel_mult - self.conv_resample = conv_resample - self.use_checkpoint = use_checkpoint - self.dtype = th.float16 if use_fp16 else th.float32 - self.num_heads = num_heads - self.num_head_channels = num_head_channels - self.num_heads_upsample = num_heads_upsample - self.predict_codebook_ids = n_embed is not None - - time_embed_dim = model_channels * 4 - self.time_embed = nn.Sequential( - linear(model_channels, time_embed_dim), - nn.SiLU(), - linear(time_embed_dim, time_embed_dim), - ) - - self.input_blocks = nn.ModuleList( - [ - TimestepEmbedSequential( - conv_nd(dims, in_channels, model_channels, 3, padding=1) - ) - ] - ) - self.zero_convs = nn.ModuleList([self.make_zero_conv(model_channels)]) - - self.input_hint_block = TimestepEmbedSequential( - conv_nd(dims, hint_channels, 16, 3, padding=1), - nn.SiLU(), - conv_nd(dims, 16, 16, 3, padding=1), - nn.SiLU(), - conv_nd(dims, 16, 32, 3, padding=1, stride=2), - nn.SiLU(), - conv_nd(dims, 32, 32, 3, padding=1), - nn.SiLU(), - conv_nd(dims, 32, 96, 3, padding=1, stride=2), - nn.SiLU(), - conv_nd(dims, 96, 96, 3, padding=1), - nn.SiLU(), - conv_nd(dims, 96, 256, 3, padding=1, stride=2), - nn.SiLU(), - zero_module(conv_nd(dims, 256, model_channels, 3, padding=1)) - ) - - self._feature_size = model_channels - input_block_chans = [model_channels] - ch = model_channels - ds = 1 - for level, mult in enumerate(channel_mult): - for nr in range(self.num_res_blocks[level]): - layers = [ - ResBlock( - ch, - time_embed_dim, - dropout, - out_channels=mult * model_channels, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - ) - ] - ch = mult * model_channels - if ds in attention_resolutions: - if num_head_channels == -1: - dim_head = ch // num_heads - else: - num_heads = ch // num_head_channels - dim_head = num_head_channels - if legacy: - # num_heads = 1 - dim_head = ch // num_heads if use_spatial_transformer else num_head_channels - if exists(disable_self_attentions): - disabled_sa = disable_self_attentions[level] - else: - disabled_sa = False - - if not exists(num_attention_blocks) or nr < num_attention_blocks[level]: - layers.append( - AttentionBlock( - ch, - use_checkpoint=use_checkpoint, - num_heads=num_heads, - num_head_channels=dim_head, - use_new_attention_order=use_new_attention_order, - ) if not use_spatial_transformer else SpatialTransformer( - ch, num_heads, dim_head, depth=transformer_depth, context_dim=context_dim, - disable_self_attn=disabled_sa, use_linear=use_linear_in_transformer, - use_checkpoint=use_checkpoint - ) - ) - self.input_blocks.append(TimestepEmbedSequential(*layers)) - self.zero_convs.append(self.make_zero_conv(ch)) - self._feature_size += ch - input_block_chans.append(ch) - if level != len(channel_mult) - 1: - out_ch = ch - self.input_blocks.append( - TimestepEmbedSequential( - ResBlock( - ch, - time_embed_dim, - dropout, - out_channels=out_ch, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - down=True, - ) - if resblock_updown - else Downsample( - ch, conv_resample, dims=dims, out_channels=out_ch - ) - ) - ) - ch = out_ch - input_block_chans.append(ch) - self.zero_convs.append(self.make_zero_conv(ch)) - ds *= 2 - self._feature_size += ch - - if num_head_channels == -1: - dim_head = ch // num_heads - else: - num_heads = ch // num_head_channels - dim_head = num_head_channels - if legacy: - # num_heads = 1 - dim_head = ch // num_heads if use_spatial_transformer else num_head_channels - self.middle_block = TimestepEmbedSequential( - ResBlock( - ch, - time_embed_dim, - dropout, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - ), - AttentionBlock( - ch, - use_checkpoint=use_checkpoint, - num_heads=num_heads, - num_head_channels=dim_head, - use_new_attention_order=use_new_attention_order, - ) if not use_spatial_transformer else SpatialTransformer( # always uses a self-attn - ch, num_heads, dim_head, depth=transformer_depth, context_dim=context_dim, - disable_self_attn=disable_middle_self_attn, use_linear=use_linear_in_transformer, - use_checkpoint=use_checkpoint - ), - ResBlock( - ch, - time_embed_dim, - dropout, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - ), - ) - self.middle_block_out = self.make_zero_conv(ch) - self._feature_size += ch - - def make_zero_conv(self, channels): - return TimestepEmbedSequential(zero_module(conv_nd(self.dims, channels, channels, 1, padding=0))) - - def forward(self, x, hint, timesteps, context, **kwargs): - t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False) - emb = self.time_embed(t_emb) - - guided_hint = self.input_hint_block(hint, emb, context) - - outs = [] - - h = x.type(self.dtype) - for module, zero_conv in zip(self.input_blocks, self.zero_convs): - if guided_hint is not None: - h = module(h, emb, context) - h += guided_hint - guided_hint = None - else: - h = module(h, emb, context) - outs.append(zero_conv(h, emb, context)) - - h = self.middle_block(h, emb, context) - outs.append(self.middle_block_out(h, emb, context)) - - return outs diff --git a/nemo/collections/multimodal/models/controlnet/__init__.py b/nemo/collections/multimodal/models/controlnet/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/controlnet/controlnet.py b/nemo/collections/multimodal/models/controlnet/controlnet.py new file mode 100644 index 000000000000..11b8a0026cc8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/controlnet.py @@ -0,0 +1,935 @@ +import torch +import torch.nn as nn +import einops +from omegaconf import DictConfig, OmegaConf, open_dict +from torch._dynamo import optimize +from torch._inductor import config as inductor_config +from pytorch_lightning import Trainer +from pytorch_lightning.utilities.distributed import rank_zero_only +from typing import Any, Dict, Optional, Union +from nemo.utils import logging + +from nemo.collections.multimodal.models.multimodal_base_model import MegatronMultimodalModel +from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + conv_nd, + linear, + zero_module, + timestep_embedding, +) +from einops import rearrange, repeat +from torchvision.utils import make_grid +from nemo.collections.multimodal.modules.stable_diffusion.attention import SpatialTransformer +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel import UNetModel, TimestepEmbedSequential, ResBlock, Downsample, AttentionBlock +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion +from nemo.collections.multimodal.parts.stable_diffusion.utils import exists, log_txt_as_img +from nemo.collections.multimodal.data.controlnet.controlnet_dataset import build_train_valid_datasets + +from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler + + +try: + from apex import amp + from apex.transformer.enums import AttnMaskType + from apex.transformer.pipeline_parallel.utils import get_num_microbatches + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + +try: + from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +class ControlledUnetModel(UNetModel): + def forward(self, x, timesteps=None, context=None, control=None, only_mid_control=False, **kwargs): + hs = [] + with torch.no_grad(): + t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False) + emb = self.time_embed(t_emb) + h = x.type(emb.dtype) + for module in self.input_blocks: + h = module(h, emb, context) + hs.append(h) + h = self.middle_block(h, emb, context) + + if control is not None: + h += control.pop() + + for i, module in enumerate(self.output_blocks): + if only_mid_control or control is None: + h = torch.cat([h, hs.pop()], dim=1) + else: + h = torch.cat([h, hs.pop() + control.pop()], dim=1) + h = module(h, emb, context) + + h = h.type(x.dtype) + return self.out(h) + + + +class ControlLDM(LatentDiffusion): + def __init__(self, cfg): + super().__init__(cfg=cfg) + self.control_model = ControlLDM.from_config_dict(cfg.control_stage_config) + self.control_key = cfg.control_key + self.only_mid_control = cfg.only_mid_control + self.control_scales = [1.0] * 13 + self.sd_locked = cfg.sd_locked + self.channels_last = cfg.channels_last + + if cfg.get("inductor", False): + # TorchInductor with CUDA graph can lead to OOM + inductor_config.triton.cudagraphs = cfg.get("inductor_cudagraphs", False) + self.control_model = optimize("inductor")(self.control_model) + + if self.channels_last: + self.control_model = self.control_model.to(memory_format=torch.channels_last) + + + @torch.no_grad() + def get_input(self, batch, k, bs=None, *args, **kwargs): + x, c = super().get_input(batch, self.first_stage_key, *args, **kwargs) + control = batch[self.control_key] + if bs is not None: + control = control[:bs] + control = control.to(torch.cuda.current_device()) + if self.channels_last: + control = control.permute(0, 3, 1, 2).to(non_blocking=True) + else: + control = einops.rearrange(control, 'b h w c -> b c h w') + control = control.to(memory_format=torch.contiguous_format).float() + return x, dict(c_crossattn=c, c_concat=control) + + def apply_model(self, x_noisy, t, cond, *args, **kwargs): + assert isinstance(cond, dict) + diffusion_model = self.model.diffusion_model + + #cond_txt = torch.cat(cond['c_crossattn'], 1) ## Has removed this first dim in the get_input function, same for below hint input + cond_txt = cond['c_crossattn'] + + + if cond['c_concat'] is None: + eps = diffusion_model(x=x_noisy, timesteps=t, context=cond_txt, control=None, only_mid_control=self.only_mid_control) + else: + control = self.control_model(x=x_noisy, hint=cond['c_concat'], timesteps=t, context=cond_txt) + control = [c * scale for c, scale in zip(control, self.control_scales)] + eps = diffusion_model(x=x_noisy, timesteps=t, context=cond_txt, control=control, only_mid_control=self.only_mid_control) + return eps + + @torch.no_grad() + def get_unconditional_conditioning(self, N): + return self.get_learned_conditioning([""] * N) + + @torch.no_grad() + def log_images(self, batch, N=4, n_row=2, sample=False, ddim_steps=50, ddim_eta=0.0, return_keys=None, + quantize_denoised=True, inpaint=True, plot_denoise_rows=False, plot_progressive_rows=True, + plot_diffusion_rows=False, unconditional_guidance_scale=9.0, unconditional_guidance_label=None, + use_ema_scope=True, + **kwargs): + use_ddim = ddim_steps is not None + + log = dict() + batch = next(batch) + z, c = self.get_input(batch, self.first_stage_key, bs=N) + c_cat, c = c["c_concat"][:N], c["c_crossattn"][:N] + N = min(z.shape[0], N) + n_row = min(z.shape[0], n_row) + log["reconstruction"] = self.decode_first_stage(z) + log["control"] = c_cat * 2.0 - 1.0 + log["conditioning"] = log_txt_as_img((512, 512), batch[self.cond_stage_key], size=16) + + if plot_diffusion_rows: + # get diffusion row + diffusion_row = list() + z_start = z[:n_row] + for t in range(self.num_timesteps): + if t % self.log_every_t == 0 or t == self.num_timesteps - 1: + t = repeat(torch.tensor([t]), '1 -> b', b=n_row) + t = t.to(self.device).long() + noise = torch.randn_like(z_start) + z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise) + diffusion_row.append(self.decode_first_stage(z_noisy)) + + diffusion_row = torch.stack(diffusion_row) # n_log_step, n_row, C, H, W + diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w') + diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w') + diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0]) + log["diffusion_row"] = diffusion_grid + + if sample: + # get denoise row + samples, z_denoise_row = self.sample_log(cond={"c_concat": c_cat, "c_crossattn": c}, + batch_size=N, ddim=use_ddim, + ddim_steps=ddim_steps, eta=ddim_eta) + x_samples = self.decode_first_stage(samples) + log["samples"] = x_samples + if plot_denoise_rows: + denoise_grid = self._get_denoise_row_from_list(z_denoise_row) + log["denoise_row"] = denoise_grid + + if unconditional_guidance_scale > 1.0: + uc_cross = self.get_unconditional_conditioning(N) + uc_cat = c_cat # torch.zeros_like(c_cat) + uc_full = {"c_concat": uc_cat, "c_crossattn": uc_cross} + samples_cfg, _ = self.sample_log(cond={"c_concat": c_cat, "c_crossattn": c}, + batch_size=N, ddim=use_ddim, + ddim_steps=ddim_steps, eta=ddim_eta, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=uc_full, + ) + x_samples_cfg = self.decode_first_stage(samples_cfg) + log[f"samples_cfg_scale_{unconditional_guidance_scale:.2f}"] = x_samples_cfg + + return log + + @torch.no_grad() + def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): + ddim_sampler = DDIMSampler(self) + c, h, w = cond["c_concat"][0].shape + shape = (self.channels, h // 8, w // 8) + samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, shape, cond, verbose=False, **kwargs) + return samples, intermediates + + + def parameters(self): + params = list(self.control_model.parameters()) + if not self.sd_locked: + params += list(self.model.diffusion_model.output_blocks.parameters()) + params += list(self.model.diffusion_model.out.parameters()) + return params + + + def low_vram_shift(self, is_diffusing): + if is_diffusing: + self.model = self.model.cuda() + self.control_model = self.control_model.cuda() + self.first_stage_model = self.first_stage_model.cpu() + self.cond_stage_model = self.cond_stage_model.cpu() + else: + self.model = self.model.cpu() + self.control_model = self.control_model.cpu() + self.first_stage_model = self.first_stage_model.cuda() + self.cond_stage_model = self.cond_stage_model.cuda() + +class ControlNet(nn.Module): + def __init__( + self, + image_size, + in_channels, + model_channels, + hint_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + use_checkpoint=False, + use_fp16=False, + num_heads=-1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + use_spatial_transformer=False, # custom transformer support + transformer_depth=1, # custom transformer support + context_dim=None, # custom transformer support + n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model + legacy=True, + disable_self_attentions=None, ###TODO MMY these are new + num_attention_blocks=None, + disable_middle_self_attn=False, + use_linear_in_transformer=False, + use_flash_attention=False, + from_pretrained_unet=None, + from_NeMo=True + ): + super().__init__() + if use_spatial_transformer: + assert context_dim is not None, 'Fool!! You forgot to include the dimension of your cross-attention conditioning...' + + if context_dim is not None: + assert use_spatial_transformer, 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...' + from omegaconf.listconfig import ListConfig + if type(context_dim) == ListConfig: + context_dim = list(context_dim) + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + if num_heads == -1: + assert num_head_channels != -1, 'Either num_heads or num_head_channels has to be set' + + if num_head_channels == -1: + assert num_heads != -1, 'Either num_heads or num_head_channels has to be set' + + self.dims = dims + self.image_size = image_size + self.in_channels = in_channels + self.model_channels = model_channels + if isinstance(num_res_blocks, int): + self.num_res_blocks = len(channel_mult) * [num_res_blocks] + else: + if len(num_res_blocks) != len(channel_mult): + raise ValueError("provide num_res_blocks either as an int (globally constant) or " + "as a list/tuple (per-level) with the same length as channel_mult") + self.num_res_blocks = num_res_blocks + if disable_self_attentions is not None: + # should be a list of booleans, indicating whether to disable self-attention in TransformerBlocks or not + assert len(disable_self_attentions) == len(channel_mult) + if num_attention_blocks is not None: + assert len(num_attention_blocks) == len(self.num_res_blocks) + assert all(map(lambda i: self.num_res_blocks[i] >= num_attention_blocks[i], range(len(num_attention_blocks)))) + print(f"Constructor of UNetModel received num_attention_blocks={num_attention_blocks}. " + f"This option has LESS priority than attention_resolutions {attention_resolutions}, " + f"i.e., in cases where num_attention_blocks[i] > 0 but 2**i not in attention_resolutions, " + f"attention will still not be set.") + + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.use_checkpoint = use_checkpoint + self.dtype = torch.float16 if use_fp16 else torch.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + self.predict_codebook_ids = n_embed is not None + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + nn.SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + self.input_blocks = nn.ModuleList( + [ + TimestepEmbedSequential( + conv_nd(dims, in_channels, model_channels, 3, padding=1) + ) + ] + ) + self.zero_convs = nn.ModuleList([self.make_zero_conv(model_channels)]) + + self.input_hint_block = TimestepEmbedSequential( + conv_nd(dims, hint_channels, 16, 3, padding=1), + nn.SiLU(), + conv_nd(dims, 16, 16, 3, padding=1), + nn.SiLU(), + conv_nd(dims, 16, 32, 3, padding=1, stride=2), + nn.SiLU(), + conv_nd(dims, 32, 32, 3, padding=1), + nn.SiLU(), + conv_nd(dims, 32, 96, 3, padding=1, stride=2), + nn.SiLU(), + conv_nd(dims, 96, 96, 3, padding=1), + nn.SiLU(), + conv_nd(dims, 96, 256, 3, padding=1, stride=2), + nn.SiLU(), + zero_module(conv_nd(dims, 256, model_channels, 3, padding=1)) + ) + + self._feature_size = model_channels + input_block_chans = [model_channels] + ch = model_channels + ds = 1 + for level, mult in enumerate(channel_mult): + for nr in range(self.num_res_blocks[level]): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=mult * model_channels, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = mult * model_channels + if ds in attention_resolutions: + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + # num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + if exists(disable_self_attentions): + disabled_sa = disable_self_attentions[level] + else: + disabled_sa = False + + if not exists(num_attention_blocks) or nr < num_attention_blocks[level]: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) if not use_spatial_transformer else SpatialTransformer( + ch, num_heads, dim_head, depth=transformer_depth, context_dim=context_dim, + disable_self_attn=disabled_sa, use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint, use_flash_attention=use_flash_attention + ) + ) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self.zero_convs.append(self.make_zero_conv(ch)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) + if resblock_updown + else Downsample( + ch, conv_resample, dims=dims, out_channels=out_ch + ) + ) + ) + ch = out_ch + input_block_chans.append(ch) + self.zero_convs.append(self.make_zero_conv(ch)) + ds *= 2 + self._feature_size += ch + + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + # num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) if not use_spatial_transformer else SpatialTransformer( # always uses a self-attn + ch, num_heads, dim_head, depth=transformer_depth, context_dim=context_dim, + disable_self_attn=disable_middle_self_attn, use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint, use_flash_attention=use_flash_attention + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self.middle_block_out = self.make_zero_conv(ch) + self._feature_size += ch + + if from_pretrained_unet is not None: + self.load_from_unet(from_pretrained_unet=from_pretrained_unet, from_NeMo=from_NeMo) + + def load_from_unet(self, from_pretrained_unet, from_NeMo=True): + if not from_NeMo: + print('loading from other source of unet is experimental! Carefully check if keys are loaded correctly.') + else: + print("Loading unet blocks from sd") + + state_dict = torch.load(from_pretrained_unet, map_location='cpu') + state_dict = state_dict['state_dict'] + model_state_dict = self.state_dict() + + re_state_dict = {} + for key_, value_ in state_dict.items(): + if key_.startswith('model.model.diffusion_model'): + re_state_dict[key_.replace('model.model.diffusion_model.', '')] = value_ + if key_.startswith('model.diffusion_model'): + re_state_dict[key_.replace('model.diffusion_model.', '')] = value_ + if key_.startswith('model.model._orig_mod.diffusion_model'): + re_state_dict[key_.replace('model.model._orig_mod.diffusion_model.', '')] = value_ + if key_.startswith('model._orig_mod.diffusion_model'): + re_state_dict[key_.replace('model._orig_mod.diffusion_model.', '')] = value_ + + expected_keys = list(model_state_dict.keys()) + loaded_keys = list(re_state_dict.keys()) + missing_keys = list(set(expected_keys)-set(loaded_keys)) + unexpected_keys = list(set(loaded_keys)-set(expected_keys)) + + + + if 'input_blocks.1.0.in_layers.2.weight' in loaded_keys and 'input_blocks.1.0.in_layers.1.weight' in expected_keys: + # GroupNormOpt fuses activation function to one layer, thus the indexing of weights are shifted for following + for key_ in missing_keys: + if key_.startswith('input_blocks') or key_.startswith('middle_block.'): + s = key_.split('.') + idx = int(s[-2]) + new_key_ = ".".join(s[:-2] + [str(int(idx+1))] + [s[-1]]) + re_state_dict[key_] = re_state_dict[new_key_] + + loaded_keys = list(re_state_dict.keys()) + missing_keys = list(set(expected_keys) - set(loaded_keys)) + unexpected_keys = list(set(loaded_keys) - set(expected_keys)) + + + self.load_state_dict(re_state_dict, strict=False) + + if len(missing_keys) > 42: + print('warning: only input hint blocks and zero conv layers are randomly initialized. This message indicates some unet blocks are not loaded correctly.') + print(f'There is {len(missing_keys)} total missing keys') + print("Missing:", missing_keys) + print("Unexpected:", unexpected_keys) + else: + print("sd blocks loaded successfully") + + # Check if unet blocks are loaded + # for key, value in self.state_dict().items(): + # if key in missing_keys: + # continue + # if torch.allclose(value, re_state_dict[key], atol = 1e-5): + # pass + # else: + # print(f"{key} not matching after loading") + + + + + + def make_zero_conv(self, channels): + return TimestepEmbedSequential(zero_module(conv_nd(self.dims, channels, channels, 1, padding=0))) + + def forward(self, x, hint, timesteps, context, **kwargs): + t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False) + emb = self.time_embed(t_emb) + + guided_hint = self.input_hint_block(hint, emb, context) + outs = [] + + h = x.type(self.dtype) + for module, zero_conv in zip(self.input_blocks, self.zero_convs): + if guided_hint is not None: + h = module(h, emb, context) + h += guided_hint + guided_hint = None + else: + h = module(h, emb, context) + outs.append(zero_conv(h, emb, context)) + + h = self.middle_block(h, emb, context) + outs.append(self.middle_block_out(h, emb, context)) + + return outs + + +class MegatronControlNet(MegatronMultimodalModel): + def __init__(self, cfg: DictConfig, trainer: Trainer): + if not HAVE_APEX: + raise ImportError( + "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + + # this prevents base constructor from initializing tokenizer + self.tokenizer = None + super().__init__(cfg, trainer=trainer) + + self._validate_trainer() + + # megatron_amp_O2 is not yet supported in diffusion models + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) + + self.model = self.model_provider_func() + + self.conditioning_keys = [] + + if self.trainer.precision == 'bf16': + self.autocast_dtype = torch.bfloat16 + elif int(self.trainer.precision) == 32: + self.autocast_dtype = torch.float + elif int(self.trainer.precision) == 16: + self.autocast_dtype = torch.half + else: + raise ValueError('precision must be in [32, 16, "bf16"]') + + def model_provider_func(self, pre_process=True, post_process=True): + """Model depends on pipeline paralellism.""" + model = ControlLDM(cfg=self.cfg) + return model + + def forward(self, x, c, *args, **kwargs): + output_tensor = self.model(x, c, *args, **kwargs) + return output_tensor + + @rank_zero_only + @torch.no_grad() + def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): + if self.cfg.scale_by_std and self.current_epoch == 0 and self.global_step == 0 and batch_idx == 0: + assert self.cfg.scale_factor == 1., 'rather not use custom rescaling and std-rescaling simultaneously' + batch[self.cfg.first_stage_key] = \ + batch[self.cfg.first_stage_key].cuda(non_blocking=True) + self.model.on_train_batch_start(batch, batch_idx) + + def training_step(self, dataloader_iter, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + Batch should be a list of microbatches and those microbatches should on CPU. + Microbatches are then moved to GPU during the pipeline. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + tensor_shape = None + # we zero grads here because we also call backward in the apex fwd/bwd functions + self._optimizer.zero_grad() + + # we prepare the micro batches for the apex fwd/bwd function + + fwd_bwd_function = get_forward_backward_func() + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), + forward_only=False, + tensor_shape=tensor_shape, # required by pipeline parallelism + dtype=self.autocast_dtype, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, + ) + + # losses_reduced_per_micro_batch is a list of dictionaries + # [{"loss": 0.1}, {"loss": 0.2}, ...] which are from gradient accumulation steps + # only the last stages of the pipeline return losses + loss_dict = {} + if losses_reduced_per_micro_batch: + # average loss across micro batches + for key in losses_reduced_per_micro_batch[0]: + loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_dict[key] = loss_tensor.mean() + loss_mean = loss_dict["train/loss"] + else: + loss_mean = torch.tensor(0.0, device=torch.cuda.current_device()) + + torch.distributed.broadcast(loss_mean, get_last_rank()) + + # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced + if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): + self.allreduce_sequence_parallel_gradients() + + if self.with_distributed_adam: + # gradients are reduced internally in distributed optimizer + pass + elif self.megatron_amp_O2: + # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): + # # main grads are stored in the MainParamsOptimizer wrapper + # self._optimizer.allreduce_main_grads() + self._optimizer.allreduce_main_grads() + else: + # async grad allreduce is not currently implemented for O1/autocasting mixed precision training + # so we all-reduce gradients after the pipeline + self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) + + if self.cfg.precision == 16: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log('loss_scale', loss_scale, batch_size=1) + + self.log_dict(loss_dict, prog_bar=False, + logger=True, on_step=True, rank_zero_only=True, batch_size=1) + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log( + 'consumed_samples', + self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), + prog_bar=True, + rank_zero_only=True, + batch_size = 1 + ) + return loss_mean + + def backward(self, *args, **kwargs): + """ LightningModule hook to do backward. + We want this to do nothing since we run backward in the fwd/bwd functions from apex. + No need to call it here. + """ + pass + + def optimizer_zero_grad(self, *args, **kwargs): + """ LightningModule hook to zero grad. + We want this to do nothing as we are zeroing grads during the training_step. + """ + pass + + def _append_sequence_parallel_module_grads(self, module, grads): + """ Helper method for allreduce_sequence_parallel_gradients""" + + for param in module.parameters(): + sequence_parallel_param = getattr(param, 'sequence_parallel', False) + if sequence_parallel_param and param.requires_grad: + if self.megatron_amp_O2: + grad = param.main_grad + else: + grad = param.grad + grads.append(grad.data) + + def get_forward_output_and_loss_func(self): + def process_batch(batch): + """ Prepares the global batch for apex fwd/bwd functions. + Global batch is a list of micro batches. + """ + # noise_map, condition + batch[self.cfg.first_stage_key] = batch[self.cfg.first_stage_key].cuda(non_blocking=True) + if isinstance(batch[self.cfg.cond_stage_key], torch.Tensor): + # in the case of precached text embeddings, cond_stage is also a tensor + batch[self.cfg.cond_stage_key] = batch[self.cfg.cond_stage_key].cuda(non_blocking=True) + + # SD has more dedicated structure for encoding, so we enable autocasting here as well + with torch.cuda.amp.autocast( + self.autocast_dtype in (torch.half, torch.bfloat16), + dtype=self.autocast_dtype, + ): + x, c = self.model.get_input(batch, self.cfg.first_stage_key) + + if not isinstance(c, dict): + return [x, c] + + if len(self.conditioning_keys) == 0: + self.conditioning_keys = list(c.keys()) + c_list = [c[key] for key in self.conditioning_keys] + return [x, *c_list] + + def fwd_output_and_loss_func(dataloader_iter, model): + batch = next(dataloader_iter) + batch = process_batch(batch) + batch = [x.cuda(non_blocking=True) for x in batch] + if len(self.conditioning_keys) == 0: + x, c = batch + else: + x = batch[0] + c = {} + for idx, key in enumerate(self.conditioning_keys): + c[key] = batch[1 + idx] + loss, loss_dict = model(x, c) + + def dummy(output_tensor): + return loss, loss_dict + + # output_tensor, and a function to convert output_tensor to loss + loss_dict + return loss, dummy + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(batch, model): + raise NotImplementedError + + return fwd_output_only_func + + @torch.no_grad() + def validation_step(self, batch, batch_idx): + tensor_shape = None # Placeholder + fwd_bwd_function = get_forward_backward_func() + + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), + forward_only=True, + tensor_shape=None, # required by pipeline parallelism + dtype=self.autocast_dtype, + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, + ) + # only the last stages of the pipeline return losses + val_loss_dict = {} + if losses_reduced_per_micro_batch: + # average loss across micro batches + for key in losses_reduced_per_micro_batch[0]: + loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + val_loss_dict[key] = loss_tensor.mean() + + self.log_dict(val_loss_dict, prog_bar=False, logger=True, on_step=False, on_epoch=True) + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + self.model.rng.manual_seed(self.cfg.seed + 100 * parallel_state.get_data_parallel_rank()) + + # log number of parameters + if isinstance(self.model, list): + num_parameters_on_device = sum( + [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] + ) + else: + num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) + + # to be summed across data parallel group + total_num_parameters = torch.tensor(num_parameters_on_device).cuda(non_blocking=True) + + torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer._checkpoint_connector.resume_from_checkpoint_fit_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + # allowing restored models to optionally setup datasets + self.build_train_valid_test_datasets() + + # Batch size need to be provided for webdatset + self._num_micro_batches = get_num_microbatches() + self._micro_batch_size = self.cfg.micro_batch_size + + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + + def build_train_valid_test_datasets(self): + logging.info('Building datasets for Stable Diffusion...') + if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): + raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") + + if self.cfg.first_stage_key.endswith("encoded"): + self._train_ds, self._validation_ds = build_train_valid_precached_datasets( + model_cfg=self.cfg, + consumed_samples=self.compute_consumed_samples(0), + ) + else: + self._train_ds, self._validation_ds = build_train_valid_datasets( + model_cfg=self.cfg, + consumed_samples=self.compute_consumed_samples(0) + ) + self._test_ds = None + + if self._train_ds is not None: + logging.info(f'Length of train dataset: {len(self._train_ds)}') + if self._validation_ds is not None: + logging.info(f'Length of val dataset: {len(self._validation_ds)}') + if self._test_ds is not None: + logging.info(f'Length of test dataset: {len(self._test_ds)}') + logging.info(f'Finished building datasets for LatentDiffusion.') + return self._train_ds, self._validation_ds, self._test_ds + + def setup_training_data(self, cfg): + if hasattr(self, '_train_ds') and self._train_ds is not None: + consumed_samples = self.compute_consumed_samples(0) + logging.info( + f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' + ) + self._train_dl = torch.utils.data.DataLoader( + self._train_ds, + batch_size=self._micro_batch_size, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=True, + persistent_workers=True, + ) + + def setup_validation_data(self, cfg): + if hasattr(self, '_validation_ds') and self._validation_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' + ) + self._validation_dl = torch.utils.data.DataLoader( + self._validation_ds, + batch_size=self._micro_batch_size, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=False, + persistent_workers=True, + ) + + def setup_test_data(self, cfg): + if hasattr(self, '_test_ds') and self._test_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' + ) + self._test_dl = torch.utils.data.DataLoader( + self._test_ds, batch_size=self._micro_batch_size, + num_workers=cfg.num_workers, pin_memory=True, + ) + + def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: + """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device + When using pipeline parallelism, we need the global batch to remain on the CPU, + since the memory overhead will be too high when using a large number of microbatches. + Microbatches are transferred from CPU to GPU inside the pipeline. + """ + return batch + + def _validate_trainer(self): + """ Certain trainer configurations can break training. + Here we try to catch them and raise an error. + """ + if self.trainer.accumulate_grad_batches > 1: + raise ValueError( + f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' + ) + + @classmethod + def list_available_models(cls): + return None + + def log_images(self, *args, **kwargs): + return self.model.log_images(*args, **kwargs) + + def parameters(self): + if isinstance(self.model, list): + return itertools.chain.from_iterable(module.parameters() for module in self.model) + else: + return self.model.parameters() + + diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/LICENSE b/nemo/collections/multimodal/models/controlnet/uniformer/LICENSE new file mode 100644 index 000000000000..c38dc639e6e2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/LICENSE @@ -0,0 +1,203 @@ +Copyright 2022 SenseTime X-Lab. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2022 SenseTime X-Lab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/__init__.py new file mode 100644 index 000000000000..f904ec642f93 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/__init__.py @@ -0,0 +1,27 @@ +# Uniformer +# From https://github.com/Sense-X/UniFormer +# # Apache-2.0 license + +import os + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core.evaluation import get_palette + + +checkpoint_file = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/upernet_global_small.pth" + + +class UniformerDetector: + def __init__(self): + annotator_ckpts_path = '/opt/NeMo/nemo/collections/multimodal/models/controlnet/uniformer' + modelpath = os.path.join(annotator_ckpts_path, "upernet_global_small.pth") + if not os.path.exists(modelpath): + from basicsr.utils.download_util import load_file_from_url + load_file_from_url(checkpoint_file, model_dir=annotator_ckpts_path) + config_file = os.path.join(os.path.dirname(annotator_ckpts_path), "uniformer", "exp", "upernet_global_small", "config.py") + self.model = init_segmentor(config_file, modelpath).cuda() + + def __call__(self, img): + result = inference_segmentor(self.model, img) + res_img = show_result_pyplot(self.model, img, result, get_palette('ade'), opacity=1) + return res_img diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py new file mode 100644 index 000000000000..efc8b4bb20c9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'ADE20KDataset' +data_root = 'data/ade/ADEChallengeData2016' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py new file mode 100644 index 000000000000..298594ea925f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'ChaseDB1Dataset' +data_root = 'data/CHASE_DB1' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (960, 999) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py new file mode 100644 index 000000000000..f21867c63e18 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py new file mode 100644 index 000000000000..336c7b254fe3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py @@ -0,0 +1,35 @@ +_base_ = './cityscapes.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (769, 769) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2049, 1025), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py new file mode 100644 index 000000000000..06e8ff606e0d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'DRIVEDataset' +data_root = 'data/DRIVE' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (584, 565) +crop_size = (64, 64) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py new file mode 100644 index 000000000000..242d790eb1b8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'HRFDataset' +data_root = 'data/HRF' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (2336, 3504) +crop_size = (256, 256) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py new file mode 100644 index 000000000000..ff65bad1b86d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py @@ -0,0 +1,60 @@ +# dataset settings +dataset_type = 'PascalContextDataset' +data_root = 'data/VOCdevkit/VOC2010/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py new file mode 100644 index 000000000000..37585abab898 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py @@ -0,0 +1,60 @@ +# dataset settings +dataset_type = 'PascalContextDataset59' +data_root = 'data/VOCdevkit/VOC2010/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py new file mode 100644 index 000000000000..ba1d42d0c578 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py @@ -0,0 +1,57 @@ +# dataset settings +dataset_type = 'PascalVOCDataset' +data_root = 'data/VOCdevkit/VOC2012' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py new file mode 100644 index 000000000000..3f23b6717d53 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py @@ -0,0 +1,9 @@ +_base_ = './pascal_voc12.py' +# dataset settings +data = dict( + train=dict( + ann_dir=['SegmentationClass', 'SegmentationClassAug'], + split=[ + 'ImageSets/Segmentation/train.txt', + 'ImageSets/Segmentation/aug.txt' + ])) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py new file mode 100644 index 000000000000..3f71b25488cc --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'STAREDataset' +data_root = 'data/STARE' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (605, 700) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py new file mode 100644 index 000000000000..b564cc4e7e7d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py @@ -0,0 +1,14 @@ +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] +cudnn_benchmark = True diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py new file mode 100644 index 000000000000..a2cb653827e4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ANNHead', + in_channels=[1024, 2048], + in_index=[2, 3], + channels=512, + project_channels=256, + query_scales=(1, ), + key_pool_scales=(1, 3, 6, 8), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py new file mode 100644 index 000000000000..c8f5316cbcf3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='APCHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py new file mode 100644 index 000000000000..794148f576b9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='CCHead', + in_channels=2048, + in_index=3, + channels=512, + recurrence=2, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py new file mode 100644 index 000000000000..eff8d9458c87 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py @@ -0,0 +1,35 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='CGNet', + norm_cfg=norm_cfg, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16)), + decode_head=dict( + type='FCNHead', + in_channels=256, + in_index=2, + channels=256, + num_convs=0, + concat_input=False, + dropout_ratio=0, + num_classes=19, + norm_cfg=norm_cfg, + loss_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0, + class_weight=[ + 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, + 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, + 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, + 10.396974, 10.055647 + ])), + # model training and testing settings + train_cfg=dict(sampler=None), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py new file mode 100644 index 000000000000..2c934939fac4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py new file mode 100644 index 000000000000..d7a43bee0142 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py new file mode 100644 index 000000000000..0cd262999d8b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py @@ -0,0 +1,50 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='ASPPHead', + in_channels=64, + in_index=4, + channels=16, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py new file mode 100644 index 000000000000..050e39e091d8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DepthwiseSeparableASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + c1_in_channels=256, + c1_channels=48, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py new file mode 100644 index 000000000000..d22ba52640be --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DMHead', + in_channels=2048, + in_index=3, + channels=512, + filter_sizes=(1, 3, 5, 7), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py new file mode 100644 index 000000000000..edb4c174c51e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DNLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py new file mode 100644 index 000000000000..26adcd430926 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='EMAHead', + in_channels=2048, + in_index=3, + channels=256, + ema_channels=512, + num_bases=64, + num_stages=3, + momentum=0.1, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py new file mode 100644 index 000000000000..be777123a886 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(1, 2, 3), + channels=512, + num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py new file mode 100644 index 000000000000..32fdeb659355 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py @@ -0,0 +1,57 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='FastSCNN', + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + norm_cfg=norm_cfg, + align_corners=False), + decode_head=dict( + type='DepthwiseSeparableFCNHead', + in_channels=128, + channels=128, + concat_input=False, + num_classes=19, + in_index=-1, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=32, + num_convs=1, + num_classes=19, + in_index=-2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=64, + channels=32, + num_convs=1, + num_classes=19, + in_index=-3, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py new file mode 100644 index 000000000000..c3e299bc89ad --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + channels=sum([18, 36, 72, 144]), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py new file mode 100644 index 000000000000..5e98f6cc918b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py @@ -0,0 +1,45 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='FCNHead', + in_channels=2048, + in_index=3, + channels=512, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py new file mode 100644 index 000000000000..a33e7972877f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py @@ -0,0 +1,51 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='FCNHead', + in_channels=64, + in_index=4, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py new file mode 100644 index 000000000000..86ab327db92e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py @@ -0,0 +1,36 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py new file mode 100644 index 000000000000..8aae98c59910 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py @@ -0,0 +1,35 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + mlp_ratio=4., + qkv_bias=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.1), + neck=dict( + type='FPN', + in_channels=[64, 128, 320, 512], + out_channels=256, + num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole') +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py new file mode 100644 index 000000000000..3d2ad69f5c22 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='GCHead', + in_channels=2048, + in_index=3, + channels=512, + ratio=1 / 4., + pooling_type='att', + fusion_types=('channel_add', ), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py new file mode 100644 index 000000000000..93258242a906 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py @@ -0,0 +1,25 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='MobileNetV3', + arch='large', + out_indices=(1, 3, 16), + norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 24, 960), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py new file mode 100644 index 000000000000..5674a39854ca --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='NLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py new file mode 100644 index 000000000000..c60f62a7cdf3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py @@ -0,0 +1,68 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py new file mode 100644 index 000000000000..615aa3ff7039 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=[ + dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=2048, + in_index=3, + channels=512, + ocr_channels=256, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py new file mode 100644 index 000000000000..9d323dbf9466 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py @@ -0,0 +1,56 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + decode_head=[ + dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='PointHead', + in_channels=[256], + in_index=[0], + channels=256, + num_fcs=3, + coarse_pred_each_layer=True, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict( + num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75), + test_cfg=dict( + mode='whole', + subdivision_steps=2, + subdivision_num_points=8196, + scale_factor=2)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py new file mode 100644 index 000000000000..689513fa9d2a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py @@ -0,0 +1,49 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSAHead', + in_channels=2048, + in_index=3, + channels=512, + mask_size=(97, 97), + psa_type='bi-direction', + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py new file mode 100644 index 000000000000..f451e08ad2eb --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py new file mode 100644 index 000000000000..fcff9ec4f41f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py @@ -0,0 +1,50 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='PSPHead', + in_channels=64, + in_index=4, + channels=16, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py new file mode 100644 index 000000000000..10974962fdd7 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='UPerHead', + in_channels=[256, 512, 1024, 2048], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py new file mode 100644 index 000000000000..41aa4db809dc --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py @@ -0,0 +1,43 @@ +# model settings +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + mlp_ratio=4., + qkv_bias=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.1), + decode_head=dict( + type='UPerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=320, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) \ No newline at end of file diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_160k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_160k.py new file mode 100644 index 000000000000..52603890b10f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_160k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=160000) +checkpoint_config = dict(by_epoch=False, interval=16000) +evaluation = dict(interval=16000, metric='mIoU') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_20k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_20k.py new file mode 100644 index 000000000000..bf780a1b6f65 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_20k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=20000) +checkpoint_config = dict(by_epoch=False, interval=2000) +evaluation = dict(interval=2000, metric='mIoU') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_40k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_40k.py new file mode 100644 index 000000000000..cdbf841abcb2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_40k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=40000) +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric='mIoU') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_80k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_80k.py new file mode 100644 index 000000000000..c190cee6bdc7 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_80k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=80000) +checkpoint_config = dict(by_epoch=False, interval=8000) +evaluation = dict(interval=8000, metric='mIoU') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py new file mode 100644 index 000000000000..01db96bf9b0b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py @@ -0,0 +1,38 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=False, + hybrid=False + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/run.sh b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/run.sh new file mode 100644 index 000000000000..9fb22edfa7a3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/run.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +work_path=$(dirname $0) +PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=8 \ + tools/train.py ${work_path}/config.py \ + --launcher pytorch \ + --options model.backbone.pretrained_path='your_model_path/uniformer_small_in1k.pth' \ + --work-dir ${work_path}/ckpt \ + 2>&1 | tee -a ${work_path}/log.txt diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test.sh b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test.sh new file mode 100644 index 000000000000..d9a85e7a0d3b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +work_path=$(dirname $0) +PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=8 \ + tools/test.py ${work_path}/test_config_h32.py \ + ${work_path}/ckpt/latest.pth \ + --launcher pytorch \ + --eval mIoU \ + 2>&1 | tee -a ${work_path}/log.txt diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py new file mode 100644 index 000000000000..e43737a98a3b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py @@ -0,0 +1,38 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=False, + hybrid=False, + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py new file mode 100644 index 000000000000..a31e3874f76f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py @@ -0,0 +1,39 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=False, + hybrid=True, + window_size=32 + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py new file mode 100644 index 000000000000..3d9e06f029e4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py @@ -0,0 +1,39 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=True, + hybrid=False, + window_size=32 + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/__init__.py new file mode 100644 index 000000000000..210a29891383 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# flake8: noqa +from .arraymisc import * +from .fileio import * +from .image import * +from .utils import * +from .version import * +from .video import * +from .visualization import * + +# The following modules are not imported to this level, so mmcv may be used +# without PyTorch. +# - runner +# - parallel +# - op diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/__init__.py new file mode 100644 index 000000000000..4b4700d6139a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .quantization import dequantize, quantize + +__all__ = ['quantize', 'dequantize'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py new file mode 100644 index 000000000000..8e47a3545780 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py @@ -0,0 +1,55 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np + + +def quantize(arr, min_val, max_val, levels, dtype=np.int64): + """Quantize an array of (-inf, inf) to [0, levels-1]. + + Args: + arr (ndarray): Input array. + min_val (scalar): Minimum value to be clipped. + max_val (scalar): Maximum value to be clipped. + levels (int): Quantization levels. + dtype (np.type): The type of the quantized array. + + Returns: + tuple: Quantized array. + """ + if not (isinstance(levels, int) and levels > 1): + raise ValueError( + f'levels must be a positive integer, but got {levels}') + if min_val >= max_val: + raise ValueError( + f'min_val ({min_val}) must be smaller than max_val ({max_val})') + + arr = np.clip(arr, min_val, max_val) - min_val + quantized_arr = np.minimum( + np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1) + + return quantized_arr + + +def dequantize(arr, min_val, max_val, levels, dtype=np.float64): + """Dequantize an array. + + Args: + arr (ndarray): Input array. + min_val (scalar): Minimum value to be clipped. + max_val (scalar): Maximum value to be clipped. + levels (int): Quantization levels. + dtype (np.type): The type of the dequantized array. + + Returns: + tuple: Dequantized array. + """ + if not (isinstance(levels, int) and levels > 1): + raise ValueError( + f'levels must be a positive integer, but got {levels}') + if min_val >= max_val: + raise ValueError( + f'min_val ({min_val}) must be smaller than max_val ({max_val})') + + dequantized_arr = (arr + 0.5).astype(dtype) * (max_val - + min_val) / levels + min_val + + return dequantized_arr diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py new file mode 100644 index 000000000000..7246c897430f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py @@ -0,0 +1,41 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .alexnet import AlexNet +# yapf: disable +from .bricks import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, + PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS, + ContextBlock, Conv2d, Conv3d, ConvAWS2d, ConvModule, + ConvTranspose2d, ConvTranspose3d, ConvWS2d, + DepthwiseSeparableConvModule, GeneralizedAttention, + HSigmoid, HSwish, Linear, MaxPool2d, MaxPool3d, + NonLocal1d, NonLocal2d, NonLocal3d, Scale, Swish, + build_activation_layer, build_conv_layer, + build_norm_layer, build_padding_layer, build_plugin_layer, + build_upsample_layer, conv_ws_2d, is_norm) +from .builder import MODELS, build_model_from_cfg +# yapf: enable +from .resnet import ResNet, make_res_layer +from .utils import (INITIALIZERS, Caffe2XavierInit, ConstantInit, KaimingInit, + NormalInit, PretrainedInit, TruncNormalInit, UniformInit, + XavierInit, bias_init_with_prob, caffe2_xavier_init, + constant_init, fuse_conv_bn, get_model_complexity_info, + initialize, kaiming_init, normal_init, trunc_normal_init, + uniform_init, xavier_init) +from .vgg import VGG, make_vgg_layer + +__all__ = [ + 'AlexNet', 'VGG', 'make_vgg_layer', 'ResNet', 'make_res_layer', + 'constant_init', 'xavier_init', 'normal_init', 'trunc_normal_init', + 'uniform_init', 'kaiming_init', 'caffe2_xavier_init', + 'bias_init_with_prob', 'ConvModule', 'build_activation_layer', + 'build_conv_layer', 'build_norm_layer', 'build_padding_layer', + 'build_upsample_layer', 'build_plugin_layer', 'is_norm', 'NonLocal1d', + 'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'HSigmoid', 'Swish', 'HSwish', + 'GeneralizedAttention', 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', + 'PADDING_LAYERS', 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', + 'get_model_complexity_info', 'conv_ws_2d', 'ConvAWS2d', 'ConvWS2d', + 'fuse_conv_bn', 'DepthwiseSeparableConvModule', 'Linear', 'Conv2d', + 'ConvTranspose2d', 'MaxPool2d', 'ConvTranspose3d', 'MaxPool3d', 'Conv3d', + 'initialize', 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit', + 'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit', + 'Caffe2XavierInit', 'MODELS', 'build_model_from_cfg' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py new file mode 100644 index 000000000000..89e36b8c7851 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py @@ -0,0 +1,61 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging + +import torch.nn as nn + + +class AlexNet(nn.Module): + """AlexNet backbone. + + Args: + num_classes (int): number of classes for classification. + """ + + def __init__(self, num_classes=-1): + super(AlexNet, self).__init__() + self.num_classes = num_classes + self.features = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(64, 192, kernel_size=5, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(192, 384, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(384, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + ) + if self.num_classes > 0: + self.classifier = nn.Sequential( + nn.Dropout(), + nn.Linear(256 * 6 * 6, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + nn.Linear(4096, num_classes), + ) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + from ..runner import load_checkpoint + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + # use default initializer + pass + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + + x = self.features(x) + if self.num_classes > 0: + x = x.view(x.size(0), 256 * 6 * 6) + x = self.classifier(x) + + return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py new file mode 100644 index 000000000000..0f33124ed23f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py @@ -0,0 +1,35 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .activation import build_activation_layer +from .context_block import ContextBlock +from .conv import build_conv_layer +from .conv2d_adaptive_padding import Conv2dAdaptivePadding +from .conv_module import ConvModule +from .conv_ws import ConvAWS2d, ConvWS2d, conv_ws_2d +from .depthwise_separable_conv_module import DepthwiseSeparableConvModule +from .drop import Dropout, DropPath +from .generalized_attention import GeneralizedAttention +from .hsigmoid import HSigmoid +from .hswish import HSwish +from .non_local import NonLocal1d, NonLocal2d, NonLocal3d +from .norm import build_norm_layer, is_norm +from .padding import build_padding_layer +from .plugin import build_plugin_layer +from .registry import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, + PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS) +from .scale import Scale +from .swish import Swish +from .upsample import build_upsample_layer +from .wrappers import (Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d, + Linear, MaxPool2d, MaxPool3d) + +__all__ = [ + 'ConvModule', 'build_activation_layer', 'build_conv_layer', + 'build_norm_layer', 'build_padding_layer', 'build_upsample_layer', + 'build_plugin_layer', 'is_norm', 'HSigmoid', 'HSwish', 'NonLocal1d', + 'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'GeneralizedAttention', + 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', 'PADDING_LAYERS', + 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', 'ConvAWS2d', 'ConvWS2d', + 'conv_ws_2d', 'DepthwiseSeparableConvModule', 'Swish', 'Linear', + 'Conv2dAdaptivePadding', 'Conv2d', 'ConvTranspose2d', 'MaxPool2d', + 'ConvTranspose3d', 'MaxPool3d', 'Conv3d', 'Dropout', 'DropPath' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py new file mode 100644 index 000000000000..f59de07583f2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py @@ -0,0 +1,92 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, build_from_cfg, digit_version +from .registry import ACTIVATION_LAYERS + +for module in [ + nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.RReLU, nn.ReLU6, nn.ELU, + nn.Sigmoid, nn.Tanh +]: + ACTIVATION_LAYERS.register_module(module=module) + + +@ACTIVATION_LAYERS.register_module(name='Clip') +@ACTIVATION_LAYERS.register_module() +class Clamp(nn.Module): + """Clamp activation layer. + + This activation function is to clamp the feature map value within + :math:`[min, max]`. More details can be found in ``torch.clamp()``. + + Args: + min (Number | optional): Lower-bound of the range to be clamped to. + Default to -1. + max (Number | optional): Upper-bound of the range to be clamped to. + Default to 1. + """ + + def __init__(self, min=-1., max=1.): + super(Clamp, self).__init__() + self.min = min + self.max = max + + def forward(self, x): + """Forward function. + + Args: + x (torch.Tensor): The input tensor. + + Returns: + torch.Tensor: Clamped tensor. + """ + return torch.clamp(x, min=self.min, max=self.max) + + +class GELU(nn.Module): + r"""Applies the Gaussian Error Linear Units function: + + .. math:: + \text{GELU}(x) = x * \Phi(x) + where :math:`\Phi(x)` is the Cumulative Distribution Function for + Gaussian Distribution. + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + .. image:: scripts/activation_images/GELU.png + + Examples:: + + >>> m = nn.GELU() + >>> input = torch.randn(2) + >>> output = m(input) + """ + + def forward(self, input): + return F.gelu(input) + + +if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.4')): + ACTIVATION_LAYERS.register_module(module=GELU) +else: + ACTIVATION_LAYERS.register_module(module=nn.GELU) + + +def build_activation_layer(cfg): + """Build activation layer. + + Args: + cfg (dict): The activation layer config, which should contain: + - type (str): Layer type. + - layer args: Args needed to instantiate an activation layer. + + Returns: + nn.Module: Created activation layer. + """ + return build_from_cfg(cfg, ACTIVATION_LAYERS) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py new file mode 100644 index 000000000000..d60fdb904c74 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py @@ -0,0 +1,125 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn + +from ..utils import constant_init, kaiming_init +from .registry import PLUGIN_LAYERS + + +def last_zero_init(m): + if isinstance(m, nn.Sequential): + constant_init(m[-1], val=0) + else: + constant_init(m, val=0) + + +@PLUGIN_LAYERS.register_module() +class ContextBlock(nn.Module): + """ContextBlock module in GCNet. + + See 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + (https://arxiv.org/abs/1904.11492) for details. + + Args: + in_channels (int): Channels of the input feature map. + ratio (float): Ratio of channels of transform bottleneck + pooling_type (str): Pooling method for context modeling. + Options are 'att' and 'avg', stand for attention pooling and + average pooling respectively. Default: 'att'. + fusion_types (Sequence[str]): Fusion method for feature fusion, + Options are 'channels_add', 'channel_mul', stand for channelwise + addition and multiplication respectively. Default: ('channel_add',) + """ + + _abbr_ = 'context_block' + + def __init__(self, + in_channels, + ratio, + pooling_type='att', + fusion_types=('channel_add', )): + super(ContextBlock, self).__init__() + assert pooling_type in ['avg', 'att'] + assert isinstance(fusion_types, (list, tuple)) + valid_fusion_types = ['channel_add', 'channel_mul'] + assert all([f in valid_fusion_types for f in fusion_types]) + assert len(fusion_types) > 0, 'at least one fusion should be used' + self.in_channels = in_channels + self.ratio = ratio + self.planes = int(in_channels * ratio) + self.pooling_type = pooling_type + self.fusion_types = fusion_types + if pooling_type == 'att': + self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=1) + self.softmax = nn.Softmax(dim=2) + else: + self.avg_pool = nn.AdaptiveAvgPool2d(1) + if 'channel_add' in fusion_types: + self.channel_add_conv = nn.Sequential( + nn.Conv2d(self.in_channels, self.planes, kernel_size=1), + nn.LayerNorm([self.planes, 1, 1]), + nn.ReLU(inplace=True), # yapf: disable + nn.Conv2d(self.planes, self.in_channels, kernel_size=1)) + else: + self.channel_add_conv = None + if 'channel_mul' in fusion_types: + self.channel_mul_conv = nn.Sequential( + nn.Conv2d(self.in_channels, self.planes, kernel_size=1), + nn.LayerNorm([self.planes, 1, 1]), + nn.ReLU(inplace=True), # yapf: disable + nn.Conv2d(self.planes, self.in_channels, kernel_size=1)) + else: + self.channel_mul_conv = None + self.reset_parameters() + + def reset_parameters(self): + if self.pooling_type == 'att': + kaiming_init(self.conv_mask, mode='fan_in') + self.conv_mask.inited = True + + if self.channel_add_conv is not None: + last_zero_init(self.channel_add_conv) + if self.channel_mul_conv is not None: + last_zero_init(self.channel_mul_conv) + + def spatial_pool(self, x): + batch, channel, height, width = x.size() + if self.pooling_type == 'att': + input_x = x + # [N, C, H * W] + input_x = input_x.view(batch, channel, height * width) + # [N, 1, C, H * W] + input_x = input_x.unsqueeze(1) + # [N, 1, H, W] + context_mask = self.conv_mask(x) + # [N, 1, H * W] + context_mask = context_mask.view(batch, 1, height * width) + # [N, 1, H * W] + context_mask = self.softmax(context_mask) + # [N, 1, H * W, 1] + context_mask = context_mask.unsqueeze(-1) + # [N, 1, C, 1] + context = torch.matmul(input_x, context_mask) + # [N, C, 1, 1] + context = context.view(batch, channel, 1, 1) + else: + # [N, C, 1, 1] + context = self.avg_pool(x) + + return context + + def forward(self, x): + # [N, C, 1, 1] + context = self.spatial_pool(x) + + out = x + if self.channel_mul_conv is not None: + # [N, C, 1, 1] + channel_mul_term = torch.sigmoid(self.channel_mul_conv(context)) + out = out * channel_mul_term + if self.channel_add_conv is not None: + # [N, C, 1, 1] + channel_add_term = self.channel_add_conv(context) + out = out + channel_add_term + + return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv.py new file mode 100644 index 000000000000..cf54491997a4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv.py @@ -0,0 +1,44 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from torch import nn + +from .registry import CONV_LAYERS + +CONV_LAYERS.register_module('Conv1d', module=nn.Conv1d) +CONV_LAYERS.register_module('Conv2d', module=nn.Conv2d) +CONV_LAYERS.register_module('Conv3d', module=nn.Conv3d) +CONV_LAYERS.register_module('Conv', module=nn.Conv2d) + + +def build_conv_layer(cfg, *args, **kwargs): + """Build convolution layer. + + Args: + cfg (None or dict): The conv layer config, which should contain: + - type (str): Layer type. + - layer args: Args needed to instantiate an conv layer. + args (argument list): Arguments passed to the `__init__` + method of the corresponding conv layer. + kwargs (keyword arguments): Keyword arguments passed to the `__init__` + method of the corresponding conv layer. + + Returns: + nn.Module: Created conv layer. + """ + if cfg is None: + cfg_ = dict(type='Conv2d') + else: + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in CONV_LAYERS: + raise KeyError(f'Unrecognized norm type {layer_type}') + else: + conv_layer = CONV_LAYERS.get(layer_type) + + layer = conv_layer(*args, **kwargs, **cfg_) + + return layer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py new file mode 100644 index 000000000000..b45e758ac6cf --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py @@ -0,0 +1,62 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +from torch import nn +from torch.nn import functional as F + +from .registry import CONV_LAYERS + + +@CONV_LAYERS.register_module() +class Conv2dAdaptivePadding(nn.Conv2d): + """Implementation of 2D convolution in tensorflow with `padding` as "same", + which applies padding to input (if needed) so that input image gets fully + covered by filter and stride you specified. For stride 1, this will ensure + that output image size is same as input. For stride of 2, output dimensions + will be half, for example. + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. Default: 0 + dilation (int or tuple, optional): Spacing between kernel elements. + Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the + output. Default: ``True`` + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True): + super().__init__(in_channels, out_channels, kernel_size, stride, 0, + dilation, groups, bias) + + def forward(self, x): + img_h, img_w = x.size()[-2:] + kernel_h, kernel_w = self.weight.size()[-2:] + stride_h, stride_w = self.stride + output_h = math.ceil(img_h / stride_h) + output_w = math.ceil(img_w / stride_w) + pad_h = ( + max((output_h - 1) * self.stride[0] + + (kernel_h - 1) * self.dilation[0] + 1 - img_h, 0)) + pad_w = ( + max((output_w - 1) * self.stride[1] + + (kernel_w - 1) * self.dilation[1] + 1 - img_w, 0)) + if pad_h > 0 or pad_w > 0: + x = F.pad(x, [ + pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2 + ]) + return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py new file mode 100644 index 000000000000..a585314151bd --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py @@ -0,0 +1,206 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch.nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import _BatchNorm, _InstanceNorm +from ..utils import constant_init, kaiming_init +from .activation import build_activation_layer +from .conv import build_conv_layer +from .norm import build_norm_layer +from .padding import build_padding_layer +from .registry import PLUGIN_LAYERS + + +@PLUGIN_LAYERS.register_module() +class ConvModule(nn.Module): + """A conv block that bundles conv/norm/activation layers. + + This block simplifies the usage of convolution layers, which are commonly + used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU). + It is based upon three build methods: `build_conv_layer()`, + `build_norm_layer()` and `build_activation_layer()`. + + Besides, we add some additional features in this module. + 1. Automatically set `bias` of the conv layer. + 2. Spectral norm is supported. + 3. More padding modes are supported. Before PyTorch 1.5, nn.Conv2d only + supports zero and circular padding, and we add "reflect" padding mode. + + Args: + in_channels (int): Number of channels in the input feature map. + Same as that in ``nn._ConvNd``. + out_channels (int): Number of channels produced by the convolution. + Same as that in ``nn._ConvNd``. + kernel_size (int | tuple[int]): Size of the convolving kernel. + Same as that in ``nn._ConvNd``. + stride (int | tuple[int]): Stride of the convolution. + Same as that in ``nn._ConvNd``. + padding (int | tuple[int]): Zero-padding added to both sides of + the input. Same as that in ``nn._ConvNd``. + dilation (int | tuple[int]): Spacing between kernel elements. + Same as that in ``nn._ConvNd``. + groups (int): Number of blocked connections from input channels to + output channels. Same as that in ``nn._ConvNd``. + bias (bool | str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise + False. Default: "auto". + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU'). + inplace (bool): Whether to use inplace mode for activation. + Default: True. + with_spectral_norm (bool): Whether use spectral norm in conv module. + Default: False. + padding_mode (str): If the `padding_mode` has not been supported by + current `Conv2d` in PyTorch, we will use our own padding layer + instead. Currently, we support ['zeros', 'circular'] with official + implementation and ['reflect'] with our own implementation. + Default: 'zeros'. + order (tuple[str]): The order of conv/norm/activation layers. It is a + sequence of "conv", "norm" and "act". Common examples are + ("conv", "norm", "act") and ("act", "conv", "norm"). + Default: ('conv', 'norm', 'act'). + """ + + _abbr_ = 'conv_block' + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias='auto', + conv_cfg=None, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + inplace=True, + with_spectral_norm=False, + padding_mode='zeros', + order=('conv', 'norm', 'act')): + super(ConvModule, self).__init__() + assert conv_cfg is None or isinstance(conv_cfg, dict) + assert norm_cfg is None or isinstance(norm_cfg, dict) + assert act_cfg is None or isinstance(act_cfg, dict) + official_padding_mode = ['zeros', 'circular'] + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.inplace = inplace + self.with_spectral_norm = with_spectral_norm + self.with_explicit_padding = padding_mode not in official_padding_mode + self.order = order + assert isinstance(self.order, tuple) and len(self.order) == 3 + assert set(order) == set(['conv', 'norm', 'act']) + + self.with_norm = norm_cfg is not None + self.with_activation = act_cfg is not None + # if the conv layer is before a norm layer, bias is unnecessary. + if bias == 'auto': + bias = not self.with_norm + self.with_bias = bias + + if self.with_explicit_padding: + pad_cfg = dict(type=padding_mode) + self.padding_layer = build_padding_layer(pad_cfg, padding) + + # reset padding to 0 for conv module + conv_padding = 0 if self.with_explicit_padding else padding + # build convolution layer + self.conv = build_conv_layer( + conv_cfg, + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=conv_padding, + dilation=dilation, + groups=groups, + bias=bias) + # export the attributes of self.conv to a higher level for convenience + self.in_channels = self.conv.in_channels + self.out_channels = self.conv.out_channels + self.kernel_size = self.conv.kernel_size + self.stride = self.conv.stride + self.padding = padding + self.dilation = self.conv.dilation + self.transposed = self.conv.transposed + self.output_padding = self.conv.output_padding + self.groups = self.conv.groups + + if self.with_spectral_norm: + self.conv = nn.utils.spectral_norm(self.conv) + + # build normalization layers + if self.with_norm: + # norm layer is after conv layer + if order.index('norm') > order.index('conv'): + norm_channels = out_channels + else: + norm_channels = in_channels + self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels) + self.add_module(self.norm_name, norm) + if self.with_bias: + if isinstance(norm, (_BatchNorm, _InstanceNorm)): + warnings.warn( + 'Unnecessary conv bias before batch/instance norm') + else: + self.norm_name = None + + # build activation layer + if self.with_activation: + act_cfg_ = act_cfg.copy() + # nn.Tanh has no 'inplace' argument + if act_cfg_['type'] not in [ + 'Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish' + ]: + act_cfg_.setdefault('inplace', inplace) + self.activate = build_activation_layer(act_cfg_) + + # Use msra init by default + self.init_weights() + + @property + def norm(self): + if self.norm_name: + return getattr(self, self.norm_name) + else: + return None + + def init_weights(self): + # 1. It is mainly for customized conv layers with their own + # initialization manners by calling their own ``init_weights()``, + # and we do not want ConvModule to override the initialization. + # 2. For customized conv layers without their own initialization + # manners (that is, they don't have their own ``init_weights()``) + # and PyTorch's conv layers, they will be initialized by + # this method with default ``kaiming_init``. + # Note: For PyTorch's conv layers, they will be overwritten by our + # initialization implementation using default ``kaiming_init``. + if not hasattr(self.conv, 'init_weights'): + if self.with_activation and self.act_cfg['type'] == 'LeakyReLU': + nonlinearity = 'leaky_relu' + a = self.act_cfg.get('negative_slope', 0.01) + else: + nonlinearity = 'relu' + a = 0 + kaiming_init(self.conv, a=a, nonlinearity=nonlinearity) + if self.with_norm: + constant_init(self.norm, 1, bias=0) + + def forward(self, x, activate=True, norm=True): + for layer in self.order: + if layer == 'conv': + if self.with_explicit_padding: + x = self.padding_layer(x) + x = self.conv(x) + elif layer == 'norm' and norm and self.with_norm: + x = self.norm(x) + elif layer == 'act' and activate and self.with_activation: + x = self.activate(x) + return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py new file mode 100644 index 000000000000..a3941e278749 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py @@ -0,0 +1,148 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .registry import CONV_LAYERS + + +def conv_ws_2d(input, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + eps=1e-5): + c_in = weight.size(0) + weight_flat = weight.view(c_in, -1) + mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) + std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) + weight = (weight - mean) / (std + eps) + return F.conv2d(input, weight, bias, stride, padding, dilation, groups) + + +@CONV_LAYERS.register_module('ConvWS') +class ConvWS2d(nn.Conv2d): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + eps=1e-5): + super(ConvWS2d, self).__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias) + self.eps = eps + + def forward(self, x): + return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups, self.eps) + + +@CONV_LAYERS.register_module(name='ConvAWS') +class ConvAWS2d(nn.Conv2d): + """AWS (Adaptive Weight Standardization) + + This is a variant of Weight Standardization + (https://arxiv.org/pdf/1903.10520.pdf) + It is used in DetectoRS to avoid NaN + (https://arxiv.org/pdf/2006.02334.pdf) + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the conv kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. Default: 0 + dilation (int or tuple, optional): Spacing between kernel elements. + Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. Default: 1 + bias (bool, optional): If set True, adds a learnable bias to the + output. Default: True + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True): + super().__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias) + self.register_buffer('weight_gamma', + torch.ones(self.out_channels, 1, 1, 1)) + self.register_buffer('weight_beta', + torch.zeros(self.out_channels, 1, 1, 1)) + + def _get_weight(self, weight): + weight_flat = weight.view(weight.size(0), -1) + mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1) + std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) + weight = (weight - mean) / std + weight = self.weight_gamma * weight + self.weight_beta + return weight + + def forward(self, x): + weight = self._get_weight(self.weight) + return F.conv2d(x, weight, self.bias, self.stride, self.padding, + self.dilation, self.groups) + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + """Override default load function. + + AWS overrides the function _load_from_state_dict to recover + weight_gamma and weight_beta if they are missing. If weight_gamma and + weight_beta are found in the checkpoint, this function will return + after super()._load_from_state_dict. Otherwise, it will compute the + mean and std of the pretrained weights and store them in weight_beta + and weight_gamma. + """ + + self.weight_gamma.data.fill_(-1) + local_missing_keys = [] + super()._load_from_state_dict(state_dict, prefix, local_metadata, + strict, local_missing_keys, + unexpected_keys, error_msgs) + if self.weight_gamma.data.mean() > 0: + for k in local_missing_keys: + missing_keys.append(k) + return + weight = self.weight.data + weight_flat = weight.view(weight.size(0), -1) + mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1) + std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) + self.weight_beta.data.copy_(mean) + self.weight_gamma.data.copy_(std) + missing_gamma_beta = [ + k for k in local_missing_keys + if k.endswith('weight_gamma') or k.endswith('weight_beta') + ] + for k in missing_gamma_beta: + local_missing_keys.remove(k) + for k in local_missing_keys: + missing_keys.append(k) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py new file mode 100644 index 000000000000..722d5d8d71f7 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py @@ -0,0 +1,96 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn + +from .conv_module import ConvModule + + +class DepthwiseSeparableConvModule(nn.Module): + """Depthwise separable convolution module. + + See https://arxiv.org/pdf/1704.04861.pdf for details. + + This module can replace a ConvModule with the conv block replaced by two + conv block: depthwise conv block and pointwise conv block. The depthwise + conv block contains depthwise-conv/norm/activation layers. The pointwise + conv block contains pointwise-conv/norm/activation layers. It should be + noted that there will be norm/activation layer in the depthwise conv block + if `norm_cfg` and `act_cfg` are specified. + + Args: + in_channels (int): Number of channels in the input feature map. + Same as that in ``nn._ConvNd``. + out_channels (int): Number of channels produced by the convolution. + Same as that in ``nn._ConvNd``. + kernel_size (int | tuple[int]): Size of the convolving kernel. + Same as that in ``nn._ConvNd``. + stride (int | tuple[int]): Stride of the convolution. + Same as that in ``nn._ConvNd``. Default: 1. + padding (int | tuple[int]): Zero-padding added to both sides of + the input. Same as that in ``nn._ConvNd``. Default: 0. + dilation (int | tuple[int]): Spacing between kernel elements. + Same as that in ``nn._ConvNd``. Default: 1. + norm_cfg (dict): Default norm config for both depthwise ConvModule and + pointwise ConvModule. Default: None. + act_cfg (dict): Default activation config for both depthwise ConvModule + and pointwise ConvModule. Default: dict(type='ReLU'). + dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is + 'default', it will be the same as `norm_cfg`. Default: 'default'. + dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is + 'default', it will be the same as `act_cfg`. Default: 'default'. + pw_norm_cfg (dict): Norm config of pointwise ConvModule. If it is + 'default', it will be the same as `norm_cfg`. Default: 'default'. + pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is + 'default', it will be the same as `act_cfg`. Default: 'default'. + kwargs (optional): Other shared arguments for depthwise and pointwise + ConvModule. See ConvModule for ref. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + dw_norm_cfg='default', + dw_act_cfg='default', + pw_norm_cfg='default', + pw_act_cfg='default', + **kwargs): + super(DepthwiseSeparableConvModule, self).__init__() + assert 'groups' not in kwargs, 'groups should not be specified' + + # if norm/activation config of depthwise/pointwise ConvModule is not + # specified, use default config. + dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg + dw_act_cfg = dw_act_cfg if dw_act_cfg != 'default' else act_cfg + pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg + pw_act_cfg = pw_act_cfg if pw_act_cfg != 'default' else act_cfg + + # depthwise convolution + self.depthwise_conv = ConvModule( + in_channels, + in_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=in_channels, + norm_cfg=dw_norm_cfg, + act_cfg=dw_act_cfg, + **kwargs) + + self.pointwise_conv = ConvModule( + in_channels, + out_channels, + 1, + norm_cfg=pw_norm_cfg, + act_cfg=pw_act_cfg, + **kwargs) + + def forward(self, x): + x = self.depthwise_conv(x) + x = self.pointwise_conv(x) + return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py new file mode 100644 index 000000000000..4ae8a5a2534f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py @@ -0,0 +1,65 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import build_from_cfg +from .registry import DROPOUT_LAYERS + + +def drop_path(x, drop_prob=0., training=False): + """Drop paths (Stochastic Depth) per sample (when applied in main path of + residual blocks). + + We follow the implementation + https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py # noqa: E501 + """ + if drop_prob == 0. or not training: + return x + keep_prob = 1 - drop_prob + # handle tensors with different dimensions, not just 4D tensors. + shape = (x.shape[0], ) + (1, ) * (x.ndim - 1) + random_tensor = keep_prob + torch.rand( + shape, dtype=x.dtype, device=x.device) + output = x.div(keep_prob) * random_tensor.floor() + return output + + +@DROPOUT_LAYERS.register_module() +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of + residual blocks). + + We follow the implementation + https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py # noqa: E501 + + Args: + drop_prob (float): Probability of the path to be zeroed. Default: 0.1 + """ + + def __init__(self, drop_prob=0.1): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training) + + +@DROPOUT_LAYERS.register_module() +class Dropout(nn.Dropout): + """A wrapper for ``torch.nn.Dropout``, We rename the ``p`` of + ``torch.nn.Dropout`` to ``drop_prob`` so as to be consistent with + ``DropPath`` + + Args: + drop_prob (float): Probability of the elements to be + zeroed. Default: 0.5. + inplace (bool): Do the operation inplace or not. Default: False. + """ + + def __init__(self, drop_prob=0.5, inplace=False): + super().__init__(p=drop_prob, inplace=inplace) + + +def build_dropout(cfg, default_args=None): + """Builder for drop out layers.""" + return build_from_cfg(cfg, DROPOUT_LAYERS, default_args) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py new file mode 100644 index 000000000000..988d9adf2f28 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py @@ -0,0 +1,412 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..utils import kaiming_init +from .registry import PLUGIN_LAYERS + + +@PLUGIN_LAYERS.register_module() +class GeneralizedAttention(nn.Module): + """GeneralizedAttention module. + + See 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks' + (https://arxiv.org/abs/1711.07971) for details. + + Args: + in_channels (int): Channels of the input feature map. + spatial_range (int): The spatial range. -1 indicates no spatial range + constraint. Default: -1. + num_heads (int): The head number of empirical_attention module. + Default: 9. + position_embedding_dim (int): The position embedding dimension. + Default: -1. + position_magnitude (int): A multiplier acting on coord difference. + Default: 1. + kv_stride (int): The feature stride acting on key/value feature map. + Default: 2. + q_stride (int): The feature stride acting on query feature map. + Default: 1. + attention_type (str): A binary indicator string for indicating which + items in generalized empirical_attention module are used. + Default: '1111'. + + - '1000' indicates 'query and key content' (appr - appr) item, + - '0100' indicates 'query content and relative position' + (appr - position) item, + - '0010' indicates 'key content only' (bias - appr) item, + - '0001' indicates 'relative position only' (bias - position) item. + """ + + _abbr_ = 'gen_attention_block' + + def __init__(self, + in_channels, + spatial_range=-1, + num_heads=9, + position_embedding_dim=-1, + position_magnitude=1, + kv_stride=2, + q_stride=1, + attention_type='1111'): + + super(GeneralizedAttention, self).__init__() + + # hard range means local range for non-local operation + self.position_embedding_dim = ( + position_embedding_dim + if position_embedding_dim > 0 else in_channels) + + self.position_magnitude = position_magnitude + self.num_heads = num_heads + self.in_channels = in_channels + self.spatial_range = spatial_range + self.kv_stride = kv_stride + self.q_stride = q_stride + self.attention_type = [bool(int(_)) for _ in attention_type] + self.qk_embed_dim = in_channels // num_heads + out_c = self.qk_embed_dim * num_heads + + if self.attention_type[0] or self.attention_type[1]: + self.query_conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_c, + kernel_size=1, + bias=False) + self.query_conv.kaiming_init = True + + if self.attention_type[0] or self.attention_type[2]: + self.key_conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_c, + kernel_size=1, + bias=False) + self.key_conv.kaiming_init = True + + self.v_dim = in_channels // num_heads + self.value_conv = nn.Conv2d( + in_channels=in_channels, + out_channels=self.v_dim * num_heads, + kernel_size=1, + bias=False) + self.value_conv.kaiming_init = True + + if self.attention_type[1] or self.attention_type[3]: + self.appr_geom_fc_x = nn.Linear( + self.position_embedding_dim // 2, out_c, bias=False) + self.appr_geom_fc_x.kaiming_init = True + + self.appr_geom_fc_y = nn.Linear( + self.position_embedding_dim // 2, out_c, bias=False) + self.appr_geom_fc_y.kaiming_init = True + + if self.attention_type[2]: + stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2) + appr_bias_value = -2 * stdv * torch.rand(out_c) + stdv + self.appr_bias = nn.Parameter(appr_bias_value) + + if self.attention_type[3]: + stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2) + geom_bias_value = -2 * stdv * torch.rand(out_c) + stdv + self.geom_bias = nn.Parameter(geom_bias_value) + + self.proj_conv = nn.Conv2d( + in_channels=self.v_dim * num_heads, + out_channels=in_channels, + kernel_size=1, + bias=True) + self.proj_conv.kaiming_init = True + self.gamma = nn.Parameter(torch.zeros(1)) + + if self.spatial_range >= 0: + # only works when non local is after 3*3 conv + if in_channels == 256: + max_len = 84 + elif in_channels == 512: + max_len = 42 + + max_len_kv = int((max_len - 1.0) / self.kv_stride + 1) + local_constraint_map = np.ones( + (max_len, max_len, max_len_kv, max_len_kv), dtype=np.int) + for iy in range(max_len): + for ix in range(max_len): + local_constraint_map[ + iy, ix, + max((iy - self.spatial_range) // + self.kv_stride, 0):min((iy + self.spatial_range + + 1) // self.kv_stride + + 1, max_len), + max((ix - self.spatial_range) // + self.kv_stride, 0):min((ix + self.spatial_range + + 1) // self.kv_stride + + 1, max_len)] = 0 + + self.local_constraint_map = nn.Parameter( + torch.from_numpy(local_constraint_map).byte(), + requires_grad=False) + + if self.q_stride > 1: + self.q_downsample = nn.AvgPool2d( + kernel_size=1, stride=self.q_stride) + else: + self.q_downsample = None + + if self.kv_stride > 1: + self.kv_downsample = nn.AvgPool2d( + kernel_size=1, stride=self.kv_stride) + else: + self.kv_downsample = None + + self.init_weights() + + def get_position_embedding(self, + h, + w, + h_kv, + w_kv, + q_stride, + kv_stride, + device, + dtype, + feat_dim, + wave_length=1000): + # the default type of Tensor is float32, leading to type mismatch + # in fp16 mode. Cast it to support fp16 mode. + h_idxs = torch.linspace(0, h - 1, h).to(device=device, dtype=dtype) + h_idxs = h_idxs.view((h, 1)) * q_stride + + w_idxs = torch.linspace(0, w - 1, w).to(device=device, dtype=dtype) + w_idxs = w_idxs.view((w, 1)) * q_stride + + h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).to( + device=device, dtype=dtype) + h_kv_idxs = h_kv_idxs.view((h_kv, 1)) * kv_stride + + w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).to( + device=device, dtype=dtype) + w_kv_idxs = w_kv_idxs.view((w_kv, 1)) * kv_stride + + # (h, h_kv, 1) + h_diff = h_idxs.unsqueeze(1) - h_kv_idxs.unsqueeze(0) + h_diff *= self.position_magnitude + + # (w, w_kv, 1) + w_diff = w_idxs.unsqueeze(1) - w_kv_idxs.unsqueeze(0) + w_diff *= self.position_magnitude + + feat_range = torch.arange(0, feat_dim / 4).to( + device=device, dtype=dtype) + + dim_mat = torch.Tensor([wave_length]).to(device=device, dtype=dtype) + dim_mat = dim_mat**((4. / feat_dim) * feat_range) + dim_mat = dim_mat.view((1, 1, -1)) + + embedding_x = torch.cat( + ((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2) + + embedding_y = torch.cat( + ((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2) + + return embedding_x, embedding_y + + def forward(self, x_input): + num_heads = self.num_heads + + # use empirical_attention + if self.q_downsample is not None: + x_q = self.q_downsample(x_input) + else: + x_q = x_input + n, _, h, w = x_q.shape + + if self.kv_downsample is not None: + x_kv = self.kv_downsample(x_input) + else: + x_kv = x_input + _, _, h_kv, w_kv = x_kv.shape + + if self.attention_type[0] or self.attention_type[1]: + proj_query = self.query_conv(x_q).view( + (n, num_heads, self.qk_embed_dim, h * w)) + proj_query = proj_query.permute(0, 1, 3, 2) + + if self.attention_type[0] or self.attention_type[2]: + proj_key = self.key_conv(x_kv).view( + (n, num_heads, self.qk_embed_dim, h_kv * w_kv)) + + if self.attention_type[1] or self.attention_type[3]: + position_embed_x, position_embed_y = self.get_position_embedding( + h, w, h_kv, w_kv, self.q_stride, self.kv_stride, + x_input.device, x_input.dtype, self.position_embedding_dim) + # (n, num_heads, w, w_kv, dim) + position_feat_x = self.appr_geom_fc_x(position_embed_x).\ + view(1, w, w_kv, num_heads, self.qk_embed_dim).\ + permute(0, 3, 1, 2, 4).\ + repeat(n, 1, 1, 1, 1) + + # (n, num_heads, h, h_kv, dim) + position_feat_y = self.appr_geom_fc_y(position_embed_y).\ + view(1, h, h_kv, num_heads, self.qk_embed_dim).\ + permute(0, 3, 1, 2, 4).\ + repeat(n, 1, 1, 1, 1) + + position_feat_x /= math.sqrt(2) + position_feat_y /= math.sqrt(2) + + # accelerate for saliency only + if (np.sum(self.attention_type) == 1) and self.attention_type[2]: + appr_bias = self.appr_bias.\ + view(1, num_heads, 1, self.qk_embed_dim).\ + repeat(n, 1, 1, 1) + + energy = torch.matmul(appr_bias, proj_key).\ + view(n, num_heads, 1, h_kv * w_kv) + + h = 1 + w = 1 + else: + # (n, num_heads, h*w, h_kv*w_kv), query before key, 540mb for + if not self.attention_type[0]: + energy = torch.zeros( + n, + num_heads, + h, + w, + h_kv, + w_kv, + dtype=x_input.dtype, + device=x_input.device) + + # attention_type[0]: appr - appr + # attention_type[1]: appr - position + # attention_type[2]: bias - appr + # attention_type[3]: bias - position + if self.attention_type[0] or self.attention_type[2]: + if self.attention_type[0] and self.attention_type[2]: + appr_bias = self.appr_bias.\ + view(1, num_heads, 1, self.qk_embed_dim) + energy = torch.matmul(proj_query + appr_bias, proj_key).\ + view(n, num_heads, h, w, h_kv, w_kv) + + elif self.attention_type[0]: + energy = torch.matmul(proj_query, proj_key).\ + view(n, num_heads, h, w, h_kv, w_kv) + + elif self.attention_type[2]: + appr_bias = self.appr_bias.\ + view(1, num_heads, 1, self.qk_embed_dim).\ + repeat(n, 1, 1, 1) + + energy += torch.matmul(appr_bias, proj_key).\ + view(n, num_heads, 1, 1, h_kv, w_kv) + + if self.attention_type[1] or self.attention_type[3]: + if self.attention_type[1] and self.attention_type[3]: + geom_bias = self.geom_bias.\ + view(1, num_heads, 1, self.qk_embed_dim) + + proj_query_reshape = (proj_query + geom_bias).\ + view(n, num_heads, h, w, self.qk_embed_dim) + + energy_x = torch.matmul( + proj_query_reshape.permute(0, 1, 3, 2, 4), + position_feat_x.permute(0, 1, 2, 4, 3)) + energy_x = energy_x.\ + permute(0, 1, 3, 2, 4).unsqueeze(4) + + energy_y = torch.matmul( + proj_query_reshape, + position_feat_y.permute(0, 1, 2, 4, 3)) + energy_y = energy_y.unsqueeze(5) + + energy += energy_x + energy_y + + elif self.attention_type[1]: + proj_query_reshape = proj_query.\ + view(n, num_heads, h, w, self.qk_embed_dim) + proj_query_reshape = proj_query_reshape.\ + permute(0, 1, 3, 2, 4) + position_feat_x_reshape = position_feat_x.\ + permute(0, 1, 2, 4, 3) + position_feat_y_reshape = position_feat_y.\ + permute(0, 1, 2, 4, 3) + + energy_x = torch.matmul(proj_query_reshape, + position_feat_x_reshape) + energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4) + + energy_y = torch.matmul(proj_query_reshape, + position_feat_y_reshape) + energy_y = energy_y.unsqueeze(5) + + energy += energy_x + energy_y + + elif self.attention_type[3]: + geom_bias = self.geom_bias.\ + view(1, num_heads, self.qk_embed_dim, 1).\ + repeat(n, 1, 1, 1) + + position_feat_x_reshape = position_feat_x.\ + view(n, num_heads, w*w_kv, self.qk_embed_dim) + + position_feat_y_reshape = position_feat_y.\ + view(n, num_heads, h * h_kv, self.qk_embed_dim) + + energy_x = torch.matmul(position_feat_x_reshape, geom_bias) + energy_x = energy_x.view(n, num_heads, 1, w, 1, w_kv) + + energy_y = torch.matmul(position_feat_y_reshape, geom_bias) + energy_y = energy_y.view(n, num_heads, h, 1, h_kv, 1) + + energy += energy_x + energy_y + + energy = energy.view(n, num_heads, h * w, h_kv * w_kv) + + if self.spatial_range >= 0: + cur_local_constraint_map = \ + self.local_constraint_map[:h, :w, :h_kv, :w_kv].\ + contiguous().\ + view(1, 1, h*w, h_kv*w_kv) + + energy = energy.masked_fill_(cur_local_constraint_map, + float('-inf')) + + attention = F.softmax(energy, 3) + + proj_value = self.value_conv(x_kv) + proj_value_reshape = proj_value.\ + view((n, num_heads, self.v_dim, h_kv * w_kv)).\ + permute(0, 1, 3, 2) + + out = torch.matmul(attention, proj_value_reshape).\ + permute(0, 1, 3, 2).\ + contiguous().\ + view(n, self.v_dim * self.num_heads, h, w) + + out = self.proj_conv(out) + + # output is downsampled, upsample back to input size + if self.q_downsample is not None: + out = F.interpolate( + out, + size=x_input.shape[2:], + mode='bilinear', + align_corners=False) + + out = self.gamma * out + x_input + return out + + def init_weights(self): + for m in self.modules(): + if hasattr(m, 'kaiming_init') and m.kaiming_init: + kaiming_init( + m, + mode='fan_in', + nonlinearity='leaky_relu', + bias=0, + distribution='uniform', + a=1) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hsigmoid.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hsigmoid.py new file mode 100644 index 000000000000..30b1a3d6580c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hsigmoid.py @@ -0,0 +1,34 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn + +from .registry import ACTIVATION_LAYERS + + +@ACTIVATION_LAYERS.register_module() +class HSigmoid(nn.Module): + """Hard Sigmoid Module. Apply the hard sigmoid function: + Hsigmoid(x) = min(max((x + bias) / divisor, min_value), max_value) + Default: Hsigmoid(x) = min(max((x + 1) / 2, 0), 1) + + Args: + bias (float): Bias of the input feature map. Default: 1.0. + divisor (float): Divisor of the input feature map. Default: 2.0. + min_value (float): Lower bound value. Default: 0.0. + max_value (float): Upper bound value. Default: 1.0. + + Returns: + Tensor: The output tensor. + """ + + def __init__(self, bias=1.0, divisor=2.0, min_value=0.0, max_value=1.0): + super(HSigmoid, self).__init__() + self.bias = bias + self.divisor = divisor + assert self.divisor != 0 + self.min_value = min_value + self.max_value = max_value + + def forward(self, x): + x = (x + self.bias) / self.divisor + + return x.clamp_(self.min_value, self.max_value) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hswish.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hswish.py new file mode 100644 index 000000000000..7e0c090ff037 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hswish.py @@ -0,0 +1,29 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn + +from .registry import ACTIVATION_LAYERS + + +@ACTIVATION_LAYERS.register_module() +class HSwish(nn.Module): + """Hard Swish Module. + + This module applies the hard swish function: + + .. math:: + Hswish(x) = x * ReLU6(x + 3) / 6 + + Args: + inplace (bool): can optionally do the operation in-place. + Default: False. + + Returns: + Tensor: The output tensor. + """ + + def __init__(self, inplace=False): + super(HSwish, self).__init__() + self.act = nn.ReLU6(inplace) + + def forward(self, x): + return x * self.act(x + 3) / 6 diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py new file mode 100644 index 000000000000..92d00155ef27 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py @@ -0,0 +1,306 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import ABCMeta + +import torch +import torch.nn as nn + +from ..utils import constant_init, normal_init +from .conv_module import ConvModule +from .registry import PLUGIN_LAYERS + + +class _NonLocalNd(nn.Module, metaclass=ABCMeta): + """Basic Non-local module. + + This module is proposed in + "Non-local Neural Networks" + Paper reference: https://arxiv.org/abs/1711.07971 + Code reference: https://github.com/AlexHex7/Non-local_pytorch + + Args: + in_channels (int): Channels of the input feature map. + reduction (int): Channel reduction ratio. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + `1/sqrt(inter_channels)` when the mode is `embedded_gaussian`. + Default: True. + conv_cfg (None | dict): The config dict for convolution layers. + If not specified, it will use `nn.Conv2d` for convolution layers. + Default: None. + norm_cfg (None | dict): The config dict for normalization layers. + Default: None. (This parameter is only applicable to conv_out.) + mode (str): Options are `gaussian`, `concatenation`, + `embedded_gaussian` and `dot_product`. Default: embedded_gaussian. + """ + + def __init__(self, + in_channels, + reduction=2, + use_scale=True, + conv_cfg=None, + norm_cfg=None, + mode='embedded_gaussian', + **kwargs): + super(_NonLocalNd, self).__init__() + self.in_channels = in_channels + self.reduction = reduction + self.use_scale = use_scale + self.inter_channels = max(in_channels // reduction, 1) + self.mode = mode + + if mode not in [ + 'gaussian', 'embedded_gaussian', 'dot_product', 'concatenation' + ]: + raise ValueError("Mode should be in 'gaussian', 'concatenation', " + f"'embedded_gaussian' or 'dot_product', but got " + f'{mode} instead.') + + # g, theta, phi are defaulted as `nn.ConvNd`. + # Here we use ConvModule for potential usage. + self.g = ConvModule( + self.in_channels, + self.inter_channels, + kernel_size=1, + conv_cfg=conv_cfg, + act_cfg=None) + self.conv_out = ConvModule( + self.inter_channels, + self.in_channels, + kernel_size=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + + if self.mode != 'gaussian': + self.theta = ConvModule( + self.in_channels, + self.inter_channels, + kernel_size=1, + conv_cfg=conv_cfg, + act_cfg=None) + self.phi = ConvModule( + self.in_channels, + self.inter_channels, + kernel_size=1, + conv_cfg=conv_cfg, + act_cfg=None) + + if self.mode == 'concatenation': + self.concat_project = ConvModule( + self.inter_channels * 2, + 1, + kernel_size=1, + stride=1, + padding=0, + bias=False, + act_cfg=dict(type='ReLU')) + + self.init_weights(**kwargs) + + def init_weights(self, std=0.01, zeros_init=True): + if self.mode != 'gaussian': + for m in [self.g, self.theta, self.phi]: + normal_init(m.conv, std=std) + else: + normal_init(self.g.conv, std=std) + if zeros_init: + if self.conv_out.norm_cfg is None: + constant_init(self.conv_out.conv, 0) + else: + constant_init(self.conv_out.norm, 0) + else: + if self.conv_out.norm_cfg is None: + normal_init(self.conv_out.conv, std=std) + else: + normal_init(self.conv_out.norm, std=std) + + def gaussian(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def embedded_gaussian(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + if self.use_scale: + # theta_x.shape[-1] is `self.inter_channels` + pairwise_weight /= theta_x.shape[-1]**0.5 + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def dot_product(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + pairwise_weight /= pairwise_weight.shape[-1] + return pairwise_weight + + def concatenation(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + h = theta_x.size(2) + w = phi_x.size(3) + theta_x = theta_x.repeat(1, 1, 1, w) + phi_x = phi_x.repeat(1, 1, h, 1) + + concat_feature = torch.cat([theta_x, phi_x], dim=1) + pairwise_weight = self.concat_project(concat_feature) + n, _, h, w = pairwise_weight.size() + pairwise_weight = pairwise_weight.view(n, h, w) + pairwise_weight /= pairwise_weight.shape[-1] + + return pairwise_weight + + def forward(self, x): + # Assume `reduction = 1`, then `inter_channels = C` + # or `inter_channels = C` when `mode="gaussian"` + + # NonLocal1d x: [N, C, H] + # NonLocal2d x: [N, C, H, W] + # NonLocal3d x: [N, C, T, H, W] + n = x.size(0) + + # NonLocal1d g_x: [N, H, C] + # NonLocal2d g_x: [N, HxW, C] + # NonLocal3d g_x: [N, TxHxW, C] + g_x = self.g(x).view(n, self.inter_channels, -1) + g_x = g_x.permute(0, 2, 1) + + # NonLocal1d theta_x: [N, H, C], phi_x: [N, C, H] + # NonLocal2d theta_x: [N, HxW, C], phi_x: [N, C, HxW] + # NonLocal3d theta_x: [N, TxHxW, C], phi_x: [N, C, TxHxW] + if self.mode == 'gaussian': + theta_x = x.view(n, self.in_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + if self.sub_sample: + phi_x = self.phi(x).view(n, self.in_channels, -1) + else: + phi_x = x.view(n, self.in_channels, -1) + elif self.mode == 'concatenation': + theta_x = self.theta(x).view(n, self.inter_channels, -1, 1) + phi_x = self.phi(x).view(n, self.inter_channels, 1, -1) + else: + theta_x = self.theta(x).view(n, self.inter_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + phi_x = self.phi(x).view(n, self.inter_channels, -1) + + pairwise_func = getattr(self, self.mode) + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = pairwise_func(theta_x, phi_x) + + # NonLocal1d y: [N, H, C] + # NonLocal2d y: [N, HxW, C] + # NonLocal3d y: [N, TxHxW, C] + y = torch.matmul(pairwise_weight, g_x) + # NonLocal1d y: [N, C, H] + # NonLocal2d y: [N, C, H, W] + # NonLocal3d y: [N, C, T, H, W] + y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, + *x.size()[2:]) + + output = x + self.conv_out(y) + + return output + + +class NonLocal1d(_NonLocalNd): + """1D Non-local module. + + Args: + in_channels (int): Same as `NonLocalND`. + sub_sample (bool): Whether to apply max pooling after pairwise + function (Note that the `sub_sample` is applied on spatial only). + Default: False. + conv_cfg (None | dict): Same as `NonLocalND`. + Default: dict(type='Conv1d'). + """ + + def __init__(self, + in_channels, + sub_sample=False, + conv_cfg=dict(type='Conv1d'), + **kwargs): + super(NonLocal1d, self).__init__( + in_channels, conv_cfg=conv_cfg, **kwargs) + + self.sub_sample = sub_sample + + if sub_sample: + max_pool_layer = nn.MaxPool1d(kernel_size=2) + self.g = nn.Sequential(self.g, max_pool_layer) + if self.mode != 'gaussian': + self.phi = nn.Sequential(self.phi, max_pool_layer) + else: + self.phi = max_pool_layer + + +@PLUGIN_LAYERS.register_module() +class NonLocal2d(_NonLocalNd): + """2D Non-local module. + + Args: + in_channels (int): Same as `NonLocalND`. + sub_sample (bool): Whether to apply max pooling after pairwise + function (Note that the `sub_sample` is applied on spatial only). + Default: False. + conv_cfg (None | dict): Same as `NonLocalND`. + Default: dict(type='Conv2d'). + """ + + _abbr_ = 'nonlocal_block' + + def __init__(self, + in_channels, + sub_sample=False, + conv_cfg=dict(type='Conv2d'), + **kwargs): + super(NonLocal2d, self).__init__( + in_channels, conv_cfg=conv_cfg, **kwargs) + + self.sub_sample = sub_sample + + if sub_sample: + max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2)) + self.g = nn.Sequential(self.g, max_pool_layer) + if self.mode != 'gaussian': + self.phi = nn.Sequential(self.phi, max_pool_layer) + else: + self.phi = max_pool_layer + + +class NonLocal3d(_NonLocalNd): + """3D Non-local module. + + Args: + in_channels (int): Same as `NonLocalND`. + sub_sample (bool): Whether to apply max pooling after pairwise + function (Note that the `sub_sample` is applied on spatial only). + Default: False. + conv_cfg (None | dict): Same as `NonLocalND`. + Default: dict(type='Conv3d'). + """ + + def __init__(self, + in_channels, + sub_sample=False, + conv_cfg=dict(type='Conv3d'), + **kwargs): + super(NonLocal3d, self).__init__( + in_channels, conv_cfg=conv_cfg, **kwargs) + self.sub_sample = sub_sample + + if sub_sample: + max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2)) + self.g = nn.Sequential(self.g, max_pool_layer) + if self.mode != 'gaussian': + self.phi = nn.Sequential(self.phi, max_pool_layer) + else: + self.phi = max_pool_layer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py new file mode 100644 index 000000000000..0502cba9a690 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py @@ -0,0 +1,144 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import inspect + +import torch.nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_tuple_of +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import SyncBatchNorm, _BatchNorm, _InstanceNorm +from .registry import NORM_LAYERS + +NORM_LAYERS.register_module('BN', module=nn.BatchNorm2d) +NORM_LAYERS.register_module('BN1d', module=nn.BatchNorm1d) +NORM_LAYERS.register_module('BN2d', module=nn.BatchNorm2d) +NORM_LAYERS.register_module('BN3d', module=nn.BatchNorm3d) +NORM_LAYERS.register_module('SyncBN', module=SyncBatchNorm) +NORM_LAYERS.register_module('GN', module=nn.GroupNorm) +NORM_LAYERS.register_module('LN', module=nn.LayerNorm) +NORM_LAYERS.register_module('IN', module=nn.InstanceNorm2d) +NORM_LAYERS.register_module('IN1d', module=nn.InstanceNorm1d) +NORM_LAYERS.register_module('IN2d', module=nn.InstanceNorm2d) +NORM_LAYERS.register_module('IN3d', module=nn.InstanceNorm3d) + + +def infer_abbr(class_type): + """Infer abbreviation from the class name. + + When we build a norm layer with `build_norm_layer()`, we want to preserve + the norm type in variable names, e.g, self.bn1, self.gn. This method will + infer the abbreviation to map class types to abbreviations. + + Rule 1: If the class has the property "_abbr_", return the property. + Rule 2: If the parent class is _BatchNorm, GroupNorm, LayerNorm or + InstanceNorm, the abbreviation of this layer will be "bn", "gn", "ln" and + "in" respectively. + Rule 3: If the class name contains "batch", "group", "layer" or "instance", + the abbreviation of this layer will be "bn", "gn", "ln" and "in" + respectively. + Rule 4: Otherwise, the abbreviation falls back to "norm". + + Args: + class_type (type): The norm layer type. + + Returns: + str: The inferred abbreviation. + """ + if not inspect.isclass(class_type): + raise TypeError( + f'class_type must be a type, but got {type(class_type)}') + if hasattr(class_type, '_abbr_'): + return class_type._abbr_ + if issubclass(class_type, _InstanceNorm): # IN is a subclass of BN + return 'in' + elif issubclass(class_type, _BatchNorm): + return 'bn' + elif issubclass(class_type, nn.GroupNorm): + return 'gn' + elif issubclass(class_type, nn.LayerNorm): + return 'ln' + else: + class_name = class_type.__name__.lower() + if 'batch' in class_name: + return 'bn' + elif 'group' in class_name: + return 'gn' + elif 'layer' in class_name: + return 'ln' + elif 'instance' in class_name: + return 'in' + else: + return 'norm_layer' + + +def build_norm_layer(cfg, num_features, postfix=''): + """Build normalization layer. + + Args: + cfg (dict): The norm layer config, which should contain: + + - type (str): Layer type. + - layer args: Args needed to instantiate a norm layer. + - requires_grad (bool, optional): Whether stop gradient updates. + num_features (int): Number of input channels. + postfix (int | str): The postfix to be appended into norm abbreviation + to create named layer. + + Returns: + (str, nn.Module): The first element is the layer name consisting of + abbreviation and postfix, e.g., bn1, gn. The second element is the + created norm layer. + """ + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in NORM_LAYERS: + raise KeyError(f'Unrecognized norm type {layer_type}') + + norm_layer = NORM_LAYERS.get(layer_type) + abbr = infer_abbr(norm_layer) + + assert isinstance(postfix, (int, str)) + name = abbr + str(postfix) + + requires_grad = cfg_.pop('requires_grad', True) + cfg_.setdefault('eps', 1e-5) + if layer_type != 'GN': + layer = norm_layer(num_features, **cfg_) + if layer_type == 'SyncBN' and hasattr(layer, '_specify_ddp_gpu_num'): + layer._specify_ddp_gpu_num(1) + else: + assert 'num_groups' in cfg_ + layer = norm_layer(num_channels=num_features, **cfg_) + + for param in layer.parameters(): + param.requires_grad = requires_grad + + return name, layer + + +def is_norm(layer, exclude=None): + """Check if a layer is a normalization layer. + + Args: + layer (nn.Module): The layer to be checked. + exclude (type | tuple[type]): Types to be excluded. + + Returns: + bool: Whether the layer is a norm layer. + """ + if exclude is not None: + if not isinstance(exclude, tuple): + exclude = (exclude, ) + if not is_tuple_of(exclude, type): + raise TypeError( + f'"exclude" must be either None or type or a tuple of types, ' + f'but got {type(exclude)}: {exclude}') + + if exclude and isinstance(layer, exclude): + return False + + all_norm_bases = (_BatchNorm, _InstanceNorm, nn.GroupNorm, nn.LayerNorm) + return isinstance(layer, all_norm_bases) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/padding.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/padding.py new file mode 100644 index 000000000000..e4ac6b28a178 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/padding.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn + +from .registry import PADDING_LAYERS + +PADDING_LAYERS.register_module('zero', module=nn.ZeroPad2d) +PADDING_LAYERS.register_module('reflect', module=nn.ReflectionPad2d) +PADDING_LAYERS.register_module('replicate', module=nn.ReplicationPad2d) + + +def build_padding_layer(cfg, *args, **kwargs): + """Build padding layer. + + Args: + cfg (None or dict): The padding layer config, which should contain: + - type (str): Layer type. + - layer args: Args needed to instantiate a padding layer. + + Returns: + nn.Module: Created padding layer. + """ + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + + cfg_ = cfg.copy() + padding_type = cfg_.pop('type') + if padding_type not in PADDING_LAYERS: + raise KeyError(f'Unrecognized padding type {padding_type}.') + else: + padding_layer = PADDING_LAYERS.get(padding_type) + + layer = padding_layer(*args, **kwargs, **cfg_) + + return layer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py new file mode 100644 index 000000000000..07c010d40531 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py @@ -0,0 +1,88 @@ +import inspect +import platform + +from .registry import PLUGIN_LAYERS + +if platform.system() == 'Windows': + import regex as re +else: + import re + + +def infer_abbr(class_type): + """Infer abbreviation from the class name. + + This method will infer the abbreviation to map class types to + abbreviations. + + Rule 1: If the class has the property "abbr", return the property. + Rule 2: Otherwise, the abbreviation falls back to snake case of class + name, e.g. the abbreviation of ``FancyBlock`` will be ``fancy_block``. + + Args: + class_type (type): The norm layer type. + + Returns: + str: The inferred abbreviation. + """ + + def camel2snack(word): + """Convert camel case word into snack case. + + Modified from `inflection lib + `_. + + Example:: + + >>> camel2snack("FancyBlock") + 'fancy_block' + """ + + word = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', word) + word = re.sub(r'([a-z\d])([A-Z])', r'\1_\2', word) + word = word.replace('-', '_') + return word.lower() + + if not inspect.isclass(class_type): + raise TypeError( + f'class_type must be a type, but got {type(class_type)}') + if hasattr(class_type, '_abbr_'): + return class_type._abbr_ + else: + return camel2snack(class_type.__name__) + + +def build_plugin_layer(cfg, postfix='', **kwargs): + """Build plugin layer. + + Args: + cfg (None or dict): cfg should contain: + type (str): identify plugin layer type. + layer args: args needed to instantiate a plugin layer. + postfix (int, str): appended into norm abbreviation to + create named layer. Default: ''. + + Returns: + tuple[str, nn.Module]: + name (str): abbreviation + postfix + layer (nn.Module): created plugin layer + """ + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in PLUGIN_LAYERS: + raise KeyError(f'Unrecognized plugin type {layer_type}') + + plugin_layer = PLUGIN_LAYERS.get(layer_type) + abbr = infer_abbr(plugin_layer) + + assert isinstance(postfix, (int, str)) + name = abbr + str(postfix) + + layer = plugin_layer(**kwargs, **cfg_) + + return name, layer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/registry.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/registry.py new file mode 100644 index 000000000000..584e3b0870fc --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/registry.py @@ -0,0 +1,16 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry + +CONV_LAYERS = Registry('conv layer') +NORM_LAYERS = Registry('norm layer') +ACTIVATION_LAYERS = Registry('activation layer') +PADDING_LAYERS = Registry('padding layer') +UPSAMPLE_LAYERS = Registry('upsample layer') +PLUGIN_LAYERS = Registry('plugin layer') + +DROPOUT_LAYERS = Registry('drop out layers') +POSITIONAL_ENCODING = Registry('position encoding') +ATTENTION = Registry('attention') +FEEDFORWARD_NETWORK = Registry('feed-forward Network') +TRANSFORMER_LAYER = Registry('transformerLayer') +TRANSFORMER_LAYER_SEQUENCE = Registry('transformer-layers sequence') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/scale.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/scale.py new file mode 100644 index 000000000000..c905fffcc8bf --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/scale.py @@ -0,0 +1,21 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + + +class Scale(nn.Module): + """A learnable scale parameter. + + This layer scales the input by a learnable factor. It multiplies a + learnable scale parameter of shape (1,) with input of any shape. + + Args: + scale (float): Initial value of scale factor. Default: 1.0 + """ + + def __init__(self, scale=1.0): + super(Scale, self).__init__() + self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) + + def forward(self, x): + return x * self.scale diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/swish.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/swish.py new file mode 100644 index 000000000000..e2ca8ed7b749 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/swish.py @@ -0,0 +1,25 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + +from .registry import ACTIVATION_LAYERS + + +@ACTIVATION_LAYERS.register_module() +class Swish(nn.Module): + """Swish Module. + + This module applies the swish function: + + .. math:: + Swish(x) = x * Sigmoid(x) + + Returns: + Tensor: The output tensor. + """ + + def __init__(self): + super(Swish, self).__init__() + + def forward(self, x): + return x * torch.sigmoid(x) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py new file mode 100644 index 000000000000..200148e2a588 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py @@ -0,0 +1,595 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import warnings + +import torch +import torch.nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import ConfigDict, deprecated_api_warning +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import Linear, build_activation_layer, build_norm_layer +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner.base_module import BaseModule, ModuleList, Sequential +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import build_from_cfg +from .drop import build_dropout +from .registry import (ATTENTION, FEEDFORWARD_NETWORK, POSITIONAL_ENCODING, + TRANSFORMER_LAYER, TRANSFORMER_LAYER_SEQUENCE) + +# Avoid BC-breaking of importing MultiScaleDeformableAttention from this file +try: + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention # noqa F401 + warnings.warn( + ImportWarning( + '``MultiScaleDeformableAttention`` has been moved to ' + '``mmcv.ops.multi_scale_deform_attn``, please change original path ' # noqa E501 + '``from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention`` ' # noqa E501 + 'to ``from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention`` ' # noqa E501 + )) + +except ImportError: + warnings.warn('Fail to import ``MultiScaleDeformableAttention`` from ' + '``mmcv.ops.multi_scale_deform_attn``, ' + 'You should install ``mmcv-full`` if you need this module. ') + + +def build_positional_encoding(cfg, default_args=None): + """Builder for Position Encoding.""" + return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args) + + +def build_attention(cfg, default_args=None): + """Builder for attention.""" + return build_from_cfg(cfg, ATTENTION, default_args) + + +def build_feedforward_network(cfg, default_args=None): + """Builder for feed-forward network (FFN).""" + return build_from_cfg(cfg, FEEDFORWARD_NETWORK, default_args) + + +def build_transformer_layer(cfg, default_args=None): + """Builder for transformer layer.""" + return build_from_cfg(cfg, TRANSFORMER_LAYER, default_args) + + +def build_transformer_layer_sequence(cfg, default_args=None): + """Builder for transformer encoder and transformer decoder.""" + return build_from_cfg(cfg, TRANSFORMER_LAYER_SEQUENCE, default_args) + + +@ATTENTION.register_module() +class MultiheadAttention(BaseModule): + """A wrapper for ``torch.nn.MultiheadAttention``. + + This module implements MultiheadAttention with identity connection, + and positional encoding is also passed as input. + + Args: + embed_dims (int): The embedding dimension. + num_heads (int): Parallel attention heads. + attn_drop (float): A Dropout layer on attn_output_weights. + Default: 0.0. + proj_drop (float): A Dropout layer after `nn.MultiheadAttention`. + Default: 0.0. + dropout_layer (obj:`ConfigDict`): The dropout_layer used + when adding the shortcut. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + batch_first (bool): When it is True, Key, Query and Value are shape of + (batch, n, embed_dim), otherwise (n, batch, embed_dim). + Default to False. + """ + + def __init__(self, + embed_dims, + num_heads, + attn_drop=0., + proj_drop=0., + dropout_layer=dict(type='Dropout', drop_prob=0.), + init_cfg=None, + batch_first=False, + **kwargs): + super(MultiheadAttention, self).__init__(init_cfg) + if 'dropout' in kwargs: + warnings.warn('The arguments `dropout` in MultiheadAttention ' + 'has been deprecated, now you can separately ' + 'set `attn_drop`(float), proj_drop(float), ' + 'and `dropout_layer`(dict) ') + attn_drop = kwargs['dropout'] + dropout_layer['drop_prob'] = kwargs.pop('dropout') + + self.embed_dims = embed_dims + self.num_heads = num_heads + self.batch_first = batch_first + + self.attn = nn.MultiheadAttention(embed_dims, num_heads, attn_drop, + **kwargs) + + self.proj_drop = nn.Dropout(proj_drop) + self.dropout_layer = build_dropout( + dropout_layer) if dropout_layer else nn.Identity() + + @deprecated_api_warning({'residual': 'identity'}, + cls_name='MultiheadAttention') + def forward(self, + query, + key=None, + value=None, + identity=None, + query_pos=None, + key_pos=None, + attn_mask=None, + key_padding_mask=None, + **kwargs): + """Forward function for `MultiheadAttention`. + + **kwargs allow passing a more general data flow when combining + with other operations in `transformerlayer`. + + Args: + query (Tensor): The input query with shape [num_queries, bs, + embed_dims] if self.batch_first is False, else + [bs, num_queries embed_dims]. + key (Tensor): The key tensor with shape [num_keys, bs, + embed_dims] if self.batch_first is False, else + [bs, num_keys, embed_dims] . + If None, the ``query`` will be used. Defaults to None. + value (Tensor): The value tensor with same shape as `key`. + Same in `nn.MultiheadAttention.forward`. Defaults to None. + If None, the `key` will be used. + identity (Tensor): This tensor, with the same shape as x, + will be used for the identity link. + If None, `x` will be used. Defaults to None. + query_pos (Tensor): The positional encoding for query, with + the same shape as `x`. If not None, it will + be added to `x` before forward function. Defaults to None. + key_pos (Tensor): The positional encoding for `key`, with the + same shape as `key`. Defaults to None. If not None, it will + be added to `key` before forward function. If None, and + `query_pos` has the same shape as `key`, then `query_pos` + will be used for `key_pos`. Defaults to None. + attn_mask (Tensor): ByteTensor mask with shape [num_queries, + num_keys]. Same in `nn.MultiheadAttention.forward`. + Defaults to None. + key_padding_mask (Tensor): ByteTensor with shape [bs, num_keys]. + Defaults to None. + + Returns: + Tensor: forwarded results with shape + [num_queries, bs, embed_dims] + if self.batch_first is False, else + [bs, num_queries embed_dims]. + """ + + if key is None: + key = query + if value is None: + value = key + if identity is None: + identity = query + if key_pos is None: + if query_pos is not None: + # use query_pos if key_pos is not available + if query_pos.shape == key.shape: + key_pos = query_pos + else: + warnings.warn(f'position encoding of key is' + f'missing in {self.__class__.__name__}.') + if query_pos is not None: + query = query + query_pos + if key_pos is not None: + key = key + key_pos + + # Because the dataflow('key', 'query', 'value') of + # ``torch.nn.MultiheadAttention`` is (num_query, batch, + # embed_dims), We should adjust the shape of dataflow from + # batch_first (batch, num_query, embed_dims) to num_query_first + # (num_query ,batch, embed_dims), and recover ``attn_output`` + # from num_query_first to batch_first. + if self.batch_first: + query = query.transpose(0, 1) + key = key.transpose(0, 1) + value = value.transpose(0, 1) + + out = self.attn( + query=query, + key=key, + value=value, + attn_mask=attn_mask, + key_padding_mask=key_padding_mask)[0] + + if self.batch_first: + out = out.transpose(0, 1) + + return identity + self.dropout_layer(self.proj_drop(out)) + + +@FEEDFORWARD_NETWORK.register_module() +class FFN(BaseModule): + """Implements feed-forward networks (FFNs) with identity connection. + + Args: + embed_dims (int): The feature dimension. Same as + `MultiheadAttention`. Defaults: 256. + feedforward_channels (int): The hidden dimension of FFNs. + Defaults: 1024. + num_fcs (int, optional): The number of fully-connected layers in + FFNs. Default: 2. + act_cfg (dict, optional): The activation config for FFNs. + Default: dict(type='ReLU') + ffn_drop (float, optional): Probability of an element to be + zeroed in FFN. Default 0.0. + add_identity (bool, optional): Whether to add the + identity connection. Default: `True`. + dropout_layer (obj:`ConfigDict`): The dropout_layer used + when adding the shortcut. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + """ + + @deprecated_api_warning( + { + 'dropout': 'ffn_drop', + 'add_residual': 'add_identity' + }, + cls_name='FFN') + def __init__(self, + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + act_cfg=dict(type='ReLU', inplace=True), + ffn_drop=0., + dropout_layer=None, + add_identity=True, + init_cfg=None, + **kwargs): + super(FFN, self).__init__(init_cfg) + assert num_fcs >= 2, 'num_fcs should be no less ' \ + f'than 2. got {num_fcs}.' + self.embed_dims = embed_dims + self.feedforward_channels = feedforward_channels + self.num_fcs = num_fcs + self.act_cfg = act_cfg + self.activate = build_activation_layer(act_cfg) + + layers = [] + in_channels = embed_dims + for _ in range(num_fcs - 1): + layers.append( + Sequential( + Linear(in_channels, feedforward_channels), self.activate, + nn.Dropout(ffn_drop))) + in_channels = feedforward_channels + layers.append(Linear(feedforward_channels, embed_dims)) + layers.append(nn.Dropout(ffn_drop)) + self.layers = Sequential(*layers) + self.dropout_layer = build_dropout( + dropout_layer) if dropout_layer else torch.nn.Identity() + self.add_identity = add_identity + + @deprecated_api_warning({'residual': 'identity'}, cls_name='FFN') + def forward(self, x, identity=None): + """Forward function for `FFN`. + + The function would add x to the output tensor if residue is None. + """ + out = self.layers(x) + if not self.add_identity: + return self.dropout_layer(out) + if identity is None: + identity = x + return identity + self.dropout_layer(out) + + +@TRANSFORMER_LAYER.register_module() +class BaseTransformerLayer(BaseModule): + """Base `TransformerLayer` for vision transformer. + + It can be built from `mmcv.ConfigDict` and support more flexible + customization, for example, using any number of `FFN or LN ` and + use different kinds of `attention` by specifying a list of `ConfigDict` + named `attn_cfgs`. It is worth mentioning that it supports `prenorm` + when you specifying `norm` as the first element of `operation_order`. + More details about the `prenorm`: `On Layer Normalization in the + Transformer Architecture `_ . + + Args: + attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )): + Configs for `self_attention` or `cross_attention` modules, + The order of the configs in the list should be consistent with + corresponding attentions in operation_order. + If it is a dict, all of the attention modules in operation_order + will be built with this config. Default: None. + ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )): + Configs for FFN, The order of the configs in the list should be + consistent with corresponding ffn in operation_order. + If it is a dict, all of the attention modules in operation_order + will be built with this config. + operation_order (tuple[str]): The execution order of operation + in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm'). + Support `prenorm` when you specifying first element as `norm`. + Default:None. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + batch_first (bool): Key, Query and Value are shape + of (batch, n, embed_dim) + or (n, batch, embed_dim). Default to False. + """ + + def __init__(self, + attn_cfgs=None, + ffn_cfgs=dict( + type='FFN', + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + ffn_drop=0., + act_cfg=dict(type='ReLU', inplace=True), + ), + operation_order=None, + norm_cfg=dict(type='LN'), + init_cfg=None, + batch_first=False, + **kwargs): + + deprecated_args = dict( + feedforward_channels='feedforward_channels', + ffn_dropout='ffn_drop', + ffn_num_fcs='num_fcs') + for ori_name, new_name in deprecated_args.items(): + if ori_name in kwargs: + warnings.warn( + f'The arguments `{ori_name}` in BaseTransformerLayer ' + f'has been deprecated, now you should set `{new_name}` ' + f'and other FFN related arguments ' + f'to a dict named `ffn_cfgs`. ') + ffn_cfgs[new_name] = kwargs[ori_name] + + super(BaseTransformerLayer, self).__init__(init_cfg) + + self.batch_first = batch_first + + assert set(operation_order) & set( + ['self_attn', 'norm', 'ffn', 'cross_attn']) == \ + set(operation_order), f'The operation_order of' \ + f' {self.__class__.__name__} should ' \ + f'contains all four operation type ' \ + f"{['self_attn', 'norm', 'ffn', 'cross_attn']}" + + num_attn = operation_order.count('self_attn') + operation_order.count( + 'cross_attn') + if isinstance(attn_cfgs, dict): + attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)] + else: + assert num_attn == len(attn_cfgs), f'The length ' \ + f'of attn_cfg {num_attn} is ' \ + f'not consistent with the number of attention' \ + f'in operation_order {operation_order}.' + + self.num_attn = num_attn + self.operation_order = operation_order + self.norm_cfg = norm_cfg + self.pre_norm = operation_order[0] == 'norm' + self.attentions = ModuleList() + + index = 0 + for operation_name in operation_order: + if operation_name in ['self_attn', 'cross_attn']: + if 'batch_first' in attn_cfgs[index]: + assert self.batch_first == attn_cfgs[index]['batch_first'] + else: + attn_cfgs[index]['batch_first'] = self.batch_first + attention = build_attention(attn_cfgs[index]) + # Some custom attentions used as `self_attn` + # or `cross_attn` can have different behavior. + attention.operation_name = operation_name + self.attentions.append(attention) + index += 1 + + self.embed_dims = self.attentions[0].embed_dims + + self.ffns = ModuleList() + num_ffns = operation_order.count('ffn') + if isinstance(ffn_cfgs, dict): + ffn_cfgs = ConfigDict(ffn_cfgs) + if isinstance(ffn_cfgs, dict): + ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)] + assert len(ffn_cfgs) == num_ffns + for ffn_index in range(num_ffns): + if 'embed_dims' not in ffn_cfgs[ffn_index]: + ffn_cfgs['embed_dims'] = self.embed_dims + else: + assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims + self.ffns.append( + build_feedforward_network(ffn_cfgs[ffn_index], + dict(type='FFN'))) + + self.norms = ModuleList() + num_norms = operation_order.count('norm') + for _ in range(num_norms): + self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1]) + + def forward(self, + query, + key=None, + value=None, + query_pos=None, + key_pos=None, + attn_masks=None, + query_key_padding_mask=None, + key_padding_mask=None, + **kwargs): + """Forward function for `TransformerDecoderLayer`. + + **kwargs contains some specific arguments of attentions. + + Args: + query (Tensor): The input query with shape + [num_queries, bs, embed_dims] if + self.batch_first is False, else + [bs, num_queries embed_dims]. + key (Tensor): The key tensor with shape [num_keys, bs, + embed_dims] if self.batch_first is False, else + [bs, num_keys, embed_dims] . + value (Tensor): The value tensor with same shape as `key`. + query_pos (Tensor): The positional encoding for `query`. + Default: None. + key_pos (Tensor): The positional encoding for `key`. + Default: None. + attn_masks (List[Tensor] | None): 2D Tensor used in + calculation of corresponding attention. The length of + it should equal to the number of `attention` in + `operation_order`. Default: None. + query_key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_queries]. Only used in `self_attn` layer. + Defaults to None. + key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_keys]. Default: None. + + Returns: + Tensor: forwarded results with shape [num_queries, bs, embed_dims]. + """ + + norm_index = 0 + attn_index = 0 + ffn_index = 0 + identity = query + if attn_masks is None: + attn_masks = [None for _ in range(self.num_attn)] + elif isinstance(attn_masks, torch.Tensor): + attn_masks = [ + copy.deepcopy(attn_masks) for _ in range(self.num_attn) + ] + warnings.warn(f'Use same attn_mask in all attentions in ' + f'{self.__class__.__name__} ') + else: + assert len(attn_masks) == self.num_attn, f'The length of ' \ + f'attn_masks {len(attn_masks)} must be equal ' \ + f'to the number of attention in ' \ + f'operation_order {self.num_attn}' + + for layer in self.operation_order: + if layer == 'self_attn': + temp_key = temp_value = query + query = self.attentions[attn_index]( + query, + temp_key, + temp_value, + identity if self.pre_norm else None, + query_pos=query_pos, + key_pos=query_pos, + attn_mask=attn_masks[attn_index], + key_padding_mask=query_key_padding_mask, + **kwargs) + attn_index += 1 + identity = query + + elif layer == 'norm': + query = self.norms[norm_index](query) + norm_index += 1 + + elif layer == 'cross_attn': + query = self.attentions[attn_index]( + query, + key, + value, + identity if self.pre_norm else None, + query_pos=query_pos, + key_pos=key_pos, + attn_mask=attn_masks[attn_index], + key_padding_mask=key_padding_mask, + **kwargs) + attn_index += 1 + identity = query + + elif layer == 'ffn': + query = self.ffns[ffn_index]( + query, identity if self.pre_norm else None) + ffn_index += 1 + + return query + + +@TRANSFORMER_LAYER_SEQUENCE.register_module() +class TransformerLayerSequence(BaseModule): + """Base class for TransformerEncoder and TransformerDecoder in vision + transformer. + + As base-class of Encoder and Decoder in vision transformer. + Support customization such as specifying different kind + of `transformer_layer` in `transformer_coder`. + + Args: + transformerlayer (list[obj:`mmcv.ConfigDict`] | + obj:`mmcv.ConfigDict`): Config of transformerlayer + in TransformerCoder. If it is obj:`mmcv.ConfigDict`, + it would be repeated `num_layer` times to a + list[`mmcv.ConfigDict`]. Default: None. + num_layers (int): The number of `TransformerLayer`. Default: None. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + """ + + def __init__(self, transformerlayers=None, num_layers=None, init_cfg=None): + super(TransformerLayerSequence, self).__init__(init_cfg) + if isinstance(transformerlayers, dict): + transformerlayers = [ + copy.deepcopy(transformerlayers) for _ in range(num_layers) + ] + else: + assert isinstance(transformerlayers, list) and \ + len(transformerlayers) == num_layers + self.num_layers = num_layers + self.layers = ModuleList() + for i in range(num_layers): + self.layers.append(build_transformer_layer(transformerlayers[i])) + self.embed_dims = self.layers[0].embed_dims + self.pre_norm = self.layers[0].pre_norm + + def forward(self, + query, + key, + value, + query_pos=None, + key_pos=None, + attn_masks=None, + query_key_padding_mask=None, + key_padding_mask=None, + **kwargs): + """Forward function for `TransformerCoder`. + + Args: + query (Tensor): Input query with shape + `(num_queries, bs, embed_dims)`. + key (Tensor): The key tensor with shape + `(num_keys, bs, embed_dims)`. + value (Tensor): The value tensor with shape + `(num_keys, bs, embed_dims)`. + query_pos (Tensor): The positional encoding for `query`. + Default: None. + key_pos (Tensor): The positional encoding for `key`. + Default: None. + attn_masks (List[Tensor], optional): Each element is 2D Tensor + which is used in calculation of corresponding attention in + operation_order. Default: None. + query_key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_queries]. Only used in self-attention + Default: None. + key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_keys]. Default: None. + + Returns: + Tensor: results with shape [num_queries, bs, embed_dims]. + """ + for layer in self.layers: + query = layer( + query, + key, + value, + query_pos=query_pos, + key_pos=key_pos, + attn_masks=attn_masks, + query_key_padding_mask=query_key_padding_mask, + key_padding_mask=key_padding_mask, + **kwargs) + return query diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py new file mode 100644 index 000000000000..a1a353767d0c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py @@ -0,0 +1,84 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +import torch.nn.functional as F + +from ..utils import xavier_init +from .registry import UPSAMPLE_LAYERS + +UPSAMPLE_LAYERS.register_module('nearest', module=nn.Upsample) +UPSAMPLE_LAYERS.register_module('bilinear', module=nn.Upsample) + + +@UPSAMPLE_LAYERS.register_module(name='pixel_shuffle') +class PixelShufflePack(nn.Module): + """Pixel Shuffle upsample layer. + + This module packs `F.pixel_shuffle()` and a nn.Conv2d module together to + achieve a simple upsampling with pixel shuffle. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + scale_factor (int): Upsample ratio. + upsample_kernel (int): Kernel size of the conv layer to expand the + channels. + """ + + def __init__(self, in_channels, out_channels, scale_factor, + upsample_kernel): + super(PixelShufflePack, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.scale_factor = scale_factor + self.upsample_kernel = upsample_kernel + self.upsample_conv = nn.Conv2d( + self.in_channels, + self.out_channels * scale_factor * scale_factor, + self.upsample_kernel, + padding=(self.upsample_kernel - 1) // 2) + self.init_weights() + + def init_weights(self): + xavier_init(self.upsample_conv, distribution='uniform') + + def forward(self, x): + x = self.upsample_conv(x) + x = F.pixel_shuffle(x, self.scale_factor) + return x + + +def build_upsample_layer(cfg, *args, **kwargs): + """Build upsample layer. + + Args: + cfg (dict): The upsample layer config, which should contain: + + - type (str): Layer type. + - scale_factor (int): Upsample ratio, which is not applicable to + deconv. + - layer args: Args needed to instantiate a upsample layer. + args (argument list): Arguments passed to the ``__init__`` + method of the corresponding conv layer. + kwargs (keyword arguments): Keyword arguments passed to the + ``__init__`` method of the corresponding conv layer. + + Returns: + nn.Module: Created upsample layer. + """ + if not isinstance(cfg, dict): + raise TypeError(f'cfg must be a dict, but got {type(cfg)}') + if 'type' not in cfg: + raise KeyError( + f'the cfg dict must contain the key "type", but got {cfg}') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in UPSAMPLE_LAYERS: + raise KeyError(f'Unrecognized upsample type {layer_type}') + else: + upsample = UPSAMPLE_LAYERS.get(layer_type) + + if upsample is nn.Upsample: + cfg_['mode'] = layer_type + layer = upsample(*args, **kwargs, **cfg_) + return layer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py new file mode 100644 index 000000000000..8aebf67bf523 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py @@ -0,0 +1,180 @@ +# Copyright (c) OpenMMLab. All rights reserved. +r"""Modified from https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/wrappers.py # noqa: E501 + +Wrap some nn modules to support empty tensor input. Currently, these wrappers +are mainly used in mask heads like fcn_mask_head and maskiou_heads since mask +heads are trained on only positive RoIs. +""" +import math + +import torch +import torch.nn as nn +from torch.nn.modules.utils import _pair, _triple + +from .registry import CONV_LAYERS, UPSAMPLE_LAYERS + +if torch.__version__ == 'parrots': + TORCH_VERSION = torch.__version__ +else: + # torch.__version__ could be 1.3.1+cu92, we only need the first two + # for comparison + TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2]) + + +def obsolete_torch_version(torch_version, version_threshold): + return torch_version == 'parrots' or torch_version <= version_threshold + + +class NewEmptyTensorOp(torch.autograd.Function): + + @staticmethod + def forward(ctx, x, new_shape): + ctx.shape = x.shape + return x.new_empty(new_shape) + + @staticmethod + def backward(ctx, grad): + shape = ctx.shape + return NewEmptyTensorOp.apply(grad, shape), None + + +@CONV_LAYERS.register_module('Conv', force=True) +class Conv2d(nn.Conv2d): + + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size, + self.padding, self.stride, self.dilation): + o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1 + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +@CONV_LAYERS.register_module('Conv3d', force=True) +class Conv3d(nn.Conv3d): + + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d in zip(x.shape[-3:], self.kernel_size, + self.padding, self.stride, self.dilation): + o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1 + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +@CONV_LAYERS.register_module() +@CONV_LAYERS.register_module('deconv') +@UPSAMPLE_LAYERS.register_module('deconv', force=True) +class ConvTranspose2d(nn.ConvTranspose2d): + + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d, op in zip(x.shape[-2:], self.kernel_size, + self.padding, self.stride, + self.dilation, self.output_padding): + out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +@CONV_LAYERS.register_module() +@CONV_LAYERS.register_module('deconv3d') +@UPSAMPLE_LAYERS.register_module('deconv3d', force=True) +class ConvTranspose3d(nn.ConvTranspose3d): + + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d, op in zip(x.shape[-3:], self.kernel_size, + self.padding, self.stride, + self.dilation, self.output_padding): + out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +class MaxPool2d(nn.MaxPool2d): + + def forward(self, x): + # PyTorch 1.9 does not support empty tensor inference yet + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): + out_shape = list(x.shape[:2]) + for i, k, p, s, d in zip(x.shape[-2:], _pair(self.kernel_size), + _pair(self.padding), _pair(self.stride), + _pair(self.dilation)): + o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 + o = math.ceil(o) if self.ceil_mode else math.floor(o) + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + return empty + + return super().forward(x) + + +class MaxPool3d(nn.MaxPool3d): + + def forward(self, x): + # PyTorch 1.9 does not support empty tensor inference yet + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): + out_shape = list(x.shape[:2]) + for i, k, p, s, d in zip(x.shape[-3:], _triple(self.kernel_size), + _triple(self.padding), + _triple(self.stride), + _triple(self.dilation)): + o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 + o = math.ceil(o) if self.ceil_mode else math.floor(o) + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + return empty + + return super().forward(x) + + +class Linear(torch.nn.Linear): + + def forward(self, x): + # empty tensor forward of Linear layer is supported in Pytorch 1.6 + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 5)): + out_shape = [x.shape[0], self.out_features] + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py new file mode 100644 index 000000000000..7567316c566b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py @@ -0,0 +1,30 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ..runner import Sequential +from ..utils import Registry, build_from_cfg + + +def build_model_from_cfg(cfg, registry, default_args=None): + """Build a PyTorch model from config dict(s). Different from + ``build_from_cfg``, if cfg is a list, a ``nn.Sequential`` will be built. + + Args: + cfg (dict, list[dict]): The config of modules, is is either a config + dict or a list of config dicts. If cfg is a list, a + the built modules will be wrapped with ``nn.Sequential``. + registry (:obj:`Registry`): A registry the module belongs to. + default_args (dict, optional): Default arguments to build the module. + Defaults to None. + + Returns: + nn.Module: A built nn module. + """ + if isinstance(cfg, list): + modules = [ + build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg + ] + return Sequential(*modules) + else: + return build_from_cfg(cfg, registry, default_args) + + +MODELS = Registry('model', build_func=build_model_from_cfg) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py new file mode 100644 index 000000000000..1cb3ac057ee2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py @@ -0,0 +1,316 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging + +import torch.nn as nn +import torch.utils.checkpoint as cp + +from .utils import constant_init, kaiming_init + + +def conv3x3(in_planes, out_planes, stride=1, dilation=1): + """3x3 convolution with padding.""" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False): + super(BasicBlock, self).__init__() + assert style in ['pytorch', 'caffe'] + self.conv1 = conv3x3(inplanes, planes, stride, dilation) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + assert not with_cp + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False): + """Bottleneck block. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if + it is "caffe", the stride-two layer is the first 1x1 conv layer. + """ + super(Bottleneck, self).__init__() + assert style in ['pytorch', 'caffe'] + if style == 'pytorch': + conv1_stride = 1 + conv2_stride = stride + else: + conv1_stride = stride + conv2_stride = 1 + self.conv1 = nn.Conv2d( + inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False) + self.conv2 = nn.Conv2d( + planes, + planes, + kernel_size=3, + stride=conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + + self.bn1 = nn.BatchNorm2d(planes) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + def forward(self, x): + + def _inner_forward(x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +def make_res_layer(block, + inplanes, + planes, + blocks, + stride=1, + dilation=1, + style='pytorch', + with_cp=False): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append( + block( + inplanes, + planes, + stride, + dilation, + downsample, + style=style, + with_cp=with_cp)) + inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append( + block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp)) + + return nn.Sequential(*layers) + + +class ResNet(nn.Module): + """ResNet backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + num_stages (int): Resnet stages, normally 4. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze + running stats (mean and var). + bn_frozen (bool): Whether to freeze weight and bias of BN layers. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + """ + + arch_settings = { + 18: (BasicBlock, (2, 2, 2, 2)), + 34: (BasicBlock, (3, 4, 6, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, + depth, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + frozen_stages=-1, + bn_eval=True, + bn_frozen=False, + with_cp=False): + super(ResNet, self).__init__() + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for resnet') + assert num_stages >= 1 and num_stages <= 4 + block, stage_blocks = self.arch_settings[depth] + stage_blocks = stage_blocks[:num_stages] + assert len(strides) == len(dilations) == num_stages + assert max(out_indices) < num_stages + + self.out_indices = out_indices + self.style = style + self.frozen_stages = frozen_stages + self.bn_eval = bn_eval + self.bn_frozen = bn_frozen + self.with_cp = with_cp + + self.inplanes = 64 + self.conv1 = nn.Conv2d( + 3, 64, kernel_size=7, stride=2, padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.res_layers = [] + for i, num_blocks in enumerate(stage_blocks): + stride = strides[i] + dilation = dilations[i] + planes = 64 * 2**i + res_layer = make_res_layer( + block, + self.inplanes, + planes, + num_blocks, + stride=stride, + dilation=dilation, + style=self.style, + with_cp=with_cp) + self.inplanes = planes * block.expansion + layer_name = f'layer{i + 1}' + self.add_module(layer_name, res_layer) + self.res_layers.append(layer_name) + + self.feat_dim = block.expansion * 64 * 2**(len(stage_blocks) - 1) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + from ..runner import load_checkpoint + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + outs = [] + for i, layer_name in enumerate(self.res_layers): + res_layer = getattr(self, layer_name) + x = res_layer(x) + if i in self.out_indices: + outs.append(x) + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def train(self, mode=True): + super(ResNet, self).train(mode) + if self.bn_eval: + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + if self.bn_frozen: + for params in m.parameters(): + params.requires_grad = False + if mode and self.frozen_stages >= 0: + for param in self.conv1.parameters(): + param.requires_grad = False + for param in self.bn1.parameters(): + param.requires_grad = False + self.bn1.eval() + self.bn1.weight.requires_grad = False + self.bn1.bias.requires_grad = False + for i in range(1, self.frozen_stages + 1): + mod = getattr(self, f'layer{i}') + mod.eval() + for param in mod.parameters(): + param.requires_grad = False diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py new file mode 100644 index 000000000000..a263e31c1e39 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .flops_counter import get_model_complexity_info +from .fuse_conv_bn import fuse_conv_bn +from .sync_bn import revert_sync_batchnorm +from .weight_init import (INITIALIZERS, Caffe2XavierInit, ConstantInit, + KaimingInit, NormalInit, PretrainedInit, + TruncNormalInit, UniformInit, XavierInit, + bias_init_with_prob, caffe2_xavier_init, + constant_init, initialize, kaiming_init, normal_init, + trunc_normal_init, uniform_init, xavier_init) + +__all__ = [ + 'get_model_complexity_info', 'bias_init_with_prob', 'caffe2_xavier_init', + 'constant_init', 'kaiming_init', 'normal_init', 'trunc_normal_init', + 'uniform_init', 'xavier_init', 'fuse_conv_bn', 'initialize', + 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit', + 'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit', + 'Caffe2XavierInit', 'revert_sync_batchnorm' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py new file mode 100644 index 000000000000..2b69ac3a54d6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py @@ -0,0 +1,599 @@ +# Modified from flops-counter.pytorch by Vladislav Sovrasov +# original repo: https://github.com/sovrasov/flops-counter.pytorch + +# MIT License + +# Copyright (c) 2018 Vladislav Sovrasov + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import sys +from functools import partial + +import numpy as np +import torch +import torch.nn as nn + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + + +def get_model_complexity_info(model, + input_shape, + print_per_layer_stat=True, + as_strings=True, + input_constructor=None, + flush=False, + ost=sys.stdout): + """Get complexity information of a model. + + This method can calculate FLOPs and parameter counts of a model with + corresponding input shape. It can also print complexity information for + each layer in a model. + + Supported layers are listed as below: + - Convolutions: ``nn.Conv1d``, ``nn.Conv2d``, ``nn.Conv3d``. + - Activations: ``nn.ReLU``, ``nn.PReLU``, ``nn.ELU``, ``nn.LeakyReLU``, + ``nn.ReLU6``. + - Poolings: ``nn.MaxPool1d``, ``nn.MaxPool2d``, ``nn.MaxPool3d``, + ``nn.AvgPool1d``, ``nn.AvgPool2d``, ``nn.AvgPool3d``, + ``nn.AdaptiveMaxPool1d``, ``nn.AdaptiveMaxPool2d``, + ``nn.AdaptiveMaxPool3d``, ``nn.AdaptiveAvgPool1d``, + ``nn.AdaptiveAvgPool2d``, ``nn.AdaptiveAvgPool3d``. + - BatchNorms: ``nn.BatchNorm1d``, ``nn.BatchNorm2d``, + ``nn.BatchNorm3d``, ``nn.GroupNorm``, ``nn.InstanceNorm1d``, + ``InstanceNorm2d``, ``InstanceNorm3d``, ``nn.LayerNorm``. + - Linear: ``nn.Linear``. + - Deconvolution: ``nn.ConvTranspose2d``. + - Upsample: ``nn.Upsample``. + + Args: + model (nn.Module): The model for complexity calculation. + input_shape (tuple): Input shape used for calculation. + print_per_layer_stat (bool): Whether to print complexity information + for each layer in a model. Default: True. + as_strings (bool): Output FLOPs and params counts in a string form. + Default: True. + input_constructor (None | callable): If specified, it takes a callable + method that generates input. otherwise, it will generate a random + tensor with input shape to calculate FLOPs. Default: None. + flush (bool): same as that in :func:`print`. Default: False. + ost (stream): same as ``file`` param in :func:`print`. + Default: sys.stdout. + + Returns: + tuple[float | str]: If ``as_strings`` is set to True, it will return + FLOPs and parameter counts in a string format. otherwise, it will + return those in a float number format. + """ + assert type(input_shape) is tuple + assert len(input_shape) >= 1 + assert isinstance(model, nn.Module) + flops_model = add_flops_counting_methods(model) + flops_model.eval() + flops_model.start_flops_count() + if input_constructor: + input = input_constructor(input_shape) + _ = flops_model(**input) + else: + try: + batch = torch.ones(()).new_empty( + (1, *input_shape), + dtype=next(flops_model.parameters()).dtype, + device=next(flops_model.parameters()).device) + except StopIteration: + # Avoid StopIteration for models which have no parameters, + # like `nn.Relu()`, `nn.AvgPool2d`, etc. + batch = torch.ones(()).new_empty((1, *input_shape)) + + _ = flops_model(batch) + + flops_count, params_count = flops_model.compute_average_flops_cost() + if print_per_layer_stat: + print_model_with_flops( + flops_model, flops_count, params_count, ost=ost, flush=flush) + flops_model.stop_flops_count() + + if as_strings: + return flops_to_string(flops_count), params_to_string(params_count) + + return flops_count, params_count + + +def flops_to_string(flops, units='GFLOPs', precision=2): + """Convert FLOPs number into a string. + + Note that Here we take a multiply-add counts as one FLOP. + + Args: + flops (float): FLOPs number to be converted. + units (str | None): Converted FLOPs units. Options are None, 'GFLOPs', + 'MFLOPs', 'KFLOPs', 'FLOPs'. If set to None, it will automatically + choose the most suitable unit for FLOPs. Default: 'GFLOPs'. + precision (int): Digit number after the decimal point. Default: 2. + + Returns: + str: The converted FLOPs number with units. + + Examples: + >>> flops_to_string(1e9) + '1.0 GFLOPs' + >>> flops_to_string(2e5, 'MFLOPs') + '0.2 MFLOPs' + >>> flops_to_string(3e-9, None) + '3e-09 FLOPs' + """ + if units is None: + if flops // 10**9 > 0: + return str(round(flops / 10.**9, precision)) + ' GFLOPs' + elif flops // 10**6 > 0: + return str(round(flops / 10.**6, precision)) + ' MFLOPs' + elif flops // 10**3 > 0: + return str(round(flops / 10.**3, precision)) + ' KFLOPs' + else: + return str(flops) + ' FLOPs' + else: + if units == 'GFLOPs': + return str(round(flops / 10.**9, precision)) + ' ' + units + elif units == 'MFLOPs': + return str(round(flops / 10.**6, precision)) + ' ' + units + elif units == 'KFLOPs': + return str(round(flops / 10.**3, precision)) + ' ' + units + else: + return str(flops) + ' FLOPs' + + +def params_to_string(num_params, units=None, precision=2): + """Convert parameter number into a string. + + Args: + num_params (float): Parameter number to be converted. + units (str | None): Converted FLOPs units. Options are None, 'M', + 'K' and ''. If set to None, it will automatically choose the most + suitable unit for Parameter number. Default: None. + precision (int): Digit number after the decimal point. Default: 2. + + Returns: + str: The converted parameter number with units. + + Examples: + >>> params_to_string(1e9) + '1000.0 M' + >>> params_to_string(2e5) + '200.0 k' + >>> params_to_string(3e-9) + '3e-09' + """ + if units is None: + if num_params // 10**6 > 0: + return str(round(num_params / 10**6, precision)) + ' M' + elif num_params // 10**3: + return str(round(num_params / 10**3, precision)) + ' k' + else: + return str(num_params) + else: + if units == 'M': + return str(round(num_params / 10.**6, precision)) + ' ' + units + elif units == 'K': + return str(round(num_params / 10.**3, precision)) + ' ' + units + else: + return str(num_params) + + +def print_model_with_flops(model, + total_flops, + total_params, + units='GFLOPs', + precision=3, + ost=sys.stdout, + flush=False): + """Print a model with FLOPs for each layer. + + Args: + model (nn.Module): The model to be printed. + total_flops (float): Total FLOPs of the model. + total_params (float): Total parameter counts of the model. + units (str | None): Converted FLOPs units. Default: 'GFLOPs'. + precision (int): Digit number after the decimal point. Default: 3. + ost (stream): same as `file` param in :func:`print`. + Default: sys.stdout. + flush (bool): same as that in :func:`print`. Default: False. + + Example: + >>> class ExampleModel(nn.Module): + + >>> def __init__(self): + >>> super().__init__() + >>> self.conv1 = nn.Conv2d(3, 8, 3) + >>> self.conv2 = nn.Conv2d(8, 256, 3) + >>> self.conv3 = nn.Conv2d(256, 8, 3) + >>> self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) + >>> self.flatten = nn.Flatten() + >>> self.fc = nn.Linear(8, 1) + + >>> def forward(self, x): + >>> x = self.conv1(x) + >>> x = self.conv2(x) + >>> x = self.conv3(x) + >>> x = self.avg_pool(x) + >>> x = self.flatten(x) + >>> x = self.fc(x) + >>> return x + + >>> model = ExampleModel() + >>> x = (3, 16, 16) + to print the complexity information state for each layer, you can use + >>> get_model_complexity_info(model, x) + or directly use + >>> print_model_with_flops(model, 4579784.0, 37361) + ExampleModel( + 0.037 M, 100.000% Params, 0.005 GFLOPs, 100.000% FLOPs, + (conv1): Conv2d(0.0 M, 0.600% Params, 0.0 GFLOPs, 0.959% FLOPs, 3, 8, kernel_size=(3, 3), stride=(1, 1)) # noqa: E501 + (conv2): Conv2d(0.019 M, 50.020% Params, 0.003 GFLOPs, 58.760% FLOPs, 8, 256, kernel_size=(3, 3), stride=(1, 1)) + (conv3): Conv2d(0.018 M, 49.356% Params, 0.002 GFLOPs, 40.264% FLOPs, 256, 8, kernel_size=(3, 3), stride=(1, 1)) + (avg_pool): AdaptiveAvgPool2d(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.017% FLOPs, output_size=(1, 1)) + (flatten): Flatten(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.000% FLOPs, ) + (fc): Linear(0.0 M, 0.024% Params, 0.0 GFLOPs, 0.000% FLOPs, in_features=8, out_features=1, bias=True) + ) + """ + + def accumulate_params(self): + if is_supported_instance(self): + return self.__params__ + else: + sum = 0 + for m in self.children(): + sum += m.accumulate_params() + return sum + + def accumulate_flops(self): + if is_supported_instance(self): + return self.__flops__ / model.__batch_counter__ + else: + sum = 0 + for m in self.children(): + sum += m.accumulate_flops() + return sum + + def flops_repr(self): + accumulated_num_params = self.accumulate_params() + accumulated_flops_cost = self.accumulate_flops() + return ', '.join([ + params_to_string( + accumulated_num_params, units='M', precision=precision), + '{:.3%} Params'.format(accumulated_num_params / total_params), + flops_to_string( + accumulated_flops_cost, units=units, precision=precision), + '{:.3%} FLOPs'.format(accumulated_flops_cost / total_flops), + self.original_extra_repr() + ]) + + def add_extra_repr(m): + m.accumulate_flops = accumulate_flops.__get__(m) + m.accumulate_params = accumulate_params.__get__(m) + flops_extra_repr = flops_repr.__get__(m) + if m.extra_repr != flops_extra_repr: + m.original_extra_repr = m.extra_repr + m.extra_repr = flops_extra_repr + assert m.extra_repr != m.original_extra_repr + + def del_extra_repr(m): + if hasattr(m, 'original_extra_repr'): + m.extra_repr = m.original_extra_repr + del m.original_extra_repr + if hasattr(m, 'accumulate_flops'): + del m.accumulate_flops + + model.apply(add_extra_repr) + print(model, file=ost, flush=flush) + model.apply(del_extra_repr) + + +def get_model_parameters_number(model): + """Calculate parameter number of a model. + + Args: + model (nn.module): The model for parameter number calculation. + + Returns: + float: Parameter number of the model. + """ + num_params = sum(p.numel() for p in model.parameters() if p.requires_grad) + return num_params + + +def add_flops_counting_methods(net_main_module): + # adding additional methods to the existing module object, + # this is done this way so that each function has access to self object + net_main_module.start_flops_count = start_flops_count.__get__( + net_main_module) + net_main_module.stop_flops_count = stop_flops_count.__get__( + net_main_module) + net_main_module.reset_flops_count = reset_flops_count.__get__( + net_main_module) + net_main_module.compute_average_flops_cost = compute_average_flops_cost.__get__( # noqa: E501 + net_main_module) + + net_main_module.reset_flops_count() + + return net_main_module + + +def compute_average_flops_cost(self): + """Compute average FLOPs cost. + + A method to compute average FLOPs cost, which will be available after + `add_flops_counting_methods()` is called on a desired net object. + + Returns: + float: Current mean flops consumption per image. + """ + batches_count = self.__batch_counter__ + flops_sum = 0 + for module in self.modules(): + if is_supported_instance(module): + flops_sum += module.__flops__ + params_sum = get_model_parameters_number(self) + return flops_sum / batches_count, params_sum + + +def start_flops_count(self): + """Activate the computation of mean flops consumption per image. + + A method to activate the computation of mean flops consumption per image. + which will be available after ``add_flops_counting_methods()`` is called on + a desired net object. It should be called before running the network. + """ + add_batch_counter_hook_function(self) + + def add_flops_counter_hook_function(module): + if is_supported_instance(module): + if hasattr(module, '__flops_handle__'): + return + + else: + handle = module.register_forward_hook( + get_modules_mapping()[type(module)]) + + module.__flops_handle__ = handle + + self.apply(partial(add_flops_counter_hook_function)) + + +def stop_flops_count(self): + """Stop computing the mean flops consumption per image. + + A method to stop computing the mean flops consumption per image, which will + be available after ``add_flops_counting_methods()`` is called on a desired + net object. It can be called to pause the computation whenever. + """ + remove_batch_counter_hook_function(self) + self.apply(remove_flops_counter_hook_function) + + +def reset_flops_count(self): + """Reset statistics computed so far. + + A method to Reset computed statistics, which will be available after + `add_flops_counting_methods()` is called on a desired net object. + """ + add_batch_counter_variables_or_reset(self) + self.apply(add_flops_counter_variable_or_reset) + + +# ---- Internal functions +def empty_flops_counter_hook(module, input, output): + module.__flops__ += 0 + + +def upsample_flops_counter_hook(module, input, output): + output_size = output[0] + batch_size = output_size.shape[0] + output_elements_count = batch_size + for val in output_size.shape[1:]: + output_elements_count *= val + module.__flops__ += int(output_elements_count) + + +def relu_flops_counter_hook(module, input, output): + active_elements_count = output.numel() + module.__flops__ += int(active_elements_count) + + +def linear_flops_counter_hook(module, input, output): + input = input[0] + output_last_dim = output.shape[ + -1] # pytorch checks dimensions, so here we don't care much + module.__flops__ += int(np.prod(input.shape) * output_last_dim) + + +def pool_flops_counter_hook(module, input, output): + input = input[0] + module.__flops__ += int(np.prod(input.shape)) + + +def norm_flops_counter_hook(module, input, output): + input = input[0] + + batch_flops = np.prod(input.shape) + if (getattr(module, 'affine', False) + or getattr(module, 'elementwise_affine', False)): + batch_flops *= 2 + module.__flops__ += int(batch_flops) + + +def deconv_flops_counter_hook(conv_module, input, output): + # Can have multiple inputs, getting the first one + input = input[0] + + batch_size = input.shape[0] + input_height, input_width = input.shape[2:] + + kernel_height, kernel_width = conv_module.kernel_size + in_channels = conv_module.in_channels + out_channels = conv_module.out_channels + groups = conv_module.groups + + filters_per_channel = out_channels // groups + conv_per_position_flops = ( + kernel_height * kernel_width * in_channels * filters_per_channel) + + active_elements_count = batch_size * input_height * input_width + overall_conv_flops = conv_per_position_flops * active_elements_count + bias_flops = 0 + if conv_module.bias is not None: + output_height, output_width = output.shape[2:] + bias_flops = out_channels * batch_size * output_height * output_height + overall_flops = overall_conv_flops + bias_flops + + conv_module.__flops__ += int(overall_flops) + + +def conv_flops_counter_hook(conv_module, input, output): + # Can have multiple inputs, getting the first one + input = input[0] + + batch_size = input.shape[0] + output_dims = list(output.shape[2:]) + + kernel_dims = list(conv_module.kernel_size) + in_channels = conv_module.in_channels + out_channels = conv_module.out_channels + groups = conv_module.groups + + filters_per_channel = out_channels // groups + conv_per_position_flops = int( + np.prod(kernel_dims)) * in_channels * filters_per_channel + + active_elements_count = batch_size * int(np.prod(output_dims)) + + overall_conv_flops = conv_per_position_flops * active_elements_count + + bias_flops = 0 + + if conv_module.bias is not None: + + bias_flops = out_channels * active_elements_count + + overall_flops = overall_conv_flops + bias_flops + + conv_module.__flops__ += int(overall_flops) + + +def batch_counter_hook(module, input, output): + batch_size = 1 + if len(input) > 0: + # Can have multiple inputs, getting the first one + input = input[0] + batch_size = len(input) + else: + pass + print('Warning! No positional inputs found for a module, ' + 'assuming batch size is 1.') + module.__batch_counter__ += batch_size + + +def add_batch_counter_variables_or_reset(module): + + module.__batch_counter__ = 0 + + +def add_batch_counter_hook_function(module): + if hasattr(module, '__batch_counter_handle__'): + return + + handle = module.register_forward_hook(batch_counter_hook) + module.__batch_counter_handle__ = handle + + +def remove_batch_counter_hook_function(module): + if hasattr(module, '__batch_counter_handle__'): + module.__batch_counter_handle__.remove() + del module.__batch_counter_handle__ + + +def add_flops_counter_variable_or_reset(module): + if is_supported_instance(module): + if hasattr(module, '__flops__') or hasattr(module, '__params__'): + print('Warning: variables __flops__ or __params__ are already ' + 'defined for the module' + type(module).__name__ + + ' ptflops can affect your code!') + module.__flops__ = 0 + module.__params__ = get_model_parameters_number(module) + + +def is_supported_instance(module): + if type(module) in get_modules_mapping(): + return True + return False + + +def remove_flops_counter_hook_function(module): + if is_supported_instance(module): + if hasattr(module, '__flops_handle__'): + module.__flops_handle__.remove() + del module.__flops_handle__ + + +def get_modules_mapping(): + return { + # convolutions + nn.Conv1d: conv_flops_counter_hook, + nn.Conv2d: conv_flops_counter_hook, + mmcv.cnn.bricks.Conv2d: conv_flops_counter_hook, + nn.Conv3d: conv_flops_counter_hook, + mmcv.cnn.bricks.Conv3d: conv_flops_counter_hook, + # activations + nn.ReLU: relu_flops_counter_hook, + nn.PReLU: relu_flops_counter_hook, + nn.ELU: relu_flops_counter_hook, + nn.LeakyReLU: relu_flops_counter_hook, + nn.ReLU6: relu_flops_counter_hook, + # poolings + nn.MaxPool1d: pool_flops_counter_hook, + nn.AvgPool1d: pool_flops_counter_hook, + nn.AvgPool2d: pool_flops_counter_hook, + nn.MaxPool2d: pool_flops_counter_hook, + mmcv.cnn.bricks.MaxPool2d: pool_flops_counter_hook, + nn.MaxPool3d: pool_flops_counter_hook, + mmcv.cnn.bricks.MaxPool3d: pool_flops_counter_hook, + nn.AvgPool3d: pool_flops_counter_hook, + nn.AdaptiveMaxPool1d: pool_flops_counter_hook, + nn.AdaptiveAvgPool1d: pool_flops_counter_hook, + nn.AdaptiveMaxPool2d: pool_flops_counter_hook, + nn.AdaptiveAvgPool2d: pool_flops_counter_hook, + nn.AdaptiveMaxPool3d: pool_flops_counter_hook, + nn.AdaptiveAvgPool3d: pool_flops_counter_hook, + # normalizations + nn.BatchNorm1d: norm_flops_counter_hook, + nn.BatchNorm2d: norm_flops_counter_hook, + nn.BatchNorm3d: norm_flops_counter_hook, + nn.GroupNorm: norm_flops_counter_hook, + nn.InstanceNorm1d: norm_flops_counter_hook, + nn.InstanceNorm2d: norm_flops_counter_hook, + nn.InstanceNorm3d: norm_flops_counter_hook, + nn.LayerNorm: norm_flops_counter_hook, + # FC + nn.Linear: linear_flops_counter_hook, + mmcv.cnn.bricks.Linear: linear_flops_counter_hook, + # Upscale + nn.Upsample: upsample_flops_counter_hook, + # Deconvolution + nn.ConvTranspose2d: deconv_flops_counter_hook, + mmcv.cnn.bricks.ConvTranspose2d: deconv_flops_counter_hook, + } diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py new file mode 100644 index 000000000000..cb7076f80bf3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py @@ -0,0 +1,59 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + + +def _fuse_conv_bn(conv, bn): + """Fuse conv and bn into one module. + + Args: + conv (nn.Module): Conv to be fused. + bn (nn.Module): BN to be fused. + + Returns: + nn.Module: Fused module. + """ + conv_w = conv.weight + conv_b = conv.bias if conv.bias is not None else torch.zeros_like( + bn.running_mean) + + factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) + conv.weight = nn.Parameter(conv_w * + factor.reshape([conv.out_channels, 1, 1, 1])) + conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) + return conv + + +def fuse_conv_bn(module): + """Recursively fuse conv and bn in a module. + + During inference, the functionary of batch norm layers is turned off + but only the mean and var alone channels are used, which exposes the + chance to fuse it with the preceding conv layers to save computations and + simplify network structures. + + Args: + module (nn.Module): Module to be fused. + + Returns: + nn.Module: Fused module. + """ + last_conv = None + last_conv_name = None + + for name, child in module.named_children(): + if isinstance(child, + (nn.modules.batchnorm._BatchNorm, nn.SyncBatchNorm)): + if last_conv is None: # only fuse BN that is after Conv + continue + fused_conv = _fuse_conv_bn(last_conv, child) + module._modules[last_conv_name] = fused_conv + # To reduce changes, set BN as Identity instead of deleting it. + module._modules[name] = nn.Identity() + last_conv = None + elif isinstance(child, nn.Conv2d): + last_conv = child + last_conv_name = name + else: + fuse_conv_bn(child) + return module diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py new file mode 100644 index 000000000000..b7ed04dc5f85 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py @@ -0,0 +1,59 @@ +import torch + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + + +class _BatchNormXd(torch.nn.modules.batchnorm._BatchNorm): + """A general BatchNorm layer without input dimension check. + + Reproduced from @kapily's work: + (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547) + The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc + is `_check_input_dim` that is designed for tensor sanity checks. + The check has been bypassed in this class for the convenience of converting + SyncBatchNorm. + """ + + def _check_input_dim(self, input): + return + + +def revert_sync_batchnorm(module): + """Helper function to convert all `SyncBatchNorm` (SyncBN) and + `mmcv.ops.sync_bn.SyncBatchNorm`(MMSyncBN) layers in the model to + `BatchNormXd` layers. + + Adapted from @kapily's work: + (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547) + + Args: + module (nn.Module): The module containing `SyncBatchNorm` layers. + + Returns: + module_output: The converted module with `BatchNormXd` layers. + """ + module_output = module + module_checklist = [torch.nn.modules.batchnorm.SyncBatchNorm] + if hasattr(mmcv, 'ops'): + module_checklist.append(mmcv.ops.SyncBatchNorm) + if isinstance(module, tuple(module_checklist)): + module_output = _BatchNormXd(module.num_features, module.eps, + module.momentum, module.affine, + module.track_running_stats) + if module.affine: + # no_grad() may not be needed here but + # just to be consistent with `convert_sync_batchnorm()` + with torch.no_grad(): + module_output.weight = module.weight + module_output.bias = module.bias + module_output.running_mean = module.running_mean + module_output.running_var = module.running_var + module_output.num_batches_tracked = module.num_batches_tracked + module_output.training = module.training + # qconfig exists in quantized models + if hasattr(module, 'qconfig'): + module_output.qconfig = module.qconfig + for name, child in module.named_children(): + module_output.add_module(name, revert_sync_batchnorm(child)) + del module + return module_output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py new file mode 100644 index 000000000000..a5bb6cde3850 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py @@ -0,0 +1,684 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import math +import warnings + +import numpy as np +import torch +import torch.nn as nn +from torch import Tensor + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry, build_from_cfg, get_logger, print_log + +INITIALIZERS = Registry('initializer') + + +def update_init_info(module, init_info): + """Update the `_params_init_info` in the module if the value of parameters + are changed. + + Args: + module (obj:`nn.Module`): The module of PyTorch with a user-defined + attribute `_params_init_info` which records the initialization + information. + init_info (str): The string that describes the initialization. + """ + assert hasattr( + module, + '_params_init_info'), f'Can not find `_params_init_info` in {module}' + for name, param in module.named_parameters(): + + assert param in module._params_init_info, ( + f'Find a new :obj:`Parameter` ' + f'named `{name}` during executing the ' + f'`init_weights` of ' + f'`{module.__class__.__name__}`. ' + f'Please do not add or ' + f'replace parameters during executing ' + f'the `init_weights`. ') + + # The parameter has been changed during executing the + # `init_weights` of module + mean_value = param.data.mean() + if module._params_init_info[param]['tmp_mean_value'] != mean_value: + module._params_init_info[param]['init_info'] = init_info + module._params_init_info[param]['tmp_mean_value'] = mean_value + + +def constant_init(module, val, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.constant_(module.weight, val) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def xavier_init(module, gain=1, bias=0, distribution='normal'): + assert distribution in ['uniform', 'normal'] + if hasattr(module, 'weight') and module.weight is not None: + if distribution == 'uniform': + nn.init.xavier_uniform_(module.weight, gain=gain) + else: + nn.init.xavier_normal_(module.weight, gain=gain) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def normal_init(module, mean=0, std=1, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.normal_(module.weight, mean, std) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def trunc_normal_init(module: nn.Module, + mean: float = 0, + std: float = 1, + a: float = -2, + b: float = 2, + bias: float = 0) -> None: + if hasattr(module, 'weight') and module.weight is not None: + trunc_normal_(module.weight, mean, std, a, b) # type: ignore + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) # type: ignore + + +def uniform_init(module, a=0, b=1, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.uniform_(module.weight, a, b) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def kaiming_init(module, + a=0, + mode='fan_out', + nonlinearity='relu', + bias=0, + distribution='normal'): + assert distribution in ['uniform', 'normal'] + if hasattr(module, 'weight') and module.weight is not None: + if distribution == 'uniform': + nn.init.kaiming_uniform_( + module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + else: + nn.init.kaiming_normal_( + module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def caffe2_xavier_init(module, bias=0): + # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch + # Acknowledgment to FAIR's internal code + kaiming_init( + module, + a=1, + mode='fan_in', + nonlinearity='leaky_relu', + bias=bias, + distribution='uniform') + + +def bias_init_with_prob(prior_prob): + """initialize conv/fc bias value according to a given probability value.""" + bias_init = float(-np.log((1 - prior_prob) / prior_prob)) + return bias_init + + +def _get_bases_name(m): + return [b.__name__ for b in m.__class__.__bases__] + + +class BaseInit(object): + + def __init__(self, *, bias=0, bias_prob=None, layer=None): + self.wholemodule = False + if not isinstance(bias, (int, float)): + raise TypeError(f'bias must be a number, but got a {type(bias)}') + + if bias_prob is not None: + if not isinstance(bias_prob, float): + raise TypeError(f'bias_prob type must be float, \ + but got {type(bias_prob)}') + + if layer is not None: + if not isinstance(layer, (str, list)): + raise TypeError(f'layer must be a str or a list of str, \ + but got a {type(layer)}') + else: + layer = [] + + if bias_prob is not None: + self.bias = bias_init_with_prob(bias_prob) + else: + self.bias = bias + self.layer = [layer] if isinstance(layer, str) else layer + + def _get_init_info(self): + info = f'{self.__class__.__name__}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Constant') +class ConstantInit(BaseInit): + """Initialize module parameters with constant values. + + Args: + val (int | float): the value to fill the weights in the module with + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + """ + + def __init__(self, val, **kwargs): + super().__init__(**kwargs) + self.val = val + + def __call__(self, module): + + def init(m): + if self.wholemodule: + constant_init(m, self.val, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + constant_init(m, self.val, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: val={self.val}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Xavier') +class XavierInit(BaseInit): + r"""Initialize module parameters with values according to the method + described in `Understanding the difficulty of training deep feedforward + neural networks - Glorot, X. & Bengio, Y. (2010). + `_ + + Args: + gain (int | float): an optional scaling factor. Defaults to 1. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + distribution (str): distribution either be ``'normal'`` + or ``'uniform'``. Defaults to ``'normal'``. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + """ + + def __init__(self, gain=1, distribution='normal', **kwargs): + super().__init__(**kwargs) + self.gain = gain + self.distribution = distribution + + def __call__(self, module): + + def init(m): + if self.wholemodule: + xavier_init(m, self.gain, self.bias, self.distribution) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + xavier_init(m, self.gain, self.bias, self.distribution) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: gain={self.gain}, ' \ + f'distribution={self.distribution}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Normal') +class NormalInit(BaseInit): + r"""Initialize module parameters with the values drawn from the normal + distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`. + + Args: + mean (int | float):the mean of the normal distribution. Defaults to 0. + std (int | float): the standard deviation of the normal distribution. + Defaults to 1. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + + """ + + def __init__(self, mean=0, std=1, **kwargs): + super().__init__(**kwargs) + self.mean = mean + self.std = std + + def __call__(self, module): + + def init(m): + if self.wholemodule: + normal_init(m, self.mean, self.std, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + normal_init(m, self.mean, self.std, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: mean={self.mean},' \ + f' std={self.std}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='TruncNormal') +class TruncNormalInit(BaseInit): + r"""Initialize module parameters with the values drawn from the normal + distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` with values + outside :math:`[a, b]`. + + Args: + mean (float): the mean of the normal distribution. Defaults to 0. + std (float): the standard deviation of the normal distribution. + Defaults to 1. + a (float): The minimum cutoff value. + b ( float): The maximum cutoff value. + bias (float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + + """ + + def __init__(self, + mean: float = 0, + std: float = 1, + a: float = -2, + b: float = 2, + **kwargs) -> None: + super().__init__(**kwargs) + self.mean = mean + self.std = std + self.a = a + self.b = b + + def __call__(self, module: nn.Module) -> None: + + def init(m): + if self.wholemodule: + trunc_normal_init(m, self.mean, self.std, self.a, self.b, + self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + trunc_normal_init(m, self.mean, self.std, self.a, self.b, + self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: a={self.a}, b={self.b},' \ + f' mean={self.mean}, std={self.std}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Uniform') +class UniformInit(BaseInit): + r"""Initialize module parameters with values drawn from the uniform + distribution :math:`\mathcal{U}(a, b)`. + + Args: + a (int | float): the lower bound of the uniform distribution. + Defaults to 0. + b (int | float): the upper bound of the uniform distribution. + Defaults to 1. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + """ + + def __init__(self, a=0, b=1, **kwargs): + super().__init__(**kwargs) + self.a = a + self.b = b + + def __call__(self, module): + + def init(m): + if self.wholemodule: + uniform_init(m, self.a, self.b, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + uniform_init(m, self.a, self.b, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: a={self.a},' \ + f' b={self.b}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Kaiming') +class KaimingInit(BaseInit): + r"""Initialize module parameters with the values according to the method + described in `Delving deep into rectifiers: Surpassing human-level + performance on ImageNet classification - He, K. et al. (2015). + `_ + + Args: + a (int | float): the negative slope of the rectifier used after this + layer (only used with ``'leaky_relu'``). Defaults to 0. + mode (str): either ``'fan_in'`` or ``'fan_out'``. Choosing + ``'fan_in'`` preserves the magnitude of the variance of the weights + in the forward pass. Choosing ``'fan_out'`` preserves the + magnitudes in the backwards pass. Defaults to ``'fan_out'``. + nonlinearity (str): the non-linear function (`nn.functional` name), + recommended to use only with ``'relu'`` or ``'leaky_relu'`` . + Defaults to 'relu'. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + distribution (str): distribution either be ``'normal'`` or + ``'uniform'``. Defaults to ``'normal'``. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + """ + + def __init__(self, + a=0, + mode='fan_out', + nonlinearity='relu', + distribution='normal', + **kwargs): + super().__init__(**kwargs) + self.a = a + self.mode = mode + self.nonlinearity = nonlinearity + self.distribution = distribution + + def __call__(self, module): + + def init(m): + if self.wholemodule: + kaiming_init(m, self.a, self.mode, self.nonlinearity, + self.bias, self.distribution) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + kaiming_init(m, self.a, self.mode, self.nonlinearity, + self.bias, self.distribution) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: a={self.a}, mode={self.mode}, ' \ + f'nonlinearity={self.nonlinearity}, ' \ + f'distribution ={self.distribution}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Caffe2Xavier') +class Caffe2XavierInit(KaimingInit): + # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch + # Acknowledgment to FAIR's internal code + def __init__(self, **kwargs): + super().__init__( + a=1, + mode='fan_in', + nonlinearity='leaky_relu', + distribution='uniform', + **kwargs) + + def __call__(self, module): + super().__call__(module) + + +@INITIALIZERS.register_module(name='Pretrained') +class PretrainedInit(object): + """Initialize module by loading a pretrained model. + + Args: + checkpoint (str): the checkpoint file of the pretrained model should + be load. + prefix (str, optional): the prefix of a sub-module in the pretrained + model. it is for loading a part of the pretrained model to + initialize. For example, if we would like to only load the + backbone of a detector model, we can set ``prefix='backbone.'``. + Defaults to None. + map_location (str): map tensors into proper locations. + """ + + def __init__(self, checkpoint, prefix=None, map_location=None): + self.checkpoint = checkpoint + self.prefix = prefix + self.map_location = map_location + + def __call__(self, module): + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import (_load_checkpoint_with_prefix, load_checkpoint, + load_state_dict) + logger = get_logger('mmcv') + if self.prefix is None: + print_log(f'load model from: {self.checkpoint}', logger=logger) + load_checkpoint( + module, + self.checkpoint, + map_location=self.map_location, + strict=False, + logger=logger) + else: + print_log( + f'load {self.prefix} in model from: {self.checkpoint}', + logger=logger) + state_dict = _load_checkpoint_with_prefix( + self.prefix, self.checkpoint, map_location=self.map_location) + load_state_dict(module, state_dict, strict=False, logger=logger) + + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: load from {self.checkpoint}' + return info + + +def _initialize(module, cfg, wholemodule=False): + func = build_from_cfg(cfg, INITIALIZERS) + # wholemodule flag is for override mode, there is no layer key in override + # and initializer will give init values for the whole module with the name + # in override. + func.wholemodule = wholemodule + func(module) + + +def _initialize_override(module, override, cfg): + if not isinstance(override, (dict, list)): + raise TypeError(f'override must be a dict or a list of dict, \ + but got {type(override)}') + + override = [override] if isinstance(override, dict) else override + + for override_ in override: + + cp_override = copy.deepcopy(override_) + name = cp_override.pop('name', None) + if name is None: + raise ValueError('`override` must contain the key "name",' + f'but got {cp_override}') + # if override only has name key, it means use args in init_cfg + if not cp_override: + cp_override.update(cfg) + # if override has name key and other args except type key, it will + # raise error + elif 'type' not in cp_override.keys(): + raise ValueError( + f'`override` need "type" key, but got {cp_override}') + + if hasattr(module, name): + _initialize(getattr(module, name), cp_override, wholemodule=True) + else: + raise RuntimeError(f'module did not have attribute {name}, ' + f'but init_cfg is {cp_override}.') + + +def initialize(module, init_cfg): + """Initialize a module. + + Args: + module (``torch.nn.Module``): the module will be initialized. + init_cfg (dict | list[dict]): initialization configuration dict to + define initializer. OpenMMLab has implemented 6 initializers + including ``Constant``, ``Xavier``, ``Normal``, ``Uniform``, + ``Kaiming``, and ``Pretrained``. + Example: + >>> module = nn.Linear(2, 3, bias=True) + >>> init_cfg = dict(type='Constant', layer='Linear', val =1 , bias =2) + >>> initialize(module, init_cfg) + + >>> module = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1,2)) + >>> # define key ``'layer'`` for initializing layer with different + >>> # configuration + >>> init_cfg = [dict(type='Constant', layer='Conv1d', val=1), + dict(type='Constant', layer='Linear', val=2)] + >>> initialize(module, init_cfg) + + >>> # define key``'override'`` to initialize some specific part in + >>> # module + >>> class FooNet(nn.Module): + >>> def __init__(self): + >>> super().__init__() + >>> self.feat = nn.Conv2d(3, 16, 3) + >>> self.reg = nn.Conv2d(16, 10, 3) + >>> self.cls = nn.Conv2d(16, 5, 3) + >>> model = FooNet() + >>> init_cfg = dict(type='Constant', val=1, bias=2, layer='Conv2d', + >>> override=dict(type='Constant', name='reg', val=3, bias=4)) + >>> initialize(model, init_cfg) + + >>> model = ResNet(depth=50) + >>> # Initialize weights with the pretrained model. + >>> init_cfg = dict(type='Pretrained', + checkpoint='torchvision://resnet50') + >>> initialize(model, init_cfg) + + >>> # Initialize weights of a sub-module with the specific part of + >>> # a pretrained model by using "prefix". + >>> url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\ + >>> 'retinanet_r50_fpn_1x_coco/'\ + >>> 'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth' + >>> init_cfg = dict(type='Pretrained', + checkpoint=url, prefix='backbone.') + """ + if not isinstance(init_cfg, (dict, list)): + raise TypeError(f'init_cfg must be a dict or a list of dict, \ + but got {type(init_cfg)}') + + if isinstance(init_cfg, dict): + init_cfg = [init_cfg] + + for cfg in init_cfg: + # should deeply copy the original config because cfg may be used by + # other modules, e.g., one init_cfg shared by multiple bottleneck + # blocks, the expected cfg will be changed after pop and will change + # the initialization behavior of other modules + cp_cfg = copy.deepcopy(cfg) + override = cp_cfg.pop('override', None) + _initialize(module, cp_cfg) + + if override is not None: + cp_cfg.pop('layer', None) + _initialize_override(module, override, cp_cfg) + else: + # All attributes in module have same initialization. + pass + + +def _no_grad_trunc_normal_(tensor: Tensor, mean: float, std: float, a: float, + b: float) -> Tensor: + # Method based on + # https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf + # Modified from + # https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1. + math.erf(x / math.sqrt(2.))) / 2. + + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn( + 'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. ' + 'The distribution of values may be incorrect.', + stacklevel=2) + + with torch.no_grad(): + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + lower = norm_cdf((a - mean) / std) + upper = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [lower, upper], then translate + # to [2lower-1, 2upper-1]. + tensor.uniform_(2 * lower - 1, 2 * upper - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + tensor.erfinv_() + + # Transform to proper mean, std + tensor.mul_(std * math.sqrt(2.)) + tensor.add_(mean) + + # Clamp to ensure it's in the proper range + tensor.clamp_(min=a, max=b) + return tensor + + +def trunc_normal_(tensor: Tensor, + mean: float = 0., + std: float = 1., + a: float = -2., + b: float = 2.) -> Tensor: + r"""Fills the input Tensor with values drawn from a truncated + normal distribution. The values are effectively drawn from the + normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` + with values outside :math:`[a, b]` redrawn until they are within + the bounds. The method used for generating the random values works + best when :math:`a \leq \text{mean} \leq b`. + + Modified from + https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py + + Args: + tensor (``torch.Tensor``): an n-dimensional `torch.Tensor`. + mean (float): the mean of the normal distribution. + std (float): the standard deviation of the normal distribution. + a (float): the minimum cutoff value. + b (float): the maximum cutoff value. + """ + return _no_grad_trunc_normal_(tensor, mean, std, a, b) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py new file mode 100644 index 000000000000..8778b649561a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py @@ -0,0 +1,175 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging + +import torch.nn as nn + +from .utils import constant_init, kaiming_init, normal_init + + +def conv3x3(in_planes, out_planes, dilation=1): + """3x3 convolution with padding.""" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + padding=dilation, + dilation=dilation) + + +def make_vgg_layer(inplanes, + planes, + num_blocks, + dilation=1, + with_bn=False, + ceil_mode=False): + layers = [] + for _ in range(num_blocks): + layers.append(conv3x3(inplanes, planes, dilation)) + if with_bn: + layers.append(nn.BatchNorm2d(planes)) + layers.append(nn.ReLU(inplace=True)) + inplanes = planes + layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode)) + + return layers + + +class VGG(nn.Module): + """VGG backbone. + + Args: + depth (int): Depth of vgg, from {11, 13, 16, 19}. + with_bn (bool): Use BatchNorm or not. + num_classes (int): number of classes for classification. + num_stages (int): VGG stages, normally 5. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze + running stats (mean and var). + bn_frozen (bool): Whether to freeze weight and bias of BN layers. + """ + + arch_settings = { + 11: (1, 1, 2, 2, 2), + 13: (2, 2, 2, 2, 2), + 16: (2, 2, 3, 3, 3), + 19: (2, 2, 4, 4, 4) + } + + def __init__(self, + depth, + with_bn=False, + num_classes=-1, + num_stages=5, + dilations=(1, 1, 1, 1, 1), + out_indices=(0, 1, 2, 3, 4), + frozen_stages=-1, + bn_eval=True, + bn_frozen=False, + ceil_mode=False, + with_last_pool=True): + super(VGG, self).__init__() + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for vgg') + assert num_stages >= 1 and num_stages <= 5 + stage_blocks = self.arch_settings[depth] + self.stage_blocks = stage_blocks[:num_stages] + assert len(dilations) == num_stages + assert max(out_indices) <= num_stages + + self.num_classes = num_classes + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.bn_eval = bn_eval + self.bn_frozen = bn_frozen + + self.inplanes = 3 + start_idx = 0 + vgg_layers = [] + self.range_sub_modules = [] + for i, num_blocks in enumerate(self.stage_blocks): + num_modules = num_blocks * (2 + with_bn) + 1 + end_idx = start_idx + num_modules + dilation = dilations[i] + planes = 64 * 2**i if i < 4 else 512 + vgg_layer = make_vgg_layer( + self.inplanes, + planes, + num_blocks, + dilation=dilation, + with_bn=with_bn, + ceil_mode=ceil_mode) + vgg_layers.extend(vgg_layer) + self.inplanes = planes + self.range_sub_modules.append([start_idx, end_idx]) + start_idx = end_idx + if not with_last_pool: + vgg_layers.pop(-1) + self.range_sub_modules[-1][1] -= 1 + self.module_name = 'features' + self.add_module(self.module_name, nn.Sequential(*vgg_layers)) + + if self.num_classes > 0: + self.classifier = nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, num_classes), + ) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + from ..runner import load_checkpoint + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + elif isinstance(m, nn.Linear): + normal_init(m, std=0.01) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + outs = [] + vgg_layers = getattr(self, self.module_name) + for i in range(len(self.stage_blocks)): + for j in range(*self.range_sub_modules[i]): + vgg_layer = vgg_layers[j] + x = vgg_layer(x) + if i in self.out_indices: + outs.append(x) + if self.num_classes > 0: + x = x.view(x.size(0), -1) + x = self.classifier(x) + outs.append(x) + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def train(self, mode=True): + super(VGG, self).train(mode) + if self.bn_eval: + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + if self.bn_frozen: + for params in m.parameters(): + params.requires_grad = False + vgg_layers = getattr(self, self.module_name) + if mode and self.frozen_stages >= 0: + for i in range(self.frozen_stages): + for j in range(*self.range_sub_modules[i]): + mod = vgg_layers[j] + mod.eval() + for param in mod.parameters(): + param.requires_grad = False diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py new file mode 100644 index 000000000000..3193b7f664e1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .test import (collect_results_cpu, collect_results_gpu, multi_gpu_test, + single_gpu_test) + +__all__ = [ + 'collect_results_cpu', 'collect_results_gpu', 'multi_gpu_test', + 'single_gpu_test' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py new file mode 100644 index 000000000000..4ba24f8d4a45 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py @@ -0,0 +1,202 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import pickle +import shutil +import tempfile +import time + +import torch +import torch.distributed as dist + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import get_dist_info + + +def single_gpu_test(model, data_loader): + """Test model with a single gpu. + + This method tests model with a single gpu and displays test progress bar. + + Args: + model (nn.Module): Model to be tested. + data_loader (nn.Dataloader): Pytorch data loader. + + Returns: + list: The prediction results. + """ + model.eval() + results = [] + dataset = data_loader.dataset + prog_bar = mmcv.ProgressBar(len(dataset)) + for data in data_loader: + with torch.no_grad(): + result = model(return_loss=False, **data) + results.extend(result) + + # Assume result has the same length of batch_size + # refer to https://github.com/open-mmlab/mmcv/issues/985 + batch_size = len(result) + for _ in range(batch_size): + prog_bar.update() + return results + + +def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False): + """Test model with multiple gpus. + + This method tests model with multiple gpus and collects the results + under two different modes: gpu and cpu modes. By setting + ``gpu_collect=True``, it encodes results to gpu tensors and use gpu + communication for results collection. On cpu mode it saves the results on + different gpus to ``tmpdir`` and collects them by the rank 0 worker. + + Args: + model (nn.Module): Model to be tested. + data_loader (nn.Dataloader): Pytorch data loader. + tmpdir (str): Path of directory to save the temporary results from + different gpus under cpu mode. + gpu_collect (bool): Option to use either gpu or cpu to collect results. + + Returns: + list: The prediction results. + """ + model.eval() + results = [] + dataset = data_loader.dataset + rank, world_size = get_dist_info() + if rank == 0: + prog_bar = mmcv.ProgressBar(len(dataset)) + time.sleep(2) # This line can prevent deadlock problem in some cases. + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, **data) + results.extend(result) + + if rank == 0: + batch_size = len(result) + batch_size_all = batch_size * world_size + if batch_size_all + prog_bar.completed > len(dataset): + batch_size_all = len(dataset) - prog_bar.completed + for _ in range(batch_size_all): + prog_bar.update() + + # collect results from all ranks + if gpu_collect: + results = collect_results_gpu(results, len(dataset)) + else: + results = collect_results_cpu(results, len(dataset), tmpdir) + return results + + +def collect_results_cpu(result_part, size, tmpdir=None): + """Collect results under cpu mode. + + On cpu mode, this function will save the results on different gpus to + ``tmpdir`` and collect them by the rank 0 worker. + + Args: + result_part (list): Result list containing result parts + to be collected. + size (int): Size of the results, commonly equal to length of + the results. + tmpdir (str | None): temporal directory for collected results to + store. If set to None, it will create a random temporal directory + for it. + + Returns: + list: The collected results. + """ + rank, world_size = get_dist_info() + # create a tmp dir if it is not specified + if tmpdir is None: + MAX_LEN = 512 + # 32 is whitespace + dir_tensor = torch.full((MAX_LEN, ), + 32, + dtype=torch.uint8, + device='cuda') + if rank == 0: + mmcv.mkdir_or_exist('.dist_test') + tmpdir = tempfile.mkdtemp(dir='.dist_test') + tmpdir = torch.tensor( + bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') + dir_tensor[:len(tmpdir)] = tmpdir + dist.broadcast(dir_tensor, 0) + tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() + else: + mmcv.mkdir_or_exist(tmpdir) + # dump the part result to the dir + mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl')) + dist.barrier() + # collect all parts + if rank != 0: + return None + else: + # load results of all parts from tmp dir + part_list = [] + for i in range(world_size): + part_file = osp.join(tmpdir, f'part_{i}.pkl') + part_result = mmcv.load(part_file) + # When data is severely insufficient, an empty part_result + # on a certain gpu could makes the overall outputs empty. + if part_result: + part_list.append(part_result) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + # remove tmp dir + shutil.rmtree(tmpdir) + return ordered_results + + +def collect_results_gpu(result_part, size): + """Collect results under gpu mode. + + On gpu mode, this function will encode results to gpu tensors and use gpu + communication for results collection. + + Args: + result_part (list): Result list containing result parts + to be collected. + size (int): Size of the results, commonly equal to length of + the results. + + Returns: + list: The collected results. + """ + rank, world_size = get_dist_info() + # dump result part to tensor with pickle + part_tensor = torch.tensor( + bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') + # gather all result part tensor shape + shape_tensor = torch.tensor(part_tensor.shape, device='cuda') + shape_list = [shape_tensor.clone() for _ in range(world_size)] + dist.all_gather(shape_list, shape_tensor) + # padding result part tensor to max length + shape_max = torch.tensor(shape_list).max() + part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') + part_send[:shape_tensor[0]] = part_tensor + part_recv_list = [ + part_tensor.new_zeros(shape_max) for _ in range(world_size) + ] + # gather all result part + dist.all_gather(part_recv_list, part_send) + + if rank == 0: + part_list = [] + for recv, shape in zip(part_recv_list, shape_list): + part_result = pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()) + # When data is severely insufficient, an empty part_result + # on a certain gpu could makes the overall outputs empty. + if part_result: + part_list.append(part_result) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + return ordered_results diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py new file mode 100644 index 000000000000..2051b85f7e59 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .file_client import BaseStorageBackend, FileClient +from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler +from .io import dump, load, register_handler +from .parse import dict_from_file, list_from_file + +__all__ = [ + 'BaseStorageBackend', 'FileClient', 'load', 'dump', 'register_handler', + 'BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler', + 'list_from_file', 'dict_from_file' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py new file mode 100644 index 000000000000..bafca300c6db --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py @@ -0,0 +1,1148 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import inspect +import os +import os.path as osp +import re +import tempfile +import warnings +from abc import ABCMeta, abstractmethod +from contextlib import contextmanager +from pathlib import Path +from typing import Iterable, Iterator, Optional, Tuple, Union +from urllib.request import urlopen + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.misc import has_method +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.path import is_filepath + + +class BaseStorageBackend(metaclass=ABCMeta): + """Abstract class of storage backends. + + All backends need to implement two apis: ``get()`` and ``get_text()``. + ``get()`` reads the file as a byte stream and ``get_text()`` reads the file + as texts. + """ + + # a flag to indicate whether the backend can create a symlink for a file + _allow_symlink = False + + @property + def name(self): + return self.__class__.__name__ + + @property + def allow_symlink(self): + return self._allow_symlink + + @abstractmethod + def get(self, filepath): + pass + + @abstractmethod + def get_text(self, filepath): + pass + + +class CephBackend(BaseStorageBackend): + """Ceph storage backend (for internal use). + + Args: + path_mapping (dict|None): path mapping dict from local path to Petrel + path. When ``path_mapping={'src': 'dst'}``, ``src`` in ``filepath`` + will be replaced by ``dst``. Default: None. + + .. warning:: + :class:`mmcv.fileio.file_client.CephBackend` will be deprecated, + please use :class:`mmcv.fileio.file_client.PetrelBackend` instead. + """ + + def __init__(self, path_mapping=None): + try: + import ceph + except ImportError: + raise ImportError('Please install ceph to enable CephBackend.') + + warnings.warn( + 'CephBackend will be deprecated, please use PetrelBackend instead') + self._client = ceph.S3Client() + assert isinstance(path_mapping, dict) or path_mapping is None + self.path_mapping = path_mapping + + def get(self, filepath): + filepath = str(filepath) + if self.path_mapping is not None: + for k, v in self.path_mapping.items(): + filepath = filepath.replace(k, v) + value = self._client.Get(filepath) + value_buf = memoryview(value) + return value_buf + + def get_text(self, filepath, encoding=None): + raise NotImplementedError + + +class PetrelBackend(BaseStorageBackend): + """Petrel storage backend (for internal use). + + PetrelBackend supports reading and writing data to multiple clusters. + If the file path contains the cluster name, PetrelBackend will read data + from specified cluster or write data to it. Otherwise, PetrelBackend will + access the default cluster. + + Args: + path_mapping (dict, optional): Path mapping dict from local path to + Petrel path. When ``path_mapping={'src': 'dst'}``, ``src`` in + ``filepath`` will be replaced by ``dst``. Default: None. + enable_mc (bool, optional): Whether to enable memcached support. + Default: True. + + Examples: + >>> filepath1 = 's3://path/of/file' + >>> filepath2 = 'cluster-name:s3://path/of/file' + >>> client = PetrelBackend() + >>> client.get(filepath1) # get data from default cluster + >>> client.get(filepath2) # get data from 'cluster-name' cluster + """ + + def __init__(self, + path_mapping: Optional[dict] = None, + enable_mc: bool = True): + try: + from petrel_client import client + except ImportError: + raise ImportError('Please install petrel_client to enable ' + 'PetrelBackend.') + + self._client = client.Client(enable_mc=enable_mc) + assert isinstance(path_mapping, dict) or path_mapping is None + self.path_mapping = path_mapping + + def _map_path(self, filepath: Union[str, Path]) -> str: + """Map ``filepath`` to a string path whose prefix will be replaced by + :attr:`self.path_mapping`. + + Args: + filepath (str): Path to be mapped. + """ + filepath = str(filepath) + if self.path_mapping is not None: + for k, v in self.path_mapping.items(): + filepath = filepath.replace(k, v) + return filepath + + def _format_path(self, filepath: str) -> str: + """Convert a ``filepath`` to standard format of petrel oss. + + If the ``filepath`` is concatenated by ``os.path.join``, in a Windows + environment, the ``filepath`` will be the format of + 's3://bucket_name\\image.jpg'. By invoking :meth:`_format_path`, the + above ``filepath`` will be converted to 's3://bucket_name/image.jpg'. + + Args: + filepath (str): Path to be formatted. + """ + return re.sub(r'\\+', '/', filepath) + + def get(self, filepath: Union[str, Path]) -> memoryview: + """Read data from a given ``filepath`` with 'rb' mode. + + Args: + filepath (str or Path): Path to read data. + + Returns: + memoryview: A memory view of expected bytes object to avoid + copying. The memoryview object can be converted to bytes by + ``value_buf.tobytes()``. + """ + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + value = self._client.Get(filepath) + value_buf = memoryview(value) + return value_buf + + def get_text(self, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. + + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + return str(self.get(filepath), encoding=encoding) + + def put(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Save data to a given ``filepath``. + + Args: + obj (bytes): Data to be saved. + filepath (str or Path): Path to write data. + """ + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + self._client.put(filepath, obj) + + def put_text(self, + obj: str, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> None: + """Save data to a given ``filepath``. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str): The encoding format used to encode the ``obj``. + Default: 'utf-8'. + """ + self.put(bytes(obj, encoding=encoding), filepath) + + def remove(self, filepath: Union[str, Path]) -> None: + """Remove a file. + + Args: + filepath (str or Path): Path to be removed. + """ + if not has_method(self._client, 'delete'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `delete` method, please use a higher version or dev' + ' branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + self._client.delete(filepath) + + def exists(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path exists. + + Args: + filepath (str or Path): Path to be checked whether exists. + + Returns: + bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. + """ + if not (has_method(self._client, 'contains') + and has_method(self._client, 'isdir')): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `contains` and `isdir` methods, please use a higher' + 'version or dev branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + return self._client.contains(filepath) or self._client.isdir(filepath) + + def isdir(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a directory. + + Args: + filepath (str or Path): Path to be checked whether it is a + directory. + + Returns: + bool: Return ``True`` if ``filepath`` points to a directory, + ``False`` otherwise. + """ + if not has_method(self._client, 'isdir'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `isdir` method, please use a higher version or dev' + ' branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + return self._client.isdir(filepath) + + def isfile(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a file. + + Args: + filepath (str or Path): Path to be checked whether it is a file. + + Returns: + bool: Return ``True`` if ``filepath`` points to a file, ``False`` + otherwise. + """ + if not has_method(self._client, 'contains'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `contains` method, please use a higher version or ' + 'dev branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + return self._client.contains(filepath) + + def join_path(self, filepath: Union[str, Path], + *filepaths: Union[str, Path]) -> str: + """Concatenate all file paths. + + Args: + filepath (str or Path): Path to be concatenated. + + Returns: + str: The result after concatenation. + """ + filepath = self._format_path(self._map_path(filepath)) + if filepath.endswith('/'): + filepath = filepath[:-1] + formatted_paths = [filepath] + for path in filepaths: + formatted_paths.append(self._format_path(self._map_path(path))) + return '/'.join(formatted_paths) + + @contextmanager + def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]: + """Download a file from ``filepath`` and return a temporary path. + + ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Args: + filepath (str | Path): Download a file from ``filepath``. + + Examples: + >>> client = PetrelBackend() + >>> # After existing from the ``with`` clause, + >>> # the path will be removed + >>> with client.get_local_path('s3://path/of/your/file') as path: + ... # do something here + + Yields: + Iterable[str]: Only yield one temporary path. + """ + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + assert self.isfile(filepath) + try: + f = tempfile.NamedTemporaryFile(delete=False) + f.write(self.get(filepath)) + f.close() + yield f.name + finally: + os.remove(f.name) + + def list_dir_or_file(self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False) -> Iterator[str]: + """Scan a directory to find the interested directories or files in + arbitrary order. + + Note: + Petrel has no concept of directories but it simulates the directory + hierarchy in the filesystem through public prefixes. In addition, + if the returned path ends with '/', it means the path is a public + prefix which is a logical directory. + + Note: + :meth:`list_dir_or_file` returns the path relative to ``dir_path``. + In addition, the returned path of directory will not contains the + suffix '/' which is consistent with other backends. + + Args: + dir_path (str | Path): Path of the directory. + list_dir (bool): List the directories. Default: True. + list_file (bool): List the path of files. Default: True. + suffix (str or tuple[str], optional): File suffix + that we are interested in. Default: None. + recursive (bool): If set to True, recursively scan the + directory. Default: False. + + Yields: + Iterable[str]: A relative path to ``dir_path``. + """ + if not has_method(self._client, 'list'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `list` method, please use a higher version or dev' + ' branch instead.')) + + dir_path = self._map_path(dir_path) + dir_path = self._format_path(dir_path) + if list_dir and suffix is not None: + raise TypeError( + '`list_dir` should be False when `suffix` is not None') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('`suffix` must be a string or tuple of strings') + + # Petrel's simulated directory hierarchy assumes that directory paths + # should end with `/` + if not dir_path.endswith('/'): + dir_path += '/' + + root = dir_path + + def _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive): + for path in self._client.list(dir_path): + # the `self.isdir` is not used here to determine whether path + # is a directory, because `self.isdir` relies on + # `self._client.list` + if path.endswith('/'): # a directory path + next_dir_path = self.join_path(dir_path, path) + if list_dir: + # get the relative path and exclude the last + # character '/' + rel_dir = next_dir_path[len(root):-1] + yield rel_dir + if recursive: + yield from _list_dir_or_file(next_dir_path, list_dir, + list_file, suffix, + recursive) + else: # a file path + absolute_path = self.join_path(dir_path, path) + rel_path = absolute_path[len(root):] + if (suffix is None + or rel_path.endswith(suffix)) and list_file: + yield rel_path + + return _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive) + + +class MemcachedBackend(BaseStorageBackend): + """Memcached storage backend. + + Attributes: + server_list_cfg (str): Config file for memcached server list. + client_cfg (str): Config file for memcached client. + sys_path (str | None): Additional path to be appended to `sys.path`. + Default: None. + """ + + def __init__(self, server_list_cfg, client_cfg, sys_path=None): + if sys_path is not None: + import sys + sys.path.append(sys_path) + try: + import mc + except ImportError: + raise ImportError( + 'Please install memcached to enable MemcachedBackend.') + + self.server_list_cfg = server_list_cfg + self.client_cfg = client_cfg + self._client = mc.MemcachedClient.GetInstance(self.server_list_cfg, + self.client_cfg) + # mc.pyvector servers as a point which points to a memory cache + self._mc_buffer = mc.pyvector() + + def get(self, filepath): + filepath = str(filepath) + import mc + self._client.Get(filepath, self._mc_buffer) + value_buf = mc.ConvertBuffer(self._mc_buffer) + return value_buf + + def get_text(self, filepath, encoding=None): + raise NotImplementedError + + +class LmdbBackend(BaseStorageBackend): + """Lmdb storage backend. + + Args: + db_path (str): Lmdb database path. + readonly (bool, optional): Lmdb environment parameter. If True, + disallow any write operations. Default: True. + lock (bool, optional): Lmdb environment parameter. If False, when + concurrent access occurs, do not lock the database. Default: False. + readahead (bool, optional): Lmdb environment parameter. If False, + disable the OS filesystem readahead mechanism, which may improve + random read performance when a database is larger than RAM. + Default: False. + + Attributes: + db_path (str): Lmdb database path. + """ + + def __init__(self, + db_path, + readonly=True, + lock=False, + readahead=False, + **kwargs): + try: + import lmdb + except ImportError: + raise ImportError('Please install lmdb to enable LmdbBackend.') + + self.db_path = str(db_path) + self._client = lmdb.open( + self.db_path, + readonly=readonly, + lock=lock, + readahead=readahead, + **kwargs) + + def get(self, filepath): + """Get values according to the filepath. + + Args: + filepath (str | obj:`Path`): Here, filepath is the lmdb key. + """ + filepath = str(filepath) + with self._client.begin(write=False) as txn: + value_buf = txn.get(filepath.encode('ascii')) + return value_buf + + def get_text(self, filepath, encoding=None): + raise NotImplementedError + + +class HardDiskBackend(BaseStorageBackend): + """Raw hard disks storage backend.""" + + _allow_symlink = True + + def get(self, filepath: Union[str, Path]) -> bytes: + """Read data from a given ``filepath`` with 'rb' mode. + + Args: + filepath (str or Path): Path to read data. + + Returns: + bytes: Expected bytes object. + """ + with open(filepath, 'rb') as f: + value_buf = f.read() + return value_buf + + def get_text(self, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. + + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + with open(filepath, 'r', encoding=encoding) as f: + value_buf = f.read() + return value_buf + + def put(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'wb' mode. + + Note: + ``put`` will create a directory if the directory of ``filepath`` + does not exist. + + Args: + obj (bytes): Data to be written. + filepath (str or Path): Path to write data. + """ + mmcv.mkdir_or_exist(osp.dirname(filepath)) + with open(filepath, 'wb') as f: + f.write(obj) + + def put_text(self, + obj: str, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> None: + """Write data to a given ``filepath`` with 'w' mode. + + Note: + ``put_text`` will create a directory if the directory of + ``filepath`` does not exist. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + """ + mmcv.mkdir_or_exist(osp.dirname(filepath)) + with open(filepath, 'w', encoding=encoding) as f: + f.write(obj) + + def remove(self, filepath: Union[str, Path]) -> None: + """Remove a file. + + Args: + filepath (str or Path): Path to be removed. + """ + os.remove(filepath) + + def exists(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path exists. + + Args: + filepath (str or Path): Path to be checked whether exists. + + Returns: + bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. + """ + return osp.exists(filepath) + + def isdir(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a directory. + + Args: + filepath (str or Path): Path to be checked whether it is a + directory. + + Returns: + bool: Return ``True`` if ``filepath`` points to a directory, + ``False`` otherwise. + """ + return osp.isdir(filepath) + + def isfile(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a file. + + Args: + filepath (str or Path): Path to be checked whether it is a file. + + Returns: + bool: Return ``True`` if ``filepath`` points to a file, ``False`` + otherwise. + """ + return osp.isfile(filepath) + + def join_path(self, filepath: Union[str, Path], + *filepaths: Union[str, Path]) -> str: + """Concatenate all file paths. + + Join one or more filepath components intelligently. The return value + is the concatenation of filepath and any members of *filepaths. + + Args: + filepath (str or Path): Path to be concatenated. + + Returns: + str: The result of concatenation. + """ + return osp.join(filepath, *filepaths) + + @contextmanager + def get_local_path( + self, filepath: Union[str, Path]) -> Iterable[Union[str, Path]]: + """Only for unified API and do nothing.""" + yield filepath + + def list_dir_or_file(self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False) -> Iterator[str]: + """Scan a directory to find the interested directories or files in + arbitrary order. + + Note: + :meth:`list_dir_or_file` returns the path relative to ``dir_path``. + + Args: + dir_path (str | Path): Path of the directory. + list_dir (bool): List the directories. Default: True. + list_file (bool): List the path of files. Default: True. + suffix (str or tuple[str], optional): File suffix + that we are interested in. Default: None. + recursive (bool): If set to True, recursively scan the + directory. Default: False. + + Yields: + Iterable[str]: A relative path to ``dir_path``. + """ + if list_dir and suffix is not None: + raise TypeError('`suffix` should be None when `list_dir` is True') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('`suffix` must be a string or tuple of strings') + + root = dir_path + + def _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive): + for entry in os.scandir(dir_path): + if not entry.name.startswith('.') and entry.is_file(): + rel_path = osp.relpath(entry.path, root) + if (suffix is None + or rel_path.endswith(suffix)) and list_file: + yield rel_path + elif osp.isdir(entry.path): + if list_dir: + rel_dir = osp.relpath(entry.path, root) + yield rel_dir + if recursive: + yield from _list_dir_or_file(entry.path, list_dir, + list_file, suffix, + recursive) + + return _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive) + + +class HTTPBackend(BaseStorageBackend): + """HTTP and HTTPS storage bachend.""" + + def get(self, filepath): + value_buf = urlopen(filepath).read() + return value_buf + + def get_text(self, filepath, encoding='utf-8'): + value_buf = urlopen(filepath).read() + return value_buf.decode(encoding) + + @contextmanager + def get_local_path(self, filepath: str) -> Iterable[str]: + """Download a file from ``filepath``. + + ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Args: + filepath (str): Download a file from ``filepath``. + + Examples: + >>> client = HTTPBackend() + >>> # After existing from the ``with`` clause, + >>> # the path will be removed + >>> with client.get_local_path('http://path/of/your/file') as path: + ... # do something here + """ + try: + f = tempfile.NamedTemporaryFile(delete=False) + f.write(self.get(filepath)) + f.close() + yield f.name + finally: + os.remove(f.name) + + +class FileClient: + """A general file client to access files in different backends. + + The client loads a file or text in a specified backend from its path + and returns it as a binary or text file. There are two ways to choose a + backend, the name of backend and the prefix of path. Although both of them + can be used to choose a storage backend, ``backend`` has a higher priority + that is if they are all set, the storage backend will be chosen by the + backend argument. If they are all `None`, the disk backend will be chosen. + Note that It can also register other backend accessor with a given name, + prefixes, and backend class. In addition, We use the singleton pattern to + avoid repeated object creation. If the arguments are the same, the same + object will be returned. + + Args: + backend (str, optional): The storage backend type. Options are "disk", + "ceph", "memcached", "lmdb", "http" and "petrel". Default: None. + prefix (str, optional): The prefix of the registered storage backend. + Options are "s3", "http", "https". Default: None. + + Examples: + >>> # only set backend + >>> file_client = FileClient(backend='petrel') + >>> # only set prefix + >>> file_client = FileClient(prefix='s3') + >>> # set both backend and prefix but use backend to choose client + >>> file_client = FileClient(backend='petrel', prefix='s3') + >>> # if the arguments are the same, the same object is returned + >>> file_client1 = FileClient(backend='petrel') + >>> file_client1 is file_client + True + + Attributes: + client (:obj:`BaseStorageBackend`): The backend object. + """ + + _backends = { + 'disk': HardDiskBackend, + 'ceph': CephBackend, + 'memcached': MemcachedBackend, + 'lmdb': LmdbBackend, + 'petrel': PetrelBackend, + 'http': HTTPBackend, + } + # This collection is used to record the overridden backends, and when a + # backend appears in the collection, the singleton pattern is disabled for + # that backend, because if the singleton pattern is used, then the object + # returned will be the backend before overwriting + _overridden_backends = set() + _prefix_to_backends = { + 's3': PetrelBackend, + 'http': HTTPBackend, + 'https': HTTPBackend, + } + _overridden_prefixes = set() + + _instances = {} + + def __new__(cls, backend=None, prefix=None, **kwargs): + if backend is None and prefix is None: + backend = 'disk' + if backend is not None and backend not in cls._backends: + raise ValueError( + f'Backend {backend} is not supported. Currently supported ones' + f' are {list(cls._backends.keys())}') + if prefix is not None and prefix not in cls._prefix_to_backends: + raise ValueError( + f'prefix {prefix} is not supported. Currently supported ones ' + f'are {list(cls._prefix_to_backends.keys())}') + + # concatenate the arguments to a unique key for determining whether + # objects with the same arguments were created + arg_key = f'{backend}:{prefix}' + for key, value in kwargs.items(): + arg_key += f':{key}:{value}' + + # if a backend was overridden, it will create a new object + if (arg_key in cls._instances + and backend not in cls._overridden_backends + and prefix not in cls._overridden_prefixes): + _instance = cls._instances[arg_key] + else: + # create a new object and put it to _instance + _instance = super().__new__(cls) + if backend is not None: + _instance.client = cls._backends[backend](**kwargs) + else: + _instance.client = cls._prefix_to_backends[prefix](**kwargs) + + cls._instances[arg_key] = _instance + + return _instance + + @property + def name(self): + return self.client.name + + @property + def allow_symlink(self): + return self.client.allow_symlink + + @staticmethod + def parse_uri_prefix(uri: Union[str, Path]) -> Optional[str]: + """Parse the prefix of a uri. + + Args: + uri (str | Path): Uri to be parsed that contains the file prefix. + + Examples: + >>> FileClient.parse_uri_prefix('s3://path/of/your/file') + 's3' + + Returns: + str | None: Return the prefix of uri if the uri contains '://' + else ``None``. + """ + assert is_filepath(uri) + uri = str(uri) + if '://' not in uri: + return None + else: + prefix, _ = uri.split('://') + # In the case of PetrelBackend, the prefix may contains the cluster + # name like clusterName:s3 + if ':' in prefix: + _, prefix = prefix.split(':') + return prefix + + @classmethod + def infer_client(cls, + file_client_args: Optional[dict] = None, + uri: Optional[Union[str, Path]] = None) -> 'FileClient': + """Infer a suitable file client based on the URI and arguments. + + Args: + file_client_args (dict, optional): Arguments to instantiate a + FileClient. Default: None. + uri (str | Path, optional): Uri to be parsed that contains the file + prefix. Default: None. + + Examples: + >>> uri = 's3://path/of/your/file' + >>> file_client = FileClient.infer_client(uri=uri) + >>> file_client_args = {'backend': 'petrel'} + >>> file_client = FileClient.infer_client(file_client_args) + + Returns: + FileClient: Instantiated FileClient object. + """ + assert file_client_args is not None or uri is not None + if file_client_args is None: + file_prefix = cls.parse_uri_prefix(uri) # type: ignore + return cls(prefix=file_prefix) + else: + return cls(**file_client_args) + + @classmethod + def _register_backend(cls, name, backend, force=False, prefixes=None): + if not isinstance(name, str): + raise TypeError('the backend name should be a string, ' + f'but got {type(name)}') + if not inspect.isclass(backend): + raise TypeError( + f'backend should be a class but got {type(backend)}') + if not issubclass(backend, BaseStorageBackend): + raise TypeError( + f'backend {backend} is not a subclass of BaseStorageBackend') + if not force and name in cls._backends: + raise KeyError( + f'{name} is already registered as a storage backend, ' + 'add "force=True" if you want to override it') + + if name in cls._backends and force: + cls._overridden_backends.add(name) + cls._backends[name] = backend + + if prefixes is not None: + if isinstance(prefixes, str): + prefixes = [prefixes] + else: + assert isinstance(prefixes, (list, tuple)) + for prefix in prefixes: + if prefix not in cls._prefix_to_backends: + cls._prefix_to_backends[prefix] = backend + elif (prefix in cls._prefix_to_backends) and force: + cls._overridden_prefixes.add(prefix) + cls._prefix_to_backends[prefix] = backend + else: + raise KeyError( + f'{prefix} is already registered as a storage backend,' + ' add "force=True" if you want to override it') + + @classmethod + def register_backend(cls, name, backend=None, force=False, prefixes=None): + """Register a backend to FileClient. + + This method can be used as a normal class method or a decorator. + + .. code-block:: python + + class NewBackend(BaseStorageBackend): + + def get(self, filepath): + return filepath + + def get_text(self, filepath): + return filepath + + FileClient.register_backend('new', NewBackend) + + or + + .. code-block:: python + + @FileClient.register_backend('new') + class NewBackend(BaseStorageBackend): + + def get(self, filepath): + return filepath + + def get_text(self, filepath): + return filepath + + Args: + name (str): The name of the registered backend. + backend (class, optional): The backend class to be registered, + which must be a subclass of :class:`BaseStorageBackend`. + When this method is used as a decorator, backend is None. + Defaults to None. + force (bool, optional): Whether to override the backend if the name + has already been registered. Defaults to False. + prefixes (str or list[str] or tuple[str], optional): The prefixes + of the registered storage backend. Default: None. + `New in version 1.3.15.` + """ + if backend is not None: + cls._register_backend( + name, backend, force=force, prefixes=prefixes) + return + + def _register(backend_cls): + cls._register_backend( + name, backend_cls, force=force, prefixes=prefixes) + return backend_cls + + return _register + + def get(self, filepath: Union[str, Path]) -> Union[bytes, memoryview]: + """Read data from a given ``filepath`` with 'rb' mode. + + Note: + There are two types of return values for ``get``, one is ``bytes`` + and the other is ``memoryview``. The advantage of using memoryview + is that you can avoid copying, and if you want to convert it to + ``bytes``, you can use ``.tobytes()``. + + Args: + filepath (str or Path): Path to read data. + + Returns: + bytes | memoryview: Expected bytes object or a memory view of the + bytes object. + """ + return self.client.get(filepath) + + def get_text(self, filepath: Union[str, Path], encoding='utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. + + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + return self.client.get_text(filepath, encoding) + + def put(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'wb' mode. + + Note: + ``put`` should create a directory if the directory of ``filepath`` + does not exist. + + Args: + obj (bytes): Data to be written. + filepath (str or Path): Path to write data. + """ + self.client.put(obj, filepath) + + def put_text(self, obj: str, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'w' mode. + + Note: + ``put_text`` should create a directory if the directory of + ``filepath`` does not exist. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str, optional): The encoding format used to open the + `filepath`. Default: 'utf-8'. + """ + self.client.put_text(obj, filepath) + + def remove(self, filepath: Union[str, Path]) -> None: + """Remove a file. + + Args: + filepath (str, Path): Path to be removed. + """ + self.client.remove(filepath) + + def exists(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path exists. + + Args: + filepath (str or Path): Path to be checked whether exists. + + Returns: + bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. + """ + return self.client.exists(filepath) + + def isdir(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a directory. + + Args: + filepath (str or Path): Path to be checked whether it is a + directory. + + Returns: + bool: Return ``True`` if ``filepath`` points to a directory, + ``False`` otherwise. + """ + return self.client.isdir(filepath) + + def isfile(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a file. + + Args: + filepath (str or Path): Path to be checked whether it is a file. + + Returns: + bool: Return ``True`` if ``filepath`` points to a file, ``False`` + otherwise. + """ + return self.client.isfile(filepath) + + def join_path(self, filepath: Union[str, Path], + *filepaths: Union[str, Path]) -> str: + """Concatenate all file paths. + + Join one or more filepath components intelligently. The return value + is the concatenation of filepath and any members of *filepaths. + + Args: + filepath (str or Path): Path to be concatenated. + + Returns: + str: The result of concatenation. + """ + return self.client.join_path(filepath, *filepaths) + + @contextmanager + def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]: + """Download data from ``filepath`` and write the data to local path. + + ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Note: + If the ``filepath`` is a local path, just return itself. + + .. warning:: + ``get_local_path`` is an experimental interface that may change in + the future. + + Args: + filepath (str or Path): Path to be read data. + + Examples: + >>> file_client = FileClient(prefix='s3') + >>> with file_client.get_local_path('s3://bucket/abc.jpg') as path: + ... # do something here + + Yields: + Iterable[str]: Only yield one path. + """ + with self.client.get_local_path(str(filepath)) as local_path: + yield local_path + + def list_dir_or_file(self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False) -> Iterator[str]: + """Scan a directory to find the interested directories or files in + arbitrary order. + + Note: + :meth:`list_dir_or_file` returns the path relative to ``dir_path``. + + Args: + dir_path (str | Path): Path of the directory. + list_dir (bool): List the directories. Default: True. + list_file (bool): List the path of files. Default: True. + suffix (str or tuple[str], optional): File suffix + that we are interested in. Default: None. + recursive (bool): If set to True, recursively scan the + directory. Default: False. + + Yields: + Iterable[str]: A relative path to ``dir_path``. + """ + yield from self.client.list_dir_or_file(dir_path, list_dir, list_file, + suffix, recursive) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/__init__.py new file mode 100644 index 000000000000..aa24d9197283 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .base import BaseFileHandler +from .json_handler import JsonHandler +from .pickle_handler import PickleHandler +from .yaml_handler import YamlHandler + +__all__ = ['BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/base.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/base.py new file mode 100644 index 000000000000..288878bc5728 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/base.py @@ -0,0 +1,30 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import ABCMeta, abstractmethod + + +class BaseFileHandler(metaclass=ABCMeta): + # `str_like` is a flag to indicate whether the type of file object is + # str-like object or bytes-like object. Pickle only processes bytes-like + # objects but json only processes str-like object. If it is str-like + # object, `StringIO` will be used to process the buffer. + str_like = True + + @abstractmethod + def load_from_fileobj(self, file, **kwargs): + pass + + @abstractmethod + def dump_to_fileobj(self, obj, file, **kwargs): + pass + + @abstractmethod + def dump_to_str(self, obj, **kwargs): + pass + + def load_from_path(self, filepath, mode='r', **kwargs): + with open(filepath, mode) as f: + return self.load_from_fileobj(f, **kwargs) + + def dump_to_path(self, obj, filepath, mode='w', **kwargs): + with open(filepath, mode) as f: + self.dump_to_fileobj(obj, f, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py new file mode 100644 index 000000000000..18d4f15f7413 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import json + +import numpy as np + +from .base import BaseFileHandler + + +def set_default(obj): + """Set default json values for non-serializable values. + + It helps convert ``set``, ``range`` and ``np.ndarray`` data types to list. + It also converts ``np.generic`` (including ``np.int32``, ``np.float32``, + etc.) into plain numbers of plain python built-in types. + """ + if isinstance(obj, (set, range)): + return list(obj) + elif isinstance(obj, np.ndarray): + return obj.tolist() + elif isinstance(obj, np.generic): + return obj.item() + raise TypeError(f'{type(obj)} is unsupported for json dump') + + +class JsonHandler(BaseFileHandler): + + def load_from_fileobj(self, file): + return json.load(file) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('default', set_default) + json.dump(obj, file, **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('default', set_default) + return json.dumps(obj, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py new file mode 100644 index 000000000000..b37c79bed4ef --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pickle + +from .base import BaseFileHandler + + +class PickleHandler(BaseFileHandler): + + str_like = False + + def load_from_fileobj(self, file, **kwargs): + return pickle.load(file, **kwargs) + + def load_from_path(self, filepath, **kwargs): + return super(PickleHandler, self).load_from_path( + filepath, mode='rb', **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('protocol', 2) + return pickle.dumps(obj, **kwargs) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('protocol', 2) + pickle.dump(obj, file, **kwargs) + + def dump_to_path(self, obj, filepath, **kwargs): + super(PickleHandler, self).dump_to_path( + obj, filepath, mode='wb', **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py new file mode 100644 index 000000000000..c5aa2eea1e8c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py @@ -0,0 +1,24 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import yaml + +try: + from yaml import CLoader as Loader, CDumper as Dumper +except ImportError: + from yaml import Loader, Dumper + +from .base import BaseFileHandler # isort:skip + + +class YamlHandler(BaseFileHandler): + + def load_from_fileobj(self, file, **kwargs): + kwargs.setdefault('Loader', Loader) + return yaml.load(file, **kwargs) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('Dumper', Dumper) + yaml.dump(obj, file, **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('Dumper', Dumper) + return yaml.dump(obj, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py new file mode 100644 index 000000000000..aaefde58aa3e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py @@ -0,0 +1,151 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from io import BytesIO, StringIO +from pathlib import Path + +from ..utils import is_list_of, is_str +from .file_client import FileClient +from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler + +file_handlers = { + 'json': JsonHandler(), + 'yaml': YamlHandler(), + 'yml': YamlHandler(), + 'pickle': PickleHandler(), + 'pkl': PickleHandler() +} + + +def load(file, file_format=None, file_client_args=None, **kwargs): + """Load data from json/yaml/pickle files. + + This method provides a unified api for loading data from serialized files. + + Note: + In v1.3.16 and later, ``load`` supports loading data from serialized + files those can be storaged in different backends. + + Args: + file (str or :obj:`Path` or file-like object): Filename or a file-like + object. + file_format (str, optional): If not specified, the file format will be + inferred from the file extension, otherwise use the specified one. + Currently supported formats include "json", "yaml/yml" and + "pickle/pkl". + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> load('/path/of/your/file') # file is storaged in disk + >>> load('https://path/of/your/file') # file is storaged in Internet + >>> load('s3://path/of/your/file') # file is storaged in petrel + + Returns: + The content from the file. + """ + if isinstance(file, Path): + file = str(file) + if file_format is None and is_str(file): + file_format = file.split('.')[-1] + if file_format not in file_handlers: + raise TypeError(f'Unsupported format: {file_format}') + + handler = file_handlers[file_format] + if is_str(file): + file_client = FileClient.infer_client(file_client_args, file) + if handler.str_like: + with StringIO(file_client.get_text(file)) as f: + obj = handler.load_from_fileobj(f, **kwargs) + else: + with BytesIO(file_client.get(file)) as f: + obj = handler.load_from_fileobj(f, **kwargs) + elif hasattr(file, 'read'): + obj = handler.load_from_fileobj(file, **kwargs) + else: + raise TypeError('"file" must be a filepath str or a file-object') + return obj + + +def dump(obj, file=None, file_format=None, file_client_args=None, **kwargs): + """Dump data to json/yaml/pickle strings or files. + + This method provides a unified api for dumping data as strings or to files, + and also supports custom arguments for each file format. + + Note: + In v1.3.16 and later, ``dump`` supports dumping data as strings or to + files which is saved to different backends. + + Args: + obj (any): The python object to be dumped. + file (str or :obj:`Path` or file-like object, optional): If not + specified, then the object is dumped to a str, otherwise to a file + specified by the filename or file-like object. + file_format (str, optional): Same as :func:`load`. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> dump('hello world', '/path/of/your/file') # disk + >>> dump('hello world', 's3://path/of/your/file') # ceph or petrel + + Returns: + bool: True for success, False otherwise. + """ + if isinstance(file, Path): + file = str(file) + if file_format is None: + if is_str(file): + file_format = file.split('.')[-1] + elif file is None: + raise ValueError( + 'file_format must be specified since file is None') + if file_format not in file_handlers: + raise TypeError(f'Unsupported format: {file_format}') + + handler = file_handlers[file_format] + if file is None: + return handler.dump_to_str(obj, **kwargs) + elif is_str(file): + file_client = FileClient.infer_client(file_client_args, file) + if handler.str_like: + with StringIO() as f: + handler.dump_to_fileobj(obj, f, **kwargs) + file_client.put_text(f.getvalue(), file) + else: + with BytesIO() as f: + handler.dump_to_fileobj(obj, f, **kwargs) + file_client.put(f.getvalue(), file) + elif hasattr(file, 'write'): + handler.dump_to_fileobj(obj, file, **kwargs) + else: + raise TypeError('"file" must be a filename str or a file-object') + + +def _register_handler(handler, file_formats): + """Register a handler for some file extensions. + + Args: + handler (:obj:`BaseFileHandler`): Handler to be registered. + file_formats (str or list[str]): File formats to be handled by this + handler. + """ + if not isinstance(handler, BaseFileHandler): + raise TypeError( + f'handler must be a child of BaseFileHandler, not {type(handler)}') + if isinstance(file_formats, str): + file_formats = [file_formats] + if not is_list_of(file_formats, str): + raise TypeError('file_formats must be a str or a list of str') + for ext in file_formats: + file_handlers[ext] = handler + + +def register_handler(file_formats, **kwargs): + + def wrap(cls): + _register_handler(cls(**kwargs), file_formats) + return cls + + return wrap diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py new file mode 100644 index 000000000000..f60f0d611b8d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py @@ -0,0 +1,97 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +from io import StringIO + +from .file_client import FileClient + + +def list_from_file(filename, + prefix='', + offset=0, + max_num=0, + encoding='utf-8', + file_client_args=None): + """Load a text file and parse the content as a list of strings. + + Note: + In v1.3.16 and later, ``list_from_file`` supports loading a text file + which can be storaged in different backends and parsing the content as + a list for strings. + + Args: + filename (str): Filename. + prefix (str): The prefix to be inserted to the beginning of each item. + offset (int): The offset of lines. + max_num (int): The maximum number of lines to be read, + zeros and negatives mean no limitation. + encoding (str): Encoding used to open the file. Default utf-8. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> list_from_file('/path/of/your/file') # disk + ['hello', 'world'] + >>> list_from_file('s3://path/of/your/file') # ceph or petrel + ['hello', 'world'] + + Returns: + list[str]: A list of strings. + """ + cnt = 0 + item_list = [] + file_client = FileClient.infer_client(file_client_args, filename) + with StringIO(file_client.get_text(filename, encoding)) as f: + for _ in range(offset): + f.readline() + for line in f: + if 0 < max_num <= cnt: + break + item_list.append(prefix + line.rstrip('\n\r')) + cnt += 1 + return item_list + + +def dict_from_file(filename, + key_type=str, + encoding='utf-8', + file_client_args=None): + """Load a text file and parse the content as a dict. + + Each line of the text file will be two or more columns split by + whitespaces or tabs. The first column will be parsed as dict keys, and + the following columns will be parsed as dict values. + + Note: + In v1.3.16 and later, ``dict_from_file`` supports loading a text file + which can be storaged in different backends and parsing the content as + a dict. + + Args: + filename(str): Filename. + key_type(type): Type of the dict keys. str is user by default and + type conversion will be performed if specified. + encoding (str): Encoding used to open the file. Default utf-8. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> dict_from_file('/path/of/your/file') # disk + {'key1': 'value1', 'key2': 'value2'} + >>> dict_from_file('s3://path/of/your/file') # ceph or petrel + {'key1': 'value1', 'key2': 'value2'} + + Returns: + dict: The parsed contents. + """ + mapping = {} + file_client = FileClient.infer_client(file_client_args, filename) + with StringIO(file_client.get_text(filename, encoding)) as f: + for line in f: + items = line.rstrip('\n').split() + assert len(items) >= 2 + key = key_type(items[0]) + val = items[1:] if len(items) > 2 else items[1] + mapping[key] = val + return mapping diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py new file mode 100644 index 000000000000..d0051d609d3d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .colorspace import (bgr2gray, bgr2hls, bgr2hsv, bgr2rgb, bgr2ycbcr, + gray2bgr, gray2rgb, hls2bgr, hsv2bgr, imconvert, + rgb2bgr, rgb2gray, rgb2ycbcr, ycbcr2bgr, ycbcr2rgb) +from .geometric import (cutout, imcrop, imflip, imflip_, impad, + impad_to_multiple, imrescale, imresize, imresize_like, + imresize_to_multiple, imrotate, imshear, imtranslate, + rescale_size) +from .io import imfrombytes, imread, imwrite, supported_backends, use_backend +from .misc import tensor2imgs +from .photometric import (adjust_brightness, adjust_color, adjust_contrast, + adjust_lighting, adjust_sharpness, auto_contrast, + clahe, imdenormalize, imequalize, iminvert, + imnormalize, imnormalize_, lut_transform, posterize, + solarize) + +__all__ = [ + 'bgr2gray', 'bgr2hls', 'bgr2hsv', 'bgr2rgb', 'gray2bgr', 'gray2rgb', + 'hls2bgr', 'hsv2bgr', 'imconvert', 'rgb2bgr', 'rgb2gray', 'imrescale', + 'imresize', 'imresize_like', 'imresize_to_multiple', 'rescale_size', + 'imcrop', 'imflip', 'imflip_', 'impad', 'impad_to_multiple', 'imrotate', + 'imfrombytes', 'imread', 'imwrite', 'supported_backends', 'use_backend', + 'imdenormalize', 'imnormalize', 'imnormalize_', 'iminvert', 'posterize', + 'solarize', 'rgb2ycbcr', 'bgr2ycbcr', 'ycbcr2rgb', 'ycbcr2bgr', + 'tensor2imgs', 'imshear', 'imtranslate', 'adjust_color', 'imequalize', + 'adjust_brightness', 'adjust_contrast', 'lut_transform', 'clahe', + 'adjust_sharpness', 'auto_contrast', 'cutout', 'adjust_lighting' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py new file mode 100644 index 000000000000..814533952fdf --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py @@ -0,0 +1,306 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import cv2 +import numpy as np + + +def imconvert(img, src, dst): + """Convert an image from the src colorspace to dst colorspace. + + Args: + img (ndarray): The input image. + src (str): The source colorspace, e.g., 'rgb', 'hsv'. + dst (str): The destination colorspace, e.g., 'rgb', 'hsv'. + + Returns: + ndarray: The converted image. + """ + code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}') + out_img = cv2.cvtColor(img, code) + return out_img + + +def bgr2gray(img, keepdim=False): + """Convert a BGR image to grayscale image. + + Args: + img (ndarray): The input image. + keepdim (bool): If False (by default), then return the grayscale image + with 2 dims, otherwise 3 dims. + + Returns: + ndarray: The converted grayscale image. + """ + out_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + if keepdim: + out_img = out_img[..., None] + return out_img + + +def rgb2gray(img, keepdim=False): + """Convert a RGB image to grayscale image. + + Args: + img (ndarray): The input image. + keepdim (bool): If False (by default), then return the grayscale image + with 2 dims, otherwise 3 dims. + + Returns: + ndarray: The converted grayscale image. + """ + out_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) + if keepdim: + out_img = out_img[..., None] + return out_img + + +def gray2bgr(img): + """Convert a grayscale image to BGR image. + + Args: + img (ndarray): The input image. + + Returns: + ndarray: The converted BGR image. + """ + img = img[..., None] if img.ndim == 2 else img + out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + return out_img + + +def gray2rgb(img): + """Convert a grayscale image to RGB image. + + Args: + img (ndarray): The input image. + + Returns: + ndarray: The converted RGB image. + """ + img = img[..., None] if img.ndim == 2 else img + out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) + return out_img + + +def _convert_input_type_range(img): + """Convert the type and range of the input image. + + It converts the input image to np.float32 type and range of [0, 1]. + It is mainly used for pre-processing the input image in colorspace + conversion functions such as rgb2ycbcr and ycbcr2rgb. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + + Returns: + (ndarray): The converted image with type of np.float32 and range of + [0, 1]. + """ + img_type = img.dtype + img = img.astype(np.float32) + if img_type == np.float32: + pass + elif img_type == np.uint8: + img /= 255. + else: + raise TypeError('The img type should be np.float32 or np.uint8, ' + f'but got {img_type}') + return img + + +def _convert_output_type_range(img, dst_type): + """Convert the type and range of the image according to dst_type. + + It converts the image to desired type and range. If `dst_type` is np.uint8, + images will be converted to np.uint8 type with range [0, 255]. If + `dst_type` is np.float32, it converts the image to np.float32 type with + range [0, 1]. + It is mainly used for post-processing images in colorspace conversion + functions such as rgb2ycbcr and ycbcr2rgb. + + Args: + img (ndarray): The image to be converted with np.float32 type and + range [0, 255]. + dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it + converts the image to np.uint8 type with range [0, 255]. If + dst_type is np.float32, it converts the image to np.float32 type + with range [0, 1]. + + Returns: + (ndarray): The converted image with desired type and range. + """ + if dst_type not in (np.uint8, np.float32): + raise TypeError('The dst_type should be np.float32 or np.uint8, ' + f'but got {dst_type}') + if dst_type == np.uint8: + img = img.round() + else: + img /= 255. + return img.astype(dst_type) + + +def rgb2ycbcr(img, y_only=False): + """Convert a RGB image to YCbCr image. + + This function produces the same results as Matlab's `rgb2ycbcr` function. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `RGB <-> YCrCb`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + y_only (bool): Whether to only return Y channel. Default: False. + + Returns: + ndarray: The converted YCbCr image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) + if y_only: + out_img = np.dot(img, [65.481, 128.553, 24.966]) + 16.0 + else: + out_img = np.matmul( + img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786], + [24.966, 112.0, -18.214]]) + [16, 128, 128] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def bgr2ycbcr(img, y_only=False): + """Convert a BGR image to YCbCr image. + + The bgr version of rgb2ycbcr. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + y_only (bool): Whether to only return Y channel. Default: False. + + Returns: + ndarray: The converted YCbCr image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) + if y_only: + out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0 + else: + out_img = np.matmul( + img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786], + [65.481, -37.797, 112.0]]) + [16, 128, 128] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def ycbcr2rgb(img): + """Convert a YCbCr image to RGB image. + + This function produces the same results as Matlab's ycbcr2rgb function. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `YCrCb <-> RGB`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + + Returns: + ndarray: The converted RGB image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) * 255 + out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621], + [0, -0.00153632, 0.00791071], + [0.00625893, -0.00318811, 0]]) * 255.0 + [ + -222.921, 135.576, -276.836 + ] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def ycbcr2bgr(img): + """Convert a YCbCr image to BGR image. + + The bgr version of ycbcr2rgb. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `YCrCb <-> BGR`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + + Returns: + ndarray: The converted BGR image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) * 255 + out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621], + [0.00791071, -0.00153632, 0], + [0, -0.00318811, 0.00625893]]) * 255.0 + [ + -276.836, 135.576, -222.921 + ] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def convert_color_factory(src, dst): + + code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}') + + def convert_color(img): + out_img = cv2.cvtColor(img, code) + return out_img + + convert_color.__doc__ = f"""Convert a {src.upper()} image to {dst.upper()} + image. + + Args: + img (ndarray or str): The input image. + + Returns: + ndarray: The converted {dst.upper()} image. + """ + + return convert_color + + +bgr2rgb = convert_color_factory('bgr', 'rgb') + +rgb2bgr = convert_color_factory('rgb', 'bgr') + +bgr2hsv = convert_color_factory('bgr', 'hsv') + +hsv2bgr = convert_color_factory('hsv', 'bgr') + +bgr2hls = convert_color_factory('bgr', 'hls') + +hls2bgr = convert_color_factory('hls', 'bgr') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py new file mode 100644 index 000000000000..cf97c201cb4e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py @@ -0,0 +1,728 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numbers + +import cv2 +import numpy as np + +from ..utils import to_2tuple +from .io import imread_backend + +try: + from PIL import Image +except ImportError: + Image = None + + +def _scale_size(size, scale): + """Rescale a size by a ratio. + + Args: + size (tuple[int]): (w, h). + scale (float | tuple(float)): Scaling factor. + + Returns: + tuple[int]: scaled size. + """ + if isinstance(scale, (float, int)): + scale = (scale, scale) + w, h = size + return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5) + + +cv2_interp_codes = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'bicubic': cv2.INTER_CUBIC, + 'area': cv2.INTER_AREA, + 'lanczos': cv2.INTER_LANCZOS4 +} + +if Image is not None: + pillow_interp_codes = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + +def imresize(img, + size, + return_scale=False, + interpolation='bilinear', + out=None, + backend=None): + """Resize image to a given size. + + Args: + img (ndarray): The input image. + size (tuple[int]): Target size (w, h). + return_scale (bool): Whether to return `w_scale` and `h_scale`. + interpolation (str): Interpolation method, accepted values are + "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' + backend, "nearest", "bilinear" for 'pillow' backend. + out (ndarray): The output destination. + backend (str | None): The image resize backend type. Options are `cv2`, + `pillow`, `None`. If backend is None, the global imread_backend + specified by ``mmcv.use_backend()`` will be used. Default: None. + + Returns: + tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or + `resized_img`. + """ + h, w = img.shape[:2] + if backend is None: + backend = imread_backend + if backend not in ['cv2', 'pillow']: + raise ValueError(f'backend: {backend} is not supported for resize.' + f"Supported backends are 'cv2', 'pillow'") + + if backend == 'pillow': + assert img.dtype == np.uint8, 'Pillow backend only support uint8 type' + pil_image = Image.fromarray(img) + pil_image = pil_image.resize(size, pillow_interp_codes[interpolation]) + resized_img = np.array(pil_image) + else: + resized_img = cv2.resize( + img, size, dst=out, interpolation=cv2_interp_codes[interpolation]) + if not return_scale: + return resized_img + else: + w_scale = size[0] / w + h_scale = size[1] / h + return resized_img, w_scale, h_scale + + +def imresize_to_multiple(img, + divisor, + size=None, + scale_factor=None, + keep_ratio=False, + return_scale=False, + interpolation='bilinear', + out=None, + backend=None): + """Resize image according to a given size or scale factor and then rounds + up the the resized or rescaled image size to the nearest value that can be + divided by the divisor. + + Args: + img (ndarray): The input image. + divisor (int | tuple): Resized image size will be a multiple of + divisor. If divisor is a tuple, divisor should be + (w_divisor, h_divisor). + size (None | int | tuple[int]): Target size (w, h). Default: None. + scale_factor (None | float | tuple[float]): Multiplier for spatial + size. Should match input size if it is a tuple and the 2D style is + (w_scale_factor, h_scale_factor). Default: None. + keep_ratio (bool): Whether to keep the aspect ratio when resizing the + image. Default: False. + return_scale (bool): Whether to return `w_scale` and `h_scale`. + interpolation (str): Interpolation method, accepted values are + "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' + backend, "nearest", "bilinear" for 'pillow' backend. + out (ndarray): The output destination. + backend (str | None): The image resize backend type. Options are `cv2`, + `pillow`, `None`. If backend is None, the global imread_backend + specified by ``mmcv.use_backend()`` will be used. Default: None. + + Returns: + tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or + `resized_img`. + """ + h, w = img.shape[:2] + if size is not None and scale_factor is not None: + raise ValueError('only one of size or scale_factor should be defined') + elif size is None and scale_factor is None: + raise ValueError('one of size or scale_factor should be defined') + elif size is not None: + size = to_2tuple(size) + if keep_ratio: + size = rescale_size((w, h), size, return_scale=False) + else: + size = _scale_size((w, h), scale_factor) + + divisor = to_2tuple(divisor) + size = tuple([int(np.ceil(s / d)) * d for s, d in zip(size, divisor)]) + resized_img, w_scale, h_scale = imresize( + img, + size, + return_scale=True, + interpolation=interpolation, + out=out, + backend=backend) + if return_scale: + return resized_img, w_scale, h_scale + else: + return resized_img + + +def imresize_like(img, + dst_img, + return_scale=False, + interpolation='bilinear', + backend=None): + """Resize image to the same size of a given image. + + Args: + img (ndarray): The input image. + dst_img (ndarray): The target image. + return_scale (bool): Whether to return `w_scale` and `h_scale`. + interpolation (str): Same as :func:`resize`. + backend (str | None): Same as :func:`resize`. + + Returns: + tuple or ndarray: (`resized_img`, `w_scale`, `h_scale`) or + `resized_img`. + """ + h, w = dst_img.shape[:2] + return imresize(img, (w, h), return_scale, interpolation, backend=backend) + + +def rescale_size(old_size, scale, return_scale=False): + """Calculate the new size to be rescaled to. + + Args: + old_size (tuple[int]): The old size (w, h) of image. + scale (float | tuple[int]): The scaling factor or maximum size. + If it is a float number, then the image will be rescaled by this + factor, else if it is a tuple of 2 integers, then the image will + be rescaled as large as possible within the scale. + return_scale (bool): Whether to return the scaling factor besides the + rescaled image size. + + Returns: + tuple[int]: The new rescaled image size. + """ + w, h = old_size + if isinstance(scale, (float, int)): + if scale <= 0: + raise ValueError(f'Invalid scale {scale}, must be positive.') + scale_factor = scale + elif isinstance(scale, tuple): + max_long_edge = max(scale) + max_short_edge = min(scale) + scale_factor = min(max_long_edge / max(h, w), + max_short_edge / min(h, w)) + else: + raise TypeError( + f'Scale must be a number or tuple of int, but got {type(scale)}') + + new_size = _scale_size((w, h), scale_factor) + + if return_scale: + return new_size, scale_factor + else: + return new_size + + +def imrescale(img, + scale, + return_scale=False, + interpolation='bilinear', + backend=None): + """Resize image while keeping the aspect ratio. + + Args: + img (ndarray): The input image. + scale (float | tuple[int]): The scaling factor or maximum size. + If it is a float number, then the image will be rescaled by this + factor, else if it is a tuple of 2 integers, then the image will + be rescaled as large as possible within the scale. + return_scale (bool): Whether to return the scaling factor besides the + rescaled image. + interpolation (str): Same as :func:`resize`. + backend (str | None): Same as :func:`resize`. + + Returns: + ndarray: The rescaled image. + """ + h, w = img.shape[:2] + new_size, scale_factor = rescale_size((w, h), scale, return_scale=True) + rescaled_img = imresize( + img, new_size, interpolation=interpolation, backend=backend) + if return_scale: + return rescaled_img, scale_factor + else: + return rescaled_img + + +def imflip(img, direction='horizontal'): + """Flip an image horizontally or vertically. + + Args: + img (ndarray): Image to be flipped. + direction (str): The flip direction, either "horizontal" or + "vertical" or "diagonal". + + Returns: + ndarray: The flipped image. + """ + assert direction in ['horizontal', 'vertical', 'diagonal'] + if direction == 'horizontal': + return np.flip(img, axis=1) + elif direction == 'vertical': + return np.flip(img, axis=0) + else: + return np.flip(img, axis=(0, 1)) + + +def imflip_(img, direction='horizontal'): + """Inplace flip an image horizontally or vertically. + + Args: + img (ndarray): Image to be flipped. + direction (str): The flip direction, either "horizontal" or + "vertical" or "diagonal". + + Returns: + ndarray: The flipped image (inplace). + """ + assert direction in ['horizontal', 'vertical', 'diagonal'] + if direction == 'horizontal': + return cv2.flip(img, 1, img) + elif direction == 'vertical': + return cv2.flip(img, 0, img) + else: + return cv2.flip(img, -1, img) + + +def imrotate(img, + angle, + center=None, + scale=1.0, + border_value=0, + interpolation='bilinear', + auto_bound=False): + """Rotate an image. + + Args: + img (ndarray): Image to be rotated. + angle (float): Rotation angle in degrees, positive values mean + clockwise rotation. + center (tuple[float], optional): Center point (w, h) of the rotation in + the source image. If not specified, the center of the image will be + used. + scale (float): Isotropic scale factor. + border_value (int): Border value. + interpolation (str): Same as :func:`resize`. + auto_bound (bool): Whether to adjust the image size to cover the whole + rotated image. + + Returns: + ndarray: The rotated image. + """ + if center is not None and auto_bound: + raise ValueError('`auto_bound` conflicts with `center`') + h, w = img.shape[:2] + if center is None: + center = ((w - 1) * 0.5, (h - 1) * 0.5) + assert isinstance(center, tuple) + + matrix = cv2.getRotationMatrix2D(center, -angle, scale) + if auto_bound: + cos = np.abs(matrix[0, 0]) + sin = np.abs(matrix[0, 1]) + new_w = h * sin + w * cos + new_h = h * cos + w * sin + matrix[0, 2] += (new_w - w) * 0.5 + matrix[1, 2] += (new_h - h) * 0.5 + w = int(np.round(new_w)) + h = int(np.round(new_h)) + rotated = cv2.warpAffine( + img, + matrix, (w, h), + flags=cv2_interp_codes[interpolation], + borderValue=border_value) + return rotated + + +def bbox_clip(bboxes, img_shape): + """Clip bboxes to fit the image shape. + + Args: + bboxes (ndarray): Shape (..., 4*k) + img_shape (tuple[int]): (height, width) of the image. + + Returns: + ndarray: Clipped bboxes. + """ + assert bboxes.shape[-1] % 4 == 0 + cmin = np.empty(bboxes.shape[-1], dtype=bboxes.dtype) + cmin[0::2] = img_shape[1] - 1 + cmin[1::2] = img_shape[0] - 1 + clipped_bboxes = np.maximum(np.minimum(bboxes, cmin), 0) + return clipped_bboxes + + +def bbox_scaling(bboxes, scale, clip_shape=None): + """Scaling bboxes w.r.t the box center. + + Args: + bboxes (ndarray): Shape(..., 4). + scale (float): Scaling factor. + clip_shape (tuple[int], optional): If specified, bboxes that exceed the + boundary will be clipped according to the given shape (h, w). + + Returns: + ndarray: Scaled bboxes. + """ + if float(scale) == 1.0: + scaled_bboxes = bboxes.copy() + else: + w = bboxes[..., 2] - bboxes[..., 0] + 1 + h = bboxes[..., 3] - bboxes[..., 1] + 1 + dw = (w * (scale - 1)) * 0.5 + dh = (h * (scale - 1)) * 0.5 + scaled_bboxes = bboxes + np.stack((-dw, -dh, dw, dh), axis=-1) + if clip_shape is not None: + return bbox_clip(scaled_bboxes, clip_shape) + else: + return scaled_bboxes + + +def imcrop(img, bboxes, scale=1.0, pad_fill=None): + """Crop image patches. + + 3 steps: scale the bboxes -> clip bboxes -> crop and pad. + + Args: + img (ndarray): Image to be cropped. + bboxes (ndarray): Shape (k, 4) or (4, ), location of cropped bboxes. + scale (float, optional): Scale ratio of bboxes, the default value + 1.0 means no padding. + pad_fill (Number | list[Number]): Value to be filled for padding. + Default: None, which means no padding. + + Returns: + list[ndarray] | ndarray: The cropped image patches. + """ + chn = 1 if img.ndim == 2 else img.shape[2] + if pad_fill is not None: + if isinstance(pad_fill, (int, float)): + pad_fill = [pad_fill for _ in range(chn)] + assert len(pad_fill) == chn + + _bboxes = bboxes[None, ...] if bboxes.ndim == 1 else bboxes + scaled_bboxes = bbox_scaling(_bboxes, scale).astype(np.int32) + clipped_bbox = bbox_clip(scaled_bboxes, img.shape) + + patches = [] + for i in range(clipped_bbox.shape[0]): + x1, y1, x2, y2 = tuple(clipped_bbox[i, :]) + if pad_fill is None: + patch = img[y1:y2 + 1, x1:x2 + 1, ...] + else: + _x1, _y1, _x2, _y2 = tuple(scaled_bboxes[i, :]) + if chn == 1: + patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1) + else: + patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1, chn) + patch = np.array( + pad_fill, dtype=img.dtype) * np.ones( + patch_shape, dtype=img.dtype) + x_start = 0 if _x1 >= 0 else -_x1 + y_start = 0 if _y1 >= 0 else -_y1 + w = x2 - x1 + 1 + h = y2 - y1 + 1 + patch[y_start:y_start + h, x_start:x_start + w, + ...] = img[y1:y1 + h, x1:x1 + w, ...] + patches.append(patch) + + if bboxes.ndim == 1: + return patches[0] + else: + return patches + + +def impad(img, + *, + shape=None, + padding=None, + pad_val=0, + padding_mode='constant'): + """Pad the given image to a certain shape or pad on all sides with + specified padding mode and padding value. + + Args: + img (ndarray): Image to be padded. + shape (tuple[int]): Expected padding shape (h, w). Default: None. + padding (int or tuple[int]): Padding on each border. If a single int is + provided this is used to pad all borders. If tuple of length 2 is + provided this is the padding on left/right and top/bottom + respectively. If a tuple of length 4 is provided this is the + padding for the left, top, right and bottom borders respectively. + Default: None. Note that `shape` and `padding` can not be both + set. + pad_val (Number | Sequence[Number]): Values to be filled in padding + areas when padding_mode is 'constant'. Default: 0. + padding_mode (str): Type of padding. Should be: constant, edge, + reflect or symmetric. Default: constant. + + - constant: pads with a constant value, this value is specified + with pad_val. + - edge: pads with the last value at the edge of the image. + - reflect: pads with reflection of image without repeating the + last value on the edge. For example, padding [1, 2, 3, 4] + with 2 elements on both sides in reflect mode will result + in [3, 2, 1, 2, 3, 4, 3, 2]. + - symmetric: pads with reflection of image repeating the last + value on the edge. For example, padding [1, 2, 3, 4] with + 2 elements on both sides in symmetric mode will result in + [2, 1, 1, 2, 3, 4, 4, 3] + + Returns: + ndarray: The padded image. + """ + + assert (shape is not None) ^ (padding is not None) + if shape is not None: + padding = (0, 0, shape[1] - img.shape[1], shape[0] - img.shape[0]) + + # check pad_val + if isinstance(pad_val, tuple): + assert len(pad_val) == img.shape[-1] + elif not isinstance(pad_val, numbers.Number): + raise TypeError('pad_val must be a int or a tuple. ' + f'But received {type(pad_val)}') + + # check padding + if isinstance(padding, tuple) and len(padding) in [2, 4]: + if len(padding) == 2: + padding = (padding[0], padding[1], padding[0], padding[1]) + elif isinstance(padding, numbers.Number): + padding = (padding, padding, padding, padding) + else: + raise ValueError('Padding must be a int or a 2, or 4 element tuple.' + f'But received {padding}') + + # check padding mode + assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'] + + border_type = { + 'constant': cv2.BORDER_CONSTANT, + 'edge': cv2.BORDER_REPLICATE, + 'reflect': cv2.BORDER_REFLECT_101, + 'symmetric': cv2.BORDER_REFLECT + } + img = cv2.copyMakeBorder( + img, + padding[1], + padding[3], + padding[0], + padding[2], + border_type[padding_mode], + value=pad_val) + + return img + + +def impad_to_multiple(img, divisor, pad_val=0): + """Pad an image to ensure each edge to be multiple to some number. + + Args: + img (ndarray): Image to be padded. + divisor (int): Padded image edges will be multiple to divisor. + pad_val (Number | Sequence[Number]): Same as :func:`impad`. + + Returns: + ndarray: The padded image. + """ + pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor + pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor + return impad(img, shape=(pad_h, pad_w), pad_val=pad_val) + + +def cutout(img, shape, pad_val=0): + """Randomly cut out a rectangle from the original img. + + Args: + img (ndarray): Image to be cutout. + shape (int | tuple[int]): Expected cutout shape (h, w). If given as a + int, the value will be used for both h and w. + pad_val (int | float | tuple[int | float]): Values to be filled in the + cut area. Defaults to 0. + + Returns: + ndarray: The cutout image. + """ + + channels = 1 if img.ndim == 2 else img.shape[2] + if isinstance(shape, int): + cut_h, cut_w = shape, shape + else: + assert isinstance(shape, tuple) and len(shape) == 2, \ + f'shape must be a int or a tuple with length 2, but got type ' \ + f'{type(shape)} instead.' + cut_h, cut_w = shape + if isinstance(pad_val, (int, float)): + pad_val = tuple([pad_val] * channels) + elif isinstance(pad_val, tuple): + assert len(pad_val) == channels, \ + 'Expected the num of elements in tuple equals the channels' \ + 'of input image. Found {} vs {}'.format( + len(pad_val), channels) + else: + raise TypeError(f'Invalid type {type(pad_val)} for `pad_val`') + + img_h, img_w = img.shape[:2] + y0 = np.random.uniform(img_h) + x0 = np.random.uniform(img_w) + + y1 = int(max(0, y0 - cut_h / 2.)) + x1 = int(max(0, x0 - cut_w / 2.)) + y2 = min(img_h, y1 + cut_h) + x2 = min(img_w, x1 + cut_w) + + if img.ndim == 2: + patch_shape = (y2 - y1, x2 - x1) + else: + patch_shape = (y2 - y1, x2 - x1, channels) + + img_cutout = img.copy() + patch = np.array( + pad_val, dtype=img.dtype) * np.ones( + patch_shape, dtype=img.dtype) + img_cutout[y1:y2, x1:x2, ...] = patch + + return img_cutout + + +def _get_shear_matrix(magnitude, direction='horizontal'): + """Generate the shear matrix for transformation. + + Args: + magnitude (int | float): The magnitude used for shear. + direction (str): The flip direction, either "horizontal" + or "vertical". + + Returns: + ndarray: The shear matrix with dtype float32. + """ + if direction == 'horizontal': + shear_matrix = np.float32([[1, magnitude, 0], [0, 1, 0]]) + elif direction == 'vertical': + shear_matrix = np.float32([[1, 0, 0], [magnitude, 1, 0]]) + return shear_matrix + + +def imshear(img, + magnitude, + direction='horizontal', + border_value=0, + interpolation='bilinear'): + """Shear an image. + + Args: + img (ndarray): Image to be sheared with format (h, w) + or (h, w, c). + magnitude (int | float): The magnitude used for shear. + direction (str): The flip direction, either "horizontal" + or "vertical". + border_value (int | tuple[int]): Value used in case of a + constant border. + interpolation (str): Same as :func:`resize`. + + Returns: + ndarray: The sheared image. + """ + assert direction in ['horizontal', + 'vertical'], f'Invalid direction: {direction}' + height, width = img.shape[:2] + if img.ndim == 2: + channels = 1 + elif img.ndim == 3: + channels = img.shape[-1] + if isinstance(border_value, int): + border_value = tuple([border_value] * channels) + elif isinstance(border_value, tuple): + assert len(border_value) == channels, \ + 'Expected the num of elements in tuple equals the channels' \ + 'of input image. Found {} vs {}'.format( + len(border_value), channels) + else: + raise ValueError( + f'Invalid type {type(border_value)} for `border_value`') + shear_matrix = _get_shear_matrix(magnitude, direction) + sheared = cv2.warpAffine( + img, + shear_matrix, + (width, height), + # Note case when the number elements in `border_value` + # greater than 3 (e.g. shearing masks whose channels large + # than 3) will raise TypeError in `cv2.warpAffine`. + # Here simply slice the first 3 values in `border_value`. + borderValue=border_value[:3], + flags=cv2_interp_codes[interpolation]) + return sheared + + +def _get_translate_matrix(offset, direction='horizontal'): + """Generate the translate matrix. + + Args: + offset (int | float): The offset used for translate. + direction (str): The translate direction, either + "horizontal" or "vertical". + + Returns: + ndarray: The translate matrix with dtype float32. + """ + if direction == 'horizontal': + translate_matrix = np.float32([[1, 0, offset], [0, 1, 0]]) + elif direction == 'vertical': + translate_matrix = np.float32([[1, 0, 0], [0, 1, offset]]) + return translate_matrix + + +def imtranslate(img, + offset, + direction='horizontal', + border_value=0, + interpolation='bilinear'): + """Translate an image. + + Args: + img (ndarray): Image to be translated with format + (h, w) or (h, w, c). + offset (int | float): The offset used for translate. + direction (str): The translate direction, either "horizontal" + or "vertical". + border_value (int | tuple[int]): Value used in case of a + constant border. + interpolation (str): Same as :func:`resize`. + + Returns: + ndarray: The translated image. + """ + assert direction in ['horizontal', + 'vertical'], f'Invalid direction: {direction}' + height, width = img.shape[:2] + if img.ndim == 2: + channels = 1 + elif img.ndim == 3: + channels = img.shape[-1] + if isinstance(border_value, int): + border_value = tuple([border_value] * channels) + elif isinstance(border_value, tuple): + assert len(border_value) == channels, \ + 'Expected the num of elements in tuple equals the channels' \ + 'of input image. Found {} vs {}'.format( + len(border_value), channels) + else: + raise ValueError( + f'Invalid type {type(border_value)} for `border_value`.') + translate_matrix = _get_translate_matrix(offset, direction) + translated = cv2.warpAffine( + img, + translate_matrix, + (width, height), + # Note case when the number elements in `border_value` + # greater than 3 (e.g. translating masks whose channels + # large than 3) will raise TypeError in `cv2.warpAffine`. + # Here simply slice the first 3 values in `border_value`. + borderValue=border_value[:3], + flags=cv2_interp_codes[interpolation]) + return translated diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py new file mode 100644 index 000000000000..cba7afefe60a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py @@ -0,0 +1,258 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import io +import os.path as osp +from pathlib import Path + +import cv2 +import numpy as np +from cv2 import (IMREAD_COLOR, IMREAD_GRAYSCALE, IMREAD_IGNORE_ORIENTATION, + IMREAD_UNCHANGED) + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import check_file_exist, is_str, mkdir_or_exist + +try: + from turbojpeg import TJCS_RGB, TJPF_BGR, TJPF_GRAY, TurboJPEG +except ImportError: + TJCS_RGB = TJPF_GRAY = TJPF_BGR = TurboJPEG = None + +try: + from PIL import Image, ImageOps +except ImportError: + Image = None + +try: + import tifffile +except ImportError: + tifffile = None + +jpeg = None +supported_backends = ['cv2', 'turbojpeg', 'pillow', 'tifffile'] + +imread_flags = { + 'color': IMREAD_COLOR, + 'grayscale': IMREAD_GRAYSCALE, + 'unchanged': IMREAD_UNCHANGED, + 'color_ignore_orientation': IMREAD_IGNORE_ORIENTATION | IMREAD_COLOR, + 'grayscale_ignore_orientation': + IMREAD_IGNORE_ORIENTATION | IMREAD_GRAYSCALE +} + +imread_backend = 'cv2' + + +def use_backend(backend): + """Select a backend for image decoding. + + Args: + backend (str): The image decoding backend type. Options are `cv2`, + `pillow`, `turbojpeg` (see https://github.com/lilohuang/PyTurboJPEG) + and `tifffile`. `turbojpeg` is faster but it only supports `.jpeg` + file format. + """ + assert backend in supported_backends + global imread_backend + imread_backend = backend + if imread_backend == 'turbojpeg': + if TurboJPEG is None: + raise ImportError('`PyTurboJPEG` is not installed') + global jpeg + if jpeg is None: + jpeg = TurboJPEG() + elif imread_backend == 'pillow': + if Image is None: + raise ImportError('`Pillow` is not installed') + elif imread_backend == 'tifffile': + if tifffile is None: + raise ImportError('`tifffile` is not installed') + + +def _jpegflag(flag='color', channel_order='bgr'): + channel_order = channel_order.lower() + if channel_order not in ['rgb', 'bgr']: + raise ValueError('channel order must be either "rgb" or "bgr"') + + if flag == 'color': + if channel_order == 'bgr': + return TJPF_BGR + elif channel_order == 'rgb': + return TJCS_RGB + elif flag == 'grayscale': + return TJPF_GRAY + else: + raise ValueError('flag must be "color" or "grayscale"') + + +def _pillow2array(img, flag='color', channel_order='bgr'): + """Convert a pillow image to numpy array. + + Args: + img (:obj:`PIL.Image.Image`): The image loaded using PIL + flag (str): Flags specifying the color type of a loaded image, + candidates are 'color', 'grayscale' and 'unchanged'. + Default to 'color'. + channel_order (str): The channel order of the output image array, + candidates are 'bgr' and 'rgb'. Default to 'bgr'. + + Returns: + np.ndarray: The converted numpy array + """ + channel_order = channel_order.lower() + if channel_order not in ['rgb', 'bgr']: + raise ValueError('channel order must be either "rgb" or "bgr"') + + if flag == 'unchanged': + array = np.array(img) + if array.ndim >= 3 and array.shape[2] >= 3: # color image + array[:, :, :3] = array[:, :, (2, 1, 0)] # RGB to BGR + else: + # Handle exif orientation tag + if flag in ['color', 'grayscale']: + img = ImageOps.exif_transpose(img) + # If the image mode is not 'RGB', convert it to 'RGB' first. + if img.mode != 'RGB': + if img.mode != 'LA': + # Most formats except 'LA' can be directly converted to RGB + img = img.convert('RGB') + else: + # When the mode is 'LA', the default conversion will fill in + # the canvas with black, which sometimes shadows black objects + # in the foreground. + # + # Therefore, a random color (124, 117, 104) is used for canvas + img_rgba = img.convert('RGBA') + img = Image.new('RGB', img_rgba.size, (124, 117, 104)) + img.paste(img_rgba, mask=img_rgba.split()[3]) # 3 is alpha + if flag in ['color', 'color_ignore_orientation']: + array = np.array(img) + if channel_order != 'rgb': + array = array[:, :, ::-1] # RGB to BGR + elif flag in ['grayscale', 'grayscale_ignore_orientation']: + img = img.convert('L') + array = np.array(img) + else: + raise ValueError( + 'flag must be "color", "grayscale", "unchanged", ' + f'"color_ignore_orientation" or "grayscale_ignore_orientation"' + f' but got {flag}') + return array + + +def imread(img_or_path, flag='color', channel_order='bgr', backend=None): + """Read an image. + + Args: + img_or_path (ndarray or str or Path): Either a numpy array or str or + pathlib.Path. If it is a numpy array (loaded image), then + it will be returned as is. + flag (str): Flags specifying the color type of a loaded image, + candidates are `color`, `grayscale`, `unchanged`, + `color_ignore_orientation` and `grayscale_ignore_orientation`. + By default, `cv2` and `pillow` backend would rotate the image + according to its EXIF info unless called with `unchanged` or + `*_ignore_orientation` flags. `turbojpeg` and `tifffile` backend + always ignore image's EXIF info regardless of the flag. + The `turbojpeg` backend only supports `color` and `grayscale`. + channel_order (str): Order of channel, candidates are `bgr` and `rgb`. + backend (str | None): The image decoding backend type. Options are + `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`. + If backend is None, the global imread_backend specified by + ``mmcv.use_backend()`` will be used. Default: None. + + Returns: + ndarray: Loaded image array. + """ + + if backend is None: + backend = imread_backend + if backend not in supported_backends: + raise ValueError(f'backend: {backend} is not supported. Supported ' + "backends are 'cv2', 'turbojpeg', 'pillow'") + if isinstance(img_or_path, Path): + img_or_path = str(img_or_path) + + if isinstance(img_or_path, np.ndarray): + return img_or_path + elif is_str(img_or_path): + check_file_exist(img_or_path, + f'img file does not exist: {img_or_path}') + if backend == 'turbojpeg': + with open(img_or_path, 'rb') as in_file: + img = jpeg.decode(in_file.read(), + _jpegflag(flag, channel_order)) + if img.shape[-1] == 1: + img = img[:, :, 0] + return img + elif backend == 'pillow': + img = Image.open(img_or_path) + img = _pillow2array(img, flag, channel_order) + return img + elif backend == 'tifffile': + img = tifffile.imread(img_or_path) + return img + else: + flag = imread_flags[flag] if is_str(flag) else flag + img = cv2.imread(img_or_path, flag) + if flag == IMREAD_COLOR and channel_order == 'rgb': + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) + return img + else: + raise TypeError('"img" must be a numpy array or a str or ' + 'a pathlib.Path object') + + +def imfrombytes(content, flag='color', channel_order='bgr', backend=None): + """Read an image from bytes. + + Args: + content (bytes): Image bytes got from files or other streams. + flag (str): Same as :func:`imread`. + backend (str | None): The image decoding backend type. Options are + `cv2`, `pillow`, `turbojpeg`, `None`. If backend is None, the + global imread_backend specified by ``mmcv.use_backend()`` will be + used. Default: None. + + Returns: + ndarray: Loaded image array. + """ + + if backend is None: + backend = imread_backend + if backend not in supported_backends: + raise ValueError(f'backend: {backend} is not supported. Supported ' + "backends are 'cv2', 'turbojpeg', 'pillow'") + if backend == 'turbojpeg': + img = jpeg.decode(content, _jpegflag(flag, channel_order)) + if img.shape[-1] == 1: + img = img[:, :, 0] + return img + elif backend == 'pillow': + buff = io.BytesIO(content) + img = Image.open(buff) + img = _pillow2array(img, flag, channel_order) + return img + else: + img_np = np.frombuffer(content, np.uint8) + flag = imread_flags[flag] if is_str(flag) else flag + img = cv2.imdecode(img_np, flag) + if flag == IMREAD_COLOR and channel_order == 'rgb': + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) + return img + + +def imwrite(img, file_path, params=None, auto_mkdir=True): + """Write image to file. + + Args: + img (ndarray): Image array to be written. + file_path (str): Image file path. + params (None or list): Same as opencv :func:`imwrite` interface. + auto_mkdir (bool): If the parent folder of `file_path` does not exist, + whether to create it automatically. + + Returns: + bool: Successful or not. + """ + if auto_mkdir: + dir_name = osp.abspath(osp.dirname(file_path)) + mkdir_or_exist(dir_name) + return cv2.imwrite(file_path, img, params) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py new file mode 100644 index 000000000000..b41891ffb0a5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py @@ -0,0 +1,44 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + +try: + import torch +except ImportError: + torch = None + + +def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): + """Convert tensor to 3-channel images. + + Args: + tensor (torch.Tensor): Tensor that contains multiple images, shape ( + N, C, H, W). + mean (tuple[float], optional): Mean of images. Defaults to (0, 0, 0). + std (tuple[float], optional): Standard deviation of images. + Defaults to (1, 1, 1). + to_rgb (bool, optional): Whether the tensor was converted to RGB + format in the first place. If so, convert it back to BGR. + Defaults to True. + + Returns: + list[np.ndarray]: A list that contains multiple images. + """ + + if torch is None: + raise RuntimeError('pytorch is not installed') + assert torch.is_tensor(tensor) and tensor.ndim == 4 + assert len(mean) == 3 + assert len(std) == 3 + + num_imgs = tensor.size(0) + mean = np.array(mean, dtype=np.float32) + std = np.array(std, dtype=np.float32) + imgs = [] + for img_id in range(num_imgs): + img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) + img = mmcv.imdenormalize( + img, mean, std, to_bgr=to_rgb).astype(np.uint8) + imgs.append(np.ascontiguousarray(img)) + return imgs diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py new file mode 100644 index 000000000000..5085d012019c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py @@ -0,0 +1,428 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import cv2 +import numpy as np + +from ..utils import is_tuple_of +from .colorspace import bgr2gray, gray2bgr + + +def imnormalize(img, mean, std, to_rgb=True): + """Normalize an image with mean and std. + + Args: + img (ndarray): Image to be normalized. + mean (ndarray): The mean to be used for normalize. + std (ndarray): The std to be used for normalize. + to_rgb (bool): Whether to convert to rgb. + + Returns: + ndarray: The normalized image. + """ + img = img.copy().astype(np.float32) + return imnormalize_(img, mean, std, to_rgb) + + +def imnormalize_(img, mean, std, to_rgb=True): + """Inplace normalize an image with mean and std. + + Args: + img (ndarray): Image to be normalized. + mean (ndarray): The mean to be used for normalize. + std (ndarray): The std to be used for normalize. + to_rgb (bool): Whether to convert to rgb. + + Returns: + ndarray: The normalized image. + """ + # cv2 inplace normalization does not accept uint8 + assert img.dtype != np.uint8 + mean = np.float64(mean.reshape(1, -1)) + stdinv = 1 / np.float64(std.reshape(1, -1)) + if to_rgb: + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace + cv2.subtract(img, mean, img) # inplace + cv2.multiply(img, stdinv, img) # inplace + return img + + +def imdenormalize(img, mean, std, to_bgr=True): + assert img.dtype != np.uint8 + mean = mean.reshape(1, -1).astype(np.float64) + std = std.reshape(1, -1).astype(np.float64) + img = cv2.multiply(img, std) # make a copy + cv2.add(img, mean, img) # inplace + if to_bgr: + cv2.cvtColor(img, cv2.COLOR_RGB2BGR, img) # inplace + return img + + +def iminvert(img): + """Invert (negate) an image. + + Args: + img (ndarray): Image to be inverted. + + Returns: + ndarray: The inverted image. + """ + return np.full_like(img, 255) - img + + +def solarize(img, thr=128): + """Solarize an image (invert all pixel values above a threshold) + + Args: + img (ndarray): Image to be solarized. + thr (int): Threshold for solarizing (0 - 255). + + Returns: + ndarray: The solarized image. + """ + img = np.where(img < thr, img, 255 - img) + return img + + +def posterize(img, bits): + """Posterize an image (reduce the number of bits for each color channel) + + Args: + img (ndarray): Image to be posterized. + bits (int): Number of bits (1 to 8) to use for posterizing. + + Returns: + ndarray: The posterized image. + """ + shift = 8 - bits + img = np.left_shift(np.right_shift(img, shift), shift) + return img + + +def adjust_color(img, alpha=1, beta=None, gamma=0): + r"""It blends the source image and its gray image: + + .. math:: + output = img * alpha + gray\_img * beta + gamma + + Args: + img (ndarray): The input source image. + alpha (int | float): Weight for the source image. Default 1. + beta (int | float): Weight for the converted gray image. + If None, it's assigned the value (1 - `alpha`). + gamma (int | float): Scalar added to each sum. + Same as :func:`cv2.addWeighted`. Default 0. + + Returns: + ndarray: Colored image which has the same size and dtype as input. + """ + gray_img = bgr2gray(img) + gray_img = np.tile(gray_img[..., None], [1, 1, 3]) + if beta is None: + beta = 1 - alpha + colored_img = cv2.addWeighted(img, alpha, gray_img, beta, gamma) + if not colored_img.dtype == np.uint8: + # Note when the dtype of `img` is not the default `np.uint8` + # (e.g. np.float32), the value in `colored_img` got from cv2 + # is not guaranteed to be in range [0, 255], so here clip + # is needed. + colored_img = np.clip(colored_img, 0, 255) + return colored_img + + +def imequalize(img): + """Equalize the image histogram. + + This function applies a non-linear mapping to the input image, + in order to create a uniform distribution of grayscale values + in the output image. + + Args: + img (ndarray): Image to be equalized. + + Returns: + ndarray: The equalized image. + """ + + def _scale_channel(im, c): + """Scale the data in the corresponding channel.""" + im = im[:, :, c] + # Compute the histogram of the image channel. + histo = np.histogram(im, 256, (0, 255))[0] + # For computing the step, filter out the nonzeros. + nonzero_histo = histo[histo > 0] + step = (np.sum(nonzero_histo) - nonzero_histo[-1]) // 255 + if not step: + lut = np.array(range(256)) + else: + # Compute the cumulative sum, shifted by step // 2 + # and then normalized by step. + lut = (np.cumsum(histo) + (step // 2)) // step + # Shift lut, prepending with 0. + lut = np.concatenate([[0], lut[:-1]], 0) + # handle potential integer overflow + lut[lut > 255] = 255 + # If step is zero, return the original image. + # Otherwise, index from lut. + return np.where(np.equal(step, 0), im, lut[im]) + + # Scales each channel independently and then stacks + # the result. + s1 = _scale_channel(img, 0) + s2 = _scale_channel(img, 1) + s3 = _scale_channel(img, 2) + equalized_img = np.stack([s1, s2, s3], axis=-1) + return equalized_img.astype(img.dtype) + + +def adjust_brightness(img, factor=1.): + """Adjust image brightness. + + This function controls the brightness of an image. An + enhancement factor of 0.0 gives a black image. + A factor of 1.0 gives the original image. This function + blends the source image and the degenerated black image: + + .. math:: + output = img * factor + degenerated * (1 - factor) + + Args: + img (ndarray): Image to be brightened. + factor (float): A value controls the enhancement. + Factor 1.0 returns the original image, lower + factors mean less color (brightness, contrast, + etc), and higher values more. Default 1. + + Returns: + ndarray: The brightened image. + """ + degenerated = np.zeros_like(img) + # Note manually convert the dtype to np.float32, to + # achieve as close results as PIL.ImageEnhance.Brightness. + # Set beta=1-factor, and gamma=0 + brightened_img = cv2.addWeighted( + img.astype(np.float32), factor, degenerated.astype(np.float32), + 1 - factor, 0) + brightened_img = np.clip(brightened_img, 0, 255) + return brightened_img.astype(img.dtype) + + +def adjust_contrast(img, factor=1.): + """Adjust image contrast. + + This function controls the contrast of an image. An + enhancement factor of 0.0 gives a solid grey + image. A factor of 1.0 gives the original image. It + blends the source image and the degenerated mean image: + + .. math:: + output = img * factor + degenerated * (1 - factor) + + Args: + img (ndarray): Image to be contrasted. BGR order. + factor (float): Same as :func:`mmcv.adjust_brightness`. + + Returns: + ndarray: The contrasted image. + """ + gray_img = bgr2gray(img) + hist = np.histogram(gray_img, 256, (0, 255))[0] + mean = round(np.sum(gray_img) / np.sum(hist)) + degenerated = (np.ones_like(img[..., 0]) * mean).astype(img.dtype) + degenerated = gray2bgr(degenerated) + contrasted_img = cv2.addWeighted( + img.astype(np.float32), factor, degenerated.astype(np.float32), + 1 - factor, 0) + contrasted_img = np.clip(contrasted_img, 0, 255) + return contrasted_img.astype(img.dtype) + + +def auto_contrast(img, cutoff=0): + """Auto adjust image contrast. + + This function maximize (normalize) image contrast by first removing cutoff + percent of the lightest and darkest pixels from the histogram and remapping + the image so that the darkest pixel becomes black (0), and the lightest + becomes white (255). + + Args: + img (ndarray): Image to be contrasted. BGR order. + cutoff (int | float | tuple): The cutoff percent of the lightest and + darkest pixels to be removed. If given as tuple, it shall be + (low, high). Otherwise, the single value will be used for both. + Defaults to 0. + + Returns: + ndarray: The contrasted image. + """ + + def _auto_contrast_channel(im, c, cutoff): + im = im[:, :, c] + # Compute the histogram of the image channel. + histo = np.histogram(im, 256, (0, 255))[0] + # Remove cut-off percent pixels from histo + histo_sum = np.cumsum(histo) + cut_low = histo_sum[-1] * cutoff[0] // 100 + cut_high = histo_sum[-1] - histo_sum[-1] * cutoff[1] // 100 + histo_sum = np.clip(histo_sum, cut_low, cut_high) - cut_low + histo = np.concatenate([[histo_sum[0]], np.diff(histo_sum)], 0) + + # Compute mapping + low, high = np.nonzero(histo)[0][0], np.nonzero(histo)[0][-1] + # If all the values have been cut off, return the origin img + if low >= high: + return im + scale = 255.0 / (high - low) + offset = -low * scale + lut = np.array(range(256)) + lut = lut * scale + offset + lut = np.clip(lut, 0, 255) + return lut[im] + + if isinstance(cutoff, (int, float)): + cutoff = (cutoff, cutoff) + else: + assert isinstance(cutoff, tuple), 'cutoff must be of type int, ' \ + f'float or tuple, but got {type(cutoff)} instead.' + # Auto adjusts contrast for each channel independently and then stacks + # the result. + s1 = _auto_contrast_channel(img, 0, cutoff) + s2 = _auto_contrast_channel(img, 1, cutoff) + s3 = _auto_contrast_channel(img, 2, cutoff) + contrasted_img = np.stack([s1, s2, s3], axis=-1) + return contrasted_img.astype(img.dtype) + + +def adjust_sharpness(img, factor=1., kernel=None): + """Adjust image sharpness. + + This function controls the sharpness of an image. An + enhancement factor of 0.0 gives a blurred image. A + factor of 1.0 gives the original image. And a factor + of 2.0 gives a sharpened image. It blends the source + image and the degenerated mean image: + + .. math:: + output = img * factor + degenerated * (1 - factor) + + Args: + img (ndarray): Image to be sharpened. BGR order. + factor (float): Same as :func:`mmcv.adjust_brightness`. + kernel (np.ndarray, optional): Filter kernel to be applied on the img + to obtain the degenerated img. Defaults to None. + + Note: + No value sanity check is enforced on the kernel set by users. So with + an inappropriate kernel, the ``adjust_sharpness`` may fail to perform + the function its name indicates but end up performing whatever + transform determined by the kernel. + + Returns: + ndarray: The sharpened image. + """ + + if kernel is None: + # adopted from PIL.ImageFilter.SMOOTH + kernel = np.array([[1., 1., 1.], [1., 5., 1.], [1., 1., 1.]]) / 13 + assert isinstance(kernel, np.ndarray), \ + f'kernel must be of type np.ndarray, but got {type(kernel)} instead.' + assert kernel.ndim == 2, \ + f'kernel must have a dimension of 2, but got {kernel.ndim} instead.' + + degenerated = cv2.filter2D(img, -1, kernel) + sharpened_img = cv2.addWeighted( + img.astype(np.float32), factor, degenerated.astype(np.float32), + 1 - factor, 0) + sharpened_img = np.clip(sharpened_img, 0, 255) + return sharpened_img.astype(img.dtype) + + +def adjust_lighting(img, eigval, eigvec, alphastd=0.1, to_rgb=True): + """AlexNet-style PCA jitter. + + This data augmentation is proposed in `ImageNet Classification with Deep + Convolutional Neural Networks + `_. + + Args: + img (ndarray): Image to be adjusted lighting. BGR order. + eigval (ndarray): the eigenvalue of the convariance matrix of pixel + values, respectively. + eigvec (ndarray): the eigenvector of the convariance matrix of pixel + values, respectively. + alphastd (float): The standard deviation for distribution of alpha. + Defaults to 0.1 + to_rgb (bool): Whether to convert img to rgb. + + Returns: + ndarray: The adjusted image. + """ + assert isinstance(eigval, np.ndarray) and isinstance(eigvec, np.ndarray), \ + f'eigval and eigvec should both be of type np.ndarray, got ' \ + f'{type(eigval)} and {type(eigvec)} instead.' + + assert eigval.ndim == 1 and eigvec.ndim == 2 + assert eigvec.shape == (3, eigval.shape[0]) + n_eigval = eigval.shape[0] + assert isinstance(alphastd, float), 'alphastd should be of type float, ' \ + f'got {type(alphastd)} instead.' + + img = img.copy().astype(np.float32) + if to_rgb: + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace + + alpha = np.random.normal(0, alphastd, n_eigval) + alter = eigvec \ + * np.broadcast_to(alpha.reshape(1, n_eigval), (3, n_eigval)) \ + * np.broadcast_to(eigval.reshape(1, n_eigval), (3, n_eigval)) + alter = np.broadcast_to(alter.sum(axis=1).reshape(1, 1, 3), img.shape) + img_adjusted = img + alter + return img_adjusted + + +def lut_transform(img, lut_table): + """Transform array by look-up table. + + The function lut_transform fills the output array with values from the + look-up table. Indices of the entries are taken from the input array. + + Args: + img (ndarray): Image to be transformed. + lut_table (ndarray): look-up table of 256 elements; in case of + multi-channel input array, the table should either have a single + channel (in this case the same table is used for all channels) or + the same number of channels as in the input array. + + Returns: + ndarray: The transformed image. + """ + assert isinstance(img, np.ndarray) + assert 0 <= np.min(img) and np.max(img) <= 255 + assert isinstance(lut_table, np.ndarray) + assert lut_table.shape == (256, ) + + return cv2.LUT(np.array(img, dtype=np.uint8), lut_table) + + +def clahe(img, clip_limit=40.0, tile_grid_size=(8, 8)): + """Use CLAHE method to process the image. + + See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J]. + Graphics Gems, 1994:474-485.` for more information. + + Args: + img (ndarray): Image to be processed. + clip_limit (float): Threshold for contrast limiting. Default: 40.0. + tile_grid_size (tuple[int]): Size of grid for histogram equalization. + Input image will be divided into equally sized rectangular tiles. + It defines the number of tiles in row and column. Default: (8, 8). + + Returns: + ndarray: The processed image. + """ + assert isinstance(img, np.ndarray) + assert img.ndim == 2 + assert isinstance(clip_limit, (float, int)) + assert is_tuple_of(tile_grid_size, int) + assert len(tile_grid_size) == 2 + + clahe = cv2.createCLAHE(clip_limit, tile_grid_size) + return clahe.apply(np.array(img, dtype=np.uint8)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/deprecated.json b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/deprecated.json new file mode 100644 index 000000000000..25cf6f28caec --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/deprecated.json @@ -0,0 +1,6 @@ +{ + "resnet50_caffe": "detectron/resnet50_caffe", + "resnet50_caffe_bgr": "detectron2/resnet50_caffe_bgr", + "resnet101_caffe": "detectron/resnet101_caffe", + "resnet101_caffe_bgr": "detectron2/resnet101_caffe_bgr" +} diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/mmcls.json b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/mmcls.json new file mode 100644 index 000000000000..bdb311d9fe6d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/mmcls.json @@ -0,0 +1,31 @@ +{ + "vgg11": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_batch256_imagenet_20210208-4271cd6c.pth", + "vgg13": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_batch256_imagenet_20210208-4d1d6080.pth", + "vgg16": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_batch256_imagenet_20210208-db26f1a5.pth", + "vgg19": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_batch256_imagenet_20210208-e6920e4a.pth", + "vgg11_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_bn_batch256_imagenet_20210207-f244902c.pth", + "vgg13_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_bn_batch256_imagenet_20210207-1a8b7864.pth", + "vgg16_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_bn_batch256_imagenet_20210208-7e55cd29.pth", + "vgg19_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_bn_batch256_imagenet_20210208-da620c4f.pth", + "resnet18": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_batch256_imagenet_20200708-34ab8f90.pth", + "resnet34": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_batch256_imagenet_20200708-32ffb4f7.pth", + "resnet50": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_batch256_imagenet_20200708-cfb998bf.pth", + "resnet101": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_batch256_imagenet_20200708-753f3608.pth", + "resnet152": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_batch256_imagenet_20200708-ec25b1f9.pth", + "resnet50_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_batch256_imagenet_20200708-1ad0ce94.pth", + "resnet101_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d101_batch256_imagenet_20200708-9cb302ef.pth", + "resnet152_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d152_batch256_imagenet_20200708-e79cb6a2.pth", + "resnext50_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_b32x8_imagenet_20210429-56066e27.pth", + "resnext101_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x4d_b32x8_imagenet_20210506-e0fa3dd5.pth", + "resnext101_32x8d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x8d_b32x8_imagenet_20210506-23a247d5.pth", + "resnext152_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext152_32x4d_b32x8_imagenet_20210524-927787be.pth", + "se-resnet50": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet50_batch256_imagenet_20200804-ae206104.pth", + "se-resnet101": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet101_batch256_imagenet_20200804-ba5b51d4.pth", + "resnest50": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest50_imagenet_converted-1ebf0afe.pth", + "resnest101": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest101_imagenet_converted-032caa52.pth", + "resnest200": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest200_imagenet_converted-581a60f2.pth", + "resnest269": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest269_imagenet_converted-59930960.pth", + "shufflenet_v1": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth", + "shufflenet_v2": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v2/shufflenet_v2_batch1024_imagenet_20200812-5bf4721e.pth", + "mobilenet_v2": "https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth" +} diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/open_mmlab.json b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/open_mmlab.json new file mode 100644 index 000000000000..8311db4feef9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/open_mmlab.json @@ -0,0 +1,50 @@ +{ + "vgg16_caffe": "https://download.openmmlab.com/pretrain/third_party/vgg16_caffe-292e1171.pth", + "detectron/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_caffe-788b5fa3.pth", + "detectron2/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_msra-5891d200.pth", + "detectron/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_caffe-3ad79236.pth", + "detectron2/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_msra-6cc46731.pth", + "detectron2/resnext101_32x8d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x8d-1516f1aa.pth", + "resnext50_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext50-32x4d-0ab1a123.pth", + "resnext101_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d-a5af3160.pth", + "resnext101_64x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_64x4d-ee2c6f71.pth", + "contrib/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_thangvubk-ad1730dd.pth", + "detectron/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn-9186a21c.pth", + "detectron/resnet101_gn": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn-cac0ab98.pth", + "jhu/resnet50_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_ws-15beedd8.pth", + "jhu/resnet101_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn_ws-3e3c308c.pth", + "jhu/resnext50_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn_ws-0d87ac85.pth", + "jhu/resnext101_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn_ws-34ac1a9e.pth", + "jhu/resnext50_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn-c7e8b754.pth", + "jhu/resnext101_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn-ac3bb84e.pth", + "msra/hrnetv2_w18_small": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18_small-b5a04e21.pth", + "msra/hrnetv2_w18": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18-00eb2006.pth", + "msra/hrnetv2_w32": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w32-dc9eeb4f.pth", + "msra/hrnetv2_w40": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w40-ed0b031c.pth", + "msra/hrnetv2_w48": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w48-d2186c55.pth", + "bninception_caffe": "https://download.openmmlab.com/pretrain/third_party/bn_inception_caffe-ed2e8665.pth", + "kin400/i3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/i3d_r50_f32s2_k400-2c57e077.pth", + "kin400/nl3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/nl3d_r50_f32s2_k400-fa7e7caa.pth", + "res2net101_v1d_26w_4s": "https://download.openmmlab.com/pretrain/third_party/res2net101_v1d_26w_4s_mmdetv2-f0a600f9.pth", + "regnetx_400mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_400mf-a5b10d96.pth", + "regnetx_800mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_800mf-1f4be4c7.pth", + "regnetx_1.6gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_1.6gf-5791c176.pth", + "regnetx_3.2gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_3.2gf-c2599b0f.pth", + "regnetx_4.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_4.0gf-a88f671e.pth", + "regnetx_6.4gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_6.4gf-006af45d.pth", + "regnetx_8.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_8.0gf-3c68abe7.pth", + "regnetx_12gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_12gf-4c2a3350.pth", + "resnet18_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet18_v1c-b5776b93.pth", + "resnet50_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet50_v1c-2cccc1ad.pth", + "resnet101_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet101_v1c-e67eebb6.pth", + "mmedit/vgg16": "https://download.openmmlab.com/mmediting/third_party/vgg_state_dict.pth", + "mmedit/res34_en_nomixup": "https://download.openmmlab.com/mmediting/third_party/model_best_resnet34_En_nomixup.pth", + "mmedit/mobilenet_v2": "https://download.openmmlab.com/mmediting/third_party/mobilenet_v2.pth", + "contrib/mobilenet_v3_large": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_large-bc2c3fd3.pth", + "contrib/mobilenet_v3_small": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_small-47085aa1.pth", + "resnest50": "https://download.openmmlab.com/pretrain/third_party/resnest50_d2-7497a55b.pth", + "resnest101": "https://download.openmmlab.com/pretrain/third_party/resnest101_d2-f3b931b2.pth", + "resnest200": "https://download.openmmlab.com/pretrain/third_party/resnest200_d2-ca88e41f.pth", + "darknet53": "https://download.openmmlab.com/pretrain/third_party/darknet53-a628ea1b.pth", + "mmdet/mobilenet_v2": "https://download.openmmlab.com/mmdetection/v2.0/third_party/mobilenet_v2_batch256_imagenet-ff34753d.pth" +} diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py new file mode 100644 index 000000000000..999e090a458e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py @@ -0,0 +1,81 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .assign_score_withk import assign_score_withk +from .ball_query import ball_query +from .bbox import bbox_overlaps +from .border_align import BorderAlign, border_align +from .box_iou_rotated import box_iou_rotated +from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive +from .cc_attention import CrissCrossAttention +from .contour_expand import contour_expand +from .corner_pool import CornerPool +from .correlation import Correlation +from .deform_conv import DeformConv2d, DeformConv2dPack, deform_conv2d +from .deform_roi_pool import (DeformRoIPool, DeformRoIPoolPack, + ModulatedDeformRoIPoolPack, deform_roi_pool) +from .deprecated_wrappers import Conv2d_deprecated as Conv2d +from .deprecated_wrappers import ConvTranspose2d_deprecated as ConvTranspose2d +from .deprecated_wrappers import Linear_deprecated as Linear +from .deprecated_wrappers import MaxPool2d_deprecated as MaxPool2d +from .focal_loss import (SigmoidFocalLoss, SoftmaxFocalLoss, + sigmoid_focal_loss, softmax_focal_loss) +from .furthest_point_sample import (furthest_point_sample, + furthest_point_sample_with_dist) +from .fused_bias_leakyrelu import FusedBiasLeakyReLU, fused_bias_leakyrelu +from .gather_points import gather_points +from .group_points import GroupAll, QueryAndGroup, grouping_operation +from .info import (get_compiler_version, get_compiling_cuda_version, + get_onnxruntime_op_path) +from .iou3d import boxes_iou_bev, nms_bev, nms_normal_bev +from .knn import knn +from .masked_conv import MaskedConv2d, masked_conv2d +from .modulated_deform_conv import (ModulatedDeformConv2d, + ModulatedDeformConv2dPack, + modulated_deform_conv2d) +from .multi_scale_deform_attn import MultiScaleDeformableAttention +from .nms import batched_nms, nms, nms_match, nms_rotated, soft_nms +from .pixel_group import pixel_group +from .point_sample import (SimpleRoIAlign, point_sample, + rel_roi_point_to_rel_img_point) +from .points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu, + points_in_boxes_part) +from .points_sampler import PointsSampler +from .psa_mask import PSAMask +from .roi_align import RoIAlign, roi_align +from .roi_align_rotated import RoIAlignRotated, roi_align_rotated +from .roi_pool import RoIPool, roi_pool +from .roiaware_pool3d import RoIAwarePool3d +from .roipoint_pool3d import RoIPointPool3d +from .saconv import SAConv2d +from .scatter_points import DynamicScatter, dynamic_scatter +from .sync_bn import SyncBatchNorm +from .three_interpolate import three_interpolate +from .three_nn import three_nn +from .tin_shift import TINShift, tin_shift +from .upfirdn2d import upfirdn2d +from .voxelize import Voxelization, voxelization + +__all__ = [ + 'bbox_overlaps', 'CARAFE', 'CARAFENaive', 'CARAFEPack', 'carafe', + 'carafe_naive', 'CornerPool', 'DeformConv2d', 'DeformConv2dPack', + 'deform_conv2d', 'DeformRoIPool', 'DeformRoIPoolPack', + 'ModulatedDeformRoIPoolPack', 'deform_roi_pool', 'SigmoidFocalLoss', + 'SoftmaxFocalLoss', 'sigmoid_focal_loss', 'softmax_focal_loss', + 'get_compiler_version', 'get_compiling_cuda_version', + 'get_onnxruntime_op_path', 'MaskedConv2d', 'masked_conv2d', + 'ModulatedDeformConv2d', 'ModulatedDeformConv2dPack', + 'modulated_deform_conv2d', 'batched_nms', 'nms', 'soft_nms', 'nms_match', + 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 'SyncBatchNorm', 'Conv2d', + 'ConvTranspose2d', 'Linear', 'MaxPool2d', 'CrissCrossAttention', 'PSAMask', + 'point_sample', 'rel_roi_point_to_rel_img_point', 'SimpleRoIAlign', + 'SAConv2d', 'TINShift', 'tin_shift', 'assign_score_withk', + 'box_iou_rotated', 'RoIPointPool3d', 'nms_rotated', 'knn', 'ball_query', + 'upfirdn2d', 'FusedBiasLeakyReLU', 'fused_bias_leakyrelu', + 'RoIAlignRotated', 'roi_align_rotated', 'pixel_group', 'QueryAndGroup', + 'GroupAll', 'grouping_operation', 'contour_expand', 'three_nn', + 'three_interpolate', 'MultiScaleDeformableAttention', 'BorderAlign', + 'border_align', 'gather_points', 'furthest_point_sample', + 'furthest_point_sample_with_dist', 'PointsSampler', 'Correlation', + 'boxes_iou_bev', 'nms_bev', 'nms_normal_bev', 'Voxelization', + 'voxelization', 'dynamic_scatter', 'DynamicScatter', 'RoIAwarePool3d', + 'points_in_boxes_part', 'points_in_boxes_cpu', 'points_in_boxes_all' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py new file mode 100644 index 000000000000..4906adaa2cff --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py @@ -0,0 +1,123 @@ +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['assign_score_withk_forward', 'assign_score_withk_backward']) + + +class AssignScoreWithK(Function): + r"""Perform weighted sum to generate output features according to scores. + Modified from `PAConv `_. + + This is a memory-efficient CUDA implementation of assign_scores operation, + which first transform all point features with weight bank, then assemble + neighbor features with ``knn_idx`` and perform weighted sum of ``scores``. + + See the `paper `_ appendix Sec. D for + more detailed descriptions. + + Note: + This implementation assumes using ``neighbor`` kernel input, which is + (point_features - center_features, point_features). + See https://github.com/CVMI-Lab/PAConv/blob/main/scene_seg/model/ + pointnet2/paconv.py#L128 for more details. + """ + + @staticmethod + def forward(ctx, + scores, + point_features, + center_features, + knn_idx, + aggregate='sum'): + """ + Args: + scores (torch.Tensor): (B, npoint, K, M), predicted scores to + aggregate weight matrices in the weight bank. + ``npoint`` is the number of sampled centers. + ``K`` is the number of queried neighbors. + ``M`` is the number of weight matrices in the weight bank. + point_features (torch.Tensor): (B, N, M, out_dim) + Pre-computed point features to be aggregated. + center_features (torch.Tensor): (B, N, M, out_dim) + Pre-computed center features to be aggregated. + knn_idx (torch.Tensor): (B, npoint, K), index of sampled kNN. + We assume the first idx in each row is the idx of the center. + aggregate (str, optional): Aggregation method. + Can be 'sum', 'avg' or 'max'. Defaults: 'sum'. + + Returns: + torch.Tensor: (B, out_dim, npoint, K), the aggregated features. + """ + agg = {'sum': 0, 'avg': 1, 'max': 2} + + B, N, M, out_dim = point_features.size() + _, npoint, K, _ = scores.size() + + output = point_features.new_zeros((B, out_dim, npoint, K)) + ext_module.assign_score_withk_forward( + point_features.contiguous(), + center_features.contiguous(), + scores.contiguous(), + knn_idx.contiguous(), + output, + B=B, + N0=N, + N1=npoint, + M=M, + K=K, + O=out_dim, + aggregate=agg[aggregate]) + + ctx.save_for_backward(output, point_features, center_features, scores, + knn_idx) + ctx.agg = agg[aggregate] + + return output + + @staticmethod + def backward(ctx, grad_out): + """ + Args: + grad_out (torch.Tensor): (B, out_dim, npoint, K) + + Returns: + grad_scores (torch.Tensor): (B, npoint, K, M) + grad_point_features (torch.Tensor): (B, N, M, out_dim) + grad_center_features (torch.Tensor): (B, N, M, out_dim) + """ + _, point_features, center_features, scores, knn_idx = ctx.saved_tensors + + agg = ctx.agg + + B, N, M, out_dim = point_features.size() + _, npoint, K, _ = scores.size() + + grad_point_features = point_features.new_zeros(point_features.shape) + grad_center_features = center_features.new_zeros(center_features.shape) + grad_scores = scores.new_zeros(scores.shape) + + ext_module.assign_score_withk_backward( + grad_out.contiguous(), + point_features.contiguous(), + center_features.contiguous(), + scores.contiguous(), + knn_idx.contiguous(), + grad_point_features, + grad_center_features, + grad_scores, + B=B, + N0=N, + N1=npoint, + M=M, + K=K, + O=out_dim, + aggregate=agg) + + return grad_scores, grad_point_features, \ + grad_center_features, None, None + + +assign_score_withk = AssignScoreWithK.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py new file mode 100644 index 000000000000..d0466847c6e5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py @@ -0,0 +1,55 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['ball_query_forward']) + + +class BallQuery(Function): + """Find nearby points in spherical space.""" + + @staticmethod + def forward(ctx, min_radius: float, max_radius: float, sample_num: int, + xyz: torch.Tensor, center_xyz: torch.Tensor) -> torch.Tensor: + """ + Args: + min_radius (float): minimum radius of the balls. + max_radius (float): maximum radius of the balls. + sample_num (int): maximum number of features in the balls. + xyz (Tensor): (B, N, 3) xyz coordinates of the features. + center_xyz (Tensor): (B, npoint, 3) centers of the ball query. + + Returns: + Tensor: (B, npoint, nsample) tensor with the indices of + the features that form the query balls. + """ + assert center_xyz.is_contiguous() + assert xyz.is_contiguous() + assert min_radius < max_radius + + B, N, _ = xyz.size() + npoint = center_xyz.size(1) + idx = xyz.new_zeros(B, npoint, sample_num, dtype=torch.int) + + ext_module.ball_query_forward( + center_xyz, + xyz, + idx, + b=B, + n=N, + m=npoint, + min_radius=min_radius, + max_radius=max_radius, + nsample=sample_num) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(idx) + return idx + + @staticmethod + def backward(ctx, a=None): + return None, None, None, None + + +ball_query = BallQuery.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py new file mode 100644 index 000000000000..0c4d58b6c91f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py @@ -0,0 +1,72 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['bbox_overlaps']) + + +def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0): + """Calculate overlap between two set of bboxes. + + If ``aligned`` is ``False``, then calculate the ious between each bbox + of bboxes1 and bboxes2, otherwise the ious between each aligned pair of + bboxes1 and bboxes2. + + Args: + bboxes1 (Tensor): shape (m, 4) in format or empty. + bboxes2 (Tensor): shape (n, 4) in format or empty. + If aligned is ``True``, then m and n must be equal. + mode (str): "iou" (intersection over union) or iof (intersection over + foreground). + + Returns: + ious(Tensor): shape (m, n) if aligned == False else shape (m, 1) + + Example: + >>> bboxes1 = torch.FloatTensor([ + >>> [0, 0, 10, 10], + >>> [10, 10, 20, 20], + >>> [32, 32, 38, 42], + >>> ]) + >>> bboxes2 = torch.FloatTensor([ + >>> [0, 0, 10, 20], + >>> [0, 10, 10, 19], + >>> [10, 10, 20, 20], + >>> ]) + >>> bbox_overlaps(bboxes1, bboxes2) + tensor([[0.5000, 0.0000, 0.0000], + [0.0000, 0.0000, 1.0000], + [0.0000, 0.0000, 0.0000]]) + + Example: + >>> empty = torch.FloatTensor([]) + >>> nonempty = torch.FloatTensor([ + >>> [0, 0, 10, 9], + >>> ]) + >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1) + >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0) + >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0) + """ + + mode_dict = {'iou': 0, 'iof': 1} + assert mode in mode_dict.keys() + mode_flag = mode_dict[mode] + # Either the boxes are empty or the length of boxes' last dimension is 4 + assert (bboxes1.size(-1) == 4 or bboxes1.size(0) == 0) + assert (bboxes2.size(-1) == 4 or bboxes2.size(0) == 0) + assert offset == 1 or offset == 0 + + rows = bboxes1.size(0) + cols = bboxes2.size(0) + if aligned: + assert rows == cols + + if rows * cols == 0: + return bboxes1.new(rows, 1) if aligned else bboxes1.new(rows, cols) + + if aligned: + ious = bboxes1.new_zeros(rows) + else: + ious = bboxes1.new_zeros((rows, cols)) + ext_module.bbox_overlaps( + bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset) + return ious diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py new file mode 100644 index 000000000000..ff305be328e9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py @@ -0,0 +1,109 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# modified from +# https://github.com/Megvii-BaseDetection/cvpods/blob/master/cvpods/layers/border_align.py + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['border_align_forward', 'border_align_backward']) + + +class BorderAlignFunction(Function): + + @staticmethod + def symbolic(g, input, boxes, pool_size): + return g.op( + 'mmcv::MMCVBorderAlign', input, boxes, pool_size_i=pool_size) + + @staticmethod + def forward(ctx, input, boxes, pool_size): + ctx.pool_size = pool_size + ctx.input_shape = input.size() + + assert boxes.ndim == 3, 'boxes must be with shape [B, H*W, 4]' + assert boxes.size(2) == 4, \ + 'the last dimension of boxes must be (x1, y1, x2, y2)' + assert input.size(1) % 4 == 0, \ + 'the channel for input feature must be divisible by factor 4' + + # [B, C//4, H*W, 4] + output_shape = (input.size(0), input.size(1) // 4, boxes.size(1), 4) + output = input.new_zeros(output_shape) + # `argmax_idx` only used for backward + argmax_idx = input.new_zeros(output_shape).to(torch.int) + + ext_module.border_align_forward( + input, boxes, output, argmax_idx, pool_size=ctx.pool_size) + + ctx.save_for_backward(boxes, argmax_idx) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + boxes, argmax_idx = ctx.saved_tensors + grad_input = grad_output.new_zeros(ctx.input_shape) + # complex head architecture may cause grad_output uncontiguous + grad_output = grad_output.contiguous() + ext_module.border_align_backward( + grad_output, + boxes, + argmax_idx, + grad_input, + pool_size=ctx.pool_size) + return grad_input, None, None + + +border_align = BorderAlignFunction.apply + + +class BorderAlign(nn.Module): + r"""Border align pooling layer. + + Applies border_align over the input feature based on predicted bboxes. + The details were described in the paper + `BorderDet: Border Feature for Dense Object Detection + `_. + + For each border line (e.g. top, left, bottom or right) of each box, + border_align does the following: + 1. uniformly samples `pool_size`+1 positions on this line, involving \ + the start and end points. + 2. the corresponding features on these points are computed by \ + bilinear interpolation. + 3. max pooling over all the `pool_size`+1 positions are used for \ + computing pooled feature. + + Args: + pool_size (int): number of positions sampled over the boxes' borders + (e.g. top, bottom, left, right). + + """ + + def __init__(self, pool_size): + super(BorderAlign, self).__init__() + self.pool_size = pool_size + + def forward(self, input, boxes): + """ + Args: + input: Features with shape [N,4C,H,W]. Channels ranged in [0,C), + [C,2C), [2C,3C), [3C,4C) represent the top, left, bottom, + right features respectively. + boxes: Boxes with shape [N,H*W,4]. Coordinate format (x1,y1,x2,y2). + + Returns: + Tensor: Pooled features with shape [N,C,H*W,4]. The order is + (top,left,bottom,right) for the last dimension. + """ + return border_align(input, boxes, self.pool_size) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(pool_size={self.pool_size})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py new file mode 100644 index 000000000000..2d78015e9c2a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py @@ -0,0 +1,45 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['box_iou_rotated']) + + +def box_iou_rotated(bboxes1, bboxes2, mode='iou', aligned=False): + """Return intersection-over-union (Jaccard index) of boxes. + + Both sets of boxes are expected to be in + (x_center, y_center, width, height, angle) format. + + If ``aligned`` is ``False``, then calculate the ious between each bbox + of bboxes1 and bboxes2, otherwise the ious between each aligned pair of + bboxes1 and bboxes2. + + Arguments: + boxes1 (Tensor): rotated bboxes 1. \ + It has shape (N, 5), indicating (x, y, w, h, theta) for each row. + Note that theta is in radian. + boxes2 (Tensor): rotated bboxes 2. \ + It has shape (M, 5), indicating (x, y, w, h, theta) for each row. + Note that theta is in radian. + mode (str): "iou" (intersection over union) or iof (intersection over + foreground). + + Returns: + ious(Tensor): shape (N, M) if aligned == False else shape (N,) + """ + assert mode in ['iou', 'iof'] + mode_dict = {'iou': 0, 'iof': 1} + mode_flag = mode_dict[mode] + rows = bboxes1.size(0) + cols = bboxes2.size(0) + if aligned: + ious = bboxes1.new_zeros(rows) + else: + ious = bboxes1.new_zeros((rows * cols)) + bboxes1 = bboxes1.contiguous() + bboxes2 = bboxes2.contiguous() + ext_module.box_iou_rotated( + bboxes1, bboxes2, ious, mode_flag=mode_flag, aligned=aligned) + if not aligned: + ious = ious.view(rows, cols) + return ious diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py new file mode 100644 index 000000000000..5154cb3abfcc --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py @@ -0,0 +1,287 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Function +from torch.nn.modules.module import Module + +from ..cnn import UPSAMPLE_LAYERS, normal_init, xavier_init +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'carafe_naive_forward', 'carafe_naive_backward', 'carafe_forward', + 'carafe_backward' +]) + + +class CARAFENaiveFunction(Function): + + @staticmethod + def symbolic(g, features, masks, kernel_size, group_size, scale_factor): + return g.op( + 'mmcv::MMCVCARAFENaive', + features, + masks, + kernel_size_i=kernel_size, + group_size_i=group_size, + scale_factor_f=scale_factor) + + @staticmethod + def forward(ctx, features, masks, kernel_size, group_size, scale_factor): + assert scale_factor >= 1 + assert masks.size(1) == kernel_size * kernel_size * group_size + assert masks.size(-1) == features.size(-1) * scale_factor + assert masks.size(-2) == features.size(-2) * scale_factor + assert features.size(1) % group_size == 0 + assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1 + ctx.kernel_size = kernel_size + ctx.group_size = group_size + ctx.scale_factor = scale_factor + ctx.feature_size = features.size() + ctx.mask_size = masks.size() + + n, c, h, w = features.size() + output = features.new_zeros((n, c, h * scale_factor, w * scale_factor)) + ext_module.carafe_naive_forward( + features, + masks, + output, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + + if features.requires_grad or masks.requires_grad: + ctx.save_for_backward(features, masks) + return output + + @staticmethod + def backward(ctx, grad_output): + assert grad_output.is_cuda + + features, masks = ctx.saved_tensors + kernel_size = ctx.kernel_size + group_size = ctx.group_size + scale_factor = ctx.scale_factor + + grad_input = torch.zeros_like(features) + grad_masks = torch.zeros_like(masks) + ext_module.carafe_naive_backward( + grad_output.contiguous(), + features, + masks, + grad_input, + grad_masks, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + + return grad_input, grad_masks, None, None, None + + +carafe_naive = CARAFENaiveFunction.apply + + +class CARAFENaive(Module): + + def __init__(self, kernel_size, group_size, scale_factor): + super(CARAFENaive, self).__init__() + + assert isinstance(kernel_size, int) and isinstance( + group_size, int) and isinstance(scale_factor, int) + self.kernel_size = kernel_size + self.group_size = group_size + self.scale_factor = scale_factor + + def forward(self, features, masks): + return carafe_naive(features, masks, self.kernel_size, self.group_size, + self.scale_factor) + + +class CARAFEFunction(Function): + + @staticmethod + def symbolic(g, features, masks, kernel_size, group_size, scale_factor): + return g.op( + 'mmcv::MMCVCARAFE', + features, + masks, + kernel_size_i=kernel_size, + group_size_i=group_size, + scale_factor_f=scale_factor) + + @staticmethod + def forward(ctx, features, masks, kernel_size, group_size, scale_factor): + assert scale_factor >= 1 + assert masks.size(1) == kernel_size * kernel_size * group_size + assert masks.size(-1) == features.size(-1) * scale_factor + assert masks.size(-2) == features.size(-2) * scale_factor + assert features.size(1) % group_size == 0 + assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1 + ctx.kernel_size = kernel_size + ctx.group_size = group_size + ctx.scale_factor = scale_factor + ctx.feature_size = features.size() + ctx.mask_size = masks.size() + + n, c, h, w = features.size() + output = features.new_zeros((n, c, h * scale_factor, w * scale_factor)) + routput = features.new_zeros(output.size(), requires_grad=False) + rfeatures = features.new_zeros(features.size(), requires_grad=False) + rmasks = masks.new_zeros(masks.size(), requires_grad=False) + ext_module.carafe_forward( + features, + masks, + rfeatures, + routput, + rmasks, + output, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + + if features.requires_grad or masks.requires_grad: + ctx.save_for_backward(features, masks, rfeatures) + return output + + @staticmethod + def backward(ctx, grad_output): + assert grad_output.is_cuda + + features, masks, rfeatures = ctx.saved_tensors + kernel_size = ctx.kernel_size + group_size = ctx.group_size + scale_factor = ctx.scale_factor + + rgrad_output = torch.zeros_like(grad_output, requires_grad=False) + rgrad_input_hs = torch.zeros_like(grad_output, requires_grad=False) + rgrad_input = torch.zeros_like(features, requires_grad=False) + rgrad_masks = torch.zeros_like(masks, requires_grad=False) + grad_input = torch.zeros_like(features, requires_grad=False) + grad_masks = torch.zeros_like(masks, requires_grad=False) + ext_module.carafe_backward( + grad_output.contiguous(), + rfeatures, + masks, + rgrad_output, + rgrad_input_hs, + rgrad_input, + rgrad_masks, + grad_input, + grad_masks, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + return grad_input, grad_masks, None, None, None + + +carafe = CARAFEFunction.apply + + +class CARAFE(Module): + """ CARAFE: Content-Aware ReAssembly of FEatures + + Please refer to https://arxiv.org/abs/1905.02188 for more details. + + Args: + kernel_size (int): reassemble kernel size + group_size (int): reassemble group size + scale_factor (int): upsample ratio + + Returns: + upsampled feature map + """ + + def __init__(self, kernel_size, group_size, scale_factor): + super(CARAFE, self).__init__() + + assert isinstance(kernel_size, int) and isinstance( + group_size, int) and isinstance(scale_factor, int) + self.kernel_size = kernel_size + self.group_size = group_size + self.scale_factor = scale_factor + + def forward(self, features, masks): + return carafe(features, masks, self.kernel_size, self.group_size, + self.scale_factor) + + +@UPSAMPLE_LAYERS.register_module(name='carafe') +class CARAFEPack(nn.Module): + """A unified package of CARAFE upsampler that contains: 1) channel + compressor 2) content encoder 3) CARAFE op. + + Official implementation of ICCV 2019 paper + CARAFE: Content-Aware ReAssembly of FEatures + Please refer to https://arxiv.org/abs/1905.02188 for more details. + + Args: + channels (int): input feature channels + scale_factor (int): upsample ratio + up_kernel (int): kernel size of CARAFE op + up_group (int): group size of CARAFE op + encoder_kernel (int): kernel size of content encoder + encoder_dilation (int): dilation of content encoder + compressed_channels (int): output channels of channels compressor + + Returns: + upsampled feature map + """ + + def __init__(self, + channels, + scale_factor, + up_kernel=5, + up_group=1, + encoder_kernel=3, + encoder_dilation=1, + compressed_channels=64): + super(CARAFEPack, self).__init__() + self.channels = channels + self.scale_factor = scale_factor + self.up_kernel = up_kernel + self.up_group = up_group + self.encoder_kernel = encoder_kernel + self.encoder_dilation = encoder_dilation + self.compressed_channels = compressed_channels + self.channel_compressor = nn.Conv2d(channels, self.compressed_channels, + 1) + self.content_encoder = nn.Conv2d( + self.compressed_channels, + self.up_kernel * self.up_kernel * self.up_group * + self.scale_factor * self.scale_factor, + self.encoder_kernel, + padding=int((self.encoder_kernel - 1) * self.encoder_dilation / 2), + dilation=self.encoder_dilation, + groups=1) + self.init_weights() + + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + xavier_init(m, distribution='uniform') + normal_init(self.content_encoder, std=0.001) + + def kernel_normalizer(self, mask): + mask = F.pixel_shuffle(mask, self.scale_factor) + n, mask_c, h, w = mask.size() + # use float division explicitly, + # to void inconsistency while exporting to onnx + mask_channel = int(mask_c / float(self.up_kernel**2)) + mask = mask.view(n, mask_channel, -1, h, w) + + mask = F.softmax(mask, dim=2, dtype=mask.dtype) + mask = mask.view(n, mask_c, h, w).contiguous() + + return mask + + def feature_reassemble(self, x, mask): + x = carafe(x, mask, self.up_kernel, self.up_group, self.scale_factor) + return x + + def forward(self, x): + compressed_x = self.channel_compressor(x) + mask = self.content_encoder(compressed_x) + mask = self.kernel_normalizer(mask) + + x = self.feature_reassemble(x, mask) + return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py new file mode 100644 index 000000000000..7d901fdc6b47 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py @@ -0,0 +1,83 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import PLUGIN_LAYERS, Scale + + +def NEG_INF_DIAG(n, device): + """Returns a diagonal matrix of size [n, n]. + + The diagonal are all "-inf". This is for avoiding calculating the + overlapped element in the Criss-Cross twice. + """ + return torch.diag(torch.tensor(float('-inf')).to(device).repeat(n), 0) + + +@PLUGIN_LAYERS.register_module() +class CrissCrossAttention(nn.Module): + """Criss-Cross Attention Module. + + .. note:: + Before v1.3.13, we use a CUDA op. Since v1.3.13, we switch + to a pure PyTorch and equivalent implementation. For more + details, please refer to https://github.com/open-mmlab/mmcv/pull/1201. + + Speed comparison for one forward pass + + - Input size: [2,512,97,97] + - Device: 1 NVIDIA GeForce RTX 2080 Ti + + +-----------------------+---------------+------------+---------------+ + | |PyTorch version|CUDA version|Relative speed | + +=======================+===============+============+===============+ + |with torch.no_grad() |0.00554402 s |0.0299619 s |5.4x | + +-----------------------+---------------+------------+---------------+ + |no with torch.no_grad()|0.00562803 s |0.0301349 s |5.4x | + +-----------------------+---------------+------------+---------------+ + + Args: + in_channels (int): Channels of the input feature map. + """ + + def __init__(self, in_channels): + super().__init__() + self.query_conv = nn.Conv2d(in_channels, in_channels // 8, 1) + self.key_conv = nn.Conv2d(in_channels, in_channels // 8, 1) + self.value_conv = nn.Conv2d(in_channels, in_channels, 1) + self.gamma = Scale(0.) + self.in_channels = in_channels + + def forward(self, x): + """forward function of Criss-Cross Attention. + + Args: + x (Tensor): Input feature. \ + shape (batch_size, in_channels, height, width) + Returns: + Tensor: Output of the layer, with shape of \ + (batch_size, in_channels, height, width) + """ + B, C, H, W = x.size() + query = self.query_conv(x) + key = self.key_conv(x) + value = self.value_conv(x) + energy_H = torch.einsum('bchw,bciw->bwhi', query, key) + NEG_INF_DIAG( + H, query.device) + energy_H = energy_H.transpose(1, 2) + energy_W = torch.einsum('bchw,bchj->bhwj', query, key) + attn = F.softmax( + torch.cat([energy_H, energy_W], dim=-1), dim=-1) # [B,H,W,(H+W)] + out = torch.einsum('bciw,bhwi->bchw', value, attn[..., :H]) + out += torch.einsum('bchj,bhwj->bchw', value, attn[..., H:]) + + out = self.gamma(out) + x + out = out.contiguous() + + return out + + def __repr__(self): + s = self.__class__.__name__ + s += f'(in_channels={self.in_channels})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py new file mode 100644 index 000000000000..ea1111e1768b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py @@ -0,0 +1,49 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np +import torch + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['contour_expand']) + + +def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area, + kernel_num): + """Expand kernel contours so that foreground pixels are assigned into + instances. + + Arguments: + kernel_mask (np.array or Tensor): The instance kernel mask with + size hxw. + internal_kernel_label (np.array or Tensor): The instance internal + kernel label with size hxw. + min_kernel_area (int): The minimum kernel area. + kernel_num (int): The instance kernel number. + + Returns: + label (list): The instance index map with size hxw. + """ + assert isinstance(kernel_mask, (torch.Tensor, np.ndarray)) + assert isinstance(internal_kernel_label, (torch.Tensor, np.ndarray)) + assert isinstance(min_kernel_area, int) + assert isinstance(kernel_num, int) + + if isinstance(kernel_mask, np.ndarray): + kernel_mask = torch.from_numpy(kernel_mask) + if isinstance(internal_kernel_label, np.ndarray): + internal_kernel_label = torch.from_numpy(internal_kernel_label) + + if torch.__version__ == 'parrots': + if kernel_mask.shape[0] == 0 or internal_kernel_label.shape[0] == 0: + label = [] + else: + label = ext_module.contour_expand( + kernel_mask, + internal_kernel_label, + min_kernel_area=min_kernel_area, + kernel_num=kernel_num) + label = label.tolist() + else: + label = ext_module.contour_expand(kernel_mask, internal_kernel_label, + min_kernel_area, kernel_num) + return label diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py new file mode 100644 index 000000000000..a33d798b43d4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py @@ -0,0 +1,161 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'top_pool_forward', 'top_pool_backward', 'bottom_pool_forward', + 'bottom_pool_backward', 'left_pool_forward', 'left_pool_backward', + 'right_pool_forward', 'right_pool_backward' +]) + +_mode_dict = {'top': 0, 'bottom': 1, 'left': 2, 'right': 3} + + +class TopPoolFunction(Function): + + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['top'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.top_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.top_pool_backward(input, grad_output) + return output + + +class BottomPoolFunction(Function): + + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['bottom'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.bottom_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.bottom_pool_backward(input, grad_output) + return output + + +class LeftPoolFunction(Function): + + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['left'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.left_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.left_pool_backward(input, grad_output) + return output + + +class RightPoolFunction(Function): + + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['right'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.right_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.right_pool_backward(input, grad_output) + return output + + +class CornerPool(nn.Module): + """Corner Pooling. + + Corner Pooling is a new type of pooling layer that helps a + convolutional network better localize corners of bounding boxes. + + Please refer to https://arxiv.org/abs/1808.01244 for more details. + Code is modified from https://github.com/princeton-vl/CornerNet-Lite. + + Args: + mode(str): Pooling orientation for the pooling layer + + - 'bottom': Bottom Pooling + - 'left': Left Pooling + - 'right': Right Pooling + - 'top': Top Pooling + + Returns: + Feature map after pooling. + """ + + pool_functions = { + 'bottom': BottomPoolFunction, + 'left': LeftPoolFunction, + 'right': RightPoolFunction, + 'top': TopPoolFunction, + } + + cummax_dim_flip = { + 'bottom': (2, False), + 'left': (3, True), + 'right': (3, False), + 'top': (2, True), + } + + def __init__(self, mode): + super(CornerPool, self).__init__() + assert mode in self.pool_functions + self.mode = mode + self.corner_pool = self.pool_functions[mode] + + def forward(self, x): + if torch.__version__ != 'parrots' and torch.__version__ >= '1.5.0': + if torch.onnx.is_in_onnx_export(): + assert torch.__version__ >= '1.7.0', \ + 'When `cummax` serves as an intermediate component whose '\ + 'outputs is used as inputs for another modules, it\'s '\ + 'expected that pytorch version must be >= 1.7.0, '\ + 'otherwise Error appears like: `RuntimeError: tuple '\ + 'appears in op that does not forward tuples, unsupported '\ + 'kind: prim::PythonOp`.' + + dim, flip = self.cummax_dim_flip[self.mode] + if flip: + x = x.flip(dim) + pool_tensor, _ = torch.cummax(x, dim=dim) + if flip: + pool_tensor = pool_tensor.flip(dim) + return pool_tensor + else: + return self.corner_pool.apply(x) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py new file mode 100644 index 000000000000..3d0b79c301b2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py @@ -0,0 +1,196 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import Tensor, nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['correlation_forward', 'correlation_backward']) + + +class CorrelationFunction(Function): + + @staticmethod + def forward(ctx, + input1, + input2, + kernel_size=1, + max_displacement=1, + stride=1, + padding=1, + dilation=1, + dilation_patch=1): + + ctx.save_for_backward(input1, input2) + + kH, kW = ctx.kernel_size = _pair(kernel_size) + patch_size = max_displacement * 2 + 1 + ctx.patch_size = patch_size + dH, dW = ctx.stride = _pair(stride) + padH, padW = ctx.padding = _pair(padding) + dilationH, dilationW = ctx.dilation = _pair(dilation) + dilation_patchH, dilation_patchW = ctx.dilation_patch = _pair( + dilation_patch) + + output_size = CorrelationFunction._output_size(ctx, input1) + + output = input1.new_zeros(output_size) + + ext_module.correlation_forward( + input1, + input2, + output, + kH=kH, + kW=kW, + patchH=patch_size, + patchW=patch_size, + padH=padH, + padW=padW, + dilationH=dilationH, + dilationW=dilationW, + dilation_patchH=dilation_patchH, + dilation_patchW=dilation_patchW, + dH=dH, + dW=dW) + + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input1, input2 = ctx.saved_tensors + + kH, kW = ctx.kernel_size + patch_size = ctx.patch_size + padH, padW = ctx.padding + dilationH, dilationW = ctx.dilation + dilation_patchH, dilation_patchW = ctx.dilation_patch + dH, dW = ctx.stride + grad_input1 = torch.zeros_like(input1) + grad_input2 = torch.zeros_like(input2) + + ext_module.correlation_backward( + grad_output, + input1, + input2, + grad_input1, + grad_input2, + kH=kH, + kW=kW, + patchH=patch_size, + patchW=patch_size, + padH=padH, + padW=padW, + dilationH=dilationH, + dilationW=dilationW, + dilation_patchH=dilation_patchH, + dilation_patchW=dilation_patchW, + dH=dH, + dW=dW) + return grad_input1, grad_input2, None, None, None, None, None, None + + @staticmethod + def _output_size(ctx, input1): + iH, iW = input1.size(2), input1.size(3) + batch_size = input1.size(0) + kH, kW = ctx.kernel_size + patch_size = ctx.patch_size + dH, dW = ctx.stride + padH, padW = ctx.padding + dilationH, dilationW = ctx.dilation + dilatedKH = (kH - 1) * dilationH + 1 + dilatedKW = (kW - 1) * dilationW + 1 + + oH = int((iH + 2 * padH - dilatedKH) / dH + 1) + oW = int((iW + 2 * padW - dilatedKW) / dW + 1) + + output_size = (batch_size, patch_size, patch_size, oH, oW) + return output_size + + +class Correlation(nn.Module): + r"""Correlation operator + + This correlation operator works for optical flow correlation computation. + + There are two batched tensors with shape :math:`(N, C, H, W)`, + and the correlation output's shape is :math:`(N, max\_displacement \times + 2 + 1, max\_displacement * 2 + 1, H_{out}, W_{out})` + + where + + .. math:: + H_{out} = \left\lfloor\frac{H_{in} + 2 \times padding - + dilation \times (kernel\_size - 1) - 1} + {stride} + 1\right\rfloor + + .. math:: + W_{out} = \left\lfloor\frac{W_{in} + 2 \times padding - dilation + \times (kernel\_size - 1) - 1} + {stride} + 1\right\rfloor + + the correlation item :math:`(N_i, dy, dx)` is formed by taking the sliding + window convolution between input1 and shifted input2, + + .. math:: + Corr(N_i, dx, dy) = + \sum_{c=0}^{C-1} + input1(N_i, c) \star + \mathcal{S}(input2(N_i, c), dy, dx) + + where :math:`\star` is the valid 2d sliding window convolution operator, + and :math:`\mathcal{S}` means shifting the input features (auto-complete + zero marginal), and :math:`dx, dy` are shifting distance, :math:`dx, dy \in + [-max\_displacement \times dilation\_patch, max\_displacement \times + dilation\_patch]`. + + Args: + kernel_size (int): The size of sliding window i.e. local neighborhood + representing the center points and involved in correlation + computation. Defaults to 1. + max_displacement (int): The radius for computing correlation volume, + but the actual working space can be dilated by dilation_patch. + Defaults to 1. + stride (int): The stride of the sliding blocks in the input spatial + dimensions. Defaults to 1. + padding (int): Zero padding added to all four sides of the input1. + Defaults to 0. + dilation (int): The spacing of local neighborhood that will involved + in correlation. Defaults to 1. + dilation_patch (int): The spacing between position need to compute + correlation. Defaults to 1. + """ + + def __init__(self, + kernel_size: int = 1, + max_displacement: int = 1, + stride: int = 1, + padding: int = 0, + dilation: int = 1, + dilation_patch: int = 1) -> None: + super().__init__() + self.kernel_size = kernel_size + self.max_displacement = max_displacement + self.stride = stride + self.padding = padding + self.dilation = dilation + self.dilation_patch = dilation_patch + + def forward(self, input1: Tensor, input2: Tensor) -> Tensor: + return CorrelationFunction.apply(input1, input2, self.kernel_size, + self.max_displacement, self.stride, + self.padding, self.dilation, + self.dilation_patch) + + def __repr__(self) -> str: + s = self.__class__.__name__ + s += f'(kernel_size={self.kernel_size}, ' + s += f'max_displacement={self.max_displacement}, ' + s += f'stride={self.stride}, ' + s += f'padding={self.padding}, ' + s += f'dilation={self.dilation}, ' + s += f'dilation_patch={self.dilation_patch})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py new file mode 100644 index 000000000000..6696b8a7747c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py @@ -0,0 +1,405 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Tuple, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair, _single + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import deprecated_api_warning +from ..cnn import CONV_LAYERS +from ..utils import ext_loader, print_log + +ext_module = ext_loader.load_ext('_ext', [ + 'deform_conv_forward', 'deform_conv_backward_input', + 'deform_conv_backward_parameters' +]) + + +class DeformConv2dFunction(Function): + + @staticmethod + def symbolic(g, + input, + offset, + weight, + stride, + padding, + dilation, + groups, + deform_groups, + bias=False, + im2col_step=32): + return g.op( + 'mmcv::MMCVDeformConv2d', + input, + offset, + weight, + stride_i=stride, + padding_i=padding, + dilation_i=dilation, + groups_i=groups, + deform_groups_i=deform_groups, + bias_i=bias, + im2col_step_i=im2col_step) + + @staticmethod + def forward(ctx, + input, + offset, + weight, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1, + bias=False, + im2col_step=32): + if input is not None and input.dim() != 4: + raise ValueError( + f'Expected 4D tensor as input, got {input.dim()}D tensor \ + instead.') + assert bias is False, 'Only support bias is False.' + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deform_groups = deform_groups + ctx.im2col_step = im2col_step + + # When pytorch version >= 1.6.0, amp is adopted for fp16 mode; + # amp won't cast the type of model (float32), but "offset" is cast + # to float16 by nn.Conv2d automatically, leading to the type + # mismatch with input (when it is float32) or weight. + # The flag for whether to use fp16 or amp is the type of "offset", + # we cast weight and input to temporarily support fp16 and amp + # whatever the pytorch version is. + input = input.type_as(offset) + weight = weight.type_as(input) + ctx.save_for_backward(input, offset, weight) + + output = input.new_empty( + DeformConv2dFunction._output_size(ctx, input, weight)) + + ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones + + cur_im2col_step = min(ctx.im2col_step, input.size(0)) + assert (input.size(0) % + cur_im2col_step) == 0, 'im2col step must divide batchsize' + ext_module.deform_conv_forward( + input, + weight, + offset, + output, + ctx.bufs_[0], + ctx.bufs_[1], + kW=weight.size(3), + kH=weight.size(2), + dW=ctx.stride[1], + dH=ctx.stride[0], + padW=ctx.padding[1], + padH=ctx.padding[0], + dilationW=ctx.dilation[1], + dilationH=ctx.dilation[0], + group=ctx.groups, + deformable_group=ctx.deform_groups, + im2col_step=cur_im2col_step) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, weight = ctx.saved_tensors + + grad_input = grad_offset = grad_weight = None + + cur_im2col_step = min(ctx.im2col_step, input.size(0)) + assert (input.size(0) % cur_im2col_step + ) == 0, 'batch size must be divisible by im2col_step' + + grad_output = grad_output.contiguous() + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + ext_module.deform_conv_backward_input( + input, + offset, + grad_output, + grad_input, + grad_offset, + weight, + ctx.bufs_[0], + kW=weight.size(3), + kH=weight.size(2), + dW=ctx.stride[1], + dH=ctx.stride[0], + padW=ctx.padding[1], + padH=ctx.padding[0], + dilationW=ctx.dilation[1], + dilationH=ctx.dilation[0], + group=ctx.groups, + deformable_group=ctx.deform_groups, + im2col_step=cur_im2col_step) + + if ctx.needs_input_grad[2]: + grad_weight = torch.zeros_like(weight) + ext_module.deform_conv_backward_parameters( + input, + offset, + grad_output, + grad_weight, + ctx.bufs_[0], + ctx.bufs_[1], + kW=weight.size(3), + kH=weight.size(2), + dW=ctx.stride[1], + dH=ctx.stride[0], + padW=ctx.padding[1], + padH=ctx.padding[0], + dilationW=ctx.dilation[1], + dilationH=ctx.dilation[0], + group=ctx.groups, + deformable_group=ctx.deform_groups, + scale=1, + im2col_step=cur_im2col_step) + + return grad_input, grad_offset, grad_weight, \ + None, None, None, None, None, None, None + + @staticmethod + def _output_size(ctx, input, weight): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = ctx.padding[d] + kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = ctx.stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, ) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError( + 'convolution input is too small (output would be ' + + 'x'.join(map(str, output_size)) + ')') + return output_size + + +deform_conv2d = DeformConv2dFunction.apply + + +class DeformConv2d(nn.Module): + r"""Deformable 2D convolution. + + Applies a deformable 2D convolution over an input signal composed of + several input planes. DeformConv2d was described in the paper + `Deformable Convolutional Networks + `_ + + Note: + The argument ``im2col_step`` was added in version 1.3.17, which means + number of samples processed by the ``im2col_cuda_kernel`` per call. + It enables users to define ``batch_size`` and ``im2col_step`` more + flexibly and solved `issue mmcv#1440 + `_. + + Args: + in_channels (int): Number of channels in the input image. + out_channels (int): Number of channels produced by the convolution. + kernel_size(int, tuple): Size of the convolving kernel. + stride(int, tuple): Stride of the convolution. Default: 1. + padding (int or tuple): Zero-padding added to both sides of the input. + Default: 0. + dilation (int or tuple): Spacing between kernel elements. Default: 1. + groups (int): Number of blocked connections from input. + channels to output channels. Default: 1. + deform_groups (int): Number of deformable group partitions. + bias (bool): If True, adds a learnable bias to the output. + Default: False. + im2col_step (int): Number of samples processed by im2col_cuda_kernel + per call. It will work when ``batch_size`` > ``im2col_step``, but + ``batch_size`` must be divisible by ``im2col_step``. Default: 32. + `New in version 1.3.17.` + """ + + @deprecated_api_warning({'deformable_groups': 'deform_groups'}, + cls_name='DeformConv2d') + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int, ...]], + stride: Union[int, Tuple[int, ...]] = 1, + padding: Union[int, Tuple[int, ...]] = 0, + dilation: Union[int, Tuple[int, ...]] = 1, + groups: int = 1, + deform_groups: int = 1, + bias: bool = False, + im2col_step: int = 32) -> None: + super(DeformConv2d, self).__init__() + + assert not bias, \ + f'bias={bias} is not supported in DeformConv2d.' + assert in_channels % groups == 0, \ + f'in_channels {in_channels} cannot be divisible by groups {groups}' + assert out_channels % groups == 0, \ + f'out_channels {out_channels} cannot be divisible by groups \ + {groups}' + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deform_groups = deform_groups + self.im2col_step = im2col_step + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + # only weight, no bias + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // self.groups, + *self.kernel_size)) + + self.reset_parameters() + + def reset_parameters(self): + # switch the initialization of `self.weight` to the standard kaiming + # method described in `Delving deep into rectifiers: Surpassing + # human-level performance on ImageNet classification` - He, K. et al. + # (2015), using a uniform distribution + nn.init.kaiming_uniform_(self.weight, nonlinearity='relu') + + def forward(self, x: Tensor, offset: Tensor) -> Tensor: + """Deformable Convolutional forward function. + + Args: + x (Tensor): Input feature, shape (B, C_in, H_in, W_in) + offset (Tensor): Offset for deformable convolution, shape + (B, deform_groups*kernel_size[0]*kernel_size[1]*2, + H_out, W_out), H_out, W_out are equal to the output's. + + An offset is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`. + The spatial arrangement is like: + + .. code:: text + + (x0, y0) (x1, y1) (x2, y2) + (x3, y3) (x4, y4) (x5, y5) + (x6, y6) (x7, y7) (x8, y8) + + Returns: + Tensor: Output of the layer. + """ + # To fix an assert error in deform_conv_cuda.cpp:128 + # input image is smaller than kernel + input_pad = (x.size(2) < self.kernel_size[0]) or (x.size(3) < + self.kernel_size[1]) + if input_pad: + pad_h = max(self.kernel_size[0] - x.size(2), 0) + pad_w = max(self.kernel_size[1] - x.size(3), 0) + x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() + offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0) + offset = offset.contiguous() + out = deform_conv2d(x, offset, self.weight, self.stride, self.padding, + self.dilation, self.groups, self.deform_groups, + False, self.im2col_step) + if input_pad: + out = out[:, :, :out.size(2) - pad_h, :out.size(3) - + pad_w].contiguous() + return out + + def __repr__(self): + s = self.__class__.__name__ + s += f'(in_channels={self.in_channels},\n' + s += f'out_channels={self.out_channels},\n' + s += f'kernel_size={self.kernel_size},\n' + s += f'stride={self.stride},\n' + s += f'padding={self.padding},\n' + s += f'dilation={self.dilation},\n' + s += f'groups={self.groups},\n' + s += f'deform_groups={self.deform_groups},\n' + # bias is not supported in DeformConv2d. + s += 'bias=False)' + return s + + +@CONV_LAYERS.register_module('DCN') +class DeformConv2dPack(DeformConv2d): + """A Deformable Conv Encapsulation that acts as normal Conv layers. + + The offset tensor is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`. + The spatial arrangement is like: + + .. code:: text + + (x0, y0) (x1, y1) (x2, y2) + (x3, y3) (x4, y4) (x5, y5) + (x6, y6) (x7, y7) (x8, y8) + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int or tuple[int]): Same as nn.Conv2d. + padding (int or tuple[int]): Same as nn.Conv2d. + dilation (int or tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. + """ + + _version = 2 + + def __init__(self, *args, **kwargs): + super(DeformConv2dPack, self).__init__(*args, **kwargs) + self.conv_offset = nn.Conv2d( + self.in_channels, + self.deform_groups * 2 * self.kernel_size[0] * self.kernel_size[1], + kernel_size=self.kernel_size, + stride=_pair(self.stride), + padding=_pair(self.padding), + dilation=_pair(self.dilation), + bias=True) + self.init_offset() + + def init_offset(self): + self.conv_offset.weight.data.zero_() + self.conv_offset.bias.data.zero_() + + def forward(self, x): + offset = self.conv_offset(x) + return deform_conv2d(x, offset, self.weight, self.stride, self.padding, + self.dilation, self.groups, self.deform_groups, + False, self.im2col_step) + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + version = local_metadata.get('version', None) + + if version is None or version < 2: + # the key is different in early versions + # In version < 2, DeformConvPack loads previous benchmark models. + if (prefix + 'conv_offset.weight' not in state_dict + and prefix[:-1] + '_offset.weight' in state_dict): + state_dict[prefix + 'conv_offset.weight'] = state_dict.pop( + prefix[:-1] + '_offset.weight') + if (prefix + 'conv_offset.bias' not in state_dict + and prefix[:-1] + '_offset.bias' in state_dict): + state_dict[prefix + + 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + + '_offset.bias') + + if version is not None and version > 1: + print_log( + f'DeformConv2dPack {prefix.rstrip(".")} is upgraded to ' + 'version 2.', + logger='root') + + super()._load_from_state_dict(state_dict, prefix, local_metadata, + strict, missing_keys, unexpected_keys, + error_msgs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py new file mode 100644 index 000000000000..cc245ba91fee --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py @@ -0,0 +1,204 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['deform_roi_pool_forward', 'deform_roi_pool_backward']) + + +class DeformRoIPoolFunction(Function): + + @staticmethod + def symbolic(g, input, rois, offset, output_size, spatial_scale, + sampling_ratio, gamma): + return g.op( + 'mmcv::MMCVDeformRoIPool', + input, + rois, + offset, + pooled_height_i=output_size[0], + pooled_width_i=output_size[1], + spatial_scale_f=spatial_scale, + sampling_ratio_f=sampling_ratio, + gamma_f=gamma) + + @staticmethod + def forward(ctx, + input, + rois, + offset, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + if offset is None: + offset = input.new_zeros(0) + ctx.output_size = _pair(output_size) + ctx.spatial_scale = float(spatial_scale) + ctx.sampling_ratio = int(sampling_ratio) + ctx.gamma = float(gamma) + + assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' + + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], + ctx.output_size[1]) + output = input.new_zeros(output_shape) + + ext_module.deform_roi_pool_forward( + input, + rois, + offset, + output, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + gamma=ctx.gamma) + + ctx.save_for_backward(input, rois, offset) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, rois, offset = ctx.saved_tensors + grad_input = grad_output.new_zeros(input.shape) + grad_offset = grad_output.new_zeros(offset.shape) + + ext_module.deform_roi_pool_backward( + grad_output, + input, + rois, + offset, + grad_input, + grad_offset, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + gamma=ctx.gamma) + if grad_offset.numel() == 0: + grad_offset = None + return grad_input, None, grad_offset, None, None, None, None + + +deform_roi_pool = DeformRoIPoolFunction.apply + + +class DeformRoIPool(nn.Module): + + def __init__(self, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + super(DeformRoIPool, self).__init__() + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + self.sampling_ratio = int(sampling_ratio) + self.gamma = float(gamma) + + def forward(self, input, rois, offset=None): + return deform_roi_pool(input, rois, offset, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + + +class DeformRoIPoolPack(DeformRoIPool): + + def __init__(self, + output_size, + output_channels, + deform_fc_channels=1024, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + super(DeformRoIPoolPack, self).__init__(output_size, spatial_scale, + sampling_ratio, gamma) + + self.output_channels = output_channels + self.deform_fc_channels = deform_fc_channels + + self.offset_fc = nn.Sequential( + nn.Linear( + self.output_size[0] * self.output_size[1] * + self.output_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, + self.output_size[0] * self.output_size[1] * 2)) + self.offset_fc[-1].weight.data.zero_() + self.offset_fc[-1].bias.data.zero_() + + def forward(self, input, rois): + assert input.size(1) == self.output_channels + x = deform_roi_pool(input, rois, None, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + rois_num = rois.size(0) + offset = self.offset_fc(x.view(rois_num, -1)) + offset = offset.view(rois_num, 2, self.output_size[0], + self.output_size[1]) + return deform_roi_pool(input, rois, offset, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + + +class ModulatedDeformRoIPoolPack(DeformRoIPool): + + def __init__(self, + output_size, + output_channels, + deform_fc_channels=1024, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + super(ModulatedDeformRoIPoolPack, + self).__init__(output_size, spatial_scale, sampling_ratio, gamma) + + self.output_channels = output_channels + self.deform_fc_channels = deform_fc_channels + + self.offset_fc = nn.Sequential( + nn.Linear( + self.output_size[0] * self.output_size[1] * + self.output_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, + self.output_size[0] * self.output_size[1] * 2)) + self.offset_fc[-1].weight.data.zero_() + self.offset_fc[-1].bias.data.zero_() + + self.mask_fc = nn.Sequential( + nn.Linear( + self.output_size[0] * self.output_size[1] * + self.output_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, + self.output_size[0] * self.output_size[1] * 1), + nn.Sigmoid()) + self.mask_fc[2].weight.data.zero_() + self.mask_fc[2].bias.data.zero_() + + def forward(self, input, rois): + assert input.size(1) == self.output_channels + x = deform_roi_pool(input, rois, None, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + rois_num = rois.size(0) + offset = self.offset_fc(x.view(rois_num, -1)) + offset = offset.view(rois_num, 2, self.output_size[0], + self.output_size[1]) + mask = self.mask_fc(x.view(rois_num, -1)) + mask = mask.view(rois_num, 1, self.output_size[0], self.output_size[1]) + d = deform_roi_pool(input, rois, offset, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + return d * mask diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py new file mode 100644 index 000000000000..a2e593df9ee5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py @@ -0,0 +1,43 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# This file is for backward compatibility. +# Module wrappers for empty tensor have been moved to mmcv.cnn.bricks. +import warnings + +from ..cnn.bricks.wrappers import Conv2d, ConvTranspose2d, Linear, MaxPool2d + + +class Conv2d_deprecated(Conv2d): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing Conv2d wrapper from "mmcv.ops" will be deprecated in' + ' the future. Please import them from "mmcv.cnn" instead') + + +class ConvTranspose2d_deprecated(ConvTranspose2d): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing ConvTranspose2d wrapper from "mmcv.ops" will be ' + 'deprecated in the future. Please import them from "mmcv.cnn" ' + 'instead') + + +class MaxPool2d_deprecated(MaxPool2d): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing MaxPool2d wrapper from "mmcv.ops" will be deprecated in' + ' the future. Please import them from "mmcv.cnn" instead') + + +class Linear_deprecated(Linear): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing Linear wrapper from "mmcv.ops" will be deprecated in' + ' the future. Please import them from "mmcv.cnn" instead') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py new file mode 100644 index 000000000000..763bc93bd257 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py @@ -0,0 +1,212 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'sigmoid_focal_loss_forward', 'sigmoid_focal_loss_backward', + 'softmax_focal_loss_forward', 'softmax_focal_loss_backward' +]) + + +class SigmoidFocalLossFunction(Function): + + @staticmethod + def symbolic(g, input, target, gamma, alpha, weight, reduction): + return g.op( + 'mmcv::MMCVSigmoidFocalLoss', + input, + target, + gamma_f=gamma, + alpha_f=alpha, + weight_f=weight, + reduction_s=reduction) + + @staticmethod + def forward(ctx, + input, + target, + gamma=2.0, + alpha=0.25, + weight=None, + reduction='mean'): + + assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor)) + assert input.dim() == 2 + assert target.dim() == 1 + assert input.size(0) == target.size(0) + if weight is None: + weight = input.new_empty(0) + else: + assert weight.dim() == 1 + assert input.size(1) == weight.size(0) + ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2} + assert reduction in ctx.reduction_dict.keys() + + ctx.gamma = float(gamma) + ctx.alpha = float(alpha) + ctx.reduction = ctx.reduction_dict[reduction] + + output = input.new_zeros(input.size()) + + ext_module.sigmoid_focal_loss_forward( + input, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha) + if ctx.reduction == ctx.reduction_dict['mean']: + output = output.sum() / input.size(0) + elif ctx.reduction == ctx.reduction_dict['sum']: + output = output.sum() + ctx.save_for_backward(input, target, weight) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, target, weight = ctx.saved_tensors + + grad_input = input.new_zeros(input.size()) + + ext_module.sigmoid_focal_loss_backward( + input, + target, + weight, + grad_input, + gamma=ctx.gamma, + alpha=ctx.alpha) + + grad_input *= grad_output + if ctx.reduction == ctx.reduction_dict['mean']: + grad_input /= input.size(0) + return grad_input, None, None, None, None, None + + +sigmoid_focal_loss = SigmoidFocalLossFunction.apply + + +class SigmoidFocalLoss(nn.Module): + + def __init__(self, gamma, alpha, weight=None, reduction='mean'): + super(SigmoidFocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + self.register_buffer('weight', weight) + self.reduction = reduction + + def forward(self, input, target): + return sigmoid_focal_loss(input, target, self.gamma, self.alpha, + self.weight, self.reduction) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(gamma={self.gamma}, ' + s += f'alpha={self.alpha}, ' + s += f'reduction={self.reduction})' + return s + + +class SoftmaxFocalLossFunction(Function): + + @staticmethod + def symbolic(g, input, target, gamma, alpha, weight, reduction): + return g.op( + 'mmcv::MMCVSoftmaxFocalLoss', + input, + target, + gamma_f=gamma, + alpha_f=alpha, + weight_f=weight, + reduction_s=reduction) + + @staticmethod + def forward(ctx, + input, + target, + gamma=2.0, + alpha=0.25, + weight=None, + reduction='mean'): + + assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor)) + assert input.dim() == 2 + assert target.dim() == 1 + assert input.size(0) == target.size(0) + if weight is None: + weight = input.new_empty(0) + else: + assert weight.dim() == 1 + assert input.size(1) == weight.size(0) + ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2} + assert reduction in ctx.reduction_dict.keys() + + ctx.gamma = float(gamma) + ctx.alpha = float(alpha) + ctx.reduction = ctx.reduction_dict[reduction] + + channel_stats, _ = torch.max(input, dim=1) + input_softmax = input - channel_stats.unsqueeze(1).expand_as(input) + input_softmax.exp_() + + channel_stats = input_softmax.sum(dim=1) + input_softmax /= channel_stats.unsqueeze(1).expand_as(input) + + output = input.new_zeros(input.size(0)) + ext_module.softmax_focal_loss_forward( + input_softmax, + target, + weight, + output, + gamma=ctx.gamma, + alpha=ctx.alpha) + + if ctx.reduction == ctx.reduction_dict['mean']: + output = output.sum() / input.size(0) + elif ctx.reduction == ctx.reduction_dict['sum']: + output = output.sum() + ctx.save_for_backward(input_softmax, target, weight) + return output + + @staticmethod + def backward(ctx, grad_output): + input_softmax, target, weight = ctx.saved_tensors + buff = input_softmax.new_zeros(input_softmax.size(0)) + grad_input = input_softmax.new_zeros(input_softmax.size()) + + ext_module.softmax_focal_loss_backward( + input_softmax, + target, + weight, + buff, + grad_input, + gamma=ctx.gamma, + alpha=ctx.alpha) + + grad_input *= grad_output + if ctx.reduction == ctx.reduction_dict['mean']: + grad_input /= input_softmax.size(0) + return grad_input, None, None, None, None, None + + +softmax_focal_loss = SoftmaxFocalLossFunction.apply + + +class SoftmaxFocalLoss(nn.Module): + + def __init__(self, gamma, alpha, weight=None, reduction='mean'): + super(SoftmaxFocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + self.register_buffer('weight', weight) + self.reduction = reduction + + def forward(self, input, target): + return softmax_focal_loss(input, target, self.gamma, self.alpha, + self.weight, self.reduction) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(gamma={self.gamma}, ' + s += f'alpha={self.alpha}, ' + s += f'reduction={self.reduction})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py new file mode 100644 index 000000000000..374b7a878f19 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py @@ -0,0 +1,83 @@ +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'furthest_point_sampling_forward', + 'furthest_point_sampling_with_dist_forward' +]) + + +class FurthestPointSampling(Function): + """Uses iterative furthest point sampling to select a set of features whose + corresponding points have the furthest distance.""" + + @staticmethod + def forward(ctx, points_xyz: torch.Tensor, + num_points: int) -> torch.Tensor: + """ + Args: + points_xyz (Tensor): (B, N, 3) where N > num_points. + num_points (int): Number of points in the sampled set. + + Returns: + Tensor: (B, num_points) indices of the sampled points. + """ + assert points_xyz.is_contiguous() + + B, N = points_xyz.size()[:2] + output = torch.cuda.IntTensor(B, num_points) + temp = torch.cuda.FloatTensor(B, N).fill_(1e10) + + ext_module.furthest_point_sampling_forward( + points_xyz, + temp, + output, + b=B, + n=N, + m=num_points, + ) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(output) + return output + + @staticmethod + def backward(xyz, a=None): + return None, None + + +class FurthestPointSamplingWithDist(Function): + """Uses iterative furthest point sampling to select a set of features whose + corresponding points have the furthest distance.""" + + @staticmethod + def forward(ctx, points_dist: torch.Tensor, + num_points: int) -> torch.Tensor: + """ + Args: + points_dist (Tensor): (B, N, N) Distance between each point pair. + num_points (int): Number of points in the sampled set. + + Returns: + Tensor: (B, num_points) indices of the sampled points. + """ + assert points_dist.is_contiguous() + + B, N, _ = points_dist.size() + output = points_dist.new_zeros([B, num_points], dtype=torch.int32) + temp = points_dist.new_zeros([B, N]).fill_(1e10) + + ext_module.furthest_point_sampling_with_dist_forward( + points_dist, temp, output, b=B, n=N, m=num_points) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(output) + return output + + @staticmethod + def backward(xyz, a=None): + return None, None + + +furthest_point_sample = FurthestPointSampling.apply +furthest_point_sample_with_dist = FurthestPointSamplingWithDist.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py new file mode 100644 index 000000000000..6d12508469c6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py @@ -0,0 +1,268 @@ +# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501 + +# Copyright (c) 2021, NVIDIA Corporation. All rights reserved. +# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator +# Augmentation (ADA) +# ======================================================================= + +# 1. Definitions + +# "Licensor" means any person or entity that distributes its Work. + +# "Software" means the original work of authorship made available under +# this License. + +# "Work" means the Software and any additions to or derivative works of +# the Software that are made available under this License. + +# The terms "reproduce," "reproduction," "derivative works," and +# "distribution" have the meaning as provided under U.S. copyright law; +# provided, however, that for the purposes of this License, derivative +# works shall not include works that remain separable from, or merely +# link (or bind by name) to the interfaces of, the Work. + +# Works, including the Software, are "made available" under this License +# by including in or with the Work either (a) a copyright notice +# referencing the applicability of this License to the Work, or (b) a +# copy of this License. + +# 2. License Grants + +# 2.1 Copyright Grant. Subject to the terms and conditions of this +# License, each Licensor grants to you a perpetual, worldwide, +# non-exclusive, royalty-free, copyright license to reproduce, +# prepare derivative works of, publicly display, publicly perform, +# sublicense and distribute its Work and any resulting derivative +# works in any form. + +# 3. Limitations + +# 3.1 Redistribution. You may reproduce or distribute the Work only +# if (a) you do so under this License, (b) you include a complete +# copy of this License with your distribution, and (c) you retain +# without modification any copyright, patent, trademark, or +# attribution notices that are present in the Work. + +# 3.2 Derivative Works. You may specify that additional or different +# terms apply to the use, reproduction, and distribution of your +# derivative works of the Work ("Your Terms") only if (a) Your Terms +# provide that the use limitation in Section 3.3 applies to your +# derivative works, and (b) you identify the specific derivative +# works that are subject to Your Terms. Notwithstanding Your Terms, +# this License (including the redistribution requirements in Section +# 3.1) will continue to apply to the Work itself. + +# 3.3 Use Limitation. The Work and any derivative works thereof only +# may be used or intended for use non-commercially. Notwithstanding +# the foregoing, NVIDIA and its affiliates may use the Work and any +# derivative works commercially. As used herein, "non-commercially" +# means for research or evaluation purposes only. + +# 3.4 Patent Claims. If you bring or threaten to bring a patent claim +# against any Licensor (including any claim, cross-claim or +# counterclaim in a lawsuit) to enforce any patents that you allege +# are infringed by any Work, then your rights under this License from +# such Licensor (including the grant in Section 2.1) will terminate +# immediately. + +# 3.5 Trademarks. This License does not grant any rights to use any +# Licensor’s or its affiliates’ names, logos, or trademarks, except +# as necessary to reproduce the notices described in this License. + +# 3.6 Termination. If you violate any term of this License, then your +# rights under this License (including the grant in Section 2.1) will +# terminate immediately. + +# 4. Disclaimer of Warranty. + +# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR +# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER +# THIS LICENSE. + +# 5. Limitation of Liability. + +# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL +# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE +# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, +# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF +# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK +# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, +# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER +# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGES. + +# ======================================================================= + +import torch +import torch.nn.functional as F +from torch import nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['fused_bias_leakyrelu']) + + +class FusedBiasLeakyReLUFunctionBackward(Function): + """Calculate second order deviation. + + This function is to compute the second order deviation for the fused leaky + relu operation. + """ + + @staticmethod + def forward(ctx, grad_output, out, negative_slope, scale): + ctx.save_for_backward(out) + ctx.negative_slope = negative_slope + ctx.scale = scale + + empty = grad_output.new_empty(0) + + grad_input = ext_module.fused_bias_leakyrelu( + grad_output, + empty, + out, + act=3, + grad=1, + alpha=negative_slope, + scale=scale) + + dim = [0] + + if grad_input.ndim > 2: + dim += list(range(2, grad_input.ndim)) + + grad_bias = grad_input.sum(dim).detach() + + return grad_input, grad_bias + + @staticmethod + def backward(ctx, gradgrad_input, gradgrad_bias): + out, = ctx.saved_tensors + + # The second order deviation, in fact, contains two parts, while the + # the first part is zero. Thus, we direct consider the second part + # which is similar with the first order deviation in implementation. + gradgrad_out = ext_module.fused_bias_leakyrelu( + gradgrad_input, + gradgrad_bias.to(out.dtype), + out, + act=3, + grad=1, + alpha=ctx.negative_slope, + scale=ctx.scale) + + return gradgrad_out, None, None, None + + +class FusedBiasLeakyReLUFunction(Function): + + @staticmethod + def forward(ctx, input, bias, negative_slope, scale): + empty = input.new_empty(0) + + out = ext_module.fused_bias_leakyrelu( + input, + bias, + empty, + act=3, + grad=0, + alpha=negative_slope, + scale=scale) + ctx.save_for_backward(out) + ctx.negative_slope = negative_slope + ctx.scale = scale + + return out + + @staticmethod + def backward(ctx, grad_output): + out, = ctx.saved_tensors + + grad_input, grad_bias = FusedBiasLeakyReLUFunctionBackward.apply( + grad_output, out, ctx.negative_slope, ctx.scale) + + return grad_input, grad_bias, None, None + + +class FusedBiasLeakyReLU(nn.Module): + """Fused bias leaky ReLU. + + This function is introduced in the StyleGAN2: + http://arxiv.org/abs/1912.04958 + + The bias term comes from the convolution operation. In addition, to keep + the variance of the feature map or gradients unchanged, they also adopt a + scale similarly with Kaiming initialization. However, since the + :math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the + final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501 + your own scale. + + TODO: Implement the CPU version. + + Args: + channel (int): The channel number of the feature map. + negative_slope (float, optional): Same as nn.LeakyRelu. + Defaults to 0.2. + scale (float, optional): A scalar to adjust the variance of the feature + map. Defaults to 2**0.5. + """ + + def __init__(self, num_channels, negative_slope=0.2, scale=2**0.5): + super(FusedBiasLeakyReLU, self).__init__() + + self.bias = nn.Parameter(torch.zeros(num_channels)) + self.negative_slope = negative_slope + self.scale = scale + + def forward(self, input): + return fused_bias_leakyrelu(input, self.bias, self.negative_slope, + self.scale) + + +def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5): + """Fused bias leaky ReLU function. + + This function is introduced in the StyleGAN2: + http://arxiv.org/abs/1912.04958 + + The bias term comes from the convolution operation. In addition, to keep + the variance of the feature map or gradients unchanged, they also adopt a + scale similarly with Kaiming initialization. However, since the + :math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the + final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501 + your own scale. + + Args: + input (torch.Tensor): Input feature map. + bias (nn.Parameter): The bias from convolution operation. + negative_slope (float, optional): Same as nn.LeakyRelu. + Defaults to 0.2. + scale (float, optional): A scalar to adjust the variance of the feature + map. Defaults to 2**0.5. + + Returns: + torch.Tensor: Feature map after non-linear activation. + """ + + if not input.is_cuda: + return bias_leakyrelu_ref(input, bias, negative_slope, scale) + + return FusedBiasLeakyReLUFunction.apply(input, bias.to(input.dtype), + negative_slope, scale) + + +def bias_leakyrelu_ref(x, bias, negative_slope=0.2, scale=2**0.5): + + if bias is not None: + assert bias.ndim == 1 + assert bias.shape[0] == x.shape[1] + x = x + bias.reshape([-1 if i == 1 else 1 for i in range(x.ndim)]) + + x = F.leaky_relu(x, negative_slope) + if scale != 1: + x = x * scale + + return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py new file mode 100644 index 000000000000..f52f1677d8ea --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py @@ -0,0 +1,57 @@ +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['gather_points_forward', 'gather_points_backward']) + + +class GatherPoints(Function): + """Gather points with given index.""" + + @staticmethod + def forward(ctx, features: torch.Tensor, + indices: torch.Tensor) -> torch.Tensor: + """ + Args: + features (Tensor): (B, C, N) features to gather. + indices (Tensor): (B, M) where M is the number of points. + + Returns: + Tensor: (B, C, M) where M is the number of points. + """ + assert features.is_contiguous() + assert indices.is_contiguous() + + B, npoint = indices.size() + _, C, N = features.size() + output = torch.cuda.FloatTensor(B, C, npoint) + + ext_module.gather_points_forward( + features, indices, output, b=B, c=C, n=N, npoints=npoint) + + ctx.for_backwards = (indices, C, N) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(indices) + return output + + @staticmethod + def backward(ctx, grad_out): + idx, C, N = ctx.for_backwards + B, npoint = idx.size() + + grad_features = torch.cuda.FloatTensor(B, C, N).zero_() + grad_out_data = grad_out.data.contiguous() + ext_module.gather_points_backward( + grad_out_data, + idx, + grad_features.data, + b=B, + c=C, + n=N, + npoints=npoint) + return grad_features, None + + +gather_points = GatherPoints.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py new file mode 100644 index 000000000000..6c3ec9d758eb --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py @@ -0,0 +1,224 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Tuple + +import torch +from torch import nn as nn +from torch.autograd import Function + +from ..utils import ext_loader +from .ball_query import ball_query +from .knn import knn + +ext_module = ext_loader.load_ext( + '_ext', ['group_points_forward', 'group_points_backward']) + + +class QueryAndGroup(nn.Module): + """Groups points with a ball query of radius. + + Args: + max_radius (float): The maximum radius of the balls. + If None is given, we will use kNN sampling instead of ball query. + sample_num (int): Maximum number of features to gather in the ball. + min_radius (float, optional): The minimum radius of the balls. + Default: 0. + use_xyz (bool, optional): Whether to use xyz. + Default: True. + return_grouped_xyz (bool, optional): Whether to return grouped xyz. + Default: False. + normalize_xyz (bool, optional): Whether to normalize xyz. + Default: False. + uniform_sample (bool, optional): Whether to sample uniformly. + Default: False + return_unique_cnt (bool, optional): Whether to return the count of + unique samples. Default: False. + return_grouped_idx (bool, optional): Whether to return grouped idx. + Default: False. + """ + + def __init__(self, + max_radius, + sample_num, + min_radius=0, + use_xyz=True, + return_grouped_xyz=False, + normalize_xyz=False, + uniform_sample=False, + return_unique_cnt=False, + return_grouped_idx=False): + super().__init__() + self.max_radius = max_radius + self.min_radius = min_radius + self.sample_num = sample_num + self.use_xyz = use_xyz + self.return_grouped_xyz = return_grouped_xyz + self.normalize_xyz = normalize_xyz + self.uniform_sample = uniform_sample + self.return_unique_cnt = return_unique_cnt + self.return_grouped_idx = return_grouped_idx + if self.return_unique_cnt: + assert self.uniform_sample, \ + 'uniform_sample should be True when ' \ + 'returning the count of unique samples' + if self.max_radius is None: + assert not self.normalize_xyz, \ + 'can not normalize grouped xyz when max_radius is None' + + def forward(self, points_xyz, center_xyz, features=None): + """ + Args: + points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. + center_xyz (Tensor): (B, npoint, 3) coordinates of the centriods. + features (Tensor): (B, C, N) Descriptors of the features. + + Returns: + Tensor: (B, 3 + C, npoint, sample_num) Grouped feature. + """ + # if self.max_radius is None, we will perform kNN instead of ball query + # idx is of shape [B, npoint, sample_num] + if self.max_radius is None: + idx = knn(self.sample_num, points_xyz, center_xyz, False) + idx = idx.transpose(1, 2).contiguous() + else: + idx = ball_query(self.min_radius, self.max_radius, self.sample_num, + points_xyz, center_xyz) + + if self.uniform_sample: + unique_cnt = torch.zeros((idx.shape[0], idx.shape[1])) + for i_batch in range(idx.shape[0]): + for i_region in range(idx.shape[1]): + unique_ind = torch.unique(idx[i_batch, i_region, :]) + num_unique = unique_ind.shape[0] + unique_cnt[i_batch, i_region] = num_unique + sample_ind = torch.randint( + 0, + num_unique, (self.sample_num - num_unique, ), + dtype=torch.long) + all_ind = torch.cat((unique_ind, unique_ind[sample_ind])) + idx[i_batch, i_region, :] = all_ind + + xyz_trans = points_xyz.transpose(1, 2).contiguous() + # (B, 3, npoint, sample_num) + grouped_xyz = grouping_operation(xyz_trans, idx) + grouped_xyz_diff = grouped_xyz - \ + center_xyz.transpose(1, 2).unsqueeze(-1) # relative offsets + if self.normalize_xyz: + grouped_xyz_diff /= self.max_radius + + if features is not None: + grouped_features = grouping_operation(features, idx) + if self.use_xyz: + # (B, C + 3, npoint, sample_num) + new_features = torch.cat([grouped_xyz_diff, grouped_features], + dim=1) + else: + new_features = grouped_features + else: + assert (self.use_xyz + ), 'Cannot have not features and not use xyz as a feature!' + new_features = grouped_xyz_diff + + ret = [new_features] + if self.return_grouped_xyz: + ret.append(grouped_xyz) + if self.return_unique_cnt: + ret.append(unique_cnt) + if self.return_grouped_idx: + ret.append(idx) + if len(ret) == 1: + return ret[0] + else: + return tuple(ret) + + +class GroupAll(nn.Module): + """Group xyz with feature. + + Args: + use_xyz (bool): Whether to use xyz. + """ + + def __init__(self, use_xyz: bool = True): + super().__init__() + self.use_xyz = use_xyz + + def forward(self, + xyz: torch.Tensor, + new_xyz: torch.Tensor, + features: torch.Tensor = None): + """ + Args: + xyz (Tensor): (B, N, 3) xyz coordinates of the features. + new_xyz (Tensor): new xyz coordinates of the features. + features (Tensor): (B, C, N) features to group. + + Returns: + Tensor: (B, C + 3, 1, N) Grouped feature. + """ + grouped_xyz = xyz.transpose(1, 2).unsqueeze(2) + if features is not None: + grouped_features = features.unsqueeze(2) + if self.use_xyz: + # (B, 3 + C, 1, N) + new_features = torch.cat([grouped_xyz, grouped_features], + dim=1) + else: + new_features = grouped_features + else: + new_features = grouped_xyz + + return new_features + + +class GroupingOperation(Function): + """Group feature with given index.""" + + @staticmethod + def forward(ctx, features: torch.Tensor, + indices: torch.Tensor) -> torch.Tensor: + """ + Args: + features (Tensor): (B, C, N) tensor of features to group. + indices (Tensor): (B, npoint, nsample) the indices of + features to group with. + + Returns: + Tensor: (B, C, npoint, nsample) Grouped features. + """ + features = features.contiguous() + indices = indices.contiguous() + + B, nfeatures, nsample = indices.size() + _, C, N = features.size() + output = torch.cuda.FloatTensor(B, C, nfeatures, nsample) + + ext_module.group_points_forward(B, C, N, nfeatures, nsample, features, + indices, output) + + ctx.for_backwards = (indices, N) + return output + + @staticmethod + def backward(ctx, + grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + grad_out (Tensor): (B, C, npoint, nsample) tensor of the gradients + of the output from forward. + + Returns: + Tensor: (B, C, N) gradient of the features. + """ + idx, N = ctx.for_backwards + + B, C, npoint, nsample = grad_out.size() + grad_features = torch.cuda.FloatTensor(B, C, N).zero_() + + grad_out_data = grad_out.data.contiguous() + ext_module.group_points_backward(B, C, N, npoint, nsample, + grad_out_data, idx, + grad_features.data) + return grad_features, None + + +grouping_operation = GroupingOperation.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py new file mode 100644 index 000000000000..29f2e5598ae2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import glob +import os + +import torch + +if torch.__version__ == 'parrots': + import parrots + + def get_compiler_version(): + return 'GCC ' + parrots.version.compiler + + def get_compiling_cuda_version(): + return parrots.version.cuda +else: + from ..utils import ext_loader + ext_module = ext_loader.load_ext( + '_ext', ['get_compiler_version', 'get_compiling_cuda_version']) + + def get_compiler_version(): + return ext_module.get_compiler_version() + + def get_compiling_cuda_version(): + return ext_module.get_compiling_cuda_version() + + +def get_onnxruntime_op_path(): + wildcard = os.path.join( + os.path.abspath(os.path.dirname(os.path.dirname(__file__))), + '_ext_ort.*.so') + + paths = glob.glob(wildcard) + if len(paths) > 0: + return paths[0] + else: + return '' diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py new file mode 100644 index 000000000000..6fc719791903 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py @@ -0,0 +1,85 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'iou3d_boxes_iou_bev_forward', 'iou3d_nms_forward', + 'iou3d_nms_normal_forward' +]) + + +def boxes_iou_bev(boxes_a, boxes_b): + """Calculate boxes IoU in the Bird's Eye View. + + Args: + boxes_a (torch.Tensor): Input boxes a with shape (M, 5). + boxes_b (torch.Tensor): Input boxes b with shape (N, 5). + + Returns: + ans_iou (torch.Tensor): IoU result with shape (M, N). + """ + ans_iou = boxes_a.new_zeros( + torch.Size((boxes_a.shape[0], boxes_b.shape[0]))) + + ext_module.iou3d_boxes_iou_bev_forward(boxes_a.contiguous(), + boxes_b.contiguous(), ans_iou) + + return ans_iou + + +def nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None): + """NMS function GPU implementation (for BEV boxes). The overlap of two + boxes for IoU calculation is defined as the exact overlapping area of the + two boxes. In this function, one can also set ``pre_max_size`` and + ``post_max_size``. + + Args: + boxes (torch.Tensor): Input boxes with the shape of [N, 5] + ([x1, y1, x2, y2, ry]). + scores (torch.Tensor): Scores of boxes with the shape of [N]. + thresh (float): Overlap threshold of NMS. + pre_max_size (int, optional): Max size of boxes before NMS. + Default: None. + post_max_size (int, optional): Max size of boxes after NMS. + Default: None. + + Returns: + torch.Tensor: Indexes after NMS. + """ + assert boxes.size(1) == 5, 'Input boxes shape should be [N, 5]' + order = scores.sort(0, descending=True)[1] + + if pre_max_size is not None: + order = order[:pre_max_size] + boxes = boxes[order].contiguous() + + keep = torch.zeros(boxes.size(0), dtype=torch.long) + num_out = ext_module.iou3d_nms_forward(boxes, keep, thresh) + keep = order[keep[:num_out].cuda(boxes.device)].contiguous() + if post_max_size is not None: + keep = keep[:post_max_size] + return keep + + +def nms_normal_bev(boxes, scores, thresh): + """Normal NMS function GPU implementation (for BEV boxes). The overlap of + two boxes for IoU calculation is defined as the exact overlapping area of + the two boxes WITH their yaw angle set to 0. + + Args: + boxes (torch.Tensor): Input boxes with shape (N, 5). + scores (torch.Tensor): Scores of predicted boxes with shape (N). + thresh (float): Overlap threshold of NMS. + + Returns: + torch.Tensor: Remaining indices with scores in descending order. + """ + assert boxes.shape[1] == 5, 'Input boxes shape should be [N, 5]' + order = scores.sort(0, descending=True)[1] + + boxes = boxes[order].contiguous() + + keep = torch.zeros(boxes.size(0), dtype=torch.long) + num_out = ext_module.iou3d_nms_normal_forward(boxes, keep, thresh) + return order[keep[:num_out].cuda(boxes.device)].contiguous() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py new file mode 100644 index 000000000000..f33578503666 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py @@ -0,0 +1,77 @@ +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['knn_forward']) + + +class KNN(Function): + r"""KNN (CUDA) based on heap data structure. + Modified from `PAConv `_. + + Find k-nearest points. + """ + + @staticmethod + def forward(ctx, + k: int, + xyz: torch.Tensor, + center_xyz: torch.Tensor = None, + transposed: bool = False) -> torch.Tensor: + """ + Args: + k (int): number of nearest neighbors. + xyz (Tensor): (B, N, 3) if transposed == False, else (B, 3, N). + xyz coordinates of the features. + center_xyz (Tensor, optional): (B, npoint, 3) if transposed == + False, else (B, 3, npoint). centers of the knn query. + Default: None. + transposed (bool, optional): whether the input tensors are + transposed. Should not explicitly use this keyword when + calling knn (=KNN.apply), just add the fourth param. + Default: False. + + Returns: + Tensor: (B, k, npoint) tensor with the indices of + the features that form k-nearest neighbours. + """ + assert (k > 0) & (k < 100), 'k should be in range(0, 100)' + + if center_xyz is None: + center_xyz = xyz + + if transposed: + xyz = xyz.transpose(2, 1).contiguous() + center_xyz = center_xyz.transpose(2, 1).contiguous() + + assert xyz.is_contiguous() # [B, N, 3] + assert center_xyz.is_contiguous() # [B, npoint, 3] + + center_xyz_device = center_xyz.get_device() + assert center_xyz_device == xyz.get_device(), \ + 'center_xyz and xyz should be put on the same device' + if torch.cuda.current_device() != center_xyz_device: + torch.cuda.set_device(center_xyz_device) + + B, npoint, _ = center_xyz.shape + N = xyz.shape[1] + + idx = center_xyz.new_zeros((B, npoint, k)).int() + dist2 = center_xyz.new_zeros((B, npoint, k)).float() + + ext_module.knn_forward( + xyz, center_xyz, idx, dist2, b=B, n=N, m=npoint, nsample=k) + # idx shape to [B, k, npoint] + idx = idx.transpose(2, 1).contiguous() + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(idx) + return idx + + @staticmethod + def backward(ctx, a=None): + return None, None, None + + +knn = KNN.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py new file mode 100644 index 000000000000..cd514cc204c1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py @@ -0,0 +1,111 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['masked_im2col_forward', 'masked_col2im_forward']) + + +class MaskedConv2dFunction(Function): + + @staticmethod + def symbolic(g, features, mask, weight, bias, padding, stride): + return g.op( + 'mmcv::MMCVMaskedConv2d', + features, + mask, + weight, + bias, + padding_i=padding, + stride_i=stride) + + @staticmethod + def forward(ctx, features, mask, weight, bias, padding=0, stride=1): + assert mask.dim() == 3 and mask.size(0) == 1 + assert features.dim() == 4 and features.size(0) == 1 + assert features.size()[2:] == mask.size()[1:] + pad_h, pad_w = _pair(padding) + stride_h, stride_w = _pair(stride) + if stride_h != 1 or stride_w != 1: + raise ValueError( + 'Stride could not only be 1 in masked_conv2d currently.') + out_channel, in_channel, kernel_h, kernel_w = weight.size() + + batch_size = features.size(0) + out_h = int( + math.floor((features.size(2) + 2 * pad_h - + (kernel_h - 1) - 1) / stride_h + 1)) + out_w = int( + math.floor((features.size(3) + 2 * pad_w - + (kernel_h - 1) - 1) / stride_w + 1)) + mask_inds = torch.nonzero(mask[0] > 0, as_tuple=False) + output = features.new_zeros(batch_size, out_channel, out_h, out_w) + if mask_inds.numel() > 0: + mask_h_idx = mask_inds[:, 0].contiguous() + mask_w_idx = mask_inds[:, 1].contiguous() + data_col = features.new_zeros(in_channel * kernel_h * kernel_w, + mask_inds.size(0)) + ext_module.masked_im2col_forward( + features, + mask_h_idx, + mask_w_idx, + data_col, + kernel_h=kernel_h, + kernel_w=kernel_w, + pad_h=pad_h, + pad_w=pad_w) + + masked_output = torch.addmm(1, bias[:, None], 1, + weight.view(out_channel, -1), data_col) + ext_module.masked_col2im_forward( + masked_output, + mask_h_idx, + mask_w_idx, + output, + height=out_h, + width=out_w, + channels=out_channel) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + return (None, ) * 5 + + +masked_conv2d = MaskedConv2dFunction.apply + + +class MaskedConv2d(nn.Conv2d): + """A MaskedConv2d which inherits the official Conv2d. + + The masked forward doesn't implement the backward function and only + supports the stride parameter to be 1 currently. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True): + super(MaskedConv2d, + self).__init__(in_channels, out_channels, kernel_size, stride, + padding, dilation, groups, bias) + + def forward(self, input, mask=None): + if mask is None: # fallback to the normal Conv2d + return super(MaskedConv2d, self).forward(input) + else: + return masked_conv2d(input, mask, self.weight, self.bias, + self.padding) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py new file mode 100644 index 000000000000..48ca8cc0a8ac --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py @@ -0,0 +1,149 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import abstractmethod + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..cnn import ConvModule + + +class BaseMergeCell(nn.Module): + """The basic class for cells used in NAS-FPN and NAS-FCOS. + + BaseMergeCell takes 2 inputs. After applying convolution + on them, they are resized to the target size. Then, + they go through binary_op, which depends on the type of cell. + If with_out_conv is True, the result of output will go through + another convolution layer. + + Args: + in_channels (int): number of input channels in out_conv layer. + out_channels (int): number of output channels in out_conv layer. + with_out_conv (bool): Whether to use out_conv layer + out_conv_cfg (dict): Config dict for convolution layer, which should + contain "groups", "kernel_size", "padding", "bias" to build + out_conv layer. + out_norm_cfg (dict): Config dict for normalization layer in out_conv. + out_conv_order (tuple): The order of conv/norm/activation layers in + out_conv. + with_input1_conv (bool): Whether to use convolution on input1. + with_input2_conv (bool): Whether to use convolution on input2. + input_conv_cfg (dict): Config dict for building input1_conv layer and + input2_conv layer, which is expected to contain the type of + convolution. + Default: None, which means using conv2d. + input_norm_cfg (dict): Config dict for normalization layer in + input1_conv and input2_conv layer. Default: None. + upsample_mode (str): Interpolation method used to resize the output + of input1_conv and input2_conv to target size. Currently, we + support ['nearest', 'bilinear']. Default: 'nearest'. + """ + + def __init__(self, + fused_channels=256, + out_channels=256, + with_out_conv=True, + out_conv_cfg=dict( + groups=1, kernel_size=3, padding=1, bias=True), + out_norm_cfg=None, + out_conv_order=('act', 'conv', 'norm'), + with_input1_conv=False, + with_input2_conv=False, + input_conv_cfg=None, + input_norm_cfg=None, + upsample_mode='nearest'): + super(BaseMergeCell, self).__init__() + assert upsample_mode in ['nearest', 'bilinear'] + self.with_out_conv = with_out_conv + self.with_input1_conv = with_input1_conv + self.with_input2_conv = with_input2_conv + self.upsample_mode = upsample_mode + + if self.with_out_conv: + self.out_conv = ConvModule( + fused_channels, + out_channels, + **out_conv_cfg, + norm_cfg=out_norm_cfg, + order=out_conv_order) + + self.input1_conv = self._build_input_conv( + out_channels, input_conv_cfg, + input_norm_cfg) if with_input1_conv else nn.Sequential() + self.input2_conv = self._build_input_conv( + out_channels, input_conv_cfg, + input_norm_cfg) if with_input2_conv else nn.Sequential() + + def _build_input_conv(self, channel, conv_cfg, norm_cfg): + return ConvModule( + channel, + channel, + 3, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + bias=True) + + @abstractmethod + def _binary_op(self, x1, x2): + pass + + def _resize(self, x, size): + if x.shape[-2:] == size: + return x + elif x.shape[-2:] < size: + return F.interpolate(x, size=size, mode=self.upsample_mode) + else: + assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0 + kernel_size = x.shape[-1] // size[-1] + x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size) + return x + + def forward(self, x1, x2, out_size=None): + assert x1.shape[:2] == x2.shape[:2] + assert out_size is None or len(out_size) == 2 + if out_size is None: # resize to larger one + out_size = max(x1.size()[2:], x2.size()[2:]) + + x1 = self.input1_conv(x1) + x2 = self.input2_conv(x2) + + x1 = self._resize(x1, out_size) + x2 = self._resize(x2, out_size) + + x = self._binary_op(x1, x2) + if self.with_out_conv: + x = self.out_conv(x) + return x + + +class SumCell(BaseMergeCell): + + def __init__(self, in_channels, out_channels, **kwargs): + super(SumCell, self).__init__(in_channels, out_channels, **kwargs) + + def _binary_op(self, x1, x2): + return x1 + x2 + + +class ConcatCell(BaseMergeCell): + + def __init__(self, in_channels, out_channels, **kwargs): + super(ConcatCell, self).__init__(in_channels * 2, out_channels, + **kwargs) + + def _binary_op(self, x1, x2): + ret = torch.cat([x1, x2], dim=1) + return ret + + +class GlobalPoolingCell(BaseMergeCell): + + def __init__(self, in_channels=None, out_channels=None, **kwargs): + super().__init__(in_channels, out_channels, **kwargs) + self.global_pool = nn.AdaptiveAvgPool2d((1, 1)) + + def _binary_op(self, x1, x2): + x2_att = self.global_pool(x2).sigmoid() + return x2 + x2_att * x1 diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py new file mode 100644 index 000000000000..3f1ceb073285 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py @@ -0,0 +1,282 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair, _single + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import deprecated_api_warning +from ..cnn import CONV_LAYERS +from ..utils import ext_loader, print_log + +ext_module = ext_loader.load_ext( + '_ext', + ['modulated_deform_conv_forward', 'modulated_deform_conv_backward']) + + +class ModulatedDeformConv2dFunction(Function): + + @staticmethod + def symbolic(g, input, offset, mask, weight, bias, stride, padding, + dilation, groups, deform_groups): + input_tensors = [input, offset, mask, weight] + if bias is not None: + input_tensors.append(bias) + return g.op( + 'mmcv::MMCVModulatedDeformConv2d', + *input_tensors, + stride_i=stride, + padding_i=padding, + dilation_i=dilation, + groups_i=groups, + deform_groups_i=deform_groups) + + @staticmethod + def forward(ctx, + input, + offset, + mask, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1): + if input is not None and input.dim() != 4: + raise ValueError( + f'Expected 4D tensor as input, got {input.dim()}D tensor \ + instead.') + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deform_groups = deform_groups + ctx.with_bias = bias is not None + if not ctx.with_bias: + bias = input.new_empty(0) # fake tensor + # When pytorch version >= 1.6.0, amp is adopted for fp16 mode; + # amp won't cast the type of model (float32), but "offset" is cast + # to float16 by nn.Conv2d automatically, leading to the type + # mismatch with input (when it is float32) or weight. + # The flag for whether to use fp16 or amp is the type of "offset", + # we cast weight and input to temporarily support fp16 and amp + # whatever the pytorch version is. + input = input.type_as(offset) + weight = weight.type_as(input) + ctx.save_for_backward(input, offset, mask, weight, bias) + output = input.new_empty( + ModulatedDeformConv2dFunction._output_size(ctx, input, weight)) + ctx._bufs = [input.new_empty(0), input.new_empty(0)] + ext_module.modulated_deform_conv_forward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + output, + ctx._bufs[1], + kernel_h=weight.size(2), + kernel_w=weight.size(3), + stride_h=ctx.stride[0], + stride_w=ctx.stride[1], + pad_h=ctx.padding[0], + pad_w=ctx.padding[1], + dilation_h=ctx.dilation[0], + dilation_w=ctx.dilation[1], + group=ctx.groups, + deformable_group=ctx.deform_groups, + with_bias=ctx.with_bias) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, mask, weight, bias = ctx.saved_tensors + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + grad_mask = torch.zeros_like(mask) + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(bias) + grad_output = grad_output.contiguous() + ext_module.modulated_deform_conv_backward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + ctx._bufs[1], + grad_input, + grad_weight, + grad_bias, + grad_offset, + grad_mask, + grad_output, + kernel_h=weight.size(2), + kernel_w=weight.size(3), + stride_h=ctx.stride[0], + stride_w=ctx.stride[1], + pad_h=ctx.padding[0], + pad_w=ctx.padding[1], + dilation_h=ctx.dilation[0], + dilation_w=ctx.dilation[1], + group=ctx.groups, + deformable_group=ctx.deform_groups, + with_bias=ctx.with_bias) + if not ctx.with_bias: + grad_bias = None + + return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, + None, None, None, None, None) + + @staticmethod + def _output_size(ctx, input, weight): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = ctx.padding[d] + kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = ctx.stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, ) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError( + 'convolution input is too small (output would be ' + + 'x'.join(map(str, output_size)) + ')') + return output_size + + +modulated_deform_conv2d = ModulatedDeformConv2dFunction.apply + + +class ModulatedDeformConv2d(nn.Module): + + @deprecated_api_warning({'deformable_groups': 'deform_groups'}, + cls_name='ModulatedDeformConv2d') + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1, + bias=True): + super(ModulatedDeformConv2d, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deform_groups = deform_groups + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // groups, + *self.kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.register_parameter('bias', None) + self.init_weights() + + def init_weights(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.zero_() + + def forward(self, x, offset, mask): + return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias, + self.stride, self.padding, + self.dilation, self.groups, + self.deform_groups) + + +@CONV_LAYERS.register_module('DCNv2') +class ModulatedDeformConv2dPack(ModulatedDeformConv2d): + """A ModulatedDeformable Conv Encapsulation that acts as normal Conv + layers. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int): Same as nn.Conv2d, while tuple is not supported. + padding (int): Same as nn.Conv2d, while tuple is not supported. + dilation (int): Same as nn.Conv2d, while tuple is not supported. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. + """ + + _version = 2 + + def __init__(self, *args, **kwargs): + super(ModulatedDeformConv2dPack, self).__init__(*args, **kwargs) + self.conv_offset = nn.Conv2d( + self.in_channels, + self.deform_groups * 3 * self.kernel_size[0] * self.kernel_size[1], + kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + bias=True) + self.init_weights() + + def init_weights(self): + super(ModulatedDeformConv2dPack, self).init_weights() + if hasattr(self, 'conv_offset'): + self.conv_offset.weight.data.zero_() + self.conv_offset.bias.data.zero_() + + def forward(self, x): + out = self.conv_offset(x) + o1, o2, mask = torch.chunk(out, 3, dim=1) + offset = torch.cat((o1, o2), dim=1) + mask = torch.sigmoid(mask) + return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias, + self.stride, self.padding, + self.dilation, self.groups, + self.deform_groups) + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + version = local_metadata.get('version', None) + + if version is None or version < 2: + # the key is different in early versions + # In version < 2, ModulatedDeformConvPack + # loads previous benchmark models. + if (prefix + 'conv_offset.weight' not in state_dict + and prefix[:-1] + '_offset.weight' in state_dict): + state_dict[prefix + 'conv_offset.weight'] = state_dict.pop( + prefix[:-1] + '_offset.weight') + if (prefix + 'conv_offset.bias' not in state_dict + and prefix[:-1] + '_offset.bias' in state_dict): + state_dict[prefix + + 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + + '_offset.bias') + + if version is not None and version > 1: + print_log( + f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to ' + 'version 2.', + logger='root') + + super()._load_from_state_dict(state_dict, prefix, local_metadata, + strict, missing_keys, unexpected_keys, + error_msgs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py new file mode 100644 index 000000000000..ed3f26f61c22 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py @@ -0,0 +1,358 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math +import warnings + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd.function import Function, once_differentiable + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import deprecated_api_warning +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import constant_init, xavier_init +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn.bricks.registry import ATTENTION +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import BaseModule +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward']) + + +class MultiScaleDeformableAttnFunction(Function): + + @staticmethod + def forward(ctx, value, value_spatial_shapes, value_level_start_index, + sampling_locations, attention_weights, im2col_step): + """GPU version of multi-scale deformable attention. + + Args: + value (Tensor): The value has shape + (bs, num_keys, mum_heads, embed_dims//num_heads) + value_spatial_shapes (Tensor): Spatial shape of + each feature map, has shape (num_levels, 2), + last dimension 2 represent (h, w) + sampling_locations (Tensor): The location of sampling points, + has shape + (bs ,num_queries, num_heads, num_levels, num_points, 2), + the last dimension 2 represent (x, y). + attention_weights (Tensor): The weight of sampling points used + when calculate the attention, has shape + (bs ,num_queries, num_heads, num_levels, num_points), + im2col_step (Tensor): The step used in image to column. + + Returns: + Tensor: has shape (bs, num_queries, embed_dims) + """ + + ctx.im2col_step = im2col_step + output = ext_module.ms_deform_attn_forward( + value, + value_spatial_shapes, + value_level_start_index, + sampling_locations, + attention_weights, + im2col_step=ctx.im2col_step) + ctx.save_for_backward(value, value_spatial_shapes, + value_level_start_index, sampling_locations, + attention_weights) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + """GPU version of backward function. + + Args: + grad_output (Tensor): Gradient + of output tensor of forward. + + Returns: + Tuple[Tensor]: Gradient + of input tensors in forward. + """ + value, value_spatial_shapes, value_level_start_index,\ + sampling_locations, attention_weights = ctx.saved_tensors + grad_value = torch.zeros_like(value) + grad_sampling_loc = torch.zeros_like(sampling_locations) + grad_attn_weight = torch.zeros_like(attention_weights) + + ext_module.ms_deform_attn_backward( + value, + value_spatial_shapes, + value_level_start_index, + sampling_locations, + attention_weights, + grad_output.contiguous(), + grad_value, + grad_sampling_loc, + grad_attn_weight, + im2col_step=ctx.im2col_step) + + return grad_value, None, None, \ + grad_sampling_loc, grad_attn_weight, None + + +def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes, + sampling_locations, attention_weights): + """CPU version of multi-scale deformable attention. + + Args: + value (Tensor): The value has shape + (bs, num_keys, mum_heads, embed_dims//num_heads) + value_spatial_shapes (Tensor): Spatial shape of + each feature map, has shape (num_levels, 2), + last dimension 2 represent (h, w) + sampling_locations (Tensor): The location of sampling points, + has shape + (bs ,num_queries, num_heads, num_levels, num_points, 2), + the last dimension 2 represent (x, y). + attention_weights (Tensor): The weight of sampling points used + when calculate the attention, has shape + (bs ,num_queries, num_heads, num_levels, num_points), + + Returns: + Tensor: has shape (bs, num_queries, embed_dims) + """ + + bs, _, num_heads, embed_dims = value.shape + _, num_queries, num_heads, num_levels, num_points, _ =\ + sampling_locations.shape + value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], + dim=1) + sampling_grids = 2 * sampling_locations - 1 + sampling_value_list = [] + for level, (H_, W_) in enumerate(value_spatial_shapes): + # bs, H_*W_, num_heads, embed_dims -> + # bs, H_*W_, num_heads*embed_dims -> + # bs, num_heads*embed_dims, H_*W_ -> + # bs*num_heads, embed_dims, H_, W_ + value_l_ = value_list[level].flatten(2).transpose(1, 2).reshape( + bs * num_heads, embed_dims, H_, W_) + # bs, num_queries, num_heads, num_points, 2 -> + # bs, num_heads, num_queries, num_points, 2 -> + # bs*num_heads, num_queries, num_points, 2 + sampling_grid_l_ = sampling_grids[:, :, :, + level].transpose(1, 2).flatten(0, 1) + # bs*num_heads, embed_dims, num_queries, num_points + sampling_value_l_ = F.grid_sample( + value_l_, + sampling_grid_l_, + mode='bilinear', + padding_mode='zeros', + align_corners=False) + sampling_value_list.append(sampling_value_l_) + # (bs, num_queries, num_heads, num_levels, num_points) -> + # (bs, num_heads, num_queries, num_levels, num_points) -> + # (bs, num_heads, 1, num_queries, num_levels*num_points) + attention_weights = attention_weights.transpose(1, 2).reshape( + bs * num_heads, 1, num_queries, num_levels * num_points) + output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * + attention_weights).sum(-1).view(bs, num_heads * embed_dims, + num_queries) + return output.transpose(1, 2).contiguous() + + +@ATTENTION.register_module() +class MultiScaleDeformableAttention(BaseModule): + """An attention module used in Deformable-Detr. + + `Deformable DETR: Deformable Transformers for End-to-End Object Detection. + `_. + + Args: + embed_dims (int): The embedding dimension of Attention. + Default: 256. + num_heads (int): Parallel attention heads. Default: 64. + num_levels (int): The number of feature map used in + Attention. Default: 4. + num_points (int): The number of sampling points for + each query in each head. Default: 4. + im2col_step (int): The step used in image_to_column. + Default: 64. + dropout (float): A Dropout layer on `inp_identity`. + Default: 0.1. + batch_first (bool): Key, Query and Value are shape of + (batch, n, embed_dim) + or (n, batch, embed_dim). Default to False. + norm_cfg (dict): Config dict for normalization layer. + Default: None. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + """ + + def __init__(self, + embed_dims=256, + num_heads=8, + num_levels=4, + num_points=4, + im2col_step=64, + dropout=0.1, + batch_first=False, + norm_cfg=None, + init_cfg=None): + super().__init__(init_cfg) + if embed_dims % num_heads != 0: + raise ValueError(f'embed_dims must be divisible by num_heads, ' + f'but got {embed_dims} and {num_heads}') + dim_per_head = embed_dims // num_heads + self.norm_cfg = norm_cfg + self.dropout = nn.Dropout(dropout) + self.batch_first = batch_first + + # you'd better set dim_per_head to a power of 2 + # which is more efficient in the CUDA implementation + def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError( + 'invalid input for _is_power_of_2: {} (type: {})'.format( + n, type(n))) + return (n & (n - 1) == 0) and n != 0 + + if not _is_power_of_2(dim_per_head): + warnings.warn( + "You'd better set embed_dims in " + 'MultiScaleDeformAttention to make ' + 'the dimension of each attention head a power of 2 ' + 'which is more efficient in our CUDA implementation.') + + self.im2col_step = im2col_step + self.embed_dims = embed_dims + self.num_levels = num_levels + self.num_heads = num_heads + self.num_points = num_points + self.sampling_offsets = nn.Linear( + embed_dims, num_heads * num_levels * num_points * 2) + self.attention_weights = nn.Linear(embed_dims, + num_heads * num_levels * num_points) + self.value_proj = nn.Linear(embed_dims, embed_dims) + self.output_proj = nn.Linear(embed_dims, embed_dims) + self.init_weights() + + def init_weights(self): + """Default initialization for Parameters of Module.""" + constant_init(self.sampling_offsets, 0.) + thetas = torch.arange( + self.num_heads, + dtype=torch.float32) * (2.0 * math.pi / self.num_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / + grid_init.abs().max(-1, keepdim=True)[0]).view( + self.num_heads, 1, 1, + 2).repeat(1, self.num_levels, self.num_points, 1) + for i in range(self.num_points): + grid_init[:, :, i, :] *= i + 1 + + self.sampling_offsets.bias.data = grid_init.view(-1) + constant_init(self.attention_weights, val=0., bias=0.) + xavier_init(self.value_proj, distribution='uniform', bias=0.) + xavier_init(self.output_proj, distribution='uniform', bias=0.) + self._is_init = True + + @deprecated_api_warning({'residual': 'identity'}, + cls_name='MultiScaleDeformableAttention') + def forward(self, + query, + key=None, + value=None, + identity=None, + query_pos=None, + key_padding_mask=None, + reference_points=None, + spatial_shapes=None, + level_start_index=None, + **kwargs): + """Forward Function of MultiScaleDeformAttention. + + Args: + query (Tensor): Query of Transformer with shape + (num_query, bs, embed_dims). + key (Tensor): The key tensor with shape + `(num_key, bs, embed_dims)`. + value (Tensor): The value tensor with shape + `(num_key, bs, embed_dims)`. + identity (Tensor): The tensor used for addition, with the + same shape as `query`. Default None. If None, + `query` will be used. + query_pos (Tensor): The positional encoding for `query`. + Default: None. + key_pos (Tensor): The positional encoding for `key`. Default + None. + reference_points (Tensor): The normalized reference + points with shape (bs, num_query, num_levels, 2), + all elements is range in [0, 1], top-left (0,0), + bottom-right (1, 1), including padding area. + or (N, Length_{query}, num_levels, 4), add + additional two dimensions is (w, h) to + form reference boxes. + key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_key]. + spatial_shapes (Tensor): Spatial shape of features in + different levels. With shape (num_levels, 2), + last dimension represents (h, w). + level_start_index (Tensor): The start index of each level. + A tensor has shape ``(num_levels, )`` and can be represented + as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...]. + + Returns: + Tensor: forwarded results with shape [num_query, bs, embed_dims]. + """ + + if value is None: + value = query + + if identity is None: + identity = query + if query_pos is not None: + query = query + query_pos + if not self.batch_first: + # change to (bs, num_query ,embed_dims) + query = query.permute(1, 0, 2) + value = value.permute(1, 0, 2) + + bs, num_query, _ = query.shape + bs, num_value, _ = value.shape + assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value + + value = self.value_proj(value) + if key_padding_mask is not None: + value = value.masked_fill(key_padding_mask[..., None], 0.0) + value = value.view(bs, num_value, self.num_heads, -1) + sampling_offsets = self.sampling_offsets(query).view( + bs, num_query, self.num_heads, self.num_levels, self.num_points, 2) + attention_weights = self.attention_weights(query).view( + bs, num_query, self.num_heads, self.num_levels * self.num_points) + attention_weights = attention_weights.softmax(-1) + + attention_weights = attention_weights.view(bs, num_query, + self.num_heads, + self.num_levels, + self.num_points) + if reference_points.shape[-1] == 2: + offset_normalizer = torch.stack( + [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1) + sampling_locations = reference_points[:, :, None, :, None, :] \ + + sampling_offsets \ + / offset_normalizer[None, None, None, :, None, :] + elif reference_points.shape[-1] == 4: + sampling_locations = reference_points[:, :, None, :, None, :2] \ + + sampling_offsets / self.num_points \ + * reference_points[:, :, None, :, None, 2:] \ + * 0.5 + else: + raise ValueError( + f'Last dim of reference_points must be' + f' 2 or 4, but get {reference_points.shape[-1]} instead.') + if torch.cuda.is_available() and value.is_cuda: + output = MultiScaleDeformableAttnFunction.apply( + value, spatial_shapes, level_start_index, sampling_locations, + attention_weights, self.im2col_step) + else: + output = multi_scale_deformable_attn_pytorch( + value, spatial_shapes, sampling_locations, attention_weights) + + output = self.output_proj(output) + + if not self.batch_first: + # (num_query, bs ,embed_dims) + output = output.permute(1, 0, 2) + + return self.dropout(output) + identity diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py new file mode 100644 index 000000000000..00f0b004ff55 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py @@ -0,0 +1,417 @@ +import os + +import numpy as np +import torch + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import deprecated_api_warning +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['nms', 'softnms', 'nms_match', 'nms_rotated']) + + +# This function is modified from: https://github.com/pytorch/vision/ +class NMSop(torch.autograd.Function): + + @staticmethod + def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold, + max_num): + is_filtering_by_score = score_threshold > 0 + if is_filtering_by_score: + valid_mask = scores > score_threshold + bboxes, scores = bboxes[valid_mask], scores[valid_mask] + valid_inds = torch.nonzero( + valid_mask, as_tuple=False).squeeze(dim=1) + + inds = ext_module.nms( + bboxes, scores, iou_threshold=float(iou_threshold), offset=offset) + + if max_num > 0: + inds = inds[:max_num] + if is_filtering_by_score: + inds = valid_inds[inds] + return inds + + @staticmethod + def symbolic(g, bboxes, scores, iou_threshold, offset, score_threshold, + max_num): + from ..onnx import is_custom_op_loaded + has_custom_op = is_custom_op_loaded() + # TensorRT nms plugin is aligned with original nms in ONNXRuntime + is_trt_backend = os.environ.get('ONNX_BACKEND') == 'MMCVTensorRT' + if has_custom_op and (not is_trt_backend): + return g.op( + 'mmcv::NonMaxSuppression', + bboxes, + scores, + iou_threshold_f=float(iou_threshold), + offset_i=int(offset)) + else: + from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze + from ..onnx.onnx_utils.symbolic_helper import _size_helper + + boxes = unsqueeze(g, bboxes, 0) + scores = unsqueeze(g, unsqueeze(g, scores, 0), 0) + + if max_num > 0: + max_num = g.op( + 'Constant', + value_t=torch.tensor(max_num, dtype=torch.long)) + else: + dim = g.op('Constant', value_t=torch.tensor(0)) + max_num = _size_helper(g, bboxes, dim) + max_output_per_class = max_num + iou_threshold = g.op( + 'Constant', + value_t=torch.tensor([iou_threshold], dtype=torch.float)) + score_threshold = g.op( + 'Constant', + value_t=torch.tensor([score_threshold], dtype=torch.float)) + nms_out = g.op('NonMaxSuppression', boxes, scores, + max_output_per_class, iou_threshold, + score_threshold) + return squeeze( + g, + select( + g, nms_out, 1, + g.op( + 'Constant', + value_t=torch.tensor([2], dtype=torch.long))), 1) + + +class SoftNMSop(torch.autograd.Function): + + @staticmethod + def forward(ctx, boxes, scores, iou_threshold, sigma, min_score, method, + offset): + dets = boxes.new_empty((boxes.size(0), 5), device='cpu') + inds = ext_module.softnms( + boxes.cpu(), + scores.cpu(), + dets.cpu(), + iou_threshold=float(iou_threshold), + sigma=float(sigma), + min_score=float(min_score), + method=int(method), + offset=int(offset)) + return dets, inds + + @staticmethod + def symbolic(g, boxes, scores, iou_threshold, sigma, min_score, method, + offset): + from packaging import version + assert version.parse(torch.__version__) >= version.parse('1.7.0') + nms_out = g.op( + 'mmcv::SoftNonMaxSuppression', + boxes, + scores, + iou_threshold_f=float(iou_threshold), + sigma_f=float(sigma), + min_score_f=float(min_score), + method_i=int(method), + offset_i=int(offset), + outputs=2) + return nms_out + + +@deprecated_api_warning({'iou_thr': 'iou_threshold'}) +def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1): + """Dispatch to either CPU or GPU NMS implementations. + + The input can be either torch tensor or numpy array. GPU NMS will be used + if the input is gpu tensor, otherwise CPU NMS + will be used. The returned type will always be the same as inputs. + + Arguments: + boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4). + scores (torch.Tensor or np.ndarray): scores in shape (N, ). + iou_threshold (float): IoU threshold for NMS. + offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset). + score_threshold (float): score threshold for NMS. + max_num (int): maximum number of boxes after NMS. + + Returns: + tuple: kept dets(boxes and scores) and indice, which is always the \ + same data type as the input. + + Example: + >>> boxes = np.array([[49.1, 32.4, 51.0, 35.9], + >>> [49.3, 32.9, 51.0, 35.3], + >>> [49.2, 31.8, 51.0, 35.4], + >>> [35.1, 11.5, 39.1, 15.7], + >>> [35.6, 11.8, 39.3, 14.2], + >>> [35.3, 11.5, 39.9, 14.5], + >>> [35.2, 11.7, 39.7, 15.7]], dtype=np.float32) + >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.5, 0.4, 0.3],\ + dtype=np.float32) + >>> iou_threshold = 0.6 + >>> dets, inds = nms(boxes, scores, iou_threshold) + >>> assert len(inds) == len(dets) == 3 + """ + assert isinstance(boxes, (torch.Tensor, np.ndarray)) + assert isinstance(scores, (torch.Tensor, np.ndarray)) + is_numpy = False + if isinstance(boxes, np.ndarray): + is_numpy = True + boxes = torch.from_numpy(boxes) + if isinstance(scores, np.ndarray): + scores = torch.from_numpy(scores) + assert boxes.size(1) == 4 + assert boxes.size(0) == scores.size(0) + assert offset in (0, 1) + + if torch.__version__ == 'parrots': + indata_list = [boxes, scores] + indata_dict = { + 'iou_threshold': float(iou_threshold), + 'offset': int(offset) + } + inds = ext_module.nms(*indata_list, **indata_dict) + else: + inds = NMSop.apply(boxes, scores, iou_threshold, offset, + score_threshold, max_num) + dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1) + if is_numpy: + dets = dets.cpu().numpy() + inds = inds.cpu().numpy() + return dets, inds + + +@deprecated_api_warning({'iou_thr': 'iou_threshold'}) +def soft_nms(boxes, + scores, + iou_threshold=0.3, + sigma=0.5, + min_score=1e-3, + method='linear', + offset=0): + """Dispatch to only CPU Soft NMS implementations. + + The input can be either a torch tensor or numpy array. + The returned type will always be the same as inputs. + + Arguments: + boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4). + scores (torch.Tensor or np.ndarray): scores in shape (N, ). + iou_threshold (float): IoU threshold for NMS. + sigma (float): hyperparameter for gaussian method + min_score (float): score filter threshold + method (str): either 'linear' or 'gaussian' + offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset). + + Returns: + tuple: kept dets(boxes and scores) and indice, which is always the \ + same data type as the input. + + Example: + >>> boxes = np.array([[4., 3., 5., 3.], + >>> [4., 3., 5., 4.], + >>> [3., 1., 3., 1.], + >>> [3., 1., 3., 1.], + >>> [3., 1., 3., 1.], + >>> [3., 1., 3., 1.]], dtype=np.float32) + >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.4, 0.0], dtype=np.float32) + >>> iou_threshold = 0.6 + >>> dets, inds = soft_nms(boxes, scores, iou_threshold, sigma=0.5) + >>> assert len(inds) == len(dets) == 5 + """ + + assert isinstance(boxes, (torch.Tensor, np.ndarray)) + assert isinstance(scores, (torch.Tensor, np.ndarray)) + is_numpy = False + if isinstance(boxes, np.ndarray): + is_numpy = True + boxes = torch.from_numpy(boxes) + if isinstance(scores, np.ndarray): + scores = torch.from_numpy(scores) + assert boxes.size(1) == 4 + assert boxes.size(0) == scores.size(0) + assert offset in (0, 1) + method_dict = {'naive': 0, 'linear': 1, 'gaussian': 2} + assert method in method_dict.keys() + + if torch.__version__ == 'parrots': + dets = boxes.new_empty((boxes.size(0), 5), device='cpu') + indata_list = [boxes.cpu(), scores.cpu(), dets.cpu()] + indata_dict = { + 'iou_threshold': float(iou_threshold), + 'sigma': float(sigma), + 'min_score': min_score, + 'method': method_dict[method], + 'offset': int(offset) + } + inds = ext_module.softnms(*indata_list, **indata_dict) + else: + dets, inds = SoftNMSop.apply(boxes.cpu(), scores.cpu(), + float(iou_threshold), float(sigma), + float(min_score), method_dict[method], + int(offset)) + + dets = dets[:inds.size(0)] + + if is_numpy: + dets = dets.cpu().numpy() + inds = inds.cpu().numpy() + return dets, inds + else: + return dets.to(device=boxes.device), inds.to(device=boxes.device) + + +def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): + """Performs non-maximum suppression in a batched fashion. + + Modified from https://github.com/pytorch/vision/blob + /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39. + In order to perform NMS independently per class, we add an offset to all + the boxes. The offset is dependent only on the class idx, and is large + enough so that boxes from different classes do not overlap. + + Arguments: + boxes (torch.Tensor): boxes in shape (N, 4). + scores (torch.Tensor): scores in shape (N, ). + idxs (torch.Tensor): each index value correspond to a bbox cluster, + and NMS will not be applied between elements of different idxs, + shape (N, ). + nms_cfg (dict): specify nms type and other parameters like iou_thr. + Possible keys includes the following. + + - iou_thr (float): IoU threshold used for NMS. + - split_thr (float): threshold number of boxes. In some cases the + number of boxes is large (e.g., 200k). To avoid OOM during + training, the users could set `split_thr` to a small value. + If the number of boxes is greater than the threshold, it will + perform NMS on each group of boxes separately and sequentially. + Defaults to 10000. + class_agnostic (bool): if true, nms is class agnostic, + i.e. IoU thresholding happens over all boxes, + regardless of the predicted class. + + Returns: + tuple: kept dets and indice. + """ + nms_cfg_ = nms_cfg.copy() + class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic) + if class_agnostic: + boxes_for_nms = boxes + else: + max_coordinate = boxes.max() + offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes)) + boxes_for_nms = boxes + offsets[:, None] + + nms_type = nms_cfg_.pop('type', 'nms') + nms_op = eval(nms_type) + + split_thr = nms_cfg_.pop('split_thr', 10000) + # Won't split to multiple nms nodes when exporting to onnx + if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export(): + dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_) + boxes = boxes[keep] + # -1 indexing works abnormal in TensorRT + # This assumes `dets` has 5 dimensions where + # the last dimension is score. + # TODO: more elegant way to handle the dimension issue. + # Some type of nms would reweight the score, such as SoftNMS + scores = dets[:, 4] + else: + max_num = nms_cfg_.pop('max_num', -1) + total_mask = scores.new_zeros(scores.size(), dtype=torch.bool) + # Some type of nms would reweight the score, such as SoftNMS + scores_after_nms = scores.new_zeros(scores.size()) + for id in torch.unique(idxs): + mask = (idxs == id).nonzero(as_tuple=False).view(-1) + dets, keep = nms_op(boxes_for_nms[mask], scores[mask], **nms_cfg_) + total_mask[mask[keep]] = True + scores_after_nms[mask[keep]] = dets[:, -1] + keep = total_mask.nonzero(as_tuple=False).view(-1) + + scores, inds = scores_after_nms[keep].sort(descending=True) + keep = keep[inds] + boxes = boxes[keep] + + if max_num > 0: + keep = keep[:max_num] + boxes = boxes[:max_num] + scores = scores[:max_num] + + return torch.cat([boxes, scores[:, None]], -1), keep + + +def nms_match(dets, iou_threshold): + """Matched dets into different groups by NMS. + + NMS match is Similar to NMS but when a bbox is suppressed, nms match will + record the indice of suppressed bbox and form a group with the indice of + kept bbox. In each group, indice is sorted as score order. + + Arguments: + dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5). + iou_thr (float): IoU thresh for NMS. + + Returns: + List[torch.Tensor | np.ndarray]: The outer list corresponds different + matched group, the inner Tensor corresponds the indices for a group + in score order. + """ + if dets.shape[0] == 0: + matched = [] + else: + assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' \ + f'but get {dets.shape}' + if isinstance(dets, torch.Tensor): + dets_t = dets.detach().cpu() + else: + dets_t = torch.from_numpy(dets) + indata_list = [dets_t] + indata_dict = {'iou_threshold': float(iou_threshold)} + matched = ext_module.nms_match(*indata_list, **indata_dict) + if torch.__version__ == 'parrots': + matched = matched.tolist() + + if isinstance(dets, torch.Tensor): + return [dets.new_tensor(m, dtype=torch.long) for m in matched] + else: + return [np.array(m, dtype=np.int) for m in matched] + + +def nms_rotated(dets, scores, iou_threshold, labels=None): + """Performs non-maximum suppression (NMS) on the rotated boxes according to + their intersection-over-union (IoU). + + Rotated NMS iteratively removes lower scoring rotated boxes which have an + IoU greater than iou_threshold with another (higher scoring) rotated box. + + Args: + boxes (Tensor): Rotated boxes in shape (N, 5). They are expected to \ + be in (x_ctr, y_ctr, width, height, angle_radian) format. + scores (Tensor): scores in shape (N, ). + iou_threshold (float): IoU thresh for NMS. + labels (Tensor): boxes' label in shape (N,). + + Returns: + tuple: kept dets(boxes and scores) and indice, which is always the \ + same data type as the input. + """ + if dets.shape[0] == 0: + return dets, None + multi_label = labels is not None + if multi_label: + dets_wl = torch.cat((dets, labels.unsqueeze(1)), 1) + else: + dets_wl = dets + _, order = scores.sort(0, descending=True) + dets_sorted = dets_wl.index_select(0, order) + + if torch.__version__ == 'parrots': + keep_inds = ext_module.nms_rotated( + dets_wl, + scores, + order, + dets_sorted, + iou_threshold=iou_threshold, + multi_label=multi_label) + else: + keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted, + iou_threshold, multi_label) + dets = torch.cat((dets[keep_inds], scores[keep_inds].reshape(-1, 1)), + dim=1) + return dets, keep_inds diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py new file mode 100644 index 000000000000..2143c75f835a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py @@ -0,0 +1,75 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np +import torch + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['pixel_group']) + + +def pixel_group(score, mask, embedding, kernel_label, kernel_contour, + kernel_region_num, distance_threshold): + """Group pixels into text instances, which is widely used text detection + methods. + + Arguments: + score (np.array or Tensor): The foreground score with size hxw. + mask (np.array or Tensor): The foreground mask with size hxw. + embedding (np.array or Tensor): The embedding with size hxwxc to + distinguish instances. + kernel_label (np.array or Tensor): The instance kernel index with + size hxw. + kernel_contour (np.array or Tensor): The kernel contour with size hxw. + kernel_region_num (int): The instance kernel region number. + distance_threshold (float): The embedding distance threshold between + kernel and pixel in one instance. + + Returns: + pixel_assignment (List[List[float]]): The instance coordinate list. + Each element consists of averaged confidence, pixel number, and + coordinates (x_i, y_i for all pixels) in order. + """ + assert isinstance(score, (torch.Tensor, np.ndarray)) + assert isinstance(mask, (torch.Tensor, np.ndarray)) + assert isinstance(embedding, (torch.Tensor, np.ndarray)) + assert isinstance(kernel_label, (torch.Tensor, np.ndarray)) + assert isinstance(kernel_contour, (torch.Tensor, np.ndarray)) + assert isinstance(kernel_region_num, int) + assert isinstance(distance_threshold, float) + + if isinstance(score, np.ndarray): + score = torch.from_numpy(score) + if isinstance(mask, np.ndarray): + mask = torch.from_numpy(mask) + if isinstance(embedding, np.ndarray): + embedding = torch.from_numpy(embedding) + if isinstance(kernel_label, np.ndarray): + kernel_label = torch.from_numpy(kernel_label) + if isinstance(kernel_contour, np.ndarray): + kernel_contour = torch.from_numpy(kernel_contour) + + if torch.__version__ == 'parrots': + label = ext_module.pixel_group( + score, + mask, + embedding, + kernel_label, + kernel_contour, + kernel_region_num=kernel_region_num, + distance_threshold=distance_threshold) + label = label.tolist() + label = label[0] + list_index = kernel_region_num + pixel_assignment = [] + for x in range(kernel_region_num): + pixel_assignment.append( + np.array( + label[list_index:list_index + int(label[x])], + dtype=np.float)) + list_index = list_index + int(label[x]) + else: + pixel_assignment = ext_module.pixel_group(score, mask, embedding, + kernel_label, kernel_contour, + kernel_region_num, + distance_threshold) + return pixel_assignment diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py new file mode 100644 index 000000000000..37886cdb1a0d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py @@ -0,0 +1,336 @@ +# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend # noqa + +from os import path as osp + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.utils import _pair +from torch.onnx.operators import shape_as_tensor + + +def bilinear_grid_sample(im, grid, align_corners=False): + """Given an input and a flow-field grid, computes the output using input + values and pixel locations from grid. Supported only bilinear interpolation + method to sample the input pixels. + + Args: + im (torch.Tensor): Input feature map, shape (N, C, H, W) + grid (torch.Tensor): Point coordinates, shape (N, Hg, Wg, 2) + align_corners {bool}: If set to True, the extrema (-1 and 1) are + considered as referring to the center points of the input’s + corner pixels. If set to False, they are instead considered as + referring to the corner points of the input’s corner pixels, + making the sampling more resolution agnostic. + Returns: + torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg) + """ + n, c, h, w = im.shape + gn, gh, gw, _ = grid.shape + assert n == gn + + x = grid[:, :, :, 0] + y = grid[:, :, :, 1] + + if align_corners: + x = ((x + 1) / 2) * (w - 1) + y = ((y + 1) / 2) * (h - 1) + else: + x = ((x + 1) * w - 1) / 2 + y = ((y + 1) * h - 1) / 2 + + x = x.view(n, -1) + y = y.view(n, -1) + + x0 = torch.floor(x).long() + y0 = torch.floor(y).long() + x1 = x0 + 1 + y1 = y0 + 1 + + wa = ((x1 - x) * (y1 - y)).unsqueeze(1) + wb = ((x1 - x) * (y - y0)).unsqueeze(1) + wc = ((x - x0) * (y1 - y)).unsqueeze(1) + wd = ((x - x0) * (y - y0)).unsqueeze(1) + + # Apply default for grid_sample function zero padding + im_padded = F.pad(im, pad=[1, 1, 1, 1], mode='constant', value=0) + padded_h = h + 2 + padded_w = w + 2 + # save points positions after padding + x0, x1, y0, y1 = x0 + 1, x1 + 1, y0 + 1, y1 + 1 + + # Clip coordinates to padded image size + x0 = torch.where(x0 < 0, torch.tensor(0), x0) + x0 = torch.where(x0 > padded_w - 1, torch.tensor(padded_w - 1), x0) + x1 = torch.where(x1 < 0, torch.tensor(0), x1) + x1 = torch.where(x1 > padded_w - 1, torch.tensor(padded_w - 1), x1) + y0 = torch.where(y0 < 0, torch.tensor(0), y0) + y0 = torch.where(y0 > padded_h - 1, torch.tensor(padded_h - 1), y0) + y1 = torch.where(y1 < 0, torch.tensor(0), y1) + y1 = torch.where(y1 > padded_h - 1, torch.tensor(padded_h - 1), y1) + + im_padded = im_padded.view(n, c, -1) + + x0_y0 = (x0 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1) + x0_y1 = (x0 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1) + x1_y0 = (x1 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1) + x1_y1 = (x1 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1) + + Ia = torch.gather(im_padded, 2, x0_y0) + Ib = torch.gather(im_padded, 2, x0_y1) + Ic = torch.gather(im_padded, 2, x1_y0) + Id = torch.gather(im_padded, 2, x1_y1) + + return (Ia * wa + Ib * wb + Ic * wc + Id * wd).reshape(n, c, gh, gw) + + +def is_in_onnx_export_without_custom_ops(): + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import get_onnxruntime_op_path + ort_custom_op_path = get_onnxruntime_op_path() + return torch.onnx.is_in_onnx_export( + ) and not osp.exists(ort_custom_op_path) + + +def normalize(grid): + """Normalize input grid from [-1, 1] to [0, 1] + Args: + grid (Tensor): The grid to be normalize, range [-1, 1]. + Returns: + Tensor: Normalized grid, range [0, 1]. + """ + + return (grid + 1.0) / 2.0 + + +def denormalize(grid): + """Denormalize input grid from range [0, 1] to [-1, 1] + Args: + grid (Tensor): The grid to be denormalize, range [0, 1]. + Returns: + Tensor: Denormalized grid, range [-1, 1]. + """ + + return grid * 2.0 - 1.0 + + +def generate_grid(num_grid, size, device): + """Generate regular square grid of points in [0, 1] x [0, 1] coordinate + space. + + Args: + num_grid (int): The number of grids to sample, one for each region. + size (tuple(int, int)): The side size of the regular grid. + device (torch.device): Desired device of returned tensor. + + Returns: + (torch.Tensor): A tensor of shape (num_grid, size[0]*size[1], 2) that + contains coordinates for the regular grids. + """ + + affine_trans = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device) + grid = F.affine_grid( + affine_trans, torch.Size((1, 1, *size)), align_corners=False) + grid = normalize(grid) + return grid.view(1, -1, 2).expand(num_grid, -1, -1) + + +def rel_roi_point_to_abs_img_point(rois, rel_roi_points): + """Convert roi based relative point coordinates to image based absolute + point coordinates. + + Args: + rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) + rel_roi_points (Tensor): Point coordinates inside RoI, relative to + RoI, location, range (0, 1), shape (N, P, 2) + Returns: + Tensor: Image based absolute point coordinates, shape (N, P, 2) + """ + + with torch.no_grad(): + assert rel_roi_points.size(0) == rois.size(0) + assert rois.dim() == 2 + assert rel_roi_points.dim() == 3 + assert rel_roi_points.size(2) == 2 + # remove batch idx + if rois.size(1) == 5: + rois = rois[:, 1:] + abs_img_points = rel_roi_points.clone() + # To avoid an error during exporting to onnx use independent + # variables instead inplace computation + xs = abs_img_points[:, :, 0] * (rois[:, None, 2] - rois[:, None, 0]) + ys = abs_img_points[:, :, 1] * (rois[:, None, 3] - rois[:, None, 1]) + xs += rois[:, None, 0] + ys += rois[:, None, 1] + abs_img_points = torch.stack([xs, ys], dim=2) + return abs_img_points + + +def get_shape_from_feature_map(x): + """Get spatial resolution of input feature map considering exporting to + onnx mode. + + Args: + x (torch.Tensor): Input tensor, shape (N, C, H, W) + Returns: + torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2) + """ + if torch.onnx.is_in_onnx_export(): + img_shape = shape_as_tensor(x)[2:].flip(0).view(1, 1, 2).to( + x.device).float() + else: + img_shape = torch.tensor(x.shape[2:]).flip(0).view(1, 1, 2).to( + x.device).float() + return img_shape + + +def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.): + """Convert image based absolute point coordinates to image based relative + coordinates for sampling. + + Args: + abs_img_points (Tensor): Image based absolute point coordinates, + shape (N, P, 2) + img (tuple/Tensor): (height, width) of image or feature map. + spatial_scale (float): Scale points by this factor. Default: 1. + + Returns: + Tensor: Image based relative point coordinates for sampling, + shape (N, P, 2) + """ + + assert (isinstance(img, tuple) and len(img) == 2) or \ + (isinstance(img, torch.Tensor) and len(img.shape) == 4) + + if isinstance(img, tuple): + h, w = img + scale = torch.tensor([w, h], + dtype=torch.float, + device=abs_img_points.device) + scale = scale.view(1, 1, 2) + else: + scale = get_shape_from_feature_map(img) + + return abs_img_points / scale * spatial_scale + + +def rel_roi_point_to_rel_img_point(rois, + rel_roi_points, + img, + spatial_scale=1.): + """Convert roi based relative point coordinates to image based absolute + point coordinates. + + Args: + rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) + rel_roi_points (Tensor): Point coordinates inside RoI, relative to + RoI, location, range (0, 1), shape (N, P, 2) + img (tuple/Tensor): (height, width) of image or feature map. + spatial_scale (float): Scale points by this factor. Default: 1. + + Returns: + Tensor: Image based relative point coordinates for sampling, + shape (N, P, 2) + """ + + abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points) + rel_img_point = abs_img_point_to_rel_img_point(abs_img_point, img, + spatial_scale) + + return rel_img_point + + +def point_sample(input, points, align_corners=False, **kwargs): + """A wrapper around :func:`grid_sample` to support 3D point_coords tensors + Unlike :func:`torch.nn.functional.grid_sample` it assumes point_coords to + lie inside ``[0, 1] x [0, 1]`` square. + + Args: + input (Tensor): Feature map, shape (N, C, H, W). + points (Tensor): Image based absolute point coordinates (normalized), + range [0, 1] x [0, 1], shape (N, P, 2) or (N, Hgrid, Wgrid, 2). + align_corners (bool): Whether align_corners. Default: False + + Returns: + Tensor: Features of `point` on `input`, shape (N, C, P) or + (N, C, Hgrid, Wgrid). + """ + + add_dim = False + if points.dim() == 3: + add_dim = True + points = points.unsqueeze(2) + if is_in_onnx_export_without_custom_ops(): + # If custom ops for onnx runtime not compiled use python + # implementation of grid_sample function to make onnx graph + # with supported nodes + output = bilinear_grid_sample( + input, denormalize(points), align_corners=align_corners) + else: + output = F.grid_sample( + input, denormalize(points), align_corners=align_corners, **kwargs) + if add_dim: + output = output.squeeze(3) + return output + + +class SimpleRoIAlign(nn.Module): + + def __init__(self, output_size, spatial_scale, aligned=True): + """Simple RoI align in PointRend, faster than standard RoIAlign. + + Args: + output_size (tuple[int]): h, w + spatial_scale (float): scale the input boxes by this number + aligned (bool): if False, use the legacy implementation in + MMDetection, align_corners=True will be used in F.grid_sample. + If True, align the results more perfectly. + """ + + super(SimpleRoIAlign, self).__init__() + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + # to be consistent with other RoI ops + self.use_torchvision = False + self.aligned = aligned + + def forward(self, features, rois): + num_imgs = features.size(0) + num_rois = rois.size(0) + rel_roi_points = generate_grid( + num_rois, self.output_size, device=rois.device) + + if torch.onnx.is_in_onnx_export(): + rel_img_points = rel_roi_point_to_rel_img_point( + rois, rel_roi_points, features, self.spatial_scale) + rel_img_points = rel_img_points.reshape(num_imgs, -1, + *rel_img_points.shape[1:]) + point_feats = point_sample( + features, rel_img_points, align_corners=not self.aligned) + point_feats = point_feats.transpose(1, 2) + else: + point_feats = [] + for batch_ind in range(num_imgs): + # unravel batch dim + feat = features[batch_ind].unsqueeze(0) + inds = (rois[:, 0].long() == batch_ind) + if inds.any(): + rel_img_points = rel_roi_point_to_rel_img_point( + rois[inds], rel_roi_points[inds], feat, + self.spatial_scale).unsqueeze(0) + point_feat = point_sample( + feat, rel_img_points, align_corners=not self.aligned) + point_feat = point_feat.squeeze(0).transpose(0, 1) + point_feats.append(point_feat) + + point_feats = torch.cat(point_feats, dim=0) + + channels = features.size(1) + roi_feats = point_feats.reshape(num_rois, channels, *self.output_size) + + return roi_feats + + def __repr__(self): + format_str = self.__class__.__name__ + format_str += '(output_size={}, spatial_scale={}'.format( + self.output_size, self.spatial_scale) + return format_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py new file mode 100644 index 000000000000..4003173a5305 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py @@ -0,0 +1,133 @@ +import torch + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'points_in_boxes_part_forward', 'points_in_boxes_cpu_forward', + 'points_in_boxes_all_forward' +]) + + +def points_in_boxes_part(points, boxes): + """Find the box in which each point is (CUDA). + + Args: + points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate + boxes (torch.Tensor): [B, T, 7], + num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz] in + LiDAR/DEPTH coordinate, (x, y, z) is the bottom center + + Returns: + box_idxs_of_pts (torch.Tensor): (B, M), default background = -1 + """ + assert points.shape[0] == boxes.shape[0], \ + 'Points and boxes should have the same batch size, ' \ + f'but got {points.shape[0]} and {boxes.shape[0]}' + assert boxes.shape[2] == 7, \ + 'boxes dimension should be 7, ' \ + f'but got unexpected shape {boxes.shape[2]}' + assert points.shape[2] == 3, \ + 'points dimension should be 3, ' \ + f'but got unexpected shape {points.shape[2]}' + batch_size, num_points, _ = points.shape + + box_idxs_of_pts = points.new_zeros((batch_size, num_points), + dtype=torch.int).fill_(-1) + + # If manually put the tensor 'points' or 'boxes' on a device + # which is not the current device, some temporary variables + # will be created on the current device in the cuda op, + # and the output will be incorrect. + # Therefore, we force the current device to be the same + # as the device of the tensors if it was not. + # Please refer to https://github.com/open-mmlab/mmdetection3d/issues/305 + # for the incorrect output before the fix. + points_device = points.get_device() + assert points_device == boxes.get_device(), \ + 'Points and boxes should be put on the same device' + if torch.cuda.current_device() != points_device: + torch.cuda.set_device(points_device) + + ext_module.points_in_boxes_part_forward(boxes.contiguous(), + points.contiguous(), + box_idxs_of_pts) + + return box_idxs_of_pts + + +def points_in_boxes_cpu(points, boxes): + """Find all boxes in which each point is (CPU). The CPU version of + :meth:`points_in_boxes_all`. + + Args: + points (torch.Tensor): [B, M, 3], [x, y, z] in + LiDAR/DEPTH coordinate + boxes (torch.Tensor): [B, T, 7], + num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz], + (x, y, z) is the bottom center. + + Returns: + box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. + """ + assert points.shape[0] == boxes.shape[0], \ + 'Points and boxes should have the same batch size, ' \ + f'but got {points.shape[0]} and {boxes.shape[0]}' + assert boxes.shape[2] == 7, \ + 'boxes dimension should be 7, ' \ + f'but got unexpected shape {boxes.shape[2]}' + assert points.shape[2] == 3, \ + 'points dimension should be 3, ' \ + f'but got unexpected shape {points.shape[2]}' + batch_size, num_points, _ = points.shape + num_boxes = boxes.shape[1] + + point_indices = points.new_zeros((batch_size, num_boxes, num_points), + dtype=torch.int) + for b in range(batch_size): + ext_module.points_in_boxes_cpu_forward(boxes[b].float().contiguous(), + points[b].float().contiguous(), + point_indices[b]) + point_indices = point_indices.transpose(1, 2) + + return point_indices + + +def points_in_boxes_all(points, boxes): + """Find all boxes in which each point is (CUDA). + + Args: + points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate + boxes (torch.Tensor): [B, T, 7], + num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz], + (x, y, z) is the bottom center. + + Returns: + box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. + """ + assert boxes.shape[0] == points.shape[0], \ + 'Points and boxes should have the same batch size, ' \ + f'but got {boxes.shape[0]} and {boxes.shape[0]}' + assert boxes.shape[2] == 7, \ + 'boxes dimension should be 7, ' \ + f'but got unexpected shape {boxes.shape[2]}' + assert points.shape[2] == 3, \ + 'points dimension should be 3, ' \ + f'but got unexpected shape {points.shape[2]}' + batch_size, num_points, _ = points.shape + num_boxes = boxes.shape[1] + + box_idxs_of_pts = points.new_zeros((batch_size, num_points, num_boxes), + dtype=torch.int).fill_(0) + + # Same reason as line 25-32 + points_device = points.get_device() + assert points_device == boxes.get_device(), \ + 'Points and boxes should be put on the same device' + if torch.cuda.current_device() != points_device: + torch.cuda.set_device(points_device) + + ext_module.points_in_boxes_all_forward(boxes.contiguous(), + points.contiguous(), + box_idxs_of_pts) + + return box_idxs_of_pts diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py new file mode 100644 index 000000000000..bf38beab85a1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py @@ -0,0 +1,177 @@ +from typing import List + +import torch +from torch import nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import force_fp32 +from .furthest_point_sample import (furthest_point_sample, + furthest_point_sample_with_dist) + + +def calc_square_dist(point_feat_a, point_feat_b, norm=True): + """Calculating square distance between a and b. + + Args: + point_feat_a (Tensor): (B, N, C) Feature vector of each point. + point_feat_b (Tensor): (B, M, C) Feature vector of each point. + norm (Bool, optional): Whether to normalize the distance. + Default: True. + + Returns: + Tensor: (B, N, M) Distance between each pair points. + """ + num_channel = point_feat_a.shape[-1] + # [bs, n, 1] + a_square = torch.sum(point_feat_a.unsqueeze(dim=2).pow(2), dim=-1) + # [bs, 1, m] + b_square = torch.sum(point_feat_b.unsqueeze(dim=1).pow(2), dim=-1) + + corr_matrix = torch.matmul(point_feat_a, point_feat_b.transpose(1, 2)) + + dist = a_square + b_square - 2 * corr_matrix + if norm: + dist = torch.sqrt(dist) / num_channel + return dist + + +def get_sampler_cls(sampler_type): + """Get the type and mode of points sampler. + + Args: + sampler_type (str): The type of points sampler. + The valid value are "D-FPS", "F-FPS", or "FS". + + Returns: + class: Points sampler type. + """ + sampler_mappings = { + 'D-FPS': DFPSSampler, + 'F-FPS': FFPSSampler, + 'FS': FSSampler, + } + try: + return sampler_mappings[sampler_type] + except KeyError: + raise KeyError( + f'Supported `sampler_type` are {sampler_mappings.keys()}, but got \ + {sampler_type}') + + +class PointsSampler(nn.Module): + """Points sampling. + + Args: + num_point (list[int]): Number of sample points. + fps_mod_list (list[str], optional): Type of FPS method, valid mod + ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS']. + F-FPS: using feature distances for FPS. + D-FPS: using Euclidean distances of points for FPS. + FS: using F-FPS and D-FPS simultaneously. + fps_sample_range_list (list[int], optional): + Range of points to apply FPS. Default: [-1]. + """ + + def __init__(self, + num_point: List[int], + fps_mod_list: List[str] = ['D-FPS'], + fps_sample_range_list: List[int] = [-1]): + super().__init__() + # FPS would be applied to different fps_mod in the list, + # so the length of the num_point should be equal to + # fps_mod_list and fps_sample_range_list. + assert len(num_point) == len(fps_mod_list) == len( + fps_sample_range_list) + self.num_point = num_point + self.fps_sample_range_list = fps_sample_range_list + self.samplers = nn.ModuleList() + for fps_mod in fps_mod_list: + self.samplers.append(get_sampler_cls(fps_mod)()) + self.fp16_enabled = False + + @force_fp32() + def forward(self, points_xyz, features): + """ + Args: + points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. + features (Tensor): (B, C, N) Descriptors of the features. + + Returns: + Tensor: (B, npoint, sample_num) Indices of sampled points. + """ + indices = [] + last_fps_end_index = 0 + + for fps_sample_range, sampler, npoint in zip( + self.fps_sample_range_list, self.samplers, self.num_point): + assert fps_sample_range < points_xyz.shape[1] + + if fps_sample_range == -1: + sample_points_xyz = points_xyz[:, last_fps_end_index:] + if features is not None: + sample_features = features[:, :, last_fps_end_index:] + else: + sample_features = None + else: + sample_points_xyz = \ + points_xyz[:, last_fps_end_index:fps_sample_range] + if features is not None: + sample_features = features[:, :, last_fps_end_index: + fps_sample_range] + else: + sample_features = None + + fps_idx = sampler(sample_points_xyz.contiguous(), sample_features, + npoint) + + indices.append(fps_idx + last_fps_end_index) + last_fps_end_index += fps_sample_range + indices = torch.cat(indices, dim=1) + + return indices + + +class DFPSSampler(nn.Module): + """Using Euclidean distances of points for FPS.""" + + def __init__(self): + super().__init__() + + def forward(self, points, features, npoint): + """Sampling points with D-FPS.""" + fps_idx = furthest_point_sample(points.contiguous(), npoint) + return fps_idx + + +class FFPSSampler(nn.Module): + """Using feature distances for FPS.""" + + def __init__(self): + super().__init__() + + def forward(self, points, features, npoint): + """Sampling points with F-FPS.""" + assert features is not None, \ + 'feature input to FFPS_Sampler should not be None' + features_for_fps = torch.cat([points, features.transpose(1, 2)], dim=2) + features_dist = calc_square_dist( + features_for_fps, features_for_fps, norm=False) + fps_idx = furthest_point_sample_with_dist(features_dist, npoint) + return fps_idx + + +class FSSampler(nn.Module): + """Using F-FPS and D-FPS simultaneously.""" + + def __init__(self): + super().__init__() + + def forward(self, points, features, npoint): + """Sampling points with FS_Sampling.""" + assert features is not None, \ + 'feature input to FS_Sampler should not be None' + ffps_sampler = FFPSSampler() + dfps_sampler = DFPSSampler() + fps_idx_ffps = ffps_sampler(points, features, npoint) + fps_idx_dfps = dfps_sampler(points, features, npoint) + fps_idx = torch.cat([fps_idx_ffps, fps_idx_dfps], dim=1) + return fps_idx diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py new file mode 100644 index 000000000000..cdf14e62b50e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py @@ -0,0 +1,92 @@ +# Modified from https://github.com/hszhao/semseg/blob/master/lib/psa +from torch import nn +from torch.autograd import Function +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['psamask_forward', 'psamask_backward']) + + +class PSAMaskFunction(Function): + + @staticmethod + def symbolic(g, input, psa_type, mask_size): + return g.op( + 'mmcv::MMCVPSAMask', + input, + psa_type_i=psa_type, + mask_size_i=mask_size) + + @staticmethod + def forward(ctx, input, psa_type, mask_size): + ctx.psa_type = psa_type + ctx.mask_size = _pair(mask_size) + ctx.save_for_backward(input) + + h_mask, w_mask = ctx.mask_size + batch_size, channels, h_feature, w_feature = input.size() + assert channels == h_mask * w_mask + output = input.new_zeros( + (batch_size, h_feature * w_feature, h_feature, w_feature)) + + ext_module.psamask_forward( + input, + output, + psa_type=psa_type, + num_=batch_size, + h_feature=h_feature, + w_feature=w_feature, + h_mask=h_mask, + w_mask=w_mask, + half_h_mask=(h_mask - 1) // 2, + half_w_mask=(w_mask - 1) // 2) + return output + + @staticmethod + def backward(ctx, grad_output): + input = ctx.saved_tensors[0] + psa_type = ctx.psa_type + h_mask, w_mask = ctx.mask_size + batch_size, channels, h_feature, w_feature = input.size() + grad_input = grad_output.new_zeros( + (batch_size, channels, h_feature, w_feature)) + ext_module.psamask_backward( + grad_output, + grad_input, + psa_type=psa_type, + num_=batch_size, + h_feature=h_feature, + w_feature=w_feature, + h_mask=h_mask, + w_mask=w_mask, + half_h_mask=(h_mask - 1) // 2, + half_w_mask=(w_mask - 1) // 2) + return grad_input, None, None, None + + +psa_mask = PSAMaskFunction.apply + + +class PSAMask(nn.Module): + + def __init__(self, psa_type, mask_size=None): + super(PSAMask, self).__init__() + assert psa_type in ['collect', 'distribute'] + if psa_type == 'collect': + psa_type_enum = 0 + else: + psa_type_enum = 1 + self.psa_type_enum = psa_type_enum + self.mask_size = mask_size + self.psa_type = psa_type + + def forward(self, input): + return psa_mask(input, self.psa_type_enum, self.mask_size) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(psa_type={self.psa_type}, ' + s += f'mask_size={self.mask_size})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py new file mode 100644 index 000000000000..0755aefc66e6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py @@ -0,0 +1,223 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import deprecated_api_warning, ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['roi_align_forward', 'roi_align_backward']) + + +class RoIAlignFunction(Function): + + @staticmethod + def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio, + pool_mode, aligned): + from ..onnx import is_custom_op_loaded + has_custom_op = is_custom_op_loaded() + if has_custom_op: + return g.op( + 'mmcv::MMCVRoiAlign', + input, + rois, + output_height_i=output_size[0], + output_width_i=output_size[1], + spatial_scale_f=spatial_scale, + sampling_ratio_i=sampling_ratio, + mode_s=pool_mode, + aligned_i=aligned) + else: + from torch.onnx.symbolic_opset9 import sub, squeeze + from torch.onnx.symbolic_helper import _slice_helper + from torch.onnx import TensorProtoDataType + # batch_indices = rois[:, 0].long() + batch_indices = _slice_helper( + g, rois, axes=[1], starts=[0], ends=[1]) + batch_indices = squeeze(g, batch_indices, 1) + batch_indices = g.op( + 'Cast', batch_indices, to_i=TensorProtoDataType.INT64) + # rois = rois[:, 1:] + rois = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5]) + if aligned: + # rois -= 0.5/spatial_scale + aligned_offset = g.op( + 'Constant', + value_t=torch.tensor([0.5 / spatial_scale], + dtype=torch.float32)) + rois = sub(g, rois, aligned_offset) + # roi align + return g.op( + 'RoiAlign', + input, + rois, + batch_indices, + output_height_i=output_size[0], + output_width_i=output_size[1], + spatial_scale_f=spatial_scale, + sampling_ratio_i=max(0, sampling_ratio), + mode_s=pool_mode) + + @staticmethod + def forward(ctx, + input, + rois, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + pool_mode='avg', + aligned=True): + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.sampling_ratio = sampling_ratio + assert pool_mode in ('max', 'avg') + ctx.pool_mode = 0 if pool_mode == 'max' else 1 + ctx.aligned = aligned + ctx.input_shape = input.size() + + assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' + + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], + ctx.output_size[1]) + output = input.new_zeros(output_shape) + if ctx.pool_mode == 0: + argmax_y = input.new_zeros(output_shape) + argmax_x = input.new_zeros(output_shape) + else: + argmax_y = input.new_zeros(0) + argmax_x = input.new_zeros(0) + + ext_module.roi_align_forward( + input, + rois, + output, + argmax_y, + argmax_x, + aligned_height=ctx.output_size[0], + aligned_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + pool_mode=ctx.pool_mode, + aligned=ctx.aligned) + + ctx.save_for_backward(rois, argmax_y, argmax_x) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + rois, argmax_y, argmax_x = ctx.saved_tensors + grad_input = grad_output.new_zeros(ctx.input_shape) + # complex head architecture may cause grad_output uncontiguous. + grad_output = grad_output.contiguous() + ext_module.roi_align_backward( + grad_output, + rois, + argmax_y, + argmax_x, + grad_input, + aligned_height=ctx.output_size[0], + aligned_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + pool_mode=ctx.pool_mode, + aligned=ctx.aligned) + return grad_input, None, None, None, None, None, None + + +roi_align = RoIAlignFunction.apply + + +class RoIAlign(nn.Module): + """RoI align pooling layer. + + Args: + output_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sampling_ratio (int): number of inputs samples to take for each + output sample. 0 to take samples densely for current models. + pool_mode (str, 'avg' or 'max'): pooling mode in each bin. + aligned (bool): if False, use the legacy implementation in + MMDetection. If True, align the results more perfectly. + use_torchvision (bool): whether to use roi_align from torchvision. + + Note: + The implementation of RoIAlign when aligned=True is modified from + https://github.com/facebookresearch/detectron2/ + + The meaning of aligned=True: + + Given a continuous coordinate c, its two neighboring pixel + indices (in our pixel model) are computed by floor(c - 0.5) and + ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete + indices [0] and [1] (which are sampled from the underlying signal + at continuous coordinates 0.5 and 1.5). But the original roi_align + (aligned=False) does not subtract the 0.5 when computing + neighboring pixel indices and therefore it uses pixels with a + slightly incorrect alignment (relative to our pixel model) when + performing bilinear interpolation. + + With `aligned=True`, + we first appropriately scale the ROI and then shift it by -0.5 + prior to calling roi_align. This produces the correct neighbors; + + The difference does not make a difference to the model's + performance if ROIAlign is used together with conv layers. + """ + + @deprecated_api_warning( + { + 'out_size': 'output_size', + 'sample_num': 'sampling_ratio' + }, + cls_name='RoIAlign') + def __init__(self, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + pool_mode='avg', + aligned=True, + use_torchvision=False): + super(RoIAlign, self).__init__() + + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + self.sampling_ratio = int(sampling_ratio) + self.pool_mode = pool_mode + self.aligned = aligned + self.use_torchvision = use_torchvision + + def forward(self, input, rois): + """ + Args: + input: NCHW images + rois: Bx5 boxes. First column is the index into N.\ + The other 4 columns are xyxy. + """ + if self.use_torchvision: + from torchvision.ops import roi_align as tv_roi_align + if 'aligned' in tv_roi_align.__code__.co_varnames: + return tv_roi_align(input, rois, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.aligned) + else: + if self.aligned: + rois -= rois.new_tensor([0.] + + [0.5 / self.spatial_scale] * 4) + return tv_roi_align(input, rois, self.output_size, + self.spatial_scale, self.sampling_ratio) + else: + return roi_align(input, rois, self.output_size, self.spatial_scale, + self.sampling_ratio, self.pool_mode, self.aligned) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(output_size={self.output_size}, ' + s += f'spatial_scale={self.spatial_scale}, ' + s += f'sampling_ratio={self.sampling_ratio}, ' + s += f'pool_mode={self.pool_mode}, ' + s += f'aligned={self.aligned}, ' + s += f'use_torchvision={self.use_torchvision})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py new file mode 100644 index 000000000000..0ce4961a3555 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py @@ -0,0 +1,177 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['roi_align_rotated_forward', 'roi_align_rotated_backward']) + + +class RoIAlignRotatedFunction(Function): + + @staticmethod + def symbolic(g, features, rois, out_size, spatial_scale, sample_num, + aligned, clockwise): + if isinstance(out_size, int): + out_h = out_size + out_w = out_size + elif isinstance(out_size, tuple): + assert len(out_size) == 2 + assert isinstance(out_size[0], int) + assert isinstance(out_size[1], int) + out_h, out_w = out_size + else: + raise TypeError( + '"out_size" must be an integer or tuple of integers') + return g.op( + 'mmcv::MMCVRoIAlignRotated', + features, + rois, + output_height_i=out_h, + output_width_i=out_h, + spatial_scale_f=spatial_scale, + sampling_ratio_i=sample_num, + aligned_i=aligned, + clockwise_i=clockwise) + + @staticmethod + def forward(ctx, + features, + rois, + out_size, + spatial_scale, + sample_num=0, + aligned=True, + clockwise=False): + if isinstance(out_size, int): + out_h = out_size + out_w = out_size + elif isinstance(out_size, tuple): + assert len(out_size) == 2 + assert isinstance(out_size[0], int) + assert isinstance(out_size[1], int) + out_h, out_w = out_size + else: + raise TypeError( + '"out_size" must be an integer or tuple of integers') + ctx.spatial_scale = spatial_scale + ctx.sample_num = sample_num + ctx.aligned = aligned + ctx.clockwise = clockwise + ctx.save_for_backward(rois) + ctx.feature_size = features.size() + + batch_size, num_channels, data_height, data_width = features.size() + num_rois = rois.size(0) + + output = features.new_zeros(num_rois, num_channels, out_h, out_w) + ext_module.roi_align_rotated_forward( + features, + rois, + output, + pooled_height=out_h, + pooled_width=out_w, + spatial_scale=spatial_scale, + sample_num=sample_num, + aligned=aligned, + clockwise=clockwise) + return output + + @staticmethod + def backward(ctx, grad_output): + feature_size = ctx.feature_size + spatial_scale = ctx.spatial_scale + aligned = ctx.aligned + clockwise = ctx.clockwise + sample_num = ctx.sample_num + rois = ctx.saved_tensors[0] + assert feature_size is not None + batch_size, num_channels, data_height, data_width = feature_size + + out_w = grad_output.size(3) + out_h = grad_output.size(2) + + grad_input = grad_rois = None + + if ctx.needs_input_grad[0]: + grad_input = rois.new_zeros(batch_size, num_channels, data_height, + data_width) + ext_module.roi_align_rotated_backward( + grad_output.contiguous(), + rois, + grad_input, + pooled_height=out_h, + pooled_width=out_w, + spatial_scale=spatial_scale, + sample_num=sample_num, + aligned=aligned, + clockwise=clockwise) + return grad_input, grad_rois, None, None, None, None, None + + +roi_align_rotated = RoIAlignRotatedFunction.apply + + +class RoIAlignRotated(nn.Module): + """RoI align pooling layer for rotated proposals. + + It accepts a feature map of shape (N, C, H, W) and rois with shape + (n, 6) with each roi decoded as (batch_index, center_x, center_y, + w, h, angle). The angle is in radian. + + Args: + out_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sample_num (int): number of inputs samples to take for each + output sample. 0 to take samples densely for current models. + aligned (bool): if False, use the legacy implementation in + MMDetection. If True, align the results more perfectly. + Default: True. + clockwise (bool): If True, the angle in each proposal follows a + clockwise fashion in image space, otherwise, the angle is + counterclockwise. Default: False. + + Note: + The implementation of RoIAlign when aligned=True is modified from + https://github.com/facebookresearch/detectron2/ + + The meaning of aligned=True: + + Given a continuous coordinate c, its two neighboring pixel + indices (in our pixel model) are computed by floor(c - 0.5) and + ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete + indices [0] and [1] (which are sampled from the underlying signal + at continuous coordinates 0.5 and 1.5). But the original roi_align + (aligned=False) does not subtract the 0.5 when computing + neighboring pixel indices and therefore it uses pixels with a + slightly incorrect alignment (relative to our pixel model) when + performing bilinear interpolation. + + With `aligned=True`, + we first appropriately scale the ROI and then shift it by -0.5 + prior to calling roi_align. This produces the correct neighbors; + + The difference does not make a difference to the model's + performance if ROIAlign is used together with conv layers. + """ + + def __init__(self, + out_size, + spatial_scale, + sample_num=0, + aligned=True, + clockwise=False): + super(RoIAlignRotated, self).__init__() + + self.out_size = out_size + self.spatial_scale = float(spatial_scale) + self.sample_num = int(sample_num) + self.aligned = aligned + self.clockwise = clockwise + + def forward(self, features, rois): + return RoIAlignRotatedFunction.apply(features, rois, self.out_size, + self.spatial_scale, + self.sample_num, self.aligned, + self.clockwise) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py new file mode 100644 index 000000000000..d339d8f2941e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py @@ -0,0 +1,86 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['roi_pool_forward', 'roi_pool_backward']) + + +class RoIPoolFunction(Function): + + @staticmethod + def symbolic(g, input, rois, output_size, spatial_scale): + return g.op( + 'MaxRoiPool', + input, + rois, + pooled_shape_i=output_size, + spatial_scale_f=spatial_scale) + + @staticmethod + def forward(ctx, input, rois, output_size, spatial_scale=1.0): + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.input_shape = input.size() + + assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' + + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], + ctx.output_size[1]) + output = input.new_zeros(output_shape) + argmax = input.new_zeros(output_shape, dtype=torch.int) + + ext_module.roi_pool_forward( + input, + rois, + output, + argmax, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale) + + ctx.save_for_backward(rois, argmax) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + rois, argmax = ctx.saved_tensors + grad_input = grad_output.new_zeros(ctx.input_shape) + + ext_module.roi_pool_backward( + grad_output, + rois, + argmax, + grad_input, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale) + + return grad_input, None, None, None + + +roi_pool = RoIPoolFunction.apply + + +class RoIPool(nn.Module): + + def __init__(self, output_size, spatial_scale=1.0): + super(RoIPool, self).__init__() + + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + + def forward(self, input, rois): + return roi_pool(input, rois, self.output_size, self.spatial_scale) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(output_size={self.output_size}, ' + s += f'spatial_scale={self.spatial_scale})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py new file mode 100644 index 000000000000..8742eeaa4f64 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py @@ -0,0 +1,114 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn as nn +from torch.autograd import Function + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['roiaware_pool3d_forward', 'roiaware_pool3d_backward']) + + +class RoIAwarePool3d(nn.Module): + """Encode the geometry-specific features of each 3D proposal. + + Please refer to `PartA2 `_ for more + details. + + Args: + out_size (int or tuple): The size of output features. n or + [n1, n2, n3]. + max_pts_per_voxel (int, optional): The maximum number of points per + voxel. Default: 128. + mode (str, optional): Pooling method of RoIAware, 'max' or 'avg'. + Default: 'max'. + """ + + def __init__(self, out_size, max_pts_per_voxel=128, mode='max'): + super().__init__() + + self.out_size = out_size + self.max_pts_per_voxel = max_pts_per_voxel + assert mode in ['max', 'avg'] + pool_mapping = {'max': 0, 'avg': 1} + self.mode = pool_mapping[mode] + + def forward(self, rois, pts, pts_feature): + """ + Args: + rois (torch.Tensor): [N, 7], in LiDAR coordinate, + (x, y, z) is the bottom center of rois. + pts (torch.Tensor): [npoints, 3], coordinates of input points. + pts_feature (torch.Tensor): [npoints, C], features of input points. + + Returns: + pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C] + """ + + return RoIAwarePool3dFunction.apply(rois, pts, pts_feature, + self.out_size, + self.max_pts_per_voxel, self.mode) + + +class RoIAwarePool3dFunction(Function): + + @staticmethod + def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel, + mode): + """ + Args: + rois (torch.Tensor): [N, 7], in LiDAR coordinate, + (x, y, z) is the bottom center of rois. + pts (torch.Tensor): [npoints, 3], coordinates of input points. + pts_feature (torch.Tensor): [npoints, C], features of input points. + out_size (int or tuple): The size of output features. n or + [n1, n2, n3]. + max_pts_per_voxel (int): The maximum number of points per voxel. + Default: 128. + mode (int): Pooling method of RoIAware, 0 (max pool) or 1 (average + pool). + + Returns: + pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C], output + pooled features. + """ + + if isinstance(out_size, int): + out_x = out_y = out_z = out_size + else: + assert len(out_size) == 3 + assert mmcv.is_tuple_of(out_size, int) + out_x, out_y, out_z = out_size + + num_rois = rois.shape[0] + num_channels = pts_feature.shape[-1] + num_pts = pts.shape[0] + + pooled_features = pts_feature.new_zeros( + (num_rois, out_x, out_y, out_z, num_channels)) + argmax = pts_feature.new_zeros( + (num_rois, out_x, out_y, out_z, num_channels), dtype=torch.int) + pts_idx_of_voxels = pts_feature.new_zeros( + (num_rois, out_x, out_y, out_z, max_pts_per_voxel), + dtype=torch.int) + + ext_module.roiaware_pool3d_forward(rois, pts, pts_feature, argmax, + pts_idx_of_voxels, pooled_features, + mode) + + ctx.roiaware_pool3d_for_backward = (pts_idx_of_voxels, argmax, mode, + num_pts, num_channels) + return pooled_features + + @staticmethod + def backward(ctx, grad_out): + ret = ctx.roiaware_pool3d_for_backward + pts_idx_of_voxels, argmax, mode, num_pts, num_channels = ret + + grad_in = grad_out.new_zeros((num_pts, num_channels)) + ext_module.roiaware_pool3d_backward(pts_idx_of_voxels, argmax, + grad_out.contiguous(), grad_in, + mode) + + return None, None, grad_in, None, None, None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py new file mode 100644 index 000000000000..0a21412c0728 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py @@ -0,0 +1,77 @@ +from torch import nn as nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['roipoint_pool3d_forward']) + + +class RoIPointPool3d(nn.Module): + """Encode the geometry-specific features of each 3D proposal. + + Please refer to `Paper of PartA2 `_ + for more details. + + Args: + num_sampled_points (int, optional): Number of samples in each roi. + Default: 512. + """ + + def __init__(self, num_sampled_points=512): + super().__init__() + self.num_sampled_points = num_sampled_points + + def forward(self, points, point_features, boxes3d): + """ + Args: + points (torch.Tensor): Input points whose shape is (B, N, C). + point_features (torch.Tensor): Features of input points whose shape + is (B, N, C). + boxes3d (B, M, 7), Input bounding boxes whose shape is (B, M, 7). + + Returns: + pooled_features (torch.Tensor): The output pooled features whose + shape is (B, M, 512, 3 + C). + pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M). + """ + return RoIPointPool3dFunction.apply(points, point_features, boxes3d, + self.num_sampled_points) + + +class RoIPointPool3dFunction(Function): + + @staticmethod + def forward(ctx, points, point_features, boxes3d, num_sampled_points=512): + """ + Args: + points (torch.Tensor): Input points whose shape is (B, N, C). + point_features (torch.Tensor): Features of input points whose shape + is (B, N, C). + boxes3d (B, M, 7), Input bounding boxes whose shape is (B, M, 7). + num_sampled_points (int, optional): The num of sampled points. + Default: 512. + + Returns: + pooled_features (torch.Tensor): The output pooled features whose + shape is (B, M, 512, 3 + C). + pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M). + """ + assert len(points.shape) == 3 and points.shape[2] == 3 + batch_size, boxes_num, feature_len = points.shape[0], boxes3d.shape[ + 1], point_features.shape[2] + pooled_boxes3d = boxes3d.view(batch_size, -1, 7) + pooled_features = point_features.new_zeros( + (batch_size, boxes_num, num_sampled_points, 3 + feature_len)) + pooled_empty_flag = point_features.new_zeros( + (batch_size, boxes_num)).int() + + ext_module.roipoint_pool3d_forward(points.contiguous(), + pooled_boxes3d.contiguous(), + point_features.contiguous(), + pooled_features, pooled_empty_flag) + + return pooled_features, pooled_empty_flag + + @staticmethod + def backward(ctx, grad_out): + raise NotImplementedError diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py new file mode 100644 index 000000000000..63f067b908b3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py @@ -0,0 +1,145 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import CONV_LAYERS, ConvAWS2d, constant_init +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops.deform_conv import deform_conv2d +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version + + +@CONV_LAYERS.register_module(name='SAC') +class SAConv2d(ConvAWS2d): + """SAC (Switchable Atrous Convolution) + + This is an implementation of SAC in DetectoRS + (https://arxiv.org/pdf/2006.02334.pdf). + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. Default: 0 + padding_mode (string, optional): ``'zeros'``, ``'reflect'``, + ``'replicate'`` or ``'circular'``. Default: ``'zeros'`` + dilation (int or tuple, optional): Spacing between kernel elements. + Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the + output. Default: ``True`` + use_deform: If ``True``, replace convolution with deformable + convolution. Default: ``False``. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + use_deform=False): + super().__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias) + self.use_deform = use_deform + self.switch = nn.Conv2d( + self.in_channels, 1, kernel_size=1, stride=stride, bias=True) + self.weight_diff = nn.Parameter(torch.Tensor(self.weight.size())) + self.pre_context = nn.Conv2d( + self.in_channels, self.in_channels, kernel_size=1, bias=True) + self.post_context = nn.Conv2d( + self.out_channels, self.out_channels, kernel_size=1, bias=True) + if self.use_deform: + self.offset_s = nn.Conv2d( + self.in_channels, + 18, + kernel_size=3, + padding=1, + stride=stride, + bias=True) + self.offset_l = nn.Conv2d( + self.in_channels, + 18, + kernel_size=3, + padding=1, + stride=stride, + bias=True) + self.init_weights() + + def init_weights(self): + constant_init(self.switch, 0, bias=1) + self.weight_diff.data.zero_() + constant_init(self.pre_context, 0) + constant_init(self.post_context, 0) + if self.use_deform: + constant_init(self.offset_s, 0) + constant_init(self.offset_l, 0) + + def forward(self, x): + # pre-context + avg_x = F.adaptive_avg_pool2d(x, output_size=1) + avg_x = self.pre_context(avg_x) + avg_x = avg_x.expand_as(x) + x = x + avg_x + # switch + avg_x = F.pad(x, pad=(2, 2, 2, 2), mode='reflect') + avg_x = F.avg_pool2d(avg_x, kernel_size=5, stride=1, padding=0) + switch = self.switch(avg_x) + # sac + weight = self._get_weight(self.weight) + zero_bias = torch.zeros( + self.out_channels, device=weight.device, dtype=weight.dtype) + + if self.use_deform: + offset = self.offset_s(avg_x) + out_s = deform_conv2d(x, offset, weight, self.stride, self.padding, + self.dilation, self.groups, 1) + else: + if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.5.0')): + out_s = super().conv2d_forward(x, weight) + elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'): + # bias is a required argument of _conv_forward in torch 1.8.0 + out_s = super()._conv_forward(x, weight, zero_bias) + else: + out_s = super()._conv_forward(x, weight) + ori_p = self.padding + ori_d = self.dilation + self.padding = tuple(3 * p for p in self.padding) + self.dilation = tuple(3 * d for d in self.dilation) + weight = weight + self.weight_diff + if self.use_deform: + offset = self.offset_l(avg_x) + out_l = deform_conv2d(x, offset, weight, self.stride, self.padding, + self.dilation, self.groups, 1) + else: + if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.5.0')): + out_l = super().conv2d_forward(x, weight) + elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'): + # bias is a required argument of _conv_forward in torch 1.8.0 + out_l = super()._conv_forward(x, weight, zero_bias) + else: + out_l = super()._conv_forward(x, weight) + + out = switch * out_s + (1 - switch) * out_l + self.padding = ori_p + self.dilation = ori_d + # post-context + avg_x = F.adaptive_avg_pool2d(out, output_size=1) + avg_x = self.post_context(avg_x) + avg_x = avg_x.expand_as(out) + out = out + avg_x + return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py new file mode 100644 index 000000000000..2b8aa4169e9f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py @@ -0,0 +1,135 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', + ['dynamic_point_to_voxel_forward', 'dynamic_point_to_voxel_backward']) + + +class _DynamicScatter(Function): + + @staticmethod + def forward(ctx, feats, coors, reduce_type='max'): + """convert kitti points(N, >=3) to voxels. + + Args: + feats (torch.Tensor): [N, C]. Points features to be reduced + into voxels. + coors (torch.Tensor): [N, ndim]. Corresponding voxel coordinates + (specifically multi-dim voxel index) of each points. + reduce_type (str, optional): Reduce op. support 'max', 'sum' and + 'mean'. Default: 'max'. + + Returns: + voxel_feats (torch.Tensor): [M, C]. Reduced features, input + features that shares the same voxel coordinates are reduced to + one row. + voxel_coors (torch.Tensor): [M, ndim]. Voxel coordinates. + """ + results = ext_module.dynamic_point_to_voxel_forward( + feats, coors, reduce_type) + (voxel_feats, voxel_coors, point2voxel_map, + voxel_points_count) = results + ctx.reduce_type = reduce_type + ctx.save_for_backward(feats, voxel_feats, point2voxel_map, + voxel_points_count) + ctx.mark_non_differentiable(voxel_coors) + return voxel_feats, voxel_coors + + @staticmethod + def backward(ctx, grad_voxel_feats, grad_voxel_coors=None): + (feats, voxel_feats, point2voxel_map, + voxel_points_count) = ctx.saved_tensors + grad_feats = torch.zeros_like(feats) + # TODO: whether to use index put or use cuda_backward + # To use index put, need point to voxel index + ext_module.dynamic_point_to_voxel_backward( + grad_feats, grad_voxel_feats.contiguous(), feats, voxel_feats, + point2voxel_map, voxel_points_count, ctx.reduce_type) + return grad_feats, None, None + + +dynamic_scatter = _DynamicScatter.apply + + +class DynamicScatter(nn.Module): + """Scatters points into voxels, used in the voxel encoder with dynamic + voxelization. + + Note: + The CPU and GPU implementation get the same output, but have numerical + difference after summation and division (e.g., 5e-7). + + Args: + voxel_size (list): list [x, y, z] size of three dimension. + point_cloud_range (list): The coordinate range of points, [x_min, + y_min, z_min, x_max, y_max, z_max]. + average_points (bool): whether to use avg pooling to scatter points + into voxel. + """ + + def __init__(self, voxel_size, point_cloud_range, average_points: bool): + super().__init__() + + self.voxel_size = voxel_size + self.point_cloud_range = point_cloud_range + self.average_points = average_points + + def forward_single(self, points, coors): + """Scatters points into voxels. + + Args: + points (torch.Tensor): Points to be reduced into voxels. + coors (torch.Tensor): Corresponding voxel coordinates (specifically + multi-dim voxel index) of each points. + + Returns: + voxel_feats (torch.Tensor): Reduced features, input features that + shares the same voxel coordinates are reduced to one row. + voxel_coors (torch.Tensor): Voxel coordinates. + """ + reduce = 'mean' if self.average_points else 'max' + return dynamic_scatter(points.contiguous(), coors.contiguous(), reduce) + + def forward(self, points, coors): + """Scatters points/features into voxels. + + Args: + points (torch.Tensor): Points to be reduced into voxels. + coors (torch.Tensor): Corresponding voxel coordinates (specifically + multi-dim voxel index) of each points. + + Returns: + voxel_feats (torch.Tensor): Reduced features, input features that + shares the same voxel coordinates are reduced to one row. + voxel_coors (torch.Tensor): Voxel coordinates. + """ + if coors.size(-1) == 3: + return self.forward_single(points, coors) + else: + batch_size = coors[-1, 0] + 1 + voxels, voxel_coors = [], [] + for i in range(batch_size): + inds = torch.where(coors[:, 0] == i) + voxel, voxel_coor = self.forward_single( + points[inds], coors[inds][:, 1:]) + coor_pad = nn.functional.pad( + voxel_coor, (1, 0), mode='constant', value=i) + voxel_coors.append(coor_pad) + voxels.append(voxel) + features = torch.cat(voxels, dim=0) + feature_coors = torch.cat(voxel_coors, dim=0) + + return features, feature_coors + + def __repr__(self): + s = self.__class__.__name__ + '(' + s += 'voxel_size=' + str(self.voxel_size) + s += ', point_cloud_range=' + str(self.point_cloud_range) + s += ', average_points=' + str(self.average_points) + s += ')' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py new file mode 100644 index 000000000000..d5721f4ea0b5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py @@ -0,0 +1,279 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.distributed as dist +import torch.nn.functional as F +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.module import Module +from torch.nn.parameter import Parameter + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import NORM_LAYERS +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'sync_bn_forward_mean', 'sync_bn_forward_var', 'sync_bn_forward_output', + 'sync_bn_backward_param', 'sync_bn_backward_data' +]) + + +class SyncBatchNormFunction(Function): + + @staticmethod + def symbolic(g, input, running_mean, running_var, weight, bias, momentum, + eps, group, group_size, stats_mode): + return g.op( + 'mmcv::MMCVSyncBatchNorm', + input, + running_mean, + running_var, + weight, + bias, + momentum_f=momentum, + eps_f=eps, + group_i=group, + group_size_i=group_size, + stats_mode=stats_mode) + + @staticmethod + def forward(self, input, running_mean, running_var, weight, bias, momentum, + eps, group, group_size, stats_mode): + self.momentum = momentum + self.eps = eps + self.group = group + self.group_size = group_size + self.stats_mode = stats_mode + + assert isinstance( + input, (torch.HalfTensor, torch.FloatTensor, + torch.cuda.HalfTensor, torch.cuda.FloatTensor)), \ + f'only support Half or Float Tensor, but {input.type()}' + output = torch.zeros_like(input) + input3d = input.flatten(start_dim=2) + output3d = output.view_as(input3d) + num_channels = input3d.size(1) + + # ensure mean/var/norm/std are initialized as zeros + # ``torch.empty()`` does not guarantee that + mean = torch.zeros( + num_channels, dtype=torch.float, device=input3d.device) + var = torch.zeros( + num_channels, dtype=torch.float, device=input3d.device) + norm = torch.zeros_like( + input3d, dtype=torch.float, device=input3d.device) + std = torch.zeros( + num_channels, dtype=torch.float, device=input3d.device) + + batch_size = input3d.size(0) + if batch_size > 0: + ext_module.sync_bn_forward_mean(input3d, mean) + batch_flag = torch.ones([1], device=mean.device, dtype=mean.dtype) + else: + # skip updating mean and leave it as zeros when the input is empty + batch_flag = torch.zeros([1], device=mean.device, dtype=mean.dtype) + + # synchronize mean and the batch flag + vec = torch.cat([mean, batch_flag]) + if self.stats_mode == 'N': + vec *= batch_size + if self.group_size > 1: + dist.all_reduce(vec, group=self.group) + total_batch = vec[-1].detach() + mean = vec[:num_channels] + + if self.stats_mode == 'default': + mean = mean / self.group_size + elif self.stats_mode == 'N': + mean = mean / total_batch.clamp(min=1) + else: + raise NotImplementedError + + # leave var as zeros when the input is empty + if batch_size > 0: + ext_module.sync_bn_forward_var(input3d, mean, var) + + if self.stats_mode == 'N': + var *= batch_size + if self.group_size > 1: + dist.all_reduce(var, group=self.group) + + if self.stats_mode == 'default': + var /= self.group_size + elif self.stats_mode == 'N': + var /= total_batch.clamp(min=1) + else: + raise NotImplementedError + + # if the total batch size over all the ranks is zero, + # we should not update the statistics in the current batch + update_flag = total_batch.clamp(max=1) + momentum = update_flag * self.momentum + ext_module.sync_bn_forward_output( + input3d, + mean, + var, + weight, + bias, + running_mean, + running_var, + norm, + std, + output3d, + eps=self.eps, + momentum=momentum, + group_size=self.group_size) + self.save_for_backward(norm, std, weight) + return output + + @staticmethod + @once_differentiable + def backward(self, grad_output): + norm, std, weight = self.saved_tensors + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(weight) + grad_input = torch.zeros_like(grad_output) + grad_output3d = grad_output.flatten(start_dim=2) + grad_input3d = grad_input.view_as(grad_output3d) + + batch_size = grad_input3d.size(0) + if batch_size > 0: + ext_module.sync_bn_backward_param(grad_output3d, norm, grad_weight, + grad_bias) + + # all reduce + if self.group_size > 1: + dist.all_reduce(grad_weight, group=self.group) + dist.all_reduce(grad_bias, group=self.group) + grad_weight /= self.group_size + grad_bias /= self.group_size + + if batch_size > 0: + ext_module.sync_bn_backward_data(grad_output3d, weight, + grad_weight, grad_bias, norm, std, + grad_input3d) + + return grad_input, None, None, grad_weight, grad_bias, \ + None, None, None, None, None + + +@NORM_LAYERS.register_module(name='MMSyncBN') +class SyncBatchNorm(Module): + """Synchronized Batch Normalization. + + Args: + num_features (int): number of features/chennels in input tensor + eps (float, optional): a value added to the denominator for numerical + stability. Defaults to 1e-5. + momentum (float, optional): the value used for the running_mean and + running_var computation. Defaults to 0.1. + affine (bool, optional): whether to use learnable affine parameters. + Defaults to True. + track_running_stats (bool, optional): whether to track the running + mean and variance during training. When set to False, this + module does not track such statistics, and initializes statistics + buffers ``running_mean`` and ``running_var`` as ``None``. When + these buffers are ``None``, this module always uses batch + statistics in both training and eval modes. Defaults to True. + group (int, optional): synchronization of stats happen within + each process group individually. By default it is synchronization + across the whole world. Defaults to None. + stats_mode (str, optional): The statistical mode. Available options + includes ``'default'`` and ``'N'``. Defaults to 'default'. + When ``stats_mode=='default'``, it computes the overall statistics + using those from each worker with equal weight, i.e., the + statistics are synchronized and simply divied by ``group``. This + mode will produce inaccurate statistics when empty tensors occur. + When ``stats_mode=='N'``, it compute the overall statistics using + the total number of batches in each worker ignoring the number of + group, i.e., the statistics are synchronized and then divied by + the total batch ``N``. This mode is beneficial when empty tensors + occur during training, as it average the total mean by the real + number of batch. + """ + + def __init__(self, + num_features, + eps=1e-5, + momentum=0.1, + affine=True, + track_running_stats=True, + group=None, + stats_mode='default'): + super(SyncBatchNorm, self).__init__() + self.num_features = num_features + self.eps = eps + self.momentum = momentum + self.affine = affine + self.track_running_stats = track_running_stats + group = dist.group.WORLD if group is None else group + self.group = group + self.group_size = dist.get_world_size(group) + assert stats_mode in ['default', 'N'], \ + f'"stats_mode" only accepts "default" and "N", got "{stats_mode}"' + self.stats_mode = stats_mode + if self.affine: + self.weight = Parameter(torch.Tensor(num_features)) + self.bias = Parameter(torch.Tensor(num_features)) + else: + self.register_parameter('weight', None) + self.register_parameter('bias', None) + if self.track_running_stats: + self.register_buffer('running_mean', torch.zeros(num_features)) + self.register_buffer('running_var', torch.ones(num_features)) + self.register_buffer('num_batches_tracked', + torch.tensor(0, dtype=torch.long)) + else: + self.register_buffer('running_mean', None) + self.register_buffer('running_var', None) + self.register_buffer('num_batches_tracked', None) + self.reset_parameters() + + def reset_running_stats(self): + if self.track_running_stats: + self.running_mean.zero_() + self.running_var.fill_(1) + self.num_batches_tracked.zero_() + + def reset_parameters(self): + self.reset_running_stats() + if self.affine: + self.weight.data.uniform_() # pytorch use ones_() + self.bias.data.zero_() + + def forward(self, input): + if input.dim() < 2: + raise ValueError( + f'expected at least 2D input, got {input.dim()}D input') + if self.momentum is None: + exponential_average_factor = 0.0 + else: + exponential_average_factor = self.momentum + + if self.training and self.track_running_stats: + if self.num_batches_tracked is not None: + self.num_batches_tracked += 1 + if self.momentum is None: # use cumulative moving average + exponential_average_factor = 1.0 / float( + self.num_batches_tracked) + else: # use exponential moving average + exponential_average_factor = self.momentum + + if self.training or not self.track_running_stats: + return SyncBatchNormFunction.apply( + input, self.running_mean, self.running_var, self.weight, + self.bias, exponential_average_factor, self.eps, self.group, + self.group_size, self.stats_mode) + else: + return F.batch_norm(input, self.running_mean, self.running_var, + self.weight, self.bias, False, + exponential_average_factor, self.eps) + + def __repr__(self): + s = self.__class__.__name__ + s += f'({self.num_features}, ' + s += f'eps={self.eps}, ' + s += f'momentum={self.momentum}, ' + s += f'affine={self.affine}, ' + s += f'track_running_stats={self.track_running_stats}, ' + s += f'group_size={self.group_size},' + s += f'stats_mode={self.stats_mode})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py new file mode 100644 index 000000000000..203f47f05d58 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py @@ -0,0 +1,68 @@ +from typing import Tuple + +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['three_interpolate_forward', 'three_interpolate_backward']) + + +class ThreeInterpolate(Function): + """Performs weighted linear interpolation on 3 features. + + Please refer to `Paper of PointNet++ `_ + for more details. + """ + + @staticmethod + def forward(ctx, features: torch.Tensor, indices: torch.Tensor, + weight: torch.Tensor) -> torch.Tensor: + """ + Args: + features (Tensor): (B, C, M) Features descriptors to be + interpolated + indices (Tensor): (B, n, 3) index three nearest neighbors + of the target features in features + weight (Tensor): (B, n, 3) weights of interpolation + + Returns: + Tensor: (B, C, N) tensor of the interpolated features + """ + assert features.is_contiguous() + assert indices.is_contiguous() + assert weight.is_contiguous() + + B, c, m = features.size() + n = indices.size(1) + ctx.three_interpolate_for_backward = (indices, weight, m) + output = torch.cuda.FloatTensor(B, c, n) + + ext_module.three_interpolate_forward( + features, indices, weight, output, b=B, c=c, m=m, n=n) + return output + + @staticmethod + def backward( + ctx, grad_out: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Args: + grad_out (Tensor): (B, C, N) tensor with gradients of outputs + + Returns: + Tensor: (B, C, M) tensor with gradients of features + """ + idx, weight, m = ctx.three_interpolate_for_backward + B, c, n = grad_out.size() + + grad_features = torch.cuda.FloatTensor(B, c, m).zero_() + grad_out_data = grad_out.data.contiguous() + + ext_module.three_interpolate_backward( + grad_out_data, idx, weight, grad_features.data, b=B, c=c, n=n, m=m) + return grad_features, None, None + + +three_interpolate = ThreeInterpolate.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py new file mode 100644 index 000000000000..2b01047a1299 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py @@ -0,0 +1,51 @@ +from typing import Tuple + +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['three_nn_forward']) + + +class ThreeNN(Function): + """Find the top-3 nearest neighbors of the target set from the source set. + + Please refer to `Paper of PointNet++ `_ + for more details. + """ + + @staticmethod + def forward(ctx, target: torch.Tensor, + source: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + target (Tensor): shape (B, N, 3), points set that needs to + find the nearest neighbors. + source (Tensor): shape (B, M, 3), points set that is used + to find the nearest neighbors of points in target set. + + Returns: + Tensor: shape (B, N, 3), L2 distance of each point in target + set to their corresponding nearest neighbors. + """ + target = target.contiguous() + source = source.contiguous() + + B, N, _ = target.size() + m = source.size(1) + dist2 = torch.cuda.FloatTensor(B, N, 3) + idx = torch.cuda.IntTensor(B, N, 3) + + ext_module.three_nn_forward(target, source, dist2, idx, b=B, n=N, m=m) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(idx) + + return torch.sqrt(dist2), idx + + @staticmethod + def backward(ctx, a=None, b=None): + return None, None + + +three_nn = ThreeNN.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py new file mode 100644 index 000000000000..472c9fcfe45a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py @@ -0,0 +1,68 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Code reference from "Temporal Interlacing Network" +# https://github.com/deepcs233/TIN/blob/master/cuda_shift/rtc_wrap.py +# Hao Shao, Shengju Qian, Yu Liu +# shaoh19@mails.tsinghua.edu.cn, sjqian@cse.cuhk.edu.hk, yuliu@ee.cuhk.edu.hk + +import torch +import torch.nn as nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['tin_shift_forward', 'tin_shift_backward']) + + +class TINShiftFunction(Function): + + @staticmethod + def forward(ctx, input, shift): + C = input.size(2) + num_segments = shift.size(1) + if C // num_segments <= 0 or C % num_segments != 0: + raise ValueError('C should be a multiple of num_segments, ' + f'but got C={C} and num_segments={num_segments}.') + + ctx.save_for_backward(shift) + + out = torch.zeros_like(input) + ext_module.tin_shift_forward(input, shift, out) + + return out + + @staticmethod + def backward(ctx, grad_output): + + shift = ctx.saved_tensors[0] + data_grad_input = grad_output.new(*grad_output.size()).zero_() + shift_grad_input = shift.new(*shift.size()).zero_() + ext_module.tin_shift_backward(grad_output, shift, data_grad_input) + + return data_grad_input, shift_grad_input + + +tin_shift = TINShiftFunction.apply + + +class TINShift(nn.Module): + """Temporal Interlace Shift. + + Temporal Interlace shift is a differentiable temporal-wise frame shifting + which is proposed in "Temporal Interlacing Network" + + Please refer to https://arxiv.org/abs/2001.06499 for more details. + Code is modified from https://github.com/mit-han-lab/temporal-shift-module + """ + + def forward(self, input, shift): + """Perform temporal interlace shift. + + Args: + input (Tensor): Feature map with shape [N, num_segments, C, H * W]. + shift (Tensor): Shift tensor with shape [N, num_segments]. + + Returns: + Feature map after temporal interlace shift. + """ + return tin_shift(input, shift) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py new file mode 100644 index 000000000000..8802690df0c7 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py @@ -0,0 +1,330 @@ +# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.py # noqa:E501 + +# Copyright (c) 2021, NVIDIA Corporation. All rights reserved. +# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator +# Augmentation (ADA) +# ======================================================================= + +# 1. Definitions + +# "Licensor" means any person or entity that distributes its Work. + +# "Software" means the original work of authorship made available under +# this License. + +# "Work" means the Software and any additions to or derivative works of +# the Software that are made available under this License. + +# The terms "reproduce," "reproduction," "derivative works," and +# "distribution" have the meaning as provided under U.S. copyright law; +# provided, however, that for the purposes of this License, derivative +# works shall not include works that remain separable from, or merely +# link (or bind by name) to the interfaces of, the Work. + +# Works, including the Software, are "made available" under this License +# by including in or with the Work either (a) a copyright notice +# referencing the applicability of this License to the Work, or (b) a +# copy of this License. + +# 2. License Grants + +# 2.1 Copyright Grant. Subject to the terms and conditions of this +# License, each Licensor grants to you a perpetual, worldwide, +# non-exclusive, royalty-free, copyright license to reproduce, +# prepare derivative works of, publicly display, publicly perform, +# sublicense and distribute its Work and any resulting derivative +# works in any form. + +# 3. Limitations + +# 3.1 Redistribution. You may reproduce or distribute the Work only +# if (a) you do so under this License, (b) you include a complete +# copy of this License with your distribution, and (c) you retain +# without modification any copyright, patent, trademark, or +# attribution notices that are present in the Work. + +# 3.2 Derivative Works. You may specify that additional or different +# terms apply to the use, reproduction, and distribution of your +# derivative works of the Work ("Your Terms") only if (a) Your Terms +# provide that the use limitation in Section 3.3 applies to your +# derivative works, and (b) you identify the specific derivative +# works that are subject to Your Terms. Notwithstanding Your Terms, +# this License (including the redistribution requirements in Section +# 3.1) will continue to apply to the Work itself. + +# 3.3 Use Limitation. The Work and any derivative works thereof only +# may be used or intended for use non-commercially. Notwithstanding +# the foregoing, NVIDIA and its affiliates may use the Work and any +# derivative works commercially. As used herein, "non-commercially" +# means for research or evaluation purposes only. + +# 3.4 Patent Claims. If you bring or threaten to bring a patent claim +# against any Licensor (including any claim, cross-claim or +# counterclaim in a lawsuit) to enforce any patents that you allege +# are infringed by any Work, then your rights under this License from +# such Licensor (including the grant in Section 2.1) will terminate +# immediately. + +# 3.5 Trademarks. This License does not grant any rights to use any +# Licensor’s or its affiliates’ names, logos, or trademarks, except +# as necessary to reproduce the notices described in this License. + +# 3.6 Termination. If you violate any term of this License, then your +# rights under this License (including the grant in Section 2.1) will +# terminate immediately. + +# 4. Disclaimer of Warranty. + +# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR +# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER +# THIS LICENSE. + +# 5. Limitation of Liability. + +# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL +# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE +# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, +# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF +# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK +# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, +# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER +# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGES. + +# ======================================================================= + +import torch +from torch.autograd import Function +from torch.nn import functional as F + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import to_2tuple +from ..utils import ext_loader + +upfirdn2d_ext = ext_loader.load_ext('_ext', ['upfirdn2d']) + + +class UpFirDn2dBackward(Function): + + @staticmethod + def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad, + in_size, out_size): + + up_x, up_y = up + down_x, down_y = down + g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad + + grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1) + + grad_input = upfirdn2d_ext.upfirdn2d( + grad_output, + grad_kernel, + up_x=down_x, + up_y=down_y, + down_x=up_x, + down_y=up_y, + pad_x0=g_pad_x0, + pad_x1=g_pad_x1, + pad_y0=g_pad_y0, + pad_y1=g_pad_y1) + grad_input = grad_input.view(in_size[0], in_size[1], in_size[2], + in_size[3]) + + ctx.save_for_backward(kernel) + + pad_x0, pad_x1, pad_y0, pad_y1 = pad + + ctx.up_x = up_x + ctx.up_y = up_y + ctx.down_x = down_x + ctx.down_y = down_y + ctx.pad_x0 = pad_x0 + ctx.pad_x1 = pad_x1 + ctx.pad_y0 = pad_y0 + ctx.pad_y1 = pad_y1 + ctx.in_size = in_size + ctx.out_size = out_size + + return grad_input + + @staticmethod + def backward(ctx, gradgrad_input): + kernel, = ctx.saved_tensors + + gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2], + ctx.in_size[3], 1) + + gradgrad_out = upfirdn2d_ext.upfirdn2d( + gradgrad_input, + kernel, + up_x=ctx.up_x, + up_y=ctx.up_y, + down_x=ctx.down_x, + down_y=ctx.down_y, + pad_x0=ctx.pad_x0, + pad_x1=ctx.pad_x1, + pad_y0=ctx.pad_y0, + pad_y1=ctx.pad_y1) + # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0], + # ctx.out_size[1], ctx.in_size[3]) + gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.in_size[1], + ctx.out_size[0], ctx.out_size[1]) + + return gradgrad_out, None, None, None, None, None, None, None, None + + +class UpFirDn2d(Function): + + @staticmethod + def forward(ctx, input, kernel, up, down, pad): + up_x, up_y = up + down_x, down_y = down + pad_x0, pad_x1, pad_y0, pad_y1 = pad + + kernel_h, kernel_w = kernel.shape + batch, channel, in_h, in_w = input.shape + ctx.in_size = input.shape + + input = input.reshape(-1, in_h, in_w, 1) + + ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1])) + + out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 + out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 + ctx.out_size = (out_h, out_w) + + ctx.up = (up_x, up_y) + ctx.down = (down_x, down_y) + ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1) + + g_pad_x0 = kernel_w - pad_x0 - 1 + g_pad_y0 = kernel_h - pad_y0 - 1 + g_pad_x1 = in_w * up_x - out_w * down_x + pad_x0 - up_x + 1 + g_pad_y1 = in_h * up_y - out_h * down_y + pad_y0 - up_y + 1 + + ctx.g_pad = (g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1) + + out = upfirdn2d_ext.upfirdn2d( + input, + kernel, + up_x=up_x, + up_y=up_y, + down_x=down_x, + down_y=down_y, + pad_x0=pad_x0, + pad_x1=pad_x1, + pad_y0=pad_y0, + pad_y1=pad_y1) + # out = out.view(major, out_h, out_w, minor) + out = out.view(-1, channel, out_h, out_w) + + return out + + @staticmethod + def backward(ctx, grad_output): + kernel, grad_kernel = ctx.saved_tensors + + grad_input = UpFirDn2dBackward.apply( + grad_output, + kernel, + grad_kernel, + ctx.up, + ctx.down, + ctx.pad, + ctx.g_pad, + ctx.in_size, + ctx.out_size, + ) + + return grad_input, None, None, None, None + + +def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)): + """UpFRIDn for 2d features. + + UpFIRDn is short for upsample, apply FIR filter and downsample. More + details can be found in: + https://www.mathworks.com/help/signal/ref/upfirdn.html + + Args: + input (Tensor): Tensor with shape of (n, c, h, w). + kernel (Tensor): Filter kernel. + up (int | tuple[int], optional): Upsampling factor. If given a number, + we will use this factor for the both height and width side. + Defaults to 1. + down (int | tuple[int], optional): Downsampling factor. If given a + number, we will use this factor for the both height and width side. + Defaults to 1. + pad (tuple[int], optional): Padding for tensors, (x_pad, y_pad) or + (x_pad_0, x_pad_1, y_pad_0, y_pad_1). Defaults to (0, 0). + + Returns: + Tensor: Tensor after UpFIRDn. + """ + if input.device.type == 'cpu': + if len(pad) == 2: + pad = (pad[0], pad[1], pad[0], pad[1]) + + up = to_2tuple(up) + + down = to_2tuple(down) + + out = upfirdn2d_native(input, kernel, up[0], up[1], down[0], down[1], + pad[0], pad[1], pad[2], pad[3]) + else: + _up = to_2tuple(up) + + _down = to_2tuple(down) + + if len(pad) == 4: + _pad = pad + elif len(pad) == 2: + _pad = (pad[0], pad[1], pad[0], pad[1]) + + out = UpFirDn2d.apply(input, kernel, _up, _down, _pad) + + return out + + +def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, + pad_y0, pad_y1): + _, channel, in_h, in_w = input.shape + input = input.reshape(-1, in_h, in_w, 1) + + _, in_h, in_w, minor = input.shape + kernel_h, kernel_w = kernel.shape + + out = input.view(-1, in_h, 1, in_w, 1, minor) + out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1]) + out = out.view(-1, in_h * up_y, in_w * up_x, minor) + + out = F.pad( + out, + [0, 0, + max(pad_x0, 0), + max(pad_x1, 0), + max(pad_y0, 0), + max(pad_y1, 0)]) + out = out[:, + max(-pad_y0, 0):out.shape[1] - max(-pad_y1, 0), + max(-pad_x0, 0):out.shape[2] - max(-pad_x1, 0), :, ] + + out = out.permute(0, 3, 1, 2) + out = out.reshape( + [-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]) + w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w) + out = F.conv2d(out, w) + out = out.reshape( + -1, + minor, + in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1, + in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1, + ) + out = out.permute(0, 2, 3, 1) + out = out[:, ::down_y, ::down_x, :] + + out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 + out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 + + return out.view(-1, channel, out_h, out_w) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py new file mode 100644 index 000000000000..ca3226a4fbcb --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py @@ -0,0 +1,132 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn +from torch.autograd import Function +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['dynamic_voxelize_forward', 'hard_voxelize_forward']) + + +class _Voxelization(Function): + + @staticmethod + def forward(ctx, + points, + voxel_size, + coors_range, + max_points=35, + max_voxels=20000): + """Convert kitti points(N, >=3) to voxels. + + Args: + points (torch.Tensor): [N, ndim]. Points[:, :3] contain xyz points + and points[:, 3:] contain other information like reflectivity. + voxel_size (tuple or float): The size of voxel with the shape of + [3]. + coors_range (tuple or float): The coordinate range of voxel with + the shape of [6]. + max_points (int, optional): maximum points contained in a voxel. if + max_points=-1, it means using dynamic_voxelize. Default: 35. + max_voxels (int, optional): maximum voxels this function create. + for second, 20000 is a good choice. Users should shuffle points + before call this function because max_voxels may drop points. + Default: 20000. + + Returns: + voxels_out (torch.Tensor): Output voxels with the shape of [M, + max_points, ndim]. Only contain points and returned when + max_points != -1. + coors_out (torch.Tensor): Output coordinates with the shape of + [M, 3]. + num_points_per_voxel_out (torch.Tensor): Num points per voxel with + the shape of [M]. Only returned when max_points != -1. + """ + if max_points == -1 or max_voxels == -1: + coors = points.new_zeros(size=(points.size(0), 3), dtype=torch.int) + ext_module.dynamic_voxelize_forward(points, coors, voxel_size, + coors_range, 3) + return coors + else: + voxels = points.new_zeros( + size=(max_voxels, max_points, points.size(1))) + coors = points.new_zeros(size=(max_voxels, 3), dtype=torch.int) + num_points_per_voxel = points.new_zeros( + size=(max_voxels, ), dtype=torch.int) + voxel_num = ext_module.hard_voxelize_forward( + points, voxels, coors, num_points_per_voxel, voxel_size, + coors_range, max_points, max_voxels, 3) + # select the valid voxels + voxels_out = voxels[:voxel_num] + coors_out = coors[:voxel_num] + num_points_per_voxel_out = num_points_per_voxel[:voxel_num] + return voxels_out, coors_out, num_points_per_voxel_out + + +voxelization = _Voxelization.apply + + +class Voxelization(nn.Module): + """Convert kitti points(N, >=3) to voxels. + + Please refer to `PVCNN `_ for more + details. + + Args: + voxel_size (tuple or float): The size of voxel with the shape of [3]. + point_cloud_range (tuple or float): The coordinate range of voxel with + the shape of [6]. + max_num_points (int): maximum points contained in a voxel. if + max_points=-1, it means using dynamic_voxelize. + max_voxels (int, optional): maximum voxels this function create. + for second, 20000 is a good choice. Users should shuffle points + before call this function because max_voxels may drop points. + Default: 20000. + """ + + def __init__(self, + voxel_size, + point_cloud_range, + max_num_points, + max_voxels=20000): + super().__init__() + + self.voxel_size = voxel_size + self.point_cloud_range = point_cloud_range + self.max_num_points = max_num_points + if isinstance(max_voxels, tuple): + self.max_voxels = max_voxels + else: + self.max_voxels = _pair(max_voxels) + + point_cloud_range = torch.tensor( + point_cloud_range, dtype=torch.float32) + voxel_size = torch.tensor(voxel_size, dtype=torch.float32) + grid_size = (point_cloud_range[3:] - + point_cloud_range[:3]) / voxel_size + grid_size = torch.round(grid_size).long() + input_feat_shape = grid_size[:2] + self.grid_size = grid_size + # the origin shape is as [x-len, y-len, z-len] + # [w, h, d] -> [d, h, w] + self.pcd_shape = [*input_feat_shape, 1][::-1] + + def forward(self, input): + if self.training: + max_voxels = self.max_voxels[0] + else: + max_voxels = self.max_voxels[1] + + return voxelization(input, self.voxel_size, self.point_cloud_range, + self.max_num_points, max_voxels) + + def __repr__(self): + s = self.__class__.__name__ + '(' + s += 'voxel_size=' + str(self.voxel_size) + s += ', point_cloud_range=' + str(self.point_cloud_range) + s += ', max_num_points=' + str(self.max_num_points) + s += ', max_voxels=' + str(self.max_voxels) + s += ')' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py new file mode 100644 index 000000000000..2ed2c17ad357 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .collate import collate +from .data_container import DataContainer +from .data_parallel import MMDataParallel +from .distributed import MMDistributedDataParallel +from .registry import MODULE_WRAPPERS +from .scatter_gather import scatter, scatter_kwargs +from .utils import is_module_wrapper + +__all__ = [ + 'collate', 'DataContainer', 'MMDataParallel', 'MMDistributedDataParallel', + 'scatter', 'scatter_kwargs', 'is_module_wrapper', 'MODULE_WRAPPERS' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py new file mode 100644 index 000000000000..9b5a8a44483a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py @@ -0,0 +1,79 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.nn.parallel._functions import _get_stream + + +def scatter(input, devices, streams=None): + """Scatters tensor across multiple GPUs.""" + if streams is None: + streams = [None] * len(devices) + + if isinstance(input, list): + chunk_size = (len(input) - 1) // len(devices) + 1 + outputs = [ + scatter(input[i], [devices[i // chunk_size]], + [streams[i // chunk_size]]) for i in range(len(input)) + ] + return outputs + elif isinstance(input, torch.Tensor): + output = input.contiguous() + # TODO: copy to a pinned buffer first (if copying from CPU) + stream = streams[0] if output.numel() > 0 else None + if devices != [-1]: + with torch.cuda.device(devices[0]), torch.cuda.stream(stream): + output = output.cuda(devices[0], non_blocking=True) + else: + # unsqueeze the first dimension thus the tensor's shape is the + # same as those scattered with GPU. + output = output.unsqueeze(0) + return output + else: + raise Exception(f'Unknown type {type(input)}.') + + +def synchronize_stream(output, devices, streams): + if isinstance(output, list): + chunk_size = len(output) // len(devices) + for i in range(len(devices)): + for j in range(chunk_size): + synchronize_stream(output[i * chunk_size + j], [devices[i]], + [streams[i]]) + elif isinstance(output, torch.Tensor): + if output.numel() != 0: + with torch.cuda.device(devices[0]): + main_stream = torch.cuda.current_stream() + main_stream.wait_stream(streams[0]) + output.record_stream(main_stream) + else: + raise Exception(f'Unknown type {type(output)}.') + + +def get_input_device(input): + if isinstance(input, list): + for item in input: + input_device = get_input_device(item) + if input_device != -1: + return input_device + return -1 + elif isinstance(input, torch.Tensor): + return input.get_device() if input.is_cuda else -1 + else: + raise Exception(f'Unknown type {type(input)}.') + + +class Scatter: + + @staticmethod + def forward(target_gpus, input): + input_device = get_input_device(input) + streams = None + if input_device == -1 and target_gpus != [-1]: + # Perform CPU to GPU copies in a background stream + streams = [_get_stream(device) for device in target_gpus] + + outputs = scatter(input, target_gpus, streams) + # Synchronize with the copy stream + if streams is not None: + synchronize_stream(outputs, target_gpus, streams) + + return tuple(outputs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py new file mode 100644 index 000000000000..ad749197df21 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py @@ -0,0 +1,84 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from collections.abc import Mapping, Sequence + +import torch +import torch.nn.functional as F +from torch.utils.data.dataloader import default_collate + +from .data_container import DataContainer + + +def collate(batch, samples_per_gpu=1): + """Puts each data field into a tensor/DataContainer with outer dimension + batch size. + + Extend default_collate to add support for + :type:`~mmcv.parallel.DataContainer`. There are 3 cases. + + 1. cpu_only = True, e.g., meta data + 2. cpu_only = False, stack = True, e.g., images tensors + 3. cpu_only = False, stack = False, e.g., gt bboxes + """ + + if not isinstance(batch, Sequence): + raise TypeError(f'{batch.dtype} is not supported.') + + if isinstance(batch[0], DataContainer): + stacked = [] + if batch[0].cpu_only: + for i in range(0, len(batch), samples_per_gpu): + stacked.append( + [sample.data for sample in batch[i:i + samples_per_gpu]]) + return DataContainer( + stacked, batch[0].stack, batch[0].padding_value, cpu_only=True) + elif batch[0].stack: + for i in range(0, len(batch), samples_per_gpu): + assert isinstance(batch[i].data, torch.Tensor) + + if batch[i].pad_dims is not None: + ndim = batch[i].dim() + assert ndim > batch[i].pad_dims + max_shape = [0 for _ in range(batch[i].pad_dims)] + for dim in range(1, batch[i].pad_dims + 1): + max_shape[dim - 1] = batch[i].size(-dim) + for sample in batch[i:i + samples_per_gpu]: + for dim in range(0, ndim - batch[i].pad_dims): + assert batch[i].size(dim) == sample.size(dim) + for dim in range(1, batch[i].pad_dims + 1): + max_shape[dim - 1] = max(max_shape[dim - 1], + sample.size(-dim)) + padded_samples = [] + for sample in batch[i:i + samples_per_gpu]: + pad = [0 for _ in range(batch[i].pad_dims * 2)] + for dim in range(1, batch[i].pad_dims + 1): + pad[2 * dim - + 1] = max_shape[dim - 1] - sample.size(-dim) + padded_samples.append( + F.pad( + sample.data, pad, value=sample.padding_value)) + stacked.append(default_collate(padded_samples)) + elif batch[i].pad_dims is None: + stacked.append( + default_collate([ + sample.data + for sample in batch[i:i + samples_per_gpu] + ])) + else: + raise ValueError( + 'pad_dims should be either None or integers (1-3)') + + else: + for i in range(0, len(batch), samples_per_gpu): + stacked.append( + [sample.data for sample in batch[i:i + samples_per_gpu]]) + return DataContainer(stacked, batch[0].stack, batch[0].padding_value) + elif isinstance(batch[0], Sequence): + transposed = zip(*batch) + return [collate(samples, samples_per_gpu) for samples in transposed] + elif isinstance(batch[0], Mapping): + return { + key: collate([d[key] for d in batch], samples_per_gpu) + for key in batch[0] + } + else: + return default_collate(batch) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py new file mode 100644 index 000000000000..cedb0d32a51a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py @@ -0,0 +1,89 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import functools + +import torch + + +def assert_tensor_type(func): + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not isinstance(args[0].data, torch.Tensor): + raise AttributeError( + f'{args[0].__class__.__name__} has no attribute ' + f'{func.__name__} for type {args[0].datatype}') + return func(*args, **kwargs) + + return wrapper + + +class DataContainer: + """A container for any type of objects. + + Typically tensors will be stacked in the collate function and sliced along + some dimension in the scatter function. This behavior has some limitations. + 1. All tensors have to be the same size. + 2. Types are limited (numpy array or Tensor). + + We design `DataContainer` and `MMDataParallel` to overcome these + limitations. The behavior can be either of the following. + + - copy to GPU, pad all tensors to the same size and stack them + - copy to GPU without stacking + - leave the objects as is and pass it to the model + - pad_dims specifies the number of last few dimensions to do padding + """ + + def __init__(self, + data, + stack=False, + padding_value=0, + cpu_only=False, + pad_dims=2): + self._data = data + self._cpu_only = cpu_only + self._stack = stack + self._padding_value = padding_value + assert pad_dims in [None, 1, 2, 3] + self._pad_dims = pad_dims + + def __repr__(self): + return f'{self.__class__.__name__}({repr(self.data)})' + + def __len__(self): + return len(self._data) + + @property + def data(self): + return self._data + + @property + def datatype(self): + if isinstance(self.data, torch.Tensor): + return self.data.type() + else: + return type(self.data) + + @property + def cpu_only(self): + return self._cpu_only + + @property + def stack(self): + return self._stack + + @property + def padding_value(self): + return self._padding_value + + @property + def pad_dims(self): + return self._pad_dims + + @assert_tensor_type + def size(self, *args, **kwargs): + return self.data.size(*args, **kwargs) + + @assert_tensor_type + def dim(self): + return self.data.dim() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py new file mode 100644 index 000000000000..79b5f69b654c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py @@ -0,0 +1,89 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from itertools import chain + +from torch.nn.parallel import DataParallel + +from .scatter_gather import scatter_kwargs + + +class MMDataParallel(DataParallel): + """The DataParallel module that supports DataContainer. + + MMDataParallel has two main differences with PyTorch DataParallel: + + - It supports a custom type :class:`DataContainer` which allows more + flexible control of input data during both GPU and CPU inference. + - It implement two more APIs ``train_step()`` and ``val_step()``. + + Args: + module (:class:`nn.Module`): Module to be encapsulated. + device_ids (list[int]): Device IDS of modules to be scattered to. + Defaults to None when GPU is not available. + output_device (str | int): Device ID for output. Defaults to None. + dim (int): Dimension used to scatter the data. Defaults to 0. + """ + + def __init__(self, *args, dim=0, **kwargs): + super(MMDataParallel, self).__init__(*args, dim=dim, **kwargs) + self.dim = dim + + def forward(self, *inputs, **kwargs): + """Override the original forward function. + + The main difference lies in the CPU inference where the data in + :class:`DataContainers` will still be gathered. + """ + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module(*inputs[0], **kwargs[0]) + else: + return super().forward(*inputs, **kwargs) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def train_step(self, *inputs, **kwargs): + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module.train_step(*inputs[0], **kwargs[0]) + + assert len(self.device_ids) == 1, \ + ('MMDataParallel only supports single GPU training, if you need to' + ' train with multiple GPUs, please use MMDistributedDataParallel' + 'instead.') + + for t in chain(self.module.parameters(), self.module.buffers()): + if t.device != self.src_device_obj: + raise RuntimeError( + 'module must have its parameters and buffers ' + f'on device {self.src_device_obj} (device_ids[0]) but ' + f'found one of them on device: {t.device}') + + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + return self.module.train_step(*inputs[0], **kwargs[0]) + + def val_step(self, *inputs, **kwargs): + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module.val_step(*inputs[0], **kwargs[0]) + + assert len(self.device_ids) == 1, \ + ('MMDataParallel only supports single GPU training, if you need to' + ' train with multiple GPUs, please use MMDistributedDataParallel' + ' instead.') + + for t in chain(self.module.parameters(), self.module.buffers()): + if t.device != self.src_device_obj: + raise RuntimeError( + 'module must have its parameters and buffers ' + f'on device {self.src_device_obj} (device_ids[0]) but ' + f'found one of them on device: {t.device}') + + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + return self.module.val_step(*inputs[0], **kwargs[0]) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py new file mode 100644 index 000000000000..b88c33ce159e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py @@ -0,0 +1,112 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.nn.parallel.distributed import (DistributedDataParallel, + _find_tensors) + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import print_log +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version +from .scatter_gather import scatter_kwargs + + +class MMDistributedDataParallel(DistributedDataParallel): + """The DDP module that supports DataContainer. + + MMDDP has two main differences with PyTorch DDP: + + - It supports a custom type :class:`DataContainer` which allows more + flexible control of input data. + - It implement two APIs ``train_step()`` and ``val_step()``. + """ + + def to_kwargs(self, inputs, kwargs, device_id): + # Use `self.to_kwargs` instead of `self.scatter` in pytorch1.8 + # to move all tensors to device_id + return scatter_kwargs(inputs, kwargs, [device_id], dim=self.dim) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def train_step(self, *inputs, **kwargs): + """train_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.train_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + + # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the + # end of backward to the beginning of forward. + if ('parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) >= digit_version('1.7') + and self.reducer._rebuild_buckets()): + print_log( + 'Reducer buckets have been rebuilt in this iteration.', + logger='mmcv') + + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.train_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply( + self._module_copies[:len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + output = self.module.train_step(*inputs, **kwargs) + + if torch.is_grad_enabled() and getattr( + self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if ('parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) > digit_version('1.2')): + self.require_forward_param_sync = False + return output + + def val_step(self, *inputs, **kwargs): + """val_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.val_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the + # end of backward to the beginning of forward. + if ('parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) >= digit_version('1.7') + and self.reducer._rebuild_buckets()): + print_log( + 'Reducer buckets have been rebuilt in this iteration.', + logger='mmcv') + + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.val_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply( + self._module_copies[:len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + output = self.module.val_step(*inputs, **kwargs) + + if torch.is_grad_enabled() and getattr( + self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if ('parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) > digit_version('1.2')): + self.require_forward_param_sync = False + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py new file mode 100644 index 000000000000..4a0efe2edc0f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py @@ -0,0 +1,70 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.distributed as dist +import torch.nn as nn +from torch._utils import (_flatten_dense_tensors, _take_tensors, + _unflatten_dense_tensors) + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version +from .registry import MODULE_WRAPPERS +from .scatter_gather import scatter_kwargs + + +@MODULE_WRAPPERS.register_module() +class MMDistributedDataParallel(nn.Module): + + def __init__(self, + module, + dim=0, + broadcast_buffers=True, + bucket_cap_mb=25): + super(MMDistributedDataParallel, self).__init__() + self.module = module + self.dim = dim + self.broadcast_buffers = broadcast_buffers + + self.broadcast_bucket_size = bucket_cap_mb * 1024 * 1024 + self._sync_params() + + def _dist_broadcast_coalesced(self, tensors, buffer_size): + for tensors in _take_tensors(tensors, buffer_size): + flat_tensors = _flatten_dense_tensors(tensors) + dist.broadcast(flat_tensors, 0) + for tensor, synced in zip( + tensors, _unflatten_dense_tensors(flat_tensors, tensors)): + tensor.copy_(synced) + + def _sync_params(self): + module_states = list(self.module.state_dict().values()) + if len(module_states) > 0: + self._dist_broadcast_coalesced(module_states, + self.broadcast_bucket_size) + if self.broadcast_buffers: + if (TORCH_VERSION != 'parrots' + and digit_version(TORCH_VERSION) < digit_version('1.0')): + buffers = [b.data for b in self.module._all_buffers()] + else: + buffers = [b.data for b in self.module.buffers()] + if len(buffers) > 0: + self._dist_broadcast_coalesced(buffers, + self.broadcast_bucket_size) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def forward(self, *inputs, **kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, + [torch.cuda.current_device()]) + return self.module(*inputs[0], **kwargs[0]) + + def train_step(self, *inputs, **kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, + [torch.cuda.current_device()]) + output = self.module.train_step(*inputs[0], **kwargs[0]) + return output + + def val_step(self, *inputs, **kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, + [torch.cuda.current_device()]) + output = self.module.val_step(*inputs[0], **kwargs[0]) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/registry.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/registry.py new file mode 100644 index 000000000000..4d31bc1d08e6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/registry.py @@ -0,0 +1,8 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from torch.nn.parallel import DataParallel, DistributedDataParallel + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry + +MODULE_WRAPPERS = Registry('module wrapper') +MODULE_WRAPPERS.register_module(module=DataParallel) +MODULE_WRAPPERS.register_module(module=DistributedDataParallel) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/scatter_gather.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/scatter_gather.py new file mode 100644 index 000000000000..900ff88566f8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/scatter_gather.py @@ -0,0 +1,59 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.nn.parallel._functions import Scatter as OrigScatter + +from ._functions import Scatter +from .data_container import DataContainer + + +def scatter(inputs, target_gpus, dim=0): + """Scatter inputs to target gpus. + + The only difference from original :func:`scatter` is to add support for + :type:`~mmcv.parallel.DataContainer`. + """ + + def scatter_map(obj): + if isinstance(obj, torch.Tensor): + if target_gpus != [-1]: + return OrigScatter.apply(target_gpus, None, dim, obj) + else: + # for CPU inference we use self-implemented scatter + return Scatter.forward(target_gpus, obj) + if isinstance(obj, DataContainer): + if obj.cpu_only: + return obj.data + else: + return Scatter.forward(target_gpus, obj.data) + if isinstance(obj, tuple) and len(obj) > 0: + return list(zip(*map(scatter_map, obj))) + if isinstance(obj, list) and len(obj) > 0: + out = list(map(list, zip(*map(scatter_map, obj)))) + return out + if isinstance(obj, dict) and len(obj) > 0: + out = list(map(type(obj), zip(*map(scatter_map, obj.items())))) + return out + return [obj for targets in target_gpus] + + # After scatter_map is called, a scatter_map cell will exist. This cell + # has a reference to the actual function scatter_map, which has references + # to a closure that has a reference to the scatter_map cell (because the + # fn is recursive). To avoid this reference cycle, we set the function to + # None, clearing the cell + try: + return scatter_map(inputs) + finally: + scatter_map = None + + +def scatter_kwargs(inputs, kwargs, target_gpus, dim=0): + """Scatter with support for kwargs dictionary.""" + inputs = scatter(inputs, target_gpus, dim) if inputs else [] + kwargs = scatter(kwargs, target_gpus, dim) if kwargs else [] + if len(inputs) < len(kwargs): + inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) + elif len(kwargs) < len(inputs): + kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) + inputs = tuple(inputs) + kwargs = tuple(kwargs) + return inputs, kwargs diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/utils.py new file mode 100644 index 000000000000..0f5712cb42c3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/utils.py @@ -0,0 +1,20 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .registry import MODULE_WRAPPERS + + +def is_module_wrapper(module): + """Check if a module is a module wrapper. + + The following 3 modules in MMCV (and their subclasses) are regarded as + module wrappers: DataParallel, DistributedDataParallel, + MMDistributedDataParallel (the deprecated version). You may add you own + module wrapper by registering it to mmcv.parallel.MODULE_WRAPPERS. + + Args: + module (nn.Module): The module to be checked. + + Returns: + bool: True if the input module is a module wrapper. + """ + module_wrappers = tuple(MODULE_WRAPPERS.module_dict.values()) + return isinstance(module, module_wrappers) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py new file mode 100644 index 000000000000..52e4b48d383a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py @@ -0,0 +1,47 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .base_module import BaseModule, ModuleList, Sequential +from .base_runner import BaseRunner +from .builder import RUNNERS, build_runner +from .checkpoint import (CheckpointLoader, _load_checkpoint, + _load_checkpoint_with_prefix, load_checkpoint, + load_state_dict, save_checkpoint, weights_to_cpu) +from .default_constructor import DefaultRunnerConstructor +from .dist_utils import (allreduce_grads, allreduce_params, get_dist_info, + init_dist, master_only) +from .epoch_based_runner import EpochBasedRunner, Runner +from .fp16_utils import LossScaler, auto_fp16, force_fp32, wrap_fp16_model +from .hooks import (HOOKS, CheckpointHook, ClosureHook, DistEvalHook, + DistSamplerSeedHook, DvcliveLoggerHook, EMAHook, EvalHook, + Fp16OptimizerHook, GradientCumulativeFp16OptimizerHook, + GradientCumulativeOptimizerHook, Hook, IterTimerHook, + LoggerHook, LrUpdaterHook, MlflowLoggerHook, + NeptuneLoggerHook, OptimizerHook, PaviLoggerHook, + SyncBuffersHook, TensorboardLoggerHook, TextLoggerHook, + WandbLoggerHook) +from .iter_based_runner import IterBasedRunner, IterLoader +from .log_buffer import LogBuffer +from .optimizer import (OPTIMIZER_BUILDERS, OPTIMIZERS, + DefaultOptimizerConstructor, build_optimizer, + build_optimizer_constructor) +from .priority import Priority, get_priority +from .utils import get_host_info, get_time_str, obj_from_dict, set_random_seed + +__all__ = [ + 'BaseRunner', 'Runner', 'EpochBasedRunner', 'IterBasedRunner', 'LogBuffer', + 'HOOKS', 'Hook', 'CheckpointHook', 'ClosureHook', 'LrUpdaterHook', + 'OptimizerHook', 'IterTimerHook', 'DistSamplerSeedHook', 'LoggerHook', + 'PaviLoggerHook', 'TextLoggerHook', 'TensorboardLoggerHook', + 'NeptuneLoggerHook', 'WandbLoggerHook', 'MlflowLoggerHook', + 'DvcliveLoggerHook', '_load_checkpoint', 'load_state_dict', + 'load_checkpoint', 'weights_to_cpu', 'save_checkpoint', 'Priority', + 'get_priority', 'get_host_info', 'get_time_str', 'obj_from_dict', + 'init_dist', 'get_dist_info', 'master_only', 'OPTIMIZER_BUILDERS', + 'OPTIMIZERS', 'DefaultOptimizerConstructor', 'build_optimizer', + 'build_optimizer_constructor', 'IterLoader', 'set_random_seed', + 'auto_fp16', 'force_fp32', 'wrap_fp16_model', 'Fp16OptimizerHook', + 'SyncBuffersHook', 'EMAHook', 'build_runner', 'RUNNERS', 'allreduce_grads', + 'allreduce_params', 'LossScaler', 'CheckpointLoader', 'BaseModule', + '_load_checkpoint_with_prefix', 'EvalHook', 'DistEvalHook', 'Sequential', + 'ModuleList', 'GradientCumulativeOptimizerHook', + 'GradientCumulativeFp16OptimizerHook', 'DefaultRunnerConstructor' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py new file mode 100644 index 000000000000..b67c1f1bb08e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py @@ -0,0 +1,195 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import warnings +from abc import ABCMeta +from collections import defaultdict +from logging import FileHandler + +import torch.nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner.dist_utils import master_only +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.logging import get_logger, logger_initialized, print_log + + +class BaseModule(nn.Module, metaclass=ABCMeta): + """Base module for all modules in openmmlab. + + ``BaseModule`` is a wrapper of ``torch.nn.Module`` with additional + functionality of parameter initialization. Compared with + ``torch.nn.Module``, ``BaseModule`` mainly adds three attributes. + + - ``init_cfg``: the config to control the initialization. + - ``init_weights``: The function of parameter + initialization and recording initialization + information. + - ``_params_init_info``: Used to track the parameter + initialization information. This attribute only + exists during executing the ``init_weights``. + + Args: + init_cfg (dict, optional): Initialization config dict. + """ + + def __init__(self, init_cfg=None): + """Initialize BaseModule, inherited from `torch.nn.Module`""" + + # NOTE init_cfg can be defined in different levels, but init_cfg + # in low levels has a higher priority. + + super(BaseModule, self).__init__() + # define default value of init_cfg instead of hard code + # in init_weights() function + self._is_init = False + + self.init_cfg = copy.deepcopy(init_cfg) + + # Backward compatibility in derived classes + # if pretrained is not None: + # warnings.warn('DeprecationWarning: pretrained is a deprecated \ + # key, please consider using init_cfg') + # self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + + @property + def is_init(self): + return self._is_init + + def init_weights(self): + """Initialize the weights.""" + + is_top_level_module = False + # check if it is top-level module + if not hasattr(self, '_params_init_info'): + # The `_params_init_info` is used to record the initialization + # information of the parameters + # the key should be the obj:`nn.Parameter` of model and the value + # should be a dict containing + # - init_info (str): The string that describes the initialization. + # - tmp_mean_value (FloatTensor): The mean of the parameter, + # which indicates whether the parameter has been modified. + # this attribute would be deleted after all parameters + # is initialized. + self._params_init_info = defaultdict(dict) + is_top_level_module = True + + # Initialize the `_params_init_info`, + # When detecting the `tmp_mean_value` of + # the corresponding parameter is changed, update related + # initialization information + for name, param in self.named_parameters(): + self._params_init_info[param][ + 'init_info'] = f'The value is the same before and ' \ + f'after calling `init_weights` ' \ + f'of {self.__class__.__name__} ' + self._params_init_info[param][ + 'tmp_mean_value'] = param.data.mean() + + # pass `params_init_info` to all submodules + # All submodules share the same `params_init_info`, + # so it will be updated when parameters are + # modified at any level of the model. + for sub_module in self.modules(): + sub_module._params_init_info = self._params_init_info + + # Get the initialized logger, if not exist, + # create a logger named `mmcv` + logger_names = list(logger_initialized.keys()) + logger_name = logger_names[0] if logger_names else 'mmcv' + + from ..cnn import initialize + from ..cnn.utils.weight_init import update_init_info + module_name = self.__class__.__name__ + if not self._is_init: + if self.init_cfg: + print_log( + f'initialize {module_name} with init_cfg {self.init_cfg}', + logger=logger_name) + initialize(self, self.init_cfg) + if isinstance(self.init_cfg, dict): + # prevent the parameters of + # the pre-trained model + # from being overwritten by + # the `init_weights` + if self.init_cfg['type'] == 'Pretrained': + return + + for m in self.children(): + if hasattr(m, 'init_weights'): + m.init_weights() + # users may overload the `init_weights` + update_init_info( + m, + init_info=f'Initialized by ' + f'user-defined `init_weights`' + f' in {m.__class__.__name__} ') + + self._is_init = True + else: + warnings.warn(f'init_weights of {self.__class__.__name__} has ' + f'been called more than once.') + + if is_top_level_module: + self._dump_init_info(logger_name) + + for sub_module in self.modules(): + del sub_module._params_init_info + + @master_only + def _dump_init_info(self, logger_name): + """Dump the initialization information to a file named + `initialization.log.json` in workdir. + + Args: + logger_name (str): The name of logger. + """ + + logger = get_logger(logger_name) + + with_file_handler = False + # dump the information to the logger file if there is a `FileHandler` + for handler in logger.handlers: + if isinstance(handler, FileHandler): + handler.stream.write( + 'Name of parameter - Initialization information\n') + for name, param in self.named_parameters(): + handler.stream.write( + f'\n{name} - {param.shape}: ' + f"\n{self._params_init_info[param]['init_info']} \n") + handler.stream.flush() + with_file_handler = True + if not with_file_handler: + for name, param in self.named_parameters(): + print_log( + f'\n{name} - {param.shape}: ' + f"\n{self._params_init_info[param]['init_info']} \n ", + logger=logger_name) + + def __repr__(self): + s = super().__repr__() + if self.init_cfg: + s += f'\ninit_cfg={self.init_cfg}' + return s + + +class Sequential(BaseModule, nn.Sequential): + """Sequential module in openmmlab. + + Args: + init_cfg (dict, optional): Initialization config dict. + """ + + def __init__(self, *args, init_cfg=None): + BaseModule.__init__(self, init_cfg) + nn.Sequential.__init__(self, *args) + + +class ModuleList(BaseModule, nn.ModuleList): + """ModuleList in openmmlab. + + Args: + modules (iterable, optional): an iterable of modules to add. + init_cfg (dict, optional): Initialization config dict. + """ + + def __init__(self, modules=None, init_cfg=None): + BaseModule.__init__(self, init_cfg) + nn.ModuleList.__init__(self, modules) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py new file mode 100644 index 000000000000..a8671e7ae844 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py @@ -0,0 +1,542 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import logging +import os.path as osp +import warnings +from abc import ABCMeta, abstractmethod + +import torch +from torch.optim import Optimizer + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from ..parallel import is_module_wrapper +from .checkpoint import load_checkpoint +from .dist_utils import get_dist_info +from .hooks import HOOKS, Hook +from .log_buffer import LogBuffer +from .priority import Priority, get_priority +from .utils import get_time_str + + +class BaseRunner(metaclass=ABCMeta): + """The base class of Runner, a training helper for PyTorch. + + All subclasses should implement the following APIs: + + - ``run()`` + - ``train()`` + - ``val()`` + - ``save_checkpoint()`` + + Args: + model (:obj:`torch.nn.Module`): The model to be run. + batch_processor (callable): A callable method that process a data + batch. The interface of this method should be + `batch_processor(model, data, train_mode) -> dict` + optimizer (dict or :obj:`torch.optim.Optimizer`): It can be either an + optimizer (in most cases) or a dict of optimizers (in models that + requires more than one optimizer, e.g., GAN). + work_dir (str, optional): The working directory to save checkpoints + and logs. Defaults to None. + logger (:obj:`logging.Logger`): Logger used during training. + Defaults to None. (The default value is just for backward + compatibility) + meta (dict | None): A dict records some import information such as + environment info and seed, which will be logged in logger hook. + Defaults to None. + max_epochs (int, optional): Total training epochs. + max_iters (int, optional): Total training iterations. + """ + + def __init__(self, + model, + batch_processor=None, + optimizer=None, + work_dir=None, + logger=None, + meta=None, + max_iters=None, + max_epochs=None): + if batch_processor is not None: + if not callable(batch_processor): + raise TypeError('batch_processor must be callable, ' + f'but got {type(batch_processor)}') + warnings.warn('batch_processor is deprecated, please implement ' + 'train_step() and val_step() in the model instead.') + # raise an error is `batch_processor` is not None and + # `model.train_step()` exists. + if is_module_wrapper(model): + _model = model.module + else: + _model = model + if hasattr(_model, 'train_step') or hasattr(_model, 'val_step'): + raise RuntimeError( + 'batch_processor and model.train_step()/model.val_step() ' + 'cannot be both available.') + else: + assert hasattr(model, 'train_step') + + # check the type of `optimizer` + if isinstance(optimizer, dict): + for name, optim in optimizer.items(): + if not isinstance(optim, Optimizer): + raise TypeError( + f'optimizer must be a dict of torch.optim.Optimizers, ' + f'but optimizer["{name}"] is a {type(optim)}') + elif not isinstance(optimizer, Optimizer) and optimizer is not None: + raise TypeError( + f'optimizer must be a torch.optim.Optimizer object ' + f'or dict or None, but got {type(optimizer)}') + + # check the type of `logger` + if not isinstance(logger, logging.Logger): + raise TypeError(f'logger must be a logging.Logger object, ' + f'but got {type(logger)}') + + # check the type of `meta` + if meta is not None and not isinstance(meta, dict): + raise TypeError( + f'meta must be a dict or None, but got {type(meta)}') + + self.model = model + self.batch_processor = batch_processor + self.optimizer = optimizer + self.logger = logger + self.meta = meta + # create work_dir + if mmcv.is_str(work_dir): + self.work_dir = osp.abspath(work_dir) + mmcv.mkdir_or_exist(self.work_dir) + elif work_dir is None: + self.work_dir = None + else: + raise TypeError('"work_dir" must be a str or None') + + # get model name from the model class + if hasattr(self.model, 'module'): + self._model_name = self.model.module.__class__.__name__ + else: + self._model_name = self.model.__class__.__name__ + + self._rank, self._world_size = get_dist_info() + self.timestamp = get_time_str() + self.mode = None + self._hooks = [] + self._epoch = 0 + self._iter = 0 + self._inner_iter = 0 + + if max_epochs is not None and max_iters is not None: + raise ValueError( + 'Only one of `max_epochs` or `max_iters` can be set.') + + self._max_epochs = max_epochs + self._max_iters = max_iters + # TODO: Redesign LogBuffer, it is not flexible and elegant enough + self.log_buffer = LogBuffer() + + @property + def model_name(self): + """str: Name of the model, usually the module class name.""" + return self._model_name + + @property + def rank(self): + """int: Rank of current process. (distributed training)""" + return self._rank + + @property + def world_size(self): + """int: Number of processes participating in the job. + (distributed training)""" + return self._world_size + + @property + def hooks(self): + """list[:obj:`Hook`]: A list of registered hooks.""" + return self._hooks + + @property + def epoch(self): + """int: Current epoch.""" + return self._epoch + + @property + def iter(self): + """int: Current iteration.""" + return self._iter + + @property + def inner_iter(self): + """int: Iteration in an epoch.""" + return self._inner_iter + + @property + def max_epochs(self): + """int: Maximum training epochs.""" + return self._max_epochs + + @property + def max_iters(self): + """int: Maximum training iterations.""" + return self._max_iters + + @abstractmethod + def train(self): + pass + + @abstractmethod + def val(self): + pass + + @abstractmethod + def run(self, data_loaders, workflow, **kwargs): + pass + + @abstractmethod + def save_checkpoint(self, + out_dir, + filename_tmpl, + save_optimizer=True, + meta=None, + create_symlink=True): + pass + + def current_lr(self): + """Get current learning rates. + + Returns: + list[float] | dict[str, list[float]]: Current learning rates of all + param groups. If the runner has a dict of optimizers, this + method will return a dict. + """ + if isinstance(self.optimizer, torch.optim.Optimizer): + lr = [group['lr'] for group in self.optimizer.param_groups] + elif isinstance(self.optimizer, dict): + lr = dict() + for name, optim in self.optimizer.items(): + lr[name] = [group['lr'] for group in optim.param_groups] + else: + raise RuntimeError( + 'lr is not applicable because optimizer does not exist.') + return lr + + def current_momentum(self): + """Get current momentums. + + Returns: + list[float] | dict[str, list[float]]: Current momentums of all + param groups. If the runner has a dict of optimizers, this + method will return a dict. + """ + + def _get_momentum(optimizer): + momentums = [] + for group in optimizer.param_groups: + if 'momentum' in group.keys(): + momentums.append(group['momentum']) + elif 'betas' in group.keys(): + momentums.append(group['betas'][0]) + else: + momentums.append(0) + return momentums + + if self.optimizer is None: + raise RuntimeError( + 'momentum is not applicable because optimizer does not exist.') + elif isinstance(self.optimizer, torch.optim.Optimizer): + momentums = _get_momentum(self.optimizer) + elif isinstance(self.optimizer, dict): + momentums = dict() + for name, optim in self.optimizer.items(): + momentums[name] = _get_momentum(optim) + return momentums + + def register_hook(self, hook, priority='NORMAL'): + """Register a hook into the hook list. + + The hook will be inserted into a priority queue, with the specified + priority (See :class:`Priority` for details of priorities). + For hooks with the same priority, they will be triggered in the same + order as they are registered. + + Args: + hook (:obj:`Hook`): The hook to be registered. + priority (int or str or :obj:`Priority`): Hook priority. + Lower value means higher priority. + """ + assert isinstance(hook, Hook) + if hasattr(hook, 'priority'): + raise ValueError('"priority" is a reserved attribute for hooks') + priority = get_priority(priority) + hook.priority = priority + # insert the hook to a sorted list + inserted = False + for i in range(len(self._hooks) - 1, -1, -1): + if priority >= self._hooks[i].priority: + self._hooks.insert(i + 1, hook) + inserted = True + break + if not inserted: + self._hooks.insert(0, hook) + + def register_hook_from_cfg(self, hook_cfg): + """Register a hook from its cfg. + + Args: + hook_cfg (dict): Hook config. It should have at least keys 'type' + and 'priority' indicating its type and priority. + + Notes: + The specific hook class to register should not use 'type' and + 'priority' arguments during initialization. + """ + hook_cfg = hook_cfg.copy() + priority = hook_cfg.pop('priority', 'NORMAL') + hook = mmcv.build_from_cfg(hook_cfg, HOOKS) + self.register_hook(hook, priority=priority) + + def call_hook(self, fn_name): + """Call all hooks. + + Args: + fn_name (str): The function name in each hook to be called, such as + "before_train_epoch". + """ + for hook in self._hooks: + getattr(hook, fn_name)(self) + + def get_hook_info(self): + # Get hooks info in each stage + stage_hook_map = {stage: [] for stage in Hook.stages} + for hook in self.hooks: + try: + priority = Priority(hook.priority).name + except ValueError: + priority = hook.priority + classname = hook.__class__.__name__ + hook_info = f'({priority:<12}) {classname:<35}' + for trigger_stage in hook.get_triggered_stages(): + stage_hook_map[trigger_stage].append(hook_info) + + stage_hook_infos = [] + for stage in Hook.stages: + hook_infos = stage_hook_map[stage] + if len(hook_infos) > 0: + info = f'{stage}:\n' + info += '\n'.join(hook_infos) + info += '\n -------------------- ' + stage_hook_infos.append(info) + return '\n'.join(stage_hook_infos) + + def load_checkpoint(self, + filename, + map_location='cpu', + strict=False, + revise_keys=[(r'^module.', '')]): + return load_checkpoint( + self.model, + filename, + map_location, + strict, + self.logger, + revise_keys=revise_keys) + + def resume(self, + checkpoint, + resume_optimizer=True, + map_location='default'): + if map_location == 'default': + if torch.cuda.is_available(): + device_id = torch.cuda.current_device() + checkpoint = self.load_checkpoint( + checkpoint, + map_location=lambda storage, loc: storage.cuda(device_id)) + else: + checkpoint = self.load_checkpoint(checkpoint) + else: + checkpoint = self.load_checkpoint( + checkpoint, map_location=map_location) + + self._epoch = checkpoint['meta']['epoch'] + self._iter = checkpoint['meta']['iter'] + if self.meta is None: + self.meta = {} + self.meta.setdefault('hook_msgs', {}) + # load `last_ckpt`, `best_score`, `best_ckpt`, etc. for hook messages + self.meta['hook_msgs'].update(checkpoint['meta'].get('hook_msgs', {})) + + # Re-calculate the number of iterations when resuming + # models with different number of GPUs + if 'config' in checkpoint['meta']: + config = mmcv.Config.fromstring( + checkpoint['meta']['config'], file_format='.py') + previous_gpu_ids = config.get('gpu_ids', None) + if previous_gpu_ids and len(previous_gpu_ids) > 0 and len( + previous_gpu_ids) != self.world_size: + self._iter = int(self._iter * len(previous_gpu_ids) / + self.world_size) + self.logger.info('the iteration number is changed due to ' + 'change of GPU number') + + # resume meta information meta + self.meta = checkpoint['meta'] + + if 'optimizer' in checkpoint and resume_optimizer: + if isinstance(self.optimizer, Optimizer): + self.optimizer.load_state_dict(checkpoint['optimizer']) + elif isinstance(self.optimizer, dict): + for k in self.optimizer.keys(): + self.optimizer[k].load_state_dict( + checkpoint['optimizer'][k]) + else: + raise TypeError( + 'Optimizer should be dict or torch.optim.Optimizer ' + f'but got {type(self.optimizer)}') + + self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) + + def register_lr_hook(self, lr_config): + if lr_config is None: + return + elif isinstance(lr_config, dict): + assert 'policy' in lr_config + policy_type = lr_config.pop('policy') + # If the type of policy is all in lower case, e.g., 'cyclic', + # then its first letter will be capitalized, e.g., to be 'Cyclic'. + # This is for the convenient usage of Lr updater. + # Since this is not applicable for ` + # CosineAnnealingLrUpdater`, + # the string will not be changed if it contains capital letters. + if policy_type == policy_type.lower(): + policy_type = policy_type.title() + hook_type = policy_type + 'LrUpdaterHook' + lr_config['type'] = hook_type + hook = mmcv.build_from_cfg(lr_config, HOOKS) + else: + hook = lr_config + self.register_hook(hook, priority='VERY_HIGH') + + def register_momentum_hook(self, momentum_config): + if momentum_config is None: + return + if isinstance(momentum_config, dict): + assert 'policy' in momentum_config + policy_type = momentum_config.pop('policy') + # If the type of policy is all in lower case, e.g., 'cyclic', + # then its first letter will be capitalized, e.g., to be 'Cyclic'. + # This is for the convenient usage of momentum updater. + # Since this is not applicable for + # `CosineAnnealingMomentumUpdater`, + # the string will not be changed if it contains capital letters. + if policy_type == policy_type.lower(): + policy_type = policy_type.title() + hook_type = policy_type + 'MomentumUpdaterHook' + momentum_config['type'] = hook_type + hook = mmcv.build_from_cfg(momentum_config, HOOKS) + else: + hook = momentum_config + self.register_hook(hook, priority='HIGH') + + def register_optimizer_hook(self, optimizer_config): + if optimizer_config is None: + return + if isinstance(optimizer_config, dict): + optimizer_config.setdefault('type', 'OptimizerHook') + hook = mmcv.build_from_cfg(optimizer_config, HOOKS) + else: + hook = optimizer_config + self.register_hook(hook, priority='ABOVE_NORMAL') + + def register_checkpoint_hook(self, checkpoint_config): + if checkpoint_config is None: + return + if isinstance(checkpoint_config, dict): + checkpoint_config.setdefault('type', 'CheckpointHook') + hook = mmcv.build_from_cfg(checkpoint_config, HOOKS) + else: + hook = checkpoint_config + self.register_hook(hook, priority='NORMAL') + + def register_logger_hooks(self, log_config): + if log_config is None: + return + log_interval = log_config['interval'] + for info in log_config['hooks']: + logger_hook = mmcv.build_from_cfg( + info, HOOKS, default_args=dict(interval=log_interval)) + self.register_hook(logger_hook, priority='VERY_LOW') + + def register_timer_hook(self, timer_config): + if timer_config is None: + return + if isinstance(timer_config, dict): + timer_config_ = copy.deepcopy(timer_config) + hook = mmcv.build_from_cfg(timer_config_, HOOKS) + else: + hook = timer_config + self.register_hook(hook, priority='LOW') + + def register_custom_hooks(self, custom_config): + if custom_config is None: + return + + if not isinstance(custom_config, list): + custom_config = [custom_config] + + for item in custom_config: + if isinstance(item, dict): + self.register_hook_from_cfg(item) + else: + self.register_hook(item, priority='NORMAL') + + def register_profiler_hook(self, profiler_config): + if profiler_config is None: + return + if isinstance(profiler_config, dict): + profiler_config.setdefault('type', 'ProfilerHook') + hook = mmcv.build_from_cfg(profiler_config, HOOKS) + else: + hook = profiler_config + self.register_hook(hook) + + def register_training_hooks(self, + lr_config, + optimizer_config=None, + checkpoint_config=None, + log_config=None, + momentum_config=None, + timer_config=dict(type='IterTimerHook'), + custom_hooks_config=None): + """Register default and custom hooks for training. + + Default and custom hooks include: + + +----------------------+-------------------------+ + | Hooks | Priority | + +======================+=========================+ + | LrUpdaterHook | VERY_HIGH (10) | + +----------------------+-------------------------+ + | MomentumUpdaterHook | HIGH (30) | + +----------------------+-------------------------+ + | OptimizerStepperHook | ABOVE_NORMAL (40) | + +----------------------+-------------------------+ + | CheckpointSaverHook | NORMAL (50) | + +----------------------+-------------------------+ + | IterTimerHook | LOW (70) | + +----------------------+-------------------------+ + | LoggerHook(s) | VERY_LOW (90) | + +----------------------+-------------------------+ + | CustomHook(s) | defaults to NORMAL (50) | + +----------------------+-------------------------+ + + If custom hooks have same priority with default hooks, custom hooks + will be triggered after default hooks. + """ + self.register_lr_hook(lr_config) + self.register_momentum_hook(momentum_config) + self.register_optimizer_hook(optimizer_config) + self.register_checkpoint_hook(checkpoint_config) + self.register_timer_hook(timer_config) + self.register_logger_hooks(log_config) + self.register_custom_hooks(custom_hooks_config) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py new file mode 100644 index 000000000000..77c96ba0b2f3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py @@ -0,0 +1,24 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy + +from ..utils import Registry + +RUNNERS = Registry('runner') +RUNNER_BUILDERS = Registry('runner builder') + + +def build_runner_constructor(cfg): + return RUNNER_BUILDERS.build(cfg) + + +def build_runner(cfg, default_args=None): + runner_cfg = copy.deepcopy(cfg) + constructor_type = runner_cfg.pop('constructor', + 'DefaultRunnerConstructor') + runner_constructor = build_runner_constructor( + dict( + type=constructor_type, + runner_cfg=runner_cfg, + default_args=default_args)) + runner = runner_constructor() + return runner diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py new file mode 100644 index 000000000000..28621e8a2863 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py @@ -0,0 +1,707 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import io +import os +import os.path as osp +import pkgutil +import re +import time +import warnings +from collections import OrderedDict +from importlib import import_module +from tempfile import TemporaryDirectory + +import torch +import torchvision +from torch.optim import Optimizer +from torch.utils import model_zoo + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from ..fileio import FileClient +from ..fileio import load as load_file +from ..parallel import is_module_wrapper +from ..utils import mkdir_or_exist +from .dist_utils import get_dist_info + +ENV_MMCV_HOME = 'MMCV_HOME' +ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' +DEFAULT_CACHE_DIR = '~/.cache' + + +def _get_mmcv_home(): + mmcv_home = os.path.expanduser( + os.getenv( + ENV_MMCV_HOME, + os.path.join( + os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv'))) + + mkdir_or_exist(mmcv_home) + return mmcv_home + + +def load_state_dict(module, state_dict, strict=False, logger=None): + """Load state_dict to a module. + + This method is modified from :meth:`torch.nn.Module.load_state_dict`. + Default value for ``strict`` is set to ``False`` and the message for + param mismatch will be shown even if strict is False. + + Args: + module (Module): Module that receives the state_dict. + state_dict (OrderedDict): Weights. + strict (bool): whether to strictly enforce that the keys + in :attr:`state_dict` match the keys returned by this module's + :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. + logger (:obj:`logging.Logger`, optional): Logger to log the error + message. If not specified, print function will be used. + """ + unexpected_keys = [] + all_missing_keys = [] + err_msg = [] + + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + # use _load_from_state_dict to enable checkpoint version control + def load(module, prefix=''): + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + local_metadata = {} if metadata is None else metadata.get( + prefix[:-1], {}) + module._load_from_state_dict(state_dict, prefix, local_metadata, True, + all_missing_keys, unexpected_keys, + err_msg) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(module) + load = None # break load->load reference cycle + + # ignore "num_batches_tracked" of BN layers + missing_keys = [ + key for key in all_missing_keys if 'num_batches_tracked' not in key + ] + + if unexpected_keys: + err_msg.append('unexpected key in source ' + f'state_dict: {", ".join(unexpected_keys)}\n') + if missing_keys: + err_msg.append( + f'missing keys in source state_dict: {", ".join(missing_keys)}\n') + + rank, _ = get_dist_info() + if len(err_msg) > 0 and rank == 0: + err_msg.insert( + 0, 'The model and loaded state dict do not match exactly\n') + err_msg = '\n'.join(err_msg) + if strict: + raise RuntimeError(err_msg) + elif logger is not None: + logger.warning(err_msg) + else: + print(err_msg) + + +def get_torchvision_models(): + model_urls = dict() + for _, name, ispkg in pkgutil.walk_packages(torchvision.models.__path__): + if ispkg: + continue + _zoo = import_module(f'torchvision.models.{name}') + if hasattr(_zoo, 'model_urls'): + _urls = getattr(_zoo, 'model_urls') + model_urls.update(_urls) + return model_urls + + +def get_external_models(): + mmcv_home = _get_mmcv_home() + default_json_path = osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json') + default_urls = load_file(default_json_path) + assert isinstance(default_urls, dict) + external_json_path = osp.join(mmcv_home, 'open_mmlab.json') + if osp.exists(external_json_path): + external_urls = load_file(external_json_path) + assert isinstance(external_urls, dict) + default_urls.update(external_urls) + + return default_urls + + +def get_mmcls_models(): + mmcls_json_path = osp.join(mmcv.__path__[0], 'model_zoo/mmcls.json') + mmcls_urls = load_file(mmcls_json_path) + + return mmcls_urls + + +def get_deprecated_model_names(): + deprecate_json_path = osp.join(mmcv.__path__[0], + 'model_zoo/deprecated.json') + deprecate_urls = load_file(deprecate_json_path) + assert isinstance(deprecate_urls, dict) + + return deprecate_urls + + +def _process_mmcls_checkpoint(checkpoint): + state_dict = checkpoint['state_dict'] + new_state_dict = OrderedDict() + for k, v in state_dict.items(): + if k.startswith('backbone.'): + new_state_dict[k[9:]] = v + new_checkpoint = dict(state_dict=new_state_dict) + + return new_checkpoint + + +class CheckpointLoader: + """A general checkpoint loader to manage all schemes.""" + + _schemes = {} + + @classmethod + def _register_scheme(cls, prefixes, loader, force=False): + if isinstance(prefixes, str): + prefixes = [prefixes] + else: + assert isinstance(prefixes, (list, tuple)) + for prefix in prefixes: + if (prefix not in cls._schemes) or force: + cls._schemes[prefix] = loader + else: + raise KeyError( + f'{prefix} is already registered as a loader backend, ' + 'add "force=True" if you want to override it') + # sort, longer prefixes take priority + cls._schemes = OrderedDict( + sorted(cls._schemes.items(), key=lambda t: t[0], reverse=True)) + + @classmethod + def register_scheme(cls, prefixes, loader=None, force=False): + """Register a loader to CheckpointLoader. + + This method can be used as a normal class method or a decorator. + + Args: + prefixes (str or list[str] or tuple[str]): + The prefix of the registered loader. + loader (function, optional): The loader function to be registered. + When this method is used as a decorator, loader is None. + Defaults to None. + force (bool, optional): Whether to override the loader + if the prefix has already been registered. Defaults to False. + """ + + if loader is not None: + cls._register_scheme(prefixes, loader, force=force) + return + + def _register(loader_cls): + cls._register_scheme(prefixes, loader_cls, force=force) + return loader_cls + + return _register + + @classmethod + def _get_checkpoint_loader(cls, path): + """Finds a loader that supports the given path. Falls back to the local + loader if no other loader is found. + + Args: + path (str): checkpoint path + + Returns: + loader (function): checkpoint loader + """ + + for p in cls._schemes: + if path.startswith(p): + return cls._schemes[p] + + @classmethod + def load_checkpoint(cls, filename, map_location=None, logger=None): + """load checkpoint through URL scheme path. + + Args: + filename (str): checkpoint file name with given prefix + map_location (str, optional): Same as :func:`torch.load`. + Default: None + logger (:mod:`logging.Logger`, optional): The logger for message. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + checkpoint_loader = cls._get_checkpoint_loader(filename) + class_name = checkpoint_loader.__name__ + mmcv.print_log( + f'load checkpoint from {class_name[10:]} path: {filename}', logger) + return checkpoint_loader(filename, map_location) + + +@CheckpointLoader.register_scheme(prefixes='') +def load_from_local(filename, map_location): + """load checkpoint by local file path. + + Args: + filename (str): local checkpoint file path + map_location (str, optional): Same as :func:`torch.load`. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes=('http://', 'https://')) +def load_from_http(filename, map_location=None, model_dir=None): + """load checkpoint through HTTP or HTTPS scheme path. In distributed + setting, this function only download checkpoint at local rank 0. + + Args: + filename (str): checkpoint file path with modelzoo or + torchvision prefix + map_location (str, optional): Same as :func:`torch.load`. + model_dir (string, optional): directory in which to save the object, + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + if rank == 0: + checkpoint = model_zoo.load_url( + filename, model_dir=model_dir, map_location=map_location) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + checkpoint = model_zoo.load_url( + filename, model_dir=model_dir, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes='pavi://') +def load_from_pavi(filename, map_location=None): + """load checkpoint through the file path prefixed with pavi. In distributed + setting, this function download ckpt at all ranks to different temporary + directories. + + Args: + filename (str): checkpoint file path with pavi prefix + map_location (str, optional): Same as :func:`torch.load`. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + assert filename.startswith('pavi://'), \ + f'Expected filename startswith `pavi://`, but get {filename}' + model_path = filename[7:] + + try: + from pavi import modelcloud + except ImportError: + raise ImportError( + 'Please install pavi to load checkpoint from modelcloud.') + + model = modelcloud.get(model_path) + with TemporaryDirectory() as tmp_dir: + downloaded_file = osp.join(tmp_dir, model.name) + model.download(downloaded_file) + checkpoint = torch.load(downloaded_file, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes='s3://') +def load_from_ceph(filename, map_location=None, backend='petrel'): + """load checkpoint through the file path prefixed with s3. In distributed + setting, this function download ckpt at all ranks to different temporary + directories. + + Args: + filename (str): checkpoint file path with s3 prefix + map_location (str, optional): Same as :func:`torch.load`. + backend (str, optional): The storage backend type. Options are 'ceph', + 'petrel'. Default: 'petrel'. + + .. warning:: + :class:`mmcv.fileio.file_client.CephBackend` will be deprecated, + please use :class:`mmcv.fileio.file_client.PetrelBackend` instead. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + allowed_backends = ['ceph', 'petrel'] + if backend not in allowed_backends: + raise ValueError(f'Load from Backend {backend} is not supported.') + + if backend == 'ceph': + warnings.warn( + 'CephBackend will be deprecated, please use PetrelBackend instead') + + # CephClient and PetrelBackend have the same prefix 's3://' and the latter + # will be chosen as default. If PetrelBackend can not be instantiated + # successfully, the CephClient will be chosen. + try: + file_client = FileClient(backend=backend) + except ImportError: + allowed_backends.remove(backend) + file_client = FileClient(backend=allowed_backends[0]) + + with io.BytesIO(file_client.get(filename)) as buffer: + checkpoint = torch.load(buffer, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes=('modelzoo://', 'torchvision://')) +def load_from_torchvision(filename, map_location=None): + """load checkpoint through the file path prefixed with modelzoo or + torchvision. + + Args: + filename (str): checkpoint file path with modelzoo or + torchvision prefix + map_location (str, optional): Same as :func:`torch.load`. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + model_urls = get_torchvision_models() + if filename.startswith('modelzoo://'): + warnings.warn('The URL scheme of "modelzoo://" is deprecated, please ' + 'use "torchvision://" instead') + model_name = filename[11:] + else: + model_name = filename[14:] + return load_from_http(model_urls[model_name], map_location=map_location) + + +@CheckpointLoader.register_scheme(prefixes=('open-mmlab://', 'openmmlab://')) +def load_from_openmmlab(filename, map_location=None): + """load checkpoint through the file path prefixed with open-mmlab or + openmmlab. + + Args: + filename (str): checkpoint file path with open-mmlab or + openmmlab prefix + map_location (str, optional): Same as :func:`torch.load`. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + model_urls = get_external_models() + prefix_str = 'open-mmlab://' + if filename.startswith(prefix_str): + model_name = filename[13:] + else: + model_name = filename[12:] + prefix_str = 'openmmlab://' + + deprecated_urls = get_deprecated_model_names() + if model_name in deprecated_urls: + warnings.warn(f'{prefix_str}{model_name} is deprecated in favor ' + f'of {prefix_str}{deprecated_urls[model_name]}') + model_name = deprecated_urls[model_name] + model_url = model_urls[model_name] + # check if is url + if model_url.startswith(('http://', 'https://')): + checkpoint = load_from_http(model_url, map_location=map_location) + else: + filename = osp.join(_get_mmcv_home(), model_url) + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes='mmcls://') +def load_from_mmcls(filename, map_location=None): + """load checkpoint through the file path prefixed with mmcls. + + Args: + filename (str): checkpoint file path with mmcls prefix + map_location (str, optional): Same as :func:`torch.load`. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + model_urls = get_mmcls_models() + model_name = filename[8:] + checkpoint = load_from_http( + model_urls[model_name], map_location=map_location) + checkpoint = _process_mmcls_checkpoint(checkpoint) + return checkpoint + + +def _load_checkpoint(filename, map_location=None, logger=None): + """Load checkpoint from somewhere (modelzoo, file, url). + + Args: + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str, optional): Same as :func:`torch.load`. + Default: None. + logger (:mod:`logging.Logger`, optional): The logger for error message. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. It can be either an + OrderedDict storing model weights or a dict containing other + information, which depends on the checkpoint. + """ + return CheckpointLoader.load_checkpoint(filename, map_location, logger) + + +def _load_checkpoint_with_prefix(prefix, filename, map_location=None): + """Load partial pretrained model with specific prefix. + + Args: + prefix (str): The prefix of sub-module. + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str | None): Same as :func:`torch.load`. Default: None. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + checkpoint = _load_checkpoint(filename, map_location=map_location) + + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + if not prefix.endswith('.'): + prefix += '.' + prefix_len = len(prefix) + + state_dict = { + k[prefix_len:]: v + for k, v in state_dict.items() if k.startswith(prefix) + } + + assert state_dict, f'{prefix} is not in the pretrained model' + return state_dict + + +def load_checkpoint(model, + filename, + map_location=None, + strict=False, + logger=None, + revise_keys=[(r'^module\.', '')]): + """Load checkpoint from a file or URI. + + Args: + model (Module): Module to load checkpoint. + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str): Same as :func:`torch.load`. + strict (bool): Whether to allow different params for the model and + checkpoint. + logger (:mod:`logging.Logger` or None): The logger for error message. + revise_keys (list): A list of customized keywords to modify the + state_dict in checkpoint. Each item is a (pattern, replacement) + pair of the regular expression operations. Default: strip + the prefix 'module.' by [(r'^module\\.', '')]. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + checkpoint = _load_checkpoint(filename, map_location, logger) + # OrderedDict is a subclass of dict + if not isinstance(checkpoint, dict): + raise RuntimeError( + f'No state_dict found in checkpoint file {filename}') + # get state_dict from checkpoint + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + # strip prefix of state_dict + metadata = getattr(state_dict, '_metadata', OrderedDict()) + for p, r in revise_keys: + state_dict = OrderedDict( + {re.sub(p, r, k): v + for k, v in state_dict.items()}) + # Keep metadata in state_dict + state_dict._metadata = metadata + + # load state_dict + load_state_dict(model, state_dict, strict, logger) + return checkpoint + + +def weights_to_cpu(state_dict): + """Copy a model state_dict to cpu. + + Args: + state_dict (OrderedDict): Model weights on GPU. + + Returns: + OrderedDict: Model weights on GPU. + """ + state_dict_cpu = OrderedDict() + for key, val in state_dict.items(): + state_dict_cpu[key] = val.cpu() + # Keep metadata in state_dict + state_dict_cpu._metadata = getattr(state_dict, '_metadata', OrderedDict()) + return state_dict_cpu + + +def _save_to_state_dict(module, destination, prefix, keep_vars): + """Saves module state to `destination` dictionary. + + This method is modified from :meth:`torch.nn.Module._save_to_state_dict`. + + Args: + module (nn.Module): The module to generate state_dict. + destination (dict): A dict where state will be stored. + prefix (str): The prefix for parameters and buffers used in this + module. + """ + for name, param in module._parameters.items(): + if param is not None: + destination[prefix + name] = param if keep_vars else param.detach() + for name, buf in module._buffers.items(): + # remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d + if buf is not None: + destination[prefix + name] = buf if keep_vars else buf.detach() + + +def get_state_dict(module, destination=None, prefix='', keep_vars=False): + """Returns a dictionary containing a whole state of the module. + + Both parameters and persistent buffers (e.g. running averages) are + included. Keys are corresponding parameter and buffer names. + + This method is modified from :meth:`torch.nn.Module.state_dict` to + recursively check parallel module in case that the model has a complicated + structure, e.g., nn.Module(nn.Module(DDP)). + + Args: + module (nn.Module): The module to generate state_dict. + destination (OrderedDict): Returned dict for the state of the + module. + prefix (str): Prefix of the key. + keep_vars (bool): Whether to keep the variable property of the + parameters. Default: False. + + Returns: + dict: A dictionary containing a whole state of the module. + """ + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + + # below is the same as torch.nn.Module.state_dict() + if destination is None: + destination = OrderedDict() + destination._metadata = OrderedDict() + destination._metadata[prefix[:-1]] = local_metadata = dict( + version=module._version) + _save_to_state_dict(module, destination, prefix, keep_vars) + for name, child in module._modules.items(): + if child is not None: + get_state_dict( + child, destination, prefix + name + '.', keep_vars=keep_vars) + for hook in module._state_dict_hooks.values(): + hook_result = hook(module, destination, prefix, local_metadata) + if hook_result is not None: + destination = hook_result + return destination + + +def save_checkpoint(model, + filename, + optimizer=None, + meta=None, + file_client_args=None): + """Save checkpoint to file. + + The checkpoint will have 3 fields: ``meta``, ``state_dict`` and + ``optimizer``. By default ``meta`` will contain version and time info. + + Args: + model (Module): Module whose params are to be saved. + filename (str): Checkpoint filename. + optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. + meta (dict, optional): Metadata to be saved in checkpoint. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + `New in version 1.3.16.` + """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError(f'meta must be a dict or None, but got {type(meta)}') + meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) + + if is_module_wrapper(model): + model = model.module + + if hasattr(model, 'CLASSES') and model.CLASSES is not None: + # save class name to the meta + meta.update(CLASSES=model.CLASSES) + + checkpoint = { + 'meta': meta, + 'state_dict': weights_to_cpu(get_state_dict(model)) + } + # save optimizer state dict in the checkpoint + if isinstance(optimizer, Optimizer): + checkpoint['optimizer'] = optimizer.state_dict() + elif isinstance(optimizer, dict): + checkpoint['optimizer'] = {} + for name, optim in optimizer.items(): + checkpoint['optimizer'][name] = optim.state_dict() + + if filename.startswith('pavi://'): + if file_client_args is not None: + raise ValueError( + 'file_client_args should be "None" if filename starts with' + f'"pavi://", but got {file_client_args}') + try: + from pavi import modelcloud + from pavi import exception + except ImportError: + raise ImportError( + 'Please install pavi to load checkpoint from modelcloud.') + model_path = filename[7:] + root = modelcloud.Folder() + model_dir, model_name = osp.split(model_path) + try: + model = modelcloud.get(model_dir) + except exception.NodeNotFoundError: + model = root.create_training_model(model_dir) + with TemporaryDirectory() as tmp_dir: + checkpoint_file = osp.join(tmp_dir, model_name) + with open(checkpoint_file, 'wb') as f: + torch.save(checkpoint, f) + f.flush() + model.create_file(checkpoint_file, name=model_name) + else: + file_client = FileClient.infer_client(file_client_args, filename) + with io.BytesIO() as f: + torch.save(checkpoint, f) + file_client.put(f.getvalue(), filename) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py new file mode 100644 index 000000000000..7db4c3922229 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py @@ -0,0 +1,44 @@ +from .builder import RUNNER_BUILDERS, RUNNERS + + +@RUNNER_BUILDERS.register_module() +class DefaultRunnerConstructor: + """Default constructor for runners. + + Custom existing `Runner` like `EpocBasedRunner` though `RunnerConstructor`. + For example, We can inject some new properties and functions for `Runner`. + + Example: + >>> from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import RUNNER_BUILDERS, build_runner + >>> # Define a new RunnerReconstructor + >>> @RUNNER_BUILDERS.register_module() + >>> class MyRunnerConstructor: + ... def __init__(self, runner_cfg, default_args=None): + ... if not isinstance(runner_cfg, dict): + ... raise TypeError('runner_cfg should be a dict', + ... f'but got {type(runner_cfg)}') + ... self.runner_cfg = runner_cfg + ... self.default_args = default_args + ... + ... def __call__(self): + ... runner = RUNNERS.build(self.runner_cfg, + ... default_args=self.default_args) + ... # Add new properties for existing runner + ... runner.my_name = 'my_runner' + ... runner.my_function = lambda self: print(self.my_name) + ... ... + >>> # build your runner + >>> runner_cfg = dict(type='EpochBasedRunner', max_epochs=40, + ... constructor='MyRunnerConstructor') + >>> runner = build_runner(runner_cfg) + """ + + def __init__(self, runner_cfg, default_args=None): + if not isinstance(runner_cfg, dict): + raise TypeError('runner_cfg should be a dict', + f'but got {type(runner_cfg)}') + self.runner_cfg = runner_cfg + self.default_args = default_args + + def __call__(self): + return RUNNERS.build(self.runner_cfg, default_args=self.default_args) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py new file mode 100644 index 000000000000..d3a1ef3fda5c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py @@ -0,0 +1,164 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import functools +import os +import subprocess +from collections import OrderedDict + +import torch +import torch.multiprocessing as mp +from torch import distributed as dist +from torch._utils import (_flatten_dense_tensors, _take_tensors, + _unflatten_dense_tensors) + + +def init_dist(launcher, backend='nccl', **kwargs): + if mp.get_start_method(allow_none=True) is None: + mp.set_start_method('spawn') + if launcher == 'pytorch': + _init_dist_pytorch(backend, **kwargs) + elif launcher == 'mpi': + _init_dist_mpi(backend, **kwargs) + elif launcher == 'slurm': + _init_dist_slurm(backend, **kwargs) + else: + raise ValueError(f'Invalid launcher type: {launcher}') + + +def _init_dist_pytorch(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['RANK']) + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(rank % num_gpus) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_mpi(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['OMPI_COMM_WORLD_RANK']) + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(rank % num_gpus) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_slurm(backend, port=None): + """Initialize slurm distributed training environment. + + If argument ``port`` is not specified, then the master port will be system + environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system + environment variable, then a default port ``29500`` will be used. + + Args: + backend (str): Backend of torch.distributed. + port (int, optional): Master port. Defaults to None. + """ + proc_id = int(os.environ['SLURM_PROCID']) + ntasks = int(os.environ['SLURM_NTASKS']) + node_list = os.environ['SLURM_NODELIST'] + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(proc_id % num_gpus) + addr = subprocess.getoutput( + f'scontrol show hostname {node_list} | head -n1') + # specify master port + if port is not None: + os.environ['MASTER_PORT'] = str(port) + elif 'MASTER_PORT' in os.environ: + pass # use MASTER_PORT in the environment variable + else: + # 29500 is torch.distributed default port + os.environ['MASTER_PORT'] = '29500' + # use MASTER_ADDR in the environment variable if it already exists + if 'MASTER_ADDR' not in os.environ: + os.environ['MASTER_ADDR'] = addr + os.environ['WORLD_SIZE'] = str(ntasks) + os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) + os.environ['RANK'] = str(proc_id) + dist.init_process_group(backend=backend) + + +def get_dist_info(): + if dist.is_available() and dist.is_initialized(): + rank = dist.get_rank() + world_size = dist.get_world_size() + else: + rank = 0 + world_size = 1 + return rank, world_size + + +def master_only(func): + + @functools.wraps(func) + def wrapper(*args, **kwargs): + rank, _ = get_dist_info() + if rank == 0: + return func(*args, **kwargs) + + return wrapper + + +def allreduce_params(params, coalesce=True, bucket_size_mb=-1): + """Allreduce parameters. + + Args: + params (list[torch.Parameters]): List of parameters or buffers of a + model. + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. + """ + _, world_size = get_dist_info() + if world_size == 1: + return + params = [param.data for param in params] + if coalesce: + _allreduce_coalesced(params, world_size, bucket_size_mb) + else: + for tensor in params: + dist.all_reduce(tensor.div_(world_size)) + + +def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): + """Allreduce gradients. + + Args: + params (list[torch.Parameters]): List of parameters of a model + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. + """ + grads = [ + param.grad.data for param in params + if param.requires_grad and param.grad is not None + ] + _, world_size = get_dist_info() + if world_size == 1: + return + if coalesce: + _allreduce_coalesced(grads, world_size, bucket_size_mb) + else: + for tensor in grads: + dist.all_reduce(tensor.div_(world_size)) + + +def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): + if bucket_size_mb > 0: + bucket_size_bytes = bucket_size_mb * 1024 * 1024 + buckets = _take_tensors(tensors, bucket_size_bytes) + else: + buckets = OrderedDict() + for tensor in tensors: + tp = tensor.type() + if tp not in buckets: + buckets[tp] = [] + buckets[tp].append(tensor) + buckets = buckets.values() + + for bucket in buckets: + flat_tensors = _flatten_dense_tensors(bucket) + dist.all_reduce(flat_tensors) + flat_tensors.div_(world_size) + for tensor, synced in zip( + bucket, _unflatten_dense_tensors(flat_tensors, bucket)): + tensor.copy_(synced) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py new file mode 100644 index 000000000000..078e159d29fc --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py @@ -0,0 +1,187 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import platform +import shutil +import time +import warnings + +import torch + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from .base_runner import BaseRunner +from .builder import RUNNERS +from .checkpoint import save_checkpoint +from .utils import get_host_info + + +@RUNNERS.register_module() +class EpochBasedRunner(BaseRunner): + """Epoch-based Runner. + + This runner train models epoch by epoch. + """ + + def run_iter(self, data_batch, train_mode, **kwargs): + if self.batch_processor is not None: + outputs = self.batch_processor( + self.model, data_batch, train_mode=train_mode, **kwargs) + elif train_mode: + outputs = self.model.train_step(data_batch, self.optimizer, + **kwargs) + else: + outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) + if not isinstance(outputs, dict): + raise TypeError('"batch_processor()" or "model.train_step()"' + 'and "model.val_step()" must return a dict') + if 'log_vars' in outputs: + self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) + self.outputs = outputs + + def train(self, data_loader, **kwargs): + self.model.train() + self.mode = 'train' + self.data_loader = data_loader + self._max_iters = self._max_epochs * len(self.data_loader) + self.call_hook('before_train_epoch') + time.sleep(2) # Prevent possible deadlock during epoch transition + for i, data_batch in enumerate(self.data_loader): + self._inner_iter = i + self.call_hook('before_train_iter') + self.run_iter(data_batch, train_mode=True, **kwargs) + self.call_hook('after_train_iter') + self._iter += 1 + + self.call_hook('after_train_epoch') + self._epoch += 1 + + @torch.no_grad() + def val(self, data_loader, **kwargs): + self.model.eval() + self.mode = 'val' + self.data_loader = data_loader + self.call_hook('before_val_epoch') + time.sleep(2) # Prevent possible deadlock during epoch transition + for i, data_batch in enumerate(self.data_loader): + self._inner_iter = i + self.call_hook('before_val_iter') + self.run_iter(data_batch, train_mode=False) + self.call_hook('after_val_iter') + + self.call_hook('after_val_epoch') + + def run(self, data_loaders, workflow, max_epochs=None, **kwargs): + """Start running. + + Args: + data_loaders (list[:obj:`DataLoader`]): Dataloaders for training + and validation. + workflow (list[tuple]): A list of (phase, epochs) to specify the + running order and epochs. E.g, [('train', 2), ('val', 1)] means + running 2 epochs for training and 1 epoch for validation, + iteratively. + """ + assert isinstance(data_loaders, list) + assert mmcv.is_list_of(workflow, tuple) + assert len(data_loaders) == len(workflow) + if max_epochs is not None: + warnings.warn( + 'setting max_epochs in run is deprecated, ' + 'please set max_epochs in runner_config', DeprecationWarning) + self._max_epochs = max_epochs + + assert self._max_epochs is not None, ( + 'max_epochs must be specified during instantiation') + + for i, flow in enumerate(workflow): + mode, epochs = flow + if mode == 'train': + self._max_iters = self._max_epochs * len(data_loaders[i]) + break + + work_dir = self.work_dir if self.work_dir is not None else 'NONE' + self.logger.info('Start running, host: %s, work_dir: %s', + get_host_info(), work_dir) + self.logger.info('Hooks will be executed in the following order:\n%s', + self.get_hook_info()) + self.logger.info('workflow: %s, max: %d epochs', workflow, + self._max_epochs) + self.call_hook('before_run') + + while self.epoch < self._max_epochs: + for i, flow in enumerate(workflow): + mode, epochs = flow + if isinstance(mode, str): # self.train() + if not hasattr(self, mode): + raise ValueError( + f'runner has no method named "{mode}" to run an ' + 'epoch') + epoch_runner = getattr(self, mode) + else: + raise TypeError( + 'mode in workflow must be a str, but got {}'.format( + type(mode))) + + for _ in range(epochs): + if mode == 'train' and self.epoch >= self._max_epochs: + break + epoch_runner(data_loaders[i], **kwargs) + + time.sleep(1) # wait for some hooks like loggers to finish + self.call_hook('after_run') + + def save_checkpoint(self, + out_dir, + filename_tmpl='epoch_{}.pth', + save_optimizer=True, + meta=None, + create_symlink=True): + """Save the checkpoint. + + Args: + out_dir (str): The directory that checkpoints are saved. + filename_tmpl (str, optional): The checkpoint filename template, + which contains a placeholder for the epoch number. + Defaults to 'epoch_{}.pth'. + save_optimizer (bool, optional): Whether to save the optimizer to + the checkpoint. Defaults to True. + meta (dict, optional): The meta information to be saved in the + checkpoint. Defaults to None. + create_symlink (bool, optional): Whether to create a symlink + "latest.pth" to point to the latest checkpoint. + Defaults to True. + """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError( + f'meta should be a dict or None, but got {type(meta)}') + if self.meta is not None: + meta.update(self.meta) + # Note: meta.update(self.meta) should be done before + # meta.update(epoch=self.epoch + 1, iter=self.iter) otherwise + # there will be problems with resumed checkpoints. + # More details in https://github.com/open-mmlab/mmcv/pull/1108 + meta.update(epoch=self.epoch + 1, iter=self.iter) + + filename = filename_tmpl.format(self.epoch + 1) + filepath = osp.join(out_dir, filename) + optimizer = self.optimizer if save_optimizer else None + save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) + # in some environments, `os.symlink` is not supported, you may need to + # set `create_symlink` to False + if create_symlink: + dst_file = osp.join(out_dir, 'latest.pth') + if platform.system() != 'Windows': + mmcv.symlink(filename, dst_file) + else: + shutil.copy(filepath, dst_file) + + +@RUNNERS.register_module() +class Runner(EpochBasedRunner): + """Deprecated name of EpochBasedRunner.""" + + def __init__(self, *args, **kwargs): + warnings.warn( + 'Runner was deprecated, please use EpochBasedRunner instead') + super().__init__(*args, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py new file mode 100644 index 000000000000..e5067eebb7e4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py @@ -0,0 +1,410 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import functools +import warnings +from collections import abc +from inspect import getfullargspec + +import numpy as np +import torch +import torch.nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version +from .dist_utils import allreduce_grads as _allreduce_grads + +try: + # If PyTorch version >= 1.6.0, torch.cuda.amp.autocast would be imported + # and used; otherwise, auto fp16 will adopt mmcv's implementation. + # Note that when PyTorch >= 1.6.0, we still cast tensor types to fp16 + # manually, so the behavior may not be consistent with real amp. + from torch.cuda.amp import autocast +except ImportError: + pass + + +def cast_tensor_type(inputs, src_type, dst_type): + """Recursively convert Tensor in inputs from src_type to dst_type. + + Args: + inputs: Inputs that to be casted. + src_type (torch.dtype): Source type.. + dst_type (torch.dtype): Destination type. + + Returns: + The same type with inputs, but all contained Tensors have been cast. + """ + if isinstance(inputs, nn.Module): + return inputs + elif isinstance(inputs, torch.Tensor): + return inputs.to(dst_type) + elif isinstance(inputs, str): + return inputs + elif isinstance(inputs, np.ndarray): + return inputs + elif isinstance(inputs, abc.Mapping): + return type(inputs)({ + k: cast_tensor_type(v, src_type, dst_type) + for k, v in inputs.items() + }) + elif isinstance(inputs, abc.Iterable): + return type(inputs)( + cast_tensor_type(item, src_type, dst_type) for item in inputs) + else: + return inputs + + +def auto_fp16(apply_to=None, out_fp32=False): + """Decorator to enable fp16 training automatically. + + This decorator is useful when you write custom modules and want to support + mixed precision training. If inputs arguments are fp32 tensors, they will + be converted to fp16 automatically. Arguments other than fp32 tensors are + ignored. If you are using PyTorch >= 1.6, torch.cuda.amp is used as the + backend, otherwise, original mmcv implementation will be adopted. + + Args: + apply_to (Iterable, optional): The argument names to be converted. + `None` indicates all arguments. + out_fp32 (bool): Whether to convert the output back to fp32. + + Example: + + >>> import torch.nn as nn + >>> class MyModule1(nn.Module): + >>> + >>> # Convert x and y to fp16 + >>> @auto_fp16() + >>> def forward(self, x, y): + >>> pass + + >>> import torch.nn as nn + >>> class MyModule2(nn.Module): + >>> + >>> # convert pred to fp16 + >>> @auto_fp16(apply_to=('pred', )) + >>> def do_something(self, pred, others): + >>> pass + """ + + def auto_fp16_wrapper(old_func): + + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # check if the module has set the attribute `fp16_enabled`, if not, + # just fallback to the original method. + if not isinstance(args[0], torch.nn.Module): + raise TypeError('@auto_fp16 can only be used to decorate the ' + 'method of nn.Module') + if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): + return old_func(*args, **kwargs) + + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get the argument names to be casted + args_to_cast = args_info.args if apply_to is None else apply_to + # convert the args that need to be processed + new_args = [] + # NOTE: default args are not taken into consideration + if args: + arg_names = args_info.args[:len(args)] + for i, arg_name in enumerate(arg_names): + if arg_name in args_to_cast: + new_args.append( + cast_tensor_type(args[i], torch.float, torch.half)) + else: + new_args.append(args[i]) + # convert the kwargs that need to be processed + new_kwargs = {} + if kwargs: + for arg_name, arg_value in kwargs.items(): + if arg_name in args_to_cast: + new_kwargs[arg_name] = cast_tensor_type( + arg_value, torch.float, torch.half) + else: + new_kwargs[arg_name] = arg_value + # apply converted arguments to the decorated method + if (TORCH_VERSION != 'parrots' and + digit_version(TORCH_VERSION) >= digit_version('1.6.0')): + with autocast(enabled=True): + output = old_func(*new_args, **new_kwargs) + else: + output = old_func(*new_args, **new_kwargs) + # cast the results back to fp32 if necessary + if out_fp32: + output = cast_tensor_type(output, torch.half, torch.float) + return output + + return new_func + + return auto_fp16_wrapper + + +def force_fp32(apply_to=None, out_fp16=False): + """Decorator to convert input arguments to fp32 in force. + + This decorator is useful when you write custom modules and want to support + mixed precision training. If there are some inputs that must be processed + in fp32 mode, then this decorator can handle it. If inputs arguments are + fp16 tensors, they will be converted to fp32 automatically. Arguments other + than fp16 tensors are ignored. If you are using PyTorch >= 1.6, + torch.cuda.amp is used as the backend, otherwise, original mmcv + implementation will be adopted. + + Args: + apply_to (Iterable, optional): The argument names to be converted. + `None` indicates all arguments. + out_fp16 (bool): Whether to convert the output back to fp16. + + Example: + + >>> import torch.nn as nn + >>> class MyModule1(nn.Module): + >>> + >>> # Convert x and y to fp32 + >>> @force_fp32() + >>> def loss(self, x, y): + >>> pass + + >>> import torch.nn as nn + >>> class MyModule2(nn.Module): + >>> + >>> # convert pred to fp32 + >>> @force_fp32(apply_to=('pred', )) + >>> def post_process(self, pred, others): + >>> pass + """ + + def force_fp32_wrapper(old_func): + + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # check if the module has set the attribute `fp16_enabled`, if not, + # just fallback to the original method. + if not isinstance(args[0], torch.nn.Module): + raise TypeError('@force_fp32 can only be used to decorate the ' + 'method of nn.Module') + if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): + return old_func(*args, **kwargs) + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get the argument names to be casted + args_to_cast = args_info.args if apply_to is None else apply_to + # convert the args that need to be processed + new_args = [] + if args: + arg_names = args_info.args[:len(args)] + for i, arg_name in enumerate(arg_names): + if arg_name in args_to_cast: + new_args.append( + cast_tensor_type(args[i], torch.half, torch.float)) + else: + new_args.append(args[i]) + # convert the kwargs that need to be processed + new_kwargs = dict() + if kwargs: + for arg_name, arg_value in kwargs.items(): + if arg_name in args_to_cast: + new_kwargs[arg_name] = cast_tensor_type( + arg_value, torch.half, torch.float) + else: + new_kwargs[arg_name] = arg_value + # apply converted arguments to the decorated method + if (TORCH_VERSION != 'parrots' and + digit_version(TORCH_VERSION) >= digit_version('1.6.0')): + with autocast(enabled=False): + output = old_func(*new_args, **new_kwargs) + else: + output = old_func(*new_args, **new_kwargs) + # cast the results back to fp32 if necessary + if out_fp16: + output = cast_tensor_type(output, torch.float, torch.half) + return output + + return new_func + + return force_fp32_wrapper + + +def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): + warnings.warning( + '"mmcv.runner.fp16_utils.allreduce_grads" is deprecated, and will be ' + 'removed in v2.8. Please switch to "mmcv.runner.allreduce_grads') + _allreduce_grads(params, coalesce=coalesce, bucket_size_mb=bucket_size_mb) + + +def wrap_fp16_model(model): + """Wrap the FP32 model to FP16. + + If you are using PyTorch >= 1.6, torch.cuda.amp is used as the + backend, otherwise, original mmcv implementation will be adopted. + + For PyTorch >= 1.6, this function will + 1. Set fp16 flag inside the model to True. + + Otherwise: + 1. Convert FP32 model to FP16. + 2. Remain some necessary layers to be FP32, e.g., normalization layers. + 3. Set `fp16_enabled` flag inside the model to True. + + Args: + model (nn.Module): Model in FP32. + """ + if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.6.0')): + # convert model to fp16 + model.half() + # patch the normalization layers to make it work in fp32 mode + patch_norm_fp32(model) + # set `fp16_enabled` flag + for m in model.modules(): + if hasattr(m, 'fp16_enabled'): + m.fp16_enabled = True + + +def patch_norm_fp32(module): + """Recursively convert normalization layers from FP16 to FP32. + + Args: + module (nn.Module): The modules to be converted in FP16. + + Returns: + nn.Module: The converted module, the normalization layers have been + converted to FP32. + """ + if isinstance(module, (nn.modules.batchnorm._BatchNorm, nn.GroupNorm)): + module.float() + if isinstance(module, nn.GroupNorm) or torch.__version__ < '1.3': + module.forward = patch_forward_method(module.forward, torch.half, + torch.float) + for child in module.children(): + patch_norm_fp32(child) + return module + + +def patch_forward_method(func, src_type, dst_type, convert_output=True): + """Patch the forward method of a module. + + Args: + func (callable): The original forward method. + src_type (torch.dtype): Type of input arguments to be converted from. + dst_type (torch.dtype): Type of input arguments to be converted to. + convert_output (bool): Whether to convert the output back to src_type. + + Returns: + callable: The patched forward method. + """ + + def new_forward(*args, **kwargs): + output = func(*cast_tensor_type(args, src_type, dst_type), + **cast_tensor_type(kwargs, src_type, dst_type)) + if convert_output: + output = cast_tensor_type(output, dst_type, src_type) + return output + + return new_forward + + +class LossScaler: + """Class that manages loss scaling in mixed precision training which + supports both dynamic or static mode. + + The implementation refers to + https://github.com/NVIDIA/apex/blob/master/apex/fp16_utils/loss_scaler.py. + Indirectly, by supplying ``mode='dynamic'`` for dynamic loss scaling. + It's important to understand how :class:`LossScaler` operates. + Loss scaling is designed to combat the problem of underflowing + gradients encountered at long times when training fp16 networks. + Dynamic loss scaling begins by attempting a very high loss + scale. Ironically, this may result in OVERflowing gradients. + If overflowing gradients are encountered, :class:`FP16_Optimizer` then + skips the update step for this particular iteration/minibatch, + and :class:`LossScaler` adjusts the loss scale to a lower value. + If a certain number of iterations occur without overflowing gradients + detected,:class:`LossScaler` increases the loss scale once more. + In this way :class:`LossScaler` attempts to "ride the edge" of always + using the highest loss scale possible without incurring overflow. + + Args: + init_scale (float): Initial loss scale value, default: 2**32. + scale_factor (float): Factor used when adjusting the loss scale. + Default: 2. + mode (str): Loss scaling mode. 'dynamic' or 'static' + scale_window (int): Number of consecutive iterations without an + overflow to wait before increasing the loss scale. Default: 1000. + """ + + def __init__(self, + init_scale=2**32, + mode='dynamic', + scale_factor=2., + scale_window=1000): + self.cur_scale = init_scale + self.cur_iter = 0 + assert mode in ('dynamic', + 'static'), 'mode can only be dynamic or static' + self.mode = mode + self.last_overflow_iter = -1 + self.scale_factor = scale_factor + self.scale_window = scale_window + + def has_overflow(self, params): + """Check if params contain overflow.""" + if self.mode != 'dynamic': + return False + for p in params: + if p.grad is not None and LossScaler._has_inf_or_nan(p.grad.data): + return True + return False + + def _has_inf_or_nan(x): + """Check if params contain NaN.""" + try: + cpu_sum = float(x.float().sum()) + except RuntimeError as instance: + if 'value cannot be converted' not in instance.args[0]: + raise + return True + else: + if cpu_sum == float('inf') or cpu_sum == -float('inf') \ + or cpu_sum != cpu_sum: + return True + return False + + def update_scale(self, overflow): + """update the current loss scale value when overflow happens.""" + if self.mode != 'dynamic': + return + if overflow: + self.cur_scale = max(self.cur_scale / self.scale_factor, 1) + self.last_overflow_iter = self.cur_iter + else: + if (self.cur_iter - self.last_overflow_iter) % \ + self.scale_window == 0: + self.cur_scale *= self.scale_factor + self.cur_iter += 1 + + def state_dict(self): + """Returns the state of the scaler as a :class:`dict`.""" + return dict( + cur_scale=self.cur_scale, + cur_iter=self.cur_iter, + mode=self.mode, + last_overflow_iter=self.last_overflow_iter, + scale_factor=self.scale_factor, + scale_window=self.scale_window) + + def load_state_dict(self, state_dict): + """Loads the loss_scaler state dict. + + Args: + state_dict (dict): scaler state. + """ + self.cur_scale = state_dict['cur_scale'] + self.cur_iter = state_dict['cur_iter'] + self.mode = state_dict['mode'] + self.last_overflow_iter = state_dict['last_overflow_iter'] + self.scale_factor = state_dict['scale_factor'] + self.scale_window = state_dict['scale_window'] + + @property + def loss_scale(self): + return self.cur_scale diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py new file mode 100644 index 000000000000..915af28cefab --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py @@ -0,0 +1,29 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .checkpoint import CheckpointHook +from .closure import ClosureHook +from .ema import EMAHook +from .evaluation import DistEvalHook, EvalHook +from .hook import HOOKS, Hook +from .iter_timer import IterTimerHook +from .logger import (DvcliveLoggerHook, LoggerHook, MlflowLoggerHook, + NeptuneLoggerHook, PaviLoggerHook, TensorboardLoggerHook, + TextLoggerHook, WandbLoggerHook) +from .lr_updater import LrUpdaterHook +from .memory import EmptyCacheHook +from .momentum_updater import MomentumUpdaterHook +from .optimizer import (Fp16OptimizerHook, GradientCumulativeFp16OptimizerHook, + GradientCumulativeOptimizerHook, OptimizerHook) +from .profiler import ProfilerHook +from .sampler_seed import DistSamplerSeedHook +from .sync_buffer import SyncBuffersHook + +__all__ = [ + 'HOOKS', 'Hook', 'CheckpointHook', 'ClosureHook', 'LrUpdaterHook', + 'OptimizerHook', 'Fp16OptimizerHook', 'IterTimerHook', + 'DistSamplerSeedHook', 'EmptyCacheHook', 'LoggerHook', 'MlflowLoggerHook', + 'PaviLoggerHook', 'TextLoggerHook', 'TensorboardLoggerHook', + 'NeptuneLoggerHook', 'WandbLoggerHook', 'DvcliveLoggerHook', + 'MomentumUpdaterHook', 'SyncBuffersHook', 'EMAHook', 'EvalHook', + 'DistEvalHook', 'ProfilerHook', 'GradientCumulativeOptimizerHook', + 'GradientCumulativeFp16OptimizerHook' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py new file mode 100644 index 000000000000..400e589a9de9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py @@ -0,0 +1,167 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import warnings + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio import FileClient +from ..dist_utils import allreduce_params, master_only +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class CheckpointHook(Hook): + """Save checkpoints periodically. + + Args: + interval (int): The saving period. If ``by_epoch=True``, interval + indicates epochs, otherwise it indicates iterations. + Default: -1, which means "never". + by_epoch (bool): Saving checkpoints by epoch or by iteration. + Default: True. + save_optimizer (bool): Whether to save optimizer state_dict in the + checkpoint. It is usually used for resuming experiments. + Default: True. + out_dir (str, optional): The root directory to save checkpoints. If not + specified, ``runner.work_dir`` will be used by default. If + specified, the ``out_dir`` will be the concatenation of ``out_dir`` + and the last level directory of ``runner.work_dir``. + `Changed in version 1.3.16.` + max_keep_ckpts (int, optional): The maximum checkpoints to keep. + In some cases we want only the latest few checkpoints and would + like to delete old ones to save the disk space. + Default: -1, which means unlimited. + save_last (bool, optional): Whether to force the last checkpoint to be + saved regardless of interval. Default: True. + sync_buffer (bool, optional): Whether to synchronize buffers in + different gpus. Default: False. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + `New in version 1.3.16.` + + .. warning:: + Before v1.3.16, the ``out_dir`` argument indicates the path where the + checkpoint is stored. However, since v1.3.16, ``out_dir`` indicates the + root directory and the final path to save checkpoint is the + concatenation of ``out_dir`` and the last level directory of + ``runner.work_dir``. Suppose the value of ``out_dir`` is "/path/of/A" + and the value of ``runner.work_dir`` is "/path/of/B", then the final + path will be "/path/of/A/B". + """ + + def __init__(self, + interval=-1, + by_epoch=True, + save_optimizer=True, + out_dir=None, + max_keep_ckpts=-1, + save_last=True, + sync_buffer=False, + file_client_args=None, + **kwargs): + self.interval = interval + self.by_epoch = by_epoch + self.save_optimizer = save_optimizer + self.out_dir = out_dir + self.max_keep_ckpts = max_keep_ckpts + self.save_last = save_last + self.args = kwargs + self.sync_buffer = sync_buffer + self.file_client_args = file_client_args + + def before_run(self, runner): + if not self.out_dir: + self.out_dir = runner.work_dir + + self.file_client = FileClient.infer_client(self.file_client_args, + self.out_dir) + + # if `self.out_dir` is not equal to `runner.work_dir`, it means that + # `self.out_dir` is set so the final `self.out_dir` is the + # concatenation of `self.out_dir` and the last level directory of + # `runner.work_dir` + if self.out_dir != runner.work_dir: + basename = osp.basename(runner.work_dir.rstrip(osp.sep)) + self.out_dir = self.file_client.join_path(self.out_dir, basename) + + runner.logger.info((f'Checkpoints will be saved to {self.out_dir} by ' + f'{self.file_client.name}.')) + + # disable the create_symlink option because some file backends do not + # allow to create a symlink + if 'create_symlink' in self.args: + if self.args[ + 'create_symlink'] and not self.file_client.allow_symlink: + self.args['create_symlink'] = False + warnings.warn( + ('create_symlink is set as True by the user but is changed' + 'to be False because creating symbolic link is not ' + f'allowed in {self.file_client.name}')) + else: + self.args['create_symlink'] = self.file_client.allow_symlink + + def after_train_epoch(self, runner): + if not self.by_epoch: + return + + # save checkpoint for following cases: + # 1. every ``self.interval`` epochs + # 2. reach the last epoch of training + if self.every_n_epochs( + runner, self.interval) or (self.save_last + and self.is_last_epoch(runner)): + runner.logger.info( + f'Saving checkpoint at {runner.epoch + 1} epochs') + if self.sync_buffer: + allreduce_params(runner.model.buffers()) + self._save_checkpoint(runner) + + @master_only + def _save_checkpoint(self, runner): + """Save the current checkpoint and delete unwanted checkpoint.""" + runner.save_checkpoint( + self.out_dir, save_optimizer=self.save_optimizer, **self.args) + if runner.meta is not None: + if self.by_epoch: + cur_ckpt_filename = self.args.get( + 'filename_tmpl', 'epoch_{}.pth').format(runner.epoch + 1) + else: + cur_ckpt_filename = self.args.get( + 'filename_tmpl', 'iter_{}.pth').format(runner.iter + 1) + runner.meta.setdefault('hook_msgs', dict()) + runner.meta['hook_msgs']['last_ckpt'] = self.file_client.join_path( + self.out_dir, cur_ckpt_filename) + # remove other checkpoints + if self.max_keep_ckpts > 0: + if self.by_epoch: + name = 'epoch_{}.pth' + current_ckpt = runner.epoch + 1 + else: + name = 'iter_{}.pth' + current_ckpt = runner.iter + 1 + redundant_ckpts = range( + current_ckpt - self.max_keep_ckpts * self.interval, 0, + -self.interval) + filename_tmpl = self.args.get('filename_tmpl', name) + for _step in redundant_ckpts: + ckpt_path = self.file_client.join_path( + self.out_dir, filename_tmpl.format(_step)) + if self.file_client.isfile(ckpt_path): + self.file_client.remove(ckpt_path) + else: + break + + def after_train_iter(self, runner): + if self.by_epoch: + return + + # save checkpoint for following cases: + # 1. every ``self.interval`` iterations + # 2. reach the last iteration of training + if self.every_n_iters( + runner, self.interval) or (self.save_last + and self.is_last_iter(runner)): + runner.logger.info( + f'Saving checkpoint at {runner.iter + 1} iterations') + if self.sync_buffer: + allreduce_params(runner.model.buffers()) + self._save_checkpoint(runner) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py new file mode 100644 index 000000000000..b955f81f425b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py @@ -0,0 +1,11 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class ClosureHook(Hook): + + def __init__(self, fn_name, fn): + assert hasattr(self, fn_name) + assert callable(fn) + setattr(self, fn_name, fn) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py new file mode 100644 index 000000000000..15c7e68088f0 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py @@ -0,0 +1,89 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...parallel import is_module_wrapper +from ..hooks.hook import HOOKS, Hook + + +@HOOKS.register_module() +class EMAHook(Hook): + r"""Exponential Moving Average Hook. + + Use Exponential Moving Average on all parameters of model in training + process. All parameters have a ema backup, which update by the formula + as below. EMAHook takes priority over EvalHook and CheckpointSaverHook. + + .. math:: + + \text{Xema\_{t+1}} = (1 - \text{momentum}) \times + \text{Xema\_{t}} + \text{momentum} \times X_t + + Args: + momentum (float): The momentum used for updating ema parameter. + Defaults to 0.0002. + interval (int): Update ema parameter every interval iteration. + Defaults to 1. + warm_up (int): During first warm_up steps, we may use smaller momentum + to update ema parameters more slowly. Defaults to 100. + resume_from (str): The checkpoint path. Defaults to None. + """ + + def __init__(self, + momentum=0.0002, + interval=1, + warm_up=100, + resume_from=None): + assert isinstance(interval, int) and interval > 0 + self.warm_up = warm_up + self.interval = interval + assert momentum > 0 and momentum < 1 + self.momentum = momentum**interval + self.checkpoint = resume_from + + def before_run(self, runner): + """To resume model with it's ema parameters more friendly. + + Register ema parameter as ``named_buffer`` to model + """ + model = runner.model + if is_module_wrapper(model): + model = model.module + self.param_ema_buffer = {} + self.model_parameters = dict(model.named_parameters(recurse=True)) + for name, value in self.model_parameters.items(): + # "." is not allowed in module's buffer name + buffer_name = f"ema_{name.replace('.', '_')}" + self.param_ema_buffer[name] = buffer_name + model.register_buffer(buffer_name, value.data.clone()) + self.model_buffers = dict(model.named_buffers(recurse=True)) + if self.checkpoint is not None: + runner.resume(self.checkpoint) + + def after_train_iter(self, runner): + """Update ema parameter every self.interval iterations.""" + curr_step = runner.iter + # We warm up the momentum considering the instability at beginning + momentum = min(self.momentum, + (1 + curr_step) / (self.warm_up + curr_step)) + if curr_step % self.interval != 0: + return + for name, parameter in self.model_parameters.items(): + buffer_name = self.param_ema_buffer[name] + buffer_parameter = self.model_buffers[buffer_name] + buffer_parameter.mul_(1 - momentum).add_(momentum, parameter.data) + + def after_train_epoch(self, runner): + """We load parameter values from ema backup to model before the + EvalHook.""" + self._swap_ema_parameters() + + def before_train_epoch(self, runner): + """We recover model's parameter from ema backup after last epoch's + EvalHook.""" + self._swap_ema_parameters() + + def _swap_ema_parameters(self): + """Swap the parameter of model with parameter in ema_buffer.""" + for name, value in self.model_parameters.items(): + temp = value.data.clone() + ema_buffer = self.model_buffers[self.param_ema_buffer[name]] + value.data.copy_(ema_buffer.data) + ema_buffer.data.copy_(temp) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py new file mode 100644 index 000000000000..5e5ba5b2e5d2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py @@ -0,0 +1,509 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import warnings +from math import inf + +import torch.distributed as dist +from torch.nn.modules.batchnorm import _BatchNorm +from torch.utils.data import DataLoader + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio import FileClient +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_seq_of +from .hook import Hook +from .logger import LoggerHook + + +class EvalHook(Hook): + """Non-Distributed evaluation hook. + + This hook will regularly perform evaluation in a given interval when + performing in non-distributed environment. + + Args: + dataloader (DataLoader): A PyTorch dataloader, whose dataset has + implemented ``evaluate`` function. + start (int | None, optional): Evaluation starting epoch. It enables + evaluation before the training starts if ``start`` <= the resuming + epoch. If None, whether to evaluate is merely decided by + ``interval``. Default: None. + interval (int): Evaluation interval. Default: 1. + by_epoch (bool): Determine perform evaluation by epoch or by iteration. + If set to True, it will perform by epoch. Otherwise, by iteration. + Default: True. + save_best (str, optional): If a metric is specified, it would measure + the best checkpoint during evaluation. The information about best + checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep + best score value and best checkpoint path, which will be also + loaded when resume checkpoint. Options are the evaluation metrics + on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox + detection and instance segmentation. ``AR@100`` for proposal + recall. If ``save_best`` is ``auto``, the first key of the returned + ``OrderedDict`` result will be used. Default: None. + rule (str | None, optional): Comparison rule for best score. If set to + None, it will infer a reasonable rule. Keys such as 'acc', 'top' + .etc will be inferred by 'greater' rule. Keys contain 'loss' will + be inferred by 'less' rule. Options are 'greater', 'less', None. + Default: None. + test_fn (callable, optional): test a model with samples from a + dataloader, and return the test results. If ``None``, the default + test function ``mmcv.engine.single_gpu_test`` will be used. + (default: ``None``) + greater_keys (List[str] | None, optional): Metric keys that will be + inferred by 'greater' comparison rule. If ``None``, + _default_greater_keys will be used. (default: ``None``) + less_keys (List[str] | None, optional): Metric keys that will be + inferred by 'less' comparison rule. If ``None``, _default_less_keys + will be used. (default: ``None``) + out_dir (str, optional): The root directory to save checkpoints. If not + specified, `runner.work_dir` will be used by default. If specified, + the `out_dir` will be the concatenation of `out_dir` and the last + level directory of `runner.work_dir`. + `New in version 1.3.16.` + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. Default: None. + `New in version 1.3.16.` + **eval_kwargs: Evaluation arguments fed into the evaluate function of + the dataset. + + Notes: + If new arguments are added for EvalHook, tools/test.py, + tools/eval_metric.py may be affected. + """ + + # Since the key for determine greater or less is related to the downstream + # tasks, downstream repos may need to overwrite the following inner + # variable accordingly. + + rule_map = {'greater': lambda x, y: x > y, 'less': lambda x, y: x < y} + init_value_map = {'greater': -inf, 'less': inf} + _default_greater_keys = [ + 'acc', 'top', 'AR@', 'auc', 'precision', 'mAP', 'mDice', 'mIoU', + 'mAcc', 'aAcc' + ] + _default_less_keys = ['loss'] + + def __init__(self, + dataloader, + start=None, + interval=1, + by_epoch=True, + save_best=None, + rule=None, + test_fn=None, + greater_keys=None, + less_keys=None, + out_dir=None, + file_client_args=None, + **eval_kwargs): + if not isinstance(dataloader, DataLoader): + raise TypeError(f'dataloader must be a pytorch DataLoader, ' + f'but got {type(dataloader)}') + + if interval <= 0: + raise ValueError(f'interval must be a positive number, ' + f'but got {interval}') + + assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean' + + if start is not None and start < 0: + raise ValueError(f'The evaluation start epoch {start} is smaller ' + f'than 0') + + self.dataloader = dataloader + self.interval = interval + self.start = start + self.by_epoch = by_epoch + + assert isinstance(save_best, str) or save_best is None, \ + '""save_best"" should be a str or None ' \ + f'rather than {type(save_best)}' + self.save_best = save_best + self.eval_kwargs = eval_kwargs + self.initial_flag = True + + if test_fn is None: + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.engine import single_gpu_test + self.test_fn = single_gpu_test + else: + self.test_fn = test_fn + + if greater_keys is None: + self.greater_keys = self._default_greater_keys + else: + if not isinstance(greater_keys, (list, tuple)): + greater_keys = (greater_keys, ) + assert is_seq_of(greater_keys, str) + self.greater_keys = greater_keys + + if less_keys is None: + self.less_keys = self._default_less_keys + else: + if not isinstance(less_keys, (list, tuple)): + less_keys = (less_keys, ) + assert is_seq_of(less_keys, str) + self.less_keys = less_keys + + if self.save_best is not None: + self.best_ckpt_path = None + self._init_rule(rule, self.save_best) + + self.out_dir = out_dir + self.file_client_args = file_client_args + + def _init_rule(self, rule, key_indicator): + """Initialize rule, key_indicator, comparison_func, and best score. + + Here is the rule to determine which rule is used for key indicator + when the rule is not specific (note that the key indicator matching + is case-insensitive): + 1. If the key indicator is in ``self.greater_keys``, the rule will be + specified as 'greater'. + 2. Or if the key indicator is in ``self.less_keys``, the rule will be + specified as 'less'. + 3. Or if the key indicator is equal to the substring in any one item + in ``self.greater_keys``, the rule will be specified as 'greater'. + 4. Or if the key indicator is equal to the substring in any one item + in ``self.less_keys``, the rule will be specified as 'less'. + + Args: + rule (str | None): Comparison rule for best score. + key_indicator (str | None): Key indicator to determine the + comparison rule. + """ + if rule not in self.rule_map and rule is not None: + raise KeyError(f'rule must be greater, less or None, ' + f'but got {rule}.') + + if rule is None: + if key_indicator != 'auto': + # `_lc` here means we use the lower case of keys for + # case-insensitive matching + key_indicator_lc = key_indicator.lower() + greater_keys = [key.lower() for key in self.greater_keys] + less_keys = [key.lower() for key in self.less_keys] + + if key_indicator_lc in greater_keys: + rule = 'greater' + elif key_indicator_lc in less_keys: + rule = 'less' + elif any(key in key_indicator_lc for key in greater_keys): + rule = 'greater' + elif any(key in key_indicator_lc for key in less_keys): + rule = 'less' + else: + raise ValueError(f'Cannot infer the rule for key ' + f'{key_indicator}, thus a specific rule ' + f'must be specified.') + self.rule = rule + self.key_indicator = key_indicator + if self.rule is not None: + self.compare_func = self.rule_map[self.rule] + + def before_run(self, runner): + if not self.out_dir: + self.out_dir = runner.work_dir + + self.file_client = FileClient.infer_client(self.file_client_args, + self.out_dir) + + # if `self.out_dir` is not equal to `runner.work_dir`, it means that + # `self.out_dir` is set so the final `self.out_dir` is the + # concatenation of `self.out_dir` and the last level directory of + # `runner.work_dir` + if self.out_dir != runner.work_dir: + basename = osp.basename(runner.work_dir.rstrip(osp.sep)) + self.out_dir = self.file_client.join_path(self.out_dir, basename) + runner.logger.info( + (f'The best checkpoint will be saved to {self.out_dir} by ' + f'{self.file_client.name}')) + + if self.save_best is not None: + if runner.meta is None: + warnings.warn('runner.meta is None. Creating an empty one.') + runner.meta = dict() + runner.meta.setdefault('hook_msgs', dict()) + self.best_ckpt_path = runner.meta['hook_msgs'].get( + 'best_ckpt', None) + + def before_train_iter(self, runner): + """Evaluate the model only at the start of training by iteration.""" + if self.by_epoch or not self.initial_flag: + return + if self.start is not None and runner.iter >= self.start: + self.after_train_iter(runner) + self.initial_flag = False + + def before_train_epoch(self, runner): + """Evaluate the model only at the start of training by epoch.""" + if not (self.by_epoch and self.initial_flag): + return + if self.start is not None and runner.epoch >= self.start: + self.after_train_epoch(runner) + self.initial_flag = False + + def after_train_iter(self, runner): + """Called after every training iter to evaluate the results.""" + if not self.by_epoch and self._should_evaluate(runner): + # Because the priority of EvalHook is higher than LoggerHook, the + # training log and the evaluating log are mixed. Therefore, + # we need to dump the training log and clear it before evaluating + # log is generated. In addition, this problem will only appear in + # `IterBasedRunner` whose `self.by_epoch` is False, because + # `EpochBasedRunner` whose `self.by_epoch` is True calls + # `_do_evaluate` in `after_train_epoch` stage, and at this stage + # the training log has been printed, so it will not cause any + # problem. more details at + # https://github.com/open-mmlab/mmsegmentation/issues/694 + for hook in runner._hooks: + if isinstance(hook, LoggerHook): + hook.after_train_iter(runner) + runner.log_buffer.clear() + + self._do_evaluate(runner) + + def after_train_epoch(self, runner): + """Called after every training epoch to evaluate the results.""" + if self.by_epoch and self._should_evaluate(runner): + self._do_evaluate(runner) + + def _do_evaluate(self, runner): + """perform evaluation and save ckpt.""" + results = self.test_fn(runner.model, self.dataloader) + runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) + key_score = self.evaluate(runner, results) + # the key_score may be `None` so it needs to skip the action to save + # the best checkpoint + if self.save_best and key_score: + self._save_ckpt(runner, key_score) + + def _should_evaluate(self, runner): + """Judge whether to perform evaluation. + + Here is the rule to judge whether to perform evaluation: + 1. It will not perform evaluation during the epoch/iteration interval, + which is determined by ``self.interval``. + 2. It will not perform evaluation if the start time is larger than + current time. + 3. It will not perform evaluation when current time is larger than + the start time but during epoch/iteration interval. + + Returns: + bool: The flag indicating whether to perform evaluation. + """ + if self.by_epoch: + current = runner.epoch + check_time = self.every_n_epochs + else: + current = runner.iter + check_time = self.every_n_iters + + if self.start is None: + if not check_time(runner, self.interval): + # No evaluation during the interval. + return False + elif (current + 1) < self.start: + # No evaluation if start is larger than the current time. + return False + else: + # Evaluation only at epochs/iters 3, 5, 7... + # if start==3 and interval==2 + if (current + 1 - self.start) % self.interval: + return False + return True + + def _save_ckpt(self, runner, key_score): + """Save the best checkpoint. + + It will compare the score according to the compare function, write + related information (best score, best checkpoint path) and save the + best checkpoint into ``work_dir``. + """ + if self.by_epoch: + current = f'epoch_{runner.epoch + 1}' + cur_type, cur_time = 'epoch', runner.epoch + 1 + else: + current = f'iter_{runner.iter + 1}' + cur_type, cur_time = 'iter', runner.iter + 1 + + best_score = runner.meta['hook_msgs'].get( + 'best_score', self.init_value_map[self.rule]) + if self.compare_func(key_score, best_score): + best_score = key_score + runner.meta['hook_msgs']['best_score'] = best_score + + if self.best_ckpt_path and self.file_client.isfile( + self.best_ckpt_path): + self.file_client.remove(self.best_ckpt_path) + runner.logger.info( + (f'The previous best checkpoint {self.best_ckpt_path} was ' + 'removed')) + + best_ckpt_name = f'best_{self.key_indicator}_{current}.pth' + self.best_ckpt_path = self.file_client.join_path( + self.out_dir, best_ckpt_name) + runner.meta['hook_msgs']['best_ckpt'] = self.best_ckpt_path + + runner.save_checkpoint( + self.out_dir, best_ckpt_name, create_symlink=False) + runner.logger.info( + f'Now best checkpoint is saved as {best_ckpt_name}.') + runner.logger.info( + f'Best {self.key_indicator} is {best_score:0.4f} ' + f'at {cur_time} {cur_type}.') + + def evaluate(self, runner, results): + """Evaluate the results. + + Args: + runner (:obj:`mmcv.Runner`): The underlined training runner. + results (list): Output results. + """ + eval_res = self.dataloader.dataset.evaluate( + results, logger=runner.logger, **self.eval_kwargs) + + for name, val in eval_res.items(): + runner.log_buffer.output[name] = val + runner.log_buffer.ready = True + + if self.save_best is not None: + # If the performance of model is pool, the `eval_res` may be an + # empty dict and it will raise exception when `self.save_best` is + # not None. More details at + # https://github.com/open-mmlab/mmdetection/issues/6265. + if not eval_res: + warnings.warn( + 'Since `eval_res` is an empty dict, the behavior to save ' + 'the best checkpoint will be skipped in this evaluation.') + return None + + if self.key_indicator == 'auto': + # infer from eval_results + self._init_rule(self.rule, list(eval_res.keys())[0]) + return eval_res[self.key_indicator] + + return None + + +class DistEvalHook(EvalHook): + """Distributed evaluation hook. + + This hook will regularly perform evaluation in a given interval when + performing in distributed environment. + + Args: + dataloader (DataLoader): A PyTorch dataloader, whose dataset has + implemented ``evaluate`` function. + start (int | None, optional): Evaluation starting epoch. It enables + evaluation before the training starts if ``start`` <= the resuming + epoch. If None, whether to evaluate is merely decided by + ``interval``. Default: None. + interval (int): Evaluation interval. Default: 1. + by_epoch (bool): Determine perform evaluation by epoch or by iteration. + If set to True, it will perform by epoch. Otherwise, by iteration. + default: True. + save_best (str, optional): If a metric is specified, it would measure + the best checkpoint during evaluation. The information about best + checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep + best score value and best checkpoint path, which will be also + loaded when resume checkpoint. Options are the evaluation metrics + on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox + detection and instance segmentation. ``AR@100`` for proposal + recall. If ``save_best`` is ``auto``, the first key of the returned + ``OrderedDict`` result will be used. Default: None. + rule (str | None, optional): Comparison rule for best score. If set to + None, it will infer a reasonable rule. Keys such as 'acc', 'top' + .etc will be inferred by 'greater' rule. Keys contain 'loss' will + be inferred by 'less' rule. Options are 'greater', 'less', None. + Default: None. + test_fn (callable, optional): test a model with samples from a + dataloader in a multi-gpu manner, and return the test results. If + ``None``, the default test function ``mmcv.engine.multi_gpu_test`` + will be used. (default: ``None``) + tmpdir (str | None): Temporary directory to save the results of all + processes. Default: None. + gpu_collect (bool): Whether to use gpu or cpu to collect results. + Default: False. + broadcast_bn_buffer (bool): Whether to broadcast the + buffer(running_mean and running_var) of rank 0 to other rank + before evaluation. Default: True. + out_dir (str, optional): The root directory to save checkpoints. If not + specified, `runner.work_dir` will be used by default. If specified, + the `out_dir` will be the concatenation of `out_dir` and the last + level directory of `runner.work_dir`. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. Default: None. + **eval_kwargs: Evaluation arguments fed into the evaluate function of + the dataset. + """ + + def __init__(self, + dataloader, + start=None, + interval=1, + by_epoch=True, + save_best=None, + rule=None, + test_fn=None, + greater_keys=None, + less_keys=None, + broadcast_bn_buffer=True, + tmpdir=None, + gpu_collect=False, + out_dir=None, + file_client_args=None, + **eval_kwargs): + + if test_fn is None: + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.engine import multi_gpu_test + test_fn = multi_gpu_test + + super().__init__( + dataloader, + start=start, + interval=interval, + by_epoch=by_epoch, + save_best=save_best, + rule=rule, + test_fn=test_fn, + greater_keys=greater_keys, + less_keys=less_keys, + out_dir=out_dir, + file_client_args=file_client_args, + **eval_kwargs) + + self.broadcast_bn_buffer = broadcast_bn_buffer + self.tmpdir = tmpdir + self.gpu_collect = gpu_collect + + def _do_evaluate(self, runner): + """perform evaluation and save ckpt.""" + # Synchronization of BatchNorm's buffer (running_mean + # and running_var) is not supported in the DDP of pytorch, + # which may cause the inconsistent performance of models in + # different ranks, so we broadcast BatchNorm's buffers + # of rank 0 to other ranks to avoid this. + if self.broadcast_bn_buffer: + model = runner.model + for name, module in model.named_modules(): + if isinstance(module, + _BatchNorm) and module.track_running_stats: + dist.broadcast(module.running_var, 0) + dist.broadcast(module.running_mean, 0) + + tmpdir = self.tmpdir + if tmpdir is None: + tmpdir = osp.join(runner.work_dir, '.eval_hook') + + results = self.test_fn( + runner.model, + self.dataloader, + tmpdir=tmpdir, + gpu_collect=self.gpu_collect) + if runner.rank == 0: + print('\n') + runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) + key_score = self.evaluate(runner, results) + # the key_score may be `None` so it needs to skip the action to + # save the best checkpoint + if self.save_best and key_score: + self._save_ckpt(runner, key_score) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py new file mode 100644 index 000000000000..c58065a1ac50 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py @@ -0,0 +1,92 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry, is_method_overridden + +HOOKS = Registry('hook') + + +class Hook: + stages = ('before_run', 'before_train_epoch', 'before_train_iter', + 'after_train_iter', 'after_train_epoch', 'before_val_epoch', + 'before_val_iter', 'after_val_iter', 'after_val_epoch', + 'after_run') + + def before_run(self, runner): + pass + + def after_run(self, runner): + pass + + def before_epoch(self, runner): + pass + + def after_epoch(self, runner): + pass + + def before_iter(self, runner): + pass + + def after_iter(self, runner): + pass + + def before_train_epoch(self, runner): + self.before_epoch(runner) + + def before_val_epoch(self, runner): + self.before_epoch(runner) + + def after_train_epoch(self, runner): + self.after_epoch(runner) + + def after_val_epoch(self, runner): + self.after_epoch(runner) + + def before_train_iter(self, runner): + self.before_iter(runner) + + def before_val_iter(self, runner): + self.before_iter(runner) + + def after_train_iter(self, runner): + self.after_iter(runner) + + def after_val_iter(self, runner): + self.after_iter(runner) + + def every_n_epochs(self, runner, n): + return (runner.epoch + 1) % n == 0 if n > 0 else False + + def every_n_inner_iters(self, runner, n): + return (runner.inner_iter + 1) % n == 0 if n > 0 else False + + def every_n_iters(self, runner, n): + return (runner.iter + 1) % n == 0 if n > 0 else False + + def end_of_epoch(self, runner): + return runner.inner_iter + 1 == len(runner.data_loader) + + def is_last_epoch(self, runner): + return runner.epoch + 1 == runner._max_epochs + + def is_last_iter(self, runner): + return runner.iter + 1 == runner._max_iters + + def get_triggered_stages(self): + trigger_stages = set() + for stage in Hook.stages: + if is_method_overridden(stage, Hook, self): + trigger_stages.add(stage) + + # some methods will be triggered in multi stages + # use this dict to map method to stages. + method_stages_map = { + 'before_epoch': ['before_train_epoch', 'before_val_epoch'], + 'after_epoch': ['after_train_epoch', 'after_val_epoch'], + 'before_iter': ['before_train_iter', 'before_val_iter'], + 'after_iter': ['after_train_iter', 'after_val_iter'], + } + + for method, map_stages in method_stages_map.items(): + if is_method_overridden(method, Hook, self): + trigger_stages.update(map_stages) + + return [stage for stage in Hook.stages if stage in trigger_stages] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py new file mode 100644 index 000000000000..cfd5002fe85f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py @@ -0,0 +1,18 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import time + +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class IterTimerHook(Hook): + + def before_epoch(self, runner): + self.t = time.time() + + def before_iter(self, runner): + runner.log_buffer.update({'data_time': time.time() - self.t}) + + def after_iter(self, runner): + runner.log_buffer.update({'time': time.time() - self.t}) + self.t = time.time() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py new file mode 100644 index 000000000000..a0b6b345640a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .base import LoggerHook +from .dvclive import DvcliveLoggerHook +from .mlflow import MlflowLoggerHook +from .neptune import NeptuneLoggerHook +from .pavi import PaviLoggerHook +from .tensorboard import TensorboardLoggerHook +from .text import TextLoggerHook +from .wandb import WandbLoggerHook + +__all__ = [ + 'LoggerHook', 'MlflowLoggerHook', 'PaviLoggerHook', + 'TensorboardLoggerHook', 'TextLoggerHook', 'WandbLoggerHook', + 'NeptuneLoggerHook', 'DvcliveLoggerHook' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py new file mode 100644 index 000000000000..f84525672945 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py @@ -0,0 +1,166 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numbers +from abc import ABCMeta, abstractmethod + +import numpy as np +import torch + +from ..hook import Hook + + +class LoggerHook(Hook): + """Base class for logger hooks. + + Args: + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging. + by_epoch (bool): Whether EpochBasedRunner is used. + """ + + __metaclass__ = ABCMeta + + def __init__(self, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True): + self.interval = interval + self.ignore_last = ignore_last + self.reset_flag = reset_flag + self.by_epoch = by_epoch + + @abstractmethod + def log(self, runner): + pass + + @staticmethod + def is_scalar(val, include_np=True, include_torch=True): + """Tell the input variable is a scalar or not. + + Args: + val: Input variable. + include_np (bool): Whether include 0-d np.ndarray as a scalar. + include_torch (bool): Whether include 0-d torch.Tensor as a scalar. + + Returns: + bool: True or False. + """ + if isinstance(val, numbers.Number): + return True + elif include_np and isinstance(val, np.ndarray) and val.ndim == 0: + return True + elif include_torch and isinstance(val, torch.Tensor) and len(val) == 1: + return True + else: + return False + + def get_mode(self, runner): + if runner.mode == 'train': + if 'time' in runner.log_buffer.output: + mode = 'train' + else: + mode = 'val' + elif runner.mode == 'val': + mode = 'val' + else: + raise ValueError(f"runner mode should be 'train' or 'val', " + f'but got {runner.mode}') + return mode + + def get_epoch(self, runner): + if runner.mode == 'train': + epoch = runner.epoch + 1 + elif runner.mode == 'val': + # normal val mode + # runner.epoch += 1 has been done before val workflow + epoch = runner.epoch + else: + raise ValueError(f"runner mode should be 'train' or 'val', " + f'but got {runner.mode}') + return epoch + + def get_iter(self, runner, inner_iter=False): + """Get the current training iteration step.""" + if self.by_epoch and inner_iter: + current_iter = runner.inner_iter + 1 + else: + current_iter = runner.iter + 1 + return current_iter + + def get_lr_tags(self, runner): + tags = {} + lrs = runner.current_lr() + if isinstance(lrs, dict): + for name, value in lrs.items(): + tags[f'learning_rate/{name}'] = value[0] + else: + tags['learning_rate'] = lrs[0] + return tags + + def get_momentum_tags(self, runner): + tags = {} + momentums = runner.current_momentum() + if isinstance(momentums, dict): + for name, value in momentums.items(): + tags[f'momentum/{name}'] = value[0] + else: + tags['momentum'] = momentums[0] + return tags + + def get_loggable_tags(self, + runner, + allow_scalar=True, + allow_text=False, + add_mode=True, + tags_to_skip=('time', 'data_time')): + tags = {} + for var, val in runner.log_buffer.output.items(): + if var in tags_to_skip: + continue + if self.is_scalar(val) and not allow_scalar: + continue + if isinstance(val, str) and not allow_text: + continue + if add_mode: + var = f'{self.get_mode(runner)}/{var}' + tags[var] = val + tags.update(self.get_lr_tags(runner)) + tags.update(self.get_momentum_tags(runner)) + return tags + + def before_run(self, runner): + for hook in runner.hooks[::-1]: + if isinstance(hook, LoggerHook): + hook.reset_flag = True + break + + def before_epoch(self, runner): + runner.log_buffer.clear() # clear logs of last epoch + + def after_train_iter(self, runner): + if self.by_epoch and self.every_n_inner_iters(runner, self.interval): + runner.log_buffer.average(self.interval) + elif not self.by_epoch and self.every_n_iters(runner, self.interval): + runner.log_buffer.average(self.interval) + elif self.end_of_epoch(runner) and not self.ignore_last: + # not precise but more stable + runner.log_buffer.average(self.interval) + + if runner.log_buffer.ready: + self.log(runner) + if self.reset_flag: + runner.log_buffer.clear_output() + + def after_train_epoch(self, runner): + if runner.log_buffer.ready: + self.log(runner) + if self.reset_flag: + runner.log_buffer.clear_output() + + def after_val_epoch(self, runner): + runner.log_buffer.average() + self.log(runner) + if self.reset_flag: + runner.log_buffer.clear_output() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py new file mode 100644 index 000000000000..687cdc58c033 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py @@ -0,0 +1,58 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class DvcliveLoggerHook(LoggerHook): + """Class to log metrics with dvclive. + + It requires `dvclive`_ to be installed. + + Args: + path (str): Directory where dvclive will write TSV log files. + interval (int): Logging interval (every k iterations). + Default 10. + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + Default: True. + reset_flag (bool): Whether to clear the output buffer after logging. + Default: True. + by_epoch (bool): Whether EpochBasedRunner is used. + Default: True. + + .. _dvclive: + https://dvc.org/doc/dvclive + """ + + def __init__(self, + path, + interval=10, + ignore_last=True, + reset_flag=True, + by_epoch=True): + + super(DvcliveLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.path = path + self.import_dvclive() + + def import_dvclive(self): + try: + import dvclive + except ImportError: + raise ImportError( + 'Please run "pip install dvclive" to install dvclive') + self.dvclive = dvclive + + @master_only + def before_run(self, runner): + self.dvclive.init(self.path) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + for k, v in tags.items(): + self.dvclive.log(k, v, step=self.get_iter(runner)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py new file mode 100644 index 000000000000..f9a72592be47 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py @@ -0,0 +1,78 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class MlflowLoggerHook(LoggerHook): + + def __init__(self, + exp_name=None, + tags=None, + log_model=True, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True): + """Class to log metrics and (optionally) a trained model to MLflow. + + It requires `MLflow`_ to be installed. + + Args: + exp_name (str, optional): Name of the experiment to be used. + Default None. + If not None, set the active experiment. + If experiment does not exist, an experiment with provided name + will be created. + tags (dict of str: str, optional): Tags for the current run. + Default None. + If not None, set tags for the current run. + log_model (bool, optional): Whether to log an MLflow artifact. + Default True. + If True, log runner.model as an MLflow artifact + for the current run. + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging + by_epoch (bool): Whether EpochBasedRunner is used. + + .. _MLflow: + https://www.mlflow.org/docs/latest/index.html + """ + super(MlflowLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.import_mlflow() + self.exp_name = exp_name + self.tags = tags + self.log_model = log_model + + def import_mlflow(self): + try: + import mlflow + import mlflow.pytorch as mlflow_pytorch + except ImportError: + raise ImportError( + 'Please run "pip install mlflow" to install mlflow') + self.mlflow = mlflow + self.mlflow_pytorch = mlflow_pytorch + + @master_only + def before_run(self, runner): + super(MlflowLoggerHook, self).before_run(runner) + if self.exp_name is not None: + self.mlflow.set_experiment(self.exp_name) + if self.tags is not None: + self.mlflow.set_tags(self.tags) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + self.mlflow.log_metrics(tags, step=self.get_iter(runner)) + + @master_only + def after_run(self, runner): + if self.log_model: + self.mlflow_pytorch.log_model(runner.model, 'models') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py new file mode 100644 index 000000000000..7a38772b0c93 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py @@ -0,0 +1,82 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class NeptuneLoggerHook(LoggerHook): + """Class to log metrics to NeptuneAI. + + It requires `neptune-client` to be installed. + + Args: + init_kwargs (dict): a dict contains the initialization keys as below: + - project (str): Name of a project in a form of + namespace/project_name. If None, the value of + NEPTUNE_PROJECT environment variable will be taken. + - api_token (str): User’s API token. + If None, the value of NEPTUNE_API_TOKEN environment + variable will be taken. Note: It is strongly recommended + to use NEPTUNE_API_TOKEN environment variable rather than + placing your API token in plain text in your source code. + - name (str, optional, default is 'Untitled'): Editable name of + the run. Name is displayed in the run's Details and in + Runs table as a column. + Check https://docs.neptune.ai/api-reference/neptune#init for + more init arguments. + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging + by_epoch (bool): Whether EpochBasedRunner is used. + + .. _NeptuneAI: + https://docs.neptune.ai/you-should-know/logging-metadata + """ + + def __init__(self, + init_kwargs=None, + interval=10, + ignore_last=True, + reset_flag=True, + with_step=True, + by_epoch=True): + + super(NeptuneLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.import_neptune() + self.init_kwargs = init_kwargs + self.with_step = with_step + + def import_neptune(self): + try: + import neptune.new as neptune + except ImportError: + raise ImportError( + 'Please run "pip install neptune-client" to install neptune') + self.neptune = neptune + self.run = None + + @master_only + def before_run(self, runner): + if self.init_kwargs: + self.run = self.neptune.init(**self.init_kwargs) + else: + self.run = self.neptune.init() + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + for tag_name, tag_value in tags.items(): + if self.with_step: + self.run[tag_name].log( + tag_value, step=self.get_iter(runner)) + else: + tags['global_step'] = self.get_iter(runner) + self.run[tag_name].log(tags) + + @master_only + def after_run(self, runner): + self.run.stop() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py new file mode 100644 index 000000000000..c221e9d87021 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py @@ -0,0 +1,117 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import json +import os +import os.path as osp + +import torch +import yaml + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from ....parallel.utils import is_module_wrapper +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class PaviLoggerHook(LoggerHook): + + def __init__(self, + init_kwargs=None, + add_graph=False, + add_last_ckpt=False, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True, + img_key='img_info'): + super(PaviLoggerHook, self).__init__(interval, ignore_last, reset_flag, + by_epoch) + self.init_kwargs = init_kwargs + self.add_graph = add_graph + self.add_last_ckpt = add_last_ckpt + self.img_key = img_key + + @master_only + def before_run(self, runner): + super(PaviLoggerHook, self).before_run(runner) + try: + from pavi import SummaryWriter + except ImportError: + raise ImportError('Please run "pip install pavi" to install pavi.') + + self.run_name = runner.work_dir.split('/')[-1] + + if not self.init_kwargs: + self.init_kwargs = dict() + self.init_kwargs['name'] = self.run_name + self.init_kwargs['model'] = runner._model_name + if runner.meta is not None: + if 'config_dict' in runner.meta: + config_dict = runner.meta['config_dict'] + assert isinstance( + config_dict, + dict), ('meta["config_dict"] has to be of a dict, ' + f'but got {type(config_dict)}') + elif 'config_file' in runner.meta: + config_file = runner.meta['config_file'] + config_dict = dict(mmcv.Config.fromfile(config_file)) + else: + config_dict = None + if config_dict is not None: + # 'max_.*iter' is parsed in pavi sdk as the maximum iterations + # to properly set up the progress bar. + config_dict = config_dict.copy() + config_dict.setdefault('max_iter', runner.max_iters) + # non-serializable values are first converted in + # mmcv.dump to json + config_dict = json.loads( + mmcv.dump(config_dict, file_format='json')) + session_text = yaml.dump(config_dict) + self.init_kwargs['session_text'] = session_text + self.writer = SummaryWriter(**self.init_kwargs) + + def get_step(self, runner): + """Get the total training step/epoch.""" + if self.get_mode(runner) == 'val' and self.by_epoch: + return self.get_epoch(runner) + else: + return self.get_iter(runner) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner, add_mode=False) + if tags: + self.writer.add_scalars( + self.get_mode(runner), tags, self.get_step(runner)) + + @master_only + def after_run(self, runner): + if self.add_last_ckpt: + ckpt_path = osp.join(runner.work_dir, 'latest.pth') + if osp.islink(ckpt_path): + ckpt_path = osp.join(runner.work_dir, os.readlink(ckpt_path)) + + if osp.isfile(ckpt_path): + # runner.epoch += 1 has been done before `after_run`. + iteration = runner.epoch if self.by_epoch else runner.iter + return self.writer.add_snapshot_file( + tag=self.run_name, + snapshot_file_path=ckpt_path, + iteration=iteration) + + # flush the buffer and send a task ending signal to Pavi + self.writer.close() + + @master_only + def before_epoch(self, runner): + if runner.epoch == 0 and self.add_graph: + if is_module_wrapper(runner.model): + _model = runner.model.module + else: + _model = runner.model + device = next(_model.parameters()).device + data = next(iter(runner.data_loader)) + image = data[self.img_key][0:1].to(device) + with torch.no_grad(): + self.writer.add_graph(_model, image) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py new file mode 100644 index 000000000000..477769cdcd2f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py @@ -0,0 +1,57 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class TensorboardLoggerHook(LoggerHook): + + def __init__(self, + log_dir=None, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True): + super(TensorboardLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.log_dir = log_dir + + @master_only + def before_run(self, runner): + super(TensorboardLoggerHook, self).before_run(runner) + if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.1')): + try: + from tensorboardX import SummaryWriter + except ImportError: + raise ImportError('Please install tensorboardX to use ' + 'TensorboardLoggerHook.') + else: + try: + from torch.utils.tensorboard import SummaryWriter + except ImportError: + raise ImportError( + 'Please run "pip install future tensorboard" to install ' + 'the dependencies to use torch.utils.tensorboard ' + '(applicable to PyTorch 1.1 or higher)') + + if self.log_dir is None: + self.log_dir = osp.join(runner.work_dir, 'tf_logs') + self.writer = SummaryWriter(self.log_dir) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner, allow_text=True) + for tag, val in tags.items(): + if isinstance(val, str): + self.writer.add_text(tag, val, self.get_iter(runner)) + else: + self.writer.add_scalar(tag, val, self.get_iter(runner)) + + @master_only + def after_run(self, runner): + self.writer.close() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py new file mode 100644 index 000000000000..5a46d166699d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py @@ -0,0 +1,256 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import datetime +import os +import os.path as osp +from collections import OrderedDict + +import torch +import torch.distributed as dist + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio.file_client import FileClient +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_tuple_of, scandir +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class TextLoggerHook(LoggerHook): + """Logger hook in text. + + In this logger hook, the information will be printed on terminal and + saved in json file. + + Args: + by_epoch (bool, optional): Whether EpochBasedRunner is used. + Default: True. + interval (int, optional): Logging interval (every k iterations). + Default: 10. + ignore_last (bool, optional): Ignore the log of last iterations in each + epoch if less than :attr:`interval`. Default: True. + reset_flag (bool, optional): Whether to clear the output buffer after + logging. Default: False. + interval_exp_name (int, optional): Logging interval for experiment + name. This feature is to help users conveniently get the experiment + information from screen or log file. Default: 1000. + out_dir (str, optional): Logs are saved in ``runner.work_dir`` default. + If ``out_dir`` is specified, logs will be copied to a new directory + which is the concatenation of ``out_dir`` and the last level + directory of ``runner.work_dir``. Default: None. + `New in version 1.3.16.` + out_suffix (str or tuple[str], optional): Those filenames ending with + ``out_suffix`` will be copied to ``out_dir``. + Default: ('.log.json', '.log', '.py'). + `New in version 1.3.16.` + keep_local (bool, optional): Whether to keep local log when + :attr:`out_dir` is specified. If False, the local log will be + removed. Default: True. + `New in version 1.3.16.` + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + `New in version 1.3.16.` + """ + + def __init__(self, + by_epoch=True, + interval=10, + ignore_last=True, + reset_flag=False, + interval_exp_name=1000, + out_dir=None, + out_suffix=('.log.json', '.log', '.py'), + keep_local=True, + file_client_args=None): + super(TextLoggerHook, self).__init__(interval, ignore_last, reset_flag, + by_epoch) + self.by_epoch = by_epoch + self.time_sec_tot = 0 + self.interval_exp_name = interval_exp_name + + if out_dir is None and file_client_args is not None: + raise ValueError( + 'file_client_args should be "None" when `out_dir` is not' + 'specified.') + self.out_dir = out_dir + + if not (out_dir is None or isinstance(out_dir, str) + or is_tuple_of(out_dir, str)): + raise TypeError('out_dir should be "None" or string or tuple of ' + 'string, but got {out_dir}') + self.out_suffix = out_suffix + + self.keep_local = keep_local + self.file_client_args = file_client_args + if self.out_dir is not None: + self.file_client = FileClient.infer_client(file_client_args, + self.out_dir) + + def before_run(self, runner): + super(TextLoggerHook, self).before_run(runner) + + if self.out_dir is not None: + self.file_client = FileClient.infer_client(self.file_client_args, + self.out_dir) + # The final `self.out_dir` is the concatenation of `self.out_dir` + # and the last level directory of `runner.work_dir` + basename = osp.basename(runner.work_dir.rstrip(osp.sep)) + self.out_dir = self.file_client.join_path(self.out_dir, basename) + runner.logger.info( + (f'Text logs will be saved to {self.out_dir} by ' + f'{self.file_client.name} after the training process.')) + + self.start_iter = runner.iter + self.json_log_path = osp.join(runner.work_dir, + f'{runner.timestamp}.log.json') + if runner.meta is not None: + self._dump_log(runner.meta, runner) + + def _get_max_memory(self, runner): + device = getattr(runner.model, 'output_device', None) + mem = torch.cuda.max_memory_allocated(device=device) + mem_mb = torch.tensor([mem / (1024 * 1024)], + dtype=torch.int, + device=device) + if runner.world_size > 1: + dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX) + return mem_mb.item() + + def _log_info(self, log_dict, runner): + # print exp name for users to distinguish experiments + # at every ``interval_exp_name`` iterations and the end of each epoch + if runner.meta is not None and 'exp_name' in runner.meta: + if (self.every_n_iters(runner, self.interval_exp_name)) or ( + self.by_epoch and self.end_of_epoch(runner)): + exp_info = f'Exp name: {runner.meta["exp_name"]}' + runner.logger.info(exp_info) + + if log_dict['mode'] == 'train': + if isinstance(log_dict['lr'], dict): + lr_str = [] + for k, val in log_dict['lr'].items(): + lr_str.append(f'lr_{k}: {val:.3e}') + lr_str = ' '.join(lr_str) + else: + lr_str = f'lr: {log_dict["lr"]:.3e}' + + # by epoch: Epoch [4][100/1000] + # by iter: Iter [100/100000] + if self.by_epoch: + log_str = f'Epoch [{log_dict["epoch"]}]' \ + f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t' + else: + log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t' + log_str += f'{lr_str}, ' + + if 'time' in log_dict.keys(): + self.time_sec_tot += (log_dict['time'] * self.interval) + time_sec_avg = self.time_sec_tot / ( + runner.iter - self.start_iter + 1) + eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1) + eta_str = str(datetime.timedelta(seconds=int(eta_sec))) + log_str += f'eta: {eta_str}, ' + log_str += f'time: {log_dict["time"]:.3f}, ' \ + f'data_time: {log_dict["data_time"]:.3f}, ' + # statistic memory + if torch.cuda.is_available(): + log_str += f'memory: {log_dict["memory"]}, ' + else: + # val/test time + # here 1000 is the length of the val dataloader + # by epoch: Epoch[val] [4][1000] + # by iter: Iter[val] [1000] + if self.by_epoch: + log_str = f'Epoch({log_dict["mode"]}) ' \ + f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t' + else: + log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t' + + log_items = [] + for name, val in log_dict.items(): + # TODO: resolve this hack + # these items have been in log_str + if name in [ + 'mode', 'Epoch', 'iter', 'lr', 'time', 'data_time', + 'memory', 'epoch' + ]: + continue + if isinstance(val, float): + val = f'{val:.4f}' + log_items.append(f'{name}: {val}') + log_str += ', '.join(log_items) + + runner.logger.info(log_str) + + def _dump_log(self, log_dict, runner): + # dump log in json format + json_log = OrderedDict() + for k, v in log_dict.items(): + json_log[k] = self._round_float(v) + # only append log at last line + if runner.rank == 0: + with open(self.json_log_path, 'a+') as f: + mmcv.dump(json_log, f, file_format='json') + f.write('\n') + + def _round_float(self, items): + if isinstance(items, list): + return [self._round_float(item) for item in items] + elif isinstance(items, float): + return round(items, 5) + else: + return items + + def log(self, runner): + if 'eval_iter_num' in runner.log_buffer.output: + # this doesn't modify runner.iter and is regardless of by_epoch + cur_iter = runner.log_buffer.output.pop('eval_iter_num') + else: + cur_iter = self.get_iter(runner, inner_iter=True) + + log_dict = OrderedDict( + mode=self.get_mode(runner), + epoch=self.get_epoch(runner), + iter=cur_iter) + + # only record lr of the first param group + cur_lr = runner.current_lr() + if isinstance(cur_lr, list): + log_dict['lr'] = cur_lr[0] + else: + assert isinstance(cur_lr, dict) + log_dict['lr'] = {} + for k, lr_ in cur_lr.items(): + assert isinstance(lr_, list) + log_dict['lr'].update({k: lr_[0]}) + + if 'time' in runner.log_buffer.output: + # statistic memory + if torch.cuda.is_available(): + log_dict['memory'] = self._get_max_memory(runner) + + log_dict = dict(log_dict, **runner.log_buffer.output) + + self._log_info(log_dict, runner) + self._dump_log(log_dict, runner) + return log_dict + + def after_run(self, runner): + # copy or upload logs to self.out_dir + if self.out_dir is not None: + for filename in scandir(runner.work_dir, self.out_suffix, True): + local_filepath = osp.join(runner.work_dir, filename) + out_filepath = self.file_client.join_path( + self.out_dir, filename) + with open(local_filepath, 'r') as f: + self.file_client.put_text(f.read(), out_filepath) + + runner.logger.info( + (f'The file {local_filepath} has been uploaded to ' + f'{out_filepath}.')) + + if not self.keep_local: + os.remove(local_filepath) + runner.logger.info( + (f'{local_filepath} was removed due to the ' + '`self.keep_local=False`')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py new file mode 100644 index 000000000000..9f6808462eb7 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py @@ -0,0 +1,56 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class WandbLoggerHook(LoggerHook): + + def __init__(self, + init_kwargs=None, + interval=10, + ignore_last=True, + reset_flag=False, + commit=True, + by_epoch=True, + with_step=True): + super(WandbLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.import_wandb() + self.init_kwargs = init_kwargs + self.commit = commit + self.with_step = with_step + + def import_wandb(self): + try: + import wandb + except ImportError: + raise ImportError( + 'Please run "pip install wandb" to install wandb') + self.wandb = wandb + + @master_only + def before_run(self, runner): + super(WandbLoggerHook, self).before_run(runner) + if self.wandb is None: + self.import_wandb() + if self.init_kwargs: + self.wandb.init(**self.init_kwargs) + else: + self.wandb.init() + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + if self.with_step: + self.wandb.log( + tags, step=self.get_iter(runner), commit=self.commit) + else: + tags['global_step'] = self.get_iter(runner) + self.wandb.log(tags, commit=self.commit) + + @master_only + def after_run(self, runner): + self.wandb.join() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py new file mode 100644 index 000000000000..fe0c84af6f91 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py @@ -0,0 +1,670 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numbers +from math import cos, pi + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from .hook import HOOKS, Hook + + +class LrUpdaterHook(Hook): + """LR Scheduler in MMCV. + + Args: + by_epoch (bool): LR changes epoch by epoch + warmup (string): Type of warmup used. It can be None(use no warmup), + 'constant', 'linear' or 'exp' + warmup_iters (int): The number of iterations or epochs that warmup + lasts + warmup_ratio (float): LR used at the beginning of warmup equals to + warmup_ratio * initial_lr + warmup_by_epoch (bool): When warmup_by_epoch == True, warmup_iters + means the number of epochs that warmup lasts, otherwise means the + number of iteration that warmup lasts + """ + + def __init__(self, + by_epoch=True, + warmup=None, + warmup_iters=0, + warmup_ratio=0.1, + warmup_by_epoch=False): + # validate the "warmup" argument + if warmup is not None: + if warmup not in ['constant', 'linear', 'exp']: + raise ValueError( + f'"{warmup}" is not a supported type for warming up, valid' + ' types are "constant" and "linear"') + if warmup is not None: + assert warmup_iters > 0, \ + '"warmup_iters" must be a positive integer' + assert 0 < warmup_ratio <= 1.0, \ + '"warmup_ratio" must be in range (0,1]' + + self.by_epoch = by_epoch + self.warmup = warmup + self.warmup_iters = warmup_iters + self.warmup_ratio = warmup_ratio + self.warmup_by_epoch = warmup_by_epoch + + if self.warmup_by_epoch: + self.warmup_epochs = self.warmup_iters + self.warmup_iters = None + else: + self.warmup_epochs = None + + self.base_lr = [] # initial lr for all param groups + self.regular_lr = [] # expected lr if no warming up is performed + + def _set_lr(self, runner, lr_groups): + if isinstance(runner.optimizer, dict): + for k, optim in runner.optimizer.items(): + for param_group, lr in zip(optim.param_groups, lr_groups[k]): + param_group['lr'] = lr + else: + for param_group, lr in zip(runner.optimizer.param_groups, + lr_groups): + param_group['lr'] = lr + + def get_lr(self, runner, base_lr): + raise NotImplementedError + + def get_regular_lr(self, runner): + if isinstance(runner.optimizer, dict): + lr_groups = {} + for k in runner.optimizer.keys(): + _lr_group = [ + self.get_lr(runner, _base_lr) + for _base_lr in self.base_lr[k] + ] + lr_groups.update({k: _lr_group}) + + return lr_groups + else: + return [self.get_lr(runner, _base_lr) for _base_lr in self.base_lr] + + def get_warmup_lr(self, cur_iters): + + def _get_warmup_lr(cur_iters, regular_lr): + if self.warmup == 'constant': + warmup_lr = [_lr * self.warmup_ratio for _lr in regular_lr] + elif self.warmup == 'linear': + k = (1 - cur_iters / self.warmup_iters) * (1 - + self.warmup_ratio) + warmup_lr = [_lr * (1 - k) for _lr in regular_lr] + elif self.warmup == 'exp': + k = self.warmup_ratio**(1 - cur_iters / self.warmup_iters) + warmup_lr = [_lr * k for _lr in regular_lr] + return warmup_lr + + if isinstance(self.regular_lr, dict): + lr_groups = {} + for key, regular_lr in self.regular_lr.items(): + lr_groups[key] = _get_warmup_lr(cur_iters, regular_lr) + return lr_groups + else: + return _get_warmup_lr(cur_iters, self.regular_lr) + + def before_run(self, runner): + # NOTE: when resuming from a checkpoint, if 'initial_lr' is not saved, + # it will be set according to the optimizer params + if isinstance(runner.optimizer, dict): + self.base_lr = {} + for k, optim in runner.optimizer.items(): + for group in optim.param_groups: + group.setdefault('initial_lr', group['lr']) + _base_lr = [ + group['initial_lr'] for group in optim.param_groups + ] + self.base_lr.update({k: _base_lr}) + else: + for group in runner.optimizer.param_groups: + group.setdefault('initial_lr', group['lr']) + self.base_lr = [ + group['initial_lr'] for group in runner.optimizer.param_groups + ] + + def before_train_epoch(self, runner): + if self.warmup_iters is None: + epoch_len = len(runner.data_loader) + self.warmup_iters = self.warmup_epochs * epoch_len + + if not self.by_epoch: + return + + self.regular_lr = self.get_regular_lr(runner) + self._set_lr(runner, self.regular_lr) + + def before_train_iter(self, runner): + cur_iter = runner.iter + if not self.by_epoch: + self.regular_lr = self.get_regular_lr(runner) + if self.warmup is None or cur_iter >= self.warmup_iters: + self._set_lr(runner, self.regular_lr) + else: + warmup_lr = self.get_warmup_lr(cur_iter) + self._set_lr(runner, warmup_lr) + elif self.by_epoch: + if self.warmup is None or cur_iter > self.warmup_iters: + return + elif cur_iter == self.warmup_iters: + self._set_lr(runner, self.regular_lr) + else: + warmup_lr = self.get_warmup_lr(cur_iter) + self._set_lr(runner, warmup_lr) + + +@HOOKS.register_module() +class FixedLrUpdaterHook(LrUpdaterHook): + + def __init__(self, **kwargs): + super(FixedLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + return base_lr + + +@HOOKS.register_module() +class StepLrUpdaterHook(LrUpdaterHook): + """Step LR scheduler with min_lr clipping. + + Args: + step (int | list[int]): Step to decay the LR. If an int value is given, + regard it as the decay interval. If a list is given, decay LR at + these steps. + gamma (float, optional): Decay LR ratio. Default: 0.1. + min_lr (float, optional): Minimum LR value to keep. If LR after decay + is lower than `min_lr`, it will be clipped to this value. If None + is given, we don't perform lr clipping. Default: None. + """ + + def __init__(self, step, gamma=0.1, min_lr=None, **kwargs): + if isinstance(step, list): + assert mmcv.is_list_of(step, int) + assert all([s > 0 for s in step]) + elif isinstance(step, int): + assert step > 0 + else: + raise TypeError('"step" must be a list or integer') + self.step = step + self.gamma = gamma + self.min_lr = min_lr + super(StepLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + progress = runner.epoch if self.by_epoch else runner.iter + + # calculate exponential term + if isinstance(self.step, int): + exp = progress // self.step + else: + exp = len(self.step) + for i, s in enumerate(self.step): + if progress < s: + exp = i + break + + lr = base_lr * (self.gamma**exp) + if self.min_lr is not None: + # clip to a minimum value + lr = max(lr, self.min_lr) + return lr + + +@HOOKS.register_module() +class ExpLrUpdaterHook(LrUpdaterHook): + + def __init__(self, gamma, **kwargs): + self.gamma = gamma + super(ExpLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + progress = runner.epoch if self.by_epoch else runner.iter + return base_lr * self.gamma**progress + + +@HOOKS.register_module() +class PolyLrUpdaterHook(LrUpdaterHook): + + def __init__(self, power=1., min_lr=0., **kwargs): + self.power = power + self.min_lr = min_lr + super(PolyLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + if self.by_epoch: + progress = runner.epoch + max_progress = runner.max_epochs + else: + progress = runner.iter + max_progress = runner.max_iters + coeff = (1 - progress / max_progress)**self.power + return (base_lr - self.min_lr) * coeff + self.min_lr + + +@HOOKS.register_module() +class InvLrUpdaterHook(LrUpdaterHook): + + def __init__(self, gamma, power=1., **kwargs): + self.gamma = gamma + self.power = power + super(InvLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + progress = runner.epoch if self.by_epoch else runner.iter + return base_lr * (1 + self.gamma * progress)**(-self.power) + + +@HOOKS.register_module() +class CosineAnnealingLrUpdaterHook(LrUpdaterHook): + + def __init__(self, min_lr=None, min_lr_ratio=None, **kwargs): + assert (min_lr is None) ^ (min_lr_ratio is None) + self.min_lr = min_lr + self.min_lr_ratio = min_lr_ratio + super(CosineAnnealingLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + if self.by_epoch: + progress = runner.epoch + max_progress = runner.max_epochs + else: + progress = runner.iter + max_progress = runner.max_iters + + if self.min_lr_ratio is not None: + target_lr = base_lr * self.min_lr_ratio + else: + target_lr = self.min_lr + return annealing_cos(base_lr, target_lr, progress / max_progress) + + +@HOOKS.register_module() +class FlatCosineAnnealingLrUpdaterHook(LrUpdaterHook): + """Flat + Cosine lr schedule. + + Modified from https://github.com/fastai/fastai/blob/master/fastai/callback/schedule.py#L128 # noqa: E501 + + Args: + start_percent (float): When to start annealing the learning rate + after the percentage of the total training steps. + The value should be in range [0, 1). + Default: 0.75 + min_lr (float, optional): The minimum lr. Default: None. + min_lr_ratio (float, optional): The ratio of minimum lr to the base lr. + Either `min_lr` or `min_lr_ratio` should be specified. + Default: None. + """ + + def __init__(self, + start_percent=0.75, + min_lr=None, + min_lr_ratio=None, + **kwargs): + assert (min_lr is None) ^ (min_lr_ratio is None) + if start_percent < 0 or start_percent > 1 or not isinstance( + start_percent, float): + raise ValueError( + 'expected float between 0 and 1 start_percent, but ' + f'got {start_percent}') + self.start_percent = start_percent + self.min_lr = min_lr + self.min_lr_ratio = min_lr_ratio + super(FlatCosineAnnealingLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + if self.by_epoch: + start = round(runner.max_epochs * self.start_percent) + progress = runner.epoch - start + max_progress = runner.max_epochs - start + else: + start = round(runner.max_iters * self.start_percent) + progress = runner.iter - start + max_progress = runner.max_iters - start + + if self.min_lr_ratio is not None: + target_lr = base_lr * self.min_lr_ratio + else: + target_lr = self.min_lr + + if progress < 0: + return base_lr + else: + return annealing_cos(base_lr, target_lr, progress / max_progress) + + +@HOOKS.register_module() +class CosineRestartLrUpdaterHook(LrUpdaterHook): + """Cosine annealing with restarts learning rate scheme. + + Args: + periods (list[int]): Periods for each cosine anneling cycle. + restart_weights (list[float], optional): Restart weights at each + restart iteration. Default: [1]. + min_lr (float, optional): The minimum lr. Default: None. + min_lr_ratio (float, optional): The ratio of minimum lr to the base lr. + Either `min_lr` or `min_lr_ratio` should be specified. + Default: None. + """ + + def __init__(self, + periods, + restart_weights=[1], + min_lr=None, + min_lr_ratio=None, + **kwargs): + assert (min_lr is None) ^ (min_lr_ratio is None) + self.periods = periods + self.min_lr = min_lr + self.min_lr_ratio = min_lr_ratio + self.restart_weights = restart_weights + assert (len(self.periods) == len(self.restart_weights) + ), 'periods and restart_weights should have the same length.' + super(CosineRestartLrUpdaterHook, self).__init__(**kwargs) + + self.cumulative_periods = [ + sum(self.periods[0:i + 1]) for i in range(0, len(self.periods)) + ] + + def get_lr(self, runner, base_lr): + if self.by_epoch: + progress = runner.epoch + else: + progress = runner.iter + + if self.min_lr_ratio is not None: + target_lr = base_lr * self.min_lr_ratio + else: + target_lr = self.min_lr + + idx = get_position_from_periods(progress, self.cumulative_periods) + current_weight = self.restart_weights[idx] + nearest_restart = 0 if idx == 0 else self.cumulative_periods[idx - 1] + current_periods = self.periods[idx] + + alpha = min((progress - nearest_restart) / current_periods, 1) + return annealing_cos(base_lr, target_lr, alpha, current_weight) + + +def get_position_from_periods(iteration, cumulative_periods): + """Get the position from a period list. + + It will return the index of the right-closest number in the period list. + For example, the cumulative_periods = [100, 200, 300, 400], + if iteration == 50, return 0; + if iteration == 210, return 2; + if iteration == 300, return 3. + + Args: + iteration (int): Current iteration. + cumulative_periods (list[int]): Cumulative period list. + + Returns: + int: The position of the right-closest number in the period list. + """ + for i, period in enumerate(cumulative_periods): + if iteration < period: + return i + raise ValueError(f'Current iteration {iteration} exceeds ' + f'cumulative_periods {cumulative_periods}') + + +@HOOKS.register_module() +class CyclicLrUpdaterHook(LrUpdaterHook): + """Cyclic LR Scheduler. + + Implement the cyclical learning rate policy (CLR) described in + https://arxiv.org/pdf/1506.01186.pdf + + Different from the original paper, we use cosine annealing rather than + triangular policy inside a cycle. This improves the performance in the + 3D detection area. + + Args: + by_epoch (bool): Whether to update LR by epoch. + target_ratio (tuple[float]): Relative ratio of the highest LR and the + lowest LR to the initial LR. + cyclic_times (int): Number of cycles during training + step_ratio_up (float): The ratio of the increasing process of LR in + the total cycle. + anneal_strategy (str): {'cos', 'linear'} + Specifies the annealing strategy: 'cos' for cosine annealing, + 'linear' for linear annealing. Default: 'cos'. + """ + + def __init__(self, + by_epoch=False, + target_ratio=(10, 1e-4), + cyclic_times=1, + step_ratio_up=0.4, + anneal_strategy='cos', + **kwargs): + if isinstance(target_ratio, float): + target_ratio = (target_ratio, target_ratio / 1e5) + elif isinstance(target_ratio, tuple): + target_ratio = (target_ratio[0], target_ratio[0] / 1e5) \ + if len(target_ratio) == 1 else target_ratio + else: + raise ValueError('target_ratio should be either float ' + f'or tuple, got {type(target_ratio)}') + + assert len(target_ratio) == 2, \ + '"target_ratio" must be list or tuple of two floats' + assert 0 <= step_ratio_up < 1.0, \ + '"step_ratio_up" must be in range [0,1)' + + self.target_ratio = target_ratio + self.cyclic_times = cyclic_times + self.step_ratio_up = step_ratio_up + self.lr_phases = [] # init lr_phases + # validate anneal_strategy + if anneal_strategy not in ['cos', 'linear']: + raise ValueError('anneal_strategy must be one of "cos" or ' + f'"linear", instead got {anneal_strategy}') + elif anneal_strategy == 'cos': + self.anneal_func = annealing_cos + elif anneal_strategy == 'linear': + self.anneal_func = annealing_linear + + assert not by_epoch, \ + 'currently only support "by_epoch" = False' + super(CyclicLrUpdaterHook, self).__init__(by_epoch, **kwargs) + + def before_run(self, runner): + super(CyclicLrUpdaterHook, self).before_run(runner) + # initiate lr_phases + # total lr_phases are separated as up and down + max_iter_per_phase = runner.max_iters // self.cyclic_times + iter_up_phase = int(self.step_ratio_up * max_iter_per_phase) + self.lr_phases.append( + [0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]]) + self.lr_phases.append([ + iter_up_phase, max_iter_per_phase, max_iter_per_phase, + self.target_ratio[0], self.target_ratio[1] + ]) + + def get_lr(self, runner, base_lr): + curr_iter = runner.iter + for (start_iter, end_iter, max_iter_per_phase, start_ratio, + end_ratio) in self.lr_phases: + curr_iter %= max_iter_per_phase + if start_iter <= curr_iter < end_iter: + progress = curr_iter - start_iter + return self.anneal_func(base_lr * start_ratio, + base_lr * end_ratio, + progress / (end_iter - start_iter)) + + +@HOOKS.register_module() +class OneCycleLrUpdaterHook(LrUpdaterHook): + """One Cycle LR Scheduler. + + The 1cycle learning rate policy changes the learning rate after every + batch. The one cycle learning rate policy is described in + https://arxiv.org/pdf/1708.07120.pdf + + Args: + max_lr (float or list): Upper learning rate boundaries in the cycle + for each parameter group. + total_steps (int, optional): The total number of steps in the cycle. + Note that if a value is not provided here, it will be the max_iter + of runner. Default: None. + pct_start (float): The percentage of the cycle (in number of steps) + spent increasing the learning rate. + Default: 0.3 + anneal_strategy (str): {'cos', 'linear'} + Specifies the annealing strategy: 'cos' for cosine annealing, + 'linear' for linear annealing. + Default: 'cos' + div_factor (float): Determines the initial learning rate via + initial_lr = max_lr/div_factor + Default: 25 + final_div_factor (float): Determines the minimum learning rate via + min_lr = initial_lr/final_div_factor + Default: 1e4 + three_phase (bool): If three_phase is True, use a third phase of the + schedule to annihilate the learning rate according to + final_div_factor instead of modifying the second phase (the first + two phases will be symmetrical about the step indicated by + pct_start). + Default: False + """ + + def __init__(self, + max_lr, + total_steps=None, + pct_start=0.3, + anneal_strategy='cos', + div_factor=25, + final_div_factor=1e4, + three_phase=False, + **kwargs): + # validate by_epoch, currently only support by_epoch = False + if 'by_epoch' not in kwargs: + kwargs['by_epoch'] = False + else: + assert not kwargs['by_epoch'], \ + 'currently only support "by_epoch" = False' + if not isinstance(max_lr, (numbers.Number, list, dict)): + raise ValueError('the type of max_lr must be the one of list or ' + f'dict, but got {type(max_lr)}') + self._max_lr = max_lr + if total_steps is not None: + if not isinstance(total_steps, int): + raise ValueError('the type of total_steps must be int, but' + f'got {type(total_steps)}') + self.total_steps = total_steps + # validate pct_start + if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float): + raise ValueError('expected float between 0 and 1 pct_start, but ' + f'got {pct_start}') + self.pct_start = pct_start + # validate anneal_strategy + if anneal_strategy not in ['cos', 'linear']: + raise ValueError('anneal_strategy must be one of "cos" or ' + f'"linear", instead got {anneal_strategy}') + elif anneal_strategy == 'cos': + self.anneal_func = annealing_cos + elif anneal_strategy == 'linear': + self.anneal_func = annealing_linear + self.div_factor = div_factor + self.final_div_factor = final_div_factor + self.three_phase = three_phase + self.lr_phases = [] # init lr_phases + super(OneCycleLrUpdaterHook, self).__init__(**kwargs) + + def before_run(self, runner): + if hasattr(self, 'total_steps'): + total_steps = self.total_steps + else: + total_steps = runner.max_iters + if total_steps < runner.max_iters: + raise ValueError( + 'The total steps must be greater than or equal to max ' + f'iterations {runner.max_iters} of runner, but total steps ' + f'is {total_steps}.') + + if isinstance(runner.optimizer, dict): + self.base_lr = {} + for k, optim in runner.optimizer.items(): + _max_lr = format_param(k, optim, self._max_lr) + self.base_lr[k] = [lr / self.div_factor for lr in _max_lr] + for group, lr in zip(optim.param_groups, self.base_lr[k]): + group.setdefault('initial_lr', lr) + else: + k = type(runner.optimizer).__name__ + _max_lr = format_param(k, runner.optimizer, self._max_lr) + self.base_lr = [lr / self.div_factor for lr in _max_lr] + for group, lr in zip(runner.optimizer.param_groups, self.base_lr): + group.setdefault('initial_lr', lr) + + if self.three_phase: + self.lr_phases.append( + [float(self.pct_start * total_steps) - 1, 1, self.div_factor]) + self.lr_phases.append([ + float(2 * self.pct_start * total_steps) - 2, self.div_factor, 1 + ]) + self.lr_phases.append( + [total_steps - 1, 1, 1 / self.final_div_factor]) + else: + self.lr_phases.append( + [float(self.pct_start * total_steps) - 1, 1, self.div_factor]) + self.lr_phases.append( + [total_steps - 1, self.div_factor, 1 / self.final_div_factor]) + + def get_lr(self, runner, base_lr): + curr_iter = runner.iter + start_iter = 0 + for i, (end_iter, start_lr, end_lr) in enumerate(self.lr_phases): + if curr_iter <= end_iter: + pct = (curr_iter - start_iter) / (end_iter - start_iter) + lr = self.anneal_func(base_lr * start_lr, base_lr * end_lr, + pct) + break + start_iter = end_iter + return lr + + +def annealing_cos(start, end, factor, weight=1): + """Calculate annealing cos learning rate. + + Cosine anneal from `weight * start + (1 - weight) * end` to `end` as + percentage goes from 0.0 to 1.0. + + Args: + start (float): The starting learning rate of the cosine annealing. + end (float): The ending learing rate of the cosine annealing. + factor (float): The coefficient of `pi` when calculating the current + percentage. Range from 0.0 to 1.0. + weight (float, optional): The combination factor of `start` and `end` + when calculating the actual starting learning rate. Default to 1. + """ + cos_out = cos(pi * factor) + 1 + return end + 0.5 * weight * (start - end) * cos_out + + +def annealing_linear(start, end, factor): + """Calculate annealing linear learning rate. + + Linear anneal from `start` to `end` as percentage goes from 0.0 to 1.0. + + Args: + start (float): The starting learning rate of the linear annealing. + end (float): The ending learing rate of the linear annealing. + factor (float): The coefficient of `pi` when calculating the current + percentage. Range from 0.0 to 1.0. + """ + return start + (end - start) * factor + + +def format_param(name, optim, param): + if isinstance(param, numbers.Number): + return [param] * len(optim.param_groups) + elif isinstance(param, (list, tuple)): # multi param groups + if len(param) != len(optim.param_groups): + raise ValueError(f'expected {len(optim.param_groups)} ' + f'values for {name}, got {len(param)}') + return param + else: # multi optimizers + if name not in param: + raise KeyError(f'{name} is not found in {param.keys()}') + return param[name] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py new file mode 100644 index 000000000000..70cf9a838fb3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py @@ -0,0 +1,25 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class EmptyCacheHook(Hook): + + def __init__(self, before_epoch=False, after_epoch=True, after_iter=False): + self._before_epoch = before_epoch + self._after_epoch = after_epoch + self._after_iter = after_iter + + def after_iter(self, runner): + if self._after_iter: + torch.cuda.empty_cache() + + def before_epoch(self, runner): + if self._before_epoch: + torch.cuda.empty_cache() + + def after_epoch(self, runner): + if self._after_epoch: + torch.cuda.empty_cache() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py new file mode 100644 index 000000000000..78a436ecfa21 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py @@ -0,0 +1,493 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from .hook import HOOKS, Hook +from .lr_updater import annealing_cos, annealing_linear, format_param + + +class MomentumUpdaterHook(Hook): + + def __init__(self, + by_epoch=True, + warmup=None, + warmup_iters=0, + warmup_ratio=0.9): + # validate the "warmup" argument + if warmup is not None: + if warmup not in ['constant', 'linear', 'exp']: + raise ValueError( + f'"{warmup}" is not a supported type for warming up, valid' + ' types are "constant" and "linear"') + if warmup is not None: + assert warmup_iters > 0, \ + '"warmup_iters" must be a positive integer' + assert 0 < warmup_ratio <= 1.0, \ + '"warmup_momentum" must be in range (0,1]' + + self.by_epoch = by_epoch + self.warmup = warmup + self.warmup_iters = warmup_iters + self.warmup_ratio = warmup_ratio + + self.base_momentum = [] # initial momentum for all param groups + self.regular_momentum = [ + ] # expected momentum if no warming up is performed + + def _set_momentum(self, runner, momentum_groups): + if isinstance(runner.optimizer, dict): + for k, optim in runner.optimizer.items(): + for param_group, mom in zip(optim.param_groups, + momentum_groups[k]): + if 'momentum' in param_group.keys(): + param_group['momentum'] = mom + elif 'betas' in param_group.keys(): + param_group['betas'] = (mom, param_group['betas'][1]) + else: + for param_group, mom in zip(runner.optimizer.param_groups, + momentum_groups): + if 'momentum' in param_group.keys(): + param_group['momentum'] = mom + elif 'betas' in param_group.keys(): + param_group['betas'] = (mom, param_group['betas'][1]) + + def get_momentum(self, runner, base_momentum): + raise NotImplementedError + + def get_regular_momentum(self, runner): + if isinstance(runner.optimizer, dict): + momentum_groups = {} + for k in runner.optimizer.keys(): + _momentum_group = [ + self.get_momentum(runner, _base_momentum) + for _base_momentum in self.base_momentum[k] + ] + momentum_groups.update({k: _momentum_group}) + return momentum_groups + else: + return [ + self.get_momentum(runner, _base_momentum) + for _base_momentum in self.base_momentum + ] + + def get_warmup_momentum(self, cur_iters): + + def _get_warmup_momentum(cur_iters, regular_momentum): + if self.warmup == 'constant': + warmup_momentum = [ + _momentum / self.warmup_ratio + for _momentum in self.regular_momentum + ] + elif self.warmup == 'linear': + k = (1 - cur_iters / self.warmup_iters) * (1 - + self.warmup_ratio) + warmup_momentum = [ + _momentum / (1 - k) for _momentum in self.regular_mom + ] + elif self.warmup == 'exp': + k = self.warmup_ratio**(1 - cur_iters / self.warmup_iters) + warmup_momentum = [ + _momentum / k for _momentum in self.regular_mom + ] + return warmup_momentum + + if isinstance(self.regular_momentum, dict): + momentum_groups = {} + for key, regular_momentum in self.regular_momentum.items(): + momentum_groups[key] = _get_warmup_momentum( + cur_iters, regular_momentum) + return momentum_groups + else: + return _get_warmup_momentum(cur_iters, self.regular_momentum) + + def before_run(self, runner): + # NOTE: when resuming from a checkpoint, + # if 'initial_momentum' is not saved, + # it will be set according to the optimizer params + if isinstance(runner.optimizer, dict): + self.base_momentum = {} + for k, optim in runner.optimizer.items(): + for group in optim.param_groups: + if 'momentum' in group.keys(): + group.setdefault('initial_momentum', group['momentum']) + else: + group.setdefault('initial_momentum', group['betas'][0]) + _base_momentum = [ + group['initial_momentum'] for group in optim.param_groups + ] + self.base_momentum.update({k: _base_momentum}) + else: + for group in runner.optimizer.param_groups: + if 'momentum' in group.keys(): + group.setdefault('initial_momentum', group['momentum']) + else: + group.setdefault('initial_momentum', group['betas'][0]) + self.base_momentum = [ + group['initial_momentum'] + for group in runner.optimizer.param_groups + ] + + def before_train_epoch(self, runner): + if not self.by_epoch: + return + self.regular_mom = self.get_regular_momentum(runner) + self._set_momentum(runner, self.regular_mom) + + def before_train_iter(self, runner): + cur_iter = runner.iter + if not self.by_epoch: + self.regular_mom = self.get_regular_momentum(runner) + if self.warmup is None or cur_iter >= self.warmup_iters: + self._set_momentum(runner, self.regular_mom) + else: + warmup_momentum = self.get_warmup_momentum(cur_iter) + self._set_momentum(runner, warmup_momentum) + elif self.by_epoch: + if self.warmup is None or cur_iter > self.warmup_iters: + return + elif cur_iter == self.warmup_iters: + self._set_momentum(runner, self.regular_mom) + else: + warmup_momentum = self.get_warmup_momentum(cur_iter) + self._set_momentum(runner, warmup_momentum) + + +@HOOKS.register_module() +class StepMomentumUpdaterHook(MomentumUpdaterHook): + """Step momentum scheduler with min value clipping. + + Args: + step (int | list[int]): Step to decay the momentum. If an int value is + given, regard it as the decay interval. If a list is given, decay + momentum at these steps. + gamma (float, optional): Decay momentum ratio. Default: 0.5. + min_momentum (float, optional): Minimum momentum value to keep. If + momentum after decay is lower than this value, it will be clipped + accordingly. If None is given, we don't perform lr clipping. + Default: None. + """ + + def __init__(self, step, gamma=0.5, min_momentum=None, **kwargs): + if isinstance(step, list): + assert mmcv.is_list_of(step, int) + assert all([s > 0 for s in step]) + elif isinstance(step, int): + assert step > 0 + else: + raise TypeError('"step" must be a list or integer') + self.step = step + self.gamma = gamma + self.min_momentum = min_momentum + super(StepMomentumUpdaterHook, self).__init__(**kwargs) + + def get_momentum(self, runner, base_momentum): + progress = runner.epoch if self.by_epoch else runner.iter + + # calculate exponential term + if isinstance(self.step, int): + exp = progress // self.step + else: + exp = len(self.step) + for i, s in enumerate(self.step): + if progress < s: + exp = i + break + + momentum = base_momentum * (self.gamma**exp) + if self.min_momentum is not None: + # clip to a minimum value + momentum = max(momentum, self.min_momentum) + return momentum + + +@HOOKS.register_module() +class CosineAnnealingMomentumUpdaterHook(MomentumUpdaterHook): + + def __init__(self, min_momentum=None, min_momentum_ratio=None, **kwargs): + assert (min_momentum is None) ^ (min_momentum_ratio is None) + self.min_momentum = min_momentum + self.min_momentum_ratio = min_momentum_ratio + super(CosineAnnealingMomentumUpdaterHook, self).__init__(**kwargs) + + def get_momentum(self, runner, base_momentum): + if self.by_epoch: + progress = runner.epoch + max_progress = runner.max_epochs + else: + progress = runner.iter + max_progress = runner.max_iters + if self.min_momentum_ratio is not None: + target_momentum = base_momentum * self.min_momentum_ratio + else: + target_momentum = self.min_momentum + return annealing_cos(base_momentum, target_momentum, + progress / max_progress) + + +@HOOKS.register_module() +class CyclicMomentumUpdaterHook(MomentumUpdaterHook): + """Cyclic momentum Scheduler. + + Implement the cyclical momentum scheduler policy described in + https://arxiv.org/pdf/1708.07120.pdf + + This momentum scheduler usually used together with the CyclicLRUpdater + to improve the performance in the 3D detection area. + + Attributes: + target_ratio (tuple[float]): Relative ratio of the lowest momentum and + the highest momentum to the initial momentum. + cyclic_times (int): Number of cycles during training + step_ratio_up (float): The ratio of the increasing process of momentum + in the total cycle. + by_epoch (bool): Whether to update momentum by epoch. + """ + + def __init__(self, + by_epoch=False, + target_ratio=(0.85 / 0.95, 1), + cyclic_times=1, + step_ratio_up=0.4, + **kwargs): + if isinstance(target_ratio, float): + target_ratio = (target_ratio, target_ratio / 1e5) + elif isinstance(target_ratio, tuple): + target_ratio = (target_ratio[0], target_ratio[0] / 1e5) \ + if len(target_ratio) == 1 else target_ratio + else: + raise ValueError('target_ratio should be either float ' + f'or tuple, got {type(target_ratio)}') + + assert len(target_ratio) == 2, \ + '"target_ratio" must be list or tuple of two floats' + assert 0 <= step_ratio_up < 1.0, \ + '"step_ratio_up" must be in range [0,1)' + + self.target_ratio = target_ratio + self.cyclic_times = cyclic_times + self.step_ratio_up = step_ratio_up + self.momentum_phases = [] # init momentum_phases + # currently only support by_epoch=False + assert not by_epoch, \ + 'currently only support "by_epoch" = False' + super(CyclicMomentumUpdaterHook, self).__init__(by_epoch, **kwargs) + + def before_run(self, runner): + super(CyclicMomentumUpdaterHook, self).before_run(runner) + # initiate momentum_phases + # total momentum_phases are separated as up and down + max_iter_per_phase = runner.max_iters // self.cyclic_times + iter_up_phase = int(self.step_ratio_up * max_iter_per_phase) + self.momentum_phases.append( + [0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]]) + self.momentum_phases.append([ + iter_up_phase, max_iter_per_phase, max_iter_per_phase, + self.target_ratio[0], self.target_ratio[1] + ]) + + def get_momentum(self, runner, base_momentum): + curr_iter = runner.iter + for (start_iter, end_iter, max_iter_per_phase, start_ratio, + end_ratio) in self.momentum_phases: + curr_iter %= max_iter_per_phase + if start_iter <= curr_iter < end_iter: + progress = curr_iter - start_iter + return annealing_cos(base_momentum * start_ratio, + base_momentum * end_ratio, + progress / (end_iter - start_iter)) + + +@HOOKS.register_module() +class OneCycleMomentumUpdaterHook(MomentumUpdaterHook): + """OneCycle momentum Scheduler. + + This momentum scheduler usually used together with the OneCycleLrUpdater + to improve the performance. + + Args: + base_momentum (float or list): Lower momentum boundaries in the cycle + for each parameter group. Note that momentum is cycled inversely + to learning rate; at the peak of a cycle, momentum is + 'base_momentum' and learning rate is 'max_lr'. + Default: 0.85 + max_momentum (float or list): Upper momentum boundaries in the cycle + for each parameter group. Functionally, + it defines the cycle amplitude (max_momentum - base_momentum). + Note that momentum is cycled inversely + to learning rate; at the start of a cycle, momentum is + 'max_momentum' and learning rate is 'base_lr' + Default: 0.95 + pct_start (float): The percentage of the cycle (in number of steps) + spent increasing the learning rate. + Default: 0.3 + anneal_strategy (str): {'cos', 'linear'} + Specifies the annealing strategy: 'cos' for cosine annealing, + 'linear' for linear annealing. + Default: 'cos' + three_phase (bool): If three_phase is True, use a third phase of the + schedule to annihilate the learning rate according to + final_div_factor instead of modifying the second phase (the first + two phases will be symmetrical about the step indicated by + pct_start). + Default: False + """ + + def __init__(self, + base_momentum=0.85, + max_momentum=0.95, + pct_start=0.3, + anneal_strategy='cos', + three_phase=False, + **kwargs): + # validate by_epoch, currently only support by_epoch=False + if 'by_epoch' not in kwargs: + kwargs['by_epoch'] = False + else: + assert not kwargs['by_epoch'], \ + 'currently only support "by_epoch" = False' + if not isinstance(base_momentum, (float, list, dict)): + raise ValueError('base_momentum must be the type among of float,' + 'list or dict.') + self._base_momentum = base_momentum + if not isinstance(max_momentum, (float, list, dict)): + raise ValueError('max_momentum must be the type among of float,' + 'list or dict.') + self._max_momentum = max_momentum + # validate pct_start + if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float): + raise ValueError('Expected float between 0 and 1 pct_start, but ' + f'got {pct_start}') + self.pct_start = pct_start + # validate anneal_strategy + if anneal_strategy not in ['cos', 'linear']: + raise ValueError('anneal_strategy must by one of "cos" or ' + f'"linear", instead got {anneal_strategy}') + elif anneal_strategy == 'cos': + self.anneal_func = annealing_cos + elif anneal_strategy == 'linear': + self.anneal_func = annealing_linear + self.three_phase = three_phase + self.momentum_phases = [] # init momentum_phases + super(OneCycleMomentumUpdaterHook, self).__init__(**kwargs) + + def before_run(self, runner): + if isinstance(runner.optimizer, dict): + for k, optim in runner.optimizer.items(): + if ('momentum' not in optim.defaults + and 'betas' not in optim.defaults): + raise ValueError('optimizer must support momentum with' + 'option enabled') + self.use_beta1 = 'betas' in optim.defaults + _base_momentum = format_param(k, optim, self._base_momentum) + _max_momentum = format_param(k, optim, self._max_momentum) + for group, b_momentum, m_momentum in zip( + optim.param_groups, _base_momentum, _max_momentum): + if self.use_beta1: + _, beta2 = group['betas'] + group['betas'] = (m_momentum, beta2) + else: + group['momentum'] = m_momentum + group['base_momentum'] = b_momentum + group['max_momentum'] = m_momentum + else: + optim = runner.optimizer + if ('momentum' not in optim.defaults + and 'betas' not in optim.defaults): + raise ValueError('optimizer must support momentum with' + 'option enabled') + self.use_beta1 = 'betas' in optim.defaults + k = type(optim).__name__ + _base_momentum = format_param(k, optim, self._base_momentum) + _max_momentum = format_param(k, optim, self._max_momentum) + for group, b_momentum, m_momentum in zip(optim.param_groups, + _base_momentum, + _max_momentum): + if self.use_beta1: + _, beta2 = group['betas'] + group['betas'] = (m_momentum, beta2) + else: + group['momentum'] = m_momentum + group['base_momentum'] = b_momentum + group['max_momentum'] = m_momentum + + if self.three_phase: + self.momentum_phases.append({ + 'end_iter': + float(self.pct_start * runner.max_iters) - 1, + 'start_momentum': + 'max_momentum', + 'end_momentum': + 'base_momentum' + }) + self.momentum_phases.append({ + 'end_iter': + float(2 * self.pct_start * runner.max_iters) - 2, + 'start_momentum': + 'base_momentum', + 'end_momentum': + 'max_momentum' + }) + self.momentum_phases.append({ + 'end_iter': runner.max_iters - 1, + 'start_momentum': 'max_momentum', + 'end_momentum': 'max_momentum' + }) + else: + self.momentum_phases.append({ + 'end_iter': + float(self.pct_start * runner.max_iters) - 1, + 'start_momentum': + 'max_momentum', + 'end_momentum': + 'base_momentum' + }) + self.momentum_phases.append({ + 'end_iter': runner.max_iters - 1, + 'start_momentum': 'base_momentum', + 'end_momentum': 'max_momentum' + }) + + def _set_momentum(self, runner, momentum_groups): + if isinstance(runner.optimizer, dict): + for k, optim in runner.optimizer.items(): + for param_group, mom in zip(optim.param_groups, + momentum_groups[k]): + if 'momentum' in param_group.keys(): + param_group['momentum'] = mom + elif 'betas' in param_group.keys(): + param_group['betas'] = (mom, param_group['betas'][1]) + else: + for param_group, mom in zip(runner.optimizer.param_groups, + momentum_groups): + if 'momentum' in param_group.keys(): + param_group['momentum'] = mom + elif 'betas' in param_group.keys(): + param_group['betas'] = (mom, param_group['betas'][1]) + + def get_momentum(self, runner, param_group): + curr_iter = runner.iter + start_iter = 0 + for i, phase in enumerate(self.momentum_phases): + end_iter = phase['end_iter'] + if curr_iter <= end_iter or i == len(self.momentum_phases) - 1: + pct = (curr_iter - start_iter) / (end_iter - start_iter) + momentum = self.anneal_func( + param_group[phase['start_momentum']], + param_group[phase['end_momentum']], pct) + break + start_iter = end_iter + return momentum + + def get_regular_momentum(self, runner): + if isinstance(runner.optimizer, dict): + momentum_groups = {} + for k, optim in runner.optimizer.items(): + _momentum_group = [ + self.get_momentum(runner, param_group) + for param_group in optim.param_groups + ] + momentum_groups.update({k: _momentum_group}) + return momentum_groups + else: + momentum_groups = [] + for param_group in runner.optimizer.param_groups: + momentum_groups.append(self.get_momentum(runner, param_group)) + return momentum_groups diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py new file mode 100644 index 000000000000..c44b6338c0dc --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py @@ -0,0 +1,508 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +from collections import defaultdict +from itertools import chain + +from torch.nn.utils import clip_grad + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, _BatchNorm, digit_version +from ..dist_utils import allreduce_grads +from ..fp16_utils import LossScaler, wrap_fp16_model +from .hook import HOOKS, Hook + +try: + # If PyTorch version >= 1.6.0, torch.cuda.amp.GradScaler would be imported + # and used; otherwise, auto fp16 will adopt mmcv's implementation. + from torch.cuda.amp import GradScaler +except ImportError: + pass + + +@HOOKS.register_module() +class OptimizerHook(Hook): + + def __init__(self, grad_clip=None): + self.grad_clip = grad_clip + + def clip_grads(self, params): + params = list( + filter(lambda p: p.requires_grad and p.grad is not None, params)) + if len(params) > 0: + return clip_grad.clip_grad_norm_(params, **self.grad_clip) + + def after_train_iter(self, runner): + runner.optimizer.zero_grad() + runner.outputs['loss'].backward() + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + runner.optimizer.step() + + +@HOOKS.register_module() +class GradientCumulativeOptimizerHook(OptimizerHook): + """Optimizer Hook implements multi-iters gradient cumulating. + + Args: + cumulative_iters (int, optional): Num of gradient cumulative iters. + The optimizer will step every `cumulative_iters` iters. + Defaults to 1. + + Examples: + >>> # Use cumulative_iters to simulate a large batch size + >>> # It is helpful when the hardware cannot handle a large batch size. + >>> loader = DataLoader(data, batch_size=64) + >>> optim_hook = GradientCumulativeOptimizerHook(cumulative_iters=4) + >>> # almost equals to + >>> loader = DataLoader(data, batch_size=256) + >>> optim_hook = OptimizerHook() + """ + + def __init__(self, cumulative_iters=1, **kwargs): + super(GradientCumulativeOptimizerHook, self).__init__(**kwargs) + + assert isinstance(cumulative_iters, int) and cumulative_iters > 0, \ + f'cumulative_iters only accepts positive int, but got ' \ + f'{type(cumulative_iters)} instead.' + + self.cumulative_iters = cumulative_iters + self.divisible_iters = 0 + self.remainder_iters = 0 + self.initialized = False + + def has_batch_norm(self, module): + if isinstance(module, _BatchNorm): + return True + for m in module.children(): + if self.has_batch_norm(m): + return True + return False + + def _init(self, runner): + if runner.iter % self.cumulative_iters != 0: + runner.logger.warning( + 'Resume iter number is not divisible by cumulative_iters in ' + 'GradientCumulativeOptimizerHook, which means the gradient of ' + 'some iters is lost and the result may be influenced slightly.' + ) + + if self.has_batch_norm(runner.model) and self.cumulative_iters > 1: + runner.logger.warning( + 'GradientCumulativeOptimizerHook may slightly decrease ' + 'performance if the model has BatchNorm layers.') + + residual_iters = runner.max_iters - runner.iter + + self.divisible_iters = ( + residual_iters // self.cumulative_iters * self.cumulative_iters) + self.remainder_iters = residual_iters - self.divisible_iters + + self.initialized = True + + def after_train_iter(self, runner): + if not self.initialized: + self._init(runner) + + if runner.iter < self.divisible_iters: + loss_factor = self.cumulative_iters + else: + loss_factor = self.remainder_iters + loss = runner.outputs['loss'] + loss = loss / loss_factor + loss.backward() + + if (self.every_n_iters(runner, self.cumulative_iters) + or self.is_last_iter(runner)): + + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + runner.optimizer.step() + runner.optimizer.zero_grad() + + +if (TORCH_VERSION != 'parrots' + and digit_version(TORCH_VERSION) >= digit_version('1.6.0')): + + @HOOKS.register_module() + class Fp16OptimizerHook(OptimizerHook): + """FP16 optimizer hook (using PyTorch's implementation). + + If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend, + to take care of the optimization procedure. + + Args: + loss_scale (float | str | dict): Scale factor configuration. + If loss_scale is a float, static loss scaling will be used with + the specified scale. If loss_scale is a string, it must be + 'dynamic', then dynamic loss scaling will be used. + It can also be a dict containing arguments of GradScalar. + Defaults to 512. For Pytorch >= 1.6, mmcv uses official + implementation of GradScaler. If you use a dict version of + loss_scale to create GradScaler, please refer to: + https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler + for the parameters. + + Examples: + >>> loss_scale = dict( + ... init_scale=65536.0, + ... growth_factor=2.0, + ... backoff_factor=0.5, + ... growth_interval=2000 + ... ) + >>> optimizer_hook = Fp16OptimizerHook(loss_scale=loss_scale) + """ + + def __init__(self, + grad_clip=None, + coalesce=True, + bucket_size_mb=-1, + loss_scale=512., + distributed=True): + self.grad_clip = grad_clip + self.coalesce = coalesce + self.bucket_size_mb = bucket_size_mb + self.distributed = distributed + self._scale_update_param = None + if loss_scale == 'dynamic': + self.loss_scaler = GradScaler() + elif isinstance(loss_scale, float): + self._scale_update_param = loss_scale + self.loss_scaler = GradScaler(init_scale=loss_scale) + elif isinstance(loss_scale, dict): + self.loss_scaler = GradScaler(**loss_scale) + else: + raise ValueError('loss_scale must be of type float, dict, or ' + f'"dynamic", got {loss_scale}') + + def before_run(self, runner): + """Preparing steps before Mixed Precision Training.""" + # wrap model mode to fp16 + wrap_fp16_model(runner.model) + # resume from state dict + if 'fp16' in runner.meta and 'loss_scaler' in runner.meta['fp16']: + scaler_state_dict = runner.meta['fp16']['loss_scaler'] + self.loss_scaler.load_state_dict(scaler_state_dict) + + def copy_grads_to_fp32(self, fp16_net, fp32_weights): + """Copy gradients from fp16 model to fp32 weight copy.""" + for fp32_param, fp16_param in zip(fp32_weights, + fp16_net.parameters()): + if fp16_param.grad is not None: + if fp32_param.grad is None: + fp32_param.grad = fp32_param.data.new( + fp32_param.size()) + fp32_param.grad.copy_(fp16_param.grad) + + def copy_params_to_fp16(self, fp16_net, fp32_weights): + """Copy updated params from fp32 weight copy to fp16 model.""" + for fp16_param, fp32_param in zip(fp16_net.parameters(), + fp32_weights): + fp16_param.data.copy_(fp32_param.data) + + def after_train_iter(self, runner): + """Backward optimization steps for Mixed Precision Training. For + dynamic loss scaling, please refer to + https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler. + + 1. Scale the loss by a scale factor. + 2. Backward the loss to obtain the gradients. + 3. Unscale the optimizer’s gradient tensors. + 4. Call optimizer.step() and update scale factor. + 5. Save loss_scaler state_dict for resume purpose. + """ + # clear grads of last iteration + runner.model.zero_grad() + runner.optimizer.zero_grad() + + self.loss_scaler.scale(runner.outputs['loss']).backward() + self.loss_scaler.unscale_(runner.optimizer) + # grad clip + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + # backward and update scaler + self.loss_scaler.step(runner.optimizer) + self.loss_scaler.update(self._scale_update_param) + + # save state_dict of loss_scaler + runner.meta.setdefault( + 'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() + + @HOOKS.register_module() + class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook, + Fp16OptimizerHook): + """Fp16 optimizer Hook (using PyTorch's implementation) implements + multi-iters gradient cumulating. + + If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend, + to take care of the optimization procedure. + """ + + def __init__(self, *args, **kwargs): + super(GradientCumulativeFp16OptimizerHook, + self).__init__(*args, **kwargs) + + def after_train_iter(self, runner): + if not self.initialized: + self._init(runner) + + if runner.iter < self.divisible_iters: + loss_factor = self.cumulative_iters + else: + loss_factor = self.remainder_iters + loss = runner.outputs['loss'] + loss = loss / loss_factor + + self.loss_scaler.scale(loss).backward() + + if (self.every_n_iters(runner, self.cumulative_iters) + or self.is_last_iter(runner)): + + # copy fp16 grads in the model to fp32 params in the optimizer + self.loss_scaler.unscale_(runner.optimizer) + + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update( + {'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + + # backward and update scaler + self.loss_scaler.step(runner.optimizer) + self.loss_scaler.update(self._scale_update_param) + + # save state_dict of loss_scaler + runner.meta.setdefault( + 'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() + + # clear grads + runner.model.zero_grad() + runner.optimizer.zero_grad() + +else: + + @HOOKS.register_module() + class Fp16OptimizerHook(OptimizerHook): + """FP16 optimizer hook (mmcv's implementation). + + The steps of fp16 optimizer is as follows. + 1. Scale the loss value. + 2. BP in the fp16 model. + 2. Copy gradients from fp16 model to fp32 weights. + 3. Update fp32 weights. + 4. Copy updated parameters from fp32 weights to fp16 model. + + Refer to https://arxiv.org/abs/1710.03740 for more details. + + Args: + loss_scale (float | str | dict): Scale factor configuration. + If loss_scale is a float, static loss scaling will be used with + the specified scale. If loss_scale is a string, it must be + 'dynamic', then dynamic loss scaling will be used. + It can also be a dict containing arguments of LossScaler. + Defaults to 512. + """ + + def __init__(self, + grad_clip=None, + coalesce=True, + bucket_size_mb=-1, + loss_scale=512., + distributed=True): + self.grad_clip = grad_clip + self.coalesce = coalesce + self.bucket_size_mb = bucket_size_mb + self.distributed = distributed + if loss_scale == 'dynamic': + self.loss_scaler = LossScaler(mode='dynamic') + elif isinstance(loss_scale, float): + self.loss_scaler = LossScaler( + init_scale=loss_scale, mode='static') + elif isinstance(loss_scale, dict): + self.loss_scaler = LossScaler(**loss_scale) + else: + raise ValueError('loss_scale must be of type float, dict, or ' + f'"dynamic", got {loss_scale}') + + def before_run(self, runner): + """Preparing steps before Mixed Precision Training. + + 1. Make a master copy of fp32 weights for optimization. + 2. Convert the main model from fp32 to fp16. + """ + # keep a copy of fp32 weights + old_groups = runner.optimizer.param_groups + runner.optimizer.param_groups = copy.deepcopy( + runner.optimizer.param_groups) + state = defaultdict(dict) + p_map = { + old_p: p + for old_p, p in zip( + chain(*(g['params'] for g in old_groups)), + chain(*(g['params'] + for g in runner.optimizer.param_groups))) + } + for k, v in runner.optimizer.state.items(): + state[p_map[k]] = v + runner.optimizer.state = state + # convert model to fp16 + wrap_fp16_model(runner.model) + # resume from state dict + if 'fp16' in runner.meta and 'loss_scaler' in runner.meta['fp16']: + scaler_state_dict = runner.meta['fp16']['loss_scaler'] + self.loss_scaler.load_state_dict(scaler_state_dict) + + def copy_grads_to_fp32(self, fp16_net, fp32_weights): + """Copy gradients from fp16 model to fp32 weight copy.""" + for fp32_param, fp16_param in zip(fp32_weights, + fp16_net.parameters()): + if fp16_param.grad is not None: + if fp32_param.grad is None: + fp32_param.grad = fp32_param.data.new( + fp32_param.size()) + fp32_param.grad.copy_(fp16_param.grad) + + def copy_params_to_fp16(self, fp16_net, fp32_weights): + """Copy updated params from fp32 weight copy to fp16 model.""" + for fp16_param, fp32_param in zip(fp16_net.parameters(), + fp32_weights): + fp16_param.data.copy_(fp32_param.data) + + def after_train_iter(self, runner): + """Backward optimization steps for Mixed Precision Training. For + dynamic loss scaling, please refer `loss_scalar.py` + + 1. Scale the loss by a scale factor. + 2. Backward the loss to obtain the gradients (fp16). + 3. Copy gradients from the model to the fp32 weight copy. + 4. Scale the gradients back and update the fp32 weight copy. + 5. Copy back the params from fp32 weight copy to the fp16 model. + 6. Save loss_scaler state_dict for resume purpose. + """ + # clear grads of last iteration + runner.model.zero_grad() + runner.optimizer.zero_grad() + # scale the loss value + scaled_loss = runner.outputs['loss'] * self.loss_scaler.loss_scale + scaled_loss.backward() + # copy fp16 grads in the model to fp32 params in the optimizer + + fp32_weights = [] + for param_group in runner.optimizer.param_groups: + fp32_weights += param_group['params'] + self.copy_grads_to_fp32(runner.model, fp32_weights) + # allreduce grads + if self.distributed: + allreduce_grads(fp32_weights, self.coalesce, + self.bucket_size_mb) + + has_overflow = self.loss_scaler.has_overflow(fp32_weights) + # if has overflow, skip this iteration + if not has_overflow: + # scale the gradients back + for param in fp32_weights: + if param.grad is not None: + param.grad.div_(self.loss_scaler.loss_scale) + if self.grad_clip is not None: + grad_norm = self.clip_grads(fp32_weights) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update( + {'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + # update fp32 params + runner.optimizer.step() + # copy fp32 params to the fp16 model + self.copy_params_to_fp16(runner.model, fp32_weights) + self.loss_scaler.update_scale(has_overflow) + if has_overflow: + runner.logger.warning('Check overflow, downscale loss scale ' + f'to {self.loss_scaler.cur_scale}') + + # save state_dict of loss_scaler + runner.meta.setdefault( + 'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() + + @HOOKS.register_module() + class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook, + Fp16OptimizerHook): + """Fp16 optimizer Hook (using mmcv implementation) implements multi- + iters gradient cumulating.""" + + def __init__(self, *args, **kwargs): + super(GradientCumulativeFp16OptimizerHook, + self).__init__(*args, **kwargs) + + def after_train_iter(self, runner): + if not self.initialized: + self._init(runner) + + if runner.iter < self.divisible_iters: + loss_factor = self.cumulative_iters + else: + loss_factor = self.remainder_iters + + loss = runner.outputs['loss'] + loss = loss / loss_factor + + # scale the loss value + scaled_loss = loss * self.loss_scaler.loss_scale + scaled_loss.backward() + + if (self.every_n_iters(runner, self.cumulative_iters) + or self.is_last_iter(runner)): + + # copy fp16 grads in the model to fp32 params in the optimizer + fp32_weights = [] + for param_group in runner.optimizer.param_groups: + fp32_weights += param_group['params'] + self.copy_grads_to_fp32(runner.model, fp32_weights) + # allreduce grads + if self.distributed: + allreduce_grads(fp32_weights, self.coalesce, + self.bucket_size_mb) + + has_overflow = self.loss_scaler.has_overflow(fp32_weights) + # if has overflow, skip this iteration + if not has_overflow: + # scale the gradients back + for param in fp32_weights: + if param.grad is not None: + param.grad.div_(self.loss_scaler.loss_scale) + if self.grad_clip is not None: + grad_norm = self.clip_grads(fp32_weights) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update( + {'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + # update fp32 params + runner.optimizer.step() + # copy fp32 params to the fp16 model + self.copy_params_to_fp16(runner.model, fp32_weights) + else: + runner.logger.warning( + 'Check overflow, downscale loss scale ' + f'to {self.loss_scaler.cur_scale}') + + self.loss_scaler.update_scale(has_overflow) + + # save state_dict of loss_scaler + runner.meta.setdefault( + 'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() + + # clear grads + runner.model.zero_grad() + runner.optimizer.zero_grad() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py new file mode 100644 index 000000000000..b70236997eec --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py @@ -0,0 +1,180 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings +from typing import Callable, List, Optional, Union + +import torch + +from ..dist_utils import master_only +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class ProfilerHook(Hook): + """Profiler to analyze performance during training. + + PyTorch Profiler is a tool that allows the collection of the performance + metrics during the training. More details on Profiler can be found at + https://pytorch.org/docs/1.8.1/profiler.html#torch.profiler.profile + + Args: + by_epoch (bool): Profile performance by epoch or by iteration. + Default: True. + profile_iters (int): Number of iterations for profiling. + If ``by_epoch=True``, profile_iters indicates that they are the + first profile_iters epochs at the beginning of the + training, otherwise it indicates the first profile_iters + iterations. Default: 1. + activities (list[str]): List of activity groups (CPU, CUDA) to use in + profiling. Default: ['cpu', 'cuda']. + schedule (dict, optional): Config of generating the callable schedule. + if schedule is None, profiler will not add step markers into the + trace and table view. Default: None. + on_trace_ready (callable, dict): Either a handler or a dict of generate + handler. Default: None. + record_shapes (bool): Save information about operator's input shapes. + Default: False. + profile_memory (bool): Track tensor memory allocation/deallocation. + Default: False. + with_stack (bool): Record source information (file and line number) + for the ops. Default: False. + with_flops (bool): Use formula to estimate the FLOPS of specific + operators (matrix multiplication and 2D convolution). + Default: False. + json_trace_path (str, optional): Exports the collected trace in Chrome + JSON format. Default: None. + + Example: + >>> runner = ... # instantiate a Runner + >>> # tensorboard trace + >>> trace_config = dict(type='tb_trace', dir_name='work_dir') + >>> profiler_config = dict(on_trace_ready=trace_config) + >>> runner.register_profiler_hook(profiler_config) + >>> runner.run(data_loaders=[trainloader], workflow=[('train', 1)]) + """ + + def __init__(self, + by_epoch: bool = True, + profile_iters: int = 1, + activities: List[str] = ['cpu', 'cuda'], + schedule: Optional[dict] = None, + on_trace_ready: Optional[Union[Callable, dict]] = None, + record_shapes: bool = False, + profile_memory: bool = False, + with_stack: bool = False, + with_flops: bool = False, + json_trace_path: Optional[str] = None) -> None: + try: + from torch import profiler # torch version >= 1.8.1 + except ImportError: + raise ImportError('profiler is the new feature of torch1.8.1, ' + f'but your version is {torch.__version__}') + + assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean.' + self.by_epoch = by_epoch + + if profile_iters < 1: + raise ValueError('profile_iters should be greater than 0, but got ' + f'{profile_iters}') + self.profile_iters = profile_iters + + if not isinstance(activities, list): + raise ValueError( + f'activities should be list, but got {type(activities)}') + self.activities = [] + for activity in activities: + activity = activity.lower() + if activity == 'cpu': + self.activities.append(profiler.ProfilerActivity.CPU) + elif activity == 'cuda': + self.activities.append(profiler.ProfilerActivity.CUDA) + else: + raise ValueError( + f'activity should be "cpu" or "cuda", but got {activity}') + + if schedule is not None: + self.schedule = profiler.schedule(**schedule) + else: + self.schedule = None + + self.on_trace_ready = on_trace_ready + self.record_shapes = record_shapes + self.profile_memory = profile_memory + self.with_stack = with_stack + self.with_flops = with_flops + self.json_trace_path = json_trace_path + + @master_only + def before_run(self, runner): + if self.by_epoch and runner.max_epochs < self.profile_iters: + raise ValueError('self.profile_iters should not be greater than ' + f'{runner.max_epochs}') + + if not self.by_epoch and runner.max_iters < self.profile_iters: + raise ValueError('self.profile_iters should not be greater than ' + f'{runner.max_iters}') + + if callable(self.on_trace_ready): # handler + _on_trace_ready = self.on_trace_ready + elif isinstance(self.on_trace_ready, dict): # config of handler + trace_cfg = self.on_trace_ready.copy() + trace_type = trace_cfg.pop('type') # log_trace handler + if trace_type == 'log_trace': + + def _log_handler(prof): + print(prof.key_averages().table(**trace_cfg)) + + _on_trace_ready = _log_handler + elif trace_type == 'tb_trace': # tensorboard_trace handler + try: + import torch_tb_profiler # noqa: F401 + except ImportError: + raise ImportError('please run "pip install ' + 'torch-tb-profiler" to install ' + 'torch_tb_profiler') + _on_trace_ready = torch.profiler.tensorboard_trace_handler( + **trace_cfg) + else: + raise ValueError('trace_type should be "log_trace" or ' + f'"tb_trace", but got {trace_type}') + elif self.on_trace_ready is None: + _on_trace_ready = None # type: ignore + else: + raise ValueError('on_trace_ready should be handler, dict or None, ' + f'but got {type(self.on_trace_ready)}') + + if runner.max_epochs > 1: + warnings.warn(f'profiler will profile {runner.max_epochs} epochs ' + 'instead of 1 epoch. Since profiler will slow down ' + 'the training, it is recommended to train 1 epoch ' + 'with ProfilerHook and adjust your setting according' + ' to the profiler summary. During normal training ' + '(epoch > 1), you may disable the ProfilerHook.') + + self.profiler = torch.profiler.profile( + activities=self.activities, + schedule=self.schedule, + on_trace_ready=_on_trace_ready, + record_shapes=self.record_shapes, + profile_memory=self.profile_memory, + with_stack=self.with_stack, + with_flops=self.with_flops) + + self.profiler.__enter__() + runner.logger.info('profiler is profiling...') + + @master_only + def after_train_epoch(self, runner): + if self.by_epoch and runner.epoch == self.profile_iters - 1: + runner.logger.info('profiler may take a few minutes...') + self.profiler.__exit__(None, None, None) + if self.json_trace_path is not None: + self.profiler.export_chrome_trace(self.json_trace_path) + + @master_only + def after_train_iter(self, runner): + self.profiler.step() + if not self.by_epoch and runner.iter == self.profile_iters - 1: + runner.logger.info('profiler may take a few minutes...') + self.profiler.__exit__(None, None, None) + if self.json_trace_path is not None: + self.profiler.export_chrome_trace(self.json_trace_path) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sampler_seed.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sampler_seed.py new file mode 100644 index 000000000000..ee0dc6bdd8df --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sampler_seed.py @@ -0,0 +1,20 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class DistSamplerSeedHook(Hook): + """Data-loading sampler for distributed training. + + When distributed training, it is only useful in conjunction with + :obj:`EpochBasedRunner`, while :obj:`IterBasedRunner` achieves the same + purpose with :obj:`IterLoader`. + """ + + def before_epoch(self, runner): + if hasattr(runner.data_loader.sampler, 'set_epoch'): + # in case the data loader uses `SequentialSampler` in Pytorch + runner.data_loader.sampler.set_epoch(runner.epoch) + elif hasattr(runner.data_loader.batch_sampler.sampler, 'set_epoch'): + # batch sampler in pytorch warps the sampler as its attributes. + runner.data_loader.batch_sampler.sampler.set_epoch(runner.epoch) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sync_buffer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sync_buffer.py new file mode 100644 index 000000000000..6376b7ff8942 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sync_buffer.py @@ -0,0 +1,22 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ..dist_utils import allreduce_params +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class SyncBuffersHook(Hook): + """Synchronize model buffers such as running_mean and running_var in BN at + the end of each epoch. + + Args: + distributed (bool): Whether distributed training is used. It is + effective only for distributed training. Defaults to True. + """ + + def __init__(self, distributed=True): + self.distributed = distributed + + def after_epoch(self, runner): + """All-reduce model buffers at the end of each epoch.""" + if self.distributed: + allreduce_params(runner.model.buffers()) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py new file mode 100644 index 000000000000..8062be9d777c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py @@ -0,0 +1,273 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import platform +import shutil +import time +import warnings + +import torch +from torch.optim import Optimizer + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from .base_runner import BaseRunner +from .builder import RUNNERS +from .checkpoint import save_checkpoint +from .hooks import IterTimerHook +from .utils import get_host_info + + +class IterLoader: + + def __init__(self, dataloader): + self._dataloader = dataloader + self.iter_loader = iter(self._dataloader) + self._epoch = 0 + + @property + def epoch(self): + return self._epoch + + def __next__(self): + try: + data = next(self.iter_loader) + except StopIteration: + self._epoch += 1 + if hasattr(self._dataloader.sampler, 'set_epoch'): + self._dataloader.sampler.set_epoch(self._epoch) + time.sleep(2) # Prevent possible deadlock during epoch transition + self.iter_loader = iter(self._dataloader) + data = next(self.iter_loader) + + return data + + def __len__(self): + return len(self._dataloader) + + +@RUNNERS.register_module() +class IterBasedRunner(BaseRunner): + """Iteration-based Runner. + + This runner train models iteration by iteration. + """ + + def train(self, data_loader, **kwargs): + self.model.train() + self.mode = 'train' + self.data_loader = data_loader + self._epoch = data_loader.epoch + data_batch = next(data_loader) + self.call_hook('before_train_iter') + outputs = self.model.train_step(data_batch, self.optimizer, **kwargs) + if not isinstance(outputs, dict): + raise TypeError('model.train_step() must return a dict') + if 'log_vars' in outputs: + self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) + self.outputs = outputs + self.call_hook('after_train_iter') + self._inner_iter += 1 + self._iter += 1 + + @torch.no_grad() + def val(self, data_loader, **kwargs): + self.model.eval() + self.mode = 'val' + self.data_loader = data_loader + data_batch = next(data_loader) + self.call_hook('before_val_iter') + outputs = self.model.val_step(data_batch, **kwargs) + if not isinstance(outputs, dict): + raise TypeError('model.val_step() must return a dict') + if 'log_vars' in outputs: + self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) + self.outputs = outputs + self.call_hook('after_val_iter') + self._inner_iter += 1 + + def run(self, data_loaders, workflow, max_iters=None, **kwargs): + """Start running. + + Args: + data_loaders (list[:obj:`DataLoader`]): Dataloaders for training + and validation. + workflow (list[tuple]): A list of (phase, iters) to specify the + running order and iterations. E.g, [('train', 10000), + ('val', 1000)] means running 10000 iterations for training and + 1000 iterations for validation, iteratively. + """ + assert isinstance(data_loaders, list) + assert mmcv.is_list_of(workflow, tuple) + assert len(data_loaders) == len(workflow) + if max_iters is not None: + warnings.warn( + 'setting max_iters in run is deprecated, ' + 'please set max_iters in runner_config', DeprecationWarning) + self._max_iters = max_iters + assert self._max_iters is not None, ( + 'max_iters must be specified during instantiation') + + work_dir = self.work_dir if self.work_dir is not None else 'NONE' + self.logger.info('Start running, host: %s, work_dir: %s', + get_host_info(), work_dir) + self.logger.info('Hooks will be executed in the following order:\n%s', + self.get_hook_info()) + self.logger.info('workflow: %s, max: %d iters', workflow, + self._max_iters) + self.call_hook('before_run') + + iter_loaders = [IterLoader(x) for x in data_loaders] + + self.call_hook('before_epoch') + + while self.iter < self._max_iters: + for i, flow in enumerate(workflow): + self._inner_iter = 0 + mode, iters = flow + if not isinstance(mode, str) or not hasattr(self, mode): + raise ValueError( + 'runner has no method named "{}" to run a workflow'. + format(mode)) + iter_runner = getattr(self, mode) + for _ in range(iters): + if mode == 'train' and self.iter >= self._max_iters: + break + iter_runner(iter_loaders[i], **kwargs) + + time.sleep(1) # wait for some hooks like loggers to finish + self.call_hook('after_epoch') + self.call_hook('after_run') + + def resume(self, + checkpoint, + resume_optimizer=True, + map_location='default'): + """Resume model from checkpoint. + + Args: + checkpoint (str): Checkpoint to resume from. + resume_optimizer (bool, optional): Whether resume the optimizer(s) + if the checkpoint file includes optimizer(s). Default to True. + map_location (str, optional): Same as :func:`torch.load`. + Default to 'default'. + """ + if map_location == 'default': + device_id = torch.cuda.current_device() + checkpoint = self.load_checkpoint( + checkpoint, + map_location=lambda storage, loc: storage.cuda(device_id)) + else: + checkpoint = self.load_checkpoint( + checkpoint, map_location=map_location) + + self._epoch = checkpoint['meta']['epoch'] + self._iter = checkpoint['meta']['iter'] + self._inner_iter = checkpoint['meta']['iter'] + if 'optimizer' in checkpoint and resume_optimizer: + if isinstance(self.optimizer, Optimizer): + self.optimizer.load_state_dict(checkpoint['optimizer']) + elif isinstance(self.optimizer, dict): + for k in self.optimizer.keys(): + self.optimizer[k].load_state_dict( + checkpoint['optimizer'][k]) + else: + raise TypeError( + 'Optimizer should be dict or torch.optim.Optimizer ' + f'but got {type(self.optimizer)}') + + self.logger.info(f'resumed from epoch: {self.epoch}, iter {self.iter}') + + def save_checkpoint(self, + out_dir, + filename_tmpl='iter_{}.pth', + meta=None, + save_optimizer=True, + create_symlink=True): + """Save checkpoint to file. + + Args: + out_dir (str): Directory to save checkpoint files. + filename_tmpl (str, optional): Checkpoint file template. + Defaults to 'iter_{}.pth'. + meta (dict, optional): Metadata to be saved in checkpoint. + Defaults to None. + save_optimizer (bool, optional): Whether save optimizer. + Defaults to True. + create_symlink (bool, optional): Whether create symlink to the + latest checkpoint file. Defaults to True. + """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError( + f'meta should be a dict or None, but got {type(meta)}') + if self.meta is not None: + meta.update(self.meta) + # Note: meta.update(self.meta) should be done before + # meta.update(epoch=self.epoch + 1, iter=self.iter) otherwise + # there will be problems with resumed checkpoints. + # More details in https://github.com/open-mmlab/mmcv/pull/1108 + meta.update(epoch=self.epoch + 1, iter=self.iter) + + filename = filename_tmpl.format(self.iter + 1) + filepath = osp.join(out_dir, filename) + optimizer = self.optimizer if save_optimizer else None + save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) + # in some environments, `os.symlink` is not supported, you may need to + # set `create_symlink` to False + if create_symlink: + dst_file = osp.join(out_dir, 'latest.pth') + if platform.system() != 'Windows': + mmcv.symlink(filename, dst_file) + else: + shutil.copy(filepath, dst_file) + + def register_training_hooks(self, + lr_config, + optimizer_config=None, + checkpoint_config=None, + log_config=None, + momentum_config=None, + custom_hooks_config=None): + """Register default hooks for iter-based training. + + Checkpoint hook, optimizer stepper hook and logger hooks will be set to + `by_epoch=False` by default. + + Default hooks include: + + +----------------------+-------------------------+ + | Hooks | Priority | + +======================+=========================+ + | LrUpdaterHook | VERY_HIGH (10) | + +----------------------+-------------------------+ + | MomentumUpdaterHook | HIGH (30) | + +----------------------+-------------------------+ + | OptimizerStepperHook | ABOVE_NORMAL (40) | + +----------------------+-------------------------+ + | CheckpointSaverHook | NORMAL (50) | + +----------------------+-------------------------+ + | IterTimerHook | LOW (70) | + +----------------------+-------------------------+ + | LoggerHook(s) | VERY_LOW (90) | + +----------------------+-------------------------+ + | CustomHook(s) | defaults to NORMAL (50) | + +----------------------+-------------------------+ + + If custom hooks have same priority with default hooks, custom hooks + will be triggered after default hooks. + """ + if checkpoint_config is not None: + checkpoint_config.setdefault('by_epoch', False) + if lr_config is not None: + lr_config.setdefault('by_epoch', False) + if log_config is not None: + for info in log_config['hooks']: + info.setdefault('by_epoch', False) + super(IterBasedRunner, self).register_training_hooks( + lr_config=lr_config, + momentum_config=momentum_config, + optimizer_config=optimizer_config, + checkpoint_config=checkpoint_config, + log_config=log_config, + timer_config=IterTimerHook(), + custom_hooks_config=custom_hooks_config) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py new file mode 100644 index 000000000000..d949e2941c54 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py @@ -0,0 +1,41 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from collections import OrderedDict + +import numpy as np + + +class LogBuffer: + + def __init__(self): + self.val_history = OrderedDict() + self.n_history = OrderedDict() + self.output = OrderedDict() + self.ready = False + + def clear(self): + self.val_history.clear() + self.n_history.clear() + self.clear_output() + + def clear_output(self): + self.output.clear() + self.ready = False + + def update(self, vars, count=1): + assert isinstance(vars, dict) + for key, var in vars.items(): + if key not in self.val_history: + self.val_history[key] = [] + self.n_history[key] = [] + self.val_history[key].append(var) + self.n_history[key].append(count) + + def average(self, n=0): + """Average latest n values or all values.""" + assert n >= 0 + for key in self.val_history: + values = np.array(self.val_history[key][-n:]) + nums = np.array(self.n_history[key][-n:]) + avg = np.sum(values * nums) / np.sum(nums) + self.output[key] = avg + self.ready = True diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py new file mode 100644 index 000000000000..53c34d047099 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .builder import (OPTIMIZER_BUILDERS, OPTIMIZERS, build_optimizer, + build_optimizer_constructor) +from .default_constructor import DefaultOptimizerConstructor + +__all__ = [ + 'OPTIMIZER_BUILDERS', 'OPTIMIZERS', 'DefaultOptimizerConstructor', + 'build_optimizer', 'build_optimizer_constructor' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py new file mode 100644 index 000000000000..f9234eed8f1f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py @@ -0,0 +1,44 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import inspect + +import torch + +from ...utils import Registry, build_from_cfg + +OPTIMIZERS = Registry('optimizer') +OPTIMIZER_BUILDERS = Registry('optimizer builder') + + +def register_torch_optimizers(): + torch_optimizers = [] + for module_name in dir(torch.optim): + if module_name.startswith('__'): + continue + _optim = getattr(torch.optim, module_name) + if inspect.isclass(_optim) and issubclass(_optim, + torch.optim.Optimizer): + OPTIMIZERS.register_module()(_optim) + torch_optimizers.append(module_name) + return torch_optimizers + + +TORCH_OPTIMIZERS = register_torch_optimizers() + + +def build_optimizer_constructor(cfg): + return build_from_cfg(cfg, OPTIMIZER_BUILDERS) + + +def build_optimizer(model, cfg): + optimizer_cfg = copy.deepcopy(cfg) + constructor_type = optimizer_cfg.pop('constructor', + 'DefaultOptimizerConstructor') + paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None) + optim_constructor = build_optimizer_constructor( + dict( + type=constructor_type, + optimizer_cfg=optimizer_cfg, + paramwise_cfg=paramwise_cfg)) + optimizer = optim_constructor(model) + return optimizer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py new file mode 100644 index 000000000000..f13b67e0d63a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py @@ -0,0 +1,249 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch +from torch.nn import GroupNorm, LayerNorm + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import _BatchNorm, _InstanceNorm, build_from_cfg, is_list_of +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.ext_loader import check_ops_exist +from .builder import OPTIMIZER_BUILDERS, OPTIMIZERS + + +@OPTIMIZER_BUILDERS.register_module() +class DefaultOptimizerConstructor: + """Default constructor for optimizers. + + By default each parameter share the same optimizer settings, and we + provide an argument ``paramwise_cfg`` to specify parameter-wise settings. + It is a dict and may contain the following fields: + + - ``custom_keys`` (dict): Specified parameters-wise settings by keys. If + one of the keys in ``custom_keys`` is a substring of the name of one + parameter, then the setting of the parameter will be specified by + ``custom_keys[key]`` and other setting like ``bias_lr_mult`` etc. will + be ignored. It should be noted that the aforementioned ``key`` is the + longest key that is a substring of the name of the parameter. If there + are multiple matched keys with the same length, then the key with lower + alphabet order will be chosen. + ``custom_keys[key]`` should be a dict and may contain fields ``lr_mult`` + and ``decay_mult``. See Example 2 below. + - ``bias_lr_mult`` (float): It will be multiplied to the learning + rate for all bias parameters (except for those in normalization + layers and offset layers of DCN). + - ``bias_decay_mult`` (float): It will be multiplied to the weight + decay for all bias parameters (except for those in + normalization layers, depthwise conv layers, offset layers of DCN). + - ``norm_decay_mult`` (float): It will be multiplied to the weight + decay for all weight and bias parameters of normalization + layers. + - ``dwconv_decay_mult`` (float): It will be multiplied to the weight + decay for all weight and bias parameters of depthwise conv + layers. + - ``dcn_offset_lr_mult`` (float): It will be multiplied to the learning + rate for parameters of offset layer in the deformable convs + of a model. + - ``bypass_duplicate`` (bool): If true, the duplicate parameters + would not be added into optimizer. Default: False. + + Note: + 1. If the option ``dcn_offset_lr_mult`` is used, the constructor will + override the effect of ``bias_lr_mult`` in the bias of offset + layer. So be careful when using both ``bias_lr_mult`` and + ``dcn_offset_lr_mult``. If you wish to apply both of them to the + offset layer in deformable convs, set ``dcn_offset_lr_mult`` + to the original ``dcn_offset_lr_mult`` * ``bias_lr_mult``. + 2. If the option ``dcn_offset_lr_mult`` is used, the constructor will + apply it to all the DCN layers in the model. So be careful when + the model contains multiple DCN layers in places other than + backbone. + + Args: + model (:obj:`nn.Module`): The model with parameters to be optimized. + optimizer_cfg (dict): The config dict of the optimizer. + Positional fields are + + - `type`: class name of the optimizer. + + Optional fields are + + - any arguments of the corresponding optimizer type, e.g., + lr, weight_decay, momentum, etc. + paramwise_cfg (dict, optional): Parameter-wise options. + + Example 1: + >>> model = torch.nn.modules.Conv1d(1, 1, 1) + >>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9, + >>> weight_decay=0.0001) + >>> paramwise_cfg = dict(norm_decay_mult=0.) + >>> optim_builder = DefaultOptimizerConstructor( + >>> optimizer_cfg, paramwise_cfg) + >>> optimizer = optim_builder(model) + + Example 2: + >>> # assume model have attribute model.backbone and model.cls_head + >>> optimizer_cfg = dict(type='SGD', lr=0.01, weight_decay=0.95) + >>> paramwise_cfg = dict(custom_keys={ + '.backbone': dict(lr_mult=0.1, decay_mult=0.9)}) + >>> optim_builder = DefaultOptimizerConstructor( + >>> optimizer_cfg, paramwise_cfg) + >>> optimizer = optim_builder(model) + >>> # Then the `lr` and `weight_decay` for model.backbone is + >>> # (0.01 * 0.1, 0.95 * 0.9). `lr` and `weight_decay` for + >>> # model.cls_head is (0.01, 0.95). + """ + + def __init__(self, optimizer_cfg, paramwise_cfg=None): + if not isinstance(optimizer_cfg, dict): + raise TypeError('optimizer_cfg should be a dict', + f'but got {type(optimizer_cfg)}') + self.optimizer_cfg = optimizer_cfg + self.paramwise_cfg = {} if paramwise_cfg is None else paramwise_cfg + self.base_lr = optimizer_cfg.get('lr', None) + self.base_wd = optimizer_cfg.get('weight_decay', None) + self._validate_cfg() + + def _validate_cfg(self): + if not isinstance(self.paramwise_cfg, dict): + raise TypeError('paramwise_cfg should be None or a dict, ' + f'but got {type(self.paramwise_cfg)}') + + if 'custom_keys' in self.paramwise_cfg: + if not isinstance(self.paramwise_cfg['custom_keys'], dict): + raise TypeError( + 'If specified, custom_keys must be a dict, ' + f'but got {type(self.paramwise_cfg["custom_keys"])}') + if self.base_wd is None: + for key in self.paramwise_cfg['custom_keys']: + if 'decay_mult' in self.paramwise_cfg['custom_keys'][key]: + raise ValueError('base_wd should not be None') + + # get base lr and weight decay + # weight_decay must be explicitly specified if mult is specified + if ('bias_decay_mult' in self.paramwise_cfg + or 'norm_decay_mult' in self.paramwise_cfg + or 'dwconv_decay_mult' in self.paramwise_cfg): + if self.base_wd is None: + raise ValueError('base_wd should not be None') + + def _is_in(self, param_group, param_group_list): + assert is_list_of(param_group_list, dict) + param = set(param_group['params']) + param_set = set() + for group in param_group_list: + param_set.update(set(group['params'])) + + return not param.isdisjoint(param_set) + + def add_params(self, params, module, prefix='', is_dcn_module=None): + """Add all parameters of module to the params list. + + The parameters of the given module will be added to the list of param + groups, with specific rules defined by paramwise_cfg. + + Args: + params (list[dict]): A list of param groups, it will be modified + in place. + module (nn.Module): The module to be added. + prefix (str): The prefix of the module + is_dcn_module (int|float|None): If the current module is a + submodule of DCN, `is_dcn_module` will be passed to + control conv_offset layer's learning rate. Defaults to None. + """ + # get param-wise options + custom_keys = self.paramwise_cfg.get('custom_keys', {}) + # first sort with alphabet order and then sort with reversed len of str + sorted_keys = sorted(sorted(custom_keys.keys()), key=len, reverse=True) + + bias_lr_mult = self.paramwise_cfg.get('bias_lr_mult', 1.) + bias_decay_mult = self.paramwise_cfg.get('bias_decay_mult', 1.) + norm_decay_mult = self.paramwise_cfg.get('norm_decay_mult', 1.) + dwconv_decay_mult = self.paramwise_cfg.get('dwconv_decay_mult', 1.) + bypass_duplicate = self.paramwise_cfg.get('bypass_duplicate', False) + dcn_offset_lr_mult = self.paramwise_cfg.get('dcn_offset_lr_mult', 1.) + + # special rules for norm layers and depth-wise conv layers + is_norm = isinstance(module, + (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm)) + is_dwconv = ( + isinstance(module, torch.nn.Conv2d) + and module.in_channels == module.groups) + + for name, param in module.named_parameters(recurse=False): + param_group = {'params': [param]} + if not param.requires_grad: + params.append(param_group) + continue + if bypass_duplicate and self._is_in(param_group, params): + warnings.warn(f'{prefix} is duplicate. It is skipped since ' + f'bypass_duplicate={bypass_duplicate}') + continue + # if the parameter match one of the custom keys, ignore other rules + is_custom = False + for key in sorted_keys: + if key in f'{prefix}.{name}': + is_custom = True + lr_mult = custom_keys[key].get('lr_mult', 1.) + param_group['lr'] = self.base_lr * lr_mult + if self.base_wd is not None: + decay_mult = custom_keys[key].get('decay_mult', 1.) + param_group['weight_decay'] = self.base_wd * decay_mult + break + + if not is_custom: + # bias_lr_mult affects all bias parameters + # except for norm.bias dcn.conv_offset.bias + if name == 'bias' and not (is_norm or is_dcn_module): + param_group['lr'] = self.base_lr * bias_lr_mult + + if (prefix.find('conv_offset') != -1 and is_dcn_module + and isinstance(module, torch.nn.Conv2d)): + # deal with both dcn_offset's bias & weight + param_group['lr'] = self.base_lr * dcn_offset_lr_mult + + # apply weight decay policies + if self.base_wd is not None: + # norm decay + if is_norm: + param_group[ + 'weight_decay'] = self.base_wd * norm_decay_mult + # depth-wise conv + elif is_dwconv: + param_group[ + 'weight_decay'] = self.base_wd * dwconv_decay_mult + # bias lr and decay + elif name == 'bias' and not is_dcn_module: + # TODO: current bias_decay_mult will have affect on DCN + param_group[ + 'weight_decay'] = self.base_wd * bias_decay_mult + params.append(param_group) + + if check_ops_exist(): + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import DeformConv2d, ModulatedDeformConv2d + is_dcn_module = isinstance(module, + (DeformConv2d, ModulatedDeformConv2d)) + else: + is_dcn_module = False + for child_name, child_mod in module.named_children(): + child_prefix = f'{prefix}.{child_name}' if prefix else child_name + self.add_params( + params, + child_mod, + prefix=child_prefix, + is_dcn_module=is_dcn_module) + + def __call__(self, model): + if hasattr(model, 'module'): + model = model.module + + optimizer_cfg = self.optimizer_cfg.copy() + # if no paramwise option is specified, just use the global setting + if not self.paramwise_cfg: + optimizer_cfg['params'] = model.parameters() + return build_from_cfg(optimizer_cfg, OPTIMIZERS) + + # set param-wise lr and weight decay recursively + params = [] + self.add_params(params, model) + optimizer_cfg['params'] = params + + return build_from_cfg(optimizer_cfg, OPTIMIZERS) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/priority.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/priority.py new file mode 100644 index 000000000000..64cc4e3a05f8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/priority.py @@ -0,0 +1,60 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from enum import Enum + + +class Priority(Enum): + """Hook priority levels. + + +--------------+------------+ + | Level | Value | + +==============+============+ + | HIGHEST | 0 | + +--------------+------------+ + | VERY_HIGH | 10 | + +--------------+------------+ + | HIGH | 30 | + +--------------+------------+ + | ABOVE_NORMAL | 40 | + +--------------+------------+ + | NORMAL | 50 | + +--------------+------------+ + | BELOW_NORMAL | 60 | + +--------------+------------+ + | LOW | 70 | + +--------------+------------+ + | VERY_LOW | 90 | + +--------------+------------+ + | LOWEST | 100 | + +--------------+------------+ + """ + + HIGHEST = 0 + VERY_HIGH = 10 + HIGH = 30 + ABOVE_NORMAL = 40 + NORMAL = 50 + BELOW_NORMAL = 60 + LOW = 70 + VERY_LOW = 90 + LOWEST = 100 + + +def get_priority(priority): + """Get priority value. + + Args: + priority (int or str or :obj:`Priority`): Priority. + + Returns: + int: The priority value. + """ + if isinstance(priority, int): + if priority < 0 or priority > 100: + raise ValueError('priority must be between 0 and 100') + return priority + elif isinstance(priority, Priority): + return priority.value + elif isinstance(priority, str): + return Priority[priority.upper()].value + else: + raise TypeError('priority must be an integer or Priority enum value') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py new file mode 100644 index 000000000000..9680d73032bb --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py @@ -0,0 +1,93 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import random +import sys +import time +import warnings +from getpass import getuser +from socket import gethostname + +import numpy as np +import torch + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + + +def get_host_info(): + """Get hostname and username. + + Return empty string if exception raised, e.g. ``getpass.getuser()`` will + lead to error in docker container + """ + host = '' + try: + host = f'{getuser()}@{gethostname()}' + except Exception as e: + warnings.warn(f'Host or user not found: {str(e)}') + finally: + return host + + +def get_time_str(): + return time.strftime('%Y%m%d_%H%M%S', time.localtime()) + + +def obj_from_dict(info, parent=None, default_args=None): + """Initialize an object from dict. + + The dict must contain the key "type", which indicates the object type, it + can be either a string or type, such as "list" or ``list``. Remaining + fields are treated as the arguments for constructing the object. + + Args: + info (dict): Object types and arguments. + parent (:class:`module`): Module which may containing expected object + classes. + default_args (dict, optional): Default arguments for initializing the + object. + + Returns: + any type: Object built from the dict. + """ + assert isinstance(info, dict) and 'type' in info + assert isinstance(default_args, dict) or default_args is None + args = info.copy() + obj_type = args.pop('type') + if mmcv.is_str(obj_type): + if parent is not None: + obj_type = getattr(parent, obj_type) + else: + obj_type = sys.modules[obj_type] + elif not isinstance(obj_type, type): + raise TypeError('type must be a str or valid type, but ' + f'got {type(obj_type)}') + if default_args is not None: + for name, value in default_args.items(): + args.setdefault(name, value) + return obj_type(**args) + + +def set_random_seed(seed, deterministic=False, use_rank_shift=False): + """Set random seed. + + Args: + seed (int): Seed to be used. + deterministic (bool): Whether to set the deterministic option for + CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` + to True and `torch.backends.cudnn.benchmark` to False. + Default: False. + rank_shift (bool): Whether to add rank number to the random seed to + have different random seed in different threads. Default: False. + """ + if use_rank_shift: + rank, _ = mmcv.runner.get_dist_info() + seed += rank + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + os.environ['PYTHONHASHSEED'] = str(seed) + if deterministic: + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py new file mode 100644 index 000000000000..378a0068432a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py @@ -0,0 +1,69 @@ +# flake8: noqa +# Copyright (c) OpenMMLab. All rights reserved. +from .config import Config, ConfigDict, DictAction +from .misc import (check_prerequisites, concat_list, deprecated_api_warning, + has_method, import_modules_from_strings, is_list_of, + is_method_overridden, is_seq_of, is_str, is_tuple_of, + iter_cast, list_cast, requires_executable, requires_package, + slice_list, to_1tuple, to_2tuple, to_3tuple, to_4tuple, + to_ntuple, tuple_cast) +from .path import (check_file_exist, fopen, is_filepath, mkdir_or_exist, + scandir, symlink) +from .progressbar import (ProgressBar, track_iter_progress, + track_parallel_progress, track_progress) +from .testing import (assert_attrs_equal, assert_dict_contains_subset, + assert_dict_has_keys, assert_is_norm_layer, + assert_keys_equal, assert_params_all_zeros, + check_python_script) +from .timer import Timer, TimerError, check_time +from .version_utils import digit_version, get_git_hash + +try: + import torch +except ImportError: + __all__ = [ + 'Config', 'ConfigDict', 'DictAction', 'is_str', 'iter_cast', + 'list_cast', 'tuple_cast', 'is_seq_of', 'is_list_of', 'is_tuple_of', + 'slice_list', 'concat_list', 'check_prerequisites', 'requires_package', + 'requires_executable', 'is_filepath', 'fopen', 'check_file_exist', + 'mkdir_or_exist', 'symlink', 'scandir', 'ProgressBar', + 'track_progress', 'track_iter_progress', 'track_parallel_progress', + 'Timer', 'TimerError', 'check_time', 'deprecated_api_warning', + 'digit_version', 'get_git_hash', 'import_modules_from_strings', + 'assert_dict_contains_subset', 'assert_attrs_equal', + 'assert_dict_has_keys', 'assert_keys_equal', 'check_python_script', + 'to_1tuple', 'to_2tuple', 'to_3tuple', 'to_4tuple', 'to_ntuple', + 'is_method_overridden', 'has_method' + ] +else: + from .env import collect_env + from .logging import get_logger, print_log + from .parrots_jit import jit, skip_no_elena + from .parrots_wrapper import ( + TORCH_VERSION, BuildExtension, CppExtension, CUDAExtension, DataLoader, + PoolDataLoader, SyncBatchNorm, _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, + _AvgPoolNd, _BatchNorm, _ConvNd, _ConvTransposeMixin, _InstanceNorm, + _MaxPoolNd, get_build_config, is_rocm_pytorch, _get_cuda_home) + from .registry import Registry, build_from_cfg + from .trace import is_jit_tracing + __all__ = [ + 'Config', 'ConfigDict', 'DictAction', 'collect_env', 'get_logger', + 'print_log', 'is_str', 'iter_cast', 'list_cast', 'tuple_cast', + 'is_seq_of', 'is_list_of', 'is_tuple_of', 'slice_list', 'concat_list', + 'check_prerequisites', 'requires_package', 'requires_executable', + 'is_filepath', 'fopen', 'check_file_exist', 'mkdir_or_exist', + 'symlink', 'scandir', 'ProgressBar', 'track_progress', + 'track_iter_progress', 'track_parallel_progress', 'Registry', + 'build_from_cfg', 'Timer', 'TimerError', 'check_time', 'SyncBatchNorm', + '_AdaptiveAvgPoolNd', '_AdaptiveMaxPoolNd', '_AvgPoolNd', '_BatchNorm', + '_ConvNd', '_ConvTransposeMixin', '_InstanceNorm', '_MaxPoolNd', + 'get_build_config', 'BuildExtension', 'CppExtension', 'CUDAExtension', + 'DataLoader', 'PoolDataLoader', 'TORCH_VERSION', + 'deprecated_api_warning', 'digit_version', 'get_git_hash', + 'import_modules_from_strings', 'jit', 'skip_no_elena', + 'assert_dict_contains_subset', 'assert_attrs_equal', + 'assert_dict_has_keys', 'assert_keys_equal', 'assert_is_norm_layer', + 'assert_params_all_zeros', 'check_python_script', + 'is_method_overridden', 'is_jit_tracing', 'is_rocm_pytorch', + '_get_cuda_home', 'has_method' + ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py new file mode 100644 index 000000000000..a699e4d0230d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py @@ -0,0 +1,688 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import ast +import copy +import os +import os.path as osp +import platform +import shutil +import sys +import tempfile +import uuid +import warnings +from argparse import Action, ArgumentParser +from collections import abc +from importlib import import_module + +from addict import Dict +from yapf.yapflib.yapf_api import FormatCode + +from .misc import import_modules_from_strings +from .path import check_file_exist + +if platform.system() == 'Windows': + import regex as re +else: + import re + +BASE_KEY = '_base_' +DELETE_KEY = '_delete_' +DEPRECATION_KEY = '_deprecation_' +RESERVED_KEYS = ['filename', 'text', 'pretty_text'] + + +class ConfigDict(Dict): + + def __missing__(self, name): + raise KeyError(name) + + def __getattr__(self, name): + try: + value = super(ConfigDict, self).__getattr__(name) + except KeyError: + ex = AttributeError(f"'{self.__class__.__name__}' object has no " + f"attribute '{name}'") + except Exception as e: + ex = e + else: + return value + raise ex + + +def add_args(parser, cfg, prefix=''): + for k, v in cfg.items(): + if isinstance(v, str): + parser.add_argument('--' + prefix + k) + elif isinstance(v, int): + parser.add_argument('--' + prefix + k, type=int) + elif isinstance(v, float): + parser.add_argument('--' + prefix + k, type=float) + elif isinstance(v, bool): + parser.add_argument('--' + prefix + k, action='store_true') + elif isinstance(v, dict): + add_args(parser, v, prefix + k + '.') + elif isinstance(v, abc.Iterable): + parser.add_argument('--' + prefix + k, type=type(v[0]), nargs='+') + else: + print(f'cannot parse key {prefix + k} of type {type(v)}') + return parser + + +class Config: + """A facility for config and config files. + + It supports common file formats as configs: python/json/yaml. The interface + is the same as a dict object and also allows access config values as + attributes. + + Example: + >>> cfg = Config(dict(a=1, b=dict(b1=[0, 1]))) + >>> cfg.a + 1 + >>> cfg.b + {'b1': [0, 1]} + >>> cfg.b.b1 + [0, 1] + >>> cfg = Config.fromfile('tests/data/config/a.py') + >>> cfg.filename + "/home/kchen/projects/mmcv/tests/data/config/a.py" + >>> cfg.item4 + 'test' + >>> cfg + "Config [path: /home/kchen/projects/mmcv/tests/data/config/a.py]: " + "{'item1': [1, 2], 'item2': {'a': 0}, 'item3': True, 'item4': 'test'}" + """ + + @staticmethod + def _validate_py_syntax(filename): + with open(filename, 'r', encoding='utf-8') as f: + # Setting encoding explicitly to resolve coding issue on windows + content = f.read() + try: + ast.parse(content) + except SyntaxError as e: + raise SyntaxError('There are syntax errors in config ' + f'file {filename}: {e}') + + @staticmethod + def _substitute_predefined_vars(filename, temp_config_name): + file_dirname = osp.dirname(filename) + file_basename = osp.basename(filename) + file_basename_no_extension = osp.splitext(file_basename)[0] + file_extname = osp.splitext(filename)[1] + support_templates = dict( + fileDirname=file_dirname, + fileBasename=file_basename, + fileBasenameNoExtension=file_basename_no_extension, + fileExtname=file_extname) + with open(filename, 'r', encoding='utf-8') as f: + # Setting encoding explicitly to resolve coding issue on windows + config_file = f.read() + for key, value in support_templates.items(): + regexp = r'\{\{\s*' + str(key) + r'\s*\}\}' + value = value.replace('\\', '/') + config_file = re.sub(regexp, value, config_file) + with open(temp_config_name, 'w', encoding='utf-8') as tmp_config_file: + tmp_config_file.write(config_file) + + @staticmethod + def _pre_substitute_base_vars(filename, temp_config_name): + """Substitute base variable placehoders to string, so that parsing + would work.""" + with open(filename, 'r', encoding='utf-8') as f: + # Setting encoding explicitly to resolve coding issue on windows + config_file = f.read() + base_var_dict = {} + regexp = r'\{\{\s*' + BASE_KEY + r'\.([\w\.]+)\s*\}\}' + base_vars = set(re.findall(regexp, config_file)) + for base_var in base_vars: + randstr = f'_{base_var}_{uuid.uuid4().hex.lower()[:6]}' + base_var_dict[randstr] = base_var + regexp = r'\{\{\s*' + BASE_KEY + r'\.' + base_var + r'\s*\}\}' + config_file = re.sub(regexp, f'"{randstr}"', config_file) + with open(temp_config_name, 'w', encoding='utf-8') as tmp_config_file: + tmp_config_file.write(config_file) + return base_var_dict + + @staticmethod + def _substitute_base_vars(cfg, base_var_dict, base_cfg): + """Substitute variable strings to their actual values.""" + cfg = copy.deepcopy(cfg) + + if isinstance(cfg, dict): + for k, v in cfg.items(): + if isinstance(v, str) and v in base_var_dict: + new_v = base_cfg + for new_k in base_var_dict[v].split('.'): + new_v = new_v[new_k] + cfg[k] = new_v + elif isinstance(v, (list, tuple, dict)): + cfg[k] = Config._substitute_base_vars( + v, base_var_dict, base_cfg) + elif isinstance(cfg, tuple): + cfg = tuple( + Config._substitute_base_vars(c, base_var_dict, base_cfg) + for c in cfg) + elif isinstance(cfg, list): + cfg = [ + Config._substitute_base_vars(c, base_var_dict, base_cfg) + for c in cfg + ] + elif isinstance(cfg, str) and cfg in base_var_dict: + new_v = base_cfg + for new_k in base_var_dict[cfg].split('.'): + new_v = new_v[new_k] + cfg = new_v + + return cfg + + @staticmethod + def _file2dict(filename, use_predefined_variables=True): + filename = osp.abspath(osp.expanduser(filename)) + check_file_exist(filename) + fileExtname = osp.splitext(filename)[1] + if fileExtname not in ['.py', '.json', '.yaml', '.yml']: + raise IOError('Only py/yml/yaml/json type are supported now!') + + with tempfile.TemporaryDirectory() as temp_config_dir: + temp_config_file = tempfile.NamedTemporaryFile( + dir=temp_config_dir, suffix=fileExtname) + if platform.system() == 'Windows': + temp_config_file.close() + temp_config_name = osp.basename(temp_config_file.name) + # Substitute predefined variables + if use_predefined_variables: + Config._substitute_predefined_vars(filename, + temp_config_file.name) + else: + shutil.copyfile(filename, temp_config_file.name) + # Substitute base variables from placeholders to strings + base_var_dict = Config._pre_substitute_base_vars( + temp_config_file.name, temp_config_file.name) + + if filename.endswith('.py'): + temp_module_name = osp.splitext(temp_config_name)[0] + sys.path.insert(0, temp_config_dir) + Config._validate_py_syntax(filename) + mod = import_module(temp_module_name) + sys.path.pop(0) + cfg_dict = { + name: value + for name, value in mod.__dict__.items() + if not name.startswith('__') + } + # delete imported module + del sys.modules[temp_module_name] + elif filename.endswith(('.yml', '.yaml', '.json')): + import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + cfg_dict = mmcv.load(temp_config_file.name) + # close temp file + temp_config_file.close() + + # check deprecation information + if DEPRECATION_KEY in cfg_dict: + deprecation_info = cfg_dict.pop(DEPRECATION_KEY) + warning_msg = f'The config file {filename} will be deprecated ' \ + 'in the future.' + if 'expected' in deprecation_info: + warning_msg += f' Please use {deprecation_info["expected"]} ' \ + 'instead.' + if 'reference' in deprecation_info: + warning_msg += ' More information can be found at ' \ + f'{deprecation_info["reference"]}' + warnings.warn(warning_msg) + + cfg_text = filename + '\n' + with open(filename, 'r', encoding='utf-8') as f: + # Setting encoding explicitly to resolve coding issue on windows + cfg_text += f.read() + + if BASE_KEY in cfg_dict: + cfg_dir = osp.dirname(filename) + base_filename = cfg_dict.pop(BASE_KEY) + base_filename = base_filename if isinstance( + base_filename, list) else [base_filename] + + cfg_dict_list = list() + cfg_text_list = list() + for f in base_filename: + _cfg_dict, _cfg_text = Config._file2dict(osp.join(cfg_dir, f)) + cfg_dict_list.append(_cfg_dict) + cfg_text_list.append(_cfg_text) + + base_cfg_dict = dict() + for c in cfg_dict_list: + duplicate_keys = base_cfg_dict.keys() & c.keys() + if len(duplicate_keys) > 0: + raise KeyError('Duplicate key is not allowed among bases. ' + f'Duplicate keys: {duplicate_keys}') + base_cfg_dict.update(c) + + # Substitute base variables from strings to their actual values + cfg_dict = Config._substitute_base_vars(cfg_dict, base_var_dict, + base_cfg_dict) + + base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict) + cfg_dict = base_cfg_dict + + # merge cfg_text + cfg_text_list.append(cfg_text) + cfg_text = '\n'.join(cfg_text_list) + + return cfg_dict, cfg_text + + @staticmethod + def _merge_a_into_b(a, b, allow_list_keys=False): + """merge dict ``a`` into dict ``b`` (non-inplace). + + Values in ``a`` will overwrite ``b``. ``b`` is copied first to avoid + in-place modifications. + + Args: + a (dict): The source dict to be merged into ``b``. + b (dict): The origin dict to be fetch keys from ``a``. + allow_list_keys (bool): If True, int string keys (e.g. '0', '1') + are allowed in source ``a`` and will replace the element of the + corresponding index in b if b is a list. Default: False. + + Returns: + dict: The modified dict of ``b`` using ``a``. + + Examples: + # Normally merge a into b. + >>> Config._merge_a_into_b( + ... dict(obj=dict(a=2)), dict(obj=dict(a=1))) + {'obj': {'a': 2}} + + # Delete b first and merge a into b. + >>> Config._merge_a_into_b( + ... dict(obj=dict(_delete_=True, a=2)), dict(obj=dict(a=1))) + {'obj': {'a': 2}} + + # b is a list + >>> Config._merge_a_into_b( + ... {'0': dict(a=2)}, [dict(a=1), dict(b=2)], True) + [{'a': 2}, {'b': 2}] + """ + b = b.copy() + for k, v in a.items(): + if allow_list_keys and k.isdigit() and isinstance(b, list): + k = int(k) + if len(b) <= k: + raise KeyError(f'Index {k} exceeds the length of list {b}') + b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) + elif isinstance(v, + dict) and k in b and not v.pop(DELETE_KEY, False): + allowed_types = (dict, list) if allow_list_keys else dict + if not isinstance(b[k], allowed_types): + raise TypeError( + f'{k}={v} in child config cannot inherit from base ' + f'because {k} is a dict in the child config but is of ' + f'type {type(b[k])} in base config. You may set ' + f'`{DELETE_KEY}=True` to ignore the base config') + b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) + else: + b[k] = v + return b + + @staticmethod + def fromfile(filename, + use_predefined_variables=True, + import_custom_modules=True): + cfg_dict, cfg_text = Config._file2dict(filename, + use_predefined_variables) + if import_custom_modules and cfg_dict.get('custom_imports', None): + import_modules_from_strings(**cfg_dict['custom_imports']) + return Config(cfg_dict, cfg_text=cfg_text, filename=filename) + + @staticmethod + def fromstring(cfg_str, file_format): + """Generate config from config str. + + Args: + cfg_str (str): Config str. + file_format (str): Config file format corresponding to the + config str. Only py/yml/yaml/json type are supported now! + + Returns: + obj:`Config`: Config obj. + """ + if file_format not in ['.py', '.json', '.yaml', '.yml']: + raise IOError('Only py/yml/yaml/json type are supported now!') + if file_format != '.py' and 'dict(' in cfg_str: + # check if users specify a wrong suffix for python + warnings.warn( + 'Please check "file_format", the file format may be .py') + with tempfile.NamedTemporaryFile( + 'w', encoding='utf-8', suffix=file_format, + delete=False) as temp_file: + temp_file.write(cfg_str) + # on windows, previous implementation cause error + # see PR 1077 for details + cfg = Config.fromfile(temp_file.name) + os.remove(temp_file.name) + return cfg + + @staticmethod + def auto_argparser(description=None): + """Generate argparser from config file automatically (experimental)""" + partial_parser = ArgumentParser(description=description) + partial_parser.add_argument('config', help='config file path') + cfg_file = partial_parser.parse_known_args()[0].config + cfg = Config.fromfile(cfg_file) + parser = ArgumentParser(description=description) + parser.add_argument('config', help='config file path') + add_args(parser, cfg) + return parser, cfg + + def __init__(self, cfg_dict=None, cfg_text=None, filename=None): + if cfg_dict is None: + cfg_dict = dict() + elif not isinstance(cfg_dict, dict): + raise TypeError('cfg_dict must be a dict, but ' + f'got {type(cfg_dict)}') + for key in cfg_dict: + if key in RESERVED_KEYS: + raise KeyError(f'{key} is reserved for config file') + + super(Config, self).__setattr__('_cfg_dict', ConfigDict(cfg_dict)) + super(Config, self).__setattr__('_filename', filename) + if cfg_text: + text = cfg_text + elif filename: + with open(filename, 'r') as f: + text = f.read() + else: + text = '' + super(Config, self).__setattr__('_text', text) + + @property + def filename(self): + return self._filename + + @property + def text(self): + return self._text + + @property + def pretty_text(self): + + indent = 4 + + def _indent(s_, num_spaces): + s = s_.split('\n') + if len(s) == 1: + return s_ + first = s.pop(0) + s = [(num_spaces * ' ') + line for line in s] + s = '\n'.join(s) + s = first + '\n' + s + return s + + def _format_basic_types(k, v, use_mapping=False): + if isinstance(v, str): + v_str = f"'{v}'" + else: + v_str = str(v) + + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: {v_str}' + else: + attr_str = f'{str(k)}={v_str}' + attr_str = _indent(attr_str, indent) + + return attr_str + + def _format_list(k, v, use_mapping=False): + # check if all items in the list are dict + if all(isinstance(_, dict) for _ in v): + v_str = '[\n' + v_str += '\n'.join( + f'dict({_indent(_format_dict(v_), indent)}),' + for v_ in v).rstrip(',') + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: {v_str}' + else: + attr_str = f'{str(k)}={v_str}' + attr_str = _indent(attr_str, indent) + ']' + else: + attr_str = _format_basic_types(k, v, use_mapping) + return attr_str + + def _contain_invalid_identifier(dict_str): + contain_invalid_identifier = False + for key_name in dict_str: + contain_invalid_identifier |= \ + (not str(key_name).isidentifier()) + return contain_invalid_identifier + + def _format_dict(input_dict, outest_level=False): + r = '' + s = [] + + use_mapping = _contain_invalid_identifier(input_dict) + if use_mapping: + r += '{' + for idx, (k, v) in enumerate(input_dict.items()): + is_last = idx >= len(input_dict) - 1 + end = '' if outest_level or is_last else ',' + if isinstance(v, dict): + v_str = '\n' + _format_dict(v) + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: dict({v_str}' + else: + attr_str = f'{str(k)}=dict({v_str}' + attr_str = _indent(attr_str, indent) + ')' + end + elif isinstance(v, list): + attr_str = _format_list(k, v, use_mapping) + end + else: + attr_str = _format_basic_types(k, v, use_mapping) + end + + s.append(attr_str) + r += '\n'.join(s) + if use_mapping: + r += '}' + return r + + cfg_dict = self._cfg_dict.to_dict() + text = _format_dict(cfg_dict, outest_level=True) + # copied from setup.cfg + yapf_style = dict( + based_on_style='pep8', + blank_line_before_nested_class_or_def=True, + split_before_expression_after_opening_paren=True) + text, _ = FormatCode(text, style_config=yapf_style, verify=True) + + return text + + def __repr__(self): + return f'Config (path: {self.filename}): {self._cfg_dict.__repr__()}' + + def __len__(self): + return len(self._cfg_dict) + + def __getattr__(self, name): + return getattr(self._cfg_dict, name) + + def __getitem__(self, name): + return self._cfg_dict.__getitem__(name) + + def __setattr__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setattr__(name, value) + + def __setitem__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setitem__(name, value) + + def __iter__(self): + return iter(self._cfg_dict) + + def __getstate__(self): + return (self._cfg_dict, self._filename, self._text) + + def __setstate__(self, state): + _cfg_dict, _filename, _text = state + super(Config, self).__setattr__('_cfg_dict', _cfg_dict) + super(Config, self).__setattr__('_filename', _filename) + super(Config, self).__setattr__('_text', _text) + + def dump(self, file=None): + cfg_dict = super(Config, self).__getattribute__('_cfg_dict').to_dict() + if self.filename.endswith('.py'): + if file is None: + return self.pretty_text + else: + with open(file, 'w', encoding='utf-8') as f: + f.write(self.pretty_text) + else: + import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + if file is None: + file_format = self.filename.split('.')[-1] + return mmcv.dump(cfg_dict, file_format=file_format) + else: + mmcv.dump(cfg_dict, file) + + def merge_from_dict(self, options, allow_list_keys=True): + """Merge list into cfg_dict. + + Merge the dict parsed by MultipleKVAction into this cfg. + + Examples: + >>> options = {'model.backbone.depth': 50, + ... 'model.backbone.with_cp':True} + >>> cfg = Config(dict(model=dict(backbone=dict(type='ResNet')))) + >>> cfg.merge_from_dict(options) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict( + ... model=dict(backbone=dict(depth=50, with_cp=True))) + + # Merge list element + >>> cfg = Config(dict(pipeline=[ + ... dict(type='LoadImage'), dict(type='LoadAnnotations')])) + >>> options = dict(pipeline={'0': dict(type='SelfLoadImage')}) + >>> cfg.merge_from_dict(options, allow_list_keys=True) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict(pipeline=[ + ... dict(type='SelfLoadImage'), dict(type='LoadAnnotations')]) + + Args: + options (dict): dict of configs to merge from. + allow_list_keys (bool): If True, int string keys (e.g. '0', '1') + are allowed in ``options`` and will replace the element of the + corresponding index in the config if the config is a list. + Default: True. + """ + option_cfg_dict = {} + for full_key, v in options.items(): + d = option_cfg_dict + key_list = full_key.split('.') + for subkey in key_list[:-1]: + d.setdefault(subkey, ConfigDict()) + d = d[subkey] + subkey = key_list[-1] + d[subkey] = v + + cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + super(Config, self).__setattr__( + '_cfg_dict', + Config._merge_a_into_b( + option_cfg_dict, cfg_dict, allow_list_keys=allow_list_keys)) + + +class DictAction(Action): + """ + argparse action to split an argument into KEY=VALUE form + on the first = and append to a dictionary. List options can + be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit + brackets, i.e. 'KEY=[V1,V2,V3]'. It also support nested brackets to build + list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]' + """ + + @staticmethod + def _parse_int_float_bool(val): + try: + return int(val) + except ValueError: + pass + try: + return float(val) + except ValueError: + pass + if val.lower() in ['true', 'false']: + return True if val.lower() == 'true' else False + return val + + @staticmethod + def _parse_iterable(val): + """Parse iterable values in the string. + + All elements inside '()' or '[]' are treated as iterable values. + + Args: + val (str): Value string. + + Returns: + list | tuple: The expanded list or tuple from the string. + + Examples: + >>> DictAction._parse_iterable('1,2,3') + [1, 2, 3] + >>> DictAction._parse_iterable('[a, b, c]') + ['a', 'b', 'c'] + >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]') + [(1, 2, 3), ['a', 'b'], 'c'] + """ + + def find_next_comma(string): + """Find the position of next comma in the string. + + If no ',' is found in the string, return the string length. All + chars inside '()' and '[]' are treated as one element and thus ',' + inside these brackets are ignored. + """ + assert (string.count('(') == string.count(')')) and ( + string.count('[') == string.count(']')), \ + f'Imbalanced brackets exist in {string}' + end = len(string) + for idx, char in enumerate(string): + pre = string[:idx] + # The string before this ',' is balanced + if ((char == ',') and (pre.count('(') == pre.count(')')) + and (pre.count('[') == pre.count(']'))): + end = idx + break + return end + + # Strip ' and " characters and replace whitespace. + val = val.strip('\'\"').replace(' ', '') + is_tuple = False + if val.startswith('(') and val.endswith(')'): + is_tuple = True + val = val[1:-1] + elif val.startswith('[') and val.endswith(']'): + val = val[1:-1] + elif ',' not in val: + # val is a single value + return DictAction._parse_int_float_bool(val) + + values = [] + while len(val) > 0: + comma_idx = find_next_comma(val) + element = DictAction._parse_iterable(val[:comma_idx]) + values.append(element) + val = val[comma_idx + 1:] + if is_tuple: + values = tuple(values) + return values + + def __call__(self, parser, namespace, values, option_string=None): + options = {} + for kv in values: + key, val = kv.split('=', maxsplit=1) + options[key] = self._parse_iterable(val) + setattr(namespace, self.dest, options) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py new file mode 100644 index 000000000000..97f99de9680c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py @@ -0,0 +1,95 @@ +# Copyright (c) OpenMMLab. All rights reserved. +"""This file holding some environment constant for sharing by other files.""" + +import os.path as osp +import subprocess +import sys +from collections import defaultdict + +import cv2 +import torch + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from .parrots_wrapper import get_build_config + + +def collect_env(): + """Collect the information of the running environments. + + Returns: + dict: The environment information. The following fields are contained. + + - sys.platform: The variable of ``sys.platform``. + - Python: Python version. + - CUDA available: Bool, indicating if CUDA is available. + - GPU devices: Device type of each GPU. + - CUDA_HOME (optional): The env var ``CUDA_HOME``. + - NVCC (optional): NVCC version. + - GCC: GCC version, "n/a" if GCC is not installed. + - PyTorch: PyTorch version. + - PyTorch compiling details: The output of \ + ``torch.__config__.show()``. + - TorchVision (optional): TorchVision version. + - OpenCV: OpenCV version. + - MMCV: MMCV version. + - MMCV Compiler: The GCC version for compiling MMCV ops. + - MMCV CUDA Compiler: The CUDA version for compiling MMCV ops. + """ + env_info = {} + env_info['sys.platform'] = sys.platform + env_info['Python'] = sys.version.replace('\n', '') + + cuda_available = torch.cuda.is_available() + env_info['CUDA available'] = cuda_available + + if cuda_available: + devices = defaultdict(list) + for k in range(torch.cuda.device_count()): + devices[torch.cuda.get_device_name(k)].append(str(k)) + for name, device_ids in devices.items(): + env_info['GPU ' + ','.join(device_ids)] = name + + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _get_cuda_home + CUDA_HOME = _get_cuda_home() + env_info['CUDA_HOME'] = CUDA_HOME + + if CUDA_HOME is not None and osp.isdir(CUDA_HOME): + try: + nvcc = osp.join(CUDA_HOME, 'bin/nvcc') + nvcc = subprocess.check_output( + f'"{nvcc}" -V | tail -n1', shell=True) + nvcc = nvcc.decode('utf-8').strip() + except subprocess.SubprocessError: + nvcc = 'Not Available' + env_info['NVCC'] = nvcc + + try: + gcc = subprocess.check_output('gcc --version | head -n1', shell=True) + gcc = gcc.decode('utf-8').strip() + env_info['GCC'] = gcc + except subprocess.CalledProcessError: # gcc is unavailable + env_info['GCC'] = 'n/a' + + env_info['PyTorch'] = torch.__version__ + env_info['PyTorch compiling details'] = get_build_config() + + try: + import torchvision + env_info['TorchVision'] = torchvision.__version__ + except ModuleNotFoundError: + pass + + env_info['OpenCV'] = cv2.__version__ + + env_info['MMCV'] = mmcv.__version__ + + try: + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import get_compiler_version, get_compiling_cuda_version + except ModuleNotFoundError: + env_info['MMCV Compiler'] = 'n/a' + env_info['MMCV CUDA Compiler'] = 'n/a' + else: + env_info['MMCV Compiler'] = get_compiler_version() + env_info['MMCV CUDA Compiler'] = get_compiling_cuda_version() + + return env_info diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py new file mode 100644 index 000000000000..08132d2c1b9a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py @@ -0,0 +1,71 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import importlib +import os +import pkgutil +import warnings +from collections import namedtuple + +import torch + +if torch.__version__ != 'parrots': + + def load_ext(name, funcs): + ext = importlib.import_module('mmcv.' + name) + for fun in funcs: + assert hasattr(ext, fun), f'{fun} miss in module {name}' + return ext +else: + from parrots import extension + from parrots.base import ParrotsException + + has_return_value_ops = [ + 'nms', + 'softnms', + 'nms_match', + 'nms_rotated', + 'top_pool_forward', + 'top_pool_backward', + 'bottom_pool_forward', + 'bottom_pool_backward', + 'left_pool_forward', + 'left_pool_backward', + 'right_pool_forward', + 'right_pool_backward', + 'fused_bias_leakyrelu', + 'upfirdn2d', + 'ms_deform_attn_forward', + 'pixel_group', + 'contour_expand', + ] + + def get_fake_func(name, e): + + def fake_func(*args, **kwargs): + warnings.warn(f'{name} is not supported in parrots now') + raise e + + return fake_func + + def load_ext(name, funcs): + ExtModule = namedtuple('ExtModule', funcs) + ext_list = [] + lib_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + for fun in funcs: + try: + ext_fun = extension.load(fun, name, lib_dir=lib_root) + except ParrotsException as e: + if 'No element registered' not in e.message: + warnings.warn(e.message) + ext_fun = get_fake_func(fun, e) + ext_list.append(ext_fun) + else: + if fun in has_return_value_ops: + ext_list.append(ext_fun.op) + else: + ext_list.append(ext_fun.op_) + return ExtModule(*ext_list) + + +def check_ops_exist(): + ext_loader = pkgutil.find_loader('mmcv._ext') + return ext_loader is not None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py new file mode 100644 index 000000000000..4aa0e04bb9b3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py @@ -0,0 +1,110 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging + +import torch.distributed as dist + +logger_initialized = {} + + +def get_logger(name, log_file=None, log_level=logging.INFO, file_mode='w'): + """Initialize and get a logger by name. + + If the logger has not been initialized, this method will initialize the + logger by adding one or two handlers, otherwise the initialized logger will + be directly returned. During initialization, a StreamHandler will always be + added. If `log_file` is specified and the process rank is 0, a FileHandler + will also be added. + + Args: + name (str): Logger name. + log_file (str | None): The log filename. If specified, a FileHandler + will be added to the logger. + log_level (int): The logger level. Note that only the process of + rank 0 is affected, and other processes will set the level to + "Error" thus be silent most of the time. + file_mode (str): The file mode used in opening log file. + Defaults to 'w'. + + Returns: + logging.Logger: The expected logger. + """ + logger = logging.getLogger(name) + if name in logger_initialized: + return logger + # handle hierarchical names + # e.g., logger "a" is initialized, then logger "a.b" will skip the + # initialization since it is a child of "a". + for logger_name in logger_initialized: + if name.startswith(logger_name): + return logger + + # handle duplicate logs to the console + # Starting in 1.8.0, PyTorch DDP attaches a StreamHandler (NOTSET) + # to the root logger. As logger.propagate is True by default, this root + # level handler causes logging messages from rank>0 processes to + # unexpectedly show up on the console, creating much unwanted clutter. + # To fix this issue, we set the root logger's StreamHandler, if any, to log + # at the ERROR level. + for handler in logger.root.handlers: + if type(handler) is logging.StreamHandler: + handler.setLevel(logging.ERROR) + + stream_handler = logging.StreamHandler() + handlers = [stream_handler] + + if dist.is_available() and dist.is_initialized(): + rank = dist.get_rank() + else: + rank = 0 + + # only rank 0 will add a FileHandler + if rank == 0 and log_file is not None: + # Here, the default behaviour of the official logger is 'a'. Thus, we + # provide an interface to change the file mode to the default + # behaviour. + file_handler = logging.FileHandler(log_file, file_mode) + handlers.append(file_handler) + + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + for handler in handlers: + handler.setFormatter(formatter) + handler.setLevel(log_level) + logger.addHandler(handler) + + if rank == 0: + logger.setLevel(log_level) + else: + logger.setLevel(logging.ERROR) + + logger_initialized[name] = True + + return logger + + +def print_log(msg, logger=None, level=logging.INFO): + """Print a log message. + + Args: + msg (str): The message to be logged. + logger (logging.Logger | str | None): The logger to be used. + Some special loggers are: + - "silent": no message will be printed. + - other str: the logger obtained with `get_root_logger(logger)`. + - None: The `print()` method will be used to print log messages. + level (int): Logging level. Only available when `logger` is a Logger + object or "root". + """ + if logger is None: + print(msg) + elif isinstance(logger, logging.Logger): + logger.log(level, msg) + elif logger == 'silent': + pass + elif isinstance(logger, str): + _logger = get_logger(logger) + _logger.log(level, msg) + else: + raise TypeError( + 'logger should be either a logging.Logger object, str, ' + f'"silent" or None, but got {type(logger)}') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py new file mode 100644 index 000000000000..2c58d0d7fee9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py @@ -0,0 +1,377 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import collections.abc +import functools +import itertools +import subprocess +import warnings +from collections import abc +from importlib import import_module +from inspect import getfullargspec +from itertools import repeat + + +# From PyTorch internals +def _ntuple(n): + + def parse(x): + if isinstance(x, collections.abc.Iterable): + return x + return tuple(repeat(x, n)) + + return parse + + +to_1tuple = _ntuple(1) +to_2tuple = _ntuple(2) +to_3tuple = _ntuple(3) +to_4tuple = _ntuple(4) +to_ntuple = _ntuple + + +def is_str(x): + """Whether the input is an string instance. + + Note: This method is deprecated since python 2 is no longer supported. + """ + return isinstance(x, str) + + +def import_modules_from_strings(imports, allow_failed_imports=False): + """Import modules from the given list of strings. + + Args: + imports (list | str | None): The given module names to be imported. + allow_failed_imports (bool): If True, the failed imports will return + None. Otherwise, an ImportError is raise. Default: False. + + Returns: + list[module] | module | None: The imported modules. + + Examples: + >>> osp, sys = import_modules_from_strings( + ... ['os.path', 'sys']) + >>> import os.path as osp_ + >>> import sys as sys_ + >>> assert osp == osp_ + >>> assert sys == sys_ + """ + if not imports: + return + single_import = False + if isinstance(imports, str): + single_import = True + imports = [imports] + if not isinstance(imports, list): + raise TypeError( + f'custom_imports must be a list but got type {type(imports)}') + imported = [] + for imp in imports: + if not isinstance(imp, str): + raise TypeError( + f'{imp} is of type {type(imp)} and cannot be imported.') + try: + imported_tmp = import_module(imp) + except ImportError: + if allow_failed_imports: + warnings.warn(f'{imp} failed to import and is ignored.', + UserWarning) + imported_tmp = None + else: + raise ImportError + imported.append(imported_tmp) + if single_import: + imported = imported[0] + return imported + + +def iter_cast(inputs, dst_type, return_type=None): + """Cast elements of an iterable object into some type. + + Args: + inputs (Iterable): The input object. + dst_type (type): Destination type. + return_type (type, optional): If specified, the output object will be + converted to this type, otherwise an iterator. + + Returns: + iterator or specified type: The converted object. + """ + if not isinstance(inputs, abc.Iterable): + raise TypeError('inputs must be an iterable object') + if not isinstance(dst_type, type): + raise TypeError('"dst_type" must be a valid type') + + out_iterable = map(dst_type, inputs) + + if return_type is None: + return out_iterable + else: + return return_type(out_iterable) + + +def list_cast(inputs, dst_type): + """Cast elements of an iterable object into a list of some type. + + A partial method of :func:`iter_cast`. + """ + return iter_cast(inputs, dst_type, return_type=list) + + +def tuple_cast(inputs, dst_type): + """Cast elements of an iterable object into a tuple of some type. + + A partial method of :func:`iter_cast`. + """ + return iter_cast(inputs, dst_type, return_type=tuple) + + +def is_seq_of(seq, expected_type, seq_type=None): + """Check whether it is a sequence of some type. + + Args: + seq (Sequence): The sequence to be checked. + expected_type (type): Expected type of sequence items. + seq_type (type, optional): Expected sequence type. + + Returns: + bool: Whether the sequence is valid. + """ + if seq_type is None: + exp_seq_type = abc.Sequence + else: + assert isinstance(seq_type, type) + exp_seq_type = seq_type + if not isinstance(seq, exp_seq_type): + return False + for item in seq: + if not isinstance(item, expected_type): + return False + return True + + +def is_list_of(seq, expected_type): + """Check whether it is a list of some type. + + A partial method of :func:`is_seq_of`. + """ + return is_seq_of(seq, expected_type, seq_type=list) + + +def is_tuple_of(seq, expected_type): + """Check whether it is a tuple of some type. + + A partial method of :func:`is_seq_of`. + """ + return is_seq_of(seq, expected_type, seq_type=tuple) + + +def slice_list(in_list, lens): + """Slice a list into several sub lists by a list of given length. + + Args: + in_list (list): The list to be sliced. + lens(int or list): The expected length of each out list. + + Returns: + list: A list of sliced list. + """ + if isinstance(lens, int): + assert len(in_list) % lens == 0 + lens = [lens] * int(len(in_list) / lens) + if not isinstance(lens, list): + raise TypeError('"indices" must be an integer or a list of integers') + elif sum(lens) != len(in_list): + raise ValueError('sum of lens and list length does not ' + f'match: {sum(lens)} != {len(in_list)}') + out_list = [] + idx = 0 + for i in range(len(lens)): + out_list.append(in_list[idx:idx + lens[i]]) + idx += lens[i] + return out_list + + +def concat_list(in_list): + """Concatenate a list of list into a single list. + + Args: + in_list (list): The list of list to be merged. + + Returns: + list: The concatenated flat list. + """ + return list(itertools.chain(*in_list)) + + +def check_prerequisites( + prerequisites, + checker, + msg_tmpl='Prerequisites "{}" are required in method "{}" but not ' + 'found, please install them first.'): # yapf: disable + """A decorator factory to check if prerequisites are satisfied. + + Args: + prerequisites (str of list[str]): Prerequisites to be checked. + checker (callable): The checker method that returns True if a + prerequisite is meet, False otherwise. + msg_tmpl (str): The message template with two variables. + + Returns: + decorator: A specific decorator. + """ + + def wrap(func): + + @functools.wraps(func) + def wrapped_func(*args, **kwargs): + requirements = [prerequisites] if isinstance( + prerequisites, str) else prerequisites + missing = [] + for item in requirements: + if not checker(item): + missing.append(item) + if missing: + print(msg_tmpl.format(', '.join(missing), func.__name__)) + raise RuntimeError('Prerequisites not meet.') + else: + return func(*args, **kwargs) + + return wrapped_func + + return wrap + + +def _check_py_package(package): + try: + import_module(package) + except ImportError: + return False + else: + return True + + +def _check_executable(cmd): + if subprocess.call(f'which {cmd}', shell=True) != 0: + return False + else: + return True + + +def requires_package(prerequisites): + """A decorator to check if some python packages are installed. + + Example: + >>> @requires_package('numpy') + >>> func(arg1, args): + >>> return numpy.zeros(1) + array([0.]) + >>> @requires_package(['numpy', 'non_package']) + >>> func(arg1, args): + >>> return numpy.zeros(1) + ImportError + """ + return check_prerequisites(prerequisites, checker=_check_py_package) + + +def requires_executable(prerequisites): + """A decorator to check if some executable files are installed. + + Example: + >>> @requires_executable('ffmpeg') + >>> func(arg1, args): + >>> print(1) + 1 + """ + return check_prerequisites(prerequisites, checker=_check_executable) + + +def deprecated_api_warning(name_dict, cls_name=None): + """A decorator to check if some arguments are deprecate and try to replace + deprecate src_arg_name to dst_arg_name. + + Args: + name_dict(dict): + key (str): Deprecate argument names. + val (str): Expected argument names. + + Returns: + func: New function. + """ + + def api_warning_wrapper(old_func): + + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get name of the function + func_name = old_func.__name__ + if cls_name is not None: + func_name = f'{cls_name}.{func_name}' + if args: + arg_names = args_info.args[:len(args)] + for src_arg_name, dst_arg_name in name_dict.items(): + if src_arg_name in arg_names: + warnings.warn( + f'"{src_arg_name}" is deprecated in ' + f'`{func_name}`, please use "{dst_arg_name}" ' + 'instead') + arg_names[arg_names.index(src_arg_name)] = dst_arg_name + if kwargs: + for src_arg_name, dst_arg_name in name_dict.items(): + if src_arg_name in kwargs: + + assert dst_arg_name not in kwargs, ( + f'The expected behavior is to replace ' + f'the deprecated key `{src_arg_name}` to ' + f'new key `{dst_arg_name}`, but got them ' + f'in the arguments at the same time, which ' + f'is confusing. `{src_arg_name} will be ' + f'deprecated in the future, please ' + f'use `{dst_arg_name}` instead.') + + warnings.warn( + f'"{src_arg_name}" is deprecated in ' + f'`{func_name}`, please use "{dst_arg_name}" ' + 'instead') + kwargs[dst_arg_name] = kwargs.pop(src_arg_name) + + # apply converted arguments to the decorated method + output = old_func(*args, **kwargs) + return output + + return new_func + + return api_warning_wrapper + + +def is_method_overridden(method, base_class, derived_class): + """Check if a method of base class is overridden in derived class. + + Args: + method (str): the method name to check. + base_class (type): the class of the base class. + derived_class (type | Any): the class or instance of the derived class. + """ + assert isinstance(base_class, type), \ + "base_class doesn't accept instance, Please pass class instead." + + if not isinstance(derived_class, type): + derived_class = derived_class.__class__ + + base_method = getattr(base_class, method) + derived_method = getattr(derived_class, method) + return derived_method != base_method + + +def has_method(obj: object, method: str) -> bool: + """Check whether the object has a method. + + Args: + method (str): The method name to check. + obj (object): The object to check. + + Returns: + bool: True if the object has the method else False. + """ + return hasattr(obj, method) and callable(getattr(obj, method)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py new file mode 100644 index 000000000000..61873f6dbb9b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py @@ -0,0 +1,41 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os + +from .parrots_wrapper import TORCH_VERSION + +parrots_jit_option = os.getenv('PARROTS_JIT_OPTION') + +if TORCH_VERSION == 'parrots' and parrots_jit_option == 'ON': + from parrots.jit import pat as jit +else: + + def jit(func=None, + check_input=None, + full_shape=True, + derivate=False, + coderize=False, + optimize=False): + + def wrapper(func): + + def wrapper_inner(*args, **kargs): + return func(*args, **kargs) + + return wrapper_inner + + if func is None: + return wrapper + else: + return func + + +if TORCH_VERSION == 'parrots': + from parrots.utils.tester import skip_no_elena +else: + + def skip_no_elena(func): + + def wrapper(*args, **kargs): + return func(*args, **kargs) + + return wrapper diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py new file mode 100644 index 000000000000..93c97640d4b9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py @@ -0,0 +1,107 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from functools import partial + +import torch + +TORCH_VERSION = torch.__version__ + + +def is_rocm_pytorch() -> bool: + is_rocm = False + if TORCH_VERSION != 'parrots': + try: + from torch.utils.cpp_extension import ROCM_HOME + is_rocm = True if ((torch.version.hip is not None) and + (ROCM_HOME is not None)) else False + except ImportError: + pass + return is_rocm + + +def _get_cuda_home(): + if TORCH_VERSION == 'parrots': + from parrots.utils.build_extension import CUDA_HOME + else: + if is_rocm_pytorch(): + from torch.utils.cpp_extension import ROCM_HOME + CUDA_HOME = ROCM_HOME + else: + from torch.utils.cpp_extension import CUDA_HOME + return CUDA_HOME + + +def get_build_config(): + if TORCH_VERSION == 'parrots': + from parrots.config import get_build_info + return get_build_info() + else: + return torch.__config__.show() + + +def _get_conv(): + if TORCH_VERSION == 'parrots': + from parrots.nn.modules.conv import _ConvNd, _ConvTransposeMixin + else: + from torch.nn.modules.conv import _ConvNd, _ConvTransposeMixin + return _ConvNd, _ConvTransposeMixin + + +def _get_dataloader(): + if TORCH_VERSION == 'parrots': + from torch.utils.data import DataLoader, PoolDataLoader + else: + from torch.utils.data import DataLoader + PoolDataLoader = DataLoader + return DataLoader, PoolDataLoader + + +def _get_extension(): + if TORCH_VERSION == 'parrots': + from parrots.utils.build_extension import BuildExtension, Extension + CppExtension = partial(Extension, cuda=False) + CUDAExtension = partial(Extension, cuda=True) + else: + from torch.utils.cpp_extension import (BuildExtension, CppExtension, + CUDAExtension) + return BuildExtension, CppExtension, CUDAExtension + + +def _get_pool(): + if TORCH_VERSION == 'parrots': + from parrots.nn.modules.pool import (_AdaptiveAvgPoolNd, + _AdaptiveMaxPoolNd, _AvgPoolNd, + _MaxPoolNd) + else: + from torch.nn.modules.pooling import (_AdaptiveAvgPoolNd, + _AdaptiveMaxPoolNd, _AvgPoolNd, + _MaxPoolNd) + return _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd + + +def _get_norm(): + if TORCH_VERSION == 'parrots': + from parrots.nn.modules.batchnorm import _BatchNorm, _InstanceNorm + SyncBatchNorm_ = torch.nn.SyncBatchNorm2d + else: + from torch.nn.modules.instancenorm import _InstanceNorm + from torch.nn.modules.batchnorm import _BatchNorm + SyncBatchNorm_ = torch.nn.SyncBatchNorm + return _BatchNorm, _InstanceNorm, SyncBatchNorm_ + + +_ConvNd, _ConvTransposeMixin = _get_conv() +DataLoader, PoolDataLoader = _get_dataloader() +BuildExtension, CppExtension, CUDAExtension = _get_extension() +_BatchNorm, _InstanceNorm, SyncBatchNorm_ = _get_norm() +_AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd = _get_pool() + + +class SyncBatchNorm(SyncBatchNorm_): + + def _check_input_dim(self, input): + if TORCH_VERSION == 'parrots': + if input.dim() < 2: + raise ValueError( + f'expected at least 2D input (got {input.dim()}D input)') + else: + super()._check_input_dim(input) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py new file mode 100644 index 000000000000..7dab4b304141 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py @@ -0,0 +1,101 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import os.path as osp +from pathlib import Path + +from .misc import is_str + + +def is_filepath(x): + return is_str(x) or isinstance(x, Path) + + +def fopen(filepath, *args, **kwargs): + if is_str(filepath): + return open(filepath, *args, **kwargs) + elif isinstance(filepath, Path): + return filepath.open(*args, **kwargs) + raise ValueError('`filepath` should be a string or a Path') + + +def check_file_exist(filename, msg_tmpl='file "{}" does not exist'): + if not osp.isfile(filename): + raise FileNotFoundError(msg_tmpl.format(filename)) + + +def mkdir_or_exist(dir_name, mode=0o777): + if dir_name == '': + return + dir_name = osp.expanduser(dir_name) + os.makedirs(dir_name, mode=mode, exist_ok=True) + + +def symlink(src, dst, overwrite=True, **kwargs): + if os.path.lexists(dst) and overwrite: + os.remove(dst) + os.symlink(src, dst, **kwargs) + + +def scandir(dir_path, suffix=None, recursive=False, case_sensitive=True): + """Scan a directory to find the interested files. + + Args: + dir_path (str | obj:`Path`): Path of the directory. + suffix (str | tuple(str), optional): File suffix that we are + interested in. Default: None. + recursive (bool, optional): If set to True, recursively scan the + directory. Default: False. + case_sensitive (bool, optional) : If set to False, ignore the case of + suffix. Default: True. + + Returns: + A generator for all the interested files with relative paths. + """ + if isinstance(dir_path, (str, Path)): + dir_path = str(dir_path) + else: + raise TypeError('"dir_path" must be a string or Path object') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('"suffix" must be a string or tuple of strings') + + if suffix is not None and not case_sensitive: + suffix = suffix.lower() if isinstance(suffix, str) else tuple( + item.lower() for item in suffix) + + root = dir_path + + def _scandir(dir_path, suffix, recursive, case_sensitive): + for entry in os.scandir(dir_path): + if not entry.name.startswith('.') and entry.is_file(): + rel_path = osp.relpath(entry.path, root) + _rel_path = rel_path if case_sensitive else rel_path.lower() + if suffix is None or _rel_path.endswith(suffix): + yield rel_path + elif recursive and os.path.isdir(entry.path): + # scan recursively if entry.path is a directory + yield from _scandir(entry.path, suffix, recursive, + case_sensitive) + + return _scandir(dir_path, suffix, recursive, case_sensitive) + + +def find_vcs_root(path, markers=('.git', )): + """Finds the root directory (including itself) of specified markers. + + Args: + path (str): Path of directory or file. + markers (list[str], optional): List of file or directory names. + + Returns: + The directory contained one of the markers or None if not found. + """ + if osp.isfile(path): + path = osp.dirname(path) + + prev, cur = None, osp.abspath(osp.expanduser(path)) + while cur != prev: + if any(osp.exists(osp.join(cur, marker)) for marker in markers): + return cur + prev, cur = cur, osp.split(cur)[0] + return None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py new file mode 100644 index 000000000000..0062f670dd94 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py @@ -0,0 +1,208 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import sys +from collections.abc import Iterable +from multiprocessing import Pool +from shutil import get_terminal_size + +from .timer import Timer + + +class ProgressBar: + """A progress bar which can print the progress.""" + + def __init__(self, task_num=0, bar_width=50, start=True, file=sys.stdout): + self.task_num = task_num + self.bar_width = bar_width + self.completed = 0 + self.file = file + if start: + self.start() + + @property + def terminal_width(self): + width, _ = get_terminal_size() + return width + + def start(self): + if self.task_num > 0: + self.file.write(f'[{" " * self.bar_width}] 0/{self.task_num}, ' + 'elapsed: 0s, ETA:') + else: + self.file.write('completed: 0, elapsed: 0s') + self.file.flush() + self.timer = Timer() + + def update(self, num_tasks=1): + assert num_tasks > 0 + self.completed += num_tasks + elapsed = self.timer.since_start() + if elapsed > 0: + fps = self.completed / elapsed + else: + fps = float('inf') + if self.task_num > 0: + percentage = self.completed / float(self.task_num) + eta = int(elapsed * (1 - percentage) / percentage + 0.5) + msg = f'\r[{{}}] {self.completed}/{self.task_num}, ' \ + f'{fps:.1f} task/s, elapsed: {int(elapsed + 0.5)}s, ' \ + f'ETA: {eta:5}s' + + bar_width = min(self.bar_width, + int(self.terminal_width - len(msg)) + 2, + int(self.terminal_width * 0.6)) + bar_width = max(2, bar_width) + mark_width = int(bar_width * percentage) + bar_chars = '>' * mark_width + ' ' * (bar_width - mark_width) + self.file.write(msg.format(bar_chars)) + else: + self.file.write( + f'completed: {self.completed}, elapsed: {int(elapsed + 0.5)}s,' + f' {fps:.1f} tasks/s') + self.file.flush() + + +def track_progress(func, tasks, bar_width=50, file=sys.stdout, **kwargs): + """Track the progress of tasks execution with a progress bar. + + Tasks are done with a simple for-loop. + + Args: + func (callable): The function to be applied to each task. + tasks (list or tuple[Iterable, int]): A list of tasks or + (tasks, total num). + bar_width (int): Width of progress bar. + + Returns: + list: The task results. + """ + if isinstance(tasks, tuple): + assert len(tasks) == 2 + assert isinstance(tasks[0], Iterable) + assert isinstance(tasks[1], int) + task_num = tasks[1] + tasks = tasks[0] + elif isinstance(tasks, Iterable): + task_num = len(tasks) + else: + raise TypeError( + '"tasks" must be an iterable object or a (iterator, int) tuple') + prog_bar = ProgressBar(task_num, bar_width, file=file) + results = [] + for task in tasks: + results.append(func(task, **kwargs)) + prog_bar.update() + prog_bar.file.write('\n') + return results + + +def init_pool(process_num, initializer=None, initargs=None): + if initializer is None: + return Pool(process_num) + elif initargs is None: + return Pool(process_num, initializer) + else: + if not isinstance(initargs, tuple): + raise TypeError('"initargs" must be a tuple') + return Pool(process_num, initializer, initargs) + + +def track_parallel_progress(func, + tasks, + nproc, + initializer=None, + initargs=None, + bar_width=50, + chunksize=1, + skip_first=False, + keep_order=True, + file=sys.stdout): + """Track the progress of parallel task execution with a progress bar. + + The built-in :mod:`multiprocessing` module is used for process pools and + tasks are done with :func:`Pool.map` or :func:`Pool.imap_unordered`. + + Args: + func (callable): The function to be applied to each task. + tasks (list or tuple[Iterable, int]): A list of tasks or + (tasks, total num). + nproc (int): Process (worker) number. + initializer (None or callable): Refer to :class:`multiprocessing.Pool` + for details. + initargs (None or tuple): Refer to :class:`multiprocessing.Pool` for + details. + chunksize (int): Refer to :class:`multiprocessing.Pool` for details. + bar_width (int): Width of progress bar. + skip_first (bool): Whether to skip the first sample for each worker + when estimating fps, since the initialization step may takes + longer. + keep_order (bool): If True, :func:`Pool.imap` is used, otherwise + :func:`Pool.imap_unordered` is used. + + Returns: + list: The task results. + """ + if isinstance(tasks, tuple): + assert len(tasks) == 2 + assert isinstance(tasks[0], Iterable) + assert isinstance(tasks[1], int) + task_num = tasks[1] + tasks = tasks[0] + elif isinstance(tasks, Iterable): + task_num = len(tasks) + else: + raise TypeError( + '"tasks" must be an iterable object or a (iterator, int) tuple') + pool = init_pool(nproc, initializer, initargs) + start = not skip_first + task_num -= nproc * chunksize * int(skip_first) + prog_bar = ProgressBar(task_num, bar_width, start, file=file) + results = [] + if keep_order: + gen = pool.imap(func, tasks, chunksize) + else: + gen = pool.imap_unordered(func, tasks, chunksize) + for result in gen: + results.append(result) + if skip_first: + if len(results) < nproc * chunksize: + continue + elif len(results) == nproc * chunksize: + prog_bar.start() + continue + prog_bar.update() + prog_bar.file.write('\n') + pool.close() + pool.join() + return results + + +def track_iter_progress(tasks, bar_width=50, file=sys.stdout): + """Track the progress of tasks iteration or enumeration with a progress + bar. + + Tasks are yielded with a simple for-loop. + + Args: + tasks (list or tuple[Iterable, int]): A list of tasks or + (tasks, total num). + bar_width (int): Width of progress bar. + + Yields: + list: The task results. + """ + if isinstance(tasks, tuple): + assert len(tasks) == 2 + assert isinstance(tasks[0], Iterable) + assert isinstance(tasks[1], int) + task_num = tasks[1] + tasks = tasks[0] + elif isinstance(tasks, Iterable): + task_num = len(tasks) + else: + raise TypeError( + '"tasks" must be an iterable object or a (iterator, int) tuple') + prog_bar = ProgressBar(task_num, bar_width, file=file) + for task in tasks: + yield task + prog_bar.update() + prog_bar.file.write('\n') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py new file mode 100644 index 000000000000..fa9df39bc9f3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py @@ -0,0 +1,315 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import inspect +import warnings +from functools import partial + +from .misc import is_seq_of + + +def build_from_cfg(cfg, registry, default_args=None): + """Build a module from config dict. + + Args: + cfg (dict): Config dict. It should at least contain the key "type". + registry (:obj:`Registry`): The registry to search the type from. + default_args (dict, optional): Default initialization arguments. + + Returns: + object: The constructed object. + """ + if not isinstance(cfg, dict): + raise TypeError(f'cfg must be a dict, but got {type(cfg)}') + if 'type' not in cfg: + if default_args is None or 'type' not in default_args: + raise KeyError( + '`cfg` or `default_args` must contain the key "type", ' + f'but got {cfg}\n{default_args}') + if not isinstance(registry, Registry): + raise TypeError('registry must be an mmcv.Registry object, ' + f'but got {type(registry)}') + if not (isinstance(default_args, dict) or default_args is None): + raise TypeError('default_args must be a dict or None, ' + f'but got {type(default_args)}') + + args = cfg.copy() + + if default_args is not None: + for name, value in default_args.items(): + args.setdefault(name, value) + + obj_type = args.pop('type') + if isinstance(obj_type, str): + obj_cls = registry.get(obj_type) + if obj_cls is None: + raise KeyError( + f'{obj_type} is not in the {registry.name} registry') + elif inspect.isclass(obj_type): + obj_cls = obj_type + else: + raise TypeError( + f'type must be a str or valid type, but got {type(obj_type)}') + try: + return obj_cls(**args) + except Exception as e: + # Normal TypeError does not print class name. + raise type(e)(f'{obj_cls.__name__}: {e}') + + +class Registry: + """A registry to map strings to classes. + + Registered object could be built from registry. + Example: + >>> MODELS = Registry('models') + >>> @MODELS.register_module() + >>> class ResNet: + >>> pass + >>> resnet = MODELS.build(dict(type='ResNet')) + + Please refer to + https://mmcv.readthedocs.io/en/latest/understand_mmcv/registry.html for + advanced usage. + + Args: + name (str): Registry name. + build_func(func, optional): Build function to construct instance from + Registry, func:`build_from_cfg` is used if neither ``parent`` or + ``build_func`` is specified. If ``parent`` is specified and + ``build_func`` is not given, ``build_func`` will be inherited + from ``parent``. Default: None. + parent (Registry, optional): Parent registry. The class registered in + children registry could be built from parent. Default: None. + scope (str, optional): The scope of registry. It is the key to search + for children registry. If not specified, scope will be the name of + the package where class is defined, e.g. mmdet, mmcls, mmseg. + Default: None. + """ + + def __init__(self, name, build_func=None, parent=None, scope=None): + self._name = name + self._module_dict = dict() + self._children = dict() + self._scope = self.infer_scope() if scope is None else scope + + # self.build_func will be set with the following priority: + # 1. build_func + # 2. parent.build_func + # 3. build_from_cfg + if build_func is None: + if parent is not None: + self.build_func = parent.build_func + else: + self.build_func = build_from_cfg + else: + self.build_func = build_func + if parent is not None: + assert isinstance(parent, Registry) + parent._add_children(self) + self.parent = parent + else: + self.parent = None + + def __len__(self): + return len(self._module_dict) + + def __contains__(self, key): + return self.get(key) is not None + + def __repr__(self): + format_str = self.__class__.__name__ + \ + f'(name={self._name}, ' \ + f'items={self._module_dict})' + return format_str + + @staticmethod + def infer_scope(): + """Infer the scope of registry. + + The name of the package where registry is defined will be returned. + + Example: + # in mmdet/models/backbone/resnet.py + >>> MODELS = Registry('models') + >>> @MODELS.register_module() + >>> class ResNet: + >>> pass + The scope of ``ResNet`` will be ``mmdet``. + + + Returns: + scope (str): The inferred scope name. + """ + # inspect.stack() trace where this function is called, the index-2 + # indicates the frame where `infer_scope()` is called + filename = inspect.getmodule(inspect.stack()[2][0]).__name__ + split_filename = filename.split('.') + return split_filename[0] + + @staticmethod + def split_scope_key(key): + """Split scope and key. + + The first scope will be split from key. + + Examples: + >>> Registry.split_scope_key('mmdet.ResNet') + 'mmdet', 'ResNet' + >>> Registry.split_scope_key('ResNet') + None, 'ResNet' + + Return: + scope (str, None): The first scope. + key (str): The remaining key. + """ + split_index = key.find('.') + if split_index != -1: + return key[:split_index], key[split_index + 1:] + else: + return None, key + + @property + def name(self): + return self._name + + @property + def scope(self): + return self._scope + + @property + def module_dict(self): + return self._module_dict + + @property + def children(self): + return self._children + + def get(self, key): + """Get the registry record. + + Args: + key (str): The class name in string format. + + Returns: + class: The corresponding class. + """ + scope, real_key = self.split_scope_key(key) + if scope is None or scope == self._scope: + # get from self + if real_key in self._module_dict: + return self._module_dict[real_key] + else: + # get from self._children + if scope in self._children: + return self._children[scope].get(real_key) + else: + # goto root + parent = self.parent + while parent.parent is not None: + parent = parent.parent + return parent.get(key) + + def build(self, *args, **kwargs): + return self.build_func(*args, **kwargs, registry=self) + + def _add_children(self, registry): + """Add children for a registry. + + The ``registry`` will be added as children based on its scope. + The parent registry could build objects from children registry. + + Example: + >>> models = Registry('models') + >>> mmdet_models = Registry('models', parent=models) + >>> @mmdet_models.register_module() + >>> class ResNet: + >>> pass + >>> resnet = models.build(dict(type='mmdet.ResNet')) + """ + + assert isinstance(registry, Registry) + assert registry.scope is not None + assert registry.scope not in self.children, \ + f'scope {registry.scope} exists in {self.name} registry' + self.children[registry.scope] = registry + + def _register_module(self, module_class, module_name=None, force=False): + if not inspect.isclass(module_class): + raise TypeError('module must be a class, ' + f'but got {type(module_class)}') + + if module_name is None: + module_name = module_class.__name__ + if isinstance(module_name, str): + module_name = [module_name] + for name in module_name: + if not force and name in self._module_dict: + raise KeyError(f'{name} is already registered ' + f'in {self.name}') + self._module_dict[name] = module_class + + def deprecated_register_module(self, cls=None, force=False): + warnings.warn( + 'The old API of register_module(module, force=False) ' + 'is deprecated and will be removed, please use the new API ' + 'register_module(name=None, force=False, module=None) instead.') + if cls is None: + return partial(self.deprecated_register_module, force=force) + self._register_module(cls, force=force) + return cls + + def register_module(self, name=None, force=False, module=None): + """Register a module. + + A record will be added to `self._module_dict`, whose key is the class + name or the specified name, and value is the class itself. + It can be used as a decorator or a normal function. + + Example: + >>> backbones = Registry('backbone') + >>> @backbones.register_module() + >>> class ResNet: + >>> pass + + >>> backbones = Registry('backbone') + >>> @backbones.register_module(name='mnet') + >>> class MobileNet: + >>> pass + + >>> backbones = Registry('backbone') + >>> class ResNet: + >>> pass + >>> backbones.register_module(ResNet) + + Args: + name (str | None): The module name to be registered. If not + specified, the class name will be used. + force (bool, optional): Whether to override an existing class with + the same name. Default: False. + module (type): Module class to be registered. + """ + if not isinstance(force, bool): + raise TypeError(f'force must be a boolean, but got {type(force)}') + # NOTE: This is a walkaround to be compatible with the old api, + # while it may introduce unexpected bugs. + if isinstance(name, type): + return self.deprecated_register_module(name, force=force) + + # raise the error ahead of time + if not (name is None or isinstance(name, str) or is_seq_of(name, str)): + raise TypeError( + 'name must be either of None, an instance of str or a sequence' + f' of str, but got {type(name)}') + + # use it as a normal method: x.register_module(module=SomeClass) + if module is not None: + self._register_module( + module_class=module, module_name=name, force=force) + return module + + # use it as a decorator: @x.register_module() + def _register(cls): + self._register_module( + module_class=cls, module_name=name, force=force) + return cls + + return _register diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py new file mode 100644 index 000000000000..a27f936da8ec --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py @@ -0,0 +1,140 @@ +# Copyright (c) Open-MMLab. +import sys +from collections.abc import Iterable +from runpy import run_path +from shlex import split +from typing import Any, Dict, List +from unittest.mock import patch + + +def check_python_script(cmd): + """Run the python cmd script with `__main__`. The difference between + `os.system` is that, this function exectues code in the current process, so + that it can be tracked by coverage tools. Currently it supports two forms: + + - ./tests/data/scripts/hello.py zz + - python tests/data/scripts/hello.py zz + """ + args = split(cmd) + if args[0] == 'python': + args = args[1:] + with patch.object(sys, 'argv', args): + run_path(args[0], run_name='__main__') + + +def _any(judge_result): + """Since built-in ``any`` works only when the element of iterable is not + iterable, implement the function.""" + if not isinstance(judge_result, Iterable): + return judge_result + + try: + for element in judge_result: + if _any(element): + return True + except TypeError: + # Maybe encounter the case: torch.tensor(True) | torch.tensor(False) + if judge_result: + return True + return False + + +def assert_dict_contains_subset(dict_obj: Dict[Any, Any], + expected_subset: Dict[Any, Any]) -> bool: + """Check if the dict_obj contains the expected_subset. + + Args: + dict_obj (Dict[Any, Any]): Dict object to be checked. + expected_subset (Dict[Any, Any]): Subset expected to be contained in + dict_obj. + + Returns: + bool: Whether the dict_obj contains the expected_subset. + """ + + for key, value in expected_subset.items(): + if key not in dict_obj.keys() or _any(dict_obj[key] != value): + return False + return True + + +def assert_attrs_equal(obj: Any, expected_attrs: Dict[str, Any]) -> bool: + """Check if attribute of class object is correct. + + Args: + obj (object): Class object to be checked. + expected_attrs (Dict[str, Any]): Dict of the expected attrs. + + Returns: + bool: Whether the attribute of class object is correct. + """ + for attr, value in expected_attrs.items(): + if not hasattr(obj, attr) or _any(getattr(obj, attr) != value): + return False + return True + + +def assert_dict_has_keys(obj: Dict[str, Any], + expected_keys: List[str]) -> bool: + """Check if the obj has all the expected_keys. + + Args: + obj (Dict[str, Any]): Object to be checked. + expected_keys (List[str]): Keys expected to contained in the keys of + the obj. + + Returns: + bool: Whether the obj has the expected keys. + """ + return set(expected_keys).issubset(set(obj.keys())) + + +def assert_keys_equal(result_keys: List[str], target_keys: List[str]) -> bool: + """Check if target_keys is equal to result_keys. + + Args: + result_keys (List[str]): Result keys to be checked. + target_keys (List[str]): Target keys to be checked. + + Returns: + bool: Whether target_keys is equal to result_keys. + """ + return set(result_keys) == set(target_keys) + + +def assert_is_norm_layer(module) -> bool: + """Check if the module is a norm layer. + + Args: + module (nn.Module): The module to be checked. + + Returns: + bool: Whether the module is a norm layer. + """ + from .parrots_wrapper import _BatchNorm, _InstanceNorm + from torch.nn import GroupNorm, LayerNorm + norm_layer_candidates = (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm) + return isinstance(module, norm_layer_candidates) + + +def assert_params_all_zeros(module) -> bool: + """Check if the parameters of the module is all zeros. + + Args: + module (nn.Module): The module to be checked. + + Returns: + bool: Whether the parameters of the module is all zeros. + """ + weight_data = module.weight.data + is_weight_zero = weight_data.allclose( + weight_data.new_zeros(weight_data.size())) + + if hasattr(module, 'bias') and module.bias is not None: + bias_data = module.bias.data + is_bias_zero = bias_data.allclose( + bias_data.new_zeros(bias_data.size())) + else: + is_bias_zero = True + + return is_weight_zero and is_bias_zero diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py new file mode 100644 index 000000000000..e01716a205e4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py @@ -0,0 +1,118 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from time import time + + +class TimerError(Exception): + + def __init__(self, message): + self.message = message + super(TimerError, self).__init__(message) + + +class Timer: + """A flexible Timer class. + + :Example: + + >>> import time + >>> import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + >>> with mmcv.Timer(): + >>> # simulate a code block that will run for 1s + >>> time.sleep(1) + 1.000 + >>> with mmcv.Timer(print_tmpl='it takes {:.1f} seconds'): + >>> # simulate a code block that will run for 1s + >>> time.sleep(1) + it takes 1.0 seconds + >>> timer = mmcv.Timer() + >>> time.sleep(0.5) + >>> print(timer.since_start()) + 0.500 + >>> time.sleep(0.5) + >>> print(timer.since_last_check()) + 0.500 + >>> print(timer.since_start()) + 1.000 + """ + + def __init__(self, start=True, print_tmpl=None): + self._is_running = False + self.print_tmpl = print_tmpl if print_tmpl else '{:.3f}' + if start: + self.start() + + @property + def is_running(self): + """bool: indicate whether the timer is running""" + return self._is_running + + def __enter__(self): + self.start() + return self + + def __exit__(self, type, value, traceback): + print(self.print_tmpl.format(self.since_last_check())) + self._is_running = False + + def start(self): + """Start the timer.""" + if not self._is_running: + self._t_start = time() + self._is_running = True + self._t_last = time() + + def since_start(self): + """Total time since the timer is started. + + Returns (float): Time in seconds. + """ + if not self._is_running: + raise TimerError('timer is not running') + self._t_last = time() + return self._t_last - self._t_start + + def since_last_check(self): + """Time since the last checking. + + Either :func:`since_start` or :func:`since_last_check` is a checking + operation. + + Returns (float): Time in seconds. + """ + if not self._is_running: + raise TimerError('timer is not running') + dur = time() - self._t_last + self._t_last = time() + return dur + + +_g_timers = {} # global timers + + +def check_time(timer_id): + """Add check points in a single line. + + This method is suitable for running a task on a list of items. A timer will + be registered when the method is called for the first time. + + :Example: + + >>> import time + >>> import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + >>> for i in range(1, 6): + >>> # simulate a code block + >>> time.sleep(i) + >>> mmcv.check_time('task1') + 2.000 + 3.000 + 4.000 + 5.000 + + Args: + timer_id (str): Timer identifier. + """ + if timer_id not in _g_timers: + _g_timers[timer_id] = Timer() + return 0 + else: + return _g_timers[timer_id].since_last_check() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py new file mode 100644 index 000000000000..10702f29964f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py @@ -0,0 +1,23 @@ +import warnings + +import torch + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import digit_version + + +def is_jit_tracing() -> bool: + if (torch.__version__ != 'parrots' + and digit_version(torch.__version__) >= digit_version('1.6.0')): + on_trace = torch.jit.is_tracing() + # In PyTorch 1.6, torch.jit.is_tracing has a bug. + # Refers to https://github.com/pytorch/pytorch/issues/42448 + if isinstance(on_trace, bool): + return on_trace + else: + return torch._C._is_tracing() + else: + warnings.warn( + 'torch.jit.is_tracing is only supported after v1.6.0. ' + 'Therefore is_tracing returns False automatically. Please ' + 'set on_trace manually if you are using trace.', UserWarning) + return False diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py new file mode 100644 index 000000000000..963c45a2e8a8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py @@ -0,0 +1,90 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import subprocess +import warnings + +from packaging.version import parse + + +def digit_version(version_str: str, length: int = 4): + """Convert a version string into a tuple of integers. + + This method is usually used for comparing two versions. For pre-release + versions: alpha < beta < rc. + + Args: + version_str (str): The version string. + length (int): The maximum number of version levels. Default: 4. + + Returns: + tuple[int]: The version info in digits (integers). + """ + assert 'parrots' not in version_str + version = parse(version_str) + assert version.release, f'failed to parse version {version_str}' + release = list(version.release) + release = release[:length] + if len(release) < length: + release = release + [0] * (length - len(release)) + if version.is_prerelease: + mapping = {'a': -3, 'b': -2, 'rc': -1} + val = -4 + # version.pre can be None + if version.pre: + if version.pre[0] not in mapping: + warnings.warn(f'unknown prerelease version {version.pre[0]}, ' + 'version checking may go wrong') + else: + val = mapping[version.pre[0]] + release.extend([val, version.pre[-1]]) + else: + release.extend([val, 0]) + + elif version.is_postrelease: + release.extend([1, version.post]) + else: + release.extend([0, 0]) + return tuple(release) + + +def _minimal_ext_cmd(cmd): + # construct minimal environment + env = {} + for k in ['SYSTEMROOT', 'PATH', 'HOME']: + v = os.environ.get(k) + if v is not None: + env[k] = v + # LANGUAGE is used on win32 + env['LANGUAGE'] = 'C' + env['LANG'] = 'C' + env['LC_ALL'] = 'C' + out = subprocess.Popen( + cmd, stdout=subprocess.PIPE, env=env).communicate()[0] + return out + + +def get_git_hash(fallback='unknown', digits=None): + """Get the git hash of the current repo. + + Args: + fallback (str, optional): The fallback string when git hash is + unavailable. Defaults to 'unknown'. + digits (int, optional): kept digits of the hash. Defaults to None, + meaning all digits are kept. + + Returns: + str: Git commit hash. + """ + + if digits is not None and not isinstance(digits, int): + raise TypeError('digits must be None or an integer') + + try: + out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) + sha = out.strip().decode('ascii') + if digits is not None: + sha = sha[:digits] + except OSError: + sha = fallback + + return sha diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py new file mode 100644 index 000000000000..1cce4e50bd69 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py @@ -0,0 +1,35 @@ +# Copyright (c) OpenMMLab. All rights reserved. +__version__ = '1.3.17' + + +def parse_version_info(version_str: str, length: int = 4) -> tuple: + """Parse a version string into a tuple. + + Args: + version_str (str): The version string. + length (int): The maximum number of version levels. Default: 4. + + Returns: + tuple[int | str]: The version info, e.g., "1.3.0" is parsed into + (1, 3, 0, 0, 0, 0), and "2.0.0rc1" is parsed into + (2, 0, 0, 0, 'rc', 1) (when length is set to 4). + """ + from packaging.version import parse + version = parse(version_str) + assert version.release, f'failed to parse version {version_str}' + release = list(version.release) + release = release[:length] + if len(release) < length: + release = release + [0] * (length - len(release)) + if version.is_prerelease: + release.extend(list(version.pre)) + elif version.is_postrelease: + release.extend(list(version.post)) + else: + release.extend([0, 0]) + return tuple(release) + + +version_info = tuple(int(x) for x in __version__.split('.')[:3]) + +__all__ = ['__version__', 'version_info', 'parse_version_info'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py new file mode 100644 index 000000000000..73199b01dec5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .io import Cache, VideoReader, frames2video +from .optflow import (dequantize_flow, flow_from_bytes, flow_warp, flowread, + flowwrite, quantize_flow, sparse_flow_from_bytes) +from .processing import concat_video, convert_video, cut_video, resize_video + +__all__ = [ + 'Cache', 'VideoReader', 'frames2video', 'convert_video', 'resize_video', + 'cut_video', 'concat_video', 'flowread', 'flowwrite', 'quantize_flow', + 'dequantize_flow', 'flow_warp', 'flow_from_bytes', 'sparse_flow_from_bytes' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py new file mode 100644 index 000000000000..b2f81860e659 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py @@ -0,0 +1,318 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +from collections import OrderedDict + +import cv2 +from cv2 import (CAP_PROP_FOURCC, CAP_PROP_FPS, CAP_PROP_FRAME_COUNT, + CAP_PROP_FRAME_HEIGHT, CAP_PROP_FRAME_WIDTH, + CAP_PROP_POS_FRAMES, VideoWriter_fourcc) + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import (check_file_exist, mkdir_or_exist, scandir, + track_progress) + + +class Cache: + + def __init__(self, capacity): + self._cache = OrderedDict() + self._capacity = int(capacity) + if capacity <= 0: + raise ValueError('capacity must be a positive integer') + + @property + def capacity(self): + return self._capacity + + @property + def size(self): + return len(self._cache) + + def put(self, key, val): + if key in self._cache: + return + if len(self._cache) >= self.capacity: + self._cache.popitem(last=False) + self._cache[key] = val + + def get(self, key, default=None): + val = self._cache[key] if key in self._cache else default + return val + + +class VideoReader: + """Video class with similar usage to a list object. + + This video warpper class provides convenient apis to access frames. + There exists an issue of OpenCV's VideoCapture class that jumping to a + certain frame may be inaccurate. It is fixed in this class by checking + the position after jumping each time. + Cache is used when decoding videos. So if the same frame is visited for + the second time, there is no need to decode again if it is stored in the + cache. + + :Example: + + >>> import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + >>> v = mmcv.VideoReader('sample.mp4') + >>> len(v) # get the total frame number with `len()` + 120 + >>> for img in v: # v is iterable + >>> mmcv.imshow(img) + >>> v[5] # get the 6th frame + """ + + def __init__(self, filename, cache_capacity=10): + # Check whether the video path is a url + if not filename.startswith(('https://', 'http://')): + check_file_exist(filename, 'Video file not found: ' + filename) + self._vcap = cv2.VideoCapture(filename) + assert cache_capacity > 0 + self._cache = Cache(cache_capacity) + self._position = 0 + # get basic info + self._width = int(self._vcap.get(CAP_PROP_FRAME_WIDTH)) + self._height = int(self._vcap.get(CAP_PROP_FRAME_HEIGHT)) + self._fps = self._vcap.get(CAP_PROP_FPS) + self._frame_cnt = int(self._vcap.get(CAP_PROP_FRAME_COUNT)) + self._fourcc = self._vcap.get(CAP_PROP_FOURCC) + + @property + def vcap(self): + """:obj:`cv2.VideoCapture`: The raw VideoCapture object.""" + return self._vcap + + @property + def opened(self): + """bool: Indicate whether the video is opened.""" + return self._vcap.isOpened() + + @property + def width(self): + """int: Width of video frames.""" + return self._width + + @property + def height(self): + """int: Height of video frames.""" + return self._height + + @property + def resolution(self): + """tuple: Video resolution (width, height).""" + return (self._width, self._height) + + @property + def fps(self): + """float: FPS of the video.""" + return self._fps + + @property + def frame_cnt(self): + """int: Total frames of the video.""" + return self._frame_cnt + + @property + def fourcc(self): + """str: "Four character code" of the video.""" + return self._fourcc + + @property + def position(self): + """int: Current cursor position, indicating frame decoded.""" + return self._position + + def _get_real_position(self): + return int(round(self._vcap.get(CAP_PROP_POS_FRAMES))) + + def _set_real_position(self, frame_id): + self._vcap.set(CAP_PROP_POS_FRAMES, frame_id) + pos = self._get_real_position() + for _ in range(frame_id - pos): + self._vcap.read() + self._position = frame_id + + def read(self): + """Read the next frame. + + If the next frame have been decoded before and in the cache, then + return it directly, otherwise decode, cache and return it. + + Returns: + ndarray or None: Return the frame if successful, otherwise None. + """ + # pos = self._position + if self._cache: + img = self._cache.get(self._position) + if img is not None: + ret = True + else: + if self._position != self._get_real_position(): + self._set_real_position(self._position) + ret, img = self._vcap.read() + if ret: + self._cache.put(self._position, img) + else: + ret, img = self._vcap.read() + if ret: + self._position += 1 + return img + + def get_frame(self, frame_id): + """Get frame by index. + + Args: + frame_id (int): Index of the expected frame, 0-based. + + Returns: + ndarray or None: Return the frame if successful, otherwise None. + """ + if frame_id < 0 or frame_id >= self._frame_cnt: + raise IndexError( + f'"frame_id" must be between 0 and {self._frame_cnt - 1}') + if frame_id == self._position: + return self.read() + if self._cache: + img = self._cache.get(frame_id) + if img is not None: + self._position = frame_id + 1 + return img + self._set_real_position(frame_id) + ret, img = self._vcap.read() + if ret: + if self._cache: + self._cache.put(self._position, img) + self._position += 1 + return img + + def current_frame(self): + """Get the current frame (frame that is just visited). + + Returns: + ndarray or None: If the video is fresh, return None, otherwise + return the frame. + """ + if self._position == 0: + return None + return self._cache.get(self._position - 1) + + def cvt2frames(self, + frame_dir, + file_start=0, + filename_tmpl='{:06d}.jpg', + start=0, + max_num=0, + show_progress=True): + """Convert a video to frame images. + + Args: + frame_dir (str): Output directory to store all the frame images. + file_start (int): Filenames will start from the specified number. + filename_tmpl (str): Filename template with the index as the + placeholder. + start (int): The starting frame index. + max_num (int): Maximum number of frames to be written. + show_progress (bool): Whether to show a progress bar. + """ + mkdir_or_exist(frame_dir) + if max_num == 0: + task_num = self.frame_cnt - start + else: + task_num = min(self.frame_cnt - start, max_num) + if task_num <= 0: + raise ValueError('start must be less than total frame number') + if start > 0: + self._set_real_position(start) + + def write_frame(file_idx): + img = self.read() + if img is None: + return + filename = osp.join(frame_dir, filename_tmpl.format(file_idx)) + cv2.imwrite(filename, img) + + if show_progress: + track_progress(write_frame, range(file_start, + file_start + task_num)) + else: + for i in range(task_num): + write_frame(file_start + i) + + def __len__(self): + return self.frame_cnt + + def __getitem__(self, index): + if isinstance(index, slice): + return [ + self.get_frame(i) + for i in range(*index.indices(self.frame_cnt)) + ] + # support negative indexing + if index < 0: + index += self.frame_cnt + if index < 0: + raise IndexError('index out of range') + return self.get_frame(index) + + def __iter__(self): + self._set_real_position(0) + return self + + def __next__(self): + img = self.read() + if img is not None: + return img + else: + raise StopIteration + + next = __next__ + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._vcap.release() + + +def frames2video(frame_dir, + video_file, + fps=30, + fourcc='XVID', + filename_tmpl='{:06d}.jpg', + start=0, + end=0, + show_progress=True): + """Read the frame images from a directory and join them as a video. + + Args: + frame_dir (str): The directory containing video frames. + video_file (str): Output filename. + fps (float): FPS of the output video. + fourcc (str): Fourcc of the output video, this should be compatible + with the output file type. + filename_tmpl (str): Filename template with the index as the variable. + start (int): Starting frame index. + end (int): Ending frame index. + show_progress (bool): Whether to show a progress bar. + """ + if end == 0: + ext = filename_tmpl.split('.')[-1] + end = len([name for name in scandir(frame_dir, ext)]) + first_file = osp.join(frame_dir, filename_tmpl.format(start)) + check_file_exist(first_file, 'The start frame not found: ' + first_file) + img = cv2.imread(first_file) + height, width = img.shape[:2] + resolution = (width, height) + vwriter = cv2.VideoWriter(video_file, VideoWriter_fourcc(*fourcc), fps, + resolution) + + def write_frame(file_idx): + filename = osp.join(frame_dir, filename_tmpl.format(file_idx)) + img = cv2.imread(filename) + vwriter.write(img) + + if show_progress: + track_progress(write_frame, range(start, end)) + else: + for i in range(start, end): + write_frame(i) + vwriter.release() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py new file mode 100644 index 000000000000..1261e13f64f4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py @@ -0,0 +1,254 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import cv2 +import numpy as np + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.arraymisc import dequantize, quantize +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.image import imread, imwrite +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_str + + +def flowread(flow_or_path, quantize=False, concat_axis=0, *args, **kwargs): + """Read an optical flow map. + + Args: + flow_or_path (ndarray or str): A flow map or filepath. + quantize (bool): whether to read quantized pair, if set to True, + remaining args will be passed to :func:`dequantize_flow`. + concat_axis (int): The axis that dx and dy are concatenated, + can be either 0 or 1. Ignored if quantize is False. + + Returns: + ndarray: Optical flow represented as a (h, w, 2) numpy array + """ + if isinstance(flow_or_path, np.ndarray): + if (flow_or_path.ndim != 3) or (flow_or_path.shape[-1] != 2): + raise ValueError(f'Invalid flow with shape {flow_or_path.shape}') + return flow_or_path + elif not is_str(flow_or_path): + raise TypeError(f'"flow_or_path" must be a filename or numpy array, ' + f'not {type(flow_or_path)}') + + if not quantize: + with open(flow_or_path, 'rb') as f: + try: + header = f.read(4).decode('utf-8') + except Exception: + raise IOError(f'Invalid flow file: {flow_or_path}') + else: + if header != 'PIEH': + raise IOError(f'Invalid flow file: {flow_or_path}, ' + 'header does not contain PIEH') + + w = np.fromfile(f, np.int32, 1).squeeze() + h = np.fromfile(f, np.int32, 1).squeeze() + flow = np.fromfile(f, np.float32, w * h * 2).reshape((h, w, 2)) + else: + assert concat_axis in [0, 1] + cat_flow = imread(flow_or_path, flag='unchanged') + if cat_flow.ndim != 2: + raise IOError( + f'{flow_or_path} is not a valid quantized flow file, ' + f'its dimension is {cat_flow.ndim}.') + assert cat_flow.shape[concat_axis] % 2 == 0 + dx, dy = np.split(cat_flow, 2, axis=concat_axis) + flow = dequantize_flow(dx, dy, *args, **kwargs) + + return flow.astype(np.float32) + + +def flowwrite(flow, filename, quantize=False, concat_axis=0, *args, **kwargs): + """Write optical flow to file. + + If the flow is not quantized, it will be saved as a .flo file losslessly, + otherwise a jpeg image which is lossy but of much smaller size. (dx and dy + will be concatenated horizontally into a single image if quantize is True.) + + Args: + flow (ndarray): (h, w, 2) array of optical flow. + filename (str): Output filepath. + quantize (bool): Whether to quantize the flow and save it to 2 jpeg + images. If set to True, remaining args will be passed to + :func:`quantize_flow`. + concat_axis (int): The axis that dx and dy are concatenated, + can be either 0 or 1. Ignored if quantize is False. + """ + if not quantize: + with open(filename, 'wb') as f: + f.write('PIEH'.encode('utf-8')) + np.array([flow.shape[1], flow.shape[0]], dtype=np.int32).tofile(f) + flow = flow.astype(np.float32) + flow.tofile(f) + f.flush() + else: + assert concat_axis in [0, 1] + dx, dy = quantize_flow(flow, *args, **kwargs) + dxdy = np.concatenate((dx, dy), axis=concat_axis) + imwrite(dxdy, filename) + + +def quantize_flow(flow, max_val=0.02, norm=True): + """Quantize flow to [0, 255]. + + After this step, the size of flow will be much smaller, and can be + dumped as jpeg images. + + Args: + flow (ndarray): (h, w, 2) array of optical flow. + max_val (float): Maximum value of flow, values beyond + [-max_val, max_val] will be truncated. + norm (bool): Whether to divide flow values by image width/height. + + Returns: + tuple[ndarray]: Quantized dx and dy. + """ + h, w, _ = flow.shape + dx = flow[..., 0] + dy = flow[..., 1] + if norm: + dx = dx / w # avoid inplace operations + dy = dy / h + # use 255 levels instead of 256 to make sure 0 is 0 after dequantization. + flow_comps = [ + quantize(d, -max_val, max_val, 255, np.uint8) for d in [dx, dy] + ] + return tuple(flow_comps) + + +def dequantize_flow(dx, dy, max_val=0.02, denorm=True): + """Recover from quantized flow. + + Args: + dx (ndarray): Quantized dx. + dy (ndarray): Quantized dy. + max_val (float): Maximum value used when quantizing. + denorm (bool): Whether to multiply flow values with width/height. + + Returns: + ndarray: Dequantized flow. + """ + assert dx.shape == dy.shape + assert dx.ndim == 2 or (dx.ndim == 3 and dx.shape[-1] == 1) + + dx, dy = [dequantize(d, -max_val, max_val, 255) for d in [dx, dy]] + + if denorm: + dx *= dx.shape[1] + dy *= dx.shape[0] + flow = np.dstack((dx, dy)) + return flow + + +def flow_warp(img, flow, filling_value=0, interpolate_mode='nearest'): + """Use flow to warp img. + + Args: + img (ndarray, float or uint8): Image to be warped. + flow (ndarray, float): Optical Flow. + filling_value (int): The missing pixels will be set with filling_value. + interpolate_mode (str): bilinear -> Bilinear Interpolation; + nearest -> Nearest Neighbor. + + Returns: + ndarray: Warped image with the same shape of img + """ + warnings.warn('This function is just for prototyping and cannot ' + 'guarantee the computational efficiency.') + assert flow.ndim == 3, 'Flow must be in 3D arrays.' + height = flow.shape[0] + width = flow.shape[1] + channels = img.shape[2] + + output = np.ones( + (height, width, channels), dtype=img.dtype) * filling_value + + grid = np.indices((height, width)).swapaxes(0, 1).swapaxes(1, 2) + dx = grid[:, :, 0] + flow[:, :, 1] + dy = grid[:, :, 1] + flow[:, :, 0] + sx = np.floor(dx).astype(int) + sy = np.floor(dy).astype(int) + valid = (sx >= 0) & (sx < height - 1) & (sy >= 0) & (sy < width - 1) + + if interpolate_mode == 'nearest': + output[valid, :] = img[dx[valid].round().astype(int), + dy[valid].round().astype(int), :] + elif interpolate_mode == 'bilinear': + # dirty walkround for integer positions + eps_ = 1e-6 + dx, dy = dx + eps_, dy + eps_ + left_top_ = img[np.floor(dx[valid]).astype(int), + np.floor(dy[valid]).astype(int), :] * ( + np.ceil(dx[valid]) - dx[valid])[:, None] * ( + np.ceil(dy[valid]) - dy[valid])[:, None] + left_down_ = img[np.ceil(dx[valid]).astype(int), + np.floor(dy[valid]).astype(int), :] * ( + dx[valid] - np.floor(dx[valid]))[:, None] * ( + np.ceil(dy[valid]) - dy[valid])[:, None] + right_top_ = img[np.floor(dx[valid]).astype(int), + np.ceil(dy[valid]).astype(int), :] * ( + np.ceil(dx[valid]) - dx[valid])[:, None] * ( + dy[valid] - np.floor(dy[valid]))[:, None] + right_down_ = img[np.ceil(dx[valid]).astype(int), + np.ceil(dy[valid]).astype(int), :] * ( + dx[valid] - np.floor(dx[valid]))[:, None] * ( + dy[valid] - np.floor(dy[valid]))[:, None] + output[valid, :] = left_top_ + left_down_ + right_top_ + right_down_ + else: + raise NotImplementedError( + 'We only support interpolation modes of nearest and bilinear, ' + f'but got {interpolate_mode}.') + return output.astype(img.dtype) + + +def flow_from_bytes(content): + """Read dense optical flow from bytes. + + .. note:: + This load optical flow function works for FlyingChairs, FlyingThings3D, + Sintel, FlyingChairsOcc datasets, but cannot load the data from + ChairsSDHom. + + Args: + content (bytes): Optical flow bytes got from files or other streams. + + Returns: + ndarray: Loaded optical flow with the shape (H, W, 2). + """ + + # header in first 4 bytes + header = content[:4] + if header.decode('utf-8') != 'PIEH': + raise Exception('Flow file header does not contain PIEH') + # width in second 4 bytes + width = np.frombuffer(content[4:], np.int32, 1).squeeze() + # height in third 4 bytes + height = np.frombuffer(content[8:], np.int32, 1).squeeze() + # after first 12 bytes, all bytes are flow + flow = np.frombuffer(content[12:], np.float32, width * height * 2).reshape( + (height, width, 2)) + + return flow + + +def sparse_flow_from_bytes(content): + """Read the optical flow in KITTI datasets from bytes. + + This function is modified from RAFT load the `KITTI datasets + `_. + + Args: + content (bytes): Optical flow bytes got from files or other streams. + + Returns: + Tuple(ndarray, ndarray): Loaded optical flow with the shape (H, W, 2) + and flow valid mask with the shape (H, W). + """ # nopa + + content = np.frombuffer(content, np.uint8) + flow = cv2.imdecode(content, cv2.IMREAD_ANYDEPTH | cv2.IMREAD_COLOR) + flow = flow[:, :, ::-1].astype(np.float32) + # flow shape (H, W, 2) valid shape (H, W) + flow, valid = flow[:, :, :2], flow[:, :, 2] + flow = (flow - 2**15) / 64.0 + return flow, valid diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py new file mode 100644 index 000000000000..4e53b21b0788 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py @@ -0,0 +1,160 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import os.path as osp +import subprocess +import tempfile + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import requires_executable + + +@requires_executable('ffmpeg') +def convert_video(in_file, + out_file, + print_cmd=False, + pre_options='', + **kwargs): + """Convert a video with ffmpeg. + + This provides a general api to ffmpeg, the executed command is:: + + `ffmpeg -y -i ` + + Options(kwargs) are mapped to ffmpeg commands with the following rules: + + - key=val: "-key val" + - key=True: "-key" + - key=False: "" + + Args: + in_file (str): Input video filename. + out_file (str): Output video filename. + pre_options (str): Options appears before "-i ". + print_cmd (bool): Whether to print the final ffmpeg command. + """ + options = [] + for k, v in kwargs.items(): + if isinstance(v, bool): + if v: + options.append(f'-{k}') + elif k == 'log_level': + assert v in [ + 'quiet', 'panic', 'fatal', 'error', 'warning', 'info', + 'verbose', 'debug', 'trace' + ] + options.append(f'-loglevel {v}') + else: + options.append(f'-{k} {v}') + cmd = f'ffmpeg -y {pre_options} -i {in_file} {" ".join(options)} ' \ + f'{out_file}' + if print_cmd: + print(cmd) + subprocess.call(cmd, shell=True) + + +@requires_executable('ffmpeg') +def resize_video(in_file, + out_file, + size=None, + ratio=None, + keep_ar=False, + log_level='info', + print_cmd=False): + """Resize a video. + + Args: + in_file (str): Input video filename. + out_file (str): Output video filename. + size (tuple): Expected size (w, h), eg, (320, 240) or (320, -1). + ratio (tuple or float): Expected resize ratio, (2, 0.5) means + (w*2, h*0.5). + keep_ar (bool): Whether to keep original aspect ratio. + log_level (str): Logging level of ffmpeg. + print_cmd (bool): Whether to print the final ffmpeg command. + """ + if size is None and ratio is None: + raise ValueError('expected size or ratio must be specified') + if size is not None and ratio is not None: + raise ValueError('size and ratio cannot be specified at the same time') + options = {'log_level': log_level} + if size: + if not keep_ar: + options['vf'] = f'scale={size[0]}:{size[1]}' + else: + options['vf'] = f'scale=w={size[0]}:h={size[1]}:' \ + 'force_original_aspect_ratio=decrease' + else: + if not isinstance(ratio, tuple): + ratio = (ratio, ratio) + options['vf'] = f'scale="trunc(iw*{ratio[0]}):trunc(ih*{ratio[1]})"' + convert_video(in_file, out_file, print_cmd, **options) + + +@requires_executable('ffmpeg') +def cut_video(in_file, + out_file, + start=None, + end=None, + vcodec=None, + acodec=None, + log_level='info', + print_cmd=False): + """Cut a clip from a video. + + Args: + in_file (str): Input video filename. + out_file (str): Output video filename. + start (None or float): Start time (in seconds). + end (None or float): End time (in seconds). + vcodec (None or str): Output video codec, None for unchanged. + acodec (None or str): Output audio codec, None for unchanged. + log_level (str): Logging level of ffmpeg. + print_cmd (bool): Whether to print the final ffmpeg command. + """ + options = {'log_level': log_level} + if vcodec is None: + options['vcodec'] = 'copy' + if acodec is None: + options['acodec'] = 'copy' + if start: + options['ss'] = start + else: + start = 0 + if end: + options['t'] = end - start + convert_video(in_file, out_file, print_cmd, **options) + + +@requires_executable('ffmpeg') +def concat_video(video_list, + out_file, + vcodec=None, + acodec=None, + log_level='info', + print_cmd=False): + """Concatenate multiple videos into a single one. + + Args: + video_list (list): A list of video filenames + out_file (str): Output video filename + vcodec (None or str): Output video codec, None for unchanged + acodec (None or str): Output audio codec, None for unchanged + log_level (str): Logging level of ffmpeg. + print_cmd (bool): Whether to print the final ffmpeg command. + """ + tmp_filehandler, tmp_filename = tempfile.mkstemp(suffix='.txt', text=True) + with open(tmp_filename, 'w') as f: + for filename in video_list: + f.write(f'file {osp.abspath(filename)}\n') + options = {'log_level': log_level} + if vcodec is None: + options['vcodec'] = 'copy' + if acodec is None: + options['acodec'] = 'copy' + convert_video( + tmp_filename, + out_file, + print_cmd, + pre_options='-f concat -safe 0', + **options) + os.close(tmp_filehandler) + os.remove(tmp_filename) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py new file mode 100644 index 000000000000..835df136bdcf --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .color import Color, color_val +from .image import imshow, imshow_bboxes, imshow_det_bboxes +from .optflow import flow2rgb, flowshow, make_color_wheel + +__all__ = [ + 'Color', 'color_val', 'imshow', 'imshow_bboxes', 'imshow_det_bboxes', + 'flowshow', 'flow2rgb', 'make_color_wheel' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py new file mode 100644 index 000000000000..9dd2d0deb9c6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py @@ -0,0 +1,51 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from enum import Enum + +import numpy as np + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_str + + +class Color(Enum): + """An enum that defines common colors. + + Contains red, green, blue, cyan, yellow, magenta, white and black. + """ + red = (0, 0, 255) + green = (0, 255, 0) + blue = (255, 0, 0) + cyan = (255, 255, 0) + yellow = (0, 255, 255) + magenta = (255, 0, 255) + white = (255, 255, 255) + black = (0, 0, 0) + + +def color_val(color): + """Convert various input to color tuples. + + Args: + color (:obj:`Color`/str/tuple/int/ndarray): Color inputs + + Returns: + tuple[int]: A tuple of 3 integers indicating BGR channels. + """ + if is_str(color): + return Color[color].value + elif isinstance(color, Color): + return color.value + elif isinstance(color, tuple): + assert len(color) == 3 + for channel in color: + assert 0 <= channel <= 255 + return color + elif isinstance(color, int): + assert 0 <= color <= 255 + return color, color, color + elif isinstance(color, np.ndarray): + assert color.ndim == 1 and color.size == 3 + assert np.all((color >= 0) & (color <= 255)) + color = color.astype(np.uint8) + return tuple(color) + else: + raise TypeError(f'Invalid type for color: {type(color)}') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py new file mode 100644 index 000000000000..c3b0b61f99f9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py @@ -0,0 +1,152 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import cv2 +import numpy as np + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.image import imread, imwrite +from .color import color_val + + +def imshow(img, win_name='', wait_time=0): + """Show an image. + + Args: + img (str or ndarray): The image to be displayed. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + """ + cv2.imshow(win_name, imread(img)) + if wait_time == 0: # prevent from hanging if windows was closed + while True: + ret = cv2.waitKey(1) + + closed = cv2.getWindowProperty(win_name, cv2.WND_PROP_VISIBLE) < 1 + # if user closed window or if some key pressed + if closed or ret != -1: + break + else: + ret = cv2.waitKey(wait_time) + + +def imshow_bboxes(img, + bboxes, + colors='green', + top_k=-1, + thickness=1, + show=True, + win_name='', + wait_time=0, + out_file=None): + """Draw bboxes on an image. + + Args: + img (str or ndarray): The image to be displayed. + bboxes (list or ndarray): A list of ndarray of shape (k, 4). + colors (list[str or tuple or Color]): A list of colors. + top_k (int): Plot the first k bboxes only if set positive. + thickness (int): Thickness of lines. + show (bool): Whether to show the image. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + out_file (str, optional): The filename to write the image. + + Returns: + ndarray: The image with bboxes drawn on it. + """ + img = imread(img) + img = np.ascontiguousarray(img) + + if isinstance(bboxes, np.ndarray): + bboxes = [bboxes] + if not isinstance(colors, list): + colors = [colors for _ in range(len(bboxes))] + colors = [color_val(c) for c in colors] + assert len(bboxes) == len(colors) + + for i, _bboxes in enumerate(bboxes): + _bboxes = _bboxes.astype(np.int32) + if top_k <= 0: + _top_k = _bboxes.shape[0] + else: + _top_k = min(top_k, _bboxes.shape[0]) + for j in range(_top_k): + left_top = (_bboxes[j, 0], _bboxes[j, 1]) + right_bottom = (_bboxes[j, 2], _bboxes[j, 3]) + cv2.rectangle( + img, left_top, right_bottom, colors[i], thickness=thickness) + + if show: + imshow(img, win_name, wait_time) + if out_file is not None: + imwrite(img, out_file) + return img + + +def imshow_det_bboxes(img, + bboxes, + labels, + class_names=None, + score_thr=0, + bbox_color='green', + text_color='green', + thickness=1, + font_scale=0.5, + show=True, + win_name='', + wait_time=0, + out_file=None): + """Draw bboxes and class labels (with scores) on an image. + + Args: + img (str or ndarray): The image to be displayed. + bboxes (ndarray): Bounding boxes (with scores), shaped (n, 4) or + (n, 5). + labels (ndarray): Labels of bboxes. + class_names (list[str]): Names of each classes. + score_thr (float): Minimum score of bboxes to be shown. + bbox_color (str or tuple or :obj:`Color`): Color of bbox lines. + text_color (str or tuple or :obj:`Color`): Color of texts. + thickness (int): Thickness of lines. + font_scale (float): Font scales of texts. + show (bool): Whether to show the image. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + out_file (str or None): The filename to write the image. + + Returns: + ndarray: The image with bboxes drawn on it. + """ + assert bboxes.ndim == 2 + assert labels.ndim == 1 + assert bboxes.shape[0] == labels.shape[0] + assert bboxes.shape[1] == 4 or bboxes.shape[1] == 5 + img = imread(img) + img = np.ascontiguousarray(img) + + if score_thr > 0: + assert bboxes.shape[1] == 5 + scores = bboxes[:, -1] + inds = scores > score_thr + bboxes = bboxes[inds, :] + labels = labels[inds] + + bbox_color = color_val(bbox_color) + text_color = color_val(text_color) + + for bbox, label in zip(bboxes, labels): + bbox_int = bbox.astype(np.int32) + left_top = (bbox_int[0], bbox_int[1]) + right_bottom = (bbox_int[2], bbox_int[3]) + cv2.rectangle( + img, left_top, right_bottom, bbox_color, thickness=thickness) + label_text = class_names[ + label] if class_names is not None else f'cls {label}' + if len(bbox) > 4: + label_text += f'|{bbox[-1]:.02f}' + cv2.putText(img, label_text, (bbox_int[0], bbox_int[1] - 2), + cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color) + + if show: + imshow(img, win_name, wait_time) + if out_file is not None: + imwrite(img, out_file) + return img diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py new file mode 100644 index 000000000000..e958b90e4120 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py @@ -0,0 +1,112 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from __future__ import division + +import numpy as np + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.image import rgb2bgr +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.video import flowread +from .image import imshow + + +def flowshow(flow, win_name='', wait_time=0): + """Show optical flow. + + Args: + flow (ndarray or str): The optical flow to be displayed. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + """ + flow = flowread(flow) + flow_img = flow2rgb(flow) + imshow(rgb2bgr(flow_img), win_name, wait_time) + + +def flow2rgb(flow, color_wheel=None, unknown_thr=1e6): + """Convert flow map to RGB image. + + Args: + flow (ndarray): Array of optical flow. + color_wheel (ndarray or None): Color wheel used to map flow field to + RGB colorspace. Default color wheel will be used if not specified. + unknown_thr (str): Values above this threshold will be marked as + unknown and thus ignored. + + Returns: + ndarray: RGB image that can be visualized. + """ + assert flow.ndim == 3 and flow.shape[-1] == 2 + if color_wheel is None: + color_wheel = make_color_wheel() + assert color_wheel.ndim == 2 and color_wheel.shape[1] == 3 + num_bins = color_wheel.shape[0] + + dx = flow[:, :, 0].copy() + dy = flow[:, :, 1].copy() + + ignore_inds = ( + np.isnan(dx) | np.isnan(dy) | (np.abs(dx) > unknown_thr) | + (np.abs(dy) > unknown_thr)) + dx[ignore_inds] = 0 + dy[ignore_inds] = 0 + + rad = np.sqrt(dx**2 + dy**2) + if np.any(rad > np.finfo(float).eps): + max_rad = np.max(rad) + dx /= max_rad + dy /= max_rad + + rad = np.sqrt(dx**2 + dy**2) + angle = np.arctan2(-dy, -dx) / np.pi + + bin_real = (angle + 1) / 2 * (num_bins - 1) + bin_left = np.floor(bin_real).astype(int) + bin_right = (bin_left + 1) % num_bins + w = (bin_real - bin_left.astype(np.float32))[..., None] + flow_img = (1 - + w) * color_wheel[bin_left, :] + w * color_wheel[bin_right, :] + small_ind = rad <= 1 + flow_img[small_ind] = 1 - rad[small_ind, None] * (1 - flow_img[small_ind]) + flow_img[np.logical_not(small_ind)] *= 0.75 + + flow_img[ignore_inds, :] = 0 + + return flow_img + + +def make_color_wheel(bins=None): + """Build a color wheel. + + Args: + bins(list or tuple, optional): Specify the number of bins for each + color range, corresponding to six ranges: red -> yellow, + yellow -> green, green -> cyan, cyan -> blue, blue -> magenta, + magenta -> red. [15, 6, 4, 11, 13, 6] is used for default + (see Middlebury). + + Returns: + ndarray: Color wheel of shape (total_bins, 3). + """ + if bins is None: + bins = [15, 6, 4, 11, 13, 6] + assert len(bins) == 6 + + RY, YG, GC, CB, BM, MR = tuple(bins) + + ry = [1, np.arange(RY) / RY, 0] + yg = [1 - np.arange(YG) / YG, 1, 0] + gc = [0, 1, np.arange(GC) / GC] + cb = [0, 1 - np.arange(CB) / CB, 1] + bm = [np.arange(BM) / BM, 0, 1] + mr = [1, 0, 1 - np.arange(MR) / MR] + + num_bins = RY + YG + GC + CB + BM + MR + + color_wheel = np.zeros((3, num_bins), dtype=np.float32) + + col = 0 + for i, color in enumerate([ry, yg, gc, cb, bm, mr]): + for j in range(3): + color_wheel[j, col:col + bins[i]] = color[j] + col += bins[i] + + return color_wheel.T diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py new file mode 100644 index 000000000000..4b958738b9fd --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- + +from .checkpoint import load_checkpoint + +__all__ = ['load_checkpoint'] \ No newline at end of file diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py new file mode 100644 index 000000000000..f60be7d3675b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py @@ -0,0 +1,500 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import io +import os +import os.path as osp +import pkgutil +import time +import warnings +from collections import OrderedDict +from importlib import import_module +from tempfile import TemporaryDirectory + +import torch +import torchvision +from torch.optim import Optimizer +from torch.utils import model_zoo +from torch.nn import functional as F + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio import FileClient +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio import load as load_file +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import is_module_wrapper +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import mkdir_or_exist +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import get_dist_info + +ENV_MMCV_HOME = 'MMCV_HOME' +ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' +DEFAULT_CACHE_DIR = '~/.cache' + + +def _get_mmcv_home(): + mmcv_home = os.path.expanduser( + os.getenv( + ENV_MMCV_HOME, + os.path.join( + os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv'))) + + mkdir_or_exist(mmcv_home) + return mmcv_home + + +def load_state_dict(module, state_dict, strict=False, logger=None): + """Load state_dict to a module. + + This method is modified from :meth:`torch.nn.Module.load_state_dict`. + Default value for ``strict`` is set to ``False`` and the message for + param mismatch will be shown even if strict is False. + + Args: + module (Module): Module that receives the state_dict. + state_dict (OrderedDict): Weights. + strict (bool): whether to strictly enforce that the keys + in :attr:`state_dict` match the keys returned by this module's + :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. + logger (:obj:`logging.Logger`, optional): Logger to log the error + message. If not specified, print function will be used. + """ + unexpected_keys = [] + all_missing_keys = [] + err_msg = [] + + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + # use _load_from_state_dict to enable checkpoint version control + def load(module, prefix=''): + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + local_metadata = {} if metadata is None else metadata.get( + prefix[:-1], {}) + module._load_from_state_dict(state_dict, prefix, local_metadata, True, + all_missing_keys, unexpected_keys, + err_msg) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(module) + load = None # break load->load reference cycle + + # ignore "num_batches_tracked" of BN layers + missing_keys = [ + key for key in all_missing_keys if 'num_batches_tracked' not in key + ] + + if unexpected_keys: + err_msg.append('unexpected key in source ' + f'state_dict: {", ".join(unexpected_keys)}\n') + if missing_keys: + err_msg.append( + f'missing keys in source state_dict: {", ".join(missing_keys)}\n') + + rank, _ = get_dist_info() + if len(err_msg) > 0 and rank == 0: + err_msg.insert( + 0, 'The model and loaded state dict do not match exactly\n') + err_msg = '\n'.join(err_msg) + if strict: + raise RuntimeError(err_msg) + elif logger is not None: + logger.warning(err_msg) + else: + print(err_msg) + + +def load_url_dist(url, model_dir=None): + """In distributed setting, this function only download checkpoint at local + rank 0.""" + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + if rank == 0: + checkpoint = model_zoo.load_url(url, model_dir=model_dir) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + checkpoint = model_zoo.load_url(url, model_dir=model_dir) + return checkpoint + + +def load_pavimodel_dist(model_path, map_location=None): + """In distributed setting, this function only download checkpoint at local + rank 0.""" + try: + from pavi import modelcloud + except ImportError: + raise ImportError( + 'Please install pavi to load checkpoint from modelcloud.') + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + if rank == 0: + model = modelcloud.get(model_path) + with TemporaryDirectory() as tmp_dir: + downloaded_file = osp.join(tmp_dir, model.name) + model.download(downloaded_file) + checkpoint = torch.load(downloaded_file, map_location=map_location) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + model = modelcloud.get(model_path) + with TemporaryDirectory() as tmp_dir: + downloaded_file = osp.join(tmp_dir, model.name) + model.download(downloaded_file) + checkpoint = torch.load( + downloaded_file, map_location=map_location) + return checkpoint + + +def load_fileclient_dist(filename, backend, map_location): + """In distributed setting, this function only download checkpoint at local + rank 0.""" + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + allowed_backends = ['ceph'] + if backend not in allowed_backends: + raise ValueError(f'Load from Backend {backend} is not supported.') + if rank == 0: + fileclient = FileClient(backend=backend) + buffer = io.BytesIO(fileclient.get(filename)) + checkpoint = torch.load(buffer, map_location=map_location) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + fileclient = FileClient(backend=backend) + buffer = io.BytesIO(fileclient.get(filename)) + checkpoint = torch.load(buffer, map_location=map_location) + return checkpoint + + +def get_torchvision_models(): + model_urls = dict() + for _, name, ispkg in pkgutil.walk_packages(torchvision.models.__path__): + if ispkg: + continue + _zoo = import_module(f'torchvision.models.{name}') + if hasattr(_zoo, 'model_urls'): + _urls = getattr(_zoo, 'model_urls') + model_urls.update(_urls) + return model_urls + + +def get_external_models(): + mmcv_home = _get_mmcv_home() + default_json_path = osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json') + default_urls = load_file(default_json_path) + assert isinstance(default_urls, dict) + external_json_path = osp.join(mmcv_home, 'open_mmlab.json') + if osp.exists(external_json_path): + external_urls = load_file(external_json_path) + assert isinstance(external_urls, dict) + default_urls.update(external_urls) + + return default_urls + + +def get_mmcls_models(): + mmcls_json_path = osp.join(mmcv.__path__[0], 'model_zoo/mmcls.json') + mmcls_urls = load_file(mmcls_json_path) + + return mmcls_urls + + +def get_deprecated_model_names(): + deprecate_json_path = osp.join(mmcv.__path__[0], + 'model_zoo/deprecated.json') + deprecate_urls = load_file(deprecate_json_path) + assert isinstance(deprecate_urls, dict) + + return deprecate_urls + + +def _process_mmcls_checkpoint(checkpoint): + state_dict = checkpoint['state_dict'] + new_state_dict = OrderedDict() + for k, v in state_dict.items(): + if k.startswith('backbone.'): + new_state_dict[k[9:]] = v + new_checkpoint = dict(state_dict=new_state_dict) + + return new_checkpoint + + +def _load_checkpoint(filename, map_location=None): + """Load checkpoint from somewhere (modelzoo, file, url). + + Args: + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str | None): Same as :func:`torch.load`. Default: None. + + Returns: + dict | OrderedDict: The loaded checkpoint. It can be either an + OrderedDict storing model weights or a dict containing other + information, which depends on the checkpoint. + """ + if filename.startswith('modelzoo://'): + warnings.warn('The URL scheme of "modelzoo://" is deprecated, please ' + 'use "torchvision://" instead') + model_urls = get_torchvision_models() + model_name = filename[11:] + checkpoint = load_url_dist(model_urls[model_name]) + elif filename.startswith('torchvision://'): + model_urls = get_torchvision_models() + model_name = filename[14:] + checkpoint = load_url_dist(model_urls[model_name]) + elif filename.startswith('open-mmlab://'): + model_urls = get_external_models() + model_name = filename[13:] + deprecated_urls = get_deprecated_model_names() + if model_name in deprecated_urls: + warnings.warn(f'open-mmlab://{model_name} is deprecated in favor ' + f'of open-mmlab://{deprecated_urls[model_name]}') + model_name = deprecated_urls[model_name] + model_url = model_urls[model_name] + # check if is url + if model_url.startswith(('http://', 'https://')): + checkpoint = load_url_dist(model_url) + else: + filename = osp.join(_get_mmcv_home(), model_url) + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + elif filename.startswith('mmcls://'): + model_urls = get_mmcls_models() + model_name = filename[8:] + checkpoint = load_url_dist(model_urls[model_name]) + checkpoint = _process_mmcls_checkpoint(checkpoint) + elif filename.startswith(('http://', 'https://')): + checkpoint = load_url_dist(filename) + elif filename.startswith('pavi://'): + model_path = filename[7:] + checkpoint = load_pavimodel_dist(model_path, map_location=map_location) + elif filename.startswith('s3://'): + checkpoint = load_fileclient_dist( + filename, backend='ceph', map_location=map_location) + else: + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + return checkpoint + + +def load_checkpoint(model, + filename, + map_location='cpu', + strict=False, + logger=None): + """Load checkpoint from a file or URI. + + Args: + model (Module): Module to load checkpoint. + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str): Same as :func:`torch.load`. + strict (bool): Whether to allow different params for the model and + checkpoint. + logger (:mod:`logging.Logger` or None): The logger for error message. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + checkpoint = _load_checkpoint(filename, map_location) + # OrderedDict is a subclass of dict + if not isinstance(checkpoint, dict): + raise RuntimeError( + f'No state_dict found in checkpoint file {filename}') + # get state_dict from checkpoint + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + # strip prefix of state_dict + if list(state_dict.keys())[0].startswith('module.'): + state_dict = {k[7:]: v for k, v in state_dict.items()} + + # for MoBY, load model of online branch + if sorted(list(state_dict.keys()))[0].startswith('encoder'): + state_dict = {k.replace('encoder.', ''): v for k, v in state_dict.items() if k.startswith('encoder.')} + + # reshape absolute position embedding + if state_dict.get('absolute_pos_embed') is not None: + absolute_pos_embed = state_dict['absolute_pos_embed'] + N1, L, C1 = absolute_pos_embed.size() + N2, C2, H, W = model.absolute_pos_embed.size() + if N1 != N2 or C1 != C2 or L != H*W: + logger.warning("Error in loading absolute_pos_embed, pass") + else: + state_dict['absolute_pos_embed'] = absolute_pos_embed.view(N2, H, W, C2).permute(0, 3, 1, 2) + + # interpolate position bias table if needed + relative_position_bias_table_keys = [k for k in state_dict.keys() if "relative_position_bias_table" in k] + for table_key in relative_position_bias_table_keys: + table_pretrained = state_dict[table_key] + table_current = model.state_dict()[table_key] + L1, nH1 = table_pretrained.size() + L2, nH2 = table_current.size() + if nH1 != nH2: + logger.warning(f"Error in loading {table_key}, pass") + else: + if L1 != L2: + S1 = int(L1 ** 0.5) + S2 = int(L2 ** 0.5) + table_pretrained_resized = F.interpolate( + table_pretrained.permute(1, 0).view(1, nH1, S1, S1), + size=(S2, S2), mode='bicubic') + state_dict[table_key] = table_pretrained_resized.view(nH2, L2).permute(1, 0) + + # load state_dict + load_state_dict(model, state_dict, strict, logger) + return checkpoint + + +def weights_to_cpu(state_dict): + """Copy a model state_dict to cpu. + + Args: + state_dict (OrderedDict): Model weights on GPU. + + Returns: + OrderedDict: Model weights on GPU. + """ + state_dict_cpu = OrderedDict() + for key, val in state_dict.items(): + state_dict_cpu[key] = val.cpu() + return state_dict_cpu + + +def _save_to_state_dict(module, destination, prefix, keep_vars): + """Saves module state to `destination` dictionary. + + This method is modified from :meth:`torch.nn.Module._save_to_state_dict`. + + Args: + module (nn.Module): The module to generate state_dict. + destination (dict): A dict where state will be stored. + prefix (str): The prefix for parameters and buffers used in this + module. + """ + for name, param in module._parameters.items(): + if param is not None: + destination[prefix + name] = param if keep_vars else param.detach() + for name, buf in module._buffers.items(): + # remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d + if buf is not None: + destination[prefix + name] = buf if keep_vars else buf.detach() + + +def get_state_dict(module, destination=None, prefix='', keep_vars=False): + """Returns a dictionary containing a whole state of the module. + + Both parameters and persistent buffers (e.g. running averages) are + included. Keys are corresponding parameter and buffer names. + + This method is modified from :meth:`torch.nn.Module.state_dict` to + recursively check parallel module in case that the model has a complicated + structure, e.g., nn.Module(nn.Module(DDP)). + + Args: + module (nn.Module): The module to generate state_dict. + destination (OrderedDict): Returned dict for the state of the + module. + prefix (str): Prefix of the key. + keep_vars (bool): Whether to keep the variable property of the + parameters. Default: False. + + Returns: + dict: A dictionary containing a whole state of the module. + """ + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + + # below is the same as torch.nn.Module.state_dict() + if destination is None: + destination = OrderedDict() + destination._metadata = OrderedDict() + destination._metadata[prefix[:-1]] = local_metadata = dict( + version=module._version) + _save_to_state_dict(module, destination, prefix, keep_vars) + for name, child in module._modules.items(): + if child is not None: + get_state_dict( + child, destination, prefix + name + '.', keep_vars=keep_vars) + for hook in module._state_dict_hooks.values(): + hook_result = hook(module, destination, prefix, local_metadata) + if hook_result is not None: + destination = hook_result + return destination + + +def save_checkpoint(model, filename, optimizer=None, meta=None): + """Save checkpoint to file. + + The checkpoint will have 3 fields: ``meta``, ``state_dict`` and + ``optimizer``. By default ``meta`` will contain version and time info. + + Args: + model (Module): Module whose params are to be saved. + filename (str): Checkpoint filename. + optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. + meta (dict, optional): Metadata to be saved in checkpoint. + """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError(f'meta must be a dict or None, but got {type(meta)}') + meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) + + if is_module_wrapper(model): + model = model.module + + if hasattr(model, 'CLASSES') and model.CLASSES is not None: + # save class name to the meta + meta.update(CLASSES=model.CLASSES) + + checkpoint = { + 'meta': meta, + 'state_dict': weights_to_cpu(get_state_dict(model)) + } + # save optimizer state dict in the checkpoint + if isinstance(optimizer, Optimizer): + checkpoint['optimizer'] = optimizer.state_dict() + elif isinstance(optimizer, dict): + checkpoint['optimizer'] = {} + for name, optim in optimizer.items(): + checkpoint['optimizer'][name] = optim.state_dict() + + if filename.startswith('pavi://'): + try: + from pavi import modelcloud + from pavi.exception import NodeNotFoundError + except ImportError: + raise ImportError( + 'Please install pavi to load checkpoint from modelcloud.') + model_path = filename[7:] + root = modelcloud.Folder() + model_dir, model_name = osp.split(model_path) + try: + model = modelcloud.get(model_dir) + except NodeNotFoundError: + model = root.create_training_model(model_dir) + with TemporaryDirectory() as tmp_dir: + checkpoint_file = osp.join(tmp_dir, model_name) + with open(checkpoint_file, 'wb') as f: + torch.save(checkpoint, f) + f.flush() + model.create_file(checkpoint_file, name=model_name) + else: + mmcv.mkdir_or_exist(osp.dirname(filename)) + # immediately flush buffer + with open(filename, 'wb') as f: + torch.save(checkpoint, f) + f.flush() \ No newline at end of file diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py new file mode 100644 index 000000000000..170724be38de --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py @@ -0,0 +1,9 @@ +from .inference import inference_segmentor, init_segmentor, show_result_pyplot +from .test import multi_gpu_test, single_gpu_test +from .train import get_root_logger, set_random_seed, train_segmentor + +__all__ = [ + 'get_root_logger', 'set_random_seed', 'train_segmentor', 'init_segmentor', + 'inference_segmentor', 'multi_gpu_test', 'single_gpu_test', + 'show_result_pyplot' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py new file mode 100644 index 000000000000..9805c8e15886 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py @@ -0,0 +1,136 @@ +import matplotlib.pyplot as plt +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +import torch +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import collate, scatter +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.datasets.pipelines import Compose +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models import build_segmentor + + +def init_segmentor(config, checkpoint=None, device='cuda:0'): + """Initialize a segmentor from config file. + + Args: + config (str or :obj:`mmcv.Config`): Config file path or the config + object. + checkpoint (str, optional): Checkpoint path. If left as None, the model + will not load any weights. + device (str, optional) CPU/CUDA device option. Default 'cuda:0'. + Use 'cpu' for loading model on CPU. + Returns: + nn.Module: The constructed segmentor. + """ + if isinstance(config, str): + config = mmcv.Config.fromfile(config) + elif not isinstance(config, mmcv.Config): + raise TypeError('config must be a filename or Config object, ' + 'but got {}'.format(type(config))) + config.model.pretrained = None + config.model.train_cfg = None + model = build_segmentor(config.model, test_cfg=config.get('test_cfg')) + if checkpoint is not None: + checkpoint = load_checkpoint(model, checkpoint, map_location='cpu') + model.CLASSES = checkpoint['meta']['CLASSES'] + model.PALETTE = checkpoint['meta']['PALETTE'] + model.cfg = config # save the config in the model for convenience + model.to(device) + model.eval() + return model + + +class LoadImage: + """A simple pipeline to load image.""" + + def __call__(self, results): + """Call function to load images into results. + + Args: + results (dict): A result dict contains the file name + of the image to be read. + + Returns: + dict: ``results`` will be returned containing loaded image. + """ + + if isinstance(results['img'], str): + results['filename'] = results['img'] + results['ori_filename'] = results['img'] + else: + results['filename'] = None + results['ori_filename'] = None + img = mmcv.imread(results['img']) + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + return results + + +def inference_segmentor(model, img): + """Inference image(s) with the segmentor. + + Args: + model (nn.Module): The loaded segmentor. + imgs (str/ndarray or list[str/ndarray]): Either image files or loaded + images. + + Returns: + (list[Tensor]): The segmentation result. + """ + cfg = model.cfg + device = next(model.parameters()).device # model device + # build the data pipeline + test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:] + test_pipeline = Compose(test_pipeline) + # prepare data + data = dict(img=img) + data = test_pipeline(data) + data = collate([data], samples_per_gpu=1) + if next(model.parameters()).is_cuda: + # scatter to specified GPU + data = scatter(data, [device])[0] + else: + data['img_metas'] = [i.data[0] for i in data['img_metas']] + + # forward the model + with torch.no_grad(): + result = model(return_loss=False, rescale=True, **data) + return result + + +def show_result_pyplot(model, + img, + result, + palette=None, + fig_size=(15, 10), + opacity=0.5, + title='', + block=True): + """Visualize the segmentation results on the image. + + Args: + model (nn.Module): The loaded segmentor. + img (str or np.ndarray): Image filename or loaded image. + result (list): The segmentation result. + palette (list[list[int]]] | None): The palette of segmentation + map. If None is given, random palette will be generated. + Default: None + fig_size (tuple): Figure size of the pyplot figure. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. + title (str): The title of pyplot figure. + Default is ''. + block (bool): Whether to block the pyplot figure. + Default is True. + """ + if hasattr(model, 'module'): + model = model.module + img = model.show_result( + img, result, palette=palette, show=False, opacity=opacity) + # plt.figure(figsize=fig_size) + # plt.imshow(mmcv.bgr2rgb(img)) + # plt.title(title) + # plt.tight_layout() + # plt.show(block=block) + return mmcv.bgr2rgb(img) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py new file mode 100644 index 000000000000..5fb42ad7d00b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py @@ -0,0 +1,238 @@ +import os.path as osp +import pickle +import shutil +import tempfile + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +import numpy as np +import torch +import torch.distributed as dist +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.image import tensor2imgs +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import get_dist_info + + +def np2tmp(array, temp_file_name=None): + """Save ndarray to local numpy file. + + Args: + array (ndarray): Ndarray to save. + temp_file_name (str): Numpy file name. If 'temp_file_name=None', this + function will generate a file name with tempfile.NamedTemporaryFile + to save ndarray. Default: None. + + Returns: + str: The numpy file name. + """ + + if temp_file_name is None: + temp_file_name = tempfile.NamedTemporaryFile( + suffix='.npy', delete=False).name + np.save(temp_file_name, array) + return temp_file_name + + +def single_gpu_test(model, + data_loader, + show=False, + out_dir=None, + efficient_test=False, + opacity=0.5): + """Test with single GPU. + + Args: + model (nn.Module): Model to be tested. + data_loader (utils.data.Dataloader): Pytorch data loader. + show (bool): Whether show results during inference. Default: False. + out_dir (str, optional): If specified, the results will be dumped into + the directory to save output results. + efficient_test (bool): Whether save the results as local numpy files to + save CPU memory during evaluation. Default: False. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. + Returns: + list: The prediction results. + """ + + model.eval() + results = [] + dataset = data_loader.dataset + prog_bar = mmcv.ProgressBar(len(dataset)) + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, **data) + + if show or out_dir: + img_tensor = data['img'][0] + img_metas = data['img_metas'][0].data[0] + imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) + assert len(imgs) == len(img_metas) + + for img, img_meta in zip(imgs, img_metas): + h, w, _ = img_meta['img_shape'] + img_show = img[:h, :w, :] + + ori_h, ori_w = img_meta['ori_shape'][:-1] + img_show = mmcv.imresize(img_show, (ori_w, ori_h)) + + if out_dir: + out_file = osp.join(out_dir, img_meta['ori_filename']) + else: + out_file = None + + model.module.show_result( + img_show, + result, + palette=dataset.PALETTE, + show=show, + out_file=out_file, + opacity=opacity) + + if isinstance(result, list): + if efficient_test: + result = [np2tmp(_) for _ in result] + results.extend(result) + else: + if efficient_test: + result = np2tmp(result) + results.append(result) + + batch_size = len(result) + for _ in range(batch_size): + prog_bar.update() + return results + + +def multi_gpu_test(model, + data_loader, + tmpdir=None, + gpu_collect=False, + efficient_test=False): + """Test model with multiple gpus. + + This method tests model with multiple gpus and collects the results + under two different modes: gpu and cpu modes. By setting 'gpu_collect=True' + it encodes results to gpu tensors and use gpu communication for results + collection. On cpu mode it saves the results on different gpus to 'tmpdir' + and collects them by the rank 0 worker. + + Args: + model (nn.Module): Model to be tested. + data_loader (utils.data.Dataloader): Pytorch data loader. + tmpdir (str): Path of directory to save the temporary results from + different gpus under cpu mode. + gpu_collect (bool): Option to use either gpu or cpu to collect results. + efficient_test (bool): Whether save the results as local numpy files to + save CPU memory during evaluation. Default: False. + + Returns: + list: The prediction results. + """ + + model.eval() + results = [] + dataset = data_loader.dataset + rank, world_size = get_dist_info() + if rank == 0: + prog_bar = mmcv.ProgressBar(len(dataset)) + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, rescale=True, **data) + + if isinstance(result, list): + if efficient_test: + result = [np2tmp(_) for _ in result] + results.extend(result) + else: + if efficient_test: + result = np2tmp(result) + results.append(result) + + if rank == 0: + batch_size = data['img'][0].size(0) + for _ in range(batch_size * world_size): + prog_bar.update() + + # collect results from all ranks + if gpu_collect: + results = collect_results_gpu(results, len(dataset)) + else: + results = collect_results_cpu(results, len(dataset), tmpdir) + return results + + +def collect_results_cpu(result_part, size, tmpdir=None): + """Collect results with CPU.""" + rank, world_size = get_dist_info() + # create a tmp dir if it is not specified + if tmpdir is None: + MAX_LEN = 512 + # 32 is whitespace + dir_tensor = torch.full((MAX_LEN, ), + 32, + dtype=torch.uint8, + device='cuda') + if rank == 0: + tmpdir = tempfile.mkdtemp() + tmpdir = torch.tensor( + bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') + dir_tensor[:len(tmpdir)] = tmpdir + dist.broadcast(dir_tensor, 0) + tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() + else: + mmcv.mkdir_or_exist(tmpdir) + # dump the part result to the dir + mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank))) + dist.barrier() + # collect all parts + if rank != 0: + return None + else: + # load results of all parts from tmp dir + part_list = [] + for i in range(world_size): + part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i)) + part_list.append(mmcv.load(part_file)) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + # remove tmp dir + shutil.rmtree(tmpdir) + return ordered_results + + +def collect_results_gpu(result_part, size): + """Collect results with GPU.""" + rank, world_size = get_dist_info() + # dump result part to tensor with pickle + part_tensor = torch.tensor( + bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') + # gather all result part tensor shape + shape_tensor = torch.tensor(part_tensor.shape, device='cuda') + shape_list = [shape_tensor.clone() for _ in range(world_size)] + dist.all_gather(shape_list, shape_tensor) + # padding result part tensor to max length + shape_max = torch.tensor(shape_list).max() + part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') + part_send[:shape_tensor[0]] = part_tensor + part_recv_list = [ + part_tensor.new_zeros(shape_max) for _ in range(world_size) + ] + # gather all result part + dist.all_gather(part_recv_list, part_send) + + if rank == 0: + part_list = [] + for recv, shape in zip(part_recv_list, shape_list): + part_list.append( + pickle.loads(recv[:shape[0]].cpu().numpy().tobytes())) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + return ordered_results diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py new file mode 100644 index 000000000000..36e6e10444de --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py @@ -0,0 +1,116 @@ +import random +import warnings + +import numpy as np +import torch +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import MMDataParallel, MMDistributedDataParallel +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import build_optimizer, build_runner + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import DistEvalHook, EvalHook +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.datasets import build_dataloader, build_dataset +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + + +def set_random_seed(seed, deterministic=False): + """Set random seed. + + Args: + seed (int): Seed to be used. + deterministic (bool): Whether to set the deterministic option for + CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` + to True and `torch.backends.cudnn.benchmark` to False. + Default: False. + """ + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + if deterministic: + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + +def train_segmentor(model, + dataset, + cfg, + distributed=False, + validate=False, + timestamp=None, + meta=None): + """Launch segmentor training.""" + logger = get_root_logger(cfg.log_level) + + # prepare data loaders + dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] + data_loaders = [ + build_dataloader( + ds, + cfg.data.samples_per_gpu, + cfg.data.workers_per_gpu, + # cfg.gpus will be ignored if distributed + len(cfg.gpu_ids), + dist=distributed, + seed=cfg.seed, + drop_last=True) for ds in dataset + ] + + # put model on gpus + if distributed: + find_unused_parameters = cfg.get('find_unused_parameters', False) + # Sets the `find_unused_parameters` parameter in + # torch.nn.parallel.DistributedDataParallel + model = MMDistributedDataParallel( + model.cuda(), + device_ids=[torch.cuda.current_device()], + broadcast_buffers=False, + find_unused_parameters=find_unused_parameters) + else: + model = MMDataParallel( + model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) + + # build runner + optimizer = build_optimizer(model, cfg.optimizer) + + if cfg.get('runner') is None: + cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters} + warnings.warn( + 'config is now expected to have a `runner` section, ' + 'please set `runner` in your config.', UserWarning) + + runner = build_runner( + cfg.runner, + default_args=dict( + model=model, + batch_processor=None, + optimizer=optimizer, + work_dir=cfg.work_dir, + logger=logger, + meta=meta)) + + # register hooks + runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config, + cfg.checkpoint_config, cfg.log_config, + cfg.get('momentum_config', None)) + + # an ugly walkaround to make the .log and .log.json filenames the same + runner.timestamp = timestamp + + # register eval hooks + if validate: + val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) + val_dataloader = build_dataloader( + val_dataset, + samples_per_gpu=1, + workers_per_gpu=cfg.data.workers_per_gpu, + dist=distributed, + shuffle=False) + eval_cfg = cfg.get('evaluation', {}) + eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner' + eval_hook = DistEvalHook if distributed else EvalHook + runner.register_hook(eval_hook(val_dataloader, **eval_cfg), priority='LOW') + + if cfg.resume_from: + runner.resume(cfg.resume_from) + elif cfg.load_from: + runner.load_checkpoint(cfg.load_from) + runner.run(data_loaders, cfg.workflow) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/__init__.py new file mode 100644 index 000000000000..965605587211 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/__init__.py @@ -0,0 +1,3 @@ +from .evaluation import * # noqa: F401, F403 +from .seg import * # noqa: F401, F403 +from .utils import * # noqa: F401, F403 diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py new file mode 100644 index 000000000000..f7cc4b23413a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py @@ -0,0 +1,8 @@ +from .class_names import get_classes, get_palette +from .eval_hooks import DistEvalHook, EvalHook +from .metrics import eval_metrics, mean_dice, mean_fscore, mean_iou + +__all__ = [ + 'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'mean_fscore', + 'eval_metrics', 'get_classes', 'get_palette' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py new file mode 100644 index 000000000000..f91355141f28 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py @@ -0,0 +1,152 @@ +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + + +def cityscapes_classes(): + """Cityscapes class names for external use.""" + return [ + 'road', 'sidewalk', 'building', 'wall', 'fence', 'pole', + 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', + 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', + 'bicycle' + ] + + +def ade_classes(): + """ADE20K class names for external use.""" + return [ + 'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ', + 'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth', + 'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car', + 'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug', + 'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe', + 'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column', + 'signboard', 'chest of drawers', 'counter', 'sand', 'sink', + 'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path', + 'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door', + 'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table', + 'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove', + 'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar', + 'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower', + 'chandelier', 'awning', 'streetlight', 'booth', 'television receiver', + 'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister', + 'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van', + 'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything', + 'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent', + 'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank', + 'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake', + 'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce', + 'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen', + 'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass', + 'clock', 'flag' + ] + + +def voc_classes(): + """Pascal VOC class names for external use.""" + return [ + 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', + 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', + 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', + 'tvmonitor' + ] + + +def cityscapes_palette(): + """Cityscapes palette for external use.""" + return [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], + [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0], + [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60], + [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], [0, 80, 100], + [0, 0, 230], [119, 11, 32]] + + +def ade_palette(): + """ADE20K palette for external use.""" + return [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], + [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], + [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], + [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], + [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], + [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], + [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], + [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], + [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], + [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], + [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], + [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], + [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], + [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], + [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], + [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], + [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], + [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], + [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], + [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], + [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], + [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], + [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], + [102, 255, 0], [92, 0, 255]] + + +def voc_palette(): + """Pascal VOC palette for external use.""" + return [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], + [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], + [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], + [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], + [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] + + +dataset_aliases = { + 'cityscapes': ['cityscapes'], + 'ade': ['ade', 'ade20k'], + 'voc': ['voc', 'pascal_voc', 'voc12', 'voc12aug'] +} + + +def get_classes(dataset): + """Get class names of a dataset.""" + alias2name = {} + for name, aliases in dataset_aliases.items(): + for alias in aliases: + alias2name[alias] = name + + if mmcv.is_str(dataset): + if dataset in alias2name: + labels = eval(alias2name[dataset] + '_classes()') + else: + raise ValueError(f'Unrecognized dataset: {dataset}') + else: + raise TypeError(f'dataset must a str, but got {type(dataset)}') + return labels + + +def get_palette(dataset): + """Get class palette (RGB) of a dataset.""" + alias2name = {} + for name, aliases in dataset_aliases.items(): + for alias in aliases: + alias2name[alias] = name + + if mmcv.is_str(dataset): + if dataset in alias2name: + labels = eval(alias2name[dataset] + '_palette()') + else: + raise ValueError(f'Unrecognized dataset: {dataset}') + else: + raise TypeError(f'dataset must a str, but got {type(dataset)}') + return labels diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py new file mode 100644 index 000000000000..b6493f20505e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py @@ -0,0 +1,109 @@ +import os.path as osp + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import DistEvalHook as _DistEvalHook +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import EvalHook as _EvalHook + + +class EvalHook(_EvalHook): + """Single GPU EvalHook, with efficient test support. + + Args: + by_epoch (bool): Determine perform evaluation by epoch or by iteration. + If set to True, it will perform by epoch. Otherwise, by iteration. + Default: False. + efficient_test (bool): Whether save the results as local numpy files to + save CPU memory during evaluation. Default: False. + Returns: + list: The prediction results. + """ + + greater_keys = ['mIoU', 'mAcc', 'aAcc'] + + def __init__(self, *args, by_epoch=False, efficient_test=False, **kwargs): + super().__init__(*args, by_epoch=by_epoch, **kwargs) + self.efficient_test = efficient_test + + def after_train_iter(self, runner): + """After train epoch hook. + + Override default ``single_gpu_test``. + """ + if self.by_epoch or not self.every_n_iters(runner, self.interval): + return + from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import single_gpu_test + runner.log_buffer.clear() + results = single_gpu_test( + runner.model, + self.dataloader, + show=False, + efficient_test=self.efficient_test) + self.evaluate(runner, results) + + def after_train_epoch(self, runner): + """After train epoch hook. + + Override default ``single_gpu_test``. + """ + if not self.by_epoch or not self.every_n_epochs(runner, self.interval): + return + from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import single_gpu_test + runner.log_buffer.clear() + results = single_gpu_test(runner.model, self.dataloader, show=False) + self.evaluate(runner, results) + + +class DistEvalHook(_DistEvalHook): + """Distributed EvalHook, with efficient test support. + + Args: + by_epoch (bool): Determine perform evaluation by epoch or by iteration. + If set to True, it will perform by epoch. Otherwise, by iteration. + Default: False. + efficient_test (bool): Whether save the results as local numpy files to + save CPU memory during evaluation. Default: False. + Returns: + list: The prediction results. + """ + + greater_keys = ['mIoU', 'mAcc', 'aAcc'] + + def __init__(self, *args, by_epoch=False, efficient_test=False, **kwargs): + super().__init__(*args, by_epoch=by_epoch, **kwargs) + self.efficient_test = efficient_test + + def after_train_iter(self, runner): + """After train epoch hook. + + Override default ``multi_gpu_test``. + """ + if self.by_epoch or not self.every_n_iters(runner, self.interval): + return + from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import multi_gpu_test + runner.log_buffer.clear() + results = multi_gpu_test( + runner.model, + self.dataloader, + tmpdir=osp.join(runner.work_dir, '.eval_hook'), + gpu_collect=self.gpu_collect, + efficient_test=self.efficient_test) + if runner.rank == 0: + print('\n') + self.evaluate(runner, results) + + def after_train_epoch(self, runner): + """After train epoch hook. + + Override default ``multi_gpu_test``. + """ + if not self.by_epoch or not self.every_n_epochs(runner, self.interval): + return + from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import multi_gpu_test + runner.log_buffer.clear() + results = multi_gpu_test( + runner.model, + self.dataloader, + tmpdir=osp.join(runner.work_dir, '.eval_hook'), + gpu_collect=self.gpu_collect) + if runner.rank == 0: + print('\n') + self.evaluate(runner, results) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py new file mode 100644 index 000000000000..9f4ba8a2b4ec --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py @@ -0,0 +1,326 @@ +from collections import OrderedDict + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +import numpy as np +import torch + + +def f_score(precision, recall, beta=1): + """calcuate the f-score value. + + Args: + precision (float | torch.Tensor): The precision value. + recall (float | torch.Tensor): The recall value. + beta (int): Determines the weight of recall in the combined score. + Default: False. + + Returns: + [torch.tensor]: The f-score value. + """ + score = (1 + beta**2) * (precision * recall) / ( + (beta**2 * precision) + recall) + return score + + +def intersect_and_union(pred_label, + label, + num_classes, + ignore_index, + label_map=dict(), + reduce_zero_label=False): + """Calculate intersection and Union. + + Args: + pred_label (ndarray | str): Prediction segmentation map + or predict result filename. + label (ndarray | str): Ground truth segmentation map + or label filename. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + label_map (dict): Mapping old labels to new labels. The parameter will + work only when label is str. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. The parameter will + work only when label is str. Default: False. + + Returns: + torch.Tensor: The intersection of prediction and ground truth + histogram on all classes. + torch.Tensor: The union of prediction and ground truth histogram on + all classes. + torch.Tensor: The prediction histogram on all classes. + torch.Tensor: The ground truth histogram on all classes. + """ + + if isinstance(pred_label, str): + pred_label = torch.from_numpy(np.load(pred_label)) + else: + pred_label = torch.from_numpy((pred_label)) + + if isinstance(label, str): + label = torch.from_numpy( + mmcv.imread(label, flag='unchanged', backend='pillow')) + else: + label = torch.from_numpy(label) + + if label_map is not None: + for old_id, new_id in label_map.items(): + label[label == old_id] = new_id + if reduce_zero_label: + label[label == 0] = 255 + label = label - 1 + label[label == 254] = 255 + + mask = (label != ignore_index) + pred_label = pred_label[mask] + label = label[mask] + + intersect = pred_label[pred_label == label] + area_intersect = torch.histc( + intersect.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_pred_label = torch.histc( + pred_label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_label = torch.histc( + label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_union = area_pred_label + area_label - area_intersect + return area_intersect, area_union, area_pred_label, area_label + + +def total_intersect_and_union(results, + gt_seg_maps, + num_classes, + ignore_index, + label_map=dict(), + reduce_zero_label=False): + """Calculate Total Intersection and Union. + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. Default: False. + + Returns: + ndarray: The intersection of prediction and ground truth histogram + on all classes. + ndarray: The union of prediction and ground truth histogram on all + classes. + ndarray: The prediction histogram on all classes. + ndarray: The ground truth histogram on all classes. + """ + num_imgs = len(results) + assert len(gt_seg_maps) == num_imgs + total_area_intersect = torch.zeros((num_classes, ), dtype=torch.float64) + total_area_union = torch.zeros((num_classes, ), dtype=torch.float64) + total_area_pred_label = torch.zeros((num_classes, ), dtype=torch.float64) + total_area_label = torch.zeros((num_classes, ), dtype=torch.float64) + for i in range(num_imgs): + area_intersect, area_union, area_pred_label, area_label = \ + intersect_and_union( + results[i], gt_seg_maps[i], num_classes, ignore_index, + label_map, reduce_zero_label) + total_area_intersect += area_intersect + total_area_union += area_union + total_area_pred_label += area_pred_label + total_area_label += area_label + return total_area_intersect, total_area_union, total_area_pred_label, \ + total_area_label + + +def mean_iou(results, + gt_seg_maps, + num_classes, + ignore_index, + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False): + """Calculate Mean Intersection and Union (mIoU) + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. Default: False. + + Returns: + dict[str, float | ndarray]: + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category IoU, shape (num_classes, ). + """ + iou_result = eval_metrics( + results=results, + gt_seg_maps=gt_seg_maps, + num_classes=num_classes, + ignore_index=ignore_index, + metrics=['mIoU'], + nan_to_num=nan_to_num, + label_map=label_map, + reduce_zero_label=reduce_zero_label) + return iou_result + + +def mean_dice(results, + gt_seg_maps, + num_classes, + ignore_index, + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False): + """Calculate Mean Dice (mDice) + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. Default: False. + + Returns: + dict[str, float | ndarray]: Default metrics. + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category dice, shape (num_classes, ). + """ + + dice_result = eval_metrics( + results=results, + gt_seg_maps=gt_seg_maps, + num_classes=num_classes, + ignore_index=ignore_index, + metrics=['mDice'], + nan_to_num=nan_to_num, + label_map=label_map, + reduce_zero_label=reduce_zero_label) + return dice_result + + +def mean_fscore(results, + gt_seg_maps, + num_classes, + ignore_index, + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False, + beta=1): + """Calculate Mean Intersection and Union (mIoU) + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. Default: False. + beta (int): Determines the weight of recall in the combined score. + Default: False. + + + Returns: + dict[str, float | ndarray]: Default metrics. + float: Overall accuracy on all images. + ndarray: Per category recall, shape (num_classes, ). + ndarray: Per category precision, shape (num_classes, ). + ndarray: Per category f-score, shape (num_classes, ). + """ + fscore_result = eval_metrics( + results=results, + gt_seg_maps=gt_seg_maps, + num_classes=num_classes, + ignore_index=ignore_index, + metrics=['mFscore'], + nan_to_num=nan_to_num, + label_map=label_map, + reduce_zero_label=reduce_zero_label, + beta=beta) + return fscore_result + + +def eval_metrics(results, + gt_seg_maps, + num_classes, + ignore_index, + metrics=['mIoU'], + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False, + beta=1): + """Calculate evaluation metrics + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + metrics (list[str] | str): Metrics to be evaluated, 'mIoU' and 'mDice'. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. Default: False. + Returns: + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category evaluation metrics, shape (num_classes, ). + """ + if isinstance(metrics, str): + metrics = [metrics] + allowed_metrics = ['mIoU', 'mDice', 'mFscore'] + if not set(metrics).issubset(set(allowed_metrics)): + raise KeyError('metrics {} is not supported'.format(metrics)) + + total_area_intersect, total_area_union, total_area_pred_label, \ + total_area_label = total_intersect_and_union( + results, gt_seg_maps, num_classes, ignore_index, label_map, + reduce_zero_label) + all_acc = total_area_intersect.sum() / total_area_label.sum() + ret_metrics = OrderedDict({'aAcc': all_acc}) + for metric in metrics: + if metric == 'mIoU': + iou = total_area_intersect / total_area_union + acc = total_area_intersect / total_area_label + ret_metrics['IoU'] = iou + ret_metrics['Acc'] = acc + elif metric == 'mDice': + dice = 2 * total_area_intersect / ( + total_area_pred_label + total_area_label) + acc = total_area_intersect / total_area_label + ret_metrics['Dice'] = dice + ret_metrics['Acc'] = acc + elif metric == 'mFscore': + precision = total_area_intersect / total_area_pred_label + recall = total_area_intersect / total_area_label + f_value = torch.tensor( + [f_score(x[0], x[1], beta) for x in zip(precision, recall)]) + ret_metrics['Fscore'] = f_value + ret_metrics['Precision'] = precision + ret_metrics['Recall'] = recall + + ret_metrics = { + metric: value.numpy() + for metric, value in ret_metrics.items() + } + if nan_to_num is not None: + ret_metrics = OrderedDict({ + metric: np.nan_to_num(metric_value, nan=nan_to_num) + for metric, metric_value in ret_metrics.items() + }) + return ret_metrics diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/__init__.py new file mode 100644 index 000000000000..93bc129b685e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/__init__.py @@ -0,0 +1,4 @@ +from .builder import build_pixel_sampler +from .sampler import BasePixelSampler, OHEMPixelSampler + +__all__ = ['build_pixel_sampler', 'BasePixelSampler', 'OHEMPixelSampler'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/builder.py new file mode 100644 index 000000000000..908e885cb71d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/builder.py @@ -0,0 +1,8 @@ +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry, build_from_cfg + +PIXEL_SAMPLERS = Registry('pixel sampler') + + +def build_pixel_sampler(cfg, **default_args): + """Build pixel sampler for segmentation map.""" + return build_from_cfg(cfg, PIXEL_SAMPLERS, default_args) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/__init__.py new file mode 100644 index 000000000000..332b242c03d1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/__init__.py @@ -0,0 +1,4 @@ +from .base_pixel_sampler import BasePixelSampler +from .ohem_pixel_sampler import OHEMPixelSampler + +__all__ = ['BasePixelSampler', 'OHEMPixelSampler'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py new file mode 100644 index 000000000000..b75b1566c9f1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py @@ -0,0 +1,12 @@ +from abc import ABCMeta, abstractmethod + + +class BasePixelSampler(metaclass=ABCMeta): + """Base class of pixel sampler.""" + + def __init__(self, **kwargs): + pass + + @abstractmethod + def sample(self, seg_logit, seg_label): + """Placeholder for sample function.""" diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py new file mode 100644 index 000000000000..88bb10d44026 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py @@ -0,0 +1,76 @@ +import torch +import torch.nn.functional as F + +from ..builder import PIXEL_SAMPLERS +from .base_pixel_sampler import BasePixelSampler + + +@PIXEL_SAMPLERS.register_module() +class OHEMPixelSampler(BasePixelSampler): + """Online Hard Example Mining Sampler for segmentation. + + Args: + context (nn.Module): The context of sampler, subclass of + :obj:`BaseDecodeHead`. + thresh (float, optional): The threshold for hard example selection. + Below which, are prediction with low confidence. If not + specified, the hard examples will be pixels of top ``min_kept`` + loss. Default: None. + min_kept (int, optional): The minimum number of predictions to keep. + Default: 100000. + """ + + def __init__(self, context, thresh=None, min_kept=100000): + super(OHEMPixelSampler, self).__init__() + self.context = context + assert min_kept > 1 + self.thresh = thresh + self.min_kept = min_kept + + def sample(self, seg_logit, seg_label): + """Sample pixels that have high loss or with low prediction confidence. + + Args: + seg_logit (torch.Tensor): segmentation logits, shape (N, C, H, W) + seg_label (torch.Tensor): segmentation label, shape (N, 1, H, W) + + Returns: + torch.Tensor: segmentation weight, shape (N, H, W) + """ + with torch.no_grad(): + assert seg_logit.shape[2:] == seg_label.shape[2:] + assert seg_label.shape[1] == 1 + seg_label = seg_label.squeeze(1).long() + batch_kept = self.min_kept * seg_label.size(0) + valid_mask = seg_label != self.context.ignore_index + seg_weight = seg_logit.new_zeros(size=seg_label.size()) + valid_seg_weight = seg_weight[valid_mask] + if self.thresh is not None: + seg_prob = F.softmax(seg_logit, dim=1) + + tmp_seg_label = seg_label.clone().unsqueeze(1) + tmp_seg_label[tmp_seg_label == self.context.ignore_index] = 0 + seg_prob = seg_prob.gather(1, tmp_seg_label).squeeze(1) + sort_prob, sort_indices = seg_prob[valid_mask].sort() + + if sort_prob.numel() > 0: + min_threshold = sort_prob[min(batch_kept, + sort_prob.numel() - 1)] + else: + min_threshold = 0.0 + threshold = max(min_threshold, self.thresh) + valid_seg_weight[seg_prob[valid_mask] < threshold] = 1. + else: + losses = self.context.loss_decode( + seg_logit, + seg_label, + weight=None, + ignore_index=self.context.ignore_index, + reduction_override='none') + # faster than topk according to https://github.com/pytorch/pytorch/issues/22812 # noqa + _, sort_indices = losses[valid_mask].sort(descending=True) + valid_seg_weight[sort_indices[:batch_kept]] = 1. + + seg_weight[valid_mask] = valid_seg_weight + + return seg_weight diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/__init__.py new file mode 100644 index 000000000000..f2678b321c29 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/__init__.py @@ -0,0 +1,3 @@ +from .misc import add_prefix + +__all__ = ['add_prefix'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/misc.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/misc.py new file mode 100644 index 000000000000..eb862a82bd47 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/misc.py @@ -0,0 +1,17 @@ +def add_prefix(inputs, prefix): + """Add prefix for dict. + + Args: + inputs (dict): The input dict with str keys. + prefix (str): The prefix to add. + + Returns: + + dict: The dict with keys updated with ``prefix``. + """ + + outputs = dict() + for name, value in inputs.items(): + outputs[f'{prefix}.{name}'] = value + + return outputs diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py new file mode 100644 index 000000000000..ebeaef4a28ef --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py @@ -0,0 +1,19 @@ +from .ade import ADE20KDataset +from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset +from .chase_db1 import ChaseDB1Dataset +from .cityscapes import CityscapesDataset +from .custom import CustomDataset +from .dataset_wrappers import ConcatDataset, RepeatDataset +from .drive import DRIVEDataset +from .hrf import HRFDataset +from .pascal_context import PascalContextDataset, PascalContextDataset59 +from .stare import STAREDataset +from .voc import PascalVOCDataset + +__all__ = [ + 'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset', + 'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset', + 'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset', + 'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset', + 'STAREDataset' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py new file mode 100644 index 000000000000..5913e43775ed --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py @@ -0,0 +1,84 @@ +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class ADE20KDataset(CustomDataset): + """ADE20K dataset. + + In segmentation map annotation for ADE20K, 0 stands for background, which + is not included in 150 categories. ``reduce_zero_label`` is fixed to True. + The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed to + '.png'. + """ + CLASSES = ( + 'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ', + 'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth', + 'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car', + 'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug', + 'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe', + 'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column', + 'signboard', 'chest of drawers', 'counter', 'sand', 'sink', + 'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path', + 'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door', + 'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table', + 'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove', + 'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar', + 'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower', + 'chandelier', 'awning', 'streetlight', 'booth', 'television receiver', + 'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister', + 'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van', + 'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything', + 'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent', + 'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank', + 'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake', + 'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce', + 'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen', + 'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass', + 'clock', 'flag') + + PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], + [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], + [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], + [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], + [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], + [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], + [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], + [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], + [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], + [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], + [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], + [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], + [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], + [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], + [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], + [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], + [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], + [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], + [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], + [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], + [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], + [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], + [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], + [102, 255, 0], [92, 0, 255]] + + def __init__(self, **kwargs): + super(ADE20KDataset, self).__init__( + img_suffix='.jpg', + seg_map_suffix='.png', + reduce_zero_label=True, + **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py new file mode 100644 index 000000000000..371b9903be64 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py @@ -0,0 +1,169 @@ +import copy +import platform +import random +from functools import partial + +import numpy as np +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import collate +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import get_dist_info +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry, build_from_cfg +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import DataLoader, PoolDataLoader +from torch.utils.data import DistributedSampler + +if platform.system() != 'Windows': + # https://github.com/pytorch/pytorch/issues/973 + import resource + rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) + hard_limit = rlimit[1] + soft_limit = min(4096, hard_limit) + resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit)) + +DATASETS = Registry('dataset') +PIPELINES = Registry('pipeline') + + +def _concat_dataset(cfg, default_args=None): + """Build :obj:`ConcatDataset by.""" + from .dataset_wrappers import ConcatDataset + img_dir = cfg['img_dir'] + ann_dir = cfg.get('ann_dir', None) + split = cfg.get('split', None) + num_img_dir = len(img_dir) if isinstance(img_dir, (list, tuple)) else 1 + if ann_dir is not None: + num_ann_dir = len(ann_dir) if isinstance(ann_dir, (list, tuple)) else 1 + else: + num_ann_dir = 0 + if split is not None: + num_split = len(split) if isinstance(split, (list, tuple)) else 1 + else: + num_split = 0 + if num_img_dir > 1: + assert num_img_dir == num_ann_dir or num_ann_dir == 0 + assert num_img_dir == num_split or num_split == 0 + else: + assert num_split == num_ann_dir or num_ann_dir <= 1 + num_dset = max(num_split, num_img_dir) + + datasets = [] + for i in range(num_dset): + data_cfg = copy.deepcopy(cfg) + if isinstance(img_dir, (list, tuple)): + data_cfg['img_dir'] = img_dir[i] + if isinstance(ann_dir, (list, tuple)): + data_cfg['ann_dir'] = ann_dir[i] + if isinstance(split, (list, tuple)): + data_cfg['split'] = split[i] + datasets.append(build_dataset(data_cfg, default_args)) + + return ConcatDataset(datasets) + + +def build_dataset(cfg, default_args=None): + """Build datasets.""" + from .dataset_wrappers import ConcatDataset, RepeatDataset + if isinstance(cfg, (list, tuple)): + dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg]) + elif cfg['type'] == 'RepeatDataset': + dataset = RepeatDataset( + build_dataset(cfg['dataset'], default_args), cfg['times']) + elif isinstance(cfg.get('img_dir'), (list, tuple)) or isinstance( + cfg.get('split', None), (list, tuple)): + dataset = _concat_dataset(cfg, default_args) + else: + dataset = build_from_cfg(cfg, DATASETS, default_args) + + return dataset + + +def build_dataloader(dataset, + samples_per_gpu, + workers_per_gpu, + num_gpus=1, + dist=True, + shuffle=True, + seed=None, + drop_last=False, + pin_memory=True, + dataloader_type='PoolDataLoader', + **kwargs): + """Build PyTorch DataLoader. + + In distributed training, each GPU/process has a dataloader. + In non-distributed training, there is only one dataloader for all GPUs. + + Args: + dataset (Dataset): A PyTorch dataset. + samples_per_gpu (int): Number of training samples on each GPU, i.e., + batch size of each GPU. + workers_per_gpu (int): How many subprocesses to use for data loading + for each GPU. + num_gpus (int): Number of GPUs. Only used in non-distributed training. + dist (bool): Distributed training/test or not. Default: True. + shuffle (bool): Whether to shuffle the data at every epoch. + Default: True. + seed (int | None): Seed to be used. Default: None. + drop_last (bool): Whether to drop the last incomplete batch in epoch. + Default: False + pin_memory (bool): Whether to use pin_memory in DataLoader. + Default: True + dataloader_type (str): Type of dataloader. Default: 'PoolDataLoader' + kwargs: any keyword argument to be used to initialize DataLoader + + Returns: + DataLoader: A PyTorch dataloader. + """ + rank, world_size = get_dist_info() + if dist: + sampler = DistributedSampler( + dataset, world_size, rank, shuffle=shuffle) + shuffle = False + batch_size = samples_per_gpu + num_workers = workers_per_gpu + else: + sampler = None + batch_size = num_gpus * samples_per_gpu + num_workers = num_gpus * workers_per_gpu + + init_fn = partial( + worker_init_fn, num_workers=num_workers, rank=rank, + seed=seed) if seed is not None else None + + assert dataloader_type in ( + 'DataLoader', + 'PoolDataLoader'), f'unsupported dataloader {dataloader_type}' + + if dataloader_type == 'PoolDataLoader': + dataloader = PoolDataLoader + elif dataloader_type == 'DataLoader': + dataloader = DataLoader + + data_loader = dataloader( + dataset, + batch_size=batch_size, + sampler=sampler, + num_workers=num_workers, + collate_fn=partial(collate, samples_per_gpu=samples_per_gpu), + pin_memory=pin_memory, + shuffle=shuffle, + worker_init_fn=init_fn, + drop_last=drop_last, + **kwargs) + + return data_loader + + +def worker_init_fn(worker_id, num_workers, rank, seed): + """Worker init func for dataloader. + + The seed of each worker equals to num_worker * rank + worker_id + user_seed + + Args: + worker_id (int): Worker id. + num_workers (int): Number of workers. + rank (int): The rank of current process. + seed (int): The random seed to use. + """ + + worker_seed = num_workers * rank + worker_id + seed + np.random.seed(worker_seed) + random.seed(worker_seed) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py new file mode 100644 index 000000000000..8bc29bea1470 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py @@ -0,0 +1,27 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class ChaseDB1Dataset(CustomDataset): + """Chase_db1 dataset. + + In segmentation map annotation for Chase_db1, 0 stands for background, + which is included in 2 categories. ``reduce_zero_label`` is fixed to False. + The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '_1stHO.png'. + """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(ChaseDB1Dataset, self).__init__( + img_suffix='.png', + seg_map_suffix='_1stHO.png', + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py new file mode 100644 index 000000000000..ca7cd01c9fb5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py @@ -0,0 +1,217 @@ +import os.path as osp +import tempfile + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +import numpy as np +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import print_log +from PIL import Image + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class CityscapesDataset(CustomDataset): + """Cityscapes dataset. + + The ``img_suffix`` is fixed to '_leftImg8bit.png' and ``seg_map_suffix`` is + fixed to '_gtFine_labelTrainIds.png' for Cityscapes dataset. + """ + + CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole', + 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', + 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', + 'bicycle') + + PALETTE = [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], + [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0], + [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60], + [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + + def __init__(self, **kwargs): + super(CityscapesDataset, self).__init__( + img_suffix='_leftImg8bit.png', + seg_map_suffix='_gtFine_labelTrainIds.png', + **kwargs) + + @staticmethod + def _convert_to_label_id(result): + """Convert trainId to id for cityscapes.""" + if isinstance(result, str): + result = np.load(result) + import cityscapesscripts.helpers.labels as CSLabels + result_copy = result.copy() + for trainId, label in CSLabels.trainId2label.items(): + result_copy[result == trainId] = label.id + + return result_copy + + def results2img(self, results, imgfile_prefix, to_label_id): + """Write the segmentation results to images. + + Args: + results (list[list | tuple | ndarray]): Testing results of the + dataset. + imgfile_prefix (str): The filename prefix of the png files. + If the prefix is "somepath/xxx", + the png files will be named "somepath/xxx.png". + to_label_id (bool): whether convert output to label_id for + submission + + Returns: + list[str: str]: result txt files which contains corresponding + semantic segmentation images. + """ + mmcv.mkdir_or_exist(imgfile_prefix) + result_files = [] + prog_bar = mmcv.ProgressBar(len(self)) + for idx in range(len(self)): + result = results[idx] + if to_label_id: + result = self._convert_to_label_id(result) + filename = self.img_infos[idx]['filename'] + basename = osp.splitext(osp.basename(filename))[0] + + png_filename = osp.join(imgfile_prefix, f'{basename}.png') + + output = Image.fromarray(result.astype(np.uint8)).convert('P') + import cityscapesscripts.helpers.labels as CSLabels + palette = np.zeros((len(CSLabels.id2label), 3), dtype=np.uint8) + for label_id, label in CSLabels.id2label.items(): + palette[label_id] = label.color + + output.putpalette(palette) + output.save(png_filename) + result_files.append(png_filename) + prog_bar.update() + + return result_files + + def format_results(self, results, imgfile_prefix=None, to_label_id=True): + """Format the results into dir (standard format for Cityscapes + evaluation). + + Args: + results (list): Testing results of the dataset. + imgfile_prefix (str | None): The prefix of images files. It + includes the file path and the prefix of filename, e.g., + "a/b/prefix". If not specified, a temp file will be created. + Default: None. + to_label_id (bool): whether convert output to label_id for + submission. Default: False + + Returns: + tuple: (result_files, tmp_dir), result_files is a list containing + the image paths, tmp_dir is the temporal directory created + for saving json/png files when img_prefix is not specified. + """ + + assert isinstance(results, list), 'results must be a list' + assert len(results) == len(self), ( + 'The length of results is not equal to the dataset len: ' + f'{len(results)} != {len(self)}') + + if imgfile_prefix is None: + tmp_dir = tempfile.TemporaryDirectory() + imgfile_prefix = tmp_dir.name + else: + tmp_dir = None + result_files = self.results2img(results, imgfile_prefix, to_label_id) + + return result_files, tmp_dir + + def evaluate(self, + results, + metric='mIoU', + logger=None, + imgfile_prefix=None, + efficient_test=False): + """Evaluation in Cityscapes/default protocol. + + Args: + results (list): Testing results of the dataset. + metric (str | list[str]): Metrics to be evaluated. + logger (logging.Logger | None | str): Logger used for printing + related information during evaluation. Default: None. + imgfile_prefix (str | None): The prefix of output image file, + for cityscapes evaluation only. It includes the file path and + the prefix of filename, e.g., "a/b/prefix". + If results are evaluated with cityscapes protocol, it would be + the prefix of output png files. The output files would be + png images under folder "a/b/prefix/xxx.png", where "xxx" is + the image name of cityscapes. If not specified, a temp file + will be created for evaluation. + Default: None. + + Returns: + dict[str, float]: Cityscapes/default metrics. + """ + + eval_results = dict() + metrics = metric.copy() if isinstance(metric, list) else [metric] + if 'cityscapes' in metrics: + eval_results.update( + self._evaluate_cityscapes(results, logger, imgfile_prefix)) + metrics.remove('cityscapes') + if len(metrics) > 0: + eval_results.update( + super(CityscapesDataset, + self).evaluate(results, metrics, logger, efficient_test)) + + return eval_results + + def _evaluate_cityscapes(self, results, logger, imgfile_prefix): + """Evaluation in Cityscapes protocol. + + Args: + results (list): Testing results of the dataset. + logger (logging.Logger | str | None): Logger used for printing + related information during evaluation. Default: None. + imgfile_prefix (str | None): The prefix of output image file + + Returns: + dict[str: float]: Cityscapes evaluation results. + """ + try: + import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as CSEval # noqa + except ImportError: + raise ImportError('Please run "pip install cityscapesscripts" to ' + 'install cityscapesscripts first.') + msg = 'Evaluating in Cityscapes style' + if logger is None: + msg = '\n' + msg + print_log(msg, logger=logger) + + result_files, tmp_dir = self.format_results(results, imgfile_prefix) + + if tmp_dir is None: + result_dir = imgfile_prefix + else: + result_dir = tmp_dir.name + + eval_results = dict() + print_log(f'Evaluating results under {result_dir} ...', logger=logger) + + CSEval.args.evalInstLevelScore = True + CSEval.args.predictionPath = osp.abspath(result_dir) + CSEval.args.evalPixelAccuracy = True + CSEval.args.JSONOutput = False + + seg_map_list = [] + pred_list = [] + + # when evaluating with official cityscapesscripts, + # **_gtFine_labelIds.png is used + for seg_map in mmcv.scandir( + self.ann_dir, 'gtFine_labelIds.png', recursive=True): + seg_map_list.append(osp.join(self.ann_dir, seg_map)) + pred_list.append(CSEval.getPrediction(CSEval.args, seg_map)) + + eval_results.update( + CSEval.evaluateImgLists(pred_list, seg_map_list, CSEval.args)) + + if tmp_dir is not None: + tmp_dir.cleanup() + + return eval_results diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py new file mode 100644 index 000000000000..9d414a6fd43f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py @@ -0,0 +1,400 @@ +import os +import os.path as osp +from collections import OrderedDict +from functools import reduce + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +import numpy as np +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import print_log +from prettytable import PrettyTable +from torch.utils.data import Dataset + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import eval_metrics +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger +from .builder import DATASETS +from .pipelines import Compose + + +@DATASETS.register_module() +class CustomDataset(Dataset): + """Custom dataset for semantic segmentation. An example of file structure + is as followed. + + .. code-block:: none + + ├── data + │ ├── my_dataset + │ │ ├── img_dir + │ │ │ ├── train + │ │ │ │ ├── xxx{img_suffix} + │ │ │ │ ├── yyy{img_suffix} + │ │ │ │ ├── zzz{img_suffix} + │ │ │ ├── val + │ │ ├── ann_dir + │ │ │ ├── train + │ │ │ │ ├── xxx{seg_map_suffix} + │ │ │ │ ├── yyy{seg_map_suffix} + │ │ │ │ ├── zzz{seg_map_suffix} + │ │ │ ├── val + + The img/gt_semantic_seg pair of CustomDataset should be of the same + except suffix. A valid img/gt_semantic_seg filename pair should be like + ``xxx{img_suffix}`` and ``xxx{seg_map_suffix}`` (extension is also included + in the suffix). If split is given, then ``xxx`` is specified in txt file. + Otherwise, all files in ``img_dir/``and ``ann_dir`` will be loaded. + Please refer to ``docs/tutorials/new_dataset.md`` for more details. + + + Args: + pipeline (list[dict]): Processing pipeline + img_dir (str): Path to image directory + img_suffix (str): Suffix of images. Default: '.jpg' + ann_dir (str, optional): Path to annotation directory. Default: None + seg_map_suffix (str): Suffix of segmentation maps. Default: '.png' + split (str, optional): Split txt file. If split is specified, only + file with suffix in the splits will be loaded. Otherwise, all + images in img_dir/ann_dir will be loaded. Default: None + data_root (str, optional): Data root for img_dir/ann_dir. Default: + None. + test_mode (bool): If test_mode=True, gt wouldn't be loaded. + ignore_index (int): The label index to be ignored. Default: 255 + reduce_zero_label (bool): Whether to mark label zero as ignored. + Default: False + classes (str | Sequence[str], optional): Specify classes to load. + If is None, ``cls.CLASSES`` will be used. Default: None. + palette (Sequence[Sequence[int]]] | np.ndarray | None): + The palette of segmentation map. If None is given, and + self.PALETTE is None, random palette will be generated. + Default: None + """ + + CLASSES = None + + PALETTE = None + + def __init__(self, + pipeline, + img_dir, + img_suffix='.jpg', + ann_dir=None, + seg_map_suffix='.png', + split=None, + data_root=None, + test_mode=False, + ignore_index=255, + reduce_zero_label=False, + classes=None, + palette=None): + self.pipeline = Compose(pipeline) + self.img_dir = img_dir + self.img_suffix = img_suffix + self.ann_dir = ann_dir + self.seg_map_suffix = seg_map_suffix + self.split = split + self.data_root = data_root + self.test_mode = test_mode + self.ignore_index = ignore_index + self.reduce_zero_label = reduce_zero_label + self.label_map = None + self.CLASSES, self.PALETTE = self.get_classes_and_palette( + classes, palette) + + # join paths if data_root is specified + if self.data_root is not None: + if not osp.isabs(self.img_dir): + self.img_dir = osp.join(self.data_root, self.img_dir) + if not (self.ann_dir is None or osp.isabs(self.ann_dir)): + self.ann_dir = osp.join(self.data_root, self.ann_dir) + if not (self.split is None or osp.isabs(self.split)): + self.split = osp.join(self.data_root, self.split) + + # load annotations + self.img_infos = self.load_annotations(self.img_dir, self.img_suffix, + self.ann_dir, + self.seg_map_suffix, self.split) + + def __len__(self): + """Total number of samples of data.""" + return len(self.img_infos) + + def load_annotations(self, img_dir, img_suffix, ann_dir, seg_map_suffix, + split): + """Load annotation from directory. + + Args: + img_dir (str): Path to image directory + img_suffix (str): Suffix of images. + ann_dir (str|None): Path to annotation directory. + seg_map_suffix (str|None): Suffix of segmentation maps. + split (str|None): Split txt file. If split is specified, only file + with suffix in the splits will be loaded. Otherwise, all images + in img_dir/ann_dir will be loaded. Default: None + + Returns: + list[dict]: All image info of dataset. + """ + + img_infos = [] + if split is not None: + with open(split) as f: + for line in f: + img_name = line.strip() + img_info = dict(filename=img_name + img_suffix) + if ann_dir is not None: + seg_map = img_name + seg_map_suffix + img_info['ann'] = dict(seg_map=seg_map) + img_infos.append(img_info) + else: + for img in mmcv.scandir(img_dir, img_suffix, recursive=True): + img_info = dict(filename=img) + if ann_dir is not None: + seg_map = img.replace(img_suffix, seg_map_suffix) + img_info['ann'] = dict(seg_map=seg_map) + img_infos.append(img_info) + + print_log(f'Loaded {len(img_infos)} images', logger=get_root_logger()) + return img_infos + + def get_ann_info(self, idx): + """Get annotation by index. + + Args: + idx (int): Index of data. + + Returns: + dict: Annotation info of specified index. + """ + + return self.img_infos[idx]['ann'] + + def pre_pipeline(self, results): + """Prepare results dict for pipeline.""" + results['seg_fields'] = [] + results['img_prefix'] = self.img_dir + results['seg_prefix'] = self.ann_dir + if self.custom_classes: + results['label_map'] = self.label_map + + def __getitem__(self, idx): + """Get training/test data after pipeline. + + Args: + idx (int): Index of data. + + Returns: + dict: Training/test data (with annotation if `test_mode` is set + False). + """ + + if self.test_mode: + return self.prepare_test_img(idx) + else: + return self.prepare_train_img(idx) + + def prepare_train_img(self, idx): + """Get training data and annotations after pipeline. + + Args: + idx (int): Index of data. + + Returns: + dict: Training data and annotation after pipeline with new keys + introduced by pipeline. + """ + + img_info = self.img_infos[idx] + ann_info = self.get_ann_info(idx) + results = dict(img_info=img_info, ann_info=ann_info) + self.pre_pipeline(results) + return self.pipeline(results) + + def prepare_test_img(self, idx): + """Get testing data after pipeline. + + Args: + idx (int): Index of data. + + Returns: + dict: Testing data after pipeline with new keys introduced by + pipeline. + """ + + img_info = self.img_infos[idx] + results = dict(img_info=img_info) + self.pre_pipeline(results) + return self.pipeline(results) + + def format_results(self, results, **kwargs): + """Place holder to format result to dataset specific output.""" + + def get_gt_seg_maps(self, efficient_test=False): + """Get ground truth segmentation maps for evaluation.""" + gt_seg_maps = [] + for img_info in self.img_infos: + seg_map = osp.join(self.ann_dir, img_info['ann']['seg_map']) + if efficient_test: + gt_seg_map = seg_map + else: + gt_seg_map = mmcv.imread( + seg_map, flag='unchanged', backend='pillow') + gt_seg_maps.append(gt_seg_map) + return gt_seg_maps + + def get_classes_and_palette(self, classes=None, palette=None): + """Get class names of current dataset. + + Args: + classes (Sequence[str] | str | None): If classes is None, use + default CLASSES defined by builtin dataset. If classes is a + string, take it as a file name. The file contains the name of + classes where each line contains one class name. If classes is + a tuple or list, override the CLASSES defined by the dataset. + palette (Sequence[Sequence[int]]] | np.ndarray | None): + The palette of segmentation map. If None is given, random + palette will be generated. Default: None + """ + if classes is None: + self.custom_classes = False + return self.CLASSES, self.PALETTE + + self.custom_classes = True + if isinstance(classes, str): + # take it as a file path + class_names = mmcv.list_from_file(classes) + elif isinstance(classes, (tuple, list)): + class_names = classes + else: + raise ValueError(f'Unsupported type {type(classes)} of classes.') + + if self.CLASSES: + if not set(classes).issubset(self.CLASSES): + raise ValueError('classes is not a subset of CLASSES.') + + # dictionary, its keys are the old label ids and its values + # are the new label ids. + # used for changing pixel labels in load_annotations. + self.label_map = {} + for i, c in enumerate(self.CLASSES): + if c not in class_names: + self.label_map[i] = -1 + else: + self.label_map[i] = classes.index(c) + + palette = self.get_palette_for_custom_classes(class_names, palette) + + return class_names, palette + + def get_palette_for_custom_classes(self, class_names, palette=None): + + if self.label_map is not None: + # return subset of palette + palette = [] + for old_id, new_id in sorted( + self.label_map.items(), key=lambda x: x[1]): + if new_id != -1: + palette.append(self.PALETTE[old_id]) + palette = type(self.PALETTE)(palette) + + elif palette is None: + if self.PALETTE is None: + palette = np.random.randint(0, 255, size=(len(class_names), 3)) + else: + palette = self.PALETTE + + return palette + + def evaluate(self, + results, + metric='mIoU', + logger=None, + efficient_test=False, + **kwargs): + """Evaluate the dataset. + + Args: + results (list): Testing results of the dataset. + metric (str | list[str]): Metrics to be evaluated. 'mIoU', + 'mDice' and 'mFscore' are supported. + logger (logging.Logger | None | str): Logger used for printing + related information during evaluation. Default: None. + + Returns: + dict[str, float]: Default metrics. + """ + + if isinstance(metric, str): + metric = [metric] + allowed_metrics = ['mIoU', 'mDice', 'mFscore'] + if not set(metric).issubset(set(allowed_metrics)): + raise KeyError('metric {} is not supported'.format(metric)) + eval_results = {} + gt_seg_maps = self.get_gt_seg_maps(efficient_test) + if self.CLASSES is None: + num_classes = len( + reduce(np.union1d, [np.unique(_) for _ in gt_seg_maps])) + else: + num_classes = len(self.CLASSES) + ret_metrics = eval_metrics( + results, + gt_seg_maps, + num_classes, + self.ignore_index, + metric, + label_map=self.label_map, + reduce_zero_label=self.reduce_zero_label) + + if self.CLASSES is None: + class_names = tuple(range(num_classes)) + else: + class_names = self.CLASSES + + # summary table + ret_metrics_summary = OrderedDict({ + ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2) + for ret_metric, ret_metric_value in ret_metrics.items() + }) + + # each class table + ret_metrics.pop('aAcc', None) + ret_metrics_class = OrderedDict({ + ret_metric: np.round(ret_metric_value * 100, 2) + for ret_metric, ret_metric_value in ret_metrics.items() + }) + ret_metrics_class.update({'Class': class_names}) + ret_metrics_class.move_to_end('Class', last=False) + + # for logger + class_table_data = PrettyTable() + for key, val in ret_metrics_class.items(): + class_table_data.add_column(key, val) + + summary_table_data = PrettyTable() + for key, val in ret_metrics_summary.items(): + if key == 'aAcc': + summary_table_data.add_column(key, [val]) + else: + summary_table_data.add_column('m' + key, [val]) + + print_log('per class results:', logger) + print_log('\n' + class_table_data.get_string(), logger=logger) + print_log('Summary:', logger) + print_log('\n' + summary_table_data.get_string(), logger=logger) + + # each metric dict + for key, value in ret_metrics_summary.items(): + if key == 'aAcc': + eval_results[key] = value / 100.0 + else: + eval_results['m' + key] = value / 100.0 + + ret_metrics_class.pop('Class', None) + for key, value in ret_metrics_class.items(): + eval_results.update({ + key + '.' + str(name): value[idx] / 100.0 + for idx, name in enumerate(class_names) + }) + + if mmcv.is_list_of(results, str): + for file_name in results: + os.remove(file_name) + return eval_results diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/dataset_wrappers.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/dataset_wrappers.py new file mode 100644 index 000000000000..d6a5e957ec3b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/dataset_wrappers.py @@ -0,0 +1,50 @@ +from torch.utils.data.dataset import ConcatDataset as _ConcatDataset + +from .builder import DATASETS + + +@DATASETS.register_module() +class ConcatDataset(_ConcatDataset): + """A wrapper of concatenated dataset. + + Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but + concat the group flag for image aspect ratio. + + Args: + datasets (list[:obj:`Dataset`]): A list of datasets. + """ + + def __init__(self, datasets): + super(ConcatDataset, self).__init__(datasets) + self.CLASSES = datasets[0].CLASSES + self.PALETTE = datasets[0].PALETTE + + +@DATASETS.register_module() +class RepeatDataset(object): + """A wrapper of repeated dataset. + + The length of repeated dataset will be `times` larger than the original + dataset. This is useful when the data loading time is long but the dataset + is small. Using RepeatDataset can reduce the data loading time between + epochs. + + Args: + dataset (:obj:`Dataset`): The dataset to be repeated. + times (int): Repeat times. + """ + + def __init__(self, dataset, times): + self.dataset = dataset + self.times = times + self.CLASSES = dataset.CLASSES + self.PALETTE = dataset.PALETTE + self._ori_len = len(self.dataset) + + def __getitem__(self, idx): + """Get item from original dataset.""" + return self.dataset[idx % self._ori_len] + + def __len__(self): + """The length is multiplied by ``times``""" + return self.times * self._ori_len diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py new file mode 100644 index 000000000000..3cbfda8ae74b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py @@ -0,0 +1,27 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class DRIVEDataset(CustomDataset): + """DRIVE dataset. + + In segmentation map annotation for DRIVE, 0 stands for background, which is + included in 2 categories. ``reduce_zero_label`` is fixed to False. The + ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '_manual1.png'. + """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(DRIVEDataset, self).__init__( + img_suffix='.png', + seg_map_suffix='_manual1.png', + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py new file mode 100644 index 000000000000..923203b51377 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py @@ -0,0 +1,27 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class HRFDataset(CustomDataset): + """HRF dataset. + + In segmentation map annotation for HRF, 0 stands for background, which is + included in 2 categories. ``reduce_zero_label`` is fixed to False. The + ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '.png'. + """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(HRFDataset, self).__init__( + img_suffix='.png', + seg_map_suffix='.png', + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py new file mode 100644 index 000000000000..541a63c66a13 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py @@ -0,0 +1,103 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class PascalContextDataset(CustomDataset): + """PascalContext dataset. + + In segmentation map annotation for PascalContext, 0 stands for background, + which is included in 60 categories. ``reduce_zero_label`` is fixed to + False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png'. + + Args: + split (str): Split txt file for PascalContext. + """ + + CLASSES = ('background', 'aeroplane', 'bag', 'bed', 'bedclothes', 'bench', + 'bicycle', 'bird', 'boat', 'book', 'bottle', 'building', 'bus', + 'cabinet', 'car', 'cat', 'ceiling', 'chair', 'cloth', + 'computer', 'cow', 'cup', 'curtain', 'dog', 'door', 'fence', + 'floor', 'flower', 'food', 'grass', 'ground', 'horse', + 'keyboard', 'light', 'motorbike', 'mountain', 'mouse', 'person', + 'plate', 'platform', 'pottedplant', 'road', 'rock', 'sheep', + 'shelves', 'sidewalk', 'sign', 'sky', 'snow', 'sofa', 'table', + 'track', 'train', 'tree', 'truck', 'tvmonitor', 'wall', 'water', + 'window', 'wood') + + PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255]] + + def __init__(self, split, **kwargs): + super(PascalContextDataset, self).__init__( + img_suffix='.jpg', + seg_map_suffix='.png', + split=split, + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) and self.split is not None + + +@DATASETS.register_module() +class PascalContextDataset59(CustomDataset): + """PascalContext dataset. + + In segmentation map annotation for PascalContext, 0 stands for background, + which is included in 60 categories. ``reduce_zero_label`` is fixed to + False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png'. + + Args: + split (str): Split txt file for PascalContext. + """ + + CLASSES = ('aeroplane', 'bag', 'bed', 'bedclothes', 'bench', 'bicycle', + 'bird', 'boat', 'book', 'bottle', 'building', 'bus', 'cabinet', + 'car', 'cat', 'ceiling', 'chair', 'cloth', 'computer', 'cow', + 'cup', 'curtain', 'dog', 'door', 'fence', 'floor', 'flower', + 'food', 'grass', 'ground', 'horse', 'keyboard', 'light', + 'motorbike', 'mountain', 'mouse', 'person', 'plate', 'platform', + 'pottedplant', 'road', 'rock', 'sheep', 'shelves', 'sidewalk', + 'sign', 'sky', 'snow', 'sofa', 'table', 'track', 'train', + 'tree', 'truck', 'tvmonitor', 'wall', 'water', 'window', 'wood') + + PALETTE = [[180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], + [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], + [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], + [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], + [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], + [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], + [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], + [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], + [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], + [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], + [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], + [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], + [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], + [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], + [0, 235, 255], [0, 173, 255], [31, 0, 255]] + + def __init__(self, split, **kwargs): + super(PascalContextDataset59, self).__init__( + img_suffix='.jpg', + seg_map_suffix='.png', + split=split, + reduce_zero_label=True, + **kwargs) + assert osp.exists(self.img_dir) and self.split is not None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py new file mode 100644 index 000000000000..8b9046b07bb4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py @@ -0,0 +1,16 @@ +from .compose import Compose +from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor, + Transpose, to_tensor) +from .loading import LoadAnnotations, LoadImageFromFile +from .test_time_aug import MultiScaleFlipAug +from .transforms import (CLAHE, AdjustGamma, Normalize, Pad, + PhotoMetricDistortion, RandomCrop, RandomFlip, + RandomRotate, Rerange, Resize, RGB2Gray, SegRescale) + +__all__ = [ + 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', + 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile', + 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', + 'Normalize', 'SegRescale', 'PhotoMetricDistortion', 'RandomRotate', + 'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/compose.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/compose.py new file mode 100644 index 000000000000..c3b11a9870a5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/compose.py @@ -0,0 +1,51 @@ +import collections + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import build_from_cfg + +from ..builder import PIPELINES + + +@PIPELINES.register_module() +class Compose(object): + """Compose multiple transforms sequentially. + + Args: + transforms (Sequence[dict | callable]): Sequence of transform object or + config dict to be composed. + """ + + def __init__(self, transforms): + assert isinstance(transforms, collections.abc.Sequence) + self.transforms = [] + for transform in transforms: + if isinstance(transform, dict): + transform = build_from_cfg(transform, PIPELINES) + self.transforms.append(transform) + elif callable(transform): + self.transforms.append(transform) + else: + raise TypeError('transform must be callable or a dict') + + def __call__(self, data): + """Call function to apply transforms sequentially. + + Args: + data (dict): A result dict contains the data to transform. + + Returns: + dict: Transformed data. + """ + + for t in self.transforms: + data = t(data) + if data is None: + return None + return data + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + for t in self.transforms: + format_string += '\n' + format_string += f' {t}' + format_string += '\n)' + return format_string diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py new file mode 100644 index 000000000000..f74b359efe10 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py @@ -0,0 +1,288 @@ +from collections.abc import Sequence + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +import numpy as np +import torch +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import DataContainer as DC + +from ..builder import PIPELINES + + +def to_tensor(data): + """Convert objects of various python types to :obj:`torch.Tensor`. + + Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, + :class:`Sequence`, :class:`int` and :class:`float`. + + Args: + data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to + be converted. + """ + + if isinstance(data, torch.Tensor): + return data + elif isinstance(data, np.ndarray): + return torch.from_numpy(data) + elif isinstance(data, Sequence) and not mmcv.is_str(data): + return torch.tensor(data) + elif isinstance(data, int): + return torch.LongTensor([data]) + elif isinstance(data, float): + return torch.FloatTensor([data]) + else: + raise TypeError(f'type {type(data)} cannot be converted to tensor.') + + +@PIPELINES.register_module() +class ToTensor(object): + """Convert some results to :obj:`torch.Tensor` by given keys. + + Args: + keys (Sequence[str]): Keys that need to be converted to Tensor. + """ + + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + """Call function to convert data in results to :obj:`torch.Tensor`. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data converted + to :obj:`torch.Tensor`. + """ + + for key in self.keys: + results[key] = to_tensor(results[key]) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(keys={self.keys})' + + +@PIPELINES.register_module() +class ImageToTensor(object): + """Convert image to :obj:`torch.Tensor` by given keys. + + The dimension order of input image is (H, W, C). The pipeline will convert + it to (C, H, W). If only 2 dimension (H, W) is given, the output would be + (1, H, W). + + Args: + keys (Sequence[str]): Key of images to be converted to Tensor. + """ + + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + """Call function to convert image in results to :obj:`torch.Tensor` and + transpose the channel order. + + Args: + results (dict): Result dict contains the image data to convert. + + Returns: + dict: The result dict contains the image converted + to :obj:`torch.Tensor` and transposed to (C, H, W) order. + """ + + for key in self.keys: + img = results[key] + if len(img.shape) < 3: + img = np.expand_dims(img, -1) + results[key] = to_tensor(img.transpose(2, 0, 1)) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(keys={self.keys})' + + +@PIPELINES.register_module() +class Transpose(object): + """Transpose some results by given keys. + + Args: + keys (Sequence[str]): Keys of results to be transposed. + order (Sequence[int]): Order of transpose. + """ + + def __init__(self, keys, order): + self.keys = keys + self.order = order + + def __call__(self, results): + """Call function to convert image in results to :obj:`torch.Tensor` and + transpose the channel order. + + Args: + results (dict): Result dict contains the image data to convert. + + Returns: + dict: The result dict contains the image converted + to :obj:`torch.Tensor` and transposed to (C, H, W) order. + """ + + for key in self.keys: + results[key] = results[key].transpose(self.order) + return results + + def __repr__(self): + return self.__class__.__name__ + \ + f'(keys={self.keys}, order={self.order})' + + +@PIPELINES.register_module() +class ToDataContainer(object): + """Convert results to :obj:`mmcv.DataContainer` by given fields. + + Args: + fields (Sequence[dict]): Each field is a dict like + ``dict(key='xxx', **kwargs)``. The ``key`` in result will + be converted to :obj:`mmcv.DataContainer` with ``**kwargs``. + Default: ``(dict(key='img', stack=True), + dict(key='gt_semantic_seg'))``. + """ + + def __init__(self, + fields=(dict(key='img', + stack=True), dict(key='gt_semantic_seg'))): + self.fields = fields + + def __call__(self, results): + """Call function to convert data in results to + :obj:`mmcv.DataContainer`. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data converted to + :obj:`mmcv.DataContainer`. + """ + + for field in self.fields: + field = field.copy() + key = field.pop('key') + results[key] = DC(results[key], **field) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(fields={self.fields})' + + +@PIPELINES.register_module() +class DefaultFormatBundle(object): + """Default formatting bundle. + + It simplifies the pipeline of formatting common fields, including "img" + and "gt_semantic_seg". These fields are formatted as follows. + + - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) + - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor, + (3)to DataContainer (stack=True) + """ + + def __call__(self, results): + """Call function to transform and format common fields in results. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data that is formatted with + default bundle. + """ + + if 'img' in results: + img = results['img'] + if len(img.shape) < 3: + img = np.expand_dims(img, -1) + img = np.ascontiguousarray(img.transpose(2, 0, 1)) + results['img'] = DC(to_tensor(img), stack=True) + if 'gt_semantic_seg' in results: + # convert to long + results['gt_semantic_seg'] = DC( + to_tensor(results['gt_semantic_seg'][None, + ...].astype(np.int64)), + stack=True) + return results + + def __repr__(self): + return self.__class__.__name__ + + +@PIPELINES.register_module() +class Collect(object): + """Collect data from the loader relevant to the specific task. + + This is usually the last stage of the data loader pipeline. Typically keys + is set to some subset of "img", "gt_semantic_seg". + + The "img_meta" item is always populated. The contents of the "img_meta" + dictionary depends on "meta_keys". By default this includes: + + - "img_shape": shape of the image input to the network as a tuple + (h, w, c). Note that images may be zero padded on the bottom/right + if the batch tensor is larger than this shape. + + - "scale_factor": a float indicating the preprocessing scale + + - "flip": a boolean indicating if image flip transform was used + + - "filename": path to the image file + + - "ori_shape": original shape of the image as a tuple (h, w, c) + + - "pad_shape": image shape after padding + + - "img_norm_cfg": a dict of normalization information: + - mean - per channel mean subtraction + - std - per channel std divisor + - to_rgb - bool indicating if bgr was converted to rgb + + Args: + keys (Sequence[str]): Keys of results to be collected in ``data``. + meta_keys (Sequence[str], optional): Meta keys to be converted to + ``mmcv.DataContainer`` and collected in ``data[img_metas]``. + Default: ``('filename', 'ori_filename', 'ori_shape', 'img_shape', + 'pad_shape', 'scale_factor', 'flip', 'flip_direction', + 'img_norm_cfg')`` + """ + + def __init__(self, + keys, + meta_keys=('filename', 'ori_filename', 'ori_shape', + 'img_shape', 'pad_shape', 'scale_factor', 'flip', + 'flip_direction', 'img_norm_cfg')): + self.keys = keys + self.meta_keys = meta_keys + + def __call__(self, results): + """Call function to collect keys in results. The keys in ``meta_keys`` + will be converted to :obj:mmcv.DataContainer. + + Args: + results (dict): Result dict contains the data to collect. + + Returns: + dict: The result dict contains the following keys + - keys in``self.keys`` + - ``img_metas`` + """ + + data = {} + img_meta = {} + for key in self.meta_keys: + img_meta[key] = results[key] + data['img_metas'] = DC(img_meta, cpu_only=True) + for key in self.keys: + data[key] = results[key] + return data + + def __repr__(self): + return self.__class__.__name__ + \ + f'(keys={self.keys}, meta_keys={self.meta_keys})' diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py new file mode 100644 index 000000000000..da7f347e66d2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py @@ -0,0 +1,153 @@ +import os.path as osp + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +import numpy as np + +from ..builder import PIPELINES + + +@PIPELINES.register_module() +class LoadImageFromFile(object): + """Load an image from file. + + Required keys are "img_prefix" and "img_info" (a dict that must contain the + key "filename"). Added or updated keys are "filename", "img", "img_shape", + "ori_shape" (same as `img_shape`), "pad_shape" (same as `img_shape`), + "scale_factor" (1.0) and "img_norm_cfg" (means=0 and stds=1). + + Args: + to_float32 (bool): Whether to convert the loaded image to a float32 + numpy array. If set to False, the loaded image is an uint8 array. + Defaults to False. + color_type (str): The flag argument for :func:`mmcv.imfrombytes`. + Defaults to 'color'. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. + Defaults to ``dict(backend='disk')``. + imdecode_backend (str): Backend for :func:`mmcv.imdecode`. Default: + 'cv2' + """ + + def __init__(self, + to_float32=False, + color_type='color', + file_client_args=dict(backend='disk'), + imdecode_backend='cv2'): + self.to_float32 = to_float32 + self.color_type = color_type + self.file_client_args = file_client_args.copy() + self.file_client = None + self.imdecode_backend = imdecode_backend + + def __call__(self, results): + """Call functions to load image and get image meta information. + + Args: + results (dict): Result dict from :obj:`mmseg.CustomDataset`. + + Returns: + dict: The dict contains loaded image and meta information. + """ + + if self.file_client is None: + self.file_client = mmcv.FileClient(**self.file_client_args) + + if results.get('img_prefix') is not None: + filename = osp.join(results['img_prefix'], + results['img_info']['filename']) + else: + filename = results['img_info']['filename'] + img_bytes = self.file_client.get(filename) + img = mmcv.imfrombytes( + img_bytes, flag=self.color_type, backend=self.imdecode_backend) + if self.to_float32: + img = img.astype(np.float32) + + results['filename'] = filename + results['ori_filename'] = results['img_info']['filename'] + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + num_channels = 1 if len(img.shape) < 3 else img.shape[2] + results['img_norm_cfg'] = dict( + mean=np.zeros(num_channels, dtype=np.float32), + std=np.ones(num_channels, dtype=np.float32), + to_rgb=False) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(to_float32={self.to_float32},' + repr_str += f"color_type='{self.color_type}'," + repr_str += f"imdecode_backend='{self.imdecode_backend}')" + return repr_str + + +@PIPELINES.register_module() +class LoadAnnotations(object): + """Load annotations for semantic segmentation. + + Args: + reduce_zero_label (bool): Whether reduce all label value by 1. + Usually used for datasets where 0 is background label. + Default: False. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. + Defaults to ``dict(backend='disk')``. + imdecode_backend (str): Backend for :func:`mmcv.imdecode`. Default: + 'pillow' + """ + + def __init__(self, + reduce_zero_label=False, + file_client_args=dict(backend='disk'), + imdecode_backend='pillow'): + self.reduce_zero_label = reduce_zero_label + self.file_client_args = file_client_args.copy() + self.file_client = None + self.imdecode_backend = imdecode_backend + + def __call__(self, results): + """Call function to load multiple types annotations. + + Args: + results (dict): Result dict from :obj:`mmseg.CustomDataset`. + + Returns: + dict: The dict contains loaded semantic segmentation annotations. + """ + + if self.file_client is None: + self.file_client = mmcv.FileClient(**self.file_client_args) + + if results.get('seg_prefix', None) is not None: + filename = osp.join(results['seg_prefix'], + results['ann_info']['seg_map']) + else: + filename = results['ann_info']['seg_map'] + img_bytes = self.file_client.get(filename) + gt_semantic_seg = mmcv.imfrombytes( + img_bytes, flag='unchanged', + backend=self.imdecode_backend).squeeze().astype(np.uint8) + # modify if custom classes + if results.get('label_map', None) is not None: + for old_id, new_id in results['label_map'].items(): + gt_semantic_seg[gt_semantic_seg == old_id] = new_id + # reduce zero_label + if self.reduce_zero_label: + # avoid using underflow conversion + gt_semantic_seg[gt_semantic_seg == 0] = 255 + gt_semantic_seg = gt_semantic_seg - 1 + gt_semantic_seg[gt_semantic_seg == 254] = 255 + results['gt_semantic_seg'] = gt_semantic_seg + results['seg_fields'].append('gt_semantic_seg') + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(reduce_zero_label={self.reduce_zero_label},' + repr_str += f"imdecode_backend='{self.imdecode_backend}')" + return repr_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py new file mode 100644 index 000000000000..21f86894ea76 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py @@ -0,0 +1,133 @@ +import warnings + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + +from ..builder import PIPELINES +from .compose import Compose + + +@PIPELINES.register_module() +class MultiScaleFlipAug(object): + """Test-time augmentation with multiple scales and flipping. + + An example configuration is as followed: + + .. code-block:: + + img_scale=(2048, 1024), + img_ratios=[0.5, 1.0], + flip=True, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ] + + After MultiScaleFLipAug with above configuration, the results are wrapped + into lists of the same length as followed: + + .. code-block:: + + dict( + img=[...], + img_shape=[...], + scale=[(1024, 512), (1024, 512), (2048, 1024), (2048, 1024)] + flip=[False, True, False, True] + ... + ) + + Args: + transforms (list[dict]): Transforms to apply in each augmentation. + img_scale (None | tuple | list[tuple]): Images scales for resizing. + img_ratios (float | list[float]): Image ratios for resizing + flip (bool): Whether apply flip augmentation. Default: False. + flip_direction (str | list[str]): Flip augmentation directions, + options are "horizontal" and "vertical". If flip_direction is list, + multiple flip augmentations will be applied. + It has no effect when flip == False. Default: "horizontal". + """ + + def __init__(self, + transforms, + img_scale, + img_ratios=None, + flip=False, + flip_direction='horizontal'): + self.transforms = Compose(transforms) + if img_ratios is not None: + img_ratios = img_ratios if isinstance(img_ratios, + list) else [img_ratios] + assert mmcv.is_list_of(img_ratios, float) + if img_scale is None: + # mode 1: given img_scale=None and a range of image ratio + self.img_scale = None + assert mmcv.is_list_of(img_ratios, float) + elif isinstance(img_scale, tuple) and mmcv.is_list_of( + img_ratios, float): + assert len(img_scale) == 2 + # mode 2: given a scale and a range of image ratio + self.img_scale = [(int(img_scale[0] * ratio), + int(img_scale[1] * ratio)) + for ratio in img_ratios] + else: + # mode 3: given multiple scales + self.img_scale = img_scale if isinstance(img_scale, + list) else [img_scale] + assert mmcv.is_list_of(self.img_scale, tuple) or self.img_scale is None + self.flip = flip + self.img_ratios = img_ratios + self.flip_direction = flip_direction if isinstance( + flip_direction, list) else [flip_direction] + assert mmcv.is_list_of(self.flip_direction, str) + if not self.flip and self.flip_direction != ['horizontal']: + warnings.warn( + 'flip_direction has no effect when flip is set to False') + if (self.flip + and not any([t['type'] == 'RandomFlip' for t in transforms])): + warnings.warn( + 'flip has no effect when RandomFlip is not in transforms') + + def __call__(self, results): + """Call function to apply test time augment transforms on results. + + Args: + results (dict): Result dict contains the data to transform. + + Returns: + dict[str: list]: The augmented data, where each value is wrapped + into a list. + """ + + aug_data = [] + if self.img_scale is None and mmcv.is_list_of(self.img_ratios, float): + h, w = results['img'].shape[:2] + img_scale = [(int(w * ratio), int(h * ratio)) + for ratio in self.img_ratios] + else: + img_scale = self.img_scale + flip_aug = [False, True] if self.flip else [False] + for scale in img_scale: + for flip in flip_aug: + for direction in self.flip_direction: + _results = results.copy() + _results['scale'] = scale + _results['flip'] = flip + _results['flip_direction'] = direction + data = self.transforms(_results) + aug_data.append(data) + # list of dict to dict of list + aug_data_dict = {key: [] for key in aug_data[0]} + for data in aug_data: + for key, val in data.items(): + aug_data_dict[key].append(val) + return aug_data_dict + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(transforms={self.transforms}, ' + repr_str += f'img_scale={self.img_scale}, flip={self.flip})' + repr_str += f'flip_direction={self.flip_direction}' + return repr_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py new file mode 100644 index 000000000000..ab97de3dfa34 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py @@ -0,0 +1,889 @@ +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +import numpy as np +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import deprecated_api_warning, is_tuple_of +from numpy import random + +from ..builder import PIPELINES + + +@PIPELINES.register_module() +class Resize(object): + """Resize images & seg. + + This transform resizes the input image to some scale. If the input dict + contains the key "scale", then the scale in the input dict is used, + otherwise the specified scale in the init method is used. + + ``img_scale`` can be None, a tuple (single-scale) or a list of tuple + (multi-scale). There are 4 multiscale modes: + + - ``ratio_range is not None``: + 1. When img_scale is None, img_scale is the shape of image in results + (img_scale = results['img'].shape[:2]) and the image is resized based + on the original size. (mode 1) + 2. When img_scale is a tuple (single-scale), randomly sample a ratio from + the ratio range and multiply it with the image scale. (mode 2) + + - ``ratio_range is None and multiscale_mode == "range"``: randomly sample a + scale from the a range. (mode 3) + + - ``ratio_range is None and multiscale_mode == "value"``: randomly sample a + scale from multiple scales. (mode 4) + + Args: + img_scale (tuple or list[tuple]): Images scales for resizing. + multiscale_mode (str): Either "range" or "value". + ratio_range (tuple[float]): (min_ratio, max_ratio) + keep_ratio (bool): Whether to keep the aspect ratio when resizing the + image. + """ + + def __init__(self, + img_scale=None, + multiscale_mode='range', + ratio_range=None, + keep_ratio=True): + if img_scale is None: + self.img_scale = None + else: + if isinstance(img_scale, list): + self.img_scale = img_scale + else: + self.img_scale = [img_scale] + assert mmcv.is_list_of(self.img_scale, tuple) + + if ratio_range is not None: + # mode 1: given img_scale=None and a range of image ratio + # mode 2: given a scale and a range of image ratio + assert self.img_scale is None or len(self.img_scale) == 1 + else: + # mode 3 and 4: given multiple scales or a range of scales + assert multiscale_mode in ['value', 'range'] + + self.multiscale_mode = multiscale_mode + self.ratio_range = ratio_range + self.keep_ratio = keep_ratio + + @staticmethod + def random_select(img_scales): + """Randomly select an img_scale from given candidates. + + Args: + img_scales (list[tuple]): Images scales for selection. + + Returns: + (tuple, int): Returns a tuple ``(img_scale, scale_dix)``, + where ``img_scale`` is the selected image scale and + ``scale_idx`` is the selected index in the given candidates. + """ + + assert mmcv.is_list_of(img_scales, tuple) + scale_idx = np.random.randint(len(img_scales)) + img_scale = img_scales[scale_idx] + return img_scale, scale_idx + + @staticmethod + def random_sample(img_scales): + """Randomly sample an img_scale when ``multiscale_mode=='range'``. + + Args: + img_scales (list[tuple]): Images scale range for sampling. + There must be two tuples in img_scales, which specify the lower + and upper bound of image scales. + + Returns: + (tuple, None): Returns a tuple ``(img_scale, None)``, where + ``img_scale`` is sampled scale and None is just a placeholder + to be consistent with :func:`random_select`. + """ + + assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2 + img_scale_long = [max(s) for s in img_scales] + img_scale_short = [min(s) for s in img_scales] + long_edge = np.random.randint( + min(img_scale_long), + max(img_scale_long) + 1) + short_edge = np.random.randint( + min(img_scale_short), + max(img_scale_short) + 1) + img_scale = (long_edge, short_edge) + return img_scale, None + + @staticmethod + def random_sample_ratio(img_scale, ratio_range): + """Randomly sample an img_scale when ``ratio_range`` is specified. + + A ratio will be randomly sampled from the range specified by + ``ratio_range``. Then it would be multiplied with ``img_scale`` to + generate sampled scale. + + Args: + img_scale (tuple): Images scale base to multiply with ratio. + ratio_range (tuple[float]): The minimum and maximum ratio to scale + the ``img_scale``. + + Returns: + (tuple, None): Returns a tuple ``(scale, None)``, where + ``scale`` is sampled ratio multiplied with ``img_scale`` and + None is just a placeholder to be consistent with + :func:`random_select`. + """ + + assert isinstance(img_scale, tuple) and len(img_scale) == 2 + min_ratio, max_ratio = ratio_range + assert min_ratio <= max_ratio + ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio + scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio) + return scale, None + + def _random_scale(self, results): + """Randomly sample an img_scale according to ``ratio_range`` and + ``multiscale_mode``. + + If ``ratio_range`` is specified, a ratio will be sampled and be + multiplied with ``img_scale``. + If multiple scales are specified by ``img_scale``, a scale will be + sampled according to ``multiscale_mode``. + Otherwise, single scale will be used. + + Args: + results (dict): Result dict from :obj:`dataset`. + + Returns: + dict: Two new keys 'scale` and 'scale_idx` are added into + ``results``, which would be used by subsequent pipelines. + """ + + if self.ratio_range is not None: + if self.img_scale is None: + h, w = results['img'].shape[:2] + scale, scale_idx = self.random_sample_ratio((w, h), + self.ratio_range) + else: + scale, scale_idx = self.random_sample_ratio( + self.img_scale[0], self.ratio_range) + elif len(self.img_scale) == 1: + scale, scale_idx = self.img_scale[0], 0 + elif self.multiscale_mode == 'range': + scale, scale_idx = self.random_sample(self.img_scale) + elif self.multiscale_mode == 'value': + scale, scale_idx = self.random_select(self.img_scale) + else: + raise NotImplementedError + + results['scale'] = scale + results['scale_idx'] = scale_idx + + def _resize_img(self, results): + """Resize images with ``results['scale']``.""" + if self.keep_ratio: + img, scale_factor = mmcv.imrescale( + results['img'], results['scale'], return_scale=True) + # the w_scale and h_scale has minor difference + # a real fix should be done in the mmcv.imrescale in the future + new_h, new_w = img.shape[:2] + h, w = results['img'].shape[:2] + w_scale = new_w / w + h_scale = new_h / h + else: + img, w_scale, h_scale = mmcv.imresize( + results['img'], results['scale'], return_scale=True) + scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], + dtype=np.float32) + results['img'] = img + results['img_shape'] = img.shape + results['pad_shape'] = img.shape # in case that there is no padding + results['scale_factor'] = scale_factor + results['keep_ratio'] = self.keep_ratio + + def _resize_seg(self, results): + """Resize semantic segmentation map with ``results['scale']``.""" + for key in results.get('seg_fields', []): + if self.keep_ratio: + gt_seg = mmcv.imrescale( + results[key], results['scale'], interpolation='nearest') + else: + gt_seg = mmcv.imresize( + results[key], results['scale'], interpolation='nearest') + results[key] = gt_seg + + def __call__(self, results): + """Call function to resize images, bounding boxes, masks, semantic + segmentation map. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', + 'keep_ratio' keys are added into result dict. + """ + + if 'scale' not in results: + self._random_scale(results) + self._resize_img(results) + self._resize_seg(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += (f'(img_scale={self.img_scale}, ' + f'multiscale_mode={self.multiscale_mode}, ' + f'ratio_range={self.ratio_range}, ' + f'keep_ratio={self.keep_ratio})') + return repr_str + + +@PIPELINES.register_module() +class RandomFlip(object): + """Flip the image & seg. + + If the input dict contains the key "flip", then the flag will be used, + otherwise it will be randomly decided by a ratio specified in the init + method. + + Args: + prob (float, optional): The flipping probability. Default: None. + direction(str, optional): The flipping direction. Options are + 'horizontal' and 'vertical'. Default: 'horizontal'. + """ + + @deprecated_api_warning({'flip_ratio': 'prob'}, cls_name='RandomFlip') + def __init__(self, prob=None, direction='horizontal'): + self.prob = prob + self.direction = direction + if prob is not None: + assert prob >= 0 and prob <= 1 + assert direction in ['horizontal', 'vertical'] + + def __call__(self, results): + """Call function to flip bounding boxes, masks, semantic segmentation + maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Flipped results, 'flip', 'flip_direction' keys are added into + result dict. + """ + + if 'flip' not in results: + flip = True if np.random.rand() < self.prob else False + results['flip'] = flip + if 'flip_direction' not in results: + results['flip_direction'] = self.direction + if results['flip']: + # flip image + results['img'] = mmcv.imflip( + results['img'], direction=results['flip_direction']) + + # flip segs + for key in results.get('seg_fields', []): + # use copy() to make numpy stride positive + results[key] = mmcv.imflip( + results[key], direction=results['flip_direction']).copy() + return results + + def __repr__(self): + return self.__class__.__name__ + f'(prob={self.prob})' + + +@PIPELINES.register_module() +class Pad(object): + """Pad the image & mask. + + There are two padding modes: (1) pad to a fixed size and (2) pad to the + minimum size that is divisible by some number. + Added keys are "pad_shape", "pad_fixed_size", "pad_size_divisor", + + Args: + size (tuple, optional): Fixed padding size. + size_divisor (int, optional): The divisor of padded size. + pad_val (float, optional): Padding value. Default: 0. + seg_pad_val (float, optional): Padding value of segmentation map. + Default: 255. + """ + + def __init__(self, + size=None, + size_divisor=None, + pad_val=0, + seg_pad_val=255): + self.size = size + self.size_divisor = size_divisor + self.pad_val = pad_val + self.seg_pad_val = seg_pad_val + # only one of size and size_divisor should be valid + assert size is not None or size_divisor is not None + assert size is None or size_divisor is None + + def _pad_img(self, results): + """Pad images according to ``self.size``.""" + if self.size is not None: + padded_img = mmcv.impad( + results['img'], shape=self.size, pad_val=self.pad_val) + elif self.size_divisor is not None: + padded_img = mmcv.impad_to_multiple( + results['img'], self.size_divisor, pad_val=self.pad_val) + results['img'] = padded_img + results['pad_shape'] = padded_img.shape + results['pad_fixed_size'] = self.size + results['pad_size_divisor'] = self.size_divisor + + def _pad_seg(self, results): + """Pad masks according to ``results['pad_shape']``.""" + for key in results.get('seg_fields', []): + results[key] = mmcv.impad( + results[key], + shape=results['pad_shape'][:2], + pad_val=self.seg_pad_val) + + def __call__(self, results): + """Call function to pad images, masks, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Updated result dict. + """ + + self._pad_img(results) + self._pad_seg(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(size={self.size}, size_divisor={self.size_divisor}, ' \ + f'pad_val={self.pad_val})' + return repr_str + + +@PIPELINES.register_module() +class Normalize(object): + """Normalize the image. + + Added key is "img_norm_cfg". + + Args: + mean (sequence): Mean values of 3 channels. + std (sequence): Std values of 3 channels. + to_rgb (bool): Whether to convert the image from BGR to RGB, + default is true. + """ + + def __init__(self, mean, std, to_rgb=True): + self.mean = np.array(mean, dtype=np.float32) + self.std = np.array(std, dtype=np.float32) + self.to_rgb = to_rgb + + def __call__(self, results): + """Call function to normalize images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Normalized results, 'img_norm_cfg' key is added into + result dict. + """ + + results['img'] = mmcv.imnormalize(results['img'], self.mean, self.std, + self.to_rgb) + results['img_norm_cfg'] = dict( + mean=self.mean, std=self.std, to_rgb=self.to_rgb) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(mean={self.mean}, std={self.std}, to_rgb=' \ + f'{self.to_rgb})' + return repr_str + + +@PIPELINES.register_module() +class Rerange(object): + """Rerange the image pixel value. + + Args: + min_value (float or int): Minimum value of the reranged image. + Default: 0. + max_value (float or int): Maximum value of the reranged image. + Default: 255. + """ + + def __init__(self, min_value=0, max_value=255): + assert isinstance(min_value, float) or isinstance(min_value, int) + assert isinstance(max_value, float) or isinstance(max_value, int) + assert min_value < max_value + self.min_value = min_value + self.max_value = max_value + + def __call__(self, results): + """Call function to rerange images. + + Args: + results (dict): Result dict from loading pipeline. + Returns: + dict: Reranged results. + """ + + img = results['img'] + img_min_value = np.min(img) + img_max_value = np.max(img) + + assert img_min_value < img_max_value + # rerange to [0, 1] + img = (img - img_min_value) / (img_max_value - img_min_value) + # rerange to [min_value, max_value] + img = img * (self.max_value - self.min_value) + self.min_value + results['img'] = img + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(min_value={self.min_value}, max_value={self.max_value})' + return repr_str + + +@PIPELINES.register_module() +class CLAHE(object): + """Use CLAHE method to process the image. + + See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J]. + Graphics Gems, 1994:474-485.` for more information. + + Args: + clip_limit (float): Threshold for contrast limiting. Default: 40.0. + tile_grid_size (tuple[int]): Size of grid for histogram equalization. + Input image will be divided into equally sized rectangular tiles. + It defines the number of tiles in row and column. Default: (8, 8). + """ + + def __init__(self, clip_limit=40.0, tile_grid_size=(8, 8)): + assert isinstance(clip_limit, (float, int)) + self.clip_limit = clip_limit + assert is_tuple_of(tile_grid_size, int) + assert len(tile_grid_size) == 2 + self.tile_grid_size = tile_grid_size + + def __call__(self, results): + """Call function to Use CLAHE method process images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Processed results. + """ + + for i in range(results['img'].shape[2]): + results['img'][:, :, i] = mmcv.clahe( + np.array(results['img'][:, :, i], dtype=np.uint8), + self.clip_limit, self.tile_grid_size) + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(clip_limit={self.clip_limit}, '\ + f'tile_grid_size={self.tile_grid_size})' + return repr_str + + +@PIPELINES.register_module() +class RandomCrop(object): + """Random crop the image & seg. + + Args: + crop_size (tuple): Expected size after cropping, (h, w). + cat_max_ratio (float): The maximum ratio that single category could + occupy. + """ + + def __init__(self, crop_size, cat_max_ratio=1., ignore_index=255): + assert crop_size[0] > 0 and crop_size[1] > 0 + self.crop_size = crop_size + self.cat_max_ratio = cat_max_ratio + self.ignore_index = ignore_index + + def get_crop_bbox(self, img): + """Randomly get a crop bounding box.""" + margin_h = max(img.shape[0] - self.crop_size[0], 0) + margin_w = max(img.shape[1] - self.crop_size[1], 0) + offset_h = np.random.randint(0, margin_h + 1) + offset_w = np.random.randint(0, margin_w + 1) + crop_y1, crop_y2 = offset_h, offset_h + self.crop_size[0] + crop_x1, crop_x2 = offset_w, offset_w + self.crop_size[1] + + return crop_y1, crop_y2, crop_x1, crop_x2 + + def crop(self, img, crop_bbox): + """Crop from ``img``""" + crop_y1, crop_y2, crop_x1, crop_x2 = crop_bbox + img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...] + return img + + def __call__(self, results): + """Call function to randomly crop images, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Randomly cropped results, 'img_shape' key in result dict is + updated according to crop size. + """ + + img = results['img'] + crop_bbox = self.get_crop_bbox(img) + if self.cat_max_ratio < 1.: + # Repeat 10 times + for _ in range(10): + seg_temp = self.crop(results['gt_semantic_seg'], crop_bbox) + labels, cnt = np.unique(seg_temp, return_counts=True) + cnt = cnt[labels != self.ignore_index] + if len(cnt) > 1 and np.max(cnt) / np.sum( + cnt) < self.cat_max_ratio: + break + crop_bbox = self.get_crop_bbox(img) + + # crop the image + img = self.crop(img, crop_bbox) + img_shape = img.shape + results['img'] = img + results['img_shape'] = img_shape + + # crop semantic seg + for key in results.get('seg_fields', []): + results[key] = self.crop(results[key], crop_bbox) + + return results + + def __repr__(self): + return self.__class__.__name__ + f'(crop_size={self.crop_size})' + + +@PIPELINES.register_module() +class RandomRotate(object): + """Rotate the image & seg. + + Args: + prob (float): The rotation probability. + degree (float, tuple[float]): Range of degrees to select from. If + degree is a number instead of tuple like (min, max), + the range of degree will be (``-degree``, ``+degree``) + pad_val (float, optional): Padding value of image. Default: 0. + seg_pad_val (float, optional): Padding value of segmentation map. + Default: 255. + center (tuple[float], optional): Center point (w, h) of the rotation in + the source image. If not specified, the center of the image will be + used. Default: None. + auto_bound (bool): Whether to adjust the image size to cover the whole + rotated image. Default: False + """ + + def __init__(self, + prob, + degree, + pad_val=0, + seg_pad_val=255, + center=None, + auto_bound=False): + self.prob = prob + assert prob >= 0 and prob <= 1 + if isinstance(degree, (float, int)): + assert degree > 0, f'degree {degree} should be positive' + self.degree = (-degree, degree) + else: + self.degree = degree + assert len(self.degree) == 2, f'degree {self.degree} should be a ' \ + f'tuple of (min, max)' + self.pal_val = pad_val + self.seg_pad_val = seg_pad_val + self.center = center + self.auto_bound = auto_bound + + def __call__(self, results): + """Call function to rotate image, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Rotated results. + """ + + rotate = True if np.random.rand() < self.prob else False + degree = np.random.uniform(min(*self.degree), max(*self.degree)) + if rotate: + # rotate image + results['img'] = mmcv.imrotate( + results['img'], + angle=degree, + border_value=self.pal_val, + center=self.center, + auto_bound=self.auto_bound) + + # rotate segs + for key in results.get('seg_fields', []): + results[key] = mmcv.imrotate( + results[key], + angle=degree, + border_value=self.seg_pad_val, + center=self.center, + auto_bound=self.auto_bound, + interpolation='nearest') + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(prob={self.prob}, ' \ + f'degree={self.degree}, ' \ + f'pad_val={self.pal_val}, ' \ + f'seg_pad_val={self.seg_pad_val}, ' \ + f'center={self.center}, ' \ + f'auto_bound={self.auto_bound})' + return repr_str + + +@PIPELINES.register_module() +class RGB2Gray(object): + """Convert RGB image to grayscale image. + + This transform calculate the weighted mean of input image channels with + ``weights`` and then expand the channels to ``out_channels``. When + ``out_channels`` is None, the number of output channels is the same as + input channels. + + Args: + out_channels (int): Expected number of output channels after + transforming. Default: None. + weights (tuple[float]): The weights to calculate the weighted mean. + Default: (0.299, 0.587, 0.114). + """ + + def __init__(self, out_channels=None, weights=(0.299, 0.587, 0.114)): + assert out_channels is None or out_channels > 0 + self.out_channels = out_channels + assert isinstance(weights, tuple) + for item in weights: + assert isinstance(item, (float, int)) + self.weights = weights + + def __call__(self, results): + """Call function to convert RGB image to grayscale image. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with grayscale image. + """ + img = results['img'] + assert len(img.shape) == 3 + assert img.shape[2] == len(self.weights) + weights = np.array(self.weights).reshape((1, 1, -1)) + img = (img * weights).sum(2, keepdims=True) + if self.out_channels is None: + img = img.repeat(weights.shape[2], axis=2) + else: + img = img.repeat(self.out_channels, axis=2) + + results['img'] = img + results['img_shape'] = img.shape + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(out_channels={self.out_channels}, ' \ + f'weights={self.weights})' + return repr_str + + +@PIPELINES.register_module() +class AdjustGamma(object): + """Using gamma correction to process the image. + + Args: + gamma (float or int): Gamma value used in gamma correction. + Default: 1.0. + """ + + def __init__(self, gamma=1.0): + assert isinstance(gamma, float) or isinstance(gamma, int) + assert gamma > 0 + self.gamma = gamma + inv_gamma = 1.0 / gamma + self.table = np.array([(i / 255.0)**inv_gamma * 255 + for i in np.arange(256)]).astype('uint8') + + def __call__(self, results): + """Call function to process the image with gamma correction. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Processed results. + """ + + results['img'] = mmcv.lut_transform( + np.array(results['img'], dtype=np.uint8), self.table) + + return results + + def __repr__(self): + return self.__class__.__name__ + f'(gamma={self.gamma})' + + +@PIPELINES.register_module() +class SegRescale(object): + """Rescale semantic segmentation maps. + + Args: + scale_factor (float): The scale factor of the final output. + """ + + def __init__(self, scale_factor=1): + self.scale_factor = scale_factor + + def __call__(self, results): + """Call function to scale the semantic segmentation map. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with semantic segmentation map scaled. + """ + for key in results.get('seg_fields', []): + if self.scale_factor != 1: + results[key] = mmcv.imrescale( + results[key], self.scale_factor, interpolation='nearest') + return results + + def __repr__(self): + return self.__class__.__name__ + f'(scale_factor={self.scale_factor})' + + +@PIPELINES.register_module() +class PhotoMetricDistortion(object): + """Apply photometric distortion to image sequentially, every transformation + is applied with a probability of 0.5. The position of random contrast is in + second or second to last. + + 1. random brightness + 2. random contrast (mode 0) + 3. convert color from BGR to HSV + 4. random saturation + 5. random hue + 6. convert color from HSV to BGR + 7. random contrast (mode 1) + + Args: + brightness_delta (int): delta of brightness. + contrast_range (tuple): range of contrast. + saturation_range (tuple): range of saturation. + hue_delta (int): delta of hue. + """ + + def __init__(self, + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18): + self.brightness_delta = brightness_delta + self.contrast_lower, self.contrast_upper = contrast_range + self.saturation_lower, self.saturation_upper = saturation_range + self.hue_delta = hue_delta + + def convert(self, img, alpha=1, beta=0): + """Multiple with alpha and add beat with clip.""" + img = img.astype(np.float32) * alpha + beta + img = np.clip(img, 0, 255) + return img.astype(np.uint8) + + def brightness(self, img): + """Brightness distortion.""" + if random.randint(2): + return self.convert( + img, + beta=random.uniform(-self.brightness_delta, + self.brightness_delta)) + return img + + def contrast(self, img): + """Contrast distortion.""" + if random.randint(2): + return self.convert( + img, + alpha=random.uniform(self.contrast_lower, self.contrast_upper)) + return img + + def saturation(self, img): + """Saturation distortion.""" + if random.randint(2): + img = mmcv.bgr2hsv(img) + img[:, :, 1] = self.convert( + img[:, :, 1], + alpha=random.uniform(self.saturation_lower, + self.saturation_upper)) + img = mmcv.hsv2bgr(img) + return img + + def hue(self, img): + """Hue distortion.""" + if random.randint(2): + img = mmcv.bgr2hsv(img) + img[:, :, + 0] = (img[:, :, 0].astype(int) + + random.randint(-self.hue_delta, self.hue_delta)) % 180 + img = mmcv.hsv2bgr(img) + return img + + def __call__(self, results): + """Call function to perform photometric distortion on images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with images distorted. + """ + + img = results['img'] + # random brightness + img = self.brightness(img) + + # mode == 0 --> do random contrast first + # mode == 1 --> do random contrast last + mode = random.randint(2) + if mode == 1: + img = self.contrast(img) + + # random saturation + img = self.saturation(img) + + # random hue + img = self.hue(img) + + # random contrast + if mode == 0: + img = self.contrast(img) + + results['img'] = img + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += (f'(brightness_delta={self.brightness_delta}, ' + f'contrast_range=({self.contrast_lower}, ' + f'{self.contrast_upper}), ' + f'saturation_range=({self.saturation_lower}, ' + f'{self.saturation_upper}), ' + f'hue_delta={self.hue_delta})') + return repr_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py new file mode 100644 index 000000000000..cbd14e0920e7 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py @@ -0,0 +1,27 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class STAREDataset(CustomDataset): + """STARE dataset. + + In segmentation map annotation for STARE, 0 stands for background, which is + included in 2 categories. ``reduce_zero_label`` is fixed to False. The + ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '.ah.png'. + """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(STAREDataset, self).__init__( + img_suffix='.png', + seg_map_suffix='.ah.png', + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py new file mode 100644 index 000000000000..a8855203b14e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py @@ -0,0 +1,29 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class PascalVOCDataset(CustomDataset): + """Pascal VOC dataset. + + Args: + split (str): Split txt file for Pascal VOC. + """ + + CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', + 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', + 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', + 'train', 'tvmonitor') + + PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], + [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], + [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], + [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], + [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] + + def __init__(self, split, **kwargs): + super(PascalVOCDataset, self).__init__( + img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs) + assert osp.exists(self.img_dir) and self.split is not None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py new file mode 100644 index 000000000000..3cf93f8bec9c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py @@ -0,0 +1,12 @@ +from .backbones import * # noqa: F401,F403 +from .builder import (BACKBONES, HEADS, LOSSES, SEGMENTORS, build_backbone, + build_head, build_loss, build_segmentor) +from .decode_heads import * # noqa: F401,F403 +from .losses import * # noqa: F401,F403 +from .necks import * # noqa: F401,F403 +from .segmentors import * # noqa: F401,F403 + +__all__ = [ + 'BACKBONES', 'HEADS', 'LOSSES', 'SEGMENTORS', 'build_backbone', + 'build_head', 'build_loss', 'build_segmentor' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py new file mode 100644 index 000000000000..8339983905fb --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py @@ -0,0 +1,17 @@ +from .cgnet import CGNet +# from .fast_scnn import FastSCNN +from .hrnet import HRNet +from .mobilenet_v2 import MobileNetV2 +from .mobilenet_v3 import MobileNetV3 +from .resnest import ResNeSt +from .resnet import ResNet, ResNetV1c, ResNetV1d +from .resnext import ResNeXt +from .unet import UNet +from .vit import VisionTransformer +from .uniformer import UniFormer + +__all__ = [ + 'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', + 'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3', + 'VisionTransformer', 'UniFormer' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py new file mode 100644 index 000000000000..a672d3156aeb --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py @@ -0,0 +1,367 @@ +import torch +import torch.nn as nn +import torch.utils.checkpoint as cp +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import (ConvModule, build_conv_layer, build_norm_layer, + constant_init, kaiming_init) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger +from ..builder import BACKBONES + + +class GlobalContextExtractor(nn.Module): + """Global Context Extractor for CGNet. + + This class is employed to refine the joint feature of both local feature + and surrounding context. + + Args: + channel (int): Number of input feature channels. + reduction (int): Reductions for global context extractor. Default: 16. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__(self, channel, reduction=16, with_cp=False): + super(GlobalContextExtractor, self).__init__() + self.channel = channel + self.reduction = reduction + assert reduction >= 1 and channel >= reduction + self.with_cp = with_cp + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel), nn.Sigmoid()) + + def forward(self, x): + + def _inner_forward(x): + num_batch, num_channel = x.size()[:2] + y = self.avg_pool(x).view(num_batch, num_channel) + y = self.fc(y).view(num_batch, num_channel, 1, 1) + return x * y + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class ContextGuidedBlock(nn.Module): + """Context Guided Block for CGNet. + + This class consists of four components: local feature extractor, + surrounding feature extractor, joint feature extractor and global + context extractor. + + Args: + in_channels (int): Number of input feature channels. + out_channels (int): Number of output feature channels. + dilation (int): Dilation rate for surrounding context extractor. + Default: 2. + reduction (int): Reduction for global context extractor. Default: 16. + skip_connect (bool): Add input to output or not. Default: True. + downsample (bool): Downsample the input to 1/2 or not. Default: False. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='PReLU'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__(self, + in_channels, + out_channels, + dilation=2, + reduction=16, + skip_connect=True, + downsample=False, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'), + with_cp=False): + super(ContextGuidedBlock, self).__init__() + self.with_cp = with_cp + self.downsample = downsample + + channels = out_channels if downsample else out_channels // 2 + if 'type' in act_cfg and act_cfg['type'] == 'PReLU': + act_cfg['num_parameters'] = channels + kernel_size = 3 if downsample else 1 + stride = 2 if downsample else 1 + padding = (kernel_size - 1) // 2 + + self.conv1x1 = ConvModule( + in_channels, + channels, + kernel_size, + stride, + padding, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + self.f_loc = build_conv_layer( + conv_cfg, + channels, + channels, + kernel_size=3, + padding=1, + groups=channels, + bias=False) + self.f_sur = build_conv_layer( + conv_cfg, + channels, + channels, + kernel_size=3, + padding=dilation, + groups=channels, + dilation=dilation, + bias=False) + + self.bn = build_norm_layer(norm_cfg, 2 * channels)[1] + self.activate = nn.PReLU(2 * channels) + + if downsample: + self.bottleneck = build_conv_layer( + conv_cfg, + 2 * channels, + out_channels, + kernel_size=1, + bias=False) + + self.skip_connect = skip_connect and not downsample + self.f_glo = GlobalContextExtractor(out_channels, reduction, with_cp) + + def forward(self, x): + + def _inner_forward(x): + out = self.conv1x1(x) + loc = self.f_loc(out) + sur = self.f_sur(out) + + joi_feat = torch.cat([loc, sur], 1) # the joint feature + joi_feat = self.bn(joi_feat) + joi_feat = self.activate(joi_feat) + if self.downsample: + joi_feat = self.bottleneck(joi_feat) # channel = out_channels + # f_glo is employed to refine the joint feature + out = self.f_glo(joi_feat) + + if self.skip_connect: + return x + out + else: + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class InputInjection(nn.Module): + """Downsampling module for CGNet.""" + + def __init__(self, num_downsampling): + super(InputInjection, self).__init__() + self.pool = nn.ModuleList() + for i in range(num_downsampling): + self.pool.append(nn.AvgPool2d(3, stride=2, padding=1)) + + def forward(self, x): + for pool in self.pool: + x = pool(x) + return x + + +@BACKBONES.register_module() +class CGNet(nn.Module): + """CGNet backbone. + + A Light-weight Context Guided Network for Semantic Segmentation + arXiv: https://arxiv.org/abs/1811.08201 + + Args: + in_channels (int): Number of input image channels. Normally 3. + num_channels (tuple[int]): Numbers of feature channels at each stages. + Default: (32, 64, 128). + num_blocks (tuple[int]): Numbers of CG blocks at stage 1 and stage 2. + Default: (3, 21). + dilations (tuple[int]): Dilation rate for surrounding context + extractors at stage 1 and stage 2. Default: (2, 4). + reductions (tuple[int]): Reductions for global context extractors at + stage 1 and stage 2. Default: (8, 16). + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='PReLU'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__(self, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16), + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'), + norm_eval=False, + with_cp=False): + + super(CGNet, self).__init__() + self.in_channels = in_channels + self.num_channels = num_channels + assert isinstance(self.num_channels, tuple) and len( + self.num_channels) == 3 + self.num_blocks = num_blocks + assert isinstance(self.num_blocks, tuple) and len(self.num_blocks) == 2 + self.dilations = dilations + assert isinstance(self.dilations, tuple) and len(self.dilations) == 2 + self.reductions = reductions + assert isinstance(self.reductions, tuple) and len(self.reductions) == 2 + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + if 'type' in self.act_cfg and self.act_cfg['type'] == 'PReLU': + self.act_cfg['num_parameters'] = num_channels[0] + self.norm_eval = norm_eval + self.with_cp = with_cp + + cur_channels = in_channels + self.stem = nn.ModuleList() + for i in range(3): + self.stem.append( + ConvModule( + cur_channels, + num_channels[0], + 3, + 2 if i == 0 else 1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + cur_channels = num_channels[0] + + self.inject_2x = InputInjection(1) # down-sample for Input, factor=2 + self.inject_4x = InputInjection(2) # down-sample for Input, factor=4 + + cur_channels += in_channels + self.norm_prelu_0 = nn.Sequential( + build_norm_layer(norm_cfg, cur_channels)[1], + nn.PReLU(cur_channels)) + + # stage 1 + self.level1 = nn.ModuleList() + for i in range(num_blocks[0]): + self.level1.append( + ContextGuidedBlock( + cur_channels if i == 0 else num_channels[1], + num_channels[1], + dilations[0], + reductions[0], + downsample=(i == 0), + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + with_cp=with_cp)) # CG block + + cur_channels = 2 * num_channels[1] + in_channels + self.norm_prelu_1 = nn.Sequential( + build_norm_layer(norm_cfg, cur_channels)[1], + nn.PReLU(cur_channels)) + + # stage 2 + self.level2 = nn.ModuleList() + for i in range(num_blocks[1]): + self.level2.append( + ContextGuidedBlock( + cur_channels if i == 0 else num_channels[2], + num_channels[2], + dilations[1], + reductions[1], + downsample=(i == 0), + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + with_cp=with_cp)) # CG block + + cur_channels = 2 * num_channels[2] + self.norm_prelu_2 = nn.Sequential( + build_norm_layer(norm_cfg, cur_channels)[1], + nn.PReLU(cur_channels)) + + def forward(self, x): + output = [] + + # stage 0 + inp_2x = self.inject_2x(x) + inp_4x = self.inject_4x(x) + for layer in self.stem: + x = layer(x) + x = self.norm_prelu_0(torch.cat([x, inp_2x], 1)) + output.append(x) + + # stage 1 + for i, layer in enumerate(self.level1): + x = layer(x) + if i == 0: + down1 = x + x = self.norm_prelu_1(torch.cat([x, down1, inp_4x], 1)) + output.append(x) + + # stage 2 + for i, layer in enumerate(self.level2): + x = layer(x) + if i == 0: + down2 = x + x = self.norm_prelu_2(torch.cat([down2, x], 1)) + output.append(x) + + return output + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, (nn.Conv2d, nn.Linear)): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + elif isinstance(m, nn.PReLU): + constant_init(m, 0) + else: + raise TypeError('pretrained must be a str or None') + + def train(self, mode=True): + """Convert the model into training mode will keeping the normalization + layer freezed.""" + super(CGNet, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py new file mode 100644 index 000000000000..fb8e1ade7c42 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py @@ -0,0 +1,375 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule, constant_init, + kaiming_init) +from torch.nn.modules.batchnorm import _BatchNorm + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models.decode_heads.psp_head import PPM +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import BACKBONES +from ..utils.inverted_residual import InvertedResidual + + +class LearningToDownsample(nn.Module): + """Learning to downsample module. + + Args: + in_channels (int): Number of input channels. + dw_channels (tuple[int]): Number of output channels of the first and + the second depthwise conv (dwconv) layers. + out_channels (int): Number of output channels of the whole + 'learning to downsample' module. + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + """ + + def __init__(self, + in_channels, + dw_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU')): + super(LearningToDownsample, self).__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + dw_channels1 = dw_channels[0] + dw_channels2 = dw_channels[1] + + self.conv = ConvModule( + in_channels, + dw_channels1, + 3, + stride=2, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.dsconv1 = DepthwiseSeparableConvModule( + dw_channels1, + dw_channels2, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg) + self.dsconv2 = DepthwiseSeparableConvModule( + dw_channels2, + out_channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg) + + def forward(self, x): + x = self.conv(x) + x = self.dsconv1(x) + x = self.dsconv2(x) + return x + + +class GlobalFeatureExtractor(nn.Module): + """Global feature extractor module. + + Args: + in_channels (int): Number of input channels of the GFE module. + Default: 64 + block_channels (tuple[int]): Tuple of ints. Each int specifies the + number of output channels of each Inverted Residual module. + Default: (64, 96, 128) + out_channels(int): Number of output channels of the GFE module. + Default: 128 + expand_ratio (int): Adjusts number of channels of the hidden layer + in InvertedResidual by this amount. + Default: 6 + num_blocks (tuple[int]): Tuple of ints. Each int specifies the + number of times each Inverted Residual module is repeated. + The repeated Inverted Residual modules are called a 'group'. + Default: (3, 3, 3) + strides (tuple[int]): Tuple of ints. Each int specifies + the downsampling factor of each 'group'. + Default: (2, 2, 1) + pool_scales (tuple[int]): Tuple of ints. Each int specifies + the parameter required in 'global average pooling' within PPM. + Default: (1, 2, 3, 6) + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. + Default: False + """ + + def __init__(self, + in_channels=64, + block_channels=(64, 96, 128), + out_channels=128, + expand_ratio=6, + num_blocks=(3, 3, 3), + strides=(2, 2, 1), + pool_scales=(1, 2, 3, 6), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False): + super(GlobalFeatureExtractor, self).__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + assert len(block_channels) == len(num_blocks) == 3 + self.bottleneck1 = self._make_layer(in_channels, block_channels[0], + num_blocks[0], strides[0], + expand_ratio) + self.bottleneck2 = self._make_layer(block_channels[0], + block_channels[1], num_blocks[1], + strides[1], expand_ratio) + self.bottleneck3 = self._make_layer(block_channels[1], + block_channels[2], num_blocks[2], + strides[2], expand_ratio) + self.ppm = PPM( + pool_scales, + block_channels[2], + block_channels[2] // 4, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=align_corners) + self.out = ConvModule( + block_channels[2] * 2, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def _make_layer(self, + in_channels, + out_channels, + blocks, + stride=1, + expand_ratio=6): + layers = [ + InvertedResidual( + in_channels, + out_channels, + stride, + expand_ratio, + norm_cfg=self.norm_cfg) + ] + for i in range(1, blocks): + layers.append( + InvertedResidual( + out_channels, + out_channels, + 1, + expand_ratio, + norm_cfg=self.norm_cfg)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.bottleneck1(x) + x = self.bottleneck2(x) + x = self.bottleneck3(x) + x = torch.cat([x, *self.ppm(x)], dim=1) + x = self.out(x) + return x + + +class FeatureFusionModule(nn.Module): + """Feature fusion module. + + Args: + higher_in_channels (int): Number of input channels of the + higher-resolution branch. + lower_in_channels (int): Number of input channels of the + lower-resolution branch. + out_channels (int): Number of output channels. + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. + Default: False + """ + + def __init__(self, + higher_in_channels, + lower_in_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False): + super(FeatureFusionModule, self).__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + self.dwconv = ConvModule( + lower_in_channels, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.conv_lower_res = ConvModule( + out_channels, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.conv_higher_res = ConvModule( + higher_in_channels, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.relu = nn.ReLU(True) + + def forward(self, higher_res_feature, lower_res_feature): + lower_res_feature = resize( + lower_res_feature, + size=higher_res_feature.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + lower_res_feature = self.dwconv(lower_res_feature) + lower_res_feature = self.conv_lower_res(lower_res_feature) + + higher_res_feature = self.conv_higher_res(higher_res_feature) + out = higher_res_feature + lower_res_feature + return self.relu(out) + + +@BACKBONES.register_module() +class FastSCNN(nn.Module): + """Fast-SCNN Backbone. + + Args: + in_channels (int): Number of input image channels. Default: 3. + downsample_dw_channels (tuple[int]): Number of output channels after + the first conv layer & the second conv layer in + Learning-To-Downsample (LTD) module. + Default: (32, 48). + global_in_channels (int): Number of input channels of + Global Feature Extractor(GFE). + Equal to number of output channels of LTD. + Default: 64. + global_block_channels (tuple[int]): Tuple of integers that describe + the output channels for each of the MobileNet-v2 bottleneck + residual blocks in GFE. + Default: (64, 96, 128). + global_block_strides (tuple[int]): Tuple of integers + that describe the strides (downsampling factors) for each of the + MobileNet-v2 bottleneck residual blocks in GFE. + Default: (2, 2, 1). + global_out_channels (int): Number of output channels of GFE. + Default: 128. + higher_in_channels (int): Number of input channels of the higher + resolution branch in FFM. + Equal to global_in_channels. + Default: 64. + lower_in_channels (int): Number of input channels of the lower + resolution branch in FFM. + Equal to global_out_channels. + Default: 128. + fusion_out_channels (int): Number of output channels of FFM. + Default: 128. + out_indices (tuple): Tuple of indices of list + [higher_res_features, lower_res_features, fusion_output]. + Often set to (0,1,2) to enable aux. heads. + Default: (0, 1, 2). + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. + Default: False + """ + + def __init__(self, + in_channels=3, + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False): + + super(FastSCNN, self).__init__() + if global_in_channels != higher_in_channels: + raise AssertionError('Global Input Channels must be the same \ + with Higher Input Channels!') + elif global_out_channels != lower_in_channels: + raise AssertionError('Global Output Channels must be the same \ + with Lower Input Channels!') + + self.in_channels = in_channels + self.downsample_dw_channels1 = downsample_dw_channels[0] + self.downsample_dw_channels2 = downsample_dw_channels[1] + self.global_in_channels = global_in_channels + self.global_block_channels = global_block_channels + self.global_block_strides = global_block_strides + self.global_out_channels = global_out_channels + self.higher_in_channels = higher_in_channels + self.lower_in_channels = lower_in_channels + self.fusion_out_channels = fusion_out_channels + self.out_indices = out_indices + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + self.learning_to_downsample = LearningToDownsample( + in_channels, + downsample_dw_channels, + global_in_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.global_feature_extractor = GlobalFeatureExtractor( + global_in_channels, + global_block_channels, + global_out_channels, + strides=self.global_block_strides, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + self.feature_fusion = FeatureFusionModule( + higher_in_channels, + lower_in_channels, + fusion_out_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + + def init_weights(self, pretrained=None): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + + def forward(self, x): + higher_res_features = self.learning_to_downsample(x) + lower_res_features = self.global_feature_extractor(higher_res_features) + fusion_output = self.feature_fusion(higher_res_features, + lower_res_features) + + outs = [higher_res_features, lower_res_features, fusion_output] + outs = [outs[i] for i in self.out_indices] + return tuple(outs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py new file mode 100644 index 000000000000..54ec053919d5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py @@ -0,0 +1,555 @@ +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import (build_conv_layer, build_norm_layer, constant_init, + kaiming_init) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import Upsample, resize +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger +from ..builder import BACKBONES +from .resnet import BasicBlock, Bottleneck + + +class HRModule(nn.Module): + """High-Resolution Module for HRNet. + + In this module, every branch has 4 BasicBlocks/Bottlenecks. Fusion/Exchange + is in this module. + """ + + def __init__(self, + num_branches, + blocks, + num_blocks, + in_channels, + num_channels, + multiscale_output=True, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True)): + super(HRModule, self).__init__() + self._check_branches(num_branches, num_blocks, in_channels, + num_channels) + + self.in_channels = in_channels + self.num_branches = num_branches + + self.multiscale_output = multiscale_output + self.norm_cfg = norm_cfg + self.conv_cfg = conv_cfg + self.with_cp = with_cp + self.branches = self._make_branches(num_branches, blocks, num_blocks, + num_channels) + self.fuse_layers = self._make_fuse_layers() + self.relu = nn.ReLU(inplace=False) + + def _check_branches(self, num_branches, num_blocks, in_channels, + num_channels): + """Check branches configuration.""" + if num_branches != len(num_blocks): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_BLOCKS(' \ + f'{len(num_blocks)})' + raise ValueError(error_msg) + + if num_branches != len(num_channels): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_CHANNELS(' \ + f'{len(num_channels)})' + raise ValueError(error_msg) + + if num_branches != len(in_channels): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_INCHANNELS(' \ + f'{len(in_channels)})' + raise ValueError(error_msg) + + def _make_one_branch(self, + branch_index, + block, + num_blocks, + num_channels, + stride=1): + """Build one branch.""" + downsample = None + if stride != 1 or \ + self.in_channels[branch_index] != \ + num_channels[branch_index] * block.expansion: + downsample = nn.Sequential( + build_conv_layer( + self.conv_cfg, + self.in_channels[branch_index], + num_channels[branch_index] * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, num_channels[branch_index] * + block.expansion)[1]) + + layers = [] + layers.append( + block( + self.in_channels[branch_index], + num_channels[branch_index], + stride, + downsample=downsample, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg)) + self.in_channels[branch_index] = \ + num_channels[branch_index] * block.expansion + for i in range(1, num_blocks[branch_index]): + layers.append( + block( + self.in_channels[branch_index], + num_channels[branch_index], + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg)) + + return nn.Sequential(*layers) + + def _make_branches(self, num_branches, block, num_blocks, num_channels): + """Build multiple branch.""" + branches = [] + + for i in range(num_branches): + branches.append( + self._make_one_branch(i, block, num_blocks, num_channels)) + + return nn.ModuleList(branches) + + def _make_fuse_layers(self): + """Build fuse layer.""" + if self.num_branches == 1: + return None + + num_branches = self.num_branches + in_channels = self.in_channels + fuse_layers = [] + num_out_branches = num_branches if self.multiscale_output else 1 + for i in range(num_out_branches): + fuse_layer = [] + for j in range(num_branches): + if j > i: + fuse_layer.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[i], + kernel_size=1, + stride=1, + padding=0, + bias=False), + build_norm_layer(self.norm_cfg, in_channels[i])[1], + # we set align_corners=False for HRNet + Upsample( + scale_factor=2**(j - i), + mode='bilinear', + align_corners=False))) + elif j == i: + fuse_layer.append(None) + else: + conv_downsamples = [] + for k in range(i - j): + if k == i - j - 1: + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[i], + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, + in_channels[i])[1])) + else: + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[j], + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, + in_channels[j])[1], + nn.ReLU(inplace=False))) + fuse_layer.append(nn.Sequential(*conv_downsamples)) + fuse_layers.append(nn.ModuleList(fuse_layer)) + + return nn.ModuleList(fuse_layers) + + def forward(self, x): + """Forward function.""" + if self.num_branches == 1: + return [self.branches[0](x[0])] + + for i in range(self.num_branches): + x[i] = self.branches[i](x[i]) + + x_fuse = [] + for i in range(len(self.fuse_layers)): + y = 0 + for j in range(self.num_branches): + if i == j: + y += x[j] + elif j > i: + y = y + resize( + self.fuse_layers[i][j](x[j]), + size=x[i].shape[2:], + mode='bilinear', + align_corners=False) + else: + y += self.fuse_layers[i][j](x[j]) + x_fuse.append(self.relu(y)) + return x_fuse + + +@BACKBONES.register_module() +class HRNet(nn.Module): + """HRNet backbone. + + High-Resolution Representations for Labeling Pixels and Regions + arXiv: https://arxiv.org/abs/1904.04514 + + Args: + extra (dict): detailed configuration for each stage of HRNet. + in_channels (int): Number of input image channels. Normally 3. + conv_cfg (dict): dictionary to construct and config conv layer. + norm_cfg (dict): dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Example: + >>> from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models import HRNet + >>> import torch + >>> extra = dict( + >>> stage1=dict( + >>> num_modules=1, + >>> num_branches=1, + >>> block='BOTTLENECK', + >>> num_blocks=(4, ), + >>> num_channels=(64, )), + >>> stage2=dict( + >>> num_modules=1, + >>> num_branches=2, + >>> block='BASIC', + >>> num_blocks=(4, 4), + >>> num_channels=(32, 64)), + >>> stage3=dict( + >>> num_modules=4, + >>> num_branches=3, + >>> block='BASIC', + >>> num_blocks=(4, 4, 4), + >>> num_channels=(32, 64, 128)), + >>> stage4=dict( + >>> num_modules=3, + >>> num_branches=4, + >>> block='BASIC', + >>> num_blocks=(4, 4, 4, 4), + >>> num_channels=(32, 64, 128, 256))) + >>> self = HRNet(extra, in_channels=1) + >>> self.eval() + >>> inputs = torch.rand(1, 1, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 32, 8, 8) + (1, 64, 4, 4) + (1, 128, 2, 2) + (1, 256, 1, 1) + """ + + blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck} + + def __init__(self, + extra, + in_channels=3, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + with_cp=False, + zero_init_residual=False): + super(HRNet, self).__init__() + self.extra = extra + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.norm_eval = norm_eval + self.with_cp = with_cp + self.zero_init_residual = zero_init_residual + + # stem net + self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1) + self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2) + + self.conv1 = build_conv_layer( + self.conv_cfg, + in_channels, + 64, + kernel_size=3, + stride=2, + padding=1, + bias=False) + + self.add_module(self.norm1_name, norm1) + self.conv2 = build_conv_layer( + self.conv_cfg, + 64, + 64, + kernel_size=3, + stride=2, + padding=1, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.relu = nn.ReLU(inplace=True) + + # stage 1 + self.stage1_cfg = self.extra['stage1'] + num_channels = self.stage1_cfg['num_channels'][0] + block_type = self.stage1_cfg['block'] + num_blocks = self.stage1_cfg['num_blocks'][0] + + block = self.blocks_dict[block_type] + stage1_out_channels = num_channels * block.expansion + self.layer1 = self._make_layer(block, 64, num_channels, num_blocks) + + # stage 2 + self.stage2_cfg = self.extra['stage2'] + num_channels = self.stage2_cfg['num_channels'] + block_type = self.stage2_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition1 = self._make_transition_layer([stage1_out_channels], + num_channels) + self.stage2, pre_stage_channels = self._make_stage( + self.stage2_cfg, num_channels) + + # stage 3 + self.stage3_cfg = self.extra['stage3'] + num_channels = self.stage3_cfg['num_channels'] + block_type = self.stage3_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition2 = self._make_transition_layer(pre_stage_channels, + num_channels) + self.stage3, pre_stage_channels = self._make_stage( + self.stage3_cfg, num_channels) + + # stage 4 + self.stage4_cfg = self.extra['stage4'] + num_channels = self.stage4_cfg['num_channels'] + block_type = self.stage4_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition3 = self._make_transition_layer(pre_stage_channels, + num_channels) + self.stage4, pre_stage_channels = self._make_stage( + self.stage4_cfg, num_channels) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: the normalization layer named "norm2" """ + return getattr(self, self.norm2_name) + + def _make_transition_layer(self, num_channels_pre_layer, + num_channels_cur_layer): + """Make transition layer.""" + num_branches_cur = len(num_channels_cur_layer) + num_branches_pre = len(num_channels_pre_layer) + + transition_layers = [] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + transition_layers.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + num_channels_pre_layer[i], + num_channels_cur_layer[i], + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, + num_channels_cur_layer[i])[1], + nn.ReLU(inplace=True))) + else: + transition_layers.append(None) + else: + conv_downsamples = [] + for j in range(i + 1 - num_branches_pre): + in_channels = num_channels_pre_layer[-1] + out_channels = num_channels_cur_layer[i] \ + if j == i - num_branches_pre else in_channels + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels, + out_channels, + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, out_channels)[1], + nn.ReLU(inplace=True))) + transition_layers.append(nn.Sequential(*conv_downsamples)) + + return nn.ModuleList(transition_layers) + + def _make_layer(self, block, inplanes, planes, blocks, stride=1): + """Make each layer.""" + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + build_conv_layer( + self.conv_cfg, + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, planes * block.expansion)[1]) + + layers = [] + layers.append( + block( + inplanes, + planes, + stride, + downsample=downsample, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg)) + inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block( + inplanes, + planes, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg)) + + return nn.Sequential(*layers) + + def _make_stage(self, layer_config, in_channels, multiscale_output=True): + """Make each stage.""" + num_modules = layer_config['num_modules'] + num_branches = layer_config['num_branches'] + num_blocks = layer_config['num_blocks'] + num_channels = layer_config['num_channels'] + block = self.blocks_dict[layer_config['block']] + + hr_modules = [] + for i in range(num_modules): + # multi_scale_output is only used for the last module + if not multiscale_output and i == num_modules - 1: + reset_multiscale_output = False + else: + reset_multiscale_output = True + + hr_modules.append( + HRModule( + num_branches, + block, + num_blocks, + in_channels, + num_channels, + reset_multiscale_output, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg)) + + return nn.Sequential(*hr_modules), in_channels + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + + if self.zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + constant_init(m.norm3, 0) + elif isinstance(m, BasicBlock): + constant_init(m.norm2, 0) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + """Forward function.""" + + x = self.conv1(x) + x = self.norm1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.norm2(x) + x = self.relu(x) + x = self.layer1(x) + + x_list = [] + for i in range(self.stage2_cfg['num_branches']): + if self.transition1[i] is not None: + x_list.append(self.transition1[i](x)) + else: + x_list.append(x) + y_list = self.stage2(x_list) + + x_list = [] + for i in range(self.stage3_cfg['num_branches']): + if self.transition2[i] is not None: + x_list.append(self.transition2[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage3(x_list) + + x_list = [] + for i in range(self.stage4_cfg['num_branches']): + if self.transition3[i] is not None: + x_list.append(self.transition3[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage4(x_list) + + return y_list + + def train(self, mode=True): + """Convert the model into training mode will keeping the normalization + layer freezed.""" + super(HRNet, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py new file mode 100644 index 000000000000..7abce078ffd2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py @@ -0,0 +1,180 @@ +import logging + +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, constant_init, kaiming_init +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint +from torch.nn.modules.batchnorm import _BatchNorm + +from ..builder import BACKBONES +from ..utils import InvertedResidual, make_divisible + + +@BACKBONES.register_module() +class MobileNetV2(nn.Module): + """MobileNetV2 backbone. + + Args: + widen_factor (float): Width multiplier, multiply number of + channels in each layer by this amount. Default: 1.0. + strides (Sequence[int], optional): Strides of the first block of each + layer. If not specified, default config in ``arch_setting`` will + be used. + dilations (Sequence[int]): Dilation of each layer. + out_indices (None or Sequence[int]): Output from which stages. + Default: (7, ). + frozen_stages (int): Stages to be frozen (all param fixed). + Default: -1, which means not freezing any parameters. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU6'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + # Parameters to build layers. 3 parameters are needed to construct a + # layer, from left to right: expand_ratio, channel, num_blocks. + arch_settings = [[1, 16, 1], [6, 24, 2], [6, 32, 3], [6, 64, 4], + [6, 96, 3], [6, 160, 3], [6, 320, 1]] + + def __init__(self, + widen_factor=1., + strides=(1, 2, 2, 2, 1, 2, 1), + dilations=(1, 1, 1, 1, 1, 1, 1), + out_indices=(1, 2, 4, 6), + frozen_stages=-1, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU6'), + norm_eval=False, + with_cp=False): + super(MobileNetV2, self).__init__() + self.widen_factor = widen_factor + self.strides = strides + self.dilations = dilations + assert len(strides) == len(dilations) == len(self.arch_settings) + self.out_indices = out_indices + for index in out_indices: + if index not in range(0, 7): + raise ValueError('the item in out_indices must in ' + f'range(0, 8). But received {index}') + + if frozen_stages not in range(-1, 7): + raise ValueError('frozen_stages must be in range(-1, 7). ' + f'But received {frozen_stages}') + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.norm_eval = norm_eval + self.with_cp = with_cp + + self.in_channels = make_divisible(32 * widen_factor, 8) + + self.conv1 = ConvModule( + in_channels=3, + out_channels=self.in_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.layers = [] + + for i, layer_cfg in enumerate(self.arch_settings): + expand_ratio, channel, num_blocks = layer_cfg + stride = self.strides[i] + dilation = self.dilations[i] + out_channels = make_divisible(channel * widen_factor, 8) + inverted_res_layer = self.make_layer( + out_channels=out_channels, + num_blocks=num_blocks, + stride=stride, + dilation=dilation, + expand_ratio=expand_ratio) + layer_name = f'layer{i + 1}' + self.add_module(layer_name, inverted_res_layer) + self.layers.append(layer_name) + + def make_layer(self, out_channels, num_blocks, stride, dilation, + expand_ratio): + """Stack InvertedResidual blocks to build a layer for MobileNetV2. + + Args: + out_channels (int): out_channels of block. + num_blocks (int): Number of blocks. + stride (int): Stride of the first block. + dilation (int): Dilation of the first block. + expand_ratio (int): Expand the number of channels of the + hidden layer in InvertedResidual by this ratio. + """ + layers = [] + for i in range(num_blocks): + layers.append( + InvertedResidual( + self.in_channels, + out_channels, + stride if i == 0 else 1, + expand_ratio=expand_ratio, + dilation=dilation if i == 0 else 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + with_cp=self.with_cp)) + self.in_channels = out_channels + + return nn.Sequential(*layers) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + x = self.conv1(x) + + outs = [] + for i, layer_name in enumerate(self.layers): + layer = getattr(self, layer_name) + x = layer(x) + if i in self.out_indices: + outs.append(x) + + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def _freeze_stages(self): + if self.frozen_stages >= 0: + for param in self.conv1.parameters(): + param.requires_grad = False + for i in range(1, self.frozen_stages + 1): + layer = getattr(self, f'layer{i}') + layer.eval() + for param in layer.parameters(): + param.requires_grad = False + + def train(self, mode=True): + super(MobileNetV2, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, _BatchNorm): + m.eval() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py new file mode 100644 index 000000000000..37a5c313bcfa --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py @@ -0,0 +1,255 @@ +import logging + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, constant_init, kaiming_init +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn.bricks import Conv2dAdaptivePadding +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint +from torch.nn.modules.batchnorm import _BatchNorm + +from ..builder import BACKBONES +from ..utils import InvertedResidualV3 as InvertedResidual + + +@BACKBONES.register_module() +class MobileNetV3(nn.Module): + """MobileNetV3 backbone. + + This backbone is the improved implementation of `Searching for MobileNetV3 + `_. + + Args: + arch (str): Architecture of mobilnetv3, from {'small', 'large'}. + Default: 'small'. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + out_indices (tuple[int]): Output from which layer. + Default: (0, 1, 12). + frozen_stages (int): Stages to be frozen (all param fixed). + Default: -1, which means not freezing any parameters. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. + Default: False. + """ + # Parameters to build each block: + # [kernel size, mid channels, out channels, with_se, act type, stride] + arch_settings = { + 'small': [[3, 16, 16, True, 'ReLU', 2], # block0 layer1 os=4 + [3, 72, 24, False, 'ReLU', 2], # block1 layer2 os=8 + [3, 88, 24, False, 'ReLU', 1], + [5, 96, 40, True, 'HSwish', 2], # block2 layer4 os=16 + [5, 240, 40, True, 'HSwish', 1], + [5, 240, 40, True, 'HSwish', 1], + [5, 120, 48, True, 'HSwish', 1], # block3 layer7 os=16 + [5, 144, 48, True, 'HSwish', 1], + [5, 288, 96, True, 'HSwish', 2], # block4 layer9 os=32 + [5, 576, 96, True, 'HSwish', 1], + [5, 576, 96, True, 'HSwish', 1]], + 'large': [[3, 16, 16, False, 'ReLU', 1], # block0 layer1 os=2 + [3, 64, 24, False, 'ReLU', 2], # block1 layer2 os=4 + [3, 72, 24, False, 'ReLU', 1], + [5, 72, 40, True, 'ReLU', 2], # block2 layer4 os=8 + [5, 120, 40, True, 'ReLU', 1], + [5, 120, 40, True, 'ReLU', 1], + [3, 240, 80, False, 'HSwish', 2], # block3 layer7 os=16 + [3, 200, 80, False, 'HSwish', 1], + [3, 184, 80, False, 'HSwish', 1], + [3, 184, 80, False, 'HSwish', 1], + [3, 480, 112, True, 'HSwish', 1], # block4 layer11 os=16 + [3, 672, 112, True, 'HSwish', 1], + [5, 672, 160, True, 'HSwish', 2], # block5 layer13 os=32 + [5, 960, 160, True, 'HSwish', 1], + [5, 960, 160, True, 'HSwish', 1]] + } # yapf: disable + + def __init__(self, + arch='small', + conv_cfg=None, + norm_cfg=dict(type='BN'), + out_indices=(0, 1, 12), + frozen_stages=-1, + reduction_factor=1, + norm_eval=False, + with_cp=False): + super(MobileNetV3, self).__init__() + assert arch in self.arch_settings + assert isinstance(reduction_factor, int) and reduction_factor > 0 + assert mmcv.is_tuple_of(out_indices, int) + for index in out_indices: + if index not in range(0, len(self.arch_settings[arch]) + 2): + raise ValueError( + 'the item in out_indices must in ' + f'range(0, {len(self.arch_settings[arch])+2}). ' + f'But received {index}') + + if frozen_stages not in range(-1, len(self.arch_settings[arch]) + 2): + raise ValueError('frozen_stages must be in range(-1, ' + f'{len(self.arch_settings[arch])+2}). ' + f'But received {frozen_stages}') + self.arch = arch + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.reduction_factor = reduction_factor + self.norm_eval = norm_eval + self.with_cp = with_cp + self.layers = self._make_layer() + + def _make_layer(self): + layers = [] + + # build the first layer (layer0) + in_channels = 16 + layer = ConvModule( + in_channels=3, + out_channels=in_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=dict(type='Conv2dAdaptivePadding'), + norm_cfg=self.norm_cfg, + act_cfg=dict(type='HSwish')) + self.add_module('layer0', layer) + layers.append('layer0') + + layer_setting = self.arch_settings[self.arch] + for i, params in enumerate(layer_setting): + (kernel_size, mid_channels, out_channels, with_se, act, + stride) = params + + if self.arch == 'large' and i >= 12 or self.arch == 'small' and \ + i >= 8: + mid_channels = mid_channels // self.reduction_factor + out_channels = out_channels // self.reduction_factor + + if with_se: + se_cfg = dict( + channels=mid_channels, + ratio=4, + act_cfg=(dict(type='ReLU'), + dict(type='HSigmoid', bias=3.0, divisor=6.0))) + else: + se_cfg = None + + layer = InvertedResidual( + in_channels=in_channels, + out_channels=out_channels, + mid_channels=mid_channels, + kernel_size=kernel_size, + stride=stride, + se_cfg=se_cfg, + with_expand_conv=(in_channels != mid_channels), + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=dict(type=act), + with_cp=self.with_cp) + in_channels = out_channels + layer_name = 'layer{}'.format(i + 1) + self.add_module(layer_name, layer) + layers.append(layer_name) + + # build the last layer + # block5 layer12 os=32 for small model + # block6 layer16 os=32 for large model + layer = ConvModule( + in_channels=in_channels, + out_channels=576 if self.arch == 'small' else 960, + kernel_size=1, + stride=1, + dilation=4, + padding=0, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=dict(type='HSwish')) + layer_name = 'layer{}'.format(len(layer_setting) + 1) + self.add_module(layer_name, layer) + layers.append(layer_name) + + # next, convert backbone MobileNetV3 to a semantic segmentation version + if self.arch == 'small': + self.layer4.depthwise_conv.conv.stride = (1, 1) + self.layer9.depthwise_conv.conv.stride = (1, 1) + for i in range(4, len(layers)): + layer = getattr(self, layers[i]) + if isinstance(layer, InvertedResidual): + modified_module = layer.depthwise_conv.conv + else: + modified_module = layer.conv + + if i < 9: + modified_module.dilation = (2, 2) + pad = 2 + else: + modified_module.dilation = (4, 4) + pad = 4 + + if not isinstance(modified_module, Conv2dAdaptivePadding): + # Adjust padding + pad *= (modified_module.kernel_size[0] - 1) // 2 + modified_module.padding = (pad, pad) + else: + self.layer7.depthwise_conv.conv.stride = (1, 1) + self.layer13.depthwise_conv.conv.stride = (1, 1) + for i in range(7, len(layers)): + layer = getattr(self, layers[i]) + if isinstance(layer, InvertedResidual): + modified_module = layer.depthwise_conv.conv + else: + modified_module = layer.conv + + if i < 13: + modified_module.dilation = (2, 2) + pad = 2 + else: + modified_module.dilation = (4, 4) + pad = 4 + + if not isinstance(modified_module, Conv2dAdaptivePadding): + # Adjust padding + pad *= (modified_module.kernel_size[0] - 1) // 2 + modified_module.padding = (pad, pad) + + return layers + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + outs = [] + for i, layer_name in enumerate(self.layers): + layer = getattr(self, layer_name) + x = layer(x) + if i in self.out_indices: + outs.append(x) + return outs + + def _freeze_stages(self): + for i in range(self.frozen_stages + 1): + layer = getattr(self, f'layer{i}') + layer.eval() + for param in layer.parameters(): + param.requires_grad = False + + def train(self, mode=True): + super(MobileNetV3, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, _BatchNorm): + m.eval() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py new file mode 100644 index 000000000000..f03a57fb2965 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py @@ -0,0 +1,314 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as cp +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import build_conv_layer, build_norm_layer + +from ..builder import BACKBONES +from ..utils import ResLayer +from .resnet import Bottleneck as _Bottleneck +from .resnet import ResNetV1d + + +class RSoftmax(nn.Module): + """Radix Softmax module in ``SplitAttentionConv2d``. + + Args: + radix (int): Radix of input. + groups (int): Groups of input. + """ + + def __init__(self, radix, groups): + super().__init__() + self.radix = radix + self.groups = groups + + def forward(self, x): + batch = x.size(0) + if self.radix > 1: + x = x.view(batch, self.groups, self.radix, -1).transpose(1, 2) + x = F.softmax(x, dim=1) + x = x.reshape(batch, -1) + else: + x = torch.sigmoid(x) + return x + + +class SplitAttentionConv2d(nn.Module): + """Split-Attention Conv2d in ResNeSt. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int | tuple[int]): Same as nn.Conv2d. + stride (int | tuple[int]): Same as nn.Conv2d. + padding (int | tuple[int]): Same as nn.Conv2d. + dilation (int | tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels. Default: 4. + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. Default: None. + dcn (dict): Config dict for DCN. Default: None. + """ + + def __init__(self, + in_channels, + channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + radix=2, + reduction_factor=4, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None): + super(SplitAttentionConv2d, self).__init__() + inter_channels = max(in_channels * radix // reduction_factor, 32) + self.radix = radix + self.groups = groups + self.channels = channels + self.with_dcn = dcn is not None + self.dcn = dcn + fallback_on_stride = False + if self.with_dcn: + fallback_on_stride = self.dcn.pop('fallback_on_stride', False) + if self.with_dcn and not fallback_on_stride: + assert conv_cfg is None, 'conv_cfg must be None for DCN' + conv_cfg = dcn + self.conv = build_conv_layer( + conv_cfg, + in_channels, + channels * radix, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups * radix, + bias=False) + self.norm0_name, norm0 = build_norm_layer( + norm_cfg, channels * radix, postfix=0) + self.add_module(self.norm0_name, norm0) + self.relu = nn.ReLU(inplace=True) + self.fc1 = build_conv_layer( + None, channels, inter_channels, 1, groups=self.groups) + self.norm1_name, norm1 = build_norm_layer( + norm_cfg, inter_channels, postfix=1) + self.add_module(self.norm1_name, norm1) + self.fc2 = build_conv_layer( + None, inter_channels, channels * radix, 1, groups=self.groups) + self.rsoftmax = RSoftmax(radix, groups) + + @property + def norm0(self): + """nn.Module: the normalization layer named "norm0" """ + return getattr(self, self.norm0_name) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + def forward(self, x): + x = self.conv(x) + x = self.norm0(x) + x = self.relu(x) + + batch, rchannel = x.shape[:2] + batch = x.size(0) + if self.radix > 1: + splits = x.view(batch, self.radix, -1, *x.shape[2:]) + gap = splits.sum(dim=1) + else: + gap = x + gap = F.adaptive_avg_pool2d(gap, 1) + gap = self.fc1(gap) + + gap = self.norm1(gap) + gap = self.relu(gap) + + atten = self.fc2(gap) + atten = self.rsoftmax(atten).view(batch, -1, 1, 1) + + if self.radix > 1: + attens = atten.view(batch, self.radix, -1, *atten.shape[2:]) + out = torch.sum(attens * splits, dim=1) + else: + out = atten * x + return out.contiguous() + + +class Bottleneck(_Bottleneck): + """Bottleneck block for ResNeSt. + + Args: + inplane (int): Input planes of this block. + planes (int): Middle planes of this block. + groups (int): Groups of conv2. + width_per_group (int): Width per group of conv2. 64x4d indicates + ``groups=64, width_per_group=4`` and 32x8d indicates + ``groups=32, width_per_group=8``. + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels in + SplitAttentionConv2d. Default: 4. + avg_down_stride (bool): Whether to use average pool for stride in + Bottleneck. Default: True. + kwargs (dict): Key word arguments for base class. + """ + expansion = 4 + + def __init__(self, + inplanes, + planes, + groups=1, + base_width=4, + base_channels=64, + radix=2, + reduction_factor=4, + avg_down_stride=True, + **kwargs): + """Bottleneck block for ResNeSt.""" + super(Bottleneck, self).__init__(inplanes, planes, **kwargs) + + if groups == 1: + width = self.planes + else: + width = math.floor(self.planes * + (base_width / base_channels)) * groups + + self.avg_down_stride = avg_down_stride and self.conv2_stride > 1 + + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, width, postfix=1) + self.norm3_name, norm3 = build_norm_layer( + self.norm_cfg, self.planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + self.conv_cfg, + self.inplanes, + width, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + self.with_modulated_dcn = False + self.conv2 = SplitAttentionConv2d( + width, + width, + kernel_size=3, + stride=1 if self.avg_down_stride else self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + radix=radix, + reduction_factor=reduction_factor, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + dcn=self.dcn) + delattr(self, self.norm2_name) + + if self.avg_down_stride: + self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1) + + self.conv3 = build_conv_layer( + self.conv_cfg, + width, + self.planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + def forward(self, x): + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv1_plugin_names) + + out = self.conv2(out) + + if self.avg_down_stride: + out = self.avd_layer(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv2_plugin_names) + + out = self.conv3(out) + out = self.norm3(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv3_plugin_names) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +@BACKBONES.register_module() +class ResNeSt(ResNetV1d): + """ResNeSt backbone. + + Args: + groups (int): Number of groups of Bottleneck. Default: 1 + base_width (int): Base width of Bottleneck. Default: 4 + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels in + SplitAttentionConv2d. Default: 4. + avg_down_stride (bool): Whether to use average pool for stride in + Bottleneck. Default: True. + kwargs (dict): Keyword arguments for ResNet. + """ + + arch_settings = { + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)), + 200: (Bottleneck, (3, 24, 36, 3)) + } + + def __init__(self, + groups=1, + base_width=4, + radix=2, + reduction_factor=4, + avg_down_stride=True, + **kwargs): + self.groups = groups + self.base_width = base_width + self.radix = radix + self.reduction_factor = reduction_factor + self.avg_down_stride = avg_down_stride + super(ResNeSt, self).__init__(**kwargs) + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``.""" + return ResLayer( + groups=self.groups, + base_width=self.base_width, + base_channels=self.base_channels, + radix=self.radix, + reduction_factor=self.reduction_factor, + avg_down_stride=self.avg_down_stride, + **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py new file mode 100644 index 000000000000..fa3ade5cd085 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py @@ -0,0 +1,688 @@ +import torch.nn as nn +import torch.utils.checkpoint as cp +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import (build_conv_layer, build_norm_layer, build_plugin_layer, + constant_init, kaiming_init) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger +from ..builder import BACKBONES +from ..utils import ResLayer + + +class BasicBlock(nn.Module): + """Basic block for ResNet.""" + + expansion = 1 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None): + super(BasicBlock, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + + self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) + self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) + + self.conv1 = build_conv_layer( + conv_cfg, + inplanes, + planes, + 3, + stride=stride, + padding=dilation, + dilation=dilation, + bias=False) + self.add_module(self.norm1_name, norm1) + self.conv2 = build_conv_layer( + conv_cfg, planes, planes, 3, padding=1, bias=False) + self.add_module(self.norm2_name, norm2) + + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + @property + def norm1(self): + """nn.Module: normalization layer after the first convolution layer""" + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: normalization layer after the second convolution layer""" + return getattr(self, self.norm2_name) + + def forward(self, x): + """Forward function.""" + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.norm2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + """Bottleneck block for ResNet. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is + "caffe", the stride-two layer is the first 1x1 conv layer. + """ + + expansion = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None): + super(Bottleneck, self).__init__() + assert style in ['pytorch', 'caffe'] + assert dcn is None or isinstance(dcn, dict) + assert plugins is None or isinstance(plugins, list) + if plugins is not None: + allowed_position = ['after_conv1', 'after_conv2', 'after_conv3'] + assert all(p['position'] in allowed_position for p in plugins) + + self.inplanes = inplanes + self.planes = planes + self.stride = stride + self.dilation = dilation + self.style = style + self.with_cp = with_cp + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.dcn = dcn + self.with_dcn = dcn is not None + self.plugins = plugins + self.with_plugins = plugins is not None + + if self.with_plugins: + # collect plugins for conv1/conv2/conv3 + self.after_conv1_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv1' + ] + self.after_conv2_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv2' + ] + self.after_conv3_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv3' + ] + + if self.style == 'pytorch': + self.conv1_stride = 1 + self.conv2_stride = stride + else: + self.conv1_stride = stride + self.conv2_stride = 1 + + self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) + self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) + self.norm3_name, norm3 = build_norm_layer( + norm_cfg, planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + conv_cfg, + inplanes, + planes, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + fallback_on_stride = False + if self.with_dcn: + fallback_on_stride = dcn.pop('fallback_on_stride', False) + if not self.with_dcn or fallback_on_stride: + self.conv2 = build_conv_layer( + conv_cfg, + planes, + planes, + kernel_size=3, + stride=self.conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + else: + assert self.conv_cfg is None, 'conv_cfg must be None for DCN' + self.conv2 = build_conv_layer( + dcn, + planes, + planes, + kernel_size=3, + stride=self.conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.conv3 = build_conv_layer( + conv_cfg, + planes, + planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + + if self.with_plugins: + self.after_conv1_plugin_names = self.make_block_plugins( + planes, self.after_conv1_plugins) + self.after_conv2_plugin_names = self.make_block_plugins( + planes, self.after_conv2_plugins) + self.after_conv3_plugin_names = self.make_block_plugins( + planes * self.expansion, self.after_conv3_plugins) + + def make_block_plugins(self, in_channels, plugins): + """make plugins for block. + + Args: + in_channels (int): Input channels of plugin. + plugins (list[dict]): List of plugins cfg to build. + + Returns: + list[str]: List of the names of plugin. + """ + assert isinstance(plugins, list) + plugin_names = [] + for plugin in plugins: + plugin = plugin.copy() + name, layer = build_plugin_layer( + plugin, + in_channels=in_channels, + postfix=plugin.pop('postfix', '')) + assert not hasattr(self, name), f'duplicate plugin {name}' + self.add_module(name, layer) + plugin_names.append(name) + return plugin_names + + def forward_plugin(self, x, plugin_names): + """Forward function for plugins.""" + out = x + for name in plugin_names: + out = getattr(self, name)(x) + return out + + @property + def norm1(self): + """nn.Module: normalization layer after the first convolution layer""" + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: normalization layer after the second convolution layer""" + return getattr(self, self.norm2_name) + + @property + def norm3(self): + """nn.Module: normalization layer after the third convolution layer""" + return getattr(self, self.norm3_name) + + def forward(self, x): + """Forward function.""" + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv1_plugin_names) + + out = self.conv2(out) + out = self.norm2(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv2_plugin_names) + + out = self.conv3(out) + out = self.norm3(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv3_plugin_names) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +@BACKBONES.register_module() +class ResNet(nn.Module): + """ResNet backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + in_channels (int): Number of input image channels. Default" 3. + stem_channels (int): Number of stem channels. Default: 64. + base_channels (int): Number of base channels of res layer. Default: 64. + num_stages (int): Resnet stages, normally 4. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv + avg_down (bool): Use AvgPool instead of stride conv when + downsampling in the bottleneck. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. + norm_cfg (dict): Dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + plugins (list[dict]): List of plugins for stages, each dict contains: + + - cfg (dict, required): Cfg dict to build plugin. + + - position (str, required): Position inside block to insert plugin, + options: 'after_conv1', 'after_conv2', 'after_conv3'. + + - stages (tuple[bool], optional): Stages to apply plugin, length + should be same as 'num_stages' + multi_grid (Sequence[int]|None): Multi grid dilation rates of last + stage. Default: None + contract_dilation (bool): Whether contract first dilation of each layer + Default: False + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): Whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Example: + >>> from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models import ResNet + >>> import torch + >>> self = ResNet(depth=18) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 64, 8, 8) + (1, 128, 4, 4) + (1, 256, 2, 2) + (1, 512, 1, 1) + """ + + arch_settings = { + 18: (BasicBlock, (2, 2, 2, 2)), + 34: (BasicBlock, (3, 4, 6, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, + depth, + in_channels=3, + stem_channels=64, + base_channels=64, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + deep_stem=False, + avg_down=False, + frozen_stages=-1, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + dcn=None, + stage_with_dcn=(False, False, False, False), + plugins=None, + multi_grid=None, + contract_dilation=False, + with_cp=False, + zero_init_residual=True): + super(ResNet, self).__init__() + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for resnet') + self.depth = depth + self.stem_channels = stem_channels + self.base_channels = base_channels + self.num_stages = num_stages + assert num_stages >= 1 and num_stages <= 4 + self.strides = strides + self.dilations = dilations + assert len(strides) == len(dilations) == num_stages + self.out_indices = out_indices + assert max(out_indices) < num_stages + self.style = style + self.deep_stem = deep_stem + self.avg_down = avg_down + self.frozen_stages = frozen_stages + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.with_cp = with_cp + self.norm_eval = norm_eval + self.dcn = dcn + self.stage_with_dcn = stage_with_dcn + if dcn is not None: + assert len(stage_with_dcn) == num_stages + self.plugins = plugins + self.multi_grid = multi_grid + self.contract_dilation = contract_dilation + self.zero_init_residual = zero_init_residual + self.block, stage_blocks = self.arch_settings[depth] + self.stage_blocks = stage_blocks[:num_stages] + self.inplanes = stem_channels + + self._make_stem_layer(in_channels, stem_channels) + + self.res_layers = [] + for i, num_blocks in enumerate(self.stage_blocks): + stride = strides[i] + dilation = dilations[i] + dcn = self.dcn if self.stage_with_dcn[i] else None + if plugins is not None: + stage_plugins = self.make_stage_plugins(plugins, i) + else: + stage_plugins = None + # multi grid is applied to last layer only + stage_multi_grid = multi_grid if i == len( + self.stage_blocks) - 1 else None + planes = base_channels * 2**i + res_layer = self.make_res_layer( + block=self.block, + inplanes=self.inplanes, + planes=planes, + num_blocks=num_blocks, + stride=stride, + dilation=dilation, + style=self.style, + avg_down=self.avg_down, + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + dcn=dcn, + plugins=stage_plugins, + multi_grid=stage_multi_grid, + contract_dilation=contract_dilation) + self.inplanes = planes * self.block.expansion + layer_name = f'layer{i+1}' + self.add_module(layer_name, res_layer) + self.res_layers.append(layer_name) + + self._freeze_stages() + + self.feat_dim = self.block.expansion * base_channels * 2**( + len(self.stage_blocks) - 1) + + def make_stage_plugins(self, plugins, stage_idx): + """make plugins for ResNet 'stage_idx'th stage . + + Currently we support to insert 'context_block', + 'empirical_attention_block', 'nonlocal_block' into the backbone like + ResNet/ResNeXt. They could be inserted after conv1/conv2/conv3 of + Bottleneck. + + An example of plugins format could be : + >>> plugins=[ + ... dict(cfg=dict(type='xxx', arg1='xxx'), + ... stages=(False, True, True, True), + ... position='after_conv2'), + ... dict(cfg=dict(type='yyy'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='1'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='2'), + ... stages=(True, True, True, True), + ... position='after_conv3') + ... ] + >>> self = ResNet(depth=18) + >>> stage_plugins = self.make_stage_plugins(plugins, 0) + >>> assert len(stage_plugins) == 3 + + Suppose 'stage_idx=0', the structure of blocks in the stage would be: + conv1-> conv2->conv3->yyy->zzz1->zzz2 + Suppose 'stage_idx=1', the structure of blocks in the stage would be: + conv1-> conv2->xxx->conv3->yyy->zzz1->zzz2 + + If stages is missing, the plugin would be applied to all stages. + + Args: + plugins (list[dict]): List of plugins cfg to build. The postfix is + required if multiple same type plugins are inserted. + stage_idx (int): Index of stage to build + + Returns: + list[dict]: Plugins for current stage + """ + stage_plugins = [] + for plugin in plugins: + plugin = plugin.copy() + stages = plugin.pop('stages', None) + assert stages is None or len(stages) == self.num_stages + # whether to insert plugin into current stage + if stages is None or stages[stage_idx]: + stage_plugins.append(plugin) + + return stage_plugins + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``.""" + return ResLayer(**kwargs) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + def _make_stem_layer(self, in_channels, stem_channels): + """Make stem layer for ResNet.""" + if self.deep_stem: + self.stem = nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels, + stem_channels // 2, + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer( + self.conv_cfg, + stem_channels // 2, + stem_channels // 2, + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer( + self.conv_cfg, + stem_channels // 2, + stem_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels)[1], + nn.ReLU(inplace=True)) + else: + self.conv1 = build_conv_layer( + self.conv_cfg, + in_channels, + stem_channels, + kernel_size=7, + stride=2, + padding=3, + bias=False) + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, stem_channels, postfix=1) + self.add_module(self.norm1_name, norm1) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + def _freeze_stages(self): + """Freeze stages param and norm stats.""" + if self.frozen_stages >= 0: + if self.deep_stem: + self.stem.eval() + for param in self.stem.parameters(): + param.requires_grad = False + else: + self.norm1.eval() + for m in [self.conv1, self.norm1]: + for param in m.parameters(): + param.requires_grad = False + + for i in range(1, self.frozen_stages + 1): + m = getattr(self, f'layer{i}') + m.eval() + for param in m.parameters(): + param.requires_grad = False + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + + if self.dcn is not None: + for m in self.modules(): + if isinstance(m, Bottleneck) and hasattr( + m, 'conv2_offset'): + constant_init(m.conv2_offset, 0) + + if self.zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + constant_init(m.norm3, 0) + elif isinstance(m, BasicBlock): + constant_init(m.norm2, 0) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + """Forward function.""" + if self.deep_stem: + x = self.stem(x) + else: + x = self.conv1(x) + x = self.norm1(x) + x = self.relu(x) + x = self.maxpool(x) + outs = [] + for i, layer_name in enumerate(self.res_layers): + res_layer = getattr(self, layer_name) + x = res_layer(x) + if i in self.out_indices: + outs.append(x) + return tuple(outs) + + def train(self, mode=True): + """Convert the model into training mode while keep normalization layer + freezed.""" + super(ResNet, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() + + +@BACKBONES.register_module() +class ResNetV1c(ResNet): + """ResNetV1c variant described in [1]_. + + Compared with default ResNet(ResNetV1b), ResNetV1c replaces the 7x7 conv + in the input stem with three 3x3 convs. + + References: + .. [1] https://arxiv.org/pdf/1812.01187.pdf + """ + + def __init__(self, **kwargs): + super(ResNetV1c, self).__init__( + deep_stem=True, avg_down=False, **kwargs) + + +@BACKBONES.register_module() +class ResNetV1d(ResNet): + """ResNetV1d variant described in [1]_. + + Compared with default ResNet(ResNetV1b), ResNetV1d replaces the 7x7 conv in + the input stem with three 3x3 convs. And in the downsampling block, a 2x2 + avg_pool with stride 2 is added before conv, whose stride is changed to 1. + """ + + def __init__(self, **kwargs): + super(ResNetV1d, self).__init__( + deep_stem=True, avg_down=True, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py new file mode 100644 index 000000000000..9b085b9d4497 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py @@ -0,0 +1,145 @@ +import math + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import build_conv_layer, build_norm_layer + +from ..builder import BACKBONES +from ..utils import ResLayer +from .resnet import Bottleneck as _Bottleneck +from .resnet import ResNet + + +class Bottleneck(_Bottleneck): + """Bottleneck block for ResNeXt. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is + "caffe", the stride-two layer is the first 1x1 conv layer. + """ + + def __init__(self, + inplanes, + planes, + groups=1, + base_width=4, + base_channels=64, + **kwargs): + super(Bottleneck, self).__init__(inplanes, planes, **kwargs) + + if groups == 1: + width = self.planes + else: + width = math.floor(self.planes * + (base_width / base_channels)) * groups + + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, width, postfix=1) + self.norm2_name, norm2 = build_norm_layer( + self.norm_cfg, width, postfix=2) + self.norm3_name, norm3 = build_norm_layer( + self.norm_cfg, self.planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + self.conv_cfg, + self.inplanes, + width, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + fallback_on_stride = False + self.with_modulated_dcn = False + if self.with_dcn: + fallback_on_stride = self.dcn.pop('fallback_on_stride', False) + if not self.with_dcn or fallback_on_stride: + self.conv2 = build_conv_layer( + self.conv_cfg, + width, + width, + kernel_size=3, + stride=self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + bias=False) + else: + assert self.conv_cfg is None, 'conv_cfg must be None for DCN' + self.conv2 = build_conv_layer( + self.dcn, + width, + width, + kernel_size=3, + stride=self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.conv3 = build_conv_layer( + self.conv_cfg, + width, + self.planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + +@BACKBONES.register_module() +class ResNeXt(ResNet): + """ResNeXt backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + in_channels (int): Number of input image channels. Normally 3. + num_stages (int): Resnet stages, normally 4. + groups (int): Group of resnext. + base_width (int): Base width of resnext. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + norm_cfg (dict): dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Example: + >>> from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models import ResNeXt + >>> import torch + >>> self = ResNeXt(depth=50) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 256, 8, 8) + (1, 512, 4, 4) + (1, 1024, 2, 2) + (1, 2048, 1, 1) + """ + + arch_settings = { + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, groups=1, base_width=4, **kwargs): + self.groups = groups + self.base_width = base_width + super(ResNeXt, self).__init__(**kwargs) + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``""" + return ResLayer( + groups=self.groups, + base_width=self.base_width, + base_channels=self.base_channels, + **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py new file mode 100644 index 000000000000..ab45a33edbc2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py @@ -0,0 +1,429 @@ +import torch.nn as nn +import torch.utils.checkpoint as cp +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import (UPSAMPLE_LAYERS, ConvModule, build_activation_layer, + build_norm_layer, constant_init, kaiming_init) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger +from ..builder import BACKBONES +from ..utils import UpConvBlock + + +class BasicConvBlock(nn.Module): + """Basic convolutional block for UNet. + + This module consists of several plain convolutional layers. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + num_convs (int): Number of convolutional layers. Default: 2. + stride (int): Whether use stride convolution to downsample + the input feature map. If stride=2, it only uses stride convolution + in the first convolutional layer to downsample the input feature + map. Options are 1 or 2. Default: 1. + dilation (int): Whether use dilated convolution to expand the + receptive field. Set dilation rate of each convolutional layer and + the dilation rate of the first convolutional layer is always 1. + Default: 1. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + """ + + def __init__(self, + in_channels, + out_channels, + num_convs=2, + stride=1, + dilation=1, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + dcn=None, + plugins=None): + super(BasicConvBlock, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + + self.with_cp = with_cp + convs = [] + for i in range(num_convs): + convs.append( + ConvModule( + in_channels=in_channels if i == 0 else out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride if i == 0 else 1, + dilation=1 if i == 0 else dilation, + padding=1 if i == 0 else dilation, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + self.convs = nn.Sequential(*convs) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.convs, x) + else: + out = self.convs(x) + return out + + +@UPSAMPLE_LAYERS.register_module() +class DeconvModule(nn.Module): + """Deconvolution upsample module in decoder for UNet (2X upsample). + + This module uses deconvolution to upsample feature map in the decoder + of UNet. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + kernel_size (int): Kernel size of the convolutional layer. Default: 4. + """ + + def __init__(self, + in_channels, + out_channels, + with_cp=False, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + *, + kernel_size=4, + scale_factor=2): + super(DeconvModule, self).__init__() + + assert (kernel_size - scale_factor >= 0) and\ + (kernel_size - scale_factor) % 2 == 0,\ + f'kernel_size should be greater than or equal to scale_factor '\ + f'and (kernel_size - scale_factor) should be even numbers, '\ + f'while the kernel size is {kernel_size} and scale_factor is '\ + f'{scale_factor}.' + + stride = scale_factor + padding = (kernel_size - scale_factor) // 2 + self.with_cp = with_cp + deconv = nn.ConvTranspose2d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding) + + norm_name, norm = build_norm_layer(norm_cfg, out_channels) + activate = build_activation_layer(act_cfg) + self.deconv_upsamping = nn.Sequential(deconv, norm, activate) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.deconv_upsamping, x) + else: + out = self.deconv_upsamping(x) + return out + + +@UPSAMPLE_LAYERS.register_module() +class InterpConv(nn.Module): + """Interpolation upsample module in decoder for UNet. + + This module uses interpolation to upsample feature map in the decoder + of UNet. It consists of one interpolation upsample layer and one + convolutional layer. It can be one interpolation upsample layer followed + by one convolutional layer (conv_first=False) or one convolutional layer + followed by one interpolation upsample layer (conv_first=True). + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + conv_first (bool): Whether convolutional layer or interpolation + upsample layer first. Default: False. It means interpolation + upsample layer followed by one convolutional layer. + kernel_size (int): Kernel size of the convolutional layer. Default: 1. + stride (int): Stride of the convolutional layer. Default: 1. + padding (int): Padding of the convolutional layer. Default: 1. + upsample_cfg (dict): Interpolation config of the upsample layer. + Default: dict( + scale_factor=2, mode='bilinear', align_corners=False). + """ + + def __init__(self, + in_channels, + out_channels, + with_cp=False, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + *, + conv_cfg=None, + conv_first=False, + kernel_size=1, + stride=1, + padding=0, + upsample_cfg=dict( + scale_factor=2, mode='bilinear', align_corners=False)): + super(InterpConv, self).__init__() + + self.with_cp = with_cp + conv = ConvModule( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + upsample = nn.Upsample(**upsample_cfg) + if conv_first: + self.interp_upsample = nn.Sequential(conv, upsample) + else: + self.interp_upsample = nn.Sequential(upsample, conv) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.interp_upsample, x) + else: + out = self.interp_upsample(x) + return out + + +@BACKBONES.register_module() +class UNet(nn.Module): + """UNet backbone. + U-Net: Convolutional Networks for Biomedical Image Segmentation. + https://arxiv.org/pdf/1505.04597.pdf + + Args: + in_channels (int): Number of input image channels. Default" 3. + base_channels (int): Number of base channels of each stage. + The output channels of the first stage. Default: 64. + num_stages (int): Number of stages in encoder, normally 5. Default: 5. + strides (Sequence[int 1 | 2]): Strides of each stage in encoder. + len(strides) is equal to num_stages. Normally the stride of the + first stage in encoder is 1. If strides[i]=2, it uses stride + convolution to downsample in the correspondence encoder stage. + Default: (1, 1, 1, 1, 1). + enc_num_convs (Sequence[int]): Number of convolutional layers in the + convolution block of the correspondence encoder stage. + Default: (2, 2, 2, 2, 2). + dec_num_convs (Sequence[int]): Number of convolutional layers in the + convolution block of the correspondence decoder stage. + Default: (2, 2, 2, 2). + downsamples (Sequence[int]): Whether use MaxPool to downsample the + feature map after the first stage of encoder + (stages: [1, num_stages)). If the correspondence encoder stage use + stride convolution (strides[i]=2), it will never use MaxPool to + downsample, even downsamples[i-1]=True. + Default: (True, True, True, True). + enc_dilations (Sequence[int]): Dilation rate of each stage in encoder. + Default: (1, 1, 1, 1, 1). + dec_dilations (Sequence[int]): Dilation rate of each stage in decoder. + Default: (1, 1, 1, 1). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + upsample_cfg (dict): The upsample config of the upsample module in + decoder. Default: dict(type='InterpConv'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + + Notice: + The input image size should be divisible by the whole downsample rate + of the encoder. More detail of the whole downsample rate can be found + in UNet._check_input_divisible. + + """ + + def __init__(self, + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False, + dcn=None, + plugins=None): + super(UNet, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + assert len(strides) == num_stages, \ + 'The length of strides should be equal to num_stages, '\ + f'while the strides is {strides}, the length of '\ + f'strides is {len(strides)}, and the num_stages is '\ + f'{num_stages}.' + assert len(enc_num_convs) == num_stages, \ + 'The length of enc_num_convs should be equal to num_stages, '\ + f'while the enc_num_convs is {enc_num_convs}, the length of '\ + f'enc_num_convs is {len(enc_num_convs)}, and the num_stages is '\ + f'{num_stages}.' + assert len(dec_num_convs) == (num_stages-1), \ + 'The length of dec_num_convs should be equal to (num_stages-1), '\ + f'while the dec_num_convs is {dec_num_convs}, the length of '\ + f'dec_num_convs is {len(dec_num_convs)}, and the num_stages is '\ + f'{num_stages}.' + assert len(downsamples) == (num_stages-1), \ + 'The length of downsamples should be equal to (num_stages-1), '\ + f'while the downsamples is {downsamples}, the length of '\ + f'downsamples is {len(downsamples)}, and the num_stages is '\ + f'{num_stages}.' + assert len(enc_dilations) == num_stages, \ + 'The length of enc_dilations should be equal to num_stages, '\ + f'while the enc_dilations is {enc_dilations}, the length of '\ + f'enc_dilations is {len(enc_dilations)}, and the num_stages is '\ + f'{num_stages}.' + assert len(dec_dilations) == (num_stages-1), \ + 'The length of dec_dilations should be equal to (num_stages-1), '\ + f'while the dec_dilations is {dec_dilations}, the length of '\ + f'dec_dilations is {len(dec_dilations)}, and the num_stages is '\ + f'{num_stages}.' + self.num_stages = num_stages + self.strides = strides + self.downsamples = downsamples + self.norm_eval = norm_eval + self.base_channels = base_channels + + self.encoder = nn.ModuleList() + self.decoder = nn.ModuleList() + + for i in range(num_stages): + enc_conv_block = [] + if i != 0: + if strides[i] == 1 and downsamples[i - 1]: + enc_conv_block.append(nn.MaxPool2d(kernel_size=2)) + upsample = (strides[i] != 1 or downsamples[i - 1]) + self.decoder.append( + UpConvBlock( + conv_block=BasicConvBlock, + in_channels=base_channels * 2**i, + skip_channels=base_channels * 2**(i - 1), + out_channels=base_channels * 2**(i - 1), + num_convs=dec_num_convs[i - 1], + stride=1, + dilation=dec_dilations[i - 1], + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + upsample_cfg=upsample_cfg if upsample else None, + dcn=None, + plugins=None)) + + enc_conv_block.append( + BasicConvBlock( + in_channels=in_channels, + out_channels=base_channels * 2**i, + num_convs=enc_num_convs[i], + stride=strides[i], + dilation=enc_dilations[i], + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + dcn=None, + plugins=None)) + self.encoder.append((nn.Sequential(*enc_conv_block))) + in_channels = base_channels * 2**i + + def forward(self, x): + self._check_input_divisible(x) + enc_outs = [] + for enc in self.encoder: + x = enc(x) + enc_outs.append(x) + dec_outs = [x] + for i in reversed(range(len(self.decoder))): + x = self.decoder[i](enc_outs[i], x) + dec_outs.append(x) + + return dec_outs + + def train(self, mode=True): + """Convert the model into training mode while keep normalization layer + freezed.""" + super(UNet, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() + + def _check_input_divisible(self, x): + h, w = x.shape[-2:] + whole_downsample_rate = 1 + for i in range(1, self.num_stages): + if self.strides[i] == 2 or self.downsamples[i - 1]: + whole_downsample_rate *= 2 + assert (h % whole_downsample_rate == 0) \ + and (w % whole_downsample_rate == 0),\ + f'The input image size {(h, w)} should be divisible by the whole '\ + f'downsample rate {whole_downsample_rate}, when num_stages is '\ + f'{self.num_stages}, strides is {self.strides}, and downsamples '\ + f'is {self.downsamples}.' + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py new file mode 100644 index 000000000000..d6b2fe4b35ee --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py @@ -0,0 +1,422 @@ +# -------------------------------------------------------- +# UniFormer +# Copyright (c) 2022 SenseTime X-Lab +# Licensed under The MIT License [see LICENSE for details] +# Written by Kunchang Li +# -------------------------------------------------------- + +from collections import OrderedDict +import math + +from functools import partial +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint +import numpy as np +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv_custom import load_checkpoint +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger +from ..builder import BACKBONES + + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class CMlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Conv2d(in_features, hidden_features, 1) + self.act = act_layer() + self.fc2 = nn.Conv2d(hidden_features, out_features, 1) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class CBlock(nn.Module): + def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) + self.norm1 = nn.BatchNorm2d(dim) + self.conv1 = nn.Conv2d(dim, dim, 1) + self.conv2 = nn.Conv2d(dim, dim, 1) + self.attn = nn.Conv2d(dim, dim, 5, padding=2, groups=dim) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = nn.BatchNorm2d(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = CMlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def forward(self, x): + x = x + self.pos_embed(x) + x = x + self.drop_path(self.conv2(self.attn(self.conv1(self.norm1(x))))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + return x + + +class Attention(nn.Module): + def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights + self.scale = qk_scale or head_dim ** -0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x): + B, N, C = x.shape + qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class SABlock(nn.Module): + def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, + attn_drop=attn_drop, proj_drop=drop) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def forward(self, x): + x = x + self.pos_embed(x) + B, N, H, W = x.shape + x = x.flatten(2).transpose(1, 2) + x = x + self.drop_path(self.attn(self.norm1(x))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + x = x.transpose(1, 2).reshape(B, N, H, W) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class SABlock_Windows(nn.Module): + def __init__(self, dim, num_heads, window_size=14, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.window_size=window_size + self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, + attn_drop=attn_drop, proj_drop=drop) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def forward(self, x): + x = x + self.pos_embed(x) + x = x.permute(0, 2, 3, 1) + B, H, W, C = x.shape + shortcut = x + x = self.norm1(x) + + pad_l = pad_t = 0 + pad_r = (self.window_size - W % self.window_size) % self.window_size + pad_b = (self.window_size - H % self.window_size) % self.window_size + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + _, Hp, Wp, _ = x.shape + + x_windows = window_partition(x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA + attn_windows = self.attn(x_windows) # nW*B, window_size*window_size, C + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + x = window_reverse(attn_windows, self.window_size, Hp, Wp) # B H' W' C + + # reverse cyclic shift + if pad_r > 0 or pad_b > 0: + x = x[:, :H, :W, :].contiguous() + + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + x = x.permute(0, 3, 1, 2).reshape(B, C, H, W) + return x + + +class PatchEmbed(nn.Module): + """ Image to Patch Embedding + """ + def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) + self.img_size = img_size + self.patch_size = patch_size + self.num_patches = num_patches + self.norm = nn.LayerNorm(embed_dim) + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + + def forward(self, x): + B, _, H, W = x.shape + x = self.proj(x) + B, _, H, W = x.shape + x = x.flatten(2).transpose(1, 2) + x = self.norm(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + return x + + +@BACKBONES.register_module() +class UniFormer(nn.Module): + """ Vision Transformer + A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - + https://arxiv.org/abs/2010.11929 + """ + def __init__(self, layers=[3, 4, 8, 3], img_size=224, in_chans=3, num_classes=80, embed_dim=[64, 128, 320, 512], + head_dim=64, mlp_ratio=4., qkv_bias=True, qk_scale=None, representation_size=None, + drop_rate=0., attn_drop_rate=0., drop_path_rate=0., norm_layer=partial(nn.LayerNorm, eps=1e-6), + pretrained_path=None, use_checkpoint=False, checkpoint_num=[0, 0, 0, 0], + windows=False, hybrid=False, window_size=14): + """ + Args: + layer (list): number of block in each layer + img_size (int, tuple): input image size + in_chans (int): number of input channels + num_classes (int): number of classes for classification head + embed_dim (int): embedding dimension + head_dim (int): dimension of attention heads + mlp_ratio (int): ratio of mlp hidden dim to embedding dim + qkv_bias (bool): enable bias for qkv if True + qk_scale (float): override default qk scale of head_dim ** -0.5 if set + representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set + drop_rate (float): dropout rate + attn_drop_rate (float): attention dropout rate + drop_path_rate (float): stochastic depth rate + norm_layer (nn.Module): normalization layer + pretrained_path (str): path of pretrained model + use_checkpoint (bool): whether use checkpoint + checkpoint_num (list): index for using checkpoint in every stage + windows (bool): whether use window MHRA + hybrid (bool): whether use hybrid MHRA + window_size (int): size of window (>14) + """ + super().__init__() + self.num_classes = num_classes + self.use_checkpoint = use_checkpoint + self.checkpoint_num = checkpoint_num + self.windows = windows + print(f'Use Checkpoint: {self.use_checkpoint}') + print(f'Checkpoint Number: {self.checkpoint_num}') + self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models + norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6) + + self.patch_embed1 = PatchEmbed( + img_size=img_size, patch_size=4, in_chans=in_chans, embed_dim=embed_dim[0]) + self.patch_embed2 = PatchEmbed( + img_size=img_size // 4, patch_size=2, in_chans=embed_dim[0], embed_dim=embed_dim[1]) + self.patch_embed3 = PatchEmbed( + img_size=img_size // 8, patch_size=2, in_chans=embed_dim[1], embed_dim=embed_dim[2]) + self.patch_embed4 = PatchEmbed( + img_size=img_size // 16, patch_size=2, in_chans=embed_dim[2], embed_dim=embed_dim[3]) + + self.pos_drop = nn.Dropout(p=drop_rate) + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(layers))] # stochastic depth decay rule + num_heads = [dim // head_dim for dim in embed_dim] + self.blocks1 = nn.ModuleList([ + CBlock( + dim=embed_dim[0], num_heads=num_heads[0], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer) + for i in range(layers[0])]) + self.norm1=norm_layer(embed_dim[0]) + self.blocks2 = nn.ModuleList([ + CBlock( + dim=embed_dim[1], num_heads=num_heads[1], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]], norm_layer=norm_layer) + for i in range(layers[1])]) + self.norm2 = norm_layer(embed_dim[1]) + if self.windows: + print('Use local window for all blocks in stage3') + self.blocks3 = nn.ModuleList([ + SABlock_Windows( + dim=embed_dim[2], num_heads=num_heads[2], window_size=window_size, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer) + for i in range(layers[2])]) + elif hybrid: + print('Use hybrid window for blocks in stage3') + block3 = [] + for i in range(layers[2]): + if (i + 1) % 4 == 0: + block3.append(SABlock( + dim=embed_dim[2], num_heads=num_heads[2], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer)) + else: + block3.append(SABlock_Windows( + dim=embed_dim[2], num_heads=num_heads[2], window_size=window_size, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer)) + self.blocks3 = nn.ModuleList(block3) + else: + print('Use global window for all blocks in stage3') + self.blocks3 = nn.ModuleList([ + SABlock( + dim=embed_dim[2], num_heads=num_heads[2], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer) + for i in range(layers[2])]) + self.norm3 = norm_layer(embed_dim[2]) + self.blocks4 = nn.ModuleList([ + SABlock( + dim=embed_dim[3], num_heads=num_heads[3], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]+layers[2]], norm_layer=norm_layer) + for i in range(layers[3])]) + self.norm4 = norm_layer(embed_dim[3]) + + # Representation layer + if representation_size: + self.num_features = representation_size + self.pre_logits = nn.Sequential(OrderedDict([ + ('fc', nn.Linear(embed_dim, representation_size)), + ('act', nn.Tanh()) + ])) + else: + self.pre_logits = nn.Identity() + + self.apply(self._init_weights) + self.init_weights(pretrained=pretrained_path) + + def init_weights(self, pretrained): + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, map_location='cpu', strict=False, logger=logger) + print(f'Load pretrained model from {pretrained}') + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + @torch.jit.ignore + def no_weight_decay(self): + return {'pos_embed', 'cls_token'} + + def get_classifier(self): + return self.head + + def reset_classifier(self, num_classes, global_pool=''): + self.num_classes = num_classes + self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() + + def forward_features(self, x): + out = [] + x = self.patch_embed1(x) + x = self.pos_drop(x) + for i, blk in enumerate(self.blocks1): + if self.use_checkpoint and i < self.checkpoint_num[0]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm1(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + x = self.patch_embed2(x) + for i, blk in enumerate(self.blocks2): + if self.use_checkpoint and i < self.checkpoint_num[1]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm2(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + x = self.patch_embed3(x) + for i, blk in enumerate(self.blocks3): + if self.use_checkpoint and i < self.checkpoint_num[2]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm3(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + x = self.patch_embed4(x) + for i, blk in enumerate(self.blocks4): + if self.use_checkpoint and i < self.checkpoint_num[3]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm4(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + return tuple(out) + + def forward(self, x): + x = self.forward_features(x) + return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py new file mode 100644 index 000000000000..353b75fad121 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py @@ -0,0 +1,459 @@ +"""Modified from https://github.com/rwightman/pytorch-image- +models/blob/master/timm/models/vision_transformer.py.""" + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as cp +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import (Conv2d, Linear, build_activation_layer, build_norm_layer, + constant_init, kaiming_init, normal_init) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import _load_checkpoint +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger +from ..builder import BACKBONES +from ..utils import DropPath, trunc_normal_ + + +class Mlp(nn.Module): + """MLP layer for Encoder block. + + Args: + in_features(int): Input dimension for the first fully + connected layer. + hidden_features(int): Output dimension for the first fully + connected layer. + out_features(int): Output dementsion for the second fully + connected layer. + act_cfg(dict): Config dict for activation layer. + Default: dict(type='GELU'). + drop(float): Drop rate for the dropout layer. Dropout rate has + to be between 0 and 1. Default: 0. + """ + + def __init__(self, + in_features, + hidden_features=None, + out_features=None, + act_cfg=dict(type='GELU'), + drop=0.): + super(Mlp, self).__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = Linear(in_features, hidden_features) + self.act = build_activation_layer(act_cfg) + self.fc2 = Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Module): + """Attention layer for Encoder block. + + Args: + dim (int): Dimension for the input vector. + num_heads (int): Number of parallel attention heads. + qkv_bias (bool): Enable bias for qkv if True. Default: False. + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. + attn_drop (float): Drop rate for attention output weights. + Default: 0. + proj_drop (float): Drop rate for output weights. Default: 0. + """ + + def __init__(self, + dim, + num_heads=8, + qkv_bias=False, + qk_scale=None, + attn_drop=0., + proj_drop=0.): + super(Attention, self).__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x): + b, n, c = x.shape + qkv = self.qkv(x).reshape(b, n, 3, self.num_heads, + c // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] + + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(b, n, c) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class Block(nn.Module): + """Implements encoder block with residual connection. + + Args: + dim (int): The feature dimension. + num_heads (int): Number of parallel attention heads. + mlp_ratio (int): Ratio of mlp hidden dim to embedding dim. + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. + drop (float): Drop rate for mlp output weights. Default: 0. + attn_drop (float): Drop rate for attention output weights. + Default: 0. + proj_drop (float): Drop rate for attn layer output weights. + Default: 0. + drop_path (float): Drop rate for paths of model. + Default: 0. + act_cfg (dict): Config dict for activation layer. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN', requires_grad=True). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__(self, + dim, + num_heads, + mlp_ratio=4, + qkv_bias=False, + qk_scale=None, + drop=0., + attn_drop=0., + proj_drop=0., + drop_path=0., + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN', eps=1e-6), + with_cp=False): + super(Block, self).__init__() + self.with_cp = with_cp + _, self.norm1 = build_norm_layer(norm_cfg, dim) + self.attn = Attention(dim, num_heads, qkv_bias, qk_scale, attn_drop, + proj_drop) + self.drop_path = DropPath( + drop_path) if drop_path > 0. else nn.Identity() + _, self.norm2 = build_norm_layer(norm_cfg, dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp( + in_features=dim, + hidden_features=mlp_hidden_dim, + act_cfg=act_cfg, + drop=drop) + + def forward(self, x): + + def _inner_forward(x): + out = x + self.drop_path(self.attn(self.norm1(x))) + out = out + self.drop_path(self.mlp(self.norm2(out))) + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class PatchEmbed(nn.Module): + """Image to Patch Embedding. + + Args: + img_size (int | tuple): Input image size. + default: 224. + patch_size (int): Width and height for a patch. + default: 16. + in_channels (int): Input channels for images. Default: 3. + embed_dim (int): The embedding dimension. Default: 768. + """ + + def __init__(self, + img_size=224, + patch_size=16, + in_channels=3, + embed_dim=768): + super(PatchEmbed, self).__init__() + if isinstance(img_size, int): + self.img_size = (img_size, img_size) + elif isinstance(img_size, tuple): + self.img_size = img_size + else: + raise TypeError('img_size must be type of int or tuple') + h, w = self.img_size + self.patch_size = (patch_size, patch_size) + self.num_patches = (h // patch_size) * (w // patch_size) + self.proj = Conv2d( + in_channels, embed_dim, kernel_size=patch_size, stride=patch_size) + + def forward(self, x): + return self.proj(x).flatten(2).transpose(1, 2) + + +@BACKBONES.register_module() +class VisionTransformer(nn.Module): + """Vision transformer backbone. + + A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for + Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 + + Args: + img_size (tuple): input image size. Default: (224, 224). + patch_size (int, tuple): patch size. Default: 16. + in_channels (int): number of input channels. Default: 3. + embed_dim (int): embedding dimension. Default: 768. + depth (int): depth of transformer. Default: 12. + num_heads (int): number of attention heads. Default: 12. + mlp_ratio (int): ratio of mlp hidden dim to embedding dim. + Default: 4. + out_indices (list | tuple | int): Output from which stages. + Default: -1. + qkv_bias (bool): enable bias for qkv if True. Default: True. + qk_scale (float): override default qk scale of head_dim ** -0.5 if set. + drop_rate (float): dropout rate. Default: 0. + attn_drop_rate (float): attention dropout rate. Default: 0. + drop_path_rate (float): Rate of DropPath. Default: 0. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN', eps=1e-6, requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='GELU'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + final_norm (bool): Whether to add a additional layer to normalize + final feature map. Default: False. + interpolate_mode (str): Select the interpolate mode for position + embeding vector resize. Default: bicubic. + with_cls_token (bool): If concatenating class token into image tokens + as transformer input. Default: True. + with_cp (bool): Use checkpoint or not. Using checkpoint + will save some memory while slowing down the training speed. + Default: False. + """ + + def __init__(self, + img_size=(224, 224), + patch_size=16, + in_channels=3, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + out_indices=11, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + norm_cfg=dict(type='LN', eps=1e-6, requires_grad=True), + act_cfg=dict(type='GELU'), + norm_eval=False, + final_norm=False, + with_cls_token=True, + interpolate_mode='bicubic', + with_cp=False): + super(VisionTransformer, self).__init__() + self.img_size = img_size + self.patch_size = patch_size + self.features = self.embed_dim = embed_dim + self.patch_embed = PatchEmbed( + img_size=img_size, + patch_size=patch_size, + in_channels=in_channels, + embed_dim=embed_dim) + + self.with_cls_token = with_cls_token + self.cls_token = nn.Parameter(torch.zeros(1, 1, self.embed_dim)) + self.pos_embed = nn.Parameter( + torch.zeros(1, self.patch_embed.num_patches + 1, embed_dim)) + self.pos_drop = nn.Dropout(p=drop_rate) + + if isinstance(out_indices, int): + self.out_indices = [out_indices] + elif isinstance(out_indices, list) or isinstance(out_indices, tuple): + self.out_indices = out_indices + else: + raise TypeError('out_indices must be type of int, list or tuple') + + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth) + ] # stochastic depth decay rule + self.blocks = nn.ModuleList([ + Block( + dim=embed_dim, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=dpr[i], + attn_drop=attn_drop_rate, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + with_cp=with_cp) for i in range(depth) + ]) + + self.interpolate_mode = interpolate_mode + self.final_norm = final_norm + if final_norm: + _, self.norm = build_norm_layer(norm_cfg, embed_dim) + + self.norm_eval = norm_eval + self.with_cp = with_cp + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = get_root_logger() + checkpoint = _load_checkpoint(pretrained, logger=logger) + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + if 'pos_embed' in state_dict.keys(): + if self.pos_embed.shape != state_dict['pos_embed'].shape: + logger.info(msg=f'Resize the pos_embed shape from \ +{state_dict["pos_embed"].shape} to {self.pos_embed.shape}') + h, w = self.img_size + pos_size = int( + math.sqrt(state_dict['pos_embed'].shape[1] - 1)) + state_dict['pos_embed'] = self.resize_pos_embed( + state_dict['pos_embed'], (h, w), (pos_size, pos_size), + self.patch_size, self.interpolate_mode) + + self.load_state_dict(state_dict, False) + + elif pretrained is None: + # We only implement the 'jax_impl' initialization implemented at + # https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501 + trunc_normal_(self.pos_embed, std=.02) + trunc_normal_(self.cls_token, std=.02) + for n, m in self.named_modules(): + if isinstance(m, Linear): + trunc_normal_(m.weight, std=.02) + if m.bias is not None: + if 'mlp' in n: + normal_init(m.bias, std=1e-6) + else: + constant_init(m.bias, 0) + elif isinstance(m, Conv2d): + kaiming_init(m.weight, mode='fan_in') + if m.bias is not None: + constant_init(m.bias, 0) + elif isinstance(m, (_BatchNorm, nn.GroupNorm, nn.LayerNorm)): + constant_init(m.bias, 0) + constant_init(m.weight, 1.0) + else: + raise TypeError('pretrained must be a str or None') + + def _pos_embeding(self, img, patched_img, pos_embed): + """Positiong embeding method. + + Resize the pos_embed, if the input image size doesn't match + the training size. + Args: + img (torch.Tensor): The inference image tensor, the shape + must be [B, C, H, W]. + patched_img (torch.Tensor): The patched image, it should be + shape of [B, L1, C]. + pos_embed (torch.Tensor): The pos_embed weighs, it should be + shape of [B, L2, c]. + Return: + torch.Tensor: The pos encoded image feature. + """ + assert patched_img.ndim == 3 and pos_embed.ndim == 3, \ + 'the shapes of patched_img and pos_embed must be [B, L, C]' + x_len, pos_len = patched_img.shape[1], pos_embed.shape[1] + if x_len != pos_len: + if pos_len == (self.img_size[0] // self.patch_size) * ( + self.img_size[1] // self.patch_size) + 1: + pos_h = self.img_size[0] // self.patch_size + pos_w = self.img_size[1] // self.patch_size + else: + raise ValueError( + 'Unexpected shape of pos_embed, got {}.'.format( + pos_embed.shape)) + pos_embed = self.resize_pos_embed(pos_embed, img.shape[2:], + (pos_h, pos_w), self.patch_size, + self.interpolate_mode) + return self.pos_drop(patched_img + pos_embed) + + @staticmethod + def resize_pos_embed(pos_embed, input_shpae, pos_shape, patch_size, mode): + """Resize pos_embed weights. + + Resize pos_embed using bicubic interpolate method. + Args: + pos_embed (torch.Tensor): pos_embed weights. + input_shpae (tuple): Tuple for (input_h, intput_w). + pos_shape (tuple): Tuple for (pos_h, pos_w). + patch_size (int): Patch size. + Return: + torch.Tensor: The resized pos_embed of shape [B, L_new, C] + """ + assert pos_embed.ndim == 3, 'shape of pos_embed must be [B, L, C]' + input_h, input_w = input_shpae + pos_h, pos_w = pos_shape + cls_token_weight = pos_embed[:, 0] + pos_embed_weight = pos_embed[:, (-1 * pos_h * pos_w):] + pos_embed_weight = pos_embed_weight.reshape( + 1, pos_h, pos_w, pos_embed.shape[2]).permute(0, 3, 1, 2) + pos_embed_weight = F.interpolate( + pos_embed_weight, + size=[input_h // patch_size, input_w // patch_size], + align_corners=False, + mode=mode) + cls_token_weight = cls_token_weight.unsqueeze(1) + pos_embed_weight = torch.flatten(pos_embed_weight, 2).transpose(1, 2) + pos_embed = torch.cat((cls_token_weight, pos_embed_weight), dim=1) + return pos_embed + + def forward(self, inputs): + B = inputs.shape[0] + + x = self.patch_embed(inputs) + + cls_tokens = self.cls_token.expand(B, -1, -1) + x = torch.cat((cls_tokens, x), dim=1) + x = self._pos_embeding(inputs, x, self.pos_embed) + + if not self.with_cls_token: + # Remove class token for transformer input + x = x[:, 1:] + + outs = [] + for i, blk in enumerate(self.blocks): + x = blk(x) + if i == len(self.blocks) - 1: + if self.final_norm: + x = self.norm(x) + if i in self.out_indices: + if self.with_cls_token: + # Remove class token and reshape token for decoder head + out = x[:, 1:] + else: + out = x + B, _, C = out.shape + out = out.reshape(B, inputs.shape[2] // self.patch_size, + inputs.shape[3] // self.patch_size, + C).permute(0, 3, 1, 2) + outs.append(out) + + return tuple(outs) + + def train(self, mode=True): + super(VisionTransformer, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, nn.LayerNorm): + m.eval() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py new file mode 100644 index 000000000000..66541fd20178 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py @@ -0,0 +1,46 @@ +import warnings + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import MODELS as MMCV_MODELS +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry + +MODELS = Registry('models', parent=MMCV_MODELS) + +BACKBONES = MODELS +NECKS = MODELS +HEADS = MODELS +LOSSES = MODELS +SEGMENTORS = MODELS + + +def build_backbone(cfg): + """Build backbone.""" + return BACKBONES.build(cfg) + + +def build_neck(cfg): + """Build neck.""" + return NECKS.build(cfg) + + +def build_head(cfg): + """Build head.""" + return HEADS.build(cfg) + + +def build_loss(cfg): + """Build loss.""" + return LOSSES.build(cfg) + + +def build_segmentor(cfg, train_cfg=None, test_cfg=None): + """Build segmentor.""" + if train_cfg is not None or test_cfg is not None: + warnings.warn( + 'train_cfg and test_cfg is deprecated, ' + 'please specify them in model', UserWarning) + assert cfg.get('train_cfg') is None or train_cfg is None, \ + 'train_cfg specified in both outer field and model field ' + assert cfg.get('test_cfg') is None or test_cfg is None, \ + 'test_cfg specified in both outer field and model field ' + return SEGMENTORS.build( + cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py new file mode 100644 index 000000000000..ac66d3cfe0ea --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py @@ -0,0 +1,28 @@ +from .ann_head import ANNHead +from .apc_head import APCHead +from .aspp_head import ASPPHead +from .cc_head import CCHead +from .da_head import DAHead +from .dm_head import DMHead +from .dnl_head import DNLHead +from .ema_head import EMAHead +from .enc_head import EncHead +from .fcn_head import FCNHead +from .fpn_head import FPNHead +from .gc_head import GCHead +from .lraspp_head import LRASPPHead +from .nl_head import NLHead +from .ocr_head import OCRHead +# from .point_head import PointHead +from .psa_head import PSAHead +from .psp_head import PSPHead +from .sep_aspp_head import DepthwiseSeparableASPPHead +from .sep_fcn_head import DepthwiseSeparableFCNHead +from .uper_head import UPerHead + +__all__ = [ + 'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead', + 'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead', + 'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'EMAHead', 'DNLHead', + 'APCHead', 'DMHead', 'LRASPPHead' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py new file mode 100644 index 000000000000..69e5d05ba75e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py @@ -0,0 +1,245 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .decode_head import BaseDecodeHead + + +class PPMConcat(nn.ModuleList): + """Pyramid Pooling Module that only concat the features of each layer. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. + """ + + def __init__(self, pool_scales=(1, 3, 6, 8)): + super(PPMConcat, self).__init__( + [nn.AdaptiveAvgPool2d(pool_scale) for pool_scale in pool_scales]) + + def forward(self, feats): + """Forward function.""" + ppm_outs = [] + for ppm in self: + ppm_out = ppm(feats) + ppm_outs.append(ppm_out.view(*feats.shape[:2], -1)) + concat_outs = torch.cat(ppm_outs, dim=2) + return concat_outs + + +class SelfAttentionBlock(_SelfAttentionBlock): + """Make a ANN used SelfAttentionBlock. + + Args: + low_in_channels (int): Input channels of lower level feature, + which is the key feature for self-attention. + high_in_channels (int): Input channels of higher level feature, + which is the query feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + share_key_query (bool): Whether share projection weight between key + and query projection. + query_scale (int): The scale of query feature map. + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__(self, low_in_channels, high_in_channels, channels, + out_channels, share_key_query, query_scale, key_pool_scales, + conv_cfg, norm_cfg, act_cfg): + key_psp = PPMConcat(key_pool_scales) + if query_scale > 1: + query_downsample = nn.MaxPool2d(kernel_size=query_scale) + else: + query_downsample = None + super(SelfAttentionBlock, self).__init__( + key_in_channels=low_in_channels, + query_in_channels=high_in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=share_key_query, + query_downsample=query_downsample, + key_downsample=key_psp, + key_query_num_convs=1, + key_query_norm=True, + value_out_num_convs=1, + value_out_norm=False, + matmul_norm=True, + with_out=True, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + +class AFNB(nn.Module): + """Asymmetric Fusion Non-local Block(AFNB) + + Args: + low_in_channels (int): Input channels of lower level feature, + which is the key feature for self-attention. + high_in_channels (int): Input channels of higher level feature, + which is the query feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + and query projection. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__(self, low_in_channels, high_in_channels, channels, + out_channels, query_scales, key_pool_scales, conv_cfg, + norm_cfg, act_cfg): + super(AFNB, self).__init__() + self.stages = nn.ModuleList() + for query_scale in query_scales: + self.stages.append( + SelfAttentionBlock( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=False, + query_scale=query_scale, + key_pool_scales=key_pool_scales, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.bottleneck = ConvModule( + out_channels + high_in_channels, + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + + def forward(self, low_feats, high_feats): + """Forward function.""" + priors = [stage(high_feats, low_feats) for stage in self.stages] + context = torch.stack(priors, dim=0).sum(dim=0) + output = self.bottleneck(torch.cat([context, high_feats], 1)) + return output + + +class APNB(nn.Module): + """Asymmetric Pyramid Non-local Block (APNB) + + Args: + in_channels (int): Input channels of key/query feature, + which is the key feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__(self, in_channels, channels, out_channels, query_scales, + key_pool_scales, conv_cfg, norm_cfg, act_cfg): + super(APNB, self).__init__() + self.stages = nn.ModuleList() + for query_scale in query_scales: + self.stages.append( + SelfAttentionBlock( + low_in_channels=in_channels, + high_in_channels=in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=True, + query_scale=query_scale, + key_pool_scales=key_pool_scales, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.bottleneck = ConvModule( + 2 * in_channels, + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, feats): + """Forward function.""" + priors = [stage(feats, feats) for stage in self.stages] + context = torch.stack(priors, dim=0).sum(dim=0) + output = self.bottleneck(torch.cat([context, feats], 1)) + return output + + +@HEADS.register_module() +class ANNHead(BaseDecodeHead): + """Asymmetric Non-local Neural Networks for Semantic Segmentation. + + This head is the implementation of `ANNNet + `_. + + Args: + project_channels (int): Projection channels for Nonlocal. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): The pooling scales of key feature map. + Default: (1, 3, 6, 8). + """ + + def __init__(self, + project_channels, + query_scales=(1, ), + key_pool_scales=(1, 3, 6, 8), + **kwargs): + super(ANNHead, self).__init__( + input_transform='multiple_select', **kwargs) + assert len(self.in_channels) == 2 + low_in_channels, high_in_channels = self.in_channels + self.project_channels = project_channels + self.fusion = AFNB( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + out_channels=high_in_channels, + channels=project_channels, + query_scales=query_scales, + key_pool_scales=key_pool_scales, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.bottleneck = ConvModule( + high_in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.context = APNB( + in_channels=self.channels, + out_channels=self.channels, + channels=project_channels, + query_scales=query_scales, + key_pool_scales=key_pool_scales, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + low_feats, high_feats = self._transform_inputs(inputs) + output = self.fusion(low_feats, high_feats) + output = self.dropout(output) + output = self.bottleneck(output) + output = self.context(output) + output = self.cls_seg(output) + + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py new file mode 100644 index 000000000000..119db3709457 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py @@ -0,0 +1,158 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class ACM(nn.Module): + """Adaptive Context Module used in APCNet. + + Args: + pool_scale (int): Pooling scale used in Adaptive Context + Module to extract region features. + fusion (bool): Add one conv to fuse residual feature. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict | None): Config of conv layers. + norm_cfg (dict | None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, pool_scale, fusion, in_channels, channels, conv_cfg, + norm_cfg, act_cfg): + super(ACM, self).__init__() + self.pool_scale = pool_scale + self.fusion = fusion + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.pooled_redu_conv = ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.input_redu_conv = ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.global_info = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.gla = nn.Conv2d(self.channels, self.pool_scale**2, 1, 1, 0) + + self.residual_conv = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + if self.fusion: + self.fusion_conv = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, x): + """Forward function.""" + pooled_x = F.adaptive_avg_pool2d(x, self.pool_scale) + # [batch_size, channels, h, w] + x = self.input_redu_conv(x) + # [batch_size, channels, pool_scale, pool_scale] + pooled_x = self.pooled_redu_conv(pooled_x) + batch_size = x.size(0) + # [batch_size, pool_scale * pool_scale, channels] + pooled_x = pooled_x.view(batch_size, self.channels, + -1).permute(0, 2, 1).contiguous() + # [batch_size, h * w, pool_scale * pool_scale] + affinity_matrix = self.gla(x + resize( + self.global_info(F.adaptive_avg_pool2d(x, 1)), size=x.shape[2:]) + ).permute(0, 2, 3, 1).reshape( + batch_size, -1, self.pool_scale**2) + affinity_matrix = F.sigmoid(affinity_matrix) + # [batch_size, h * w, channels] + z_out = torch.matmul(affinity_matrix, pooled_x) + # [batch_size, channels, h * w] + z_out = z_out.permute(0, 2, 1).contiguous() + # [batch_size, channels, h, w] + z_out = z_out.view(batch_size, self.channels, x.size(2), x.size(3)) + z_out = self.residual_conv(z_out) + z_out = F.relu(z_out + x) + if self.fusion: + z_out = self.fusion_conv(z_out) + + return z_out + + +@HEADS.register_module() +class APCHead(BaseDecodeHead): + """Adaptive Pyramid Context Network for Semantic Segmentation. + + This head is the implementation of + `APCNet `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Adaptive Context + Module. Default: (1, 2, 3, 6). + fusion (bool): Add one conv to fuse residual feature. + """ + + def __init__(self, pool_scales=(1, 2, 3, 6), fusion=True, **kwargs): + super(APCHead, self).__init__(**kwargs) + assert isinstance(pool_scales, (list, tuple)) + self.pool_scales = pool_scales + self.fusion = fusion + acm_modules = [] + for pool_scale in self.pool_scales: + acm_modules.append( + ACM(pool_scale, + self.fusion, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.acm_modules = nn.ModuleList(acm_modules) + self.bottleneck = ConvModule( + self.in_channels + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + acm_outs = [x] + for acm_module in self.acm_modules: + acm_outs.append(acm_module(x)) + acm_outs = torch.cat(acm_outs, dim=1) + output = self.bottleneck(acm_outs) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py new file mode 100644 index 000000000000..3eb5ece9e660 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py @@ -0,0 +1,107 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class ASPPModule(nn.ModuleList): + """Atrous Spatial Pyramid Pooling (ASPP) Module. + + Args: + dilations (tuple[int]): Dilation rate of each layer. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, dilations, in_channels, channels, conv_cfg, norm_cfg, + act_cfg): + super(ASPPModule, self).__init__() + self.dilations = dilations + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + for dilation in dilations: + self.append( + ConvModule( + self.in_channels, + self.channels, + 1 if dilation == 1 else 3, + dilation=dilation, + padding=0 if dilation == 1 else dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + + def forward(self, x): + """Forward function.""" + aspp_outs = [] + for aspp_module in self: + aspp_outs.append(aspp_module(x)) + + return aspp_outs + + +@HEADS.register_module() +class ASPPHead(BaseDecodeHead): + """Rethinking Atrous Convolution for Semantic Image Segmentation. + + This head is the implementation of `DeepLabV3 + `_. + + Args: + dilations (tuple[int]): Dilation rates for ASPP module. + Default: (1, 6, 12, 18). + """ + + def __init__(self, dilations=(1, 6, 12, 18), **kwargs): + super(ASPPHead, self).__init__(**kwargs) + assert isinstance(dilations, (list, tuple)) + self.dilations = dilations + self.image_pool = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.aspp_modules = ASPPModule( + dilations, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.bottleneck = ConvModule( + (len(dilations) + 1) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + aspp_outs = [ + resize( + self.image_pool(x), + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + ] + aspp_outs.extend(self.aspp_modules(x)) + aspp_outs = torch.cat(aspp_outs, dim=1) + output = self.bottleneck(aspp_outs) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py new file mode 100644 index 000000000000..d02122ca0e68 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py @@ -0,0 +1,57 @@ +from abc import ABCMeta, abstractmethod + +from .decode_head import BaseDecodeHead + + +class BaseCascadeDecodeHead(BaseDecodeHead, metaclass=ABCMeta): + """Base class for cascade decode head used in + :class:`CascadeEncoderDecoder.""" + + def __init__(self, *args, **kwargs): + super(BaseCascadeDecodeHead, self).__init__(*args, **kwargs) + + @abstractmethod + def forward(self, inputs, prev_output): + """Placeholder of forward function.""" + pass + + def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, + train_cfg): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + seg_logits = self.forward(inputs, prev_output) + losses = self.losses(seg_logits, gt_semantic_seg) + + return losses + + def forward_test(self, inputs, prev_output, img_metas, test_cfg): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. + """ + return self.forward(inputs, prev_output) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py new file mode 100644 index 000000000000..0aa8380f4aab --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py @@ -0,0 +1,42 @@ +import torch + +from ..builder import HEADS +from .fcn_head import FCNHead + +try: + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import CrissCrossAttention +except ModuleNotFoundError: + CrissCrossAttention = None + + +@HEADS.register_module() +class CCHead(FCNHead): + """CCNet: Criss-Cross Attention for Semantic Segmentation. + + This head is the implementation of `CCNet + `_. + + Args: + recurrence (int): Number of recurrence of Criss Cross Attention + module. Default: 2. + """ + + def __init__(self, recurrence=2, **kwargs): + if CrissCrossAttention is None: + raise RuntimeError('Please install mmcv-full for ' + 'CrissCrossAttention ops') + super(CCHead, self).__init__(num_convs=2, **kwargs) + self.recurrence = recurrence + self.cca = CrissCrossAttention(self.channels) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + for _ in range(self.recurrence): + output = self.cca(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py new file mode 100644 index 000000000000..784ebc8a20c8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py @@ -0,0 +1,178 @@ +import torch +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, Scale +from torch import nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import add_prefix +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .decode_head import BaseDecodeHead + + +class PAM(_SelfAttentionBlock): + """Position Attention Module (PAM) + + Args: + in_channels (int): Input channels of key/query feature. + channels (int): Output channels of key/query transform. + """ + + def __init__(self, in_channels, channels): + super(PAM, self).__init__( + key_in_channels=in_channels, + query_in_channels=in_channels, + channels=channels, + out_channels=in_channels, + share_key_query=False, + query_downsample=None, + key_downsample=None, + key_query_num_convs=1, + key_query_norm=False, + value_out_num_convs=1, + value_out_norm=False, + matmul_norm=False, + with_out=False, + conv_cfg=None, + norm_cfg=None, + act_cfg=None) + + self.gamma = Scale(0) + + def forward(self, x): + """Forward function.""" + out = super(PAM, self).forward(x, x) + + out = self.gamma(out) + x + return out + + +class CAM(nn.Module): + """Channel Attention Module (CAM)""" + + def __init__(self): + super(CAM, self).__init__() + self.gamma = Scale(0) + + def forward(self, x): + """Forward function.""" + batch_size, channels, height, width = x.size() + proj_query = x.view(batch_size, channels, -1) + proj_key = x.view(batch_size, channels, -1).permute(0, 2, 1) + energy = torch.bmm(proj_query, proj_key) + energy_new = torch.max( + energy, -1, keepdim=True)[0].expand_as(energy) - energy + attention = F.softmax(energy_new, dim=-1) + proj_value = x.view(batch_size, channels, -1) + + out = torch.bmm(attention, proj_value) + out = out.view(batch_size, channels, height, width) + + out = self.gamma(out) + x + return out + + +@HEADS.register_module() +class DAHead(BaseDecodeHead): + """Dual Attention Network for Scene Segmentation. + + This head is the implementation of `DANet + `_. + + Args: + pam_channels (int): The channels of Position Attention Module(PAM). + """ + + def __init__(self, pam_channels, **kwargs): + super(DAHead, self).__init__(**kwargs) + self.pam_channels = pam_channels + self.pam_in_conv = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.pam = PAM(self.channels, pam_channels) + self.pam_out_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.pam_conv_seg = nn.Conv2d( + self.channels, self.num_classes, kernel_size=1) + + self.cam_in_conv = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.cam = CAM() + self.cam_out_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.cam_conv_seg = nn.Conv2d( + self.channels, self.num_classes, kernel_size=1) + + def pam_cls_seg(self, feat): + """PAM feature classification.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.pam_conv_seg(feat) + return output + + def cam_cls_seg(self, feat): + """CAM feature classification.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.cam_conv_seg(feat) + return output + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + pam_feat = self.pam_in_conv(x) + pam_feat = self.pam(pam_feat) + pam_feat = self.pam_out_conv(pam_feat) + pam_out = self.pam_cls_seg(pam_feat) + + cam_feat = self.cam_in_conv(x) + cam_feat = self.cam(cam_feat) + cam_feat = self.cam_out_conv(cam_feat) + cam_out = self.cam_cls_seg(cam_feat) + + feat_sum = pam_feat + cam_feat + pam_cam_out = self.cls_seg(feat_sum) + + return pam_cam_out, pam_out, cam_out + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing, only ``pam_cam`` is used.""" + return self.forward(inputs)[0] + + def losses(self, seg_logit, seg_label): + """Compute ``pam_cam``, ``pam``, ``cam`` loss.""" + pam_cam_seg_logit, pam_seg_logit, cam_seg_logit = seg_logit + loss = dict() + loss.update( + add_prefix( + super(DAHead, self).losses(pam_cam_seg_logit, seg_label), + 'pam_cam')) + loss.update( + add_prefix( + super(DAHead, self).losses(pam_seg_logit, seg_label), 'pam')) + loss.update( + add_prefix( + super(DAHead, self).losses(cam_seg_logit, seg_label), 'cam')) + return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py new file mode 100644 index 000000000000..2fac63380adb --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py @@ -0,0 +1,234 @@ +from abc import ABCMeta, abstractmethod + +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import normal_init +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import auto_fp16, force_fp32 + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import build_pixel_sampler +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import build_loss +from ..losses import accuracy + + +class BaseDecodeHead(nn.Module, metaclass=ABCMeta): + """Base class for BaseDecodeHead. + + Args: + in_channels (int|Sequence[int]): Input channels. + channels (int): Channels after modules, before conv_seg. + num_classes (int): Number of classes. + dropout_ratio (float): Ratio of dropout layer. Default: 0.1. + conv_cfg (dict|None): Config of conv layers. Default: None. + norm_cfg (dict|None): Config of norm layers. Default: None. + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU') + in_index (int|Sequence[int]): Input feature index. Default: -1 + input_transform (str|None): Transformation type of input features. + Options: 'resize_concat', 'multiple_select', None. + 'resize_concat': Multiple feature maps will be resize to the + same size as first one and than concat together. + Usually used in FCN head of HRNet. + 'multiple_select': Multiple feature maps will be bundle into + a list and passed into decode head. + None: Only one select feature map is allowed. + Default: None. + loss_decode (dict): Config of decode loss. + Default: dict(type='CrossEntropyLoss'). + ignore_index (int | None): The label index to be ignored. When using + masked BCE loss, ignore_index should be set to None. Default: 255 + sampler (dict|None): The config of segmentation map sampler. + Default: None. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + """ + + def __init__(self, + in_channels, + channels, + *, + num_classes, + dropout_ratio=0.1, + conv_cfg=None, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + in_index=-1, + input_transform=None, + loss_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + ignore_index=255, + sampler=None, + align_corners=False): + super(BaseDecodeHead, self).__init__() + self._init_inputs(in_channels, in_index, input_transform) + self.channels = channels + self.num_classes = num_classes + self.dropout_ratio = dropout_ratio + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.in_index = in_index + self.loss_decode = build_loss(loss_decode) + self.ignore_index = ignore_index + self.align_corners = align_corners + if sampler is not None: + self.sampler = build_pixel_sampler(sampler, context=self) + else: + self.sampler = None + + self.conv_seg = nn.Conv2d(channels, num_classes, kernel_size=1) + if dropout_ratio > 0: + self.dropout = nn.Dropout2d(dropout_ratio) + else: + self.dropout = None + self.fp16_enabled = False + + def extra_repr(self): + """Extra repr.""" + s = f'input_transform={self.input_transform}, ' \ + f'ignore_index={self.ignore_index}, ' \ + f'align_corners={self.align_corners}' + return s + + def _init_inputs(self, in_channels, in_index, input_transform): + """Check and initialize input transforms. + + The in_channels, in_index and input_transform must match. + Specifically, when input_transform is None, only single feature map + will be selected. So in_channels and in_index must be of type int. + When input_transform + + Args: + in_channels (int|Sequence[int]): Input channels. + in_index (int|Sequence[int]): Input feature index. + input_transform (str|None): Transformation type of input features. + Options: 'resize_concat', 'multiple_select', None. + 'resize_concat': Multiple feature maps will be resize to the + same size as first one and than concat together. + Usually used in FCN head of HRNet. + 'multiple_select': Multiple feature maps will be bundle into + a list and passed into decode head. + None: Only one select feature map is allowed. + """ + + if input_transform is not None: + assert input_transform in ['resize_concat', 'multiple_select'] + self.input_transform = input_transform + self.in_index = in_index + if input_transform is not None: + assert isinstance(in_channels, (list, tuple)) + assert isinstance(in_index, (list, tuple)) + assert len(in_channels) == len(in_index) + if input_transform == 'resize_concat': + self.in_channels = sum(in_channels) + else: + self.in_channels = in_channels + else: + assert isinstance(in_channels, int) + assert isinstance(in_index, int) + self.in_channels = in_channels + + def init_weights(self): + """Initialize weights of classification layer.""" + normal_init(self.conv_seg, mean=0, std=0.01) + + def _transform_inputs(self, inputs): + """Transform inputs for decoder. + + Args: + inputs (list[Tensor]): List of multi-level img features. + + Returns: + Tensor: The transformed inputs + """ + + if self.input_transform == 'resize_concat': + inputs = [inputs[i] for i in self.in_index] + upsampled_inputs = [ + resize( + input=x, + size=inputs[0].shape[2:], + mode='bilinear', + align_corners=self.align_corners) for x in inputs + ] + inputs = torch.cat(upsampled_inputs, dim=1) + elif self.input_transform == 'multiple_select': + inputs = [inputs[i] for i in self.in_index] + else: + inputs = inputs[self.in_index] + + return inputs + + @auto_fp16() + @abstractmethod + def forward(self, inputs): + """Placeholder of forward function.""" + pass + + def forward_train(self, inputs, img_metas, gt_semantic_seg, train_cfg): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + seg_logits = self.forward(inputs) + losses = self.losses(seg_logits, gt_semantic_seg) + return losses + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. + """ + return self.forward(inputs) + + def cls_seg(self, feat): + """Classify each pixel.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.conv_seg(feat) + return output + + @force_fp32(apply_to=('seg_logit', )) + def losses(self, seg_logit, seg_label): + """Compute segmentation loss.""" + loss = dict() + seg_logit = resize( + input=seg_logit, + size=seg_label.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + if self.sampler is not None: + seg_weight = self.sampler.sample(seg_logit, seg_label) + else: + seg_weight = None + seg_label = seg_label.squeeze(1) + loss['loss_seg'] = self.loss_decode( + seg_logit, + seg_label, + weight=seg_weight, + ignore_index=self.ignore_index) + loss['acc_seg'] = accuracy(seg_logit, seg_label) + return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py new file mode 100644 index 000000000000..52efb6c71e28 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py @@ -0,0 +1,140 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, build_activation_layer, build_norm_layer + +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class DCM(nn.Module): + """Dynamic Convolutional Module used in DMNet. + + Args: + filter_size (int): The filter size of generated convolution kernel + used in Dynamic Convolutional Module. + fusion (bool): Add one conv to fuse DCM output feature. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict | None): Config of conv layers. + norm_cfg (dict | None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, filter_size, fusion, in_channels, channels, conv_cfg, + norm_cfg, act_cfg): + super(DCM, self).__init__() + self.filter_size = filter_size + self.fusion = fusion + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.filter_gen_conv = nn.Conv2d(self.in_channels, self.channels, 1, 1, + 0) + + self.input_redu_conv = ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + if self.norm_cfg is not None: + self.norm = build_norm_layer(self.norm_cfg, self.channels)[1] + else: + self.norm = None + self.activate = build_activation_layer(self.act_cfg) + + if self.fusion: + self.fusion_conv = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, x): + """Forward function.""" + generated_filter = self.filter_gen_conv( + F.adaptive_avg_pool2d(x, self.filter_size)) + x = self.input_redu_conv(x) + b, c, h, w = x.shape + # [1, b * c, h, w], c = self.channels + x = x.view(1, b * c, h, w) + # [b * c, 1, filter_size, filter_size] + generated_filter = generated_filter.view(b * c, 1, self.filter_size, + self.filter_size) + pad = (self.filter_size - 1) // 2 + if (self.filter_size - 1) % 2 == 0: + p2d = (pad, pad, pad, pad) + else: + p2d = (pad + 1, pad, pad + 1, pad) + x = F.pad(input=x, pad=p2d, mode='constant', value=0) + # [1, b * c, h, w] + output = F.conv2d(input=x, weight=generated_filter, groups=b * c) + # [b, c, h, w] + output = output.view(b, c, h, w) + if self.norm is not None: + output = self.norm(output) + output = self.activate(output) + + if self.fusion: + output = self.fusion_conv(output) + + return output + + +@HEADS.register_module() +class DMHead(BaseDecodeHead): + """Dynamic Multi-scale Filters for Semantic Segmentation. + + This head is the implementation of + `DMNet `_. + + Args: + filter_sizes (tuple[int]): The size of generated convolutional filters + used in Dynamic Convolutional Module. Default: (1, 3, 5, 7). + fusion (bool): Add one conv to fuse DCM output feature. + """ + + def __init__(self, filter_sizes=(1, 3, 5, 7), fusion=False, **kwargs): + super(DMHead, self).__init__(**kwargs) + assert isinstance(filter_sizes, (list, tuple)) + self.filter_sizes = filter_sizes + self.fusion = fusion + dcm_modules = [] + for filter_size in self.filter_sizes: + dcm_modules.append( + DCM(filter_size, + self.fusion, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.dcm_modules = nn.ModuleList(dcm_modules) + self.bottleneck = ConvModule( + self.in_channels + len(filter_sizes) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + dcm_outs = [x] + for dcm_module in self.dcm_modules: + dcm_outs.append(dcm_module(x)) + dcm_outs = torch.cat(dcm_outs, dim=1) + output = self.bottleneck(dcm_outs) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py new file mode 100644 index 000000000000..1823f20a6fd9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py @@ -0,0 +1,131 @@ +import torch +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import NonLocal2d +from torch import nn + +from ..builder import HEADS +from .fcn_head import FCNHead + + +class DisentangledNonLocal2d(NonLocal2d): + """Disentangled Non-Local Blocks. + + Args: + temperature (float): Temperature to adjust attention. Default: 0.05 + """ + + def __init__(self, *arg, temperature, **kwargs): + super().__init__(*arg, **kwargs) + self.temperature = temperature + self.conv_mask = nn.Conv2d(self.in_channels, 1, kernel_size=1) + + def embedded_gaussian(self, theta_x, phi_x): + """Embedded gaussian with temperature.""" + + # NonLocal2d pairwise_weight: [N, HxW, HxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + if self.use_scale: + # theta_x.shape[-1] is `self.inter_channels` + pairwise_weight /= theta_x.shape[-1]**0.5 + pairwise_weight /= self.temperature + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def forward(self, x): + # x: [N, C, H, W] + n = x.size(0) + + # g_x: [N, HxW, C] + g_x = self.g(x).view(n, self.inter_channels, -1) + g_x = g_x.permute(0, 2, 1) + + # theta_x: [N, HxW, C], phi_x: [N, C, HxW] + if self.mode == 'gaussian': + theta_x = x.view(n, self.in_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + if self.sub_sample: + phi_x = self.phi(x).view(n, self.in_channels, -1) + else: + phi_x = x.view(n, self.in_channels, -1) + elif self.mode == 'concatenation': + theta_x = self.theta(x).view(n, self.inter_channels, -1, 1) + phi_x = self.phi(x).view(n, self.inter_channels, 1, -1) + else: + theta_x = self.theta(x).view(n, self.inter_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + phi_x = self.phi(x).view(n, self.inter_channels, -1) + + # subtract mean + theta_x -= theta_x.mean(dim=-2, keepdim=True) + phi_x -= phi_x.mean(dim=-1, keepdim=True) + + pairwise_func = getattr(self, self.mode) + # pairwise_weight: [N, HxW, HxW] + pairwise_weight = pairwise_func(theta_x, phi_x) + + # y: [N, HxW, C] + y = torch.matmul(pairwise_weight, g_x) + # y: [N, C, H, W] + y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, + *x.size()[2:]) + + # unary_mask: [N, 1, HxW] + unary_mask = self.conv_mask(x) + unary_mask = unary_mask.view(n, 1, -1) + unary_mask = unary_mask.softmax(dim=-1) + # unary_x: [N, 1, C] + unary_x = torch.matmul(unary_mask, g_x) + # unary_x: [N, C, 1, 1] + unary_x = unary_x.permute(0, 2, 1).contiguous().reshape( + n, self.inter_channels, 1, 1) + + output = x + self.conv_out(y + unary_x) + + return output + + +@HEADS.register_module() +class DNLHead(FCNHead): + """Disentangled Non-Local Neural Networks. + + This head is the implementation of `DNLNet + `_. + + Args: + reduction (int): Reduction factor of projection transform. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + sqrt(1/inter_channels). Default: False. + mode (str): The nonlocal mode. Options are 'embedded_gaussian', + 'dot_product'. Default: 'embedded_gaussian.'. + temperature (float): Temperature to adjust attention. Default: 0.05 + """ + + def __init__(self, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + temperature=0.05, + **kwargs): + super(DNLHead, self).__init__(num_convs=2, **kwargs) + self.reduction = reduction + self.use_scale = use_scale + self.mode = mode + self.temperature = temperature + self.dnl_block = DisentangledNonLocal2d( + in_channels=self.channels, + reduction=self.reduction, + use_scale=self.use_scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + mode=self.mode, + temperature=self.temperature) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.dnl_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py new file mode 100644 index 000000000000..5bebc82ba632 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py @@ -0,0 +1,168 @@ +import math + +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +def reduce_mean(tensor): + """Reduce mean when distributed training.""" + if not (dist.is_available() and dist.is_initialized()): + return tensor + tensor = tensor.clone() + dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM) + return tensor + + +class EMAModule(nn.Module): + """Expectation Maximization Attention Module used in EMANet. + + Args: + channels (int): Channels of the whole module. + num_bases (int): Number of bases. + num_stages (int): Number of the EM iterations. + """ + + def __init__(self, channels, num_bases, num_stages, momentum): + super(EMAModule, self).__init__() + assert num_stages >= 1, 'num_stages must be at least 1!' + self.num_bases = num_bases + self.num_stages = num_stages + self.momentum = momentum + + bases = torch.zeros(1, channels, self.num_bases) + bases.normal_(0, math.sqrt(2. / self.num_bases)) + # [1, channels, num_bases] + bases = F.normalize(bases, dim=1, p=2) + self.register_buffer('bases', bases) + + def forward(self, feats): + """Forward function.""" + batch_size, channels, height, width = feats.size() + # [batch_size, channels, height*width] + feats = feats.view(batch_size, channels, height * width) + # [batch_size, channels, num_bases] + bases = self.bases.repeat(batch_size, 1, 1) + + with torch.no_grad(): + for i in range(self.num_stages): + # [batch_size, height*width, num_bases] + attention = torch.einsum('bcn,bck->bnk', feats, bases) + attention = F.softmax(attention, dim=2) + # l1 norm + attention_normed = F.normalize(attention, dim=1, p=1) + # [batch_size, channels, num_bases] + bases = torch.einsum('bcn,bnk->bck', feats, attention_normed) + # l2 norm + bases = F.normalize(bases, dim=1, p=2) + + feats_recon = torch.einsum('bck,bnk->bcn', bases, attention) + feats_recon = feats_recon.view(batch_size, channels, height, width) + + if self.training: + bases = bases.mean(dim=0, keepdim=True) + bases = reduce_mean(bases) + # l2 norm + bases = F.normalize(bases, dim=1, p=2) + self.bases = (1 - + self.momentum) * self.bases + self.momentum * bases + + return feats_recon + + +@HEADS.register_module() +class EMAHead(BaseDecodeHead): + """Expectation Maximization Attention Networks for Semantic Segmentation. + + This head is the implementation of `EMANet + `_. + + Args: + ema_channels (int): EMA module channels + num_bases (int): Number of bases. + num_stages (int): Number of the EM iterations. + concat_input (bool): Whether concat the input and output of convs + before classification layer. Default: True + momentum (float): Momentum to update the base. Default: 0.1. + """ + + def __init__(self, + ema_channels, + num_bases, + num_stages, + concat_input=True, + momentum=0.1, + **kwargs): + super(EMAHead, self).__init__(**kwargs) + self.ema_channels = ema_channels + self.num_bases = num_bases + self.num_stages = num_stages + self.concat_input = concat_input + self.momentum = momentum + self.ema_module = EMAModule(self.ema_channels, self.num_bases, + self.num_stages, self.momentum) + + self.ema_in_conv = ConvModule( + self.in_channels, + self.ema_channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + # project (0, inf) -> (-inf, inf) + self.ema_mid_conv = ConvModule( + self.ema_channels, + self.ema_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=None, + act_cfg=None) + for param in self.ema_mid_conv.parameters(): + param.requires_grad = False + + self.ema_out_conv = ConvModule( + self.ema_channels, + self.ema_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.bottleneck = ConvModule( + self.ema_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if self.concat_input: + self.conv_cat = ConvModule( + self.in_channels + self.channels, + self.channels, + kernel_size=3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + feats = self.ema_in_conv(x) + identity = feats + feats = self.ema_mid_conv(feats) + recon = self.ema_module(feats) + recon = F.relu(recon, inplace=True) + recon = self.ema_out_conv(recon) + output = F.relu(identity + recon, inplace=True) + output = self.bottleneck(output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py new file mode 100644 index 000000000000..649c7357c2bf --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py @@ -0,0 +1,187 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, build_norm_layer + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import Encoding, resize +from ..builder import HEADS, build_loss +from .decode_head import BaseDecodeHead + + +class EncModule(nn.Module): + """Encoding Module used in EncNet. + + Args: + in_channels (int): Input channels. + num_codes (int): Number of code words. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, in_channels, num_codes, conv_cfg, norm_cfg, act_cfg): + super(EncModule, self).__init__() + self.encoding_project = ConvModule( + in_channels, + in_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + # TODO: resolve this hack + # change to 1d + if norm_cfg is not None: + encoding_norm_cfg = norm_cfg.copy() + if encoding_norm_cfg['type'] in ['BN', 'IN']: + encoding_norm_cfg['type'] += '1d' + else: + encoding_norm_cfg['type'] = encoding_norm_cfg['type'].replace( + '2d', '1d') + else: + # fallback to BN1d + encoding_norm_cfg = dict(type='BN1d') + self.encoding = nn.Sequential( + Encoding(channels=in_channels, num_codes=num_codes), + build_norm_layer(encoding_norm_cfg, num_codes)[1], + nn.ReLU(inplace=True)) + self.fc = nn.Sequential( + nn.Linear(in_channels, in_channels), nn.Sigmoid()) + + def forward(self, x): + """Forward function.""" + encoding_projection = self.encoding_project(x) + encoding_feat = self.encoding(encoding_projection).mean(dim=1) + batch_size, channels, _, _ = x.size() + gamma = self.fc(encoding_feat) + y = gamma.view(batch_size, channels, 1, 1) + output = F.relu_(x + x * y) + return encoding_feat, output + + +@HEADS.register_module() +class EncHead(BaseDecodeHead): + """Context Encoding for Semantic Segmentation. + + This head is the implementation of `EncNet + `_. + + Args: + num_codes (int): Number of code words. Default: 32. + use_se_loss (bool): Whether use Semantic Encoding Loss (SE-loss) to + regularize the training. Default: True. + add_lateral (bool): Whether use lateral connection to fuse features. + Default: False. + loss_se_decode (dict): Config of decode loss. + Default: dict(type='CrossEntropyLoss', use_sigmoid=True). + """ + + def __init__(self, + num_codes=32, + use_se_loss=True, + add_lateral=False, + loss_se_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + loss_weight=0.2), + **kwargs): + super(EncHead, self).__init__( + input_transform='multiple_select', **kwargs) + self.use_se_loss = use_se_loss + self.add_lateral = add_lateral + self.num_codes = num_codes + self.bottleneck = ConvModule( + self.in_channels[-1], + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if add_lateral: + self.lateral_convs = nn.ModuleList() + for in_channels in self.in_channels[:-1]: # skip the last one + self.lateral_convs.append( + ConvModule( + in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.fusion = ConvModule( + len(self.in_channels) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.enc_module = EncModule( + self.channels, + num_codes=num_codes, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if self.use_se_loss: + self.loss_se_decode = build_loss(loss_se_decode) + self.se_layer = nn.Linear(self.channels, self.num_classes) + + def forward(self, inputs): + """Forward function.""" + inputs = self._transform_inputs(inputs) + feat = self.bottleneck(inputs[-1]) + if self.add_lateral: + laterals = [ + resize( + lateral_conv(inputs[i]), + size=feat.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + feat = self.fusion(torch.cat([feat, *laterals], 1)) + encode_feat, output = self.enc_module(feat) + output = self.cls_seg(output) + if self.use_se_loss: + se_output = self.se_layer(encode_feat) + return output, se_output + else: + return output + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing, ignore se_loss.""" + if self.use_se_loss: + return self.forward(inputs)[0] + else: + return self.forward(inputs) + + @staticmethod + def _convert_to_onehot_labels(seg_label, num_classes): + """Convert segmentation label to onehot. + + Args: + seg_label (Tensor): Segmentation label of shape (N, H, W). + num_classes (int): Number of classes. + + Returns: + Tensor: Onehot labels of shape (N, num_classes). + """ + + batch_size = seg_label.size(0) + onehot_labels = seg_label.new_zeros((batch_size, num_classes)) + for i in range(batch_size): + hist = seg_label[i].float().histc( + bins=num_classes, min=0, max=num_classes - 1) + onehot_labels[i] = hist > 0 + return onehot_labels + + def losses(self, seg_logit, seg_label): + """Compute segmentation and semantic encoding loss.""" + seg_logit, se_seg_logit = seg_logit + loss = dict() + loss.update(super(EncHead, self).losses(seg_logit, seg_label)) + se_loss = self.loss_se_decode( + se_seg_logit, + self._convert_to_onehot_labels(seg_label, self.num_classes)) + loss['loss_se'] = se_loss + return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py new file mode 100644 index 000000000000..8fc58a5bb514 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py @@ -0,0 +1,81 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +@HEADS.register_module() +class FCNHead(BaseDecodeHead): + """Fully Convolution Networks for Semantic Segmentation. + + This head is implemented of `FCNNet `_. + + Args: + num_convs (int): Number of convs in the head. Default: 2. + kernel_size (int): The kernel size for convs in the head. Default: 3. + concat_input (bool): Whether concat the input and output of convs + before classification layer. + dilation (int): The dilation rate for convs in the head. Default: 1. + """ + + def __init__(self, + num_convs=2, + kernel_size=3, + concat_input=True, + dilation=1, + **kwargs): + assert num_convs >= 0 and dilation > 0 and isinstance(dilation, int) + self.num_convs = num_convs + self.concat_input = concat_input + self.kernel_size = kernel_size + super(FCNHead, self).__init__(**kwargs) + if num_convs == 0: + assert self.in_channels == self.channels + + conv_padding = (kernel_size // 2) * dilation + convs = [] + convs.append( + ConvModule( + self.in_channels, + self.channels, + kernel_size=kernel_size, + padding=conv_padding, + dilation=dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + for i in range(num_convs - 1): + convs.append( + ConvModule( + self.channels, + self.channels, + kernel_size=kernel_size, + padding=conv_padding, + dilation=dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + if num_convs == 0: + self.convs = nn.Identity() + else: + self.convs = nn.Sequential(*convs) + if self.concat_input: + self.conv_cat = ConvModule( + self.in_channels + self.channels, + self.channels, + kernel_size=kernel_size, + padding=kernel_size // 2, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs(x) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py new file mode 100644 index 000000000000..1fc3c4d0fc0b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py @@ -0,0 +1,68 @@ +import numpy as np +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +@HEADS.register_module() +class FPNHead(BaseDecodeHead): + """Panoptic Feature Pyramid Networks. + + This head is the implementation of `Semantic FPN + `_. + + Args: + feature_strides (tuple[int]): The strides for input feature maps. + stack_lateral. All strides suppose to be power of 2. The first + one is of largest resolution. + """ + + def __init__(self, feature_strides, **kwargs): + super(FPNHead, self).__init__( + input_transform='multiple_select', **kwargs) + assert len(feature_strides) == len(self.in_channels) + assert min(feature_strides) == feature_strides[0] + self.feature_strides = feature_strides + + self.scale_heads = nn.ModuleList() + for i in range(len(feature_strides)): + head_length = max( + 1, + int(np.log2(feature_strides[i]) - np.log2(feature_strides[0]))) + scale_head = [] + for k in range(head_length): + scale_head.append( + ConvModule( + self.in_channels[i] if k == 0 else self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + if feature_strides[i] != feature_strides[0]: + scale_head.append( + nn.Upsample( + scale_factor=2, + mode='bilinear', + align_corners=self.align_corners)) + self.scale_heads.append(nn.Sequential(*scale_head)) + + def forward(self, inputs): + + x = self._transform_inputs(inputs) + + output = self.scale_heads[0](x[0]) + for i in range(1, len(self.feature_strides)): + # non inplace + output = output + resize( + self.scale_heads[i](x[i]), + size=output.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py new file mode 100644 index 000000000000..459a4a502c0d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py @@ -0,0 +1,47 @@ +import torch +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ContextBlock + +from ..builder import HEADS +from .fcn_head import FCNHead + + +@HEADS.register_module() +class GCHead(FCNHead): + """GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond. + + This head is the implementation of `GCNet + `_. + + Args: + ratio (float): Multiplier of channels ratio. Default: 1/4. + pooling_type (str): The pooling type of context aggregation. + Options are 'att', 'avg'. Default: 'avg'. + fusion_types (tuple[str]): The fusion type for feature fusion. + Options are 'channel_add', 'channel_mul'. Default: ('channel_add',) + """ + + def __init__(self, + ratio=1 / 4., + pooling_type='att', + fusion_types=('channel_add', ), + **kwargs): + super(GCHead, self).__init__(num_convs=2, **kwargs) + self.ratio = ratio + self.pooling_type = pooling_type + self.fusion_types = fusion_types + self.gc_block = ContextBlock( + in_channels=self.channels, + ratio=self.ratio, + pooling_type=self.pooling_type, + fusion_types=self.fusion_types) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.gc_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py new file mode 100644 index 000000000000..9af362d83b1a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py @@ -0,0 +1,90 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import is_tuple_of +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +@HEADS.register_module() +class LRASPPHead(BaseDecodeHead): + """Lite R-ASPP (LRASPP) head is proposed in Searching for MobileNetV3. + + This head is the improved implementation of `Searching for MobileNetV3 + `_. + + Args: + branch_channels (tuple[int]): The number of output channels in every + each branch. Default: (32, 64). + """ + + def __init__(self, branch_channels=(32, 64), **kwargs): + super(LRASPPHead, self).__init__(**kwargs) + if self.input_transform != 'multiple_select': + raise ValueError('in Lite R-ASPP (LRASPP) head, input_transform ' + f'must be \'multiple_select\'. But received ' + f'\'{self.input_transform}\'') + assert is_tuple_of(branch_channels, int) + assert len(branch_channels) == len(self.in_channels) - 1 + self.branch_channels = branch_channels + + self.convs = nn.Sequential() + self.conv_ups = nn.Sequential() + for i in range(len(branch_channels)): + self.convs.add_module( + f'conv{i}', + nn.Conv2d( + self.in_channels[i], branch_channels[i], 1, bias=False)) + self.conv_ups.add_module( + f'conv_up{i}', + ConvModule( + self.channels + branch_channels[i], + self.channels, + 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + bias=False)) + + self.conv_up_input = nn.Conv2d(self.channels, self.channels, 1) + + self.aspp_conv = ConvModule( + self.in_channels[-1], + self.channels, + 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + bias=False) + self.image_pool = nn.Sequential( + nn.AvgPool2d(kernel_size=49, stride=(16, 20)), + ConvModule( + self.in_channels[2], + self.channels, + 1, + act_cfg=dict(type='Sigmoid'), + bias=False)) + + def forward(self, inputs): + """Forward function.""" + inputs = self._transform_inputs(inputs) + + x = inputs[-1] + + x = self.aspp_conv(x) * resize( + self.image_pool(x), + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + x = self.conv_up_input(x) + + for i in range(len(self.branch_channels) - 1, -1, -1): + x = resize( + x, + size=inputs[i].size()[2:], + mode='bilinear', + align_corners=self.align_corners) + x = torch.cat([x, self.convs[i](inputs[i])], 1) + x = self.conv_ups[i](x) + + return self.cls_seg(x) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py new file mode 100644 index 000000000000..130005d1c16d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py @@ -0,0 +1,49 @@ +import torch +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import NonLocal2d + +from ..builder import HEADS +from .fcn_head import FCNHead + + +@HEADS.register_module() +class NLHead(FCNHead): + """Non-local Neural Networks. + + This head is the implementation of `NLNet + `_. + + Args: + reduction (int): Reduction factor of projection transform. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + sqrt(1/inter_channels). Default: True. + mode (str): The nonlocal mode. Options are 'embedded_gaussian', + 'dot_product'. Default: 'embedded_gaussian.'. + """ + + def __init__(self, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + **kwargs): + super(NLHead, self).__init__(num_convs=2, **kwargs) + self.reduction = reduction + self.use_scale = use_scale + self.mode = mode + self.nl_block = NonLocal2d( + in_channels=self.channels, + reduction=self.reduction, + use_scale=self.use_scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + mode=self.mode) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.nl_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py new file mode 100644 index 000000000000..6ddd5a06cc55 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py @@ -0,0 +1,127 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .cascade_decode_head import BaseCascadeDecodeHead + + +class SpatialGatherModule(nn.Module): + """Aggregate the context features according to the initial predicted + probability distribution. + + Employ the soft-weighted method to aggregate the context. + """ + + def __init__(self, scale): + super(SpatialGatherModule, self).__init__() + self.scale = scale + + def forward(self, feats, probs): + """Forward function.""" + batch_size, num_classes, height, width = probs.size() + channels = feats.size(1) + probs = probs.view(batch_size, num_classes, -1) + feats = feats.view(batch_size, channels, -1) + # [batch_size, height*width, num_classes] + feats = feats.permute(0, 2, 1) + # [batch_size, channels, height*width] + probs = F.softmax(self.scale * probs, dim=2) + # [batch_size, channels, num_classes] + ocr_context = torch.matmul(probs, feats) + ocr_context = ocr_context.permute(0, 2, 1).contiguous().unsqueeze(3) + return ocr_context + + +class ObjectAttentionBlock(_SelfAttentionBlock): + """Make a OCR used SelfAttentionBlock.""" + + def __init__(self, in_channels, channels, scale, conv_cfg, norm_cfg, + act_cfg): + if scale > 1: + query_downsample = nn.MaxPool2d(kernel_size=scale) + else: + query_downsample = None + super(ObjectAttentionBlock, self).__init__( + key_in_channels=in_channels, + query_in_channels=in_channels, + channels=channels, + out_channels=in_channels, + share_key_query=False, + query_downsample=query_downsample, + key_downsample=None, + key_query_num_convs=2, + key_query_norm=True, + value_out_num_convs=1, + value_out_norm=True, + matmul_norm=True, + with_out=True, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.bottleneck = ConvModule( + in_channels * 2, + in_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, query_feats, key_feats): + """Forward function.""" + context = super(ObjectAttentionBlock, + self).forward(query_feats, key_feats) + output = self.bottleneck(torch.cat([context, query_feats], dim=1)) + if self.query_downsample is not None: + output = resize(query_feats) + + return output + + +@HEADS.register_module() +class OCRHead(BaseCascadeDecodeHead): + """Object-Contextual Representations for Semantic Segmentation. + + This head is the implementation of `OCRNet + `_. + + Args: + ocr_channels (int): The intermediate channels of OCR block. + scale (int): The scale of probability map in SpatialGatherModule in + Default: 1. + """ + + def __init__(self, ocr_channels, scale=1, **kwargs): + super(OCRHead, self).__init__(**kwargs) + self.ocr_channels = ocr_channels + self.scale = scale + self.object_context_block = ObjectAttentionBlock( + self.channels, + self.ocr_channels, + self.scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.spatial_gather_module = SpatialGatherModule(self.scale) + + self.bottleneck = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs, prev_output): + """Forward function.""" + x = self._transform_inputs(inputs) + feats = self.bottleneck(x) + context = self.spatial_gather_module(feats, prev_output) + object_context = self.object_context_block(feats, context) + output = self.cls_seg(object_context) + + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py new file mode 100644 index 000000000000..033d2dbd36d8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py @@ -0,0 +1,349 @@ +# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend/point_head/point_head.py # noqa + +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, normal_init +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import point_sample + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models.builder import HEADS +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..losses import accuracy +from .cascade_decode_head import BaseCascadeDecodeHead + + +def calculate_uncertainty(seg_logits): + """Estimate uncertainty based on seg logits. + + For each location of the prediction ``seg_logits`` we estimate + uncertainty as the difference between top first and top second + predicted logits. + + Args: + seg_logits (Tensor): Semantic segmentation logits, + shape (batch_size, num_classes, height, width). + + Returns: + scores (Tensor): T uncertainty scores with the most uncertain + locations having the highest uncertainty score, shape ( + batch_size, 1, height, width) + """ + top2_scores = torch.topk(seg_logits, k=2, dim=1)[0] + return (top2_scores[:, 1] - top2_scores[:, 0]).unsqueeze(1) + + +@HEADS.register_module() +class PointHead(BaseCascadeDecodeHead): + """A mask point head use in PointRend. + + ``PointHead`` use shared multi-layer perceptron (equivalent to + nn.Conv1d) to predict the logit of input points. The fine-grained feature + and coarse feature will be concatenate together for predication. + + Args: + num_fcs (int): Number of fc layers in the head. Default: 3. + in_channels (int): Number of input channels. Default: 256. + fc_channels (int): Number of fc channels. Default: 256. + num_classes (int): Number of classes for logits. Default: 80. + class_agnostic (bool): Whether use class agnostic classification. + If so, the output channels of logits will be 1. Default: False. + coarse_pred_each_layer (bool): Whether concatenate coarse feature with + the output of each fc layer. Default: True. + conv_cfg (dict|None): Dictionary to construct and config conv layer. + Default: dict(type='Conv1d')) + norm_cfg (dict|None): Dictionary to construct and config norm layer. + Default: None. + loss_point (dict): Dictionary to construct and config loss layer of + point head. Default: dict(type='CrossEntropyLoss', use_mask=True, + loss_weight=1.0). + """ + + def __init__(self, + num_fcs=3, + coarse_pred_each_layer=True, + conv_cfg=dict(type='Conv1d'), + norm_cfg=None, + act_cfg=dict(type='ReLU', inplace=False), + **kwargs): + super(PointHead, self).__init__( + input_transform='multiple_select', + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **kwargs) + + self.num_fcs = num_fcs + self.coarse_pred_each_layer = coarse_pred_each_layer + + fc_in_channels = sum(self.in_channels) + self.num_classes + fc_channels = self.channels + self.fcs = nn.ModuleList() + for k in range(num_fcs): + fc = ConvModule( + fc_in_channels, + fc_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.fcs.append(fc) + fc_in_channels = fc_channels + fc_in_channels += self.num_classes if self.coarse_pred_each_layer \ + else 0 + self.fc_seg = nn.Conv1d( + fc_in_channels, + self.num_classes, + kernel_size=1, + stride=1, + padding=0) + if self.dropout_ratio > 0: + self.dropout = nn.Dropout(self.dropout_ratio) + delattr(self, 'conv_seg') + + def init_weights(self): + """Initialize weights of classification layer.""" + normal_init(self.fc_seg, std=0.001) + + def cls_seg(self, feat): + """Classify each pixel with fc.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.fc_seg(feat) + return output + + def forward(self, fine_grained_point_feats, coarse_point_feats): + x = torch.cat([fine_grained_point_feats, coarse_point_feats], dim=1) + for fc in self.fcs: + x = fc(x) + if self.coarse_pred_each_layer: + x = torch.cat((x, coarse_point_feats), dim=1) + return self.cls_seg(x) + + def _get_fine_grained_point_feats(self, x, points): + """Sample from fine grained features. + + Args: + x (list[Tensor]): Feature pyramid from by neck or backbone. + points (Tensor): Point coordinates, shape (batch_size, + num_points, 2). + + Returns: + fine_grained_feats (Tensor): Sampled fine grained feature, + shape (batch_size, sum(channels of x), num_points). + """ + + fine_grained_feats_list = [ + point_sample(_, points, align_corners=self.align_corners) + for _ in x + ] + if len(fine_grained_feats_list) > 1: + fine_grained_feats = torch.cat(fine_grained_feats_list, dim=1) + else: + fine_grained_feats = fine_grained_feats_list[0] + + return fine_grained_feats + + def _get_coarse_point_feats(self, prev_output, points): + """Sample from fine grained features. + + Args: + prev_output (list[Tensor]): Prediction of previous decode head. + points (Tensor): Point coordinates, shape (batch_size, + num_points, 2). + + Returns: + coarse_feats (Tensor): Sampled coarse feature, shape (batch_size, + num_classes, num_points). + """ + + coarse_feats = point_sample( + prev_output, points, align_corners=self.align_corners) + + return coarse_feats + + def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, + train_cfg): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + x = self._transform_inputs(inputs) + with torch.no_grad(): + points = self.get_points_train( + prev_output, calculate_uncertainty, cfg=train_cfg) + fine_grained_point_feats = self._get_fine_grained_point_feats( + x, points) + coarse_point_feats = self._get_coarse_point_feats(prev_output, points) + point_logits = self.forward(fine_grained_point_feats, + coarse_point_feats) + point_label = point_sample( + gt_semantic_seg.float(), + points, + mode='nearest', + align_corners=self.align_corners) + point_label = point_label.squeeze(1).long() + + losses = self.losses(point_logits, point_label) + + return losses + + def forward_test(self, inputs, prev_output, img_metas, test_cfg): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. + """ + + x = self._transform_inputs(inputs) + refined_seg_logits = prev_output.clone() + for _ in range(test_cfg.subdivision_steps): + refined_seg_logits = resize( + refined_seg_logits, + scale_factor=test_cfg.scale_factor, + mode='bilinear', + align_corners=self.align_corners) + batch_size, channels, height, width = refined_seg_logits.shape + point_indices, points = self.get_points_test( + refined_seg_logits, calculate_uncertainty, cfg=test_cfg) + fine_grained_point_feats = self._get_fine_grained_point_feats( + x, points) + coarse_point_feats = self._get_coarse_point_feats( + prev_output, points) + point_logits = self.forward(fine_grained_point_feats, + coarse_point_feats) + + point_indices = point_indices.unsqueeze(1).expand(-1, channels, -1) + refined_seg_logits = refined_seg_logits.reshape( + batch_size, channels, height * width) + refined_seg_logits = refined_seg_logits.scatter_( + 2, point_indices, point_logits) + refined_seg_logits = refined_seg_logits.view( + batch_size, channels, height, width) + + return refined_seg_logits + + def losses(self, point_logits, point_label): + """Compute segmentation loss.""" + loss = dict() + loss['loss_point'] = self.loss_decode( + point_logits, point_label, ignore_index=self.ignore_index) + loss['acc_point'] = accuracy(point_logits, point_label) + return loss + + def get_points_train(self, seg_logits, uncertainty_func, cfg): + """Sample points for training. + + Sample points in [0, 1] x [0, 1] coordinate space based on their + uncertainty. The uncertainties are calculated for each point using + 'uncertainty_func' function that takes point's logit prediction as + input. + + Args: + seg_logits (Tensor): Semantic segmentation logits, shape ( + batch_size, num_classes, height, width). + uncertainty_func (func): uncertainty calculation function. + cfg (dict): Training config of point head. + + Returns: + point_coords (Tensor): A tensor of shape (batch_size, num_points, + 2) that contains the coordinates of ``num_points`` sampled + points. + """ + num_points = cfg.num_points + oversample_ratio = cfg.oversample_ratio + importance_sample_ratio = cfg.importance_sample_ratio + assert oversample_ratio >= 1 + assert 0 <= importance_sample_ratio <= 1 + batch_size = seg_logits.shape[0] + num_sampled = int(num_points * oversample_ratio) + point_coords = torch.rand( + batch_size, num_sampled, 2, device=seg_logits.device) + point_logits = point_sample(seg_logits, point_coords) + # It is crucial to calculate uncertainty based on the sampled + # prediction value for the points. Calculating uncertainties of the + # coarse predictions first and sampling them for points leads to + # incorrect results. To illustrate this: assume uncertainty func( + # logits)=-abs(logits), a sampled point between two coarse + # predictions with -1 and 1 logits has 0 logits, and therefore 0 + # uncertainty value. However, if we calculate uncertainties for the + # coarse predictions first, both will have -1 uncertainty, + # and sampled point will get -1 uncertainty. + point_uncertainties = uncertainty_func(point_logits) + num_uncertain_points = int(importance_sample_ratio * num_points) + num_random_points = num_points - num_uncertain_points + idx = torch.topk( + point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1] + shift = num_sampled * torch.arange( + batch_size, dtype=torch.long, device=seg_logits.device) + idx += shift[:, None] + point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view( + batch_size, num_uncertain_points, 2) + if num_random_points > 0: + rand_point_coords = torch.rand( + batch_size, num_random_points, 2, device=seg_logits.device) + point_coords = torch.cat((point_coords, rand_point_coords), dim=1) + return point_coords + + def get_points_test(self, seg_logits, uncertainty_func, cfg): + """Sample points for testing. + + Find ``num_points`` most uncertain points from ``uncertainty_map``. + + Args: + seg_logits (Tensor): A tensor of shape (batch_size, num_classes, + height, width) for class-specific or class-agnostic prediction. + uncertainty_func (func): uncertainty calculation function. + cfg (dict): Testing config of point head. + + Returns: + point_indices (Tensor): A tensor of shape (batch_size, num_points) + that contains indices from [0, height x width) of the most + uncertain points. + point_coords (Tensor): A tensor of shape (batch_size, num_points, + 2) that contains [0, 1] x [0, 1] normalized coordinates of the + most uncertain points from the ``height x width`` grid . + """ + + num_points = cfg.subdivision_num_points + uncertainty_map = uncertainty_func(seg_logits) + batch_size, _, height, width = uncertainty_map.shape + h_step = 1.0 / height + w_step = 1.0 / width + + uncertainty_map = uncertainty_map.view(batch_size, height * width) + num_points = min(height * width, num_points) + point_indices = uncertainty_map.topk(num_points, dim=1)[1] + point_coords = torch.zeros( + batch_size, + num_points, + 2, + dtype=torch.float, + device=seg_logits.device) + point_coords[:, :, 0] = w_step / 2.0 + (point_indices % + width).float() * w_step + point_coords[:, :, 1] = h_step / 2.0 + (point_indices // + width).float() * h_step + return point_indices, point_coords diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py new file mode 100644 index 000000000000..c85bfc34a5a6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py @@ -0,0 +1,196 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + +try: + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import PSAMask +except ModuleNotFoundError: + PSAMask = None + + +@HEADS.register_module() +class PSAHead(BaseDecodeHead): + """Point-wise Spatial Attention Network for Scene Parsing. + + This head is the implementation of `PSANet + `_. + + Args: + mask_size (tuple[int]): The PSA mask size. It usually equals input + size. + psa_type (str): The type of psa module. Options are 'collect', + 'distribute', 'bi-direction'. Default: 'bi-direction' + compact (bool): Whether use compact map for 'collect' mode. + Default: True. + shrink_factor (int): The downsample factors of psa mask. Default: 2. + normalization_factor (float): The normalize factor of attention. + psa_softmax (bool): Whether use softmax for attention. + """ + + def __init__(self, + mask_size, + psa_type='bi-direction', + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + **kwargs): + if PSAMask is None: + raise RuntimeError('Please install mmcv-full for PSAMask ops') + super(PSAHead, self).__init__(**kwargs) + assert psa_type in ['collect', 'distribute', 'bi-direction'] + self.psa_type = psa_type + self.compact = compact + self.shrink_factor = shrink_factor + self.mask_size = mask_size + mask_h, mask_w = mask_size + self.psa_softmax = psa_softmax + if normalization_factor is None: + normalization_factor = mask_h * mask_w + self.normalization_factor = normalization_factor + + self.reduce = ConvModule( + self.in_channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.attention = nn.Sequential( + ConvModule( + self.channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + nn.Conv2d( + self.channels, mask_h * mask_w, kernel_size=1, bias=False)) + if psa_type == 'bi-direction': + self.reduce_p = ConvModule( + self.in_channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.attention_p = nn.Sequential( + ConvModule( + self.channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + nn.Conv2d( + self.channels, mask_h * mask_w, kernel_size=1, bias=False)) + self.psamask_collect = PSAMask('collect', mask_size) + self.psamask_distribute = PSAMask('distribute', mask_size) + else: + self.psamask = PSAMask(psa_type, mask_size) + self.proj = ConvModule( + self.channels * (2 if psa_type == 'bi-direction' else 1), + self.in_channels, + kernel_size=1, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.bottleneck = ConvModule( + self.in_channels * 2, + self.channels, + kernel_size=3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + identity = x + align_corners = self.align_corners + if self.psa_type in ['collect', 'distribute']: + out = self.reduce(x) + n, c, h, w = out.size() + if self.shrink_factor != 1: + if h % self.shrink_factor and w % self.shrink_factor: + h = (h - 1) // self.shrink_factor + 1 + w = (w - 1) // self.shrink_factor + 1 + align_corners = True + else: + h = h // self.shrink_factor + w = w // self.shrink_factor + align_corners = False + out = resize( + out, + size=(h, w), + mode='bilinear', + align_corners=align_corners) + y = self.attention(out) + if self.compact: + if self.psa_type == 'collect': + y = y.view(n, h * w, + h * w).transpose(1, 2).view(n, h * w, h, w) + else: + y = self.psamask(y) + if self.psa_softmax: + y = F.softmax(y, dim=1) + out = torch.bmm( + out.view(n, c, h * w), y.view(n, h * w, h * w)).view( + n, c, h, w) * (1.0 / self.normalization_factor) + else: + x_col = self.reduce(x) + x_dis = self.reduce_p(x) + n, c, h, w = x_col.size() + if self.shrink_factor != 1: + if h % self.shrink_factor and w % self.shrink_factor: + h = (h - 1) // self.shrink_factor + 1 + w = (w - 1) // self.shrink_factor + 1 + align_corners = True + else: + h = h // self.shrink_factor + w = w // self.shrink_factor + align_corners = False + x_col = resize( + x_col, + size=(h, w), + mode='bilinear', + align_corners=align_corners) + x_dis = resize( + x_dis, + size=(h, w), + mode='bilinear', + align_corners=align_corners) + y_col = self.attention(x_col) + y_dis = self.attention_p(x_dis) + if self.compact: + y_dis = y_dis.view(n, h * w, + h * w).transpose(1, 2).view(n, h * w, h, w) + else: + y_col = self.psamask_collect(y_col) + y_dis = self.psamask_distribute(y_dis) + if self.psa_softmax: + y_col = F.softmax(y_col, dim=1) + y_dis = F.softmax(y_dis, dim=1) + x_col = torch.bmm( + x_col.view(n, c, h * w), y_col.view(n, h * w, h * w)).view( + n, c, h, w) * (1.0 / self.normalization_factor) + x_dis = torch.bmm( + x_dis.view(n, c, h * w), y_dis.view(n, h * w, h * w)).view( + n, c, h, w) * (1.0 / self.normalization_factor) + out = torch.cat([x_col, x_dis], 1) + out = self.proj(out) + out = resize( + out, + size=identity.shape[2:], + mode='bilinear', + align_corners=align_corners) + out = self.bottleneck(torch.cat((identity, out), dim=1)) + out = self.cls_seg(out) + return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py new file mode 100644 index 000000000000..e8b5cf0fb261 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py @@ -0,0 +1,101 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class PPM(nn.ModuleList): + """Pooling Pyramid Module used in PSPNet. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + align_corners (bool): align_corners argument of F.interpolate. + """ + + def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg, + act_cfg, align_corners): + super(PPM, self).__init__() + self.pool_scales = pool_scales + self.align_corners = align_corners + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + for pool_scale in pool_scales: + self.append( + nn.Sequential( + nn.AdaptiveAvgPool2d(pool_scale), + ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg))) + + def forward(self, x): + """Forward function.""" + ppm_outs = [] + for ppm in self: + ppm_out = ppm(x) + upsampled_ppm_out = resize( + ppm_out, + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + ppm_outs.append(upsampled_ppm_out) + return ppm_outs + + +@HEADS.register_module() +class PSPHead(BaseDecodeHead): + """Pyramid Scene Parsing Network. + + This head is the implementation of + `PSPNet `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. Default: (1, 2, 3, 6). + """ + + def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): + super(PSPHead, self).__init__(**kwargs) + assert isinstance(pool_scales, (list, tuple)) + self.pool_scales = pool_scales + self.psp_modules = PPM( + self.pool_scales, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + self.bottleneck = ConvModule( + self.in_channels + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + psp_outs = [x] + psp_outs.extend(self.psp_modules(x)) + psp_outs = torch.cat(psp_outs, dim=1) + output = self.bottleneck(psp_outs) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py new file mode 100644 index 000000000000..dae028e41876 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py @@ -0,0 +1,101 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, DepthwiseSeparableConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .aspp_head import ASPPHead, ASPPModule + + +class DepthwiseSeparableASPPModule(ASPPModule): + """Atrous Spatial Pyramid Pooling (ASPP) Module with depthwise separable + conv.""" + + def __init__(self, **kwargs): + super(DepthwiseSeparableASPPModule, self).__init__(**kwargs) + for i, dilation in enumerate(self.dilations): + if dilation > 1: + self[i] = DepthwiseSeparableConvModule( + self.in_channels, + self.channels, + 3, + dilation=dilation, + padding=dilation, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + +@HEADS.register_module() +class DepthwiseSeparableASPPHead(ASPPHead): + """Encoder-Decoder with Atrous Separable Convolution for Semantic Image + Segmentation. + + This head is the implementation of `DeepLabV3+ + `_. + + Args: + c1_in_channels (int): The input channels of c1 decoder. If is 0, + the no decoder will be used. + c1_channels (int): The intermediate channels of c1 decoder. + """ + + def __init__(self, c1_in_channels, c1_channels, **kwargs): + super(DepthwiseSeparableASPPHead, self).__init__(**kwargs) + assert c1_in_channels >= 0 + self.aspp_modules = DepthwiseSeparableASPPModule( + dilations=self.dilations, + in_channels=self.in_channels, + channels=self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if c1_in_channels > 0: + self.c1_bottleneck = ConvModule( + c1_in_channels, + c1_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + else: + self.c1_bottleneck = None + self.sep_bottleneck = nn.Sequential( + DepthwiseSeparableConvModule( + self.channels + c1_channels, + self.channels, + 3, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + DepthwiseSeparableConvModule( + self.channels, + self.channels, + 3, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + aspp_outs = [ + resize( + self.image_pool(x), + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + ] + aspp_outs.extend(self.aspp_modules(x)) + aspp_outs = torch.cat(aspp_outs, dim=1) + output = self.bottleneck(aspp_outs) + if self.c1_bottleneck is not None: + c1_output = self.c1_bottleneck(inputs[0]) + output = resize( + input=output, + size=c1_output.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + output = torch.cat([output, c1_output], dim=1) + output = self.sep_bottleneck(output) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py new file mode 100644 index 000000000000..35479f2419a3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py @@ -0,0 +1,51 @@ +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import DepthwiseSeparableConvModule + +from ..builder import HEADS +from .fcn_head import FCNHead + + +@HEADS.register_module() +class DepthwiseSeparableFCNHead(FCNHead): + """Depthwise-Separable Fully Convolutional Network for Semantic + Segmentation. + + This head is implemented according to Fast-SCNN paper. + Args: + in_channels(int): Number of output channels of FFM. + channels(int): Number of middle-stage channels in the decode head. + concat_input(bool): Whether to concatenate original decode input into + the result of several consecutive convolution layers. + Default: True. + num_classes(int): Used to determine the dimension of + final prediction tensor. + in_index(int): Correspond with 'out_indices' in FastSCNN backbone. + norm_cfg (dict | None): Config of norm layers. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + loss_decode(dict): Config of loss type and some + relevant additional options. + """ + + def __init__(self, **kwargs): + super(DepthwiseSeparableFCNHead, self).__init__(**kwargs) + self.convs[0] = DepthwiseSeparableConvModule( + self.in_channels, + self.channels, + kernel_size=self.kernel_size, + padding=self.kernel_size // 2, + norm_cfg=self.norm_cfg) + for i in range(1, self.num_convs): + self.convs[i] = DepthwiseSeparableConvModule( + self.channels, + self.channels, + kernel_size=self.kernel_size, + padding=self.kernel_size // 2, + norm_cfg=self.norm_cfg) + + if self.concat_input: + self.conv_cat = DepthwiseSeparableConvModule( + self.in_channels + self.channels, + self.channels, + kernel_size=self.kernel_size, + padding=self.kernel_size // 2, + norm_cfg=self.norm_cfg) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py new file mode 100644 index 000000000000..ff3fc4f1846c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py @@ -0,0 +1,126 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead +from .psp_head import PPM + + +@HEADS.register_module() +class UPerHead(BaseDecodeHead): + """Unified Perceptual Parsing for Scene Understanding. + + This head is the implementation of `UPerNet + `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module applied on the last feature. Default: (1, 2, 3, 6). + """ + + def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): + super(UPerHead, self).__init__( + input_transform='multiple_select', **kwargs) + # PSP Module + self.psp_modules = PPM( + pool_scales, + self.in_channels[-1], + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + self.bottleneck = ConvModule( + self.in_channels[-1] + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + # FPN Module + self.lateral_convs = nn.ModuleList() + self.fpn_convs = nn.ModuleList() + for in_channels in self.in_channels[:-1]: # skip the top layer + l_conv = ConvModule( + in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + inplace=False) + fpn_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + inplace=False) + self.lateral_convs.append(l_conv) + self.fpn_convs.append(fpn_conv) + + self.fpn_bottleneck = ConvModule( + len(self.in_channels) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def psp_forward(self, inputs): + """Forward function of PSP module.""" + x = inputs[-1] + psp_outs = [x] + psp_outs.extend(self.psp_modules(x)) + psp_outs = torch.cat(psp_outs, dim=1) + output = self.bottleneck(psp_outs) + + return output + + def forward(self, inputs): + """Forward function.""" + + inputs = self._transform_inputs(inputs) + + # build laterals + laterals = [ + lateral_conv(inputs[i]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + + laterals.append(self.psp_forward(inputs)) + + # build top-down path + used_backbone_levels = len(laterals) + for i in range(used_backbone_levels - 1, 0, -1): + prev_shape = laterals[i - 1].shape[2:] + laterals[i - 1] += resize( + laterals[i], + size=prev_shape, + mode='bilinear', + align_corners=self.align_corners) + + # build outputs + fpn_outs = [ + self.fpn_convs[i](laterals[i]) + for i in range(used_backbone_levels - 1) + ] + # append psp feature + fpn_outs.append(laterals[-1]) + + for i in range(used_backbone_levels - 1, 0, -1): + fpn_outs[i] = resize( + fpn_outs[i], + size=fpn_outs[0].shape[2:], + mode='bilinear', + align_corners=self.align_corners) + fpn_outs = torch.cat(fpn_outs, dim=1) + output = self.fpn_bottleneck(fpn_outs) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py new file mode 100644 index 000000000000..beca72045694 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py @@ -0,0 +1,12 @@ +from .accuracy import Accuracy, accuracy +from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, + cross_entropy, mask_cross_entropy) +from .dice_loss import DiceLoss +from .lovasz_loss import LovaszLoss +from .utils import reduce_loss, weight_reduce_loss, weighted_loss + +__all__ = [ + 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', + 'mask_cross_entropy', 'CrossEntropyLoss', 'reduce_loss', + 'weight_reduce_loss', 'weighted_loss', 'LovaszLoss', 'DiceLoss' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py new file mode 100644 index 000000000000..c0fd2e7e74a0 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py @@ -0,0 +1,78 @@ +import torch.nn as nn + + +def accuracy(pred, target, topk=1, thresh=None): + """Calculate accuracy according to the prediction and target. + + Args: + pred (torch.Tensor): The model prediction, shape (N, num_class, ...) + target (torch.Tensor): The target of each prediction, shape (N, , ...) + topk (int | tuple[int], optional): If the predictions in ``topk`` + matches the target, the predictions will be regarded as + correct ones. Defaults to 1. + thresh (float, optional): If not None, predictions with scores under + this threshold are considered incorrect. Default to None. + + Returns: + float | tuple[float]: If the input ``topk`` is a single integer, + the function will return a single float as accuracy. If + ``topk`` is a tuple containing multiple integers, the + function will return a tuple containing accuracies of + each ``topk`` number. + """ + assert isinstance(topk, (int, tuple)) + if isinstance(topk, int): + topk = (topk, ) + return_single = True + else: + return_single = False + + maxk = max(topk) + if pred.size(0) == 0: + accu = [pred.new_tensor(0.) for i in range(len(topk))] + return accu[0] if return_single else accu + assert pred.ndim == target.ndim + 1 + assert pred.size(0) == target.size(0) + assert maxk <= pred.size(1), \ + f'maxk {maxk} exceeds pred dimension {pred.size(1)}' + pred_value, pred_label = pred.topk(maxk, dim=1) + # transpose to shape (maxk, N, ...) + pred_label = pred_label.transpose(0, 1) + correct = pred_label.eq(target.unsqueeze(0).expand_as(pred_label)) + if thresh is not None: + # Only prediction values larger than thresh are counted as correct + correct = correct & (pred_value > thresh).t() + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / target.numel())) + return res[0] if return_single else res + + +class Accuracy(nn.Module): + """Accuracy calculation module.""" + + def __init__(self, topk=(1, ), thresh=None): + """Module to calculate the accuracy. + + Args: + topk (tuple, optional): The criterion used to calculate the + accuracy. Defaults to (1,). + thresh (float, optional): If not None, predictions with scores + under this threshold are considered incorrect. Default to None. + """ + super().__init__() + self.topk = topk + self.thresh = thresh + + def forward(self, pred, target): + """Forward function to calculate accuracy. + + Args: + pred (torch.Tensor): Prediction of models. + target (torch.Tensor): Target for each prediction. + + Returns: + tuple[float]: The accuracies under different topk criterions. + """ + return accuracy(pred, target, self.topk, self.thresh) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py new file mode 100644 index 000000000000..42c0790c9861 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py @@ -0,0 +1,198 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..builder import LOSSES +from .utils import get_class_weight, weight_reduce_loss + + +def cross_entropy(pred, + label, + weight=None, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=-100): + """The wrapper function for :func:`F.cross_entropy`""" + # class_weight is a manual rescaling weight given to each class. + # If given, has to be a Tensor of size C element-wise losses + loss = F.cross_entropy( + pred, + label, + weight=class_weight, + reduction='none', + ignore_index=ignore_index) + + # apply weights and do the reduction + if weight is not None: + weight = weight.float() + loss = weight_reduce_loss( + loss, weight=weight, reduction=reduction, avg_factor=avg_factor) + + return loss + + +def _expand_onehot_labels(labels, label_weights, target_shape, ignore_index): + """Expand onehot labels to match the size of prediction.""" + bin_labels = labels.new_zeros(target_shape) + valid_mask = (labels >= 0) & (labels != ignore_index) + inds = torch.nonzero(valid_mask, as_tuple=True) + + if inds[0].numel() > 0: + if labels.dim() == 3: + bin_labels[inds[0], labels[valid_mask], inds[1], inds[2]] = 1 + else: + bin_labels[inds[0], labels[valid_mask]] = 1 + + valid_mask = valid_mask.unsqueeze(1).expand(target_shape).float() + if label_weights is None: + bin_label_weights = valid_mask + else: + bin_label_weights = label_weights.unsqueeze(1).expand(target_shape) + bin_label_weights *= valid_mask + + return bin_labels, bin_label_weights + + +def binary_cross_entropy(pred, + label, + weight=None, + reduction='mean', + avg_factor=None, + class_weight=None, + ignore_index=255): + """Calculate the binary CrossEntropy loss. + + Args: + pred (torch.Tensor): The prediction with shape (N, 1). + label (torch.Tensor): The learning label of the prediction. + weight (torch.Tensor, optional): Sample-wise loss weight. + reduction (str, optional): The method used to reduce the loss. + Options are "none", "mean" and "sum". + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + class_weight (list[float], optional): The weight for each class. + ignore_index (int | None): The label index to be ignored. Default: 255 + + Returns: + torch.Tensor: The calculated loss + """ + if pred.dim() != label.dim(): + assert (pred.dim() == 2 and label.dim() == 1) or ( + pred.dim() == 4 and label.dim() == 3), \ + 'Only pred shape [N, C], label shape [N] or pred shape [N, C, ' \ + 'H, W], label shape [N, H, W] are supported' + label, weight = _expand_onehot_labels(label, weight, pred.shape, + ignore_index) + + # weighted element-wise losses + if weight is not None: + weight = weight.float() + loss = F.binary_cross_entropy_with_logits( + pred, label.float(), pos_weight=class_weight, reduction='none') + # do the reduction for the weighted loss + loss = weight_reduce_loss( + loss, weight, reduction=reduction, avg_factor=avg_factor) + + return loss + + +def mask_cross_entropy(pred, + target, + label, + reduction='mean', + avg_factor=None, + class_weight=None, + ignore_index=None): + """Calculate the CrossEntropy loss for masks. + + Args: + pred (torch.Tensor): The prediction with shape (N, C), C is the number + of classes. + target (torch.Tensor): The learning label of the prediction. + label (torch.Tensor): ``label`` indicates the class label of the mask' + corresponding object. This will be used to select the mask in the + of the class which the object belongs to when the mask prediction + if not class-agnostic. + reduction (str, optional): The method used to reduce the loss. + Options are "none", "mean" and "sum". + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + class_weight (list[float], optional): The weight for each class. + ignore_index (None): Placeholder, to be consistent with other loss. + Default: None. + + Returns: + torch.Tensor: The calculated loss + """ + assert ignore_index is None, 'BCE loss does not support ignore_index' + # TODO: handle these two reserved arguments + assert reduction == 'mean' and avg_factor is None + num_rois = pred.size()[0] + inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) + pred_slice = pred[inds, label].squeeze(1) + return F.binary_cross_entropy_with_logits( + pred_slice, target, weight=class_weight, reduction='mean')[None] + + +@LOSSES.register_module() +class CrossEntropyLoss(nn.Module): + """CrossEntropyLoss. + + Args: + use_sigmoid (bool, optional): Whether the prediction uses sigmoid + of softmax. Defaults to False. + use_mask (bool, optional): Whether to use mask cross entropy loss. + Defaults to False. + reduction (str, optional): . Defaults to 'mean'. + Options are "none", "mean" and "sum". + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Defaults to 1.0. + """ + + def __init__(self, + use_sigmoid=False, + use_mask=False, + reduction='mean', + class_weight=None, + loss_weight=1.0): + super(CrossEntropyLoss, self).__init__() + assert (use_sigmoid is False) or (use_mask is False) + self.use_sigmoid = use_sigmoid + self.use_mask = use_mask + self.reduction = reduction + self.loss_weight = loss_weight + self.class_weight = get_class_weight(class_weight) + + if self.use_sigmoid: + self.cls_criterion = binary_cross_entropy + elif self.use_mask: + self.cls_criterion = mask_cross_entropy + else: + self.cls_criterion = cross_entropy + + def forward(self, + cls_score, + label, + weight=None, + avg_factor=None, + reduction_override=None, + **kwargs): + """Forward function.""" + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.class_weight is not None: + class_weight = cls_score.new_tensor(self.class_weight) + else: + class_weight = None + loss_cls = self.loss_weight * self.cls_criterion( + cls_score, + label, + weight, + class_weight=class_weight, + reduction=reduction, + avg_factor=avg_factor, + **kwargs) + return loss_cls diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py new file mode 100644 index 000000000000..27a77b962d7d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py @@ -0,0 +1,119 @@ +"""Modified from https://github.com/LikeLy-Journey/SegmenTron/blob/master/ +segmentron/solver/loss.py (Apache-2.0 License)""" +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..builder import LOSSES +from .utils import get_class_weight, weighted_loss + + +@weighted_loss +def dice_loss(pred, + target, + valid_mask, + smooth=1, + exponent=2, + class_weight=None, + ignore_index=255): + assert pred.shape[0] == target.shape[0] + total_loss = 0 + num_classes = pred.shape[1] + for i in range(num_classes): + if i != ignore_index: + dice_loss = binary_dice_loss( + pred[:, i], + target[..., i], + valid_mask=valid_mask, + smooth=smooth, + exponent=exponent) + if class_weight is not None: + dice_loss *= class_weight[i] + total_loss += dice_loss + return total_loss / num_classes + + +@weighted_loss +def binary_dice_loss(pred, target, valid_mask, smooth=1, exponent=2, **kwards): + assert pred.shape[0] == target.shape[0] + pred = pred.reshape(pred.shape[0], -1) + target = target.reshape(target.shape[0], -1) + valid_mask = valid_mask.reshape(valid_mask.shape[0], -1) + + num = torch.sum(torch.mul(pred, target) * valid_mask, dim=1) * 2 + smooth + den = torch.sum(pred.pow(exponent) + target.pow(exponent), dim=1) + smooth + + return 1 - num / den + + +@LOSSES.register_module() +class DiceLoss(nn.Module): + """DiceLoss. + + This loss is proposed in `V-Net: Fully Convolutional Neural Networks for + Volumetric Medical Image Segmentation `_. + + Args: + loss_type (str, optional): Binary or multi-class loss. + Default: 'multi_class'. Options are "binary" and "multi_class". + smooth (float): A float number to smooth loss, and avoid NaN error. + Default: 1 + exponent (float): An float number to calculate denominator + value: \\sum{x^exponent} + \\sum{y^exponent}. Default: 2. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Default to 1.0. + ignore_index (int | None): The label index to be ignored. Default: 255. + """ + + def __init__(self, + smooth=1, + exponent=2, + reduction='mean', + class_weight=None, + loss_weight=1.0, + ignore_index=255, + **kwards): + super(DiceLoss, self).__init__() + self.smooth = smooth + self.exponent = exponent + self.reduction = reduction + self.class_weight = get_class_weight(class_weight) + self.loss_weight = loss_weight + self.ignore_index = ignore_index + + def forward(self, + pred, + target, + avg_factor=None, + reduction_override=None, + **kwards): + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.class_weight is not None: + class_weight = pred.new_tensor(self.class_weight) + else: + class_weight = None + + pred = F.softmax(pred, dim=1) + num_classes = pred.shape[1] + one_hot_target = F.one_hot( + torch.clamp(target.long(), 0, num_classes - 1), + num_classes=num_classes) + valid_mask = (target != self.ignore_index).long() + + loss = self.loss_weight * dice_loss( + pred, + one_hot_target, + valid_mask=valid_mask, + reduction=reduction, + avg_factor=avg_factor, + smooth=self.smooth, + exponent=self.exponent, + class_weight=class_weight, + ignore_index=self.ignore_index) + return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py new file mode 100644 index 000000000000..fb3b313b61e4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py @@ -0,0 +1,303 @@ +"""Modified from https://github.com/bermanmaxim/LovaszSoftmax/blob/master/pytor +ch/lovasz_losses.py Lovasz-Softmax and Jaccard hinge loss in PyTorch Maxim +Berman 2018 ESAT-PSI KU Leuven (MIT License)""" + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..builder import LOSSES +from .utils import get_class_weight, weight_reduce_loss + + +def lovasz_grad(gt_sorted): + """Computes gradient of the Lovasz extension w.r.t sorted errors. + + See Alg. 1 in paper. + """ + p = len(gt_sorted) + gts = gt_sorted.sum() + intersection = gts - gt_sorted.float().cumsum(0) + union = gts + (1 - gt_sorted).float().cumsum(0) + jaccard = 1. - intersection / union + if p > 1: # cover 1-pixel case + jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] + return jaccard + + +def flatten_binary_logits(logits, labels, ignore_index=None): + """Flattens predictions in the batch (binary case) Remove labels equal to + 'ignore_index'.""" + logits = logits.view(-1) + labels = labels.view(-1) + if ignore_index is None: + return logits, labels + valid = (labels != ignore_index) + vlogits = logits[valid] + vlabels = labels[valid] + return vlogits, vlabels + + +def flatten_probs(probs, labels, ignore_index=None): + """Flattens predictions in the batch.""" + if probs.dim() == 3: + # assumes output of a sigmoid layer + B, H, W = probs.size() + probs = probs.view(B, 1, H, W) + B, C, H, W = probs.size() + probs = probs.permute(0, 2, 3, 1).contiguous().view(-1, C) # B*H*W, C=P,C + labels = labels.view(-1) + if ignore_index is None: + return probs, labels + valid = (labels != ignore_index) + vprobs = probs[valid.nonzero().squeeze()] + vlabels = labels[valid] + return vprobs, vlabels + + +def lovasz_hinge_flat(logits, labels): + """Binary Lovasz hinge loss. + + Args: + logits (torch.Tensor): [P], logits at each prediction + (between -infty and +infty). + labels (torch.Tensor): [P], binary ground truth labels (0 or 1). + + Returns: + torch.Tensor: The calculated loss. + """ + if len(labels) == 0: + # only void pixels, the gradients should be 0 + return logits.sum() * 0. + signs = 2. * labels.float() - 1. + errors = (1. - logits * signs) + errors_sorted, perm = torch.sort(errors, dim=0, descending=True) + perm = perm.data + gt_sorted = labels[perm] + grad = lovasz_grad(gt_sorted) + loss = torch.dot(F.relu(errors_sorted), grad) + return loss + + +def lovasz_hinge(logits, + labels, + classes='present', + per_image=False, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=255): + """Binary Lovasz hinge loss. + + Args: + logits (torch.Tensor): [B, H, W], logits at each pixel + (between -infty and +infty). + labels (torch.Tensor): [B, H, W], binary ground truth masks (0 or 1). + classes (str | list[int], optional): Placeholder, to be consistent with + other loss. Default: None. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + class_weight (list[float], optional): Placeholder, to be consistent + with other loss. Default: None. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. This parameter only works when per_image is True. + Default: None. + ignore_index (int | None): The label index to be ignored. Default: 255. + + Returns: + torch.Tensor: The calculated loss. + """ + if per_image: + loss = [ + lovasz_hinge_flat(*flatten_binary_logits( + logit.unsqueeze(0), label.unsqueeze(0), ignore_index)) + for logit, label in zip(logits, labels) + ] + loss = weight_reduce_loss( + torch.stack(loss), None, reduction, avg_factor) + else: + loss = lovasz_hinge_flat( + *flatten_binary_logits(logits, labels, ignore_index)) + return loss + + +def lovasz_softmax_flat(probs, labels, classes='present', class_weight=None): + """Multi-class Lovasz-Softmax loss. + + Args: + probs (torch.Tensor): [P, C], class probabilities at each prediction + (between 0 and 1). + labels (torch.Tensor): [P], ground truth labels (between 0 and C - 1). + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + class_weight (list[float], optional): The weight for each class. + Default: None. + + Returns: + torch.Tensor: The calculated loss. + """ + if probs.numel() == 0: + # only void pixels, the gradients should be 0 + return probs * 0. + C = probs.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes + for c in class_to_sum: + fg = (labels == c).float() # foreground for class c + if (classes == 'present' and fg.sum() == 0): + continue + if C == 1: + if len(classes) > 1: + raise ValueError('Sigmoid output possible only with 1 class') + class_pred = probs[:, 0] + else: + class_pred = probs[:, c] + errors = (fg - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + loss = torch.dot(errors_sorted, lovasz_grad(fg_sorted)) + if class_weight is not None: + loss *= class_weight[c] + losses.append(loss) + return torch.stack(losses).mean() + + +def lovasz_softmax(probs, + labels, + classes='present', + per_image=False, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=255): + """Multi-class Lovasz-Softmax loss. + + Args: + probs (torch.Tensor): [B, C, H, W], class probabilities at each + prediction (between 0 and 1). + labels (torch.Tensor): [B, H, W], ground truth labels (between 0 and + C - 1). + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + class_weight (list[float], optional): The weight for each class. + Default: None. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. This parameter only works when per_image is True. + Default: None. + ignore_index (int | None): The label index to be ignored. Default: 255. + + Returns: + torch.Tensor: The calculated loss. + """ + + if per_image: + loss = [ + lovasz_softmax_flat( + *flatten_probs( + prob.unsqueeze(0), label.unsqueeze(0), ignore_index), + classes=classes, + class_weight=class_weight) + for prob, label in zip(probs, labels) + ] + loss = weight_reduce_loss( + torch.stack(loss), None, reduction, avg_factor) + else: + loss = lovasz_softmax_flat( + *flatten_probs(probs, labels, ignore_index), + classes=classes, + class_weight=class_weight) + return loss + + +@LOSSES.register_module() +class LovaszLoss(nn.Module): + """LovaszLoss. + + This loss is proposed in `The Lovasz-Softmax loss: A tractable surrogate + for the optimization of the intersection-over-union measure in neural + networks `_. + + Args: + loss_type (str, optional): Binary or multi-class loss. + Default: 'multi_class'. Options are "binary" and "multi_class". + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Defaults to 1.0. + """ + + def __init__(self, + loss_type='multi_class', + classes='present', + per_image=False, + reduction='mean', + class_weight=None, + loss_weight=1.0): + super(LovaszLoss, self).__init__() + assert loss_type in ('binary', 'multi_class'), "loss_type should be \ + 'binary' or 'multi_class'." + + if loss_type == 'binary': + self.cls_criterion = lovasz_hinge + else: + self.cls_criterion = lovasz_softmax + assert classes in ('all', 'present') or mmcv.is_list_of(classes, int) + if not per_image: + assert reduction == 'none', "reduction should be 'none' when \ + per_image is False." + + self.classes = classes + self.per_image = per_image + self.reduction = reduction + self.loss_weight = loss_weight + self.class_weight = get_class_weight(class_weight) + + def forward(self, + cls_score, + label, + weight=None, + avg_factor=None, + reduction_override=None, + **kwargs): + """Forward function.""" + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.class_weight is not None: + class_weight = cls_score.new_tensor(self.class_weight) + else: + class_weight = None + + # if multi-class loss, transform logits to probs + if self.cls_criterion == lovasz_softmax: + cls_score = F.softmax(cls_score, dim=1) + + loss_cls = self.loss_weight * self.cls_criterion( + cls_score, + label, + self.classes, + self.per_image, + class_weight=class_weight, + reduction=reduction, + avg_factor=avg_factor, + **kwargs) + return loss_cls diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py new file mode 100644 index 000000000000..d9801a4195c9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py @@ -0,0 +1,121 @@ +import functools + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +import numpy as np +import torch.nn.functional as F + + +def get_class_weight(class_weight): + """Get class weight for loss function. + + Args: + class_weight (list[float] | str | None): If class_weight is a str, + take it as a file name and read from it. + """ + if isinstance(class_weight, str): + # take it as a file path + if class_weight.endswith('.npy'): + class_weight = np.load(class_weight) + else: + # pkl, json or yaml + class_weight = mmcv.load(class_weight) + + return class_weight + + +def reduce_loss(loss, reduction): + """Reduce loss as specified. + + Args: + loss (Tensor): Elementwise loss tensor. + reduction (str): Options are "none", "mean" and "sum". + + Return: + Tensor: Reduced loss tensor. + """ + reduction_enum = F._Reduction.get_enum(reduction) + # none: 0, elementwise_mean:1, sum: 2 + if reduction_enum == 0: + return loss + elif reduction_enum == 1: + return loss.mean() + elif reduction_enum == 2: + return loss.sum() + + +def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): + """Apply element-wise weight and reduce loss. + + Args: + loss (Tensor): Element-wise loss. + weight (Tensor): Element-wise weights. + reduction (str): Same as built-in losses of PyTorch. + avg_factor (float): Avarage factor when computing the mean of losses. + + Returns: + Tensor: Processed loss values. + """ + # if weight is specified, apply element-wise weight + if weight is not None: + assert weight.dim() == loss.dim() + if weight.dim() > 1: + assert weight.size(1) == 1 or weight.size(1) == loss.size(1) + loss = loss * weight + + # if avg_factor is not specified, just reduce the loss + if avg_factor is None: + loss = reduce_loss(loss, reduction) + else: + # if reduction is mean, then average the loss by avg_factor + if reduction == 'mean': + loss = loss.sum() / avg_factor + # if reduction is 'none', then do nothing, otherwise raise an error + elif reduction != 'none': + raise ValueError('avg_factor can not be used with reduction="sum"') + return loss + + +def weighted_loss(loss_func): + """Create a weighted version of a given loss function. + + To use this decorator, the loss function must have the signature like + `loss_func(pred, target, **kwargs)`. The function only needs to compute + element-wise loss without any reduction. This decorator will add weight + and reduction arguments to the function. The decorated function will have + the signature like `loss_func(pred, target, weight=None, reduction='mean', + avg_factor=None, **kwargs)`. + + :Example: + + >>> import torch + >>> @weighted_loss + >>> def l1_loss(pred, target): + >>> return (pred - target).abs() + + >>> pred = torch.Tensor([0, 2, 3]) + >>> target = torch.Tensor([1, 1, 1]) + >>> weight = torch.Tensor([1, 0, 1]) + + >>> l1_loss(pred, target) + tensor(1.3333) + >>> l1_loss(pred, target, weight) + tensor(1.) + >>> l1_loss(pred, target, reduction='none') + tensor([1., 1., 2.]) + >>> l1_loss(pred, target, weight, avg_factor=2) + tensor(1.5000) + """ + + @functools.wraps(loss_func) + def wrapper(pred, + target, + weight=None, + reduction='mean', + avg_factor=None, + **kwargs): + # get element-wise loss + loss = loss_func(pred, target, **kwargs) + loss = weight_reduce_loss(loss, weight, reduction, avg_factor) + return loss + + return wrapper diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/__init__.py new file mode 100644 index 000000000000..9b9d3d5b3fe8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/__init__.py @@ -0,0 +1,4 @@ +from .fpn import FPN +from .multilevel_neck import MultiLevelNeck + +__all__ = ['FPN', 'MultiLevelNeck'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py new file mode 100644 index 000000000000..fa5c836de534 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py @@ -0,0 +1,212 @@ +import torch.nn as nn +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, xavier_init + +from ..builder import NECKS + + +@NECKS.register_module() +class FPN(nn.Module): + """Feature Pyramid Network. + + This is an implementation of - Feature Pyramid Networks for Object + Detection (https://arxiv.org/abs/1612.03144) + + Args: + in_channels (List[int]): Number of input channels per scale. + out_channels (int): Number of output channels (used at each scale) + num_outs (int): Number of output scales. + start_level (int): Index of the start input backbone level used to + build the feature pyramid. Default: 0. + end_level (int): Index of the end input backbone level (exclusive) to + build the feature pyramid. Default: -1, which means the last level. + add_extra_convs (bool | str): If bool, it decides whether to add conv + layers on top of the original feature maps. Default to False. + If True, its actual mode is specified by `extra_convs_on_inputs`. + If str, it specifies the source feature map of the extra convs. + Only the following options are allowed + + - 'on_input': Last feat map of neck inputs (i.e. backbone feature). + - 'on_lateral': Last feature map after lateral convs. + - 'on_output': The last output feature map after fpn convs. + extra_convs_on_inputs (bool, deprecated): Whether to apply extra convs + on the original feature from the backbone. If True, + it is equivalent to `add_extra_convs='on_input'`. If False, it is + equivalent to set `add_extra_convs='on_output'`. Default to True. + relu_before_extra_convs (bool): Whether to apply relu before the extra + conv. Default: False. + no_norm_on_lateral (bool): Whether to apply norm on lateral. + Default: False. + conv_cfg (dict): Config dict for convolution layer. Default: None. + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (str): Config dict for activation layer in ConvModule. + Default: None. + upsample_cfg (dict): Config dict for interpolate layer. + Default: `dict(mode='nearest')` + + Example: + >>> import torch + >>> in_channels = [2, 3, 5, 7] + >>> scales = [340, 170, 84, 43] + >>> inputs = [torch.rand(1, c, s, s) + ... for c, s in zip(in_channels, scales)] + >>> self = FPN(in_channels, 11, len(in_channels)).eval() + >>> outputs = self.forward(inputs) + >>> for i in range(len(outputs)): + ... print(f'outputs[{i}].shape = {outputs[i].shape}') + outputs[0].shape = torch.Size([1, 11, 340, 340]) + outputs[1].shape = torch.Size([1, 11, 170, 170]) + outputs[2].shape = torch.Size([1, 11, 84, 84]) + outputs[3].shape = torch.Size([1, 11, 43, 43]) + """ + + def __init__(self, + in_channels, + out_channels, + num_outs, + start_level=0, + end_level=-1, + add_extra_convs=False, + extra_convs_on_inputs=False, + relu_before_extra_convs=False, + no_norm_on_lateral=False, + conv_cfg=None, + norm_cfg=None, + act_cfg=None, + upsample_cfg=dict(mode='nearest')): + super(FPN, self).__init__() + assert isinstance(in_channels, list) + self.in_channels = in_channels + self.out_channels = out_channels + self.num_ins = len(in_channels) + self.num_outs = num_outs + self.relu_before_extra_convs = relu_before_extra_convs + self.no_norm_on_lateral = no_norm_on_lateral + self.fp16_enabled = False + self.upsample_cfg = upsample_cfg.copy() + + if end_level == -1: + self.backbone_end_level = self.num_ins + assert num_outs >= self.num_ins - start_level + else: + # if end_level < inputs, no extra level is allowed + self.backbone_end_level = end_level + assert end_level <= len(in_channels) + assert num_outs == end_level - start_level + self.start_level = start_level + self.end_level = end_level + self.add_extra_convs = add_extra_convs + assert isinstance(add_extra_convs, (str, bool)) + if isinstance(add_extra_convs, str): + # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' + assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') + elif add_extra_convs: # True + if extra_convs_on_inputs: + # For compatibility with previous release + # TODO: deprecate `extra_convs_on_inputs` + self.add_extra_convs = 'on_input' + else: + self.add_extra_convs = 'on_output' + + self.lateral_convs = nn.ModuleList() + self.fpn_convs = nn.ModuleList() + + for i in range(self.start_level, self.backbone_end_level): + l_conv = ConvModule( + in_channels[i], + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, + act_cfg=act_cfg, + inplace=False) + fpn_conv = ConvModule( + out_channels, + out_channels, + 3, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + inplace=False) + + self.lateral_convs.append(l_conv) + self.fpn_convs.append(fpn_conv) + + # add extra conv layers (e.g., RetinaNet) + extra_levels = num_outs - self.backbone_end_level + self.start_level + if self.add_extra_convs and extra_levels >= 1: + for i in range(extra_levels): + if i == 0 and self.add_extra_convs == 'on_input': + in_channels = self.in_channels[self.backbone_end_level - 1] + else: + in_channels = out_channels + extra_fpn_conv = ConvModule( + in_channels, + out_channels, + 3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + inplace=False) + self.fpn_convs.append(extra_fpn_conv) + + # default init_weights for conv(msra) and norm in ConvModule + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + xavier_init(m, distribution='uniform') + + def forward(self, inputs): + assert len(inputs) == len(self.in_channels) + + # build laterals + laterals = [ + lateral_conv(inputs[i + self.start_level]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + + # build top-down path + used_backbone_levels = len(laterals) + for i in range(used_backbone_levels - 1, 0, -1): + # In some cases, fixing `scale factor` (e.g. 2) is preferred, but + # it cannot co-exist with `size` in `F.interpolate`. + if 'scale_factor' in self.upsample_cfg: + laterals[i - 1] += F.interpolate(laterals[i], + **self.upsample_cfg) + else: + prev_shape = laterals[i - 1].shape[2:] + laterals[i - 1] += F.interpolate( + laterals[i], size=prev_shape, **self.upsample_cfg) + + # build outputs + # part 1: from original levels + outs = [ + self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) + ] + # part 2: add extra levels + if self.num_outs > len(outs): + # use max pool to get more levels on top of outputs + # (e.g., Faster R-CNN, Mask R-CNN) + if not self.add_extra_convs: + for i in range(self.num_outs - used_backbone_levels): + outs.append(F.max_pool2d(outs[-1], 1, stride=2)) + # add conv layers on top of original feature maps (RetinaNet) + else: + if self.add_extra_convs == 'on_input': + extra_source = inputs[self.backbone_end_level - 1] + elif self.add_extra_convs == 'on_lateral': + extra_source = laterals[-1] + elif self.add_extra_convs == 'on_output': + extra_source = outs[-1] + else: + raise NotImplementedError + outs.append(self.fpn_convs[used_backbone_levels](extra_source)) + for i in range(used_backbone_levels + 1, self.num_outs): + if self.relu_before_extra_convs: + outs.append(self.fpn_convs[i](F.relu(outs[-1]))) + else: + outs.append(self.fpn_convs[i](outs[-1])) + return tuple(outs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py new file mode 100644 index 000000000000..a61e2b968d72 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py @@ -0,0 +1,70 @@ +import torch.nn as nn +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from ..builder import NECKS + + +@NECKS.register_module() +class MultiLevelNeck(nn.Module): + """MultiLevelNeck. + + A neck structure connect vit backbone and decoder_heads. + Args: + in_channels (List[int]): Number of input channels per scale. + out_channels (int): Number of output channels (used at each scale). + scales (List[int]): Scale factors for each input feature map. + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (dict): Config dict for activation layer in ConvModule. + Default: None. + """ + + def __init__(self, + in_channels, + out_channels, + scales=[0.5, 1, 2, 4], + norm_cfg=None, + act_cfg=None): + super(MultiLevelNeck, self).__init__() + assert isinstance(in_channels, list) + self.in_channels = in_channels + self.out_channels = out_channels + self.scales = scales + self.num_outs = len(scales) + self.lateral_convs = nn.ModuleList() + self.convs = nn.ModuleList() + for in_channel in in_channels: + self.lateral_convs.append( + ConvModule( + in_channel, + out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + for _ in range(self.num_outs): + self.convs.append( + ConvModule( + out_channels, + out_channels, + kernel_size=3, + padding=1, + stride=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + def forward(self, inputs): + assert len(inputs) == len(self.in_channels) + print(inputs[0].shape) + inputs = [ + lateral_conv(inputs[i]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + # for len(inputs) not equal to self.num_outs + if len(inputs) == 1: + inputs = [inputs[0] for _ in range(self.num_outs)] + outs = [] + for i in range(self.num_outs): + x_resize = F.interpolate( + inputs[i], scale_factor=self.scales[i], mode='bilinear') + outs.append(self.convs[i](x_resize)) + return tuple(outs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/__init__.py new file mode 100644 index 000000000000..dca2f0940533 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/__init__.py @@ -0,0 +1,5 @@ +from .base import BaseSegmentor +from .cascade_encoder_decoder import CascadeEncoderDecoder +from .encoder_decoder import EncoderDecoder + +__all__ = ['BaseSegmentor', 'EncoderDecoder', 'CascadeEncoderDecoder'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py new file mode 100644 index 000000000000..bf7d2829d6ea --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py @@ -0,0 +1,273 @@ +import logging +import warnings +from abc import ABCMeta, abstractmethod +from collections import OrderedDict + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +import numpy as np +import torch +import torch.distributed as dist +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import auto_fp16 + + +class BaseSegmentor(nn.Module): + """Base class for segmentors.""" + + __metaclass__ = ABCMeta + + def __init__(self): + super(BaseSegmentor, self).__init__() + self.fp16_enabled = False + + @property + def with_neck(self): + """bool: whether the segmentor has neck""" + return hasattr(self, 'neck') and self.neck is not None + + @property + def with_auxiliary_head(self): + """bool: whether the segmentor has auxiliary head""" + return hasattr(self, + 'auxiliary_head') and self.auxiliary_head is not None + + @property + def with_decode_head(self): + """bool: whether the segmentor has decode head""" + return hasattr(self, 'decode_head') and self.decode_head is not None + + @abstractmethod + def extract_feat(self, imgs): + """Placeholder for extract features from images.""" + pass + + @abstractmethod + def encode_decode(self, img, img_metas): + """Placeholder for encode images with backbone and decode into a + semantic segmentation map of the same size as input.""" + pass + + @abstractmethod + def forward_train(self, imgs, img_metas, **kwargs): + """Placeholder for Forward function for training.""" + pass + + @abstractmethod + def simple_test(self, img, img_meta, **kwargs): + """Placeholder for single image test.""" + pass + + @abstractmethod + def aug_test(self, imgs, img_metas, **kwargs): + """Placeholder for augmentation test.""" + pass + + def init_weights(self, pretrained=None): + """Initialize the weights in segmentor. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + if pretrained is not None: + logger = logging.getLogger() + logger.info(f'load model from: {pretrained}') + + def forward_test(self, imgs, img_metas, **kwargs): + """ + Args: + imgs (List[Tensor]): the outer list indicates test-time + augmentations and inner Tensor should have a shape NxCxHxW, + which contains all images in the batch. + img_metas (List[List[dict]]): the outer list indicates test-time + augs (multiscale, flip, etc.) and the inner list indicates + images in a batch. + """ + for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: + if not isinstance(var, list): + raise TypeError(f'{name} must be a list, but got ' + f'{type(var)}') + + num_augs = len(imgs) + if num_augs != len(img_metas): + raise ValueError(f'num of augmentations ({len(imgs)}) != ' + f'num of image meta ({len(img_metas)})') + # all images in the same aug batch all of the same ori_shape and pad + # shape + for img_meta in img_metas: + ori_shapes = [_['ori_shape'] for _ in img_meta] + assert all(shape == ori_shapes[0] for shape in ori_shapes) + img_shapes = [_['img_shape'] for _ in img_meta] + assert all(shape == img_shapes[0] for shape in img_shapes) + pad_shapes = [_['pad_shape'] for _ in img_meta] + assert all(shape == pad_shapes[0] for shape in pad_shapes) + + if num_augs == 1: + return self.simple_test(imgs[0], img_metas[0], **kwargs) + else: + return self.aug_test(imgs, img_metas, **kwargs) + + @auto_fp16(apply_to=('img', )) + def forward(self, img, img_metas, return_loss=True, **kwargs): + """Calls either :func:`forward_train` or :func:`forward_test` depending + on whether ``return_loss`` is ``True``. + + Note this setting will change the expected inputs. When + ``return_loss=True``, img and img_meta are single-nested (i.e. Tensor + and List[dict]), and when ``resturn_loss=False``, img and img_meta + should be double nested (i.e. List[Tensor], List[List[dict]]), with + the outer list indicating test time augmentations. + """ + if return_loss: + return self.forward_train(img, img_metas, **kwargs) + else: + return self.forward_test(img, img_metas, **kwargs) + + def train_step(self, data_batch, optimizer, **kwargs): + """The iteration step during training. + + This method defines an iteration step during training, except for the + back propagation and optimizer updating, which are done in an optimizer + hook. Note that in some complicated cases or models, the whole process + including back propagation and optimizer updating is also defined in + this method, such as GAN. + + Args: + data (dict): The output of dataloader. + optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of + runner is passed to ``train_step()``. This argument is unused + and reserved. + + Returns: + dict: It should contain at least 3 keys: ``loss``, ``log_vars``, + ``num_samples``. + ``loss`` is a tensor for back propagation, which can be a + weighted sum of multiple losses. + ``log_vars`` contains all the variables to be sent to the + logger. + ``num_samples`` indicates the batch size (when the model is + DDP, it means the batch size on each GPU), which is used for + averaging the logs. + """ + losses = self(**data_batch) + loss, log_vars = self._parse_losses(losses) + + outputs = dict( + loss=loss, + log_vars=log_vars, + num_samples=len(data_batch['img_metas'])) + + return outputs + + def val_step(self, data_batch, **kwargs): + """The iteration step during validation. + + This method shares the same signature as :func:`train_step`, but used + during val epochs. Note that the evaluation after training epochs is + not implemented with this method, but an evaluation hook. + """ + output = self(**data_batch, **kwargs) + return output + + @staticmethod + def _parse_losses(losses): + """Parse the raw outputs (losses) of the network. + + Args: + losses (dict): Raw output of the network, which usually contain + losses and other necessary information. + + Returns: + tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor + which may be a weighted sum of all losses, log_vars contains + all the variables to be sent to the logger. + """ + log_vars = OrderedDict() + for loss_name, loss_value in losses.items(): + if isinstance(loss_value, torch.Tensor): + log_vars[loss_name] = loss_value.mean() + elif isinstance(loss_value, list): + log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) + else: + raise TypeError( + f'{loss_name} is not a tensor or list of tensors') + + loss = sum(_value for _key, _value in log_vars.items() + if 'loss' in _key) + + log_vars['loss'] = loss + for loss_name, loss_value in log_vars.items(): + # reduce loss when distributed training + if dist.is_available() and dist.is_initialized(): + loss_value = loss_value.data.clone() + dist.all_reduce(loss_value.div_(dist.get_world_size())) + log_vars[loss_name] = loss_value.item() + + return loss, log_vars + + def show_result(self, + img, + result, + palette=None, + win_name='', + show=False, + wait_time=0, + out_file=None, + opacity=0.5): + """Draw `result` over `img`. + + Args: + img (str or Tensor): The image to be displayed. + result (Tensor): The semantic segmentation results to draw over + `img`. + palette (list[list[int]]] | np.ndarray | None): The palette of + segmentation map. If None is given, random palette will be + generated. Default: None + win_name (str): The window name. + wait_time (int): Value of waitKey param. + Default: 0. + show (bool): Whether to show the image. + Default: False. + out_file (str or None): The filename to write the image. + Default: None. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. + Returns: + img (Tensor): Only if not `show` or `out_file` + """ + img = mmcv.imread(img) + img = img.copy() + seg = result[0] + if palette is None: + if self.PALETTE is None: + palette = np.random.randint( + 0, 255, size=(len(self.CLASSES), 3)) + else: + palette = self.PALETTE + palette = np.array(palette) + assert palette.shape[0] == len(self.CLASSES) + assert palette.shape[1] == 3 + assert len(palette.shape) == 2 + assert 0 < opacity <= 1.0 + color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) + for label, color in enumerate(palette): + color_seg[seg == label, :] = color + # convert to BGR + color_seg = color_seg[..., ::-1] + + img = img * (1 - opacity) + color_seg * opacity + img = img.astype(np.uint8) + # if out_file specified, do not show image in window + if out_file is not None: + show = False + + if show: + mmcv.imshow(img, win_name, wait_time) + if out_file is not None: + mmcv.imwrite(img, out_file) + + if not (show or out_file): + warnings.warn('show==False and out_file is not specified, only ' + 'result image will be returned') + return img diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py new file mode 100644 index 000000000000..0de6d2991660 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py @@ -0,0 +1,98 @@ +from torch import nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import add_prefix +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from .. import builder +from ..builder import SEGMENTORS +from .encoder_decoder import EncoderDecoder + + +@SEGMENTORS.register_module() +class CascadeEncoderDecoder(EncoderDecoder): + """Cascade Encoder Decoder segmentors. + + CascadeEncoderDecoder almost the same as EncoderDecoder, while decoders of + CascadeEncoderDecoder are cascaded. The output of previous decoder_head + will be the input of next decoder_head. + """ + + def __init__(self, + num_stages, + backbone, + decode_head, + neck=None, + auxiliary_head=None, + train_cfg=None, + test_cfg=None, + pretrained=None): + self.num_stages = num_stages + super(CascadeEncoderDecoder, self).__init__( + backbone=backbone, + decode_head=decode_head, + neck=neck, + auxiliary_head=auxiliary_head, + train_cfg=train_cfg, + test_cfg=test_cfg, + pretrained=pretrained) + + def _init_decode_head(self, decode_head): + """Initialize ``decode_head``""" + assert isinstance(decode_head, list) + assert len(decode_head) == self.num_stages + self.decode_head = nn.ModuleList() + for i in range(self.num_stages): + self.decode_head.append(builder.build_head(decode_head[i])) + self.align_corners = self.decode_head[-1].align_corners + self.num_classes = self.decode_head[-1].num_classes + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone and heads. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + self.backbone.init_weights(pretrained=pretrained) + for i in range(self.num_stages): + self.decode_head[i].init_weights() + if self.with_auxiliary_head: + if isinstance(self.auxiliary_head, nn.ModuleList): + for aux_head in self.auxiliary_head: + aux_head.init_weights() + else: + self.auxiliary_head.init_weights() + + def encode_decode(self, img, img_metas): + """Encode images with backbone and decode into a semantic segmentation + map of the same size as input.""" + x = self.extract_feat(img) + out = self.decode_head[0].forward_test(x, img_metas, self.test_cfg) + for i in range(1, self.num_stages): + out = self.decode_head[i].forward_test(x, out, img_metas, + self.test_cfg) + out = resize( + input=out, + size=img.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + return out + + def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): + """Run forward function and calculate loss for decode head in + training.""" + losses = dict() + + loss_decode = self.decode_head[0].forward_train( + x, img_metas, gt_semantic_seg, self.train_cfg) + + losses.update(add_prefix(loss_decode, 'decode_0')) + + for i in range(1, self.num_stages): + # forward test again, maybe unnecessary for most methods. + prev_outputs = self.decode_head[i - 1].forward_test( + x, img_metas, self.test_cfg) + loss_decode = self.decode_head[i].forward_train( + x, prev_outputs, img_metas, gt_semantic_seg, self.train_cfg) + losses.update(add_prefix(loss_decode, f'decode_{i}')) + + return losses diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py new file mode 100644 index 000000000000..ca573d7af79e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py @@ -0,0 +1,298 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import add_prefix +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from .. import builder +from ..builder import SEGMENTORS +from .base import BaseSegmentor + + +@SEGMENTORS.register_module() +class EncoderDecoder(BaseSegmentor): + """Encoder Decoder segmentors. + + EncoderDecoder typically consists of backbone, decode_head, auxiliary_head. + Note that auxiliary_head is only used for deep supervision during training, + which could be dumped during inference. + """ + + def __init__(self, + backbone, + decode_head, + neck=None, + auxiliary_head=None, + train_cfg=None, + test_cfg=None, + pretrained=None): + super(EncoderDecoder, self).__init__() + self.backbone = builder.build_backbone(backbone) + if neck is not None: + self.neck = builder.build_neck(neck) + self._init_decode_head(decode_head) + self._init_auxiliary_head(auxiliary_head) + + self.train_cfg = train_cfg + self.test_cfg = test_cfg + + self.init_weights(pretrained=pretrained) + + assert self.with_decode_head + + def _init_decode_head(self, decode_head): + """Initialize ``decode_head``""" + self.decode_head = builder.build_head(decode_head) + self.align_corners = self.decode_head.align_corners + self.num_classes = self.decode_head.num_classes + + def _init_auxiliary_head(self, auxiliary_head): + """Initialize ``auxiliary_head``""" + if auxiliary_head is not None: + if isinstance(auxiliary_head, list): + self.auxiliary_head = nn.ModuleList() + for head_cfg in auxiliary_head: + self.auxiliary_head.append(builder.build_head(head_cfg)) + else: + self.auxiliary_head = builder.build_head(auxiliary_head) + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone and heads. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + + super(EncoderDecoder, self).init_weights(pretrained) + self.backbone.init_weights(pretrained=pretrained) + self.decode_head.init_weights() + if self.with_auxiliary_head: + if isinstance(self.auxiliary_head, nn.ModuleList): + for aux_head in self.auxiliary_head: + aux_head.init_weights() + else: + self.auxiliary_head.init_weights() + + def extract_feat(self, img): + """Extract features from images.""" + x = self.backbone(img) + if self.with_neck: + x = self.neck(x) + return x + + def encode_decode(self, img, img_metas): + """Encode images with backbone and decode into a semantic segmentation + map of the same size as input.""" + x = self.extract_feat(img) + out = self._decode_head_forward_test(x, img_metas) + out = resize( + input=out, + size=img.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + return out + + def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): + """Run forward function and calculate loss for decode head in + training.""" + losses = dict() + loss_decode = self.decode_head.forward_train(x, img_metas, + gt_semantic_seg, + self.train_cfg) + + losses.update(add_prefix(loss_decode, 'decode')) + return losses + + def _decode_head_forward_test(self, x, img_metas): + """Run forward function and calculate loss for decode head in + inference.""" + seg_logits = self.decode_head.forward_test(x, img_metas, self.test_cfg) + return seg_logits + + def _auxiliary_head_forward_train(self, x, img_metas, gt_semantic_seg): + """Run forward function and calculate loss for auxiliary head in + training.""" + losses = dict() + if isinstance(self.auxiliary_head, nn.ModuleList): + for idx, aux_head in enumerate(self.auxiliary_head): + loss_aux = aux_head.forward_train(x, img_metas, + gt_semantic_seg, + self.train_cfg) + losses.update(add_prefix(loss_aux, f'aux_{idx}')) + else: + loss_aux = self.auxiliary_head.forward_train( + x, img_metas, gt_semantic_seg, self.train_cfg) + losses.update(add_prefix(loss_aux, 'aux')) + + return losses + + def forward_dummy(self, img): + """Dummy forward function.""" + seg_logit = self.encode_decode(img, None) + + return seg_logit + + def forward_train(self, img, img_metas, gt_semantic_seg): + """Forward function for training. + + Args: + img (Tensor): Input images. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + + x = self.extract_feat(img) + + losses = dict() + + loss_decode = self._decode_head_forward_train(x, img_metas, + gt_semantic_seg) + losses.update(loss_decode) + + if self.with_auxiliary_head: + loss_aux = self._auxiliary_head_forward_train( + x, img_metas, gt_semantic_seg) + losses.update(loss_aux) + + return losses + + # TODO refactor + def slide_inference(self, img, img_meta, rescale): + """Inference by sliding-window with overlap. + + If h_crop > h_img or w_crop > w_img, the small patch will be used to + decode without padding. + """ + + h_stride, w_stride = self.test_cfg.stride + h_crop, w_crop = self.test_cfg.crop_size + batch_size, _, h_img, w_img = img.size() + num_classes = self.num_classes + h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1 + w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1 + preds = img.new_zeros((batch_size, num_classes, h_img, w_img)) + count_mat = img.new_zeros((batch_size, 1, h_img, w_img)) + for h_idx in range(h_grids): + for w_idx in range(w_grids): + y1 = h_idx * h_stride + x1 = w_idx * w_stride + y2 = min(y1 + h_crop, h_img) + x2 = min(x1 + w_crop, w_img) + y1 = max(y2 - h_crop, 0) + x1 = max(x2 - w_crop, 0) + crop_img = img[:, :, y1:y2, x1:x2] + crop_seg_logit = self.encode_decode(crop_img, img_meta) + preds += F.pad(crop_seg_logit, + (int(x1), int(preds.shape[3] - x2), int(y1), + int(preds.shape[2] - y2))) + + count_mat[:, :, y1:y2, x1:x2] += 1 + assert (count_mat == 0).sum() == 0 + if torch.onnx.is_in_onnx_export(): + # cast count_mat to constant while exporting to ONNX + count_mat = torch.from_numpy( + count_mat.cpu().detach().numpy()).to(device=img.device) + preds = preds / count_mat + if rescale: + preds = resize( + preds, + size=img_meta[0]['ori_shape'][:2], + mode='bilinear', + align_corners=self.align_corners, + warning=False) + return preds + + def whole_inference(self, img, img_meta, rescale): + """Inference with full image.""" + + seg_logit = self.encode_decode(img, img_meta) + if rescale: + # support dynamic shape for onnx + if torch.onnx.is_in_onnx_export(): + size = img.shape[2:] + else: + size = img_meta[0]['ori_shape'][:2] + seg_logit = resize( + seg_logit, + size=size, + mode='bilinear', + align_corners=self.align_corners, + warning=False) + + return seg_logit + + def inference(self, img, img_meta, rescale): + """Inference with slide/whole style. + + Args: + img (Tensor): The input image of shape (N, 3, H, W). + img_meta (dict): Image info dict where each dict has: 'img_shape', + 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + rescale (bool): Whether rescale back to original shape. + + Returns: + Tensor: The output segmentation map. + """ + + assert self.test_cfg.mode in ['slide', 'whole'] + ori_shape = img_meta[0]['ori_shape'] + assert all(_['ori_shape'] == ori_shape for _ in img_meta) + if self.test_cfg.mode == 'slide': + seg_logit = self.slide_inference(img, img_meta, rescale) + else: + seg_logit = self.whole_inference(img, img_meta, rescale) + output = F.softmax(seg_logit, dim=1) + flip = img_meta[0]['flip'] + if flip: + flip_direction = img_meta[0]['flip_direction'] + assert flip_direction in ['horizontal', 'vertical'] + if flip_direction == 'horizontal': + output = output.flip(dims=(3, )) + elif flip_direction == 'vertical': + output = output.flip(dims=(2, )) + + return output + + def simple_test(self, img, img_meta, rescale=True): + """Simple test with single image.""" + seg_logit = self.inference(img, img_meta, rescale) + seg_pred = seg_logit.argmax(dim=1) + if torch.onnx.is_in_onnx_export(): + # our inference backend only support 4D output + seg_pred = seg_pred.unsqueeze(0) + return seg_pred + seg_pred = seg_pred.cpu().numpy() + # unravel batch dim + seg_pred = list(seg_pred) + return seg_pred + + def aug_test(self, imgs, img_metas, rescale=True): + """Test with augmentations. + + Only rescale=True is supported. + """ + # aug_test rescale all imgs back to ori_shape for now + assert rescale + # to save memory, we get augmented seg logit inplace + seg_logit = self.inference(imgs[0], img_metas[0], rescale) + for i in range(1, len(imgs)): + cur_seg_logit = self.inference(imgs[i], img_metas[i], rescale) + seg_logit += cur_seg_logit + seg_logit /= len(imgs) + seg_pred = seg_logit.argmax(dim=1) + seg_pred = seg_pred.cpu().numpy() + # unravel batch dim + seg_pred = list(seg_pred) + return seg_pred diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py new file mode 100644 index 000000000000..3d3bdd349b9f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py @@ -0,0 +1,13 @@ +from .drop import DropPath +from .inverted_residual import InvertedResidual, InvertedResidualV3 +from .make_divisible import make_divisible +from .res_layer import ResLayer +from .se_layer import SELayer +from .self_attention_block import SelfAttentionBlock +from .up_conv_block import UpConvBlock +from .weight_init import trunc_normal_ + +__all__ = [ + 'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'InvertedResidual', + 'UpConvBlock', 'InvertedResidualV3', 'SELayer', 'DropPath', 'trunc_normal_' +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py new file mode 100644 index 000000000000..4520b0ff407d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py @@ -0,0 +1,31 @@ +"""Modified from https://github.com/rwightman/pytorch-image- +models/blob/master/timm/models/layers/drop.py.""" + +import torch +from torch import nn + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of + residual blocks). + + Args: + drop_prob (float): Drop rate for paths of model. Dropout rate has + to be between 0 and 1. Default: 0. + """ + + def __init__(self, drop_prob=0.): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + self.keep_prob = 1 - drop_prob + + def forward(self, x): + if self.drop_prob == 0. or not self.training: + return x + shape = (x.shape[0], ) + (1, ) * ( + x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets + random_tensor = self.keep_prob + torch.rand( + shape, dtype=x.dtype, device=x.device) + random_tensor.floor_() # binarize + output = x.div(self.keep_prob) * random_tensor + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py new file mode 100644 index 000000000000..e9390b6b09f3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py @@ -0,0 +1,208 @@ +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule +from torch import nn +from torch.utils import checkpoint as cp + +from .se_layer import SELayer + + +class InvertedResidual(nn.Module): + """InvertedResidual block for MobileNetV2. + + Args: + in_channels (int): The input channels of the InvertedResidual block. + out_channels (int): The output channels of the InvertedResidual block. + stride (int): Stride of the middle (first) 3x3 convolution. + expand_ratio (int): Adjusts number of channels of the hidden layer + in InvertedResidual by this amount. + dilation (int): Dilation rate of depthwise conv. Default: 1 + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU6'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + + Returns: + Tensor: The output tensor. + """ + + def __init__(self, + in_channels, + out_channels, + stride, + expand_ratio, + dilation=1, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU6'), + with_cp=False): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2], f'stride must in [1, 2]. ' \ + f'But received {stride}.' + self.with_cp = with_cp + self.use_res_connect = self.stride == 1 and in_channels == out_channels + hidden_dim = int(round(in_channels * expand_ratio)) + + layers = [] + if expand_ratio != 1: + layers.append( + ConvModule( + in_channels=in_channels, + out_channels=hidden_dim, + kernel_size=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + layers.extend([ + ConvModule( + in_channels=hidden_dim, + out_channels=hidden_dim, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + groups=hidden_dim, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg), + ConvModule( + in_channels=hidden_dim, + out_channels=out_channels, + kernel_size=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + ]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + + def _inner_forward(x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class InvertedResidualV3(nn.Module): + """Inverted Residual Block for MobileNetV3. + + Args: + in_channels (int): The input channels of this Module. + out_channels (int): The output channels of this Module. + mid_channels (int): The input channels of the depthwise convolution. + kernel_size (int): The kernel size of the depthwise convolution. + Default: 3. + stride (int): The stride of the depthwise convolution. Default: 1. + se_cfg (dict): Config dict for se layer. Default: None, which means no + se layer. + with_expand_conv (bool): Use expand conv or not. If set False, + mid_channels must be the same with in_channels. Default: True. + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + + Returns: + Tensor: The output tensor. + """ + + def __init__(self, + in_channels, + out_channels, + mid_channels, + kernel_size=3, + stride=1, + se_cfg=None, + with_expand_conv=True, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + with_cp=False): + super(InvertedResidualV3, self).__init__() + self.with_res_shortcut = (stride == 1 and in_channels == out_channels) + assert stride in [1, 2] + self.with_cp = with_cp + self.with_se = se_cfg is not None + self.with_expand_conv = with_expand_conv + + if self.with_se: + assert isinstance(se_cfg, dict) + if not self.with_expand_conv: + assert mid_channels == in_channels + + if self.with_expand_conv: + self.expand_conv = ConvModule( + in_channels=in_channels, + out_channels=mid_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.depthwise_conv = ConvModule( + in_channels=mid_channels, + out_channels=mid_channels, + kernel_size=kernel_size, + stride=stride, + padding=kernel_size // 2, + groups=mid_channels, + conv_cfg=dict( + type='Conv2dAdaptivePadding') if stride == 2 else conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + if self.with_se: + self.se = SELayer(**se_cfg) + + self.linear_conv = ConvModule( + in_channels=mid_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + + def forward(self, x): + + def _inner_forward(x): + out = x + + if self.with_expand_conv: + out = self.expand_conv(out) + + out = self.depthwise_conv(out) + + if self.with_se: + out = self.se(out) + + out = self.linear_conv(out) + + if self.with_res_shortcut: + return x + out + else: + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/make_divisible.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/make_divisible.py new file mode 100644 index 000000000000..75ad75605252 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/make_divisible.py @@ -0,0 +1,27 @@ +def make_divisible(value, divisor, min_value=None, min_ratio=0.9): + """Make divisible function. + + This function rounds the channel number to the nearest value that can be + divisible by the divisor. It is taken from the original tf repo. It ensures + that all layers have a channel number that is divisible by divisor. It can + be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py # noqa + + Args: + value (int): The original channel number. + divisor (int): The divisor to fully divide the channel number. + min_value (int): The minimum value of the output channel. + Default: None, means that the minimum value equal to the divisor. + min_ratio (float): The minimum ratio of the rounded channel number to + the original channel number. Default: 0.9. + + Returns: + int: The modified output channel number. + """ + + if min_value is None: + min_value = divisor + new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than (1-min_ratio). + if new_value < min_ratio * value: + new_value += divisor + return new_value diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py new file mode 100644 index 000000000000..d094f15783dd --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py @@ -0,0 +1,94 @@ +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import build_conv_layer, build_norm_layer +from torch import nn as nn + + +class ResLayer(nn.Sequential): + """ResLayer to build ResNet style backbone. + + Args: + block (nn.Module): block used to build ResLayer. + inplanes (int): inplanes of block. + planes (int): planes of block. + num_blocks (int): number of blocks. + stride (int): stride of the first block. Default: 1 + avg_down (bool): Use AvgPool instead of stride conv when + downsampling in the bottleneck. Default: False + conv_cfg (dict): dictionary to construct and config conv layer. + Default: None + norm_cfg (dict): dictionary to construct and config norm layer. + Default: dict(type='BN') + multi_grid (int | None): Multi grid dilation rates of last + stage. Default: None + contract_dilation (bool): Whether contract first dilation of each layer + Default: False + """ + + def __init__(self, + block, + inplanes, + planes, + num_blocks, + stride=1, + dilation=1, + avg_down=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + multi_grid=None, + contract_dilation=False, + **kwargs): + self.block = block + + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = [] + conv_stride = stride + if avg_down: + conv_stride = 1 + downsample.append( + nn.AvgPool2d( + kernel_size=stride, + stride=stride, + ceil_mode=True, + count_include_pad=False)) + downsample.extend([ + build_conv_layer( + conv_cfg, + inplanes, + planes * block.expansion, + kernel_size=1, + stride=conv_stride, + bias=False), + build_norm_layer(norm_cfg, planes * block.expansion)[1] + ]) + downsample = nn.Sequential(*downsample) + + layers = [] + if multi_grid is None: + if dilation > 1 and contract_dilation: + first_dilation = dilation // 2 + else: + first_dilation = dilation + else: + first_dilation = multi_grid[0] + layers.append( + block( + inplanes=inplanes, + planes=planes, + stride=stride, + dilation=first_dilation, + downsample=downsample, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + **kwargs)) + inplanes = planes * block.expansion + for i in range(1, num_blocks): + layers.append( + block( + inplanes=inplanes, + planes=planes, + stride=1, + dilation=dilation if multi_grid is None else multi_grid[i], + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + **kwargs)) + super(ResLayer, self).__init__(*layers) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py new file mode 100644 index 000000000000..a3836ed3adda --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py @@ -0,0 +1,57 @@ +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from .make_divisible import make_divisible + + +class SELayer(nn.Module): + """Squeeze-and-Excitation Module. + + Args: + channels (int): The input (and output) channels of the SE layer. + ratio (int): Squeeze ratio in SELayer, the intermediate channel will be + ``int(channels/ratio)``. Default: 16. + conv_cfg (None or dict): Config dict for convolution layer. + Default: None, which means using conv2d. + act_cfg (dict or Sequence[dict]): Config dict for activation layer. + If act_cfg is a dict, two activation layers will be configured + by this dict. If act_cfg is a sequence of dicts, the first + activation layer will be configured by the first dict and the + second activation layer will be configured by the second dict. + Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0, + divisor=6.0)). + """ + + def __init__(self, + channels, + ratio=16, + conv_cfg=None, + act_cfg=(dict(type='ReLU'), + dict(type='HSigmoid', bias=3.0, divisor=6.0))): + super(SELayer, self).__init__() + if isinstance(act_cfg, dict): + act_cfg = (act_cfg, act_cfg) + assert len(act_cfg) == 2 + assert mmcv.is_tuple_of(act_cfg, dict) + self.global_avgpool = nn.AdaptiveAvgPool2d(1) + self.conv1 = ConvModule( + in_channels=channels, + out_channels=make_divisible(channels // ratio, 8), + kernel_size=1, + stride=1, + conv_cfg=conv_cfg, + act_cfg=act_cfg[0]) + self.conv2 = ConvModule( + in_channels=make_divisible(channels // ratio, 8), + out_channels=channels, + kernel_size=1, + stride=1, + conv_cfg=conv_cfg, + act_cfg=act_cfg[1]) + + def forward(self, x): + out = self.global_avgpool(x) + out = self.conv1(out) + out = self.conv2(out) + return x * out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py new file mode 100644 index 000000000000..4c6d4da3cbf8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py @@ -0,0 +1,159 @@ +import torch +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, constant_init +from torch import nn as nn +from torch.nn import functional as F + + +class SelfAttentionBlock(nn.Module): + """General self-attention block/non-local block. + + Please refer to https://arxiv.org/abs/1706.03762 for details about key, + query and value. + + Args: + key_in_channels (int): Input channels of key feature. + query_in_channels (int): Input channels of query feature. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + share_key_query (bool): Whether share projection weight between key + and query projection. + query_downsample (nn.Module): Query downsample module. + key_downsample (nn.Module): Key downsample module. + key_query_num_convs (int): Number of convs for key/query projection. + value_num_convs (int): Number of convs for value projection. + matmul_norm (bool): Whether normalize attention map with sqrt of + channels + with_out (bool): Whether use out projection. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__(self, key_in_channels, query_in_channels, channels, + out_channels, share_key_query, query_downsample, + key_downsample, key_query_num_convs, value_out_num_convs, + key_query_norm, value_out_norm, matmul_norm, with_out, + conv_cfg, norm_cfg, act_cfg): + super(SelfAttentionBlock, self).__init__() + if share_key_query: + assert key_in_channels == query_in_channels + self.key_in_channels = key_in_channels + self.query_in_channels = query_in_channels + self.out_channels = out_channels + self.channels = channels + self.share_key_query = share_key_query + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.key_project = self.build_project( + key_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + if share_key_query: + self.query_project = self.key_project + else: + self.query_project = self.build_project( + query_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.value_project = self.build_project( + key_in_channels, + channels if with_out else out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + if with_out: + self.out_project = self.build_project( + channels, + out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + else: + self.out_project = None + + self.query_downsample = query_downsample + self.key_downsample = key_downsample + self.matmul_norm = matmul_norm + + self.init_weights() + + def init_weights(self): + """Initialize weight of later layer.""" + if self.out_project is not None: + if not isinstance(self.out_project, ConvModule): + constant_init(self.out_project, 0) + + def build_project(self, in_channels, channels, num_convs, use_conv_module, + conv_cfg, norm_cfg, act_cfg): + """Build projection layer for key/query/value/out.""" + if use_conv_module: + convs = [ + ConvModule( + in_channels, + channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + ] + for _ in range(num_convs - 1): + convs.append( + ConvModule( + channels, + channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + else: + convs = [nn.Conv2d(in_channels, channels, 1)] + for _ in range(num_convs - 1): + convs.append(nn.Conv2d(channels, channels, 1)) + if len(convs) > 1: + convs = nn.Sequential(*convs) + else: + convs = convs[0] + return convs + + def forward(self, query_feats, key_feats): + """Forward function.""" + batch_size = query_feats.size(0) + query = self.query_project(query_feats) + if self.query_downsample is not None: + query = self.query_downsample(query) + query = query.reshape(*query.shape[:2], -1) + query = query.permute(0, 2, 1).contiguous() + + key = self.key_project(key_feats) + value = self.value_project(key_feats) + if self.key_downsample is not None: + key = self.key_downsample(key) + value = self.key_downsample(value) + key = key.reshape(*key.shape[:2], -1) + value = value.reshape(*value.shape[:2], -1) + value = value.permute(0, 2, 1).contiguous() + + sim_map = torch.matmul(query, key) + if self.matmul_norm: + sim_map = (self.channels**-.5) * sim_map + sim_map = F.softmax(sim_map, dim=-1) + + context = torch.matmul(sim_map, value) + context = context.permute(0, 2, 1).contiguous() + context = context.reshape(batch_size, -1, *query_feats.shape[2:]) + if self.out_project is not None: + context = self.out_project(context) + return context diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py new file mode 100644 index 000000000000..7d236018920a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py @@ -0,0 +1,101 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, build_upsample_layer + + +class UpConvBlock(nn.Module): + """Upsample convolution block in decoder for UNet. + + This upsample convolution block consists of one upsample module + followed by one convolution block. The upsample module expands the + high-level low-resolution feature map and the convolution block fuses + the upsampled high-level low-resolution feature map and the low-level + high-resolution feature map from encoder. + + Args: + conv_block (nn.Sequential): Sequential of convolutional layers. + in_channels (int): Number of input channels of the high-level + skip_channels (int): Number of input channels of the low-level + high-resolution feature map from encoder. + out_channels (int): Number of output channels. + num_convs (int): Number of convolutional layers in the conv_block. + Default: 2. + stride (int): Stride of convolutional layer in conv_block. Default: 1. + dilation (int): Dilation rate of convolutional layer in conv_block. + Default: 1. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + upsample_cfg (dict): The upsample config of the upsample module in + decoder. Default: dict(type='InterpConv'). If the size of + high-level feature map is the same as that of skip feature map + (low-level feature map from encoder), it does not need upsample the + high-level feature map and the upsample_cfg is None. + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + """ + + def __init__(self, + conv_block, + in_channels, + skip_channels, + out_channels, + num_convs=2, + stride=1, + dilation=1, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + dcn=None, + plugins=None): + super(UpConvBlock, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + + self.conv_block = conv_block( + in_channels=2 * skip_channels, + out_channels=out_channels, + num_convs=num_convs, + stride=stride, + dilation=dilation, + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + dcn=None, + plugins=None) + if upsample_cfg is not None: + self.upsample = build_upsample_layer( + cfg=upsample_cfg, + in_channels=in_channels, + out_channels=skip_channels, + with_cp=with_cp, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + else: + self.upsample = ConvModule( + in_channels, + skip_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, skip, x): + """Forward function.""" + + x = self.upsample(x) + out = torch.cat([skip, x], dim=1) + out = self.conv_block(out) + + return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py new file mode 100644 index 000000000000..38141ba3d61f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py @@ -0,0 +1,62 @@ +"""Modified from https://github.com/rwightman/pytorch-image- +models/blob/master/timm/models/layers/drop.py.""" + +import math +import warnings + +import torch + + +def _no_grad_trunc_normal_(tensor, mean, std, a, b): + """Reference: https://people.sc.fsu.edu/~jburkardt/presentations + /truncated_normal.pdf""" + + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1. + math.erf(x / math.sqrt(2.))) / 2. + + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn( + 'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. ' + 'The distribution of values may be incorrect.', + stacklevel=2) + + with torch.no_grad(): + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + lower_bound = norm_cdf((a - mean) / std) + upper_bound = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [l, u], then translate to + # [2l-1, 2u-1]. + tensor.uniform_(2 * lower_bound - 1, 2 * upper_bound - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + tensor.erfinv_() + + # Transform to proper mean, std + tensor.mul_(std * math.sqrt(2.)) + tensor.add_(mean) + + # Clamp to ensure it's in the proper range + tensor.clamp_(min=a, max=b) + return tensor + + +def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): + r"""Fills the input Tensor with values drawn from a truncated + normal distribution. The values are effectively drawn from the + normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` + with values outside :math:`[a, b]` redrawn until they are within + the bounds. The method used for generating the random values works + best when :math:`a \leq \text{mean} \leq b`. + Args: + tensor (``torch.Tensor``): an n-dimensional `torch.Tensor` + mean (float): the mean of the normal distribution + std (float): the standard deviation of the normal distribution + a (float): the minimum cutoff value + b (float): the maximum cutoff value + """ + return _no_grad_trunc_normal_(tensor, mean, std, a, b) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/__init__.py new file mode 100644 index 000000000000..bec51c75b936 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/__init__.py @@ -0,0 +1,4 @@ +from .encoding import Encoding +from .wrappers import Upsample, resize + +__all__ = ['Upsample', 'resize', 'Encoding'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py new file mode 100644 index 000000000000..7eb3629a6426 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py @@ -0,0 +1,74 @@ +import torch +from torch import nn +from torch.nn import functional as F + + +class Encoding(nn.Module): + """Encoding Layer: a learnable residual encoder. + + Input is of shape (batch_size, channels, height, width). + Output is of shape (batch_size, num_codes, channels). + + Args: + channels: dimension of the features or feature channels + num_codes: number of code words + """ + + def __init__(self, channels, num_codes): + super(Encoding, self).__init__() + # init codewords and smoothing factor + self.channels, self.num_codes = channels, num_codes + std = 1. / ((num_codes * channels)**0.5) + # [num_codes, channels] + self.codewords = nn.Parameter( + torch.empty(num_codes, channels, + dtype=torch.float).uniform_(-std, std), + requires_grad=True) + # [num_codes] + self.scale = nn.Parameter( + torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0), + requires_grad=True) + + @staticmethod + def scaled_l2(x, codewords, scale): + num_codes, channels = codewords.size() + batch_size = x.size(0) + reshaped_scale = scale.view((1, 1, num_codes)) + expanded_x = x.unsqueeze(2).expand( + (batch_size, x.size(1), num_codes, channels)) + reshaped_codewords = codewords.view((1, 1, num_codes, channels)) + + scaled_l2_norm = reshaped_scale * ( + expanded_x - reshaped_codewords).pow(2).sum(dim=3) + return scaled_l2_norm + + @staticmethod + def aggregate(assignment_weights, x, codewords): + num_codes, channels = codewords.size() + reshaped_codewords = codewords.view((1, 1, num_codes, channels)) + batch_size = x.size(0) + + expanded_x = x.unsqueeze(2).expand( + (batch_size, x.size(1), num_codes, channels)) + encoded_feat = (assignment_weights.unsqueeze(3) * + (expanded_x - reshaped_codewords)).sum(dim=1) + return encoded_feat + + def forward(self, x): + assert x.dim() == 4 and x.size(1) == self.channels + # [batch_size, channels, height, width] + batch_size = x.size(0) + # [batch_size, height x width, channels] + x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous() + # assignment_weights: [batch_size, channels, num_codes] + assignment_weights = F.softmax( + self.scaled_l2(x, self.codewords, self.scale), dim=2) + # aggregate + encoded_feat = self.aggregate(assignment_weights, x, self.codewords) + return encoded_feat + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(Nx{self.channels}xHxW =>Nx{self.num_codes}' \ + f'x{self.channels})' + return repr_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py new file mode 100644 index 000000000000..0ed9a0cb8d7c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py @@ -0,0 +1,50 @@ +import warnings + +import torch.nn as nn +import torch.nn.functional as F + + +def resize(input, + size=None, + scale_factor=None, + mode='nearest', + align_corners=None, + warning=True): + if warning: + if size is not None and align_corners: + input_h, input_w = tuple(int(x) for x in input.shape[2:]) + output_h, output_w = tuple(int(x) for x in size) + if output_h > input_h or output_w > output_h: + if ((output_h > 1 and output_w > 1 and input_h > 1 + and input_w > 1) and (output_h - 1) % (input_h - 1) + and (output_w - 1) % (input_w - 1)): + warnings.warn( + f'When align_corners={align_corners}, ' + 'the output would more aligned if ' + f'input size {(input_h, input_w)} is `x+1` and ' + f'out size {(output_h, output_w)} is `nx+1`') + return F.interpolate(input, size, scale_factor, mode, align_corners) + + +class Upsample(nn.Module): + + def __init__(self, + size=None, + scale_factor=None, + mode='nearest', + align_corners=None): + super(Upsample, self).__init__() + self.size = size + if isinstance(scale_factor, tuple): + self.scale_factor = tuple(float(factor) for factor in scale_factor) + else: + self.scale_factor = float(scale_factor) if scale_factor else None + self.mode = mode + self.align_corners = align_corners + + def forward(self, x): + if not self.size: + size = [int(t * self.scale_factor) for t in x.shape[-2:]] + else: + size = self.size + return resize(x, size, None, self.mode, self.align_corners) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/__init__.py new file mode 100644 index 000000000000..ac489e2dbbc0 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/__init__.py @@ -0,0 +1,4 @@ +from .collect_env import collect_env +from .logger import get_root_logger + +__all__ = ['get_root_logger', 'collect_env'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py new file mode 100644 index 000000000000..7d4c191368c2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py @@ -0,0 +1,17 @@ +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import collect_env as collect_base_env +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import get_git_hash + +import nemo.collections.multimodal.models.controlnet.uniformer.mmseg as mmseg + + +def collect_env(): + """Collect the information of the running environments.""" + env_info = collect_base_env() + env_info['MMSegmentation'] = f'{mmseg.__version__}+{get_git_hash()[:7]}' + + return env_info + + +if __name__ == '__main__': + for name, val in collect_env().items(): + print('{}: {}'.format(name, val)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/logger.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/logger.py new file mode 100644 index 000000000000..6167a6f88ea4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/logger.py @@ -0,0 +1,27 @@ +import logging + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import get_logger + + +def get_root_logger(log_file=None, log_level=logging.INFO): + """Get the root logger. + + The logger will be initialized if it has not been initialized. By default a + StreamHandler will be added. If `log_file` is specified, a FileHandler will + also be added. The name of the root logger is the top-level package name, + e.g., "mmseg". + + Args: + log_file (str | None): The log filename. If specified, a FileHandler + will be added to the root logger. + log_level (int): The root logger level. Note that only the process of + rank 0 is affected, while other processes will set the level to + "Error" and be silent most of the time. + + Returns: + logging.Logger: The root logger. + """ + + logger = get_logger(name='mmseg', log_file=log_file, log_level=log_level) + + return logger diff --git a/nemo/collections/multimodal/models/controlnet/util.py b/nemo/collections/multimodal/models/controlnet/util.py new file mode 100644 index 000000000000..d410985870cd --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/util.py @@ -0,0 +1,87 @@ +import os + +import numpy as np +import torch +import torchvision +from PIL import Image +from pytorch_lightning import Callback +from pytorch_lightning.utilities.distributed import rank_zero_only +from nemo.collections.multimodal.models.controlnet.uniformer import UniformerDetector + +class ImageLogger(Callback): + def __init__(self, batch_frequency=2000, max_images=4, clamp=True, increase_log_steps=True, + rescale=True, disabled=False, log_on_batch_idx=False, log_first_step=False, + log_images_kwargs=None): + super().__init__() + self.rescale = rescale + self.batch_freq = batch_frequency + self.max_images = max_images + if not increase_log_steps: + self.log_steps = [self.batch_freq] + self.clamp = clamp + self.disabled = disabled + self.log_on_batch_idx = log_on_batch_idx + self.log_images_kwargs = log_images_kwargs if log_images_kwargs else {} + self.log_first_step = log_first_step + + print("callback initialized") + + @rank_zero_only + def log_local(self, save_dir, split, images, global_step, current_epoch, batch_idx): + root = os.path.join(save_dir, "image_log", split) + for k in images: + grid = torchvision.utils.make_grid(images[k], nrow=4) + if self.rescale: + grid = (grid + 1.0) / 2.0 # -1,1 -> 0,1; c,h,w + grid = grid.transpose(0, 1).transpose(1, 2).squeeze(-1) + grid = grid.numpy() + grid = (grid * 255).astype(np.uint8) + filename = "{}_gs-{:06}_e-{:06}_b-{:06}.png".format(k, global_step, current_epoch, batch_idx) + path = os.path.join(root, filename) + os.makedirs(os.path.split(path)[0], exist_ok=True) + Image.fromarray(grid).save(path) + + def log_img(self, pl_module, batch, batch_idx, split="train"): + check_idx = batch_idx # if self.log_on_batch_idx else pl_module.global_step + if (self.check_frequency(check_idx) and # batch_idx % self.batch_freq == 0 + hasattr(pl_module, "log_images") and + callable(pl_module.log_images) and + self.max_images > 0): + logger = type(pl_module.logger) + + is_train = pl_module.training + if is_train: + pl_module.eval() + + with torch.no_grad(): + images = pl_module.log_images(batch, split=split, **self.log_images_kwargs) + + for k in images: + N = min(images[k].shape[0], self.max_images) + images[k] = images[k][:N] + if isinstance(images[k], torch.Tensor): + images[k] = images[k].detach().cpu() + if self.clamp: + images[k] = torch.clamp(images[k], -1., 1.) + + self.log_local(pl_module.logger.save_dir, split, images, + pl_module.global_step, pl_module.current_epoch, batch_idx) + + if is_train: + pl_module.train() + + def check_frequency(self, check_idx): + return (check_idx % self.batch_freq == 0) + + def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx): + if not self.disabled: + self.log_img(pl_module, batch, batch_idx, split="train") + + +def get_preprocessing_function(name): + if name == 'seg2img': + apply_uniformer = UniformerDetector() + return apply_uniformer + else: + print("The application is not yet supported") + raise NotImplementedError \ No newline at end of file diff --git a/nemo/collections/multimodal/models/multimodal_base_model.py b/nemo/collections/multimodal/models/multimodal_base_model.py index fe41a74acd41..b34506671011 100644 --- a/nemo/collections/multimodal/models/multimodal_base_model.py +++ b/nemo/collections/multimodal/models/multimodal_base_model.py @@ -191,6 +191,14 @@ def load_from_checkpoint( new_state_dict[new_key] = checkpoint['state_dict'][key] checkpoint['state_dict'] = new_state_dict + # compatibility for inductor in inference + if not cfg.get('inductor', False): + new_state_dict = {} + for key in checkpoint['state_dict'].keys(): + new_key = key.replace('._orig_mod', '', 1) + new_state_dict[new_key] = checkpoint['state_dict'][key] + checkpoint['state_dict'] = new_state_dict + if cfg.get('megatron_amp_O2', False): new_state_dict = {} for key in checkpoint['state_dict'].keys(): diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py index 8f43c4da6a12..1cd7c5877f97 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py @@ -113,7 +113,9 @@ def sample( ): if conditioning is not None: if isinstance(conditioning, dict): - cbs = conditioning[list(conditioning.keys())[0]][0].shape[0] + ctmp = conditioning[list(conditioning.keys())[0]] + while isinstance(ctmp, list): ctmp = ctmp[0] + cbs = ctmp.shape[0] if cbs != batch_size: print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}") else: @@ -250,7 +252,10 @@ def _get_model_output( if unconditional_conditioning is None or unconditional_guidance_scale == 1.0: e_t = self.model.apply_model(x, t, c) elif isinstance(c, dict): - raise NotImplementedError + ### Contolnet conditioning is dict format + model_t = self.model.apply_model(x, t, c) + model_uncond = self.model.apply_model(x, t, unconditional_conditioning) + e_t = model_uncond + unconditional_guidance_scale * (model_t - model_uncond) else: x_in = torch.cat([x] * 2) t_in = torch.cat([t] * 2) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/attention.py b/nemo/collections/multimodal/modules/stable_diffusion/attention.py index a7832382f2c7..dc4882deb6f0 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/attention.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/attention.py @@ -38,8 +38,8 @@ def check_cuda(): try: - from flash_attn.flash_attention import FlashAttention from flash_attn.flash_attn_interface import flash_attn_unpadded_kvpacked_func + from flash_attn.flash_attention import FlashAttention flash_attn_installed = check_cuda() print("FlashAttention Installed") @@ -292,11 +292,12 @@ def __init__( gated_ff=True, use_checkpoint=False, use_flash_attention=False, + disable_self_attn=False, ): super().__init__() - self.attn1 = CrossAttention( - query_dim=dim, heads=n_heads, dim_head=d_head, dropout=dropout, use_flash_attention=use_flash_attention - ) # is a self-attention + self.disable_self_attn = disable_self_attn + self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, dropout=dropout, + use_flash_attention=use_flash_attention, context_dim=context_dim if self.disable_self_attn else None) # is a self-attention self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff) self.attn2 = CrossAttention( query_dim=dim, @@ -315,7 +316,7 @@ def forward(self, x, context=None): return checkpoint(self._forward, (x, context), self.parameters(), self.use_checkpoint) def _forward(self, x, context=None): - x = self.attn1(self.norm1(x)) + x + x = self.attn1(self.norm1(x), context=context if self.disable_self_attn else None) + x x = self.attn2(self.norm2(x), context=context) + x x = self.ff(self.norm3(x)) + x return x @@ -338,11 +339,14 @@ def __init__( depth=1, dropout=0.0, context_dim=None, + disable_self_attn=False, use_linear=False, use_checkpoint=False, use_flash_attention=False, ): super().__init__() + if exists(context_dim) and not isinstance(context_dim, list): + context_dim = [context_dim] self.in_channels = in_channels inner_dim = n_heads * d_head self.norm = Normalize(in_channels) @@ -359,9 +363,10 @@ def __init__( n_heads, d_head, dropout=dropout, - context_dim=context_dim, + context_dim=context_dim[d], use_checkpoint=use_checkpoint, use_flash_attention=use_flash_attention, + disable_self_attn = disable_self_attn ) for d in range(depth) ] @@ -375,6 +380,8 @@ def __init__( def forward(self, x, context=None): # note: if no context is given, cross-attention defaults to self-attention + if not isinstance(context, list): + context = [context] b, c, h, w = x.shape x_in = x x = self.norm(x) @@ -384,11 +391,12 @@ def forward(self, x, context=None): x = x.contiguous() # workaround for dynamo ddp bug if self.use_linear: x = self.proj_in(x) - for block in self.transformer_blocks: - x = block(x, context=context) + for i, block in enumerate(self.transformer_blocks): + x = block(x, context=context[i]) if self.use_linear: x = self.proj_out(x) x = x.transpose(1, 2).view(b, c, h, w) # b (h w) c -> b c h w + x = x.contiguous() # workaround for dynamo ddp bu if not self.use_linear: x = self.proj_out(x) return x + x_in diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py index aab1fd3dbc50..b0f277b83c02 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py @@ -722,6 +722,8 @@ def __init__( self._load_pretrained_model(state_dict['state_dict'], from_NeMo=True) else: state_dict = load_state_dict(from_pretrained) + if 'state_dict' in state_dict.keys(): + state_dict = state_dict['state_dict'] self._load_pretrained_model(state_dict) def _input_blocks_mapping(self, input_dict): @@ -861,6 +863,18 @@ def _load_pretrained_model(self, state_dict, ignore_mismatched_sizes=False, from missing_keys = list(set(expected_keys) - set(loaded_keys)) unexpected_keys = list(set(loaded_keys) - set(expected_keys)) + if 'input_blocks.1.0.in_layers.2.weight' in loaded_keys and 'input_blocks.1.0.in_layers.1.weight' in expected_keys: + # GroupNormOpt fuses activation function to one layer, thus the indexing of weights are shifted for following + for key_ in missing_keys: + s = key_.split('.') + idx = int(s[-2]) + new_key_ = ".".join(s[:-2] + [str(int(idx + 1))] + [s[-1]]) + state_dict[key_] = state_dict[new_key_] + + loaded_keys = list(state_dict.keys()) + missing_keys = list(set(expected_keys) - set(loaded_keys)) + unexpected_keys = list(set(loaded_keys) - set(expected_keys)) + def _find_mismatched_keys( state_dict, model_state_dict, loaded_keys, ignore_mismatched_sizes, ): diff --git a/nemo/collections/multimodal/parts/stable_diffusion/utils.py b/nemo/collections/multimodal/parts/stable_diffusion/utils.py index cc285038bc45..c5d15a5ad5eb 100644 --- a/nemo/collections/multimodal/parts/stable_diffusion/utils.py +++ b/nemo/collections/multimodal/parts/stable_diffusion/utils.py @@ -33,12 +33,11 @@ def log_txt_as_img(wh, xc, size=10): for bi in range(b): txt = Image.new("RGB", wh, color="white") draw = ImageDraw.Draw(txt) - font = ImageFont.truetype('data/DejaVuSans.ttf', size=size) nc = int(40 * (wh[0] / 256)) lines = "\n".join(xc[bi][start : start + nc] for start in range(0, len(xc[bi]), nc)) try: - draw.text((0, 0), lines, fill="black", font=font) + draw.text((0, 0), lines, fill="black") except UnicodeEncodeError: print("Cant encode string for logging. Skipping.") diff --git a/nemo/collections/multimodal/parts/utils.py b/nemo/collections/multimodal/parts/utils.py index 686f92c5987b..39116ce0e9ec 100644 --- a/nemo/collections/multimodal/parts/utils.py +++ b/nemo/collections/multimodal/parts/utils.py @@ -50,6 +50,9 @@ def randn_like(x, generator=None): return torch.randn(x.shape, dtype=x.dtype, device=x.device, generator=generator) +def randn_like(x, generator=None): + return torch.randn(x.shape, dtype=x.dtype, device=x.device, generator=generator) + def setup_trainer_and_model_for_inference( model_provider: Any, cfg: DictConfig, model_cfg_modifier: Callable, ) -> Tuple[Trainer, Any]: From 9f46a933ba78cb3c288ed8584da491546e403c6b Mon Sep 17 00:00:00 2001 From: mingyuanm Date: Tue, 27 Jun 2023 12:31:08 -0700 Subject: [PATCH 141/512] formatting --- examples/multimodal/convert_ckpt_to_nemo.py | 7 +- .../generative/controlnet/controlnet_infer.py | 83 ++-- .../generative/controlnet/train_controlnet.py | 40 +- .../data/controlnet/controlnet_dataset.py | 6 +- .../models/controlnet/controlnet.py | 287 ++++++----- .../models/controlnet/uniformer/__init__.py | 12 +- .../configs/_base_/datasets/ade20k.py | 16 +- .../configs/_base_/datasets/chase_db1.py | 21 +- .../configs/_base_/datasets/cityscapes.py | 24 +- .../_base_/datasets/cityscapes_769x769.py | 11 +- .../configs/_base_/datasets/drive.py | 21 +- .../uniformer/configs/_base_/datasets/hrf.py | 21 +- .../configs/_base_/datasets/pascal_context.py | 16 +- .../_base_/datasets/pascal_context_59.py | 16 +- .../configs/_base_/datasets/pascal_voc12.py | 16 +- .../_base_/datasets/pascal_voc12_aug.py | 7 +- .../configs/_base_/datasets/stare.py | 21 +- .../configs/_base_/default_runtime.py | 3 +- .../configs/_base_/models/ann_r50-d8.py | 16 +- .../configs/_base_/models/apcnet_r50-d8.py | 14 +- .../configs/_base_/models/ccnet_r50-d8.py | 14 +- .../uniformer/configs/_base_/models/cgnet.py | 33 +- .../configs/_base_/models/danet_r50-d8.py | 14 +- .../configs/_base_/models/deeplabv3_r50-d8.py | 14 +- .../_base_/models/deeplabv3_unet_s5-d16.py | 14 +- .../_base_/models/deeplabv3plus_r50-d8.py | 14 +- .../configs/_base_/models/dmnet_r50-d8.py | 14 +- .../configs/_base_/models/dnl_r50-d8.py | 14 +- .../configs/_base_/models/emanet_r50-d8.py | 14 +- .../configs/_base_/models/encnet_r50-d8.py | 17 +- .../configs/_base_/models/fast_scnn.py | 18 +- .../configs/_base_/models/fcn_hr18.py | 37 +- .../configs/_base_/models/fcn_r50-d8.py | 14 +- .../configs/_base_/models/fcn_unet_s5-d16.py | 14 +- .../configs/_base_/models/fpn_r50.py | 16 +- .../configs/_base_/models/fpn_uniformer.py | 21 +- .../configs/_base_/models/gcnet_r50-d8.py | 18 +- .../configs/_base_/models/lraspp_m-v3-d8.py | 13 +- .../configs/_base_/models/nonlocal_r50-d8.py | 14 +- .../configs/_base_/models/ocrnet_hr18.py | 41 +- .../configs/_base_/models/ocrnet_r50-d8.py | 14 +- .../configs/_base_/models/pointrend_r50.py | 27 +- .../configs/_base_/models/psanet_r50-d8.py | 14 +- .../configs/_base_/models/pspnet_r50-d8.py | 14 +- .../_base_/models/pspnet_unet_s5-d16.py | 14 +- .../configs/_base_/models/upernet_r50.py | 14 +- .../_base_/models/upernet_uniformer.py | 20 +- .../exp/upernet_global_small/config.py | 54 +- .../exp/upernet_global_small/test_config_g.py | 52 +- .../upernet_global_small/test_config_h32.py | 54 +- .../upernet_global_small/test_config_w32.py | 54 +- .../uniformer/mmcv/arraymisc/quantization.py | 18 +- .../controlnet/uniformer/mmcv/cnn/__init__.py | 152 ++++-- .../controlnet/uniformer/mmcv/cnn/alexnet.py | 1 + .../uniformer/mmcv/cnn/bricks/__init__.py | 52 +- .../uniformer/mmcv/cnn/bricks/activation.py | 17 +- .../mmcv/cnn/bricks/context_block.py | 12 +- .../cnn/bricks/conv2d_adaptive_padding.py | 28 +- .../uniformer/mmcv/cnn/bricks/conv_module.py | 44 +- .../uniformer/mmcv/cnn/bricks/conv_ws.py | 67 +-- .../bricks/depthwise_separable_conv_module.py | 41 +- .../uniformer/mmcv/cnn/bricks/drop.py | 9 +- .../mmcv/cnn/bricks/generalized_attention.py | 272 ++++------ .../uniformer/mmcv/cnn/bricks/non_local.py | 100 ++-- .../uniformer/mmcv/cnn/bricks/norm.py | 16 +- .../uniformer/mmcv/cnn/bricks/plugin.py | 3 +- .../uniformer/mmcv/cnn/bricks/transformer.py | 296 +++++------ .../uniformer/mmcv/cnn/bricks/upsample.py | 9 +- .../uniformer/mmcv/cnn/bricks/wrappers.py | 43 +- .../controlnet/uniformer/mmcv/cnn/builder.py | 4 +- .../controlnet/uniformer/mmcv/cnn/resnet.py | 109 ++-- .../uniformer/mmcv/cnn/utils/__init__.py | 53 +- .../uniformer/mmcv/cnn/utils/flops_counter.py | 109 ++-- .../uniformer/mmcv/cnn/utils/fuse_conv_bn.py | 9 +- .../uniformer/mmcv/cnn/utils/sync_bn.py | 6 +- .../uniformer/mmcv/cnn/utils/weight_init.py | 178 +++---- .../controlnet/uniformer/mmcv/cnn/vgg.py | 64 +-- .../uniformer/mmcv/engine/__init__.py | 8 +- .../controlnet/uniformer/mmcv/engine/test.py | 21 +- .../uniformer/mmcv/fileio/__init__.py | 14 +- .../uniformer/mmcv/fileio/file_client.py | 231 ++++----- .../mmcv/fileio/handlers/json_handler.py | 1 - .../mmcv/fileio/handlers/pickle_handler.py | 6 +- .../mmcv/fileio/handlers/yaml_handler.py | 4 +- .../controlnet/uniformer/mmcv/fileio/io.py | 9 +- .../controlnet/uniformer/mmcv/fileio/parse.py | 12 +- .../uniformer/mmcv/image/__init__.py | 122 ++++- .../uniformer/mmcv/image/colorspace.py | 42 +- .../uniformer/mmcv/image/geometric.py | 181 +++---- .../controlnet/uniformer/mmcv/image/io.py | 28 +- .../controlnet/uniformer/mmcv/image/misc.py | 3 +- .../uniformer/mmcv/image/photometric.py | 48 +- .../controlnet/uniformer/mmcv/ops/__init__.py | 129 +++-- .../uniformer/mmcv/ops/assign_score_withk.py | 22 +- .../uniformer/mmcv/ops/ball_query.py | 16 +- .../controlnet/uniformer/mmcv/ops/bbox.py | 7 +- .../uniformer/mmcv/ops/border_align.py | 23 +- .../uniformer/mmcv/ops/box_iou_rotated.py | 3 +- .../controlnet/uniformer/mmcv/ops/carafe.py | 74 ++- .../uniformer/mmcv/ops/cc_attention.py | 8 +- .../uniformer/mmcv/ops/contour_expand.py | 12 +- .../uniformer/mmcv/ops/corner_pool.py | 55 +- .../uniformer/mmcv/ops/correlation.py | 55 +- .../uniformer/mmcv/ops/deform_conv.py | 193 ++++---- .../uniformer/mmcv/ops/deform_roi_pool.py | 119 ++--- .../uniformer/mmcv/ops/deprecated_wrappers.py | 16 +- .../uniformer/mmcv/ops/focal_loss.py | 73 +-- .../mmcv/ops/furthest_point_sample.py | 23 +- .../mmcv/ops/fused_bias_leakyrelu.py | 47 +- .../uniformer/mmcv/ops/gather_points.py | 18 +- .../uniformer/mmcv/ops/group_points.py | 70 +-- .../controlnet/uniformer/mmcv/ops/info.py | 10 +- .../controlnet/uniformer/mmcv/ops/iou3d.py | 13 +- .../controlnet/uniformer/mmcv/ops/knn.py | 14 +- .../uniformer/mmcv/ops/masked_conv.py | 63 +-- .../uniformer/mmcv/ops/merge_cells.py | 67 ++- .../mmcv/ops/modulated_deform_conv.py | 142 +++--- .../mmcv/ops/multi_scale_deform_attn.py | 186 ++++--- .../controlnet/uniformer/mmcv/ops/nms.py | 112 ++--- .../uniformer/mmcv/ops/pixel_group.py | 18 +- .../uniformer/mmcv/ops/point_sample.py | 63 +-- .../uniformer/mmcv/ops/points_in_boxes.py | 78 ++- .../uniformer/mmcv/ops/points_sampler.py | 38 +- .../controlnet/uniformer/mmcv/ops/psa_mask.py | 23 +- .../uniformer/mmcv/ops/roi_align.py | 83 ++-- .../uniformer/mmcv/ops/roi_align_rotated.py | 48 +- .../controlnet/uniformer/mmcv/ops/roi_pool.py | 21 +- .../uniformer/mmcv/ops/roiaware_pool3d.py | 32 +- .../uniformer/mmcv/ops/roipoint_pool3d.py | 26 +- .../controlnet/uniformer/mmcv/ops/saconv.py | 65 +-- .../uniformer/mmcv/ops/scatter_points.py | 33 +- .../controlnet/uniformer/mmcv/ops/sync_bn.py | 111 +++-- .../uniformer/mmcv/ops/three_interpolate.py | 16 +- .../controlnet/uniformer/mmcv/ops/three_nn.py | 3 +- .../uniformer/mmcv/ops/tin_shift.py | 9 +- .../uniformer/mmcv/ops/upfirdn2d.py | 63 +-- .../controlnet/uniformer/mmcv/ops/voxelize.py | 39 +- .../uniformer/mmcv/parallel/__init__.py | 10 +- .../uniformer/mmcv/parallel/_functions.py | 7 +- .../uniformer/mmcv/parallel/collate.py | 37 +- .../uniformer/mmcv/parallel/data_container.py | 12 +- .../uniformer/mmcv/parallel/data_parallel.py | 24 +- .../uniformer/mmcv/parallel/distributed.py | 46 +- .../mmcv/parallel/distributed_deprecated.py | 32 +- .../uniformer/mmcv/runner/__init__.py | 137 +++-- .../uniformer/mmcv/runner/base_module.py | 45 +- .../uniformer/mmcv/runner/base_runner.py | 129 ++--- .../uniformer/mmcv/runner/builder.py | 9 +- .../uniformer/mmcv/runner/checkpoint.py | 113 ++--- .../mmcv/runner/default_constructor.py | 3 +- .../uniformer/mmcv/runner/dist_utils.py | 15 +- .../mmcv/runner/epoch_based_runner.py | 49 +- .../uniformer/mmcv/runner/fp16_utils.py | 68 +-- .../uniformer/mmcv/runner/hooks/__init__.py | 55 +- .../uniformer/mmcv/runner/hooks/checkpoint.py | 73 ++- .../uniformer/mmcv/runner/hooks/closure.py | 1 - .../uniformer/mmcv/runner/hooks/ema.py | 11 +- .../uniformer/mmcv/runner/hooks/evaluation.py | 150 +++--- .../uniformer/mmcv/runner/hooks/hook.py | 16 +- .../uniformer/mmcv/runner/hooks/iter_timer.py | 1 - .../mmcv/runner/hooks/logger/__init__.py | 11 +- .../mmcv/runner/hooks/logger/base.py | 21 +- .../mmcv/runner/hooks/logger/dvclive.py | 15 +- .../mmcv/runner/hooks/logger/mlflow.py | 18 +- .../mmcv/runner/hooks/logger/neptune.py | 19 +- .../mmcv/runner/hooks/logger/pavi.py | 42 +- .../mmcv/runner/hooks/logger/tensorboard.py | 20 +- .../mmcv/runner/hooks/logger/text.py | 93 ++-- .../mmcv/runner/hooks/logger/wandb.py | 28 +- .../uniformer/mmcv/runner/hooks/lr_updater.py | 197 +++----- .../uniformer/mmcv/runner/hooks/memory.py | 1 - .../mmcv/runner/hooks/momentum_updater.py | 230 +++------ .../uniformer/mmcv/runner/hooks/optimizer.py | 135 ++--- .../uniformer/mmcv/runner/hooks/profiler.py | 72 ++- .../mmcv/runner/iter_based_runner.py | 71 +-- .../uniformer/mmcv/runner/log_buffer.py | 1 - .../mmcv/runner/optimizer/__init__.py | 10 +- .../mmcv/runner/optimizer/builder.py | 12 +- .../runner/optimizer/default_constructor.py | 77 ++- .../controlnet/uniformer/mmcv/runner/utils.py | 3 +- .../uniformer/mmcv/utils/__init__.py | 214 ++++++-- .../controlnet/uniformer/mmcv/utils/config.py | 104 ++-- .../controlnet/uniformer/mmcv/utils/env.py | 11 +- .../uniformer/mmcv/utils/ext_loader.py | 3 +- .../uniformer/mmcv/utils/logging.py | 7 +- .../controlnet/uniformer/mmcv/utils/misc.py | 42 +- .../uniformer/mmcv/utils/parrots_jit.py | 10 +- .../uniformer/mmcv/utils/parrots_wrapper.py | 27 +- .../controlnet/uniformer/mmcv/utils/path.py | 8 +- .../uniformer/mmcv/utils/progressbar.py | 50 +- .../uniformer/mmcv/utils/registry.py | 44 +- .../uniformer/mmcv/utils/testing.py | 16 +- .../controlnet/uniformer/mmcv/utils/timer.py | 1 - .../controlnet/uniformer/mmcv/utils/trace.py | 7 +- .../uniformer/mmcv/utils/version_utils.py | 6 +- .../controlnet/uniformer/mmcv/version.py | 1 + .../uniformer/mmcv/video/__init__.py | 28 +- .../controlnet/uniformer/mmcv/video/io.py | 56 +-- .../uniformer/mmcv/video/optflow.py | 68 ++- .../uniformer/mmcv/video/processing.py | 48 +- .../uniformer/mmcv/visualization/__init__.py | 10 +- .../uniformer/mmcv/visualization/color.py | 1 + .../uniformer/mmcv/visualization/image.py | 52 +- .../uniformer/mmcv/visualization/optflow.py | 13 +- .../uniformer/mmcv_custom/__init__.py | 2 +- .../uniformer/mmcv_custom/checkpoint.py | 83 ++-- .../uniformer/mmseg/apis/__init__.py | 11 +- .../uniformer/mmseg/apis/inference.py | 19 +- .../controlnet/uniformer/mmseg/apis/test.py | 49 +- .../controlnet/uniformer/mmseg/apis/train.py | 51 +- .../mmseg/core/evaluation/__init__.py | 10 +- .../mmseg/core/evaluation/class_names.py | 468 +++++++++++++++--- .../mmseg/core/evaluation/eval_hooks.py | 19 +- .../mmseg/core/evaluation/metrics.py | 139 ++---- .../core/seg/sampler/ohem_pixel_sampler.py | 10 +- .../uniformer/mmseg/datasets/__init__.py | 21 +- .../uniformer/mmseg/datasets/ade.py | 372 +++++++++++--- .../uniformer/mmseg/datasets/builder.py | 55 +- .../uniformer/mmseg/datasets/chase_db1.py | 6 +- .../uniformer/mmseg/datasets/cityscapes.py | 92 ++-- .../uniformer/mmseg/datasets/custom.py | 86 ++-- .../uniformer/mmseg/datasets/drive.py | 6 +- .../uniformer/mmseg/datasets/hrf.py | 6 +- .../mmseg/datasets/pascal_context.py | 309 +++++++++--- .../mmseg/datasets/pipelines/__init__.py | 47 +- .../mmseg/datasets/pipelines/formating.py | 38 +- .../mmseg/datasets/pipelines/loading.py | 36 +- .../mmseg/datasets/pipelines/test_time_aug.py | 35 +- .../mmseg/datasets/pipelines/transforms.py | 184 +++---- .../uniformer/mmseg/datasets/stare.py | 6 +- .../uniformer/mmseg/datasets/voc.py | 58 ++- .../uniformer/mmseg/models/__init__.py | 13 +- .../mmseg/models/backbones/__init__.py | 18 +- .../uniformer/mmseg/models/backbones/cgnet.py | 125 +++-- .../mmseg/models/backbones/fast_scnn.py | 220 ++++---- .../uniformer/mmseg/models/backbones/hrnet.py | 221 ++++----- .../mmseg/models/backbones/mobilenet_v2.py | 51 +- .../mmseg/models/backbones/mobilenet_v3.py | 112 +++-- .../mmseg/models/backbones/resnest.py | 107 ++-- .../mmseg/models/backbones/resnet.py | 236 ++++----- .../mmseg/models/backbones/resnext.py | 49 +- .../uniformer/mmseg/models/backbones/unet.py | 236 +++++---- .../mmseg/models/backbones/uniformer.py | 294 ++++++++--- .../uniformer/mmseg/models/backbones/vit.py | 224 ++++----- .../uniformer/mmseg/models/builder.py | 15 +- .../mmseg/models/decode_heads/__init__.py | 25 +- .../mmseg/models/decode_heads/ann_head.py | 84 ++-- .../mmseg/models/decode_heads/apc_head.py | 67 +-- .../mmseg/models/decode_heads/aspp_head.py | 25 +- .../decode_heads/cascade_decode_head.py | 3 +- .../mmseg/models/decode_heads/cc_head.py | 3 +- .../mmseg/models/decode_heads/da_head.py | 40 +- .../mmseg/models/decode_heads/decode_head.py | 62 +-- .../mmseg/models/decode_heads/dm_head.py | 45 +- .../mmseg/models/decode_heads/dnl_head.py | 21 +- .../mmseg/models/decode_heads/ema_head.py | 41 +- .../mmseg/models/decode_heads/enc_head.py | 67 +-- .../mmseg/models/decode_heads/fcn_head.py | 18 +- .../mmseg/models/decode_heads/fpn_head.py | 23 +- .../mmseg/models/decode_heads/gc_head.py | 12 +- .../mmseg/models/decode_heads/lraspp_head.py | 45 +- .../mmseg/models/decode_heads/nl_head.py | 9 +- .../mmseg/models/decode_heads/ocr_head.py | 23 +- .../mmseg/models/decode_heads/point_head.py | 117 ++--- .../mmseg/models/decode_heads/psa_head.py | 92 ++-- .../mmseg/models/decode_heads/psp_head.py | 20 +- .../models/decode_heads/sep_aspp_head.py | 45 +- .../mmseg/models/decode_heads/sep_fcn_head.py | 9 +- .../mmseg/models/decode_heads/uper_head.py | 40 +- .../uniformer/mmseg/models/losses/__init__.py | 17 +- .../uniformer/mmseg/models/losses/accuracy.py | 9 +- .../mmseg/models/losses/cross_entropy_loss.py | 81 +-- .../mmseg/models/losses/dice_loss.py | 43 +- .../mmseg/models/losses/lovasz_loss.py | 117 ++--- .../uniformer/mmseg/models/losses/utils.py | 10 +- .../uniformer/mmseg/models/necks/fpn.py | 54 +- .../mmseg/models/necks/multilevel_neck.py | 33 +- .../uniformer/mmseg/models/segmentors/base.py | 41 +- .../segmentors/cascade_encoder_decoder.py | 41 +- .../models/segmentors/encoder_decoder.py | 55 +- .../uniformer/mmseg/models/utils/__init__.py | 11 +- .../uniformer/mmseg/models/utils/drop.py | 10 +- .../mmseg/models/utils/inverted_residual.py | 118 +++-- .../uniformer/mmseg/models/utils/res_layer.py | 64 +-- .../uniformer/mmseg/models/utils/se_layer.py | 22 +- .../models/utils/self_attention_block.py | 63 +-- .../mmseg/models/utils/up_conv_block.py | 41 +- .../mmseg/models/utils/weight_init.py | 9 +- .../uniformer/mmseg/ops/encoding.py | 29 +- .../uniformer/mmseg/ops/wrappers.py | 25 +- .../uniformer/mmseg/utils/collect_env.py | 3 +- .../multimodal/models/controlnet/util.py | 37 +- .../stable_diffusion/samplers/base_sampler.py | 3 +- .../modules/stable_diffusion/attention.py | 14 +- .../diffusionmodules/openaimodel.py | 5 +- nemo/collections/multimodal/parts/utils.py | 1 + 296 files changed, 7448 insertions(+), 7667 deletions(-) diff --git a/examples/multimodal/convert_ckpt_to_nemo.py b/examples/multimodal/convert_ckpt_to_nemo.py index 508682166cec..3ae6cd1ba117 100644 --- a/examples/multimodal/convert_ckpt_to_nemo.py +++ b/examples/multimodal/convert_ckpt_to_nemo.py @@ -32,12 +32,12 @@ from pytorch_lightning.trainer.trainer import Trainer from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel +from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet from nemo.collections.multimodal.models.dreambooth.dreambooth import MegatronDreamBooth from nemo.collections.multimodal.models.imagen.imagen import MegatronImagen from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector -from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet from nemo.utils import AppState, logging from nemo.utils.distributed import initialize_distributed from nemo.utils.model_utils import inject_model_parallel_rank @@ -161,8 +161,9 @@ def convert(local_rank, rank, world_size, args): elif args.model_type == 'imagen': model = MegatronImagen.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, trainer=trainer) elif args.model_type == 'controlnet': - model = MegatronControlNet.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, - trainer=trainer) + model = MegatronControlNet.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) else: raise ValueError(f"Unrecognized model_type {args.model_type}.") diff --git a/examples/multimodal/generative/controlnet/controlnet_infer.py b/examples/multimodal/generative/controlnet/controlnet_infer.py index de0360adecce..156433b59c14 100644 --- a/examples/multimodal/generative/controlnet/controlnet_infer.py +++ b/examples/multimodal/generative/controlnet/controlnet_infer.py @@ -8,20 +8,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import torch -import cv2 import os import time -import einops +import cv2 +import einops +import torch from PIL import Image + from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet +from nemo.collections.multimodal.models.controlnet.util import get_preprocessing_function +from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler +from nemo.collections.multimodal.models.stable_diffusion.samplers.plms import PLMSSampler from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference from nemo.core.config import hydra_runner -from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler -from nemo.collections.multimodal.models.stable_diffusion.samplers.plms import PLMSSampler -from nemo.collections.multimodal.models.controlnet.util import get_preprocessing_function def get_control_input(image_path, batch_size, hint_image_size, control_image_preprocess=None): image = cv2.imread(image_path) @@ -35,9 +36,10 @@ def get_control_input(image_path, batch_size, hint_image_size, control_image_pre control = einops.rearrange(control, 'b h w c -> b c h w') return control + def encode_prompt(cond_stage_model, prompt, unconditional_guidance_scale, batch_size): c = cond_stage_model.encode(batch_size * [prompt]) - if unconditional_guidance_scale != 1.: + if unconditional_guidance_scale != 1.0: uc = cond_stage_model.encode(batch_size * [""]) else: uc = None @@ -57,14 +59,16 @@ def initialize_sampler(model, sampler_type): def decode_images(model, samples): images = model.decode_first_stage(samples) - images = torch.clamp((images + 1.) / 2., min=0., max=1.) + images = torch.clamp((images + 1.0) / 2.0, min=0.0, max=1.0) return images + def torch_to_numpy(images): numpy_images = [x.float().cpu().permute(0, 2, 3, 1).numpy() for x in images] return numpy_images + def numpy_to_pil(images): """ Convert a numpy image or a batch of images to a PIL image. @@ -106,12 +110,12 @@ def pipeline(model, cfg, rng=None, verbose=True): else: raise ValueError('precision must be in [32, 16, "bf16"]') - with torch.no_grad(), torch.cuda.amp.autocast(enabled=autocast_dtype in (torch.half, torch.bfloat16), - dtype=autocast_dtype, ): + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): in_channels = model.model.diffusion_model.in_channels - sampler = initialize_sampler(model, sampler_type.upper()) output = [] @@ -120,31 +124,29 @@ def pipeline(model, cfg, rng=None, verbose=True): if isinstance(prompts, str): prompts = [prompts] + assert len(prompts) == len(control) - assert (len(prompts) == len(control)) - - - for control, prompt in zip(control,prompts): + for control, prompt in zip(control, prompts): tic = time.perf_counter() tic_total = tic - txt_cond, txt_u_cond = encode_prompt(model.cond_stage_model, prompt, unconditional_guidance_scale, batch_size) - - control = get_control_input(control, batch_size, hint_image_size, control_image_preprocess).to(torch.cuda.current_device(), dtype=autocast_dtype) - + txt_cond, txt_u_cond = encode_prompt( + model.cond_stage_model, prompt, unconditional_guidance_scale, batch_size + ) + control = get_control_input(control, batch_size, hint_image_size, control_image_preprocess).to( + torch.cuda.current_device(), dtype=autocast_dtype + ) cond = {"c_concat": control, "c_crossattn": txt_cond} - u_cond = {"c_concat": None if guess_mode else control,"c_crossattn": txt_u_cond} + u_cond = {"c_concat": None if guess_mode else control, "c_crossattn": txt_u_cond} toc = time.perf_counter() conditioning_time = toc - tic latent_shape = [batch_size, height // downsampling_factor, width // downsampling_factor] latents = torch.randn( - [batch_size, in_channels, height // downsampling_factor, width // downsampling_factor], - generator=rng).to( - torch.cuda.current_device()) - + [batch_size, in_channels, height // downsampling_factor, width // downsampling_factor], generator=rng + ).to(torch.cuda.current_device()) tic = time.perf_counter() samples, intermediates = sampler.sample( @@ -156,7 +158,7 @@ def pipeline(model, cfg, rng=None, verbose=True): unconditional_guidance_scale=unconditional_guidance_scale, unconditional_conditioning=u_cond, eta=eta, - x_T=latents + x_T=latents, ) toc = time.perf_counter() sampling_time = toc - tic @@ -170,13 +172,15 @@ def pipeline(model, cfg, rng=None, verbose=True): total_time = toc_total - tic_total output.append(images) - throughput.append({ - 'text-conditioning-time': conditioning_time, - 'sampling-time': sampling_time, - 'decode-time': decode_time, - 'total-time': total_time, - 'sampling-steps': inference_steps, - }) + throughput.append( + { + 'text-conditioning-time': conditioning_time, + 'sampling-time': sampling_time, + 'decode-time': decode_time, + 'total-time': total_time, + 'sampling-steps': inference_steps, + } + ) # Convert output type and save to disk if output_type == 'torch': @@ -189,7 +193,7 @@ def pipeline(model, cfg, rng=None, verbose=True): if save_to_file: os.makedirs(out_path, exist_ok=True) # Saving control map - control_image = control[0].float().cpu().permute(1,2,0).numpy() + control_image = control[0].float().cpu().permute(1, 2, 0).numpy() control_image = Image.fromarray((control_image * 255).round().astype("uint8")) control_image.save(os.path.join(out_path, f'{prompt[:50]}_control.png')) if output_type == 'pil': @@ -209,7 +213,6 @@ def pipeline(model, cfg, rng=None, verbose=True): print(ave_metrics) - @hydra_runner(config_path='conf', config_name='controlnet_infer') def main(cfg): def model_cfg_modifier(model_cfg): @@ -224,19 +227,21 @@ def model_cfg_modifier(model_cfg): torch.backends.cuda.matmul.allow_tf32 = True trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronControlNet, - cfg=cfg, - model_cfg_modifier=model_cfg_modifier + model_provider=MegatronControlNet, cfg=cfg, model_cfg_modifier=model_cfg_modifier ) model = megatron_diffusion_model.model model.cuda().eval() guess_mode = cfg.model.guess_mode - model.contol_scales = [cfg.model.strength * (0.825 ** float(12 - i)) for i in range(13)] if guess_mode else ([cfg.model.strength] * 13) + model.contol_scales = ( + [cfg.model.strength * (0.825 ** float(12 - i)) for i in range(13)] + if guess_mode + else ([cfg.model.strength] * 13) + ) rng = torch.Generator().manual_seed(cfg.infer.seed) pipeline(model, cfg, rng=rng) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/examples/multimodal/generative/controlnet/train_controlnet.py b/examples/multimodal/generative/controlnet/train_controlnet.py index e12ccdd48467..23d4c3f8f7c1 100644 --- a/examples/multimodal/generative/controlnet/train_controlnet.py +++ b/examples/multimodal/generative/controlnet/train_controlnet.py @@ -1,28 +1,26 @@ -from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet -from nemo.core.config import hydra_runner -from nemo.collections.multimodal.parts.stable_diffusion.utils import instantiate_from_config from datetime import timedelta -from pytorch_lightning.strategies.ddp import DDPStrategy -import torch -import pytorch_lightning as pl -from nemo.utils.exp_manager import StatelessTimer, exp_manager -from nemo.collections.nlp.parts.nlp_overrides import ( - GradScaler, - MegatronHalfPrecisionPlugin, - NLPDDPStrategy, - PipelineMixedPrecisionPlugin, -) - +import pytorch_lightning as pl +import torch +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.strategies.ddp import DDPStrategy from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import ( construct_image_augmentations, identical_transform, ) - +from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet from nemo.collections.multimodal.models.controlnet.util import ImageLogger -from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from nemo.collections.multimodal.parts.stable_diffusion.utils import instantiate_from_config +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils.exp_manager import StatelessTimer, exp_manager @hydra_runner(config_path='conf', config_name='controlnet_v1-5.yaml') @@ -30,10 +28,9 @@ def main(cfg): megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' - plugins=[] + plugins = [] callbacks = [] - # Tune for DDP strategy = NLPDDPStrategy( no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce @@ -59,19 +56,16 @@ def main(cfg): if cfg.get('cluster_type', None) == 'BCP': plugins.append(TorchElasticEnvironment()) - if cfg.model.get('image_logger', None): callbacks.append(ImageLogger(**cfg.model.image_logger)) - trainer = pl.Trainer(**cfg.trainer, - plugins=plugins, - callbacks=callbacks, - strategy=strategy) + trainer = pl.Trainer(**cfg.trainer, plugins=plugins, callbacks=callbacks, strategy=strategy) exp_manager(trainer, cfg.get("exp_manager", None)) model = MegatronControlNet(cfg.model, trainer) trainer.fit(model) + if __name__ == '__main__': main() diff --git a/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py b/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py index 8b5a349b03de..301be555dad1 100644 --- a/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py +++ b/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py @@ -21,8 +21,7 @@ def build_train_valid_datasets( - model_cfg, - consumed_samples, + model_cfg, consumed_samples, ): data_cfg = model_cfg.data @@ -65,8 +64,7 @@ def transform_fn(sample): def build_train_valid_precached_datasets( - model_cfg, - consumed_samples, + model_cfg, consumed_samples, ): data_cfg = model_cfg.data diff --git a/nemo/collections/multimodal/models/controlnet/controlnet.py b/nemo/collections/multimodal/models/controlnet/controlnet.py index 11b8a0026cc8..e7b2a463638e 100644 --- a/nemo/collections/multimodal/models/controlnet/controlnet.py +++ b/nemo/collections/multimodal/models/controlnet/controlnet.py @@ -1,32 +1,37 @@ +from typing import Any, Dict, Optional, Union + +import einops import torch import torch.nn as nn -import einops +from einops import rearrange, repeat from omegaconf import DictConfig, OmegaConf, open_dict -from torch._dynamo import optimize -from torch._inductor import config as inductor_config from pytorch_lightning import Trainer from pytorch_lightning.utilities.distributed import rank_zero_only -from typing import Any, Dict, Optional, Union -from nemo.utils import logging +from torch._dynamo import optimize +from torch._inductor import config as inductor_config +from torchvision.utils import make_grid +from nemo.collections.multimodal.data.controlnet.controlnet_dataset import build_train_valid_datasets from nemo.collections.multimodal.models.multimodal_base_model import MegatronMultimodalModel -from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion +from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler +from nemo.collections.multimodal.modules.stable_diffusion.attention import SpatialTransformer +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel import ( + AttentionBlock, + Downsample, + ResBlock, + TimestepEmbedSequential, + UNetModel, +) from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( conv_nd, linear, - zero_module, timestep_embedding, + zero_module, ) -from einops import rearrange, repeat -from torchvision.utils import make_grid -from nemo.collections.multimodal.modules.stable_diffusion.attention import SpatialTransformer -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel import UNetModel, TimestepEmbedSequential, ResBlock, Downsample, AttentionBlock -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion from nemo.collections.multimodal.parts.stable_diffusion.utils import exists, log_txt_as_img -from nemo.collections.multimodal.data.controlnet.controlnet_dataset import build_train_valid_datasets - -from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler - +from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.utils import logging try: from apex import amp @@ -74,7 +79,6 @@ def forward(self, x, timesteps=None, context=None, control=None, only_mid_contro return self.out(h) - class ControlLDM(LatentDiffusion): def __init__(self, cfg): super().__init__(cfg=cfg) @@ -93,7 +97,6 @@ def __init__(self, cfg): if self.channels_last: self.control_model = self.control_model.to(memory_format=torch.channels_last) - @torch.no_grad() def get_input(self, batch, k, bs=None, *args, **kwargs): x, c = super().get_input(batch, self.first_stage_key, *args, **kwargs) @@ -112,16 +115,19 @@ def apply_model(self, x_noisy, t, cond, *args, **kwargs): assert isinstance(cond, dict) diffusion_model = self.model.diffusion_model - #cond_txt = torch.cat(cond['c_crossattn'], 1) ## Has removed this first dim in the get_input function, same for below hint input + # cond_txt = torch.cat(cond['c_crossattn'], 1) ## Has removed this first dim in the get_input function, same for below hint input cond_txt = cond['c_crossattn'] - if cond['c_concat'] is None: - eps = diffusion_model(x=x_noisy, timesteps=t, context=cond_txt, control=None, only_mid_control=self.only_mid_control) + eps = diffusion_model( + x=x_noisy, timesteps=t, context=cond_txt, control=None, only_mid_control=self.only_mid_control + ) else: control = self.control_model(x=x_noisy, hint=cond['c_concat'], timesteps=t, context=cond_txt) control = [c * scale for c, scale in zip(control, self.control_scales)] - eps = diffusion_model(x=x_noisy, timesteps=t, context=cond_txt, control=control, only_mid_control=self.only_mid_control) + eps = diffusion_model( + x=x_noisy, timesteps=t, context=cond_txt, control=control, only_mid_control=self.only_mid_control + ) return eps @torch.no_grad() @@ -129,11 +135,25 @@ def get_unconditional_conditioning(self, N): return self.get_learned_conditioning([""] * N) @torch.no_grad() - def log_images(self, batch, N=4, n_row=2, sample=False, ddim_steps=50, ddim_eta=0.0, return_keys=None, - quantize_denoised=True, inpaint=True, plot_denoise_rows=False, plot_progressive_rows=True, - plot_diffusion_rows=False, unconditional_guidance_scale=9.0, unconditional_guidance_label=None, - use_ema_scope=True, - **kwargs): + def log_images( + self, + batch, + N=4, + n_row=2, + sample=False, + ddim_steps=50, + ddim_eta=0.0, + return_keys=None, + quantize_denoised=True, + inpaint=True, + plot_denoise_rows=False, + plot_progressive_rows=True, + plot_diffusion_rows=False, + unconditional_guidance_scale=9.0, + unconditional_guidance_label=None, + use_ema_scope=True, + **kwargs, + ): use_ddim = ddim_steps is not None log = dict() @@ -166,9 +186,13 @@ def log_images(self, batch, N=4, n_row=2, sample=False, ddim_steps=50, ddim_eta= if sample: # get denoise row - samples, z_denoise_row = self.sample_log(cond={"c_concat": c_cat, "c_crossattn": c}, - batch_size=N, ddim=use_ddim, - ddim_steps=ddim_steps, eta=ddim_eta) + samples, z_denoise_row = self.sample_log( + cond={"c_concat": c_cat, "c_crossattn": c}, + batch_size=N, + ddim=use_ddim, + ddim_steps=ddim_steps, + eta=ddim_eta, + ) x_samples = self.decode_first_stage(samples) log["samples"] = x_samples if plot_denoise_rows: @@ -179,12 +203,15 @@ def log_images(self, batch, N=4, n_row=2, sample=False, ddim_steps=50, ddim_eta= uc_cross = self.get_unconditional_conditioning(N) uc_cat = c_cat # torch.zeros_like(c_cat) uc_full = {"c_concat": uc_cat, "c_crossattn": uc_cross} - samples_cfg, _ = self.sample_log(cond={"c_concat": c_cat, "c_crossattn": c}, - batch_size=N, ddim=use_ddim, - ddim_steps=ddim_steps, eta=ddim_eta, - unconditional_guidance_scale=unconditional_guidance_scale, - unconditional_conditioning=uc_full, - ) + samples_cfg, _ = self.sample_log( + cond={"c_concat": c_cat, "c_crossattn": c}, + batch_size=N, + ddim=use_ddim, + ddim_steps=ddim_steps, + eta=ddim_eta, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=uc_full, + ) x_samples_cfg = self.decode_first_stage(samples_cfg) log[f"samples_cfg_scale_{unconditional_guidance_scale:.2f}"] = x_samples_cfg @@ -198,7 +225,6 @@ def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, shape, cond, verbose=False, **kwargs) return samples, intermediates - def parameters(self): params = list(self.control_model.parameters()) if not self.sd_locked: @@ -206,7 +232,6 @@ def parameters(self): params += list(self.model.diffusion_model.out.parameters()) return params - def low_vram_shift(self, is_diffusing): if is_diffusing: self.model = self.model.cuda() @@ -219,47 +244,53 @@ def low_vram_shift(self, is_diffusing): self.first_stage_model = self.first_stage_model.cuda() self.cond_stage_model = self.cond_stage_model.cuda() + class ControlNet(nn.Module): def __init__( - self, - image_size, - in_channels, - model_channels, - hint_channels, - num_res_blocks, - attention_resolutions, - dropout=0, - channel_mult=(1, 2, 4, 8), - conv_resample=True, - dims=2, - use_checkpoint=False, - use_fp16=False, - num_heads=-1, - num_head_channels=-1, - num_heads_upsample=-1, - use_scale_shift_norm=False, - resblock_updown=False, - use_new_attention_order=False, - use_spatial_transformer=False, # custom transformer support - transformer_depth=1, # custom transformer support - context_dim=None, # custom transformer support - n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model - legacy=True, - disable_self_attentions=None, ###TODO MMY these are new - num_attention_blocks=None, - disable_middle_self_attn=False, - use_linear_in_transformer=False, - use_flash_attention=False, - from_pretrained_unet=None, - from_NeMo=True + self, + image_size, + in_channels, + model_channels, + hint_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + use_checkpoint=False, + use_fp16=False, + num_heads=-1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + use_spatial_transformer=False, # custom transformer support + transformer_depth=1, # custom transformer support + context_dim=None, # custom transformer support + n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model + legacy=True, + disable_self_attentions=None, ###TODO MMY these are new + num_attention_blocks=None, + disable_middle_self_attn=False, + use_linear_in_transformer=False, + use_flash_attention=False, + from_pretrained_unet=None, + from_NeMo=True, ): super().__init__() if use_spatial_transformer: - assert context_dim is not None, 'Fool!! You forgot to include the dimension of your cross-attention conditioning...' + assert ( + context_dim is not None + ), 'Fool!! You forgot to include the dimension of your cross-attention conditioning...' if context_dim is not None: - assert use_spatial_transformer, 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...' + assert ( + use_spatial_transformer + ), 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...' from omegaconf.listconfig import ListConfig + if type(context_dim) == ListConfig: context_dim = list(context_dim) @@ -280,19 +311,25 @@ def __init__( self.num_res_blocks = len(channel_mult) * [num_res_blocks] else: if len(num_res_blocks) != len(channel_mult): - raise ValueError("provide num_res_blocks either as an int (globally constant) or " - "as a list/tuple (per-level) with the same length as channel_mult") + raise ValueError( + "provide num_res_blocks either as an int (globally constant) or " + "as a list/tuple (per-level) with the same length as channel_mult" + ) self.num_res_blocks = num_res_blocks if disable_self_attentions is not None: # should be a list of booleans, indicating whether to disable self-attention in TransformerBlocks or not assert len(disable_self_attentions) == len(channel_mult) if num_attention_blocks is not None: assert len(num_attention_blocks) == len(self.num_res_blocks) - assert all(map(lambda i: self.num_res_blocks[i] >= num_attention_blocks[i], range(len(num_attention_blocks)))) - print(f"Constructor of UNetModel received num_attention_blocks={num_attention_blocks}. " - f"This option has LESS priority than attention_resolutions {attention_resolutions}, " - f"i.e., in cases where num_attention_blocks[i] > 0 but 2**i not in attention_resolutions, " - f"attention will still not be set.") + assert all( + map(lambda i: self.num_res_blocks[i] >= num_attention_blocks[i], range(len(num_attention_blocks))) + ) + print( + f"Constructor of UNetModel received num_attention_blocks={num_attention_blocks}. " + f"This option has LESS priority than attention_resolutions {attention_resolutions}, " + f"i.e., in cases where num_attention_blocks[i] > 0 but 2**i not in attention_resolutions, " + f"attention will still not be set." + ) self.attention_resolutions = attention_resolutions self.dropout = dropout @@ -307,17 +344,11 @@ def __init__( time_embed_dim = model_channels * 4 self.time_embed = nn.Sequential( - linear(model_channels, time_embed_dim), - nn.SiLU(), - linear(time_embed_dim, time_embed_dim), + linear(model_channels, time_embed_dim), nn.SiLU(), linear(time_embed_dim, time_embed_dim), ) self.input_blocks = nn.ModuleList( - [ - TimestepEmbedSequential( - conv_nd(dims, in_channels, model_channels, 3, padding=1) - ) - ] + [TimestepEmbedSequential(conv_nd(dims, in_channels, model_channels, 3, padding=1))] ) self.zero_convs = nn.ModuleList([self.make_zero_conv(model_channels)]) @@ -336,7 +367,7 @@ def __init__( nn.SiLU(), conv_nd(dims, 96, 256, 3, padding=1, stride=2), nn.SiLU(), - zero_module(conv_nd(dims, 256, model_channels, 3, padding=1)) + zero_module(conv_nd(dims, 256, model_channels, 3, padding=1)), ) self._feature_size = model_channels @@ -379,10 +410,18 @@ def __init__( num_heads=num_heads, num_head_channels=dim_head, use_new_attention_order=use_new_attention_order, - ) if not use_spatial_transformer else SpatialTransformer( - ch, num_heads, dim_head, depth=transformer_depth, context_dim=context_dim, - disable_self_attn=disabled_sa, use_linear=use_linear_in_transformer, - use_checkpoint=use_checkpoint, use_flash_attention=use_flash_attention + ) + if not use_spatial_transformer + else SpatialTransformer( + ch, + num_heads, + dim_head, + depth=transformer_depth, + context_dim=context_dim, + disable_self_attn=disabled_sa, + use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint, + use_flash_attention=use_flash_attention, ) ) self.input_blocks.append(TimestepEmbedSequential(*layers)) @@ -404,9 +443,7 @@ def __init__( down=True, ) if resblock_updown - else Downsample( - ch, conv_resample, dims=dims, out_channels=out_ch - ) + else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch) ) ) ch = out_ch @@ -438,10 +475,18 @@ def __init__( num_heads=num_heads, num_head_channels=dim_head, use_new_attention_order=use_new_attention_order, - ) if not use_spatial_transformer else SpatialTransformer( # always uses a self-attn - ch, num_heads, dim_head, depth=transformer_depth, context_dim=context_dim, - disable_self_attn=disable_middle_self_attn, use_linear=use_linear_in_transformer, - use_checkpoint=use_checkpoint, use_flash_attention=use_flash_attention + ) + if not use_spatial_transformer + else SpatialTransformer( # always uses a self-attn + ch, + num_heads, + dim_head, + depth=transformer_depth, + context_dim=context_dim, + disable_self_attn=disable_middle_self_attn, + use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint, + use_flash_attention=use_flash_attention, ), ResBlock( ch, @@ -481,29 +526,31 @@ def load_from_unet(self, from_pretrained_unet, from_NeMo=True): expected_keys = list(model_state_dict.keys()) loaded_keys = list(re_state_dict.keys()) - missing_keys = list(set(expected_keys)-set(loaded_keys)) - unexpected_keys = list(set(loaded_keys)-set(expected_keys)) + missing_keys = list(set(expected_keys) - set(loaded_keys)) + unexpected_keys = list(set(loaded_keys) - set(expected_keys)) - - - if 'input_blocks.1.0.in_layers.2.weight' in loaded_keys and 'input_blocks.1.0.in_layers.1.weight' in expected_keys: + if ( + 'input_blocks.1.0.in_layers.2.weight' in loaded_keys + and 'input_blocks.1.0.in_layers.1.weight' in expected_keys + ): # GroupNormOpt fuses activation function to one layer, thus the indexing of weights are shifted for following for key_ in missing_keys: if key_.startswith('input_blocks') or key_.startswith('middle_block.'): s = key_.split('.') idx = int(s[-2]) - new_key_ = ".".join(s[:-2] + [str(int(idx+1))] + [s[-1]]) + new_key_ = ".".join(s[:-2] + [str(int(idx + 1))] + [s[-1]]) re_state_dict[key_] = re_state_dict[new_key_] loaded_keys = list(re_state_dict.keys()) missing_keys = list(set(expected_keys) - set(loaded_keys)) unexpected_keys = list(set(loaded_keys) - set(expected_keys)) - self.load_state_dict(re_state_dict, strict=False) if len(missing_keys) > 42: - print('warning: only input hint blocks and zero conv layers are randomly initialized. This message indicates some unet blocks are not loaded correctly.') + print( + 'warning: only input hint blocks and zero conv layers are randomly initialized. This message indicates some unet blocks are not loaded correctly.' + ) print(f'There is {len(missing_keys)} total missing keys') print("Missing:", missing_keys) print("Unexpected:", unexpected_keys) @@ -519,10 +566,6 @@ def load_from_unet(self, from_pretrained_unet, from_NeMo=True): # else: # print(f"{key} not matching after loading") - - - - def make_zero_conv(self, channels): return TimestepEmbedSequential(zero_module(conv_nd(self.dims, channels, channels, 1, padding=0))) @@ -591,9 +634,8 @@ def forward(self, x, c, *args, **kwargs): @torch.no_grad() def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): if self.cfg.scale_by_std and self.current_epoch == 0 and self.global_step == 0 and batch_idx == 0: - assert self.cfg.scale_factor == 1., 'rather not use custom rescaling and std-rescaling simultaneously' - batch[self.cfg.first_stage_key] = \ - batch[self.cfg.first_stage_key].cuda(non_blocking=True) + assert self.cfg.scale_factor == 1.0, 'rather not use custom rescaling and std-rescaling simultaneously' + batch[self.cfg.first_stage_key] = batch[self.cfg.first_stage_key].cuda(non_blocking=True) self.model.on_train_batch_start(batch, batch_idx) def training_step(self, dataloader_iter, batch_idx): @@ -667,8 +709,7 @@ def training_step(self, dataloader_iter, batch_idx): if loss_scale is not None: self.log('loss_scale', loss_scale, batch_size=1) - self.log_dict(loss_dict, prog_bar=False, - logger=True, on_step=True, rank_zero_only=True, batch_size=1) + self.log_dict(loss_dict, prog_bar=False, logger=True, on_step=True, rank_zero_only=True, batch_size=1) self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) lr = self._optimizer.param_groups[0]['lr'] self.log('lr', lr, prog_bar=True, rank_zero_only=True, batch_size=1) @@ -678,7 +719,7 @@ def training_step(self, dataloader_iter, batch_idx): self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), prog_bar=True, rank_zero_only=True, - batch_size = 1 + batch_size=1, ) return loss_mean @@ -720,8 +761,7 @@ def process_batch(batch): # SD has more dedicated structure for encoding, so we enable autocasting here as well with torch.cuda.amp.autocast( - self.autocast_dtype in (torch.half, torch.bfloat16), - dtype=self.autocast_dtype, + self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, ): x, c = self.model.get_input(batch, self.cfg.first_stage_key) @@ -842,13 +882,11 @@ def build_train_valid_test_datasets(self): if self.cfg.first_stage_key.endswith("encoded"): self._train_ds, self._validation_ds = build_train_valid_precached_datasets( - model_cfg=self.cfg, - consumed_samples=self.compute_consumed_samples(0), + model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0), ) else: self._train_ds, self._validation_ds = build_train_valid_datasets( - model_cfg=self.cfg, - consumed_samples=self.compute_consumed_samples(0) + model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0) ) self._test_ds = None @@ -898,8 +936,7 @@ def setup_test_data(self, cfg): f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' ) self._test_dl = torch.utils.data.DataLoader( - self._test_ds, batch_size=self._micro_batch_size, - num_workers=cfg.num_workers, pin_memory=True, + self._test_ds, batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, ) def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: @@ -931,5 +968,3 @@ def parameters(self): return itertools.chain.from_iterable(module.parameters() for module in self.model) else: return self.model.parameters() - - diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/__init__.py index f904ec642f93..a03ce9a4511d 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/__init__.py @@ -4,10 +4,13 @@ import os -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import ( + inference_segmentor, + init_segmentor, + show_result_pyplot, +) from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core.evaluation import get_palette - checkpoint_file = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/upernet_global_small.pth" @@ -17,8 +20,11 @@ def __init__(self): modelpath = os.path.join(annotator_ckpts_path, "upernet_global_small.pth") if not os.path.exists(modelpath): from basicsr.utils.download_util import load_file_from_url + load_file_from_url(checkpoint_file, model_dir=annotator_ckpts_path) - config_file = os.path.join(os.path.dirname(annotator_ckpts_path), "uniformer", "exp", "upernet_global_small", "config.py") + config_file = os.path.join( + os.path.dirname(annotator_ckpts_path), "uniformer", "exp", "upernet_global_small", "config.py" + ) self.model = init_segmentor(config_file, modelpath).cuda() def __call__(self, img): diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py index efc8b4bb20c9..868ea7214c35 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py @@ -1,8 +1,7 @@ # dataset settings dataset_type = 'ADE20KDataset' data_root = 'data/ade/ADEChallengeData2016' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (512, 512) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -29,7 +28,8 @@ dict(type='Normalize', **img_norm_cfg), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']), - ]) + ], + ), ] data = dict( samples_per_gpu=4, @@ -39,16 +39,20 @@ data_root=data_root, img_dir='images/training', ann_dir='annotations/training', - pipeline=train_pipeline), + pipeline=train_pipeline, + ), val=dict( type=dataset_type, data_root=data_root, img_dir='images/validation', ann_dir='annotations/validation', - pipeline=test_pipeline), + pipeline=test_pipeline, + ), test=dict( type=dataset_type, data_root=data_root, img_dir='images/validation', ann_dir='annotations/validation', - pipeline=test_pipeline)) + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py index 298594ea925f..4a234cc4de85 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py @@ -1,8 +1,7 @@ # dataset settings dataset_type = 'ChaseDB1Dataset' data_root = 'data/CHASE_DB1' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) img_scale = (960, 999) crop_size = (128, 128) train_pipeline = [ @@ -15,7 +14,7 @@ dict(type='Normalize', **img_norm_cfg), dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']) + dict(type='Collect', keys=['img', 'gt_semantic_seg']), ] test_pipeline = [ dict(type='LoadImageFromFile'), @@ -29,8 +28,9 @@ dict(type='RandomFlip'), dict(type='Normalize', **img_norm_cfg), dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']) - ]) + dict(type='Collect', keys=['img']), + ], + ), ] data = dict( @@ -44,16 +44,21 @@ data_root=data_root, img_dir='images/training', ann_dir='annotations/training', - pipeline=train_pipeline)), + pipeline=train_pipeline, + ), + ), val=dict( type=dataset_type, data_root=data_root, img_dir='images/validation', ann_dir='annotations/validation', - pipeline=test_pipeline), + pipeline=test_pipeline, + ), test=dict( type=dataset_type, data_root=data_root, img_dir='images/validation', ann_dir='annotations/validation', - pipeline=test_pipeline)) + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py index f21867c63e18..e44904a99a8d 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py @@ -1,8 +1,7 @@ # dataset settings dataset_type = 'CityscapesDataset' data_root = 'data/cityscapes/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (512, 1024) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -29,7 +28,8 @@ dict(type='Normalize', **img_norm_cfg), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']), - ]) + ], + ), ] data = dict( samples_per_gpu=2, @@ -39,16 +39,12 @@ data_root=data_root, img_dir='leftImg8bit/train', ann_dir='gtFine/train', - pipeline=train_pipeline), + pipeline=train_pipeline, + ), val=dict( - type=dataset_type, - data_root=data_root, - img_dir='leftImg8bit/val', - ann_dir='gtFine/val', - pipeline=test_pipeline), + type=dataset_type, data_root=data_root, img_dir='leftImg8bit/val', ann_dir='gtFine/val', pipeline=test_pipeline + ), test=dict( - type=dataset_type, - data_root=data_root, - img_dir='leftImg8bit/val', - ann_dir='gtFine/val', - pipeline=test_pipeline)) + type=dataset_type, data_root=data_root, img_dir='leftImg8bit/val', ann_dir='gtFine/val', pipeline=test_pipeline + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py index 336c7b254fe3..f4a0def57ae7 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py @@ -1,6 +1,5 @@ _base_ = './cityscapes.py' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (769, 769) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -27,9 +26,7 @@ dict(type='Normalize', **img_norm_cfg), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']), - ]) + ], + ), ] -data = dict( - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) +data = dict(train=dict(pipeline=train_pipeline), val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py index 06e8ff606e0d..51849ec17534 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py @@ -1,8 +1,7 @@ # dataset settings dataset_type = 'DRIVEDataset' data_root = 'data/DRIVE' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) img_scale = (584, 565) crop_size = (64, 64) train_pipeline = [ @@ -15,7 +14,7 @@ dict(type='Normalize', **img_norm_cfg), dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']) + dict(type='Collect', keys=['img', 'gt_semantic_seg']), ] test_pipeline = [ dict(type='LoadImageFromFile'), @@ -29,8 +28,9 @@ dict(type='RandomFlip'), dict(type='Normalize', **img_norm_cfg), dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']) - ]) + dict(type='Collect', keys=['img']), + ], + ), ] data = dict( @@ -44,16 +44,21 @@ data_root=data_root, img_dir='images/training', ann_dir='annotations/training', - pipeline=train_pipeline)), + pipeline=train_pipeline, + ), + ), val=dict( type=dataset_type, data_root=data_root, img_dir='images/validation', ann_dir='annotations/validation', - pipeline=test_pipeline), + pipeline=test_pipeline, + ), test=dict( type=dataset_type, data_root=data_root, img_dir='images/validation', ann_dir='annotations/validation', - pipeline=test_pipeline)) + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py index 242d790eb1b8..ef920a7e9491 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py @@ -1,8 +1,7 @@ # dataset settings dataset_type = 'HRFDataset' data_root = 'data/HRF' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) img_scale = (2336, 3504) crop_size = (256, 256) train_pipeline = [ @@ -15,7 +14,7 @@ dict(type='Normalize', **img_norm_cfg), dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']) + dict(type='Collect', keys=['img', 'gt_semantic_seg']), ] test_pipeline = [ dict(type='LoadImageFromFile'), @@ -29,8 +28,9 @@ dict(type='RandomFlip'), dict(type='Normalize', **img_norm_cfg), dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']) - ]) + dict(type='Collect', keys=['img']), + ], + ), ] data = dict( @@ -44,16 +44,21 @@ data_root=data_root, img_dir='images/training', ann_dir='annotations/training', - pipeline=train_pipeline)), + pipeline=train_pipeline, + ), + ), val=dict( type=dataset_type, data_root=data_root, img_dir='images/validation', ann_dir='annotations/validation', - pipeline=test_pipeline), + pipeline=test_pipeline, + ), test=dict( type=dataset_type, data_root=data_root, img_dir='images/validation', ann_dir='annotations/validation', - pipeline=test_pipeline)) + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py index ff65bad1b86d..9b7a0d335b16 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py @@ -1,8 +1,7 @@ # dataset settings dataset_type = 'PascalContextDataset' data_root = 'data/VOCdevkit/VOC2010/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) img_scale = (520, 520) crop_size = (480, 480) @@ -32,7 +31,8 @@ dict(type='Normalize', **img_norm_cfg), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']), - ]) + ], + ), ] data = dict( samples_per_gpu=4, @@ -43,18 +43,22 @@ img_dir='JPEGImages', ann_dir='SegmentationClassContext', split='ImageSets/SegmentationContext/train.txt', - pipeline=train_pipeline), + pipeline=train_pipeline, + ), val=dict( type=dataset_type, data_root=data_root, img_dir='JPEGImages', ann_dir='SegmentationClassContext', split='ImageSets/SegmentationContext/val.txt', - pipeline=test_pipeline), + pipeline=test_pipeline, + ), test=dict( type=dataset_type, data_root=data_root, img_dir='JPEGImages', ann_dir='SegmentationClassContext', split='ImageSets/SegmentationContext/val.txt', - pipeline=test_pipeline)) + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py index 37585abab898..8e757090c2a2 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py @@ -1,8 +1,7 @@ # dataset settings dataset_type = 'PascalContextDataset59' data_root = 'data/VOCdevkit/VOC2010/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) img_scale = (520, 520) crop_size = (480, 480) @@ -32,7 +31,8 @@ dict(type='Normalize', **img_norm_cfg), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']), - ]) + ], + ), ] data = dict( samples_per_gpu=4, @@ -43,18 +43,22 @@ img_dir='JPEGImages', ann_dir='SegmentationClassContext', split='ImageSets/SegmentationContext/train.txt', - pipeline=train_pipeline), + pipeline=train_pipeline, + ), val=dict( type=dataset_type, data_root=data_root, img_dir='JPEGImages', ann_dir='SegmentationClassContext', split='ImageSets/SegmentationContext/val.txt', - pipeline=test_pipeline), + pipeline=test_pipeline, + ), test=dict( type=dataset_type, data_root=data_root, img_dir='JPEGImages', ann_dir='SegmentationClassContext', split='ImageSets/SegmentationContext/val.txt', - pipeline=test_pipeline)) + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py index ba1d42d0c578..55d49f3b0156 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py @@ -1,8 +1,7 @@ # dataset settings dataset_type = 'PascalVOCDataset' data_root = 'data/VOCdevkit/VOC2012' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) crop_size = (512, 512) train_pipeline = [ dict(type='LoadImageFromFile'), @@ -29,7 +28,8 @@ dict(type='Normalize', **img_norm_cfg), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']), - ]) + ], + ), ] data = dict( samples_per_gpu=4, @@ -40,18 +40,22 @@ img_dir='JPEGImages', ann_dir='SegmentationClass', split='ImageSets/Segmentation/train.txt', - pipeline=train_pipeline), + pipeline=train_pipeline, + ), val=dict( type=dataset_type, data_root=data_root, img_dir='JPEGImages', ann_dir='SegmentationClass', split='ImageSets/Segmentation/val.txt', - pipeline=test_pipeline), + pipeline=test_pipeline, + ), test=dict( type=dataset_type, data_root=data_root, img_dir='JPEGImages', ann_dir='SegmentationClass', split='ImageSets/Segmentation/val.txt', - pipeline=test_pipeline)) + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py index 3f23b6717d53..5dfc7c2e640a 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py @@ -3,7 +3,6 @@ data = dict( train=dict( ann_dir=['SegmentationClass', 'SegmentationClassAug'], - split=[ - 'ImageSets/Segmentation/train.txt', - 'ImageSets/Segmentation/aug.txt' - ])) + split=['ImageSets/Segmentation/train.txt', 'ImageSets/Segmentation/aug.txt'], + ) +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py index 3f71b25488cc..c2e6bbc32e0b 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py @@ -1,8 +1,7 @@ # dataset settings dataset_type = 'STAREDataset' data_root = 'data/STARE' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) img_scale = (605, 700) crop_size = (128, 128) train_pipeline = [ @@ -15,7 +14,7 @@ dict(type='Normalize', **img_norm_cfg), dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']) + dict(type='Collect', keys=['img', 'gt_semantic_seg']), ] test_pipeline = [ dict(type='LoadImageFromFile'), @@ -29,8 +28,9 @@ dict(type='RandomFlip'), dict(type='Normalize', **img_norm_cfg), dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']) - ]) + dict(type='Collect', keys=['img']), + ], + ), ] data = dict( @@ -44,16 +44,21 @@ data_root=data_root, img_dir='images/training', ann_dir='annotations/training', - pipeline=train_pipeline)), + pipeline=train_pipeline, + ), + ), val=dict( type=dataset_type, data_root=data_root, img_dir='images/validation', ann_dir='annotations/validation', - pipeline=test_pipeline), + pipeline=test_pipeline, + ), test=dict( type=dataset_type, data_root=data_root, img_dir='images/validation', ann_dir='annotations/validation', - pipeline=test_pipeline)) + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py index b564cc4e7e7d..42ed60a779ae 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py @@ -4,7 +4,8 @@ hooks=[ dict(type='TextLoggerHook', by_epoch=False), # dict(type='TensorboardLoggerHook') - ]) + ], +) # yapf:enable dist_params = dict(backend='nccl') log_level = 'INFO' diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py index a2cb653827e4..74d4d7851a59 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py @@ -13,21 +13,22 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='ANNHead', in_channels=[1024, 2048], in_index=[2, 3], channels=512, project_channels=256, - query_scales=(1, ), + query_scales=(1,), key_pool_scales=(1, 3, 6, 8), dropout_ratio=0.1, num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -39,8 +40,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py index c8f5316cbcf3..96ece2073821 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py @@ -13,7 +13,8 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='APCHead', in_channels=2048, @@ -24,8 +25,8 @@ num_classes=19, norm_cfg=dict(type='SyncBN', requires_grad=True), align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -37,8 +38,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py index 794148f576b9..b949aa80e45e 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py @@ -13,7 +13,8 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='CCHead', in_channels=2048, @@ -24,8 +25,8 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -37,8 +38,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py index eff8d9458c87..19f45463bbb9 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py @@ -9,7 +9,8 @@ num_channels=(32, 64, 128), num_blocks=(3, 21), dilations=(2, 4), - reductions=(8, 16)), + reductions=(8, 16), + ), decode_head=dict( type='FCNHead', in_channels=256, @@ -25,11 +26,29 @@ use_sigmoid=False, loss_weight=1.0, class_weight=[ - 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, - 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, - 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, - 10.396974, 10.055647 - ])), + 2.5959933, + 6.7415504, + 3.5354059, + 9.8663225, + 9.690899, + 9.369352, + 10.289121, + 9.953208, + 4.3097677, + 9.490387, + 7.674431, + 9.396905, + 10.347791, + 6.3927646, + 10.226669, + 10.241062, + 10.280587, + 10.396974, + 10.055647, + ], + ), + ), # model training and testing settings train_cfg=dict(sampler=None), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py index 2c934939fac4..758161a914a5 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py @@ -13,7 +13,8 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='DAHead', in_channels=2048, @@ -24,8 +25,8 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -37,8 +38,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py index d7a43bee0142..501b207c0de2 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py @@ -13,7 +13,8 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='ASPPHead', in_channels=2048, @@ -24,8 +25,8 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -37,8 +38,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py index 0cd262999d8b..4f1a8536caf9 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py @@ -19,7 +19,8 @@ norm_cfg=norm_cfg, act_cfg=dict(type='ReLU'), upsample_cfg=dict(type='InterpConv'), - norm_eval=False), + norm_eval=False, + ), decode_head=dict( type='ASPPHead', in_channels=64, @@ -30,8 +31,8 @@ num_classes=2, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=128, @@ -43,8 +44,9 @@ num_classes=2, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='slide', crop_size=256, stride=170)) + test_cfg=dict(mode='slide', crop_size=256, stride=170), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py index 050e39e091d8..fbf847d8941d 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py @@ -13,7 +13,8 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='DepthwiseSeparableASPPHead', in_channels=2048, @@ -26,8 +27,8 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -39,8 +40,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py index d22ba52640be..42ab79c4ce82 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py @@ -13,7 +13,8 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='DMHead', in_channels=2048, @@ -24,8 +25,8 @@ num_classes=19, norm_cfg=dict(type='SyncBN', requires_grad=True), align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -37,8 +38,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py index edb4c174c51e..5e6656c49b78 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py @@ -13,7 +13,8 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='DNLHead', in_channels=2048, @@ -26,8 +27,8 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -39,8 +40,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py index 26adcd430926..ff8a84c1c491 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py @@ -13,7 +13,8 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='EMAHead', in_channels=2048, @@ -27,8 +28,8 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -40,8 +41,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py index be777123a886..c61fb7d77e35 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py @@ -13,7 +13,8 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='EncHead', in_channels=[512, 1024, 2048], @@ -26,10 +27,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_se_decode=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -41,8 +41,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py index 32fdeb659355..d6a4fb7205dc 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py @@ -14,7 +14,8 @@ fusion_out_channels=128, out_indices=(0, 1, 2), norm_cfg=norm_cfg, - align_corners=False), + align_corners=False, + ), decode_head=dict( type='DepthwiseSeparableFCNHead', in_channels=128, @@ -24,8 +25,8 @@ in_index=-1, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4), + ), auxiliary_head=[ dict( type='FCNHead', @@ -37,8 +38,8 @@ norm_cfg=norm_cfg, concat_input=False, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4), + ), dict( type='FCNHead', in_channels=64, @@ -49,9 +50,10 @@ norm_cfg=norm_cfg, concat_input=False, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4), + ), ], # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py index c3e299bc89ad..0c20335075a9 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py @@ -8,30 +8,14 @@ norm_cfg=norm_cfg, norm_eval=False, extra=dict( - stage1=dict( - num_modules=1, - num_branches=1, - block='BOTTLENECK', - num_blocks=(4, ), - num_channels=(64, )), - stage2=dict( - num_modules=1, - num_branches=2, - block='BASIC', - num_blocks=(4, 4), - num_channels=(18, 36)), - stage3=dict( - num_modules=4, - num_branches=3, - block='BASIC', - num_blocks=(4, 4, 4), - num_channels=(18, 36, 72)), + stage1=dict(num_modules=1, num_branches=1, block='BOTTLENECK', num_blocks=(4,), num_channels=(64,)), + stage2=dict(num_modules=1, num_branches=2, block='BASIC', num_blocks=(4, 4), num_channels=(18, 36)), + stage3=dict(num_modules=4, num_branches=3, block='BASIC', num_blocks=(4, 4, 4), num_channels=(18, 36, 72)), stage4=dict( - num_modules=3, - num_branches=4, - block='BASIC', - num_blocks=(4, 4, 4, 4), - num_channels=(18, 36, 72, 144)))), + num_modules=3, num_branches=4, block='BASIC', num_blocks=(4, 4, 4, 4), num_channels=(18, 36, 72, 144) + ), + ), + ), decode_head=dict( type='FCNHead', in_channels=[18, 36, 72, 144], @@ -45,8 +29,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py index 5e98f6cc918b..43364899324f 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py @@ -13,7 +13,8 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='FCNHead', in_channels=2048, @@ -25,8 +26,8 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -38,8 +39,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py index a33e7972877f..ebfd9879787a 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py @@ -19,7 +19,8 @@ norm_cfg=norm_cfg, act_cfg=dict(type='ReLU'), upsample_cfg=dict(type='InterpConv'), - norm_eval=False), + norm_eval=False, + ), decode_head=dict( type='FCNHead', in_channels=64, @@ -31,8 +32,8 @@ num_classes=2, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=128, @@ -44,8 +45,9 @@ num_classes=2, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='slide', crop_size=256, stride=170)) + test_cfg=dict(mode='slide', crop_size=256, stride=170), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py index 86ab327db92e..a51398d3a5b0 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py @@ -13,12 +13,9 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=4), + contract_dilation=True, + ), + neck=dict(type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=4), decode_head=dict( type='FPNHead', in_channels=[256, 256, 256, 256], @@ -29,8 +26,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py index 8aae98c59910..f81960a35c2d 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py @@ -7,16 +7,13 @@ embed_dim=[64, 128, 320, 512], layers=[3, 4, 8, 3], head_dim=64, - mlp_ratio=4., + mlp_ratio=4.0, qkv_bias=True, - drop_rate=0., - attn_drop_rate=0., - drop_path_rate=0.1), - neck=dict( - type='FPN', - in_channels=[64, 128, 320, 512], - out_channels=256, - num_outs=4), + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.1, + ), + neck=dict(type='FPN', in_channels=[64, 128, 320, 512], out_channels=256, num_outs=4), decode_head=dict( type='FPNHead', in_channels=[256, 256, 256, 256], @@ -27,9 +24,9 @@ num_classes=150, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole') + test_cfg=dict(mode='whole'), ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py index 3d2ad69f5c22..c1d4477e0250 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py @@ -13,21 +13,22 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='GCHead', in_channels=2048, in_index=3, channels=512, - ratio=1 / 4., + ratio=1 / 4.0, pooling_type='att', - fusion_types=('channel_add', ), + fusion_types=('channel_add',), dropout_ratio=0.1, num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -39,8 +40,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py index 93258242a906..2b2fa51f8d01 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py @@ -2,11 +2,7 @@ norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) model = dict( type='EncoderDecoder', - backbone=dict( - type='MobileNetV3', - arch='large', - out_indices=(1, 3, 16), - norm_cfg=norm_cfg), + backbone=dict(type='MobileNetV3', arch='large', out_indices=(1, 3, 16), norm_cfg=norm_cfg), decode_head=dict( type='LRASPPHead', in_channels=(16, 24, 960), @@ -18,8 +14,9 @@ norm_cfg=norm_cfg, act_cfg=dict(type='ReLU'), align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py index 5674a39854ca..7477ac076da2 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py @@ -13,7 +13,8 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='NLHead', in_channels=2048, @@ -26,8 +27,8 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -39,8 +40,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py index c60f62a7cdf3..282f7d239eb5 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py @@ -9,30 +9,14 @@ norm_cfg=norm_cfg, norm_eval=False, extra=dict( - stage1=dict( - num_modules=1, - num_branches=1, - block='BOTTLENECK', - num_blocks=(4, ), - num_channels=(64, )), - stage2=dict( - num_modules=1, - num_branches=2, - block='BASIC', - num_blocks=(4, 4), - num_channels=(18, 36)), - stage3=dict( - num_modules=4, - num_branches=3, - block='BASIC', - num_blocks=(4, 4, 4), - num_channels=(18, 36, 72)), + stage1=dict(num_modules=1, num_branches=1, block='BOTTLENECK', num_blocks=(4,), num_channels=(64,)), + stage2=dict(num_modules=1, num_branches=2, block='BASIC', num_blocks=(4, 4), num_channels=(18, 36)), + stage3=dict(num_modules=4, num_branches=3, block='BASIC', num_blocks=(4, 4, 4), num_channels=(18, 36, 72)), stage4=dict( - num_modules=3, - num_branches=4, - block='BASIC', - num_blocks=(4, 4, 4, 4), - num_channels=(18, 36, 72, 144)))), + num_modules=3, num_branches=4, block='BASIC', num_blocks=(4, 4, 4, 4), num_channels=(18, 36, 72, 144) + ), + ), + ), decode_head=[ dict( type='FCNHead', @@ -47,8 +31,8 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), dict( type='OCRHead', in_channels=[18, 36, 72, 144], @@ -60,9 +44,10 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), ], # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py index 615aa3ff7039..a5dcc09b6750 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py @@ -14,7 +14,8 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=[ dict( type='FCNHead', @@ -27,8 +28,8 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), dict( type='OCRHead', in_channels=2048, @@ -39,9 +40,10 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), ], # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py index 9d323dbf9466..88ec38a37a5c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py @@ -14,12 +14,9 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=4), + contract_dilation=True, + ), + neck=dict(type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=4), decode_head=[ dict( type='FPNHead', @@ -31,8 +28,8 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), dict( type='PointHead', in_channels=[256], @@ -43,14 +40,10 @@ dropout_ratio=-1, num_classes=19, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), ], # model training and testing settings - train_cfg=dict( - num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75), - test_cfg=dict( - mode='whole', - subdivision_steps=2, - subdivision_num_points=8196, - scale_factor=2)) + train_cfg=dict(num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75), + test_cfg=dict(mode='whole', subdivision_steps=2, subdivision_num_points=8196, scale_factor=2), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py index 689513fa9d2a..07aba72c3f7d 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py @@ -13,7 +13,8 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='PSAHead', in_channels=2048, @@ -29,8 +30,8 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -42,8 +43,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py index f451e08ad2eb..e6c2a5534fc0 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py @@ -13,7 +13,8 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='PSPHead', in_channels=2048, @@ -24,8 +25,8 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -37,8 +38,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py index fcff9ec4f41f..7010b76bc4e0 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py @@ -19,7 +19,8 @@ norm_cfg=norm_cfg, act_cfg=dict(type='ReLU'), upsample_cfg=dict(type='InterpConv'), - norm_eval=False), + norm_eval=False, + ), decode_head=dict( type='PSPHead', in_channels=64, @@ -30,8 +31,8 @@ num_classes=2, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=128, @@ -43,8 +44,9 @@ num_classes=2, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='slide', crop_size=256, stride=170)) + test_cfg=dict(mode='slide', crop_size=256, stride=170), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py index 10974962fdd7..bef6484ab3ae 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py @@ -13,7 +13,8 @@ norm_cfg=norm_cfg, norm_eval=False, style='pytorch', - contract_dilation=True), + contract_dilation=True, + ), decode_head=dict( type='UPerHead', in_channels=[256, 512, 1024, 2048], @@ -24,8 +25,8 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=1024, @@ -37,8 +38,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py index 41aa4db809dc..df70f56cf9a0 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py @@ -8,11 +8,12 @@ embed_dim=[64, 128, 320, 512], layers=[3, 4, 8, 3], head_dim=64, - mlp_ratio=4., + mlp_ratio=4.0, qkv_bias=True, - drop_rate=0., - attn_drop_rate=0., - drop_path_rate=0.1), + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.1, + ), decode_head=dict( type='UPerHead', in_channels=[64, 128, 320, 512], @@ -23,8 +24,8 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), auxiliary_head=dict( type='FCNHead', in_channels=320, @@ -36,8 +37,9 @@ num_classes=19, norm_cfg=norm_cfg, align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) \ No newline at end of file + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py index 01db96bf9b0b..3d17fe03c602 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py @@ -1,8 +1,8 @@ _base_ = [ - '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/models/upernet_uniformer.py', '../../configs/_base_/datasets/ade20k.py', - '../../configs/_base_/default_runtime.py', - '../../configs/_base_/schedules/schedule_160k.py' + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py', ] model = dict( backbone=dict( @@ -12,27 +12,37 @@ head_dim=64, drop_path_rate=0.25, windows=False, - hybrid=False + hybrid=False, ), - decode_head=dict( - in_channels=[64, 128, 320, 512], - num_classes=150 - ), - auxiliary_head=dict( - in_channels=320, - num_classes=150 - )) + decode_head=dict(in_channels=[64, 128, 320, 512], num_classes=150), + auxiliary_head=dict(in_channels=320, num_classes=150), +) # AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, - paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.)})) +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + } + ), +) -lr_config = dict(_delete_=True, policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, min_lr=0.0, by_epoch=False) +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False, +) -data=dict(samples_per_gpu=2) \ No newline at end of file +data = dict(samples_per_gpu=2) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py index e43737a98a3b..3d17fe03c602 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py @@ -1,8 +1,8 @@ _base_ = [ - '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/models/upernet_uniformer.py', '../../configs/_base_/datasets/ade20k.py', - '../../configs/_base_/default_runtime.py', - '../../configs/_base_/schedules/schedule_160k.py' + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py', ] model = dict( backbone=dict( @@ -14,25 +14,35 @@ windows=False, hybrid=False, ), - decode_head=dict( - in_channels=[64, 128, 320, 512], - num_classes=150 - ), - auxiliary_head=dict( - in_channels=320, - num_classes=150 - )) + decode_head=dict(in_channels=[64, 128, 320, 512], num_classes=150), + auxiliary_head=dict(in_channels=320, num_classes=150), +) # AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, - paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.)})) +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + } + ), +) -lr_config = dict(_delete_=True, policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, min_lr=0.0, by_epoch=False) +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False, +) -data=dict(samples_per_gpu=2) \ No newline at end of file +data = dict(samples_per_gpu=2) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py index a31e3874f76f..4a5923cb210c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py @@ -1,8 +1,8 @@ _base_ = [ - '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/models/upernet_uniformer.py', '../../configs/_base_/datasets/ade20k.py', - '../../configs/_base_/default_runtime.py', - '../../configs/_base_/schedules/schedule_160k.py' + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py', ] model = dict( backbone=dict( @@ -13,27 +13,37 @@ drop_path_rate=0.25, windows=False, hybrid=True, - window_size=32 + window_size=32, ), - decode_head=dict( - in_channels=[64, 128, 320, 512], - num_classes=150 - ), - auxiliary_head=dict( - in_channels=320, - num_classes=150 - )) + decode_head=dict(in_channels=[64, 128, 320, 512], num_classes=150), + auxiliary_head=dict(in_channels=320, num_classes=150), +) # AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, - paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.)})) +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + } + ), +) -lr_config = dict(_delete_=True, policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, min_lr=0.0, by_epoch=False) +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False, +) -data=dict(samples_per_gpu=2) \ No newline at end of file +data = dict(samples_per_gpu=2) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py index 3d9e06f029e4..4fde8ab1ebe4 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py @@ -1,8 +1,8 @@ _base_ = [ - '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/models/upernet_uniformer.py', '../../configs/_base_/datasets/ade20k.py', - '../../configs/_base_/default_runtime.py', - '../../configs/_base_/schedules/schedule_160k.py' + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py', ] model = dict( backbone=dict( @@ -13,27 +13,37 @@ drop_path_rate=0.25, windows=True, hybrid=False, - window_size=32 + window_size=32, ), - decode_head=dict( - in_channels=[64, 128, 320, 512], - num_classes=150 - ), - auxiliary_head=dict( - in_channels=320, - num_classes=150 - )) + decode_head=dict(in_channels=[64, 128, 320, 512], num_classes=150), + auxiliary_head=dict(in_channels=320, num_classes=150), +) # AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, - paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.)})) +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + } + ), +) -lr_config = dict(_delete_=True, policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, min_lr=0.0, by_epoch=False) +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False, +) -data=dict(samples_per_gpu=2) \ No newline at end of file +data = dict(samples_per_gpu=2) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py index 8e47a3545780..87ba022c1ced 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py @@ -16,15 +16,12 @@ def quantize(arr, min_val, max_val, levels, dtype=np.int64): tuple: Quantized array. """ if not (isinstance(levels, int) and levels > 1): - raise ValueError( - f'levels must be a positive integer, but got {levels}') + raise ValueError(f'levels must be a positive integer, but got {levels}') if min_val >= max_val: - raise ValueError( - f'min_val ({min_val}) must be smaller than max_val ({max_val})') + raise ValueError(f'min_val ({min_val}) must be smaller than max_val ({max_val})') arr = np.clip(arr, min_val, max_val) - min_val - quantized_arr = np.minimum( - np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1) + quantized_arr = np.minimum(np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1) return quantized_arr @@ -43,13 +40,10 @@ def dequantize(arr, min_val, max_val, levels, dtype=np.float64): tuple: Dequantized array. """ if not (isinstance(levels, int) and levels > 1): - raise ValueError( - f'levels must be a positive integer, but got {levels}') + raise ValueError(f'levels must be a positive integer, but got {levels}') if min_val >= max_val: - raise ValueError( - f'min_val ({min_val}) must be smaller than max_val ({max_val})') + raise ValueError(f'min_val ({min_val}) must be smaller than max_val ({max_val})') - dequantized_arr = (arr + 0.5).astype(dtype) * (max_val - - min_val) / levels + min_val + dequantized_arr = (arr + 0.5).astype(dtype) * (max_val - min_val) / levels + min_val return dequantized_arr diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py index 7246c897430f..f87bac5fafca 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py @@ -1,41 +1,131 @@ # Copyright (c) OpenMMLab. All rights reserved. from .alexnet import AlexNet + # yapf: disable -from .bricks import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, - PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS, - ContextBlock, Conv2d, Conv3d, ConvAWS2d, ConvModule, - ConvTranspose2d, ConvTranspose3d, ConvWS2d, - DepthwiseSeparableConvModule, GeneralizedAttention, - HSigmoid, HSwish, Linear, MaxPool2d, MaxPool3d, - NonLocal1d, NonLocal2d, NonLocal3d, Scale, Swish, - build_activation_layer, build_conv_layer, - build_norm_layer, build_padding_layer, build_plugin_layer, - build_upsample_layer, conv_ws_2d, is_norm) +from .bricks import ( + ACTIVATION_LAYERS, + CONV_LAYERS, + NORM_LAYERS, + PADDING_LAYERS, + PLUGIN_LAYERS, + UPSAMPLE_LAYERS, + ContextBlock, + Conv2d, + Conv3d, + ConvAWS2d, + ConvModule, + ConvTranspose2d, + ConvTranspose3d, + ConvWS2d, + DepthwiseSeparableConvModule, + GeneralizedAttention, + HSigmoid, + HSwish, + Linear, + MaxPool2d, + MaxPool3d, + NonLocal1d, + NonLocal2d, + NonLocal3d, + Scale, + Swish, + build_activation_layer, + build_conv_layer, + build_norm_layer, + build_padding_layer, + build_plugin_layer, + build_upsample_layer, + conv_ws_2d, + is_norm, +) from .builder import MODELS, build_model_from_cfg + # yapf: enable from .resnet import ResNet, make_res_layer -from .utils import (INITIALIZERS, Caffe2XavierInit, ConstantInit, KaimingInit, - NormalInit, PretrainedInit, TruncNormalInit, UniformInit, - XavierInit, bias_init_with_prob, caffe2_xavier_init, - constant_init, fuse_conv_bn, get_model_complexity_info, - initialize, kaiming_init, normal_init, trunc_normal_init, - uniform_init, xavier_init) +from .utils import ( + INITIALIZERS, + Caffe2XavierInit, + ConstantInit, + KaimingInit, + NormalInit, + PretrainedInit, + TruncNormalInit, + UniformInit, + XavierInit, + bias_init_with_prob, + caffe2_xavier_init, + constant_init, + fuse_conv_bn, + get_model_complexity_info, + initialize, + kaiming_init, + normal_init, + trunc_normal_init, + uniform_init, + xavier_init, +) from .vgg import VGG, make_vgg_layer __all__ = [ - 'AlexNet', 'VGG', 'make_vgg_layer', 'ResNet', 'make_res_layer', - 'constant_init', 'xavier_init', 'normal_init', 'trunc_normal_init', - 'uniform_init', 'kaiming_init', 'caffe2_xavier_init', - 'bias_init_with_prob', 'ConvModule', 'build_activation_layer', - 'build_conv_layer', 'build_norm_layer', 'build_padding_layer', - 'build_upsample_layer', 'build_plugin_layer', 'is_norm', 'NonLocal1d', - 'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'HSigmoid', 'Swish', 'HSwish', - 'GeneralizedAttention', 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', - 'PADDING_LAYERS', 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', - 'get_model_complexity_info', 'conv_ws_2d', 'ConvAWS2d', 'ConvWS2d', - 'fuse_conv_bn', 'DepthwiseSeparableConvModule', 'Linear', 'Conv2d', - 'ConvTranspose2d', 'MaxPool2d', 'ConvTranspose3d', 'MaxPool3d', 'Conv3d', - 'initialize', 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit', - 'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit', - 'Caffe2XavierInit', 'MODELS', 'build_model_from_cfg' + 'AlexNet', + 'VGG', + 'make_vgg_layer', + 'ResNet', + 'make_res_layer', + 'constant_init', + 'xavier_init', + 'normal_init', + 'trunc_normal_init', + 'uniform_init', + 'kaiming_init', + 'caffe2_xavier_init', + 'bias_init_with_prob', + 'ConvModule', + 'build_activation_layer', + 'build_conv_layer', + 'build_norm_layer', + 'build_padding_layer', + 'build_upsample_layer', + 'build_plugin_layer', + 'is_norm', + 'NonLocal1d', + 'NonLocal2d', + 'NonLocal3d', + 'ContextBlock', + 'HSigmoid', + 'Swish', + 'HSwish', + 'GeneralizedAttention', + 'ACTIVATION_LAYERS', + 'CONV_LAYERS', + 'NORM_LAYERS', + 'PADDING_LAYERS', + 'UPSAMPLE_LAYERS', + 'PLUGIN_LAYERS', + 'Scale', + 'get_model_complexity_info', + 'conv_ws_2d', + 'ConvAWS2d', + 'ConvWS2d', + 'fuse_conv_bn', + 'DepthwiseSeparableConvModule', + 'Linear', + 'Conv2d', + 'ConvTranspose2d', + 'MaxPool2d', + 'ConvTranspose3d', + 'MaxPool3d', + 'Conv3d', + 'initialize', + 'INITIALIZERS', + 'ConstantInit', + 'XavierInit', + 'NormalInit', + 'TruncNormalInit', + 'UniformInit', + 'KaimingInit', + 'PretrainedInit', + 'Caffe2XavierInit', + 'MODELS', + 'build_model_from_cfg', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py index 89e36b8c7851..e52d852bceaa 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py @@ -44,6 +44,7 @@ def init_weights(self, pretrained=None): if isinstance(pretrained, str): logger = logging.getLogger() from ..runner import load_checkpoint + load_checkpoint(self, pretrained, strict=False, logger=logger) elif pretrained is None: # use default initializer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py index 0f33124ed23f..4405eb058c4c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py @@ -14,22 +14,48 @@ from .norm import build_norm_layer, is_norm from .padding import build_padding_layer from .plugin import build_plugin_layer -from .registry import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, - PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS) +from .registry import ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS from .scale import Scale from .swish import Swish from .upsample import build_upsample_layer -from .wrappers import (Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d, - Linear, MaxPool2d, MaxPool3d) +from .wrappers import Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d, Linear, MaxPool2d, MaxPool3d __all__ = [ - 'ConvModule', 'build_activation_layer', 'build_conv_layer', - 'build_norm_layer', 'build_padding_layer', 'build_upsample_layer', - 'build_plugin_layer', 'is_norm', 'HSigmoid', 'HSwish', 'NonLocal1d', - 'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'GeneralizedAttention', - 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', 'PADDING_LAYERS', - 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', 'ConvAWS2d', 'ConvWS2d', - 'conv_ws_2d', 'DepthwiseSeparableConvModule', 'Swish', 'Linear', - 'Conv2dAdaptivePadding', 'Conv2d', 'ConvTranspose2d', 'MaxPool2d', - 'ConvTranspose3d', 'MaxPool3d', 'Conv3d', 'Dropout', 'DropPath' + 'ConvModule', + 'build_activation_layer', + 'build_conv_layer', + 'build_norm_layer', + 'build_padding_layer', + 'build_upsample_layer', + 'build_plugin_layer', + 'is_norm', + 'HSigmoid', + 'HSwish', + 'NonLocal1d', + 'NonLocal2d', + 'NonLocal3d', + 'ContextBlock', + 'GeneralizedAttention', + 'ACTIVATION_LAYERS', + 'CONV_LAYERS', + 'NORM_LAYERS', + 'PADDING_LAYERS', + 'UPSAMPLE_LAYERS', + 'PLUGIN_LAYERS', + 'Scale', + 'ConvAWS2d', + 'ConvWS2d', + 'conv_ws_2d', + 'DepthwiseSeparableConvModule', + 'Swish', + 'Linear', + 'Conv2dAdaptivePadding', + 'Conv2d', + 'ConvTranspose2d', + 'MaxPool2d', + 'ConvTranspose3d', + 'MaxPool3d', + 'Conv3d', + 'Dropout', + 'DropPath', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py index f59de07583f2..74134627bd60 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py @@ -3,13 +3,15 @@ import torch.nn as nn import torch.nn.functional as F -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, build_from_cfg, digit_version +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import ( + TORCH_VERSION, + build_from_cfg, + digit_version, +) + from .registry import ACTIVATION_LAYERS -for module in [ - nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.RReLU, nn.ReLU6, nn.ELU, - nn.Sigmoid, nn.Tanh -]: +for module in [nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.RReLU, nn.ReLU6, nn.ELU, nn.Sigmoid, nn.Tanh]: ACTIVATION_LAYERS.register_module(module=module) @@ -28,7 +30,7 @@ class Clamp(nn.Module): Default to 1. """ - def __init__(self, min=-1., max=1.): + def __init__(self, min=-1.0, max=1.0): super(Clamp, self).__init__() self.min = min self.max = max @@ -71,8 +73,7 @@ def forward(self, input): return F.gelu(input) -if (TORCH_VERSION == 'parrots' - or digit_version(TORCH_VERSION) < digit_version('1.4')): +if TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.4'): ACTIVATION_LAYERS.register_module(module=GELU) else: ACTIVATION_LAYERS.register_module(module=nn.GELU) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py index d60fdb904c74..fd2a4b80ea18 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py @@ -33,11 +33,7 @@ class ContextBlock(nn.Module): _abbr_ = 'context_block' - def __init__(self, - in_channels, - ratio, - pooling_type='att', - fusion_types=('channel_add', )): + def __init__(self, in_channels, ratio, pooling_type='att', fusion_types=('channel_add',)): super(ContextBlock, self).__init__() assert pooling_type in ['avg', 'att'] assert isinstance(fusion_types, (list, tuple)) @@ -59,7 +55,8 @@ def __init__(self, nn.Conv2d(self.in_channels, self.planes, kernel_size=1), nn.LayerNorm([self.planes, 1, 1]), nn.ReLU(inplace=True), # yapf: disable - nn.Conv2d(self.planes, self.in_channels, kernel_size=1)) + nn.Conv2d(self.planes, self.in_channels, kernel_size=1), + ) else: self.channel_add_conv = None if 'channel_mul' in fusion_types: @@ -67,7 +64,8 @@ def __init__(self, nn.Conv2d(self.in_channels, self.planes, kernel_size=1), nn.LayerNorm([self.planes, 1, 1]), nn.ReLU(inplace=True), # yapf: disable - nn.Conv2d(self.planes, self.in_channels, kernel_size=1)) + nn.Conv2d(self.planes, self.in_channels, kernel_size=1), + ) else: self.channel_mul_conv = None self.reset_parameters() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py index b45e758ac6cf..39f9c01dd794 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py @@ -30,17 +30,8 @@ class Conv2dAdaptivePadding(nn.Conv2d): output. Default: ``True`` """ - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - bias=True): - super().__init__(in_channels, out_channels, kernel_size, stride, 0, - dilation, groups, bias) + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True): + super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) def forward(self, x): img_h, img_w = x.size()[-2:] @@ -48,15 +39,8 @@ def forward(self, x): stride_h, stride_w = self.stride output_h = math.ceil(img_h / stride_h) output_w = math.ceil(img_w / stride_w) - pad_h = ( - max((output_h - 1) * self.stride[0] + - (kernel_h - 1) * self.dilation[0] + 1 - img_h, 0)) - pad_w = ( - max((output_w - 1) * self.stride[1] + - (kernel_w - 1) * self.dilation[1] + 1 - img_w, 0)) + pad_h = max((output_h - 1) * self.stride[0] + (kernel_h - 1) * self.dilation[0] + 1 - img_h, 0) + pad_w = max((output_w - 1) * self.stride[1] + (kernel_w - 1) * self.dilation[1] + 1 - img_w, 0) if pad_h > 0 or pad_w > 0: - x = F.pad(x, [ - pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2 - ]) - return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, - self.dilation, self.groups) + x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) + return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py index a585314151bd..3e9f76b3f9e5 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py @@ -67,22 +67,24 @@ class ConvModule(nn.Module): _abbr_ = 'conv_block' - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - bias='auto', - conv_cfg=None, - norm_cfg=None, - act_cfg=dict(type='ReLU'), - inplace=True, - with_spectral_norm=False, - padding_mode='zeros', - order=('conv', 'norm', 'act')): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias='auto', + conv_cfg=None, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + inplace=True, + with_spectral_norm=False, + padding_mode='zeros', + order=('conv', 'norm', 'act'), + ): super(ConvModule, self).__init__() assert conv_cfg is None or isinstance(conv_cfg, dict) assert norm_cfg is None or isinstance(norm_cfg, dict) @@ -121,7 +123,8 @@ def __init__(self, padding=conv_padding, dilation=dilation, groups=groups, - bias=bias) + bias=bias, + ) # export the attributes of self.conv to a higher level for convenience self.in_channels = self.conv.in_channels self.out_channels = self.conv.out_channels @@ -147,8 +150,7 @@ def __init__(self, self.add_module(self.norm_name, norm) if self.with_bias: if isinstance(norm, (_BatchNorm, _InstanceNorm)): - warnings.warn( - 'Unnecessary conv bias before batch/instance norm') + warnings.warn('Unnecessary conv bias before batch/instance norm') else: self.norm_name = None @@ -156,9 +158,7 @@ def __init__(self, if self.with_activation: act_cfg_ = act_cfg.copy() # nn.Tanh has no 'inplace' argument - if act_cfg_['type'] not in [ - 'Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish' - ]: + if act_cfg_['type'] not in ['Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish']: act_cfg_.setdefault('inplace', inplace) self.activate = build_activation_layer(act_cfg_) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py index a3941e278749..ecd8ed0db777 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py @@ -6,14 +6,7 @@ from .registry import CONV_LAYERS -def conv_ws_2d(input, - weight, - bias=None, - stride=1, - padding=0, - dilation=1, - groups=1, - eps=1e-5): +def conv_ws_2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, eps=1e-5): c_in = weight.size(0) weight_flat = weight.view(c_in, -1) mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) @@ -24,17 +17,9 @@ def conv_ws_2d(input, @CONV_LAYERS.register_module('ConvWS') class ConvWS2d(nn.Conv2d): - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - bias=True, - eps=1e-5): + def __init__( + self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, eps=1e-5 + ): super(ConvWS2d, self).__init__( in_channels, out_channels, @@ -43,12 +28,12 @@ def __init__(self, padding=padding, dilation=dilation, groups=groups, - bias=bias) + bias=bias, + ) self.eps = eps def forward(self, x): - return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, - self.dilation, self.groups, self.eps) + return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups, self.eps) @CONV_LAYERS.register_module(name='ConvAWS') @@ -75,15 +60,7 @@ class ConvAWS2d(nn.Conv2d): output. Default: True """ - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - bias=True): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True): super().__init__( in_channels, out_channels, @@ -92,11 +69,10 @@ def __init__(self, padding=padding, dilation=dilation, groups=groups, - bias=bias) - self.register_buffer('weight_gamma', - torch.ones(self.out_channels, 1, 1, 1)) - self.register_buffer('weight_beta', - torch.zeros(self.out_channels, 1, 1, 1)) + bias=bias, + ) + self.register_buffer('weight_gamma', torch.ones(self.out_channels, 1, 1, 1)) + self.register_buffer('weight_beta', torch.zeros(self.out_channels, 1, 1, 1)) def _get_weight(self, weight): weight_flat = weight.view(weight.size(0), -1) @@ -108,11 +84,11 @@ def _get_weight(self, weight): def forward(self, x): weight = self._get_weight(self.weight) - return F.conv2d(x, weight, self.bias, self.stride, self.padding, - self.dilation, self.groups) + return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) - def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, - missing_keys, unexpected_keys, error_msgs): + def _load_from_state_dict( + self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ): """Override default load function. AWS overrides the function _load_from_state_dict to recover @@ -125,9 +101,9 @@ def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, self.weight_gamma.data.fill_(-1) local_missing_keys = [] - super()._load_from_state_dict(state_dict, prefix, local_metadata, - strict, local_missing_keys, - unexpected_keys, error_msgs) + super()._load_from_state_dict( + state_dict, prefix, local_metadata, strict, local_missing_keys, unexpected_keys, error_msgs + ) if self.weight_gamma.data.mean() > 0: for k in local_missing_keys: missing_keys.append(k) @@ -138,10 +114,7 @@ def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) self.weight_beta.data.copy_(mean) self.weight_gamma.data.copy_(std) - missing_gamma_beta = [ - k for k in local_missing_keys - if k.endswith('weight_gamma') or k.endswith('weight_beta') - ] + missing_gamma_beta = [k for k in local_missing_keys if k.endswith('weight_gamma') or k.endswith('weight_beta')] for k in missing_gamma_beta: local_missing_keys.remove(k) for k in local_missing_keys: diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py index 722d5d8d71f7..6e4b622aed59 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py @@ -45,20 +45,22 @@ class DepthwiseSeparableConvModule(nn.Module): ConvModule. See ConvModule for ref. """ - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - norm_cfg=None, - act_cfg=dict(type='ReLU'), - dw_norm_cfg='default', - dw_act_cfg='default', - pw_norm_cfg='default', - pw_act_cfg='default', - **kwargs): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + dw_norm_cfg='default', + dw_act_cfg='default', + pw_norm_cfg='default', + pw_act_cfg='default', + **kwargs + ): super(DepthwiseSeparableConvModule, self).__init__() assert 'groups' not in kwargs, 'groups should not be specified' @@ -80,15 +82,12 @@ def __init__(self, groups=in_channels, norm_cfg=dw_norm_cfg, act_cfg=dw_act_cfg, - **kwargs) + **kwargs + ) self.pointwise_conv = ConvModule( - in_channels, - out_channels, - 1, - norm_cfg=pw_norm_cfg, - act_cfg=pw_act_cfg, - **kwargs) + in_channels, out_channels, 1, norm_cfg=pw_norm_cfg, act_cfg=pw_act_cfg, **kwargs + ) def forward(self, x): x = self.depthwise_conv(x) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py index 4ae8a5a2534f..b7f1af30b38d 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py @@ -6,20 +6,19 @@ from .registry import DROPOUT_LAYERS -def drop_path(x, drop_prob=0., training=False): +def drop_path(x, drop_prob=0.0, training=False): """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). We follow the implementation https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py # noqa: E501 """ - if drop_prob == 0. or not training: + if drop_prob == 0.0 or not training: return x keep_prob = 1 - drop_prob # handle tensors with different dimensions, not just 4D tensors. - shape = (x.shape[0], ) + (1, ) * (x.ndim - 1) - random_tensor = keep_prob + torch.rand( - shape, dtype=x.dtype, device=x.device) + shape = (x.shape[0],) + (1,) * (x.ndim - 1) + random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) output = x.div(keep_prob) * random_tensor.floor() return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py index 988d9adf2f28..3886a902c75c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py @@ -44,22 +44,22 @@ class GeneralizedAttention(nn.Module): _abbr_ = 'gen_attention_block' - def __init__(self, - in_channels, - spatial_range=-1, - num_heads=9, - position_embedding_dim=-1, - position_magnitude=1, - kv_stride=2, - q_stride=1, - attention_type='1111'): + def __init__( + self, + in_channels, + spatial_range=-1, + num_heads=9, + position_embedding_dim=-1, + position_magnitude=1, + kv_stride=2, + q_stride=1, + attention_type='1111', + ): super(GeneralizedAttention, self).__init__() # hard range means local range for non-local operation - self.position_embedding_dim = ( - position_embedding_dim - if position_embedding_dim > 0 else in_channels) + self.position_embedding_dim = position_embedding_dim if position_embedding_dim > 0 else in_channels self.position_magnitude = position_magnitude self.num_heads = num_heads @@ -72,36 +72,24 @@ def __init__(self, out_c = self.qk_embed_dim * num_heads if self.attention_type[0] or self.attention_type[1]: - self.query_conv = nn.Conv2d( - in_channels=in_channels, - out_channels=out_c, - kernel_size=1, - bias=False) + self.query_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_c, kernel_size=1, bias=False) self.query_conv.kaiming_init = True if self.attention_type[0] or self.attention_type[2]: - self.key_conv = nn.Conv2d( - in_channels=in_channels, - out_channels=out_c, - kernel_size=1, - bias=False) + self.key_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_c, kernel_size=1, bias=False) self.key_conv.kaiming_init = True self.v_dim = in_channels // num_heads self.value_conv = nn.Conv2d( - in_channels=in_channels, - out_channels=self.v_dim * num_heads, - kernel_size=1, - bias=False) + in_channels=in_channels, out_channels=self.v_dim * num_heads, kernel_size=1, bias=False + ) self.value_conv.kaiming_init = True if self.attention_type[1] or self.attention_type[3]: - self.appr_geom_fc_x = nn.Linear( - self.position_embedding_dim // 2, out_c, bias=False) + self.appr_geom_fc_x = nn.Linear(self.position_embedding_dim // 2, out_c, bias=False) self.appr_geom_fc_x.kaiming_init = True - self.appr_geom_fc_y = nn.Linear( - self.position_embedding_dim // 2, out_c, bias=False) + self.appr_geom_fc_y = nn.Linear(self.position_embedding_dim // 2, out_c, bias=False) self.appr_geom_fc_y.kaiming_init = True if self.attention_type[2]: @@ -115,10 +103,8 @@ def __init__(self, self.geom_bias = nn.Parameter(geom_bias_value) self.proj_conv = nn.Conv2d( - in_channels=self.v_dim * num_heads, - out_channels=in_channels, - kernel_size=1, - bias=True) + in_channels=self.v_dim * num_heads, out_channels=in_channels, kernel_size=1, bias=True + ) self.proj_conv.kaiming_init = True self.gamma = nn.Parameter(torch.zeros(1)) @@ -130,50 +116,37 @@ def __init__(self, max_len = 42 max_len_kv = int((max_len - 1.0) / self.kv_stride + 1) - local_constraint_map = np.ones( - (max_len, max_len, max_len_kv, max_len_kv), dtype=np.int) + local_constraint_map = np.ones((max_len, max_len, max_len_kv, max_len_kv), dtype=np.int) for iy in range(max_len): for ix in range(max_len): local_constraint_map[ - iy, ix, - max((iy - self.spatial_range) // - self.kv_stride, 0):min((iy + self.spatial_range + - 1) // self.kv_stride + - 1, max_len), - max((ix - self.spatial_range) // - self.kv_stride, 0):min((ix + self.spatial_range + - 1) // self.kv_stride + - 1, max_len)] = 0 + iy, + ix, + max((iy - self.spatial_range) // self.kv_stride, 0) : min( + (iy + self.spatial_range + 1) // self.kv_stride + 1, max_len + ), + max((ix - self.spatial_range) // self.kv_stride, 0) : min( + (ix + self.spatial_range + 1) // self.kv_stride + 1, max_len + ), + ] = 0 self.local_constraint_map = nn.Parameter( - torch.from_numpy(local_constraint_map).byte(), - requires_grad=False) + torch.from_numpy(local_constraint_map).byte(), requires_grad=False + ) if self.q_stride > 1: - self.q_downsample = nn.AvgPool2d( - kernel_size=1, stride=self.q_stride) + self.q_downsample = nn.AvgPool2d(kernel_size=1, stride=self.q_stride) else: self.q_downsample = None if self.kv_stride > 1: - self.kv_downsample = nn.AvgPool2d( - kernel_size=1, stride=self.kv_stride) + self.kv_downsample = nn.AvgPool2d(kernel_size=1, stride=self.kv_stride) else: self.kv_downsample = None self.init_weights() - def get_position_embedding(self, - h, - w, - h_kv, - w_kv, - q_stride, - kv_stride, - device, - dtype, - feat_dim, - wave_length=1000): + def get_position_embedding(self, h, w, h_kv, w_kv, q_stride, kv_stride, device, dtype, feat_dim, wave_length=1000): # the default type of Tensor is float32, leading to type mismatch # in fp16 mode. Cast it to support fp16 mode. h_idxs = torch.linspace(0, h - 1, h).to(device=device, dtype=dtype) @@ -182,12 +155,10 @@ def get_position_embedding(self, w_idxs = torch.linspace(0, w - 1, w).to(device=device, dtype=dtype) w_idxs = w_idxs.view((w, 1)) * q_stride - h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).to( - device=device, dtype=dtype) + h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).to(device=device, dtype=dtype) h_kv_idxs = h_kv_idxs.view((h_kv, 1)) * kv_stride - w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).to( - device=device, dtype=dtype) + w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).to(device=device, dtype=dtype) w_kv_idxs = w_kv_idxs.view((w_kv, 1)) * kv_stride # (h, h_kv, 1) @@ -198,18 +169,15 @@ def get_position_embedding(self, w_diff = w_idxs.unsqueeze(1) - w_kv_idxs.unsqueeze(0) w_diff *= self.position_magnitude - feat_range = torch.arange(0, feat_dim / 4).to( - device=device, dtype=dtype) + feat_range = torch.arange(0, feat_dim / 4).to(device=device, dtype=dtype) dim_mat = torch.Tensor([wave_length]).to(device=device, dtype=dtype) - dim_mat = dim_mat**((4. / feat_dim) * feat_range) + dim_mat = dim_mat ** ((4.0 / feat_dim) * feat_range) dim_mat = dim_mat.view((1, 1, -1)) - embedding_x = torch.cat( - ((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2) + embedding_x = torch.cat(((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2) - embedding_y = torch.cat( - ((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2) + embedding_y = torch.cat(((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2) return embedding_x, embedding_y @@ -230,56 +198,55 @@ def forward(self, x_input): _, _, h_kv, w_kv = x_kv.shape if self.attention_type[0] or self.attention_type[1]: - proj_query = self.query_conv(x_q).view( - (n, num_heads, self.qk_embed_dim, h * w)) + proj_query = self.query_conv(x_q).view((n, num_heads, self.qk_embed_dim, h * w)) proj_query = proj_query.permute(0, 1, 3, 2) if self.attention_type[0] or self.attention_type[2]: - proj_key = self.key_conv(x_kv).view( - (n, num_heads, self.qk_embed_dim, h_kv * w_kv)) + proj_key = self.key_conv(x_kv).view((n, num_heads, self.qk_embed_dim, h_kv * w_kv)) if self.attention_type[1] or self.attention_type[3]: position_embed_x, position_embed_y = self.get_position_embedding( - h, w, h_kv, w_kv, self.q_stride, self.kv_stride, - x_input.device, x_input.dtype, self.position_embedding_dim) + h, + w, + h_kv, + w_kv, + self.q_stride, + self.kv_stride, + x_input.device, + x_input.dtype, + self.position_embedding_dim, + ) # (n, num_heads, w, w_kv, dim) - position_feat_x = self.appr_geom_fc_x(position_embed_x).\ - view(1, w, w_kv, num_heads, self.qk_embed_dim).\ - permute(0, 3, 1, 2, 4).\ - repeat(n, 1, 1, 1, 1) + position_feat_x = ( + self.appr_geom_fc_x(position_embed_x) + .view(1, w, w_kv, num_heads, self.qk_embed_dim) + .permute(0, 3, 1, 2, 4) + .repeat(n, 1, 1, 1, 1) + ) # (n, num_heads, h, h_kv, dim) - position_feat_y = self.appr_geom_fc_y(position_embed_y).\ - view(1, h, h_kv, num_heads, self.qk_embed_dim).\ - permute(0, 3, 1, 2, 4).\ - repeat(n, 1, 1, 1, 1) + position_feat_y = ( + self.appr_geom_fc_y(position_embed_y) + .view(1, h, h_kv, num_heads, self.qk_embed_dim) + .permute(0, 3, 1, 2, 4) + .repeat(n, 1, 1, 1, 1) + ) position_feat_x /= math.sqrt(2) position_feat_y /= math.sqrt(2) # accelerate for saliency only if (np.sum(self.attention_type) == 1) and self.attention_type[2]: - appr_bias = self.appr_bias.\ - view(1, num_heads, 1, self.qk_embed_dim).\ - repeat(n, 1, 1, 1) + appr_bias = self.appr_bias.view(1, num_heads, 1, self.qk_embed_dim).repeat(n, 1, 1, 1) - energy = torch.matmul(appr_bias, proj_key).\ - view(n, num_heads, 1, h_kv * w_kv) + energy = torch.matmul(appr_bias, proj_key).view(n, num_heads, 1, h_kv * w_kv) h = 1 w = 1 else: # (n, num_heads, h*w, h_kv*w_kv), query before key, 540mb for if not self.attention_type[0]: - energy = torch.zeros( - n, - num_heads, - h, - w, - h_kv, - w_kv, - dtype=x_input.dtype, - device=x_input.device) + energy = torch.zeros(n, num_heads, h, w, h_kv, w_kv, dtype=x_input.dtype, device=x_input.device) # attention_type[0]: appr - appr # attention_type[1]: appr - position @@ -287,74 +254,53 @@ def forward(self, x_input): # attention_type[3]: bias - position if self.attention_type[0] or self.attention_type[2]: if self.attention_type[0] and self.attention_type[2]: - appr_bias = self.appr_bias.\ - view(1, num_heads, 1, self.qk_embed_dim) - energy = torch.matmul(proj_query + appr_bias, proj_key).\ - view(n, num_heads, h, w, h_kv, w_kv) + appr_bias = self.appr_bias.view(1, num_heads, 1, self.qk_embed_dim) + energy = torch.matmul(proj_query + appr_bias, proj_key).view(n, num_heads, h, w, h_kv, w_kv) elif self.attention_type[0]: - energy = torch.matmul(proj_query, proj_key).\ - view(n, num_heads, h, w, h_kv, w_kv) + energy = torch.matmul(proj_query, proj_key).view(n, num_heads, h, w, h_kv, w_kv) elif self.attention_type[2]: - appr_bias = self.appr_bias.\ - view(1, num_heads, 1, self.qk_embed_dim).\ - repeat(n, 1, 1, 1) + appr_bias = self.appr_bias.view(1, num_heads, 1, self.qk_embed_dim).repeat(n, 1, 1, 1) - energy += torch.matmul(appr_bias, proj_key).\ - view(n, num_heads, 1, 1, h_kv, w_kv) + energy += torch.matmul(appr_bias, proj_key).view(n, num_heads, 1, 1, h_kv, w_kv) if self.attention_type[1] or self.attention_type[3]: if self.attention_type[1] and self.attention_type[3]: - geom_bias = self.geom_bias.\ - view(1, num_heads, 1, self.qk_embed_dim) + geom_bias = self.geom_bias.view(1, num_heads, 1, self.qk_embed_dim) - proj_query_reshape = (proj_query + geom_bias).\ - view(n, num_heads, h, w, self.qk_embed_dim) + proj_query_reshape = (proj_query + geom_bias).view(n, num_heads, h, w, self.qk_embed_dim) energy_x = torch.matmul( - proj_query_reshape.permute(0, 1, 3, 2, 4), - position_feat_x.permute(0, 1, 2, 4, 3)) - energy_x = energy_x.\ - permute(0, 1, 3, 2, 4).unsqueeze(4) - - energy_y = torch.matmul( - proj_query_reshape, - position_feat_y.permute(0, 1, 2, 4, 3)) + proj_query_reshape.permute(0, 1, 3, 2, 4), position_feat_x.permute(0, 1, 2, 4, 3) + ) + energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4) + + energy_y = torch.matmul(proj_query_reshape, position_feat_y.permute(0, 1, 2, 4, 3)) energy_y = energy_y.unsqueeze(5) energy += energy_x + energy_y elif self.attention_type[1]: - proj_query_reshape = proj_query.\ - view(n, num_heads, h, w, self.qk_embed_dim) - proj_query_reshape = proj_query_reshape.\ - permute(0, 1, 3, 2, 4) - position_feat_x_reshape = position_feat_x.\ - permute(0, 1, 2, 4, 3) - position_feat_y_reshape = position_feat_y.\ - permute(0, 1, 2, 4, 3) - - energy_x = torch.matmul(proj_query_reshape, - position_feat_x_reshape) + proj_query_reshape = proj_query.view(n, num_heads, h, w, self.qk_embed_dim) + proj_query_reshape = proj_query_reshape.permute(0, 1, 3, 2, 4) + position_feat_x_reshape = position_feat_x.permute(0, 1, 2, 4, 3) + position_feat_y_reshape = position_feat_y.permute(0, 1, 2, 4, 3) + + energy_x = torch.matmul(proj_query_reshape, position_feat_x_reshape) energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4) - energy_y = torch.matmul(proj_query_reshape, - position_feat_y_reshape) + energy_y = torch.matmul(proj_query_reshape, position_feat_y_reshape) energy_y = energy_y.unsqueeze(5) energy += energy_x + energy_y elif self.attention_type[3]: - geom_bias = self.geom_bias.\ - view(1, num_heads, self.qk_embed_dim, 1).\ - repeat(n, 1, 1, 1) + geom_bias = self.geom_bias.view(1, num_heads, self.qk_embed_dim, 1).repeat(n, 1, 1, 1) - position_feat_x_reshape = position_feat_x.\ - view(n, num_heads, w*w_kv, self.qk_embed_dim) + position_feat_x_reshape = position_feat_x.view(n, num_heads, w * w_kv, self.qk_embed_dim) - position_feat_y_reshape = position_feat_y.\ - view(n, num_heads, h * h_kv, self.qk_embed_dim) + position_feat_y_reshape = position_feat_y.view(n, num_heads, h * h_kv, self.qk_embed_dim) energy_x = torch.matmul(position_feat_x_reshape, geom_bias) energy_x = energy_x.view(n, num_heads, 1, w, 1, w_kv) @@ -367,35 +313,29 @@ def forward(self, x_input): energy = energy.view(n, num_heads, h * w, h_kv * w_kv) if self.spatial_range >= 0: - cur_local_constraint_map = \ - self.local_constraint_map[:h, :w, :h_kv, :w_kv].\ - contiguous().\ - view(1, 1, h*w, h_kv*w_kv) + cur_local_constraint_map = ( + self.local_constraint_map[:h, :w, :h_kv, :w_kv].contiguous().view(1, 1, h * w, h_kv * w_kv) + ) - energy = energy.masked_fill_(cur_local_constraint_map, - float('-inf')) + energy = energy.masked_fill_(cur_local_constraint_map, float('-inf')) attention = F.softmax(energy, 3) proj_value = self.value_conv(x_kv) - proj_value_reshape = proj_value.\ - view((n, num_heads, self.v_dim, h_kv * w_kv)).\ - permute(0, 1, 3, 2) + proj_value_reshape = proj_value.view((n, num_heads, self.v_dim, h_kv * w_kv)).permute(0, 1, 3, 2) - out = torch.matmul(attention, proj_value_reshape).\ - permute(0, 1, 3, 2).\ - contiguous().\ - view(n, self.v_dim * self.num_heads, h, w) + out = ( + torch.matmul(attention, proj_value_reshape) + .permute(0, 1, 3, 2) + .contiguous() + .view(n, self.v_dim * self.num_heads, h, w) + ) out = self.proj_conv(out) # output is downsampled, upsample back to input size if self.q_downsample is not None: - out = F.interpolate( - out, - size=x_input.shape[2:], - mode='bilinear', - align_corners=False) + out = F.interpolate(out, size=x_input.shape[2:], mode='bilinear', align_corners=False) out = self.gamma * out + x_input return out @@ -403,10 +343,4 @@ def forward(self, x_input): def init_weights(self): for m in self.modules(): if hasattr(m, 'kaiming_init') and m.kaiming_init: - kaiming_init( - m, - mode='fan_in', - nonlinearity='leaky_relu', - bias=0, - distribution='uniform', - a=1) + kaiming_init(m, mode='fan_in', nonlinearity='leaky_relu', bias=0, distribution='uniform', a=1) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py index 92d00155ef27..34a3602e2a84 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py @@ -32,14 +32,16 @@ class _NonLocalNd(nn.Module, metaclass=ABCMeta): `embedded_gaussian` and `dot_product`. Default: embedded_gaussian. """ - def __init__(self, - in_channels, - reduction=2, - use_scale=True, - conv_cfg=None, - norm_cfg=None, - mode='embedded_gaussian', - **kwargs): + def __init__( + self, + in_channels, + reduction=2, + use_scale=True, + conv_cfg=None, + norm_cfg=None, + mode='embedded_gaussian', + **kwargs, + ): super(_NonLocalNd, self).__init__() self.in_channels = in_channels self.reduction = reduction @@ -47,52 +49,32 @@ def __init__(self, self.inter_channels = max(in_channels // reduction, 1) self.mode = mode - if mode not in [ - 'gaussian', 'embedded_gaussian', 'dot_product', 'concatenation' - ]: - raise ValueError("Mode should be in 'gaussian', 'concatenation', " - f"'embedded_gaussian' or 'dot_product', but got " - f'{mode} instead.') + if mode not in ['gaussian', 'embedded_gaussian', 'dot_product', 'concatenation']: + raise ValueError( + "Mode should be in 'gaussian', 'concatenation', " + f"'embedded_gaussian' or 'dot_product', but got " + f'{mode} instead.' + ) # g, theta, phi are defaulted as `nn.ConvNd`. # Here we use ConvModule for potential usage. - self.g = ConvModule( - self.in_channels, - self.inter_channels, - kernel_size=1, - conv_cfg=conv_cfg, - act_cfg=None) + self.g = ConvModule(self.in_channels, self.inter_channels, kernel_size=1, conv_cfg=conv_cfg, act_cfg=None) self.conv_out = ConvModule( - self.inter_channels, - self.in_channels, - kernel_size=1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=None) + self.inter_channels, self.in_channels, kernel_size=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None + ) if self.mode != 'gaussian': self.theta = ConvModule( - self.in_channels, - self.inter_channels, - kernel_size=1, - conv_cfg=conv_cfg, - act_cfg=None) + self.in_channels, self.inter_channels, kernel_size=1, conv_cfg=conv_cfg, act_cfg=None + ) self.phi = ConvModule( - self.in_channels, - self.inter_channels, - kernel_size=1, - conv_cfg=conv_cfg, - act_cfg=None) + self.in_channels, self.inter_channels, kernel_size=1, conv_cfg=conv_cfg, act_cfg=None + ) if self.mode == 'concatenation': self.concat_project = ConvModule( - self.inter_channels * 2, - 1, - kernel_size=1, - stride=1, - padding=0, - bias=False, - act_cfg=dict(type='ReLU')) + self.inter_channels * 2, 1, kernel_size=1, stride=1, padding=0, bias=False, act_cfg=dict(type='ReLU') + ) self.init_weights(**kwargs) @@ -128,7 +110,7 @@ def embedded_gaussian(self, theta_x, phi_x): pairwise_weight = torch.matmul(theta_x, phi_x) if self.use_scale: # theta_x.shape[-1] is `self.inter_channels` - pairwise_weight /= theta_x.shape[-1]**0.5 + pairwise_weight /= theta_x.shape[-1] ** 0.5 pairwise_weight = pairwise_weight.softmax(dim=-1) return pairwise_weight @@ -203,8 +185,7 @@ def forward(self, x): # NonLocal1d y: [N, C, H] # NonLocal2d y: [N, C, H, W] # NonLocal3d y: [N, C, T, H, W] - y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, - *x.size()[2:]) + y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, *x.size()[2:]) output = x + self.conv_out(y) @@ -223,13 +204,8 @@ class NonLocal1d(_NonLocalNd): Default: dict(type='Conv1d'). """ - def __init__(self, - in_channels, - sub_sample=False, - conv_cfg=dict(type='Conv1d'), - **kwargs): - super(NonLocal1d, self).__init__( - in_channels, conv_cfg=conv_cfg, **kwargs) + def __init__(self, in_channels, sub_sample=False, conv_cfg=dict(type='Conv1d'), **kwargs): + super(NonLocal1d, self).__init__(in_channels, conv_cfg=conv_cfg, **kwargs) self.sub_sample = sub_sample @@ -257,13 +233,8 @@ class NonLocal2d(_NonLocalNd): _abbr_ = 'nonlocal_block' - def __init__(self, - in_channels, - sub_sample=False, - conv_cfg=dict(type='Conv2d'), - **kwargs): - super(NonLocal2d, self).__init__( - in_channels, conv_cfg=conv_cfg, **kwargs) + def __init__(self, in_channels, sub_sample=False, conv_cfg=dict(type='Conv2d'), **kwargs): + super(NonLocal2d, self).__init__(in_channels, conv_cfg=conv_cfg, **kwargs) self.sub_sample = sub_sample @@ -288,13 +259,8 @@ class NonLocal3d(_NonLocalNd): Default: dict(type='Conv3d'). """ - def __init__(self, - in_channels, - sub_sample=False, - conv_cfg=dict(type='Conv3d'), - **kwargs): - super(NonLocal3d, self).__init__( - in_channels, conv_cfg=conv_cfg, **kwargs) + def __init__(self, in_channels, sub_sample=False, conv_cfg=dict(type='Conv3d'), **kwargs): + super(NonLocal3d, self).__init__(in_channels, conv_cfg=conv_cfg, **kwargs) self.sub_sample = sub_sample if sub_sample: diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py index 0502cba9a690..e3f5eaa8af18 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py @@ -4,7 +4,12 @@ import torch.nn as nn from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_tuple_of -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import SyncBatchNorm, _BatchNorm, _InstanceNorm +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import ( + SyncBatchNorm, + _BatchNorm, + _InstanceNorm, +) + from .registry import NORM_LAYERS NORM_LAYERS.register_module('BN', module=nn.BatchNorm2d) @@ -43,8 +48,7 @@ def infer_abbr(class_type): str: The inferred abbreviation. """ if not inspect.isclass(class_type): - raise TypeError( - f'class_type must be a type, but got {type(class_type)}') + raise TypeError(f'class_type must be a type, but got {type(class_type)}') if hasattr(class_type, '_abbr_'): return class_type._abbr_ if issubclass(class_type, _InstanceNorm): # IN is a subclass of BN @@ -131,11 +135,11 @@ def is_norm(layer, exclude=None): """ if exclude is not None: if not isinstance(exclude, tuple): - exclude = (exclude, ) + exclude = (exclude,) if not is_tuple_of(exclude, type): raise TypeError( - f'"exclude" must be either None or type or a tuple of types, ' - f'but got {type(exclude)}: {exclude}') + f'"exclude" must be either None or type or a tuple of types, ' f'but got {type(exclude)}: {exclude}' + ) if exclude and isinstance(layer, exclude): return False diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py index 07c010d40531..d1e6d6fb326a 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py @@ -44,8 +44,7 @@ def camel2snack(word): return word.lower() if not inspect.isclass(class_type): - raise TypeError( - f'class_type must be a type, but got {type(class_type)}') + raise TypeError(f'class_type must be a type, but got {type(class_type)}') if hasattr(class_type, '_abbr_'): return class_type._abbr_ else: diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py index 200148e2a588..7661266316c8 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py @@ -6,28 +6,48 @@ import torch.nn as nn from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import ConfigDict, deprecated_api_warning -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import Linear, build_activation_layer, build_norm_layer -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner.base_module import BaseModule, ModuleList, Sequential +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + Linear, + build_activation_layer, + build_norm_layer, +) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner.base_module import ( + BaseModule, + ModuleList, + Sequential, +) from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import build_from_cfg + from .drop import build_dropout -from .registry import (ATTENTION, FEEDFORWARD_NETWORK, POSITIONAL_ENCODING, - TRANSFORMER_LAYER, TRANSFORMER_LAYER_SEQUENCE) +from .registry import ( + ATTENTION, + FEEDFORWARD_NETWORK, + POSITIONAL_ENCODING, + TRANSFORMER_LAYER, + TRANSFORMER_LAYER_SEQUENCE, +) # Avoid BC-breaking of importing MultiScaleDeformableAttention from this file try: - from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention # noqa F401 + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops.multi_scale_deform_attn import ( # noqa F401 + MultiScaleDeformableAttention, + ) + warnings.warn( ImportWarning( '``MultiScaleDeformableAttention`` has been moved to ' '``mmcv.ops.multi_scale_deform_attn``, please change original path ' # noqa E501 '``from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention`` ' # noqa E501 'to ``from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention`` ' # noqa E501 - )) + ) + ) except ImportError: - warnings.warn('Fail to import ``MultiScaleDeformableAttention`` from ' - '``mmcv.ops.multi_scale_deform_attn``, ' - 'You should install ``mmcv-full`` if you need this module. ') + warnings.warn( + 'Fail to import ``MultiScaleDeformableAttention`` from ' + '``mmcv.ops.multi_scale_deform_attn``, ' + 'You should install ``mmcv-full`` if you need this module. ' + ) def build_positional_encoding(cfg, default_args=None): @@ -78,21 +98,25 @@ class MultiheadAttention(BaseModule): Default to False. """ - def __init__(self, - embed_dims, - num_heads, - attn_drop=0., - proj_drop=0., - dropout_layer=dict(type='Dropout', drop_prob=0.), - init_cfg=None, - batch_first=False, - **kwargs): + def __init__( + self, + embed_dims, + num_heads, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=dict(type='Dropout', drop_prob=0.0), + init_cfg=None, + batch_first=False, + **kwargs, + ): super(MultiheadAttention, self).__init__(init_cfg) if 'dropout' in kwargs: - warnings.warn('The arguments `dropout` in MultiheadAttention ' - 'has been deprecated, now you can separately ' - 'set `attn_drop`(float), proj_drop(float), ' - 'and `dropout_layer`(dict) ') + warnings.warn( + 'The arguments `dropout` in MultiheadAttention ' + 'has been deprecated, now you can separately ' + 'set `attn_drop`(float), proj_drop(float), ' + 'and `dropout_layer`(dict) ' + ) attn_drop = kwargs['dropout'] dropout_layer['drop_prob'] = kwargs.pop('dropout') @@ -100,25 +124,24 @@ def __init__(self, self.num_heads = num_heads self.batch_first = batch_first - self.attn = nn.MultiheadAttention(embed_dims, num_heads, attn_drop, - **kwargs) + self.attn = nn.MultiheadAttention(embed_dims, num_heads, attn_drop, **kwargs) self.proj_drop = nn.Dropout(proj_drop) - self.dropout_layer = build_dropout( - dropout_layer) if dropout_layer else nn.Identity() - - @deprecated_api_warning({'residual': 'identity'}, - cls_name='MultiheadAttention') - def forward(self, - query, - key=None, - value=None, - identity=None, - query_pos=None, - key_pos=None, - attn_mask=None, - key_padding_mask=None, - **kwargs): + self.dropout_layer = build_dropout(dropout_layer) if dropout_layer else nn.Identity() + + @deprecated_api_warning({'residual': 'identity'}, cls_name='MultiheadAttention') + def forward( + self, + query, + key=None, + value=None, + identity=None, + query_pos=None, + key_pos=None, + attn_mask=None, + key_padding_mask=None, + **kwargs, + ): """Forward function for `MultiheadAttention`. **kwargs allow passing a more general data flow when combining @@ -171,8 +194,7 @@ def forward(self, if query_pos.shape == key.shape: key_pos = query_pos else: - warnings.warn(f'position encoding of key is' - f'missing in {self.__class__.__name__}.') + warnings.warn(f'position encoding of key is' f'missing in {self.__class__.__name__}.') if query_pos is not None: query = query + query_pos if key_pos is not None: @@ -189,12 +211,7 @@ def forward(self, key = key.transpose(0, 1) value = value.transpose(0, 1) - out = self.attn( - query=query, - key=key, - value=value, - attn_mask=attn_mask, - key_padding_mask=key_padding_mask)[0] + out = self.attn(query=query, key=key, value=value, attn_mask=attn_mask, key_padding_mask=key_padding_mask)[0] if self.batch_first: out = out.transpose(0, 1) @@ -225,25 +242,21 @@ class FFN(BaseModule): Default: None. """ - @deprecated_api_warning( - { - 'dropout': 'ffn_drop', - 'add_residual': 'add_identity' - }, - cls_name='FFN') - def __init__(self, - embed_dims=256, - feedforward_channels=1024, - num_fcs=2, - act_cfg=dict(type='ReLU', inplace=True), - ffn_drop=0., - dropout_layer=None, - add_identity=True, - init_cfg=None, - **kwargs): + @deprecated_api_warning({'dropout': 'ffn_drop', 'add_residual': 'add_identity'}, cls_name='FFN') + def __init__( + self, + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + act_cfg=dict(type='ReLU', inplace=True), + ffn_drop=0.0, + dropout_layer=None, + add_identity=True, + init_cfg=None, + **kwargs, + ): super(FFN, self).__init__(init_cfg) - assert num_fcs >= 2, 'num_fcs should be no less ' \ - f'than 2. got {num_fcs}.' + assert num_fcs >= 2, 'num_fcs should be no less ' f'than 2. got {num_fcs}.' self.embed_dims = embed_dims self.feedforward_channels = feedforward_channels self.num_fcs = num_fcs @@ -253,16 +266,12 @@ def __init__(self, layers = [] in_channels = embed_dims for _ in range(num_fcs - 1): - layers.append( - Sequential( - Linear(in_channels, feedforward_channels), self.activate, - nn.Dropout(ffn_drop))) + layers.append(Sequential(Linear(in_channels, feedforward_channels), self.activate, nn.Dropout(ffn_drop))) in_channels = feedforward_channels layers.append(Linear(feedforward_channels, embed_dims)) layers.append(nn.Dropout(ffn_drop)) self.layers = Sequential(*layers) - self.dropout_layer = build_dropout( - dropout_layer) if dropout_layer else torch.nn.Identity() + self.dropout_layer = build_dropout(dropout_layer) if dropout_layer else torch.nn.Identity() self.add_identity = add_identity @deprecated_api_warning({'residual': 'identity'}, cls_name='FFN') @@ -316,55 +325,58 @@ class BaseTransformerLayer(BaseModule): or (n, batch, embed_dim). Default to False. """ - def __init__(self, - attn_cfgs=None, - ffn_cfgs=dict( - type='FFN', - embed_dims=256, - feedforward_channels=1024, - num_fcs=2, - ffn_drop=0., - act_cfg=dict(type='ReLU', inplace=True), - ), - operation_order=None, - norm_cfg=dict(type='LN'), - init_cfg=None, - batch_first=False, - **kwargs): + def __init__( + self, + attn_cfgs=None, + ffn_cfgs=dict( + type='FFN', + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + ffn_drop=0.0, + act_cfg=dict(type='ReLU', inplace=True), + ), + operation_order=None, + norm_cfg=dict(type='LN'), + init_cfg=None, + batch_first=False, + **kwargs, + ): deprecated_args = dict( - feedforward_channels='feedforward_channels', - ffn_dropout='ffn_drop', - ffn_num_fcs='num_fcs') + feedforward_channels='feedforward_channels', ffn_dropout='ffn_drop', ffn_num_fcs='num_fcs' + ) for ori_name, new_name in deprecated_args.items(): if ori_name in kwargs: warnings.warn( f'The arguments `{ori_name}` in BaseTransformerLayer ' f'has been deprecated, now you should set `{new_name}` ' f'and other FFN related arguments ' - f'to a dict named `ffn_cfgs`. ') + f'to a dict named `ffn_cfgs`. ' + ) ffn_cfgs[new_name] = kwargs[ori_name] super(BaseTransformerLayer, self).__init__(init_cfg) self.batch_first = batch_first - assert set(operation_order) & set( - ['self_attn', 'norm', 'ffn', 'cross_attn']) == \ - set(operation_order), f'The operation_order of' \ - f' {self.__class__.__name__} should ' \ - f'contains all four operation type ' \ + assert set(operation_order) & set(['self_attn', 'norm', 'ffn', 'cross_attn']) == set(operation_order), ( + f'The operation_order of' + f' {self.__class__.__name__} should ' + f'contains all four operation type ' f"{['self_attn', 'norm', 'ffn', 'cross_attn']}" + ) - num_attn = operation_order.count('self_attn') + operation_order.count( - 'cross_attn') + num_attn = operation_order.count('self_attn') + operation_order.count('cross_attn') if isinstance(attn_cfgs, dict): attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)] else: - assert num_attn == len(attn_cfgs), f'The length ' \ - f'of attn_cfg {num_attn} is ' \ - f'not consistent with the number of attention' \ + assert num_attn == len(attn_cfgs), ( + f'The length ' + f'of attn_cfg {num_attn} is ' + f'not consistent with the number of attention' f'in operation_order {operation_order}.' + ) self.num_attn = num_attn self.operation_order = operation_order @@ -400,25 +412,25 @@ def __init__(self, ffn_cfgs['embed_dims'] = self.embed_dims else: assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims - self.ffns.append( - build_feedforward_network(ffn_cfgs[ffn_index], - dict(type='FFN'))) + self.ffns.append(build_feedforward_network(ffn_cfgs[ffn_index], dict(type='FFN'))) self.norms = ModuleList() num_norms = operation_order.count('norm') for _ in range(num_norms): self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1]) - def forward(self, - query, - key=None, - value=None, - query_pos=None, - key_pos=None, - attn_masks=None, - query_key_padding_mask=None, - key_padding_mask=None, - **kwargs): + def forward( + self, + query, + key=None, + value=None, + query_pos=None, + key_pos=None, + attn_masks=None, + query_key_padding_mask=None, + key_padding_mask=None, + **kwargs, + ): """Forward function for `TransformerDecoderLayer`. **kwargs contains some specific arguments of attentions. @@ -457,16 +469,15 @@ def forward(self, if attn_masks is None: attn_masks = [None for _ in range(self.num_attn)] elif isinstance(attn_masks, torch.Tensor): - attn_masks = [ - copy.deepcopy(attn_masks) for _ in range(self.num_attn) - ] - warnings.warn(f'Use same attn_mask in all attentions in ' - f'{self.__class__.__name__} ') + attn_masks = [copy.deepcopy(attn_masks) for _ in range(self.num_attn)] + warnings.warn(f'Use same attn_mask in all attentions in ' f'{self.__class__.__name__} ') else: - assert len(attn_masks) == self.num_attn, f'The length of ' \ - f'attn_masks {len(attn_masks)} must be equal ' \ - f'to the number of attention in ' \ - f'operation_order {self.num_attn}' + assert len(attn_masks) == self.num_attn, ( + f'The length of ' + f'attn_masks {len(attn_masks)} must be equal ' + f'to the number of attention in ' + f'operation_order {self.num_attn}' + ) for layer in self.operation_order: if layer == 'self_attn': @@ -480,7 +491,8 @@ def forward(self, key_pos=query_pos, attn_mask=attn_masks[attn_index], key_padding_mask=query_key_padding_mask, - **kwargs) + **kwargs, + ) attn_index += 1 identity = query @@ -498,13 +510,13 @@ def forward(self, key_pos=key_pos, attn_mask=attn_masks[attn_index], key_padding_mask=key_padding_mask, - **kwargs) + **kwargs, + ) attn_index += 1 identity = query elif layer == 'ffn': - query = self.ffns[ffn_index]( - query, identity if self.pre_norm else None) + query = self.ffns[ffn_index](query, identity if self.pre_norm else None) ffn_index += 1 return query @@ -533,12 +545,9 @@ class TransformerLayerSequence(BaseModule): def __init__(self, transformerlayers=None, num_layers=None, init_cfg=None): super(TransformerLayerSequence, self).__init__(init_cfg) if isinstance(transformerlayers, dict): - transformerlayers = [ - copy.deepcopy(transformerlayers) for _ in range(num_layers) - ] + transformerlayers = [copy.deepcopy(transformerlayers) for _ in range(num_layers)] else: - assert isinstance(transformerlayers, list) and \ - len(transformerlayers) == num_layers + assert isinstance(transformerlayers, list) and len(transformerlayers) == num_layers self.num_layers = num_layers self.layers = ModuleList() for i in range(num_layers): @@ -546,16 +555,18 @@ def __init__(self, transformerlayers=None, num_layers=None, init_cfg=None): self.embed_dims = self.layers[0].embed_dims self.pre_norm = self.layers[0].pre_norm - def forward(self, - query, - key, - value, - query_pos=None, - key_pos=None, - attn_masks=None, - query_key_padding_mask=None, - key_padding_mask=None, - **kwargs): + def forward( + self, + query, + key, + value, + query_pos=None, + key_pos=None, + attn_masks=None, + query_key_padding_mask=None, + key_padding_mask=None, + **kwargs, + ): """Forward function for `TransformerCoder`. Args: @@ -591,5 +602,6 @@ def forward(self, attn_masks=attn_masks, query_key_padding_mask=query_key_padding_mask, key_padding_mask=key_padding_mask, - **kwargs) + **kwargs, + ) return query diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py index a1a353767d0c..f4d0f1fa8291 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py @@ -24,8 +24,7 @@ class PixelShufflePack(nn.Module): channels. """ - def __init__(self, in_channels, out_channels, scale_factor, - upsample_kernel): + def __init__(self, in_channels, out_channels, scale_factor, upsample_kernel): super(PixelShufflePack, self).__init__() self.in_channels = in_channels self.out_channels = out_channels @@ -35,7 +34,8 @@ def __init__(self, in_channels, out_channels, scale_factor, self.in_channels, self.out_channels * scale_factor * scale_factor, self.upsample_kernel, - padding=(self.upsample_kernel - 1) // 2) + padding=(self.upsample_kernel - 1) // 2, + ) self.init_weights() def init_weights(self): @@ -68,8 +68,7 @@ def build_upsample_layer(cfg, *args, **kwargs): if not isinstance(cfg, dict): raise TypeError(f'cfg must be a dict, but got {type(cfg)}') if 'type' not in cfg: - raise KeyError( - f'the cfg dict must contain the key "type", but got {cfg}') + raise KeyError(f'the cfg dict must contain the key "type", but got {cfg}') cfg_ = cfg.copy() layer_type = cfg_.pop('type') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py index 8aebf67bf523..9028d69d4480 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py @@ -26,7 +26,6 @@ def obsolete_torch_version(torch_version, version_threshold): class NewEmptyTensorOp(torch.autograd.Function): - @staticmethod def forward(ctx, x, new_shape): ctx.shape = x.shape @@ -40,12 +39,10 @@ def backward(ctx, grad): @CONV_LAYERS.register_module('Conv', force=True) class Conv2d(nn.Conv2d): - def forward(self, x): if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): out_shape = [x.shape[0], self.out_channels] - for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size, - self.padding, self.stride, self.dilation): + for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size, self.padding, self.stride, self.dilation): o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1 out_shape.append(o) empty = NewEmptyTensorOp.apply(x, out_shape) @@ -61,12 +58,10 @@ def forward(self, x): @CONV_LAYERS.register_module('Conv3d', force=True) class Conv3d(nn.Conv3d): - def forward(self, x): if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): out_shape = [x.shape[0], self.out_channels] - for i, k, p, s, d in zip(x.shape[-3:], self.kernel_size, - self.padding, self.stride, self.dilation): + for i, k, p, s, d in zip(x.shape[-3:], self.kernel_size, self.padding, self.stride, self.dilation): o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1 out_shape.append(o) empty = NewEmptyTensorOp.apply(x, out_shape) @@ -84,13 +79,12 @@ def forward(self, x): @CONV_LAYERS.register_module('deconv') @UPSAMPLE_LAYERS.register_module('deconv', force=True) class ConvTranspose2d(nn.ConvTranspose2d): - def forward(self, x): if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): out_shape = [x.shape[0], self.out_channels] - for i, k, p, s, d, op in zip(x.shape[-2:], self.kernel_size, - self.padding, self.stride, - self.dilation, self.output_padding): + for i, k, p, s, d, op in zip( + x.shape[-2:], self.kernel_size, self.padding, self.stride, self.dilation, self.output_padding + ): out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op) empty = NewEmptyTensorOp.apply(x, out_shape) if self.training: @@ -107,13 +101,12 @@ def forward(self, x): @CONV_LAYERS.register_module('deconv3d') @UPSAMPLE_LAYERS.register_module('deconv3d', force=True) class ConvTranspose3d(nn.ConvTranspose3d): - def forward(self, x): if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): out_shape = [x.shape[0], self.out_channels] - for i, k, p, s, d, op in zip(x.shape[-3:], self.kernel_size, - self.padding, self.stride, - self.dilation, self.output_padding): + for i, k, p, s, d, op in zip( + x.shape[-3:], self.kernel_size, self.padding, self.stride, self.dilation, self.output_padding + ): out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op) empty = NewEmptyTensorOp.apply(x, out_shape) if self.training: @@ -127,14 +120,13 @@ def forward(self, x): class MaxPool2d(nn.MaxPool2d): - def forward(self, x): # PyTorch 1.9 does not support empty tensor inference yet if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): out_shape = list(x.shape[:2]) - for i, k, p, s, d in zip(x.shape[-2:], _pair(self.kernel_size), - _pair(self.padding), _pair(self.stride), - _pair(self.dilation)): + for i, k, p, s, d in zip( + x.shape[-2:], _pair(self.kernel_size), _pair(self.padding), _pair(self.stride), _pair(self.dilation) + ): o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 o = math.ceil(o) if self.ceil_mode else math.floor(o) out_shape.append(o) @@ -145,15 +137,17 @@ def forward(self, x): class MaxPool3d(nn.MaxPool3d): - def forward(self, x): # PyTorch 1.9 does not support empty tensor inference yet if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): out_shape = list(x.shape[:2]) - for i, k, p, s, d in zip(x.shape[-3:], _triple(self.kernel_size), - _triple(self.padding), - _triple(self.stride), - _triple(self.dilation)): + for i, k, p, s, d in zip( + x.shape[-3:], + _triple(self.kernel_size), + _triple(self.padding), + _triple(self.stride), + _triple(self.dilation), + ): o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 o = math.ceil(o) if self.ceil_mode else math.floor(o) out_shape.append(o) @@ -164,7 +158,6 @@ def forward(self, x): class Linear(torch.nn.Linear): - def forward(self, x): # empty tensor forward of Linear layer is supported in Pytorch 1.6 if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 5)): diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py index 7567316c566b..64e378712149 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py @@ -19,9 +19,7 @@ def build_model_from_cfg(cfg, registry, default_args=None): nn.Module: A built nn module. """ if isinstance(cfg, list): - modules = [ - build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg - ] + modules = [build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg] return Sequential(*modules) else: return build_from_cfg(cfg, registry, default_args) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py index 1cb3ac057ee2..a432cd00d4c5 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py @@ -10,26 +10,14 @@ def conv3x3(in_planes, out_planes, stride=1, dilation=1): """3x3 convolution with padding.""" return nn.Conv2d( - in_planes, - out_planes, - kernel_size=3, - stride=stride, - padding=dilation, - dilation=dilation, - bias=False) + in_planes, out_planes, kernel_size=3, stride=stride, padding=dilation, dilation=dilation, bias=False + ) class BasicBlock(nn.Module): expansion = 1 - def __init__(self, - inplanes, - planes, - stride=1, - dilation=1, - downsample=None, - style='pytorch', - with_cp=False): + def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, style='pytorch', with_cp=False): super(BasicBlock, self).__init__() assert style in ['pytorch', 'caffe'] self.conv1 = conv3x3(inplanes, planes, stride, dilation) @@ -64,14 +52,7 @@ def forward(self, x): class Bottleneck(nn.Module): expansion = 4 - def __init__(self, - inplanes, - planes, - stride=1, - dilation=1, - downsample=None, - style='pytorch', - with_cp=False): + def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, style='pytorch', with_cp=False): """Bottleneck block. If style is "pytorch", the stride-two layer is the 3x3 conv layer, if @@ -85,21 +66,14 @@ def __init__(self, else: conv1_stride = stride conv2_stride = 1 - self.conv1 = nn.Conv2d( - inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False) + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False) self.conv2 = nn.Conv2d( - planes, - planes, - kernel_size=3, - stride=conv2_stride, - padding=dilation, - dilation=dilation, - bias=False) + planes, planes, kernel_size=3, stride=conv2_stride, padding=dilation, dilation=dilation, bias=False + ) self.bn1 = nn.BatchNorm2d(planes) self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d( - planes, planes * self.expansion, kernel_size=1, bias=False) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) self.bn3 = nn.BatchNorm2d(planes * self.expansion) self.relu = nn.ReLU(inplace=True) self.downsample = downsample @@ -108,7 +82,6 @@ def __init__(self, self.with_cp = with_cp def forward(self, x): - def _inner_forward(x): residual = x @@ -140,40 +113,19 @@ def _inner_forward(x): return out -def make_res_layer(block, - inplanes, - planes, - blocks, - stride=1, - dilation=1, - style='pytorch', - with_cp=False): +def make_res_layer(block, inplanes, planes, blocks, stride=1, dilation=1, style='pytorch', with_cp=False): downsample = None if stride != 1 or inplanes != planes * block.expansion: downsample = nn.Sequential( - nn.Conv2d( - inplanes, - planes * block.expansion, - kernel_size=1, - stride=stride, - bias=False), + nn.Conv2d(inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(planes * block.expansion), ) layers = [] - layers.append( - block( - inplanes, - planes, - stride, - dilation, - downsample, - style=style, - with_cp=with_cp)) + layers.append(block(inplanes, planes, stride, dilation, downsample, style=style, with_cp=with_cp)) inplanes = planes * block.expansion for _ in range(1, blocks): - layers.append( - block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp)) + layers.append(block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp)) return nn.Sequential(*layers) @@ -204,20 +156,22 @@ class ResNet(nn.Module): 34: (BasicBlock, (3, 4, 6, 3)), 50: (Bottleneck, (3, 4, 6, 3)), 101: (Bottleneck, (3, 4, 23, 3)), - 152: (Bottleneck, (3, 8, 36, 3)) + 152: (Bottleneck, (3, 8, 36, 3)), } - def __init__(self, - depth, - num_stages=4, - strides=(1, 2, 2, 2), - dilations=(1, 1, 1, 1), - out_indices=(0, 1, 2, 3), - style='pytorch', - frozen_stages=-1, - bn_eval=True, - bn_frozen=False, - with_cp=False): + def __init__( + self, + depth, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + frozen_stages=-1, + bn_eval=True, + bn_frozen=False, + with_cp=False, + ): super(ResNet, self).__init__() if depth not in self.arch_settings: raise KeyError(f'invalid depth {depth} for resnet') @@ -235,8 +189,7 @@ def __init__(self, self.with_cp = with_cp self.inplanes = 64 - self.conv1 = nn.Conv2d( - 3, 64, kernel_size=7, stride=2, padding=3, bias=False) + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) self.bn1 = nn.BatchNorm2d(64) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) @@ -245,7 +198,7 @@ def __init__(self, for i, num_blocks in enumerate(stage_blocks): stride = strides[i] dilation = dilations[i] - planes = 64 * 2**i + planes = 64 * 2 ** i res_layer = make_res_layer( block, self.inplanes, @@ -254,18 +207,20 @@ def __init__(self, stride=stride, dilation=dilation, style=self.style, - with_cp=with_cp) + with_cp=with_cp, + ) self.inplanes = planes * block.expansion layer_name = f'layer{i + 1}' self.add_module(layer_name, res_layer) self.res_layers.append(layer_name) - self.feat_dim = block.expansion * 64 * 2**(len(stage_blocks) - 1) + self.feat_dim = block.expansion * 64 * 2 ** (len(stage_blocks) - 1) def init_weights(self, pretrained=None): if isinstance(pretrained, str): logger = logging.getLogger() from ..runner import load_checkpoint + load_checkpoint(self, pretrained, strict=False, logger=logger) elif pretrained is None: for m in self.modules(): diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py index a263e31c1e39..2f1607650cb1 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py @@ -2,18 +2,47 @@ from .flops_counter import get_model_complexity_info from .fuse_conv_bn import fuse_conv_bn from .sync_bn import revert_sync_batchnorm -from .weight_init import (INITIALIZERS, Caffe2XavierInit, ConstantInit, - KaimingInit, NormalInit, PretrainedInit, - TruncNormalInit, UniformInit, XavierInit, - bias_init_with_prob, caffe2_xavier_init, - constant_init, initialize, kaiming_init, normal_init, - trunc_normal_init, uniform_init, xavier_init) +from .weight_init import ( + INITIALIZERS, + Caffe2XavierInit, + ConstantInit, + KaimingInit, + NormalInit, + PretrainedInit, + TruncNormalInit, + UniformInit, + XavierInit, + bias_init_with_prob, + caffe2_xavier_init, + constant_init, + initialize, + kaiming_init, + normal_init, + trunc_normal_init, + uniform_init, + xavier_init, +) __all__ = [ - 'get_model_complexity_info', 'bias_init_with_prob', 'caffe2_xavier_init', - 'constant_init', 'kaiming_init', 'normal_init', 'trunc_normal_init', - 'uniform_init', 'xavier_init', 'fuse_conv_bn', 'initialize', - 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit', - 'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit', - 'Caffe2XavierInit', 'revert_sync_batchnorm' + 'get_model_complexity_info', + 'bias_init_with_prob', + 'caffe2_xavier_init', + 'constant_init', + 'kaiming_init', + 'normal_init', + 'trunc_normal_init', + 'uniform_init', + 'xavier_init', + 'fuse_conv_bn', + 'initialize', + 'INITIALIZERS', + 'ConstantInit', + 'XavierInit', + 'NormalInit', + 'TruncNormalInit', + 'UniformInit', + 'KaimingInit', + 'PretrainedInit', + 'Caffe2XavierInit', + 'revert_sync_batchnorm', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py index 2b69ac3a54d6..afbba392fa97 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py @@ -33,13 +33,9 @@ import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -def get_model_complexity_info(model, - input_shape, - print_per_layer_stat=True, - as_strings=True, - input_constructor=None, - flush=False, - ost=sys.stdout): +def get_model_complexity_info( + model, input_shape, print_per_layer_stat=True, as_strings=True, input_constructor=None, flush=False, ost=sys.stdout +): """Get complexity information of a model. This method can calculate FLOPs and parameter counts of a model with @@ -95,7 +91,8 @@ def get_model_complexity_info(model, batch = torch.ones(()).new_empty( (1, *input_shape), dtype=next(flops_model.parameters()).dtype, - device=next(flops_model.parameters()).device) + device=next(flops_model.parameters()).device, + ) except StopIteration: # Avoid StopIteration for models which have no parameters, # like `nn.Relu()`, `nn.AvgPool2d`, etc. @@ -105,8 +102,7 @@ def get_model_complexity_info(model, flops_count, params_count = flops_model.compute_average_flops_cost() if print_per_layer_stat: - print_model_with_flops( - flops_model, flops_count, params_count, ost=ost, flush=flush) + print_model_with_flops(flops_model, flops_count, params_count, ost=ost, flush=flush) flops_model.stop_flops_count() if as_strings: @@ -139,21 +135,21 @@ def flops_to_string(flops, units='GFLOPs', precision=2): '3e-09 FLOPs' """ if units is None: - if flops // 10**9 > 0: - return str(round(flops / 10.**9, precision)) + ' GFLOPs' - elif flops // 10**6 > 0: - return str(round(flops / 10.**6, precision)) + ' MFLOPs' - elif flops // 10**3 > 0: - return str(round(flops / 10.**3, precision)) + ' KFLOPs' + if flops // 10 ** 9 > 0: + return str(round(flops / 10.0 ** 9, precision)) + ' GFLOPs' + elif flops // 10 ** 6 > 0: + return str(round(flops / 10.0 ** 6, precision)) + ' MFLOPs' + elif flops // 10 ** 3 > 0: + return str(round(flops / 10.0 ** 3, precision)) + ' KFLOPs' else: return str(flops) + ' FLOPs' else: if units == 'GFLOPs': - return str(round(flops / 10.**9, precision)) + ' ' + units + return str(round(flops / 10.0 ** 9, precision)) + ' ' + units elif units == 'MFLOPs': - return str(round(flops / 10.**6, precision)) + ' ' + units + return str(round(flops / 10.0 ** 6, precision)) + ' ' + units elif units == 'KFLOPs': - return str(round(flops / 10.**3, precision)) + ' ' + units + return str(round(flops / 10.0 ** 3, precision)) + ' ' + units else: return str(flops) + ' FLOPs' @@ -180,28 +176,22 @@ def params_to_string(num_params, units=None, precision=2): '3e-09' """ if units is None: - if num_params // 10**6 > 0: - return str(round(num_params / 10**6, precision)) + ' M' - elif num_params // 10**3: - return str(round(num_params / 10**3, precision)) + ' k' + if num_params // 10 ** 6 > 0: + return str(round(num_params / 10 ** 6, precision)) + ' M' + elif num_params // 10 ** 3: + return str(round(num_params / 10 ** 3, precision)) + ' k' else: return str(num_params) else: if units == 'M': - return str(round(num_params / 10.**6, precision)) + ' ' + units + return str(round(num_params / 10.0 ** 6, precision)) + ' ' + units elif units == 'K': - return str(round(num_params / 10.**3, precision)) + ' ' + units + return str(round(num_params / 10.0 ** 3, precision)) + ' ' + units else: return str(num_params) -def print_model_with_flops(model, - total_flops, - total_params, - units='GFLOPs', - precision=3, - ost=sys.stdout, - flush=False): +def print_model_with_flops(model, total_flops, total_params, units='GFLOPs', precision=3, ost=sys.stdout, flush=False): """Print a model with FLOPs for each layer. Args: @@ -273,15 +263,15 @@ def accumulate_flops(self): def flops_repr(self): accumulated_num_params = self.accumulate_params() accumulated_flops_cost = self.accumulate_flops() - return ', '.join([ - params_to_string( - accumulated_num_params, units='M', precision=precision), - '{:.3%} Params'.format(accumulated_num_params / total_params), - flops_to_string( - accumulated_flops_cost, units=units, precision=precision), - '{:.3%} FLOPs'.format(accumulated_flops_cost / total_flops), - self.original_extra_repr() - ]) + return ', '.join( + [ + params_to_string(accumulated_num_params, units='M', precision=precision), + '{:.3%} Params'.format(accumulated_num_params / total_params), + flops_to_string(accumulated_flops_cost, units=units, precision=precision), + '{:.3%} FLOPs'.format(accumulated_flops_cost / total_flops), + self.original_extra_repr(), + ] + ) def add_extra_repr(m): m.accumulate_flops = accumulate_flops.__get__(m) @@ -320,14 +310,10 @@ def get_model_parameters_number(model): def add_flops_counting_methods(net_main_module): # adding additional methods to the existing module object, # this is done this way so that each function has access to self object - net_main_module.start_flops_count = start_flops_count.__get__( - net_main_module) - net_main_module.stop_flops_count = stop_flops_count.__get__( - net_main_module) - net_main_module.reset_flops_count = reset_flops_count.__get__( - net_main_module) - net_main_module.compute_average_flops_cost = compute_average_flops_cost.__get__( # noqa: E501 - net_main_module) + net_main_module.start_flops_count = start_flops_count.__get__(net_main_module) + net_main_module.stop_flops_count = stop_flops_count.__get__(net_main_module) + net_main_module.reset_flops_count = reset_flops_count.__get__(net_main_module) + net_main_module.compute_average_flops_cost = compute_average_flops_cost.__get__(net_main_module) # noqa: E501 net_main_module.reset_flops_count() @@ -367,8 +353,7 @@ def add_flops_counter_hook_function(module): return else: - handle = module.register_forward_hook( - get_modules_mapping()[type(module)]) + handle = module.register_forward_hook(get_modules_mapping()[type(module)]) module.__flops_handle__ = handle @@ -417,8 +402,7 @@ def relu_flops_counter_hook(module, input, output): def linear_flops_counter_hook(module, input, output): input = input[0] - output_last_dim = output.shape[ - -1] # pytorch checks dimensions, so here we don't care much + output_last_dim = output.shape[-1] # pytorch checks dimensions, so here we don't care much module.__flops__ += int(np.prod(input.shape) * output_last_dim) @@ -431,8 +415,7 @@ def norm_flops_counter_hook(module, input, output): input = input[0] batch_flops = np.prod(input.shape) - if (getattr(module, 'affine', False) - or getattr(module, 'elementwise_affine', False)): + if getattr(module, 'affine', False) or getattr(module, 'elementwise_affine', False): batch_flops *= 2 module.__flops__ += int(batch_flops) @@ -450,8 +433,7 @@ def deconv_flops_counter_hook(conv_module, input, output): groups = conv_module.groups filters_per_channel = out_channels // groups - conv_per_position_flops = ( - kernel_height * kernel_width * in_channels * filters_per_channel) + conv_per_position_flops = kernel_height * kernel_width * in_channels * filters_per_channel active_elements_count = batch_size * input_height * input_width overall_conv_flops = conv_per_position_flops * active_elements_count @@ -477,8 +459,7 @@ def conv_flops_counter_hook(conv_module, input, output): groups = conv_module.groups filters_per_channel = out_channels // groups - conv_per_position_flops = int( - np.prod(kernel_dims)) * in_channels * filters_per_channel + conv_per_position_flops = int(np.prod(kernel_dims)) * in_channels * filters_per_channel active_elements_count = batch_size * int(np.prod(output_dims)) @@ -503,8 +484,7 @@ def batch_counter_hook(module, input, output): batch_size = len(input) else: pass - print('Warning! No positional inputs found for a module, ' - 'assuming batch size is 1.') + print('Warning! No positional inputs found for a module, ' 'assuming batch size is 1.') module.__batch_counter__ += batch_size @@ -530,9 +510,10 @@ def remove_batch_counter_hook_function(module): def add_flops_counter_variable_or_reset(module): if is_supported_instance(module): if hasattr(module, '__flops__') or hasattr(module, '__params__'): - print('Warning: variables __flops__ or __params__ are already ' - 'defined for the module' + type(module).__name__ + - ' ptflops can affect your code!') + print( + 'Warning: variables __flops__ or __params__ are already ' + 'defined for the module' + type(module).__name__ + ' ptflops can affect your code!' + ) module.__flops__ = 0 module.__params__ = get_model_parameters_number(module) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py index cb7076f80bf3..33dd13e18826 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py @@ -14,12 +14,10 @@ def _fuse_conv_bn(conv, bn): nn.Module: Fused module. """ conv_w = conv.weight - conv_b = conv.bias if conv.bias is not None else torch.zeros_like( - bn.running_mean) + conv_b = conv.bias if conv.bias is not None else torch.zeros_like(bn.running_mean) factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) - conv.weight = nn.Parameter(conv_w * - factor.reshape([conv.out_channels, 1, 1, 1])) + conv.weight = nn.Parameter(conv_w * factor.reshape([conv.out_channels, 1, 1, 1])) conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) return conv @@ -42,8 +40,7 @@ def fuse_conv_bn(module): last_conv_name = None for name, child in module.named_children(): - if isinstance(child, - (nn.modules.batchnorm._BatchNorm, nn.SyncBatchNorm)): + if isinstance(child, (nn.modules.batchnorm._BatchNorm, nn.SyncBatchNorm)): if last_conv is None: # only fuse BN that is after Conv continue fused_conv = _fuse_conv_bn(last_conv, child) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py index b7ed04dc5f85..d88b7e476317 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py @@ -37,9 +37,9 @@ def revert_sync_batchnorm(module): if hasattr(mmcv, 'ops'): module_checklist.append(mmcv.ops.SyncBatchNorm) if isinstance(module, tuple(module_checklist)): - module_output = _BatchNormXd(module.num_features, module.eps, - module.momentum, module.affine, - module.track_running_stats) + module_output = _BatchNormXd( + module.num_features, module.eps, module.momentum, module.affine, module.track_running_stats + ) if module.affine: # no_grad() may not be needed here but # just to be consistent with `convert_sync_batchnorm()` diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py index a5bb6cde3850..aa5047e743cb 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py @@ -8,7 +8,12 @@ import torch.nn as nn from torch import Tensor -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry, build_from_cfg, get_logger, print_log +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import ( + Registry, + build_from_cfg, + get_logger, + print_log, +) INITIALIZERS = Registry('initializer') @@ -23,9 +28,7 @@ def update_init_info(module, init_info): information. init_info (str): The string that describes the initialization. """ - assert hasattr( - module, - '_params_init_info'), f'Can not find `_params_init_info` in {module}' + assert hasattr(module, '_params_init_info'), f'Can not find `_params_init_info` in {module}' for name, param in module.named_parameters(): assert param in module._params_init_info, ( @@ -35,7 +38,8 @@ def update_init_info(module, init_info): f'`{module.__class__.__name__}`. ' f'Please do not add or ' f'replace parameters during executing ' - f'the `init_weights`. ') + f'the `init_weights`. ' + ) # The parameter has been changed during executing the # `init_weights` of module @@ -70,12 +74,9 @@ def normal_init(module, mean=0, std=1, bias=0): nn.init.constant_(module.bias, bias) -def trunc_normal_init(module: nn.Module, - mean: float = 0, - std: float = 1, - a: float = -2, - b: float = 2, - bias: float = 0) -> None: +def trunc_normal_init( + module: nn.Module, mean: float = 0, std: float = 1, a: float = -2, b: float = 2, bias: float = 0 +) -> None: if hasattr(module, 'weight') and module.weight is not None: trunc_normal_(module.weight, mean, std, a, b) # type: ignore if hasattr(module, 'bias') and module.bias is not None: @@ -89,20 +90,13 @@ def uniform_init(module, a=0, b=1, bias=0): nn.init.constant_(module.bias, bias) -def kaiming_init(module, - a=0, - mode='fan_out', - nonlinearity='relu', - bias=0, - distribution='normal'): +def kaiming_init(module, a=0, mode='fan_out', nonlinearity='relu', bias=0, distribution='normal'): assert distribution in ['uniform', 'normal'] if hasattr(module, 'weight') and module.weight is not None: if distribution == 'uniform': - nn.init.kaiming_uniform_( - module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + nn.init.kaiming_uniform_(module.weight, a=a, mode=mode, nonlinearity=nonlinearity) else: - nn.init.kaiming_normal_( - module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + nn.init.kaiming_normal_(module.weight, a=a, mode=mode, nonlinearity=nonlinearity) if hasattr(module, 'bias') and module.bias is not None: nn.init.constant_(module.bias, bias) @@ -110,13 +104,7 @@ def kaiming_init(module, def caffe2_xavier_init(module, bias=0): # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch # Acknowledgment to FAIR's internal code - kaiming_init( - module, - a=1, - mode='fan_in', - nonlinearity='leaky_relu', - bias=bias, - distribution='uniform') + kaiming_init(module, a=1, mode='fan_in', nonlinearity='leaky_relu', bias=bias, distribution='uniform') def bias_init_with_prob(prior_prob): @@ -130,7 +118,6 @@ def _get_bases_name(m): class BaseInit(object): - def __init__(self, *, bias=0, bias_prob=None, layer=None): self.wholemodule = False if not isinstance(bias, (int, float)): @@ -138,13 +125,17 @@ def __init__(self, *, bias=0, bias_prob=None, layer=None): if bias_prob is not None: if not isinstance(bias_prob, float): - raise TypeError(f'bias_prob type must be float, \ - but got {type(bias_prob)}') + raise TypeError( + f'bias_prob type must be float, \ + but got {type(bias_prob)}' + ) if layer is not None: if not isinstance(layer, (str, list)): - raise TypeError(f'layer must be a str or a list of str, \ - but got a {type(layer)}') + raise TypeError( + f'layer must be a str or a list of str, \ + but got a {type(layer)}' + ) else: layer = [] @@ -177,7 +168,6 @@ def __init__(self, val, **kwargs): self.val = val def __call__(self, module): - def init(m): if self.wholemodule: constant_init(m, self.val, self.bias) @@ -220,7 +210,6 @@ def __init__(self, gain=1, distribution='normal', **kwargs): self.distribution = distribution def __call__(self, module): - def init(m): if self.wholemodule: xavier_init(m, self.gain, self.bias, self.distribution) @@ -235,8 +224,7 @@ def init(m): update_init_info(module, init_info=self._get_init_info()) def _get_init_info(self): - info = f'{self.__class__.__name__}: gain={self.gain}, ' \ - f'distribution={self.distribution}, bias={self.bias}' + info = f'{self.__class__.__name__}: gain={self.gain}, ' f'distribution={self.distribution}, bias={self.bias}' return info @@ -263,7 +251,6 @@ def __init__(self, mean=0, std=1, **kwargs): self.std = std def __call__(self, module): - def init(m): if self.wholemodule: normal_init(m, self.mean, self.std, self.bias) @@ -278,8 +265,7 @@ def init(m): update_init_info(module, init_info=self._get_init_info()) def _get_init_info(self): - info = f'{self.__class__.__name__}: mean={self.mean},' \ - f' std={self.std}, bias={self.bias}' + info = f'{self.__class__.__name__}: mean={self.mean},' f' std={self.std}, bias={self.bias}' return info @@ -303,12 +289,7 @@ class TruncNormalInit(BaseInit): """ - def __init__(self, - mean: float = 0, - std: float = 1, - a: float = -2, - b: float = 2, - **kwargs) -> None: + def __init__(self, mean: float = 0, std: float = 1, a: float = -2, b: float = 2, **kwargs) -> None: super().__init__(**kwargs) self.mean = mean self.std = std @@ -316,25 +297,24 @@ def __init__(self, self.b = b def __call__(self, module: nn.Module) -> None: - def init(m): if self.wholemodule: - trunc_normal_init(m, self.mean, self.std, self.a, self.b, - self.bias) + trunc_normal_init(m, self.mean, self.std, self.a, self.b, self.bias) else: layername = m.__class__.__name__ basesname = _get_bases_name(m) if len(set(self.layer) & set([layername] + basesname)): - trunc_normal_init(m, self.mean, self.std, self.a, self.b, - self.bias) + trunc_normal_init(m, self.mean, self.std, self.a, self.b, self.bias) module.apply(init) if hasattr(module, '_params_init_info'): update_init_info(module, init_info=self._get_init_info()) def _get_init_info(self): - info = f'{self.__class__.__name__}: a={self.a}, b={self.b},' \ - f' mean={self.mean}, std={self.std}, bias={self.bias}' + info = ( + f'{self.__class__.__name__}: a={self.a}, b={self.b},' + f' mean={self.mean}, std={self.std}, bias={self.bias}' + ) return info @@ -361,7 +341,6 @@ def __init__(self, a=0, b=1, **kwargs): self.b = b def __call__(self, module): - def init(m): if self.wholemodule: uniform_init(m, self.a, self.b, self.bias) @@ -376,8 +355,7 @@ def init(m): update_init_info(module, init_info=self._get_init_info()) def _get_init_info(self): - info = f'{self.__class__.__name__}: a={self.a},' \ - f' b={self.b}, bias={self.bias}' + info = f'{self.__class__.__name__}: a={self.a},' f' b={self.b}, bias={self.bias}' return info @@ -408,12 +386,7 @@ class KaimingInit(BaseInit): Defaults to None. """ - def __init__(self, - a=0, - mode='fan_out', - nonlinearity='relu', - distribution='normal', - **kwargs): + def __init__(self, a=0, mode='fan_out', nonlinearity='relu', distribution='normal', **kwargs): super().__init__(**kwargs) self.a = a self.mode = mode @@ -421,26 +394,25 @@ def __init__(self, self.distribution = distribution def __call__(self, module): - def init(m): if self.wholemodule: - kaiming_init(m, self.a, self.mode, self.nonlinearity, - self.bias, self.distribution) + kaiming_init(m, self.a, self.mode, self.nonlinearity, self.bias, self.distribution) else: layername = m.__class__.__name__ basesname = _get_bases_name(m) if len(set(self.layer) & set([layername] + basesname)): - kaiming_init(m, self.a, self.mode, self.nonlinearity, - self.bias, self.distribution) + kaiming_init(m, self.a, self.mode, self.nonlinearity, self.bias, self.distribution) module.apply(init) if hasattr(module, '_params_init_info'): update_init_info(module, init_info=self._get_init_info()) def _get_init_info(self): - info = f'{self.__class__.__name__}: a={self.a}, mode={self.mode}, ' \ - f'nonlinearity={self.nonlinearity}, ' \ - f'distribution ={self.distribution}, bias={self.bias}' + info = ( + f'{self.__class__.__name__}: a={self.a}, mode={self.mode}, ' + f'nonlinearity={self.nonlinearity}, ' + f'distribution ={self.distribution}, bias={self.bias}' + ) return info @@ -449,12 +421,7 @@ class Caffe2XavierInit(KaimingInit): # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch # Acknowledgment to FAIR's internal code def __init__(self, **kwargs): - super().__init__( - a=1, - mode='fan_in', - nonlinearity='leaky_relu', - distribution='uniform', - **kwargs) + super().__init__(a=1, mode='fan_in', nonlinearity='leaky_relu', distribution='uniform', **kwargs) def __call__(self, module): super().__call__(module) @@ -481,23 +448,19 @@ def __init__(self, checkpoint, prefix=None, map_location=None): self.map_location = map_location def __call__(self, module): - from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import (_load_checkpoint_with_prefix, load_checkpoint, - load_state_dict) + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import ( + _load_checkpoint_with_prefix, + load_checkpoint, + load_state_dict, + ) + logger = get_logger('mmcv') if self.prefix is None: print_log(f'load model from: {self.checkpoint}', logger=logger) - load_checkpoint( - module, - self.checkpoint, - map_location=self.map_location, - strict=False, - logger=logger) + load_checkpoint(module, self.checkpoint, map_location=self.map_location, strict=False, logger=logger) else: - print_log( - f'load {self.prefix} in model from: {self.checkpoint}', - logger=logger) - state_dict = _load_checkpoint_with_prefix( - self.prefix, self.checkpoint, map_location=self.map_location) + print_log(f'load {self.prefix} in model from: {self.checkpoint}', logger=logger) + state_dict = _load_checkpoint_with_prefix(self.prefix, self.checkpoint, map_location=self.map_location) load_state_dict(module, state_dict, strict=False, logger=logger) if hasattr(module, '_params_init_info'): @@ -519,8 +482,10 @@ def _initialize(module, cfg, wholemodule=False): def _initialize_override(module, override, cfg): if not isinstance(override, (dict, list)): - raise TypeError(f'override must be a dict or a list of dict, \ - but got {type(override)}') + raise TypeError( + f'override must be a dict or a list of dict, \ + but got {type(override)}' + ) override = [override] if isinstance(override, dict) else override @@ -529,22 +494,19 @@ def _initialize_override(module, override, cfg): cp_override = copy.deepcopy(override_) name = cp_override.pop('name', None) if name is None: - raise ValueError('`override` must contain the key "name",' - f'but got {cp_override}') + raise ValueError('`override` must contain the key "name",' f'but got {cp_override}') # if override only has name key, it means use args in init_cfg if not cp_override: cp_override.update(cfg) # if override has name key and other args except type key, it will # raise error elif 'type' not in cp_override.keys(): - raise ValueError( - f'`override` need "type" key, but got {cp_override}') + raise ValueError(f'`override` need "type" key, but got {cp_override}') if hasattr(module, name): _initialize(getattr(module, name), cp_override, wholemodule=True) else: - raise RuntimeError(f'module did not have attribute {name}, ' - f'but init_cfg is {cp_override}.') + raise RuntimeError(f'module did not have attribute {name}, ' f'but init_cfg is {cp_override}.') def initialize(module, init_cfg): @@ -596,8 +558,10 @@ def initialize(module, init_cfg): checkpoint=url, prefix='backbone.') """ if not isinstance(init_cfg, (dict, list)): - raise TypeError(f'init_cfg must be a dict or a list of dict, \ - but got {type(init_cfg)}') + raise TypeError( + f'init_cfg must be a dict or a list of dict, \ + but got {type(init_cfg)}' + ) if isinstance(init_cfg, dict): init_cfg = [init_cfg] @@ -619,21 +583,21 @@ def initialize(module, init_cfg): pass -def _no_grad_trunc_normal_(tensor: Tensor, mean: float, std: float, a: float, - b: float) -> Tensor: +def _no_grad_trunc_normal_(tensor: Tensor, mean: float, std: float, a: float, b: float) -> Tensor: # Method based on # https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf # Modified from # https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py def norm_cdf(x): # Computes standard normal cumulative distribution function - return (1. + math.erf(x / math.sqrt(2.))) / 2. + return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 if (mean < a - 2 * std) or (mean > b + 2 * std): warnings.warn( 'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. ' 'The distribution of values may be incorrect.', - stacklevel=2) + stacklevel=2, + ) with torch.no_grad(): # Values are generated by using a truncated uniform distribution and @@ -651,7 +615,7 @@ def norm_cdf(x): tensor.erfinv_() # Transform to proper mean, std - tensor.mul_(std * math.sqrt(2.)) + tensor.mul_(std * math.sqrt(2.0)) tensor.add_(mean) # Clamp to ensure it's in the proper range @@ -659,11 +623,7 @@ def norm_cdf(x): return tensor -def trunc_normal_(tensor: Tensor, - mean: float = 0., - std: float = 1., - a: float = -2., - b: float = 2.) -> Tensor: +def trunc_normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0, a: float = -2.0, b: float = 2.0) -> Tensor: r"""Fills the input Tensor with values drawn from a truncated normal distribution. The values are effectively drawn from the normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py index 8778b649561a..c430ff61db6e 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py @@ -8,20 +8,10 @@ def conv3x3(in_planes, out_planes, dilation=1): """3x3 convolution with padding.""" - return nn.Conv2d( - in_planes, - out_planes, - kernel_size=3, - padding=dilation, - dilation=dilation) - - -def make_vgg_layer(inplanes, - planes, - num_blocks, - dilation=1, - with_bn=False, - ceil_mode=False): + return nn.Conv2d(in_planes, out_planes, kernel_size=3, padding=dilation, dilation=dilation) + + +def make_vgg_layer(inplanes, planes, num_blocks, dilation=1, with_bn=False, ceil_mode=False): layers = [] for _ in range(num_blocks): layers.append(conv3x3(inplanes, planes, dilation)) @@ -51,25 +41,22 @@ class VGG(nn.Module): bn_frozen (bool): Whether to freeze weight and bias of BN layers. """ - arch_settings = { - 11: (1, 1, 2, 2, 2), - 13: (2, 2, 2, 2, 2), - 16: (2, 2, 3, 3, 3), - 19: (2, 2, 4, 4, 4) - } - - def __init__(self, - depth, - with_bn=False, - num_classes=-1, - num_stages=5, - dilations=(1, 1, 1, 1, 1), - out_indices=(0, 1, 2, 3, 4), - frozen_stages=-1, - bn_eval=True, - bn_frozen=False, - ceil_mode=False, - with_last_pool=True): + arch_settings = {11: (1, 1, 2, 2, 2), 13: (2, 2, 2, 2, 2), 16: (2, 2, 3, 3, 3), 19: (2, 2, 4, 4, 4)} + + def __init__( + self, + depth, + with_bn=False, + num_classes=-1, + num_stages=5, + dilations=(1, 1, 1, 1, 1), + out_indices=(0, 1, 2, 3, 4), + frozen_stages=-1, + bn_eval=True, + bn_frozen=False, + ceil_mode=False, + with_last_pool=True, + ): super(VGG, self).__init__() if depth not in self.arch_settings: raise KeyError(f'invalid depth {depth} for vgg') @@ -93,14 +80,10 @@ def __init__(self, num_modules = num_blocks * (2 + with_bn) + 1 end_idx = start_idx + num_modules dilation = dilations[i] - planes = 64 * 2**i if i < 4 else 512 + planes = 64 * 2 ** i if i < 4 else 512 vgg_layer = make_vgg_layer( - self.inplanes, - planes, - num_blocks, - dilation=dilation, - with_bn=with_bn, - ceil_mode=ceil_mode) + self.inplanes, planes, num_blocks, dilation=dilation, with_bn=with_bn, ceil_mode=ceil_mode + ) vgg_layers.extend(vgg_layer) self.inplanes = planes self.range_sub_modules.append([start_idx, end_idx]) @@ -126,6 +109,7 @@ def init_weights(self, pretrained=None): if isinstance(pretrained, str): logger = logging.getLogger() from ..runner import load_checkpoint + load_checkpoint(self, pretrained, strict=False, logger=logger) elif pretrained is None: for m in self.modules(): diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py index 3193b7f664e1..91307c41c0a5 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py @@ -1,8 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .test import (collect_results_cpu, collect_results_gpu, multi_gpu_test, - single_gpu_test) +from .test import collect_results_cpu, collect_results_gpu, multi_gpu_test, single_gpu_test -__all__ = [ - 'collect_results_cpu', 'collect_results_gpu', 'multi_gpu_test', - 'single_gpu_test' -] +__all__ = ['collect_results_cpu', 'collect_results_gpu', 'multi_gpu_test', 'single_gpu_test'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py index 4ba24f8d4a45..93f07f70ea4a 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py @@ -111,16 +111,12 @@ def collect_results_cpu(result_part, size, tmpdir=None): if tmpdir is None: MAX_LEN = 512 # 32 is whitespace - dir_tensor = torch.full((MAX_LEN, ), - 32, - dtype=torch.uint8, - device='cuda') + dir_tensor = torch.full((MAX_LEN,), 32, dtype=torch.uint8, device='cuda') if rank == 0: mmcv.mkdir_or_exist('.dist_test') tmpdir = tempfile.mkdtemp(dir='.dist_test') - tmpdir = torch.tensor( - bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') - dir_tensor[:len(tmpdir)] = tmpdir + tmpdir = torch.tensor(bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') + dir_tensor[: len(tmpdir)] = tmpdir dist.broadcast(dir_tensor, 0) tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() else: @@ -169,8 +165,7 @@ def collect_results_gpu(result_part, size): """ rank, world_size = get_dist_info() # dump result part to tensor with pickle - part_tensor = torch.tensor( - bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') + part_tensor = torch.tensor(bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') # gather all result part tensor shape shape_tensor = torch.tensor(part_tensor.shape, device='cuda') shape_list = [shape_tensor.clone() for _ in range(world_size)] @@ -178,17 +173,15 @@ def collect_results_gpu(result_part, size): # padding result part tensor to max length shape_max = torch.tensor(shape_list).max() part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') - part_send[:shape_tensor[0]] = part_tensor - part_recv_list = [ - part_tensor.new_zeros(shape_max) for _ in range(world_size) - ] + part_send[: shape_tensor[0]] = part_tensor + part_recv_list = [part_tensor.new_zeros(shape_max) for _ in range(world_size)] # gather all result part dist.all_gather(part_recv_list, part_send) if rank == 0: part_list = [] for recv, shape in zip(part_recv_list, shape_list): - part_result = pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()) + part_result = pickle.loads(recv[: shape[0]].cpu().numpy().tobytes()) # When data is severely insufficient, an empty part_result # on a certain gpu could makes the overall outputs empty. if part_result: diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py index 2051b85f7e59..48c6ac0c6999 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py @@ -5,7 +5,15 @@ from .parse import dict_from_file, list_from_file __all__ = [ - 'BaseStorageBackend', 'FileClient', 'load', 'dump', 'register_handler', - 'BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler', - 'list_from_file', 'dict_from_file' + 'BaseStorageBackend', + 'FileClient', + 'load', + 'dump', + 'register_handler', + 'BaseFileHandler', + 'JsonHandler', + 'PickleHandler', + 'YamlHandler', + 'list_from_file', + 'dict_from_file', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py index bafca300c6db..fe088721ca25 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py @@ -63,8 +63,7 @@ def __init__(self, path_mapping=None): except ImportError: raise ImportError('Please install ceph to enable CephBackend.') - warnings.warn( - 'CephBackend will be deprecated, please use PetrelBackend instead') + warnings.warn('CephBackend will be deprecated, please use PetrelBackend instead') self._client = ceph.S3Client() assert isinstance(path_mapping, dict) or path_mapping is None self.path_mapping = path_mapping @@ -105,14 +104,11 @@ class PetrelBackend(BaseStorageBackend): >>> client.get(filepath2) # get data from 'cluster-name' cluster """ - def __init__(self, - path_mapping: Optional[dict] = None, - enable_mc: bool = True): + def __init__(self, path_mapping: Optional[dict] = None, enable_mc: bool = True): try: from petrel_client import client except ImportError: - raise ImportError('Please install petrel_client to enable ' - 'PetrelBackend.') + raise ImportError('Please install petrel_client to enable ' 'PetrelBackend.') self._client = client.Client(enable_mc=enable_mc) assert isinstance(path_mapping, dict) or path_mapping is None @@ -161,9 +157,7 @@ def get(self, filepath: Union[str, Path]) -> memoryview: value_buf = memoryview(value) return value_buf - def get_text(self, - filepath: Union[str, Path], - encoding: str = 'utf-8') -> str: + def get_text(self, filepath: Union[str, Path], encoding: str = 'utf-8') -> str: """Read data from a given ``filepath`` with 'r' mode. Args: @@ -187,10 +181,7 @@ def put(self, obj: bytes, filepath: Union[str, Path]) -> None: filepath = self._format_path(filepath) self._client.put(filepath, obj) - def put_text(self, - obj: str, - filepath: Union[str, Path], - encoding: str = 'utf-8') -> None: + def put_text(self, obj: str, filepath: Union[str, Path], encoding: str = 'utf-8') -> None: """Save data to a given ``filepath``. Args: @@ -209,9 +200,12 @@ def remove(self, filepath: Union[str, Path]) -> None: """ if not has_method(self._client, 'delete'): raise NotImplementedError( - ('Current version of Petrel Python SDK has not supported ' - 'the `delete` method, please use a higher version or dev' - ' branch instead.')) + ( + 'Current version of Petrel Python SDK has not supported ' + 'the `delete` method, please use a higher version or dev' + ' branch instead.' + ) + ) filepath = self._map_path(filepath) filepath = self._format_path(filepath) @@ -226,12 +220,14 @@ def exists(self, filepath: Union[str, Path]) -> bool: Returns: bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. """ - if not (has_method(self._client, 'contains') - and has_method(self._client, 'isdir')): + if not (has_method(self._client, 'contains') and has_method(self._client, 'isdir')): raise NotImplementedError( - ('Current version of Petrel Python SDK has not supported ' - 'the `contains` and `isdir` methods, please use a higher' - 'version or dev branch instead.')) + ( + 'Current version of Petrel Python SDK has not supported ' + 'the `contains` and `isdir` methods, please use a higher' + 'version or dev branch instead.' + ) + ) filepath = self._map_path(filepath) filepath = self._format_path(filepath) @@ -250,9 +246,12 @@ def isdir(self, filepath: Union[str, Path]) -> bool: """ if not has_method(self._client, 'isdir'): raise NotImplementedError( - ('Current version of Petrel Python SDK has not supported ' - 'the `isdir` method, please use a higher version or dev' - ' branch instead.')) + ( + 'Current version of Petrel Python SDK has not supported ' + 'the `isdir` method, please use a higher version or dev' + ' branch instead.' + ) + ) filepath = self._map_path(filepath) filepath = self._format_path(filepath) @@ -270,16 +269,18 @@ def isfile(self, filepath: Union[str, Path]) -> bool: """ if not has_method(self._client, 'contains'): raise NotImplementedError( - ('Current version of Petrel Python SDK has not supported ' - 'the `contains` method, please use a higher version or ' - 'dev branch instead.')) + ( + 'Current version of Petrel Python SDK has not supported ' + 'the `contains` method, please use a higher version or ' + 'dev branch instead.' + ) + ) filepath = self._map_path(filepath) filepath = self._format_path(filepath) return self._client.contains(filepath) - def join_path(self, filepath: Union[str, Path], - *filepaths: Union[str, Path]) -> str: + def join_path(self, filepath: Union[str, Path], *filepaths: Union[str, Path]) -> str: """Concatenate all file paths. Args: @@ -328,12 +329,14 @@ def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]: finally: os.remove(f.name) - def list_dir_or_file(self, - dir_path: Union[str, Path], - list_dir: bool = True, - list_file: bool = True, - suffix: Optional[Union[str, Tuple[str]]] = None, - recursive: bool = False) -> Iterator[str]: + def list_dir_or_file( + self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False, + ) -> Iterator[str]: """Scan a directory to find the interested directories or files in arbitrary order. @@ -362,15 +365,17 @@ def list_dir_or_file(self, """ if not has_method(self._client, 'list'): raise NotImplementedError( - ('Current version of Petrel Python SDK has not supported ' - 'the `list` method, please use a higher version or dev' - ' branch instead.')) + ( + 'Current version of Petrel Python SDK has not supported ' + 'the `list` method, please use a higher version or dev' + ' branch instead.' + ) + ) dir_path = self._map_path(dir_path) dir_path = self._format_path(dir_path) if list_dir and suffix is not None: - raise TypeError( - '`list_dir` should be False when `suffix` is not None') + raise TypeError('`list_dir` should be False when `suffix` is not None') if (suffix is not None) and not isinstance(suffix, (str, tuple)): raise TypeError('`suffix` must be a string or tuple of strings') @@ -382,8 +387,7 @@ def list_dir_or_file(self, root = dir_path - def _list_dir_or_file(dir_path, list_dir, list_file, suffix, - recursive): + def _list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive): for path in self._client.list(dir_path): # the `self.isdir` is not used here to determine whether path # is a directory, because `self.isdir` relies on @@ -393,21 +397,17 @@ def _list_dir_or_file(dir_path, list_dir, list_file, suffix, if list_dir: # get the relative path and exclude the last # character '/' - rel_dir = next_dir_path[len(root):-1] + rel_dir = next_dir_path[len(root) : -1] yield rel_dir if recursive: - yield from _list_dir_or_file(next_dir_path, list_dir, - list_file, suffix, - recursive) + yield from _list_dir_or_file(next_dir_path, list_dir, list_file, suffix, recursive) else: # a file path absolute_path = self.join_path(dir_path, path) - rel_path = absolute_path[len(root):] - if (suffix is None - or rel_path.endswith(suffix)) and list_file: + rel_path = absolute_path[len(root) :] + if (suffix is None or rel_path.endswith(suffix)) and list_file: yield rel_path - return _list_dir_or_file(dir_path, list_dir, list_file, suffix, - recursive) + return _list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive) class MemcachedBackend(BaseStorageBackend): @@ -423,23 +423,23 @@ class MemcachedBackend(BaseStorageBackend): def __init__(self, server_list_cfg, client_cfg, sys_path=None): if sys_path is not None: import sys + sys.path.append(sys_path) try: import mc except ImportError: - raise ImportError( - 'Please install memcached to enable MemcachedBackend.') + raise ImportError('Please install memcached to enable MemcachedBackend.') self.server_list_cfg = server_list_cfg self.client_cfg = client_cfg - self._client = mc.MemcachedClient.GetInstance(self.server_list_cfg, - self.client_cfg) + self._client = mc.MemcachedClient.GetInstance(self.server_list_cfg, self.client_cfg) # mc.pyvector servers as a point which points to a memory cache self._mc_buffer = mc.pyvector() def get(self, filepath): filepath = str(filepath) import mc + self._client.Get(filepath, self._mc_buffer) value_buf = mc.ConvertBuffer(self._mc_buffer) return value_buf @@ -466,24 +466,14 @@ class LmdbBackend(BaseStorageBackend): db_path (str): Lmdb database path. """ - def __init__(self, - db_path, - readonly=True, - lock=False, - readahead=False, - **kwargs): + def __init__(self, db_path, readonly=True, lock=False, readahead=False, **kwargs): try: import lmdb except ImportError: raise ImportError('Please install lmdb to enable LmdbBackend.') self.db_path = str(db_path) - self._client = lmdb.open( - self.db_path, - readonly=readonly, - lock=lock, - readahead=readahead, - **kwargs) + self._client = lmdb.open(self.db_path, readonly=readonly, lock=lock, readahead=readahead, **kwargs) def get(self, filepath): """Get values according to the filepath. @@ -518,9 +508,7 @@ def get(self, filepath: Union[str, Path]) -> bytes: value_buf = f.read() return value_buf - def get_text(self, - filepath: Union[str, Path], - encoding: str = 'utf-8') -> str: + def get_text(self, filepath: Union[str, Path], encoding: str = 'utf-8') -> str: """Read data from a given ``filepath`` with 'r' mode. Args: @@ -550,10 +538,7 @@ def put(self, obj: bytes, filepath: Union[str, Path]) -> None: with open(filepath, 'wb') as f: f.write(obj) - def put_text(self, - obj: str, - filepath: Union[str, Path], - encoding: str = 'utf-8') -> None: + def put_text(self, obj: str, filepath: Union[str, Path], encoding: str = 'utf-8') -> None: """Write data to a given ``filepath`` with 'w' mode. Note: @@ -614,8 +599,7 @@ def isfile(self, filepath: Union[str, Path]) -> bool: """ return osp.isfile(filepath) - def join_path(self, filepath: Union[str, Path], - *filepaths: Union[str, Path]) -> str: + def join_path(self, filepath: Union[str, Path], *filepaths: Union[str, Path]) -> str: """Concatenate all file paths. Join one or more filepath components intelligently. The return value @@ -630,17 +614,18 @@ def join_path(self, filepath: Union[str, Path], return osp.join(filepath, *filepaths) @contextmanager - def get_local_path( - self, filepath: Union[str, Path]) -> Iterable[Union[str, Path]]: + def get_local_path(self, filepath: Union[str, Path]) -> Iterable[Union[str, Path]]: """Only for unified API and do nothing.""" yield filepath - def list_dir_or_file(self, - dir_path: Union[str, Path], - list_dir: bool = True, - list_file: bool = True, - suffix: Optional[Union[str, Tuple[str]]] = None, - recursive: bool = False) -> Iterator[str]: + def list_dir_or_file( + self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False, + ) -> Iterator[str]: """Scan a directory to find the interested directories or files in arbitrary order. @@ -667,25 +652,20 @@ def list_dir_or_file(self, root = dir_path - def _list_dir_or_file(dir_path, list_dir, list_file, suffix, - recursive): + def _list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive): for entry in os.scandir(dir_path): if not entry.name.startswith('.') and entry.is_file(): rel_path = osp.relpath(entry.path, root) - if (suffix is None - or rel_path.endswith(suffix)) and list_file: + if (suffix is None or rel_path.endswith(suffix)) and list_file: yield rel_path elif osp.isdir(entry.path): if list_dir: rel_dir = osp.relpath(entry.path, root) yield rel_dir if recursive: - yield from _list_dir_or_file(entry.path, list_dir, - list_file, suffix, - recursive) + yield from _list_dir_or_file(entry.path, list_dir, list_file, suffix, recursive) - return _list_dir_or_file(dir_path, list_dir, list_file, suffix, - recursive) + return _list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive) class HTTPBackend(BaseStorageBackend): @@ -789,12 +769,13 @@ def __new__(cls, backend=None, prefix=None, **kwargs): backend = 'disk' if backend is not None and backend not in cls._backends: raise ValueError( - f'Backend {backend} is not supported. Currently supported ones' - f' are {list(cls._backends.keys())}') + f'Backend {backend} is not supported. Currently supported ones' f' are {list(cls._backends.keys())}' + ) if prefix is not None and prefix not in cls._prefix_to_backends: raise ValueError( f'prefix {prefix} is not supported. Currently supported ones ' - f'are {list(cls._prefix_to_backends.keys())}') + f'are {list(cls._prefix_to_backends.keys())}' + ) # concatenate the arguments to a unique key for determining whether # objects with the same arguments were created @@ -803,9 +784,11 @@ def __new__(cls, backend=None, prefix=None, **kwargs): arg_key += f':{key}:{value}' # if a backend was overridden, it will create a new object - if (arg_key in cls._instances - and backend not in cls._overridden_backends - and prefix not in cls._overridden_prefixes): + if ( + arg_key in cls._instances + and backend not in cls._overridden_backends + and prefix not in cls._overridden_prefixes + ): _instance = cls._instances[arg_key] else: # create a new object and put it to _instance @@ -855,9 +838,9 @@ def parse_uri_prefix(uri: Union[str, Path]) -> Optional[str]: return prefix @classmethod - def infer_client(cls, - file_client_args: Optional[dict] = None, - uri: Optional[Union[str, Path]] = None) -> 'FileClient': + def infer_client( + cls, file_client_args: Optional[dict] = None, uri: Optional[Union[str, Path]] = None + ) -> 'FileClient': """Infer a suitable file client based on the URI and arguments. Args: @@ -885,18 +868,15 @@ def infer_client(cls, @classmethod def _register_backend(cls, name, backend, force=False, prefixes=None): if not isinstance(name, str): - raise TypeError('the backend name should be a string, ' - f'but got {type(name)}') + raise TypeError('the backend name should be a string, ' f'but got {type(name)}') if not inspect.isclass(backend): - raise TypeError( - f'backend should be a class but got {type(backend)}') + raise TypeError(f'backend should be a class but got {type(backend)}') if not issubclass(backend, BaseStorageBackend): - raise TypeError( - f'backend {backend} is not a subclass of BaseStorageBackend') + raise TypeError(f'backend {backend} is not a subclass of BaseStorageBackend') if not force and name in cls._backends: raise KeyError( - f'{name} is already registered as a storage backend, ' - 'add "force=True" if you want to override it') + f'{name} is already registered as a storage backend, ' 'add "force=True" if you want to override it' + ) if name in cls._backends and force: cls._overridden_backends.add(name) @@ -916,7 +896,8 @@ def _register_backend(cls, name, backend, force=False, prefixes=None): else: raise KeyError( f'{prefix} is already registered as a storage backend,' - ' add "force=True" if you want to override it') + ' add "force=True" if you want to override it' + ) @classmethod def register_backend(cls, name, backend=None, force=False, prefixes=None): @@ -962,13 +943,11 @@ def get_text(self, filepath): `New in version 1.3.15.` """ if backend is not None: - cls._register_backend( - name, backend, force=force, prefixes=prefixes) + cls._register_backend(name, backend, force=force, prefixes=prefixes) return def _register(backend_cls): - cls._register_backend( - name, backend_cls, force=force, prefixes=prefixes) + cls._register_backend(name, backend_cls, force=force, prefixes=prefixes) return backend_cls return _register @@ -1076,8 +1055,7 @@ def isfile(self, filepath: Union[str, Path]) -> bool: """ return self.client.isfile(filepath) - def join_path(self, filepath: Union[str, Path], - *filepaths: Union[str, Path]) -> str: + def join_path(self, filepath: Union[str, Path], *filepaths: Union[str, Path]) -> str: """Concatenate all file paths. Join one or more filepath components intelligently. The return value @@ -1120,12 +1098,14 @@ def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]: with self.client.get_local_path(str(filepath)) as local_path: yield local_path - def list_dir_or_file(self, - dir_path: Union[str, Path], - list_dir: bool = True, - list_file: bool = True, - suffix: Optional[Union[str, Tuple[str]]] = None, - recursive: bool = False) -> Iterator[str]: + def list_dir_or_file( + self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False, + ) -> Iterator[str]: """Scan a directory to find the interested directories or files in arbitrary order. @@ -1144,5 +1124,4 @@ def list_dir_or_file(self, Yields: Iterable[str]: A relative path to ``dir_path``. """ - yield from self.client.list_dir_or_file(dir_path, list_dir, list_file, - suffix, recursive) + yield from self.client.list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py index 18d4f15f7413..c95a8b72d202 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py @@ -23,7 +23,6 @@ def set_default(obj): class JsonHandler(BaseFileHandler): - def load_from_fileobj(self, file): return json.load(file) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py index b37c79bed4ef..fffd741130ff 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py @@ -12,8 +12,7 @@ def load_from_fileobj(self, file, **kwargs): return pickle.load(file, **kwargs) def load_from_path(self, filepath, **kwargs): - return super(PickleHandler, self).load_from_path( - filepath, mode='rb', **kwargs) + return super(PickleHandler, self).load_from_path(filepath, mode='rb', **kwargs) def dump_to_str(self, obj, **kwargs): kwargs.setdefault('protocol', 2) @@ -24,5 +23,4 @@ def dump_to_fileobj(self, obj, file, **kwargs): pickle.dump(obj, file, **kwargs) def dump_to_path(self, obj, filepath, **kwargs): - super(PickleHandler, self).dump_to_path( - obj, filepath, mode='wb', **kwargs) + super(PickleHandler, self).dump_to_path(obj, filepath, mode='wb', **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py index c5aa2eea1e8c..cf89a1efa70c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py @@ -2,7 +2,8 @@ import yaml try: - from yaml import CLoader as Loader, CDumper as Dumper + from yaml import CDumper as Dumper + from yaml import CLoader as Loader except ImportError: from yaml import Loader, Dumper @@ -10,7 +11,6 @@ class YamlHandler(BaseFileHandler): - def load_from_fileobj(self, file, **kwargs): kwargs.setdefault('Loader', Loader) return yaml.load(file, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py index aaefde58aa3e..bcbdc2eb3803 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py @@ -11,7 +11,7 @@ 'yaml': YamlHandler(), 'yml': YamlHandler(), 'pickle': PickleHandler(), - 'pkl': PickleHandler() + 'pkl': PickleHandler(), } @@ -99,8 +99,7 @@ def dump(obj, file=None, file_format=None, file_client_args=None, **kwargs): if is_str(file): file_format = file.split('.')[-1] elif file is None: - raise ValueError( - 'file_format must be specified since file is None') + raise ValueError('file_format must be specified since file is None') if file_format not in file_handlers: raise TypeError(f'Unsupported format: {file_format}') @@ -132,8 +131,7 @@ def _register_handler(handler, file_formats): handler. """ if not isinstance(handler, BaseFileHandler): - raise TypeError( - f'handler must be a child of BaseFileHandler, not {type(handler)}') + raise TypeError(f'handler must be a child of BaseFileHandler, not {type(handler)}') if isinstance(file_formats, str): file_formats = [file_formats] if not is_list_of(file_formats, str): @@ -143,7 +141,6 @@ def _register_handler(handler, file_formats): def register_handler(file_formats, **kwargs): - def wrap(cls): _register_handler(cls(**kwargs), file_formats) return cls diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py index f60f0d611b8d..19c618d9a034 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py @@ -5,12 +5,7 @@ from .file_client import FileClient -def list_from_file(filename, - prefix='', - offset=0, - max_num=0, - encoding='utf-8', - file_client_args=None): +def list_from_file(filename, prefix='', offset=0, max_num=0, encoding='utf-8', file_client_args=None): """Load a text file and parse the content as a list of strings. Note: @@ -52,10 +47,7 @@ def list_from_file(filename, return item_list -def dict_from_file(filename, - key_type=str, - encoding='utf-8', - file_client_args=None): +def dict_from_file(filename, key_type=str, encoding='utf-8', file_client_args=None): """Load a text file and parse the content as a dict. Each line of the text file will be two or more columns split by diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py index d0051d609d3d..bf63e993892c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py @@ -1,28 +1,106 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .colorspace import (bgr2gray, bgr2hls, bgr2hsv, bgr2rgb, bgr2ycbcr, - gray2bgr, gray2rgb, hls2bgr, hsv2bgr, imconvert, - rgb2bgr, rgb2gray, rgb2ycbcr, ycbcr2bgr, ycbcr2rgb) -from .geometric import (cutout, imcrop, imflip, imflip_, impad, - impad_to_multiple, imrescale, imresize, imresize_like, - imresize_to_multiple, imrotate, imshear, imtranslate, - rescale_size) +from .colorspace import ( + bgr2gray, + bgr2hls, + bgr2hsv, + bgr2rgb, + bgr2ycbcr, + gray2bgr, + gray2rgb, + hls2bgr, + hsv2bgr, + imconvert, + rgb2bgr, + rgb2gray, + rgb2ycbcr, + ycbcr2bgr, + ycbcr2rgb, +) +from .geometric import ( + cutout, + imcrop, + imflip, + imflip_, + impad, + impad_to_multiple, + imrescale, + imresize, + imresize_like, + imresize_to_multiple, + imrotate, + imshear, + imtranslate, + rescale_size, +) from .io import imfrombytes, imread, imwrite, supported_backends, use_backend from .misc import tensor2imgs -from .photometric import (adjust_brightness, adjust_color, adjust_contrast, - adjust_lighting, adjust_sharpness, auto_contrast, - clahe, imdenormalize, imequalize, iminvert, - imnormalize, imnormalize_, lut_transform, posterize, - solarize) +from .photometric import ( + adjust_brightness, + adjust_color, + adjust_contrast, + adjust_lighting, + adjust_sharpness, + auto_contrast, + clahe, + imdenormalize, + imequalize, + iminvert, + imnormalize, + imnormalize_, + lut_transform, + posterize, + solarize, +) __all__ = [ - 'bgr2gray', 'bgr2hls', 'bgr2hsv', 'bgr2rgb', 'gray2bgr', 'gray2rgb', - 'hls2bgr', 'hsv2bgr', 'imconvert', 'rgb2bgr', 'rgb2gray', 'imrescale', - 'imresize', 'imresize_like', 'imresize_to_multiple', 'rescale_size', - 'imcrop', 'imflip', 'imflip_', 'impad', 'impad_to_multiple', 'imrotate', - 'imfrombytes', 'imread', 'imwrite', 'supported_backends', 'use_backend', - 'imdenormalize', 'imnormalize', 'imnormalize_', 'iminvert', 'posterize', - 'solarize', 'rgb2ycbcr', 'bgr2ycbcr', 'ycbcr2rgb', 'ycbcr2bgr', - 'tensor2imgs', 'imshear', 'imtranslate', 'adjust_color', 'imequalize', - 'adjust_brightness', 'adjust_contrast', 'lut_transform', 'clahe', - 'adjust_sharpness', 'auto_contrast', 'cutout', 'adjust_lighting' + 'bgr2gray', + 'bgr2hls', + 'bgr2hsv', + 'bgr2rgb', + 'gray2bgr', + 'gray2rgb', + 'hls2bgr', + 'hsv2bgr', + 'imconvert', + 'rgb2bgr', + 'rgb2gray', + 'imrescale', + 'imresize', + 'imresize_like', + 'imresize_to_multiple', + 'rescale_size', + 'imcrop', + 'imflip', + 'imflip_', + 'impad', + 'impad_to_multiple', + 'imrotate', + 'imfrombytes', + 'imread', + 'imwrite', + 'supported_backends', + 'use_backend', + 'imdenormalize', + 'imnormalize', + 'imnormalize_', + 'iminvert', + 'posterize', + 'solarize', + 'rgb2ycbcr', + 'bgr2ycbcr', + 'ycbcr2rgb', + 'ycbcr2bgr', + 'tensor2imgs', + 'imshear', + 'imtranslate', + 'adjust_color', + 'imequalize', + 'adjust_brightness', + 'adjust_contrast', + 'lut_transform', + 'clahe', + 'adjust_sharpness', + 'auto_contrast', + 'cutout', + 'adjust_lighting', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py index 814533952fdf..e167caac49f3 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py @@ -102,10 +102,9 @@ def _convert_input_type_range(img): if img_type == np.float32: pass elif img_type == np.uint8: - img /= 255. + img /= 255.0 else: - raise TypeError('The img type should be np.float32 or np.uint8, ' - f'but got {img_type}') + raise TypeError('The img type should be np.float32 or np.uint8, ' f'but got {img_type}') return img @@ -131,12 +130,11 @@ def _convert_output_type_range(img, dst_type): (ndarray): The converted image with desired type and range. """ if dst_type not in (np.uint8, np.float32): - raise TypeError('The dst_type should be np.float32 or np.uint8, ' - f'but got {dst_type}') + raise TypeError('The dst_type should be np.float32 or np.uint8, ' f'but got {dst_type}') if dst_type == np.uint8: img = img.round() else: - img /= 255. + img /= 255.0 return img.astype(dst_type) @@ -167,9 +165,11 @@ def rgb2ycbcr(img, y_only=False): if y_only: out_img = np.dot(img, [65.481, 128.553, 24.966]) + 16.0 else: - out_img = np.matmul( - img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786], - [24.966, 112.0, -18.214]]) + [16, 128, 128] + out_img = np.matmul(img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786], [24.966, 112.0, -18.214]]) + [ + 16, + 128, + 128, + ] out_img = _convert_output_type_range(out_img, img_type) return out_img @@ -201,9 +201,11 @@ def bgr2ycbcr(img, y_only=False): if y_only: out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0 else: - out_img = np.matmul( - img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786], - [65.481, -37.797, 112.0]]) + [16, 128, 128] + out_img = np.matmul(img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786], [65.481, -37.797, 112.0]]) + [ + 16, + 128, + 128, + ] out_img = _convert_output_type_range(out_img, img_type) return out_img @@ -231,11 +233,9 @@ def ycbcr2rgb(img): """ img_type = img.dtype img = _convert_input_type_range(img) * 255 - out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621], - [0, -0.00153632, 0.00791071], - [0.00625893, -0.00318811, 0]]) * 255.0 + [ - -222.921, 135.576, -276.836 - ] + out_img = np.matmul( + img, [[0.00456621, 0.00456621, 0.00456621], [0, -0.00153632, 0.00791071], [0.00625893, -0.00318811, 0]] + ) * 255.0 + [-222.921, 135.576, -276.836] out_img = _convert_output_type_range(out_img, img_type) return out_img @@ -263,11 +263,9 @@ def ycbcr2bgr(img): """ img_type = img.dtype img = _convert_input_type_range(img) * 255 - out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621], - [0.00791071, -0.00153632, 0], - [0, -0.00318811, 0.00625893]]) * 255.0 + [ - -276.836, 135.576, -222.921 - ] + out_img = np.matmul( + img, [[0.00456621, 0.00456621, 0.00456621], [0.00791071, -0.00153632, 0], [0, -0.00318811, 0.00625893]] + ) * 255.0 + [-276.836, 135.576, -222.921] out_img = _convert_output_type_range(out_img, img_type) return out_img diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py index cf97c201cb4e..2c96fae34feb 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py @@ -34,7 +34,7 @@ def _scale_size(size, scale): 'bilinear': cv2.INTER_LINEAR, 'bicubic': cv2.INTER_CUBIC, 'area': cv2.INTER_AREA, - 'lanczos': cv2.INTER_LANCZOS4 + 'lanczos': cv2.INTER_LANCZOS4, } if Image is not None: @@ -44,16 +44,11 @@ def _scale_size(size, scale): 'bicubic': Image.BICUBIC, 'box': Image.BOX, 'lanczos': Image.LANCZOS, - 'hamming': Image.HAMMING + 'hamming': Image.HAMMING, } -def imresize(img, - size, - return_scale=False, - interpolation='bilinear', - out=None, - backend=None): +def imresize(img, size, return_scale=False, interpolation='bilinear', out=None, backend=None): """Resize image to a given size. Args: @@ -76,8 +71,7 @@ def imresize(img, if backend is None: backend = imread_backend if backend not in ['cv2', 'pillow']: - raise ValueError(f'backend: {backend} is not supported for resize.' - f"Supported backends are 'cv2', 'pillow'") + raise ValueError(f'backend: {backend} is not supported for resize.' f"Supported backends are 'cv2', 'pillow'") if backend == 'pillow': assert img.dtype == np.uint8, 'Pillow backend only support uint8 type' @@ -85,8 +79,7 @@ def imresize(img, pil_image = pil_image.resize(size, pillow_interp_codes[interpolation]) resized_img = np.array(pil_image) else: - resized_img = cv2.resize( - img, size, dst=out, interpolation=cv2_interp_codes[interpolation]) + resized_img = cv2.resize(img, size, dst=out, interpolation=cv2_interp_codes[interpolation]) if not return_scale: return resized_img else: @@ -95,15 +88,17 @@ def imresize(img, return resized_img, w_scale, h_scale -def imresize_to_multiple(img, - divisor, - size=None, - scale_factor=None, - keep_ratio=False, - return_scale=False, - interpolation='bilinear', - out=None, - backend=None): +def imresize_to_multiple( + img, + divisor, + size=None, + scale_factor=None, + keep_ratio=False, + return_scale=False, + interpolation='bilinear', + out=None, + backend=None, +): """Resize image according to a given size or scale factor and then rounds up the the resized or rescaled image size to the nearest value that can be divided by the divisor. @@ -147,23 +142,15 @@ def imresize_to_multiple(img, divisor = to_2tuple(divisor) size = tuple([int(np.ceil(s / d)) * d for s, d in zip(size, divisor)]) resized_img, w_scale, h_scale = imresize( - img, - size, - return_scale=True, - interpolation=interpolation, - out=out, - backend=backend) + img, size, return_scale=True, interpolation=interpolation, out=out, backend=backend + ) if return_scale: return resized_img, w_scale, h_scale else: return resized_img -def imresize_like(img, - dst_img, - return_scale=False, - interpolation='bilinear', - backend=None): +def imresize_like(img, dst_img, return_scale=False, interpolation='bilinear', backend=None): """Resize image to the same size of a given image. Args: @@ -204,11 +191,9 @@ def rescale_size(old_size, scale, return_scale=False): elif isinstance(scale, tuple): max_long_edge = max(scale) max_short_edge = min(scale) - scale_factor = min(max_long_edge / max(h, w), - max_short_edge / min(h, w)) + scale_factor = min(max_long_edge / max(h, w), max_short_edge / min(h, w)) else: - raise TypeError( - f'Scale must be a number or tuple of int, but got {type(scale)}') + raise TypeError(f'Scale must be a number or tuple of int, but got {type(scale)}') new_size = _scale_size((w, h), scale_factor) @@ -218,11 +203,7 @@ def rescale_size(old_size, scale, return_scale=False): return new_size -def imrescale(img, - scale, - return_scale=False, - interpolation='bilinear', - backend=None): +def imrescale(img, scale, return_scale=False, interpolation='bilinear', backend=None): """Resize image while keeping the aspect ratio. Args: @@ -241,8 +222,7 @@ def imrescale(img, """ h, w = img.shape[:2] new_size, scale_factor = rescale_size((w, h), scale, return_scale=True) - rescaled_img = imresize( - img, new_size, interpolation=interpolation, backend=backend) + rescaled_img = imresize(img, new_size, interpolation=interpolation, backend=backend) if return_scale: return rescaled_img, scale_factor else: @@ -289,13 +269,7 @@ def imflip_(img, direction='horizontal'): return cv2.flip(img, -1, img) -def imrotate(img, - angle, - center=None, - scale=1.0, - border_value=0, - interpolation='bilinear', - auto_bound=False): +def imrotate(img, angle, center=None, scale=1.0, border_value=0, interpolation='bilinear', auto_bound=False): """Rotate an image. Args: @@ -331,11 +305,7 @@ def imrotate(img, matrix[1, 2] += (new_h - h) * 0.5 w = int(np.round(new_w)) h = int(np.round(new_h)) - rotated = cv2.warpAffine( - img, - matrix, (w, h), - flags=cv2_interp_codes[interpolation], - borderValue=border_value) + rotated = cv2.warpAffine(img, matrix, (w, h), flags=cv2_interp_codes[interpolation], borderValue=border_value) return rotated @@ -413,22 +383,19 @@ def imcrop(img, bboxes, scale=1.0, pad_fill=None): for i in range(clipped_bbox.shape[0]): x1, y1, x2, y2 = tuple(clipped_bbox[i, :]) if pad_fill is None: - patch = img[y1:y2 + 1, x1:x2 + 1, ...] + patch = img[y1 : y2 + 1, x1 : x2 + 1, ...] else: _x1, _y1, _x2, _y2 = tuple(scaled_bboxes[i, :]) if chn == 1: patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1) else: patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1, chn) - patch = np.array( - pad_fill, dtype=img.dtype) * np.ones( - patch_shape, dtype=img.dtype) + patch = np.array(pad_fill, dtype=img.dtype) * np.ones(patch_shape, dtype=img.dtype) x_start = 0 if _x1 >= 0 else -_x1 y_start = 0 if _y1 >= 0 else -_y1 w = x2 - x1 + 1 h = y2 - y1 + 1 - patch[y_start:y_start + h, x_start:x_start + w, - ...] = img[y1:y1 + h, x1:x1 + w, ...] + patch[y_start : y_start + h, x_start : x_start + w, ...] = img[y1 : y1 + h, x1 : x1 + w, ...] patches.append(patch) if bboxes.ndim == 1: @@ -437,12 +404,7 @@ def imcrop(img, bboxes, scale=1.0, pad_fill=None): return patches -def impad(img, - *, - shape=None, - padding=None, - pad_val=0, - padding_mode='constant'): +def impad(img, *, shape=None, padding=None, pad_val=0, padding_mode='constant'): """Pad the given image to a certain shape or pad on all sides with specified padding mode and padding value. @@ -485,8 +447,7 @@ def impad(img, if isinstance(pad_val, tuple): assert len(pad_val) == img.shape[-1] elif not isinstance(pad_val, numbers.Number): - raise TypeError('pad_val must be a int or a tuple. ' - f'But received {type(pad_val)}') + raise TypeError('pad_val must be a int or a tuple. ' f'But received {type(pad_val)}') # check padding if isinstance(padding, tuple) and len(padding) in [2, 4]: @@ -495,8 +456,7 @@ def impad(img, elif isinstance(padding, numbers.Number): padding = (padding, padding, padding, padding) else: - raise ValueError('Padding must be a int or a 2, or 4 element tuple.' - f'But received {padding}') + raise ValueError('Padding must be a int or a 2, or 4 element tuple.' f'But received {padding}') # check padding mode assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'] @@ -505,16 +465,11 @@ def impad(img, 'constant': cv2.BORDER_CONSTANT, 'edge': cv2.BORDER_REPLICATE, 'reflect': cv2.BORDER_REFLECT_101, - 'symmetric': cv2.BORDER_REFLECT + 'symmetric': cv2.BORDER_REFLECT, } img = cv2.copyMakeBorder( - img, - padding[1], - padding[3], - padding[0], - padding[2], - border_type[padding_mode], - value=pad_val) + img, padding[1], padding[3], padding[0], padding[2], border_type[padding_mode], value=pad_val + ) return img @@ -553,17 +508,17 @@ def cutout(img, shape, pad_val=0): if isinstance(shape, int): cut_h, cut_w = shape, shape else: - assert isinstance(shape, tuple) and len(shape) == 2, \ - f'shape must be a int or a tuple with length 2, but got type ' \ - f'{type(shape)} instead.' + assert isinstance(shape, tuple) and len(shape) == 2, ( + f'shape must be a int or a tuple with length 2, but got type ' f'{type(shape)} instead.' + ) cut_h, cut_w = shape if isinstance(pad_val, (int, float)): pad_val = tuple([pad_val] * channels) elif isinstance(pad_val, tuple): - assert len(pad_val) == channels, \ - 'Expected the num of elements in tuple equals the channels' \ - 'of input image. Found {} vs {}'.format( - len(pad_val), channels) + assert len(pad_val) == channels, ( + 'Expected the num of elements in tuple equals the channels' + 'of input image. Found {} vs {}'.format(len(pad_val), channels) + ) else: raise TypeError(f'Invalid type {type(pad_val)} for `pad_val`') @@ -571,8 +526,8 @@ def cutout(img, shape, pad_val=0): y0 = np.random.uniform(img_h) x0 = np.random.uniform(img_w) - y1 = int(max(0, y0 - cut_h / 2.)) - x1 = int(max(0, x0 - cut_w / 2.)) + y1 = int(max(0, y0 - cut_h / 2.0)) + x1 = int(max(0, x0 - cut_w / 2.0)) y2 = min(img_h, y1 + cut_h) x2 = min(img_w, x1 + cut_w) @@ -582,9 +537,7 @@ def cutout(img, shape, pad_val=0): patch_shape = (y2 - y1, x2 - x1, channels) img_cutout = img.copy() - patch = np.array( - pad_val, dtype=img.dtype) * np.ones( - patch_shape, dtype=img.dtype) + patch = np.array(pad_val, dtype=img.dtype) * np.ones(patch_shape, dtype=img.dtype) img_cutout[y1:y2, x1:x2, ...] = patch return img_cutout @@ -608,11 +561,7 @@ def _get_shear_matrix(magnitude, direction='horizontal'): return shear_matrix -def imshear(img, - magnitude, - direction='horizontal', - border_value=0, - interpolation='bilinear'): +def imshear(img, magnitude, direction='horizontal', border_value=0, interpolation='bilinear'): """Shear an image. Args: @@ -628,8 +577,7 @@ def imshear(img, Returns: ndarray: The sheared image. """ - assert direction in ['horizontal', - 'vertical'], f'Invalid direction: {direction}' + assert direction in ['horizontal', 'vertical'], f'Invalid direction: {direction}' height, width = img.shape[:2] if img.ndim == 2: channels = 1 @@ -638,13 +586,12 @@ def imshear(img, if isinstance(border_value, int): border_value = tuple([border_value] * channels) elif isinstance(border_value, tuple): - assert len(border_value) == channels, \ - 'Expected the num of elements in tuple equals the channels' \ - 'of input image. Found {} vs {}'.format( - len(border_value), channels) + assert len(border_value) == channels, ( + 'Expected the num of elements in tuple equals the channels' + 'of input image. Found {} vs {}'.format(len(border_value), channels) + ) else: - raise ValueError( - f'Invalid type {type(border_value)} for `border_value`') + raise ValueError(f'Invalid type {type(border_value)} for `border_value`') shear_matrix = _get_shear_matrix(magnitude, direction) sheared = cv2.warpAffine( img, @@ -655,7 +602,8 @@ def imshear(img, # than 3) will raise TypeError in `cv2.warpAffine`. # Here simply slice the first 3 values in `border_value`. borderValue=border_value[:3], - flags=cv2_interp_codes[interpolation]) + flags=cv2_interp_codes[interpolation], + ) return sheared @@ -677,11 +625,7 @@ def _get_translate_matrix(offset, direction='horizontal'): return translate_matrix -def imtranslate(img, - offset, - direction='horizontal', - border_value=0, - interpolation='bilinear'): +def imtranslate(img, offset, direction='horizontal', border_value=0, interpolation='bilinear'): """Translate an image. Args: @@ -697,8 +641,7 @@ def imtranslate(img, Returns: ndarray: The translated image. """ - assert direction in ['horizontal', - 'vertical'], f'Invalid direction: {direction}' + assert direction in ['horizontal', 'vertical'], f'Invalid direction: {direction}' height, width = img.shape[:2] if img.ndim == 2: channels = 1 @@ -707,13 +650,12 @@ def imtranslate(img, if isinstance(border_value, int): border_value = tuple([border_value] * channels) elif isinstance(border_value, tuple): - assert len(border_value) == channels, \ - 'Expected the num of elements in tuple equals the channels' \ - 'of input image. Found {} vs {}'.format( - len(border_value), channels) + assert len(border_value) == channels, ( + 'Expected the num of elements in tuple equals the channels' + 'of input image. Found {} vs {}'.format(len(border_value), channels) + ) else: - raise ValueError( - f'Invalid type {type(border_value)} for `border_value`.') + raise ValueError(f'Invalid type {type(border_value)} for `border_value`.') translate_matrix = _get_translate_matrix(offset, direction) translated = cv2.warpAffine( img, @@ -724,5 +666,6 @@ def imtranslate(img, # large than 3) will raise TypeError in `cv2.warpAffine`. # Here simply slice the first 3 values in `border_value`. borderValue=border_value[:3], - flags=cv2_interp_codes[interpolation]) + flags=cv2_interp_codes[interpolation], + ) return translated diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py index cba7afefe60a..eebffaac43d7 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py @@ -5,8 +5,7 @@ import cv2 import numpy as np -from cv2 import (IMREAD_COLOR, IMREAD_GRAYSCALE, IMREAD_IGNORE_ORIENTATION, - IMREAD_UNCHANGED) +from cv2 import IMREAD_COLOR, IMREAD_GRAYSCALE, IMREAD_IGNORE_ORIENTATION, IMREAD_UNCHANGED from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import check_file_exist, is_str, mkdir_or_exist @@ -33,8 +32,7 @@ 'grayscale': IMREAD_GRAYSCALE, 'unchanged': IMREAD_UNCHANGED, 'color_ignore_orientation': IMREAD_IGNORE_ORIENTATION | IMREAD_COLOR, - 'grayscale_ignore_orientation': - IMREAD_IGNORE_ORIENTATION | IMREAD_GRAYSCALE + 'grayscale_ignore_orientation': IMREAD_IGNORE_ORIENTATION | IMREAD_GRAYSCALE, } imread_backend = 'cv2' @@ -133,7 +131,8 @@ def _pillow2array(img, flag='color', channel_order='bgr'): raise ValueError( 'flag must be "color", "grayscale", "unchanged", ' f'"color_ignore_orientation" or "grayscale_ignore_orientation"' - f' but got {flag}') + f' but got {flag}' + ) return array @@ -165,20 +164,19 @@ def imread(img_or_path, flag='color', channel_order='bgr', backend=None): if backend is None: backend = imread_backend if backend not in supported_backends: - raise ValueError(f'backend: {backend} is not supported. Supported ' - "backends are 'cv2', 'turbojpeg', 'pillow'") + raise ValueError( + f'backend: {backend} is not supported. Supported ' "backends are 'cv2', 'turbojpeg', 'pillow'" + ) if isinstance(img_or_path, Path): img_or_path = str(img_or_path) if isinstance(img_or_path, np.ndarray): return img_or_path elif is_str(img_or_path): - check_file_exist(img_or_path, - f'img file does not exist: {img_or_path}') + check_file_exist(img_or_path, f'img file does not exist: {img_or_path}') if backend == 'turbojpeg': with open(img_or_path, 'rb') as in_file: - img = jpeg.decode(in_file.read(), - _jpegflag(flag, channel_order)) + img = jpeg.decode(in_file.read(), _jpegflag(flag, channel_order)) if img.shape[-1] == 1: img = img[:, :, 0] return img @@ -196,8 +194,7 @@ def imread(img_or_path, flag='color', channel_order='bgr', backend=None): cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) return img else: - raise TypeError('"img" must be a numpy array or a str or ' - 'a pathlib.Path object') + raise TypeError('"img" must be a numpy array or a str or ' 'a pathlib.Path object') def imfrombytes(content, flag='color', channel_order='bgr', backend=None): @@ -218,8 +215,9 @@ def imfrombytes(content, flag='color', channel_order='bgr', backend=None): if backend is None: backend = imread_backend if backend not in supported_backends: - raise ValueError(f'backend: {backend} is not supported. Supported ' - "backends are 'cv2', 'turbojpeg', 'pillow'") + raise ValueError( + f'backend: {backend} is not supported. Supported ' "backends are 'cv2', 'turbojpeg', 'pillow'" + ) if backend == 'turbojpeg': img = jpeg.decode(content, _jpegflag(flag, channel_order)) if img.shape[-1] == 1: diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py index b41891ffb0a5..a66ed60474b9 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py @@ -38,7 +38,6 @@ def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): imgs = [] for img_id in range(num_imgs): img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) - img = mmcv.imdenormalize( - img, mean, std, to_bgr=to_rgb).astype(np.uint8) + img = mmcv.imdenormalize(img, mean, std, to_bgr=to_rgb).astype(np.uint8) imgs.append(np.ascontiguousarray(img)) return imgs diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py index 5085d012019c..a68b8f49cade 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py @@ -173,7 +173,7 @@ def _scale_channel(im, c): return equalized_img.astype(img.dtype) -def adjust_brightness(img, factor=1.): +def adjust_brightness(img, factor=1.0): """Adjust image brightness. This function controls the brightness of an image. An @@ -198,14 +198,12 @@ def adjust_brightness(img, factor=1.): # Note manually convert the dtype to np.float32, to # achieve as close results as PIL.ImageEnhance.Brightness. # Set beta=1-factor, and gamma=0 - brightened_img = cv2.addWeighted( - img.astype(np.float32), factor, degenerated.astype(np.float32), - 1 - factor, 0) + brightened_img = cv2.addWeighted(img.astype(np.float32), factor, degenerated.astype(np.float32), 1 - factor, 0) brightened_img = np.clip(brightened_img, 0, 255) return brightened_img.astype(img.dtype) -def adjust_contrast(img, factor=1.): +def adjust_contrast(img, factor=1.0): """Adjust image contrast. This function controls the contrast of an image. An @@ -228,9 +226,7 @@ def adjust_contrast(img, factor=1.): mean = round(np.sum(gray_img) / np.sum(hist)) degenerated = (np.ones_like(img[..., 0]) * mean).astype(img.dtype) degenerated = gray2bgr(degenerated) - contrasted_img = cv2.addWeighted( - img.astype(np.float32), factor, degenerated.astype(np.float32), - 1 - factor, 0) + contrasted_img = cv2.addWeighted(img.astype(np.float32), factor, degenerated.astype(np.float32), 1 - factor, 0) contrasted_img = np.clip(contrasted_img, 0, 255) return contrasted_img.astype(img.dtype) @@ -280,8 +276,9 @@ def _auto_contrast_channel(im, c, cutoff): if isinstance(cutoff, (int, float)): cutoff = (cutoff, cutoff) else: - assert isinstance(cutoff, tuple), 'cutoff must be of type int, ' \ - f'float or tuple, but got {type(cutoff)} instead.' + assert isinstance(cutoff, tuple), ( + 'cutoff must be of type int, ' f'float or tuple, but got {type(cutoff)} instead.' + ) # Auto adjusts contrast for each channel independently and then stacks # the result. s1 = _auto_contrast_channel(img, 0, cutoff) @@ -291,7 +288,7 @@ def _auto_contrast_channel(im, c, cutoff): return contrasted_img.astype(img.dtype) -def adjust_sharpness(img, factor=1., kernel=None): +def adjust_sharpness(img, factor=1.0, kernel=None): """Adjust image sharpness. This function controls the sharpness of an image. An @@ -321,16 +318,12 @@ def adjust_sharpness(img, factor=1., kernel=None): if kernel is None: # adopted from PIL.ImageFilter.SMOOTH - kernel = np.array([[1., 1., 1.], [1., 5., 1.], [1., 1., 1.]]) / 13 - assert isinstance(kernel, np.ndarray), \ - f'kernel must be of type np.ndarray, but got {type(kernel)} instead.' - assert kernel.ndim == 2, \ - f'kernel must have a dimension of 2, but got {kernel.ndim} instead.' + kernel = np.array([[1.0, 1.0, 1.0], [1.0, 5.0, 1.0], [1.0, 1.0, 1.0]]) / 13 + assert isinstance(kernel, np.ndarray), f'kernel must be of type np.ndarray, but got {type(kernel)} instead.' + assert kernel.ndim == 2, f'kernel must have a dimension of 2, but got {kernel.ndim} instead.' degenerated = cv2.filter2D(img, -1, kernel) - sharpened_img = cv2.addWeighted( - img.astype(np.float32), factor, degenerated.astype(np.float32), - 1 - factor, 0) + sharpened_img = cv2.addWeighted(img.astype(np.float32), factor, degenerated.astype(np.float32), 1 - factor, 0) sharpened_img = np.clip(sharpened_img, 0, 255) return sharpened_img.astype(img.dtype) @@ -355,24 +348,25 @@ def adjust_lighting(img, eigval, eigvec, alphastd=0.1, to_rgb=True): Returns: ndarray: The adjusted image. """ - assert isinstance(eigval, np.ndarray) and isinstance(eigvec, np.ndarray), \ - f'eigval and eigvec should both be of type np.ndarray, got ' \ - f'{type(eigval)} and {type(eigvec)} instead.' + assert isinstance(eigval, np.ndarray) and isinstance(eigvec, np.ndarray), ( + f'eigval and eigvec should both be of type np.ndarray, got ' f'{type(eigval)} and {type(eigvec)} instead.' + ) assert eigval.ndim == 1 and eigvec.ndim == 2 assert eigvec.shape == (3, eigval.shape[0]) n_eigval = eigval.shape[0] - assert isinstance(alphastd, float), 'alphastd should be of type float, ' \ - f'got {type(alphastd)} instead.' + assert isinstance(alphastd, float), 'alphastd should be of type float, ' f'got {type(alphastd)} instead.' img = img.copy().astype(np.float32) if to_rgb: cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace alpha = np.random.normal(0, alphastd, n_eigval) - alter = eigvec \ - * np.broadcast_to(alpha.reshape(1, n_eigval), (3, n_eigval)) \ + alter = ( + eigvec + * np.broadcast_to(alpha.reshape(1, n_eigval), (3, n_eigval)) * np.broadcast_to(eigval.reshape(1, n_eigval), (3, n_eigval)) + ) alter = np.broadcast_to(alter.sum(axis=1).reshape(1, 1, 3), img.shape) img_adjusted = img + alter return img_adjusted @@ -397,7 +391,7 @@ def lut_transform(img, lut_table): assert isinstance(img, np.ndarray) assert 0 <= np.min(img) and np.max(img) <= 255 assert isinstance(lut_table, np.ndarray) - assert lut_table.shape == (256, ) + assert lut_table.shape == (256,) return cv2.LUT(np.array(img, dtype=np.uint8), lut_table) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py index 999e090a458e..ecee97e0c0cb 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py @@ -10,34 +10,26 @@ from .corner_pool import CornerPool from .correlation import Correlation from .deform_conv import DeformConv2d, DeformConv2dPack, deform_conv2d -from .deform_roi_pool import (DeformRoIPool, DeformRoIPoolPack, - ModulatedDeformRoIPoolPack, deform_roi_pool) +from .deform_roi_pool import DeformRoIPool, DeformRoIPoolPack, ModulatedDeformRoIPoolPack, deform_roi_pool from .deprecated_wrappers import Conv2d_deprecated as Conv2d from .deprecated_wrappers import ConvTranspose2d_deprecated as ConvTranspose2d from .deprecated_wrappers import Linear_deprecated as Linear from .deprecated_wrappers import MaxPool2d_deprecated as MaxPool2d -from .focal_loss import (SigmoidFocalLoss, SoftmaxFocalLoss, - sigmoid_focal_loss, softmax_focal_loss) -from .furthest_point_sample import (furthest_point_sample, - furthest_point_sample_with_dist) +from .focal_loss import SigmoidFocalLoss, SoftmaxFocalLoss, sigmoid_focal_loss, softmax_focal_loss +from .furthest_point_sample import furthest_point_sample, furthest_point_sample_with_dist from .fused_bias_leakyrelu import FusedBiasLeakyReLU, fused_bias_leakyrelu from .gather_points import gather_points from .group_points import GroupAll, QueryAndGroup, grouping_operation -from .info import (get_compiler_version, get_compiling_cuda_version, - get_onnxruntime_op_path) +from .info import get_compiler_version, get_compiling_cuda_version, get_onnxruntime_op_path from .iou3d import boxes_iou_bev, nms_bev, nms_normal_bev from .knn import knn from .masked_conv import MaskedConv2d, masked_conv2d -from .modulated_deform_conv import (ModulatedDeformConv2d, - ModulatedDeformConv2dPack, - modulated_deform_conv2d) +from .modulated_deform_conv import ModulatedDeformConv2d, ModulatedDeformConv2dPack, modulated_deform_conv2d from .multi_scale_deform_attn import MultiScaleDeformableAttention from .nms import batched_nms, nms, nms_match, nms_rotated, soft_nms from .pixel_group import pixel_group -from .point_sample import (SimpleRoIAlign, point_sample, - rel_roi_point_to_rel_img_point) -from .points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu, - points_in_boxes_part) +from .point_sample import SimpleRoIAlign, point_sample, rel_roi_point_to_rel_img_point +from .points_in_boxes import points_in_boxes_all, points_in_boxes_cpu, points_in_boxes_part from .points_sampler import PointsSampler from .psa_mask import PSAMask from .roi_align import RoIAlign, roi_align @@ -55,27 +47,88 @@ from .voxelize import Voxelization, voxelization __all__ = [ - 'bbox_overlaps', 'CARAFE', 'CARAFENaive', 'CARAFEPack', 'carafe', - 'carafe_naive', 'CornerPool', 'DeformConv2d', 'DeformConv2dPack', - 'deform_conv2d', 'DeformRoIPool', 'DeformRoIPoolPack', - 'ModulatedDeformRoIPoolPack', 'deform_roi_pool', 'SigmoidFocalLoss', - 'SoftmaxFocalLoss', 'sigmoid_focal_loss', 'softmax_focal_loss', - 'get_compiler_version', 'get_compiling_cuda_version', - 'get_onnxruntime_op_path', 'MaskedConv2d', 'masked_conv2d', - 'ModulatedDeformConv2d', 'ModulatedDeformConv2dPack', - 'modulated_deform_conv2d', 'batched_nms', 'nms', 'soft_nms', 'nms_match', - 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 'SyncBatchNorm', 'Conv2d', - 'ConvTranspose2d', 'Linear', 'MaxPool2d', 'CrissCrossAttention', 'PSAMask', - 'point_sample', 'rel_roi_point_to_rel_img_point', 'SimpleRoIAlign', - 'SAConv2d', 'TINShift', 'tin_shift', 'assign_score_withk', - 'box_iou_rotated', 'RoIPointPool3d', 'nms_rotated', 'knn', 'ball_query', - 'upfirdn2d', 'FusedBiasLeakyReLU', 'fused_bias_leakyrelu', - 'RoIAlignRotated', 'roi_align_rotated', 'pixel_group', 'QueryAndGroup', - 'GroupAll', 'grouping_operation', 'contour_expand', 'three_nn', - 'three_interpolate', 'MultiScaleDeformableAttention', 'BorderAlign', - 'border_align', 'gather_points', 'furthest_point_sample', - 'furthest_point_sample_with_dist', 'PointsSampler', 'Correlation', - 'boxes_iou_bev', 'nms_bev', 'nms_normal_bev', 'Voxelization', - 'voxelization', 'dynamic_scatter', 'DynamicScatter', 'RoIAwarePool3d', - 'points_in_boxes_part', 'points_in_boxes_cpu', 'points_in_boxes_all' + 'bbox_overlaps', + 'CARAFE', + 'CARAFENaive', + 'CARAFEPack', + 'carafe', + 'carafe_naive', + 'CornerPool', + 'DeformConv2d', + 'DeformConv2dPack', + 'deform_conv2d', + 'DeformRoIPool', + 'DeformRoIPoolPack', + 'ModulatedDeformRoIPoolPack', + 'deform_roi_pool', + 'SigmoidFocalLoss', + 'SoftmaxFocalLoss', + 'sigmoid_focal_loss', + 'softmax_focal_loss', + 'get_compiler_version', + 'get_compiling_cuda_version', + 'get_onnxruntime_op_path', + 'MaskedConv2d', + 'masked_conv2d', + 'ModulatedDeformConv2d', + 'ModulatedDeformConv2dPack', + 'modulated_deform_conv2d', + 'batched_nms', + 'nms', + 'soft_nms', + 'nms_match', + 'RoIAlign', + 'roi_align', + 'RoIPool', + 'roi_pool', + 'SyncBatchNorm', + 'Conv2d', + 'ConvTranspose2d', + 'Linear', + 'MaxPool2d', + 'CrissCrossAttention', + 'PSAMask', + 'point_sample', + 'rel_roi_point_to_rel_img_point', + 'SimpleRoIAlign', + 'SAConv2d', + 'TINShift', + 'tin_shift', + 'assign_score_withk', + 'box_iou_rotated', + 'RoIPointPool3d', + 'nms_rotated', + 'knn', + 'ball_query', + 'upfirdn2d', + 'FusedBiasLeakyReLU', + 'fused_bias_leakyrelu', + 'RoIAlignRotated', + 'roi_align_rotated', + 'pixel_group', + 'QueryAndGroup', + 'GroupAll', + 'grouping_operation', + 'contour_expand', + 'three_nn', + 'three_interpolate', + 'MultiScaleDeformableAttention', + 'BorderAlign', + 'border_align', + 'gather_points', + 'furthest_point_sample', + 'furthest_point_sample_with_dist', + 'PointsSampler', + 'Correlation', + 'boxes_iou_bev', + 'nms_bev', + 'nms_normal_bev', + 'Voxelization', + 'voxelization', + 'dynamic_scatter', + 'DynamicScatter', + 'RoIAwarePool3d', + 'points_in_boxes_part', + 'points_in_boxes_cpu', + 'points_in_boxes_all', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py index 4906adaa2cff..399600eb812b 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py @@ -2,8 +2,7 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext( - '_ext', ['assign_score_withk_forward', 'assign_score_withk_backward']) +ext_module = ext_loader.load_ext('_ext', ['assign_score_withk_forward', 'assign_score_withk_backward']) class AssignScoreWithK(Function): @@ -26,12 +25,7 @@ class AssignScoreWithK(Function): """ @staticmethod - def forward(ctx, - scores, - point_features, - center_features, - knn_idx, - aggregate='sum'): + def forward(ctx, scores, point_features, center_features, knn_idx, aggregate='sum'): """ Args: scores (torch.Tensor): (B, npoint, K, M), predicted scores to @@ -69,10 +63,10 @@ def forward(ctx, M=M, K=K, O=out_dim, - aggregate=agg[aggregate]) + aggregate=agg[aggregate], + ) - ctx.save_for_backward(output, point_features, center_features, scores, - knn_idx) + ctx.save_for_backward(output, point_features, center_features, scores, knn_idx) ctx.agg = agg[aggregate] return output @@ -114,10 +108,10 @@ def backward(ctx, grad_out): M=M, K=K, O=out_dim, - aggregate=agg) + aggregate=agg, + ) - return grad_scores, grad_point_features, \ - grad_center_features, None, None + return grad_scores, grad_point_features, grad_center_features, None, None assign_score_withk = AssignScoreWithK.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py index d0466847c6e5..51c403292391 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py @@ -11,8 +11,9 @@ class BallQuery(Function): """Find nearby points in spherical space.""" @staticmethod - def forward(ctx, min_radius: float, max_radius: float, sample_num: int, - xyz: torch.Tensor, center_xyz: torch.Tensor) -> torch.Tensor: + def forward( + ctx, min_radius: float, max_radius: float, sample_num: int, xyz: torch.Tensor, center_xyz: torch.Tensor + ) -> torch.Tensor: """ Args: min_radius (float): minimum radius of the balls. @@ -34,15 +35,8 @@ def forward(ctx, min_radius: float, max_radius: float, sample_num: int, idx = xyz.new_zeros(B, npoint, sample_num, dtype=torch.int) ext_module.ball_query_forward( - center_xyz, - xyz, - idx, - b=B, - n=N, - m=npoint, - min_radius=min_radius, - max_radius=max_radius, - nsample=sample_num) + center_xyz, xyz, idx, b=B, n=N, m=npoint, min_radius=min_radius, max_radius=max_radius, nsample=sample_num + ) if torch.__version__ != 'parrots': ctx.mark_non_differentiable(idx) return idx diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py index 0c4d58b6c91f..44aa88881385 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py @@ -51,8 +51,8 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0): assert mode in mode_dict.keys() mode_flag = mode_dict[mode] # Either the boxes are empty or the length of boxes' last dimension is 4 - assert (bboxes1.size(-1) == 4 or bboxes1.size(0) == 0) - assert (bboxes2.size(-1) == 4 or bboxes2.size(0) == 0) + assert bboxes1.size(-1) == 4 or bboxes1.size(0) == 0 + assert bboxes2.size(-1) == 4 or bboxes2.size(0) == 0 assert offset == 1 or offset == 0 rows = bboxes1.size(0) @@ -67,6 +67,5 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0): ious = bboxes1.new_zeros(rows) else: ious = bboxes1.new_zeros((rows, cols)) - ext_module.bbox_overlaps( - bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset) + ext_module.bbox_overlaps(bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset) return ious diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py index ff305be328e9..beea1a66e997 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py @@ -9,16 +9,13 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext( - '_ext', ['border_align_forward', 'border_align_backward']) +ext_module = ext_loader.load_ext('_ext', ['border_align_forward', 'border_align_backward']) class BorderAlignFunction(Function): - @staticmethod def symbolic(g, input, boxes, pool_size): - return g.op( - 'mmcv::MMCVBorderAlign', input, boxes, pool_size_i=pool_size) + return g.op('mmcv::MMCVBorderAlign', input, boxes, pool_size_i=pool_size) @staticmethod def forward(ctx, input, boxes, pool_size): @@ -26,10 +23,8 @@ def forward(ctx, input, boxes, pool_size): ctx.input_shape = input.size() assert boxes.ndim == 3, 'boxes must be with shape [B, H*W, 4]' - assert boxes.size(2) == 4, \ - 'the last dimension of boxes must be (x1, y1, x2, y2)' - assert input.size(1) % 4 == 0, \ - 'the channel for input feature must be divisible by factor 4' + assert boxes.size(2) == 4, 'the last dimension of boxes must be (x1, y1, x2, y2)' + assert input.size(1) % 4 == 0, 'the channel for input feature must be divisible by factor 4' # [B, C//4, H*W, 4] output_shape = (input.size(0), input.size(1) // 4, boxes.size(1), 4) @@ -37,8 +32,7 @@ def forward(ctx, input, boxes, pool_size): # `argmax_idx` only used for backward argmax_idx = input.new_zeros(output_shape).to(torch.int) - ext_module.border_align_forward( - input, boxes, output, argmax_idx, pool_size=ctx.pool_size) + ext_module.border_align_forward(input, boxes, output, argmax_idx, pool_size=ctx.pool_size) ctx.save_for_backward(boxes, argmax_idx) return output @@ -50,12 +44,7 @@ def backward(ctx, grad_output): grad_input = grad_output.new_zeros(ctx.input_shape) # complex head architecture may cause grad_output uncontiguous grad_output = grad_output.contiguous() - ext_module.border_align_backward( - grad_output, - boxes, - argmax_idx, - grad_input, - pool_size=ctx.pool_size) + ext_module.border_align_backward(grad_output, boxes, argmax_idx, grad_input, pool_size=ctx.pool_size) return grad_input, None, None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py index 2d78015e9c2a..dfadb39c715c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py @@ -38,8 +38,7 @@ def box_iou_rotated(bboxes1, bboxes2, mode='iou', aligned=False): ious = bboxes1.new_zeros((rows * cols)) bboxes1 = bboxes1.contiguous() bboxes2 = bboxes2.contiguous() - ext_module.box_iou_rotated( - bboxes1, bboxes2, ious, mode_flag=mode_flag, aligned=aligned) + ext_module.box_iou_rotated(bboxes1, bboxes2, ious, mode_flag=mode_flag, aligned=aligned) if not aligned: ious = ious.view(rows, cols) return ious diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py index 5154cb3abfcc..bc0eb0d32f71 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py @@ -8,14 +8,12 @@ from ..cnn import UPSAMPLE_LAYERS, normal_init, xavier_init from ..utils import ext_loader -ext_module = ext_loader.load_ext('_ext', [ - 'carafe_naive_forward', 'carafe_naive_backward', 'carafe_forward', - 'carafe_backward' -]) +ext_module = ext_loader.load_ext( + '_ext', ['carafe_naive_forward', 'carafe_naive_backward', 'carafe_forward', 'carafe_backward'] +) class CARAFENaiveFunction(Function): - @staticmethod def symbolic(g, features, masks, kernel_size, group_size, scale_factor): return g.op( @@ -24,7 +22,8 @@ def symbolic(g, features, masks, kernel_size, group_size, scale_factor): masks, kernel_size_i=kernel_size, group_size_i=group_size, - scale_factor_f=scale_factor) + scale_factor_f=scale_factor, + ) @staticmethod def forward(ctx, features, masks, kernel_size, group_size, scale_factor): @@ -43,12 +42,8 @@ def forward(ctx, features, masks, kernel_size, group_size, scale_factor): n, c, h, w = features.size() output = features.new_zeros((n, c, h * scale_factor, w * scale_factor)) ext_module.carafe_naive_forward( - features, - masks, - output, - kernel_size=kernel_size, - group_size=group_size, - scale_factor=scale_factor) + features, masks, output, kernel_size=kernel_size, group_size=group_size, scale_factor=scale_factor + ) if features.requires_grad or masks.requires_grad: ctx.save_for_backward(features, masks) @@ -73,7 +68,8 @@ def backward(ctx, grad_output): grad_masks, kernel_size=kernel_size, group_size=group_size, - scale_factor=scale_factor) + scale_factor=scale_factor, + ) return grad_input, grad_masks, None, None, None @@ -82,23 +78,19 @@ def backward(ctx, grad_output): class CARAFENaive(Module): - def __init__(self, kernel_size, group_size, scale_factor): super(CARAFENaive, self).__init__() - assert isinstance(kernel_size, int) and isinstance( - group_size, int) and isinstance(scale_factor, int) + assert isinstance(kernel_size, int) and isinstance(group_size, int) and isinstance(scale_factor, int) self.kernel_size = kernel_size self.group_size = group_size self.scale_factor = scale_factor def forward(self, features, masks): - return carafe_naive(features, masks, self.kernel_size, self.group_size, - self.scale_factor) + return carafe_naive(features, masks, self.kernel_size, self.group_size, self.scale_factor) class CARAFEFunction(Function): - @staticmethod def symbolic(g, features, masks, kernel_size, group_size, scale_factor): return g.op( @@ -107,7 +99,8 @@ def symbolic(g, features, masks, kernel_size, group_size, scale_factor): masks, kernel_size_i=kernel_size, group_size_i=group_size, - scale_factor_f=scale_factor) + scale_factor_f=scale_factor, + ) @staticmethod def forward(ctx, features, masks, kernel_size, group_size, scale_factor): @@ -137,7 +130,8 @@ def forward(ctx, features, masks, kernel_size, group_size, scale_factor): output, kernel_size=kernel_size, group_size=group_size, - scale_factor=scale_factor) + scale_factor=scale_factor, + ) if features.requires_grad or masks.requires_grad: ctx.save_for_backward(features, masks, rfeatures) @@ -170,7 +164,8 @@ def backward(ctx, grad_output): grad_masks, kernel_size=kernel_size, group_size=group_size, - scale_factor=scale_factor) + scale_factor=scale_factor, + ) return grad_input, grad_masks, None, None, None @@ -194,15 +189,13 @@ class CARAFE(Module): def __init__(self, kernel_size, group_size, scale_factor): super(CARAFE, self).__init__() - assert isinstance(kernel_size, int) and isinstance( - group_size, int) and isinstance(scale_factor, int) + assert isinstance(kernel_size, int) and isinstance(group_size, int) and isinstance(scale_factor, int) self.kernel_size = kernel_size self.group_size = group_size self.scale_factor = scale_factor def forward(self, features, masks): - return carafe(features, masks, self.kernel_size, self.group_size, - self.scale_factor) + return carafe(features, masks, self.kernel_size, self.group_size, self.scale_factor) @UPSAMPLE_LAYERS.register_module(name='carafe') @@ -227,14 +220,16 @@ class CARAFEPack(nn.Module): upsampled feature map """ - def __init__(self, - channels, - scale_factor, - up_kernel=5, - up_group=1, - encoder_kernel=3, - encoder_dilation=1, - compressed_channels=64): + def __init__( + self, + channels, + scale_factor, + up_kernel=5, + up_group=1, + encoder_kernel=3, + encoder_dilation=1, + compressed_channels=64, + ): super(CARAFEPack, self).__init__() self.channels = channels self.scale_factor = scale_factor @@ -243,16 +238,15 @@ def __init__(self, self.encoder_kernel = encoder_kernel self.encoder_dilation = encoder_dilation self.compressed_channels = compressed_channels - self.channel_compressor = nn.Conv2d(channels, self.compressed_channels, - 1) + self.channel_compressor = nn.Conv2d(channels, self.compressed_channels, 1) self.content_encoder = nn.Conv2d( self.compressed_channels, - self.up_kernel * self.up_kernel * self.up_group * - self.scale_factor * self.scale_factor, + self.up_kernel * self.up_kernel * self.up_group * self.scale_factor * self.scale_factor, self.encoder_kernel, padding=int((self.encoder_kernel - 1) * self.encoder_dilation / 2), dilation=self.encoder_dilation, - groups=1) + groups=1, + ) self.init_weights() def init_weights(self): @@ -266,7 +260,7 @@ def kernel_normalizer(self, mask): n, mask_c, h, w = mask.size() # use float division explicitly, # to void inconsistency while exporting to onnx - mask_channel = int(mask_c / float(self.up_kernel**2)) + mask_channel = int(mask_c / float(self.up_kernel ** 2)) mask = mask.view(n, mask_channel, -1, h, w) mask = F.softmax(mask, dim=2, dtype=mask.dtype) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py index 7d901fdc6b47..48fe50696acb 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py @@ -46,7 +46,7 @@ def __init__(self, in_channels): self.query_conv = nn.Conv2d(in_channels, in_channels // 8, 1) self.key_conv = nn.Conv2d(in_channels, in_channels // 8, 1) self.value_conv = nn.Conv2d(in_channels, in_channels, 1) - self.gamma = Scale(0.) + self.gamma = Scale(0.0) self.in_channels = in_channels def forward(self, x): @@ -63,12 +63,10 @@ def forward(self, x): query = self.query_conv(x) key = self.key_conv(x) value = self.value_conv(x) - energy_H = torch.einsum('bchw,bciw->bwhi', query, key) + NEG_INF_DIAG( - H, query.device) + energy_H = torch.einsum('bchw,bciw->bwhi', query, key) + NEG_INF_DIAG(H, query.device) energy_H = energy_H.transpose(1, 2) energy_W = torch.einsum('bchw,bchj->bhwj', query, key) - attn = F.softmax( - torch.cat([energy_H, energy_W], dim=-1), dim=-1) # [B,H,W,(H+W)] + attn = F.softmax(torch.cat([energy_H, energy_W], dim=-1), dim=-1) # [B,H,W,(H+W)] out = torch.einsum('bciw,bhwi->bchw', value, attn[..., :H]) out += torch.einsum('bchj,bhwj->bchw', value, attn[..., H:]) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py index ea1111e1768b..14281d4c5d63 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py @@ -7,8 +7,7 @@ ext_module = ext_loader.load_ext('_ext', ['contour_expand']) -def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area, - kernel_num): +def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area, kernel_num): """Expand kernel contours so that foreground pixels are assigned into instances. @@ -38,12 +37,9 @@ def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area, label = [] else: label = ext_module.contour_expand( - kernel_mask, - internal_kernel_label, - min_kernel_area=min_kernel_area, - kernel_num=kernel_num) + kernel_mask, internal_kernel_label, min_kernel_area=min_kernel_area, kernel_num=kernel_num + ) label = label.tolist() else: - label = ext_module.contour_expand(kernel_mask, internal_kernel_label, - min_kernel_area, kernel_num) + label = ext_module.contour_expand(kernel_mask, internal_kernel_label, min_kernel_area, kernel_num) return label diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py index a33d798b43d4..ede2266be45c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py @@ -5,21 +5,27 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext('_ext', [ - 'top_pool_forward', 'top_pool_backward', 'bottom_pool_forward', - 'bottom_pool_backward', 'left_pool_forward', 'left_pool_backward', - 'right_pool_forward', 'right_pool_backward' -]) +ext_module = ext_loader.load_ext( + '_ext', + [ + 'top_pool_forward', + 'top_pool_backward', + 'bottom_pool_forward', + 'bottom_pool_backward', + 'left_pool_forward', + 'left_pool_backward', + 'right_pool_forward', + 'right_pool_backward', + ], +) _mode_dict = {'top': 0, 'bottom': 1, 'left': 2, 'right': 3} class TopPoolFunction(Function): - @staticmethod def symbolic(g, input): - output = g.op( - 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['top'])) + output = g.op('mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['top'])) return output @staticmethod @@ -30,17 +36,15 @@ def forward(ctx, input): @staticmethod def backward(ctx, grad_output): - input, = ctx.saved_tensors + (input,) = ctx.saved_tensors output = ext_module.top_pool_backward(input, grad_output) return output class BottomPoolFunction(Function): - @staticmethod def symbolic(g, input): - output = g.op( - 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['bottom'])) + output = g.op('mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['bottom'])) return output @staticmethod @@ -51,17 +55,15 @@ def forward(ctx, input): @staticmethod def backward(ctx, grad_output): - input, = ctx.saved_tensors + (input,) = ctx.saved_tensors output = ext_module.bottom_pool_backward(input, grad_output) return output class LeftPoolFunction(Function): - @staticmethod def symbolic(g, input): - output = g.op( - 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['left'])) + output = g.op('mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['left'])) return output @staticmethod @@ -72,17 +74,15 @@ def forward(ctx, input): @staticmethod def backward(ctx, grad_output): - input, = ctx.saved_tensors + (input,) = ctx.saved_tensors output = ext_module.left_pool_backward(input, grad_output) return output class RightPoolFunction(Function): - @staticmethod def symbolic(g, input): - output = g.op( - 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['right'])) + output = g.op('mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['right'])) return output @staticmethod @@ -93,7 +93,7 @@ def forward(ctx, input): @staticmethod def backward(ctx, grad_output): - input, = ctx.saved_tensors + (input,) = ctx.saved_tensors output = ext_module.right_pool_backward(input, grad_output) return output @@ -142,13 +142,14 @@ def __init__(self, mode): def forward(self, x): if torch.__version__ != 'parrots' and torch.__version__ >= '1.5.0': if torch.onnx.is_in_onnx_export(): - assert torch.__version__ >= '1.7.0', \ - 'When `cummax` serves as an intermediate component whose '\ - 'outputs is used as inputs for another modules, it\'s '\ - 'expected that pytorch version must be >= 1.7.0, '\ - 'otherwise Error appears like: `RuntimeError: tuple '\ - 'appears in op that does not forward tuples, unsupported '\ + assert torch.__version__ >= '1.7.0', ( + 'When `cummax` serves as an intermediate component whose ' + 'outputs is used as inputs for another modules, it\'s ' + 'expected that pytorch version must be >= 1.7.0, ' + 'otherwise Error appears like: `RuntimeError: tuple ' + 'appears in op that does not forward tuples, unsupported ' 'kind: prim::PythonOp`.' + ) dim, flip = self.cummax_dim_flip[self.mode] if flip: diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py index 3d0b79c301b2..a5f89fa68576 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py @@ -7,22 +7,14 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext( - '_ext', ['correlation_forward', 'correlation_backward']) +ext_module = ext_loader.load_ext('_ext', ['correlation_forward', 'correlation_backward']) class CorrelationFunction(Function): - @staticmethod - def forward(ctx, - input1, - input2, - kernel_size=1, - max_displacement=1, - stride=1, - padding=1, - dilation=1, - dilation_patch=1): + def forward( + ctx, input1, input2, kernel_size=1, max_displacement=1, stride=1, padding=1, dilation=1, dilation_patch=1 + ): ctx.save_for_backward(input1, input2) @@ -32,8 +24,7 @@ def forward(ctx, dH, dW = ctx.stride = _pair(stride) padH, padW = ctx.padding = _pair(padding) dilationH, dilationW = ctx.dilation = _pair(dilation) - dilation_patchH, dilation_patchW = ctx.dilation_patch = _pair( - dilation_patch) + dilation_patchH, dilation_patchW = ctx.dilation_patch = _pair(dilation_patch) output_size = CorrelationFunction._output_size(ctx, input1) @@ -54,7 +45,8 @@ def forward(ctx, dilation_patchH=dilation_patchH, dilation_patchW=dilation_patchW, dH=dH, - dW=dW) + dW=dW, + ) return output @@ -89,7 +81,8 @@ def backward(ctx, grad_output): dilation_patchH=dilation_patchH, dilation_patchW=dilation_patchW, dH=dH, - dW=dW) + dW=dW, + ) return grad_input1, grad_input2, None, None, None, None, None, None @staticmethod @@ -164,13 +157,15 @@ class Correlation(nn.Module): correlation. Defaults to 1. """ - def __init__(self, - kernel_size: int = 1, - max_displacement: int = 1, - stride: int = 1, - padding: int = 0, - dilation: int = 1, - dilation_patch: int = 1) -> None: + def __init__( + self, + kernel_size: int = 1, + max_displacement: int = 1, + stride: int = 1, + padding: int = 0, + dilation: int = 1, + dilation_patch: int = 1, + ) -> None: super().__init__() self.kernel_size = kernel_size self.max_displacement = max_displacement @@ -180,10 +175,16 @@ def __init__(self, self.dilation_patch = dilation_patch def forward(self, input1: Tensor, input2: Tensor) -> Tensor: - return CorrelationFunction.apply(input1, input2, self.kernel_size, - self.max_displacement, self.stride, - self.padding, self.dilation, - self.dilation_patch) + return CorrelationFunction.apply( + input1, + input2, + self.kernel_size, + self.max_displacement, + self.stride, + self.padding, + self.dilation, + self.dilation_patch, + ) def __repr__(self) -> str: s = self.__class__.__name__ diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py index 6696b8a7747c..6c6d14243d22 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py @@ -13,26 +13,16 @@ from ..cnn import CONV_LAYERS from ..utils import ext_loader, print_log -ext_module = ext_loader.load_ext('_ext', [ - 'deform_conv_forward', 'deform_conv_backward_input', - 'deform_conv_backward_parameters' -]) +ext_module = ext_loader.load_ext( + '_ext', ['deform_conv_forward', 'deform_conv_backward_input', 'deform_conv_backward_parameters'] +) class DeformConv2dFunction(Function): - @staticmethod - def symbolic(g, - input, - offset, - weight, - stride, - padding, - dilation, - groups, - deform_groups, - bias=False, - im2col_step=32): + def symbolic( + g, input, offset, weight, stride, padding, dilation, groups, deform_groups, bias=False, im2col_step=32 + ): return g.op( 'mmcv::MMCVDeformConv2d', input, @@ -44,24 +34,28 @@ def symbolic(g, groups_i=groups, deform_groups_i=deform_groups, bias_i=bias, - im2col_step_i=im2col_step) + im2col_step_i=im2col_step, + ) @staticmethod - def forward(ctx, - input, - offset, - weight, - stride=1, - padding=0, - dilation=1, - groups=1, - deform_groups=1, - bias=False, - im2col_step=32): + def forward( + ctx, + input, + offset, + weight, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1, + bias=False, + im2col_step=32, + ): if input is not None and input.dim() != 4: raise ValueError( f'Expected 4D tensor as input, got {input.dim()}D tensor \ - instead.') + instead.' + ) assert bias is False, 'Only support bias is False.' ctx.stride = _pair(stride) ctx.padding = _pair(padding) @@ -81,14 +75,12 @@ def forward(ctx, weight = weight.type_as(input) ctx.save_for_backward(input, offset, weight) - output = input.new_empty( - DeformConv2dFunction._output_size(ctx, input, weight)) + output = input.new_empty(DeformConv2dFunction._output_size(ctx, input, weight)) ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones cur_im2col_step = min(ctx.im2col_step, input.size(0)) - assert (input.size(0) % - cur_im2col_step) == 0, 'im2col step must divide batchsize' + assert (input.size(0) % cur_im2col_step) == 0, 'im2col step must divide batchsize' ext_module.deform_conv_forward( input, weight, @@ -106,7 +98,8 @@ def forward(ctx, dilationH=ctx.dilation[0], group=ctx.groups, deformable_group=ctx.deform_groups, - im2col_step=cur_im2col_step) + im2col_step=cur_im2col_step, + ) return output @staticmethod @@ -117,8 +110,7 @@ def backward(ctx, grad_output): grad_input = grad_offset = grad_weight = None cur_im2col_step = min(ctx.im2col_step, input.size(0)) - assert (input.size(0) % cur_im2col_step - ) == 0, 'batch size must be divisible by im2col_step' + assert (input.size(0) % cur_im2col_step) == 0, 'batch size must be divisible by im2col_step' grad_output = grad_output.contiguous() if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: @@ -142,7 +134,8 @@ def backward(ctx, grad_output): dilationH=ctx.dilation[0], group=ctx.groups, deformable_group=ctx.deform_groups, - im2col_step=cur_im2col_step) + im2col_step=cur_im2col_step, + ) if ctx.needs_input_grad[2]: grad_weight = torch.zeros_like(weight) @@ -164,10 +157,10 @@ def backward(ctx, grad_output): group=ctx.groups, deformable_group=ctx.deform_groups, scale=1, - im2col_step=cur_im2col_step) + im2col_step=cur_im2col_step, + ) - return grad_input, grad_offset, grad_weight, \ - None, None, None, None, None, None, None + return grad_input, grad_offset, grad_weight, None, None, None, None, None, None, None @staticmethod def _output_size(ctx, input, weight): @@ -178,11 +171,11 @@ def _output_size(ctx, input, weight): pad = ctx.padding[d] kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1 stride_ = ctx.stride[d] - output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, ) + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,) if not all(map(lambda s: s > 0, output_size)): raise ValueError( - 'convolution input is too small (output would be ' + - 'x'.join(map(str, output_size)) + ')') + 'convolution input is too small (output would be ' + 'x'.join(map(str, output_size)) + ')' + ) return output_size @@ -223,27 +216,27 @@ class DeformConv2d(nn.Module): `New in version 1.3.17.` """ - @deprecated_api_warning({'deformable_groups': 'deform_groups'}, - cls_name='DeformConv2d') - def __init__(self, - in_channels: int, - out_channels: int, - kernel_size: Union[int, Tuple[int, ...]], - stride: Union[int, Tuple[int, ...]] = 1, - padding: Union[int, Tuple[int, ...]] = 0, - dilation: Union[int, Tuple[int, ...]] = 1, - groups: int = 1, - deform_groups: int = 1, - bias: bool = False, - im2col_step: int = 32) -> None: + @deprecated_api_warning({'deformable_groups': 'deform_groups'}, cls_name='DeformConv2d') + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int, ...]], + stride: Union[int, Tuple[int, ...]] = 1, + padding: Union[int, Tuple[int, ...]] = 0, + dilation: Union[int, Tuple[int, ...]] = 1, + groups: int = 1, + deform_groups: int = 1, + bias: bool = False, + im2col_step: int = 32, + ) -> None: super(DeformConv2d, self).__init__() - assert not bias, \ - f'bias={bias} is not supported in DeformConv2d.' - assert in_channels % groups == 0, \ - f'in_channels {in_channels} cannot be divisible by groups {groups}' - assert out_channels % groups == 0, \ - f'out_channels {out_channels} cannot be divisible by groups \ + assert not bias, f'bias={bias} is not supported in DeformConv2d.' + assert in_channels % groups == 0, f'in_channels {in_channels} cannot be divisible by groups {groups}' + assert ( + out_channels % groups == 0 + ), f'out_channels {out_channels} cannot be divisible by groups \ {groups}' self.in_channels = in_channels @@ -260,9 +253,7 @@ def __init__(self, self.output_padding = _single(0) # only weight, no bias - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // self.groups, - *self.kernel_size)) + self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size)) self.reset_parameters() @@ -296,20 +287,27 @@ def forward(self, x: Tensor, offset: Tensor) -> Tensor: """ # To fix an assert error in deform_conv_cuda.cpp:128 # input image is smaller than kernel - input_pad = (x.size(2) < self.kernel_size[0]) or (x.size(3) < - self.kernel_size[1]) + input_pad = (x.size(2) < self.kernel_size[0]) or (x.size(3) < self.kernel_size[1]) if input_pad: pad_h = max(self.kernel_size[0] - x.size(2), 0) pad_w = max(self.kernel_size[1] - x.size(3), 0) x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0) offset = offset.contiguous() - out = deform_conv2d(x, offset, self.weight, self.stride, self.padding, - self.dilation, self.groups, self.deform_groups, - False, self.im2col_step) + out = deform_conv2d( + x, + offset, + self.weight, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deform_groups, + False, + self.im2col_step, + ) if input_pad: - out = out[:, :, :out.size(2) - pad_h, :out.size(3) - - pad_w].contiguous() + out = out[:, :, : out.size(2) - pad_h, : out.size(3) - pad_w].contiguous() return out def __repr__(self): @@ -364,7 +362,8 @@ def __init__(self, *args, **kwargs): stride=_pair(self.stride), padding=_pair(self.padding), dilation=_pair(self.dilation), - bias=True) + bias=True, + ) self.init_offset() def init_offset(self): @@ -373,33 +372,35 @@ def init_offset(self): def forward(self, x): offset = self.conv_offset(x) - return deform_conv2d(x, offset, self.weight, self.stride, self.padding, - self.dilation, self.groups, self.deform_groups, - False, self.im2col_step) - - def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, - missing_keys, unexpected_keys, error_msgs): + return deform_conv2d( + x, + offset, + self.weight, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deform_groups, + False, + self.im2col_step, + ) + + def _load_from_state_dict( + self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ): version = local_metadata.get('version', None) if version is None or version < 2: # the key is different in early versions # In version < 2, DeformConvPack loads previous benchmark models. - if (prefix + 'conv_offset.weight' not in state_dict - and prefix[:-1] + '_offset.weight' in state_dict): - state_dict[prefix + 'conv_offset.weight'] = state_dict.pop( - prefix[:-1] + '_offset.weight') - if (prefix + 'conv_offset.bias' not in state_dict - and prefix[:-1] + '_offset.bias' in state_dict): - state_dict[prefix + - 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + - '_offset.bias') + if prefix + 'conv_offset.weight' not in state_dict and prefix[:-1] + '_offset.weight' in state_dict: + state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(prefix[:-1] + '_offset.weight') + if prefix + 'conv_offset.bias' not in state_dict and prefix[:-1] + '_offset.bias' in state_dict: + state_dict[prefix + 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + '_offset.bias') if version is not None and version > 1: - print_log( - f'DeformConv2dPack {prefix.rstrip(".")} is upgraded to ' - 'version 2.', - logger='root') - - super()._load_from_state_dict(state_dict, prefix, local_metadata, - strict, missing_keys, unexpected_keys, - error_msgs) + print_log(f'DeformConv2dPack {prefix.rstrip(".")} is upgraded to ' 'version 2.', logger='root') + + super()._load_from_state_dict( + state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py index cc245ba91fee..1528a0748922 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py @@ -6,15 +6,12 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext( - '_ext', ['deform_roi_pool_forward', 'deform_roi_pool_backward']) +ext_module = ext_loader.load_ext('_ext', ['deform_roi_pool_forward', 'deform_roi_pool_backward']) class DeformRoIPoolFunction(Function): - @staticmethod - def symbolic(g, input, rois, offset, output_size, spatial_scale, - sampling_ratio, gamma): + def symbolic(g, input, rois, offset, output_size, spatial_scale, sampling_ratio, gamma): return g.op( 'mmcv::MMCVDeformRoIPool', input, @@ -24,17 +21,11 @@ def symbolic(g, input, rois, offset, output_size, spatial_scale, pooled_width_i=output_size[1], spatial_scale_f=spatial_scale, sampling_ratio_f=sampling_ratio, - gamma_f=gamma) + gamma_f=gamma, + ) @staticmethod - def forward(ctx, - input, - rois, - offset, - output_size, - spatial_scale=1.0, - sampling_ratio=0, - gamma=0.1): + def forward(ctx, input, rois, offset, output_size, spatial_scale=1.0, sampling_ratio=0, gamma=0.1): if offset is None: offset = input.new_zeros(0) ctx.output_size = _pair(output_size) @@ -44,8 +35,7 @@ def forward(ctx, assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' - output_shape = (rois.size(0), input.size(1), ctx.output_size[0], - ctx.output_size[1]) + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], ctx.output_size[1]) output = input.new_zeros(output_shape) ext_module.deform_roi_pool_forward( @@ -57,7 +47,8 @@ def forward(ctx, pooled_width=ctx.output_size[1], spatial_scale=ctx.spatial_scale, sampling_ratio=ctx.sampling_ratio, - gamma=ctx.gamma) + gamma=ctx.gamma, + ) ctx.save_for_backward(input, rois, offset) return output @@ -80,7 +71,8 @@ def backward(ctx, grad_output): pooled_width=ctx.output_size[1], spatial_scale=ctx.spatial_scale, sampling_ratio=ctx.sampling_ratio, - gamma=ctx.gamma) + gamma=ctx.gamma, + ) if grad_offset.numel() == 0: grad_offset = None return grad_input, None, grad_offset, None, None, None, None @@ -90,12 +82,7 @@ def backward(ctx, grad_output): class DeformRoIPool(nn.Module): - - def __init__(self, - output_size, - spatial_scale=1.0, - sampling_ratio=0, - gamma=0.1): + def __init__(self, output_size, spatial_scale=1.0, sampling_ratio=0, gamma=0.1): super(DeformRoIPool, self).__init__() self.output_size = _pair(output_size) self.spatial_scale = float(spatial_scale) @@ -103,102 +90,76 @@ def __init__(self, self.gamma = float(gamma) def forward(self, input, rois, offset=None): - return deform_roi_pool(input, rois, offset, self.output_size, - self.spatial_scale, self.sampling_ratio, - self.gamma) + return deform_roi_pool( + input, rois, offset, self.output_size, self.spatial_scale, self.sampling_ratio, self.gamma + ) class DeformRoIPoolPack(DeformRoIPool): - - def __init__(self, - output_size, - output_channels, - deform_fc_channels=1024, - spatial_scale=1.0, - sampling_ratio=0, - gamma=0.1): - super(DeformRoIPoolPack, self).__init__(output_size, spatial_scale, - sampling_ratio, gamma) + def __init__( + self, output_size, output_channels, deform_fc_channels=1024, spatial_scale=1.0, sampling_ratio=0, gamma=0.1 + ): + super(DeformRoIPoolPack, self).__init__(output_size, spatial_scale, sampling_ratio, gamma) self.output_channels = output_channels self.deform_fc_channels = deform_fc_channels self.offset_fc = nn.Sequential( - nn.Linear( - self.output_size[0] * self.output_size[1] * - self.output_channels, self.deform_fc_channels), + nn.Linear(self.output_size[0] * self.output_size[1] * self.output_channels, self.deform_fc_channels), nn.ReLU(inplace=True), nn.Linear(self.deform_fc_channels, self.deform_fc_channels), nn.ReLU(inplace=True), - nn.Linear(self.deform_fc_channels, - self.output_size[0] * self.output_size[1] * 2)) + nn.Linear(self.deform_fc_channels, self.output_size[0] * self.output_size[1] * 2), + ) self.offset_fc[-1].weight.data.zero_() self.offset_fc[-1].bias.data.zero_() def forward(self, input, rois): assert input.size(1) == self.output_channels - x = deform_roi_pool(input, rois, None, self.output_size, - self.spatial_scale, self.sampling_ratio, - self.gamma) + x = deform_roi_pool(input, rois, None, self.output_size, self.spatial_scale, self.sampling_ratio, self.gamma) rois_num = rois.size(0) offset = self.offset_fc(x.view(rois_num, -1)) - offset = offset.view(rois_num, 2, self.output_size[0], - self.output_size[1]) - return deform_roi_pool(input, rois, offset, self.output_size, - self.spatial_scale, self.sampling_ratio, - self.gamma) + offset = offset.view(rois_num, 2, self.output_size[0], self.output_size[1]) + return deform_roi_pool( + input, rois, offset, self.output_size, self.spatial_scale, self.sampling_ratio, self.gamma + ) class ModulatedDeformRoIPoolPack(DeformRoIPool): - - def __init__(self, - output_size, - output_channels, - deform_fc_channels=1024, - spatial_scale=1.0, - sampling_ratio=0, - gamma=0.1): - super(ModulatedDeformRoIPoolPack, - self).__init__(output_size, spatial_scale, sampling_ratio, gamma) + def __init__( + self, output_size, output_channels, deform_fc_channels=1024, spatial_scale=1.0, sampling_ratio=0, gamma=0.1 + ): + super(ModulatedDeformRoIPoolPack, self).__init__(output_size, spatial_scale, sampling_ratio, gamma) self.output_channels = output_channels self.deform_fc_channels = deform_fc_channels self.offset_fc = nn.Sequential( - nn.Linear( - self.output_size[0] * self.output_size[1] * - self.output_channels, self.deform_fc_channels), + nn.Linear(self.output_size[0] * self.output_size[1] * self.output_channels, self.deform_fc_channels), nn.ReLU(inplace=True), nn.Linear(self.deform_fc_channels, self.deform_fc_channels), nn.ReLU(inplace=True), - nn.Linear(self.deform_fc_channels, - self.output_size[0] * self.output_size[1] * 2)) + nn.Linear(self.deform_fc_channels, self.output_size[0] * self.output_size[1] * 2), + ) self.offset_fc[-1].weight.data.zero_() self.offset_fc[-1].bias.data.zero_() self.mask_fc = nn.Sequential( - nn.Linear( - self.output_size[0] * self.output_size[1] * - self.output_channels, self.deform_fc_channels), + nn.Linear(self.output_size[0] * self.output_size[1] * self.output_channels, self.deform_fc_channels), nn.ReLU(inplace=True), - nn.Linear(self.deform_fc_channels, - self.output_size[0] * self.output_size[1] * 1), - nn.Sigmoid()) + nn.Linear(self.deform_fc_channels, self.output_size[0] * self.output_size[1] * 1), + nn.Sigmoid(), + ) self.mask_fc[2].weight.data.zero_() self.mask_fc[2].bias.data.zero_() def forward(self, input, rois): assert input.size(1) == self.output_channels - x = deform_roi_pool(input, rois, None, self.output_size, - self.spatial_scale, self.sampling_ratio, - self.gamma) + x = deform_roi_pool(input, rois, None, self.output_size, self.spatial_scale, self.sampling_ratio, self.gamma) rois_num = rois.size(0) offset = self.offset_fc(x.view(rois_num, -1)) - offset = offset.view(rois_num, 2, self.output_size[0], - self.output_size[1]) + offset = offset.view(rois_num, 2, self.output_size[0], self.output_size[1]) mask = self.mask_fc(x.view(rois_num, -1)) mask = mask.view(rois_num, 1, self.output_size[0], self.output_size[1]) - d = deform_roi_pool(input, rois, offset, self.output_size, - self.spatial_scale, self.sampling_ratio, - self.gamma) + d = deform_roi_pool(input, rois, offset, self.output_size, self.spatial_scale, self.sampling_ratio, self.gamma) return d * mask diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py index a2e593df9ee5..47d87b75d87f 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py @@ -7,37 +7,37 @@ class Conv2d_deprecated(Conv2d): - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) warnings.warn( 'Importing Conv2d wrapper from "mmcv.ops" will be deprecated in' - ' the future. Please import them from "mmcv.cnn" instead') + ' the future. Please import them from "mmcv.cnn" instead' + ) class ConvTranspose2d_deprecated(ConvTranspose2d): - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) warnings.warn( 'Importing ConvTranspose2d wrapper from "mmcv.ops" will be ' 'deprecated in the future. Please import them from "mmcv.cnn" ' - 'instead') + 'instead' + ) class MaxPool2d_deprecated(MaxPool2d): - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) warnings.warn( 'Importing MaxPool2d wrapper from "mmcv.ops" will be deprecated in' - ' the future. Please import them from "mmcv.cnn" instead') + ' the future. Please import them from "mmcv.cnn" instead' + ) class Linear_deprecated(Linear): - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) warnings.warn( 'Importing Linear wrapper from "mmcv.ops" will be deprecated in' - ' the future. Please import them from "mmcv.cnn" instead') + ' the future. Please import them from "mmcv.cnn" instead' + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py index 763bc93bd257..b218ed24ebc1 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py @@ -6,14 +6,18 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext('_ext', [ - 'sigmoid_focal_loss_forward', 'sigmoid_focal_loss_backward', - 'softmax_focal_loss_forward', 'softmax_focal_loss_backward' -]) +ext_module = ext_loader.load_ext( + '_ext', + [ + 'sigmoid_focal_loss_forward', + 'sigmoid_focal_loss_backward', + 'softmax_focal_loss_forward', + 'softmax_focal_loss_backward', + ], +) class SigmoidFocalLossFunction(Function): - @staticmethod def symbolic(g, input, target, gamma, alpha, weight, reduction): return g.op( @@ -23,16 +27,11 @@ def symbolic(g, input, target, gamma, alpha, weight, reduction): gamma_f=gamma, alpha_f=alpha, weight_f=weight, - reduction_s=reduction) + reduction_s=reduction, + ) @staticmethod - def forward(ctx, - input, - target, - gamma=2.0, - alpha=0.25, - weight=None, - reduction='mean'): + def forward(ctx, input, target, gamma=2.0, alpha=0.25, weight=None, reduction='mean'): assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor)) assert input.dim() == 2 @@ -52,8 +51,7 @@ def forward(ctx, output = input.new_zeros(input.size()) - ext_module.sigmoid_focal_loss_forward( - input, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha) + ext_module.sigmoid_focal_loss_forward(input, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha) if ctx.reduction == ctx.reduction_dict['mean']: output = output.sum() / input.size(0) elif ctx.reduction == ctx.reduction_dict['sum']: @@ -68,13 +66,7 @@ def backward(ctx, grad_output): grad_input = input.new_zeros(input.size()) - ext_module.sigmoid_focal_loss_backward( - input, - target, - weight, - grad_input, - gamma=ctx.gamma, - alpha=ctx.alpha) + ext_module.sigmoid_focal_loss_backward(input, target, weight, grad_input, gamma=ctx.gamma, alpha=ctx.alpha) grad_input *= grad_output if ctx.reduction == ctx.reduction_dict['mean']: @@ -86,7 +78,6 @@ def backward(ctx, grad_output): class SigmoidFocalLoss(nn.Module): - def __init__(self, gamma, alpha, weight=None, reduction='mean'): super(SigmoidFocalLoss, self).__init__() self.gamma = gamma @@ -95,8 +86,7 @@ def __init__(self, gamma, alpha, weight=None, reduction='mean'): self.reduction = reduction def forward(self, input, target): - return sigmoid_focal_loss(input, target, self.gamma, self.alpha, - self.weight, self.reduction) + return sigmoid_focal_loss(input, target, self.gamma, self.alpha, self.weight, self.reduction) def __repr__(self): s = self.__class__.__name__ @@ -107,7 +97,6 @@ def __repr__(self): class SoftmaxFocalLossFunction(Function): - @staticmethod def symbolic(g, input, target, gamma, alpha, weight, reduction): return g.op( @@ -117,16 +106,11 @@ def symbolic(g, input, target, gamma, alpha, weight, reduction): gamma_f=gamma, alpha_f=alpha, weight_f=weight, - reduction_s=reduction) + reduction_s=reduction, + ) @staticmethod - def forward(ctx, - input, - target, - gamma=2.0, - alpha=0.25, - weight=None, - reduction='mean'): + def forward(ctx, input, target, gamma=2.0, alpha=0.25, weight=None, reduction='mean'): assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor)) assert input.dim() == 2 @@ -152,13 +136,7 @@ def forward(ctx, input_softmax /= channel_stats.unsqueeze(1).expand_as(input) output = input.new_zeros(input.size(0)) - ext_module.softmax_focal_loss_forward( - input_softmax, - target, - weight, - output, - gamma=ctx.gamma, - alpha=ctx.alpha) + ext_module.softmax_focal_loss_forward(input_softmax, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha) if ctx.reduction == ctx.reduction_dict['mean']: output = output.sum() / input.size(0) @@ -174,13 +152,8 @@ def backward(ctx, grad_output): grad_input = input_softmax.new_zeros(input_softmax.size()) ext_module.softmax_focal_loss_backward( - input_softmax, - target, - weight, - buff, - grad_input, - gamma=ctx.gamma, - alpha=ctx.alpha) + input_softmax, target, weight, buff, grad_input, gamma=ctx.gamma, alpha=ctx.alpha + ) grad_input *= grad_output if ctx.reduction == ctx.reduction_dict['mean']: @@ -192,7 +165,6 @@ def backward(ctx, grad_output): class SoftmaxFocalLoss(nn.Module): - def __init__(self, gamma, alpha, weight=None, reduction='mean'): super(SoftmaxFocalLoss, self).__init__() self.gamma = gamma @@ -201,8 +173,7 @@ def __init__(self, gamma, alpha, weight=None, reduction='mean'): self.reduction = reduction def forward(self, input, target): - return softmax_focal_loss(input, target, self.gamma, self.alpha, - self.weight, self.reduction) + return softmax_focal_loss(input, target, self.gamma, self.alpha, self.weight, self.reduction) def __repr__(self): s = self.__class__.__name__ diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py index 374b7a878f19..606855fef5f9 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py @@ -3,10 +3,9 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext('_ext', [ - 'furthest_point_sampling_forward', - 'furthest_point_sampling_with_dist_forward' -]) +ext_module = ext_loader.load_ext( + '_ext', ['furthest_point_sampling_forward', 'furthest_point_sampling_with_dist_forward'] +) class FurthestPointSampling(Function): @@ -14,8 +13,7 @@ class FurthestPointSampling(Function): corresponding points have the furthest distance.""" @staticmethod - def forward(ctx, points_xyz: torch.Tensor, - num_points: int) -> torch.Tensor: + def forward(ctx, points_xyz: torch.Tensor, num_points: int) -> torch.Tensor: """ Args: points_xyz (Tensor): (B, N, 3) where N > num_points. @@ -31,12 +29,7 @@ def forward(ctx, points_xyz: torch.Tensor, temp = torch.cuda.FloatTensor(B, N).fill_(1e10) ext_module.furthest_point_sampling_forward( - points_xyz, - temp, - output, - b=B, - n=N, - m=num_points, + points_xyz, temp, output, b=B, n=N, m=num_points, ) if torch.__version__ != 'parrots': ctx.mark_non_differentiable(output) @@ -52,8 +45,7 @@ class FurthestPointSamplingWithDist(Function): corresponding points have the furthest distance.""" @staticmethod - def forward(ctx, points_dist: torch.Tensor, - num_points: int) -> torch.Tensor: + def forward(ctx, points_dist: torch.Tensor, num_points: int) -> torch.Tensor: """ Args: points_dist (Tensor): (B, N, N) Distance between each point pair. @@ -68,8 +60,7 @@ def forward(ctx, points_dist: torch.Tensor, output = points_dist.new_zeros([B, num_points], dtype=torch.int32) temp = points_dist.new_zeros([B, N]).fill_(1e10) - ext_module.furthest_point_sampling_with_dist_forward( - points_dist, temp, output, b=B, n=N, m=num_points) + ext_module.furthest_point_sampling_with_dist_forward(points_dist, temp, output, b=B, n=N, m=num_points) if torch.__version__ != 'parrots': ctx.mark_non_differentiable(output) return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py index 6d12508469c6..a1f89dd27ebe 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py @@ -121,13 +121,8 @@ def forward(ctx, grad_output, out, negative_slope, scale): empty = grad_output.new_empty(0) grad_input = ext_module.fused_bias_leakyrelu( - grad_output, - empty, - out, - act=3, - grad=1, - alpha=negative_slope, - scale=scale) + grad_output, empty, out, act=3, grad=1, alpha=negative_slope, scale=scale + ) dim = [0] @@ -140,37 +135,24 @@ def forward(ctx, grad_output, out, negative_slope, scale): @staticmethod def backward(ctx, gradgrad_input, gradgrad_bias): - out, = ctx.saved_tensors + (out,) = ctx.saved_tensors # The second order deviation, in fact, contains two parts, while the # the first part is zero. Thus, we direct consider the second part # which is similar with the first order deviation in implementation. gradgrad_out = ext_module.fused_bias_leakyrelu( - gradgrad_input, - gradgrad_bias.to(out.dtype), - out, - act=3, - grad=1, - alpha=ctx.negative_slope, - scale=ctx.scale) + gradgrad_input, gradgrad_bias.to(out.dtype), out, act=3, grad=1, alpha=ctx.negative_slope, scale=ctx.scale + ) return gradgrad_out, None, None, None class FusedBiasLeakyReLUFunction(Function): - @staticmethod def forward(ctx, input, bias, negative_slope, scale): empty = input.new_empty(0) - out = ext_module.fused_bias_leakyrelu( - input, - bias, - empty, - act=3, - grad=0, - alpha=negative_slope, - scale=scale) + out = ext_module.fused_bias_leakyrelu(input, bias, empty, act=3, grad=0, alpha=negative_slope, scale=scale) ctx.save_for_backward(out) ctx.negative_slope = negative_slope ctx.scale = scale @@ -179,10 +161,11 @@ def forward(ctx, input, bias, negative_slope, scale): @staticmethod def backward(ctx, grad_output): - out, = ctx.saved_tensors + (out,) = ctx.saved_tensors grad_input, grad_bias = FusedBiasLeakyReLUFunctionBackward.apply( - grad_output, out, ctx.negative_slope, ctx.scale) + grad_output, out, ctx.negative_slope, ctx.scale + ) return grad_input, grad_bias, None, None @@ -210,7 +193,7 @@ class FusedBiasLeakyReLU(nn.Module): map. Defaults to 2**0.5. """ - def __init__(self, num_channels, negative_slope=0.2, scale=2**0.5): + def __init__(self, num_channels, negative_slope=0.2, scale=2 ** 0.5): super(FusedBiasLeakyReLU, self).__init__() self.bias = nn.Parameter(torch.zeros(num_channels)) @@ -218,11 +201,10 @@ def __init__(self, num_channels, negative_slope=0.2, scale=2**0.5): self.scale = scale def forward(self, input): - return fused_bias_leakyrelu(input, self.bias, self.negative_slope, - self.scale) + return fused_bias_leakyrelu(input, self.bias, self.negative_slope, self.scale) -def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5): +def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2 ** 0.5): """Fused bias leaky ReLU function. This function is introduced in the StyleGAN2: @@ -250,11 +232,10 @@ def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5): if not input.is_cuda: return bias_leakyrelu_ref(input, bias, negative_slope, scale) - return FusedBiasLeakyReLUFunction.apply(input, bias.to(input.dtype), - negative_slope, scale) + return FusedBiasLeakyReLUFunction.apply(input, bias.to(input.dtype), negative_slope, scale) -def bias_leakyrelu_ref(x, bias, negative_slope=0.2, scale=2**0.5): +def bias_leakyrelu_ref(x, bias, negative_slope=0.2, scale=2 ** 0.5): if bias is not None: assert bias.ndim == 1 diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py index f52f1677d8ea..b6aa89d50279 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py @@ -3,16 +3,14 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext( - '_ext', ['gather_points_forward', 'gather_points_backward']) +ext_module = ext_loader.load_ext('_ext', ['gather_points_forward', 'gather_points_backward']) class GatherPoints(Function): """Gather points with given index.""" @staticmethod - def forward(ctx, features: torch.Tensor, - indices: torch.Tensor) -> torch.Tensor: + def forward(ctx, features: torch.Tensor, indices: torch.Tensor) -> torch.Tensor: """ Args: features (Tensor): (B, C, N) features to gather. @@ -28,8 +26,7 @@ def forward(ctx, features: torch.Tensor, _, C, N = features.size() output = torch.cuda.FloatTensor(B, C, npoint) - ext_module.gather_points_forward( - features, indices, output, b=B, c=C, n=N, npoints=npoint) + ext_module.gather_points_forward(features, indices, output, b=B, c=C, n=N, npoints=npoint) ctx.for_backwards = (indices, C, N) if torch.__version__ != 'parrots': @@ -43,14 +40,7 @@ def backward(ctx, grad_out): grad_features = torch.cuda.FloatTensor(B, C, N).zero_() grad_out_data = grad_out.data.contiguous() - ext_module.gather_points_backward( - grad_out_data, - idx, - grad_features.data, - b=B, - c=C, - n=N, - npoints=npoint) + ext_module.gather_points_backward(grad_out_data, idx, grad_features.data, b=B, c=C, n=N, npoints=npoint) return grad_features, None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py index 6c3ec9d758eb..85e8956baa99 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py @@ -9,8 +9,7 @@ from .ball_query import ball_query from .knn import knn -ext_module = ext_loader.load_ext( - '_ext', ['group_points_forward', 'group_points_backward']) +ext_module = ext_loader.load_ext('_ext', ['group_points_forward', 'group_points_backward']) class QueryAndGroup(nn.Module): @@ -36,16 +35,18 @@ class QueryAndGroup(nn.Module): Default: False. """ - def __init__(self, - max_radius, - sample_num, - min_radius=0, - use_xyz=True, - return_grouped_xyz=False, - normalize_xyz=False, - uniform_sample=False, - return_unique_cnt=False, - return_grouped_idx=False): + def __init__( + self, + max_radius, + sample_num, + min_radius=0, + use_xyz=True, + return_grouped_xyz=False, + normalize_xyz=False, + uniform_sample=False, + return_unique_cnt=False, + return_grouped_idx=False, + ): super().__init__() self.max_radius = max_radius self.min_radius = min_radius @@ -57,12 +58,9 @@ def __init__(self, self.return_unique_cnt = return_unique_cnt self.return_grouped_idx = return_grouped_idx if self.return_unique_cnt: - assert self.uniform_sample, \ - 'uniform_sample should be True when ' \ - 'returning the count of unique samples' + assert self.uniform_sample, 'uniform_sample should be True when ' 'returning the count of unique samples' if self.max_radius is None: - assert not self.normalize_xyz, \ - 'can not normalize grouped xyz when max_radius is None' + assert not self.normalize_xyz, 'can not normalize grouped xyz when max_radius is None' def forward(self, points_xyz, center_xyz, features=None): """ @@ -80,8 +78,7 @@ def forward(self, points_xyz, center_xyz, features=None): idx = knn(self.sample_num, points_xyz, center_xyz, False) idx = idx.transpose(1, 2).contiguous() else: - idx = ball_query(self.min_radius, self.max_radius, self.sample_num, - points_xyz, center_xyz) + idx = ball_query(self.min_radius, self.max_radius, self.sample_num, points_xyz, center_xyz) if self.uniform_sample: unique_cnt = torch.zeros((idx.shape[0], idx.shape[1])) @@ -90,18 +87,14 @@ def forward(self, points_xyz, center_xyz, features=None): unique_ind = torch.unique(idx[i_batch, i_region, :]) num_unique = unique_ind.shape[0] unique_cnt[i_batch, i_region] = num_unique - sample_ind = torch.randint( - 0, - num_unique, (self.sample_num - num_unique, ), - dtype=torch.long) + sample_ind = torch.randint(0, num_unique, (self.sample_num - num_unique,), dtype=torch.long) all_ind = torch.cat((unique_ind, unique_ind[sample_ind])) idx[i_batch, i_region, :] = all_ind xyz_trans = points_xyz.transpose(1, 2).contiguous() # (B, 3, npoint, sample_num) grouped_xyz = grouping_operation(xyz_trans, idx) - grouped_xyz_diff = grouped_xyz - \ - center_xyz.transpose(1, 2).unsqueeze(-1) # relative offsets + grouped_xyz_diff = grouped_xyz - center_xyz.transpose(1, 2).unsqueeze(-1) # relative offsets if self.normalize_xyz: grouped_xyz_diff /= self.max_radius @@ -109,13 +102,11 @@ def forward(self, points_xyz, center_xyz, features=None): grouped_features = grouping_operation(features, idx) if self.use_xyz: # (B, C + 3, npoint, sample_num) - new_features = torch.cat([grouped_xyz_diff, grouped_features], - dim=1) + new_features = torch.cat([grouped_xyz_diff, grouped_features], dim=1) else: new_features = grouped_features else: - assert (self.use_xyz - ), 'Cannot have not features and not use xyz as a feature!' + assert self.use_xyz, 'Cannot have not features and not use xyz as a feature!' new_features = grouped_xyz_diff ret = [new_features] @@ -142,10 +133,7 @@ def __init__(self, use_xyz: bool = True): super().__init__() self.use_xyz = use_xyz - def forward(self, - xyz: torch.Tensor, - new_xyz: torch.Tensor, - features: torch.Tensor = None): + def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None): """ Args: xyz (Tensor): (B, N, 3) xyz coordinates of the features. @@ -160,8 +148,7 @@ def forward(self, grouped_features = features.unsqueeze(2) if self.use_xyz: # (B, 3 + C, 1, N) - new_features = torch.cat([grouped_xyz, grouped_features], - dim=1) + new_features = torch.cat([grouped_xyz, grouped_features], dim=1) else: new_features = grouped_features else: @@ -174,8 +161,7 @@ class GroupingOperation(Function): """Group feature with given index.""" @staticmethod - def forward(ctx, features: torch.Tensor, - indices: torch.Tensor) -> torch.Tensor: + def forward(ctx, features: torch.Tensor, indices: torch.Tensor) -> torch.Tensor: """ Args: features (Tensor): (B, C, N) tensor of features to group. @@ -192,15 +178,13 @@ def forward(ctx, features: torch.Tensor, _, C, N = features.size() output = torch.cuda.FloatTensor(B, C, nfeatures, nsample) - ext_module.group_points_forward(B, C, N, nfeatures, nsample, features, - indices, output) + ext_module.group_points_forward(B, C, N, nfeatures, nsample, features, indices, output) ctx.for_backwards = (indices, N) return output @staticmethod - def backward(ctx, - grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: """ Args: grad_out (Tensor): (B, C, npoint, nsample) tensor of the gradients @@ -215,9 +199,7 @@ def backward(ctx, grad_features = torch.cuda.FloatTensor(B, C, N).zero_() grad_out_data = grad_out.data.contiguous() - ext_module.group_points_backward(B, C, N, npoint, nsample, - grad_out_data, idx, - grad_features.data) + ext_module.group_points_backward(B, C, N, npoint, nsample, grad_out_data, idx, grad_features.data) return grad_features, None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py index 29f2e5598ae2..5be5ea46aa91 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py @@ -12,10 +12,12 @@ def get_compiler_version(): def get_compiling_cuda_version(): return parrots.version.cuda + + else: from ..utils import ext_loader - ext_module = ext_loader.load_ext( - '_ext', ['get_compiler_version', 'get_compiling_cuda_version']) + + ext_module = ext_loader.load_ext('_ext', ['get_compiler_version', 'get_compiling_cuda_version']) def get_compiler_version(): return ext_module.get_compiler_version() @@ -25,9 +27,7 @@ def get_compiling_cuda_version(): def get_onnxruntime_op_path(): - wildcard = os.path.join( - os.path.abspath(os.path.dirname(os.path.dirname(__file__))), - '_ext_ort.*.so') + wildcard = os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), '_ext_ort.*.so') paths = glob.glob(wildcard) if len(paths) > 0: diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py index 6fc719791903..35a098e50995 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py @@ -3,10 +3,9 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext('_ext', [ - 'iou3d_boxes_iou_bev_forward', 'iou3d_nms_forward', - 'iou3d_nms_normal_forward' -]) +ext_module = ext_loader.load_ext( + '_ext', ['iou3d_boxes_iou_bev_forward', 'iou3d_nms_forward', 'iou3d_nms_normal_forward'] +) def boxes_iou_bev(boxes_a, boxes_b): @@ -19,11 +18,9 @@ def boxes_iou_bev(boxes_a, boxes_b): Returns: ans_iou (torch.Tensor): IoU result with shape (M, N). """ - ans_iou = boxes_a.new_zeros( - torch.Size((boxes_a.shape[0], boxes_b.shape[0]))) + ans_iou = boxes_a.new_zeros(torch.Size((boxes_a.shape[0], boxes_b.shape[0]))) - ext_module.iou3d_boxes_iou_bev_forward(boxes_a.contiguous(), - boxes_b.contiguous(), ans_iou) + ext_module.iou3d_boxes_iou_bev_forward(boxes_a.contiguous(), boxes_b.contiguous(), ans_iou) return ans_iou diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py index f33578503666..66be24b2c0db 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py @@ -15,11 +15,9 @@ class KNN(Function): """ @staticmethod - def forward(ctx, - k: int, - xyz: torch.Tensor, - center_xyz: torch.Tensor = None, - transposed: bool = False) -> torch.Tensor: + def forward( + ctx, k: int, xyz: torch.Tensor, center_xyz: torch.Tensor = None, transposed: bool = False + ) -> torch.Tensor: """ Args: k (int): number of nearest neighbors. @@ -50,8 +48,7 @@ def forward(ctx, assert center_xyz.is_contiguous() # [B, npoint, 3] center_xyz_device = center_xyz.get_device() - assert center_xyz_device == xyz.get_device(), \ - 'center_xyz and xyz should be put on the same device' + assert center_xyz_device == xyz.get_device(), 'center_xyz and xyz should be put on the same device' if torch.cuda.current_device() != center_xyz_device: torch.cuda.set_device(center_xyz_device) @@ -61,8 +58,7 @@ def forward(ctx, idx = center_xyz.new_zeros((B, npoint, k)).int() dist2 = center_xyz.new_zeros((B, npoint, k)).float() - ext_module.knn_forward( - xyz, center_xyz, idx, dist2, b=B, n=N, m=npoint, nsample=k) + ext_module.knn_forward(xyz, center_xyz, idx, dist2, b=B, n=N, m=npoint, nsample=k) # idx shape to [B, k, npoint] idx = idx.transpose(2, 1).contiguous() if torch.__version__ != 'parrots': diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py index cd514cc204c1..c067f11ca8c6 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py @@ -9,22 +9,13 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext( - '_ext', ['masked_im2col_forward', 'masked_col2im_forward']) +ext_module = ext_loader.load_ext('_ext', ['masked_im2col_forward', 'masked_col2im_forward']) class MaskedConv2dFunction(Function): - @staticmethod def symbolic(g, features, mask, weight, bias, padding, stride): - return g.op( - 'mmcv::MMCVMaskedConv2d', - features, - mask, - weight, - bias, - padding_i=padding, - stride_i=stride) + return g.op('mmcv::MMCVMaskedConv2d', features, mask, weight, bias, padding_i=padding, stride_i=stride) @staticmethod def forward(ctx, features, mask, weight, bias, padding=0, stride=1): @@ -34,24 +25,18 @@ def forward(ctx, features, mask, weight, bias, padding=0, stride=1): pad_h, pad_w = _pair(padding) stride_h, stride_w = _pair(stride) if stride_h != 1 or stride_w != 1: - raise ValueError( - 'Stride could not only be 1 in masked_conv2d currently.') + raise ValueError('Stride could not only be 1 in masked_conv2d currently.') out_channel, in_channel, kernel_h, kernel_w = weight.size() batch_size = features.size(0) - out_h = int( - math.floor((features.size(2) + 2 * pad_h - - (kernel_h - 1) - 1) / stride_h + 1)) - out_w = int( - math.floor((features.size(3) + 2 * pad_w - - (kernel_h - 1) - 1) / stride_w + 1)) + out_h = int(math.floor((features.size(2) + 2 * pad_h - (kernel_h - 1) - 1) / stride_h + 1)) + out_w = int(math.floor((features.size(3) + 2 * pad_w - (kernel_h - 1) - 1) / stride_w + 1)) mask_inds = torch.nonzero(mask[0] > 0, as_tuple=False) output = features.new_zeros(batch_size, out_channel, out_h, out_w) if mask_inds.numel() > 0: mask_h_idx = mask_inds[:, 0].contiguous() mask_w_idx = mask_inds[:, 1].contiguous() - data_col = features.new_zeros(in_channel * kernel_h * kernel_w, - mask_inds.size(0)) + data_col = features.new_zeros(in_channel * kernel_h * kernel_w, mask_inds.size(0)) ext_module.masked_im2col_forward( features, mask_h_idx, @@ -60,24 +45,19 @@ def forward(ctx, features, mask, weight, bias, padding=0, stride=1): kernel_h=kernel_h, kernel_w=kernel_w, pad_h=pad_h, - pad_w=pad_w) + pad_w=pad_w, + ) - masked_output = torch.addmm(1, bias[:, None], 1, - weight.view(out_channel, -1), data_col) + masked_output = torch.addmm(1, bias[:, None], 1, weight.view(out_channel, -1), data_col) ext_module.masked_col2im_forward( - masked_output, - mask_h_idx, - mask_w_idx, - output, - height=out_h, - width=out_w, - channels=out_channel) + masked_output, mask_h_idx, mask_w_idx, output, height=out_h, width=out_w, channels=out_channel + ) return output @staticmethod @once_differentiable def backward(ctx, grad_output): - return (None, ) * 5 + return (None,) * 5 masked_conv2d = MaskedConv2dFunction.apply @@ -90,22 +70,13 @@ class MaskedConv2d(nn.Conv2d): supports the stride parameter to be 1 currently. """ - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - bias=True): - super(MaskedConv2d, - self).__init__(in_channels, out_channels, kernel_size, stride, - padding, dilation, groups, bias) + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True): + super(MaskedConv2d, self).__init__( + in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias + ) def forward(self, input, mask=None): if mask is None: # fallback to the normal Conv2d return super(MaskedConv2d, self).forward(input) else: - return masked_conv2d(input, mask, self.weight, self.bias, - self.padding) + return masked_conv2d(input, mask, self.weight, self.bias, self.padding) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py index 48ca8cc0a8ac..ed7f7ece8e33 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py @@ -40,19 +40,20 @@ class BaseMergeCell(nn.Module): support ['nearest', 'bilinear']. Default: 'nearest'. """ - def __init__(self, - fused_channels=256, - out_channels=256, - with_out_conv=True, - out_conv_cfg=dict( - groups=1, kernel_size=3, padding=1, bias=True), - out_norm_cfg=None, - out_conv_order=('act', 'conv', 'norm'), - with_input1_conv=False, - with_input2_conv=False, - input_conv_cfg=None, - input_norm_cfg=None, - upsample_mode='nearest'): + def __init__( + self, + fused_channels=256, + out_channels=256, + with_out_conv=True, + out_conv_cfg=dict(groups=1, kernel_size=3, padding=1, bias=True), + out_norm_cfg=None, + out_conv_order=('act', 'conv', 'norm'), + with_input1_conv=False, + with_input2_conv=False, + input_conv_cfg=None, + input_norm_cfg=None, + upsample_mode='nearest', + ): super(BaseMergeCell, self).__init__() assert upsample_mode in ['nearest', 'bilinear'] self.with_out_conv = with_out_conv @@ -62,28 +63,22 @@ def __init__(self, if self.with_out_conv: self.out_conv = ConvModule( - fused_channels, - out_channels, - **out_conv_cfg, - norm_cfg=out_norm_cfg, - order=out_conv_order) - - self.input1_conv = self._build_input_conv( - out_channels, input_conv_cfg, - input_norm_cfg) if with_input1_conv else nn.Sequential() - self.input2_conv = self._build_input_conv( - out_channels, input_conv_cfg, - input_norm_cfg) if with_input2_conv else nn.Sequential() + fused_channels, out_channels, **out_conv_cfg, norm_cfg=out_norm_cfg, order=out_conv_order + ) + + self.input1_conv = ( + self._build_input_conv(out_channels, input_conv_cfg, input_norm_cfg) + if with_input1_conv + else nn.Sequential() + ) + self.input2_conv = ( + self._build_input_conv(out_channels, input_conv_cfg, input_norm_cfg) + if with_input2_conv + else nn.Sequential() + ) def _build_input_conv(self, channel, conv_cfg, norm_cfg): - return ConvModule( - channel, - channel, - 3, - padding=1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - bias=True) + return ConvModule(channel, channel, 3, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, bias=True) @abstractmethod def _binary_op(self, x1, x2): @@ -119,7 +114,6 @@ def forward(self, x1, x2, out_size=None): class SumCell(BaseMergeCell): - def __init__(self, in_channels, out_channels, **kwargs): super(SumCell, self).__init__(in_channels, out_channels, **kwargs) @@ -128,10 +122,8 @@ def _binary_op(self, x1, x2): class ConcatCell(BaseMergeCell): - def __init__(self, in_channels, out_channels, **kwargs): - super(ConcatCell, self).__init__(in_channels * 2, out_channels, - **kwargs) + super(ConcatCell, self).__init__(in_channels * 2, out_channels, **kwargs) def _binary_op(self, x1, x2): ret = torch.cat([x1, x2], dim=1) @@ -139,7 +131,6 @@ def _binary_op(self, x1, x2): class GlobalPoolingCell(BaseMergeCell): - def __init__(self, in_channels=None, out_channels=None, **kwargs): super().__init__(in_channels, out_channels, **kwargs) self.global_pool = nn.AdaptiveAvgPool2d((1, 1)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py index 3f1ceb073285..f70b7d356cf5 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py @@ -11,16 +11,12 @@ from ..cnn import CONV_LAYERS from ..utils import ext_loader, print_log -ext_module = ext_loader.load_ext( - '_ext', - ['modulated_deform_conv_forward', 'modulated_deform_conv_backward']) +ext_module = ext_loader.load_ext('_ext', ['modulated_deform_conv_forward', 'modulated_deform_conv_backward']) class ModulatedDeformConv2dFunction(Function): - @staticmethod - def symbolic(g, input, offset, mask, weight, bias, stride, padding, - dilation, groups, deform_groups): + def symbolic(g, input, offset, mask, weight, bias, stride, padding, dilation, groups, deform_groups): input_tensors = [input, offset, mask, weight] if bias is not None: input_tensors.append(bias) @@ -31,24 +27,18 @@ def symbolic(g, input, offset, mask, weight, bias, stride, padding, padding_i=padding, dilation_i=dilation, groups_i=groups, - deform_groups_i=deform_groups) + deform_groups_i=deform_groups, + ) @staticmethod - def forward(ctx, - input, - offset, - mask, - weight, - bias=None, - stride=1, - padding=0, - dilation=1, - groups=1, - deform_groups=1): + def forward( + ctx, input, offset, mask, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, deform_groups=1 + ): if input is not None and input.dim() != 4: raise ValueError( f'Expected 4D tensor as input, got {input.dim()}D tensor \ - instead.') + instead.' + ) ctx.stride = _pair(stride) ctx.padding = _pair(padding) ctx.dilation = _pair(dilation) @@ -67,8 +57,7 @@ def forward(ctx, input = input.type_as(offset) weight = weight.type_as(input) ctx.save_for_backward(input, offset, mask, weight, bias) - output = input.new_empty( - ModulatedDeformConv2dFunction._output_size(ctx, input, weight)) + output = input.new_empty(ModulatedDeformConv2dFunction._output_size(ctx, input, weight)) ctx._bufs = [input.new_empty(0), input.new_empty(0)] ext_module.modulated_deform_conv_forward( input, @@ -89,7 +78,8 @@ def forward(ctx, dilation_w=ctx.dilation[1], group=ctx.groups, deformable_group=ctx.deform_groups, - with_bias=ctx.with_bias) + with_bias=ctx.with_bias, + ) return output @staticmethod @@ -126,12 +116,12 @@ def backward(ctx, grad_output): dilation_w=ctx.dilation[1], group=ctx.groups, deformable_group=ctx.deform_groups, - with_bias=ctx.with_bias) + with_bias=ctx.with_bias, + ) if not ctx.with_bias: grad_bias = None - return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, - None, None, None, None, None) + return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, None, None, None, None, None) @staticmethod def _output_size(ctx, input, weight): @@ -142,11 +132,11 @@ def _output_size(ctx, input, weight): pad = ctx.padding[d] kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1 stride_ = ctx.stride[d] - output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, ) + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,) if not all(map(lambda s: s > 0, output_size)): raise ValueError( - 'convolution input is too small (output would be ' + - 'x'.join(map(str, output_size)) + ')') + 'convolution input is too small (output would be ' + 'x'.join(map(str, output_size)) + ')' + ) return output_size @@ -154,19 +144,19 @@ def _output_size(ctx, input, weight): class ModulatedDeformConv2d(nn.Module): - - @deprecated_api_warning({'deformable_groups': 'deform_groups'}, - cls_name='ModulatedDeformConv2d') - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - deform_groups=1, - bias=True): + @deprecated_api_warning({'deformable_groups': 'deform_groups'}, cls_name='ModulatedDeformConv2d') + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1, + bias=True, + ): super(ModulatedDeformConv2d, self).__init__() self.in_channels = in_channels self.out_channels = out_channels @@ -180,9 +170,7 @@ def __init__(self, self.transposed = False self.output_padding = _single(0) - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // groups, - *self.kernel_size)) + self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)) if bias: self.bias = nn.Parameter(torch.Tensor(out_channels)) else: @@ -193,16 +181,24 @@ def init_weights(self): n = self.in_channels for k in self.kernel_size: n *= k - stdv = 1. / math.sqrt(n) + stdv = 1.0 / math.sqrt(n) self.weight.data.uniform_(-stdv, stdv) if self.bias is not None: self.bias.data.zero_() def forward(self, x, offset, mask): - return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias, - self.stride, self.padding, - self.dilation, self.groups, - self.deform_groups) + return modulated_deform_conv2d( + x, + offset, + mask, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deform_groups, + ) @CONV_LAYERS.register_module('DCNv2') @@ -234,7 +230,8 @@ def __init__(self, *args, **kwargs): stride=self.stride, padding=self.padding, dilation=self.dilation, - bias=True) + bias=True, + ) self.init_weights() def init_weights(self): @@ -248,35 +245,36 @@ def forward(self, x): o1, o2, mask = torch.chunk(out, 3, dim=1) offset = torch.cat((o1, o2), dim=1) mask = torch.sigmoid(mask) - return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias, - self.stride, self.padding, - self.dilation, self.groups, - self.deform_groups) + return modulated_deform_conv2d( + x, + offset, + mask, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deform_groups, + ) - def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, - missing_keys, unexpected_keys, error_msgs): + def _load_from_state_dict( + self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ): version = local_metadata.get('version', None) if version is None or version < 2: # the key is different in early versions # In version < 2, ModulatedDeformConvPack # loads previous benchmark models. - if (prefix + 'conv_offset.weight' not in state_dict - and prefix[:-1] + '_offset.weight' in state_dict): - state_dict[prefix + 'conv_offset.weight'] = state_dict.pop( - prefix[:-1] + '_offset.weight') - if (prefix + 'conv_offset.bias' not in state_dict - and prefix[:-1] + '_offset.bias' in state_dict): - state_dict[prefix + - 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + - '_offset.bias') + if prefix + 'conv_offset.weight' not in state_dict and prefix[:-1] + '_offset.weight' in state_dict: + state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(prefix[:-1] + '_offset.weight') + if prefix + 'conv_offset.bias' not in state_dict and prefix[:-1] + '_offset.bias' in state_dict: + state_dict[prefix + 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + '_offset.bias') if version is not None and version > 1: - print_log( - f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to ' - 'version 2.', - logger='root') + print_log(f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to ' 'version 2.', logger='root') - super()._load_from_state_dict(state_dict, prefix, local_metadata, - strict, missing_keys, unexpected_keys, - error_msgs) + super()._load_from_state_dict( + state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py index ed3f26f61c22..e4ef4f9ab2d7 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py @@ -13,15 +13,14 @@ from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import BaseModule from ..utils import ext_loader -ext_module = ext_loader.load_ext( - '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward']) +ext_module = ext_loader.load_ext('_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward']) class MultiScaleDeformableAttnFunction(Function): - @staticmethod - def forward(ctx, value, value_spatial_shapes, value_level_start_index, - sampling_locations, attention_weights, im2col_step): + def forward( + ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step + ): """GPU version of multi-scale deformable attention. Args: @@ -50,10 +49,11 @@ def forward(ctx, value, value_spatial_shapes, value_level_start_index, value_level_start_index, sampling_locations, attention_weights, - im2col_step=ctx.im2col_step) - ctx.save_for_backward(value, value_spatial_shapes, - value_level_start_index, sampling_locations, - attention_weights) + im2col_step=ctx.im2col_step, + ) + ctx.save_for_backward( + value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights + ) return output @staticmethod @@ -69,8 +69,7 @@ def backward(ctx, grad_output): Tuple[Tensor]: Gradient of input tensors in forward. """ - value, value_spatial_shapes, value_level_start_index,\ - sampling_locations, attention_weights = ctx.saved_tensors + value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors grad_value = torch.zeros_like(value) grad_sampling_loc = torch.zeros_like(sampling_locations) grad_attn_weight = torch.zeros_like(attention_weights) @@ -85,14 +84,13 @@ def backward(ctx, grad_output): grad_value, grad_sampling_loc, grad_attn_weight, - im2col_step=ctx.im2col_step) + im2col_step=ctx.im2col_step, + ) - return grad_value, None, None, \ - grad_sampling_loc, grad_attn_weight, None + return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None -def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes, - sampling_locations, attention_weights): +def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights): """CPU version of multi-scale deformable attention. Args: @@ -114,10 +112,8 @@ def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes, """ bs, _, num_heads, embed_dims = value.shape - _, num_queries, num_heads, num_levels, num_points, _ =\ - sampling_locations.shape - value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], - dim=1) + _, num_queries, num_heads, num_levels, num_points, _ = sampling_locations.shape + value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1) sampling_grids = 2 * sampling_locations - 1 sampling_value_list = [] for level, (H_, W_) in enumerate(value_spatial_shapes): @@ -125,29 +121,27 @@ def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes, # bs, H_*W_, num_heads*embed_dims -> # bs, num_heads*embed_dims, H_*W_ -> # bs*num_heads, embed_dims, H_, W_ - value_l_ = value_list[level].flatten(2).transpose(1, 2).reshape( - bs * num_heads, embed_dims, H_, W_) + value_l_ = value_list[level].flatten(2).transpose(1, 2).reshape(bs * num_heads, embed_dims, H_, W_) # bs, num_queries, num_heads, num_points, 2 -> # bs, num_heads, num_queries, num_points, 2 -> # bs*num_heads, num_queries, num_points, 2 - sampling_grid_l_ = sampling_grids[:, :, :, - level].transpose(1, 2).flatten(0, 1) + sampling_grid_l_ = sampling_grids[:, :, :, level].transpose(1, 2).flatten(0, 1) # bs*num_heads, embed_dims, num_queries, num_points sampling_value_l_ = F.grid_sample( - value_l_, - sampling_grid_l_, - mode='bilinear', - padding_mode='zeros', - align_corners=False) + value_l_, sampling_grid_l_, mode='bilinear', padding_mode='zeros', align_corners=False + ) sampling_value_list.append(sampling_value_l_) # (bs, num_queries, num_heads, num_levels, num_points) -> # (bs, num_heads, num_queries, num_levels, num_points) -> # (bs, num_heads, 1, num_queries, num_levels*num_points) attention_weights = attention_weights.transpose(1, 2).reshape( - bs * num_heads, 1, num_queries, num_levels * num_points) - output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * - attention_weights).sum(-1).view(bs, num_heads * embed_dims, - num_queries) + bs * num_heads, 1, num_queries, num_levels * num_points + ) + output = ( + (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights) + .sum(-1) + .view(bs, num_heads * embed_dims, num_queries) + ) return output.transpose(1, 2).contiguous() @@ -179,20 +173,21 @@ class MultiScaleDeformableAttention(BaseModule): Default: None. """ - def __init__(self, - embed_dims=256, - num_heads=8, - num_levels=4, - num_points=4, - im2col_step=64, - dropout=0.1, - batch_first=False, - norm_cfg=None, - init_cfg=None): + def __init__( + self, + embed_dims=256, + num_heads=8, + num_levels=4, + num_points=4, + im2col_step=64, + dropout=0.1, + batch_first=False, + norm_cfg=None, + init_cfg=None, + ): super().__init__(init_cfg) if embed_dims % num_heads != 0: - raise ValueError(f'embed_dims must be divisible by num_heads, ' - f'but got {embed_dims} and {num_heads}') + raise ValueError(f'embed_dims must be divisible by num_heads, ' f'but got {embed_dims} and {num_heads}') dim_per_head = embed_dims // num_heads self.norm_cfg = norm_cfg self.dropout = nn.Dropout(dropout) @@ -202,9 +197,7 @@ def __init__(self, # which is more efficient in the CUDA implementation def _is_power_of_2(n): if (not isinstance(n, int)) or (n < 0): - raise ValueError( - 'invalid input for _is_power_of_2: {} (type: {})'.format( - n, type(n))) + raise ValueError('invalid input for _is_power_of_2: {} (type: {})'.format(n, type(n))) return (n & (n - 1) == 0) and n != 0 if not _is_power_of_2(dim_per_head): @@ -212,54 +205,53 @@ def _is_power_of_2(n): "You'd better set embed_dims in " 'MultiScaleDeformAttention to make ' 'the dimension of each attention head a power of 2 ' - 'which is more efficient in our CUDA implementation.') + 'which is more efficient in our CUDA implementation.' + ) self.im2col_step = im2col_step self.embed_dims = embed_dims self.num_levels = num_levels self.num_heads = num_heads self.num_points = num_points - self.sampling_offsets = nn.Linear( - embed_dims, num_heads * num_levels * num_points * 2) - self.attention_weights = nn.Linear(embed_dims, - num_heads * num_levels * num_points) + self.sampling_offsets = nn.Linear(embed_dims, num_heads * num_levels * num_points * 2) + self.attention_weights = nn.Linear(embed_dims, num_heads * num_levels * num_points) self.value_proj = nn.Linear(embed_dims, embed_dims) self.output_proj = nn.Linear(embed_dims, embed_dims) self.init_weights() def init_weights(self): """Default initialization for Parameters of Module.""" - constant_init(self.sampling_offsets, 0.) - thetas = torch.arange( - self.num_heads, - dtype=torch.float32) * (2.0 * math.pi / self.num_heads) + constant_init(self.sampling_offsets, 0.0) + thetas = torch.arange(self.num_heads, dtype=torch.float32) * (2.0 * math.pi / self.num_heads) grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) - grid_init = (grid_init / - grid_init.abs().max(-1, keepdim=True)[0]).view( - self.num_heads, 1, 1, - 2).repeat(1, self.num_levels, self.num_points, 1) + grid_init = ( + (grid_init / grid_init.abs().max(-1, keepdim=True)[0]) + .view(self.num_heads, 1, 1, 2) + .repeat(1, self.num_levels, self.num_points, 1) + ) for i in range(self.num_points): grid_init[:, :, i, :] *= i + 1 self.sampling_offsets.bias.data = grid_init.view(-1) - constant_init(self.attention_weights, val=0., bias=0.) - xavier_init(self.value_proj, distribution='uniform', bias=0.) - xavier_init(self.output_proj, distribution='uniform', bias=0.) + constant_init(self.attention_weights, val=0.0, bias=0.0) + xavier_init(self.value_proj, distribution='uniform', bias=0.0) + xavier_init(self.output_proj, distribution='uniform', bias=0.0) self._is_init = True - @deprecated_api_warning({'residual': 'identity'}, - cls_name='MultiScaleDeformableAttention') - def forward(self, - query, - key=None, - value=None, - identity=None, - query_pos=None, - key_padding_mask=None, - reference_points=None, - spatial_shapes=None, - level_start_index=None, - **kwargs): + @deprecated_api_warning({'residual': 'identity'}, cls_name='MultiScaleDeformableAttention') + def forward( + self, + query, + key=None, + value=None, + identity=None, + query_pos=None, + key_padding_mask=None, + reference_points=None, + spatial_shapes=None, + level_start_index=None, + **kwargs, + ): """Forward Function of MultiScaleDeformAttention. Args: @@ -317,37 +309,35 @@ def forward(self, value = value.masked_fill(key_padding_mask[..., None], 0.0) value = value.view(bs, num_value, self.num_heads, -1) sampling_offsets = self.sampling_offsets(query).view( - bs, num_query, self.num_heads, self.num_levels, self.num_points, 2) + bs, num_query, self.num_heads, self.num_levels, self.num_points, 2 + ) attention_weights = self.attention_weights(query).view( - bs, num_query, self.num_heads, self.num_levels * self.num_points) + bs, num_query, self.num_heads, self.num_levels * self.num_points + ) attention_weights = attention_weights.softmax(-1) - attention_weights = attention_weights.view(bs, num_query, - self.num_heads, - self.num_levels, - self.num_points) + attention_weights = attention_weights.view(bs, num_query, self.num_heads, self.num_levels, self.num_points) if reference_points.shape[-1] == 2: - offset_normalizer = torch.stack( - [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1) - sampling_locations = reference_points[:, :, None, :, None, :] \ - + sampling_offsets \ - / offset_normalizer[None, None, None, :, None, :] + offset_normalizer = torch.stack([spatial_shapes[..., 1], spatial_shapes[..., 0]], -1) + sampling_locations = ( + reference_points[:, :, None, :, None, :] + + sampling_offsets / offset_normalizer[None, None, None, :, None, :] + ) elif reference_points.shape[-1] == 4: - sampling_locations = reference_points[:, :, None, :, None, :2] \ - + sampling_offsets / self.num_points \ - * reference_points[:, :, None, :, None, 2:] \ - * 0.5 + sampling_locations = ( + reference_points[:, :, None, :, None, :2] + + sampling_offsets / self.num_points * reference_points[:, :, None, :, None, 2:] * 0.5 + ) else: raise ValueError( - f'Last dim of reference_points must be' - f' 2 or 4, but get {reference_points.shape[-1]} instead.') + f'Last dim of reference_points must be' f' 2 or 4, but get {reference_points.shape[-1]} instead.' + ) if torch.cuda.is_available() and value.is_cuda: output = MultiScaleDeformableAttnFunction.apply( - value, spatial_shapes, level_start_index, sampling_locations, - attention_weights, self.im2col_step) + value, spatial_shapes, level_start_index, sampling_locations, attention_weights, self.im2col_step + ) else: - output = multi_scale_deformable_attn_pytorch( - value, spatial_shapes, sampling_locations, attention_weights) + output = multi_scale_deformable_attn_pytorch(value, spatial_shapes, sampling_locations, attention_weights) output = self.output_proj(output) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py index 00f0b004ff55..4914c139427b 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py @@ -6,25 +6,20 @@ from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import deprecated_api_warning from ..utils import ext_loader -ext_module = ext_loader.load_ext( - '_ext', ['nms', 'softnms', 'nms_match', 'nms_rotated']) +ext_module = ext_loader.load_ext('_ext', ['nms', 'softnms', 'nms_match', 'nms_rotated']) # This function is modified from: https://github.com/pytorch/vision/ class NMSop(torch.autograd.Function): - @staticmethod - def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold, - max_num): + def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold, max_num): is_filtering_by_score = score_threshold > 0 if is_filtering_by_score: valid_mask = scores > score_threshold bboxes, scores = bboxes[valid_mask], scores[valid_mask] - valid_inds = torch.nonzero( - valid_mask, as_tuple=False).squeeze(dim=1) + valid_inds = torch.nonzero(valid_mask, as_tuple=False).squeeze(dim=1) - inds = ext_module.nms( - bboxes, scores, iou_threshold=float(iou_threshold), offset=offset) + inds = ext_module.nms(bboxes, scores, iou_threshold=float(iou_threshold), offset=offset) if max_num > 0: inds = inds[:max_num] @@ -33,19 +28,16 @@ def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold, return inds @staticmethod - def symbolic(g, bboxes, scores, iou_threshold, offset, score_threshold, - max_num): + def symbolic(g, bboxes, scores, iou_threshold, offset, score_threshold, max_num): from ..onnx import is_custom_op_loaded + has_custom_op = is_custom_op_loaded() # TensorRT nms plugin is aligned with original nms in ONNXRuntime is_trt_backend = os.environ.get('ONNX_BACKEND') == 'MMCVTensorRT' if has_custom_op and (not is_trt_backend): return g.op( - 'mmcv::NonMaxSuppression', - bboxes, - scores, - iou_threshold_f=float(iou_threshold), - offset_i=int(offset)) + 'mmcv::NonMaxSuppression', bboxes, scores, iou_threshold_f=float(iou_threshold), offset_i=int(offset) + ) else: from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze from ..onnx.onnx_utils.symbolic_helper import _size_helper @@ -54,36 +46,20 @@ def symbolic(g, bboxes, scores, iou_threshold, offset, score_threshold, scores = unsqueeze(g, unsqueeze(g, scores, 0), 0) if max_num > 0: - max_num = g.op( - 'Constant', - value_t=torch.tensor(max_num, dtype=torch.long)) + max_num = g.op('Constant', value_t=torch.tensor(max_num, dtype=torch.long)) else: dim = g.op('Constant', value_t=torch.tensor(0)) max_num = _size_helper(g, bboxes, dim) max_output_per_class = max_num - iou_threshold = g.op( - 'Constant', - value_t=torch.tensor([iou_threshold], dtype=torch.float)) - score_threshold = g.op( - 'Constant', - value_t=torch.tensor([score_threshold], dtype=torch.float)) - nms_out = g.op('NonMaxSuppression', boxes, scores, - max_output_per_class, iou_threshold, - score_threshold) - return squeeze( - g, - select( - g, nms_out, 1, - g.op( - 'Constant', - value_t=torch.tensor([2], dtype=torch.long))), 1) + iou_threshold = g.op('Constant', value_t=torch.tensor([iou_threshold], dtype=torch.float)) + score_threshold = g.op('Constant', value_t=torch.tensor([score_threshold], dtype=torch.float)) + nms_out = g.op('NonMaxSuppression', boxes, scores, max_output_per_class, iou_threshold, score_threshold) + return squeeze(g, select(g, nms_out, 1, g.op('Constant', value_t=torch.tensor([2], dtype=torch.long))), 1) class SoftNMSop(torch.autograd.Function): - @staticmethod - def forward(ctx, boxes, scores, iou_threshold, sigma, min_score, method, - offset): + def forward(ctx, boxes, scores, iou_threshold, sigma, min_score, method, offset): dets = boxes.new_empty((boxes.size(0), 5), device='cpu') inds = ext_module.softnms( boxes.cpu(), @@ -93,13 +69,14 @@ def forward(ctx, boxes, scores, iou_threshold, sigma, min_score, method, sigma=float(sigma), min_score=float(min_score), method=int(method), - offset=int(offset)) + offset=int(offset), + ) return dets, inds @staticmethod - def symbolic(g, boxes, scores, iou_threshold, sigma, min_score, method, - offset): + def symbolic(g, boxes, scores, iou_threshold, sigma, min_score, method, offset): from packaging import version + assert version.parse(torch.__version__) >= version.parse('1.7.0') nms_out = g.op( 'mmcv::SoftNonMaxSuppression', @@ -110,7 +87,8 @@ def symbolic(g, boxes, scores, iou_threshold, sigma, min_score, method, min_score_f=float(min_score), method_i=int(method), offset_i=int(offset), - outputs=2) + outputs=2, + ) return nms_out @@ -162,14 +140,10 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1): if torch.__version__ == 'parrots': indata_list = [boxes, scores] - indata_dict = { - 'iou_threshold': float(iou_threshold), - 'offset': int(offset) - } + indata_dict = {'iou_threshold': float(iou_threshold), 'offset': int(offset)} inds = ext_module.nms(*indata_list, **indata_dict) else: - inds = NMSop.apply(boxes, scores, iou_threshold, offset, - score_threshold, max_num) + inds = NMSop.apply(boxes, scores, iou_threshold, offset, score_threshold, max_num) dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1) if is_numpy: dets = dets.cpu().numpy() @@ -178,13 +152,7 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1): @deprecated_api_warning({'iou_thr': 'iou_threshold'}) -def soft_nms(boxes, - scores, - iou_threshold=0.3, - sigma=0.5, - min_score=1e-3, - method='linear', - offset=0): +def soft_nms(boxes, scores, iou_threshold=0.3, sigma=0.5, min_score=1e-3, method='linear', offset=0): """Dispatch to only CPU Soft NMS implementations. The input can be either a torch tensor or numpy array. @@ -238,16 +206,21 @@ def soft_nms(boxes, 'sigma': float(sigma), 'min_score': min_score, 'method': method_dict[method], - 'offset': int(offset) + 'offset': int(offset), } inds = ext_module.softnms(*indata_list, **indata_dict) else: - dets, inds = SoftNMSop.apply(boxes.cpu(), scores.cpu(), - float(iou_threshold), float(sigma), - float(min_score), method_dict[method], - int(offset)) + dets, inds = SoftNMSop.apply( + boxes.cpu(), + scores.cpu(), + float(iou_threshold), + float(sigma), + float(min_score), + method_dict[method], + int(offset), + ) - dets = dets[:inds.size(0)] + dets = dets[: inds.size(0)] if is_numpy: dets = dets.cpu().numpy() @@ -355,8 +328,7 @@ def nms_match(dets, iou_threshold): if dets.shape[0] == 0: matched = [] else: - assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' \ - f'but get {dets.shape}' + assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' f'but get {dets.shape}' if isinstance(dets, torch.Tensor): dets_t = dets.detach().cpu() else: @@ -403,15 +375,9 @@ def nms_rotated(dets, scores, iou_threshold, labels=None): if torch.__version__ == 'parrots': keep_inds = ext_module.nms_rotated( - dets_wl, - scores, - order, - dets_sorted, - iou_threshold=iou_threshold, - multi_label=multi_label) + dets_wl, scores, order, dets_sorted, iou_threshold=iou_threshold, multi_label=multi_label + ) else: - keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted, - iou_threshold, multi_label) - dets = torch.cat((dets[keep_inds], scores[keep_inds].reshape(-1, 1)), - dim=1) + keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted, iou_threshold, multi_label) + dets = torch.cat((dets[keep_inds], scores[keep_inds].reshape(-1, 1)), dim=1) return dets, keep_inds diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py index 2143c75f835a..228769d37089 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py @@ -7,8 +7,7 @@ ext_module = ext_loader.load_ext('_ext', ['pixel_group']) -def pixel_group(score, mask, embedding, kernel_label, kernel_contour, - kernel_region_num, distance_threshold): +def pixel_group(score, mask, embedding, kernel_label, kernel_contour, kernel_region_num, distance_threshold): """Group pixels into text instances, which is widely used text detection methods. @@ -56,20 +55,17 @@ def pixel_group(score, mask, embedding, kernel_label, kernel_contour, kernel_label, kernel_contour, kernel_region_num=kernel_region_num, - distance_threshold=distance_threshold) + distance_threshold=distance_threshold, + ) label = label.tolist() label = label[0] list_index = kernel_region_num pixel_assignment = [] for x in range(kernel_region_num): - pixel_assignment.append( - np.array( - label[list_index:list_index + int(label[x])], - dtype=np.float)) + pixel_assignment.append(np.array(label[list_index : list_index + int(label[x])], dtype=np.float)) list_index = list_index + int(label[x]) else: - pixel_assignment = ext_module.pixel_group(score, mask, embedding, - kernel_label, kernel_contour, - kernel_region_num, - distance_threshold) + pixel_assignment = ext_module.pixel_group( + score, mask, embedding, kernel_label, kernel_contour, kernel_region_num, distance_threshold + ) return pixel_assignment diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py index 37886cdb1a0d..204ff1c74e12 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py @@ -86,9 +86,9 @@ def bilinear_grid_sample(im, grid, align_corners=False): def is_in_onnx_export_without_custom_ops(): from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import get_onnxruntime_op_path + ort_custom_op_path = get_onnxruntime_op_path() - return torch.onnx.is_in_onnx_export( - ) and not osp.exists(ort_custom_op_path) + return torch.onnx.is_in_onnx_export() and not osp.exists(ort_custom_op_path) def normalize(grid): @@ -127,9 +127,8 @@ def generate_grid(num_grid, size, device): contains coordinates for the regular grids. """ - affine_trans = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device) - grid = F.affine_grid( - affine_trans, torch.Size((1, 1, *size)), align_corners=False) + affine_trans = torch.tensor([[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]], device=device) + grid = F.affine_grid(affine_trans, torch.Size((1, 1, *size)), align_corners=False) grid = normalize(grid) return grid.view(1, -1, 2).expand(num_grid, -1, -1) @@ -175,15 +174,13 @@ def get_shape_from_feature_map(x): torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2) """ if torch.onnx.is_in_onnx_export(): - img_shape = shape_as_tensor(x)[2:].flip(0).view(1, 1, 2).to( - x.device).float() + img_shape = shape_as_tensor(x)[2:].flip(0).view(1, 1, 2).to(x.device).float() else: - img_shape = torch.tensor(x.shape[2:]).flip(0).view(1, 1, 2).to( - x.device).float() + img_shape = torch.tensor(x.shape[2:]).flip(0).view(1, 1, 2).to(x.device).float() return img_shape -def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.): +def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.0): """Convert image based absolute point coordinates to image based relative coordinates for sampling. @@ -198,14 +195,11 @@ def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.): shape (N, P, 2) """ - assert (isinstance(img, tuple) and len(img) == 2) or \ - (isinstance(img, torch.Tensor) and len(img.shape) == 4) + assert (isinstance(img, tuple) and len(img) == 2) or (isinstance(img, torch.Tensor) and len(img.shape) == 4) if isinstance(img, tuple): h, w = img - scale = torch.tensor([w, h], - dtype=torch.float, - device=abs_img_points.device) + scale = torch.tensor([w, h], dtype=torch.float, device=abs_img_points.device) scale = scale.view(1, 1, 2) else: scale = get_shape_from_feature_map(img) @@ -213,10 +207,7 @@ def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.): return abs_img_points / scale * spatial_scale -def rel_roi_point_to_rel_img_point(rois, - rel_roi_points, - img, - spatial_scale=1.): +def rel_roi_point_to_rel_img_point(rois, rel_roi_points, img, spatial_scale=1.0): """Convert roi based relative point coordinates to image based absolute point coordinates. @@ -233,8 +224,7 @@ def rel_roi_point_to_rel_img_point(rois, """ abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points) - rel_img_point = abs_img_point_to_rel_img_point(abs_img_point, img, - spatial_scale) + rel_img_point = abs_img_point_to_rel_img_point(abs_img_point, img, spatial_scale) return rel_img_point @@ -263,18 +253,15 @@ def point_sample(input, points, align_corners=False, **kwargs): # If custom ops for onnx runtime not compiled use python # implementation of grid_sample function to make onnx graph # with supported nodes - output = bilinear_grid_sample( - input, denormalize(points), align_corners=align_corners) + output = bilinear_grid_sample(input, denormalize(points), align_corners=align_corners) else: - output = F.grid_sample( - input, denormalize(points), align_corners=align_corners, **kwargs) + output = F.grid_sample(input, denormalize(points), align_corners=align_corners, **kwargs) if add_dim: output = output.squeeze(3) return output class SimpleRoIAlign(nn.Module): - def __init__(self, output_size, spatial_scale, aligned=True): """Simple RoI align in PointRend, faster than standard RoIAlign. @@ -296,29 +283,24 @@ def __init__(self, output_size, spatial_scale, aligned=True): def forward(self, features, rois): num_imgs = features.size(0) num_rois = rois.size(0) - rel_roi_points = generate_grid( - num_rois, self.output_size, device=rois.device) + rel_roi_points = generate_grid(num_rois, self.output_size, device=rois.device) if torch.onnx.is_in_onnx_export(): - rel_img_points = rel_roi_point_to_rel_img_point( - rois, rel_roi_points, features, self.spatial_scale) - rel_img_points = rel_img_points.reshape(num_imgs, -1, - *rel_img_points.shape[1:]) - point_feats = point_sample( - features, rel_img_points, align_corners=not self.aligned) + rel_img_points = rel_roi_point_to_rel_img_point(rois, rel_roi_points, features, self.spatial_scale) + rel_img_points = rel_img_points.reshape(num_imgs, -1, *rel_img_points.shape[1:]) + point_feats = point_sample(features, rel_img_points, align_corners=not self.aligned) point_feats = point_feats.transpose(1, 2) else: point_feats = [] for batch_ind in range(num_imgs): # unravel batch dim feat = features[batch_ind].unsqueeze(0) - inds = (rois[:, 0].long() == batch_ind) + inds = rois[:, 0].long() == batch_ind if inds.any(): rel_img_points = rel_roi_point_to_rel_img_point( - rois[inds], rel_roi_points[inds], feat, - self.spatial_scale).unsqueeze(0) - point_feat = point_sample( - feat, rel_img_points, align_corners=not self.aligned) + rois[inds], rel_roi_points[inds], feat, self.spatial_scale + ).unsqueeze(0) + point_feat = point_sample(feat, rel_img_points, align_corners=not self.aligned) point_feat = point_feat.squeeze(0).transpose(0, 1) point_feats.append(point_feat) @@ -331,6 +313,5 @@ def forward(self, features, rois): def __repr__(self): format_str = self.__class__.__name__ - format_str += '(output_size={}, spatial_scale={}'.format( - self.output_size, self.spatial_scale) + format_str += '(output_size={}, spatial_scale={}'.format(self.output_size, self.spatial_scale) return format_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py index 4003173a5305..bf01e1d7746e 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py @@ -2,10 +2,9 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext('_ext', [ - 'points_in_boxes_part_forward', 'points_in_boxes_cpu_forward', - 'points_in_boxes_all_forward' -]) +ext_module = ext_loader.load_ext( + '_ext', ['points_in_boxes_part_forward', 'points_in_boxes_cpu_forward', 'points_in_boxes_all_forward'] +) def points_in_boxes_part(points, boxes): @@ -20,19 +19,14 @@ def points_in_boxes_part(points, boxes): Returns: box_idxs_of_pts (torch.Tensor): (B, M), default background = -1 """ - assert points.shape[0] == boxes.shape[0], \ - 'Points and boxes should have the same batch size, ' \ - f'but got {points.shape[0]} and {boxes.shape[0]}' - assert boxes.shape[2] == 7, \ - 'boxes dimension should be 7, ' \ - f'but got unexpected shape {boxes.shape[2]}' - assert points.shape[2] == 3, \ - 'points dimension should be 3, ' \ - f'but got unexpected shape {points.shape[2]}' + assert points.shape[0] == boxes.shape[0], ( + 'Points and boxes should have the same batch size, ' f'but got {points.shape[0]} and {boxes.shape[0]}' + ) + assert boxes.shape[2] == 7, 'boxes dimension should be 7, ' f'but got unexpected shape {boxes.shape[2]}' + assert points.shape[2] == 3, 'points dimension should be 3, ' f'but got unexpected shape {points.shape[2]}' batch_size, num_points, _ = points.shape - box_idxs_of_pts = points.new_zeros((batch_size, num_points), - dtype=torch.int).fill_(-1) + box_idxs_of_pts = points.new_zeros((batch_size, num_points), dtype=torch.int).fill_(-1) # If manually put the tensor 'points' or 'boxes' on a device # which is not the current device, some temporary variables @@ -43,14 +37,11 @@ def points_in_boxes_part(points, boxes): # Please refer to https://github.com/open-mmlab/mmdetection3d/issues/305 # for the incorrect output before the fix. points_device = points.get_device() - assert points_device == boxes.get_device(), \ - 'Points and boxes should be put on the same device' + assert points_device == boxes.get_device(), 'Points and boxes should be put on the same device' if torch.cuda.current_device() != points_device: torch.cuda.set_device(points_device) - ext_module.points_in_boxes_part_forward(boxes.contiguous(), - points.contiguous(), - box_idxs_of_pts) + ext_module.points_in_boxes_part_forward(boxes.contiguous(), points.contiguous(), box_idxs_of_pts) return box_idxs_of_pts @@ -69,24 +60,19 @@ def points_in_boxes_cpu(points, boxes): Returns: box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. """ - assert points.shape[0] == boxes.shape[0], \ - 'Points and boxes should have the same batch size, ' \ - f'but got {points.shape[0]} and {boxes.shape[0]}' - assert boxes.shape[2] == 7, \ - 'boxes dimension should be 7, ' \ - f'but got unexpected shape {boxes.shape[2]}' - assert points.shape[2] == 3, \ - 'points dimension should be 3, ' \ - f'but got unexpected shape {points.shape[2]}' + assert points.shape[0] == boxes.shape[0], ( + 'Points and boxes should have the same batch size, ' f'but got {points.shape[0]} and {boxes.shape[0]}' + ) + assert boxes.shape[2] == 7, 'boxes dimension should be 7, ' f'but got unexpected shape {boxes.shape[2]}' + assert points.shape[2] == 3, 'points dimension should be 3, ' f'but got unexpected shape {points.shape[2]}' batch_size, num_points, _ = points.shape num_boxes = boxes.shape[1] - point_indices = points.new_zeros((batch_size, num_boxes, num_points), - dtype=torch.int) + point_indices = points.new_zeros((batch_size, num_boxes, num_points), dtype=torch.int) for b in range(batch_size): - ext_module.points_in_boxes_cpu_forward(boxes[b].float().contiguous(), - points[b].float().contiguous(), - point_indices[b]) + ext_module.points_in_boxes_cpu_forward( + boxes[b].float().contiguous(), points[b].float().contiguous(), point_indices[b] + ) point_indices = point_indices.transpose(1, 2) return point_indices @@ -104,30 +90,22 @@ def points_in_boxes_all(points, boxes): Returns: box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. """ - assert boxes.shape[0] == points.shape[0], \ - 'Points and boxes should have the same batch size, ' \ - f'but got {boxes.shape[0]} and {boxes.shape[0]}' - assert boxes.shape[2] == 7, \ - 'boxes dimension should be 7, ' \ - f'but got unexpected shape {boxes.shape[2]}' - assert points.shape[2] == 3, \ - 'points dimension should be 3, ' \ - f'but got unexpected shape {points.shape[2]}' + assert boxes.shape[0] == points.shape[0], ( + 'Points and boxes should have the same batch size, ' f'but got {boxes.shape[0]} and {boxes.shape[0]}' + ) + assert boxes.shape[2] == 7, 'boxes dimension should be 7, ' f'but got unexpected shape {boxes.shape[2]}' + assert points.shape[2] == 3, 'points dimension should be 3, ' f'but got unexpected shape {points.shape[2]}' batch_size, num_points, _ = points.shape num_boxes = boxes.shape[1] - box_idxs_of_pts = points.new_zeros((batch_size, num_points, num_boxes), - dtype=torch.int).fill_(0) + box_idxs_of_pts = points.new_zeros((batch_size, num_points, num_boxes), dtype=torch.int).fill_(0) # Same reason as line 25-32 points_device = points.get_device() - assert points_device == boxes.get_device(), \ - 'Points and boxes should be put on the same device' + assert points_device == boxes.get_device(), 'Points and boxes should be put on the same device' if torch.cuda.current_device() != points_device: torch.cuda.set_device(points_device) - ext_module.points_in_boxes_all_forward(boxes.contiguous(), - points.contiguous(), - box_idxs_of_pts) + ext_module.points_in_boxes_all_forward(boxes.contiguous(), points.contiguous(), box_idxs_of_pts) return box_idxs_of_pts diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py index bf38beab85a1..5e6d0078813f 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py @@ -4,8 +4,8 @@ from torch import nn as nn from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import force_fp32 -from .furthest_point_sample import (furthest_point_sample, - furthest_point_sample_with_dist) + +from .furthest_point_sample import furthest_point_sample, furthest_point_sample_with_dist def calc_square_dist(point_feat_a, point_feat_b, norm=True): @@ -54,7 +54,8 @@ def get_sampler_cls(sampler_type): except KeyError: raise KeyError( f'Supported `sampler_type` are {sampler_mappings.keys()}, but got \ - {sampler_type}') + {sampler_type}' + ) class PointsSampler(nn.Module): @@ -71,16 +72,14 @@ class PointsSampler(nn.Module): Range of points to apply FPS. Default: [-1]. """ - def __init__(self, - num_point: List[int], - fps_mod_list: List[str] = ['D-FPS'], - fps_sample_range_list: List[int] = [-1]): + def __init__( + self, num_point: List[int], fps_mod_list: List[str] = ['D-FPS'], fps_sample_range_list: List[int] = [-1] + ): super().__init__() # FPS would be applied to different fps_mod in the list, # so the length of the num_point should be equal to # fps_mod_list and fps_sample_range_list. - assert len(num_point) == len(fps_mod_list) == len( - fps_sample_range_list) + assert len(num_point) == len(fps_mod_list) == len(fps_sample_range_list) self.num_point = num_point self.fps_sample_range_list = fps_sample_range_list self.samplers = nn.ModuleList() @@ -101,8 +100,7 @@ def forward(self, points_xyz, features): indices = [] last_fps_end_index = 0 - for fps_sample_range, sampler, npoint in zip( - self.fps_sample_range_list, self.samplers, self.num_point): + for fps_sample_range, sampler, npoint in zip(self.fps_sample_range_list, self.samplers, self.num_point): assert fps_sample_range < points_xyz.shape[1] if fps_sample_range == -1: @@ -112,16 +110,13 @@ def forward(self, points_xyz, features): else: sample_features = None else: - sample_points_xyz = \ - points_xyz[:, last_fps_end_index:fps_sample_range] + sample_points_xyz = points_xyz[:, last_fps_end_index:fps_sample_range] if features is not None: - sample_features = features[:, :, last_fps_end_index: - fps_sample_range] + sample_features = features[:, :, last_fps_end_index:fps_sample_range] else: sample_features = None - fps_idx = sampler(sample_points_xyz.contiguous(), sample_features, - npoint) + fps_idx = sampler(sample_points_xyz.contiguous(), sample_features, npoint) indices.append(fps_idx + last_fps_end_index) last_fps_end_index += fps_sample_range @@ -150,11 +145,9 @@ def __init__(self): def forward(self, points, features, npoint): """Sampling points with F-FPS.""" - assert features is not None, \ - 'feature input to FFPS_Sampler should not be None' + assert features is not None, 'feature input to FFPS_Sampler should not be None' features_for_fps = torch.cat([points, features.transpose(1, 2)], dim=2) - features_dist = calc_square_dist( - features_for_fps, features_for_fps, norm=False) + features_dist = calc_square_dist(features_for_fps, features_for_fps, norm=False) fps_idx = furthest_point_sample_with_dist(features_dist, npoint) return fps_idx @@ -167,8 +160,7 @@ def __init__(self): def forward(self, points, features, npoint): """Sampling points with FS_Sampling.""" - assert features is not None, \ - 'feature input to FS_Sampler should not be None' + assert features is not None, 'feature input to FS_Sampler should not be None' ffps_sampler = FFPSSampler() dfps_sampler = DFPSSampler() fps_idx_ffps = ffps_sampler(points, features, npoint) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py index cdf14e62b50e..e49546cb9059 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py @@ -5,19 +5,13 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext('_ext', - ['psamask_forward', 'psamask_backward']) +ext_module = ext_loader.load_ext('_ext', ['psamask_forward', 'psamask_backward']) class PSAMaskFunction(Function): - @staticmethod def symbolic(g, input, psa_type, mask_size): - return g.op( - 'mmcv::MMCVPSAMask', - input, - psa_type_i=psa_type, - mask_size_i=mask_size) + return g.op('mmcv::MMCVPSAMask', input, psa_type_i=psa_type, mask_size_i=mask_size) @staticmethod def forward(ctx, input, psa_type, mask_size): @@ -28,8 +22,7 @@ def forward(ctx, input, psa_type, mask_size): h_mask, w_mask = ctx.mask_size batch_size, channels, h_feature, w_feature = input.size() assert channels == h_mask * w_mask - output = input.new_zeros( - (batch_size, h_feature * w_feature, h_feature, w_feature)) + output = input.new_zeros((batch_size, h_feature * w_feature, h_feature, w_feature)) ext_module.psamask_forward( input, @@ -41,7 +34,8 @@ def forward(ctx, input, psa_type, mask_size): h_mask=h_mask, w_mask=w_mask, half_h_mask=(h_mask - 1) // 2, - half_w_mask=(w_mask - 1) // 2) + half_w_mask=(w_mask - 1) // 2, + ) return output @staticmethod @@ -50,8 +44,7 @@ def backward(ctx, grad_output): psa_type = ctx.psa_type h_mask, w_mask = ctx.mask_size batch_size, channels, h_feature, w_feature = input.size() - grad_input = grad_output.new_zeros( - (batch_size, channels, h_feature, w_feature)) + grad_input = grad_output.new_zeros((batch_size, channels, h_feature, w_feature)) ext_module.psamask_backward( grad_output, grad_input, @@ -62,7 +55,8 @@ def backward(ctx, grad_output): h_mask=h_mask, w_mask=w_mask, half_h_mask=(h_mask - 1) // 2, - half_w_mask=(w_mask - 1) // 2) + half_w_mask=(w_mask - 1) // 2, + ) return grad_input, None, None, None @@ -70,7 +64,6 @@ def backward(ctx, grad_output): class PSAMask(nn.Module): - def __init__(self, psa_type, mask_size=None): super(PSAMask, self).__init__() assert psa_type in ['collect', 'distribute'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py index 0755aefc66e6..15be8de4db83 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py @@ -7,16 +7,14 @@ from ..utils import deprecated_api_warning, ext_loader -ext_module = ext_loader.load_ext('_ext', - ['roi_align_forward', 'roi_align_backward']) +ext_module = ext_loader.load_ext('_ext', ['roi_align_forward', 'roi_align_backward']) class RoIAlignFunction(Function): - @staticmethod - def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio, - pool_mode, aligned): + def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio, pool_mode, aligned): from ..onnx import is_custom_op_loaded + has_custom_op = is_custom_op_loaded() if has_custom_op: return g.op( @@ -28,25 +26,22 @@ def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio, spatial_scale_f=spatial_scale, sampling_ratio_i=sampling_ratio, mode_s=pool_mode, - aligned_i=aligned) + aligned_i=aligned, + ) else: - from torch.onnx.symbolic_opset9 import sub, squeeze - from torch.onnx.symbolic_helper import _slice_helper from torch.onnx import TensorProtoDataType + from torch.onnx.symbolic_helper import _slice_helper + from torch.onnx.symbolic_opset9 import squeeze, sub + # batch_indices = rois[:, 0].long() - batch_indices = _slice_helper( - g, rois, axes=[1], starts=[0], ends=[1]) + batch_indices = _slice_helper(g, rois, axes=[1], starts=[0], ends=[1]) batch_indices = squeeze(g, batch_indices, 1) - batch_indices = g.op( - 'Cast', batch_indices, to_i=TensorProtoDataType.INT64) + batch_indices = g.op('Cast', batch_indices, to_i=TensorProtoDataType.INT64) # rois = rois[:, 1:] rois = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5]) if aligned: # rois -= 0.5/spatial_scale - aligned_offset = g.op( - 'Constant', - value_t=torch.tensor([0.5 / spatial_scale], - dtype=torch.float32)) + aligned_offset = g.op('Constant', value_t=torch.tensor([0.5 / spatial_scale], dtype=torch.float32)) rois = sub(g, rois, aligned_offset) # roi align return g.op( @@ -58,17 +53,11 @@ def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio, output_width_i=output_size[1], spatial_scale_f=spatial_scale, sampling_ratio_i=max(0, sampling_ratio), - mode_s=pool_mode) + mode_s=pool_mode, + ) @staticmethod - def forward(ctx, - input, - rois, - output_size, - spatial_scale=1.0, - sampling_ratio=0, - pool_mode='avg', - aligned=True): + def forward(ctx, input, rois, output_size, spatial_scale=1.0, sampling_ratio=0, pool_mode='avg', aligned=True): ctx.output_size = _pair(output_size) ctx.spatial_scale = spatial_scale ctx.sampling_ratio = sampling_ratio @@ -79,8 +68,7 @@ def forward(ctx, assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' - output_shape = (rois.size(0), input.size(1), ctx.output_size[0], - ctx.output_size[1]) + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], ctx.output_size[1]) output = input.new_zeros(output_shape) if ctx.pool_mode == 0: argmax_y = input.new_zeros(output_shape) @@ -100,7 +88,8 @@ def forward(ctx, spatial_scale=ctx.spatial_scale, sampling_ratio=ctx.sampling_ratio, pool_mode=ctx.pool_mode, - aligned=ctx.aligned) + aligned=ctx.aligned, + ) ctx.save_for_backward(rois, argmax_y, argmax_x) return output @@ -123,7 +112,8 @@ def backward(ctx, grad_output): spatial_scale=ctx.spatial_scale, sampling_ratio=ctx.sampling_ratio, pool_mode=ctx.pool_mode, - aligned=ctx.aligned) + aligned=ctx.aligned, + ) return grad_input, None, None, None, None, None, None @@ -167,19 +157,10 @@ class RoIAlign(nn.Module): performance if ROIAlign is used together with conv layers. """ - @deprecated_api_warning( - { - 'out_size': 'output_size', - 'sample_num': 'sampling_ratio' - }, - cls_name='RoIAlign') - def __init__(self, - output_size, - spatial_scale=1.0, - sampling_ratio=0, - pool_mode='avg', - aligned=True, - use_torchvision=False): + @deprecated_api_warning({'out_size': 'output_size', 'sample_num': 'sampling_ratio'}, cls_name='RoIAlign') + def __init__( + self, output_size, spatial_scale=1.0, sampling_ratio=0, pool_mode='avg', aligned=True, use_torchvision=False + ): super(RoIAlign, self).__init__() self.output_size = _pair(output_size) @@ -198,19 +179,19 @@ def forward(self, input, rois): """ if self.use_torchvision: from torchvision.ops import roi_align as tv_roi_align + if 'aligned' in tv_roi_align.__code__.co_varnames: - return tv_roi_align(input, rois, self.output_size, - self.spatial_scale, self.sampling_ratio, - self.aligned) + return tv_roi_align( + input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned + ) else: if self.aligned: - rois -= rois.new_tensor([0.] + - [0.5 / self.spatial_scale] * 4) - return tv_roi_align(input, rois, self.output_size, - self.spatial_scale, self.sampling_ratio) + rois -= rois.new_tensor([0.0] + [0.5 / self.spatial_scale] * 4) + return tv_roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio) else: - return roi_align(input, rois, self.output_size, self.spatial_scale, - self.sampling_ratio, self.pool_mode, self.aligned) + return roi_align( + input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.pool_mode, self.aligned + ) def __repr__(self): s = self.__class__.__name__ diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py index 0ce4961a3555..07108d2bc888 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py @@ -4,15 +4,12 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext( - '_ext', ['roi_align_rotated_forward', 'roi_align_rotated_backward']) +ext_module = ext_loader.load_ext('_ext', ['roi_align_rotated_forward', 'roi_align_rotated_backward']) class RoIAlignRotatedFunction(Function): - @staticmethod - def symbolic(g, features, rois, out_size, spatial_scale, sample_num, - aligned, clockwise): + def symbolic(g, features, rois, out_size, spatial_scale, sample_num, aligned, clockwise): if isinstance(out_size, int): out_h = out_size out_w = out_size @@ -22,8 +19,7 @@ def symbolic(g, features, rois, out_size, spatial_scale, sample_num, assert isinstance(out_size[1], int) out_h, out_w = out_size else: - raise TypeError( - '"out_size" must be an integer or tuple of integers') + raise TypeError('"out_size" must be an integer or tuple of integers') return g.op( 'mmcv::MMCVRoIAlignRotated', features, @@ -33,17 +29,11 @@ def symbolic(g, features, rois, out_size, spatial_scale, sample_num, spatial_scale_f=spatial_scale, sampling_ratio_i=sample_num, aligned_i=aligned, - clockwise_i=clockwise) + clockwise_i=clockwise, + ) @staticmethod - def forward(ctx, - features, - rois, - out_size, - spatial_scale, - sample_num=0, - aligned=True, - clockwise=False): + def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0, aligned=True, clockwise=False): if isinstance(out_size, int): out_h = out_size out_w = out_size @@ -53,8 +43,7 @@ def forward(ctx, assert isinstance(out_size[1], int) out_h, out_w = out_size else: - raise TypeError( - '"out_size" must be an integer or tuple of integers') + raise TypeError('"out_size" must be an integer or tuple of integers') ctx.spatial_scale = spatial_scale ctx.sample_num = sample_num ctx.aligned = aligned @@ -75,7 +64,8 @@ def forward(ctx, spatial_scale=spatial_scale, sample_num=sample_num, aligned=aligned, - clockwise=clockwise) + clockwise=clockwise, + ) return output @staticmethod @@ -95,8 +85,7 @@ def backward(ctx, grad_output): grad_input = grad_rois = None if ctx.needs_input_grad[0]: - grad_input = rois.new_zeros(batch_size, num_channels, data_height, - data_width) + grad_input = rois.new_zeros(batch_size, num_channels, data_height, data_width) ext_module.roi_align_rotated_backward( grad_output.contiguous(), rois, @@ -106,7 +95,8 @@ def backward(ctx, grad_output): spatial_scale=spatial_scale, sample_num=sample_num, aligned=aligned, - clockwise=clockwise) + clockwise=clockwise, + ) return grad_input, grad_rois, None, None, None, None, None @@ -156,12 +146,7 @@ class RoIAlignRotated(nn.Module): performance if ROIAlign is used together with conv layers. """ - def __init__(self, - out_size, - spatial_scale, - sample_num=0, - aligned=True, - clockwise=False): + def __init__(self, out_size, spatial_scale, sample_num=0, aligned=True, clockwise=False): super(RoIAlignRotated, self).__init__() self.out_size = out_size @@ -171,7 +156,6 @@ def __init__(self, self.clockwise = clockwise def forward(self, features, rois): - return RoIAlignRotatedFunction.apply(features, rois, self.out_size, - self.spatial_scale, - self.sample_num, self.aligned, - self.clockwise) + return RoIAlignRotatedFunction.apply( + features, rois, self.out_size, self.spatial_scale, self.sample_num, self.aligned, self.clockwise + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py index d339d8f2941e..04e3d55a3a5c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py @@ -7,20 +7,13 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext('_ext', - ['roi_pool_forward', 'roi_pool_backward']) +ext_module = ext_loader.load_ext('_ext', ['roi_pool_forward', 'roi_pool_backward']) class RoIPoolFunction(Function): - @staticmethod def symbolic(g, input, rois, output_size, spatial_scale): - return g.op( - 'MaxRoiPool', - input, - rois, - pooled_shape_i=output_size, - spatial_scale_f=spatial_scale) + return g.op('MaxRoiPool', input, rois, pooled_shape_i=output_size, spatial_scale_f=spatial_scale) @staticmethod def forward(ctx, input, rois, output_size, spatial_scale=1.0): @@ -30,8 +23,7 @@ def forward(ctx, input, rois, output_size, spatial_scale=1.0): assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' - output_shape = (rois.size(0), input.size(1), ctx.output_size[0], - ctx.output_size[1]) + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], ctx.output_size[1]) output = input.new_zeros(output_shape) argmax = input.new_zeros(output_shape, dtype=torch.int) @@ -42,7 +34,8 @@ def forward(ctx, input, rois, output_size, spatial_scale=1.0): argmax, pooled_height=ctx.output_size[0], pooled_width=ctx.output_size[1], - spatial_scale=ctx.spatial_scale) + spatial_scale=ctx.spatial_scale, + ) ctx.save_for_backward(rois, argmax) return output @@ -60,7 +53,8 @@ def backward(ctx, grad_output): grad_input, pooled_height=ctx.output_size[0], pooled_width=ctx.output_size[1], - spatial_scale=ctx.spatial_scale) + spatial_scale=ctx.spatial_scale, + ) return grad_input, None, None, None @@ -69,7 +63,6 @@ def backward(ctx, grad_output): class RoIPool(nn.Module): - def __init__(self, output_size, spatial_scale=1.0): super(RoIPool, self).__init__() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py index 8742eeaa4f64..befb3a757324 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py @@ -6,8 +6,7 @@ import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv from ..utils import ext_loader -ext_module = ext_loader.load_ext( - '_ext', ['roiaware_pool3d_forward', 'roiaware_pool3d_backward']) +ext_module = ext_loader.load_ext('_ext', ['roiaware_pool3d_forward', 'roiaware_pool3d_backward']) class RoIAwarePool3d(nn.Module): @@ -46,16 +45,12 @@ def forward(self, rois, pts, pts_feature): pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C] """ - return RoIAwarePool3dFunction.apply(rois, pts, pts_feature, - self.out_size, - self.max_pts_per_voxel, self.mode) + return RoIAwarePool3dFunction.apply(rois, pts, pts_feature, self.out_size, self.max_pts_per_voxel, self.mode) class RoIAwarePool3dFunction(Function): - @staticmethod - def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel, - mode): + def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel, mode): """ Args: rois (torch.Tensor): [N, 7], in LiDAR coordinate, @@ -85,20 +80,13 @@ def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel, num_channels = pts_feature.shape[-1] num_pts = pts.shape[0] - pooled_features = pts_feature.new_zeros( - (num_rois, out_x, out_y, out_z, num_channels)) - argmax = pts_feature.new_zeros( - (num_rois, out_x, out_y, out_z, num_channels), dtype=torch.int) - pts_idx_of_voxels = pts_feature.new_zeros( - (num_rois, out_x, out_y, out_z, max_pts_per_voxel), - dtype=torch.int) + pooled_features = pts_feature.new_zeros((num_rois, out_x, out_y, out_z, num_channels)) + argmax = pts_feature.new_zeros((num_rois, out_x, out_y, out_z, num_channels), dtype=torch.int) + pts_idx_of_voxels = pts_feature.new_zeros((num_rois, out_x, out_y, out_z, max_pts_per_voxel), dtype=torch.int) - ext_module.roiaware_pool3d_forward(rois, pts, pts_feature, argmax, - pts_idx_of_voxels, pooled_features, - mode) + ext_module.roiaware_pool3d_forward(rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features, mode) - ctx.roiaware_pool3d_for_backward = (pts_idx_of_voxels, argmax, mode, - num_pts, num_channels) + ctx.roiaware_pool3d_for_backward = (pts_idx_of_voxels, argmax, mode, num_pts, num_channels) return pooled_features @staticmethod @@ -107,8 +95,6 @@ def backward(ctx, grad_out): pts_idx_of_voxels, argmax, mode, num_pts, num_channels = ret grad_in = grad_out.new_zeros((num_pts, num_channels)) - ext_module.roiaware_pool3d_backward(pts_idx_of_voxels, argmax, - grad_out.contiguous(), grad_in, - mode) + ext_module.roiaware_pool3d_backward(pts_idx_of_voxels, argmax, grad_out.contiguous(), grad_in, mode) return None, None, grad_in, None, None, None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py index 0a21412c0728..c24c4844bd24 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py @@ -34,12 +34,10 @@ def forward(self, points, point_features, boxes3d): shape is (B, M, 512, 3 + C). pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M). """ - return RoIPointPool3dFunction.apply(points, point_features, boxes3d, - self.num_sampled_points) + return RoIPointPool3dFunction.apply(points, point_features, boxes3d, self.num_sampled_points) class RoIPointPool3dFunction(Function): - @staticmethod def forward(ctx, points, point_features, boxes3d, num_sampled_points=512): """ @@ -57,18 +55,18 @@ def forward(ctx, points, point_features, boxes3d, num_sampled_points=512): pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M). """ assert len(points.shape) == 3 and points.shape[2] == 3 - batch_size, boxes_num, feature_len = points.shape[0], boxes3d.shape[ - 1], point_features.shape[2] + batch_size, boxes_num, feature_len = points.shape[0], boxes3d.shape[1], point_features.shape[2] pooled_boxes3d = boxes3d.view(batch_size, -1, 7) - pooled_features = point_features.new_zeros( - (batch_size, boxes_num, num_sampled_points, 3 + feature_len)) - pooled_empty_flag = point_features.new_zeros( - (batch_size, boxes_num)).int() - - ext_module.roipoint_pool3d_forward(points.contiguous(), - pooled_boxes3d.contiguous(), - point_features.contiguous(), - pooled_features, pooled_empty_flag) + pooled_features = point_features.new_zeros((batch_size, boxes_num, num_sampled_points, 3 + feature_len)) + pooled_empty_flag = point_features.new_zeros((batch_size, boxes_num)).int() + + ext_module.roipoint_pool3d_forward( + points.contiguous(), + pooled_boxes3d.contiguous(), + point_features.contiguous(), + pooled_features, + pooled_empty_flag, + ) return pooled_features, pooled_empty_flag diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py index 63f067b908b3..c72bbb8a502d 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py @@ -34,16 +34,18 @@ class SAConv2d(ConvAWS2d): convolution. Default: ``False``. """ - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - bias=True, - use_deform=False): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + use_deform=False, + ): super().__init__( in_channels, out_channels, @@ -52,30 +54,16 @@ def __init__(self, padding=padding, dilation=dilation, groups=groups, - bias=bias) + bias=bias, + ) self.use_deform = use_deform - self.switch = nn.Conv2d( - self.in_channels, 1, kernel_size=1, stride=stride, bias=True) + self.switch = nn.Conv2d(self.in_channels, 1, kernel_size=1, stride=stride, bias=True) self.weight_diff = nn.Parameter(torch.Tensor(self.weight.size())) - self.pre_context = nn.Conv2d( - self.in_channels, self.in_channels, kernel_size=1, bias=True) - self.post_context = nn.Conv2d( - self.out_channels, self.out_channels, kernel_size=1, bias=True) + self.pre_context = nn.Conv2d(self.in_channels, self.in_channels, kernel_size=1, bias=True) + self.post_context = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=1, bias=True) if self.use_deform: - self.offset_s = nn.Conv2d( - self.in_channels, - 18, - kernel_size=3, - padding=1, - stride=stride, - bias=True) - self.offset_l = nn.Conv2d( - self.in_channels, - 18, - kernel_size=3, - padding=1, - stride=stride, - bias=True) + self.offset_s = nn.Conv2d(self.in_channels, 18, kernel_size=3, padding=1, stride=stride, bias=True) + self.offset_l = nn.Conv2d(self.in_channels, 18, kernel_size=3, padding=1, stride=stride, bias=True) self.init_weights() def init_weights(self): @@ -99,16 +87,13 @@ def forward(self, x): switch = self.switch(avg_x) # sac weight = self._get_weight(self.weight) - zero_bias = torch.zeros( - self.out_channels, device=weight.device, dtype=weight.dtype) + zero_bias = torch.zeros(self.out_channels, device=weight.device, dtype=weight.dtype) if self.use_deform: offset = self.offset_s(avg_x) - out_s = deform_conv2d(x, offset, weight, self.stride, self.padding, - self.dilation, self.groups, 1) + out_s = deform_conv2d(x, offset, weight, self.stride, self.padding, self.dilation, self.groups, 1) else: - if (TORCH_VERSION == 'parrots' - or digit_version(TORCH_VERSION) < digit_version('1.5.0')): + if TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.5.0'): out_s = super().conv2d_forward(x, weight) elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'): # bias is a required argument of _conv_forward in torch 1.8.0 @@ -122,11 +107,9 @@ def forward(self, x): weight = weight + self.weight_diff if self.use_deform: offset = self.offset_l(avg_x) - out_l = deform_conv2d(x, offset, weight, self.stride, self.padding, - self.dilation, self.groups, 1) + out_l = deform_conv2d(x, offset, weight, self.stride, self.padding, self.dilation, self.groups, 1) else: - if (TORCH_VERSION == 'parrots' - or digit_version(TORCH_VERSION) < digit_version('1.5.0')): + if TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.5.0'): out_l = super().conv2d_forward(x, weight) elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'): # bias is a required argument of _conv_forward in torch 1.8.0 diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py index 2b8aa4169e9f..6d5866dcf2c6 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py @@ -5,13 +5,10 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext( - '_ext', - ['dynamic_point_to_voxel_forward', 'dynamic_point_to_voxel_backward']) +ext_module = ext_loader.load_ext('_ext', ['dynamic_point_to_voxel_forward', 'dynamic_point_to_voxel_backward']) class _DynamicScatter(Function): - @staticmethod def forward(ctx, feats, coors, reduce_type='max'): """convert kitti points(N, >=3) to voxels. @@ -30,26 +27,28 @@ def forward(ctx, feats, coors, reduce_type='max'): one row. voxel_coors (torch.Tensor): [M, ndim]. Voxel coordinates. """ - results = ext_module.dynamic_point_to_voxel_forward( - feats, coors, reduce_type) - (voxel_feats, voxel_coors, point2voxel_map, - voxel_points_count) = results + results = ext_module.dynamic_point_to_voxel_forward(feats, coors, reduce_type) + (voxel_feats, voxel_coors, point2voxel_map, voxel_points_count) = results ctx.reduce_type = reduce_type - ctx.save_for_backward(feats, voxel_feats, point2voxel_map, - voxel_points_count) + ctx.save_for_backward(feats, voxel_feats, point2voxel_map, voxel_points_count) ctx.mark_non_differentiable(voxel_coors) return voxel_feats, voxel_coors @staticmethod def backward(ctx, grad_voxel_feats, grad_voxel_coors=None): - (feats, voxel_feats, point2voxel_map, - voxel_points_count) = ctx.saved_tensors + (feats, voxel_feats, point2voxel_map, voxel_points_count) = ctx.saved_tensors grad_feats = torch.zeros_like(feats) # TODO: whether to use index put or use cuda_backward # To use index put, need point to voxel index ext_module.dynamic_point_to_voxel_backward( - grad_feats, grad_voxel_feats.contiguous(), feats, voxel_feats, - point2voxel_map, voxel_points_count, ctx.reduce_type) + grad_feats, + grad_voxel_feats.contiguous(), + feats, + voxel_feats, + point2voxel_map, + voxel_points_count, + ctx.reduce_type, + ) return grad_feats, None, None @@ -115,10 +114,8 @@ def forward(self, points, coors): voxels, voxel_coors = [], [] for i in range(batch_size): inds = torch.where(coors[:, 0] == i) - voxel, voxel_coor = self.forward_single( - points[inds], coors[inds][:, 1:]) - coor_pad = nn.functional.pad( - voxel_coor, (1, 0), mode='constant', value=i) + voxel, voxel_coor = self.forward_single(points[inds], coors[inds][:, 1:]) + coor_pad = nn.functional.pad(voxel_coor, (1, 0), mode='constant', value=i) voxel_coors.append(coor_pad) voxels.append(voxel) features = torch.cat(voxels, dim=0) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py index d5721f4ea0b5..28a609585caf 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py @@ -10,17 +10,21 @@ from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import NORM_LAYERS from ..utils import ext_loader -ext_module = ext_loader.load_ext('_ext', [ - 'sync_bn_forward_mean', 'sync_bn_forward_var', 'sync_bn_forward_output', - 'sync_bn_backward_param', 'sync_bn_backward_data' -]) +ext_module = ext_loader.load_ext( + '_ext', + [ + 'sync_bn_forward_mean', + 'sync_bn_forward_var', + 'sync_bn_forward_output', + 'sync_bn_backward_param', + 'sync_bn_backward_data', + ], +) class SyncBatchNormFunction(Function): - @staticmethod - def symbolic(g, input, running_mean, running_var, weight, bias, momentum, - eps, group, group_size, stats_mode): + def symbolic(g, input, running_mean, running_var, weight, bias, momentum, eps, group, group_size, stats_mode): return g.op( 'mmcv::MMCVSyncBatchNorm', input, @@ -32,11 +36,11 @@ def symbolic(g, input, running_mean, running_var, weight, bias, momentum, eps_f=eps, group_i=group, group_size_i=group_size, - stats_mode=stats_mode) + stats_mode=stats_mode, + ) @staticmethod - def forward(self, input, running_mean, running_var, weight, bias, momentum, - eps, group, group_size, stats_mode): + def forward(self, input, running_mean, running_var, weight, bias, momentum, eps, group, group_size, stats_mode): self.momentum = momentum self.eps = eps self.group = group @@ -44,9 +48,8 @@ def forward(self, input, running_mean, running_var, weight, bias, momentum, self.stats_mode = stats_mode assert isinstance( - input, (torch.HalfTensor, torch.FloatTensor, - torch.cuda.HalfTensor, torch.cuda.FloatTensor)), \ - f'only support Half or Float Tensor, but {input.type()}' + input, (torch.HalfTensor, torch.FloatTensor, torch.cuda.HalfTensor, torch.cuda.FloatTensor) + ), f'only support Half or Float Tensor, but {input.type()}' output = torch.zeros_like(input) input3d = input.flatten(start_dim=2) output3d = output.view_as(input3d) @@ -54,14 +57,10 @@ def forward(self, input, running_mean, running_var, weight, bias, momentum, # ensure mean/var/norm/std are initialized as zeros # ``torch.empty()`` does not guarantee that - mean = torch.zeros( - num_channels, dtype=torch.float, device=input3d.device) - var = torch.zeros( - num_channels, dtype=torch.float, device=input3d.device) - norm = torch.zeros_like( - input3d, dtype=torch.float, device=input3d.device) - std = torch.zeros( - num_channels, dtype=torch.float, device=input3d.device) + mean = torch.zeros(num_channels, dtype=torch.float, device=input3d.device) + var = torch.zeros(num_channels, dtype=torch.float, device=input3d.device) + norm = torch.zeros_like(input3d, dtype=torch.float, device=input3d.device) + std = torch.zeros(num_channels, dtype=torch.float, device=input3d.device) batch_size = input3d.size(0) if batch_size > 0: @@ -120,7 +119,8 @@ def forward(self, input, running_mean, running_var, weight, bias, momentum, output3d, eps=self.eps, momentum=momentum, - group_size=self.group_size) + group_size=self.group_size, + ) self.save_for_backward(norm, std, weight) return output @@ -136,8 +136,7 @@ def backward(self, grad_output): batch_size = grad_input3d.size(0) if batch_size > 0: - ext_module.sync_bn_backward_param(grad_output3d, norm, grad_weight, - grad_bias) + ext_module.sync_bn_backward_param(grad_output3d, norm, grad_weight, grad_bias) # all reduce if self.group_size > 1: @@ -147,12 +146,9 @@ def backward(self, grad_output): grad_bias /= self.group_size if batch_size > 0: - ext_module.sync_bn_backward_data(grad_output3d, weight, - grad_weight, grad_bias, norm, std, - grad_input3d) + ext_module.sync_bn_backward_data(grad_output3d, weight, grad_weight, grad_bias, norm, std, grad_input3d) - return grad_input, None, None, grad_weight, grad_bias, \ - None, None, None, None, None + return grad_input, None, None, grad_weight, grad_bias, None, None, None, None, None @NORM_LAYERS.register_module(name='MMSyncBN') @@ -190,14 +186,16 @@ class SyncBatchNorm(Module): number of batch. """ - def __init__(self, - num_features, - eps=1e-5, - momentum=0.1, - affine=True, - track_running_stats=True, - group=None, - stats_mode='default'): + def __init__( + self, + num_features, + eps=1e-5, + momentum=0.1, + affine=True, + track_running_stats=True, + group=None, + stats_mode='default', + ): super(SyncBatchNorm, self).__init__() self.num_features = num_features self.eps = eps @@ -207,8 +205,7 @@ def __init__(self, group = dist.group.WORLD if group is None else group self.group = group self.group_size = dist.get_world_size(group) - assert stats_mode in ['default', 'N'], \ - f'"stats_mode" only accepts "default" and "N", got "{stats_mode}"' + assert stats_mode in ['default', 'N'], f'"stats_mode" only accepts "default" and "N", got "{stats_mode}"' self.stats_mode = stats_mode if self.affine: self.weight = Parameter(torch.Tensor(num_features)) @@ -219,8 +216,7 @@ def __init__(self, if self.track_running_stats: self.register_buffer('running_mean', torch.zeros(num_features)) self.register_buffer('running_var', torch.ones(num_features)) - self.register_buffer('num_batches_tracked', - torch.tensor(0, dtype=torch.long)) + self.register_buffer('num_batches_tracked', torch.tensor(0, dtype=torch.long)) else: self.register_buffer('running_mean', None) self.register_buffer('running_var', None) @@ -241,8 +237,7 @@ def reset_parameters(self): def forward(self, input): if input.dim() < 2: - raise ValueError( - f'expected at least 2D input, got {input.dim()}D input') + raise ValueError(f'expected at least 2D input, got {input.dim()}D input') if self.momentum is None: exponential_average_factor = 0.0 else: @@ -252,20 +247,34 @@ def forward(self, input): if self.num_batches_tracked is not None: self.num_batches_tracked += 1 if self.momentum is None: # use cumulative moving average - exponential_average_factor = 1.0 / float( - self.num_batches_tracked) + exponential_average_factor = 1.0 / float(self.num_batches_tracked) else: # use exponential moving average exponential_average_factor = self.momentum if self.training or not self.track_running_stats: return SyncBatchNormFunction.apply( - input, self.running_mean, self.running_var, self.weight, - self.bias, exponential_average_factor, self.eps, self.group, - self.group_size, self.stats_mode) + input, + self.running_mean, + self.running_var, + self.weight, + self.bias, + exponential_average_factor, + self.eps, + self.group, + self.group_size, + self.stats_mode, + ) else: - return F.batch_norm(input, self.running_mean, self.running_var, - self.weight, self.bias, False, - exponential_average_factor, self.eps) + return F.batch_norm( + input, + self.running_mean, + self.running_var, + self.weight, + self.bias, + False, + exponential_average_factor, + self.eps, + ) def __repr__(self): s = self.__class__.__name__ diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py index 203f47f05d58..09333e484221 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py @@ -5,8 +5,7 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext( - '_ext', ['three_interpolate_forward', 'three_interpolate_backward']) +ext_module = ext_loader.load_ext('_ext', ['three_interpolate_forward', 'three_interpolate_backward']) class ThreeInterpolate(Function): @@ -17,8 +16,7 @@ class ThreeInterpolate(Function): """ @staticmethod - def forward(ctx, features: torch.Tensor, indices: torch.Tensor, - weight: torch.Tensor) -> torch.Tensor: + def forward(ctx, features: torch.Tensor, indices: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: """ Args: features (Tensor): (B, C, M) Features descriptors to be @@ -39,14 +37,11 @@ def forward(ctx, features: torch.Tensor, indices: torch.Tensor, ctx.three_interpolate_for_backward = (indices, weight, m) output = torch.cuda.FloatTensor(B, c, n) - ext_module.three_interpolate_forward( - features, indices, weight, output, b=B, c=c, m=m, n=n) + ext_module.three_interpolate_forward(features, indices, weight, output, b=B, c=c, m=m, n=n) return output @staticmethod - def backward( - ctx, grad_out: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Args: grad_out (Tensor): (B, C, N) tensor with gradients of outputs @@ -60,8 +55,7 @@ def backward( grad_features = torch.cuda.FloatTensor(B, c, m).zero_() grad_out_data = grad_out.data.contiguous() - ext_module.three_interpolate_backward( - grad_out_data, idx, weight, grad_features.data, b=B, c=c, n=n, m=m) + ext_module.three_interpolate_backward(grad_out_data, idx, weight, grad_features.data, b=B, c=c, n=n, m=m) return grad_features, None, None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py index 2b01047a1299..384d91534d17 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py @@ -16,8 +16,7 @@ class ThreeNN(Function): """ @staticmethod - def forward(ctx, target: torch.Tensor, - source: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + def forward(ctx, target: torch.Tensor, source: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: """ Args: target (Tensor): shape (B, N, 3), points set that needs to diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py index 472c9fcfe45a..4b0a8162e811 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py @@ -10,19 +10,18 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext('_ext', - ['tin_shift_forward', 'tin_shift_backward']) +ext_module = ext_loader.load_ext('_ext', ['tin_shift_forward', 'tin_shift_backward']) class TINShiftFunction(Function): - @staticmethod def forward(ctx, input, shift): C = input.size(2) num_segments = shift.size(1) if C // num_segments <= 0 or C % num_segments != 0: - raise ValueError('C should be a multiple of num_segments, ' - f'but got C={C} and num_segments={num_segments}.') + raise ValueError( + 'C should be a multiple of num_segments, ' f'but got C={C} and num_segments={num_segments}.' + ) ctx.save_for_backward(shift) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py index 8802690df0c7..c1f330686b9c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py @@ -106,10 +106,8 @@ class UpFirDn2dBackward(Function): - @staticmethod - def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad, - in_size, out_size): + def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad, in_size, out_size): up_x, up_y = up down_x, down_y = down @@ -127,9 +125,9 @@ def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad, pad_x0=g_pad_x0, pad_x1=g_pad_x1, pad_y0=g_pad_y0, - pad_y1=g_pad_y1) - grad_input = grad_input.view(in_size[0], in_size[1], in_size[2], - in_size[3]) + pad_y1=g_pad_y1, + ) + grad_input = grad_input.view(in_size[0], in_size[1], in_size[2], in_size[3]) ctx.save_for_backward(kernel) @@ -150,10 +148,9 @@ def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad, @staticmethod def backward(ctx, gradgrad_input): - kernel, = ctx.saved_tensors + (kernel,) = ctx.saved_tensors - gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2], - ctx.in_size[3], 1) + gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2], ctx.in_size[3], 1) gradgrad_out = upfirdn2d_ext.upfirdn2d( gradgrad_input, @@ -165,17 +162,16 @@ def backward(ctx, gradgrad_input): pad_x0=ctx.pad_x0, pad_x1=ctx.pad_x1, pad_y0=ctx.pad_y0, - pad_y1=ctx.pad_y1) + pad_y1=ctx.pad_y1, + ) # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0], # ctx.out_size[1], ctx.in_size[3]) - gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.in_size[1], - ctx.out_size[0], ctx.out_size[1]) + gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.in_size[1], ctx.out_size[0], ctx.out_size[1]) return gradgrad_out, None, None, None, None, None, None, None, None class UpFirDn2d(Function): - @staticmethod def forward(ctx, input, kernel, up, down, pad): up_x, up_y = up @@ -215,7 +211,8 @@ def forward(ctx, input, kernel, up, down, pad): pad_x0=pad_x0, pad_x1=pad_x1, pad_y0=pad_y0, - pad_y1=pad_y1) + pad_y1=pad_y1, + ) # out = out.view(major, out_h, out_w, minor) out = out.view(-1, channel, out_h, out_w) @@ -226,15 +223,7 @@ def backward(ctx, grad_output): kernel, grad_kernel = ctx.saved_tensors grad_input = UpFirDn2dBackward.apply( - grad_output, - kernel, - grad_kernel, - ctx.up, - ctx.down, - ctx.pad, - ctx.g_pad, - ctx.in_size, - ctx.out_size, + grad_output, kernel, grad_kernel, ctx.up, ctx.down, ctx.pad, ctx.g_pad, ctx.in_size, ctx.out_size, ) return grad_input, None, None, None, None @@ -270,8 +259,7 @@ def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)): down = to_2tuple(down) - out = upfirdn2d_native(input, kernel, up[0], up[1], down[0], down[1], - pad[0], pad[1], pad[2], pad[3]) + out = upfirdn2d_native(input, kernel, up[0], up[1], down[0], down[1], pad[0], pad[1], pad[2], pad[3]) else: _up = to_2tuple(up) @@ -287,8 +275,7 @@ def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)): return out -def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, - pad_y0, pad_y1): +def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1): _, channel, in_h, in_w = input.shape input = input.reshape(-1, in_h, in_w, 1) @@ -299,27 +286,17 @@ def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1]) out = out.view(-1, in_h * up_y, in_w * up_x, minor) - out = F.pad( - out, - [0, 0, - max(pad_x0, 0), - max(pad_x1, 0), - max(pad_y0, 0), - max(pad_y1, 0)]) - out = out[:, - max(-pad_y0, 0):out.shape[1] - max(-pad_y1, 0), - max(-pad_x0, 0):out.shape[2] - max(-pad_x1, 0), :, ] + out = F.pad(out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]) + out = out[ + :, max(-pad_y0, 0) : out.shape[1] - max(-pad_y1, 0), max(-pad_x0, 0) : out.shape[2] - max(-pad_x1, 0), :, + ] out = out.permute(0, 3, 1, 2) - out = out.reshape( - [-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]) + out = out.reshape([-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]) w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w) out = F.conv2d(out, w) out = out.reshape( - -1, - minor, - in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1, - in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1, + -1, minor, in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1, in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1, ) out = out.permute(0, 2, 3, 1) out = out[:, ::down_y, ::down_x, :] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py index ca3226a4fbcb..60e23663270d 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py @@ -6,19 +6,12 @@ from ..utils import ext_loader -ext_module = ext_loader.load_ext( - '_ext', ['dynamic_voxelize_forward', 'hard_voxelize_forward']) +ext_module = ext_loader.load_ext('_ext', ['dynamic_voxelize_forward', 'hard_voxelize_forward']) class _Voxelization(Function): - @staticmethod - def forward(ctx, - points, - voxel_size, - coors_range, - max_points=35, - max_voxels=20000): + def forward(ctx, points, voxel_size, coors_range, max_points=35, max_voxels=20000): """Convert kitti points(N, >=3) to voxels. Args: @@ -46,18 +39,15 @@ def forward(ctx, """ if max_points == -1 or max_voxels == -1: coors = points.new_zeros(size=(points.size(0), 3), dtype=torch.int) - ext_module.dynamic_voxelize_forward(points, coors, voxel_size, - coors_range, 3) + ext_module.dynamic_voxelize_forward(points, coors, voxel_size, coors_range, 3) return coors else: - voxels = points.new_zeros( - size=(max_voxels, max_points, points.size(1))) + voxels = points.new_zeros(size=(max_voxels, max_points, points.size(1))) coors = points.new_zeros(size=(max_voxels, 3), dtype=torch.int) - num_points_per_voxel = points.new_zeros( - size=(max_voxels, ), dtype=torch.int) + num_points_per_voxel = points.new_zeros(size=(max_voxels,), dtype=torch.int) voxel_num = ext_module.hard_voxelize_forward( - points, voxels, coors, num_points_per_voxel, voxel_size, - coors_range, max_points, max_voxels, 3) + points, voxels, coors, num_points_per_voxel, voxel_size, coors_range, max_points, max_voxels, 3 + ) # select the valid voxels voxels_out = voxels[:voxel_num] coors_out = coors[:voxel_num] @@ -86,11 +76,7 @@ class Voxelization(nn.Module): Default: 20000. """ - def __init__(self, - voxel_size, - point_cloud_range, - max_num_points, - max_voxels=20000): + def __init__(self, voxel_size, point_cloud_range, max_num_points, max_voxels=20000): super().__init__() self.voxel_size = voxel_size @@ -101,11 +87,9 @@ def __init__(self, else: self.max_voxels = _pair(max_voxels) - point_cloud_range = torch.tensor( - point_cloud_range, dtype=torch.float32) + point_cloud_range = torch.tensor(point_cloud_range, dtype=torch.float32) voxel_size = torch.tensor(voxel_size, dtype=torch.float32) - grid_size = (point_cloud_range[3:] - - point_cloud_range[:3]) / voxel_size + grid_size = (point_cloud_range[3:] - point_cloud_range[:3]) / voxel_size grid_size = torch.round(grid_size).long() input_feat_shape = grid_size[:2] self.grid_size = grid_size @@ -119,8 +103,7 @@ def forward(self, input): else: max_voxels = self.max_voxels[1] - return voxelization(input, self.voxel_size, self.point_cloud_range, - self.max_num_points, max_voxels) + return voxelization(input, self.voxel_size, self.point_cloud_range, self.max_num_points, max_voxels) def __repr__(self): s = self.__class__.__name__ + '(' diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py index 2ed2c17ad357..da4f1557d34a 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py @@ -8,6 +8,12 @@ from .utils import is_module_wrapper __all__ = [ - 'collate', 'DataContainer', 'MMDataParallel', 'MMDistributedDataParallel', - 'scatter', 'scatter_kwargs', 'is_module_wrapper', 'MODULE_WRAPPERS' + 'collate', + 'DataContainer', + 'MMDataParallel', + 'MMDistributedDataParallel', + 'scatter', + 'scatter_kwargs', + 'is_module_wrapper', + 'MODULE_WRAPPERS', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py index 9b5a8a44483a..154a0302584c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py @@ -11,8 +11,7 @@ def scatter(input, devices, streams=None): if isinstance(input, list): chunk_size = (len(input) - 1) // len(devices) + 1 outputs = [ - scatter(input[i], [devices[i // chunk_size]], - [streams[i // chunk_size]]) for i in range(len(input)) + scatter(input[i], [devices[i // chunk_size]], [streams[i // chunk_size]]) for i in range(len(input)) ] return outputs elif isinstance(input, torch.Tensor): @@ -36,8 +35,7 @@ def synchronize_stream(output, devices, streams): chunk_size = len(output) // len(devices) for i in range(len(devices)): for j in range(chunk_size): - synchronize_stream(output[i * chunk_size + j], [devices[i]], - [streams[i]]) + synchronize_stream(output[i * chunk_size + j], [devices[i]], [streams[i]]) elif isinstance(output, torch.Tensor): if output.numel() != 0: with torch.cuda.device(devices[0]): @@ -62,7 +60,6 @@ def get_input_device(input): class Scatter: - @staticmethod def forward(target_gpus, input): input_device = get_input_device(input) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py index ad749197df21..9607ce7efb6d 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py @@ -27,10 +27,8 @@ def collate(batch, samples_per_gpu=1): stacked = [] if batch[0].cpu_only: for i in range(0, len(batch), samples_per_gpu): - stacked.append( - [sample.data for sample in batch[i:i + samples_per_gpu]]) - return DataContainer( - stacked, batch[0].stack, batch[0].padding_value, cpu_only=True) + stacked.append([sample.data for sample in batch[i : i + samples_per_gpu]]) + return DataContainer(stacked, batch[0].stack, batch[0].padding_value, cpu_only=True) elif batch[0].stack: for i in range(0, len(batch), samples_per_gpu): assert isinstance(batch[i].data, torch.Tensor) @@ -41,44 +39,31 @@ def collate(batch, samples_per_gpu=1): max_shape = [0 for _ in range(batch[i].pad_dims)] for dim in range(1, batch[i].pad_dims + 1): max_shape[dim - 1] = batch[i].size(-dim) - for sample in batch[i:i + samples_per_gpu]: + for sample in batch[i : i + samples_per_gpu]: for dim in range(0, ndim - batch[i].pad_dims): assert batch[i].size(dim) == sample.size(dim) for dim in range(1, batch[i].pad_dims + 1): - max_shape[dim - 1] = max(max_shape[dim - 1], - sample.size(-dim)) + max_shape[dim - 1] = max(max_shape[dim - 1], sample.size(-dim)) padded_samples = [] - for sample in batch[i:i + samples_per_gpu]: + for sample in batch[i : i + samples_per_gpu]: pad = [0 for _ in range(batch[i].pad_dims * 2)] for dim in range(1, batch[i].pad_dims + 1): - pad[2 * dim - - 1] = max_shape[dim - 1] - sample.size(-dim) - padded_samples.append( - F.pad( - sample.data, pad, value=sample.padding_value)) + pad[2 * dim - 1] = max_shape[dim - 1] - sample.size(-dim) + padded_samples.append(F.pad(sample.data, pad, value=sample.padding_value)) stacked.append(default_collate(padded_samples)) elif batch[i].pad_dims is None: - stacked.append( - default_collate([ - sample.data - for sample in batch[i:i + samples_per_gpu] - ])) + stacked.append(default_collate([sample.data for sample in batch[i : i + samples_per_gpu]])) else: - raise ValueError( - 'pad_dims should be either None or integers (1-3)') + raise ValueError('pad_dims should be either None or integers (1-3)') else: for i in range(0, len(batch), samples_per_gpu): - stacked.append( - [sample.data for sample in batch[i:i + samples_per_gpu]]) + stacked.append([sample.data for sample in batch[i : i + samples_per_gpu]]) return DataContainer(stacked, batch[0].stack, batch[0].padding_value) elif isinstance(batch[0], Sequence): transposed = zip(*batch) return [collate(samples, samples_per_gpu) for samples in transposed] elif isinstance(batch[0], Mapping): - return { - key: collate([d[key] for d in batch], samples_per_gpu) - for key in batch[0] - } + return {key: collate([d[key] for d in batch], samples_per_gpu) for key in batch[0]} else: return default_collate(batch) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py index cedb0d32a51a..120f68b8fa60 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py @@ -5,13 +5,12 @@ def assert_tensor_type(func): - @functools.wraps(func) def wrapper(*args, **kwargs): if not isinstance(args[0].data, torch.Tensor): raise AttributeError( - f'{args[0].__class__.__name__} has no attribute ' - f'{func.__name__} for type {args[0].datatype}') + f'{args[0].__class__.__name__} has no attribute ' f'{func.__name__} for type {args[0].datatype}' + ) return func(*args, **kwargs) return wrapper @@ -34,12 +33,7 @@ class DataContainer: - pad_dims specifies the number of last few dimensions to do padding """ - def __init__(self, - data, - stack=False, - padding_value=0, - cpu_only=False, - pad_dims=2): + def __init__(self, data, stack=False, padding_value=0, cpu_only=False, pad_dims=2): self._data = data self._cpu_only = cpu_only self._stack = stack diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py index 79b5f69b654c..bd0715da94ad 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py @@ -51,17 +51,19 @@ def train_step(self, *inputs, **kwargs): inputs, kwargs = self.scatter(inputs, kwargs, [-1]) return self.module.train_step(*inputs[0], **kwargs[0]) - assert len(self.device_ids) == 1, \ - ('MMDataParallel only supports single GPU training, if you need to' - ' train with multiple GPUs, please use MMDistributedDataParallel' - 'instead.') + assert len(self.device_ids) == 1, ( + 'MMDataParallel only supports single GPU training, if you need to' + ' train with multiple GPUs, please use MMDistributedDataParallel' + 'instead.' + ) for t in chain(self.module.parameters(), self.module.buffers()): if t.device != self.src_device_obj: raise RuntimeError( 'module must have its parameters and buffers ' f'on device {self.src_device_obj} (device_ids[0]) but ' - f'found one of them on device: {t.device}') + f'found one of them on device: {t.device}' + ) inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) return self.module.train_step(*inputs[0], **kwargs[0]) @@ -73,17 +75,19 @@ def val_step(self, *inputs, **kwargs): inputs, kwargs = self.scatter(inputs, kwargs, [-1]) return self.module.val_step(*inputs[0], **kwargs[0]) - assert len(self.device_ids) == 1, \ - ('MMDataParallel only supports single GPU training, if you need to' - ' train with multiple GPUs, please use MMDistributedDataParallel' - ' instead.') + assert len(self.device_ids) == 1, ( + 'MMDataParallel only supports single GPU training, if you need to' + ' train with multiple GPUs, please use MMDistributedDataParallel' + ' instead.' + ) for t in chain(self.module.parameters(), self.module.buffers()): if t.device != self.src_device_obj: raise RuntimeError( 'module must have its parameters and buffers ' f'on device {self.src_device_obj} (device_ids[0]) but ' - f'found one of them on device: {t.device}') + f'found one of them on device: {t.device}' + ) inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) return self.module.val_step(*inputs[0], **kwargs[0]) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py index b88c33ce159e..755c4398fcdc 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py @@ -1,10 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. import torch -from torch.nn.parallel.distributed import (DistributedDataParallel, - _find_tensors) +from torch.nn.parallel.distributed import DistributedDataParallel, _find_tensors from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import print_log from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version + from .scatter_gather import scatter_kwargs @@ -37,12 +37,12 @@ def train_step(self, *inputs, **kwargs): # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the # end of backward to the beginning of forward. - if ('parrots' not in TORCH_VERSION - and digit_version(TORCH_VERSION) >= digit_version('1.7') - and self.reducer._rebuild_buckets()): - print_log( - 'Reducer buckets have been rebuilt in this iteration.', - logger='mmcv') + if ( + 'parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) >= digit_version('1.7') + and self.reducer._rebuild_buckets() + ): + print_log('Reducer buckets have been rebuilt in this iteration.', logger='mmcv') if getattr(self, 'require_forward_param_sync', True): self._sync_params() @@ -51,21 +51,18 @@ def train_step(self, *inputs, **kwargs): if len(self.device_ids) == 1: output = self.module.train_step(*inputs[0], **kwargs[0]) else: - outputs = self.parallel_apply( - self._module_copies[:len(inputs)], inputs, kwargs) + outputs = self.parallel_apply(self._module_copies[: len(inputs)], inputs, kwargs) output = self.gather(outputs, self.output_device) else: output = self.module.train_step(*inputs, **kwargs) - if torch.is_grad_enabled() and getattr( - self, 'require_backward_grad_sync', True): + if torch.is_grad_enabled() and getattr(self, 'require_backward_grad_sync', True): if self.find_unused_parameters: self.reducer.prepare_for_backward(list(_find_tensors(output))) else: self.reducer.prepare_for_backward([]) else: - if ('parrots' not in TORCH_VERSION - and digit_version(TORCH_VERSION) > digit_version('1.2')): + if 'parrots' not in TORCH_VERSION and digit_version(TORCH_VERSION) > digit_version('1.2'): self.require_forward_param_sync = False return output @@ -79,12 +76,12 @@ def val_step(self, *inputs, **kwargs): """ # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the # end of backward to the beginning of forward. - if ('parrots' not in TORCH_VERSION - and digit_version(TORCH_VERSION) >= digit_version('1.7') - and self.reducer._rebuild_buckets()): - print_log( - 'Reducer buckets have been rebuilt in this iteration.', - logger='mmcv') + if ( + 'parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) >= digit_version('1.7') + and self.reducer._rebuild_buckets() + ): + print_log('Reducer buckets have been rebuilt in this iteration.', logger='mmcv') if getattr(self, 'require_forward_param_sync', True): self._sync_params() @@ -93,20 +90,17 @@ def val_step(self, *inputs, **kwargs): if len(self.device_ids) == 1: output = self.module.val_step(*inputs[0], **kwargs[0]) else: - outputs = self.parallel_apply( - self._module_copies[:len(inputs)], inputs, kwargs) + outputs = self.parallel_apply(self._module_copies[: len(inputs)], inputs, kwargs) output = self.gather(outputs, self.output_device) else: output = self.module.val_step(*inputs, **kwargs) - if torch.is_grad_enabled() and getattr( - self, 'require_backward_grad_sync', True): + if torch.is_grad_enabled() and getattr(self, 'require_backward_grad_sync', True): if self.find_unused_parameters: self.reducer.prepare_for_backward(list(_find_tensors(output))) else: self.reducer.prepare_for_backward([]) else: - if ('parrots' not in TORCH_VERSION - and digit_version(TORCH_VERSION) > digit_version('1.2')): + if 'parrots' not in TORCH_VERSION and digit_version(TORCH_VERSION) > digit_version('1.2'): self.require_forward_param_sync = False return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py index 4a0efe2edc0f..7e8a47648e1e 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py @@ -2,22 +2,17 @@ import torch import torch.distributed as dist import torch.nn as nn -from torch._utils import (_flatten_dense_tensors, _take_tensors, - _unflatten_dense_tensors) +from torch._utils import _flatten_dense_tensors, _take_tensors, _unflatten_dense_tensors from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version + from .registry import MODULE_WRAPPERS from .scatter_gather import scatter_kwargs @MODULE_WRAPPERS.register_module() class MMDistributedDataParallel(nn.Module): - - def __init__(self, - module, - dim=0, - broadcast_buffers=True, - bucket_cap_mb=25): + def __init__(self, module, dim=0, broadcast_buffers=True, bucket_cap_mb=25): super(MMDistributedDataParallel, self).__init__() self.module = module self.dim = dim @@ -30,41 +25,34 @@ def _dist_broadcast_coalesced(self, tensors, buffer_size): for tensors in _take_tensors(tensors, buffer_size): flat_tensors = _flatten_dense_tensors(tensors) dist.broadcast(flat_tensors, 0) - for tensor, synced in zip( - tensors, _unflatten_dense_tensors(flat_tensors, tensors)): + for tensor, synced in zip(tensors, _unflatten_dense_tensors(flat_tensors, tensors)): tensor.copy_(synced) def _sync_params(self): module_states = list(self.module.state_dict().values()) if len(module_states) > 0: - self._dist_broadcast_coalesced(module_states, - self.broadcast_bucket_size) + self._dist_broadcast_coalesced(module_states, self.broadcast_bucket_size) if self.broadcast_buffers: - if (TORCH_VERSION != 'parrots' - and digit_version(TORCH_VERSION) < digit_version('1.0')): + if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) < digit_version('1.0'): buffers = [b.data for b in self.module._all_buffers()] else: buffers = [b.data for b in self.module.buffers()] if len(buffers) > 0: - self._dist_broadcast_coalesced(buffers, - self.broadcast_bucket_size) + self._dist_broadcast_coalesced(buffers, self.broadcast_bucket_size) def scatter(self, inputs, kwargs, device_ids): return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) def forward(self, *inputs, **kwargs): - inputs, kwargs = self.scatter(inputs, kwargs, - [torch.cuda.current_device()]) + inputs, kwargs = self.scatter(inputs, kwargs, [torch.cuda.current_device()]) return self.module(*inputs[0], **kwargs[0]) def train_step(self, *inputs, **kwargs): - inputs, kwargs = self.scatter(inputs, kwargs, - [torch.cuda.current_device()]) + inputs, kwargs = self.scatter(inputs, kwargs, [torch.cuda.current_device()]) output = self.module.train_step(*inputs[0], **kwargs[0]) return output def val_step(self, *inputs, **kwargs): - inputs, kwargs = self.scatter(inputs, kwargs, - [torch.cuda.current_device()]) + inputs, kwargs = self.scatter(inputs, kwargs, [torch.cuda.current_device()]) output = self.module.val_step(*inputs[0], **kwargs[0]) return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py index 52e4b48d383a..8edd3a098aed 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py @@ -2,46 +2,117 @@ from .base_module import BaseModule, ModuleList, Sequential from .base_runner import BaseRunner from .builder import RUNNERS, build_runner -from .checkpoint import (CheckpointLoader, _load_checkpoint, - _load_checkpoint_with_prefix, load_checkpoint, - load_state_dict, save_checkpoint, weights_to_cpu) +from .checkpoint import ( + CheckpointLoader, + _load_checkpoint, + _load_checkpoint_with_prefix, + load_checkpoint, + load_state_dict, + save_checkpoint, + weights_to_cpu, +) from .default_constructor import DefaultRunnerConstructor -from .dist_utils import (allreduce_grads, allreduce_params, get_dist_info, - init_dist, master_only) +from .dist_utils import allreduce_grads, allreduce_params, get_dist_info, init_dist, master_only from .epoch_based_runner import EpochBasedRunner, Runner from .fp16_utils import LossScaler, auto_fp16, force_fp32, wrap_fp16_model -from .hooks import (HOOKS, CheckpointHook, ClosureHook, DistEvalHook, - DistSamplerSeedHook, DvcliveLoggerHook, EMAHook, EvalHook, - Fp16OptimizerHook, GradientCumulativeFp16OptimizerHook, - GradientCumulativeOptimizerHook, Hook, IterTimerHook, - LoggerHook, LrUpdaterHook, MlflowLoggerHook, - NeptuneLoggerHook, OptimizerHook, PaviLoggerHook, - SyncBuffersHook, TensorboardLoggerHook, TextLoggerHook, - WandbLoggerHook) +from .hooks import ( + HOOKS, + CheckpointHook, + ClosureHook, + DistEvalHook, + DistSamplerSeedHook, + DvcliveLoggerHook, + EMAHook, + EvalHook, + Fp16OptimizerHook, + GradientCumulativeFp16OptimizerHook, + GradientCumulativeOptimizerHook, + Hook, + IterTimerHook, + LoggerHook, + LrUpdaterHook, + MlflowLoggerHook, + NeptuneLoggerHook, + OptimizerHook, + PaviLoggerHook, + SyncBuffersHook, + TensorboardLoggerHook, + TextLoggerHook, + WandbLoggerHook, +) from .iter_based_runner import IterBasedRunner, IterLoader from .log_buffer import LogBuffer -from .optimizer import (OPTIMIZER_BUILDERS, OPTIMIZERS, - DefaultOptimizerConstructor, build_optimizer, - build_optimizer_constructor) +from .optimizer import ( + OPTIMIZER_BUILDERS, + OPTIMIZERS, + DefaultOptimizerConstructor, + build_optimizer, + build_optimizer_constructor, +) from .priority import Priority, get_priority from .utils import get_host_info, get_time_str, obj_from_dict, set_random_seed __all__ = [ - 'BaseRunner', 'Runner', 'EpochBasedRunner', 'IterBasedRunner', 'LogBuffer', - 'HOOKS', 'Hook', 'CheckpointHook', 'ClosureHook', 'LrUpdaterHook', - 'OptimizerHook', 'IterTimerHook', 'DistSamplerSeedHook', 'LoggerHook', - 'PaviLoggerHook', 'TextLoggerHook', 'TensorboardLoggerHook', - 'NeptuneLoggerHook', 'WandbLoggerHook', 'MlflowLoggerHook', - 'DvcliveLoggerHook', '_load_checkpoint', 'load_state_dict', - 'load_checkpoint', 'weights_to_cpu', 'save_checkpoint', 'Priority', - 'get_priority', 'get_host_info', 'get_time_str', 'obj_from_dict', - 'init_dist', 'get_dist_info', 'master_only', 'OPTIMIZER_BUILDERS', - 'OPTIMIZERS', 'DefaultOptimizerConstructor', 'build_optimizer', - 'build_optimizer_constructor', 'IterLoader', 'set_random_seed', - 'auto_fp16', 'force_fp32', 'wrap_fp16_model', 'Fp16OptimizerHook', - 'SyncBuffersHook', 'EMAHook', 'build_runner', 'RUNNERS', 'allreduce_grads', - 'allreduce_params', 'LossScaler', 'CheckpointLoader', 'BaseModule', - '_load_checkpoint_with_prefix', 'EvalHook', 'DistEvalHook', 'Sequential', - 'ModuleList', 'GradientCumulativeOptimizerHook', - 'GradientCumulativeFp16OptimizerHook', 'DefaultRunnerConstructor' + 'BaseRunner', + 'Runner', + 'EpochBasedRunner', + 'IterBasedRunner', + 'LogBuffer', + 'HOOKS', + 'Hook', + 'CheckpointHook', + 'ClosureHook', + 'LrUpdaterHook', + 'OptimizerHook', + 'IterTimerHook', + 'DistSamplerSeedHook', + 'LoggerHook', + 'PaviLoggerHook', + 'TextLoggerHook', + 'TensorboardLoggerHook', + 'NeptuneLoggerHook', + 'WandbLoggerHook', + 'MlflowLoggerHook', + 'DvcliveLoggerHook', + '_load_checkpoint', + 'load_state_dict', + 'load_checkpoint', + 'weights_to_cpu', + 'save_checkpoint', + 'Priority', + 'get_priority', + 'get_host_info', + 'get_time_str', + 'obj_from_dict', + 'init_dist', + 'get_dist_info', + 'master_only', + 'OPTIMIZER_BUILDERS', + 'OPTIMIZERS', + 'DefaultOptimizerConstructor', + 'build_optimizer', + 'build_optimizer_constructor', + 'IterLoader', + 'set_random_seed', + 'auto_fp16', + 'force_fp32', + 'wrap_fp16_model', + 'Fp16OptimizerHook', + 'SyncBuffersHook', + 'EMAHook', + 'build_runner', + 'RUNNERS', + 'allreduce_grads', + 'allreduce_params', + 'LossScaler', + 'CheckpointLoader', + 'BaseModule', + '_load_checkpoint_with_prefix', + 'EvalHook', + 'DistEvalHook', + 'Sequential', + 'ModuleList', + 'GradientCumulativeOptimizerHook', + 'GradientCumulativeFp16OptimizerHook', + 'DefaultRunnerConstructor', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py index b67c1f1bb08e..c0c66594dccb 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py @@ -8,7 +8,11 @@ import torch.nn as nn from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner.dist_utils import master_only -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.logging import get_logger, logger_initialized, print_log +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.logging import ( + get_logger, + logger_initialized, + print_log, +) class BaseModule(nn.Module, metaclass=ABCMeta): @@ -76,12 +80,12 @@ def init_weights(self): # the corresponding parameter is changed, update related # initialization information for name, param in self.named_parameters(): - self._params_init_info[param][ - 'init_info'] = f'The value is the same before and ' \ - f'after calling `init_weights` ' \ - f'of {self.__class__.__name__} ' - self._params_init_info[param][ - 'tmp_mean_value'] = param.data.mean() + self._params_init_info[param]['init_info'] = ( + f'The value is the same before and ' + f'after calling `init_weights` ' + f'of {self.__class__.__name__} ' + ) + self._params_init_info[param]['tmp_mean_value'] = param.data.mean() # pass `params_init_info` to all submodules # All submodules share the same `params_init_info`, @@ -97,12 +101,11 @@ def init_weights(self): from ..cnn import initialize from ..cnn.utils.weight_init import update_init_info + module_name = self.__class__.__name__ if not self._is_init: if self.init_cfg: - print_log( - f'initialize {module_name} with init_cfg {self.init_cfg}', - logger=logger_name) + print_log(f'initialize {module_name} with init_cfg {self.init_cfg}', logger=logger_name) initialize(self, self.init_cfg) if isinstance(self.init_cfg, dict): # prevent the parameters of @@ -117,15 +120,12 @@ def init_weights(self): m.init_weights() # users may overload the `init_weights` update_init_info( - m, - init_info=f'Initialized by ' - f'user-defined `init_weights`' - f' in {m.__class__.__name__} ') + m, init_info=f'Initialized by ' f'user-defined `init_weights`' f' in {m.__class__.__name__} ' + ) self._is_init = True else: - warnings.warn(f'init_weights of {self.__class__.__name__} has ' - f'been called more than once.') + warnings.warn(f'init_weights of {self.__class__.__name__} has ' f'been called more than once.') if is_top_level_module: self._dump_init_info(logger_name) @@ -148,20 +148,19 @@ def _dump_init_info(self, logger_name): # dump the information to the logger file if there is a `FileHandler` for handler in logger.handlers: if isinstance(handler, FileHandler): - handler.stream.write( - 'Name of parameter - Initialization information\n') + handler.stream.write('Name of parameter - Initialization information\n') for name, param in self.named_parameters(): handler.stream.write( - f'\n{name} - {param.shape}: ' - f"\n{self._params_init_info[param]['init_info']} \n") + f'\n{name} - {param.shape}: ' f"\n{self._params_init_info[param]['init_info']} \n" + ) handler.stream.flush() with_file_handler = True if not with_file_handler: for name, param in self.named_parameters(): print_log( - f'\n{name} - {param.shape}: ' - f"\n{self._params_init_info[param]['init_info']} \n ", - logger=logger_name) + f'\n{name} - {param.shape}: ' f"\n{self._params_init_info[param]['init_info']} \n ", + logger=logger_name, + ) def __repr__(self): s = super().__repr__() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py index a8671e7ae844..92948a64963c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py @@ -48,21 +48,23 @@ class BaseRunner(metaclass=ABCMeta): max_iters (int, optional): Total training iterations. """ - def __init__(self, - model, - batch_processor=None, - optimizer=None, - work_dir=None, - logger=None, - meta=None, - max_iters=None, - max_epochs=None): + def __init__( + self, + model, + batch_processor=None, + optimizer=None, + work_dir=None, + logger=None, + meta=None, + max_iters=None, + max_epochs=None, + ): if batch_processor is not None: if not callable(batch_processor): - raise TypeError('batch_processor must be callable, ' - f'but got {type(batch_processor)}') - warnings.warn('batch_processor is deprecated, please implement ' - 'train_step() and val_step() in the model instead.') + raise TypeError('batch_processor must be callable, ' f'but got {type(batch_processor)}') + warnings.warn( + 'batch_processor is deprecated, please implement ' 'train_step() and val_step() in the model instead.' + ) # raise an error is `batch_processor` is not None and # `model.train_step()` exists. if is_module_wrapper(model): @@ -71,8 +73,8 @@ def __init__(self, _model = model if hasattr(_model, 'train_step') or hasattr(_model, 'val_step'): raise RuntimeError( - 'batch_processor and model.train_step()/model.val_step() ' - 'cannot be both available.') + 'batch_processor and model.train_step()/model.val_step() ' 'cannot be both available.' + ) else: assert hasattr(model, 'train_step') @@ -82,21 +84,20 @@ def __init__(self, if not isinstance(optim, Optimizer): raise TypeError( f'optimizer must be a dict of torch.optim.Optimizers, ' - f'but optimizer["{name}"] is a {type(optim)}') + f'but optimizer["{name}"] is a {type(optim)}' + ) elif not isinstance(optimizer, Optimizer) and optimizer is not None: raise TypeError( - f'optimizer must be a torch.optim.Optimizer object ' - f'or dict or None, but got {type(optimizer)}') + f'optimizer must be a torch.optim.Optimizer object ' f'or dict or None, but got {type(optimizer)}' + ) # check the type of `logger` if not isinstance(logger, logging.Logger): - raise TypeError(f'logger must be a logging.Logger object, ' - f'but got {type(logger)}') + raise TypeError(f'logger must be a logging.Logger object, ' f'but got {type(logger)}') # check the type of `meta` if meta is not None and not isinstance(meta, dict): - raise TypeError( - f'meta must be a dict or None, but got {type(meta)}') + raise TypeError(f'meta must be a dict or None, but got {type(meta)}') self.model = model self.batch_processor = batch_processor @@ -127,8 +128,7 @@ def __init__(self, self._inner_iter = 0 if max_epochs is not None and max_iters is not None: - raise ValueError( - 'Only one of `max_epochs` or `max_iters` can be set.') + raise ValueError('Only one of `max_epochs` or `max_iters` can be set.') self._max_epochs = max_epochs self._max_iters = max_iters @@ -194,12 +194,7 @@ def run(self, data_loaders, workflow, **kwargs): pass @abstractmethod - def save_checkpoint(self, - out_dir, - filename_tmpl, - save_optimizer=True, - meta=None, - create_symlink=True): + def save_checkpoint(self, out_dir, filename_tmpl, save_optimizer=True, meta=None, create_symlink=True): pass def current_lr(self): @@ -217,8 +212,7 @@ def current_lr(self): for name, optim in self.optimizer.items(): lr[name] = [group['lr'] for group in optim.param_groups] else: - raise RuntimeError( - 'lr is not applicable because optimizer does not exist.') + raise RuntimeError('lr is not applicable because optimizer does not exist.') return lr def current_momentum(self): @@ -242,8 +236,7 @@ def _get_momentum(optimizer): return momentums if self.optimizer is None: - raise RuntimeError( - 'momentum is not applicable because optimizer does not exist.') + raise RuntimeError('momentum is not applicable because optimizer does not exist.') elif isinstance(self.optimizer, torch.optim.Optimizer): momentums = _get_momentum(self.optimizer) elif isinstance(self.optimizer, dict): @@ -329,34 +322,20 @@ def get_hook_info(self): stage_hook_infos.append(info) return '\n'.join(stage_hook_infos) - def load_checkpoint(self, - filename, - map_location='cpu', - strict=False, - revise_keys=[(r'^module.', '')]): - return load_checkpoint( - self.model, - filename, - map_location, - strict, - self.logger, - revise_keys=revise_keys) - - def resume(self, - checkpoint, - resume_optimizer=True, - map_location='default'): + def load_checkpoint(self, filename, map_location='cpu', strict=False, revise_keys=[(r'^module.', '')]): + return load_checkpoint(self.model, filename, map_location, strict, self.logger, revise_keys=revise_keys) + + def resume(self, checkpoint, resume_optimizer=True, map_location='default'): if map_location == 'default': if torch.cuda.is_available(): device_id = torch.cuda.current_device() checkpoint = self.load_checkpoint( - checkpoint, - map_location=lambda storage, loc: storage.cuda(device_id)) + checkpoint, map_location=lambda storage, loc: storage.cuda(device_id) + ) else: checkpoint = self.load_checkpoint(checkpoint) else: - checkpoint = self.load_checkpoint( - checkpoint, map_location=map_location) + checkpoint = self.load_checkpoint(checkpoint, map_location=map_location) self._epoch = checkpoint['meta']['epoch'] self._iter = checkpoint['meta']['iter'] @@ -369,15 +348,11 @@ def resume(self, # Re-calculate the number of iterations when resuming # models with different number of GPUs if 'config' in checkpoint['meta']: - config = mmcv.Config.fromstring( - checkpoint['meta']['config'], file_format='.py') + config = mmcv.Config.fromstring(checkpoint['meta']['config'], file_format='.py') previous_gpu_ids = config.get('gpu_ids', None) - if previous_gpu_ids and len(previous_gpu_ids) > 0 and len( - previous_gpu_ids) != self.world_size: - self._iter = int(self._iter * len(previous_gpu_ids) / - self.world_size) - self.logger.info('the iteration number is changed due to ' - 'change of GPU number') + if previous_gpu_ids and len(previous_gpu_ids) > 0 and len(previous_gpu_ids) != self.world_size: + self._iter = int(self._iter * len(previous_gpu_ids) / self.world_size) + self.logger.info('the iteration number is changed due to ' 'change of GPU number') # resume meta information meta self.meta = checkpoint['meta'] @@ -387,12 +362,9 @@ def resume(self, self.optimizer.load_state_dict(checkpoint['optimizer']) elif isinstance(self.optimizer, dict): for k in self.optimizer.keys(): - self.optimizer[k].load_state_dict( - checkpoint['optimizer'][k]) + self.optimizer[k].load_state_dict(checkpoint['optimizer'][k]) else: - raise TypeError( - 'Optimizer should be dict or torch.optim.Optimizer ' - f'but got {type(self.optimizer)}') + raise TypeError('Optimizer should be dict or torch.optim.Optimizer ' f'but got {type(self.optimizer)}') self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) @@ -463,8 +435,7 @@ def register_logger_hooks(self, log_config): return log_interval = log_config['interval'] for info in log_config['hooks']: - logger_hook = mmcv.build_from_cfg( - info, HOOKS, default_args=dict(interval=log_interval)) + logger_hook = mmcv.build_from_cfg(info, HOOKS, default_args=dict(interval=log_interval)) self.register_hook(logger_hook, priority='VERY_LOW') def register_timer_hook(self, timer_config): @@ -500,14 +471,16 @@ def register_profiler_hook(self, profiler_config): hook = profiler_config self.register_hook(hook) - def register_training_hooks(self, - lr_config, - optimizer_config=None, - checkpoint_config=None, - log_config=None, - momentum_config=None, - timer_config=dict(type='IterTimerHook'), - custom_hooks_config=None): + def register_training_hooks( + self, + lr_config, + optimizer_config=None, + checkpoint_config=None, + log_config=None, + momentum_config=None, + timer_config=dict(type='IterTimerHook'), + custom_hooks_config=None, + ): """Register default and custom hooks for training. Default and custom hooks include: diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py index 77c96ba0b2f3..aaebf844ced3 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py @@ -13,12 +13,9 @@ def build_runner_constructor(cfg): def build_runner(cfg, default_args=None): runner_cfg = copy.deepcopy(cfg) - constructor_type = runner_cfg.pop('constructor', - 'DefaultRunnerConstructor') + constructor_type = runner_cfg.pop('constructor', 'DefaultRunnerConstructor') runner_constructor = build_runner_constructor( - dict( - type=constructor_type, - runner_cfg=runner_cfg, - default_args=default_args)) + dict(type=constructor_type, runner_cfg=runner_cfg, default_args=default_args) + ) runner = runner_constructor() return runner diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py index 28621e8a2863..479da4f51b26 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py @@ -16,6 +16,7 @@ from torch.utils import model_zoo import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + from ..fileio import FileClient from ..fileio import load as load_file from ..parallel import is_module_wrapper @@ -29,10 +30,8 @@ def _get_mmcv_home(): mmcv_home = os.path.expanduser( - os.getenv( - ENV_MMCV_HOME, - os.path.join( - os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv'))) + os.getenv(ENV_MMCV_HOME, os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv')) + ) mkdir_or_exist(mmcv_home) return mmcv_home @@ -69,11 +68,10 @@ def load(module, prefix=''): # complicated structure, e.g., nn.Module(nn.Module(DDP)) if is_module_wrapper(module): module = module.module - local_metadata = {} if metadata is None else metadata.get( - prefix[:-1], {}) - module._load_from_state_dict(state_dict, prefix, local_metadata, True, - all_missing_keys, unexpected_keys, - err_msg) + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, all_missing_keys, unexpected_keys, err_msg + ) for name, child in module._modules.items(): if child is not None: load(child, prefix + name + '.') @@ -82,21 +80,16 @@ def load(module, prefix=''): load = None # break load->load reference cycle # ignore "num_batches_tracked" of BN layers - missing_keys = [ - key for key in all_missing_keys if 'num_batches_tracked' not in key - ] + missing_keys = [key for key in all_missing_keys if 'num_batches_tracked' not in key] if unexpected_keys: - err_msg.append('unexpected key in source ' - f'state_dict: {", ".join(unexpected_keys)}\n') + err_msg.append('unexpected key in source ' f'state_dict: {", ".join(unexpected_keys)}\n') if missing_keys: - err_msg.append( - f'missing keys in source state_dict: {", ".join(missing_keys)}\n') + err_msg.append(f'missing keys in source state_dict: {", ".join(missing_keys)}\n') rank, _ = get_dist_info() if len(err_msg) > 0 and rank == 0: - err_msg.insert( - 0, 'The model and loaded state dict do not match exactly\n') + err_msg.insert(0, 'The model and loaded state dict do not match exactly\n') err_msg = '\n'.join(err_msg) if strict: raise RuntimeError(err_msg) @@ -140,8 +133,7 @@ def get_mmcls_models(): def get_deprecated_model_names(): - deprecate_json_path = osp.join(mmcv.__path__[0], - 'model_zoo/deprecated.json') + deprecate_json_path = osp.join(mmcv.__path__[0], 'model_zoo/deprecated.json') deprecate_urls = load_file(deprecate_json_path) assert isinstance(deprecate_urls, dict) @@ -176,10 +168,10 @@ def _register_scheme(cls, prefixes, loader, force=False): else: raise KeyError( f'{prefix} is already registered as a loader backend, ' - 'add "force=True" if you want to override it') + 'add "force=True" if you want to override it' + ) # sort, longer prefixes take priority - cls._schemes = OrderedDict( - sorted(cls._schemes.items(), key=lambda t: t[0], reverse=True)) + cls._schemes = OrderedDict(sorted(cls._schemes.items(), key=lambda t: t[0], reverse=True)) @classmethod def register_scheme(cls, prefixes, loader=None, force=False): @@ -240,8 +232,7 @@ def load_checkpoint(cls, filename, map_location=None, logger=None): checkpoint_loader = cls._get_checkpoint_loader(filename) class_name = checkpoint_loader.__name__ - mmcv.print_log( - f'load checkpoint from {class_name[10:]} path: {filename}', logger) + mmcv.print_log(f'load checkpoint from {class_name[10:]} path: {filename}', logger) return checkpoint_loader(filename, map_location) @@ -281,13 +272,11 @@ def load_from_http(filename, map_location=None, model_dir=None): rank, world_size = get_dist_info() rank = int(os.environ.get('LOCAL_RANK', rank)) if rank == 0: - checkpoint = model_zoo.load_url( - filename, model_dir=model_dir, map_location=map_location) + checkpoint = model_zoo.load_url(filename, model_dir=model_dir, map_location=map_location) if world_size > 1: torch.distributed.barrier() if rank > 0: - checkpoint = model_zoo.load_url( - filename, model_dir=model_dir, map_location=map_location) + checkpoint = model_zoo.load_url(filename, model_dir=model_dir, map_location=map_location) return checkpoint @@ -305,15 +294,13 @@ def load_from_pavi(filename, map_location=None): Returns: dict or OrderedDict: The loaded checkpoint. """ - assert filename.startswith('pavi://'), \ - f'Expected filename startswith `pavi://`, but get {filename}' + assert filename.startswith('pavi://'), f'Expected filename startswith `pavi://`, but get {filename}' model_path = filename[7:] try: from pavi import modelcloud except ImportError: - raise ImportError( - 'Please install pavi to load checkpoint from modelcloud.') + raise ImportError('Please install pavi to load checkpoint from modelcloud.') model = modelcloud.get(model_path) with TemporaryDirectory() as tmp_dir: @@ -347,8 +334,7 @@ def load_from_ceph(filename, map_location=None, backend='petrel'): raise ValueError(f'Load from Backend {backend} is not supported.') if backend == 'ceph': - warnings.warn( - 'CephBackend will be deprecated, please use PetrelBackend instead') + warnings.warn('CephBackend will be deprecated, please use PetrelBackend instead') # CephClient and PetrelBackend have the same prefix 's3://' and the latter # will be chosen as default. If PetrelBackend can not be instantiated @@ -379,8 +365,7 @@ def load_from_torchvision(filename, map_location=None): """ model_urls = get_torchvision_models() if filename.startswith('modelzoo://'): - warnings.warn('The URL scheme of "modelzoo://" is deprecated, please ' - 'use "torchvision://" instead') + warnings.warn('The URL scheme of "modelzoo://" is deprecated, please ' 'use "torchvision://" instead') model_name = filename[11:] else: model_name = filename[14:] @@ -412,8 +397,9 @@ def load_from_openmmlab(filename, map_location=None): deprecated_urls = get_deprecated_model_names() if model_name in deprecated_urls: - warnings.warn(f'{prefix_str}{model_name} is deprecated in favor ' - f'of {prefix_str}{deprecated_urls[model_name]}') + warnings.warn( + f'{prefix_str}{model_name} is deprecated in favor ' f'of {prefix_str}{deprecated_urls[model_name]}' + ) model_name = deprecated_urls[model_name] model_url = model_urls[model_name] # check if is url @@ -441,8 +427,7 @@ def load_from_mmcls(filename, map_location=None): model_urls = get_mmcls_models() model_name = filename[8:] - checkpoint = load_from_http( - model_urls[model_name], map_location=map_location) + checkpoint = load_from_http(model_urls[model_name], map_location=map_location) checkpoint = _process_mmcls_checkpoint(checkpoint) return checkpoint @@ -491,21 +476,13 @@ def _load_checkpoint_with_prefix(prefix, filename, map_location=None): prefix += '.' prefix_len = len(prefix) - state_dict = { - k[prefix_len:]: v - for k, v in state_dict.items() if k.startswith(prefix) - } + state_dict = {k[prefix_len:]: v for k, v in state_dict.items() if k.startswith(prefix)} assert state_dict, f'{prefix} is not in the pretrained model' return state_dict -def load_checkpoint(model, - filename, - map_location=None, - strict=False, - logger=None, - revise_keys=[(r'^module\.', '')]): +def load_checkpoint(model, filename, map_location=None, strict=False, logger=None, revise_keys=[(r'^module\.', '')]): """Load checkpoint from a file or URI. Args: @@ -528,8 +505,7 @@ def load_checkpoint(model, checkpoint = _load_checkpoint(filename, map_location, logger) # OrderedDict is a subclass of dict if not isinstance(checkpoint, dict): - raise RuntimeError( - f'No state_dict found in checkpoint file {filename}') + raise RuntimeError(f'No state_dict found in checkpoint file {filename}') # get state_dict from checkpoint if 'state_dict' in checkpoint: state_dict = checkpoint['state_dict'] @@ -539,9 +515,7 @@ def load_checkpoint(model, # strip prefix of state_dict metadata = getattr(state_dict, '_metadata', OrderedDict()) for p, r in revise_keys: - state_dict = OrderedDict( - {re.sub(p, r, k): v - for k, v in state_dict.items()}) + state_dict = OrderedDict({re.sub(p, r, k): v for k, v in state_dict.items()}) # Keep metadata in state_dict state_dict._metadata = metadata @@ -617,13 +591,11 @@ def get_state_dict(module, destination=None, prefix='', keep_vars=False): if destination is None: destination = OrderedDict() destination._metadata = OrderedDict() - destination._metadata[prefix[:-1]] = local_metadata = dict( - version=module._version) + destination._metadata[prefix[:-1]] = local_metadata = dict(version=module._version) _save_to_state_dict(module, destination, prefix, keep_vars) for name, child in module._modules.items(): if child is not None: - get_state_dict( - child, destination, prefix + name + '.', keep_vars=keep_vars) + get_state_dict(child, destination, prefix + name + '.', keep_vars=keep_vars) for hook in module._state_dict_hooks.values(): hook_result = hook(module, destination, prefix, local_metadata) if hook_result is not None: @@ -631,11 +603,7 @@ def get_state_dict(module, destination=None, prefix='', keep_vars=False): return destination -def save_checkpoint(model, - filename, - optimizer=None, - meta=None, - file_client_args=None): +def save_checkpoint(model, filename, optimizer=None, meta=None, file_client_args=None): """Save checkpoint to file. The checkpoint will have 3 fields: ``meta``, ``state_dict`` and @@ -664,10 +632,7 @@ def save_checkpoint(model, # save class name to the meta meta.update(CLASSES=model.CLASSES) - checkpoint = { - 'meta': meta, - 'state_dict': weights_to_cpu(get_state_dict(model)) - } + checkpoint = {'meta': meta, 'state_dict': weights_to_cpu(get_state_dict(model))} # save optimizer state dict in the checkpoint if isinstance(optimizer, Optimizer): checkpoint['optimizer'] = optimizer.state_dict() @@ -679,14 +644,12 @@ def save_checkpoint(model, if filename.startswith('pavi://'): if file_client_args is not None: raise ValueError( - 'file_client_args should be "None" if filename starts with' - f'"pavi://", but got {file_client_args}') + 'file_client_args should be "None" if filename starts with' f'"pavi://", but got {file_client_args}' + ) try: - from pavi import modelcloud - from pavi import exception + from pavi import exception, modelcloud except ImportError: - raise ImportError( - 'Please install pavi to load checkpoint from modelcloud.') + raise ImportError('Please install pavi to load checkpoint from modelcloud.') model_path = filename[7:] root = modelcloud.Folder() model_dir, model_name = osp.split(model_path) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py index 7db4c3922229..c840d803f743 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py @@ -35,8 +35,7 @@ class DefaultRunnerConstructor: def __init__(self, runner_cfg, default_args=None): if not isinstance(runner_cfg, dict): - raise TypeError('runner_cfg should be a dict', - f'but got {type(runner_cfg)}') + raise TypeError('runner_cfg should be a dict', f'but got {type(runner_cfg)}') self.runner_cfg = runner_cfg self.default_args = default_args diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py index d3a1ef3fda5c..19799b785be9 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py @@ -7,8 +7,7 @@ import torch import torch.multiprocessing as mp from torch import distributed as dist -from torch._utils import (_flatten_dense_tensors, _take_tensors, - _unflatten_dense_tensors) +from torch._utils import _flatten_dense_tensors, _take_tensors, _unflatten_dense_tensors def init_dist(launcher, backend='nccl', **kwargs): @@ -56,8 +55,7 @@ def _init_dist_slurm(backend, port=None): node_list = os.environ['SLURM_NODELIST'] num_gpus = torch.cuda.device_count() torch.cuda.set_device(proc_id % num_gpus) - addr = subprocess.getoutput( - f'scontrol show hostname {node_list} | head -n1') + addr = subprocess.getoutput(f'scontrol show hostname {node_list} | head -n1') # specify master port if port is not None: os.environ['MASTER_PORT'] = str(port) @@ -86,7 +84,6 @@ def get_dist_info(): def master_only(func): - @functools.wraps(func) def wrapper(*args, **kwargs): rank, _ = get_dist_info() @@ -128,10 +125,7 @@ def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): bucket_size_mb (int, optional): Size of bucket, the unit is MB. Defaults to -1. """ - grads = [ - param.grad.data for param in params - if param.requires_grad and param.grad is not None - ] + grads = [param.grad.data for param in params if param.requires_grad and param.grad is not None] _, world_size = get_dist_info() if world_size == 1: return @@ -159,6 +153,5 @@ def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): flat_tensors = _flatten_dense_tensors(bucket) dist.all_reduce(flat_tensors) flat_tensors.div_(world_size) - for tensor, synced in zip( - bucket, _unflatten_dense_tensors(flat_tensors, bucket)): + for tensor, synced in zip(bucket, _unflatten_dense_tensors(flat_tensors, bucket)): tensor.copy_(synced) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py index 078e159d29fc..ba7a97fa0241 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py @@ -23,16 +23,13 @@ class EpochBasedRunner(BaseRunner): def run_iter(self, data_batch, train_mode, **kwargs): if self.batch_processor is not None: - outputs = self.batch_processor( - self.model, data_batch, train_mode=train_mode, **kwargs) + outputs = self.batch_processor(self.model, data_batch, train_mode=train_mode, **kwargs) elif train_mode: - outputs = self.model.train_step(data_batch, self.optimizer, - **kwargs) + outputs = self.model.train_step(data_batch, self.optimizer, **kwargs) else: outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) if not isinstance(outputs, dict): - raise TypeError('"batch_processor()" or "model.train_step()"' - 'and "model.val_step()" must return a dict') + raise TypeError('"batch_processor()" or "model.train_step()"' 'and "model.val_step()" must return a dict') if 'log_vars' in outputs: self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) self.outputs = outputs @@ -85,12 +82,12 @@ def run(self, data_loaders, workflow, max_epochs=None, **kwargs): assert len(data_loaders) == len(workflow) if max_epochs is not None: warnings.warn( - 'setting max_epochs in run is deprecated, ' - 'please set max_epochs in runner_config', DeprecationWarning) + 'setting max_epochs in run is deprecated, ' 'please set max_epochs in runner_config', + DeprecationWarning, + ) self._max_epochs = max_epochs - assert self._max_epochs is not None, ( - 'max_epochs must be specified during instantiation') + assert self._max_epochs is not None, 'max_epochs must be specified during instantiation' for i, flow in enumerate(workflow): mode, epochs = flow @@ -99,12 +96,9 @@ def run(self, data_loaders, workflow, max_epochs=None, **kwargs): break work_dir = self.work_dir if self.work_dir is not None else 'NONE' - self.logger.info('Start running, host: %s, work_dir: %s', - get_host_info(), work_dir) - self.logger.info('Hooks will be executed in the following order:\n%s', - self.get_hook_info()) - self.logger.info('workflow: %s, max: %d epochs', workflow, - self._max_epochs) + self.logger.info('Start running, host: %s, work_dir: %s', get_host_info(), work_dir) + self.logger.info('Hooks will be executed in the following order:\n%s', self.get_hook_info()) + self.logger.info('workflow: %s, max: %d epochs', workflow, self._max_epochs) self.call_hook('before_run') while self.epoch < self._max_epochs: @@ -112,14 +106,10 @@ def run(self, data_loaders, workflow, max_epochs=None, **kwargs): mode, epochs = flow if isinstance(mode, str): # self.train() if not hasattr(self, mode): - raise ValueError( - f'runner has no method named "{mode}" to run an ' - 'epoch') + raise ValueError(f'runner has no method named "{mode}" to run an ' 'epoch') epoch_runner = getattr(self, mode) else: - raise TypeError( - 'mode in workflow must be a str, but got {}'.format( - type(mode))) + raise TypeError('mode in workflow must be a str, but got {}'.format(type(mode))) for _ in range(epochs): if mode == 'train' and self.epoch >= self._max_epochs: @@ -129,12 +119,9 @@ def run(self, data_loaders, workflow, max_epochs=None, **kwargs): time.sleep(1) # wait for some hooks like loggers to finish self.call_hook('after_run') - def save_checkpoint(self, - out_dir, - filename_tmpl='epoch_{}.pth', - save_optimizer=True, - meta=None, - create_symlink=True): + def save_checkpoint( + self, out_dir, filename_tmpl='epoch_{}.pth', save_optimizer=True, meta=None, create_symlink=True + ): """Save the checkpoint. Args: @@ -153,8 +140,7 @@ def save_checkpoint(self, if meta is None: meta = {} elif not isinstance(meta, dict): - raise TypeError( - f'meta should be a dict or None, but got {type(meta)}') + raise TypeError(f'meta should be a dict or None, but got {type(meta)}') if self.meta is not None: meta.update(self.meta) # Note: meta.update(self.meta) should be done before @@ -182,6 +168,5 @@ class Runner(EpochBasedRunner): """Deprecated name of EpochBasedRunner.""" def __init__(self, *args, **kwargs): - warnings.warn( - 'Runner was deprecated, please use EpochBasedRunner instead') + warnings.warn('Runner was deprecated, please use EpochBasedRunner instead') super().__init__(*args, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py index e5067eebb7e4..e205ab42af90 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py @@ -41,13 +41,9 @@ def cast_tensor_type(inputs, src_type, dst_type): elif isinstance(inputs, np.ndarray): return inputs elif isinstance(inputs, abc.Mapping): - return type(inputs)({ - k: cast_tensor_type(v, src_type, dst_type) - for k, v in inputs.items() - }) + return type(inputs)({k: cast_tensor_type(v, src_type, dst_type) for k, v in inputs.items()}) elif isinstance(inputs, abc.Iterable): - return type(inputs)( - cast_tensor_type(item, src_type, dst_type) for item in inputs) + return type(inputs)(cast_tensor_type(item, src_type, dst_type) for item in inputs) else: return inputs @@ -86,14 +82,12 @@ def auto_fp16(apply_to=None, out_fp32=False): """ def auto_fp16_wrapper(old_func): - @functools.wraps(old_func) def new_func(*args, **kwargs): # check if the module has set the attribute `fp16_enabled`, if not, # just fallback to the original method. if not isinstance(args[0], torch.nn.Module): - raise TypeError('@auto_fp16 can only be used to decorate the ' - 'method of nn.Module') + raise TypeError('@auto_fp16 can only be used to decorate the ' 'method of nn.Module') if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): return old_func(*args, **kwargs) @@ -105,11 +99,10 @@ def new_func(*args, **kwargs): new_args = [] # NOTE: default args are not taken into consideration if args: - arg_names = args_info.args[:len(args)] + arg_names = args_info.args[: len(args)] for i, arg_name in enumerate(arg_names): if arg_name in args_to_cast: - new_args.append( - cast_tensor_type(args[i], torch.float, torch.half)) + new_args.append(cast_tensor_type(args[i], torch.float, torch.half)) else: new_args.append(args[i]) # convert the kwargs that need to be processed @@ -117,13 +110,11 @@ def new_func(*args, **kwargs): if kwargs: for arg_name, arg_value in kwargs.items(): if arg_name in args_to_cast: - new_kwargs[arg_name] = cast_tensor_type( - arg_value, torch.float, torch.half) + new_kwargs[arg_name] = cast_tensor_type(arg_value, torch.float, torch.half) else: new_kwargs[arg_name] = arg_value # apply converted arguments to the decorated method - if (TORCH_VERSION != 'parrots' and - digit_version(TORCH_VERSION) >= digit_version('1.6.0')): + if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) >= digit_version('1.6.0'): with autocast(enabled=True): output = old_func(*new_args, **new_kwargs) else: @@ -174,14 +165,12 @@ def force_fp32(apply_to=None, out_fp16=False): """ def force_fp32_wrapper(old_func): - @functools.wraps(old_func) def new_func(*args, **kwargs): # check if the module has set the attribute `fp16_enabled`, if not, # just fallback to the original method. if not isinstance(args[0], torch.nn.Module): - raise TypeError('@force_fp32 can only be used to decorate the ' - 'method of nn.Module') + raise TypeError('@force_fp32 can only be used to decorate the ' 'method of nn.Module') if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): return old_func(*args, **kwargs) # get the arg spec of the decorated method @@ -191,11 +180,10 @@ def new_func(*args, **kwargs): # convert the args that need to be processed new_args = [] if args: - arg_names = args_info.args[:len(args)] + arg_names = args_info.args[: len(args)] for i, arg_name in enumerate(arg_names): if arg_name in args_to_cast: - new_args.append( - cast_tensor_type(args[i], torch.half, torch.float)) + new_args.append(cast_tensor_type(args[i], torch.half, torch.float)) else: new_args.append(args[i]) # convert the kwargs that need to be processed @@ -203,13 +191,11 @@ def new_func(*args, **kwargs): if kwargs: for arg_name, arg_value in kwargs.items(): if arg_name in args_to_cast: - new_kwargs[arg_name] = cast_tensor_type( - arg_value, torch.half, torch.float) + new_kwargs[arg_name] = cast_tensor_type(arg_value, torch.half, torch.float) else: new_kwargs[arg_name] = arg_value # apply converted arguments to the decorated method - if (TORCH_VERSION != 'parrots' and - digit_version(TORCH_VERSION) >= digit_version('1.6.0')): + if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) >= digit_version('1.6.0'): with autocast(enabled=False): output = old_func(*new_args, **new_kwargs) else: @@ -227,7 +213,8 @@ def new_func(*args, **kwargs): def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): warnings.warning( '"mmcv.runner.fp16_utils.allreduce_grads" is deprecated, and will be ' - 'removed in v2.8. Please switch to "mmcv.runner.allreduce_grads') + 'removed in v2.8. Please switch to "mmcv.runner.allreduce_grads' + ) _allreduce_grads(params, coalesce=coalesce, bucket_size_mb=bucket_size_mb) @@ -248,8 +235,7 @@ def wrap_fp16_model(model): Args: model (nn.Module): Model in FP32. """ - if (TORCH_VERSION == 'parrots' - or digit_version(TORCH_VERSION) < digit_version('1.6.0')): + if TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.6.0'): # convert model to fp16 model.half() # patch the normalization layers to make it work in fp32 mode @@ -273,8 +259,7 @@ def patch_norm_fp32(module): if isinstance(module, (nn.modules.batchnorm._BatchNorm, nn.GroupNorm)): module.float() if isinstance(module, nn.GroupNorm) or torch.__version__ < '1.3': - module.forward = patch_forward_method(module.forward, torch.half, - torch.float) + module.forward = patch_forward_method(module.forward, torch.half, torch.float) for child in module.children(): patch_norm_fp32(child) return module @@ -294,8 +279,7 @@ def patch_forward_method(func, src_type, dst_type, convert_output=True): """ def new_forward(*args, **kwargs): - output = func(*cast_tensor_type(args, src_type, dst_type), - **cast_tensor_type(kwargs, src_type, dst_type)) + output = func(*cast_tensor_type(args, src_type, dst_type), **cast_tensor_type(kwargs, src_type, dst_type)) if convert_output: output = cast_tensor_type(output, dst_type, src_type) return output @@ -332,15 +316,10 @@ class LossScaler: overflow to wait before increasing the loss scale. Default: 1000. """ - def __init__(self, - init_scale=2**32, - mode='dynamic', - scale_factor=2., - scale_window=1000): + def __init__(self, init_scale=2 ** 32, mode='dynamic', scale_factor=2.0, scale_window=1000): self.cur_scale = init_scale self.cur_iter = 0 - assert mode in ('dynamic', - 'static'), 'mode can only be dynamic or static' + assert mode in ('dynamic', 'static'), 'mode can only be dynamic or static' self.mode = mode self.last_overflow_iter = -1 self.scale_factor = scale_factor @@ -364,8 +343,7 @@ def _has_inf_or_nan(x): raise return True else: - if cpu_sum == float('inf') or cpu_sum == -float('inf') \ - or cpu_sum != cpu_sum: + if cpu_sum == float('inf') or cpu_sum == -float('inf') or cpu_sum != cpu_sum: return True return False @@ -377,8 +355,7 @@ def update_scale(self, overflow): self.cur_scale = max(self.cur_scale / self.scale_factor, 1) self.last_overflow_iter = self.cur_iter else: - if (self.cur_iter - self.last_overflow_iter) % \ - self.scale_window == 0: + if (self.cur_iter - self.last_overflow_iter) % self.scale_window == 0: self.cur_scale *= self.scale_factor self.cur_iter += 1 @@ -390,7 +367,8 @@ def state_dict(self): mode=self.mode, last_overflow_iter=self.last_overflow_iter, scale_factor=self.scale_factor, - scale_window=self.scale_window) + scale_window=self.scale_window, + ) def load_state_dict(self, state_dict): """Loads the loss_scaler state dict. diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py index 915af28cefab..6b1b86fba36e 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py @@ -5,25 +5,54 @@ from .evaluation import DistEvalHook, EvalHook from .hook import HOOKS, Hook from .iter_timer import IterTimerHook -from .logger import (DvcliveLoggerHook, LoggerHook, MlflowLoggerHook, - NeptuneLoggerHook, PaviLoggerHook, TensorboardLoggerHook, - TextLoggerHook, WandbLoggerHook) +from .logger import ( + DvcliveLoggerHook, + LoggerHook, + MlflowLoggerHook, + NeptuneLoggerHook, + PaviLoggerHook, + TensorboardLoggerHook, + TextLoggerHook, + WandbLoggerHook, +) from .lr_updater import LrUpdaterHook from .memory import EmptyCacheHook from .momentum_updater import MomentumUpdaterHook -from .optimizer import (Fp16OptimizerHook, GradientCumulativeFp16OptimizerHook, - GradientCumulativeOptimizerHook, OptimizerHook) +from .optimizer import ( + Fp16OptimizerHook, + GradientCumulativeFp16OptimizerHook, + GradientCumulativeOptimizerHook, + OptimizerHook, +) from .profiler import ProfilerHook from .sampler_seed import DistSamplerSeedHook from .sync_buffer import SyncBuffersHook __all__ = [ - 'HOOKS', 'Hook', 'CheckpointHook', 'ClosureHook', 'LrUpdaterHook', - 'OptimizerHook', 'Fp16OptimizerHook', 'IterTimerHook', - 'DistSamplerSeedHook', 'EmptyCacheHook', 'LoggerHook', 'MlflowLoggerHook', - 'PaviLoggerHook', 'TextLoggerHook', 'TensorboardLoggerHook', - 'NeptuneLoggerHook', 'WandbLoggerHook', 'DvcliveLoggerHook', - 'MomentumUpdaterHook', 'SyncBuffersHook', 'EMAHook', 'EvalHook', - 'DistEvalHook', 'ProfilerHook', 'GradientCumulativeOptimizerHook', - 'GradientCumulativeFp16OptimizerHook' + 'HOOKS', + 'Hook', + 'CheckpointHook', + 'ClosureHook', + 'LrUpdaterHook', + 'OptimizerHook', + 'Fp16OptimizerHook', + 'IterTimerHook', + 'DistSamplerSeedHook', + 'EmptyCacheHook', + 'LoggerHook', + 'MlflowLoggerHook', + 'PaviLoggerHook', + 'TextLoggerHook', + 'TensorboardLoggerHook', + 'NeptuneLoggerHook', + 'WandbLoggerHook', + 'DvcliveLoggerHook', + 'MomentumUpdaterHook', + 'SyncBuffersHook', + 'EMAHook', + 'EvalHook', + 'DistEvalHook', + 'ProfilerHook', + 'GradientCumulativeOptimizerHook', + 'GradientCumulativeFp16OptimizerHook', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py index 400e589a9de9..24381d6876ef 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py @@ -48,16 +48,18 @@ class CheckpointHook(Hook): path will be "/path/of/A/B". """ - def __init__(self, - interval=-1, - by_epoch=True, - save_optimizer=True, - out_dir=None, - max_keep_ckpts=-1, - save_last=True, - sync_buffer=False, - file_client_args=None, - **kwargs): + def __init__( + self, + interval=-1, + by_epoch=True, + save_optimizer=True, + out_dir=None, + max_keep_ckpts=-1, + save_last=True, + sync_buffer=False, + file_client_args=None, + **kwargs, + ): self.interval = interval self.by_epoch = by_epoch self.save_optimizer = save_optimizer @@ -72,8 +74,7 @@ def before_run(self, runner): if not self.out_dir: self.out_dir = runner.work_dir - self.file_client = FileClient.infer_client(self.file_client_args, - self.out_dir) + self.file_client = FileClient.infer_client(self.file_client_args, self.out_dir) # if `self.out_dir` is not equal to `runner.work_dir`, it means that # `self.out_dir` is set so the final `self.out_dir` is the @@ -83,19 +84,20 @@ def before_run(self, runner): basename = osp.basename(runner.work_dir.rstrip(osp.sep)) self.out_dir = self.file_client.join_path(self.out_dir, basename) - runner.logger.info((f'Checkpoints will be saved to {self.out_dir} by ' - f'{self.file_client.name}.')) + runner.logger.info((f'Checkpoints will be saved to {self.out_dir} by ' f'{self.file_client.name}.')) # disable the create_symlink option because some file backends do not # allow to create a symlink if 'create_symlink' in self.args: - if self.args[ - 'create_symlink'] and not self.file_client.allow_symlink: + if self.args['create_symlink'] and not self.file_client.allow_symlink: self.args['create_symlink'] = False warnings.warn( - ('create_symlink is set as True by the user but is changed' - 'to be False because creating symbolic link is not ' - f'allowed in {self.file_client.name}')) + ( + 'create_symlink is set as True by the user but is changed' + 'to be False because creating symbolic link is not ' + f'allowed in {self.file_client.name}' + ) + ) else: self.args['create_symlink'] = self.file_client.allow_symlink @@ -106,11 +108,8 @@ def after_train_epoch(self, runner): # save checkpoint for following cases: # 1. every ``self.interval`` epochs # 2. reach the last epoch of training - if self.every_n_epochs( - runner, self.interval) or (self.save_last - and self.is_last_epoch(runner)): - runner.logger.info( - f'Saving checkpoint at {runner.epoch + 1} epochs') + if self.every_n_epochs(runner, self.interval) or (self.save_last and self.is_last_epoch(runner)): + runner.logger.info(f'Saving checkpoint at {runner.epoch + 1} epochs') if self.sync_buffer: allreduce_params(runner.model.buffers()) self._save_checkpoint(runner) @@ -118,18 +117,14 @@ def after_train_epoch(self, runner): @master_only def _save_checkpoint(self, runner): """Save the current checkpoint and delete unwanted checkpoint.""" - runner.save_checkpoint( - self.out_dir, save_optimizer=self.save_optimizer, **self.args) + runner.save_checkpoint(self.out_dir, save_optimizer=self.save_optimizer, **self.args) if runner.meta is not None: if self.by_epoch: - cur_ckpt_filename = self.args.get( - 'filename_tmpl', 'epoch_{}.pth').format(runner.epoch + 1) + cur_ckpt_filename = self.args.get('filename_tmpl', 'epoch_{}.pth').format(runner.epoch + 1) else: - cur_ckpt_filename = self.args.get( - 'filename_tmpl', 'iter_{}.pth').format(runner.iter + 1) + cur_ckpt_filename = self.args.get('filename_tmpl', 'iter_{}.pth').format(runner.iter + 1) runner.meta.setdefault('hook_msgs', dict()) - runner.meta['hook_msgs']['last_ckpt'] = self.file_client.join_path( - self.out_dir, cur_ckpt_filename) + runner.meta['hook_msgs']['last_ckpt'] = self.file_client.join_path(self.out_dir, cur_ckpt_filename) # remove other checkpoints if self.max_keep_ckpts > 0: if self.by_epoch: @@ -138,13 +133,10 @@ def _save_checkpoint(self, runner): else: name = 'iter_{}.pth' current_ckpt = runner.iter + 1 - redundant_ckpts = range( - current_ckpt - self.max_keep_ckpts * self.interval, 0, - -self.interval) + redundant_ckpts = range(current_ckpt - self.max_keep_ckpts * self.interval, 0, -self.interval) filename_tmpl = self.args.get('filename_tmpl', name) for _step in redundant_ckpts: - ckpt_path = self.file_client.join_path( - self.out_dir, filename_tmpl.format(_step)) + ckpt_path = self.file_client.join_path(self.out_dir, filename_tmpl.format(_step)) if self.file_client.isfile(ckpt_path): self.file_client.remove(ckpt_path) else: @@ -157,11 +149,8 @@ def after_train_iter(self, runner): # save checkpoint for following cases: # 1. every ``self.interval`` iterations # 2. reach the last iteration of training - if self.every_n_iters( - runner, self.interval) or (self.save_last - and self.is_last_iter(runner)): - runner.logger.info( - f'Saving checkpoint at {runner.iter + 1} iterations') + if self.every_n_iters(runner, self.interval) or (self.save_last and self.is_last_iter(runner)): + runner.logger.info(f'Saving checkpoint at {runner.iter + 1} iterations') if self.sync_buffer: allreduce_params(runner.model.buffers()) self._save_checkpoint(runner) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py index b955f81f425b..0781664b46a3 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py @@ -4,7 +4,6 @@ @HOOKS.register_module() class ClosureHook(Hook): - def __init__(self, fn_name, fn): assert hasattr(self, fn_name) assert callable(fn) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py index 15c7e68088f0..8114b106bf3c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py @@ -26,16 +26,12 @@ class EMAHook(Hook): resume_from (str): The checkpoint path. Defaults to None. """ - def __init__(self, - momentum=0.0002, - interval=1, - warm_up=100, - resume_from=None): + def __init__(self, momentum=0.0002, interval=1, warm_up=100, resume_from=None): assert isinstance(interval, int) and interval > 0 self.warm_up = warm_up self.interval = interval assert momentum > 0 and momentum < 1 - self.momentum = momentum**interval + self.momentum = momentum ** interval self.checkpoint = resume_from def before_run(self, runner): @@ -61,8 +57,7 @@ def after_train_iter(self, runner): """Update ema parameter every self.interval iterations.""" curr_step = runner.iter # We warm up the momentum considering the instability at beginning - momentum = min(self.momentum, - (1 + curr_step) / (self.warm_up + curr_step)) + momentum = min(self.momentum, (1 + curr_step) / (self.warm_up + curr_step)) if curr_step % self.interval != 0: return for name, parameter in self.model_parameters.items(): diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py index 5e5ba5b2e5d2..1431bb39a665 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py @@ -76,53 +76,50 @@ class EvalHook(Hook): rule_map = {'greater': lambda x, y: x > y, 'less': lambda x, y: x < y} init_value_map = {'greater': -inf, 'less': inf} - _default_greater_keys = [ - 'acc', 'top', 'AR@', 'auc', 'precision', 'mAP', 'mDice', 'mIoU', - 'mAcc', 'aAcc' - ] + _default_greater_keys = ['acc', 'top', 'AR@', 'auc', 'precision', 'mAP', 'mDice', 'mIoU', 'mAcc', 'aAcc'] _default_less_keys = ['loss'] - def __init__(self, - dataloader, - start=None, - interval=1, - by_epoch=True, - save_best=None, - rule=None, - test_fn=None, - greater_keys=None, - less_keys=None, - out_dir=None, - file_client_args=None, - **eval_kwargs): + def __init__( + self, + dataloader, + start=None, + interval=1, + by_epoch=True, + save_best=None, + rule=None, + test_fn=None, + greater_keys=None, + less_keys=None, + out_dir=None, + file_client_args=None, + **eval_kwargs, + ): if not isinstance(dataloader, DataLoader): - raise TypeError(f'dataloader must be a pytorch DataLoader, ' - f'but got {type(dataloader)}') + raise TypeError(f'dataloader must be a pytorch DataLoader, ' f'but got {type(dataloader)}') if interval <= 0: - raise ValueError(f'interval must be a positive number, ' - f'but got {interval}') + raise ValueError(f'interval must be a positive number, ' f'but got {interval}') assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean' if start is not None and start < 0: - raise ValueError(f'The evaluation start epoch {start} is smaller ' - f'than 0') + raise ValueError(f'The evaluation start epoch {start} is smaller ' f'than 0') self.dataloader = dataloader self.interval = interval self.start = start self.by_epoch = by_epoch - assert isinstance(save_best, str) or save_best is None, \ - '""save_best"" should be a str or None ' \ - f'rather than {type(save_best)}' + assert isinstance(save_best, str) or save_best is None, ( + '""save_best"" should be a str or None ' f'rather than {type(save_best)}' + ) self.save_best = save_best self.eval_kwargs = eval_kwargs self.initial_flag = True if test_fn is None: from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.engine import single_gpu_test + self.test_fn = single_gpu_test else: self.test_fn = test_fn @@ -131,7 +128,7 @@ def __init__(self, self.greater_keys = self._default_greater_keys else: if not isinstance(greater_keys, (list, tuple)): - greater_keys = (greater_keys, ) + greater_keys = (greater_keys,) assert is_seq_of(greater_keys, str) self.greater_keys = greater_keys @@ -139,7 +136,7 @@ def __init__(self, self.less_keys = self._default_less_keys else: if not isinstance(less_keys, (list, tuple)): - less_keys = (less_keys, ) + less_keys = (less_keys,) assert is_seq_of(less_keys, str) self.less_keys = less_keys @@ -171,8 +168,7 @@ def _init_rule(self, rule, key_indicator): comparison rule. """ if rule not in self.rule_map and rule is not None: - raise KeyError(f'rule must be greater, less or None, ' - f'but got {rule}.') + raise KeyError(f'rule must be greater, less or None, ' f'but got {rule}.') if rule is None: if key_indicator != 'auto': @@ -191,9 +187,11 @@ def _init_rule(self, rule, key_indicator): elif any(key in key_indicator_lc for key in less_keys): rule = 'less' else: - raise ValueError(f'Cannot infer the rule for key ' - f'{key_indicator}, thus a specific rule ' - f'must be specified.') + raise ValueError( + f'Cannot infer the rule for key ' + f'{key_indicator}, thus a specific rule ' + f'must be specified.' + ) self.rule = rule self.key_indicator = key_indicator if self.rule is not None: @@ -203,8 +201,7 @@ def before_run(self, runner): if not self.out_dir: self.out_dir = runner.work_dir - self.file_client = FileClient.infer_client(self.file_client_args, - self.out_dir) + self.file_client = FileClient.infer_client(self.file_client_args, self.out_dir) # if `self.out_dir` is not equal to `runner.work_dir`, it means that # `self.out_dir` is set so the final `self.out_dir` is the @@ -213,17 +210,14 @@ def before_run(self, runner): if self.out_dir != runner.work_dir: basename = osp.basename(runner.work_dir.rstrip(osp.sep)) self.out_dir = self.file_client.join_path(self.out_dir, basename) - runner.logger.info( - (f'The best checkpoint will be saved to {self.out_dir} by ' - f'{self.file_client.name}')) + runner.logger.info((f'The best checkpoint will be saved to {self.out_dir} by ' f'{self.file_client.name}')) if self.save_best is not None: if runner.meta is None: warnings.warn('runner.meta is None. Creating an empty one.') runner.meta = dict() runner.meta.setdefault('hook_msgs', dict()) - self.best_ckpt_path = runner.meta['hook_msgs'].get( - 'best_ckpt', None) + self.best_ckpt_path = runner.meta['hook_msgs'].get('best_ckpt', None) def before_train_iter(self, runner): """Evaluate the model only at the start of training by iteration.""" @@ -325,31 +319,22 @@ def _save_ckpt(self, runner, key_score): current = f'iter_{runner.iter + 1}' cur_type, cur_time = 'iter', runner.iter + 1 - best_score = runner.meta['hook_msgs'].get( - 'best_score', self.init_value_map[self.rule]) + best_score = runner.meta['hook_msgs'].get('best_score', self.init_value_map[self.rule]) if self.compare_func(key_score, best_score): best_score = key_score runner.meta['hook_msgs']['best_score'] = best_score - if self.best_ckpt_path and self.file_client.isfile( - self.best_ckpt_path): + if self.best_ckpt_path and self.file_client.isfile(self.best_ckpt_path): self.file_client.remove(self.best_ckpt_path) - runner.logger.info( - (f'The previous best checkpoint {self.best_ckpt_path} was ' - 'removed')) + runner.logger.info((f'The previous best checkpoint {self.best_ckpt_path} was ' 'removed')) best_ckpt_name = f'best_{self.key_indicator}_{current}.pth' - self.best_ckpt_path = self.file_client.join_path( - self.out_dir, best_ckpt_name) + self.best_ckpt_path = self.file_client.join_path(self.out_dir, best_ckpt_name) runner.meta['hook_msgs']['best_ckpt'] = self.best_ckpt_path - runner.save_checkpoint( - self.out_dir, best_ckpt_name, create_symlink=False) - runner.logger.info( - f'Now best checkpoint is saved as {best_ckpt_name}.') - runner.logger.info( - f'Best {self.key_indicator} is {best_score:0.4f} ' - f'at {cur_time} {cur_type}.') + runner.save_checkpoint(self.out_dir, best_ckpt_name, create_symlink=False) + runner.logger.info(f'Now best checkpoint is saved as {best_ckpt_name}.') + runner.logger.info(f'Best {self.key_indicator} is {best_score:0.4f} ' f'at {cur_time} {cur_type}.') def evaluate(self, runner, results): """Evaluate the results. @@ -358,8 +343,7 @@ def evaluate(self, runner, results): runner (:obj:`mmcv.Runner`): The underlined training runner. results (list): Output results. """ - eval_res = self.dataloader.dataset.evaluate( - results, logger=runner.logger, **self.eval_kwargs) + eval_res = self.dataloader.dataset.evaluate(results, logger=runner.logger, **self.eval_kwargs) for name, val in eval_res.items(): runner.log_buffer.output[name] = val @@ -373,7 +357,8 @@ def evaluate(self, runner, results): if not eval_res: warnings.warn( 'Since `eval_res` is an empty dict, the behavior to save ' - 'the best checkpoint will be skipped in this evaluation.') + 'the best checkpoint will be skipped in this evaluation.' + ) return None if self.key_indicator == 'auto': @@ -436,25 +421,28 @@ class DistEvalHook(EvalHook): the dataset. """ - def __init__(self, - dataloader, - start=None, - interval=1, - by_epoch=True, - save_best=None, - rule=None, - test_fn=None, - greater_keys=None, - less_keys=None, - broadcast_bn_buffer=True, - tmpdir=None, - gpu_collect=False, - out_dir=None, - file_client_args=None, - **eval_kwargs): + def __init__( + self, + dataloader, + start=None, + interval=1, + by_epoch=True, + save_best=None, + rule=None, + test_fn=None, + greater_keys=None, + less_keys=None, + broadcast_bn_buffer=True, + tmpdir=None, + gpu_collect=False, + out_dir=None, + file_client_args=None, + **eval_kwargs, + ): if test_fn is None: from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.engine import multi_gpu_test + test_fn = multi_gpu_test super().__init__( @@ -469,7 +457,8 @@ def __init__(self, less_keys=less_keys, out_dir=out_dir, file_client_args=file_client_args, - **eval_kwargs) + **eval_kwargs, + ) self.broadcast_bn_buffer = broadcast_bn_buffer self.tmpdir = tmpdir @@ -485,8 +474,7 @@ def _do_evaluate(self, runner): if self.broadcast_bn_buffer: model = runner.model for name, module in model.named_modules(): - if isinstance(module, - _BatchNorm) and module.track_running_stats: + if isinstance(module, _BatchNorm) and module.track_running_stats: dist.broadcast(module.running_var, 0) dist.broadcast(module.running_mean, 0) @@ -494,11 +482,7 @@ def _do_evaluate(self, runner): if tmpdir is None: tmpdir = osp.join(runner.work_dir, '.eval_hook') - results = self.test_fn( - runner.model, - self.dataloader, - tmpdir=tmpdir, - gpu_collect=self.gpu_collect) + results = self.test_fn(runner.model, self.dataloader, tmpdir=tmpdir, gpu_collect=self.gpu_collect) if runner.rank == 0: print('\n') runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py index c58065a1ac50..730cb0f21e7c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py @@ -5,10 +5,18 @@ class Hook: - stages = ('before_run', 'before_train_epoch', 'before_train_iter', - 'after_train_iter', 'after_train_epoch', 'before_val_epoch', - 'before_val_iter', 'after_val_iter', 'after_val_epoch', - 'after_run') + stages = ( + 'before_run', + 'before_train_epoch', + 'before_train_iter', + 'after_train_iter', + 'after_train_epoch', + 'before_val_epoch', + 'before_val_iter', + 'after_val_iter', + 'after_val_epoch', + 'after_run', + ) def before_run(self, runner): pass diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py index cfd5002fe85f..734404f95c9f 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py @@ -6,7 +6,6 @@ @HOOKS.register_module() class IterTimerHook(Hook): - def before_epoch(self, runner): self.t = time.time() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py index a0b6b345640a..17da656e176e 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py @@ -9,7 +9,12 @@ from .wandb import WandbLoggerHook __all__ = [ - 'LoggerHook', 'MlflowLoggerHook', 'PaviLoggerHook', - 'TensorboardLoggerHook', 'TextLoggerHook', 'WandbLoggerHook', - 'NeptuneLoggerHook', 'DvcliveLoggerHook' + 'LoggerHook', + 'MlflowLoggerHook', + 'PaviLoggerHook', + 'TensorboardLoggerHook', + 'TextLoggerHook', + 'WandbLoggerHook', + 'NeptuneLoggerHook', + 'DvcliveLoggerHook', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py index f84525672945..cb873734d28e 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py @@ -21,11 +21,7 @@ class LoggerHook(Hook): __metaclass__ = ABCMeta - def __init__(self, - interval=10, - ignore_last=True, - reset_flag=False, - by_epoch=True): + def __init__(self, interval=10, ignore_last=True, reset_flag=False, by_epoch=True): self.interval = interval self.ignore_last = ignore_last self.reset_flag = reset_flag @@ -65,8 +61,7 @@ def get_mode(self, runner): elif runner.mode == 'val': mode = 'val' else: - raise ValueError(f"runner mode should be 'train' or 'val', " - f'but got {runner.mode}') + raise ValueError(f"runner mode should be 'train' or 'val', " f'but got {runner.mode}') return mode def get_epoch(self, runner): @@ -77,8 +72,7 @@ def get_epoch(self, runner): # runner.epoch += 1 has been done before val workflow epoch = runner.epoch else: - raise ValueError(f"runner mode should be 'train' or 'val', " - f'but got {runner.mode}') + raise ValueError(f"runner mode should be 'train' or 'val', " f'but got {runner.mode}') return epoch def get_iter(self, runner, inner_iter=False): @@ -109,12 +103,9 @@ def get_momentum_tags(self, runner): tags['momentum'] = momentums[0] return tags - def get_loggable_tags(self, - runner, - allow_scalar=True, - allow_text=False, - add_mode=True, - tags_to_skip=('time', 'data_time')): + def get_loggable_tags( + self, runner, allow_scalar=True, allow_text=False, add_mode=True, tags_to_skip=('time', 'data_time') + ): tags = {} for var, val in runner.log_buffer.output.items(): if var in tags_to_skip: diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py index 687cdc58c033..d92f1696909d 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py @@ -26,15 +26,9 @@ class DvcliveLoggerHook(LoggerHook): https://dvc.org/doc/dvclive """ - def __init__(self, - path, - interval=10, - ignore_last=True, - reset_flag=True, - by_epoch=True): - - super(DvcliveLoggerHook, self).__init__(interval, ignore_last, - reset_flag, by_epoch) + def __init__(self, path, interval=10, ignore_last=True, reset_flag=True, by_epoch=True): + + super(DvcliveLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) self.path = path self.import_dvclive() @@ -42,8 +36,7 @@ def import_dvclive(self): try: import dvclive except ImportError: - raise ImportError( - 'Please run "pip install dvclive" to install dvclive') + raise ImportError('Please run "pip install dvclive" to install dvclive') self.dvclive = dvclive @master_only diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py index f9a72592be47..3392baa8f43d 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py @@ -6,15 +6,9 @@ @HOOKS.register_module() class MlflowLoggerHook(LoggerHook): - - def __init__(self, - exp_name=None, - tags=None, - log_model=True, - interval=10, - ignore_last=True, - reset_flag=False, - by_epoch=True): + def __init__( + self, exp_name=None, tags=None, log_model=True, interval=10, ignore_last=True, reset_flag=False, by_epoch=True + ): """Class to log metrics and (optionally) a trained model to MLflow. It requires `MLflow`_ to be installed. @@ -41,8 +35,7 @@ def __init__(self, .. _MLflow: https://www.mlflow.org/docs/latest/index.html """ - super(MlflowLoggerHook, self).__init__(interval, ignore_last, - reset_flag, by_epoch) + super(MlflowLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) self.import_mlflow() self.exp_name = exp_name self.tags = tags @@ -53,8 +46,7 @@ def import_mlflow(self): import mlflow import mlflow.pytorch as mlflow_pytorch except ImportError: - raise ImportError( - 'Please run "pip install mlflow" to install mlflow') + raise ImportError('Please run "pip install mlflow" to install mlflow') self.mlflow = mlflow self.mlflow_pytorch = mlflow_pytorch diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py index 7a38772b0c93..25e6f1d85ebb 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py @@ -35,16 +35,11 @@ class NeptuneLoggerHook(LoggerHook): https://docs.neptune.ai/you-should-know/logging-metadata """ - def __init__(self, - init_kwargs=None, - interval=10, - ignore_last=True, - reset_flag=True, - with_step=True, - by_epoch=True): + def __init__( + self, init_kwargs=None, interval=10, ignore_last=True, reset_flag=True, with_step=True, by_epoch=True + ): - super(NeptuneLoggerHook, self).__init__(interval, ignore_last, - reset_flag, by_epoch) + super(NeptuneLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) self.import_neptune() self.init_kwargs = init_kwargs self.with_step = with_step @@ -53,8 +48,7 @@ def import_neptune(self): try: import neptune.new as neptune except ImportError: - raise ImportError( - 'Please run "pip install neptune-client" to install neptune') + raise ImportError('Please run "pip install neptune-client" to install neptune') self.neptune = neptune self.run = None @@ -71,8 +65,7 @@ def log(self, runner): if tags: for tag_name, tag_value in tags.items(): if self.with_step: - self.run[tag_name].log( - tag_value, step=self.get_iter(runner)) + self.run[tag_name].log(tag_value, step=self.get_iter(runner)) else: tags['global_step'] = self.get_iter(runner) self.run[tag_name].log(tags) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py index c221e9d87021..1f79cb0f305e 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py @@ -15,18 +15,18 @@ @HOOKS.register_module() class PaviLoggerHook(LoggerHook): - - def __init__(self, - init_kwargs=None, - add_graph=False, - add_last_ckpt=False, - interval=10, - ignore_last=True, - reset_flag=False, - by_epoch=True, - img_key='img_info'): - super(PaviLoggerHook, self).__init__(interval, ignore_last, reset_flag, - by_epoch) + def __init__( + self, + init_kwargs=None, + add_graph=False, + add_last_ckpt=False, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True, + img_key='img_info', + ): + super(PaviLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) self.init_kwargs = init_kwargs self.add_graph = add_graph self.add_last_ckpt = add_last_ckpt @@ -49,10 +49,9 @@ def before_run(self, runner): if runner.meta is not None: if 'config_dict' in runner.meta: config_dict = runner.meta['config_dict'] - assert isinstance( - config_dict, - dict), ('meta["config_dict"] has to be of a dict, ' - f'but got {type(config_dict)}') + assert isinstance(config_dict, dict), ( + 'meta["config_dict"] has to be of a dict, ' f'but got {type(config_dict)}' + ) elif 'config_file' in runner.meta: config_file = runner.meta['config_file'] config_dict = dict(mmcv.Config.fromfile(config_file)) @@ -65,8 +64,7 @@ def before_run(self, runner): config_dict.setdefault('max_iter', runner.max_iters) # non-serializable values are first converted in # mmcv.dump to json - config_dict = json.loads( - mmcv.dump(config_dict, file_format='json')) + config_dict = json.loads(mmcv.dump(config_dict, file_format='json')) session_text = yaml.dump(config_dict) self.init_kwargs['session_text'] = session_text self.writer = SummaryWriter(**self.init_kwargs) @@ -82,8 +80,7 @@ def get_step(self, runner): def log(self, runner): tags = self.get_loggable_tags(runner, add_mode=False) if tags: - self.writer.add_scalars( - self.get_mode(runner), tags, self.get_step(runner)) + self.writer.add_scalars(self.get_mode(runner), tags, self.get_step(runner)) @master_only def after_run(self, runner): @@ -96,9 +93,8 @@ def after_run(self, runner): # runner.epoch += 1 has been done before `after_run`. iteration = runner.epoch if self.by_epoch else runner.iter return self.writer.add_snapshot_file( - tag=self.run_name, - snapshot_file_path=ckpt_path, - iteration=iteration) + tag=self.run_name, snapshot_file_path=ckpt_path, iteration=iteration + ) # flush the buffer and send a task ending signal to Pavi self.writer.close() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py index 477769cdcd2f..ccd0c5b5aac0 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py @@ -9,27 +9,18 @@ @HOOKS.register_module() class TensorboardLoggerHook(LoggerHook): - - def __init__(self, - log_dir=None, - interval=10, - ignore_last=True, - reset_flag=False, - by_epoch=True): - super(TensorboardLoggerHook, self).__init__(interval, ignore_last, - reset_flag, by_epoch) + def __init__(self, log_dir=None, interval=10, ignore_last=True, reset_flag=False, by_epoch=True): + super(TensorboardLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) self.log_dir = log_dir @master_only def before_run(self, runner): super(TensorboardLoggerHook, self).before_run(runner) - if (TORCH_VERSION == 'parrots' - or digit_version(TORCH_VERSION) < digit_version('1.1')): + if TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.1'): try: from tensorboardX import SummaryWriter except ImportError: - raise ImportError('Please install tensorboardX to use ' - 'TensorboardLoggerHook.') + raise ImportError('Please install tensorboardX to use ' 'TensorboardLoggerHook.') else: try: from torch.utils.tensorboard import SummaryWriter @@ -37,7 +28,8 @@ def before_run(self, runner): raise ImportError( 'Please run "pip install future tensorboard" to install ' 'the dependencies to use torch.utils.tensorboard ' - '(applicable to PyTorch 1.1 or higher)') + '(applicable to PyTorch 1.1 or higher)' + ) if self.log_dir is None: self.log_dir = osp.join(runner.work_dir, 'tf_logs') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py index 5a46d166699d..da54f3d56059 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py @@ -52,66 +52,61 @@ class TextLoggerHook(LoggerHook): `New in version 1.3.16.` """ - def __init__(self, - by_epoch=True, - interval=10, - ignore_last=True, - reset_flag=False, - interval_exp_name=1000, - out_dir=None, - out_suffix=('.log.json', '.log', '.py'), - keep_local=True, - file_client_args=None): - super(TextLoggerHook, self).__init__(interval, ignore_last, reset_flag, - by_epoch) + def __init__( + self, + by_epoch=True, + interval=10, + ignore_last=True, + reset_flag=False, + interval_exp_name=1000, + out_dir=None, + out_suffix=('.log.json', '.log', '.py'), + keep_local=True, + file_client_args=None, + ): + super(TextLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) self.by_epoch = by_epoch self.time_sec_tot = 0 self.interval_exp_name = interval_exp_name if out_dir is None and file_client_args is not None: - raise ValueError( - 'file_client_args should be "None" when `out_dir` is not' - 'specified.') + raise ValueError('file_client_args should be "None" when `out_dir` is not' 'specified.') self.out_dir = out_dir - if not (out_dir is None or isinstance(out_dir, str) - or is_tuple_of(out_dir, str)): - raise TypeError('out_dir should be "None" or string or tuple of ' - 'string, but got {out_dir}') + if not (out_dir is None or isinstance(out_dir, str) or is_tuple_of(out_dir, str)): + raise TypeError('out_dir should be "None" or string or tuple of ' 'string, but got {out_dir}') self.out_suffix = out_suffix self.keep_local = keep_local self.file_client_args = file_client_args if self.out_dir is not None: - self.file_client = FileClient.infer_client(file_client_args, - self.out_dir) + self.file_client = FileClient.infer_client(file_client_args, self.out_dir) def before_run(self, runner): super(TextLoggerHook, self).before_run(runner) if self.out_dir is not None: - self.file_client = FileClient.infer_client(self.file_client_args, - self.out_dir) + self.file_client = FileClient.infer_client(self.file_client_args, self.out_dir) # The final `self.out_dir` is the concatenation of `self.out_dir` # and the last level directory of `runner.work_dir` basename = osp.basename(runner.work_dir.rstrip(osp.sep)) self.out_dir = self.file_client.join_path(self.out_dir, basename) runner.logger.info( - (f'Text logs will be saved to {self.out_dir} by ' - f'{self.file_client.name} after the training process.')) + ( + f'Text logs will be saved to {self.out_dir} by ' + f'{self.file_client.name} after the training process.' + ) + ) self.start_iter = runner.iter - self.json_log_path = osp.join(runner.work_dir, - f'{runner.timestamp}.log.json') + self.json_log_path = osp.join(runner.work_dir, f'{runner.timestamp}.log.json') if runner.meta is not None: self._dump_log(runner.meta, runner) def _get_max_memory(self, runner): device = getattr(runner.model, 'output_device', None) mem = torch.cuda.max_memory_allocated(device=device) - mem_mb = torch.tensor([mem / (1024 * 1024)], - dtype=torch.int, - device=device) + mem_mb = torch.tensor([mem / (1024 * 1024)], dtype=torch.int, device=device) if runner.world_size > 1: dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX) return mem_mb.item() @@ -120,8 +115,7 @@ def _log_info(self, log_dict, runner): # print exp name for users to distinguish experiments # at every ``interval_exp_name`` iterations and the end of each epoch if runner.meta is not None and 'exp_name' in runner.meta: - if (self.every_n_iters(runner, self.interval_exp_name)) or ( - self.by_epoch and self.end_of_epoch(runner)): + if (self.every_n_iters(runner, self.interval_exp_name)) or (self.by_epoch and self.end_of_epoch(runner)): exp_info = f'Exp name: {runner.meta["exp_name"]}' runner.logger.info(exp_info) @@ -137,21 +131,18 @@ def _log_info(self, log_dict, runner): # by epoch: Epoch [4][100/1000] # by iter: Iter [100/100000] if self.by_epoch: - log_str = f'Epoch [{log_dict["epoch"]}]' \ - f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t' + log_str = f'Epoch [{log_dict["epoch"]}]' f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t' else: log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t' log_str += f'{lr_str}, ' if 'time' in log_dict.keys(): - self.time_sec_tot += (log_dict['time'] * self.interval) - time_sec_avg = self.time_sec_tot / ( - runner.iter - self.start_iter + 1) + self.time_sec_tot += log_dict['time'] * self.interval + time_sec_avg = self.time_sec_tot / (runner.iter - self.start_iter + 1) eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1) eta_str = str(datetime.timedelta(seconds=int(eta_sec))) log_str += f'eta: {eta_str}, ' - log_str += f'time: {log_dict["time"]:.3f}, ' \ - f'data_time: {log_dict["data_time"]:.3f}, ' + log_str += f'time: {log_dict["time"]:.3f}, ' f'data_time: {log_dict["data_time"]:.3f}, ' # statistic memory if torch.cuda.is_available(): log_str += f'memory: {log_dict["memory"]}, ' @@ -161,8 +152,7 @@ def _log_info(self, log_dict, runner): # by epoch: Epoch[val] [4][1000] # by iter: Iter[val] [1000] if self.by_epoch: - log_str = f'Epoch({log_dict["mode"]}) ' \ - f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t' + log_str = f'Epoch({log_dict["mode"]}) ' f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t' else: log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t' @@ -170,10 +160,7 @@ def _log_info(self, log_dict, runner): for name, val in log_dict.items(): # TODO: resolve this hack # these items have been in log_str - if name in [ - 'mode', 'Epoch', 'iter', 'lr', 'time', 'data_time', - 'memory', 'epoch' - ]: + if name in ['mode', 'Epoch', 'iter', 'lr', 'time', 'data_time', 'memory', 'epoch']: continue if isinstance(val, float): val = f'{val:.4f}' @@ -208,10 +195,7 @@ def log(self, runner): else: cur_iter = self.get_iter(runner, inner_iter=True) - log_dict = OrderedDict( - mode=self.get_mode(runner), - epoch=self.get_epoch(runner), - iter=cur_iter) + log_dict = OrderedDict(mode=self.get_mode(runner), epoch=self.get_epoch(runner), iter=cur_iter) # only record lr of the first param group cur_lr = runner.current_lr() @@ -240,17 +224,12 @@ def after_run(self, runner): if self.out_dir is not None: for filename in scandir(runner.work_dir, self.out_suffix, True): local_filepath = osp.join(runner.work_dir, filename) - out_filepath = self.file_client.join_path( - self.out_dir, filename) + out_filepath = self.file_client.join_path(self.out_dir, filename) with open(local_filepath, 'r') as f: self.file_client.put_text(f.read(), out_filepath) - runner.logger.info( - (f'The file {local_filepath} has been uploaded to ' - f'{out_filepath}.')) + runner.logger.info((f'The file {local_filepath} has been uploaded to ' f'{out_filepath}.')) if not self.keep_local: os.remove(local_filepath) - runner.logger.info( - (f'{local_filepath} was removed due to the ' - '`self.keep_local=False`')) + runner.logger.info((f'{local_filepath} was removed due to the ' '`self.keep_local=False`')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py index 9f6808462eb7..c94d8391711c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py @@ -6,17 +6,17 @@ @HOOKS.register_module() class WandbLoggerHook(LoggerHook): - - def __init__(self, - init_kwargs=None, - interval=10, - ignore_last=True, - reset_flag=False, - commit=True, - by_epoch=True, - with_step=True): - super(WandbLoggerHook, self).__init__(interval, ignore_last, - reset_flag, by_epoch) + def __init__( + self, + init_kwargs=None, + interval=10, + ignore_last=True, + reset_flag=False, + commit=True, + by_epoch=True, + with_step=True, + ): + super(WandbLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) self.import_wandb() self.init_kwargs = init_kwargs self.commit = commit @@ -26,8 +26,7 @@ def import_wandb(self): try: import wandb except ImportError: - raise ImportError( - 'Please run "pip install wandb" to install wandb') + raise ImportError('Please run "pip install wandb" to install wandb') self.wandb = wandb @master_only @@ -45,8 +44,7 @@ def log(self, runner): tags = self.get_loggable_tags(runner) if tags: if self.with_step: - self.wandb.log( - tags, step=self.get_iter(runner), commit=self.commit) + self.wandb.log(tags, step=self.get_iter(runner), commit=self.commit) else: tags['global_step'] = self.get_iter(runner) self.wandb.log(tags, commit=self.commit) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py index fe0c84af6f91..8f92871c64cd 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py @@ -22,23 +22,16 @@ class LrUpdaterHook(Hook): number of iteration that warmup lasts """ - def __init__(self, - by_epoch=True, - warmup=None, - warmup_iters=0, - warmup_ratio=0.1, - warmup_by_epoch=False): + def __init__(self, by_epoch=True, warmup=None, warmup_iters=0, warmup_ratio=0.1, warmup_by_epoch=False): # validate the "warmup" argument if warmup is not None: if warmup not in ['constant', 'linear', 'exp']: raise ValueError( - f'"{warmup}" is not a supported type for warming up, valid' - ' types are "constant" and "linear"') + f'"{warmup}" is not a supported type for warming up, valid' ' types are "constant" and "linear"' + ) if warmup is not None: - assert warmup_iters > 0, \ - '"warmup_iters" must be a positive integer' - assert 0 < warmup_ratio <= 1.0, \ - '"warmup_ratio" must be in range (0,1]' + assert warmup_iters > 0, '"warmup_iters" must be a positive integer' + assert 0 < warmup_ratio <= 1.0, '"warmup_ratio" must be in range (0,1]' self.by_epoch = by_epoch self.warmup = warmup @@ -61,8 +54,7 @@ def _set_lr(self, runner, lr_groups): for param_group, lr in zip(optim.param_groups, lr_groups[k]): param_group['lr'] = lr else: - for param_group, lr in zip(runner.optimizer.param_groups, - lr_groups): + for param_group, lr in zip(runner.optimizer.param_groups, lr_groups): param_group['lr'] = lr def get_lr(self, runner, base_lr): @@ -72,10 +64,7 @@ def get_regular_lr(self, runner): if isinstance(runner.optimizer, dict): lr_groups = {} for k in runner.optimizer.keys(): - _lr_group = [ - self.get_lr(runner, _base_lr) - for _base_lr in self.base_lr[k] - ] + _lr_group = [self.get_lr(runner, _base_lr) for _base_lr in self.base_lr[k]] lr_groups.update({k: _lr_group}) return lr_groups @@ -83,16 +72,14 @@ def get_regular_lr(self, runner): return [self.get_lr(runner, _base_lr) for _base_lr in self.base_lr] def get_warmup_lr(self, cur_iters): - def _get_warmup_lr(cur_iters, regular_lr): if self.warmup == 'constant': warmup_lr = [_lr * self.warmup_ratio for _lr in regular_lr] elif self.warmup == 'linear': - k = (1 - cur_iters / self.warmup_iters) * (1 - - self.warmup_ratio) + k = (1 - cur_iters / self.warmup_iters) * (1 - self.warmup_ratio) warmup_lr = [_lr * (1 - k) for _lr in regular_lr] elif self.warmup == 'exp': - k = self.warmup_ratio**(1 - cur_iters / self.warmup_iters) + k = self.warmup_ratio ** (1 - cur_iters / self.warmup_iters) warmup_lr = [_lr * k for _lr in regular_lr] return warmup_lr @@ -112,16 +99,12 @@ def before_run(self, runner): for k, optim in runner.optimizer.items(): for group in optim.param_groups: group.setdefault('initial_lr', group['lr']) - _base_lr = [ - group['initial_lr'] for group in optim.param_groups - ] + _base_lr = [group['initial_lr'] for group in optim.param_groups] self.base_lr.update({k: _base_lr}) else: for group in runner.optimizer.param_groups: group.setdefault('initial_lr', group['lr']) - self.base_lr = [ - group['initial_lr'] for group in runner.optimizer.param_groups - ] + self.base_lr = [group['initial_lr'] for group in runner.optimizer.param_groups] def before_train_epoch(self, runner): if self.warmup_iters is None: @@ -155,7 +138,6 @@ def before_train_iter(self, runner): @HOOKS.register_module() class FixedLrUpdaterHook(LrUpdaterHook): - def __init__(self, **kwargs): super(FixedLrUpdaterHook, self).__init__(**kwargs) @@ -203,7 +185,7 @@ def get_lr(self, runner, base_lr): exp = i break - lr = base_lr * (self.gamma**exp) + lr = base_lr * (self.gamma ** exp) if self.min_lr is not None: # clip to a minimum value lr = max(lr, self.min_lr) @@ -212,20 +194,18 @@ def get_lr(self, runner, base_lr): @HOOKS.register_module() class ExpLrUpdaterHook(LrUpdaterHook): - def __init__(self, gamma, **kwargs): self.gamma = gamma super(ExpLrUpdaterHook, self).__init__(**kwargs) def get_lr(self, runner, base_lr): progress = runner.epoch if self.by_epoch else runner.iter - return base_lr * self.gamma**progress + return base_lr * self.gamma ** progress @HOOKS.register_module() class PolyLrUpdaterHook(LrUpdaterHook): - - def __init__(self, power=1., min_lr=0., **kwargs): + def __init__(self, power=1.0, min_lr=0.0, **kwargs): self.power = power self.min_lr = min_lr super(PolyLrUpdaterHook, self).__init__(**kwargs) @@ -237,26 +217,24 @@ def get_lr(self, runner, base_lr): else: progress = runner.iter max_progress = runner.max_iters - coeff = (1 - progress / max_progress)**self.power + coeff = (1 - progress / max_progress) ** self.power return (base_lr - self.min_lr) * coeff + self.min_lr @HOOKS.register_module() class InvLrUpdaterHook(LrUpdaterHook): - - def __init__(self, gamma, power=1., **kwargs): + def __init__(self, gamma, power=1.0, **kwargs): self.gamma = gamma self.power = power super(InvLrUpdaterHook, self).__init__(**kwargs) def get_lr(self, runner, base_lr): progress = runner.epoch if self.by_epoch else runner.iter - return base_lr * (1 + self.gamma * progress)**(-self.power) + return base_lr * (1 + self.gamma * progress) ** (-self.power) @HOOKS.register_module() class CosineAnnealingLrUpdaterHook(LrUpdaterHook): - def __init__(self, min_lr=None, min_lr_ratio=None, **kwargs): assert (min_lr is None) ^ (min_lr_ratio is None) self.min_lr = min_lr @@ -295,17 +273,10 @@ class FlatCosineAnnealingLrUpdaterHook(LrUpdaterHook): Default: None. """ - def __init__(self, - start_percent=0.75, - min_lr=None, - min_lr_ratio=None, - **kwargs): + def __init__(self, start_percent=0.75, min_lr=None, min_lr_ratio=None, **kwargs): assert (min_lr is None) ^ (min_lr_ratio is None) - if start_percent < 0 or start_percent > 1 or not isinstance( - start_percent, float): - raise ValueError( - 'expected float between 0 and 1 start_percent, but ' - f'got {start_percent}') + if start_percent < 0 or start_percent > 1 or not isinstance(start_percent, float): + raise ValueError('expected float between 0 and 1 start_percent, but ' f'got {start_percent}') self.start_percent = start_percent self.min_lr = min_lr self.min_lr_ratio = min_lr_ratio @@ -346,24 +317,18 @@ class CosineRestartLrUpdaterHook(LrUpdaterHook): Default: None. """ - def __init__(self, - periods, - restart_weights=[1], - min_lr=None, - min_lr_ratio=None, - **kwargs): + def __init__(self, periods, restart_weights=[1], min_lr=None, min_lr_ratio=None, **kwargs): assert (min_lr is None) ^ (min_lr_ratio is None) self.periods = periods self.min_lr = min_lr self.min_lr_ratio = min_lr_ratio self.restart_weights = restart_weights - assert (len(self.periods) == len(self.restart_weights) - ), 'periods and restart_weights should have the same length.' + assert len(self.periods) == len( + self.restart_weights + ), 'periods and restart_weights should have the same length.' super(CosineRestartLrUpdaterHook, self).__init__(**kwargs) - self.cumulative_periods = [ - sum(self.periods[0:i + 1]) for i in range(0, len(self.periods)) - ] + self.cumulative_periods = [sum(self.periods[0 : i + 1]) for i in range(0, len(self.periods))] def get_lr(self, runner, base_lr): if self.by_epoch: @@ -404,8 +369,7 @@ def get_position_from_periods(iteration, cumulative_periods): for i, period in enumerate(cumulative_periods): if iteration < period: return i - raise ValueError(f'Current iteration {iteration} exceeds ' - f'cumulative_periods {cumulative_periods}') + raise ValueError(f'Current iteration {iteration} exceeds ' f'cumulative_periods {cumulative_periods}') @HOOKS.register_module() @@ -431,26 +395,24 @@ class CyclicLrUpdaterHook(LrUpdaterHook): 'linear' for linear annealing. Default: 'cos'. """ - def __init__(self, - by_epoch=False, - target_ratio=(10, 1e-4), - cyclic_times=1, - step_ratio_up=0.4, - anneal_strategy='cos', - **kwargs): + def __init__( + self, + by_epoch=False, + target_ratio=(10, 1e-4), + cyclic_times=1, + step_ratio_up=0.4, + anneal_strategy='cos', + **kwargs, + ): if isinstance(target_ratio, float): target_ratio = (target_ratio, target_ratio / 1e5) elif isinstance(target_ratio, tuple): - target_ratio = (target_ratio[0], target_ratio[0] / 1e5) \ - if len(target_ratio) == 1 else target_ratio + target_ratio = (target_ratio[0], target_ratio[0] / 1e5) if len(target_ratio) == 1 else target_ratio else: - raise ValueError('target_ratio should be either float ' - f'or tuple, got {type(target_ratio)}') + raise ValueError('target_ratio should be either float ' f'or tuple, got {type(target_ratio)}') - assert len(target_ratio) == 2, \ - '"target_ratio" must be list or tuple of two floats' - assert 0 <= step_ratio_up < 1.0, \ - '"step_ratio_up" must be in range [0,1)' + assert len(target_ratio) == 2, '"target_ratio" must be list or tuple of two floats' + assert 0 <= step_ratio_up < 1.0, '"step_ratio_up" must be in range [0,1)' self.target_ratio = target_ratio self.cyclic_times = cyclic_times @@ -458,15 +420,13 @@ def __init__(self, self.lr_phases = [] # init lr_phases # validate anneal_strategy if anneal_strategy not in ['cos', 'linear']: - raise ValueError('anneal_strategy must be one of "cos" or ' - f'"linear", instead got {anneal_strategy}') + raise ValueError('anneal_strategy must be one of "cos" or ' f'"linear", instead got {anneal_strategy}') elif anneal_strategy == 'cos': self.anneal_func = annealing_cos elif anneal_strategy == 'linear': self.anneal_func = annealing_linear - assert not by_epoch, \ - 'currently only support "by_epoch" = False' + assert not by_epoch, 'currently only support "by_epoch" = False' super(CyclicLrUpdaterHook, self).__init__(by_epoch, **kwargs) def before_run(self, runner): @@ -475,23 +435,18 @@ def before_run(self, runner): # total lr_phases are separated as up and down max_iter_per_phase = runner.max_iters // self.cyclic_times iter_up_phase = int(self.step_ratio_up * max_iter_per_phase) + self.lr_phases.append([0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]]) self.lr_phases.append( - [0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]]) - self.lr_phases.append([ - iter_up_phase, max_iter_per_phase, max_iter_per_phase, - self.target_ratio[0], self.target_ratio[1] - ]) + [iter_up_phase, max_iter_per_phase, max_iter_per_phase, self.target_ratio[0], self.target_ratio[1]] + ) def get_lr(self, runner, base_lr): curr_iter = runner.iter - for (start_iter, end_iter, max_iter_per_phase, start_ratio, - end_ratio) in self.lr_phases: + for (start_iter, end_iter, max_iter_per_phase, start_ratio, end_ratio) in self.lr_phases: curr_iter %= max_iter_per_phase if start_iter <= curr_iter < end_iter: progress = curr_iter - start_iter - return self.anneal_func(base_lr * start_ratio, - base_lr * end_ratio, - progress / (end_iter - start_iter)) + return self.anneal_func(base_lr * start_ratio, base_lr * end_ratio, progress / (end_iter - start_iter)) @HOOKS.register_module() @@ -529,39 +484,36 @@ class OneCycleLrUpdaterHook(LrUpdaterHook): Default: False """ - def __init__(self, - max_lr, - total_steps=None, - pct_start=0.3, - anneal_strategy='cos', - div_factor=25, - final_div_factor=1e4, - three_phase=False, - **kwargs): + def __init__( + self, + max_lr, + total_steps=None, + pct_start=0.3, + anneal_strategy='cos', + div_factor=25, + final_div_factor=1e4, + three_phase=False, + **kwargs, + ): # validate by_epoch, currently only support by_epoch = False if 'by_epoch' not in kwargs: kwargs['by_epoch'] = False else: - assert not kwargs['by_epoch'], \ - 'currently only support "by_epoch" = False' + assert not kwargs['by_epoch'], 'currently only support "by_epoch" = False' if not isinstance(max_lr, (numbers.Number, list, dict)): - raise ValueError('the type of max_lr must be the one of list or ' - f'dict, but got {type(max_lr)}') + raise ValueError('the type of max_lr must be the one of list or ' f'dict, but got {type(max_lr)}') self._max_lr = max_lr if total_steps is not None: if not isinstance(total_steps, int): - raise ValueError('the type of total_steps must be int, but' - f'got {type(total_steps)}') + raise ValueError('the type of total_steps must be int, but' f'got {type(total_steps)}') self.total_steps = total_steps # validate pct_start if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float): - raise ValueError('expected float between 0 and 1 pct_start, but ' - f'got {pct_start}') + raise ValueError('expected float between 0 and 1 pct_start, but ' f'got {pct_start}') self.pct_start = pct_start # validate anneal_strategy if anneal_strategy not in ['cos', 'linear']: - raise ValueError('anneal_strategy must be one of "cos" or ' - f'"linear", instead got {anneal_strategy}') + raise ValueError('anneal_strategy must be one of "cos" or ' f'"linear", instead got {anneal_strategy}') elif anneal_strategy == 'cos': self.anneal_func = annealing_cos elif anneal_strategy == 'linear': @@ -581,7 +533,8 @@ def before_run(self, runner): raise ValueError( 'The total steps must be greater than or equal to max ' f'iterations {runner.max_iters} of runner, but total steps ' - f'is {total_steps}.') + f'is {total_steps}.' + ) if isinstance(runner.optimizer, dict): self.base_lr = {} @@ -598,18 +551,12 @@ def before_run(self, runner): group.setdefault('initial_lr', lr) if self.three_phase: - self.lr_phases.append( - [float(self.pct_start * total_steps) - 1, 1, self.div_factor]) - self.lr_phases.append([ - float(2 * self.pct_start * total_steps) - 2, self.div_factor, 1 - ]) - self.lr_phases.append( - [total_steps - 1, 1, 1 / self.final_div_factor]) + self.lr_phases.append([float(self.pct_start * total_steps) - 1, 1, self.div_factor]) + self.lr_phases.append([float(2 * self.pct_start * total_steps) - 2, self.div_factor, 1]) + self.lr_phases.append([total_steps - 1, 1, 1 / self.final_div_factor]) else: - self.lr_phases.append( - [float(self.pct_start * total_steps) - 1, 1, self.div_factor]) - self.lr_phases.append( - [total_steps - 1, self.div_factor, 1 / self.final_div_factor]) + self.lr_phases.append([float(self.pct_start * total_steps) - 1, 1, self.div_factor]) + self.lr_phases.append([total_steps - 1, self.div_factor, 1 / self.final_div_factor]) def get_lr(self, runner, base_lr): curr_iter = runner.iter @@ -617,8 +564,7 @@ def get_lr(self, runner, base_lr): for i, (end_iter, start_lr, end_lr) in enumerate(self.lr_phases): if curr_iter <= end_iter: pct = (curr_iter - start_iter) / (end_iter - start_iter) - lr = self.anneal_func(base_lr * start_lr, base_lr * end_lr, - pct) + lr = self.anneal_func(base_lr * start_lr, base_lr * end_lr, pct) break start_iter = end_iter return lr @@ -661,8 +607,7 @@ def format_param(name, optim, param): return [param] * len(optim.param_groups) elif isinstance(param, (list, tuple)): # multi param groups if len(param) != len(optim.param_groups): - raise ValueError(f'expected {len(optim.param_groups)} ' - f'values for {name}, got {len(param)}') + raise ValueError(f'expected {len(optim.param_groups)} ' f'values for {name}, got {len(param)}') return param else: # multi optimizers if name not in param: diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py index 70cf9a838fb3..d483c16b512c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py @@ -6,7 +6,6 @@ @HOOKS.register_module() class EmptyCacheHook(Hook): - def __init__(self, before_epoch=False, after_epoch=True, after_iter=False): self._before_epoch = before_epoch self._after_epoch = after_epoch diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py index 78a436ecfa21..b366fa8e6817 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py @@ -5,23 +5,16 @@ class MomentumUpdaterHook(Hook): - - def __init__(self, - by_epoch=True, - warmup=None, - warmup_iters=0, - warmup_ratio=0.9): + def __init__(self, by_epoch=True, warmup=None, warmup_iters=0, warmup_ratio=0.9): # validate the "warmup" argument if warmup is not None: if warmup not in ['constant', 'linear', 'exp']: raise ValueError( - f'"{warmup}" is not a supported type for warming up, valid' - ' types are "constant" and "linear"') + f'"{warmup}" is not a supported type for warming up, valid' ' types are "constant" and "linear"' + ) if warmup is not None: - assert warmup_iters > 0, \ - '"warmup_iters" must be a positive integer' - assert 0 < warmup_ratio <= 1.0, \ - '"warmup_momentum" must be in range (0,1]' + assert warmup_iters > 0, '"warmup_iters" must be a positive integer' + assert 0 < warmup_ratio <= 1.0, '"warmup_momentum" must be in range (0,1]' self.by_epoch = by_epoch self.warmup = warmup @@ -29,21 +22,18 @@ def __init__(self, self.warmup_ratio = warmup_ratio self.base_momentum = [] # initial momentum for all param groups - self.regular_momentum = [ - ] # expected momentum if no warming up is performed + self.regular_momentum = [] # expected momentum if no warming up is performed def _set_momentum(self, runner, momentum_groups): if isinstance(runner.optimizer, dict): for k, optim in runner.optimizer.items(): - for param_group, mom in zip(optim.param_groups, - momentum_groups[k]): + for param_group, mom in zip(optim.param_groups, momentum_groups[k]): if 'momentum' in param_group.keys(): param_group['momentum'] = mom elif 'betas' in param_group.keys(): param_group['betas'] = (mom, param_group['betas'][1]) else: - for param_group, mom in zip(runner.optimizer.param_groups, - momentum_groups): + for param_group, mom in zip(runner.optimizer.param_groups, momentum_groups): if 'momentum' in param_group.keys(): param_group['momentum'] = mom elif 'betas' in param_group.keys(): @@ -57,43 +47,29 @@ def get_regular_momentum(self, runner): momentum_groups = {} for k in runner.optimizer.keys(): _momentum_group = [ - self.get_momentum(runner, _base_momentum) - for _base_momentum in self.base_momentum[k] + self.get_momentum(runner, _base_momentum) for _base_momentum in self.base_momentum[k] ] momentum_groups.update({k: _momentum_group}) return momentum_groups else: - return [ - self.get_momentum(runner, _base_momentum) - for _base_momentum in self.base_momentum - ] + return [self.get_momentum(runner, _base_momentum) for _base_momentum in self.base_momentum] def get_warmup_momentum(self, cur_iters): - def _get_warmup_momentum(cur_iters, regular_momentum): if self.warmup == 'constant': - warmup_momentum = [ - _momentum / self.warmup_ratio - for _momentum in self.regular_momentum - ] + warmup_momentum = [_momentum / self.warmup_ratio for _momentum in self.regular_momentum] elif self.warmup == 'linear': - k = (1 - cur_iters / self.warmup_iters) * (1 - - self.warmup_ratio) - warmup_momentum = [ - _momentum / (1 - k) for _momentum in self.regular_mom - ] + k = (1 - cur_iters / self.warmup_iters) * (1 - self.warmup_ratio) + warmup_momentum = [_momentum / (1 - k) for _momentum in self.regular_mom] elif self.warmup == 'exp': - k = self.warmup_ratio**(1 - cur_iters / self.warmup_iters) - warmup_momentum = [ - _momentum / k for _momentum in self.regular_mom - ] + k = self.warmup_ratio ** (1 - cur_iters / self.warmup_iters) + warmup_momentum = [_momentum / k for _momentum in self.regular_mom] return warmup_momentum if isinstance(self.regular_momentum, dict): momentum_groups = {} for key, regular_momentum in self.regular_momentum.items(): - momentum_groups[key] = _get_warmup_momentum( - cur_iters, regular_momentum) + momentum_groups[key] = _get_warmup_momentum(cur_iters, regular_momentum) return momentum_groups else: return _get_warmup_momentum(cur_iters, self.regular_momentum) @@ -110,9 +86,7 @@ def before_run(self, runner): group.setdefault('initial_momentum', group['momentum']) else: group.setdefault('initial_momentum', group['betas'][0]) - _base_momentum = [ - group['initial_momentum'] for group in optim.param_groups - ] + _base_momentum = [group['initial_momentum'] for group in optim.param_groups] self.base_momentum.update({k: _base_momentum}) else: for group in runner.optimizer.param_groups: @@ -120,10 +94,7 @@ def before_run(self, runner): group.setdefault('initial_momentum', group['momentum']) else: group.setdefault('initial_momentum', group['betas'][0]) - self.base_momentum = [ - group['initial_momentum'] - for group in runner.optimizer.param_groups - ] + self.base_momentum = [group['initial_momentum'] for group in runner.optimizer.param_groups] def before_train_epoch(self, runner): if not self.by_epoch: @@ -191,7 +162,7 @@ def get_momentum(self, runner, base_momentum): exp = i break - momentum = base_momentum * (self.gamma**exp) + momentum = base_momentum * (self.gamma ** exp) if self.min_momentum is not None: # clip to a minimum value momentum = max(momentum, self.min_momentum) @@ -200,7 +171,6 @@ def get_momentum(self, runner, base_momentum): @HOOKS.register_module() class CosineAnnealingMomentumUpdaterHook(MomentumUpdaterHook): - def __init__(self, min_momentum=None, min_momentum_ratio=None, **kwargs): assert (min_momentum is None) ^ (min_momentum_ratio is None) self.min_momentum = min_momentum @@ -218,8 +188,7 @@ def get_momentum(self, runner, base_momentum): target_momentum = base_momentum * self.min_momentum_ratio else: target_momentum = self.min_momentum - return annealing_cos(base_momentum, target_momentum, - progress / max_progress) + return annealing_cos(base_momentum, target_momentum, progress / max_progress) @HOOKS.register_module() @@ -241,33 +210,23 @@ class CyclicMomentumUpdaterHook(MomentumUpdaterHook): by_epoch (bool): Whether to update momentum by epoch. """ - def __init__(self, - by_epoch=False, - target_ratio=(0.85 / 0.95, 1), - cyclic_times=1, - step_ratio_up=0.4, - **kwargs): + def __init__(self, by_epoch=False, target_ratio=(0.85 / 0.95, 1), cyclic_times=1, step_ratio_up=0.4, **kwargs): if isinstance(target_ratio, float): target_ratio = (target_ratio, target_ratio / 1e5) elif isinstance(target_ratio, tuple): - target_ratio = (target_ratio[0], target_ratio[0] / 1e5) \ - if len(target_ratio) == 1 else target_ratio + target_ratio = (target_ratio[0], target_ratio[0] / 1e5) if len(target_ratio) == 1 else target_ratio else: - raise ValueError('target_ratio should be either float ' - f'or tuple, got {type(target_ratio)}') + raise ValueError('target_ratio should be either float ' f'or tuple, got {type(target_ratio)}') - assert len(target_ratio) == 2, \ - '"target_ratio" must be list or tuple of two floats' - assert 0 <= step_ratio_up < 1.0, \ - '"step_ratio_up" must be in range [0,1)' + assert len(target_ratio) == 2, '"target_ratio" must be list or tuple of two floats' + assert 0 <= step_ratio_up < 1.0, '"step_ratio_up" must be in range [0,1)' self.target_ratio = target_ratio self.cyclic_times = cyclic_times self.step_ratio_up = step_ratio_up self.momentum_phases = [] # init momentum_phases # currently only support by_epoch=False - assert not by_epoch, \ - 'currently only support "by_epoch" = False' + assert not by_epoch, 'currently only support "by_epoch" = False' super(CyclicMomentumUpdaterHook, self).__init__(by_epoch, **kwargs) def before_run(self, runner): @@ -276,23 +235,20 @@ def before_run(self, runner): # total momentum_phases are separated as up and down max_iter_per_phase = runner.max_iters // self.cyclic_times iter_up_phase = int(self.step_ratio_up * max_iter_per_phase) + self.momentum_phases.append([0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]]) self.momentum_phases.append( - [0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]]) - self.momentum_phases.append([ - iter_up_phase, max_iter_per_phase, max_iter_per_phase, - self.target_ratio[0], self.target_ratio[1] - ]) + [iter_up_phase, max_iter_per_phase, max_iter_per_phase, self.target_ratio[0], self.target_ratio[1]] + ) def get_momentum(self, runner, base_momentum): curr_iter = runner.iter - for (start_iter, end_iter, max_iter_per_phase, start_ratio, - end_ratio) in self.momentum_phases: + for (start_iter, end_iter, max_iter_per_phase, start_ratio, end_ratio) in self.momentum_phases: curr_iter %= max_iter_per_phase if start_iter <= curr_iter < end_iter: progress = curr_iter - start_iter - return annealing_cos(base_momentum * start_ratio, - base_momentum * end_ratio, - progress / (end_iter - start_iter)) + return annealing_cos( + base_momentum * start_ratio, base_momentum * end_ratio, progress / (end_iter - start_iter) + ) @HOOKS.register_module() @@ -330,36 +286,27 @@ class OneCycleMomentumUpdaterHook(MomentumUpdaterHook): Default: False """ - def __init__(self, - base_momentum=0.85, - max_momentum=0.95, - pct_start=0.3, - anneal_strategy='cos', - three_phase=False, - **kwargs): + def __init__( + self, base_momentum=0.85, max_momentum=0.95, pct_start=0.3, anneal_strategy='cos', three_phase=False, **kwargs + ): # validate by_epoch, currently only support by_epoch=False if 'by_epoch' not in kwargs: kwargs['by_epoch'] = False else: - assert not kwargs['by_epoch'], \ - 'currently only support "by_epoch" = False' + assert not kwargs['by_epoch'], 'currently only support "by_epoch" = False' if not isinstance(base_momentum, (float, list, dict)): - raise ValueError('base_momentum must be the type among of float,' - 'list or dict.') + raise ValueError('base_momentum must be the type among of float,' 'list or dict.') self._base_momentum = base_momentum if not isinstance(max_momentum, (float, list, dict)): - raise ValueError('max_momentum must be the type among of float,' - 'list or dict.') + raise ValueError('max_momentum must be the type among of float,' 'list or dict.') self._max_momentum = max_momentum # validate pct_start if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float): - raise ValueError('Expected float between 0 and 1 pct_start, but ' - f'got {pct_start}') + raise ValueError('Expected float between 0 and 1 pct_start, but ' f'got {pct_start}') self.pct_start = pct_start # validate anneal_strategy if anneal_strategy not in ['cos', 'linear']: - raise ValueError('anneal_strategy must by one of "cos" or ' - f'"linear", instead got {anneal_strategy}') + raise ValueError('anneal_strategy must by one of "cos" or ' f'"linear", instead got {anneal_strategy}') elif anneal_strategy == 'cos': self.anneal_func = annealing_cos elif anneal_strategy == 'linear': @@ -371,15 +318,12 @@ def __init__(self, def before_run(self, runner): if isinstance(runner.optimizer, dict): for k, optim in runner.optimizer.items(): - if ('momentum' not in optim.defaults - and 'betas' not in optim.defaults): - raise ValueError('optimizer must support momentum with' - 'option enabled') + if 'momentum' not in optim.defaults and 'betas' not in optim.defaults: + raise ValueError('optimizer must support momentum with' 'option enabled') self.use_beta1 = 'betas' in optim.defaults _base_momentum = format_param(k, optim, self._base_momentum) _max_momentum = format_param(k, optim, self._max_momentum) - for group, b_momentum, m_momentum in zip( - optim.param_groups, _base_momentum, _max_momentum): + for group, b_momentum, m_momentum in zip(optim.param_groups, _base_momentum, _max_momentum): if self.use_beta1: _, beta2 = group['betas'] group['betas'] = (m_momentum, beta2) @@ -389,17 +333,13 @@ def before_run(self, runner): group['max_momentum'] = m_momentum else: optim = runner.optimizer - if ('momentum' not in optim.defaults - and 'betas' not in optim.defaults): - raise ValueError('optimizer must support momentum with' - 'option enabled') + if 'momentum' not in optim.defaults and 'betas' not in optim.defaults: + raise ValueError('optimizer must support momentum with' 'option enabled') self.use_beta1 = 'betas' in optim.defaults k = type(optim).__name__ _base_momentum = format_param(k, optim, self._base_momentum) _max_momentum = format_param(k, optim, self._max_momentum) - for group, b_momentum, m_momentum in zip(optim.param_groups, - _base_momentum, - _max_momentum): + for group, b_momentum, m_momentum in zip(optim.param_groups, _base_momentum, _max_momentum): if self.use_beta1: _, beta2 = group['betas'] group['betas'] = (m_momentum, beta2) @@ -409,54 +349,45 @@ def before_run(self, runner): group['max_momentum'] = m_momentum if self.three_phase: - self.momentum_phases.append({ - 'end_iter': - float(self.pct_start * runner.max_iters) - 1, - 'start_momentum': - 'max_momentum', - 'end_momentum': - 'base_momentum' - }) - self.momentum_phases.append({ - 'end_iter': - float(2 * self.pct_start * runner.max_iters) - 2, - 'start_momentum': - 'base_momentum', - 'end_momentum': - 'max_momentum' - }) - self.momentum_phases.append({ - 'end_iter': runner.max_iters - 1, - 'start_momentum': 'max_momentum', - 'end_momentum': 'max_momentum' - }) + self.momentum_phases.append( + { + 'end_iter': float(self.pct_start * runner.max_iters) - 1, + 'start_momentum': 'max_momentum', + 'end_momentum': 'base_momentum', + } + ) + self.momentum_phases.append( + { + 'end_iter': float(2 * self.pct_start * runner.max_iters) - 2, + 'start_momentum': 'base_momentum', + 'end_momentum': 'max_momentum', + } + ) + self.momentum_phases.append( + {'end_iter': runner.max_iters - 1, 'start_momentum': 'max_momentum', 'end_momentum': 'max_momentum'} + ) else: - self.momentum_phases.append({ - 'end_iter': - float(self.pct_start * runner.max_iters) - 1, - 'start_momentum': - 'max_momentum', - 'end_momentum': - 'base_momentum' - }) - self.momentum_phases.append({ - 'end_iter': runner.max_iters - 1, - 'start_momentum': 'base_momentum', - 'end_momentum': 'max_momentum' - }) + self.momentum_phases.append( + { + 'end_iter': float(self.pct_start * runner.max_iters) - 1, + 'start_momentum': 'max_momentum', + 'end_momentum': 'base_momentum', + } + ) + self.momentum_phases.append( + {'end_iter': runner.max_iters - 1, 'start_momentum': 'base_momentum', 'end_momentum': 'max_momentum'} + ) def _set_momentum(self, runner, momentum_groups): if isinstance(runner.optimizer, dict): for k, optim in runner.optimizer.items(): - for param_group, mom in zip(optim.param_groups, - momentum_groups[k]): + for param_group, mom in zip(optim.param_groups, momentum_groups[k]): if 'momentum' in param_group.keys(): param_group['momentum'] = mom elif 'betas' in param_group.keys(): param_group['betas'] = (mom, param_group['betas'][1]) else: - for param_group, mom in zip(runner.optimizer.param_groups, - momentum_groups): + for param_group, mom in zip(runner.optimizer.param_groups, momentum_groups): if 'momentum' in param_group.keys(): param_group['momentum'] = mom elif 'betas' in param_group.keys(): @@ -470,8 +401,8 @@ def get_momentum(self, runner, param_group): if curr_iter <= end_iter or i == len(self.momentum_phases) - 1: pct = (curr_iter - start_iter) / (end_iter - start_iter) momentum = self.anneal_func( - param_group[phase['start_momentum']], - param_group[phase['end_momentum']], pct) + param_group[phase['start_momentum']], param_group[phase['end_momentum']], pct + ) break start_iter = end_iter return momentum @@ -480,10 +411,7 @@ def get_regular_momentum(self, runner): if isinstance(runner.optimizer, dict): momentum_groups = {} for k, optim in runner.optimizer.items(): - _momentum_group = [ - self.get_momentum(runner, param_group) - for param_group in optim.param_groups - ] + _momentum_group = [self.get_momentum(runner, param_group) for param_group in optim.param_groups] momentum_groups.update({k: _momentum_group}) return momentum_groups else: diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py index c44b6338c0dc..03090c2e97ff 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py @@ -20,13 +20,11 @@ @HOOKS.register_module() class OptimizerHook(Hook): - def __init__(self, grad_clip=None): self.grad_clip = grad_clip def clip_grads(self, params): - params = list( - filter(lambda p: p.requires_grad and p.grad is not None, params)) + params = list(filter(lambda p: p.requires_grad and p.grad is not None, params)) if len(params) > 0: return clip_grad.clip_grad_norm_(params, **self.grad_clip) @@ -37,8 +35,7 @@ def after_train_iter(self, runner): grad_norm = self.clip_grads(runner.model.parameters()) if grad_norm is not None: # Add grad norm to the logger - runner.log_buffer.update({'grad_norm': float(grad_norm)}, - runner.outputs['num_samples']) + runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) runner.optimizer.step() @@ -64,9 +61,9 @@ class GradientCumulativeOptimizerHook(OptimizerHook): def __init__(self, cumulative_iters=1, **kwargs): super(GradientCumulativeOptimizerHook, self).__init__(**kwargs) - assert isinstance(cumulative_iters, int) and cumulative_iters > 0, \ - f'cumulative_iters only accepts positive int, but got ' \ - f'{type(cumulative_iters)} instead.' + assert isinstance(cumulative_iters, int) and cumulative_iters > 0, ( + f'cumulative_iters only accepts positive int, but got ' f'{type(cumulative_iters)} instead.' + ) self.cumulative_iters = cumulative_iters self.divisible_iters = 0 @@ -92,12 +89,12 @@ def _init(self, runner): if self.has_batch_norm(runner.model) and self.cumulative_iters > 1: runner.logger.warning( 'GradientCumulativeOptimizerHook may slightly decrease ' - 'performance if the model has BatchNorm layers.') + 'performance if the model has BatchNorm layers.' + ) residual_iters = runner.max_iters - runner.iter - self.divisible_iters = ( - residual_iters // self.cumulative_iters * self.cumulative_iters) + self.divisible_iters = residual_iters // self.cumulative_iters * self.cumulative_iters self.remainder_iters = residual_iters - self.divisible_iters self.initialized = True @@ -114,21 +111,18 @@ def after_train_iter(self, runner): loss = loss / loss_factor loss.backward() - if (self.every_n_iters(runner, self.cumulative_iters) - or self.is_last_iter(runner)): + if self.every_n_iters(runner, self.cumulative_iters) or self.is_last_iter(runner): if self.grad_clip is not None: grad_norm = self.clip_grads(runner.model.parameters()) if grad_norm is not None: # Add grad norm to the logger - runner.log_buffer.update({'grad_norm': float(grad_norm)}, - runner.outputs['num_samples']) + runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) runner.optimizer.step() runner.optimizer.zero_grad() -if (TORCH_VERSION != 'parrots' - and digit_version(TORCH_VERSION) >= digit_version('1.6.0')): +if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) >= digit_version('1.6.0'): @HOOKS.register_module() class Fp16OptimizerHook(OptimizerHook): @@ -159,12 +153,7 @@ class Fp16OptimizerHook(OptimizerHook): >>> optimizer_hook = Fp16OptimizerHook(loss_scale=loss_scale) """ - def __init__(self, - grad_clip=None, - coalesce=True, - bucket_size_mb=-1, - loss_scale=512., - distributed=True): + def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1, loss_scale=512.0, distributed=True): self.grad_clip = grad_clip self.coalesce = coalesce self.bucket_size_mb = bucket_size_mb @@ -178,8 +167,7 @@ def __init__(self, elif isinstance(loss_scale, dict): self.loss_scaler = GradScaler(**loss_scale) else: - raise ValueError('loss_scale must be of type float, dict, or ' - f'"dynamic", got {loss_scale}') + raise ValueError('loss_scale must be of type float, dict, or ' f'"dynamic", got {loss_scale}') def before_run(self, runner): """Preparing steps before Mixed Precision Training.""" @@ -192,18 +180,15 @@ def before_run(self, runner): def copy_grads_to_fp32(self, fp16_net, fp32_weights): """Copy gradients from fp16 model to fp32 weight copy.""" - for fp32_param, fp16_param in zip(fp32_weights, - fp16_net.parameters()): + for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()): if fp16_param.grad is not None: if fp32_param.grad is None: - fp32_param.grad = fp32_param.data.new( - fp32_param.size()) + fp32_param.grad = fp32_param.data.new(fp32_param.size()) fp32_param.grad.copy_(fp16_param.grad) def copy_params_to_fp16(self, fp16_net, fp32_weights): """Copy updated params from fp32 weight copy to fp16 model.""" - for fp16_param, fp32_param in zip(fp16_net.parameters(), - fp32_weights): + for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights): fp16_param.data.copy_(fp32_param.data) def after_train_iter(self, runner): @@ -228,19 +213,16 @@ def after_train_iter(self, runner): grad_norm = self.clip_grads(runner.model.parameters()) if grad_norm is not None: # Add grad norm to the logger - runner.log_buffer.update({'grad_norm': float(grad_norm)}, - runner.outputs['num_samples']) + runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) # backward and update scaler self.loss_scaler.step(runner.optimizer) self.loss_scaler.update(self._scale_update_param) # save state_dict of loss_scaler - runner.meta.setdefault( - 'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() + runner.meta.setdefault('fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() @HOOKS.register_module() - class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook, - Fp16OptimizerHook): + class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook, Fp16OptimizerHook): """Fp16 optimizer Hook (using PyTorch's implementation) implements multi-iters gradient cumulating. @@ -249,8 +231,7 @@ class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook, """ def __init__(self, *args, **kwargs): - super(GradientCumulativeFp16OptimizerHook, - self).__init__(*args, **kwargs) + super(GradientCumulativeFp16OptimizerHook, self).__init__(*args, **kwargs) def after_train_iter(self, runner): if not self.initialized: @@ -265,8 +246,7 @@ def after_train_iter(self, runner): self.loss_scaler.scale(loss).backward() - if (self.every_n_iters(runner, self.cumulative_iters) - or self.is_last_iter(runner)): + if self.every_n_iters(runner, self.cumulative_iters) or self.is_last_iter(runner): # copy fp16 grads in the model to fp32 params in the optimizer self.loss_scaler.unscale_(runner.optimizer) @@ -275,22 +255,20 @@ def after_train_iter(self, runner): grad_norm = self.clip_grads(runner.model.parameters()) if grad_norm is not None: # Add grad norm to the logger - runner.log_buffer.update( - {'grad_norm': float(grad_norm)}, - runner.outputs['num_samples']) + runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) # backward and update scaler self.loss_scaler.step(runner.optimizer) self.loss_scaler.update(self._scale_update_param) # save state_dict of loss_scaler - runner.meta.setdefault( - 'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() + runner.meta.setdefault('fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() # clear grads runner.model.zero_grad() runner.optimizer.zero_grad() + else: @HOOKS.register_module() @@ -315,12 +293,7 @@ class Fp16OptimizerHook(OptimizerHook): Defaults to 512. """ - def __init__(self, - grad_clip=None, - coalesce=True, - bucket_size_mb=-1, - loss_scale=512., - distributed=True): + def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1, loss_scale=512.0, distributed=True): self.grad_clip = grad_clip self.coalesce = coalesce self.bucket_size_mb = bucket_size_mb @@ -328,13 +301,11 @@ def __init__(self, if loss_scale == 'dynamic': self.loss_scaler = LossScaler(mode='dynamic') elif isinstance(loss_scale, float): - self.loss_scaler = LossScaler( - init_scale=loss_scale, mode='static') + self.loss_scaler = LossScaler(init_scale=loss_scale, mode='static') elif isinstance(loss_scale, dict): self.loss_scaler = LossScaler(**loss_scale) else: - raise ValueError('loss_scale must be of type float, dict, or ' - f'"dynamic", got {loss_scale}') + raise ValueError('loss_scale must be of type float, dict, or ' f'"dynamic", got {loss_scale}') def before_run(self, runner): """Preparing steps before Mixed Precision Training. @@ -344,15 +315,14 @@ def before_run(self, runner): """ # keep a copy of fp32 weights old_groups = runner.optimizer.param_groups - runner.optimizer.param_groups = copy.deepcopy( - runner.optimizer.param_groups) + runner.optimizer.param_groups = copy.deepcopy(runner.optimizer.param_groups) state = defaultdict(dict) p_map = { old_p: p for old_p, p in zip( chain(*(g['params'] for g in old_groups)), - chain(*(g['params'] - for g in runner.optimizer.param_groups))) + chain(*(g['params'] for g in runner.optimizer.param_groups)), + ) } for k, v in runner.optimizer.state.items(): state[p_map[k]] = v @@ -366,18 +336,15 @@ def before_run(self, runner): def copy_grads_to_fp32(self, fp16_net, fp32_weights): """Copy gradients from fp16 model to fp32 weight copy.""" - for fp32_param, fp16_param in zip(fp32_weights, - fp16_net.parameters()): + for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()): if fp16_param.grad is not None: if fp32_param.grad is None: - fp32_param.grad = fp32_param.data.new( - fp32_param.size()) + fp32_param.grad = fp32_param.data.new(fp32_param.size()) fp32_param.grad.copy_(fp16_param.grad) def copy_params_to_fp16(self, fp16_net, fp32_weights): """Copy updated params from fp32 weight copy to fp16 model.""" - for fp16_param, fp32_param in zip(fp16_net.parameters(), - fp32_weights): + for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights): fp16_param.data.copy_(fp32_param.data) def after_train_iter(self, runner): @@ -405,8 +372,7 @@ def after_train_iter(self, runner): self.copy_grads_to_fp32(runner.model, fp32_weights) # allreduce grads if self.distributed: - allreduce_grads(fp32_weights, self.coalesce, - self.bucket_size_mb) + allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb) has_overflow = self.loss_scaler.has_overflow(fp32_weights) # if has overflow, skip this iteration @@ -419,31 +385,25 @@ def after_train_iter(self, runner): grad_norm = self.clip_grads(fp32_weights) if grad_norm is not None: # Add grad norm to the logger - runner.log_buffer.update( - {'grad_norm': float(grad_norm)}, - runner.outputs['num_samples']) + runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) # update fp32 params runner.optimizer.step() # copy fp32 params to the fp16 model self.copy_params_to_fp16(runner.model, fp32_weights) self.loss_scaler.update_scale(has_overflow) if has_overflow: - runner.logger.warning('Check overflow, downscale loss scale ' - f'to {self.loss_scaler.cur_scale}') + runner.logger.warning('Check overflow, downscale loss scale ' f'to {self.loss_scaler.cur_scale}') # save state_dict of loss_scaler - runner.meta.setdefault( - 'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() + runner.meta.setdefault('fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() @HOOKS.register_module() - class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook, - Fp16OptimizerHook): + class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook, Fp16OptimizerHook): """Fp16 optimizer Hook (using mmcv implementation) implements multi- iters gradient cumulating.""" def __init__(self, *args, **kwargs): - super(GradientCumulativeFp16OptimizerHook, - self).__init__(*args, **kwargs) + super(GradientCumulativeFp16OptimizerHook, self).__init__(*args, **kwargs) def after_train_iter(self, runner): if not self.initialized: @@ -461,8 +421,7 @@ def after_train_iter(self, runner): scaled_loss = loss * self.loss_scaler.loss_scale scaled_loss.backward() - if (self.every_n_iters(runner, self.cumulative_iters) - or self.is_last_iter(runner)): + if self.every_n_iters(runner, self.cumulative_iters) or self.is_last_iter(runner): # copy fp16 grads in the model to fp32 params in the optimizer fp32_weights = [] @@ -471,8 +430,7 @@ def after_train_iter(self, runner): self.copy_grads_to_fp32(runner.model, fp32_weights) # allreduce grads if self.distributed: - allreduce_grads(fp32_weights, self.coalesce, - self.bucket_size_mb) + allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb) has_overflow = self.loss_scaler.has_overflow(fp32_weights) # if has overflow, skip this iteration @@ -485,23 +443,18 @@ def after_train_iter(self, runner): grad_norm = self.clip_grads(fp32_weights) if grad_norm is not None: # Add grad norm to the logger - runner.log_buffer.update( - {'grad_norm': float(grad_norm)}, - runner.outputs['num_samples']) + runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) # update fp32 params runner.optimizer.step() # copy fp32 params to the fp16 model self.copy_params_to_fp16(runner.model, fp32_weights) else: - runner.logger.warning( - 'Check overflow, downscale loss scale ' - f'to {self.loss_scaler.cur_scale}') + runner.logger.warning('Check overflow, downscale loss scale ' f'to {self.loss_scaler.cur_scale}') self.loss_scaler.update_scale(has_overflow) # save state_dict of loss_scaler - runner.meta.setdefault( - 'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() + runner.meta.setdefault('fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() # clear grads runner.model.zero_grad() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py index b70236997eec..ad58c981b2be 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py @@ -52,34 +52,33 @@ class ProfilerHook(Hook): >>> runner.run(data_loaders=[trainloader], workflow=[('train', 1)]) """ - def __init__(self, - by_epoch: bool = True, - profile_iters: int = 1, - activities: List[str] = ['cpu', 'cuda'], - schedule: Optional[dict] = None, - on_trace_ready: Optional[Union[Callable, dict]] = None, - record_shapes: bool = False, - profile_memory: bool = False, - with_stack: bool = False, - with_flops: bool = False, - json_trace_path: Optional[str] = None) -> None: + def __init__( + self, + by_epoch: bool = True, + profile_iters: int = 1, + activities: List[str] = ['cpu', 'cuda'], + schedule: Optional[dict] = None, + on_trace_ready: Optional[Union[Callable, dict]] = None, + record_shapes: bool = False, + profile_memory: bool = False, + with_stack: bool = False, + with_flops: bool = False, + json_trace_path: Optional[str] = None, + ) -> None: try: from torch import profiler # torch version >= 1.8.1 except ImportError: - raise ImportError('profiler is the new feature of torch1.8.1, ' - f'but your version is {torch.__version__}') + raise ImportError('profiler is the new feature of torch1.8.1, ' f'but your version is {torch.__version__}') assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean.' self.by_epoch = by_epoch if profile_iters < 1: - raise ValueError('profile_iters should be greater than 0, but got ' - f'{profile_iters}') + raise ValueError('profile_iters should be greater than 0, but got ' f'{profile_iters}') self.profile_iters = profile_iters if not isinstance(activities, list): - raise ValueError( - f'activities should be list, but got {type(activities)}') + raise ValueError(f'activities should be list, but got {type(activities)}') self.activities = [] for activity in activities: activity = activity.lower() @@ -88,8 +87,7 @@ def __init__(self, elif activity == 'cuda': self.activities.append(profiler.ProfilerActivity.CUDA) else: - raise ValueError( - f'activity should be "cpu" or "cuda", but got {activity}') + raise ValueError(f'activity should be "cpu" or "cuda", but got {activity}') if schedule is not None: self.schedule = profiler.schedule(**schedule) @@ -106,12 +104,10 @@ def __init__(self, @master_only def before_run(self, runner): if self.by_epoch and runner.max_epochs < self.profile_iters: - raise ValueError('self.profile_iters should not be greater than ' - f'{runner.max_epochs}') + raise ValueError('self.profile_iters should not be greater than ' f'{runner.max_epochs}') if not self.by_epoch and runner.max_iters < self.profile_iters: - raise ValueError('self.profile_iters should not be greater than ' - f'{runner.max_iters}') + raise ValueError('self.profile_iters should not be greater than ' f'{runner.max_iters}') if callable(self.on_trace_ready): # handler _on_trace_ready = self.on_trace_ready @@ -128,27 +124,24 @@ def _log_handler(prof): try: import torch_tb_profiler # noqa: F401 except ImportError: - raise ImportError('please run "pip install ' - 'torch-tb-profiler" to install ' - 'torch_tb_profiler') - _on_trace_ready = torch.profiler.tensorboard_trace_handler( - **trace_cfg) + raise ImportError('please run "pip install ' 'torch-tb-profiler" to install ' 'torch_tb_profiler') + _on_trace_ready = torch.profiler.tensorboard_trace_handler(**trace_cfg) else: - raise ValueError('trace_type should be "log_trace" or ' - f'"tb_trace", but got {trace_type}') + raise ValueError('trace_type should be "log_trace" or ' f'"tb_trace", but got {trace_type}') elif self.on_trace_ready is None: _on_trace_ready = None # type: ignore else: - raise ValueError('on_trace_ready should be handler, dict or None, ' - f'but got {type(self.on_trace_ready)}') + raise ValueError('on_trace_ready should be handler, dict or None, ' f'but got {type(self.on_trace_ready)}') if runner.max_epochs > 1: - warnings.warn(f'profiler will profile {runner.max_epochs} epochs ' - 'instead of 1 epoch. Since profiler will slow down ' - 'the training, it is recommended to train 1 epoch ' - 'with ProfilerHook and adjust your setting according' - ' to the profiler summary. During normal training ' - '(epoch > 1), you may disable the ProfilerHook.') + warnings.warn( + f'profiler will profile {runner.max_epochs} epochs ' + 'instead of 1 epoch. Since profiler will slow down ' + 'the training, it is recommended to train 1 epoch ' + 'with ProfilerHook and adjust your setting according' + ' to the profiler summary. During normal training ' + '(epoch > 1), you may disable the ProfilerHook.' + ) self.profiler = torch.profiler.profile( activities=self.activities, @@ -157,7 +150,8 @@ def _log_handler(prof): record_shapes=self.record_shapes, profile_memory=self.profile_memory, with_stack=self.with_stack, - with_flops=self.with_flops) + with_flops=self.with_flops, + ) self.profiler.__enter__() runner.logger.info('profiler is profiling...') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py index 8062be9d777c..f73f8ca649f1 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py @@ -17,7 +17,6 @@ class IterLoader: - def __init__(self, dataloader): self._dataloader = dataloader self.iter_loader = iter(self._dataloader) @@ -100,19 +99,15 @@ def run(self, data_loaders, workflow, max_iters=None, **kwargs): assert len(data_loaders) == len(workflow) if max_iters is not None: warnings.warn( - 'setting max_iters in run is deprecated, ' - 'please set max_iters in runner_config', DeprecationWarning) + 'setting max_iters in run is deprecated, ' 'please set max_iters in runner_config', DeprecationWarning + ) self._max_iters = max_iters - assert self._max_iters is not None, ( - 'max_iters must be specified during instantiation') + assert self._max_iters is not None, 'max_iters must be specified during instantiation' work_dir = self.work_dir if self.work_dir is not None else 'NONE' - self.logger.info('Start running, host: %s, work_dir: %s', - get_host_info(), work_dir) - self.logger.info('Hooks will be executed in the following order:\n%s', - self.get_hook_info()) - self.logger.info('workflow: %s, max: %d iters', workflow, - self._max_iters) + self.logger.info('Start running, host: %s, work_dir: %s', get_host_info(), work_dir) + self.logger.info('Hooks will be executed in the following order:\n%s', self.get_hook_info()) + self.logger.info('workflow: %s, max: %d iters', workflow, self._max_iters) self.call_hook('before_run') iter_loaders = [IterLoader(x) for x in data_loaders] @@ -124,9 +119,7 @@ def run(self, data_loaders, workflow, max_iters=None, **kwargs): self._inner_iter = 0 mode, iters = flow if not isinstance(mode, str) or not hasattr(self, mode): - raise ValueError( - 'runner has no method named "{}" to run a workflow'. - format(mode)) + raise ValueError('runner has no method named "{}" to run a workflow'.format(mode)) iter_runner = getattr(self, mode) for _ in range(iters): if mode == 'train' and self.iter >= self._max_iters: @@ -137,10 +130,7 @@ def run(self, data_loaders, workflow, max_iters=None, **kwargs): self.call_hook('after_epoch') self.call_hook('after_run') - def resume(self, - checkpoint, - resume_optimizer=True, - map_location='default'): + def resume(self, checkpoint, resume_optimizer=True, map_location='default'): """Resume model from checkpoint. Args: @@ -152,12 +142,9 @@ def resume(self, """ if map_location == 'default': device_id = torch.cuda.current_device() - checkpoint = self.load_checkpoint( - checkpoint, - map_location=lambda storage, loc: storage.cuda(device_id)) + checkpoint = self.load_checkpoint(checkpoint, map_location=lambda storage, loc: storage.cuda(device_id)) else: - checkpoint = self.load_checkpoint( - checkpoint, map_location=map_location) + checkpoint = self.load_checkpoint(checkpoint, map_location=map_location) self._epoch = checkpoint['meta']['epoch'] self._iter = checkpoint['meta']['iter'] @@ -167,21 +154,15 @@ def resume(self, self.optimizer.load_state_dict(checkpoint['optimizer']) elif isinstance(self.optimizer, dict): for k in self.optimizer.keys(): - self.optimizer[k].load_state_dict( - checkpoint['optimizer'][k]) + self.optimizer[k].load_state_dict(checkpoint['optimizer'][k]) else: - raise TypeError( - 'Optimizer should be dict or torch.optim.Optimizer ' - f'but got {type(self.optimizer)}') + raise TypeError('Optimizer should be dict or torch.optim.Optimizer ' f'but got {type(self.optimizer)}') self.logger.info(f'resumed from epoch: {self.epoch}, iter {self.iter}') - def save_checkpoint(self, - out_dir, - filename_tmpl='iter_{}.pth', - meta=None, - save_optimizer=True, - create_symlink=True): + def save_checkpoint( + self, out_dir, filename_tmpl='iter_{}.pth', meta=None, save_optimizer=True, create_symlink=True + ): """Save checkpoint to file. Args: @@ -198,8 +179,7 @@ def save_checkpoint(self, if meta is None: meta = {} elif not isinstance(meta, dict): - raise TypeError( - f'meta should be a dict or None, but got {type(meta)}') + raise TypeError(f'meta should be a dict or None, but got {type(meta)}') if self.meta is not None: meta.update(self.meta) # Note: meta.update(self.meta) should be done before @@ -221,13 +201,15 @@ def save_checkpoint(self, else: shutil.copy(filepath, dst_file) - def register_training_hooks(self, - lr_config, - optimizer_config=None, - checkpoint_config=None, - log_config=None, - momentum_config=None, - custom_hooks_config=None): + def register_training_hooks( + self, + lr_config, + optimizer_config=None, + checkpoint_config=None, + log_config=None, + momentum_config=None, + custom_hooks_config=None, + ): """Register default hooks for iter-based training. Checkpoint hook, optimizer stepper hook and logger hooks will be set to @@ -270,4 +252,5 @@ def register_training_hooks(self, checkpoint_config=checkpoint_config, log_config=log_config, timer_config=IterTimerHook(), - custom_hooks_config=custom_hooks_config) + custom_hooks_config=custom_hooks_config, + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py index d949e2941c54..5a08dfb3b937 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py @@ -5,7 +5,6 @@ class LogBuffer: - def __init__(self): self.val_history = OrderedDict() self.n_history = OrderedDict() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py index 53c34d047099..c5a0041381c9 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py @@ -1,9 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .builder import (OPTIMIZER_BUILDERS, OPTIMIZERS, build_optimizer, - build_optimizer_constructor) +from .builder import OPTIMIZER_BUILDERS, OPTIMIZERS, build_optimizer, build_optimizer_constructor from .default_constructor import DefaultOptimizerConstructor __all__ = [ - 'OPTIMIZER_BUILDERS', 'OPTIMIZERS', 'DefaultOptimizerConstructor', - 'build_optimizer', 'build_optimizer_constructor' + 'OPTIMIZER_BUILDERS', + 'OPTIMIZERS', + 'DefaultOptimizerConstructor', + 'build_optimizer', + 'build_optimizer_constructor', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py index f9234eed8f1f..d305b1a6eadd 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py @@ -16,8 +16,7 @@ def register_torch_optimizers(): if module_name.startswith('__'): continue _optim = getattr(torch.optim, module_name) - if inspect.isclass(_optim) and issubclass(_optim, - torch.optim.Optimizer): + if inspect.isclass(_optim) and issubclass(_optim, torch.optim.Optimizer): OPTIMIZERS.register_module()(_optim) torch_optimizers.append(module_name) return torch_optimizers @@ -32,13 +31,10 @@ def build_optimizer_constructor(cfg): def build_optimizer(model, cfg): optimizer_cfg = copy.deepcopy(cfg) - constructor_type = optimizer_cfg.pop('constructor', - 'DefaultOptimizerConstructor') + constructor_type = optimizer_cfg.pop('constructor', 'DefaultOptimizerConstructor') paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None) optim_constructor = build_optimizer_constructor( - dict( - type=constructor_type, - optimizer_cfg=optimizer_cfg, - paramwise_cfg=paramwise_cfg)) + dict(type=constructor_type, optimizer_cfg=optimizer_cfg, paramwise_cfg=paramwise_cfg) + ) optimizer = optim_constructor(model) return optimizer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py index f13b67e0d63a..c0721ccad28f 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py @@ -4,8 +4,14 @@ import torch from torch.nn import GroupNorm, LayerNorm -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import _BatchNorm, _InstanceNorm, build_from_cfg, is_list_of +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import ( + _BatchNorm, + _InstanceNorm, + build_from_cfg, + is_list_of, +) from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.ext_loader import check_ops_exist + from .builder import OPTIMIZER_BUILDERS, OPTIMIZERS @@ -94,8 +100,7 @@ class DefaultOptimizerConstructor: def __init__(self, optimizer_cfg, paramwise_cfg=None): if not isinstance(optimizer_cfg, dict): - raise TypeError('optimizer_cfg should be a dict', - f'but got {type(optimizer_cfg)}') + raise TypeError('optimizer_cfg should be a dict', f'but got {type(optimizer_cfg)}') self.optimizer_cfg = optimizer_cfg self.paramwise_cfg = {} if paramwise_cfg is None else paramwise_cfg self.base_lr = optimizer_cfg.get('lr', None) @@ -104,14 +109,13 @@ def __init__(self, optimizer_cfg, paramwise_cfg=None): def _validate_cfg(self): if not isinstance(self.paramwise_cfg, dict): - raise TypeError('paramwise_cfg should be None or a dict, ' - f'but got {type(self.paramwise_cfg)}') + raise TypeError('paramwise_cfg should be None or a dict, ' f'but got {type(self.paramwise_cfg)}') if 'custom_keys' in self.paramwise_cfg: if not isinstance(self.paramwise_cfg['custom_keys'], dict): raise TypeError( - 'If specified, custom_keys must be a dict, ' - f'but got {type(self.paramwise_cfg["custom_keys"])}') + 'If specified, custom_keys must be a dict, ' f'but got {type(self.paramwise_cfg["custom_keys"])}' + ) if self.base_wd is None: for key in self.paramwise_cfg['custom_keys']: if 'decay_mult' in self.paramwise_cfg['custom_keys'][key]: @@ -119,9 +123,11 @@ def _validate_cfg(self): # get base lr and weight decay # weight_decay must be explicitly specified if mult is specified - if ('bias_decay_mult' in self.paramwise_cfg - or 'norm_decay_mult' in self.paramwise_cfg - or 'dwconv_decay_mult' in self.paramwise_cfg): + if ( + 'bias_decay_mult' in self.paramwise_cfg + or 'norm_decay_mult' in self.paramwise_cfg + or 'dwconv_decay_mult' in self.paramwise_cfg + ): if self.base_wd is None: raise ValueError('base_wd should not be None') @@ -154,19 +160,16 @@ def add_params(self, params, module, prefix='', is_dcn_module=None): # first sort with alphabet order and then sort with reversed len of str sorted_keys = sorted(sorted(custom_keys.keys()), key=len, reverse=True) - bias_lr_mult = self.paramwise_cfg.get('bias_lr_mult', 1.) - bias_decay_mult = self.paramwise_cfg.get('bias_decay_mult', 1.) - norm_decay_mult = self.paramwise_cfg.get('norm_decay_mult', 1.) - dwconv_decay_mult = self.paramwise_cfg.get('dwconv_decay_mult', 1.) + bias_lr_mult = self.paramwise_cfg.get('bias_lr_mult', 1.0) + bias_decay_mult = self.paramwise_cfg.get('bias_decay_mult', 1.0) + norm_decay_mult = self.paramwise_cfg.get('norm_decay_mult', 1.0) + dwconv_decay_mult = self.paramwise_cfg.get('dwconv_decay_mult', 1.0) bypass_duplicate = self.paramwise_cfg.get('bypass_duplicate', False) - dcn_offset_lr_mult = self.paramwise_cfg.get('dcn_offset_lr_mult', 1.) + dcn_offset_lr_mult = self.paramwise_cfg.get('dcn_offset_lr_mult', 1.0) # special rules for norm layers and depth-wise conv layers - is_norm = isinstance(module, - (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm)) - is_dwconv = ( - isinstance(module, torch.nn.Conv2d) - and module.in_channels == module.groups) + is_norm = isinstance(module, (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm)) + is_dwconv = isinstance(module, torch.nn.Conv2d) and module.in_channels == module.groups for name, param in module.named_parameters(recurse=False): param_group = {'params': [param]} @@ -174,18 +177,17 @@ def add_params(self, params, module, prefix='', is_dcn_module=None): params.append(param_group) continue if bypass_duplicate and self._is_in(param_group, params): - warnings.warn(f'{prefix} is duplicate. It is skipped since ' - f'bypass_duplicate={bypass_duplicate}') + warnings.warn(f'{prefix} is duplicate. It is skipped since ' f'bypass_duplicate={bypass_duplicate}') continue # if the parameter match one of the custom keys, ignore other rules is_custom = False for key in sorted_keys: if key in f'{prefix}.{name}': is_custom = True - lr_mult = custom_keys[key].get('lr_mult', 1.) + lr_mult = custom_keys[key].get('lr_mult', 1.0) param_group['lr'] = self.base_lr * lr_mult if self.base_wd is not None: - decay_mult = custom_keys[key].get('decay_mult', 1.) + decay_mult = custom_keys[key].get('decay_mult', 1.0) param_group['weight_decay'] = self.base_wd * decay_mult break @@ -195,8 +197,7 @@ def add_params(self, params, module, prefix='', is_dcn_module=None): if name == 'bias' and not (is_norm or is_dcn_module): param_group['lr'] = self.base_lr * bias_lr_mult - if (prefix.find('conv_offset') != -1 and is_dcn_module - and isinstance(module, torch.nn.Conv2d)): + if prefix.find('conv_offset') != -1 and is_dcn_module and isinstance(module, torch.nn.Conv2d): # deal with both dcn_offset's bias & weight param_group['lr'] = self.base_lr * dcn_offset_lr_mult @@ -204,32 +205,28 @@ def add_params(self, params, module, prefix='', is_dcn_module=None): if self.base_wd is not None: # norm decay if is_norm: - param_group[ - 'weight_decay'] = self.base_wd * norm_decay_mult + param_group['weight_decay'] = self.base_wd * norm_decay_mult # depth-wise conv elif is_dwconv: - param_group[ - 'weight_decay'] = self.base_wd * dwconv_decay_mult + param_group['weight_decay'] = self.base_wd * dwconv_decay_mult # bias lr and decay elif name == 'bias' and not is_dcn_module: # TODO: current bias_decay_mult will have affect on DCN - param_group[ - 'weight_decay'] = self.base_wd * bias_decay_mult + param_group['weight_decay'] = self.base_wd * bias_decay_mult params.append(param_group) if check_ops_exist(): - from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import DeformConv2d, ModulatedDeformConv2d - is_dcn_module = isinstance(module, - (DeformConv2d, ModulatedDeformConv2d)) + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import ( + DeformConv2d, + ModulatedDeformConv2d, + ) + + is_dcn_module = isinstance(module, (DeformConv2d, ModulatedDeformConv2d)) else: is_dcn_module = False for child_name, child_mod in module.named_children(): child_prefix = f'{prefix}.{child_name}' if prefix else child_name - self.add_params( - params, - child_mod, - prefix=child_prefix, - is_dcn_module=is_dcn_module) + self.add_params(params, child_mod, prefix=child_prefix, is_dcn_module=is_dcn_module) def __call__(self, model): if hasattr(model, 'module'): diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py index 9680d73032bb..4ac2ec3e88ff 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py @@ -59,8 +59,7 @@ def obj_from_dict(info, parent=None, default_args=None): else: obj_type = sys.modules[obj_type] elif not isinstance(obj_type, type): - raise TypeError('type must be a str or valid type, but ' - f'got {type(obj_type)}') + raise TypeError('type must be a str or valid type, but ' f'got {type(obj_type)}') if default_args is not None: for name, value in default_args.items(): args.setdefault(name, value) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py index 378a0068432a..f0eb61d01a3d 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py @@ -1,20 +1,40 @@ # flake8: noqa # Copyright (c) OpenMMLab. All rights reserved. from .config import Config, ConfigDict, DictAction -from .misc import (check_prerequisites, concat_list, deprecated_api_warning, - has_method, import_modules_from_strings, is_list_of, - is_method_overridden, is_seq_of, is_str, is_tuple_of, - iter_cast, list_cast, requires_executable, requires_package, - slice_list, to_1tuple, to_2tuple, to_3tuple, to_4tuple, - to_ntuple, tuple_cast) -from .path import (check_file_exist, fopen, is_filepath, mkdir_or_exist, - scandir, symlink) -from .progressbar import (ProgressBar, track_iter_progress, - track_parallel_progress, track_progress) -from .testing import (assert_attrs_equal, assert_dict_contains_subset, - assert_dict_has_keys, assert_is_norm_layer, - assert_keys_equal, assert_params_all_zeros, - check_python_script) +from .misc import ( + check_prerequisites, + concat_list, + deprecated_api_warning, + has_method, + import_modules_from_strings, + is_list_of, + is_method_overridden, + is_seq_of, + is_str, + is_tuple_of, + iter_cast, + list_cast, + requires_executable, + requires_package, + slice_list, + to_1tuple, + to_2tuple, + to_3tuple, + to_4tuple, + to_ntuple, + tuple_cast, +) +from .path import check_file_exist, fopen, is_filepath, mkdir_or_exist, scandir, symlink +from .progressbar import ProgressBar, track_iter_progress, track_parallel_progress, track_progress +from .testing import ( + assert_attrs_equal, + assert_dict_contains_subset, + assert_dict_has_keys, + assert_is_norm_layer, + assert_keys_equal, + assert_params_all_zeros, + check_python_script, +) from .timer import Timer, TimerError, check_time from .version_utils import digit_version, get_git_hash @@ -22,48 +42,144 @@ import torch except ImportError: __all__ = [ - 'Config', 'ConfigDict', 'DictAction', 'is_str', 'iter_cast', - 'list_cast', 'tuple_cast', 'is_seq_of', 'is_list_of', 'is_tuple_of', - 'slice_list', 'concat_list', 'check_prerequisites', 'requires_package', - 'requires_executable', 'is_filepath', 'fopen', 'check_file_exist', - 'mkdir_or_exist', 'symlink', 'scandir', 'ProgressBar', - 'track_progress', 'track_iter_progress', 'track_parallel_progress', - 'Timer', 'TimerError', 'check_time', 'deprecated_api_warning', - 'digit_version', 'get_git_hash', 'import_modules_from_strings', - 'assert_dict_contains_subset', 'assert_attrs_equal', - 'assert_dict_has_keys', 'assert_keys_equal', 'check_python_script', - 'to_1tuple', 'to_2tuple', 'to_3tuple', 'to_4tuple', 'to_ntuple', - 'is_method_overridden', 'has_method' + 'Config', + 'ConfigDict', + 'DictAction', + 'is_str', + 'iter_cast', + 'list_cast', + 'tuple_cast', + 'is_seq_of', + 'is_list_of', + 'is_tuple_of', + 'slice_list', + 'concat_list', + 'check_prerequisites', + 'requires_package', + 'requires_executable', + 'is_filepath', + 'fopen', + 'check_file_exist', + 'mkdir_or_exist', + 'symlink', + 'scandir', + 'ProgressBar', + 'track_progress', + 'track_iter_progress', + 'track_parallel_progress', + 'Timer', + 'TimerError', + 'check_time', + 'deprecated_api_warning', + 'digit_version', + 'get_git_hash', + 'import_modules_from_strings', + 'assert_dict_contains_subset', + 'assert_attrs_equal', + 'assert_dict_has_keys', + 'assert_keys_equal', + 'check_python_script', + 'to_1tuple', + 'to_2tuple', + 'to_3tuple', + 'to_4tuple', + 'to_ntuple', + 'is_method_overridden', + 'has_method', ] else: from .env import collect_env from .logging import get_logger, print_log from .parrots_jit import jit, skip_no_elena from .parrots_wrapper import ( - TORCH_VERSION, BuildExtension, CppExtension, CUDAExtension, DataLoader, - PoolDataLoader, SyncBatchNorm, _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, - _AvgPoolNd, _BatchNorm, _ConvNd, _ConvTransposeMixin, _InstanceNorm, - _MaxPoolNd, get_build_config, is_rocm_pytorch, _get_cuda_home) + TORCH_VERSION, + BuildExtension, + CppExtension, + CUDAExtension, + DataLoader, + PoolDataLoader, + SyncBatchNorm, + _AdaptiveAvgPoolNd, + _AdaptiveMaxPoolNd, + _AvgPoolNd, + _BatchNorm, + _ConvNd, + _ConvTransposeMixin, + _get_cuda_home, + _InstanceNorm, + _MaxPoolNd, + get_build_config, + is_rocm_pytorch, + ) from .registry import Registry, build_from_cfg from .trace import is_jit_tracing + __all__ = [ - 'Config', 'ConfigDict', 'DictAction', 'collect_env', 'get_logger', - 'print_log', 'is_str', 'iter_cast', 'list_cast', 'tuple_cast', - 'is_seq_of', 'is_list_of', 'is_tuple_of', 'slice_list', 'concat_list', - 'check_prerequisites', 'requires_package', 'requires_executable', - 'is_filepath', 'fopen', 'check_file_exist', 'mkdir_or_exist', - 'symlink', 'scandir', 'ProgressBar', 'track_progress', - 'track_iter_progress', 'track_parallel_progress', 'Registry', - 'build_from_cfg', 'Timer', 'TimerError', 'check_time', 'SyncBatchNorm', - '_AdaptiveAvgPoolNd', '_AdaptiveMaxPoolNd', '_AvgPoolNd', '_BatchNorm', - '_ConvNd', '_ConvTransposeMixin', '_InstanceNorm', '_MaxPoolNd', - 'get_build_config', 'BuildExtension', 'CppExtension', 'CUDAExtension', - 'DataLoader', 'PoolDataLoader', 'TORCH_VERSION', - 'deprecated_api_warning', 'digit_version', 'get_git_hash', - 'import_modules_from_strings', 'jit', 'skip_no_elena', - 'assert_dict_contains_subset', 'assert_attrs_equal', - 'assert_dict_has_keys', 'assert_keys_equal', 'assert_is_norm_layer', - 'assert_params_all_zeros', 'check_python_script', - 'is_method_overridden', 'is_jit_tracing', 'is_rocm_pytorch', - '_get_cuda_home', 'has_method' + 'Config', + 'ConfigDict', + 'DictAction', + 'collect_env', + 'get_logger', + 'print_log', + 'is_str', + 'iter_cast', + 'list_cast', + 'tuple_cast', + 'is_seq_of', + 'is_list_of', + 'is_tuple_of', + 'slice_list', + 'concat_list', + 'check_prerequisites', + 'requires_package', + 'requires_executable', + 'is_filepath', + 'fopen', + 'check_file_exist', + 'mkdir_or_exist', + 'symlink', + 'scandir', + 'ProgressBar', + 'track_progress', + 'track_iter_progress', + 'track_parallel_progress', + 'Registry', + 'build_from_cfg', + 'Timer', + 'TimerError', + 'check_time', + 'SyncBatchNorm', + '_AdaptiveAvgPoolNd', + '_AdaptiveMaxPoolNd', + '_AvgPoolNd', + '_BatchNorm', + '_ConvNd', + '_ConvTransposeMixin', + '_InstanceNorm', + '_MaxPoolNd', + 'get_build_config', + 'BuildExtension', + 'CppExtension', + 'CUDAExtension', + 'DataLoader', + 'PoolDataLoader', + 'TORCH_VERSION', + 'deprecated_api_warning', + 'digit_version', + 'get_git_hash', + 'import_modules_from_strings', + 'jit', + 'skip_no_elena', + 'assert_dict_contains_subset', + 'assert_attrs_equal', + 'assert_dict_has_keys', + 'assert_keys_equal', + 'assert_is_norm_layer', + 'assert_params_all_zeros', + 'check_python_script', + 'is_method_overridden', + 'is_jit_tracing', + 'is_rocm_pytorch', + '_get_cuda_home', + 'has_method', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py index a699e4d0230d..2d8eb6858bc4 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py @@ -31,7 +31,6 @@ class ConfigDict(Dict): - def __missing__(self, name): raise KeyError(name) @@ -39,8 +38,7 @@ def __getattr__(self, name): try: value = super(ConfigDict, self).__getattr__(name) except KeyError: - ex = AttributeError(f"'{self.__class__.__name__}' object has no " - f"attribute '{name}'") + ex = AttributeError(f"'{self.__class__.__name__}' object has no " f"attribute '{name}'") except Exception as e: ex = e else: @@ -100,8 +98,7 @@ def _validate_py_syntax(filename): try: ast.parse(content) except SyntaxError as e: - raise SyntaxError('There are syntax errors in config ' - f'file {filename}: {e}') + raise SyntaxError('There are syntax errors in config ' f'file {filename}: {e}') @staticmethod def _substitute_predefined_vars(filename, temp_config_name): @@ -113,7 +110,8 @@ def _substitute_predefined_vars(filename, temp_config_name): fileDirname=file_dirname, fileBasename=file_basename, fileBasenameNoExtension=file_basename_no_extension, - fileExtname=file_extname) + fileExtname=file_extname, + ) with open(filename, 'r', encoding='utf-8') as f: # Setting encoding explicitly to resolve coding issue on windows config_file = f.read() @@ -156,17 +154,11 @@ def _substitute_base_vars(cfg, base_var_dict, base_cfg): new_v = new_v[new_k] cfg[k] = new_v elif isinstance(v, (list, tuple, dict)): - cfg[k] = Config._substitute_base_vars( - v, base_var_dict, base_cfg) + cfg[k] = Config._substitute_base_vars(v, base_var_dict, base_cfg) elif isinstance(cfg, tuple): - cfg = tuple( - Config._substitute_base_vars(c, base_var_dict, base_cfg) - for c in cfg) + cfg = tuple(Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg) elif isinstance(cfg, list): - cfg = [ - Config._substitute_base_vars(c, base_var_dict, base_cfg) - for c in cfg - ] + cfg = [Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg] elif isinstance(cfg, str) and cfg in base_var_dict: new_v = base_cfg for new_k in base_var_dict[cfg].split('.'): @@ -184,20 +176,17 @@ def _file2dict(filename, use_predefined_variables=True): raise IOError('Only py/yml/yaml/json type are supported now!') with tempfile.TemporaryDirectory() as temp_config_dir: - temp_config_file = tempfile.NamedTemporaryFile( - dir=temp_config_dir, suffix=fileExtname) + temp_config_file = tempfile.NamedTemporaryFile(dir=temp_config_dir, suffix=fileExtname) if platform.system() == 'Windows': temp_config_file.close() temp_config_name = osp.basename(temp_config_file.name) # Substitute predefined variables if use_predefined_variables: - Config._substitute_predefined_vars(filename, - temp_config_file.name) + Config._substitute_predefined_vars(filename, temp_config_file.name) else: shutil.copyfile(filename, temp_config_file.name) # Substitute base variables from placeholders to strings - base_var_dict = Config._pre_substitute_base_vars( - temp_config_file.name, temp_config_file.name) + base_var_dict = Config._pre_substitute_base_vars(temp_config_file.name, temp_config_file.name) if filename.endswith('.py'): temp_module_name = osp.splitext(temp_config_name)[0] @@ -205,15 +194,12 @@ def _file2dict(filename, use_predefined_variables=True): Config._validate_py_syntax(filename) mod = import_module(temp_module_name) sys.path.pop(0) - cfg_dict = { - name: value - for name, value in mod.__dict__.items() - if not name.startswith('__') - } + cfg_dict = {name: value for name, value in mod.__dict__.items() if not name.startswith('__')} # delete imported module del sys.modules[temp_module_name] elif filename.endswith(('.yml', '.yaml', '.json')): import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + cfg_dict = mmcv.load(temp_config_file.name) # close temp file temp_config_file.close() @@ -221,14 +207,11 @@ def _file2dict(filename, use_predefined_variables=True): # check deprecation information if DEPRECATION_KEY in cfg_dict: deprecation_info = cfg_dict.pop(DEPRECATION_KEY) - warning_msg = f'The config file {filename} will be deprecated ' \ - 'in the future.' + warning_msg = f'The config file {filename} will be deprecated ' 'in the future.' if 'expected' in deprecation_info: - warning_msg += f' Please use {deprecation_info["expected"]} ' \ - 'instead.' + warning_msg += f' Please use {deprecation_info["expected"]} ' 'instead.' if 'reference' in deprecation_info: - warning_msg += ' More information can be found at ' \ - f'{deprecation_info["reference"]}' + warning_msg += ' More information can be found at ' f'{deprecation_info["reference"]}' warnings.warn(warning_msg) cfg_text = filename + '\n' @@ -239,8 +222,7 @@ def _file2dict(filename, use_predefined_variables=True): if BASE_KEY in cfg_dict: cfg_dir = osp.dirname(filename) base_filename = cfg_dict.pop(BASE_KEY) - base_filename = base_filename if isinstance( - base_filename, list) else [base_filename] + base_filename = base_filename if isinstance(base_filename, list) else [base_filename] cfg_dict_list = list() cfg_text_list = list() @@ -253,13 +235,11 @@ def _file2dict(filename, use_predefined_variables=True): for c in cfg_dict_list: duplicate_keys = base_cfg_dict.keys() & c.keys() if len(duplicate_keys) > 0: - raise KeyError('Duplicate key is not allowed among bases. ' - f'Duplicate keys: {duplicate_keys}') + raise KeyError('Duplicate key is not allowed among bases. ' f'Duplicate keys: {duplicate_keys}') base_cfg_dict.update(c) # Substitute base variables from strings to their actual values - cfg_dict = Config._substitute_base_vars(cfg_dict, base_var_dict, - base_cfg_dict) + cfg_dict = Config._substitute_base_vars(cfg_dict, base_var_dict, base_cfg_dict) base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict) cfg_dict = base_cfg_dict @@ -310,26 +290,23 @@ def _merge_a_into_b(a, b, allow_list_keys=False): if len(b) <= k: raise KeyError(f'Index {k} exceeds the length of list {b}') b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) - elif isinstance(v, - dict) and k in b and not v.pop(DELETE_KEY, False): + elif isinstance(v, dict) and k in b and not v.pop(DELETE_KEY, False): allowed_types = (dict, list) if allow_list_keys else dict if not isinstance(b[k], allowed_types): raise TypeError( f'{k}={v} in child config cannot inherit from base ' f'because {k} is a dict in the child config but is of ' f'type {type(b[k])} in base config. You may set ' - f'`{DELETE_KEY}=True` to ignore the base config') + f'`{DELETE_KEY}=True` to ignore the base config' + ) b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) else: b[k] = v return b @staticmethod - def fromfile(filename, - use_predefined_variables=True, - import_custom_modules=True): - cfg_dict, cfg_text = Config._file2dict(filename, - use_predefined_variables) + def fromfile(filename, use_predefined_variables=True, import_custom_modules=True): + cfg_dict, cfg_text = Config._file2dict(filename, use_predefined_variables) if import_custom_modules and cfg_dict.get('custom_imports', None): import_modules_from_strings(**cfg_dict['custom_imports']) return Config(cfg_dict, cfg_text=cfg_text, filename=filename) @@ -350,11 +327,8 @@ def fromstring(cfg_str, file_format): raise IOError('Only py/yml/yaml/json type are supported now!') if file_format != '.py' and 'dict(' in cfg_str: # check if users specify a wrong suffix for python - warnings.warn( - 'Please check "file_format", the file format may be .py') - with tempfile.NamedTemporaryFile( - 'w', encoding='utf-8', suffix=file_format, - delete=False) as temp_file: + warnings.warn('Please check "file_format", the file format may be .py') + with tempfile.NamedTemporaryFile('w', encoding='utf-8', suffix=file_format, delete=False) as temp_file: temp_file.write(cfg_str) # on windows, previous implementation cause error # see PR 1077 for details @@ -378,8 +352,7 @@ def __init__(self, cfg_dict=None, cfg_text=None, filename=None): if cfg_dict is None: cfg_dict = dict() elif not isinstance(cfg_dict, dict): - raise TypeError('cfg_dict must be a dict, but ' - f'got {type(cfg_dict)}') + raise TypeError('cfg_dict must be a dict, but ' f'got {type(cfg_dict)}') for key in cfg_dict: if key in RESERVED_KEYS: raise KeyError(f'{key} is reserved for config file') @@ -437,9 +410,7 @@ def _format_list(k, v, use_mapping=False): # check if all items in the list are dict if all(isinstance(_, dict) for _ in v): v_str = '[\n' - v_str += '\n'.join( - f'dict({_indent(_format_dict(v_), indent)}),' - for v_ in v).rstrip(',') + v_str += '\n'.join(f'dict({_indent(_format_dict(v_), indent)}),' for v_ in v).rstrip(',') if use_mapping: k_str = f"'{k}'" if isinstance(k, str) else str(k) attr_str = f'{k_str}: {v_str}' @@ -453,8 +424,7 @@ def _format_list(k, v, use_mapping=False): def _contain_invalid_identifier(dict_str): contain_invalid_identifier = False for key_name in dict_str: - contain_invalid_identifier |= \ - (not str(key_name).isidentifier()) + contain_invalid_identifier |= not str(key_name).isidentifier() return contain_invalid_identifier def _format_dict(input_dict, outest_level=False): @@ -492,7 +462,8 @@ def _format_dict(input_dict, outest_level=False): yapf_style = dict( based_on_style='pep8', blank_line_before_nested_class_or_def=True, - split_before_expression_after_opening_paren=True) + split_before_expression_after_opening_paren=True, + ) text, _ = FormatCode(text, style_config=yapf_style, verify=True) return text @@ -541,6 +512,7 @@ def dump(self, file=None): f.write(self.pretty_text) else: import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + if file is None: file_format = self.filename.split('.')[-1] return mmcv.dump(cfg_dict, file_format=file_format) @@ -589,9 +561,8 @@ def merge_from_dict(self, options, allow_list_keys=True): cfg_dict = super(Config, self).__getattribute__('_cfg_dict') super(Config, self).__setattr__( - '_cfg_dict', - Config._merge_a_into_b( - option_cfg_dict, cfg_dict, allow_list_keys=allow_list_keys)) + '_cfg_dict', Config._merge_a_into_b(option_cfg_dict, cfg_dict, allow_list_keys=allow_list_keys) + ) class DictAction(Action): @@ -646,14 +617,13 @@ def find_next_comma(string): inside these brackets are ignored. """ assert (string.count('(') == string.count(')')) and ( - string.count('[') == string.count(']')), \ - f'Imbalanced brackets exist in {string}' + string.count('[') == string.count(']') + ), f'Imbalanced brackets exist in {string}' end = len(string) for idx, char in enumerate(string): pre = string[:idx] # The string before this ',' is balanced - if ((char == ',') and (pre.count('(') == pre.count(')')) - and (pre.count('[') == pre.count(']'))): + if (char == ',') and (pre.count('(') == pre.count(')')) and (pre.count('[') == pre.count(']')): end = idx break return end @@ -675,7 +645,7 @@ def find_next_comma(string): comma_idx = find_next_comma(val) element = DictAction._parse_iterable(val[:comma_idx]) values.append(element) - val = val[comma_idx + 1:] + val = val[comma_idx + 1 :] if is_tuple: values = tuple(values) return values diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py index 97f99de9680c..484c17be1767 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py @@ -10,6 +10,7 @@ import torch import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + from .parrots_wrapper import get_build_config @@ -50,14 +51,14 @@ def collect_env(): env_info['GPU ' + ','.join(device_ids)] = name from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _get_cuda_home + CUDA_HOME = _get_cuda_home() env_info['CUDA_HOME'] = CUDA_HOME if CUDA_HOME is not None and osp.isdir(CUDA_HOME): try: nvcc = osp.join(CUDA_HOME, 'bin/nvcc') - nvcc = subprocess.check_output( - f'"{nvcc}" -V | tail -n1', shell=True) + nvcc = subprocess.check_output(f'"{nvcc}" -V | tail -n1', shell=True) nvcc = nvcc.decode('utf-8').strip() except subprocess.SubprocessError: nvcc = 'Not Available' @@ -75,6 +76,7 @@ def collect_env(): try: import torchvision + env_info['TorchVision'] = torchvision.__version__ except ModuleNotFoundError: pass @@ -84,7 +86,10 @@ def collect_env(): env_info['MMCV'] = mmcv.__version__ try: - from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import get_compiler_version, get_compiling_cuda_version + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import ( + get_compiler_version, + get_compiling_cuda_version, + ) except ModuleNotFoundError: env_info['MMCV Compiler'] = 'n/a' env_info['MMCV CUDA Compiler'] = 'n/a' diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py index 08132d2c1b9a..6e2217c7e99d 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py @@ -14,6 +14,8 @@ def load_ext(name, funcs): for fun in funcs: assert hasattr(ext, fun), f'{fun} miss in module {name}' return ext + + else: from parrots import extension from parrots.base import ParrotsException @@ -39,7 +41,6 @@ def load_ext(name, funcs): ] def get_fake_func(name, e): - def fake_func(*args, **kwargs): warnings.warn(f'{name} is not supported in parrots now') raise e diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py index 4aa0e04bb9b3..403a1ad7aa77 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py @@ -65,8 +65,7 @@ def get_logger(name, log_file=None, log_level=logging.INFO, file_mode='w'): file_handler = logging.FileHandler(log_file, file_mode) handlers.append(file_handler) - formatter = logging.Formatter( - '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') for handler in handlers: handler.setFormatter(formatter) handler.setLevel(log_level) @@ -106,5 +105,5 @@ def print_log(msg, logger=None, level=logging.INFO): _logger.log(level, msg) else: raise TypeError( - 'logger should be either a logging.Logger object, str, ' - f'"silent" or None, but got {type(logger)}') + 'logger should be either a logging.Logger object, str, ' f'"silent" or None, but got {type(logger)}' + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py index 2c58d0d7fee9..01204666f985 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py @@ -12,7 +12,6 @@ # From PyTorch internals def _ntuple(n): - def parse(x): if isinstance(x, collections.abc.Iterable): return x @@ -62,19 +61,16 @@ def import_modules_from_strings(imports, allow_failed_imports=False): single_import = True imports = [imports] if not isinstance(imports, list): - raise TypeError( - f'custom_imports must be a list but got type {type(imports)}') + raise TypeError(f'custom_imports must be a list but got type {type(imports)}') imported = [] for imp in imports: if not isinstance(imp, str): - raise TypeError( - f'{imp} is of type {type(imp)} and cannot be imported.') + raise TypeError(f'{imp} is of type {type(imp)} and cannot be imported.') try: imported_tmp = import_module(imp) except ImportError: if allow_failed_imports: - warnings.warn(f'{imp} failed to import and is ignored.', - UserWarning) + warnings.warn(f'{imp} failed to import and is ignored.', UserWarning) imported_tmp = None else: raise ImportError @@ -181,12 +177,11 @@ def slice_list(in_list, lens): if not isinstance(lens, list): raise TypeError('"indices" must be an integer or a list of integers') elif sum(lens) != len(in_list): - raise ValueError('sum of lens and list length does not ' - f'match: {sum(lens)} != {len(in_list)}') + raise ValueError('sum of lens and list length does not ' f'match: {sum(lens)} != {len(in_list)}') out_list = [] idx = 0 for i in range(len(lens)): - out_list.append(in_list[idx:idx + lens[i]]) + out_list.append(in_list[idx : idx + lens[i]]) idx += lens[i] return out_list @@ -204,10 +199,10 @@ def concat_list(in_list): def check_prerequisites( - prerequisites, - checker, - msg_tmpl='Prerequisites "{}" are required in method "{}" but not ' - 'found, please install them first.'): # yapf: disable + prerequisites, + checker, + msg_tmpl='Prerequisites "{}" are required in method "{}" but not ' 'found, please install them first.', +): # yapf: disable """A decorator factory to check if prerequisites are satisfied. Args: @@ -221,11 +216,9 @@ def check_prerequisites( """ def wrap(func): - @functools.wraps(func) def wrapped_func(*args, **kwargs): - requirements = [prerequisites] if isinstance( - prerequisites, str) else prerequisites + requirements = [prerequisites] if isinstance(prerequisites, str) else prerequisites missing = [] for item in requirements: if not checker(item): @@ -299,7 +292,6 @@ def deprecated_api_warning(name_dict, cls_name=None): """ def api_warning_wrapper(old_func): - @functools.wraps(old_func) def new_func(*args, **kwargs): # get the arg spec of the decorated method @@ -309,13 +301,14 @@ def new_func(*args, **kwargs): if cls_name is not None: func_name = f'{cls_name}.{func_name}' if args: - arg_names = args_info.args[:len(args)] + arg_names = args_info.args[: len(args)] for src_arg_name, dst_arg_name in name_dict.items(): if src_arg_name in arg_names: warnings.warn( f'"{src_arg_name}" is deprecated in ' f'`{func_name}`, please use "{dst_arg_name}" ' - 'instead') + 'instead' + ) arg_names[arg_names.index(src_arg_name)] = dst_arg_name if kwargs: for src_arg_name, dst_arg_name in name_dict.items(): @@ -328,12 +321,14 @@ def new_func(*args, **kwargs): f'in the arguments at the same time, which ' f'is confusing. `{src_arg_name} will be ' f'deprecated in the future, please ' - f'use `{dst_arg_name}` instead.') + f'use `{dst_arg_name}` instead.' + ) warnings.warn( f'"{src_arg_name}" is deprecated in ' f'`{func_name}`, please use "{dst_arg_name}" ' - 'instead') + 'instead' + ) kwargs[dst_arg_name] = kwargs.pop(src_arg_name) # apply converted arguments to the decorated method @@ -353,8 +348,7 @@ def is_method_overridden(method, base_class, derived_class): base_class (type): the class of the base class. derived_class (type | Any): the class or instance of the derived class. """ - assert isinstance(base_class, type), \ - "base_class doesn't accept instance, Please pass class instead." + assert isinstance(base_class, type), "base_class doesn't accept instance, Please pass class instead." if not isinstance(derived_class, type): derived_class = derived_class.__class__ diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py index 61873f6dbb9b..e68d315d4a23 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py @@ -9,15 +9,8 @@ from parrots.jit import pat as jit else: - def jit(func=None, - check_input=None, - full_shape=True, - derivate=False, - coderize=False, - optimize=False): - + def jit(func=None, check_input=None, full_shape=True, derivate=False, coderize=False, optimize=False): def wrapper(func): - def wrapper_inner(*args, **kargs): return func(*args, **kargs) @@ -34,7 +27,6 @@ def wrapper_inner(*args, **kargs): else: def skip_no_elena(func): - def wrapper(*args, **kargs): return func(*args, **kargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py index 93c97640d4b9..5f57625d7971 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py @@ -11,8 +11,8 @@ def is_rocm_pytorch() -> bool: if TORCH_VERSION != 'parrots': try: from torch.utils.cpp_extension import ROCM_HOME - is_rocm = True if ((torch.version.hip is not None) and - (ROCM_HOME is not None)) else False + + is_rocm = True if ((torch.version.hip is not None) and (ROCM_HOME is not None)) else False except ImportError: pass return is_rocm @@ -24,6 +24,7 @@ def _get_cuda_home(): else: if is_rocm_pytorch(): from torch.utils.cpp_extension import ROCM_HOME + CUDA_HOME = ROCM_HOME else: from torch.utils.cpp_extension import CUDA_HOME @@ -33,6 +34,7 @@ def _get_cuda_home(): def get_build_config(): if TORCH_VERSION == 'parrots': from parrots.config import get_build_info + return get_build_info() else: return torch.__config__.show() @@ -51,6 +53,7 @@ def _get_dataloader(): from torch.utils.data import DataLoader, PoolDataLoader else: from torch.utils.data import DataLoader + PoolDataLoader = DataLoader return DataLoader, PoolDataLoader @@ -58,33 +61,31 @@ def _get_dataloader(): def _get_extension(): if TORCH_VERSION == 'parrots': from parrots.utils.build_extension import BuildExtension, Extension + CppExtension = partial(Extension, cuda=False) CUDAExtension = partial(Extension, cuda=True) else: - from torch.utils.cpp_extension import (BuildExtension, CppExtension, - CUDAExtension) + from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension return BuildExtension, CppExtension, CUDAExtension def _get_pool(): if TORCH_VERSION == 'parrots': - from parrots.nn.modules.pool import (_AdaptiveAvgPoolNd, - _AdaptiveMaxPoolNd, _AvgPoolNd, - _MaxPoolNd) + from parrots.nn.modules.pool import _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd else: - from torch.nn.modules.pooling import (_AdaptiveAvgPoolNd, - _AdaptiveMaxPoolNd, _AvgPoolNd, - _MaxPoolNd) + from torch.nn.modules.pooling import _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd return _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd def _get_norm(): if TORCH_VERSION == 'parrots': from parrots.nn.modules.batchnorm import _BatchNorm, _InstanceNorm + SyncBatchNorm_ = torch.nn.SyncBatchNorm2d else: - from torch.nn.modules.instancenorm import _InstanceNorm from torch.nn.modules.batchnorm import _BatchNorm + from torch.nn.modules.instancenorm import _InstanceNorm + SyncBatchNorm_ = torch.nn.SyncBatchNorm return _BatchNorm, _InstanceNorm, SyncBatchNorm_ @@ -97,11 +98,9 @@ def _get_norm(): class SyncBatchNorm(SyncBatchNorm_): - def _check_input_dim(self, input): if TORCH_VERSION == 'parrots': if input.dim() < 2: - raise ValueError( - f'expected at least 2D input (got {input.dim()}D input)') + raise ValueError(f'expected at least 2D input (got {input.dim()}D input)') else: super()._check_input_dim(input) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py index 7dab4b304141..56eb66140d73 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py @@ -60,8 +60,7 @@ def scandir(dir_path, suffix=None, recursive=False, case_sensitive=True): raise TypeError('"suffix" must be a string or tuple of strings') if suffix is not None and not case_sensitive: - suffix = suffix.lower() if isinstance(suffix, str) else tuple( - item.lower() for item in suffix) + suffix = suffix.lower() if isinstance(suffix, str) else tuple(item.lower() for item in suffix) root = dir_path @@ -74,13 +73,12 @@ def _scandir(dir_path, suffix, recursive, case_sensitive): yield rel_path elif recursive and os.path.isdir(entry.path): # scan recursively if entry.path is a directory - yield from _scandir(entry.path, suffix, recursive, - case_sensitive) + yield from _scandir(entry.path, suffix, recursive, case_sensitive) return _scandir(dir_path, suffix, recursive, case_sensitive) -def find_vcs_root(path, markers=('.git', )): +def find_vcs_root(path, markers=('.git',)): """Finds the root directory (including itself) of specified markers. Args: diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py index 0062f670dd94..68d2e2383dcf 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py @@ -25,8 +25,7 @@ def terminal_width(self): def start(self): if self.task_num > 0: - self.file.write(f'[{" " * self.bar_width}] 0/{self.task_num}, ' - 'elapsed: 0s, ETA:') + self.file.write(f'[{" " * self.bar_width}] 0/{self.task_num}, ' 'elapsed: 0s, ETA:') else: self.file.write('completed: 0, elapsed: 0s') self.file.flush() @@ -43,21 +42,19 @@ def update(self, num_tasks=1): if self.task_num > 0: percentage = self.completed / float(self.task_num) eta = int(elapsed * (1 - percentage) / percentage + 0.5) - msg = f'\r[{{}}] {self.completed}/{self.task_num}, ' \ - f'{fps:.1f} task/s, elapsed: {int(elapsed + 0.5)}s, ' \ - f'ETA: {eta:5}s' + msg = ( + f'\r[{{}}] {self.completed}/{self.task_num}, ' + f'{fps:.1f} task/s, elapsed: {int(elapsed + 0.5)}s, ' + f'ETA: {eta:5}s' + ) - bar_width = min(self.bar_width, - int(self.terminal_width - len(msg)) + 2, - int(self.terminal_width * 0.6)) + bar_width = min(self.bar_width, int(self.terminal_width - len(msg)) + 2, int(self.terminal_width * 0.6)) bar_width = max(2, bar_width) mark_width = int(bar_width * percentage) bar_chars = '>' * mark_width + ' ' * (bar_width - mark_width) self.file.write(msg.format(bar_chars)) else: - self.file.write( - f'completed: {self.completed}, elapsed: {int(elapsed + 0.5)}s,' - f' {fps:.1f} tasks/s') + self.file.write(f'completed: {self.completed}, elapsed: {int(elapsed + 0.5)}s,' f' {fps:.1f} tasks/s') self.file.flush() @@ -84,8 +81,7 @@ def track_progress(func, tasks, bar_width=50, file=sys.stdout, **kwargs): elif isinstance(tasks, Iterable): task_num = len(tasks) else: - raise TypeError( - '"tasks" must be an iterable object or a (iterator, int) tuple') + raise TypeError('"tasks" must be an iterable object or a (iterator, int) tuple') prog_bar = ProgressBar(task_num, bar_width, file=file) results = [] for task in tasks: @@ -106,16 +102,18 @@ def init_pool(process_num, initializer=None, initargs=None): return Pool(process_num, initializer, initargs) -def track_parallel_progress(func, - tasks, - nproc, - initializer=None, - initargs=None, - bar_width=50, - chunksize=1, - skip_first=False, - keep_order=True, - file=sys.stdout): +def track_parallel_progress( + func, + tasks, + nproc, + initializer=None, + initargs=None, + bar_width=50, + chunksize=1, + skip_first=False, + keep_order=True, + file=sys.stdout, +): """Track the progress of parallel task execution with a progress bar. The built-in :mod:`multiprocessing` module is used for process pools and @@ -150,8 +148,7 @@ def track_parallel_progress(func, elif isinstance(tasks, Iterable): task_num = len(tasks) else: - raise TypeError( - '"tasks" must be an iterable object or a (iterator, int) tuple') + raise TypeError('"tasks" must be an iterable object or a (iterator, int) tuple') pool = init_pool(nproc, initializer, initargs) start = not skip_first task_num -= nproc * chunksize * int(skip_first) @@ -199,8 +196,7 @@ def track_iter_progress(tasks, bar_width=50, file=sys.stdout): elif isinstance(tasks, Iterable): task_num = len(tasks) else: - raise TypeError( - '"tasks" must be an iterable object or a (iterator, int) tuple') + raise TypeError('"tasks" must be an iterable object or a (iterator, int) tuple') prog_bar = ProgressBar(task_num, bar_width, file=file) for task in tasks: yield task diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py index fa9df39bc9f3..d5433ed2b063 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py @@ -21,15 +21,11 @@ def build_from_cfg(cfg, registry, default_args=None): raise TypeError(f'cfg must be a dict, but got {type(cfg)}') if 'type' not in cfg: if default_args is None or 'type' not in default_args: - raise KeyError( - '`cfg` or `default_args` must contain the key "type", ' - f'but got {cfg}\n{default_args}') + raise KeyError('`cfg` or `default_args` must contain the key "type", ' f'but got {cfg}\n{default_args}') if not isinstance(registry, Registry): - raise TypeError('registry must be an mmcv.Registry object, ' - f'but got {type(registry)}') + raise TypeError('registry must be an mmcv.Registry object, ' f'but got {type(registry)}') if not (isinstance(default_args, dict) or default_args is None): - raise TypeError('default_args must be a dict or None, ' - f'but got {type(default_args)}') + raise TypeError('default_args must be a dict or None, ' f'but got {type(default_args)}') args = cfg.copy() @@ -41,13 +37,11 @@ def build_from_cfg(cfg, registry, default_args=None): if isinstance(obj_type, str): obj_cls = registry.get(obj_type) if obj_cls is None: - raise KeyError( - f'{obj_type} is not in the {registry.name} registry') + raise KeyError(f'{obj_type} is not in the {registry.name} registry') elif inspect.isclass(obj_type): obj_cls = obj_type else: - raise TypeError( - f'type must be a str or valid type, but got {type(obj_type)}') + raise TypeError(f'type must be a str or valid type, but got {type(obj_type)}') try: return obj_cls(**args) except Exception as e: @@ -116,9 +110,7 @@ def __contains__(self, key): return self.get(key) is not None def __repr__(self): - format_str = self.__class__.__name__ + \ - f'(name={self._name}, ' \ - f'items={self._module_dict})' + format_str = self.__class__.__name__ + f'(name={self._name}, ' f'items={self._module_dict})' return format_str @staticmethod @@ -163,7 +155,7 @@ def split_scope_key(key): """ split_index = key.find('.') if split_index != -1: - return key[:split_index], key[split_index + 1:] + return key[:split_index], key[split_index + 1 :] else: return None, key @@ -228,14 +220,12 @@ def _add_children(self, registry): assert isinstance(registry, Registry) assert registry.scope is not None - assert registry.scope not in self.children, \ - f'scope {registry.scope} exists in {self.name} registry' + assert registry.scope not in self.children, f'scope {registry.scope} exists in {self.name} registry' self.children[registry.scope] = registry def _register_module(self, module_class, module_name=None, force=False): if not inspect.isclass(module_class): - raise TypeError('module must be a class, ' - f'but got {type(module_class)}') + raise TypeError('module must be a class, ' f'but got {type(module_class)}') if module_name is None: module_name = module_class.__name__ @@ -243,15 +233,15 @@ def _register_module(self, module_class, module_name=None, force=False): module_name = [module_name] for name in module_name: if not force and name in self._module_dict: - raise KeyError(f'{name} is already registered ' - f'in {self.name}') + raise KeyError(f'{name} is already registered ' f'in {self.name}') self._module_dict[name] = module_class def deprecated_register_module(self, cls=None, force=False): warnings.warn( 'The old API of register_module(module, force=False) ' 'is deprecated and will be removed, please use the new API ' - 'register_module(name=None, force=False, module=None) instead.') + 'register_module(name=None, force=False, module=None) instead.' + ) if cls is None: return partial(self.deprecated_register_module, force=force) self._register_module(cls, force=force) @@ -297,19 +287,17 @@ def register_module(self, name=None, force=False, module=None): # raise the error ahead of time if not (name is None or isinstance(name, str) or is_seq_of(name, str)): raise TypeError( - 'name must be either of None, an instance of str or a sequence' - f' of str, but got {type(name)}') + 'name must be either of None, an instance of str or a sequence' f' of str, but got {type(name)}' + ) # use it as a normal method: x.register_module(module=SomeClass) if module is not None: - self._register_module( - module_class=module, module_name=name, force=force) + self._register_module(module_class=module, module_name=name, force=force) return module # use it as a decorator: @x.register_module() def _register(cls): - self._register_module( - module_class=cls, module_name=name, force=force) + self._register_module(module_class=cls, module_name=name, force=force) return cls return _register diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py index a27f936da8ec..4ba7d184d326 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py @@ -39,8 +39,7 @@ def _any(judge_result): return False -def assert_dict_contains_subset(dict_obj: Dict[Any, Any], - expected_subset: Dict[Any, Any]) -> bool: +def assert_dict_contains_subset(dict_obj: Dict[Any, Any], expected_subset: Dict[Any, Any]) -> bool: """Check if the dict_obj contains the expected_subset. Args: @@ -74,8 +73,7 @@ def assert_attrs_equal(obj: Any, expected_attrs: Dict[str, Any]) -> bool: return True -def assert_dict_has_keys(obj: Dict[str, Any], - expected_keys: List[str]) -> bool: +def assert_dict_has_keys(obj: Dict[str, Any], expected_keys: List[str]) -> bool: """Check if the obj has all the expected_keys. Args: @@ -111,8 +109,10 @@ def assert_is_norm_layer(module) -> bool: Returns: bool: Whether the module is a norm layer. """ - from .parrots_wrapper import _BatchNorm, _InstanceNorm from torch.nn import GroupNorm, LayerNorm + + from .parrots_wrapper import _BatchNorm, _InstanceNorm + norm_layer_candidates = (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm) return isinstance(module, norm_layer_candidates) @@ -127,13 +127,11 @@ def assert_params_all_zeros(module) -> bool: bool: Whether the parameters of the module is all zeros. """ weight_data = module.weight.data - is_weight_zero = weight_data.allclose( - weight_data.new_zeros(weight_data.size())) + is_weight_zero = weight_data.allclose(weight_data.new_zeros(weight_data.size())) if hasattr(module, 'bias') and module.bias is not None: bias_data = module.bias.data - is_bias_zero = bias_data.allclose( - bias_data.new_zeros(bias_data.size())) + is_bias_zero = bias_data.allclose(bias_data.new_zeros(bias_data.size())) else: is_bias_zero = True diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py index e01716a205e4..c20892b088e0 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py @@ -3,7 +3,6 @@ class TimerError(Exception): - def __init__(self, message): self.message = message super(TimerError, self).__init__(message) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py index 10702f29964f..12f297ee2eaa 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py @@ -6,8 +6,7 @@ def is_jit_tracing() -> bool: - if (torch.__version__ != 'parrots' - and digit_version(torch.__version__) >= digit_version('1.6.0')): + if torch.__version__ != 'parrots' and digit_version(torch.__version__) >= digit_version('1.6.0'): on_trace = torch.jit.is_tracing() # In PyTorch 1.6, torch.jit.is_tracing has a bug. # Refers to https://github.com/pytorch/pytorch/issues/42448 @@ -19,5 +18,7 @@ def is_jit_tracing() -> bool: warnings.warn( 'torch.jit.is_tracing is only supported after v1.6.0. ' 'Therefore is_tracing returns False automatically. Please ' - 'set on_trace manually if you are using trace.', UserWarning) + 'set on_trace manually if you are using trace.', + UserWarning, + ) return False diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py index 963c45a2e8a8..a0abd9d4596e 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py @@ -32,8 +32,7 @@ def digit_version(version_str: str, length: int = 4): # version.pre can be None if version.pre: if version.pre[0] not in mapping: - warnings.warn(f'unknown prerelease version {version.pre[0]}, ' - 'version checking may go wrong') + warnings.warn(f'unknown prerelease version {version.pre[0]}, ' 'version checking may go wrong') else: val = mapping[version.pre[0]] release.extend([val, version.pre[-1]]) @@ -58,8 +57,7 @@ def _minimal_ext_cmd(cmd): env['LANGUAGE'] = 'C' env['LANG'] = 'C' env['LC_ALL'] = 'C' - out = subprocess.Popen( - cmd, stdout=subprocess.PIPE, env=env).communicate()[0] + out = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env).communicate()[0] return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py index 1cce4e50bd69..50d390de11c6 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py @@ -15,6 +15,7 @@ def parse_version_info(version_str: str, length: int = 4) -> tuple: (2, 0, 0, 0, 'rc', 1) (when length is set to 4). """ from packaging.version import parse + version = parse(version_str) assert version.release, f'failed to parse version {version_str}' release = list(version.release) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py index 73199b01dec5..71e5ece71438 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py @@ -1,11 +1,29 @@ # Copyright (c) OpenMMLab. All rights reserved. from .io import Cache, VideoReader, frames2video -from .optflow import (dequantize_flow, flow_from_bytes, flow_warp, flowread, - flowwrite, quantize_flow, sparse_flow_from_bytes) +from .optflow import ( + dequantize_flow, + flow_from_bytes, + flow_warp, + flowread, + flowwrite, + quantize_flow, + sparse_flow_from_bytes, +) from .processing import concat_video, convert_video, cut_video, resize_video __all__ = [ - 'Cache', 'VideoReader', 'frames2video', 'convert_video', 'resize_video', - 'cut_video', 'concat_video', 'flowread', 'flowwrite', 'quantize_flow', - 'dequantize_flow', 'flow_warp', 'flow_from_bytes', 'sparse_flow_from_bytes' + 'Cache', + 'VideoReader', + 'frames2video', + 'convert_video', + 'resize_video', + 'cut_video', + 'concat_video', + 'flowread', + 'flowwrite', + 'quantize_flow', + 'dequantize_flow', + 'flow_warp', + 'flow_from_bytes', + 'sparse_flow_from_bytes', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py index b2f81860e659..43363f2dd8ed 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py @@ -3,16 +3,25 @@ from collections import OrderedDict import cv2 -from cv2 import (CAP_PROP_FOURCC, CAP_PROP_FPS, CAP_PROP_FRAME_COUNT, - CAP_PROP_FRAME_HEIGHT, CAP_PROP_FRAME_WIDTH, - CAP_PROP_POS_FRAMES, VideoWriter_fourcc) - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import (check_file_exist, mkdir_or_exist, scandir, - track_progress) +from cv2 import ( + CAP_PROP_FOURCC, + CAP_PROP_FPS, + CAP_PROP_FRAME_COUNT, + CAP_PROP_FRAME_HEIGHT, + CAP_PROP_FRAME_WIDTH, + CAP_PROP_POS_FRAMES, + VideoWriter_fourcc, +) + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import ( + check_file_exist, + mkdir_or_exist, + scandir, + track_progress, +) class Cache: - def __init__(self, capacity): self._cache = OrderedDict() self._capacity = int(capacity) @@ -167,8 +176,7 @@ def get_frame(self, frame_id): ndarray or None: Return the frame if successful, otherwise None. """ if frame_id < 0 or frame_id >= self._frame_cnt: - raise IndexError( - f'"frame_id" must be between 0 and {self._frame_cnt - 1}') + raise IndexError(f'"frame_id" must be between 0 and {self._frame_cnt - 1}') if frame_id == self._position: return self.read() if self._cache: @@ -195,13 +203,7 @@ def current_frame(self): return None return self._cache.get(self._position - 1) - def cvt2frames(self, - frame_dir, - file_start=0, - filename_tmpl='{:06d}.jpg', - start=0, - max_num=0, - show_progress=True): + def cvt2frames(self, frame_dir, file_start=0, filename_tmpl='{:06d}.jpg', start=0, max_num=0, show_progress=True): """Convert a video to frame images. Args: @@ -231,8 +233,7 @@ def write_frame(file_idx): cv2.imwrite(filename, img) if show_progress: - track_progress(write_frame, range(file_start, - file_start + task_num)) + track_progress(write_frame, range(file_start, file_start + task_num)) else: for i in range(task_num): write_frame(file_start + i) @@ -242,10 +243,7 @@ def __len__(self): def __getitem__(self, index): if isinstance(index, slice): - return [ - self.get_frame(i) - for i in range(*index.indices(self.frame_cnt)) - ] + return [self.get_frame(i) for i in range(*index.indices(self.frame_cnt))] # support negative indexing if index < 0: index += self.frame_cnt @@ -273,14 +271,9 @@ def __exit__(self, exc_type, exc_value, traceback): self._vcap.release() -def frames2video(frame_dir, - video_file, - fps=30, - fourcc='XVID', - filename_tmpl='{:06d}.jpg', - start=0, - end=0, - show_progress=True): +def frames2video( + frame_dir, video_file, fps=30, fourcc='XVID', filename_tmpl='{:06d}.jpg', start=0, end=0, show_progress=True +): """Read the frame images from a directory and join them as a video. Args: @@ -302,8 +295,7 @@ def frames2video(frame_dir, img = cv2.imread(first_file) height, width = img.shape[:2] resolution = (width, height) - vwriter = cv2.VideoWriter(video_file, VideoWriter_fourcc(*fourcc), fps, - resolution) + vwriter = cv2.VideoWriter(video_file, VideoWriter_fourcc(*fourcc), fps, resolution) def write_frame(file_idx): filename = osp.join(frame_dir, filename_tmpl.format(file_idx)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py index 1261e13f64f4..90b81c714f05 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py @@ -27,8 +27,7 @@ def flowread(flow_or_path, quantize=False, concat_axis=0, *args, **kwargs): raise ValueError(f'Invalid flow with shape {flow_or_path.shape}') return flow_or_path elif not is_str(flow_or_path): - raise TypeError(f'"flow_or_path" must be a filename or numpy array, ' - f'not {type(flow_or_path)}') + raise TypeError(f'"flow_or_path" must be a filename or numpy array, ' f'not {type(flow_or_path)}') if not quantize: with open(flow_or_path, 'rb') as f: @@ -38,8 +37,7 @@ def flowread(flow_or_path, quantize=False, concat_axis=0, *args, **kwargs): raise IOError(f'Invalid flow file: {flow_or_path}') else: if header != 'PIEH': - raise IOError(f'Invalid flow file: {flow_or_path}, ' - 'header does not contain PIEH') + raise IOError(f'Invalid flow file: {flow_or_path}, ' 'header does not contain PIEH') w = np.fromfile(f, np.int32, 1).squeeze() h = np.fromfile(f, np.int32, 1).squeeze() @@ -48,9 +46,7 @@ def flowread(flow_or_path, quantize=False, concat_axis=0, *args, **kwargs): assert concat_axis in [0, 1] cat_flow = imread(flow_or_path, flag='unchanged') if cat_flow.ndim != 2: - raise IOError( - f'{flow_or_path} is not a valid quantized flow file, ' - f'its dimension is {cat_flow.ndim}.') + raise IOError(f'{flow_or_path} is not a valid quantized flow file, ' f'its dimension is {cat_flow.ndim}.') assert cat_flow.shape[concat_axis] % 2 == 0 dx, dy = np.split(cat_flow, 2, axis=concat_axis) flow = dequantize_flow(dx, dy, *args, **kwargs) @@ -110,9 +106,7 @@ def quantize_flow(flow, max_val=0.02, norm=True): dx = dx / w # avoid inplace operations dy = dy / h # use 255 levels instead of 256 to make sure 0 is 0 after dequantization. - flow_comps = [ - quantize(d, -max_val, max_val, 255, np.uint8) for d in [dx, dy] - ] + flow_comps = [quantize(d, -max_val, max_val, 255, np.uint8) for d in [dx, dy]] return tuple(flow_comps) @@ -153,15 +147,13 @@ def flow_warp(img, flow, filling_value=0, interpolate_mode='nearest'): Returns: ndarray: Warped image with the same shape of img """ - warnings.warn('This function is just for prototyping and cannot ' - 'guarantee the computational efficiency.') + warnings.warn('This function is just for prototyping and cannot ' 'guarantee the computational efficiency.') assert flow.ndim == 3, 'Flow must be in 3D arrays.' height = flow.shape[0] width = flow.shape[1] channels = img.shape[2] - output = np.ones( - (height, width, channels), dtype=img.dtype) * filling_value + output = np.ones((height, width, channels), dtype=img.dtype) * filling_value grid = np.indices((height, width)).swapaxes(0, 1).swapaxes(1, 2) dx = grid[:, :, 0] + flow[:, :, 1] @@ -171,33 +163,36 @@ def flow_warp(img, flow, filling_value=0, interpolate_mode='nearest'): valid = (sx >= 0) & (sx < height - 1) & (sy >= 0) & (sy < width - 1) if interpolate_mode == 'nearest': - output[valid, :] = img[dx[valid].round().astype(int), - dy[valid].round().astype(int), :] + output[valid, :] = img[dx[valid].round().astype(int), dy[valid].round().astype(int), :] elif interpolate_mode == 'bilinear': # dirty walkround for integer positions eps_ = 1e-6 dx, dy = dx + eps_, dy + eps_ - left_top_ = img[np.floor(dx[valid]).astype(int), - np.floor(dy[valid]).astype(int), :] * ( - np.ceil(dx[valid]) - dx[valid])[:, None] * ( - np.ceil(dy[valid]) - dy[valid])[:, None] - left_down_ = img[np.ceil(dx[valid]).astype(int), - np.floor(dy[valid]).astype(int), :] * ( - dx[valid] - np.floor(dx[valid]))[:, None] * ( - np.ceil(dy[valid]) - dy[valid])[:, None] - right_top_ = img[np.floor(dx[valid]).astype(int), - np.ceil(dy[valid]).astype(int), :] * ( - np.ceil(dx[valid]) - dx[valid])[:, None] * ( - dy[valid] - np.floor(dy[valid]))[:, None] - right_down_ = img[np.ceil(dx[valid]).astype(int), - np.ceil(dy[valid]).astype(int), :] * ( - dx[valid] - np.floor(dx[valid]))[:, None] * ( - dy[valid] - np.floor(dy[valid]))[:, None] + left_top_ = ( + img[np.floor(dx[valid]).astype(int), np.floor(dy[valid]).astype(int), :] + * (np.ceil(dx[valid]) - dx[valid])[:, None] + * (np.ceil(dy[valid]) - dy[valid])[:, None] + ) + left_down_ = ( + img[np.ceil(dx[valid]).astype(int), np.floor(dy[valid]).astype(int), :] + * (dx[valid] - np.floor(dx[valid]))[:, None] + * (np.ceil(dy[valid]) - dy[valid])[:, None] + ) + right_top_ = ( + img[np.floor(dx[valid]).astype(int), np.ceil(dy[valid]).astype(int), :] + * (np.ceil(dx[valid]) - dx[valid])[:, None] + * (dy[valid] - np.floor(dy[valid]))[:, None] + ) + right_down_ = ( + img[np.ceil(dx[valid]).astype(int), np.ceil(dy[valid]).astype(int), :] + * (dx[valid] - np.floor(dx[valid]))[:, None] + * (dy[valid] - np.floor(dy[valid]))[:, None] + ) output[valid, :] = left_top_ + left_down_ + right_top_ + right_down_ else: raise NotImplementedError( - 'We only support interpolation modes of nearest and bilinear, ' - f'but got {interpolate_mode}.') + 'We only support interpolation modes of nearest and bilinear, ' f'but got {interpolate_mode}.' + ) return output.astype(img.dtype) @@ -225,8 +220,7 @@ def flow_from_bytes(content): # height in third 4 bytes height = np.frombuffer(content[8:], np.int32, 1).squeeze() # after first 12 bytes, all bytes are flow - flow = np.frombuffer(content[12:], np.float32, width * height * 2).reshape( - (height, width, 2)) + flow = np.frombuffer(content[12:], np.float32, width * height * 2).reshape((height, width, 2)) return flow @@ -250,5 +244,5 @@ def sparse_flow_from_bytes(content): flow = flow[:, :, ::-1].astype(np.float32) # flow shape (H, W, 2) valid shape (H, W) flow, valid = flow[:, :, :2], flow[:, :, 2] - flow = (flow - 2**15) / 64.0 + flow = (flow - 2 ** 15) / 64.0 return flow, valid diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py index 4e53b21b0788..8933708ef744 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py @@ -8,11 +8,7 @@ @requires_executable('ffmpeg') -def convert_video(in_file, - out_file, - print_cmd=False, - pre_options='', - **kwargs): +def convert_video(in_file, out_file, print_cmd=False, pre_options='', **kwargs): """Convert a video with ffmpeg. This provides a general api to ffmpeg, the executed command is:: @@ -37,28 +33,18 @@ def convert_video(in_file, if v: options.append(f'-{k}') elif k == 'log_level': - assert v in [ - 'quiet', 'panic', 'fatal', 'error', 'warning', 'info', - 'verbose', 'debug', 'trace' - ] + assert v in ['quiet', 'panic', 'fatal', 'error', 'warning', 'info', 'verbose', 'debug', 'trace'] options.append(f'-loglevel {v}') else: options.append(f'-{k} {v}') - cmd = f'ffmpeg -y {pre_options} -i {in_file} {" ".join(options)} ' \ - f'{out_file}' + cmd = f'ffmpeg -y {pre_options} -i {in_file} {" ".join(options)} ' f'{out_file}' if print_cmd: print(cmd) subprocess.call(cmd, shell=True) @requires_executable('ffmpeg') -def resize_video(in_file, - out_file, - size=None, - ratio=None, - keep_ar=False, - log_level='info', - print_cmd=False): +def resize_video(in_file, out_file, size=None, ratio=None, keep_ar=False, log_level='info', print_cmd=False): """Resize a video. Args: @@ -80,8 +66,7 @@ def resize_video(in_file, if not keep_ar: options['vf'] = f'scale={size[0]}:{size[1]}' else: - options['vf'] = f'scale=w={size[0]}:h={size[1]}:' \ - 'force_original_aspect_ratio=decrease' + options['vf'] = f'scale=w={size[0]}:h={size[1]}:' 'force_original_aspect_ratio=decrease' else: if not isinstance(ratio, tuple): ratio = (ratio, ratio) @@ -90,14 +75,7 @@ def resize_video(in_file, @requires_executable('ffmpeg') -def cut_video(in_file, - out_file, - start=None, - end=None, - vcodec=None, - acodec=None, - log_level='info', - print_cmd=False): +def cut_video(in_file, out_file, start=None, end=None, vcodec=None, acodec=None, log_level='info', print_cmd=False): """Cut a clip from a video. Args: @@ -125,12 +103,7 @@ def cut_video(in_file, @requires_executable('ffmpeg') -def concat_video(video_list, - out_file, - vcodec=None, - acodec=None, - log_level='info', - print_cmd=False): +def concat_video(video_list, out_file, vcodec=None, acodec=None, log_level='info', print_cmd=False): """Concatenate multiple videos into a single one. Args: @@ -150,11 +123,6 @@ def concat_video(video_list, options['vcodec'] = 'copy' if acodec is None: options['acodec'] = 'copy' - convert_video( - tmp_filename, - out_file, - print_cmd, - pre_options='-f concat -safe 0', - **options) + convert_video(tmp_filename, out_file, print_cmd, pre_options='-f concat -safe 0', **options) os.close(tmp_filehandler) os.remove(tmp_filename) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py index 835df136bdcf..f336d6ce01b4 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py @@ -4,6 +4,12 @@ from .optflow import flow2rgb, flowshow, make_color_wheel __all__ = [ - 'Color', 'color_val', 'imshow', 'imshow_bboxes', 'imshow_det_bboxes', - 'flowshow', 'flow2rgb', 'make_color_wheel' + 'Color', + 'color_val', + 'imshow', + 'imshow_bboxes', + 'imshow_det_bboxes', + 'flowshow', + 'flow2rgb', + 'make_color_wheel', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py index 9dd2d0deb9c6..d2290a315f9e 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py @@ -11,6 +11,7 @@ class Color(Enum): Contains red, green, blue, cyan, yellow, magenta, white and black. """ + red = (0, 0, 255) green = (0, 255, 0) blue = (255, 0, 0) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py index c3b0b61f99f9..feda6fa59520 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py @@ -27,15 +27,9 @@ def imshow(img, win_name='', wait_time=0): ret = cv2.waitKey(wait_time) -def imshow_bboxes(img, - bboxes, - colors='green', - top_k=-1, - thickness=1, - show=True, - win_name='', - wait_time=0, - out_file=None): +def imshow_bboxes( + img, bboxes, colors='green', top_k=-1, thickness=1, show=True, win_name='', wait_time=0, out_file=None +): """Draw bboxes on an image. Args: @@ -71,8 +65,7 @@ def imshow_bboxes(img, for j in range(_top_k): left_top = (_bboxes[j, 0], _bboxes[j, 1]) right_bottom = (_bboxes[j, 2], _bboxes[j, 3]) - cv2.rectangle( - img, left_top, right_bottom, colors[i], thickness=thickness) + cv2.rectangle(img, left_top, right_bottom, colors[i], thickness=thickness) if show: imshow(img, win_name, wait_time) @@ -81,19 +74,21 @@ def imshow_bboxes(img, return img -def imshow_det_bboxes(img, - bboxes, - labels, - class_names=None, - score_thr=0, - bbox_color='green', - text_color='green', - thickness=1, - font_scale=0.5, - show=True, - win_name='', - wait_time=0, - out_file=None): +def imshow_det_bboxes( + img, + bboxes, + labels, + class_names=None, + score_thr=0, + bbox_color='green', + text_color='green', + thickness=1, + font_scale=0.5, + show=True, + win_name='', + wait_time=0, + out_file=None, +): """Draw bboxes and class labels (with scores) on an image. Args: @@ -136,14 +131,11 @@ def imshow_det_bboxes(img, bbox_int = bbox.astype(np.int32) left_top = (bbox_int[0], bbox_int[1]) right_bottom = (bbox_int[2], bbox_int[3]) - cv2.rectangle( - img, left_top, right_bottom, bbox_color, thickness=thickness) - label_text = class_names[ - label] if class_names is not None else f'cls {label}' + cv2.rectangle(img, left_top, right_bottom, bbox_color, thickness=thickness) + label_text = class_names[label] if class_names is not None else f'cls {label}' if len(bbox) > 4: label_text += f'|{bbox[-1]:.02f}' - cv2.putText(img, label_text, (bbox_int[0], bbox_int[1] - 2), - cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color) + cv2.putText(img, label_text, (bbox_int[0], bbox_int[1] - 2), cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color) if show: imshow(img, win_name, wait_time) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py index e958b90e4120..1954452dcda1 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py @@ -43,27 +43,24 @@ def flow2rgb(flow, color_wheel=None, unknown_thr=1e6): dx = flow[:, :, 0].copy() dy = flow[:, :, 1].copy() - ignore_inds = ( - np.isnan(dx) | np.isnan(dy) | (np.abs(dx) > unknown_thr) | - (np.abs(dy) > unknown_thr)) + ignore_inds = np.isnan(dx) | np.isnan(dy) | (np.abs(dx) > unknown_thr) | (np.abs(dy) > unknown_thr) dx[ignore_inds] = 0 dy[ignore_inds] = 0 - rad = np.sqrt(dx**2 + dy**2) + rad = np.sqrt(dx ** 2 + dy ** 2) if np.any(rad > np.finfo(float).eps): max_rad = np.max(rad) dx /= max_rad dy /= max_rad - rad = np.sqrt(dx**2 + dy**2) + rad = np.sqrt(dx ** 2 + dy ** 2) angle = np.arctan2(-dy, -dx) / np.pi bin_real = (angle + 1) / 2 * (num_bins - 1) bin_left = np.floor(bin_real).astype(int) bin_right = (bin_left + 1) % num_bins w = (bin_real - bin_left.astype(np.float32))[..., None] - flow_img = (1 - - w) * color_wheel[bin_left, :] + w * color_wheel[bin_right, :] + flow_img = (1 - w) * color_wheel[bin_left, :] + w * color_wheel[bin_right, :] small_ind = rad <= 1 flow_img[small_ind] = 1 - rad[small_ind, None] * (1 - flow_img[small_ind]) flow_img[np.logical_not(small_ind)] *= 0.75 @@ -106,7 +103,7 @@ def make_color_wheel(bins=None): col = 0 for i, color in enumerate([ry, yg, gc, cb, bm, mr]): for j in range(3): - color_wheel[j, col:col + bins[i]] = color[j] + color_wheel[j, col : col + bins[i]] = color[j] col += bins[i] return color_wheel.T diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py index 4b958738b9fd..7e0e39b03e2a 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py @@ -2,4 +2,4 @@ from .checkpoint import load_checkpoint -__all__ = ['load_checkpoint'] \ No newline at end of file +__all__ = ['load_checkpoint'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py index f60be7d3675b..9f27d7fea454 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py @@ -11,16 +11,16 @@ import torch import torchvision +from torch.nn import functional as F from torch.optim import Optimizer from torch.utils import model_zoo -from torch.nn import functional as F import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio import FileClient from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio import load as load_file from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import is_module_wrapper -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import mkdir_or_exist from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import get_dist_info +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import mkdir_or_exist ENV_MMCV_HOME = 'MMCV_HOME' ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' @@ -29,10 +29,8 @@ def _get_mmcv_home(): mmcv_home = os.path.expanduser( - os.getenv( - ENV_MMCV_HOME, - os.path.join( - os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv'))) + os.getenv(ENV_MMCV_HOME, os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv')) + ) mkdir_or_exist(mmcv_home) return mmcv_home @@ -69,11 +67,10 @@ def load(module, prefix=''): # complicated structure, e.g., nn.Module(nn.Module(DDP)) if is_module_wrapper(module): module = module.module - local_metadata = {} if metadata is None else metadata.get( - prefix[:-1], {}) - module._load_from_state_dict(state_dict, prefix, local_metadata, True, - all_missing_keys, unexpected_keys, - err_msg) + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, all_missing_keys, unexpected_keys, err_msg + ) for name, child in module._modules.items(): if child is not None: load(child, prefix + name + '.') @@ -82,21 +79,16 @@ def load(module, prefix=''): load = None # break load->load reference cycle # ignore "num_batches_tracked" of BN layers - missing_keys = [ - key for key in all_missing_keys if 'num_batches_tracked' not in key - ] + missing_keys = [key for key in all_missing_keys if 'num_batches_tracked' not in key] if unexpected_keys: - err_msg.append('unexpected key in source ' - f'state_dict: {", ".join(unexpected_keys)}\n') + err_msg.append('unexpected key in source ' f'state_dict: {", ".join(unexpected_keys)}\n') if missing_keys: - err_msg.append( - f'missing keys in source state_dict: {", ".join(missing_keys)}\n') + err_msg.append(f'missing keys in source state_dict: {", ".join(missing_keys)}\n') rank, _ = get_dist_info() if len(err_msg) > 0 and rank == 0: - err_msg.insert( - 0, 'The model and loaded state dict do not match exactly\n') + err_msg.insert(0, 'The model and loaded state dict do not match exactly\n') err_msg = '\n'.join(err_msg) if strict: raise RuntimeError(err_msg) @@ -126,8 +118,7 @@ def load_pavimodel_dist(model_path, map_location=None): try: from pavi import modelcloud except ImportError: - raise ImportError( - 'Please install pavi to load checkpoint from modelcloud.') + raise ImportError('Please install pavi to load checkpoint from modelcloud.') rank, world_size = get_dist_info() rank = int(os.environ.get('LOCAL_RANK', rank)) if rank == 0: @@ -143,8 +134,7 @@ def load_pavimodel_dist(model_path, map_location=None): with TemporaryDirectory() as tmp_dir: downloaded_file = osp.join(tmp_dir, model.name) model.download(downloaded_file) - checkpoint = torch.load( - downloaded_file, map_location=map_location) + checkpoint = torch.load(downloaded_file, map_location=map_location) return checkpoint @@ -203,8 +193,7 @@ def get_mmcls_models(): def get_deprecated_model_names(): - deprecate_json_path = osp.join(mmcv.__path__[0], - 'model_zoo/deprecated.json') + deprecate_json_path = osp.join(mmcv.__path__[0], 'model_zoo/deprecated.json') deprecate_urls = load_file(deprecate_json_path) assert isinstance(deprecate_urls, dict) @@ -237,8 +226,7 @@ def _load_checkpoint(filename, map_location=None): information, which depends on the checkpoint. """ if filename.startswith('modelzoo://'): - warnings.warn('The URL scheme of "modelzoo://" is deprecated, please ' - 'use "torchvision://" instead') + warnings.warn('The URL scheme of "modelzoo://" is deprecated, please ' 'use "torchvision://" instead') model_urls = get_torchvision_models() model_name = filename[11:] checkpoint = load_url_dist(model_urls[model_name]) @@ -251,8 +239,9 @@ def _load_checkpoint(filename, map_location=None): model_name = filename[13:] deprecated_urls = get_deprecated_model_names() if model_name in deprecated_urls: - warnings.warn(f'open-mmlab://{model_name} is deprecated in favor ' - f'of open-mmlab://{deprecated_urls[model_name]}') + warnings.warn( + f'open-mmlab://{model_name} is deprecated in favor ' f'of open-mmlab://{deprecated_urls[model_name]}' + ) model_name = deprecated_urls[model_name] model_url = model_urls[model_name] # check if is url @@ -274,8 +263,7 @@ def _load_checkpoint(filename, map_location=None): model_path = filename[7:] checkpoint = load_pavimodel_dist(model_path, map_location=map_location) elif filename.startswith('s3://'): - checkpoint = load_fileclient_dist( - filename, backend='ceph', map_location=map_location) + checkpoint = load_fileclient_dist(filename, backend='ceph', map_location=map_location) else: if not osp.isfile(filename): raise IOError(f'{filename} is not a checkpoint file') @@ -283,11 +271,7 @@ def _load_checkpoint(filename, map_location=None): return checkpoint -def load_checkpoint(model, - filename, - map_location='cpu', - strict=False, - logger=None): +def load_checkpoint(model, filename, map_location='cpu', strict=False, logger=None): """Load checkpoint from a file or URI. Args: @@ -306,8 +290,7 @@ def load_checkpoint(model, checkpoint = _load_checkpoint(filename, map_location) # OrderedDict is a subclass of dict if not isinstance(checkpoint, dict): - raise RuntimeError( - f'No state_dict found in checkpoint file {filename}') + raise RuntimeError(f'No state_dict found in checkpoint file {filename}') # get state_dict from checkpoint if 'state_dict' in checkpoint: state_dict = checkpoint['state_dict'] @@ -328,7 +311,7 @@ def load_checkpoint(model, absolute_pos_embed = state_dict['absolute_pos_embed'] N1, L, C1 = absolute_pos_embed.size() N2, C2, H, W = model.absolute_pos_embed.size() - if N1 != N2 or C1 != C2 or L != H*W: + if N1 != N2 or C1 != C2 or L != H * W: logger.warning("Error in loading absolute_pos_embed, pass") else: state_dict['absolute_pos_embed'] = absolute_pos_embed.view(N2, H, W, C2).permute(0, 3, 1, 2) @@ -347,8 +330,8 @@ def load_checkpoint(model, S1 = int(L1 ** 0.5) S2 = int(L2 ** 0.5) table_pretrained_resized = F.interpolate( - table_pretrained.permute(1, 0).view(1, nH1, S1, S1), - size=(S2, S2), mode='bicubic') + table_pretrained.permute(1, 0).view(1, nH1, S1, S1), size=(S2, S2), mode='bicubic' + ) state_dict[table_key] = table_pretrained_resized.view(nH2, L2).permute(1, 0) # load state_dict @@ -421,13 +404,11 @@ def get_state_dict(module, destination=None, prefix='', keep_vars=False): if destination is None: destination = OrderedDict() destination._metadata = OrderedDict() - destination._metadata[prefix[:-1]] = local_metadata = dict( - version=module._version) + destination._metadata[prefix[:-1]] = local_metadata = dict(version=module._version) _save_to_state_dict(module, destination, prefix, keep_vars) for name, child in module._modules.items(): if child is not None: - get_state_dict( - child, destination, prefix + name + '.', keep_vars=keep_vars) + get_state_dict(child, destination, prefix + name + '.', keep_vars=keep_vars) for hook in module._state_dict_hooks.values(): hook_result = hook(module, destination, prefix, local_metadata) if hook_result is not None: @@ -460,10 +441,7 @@ def save_checkpoint(model, filename, optimizer=None, meta=None): # save class name to the meta meta.update(CLASSES=model.CLASSES) - checkpoint = { - 'meta': meta, - 'state_dict': weights_to_cpu(get_state_dict(model)) - } + checkpoint = {'meta': meta, 'state_dict': weights_to_cpu(get_state_dict(model))} # save optimizer state dict in the checkpoint if isinstance(optimizer, Optimizer): checkpoint['optimizer'] = optimizer.state_dict() @@ -477,8 +455,7 @@ def save_checkpoint(model, filename, optimizer=None, meta=None): from pavi import modelcloud from pavi.exception import NodeNotFoundError except ImportError: - raise ImportError( - 'Please install pavi to load checkpoint from modelcloud.') + raise ImportError('Please install pavi to load checkpoint from modelcloud.') model_path = filename[7:] root = modelcloud.Folder() model_dir, model_name = osp.split(model_path) @@ -497,4 +474,4 @@ def save_checkpoint(model, filename, optimizer=None, meta=None): # immediately flush buffer with open(filename, 'wb') as f: torch.save(checkpoint, f) - f.flush() \ No newline at end of file + f.flush() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py index 170724be38de..1752e7fc7969 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py @@ -3,7 +3,12 @@ from .train import get_root_logger, set_random_seed, train_segmentor __all__ = [ - 'get_root_logger', 'set_random_seed', 'train_segmentor', 'init_segmentor', - 'inference_segmentor', 'multi_gpu_test', 'single_gpu_test', - 'show_result_pyplot' + 'get_root_logger', + 'set_random_seed', + 'train_segmentor', + 'init_segmentor', + 'inference_segmentor', + 'multi_gpu_test', + 'single_gpu_test', + 'show_result_pyplot', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py index 9805c8e15886..32c6db9f1ccb 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py @@ -1,9 +1,9 @@ import matplotlib.pyplot as plt -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv import torch + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import collate, scatter from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint - from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.datasets.pipelines import Compose from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models import build_segmentor @@ -24,8 +24,7 @@ def init_segmentor(config, checkpoint=None, device='cuda:0'): if isinstance(config, str): config = mmcv.Config.fromfile(config) elif not isinstance(config, mmcv.Config): - raise TypeError('config must be a filename or Config object, ' - 'but got {}'.format(type(config))) + raise TypeError('config must be a filename or Config object, ' 'but got {}'.format(type(config))) config.model.pretrained = None config.model.train_cfg = None model = build_segmentor(config.model, test_cfg=config.get('test_cfg')) @@ -98,14 +97,7 @@ def inference_segmentor(model, img): return result -def show_result_pyplot(model, - img, - result, - palette=None, - fig_size=(15, 10), - opacity=0.5, - title='', - block=True): +def show_result_pyplot(model, img, result, palette=None, fig_size=(15, 10), opacity=0.5, title='', block=True): """Visualize the segmentation results on the image. Args: @@ -126,8 +118,7 @@ def show_result_pyplot(model, """ if hasattr(model, 'module'): model = model.module - img = model.show_result( - img, result, palette=palette, show=False, opacity=opacity) + img = model.show_result(img, result, palette=palette, show=False, opacity=opacity) # plt.figure(figsize=fig_size) # plt.imshow(mmcv.bgr2rgb(img)) # plt.title(title) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py index 5fb42ad7d00b..961b5e0a781b 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py @@ -3,10 +3,11 @@ import shutil import tempfile -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv import numpy as np import torch import torch.distributed as dist + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.image import tensor2imgs from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import get_dist_info @@ -25,18 +26,12 @@ def np2tmp(array, temp_file_name=None): """ if temp_file_name is None: - temp_file_name = tempfile.NamedTemporaryFile( - suffix='.npy', delete=False).name + temp_file_name = tempfile.NamedTemporaryFile(suffix='.npy', delete=False).name np.save(temp_file_name, array) return temp_file_name -def single_gpu_test(model, - data_loader, - show=False, - out_dir=None, - efficient_test=False, - opacity=0.5): +def single_gpu_test(model, data_loader, show=False, out_dir=None, efficient_test=False, opacity=0.5): """Test with single GPU. Args: @@ -81,12 +76,8 @@ def single_gpu_test(model, out_file = None model.module.show_result( - img_show, - result, - palette=dataset.PALETTE, - show=show, - out_file=out_file, - opacity=opacity) + img_show, result, palette=dataset.PALETTE, show=show, out_file=out_file, opacity=opacity + ) if isinstance(result, list): if efficient_test: @@ -103,11 +94,7 @@ def single_gpu_test(model, return results -def multi_gpu_test(model, - data_loader, - tmpdir=None, - gpu_collect=False, - efficient_test=False): +def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False, efficient_test=False): """Test model with multiple gpus. This method tests model with multiple gpus and collects the results @@ -168,15 +155,11 @@ def collect_results_cpu(result_part, size, tmpdir=None): if tmpdir is None: MAX_LEN = 512 # 32 is whitespace - dir_tensor = torch.full((MAX_LEN, ), - 32, - dtype=torch.uint8, - device='cuda') + dir_tensor = torch.full((MAX_LEN,), 32, dtype=torch.uint8, device='cuda') if rank == 0: tmpdir = tempfile.mkdtemp() - tmpdir = torch.tensor( - bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') - dir_tensor[:len(tmpdir)] = tmpdir + tmpdir = torch.tensor(bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') + dir_tensor[: len(tmpdir)] = tmpdir dist.broadcast(dir_tensor, 0) tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() else: @@ -208,8 +191,7 @@ def collect_results_gpu(result_part, size): """Collect results with GPU.""" rank, world_size = get_dist_info() # dump result part to tensor with pickle - part_tensor = torch.tensor( - bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') + part_tensor = torch.tensor(bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') # gather all result part tensor shape shape_tensor = torch.tensor(part_tensor.shape, device='cuda') shape_list = [shape_tensor.clone() for _ in range(world_size)] @@ -217,18 +199,15 @@ def collect_results_gpu(result_part, size): # padding result part tensor to max length shape_max = torch.tensor(shape_list).max() part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') - part_send[:shape_tensor[0]] = part_tensor - part_recv_list = [ - part_tensor.new_zeros(shape_max) for _ in range(world_size) - ] + part_send[: shape_tensor[0]] = part_tensor + part_recv_list = [part_tensor.new_zeros(shape_max) for _ in range(world_size)] # gather all result part dist.all_gather(part_recv_list, part_send) if rank == 0: part_list = [] for recv, shape in zip(part_recv_list, shape_list): - part_list.append( - pickle.loads(recv[:shape[0]].cpu().numpy().tobytes())) + part_list.append(pickle.loads(recv[: shape[0]].cpu().numpy().tobytes())) # sort the results ordered_results = [] for res in zip(*part_list): diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py index 36e6e10444de..1ed5228bcfb6 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py @@ -3,9 +3,12 @@ import numpy as np import torch -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import MMDataParallel, MMDistributedDataParallel -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import build_optimizer, build_runner +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import ( + MMDataParallel, + MMDistributedDataParallel, +) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import build_optimizer, build_runner from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import DistEvalHook, EvalHook from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.datasets import build_dataloader, build_dataset from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger @@ -30,13 +33,7 @@ def set_random_seed(seed, deterministic=False): torch.backends.cudnn.benchmark = False -def train_segmentor(model, - dataset, - cfg, - distributed=False, - validate=False, - timestamp=None, - meta=None): +def train_segmentor(model, dataset, cfg, distributed=False, validate=False, timestamp=None, meta=None): """Launch segmentor training.""" logger = get_root_logger(cfg.log_level) @@ -51,7 +48,9 @@ def train_segmentor(model, len(cfg.gpu_ids), dist=distributed, seed=cfg.seed, - drop_last=True) for ds in dataset + drop_last=True, + ) + for ds in dataset ] # put model on gpus @@ -63,10 +62,10 @@ def train_segmentor(model, model.cuda(), device_ids=[torch.cuda.current_device()], broadcast_buffers=False, - find_unused_parameters=find_unused_parameters) + find_unused_parameters=find_unused_parameters, + ) else: - model = MMDataParallel( - model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) + model = MMDataParallel(model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) # build runner optimizer = build_optimizer(model, cfg.optimizer) @@ -74,23 +73,20 @@ def train_segmentor(model, if cfg.get('runner') is None: cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters} warnings.warn( - 'config is now expected to have a `runner` section, ' - 'please set `runner` in your config.', UserWarning) + 'config is now expected to have a `runner` section, ' 'please set `runner` in your config.', UserWarning + ) runner = build_runner( cfg.runner, default_args=dict( - model=model, - batch_processor=None, - optimizer=optimizer, - work_dir=cfg.work_dir, - logger=logger, - meta=meta)) + model=model, batch_processor=None, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta + ), + ) # register hooks - runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config, - cfg.checkpoint_config, cfg.log_config, - cfg.get('momentum_config', None)) + runner.register_training_hooks( + cfg.lr_config, cfg.optimizer_config, cfg.checkpoint_config, cfg.log_config, cfg.get('momentum_config', None) + ) # an ugly walkaround to make the .log and .log.json filenames the same runner.timestamp = timestamp @@ -99,11 +95,8 @@ def train_segmentor(model, if validate: val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) val_dataloader = build_dataloader( - val_dataset, - samples_per_gpu=1, - workers_per_gpu=cfg.data.workers_per_gpu, - dist=distributed, - shuffle=False) + val_dataset, samples_per_gpu=1, workers_per_gpu=cfg.data.workers_per_gpu, dist=distributed, shuffle=False + ) eval_cfg = cfg.get('evaluation', {}) eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner' eval_hook = DistEvalHook if distributed else EvalHook diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py index f7cc4b23413a..c77282a68a12 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py @@ -3,6 +3,12 @@ from .metrics import eval_metrics, mean_dice, mean_fscore, mean_iou __all__ = [ - 'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'mean_fscore', - 'eval_metrics', 'get_classes', 'get_palette' + 'EvalHook', + 'DistEvalHook', + 'mean_dice', + 'mean_iou', + 'mean_fscore', + 'eval_metrics', + 'get_classes', + 'get_palette', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py index f91355141f28..7ebbe83b1851 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py @@ -4,117 +4,423 @@ def cityscapes_classes(): """Cityscapes class names for external use.""" return [ - 'road', 'sidewalk', 'building', 'wall', 'fence', 'pole', - 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', - 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', - 'bicycle' + 'road', + 'sidewalk', + 'building', + 'wall', + 'fence', + 'pole', + 'traffic light', + 'traffic sign', + 'vegetation', + 'terrain', + 'sky', + 'person', + 'rider', + 'car', + 'truck', + 'bus', + 'train', + 'motorcycle', + 'bicycle', ] def ade_classes(): """ADE20K class names for external use.""" return [ - 'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ', - 'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth', - 'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car', - 'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug', - 'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe', - 'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column', - 'signboard', 'chest of drawers', 'counter', 'sand', 'sink', - 'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path', - 'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door', - 'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table', - 'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove', - 'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar', - 'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower', - 'chandelier', 'awning', 'streetlight', 'booth', 'television receiver', - 'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister', - 'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van', - 'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything', - 'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent', - 'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank', - 'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake', - 'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce', - 'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen', - 'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass', - 'clock', 'flag' + 'wall', + 'building', + 'sky', + 'floor', + 'tree', + 'ceiling', + 'road', + 'bed ', + 'windowpane', + 'grass', + 'cabinet', + 'sidewalk', + 'person', + 'earth', + 'door', + 'table', + 'mountain', + 'plant', + 'curtain', + 'chair', + 'car', + 'water', + 'painting', + 'sofa', + 'shelf', + 'house', + 'sea', + 'mirror', + 'rug', + 'field', + 'armchair', + 'seat', + 'fence', + 'desk', + 'rock', + 'wardrobe', + 'lamp', + 'bathtub', + 'railing', + 'cushion', + 'base', + 'box', + 'column', + 'signboard', + 'chest of drawers', + 'counter', + 'sand', + 'sink', + 'skyscraper', + 'fireplace', + 'refrigerator', + 'grandstand', + 'path', + 'stairs', + 'runway', + 'case', + 'pool table', + 'pillow', + 'screen door', + 'stairway', + 'river', + 'bridge', + 'bookcase', + 'blind', + 'coffee table', + 'toilet', + 'flower', + 'book', + 'hill', + 'bench', + 'countertop', + 'stove', + 'palm', + 'kitchen island', + 'computer', + 'swivel chair', + 'boat', + 'bar', + 'arcade machine', + 'hovel', + 'bus', + 'towel', + 'light', + 'truck', + 'tower', + 'chandelier', + 'awning', + 'streetlight', + 'booth', + 'television receiver', + 'airplane', + 'dirt track', + 'apparel', + 'pole', + 'land', + 'bannister', + 'escalator', + 'ottoman', + 'bottle', + 'buffet', + 'poster', + 'stage', + 'van', + 'ship', + 'fountain', + 'conveyer belt', + 'canopy', + 'washer', + 'plaything', + 'swimming pool', + 'stool', + 'barrel', + 'basket', + 'waterfall', + 'tent', + 'bag', + 'minibike', + 'cradle', + 'oven', + 'ball', + 'food', + 'step', + 'tank', + 'trade name', + 'microwave', + 'pot', + 'animal', + 'bicycle', + 'lake', + 'dishwasher', + 'screen', + 'blanket', + 'sculpture', + 'hood', + 'sconce', + 'vase', + 'traffic light', + 'tray', + 'ashcan', + 'fan', + 'pier', + 'crt screen', + 'plate', + 'monitor', + 'bulletin board', + 'shower', + 'radiator', + 'glass', + 'clock', + 'flag', ] def voc_classes(): """Pascal VOC class names for external use.""" return [ - 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', - 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', - 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', - 'tvmonitor' + 'background', + 'aeroplane', + 'bicycle', + 'bird', + 'boat', + 'bottle', + 'bus', + 'car', + 'cat', + 'chair', + 'cow', + 'diningtable', + 'dog', + 'horse', + 'motorbike', + 'person', + 'pottedplant', + 'sheep', + 'sofa', + 'train', + 'tvmonitor', ] def cityscapes_palette(): """Cityscapes palette for external use.""" - return [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], - [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0], - [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60], - [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], [0, 80, 100], - [0, 0, 230], [119, 11, 32]] + return [ + [128, 64, 128], + [244, 35, 232], + [70, 70, 70], + [102, 102, 156], + [190, 153, 153], + [153, 153, 153], + [250, 170, 30], + [220, 220, 0], + [107, 142, 35], + [152, 251, 152], + [70, 130, 180], + [220, 20, 60], + [255, 0, 0], + [0, 0, 142], + [0, 0, 70], + [0, 60, 100], + [0, 80, 100], + [0, 0, 230], + [119, 11, 32], + ] def ade_palette(): """ADE20K palette for external use.""" - return [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], - [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], - [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], - [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], - [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], - [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], - [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], - [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], - [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], - [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], - [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], - [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], - [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], - [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], - [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], - [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], - [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], - [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], - [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], - [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], - [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], - [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], - [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], - [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], - [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], - [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], - [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], - [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], - [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], - [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], - [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], - [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], - [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], - [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], - [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], - [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], - [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], - [102, 255, 0], [92, 0, 255]] + return [ + [120, 120, 120], + [180, 120, 120], + [6, 230, 230], + [80, 50, 50], + [4, 200, 3], + [120, 120, 80], + [140, 140, 140], + [204, 5, 255], + [230, 230, 230], + [4, 250, 7], + [224, 5, 255], + [235, 255, 7], + [150, 5, 61], + [120, 120, 70], + [8, 255, 51], + [255, 6, 82], + [143, 255, 140], + [204, 255, 4], + [255, 51, 7], + [204, 70, 3], + [0, 102, 200], + [61, 230, 250], + [255, 6, 51], + [11, 102, 255], + [255, 7, 71], + [255, 9, 224], + [9, 7, 230], + [220, 220, 220], + [255, 9, 92], + [112, 9, 255], + [8, 255, 214], + [7, 255, 224], + [255, 184, 6], + [10, 255, 71], + [255, 41, 10], + [7, 255, 255], + [224, 255, 8], + [102, 8, 255], + [255, 61, 6], + [255, 194, 7], + [255, 122, 8], + [0, 255, 20], + [255, 8, 41], + [255, 5, 153], + [6, 51, 255], + [235, 12, 255], + [160, 150, 20], + [0, 163, 255], + [140, 140, 140], + [250, 10, 15], + [20, 255, 0], + [31, 255, 0], + [255, 31, 0], + [255, 224, 0], + [153, 255, 0], + [0, 0, 255], + [255, 71, 0], + [0, 235, 255], + [0, 173, 255], + [31, 0, 255], + [11, 200, 200], + [255, 82, 0], + [0, 255, 245], + [0, 61, 255], + [0, 255, 112], + [0, 255, 133], + [255, 0, 0], + [255, 163, 0], + [255, 102, 0], + [194, 255, 0], + [0, 143, 255], + [51, 255, 0], + [0, 82, 255], + [0, 255, 41], + [0, 255, 173], + [10, 0, 255], + [173, 255, 0], + [0, 255, 153], + [255, 92, 0], + [255, 0, 255], + [255, 0, 245], + [255, 0, 102], + [255, 173, 0], + [255, 0, 20], + [255, 184, 184], + [0, 31, 255], + [0, 255, 61], + [0, 71, 255], + [255, 0, 204], + [0, 255, 194], + [0, 255, 82], + [0, 10, 255], + [0, 112, 255], + [51, 0, 255], + [0, 194, 255], + [0, 122, 255], + [0, 255, 163], + [255, 153, 0], + [0, 255, 10], + [255, 112, 0], + [143, 255, 0], + [82, 0, 255], + [163, 255, 0], + [255, 235, 0], + [8, 184, 170], + [133, 0, 255], + [0, 255, 92], + [184, 0, 255], + [255, 0, 31], + [0, 184, 255], + [0, 214, 255], + [255, 0, 112], + [92, 255, 0], + [0, 224, 255], + [112, 224, 255], + [70, 184, 160], + [163, 0, 255], + [153, 0, 255], + [71, 255, 0], + [255, 0, 163], + [255, 204, 0], + [255, 0, 143], + [0, 255, 235], + [133, 255, 0], + [255, 0, 235], + [245, 0, 255], + [255, 0, 122], + [255, 245, 0], + [10, 190, 212], + [214, 255, 0], + [0, 204, 255], + [20, 0, 255], + [255, 255, 0], + [0, 153, 255], + [0, 41, 255], + [0, 255, 204], + [41, 0, 255], + [41, 255, 0], + [173, 0, 255], + [0, 245, 255], + [71, 0, 255], + [122, 0, 255], + [0, 255, 184], + [0, 92, 255], + [184, 255, 0], + [0, 133, 255], + [255, 214, 0], + [25, 194, 194], + [102, 255, 0], + [92, 0, 255], + ] def voc_palette(): """Pascal VOC palette for external use.""" - return [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], - [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], - [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], - [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], - [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] + return [ + [0, 0, 0], + [128, 0, 0], + [0, 128, 0], + [128, 128, 0], + [0, 0, 128], + [128, 0, 128], + [0, 128, 128], + [128, 128, 128], + [64, 0, 0], + [192, 0, 0], + [64, 128, 0], + [192, 128, 0], + [64, 0, 128], + [192, 0, 128], + [64, 128, 128], + [192, 128, 128], + [0, 64, 0], + [128, 64, 0], + [0, 192, 0], + [128, 192, 0], + [0, 64, 128], + ] dataset_aliases = { 'cityscapes': ['cityscapes'], 'ade': ['ade', 'ade20k'], - 'voc': ['voc', 'pascal_voc', 'voc12', 'voc12aug'] + 'voc': ['voc', 'pascal_voc', 'voc12', 'voc12aug'], } diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py index b6493f20505e..34b01f515383 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py @@ -31,12 +31,9 @@ def after_train_iter(self, runner): if self.by_epoch or not self.every_n_iters(runner, self.interval): return from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import single_gpu_test + runner.log_buffer.clear() - results = single_gpu_test( - runner.model, - self.dataloader, - show=False, - efficient_test=self.efficient_test) + results = single_gpu_test(runner.model, self.dataloader, show=False, efficient_test=self.efficient_test) self.evaluate(runner, results) def after_train_epoch(self, runner): @@ -47,6 +44,7 @@ def after_train_epoch(self, runner): if not self.by_epoch or not self.every_n_epochs(runner, self.interval): return from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import single_gpu_test + runner.log_buffer.clear() results = single_gpu_test(runner.model, self.dataloader, show=False) self.evaluate(runner, results) @@ -79,13 +77,15 @@ def after_train_iter(self, runner): if self.by_epoch or not self.every_n_iters(runner, self.interval): return from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import multi_gpu_test + runner.log_buffer.clear() results = multi_gpu_test( runner.model, self.dataloader, tmpdir=osp.join(runner.work_dir, '.eval_hook'), gpu_collect=self.gpu_collect, - efficient_test=self.efficient_test) + efficient_test=self.efficient_test, + ) if runner.rank == 0: print('\n') self.evaluate(runner, results) @@ -98,12 +98,11 @@ def after_train_epoch(self, runner): if not self.by_epoch or not self.every_n_epochs(runner, self.interval): return from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import multi_gpu_test + runner.log_buffer.clear() results = multi_gpu_test( - runner.model, - self.dataloader, - tmpdir=osp.join(runner.work_dir, '.eval_hook'), - gpu_collect=self.gpu_collect) + runner.model, self.dataloader, tmpdir=osp.join(runner.work_dir, '.eval_hook'), gpu_collect=self.gpu_collect + ) if runner.rank == 0: print('\n') self.evaluate(runner, results) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py index 9f4ba8a2b4ec..06b9755207e1 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py @@ -1,9 +1,10 @@ from collections import OrderedDict -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv import numpy as np import torch +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + def f_score(precision, recall, beta=1): """calcuate the f-score value. @@ -17,17 +18,11 @@ def f_score(precision, recall, beta=1): Returns: [torch.tensor]: The f-score value. """ - score = (1 + beta**2) * (precision * recall) / ( - (beta**2 * precision) + recall) + score = (1 + beta ** 2) * (precision * recall) / ((beta ** 2 * precision) + recall) return score -def intersect_and_union(pred_label, - label, - num_classes, - ignore_index, - label_map=dict(), - reduce_zero_label=False): +def intersect_and_union(pred_label, label, num_classes, ignore_index, label_map=dict(), reduce_zero_label=False): """Calculate intersection and Union. Args: @@ -57,8 +52,7 @@ def intersect_and_union(pred_label, pred_label = torch.from_numpy((pred_label)) if isinstance(label, str): - label = torch.from_numpy( - mmcv.imread(label, flag='unchanged', backend='pillow')) + label = torch.from_numpy(mmcv.imread(label, flag='unchanged', backend='pillow')) else: label = torch.from_numpy(label) @@ -70,27 +64,21 @@ def intersect_and_union(pred_label, label = label - 1 label[label == 254] = 255 - mask = (label != ignore_index) + mask = label != ignore_index pred_label = pred_label[mask] label = label[mask] intersect = pred_label[pred_label == label] - area_intersect = torch.histc( - intersect.float(), bins=(num_classes), min=0, max=num_classes - 1) - area_pred_label = torch.histc( - pred_label.float(), bins=(num_classes), min=0, max=num_classes - 1) - area_label = torch.histc( - label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_intersect = torch.histc(intersect.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_pred_label = torch.histc(pred_label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_label = torch.histc(label.float(), bins=(num_classes), min=0, max=num_classes - 1) area_union = area_pred_label + area_label - area_intersect return area_intersect, area_union, area_pred_label, area_label -def total_intersect_and_union(results, - gt_seg_maps, - num_classes, - ignore_index, - label_map=dict(), - reduce_zero_label=False): +def total_intersect_and_union( + results, gt_seg_maps, num_classes, ignore_index, label_map=dict(), reduce_zero_label=False +): """Calculate Total Intersection and Union. Args: @@ -113,30 +101,24 @@ def total_intersect_and_union(results, """ num_imgs = len(results) assert len(gt_seg_maps) == num_imgs - total_area_intersect = torch.zeros((num_classes, ), dtype=torch.float64) - total_area_union = torch.zeros((num_classes, ), dtype=torch.float64) - total_area_pred_label = torch.zeros((num_classes, ), dtype=torch.float64) - total_area_label = torch.zeros((num_classes, ), dtype=torch.float64) + total_area_intersect = torch.zeros((num_classes,), dtype=torch.float64) + total_area_union = torch.zeros((num_classes,), dtype=torch.float64) + total_area_pred_label = torch.zeros((num_classes,), dtype=torch.float64) + total_area_label = torch.zeros((num_classes,), dtype=torch.float64) for i in range(num_imgs): - area_intersect, area_union, area_pred_label, area_label = \ - intersect_and_union( - results[i], gt_seg_maps[i], num_classes, ignore_index, - label_map, reduce_zero_label) + area_intersect, area_union, area_pred_label, area_label = intersect_and_union( + results[i], gt_seg_maps[i], num_classes, ignore_index, label_map, reduce_zero_label + ) total_area_intersect += area_intersect total_area_union += area_union total_area_pred_label += area_pred_label total_area_label += area_label - return total_area_intersect, total_area_union, total_area_pred_label, \ - total_area_label + return total_area_intersect, total_area_union, total_area_pred_label, total_area_label -def mean_iou(results, - gt_seg_maps, - num_classes, - ignore_index, - nan_to_num=None, - label_map=dict(), - reduce_zero_label=False): +def mean_iou( + results, gt_seg_maps, num_classes, ignore_index, nan_to_num=None, label_map=dict(), reduce_zero_label=False +): """Calculate Mean Intersection and Union (mIoU) Args: @@ -165,17 +147,14 @@ def mean_iou(results, metrics=['mIoU'], nan_to_num=nan_to_num, label_map=label_map, - reduce_zero_label=reduce_zero_label) + reduce_zero_label=reduce_zero_label, + ) return iou_result -def mean_dice(results, - gt_seg_maps, - num_classes, - ignore_index, - nan_to_num=None, - label_map=dict(), - reduce_zero_label=False): +def mean_dice( + results, gt_seg_maps, num_classes, ignore_index, nan_to_num=None, label_map=dict(), reduce_zero_label=False +): """Calculate Mean Dice (mDice) Args: @@ -205,18 +184,14 @@ def mean_dice(results, metrics=['mDice'], nan_to_num=nan_to_num, label_map=label_map, - reduce_zero_label=reduce_zero_label) + reduce_zero_label=reduce_zero_label, + ) return dice_result -def mean_fscore(results, - gt_seg_maps, - num_classes, - ignore_index, - nan_to_num=None, - label_map=dict(), - reduce_zero_label=False, - beta=1): +def mean_fscore( + results, gt_seg_maps, num_classes, ignore_index, nan_to_num=None, label_map=dict(), reduce_zero_label=False, beta=1 +): """Calculate Mean Intersection and Union (mIoU) Args: @@ -250,19 +225,22 @@ def mean_fscore(results, nan_to_num=nan_to_num, label_map=label_map, reduce_zero_label=reduce_zero_label, - beta=beta) + beta=beta, + ) return fscore_result -def eval_metrics(results, - gt_seg_maps, - num_classes, - ignore_index, - metrics=['mIoU'], - nan_to_num=None, - label_map=dict(), - reduce_zero_label=False, - beta=1): +def eval_metrics( + results, + gt_seg_maps, + num_classes, + ignore_index, + metrics=['mIoU'], + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False, + beta=1, +): """Calculate evaluation metrics Args: results (list[ndarray] | list[str]): List of prediction segmentation @@ -287,10 +265,9 @@ def eval_metrics(results, if not set(metrics).issubset(set(allowed_metrics)): raise KeyError('metrics {} is not supported'.format(metrics)) - total_area_intersect, total_area_union, total_area_pred_label, \ - total_area_label = total_intersect_and_union( - results, gt_seg_maps, num_classes, ignore_index, label_map, - reduce_zero_label) + total_area_intersect, total_area_union, total_area_pred_label, total_area_label = total_intersect_and_union( + results, gt_seg_maps, num_classes, ignore_index, label_map, reduce_zero_label + ) all_acc = total_area_intersect.sum() / total_area_label.sum() ret_metrics = OrderedDict({'aAcc': all_acc}) for metric in metrics: @@ -300,27 +277,21 @@ def eval_metrics(results, ret_metrics['IoU'] = iou ret_metrics['Acc'] = acc elif metric == 'mDice': - dice = 2 * total_area_intersect / ( - total_area_pred_label + total_area_label) + dice = 2 * total_area_intersect / (total_area_pred_label + total_area_label) acc = total_area_intersect / total_area_label ret_metrics['Dice'] = dice ret_metrics['Acc'] = acc elif metric == 'mFscore': precision = total_area_intersect / total_area_pred_label recall = total_area_intersect / total_area_label - f_value = torch.tensor( - [f_score(x[0], x[1], beta) for x in zip(precision, recall)]) + f_value = torch.tensor([f_score(x[0], x[1], beta) for x in zip(precision, recall)]) ret_metrics['Fscore'] = f_value ret_metrics['Precision'] = precision ret_metrics['Recall'] = recall - ret_metrics = { - metric: value.numpy() - for metric, value in ret_metrics.items() - } + ret_metrics = {metric: value.numpy() for metric, value in ret_metrics.items()} if nan_to_num is not None: - ret_metrics = OrderedDict({ - metric: np.nan_to_num(metric_value, nan=nan_to_num) - for metric, metric_value in ret_metrics.items() - }) + ret_metrics = OrderedDict( + {metric: np.nan_to_num(metric_value, nan=nan_to_num) for metric, metric_value in ret_metrics.items()} + ) return ret_metrics diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py index 88bb10d44026..cfab50a07df6 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py @@ -54,22 +54,22 @@ def sample(self, seg_logit, seg_label): sort_prob, sort_indices = seg_prob[valid_mask].sort() if sort_prob.numel() > 0: - min_threshold = sort_prob[min(batch_kept, - sort_prob.numel() - 1)] + min_threshold = sort_prob[min(batch_kept, sort_prob.numel() - 1)] else: min_threshold = 0.0 threshold = max(min_threshold, self.thresh) - valid_seg_weight[seg_prob[valid_mask] < threshold] = 1. + valid_seg_weight[seg_prob[valid_mask] < threshold] = 1.0 else: losses = self.context.loss_decode( seg_logit, seg_label, weight=None, ignore_index=self.context.ignore_index, - reduction_override='none') + reduction_override='none', + ) # faster than topk according to https://github.com/pytorch/pytorch/issues/22812 # noqa _, sort_indices = losses[valid_mask].sort(descending=True) - valid_seg_weight[sort_indices[:batch_kept]] = 1. + valid_seg_weight[sort_indices[:batch_kept]] = 1.0 seg_weight[valid_mask] = valid_seg_weight diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py index ebeaef4a28ef..3612a6e86e94 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py @@ -11,9 +11,20 @@ from .voc import PascalVOCDataset __all__ = [ - 'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset', - 'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset', - 'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset', - 'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset', - 'STAREDataset' + 'CustomDataset', + 'build_dataloader', + 'ConcatDataset', + 'RepeatDataset', + 'DATASETS', + 'build_dataset', + 'PIPELINES', + 'CityscapesDataset', + 'PascalVOCDataset', + 'ADE20KDataset', + 'PascalContextDataset', + 'PascalContextDataset59', + 'ChaseDB1Dataset', + 'DRIVEDataset', + 'HRFDataset', + 'STAREDataset', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py index 5913e43775ed..6a69943b1ce7 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py @@ -11,74 +11,312 @@ class ADE20KDataset(CustomDataset): The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed to '.png'. """ + CLASSES = ( - 'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ', - 'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth', - 'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car', - 'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug', - 'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe', - 'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column', - 'signboard', 'chest of drawers', 'counter', 'sand', 'sink', - 'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path', - 'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door', - 'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table', - 'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove', - 'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar', - 'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower', - 'chandelier', 'awning', 'streetlight', 'booth', 'television receiver', - 'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister', - 'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van', - 'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything', - 'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent', - 'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank', - 'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake', - 'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce', - 'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen', - 'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass', - 'clock', 'flag') + 'wall', + 'building', + 'sky', + 'floor', + 'tree', + 'ceiling', + 'road', + 'bed ', + 'windowpane', + 'grass', + 'cabinet', + 'sidewalk', + 'person', + 'earth', + 'door', + 'table', + 'mountain', + 'plant', + 'curtain', + 'chair', + 'car', + 'water', + 'painting', + 'sofa', + 'shelf', + 'house', + 'sea', + 'mirror', + 'rug', + 'field', + 'armchair', + 'seat', + 'fence', + 'desk', + 'rock', + 'wardrobe', + 'lamp', + 'bathtub', + 'railing', + 'cushion', + 'base', + 'box', + 'column', + 'signboard', + 'chest of drawers', + 'counter', + 'sand', + 'sink', + 'skyscraper', + 'fireplace', + 'refrigerator', + 'grandstand', + 'path', + 'stairs', + 'runway', + 'case', + 'pool table', + 'pillow', + 'screen door', + 'stairway', + 'river', + 'bridge', + 'bookcase', + 'blind', + 'coffee table', + 'toilet', + 'flower', + 'book', + 'hill', + 'bench', + 'countertop', + 'stove', + 'palm', + 'kitchen island', + 'computer', + 'swivel chair', + 'boat', + 'bar', + 'arcade machine', + 'hovel', + 'bus', + 'towel', + 'light', + 'truck', + 'tower', + 'chandelier', + 'awning', + 'streetlight', + 'booth', + 'television receiver', + 'airplane', + 'dirt track', + 'apparel', + 'pole', + 'land', + 'bannister', + 'escalator', + 'ottoman', + 'bottle', + 'buffet', + 'poster', + 'stage', + 'van', + 'ship', + 'fountain', + 'conveyer belt', + 'canopy', + 'washer', + 'plaything', + 'swimming pool', + 'stool', + 'barrel', + 'basket', + 'waterfall', + 'tent', + 'bag', + 'minibike', + 'cradle', + 'oven', + 'ball', + 'food', + 'step', + 'tank', + 'trade name', + 'microwave', + 'pot', + 'animal', + 'bicycle', + 'lake', + 'dishwasher', + 'screen', + 'blanket', + 'sculpture', + 'hood', + 'sconce', + 'vase', + 'traffic light', + 'tray', + 'ashcan', + 'fan', + 'pier', + 'crt screen', + 'plate', + 'monitor', + 'bulletin board', + 'shower', + 'radiator', + 'glass', + 'clock', + 'flag', + ) - PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], - [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], - [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], - [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], - [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], - [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], - [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], - [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], - [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], - [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], - [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], - [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], - [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], - [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], - [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], - [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], - [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], - [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], - [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], - [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], - [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], - [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], - [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], - [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], - [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], - [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], - [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], - [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], - [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], - [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], - [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], - [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], - [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], - [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], - [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], - [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], - [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], - [102, 255, 0], [92, 0, 255]] + PALETTE = [ + [120, 120, 120], + [180, 120, 120], + [6, 230, 230], + [80, 50, 50], + [4, 200, 3], + [120, 120, 80], + [140, 140, 140], + [204, 5, 255], + [230, 230, 230], + [4, 250, 7], + [224, 5, 255], + [235, 255, 7], + [150, 5, 61], + [120, 120, 70], + [8, 255, 51], + [255, 6, 82], + [143, 255, 140], + [204, 255, 4], + [255, 51, 7], + [204, 70, 3], + [0, 102, 200], + [61, 230, 250], + [255, 6, 51], + [11, 102, 255], + [255, 7, 71], + [255, 9, 224], + [9, 7, 230], + [220, 220, 220], + [255, 9, 92], + [112, 9, 255], + [8, 255, 214], + [7, 255, 224], + [255, 184, 6], + [10, 255, 71], + [255, 41, 10], + [7, 255, 255], + [224, 255, 8], + [102, 8, 255], + [255, 61, 6], + [255, 194, 7], + [255, 122, 8], + [0, 255, 20], + [255, 8, 41], + [255, 5, 153], + [6, 51, 255], + [235, 12, 255], + [160, 150, 20], + [0, 163, 255], + [140, 140, 140], + [250, 10, 15], + [20, 255, 0], + [31, 255, 0], + [255, 31, 0], + [255, 224, 0], + [153, 255, 0], + [0, 0, 255], + [255, 71, 0], + [0, 235, 255], + [0, 173, 255], + [31, 0, 255], + [11, 200, 200], + [255, 82, 0], + [0, 255, 245], + [0, 61, 255], + [0, 255, 112], + [0, 255, 133], + [255, 0, 0], + [255, 163, 0], + [255, 102, 0], + [194, 255, 0], + [0, 143, 255], + [51, 255, 0], + [0, 82, 255], + [0, 255, 41], + [0, 255, 173], + [10, 0, 255], + [173, 255, 0], + [0, 255, 153], + [255, 92, 0], + [255, 0, 255], + [255, 0, 245], + [255, 0, 102], + [255, 173, 0], + [255, 0, 20], + [255, 184, 184], + [0, 31, 255], + [0, 255, 61], + [0, 71, 255], + [255, 0, 204], + [0, 255, 194], + [0, 255, 82], + [0, 10, 255], + [0, 112, 255], + [51, 0, 255], + [0, 194, 255], + [0, 122, 255], + [0, 255, 163], + [255, 153, 0], + [0, 255, 10], + [255, 112, 0], + [143, 255, 0], + [82, 0, 255], + [163, 255, 0], + [255, 235, 0], + [8, 184, 170], + [133, 0, 255], + [0, 255, 92], + [184, 0, 255], + [255, 0, 31], + [0, 184, 255], + [0, 214, 255], + [255, 0, 112], + [92, 255, 0], + [0, 224, 255], + [112, 224, 255], + [70, 184, 160], + [163, 0, 255], + [153, 0, 255], + [71, 255, 0], + [255, 0, 163], + [255, 204, 0], + [255, 0, 143], + [0, 255, 235], + [133, 255, 0], + [255, 0, 235], + [245, 0, 255], + [255, 0, 122], + [255, 245, 0], + [10, 190, 212], + [214, 255, 0], + [0, 204, 255], + [20, 0, 255], + [255, 255, 0], + [0, 153, 255], + [0, 41, 255], + [0, 255, 204], + [41, 0, 255], + [41, 255, 0], + [173, 0, 255], + [0, 245, 255], + [71, 0, 255], + [122, 0, 255], + [0, 255, 184], + [0, 92, 255], + [184, 255, 0], + [0, 133, 255], + [255, 214, 0], + [25, 194, 194], + [102, 255, 0], + [92, 0, 255], + ] def __init__(self, **kwargs): - super(ADE20KDataset, self).__init__( - img_suffix='.jpg', - seg_map_suffix='.png', - reduce_zero_label=True, - **kwargs) + super(ADE20KDataset, self).__init__(img_suffix='.jpg', seg_map_suffix='.png', reduce_zero_label=True, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py index 371b9903be64..c076a55fe358 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py @@ -4,15 +4,20 @@ from functools import partial import numpy as np +from torch.utils.data import DistributedSampler + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import collate from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import get_dist_info from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry, build_from_cfg -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import DataLoader, PoolDataLoader -from torch.utils.data import DistributedSampler +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import ( + DataLoader, + PoolDataLoader, +) if platform.system() != 'Windows': # https://github.com/pytorch/pytorch/issues/973 import resource + rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) hard_limit = rlimit[1] soft_limit = min(4096, hard_limit) @@ -25,6 +30,7 @@ def _concat_dataset(cfg, default_args=None): """Build :obj:`ConcatDataset by.""" from .dataset_wrappers import ConcatDataset + img_dir = cfg['img_dir'] ann_dir = cfg.get('ann_dir', None) split = cfg.get('split', None) @@ -61,13 +67,12 @@ def _concat_dataset(cfg, default_args=None): def build_dataset(cfg, default_args=None): """Build datasets.""" from .dataset_wrappers import ConcatDataset, RepeatDataset + if isinstance(cfg, (list, tuple)): dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg]) elif cfg['type'] == 'RepeatDataset': - dataset = RepeatDataset( - build_dataset(cfg['dataset'], default_args), cfg['times']) - elif isinstance(cfg.get('img_dir'), (list, tuple)) or isinstance( - cfg.get('split', None), (list, tuple)): + dataset = RepeatDataset(build_dataset(cfg['dataset'], default_args), cfg['times']) + elif isinstance(cfg.get('img_dir'), (list, tuple)) or isinstance(cfg.get('split', None), (list, tuple)): dataset = _concat_dataset(cfg, default_args) else: dataset = build_from_cfg(cfg, DATASETS, default_args) @@ -75,17 +80,19 @@ def build_dataset(cfg, default_args=None): return dataset -def build_dataloader(dataset, - samples_per_gpu, - workers_per_gpu, - num_gpus=1, - dist=True, - shuffle=True, - seed=None, - drop_last=False, - pin_memory=True, - dataloader_type='PoolDataLoader', - **kwargs): +def build_dataloader( + dataset, + samples_per_gpu, + workers_per_gpu, + num_gpus=1, + dist=True, + shuffle=True, + seed=None, + drop_last=False, + pin_memory=True, + dataloader_type='PoolDataLoader', + **kwargs, +): """Build PyTorch DataLoader. In distributed training, each GPU/process has a dataloader. @@ -114,8 +121,7 @@ def build_dataloader(dataset, """ rank, world_size = get_dist_info() if dist: - sampler = DistributedSampler( - dataset, world_size, rank, shuffle=shuffle) + sampler = DistributedSampler(dataset, world_size, rank, shuffle=shuffle) shuffle = False batch_size = samples_per_gpu num_workers = workers_per_gpu @@ -124,13 +130,9 @@ def build_dataloader(dataset, batch_size = num_gpus * samples_per_gpu num_workers = num_gpus * workers_per_gpu - init_fn = partial( - worker_init_fn, num_workers=num_workers, rank=rank, - seed=seed) if seed is not None else None + init_fn = partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed) if seed is not None else None - assert dataloader_type in ( - 'DataLoader', - 'PoolDataLoader'), f'unsupported dataloader {dataloader_type}' + assert dataloader_type in ('DataLoader', 'PoolDataLoader'), f'unsupported dataloader {dataloader_type}' if dataloader_type == 'PoolDataLoader': dataloader = PoolDataLoader @@ -147,7 +149,8 @@ def build_dataloader(dataset, shuffle=shuffle, worker_init_fn=init_fn, drop_last=drop_last, - **kwargs) + **kwargs, + ) return data_loader diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py index 8bc29bea1470..906e51485f72 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py @@ -20,8 +20,6 @@ class ChaseDB1Dataset(CustomDataset): def __init__(self, **kwargs): super(ChaseDB1Dataset, self).__init__( - img_suffix='.png', - seg_map_suffix='_1stHO.png', - reduce_zero_label=False, - **kwargs) + img_suffix='.png', seg_map_suffix='_1stHO.png', reduce_zero_label=False, **kwargs + ) assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py index ca7cd01c9fb5..4a8a0ecd589b 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py @@ -1,11 +1,12 @@ import os.path as osp import tempfile -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv import numpy as np -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import print_log from PIL import Image +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import print_log + from .builder import DATASETS from .custom import CustomDataset @@ -18,22 +19,54 @@ class CityscapesDataset(CustomDataset): fixed to '_gtFine_labelTrainIds.png' for Cityscapes dataset. """ - CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole', - 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', - 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', - 'bicycle') - - PALETTE = [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], - [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0], - [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60], - [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], - [0, 80, 100], [0, 0, 230], [119, 11, 32]] + CLASSES = ( + 'road', + 'sidewalk', + 'building', + 'wall', + 'fence', + 'pole', + 'traffic light', + 'traffic sign', + 'vegetation', + 'terrain', + 'sky', + 'person', + 'rider', + 'car', + 'truck', + 'bus', + 'train', + 'motorcycle', + 'bicycle', + ) + + PALETTE = [ + [128, 64, 128], + [244, 35, 232], + [70, 70, 70], + [102, 102, 156], + [190, 153, 153], + [153, 153, 153], + [250, 170, 30], + [220, 220, 0], + [107, 142, 35], + [152, 251, 152], + [70, 130, 180], + [220, 20, 60], + [255, 0, 0], + [0, 0, 142], + [0, 0, 70], + [0, 60, 100], + [0, 80, 100], + [0, 0, 230], + [119, 11, 32], + ] def __init__(self, **kwargs): super(CityscapesDataset, self).__init__( - img_suffix='_leftImg8bit.png', - seg_map_suffix='_gtFine_labelTrainIds.png', - **kwargs) + img_suffix='_leftImg8bit.png', seg_map_suffix='_gtFine_labelTrainIds.png', **kwargs + ) @staticmethod def _convert_to_label_id(result): @@ -41,6 +74,7 @@ def _convert_to_label_id(result): if isinstance(result, str): result = np.load(result) import cityscapesscripts.helpers.labels as CSLabels + result_copy = result.copy() for trainId, label in CSLabels.trainId2label.items(): result_copy[result == trainId] = label.id @@ -77,6 +111,7 @@ def results2img(self, results, imgfile_prefix, to_label_id): output = Image.fromarray(result.astype(np.uint8)).convert('P') import cityscapesscripts.helpers.labels as CSLabels + palette = np.zeros((len(CSLabels.id2label), 3), dtype=np.uint8) for label_id, label in CSLabels.id2label.items(): palette[label_id] = label.color @@ -109,8 +144,8 @@ def format_results(self, results, imgfile_prefix=None, to_label_id=True): assert isinstance(results, list), 'results must be a list' assert len(results) == len(self), ( - 'The length of results is not equal to the dataset len: ' - f'{len(results)} != {len(self)}') + 'The length of results is not equal to the dataset len: ' f'{len(results)} != {len(self)}' + ) if imgfile_prefix is None: tmp_dir = tempfile.TemporaryDirectory() @@ -121,12 +156,7 @@ def format_results(self, results, imgfile_prefix=None, to_label_id=True): return result_files, tmp_dir - def evaluate(self, - results, - metric='mIoU', - logger=None, - imgfile_prefix=None, - efficient_test=False): + def evaluate(self, results, metric='mIoU', logger=None, imgfile_prefix=None, efficient_test=False): """Evaluation in Cityscapes/default protocol. Args: @@ -151,13 +181,10 @@ def evaluate(self, eval_results = dict() metrics = metric.copy() if isinstance(metric, list) else [metric] if 'cityscapes' in metrics: - eval_results.update( - self._evaluate_cityscapes(results, logger, imgfile_prefix)) + eval_results.update(self._evaluate_cityscapes(results, logger, imgfile_prefix)) metrics.remove('cityscapes') if len(metrics) > 0: - eval_results.update( - super(CityscapesDataset, - self).evaluate(results, metrics, logger, efficient_test)) + eval_results.update(super(CityscapesDataset, self).evaluate(results, metrics, logger, efficient_test)) return eval_results @@ -176,8 +203,7 @@ def _evaluate_cityscapes(self, results, logger, imgfile_prefix): try: import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as CSEval # noqa except ImportError: - raise ImportError('Please run "pip install cityscapesscripts" to ' - 'install cityscapesscripts first.') + raise ImportError('Please run "pip install cityscapesscripts" to ' 'install cityscapesscripts first.') msg = 'Evaluating in Cityscapes style' if logger is None: msg = '\n' + msg @@ -203,13 +229,11 @@ def _evaluate_cityscapes(self, results, logger, imgfile_prefix): # when evaluating with official cityscapesscripts, # **_gtFine_labelIds.png is used - for seg_map in mmcv.scandir( - self.ann_dir, 'gtFine_labelIds.png', recursive=True): + for seg_map in mmcv.scandir(self.ann_dir, 'gtFine_labelIds.png', recursive=True): seg_map_list.append(osp.join(self.ann_dir, seg_map)) pred_list.append(CSEval.getPrediction(CSEval.args, seg_map)) - eval_results.update( - CSEval.evaluateImgLists(pred_list, seg_map_list, CSEval.args)) + eval_results.update(CSEval.evaluateImgLists(pred_list, seg_map_list, CSEval.args)) if tmp_dir is not None: tmp_dir.cleanup() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py index 9d414a6fd43f..28680a832ca5 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py @@ -3,14 +3,15 @@ from collections import OrderedDict from functools import reduce -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv import numpy as np -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import print_log from prettytable import PrettyTable from torch.utils.data import Dataset +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import print_log from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import eval_metrics from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + from .builder import DATASETS from .pipelines import Compose @@ -72,19 +73,21 @@ class CustomDataset(Dataset): PALETTE = None - def __init__(self, - pipeline, - img_dir, - img_suffix='.jpg', - ann_dir=None, - seg_map_suffix='.png', - split=None, - data_root=None, - test_mode=False, - ignore_index=255, - reduce_zero_label=False, - classes=None, - palette=None): + def __init__( + self, + pipeline, + img_dir, + img_suffix='.jpg', + ann_dir=None, + seg_map_suffix='.png', + split=None, + data_root=None, + test_mode=False, + ignore_index=255, + reduce_zero_label=False, + classes=None, + palette=None, + ): self.pipeline = Compose(pipeline) self.img_dir = img_dir self.img_suffix = img_suffix @@ -96,8 +99,7 @@ def __init__(self, self.ignore_index = ignore_index self.reduce_zero_label = reduce_zero_label self.label_map = None - self.CLASSES, self.PALETTE = self.get_classes_and_palette( - classes, palette) + self.CLASSES, self.PALETTE = self.get_classes_and_palette(classes, palette) # join paths if data_root is specified if self.data_root is not None: @@ -109,16 +111,15 @@ def __init__(self, self.split = osp.join(self.data_root, self.split) # load annotations - self.img_infos = self.load_annotations(self.img_dir, self.img_suffix, - self.ann_dir, - self.seg_map_suffix, self.split) + self.img_infos = self.load_annotations( + self.img_dir, self.img_suffix, self.ann_dir, self.seg_map_suffix, self.split + ) def __len__(self): """Total number of samples of data.""" return len(self.img_infos) - def load_annotations(self, img_dir, img_suffix, ann_dir, seg_map_suffix, - split): + def load_annotations(self, img_dir, img_suffix, ann_dir, seg_map_suffix, split): """Load annotation from directory. Args: @@ -235,8 +236,7 @@ def get_gt_seg_maps(self, efficient_test=False): if efficient_test: gt_seg_map = seg_map else: - gt_seg_map = mmcv.imread( - seg_map, flag='unchanged', backend='pillow') + gt_seg_map = mmcv.imread(seg_map, flag='unchanged', backend='pillow') gt_seg_maps.append(gt_seg_map) return gt_seg_maps @@ -289,8 +289,7 @@ def get_palette_for_custom_classes(self, class_names, palette=None): if self.label_map is not None: # return subset of palette palette = [] - for old_id, new_id in sorted( - self.label_map.items(), key=lambda x: x[1]): + for old_id, new_id in sorted(self.label_map.items(), key=lambda x: x[1]): if new_id != -1: palette.append(self.PALETTE[old_id]) palette = type(self.PALETTE)(palette) @@ -303,12 +302,7 @@ def get_palette_for_custom_classes(self, class_names, palette=None): return palette - def evaluate(self, - results, - metric='mIoU', - logger=None, - efficient_test=False, - **kwargs): + def evaluate(self, results, metric='mIoU', logger=None, efficient_test=False, **kwargs): """Evaluate the dataset. Args: @@ -330,8 +324,7 @@ def evaluate(self, eval_results = {} gt_seg_maps = self.get_gt_seg_maps(efficient_test) if self.CLASSES is None: - num_classes = len( - reduce(np.union1d, [np.unique(_) for _ in gt_seg_maps])) + num_classes = len(reduce(np.union1d, [np.unique(_) for _ in gt_seg_maps])) else: num_classes = len(self.CLASSES) ret_metrics = eval_metrics( @@ -341,7 +334,8 @@ def evaluate(self, self.ignore_index, metric, label_map=self.label_map, - reduce_zero_label=self.reduce_zero_label) + reduce_zero_label=self.reduce_zero_label, + ) if self.CLASSES is None: class_names = tuple(range(num_classes)) @@ -349,17 +343,18 @@ def evaluate(self, class_names = self.CLASSES # summary table - ret_metrics_summary = OrderedDict({ - ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2) - for ret_metric, ret_metric_value in ret_metrics.items() - }) + ret_metrics_summary = OrderedDict( + { + ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2) + for ret_metric, ret_metric_value in ret_metrics.items() + } + ) # each class table ret_metrics.pop('aAcc', None) - ret_metrics_class = OrderedDict({ - ret_metric: np.round(ret_metric_value * 100, 2) - for ret_metric, ret_metric_value in ret_metrics.items() - }) + ret_metrics_class = OrderedDict( + {ret_metric: np.round(ret_metric_value * 100, 2) for ret_metric, ret_metric_value in ret_metrics.items()} + ) ret_metrics_class.update({'Class': class_names}) ret_metrics_class.move_to_end('Class', last=False) @@ -389,10 +384,7 @@ def evaluate(self, ret_metrics_class.pop('Class', None) for key, value in ret_metrics_class.items(): - eval_results.update({ - key + '.' + str(name): value[idx] / 100.0 - for idx, name in enumerate(class_names) - }) + eval_results.update({key + '.' + str(name): value[idx] / 100.0 for idx, name in enumerate(class_names)}) if mmcv.is_list_of(results, str): for file_name in results: diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py index 3cbfda8ae74b..9cb073329ef0 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py @@ -20,8 +20,6 @@ class DRIVEDataset(CustomDataset): def __init__(self, **kwargs): super(DRIVEDataset, self).__init__( - img_suffix='.png', - seg_map_suffix='_manual1.png', - reduce_zero_label=False, - **kwargs) + img_suffix='.png', seg_map_suffix='_manual1.png', reduce_zero_label=False, **kwargs + ) assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py index 923203b51377..b67616f5f58f 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py @@ -19,9 +19,5 @@ class HRFDataset(CustomDataset): PALETTE = [[120, 120, 120], [6, 230, 230]] def __init__(self, **kwargs): - super(HRFDataset, self).__init__( - img_suffix='.png', - seg_map_suffix='.png', - reduce_zero_label=False, - **kwargs) + super(HRFDataset, self).__init__(img_suffix='.png', seg_map_suffix='.png', reduce_zero_label=False, **kwargs) assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py index 541a63c66a13..35028ac9b15a 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py @@ -17,40 +17,136 @@ class PascalContextDataset(CustomDataset): split (str): Split txt file for PascalContext. """ - CLASSES = ('background', 'aeroplane', 'bag', 'bed', 'bedclothes', 'bench', - 'bicycle', 'bird', 'boat', 'book', 'bottle', 'building', 'bus', - 'cabinet', 'car', 'cat', 'ceiling', 'chair', 'cloth', - 'computer', 'cow', 'cup', 'curtain', 'dog', 'door', 'fence', - 'floor', 'flower', 'food', 'grass', 'ground', 'horse', - 'keyboard', 'light', 'motorbike', 'mountain', 'mouse', 'person', - 'plate', 'platform', 'pottedplant', 'road', 'rock', 'sheep', - 'shelves', 'sidewalk', 'sign', 'sky', 'snow', 'sofa', 'table', - 'track', 'train', 'tree', 'truck', 'tvmonitor', 'wall', 'water', - 'window', 'wood') + CLASSES = ( + 'background', + 'aeroplane', + 'bag', + 'bed', + 'bedclothes', + 'bench', + 'bicycle', + 'bird', + 'boat', + 'book', + 'bottle', + 'building', + 'bus', + 'cabinet', + 'car', + 'cat', + 'ceiling', + 'chair', + 'cloth', + 'computer', + 'cow', + 'cup', + 'curtain', + 'dog', + 'door', + 'fence', + 'floor', + 'flower', + 'food', + 'grass', + 'ground', + 'horse', + 'keyboard', + 'light', + 'motorbike', + 'mountain', + 'mouse', + 'person', + 'plate', + 'platform', + 'pottedplant', + 'road', + 'rock', + 'sheep', + 'shelves', + 'sidewalk', + 'sign', + 'sky', + 'snow', + 'sofa', + 'table', + 'track', + 'train', + 'tree', + 'truck', + 'tvmonitor', + 'wall', + 'water', + 'window', + 'wood', + ) - PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], - [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], - [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], - [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], - [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], - [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], - [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], - [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], - [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], - [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], - [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], - [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], - [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], - [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], - [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255]] + PALETTE = [ + [120, 120, 120], + [180, 120, 120], + [6, 230, 230], + [80, 50, 50], + [4, 200, 3], + [120, 120, 80], + [140, 140, 140], + [204, 5, 255], + [230, 230, 230], + [4, 250, 7], + [224, 5, 255], + [235, 255, 7], + [150, 5, 61], + [120, 120, 70], + [8, 255, 51], + [255, 6, 82], + [143, 255, 140], + [204, 255, 4], + [255, 51, 7], + [204, 70, 3], + [0, 102, 200], + [61, 230, 250], + [255, 6, 51], + [11, 102, 255], + [255, 7, 71], + [255, 9, 224], + [9, 7, 230], + [220, 220, 220], + [255, 9, 92], + [112, 9, 255], + [8, 255, 214], + [7, 255, 224], + [255, 184, 6], + [10, 255, 71], + [255, 41, 10], + [7, 255, 255], + [224, 255, 8], + [102, 8, 255], + [255, 61, 6], + [255, 194, 7], + [255, 122, 8], + [0, 255, 20], + [255, 8, 41], + [255, 5, 153], + [6, 51, 255], + [235, 12, 255], + [160, 150, 20], + [0, 163, 255], + [140, 140, 140], + [250, 10, 15], + [20, 255, 0], + [31, 255, 0], + [255, 31, 0], + [255, 224, 0], + [153, 255, 0], + [0, 0, 255], + [255, 71, 0], + [0, 235, 255], + [0, 173, 255], + [31, 0, 255], + ] def __init__(self, split, **kwargs): super(PascalContextDataset, self).__init__( - img_suffix='.jpg', - seg_map_suffix='.png', - split=split, - reduce_zero_label=False, - **kwargs) + img_suffix='.jpg', seg_map_suffix='.png', split=split, reduce_zero_label=False, **kwargs + ) assert osp.exists(self.img_dir) and self.split is not None @@ -67,37 +163,132 @@ class PascalContextDataset59(CustomDataset): split (str): Split txt file for PascalContext. """ - CLASSES = ('aeroplane', 'bag', 'bed', 'bedclothes', 'bench', 'bicycle', - 'bird', 'boat', 'book', 'bottle', 'building', 'bus', 'cabinet', - 'car', 'cat', 'ceiling', 'chair', 'cloth', 'computer', 'cow', - 'cup', 'curtain', 'dog', 'door', 'fence', 'floor', 'flower', - 'food', 'grass', 'ground', 'horse', 'keyboard', 'light', - 'motorbike', 'mountain', 'mouse', 'person', 'plate', 'platform', - 'pottedplant', 'road', 'rock', 'sheep', 'shelves', 'sidewalk', - 'sign', 'sky', 'snow', 'sofa', 'table', 'track', 'train', - 'tree', 'truck', 'tvmonitor', 'wall', 'water', 'window', 'wood') + CLASSES = ( + 'aeroplane', + 'bag', + 'bed', + 'bedclothes', + 'bench', + 'bicycle', + 'bird', + 'boat', + 'book', + 'bottle', + 'building', + 'bus', + 'cabinet', + 'car', + 'cat', + 'ceiling', + 'chair', + 'cloth', + 'computer', + 'cow', + 'cup', + 'curtain', + 'dog', + 'door', + 'fence', + 'floor', + 'flower', + 'food', + 'grass', + 'ground', + 'horse', + 'keyboard', + 'light', + 'motorbike', + 'mountain', + 'mouse', + 'person', + 'plate', + 'platform', + 'pottedplant', + 'road', + 'rock', + 'sheep', + 'shelves', + 'sidewalk', + 'sign', + 'sky', + 'snow', + 'sofa', + 'table', + 'track', + 'train', + 'tree', + 'truck', + 'tvmonitor', + 'wall', + 'water', + 'window', + 'wood', + ) - PALETTE = [[180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], - [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], - [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], - [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], - [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], - [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], - [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], - [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], - [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], - [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], - [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], - [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], - [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], - [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], - [0, 235, 255], [0, 173, 255], [31, 0, 255]] + PALETTE = [ + [180, 120, 120], + [6, 230, 230], + [80, 50, 50], + [4, 200, 3], + [120, 120, 80], + [140, 140, 140], + [204, 5, 255], + [230, 230, 230], + [4, 250, 7], + [224, 5, 255], + [235, 255, 7], + [150, 5, 61], + [120, 120, 70], + [8, 255, 51], + [255, 6, 82], + [143, 255, 140], + [204, 255, 4], + [255, 51, 7], + [204, 70, 3], + [0, 102, 200], + [61, 230, 250], + [255, 6, 51], + [11, 102, 255], + [255, 7, 71], + [255, 9, 224], + [9, 7, 230], + [220, 220, 220], + [255, 9, 92], + [112, 9, 255], + [8, 255, 214], + [7, 255, 224], + [255, 184, 6], + [10, 255, 71], + [255, 41, 10], + [7, 255, 255], + [224, 255, 8], + [102, 8, 255], + [255, 61, 6], + [255, 194, 7], + [255, 122, 8], + [0, 255, 20], + [255, 8, 41], + [255, 5, 153], + [6, 51, 255], + [235, 12, 255], + [160, 150, 20], + [0, 163, 255], + [140, 140, 140], + [250, 10, 15], + [20, 255, 0], + [31, 255, 0], + [255, 31, 0], + [255, 224, 0], + [153, 255, 0], + [0, 0, 255], + [255, 71, 0], + [0, 235, 255], + [0, 173, 255], + [31, 0, 255], + ] def __init__(self, split, **kwargs): super(PascalContextDataset59, self).__init__( - img_suffix='.jpg', - seg_map_suffix='.png', - split=split, - reduce_zero_label=True, - **kwargs) + img_suffix='.jpg', seg_map_suffix='.png', split=split, reduce_zero_label=True, **kwargs + ) assert osp.exists(self.img_dir) and self.split is not None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py index 8b9046b07bb4..52eb533242b3 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py @@ -1,16 +1,43 @@ from .compose import Compose -from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor, - Transpose, to_tensor) +from .formating import Collect, ImageToTensor, ToDataContainer, ToTensor, Transpose, to_tensor from .loading import LoadAnnotations, LoadImageFromFile from .test_time_aug import MultiScaleFlipAug -from .transforms import (CLAHE, AdjustGamma, Normalize, Pad, - PhotoMetricDistortion, RandomCrop, RandomFlip, - RandomRotate, Rerange, Resize, RGB2Gray, SegRescale) +from .transforms import ( + CLAHE, + AdjustGamma, + Normalize, + Pad, + PhotoMetricDistortion, + RandomCrop, + RandomFlip, + RandomRotate, + Rerange, + Resize, + RGB2Gray, + SegRescale, +) __all__ = [ - 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', - 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile', - 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', - 'Normalize', 'SegRescale', 'PhotoMetricDistortion', 'RandomRotate', - 'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray' + 'Compose', + 'to_tensor', + 'ToTensor', + 'ImageToTensor', + 'ToDataContainer', + 'Transpose', + 'Collect', + 'LoadAnnotations', + 'LoadImageFromFile', + 'MultiScaleFlipAug', + 'Resize', + 'RandomFlip', + 'Pad', + 'RandomCrop', + 'Normalize', + 'SegRescale', + 'PhotoMetricDistortion', + 'RandomRotate', + 'AdjustGamma', + 'CLAHE', + 'Rerange', + 'RGB2Gray', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py index f74b359efe10..e5222a69bec6 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py @@ -1,8 +1,9 @@ from collections.abc import Sequence -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv import numpy as np import torch + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import DataContainer as DC from ..builder import PIPELINES @@ -131,8 +132,7 @@ def __call__(self, results): return results def __repr__(self): - return self.__class__.__name__ + \ - f'(keys={self.keys}, order={self.order})' + return self.__class__.__name__ + f'(keys={self.keys}, order={self.order})' @PIPELINES.register_module() @@ -147,9 +147,7 @@ class ToDataContainer(object): dict(key='gt_semantic_seg'))``. """ - def __init__(self, - fields=(dict(key='img', - stack=True), dict(key='gt_semantic_seg'))): + def __init__(self, fields=(dict(key='img', stack=True), dict(key='gt_semantic_seg'))): self.fields = fields def __call__(self, results): @@ -206,9 +204,8 @@ def __call__(self, results): if 'gt_semantic_seg' in results: # convert to long results['gt_semantic_seg'] = DC( - to_tensor(results['gt_semantic_seg'][None, - ...].astype(np.int64)), - stack=True) + to_tensor(results['gt_semantic_seg'][None, ...].astype(np.int64)), stack=True + ) return results def __repr__(self): @@ -253,11 +250,21 @@ class Collect(object): 'img_norm_cfg')`` """ - def __init__(self, - keys, - meta_keys=('filename', 'ori_filename', 'ori_shape', - 'img_shape', 'pad_shape', 'scale_factor', 'flip', - 'flip_direction', 'img_norm_cfg')): + def __init__( + self, + keys, + meta_keys=( + 'filename', + 'ori_filename', + 'ori_shape', + 'img_shape', + 'pad_shape', + 'scale_factor', + 'flip', + 'flip_direction', + 'img_norm_cfg', + ), + ): self.keys = keys self.meta_keys = meta_keys @@ -284,5 +291,4 @@ def __call__(self, results): return data def __repr__(self): - return self.__class__.__name__ + \ - f'(keys={self.keys}, meta_keys={self.meta_keys})' + return self.__class__.__name__ + f'(keys={self.keys}, meta_keys={self.meta_keys})' diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py index da7f347e66d2..5d2e2a51a1bf 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py @@ -1,8 +1,9 @@ import os.path as osp -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv import numpy as np +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + from ..builder import PIPELINES @@ -28,11 +29,9 @@ class LoadImageFromFile(object): 'cv2' """ - def __init__(self, - to_float32=False, - color_type='color', - file_client_args=dict(backend='disk'), - imdecode_backend='cv2'): + def __init__( + self, to_float32=False, color_type='color', file_client_args=dict(backend='disk'), imdecode_backend='cv2' + ): self.to_float32 = to_float32 self.color_type = color_type self.file_client_args = file_client_args.copy() @@ -53,13 +52,11 @@ def __call__(self, results): self.file_client = mmcv.FileClient(**self.file_client_args) if results.get('img_prefix') is not None: - filename = osp.join(results['img_prefix'], - results['img_info']['filename']) + filename = osp.join(results['img_prefix'], results['img_info']['filename']) else: filename = results['img_info']['filename'] img_bytes = self.file_client.get(filename) - img = mmcv.imfrombytes( - img_bytes, flag=self.color_type, backend=self.imdecode_backend) + img = mmcv.imfrombytes(img_bytes, flag=self.color_type, backend=self.imdecode_backend) if self.to_float32: img = img.astype(np.float32) @@ -73,9 +70,8 @@ def __call__(self, results): results['scale_factor'] = 1.0 num_channels = 1 if len(img.shape) < 3 else img.shape[2] results['img_norm_cfg'] = dict( - mean=np.zeros(num_channels, dtype=np.float32), - std=np.ones(num_channels, dtype=np.float32), - to_rgb=False) + mean=np.zeros(num_channels, dtype=np.float32), std=np.ones(num_channels, dtype=np.float32), to_rgb=False + ) return results def __repr__(self): @@ -101,10 +97,7 @@ class LoadAnnotations(object): 'pillow' """ - def __init__(self, - reduce_zero_label=False, - file_client_args=dict(backend='disk'), - imdecode_backend='pillow'): + def __init__(self, reduce_zero_label=False, file_client_args=dict(backend='disk'), imdecode_backend='pillow'): self.reduce_zero_label = reduce_zero_label self.file_client_args = file_client_args.copy() self.file_client = None @@ -124,14 +117,13 @@ def __call__(self, results): self.file_client = mmcv.FileClient(**self.file_client_args) if results.get('seg_prefix', None) is not None: - filename = osp.join(results['seg_prefix'], - results['ann_info']['seg_map']) + filename = osp.join(results['seg_prefix'], results['ann_info']['seg_map']) else: filename = results['ann_info']['seg_map'] img_bytes = self.file_client.get(filename) - gt_semantic_seg = mmcv.imfrombytes( - img_bytes, flag='unchanged', - backend=self.imdecode_backend).squeeze().astype(np.uint8) + gt_semantic_seg = ( + mmcv.imfrombytes(img_bytes, flag='unchanged', backend=self.imdecode_backend).squeeze().astype(np.uint8) + ) # modify if custom classes if results.get('label_map', None) is not None: for old_id, new_id in results['label_map'].items(): diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py index 21f86894ea76..e8675fb4c872 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py @@ -50,45 +50,31 @@ class MultiScaleFlipAug(object): It has no effect when flip == False. Default: "horizontal". """ - def __init__(self, - transforms, - img_scale, - img_ratios=None, - flip=False, - flip_direction='horizontal'): + def __init__(self, transforms, img_scale, img_ratios=None, flip=False, flip_direction='horizontal'): self.transforms = Compose(transforms) if img_ratios is not None: - img_ratios = img_ratios if isinstance(img_ratios, - list) else [img_ratios] + img_ratios = img_ratios if isinstance(img_ratios, list) else [img_ratios] assert mmcv.is_list_of(img_ratios, float) if img_scale is None: # mode 1: given img_scale=None and a range of image ratio self.img_scale = None assert mmcv.is_list_of(img_ratios, float) - elif isinstance(img_scale, tuple) and mmcv.is_list_of( - img_ratios, float): + elif isinstance(img_scale, tuple) and mmcv.is_list_of(img_ratios, float): assert len(img_scale) == 2 # mode 2: given a scale and a range of image ratio - self.img_scale = [(int(img_scale[0] * ratio), - int(img_scale[1] * ratio)) - for ratio in img_ratios] + self.img_scale = [(int(img_scale[0] * ratio), int(img_scale[1] * ratio)) for ratio in img_ratios] else: # mode 3: given multiple scales - self.img_scale = img_scale if isinstance(img_scale, - list) else [img_scale] + self.img_scale = img_scale if isinstance(img_scale, list) else [img_scale] assert mmcv.is_list_of(self.img_scale, tuple) or self.img_scale is None self.flip = flip self.img_ratios = img_ratios - self.flip_direction = flip_direction if isinstance( - flip_direction, list) else [flip_direction] + self.flip_direction = flip_direction if isinstance(flip_direction, list) else [flip_direction] assert mmcv.is_list_of(self.flip_direction, str) if not self.flip and self.flip_direction != ['horizontal']: - warnings.warn( - 'flip_direction has no effect when flip is set to False') - if (self.flip - and not any([t['type'] == 'RandomFlip' for t in transforms])): - warnings.warn( - 'flip has no effect when RandomFlip is not in transforms') + warnings.warn('flip_direction has no effect when flip is set to False') + if self.flip and not any([t['type'] == 'RandomFlip' for t in transforms]): + warnings.warn('flip has no effect when RandomFlip is not in transforms') def __call__(self, results): """Call function to apply test time augment transforms on results. @@ -104,8 +90,7 @@ def __call__(self, results): aug_data = [] if self.img_scale is None and mmcv.is_list_of(self.img_ratios, float): h, w = results['img'].shape[:2] - img_scale = [(int(w * ratio), int(h * ratio)) - for ratio in self.img_ratios] + img_scale = [(int(w * ratio), int(h * ratio)) for ratio in self.img_ratios] else: img_scale = self.img_scale flip_aug = [False, True] if self.flip else [False] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py index ab97de3dfa34..12bf591cff32 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py @@ -1,8 +1,9 @@ -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv import numpy as np -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import deprecated_api_warning, is_tuple_of from numpy import random +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import deprecated_api_warning, is_tuple_of + from ..builder import PIPELINES @@ -38,11 +39,7 @@ class Resize(object): image. """ - def __init__(self, - img_scale=None, - multiscale_mode='range', - ratio_range=None, - keep_ratio=True): + def __init__(self, img_scale=None, multiscale_mode='range', ratio_range=None, keep_ratio=True): if img_scale is None: self.img_scale = None else: @@ -100,12 +97,8 @@ def random_sample(img_scales): assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2 img_scale_long = [max(s) for s in img_scales] img_scale_short = [min(s) for s in img_scales] - long_edge = np.random.randint( - min(img_scale_long), - max(img_scale_long) + 1) - short_edge = np.random.randint( - min(img_scale_short), - max(img_scale_short) + 1) + long_edge = np.random.randint(min(img_scale_long), max(img_scale_long) + 1) + short_edge = np.random.randint(min(img_scale_short), max(img_scale_short) + 1) img_scale = (long_edge, short_edge) return img_scale, None @@ -157,11 +150,9 @@ def _random_scale(self, results): if self.ratio_range is not None: if self.img_scale is None: h, w = results['img'].shape[:2] - scale, scale_idx = self.random_sample_ratio((w, h), - self.ratio_range) + scale, scale_idx = self.random_sample_ratio((w, h), self.ratio_range) else: - scale, scale_idx = self.random_sample_ratio( - self.img_scale[0], self.ratio_range) + scale, scale_idx = self.random_sample_ratio(self.img_scale[0], self.ratio_range) elif len(self.img_scale) == 1: scale, scale_idx = self.img_scale[0], 0 elif self.multiscale_mode == 'range': @@ -177,8 +168,7 @@ def _random_scale(self, results): def _resize_img(self, results): """Resize images with ``results['scale']``.""" if self.keep_ratio: - img, scale_factor = mmcv.imrescale( - results['img'], results['scale'], return_scale=True) + img, scale_factor = mmcv.imrescale(results['img'], results['scale'], return_scale=True) # the w_scale and h_scale has minor difference # a real fix should be done in the mmcv.imrescale in the future new_h, new_w = img.shape[:2] @@ -186,10 +176,8 @@ def _resize_img(self, results): w_scale = new_w / w h_scale = new_h / h else: - img, w_scale, h_scale = mmcv.imresize( - results['img'], results['scale'], return_scale=True) - scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], - dtype=np.float32) + img, w_scale, h_scale = mmcv.imresize(results['img'], results['scale'], return_scale=True) + scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], dtype=np.float32) results['img'] = img results['img_shape'] = img.shape results['pad_shape'] = img.shape # in case that there is no padding @@ -200,11 +188,9 @@ def _resize_seg(self, results): """Resize semantic segmentation map with ``results['scale']``.""" for key in results.get('seg_fields', []): if self.keep_ratio: - gt_seg = mmcv.imrescale( - results[key], results['scale'], interpolation='nearest') + gt_seg = mmcv.imrescale(results[key], results['scale'], interpolation='nearest') else: - gt_seg = mmcv.imresize( - results[key], results['scale'], interpolation='nearest') + gt_seg = mmcv.imresize(results[key], results['scale'], interpolation='nearest') results[key] = gt_seg def __call__(self, results): @@ -227,10 +213,12 @@ def __call__(self, results): def __repr__(self): repr_str = self.__class__.__name__ - repr_str += (f'(img_scale={self.img_scale}, ' - f'multiscale_mode={self.multiscale_mode}, ' - f'ratio_range={self.ratio_range}, ' - f'keep_ratio={self.keep_ratio})') + repr_str += ( + f'(img_scale={self.img_scale}, ' + f'multiscale_mode={self.multiscale_mode}, ' + f'ratio_range={self.ratio_range}, ' + f'keep_ratio={self.keep_ratio})' + ) return repr_str @@ -275,14 +263,12 @@ def __call__(self, results): results['flip_direction'] = self.direction if results['flip']: # flip image - results['img'] = mmcv.imflip( - results['img'], direction=results['flip_direction']) + results['img'] = mmcv.imflip(results['img'], direction=results['flip_direction']) # flip segs for key in results.get('seg_fields', []): # use copy() to make numpy stride positive - results[key] = mmcv.imflip( - results[key], direction=results['flip_direction']).copy() + results[key] = mmcv.imflip(results[key], direction=results['flip_direction']).copy() return results def __repr__(self): @@ -305,11 +291,7 @@ class Pad(object): Default: 255. """ - def __init__(self, - size=None, - size_divisor=None, - pad_val=0, - seg_pad_val=255): + def __init__(self, size=None, size_divisor=None, pad_val=0, seg_pad_val=255): self.size = size self.size_divisor = size_divisor self.pad_val = pad_val @@ -321,11 +303,9 @@ def __init__(self, def _pad_img(self, results): """Pad images according to ``self.size``.""" if self.size is not None: - padded_img = mmcv.impad( - results['img'], shape=self.size, pad_val=self.pad_val) + padded_img = mmcv.impad(results['img'], shape=self.size, pad_val=self.pad_val) elif self.size_divisor is not None: - padded_img = mmcv.impad_to_multiple( - results['img'], self.size_divisor, pad_val=self.pad_val) + padded_img = mmcv.impad_to_multiple(results['img'], self.size_divisor, pad_val=self.pad_val) results['img'] = padded_img results['pad_shape'] = padded_img.shape results['pad_fixed_size'] = self.size @@ -334,10 +314,7 @@ def _pad_img(self, results): def _pad_seg(self, results): """Pad masks according to ``results['pad_shape']``.""" for key in results.get('seg_fields', []): - results[key] = mmcv.impad( - results[key], - shape=results['pad_shape'][:2], - pad_val=self.seg_pad_val) + results[key] = mmcv.impad(results[key], shape=results['pad_shape'][:2], pad_val=self.seg_pad_val) def __call__(self, results): """Call function to pad images, masks, semantic segmentation maps. @@ -355,8 +332,7 @@ def __call__(self, results): def __repr__(self): repr_str = self.__class__.__name__ - repr_str += f'(size={self.size}, size_divisor={self.size_divisor}, ' \ - f'pad_val={self.pad_val})' + repr_str += f'(size={self.size}, size_divisor={self.size_divisor}, ' f'pad_val={self.pad_val})' return repr_str @@ -389,16 +365,13 @@ def __call__(self, results): result dict. """ - results['img'] = mmcv.imnormalize(results['img'], self.mean, self.std, - self.to_rgb) - results['img_norm_cfg'] = dict( - mean=self.mean, std=self.std, to_rgb=self.to_rgb) + results['img'] = mmcv.imnormalize(results['img'], self.mean, self.std, self.to_rgb) + results['img_norm_cfg'] = dict(mean=self.mean, std=self.std, to_rgb=self.to_rgb) return results def __repr__(self): repr_str = self.__class__.__name__ - repr_str += f'(mean={self.mean}, std={self.std}, to_rgb=' \ - f'{self.to_rgb})' + repr_str += f'(mean={self.mean}, std={self.std}, to_rgb=' f'{self.to_rgb})' return repr_str @@ -481,15 +454,14 @@ def __call__(self, results): for i in range(results['img'].shape[2]): results['img'][:, :, i] = mmcv.clahe( - np.array(results['img'][:, :, i], dtype=np.uint8), - self.clip_limit, self.tile_grid_size) + np.array(results['img'][:, :, i], dtype=np.uint8), self.clip_limit, self.tile_grid_size + ) return results def __repr__(self): repr_str = self.__class__.__name__ - repr_str += f'(clip_limit={self.clip_limit}, '\ - f'tile_grid_size={self.tile_grid_size})' + repr_str += f'(clip_limit={self.clip_limit}, ' f'tile_grid_size={self.tile_grid_size})' return repr_str @@ -503,7 +475,7 @@ class RandomCrop(object): occupy. """ - def __init__(self, crop_size, cat_max_ratio=1., ignore_index=255): + def __init__(self, crop_size, cat_max_ratio=1.0, ignore_index=255): assert crop_size[0] > 0 and crop_size[1] > 0 self.crop_size = crop_size self.cat_max_ratio = cat_max_ratio @@ -539,14 +511,13 @@ def __call__(self, results): img = results['img'] crop_bbox = self.get_crop_bbox(img) - if self.cat_max_ratio < 1.: + if self.cat_max_ratio < 1.0: # Repeat 10 times for _ in range(10): seg_temp = self.crop(results['gt_semantic_seg'], crop_bbox) labels, cnt = np.unique(seg_temp, return_counts=True) cnt = cnt[labels != self.ignore_index] - if len(cnt) > 1 and np.max(cnt) / np.sum( - cnt) < self.cat_max_ratio: + if len(cnt) > 1 and np.max(cnt) / np.sum(cnt) < self.cat_max_ratio: break crop_bbox = self.get_crop_bbox(img) @@ -585,13 +556,7 @@ class RandomRotate(object): rotated image. Default: False """ - def __init__(self, - prob, - degree, - pad_val=0, - seg_pad_val=255, - center=None, - auto_bound=False): + def __init__(self, prob, degree, pad_val=0, seg_pad_val=255, center=None, auto_bound=False): self.prob = prob assert prob >= 0 and prob <= 1 if isinstance(degree, (float, int)): @@ -599,8 +564,7 @@ def __init__(self, self.degree = (-degree, degree) else: self.degree = degree - assert len(self.degree) == 2, f'degree {self.degree} should be a ' \ - f'tuple of (min, max)' + assert len(self.degree) == 2, f'degree {self.degree} should be a ' f'tuple of (min, max)' self.pal_val = pad_val self.seg_pad_val = seg_pad_val self.center = center @@ -621,11 +585,8 @@ def __call__(self, results): if rotate: # rotate image results['img'] = mmcv.imrotate( - results['img'], - angle=degree, - border_value=self.pal_val, - center=self.center, - auto_bound=self.auto_bound) + results['img'], angle=degree, border_value=self.pal_val, center=self.center, auto_bound=self.auto_bound + ) # rotate segs for key in results.get('seg_fields', []): @@ -635,17 +596,20 @@ def __call__(self, results): border_value=self.seg_pad_val, center=self.center, auto_bound=self.auto_bound, - interpolation='nearest') + interpolation='nearest', + ) return results def __repr__(self): repr_str = self.__class__.__name__ - repr_str += f'(prob={self.prob}, ' \ - f'degree={self.degree}, ' \ - f'pad_val={self.pal_val}, ' \ - f'seg_pad_val={self.seg_pad_val}, ' \ - f'center={self.center}, ' \ - f'auto_bound={self.auto_bound})' + repr_str += ( + f'(prob={self.prob}, ' + f'degree={self.degree}, ' + f'pad_val={self.pal_val}, ' + f'seg_pad_val={self.seg_pad_val}, ' + f'center={self.center}, ' + f'auto_bound={self.auto_bound})' + ) return repr_str @@ -699,8 +663,7 @@ def __call__(self, results): def __repr__(self): repr_str = self.__class__.__name__ - repr_str += f'(out_channels={self.out_channels}, ' \ - f'weights={self.weights})' + repr_str += f'(out_channels={self.out_channels}, ' f'weights={self.weights})' return repr_str @@ -718,8 +681,7 @@ def __init__(self, gamma=1.0): assert gamma > 0 self.gamma = gamma inv_gamma = 1.0 / gamma - self.table = np.array([(i / 255.0)**inv_gamma * 255 - for i in np.arange(256)]).astype('uint8') + self.table = np.array([(i / 255.0) ** inv_gamma * 255 for i in np.arange(256)]).astype('uint8') def __call__(self, results): """Call function to process the image with gamma correction. @@ -731,8 +693,7 @@ def __call__(self, results): dict: Processed results. """ - results['img'] = mmcv.lut_transform( - np.array(results['img'], dtype=np.uint8), self.table) + results['img'] = mmcv.lut_transform(np.array(results['img'], dtype=np.uint8), self.table) return results @@ -762,8 +723,7 @@ def __call__(self, results): """ for key in results.get('seg_fields', []): if self.scale_factor != 1: - results[key] = mmcv.imrescale( - results[key], self.scale_factor, interpolation='nearest') + results[key] = mmcv.imrescale(results[key], self.scale_factor, interpolation='nearest') return results def __repr__(self): @@ -791,11 +751,7 @@ class PhotoMetricDistortion(object): hue_delta (int): delta of hue. """ - def __init__(self, - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18): + def __init__(self, brightness_delta=32, contrast_range=(0.5, 1.5), saturation_range=(0.5, 1.5), hue_delta=18): self.brightness_delta = brightness_delta self.contrast_lower, self.contrast_upper = contrast_range self.saturation_lower, self.saturation_upper = saturation_range @@ -810,18 +766,13 @@ def convert(self, img, alpha=1, beta=0): def brightness(self, img): """Brightness distortion.""" if random.randint(2): - return self.convert( - img, - beta=random.uniform(-self.brightness_delta, - self.brightness_delta)) + return self.convert(img, beta=random.uniform(-self.brightness_delta, self.brightness_delta)) return img def contrast(self, img): """Contrast distortion.""" if random.randint(2): - return self.convert( - img, - alpha=random.uniform(self.contrast_lower, self.contrast_upper)) + return self.convert(img, alpha=random.uniform(self.contrast_lower, self.contrast_upper)) return img def saturation(self, img): @@ -829,9 +780,8 @@ def saturation(self, img): if random.randint(2): img = mmcv.bgr2hsv(img) img[:, :, 1] = self.convert( - img[:, :, 1], - alpha=random.uniform(self.saturation_lower, - self.saturation_upper)) + img[:, :, 1], alpha=random.uniform(self.saturation_lower, self.saturation_upper) + ) img = mmcv.hsv2bgr(img) return img @@ -839,9 +789,7 @@ def hue(self, img): """Hue distortion.""" if random.randint(2): img = mmcv.bgr2hsv(img) - img[:, :, - 0] = (img[:, :, 0].astype(int) + - random.randint(-self.hue_delta, self.hue_delta)) % 180 + img[:, :, 0] = (img[:, :, 0].astype(int) + random.randint(-self.hue_delta, self.hue_delta)) % 180 img = mmcv.hsv2bgr(img) return img @@ -880,10 +828,12 @@ def __call__(self, results): def __repr__(self): repr_str = self.__class__.__name__ - repr_str += (f'(brightness_delta={self.brightness_delta}, ' - f'contrast_range=({self.contrast_lower}, ' - f'{self.contrast_upper}), ' - f'saturation_range=({self.saturation_lower}, ' - f'{self.saturation_upper}), ' - f'hue_delta={self.hue_delta})') + repr_str += ( + f'(brightness_delta={self.brightness_delta}, ' + f'contrast_range=({self.contrast_lower}, ' + f'{self.contrast_upper}), ' + f'saturation_range=({self.saturation_lower}, ' + f'{self.saturation_upper}), ' + f'hue_delta={self.hue_delta})' + ) return repr_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py index cbd14e0920e7..a94d01763980 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py @@ -20,8 +20,6 @@ class STAREDataset(CustomDataset): def __init__(self, **kwargs): super(STAREDataset, self).__init__( - img_suffix='.png', - seg_map_suffix='.ah.png', - reduce_zero_label=False, - **kwargs) + img_suffix='.png', seg_map_suffix='.ah.png', reduce_zero_label=False, **kwargs + ) assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py index a8855203b14e..5fd6641b33e1 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py @@ -12,18 +12,54 @@ class PascalVOCDataset(CustomDataset): split (str): Split txt file for Pascal VOC. """ - CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', - 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', - 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', - 'train', 'tvmonitor') + CLASSES = ( + 'background', + 'aeroplane', + 'bicycle', + 'bird', + 'boat', + 'bottle', + 'bus', + 'car', + 'cat', + 'chair', + 'cow', + 'diningtable', + 'dog', + 'horse', + 'motorbike', + 'person', + 'pottedplant', + 'sheep', + 'sofa', + 'train', + 'tvmonitor', + ) - PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], - [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], - [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], - [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], - [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] + PALETTE = [ + [0, 0, 0], + [128, 0, 0], + [0, 128, 0], + [128, 128, 0], + [0, 0, 128], + [128, 0, 128], + [0, 128, 128], + [128, 128, 128], + [64, 0, 0], + [192, 0, 0], + [64, 128, 0], + [192, 128, 0], + [64, 0, 128], + [192, 0, 128], + [64, 128, 128], + [192, 128, 128], + [0, 64, 0], + [128, 64, 0], + [0, 192, 0], + [128, 192, 0], + [0, 64, 128], + ] def __init__(self, split, **kwargs): - super(PascalVOCDataset, self).__init__( - img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs) + super(PascalVOCDataset, self).__init__(img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs) assert osp.exists(self.img_dir) and self.split is not None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py index 3cf93f8bec9c..130f6c12914e 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py @@ -1,12 +1,17 @@ from .backbones import * # noqa: F401,F403 -from .builder import (BACKBONES, HEADS, LOSSES, SEGMENTORS, build_backbone, - build_head, build_loss, build_segmentor) +from .builder import BACKBONES, HEADS, LOSSES, SEGMENTORS, build_backbone, build_head, build_loss, build_segmentor from .decode_heads import * # noqa: F401,F403 from .losses import * # noqa: F401,F403 from .necks import * # noqa: F401,F403 from .segmentors import * # noqa: F401,F403 __all__ = [ - 'BACKBONES', 'HEADS', 'LOSSES', 'SEGMENTORS', 'build_backbone', - 'build_head', 'build_loss', 'build_segmentor' + 'BACKBONES', + 'HEADS', + 'LOSSES', + 'SEGMENTORS', + 'build_backbone', + 'build_head', + 'build_loss', + 'build_segmentor', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py index 8339983905fb..ceb46fade97e 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py @@ -1,4 +1,5 @@ from .cgnet import CGNet + # from .fast_scnn import FastSCNN from .hrnet import HRNet from .mobilenet_v2 import MobileNetV2 @@ -7,11 +8,20 @@ from .resnet import ResNet, ResNetV1c, ResNetV1d from .resnext import ResNeXt from .unet import UNet -from .vit import VisionTransformer from .uniformer import UniFormer +from .vit import VisionTransformer __all__ = [ - 'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', - 'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3', - 'VisionTransformer', 'UniFormer' + 'ResNet', + 'ResNetV1c', + 'ResNetV1d', + 'ResNeXt', + 'HRNet', + 'ResNeSt', + 'MobileNetV2', + 'UNet', + 'CGNet', + 'MobileNetV3', + 'VisionTransformer', + 'UniFormer', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py index a672d3156aeb..37a147de274d 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py @@ -1,12 +1,18 @@ import torch import torch.nn as nn import torch.utils.checkpoint as cp -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import (ConvModule, build_conv_layer, build_norm_layer, - constant_init, kaiming_init) + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + ConvModule, + build_conv_layer, + build_norm_layer, + constant_init, + kaiming_init, +) from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm - from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + from ..builder import BACKBONES @@ -31,11 +37,13 @@ def __init__(self, channel, reduction=16, with_cp=False): self.with_cp = with_cp self.avg_pool = nn.AdaptiveAvgPool2d(1) self.fc = nn.Sequential( - nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True), - nn.Linear(channel // reduction, channel), nn.Sigmoid()) + nn.Linear(channel, channel // reduction), + nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel), + nn.Sigmoid(), + ) def forward(self, x): - def _inner_forward(x): num_batch, num_channel = x.size()[:2] y = self.avg_pool(x).view(num_batch, num_channel) @@ -75,17 +83,19 @@ class ContextGuidedBlock(nn.Module): memory while slowing down the training speed. Default: False. """ - def __init__(self, - in_channels, - out_channels, - dilation=2, - reduction=16, - skip_connect=True, - downsample=False, - conv_cfg=None, - norm_cfg=dict(type='BN', requires_grad=True), - act_cfg=dict(type='PReLU'), - with_cp=False): + def __init__( + self, + in_channels, + out_channels, + dilation=2, + reduction=16, + skip_connect=True, + downsample=False, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'), + with_cp=False, + ): super(ContextGuidedBlock, self).__init__() self.with_cp = with_cp self.downsample = downsample @@ -98,23 +108,12 @@ def __init__(self, padding = (kernel_size - 1) // 2 self.conv1x1 = ConvModule( - in_channels, - channels, - kernel_size, - stride, - padding, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg) + in_channels, channels, kernel_size, stride, padding, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg + ) self.f_loc = build_conv_layer( - conv_cfg, - channels, - channels, - kernel_size=3, - padding=1, - groups=channels, - bias=False) + conv_cfg, channels, channels, kernel_size=3, padding=1, groups=channels, bias=False + ) self.f_sur = build_conv_layer( conv_cfg, channels, @@ -123,24 +122,19 @@ def __init__(self, padding=dilation, groups=channels, dilation=dilation, - bias=False) + bias=False, + ) self.bn = build_norm_layer(norm_cfg, 2 * channels)[1] self.activate = nn.PReLU(2 * channels) if downsample: - self.bottleneck = build_conv_layer( - conv_cfg, - 2 * channels, - out_channels, - kernel_size=1, - bias=False) + self.bottleneck = build_conv_layer(conv_cfg, 2 * channels, out_channels, kernel_size=1, bias=False) self.skip_connect = skip_connect and not downsample self.f_glo = GlobalContextExtractor(out_channels, reduction, with_cp) def forward(self, x): - def _inner_forward(x): out = self.conv1x1(x) loc = self.f_loc(out) @@ -212,23 +206,24 @@ class CGNet(nn.Module): memory while slowing down the training speed. Default: False. """ - def __init__(self, - in_channels=3, - num_channels=(32, 64, 128), - num_blocks=(3, 21), - dilations=(2, 4), - reductions=(8, 16), - conv_cfg=None, - norm_cfg=dict(type='BN', requires_grad=True), - act_cfg=dict(type='PReLU'), - norm_eval=False, - with_cp=False): + def __init__( + self, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16), + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'), + norm_eval=False, + with_cp=False, + ): super(CGNet, self).__init__() self.in_channels = in_channels self.num_channels = num_channels - assert isinstance(self.num_channels, tuple) and len( - self.num_channels) == 3 + assert isinstance(self.num_channels, tuple) and len(self.num_channels) == 3 self.num_blocks = num_blocks assert isinstance(self.num_blocks, tuple) and len(self.num_blocks) == 2 self.dilations = dilations @@ -255,16 +250,16 @@ def __init__(self, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg)) + act_cfg=act_cfg, + ) + ) cur_channels = num_channels[0] self.inject_2x = InputInjection(1) # down-sample for Input, factor=2 self.inject_4x = InputInjection(2) # down-sample for Input, factor=4 cur_channels += in_channels - self.norm_prelu_0 = nn.Sequential( - build_norm_layer(norm_cfg, cur_channels)[1], - nn.PReLU(cur_channels)) + self.norm_prelu_0 = nn.Sequential(build_norm_layer(norm_cfg, cur_channels)[1], nn.PReLU(cur_channels)) # stage 1 self.level1 = nn.ModuleList() @@ -279,12 +274,12 @@ def __init__(self, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, - with_cp=with_cp)) # CG block + with_cp=with_cp, + ) + ) # CG block cur_channels = 2 * num_channels[1] + in_channels - self.norm_prelu_1 = nn.Sequential( - build_norm_layer(norm_cfg, cur_channels)[1], - nn.PReLU(cur_channels)) + self.norm_prelu_1 = nn.Sequential(build_norm_layer(norm_cfg, cur_channels)[1], nn.PReLU(cur_channels)) # stage 2 self.level2 = nn.ModuleList() @@ -299,12 +294,12 @@ def __init__(self, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, - with_cp=with_cp)) # CG block + with_cp=with_cp, + ) + ) # CG block cur_channels = 2 * num_channels[2] - self.norm_prelu_2 = nn.Sequential( - build_norm_layer(norm_cfg, cur_channels)[1], - nn.PReLU(cur_channels)) + self.norm_prelu_2 = nn.Sequential(build_norm_layer(norm_cfg, cur_channels)[1], nn.PReLU(cur_channels)) def forward(self, x): output = [] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py index fb8e1ade7c42..532d781d7ffa 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py @@ -1,11 +1,16 @@ import torch import torch.nn as nn -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule, constant_init, - kaiming_init) from torch.nn.modules.batchnorm import _BatchNorm +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + ConvModule, + DepthwiseSeparableConvModule, + constant_init, + kaiming_init, +) from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models.decode_heads.psp_head import PPM from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize + from ..builder import BACKBONES from ..utils.inverted_residual import InvertedResidual @@ -26,13 +31,15 @@ class LearningToDownsample(nn.Module): dict(type='ReLU') """ - def __init__(self, - in_channels, - dw_channels, - out_channels, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU')): + def __init__( + self, + in_channels, + dw_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + ): super(LearningToDownsample, self).__init__() self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg @@ -47,21 +54,14 @@ def __init__(self, stride=2, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) self.dsconv1 = DepthwiseSeparableConvModule( - dw_channels1, - dw_channels2, - kernel_size=3, - stride=2, - padding=1, - norm_cfg=self.norm_cfg) + dw_channels1, dw_channels2, kernel_size=3, stride=2, padding=1, norm_cfg=self.norm_cfg + ) self.dsconv2 = DepthwiseSeparableConvModule( - dw_channels2, - out_channels, - kernel_size=3, - stride=2, - padding=1, - norm_cfg=self.norm_cfg) + dw_channels2, out_channels, kernel_size=3, stride=2, padding=1, norm_cfg=self.norm_cfg + ) def forward(self, x): x = self.conv(x) @@ -103,32 +103,32 @@ class GlobalFeatureExtractor(nn.Module): Default: False """ - def __init__(self, - in_channels=64, - block_channels=(64, 96, 128), - out_channels=128, - expand_ratio=6, - num_blocks=(3, 3, 3), - strides=(2, 2, 1), - pool_scales=(1, 2, 3, 6), - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - align_corners=False): + def __init__( + self, + in_channels=64, + block_channels=(64, 96, 128), + out_channels=128, + expand_ratio=6, + num_blocks=(3, 3, 3), + strides=(2, 2, 1), + pool_scales=(1, 2, 3, 6), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False, + ): super(GlobalFeatureExtractor, self).__init__() self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg assert len(block_channels) == len(num_blocks) == 3 - self.bottleneck1 = self._make_layer(in_channels, block_channels[0], - num_blocks[0], strides[0], - expand_ratio) - self.bottleneck2 = self._make_layer(block_channels[0], - block_channels[1], num_blocks[1], - strides[1], expand_ratio) - self.bottleneck3 = self._make_layer(block_channels[1], - block_channels[2], num_blocks[2], - strides[2], expand_ratio) + self.bottleneck1 = self._make_layer(in_channels, block_channels[0], num_blocks[0], strides[0], expand_ratio) + self.bottleneck2 = self._make_layer( + block_channels[0], block_channels[1], num_blocks[1], strides[1], expand_ratio + ) + self.bottleneck3 = self._make_layer( + block_channels[1], block_channels[2], num_blocks[2], strides[2], expand_ratio + ) self.ppm = PPM( pool_scales, block_channels[2], @@ -136,37 +136,21 @@ def __init__(self, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, - align_corners=align_corners) + align_corners=align_corners, + ) self.out = ConvModule( block_channels[2] * 2, out_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) - def _make_layer(self, - in_channels, - out_channels, - blocks, - stride=1, - expand_ratio=6): - layers = [ - InvertedResidual( - in_channels, - out_channels, - stride, - expand_ratio, - norm_cfg=self.norm_cfg) - ] + def _make_layer(self, in_channels, out_channels, blocks, stride=1, expand_ratio=6): + layers = [InvertedResidual(in_channels, out_channels, stride, expand_ratio, norm_cfg=self.norm_cfg)] for i in range(1, blocks): - layers.append( - InvertedResidual( - out_channels, - out_channels, - 1, - expand_ratio, - norm_cfg=self.norm_cfg)) + layers.append(InvertedResidual(out_channels, out_channels, 1, expand_ratio, norm_cfg=self.norm_cfg)) return nn.Sequential(*layers) def forward(self, x): @@ -196,48 +180,36 @@ class FeatureFusionModule(nn.Module): Default: False """ - def __init__(self, - higher_in_channels, - lower_in_channels, - out_channels, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - align_corners=False): + def __init__( + self, + higher_in_channels, + lower_in_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False, + ): super(FeatureFusionModule, self).__init__() self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg self.align_corners = align_corners self.dwconv = ConvModule( - lower_in_channels, - out_channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + lower_in_channels, out_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) self.conv_lower_res = ConvModule( - out_channels, - out_channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=None) + out_channels, out_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=None + ) self.conv_higher_res = ConvModule( - higher_in_channels, - out_channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=None) + higher_in_channels, out_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=None + ) self.relu = nn.ReLU(True) def forward(self, higher_res_feature, lower_res_feature): lower_res_feature = resize( - lower_res_feature, - size=higher_res_feature.size()[2:], - mode='bilinear', - align_corners=self.align_corners) + lower_res_feature, size=higher_res_feature.size()[2:], mode='bilinear', align_corners=self.align_corners + ) lower_res_feature = self.dwconv(lower_res_feature) lower_res_feature = self.conv_lower_res(lower_res_feature) @@ -293,29 +265,35 @@ class FastSCNN(nn.Module): Default: False """ - def __init__(self, - in_channels=3, - downsample_dw_channels=(32, 48), - global_in_channels=64, - global_block_channels=(64, 96, 128), - global_block_strides=(2, 2, 1), - global_out_channels=128, - higher_in_channels=64, - lower_in_channels=128, - fusion_out_channels=128, - out_indices=(0, 1, 2), - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - align_corners=False): + def __init__( + self, + in_channels=3, + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False, + ): super(FastSCNN, self).__init__() if global_in_channels != higher_in_channels: - raise AssertionError('Global Input Channels must be the same \ - with Higher Input Channels!') + raise AssertionError( + 'Global Input Channels must be the same \ + with Higher Input Channels!' + ) elif global_out_channels != lower_in_channels: - raise AssertionError('Global Output Channels must be the same \ - with Lower Input Channels!') + raise AssertionError( + 'Global Output Channels must be the same \ + with Lower Input Channels!' + ) self.in_channels = in_channels self.downsample_dw_channels1 = downsample_dw_channels[0] @@ -338,7 +316,8 @@ def __init__(self, global_in_channels, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) self.global_feature_extractor = GlobalFeatureExtractor( global_in_channels, global_block_channels, @@ -347,7 +326,8 @@ def __init__(self, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, - align_corners=self.align_corners) + align_corners=self.align_corners, + ) self.feature_fusion = FeatureFusionModule( higher_in_channels, lower_in_channels, @@ -355,7 +335,8 @@ def __init__(self, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, - align_corners=self.align_corners) + align_corners=self.align_corners, + ) def init_weights(self, pretrained=None): for m in self.modules(): @@ -367,8 +348,7 @@ def init_weights(self, pretrained=None): def forward(self, x): higher_res_features = self.learning_to_downsample(x) lower_res_features = self.global_feature_extractor(higher_res_features) - fusion_output = self.feature_fusion(higher_res_features, - lower_res_features) + fusion_output = self.feature_fusion(higher_res_features, lower_res_features) outs = [higher_res_features, lower_res_features, fusion_output] outs = [outs[i] for i in self.out_indices] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py index 54ec053919d5..03dd604869ea 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py @@ -1,11 +1,16 @@ import torch.nn as nn -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import (build_conv_layer, build_norm_layer, constant_init, - kaiming_init) + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + build_conv_layer, + build_norm_layer, + constant_init, + kaiming_init, +) from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm - from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import Upsample, resize from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + from ..builder import BACKBONES from .resnet import BasicBlock, Bottleneck @@ -17,19 +22,20 @@ class HRModule(nn.Module): is in this module. """ - def __init__(self, - num_branches, - blocks, - num_blocks, - in_channels, - num_channels, - multiscale_output=True, - with_cp=False, - conv_cfg=None, - norm_cfg=dict(type='BN', requires_grad=True)): + def __init__( + self, + num_branches, + blocks, + num_blocks, + in_channels, + num_channels, + multiscale_output=True, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + ): super(HRModule, self).__init__() - self._check_branches(num_branches, num_blocks, in_channels, - num_channels) + self._check_branches(num_branches, num_blocks, in_channels, num_channels) self.in_channels = in_channels self.num_branches = num_branches @@ -38,40 +44,28 @@ def __init__(self, self.norm_cfg = norm_cfg self.conv_cfg = conv_cfg self.with_cp = with_cp - self.branches = self._make_branches(num_branches, blocks, num_blocks, - num_channels) + self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels) self.fuse_layers = self._make_fuse_layers() self.relu = nn.ReLU(inplace=False) - def _check_branches(self, num_branches, num_blocks, in_channels, - num_channels): + def _check_branches(self, num_branches, num_blocks, in_channels, num_channels): """Check branches configuration.""" if num_branches != len(num_blocks): - error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_BLOCKS(' \ - f'{len(num_blocks)})' + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_BLOCKS(' f'{len(num_blocks)})' raise ValueError(error_msg) if num_branches != len(num_channels): - error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_CHANNELS(' \ - f'{len(num_channels)})' + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_CHANNELS(' f'{len(num_channels)})' raise ValueError(error_msg) if num_branches != len(in_channels): - error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_INCHANNELS(' \ - f'{len(in_channels)})' + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_INCHANNELS(' f'{len(in_channels)})' raise ValueError(error_msg) - def _make_one_branch(self, - branch_index, - block, - num_blocks, - num_channels, - stride=1): + def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1): """Build one branch.""" downsample = None - if stride != 1 or \ - self.in_channels[branch_index] != \ - num_channels[branch_index] * block.expansion: + if stride != 1 or self.in_channels[branch_index] != num_channels[branch_index] * block.expansion: downsample = nn.Sequential( build_conv_layer( self.conv_cfg, @@ -79,9 +73,10 @@ def _make_one_branch(self, num_channels[branch_index] * block.expansion, kernel_size=1, stride=stride, - bias=False), - build_norm_layer(self.norm_cfg, num_channels[branch_index] * - block.expansion)[1]) + bias=False, + ), + build_norm_layer(self.norm_cfg, num_channels[branch_index] * block.expansion)[1], + ) layers = [] layers.append( @@ -92,9 +87,10 @@ def _make_one_branch(self, downsample=downsample, with_cp=self.with_cp, norm_cfg=self.norm_cfg, - conv_cfg=self.conv_cfg)) - self.in_channels[branch_index] = \ - num_channels[branch_index] * block.expansion + conv_cfg=self.conv_cfg, + ) + ) + self.in_channels[branch_index] = num_channels[branch_index] * block.expansion for i in range(1, num_blocks[branch_index]): layers.append( block( @@ -102,7 +98,9 @@ def _make_one_branch(self, num_channels[branch_index], with_cp=self.with_cp, norm_cfg=self.norm_cfg, - conv_cfg=self.conv_cfg)) + conv_cfg=self.conv_cfg, + ) + ) return nn.Sequential(*layers) @@ -111,8 +109,7 @@ def _make_branches(self, num_branches, block, num_blocks, num_channels): branches = [] for i in range(num_branches): - branches.append( - self._make_one_branch(i, block, num_blocks, num_channels)) + branches.append(self._make_one_branch(i, block, num_blocks, num_channels)) return nn.ModuleList(branches) @@ -138,13 +135,13 @@ def _make_fuse_layers(self): kernel_size=1, stride=1, padding=0, - bias=False), + bias=False, + ), build_norm_layer(self.norm_cfg, in_channels[i])[1], # we set align_corners=False for HRNet - Upsample( - scale_factor=2**(j - i), - mode='bilinear', - align_corners=False))) + Upsample(scale_factor=2 ** (j - i), mode='bilinear', align_corners=False), + ) + ) elif j == i: fuse_layer.append(None) else: @@ -160,9 +157,11 @@ def _make_fuse_layers(self): kernel_size=3, stride=2, padding=1, - bias=False), - build_norm_layer(self.norm_cfg, - in_channels[i])[1])) + bias=False, + ), + build_norm_layer(self.norm_cfg, in_channels[i])[1], + ) + ) else: conv_downsamples.append( nn.Sequential( @@ -173,10 +172,12 @@ def _make_fuse_layers(self): kernel_size=3, stride=2, padding=1, - bias=False), - build_norm_layer(self.norm_cfg, - in_channels[j])[1], - nn.ReLU(inplace=False))) + bias=False, + ), + build_norm_layer(self.norm_cfg, in_channels[j])[1], + nn.ReLU(inplace=False), + ) + ) fuse_layer.append(nn.Sequential(*conv_downsamples)) fuse_layers.append(nn.ModuleList(fuse_layer)) @@ -198,10 +199,8 @@ def forward(self, x): y += x[j] elif j > i: y = y + resize( - self.fuse_layers[i][j](x[j]), - size=x[i].shape[2:], - mode='bilinear', - align_corners=False) + self.fuse_layers[i][j](x[j]), size=x[i].shape[2:], mode='bilinear', align_corners=False + ) else: y += self.fuse_layers[i][j](x[j]) x_fuse.append(self.relu(y)) @@ -270,14 +269,16 @@ class HRNet(nn.Module): blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck} - def __init__(self, - extra, - in_channels=3, - conv_cfg=None, - norm_cfg=dict(type='BN', requires_grad=True), - norm_eval=False, - with_cp=False, - zero_init_residual=False): + def __init__( + self, + extra, + in_channels=3, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + with_cp=False, + zero_init_residual=False, + ): super(HRNet, self).__init__() self.extra = extra self.conv_cfg = conv_cfg @@ -290,24 +291,10 @@ def __init__(self, self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1) self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2) - self.conv1 = build_conv_layer( - self.conv_cfg, - in_channels, - 64, - kernel_size=3, - stride=2, - padding=1, - bias=False) + self.conv1 = build_conv_layer(self.conv_cfg, in_channels, 64, kernel_size=3, stride=2, padding=1, bias=False) self.add_module(self.norm1_name, norm1) - self.conv2 = build_conv_layer( - self.conv_cfg, - 64, - 64, - kernel_size=3, - stride=2, - padding=1, - bias=False) + self.conv2 = build_conv_layer(self.conv_cfg, 64, 64, kernel_size=3, stride=2, padding=1, bias=False) self.add_module(self.norm2_name, norm2) self.relu = nn.ReLU(inplace=True) @@ -329,10 +316,8 @@ def __init__(self, block = self.blocks_dict[block_type] num_channels = [channel * block.expansion for channel in num_channels] - self.transition1 = self._make_transition_layer([stage1_out_channels], - num_channels) - self.stage2, pre_stage_channels = self._make_stage( - self.stage2_cfg, num_channels) + self.transition1 = self._make_transition_layer([stage1_out_channels], num_channels) + self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels) # stage 3 self.stage3_cfg = self.extra['stage3'] @@ -341,10 +326,8 @@ def __init__(self, block = self.blocks_dict[block_type] num_channels = [channel * block.expansion for channel in num_channels] - self.transition2 = self._make_transition_layer(pre_stage_channels, - num_channels) - self.stage3, pre_stage_channels = self._make_stage( - self.stage3_cfg, num_channels) + self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels) + self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels) # stage 4 self.stage4_cfg = self.extra['stage4'] @@ -353,10 +336,8 @@ def __init__(self, block = self.blocks_dict[block_type] num_channels = [channel * block.expansion for channel in num_channels] - self.transition3 = self._make_transition_layer(pre_stage_channels, - num_channels) - self.stage4, pre_stage_channels = self._make_stage( - self.stage4_cfg, num_channels) + self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels) + self.stage4, pre_stage_channels = self._make_stage(self.stage4_cfg, num_channels) @property def norm1(self): @@ -368,8 +349,7 @@ def norm2(self): """nn.Module: the normalization layer named "norm2" """ return getattr(self, self.norm2_name) - def _make_transition_layer(self, num_channels_pre_layer, - num_channels_cur_layer): + def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer): """Make transition layer.""" num_branches_cur = len(num_channels_cur_layer) num_branches_pre = len(num_channels_pre_layer) @@ -387,18 +367,19 @@ def _make_transition_layer(self, num_channels_pre_layer, kernel_size=3, stride=1, padding=1, - bias=False), - build_norm_layer(self.norm_cfg, - num_channels_cur_layer[i])[1], - nn.ReLU(inplace=True))) + bias=False, + ), + build_norm_layer(self.norm_cfg, num_channels_cur_layer[i])[1], + nn.ReLU(inplace=True), + ) + ) else: transition_layers.append(None) else: conv_downsamples = [] for j in range(i + 1 - num_branches_pre): in_channels = num_channels_pre_layer[-1] - out_channels = num_channels_cur_layer[i] \ - if j == i - num_branches_pre else in_channels + out_channels = num_channels_cur_layer[i] if j == i - num_branches_pre else in_channels conv_downsamples.append( nn.Sequential( build_conv_layer( @@ -408,9 +389,12 @@ def _make_transition_layer(self, num_channels_pre_layer, kernel_size=3, stride=2, padding=1, - bias=False), + bias=False, + ), build_norm_layer(self.norm_cfg, out_channels)[1], - nn.ReLU(inplace=True))) + nn.ReLU(inplace=True), + ) + ) transition_layers.append(nn.Sequential(*conv_downsamples)) return nn.ModuleList(transition_layers) @@ -421,13 +405,10 @@ def _make_layer(self, block, inplanes, planes, blocks, stride=1): if stride != 1 or inplanes != planes * block.expansion: downsample = nn.Sequential( build_conv_layer( - self.conv_cfg, - inplanes, - planes * block.expansion, - kernel_size=1, - stride=stride, - bias=False), - build_norm_layer(self.norm_cfg, planes * block.expansion)[1]) + self.conv_cfg, inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False + ), + build_norm_layer(self.norm_cfg, planes * block.expansion)[1], + ) layers = [] layers.append( @@ -438,16 +419,14 @@ def _make_layer(self, block, inplanes, planes, blocks, stride=1): downsample=downsample, with_cp=self.with_cp, norm_cfg=self.norm_cfg, - conv_cfg=self.conv_cfg)) + conv_cfg=self.conv_cfg, + ) + ) inplanes = planes * block.expansion for i in range(1, blocks): layers.append( - block( - inplanes, - planes, - with_cp=self.with_cp, - norm_cfg=self.norm_cfg, - conv_cfg=self.conv_cfg)) + block(inplanes, planes, with_cp=self.with_cp, norm_cfg=self.norm_cfg, conv_cfg=self.conv_cfg) + ) return nn.Sequential(*layers) @@ -477,7 +456,9 @@ def _make_stage(self, layer_config, in_channels, multiscale_output=True): reset_multiscale_output, with_cp=self.with_cp, norm_cfg=self.norm_cfg, - conv_cfg=self.conv_cfg)) + conv_cfg=self.conv_cfg, + ) + ) return nn.Sequential(*hr_modules), in_channels diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py index 7abce078ffd2..cda42da943f5 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py @@ -1,9 +1,10 @@ import logging import torch.nn as nn +from torch.nn.modules.batchnorm import _BatchNorm + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, constant_init, kaiming_init from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint -from torch.nn.modules.batchnorm import _BatchNorm from ..builder import BACKBONES from ..utils import InvertedResidual, make_divisible @@ -39,20 +40,21 @@ class MobileNetV2(nn.Module): # Parameters to build layers. 3 parameters are needed to construct a # layer, from left to right: expand_ratio, channel, num_blocks. - arch_settings = [[1, 16, 1], [6, 24, 2], [6, 32, 3], [6, 64, 4], - [6, 96, 3], [6, 160, 3], [6, 320, 1]] - - def __init__(self, - widen_factor=1., - strides=(1, 2, 2, 2, 1, 2, 1), - dilations=(1, 1, 1, 1, 1, 1, 1), - out_indices=(1, 2, 4, 6), - frozen_stages=-1, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU6'), - norm_eval=False, - with_cp=False): + arch_settings = [[1, 16, 1], [6, 24, 2], [6, 32, 3], [6, 64, 4], [6, 96, 3], [6, 160, 3], [6, 320, 1]] + + def __init__( + self, + widen_factor=1.0, + strides=(1, 2, 2, 2, 1, 2, 1), + dilations=(1, 1, 1, 1, 1, 1, 1), + out_indices=(1, 2, 4, 6), + frozen_stages=-1, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU6'), + norm_eval=False, + with_cp=False, + ): super(MobileNetV2, self).__init__() self.widen_factor = widen_factor self.strides = strides @@ -61,12 +63,10 @@ def __init__(self, self.out_indices = out_indices for index in out_indices: if index not in range(0, 7): - raise ValueError('the item in out_indices must in ' - f'range(0, 8). But received {index}') + raise ValueError('the item in out_indices must in ' f'range(0, 8). But received {index}') if frozen_stages not in range(-1, 7): - raise ValueError('frozen_stages must be in range(-1, 7). ' - f'But received {frozen_stages}') + raise ValueError('frozen_stages must be in range(-1, 7). ' f'But received {frozen_stages}') self.out_indices = out_indices self.frozen_stages = frozen_stages self.conv_cfg = conv_cfg @@ -85,7 +85,8 @@ def __init__(self, padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) self.layers = [] @@ -99,13 +100,13 @@ def __init__(self, num_blocks=num_blocks, stride=stride, dilation=dilation, - expand_ratio=expand_ratio) + expand_ratio=expand_ratio, + ) layer_name = f'layer{i + 1}' self.add_module(layer_name, inverted_res_layer) self.layers.append(layer_name) - def make_layer(self, out_channels, num_blocks, stride, dilation, - expand_ratio): + def make_layer(self, out_channels, num_blocks, stride, dilation, expand_ratio): """Stack InvertedResidual blocks to build a layer for MobileNetV2. Args: @@ -128,7 +129,9 @@ def make_layer(self, out_channels, num_blocks, stride, dilation, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, - with_cp=self.with_cp)) + with_cp=self.with_cp, + ) + ) self.in_channels = out_channels return nn.Sequential(*layers) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py index 37a5c313bcfa..11c665237e1c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py @@ -1,11 +1,12 @@ import logging -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv import torch.nn as nn +from torch.nn.modules.batchnorm import _BatchNorm + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, constant_init, kaiming_init from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn.bricks import Conv2dAdaptivePadding from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint -from torch.nn.modules.batchnorm import _BatchNorm from ..builder import BACKBONES from ..utils import InvertedResidualV3 as InvertedResidual @@ -36,46 +37,53 @@ class MobileNetV3(nn.Module): some memory while slowing down the training speed. Default: False. """ + # Parameters to build each block: # [kernel size, mid channels, out channels, with_se, act type, stride] arch_settings = { - 'small': [[3, 16, 16, True, 'ReLU', 2], # block0 layer1 os=4 - [3, 72, 24, False, 'ReLU', 2], # block1 layer2 os=8 - [3, 88, 24, False, 'ReLU', 1], - [5, 96, 40, True, 'HSwish', 2], # block2 layer4 os=16 - [5, 240, 40, True, 'HSwish', 1], - [5, 240, 40, True, 'HSwish', 1], - [5, 120, 48, True, 'HSwish', 1], # block3 layer7 os=16 - [5, 144, 48, True, 'HSwish', 1], - [5, 288, 96, True, 'HSwish', 2], # block4 layer9 os=32 - [5, 576, 96, True, 'HSwish', 1], - [5, 576, 96, True, 'HSwish', 1]], - 'large': [[3, 16, 16, False, 'ReLU', 1], # block0 layer1 os=2 - [3, 64, 24, False, 'ReLU', 2], # block1 layer2 os=4 - [3, 72, 24, False, 'ReLU', 1], - [5, 72, 40, True, 'ReLU', 2], # block2 layer4 os=8 - [5, 120, 40, True, 'ReLU', 1], - [5, 120, 40, True, 'ReLU', 1], - [3, 240, 80, False, 'HSwish', 2], # block3 layer7 os=16 - [3, 200, 80, False, 'HSwish', 1], - [3, 184, 80, False, 'HSwish', 1], - [3, 184, 80, False, 'HSwish', 1], - [3, 480, 112, True, 'HSwish', 1], # block4 layer11 os=16 - [3, 672, 112, True, 'HSwish', 1], - [5, 672, 160, True, 'HSwish', 2], # block5 layer13 os=32 - [5, 960, 160, True, 'HSwish', 1], - [5, 960, 160, True, 'HSwish', 1]] + 'small': [ + [3, 16, 16, True, 'ReLU', 2], # block0 layer1 os=4 + [3, 72, 24, False, 'ReLU', 2], # block1 layer2 os=8 + [3, 88, 24, False, 'ReLU', 1], + [5, 96, 40, True, 'HSwish', 2], # block2 layer4 os=16 + [5, 240, 40, True, 'HSwish', 1], + [5, 240, 40, True, 'HSwish', 1], + [5, 120, 48, True, 'HSwish', 1], # block3 layer7 os=16 + [5, 144, 48, True, 'HSwish', 1], + [5, 288, 96, True, 'HSwish', 2], # block4 layer9 os=32 + [5, 576, 96, True, 'HSwish', 1], + [5, 576, 96, True, 'HSwish', 1], + ], + 'large': [ + [3, 16, 16, False, 'ReLU', 1], # block0 layer1 os=2 + [3, 64, 24, False, 'ReLU', 2], # block1 layer2 os=4 + [3, 72, 24, False, 'ReLU', 1], + [5, 72, 40, True, 'ReLU', 2], # block2 layer4 os=8 + [5, 120, 40, True, 'ReLU', 1], + [5, 120, 40, True, 'ReLU', 1], + [3, 240, 80, False, 'HSwish', 2], # block3 layer7 os=16 + [3, 200, 80, False, 'HSwish', 1], + [3, 184, 80, False, 'HSwish', 1], + [3, 184, 80, False, 'HSwish', 1], + [3, 480, 112, True, 'HSwish', 1], # block4 layer11 os=16 + [3, 672, 112, True, 'HSwish', 1], + [5, 672, 160, True, 'HSwish', 2], # block5 layer13 os=32 + [5, 960, 160, True, 'HSwish', 1], + [5, 960, 160, True, 'HSwish', 1], + ], } # yapf: disable - def __init__(self, - arch='small', - conv_cfg=None, - norm_cfg=dict(type='BN'), - out_indices=(0, 1, 12), - frozen_stages=-1, - reduction_factor=1, - norm_eval=False, - with_cp=False): + def __init__( + self, + arch='small', + conv_cfg=None, + norm_cfg=dict(type='BN'), + out_indices=(0, 1, 12), + frozen_stages=-1, + reduction_factor=1, + norm_eval=False, + with_cp=False, + ): super(MobileNetV3, self).__init__() assert arch in self.arch_settings assert isinstance(reduction_factor, int) and reduction_factor > 0 @@ -85,12 +93,15 @@ def __init__(self, raise ValueError( 'the item in out_indices must in ' f'range(0, {len(self.arch_settings[arch])+2}). ' - f'But received {index}') + f'But received {index}' + ) if frozen_stages not in range(-1, len(self.arch_settings[arch]) + 2): - raise ValueError('frozen_stages must be in range(-1, ' - f'{len(self.arch_settings[arch])+2}). ' - f'But received {frozen_stages}') + raise ValueError( + 'frozen_stages must be in range(-1, ' + f'{len(self.arch_settings[arch])+2}). ' + f'But received {frozen_stages}' + ) self.arch = arch self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg @@ -114,17 +125,16 @@ def _make_layer(self): padding=1, conv_cfg=dict(type='Conv2dAdaptivePadding'), norm_cfg=self.norm_cfg, - act_cfg=dict(type='HSwish')) + act_cfg=dict(type='HSwish'), + ) self.add_module('layer0', layer) layers.append('layer0') layer_setting = self.arch_settings[self.arch] for i, params in enumerate(layer_setting): - (kernel_size, mid_channels, out_channels, with_se, act, - stride) = params + (kernel_size, mid_channels, out_channels, with_se, act, stride) = params - if self.arch == 'large' and i >= 12 or self.arch == 'small' and \ - i >= 8: + if self.arch == 'large' and i >= 12 or self.arch == 'small' and i >= 8: mid_channels = mid_channels // self.reduction_factor out_channels = out_channels // self.reduction_factor @@ -132,8 +142,8 @@ def _make_layer(self): se_cfg = dict( channels=mid_channels, ratio=4, - act_cfg=(dict(type='ReLU'), - dict(type='HSigmoid', bias=3.0, divisor=6.0))) + act_cfg=(dict(type='ReLU'), dict(type='HSigmoid', bias=3.0, divisor=6.0)), + ) else: se_cfg = None @@ -148,7 +158,8 @@ def _make_layer(self): conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=dict(type=act), - with_cp=self.with_cp) + with_cp=self.with_cp, + ) in_channels = out_channels layer_name = 'layer{}'.format(i + 1) self.add_module(layer_name, layer) @@ -166,7 +177,8 @@ def _make_layer(self): padding=0, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=dict(type='HSwish')) + act_cfg=dict(type='HSwish'), + ) layer_name = 'layer{}'.format(len(layer_setting) + 1) self.add_module(layer_name, layer) layers.append(layer_name) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py index f03a57fb2965..83915384db3a 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py @@ -55,19 +55,21 @@ class SplitAttentionConv2d(nn.Module): dcn (dict): Config dict for DCN. Default: None. """ - def __init__(self, - in_channels, - channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - radix=2, - reduction_factor=4, - conv_cfg=None, - norm_cfg=dict(type='BN'), - dcn=None): + def __init__( + self, + in_channels, + channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + radix=2, + reduction_factor=4, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + ): super(SplitAttentionConv2d, self).__init__() inter_channels = max(in_channels * radix // reduction_factor, 32) self.radix = radix @@ -90,18 +92,15 @@ def __init__(self, padding=padding, dilation=dilation, groups=groups * radix, - bias=False) - self.norm0_name, norm0 = build_norm_layer( - norm_cfg, channels * radix, postfix=0) + bias=False, + ) + self.norm0_name, norm0 = build_norm_layer(norm_cfg, channels * radix, postfix=0) self.add_module(self.norm0_name, norm0) self.relu = nn.ReLU(inplace=True) - self.fc1 = build_conv_layer( - None, channels, inter_channels, 1, groups=self.groups) - self.norm1_name, norm1 = build_norm_layer( - norm_cfg, inter_channels, postfix=1) + self.fc1 = build_conv_layer(None, channels, inter_channels, 1, groups=self.groups) + self.norm1_name, norm1 = build_norm_layer(norm_cfg, inter_channels, postfix=1) self.add_module(self.norm1_name, norm1) - self.fc2 = build_conv_layer( - None, inter_channels, channels * radix, 1, groups=self.groups) + self.fc2 = build_conv_layer(None, inter_channels, channels * radix, 1, groups=self.groups) self.rsoftmax = RSoftmax(radix, groups) @property @@ -160,41 +159,37 @@ class Bottleneck(_Bottleneck): Bottleneck. Default: True. kwargs (dict): Key word arguments for base class. """ + expansion = 4 - def __init__(self, - inplanes, - planes, - groups=1, - base_width=4, - base_channels=64, - radix=2, - reduction_factor=4, - avg_down_stride=True, - **kwargs): + def __init__( + self, + inplanes, + planes, + groups=1, + base_width=4, + base_channels=64, + radix=2, + reduction_factor=4, + avg_down_stride=True, + **kwargs + ): """Bottleneck block for ResNeSt.""" super(Bottleneck, self).__init__(inplanes, planes, **kwargs) if groups == 1: width = self.planes else: - width = math.floor(self.planes * - (base_width / base_channels)) * groups + width = math.floor(self.planes * (base_width / base_channels)) * groups self.avg_down_stride = avg_down_stride and self.conv2_stride > 1 - self.norm1_name, norm1 = build_norm_layer( - self.norm_cfg, width, postfix=1) - self.norm3_name, norm3 = build_norm_layer( - self.norm_cfg, self.planes * self.expansion, postfix=3) + self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, width, postfix=1) + self.norm3_name, norm3 = build_norm_layer(self.norm_cfg, self.planes * self.expansion, postfix=3) self.conv1 = build_conv_layer( - self.conv_cfg, - self.inplanes, - width, - kernel_size=1, - stride=self.conv1_stride, - bias=False) + self.conv_cfg, self.inplanes, width, kernel_size=1, stride=self.conv1_stride, bias=False + ) self.add_module(self.norm1_name, norm1) self.with_modulated_dcn = False self.conv2 = SplitAttentionConv2d( @@ -209,22 +204,17 @@ def __init__(self, reduction_factor=reduction_factor, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - dcn=self.dcn) + dcn=self.dcn, + ) delattr(self, self.norm2_name) if self.avg_down_stride: self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1) - self.conv3 = build_conv_layer( - self.conv_cfg, - width, - self.planes * self.expansion, - kernel_size=1, - bias=False) + self.conv3 = build_conv_layer(self.conv_cfg, width, self.planes * self.expansion, kernel_size=1, bias=False) self.add_module(self.norm3_name, norm3) def forward(self, x): - def _inner_forward(x): identity = x @@ -285,16 +275,10 @@ class ResNeSt(ResNetV1d): 50: (Bottleneck, (3, 4, 6, 3)), 101: (Bottleneck, (3, 4, 23, 3)), 152: (Bottleneck, (3, 8, 36, 3)), - 200: (Bottleneck, (3, 24, 36, 3)) + 200: (Bottleneck, (3, 24, 36, 3)), } - def __init__(self, - groups=1, - base_width=4, - radix=2, - reduction_factor=4, - avg_down_stride=True, - **kwargs): + def __init__(self, groups=1, base_width=4, radix=2, reduction_factor=4, avg_down_stride=True, **kwargs): self.groups = groups self.base_width = base_width self.radix = radix @@ -311,4 +295,5 @@ def make_res_layer(self, **kwargs): radix=self.radix, reduction_factor=self.reduction_factor, avg_down_stride=self.avg_down_stride, - **kwargs) + **kwargs + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py index fa3ade5cd085..8b418aad171d 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py @@ -1,11 +1,17 @@ import torch.nn as nn import torch.utils.checkpoint as cp -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import (build_conv_layer, build_norm_layer, build_plugin_layer, - constant_init, kaiming_init) + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + build_conv_layer, + build_norm_layer, + build_plugin_layer, + constant_init, + kaiming_init, +) from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm - from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + from ..builder import BACKBONES from ..utils import ResLayer @@ -15,18 +21,20 @@ class BasicBlock(nn.Module): expansion = 1 - def __init__(self, - inplanes, - planes, - stride=1, - dilation=1, - downsample=None, - style='pytorch', - with_cp=False, - conv_cfg=None, - norm_cfg=dict(type='BN'), - dcn=None, - plugins=None): + def __init__( + self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None, + ): super(BasicBlock, self).__init__() assert dcn is None, 'Not implemented yet.' assert plugins is None, 'Not implemented yet.' @@ -35,17 +43,10 @@ def __init__(self, self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) self.conv1 = build_conv_layer( - conv_cfg, - inplanes, - planes, - 3, - stride=stride, - padding=dilation, - dilation=dilation, - bias=False) + conv_cfg, inplanes, planes, 3, stride=stride, padding=dilation, dilation=dilation, bias=False + ) self.add_module(self.norm1_name, norm1) - self.conv2 = build_conv_layer( - conv_cfg, planes, planes, 3, padding=1, bias=False) + self.conv2 = build_conv_layer(conv_cfg, planes, planes, 3, padding=1, bias=False) self.add_module(self.norm2_name, norm2) self.relu = nn.ReLU(inplace=True) @@ -103,18 +104,20 @@ class Bottleneck(nn.Module): expansion = 4 - def __init__(self, - inplanes, - planes, - stride=1, - dilation=1, - downsample=None, - style='pytorch', - with_cp=False, - conv_cfg=None, - norm_cfg=dict(type='BN'), - dcn=None, - plugins=None): + def __init__( + self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None, + ): super(Bottleneck, self).__init__() assert style in ['pytorch', 'caffe'] assert dcn is None or isinstance(dcn, dict) @@ -138,18 +141,9 @@ def __init__(self, if self.with_plugins: # collect plugins for conv1/conv2/conv3 - self.after_conv1_plugins = [ - plugin['cfg'] for plugin in plugins - if plugin['position'] == 'after_conv1' - ] - self.after_conv2_plugins = [ - plugin['cfg'] for plugin in plugins - if plugin['position'] == 'after_conv2' - ] - self.after_conv3_plugins = [ - plugin['cfg'] for plugin in plugins - if plugin['position'] == 'after_conv3' - ] + self.after_conv1_plugins = [plugin['cfg'] for plugin in plugins if plugin['position'] == 'after_conv1'] + self.after_conv2_plugins = [plugin['cfg'] for plugin in plugins if plugin['position'] == 'after_conv2'] + self.after_conv3_plugins = [plugin['cfg'] for plugin in plugins if plugin['position'] == 'after_conv3'] if self.style == 'pytorch': self.conv1_stride = 1 @@ -160,16 +154,9 @@ def __init__(self, self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) - self.norm3_name, norm3 = build_norm_layer( - norm_cfg, planes * self.expansion, postfix=3) + self.norm3_name, norm3 = build_norm_layer(norm_cfg, planes * self.expansion, postfix=3) - self.conv1 = build_conv_layer( - conv_cfg, - inplanes, - planes, - kernel_size=1, - stride=self.conv1_stride, - bias=False) + self.conv1 = build_conv_layer(conv_cfg, inplanes, planes, kernel_size=1, stride=self.conv1_stride, bias=False) self.add_module(self.norm1_name, norm1) fallback_on_stride = False if self.with_dcn: @@ -183,7 +170,8 @@ def __init__(self, stride=self.conv2_stride, padding=dilation, dilation=dilation, - bias=False) + bias=False, + ) else: assert self.conv_cfg is None, 'conv_cfg must be None for DCN' self.conv2 = build_conv_layer( @@ -194,27 +182,20 @@ def __init__(self, stride=self.conv2_stride, padding=dilation, dilation=dilation, - bias=False) + bias=False, + ) self.add_module(self.norm2_name, norm2) - self.conv3 = build_conv_layer( - conv_cfg, - planes, - planes * self.expansion, - kernel_size=1, - bias=False) + self.conv3 = build_conv_layer(conv_cfg, planes, planes * self.expansion, kernel_size=1, bias=False) self.add_module(self.norm3_name, norm3) self.relu = nn.ReLU(inplace=True) self.downsample = downsample if self.with_plugins: - self.after_conv1_plugin_names = self.make_block_plugins( - planes, self.after_conv1_plugins) - self.after_conv2_plugin_names = self.make_block_plugins( - planes, self.after_conv2_plugins) - self.after_conv3_plugin_names = self.make_block_plugins( - planes * self.expansion, self.after_conv3_plugins) + self.after_conv1_plugin_names = self.make_block_plugins(planes, self.after_conv1_plugins) + self.after_conv2_plugin_names = self.make_block_plugins(planes, self.after_conv2_plugins) + self.after_conv3_plugin_names = self.make_block_plugins(planes * self.expansion, self.after_conv3_plugins) def make_block_plugins(self, in_channels, plugins): """make plugins for block. @@ -230,10 +211,7 @@ def make_block_plugins(self, in_channels, plugins): plugin_names = [] for plugin in plugins: plugin = plugin.copy() - name, layer = build_plugin_layer( - plugin, - in_channels=in_channels, - postfix=plugin.pop('postfix', '')) + name, layer = build_plugin_layer(plugin, in_channels=in_channels, postfix=plugin.pop('postfix', '')) assert not hasattr(self, name), f'duplicate plugin {name}' self.add_module(name, layer) plugin_names.append(name) @@ -367,32 +345,34 @@ class ResNet(nn.Module): 34: (BasicBlock, (3, 4, 6, 3)), 50: (Bottleneck, (3, 4, 6, 3)), 101: (Bottleneck, (3, 4, 23, 3)), - 152: (Bottleneck, (3, 8, 36, 3)) + 152: (Bottleneck, (3, 8, 36, 3)), } - def __init__(self, - depth, - in_channels=3, - stem_channels=64, - base_channels=64, - num_stages=4, - strides=(1, 2, 2, 2), - dilations=(1, 1, 1, 1), - out_indices=(0, 1, 2, 3), - style='pytorch', - deep_stem=False, - avg_down=False, - frozen_stages=-1, - conv_cfg=None, - norm_cfg=dict(type='BN', requires_grad=True), - norm_eval=False, - dcn=None, - stage_with_dcn=(False, False, False, False), - plugins=None, - multi_grid=None, - contract_dilation=False, - with_cp=False, - zero_init_residual=True): + def __init__( + self, + depth, + in_channels=3, + stem_channels=64, + base_channels=64, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + deep_stem=False, + avg_down=False, + frozen_stages=-1, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + dcn=None, + stage_with_dcn=(False, False, False, False), + plugins=None, + multi_grid=None, + contract_dilation=False, + with_cp=False, + zero_init_residual=True, + ): super(ResNet, self).__init__() if depth not in self.arch_settings: raise KeyError(f'invalid depth {depth} for resnet') @@ -438,9 +418,8 @@ def __init__(self, else: stage_plugins = None # multi grid is applied to last layer only - stage_multi_grid = multi_grid if i == len( - self.stage_blocks) - 1 else None - planes = base_channels * 2**i + stage_multi_grid = multi_grid if i == len(self.stage_blocks) - 1 else None + planes = base_channels * 2 ** i res_layer = self.make_res_layer( block=self.block, inplanes=self.inplanes, @@ -456,7 +435,8 @@ def __init__(self, dcn=dcn, plugins=stage_plugins, multi_grid=stage_multi_grid, - contract_dilation=contract_dilation) + contract_dilation=contract_dilation, + ) self.inplanes = planes * self.block.expansion layer_name = f'layer{i+1}' self.add_module(layer_name, res_layer) @@ -464,8 +444,7 @@ def __init__(self, self._freeze_stages() - self.feat_dim = self.block.expansion * base_channels * 2**( - len(self.stage_blocks) - 1) + self.feat_dim = self.block.expansion * base_channels * 2 ** (len(self.stage_blocks) - 1) def make_stage_plugins(self, plugins, stage_idx): """make plugins for ResNet 'stage_idx'th stage . @@ -534,13 +513,8 @@ def _make_stem_layer(self, in_channels, stem_channels): if self.deep_stem: self.stem = nn.Sequential( build_conv_layer( - self.conv_cfg, - in_channels, - stem_channels // 2, - kernel_size=3, - stride=2, - padding=1, - bias=False), + self.conv_cfg, in_channels, stem_channels // 2, kernel_size=3, stride=2, padding=1, bias=False + ), build_norm_layer(self.norm_cfg, stem_channels // 2)[1], nn.ReLU(inplace=True), build_conv_layer( @@ -550,30 +524,21 @@ def _make_stem_layer(self, in_channels, stem_channels): kernel_size=3, stride=1, padding=1, - bias=False), + bias=False, + ), build_norm_layer(self.norm_cfg, stem_channels // 2)[1], nn.ReLU(inplace=True), build_conv_layer( - self.conv_cfg, - stem_channels // 2, - stem_channels, - kernel_size=3, - stride=1, - padding=1, - bias=False), + self.conv_cfg, stem_channels // 2, stem_channels, kernel_size=3, stride=1, padding=1, bias=False + ), build_norm_layer(self.norm_cfg, stem_channels)[1], - nn.ReLU(inplace=True)) + nn.ReLU(inplace=True), + ) else: self.conv1 = build_conv_layer( - self.conv_cfg, - in_channels, - stem_channels, - kernel_size=7, - stride=2, - padding=3, - bias=False) - self.norm1_name, norm1 = build_norm_layer( - self.norm_cfg, stem_channels, postfix=1) + self.conv_cfg, in_channels, stem_channels, kernel_size=7, stride=2, padding=3, bias=False + ) + self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, stem_channels, postfix=1) self.add_module(self.norm1_name, norm1) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) @@ -616,8 +581,7 @@ def init_weights(self, pretrained=None): if self.dcn is not None: for m in self.modules(): - if isinstance(m, Bottleneck) and hasattr( - m, 'conv2_offset'): + if isinstance(m, Bottleneck) and hasattr(m, 'conv2_offset'): constant_init(m.conv2_offset, 0) if self.zero_init_residual: @@ -670,8 +634,7 @@ class ResNetV1c(ResNet): """ def __init__(self, **kwargs): - super(ResNetV1c, self).__init__( - deep_stem=True, avg_down=False, **kwargs) + super(ResNetV1c, self).__init__(deep_stem=True, avg_down=False, **kwargs) @BACKBONES.register_module() @@ -684,5 +647,4 @@ class ResNetV1d(ResNet): """ def __init__(self, **kwargs): - super(ResNetV1d, self).__init__( - deep_stem=True, avg_down=True, **kwargs) + super(ResNetV1d, self).__init__(deep_stem=True, avg_down=True, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py index 9b085b9d4497..5ee1d26d15a6 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py @@ -15,35 +15,21 @@ class Bottleneck(_Bottleneck): "caffe", the stride-two layer is the first 1x1 conv layer. """ - def __init__(self, - inplanes, - planes, - groups=1, - base_width=4, - base_channels=64, - **kwargs): + def __init__(self, inplanes, planes, groups=1, base_width=4, base_channels=64, **kwargs): super(Bottleneck, self).__init__(inplanes, planes, **kwargs) if groups == 1: width = self.planes else: - width = math.floor(self.planes * - (base_width / base_channels)) * groups + width = math.floor(self.planes * (base_width / base_channels)) * groups - self.norm1_name, norm1 = build_norm_layer( - self.norm_cfg, width, postfix=1) - self.norm2_name, norm2 = build_norm_layer( - self.norm_cfg, width, postfix=2) - self.norm3_name, norm3 = build_norm_layer( - self.norm_cfg, self.planes * self.expansion, postfix=3) + self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, width, postfix=1) + self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, width, postfix=2) + self.norm3_name, norm3 = build_norm_layer(self.norm_cfg, self.planes * self.expansion, postfix=3) self.conv1 = build_conv_layer( - self.conv_cfg, - self.inplanes, - width, - kernel_size=1, - stride=self.conv1_stride, - bias=False) + self.conv_cfg, self.inplanes, width, kernel_size=1, stride=self.conv1_stride, bias=False + ) self.add_module(self.norm1_name, norm1) fallback_on_stride = False self.with_modulated_dcn = False @@ -59,7 +45,8 @@ def __init__(self, padding=self.dilation, dilation=self.dilation, groups=groups, - bias=False) + bias=False, + ) else: assert self.conv_cfg is None, 'conv_cfg must be None for DCN' self.conv2 = build_conv_layer( @@ -71,15 +58,11 @@ def __init__(self, padding=self.dilation, dilation=self.dilation, groups=groups, - bias=False) + bias=False, + ) self.add_module(self.norm2_name, norm2) - self.conv3 = build_conv_layer( - self.conv_cfg, - width, - self.planes * self.expansion, - kernel_size=1, - bias=False) + self.conv3 = build_conv_layer(self.conv_cfg, width, self.planes * self.expansion, kernel_size=1, bias=False) self.add_module(self.norm3_name, norm3) @@ -128,7 +111,7 @@ class ResNeXt(ResNet): arch_settings = { 50: (Bottleneck, (3, 4, 6, 3)), 101: (Bottleneck, (3, 4, 23, 3)), - 152: (Bottleneck, (3, 8, 36, 3)) + 152: (Bottleneck, (3, 8, 36, 3)), } def __init__(self, groups=1, base_width=4, **kwargs): @@ -138,8 +121,4 @@ def __init__(self, groups=1, base_width=4, **kwargs): def make_res_layer(self, **kwargs): """Pack all blocks in a stage into a ``ResLayer``""" - return ResLayer( - groups=self.groups, - base_width=self.base_width, - base_channels=self.base_channels, - **kwargs) + return ResLayer(groups=self.groups, base_width=self.base_width, base_channels=self.base_channels, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py index ab45a33edbc2..e3a5a76e39f3 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py @@ -1,11 +1,18 @@ import torch.nn as nn import torch.utils.checkpoint as cp -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import (UPSAMPLE_LAYERS, ConvModule, build_activation_layer, - build_norm_layer, constant_init, kaiming_init) + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + UPSAMPLE_LAYERS, + ConvModule, + build_activation_layer, + build_norm_layer, + constant_init, + kaiming_init, +) from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm - from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + from ..builder import BACKBONES from ..utils import UpConvBlock @@ -40,18 +47,20 @@ class BasicConvBlock(nn.Module): plugins (dict): plugins for convolutional layers. Default: None. """ - def __init__(self, - in_channels, - out_channels, - num_convs=2, - stride=1, - dilation=1, - with_cp=False, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - dcn=None, - plugins=None): + def __init__( + self, + in_channels, + out_channels, + num_convs=2, + stride=1, + dilation=1, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + dcn=None, + plugins=None, + ): super(BasicConvBlock, self).__init__() assert dcn is None, 'Not implemented yet.' assert plugins is None, 'Not implemented yet.' @@ -69,7 +78,9 @@ def __init__(self, padding=1 if i == 0 else dilation, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg)) + act_cfg=act_cfg, + ) + ) self.convs = nn.Sequential(*convs) @@ -102,33 +113,30 @@ class DeconvModule(nn.Module): kernel_size (int): Kernel size of the convolutional layer. Default: 4. """ - def __init__(self, - in_channels, - out_channels, - with_cp=False, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - *, - kernel_size=4, - scale_factor=2): + def __init__( + self, + in_channels, + out_channels, + with_cp=False, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + *, + kernel_size=4, + scale_factor=2, + ): super(DeconvModule, self).__init__() - assert (kernel_size - scale_factor >= 0) and\ - (kernel_size - scale_factor) % 2 == 0,\ - f'kernel_size should be greater than or equal to scale_factor '\ - f'and (kernel_size - scale_factor) should be even numbers, '\ - f'while the kernel size is {kernel_size} and scale_factor is '\ - f'{scale_factor}.' + assert (kernel_size - scale_factor >= 0) and (kernel_size - scale_factor) % 2 == 0, ( + f'kernel_size should be greater than or equal to scale_factor ' + f'and (kernel_size - scale_factor) should be even numbers, ' + f'while the kernel size is {kernel_size} and scale_factor is ' + f'{scale_factor}.' + ) stride = scale_factor padding = (kernel_size - scale_factor) // 2 self.with_cp = with_cp - deconv = nn.ConvTranspose2d( - in_channels, - out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding) + deconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding) norm_name, norm = build_norm_layer(norm_cfg, out_channels) activate = build_activation_layer(act_cfg) @@ -176,20 +184,21 @@ class InterpConv(nn.Module): scale_factor=2, mode='bilinear', align_corners=False). """ - def __init__(self, - in_channels, - out_channels, - with_cp=False, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - *, - conv_cfg=None, - conv_first=False, - kernel_size=1, - stride=1, - padding=0, - upsample_cfg=dict( - scale_factor=2, mode='bilinear', align_corners=False)): + def __init__( + self, + in_channels, + out_channels, + with_cp=False, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + *, + conv_cfg=None, + conv_first=False, + kernel_size=1, + stride=1, + padding=0, + upsample_cfg=dict(scale_factor=2, mode='bilinear', align_corners=False), + ): super(InterpConv, self).__init__() self.with_cp = with_cp @@ -201,7 +210,8 @@ def __init__(self, padding=padding, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) upsample = nn.Upsample(**upsample_cfg) if conv_first: self.interp_upsample = nn.Sequential(conv, upsample) @@ -274,57 +284,65 @@ class UNet(nn.Module): """ - def __init__(self, - in_channels=3, - base_channels=64, - num_stages=5, - strides=(1, 1, 1, 1, 1), - enc_num_convs=(2, 2, 2, 2, 2), - dec_num_convs=(2, 2, 2, 2), - downsamples=(True, True, True, True), - enc_dilations=(1, 1, 1, 1, 1), - dec_dilations=(1, 1, 1, 1), - with_cp=False, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - upsample_cfg=dict(type='InterpConv'), - norm_eval=False, - dcn=None, - plugins=None): + def __init__( + self, + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False, + dcn=None, + plugins=None, + ): super(UNet, self).__init__() assert dcn is None, 'Not implemented yet.' assert plugins is None, 'Not implemented yet.' - assert len(strides) == num_stages, \ - 'The length of strides should be equal to num_stages, '\ - f'while the strides is {strides}, the length of '\ - f'strides is {len(strides)}, and the num_stages is '\ + assert len(strides) == num_stages, ( + 'The length of strides should be equal to num_stages, ' + f'while the strides is {strides}, the length of ' + f'strides is {len(strides)}, and the num_stages is ' f'{num_stages}.' - assert len(enc_num_convs) == num_stages, \ - 'The length of enc_num_convs should be equal to num_stages, '\ - f'while the enc_num_convs is {enc_num_convs}, the length of '\ - f'enc_num_convs is {len(enc_num_convs)}, and the num_stages is '\ + ) + assert len(enc_num_convs) == num_stages, ( + 'The length of enc_num_convs should be equal to num_stages, ' + f'while the enc_num_convs is {enc_num_convs}, the length of ' + f'enc_num_convs is {len(enc_num_convs)}, and the num_stages is ' f'{num_stages}.' - assert len(dec_num_convs) == (num_stages-1), \ - 'The length of dec_num_convs should be equal to (num_stages-1), '\ - f'while the dec_num_convs is {dec_num_convs}, the length of '\ - f'dec_num_convs is {len(dec_num_convs)}, and the num_stages is '\ + ) + assert len(dec_num_convs) == (num_stages - 1), ( + 'The length of dec_num_convs should be equal to (num_stages-1), ' + f'while the dec_num_convs is {dec_num_convs}, the length of ' + f'dec_num_convs is {len(dec_num_convs)}, and the num_stages is ' f'{num_stages}.' - assert len(downsamples) == (num_stages-1), \ - 'The length of downsamples should be equal to (num_stages-1), '\ - f'while the downsamples is {downsamples}, the length of '\ - f'downsamples is {len(downsamples)}, and the num_stages is '\ + ) + assert len(downsamples) == (num_stages - 1), ( + 'The length of downsamples should be equal to (num_stages-1), ' + f'while the downsamples is {downsamples}, the length of ' + f'downsamples is {len(downsamples)}, and the num_stages is ' f'{num_stages}.' - assert len(enc_dilations) == num_stages, \ - 'The length of enc_dilations should be equal to num_stages, '\ - f'while the enc_dilations is {enc_dilations}, the length of '\ - f'enc_dilations is {len(enc_dilations)}, and the num_stages is '\ + ) + assert len(enc_dilations) == num_stages, ( + 'The length of enc_dilations should be equal to num_stages, ' + f'while the enc_dilations is {enc_dilations}, the length of ' + f'enc_dilations is {len(enc_dilations)}, and the num_stages is ' f'{num_stages}.' - assert len(dec_dilations) == (num_stages-1), \ - 'The length of dec_dilations should be equal to (num_stages-1), '\ - f'while the dec_dilations is {dec_dilations}, the length of '\ - f'dec_dilations is {len(dec_dilations)}, and the num_stages is '\ + ) + assert len(dec_dilations) == (num_stages - 1), ( + 'The length of dec_dilations should be equal to (num_stages-1), ' + f'while the dec_dilations is {dec_dilations}, the length of ' + f'dec_dilations is {len(dec_dilations)}, and the num_stages is ' f'{num_stages}.' + ) self.num_stages = num_stages self.strides = strides self.downsamples = downsamples @@ -339,13 +357,13 @@ def __init__(self, if i != 0: if strides[i] == 1 and downsamples[i - 1]: enc_conv_block.append(nn.MaxPool2d(kernel_size=2)) - upsample = (strides[i] != 1 or downsamples[i - 1]) + upsample = strides[i] != 1 or downsamples[i - 1] self.decoder.append( UpConvBlock( conv_block=BasicConvBlock, - in_channels=base_channels * 2**i, - skip_channels=base_channels * 2**(i - 1), - out_channels=base_channels * 2**(i - 1), + in_channels=base_channels * 2 ** i, + skip_channels=base_channels * 2 ** (i - 1), + out_channels=base_channels * 2 ** (i - 1), num_convs=dec_num_convs[i - 1], stride=1, dilation=dec_dilations[i - 1], @@ -355,12 +373,14 @@ def __init__(self, act_cfg=act_cfg, upsample_cfg=upsample_cfg if upsample else None, dcn=None, - plugins=None)) + plugins=None, + ) + ) enc_conv_block.append( BasicConvBlock( in_channels=in_channels, - out_channels=base_channels * 2**i, + out_channels=base_channels * 2 ** i, num_convs=enc_num_convs[i], stride=strides[i], dilation=enc_dilations[i], @@ -369,9 +389,11 @@ def __init__(self, norm_cfg=norm_cfg, act_cfg=act_cfg, dcn=None, - plugins=None)) + plugins=None, + ) + ) self.encoder.append((nn.Sequential(*enc_conv_block))) - in_channels = base_channels * 2**i + in_channels = base_channels * 2 ** i def forward(self, x): self._check_input_divisible(x) @@ -402,12 +424,12 @@ def _check_input_divisible(self, x): for i in range(1, self.num_stages): if self.strides[i] == 2 or self.downsamples[i - 1]: whole_downsample_rate *= 2 - assert (h % whole_downsample_rate == 0) \ - and (w % whole_downsample_rate == 0),\ - f'The input image size {(h, w)} should be divisible by the whole '\ - f'downsample rate {whole_downsample_rate}, when num_stages is '\ - f'{self.num_stages}, strides is {self.strides}, and downsamples '\ + assert (h % whole_downsample_rate == 0) and (w % whole_downsample_rate == 0), ( + f'The input image size {(h, w)} should be divisible by the whole ' + f'downsample rate {whole_downsample_rate}, when num_stages is ' + f'{self.num_stages}, strides is {self.strides}, and downsamples ' f'is {self.downsamples}.' + ) def init_weights(self, pretrained=None): """Initialize the weights in backbone. diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py index d6b2fe4b35ee..7dcf93aa357b 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py @@ -5,24 +5,25 @@ # Written by Kunchang Li # -------------------------------------------------------- -from collections import OrderedDict import math - +from collections import OrderedDict from functools import partial + +import numpy as np import torch import torch.nn as nn import torch.nn.functional as F import torch.utils.checkpoint as checkpoint -import numpy as np from timm.models.layers import DropPath, to_2tuple, trunc_normal_ from nemo.collections.multimodal.models.controlnet.uniformer.mmcv_custom import load_checkpoint from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + from ..builder import BACKBONES class Mlp(nn.Module): - def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.0): super().__init__() out_features = out_features or in_features hidden_features = hidden_features or in_features @@ -41,7 +42,7 @@ def forward(self, x): class CMlp(nn.Module): - def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.0): super().__init__() out_features = out_features or in_features hidden_features = hidden_features or in_features @@ -60,8 +61,19 @@ def forward(self, x): class CBlock(nn.Module): - def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., - drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): + def __init__( + self, + dim, + num_heads, + mlp_ratio=4.0, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + ): super().__init__() self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) self.norm1 = nn.BatchNorm2d(dim) @@ -69,7 +81,7 @@ def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, self.conv2 = nn.Conv2d(dim, dim, 1) self.attn = nn.Conv2d(dim, dim, 5, padding=2, groups=dim) # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here - self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() self.norm2 = nn.BatchNorm2d(dim) mlp_hidden_dim = int(dim * mlp_ratio) self.mlp = CMlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) @@ -82,7 +94,7 @@ def forward(self, x): class Attention(nn.Module): - def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): + def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.0, proj_drop=0.0): super().__init__() self.num_heads = num_heads head_dim = dim // num_heads @@ -97,7 +109,7 @@ def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0. def forward(self, x): B, N, C = x.shape qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) - q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) attn = (q @ k.transpose(-2, -1)) * self.scale attn = attn.softmax(dim=-1) @@ -110,17 +122,27 @@ def forward(self, x): class SABlock(nn.Module): - def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., - drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): + def __init__( + self, + dim, + num_heads, + mlp_ratio=4.0, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + ): super().__init__() self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) self.norm1 = norm_layer(dim) self.attn = Attention( - dim, - num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, - attn_drop=attn_drop, proj_drop=drop) + dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop + ) # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here - self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() self.norm2 = norm_layer(dim) mlp_hidden_dim = int(dim * mlp_ratio) self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) @@ -132,7 +154,7 @@ def forward(self, x): x = x + self.drop_path(self.attn(self.norm1(x))) x = x + self.drop_path(self.mlp(self.norm2(x))) x = x.transpose(1, 2).reshape(B, N, H, W) - return x + return x def window_partition(x, window_size): @@ -166,18 +188,29 @@ def window_reverse(windows, window_size, H, W): class SABlock_Windows(nn.Module): - def __init__(self, dim, num_heads, window_size=14, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., - drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): + def __init__( + self, + dim, + num_heads, + window_size=14, + mlp_ratio=4.0, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + ): super().__init__() - self.window_size=window_size + self.window_size = window_size self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) self.norm1 = norm_layer(dim) self.attn = Attention( - dim, - num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, - attn_drop=attn_drop, proj_drop=drop) + dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop + ) # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here - self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() self.norm2 = norm_layer(dim) mlp_hidden_dim = int(dim * mlp_ratio) self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) @@ -194,7 +227,7 @@ def forward(self, x): pad_b = (self.window_size - H % self.window_size) % self.window_size x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) _, Hp, Wp, _ = x.shape - + x_windows = window_partition(x, self.window_size) # nW*B, window_size, window_size, C x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C @@ -212,12 +245,13 @@ def forward(self, x): x = shortcut + self.drop_path(x) x = x + self.drop_path(self.mlp(self.norm2(x))) x = x.permute(0, 3, 1, 2).reshape(B, C, H, W) - return x - + return x + class PatchEmbed(nn.Module): """ Image to Patch Embedding """ + def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): super().__init__() img_size = to_2tuple(img_size) @@ -237,19 +271,38 @@ def forward(self, x): x = self.norm(x) x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() return x - -@BACKBONES.register_module() + +@BACKBONES.register_module() class UniFormer(nn.Module): """ Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 """ - def __init__(self, layers=[3, 4, 8, 3], img_size=224, in_chans=3, num_classes=80, embed_dim=[64, 128, 320, 512], - head_dim=64, mlp_ratio=4., qkv_bias=True, qk_scale=None, representation_size=None, - drop_rate=0., attn_drop_rate=0., drop_path_rate=0., norm_layer=partial(nn.LayerNorm, eps=1e-6), - pretrained_path=None, use_checkpoint=False, checkpoint_num=[0, 0, 0, 0], - windows=False, hybrid=False, window_size=14): + + def __init__( + self, + layers=[3, 4, 8, 3], + img_size=224, + in_chans=3, + num_classes=80, + embed_dim=[64, 128, 320, 512], + head_dim=64, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + representation_size=None, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.0, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + pretrained_path=None, + use_checkpoint=False, + checkpoint_num=[0, 0, 0, 0], + windows=False, + hybrid=False, + window_size=14, + ): """ Args: layer (list): number of block in each layer @@ -281,88 +334,167 @@ def __init__(self, layers=[3, 4, 8, 3], img_size=224, in_chans=3, num_classes=80 print(f'Use Checkpoint: {self.use_checkpoint}') print(f'Checkpoint Number: {self.checkpoint_num}') self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models - norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6) - - self.patch_embed1 = PatchEmbed( - img_size=img_size, patch_size=4, in_chans=in_chans, embed_dim=embed_dim[0]) + norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6) + + self.patch_embed1 = PatchEmbed(img_size=img_size, patch_size=4, in_chans=in_chans, embed_dim=embed_dim[0]) self.patch_embed2 = PatchEmbed( - img_size=img_size // 4, patch_size=2, in_chans=embed_dim[0], embed_dim=embed_dim[1]) + img_size=img_size // 4, patch_size=2, in_chans=embed_dim[0], embed_dim=embed_dim[1] + ) self.patch_embed3 = PatchEmbed( - img_size=img_size // 8, patch_size=2, in_chans=embed_dim[1], embed_dim=embed_dim[2]) + img_size=img_size // 8, patch_size=2, in_chans=embed_dim[1], embed_dim=embed_dim[2] + ) self.patch_embed4 = PatchEmbed( - img_size=img_size // 16, patch_size=2, in_chans=embed_dim[2], embed_dim=embed_dim[3]) + img_size=img_size // 16, patch_size=2, in_chans=embed_dim[2], embed_dim=embed_dim[3] + ) self.pos_drop = nn.Dropout(p=drop_rate) dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(layers))] # stochastic depth decay rule num_heads = [dim // head_dim for dim in embed_dim] - self.blocks1 = nn.ModuleList([ - CBlock( - dim=embed_dim[0], num_heads=num_heads[0], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, - drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer) - for i in range(layers[0])]) - self.norm1=norm_layer(embed_dim[0]) - self.blocks2 = nn.ModuleList([ - CBlock( - dim=embed_dim[1], num_heads=num_heads[1], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, - drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]], norm_layer=norm_layer) - for i in range(layers[1])]) + self.blocks1 = nn.ModuleList( + [ + CBlock( + dim=embed_dim[0], + num_heads=num_heads[0], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i], + norm_layer=norm_layer, + ) + for i in range(layers[0]) + ] + ) + self.norm1 = norm_layer(embed_dim[0]) + self.blocks2 = nn.ModuleList( + [ + CBlock( + dim=embed_dim[1], + num_heads=num_heads[1], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i + layers[0]], + norm_layer=norm_layer, + ) + for i in range(layers[1]) + ] + ) self.norm2 = norm_layer(embed_dim[1]) if self.windows: print('Use local window for all blocks in stage3') - self.blocks3 = nn.ModuleList([ - SABlock_Windows( - dim=embed_dim[2], num_heads=num_heads[2], window_size=window_size, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, - drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer) - for i in range(layers[2])]) + self.blocks3 = nn.ModuleList( + [ + SABlock_Windows( + dim=embed_dim[2], + num_heads=num_heads[2], + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i + layers[0] + layers[1]], + norm_layer=norm_layer, + ) + for i in range(layers[2]) + ] + ) elif hybrid: print('Use hybrid window for blocks in stage3') block3 = [] for i in range(layers[2]): if (i + 1) % 4 == 0: - block3.append(SABlock( - dim=embed_dim[2], num_heads=num_heads[2], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, - drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer)) + block3.append( + SABlock( + dim=embed_dim[2], + num_heads=num_heads[2], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i + layers[0] + layers[1]], + norm_layer=norm_layer, + ) + ) else: - block3.append(SABlock_Windows( - dim=embed_dim[2], num_heads=num_heads[2], window_size=window_size, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, - drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer)) + block3.append( + SABlock_Windows( + dim=embed_dim[2], + num_heads=num_heads[2], + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i + layers[0] + layers[1]], + norm_layer=norm_layer, + ) + ) self.blocks3 = nn.ModuleList(block3) else: print('Use global window for all blocks in stage3') - self.blocks3 = nn.ModuleList([ - SABlock( - dim=embed_dim[2], num_heads=num_heads[2], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, - drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer) - for i in range(layers[2])]) + self.blocks3 = nn.ModuleList( + [ + SABlock( + dim=embed_dim[2], + num_heads=num_heads[2], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i + layers[0] + layers[1]], + norm_layer=norm_layer, + ) + for i in range(layers[2]) + ] + ) self.norm3 = norm_layer(embed_dim[2]) - self.blocks4 = nn.ModuleList([ - SABlock( - dim=embed_dim[3], num_heads=num_heads[3], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, - drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]+layers[2]], norm_layer=norm_layer) - for i in range(layers[3])]) + self.blocks4 = nn.ModuleList( + [ + SABlock( + dim=embed_dim[3], + num_heads=num_heads[3], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i + layers[0] + layers[1] + layers[2]], + norm_layer=norm_layer, + ) + for i in range(layers[3]) + ] + ) self.norm4 = norm_layer(embed_dim[3]) - + # Representation layer if representation_size: self.num_features = representation_size - self.pre_logits = nn.Sequential(OrderedDict([ - ('fc', nn.Linear(embed_dim, representation_size)), - ('act', nn.Tanh()) - ])) + self.pre_logits = nn.Sequential( + OrderedDict([('fc', nn.Linear(embed_dim, representation_size)), ('act', nn.Tanh())]) + ) else: self.pre_logits = nn.Identity() - + self.apply(self._init_weights) self.init_weights(pretrained=pretrained_path) - + def init_weights(self, pretrained): if isinstance(pretrained, str): logger = get_root_logger() load_checkpoint(self, pretrained, map_location='cpu', strict=False, logger=logger) print(f'Load pretrained model from {pretrained}') + def _init_weights(self, m): if isinstance(m, nn.Linear): - trunc_normal_(m.weight, std=.02) + trunc_normal_(m.weight, std=0.02) if isinstance(m, nn.Linear) and m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.LayerNorm): diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py index 353b75fad121..883d56fd5bc9 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py @@ -7,12 +7,20 @@ import torch.nn as nn import torch.nn.functional as F import torch.utils.checkpoint as cp -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import (Conv2d, Linear, build_activation_layer, build_norm_layer, - constant_init, kaiming_init, normal_init) + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + Conv2d, + Linear, + build_activation_layer, + build_norm_layer, + constant_init, + kaiming_init, + normal_init, +) from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import _load_checkpoint from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm - from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + from ..builder import BACKBONES from ..utils import DropPath, trunc_normal_ @@ -33,12 +41,7 @@ class Mlp(nn.Module): to be between 0 and 1. Default: 0. """ - def __init__(self, - in_features, - hidden_features=None, - out_features=None, - act_cfg=dict(type='GELU'), - drop=0.): + def __init__(self, in_features, hidden_features=None, out_features=None, act_cfg=dict(type='GELU'), drop=0.0): super(Mlp, self).__init__() out_features = out_features or in_features hidden_features = hidden_features or in_features @@ -69,17 +72,11 @@ class Attention(nn.Module): proj_drop (float): Drop rate for output weights. Default: 0. """ - def __init__(self, - dim, - num_heads=8, - qkv_bias=False, - qk_scale=None, - attn_drop=0., - proj_drop=0.): + def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.0, proj_drop=0.0): super(Attention, self).__init__() self.num_heads = num_heads head_dim = dim // num_heads - self.scale = qk_scale or head_dim**-0.5 + self.scale = qk_scale or head_dim ** -0.5 self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) self.attn_drop = nn.Dropout(attn_drop) @@ -88,8 +85,7 @@ def __init__(self, def forward(self, x): b, n, c = x.shape - qkv = self.qkv(x).reshape(b, n, 3, self.num_heads, - c // self.num_heads).permute(2, 0, 3, 1, 4) + qkv = self.qkv(x).reshape(b, n, 3, self.num_heads, c // self.num_heads).permute(2, 0, 3, 1, 4) q, k, v = qkv[0], qkv[1], qkv[2] attn = (q @ k.transpose(-2, -1)) * self.scale @@ -125,36 +121,31 @@ class Block(nn.Module): memory while slowing down the training speed. Default: False. """ - def __init__(self, - dim, - num_heads, - mlp_ratio=4, - qkv_bias=False, - qk_scale=None, - drop=0., - attn_drop=0., - proj_drop=0., - drop_path=0., - act_cfg=dict(type='GELU'), - norm_cfg=dict(type='LN', eps=1e-6), - with_cp=False): + def __init__( + self, + dim, + num_heads, + mlp_ratio=4, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN', eps=1e-6), + with_cp=False, + ): super(Block, self).__init__() self.with_cp = with_cp _, self.norm1 = build_norm_layer(norm_cfg, dim) - self.attn = Attention(dim, num_heads, qkv_bias, qk_scale, attn_drop, - proj_drop) - self.drop_path = DropPath( - drop_path) if drop_path > 0. else nn.Identity() + self.attn = Attention(dim, num_heads, qkv_bias, qk_scale, attn_drop, proj_drop) + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() _, self.norm2 = build_norm_layer(norm_cfg, dim) mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp = Mlp( - in_features=dim, - hidden_features=mlp_hidden_dim, - act_cfg=act_cfg, - drop=drop) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_cfg=act_cfg, drop=drop) def forward(self, x): - def _inner_forward(x): out = x + self.drop_path(self.attn(self.norm1(x))) out = out + self.drop_path(self.mlp(self.norm2(out))) @@ -180,11 +171,7 @@ class PatchEmbed(nn.Module): embed_dim (int): The embedding dimension. Default: 768. """ - def __init__(self, - img_size=224, - patch_size=16, - in_channels=3, - embed_dim=768): + def __init__(self, img_size=224, patch_size=16, in_channels=3, embed_dim=768): super(PatchEmbed, self).__init__() if isinstance(img_size, int): self.img_size = (img_size, img_size) @@ -195,8 +182,7 @@ def __init__(self, h, w = self.img_size self.patch_size = (patch_size, patch_size) self.num_patches = (h // patch_size) * (w // patch_size) - self.proj = Conv2d( - in_channels, embed_dim, kernel_size=patch_size, stride=patch_size) + self.proj = Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size) def forward(self, x): return self.proj(x).flatten(2).transpose(1, 2) @@ -243,41 +229,40 @@ class VisionTransformer(nn.Module): Default: False. """ - def __init__(self, - img_size=(224, 224), - patch_size=16, - in_channels=3, - embed_dim=768, - depth=12, - num_heads=12, - mlp_ratio=4, - out_indices=11, - qkv_bias=True, - qk_scale=None, - drop_rate=0., - attn_drop_rate=0., - drop_path_rate=0., - norm_cfg=dict(type='LN', eps=1e-6, requires_grad=True), - act_cfg=dict(type='GELU'), - norm_eval=False, - final_norm=False, - with_cls_token=True, - interpolate_mode='bicubic', - with_cp=False): + def __init__( + self, + img_size=(224, 224), + patch_size=16, + in_channels=3, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + out_indices=11, + qkv_bias=True, + qk_scale=None, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.0, + norm_cfg=dict(type='LN', eps=1e-6, requires_grad=True), + act_cfg=dict(type='GELU'), + norm_eval=False, + final_norm=False, + with_cls_token=True, + interpolate_mode='bicubic', + with_cp=False, + ): super(VisionTransformer, self).__init__() self.img_size = img_size self.patch_size = patch_size self.features = self.embed_dim = embed_dim self.patch_embed = PatchEmbed( - img_size=img_size, - patch_size=patch_size, - in_channels=in_channels, - embed_dim=embed_dim) + img_size=img_size, patch_size=patch_size, in_channels=in_channels, embed_dim=embed_dim + ) self.with_cls_token = with_cls_token self.cls_token = nn.Parameter(torch.zeros(1, 1, self.embed_dim)) - self.pos_embed = nn.Parameter( - torch.zeros(1, self.patch_embed.num_patches + 1, embed_dim)) + self.pos_embed = nn.Parameter(torch.zeros(1, self.patch_embed.num_patches + 1, embed_dim)) self.pos_drop = nn.Dropout(p=drop_rate) if isinstance(out_indices, int): @@ -287,21 +272,24 @@ def __init__(self, else: raise TypeError('out_indices must be type of int, list or tuple') - dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth) - ] # stochastic depth decay rule - self.blocks = nn.ModuleList([ - Block( - dim=embed_dim, - num_heads=num_heads, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=dpr[i], - attn_drop=attn_drop_rate, - act_cfg=act_cfg, - norm_cfg=norm_cfg, - with_cp=with_cp) for i in range(depth) - ]) + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule + self.blocks = nn.ModuleList( + [ + Block( + dim=embed_dim, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=dpr[i], + attn_drop=attn_drop_rate, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + with_cp=with_cp, + ) + for i in range(depth) + ] + ) self.interpolate_mode = interpolate_mode self.final_norm = final_norm @@ -322,25 +310,26 @@ def init_weights(self, pretrained=None): if 'pos_embed' in state_dict.keys(): if self.pos_embed.shape != state_dict['pos_embed'].shape: - logger.info(msg=f'Resize the pos_embed shape from \ -{state_dict["pos_embed"].shape} to {self.pos_embed.shape}') + logger.info( + msg=f'Resize the pos_embed shape from \ +{state_dict["pos_embed"].shape} to {self.pos_embed.shape}' + ) h, w = self.img_size - pos_size = int( - math.sqrt(state_dict['pos_embed'].shape[1] - 1)) + pos_size = int(math.sqrt(state_dict['pos_embed'].shape[1] - 1)) state_dict['pos_embed'] = self.resize_pos_embed( - state_dict['pos_embed'], (h, w), (pos_size, pos_size), - self.patch_size, self.interpolate_mode) + state_dict['pos_embed'], (h, w), (pos_size, pos_size), self.patch_size, self.interpolate_mode + ) self.load_state_dict(state_dict, False) elif pretrained is None: # We only implement the 'jax_impl' initialization implemented at # https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501 - trunc_normal_(self.pos_embed, std=.02) - trunc_normal_(self.cls_token, std=.02) + trunc_normal_(self.pos_embed, std=0.02) + trunc_normal_(self.cls_token, std=0.02) for n, m in self.named_modules(): if isinstance(m, Linear): - trunc_normal_(m.weight, std=.02) + trunc_normal_(m.weight, std=0.02) if m.bias is not None: if 'mlp' in n: normal_init(m.bias, std=1e-6) @@ -371,21 +360,19 @@ def _pos_embeding(self, img, patched_img, pos_embed): Return: torch.Tensor: The pos encoded image feature. """ - assert patched_img.ndim == 3 and pos_embed.ndim == 3, \ - 'the shapes of patched_img and pos_embed must be [B, L, C]' + assert ( + patched_img.ndim == 3 and pos_embed.ndim == 3 + ), 'the shapes of patched_img and pos_embed must be [B, L, C]' x_len, pos_len = patched_img.shape[1], pos_embed.shape[1] if x_len != pos_len: - if pos_len == (self.img_size[0] // self.patch_size) * ( - self.img_size[1] // self.patch_size) + 1: + if pos_len == (self.img_size[0] // self.patch_size) * (self.img_size[1] // self.patch_size) + 1: pos_h = self.img_size[0] // self.patch_size pos_w = self.img_size[1] // self.patch_size else: - raise ValueError( - 'Unexpected shape of pos_embed, got {}.'.format( - pos_embed.shape)) - pos_embed = self.resize_pos_embed(pos_embed, img.shape[2:], - (pos_h, pos_w), self.patch_size, - self.interpolate_mode) + raise ValueError('Unexpected shape of pos_embed, got {}.'.format(pos_embed.shape)) + pos_embed = self.resize_pos_embed( + pos_embed, img.shape[2:], (pos_h, pos_w), self.patch_size, self.interpolate_mode + ) return self.pos_drop(patched_img + pos_embed) @staticmethod @@ -405,14 +392,11 @@ def resize_pos_embed(pos_embed, input_shpae, pos_shape, patch_size, mode): input_h, input_w = input_shpae pos_h, pos_w = pos_shape cls_token_weight = pos_embed[:, 0] - pos_embed_weight = pos_embed[:, (-1 * pos_h * pos_w):] - pos_embed_weight = pos_embed_weight.reshape( - 1, pos_h, pos_w, pos_embed.shape[2]).permute(0, 3, 1, 2) + pos_embed_weight = pos_embed[:, (-1 * pos_h * pos_w) :] + pos_embed_weight = pos_embed_weight.reshape(1, pos_h, pos_w, pos_embed.shape[2]).permute(0, 3, 1, 2) pos_embed_weight = F.interpolate( - pos_embed_weight, - size=[input_h // patch_size, input_w // patch_size], - align_corners=False, - mode=mode) + pos_embed_weight, size=[input_h // patch_size, input_w // patch_size], align_corners=False, mode=mode + ) cls_token_weight = cls_token_weight.unsqueeze(1) pos_embed_weight = torch.flatten(pos_embed_weight, 2).transpose(1, 2) pos_embed = torch.cat((cls_token_weight, pos_embed_weight), dim=1) @@ -444,9 +428,9 @@ def forward(self, inputs): else: out = x B, _, C = out.shape - out = out.reshape(B, inputs.shape[2] // self.patch_size, - inputs.shape[3] // self.patch_size, - C).permute(0, 3, 1, 2) + out = out.reshape( + B, inputs.shape[2] // self.patch_size, inputs.shape[3] // self.patch_size, C + ).permute(0, 3, 1, 2) outs.append(out) return tuple(outs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py index 66541fd20178..4cc391e48a34 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py @@ -35,12 +35,9 @@ def build_loss(cfg): def build_segmentor(cfg, train_cfg=None, test_cfg=None): """Build segmentor.""" if train_cfg is not None or test_cfg is not None: - warnings.warn( - 'train_cfg and test_cfg is deprecated, ' - 'please specify them in model', UserWarning) - assert cfg.get('train_cfg') is None or train_cfg is None, \ - 'train_cfg specified in both outer field and model field ' - assert cfg.get('test_cfg') is None or test_cfg is None, \ - 'test_cfg specified in both outer field and model field ' - return SEGMENTORS.build( - cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) + warnings.warn('train_cfg and test_cfg is deprecated, ' 'please specify them in model', UserWarning) + assert ( + cfg.get('train_cfg') is None or train_cfg is None + ), 'train_cfg specified in both outer field and model field ' + assert cfg.get('test_cfg') is None or test_cfg is None, 'test_cfg specified in both outer field and model field ' + return SEGMENTORS.build(cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py index ac66d3cfe0ea..1c4ab285953c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py @@ -13,6 +13,7 @@ from .lraspp_head import LRASPPHead from .nl_head import NLHead from .ocr_head import OCRHead + # from .point_head import PointHead from .psa_head import PSAHead from .psp_head import PSPHead @@ -21,8 +22,24 @@ from .uper_head import UPerHead __all__ = [ - 'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead', - 'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead', - 'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'EMAHead', 'DNLHead', - 'APCHead', 'DMHead', 'LRASPPHead' + 'FCNHead', + 'PSPHead', + 'ASPPHead', + 'PSAHead', + 'NLHead', + 'GCHead', + 'CCHead', + 'UPerHead', + 'DepthwiseSeparableASPPHead', + 'ANNHead', + 'DAHead', + 'OCRHead', + 'EncHead', + 'DepthwiseSeparableFCNHead', + 'FPNHead', + 'EMAHead', + 'DNLHead', + 'APCHead', + 'DMHead', + 'LRASPPHead', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py index 69e5d05ba75e..363c155b214b 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py @@ -16,8 +16,7 @@ class PPMConcat(nn.ModuleList): """ def __init__(self, pool_scales=(1, 3, 6, 8)): - super(PPMConcat, self).__init__( - [nn.AdaptiveAvgPool2d(pool_scale) for pool_scale in pool_scales]) + super(PPMConcat, self).__init__([nn.AdaptiveAvgPool2d(pool_scale) for pool_scale in pool_scales]) def forward(self, feats): """Forward function.""" @@ -49,9 +48,19 @@ class SelfAttentionBlock(_SelfAttentionBlock): act_cfg (dict|None): Config of activation layers. """ - def __init__(self, low_in_channels, high_in_channels, channels, - out_channels, share_key_query, query_scale, key_pool_scales, - conv_cfg, norm_cfg, act_cfg): + def __init__( + self, + low_in_channels, + high_in_channels, + channels, + out_channels, + share_key_query, + query_scale, + key_pool_scales, + conv_cfg, + norm_cfg, + act_cfg, + ): key_psp = PPMConcat(key_pool_scales) if query_scale > 1: query_downsample = nn.MaxPool2d(kernel_size=query_scale) @@ -73,7 +82,8 @@ def __init__(self, low_in_channels, high_in_channels, channels, with_out=True, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) class AFNB(nn.Module): @@ -96,9 +106,18 @@ class AFNB(nn.Module): act_cfg (dict|None): Config of activation layers. """ - def __init__(self, low_in_channels, high_in_channels, channels, - out_channels, query_scales, key_pool_scales, conv_cfg, - norm_cfg, act_cfg): + def __init__( + self, + low_in_channels, + high_in_channels, + channels, + out_channels, + query_scales, + key_pool_scales, + conv_cfg, + norm_cfg, + act_cfg, + ): super(AFNB, self).__init__() self.stages = nn.ModuleList() for query_scale in query_scales: @@ -113,14 +132,12 @@ def __init__(self, low_in_channels, high_in_channels, channels, key_pool_scales=key_pool_scales, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg)) + act_cfg=act_cfg, + ) + ) self.bottleneck = ConvModule( - out_channels + high_in_channels, - out_channels, - 1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=None) + out_channels + high_in_channels, out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None + ) def forward(self, low_feats, high_feats): """Forward function.""" @@ -147,8 +164,9 @@ class APNB(nn.Module): act_cfg (dict|None): Config of activation layers. """ - def __init__(self, in_channels, channels, out_channels, query_scales, - key_pool_scales, conv_cfg, norm_cfg, act_cfg): + def __init__( + self, in_channels, channels, out_channels, query_scales, key_pool_scales, conv_cfg, norm_cfg, act_cfg + ): super(APNB, self).__init__() self.stages = nn.ModuleList() for query_scale in query_scales: @@ -163,14 +181,12 @@ def __init__(self, in_channels, channels, out_channels, query_scales, key_pool_scales=key_pool_scales, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg)) + act_cfg=act_cfg, + ) + ) self.bottleneck = ConvModule( - 2 * in_channels, - out_channels, - 1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg) + 2 * in_channels, out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg + ) def forward(self, feats): """Forward function.""" @@ -195,13 +211,8 @@ class ANNHead(BaseDecodeHead): Default: (1, 3, 6, 8). """ - def __init__(self, - project_channels, - query_scales=(1, ), - key_pool_scales=(1, 3, 6, 8), - **kwargs): - super(ANNHead, self).__init__( - input_transform='multiple_select', **kwargs) + def __init__(self, project_channels, query_scales=(1,), key_pool_scales=(1, 3, 6, 8), **kwargs): + super(ANNHead, self).__init__(input_transform='multiple_select', **kwargs) assert len(self.in_channels) == 2 low_in_channels, high_in_channels = self.in_channels self.project_channels = project_channels @@ -214,7 +225,8 @@ def __init__(self, key_pool_scales=key_pool_scales, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) self.bottleneck = ConvModule( high_in_channels, self.channels, @@ -222,7 +234,8 @@ def __init__(self, padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) self.context = APNB( in_channels=self.channels, out_channels=self.channels, @@ -231,7 +244,8 @@ def __init__(self, key_pool_scales=key_pool_scales, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def forward(self, inputs): """Forward function.""" diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py index 119db3709457..04721c1d46f6 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py @@ -22,8 +22,7 @@ class ACM(nn.Module): act_cfg (dict): Config of activation layers. """ - def __init__(self, pool_scale, fusion, in_channels, channels, conv_cfg, - norm_cfg, act_cfg): + def __init__(self, pool_scale, fusion, in_channels, channels, conv_cfg, norm_cfg, act_cfg): super(ACM, self).__init__() self.pool_scale = pool_scale self.fusion = fusion @@ -33,47 +32,27 @@ def __init__(self, pool_scale, fusion, in_channels, channels, conv_cfg, self.norm_cfg = norm_cfg self.act_cfg = act_cfg self.pooled_redu_conv = ConvModule( - self.in_channels, - self.channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + self.in_channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) self.input_redu_conv = ConvModule( - self.in_channels, - self.channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + self.in_channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) self.global_info = ConvModule( - self.channels, - self.channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + self.channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) - self.gla = nn.Conv2d(self.channels, self.pool_scale**2, 1, 1, 0) + self.gla = nn.Conv2d(self.channels, self.pool_scale ** 2, 1, 1, 0) self.residual_conv = ConvModule( - self.channels, - self.channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + self.channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) if self.fusion: self.fusion_conv = ConvModule( - self.channels, - self.channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + self.channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) def forward(self, x): """Forward function.""" @@ -84,13 +63,13 @@ def forward(self, x): pooled_x = self.pooled_redu_conv(pooled_x) batch_size = x.size(0) # [batch_size, pool_scale * pool_scale, channels] - pooled_x = pooled_x.view(batch_size, self.channels, - -1).permute(0, 2, 1).contiguous() + pooled_x = pooled_x.view(batch_size, self.channels, -1).permute(0, 2, 1).contiguous() # [batch_size, h * w, pool_scale * pool_scale] - affinity_matrix = self.gla(x + resize( - self.global_info(F.adaptive_avg_pool2d(x, 1)), size=x.shape[2:]) - ).permute(0, 2, 3, 1).reshape( - batch_size, -1, self.pool_scale**2) + affinity_matrix = ( + self.gla(x + resize(self.global_info(F.adaptive_avg_pool2d(x, 1)), size=x.shape[2:])) + .permute(0, 2, 3, 1) + .reshape(batch_size, -1, self.pool_scale ** 2) + ) affinity_matrix = F.sigmoid(affinity_matrix) # [batch_size, h * w, channels] z_out = torch.matmul(affinity_matrix, pooled_x) @@ -129,13 +108,16 @@ def __init__(self, pool_scales=(1, 2, 3, 6), fusion=True, **kwargs): acm_modules = [] for pool_scale in self.pool_scales: acm_modules.append( - ACM(pool_scale, + ACM( + pool_scale, self.fusion, self.in_channels, self.channels, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ) + ) self.acm_modules = nn.ModuleList(acm_modules) self.bottleneck = ConvModule( self.in_channels + len(pool_scales) * self.channels, @@ -144,7 +126,8 @@ def __init__(self, pool_scales=(1, 2, 3, 6), fusion=True, **kwargs): padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def forward(self, inputs): """Forward function.""" diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py index 3eb5ece9e660..8d121ca61222 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py @@ -19,8 +19,7 @@ class ASPPModule(nn.ModuleList): act_cfg (dict): Config of activation layers. """ - def __init__(self, dilations, in_channels, channels, conv_cfg, norm_cfg, - act_cfg): + def __init__(self, dilations, in_channels, channels, conv_cfg, norm_cfg, act_cfg): super(ASPPModule, self).__init__() self.dilations = dilations self.in_channels = in_channels @@ -38,7 +37,9 @@ def __init__(self, dilations, in_channels, channels, conv_cfg, norm_cfg, padding=0 if dilation == 1 else dilation, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ) + ) def forward(self, x): """Forward function.""" @@ -73,14 +74,17 @@ def __init__(self, dilations=(1, 6, 12, 18), **kwargs): 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ), + ) self.aspp_modules = ASPPModule( dilations, self.in_channels, self.channels, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) self.bottleneck = ConvModule( (len(dilations) + 1) * self.channels, self.channels, @@ -88,18 +92,13 @@ def __init__(self, dilations=(1, 6, 12, 18), **kwargs): padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def forward(self, inputs): """Forward function.""" x = self._transform_inputs(inputs) - aspp_outs = [ - resize( - self.image_pool(x), - size=x.size()[2:], - mode='bilinear', - align_corners=self.align_corners) - ] + aspp_outs = [resize(self.image_pool(x), size=x.size()[2:], mode='bilinear', align_corners=self.align_corners)] aspp_outs.extend(self.aspp_modules(x)) aspp_outs = torch.cat(aspp_outs, dim=1) output = self.bottleneck(aspp_outs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py index d02122ca0e68..40f498d3679c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py @@ -15,8 +15,7 @@ def forward(self, inputs, prev_output): """Placeholder of forward function.""" pass - def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, - train_cfg): + def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, train_cfg): """Forward function for training. Args: inputs (list[Tensor]): List of multi-level img features. diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py index 0aa8380f4aab..98e0340501d5 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py @@ -23,8 +23,7 @@ class CCHead(FCNHead): def __init__(self, recurrence=2, **kwargs): if CrissCrossAttention is None: - raise RuntimeError('Please install mmcv-full for ' - 'CrissCrossAttention ops') + raise RuntimeError('Please install mmcv-full for ' 'CrissCrossAttention ops') super(CCHead, self).__init__(num_convs=2, **kwargs) self.recurrence = recurrence self.cca = CrissCrossAttention(self.channels) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py index 784ebc8a20c8..d63ed0e84dd5 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py @@ -1,9 +1,10 @@ import torch import torch.nn.functional as F -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, Scale from torch import nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, Scale from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import add_prefix + from ..builder import HEADS from ..utils import SelfAttentionBlock as _SelfAttentionBlock from .decode_head import BaseDecodeHead @@ -34,7 +35,8 @@ def __init__(self, in_channels, channels): with_out=False, conv_cfg=None, norm_cfg=None, - act_cfg=None) + act_cfg=None, + ) self.gamma = Scale(0) @@ -59,8 +61,7 @@ def forward(self, x): proj_query = x.view(batch_size, channels, -1) proj_key = x.view(batch_size, channels, -1).permute(0, 2, 1) energy = torch.bmm(proj_query, proj_key) - energy_new = torch.max( - energy, -1, keepdim=True)[0].expand_as(energy) - energy + energy_new = torch.max(energy, -1, keepdim=True)[0].expand_as(energy) - energy attention = F.softmax(energy_new, dim=-1) proj_value = x.view(batch_size, channels, -1) @@ -92,7 +93,8 @@ def __init__(self, pam_channels, **kwargs): padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) self.pam = PAM(self.channels, pam_channels) self.pam_out_conv = ConvModule( self.channels, @@ -101,9 +103,9 @@ def __init__(self, pam_channels, **kwargs): padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) - self.pam_conv_seg = nn.Conv2d( - self.channels, self.num_classes, kernel_size=1) + act_cfg=self.act_cfg, + ) + self.pam_conv_seg = nn.Conv2d(self.channels, self.num_classes, kernel_size=1) self.cam_in_conv = ConvModule( self.in_channels, @@ -112,7 +114,8 @@ def __init__(self, pam_channels, **kwargs): padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) self.cam = CAM() self.cam_out_conv = ConvModule( self.channels, @@ -121,9 +124,9 @@ def __init__(self, pam_channels, **kwargs): padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) - self.cam_conv_seg = nn.Conv2d( - self.channels, self.num_classes, kernel_size=1) + act_cfg=self.act_cfg, + ) + self.cam_conv_seg = nn.Conv2d(self.channels, self.num_classes, kernel_size=1) def pam_cls_seg(self, feat): """PAM feature classification.""" @@ -165,14 +168,7 @@ def losses(self, seg_logit, seg_label): """Compute ``pam_cam``, ``pam``, ``cam`` loss.""" pam_cam_seg_logit, pam_seg_logit, cam_seg_logit = seg_logit loss = dict() - loss.update( - add_prefix( - super(DAHead, self).losses(pam_cam_seg_logit, seg_label), - 'pam_cam')) - loss.update( - add_prefix( - super(DAHead, self).losses(pam_seg_logit, seg_label), 'pam')) - loss.update( - add_prefix( - super(DAHead, self).losses(cam_seg_logit, seg_label), 'cam')) + loss.update(add_prefix(super(DAHead, self).losses(pam_cam_seg_logit, seg_label), 'pam_cam')) + loss.update(add_prefix(super(DAHead, self).losses(pam_seg_logit, seg_label), 'pam')) + loss.update(add_prefix(super(DAHead, self).losses(cam_seg_logit, seg_label), 'cam')) return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py index 2fac63380adb..e1aa23944d86 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py @@ -43,24 +43,23 @@ class BaseDecodeHead(nn.Module, metaclass=ABCMeta): Default: False. """ - def __init__(self, - in_channels, - channels, - *, - num_classes, - dropout_ratio=0.1, - conv_cfg=None, - norm_cfg=None, - act_cfg=dict(type='ReLU'), - in_index=-1, - input_transform=None, - loss_decode=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - ignore_index=255, - sampler=None, - align_corners=False): + def __init__( + self, + in_channels, + channels, + *, + num_classes, + dropout_ratio=0.1, + conv_cfg=None, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + in_index=-1, + input_transform=None, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ignore_index=255, + sampler=None, + align_corners=False, + ): super(BaseDecodeHead, self).__init__() self._init_inputs(in_channels, in_index, input_transform) self.channels = channels @@ -87,9 +86,11 @@ def __init__(self, def extra_repr(self): """Extra repr.""" - s = f'input_transform={self.input_transform}, ' \ - f'ignore_index={self.ignore_index}, ' \ + s = ( + f'input_transform={self.input_transform}, ' + f'ignore_index={self.ignore_index}, ' f'align_corners={self.align_corners}' + ) return s def _init_inputs(self, in_channels, in_index, input_transform): @@ -147,11 +148,8 @@ def _transform_inputs(self, inputs): if self.input_transform == 'resize_concat': inputs = [inputs[i] for i in self.in_index] upsampled_inputs = [ - resize( - input=x, - size=inputs[0].shape[2:], - mode='bilinear', - align_corners=self.align_corners) for x in inputs + resize(input=x, size=inputs[0].shape[2:], mode='bilinear', align_corners=self.align_corners) + for x in inputs ] inputs = torch.cat(upsampled_inputs, dim=1) elif self.input_transform == 'multiple_select': @@ -211,24 +209,18 @@ def cls_seg(self, feat): output = self.conv_seg(feat) return output - @force_fp32(apply_to=('seg_logit', )) + @force_fp32(apply_to=('seg_logit',)) def losses(self, seg_logit, seg_label): """Compute segmentation loss.""" loss = dict() seg_logit = resize( - input=seg_logit, - size=seg_label.shape[2:], - mode='bilinear', - align_corners=self.align_corners) + input=seg_logit, size=seg_label.shape[2:], mode='bilinear', align_corners=self.align_corners + ) if self.sampler is not None: seg_weight = self.sampler.sample(seg_logit, seg_label) else: seg_weight = None seg_label = seg_label.squeeze(1) - loss['loss_seg'] = self.loss_decode( - seg_logit, - seg_label, - weight=seg_weight, - ignore_index=self.ignore_index) + loss['loss_seg'] = self.loss_decode(seg_logit, seg_label, weight=seg_weight, ignore_index=self.ignore_index) loss['acc_seg'] = accuracy(seg_logit, seg_label) return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py index 52efb6c71e28..a5a58165b326 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py @@ -1,7 +1,12 @@ import torch import torch.nn as nn import torch.nn.functional as F -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, build_activation_layer, build_norm_layer + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + ConvModule, + build_activation_layer, + build_norm_layer, +) from ..builder import HEADS from .decode_head import BaseDecodeHead @@ -21,8 +26,7 @@ class DCM(nn.Module): act_cfg (dict): Config of activation layers. """ - def __init__(self, filter_size, fusion, in_channels, channels, conv_cfg, - norm_cfg, act_cfg): + def __init__(self, filter_size, fusion, in_channels, channels, conv_cfg, norm_cfg, act_cfg): super(DCM, self).__init__() self.filter_size = filter_size self.fusion = fusion @@ -31,16 +35,11 @@ def __init__(self, filter_size, fusion, in_channels, channels, conv_cfg, self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg - self.filter_gen_conv = nn.Conv2d(self.in_channels, self.channels, 1, 1, - 0) + self.filter_gen_conv = nn.Conv2d(self.in_channels, self.channels, 1, 1, 0) self.input_redu_conv = ConvModule( - self.in_channels, - self.channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + self.in_channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) if self.norm_cfg is not None: self.norm = build_norm_layer(self.norm_cfg, self.channels)[1] @@ -50,24 +49,18 @@ def __init__(self, filter_size, fusion, in_channels, channels, conv_cfg, if self.fusion: self.fusion_conv = ConvModule( - self.channels, - self.channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + self.channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) def forward(self, x): """Forward function.""" - generated_filter = self.filter_gen_conv( - F.adaptive_avg_pool2d(x, self.filter_size)) + generated_filter = self.filter_gen_conv(F.adaptive_avg_pool2d(x, self.filter_size)) x = self.input_redu_conv(x) b, c, h, w = x.shape # [1, b * c, h, w], c = self.channels x = x.view(1, b * c, h, w) # [b * c, 1, filter_size, filter_size] - generated_filter = generated_filter.view(b * c, 1, self.filter_size, - self.filter_size) + generated_filter = generated_filter.view(b * c, 1, self.filter_size, self.filter_size) pad = (self.filter_size - 1) // 2 if (self.filter_size - 1) % 2 == 0: p2d = (pad, pad, pad, pad) @@ -111,13 +104,16 @@ def __init__(self, filter_sizes=(1, 3, 5, 7), fusion=False, **kwargs): dcm_modules = [] for filter_size in self.filter_sizes: dcm_modules.append( - DCM(filter_size, + DCM( + filter_size, self.fusion, self.in_channels, self.channels, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ) + ) self.dcm_modules = nn.ModuleList(dcm_modules) self.bottleneck = ConvModule( self.in_channels + len(filter_sizes) * self.channels, @@ -126,7 +122,8 @@ def __init__(self, filter_sizes=(1, 3, 5, 7), fusion=False, **kwargs): padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def forward(self, inputs): """Forward function.""" diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py index 1823f20a6fd9..2ecd75787808 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py @@ -1,7 +1,8 @@ import torch -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import NonLocal2d from torch import nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import NonLocal2d + from ..builder import HEADS from .fcn_head import FCNHead @@ -25,7 +26,7 @@ def embedded_gaussian(self, theta_x, phi_x): pairwise_weight = torch.matmul(theta_x, phi_x) if self.use_scale: # theta_x.shape[-1] is `self.inter_channels` - pairwise_weight /= theta_x.shape[-1]**0.5 + pairwise_weight /= theta_x.shape[-1] ** 0.5 pairwise_weight /= self.temperature pairwise_weight = pairwise_weight.softmax(dim=-1) return pairwise_weight @@ -65,8 +66,7 @@ def forward(self, x): # y: [N, HxW, C] y = torch.matmul(pairwise_weight, g_x) # y: [N, C, H, W] - y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, - *x.size()[2:]) + y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, *x.size()[2:]) # unary_mask: [N, 1, HxW] unary_mask = self.conv_mask(x) @@ -75,8 +75,7 @@ def forward(self, x): # unary_x: [N, 1, C] unary_x = torch.matmul(unary_mask, g_x) # unary_x: [N, C, 1, 1] - unary_x = unary_x.permute(0, 2, 1).contiguous().reshape( - n, self.inter_channels, 1, 1) + unary_x = unary_x.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, 1, 1) output = x + self.conv_out(y + unary_x) @@ -99,12 +98,7 @@ class DNLHead(FCNHead): temperature (float): Temperature to adjust attention. Default: 0.05 """ - def __init__(self, - reduction=2, - use_scale=True, - mode='embedded_gaussian', - temperature=0.05, - **kwargs): + def __init__(self, reduction=2, use_scale=True, mode='embedded_gaussian', temperature=0.05, **kwargs): super(DNLHead, self).__init__(num_convs=2, **kwargs) self.reduction = reduction self.use_scale = use_scale @@ -117,7 +111,8 @@ def __init__(self, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, mode=self.mode, - temperature=self.temperature) + temperature=self.temperature, + ) def forward(self, inputs): """Forward function.""" diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py index 5bebc82ba632..f66406d38ff8 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py @@ -36,7 +36,7 @@ def __init__(self, channels, num_bases, num_stages, momentum): self.momentum = momentum bases = torch.zeros(1, channels, self.num_bases) - bases.normal_(0, math.sqrt(2. / self.num_bases)) + bases.normal_(0, math.sqrt(2.0 / self.num_bases)) # [1, channels, num_bases] bases = F.normalize(bases, dim=1, p=2) self.register_buffer('bases', bases) @@ -69,8 +69,7 @@ def forward(self, feats): bases = reduce_mean(bases) # l2 norm bases = F.normalize(bases, dim=1, p=2) - self.bases = (1 - - self.momentum) * self.bases + self.momentum * bases + self.bases = (1 - self.momentum) * self.bases + self.momentum * bases return feats_recon @@ -91,21 +90,14 @@ class EMAHead(BaseDecodeHead): momentum (float): Momentum to update the base. Default: 0.1. """ - def __init__(self, - ema_channels, - num_bases, - num_stages, - concat_input=True, - momentum=0.1, - **kwargs): + def __init__(self, ema_channels, num_bases, num_stages, concat_input=True, momentum=0.1, **kwargs): super(EMAHead, self).__init__(**kwargs) self.ema_channels = ema_channels self.num_bases = num_bases self.num_stages = num_stages self.concat_input = concat_input self.momentum = momentum - self.ema_module = EMAModule(self.ema_channels, self.num_bases, - self.num_stages, self.momentum) + self.ema_module = EMAModule(self.ema_channels, self.num_bases, self.num_stages, self.momentum) self.ema_in_conv = ConvModule( self.in_channels, @@ -114,25 +106,18 @@ def __init__(self, padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) # project (0, inf) -> (-inf, inf) self.ema_mid_conv = ConvModule( - self.ema_channels, - self.ema_channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=None, - act_cfg=None) + self.ema_channels, self.ema_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=None, act_cfg=None + ) for param in self.ema_mid_conv.parameters(): param.requires_grad = False self.ema_out_conv = ConvModule( - self.ema_channels, - self.ema_channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=None) + self.ema_channels, self.ema_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=None + ) self.bottleneck = ConvModule( self.ema_channels, self.channels, @@ -140,7 +125,8 @@ def __init__(self, padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) if self.concat_input: self.conv_cat = ConvModule( self.in_channels + self.channels, @@ -149,7 +135,8 @@ def __init__(self, padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def forward(self, inputs): """Forward function.""" diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py index 649c7357c2bf..8e94db9cebb4 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py @@ -22,12 +22,8 @@ class EncModule(nn.Module): def __init__(self, in_channels, num_codes, conv_cfg, norm_cfg, act_cfg): super(EncModule, self).__init__() self.encoding_project = ConvModule( - in_channels, - in_channels, - 1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg) + in_channels, in_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg + ) # TODO: resolve this hack # change to 1d if norm_cfg is not None: @@ -35,17 +31,16 @@ def __init__(self, in_channels, num_codes, conv_cfg, norm_cfg, act_cfg): if encoding_norm_cfg['type'] in ['BN', 'IN']: encoding_norm_cfg['type'] += '1d' else: - encoding_norm_cfg['type'] = encoding_norm_cfg['type'].replace( - '2d', '1d') + encoding_norm_cfg['type'] = encoding_norm_cfg['type'].replace('2d', '1d') else: # fallback to BN1d encoding_norm_cfg = dict(type='BN1d') self.encoding = nn.Sequential( Encoding(channels=in_channels, num_codes=num_codes), build_norm_layer(encoding_norm_cfg, num_codes)[1], - nn.ReLU(inplace=True)) - self.fc = nn.Sequential( - nn.Linear(in_channels, in_channels), nn.Sigmoid()) + nn.ReLU(inplace=True), + ) + self.fc = nn.Sequential(nn.Linear(in_channels, in_channels), nn.Sigmoid()) def forward(self, x): """Forward function.""" @@ -75,17 +70,15 @@ class EncHead(BaseDecodeHead): Default: dict(type='CrossEntropyLoss', use_sigmoid=True). """ - def __init__(self, - num_codes=32, - use_se_loss=True, - add_lateral=False, - loss_se_decode=dict( - type='CrossEntropyLoss', - use_sigmoid=True, - loss_weight=0.2), - **kwargs): - super(EncHead, self).__init__( - input_transform='multiple_select', **kwargs) + def __init__( + self, + num_codes=32, + use_se_loss=True, + add_lateral=False, + loss_se_decode=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2), + **kwargs + ): + super(EncHead, self).__init__(input_transform='multiple_select', **kwargs) self.use_se_loss = use_se_loss self.add_lateral = add_lateral self.num_codes = num_codes @@ -96,7 +89,8 @@ def __init__(self, padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) if add_lateral: self.lateral_convs = nn.ModuleList() for in_channels in self.in_channels[:-1]: # skip the last one @@ -107,7 +101,9 @@ def __init__(self, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ) + ) self.fusion = ConvModule( len(self.in_channels) * self.channels, self.channels, @@ -115,13 +111,11 @@ def __init__(self, padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) self.enc_module = EncModule( - self.channels, - num_codes=num_codes, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + self.channels, num_codes=num_codes, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) if self.use_se_loss: self.loss_se_decode = build_loss(loss_se_decode) self.se_layer = nn.Linear(self.channels, self.num_classes) @@ -132,11 +126,7 @@ def forward(self, inputs): feat = self.bottleneck(inputs[-1]) if self.add_lateral: laterals = [ - resize( - lateral_conv(inputs[i]), - size=feat.shape[2:], - mode='bilinear', - align_corners=self.align_corners) + resize(lateral_conv(inputs[i]), size=feat.shape[2:], mode='bilinear', align_corners=self.align_corners) for i, lateral_conv in enumerate(self.lateral_convs) ] feat = self.fusion(torch.cat([feat, *laterals], 1)) @@ -170,8 +160,7 @@ def _convert_to_onehot_labels(seg_label, num_classes): batch_size = seg_label.size(0) onehot_labels = seg_label.new_zeros((batch_size, num_classes)) for i in range(batch_size): - hist = seg_label[i].float().histc( - bins=num_classes, min=0, max=num_classes - 1) + hist = seg_label[i].float().histc(bins=num_classes, min=0, max=num_classes - 1) onehot_labels[i] = hist > 0 return onehot_labels @@ -180,8 +169,6 @@ def losses(self, seg_logit, seg_label): seg_logit, se_seg_logit = seg_logit loss = dict() loss.update(super(EncHead, self).losses(seg_logit, seg_label)) - se_loss = self.loss_se_decode( - se_seg_logit, - self._convert_to_onehot_labels(seg_label, self.num_classes)) + se_loss = self.loss_se_decode(se_seg_logit, self._convert_to_onehot_labels(seg_label, self.num_classes)) loss['loss_se'] = se_loss return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py index 8fc58a5bb514..7e1a34a2a416 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py @@ -20,12 +20,7 @@ class FCNHead(BaseDecodeHead): dilation (int): The dilation rate for convs in the head. Default: 1. """ - def __init__(self, - num_convs=2, - kernel_size=3, - concat_input=True, - dilation=1, - **kwargs): + def __init__(self, num_convs=2, kernel_size=3, concat_input=True, dilation=1, **kwargs): assert num_convs >= 0 and dilation > 0 and isinstance(dilation, int) self.num_convs = num_convs self.concat_input = concat_input @@ -45,7 +40,9 @@ def __init__(self, dilation=dilation, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ) + ) for i in range(num_convs - 1): convs.append( ConvModule( @@ -56,7 +53,9 @@ def __init__(self, dilation=dilation, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ) + ) if num_convs == 0: self.convs = nn.Identity() else: @@ -69,7 +68,8 @@ def __init__(self, padding=kernel_size // 2, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def forward(self, inputs): """Forward function.""" diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py index 1fc3c4d0fc0b..28637489e7a7 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py @@ -21,17 +21,14 @@ class FPNHead(BaseDecodeHead): """ def __init__(self, feature_strides, **kwargs): - super(FPNHead, self).__init__( - input_transform='multiple_select', **kwargs) + super(FPNHead, self).__init__(input_transform='multiple_select', **kwargs) assert len(feature_strides) == len(self.in_channels) assert min(feature_strides) == feature_strides[0] self.feature_strides = feature_strides self.scale_heads = nn.ModuleList() for i in range(len(feature_strides)): - head_length = max( - 1, - int(np.log2(feature_strides[i]) - np.log2(feature_strides[0]))) + head_length = max(1, int(np.log2(feature_strides[i]) - np.log2(feature_strides[0]))) scale_head = [] for k in range(head_length): scale_head.append( @@ -42,13 +39,11 @@ def __init__(self, feature_strides, **kwargs): padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ) + ) if feature_strides[i] != feature_strides[0]: - scale_head.append( - nn.Upsample( - scale_factor=2, - mode='bilinear', - align_corners=self.align_corners)) + scale_head.append(nn.Upsample(scale_factor=2, mode='bilinear', align_corners=self.align_corners)) self.scale_heads.append(nn.Sequential(*scale_head)) def forward(self, inputs): @@ -59,10 +54,8 @@ def forward(self, inputs): for i in range(1, len(self.feature_strides)): # non inplace output = output + resize( - self.scale_heads[i](x[i]), - size=output.shape[2:], - mode='bilinear', - align_corners=self.align_corners) + self.scale_heads[i](x[i]), size=output.shape[2:], mode='bilinear', align_corners=self.align_corners + ) output = self.cls_seg(output) return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py index 459a4a502c0d..8898bdffe5c6 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py @@ -20,20 +20,14 @@ class GCHead(FCNHead): Options are 'channel_add', 'channel_mul'. Default: ('channel_add',) """ - def __init__(self, - ratio=1 / 4., - pooling_type='att', - fusion_types=('channel_add', ), - **kwargs): + def __init__(self, ratio=1 / 4.0, pooling_type='att', fusion_types=('channel_add',), **kwargs): super(GCHead, self).__init__(num_convs=2, **kwargs) self.ratio = ratio self.pooling_type = pooling_type self.fusion_types = fusion_types self.gc_block = ContextBlock( - in_channels=self.channels, - ratio=self.ratio, - pooling_type=self.pooling_type, - fusion_types=self.fusion_types) + in_channels=self.channels, ratio=self.ratio, pooling_type=self.pooling_type, fusion_types=self.fusion_types + ) def forward(self, inputs): """Forward function.""" diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py index 9af362d83b1a..75e2fa4b1e19 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py @@ -23,9 +23,11 @@ class LRASPPHead(BaseDecodeHead): def __init__(self, branch_channels=(32, 64), **kwargs): super(LRASPPHead, self).__init__(**kwargs) if self.input_transform != 'multiple_select': - raise ValueError('in Lite R-ASPP (LRASPP) head, input_transform ' - f'must be \'multiple_select\'. But received ' - f'\'{self.input_transform}\'') + raise ValueError( + 'in Lite R-ASPP (LRASPP) head, input_transform ' + f'must be \'multiple_select\'. But received ' + f'\'{self.input_transform}\'' + ) assert is_tuple_of(branch_channels, int) assert len(branch_channels) == len(self.in_channels) - 1 self.branch_channels = branch_channels @@ -33,10 +35,7 @@ def __init__(self, branch_channels=(32, 64), **kwargs): self.convs = nn.Sequential() self.conv_ups = nn.Sequential() for i in range(len(branch_channels)): - self.convs.add_module( - f'conv{i}', - nn.Conv2d( - self.in_channels[i], branch_channels[i], 1, bias=False)) + self.convs.add_module(f'conv{i}', nn.Conv2d(self.in_channels[i], branch_channels[i], 1, bias=False)) self.conv_ups.add_module( f'conv_up{i}', ConvModule( @@ -45,25 +44,19 @@ def __init__(self, branch_channels=(32, 64), **kwargs): 1, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, - bias=False)) + bias=False, + ), + ) self.conv_up_input = nn.Conv2d(self.channels, self.channels, 1) self.aspp_conv = ConvModule( - self.in_channels[-1], - self.channels, - 1, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - bias=False) + self.in_channels[-1], self.channels, 1, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, bias=False + ) self.image_pool = nn.Sequential( nn.AvgPool2d(kernel_size=49, stride=(16, 20)), - ConvModule( - self.in_channels[2], - self.channels, - 1, - act_cfg=dict(type='Sigmoid'), - bias=False)) + ConvModule(self.in_channels[2], self.channels, 1, act_cfg=dict(type='Sigmoid'), bias=False), + ) def forward(self, inputs): """Forward function.""" @@ -72,18 +65,12 @@ def forward(self, inputs): x = inputs[-1] x = self.aspp_conv(x) * resize( - self.image_pool(x), - size=x.size()[2:], - mode='bilinear', - align_corners=self.align_corners) + self.image_pool(x), size=x.size()[2:], mode='bilinear', align_corners=self.align_corners + ) x = self.conv_up_input(x) for i in range(len(self.branch_channels) - 1, -1, -1): - x = resize( - x, - size=inputs[i].size()[2:], - mode='bilinear', - align_corners=self.align_corners) + x = resize(x, size=inputs[i].size()[2:], mode='bilinear', align_corners=self.align_corners) x = torch.cat([x, self.convs[i](inputs[i])], 1) x = self.conv_ups[i](x) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py index 130005d1c16d..0f3def19ccc5 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py @@ -20,11 +20,7 @@ class NLHead(FCNHead): 'dot_product'. Default: 'embedded_gaussian.'. """ - def __init__(self, - reduction=2, - use_scale=True, - mode='embedded_gaussian', - **kwargs): + def __init__(self, reduction=2, use_scale=True, mode='embedded_gaussian', **kwargs): super(NLHead, self).__init__(num_convs=2, **kwargs) self.reduction = reduction self.use_scale = use_scale @@ -35,7 +31,8 @@ def __init__(self, use_scale=self.use_scale, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - mode=self.mode) + mode=self.mode, + ) def forward(self, inputs): """Forward function.""" diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py index 6ddd5a06cc55..c5d9c3bfa89b 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py @@ -39,8 +39,7 @@ def forward(self, feats, probs): class ObjectAttentionBlock(_SelfAttentionBlock): """Make a OCR used SelfAttentionBlock.""" - def __init__(self, in_channels, channels, scale, conv_cfg, norm_cfg, - act_cfg): + def __init__(self, in_channels, channels, scale, conv_cfg, norm_cfg, act_cfg): if scale > 1: query_downsample = nn.MaxPool2d(kernel_size=scale) else: @@ -61,19 +60,15 @@ def __init__(self, in_channels, channels, scale, conv_cfg, norm_cfg, with_out=True, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) self.bottleneck = ConvModule( - in_channels * 2, - in_channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + in_channels * 2, in_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) def forward(self, query_feats, key_feats): """Forward function.""" - context = super(ObjectAttentionBlock, - self).forward(query_feats, key_feats) + context = super(ObjectAttentionBlock, self).forward(query_feats, key_feats) output = self.bottleneck(torch.cat([context, query_feats], dim=1)) if self.query_downsample is not None: output = resize(query_feats) @@ -104,7 +99,8 @@ def __init__(self, ocr_channels, scale=1, **kwargs): self.scale, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) self.spatial_gather_module = SpatialGatherModule(self.scale) self.bottleneck = ConvModule( @@ -114,7 +110,8 @@ def __init__(self, ocr_channels, scale=1, **kwargs): padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def forward(self, inputs, prev_output): """Forward function.""" diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py index 033d2dbd36d8..7e4c0ef8e10f 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py @@ -57,19 +57,18 @@ class PointHead(BaseCascadeDecodeHead): loss_weight=1.0). """ - def __init__(self, - num_fcs=3, - coarse_pred_each_layer=True, - conv_cfg=dict(type='Conv1d'), - norm_cfg=None, - act_cfg=dict(type='ReLU', inplace=False), - **kwargs): + def __init__( + self, + num_fcs=3, + coarse_pred_each_layer=True, + conv_cfg=dict(type='Conv1d'), + norm_cfg=None, + act_cfg=dict(type='ReLU', inplace=False), + **kwargs + ): super(PointHead, self).__init__( - input_transform='multiple_select', - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - **kwargs) + input_transform='multiple_select', conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, **kwargs + ) self.num_fcs = num_fcs self.coarse_pred_each_layer = coarse_pred_each_layer @@ -86,17 +85,12 @@ def __init__(self, padding=0, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) self.fcs.append(fc) fc_in_channels = fc_channels - fc_in_channels += self.num_classes if self.coarse_pred_each_layer \ - else 0 - self.fc_seg = nn.Conv1d( - fc_in_channels, - self.num_classes, - kernel_size=1, - stride=1, - padding=0) + fc_in_channels += self.num_classes if self.coarse_pred_each_layer else 0 + self.fc_seg = nn.Conv1d(fc_in_channels, self.num_classes, kernel_size=1, stride=1, padding=0) if self.dropout_ratio > 0: self.dropout = nn.Dropout(self.dropout_ratio) delattr(self, 'conv_seg') @@ -133,10 +127,7 @@ def _get_fine_grained_point_feats(self, x, points): shape (batch_size, sum(channels of x), num_points). """ - fine_grained_feats_list = [ - point_sample(_, points, align_corners=self.align_corners) - for _ in x - ] + fine_grained_feats_list = [point_sample(_, points, align_corners=self.align_corners) for _ in x] if len(fine_grained_feats_list) > 1: fine_grained_feats = torch.cat(fine_grained_feats_list, dim=1) else: @@ -157,13 +148,11 @@ def _get_coarse_point_feats(self, prev_output, points): num_classes, num_points). """ - coarse_feats = point_sample( - prev_output, points, align_corners=self.align_corners) + coarse_feats = point_sample(prev_output, points, align_corners=self.align_corners) return coarse_feats - def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, - train_cfg): + def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, train_cfg): """Forward function for training. Args: inputs (list[Tensor]): List of multi-level img features. @@ -182,18 +171,11 @@ def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, """ x = self._transform_inputs(inputs) with torch.no_grad(): - points = self.get_points_train( - prev_output, calculate_uncertainty, cfg=train_cfg) - fine_grained_point_feats = self._get_fine_grained_point_feats( - x, points) + points = self.get_points_train(prev_output, calculate_uncertainty, cfg=train_cfg) + fine_grained_point_feats = self._get_fine_grained_point_feats(x, points) coarse_point_feats = self._get_coarse_point_feats(prev_output, points) - point_logits = self.forward(fine_grained_point_feats, - coarse_point_feats) - point_label = point_sample( - gt_semantic_seg.float(), - points, - mode='nearest', - align_corners=self.align_corners) + point_logits = self.forward(fine_grained_point_feats, coarse_point_feats) + point_label = point_sample(gt_semantic_seg.float(), points, mode='nearest', align_corners=self.align_corners) point_label = point_label.squeeze(1).long() losses = self.losses(point_logits, point_label) @@ -224,32 +206,25 @@ def forward_test(self, inputs, prev_output, img_metas, test_cfg): refined_seg_logits, scale_factor=test_cfg.scale_factor, mode='bilinear', - align_corners=self.align_corners) + align_corners=self.align_corners, + ) batch_size, channels, height, width = refined_seg_logits.shape - point_indices, points = self.get_points_test( - refined_seg_logits, calculate_uncertainty, cfg=test_cfg) - fine_grained_point_feats = self._get_fine_grained_point_feats( - x, points) - coarse_point_feats = self._get_coarse_point_feats( - prev_output, points) - point_logits = self.forward(fine_grained_point_feats, - coarse_point_feats) + point_indices, points = self.get_points_test(refined_seg_logits, calculate_uncertainty, cfg=test_cfg) + fine_grained_point_feats = self._get_fine_grained_point_feats(x, points) + coarse_point_feats = self._get_coarse_point_feats(prev_output, points) + point_logits = self.forward(fine_grained_point_feats, coarse_point_feats) point_indices = point_indices.unsqueeze(1).expand(-1, channels, -1) - refined_seg_logits = refined_seg_logits.reshape( - batch_size, channels, height * width) - refined_seg_logits = refined_seg_logits.scatter_( - 2, point_indices, point_logits) - refined_seg_logits = refined_seg_logits.view( - batch_size, channels, height, width) + refined_seg_logits = refined_seg_logits.reshape(batch_size, channels, height * width) + refined_seg_logits = refined_seg_logits.scatter_(2, point_indices, point_logits) + refined_seg_logits = refined_seg_logits.view(batch_size, channels, height, width) return refined_seg_logits def losses(self, point_logits, point_label): """Compute segmentation loss.""" loss = dict() - loss['loss_point'] = self.loss_decode( - point_logits, point_label, ignore_index=self.ignore_index) + loss['loss_point'] = self.loss_decode(point_logits, point_label, ignore_index=self.ignore_index) loss['acc_point'] = accuracy(point_logits, point_label) return loss @@ -279,8 +254,7 @@ def get_points_train(self, seg_logits, uncertainty_func, cfg): assert 0 <= importance_sample_ratio <= 1 batch_size = seg_logits.shape[0] num_sampled = int(num_points * oversample_ratio) - point_coords = torch.rand( - batch_size, num_sampled, 2, device=seg_logits.device) + point_coords = torch.rand(batch_size, num_sampled, 2, device=seg_logits.device) point_logits = point_sample(seg_logits, point_coords) # It is crucial to calculate uncertainty based on the sampled # prediction value for the points. Calculating uncertainties of the @@ -294,16 +268,12 @@ def get_points_train(self, seg_logits, uncertainty_func, cfg): point_uncertainties = uncertainty_func(point_logits) num_uncertain_points = int(importance_sample_ratio * num_points) num_random_points = num_points - num_uncertain_points - idx = torch.topk( - point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1] - shift = num_sampled * torch.arange( - batch_size, dtype=torch.long, device=seg_logits.device) + idx = torch.topk(point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1] + shift = num_sampled * torch.arange(batch_size, dtype=torch.long, device=seg_logits.device) idx += shift[:, None] - point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view( - batch_size, num_uncertain_points, 2) + point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view(batch_size, num_uncertain_points, 2) if num_random_points > 0: - rand_point_coords = torch.rand( - batch_size, num_random_points, 2, device=seg_logits.device) + rand_point_coords = torch.rand(batch_size, num_random_points, 2, device=seg_logits.device) point_coords = torch.cat((point_coords, rand_point_coords), dim=1) return point_coords @@ -336,14 +306,7 @@ def get_points_test(self, seg_logits, uncertainty_func, cfg): uncertainty_map = uncertainty_map.view(batch_size, height * width) num_points = min(height * width, num_points) point_indices = uncertainty_map.topk(num_points, dim=1)[1] - point_coords = torch.zeros( - batch_size, - num_points, - 2, - dtype=torch.float, - device=seg_logits.device) - point_coords[:, :, 0] = w_step / 2.0 + (point_indices % - width).float() * w_step - point_coords[:, :, 1] = h_step / 2.0 + (point_indices // - width).float() * h_step + point_coords = torch.zeros(batch_size, num_points, 2, dtype=torch.float, device=seg_logits.device) + point_coords[:, :, 0] = w_step / 2.0 + (point_indices % width).float() * w_step + point_coords[:, :, 1] = h_step / 2.0 + (point_indices // width).float() * h_step return point_indices, point_coords diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py index c85bfc34a5a6..3ef4088a23e4 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py @@ -32,14 +32,16 @@ class PSAHead(BaseDecodeHead): psa_softmax (bool): Whether use softmax for attention. """ - def __init__(self, - mask_size, - psa_type='bi-direction', - compact=False, - shrink_factor=2, - normalization_factor=1.0, - psa_softmax=True, - **kwargs): + def __init__( + self, + mask_size, + psa_type='bi-direction', + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + **kwargs + ): if PSAMask is None: raise RuntimeError('Please install mmcv-full for PSAMask ops') super(PSAHead, self).__init__(**kwargs) @@ -60,7 +62,8 @@ def __init__(self, kernel_size=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) self.attention = nn.Sequential( ConvModule( self.channels, @@ -68,9 +71,10 @@ def __init__(self, kernel_size=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), - nn.Conv2d( - self.channels, mask_h * mask_w, kernel_size=1, bias=False)) + act_cfg=self.act_cfg, + ), + nn.Conv2d(self.channels, mask_h * mask_w, kernel_size=1, bias=False), + ) if psa_type == 'bi-direction': self.reduce_p = ConvModule( self.in_channels, @@ -78,7 +82,8 @@ def __init__(self, kernel_size=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) self.attention_p = nn.Sequential( ConvModule( self.channels, @@ -86,9 +91,10 @@ def __init__(self, kernel_size=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), - nn.Conv2d( - self.channels, mask_h * mask_w, kernel_size=1, bias=False)) + act_cfg=self.act_cfg, + ), + nn.Conv2d(self.channels, mask_h * mask_w, kernel_size=1, bias=False), + ) self.psamask_collect = PSAMask('collect', mask_size) self.psamask_distribute = PSAMask('distribute', mask_size) else: @@ -100,7 +106,8 @@ def __init__(self, padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) self.bottleneck = ConvModule( self.in_channels * 2, self.channels, @@ -108,7 +115,8 @@ def __init__(self, padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def forward(self, inputs): """Forward function.""" @@ -127,23 +135,18 @@ def forward(self, inputs): h = h // self.shrink_factor w = w // self.shrink_factor align_corners = False - out = resize( - out, - size=(h, w), - mode='bilinear', - align_corners=align_corners) + out = resize(out, size=(h, w), mode='bilinear', align_corners=align_corners) y = self.attention(out) if self.compact: if self.psa_type == 'collect': - y = y.view(n, h * w, - h * w).transpose(1, 2).view(n, h * w, h, w) + y = y.view(n, h * w, h * w).transpose(1, 2).view(n, h * w, h, w) else: y = self.psamask(y) if self.psa_softmax: y = F.softmax(y, dim=1) - out = torch.bmm( - out.view(n, c, h * w), y.view(n, h * w, h * w)).view( - n, c, h, w) * (1.0 / self.normalization_factor) + out = torch.bmm(out.view(n, c, h * w), y.view(n, h * w, h * w)).view(n, c, h, w) * ( + 1.0 / self.normalization_factor + ) else: x_col = self.reduce(x) x_dis = self.reduce_p(x) @@ -157,40 +160,27 @@ def forward(self, inputs): h = h // self.shrink_factor w = w // self.shrink_factor align_corners = False - x_col = resize( - x_col, - size=(h, w), - mode='bilinear', - align_corners=align_corners) - x_dis = resize( - x_dis, - size=(h, w), - mode='bilinear', - align_corners=align_corners) + x_col = resize(x_col, size=(h, w), mode='bilinear', align_corners=align_corners) + x_dis = resize(x_dis, size=(h, w), mode='bilinear', align_corners=align_corners) y_col = self.attention(x_col) y_dis = self.attention_p(x_dis) if self.compact: - y_dis = y_dis.view(n, h * w, - h * w).transpose(1, 2).view(n, h * w, h, w) + y_dis = y_dis.view(n, h * w, h * w).transpose(1, 2).view(n, h * w, h, w) else: y_col = self.psamask_collect(y_col) y_dis = self.psamask_distribute(y_dis) if self.psa_softmax: y_col = F.softmax(y_col, dim=1) y_dis = F.softmax(y_dis, dim=1) - x_col = torch.bmm( - x_col.view(n, c, h * w), y_col.view(n, h * w, h * w)).view( - n, c, h, w) * (1.0 / self.normalization_factor) - x_dis = torch.bmm( - x_dis.view(n, c, h * w), y_dis.view(n, h * w, h * w)).view( - n, c, h, w) * (1.0 / self.normalization_factor) + x_col = torch.bmm(x_col.view(n, c, h * w), y_col.view(n, h * w, h * w)).view(n, c, h, w) * ( + 1.0 / self.normalization_factor + ) + x_dis = torch.bmm(x_dis.view(n, c, h * w), y_dis.view(n, h * w, h * w)).view(n, c, h, w) * ( + 1.0 / self.normalization_factor + ) out = torch.cat([x_col, x_dis], 1) out = self.proj(out) - out = resize( - out, - size=identity.shape[2:], - mode='bilinear', - align_corners=align_corners) + out = resize(out, size=identity.shape[2:], mode='bilinear', align_corners=align_corners) out = self.bottleneck(torch.cat((identity, out), dim=1)) out = self.cls_seg(out) return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py index e8b5cf0fb261..ad87e1514885 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py @@ -21,8 +21,7 @@ class PPM(nn.ModuleList): align_corners (bool): align_corners argument of F.interpolate. """ - def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg, - act_cfg, align_corners): + def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg, act_cfg, align_corners): super(PPM, self).__init__() self.pool_scales = pool_scales self.align_corners = align_corners @@ -41,18 +40,17 @@ def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg))) + act_cfg=self.act_cfg, + ), + ) + ) def forward(self, x): """Forward function.""" ppm_outs = [] for ppm in self: ppm_out = ppm(x) - upsampled_ppm_out = resize( - ppm_out, - size=x.size()[2:], - mode='bilinear', - align_corners=self.align_corners) + upsampled_ppm_out = resize(ppm_out, size=x.size()[2:], mode='bilinear', align_corners=self.align_corners) ppm_outs.append(upsampled_ppm_out) return ppm_outs @@ -80,7 +78,8 @@ def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, - align_corners=self.align_corners) + align_corners=self.align_corners, + ) self.bottleneck = ConvModule( self.in_channels + len(pool_scales) * self.channels, self.channels, @@ -88,7 +87,8 @@ def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def forward(self, inputs): """Forward function.""" diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py index dae028e41876..6a6db6e93945 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py @@ -22,7 +22,8 @@ def __init__(self, **kwargs): dilation=dilation, padding=dilation, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) @HEADS.register_module() @@ -48,53 +49,33 @@ def __init__(self, c1_in_channels, c1_channels, **kwargs): channels=self.channels, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) if c1_in_channels > 0: self.c1_bottleneck = ConvModule( - c1_in_channels, - c1_channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + c1_in_channels, c1_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) else: self.c1_bottleneck = None self.sep_bottleneck = nn.Sequential( DepthwiseSeparableConvModule( - self.channels + c1_channels, - self.channels, - 3, - padding=1, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + self.channels + c1_channels, self.channels, 3, padding=1, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ), DepthwiseSeparableConvModule( - self.channels, - self.channels, - 3, - padding=1, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + self.channels, self.channels, 3, padding=1, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ), + ) def forward(self, inputs): """Forward function.""" x = self._transform_inputs(inputs) - aspp_outs = [ - resize( - self.image_pool(x), - size=x.size()[2:], - mode='bilinear', - align_corners=self.align_corners) - ] + aspp_outs = [resize(self.image_pool(x), size=x.size()[2:], mode='bilinear', align_corners=self.align_corners)] aspp_outs.extend(self.aspp_modules(x)) aspp_outs = torch.cat(aspp_outs, dim=1) output = self.bottleneck(aspp_outs) if self.c1_bottleneck is not None: c1_output = self.c1_bottleneck(inputs[0]) - output = resize( - input=output, - size=c1_output.shape[2:], - mode='bilinear', - align_corners=self.align_corners) + output = resize(input=output, size=c1_output.shape[2:], mode='bilinear', align_corners=self.align_corners) output = torch.cat([output, c1_output], dim=1) output = self.sep_bottleneck(output) output = self.cls_seg(output) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py index 35479f2419a3..1df80ae7b4ef 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py @@ -33,14 +33,16 @@ def __init__(self, **kwargs): self.channels, kernel_size=self.kernel_size, padding=self.kernel_size // 2, - norm_cfg=self.norm_cfg) + norm_cfg=self.norm_cfg, + ) for i in range(1, self.num_convs): self.convs[i] = DepthwiseSeparableConvModule( self.channels, self.channels, kernel_size=self.kernel_size, padding=self.kernel_size // 2, - norm_cfg=self.norm_cfg) + norm_cfg=self.norm_cfg, + ) if self.concat_input: self.conv_cat = DepthwiseSeparableConvModule( @@ -48,4 +50,5 @@ def __init__(self, **kwargs): self.channels, kernel_size=self.kernel_size, padding=self.kernel_size // 2, - norm_cfg=self.norm_cfg) + norm_cfg=self.norm_cfg, + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py index ff3fc4f1846c..35148c150fa7 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py @@ -21,8 +21,7 @@ class UPerHead(BaseDecodeHead): """ def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): - super(UPerHead, self).__init__( - input_transform='multiple_select', **kwargs) + super(UPerHead, self).__init__(input_transform='multiple_select', **kwargs) # PSP Module self.psp_modules = PPM( pool_scales, @@ -31,7 +30,8 @@ def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, - align_corners=self.align_corners) + align_corners=self.align_corners, + ) self.bottleneck = ConvModule( self.in_channels[-1] + len(pool_scales) * self.channels, self.channels, @@ -39,7 +39,8 @@ def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) # FPN Module self.lateral_convs = nn.ModuleList() self.fpn_convs = nn.ModuleList() @@ -51,7 +52,8 @@ def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, - inplace=False) + inplace=False, + ) fpn_conv = ConvModule( self.channels, self.channels, @@ -60,7 +62,8 @@ def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, - inplace=False) + inplace=False, + ) self.lateral_convs.append(l_conv) self.fpn_convs.append(fpn_conv) @@ -71,7 +74,8 @@ def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def psp_forward(self, inputs): """Forward function of PSP module.""" @@ -89,10 +93,7 @@ def forward(self, inputs): inputs = self._transform_inputs(inputs) # build laterals - laterals = [ - lateral_conv(inputs[i]) - for i, lateral_conv in enumerate(self.lateral_convs) - ] + laterals = [lateral_conv(inputs[i]) for i, lateral_conv in enumerate(self.lateral_convs)] laterals.append(self.psp_forward(inputs)) @@ -100,26 +101,17 @@ def forward(self, inputs): used_backbone_levels = len(laterals) for i in range(used_backbone_levels - 1, 0, -1): prev_shape = laterals[i - 1].shape[2:] - laterals[i - 1] += resize( - laterals[i], - size=prev_shape, - mode='bilinear', - align_corners=self.align_corners) + laterals[i - 1] += resize(laterals[i], size=prev_shape, mode='bilinear', align_corners=self.align_corners) # build outputs - fpn_outs = [ - self.fpn_convs[i](laterals[i]) - for i in range(used_backbone_levels - 1) - ] + fpn_outs = [self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels - 1)] # append psp feature fpn_outs.append(laterals[-1]) for i in range(used_backbone_levels - 1, 0, -1): fpn_outs[i] = resize( - fpn_outs[i], - size=fpn_outs[0].shape[2:], - mode='bilinear', - align_corners=self.align_corners) + fpn_outs[i], size=fpn_outs[0].shape[2:], mode='bilinear', align_corners=self.align_corners + ) fpn_outs = torch.cat(fpn_outs, dim=1) output = self.fpn_bottleneck(fpn_outs) output = self.cls_seg(output) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py index beca72045694..aaf307b3eaa1 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py @@ -1,12 +1,19 @@ from .accuracy import Accuracy, accuracy -from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, - cross_entropy, mask_cross_entropy) +from .cross_entropy_loss import CrossEntropyLoss, binary_cross_entropy, cross_entropy, mask_cross_entropy from .dice_loss import DiceLoss from .lovasz_loss import LovaszLoss from .utils import reduce_loss, weight_reduce_loss, weighted_loss __all__ = [ - 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', - 'mask_cross_entropy', 'CrossEntropyLoss', 'reduce_loss', - 'weight_reduce_loss', 'weighted_loss', 'LovaszLoss', 'DiceLoss' + 'accuracy', + 'Accuracy', + 'cross_entropy', + 'binary_cross_entropy', + 'mask_cross_entropy', + 'CrossEntropyLoss', + 'reduce_loss', + 'weight_reduce_loss', + 'weighted_loss', + 'LovaszLoss', + 'DiceLoss', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py index c0fd2e7e74a0..85b13399ee70 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py @@ -22,19 +22,18 @@ def accuracy(pred, target, topk=1, thresh=None): """ assert isinstance(topk, (int, tuple)) if isinstance(topk, int): - topk = (topk, ) + topk = (topk,) return_single = True else: return_single = False maxk = max(topk) if pred.size(0) == 0: - accu = [pred.new_tensor(0.) for i in range(len(topk))] + accu = [pred.new_tensor(0.0) for i in range(len(topk))] return accu[0] if return_single else accu assert pred.ndim == target.ndim + 1 assert pred.size(0) == target.size(0) - assert maxk <= pred.size(1), \ - f'maxk {maxk} exceeds pred dimension {pred.size(1)}' + assert maxk <= pred.size(1), f'maxk {maxk} exceeds pred dimension {pred.size(1)}' pred_value, pred_label = pred.topk(maxk, dim=1) # transpose to shape (maxk, N, ...) pred_label = pred_label.transpose(0, 1) @@ -52,7 +51,7 @@ def accuracy(pred, target, topk=1, thresh=None): class Accuracy(nn.Module): """Accuracy calculation module.""" - def __init__(self, topk=(1, ), thresh=None): + def __init__(self, topk=(1,), thresh=None): """Module to calculate the accuracy. Args: diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py index 42c0790c9861..766812eb5221 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py @@ -6,28 +6,16 @@ from .utils import get_class_weight, weight_reduce_loss -def cross_entropy(pred, - label, - weight=None, - class_weight=None, - reduction='mean', - avg_factor=None, - ignore_index=-100): +def cross_entropy(pred, label, weight=None, class_weight=None, reduction='mean', avg_factor=None, ignore_index=-100): """The wrapper function for :func:`F.cross_entropy`""" # class_weight is a manual rescaling weight given to each class. # If given, has to be a Tensor of size C element-wise losses - loss = F.cross_entropy( - pred, - label, - weight=class_weight, - reduction='none', - ignore_index=ignore_index) + loss = F.cross_entropy(pred, label, weight=class_weight, reduction='none', ignore_index=ignore_index) # apply weights and do the reduction if weight is not None: weight = weight.float() - loss = weight_reduce_loss( - loss, weight=weight, reduction=reduction, avg_factor=avg_factor) + loss = weight_reduce_loss(loss, weight=weight, reduction=reduction, avg_factor=avg_factor) return loss @@ -54,13 +42,9 @@ def _expand_onehot_labels(labels, label_weights, target_shape, ignore_index): return bin_labels, bin_label_weights -def binary_cross_entropy(pred, - label, - weight=None, - reduction='mean', - avg_factor=None, - class_weight=None, - ignore_index=255): +def binary_cross_entropy( + pred, label, weight=None, reduction='mean', avg_factor=None, class_weight=None, ignore_index=255 +): """Calculate the binary CrossEntropy loss. Args: @@ -78,32 +62,23 @@ def binary_cross_entropy(pred, torch.Tensor: The calculated loss """ if pred.dim() != label.dim(): - assert (pred.dim() == 2 and label.dim() == 1) or ( - pred.dim() == 4 and label.dim() == 3), \ - 'Only pred shape [N, C], label shape [N] or pred shape [N, C, ' \ + assert (pred.dim() == 2 and label.dim() == 1) or (pred.dim() == 4 and label.dim() == 3), ( + 'Only pred shape [N, C], label shape [N] or pred shape [N, C, ' 'H, W], label shape [N, H, W] are supported' - label, weight = _expand_onehot_labels(label, weight, pred.shape, - ignore_index) + ) + label, weight = _expand_onehot_labels(label, weight, pred.shape, ignore_index) # weighted element-wise losses if weight is not None: weight = weight.float() - loss = F.binary_cross_entropy_with_logits( - pred, label.float(), pos_weight=class_weight, reduction='none') + loss = F.binary_cross_entropy_with_logits(pred, label.float(), pos_weight=class_weight, reduction='none') # do the reduction for the weighted loss - loss = weight_reduce_loss( - loss, weight, reduction=reduction, avg_factor=avg_factor) + loss = weight_reduce_loss(loss, weight, reduction=reduction, avg_factor=avg_factor) return loss -def mask_cross_entropy(pred, - target, - label, - reduction='mean', - avg_factor=None, - class_weight=None, - ignore_index=None): +def mask_cross_entropy(pred, target, label, reduction='mean', avg_factor=None, class_weight=None, ignore_index=None): """Calculate the CrossEntropy loss for masks. Args: @@ -131,8 +106,7 @@ def mask_cross_entropy(pred, num_rois = pred.size()[0] inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) pred_slice = pred[inds, label].squeeze(1) - return F.binary_cross_entropy_with_logits( - pred_slice, target, weight=class_weight, reduction='mean')[None] + return F.binary_cross_entropy_with_logits(pred_slice, target, weight=class_weight, reduction='mean')[None] @LOSSES.register_module() @@ -151,12 +125,7 @@ class CrossEntropyLoss(nn.Module): loss_weight (float, optional): Weight of the loss. Defaults to 1.0. """ - def __init__(self, - use_sigmoid=False, - use_mask=False, - reduction='mean', - class_weight=None, - loss_weight=1.0): + def __init__(self, use_sigmoid=False, use_mask=False, reduction='mean', class_weight=None, loss_weight=1.0): super(CrossEntropyLoss, self).__init__() assert (use_sigmoid is False) or (use_mask is False) self.use_sigmoid = use_sigmoid @@ -172,27 +141,15 @@ def __init__(self, else: self.cls_criterion = cross_entropy - def forward(self, - cls_score, - label, - weight=None, - avg_factor=None, - reduction_override=None, - **kwargs): + def forward(self, cls_score, label, weight=None, avg_factor=None, reduction_override=None, **kwargs): """Forward function.""" assert reduction_override in (None, 'none', 'mean', 'sum') - reduction = ( - reduction_override if reduction_override else self.reduction) + reduction = reduction_override if reduction_override else self.reduction if self.class_weight is not None: class_weight = cls_score.new_tensor(self.class_weight) else: class_weight = None loss_cls = self.loss_weight * self.cls_criterion( - cls_score, - label, - weight, - class_weight=class_weight, - reduction=reduction, - avg_factor=avg_factor, - **kwargs) + cls_score, label, weight, class_weight=class_weight, reduction=reduction, avg_factor=avg_factor, **kwargs + ) return loss_cls diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py index 27a77b962d7d..9384e60bd048 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py @@ -9,24 +9,15 @@ @weighted_loss -def dice_loss(pred, - target, - valid_mask, - smooth=1, - exponent=2, - class_weight=None, - ignore_index=255): +def dice_loss(pred, target, valid_mask, smooth=1, exponent=2, class_weight=None, ignore_index=255): assert pred.shape[0] == target.shape[0] total_loss = 0 num_classes = pred.shape[1] for i in range(num_classes): if i != ignore_index: dice_loss = binary_dice_loss( - pred[:, i], - target[..., i], - valid_mask=valid_mask, - smooth=smooth, - exponent=exponent) + pred[:, i], target[..., i], valid_mask=valid_mask, smooth=smooth, exponent=exponent + ) if class_weight is not None: dice_loss *= class_weight[i] total_loss += dice_loss @@ -69,14 +60,9 @@ class DiceLoss(nn.Module): ignore_index (int | None): The label index to be ignored. Default: 255. """ - def __init__(self, - smooth=1, - exponent=2, - reduction='mean', - class_weight=None, - loss_weight=1.0, - ignore_index=255, - **kwards): + def __init__( + self, smooth=1, exponent=2, reduction='mean', class_weight=None, loss_weight=1.0, ignore_index=255, **kwards + ): super(DiceLoss, self).__init__() self.smooth = smooth self.exponent = exponent @@ -85,15 +71,9 @@ def __init__(self, self.loss_weight = loss_weight self.ignore_index = ignore_index - def forward(self, - pred, - target, - avg_factor=None, - reduction_override=None, - **kwards): + def forward(self, pred, target, avg_factor=None, reduction_override=None, **kwards): assert reduction_override in (None, 'none', 'mean', 'sum') - reduction = ( - reduction_override if reduction_override else self.reduction) + reduction = reduction_override if reduction_override else self.reduction if self.class_weight is not None: class_weight = pred.new_tensor(self.class_weight) else: @@ -101,9 +81,7 @@ def forward(self, pred = F.softmax(pred, dim=1) num_classes = pred.shape[1] - one_hot_target = F.one_hot( - torch.clamp(target.long(), 0, num_classes - 1), - num_classes=num_classes) + one_hot_target = F.one_hot(torch.clamp(target.long(), 0, num_classes - 1), num_classes=num_classes) valid_mask = (target != self.ignore_index).long() loss = self.loss_weight * dice_loss( @@ -115,5 +93,6 @@ def forward(self, smooth=self.smooth, exponent=self.exponent, class_weight=class_weight, - ignore_index=self.ignore_index) + ignore_index=self.ignore_index, + ) return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py index fb3b313b61e4..e1c049874490 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py @@ -2,11 +2,12 @@ ch/lovasz_losses.py Lovasz-Softmax and Jaccard hinge loss in PyTorch Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License)""" -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv import torch import torch.nn as nn import torch.nn.functional as F +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + from ..builder import LOSSES from .utils import get_class_weight, weight_reduce_loss @@ -20,7 +21,7 @@ def lovasz_grad(gt_sorted): gts = gt_sorted.sum() intersection = gts - gt_sorted.float().cumsum(0) union = gts + (1 - gt_sorted).float().cumsum(0) - jaccard = 1. - intersection / union + jaccard = 1.0 - intersection / union if p > 1: # cover 1-pixel case jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] return jaccard @@ -33,7 +34,7 @@ def flatten_binary_logits(logits, labels, ignore_index=None): labels = labels.view(-1) if ignore_index is None: return logits, labels - valid = (labels != ignore_index) + valid = labels != ignore_index vlogits = logits[valid] vlabels = labels[valid] return vlogits, vlabels @@ -50,7 +51,7 @@ def flatten_probs(probs, labels, ignore_index=None): labels = labels.view(-1) if ignore_index is None: return probs, labels - valid = (labels != ignore_index) + valid = labels != ignore_index vprobs = probs[valid.nonzero().squeeze()] vlabels = labels[valid] return vprobs, vlabels @@ -69,9 +70,9 @@ def lovasz_hinge_flat(logits, labels): """ if len(labels) == 0: # only void pixels, the gradients should be 0 - return logits.sum() * 0. - signs = 2. * labels.float() - 1. - errors = (1. - logits * signs) + return logits.sum() * 0.0 + signs = 2.0 * labels.float() - 1.0 + errors = 1.0 - logits * signs errors_sorted, perm = torch.sort(errors, dim=0, descending=True) perm = perm.data gt_sorted = labels[perm] @@ -80,14 +81,16 @@ def lovasz_hinge_flat(logits, labels): return loss -def lovasz_hinge(logits, - labels, - classes='present', - per_image=False, - class_weight=None, - reduction='mean', - avg_factor=None, - ignore_index=255): +def lovasz_hinge( + logits, + labels, + classes='present', + per_image=False, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=255, +): """Binary Lovasz hinge loss. Args: @@ -113,15 +116,12 @@ def lovasz_hinge(logits, """ if per_image: loss = [ - lovasz_hinge_flat(*flatten_binary_logits( - logit.unsqueeze(0), label.unsqueeze(0), ignore_index)) + lovasz_hinge_flat(*flatten_binary_logits(logit.unsqueeze(0), label.unsqueeze(0), ignore_index)) for logit, label in zip(logits, labels) ] - loss = weight_reduce_loss( - torch.stack(loss), None, reduction, avg_factor) + loss = weight_reduce_loss(torch.stack(loss), None, reduction, avg_factor) else: - loss = lovasz_hinge_flat( - *flatten_binary_logits(logits, labels, ignore_index)) + loss = lovasz_hinge_flat(*flatten_binary_logits(logits, labels, ignore_index)) return loss @@ -143,13 +143,13 @@ def lovasz_softmax_flat(probs, labels, classes='present', class_weight=None): """ if probs.numel() == 0: # only void pixels, the gradients should be 0 - return probs * 0. + return probs * 0.0 C = probs.size(1) losses = [] class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes for c in class_to_sum: fg = (labels == c).float() # foreground for class c - if (classes == 'present' and fg.sum() == 0): + if classes == 'present' and fg.sum() == 0: continue if C == 1: if len(classes) > 1: @@ -168,14 +168,16 @@ def lovasz_softmax_flat(probs, labels, classes='present', class_weight=None): return torch.stack(losses).mean() -def lovasz_softmax(probs, - labels, - classes='present', - per_image=False, - class_weight=None, - reduction='mean', - avg_factor=None, - ignore_index=255): +def lovasz_softmax( + probs, + labels, + classes='present', + per_image=False, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=255, +): """Multi-class Lovasz-Softmax loss. Args: @@ -205,19 +207,17 @@ def lovasz_softmax(probs, if per_image: loss = [ lovasz_softmax_flat( - *flatten_probs( - prob.unsqueeze(0), label.unsqueeze(0), ignore_index), + *flatten_probs(prob.unsqueeze(0), label.unsqueeze(0), ignore_index), classes=classes, - class_weight=class_weight) + class_weight=class_weight + ) for prob, label in zip(probs, labels) ] - loss = weight_reduce_loss( - torch.stack(loss), None, reduction, avg_factor) + loss = weight_reduce_loss(torch.stack(loss), None, reduction, avg_factor) else: loss = lovasz_softmax_flat( - *flatten_probs(probs, labels, ignore_index), - classes=classes, - class_weight=class_weight) + *flatten_probs(probs, labels, ignore_index), classes=classes, class_weight=class_weight + ) return loss @@ -245,15 +245,20 @@ class LovaszLoss(nn.Module): loss_weight (float, optional): Weight of the loss. Defaults to 1.0. """ - def __init__(self, - loss_type='multi_class', - classes='present', - per_image=False, - reduction='mean', - class_weight=None, - loss_weight=1.0): + def __init__( + self, + loss_type='multi_class', + classes='present', + per_image=False, + reduction='mean', + class_weight=None, + loss_weight=1.0, + ): super(LovaszLoss, self).__init__() - assert loss_type in ('binary', 'multi_class'), "loss_type should be \ + assert loss_type in ( + 'binary', + 'multi_class', + ), "loss_type should be \ 'binary' or 'multi_class'." if loss_type == 'binary': @@ -262,7 +267,9 @@ def __init__(self, self.cls_criterion = lovasz_softmax assert classes in ('all', 'present') or mmcv.is_list_of(classes, int) if not per_image: - assert reduction == 'none', "reduction should be 'none' when \ + assert ( + reduction == 'none' + ), "reduction should be 'none' when \ per_image is False." self.classes = classes @@ -271,17 +278,10 @@ def __init__(self, self.loss_weight = loss_weight self.class_weight = get_class_weight(class_weight) - def forward(self, - cls_score, - label, - weight=None, - avg_factor=None, - reduction_override=None, - **kwargs): + def forward(self, cls_score, label, weight=None, avg_factor=None, reduction_override=None, **kwargs): """Forward function.""" assert reduction_override in (None, 'none', 'mean', 'sum') - reduction = ( - reduction_override if reduction_override else self.reduction) + reduction = reduction_override if reduction_override else self.reduction if self.class_weight is not None: class_weight = cls_score.new_tensor(self.class_weight) else: @@ -299,5 +299,6 @@ def forward(self, class_weight=class_weight, reduction=reduction, avg_factor=avg_factor, - **kwargs) + **kwargs + ) return loss_cls diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py index d9801a4195c9..e1719c276160 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py @@ -1,9 +1,10 @@ import functools -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv import numpy as np import torch.nn.functional as F +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + def get_class_weight(class_weight): """Get class weight for loss function. @@ -107,12 +108,7 @@ def weighted_loss(loss_func): """ @functools.wraps(loss_func) - def wrapper(pred, - target, - weight=None, - reduction='mean', - avg_factor=None, - **kwargs): + def wrapper(pred, target, weight=None, reduction='mean', avg_factor=None, **kwargs): # get element-wise loss loss = loss_func(pred, target, **kwargs) loss = weight_reduce_loss(loss, weight, reduction, avg_factor) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py index fa5c836de534..fd5ca4c087b4 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py @@ -60,20 +60,22 @@ class FPN(nn.Module): outputs[3].shape = torch.Size([1, 11, 43, 43]) """ - def __init__(self, - in_channels, - out_channels, - num_outs, - start_level=0, - end_level=-1, - add_extra_convs=False, - extra_convs_on_inputs=False, - relu_before_extra_convs=False, - no_norm_on_lateral=False, - conv_cfg=None, - norm_cfg=None, - act_cfg=None, - upsample_cfg=dict(mode='nearest')): + def __init__( + self, + in_channels, + out_channels, + num_outs, + start_level=0, + end_level=-1, + add_extra_convs=False, + extra_convs_on_inputs=False, + relu_before_extra_convs=False, + no_norm_on_lateral=False, + conv_cfg=None, + norm_cfg=None, + act_cfg=None, + upsample_cfg=dict(mode='nearest'), + ): super(FPN, self).__init__() assert isinstance(in_channels, list) self.in_channels = in_channels @@ -119,7 +121,8 @@ def __init__(self, conv_cfg=conv_cfg, norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, act_cfg=act_cfg, - inplace=False) + inplace=False, + ) fpn_conv = ConvModule( out_channels, out_channels, @@ -128,7 +131,8 @@ def __init__(self, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, - inplace=False) + inplace=False, + ) self.lateral_convs.append(l_conv) self.fpn_convs.append(fpn_conv) @@ -150,7 +154,8 @@ def __init__(self, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, - inplace=False) + inplace=False, + ) self.fpn_convs.append(extra_fpn_conv) # default init_weights for conv(msra) and norm in ConvModule @@ -163,10 +168,7 @@ def forward(self, inputs): assert len(inputs) == len(self.in_channels) # build laterals - laterals = [ - lateral_conv(inputs[i + self.start_level]) - for i, lateral_conv in enumerate(self.lateral_convs) - ] + laterals = [lateral_conv(inputs[i + self.start_level]) for i, lateral_conv in enumerate(self.lateral_convs)] # build top-down path used_backbone_levels = len(laterals) @@ -174,18 +176,14 @@ def forward(self, inputs): # In some cases, fixing `scale factor` (e.g. 2) is preferred, but # it cannot co-exist with `size` in `F.interpolate`. if 'scale_factor' in self.upsample_cfg: - laterals[i - 1] += F.interpolate(laterals[i], - **self.upsample_cfg) + laterals[i - 1] += F.interpolate(laterals[i], **self.upsample_cfg) else: prev_shape = laterals[i - 1].shape[2:] - laterals[i - 1] += F.interpolate( - laterals[i], size=prev_shape, **self.upsample_cfg) + laterals[i - 1] += F.interpolate(laterals[i], size=prev_shape, **self.upsample_cfg) # build outputs # part 1: from original levels - outs = [ - self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) - ] + outs = [self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)] # part 2: add extra levels if self.num_outs > len(outs): # use max pool to get more levels on top of outputs diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py index a61e2b968d72..395bcd5ed655 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py @@ -19,12 +19,7 @@ class MultiLevelNeck(nn.Module): Default: None. """ - def __init__(self, - in_channels, - out_channels, - scales=[0.5, 1, 2, 4], - norm_cfg=None, - act_cfg=None): + def __init__(self, in_channels, out_channels, scales=[0.5, 1, 2, 4], norm_cfg=None, act_cfg=None): super(MultiLevelNeck, self).__init__() assert isinstance(in_channels, list) self.in_channels = in_channels @@ -35,36 +30,24 @@ def __init__(self, self.convs = nn.ModuleList() for in_channel in in_channels: self.lateral_convs.append( - ConvModule( - in_channel, - out_channels, - kernel_size=1, - norm_cfg=norm_cfg, - act_cfg=act_cfg)) + ConvModule(in_channel, out_channels, kernel_size=1, norm_cfg=norm_cfg, act_cfg=act_cfg) + ) for _ in range(self.num_outs): self.convs.append( ConvModule( - out_channels, - out_channels, - kernel_size=3, - padding=1, - stride=1, - norm_cfg=norm_cfg, - act_cfg=act_cfg)) + out_channels, out_channels, kernel_size=3, padding=1, stride=1, norm_cfg=norm_cfg, act_cfg=act_cfg + ) + ) def forward(self, inputs): assert len(inputs) == len(self.in_channels) print(inputs[0].shape) - inputs = [ - lateral_conv(inputs[i]) - for i, lateral_conv in enumerate(self.lateral_convs) - ] + inputs = [lateral_conv(inputs[i]) for i, lateral_conv in enumerate(self.lateral_convs)] # for len(inputs) not equal to self.num_outs if len(inputs) == 1: inputs = [inputs[0] for _ in range(self.num_outs)] outs = [] for i in range(self.num_outs): - x_resize = F.interpolate( - inputs[i], scale_factor=self.scales[i], mode='bilinear') + x_resize = F.interpolate(inputs[i], scale_factor=self.scales[i], mode='bilinear') outs.append(self.convs[i](x_resize)) return tuple(outs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py index bf7d2829d6ea..bb1eb40a38d7 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py @@ -3,11 +3,12 @@ from abc import ABCMeta, abstractmethod from collections import OrderedDict -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv import numpy as np import torch import torch.distributed as dist import torch.nn as nn + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import auto_fp16 @@ -28,8 +29,7 @@ def with_neck(self): @property def with_auxiliary_head(self): """bool: whether the segmentor has auxiliary head""" - return hasattr(self, - 'auxiliary_head') and self.auxiliary_head is not None + return hasattr(self, 'auxiliary_head') and self.auxiliary_head is not None @property def with_decode_head(self): @@ -85,13 +85,11 @@ def forward_test(self, imgs, img_metas, **kwargs): """ for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: if not isinstance(var, list): - raise TypeError(f'{name} must be a list, but got ' - f'{type(var)}') + raise TypeError(f'{name} must be a list, but got ' f'{type(var)}') num_augs = len(imgs) if num_augs != len(img_metas): - raise ValueError(f'num of augmentations ({len(imgs)}) != ' - f'num of image meta ({len(img_metas)})') + raise ValueError(f'num of augmentations ({len(imgs)}) != ' f'num of image meta ({len(img_metas)})') # all images in the same aug batch all of the same ori_shape and pad # shape for img_meta in img_metas: @@ -107,7 +105,7 @@ def forward_test(self, imgs, img_metas, **kwargs): else: return self.aug_test(imgs, img_metas, **kwargs) - @auto_fp16(apply_to=('img', )) + @auto_fp16(apply_to=('img',)) def forward(self, img, img_metas, return_loss=True, **kwargs): """Calls either :func:`forward_train` or :func:`forward_test` depending on whether ``return_loss`` is ``True``. @@ -152,10 +150,7 @@ def train_step(self, data_batch, optimizer, **kwargs): losses = self(**data_batch) loss, log_vars = self._parse_losses(losses) - outputs = dict( - loss=loss, - log_vars=log_vars, - num_samples=len(data_batch['img_metas'])) + outputs = dict(loss=loss, log_vars=log_vars, num_samples=len(data_batch['img_metas'])) return outputs @@ -189,11 +184,9 @@ def _parse_losses(losses): elif isinstance(loss_value, list): log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) else: - raise TypeError( - f'{loss_name} is not a tensor or list of tensors') + raise TypeError(f'{loss_name} is not a tensor or list of tensors') - loss = sum(_value for _key, _value in log_vars.items() - if 'loss' in _key) + loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key) log_vars['loss'] = loss for loss_name, loss_value in log_vars.items(): @@ -205,15 +198,7 @@ def _parse_losses(losses): return loss, log_vars - def show_result(self, - img, - result, - palette=None, - win_name='', - show=False, - wait_time=0, - out_file=None, - opacity=0.5): + def show_result(self, img, result, palette=None, win_name='', show=False, wait_time=0, out_file=None, opacity=0.5): """Draw `result` over `img`. Args: @@ -241,8 +226,7 @@ def show_result(self, seg = result[0] if palette is None: if self.PALETTE is None: - palette = np.random.randint( - 0, 255, size=(len(self.CLASSES), 3)) + palette = np.random.randint(0, 255, size=(len(self.CLASSES), 3)) else: palette = self.PALETTE palette = np.array(palette) @@ -268,6 +252,5 @@ def show_result(self, mmcv.imwrite(img, out_file) if not (show or out_file): - warnings.warn('show==False and out_file is not specified, only ' - 'result image will be returned') + warnings.warn('show==False and out_file is not specified, only ' 'result image will be returned') return img diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py index 0de6d2991660..2f53cfb9e41c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py @@ -16,15 +16,17 @@ class CascadeEncoderDecoder(EncoderDecoder): will be the input of next decoder_head. """ - def __init__(self, - num_stages, - backbone, - decode_head, - neck=None, - auxiliary_head=None, - train_cfg=None, - test_cfg=None, - pretrained=None): + def __init__( + self, + num_stages, + backbone, + decode_head, + neck=None, + auxiliary_head=None, + train_cfg=None, + test_cfg=None, + pretrained=None, + ): self.num_stages = num_stages super(CascadeEncoderDecoder, self).__init__( backbone=backbone, @@ -33,7 +35,8 @@ def __init__(self, auxiliary_head=auxiliary_head, train_cfg=train_cfg, test_cfg=test_cfg, - pretrained=pretrained) + pretrained=pretrained, + ) def _init_decode_head(self, decode_head): """Initialize ``decode_head``""" @@ -68,13 +71,8 @@ def encode_decode(self, img, img_metas): x = self.extract_feat(img) out = self.decode_head[0].forward_test(x, img_metas, self.test_cfg) for i in range(1, self.num_stages): - out = self.decode_head[i].forward_test(x, out, img_metas, - self.test_cfg) - out = resize( - input=out, - size=img.shape[2:], - mode='bilinear', - align_corners=self.align_corners) + out = self.decode_head[i].forward_test(x, out, img_metas, self.test_cfg) + out = resize(input=out, size=img.shape[2:], mode='bilinear', align_corners=self.align_corners) return out def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): @@ -82,17 +80,16 @@ def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): training.""" losses = dict() - loss_decode = self.decode_head[0].forward_train( - x, img_metas, gt_semantic_seg, self.train_cfg) + loss_decode = self.decode_head[0].forward_train(x, img_metas, gt_semantic_seg, self.train_cfg) losses.update(add_prefix(loss_decode, 'decode_0')) for i in range(1, self.num_stages): # forward test again, maybe unnecessary for most methods. - prev_outputs = self.decode_head[i - 1].forward_test( - x, img_metas, self.test_cfg) + prev_outputs = self.decode_head[i - 1].forward_test(x, img_metas, self.test_cfg) loss_decode = self.decode_head[i].forward_train( - x, prev_outputs, img_metas, gt_semantic_seg, self.train_cfg) + x, prev_outputs, img_metas, gt_semantic_seg, self.train_cfg + ) losses.update(add_prefix(loss_decode, f'decode_{i}')) return losses diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py index ca573d7af79e..198bf34de1e8 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py @@ -18,14 +18,9 @@ class EncoderDecoder(BaseSegmentor): which could be dumped during inference. """ - def __init__(self, - backbone, - decode_head, - neck=None, - auxiliary_head=None, - train_cfg=None, - test_cfg=None, - pretrained=None): + def __init__( + self, backbone, decode_head, neck=None, auxiliary_head=None, train_cfg=None, test_cfg=None, pretrained=None + ): super(EncoderDecoder, self).__init__() self.backbone = builder.build_backbone(backbone) if neck is not None: @@ -86,20 +81,14 @@ def encode_decode(self, img, img_metas): map of the same size as input.""" x = self.extract_feat(img) out = self._decode_head_forward_test(x, img_metas) - out = resize( - input=out, - size=img.shape[2:], - mode='bilinear', - align_corners=self.align_corners) + out = resize(input=out, size=img.shape[2:], mode='bilinear', align_corners=self.align_corners) return out def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): """Run forward function and calculate loss for decode head in training.""" losses = dict() - loss_decode = self.decode_head.forward_train(x, img_metas, - gt_semantic_seg, - self.train_cfg) + loss_decode = self.decode_head.forward_train(x, img_metas, gt_semantic_seg, self.train_cfg) losses.update(add_prefix(loss_decode, 'decode')) return losses @@ -116,13 +105,10 @@ def _auxiliary_head_forward_train(self, x, img_metas, gt_semantic_seg): losses = dict() if isinstance(self.auxiliary_head, nn.ModuleList): for idx, aux_head in enumerate(self.auxiliary_head): - loss_aux = aux_head.forward_train(x, img_metas, - gt_semantic_seg, - self.train_cfg) + loss_aux = aux_head.forward_train(x, img_metas, gt_semantic_seg, self.train_cfg) losses.update(add_prefix(loss_aux, f'aux_{idx}')) else: - loss_aux = self.auxiliary_head.forward_train( - x, img_metas, gt_semantic_seg, self.train_cfg) + loss_aux = self.auxiliary_head.forward_train(x, img_metas, gt_semantic_seg, self.train_cfg) losses.update(add_prefix(loss_aux, 'aux')) return losses @@ -154,13 +140,11 @@ def forward_train(self, img, img_metas, gt_semantic_seg): losses = dict() - loss_decode = self._decode_head_forward_train(x, img_metas, - gt_semantic_seg) + loss_decode = self._decode_head_forward_train(x, img_metas, gt_semantic_seg) losses.update(loss_decode) if self.with_auxiliary_head: - loss_aux = self._auxiliary_head_forward_train( - x, img_metas, gt_semantic_seg) + loss_aux = self._auxiliary_head_forward_train(x, img_metas, gt_semantic_seg) losses.update(loss_aux) return losses @@ -191,16 +175,13 @@ def slide_inference(self, img, img_meta, rescale): x1 = max(x2 - w_crop, 0) crop_img = img[:, :, y1:y2, x1:x2] crop_seg_logit = self.encode_decode(crop_img, img_meta) - preds += F.pad(crop_seg_logit, - (int(x1), int(preds.shape[3] - x2), int(y1), - int(preds.shape[2] - y2))) + preds += F.pad(crop_seg_logit, (int(x1), int(preds.shape[3] - x2), int(y1), int(preds.shape[2] - y2))) count_mat[:, :, y1:y2, x1:x2] += 1 assert (count_mat == 0).sum() == 0 if torch.onnx.is_in_onnx_export(): # cast count_mat to constant while exporting to ONNX - count_mat = torch.from_numpy( - count_mat.cpu().detach().numpy()).to(device=img.device) + count_mat = torch.from_numpy(count_mat.cpu().detach().numpy()).to(device=img.device) preds = preds / count_mat if rescale: preds = resize( @@ -208,7 +189,8 @@ def slide_inference(self, img, img_meta, rescale): size=img_meta[0]['ori_shape'][:2], mode='bilinear', align_corners=self.align_corners, - warning=False) + warning=False, + ) return preds def whole_inference(self, img, img_meta, rescale): @@ -221,12 +203,7 @@ def whole_inference(self, img, img_meta, rescale): size = img.shape[2:] else: size = img_meta[0]['ori_shape'][:2] - seg_logit = resize( - seg_logit, - size=size, - mode='bilinear', - align_corners=self.align_corners, - warning=False) + seg_logit = resize(seg_logit, size=size, mode='bilinear', align_corners=self.align_corners, warning=False) return seg_logit @@ -259,9 +236,9 @@ def inference(self, img, img_meta, rescale): flip_direction = img_meta[0]['flip_direction'] assert flip_direction in ['horizontal', 'vertical'] if flip_direction == 'horizontal': - output = output.flip(dims=(3, )) + output = output.flip(dims=(3,)) elif flip_direction == 'vertical': - output = output.flip(dims=(2, )) + output = output.flip(dims=(2,)) return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py index 3d3bdd349b9f..be7a5bd7a676 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py @@ -8,6 +8,13 @@ from .weight_init import trunc_normal_ __all__ = [ - 'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'InvertedResidual', - 'UpConvBlock', 'InvertedResidualV3', 'SELayer', 'DropPath', 'trunc_normal_' + 'ResLayer', + 'SelfAttentionBlock', + 'make_divisible', + 'InvertedResidual', + 'UpConvBlock', + 'InvertedResidualV3', + 'SELayer', + 'DropPath', + 'trunc_normal_', ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py index 4520b0ff407d..cf9492f1c324 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py @@ -14,18 +14,16 @@ class DropPath(nn.Module): to be between 0 and 1. Default: 0. """ - def __init__(self, drop_prob=0.): + def __init__(self, drop_prob=0.0): super(DropPath, self).__init__() self.drop_prob = drop_prob self.keep_prob = 1 - drop_prob def forward(self, x): - if self.drop_prob == 0. or not self.training: + if self.drop_prob == 0.0 or not self.training: return x - shape = (x.shape[0], ) + (1, ) * ( - x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets - random_tensor = self.keep_prob + torch.rand( - shape, dtype=x.dtype, device=x.device) + shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets + random_tensor = self.keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) random_tensor.floor_() # binarize output = x.div(self.keep_prob) * random_tensor return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py index e9390b6b09f3..6c2262f7922c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py @@ -1,7 +1,8 @@ -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule from torch import nn from torch.utils import checkpoint as cp +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + from .se_layer import SELayer @@ -28,20 +29,21 @@ class InvertedResidual(nn.Module): Tensor: The output tensor. """ - def __init__(self, - in_channels, - out_channels, - stride, - expand_ratio, - dilation=1, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU6'), - with_cp=False): + def __init__( + self, + in_channels, + out_channels, + stride, + expand_ratio, + dilation=1, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU6'), + with_cp=False, + ): super(InvertedResidual, self).__init__() self.stride = stride - assert stride in [1, 2], f'stride must in [1, 2]. ' \ - f'But received {stride}.' + assert stride in [1, 2], f'stride must in [1, 2]. ' f'But received {stride}.' self.with_cp = with_cp self.use_res_connect = self.stride == 1 and in_channels == out_channels hidden_dim = int(round(in_channels * expand_ratio)) @@ -55,31 +57,36 @@ def __init__(self, kernel_size=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg)) - layers.extend([ - ConvModule( - in_channels=hidden_dim, - out_channels=hidden_dim, - kernel_size=3, - stride=stride, - padding=dilation, - dilation=dilation, - groups=hidden_dim, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg), - ConvModule( - in_channels=hidden_dim, - out_channels=out_channels, - kernel_size=1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=None) - ]) + act_cfg=act_cfg, + ) + ) + layers.extend( + [ + ConvModule( + in_channels=hidden_dim, + out_channels=hidden_dim, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + groups=hidden_dim, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ), + ConvModule( + in_channels=hidden_dim, + out_channels=out_channels, + kernel_size=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None, + ), + ] + ) self.conv = nn.Sequential(*layers) def forward(self, x): - def _inner_forward(x): if self.use_res_connect: return x + self.conv(x) @@ -121,20 +128,22 @@ class InvertedResidualV3(nn.Module): Tensor: The output tensor. """ - def __init__(self, - in_channels, - out_channels, - mid_channels, - kernel_size=3, - stride=1, - se_cfg=None, - with_expand_conv=True, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - with_cp=False): + def __init__( + self, + in_channels, + out_channels, + mid_channels, + kernel_size=3, + stride=1, + se_cfg=None, + with_expand_conv=True, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + with_cp=False, + ): super(InvertedResidualV3, self).__init__() - self.with_res_shortcut = (stride == 1 and in_channels == out_channels) + self.with_res_shortcut = stride == 1 and in_channels == out_channels assert stride in [1, 2] self.with_cp = with_cp self.with_se = se_cfg is not None @@ -154,7 +163,8 @@ def __init__(self, padding=0, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) self.depthwise_conv = ConvModule( in_channels=mid_channels, out_channels=mid_channels, @@ -162,10 +172,10 @@ def __init__(self, stride=stride, padding=kernel_size // 2, groups=mid_channels, - conv_cfg=dict( - type='Conv2dAdaptivePadding') if stride == 2 else conv_cfg, + conv_cfg=dict(type='Conv2dAdaptivePadding') if stride == 2 else conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) if self.with_se: self.se = SELayer(**se_cfg) @@ -178,10 +188,10 @@ def __init__(self, padding=0, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=None) + act_cfg=None, + ) def forward(self, x): - def _inner_forward(x): out = x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py index d094f15783dd..370d078863f1 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py @@ -1,6 +1,7 @@ -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import build_conv_layer, build_norm_layer from torch import nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import build_conv_layer, build_norm_layer + class ResLayer(nn.Sequential): """ResLayer to build ResNet style backbone. @@ -23,19 +24,21 @@ class ResLayer(nn.Sequential): Default: False """ - def __init__(self, - block, - inplanes, - planes, - num_blocks, - stride=1, - dilation=1, - avg_down=False, - conv_cfg=None, - norm_cfg=dict(type='BN'), - multi_grid=None, - contract_dilation=False, - **kwargs): + def __init__( + self, + block, + inplanes, + planes, + num_blocks, + stride=1, + dilation=1, + avg_down=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + multi_grid=None, + contract_dilation=False, + **kwargs + ): self.block = block downsample = None @@ -45,21 +48,16 @@ def __init__(self, if avg_down: conv_stride = 1 downsample.append( - nn.AvgPool2d( - kernel_size=stride, - stride=stride, - ceil_mode=True, - count_include_pad=False)) - downsample.extend([ - build_conv_layer( - conv_cfg, - inplanes, - planes * block.expansion, - kernel_size=1, - stride=conv_stride, - bias=False), - build_norm_layer(norm_cfg, planes * block.expansion)[1] - ]) + nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False) + ) + downsample.extend( + [ + build_conv_layer( + conv_cfg, inplanes, planes * block.expansion, kernel_size=1, stride=conv_stride, bias=False + ), + build_norm_layer(norm_cfg, planes * block.expansion)[1], + ] + ) downsample = nn.Sequential(*downsample) layers = [] @@ -79,7 +77,9 @@ def __init__(self, downsample=downsample, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - **kwargs)) + **kwargs + ) + ) inplanes = planes * block.expansion for i in range(1, num_blocks): layers.append( @@ -90,5 +90,7 @@ def __init__(self, dilation=dilation if multi_grid is None else multi_grid[i], conv_cfg=conv_cfg, norm_cfg=norm_cfg, - **kwargs)) + **kwargs + ) + ) super(ResLayer, self).__init__(*layers) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py index a3836ed3adda..b00aaeade295 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py @@ -1,5 +1,6 @@ -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv import torch.nn as nn + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule from .make_divisible import make_divisible @@ -23,12 +24,13 @@ class SELayer(nn.Module): divisor=6.0)). """ - def __init__(self, - channels, - ratio=16, - conv_cfg=None, - act_cfg=(dict(type='ReLU'), - dict(type='HSigmoid', bias=3.0, divisor=6.0))): + def __init__( + self, + channels, + ratio=16, + conv_cfg=None, + act_cfg=(dict(type='ReLU'), dict(type='HSigmoid', bias=3.0, divisor=6.0)), + ): super(SELayer, self).__init__() if isinstance(act_cfg, dict): act_cfg = (act_cfg, act_cfg) @@ -41,14 +43,16 @@ def __init__(self, kernel_size=1, stride=1, conv_cfg=conv_cfg, - act_cfg=act_cfg[0]) + act_cfg=act_cfg[0], + ) self.conv2 = ConvModule( in_channels=make_divisible(channels // ratio, 8), out_channels=channels, kernel_size=1, stride=1, conv_cfg=conv_cfg, - act_cfg=act_cfg[1]) + act_cfg=act_cfg[1], + ) def forward(self, x): out = self.global_avgpool(x) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py index 4c6d4da3cbf8..52f37c728381 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py @@ -1,8 +1,9 @@ import torch -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, constant_init from torch import nn as nn from torch.nn import functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, constant_init + class SelfAttentionBlock(nn.Module): """General self-attention block/non-local block. @@ -29,11 +30,25 @@ class SelfAttentionBlock(nn.Module): act_cfg (dict|None): Config of activation layers. """ - def __init__(self, key_in_channels, query_in_channels, channels, - out_channels, share_key_query, query_downsample, - key_downsample, key_query_num_convs, value_out_num_convs, - key_query_norm, value_out_norm, matmul_norm, with_out, - conv_cfg, norm_cfg, act_cfg): + def __init__( + self, + key_in_channels, + query_in_channels, + channels, + out_channels, + share_key_query, + query_downsample, + key_downsample, + key_query_num_convs, + value_out_num_convs, + key_query_norm, + value_out_norm, + matmul_norm, + with_out, + conv_cfg, + norm_cfg, + act_cfg, + ): super(SelfAttentionBlock, self).__init__() if share_key_query: assert key_in_channels == query_in_channels @@ -52,7 +67,8 @@ def __init__(self, key_in_channels, query_in_channels, channels, use_conv_module=key_query_norm, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) if share_key_query: self.query_project = self.key_project else: @@ -63,7 +79,8 @@ def __init__(self, key_in_channels, query_in_channels, channels, use_conv_module=key_query_norm, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) self.value_project = self.build_project( key_in_channels, channels if with_out else out_channels, @@ -71,7 +88,8 @@ def __init__(self, key_in_channels, query_in_channels, channels, use_conv_module=value_out_norm, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) if with_out: self.out_project = self.build_project( channels, @@ -80,7 +98,8 @@ def __init__(self, key_in_channels, query_in_channels, channels, use_conv_module=value_out_norm, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) else: self.out_project = None @@ -96,28 +115,12 @@ def init_weights(self): if not isinstance(self.out_project, ConvModule): constant_init(self.out_project, 0) - def build_project(self, in_channels, channels, num_convs, use_conv_module, - conv_cfg, norm_cfg, act_cfg): + def build_project(self, in_channels, channels, num_convs, use_conv_module, conv_cfg, norm_cfg, act_cfg): """Build projection layer for key/query/value/out.""" if use_conv_module: - convs = [ - ConvModule( - in_channels, - channels, - 1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg) - ] + convs = [ConvModule(in_channels, channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)] for _ in range(num_convs - 1): - convs.append( - ConvModule( - channels, - channels, - 1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg)) + convs.append(ConvModule(channels, channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)) else: convs = [nn.Conv2d(in_channels, channels, 1)] for _ in range(num_convs - 1): @@ -148,7 +151,7 @@ def forward(self, query_feats, key_feats): sim_map = torch.matmul(query, key) if self.matmul_norm: - sim_map = (self.channels**-.5) * sim_map + sim_map = (self.channels ** -0.5) * sim_map sim_map = F.softmax(sim_map, dim=-1) context = torch.matmul(sim_map, value) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py index 7d236018920a..8558925074e1 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py @@ -41,21 +41,23 @@ class UpConvBlock(nn.Module): plugins (dict): plugins for convolutional layers. Default: None. """ - def __init__(self, - conv_block, - in_channels, - skip_channels, - out_channels, - num_convs=2, - stride=1, - dilation=1, - with_cp=False, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - upsample_cfg=dict(type='InterpConv'), - dcn=None, - plugins=None): + def __init__( + self, + conv_block, + in_channels, + skip_channels, + out_channels, + num_convs=2, + stride=1, + dilation=1, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + dcn=None, + plugins=None, + ): super(UpConvBlock, self).__init__() assert dcn is None, 'Not implemented yet.' assert plugins is None, 'Not implemented yet.' @@ -71,7 +73,8 @@ def __init__(self, norm_cfg=norm_cfg, act_cfg=act_cfg, dcn=None, - plugins=None) + plugins=None, + ) if upsample_cfg is not None: self.upsample = build_upsample_layer( cfg=upsample_cfg, @@ -79,7 +82,8 @@ def __init__(self, out_channels=skip_channels, with_cp=with_cp, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) else: self.upsample = ConvModule( in_channels, @@ -89,7 +93,8 @@ def __init__(self, padding=0, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) def forward(self, skip, x): """Forward function.""" diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py index 38141ba3d61f..fc3419e9a74c 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py @@ -13,13 +13,14 @@ def _no_grad_trunc_normal_(tensor, mean, std, a, b): def norm_cdf(x): # Computes standard normal cumulative distribution function - return (1. + math.erf(x / math.sqrt(2.))) / 2. + return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 if (mean < a - 2 * std) or (mean > b + 2 * std): warnings.warn( 'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. ' 'The distribution of values may be incorrect.', - stacklevel=2) + stacklevel=2, + ) with torch.no_grad(): # Values are generated by using a truncated uniform distribution and @@ -37,7 +38,7 @@ def norm_cdf(x): tensor.erfinv_() # Transform to proper mean, std - tensor.mul_(std * math.sqrt(2.)) + tensor.mul_(std * math.sqrt(2.0)) tensor.add_(mean) # Clamp to ensure it's in the proper range @@ -45,7 +46,7 @@ def norm_cdf(x): return tensor -def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): +def trunc_normal_(tensor, mean=0.0, std=1.0, a=-2.0, b=2.0): r"""Fills the input Tensor with values drawn from a truncated normal distribution. The values are effectively drawn from the normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py index 7eb3629a6426..ea4a06ba9297 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py @@ -18,28 +18,23 @@ def __init__(self, channels, num_codes): super(Encoding, self).__init__() # init codewords and smoothing factor self.channels, self.num_codes = channels, num_codes - std = 1. / ((num_codes * channels)**0.5) + std = 1.0 / ((num_codes * channels) ** 0.5) # [num_codes, channels] self.codewords = nn.Parameter( - torch.empty(num_codes, channels, - dtype=torch.float).uniform_(-std, std), - requires_grad=True) + torch.empty(num_codes, channels, dtype=torch.float).uniform_(-std, std), requires_grad=True + ) # [num_codes] - self.scale = nn.Parameter( - torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0), - requires_grad=True) + self.scale = nn.Parameter(torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0), requires_grad=True) @staticmethod def scaled_l2(x, codewords, scale): num_codes, channels = codewords.size() batch_size = x.size(0) reshaped_scale = scale.view((1, 1, num_codes)) - expanded_x = x.unsqueeze(2).expand( - (batch_size, x.size(1), num_codes, channels)) + expanded_x = x.unsqueeze(2).expand((batch_size, x.size(1), num_codes, channels)) reshaped_codewords = codewords.view((1, 1, num_codes, channels)) - scaled_l2_norm = reshaped_scale * ( - expanded_x - reshaped_codewords).pow(2).sum(dim=3) + scaled_l2_norm = reshaped_scale * (expanded_x - reshaped_codewords).pow(2).sum(dim=3) return scaled_l2_norm @staticmethod @@ -48,10 +43,8 @@ def aggregate(assignment_weights, x, codewords): reshaped_codewords = codewords.view((1, 1, num_codes, channels)) batch_size = x.size(0) - expanded_x = x.unsqueeze(2).expand( - (batch_size, x.size(1), num_codes, channels)) - encoded_feat = (assignment_weights.unsqueeze(3) * - (expanded_x - reshaped_codewords)).sum(dim=1) + expanded_x = x.unsqueeze(2).expand((batch_size, x.size(1), num_codes, channels)) + encoded_feat = (assignment_weights.unsqueeze(3) * (expanded_x - reshaped_codewords)).sum(dim=1) return encoded_feat def forward(self, x): @@ -61,14 +54,12 @@ def forward(self, x): # [batch_size, height x width, channels] x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous() # assignment_weights: [batch_size, channels, num_codes] - assignment_weights = F.softmax( - self.scaled_l2(x, self.codewords, self.scale), dim=2) + assignment_weights = F.softmax(self.scaled_l2(x, self.codewords, self.scale), dim=2) # aggregate encoded_feat = self.aggregate(assignment_weights, x, self.codewords) return encoded_feat def __repr__(self): repr_str = self.__class__.__name__ - repr_str += f'(Nx{self.channels}xHxW =>Nx{self.num_codes}' \ - f'x{self.channels})' + repr_str += f'(Nx{self.channels}xHxW =>Nx{self.num_codes}' f'x{self.channels})' return repr_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py index 0ed9a0cb8d7c..d366cf693b49 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py @@ -4,35 +4,28 @@ import torch.nn.functional as F -def resize(input, - size=None, - scale_factor=None, - mode='nearest', - align_corners=None, - warning=True): +def resize(input, size=None, scale_factor=None, mode='nearest', align_corners=None, warning=True): if warning: if size is not None and align_corners: input_h, input_w = tuple(int(x) for x in input.shape[2:]) output_h, output_w = tuple(int(x) for x in size) if output_h > input_h or output_w > output_h: - if ((output_h > 1 and output_w > 1 and input_h > 1 - and input_w > 1) and (output_h - 1) % (input_h - 1) - and (output_w - 1) % (input_w - 1)): + if ( + (output_h > 1 and output_w > 1 and input_h > 1 and input_w > 1) + and (output_h - 1) % (input_h - 1) + and (output_w - 1) % (input_w - 1) + ): warnings.warn( f'When align_corners={align_corners}, ' 'the output would more aligned if ' f'input size {(input_h, input_w)} is `x+1` and ' - f'out size {(output_h, output_w)} is `nx+1`') + f'out size {(output_h, output_w)} is `nx+1`' + ) return F.interpolate(input, size, scale_factor, mode, align_corners) class Upsample(nn.Module): - - def __init__(self, - size=None, - scale_factor=None, - mode='nearest', - align_corners=None): + def __init__(self, size=None, scale_factor=None, mode='nearest', align_corners=None): super(Upsample, self).__init__() self.size = size if isinstance(scale_factor, tuple): diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py index 7d4c191368c2..8ad826babee2 100644 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py @@ -1,8 +1,7 @@ +import nemo.collections.multimodal.models.controlnet.uniformer.mmseg as mmseg from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import collect_env as collect_base_env from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import get_git_hash -import nemo.collections.multimodal.models.controlnet.uniformer.mmseg as mmseg - def collect_env(): """Collect the information of the running environments.""" diff --git a/nemo/collections/multimodal/models/controlnet/util.py b/nemo/collections/multimodal/models/controlnet/util.py index d410985870cd..72087d66b917 100644 --- a/nemo/collections/multimodal/models/controlnet/util.py +++ b/nemo/collections/multimodal/models/controlnet/util.py @@ -8,10 +8,20 @@ from pytorch_lightning.utilities.distributed import rank_zero_only from nemo.collections.multimodal.models.controlnet.uniformer import UniformerDetector + class ImageLogger(Callback): - def __init__(self, batch_frequency=2000, max_images=4, clamp=True, increase_log_steps=True, - rescale=True, disabled=False, log_on_batch_idx=False, log_first_step=False, - log_images_kwargs=None): + def __init__( + self, + batch_frequency=2000, + max_images=4, + clamp=True, + increase_log_steps=True, + rescale=True, + disabled=False, + log_on_batch_idx=False, + log_first_step=False, + log_images_kwargs=None, + ): super().__init__() self.rescale = rescale self.batch_freq = batch_frequency @@ -43,10 +53,12 @@ def log_local(self, save_dir, split, images, global_step, current_epoch, batch_i def log_img(self, pl_module, batch, batch_idx, split="train"): check_idx = batch_idx # if self.log_on_batch_idx else pl_module.global_step - if (self.check_frequency(check_idx) and # batch_idx % self.batch_freq == 0 - hasattr(pl_module, "log_images") and - callable(pl_module.log_images) and - self.max_images > 0): + if ( + self.check_frequency(check_idx) + and hasattr(pl_module, "log_images") # batch_idx % self.batch_freq == 0 + and callable(pl_module.log_images) + and self.max_images > 0 + ): logger = type(pl_module.logger) is_train = pl_module.training @@ -62,16 +74,17 @@ def log_img(self, pl_module, batch, batch_idx, split="train"): if isinstance(images[k], torch.Tensor): images[k] = images[k].detach().cpu() if self.clamp: - images[k] = torch.clamp(images[k], -1., 1.) + images[k] = torch.clamp(images[k], -1.0, 1.0) - self.log_local(pl_module.logger.save_dir, split, images, - pl_module.global_step, pl_module.current_epoch, batch_idx) + self.log_local( + pl_module.logger.save_dir, split, images, pl_module.global_step, pl_module.current_epoch, batch_idx + ) if is_train: pl_module.train() def check_frequency(self, check_idx): - return (check_idx % self.batch_freq == 0) + return check_idx % self.batch_freq == 0 def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx): if not self.disabled: @@ -84,4 +97,4 @@ def get_preprocessing_function(name): return apply_uniformer else: print("The application is not yet supported") - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py index 1cd7c5877f97..ea4c944fc9fe 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py @@ -114,7 +114,8 @@ def sample( if conditioning is not None: if isinstance(conditioning, dict): ctmp = conditioning[list(conditioning.keys())[0]] - while isinstance(ctmp, list): ctmp = ctmp[0] + while isinstance(ctmp, list): + ctmp = ctmp[0] cbs = ctmp.shape[0] if cbs != batch_size: print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}") diff --git a/nemo/collections/multimodal/modules/stable_diffusion/attention.py b/nemo/collections/multimodal/modules/stable_diffusion/attention.py index dc4882deb6f0..4c9da75d2257 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/attention.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/attention.py @@ -38,8 +38,8 @@ def check_cuda(): try: - from flash_attn.flash_attn_interface import flash_attn_unpadded_kvpacked_func from flash_attn.flash_attention import FlashAttention + from flash_attn.flash_attn_interface import flash_attn_unpadded_kvpacked_func flash_attn_installed = check_cuda() print("FlashAttention Installed") @@ -296,8 +296,14 @@ def __init__( ): super().__init__() self.disable_self_attn = disable_self_attn - self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, dropout=dropout, - use_flash_attention=use_flash_attention, context_dim=context_dim if self.disable_self_attn else None) # is a self-attention + self.attn1 = CrossAttention( + query_dim=dim, + heads=n_heads, + dim_head=d_head, + dropout=dropout, + use_flash_attention=use_flash_attention, + context_dim=context_dim if self.disable_self_attn else None, + ) # is a self-attention self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff) self.attn2 = CrossAttention( query_dim=dim, @@ -366,7 +372,7 @@ def __init__( context_dim=context_dim[d], use_checkpoint=use_checkpoint, use_flash_attention=use_flash_attention, - disable_self_attn = disable_self_attn + disable_self_attn=disable_self_attn, ) for d in range(depth) ] diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py index b0f277b83c02..bb0bc5f05cfe 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py @@ -863,7 +863,10 @@ def _load_pretrained_model(self, state_dict, ignore_mismatched_sizes=False, from missing_keys = list(set(expected_keys) - set(loaded_keys)) unexpected_keys = list(set(loaded_keys) - set(expected_keys)) - if 'input_blocks.1.0.in_layers.2.weight' in loaded_keys and 'input_blocks.1.0.in_layers.1.weight' in expected_keys: + if ( + 'input_blocks.1.0.in_layers.2.weight' in loaded_keys + and 'input_blocks.1.0.in_layers.1.weight' in expected_keys + ): # GroupNormOpt fuses activation function to one layer, thus the indexing of weights are shifted for following for key_ in missing_keys: s = key_.split('.') diff --git a/nemo/collections/multimodal/parts/utils.py b/nemo/collections/multimodal/parts/utils.py index 39116ce0e9ec..3b95005e1ab1 100644 --- a/nemo/collections/multimodal/parts/utils.py +++ b/nemo/collections/multimodal/parts/utils.py @@ -53,6 +53,7 @@ def randn_like(x, generator=None): def randn_like(x, generator=None): return torch.randn(x.shape, dtype=x.dtype, device=x.device, generator=generator) + def setup_trainer_and_model_for_inference( model_provider: Any, cfg: DictConfig, model_cfg_modifier: Callable, ) -> Tuple[Trainer, Any]: From 9980c4cbff2a0aa52aae4ca3d27b7a65785c6967 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Tue, 27 Jun 2023 14:21:49 -0700 Subject: [PATCH 142/512] Resolve threads --- nemo/collections/multimodal/models/controlnet/util.py | 1 - nemo/collections/multimodal/parts/utils.py | 3 --- 2 files changed, 4 deletions(-) diff --git a/nemo/collections/multimodal/models/controlnet/util.py b/nemo/collections/multimodal/models/controlnet/util.py index 72087d66b917..7379c2f39f25 100644 --- a/nemo/collections/multimodal/models/controlnet/util.py +++ b/nemo/collections/multimodal/models/controlnet/util.py @@ -34,7 +34,6 @@ def __init__( self.log_images_kwargs = log_images_kwargs if log_images_kwargs else {} self.log_first_step = log_first_step - print("callback initialized") @rank_zero_only def log_local(self, save_dir, split, images, global_step, current_epoch, batch_idx): diff --git a/nemo/collections/multimodal/parts/utils.py b/nemo/collections/multimodal/parts/utils.py index 3b95005e1ab1..3c4c696b723e 100644 --- a/nemo/collections/multimodal/parts/utils.py +++ b/nemo/collections/multimodal/parts/utils.py @@ -50,9 +50,6 @@ def randn_like(x, generator=None): return torch.randn(x.shape, dtype=x.dtype, device=x.device, generator=generator) -def randn_like(x, generator=None): - return torch.randn(x.shape, dtype=x.dtype, device=x.device, generator=generator) - def setup_trainer_and_model_for_inference( model_provider: Any, cfg: DictConfig, model_cfg_modifier: Callable, From e75916f9127e87031fe049d1bd150661e6866826 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Tue, 27 Jun 2023 14:24:26 -0700 Subject: [PATCH 143/512] rename training script --- .../controlnet/{train_controlnet.py => controlnet_train.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/multimodal/generative/controlnet/{train_controlnet.py => controlnet_train.py} (100%) diff --git a/examples/multimodal/generative/controlnet/train_controlnet.py b/examples/multimodal/generative/controlnet/controlnet_train.py similarity index 100% rename from examples/multimodal/generative/controlnet/train_controlnet.py rename to examples/multimodal/generative/controlnet/controlnet_train.py From e0b4424cb4f8e11c3934879fc116c36539c9ae76 Mon Sep 17 00:00:00 2001 From: mingyuanm Date: Tue, 27 Jun 2023 14:27:08 -0700 Subject: [PATCH 144/512] format --- nemo/collections/multimodal/models/controlnet/util.py | 1 - nemo/collections/multimodal/parts/utils.py | 1 - 2 files changed, 2 deletions(-) diff --git a/nemo/collections/multimodal/models/controlnet/util.py b/nemo/collections/multimodal/models/controlnet/util.py index 7379c2f39f25..4266f5a12286 100644 --- a/nemo/collections/multimodal/models/controlnet/util.py +++ b/nemo/collections/multimodal/models/controlnet/util.py @@ -34,7 +34,6 @@ def __init__( self.log_images_kwargs = log_images_kwargs if log_images_kwargs else {} self.log_first_step = log_first_step - @rank_zero_only def log_local(self, save_dir, split, images, global_step, current_epoch, batch_idx): root = os.path.join(save_dir, "image_log", split) diff --git a/nemo/collections/multimodal/parts/utils.py b/nemo/collections/multimodal/parts/utils.py index 3c4c696b723e..686f92c5987b 100644 --- a/nemo/collections/multimodal/parts/utils.py +++ b/nemo/collections/multimodal/parts/utils.py @@ -50,7 +50,6 @@ def randn_like(x, generator=None): return torch.randn(x.shape, dtype=x.dtype, device=x.device, generator=generator) - def setup_trainer_and_model_for_inference( model_provider: Any, cfg: DictConfig, model_cfg_modifier: Callable, ) -> Tuple[Trainer, Any]: From cdd499c409a81eb34530eb44156e290d9822732c Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Wed, 28 Jun 2023 18:18:16 -0700 Subject: [PATCH 145/512] fix a small bug in image logger. --- nemo/collections/multimodal/models/controlnet/controlnet.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nemo/collections/multimodal/models/controlnet/controlnet.py b/nemo/collections/multimodal/models/controlnet/controlnet.py index e7b2a463638e..c8b6a60f0c4c 100644 --- a/nemo/collections/multimodal/models/controlnet/controlnet.py +++ b/nemo/collections/multimodal/models/controlnet/controlnet.py @@ -158,6 +158,7 @@ def log_images( log = dict() batch = next(batch) + N = batch['images'].shape[0] z, c = self.get_input(batch, self.first_stage_key, bs=N) c_cat, c = c["c_concat"][:N], c["c_crossattn"][:N] N = min(z.shape[0], N) From 6c7e0e41a425cd8cafdecbfafd8b60c0ddb18426 Mon Sep 17 00:00:00 2001 From: Alexandre Milesi Date: Thu, 29 Jun 2023 11:08:01 -0700 Subject: [PATCH 146/512] ControlNet TRT export --- .../controlnet/conf/controlnet_export.yaml | 24 ++ .../controlnet/controlnet_export.py | 329 ++++++++++++++++++ 2 files changed, 353 insertions(+) create mode 100644 examples/multimodal/generative/controlnet/conf/controlnet_export.yaml create mode 100644 examples/multimodal/generative/controlnet/controlnet_export.py diff --git a/examples/multimodal/generative/controlnet/conf/controlnet_export.yaml b/examples/multimodal/generative/controlnet/conf/controlnet_export.yaml new file mode 100644 index 000000000000..35e7ce9c48e4 --- /dev/null +++ b/examples/multimodal/generative/controlnet/conf/controlnet_export.yaml @@ -0,0 +1,24 @@ +name: controlnet-export + +infer: + unconditional_guidance_scale: 3 + num_images_per_prompt: 1 + hint_image_size: 512 + height: 512 + width: 512 + down_factor: 8 + inference_steps: 50 + sampler_type: 'DDIM' + eta: 0 + out_path: 'controlnet' + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + +model: + restore_from_path: /ckpts/controlnet/nemo_controlnet.nemo + precision: ${trainer.precision} diff --git a/examples/multimodal/generative/controlnet/controlnet_export.py b/examples/multimodal/generative/controlnet/controlnet_export.py new file mode 100644 index 000000000000..c05579cac447 --- /dev/null +++ b/examples/multimodal/generative/controlnet/controlnet_export.py @@ -0,0 +1,329 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import os +from typing import Dict, List, Optional + +import torch +import torch.nn as nn +from omegaconf.omegaconf import OmegaConf + +from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet +from nemo.collections.multimodal.modules.stable_diffusion.encoders.modules import FrozenCLIPEmbedder +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.classes.exportable import Exportable +from nemo.core.config import hydra_runner +from nemo.core.neural_types import ChannelType, NeuralType +from nemo.utils.trt_utils import build_engine + + +@hydra_runner(config_path='conf', config_name='controlnet_export') +def main(cfg): + # setup default values for inference configs + + batch_size = cfg.infer.get('num_images_per_prompt', 1) + height = cfg.infer.get('height', 512) + width = cfg.infer.get('width', 512) + hint_image_size = cfg.infer.get('hint_image_size', 512) + downsampling_factor = cfg.infer.get('down_factor', 8) + fp16 = 16 == cfg.trainer.get("precision", 32) + if cfg.trainer.get("precision", 32) == "bf16": + print("BF16 not supported for export, will use fp32") + + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + model_cfg.unet_config.from_pretrained = None + model_cfg.first_stage_config.from_pretrained = None + model_cfg.control_stage_config.from_pretrained_unet = None + model_cfg.channels_last = True + model_cfg.capture_cudagraph_iters = -1 + + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronControlNet, cfg=cfg, model_cfg_modifier=model_cfg_modifier + ) + model = megatron_diffusion_model.model + model.cuda().eval() + + in_channels = model.model.diffusion_model.in_channels + shape_of_internal = [in_channels, height // downsampling_factor, width // downsampling_factor] + fake_text = [""] + out = model.cond_stage_model(fake_text) + + output_dir = cfg.infer.out_path + os.makedirs(f"{output_dir}/onnx/controlnet/", exist_ok=True) + os.makedirs(f"{output_dir}/onnx/unet/", exist_ok=True) + os.makedirs(f"{output_dir}/onnx/clip/", exist_ok=True) + os.makedirs(f"{output_dir}/onnx/vae/", exist_ok=True) + os.makedirs(f"{output_dir}/plan/", exist_ok=True) + deployment_conf = OmegaConf.create( + { + 'controlnet': OmegaConf.create({}), + 'clip': OmegaConf.create({}), + 'unet': OmegaConf.create({}), + 'vae': OmegaConf.create({}), + 'sampler': OmegaConf.create({}), + 'batch_size': batch_size, + 'downsampling_factor': downsampling_factor, + 'in_channels': in_channels, + 'height': height, + 'width': width, + 'hint_image_size': hint_image_size, + } + ) + deployment_conf.sampler.eta = cfg.infer.get('eta', 0) + deployment_conf.sampler.inference_steps = cfg.infer.get('inference_steps', 50) + deployment_conf.sampler.sampler_type = cfg.infer.get('sampler_type', "ddim") + + ### Controlnet Export + x = torch.randn(1, *shape_of_internal, device="cuda") + t = torch.randint(high=10, size=(1,), device="cuda") + cc = torch.randn(1, out.shape[1], out.shape[2], device="cuda") + hint = torch.randn(1, 3, hint_image_size, hint_image_size, device="cuda") # b c h w + + controlnet_inputs = (x, hint, t, cc) + control_outs = model.control_model(*controlnet_inputs) + control_names = [f"control_{i}" for i in range(len(control_outs))] + + input_names = ["x", "hint", "t", "context"] + output_names = control_names + + print('Running Controlnet onnx export') + torch.onnx.export( + model.control_model, + controlnet_inputs, + f"{output_dir}/onnx/controlnet/controlnet.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"x": {0: 'B'}, "hint": {0: 'B'}, "t": {0: 'B'}, "context": {0: 'B'}}, + opset_version=17, + ) + + input_profile_controlnet = {} + input_profile_controlnet["x"] = [(batch_size, *(x.shape[1:]))] * 3 + input_profile_controlnet["hint"] = [(batch_size, *(hint.shape[1:]))] * 3 + input_profile_controlnet["t"] = [(batch_size, *(t.shape[1:]))] * 3 + input_profile_controlnet["context"] = [(batch_size, *(cc.shape[1:]))] * 3 + + deployment_conf.controlnet.x = input_profile_controlnet["x"][0] + deployment_conf.controlnet.hint = input_profile_controlnet["hint"][0] + deployment_conf.controlnet.t = input_profile_controlnet["t"][0] + deployment_conf.controlnet.context = input_profile_controlnet["context"][0] + deployment_conf.controlnet.control = OmegaConf.create({}) + + for control_name, control_out in zip(control_names, control_outs): + deployment_conf.controlnet.control.update({control_name: (batch_size, *(control_out.shape[1:]))}) + + ### UNet Export + input_names = ["x", "t", "context"] + control_names + output_names = ["logits"] + + class UNETControlWrapper(nn.Module): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, x, t, cc, *control): + if any(part_control is None for part_control in control): + control = None + else: + control = list(control) + + return self.model(x=x, timesteps=t, context=cc, control=control) + + print('Running UNET onnx export') + torch.onnx.export( + UNETControlWrapper(model.model.diffusion_model), + (x, t, cc, *control_outs), + f"{output_dir}/onnx/unet/unet.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={ + **{"x": {0: 'B'}, "t": {0: 'B'}, "context": {0: 'B'}}, + **{control_name: {0: 'B'} for control_name in control_names}, + }, + opset_version=17, + ) + + input_profile_unet = {} + input_profile_unet["x"] = [(batch_size, *(x.shape[1:]))] * 3 + input_profile_unet["t"] = [(batch_size, *(t.shape[1:]))] * 3 + input_profile_unet["context"] = [(batch_size, *(cc.shape[1:]))] * 3 + + deployment_conf.unet.x = input_profile_unet["x"][0] + deployment_conf.unet.t = input_profile_unet["t"][0] + deployment_conf.unet.context = input_profile_unet["context"][0] + deployment_conf.unet.logits = input_profile_unet["x"][0] + deployment_conf.unet.control = OmegaConf.create({}) + + for control_name, control_out in zip(control_names, control_outs): + input_profile_unet[control_name] = [(batch_size, *(control_out.shape[1:]))] * 3 + deployment_conf.unet.control.update({control_name: input_profile_unet[control_name][0]}) + + ### VAE Export + class VAEWrapper(nn.Module): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, z): + z = self.model.post_quant_conv(z) + return self.model.decoder(z) + + input_names = ["z"] + output_names = ["logits"] + z = torch.randn(1, *shape_of_internal, device="cuda") + + print('Running VAE onnx export') + torch.onnx.export( + VAEWrapper(model.first_stage_model), + (z,), + f"{output_dir}/onnx/vae/vae.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"z": {0: 'B'}, "logits": {0: 'B'}}, + opset_version=17, + ) + + input_profile_vae = {} + input_profile_vae["z"] = [(batch_size, *(z.shape[1:]))] * 3 + deployment_conf.vae.z = input_profile_vae["z"][0] + + ### CLIP Export + class CLIPWrapper(nn.Module): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, input_ids): + outputs = self.model(input_ids=input_ids) + return outputs.last_hidden_state + + class OpenCLIPWrapper(nn.Module, Exportable): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, input_ids): + outputs = self.model.encode_with_transformer(input_ids) + return outputs + + def input_example(self, max_text=64): + sample = next(self.parameters()) + tokens = torch.randint(high=10, size=(1, self.model.max_length)).to(sample.device) + return (tokens,) + + @property + def input_types(self) -> Optional[Dict[str, NeuralType]]: + return { + "tokens": NeuralType(('H', 'D'), ChannelType()), + } + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + return {"logits": NeuralType(('B', 'H'), ChannelType())} + + @property + def input_names(self) -> List[str]: + return ['tokens'] + + @property + def output_names(self) -> List[str]: + return ['logits'] + + print('Running CLIP onnx export') + openai_clip = isinstance(model.cond_stage_model, FrozenCLIPEmbedder) + tokens = torch.randint(high=10, size=(1, model.cond_stage_model.max_length), device="cuda") + + if openai_clip: + input_names = ["tokens"] + output_names = ["logits"] + torch.onnx.export( + CLIPWrapper(model.cond_stage_model.transformer), + (tokens,), + f"{output_dir}/onnx/clip/clip.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"tokens": {0: 'B'}, "logits": {0: 'B'}}, + opset_version=17, + do_constant_folding=True, + export_params=True, + ) + else: + clip_model = OpenCLIPWrapper(model.cond_stage_model) + clip_model.export("stable-diffusion/onnx/clip/clip.onnx") + + input_profile_clip = {} + input_profile_clip["tokens"] = [(batch_size, *(tokens.shape[1:]))] * 3 + deployment_conf.clip.tokens = input_profile_clip["tokens"][0] + deployment_conf.clip.logits = (batch_size, model.cond_stage_model.max_length, out.shape[2]) + deployment_conf.clip.unconditional_guidance_scale = cfg.infer.get("unconditional_guidance_scale", 7.5) + deployment_conf.clip.max_length = model.cond_stage_model.max_length + deployment_conf.clip.openai_clip = openai_clip + with open(f"{output_dir}/plan/conf.yaml", "wb") as f: + OmegaConf.save(config=deployment_conf, f=f.name) + + del model, trainer, megatron_diffusion_model, x, t, cc, z, tokens, out, hint, control_outs + torch.cuda.empty_cache() + gc.collect() + + print('Running Controlnet TRT conversion') + build_engine( + f"{output_dir}/onnx/controlnet/controlnet.onnx", + f"{output_dir}/plan/controlnet.plan", + fp16=fp16, + input_profile=input_profile_controlnet, + timing_cache=None, + workspace_size=0, + ) + + print('Running UNET TRT conversion') + build_engine( + f"{output_dir}/onnx/unet/unet.onnx", + f"{output_dir}/plan/unet.plan", + fp16=fp16, + input_profile=input_profile_unet, + timing_cache=None, + workspace_size=0, + ) + + print('Running VAE TRT conversion') + build_engine( + f"{output_dir}/onnx/vae/vae.onnx", + f"{output_dir}/plan/vae.plan", + fp16=fp16, + input_profile=input_profile_vae, + timing_cache=None, + workspace_size=0, + ) + + print('Running CLIP TRT conversion') + build_engine( + f"{output_dir}/onnx/clip/clip.onnx", + f"{output_dir}/plan/clip.plan", + fp16=fp16, + input_profile=input_profile_clip, + timing_cache=None, + workspace_size=0, + ) + + +if __name__ == "__main__": + main() From 08522c9d35cc440849e8e8da17116c722a054c66 Mon Sep 17 00:00:00 2001 From: Ao Tang Date: Thu, 29 Jun 2023 13:10:37 -0700 Subject: [PATCH 147/512] Final MR before release --- .../generative/imagen/conf/base64-2b.yaml | 2 +- .../generative/imagen/conf/export.yaml | 26 ++ .../imagen/conf/imagen_fid_images.yaml | 57 ++++ .../generative/imagen/conf/sr1024-600m.yaml | 1 + .../generative/imagen/generate_fid_images.py | 104 ++++++ .../generative/imagen/imagen_export.py | 218 ++++++++++++ .../imagen/imagen_generate_images.py | 2 +- .../models/imagen/imagen_pipeline.py | 36 +- .../multimodal/models/imagen/precond.py | 17 - .../imagen/diffusionmodules/attention_alt.py | 321 ++++++++++++++++++ .../modules/imagen/diffusionmodules/blocks.py | 22 +- .../modules/imagen/diffusionmodules/nets.py | 18 + .../modules/imagen/encoder/t5encoder.py | 10 +- nemo/collections/multimodal/parts/utils.py | 91 +++++ 14 files changed, 885 insertions(+), 40 deletions(-) create mode 100644 examples/multimodal/generative/imagen/conf/export.yaml create mode 100644 examples/multimodal/generative/imagen/conf/imagen_fid_images.yaml create mode 100644 examples/multimodal/generative/imagen/generate_fid_images.py create mode 100644 examples/multimodal/generative/imagen/imagen_export.py create mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/attention_alt.py diff --git a/examples/multimodal/generative/imagen/conf/base64-2b.yaml b/examples/multimodal/generative/imagen/conf/base64-2b.yaml index 23773b0bf4b4..a5c643533eec 100644 --- a/examples/multimodal/generative/imagen/conf/base64-2b.yaml +++ b/examples/multimodal/generative/imagen/conf/base64-2b.yaml @@ -63,7 +63,7 @@ model: channel_mult: [ 1, 2, 3, 4 ] num_attn_heads: 4 per_head_channels: 64 - cond_dim: 512 + cond_dim: 2048 attention_type: fused feature_pooling_type: attention learned_sinu_pos_emb_dim: 0 diff --git a/examples/multimodal/generative/imagen/conf/export.yaml b/examples/multimodal/generative/imagen/conf/export.yaml new file mode 100644 index 000000000000..9d0ddf462cf0 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/export.yaml @@ -0,0 +1,26 @@ +num_images_per_promt: 2 # The number of images generated for each promt text +model_name: null # Avaliable model_name defined in pretrained_models.yaml +run_ema_model: True # Whether load the reg/ema model when using pretrained models +customized_model: # Mutually exclusive with model_name + # base_ckpt: /aot/exp/ckpts/imagen-megatron/edm-fused-200k.ckpt # Either .ckpt or .nemo is accepatable + base_ckpt: /lm/data/nemo/imagen/edm-fused-1150k-ema.nemo + base_cfg: # Must provided if loading .ckpt checkpoint + sr256_ckpt: /lm/data/nemo/imagen/sr-noise-aug-280k.nemo + sr256_cfg: +target_resolution: 256 # in [64, 256, 1024] +inference_precision: 16 # [16, 32, AMP] +thresholding_method: dynamic + +output_path: 'output/edm' # Save location +record_time: True # Whether to record inference time meta +encoder_path: '/nemo/data/encoders' # Set to null if you wish to download encoders on the fly +samplings: + - # Base64 + step: 250 + cfg: 7.5 + - # SR256 + step: 20 + cfg: 8 + - # SR1024 + step: 20 + cfg: 7.5 diff --git a/examples/multimodal/generative/imagen/conf/imagen_fid_images.yaml b/examples/multimodal/generative/imagen/conf/imagen_fid_images.yaml new file mode 100644 index 000000000000..5a5867cfae50 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/imagen_fid_images.yaml @@ -0,0 +1,57 @@ +name: imagen_fid_images + +fid: + classifier_free_guidance: + - 1 + - 1.5 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + nnodes_per_cfg: 1 + ntasks_per_node: 8 + local_task_id: null + num_images_to_eval: 30000 + coco_captions_path: /aot/datasets/coco2014/coco2014_val_sampled_30k/captions + coco_images_path: /aot/datasets/coco2014/coco2014_val/images_256 + save_path: output/fid-launcher-test + ncaptions_per_batch: 4 + save_all_res: False + save_text: False + +infer: + num_images_per_promt: 1 # The number of images generated for each promt text + model_name: null # Avaliable model_name defined in pretrained_models.yaml + run_ema_model: True # Whether load the reg/ema model when using pretrained models + customized_model: # Mutually exclusive with model_name + base_ckpt: /aot/exp/ckpts/imagen-megatron/edm-fused-1150k-ema.nemo # Either .ckpt or .nemo is accepatable + base_cfg: null # Must provided if loading .ckpt checkpoint + sr256_ckpt: /aot/exp/ckpts/imagen-megatron/sr-noise-aug-280k.nemo + sr256_cfg: null + sr1024_ckpt: null + sr1024_cfg: null + target_resolution: 256 # in [64, 256, 1024] + inference_precision: '32' # [16, 32, AMP] + thresholding_method: 'dynamic' + record_time: True # Whether to record inference time meta + encoder_path: '/ckpts/encoders' # Set to null if you wish to download encoders on the fly + samplings: + - + step: 30 + - + step: 20 + +models: + - + restore_from_path: /aot/exp/ckpts/imagen-megatron/edm-fused-1150k-ema.nemo + - + restore_from_path: /aot/exp/ckpts/imagen-megatron/sr-noise-aug-280k.nemo + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 32 + logger: False # logger provided by exp_manager diff --git a/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml b/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml index 5f40d2a076af..974aba83c076 100644 --- a/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml +++ b/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml @@ -74,6 +74,7 @@ model: scale_shift_norm: True stable_attention: True flash_attention: False + skip_connection_scaling: True # miscellaneous seed: 1234 diff --git a/examples/multimodal/generative/imagen/generate_fid_images.py b/examples/multimodal/generative/imagen/generate_fid_images.py new file mode 100644 index 000000000000..55ca92ace8bd --- /dev/null +++ b/examples/multimodal/generative/imagen/generate_fid_images.py @@ -0,0 +1,104 @@ +import os +import time + +import torch +from omegaconf.omegaconf import open_dict +from pytorch_lightning import Trainer + +from nemo.collections.multimodal.models.imagen.imagen_pipeline import ImagenPipeline +from nemo.core.config import hydra_runner + + +@hydra_runner(config_path='conf', config_name='imagen_fid_images') +def main(cfg): + # Read configuration parameters + nnodes_per_cfg = cfg.fid.nnodes_per_cfg + ntasks_per_node = cfg.fid.ntasks_per_node + local_task_id = cfg.fid.local_task_id + num_images_to_eval = cfg.fid.num_images_to_eval + path = cfg.fid.coco_captions_path + save_text = cfg.fid.save_text + + node_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0)) + node_id_per_cfg = node_id % nnodes_per_cfg + + current_node_cfg = cfg.fid.classifier_free_guidance[node_id // nnodes_per_cfg] + save_path = os.path.join(cfg.fid.save_path, str(current_node_cfg)) + + # Read and store captions + captions = [] + caption_files = sorted(os.listdir(path)) + assert len(caption_files) >= num_images_to_eval + for file in caption_files[:num_images_to_eval]: + with open(os.path.join(path, file), 'r') as f: + captions += f.readlines() + print(f"The total number of captions to generate is: {len(captions)}") + + # Calculate partition sizes and select the partition for the current node + partition_size_per_node = num_images_to_eval // nnodes_per_cfg + start_idx = node_id_per_cfg * partition_size_per_node + end_idx = (node_id_per_cfg + 1) * partition_size_per_node if node_id_per_cfg != nnodes_per_cfg - 1 else None + captions = captions[start_idx:end_idx] + print(f"Current node {node_id} will generate images from {start_idx} to {end_idx}") + + local_task_id = int(local_task_id) if local_task_id is not None else int(os.environ.get("SLURM_LOCALID", 0)) + partition_size_per_task = int(len(captions) // ntasks_per_node) + + # Select the partition for the current task + start_idx = local_task_id * partition_size_per_task + end_idx = (local_task_id + 1) * partition_size_per_task if local_task_id != ntasks_per_node - 1 else None + input = captions[start_idx:end_idx] + chunk_size = len(input) + + print(f"Current worker {node_id}:{local_task_id} will generate {len(input)} images") + os.makedirs(save_path, exist_ok=True) + + trainer = Trainer() + pipeline = ImagenPipeline.from_pretrained(cfg=cfg.infer, trainer=trainer, megatron_loading=True, megatron_cfg=cfg) + + # Generate images using the model and save them + batch_idx = 0 + batch_size = cfg.fid.ncaptions_per_batch + while True: + if batch_idx * batch_size >= len(input): + break + batch_captions = input[batch_idx * batch_size : (batch_idx + 1) * batch_size] + # Different seed for every image + seeds = [local_task_id * chunk_size + batch_idx * batch_size + idx for idx in range(len(batch_captions))] + with torch.no_grad(): + images, all_res_images, *_ = pipeline( + prompts=batch_captions, seed=seeds, single_batch_mode=True, classifier_free_guidance=current_node_cfg, + ) + + if cfg.fid.save_all_res: + all_res = [f'_RES{model.image_size}' for model in pipeline.models] + outpaths = [] + # for the highest resolution we save as its original name so that + # we can automate the CLIP & FID calculation process from Megatron-Launcher + all_res[-1] = '' + for res in all_res: + outpath = f"{save_path}{res}" + os.makedirs(outpath, exist_ok=True) + outpaths.append(outpath) + for outpath, one_res in zip(outpaths, all_res_images): + for idx, (caption, image) in enumerate(zip(batch_captions, one_res[0])): + image_idx = local_task_id * chunk_size + batch_idx * batch_size + idx + image.save(os.path.join(outpath, f'image{image_idx:06d}.png')) + if save_text: + with open(os.path.join(outpath, f'image{image_idx:06d}.txt'), 'w') as f: + f.writelines(caption) + else: + for idx, (caption, image) in enumerate(zip(batch_captions, images[0])): + image_idx = local_task_id * chunk_size + batch_idx * batch_size + idx + image.save(os.path.join(save_path, f'image{image_idx:06d}.png')) + if save_text: + with open(os.path.join(save_path, f'image{image_idx:06d}.txt'), 'w') as f: + f.writelines(caption) + print( + f'Save {len(images[0])} images to {save_path} with name from image{(local_task_id*chunk_size+batch_idx*batch_size):06d}.png to image{image_idx:06d}.png' + ) + batch_idx += 1 + + +if __name__ == "__main__": + main() diff --git a/examples/multimodal/generative/imagen/imagen_export.py b/examples/multimodal/generative/imagen/imagen_export.py new file mode 100644 index 000000000000..efee38da42cc --- /dev/null +++ b/examples/multimodal/generative/imagen/imagen_export.py @@ -0,0 +1,218 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import gc +import os + +import torch +from omegaconf import OmegaConf +from pytorch_lightning import Trainer + +from nemo.collections.multimodal.modules.imagen.diffusionmodules import attention_alt +from nemo.core.config import hydra_runner +from nemo.utils.trt_utils import build_engine + + +@hydra_runner(config_path='conf', config_name='export') +def main(inference_config): + if inference_config.get('infer'): + # invoking from launcher + trainer = Trainer(inference_config.trainer) + inference_config = inference_config.infer + else: + trainer = Trainer() + + # Set up variable to use alternative attention + attention_alt.USE_ALT = True + from nemo.collections.multimodal.models.imagen.imagen_pipeline import ImagenPipeline, ImagenPipelineConfig + + inference_config: ImagenPipelineConfig = OmegaConf.merge(ImagenPipelineConfig(), inference_config) + fp16 = 16 == int(inference_config.get("inference_precision", 32)) + # Set model to FP32 for ONNX export + inference_config.inference_precision = 32 + + pipeline = ImagenPipeline.from_pretrained(cfg=inference_config, trainer=trainer) + batch_size = inference_config.get('num_images_per_promt', 1) + thresholding_method = inference_config.get('thresholding_method', 'dynamic') + fake_text = [""] + out_embed, out_mask = pipeline.get_text_encodings(fake_text, repeat=batch_size) + output_dir = inference_config.output_path + deployment_conf = OmegaConf.create( + { + 't5': OmegaConf.create({}), + 'models': OmegaConf.create([]), + 'batch_size': batch_size, + 'thresholding_method': thresholding_method, + } + ) + + ### T5 Export + class T5Wrapper(torch.nn.Module): + def __init__(self, t5_encoder): + super(T5Wrapper, self).__init__() + self.t5_encoder = t5_encoder + + def forward(self, input_ids, attn_mask): + t5_encoder = self.t5_encoder + + with torch.no_grad(): + output = t5_encoder.model(input_ids=input_ids, attention_mask=attn_mask) + encoded_text = output.last_hidden_state + + encoded_text = encoded_text[:, 0 : t5_encoder.max_seq_len] + attn_mask = attn_mask[:, 0 : t5_encoder.max_seq_len] + + return encoded_text, attn_mask + + t5_wrapper = T5Wrapper(pipeline.text_encoder) + # Exporting T5Encoder in CPU + t5_wrapper.to('cpu') + + input_names = ['input_ids', 'attn_mask'] + output_names = ['encoded_text', 'text_mask'] + input_ids = torch.randint(high=10, size=(1, pipeline.text_encoder.model_seq_len), dtype=torch.int) + attn_mask = torch.zeros(1, pipeline.text_encoder.model_seq_len, dtype=torch.int) + + os.makedirs(f"{output_dir}/onnx/t5/", exist_ok=True) + torch.onnx.export( + t5_wrapper, + (input_ids, attn_mask), + f"{output_dir}/onnx/t5/t5.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"input_ids": {0: 'B'}, "attn_mask": {0: 'B'},}, + opset_version=17, + ) + + input_profile_t5 = {} + input_profile_t5["input_ids"] = [input_ids.shape] * 3 + input_profile_t5["attn_mask"] = [attn_mask.shape] * 3 + deployment_conf.t5.model_seq_len = pipeline.text_encoder.model_seq_len + del pipeline.text_encoder, input_ids, attn_mask + + ### UNet Export + os.makedirs(f"{output_dir}/onnx/unet/", exist_ok=True) + + low_res_size = None + cfgs = [each.cfg for each in inference_config.samplings] + cfgs = cfgs[: len(pipeline.models)] + steps = [each.step for each in inference_config.samplings] + steps = steps[: len(pipeline.models)] + input_profile_unets = [] + + for i, model in enumerate(pipeline.models): + unet_model = model.unet + + ### UNet Export + x = torch.randn(batch_size, 3, unet_model.image_size, unet_model.image_size, device="cuda") + time = torch.randn(batch_size, device='cuda') + text_embed = torch.randn(batch_size, out_embed.shape[1], out_embed.shape[2], device='cuda') + text_mask = torch.zeros((batch_size, out_mask.shape[1]), dtype=torch.int, device='cuda') + input_names = ["x", "time", "text_embed", "text_mask"] + output_names = ["logits"] + dynamic_axes = { + "x": {0: 'B'}, + "time": {0: 'B'}, + "text_embed": {0: 'B'}, + "text_mask": {0: 'B'}, + } + inputs = [x, time, text_embed, text_mask] + + if low_res_size is not None: + input_names.append("x_low_res") + dynamic_axes['x_low_res'] = {0: 'batch'} + x_low_res = torch.randn(batch_size, 3, low_res_size, low_res_size, device="cuda") + inputs.append(x_low_res) + + if model.noise_cond_aug: + input_names.append("time_low_res") + dynamic_axes['time_low_res'] = {0: 'batch'} + time_low_res = torch.ones(batch_size, device="cuda") + inputs.append(time_low_res) + + torch.onnx.export( + unet_model, + tuple(inputs), + f"{output_dir}/onnx/unet/unet{i}.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes=dynamic_axes, + opset_version=17, + ) + + input_profile_unet = {} + input_profile_unet["x"] = [(batch_size, *(x.shape[1:]))] * 3 + input_profile_unet["time"] = [(batch_size,)] * 3 + input_profile_unet["text_embed"] = [(batch_size, *(text_embed.shape[1:]))] * 3 + input_profile_unet["text_mask"] = [(batch_size, *(text_mask.shape[1:]))] * 3 + + config = OmegaConf.create({}) + config.preconditioning_type = model.preconditioning_type + config.preconditioning = model.cfg.preconditioning + config.noise_cond_aug = model.noise_cond_aug + config.cond_scale = cfgs[i] + config.step = steps[i] + config.x = input_profile_unet["x"][0] + + if i == 0: + config.text_embed = input_profile_unet["text_embed"][0] + config.text_mask = input_profile_unet["text_mask"][0] + + if low_res_size is not None: + input_profile_unet["x_low_res"] = [(batch_size, *(x_low_res.shape[1:]))] * 3 + + if model.noise_cond_aug: + input_profile_unet["time_low_res"] = [(batch_size,)] * 3 + + for key in input_profile_unet: + # set up min and max batch to 1 and 2 * batch_size + input_profile_unet[key][0] = (1, *input_profile_unet[key][0][1:]) + input_profile_unet[key][2] = (2 * batch_size, *input_profile_unet[key][2][1:]) + + deployment_conf.models.append(config) + input_profile_unets.append(input_profile_unet) + + low_res_size = unet_model.image_size + + os.makedirs(f"{output_dir}/plan", exist_ok=True) + with open(f"{output_dir}/plan/conf.yaml", "wb") as f: + OmegaConf.save(config=deployment_conf, f=f.name) + + del pipeline, x, time, text_embed, text_mask + torch.cuda.empty_cache() + gc.collect() + + build_engine( + f"{output_dir}/onnx/t5/t5.onnx", + f"{output_dir}/plan/t5.plan", + fp16=False, + input_profile=input_profile_t5, + timing_cache=None, + workspace_size=0, + ) + + for i, input_profile in enumerate(input_profile_unets): + build_engine( + f"{output_dir}/onnx/unet/unet{i}.onnx", + f"{output_dir}/plan/unet{i}.plan", + fp16=fp16, + input_profile=input_profile, + timing_cache=None, + workspace_size=0, + ) + + +if __name__ == "__main__": + main() diff --git a/examples/multimodal/generative/imagen/imagen_generate_images.py b/examples/multimodal/generative/imagen/imagen_generate_images.py index a2497425b70e..b7e4c857decc 100644 --- a/examples/multimodal/generative/imagen/imagen_generate_images.py +++ b/examples/multimodal/generative/imagen/imagen_generate_images.py @@ -12,7 +12,7 @@ @hydra_runner(config_path='conf', config_name='fid_inference.yaml') def main(inference_config): inference_config: ImagenPipelineConfig = OmegaConf.merge(ImagenPipelineConfig(), inference_config) - captions = pickle.load(open('coco_captions5k.pkl', 'rb')) + captions = pickle.load(open('coco_captions.pkl', 'rb')) ntasks = 8 if os.environ.get('CUDA_VISIBLE_DEVICES'): # Multi-GPU diff --git a/nemo/collections/multimodal/models/imagen/imagen_pipeline.py b/nemo/collections/multimodal/models/imagen/imagen_pipeline.py index fb4932765b41..3e3f5206cb96 100644 --- a/nemo/collections/multimodal/models/imagen/imagen_pipeline.py +++ b/nemo/collections/multimodal/models/imagen/imagen_pipeline.py @@ -22,7 +22,7 @@ from torch.cuda.amp import autocast from nemo.collections.multimodal.models.imagen.imagen import Imagen, MegatronImagen -from nemo.collections.multimodal.parts.utils import numpy_to_pil +from nemo.collections.multimodal.parts.utils import numpy_to_pil, setup_trainer_and_models_for_inference @dataclass @@ -92,10 +92,26 @@ def _load_model(model_ckpt: str, model_cfg: str, eval_mode: bool = True, trainer model.unet.cuda().eval() return model - def _load_customized_model(cfg: ImagenPipelineConfig, trainer=None): + def _load_customized_model(cfg: ImagenPipelineConfig, trainer=None, megatron_loading=False, megatron_cfg=None): + if megatron_loading: + assert megatron_cfg + + def model_cfg_modifier(model_cfg): + model_cfg.inductor = False + model_cfg.unet.flash_attention = False + model_cfg.micro_batch_size = megatron_cfg.fid.ncaptions_per_batch + model_cfg.global_batch_size = model_cfg.micro_batch_size * megatron_cfg.fid.ntasks_per_node + + trainer, megatron_models = setup_trainer_and_models_for_inference( + MegatronImagen, cfg=megatron_cfg, model_cfg_modifier=model_cfg_modifier + ) + models = [mm.model for mm in megatron_models] + for model in models: + model.cuda().eval() + model.model.set_inference_mode(True) + return models customized_models = cfg.customized_model models = [] - print('Load base model.') model = ImagenPipeline._load_model( model_ckpt=customized_models.base_ckpt, model_cfg=customized_models.base_cfg, trainer=trainer, @@ -118,7 +134,9 @@ def _load_customized_model(cfg: ImagenPipelineConfig, trainer=None): return models @classmethod - def from_pretrained(cls, cfg: ImagenPipelineConfig, trainer=None, device='cuda'): + def from_pretrained( + cls, cfg: ImagenPipelineConfig, trainer=None, device='cuda', megatron_loading=False, megatron_cfg=None + ): target_resolution = cfg.target_resolution assert target_resolution in [64, 256, 1024] @@ -128,7 +146,7 @@ def from_pretrained(cls, cfg: ImagenPipelineConfig, trainer=None, device='cuda') assert cfg.model_name is None, 'No predefined model for now' assert cfg.customized_model is not None, 'Need to provide customized models for inference' - models = ImagenPipeline._load_customized_model(cfg, trainer) + models = ImagenPipeline._load_customized_model(cfg, trainer, megatron_loading, megatron_cfg) assert len(models) >= 1, 'Need to load at least one model' if cfg.inference_precision == '16': print('Running Inference in FP16.') @@ -151,7 +169,7 @@ def get_text_encodings(self, input_text, repeat=1): else: inp_text_batch = input_text # Encode the text embeddings using text encoder. - text_encodings, text_mask = self.text_encoder.encode(inp_text_batch) + text_encodings, text_mask = self.text_encoder.encode(inp_text_batch, device=self.device) if repeat != 1: assert len(inp_text_batch) == 1, 'Repeat should only be applied if we feed single text to encoder.' text_encodings = text_encodings.repeat(repeat, 1, 1) @@ -211,7 +229,7 @@ def __call__( cfgs = cfgs[: len(models)] else: cfgs = classifier_free_guidance - if isinstance(cfgs, int): + if isinstance(cfgs, int) or isinstance(cfgs, float): cfgs = [cfgs] * len(models) if inference_steps is None: @@ -239,7 +257,7 @@ def __call__( text_input = prompt.strip('\n') print('Input caption: {}'.format(text_input)) tic = time.perf_counter() - text_encondings, text_mask = self.get_text_encodings( + text_encodings, text_mask = self.get_text_encodings( text_input, repeat=num_images_per_promt if not single_batch_mode else 1 ) throughputs['text-encoding'].append(time.perf_counter() - tic) @@ -279,7 +297,7 @@ def __call__( with autocast(enabled=amp_enabled): generated_images = model.sample_image( noise_map=noise_map, - text_encoding=text_encondings, + text_encoding=text_encodings, text_mask=text_mask, x_low_res=x_low_res, cond_scale=cfg, diff --git a/nemo/collections/multimodal/models/imagen/precond.py b/nemo/collections/multimodal/models/imagen/precond.py index 77717ac8db72..fc3b3ed7d18d 100644 --- a/nemo/collections/multimodal/models/imagen/precond.py +++ b/nemo/collections/multimodal/models/imagen/precond.py @@ -130,23 +130,6 @@ def forward(self, x, time, text_embed, text_mask, **model_kwargs): if c_noise.ndim < 1: c_noise = c_noise.repeat(bs,) - def save_image_tensor(tensor, dir=None): - if tensor.ndim == 4: - print(f'Saving {tensor.shape[0]} images') - elif tensor.ndim == 3: - print('Saving single image') - tensor = tensor.unsqueeze(0) - import os - - import torchvision.transforms as T - - os.makedirs(dir, exist_ok=True) - transform = T.ToPILImage() - for idx, image_tensor in enumerate(tensor): - image_tensor = ((image_tensor + 1) / 2).clamp_(0, 1) - img = transform(image_tensor) - img.save(f'{dir}/{idx}.png') - if self.noise_cond_aug: # Applying noise conditioning augmentation assert 'x_low_res' in model_kwargs, 'x_low_res does not exist when attemping to apply noise augmentation' diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention_alt.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention_alt.py new file mode 100644 index 000000000000..8927226c818e --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention_alt.py @@ -0,0 +1,321 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Adapted from: +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py +""" +import math + +import numpy as np +import torch +import torch.nn as nn +from torch.cuda.amp import custom_bwd, custom_fwd + +USE_ALT = False + + +def count_flops_attn(model, _x, y): + """ + A counter for the `thop` package to count the operations in an + attention operation. + Meant to be used like: + macs, params = thop.profile( + model, + inputs=(inputs, timestamps), + custom_ops={QKVAttention: QKVAttention.count_flops}, + ) + """ + b, c, *spatial = y[0].shape + num_spatial = int(np.prod(spatial)) + # We perform two matmuls with the same number of ops. + # The first computes the weight matrix, the second computes + # the combination of the value vectors. + matmul_ops = 2 * b * (num_spatial ** 2) * c + model.total_ops += torch.DoubleTensor([matmul_ops]) + + +# Stable attention +class StableAttentionOp(torch.autograd.Function): + # This function defines the attention weight computation in a stable way + # The idea is to scale the gradients of weight matrix by the maximum absolute value. + # In case of overflow, this will prevent weight gradients from exploding. + # In case of underflow, since we clipped the scale to 1e-4, this will prevent underflow. + + @staticmethod + def forward(ctx, q, k): + w = torch.einsum('ncq,nck->nqk', q, k / math.sqrt(k.shape[1])).softmax(dim=2) + ctx.save_for_backward(q, k, w) + return w + + @staticmethod + def backward(ctx, dw): + q, k, w = ctx.saved_tensors + + s = dw.detach().norm(float('inf'), dim=[1, 2], keepdim=True).clip(min=1e-4) + dw = dw / s + + # Due to softmax, w is fp32, making db fp32. + # Type casting is required for amp to work. + db = torch._softmax_backward_data(grad_output=dw, output=w, dim=2, input_dtype=dw.dtype).to(q.dtype) + s = s / math.sqrt(k.shape[1]) + + dq = torch.einsum('nck,nqk->ncq', k, db) * s + dk = torch.einsum('ncq,nqk->nck', q, db) * s + + return dq, dk + + +class QKVStableAttention(nn.Module): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, dim=1) + + # Reshaping q and k + # try: + # q = q.view(bs * self.n_heads, ch, length) + # k = k.view(bs * self.n_heads, ch, length) + # except Exception: + q = q.reshape(bs * self.n_heads, ch, length) + k = k.reshape(bs * self.n_heads, ch, length) + + weight = StableAttentionOp.apply(q, k) + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) + return a.reshape(bs, -1, length), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVAttention(nn.Module): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, dim=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = torch.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length), + (k * scale).view(bs * self.n_heads, ch, length), + ) # More stable with f16 than dividing afterwards + weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) + return a.reshape(bs, -1, length), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class StableMaskedAttentionOp(torch.autograd.Function): + # Robust attention operation in case of masked attention + @staticmethod + @custom_fwd + def forward(ctx, q, k, mask): + max_neg_value = -float('inf') + w = torch.einsum('ncq,nck->nqk', q, k / math.sqrt(k.shape[1])) + w = w.masked_fill(mask, max_neg_value) + w = w.softmax(dim=2) + + # When we use an arbitrary mask, there is a possibility that we get nans in softmax. + # In this case, use nan_to_num to make it a stable number. + # w = w.nan_to_num_() + ctx.save_for_backward(q, k, w, mask) + return w + + @staticmethod + @custom_bwd + def backward(ctx, dw): + q, k, w, mask = ctx.saved_tensors + max_neg_value = -torch.finfo(q.dtype).max + s = dw.detach().norm(float('inf'), dim=[1, 2], keepdim=True).clip(min=1e-4) + dw = dw / s + db = torch._softmax_backward_data(grad_output=dw, output=w, dim=2, input_dtype=dw.dtype) + + # Masking db + db_in = db.clone().masked_fill_(mask, 0) + + s = s / math.sqrt(k.shape[1]) + dq = torch.einsum('nck,nqk->ncq', k, db_in) * s + dk = torch.einsum('ncq,nqk->nck', q, db_in) * s + + # These are dummy derivatives since mask is a constant + dmask = (max_neg_value - w) * db.clone() * s + + return dq, dk, dmask + + +class QKVMaskedAttention(nn.Module): + """ + A module which performs QKV attention. + Attention mask is accepted as input. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, q, k, v, mask): + r""" + Apply QKV attention with attention mask. + + Args: + q: an [N x d x n_seq1] of queries. + k: an [N x d x n_seq2] of keys. + v: an [N x d x n_seq2] of values. + mask: Attention mask of size N x n_seq1 x n_seq2 + + Returns: an [N x d x n_seq1] tensor after attention. + """ + + bs, width, length_q = q.shape + _, _, length_k = k.shape + + assert width % self.n_heads == 0 + ch = width // self.n_heads + + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = torch.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length_q), + (k * scale).view(bs * self.n_heads, ch, length_k), + ) # More stable with f16 than dividing afterwards + + # Duplicate mask n_heads times + # mask = mask.repeat_interleave(self.n_heads, dim=0) + mask = mask.unsqueeze(0).repeat(self.n_heads, 1, 1, 1).transpose(0, 1).flatten(0, 1) + assert mask.shape == weight.shape + max_neg_value = -float('inf') + weight = weight.masked_fill(~mask, max_neg_value) + + weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) + + # When we use an arbitrary mask, there is a possibility that we get nans in softmax. + # In this case, use nan_to_num to make it a non-nan number. + # weight = weight.nan_to_num_() + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length_k)) + # We also return weight here for attention visualization. + return a.reshape(bs, -1, length_q), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVStableMaskedAttention(nn.Module): + """ + A module which performs QKV attention. + Attention mask is accepted as input. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, q, k, v, mask): + r""" + Apply QKV attention with attention mask. + + Args: + q: an [N x d x n_seq1] of queries. + k: an [N x d x n_seq2] of keys. + v: an [N x d x n_seq2] of values. + mask: Attention mask of size N x n_seq1 x n_seq2 + + Returns: an [N x d x n_seq1] tensor after attention. + """ + + bs, width, length_q = q.shape + _, _, length_k = k.shape + + assert width % self.n_heads == 0 + ch = width // self.n_heads + + q = q.view(bs * self.n_heads, ch, length_q) + k = k.view(bs * self.n_heads, ch, length_k) + + # Forming attention mask + # mask = mask.repeat_interleave(self.n_heads, dim=0) + mask = mask.unsqueeze(0).repeat(self.n_heads, 1, 1, 1).transpose(0, 1).flatten(0, 1) + + weight = StableMaskedAttentionOp.apply(q, k, ~mask) + + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length_k)) + # We also return weight here for attention visualization. + return a.reshape(bs, -1, length_q), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class SelfAttentionPooling(nn.Module): + """ + Implementation of SelfAttentionPooling + Original Paper: Self-Attention Encoding and Pooling for Speaker Recognition + https://arxiv.org/pdf/2008.01077v1.pdf + Taken from: https://gist.github.com/pohanchi/c77f6dbfbcbc21c5215acde4f62e4362 + """ + + def __init__(self, input_dim): + super(SelfAttentionPooling, self).__init__() + self.W = nn.Linear(input_dim, 1) + + def forward(self, batch_rep): + """ + input: + batch_rep : size (N, T, H), N: batch size, T: sequence length, H: Hidden dimension + + attention_weight: + att_w : size (N, T, 1) + + return: + utter_rep: size (N, H) + """ + softmax = nn.functional.softmax + att_w = softmax(self.W(batch_rep).squeeze(-1), dim=1).unsqueeze(-1) + utter_rep = torch.sum(batch_rep * att_w, dim=1) + + return utter_rep diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py index 2c57cfee8628..b8e44070b570 100644 --- a/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py @@ -24,12 +24,22 @@ import torch.utils.checkpoint as checkpoint from einops import rearrange -from nemo.collections.multimodal.modules.imagen.diffusionmodules.attention import ( - QKVAttention, - QKVMaskedAttention, - QKVStableAttention, - QKVStableMaskedAttention, -) +from nemo.collections.multimodal.modules.imagen.diffusionmodules import attention_alt + +if attention_alt.USE_ALT: + from nemo.collections.multimodal.modules.imagen.diffusionmodules.attention_alt import ( + QKVAttention, + QKVMaskedAttention, + QKVStableAttention, + QKVStableMaskedAttention, + ) +else: + from nemo.collections.multimodal.modules.imagen.diffusionmodules.attention import ( + QKVAttention, + QKVMaskedAttention, + QKVStableAttention, + QKVStableMaskedAttention, + ) from nemo.collections.multimodal.modules.imagen.diffusionmodules.layers import ( Downsample, Upsample, diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py index e8e52e7e9ddd..cf4a2e213b55 100644 --- a/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py @@ -326,6 +326,15 @@ def forward( x_low_res = F.interpolate(x_low_res, (new_height, new_width), mode="bicubic") x = torch.cat([x, x_low_res], dim=1) batch_size, device = x.shape[0], x.device + + if x.dtype != time.dtype or time.dtype != text_embed.dtype: + dtype = text_embed.dtype + x = x.to(dtype=dtype) + time = time.to(dtype=dtype) + if x_low_res is not None: + x_low_res = x_low_res.to(dtype=dtype) + if time_low_res is not None: + time_low_res = time_low_res.to(dtype=dtype) # Time Conditioning t = self.time_embed(time) # Add lowres time conditioning @@ -536,6 +545,15 @@ def forward( else: assert time_low_res is None, 'time_low_res cannot be presented' + if x.dtype != time.dtype or time.dtype != text_embed.dtype: + dtype = text_embed.dtype + x = x.to(dtype=dtype) + time = time.to(dtype=dtype) + if x_low_res is not None: + x_low_res = x_low_res.to(dtype=dtype) + if time_low_res is not None: + time_low_res = time_low_res.to(dtype=dtype) + batch_size, device = x.shape[0], x.device # Time Conditioning t = self.time_embed(time) diff --git a/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py b/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py index 03e6f75253ec..56472db3f052 100644 --- a/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py +++ b/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py @@ -31,22 +31,20 @@ def __init__(self, max_seq_len=512, encoder_path=None): else: print(f'Load T5 encoder from {encoder_path}') hard_coded_encoder_weight_location = os.path.join(encoder_path, "t5xxl-encoder.bin") - hard_coded_encoder_config_location = os.path.join( - "nemo/collections/multimodal/modules/imagen/encoder/t5encoder.json" - ) + hard_coded_encoder_config_location = os.path.join(os.path.dirname(__file__), "t5encoder.json") self.model = T5EncoderModel.from_pretrained( hard_coded_encoder_weight_location, config=T5Config.from_json_file(hard_coded_encoder_config_location), low_cpu_mem_usage=True, ) - def encode(self, text_batch): + def encode(self, text_batch, device='cuda'): encoded = self.tokenizer.batch_encode_plus( text_batch, return_tensors="pt", padding="max_length", max_length=self.model_seq_len, truncation=True ) # We expect all the processing is done in GPU. - input_ids = encoded.input_ids.cuda() - attn_mask = encoded.attention_mask.cuda() + input_ids = encoded.input_ids.to(device=device) + attn_mask = encoded.attention_mask.to(device=device) with torch.no_grad(): output = self.model(input_ids=input_ids, attention_mask=attn_mask) diff --git a/nemo/collections/multimodal/parts/utils.py b/nemo/collections/multimodal/parts/utils.py index 686f92c5987b..54dcc7816728 100644 --- a/nemo/collections/multimodal/parts/utils.py +++ b/nemo/collections/multimodal/parts/utils.py @@ -50,6 +50,97 @@ def randn_like(x, generator=None): return torch.randn(x.shape, dtype=x.dtype, device=x.device, generator=generator) +def setup_trainer_and_models_for_inference( + model_provider: Any, cfg: DictConfig, model_cfg_modifier: Callable, +): + """ + Set up a trainer and NeMo model for inference. + + Args: + model_provider (Any): An object that provides the NeMo model. + cfg (DictConfig): The configuration dictionary, containing the + necessary settings for the trainer and the models. + model_cfg_modifier (Callable): A function that modifies the model + configuration for inference. + + Returns: + Tuple[Trainer, Any]: A tuple containing the trainer and the model. + """ + + # Check if we need to use the TorchElasticEnvironment plugin for the trainer. + plugins = [] + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + # Use the NLPDDPStrategy for the distributed data parallel strategy. + # We don't use DDP for async grad allreduce and don't find unused parameters. + strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=False,) + + # Set up the trainer with the specified plugins and strategy. + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + # Create the NLPSaveRestoreConnector object for model saving and restoring. + save_restore_connector = NLPSaveRestoreConnector() + + print(f'Loading {cfg.models} models') + models = [] + for single_model_cfg in cfg.models: + if not single_model_cfg.restore_from_path: + continue + if single_model_cfg.restore_from_path.endswith(".nemo"): + # Set the model_extracted_dir attribute if the restore path is a directory. + if os.path.isdir(single_model_cfg.restore_from_path): + save_restore_connector.model_extracted_dir = single_model_cfg.restore_from_path + + # Restore the model configuration from the specified path and modify it for inference. + model_cfg = model_provider.restore_from( + restore_path=single_model_cfg.restore_from_path, + trainer=trainer, + save_restore_connector=save_restore_connector, + return_config=True, + ) + with open_dict(model_cfg): + model_cfg_modifier(model_cfg) # modify the configuration for inference + + # Restore the model from the specified path and configuration, and set it up for inference. + model = model_provider.restore_from( + restore_path=single_model_cfg.restore_from_path, + trainer=trainer, + override_config_path=model_cfg, + save_restore_connector=save_restore_connector, + strict=True, + ) + models.append(model) + + elif single_model_cfg.restore_from_path.endswith(".ckpt"): + logging.warning( + "Loading from .ckpt checkpoint for inference is experimental! It doesn't support models with model parallelism!" + ) + + model = model_provider.load_from_checkpoint( + single_model_cfg.restore_from_path, hparams_file=cfg.model.get("hparams_file"), trainer=trainer, + ) + models.append(model) + + else: + raise ValueError(f"Unrecognized checkpoint type: {single_model_cfg.restore_from_path}") + + # initialize apex DDP strategy + def dummy(): + return + + if trainer.strategy.launcher is not None: + trainer.strategy.launcher.launch(dummy, trainer=trainer) + trainer.strategy.setup_environment() + + models = [model.cuda() for model in models] # move the model to the GPU + for model in models: + model.eval().requires_grad_(False) # set the model to evaluation mode and disable gradients + + # Return the trainer and model objects. + return trainer, models + + def setup_trainer_and_model_for_inference( model_provider: Any, cfg: DictConfig, model_cfg_modifier: Callable, ) -> Tuple[Trainer, Any]: From c0c5f7bd1dfe32e762e92a07bb2a04fce7b1c2cc Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Fri, 30 Jun 2023 14:23:49 -0700 Subject: [PATCH 148/512] SD2 update --- .../stable_diffusion/conf/sd2_train.yaml | 4 +++- .../generative/stable_diffusion/sd_export.py | 2 +- .../models/stable_diffusion/ldm/ddpm.py | 12 +++++++++++ .../stable_diffusion/samplers/base_sampler.py | 21 +++++++++++++------ .../models/stable_diffusion/samplers/ddim.py | 15 +++++++++++-- .../stable_diffusion/samplers/dpmsolver.py | 9 +++++++- .../models/stable_diffusion/samplers/plms.py | 8 +++++-- .../stable_diffusion/samplers/sampler_dpm.py | 4 +++- 8 files changed, 61 insertions(+), 14 deletions(-) diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml index 7ba8c27c2971..fd579ef19487 100644 --- a/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml +++ b/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml @@ -84,8 +84,10 @@ model: cond_stage_forward: text_embedding_dropout_rate: 0.1 fused_opt: True - inductor: False + inductor: True inductor_cudagraphs: False + capture_cudagraph_iters: -1 # -1 to disable + channels_last: True unet_config: _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel diff --git a/examples/multimodal/generative/stable_diffusion/sd_export.py b/examples/multimodal/generative/stable_diffusion/sd_export.py index 434161d7558c..a7b1614f0c31 100644 --- a/examples/multimodal/generative/stable_diffusion/sd_export.py +++ b/examples/multimodal/generative/stable_diffusion/sd_export.py @@ -193,7 +193,7 @@ def output_names(self) -> List[str]: ) else: clip_model = OpenCLIPWrapper(model.cond_stage_model) - clip_model.export("stable-diffusion/onnx/clip/clip.onnx") + clip_model.export(f"{output_dir}/onnx/clip/clip.onnx") input_profile_clip = {} input_profile_clip["tokens"] = [(batch_size, *(tokens.shape[1:]))] * 3 diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py index 6de6ed07f341..7fa4df0aa4f1 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -276,6 +276,18 @@ def predict_start_from_noise(self, x_t, t, noise): - extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise ) + def predict_start_from_z_and_v(self, x_t, t, v): + return ( + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * x_t + - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * v + ) + + def predict_eps_from_z_and_v(self, x_t, t, v): + return ( + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * v + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * x_t + ) + def q_posterior(self, x_start, x_t, t): posterior_mean = ( extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py index ea4c944fc9fe..c8f6901dcdd0 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py @@ -251,22 +251,26 @@ def _get_model_output( self, x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, corrector_kwargs, ): if unconditional_conditioning is None or unconditional_guidance_scale == 1.0: - e_t = self.model.apply_model(x, t, c) + model_output = self.model.apply_model(x, t, c) elif isinstance(c, dict): ### Contolnet conditioning is dict format model_t = self.model.apply_model(x, t, c) model_uncond = self.model.apply_model(x, t, unconditional_conditioning) - e_t = model_uncond + unconditional_guidance_scale * (model_t - model_uncond) + model_output = model_uncond + unconditional_guidance_scale * (model_t - model_uncond) else: x_in = torch.cat([x] * 2) t_in = torch.cat([t] * 2) c_in = torch.cat([unconditional_conditioning, c]) - e_t_uncond, e_t = self.model.apply_model(x_in, t_in, c_in).chunk(2) - e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond) + e_t_uncond, model_t = self.model.apply_model(x_in, t_in, c_in).chunk(2) + model_output = e_t_uncond + unconditional_guidance_scale * (model_t - e_t_uncond) + if self.model.parameterization == "v": + e_t = self.model.predict_eps_from_z_and_v(x, t, model_output) + else: + e_t = model_output if score_corrector is not None: assert self.model.parameterization == "eps" e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs) - return e_t + return e_t, model_output def _get_x_prev_and_pred_x0( self, @@ -275,6 +279,8 @@ def _get_x_prev_and_pred_x0( index, device, x, + t, + model_output, e_t, quantize_denoised, repeat_noise, @@ -294,7 +300,10 @@ def _get_x_prev_and_pred_x0( sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device) sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index], device=device) # current prediction for x_0 - pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt() + if self.model.parameterization != "v": + pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt() + else: + pred_x0 = self.model.predict_start_from_z_and_v(x, t, model_output) if quantize_denoised: pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0) # direction pointing to x_t diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py index 18863cee19fb..2d6b121dced4 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py @@ -47,11 +47,22 @@ def p_sampling_fn( t_next=None, ): b, *_, device = *x.shape, x.device - e_t = self._get_model_output( + e_t, model_output = self._get_model_output( x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, corrector_kwargs ) x_prev, pred_x0 = self._get_x_prev_and_pred_x0( - use_original_steps, b, index, device, x, e_t, quantize_denoised, repeat_noise, temperature, noise_dropout + use_original_steps, + b, + index, + device, + x, + t, + model_output, + e_t, + quantize_denoised, + repeat_noise, + temperature, + noise_dropout, ) return x_prev, pred_x0 diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py index 0ea758ace26c..b1b046a2c5db 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py @@ -169,6 +169,13 @@ def noise_pred_fn(x, t_continuous, cond=None): ) dims = x.dim() return (x - expand_dims(alpha_t, dims) * output) / expand_dims(sigma_t, dims) + elif model_type == "v": + alpha_t, sigma_t = ( + noise_schedule.marginal_alpha(t_continuous), + noise_schedule.marginal_std(t_continuous), + ) + dims = x.dim() + return expand_dims(alpha_t, dims) * output + expand_dims(sigma_t, dims) * x def cond_grad_fn(x, t_input): """ @@ -204,7 +211,7 @@ def model_fn(x, t_continuous): noise_uncond, noise = noise_pred_fn(x_in, t_in, cond=c_in).chunk(2) return noise_uncond + guidance_scale * (noise - noise_uncond) - assert model_type in ["noise", "x_start"] + assert model_type in ["noise", "x_start", "v"] assert guidance_type in ["uncond", "classifier", "classifier-free"] return model_fn diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py index 165c27a3f924..1602ec6245d4 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py @@ -48,7 +48,7 @@ def p_sampling_fn( t_next=None, ): b, *_, device = *x.shape, x.device - e_t = self._get_model_output( + e_t, model_output = self._get_model_output( x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, corrector_kwargs ) if len(old_eps) == 0: @@ -59,13 +59,15 @@ def p_sampling_fn( index, device, x, + t, + model_output, e_t, quantize_denoised, repeat_noise, temperature, noise_dropout, ) - e_t_next = self._get_model_output( + e_t_next, model_output = self._get_model_output( x_prev, t_next, unconditional_conditioning, @@ -91,6 +93,8 @@ def p_sampling_fn( index, device, x, + t, + model_output, e_t_prime, quantize_denoised, repeat_noise, diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py index d53c6dee0562..1da34e16508b 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py @@ -20,6 +20,8 @@ from .dpmsolver import DPMSolver, NoiseScheduleVP, model_wrapper +MODEL_TYPES = {"eps": "noise", "v": "v"} + class DPMSolverSampler(AbstractBaseSampler): def __init__(self, model, **kwargs): @@ -61,7 +63,7 @@ def dpm_sampling_fn( model_fn = model_wrapper( lambda x, t, c: self.model.apply_model(x, t, c), ns, - model_type="noise", + model_type=MODEL_TYPES[self.model.parameterization], guidance_type="classifier-free", condition=conditioning, unconditional_condition=unconditional_conditioning, From 117ad731ad73226077812be037411802bdf1a564 Mon Sep 17 00:00:00 2001 From: aot Date: Mon, 3 Jul 2023 09:34:45 -0700 Subject: [PATCH 149/512] Fixed export issue --- examples/multimodal/generative/imagen/conf/export.yaml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/multimodal/generative/imagen/conf/export.yaml b/examples/multimodal/generative/imagen/conf/export.yaml index 9d0ddf462cf0..fa77478390a7 100644 --- a/examples/multimodal/generative/imagen/conf/export.yaml +++ b/examples/multimodal/generative/imagen/conf/export.yaml @@ -3,20 +3,22 @@ model_name: null # Avaliable model_name defined in pretrained_models.yaml run_ema_model: True # Whether load the reg/ema model when using pretrained models customized_model: # Mutually exclusive with model_name # base_ckpt: /aot/exp/ckpts/imagen-megatron/edm-fused-200k.ckpt # Either .ckpt or .nemo is accepatable - base_ckpt: /lm/data/nemo/imagen/edm-fused-1150k-ema.nemo + base_ckpt: /lm/data/nemo/imagen/base.nemo base_cfg: # Must provided if loading .ckpt checkpoint - sr256_ckpt: /lm/data/nemo/imagen/sr-noise-aug-280k.nemo + sr256_ckpt: /lm/data/nemo/imagen/sr256.nemo sr256_cfg: + sr1024_ckpt: /lm/data/nemo/imagen/sr1024.nemo + sr1024_cfg: target_resolution: 256 # in [64, 256, 1024] inference_precision: 16 # [16, 32, AMP] thresholding_method: dynamic -output_path: 'output/edm' # Save location +output_path: 'output/export' # Save location record_time: True # Whether to record inference time meta encoder_path: '/nemo/data/encoders' # Set to null if you wish to download encoders on the fly samplings: - # Base64 - step: 250 + step: 30 cfg: 7.5 - # SR256 step: 20 From 5a6e1862471b3cf981087f5a916f82c813668826 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Mon, 3 Jul 2023 10:14:18 -0700 Subject: [PATCH 150/512] Fix for instruct p2p and reformat --- .../models/stable_diffusion/ldm/ddpm.py | 281 +++++++++--------- .../stable_diffusion/encoders/modules.py | 93 +++--- 2 files changed, 191 insertions(+), 183 deletions(-) diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py index 7fa4df0aa4f1..82c2a4287c4c 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -12,15 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. import itertools -from contextlib import contextmanager -from functools import partial -from typing import Any, Dict, Optional, Union - import numpy as np import pytorch_lightning as pl import torch import torch.nn as nn +from contextlib import contextmanager from einops import rearrange, repeat +from functools import partial from omegaconf import DictConfig, OmegaConf, open_dict from pytorch_lightning import Trainer from pytorch_lightning.accelerators import CPUAccelerator @@ -30,6 +28,7 @@ from torch.optim.lr_scheduler import LambdaLR from torchvision.utils import make_grid from tqdm import tqdm +from typing import Any, Dict, Optional, Union from nemo.collections.multimodal.data.stable_diffusion.stable_diffusion_dataset import ( build_train_valid_datasets, @@ -126,10 +125,14 @@ def __init__(self, cfg): self.first_stage_key = cfg.first_stage_key self.image_size = cfg.image_size # try conv? self.channels = cfg.channels - self.channels_last = cfg.channels_last + self.channels_last = cfg.get("channels_last", False) self.use_positional_encodings = cfg.use_positional_encodings self.model = DiffusionWrapper( - cfg.unet_config, cfg.conditioning_key, cfg.inductor, cfg.inductor_cudagraphs, cfg.capture_cudagraph_iters + cfg.unet_config, + cfg.conditioning_key, + cfg.inductor, + cfg.inductor_cudagraphs, + cfg.get("capture_cudagraph_iters", -1), ) self.model_type = None count_params(self.model, verbose=True) @@ -154,16 +157,16 @@ def __init__(self, cfg): if self.learn_logvar: self.logvar = nn.Parameter(self.logvar, requires_grad=True) - self.rng = torch.Generator(device=torch.cuda.current_device(),) + self.rng = torch.Generator(device=torch.cuda.current_device(), ) def register_schedule( - self, - given_betas=None, - beta_schedule="linear", - timesteps=1000, - linear_start=1e-4, - linear_end=2e-2, - cosine_s=8e-3, + self, + given_betas=None, + beta_schedule="linear", + timesteps=1000, + linear_start=1e-4, + linear_end=2e-2, + cosine_s=8e-3, ): if exists(given_betas): betas = given_betas @@ -196,7 +199,7 @@ def register_schedule( # calculations for posterior q(x_{t-1} | x_t, x_0) posterior_variance = (1 - self.v_posterior) * betas * (1.0 - alphas_cumprod_prev) / ( - 1.0 - alphas_cumprod + 1.0 - alphas_cumprod ) + self.v_posterior * betas # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) self.register_buffer('posterior_variance', to_torch(posterior_variance)) @@ -211,7 +214,7 @@ def register_schedule( if self.parameterization == "eps": lvlb_weights = self.betas ** 2 / ( - 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod) + 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod) ) elif self.parameterization == "x0": lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2.0 * 1 - torch.Tensor(alphas_cumprod)) @@ -272,26 +275,26 @@ def q_mean_variance(self, x_start, t): def predict_start_from_noise(self, x_t, t, noise): return ( - extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - - extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise + extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t + - extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise ) def predict_start_from_z_and_v(self, x_t, t, v): return ( - extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * x_t - - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * v + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * x_t + - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * v ) def predict_eps_from_z_and_v(self, x_t, t, v): return ( - extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * v - + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * x_t + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * v + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * x_t ) def q_posterior(self, x_start, x_t, t): posterior_mean = ( - extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start - + extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t + extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start + + extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t ) posterior_variance = extract_into_tensor(self.posterior_variance, t, x_t.shape) posterior_log_variance_clipped = extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape) @@ -345,14 +348,14 @@ def sample(self, batch_size=16, return_intermediates=False): def q_sample(self, x_start, t, noise=None): noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) return ( - extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start - + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise ) def get_v(self, x, noise, t): return ( - extract_into_tensor(self.sqrt_alphas_cumprod, t, x.shape) * noise - - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x.shape) * x + extract_into_tensor(self.sqrt_alphas_cumprod, t, x.shape) * noise + - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x.shape) * x ) def get_loss(self, pred, target, mean=True): @@ -515,7 +518,7 @@ def __init__(self, cfg): self.first_stage_model = self.first_stage_model.to(memory_format=torch.channels_last) self.model = self.model.to(memory_format=torch.channels_last) - def make_cond_schedule(self,): + def make_cond_schedule(self, ): self.cond_ids = torch.full(size=(self.num_timesteps,), fill_value=self.num_timesteps - 1, dtype=torch.long) ids = torch.round(torch.linspace(0, self.num_timesteps - 1, self.num_timesteps_cond)).long() self.cond_ids[: self.num_timesteps_cond] = ids @@ -533,13 +536,13 @@ def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): logging.info("### USING STD-RESCALING ###") def register_schedule( - self, - given_betas=None, - beta_schedule="linear", - timesteps=1000, - linear_start=1e-4, - linear_end=2e-2, - cosine_s=8e-3, + self, + given_betas=None, + beta_schedule="linear", + timesteps=1000, + linear_start=1e-4, + linear_end=2e-2, + cosine_s=8e-3, ): super().register_schedule(given_betas, beta_schedule, timesteps, linear_start, linear_end, cosine_s) @@ -708,14 +711,14 @@ def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo load once @torch.no_grad() def get_input( - self, - batch, - k, - return_first_stage_outputs=False, - force_c_encode=False, - cond_key=None, - return_original_cond=False, - bs=None, + self, + batch, + k, + return_first_stage_outputs=False, + force_c_encode=False, + cond_key=None, + return_original_cond=False, + bs=None, ): if self.first_stage_key.endswith('encoded'): gaussian_parameters = batch[self.first_stage_key] @@ -992,8 +995,8 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): z_list = [z[:, :, :, :, i] for i in range(z.shape[-1])] if ( - self.cond_stage_key in ["image", "LR_image", "segmentation", 'bbox_img'] - and self.model.conditioning_key + self.cond_stage_key in ["image", "LR_image", "segmentation", 'bbox_img'] + and self.model.conditioning_key ): # todo check for completeness c_key = next(iter(cond.keys())) # get key c = next(iter(cond.values())) # get value @@ -1007,7 +1010,7 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): elif self.cond_stage_key == 'coordinates_bbox': assert ( - 'original_image_size' in self.split_input_params + 'original_image_size' in self.split_input_params ), 'BoudingBoxRescaling is missing original_image_size' # assuming padding of unfold is always 0 and its dilation is always 1 @@ -1081,7 +1084,7 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): def _predict_eps_from_xstart(self, x_t, t, pred_xstart): return ( - extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart + extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart ) / extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) def _prior_bpd(self, x_start): @@ -1136,16 +1139,16 @@ def p_losses(self, x_start, cond, t, noise=None): return loss, loss_dict def p_mean_variance( - self, - x, - c, - t, - clip_denoised: bool, - return_codebook_ids=False, - quantize_denoised=False, - return_x0=False, - score_corrector=None, - corrector_kwargs=None, + self, + x, + c, + t, + clip_denoised: bool, + return_codebook_ids=False, + quantize_denoised=False, + return_x0=False, + score_corrector=None, + corrector_kwargs=None, ): t_in = t model_out = self.apply_model(x, t_in, c, return_ids=return_codebook_ids) @@ -1178,19 +1181,19 @@ def p_mean_variance( @torch.no_grad() def p_sample( - self, - x, - c, - t, - clip_denoised=False, - repeat_noise=False, - return_codebook_ids=False, - quantize_denoised=False, - return_x0=False, - temperature=1.0, - noise_dropout=0.0, - score_corrector=None, - corrector_kwargs=None, + self, + x, + c, + t, + clip_denoised=False, + repeat_noise=False, + return_codebook_ids=False, + quantize_denoised=False, + return_x0=False, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, ): b, *_, device = *x.shape, x.device outputs = self.p_mean_variance( @@ -1227,23 +1230,23 @@ def p_sample( @torch.no_grad() def progressive_denoising( - self, - cond, - shape, - verbose=True, - callback=None, - quantize_denoised=False, - img_callback=None, - mask=None, - x0=None, - temperature=1.0, - noise_dropout=0.0, - score_corrector=None, - corrector_kwargs=None, - batch_size=None, - x_T=None, - start_T=None, - log_every_t=None, + self, + cond, + shape, + verbose=True, + callback=None, + quantize_denoised=False, + img_callback=None, + mask=None, + x0=None, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + batch_size=None, + x_T=None, + start_T=None, + log_every_t=None, ): if not log_every_t: log_every_t = self.log_every_t @@ -1313,20 +1316,20 @@ def progressive_denoising( @torch.no_grad() def p_sample_loop( - self, - cond, - shape, - return_intermediates=False, - x_T=None, - verbose=True, - callback=None, - timesteps=None, - quantize_denoised=False, - mask=None, - x0=None, - img_callback=None, - start_T=None, - log_every_t=None, + self, + cond, + shape, + return_intermediates=False, + x_T=None, + verbose=True, + callback=None, + timesteps=None, + quantize_denoised=False, + mask=None, + x0=None, + img_callback=None, + start_T=None, + log_every_t=None, ): if not log_every_t: @@ -1379,18 +1382,18 @@ def p_sample_loop( @torch.no_grad() def sample( - self, - cond, - batch_size=16, - return_intermediates=False, - x_T=None, - verbose=True, - timesteps=None, - quantize_denoised=False, - mask=None, - x0=None, - shape=None, - **kwargs, + self, + cond, + batch_size=16, + return_intermediates=False, + x_T=None, + verbose=True, + timesteps=None, + quantize_denoised=False, + mask=None, + x0=None, + shape=None, + **kwargs, ): if shape is None: shape = (batch_size, self.channels, self.image_size, self.image_size) @@ -1431,20 +1434,20 @@ def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): @torch.no_grad() def log_images( - self, - batch, - N=8, - n_row=4, - sample=True, - ddim_steps=200, - ddim_eta=1.0, - return_keys=None, - quantize_denoised=True, - inpaint=True, - plot_denoise_rows=False, - plot_progressive_rows=True, - plot_diffusion_rows=True, - **kwargs, + self, + batch, + N=8, + n_row=4, + sample=True, + ddim_steps=200, + ddim_eta=1.0, + return_keys=None, + quantize_denoised=True, + inpaint=True, + plot_denoise_rows=False, + plot_progressive_rows=True, + plot_diffusion_rows=True, + **kwargs, ): use_ddim = ddim_steps is not None @@ -1509,9 +1512,9 @@ def log_images( log["denoise_row"] = denoise_grid if ( - quantize_denoised - and not isinstance(self.first_stage_model, AutoencoderKL) - and not isinstance(self.first_stage_model, IdentityFirstStage) + quantize_denoised + and not isinstance(self.first_stage_model, AutoencoderKL) + and not isinstance(self.first_stage_model, IdentityFirstStage) ): # also display when quantizing x0 while sampling with self.ema_scope("Plotting Quantized Denoised"): @@ -1533,7 +1536,7 @@ def log_images( b, h, w = z.shape[0], z.shape[2], z.shape[3] mask = torch.ones(N, h, w) # zeros will be filled in - mask[:, h // 4 : 3 * h // 4, w // 4 : 3 * w // 4] = 0.0 + mask[:, h // 4: 3 * h // 4, w // 4: 3 * w // 4] = 0.0 mask = mask[:, None, ...] with self.ema_scope("Plotting Inpaint"): samples, _ = self.sample_log( @@ -1773,7 +1776,7 @@ def process_batch(batch): # SD has more dedicated structure for encoding, so we enable autocasting here as well with torch.cuda.amp.autocast( - self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, + self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, ): x, c = self.model.get_input(batch, self.cfg.first_stage_key) @@ -1981,12 +1984,12 @@ def parameters(self): class DiffusionWrapper(pl.LightningModule, Serialization): def __init__( - self, - diff_model_config, - conditioning_key, - inductor: bool = False, - inductor_cudagraphs: bool = False, - capture_cudagraph_iters: int = -1, + self, + diff_model_config, + conditioning_key, + inductor: bool = False, + inductor_cudagraphs: bool = False, + capture_cudagraph_iters: int = -1, ): super().__init__() self.diffusion_model = DiffusionWrapper.from_config_dict(diff_model_config) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py index 02fbef2b0f0f..970c4ee4800e 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py @@ -11,15 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os -import tempfile -from functools import partial - import kornia import open_clip +import os +import tempfile import torch import torch.nn as nn from einops import rearrange, repeat +from functools import partial from omegaconf import OmegaConf from torch.utils.checkpoint import checkpoint from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer @@ -27,10 +26,10 @@ from nemo.collections.multimodal.data.clip.clip_dataset import get_preprocess_fns from nemo.collections.multimodal.models.clip.megatron_clip_models import CLIPModel +from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import Encoder from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import ( TransformerWrapper, # TODO: can we directly rely on lucidrains code and simply add this as a reuirement? --> test ) -from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import Encoder from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector @@ -119,14 +118,14 @@ class BERTEmbedder(AbstractEncoder): """Uses the BERT tokenizr model and add some transformer encoder layers""" def __init__( - self, - n_embed, - n_layer, - vocab_size=30522, - max_seq_len=77, - device="cuda", - use_tokenizer=True, - embedding_dropout=0.0, + self, + n_embed, + n_layer, + vocab_size=30522, + max_seq_len=77, + device="cuda", + use_tokenizer=True, + embedding_dropout=0.0, ): super().__init__() self.use_tknz_fn = use_tokenizer @@ -182,7 +181,8 @@ class FrozenCLIPEmbedder(AbstractEncoder): """Uses the CLIP transformer encoder for text (from Hugging Face)""" def __init__( - self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77, capture_cudagraph_iters: int = -1 + self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77, + capture_cudagraph_iters: int = -1 ): super().__init__() self.tokenizer = CLIPTokenizer.from_pretrained(version) @@ -214,28 +214,33 @@ def forward(self, text): padding="max_length", return_tensors="pt", ) - if self.static_tokens is None: - self.static_tokens = batch_encoding["input_ids"].to(device=self.device, non_blocking=True) - self.static_tokens.copy_(batch_encoding["input_ids"], non_blocking=True) - - if self.iterations == self.capture_cudagraph_iters: - # cuda graph capture - logging.info("Capturing CUDA graph for module: %s", self.transformer.__class__.__name__) - with torch.cuda.graph(self.transformer_graph): - self.static_outputs = self.transformer(input_ids=self.static_tokens) - - if 0 <= self.capture_cudagraph_iters <= self.iterations: - # cuda graph replay - self.transformer_graph.replay() - else: - # warmup - self.stream.wait_stream(torch.cuda.current_stream()) - with torch.cuda.stream(self.stream): - self.static_outputs = self.transformer(input_ids=self.static_tokens) - torch.cuda.current_stream().wait_stream(self.stream) - self.iterations += 1 + if self.capture_cudagraph_iters < 0: + tokens = batch_encoding["input_ids"].to(self.device, non_blocking=True) + outputs = self.transformer(input_ids=tokens) + z = outputs.last_hidden_state - z = self.static_outputs.last_hidden_state + else: + if self.static_tokens is None: + self.static_tokens = batch_encoding["input_ids"].to(device=self.device, non_blocking=True) + self.static_tokens.copy_(batch_encoding["input_ids"], non_blocking=True) + + if self.iterations == self.capture_cudagraph_iters: + # cuda graph capture + logging.info("Capturing CUDA graph for module: %s", self.transformer.__class__.__name__) + with torch.cuda.graph(self.transformer_graph): + self.static_outputs = self.transformer(input_ids=self.static_tokens) + + if 0 <= self.capture_cudagraph_iters <= self.iterations: + # cuda graph replay + self.transformer_graph.replay() + else: + # warmup + self.stream.wait_stream(torch.cuda.current_stream()) + with torch.cuda.stream(self.stream): + self.static_outputs = self.transformer(input_ids=self.static_tokens) + torch.cuda.current_stream().wait_stream(self.stream) + self.iterations += 1 + z = self.static_outputs.last_hidden_state # # Pad the seq length to multiple of 8 seq_len = (z.shape[1] + 8 - 1) // 8 * 8 @@ -258,14 +263,14 @@ class FrozenOpenCLIPEmbedder(AbstractEncoder): ] def __init__( - self, - arch="ViT-H-14", - version="laion2b_s32b_b79k", - device="cuda", - max_length=77, - freeze=True, - layer="last", - use_fp16=False, + self, + arch="ViT-H-14", + version="laion2b_s32b_b79k", + device="cuda", + max_length=77, + freeze=True, + layer="last", + use_fp16=False, ): super().__init__() assert layer in self.LAYERS @@ -379,7 +384,7 @@ def build_tokenizer(self, cfg): legacy=legacy, ) - _, self.text_transform = get_preprocess_fns(cfg, self.tokenizer, is_train=False,) + _, self.text_transform = get_preprocess_fns(cfg, self.tokenizer, is_train=False, ) self.max_length = cfg.text.get("max_position_embeddings") def load_model(self, cfg, state_dict): From d2dfabd742cbbad684f7d06ba2a1e23baebbaa64 Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Wed, 5 Jul 2023 11:23:41 -0700 Subject: [PATCH 151/512] Fix SD export issue --- .../dreambooth/dreambooth_export.py | 5 +- .../generative/stable_diffusion/sd_export.py | 5 +- .../models/stable_diffusion/ldm/ddpm.py | 273 +++++++++--------- .../stable_diffusion/encoders/modules.py | 46 +-- 4 files changed, 166 insertions(+), 163 deletions(-) diff --git a/examples/multimodal/generative/dreambooth/dreambooth_export.py b/examples/multimodal/generative/dreambooth/dreambooth_export.py index ae2b1ed7d227..b1838b837fd3 100644 --- a/examples/multimodal/generative/dreambooth/dreambooth_export.py +++ b/examples/multimodal/generative/dreambooth/dreambooth_export.py @@ -107,8 +107,9 @@ def __init__(self, model): self.model = model def forward(self, z): - outputs = self.model.decode(z=z) - return outputs + h = self.model.post_quant_conv(z) + dec = self.model.decoder(h) + return dec input_names = ["z"] output_names = ["logits"] diff --git a/examples/multimodal/generative/stable_diffusion/sd_export.py b/examples/multimodal/generative/stable_diffusion/sd_export.py index a7b1614f0c31..e51f57101eaf 100644 --- a/examples/multimodal/generative/stable_diffusion/sd_export.py +++ b/examples/multimodal/generative/stable_diffusion/sd_export.py @@ -111,8 +111,9 @@ def __init__(self, model): self.model = model def forward(self, z): - outputs = self.model.decode(z=z) - return outputs + h = self.model.post_quant_conv(z) + dec = self.model.decoder(h) + return dec input_names = ["z"] output_names = ["logits"] diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py index 82c2a4287c4c..47c02c18bf91 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. import itertools +from contextlib import contextmanager +from functools import partial +from typing import Any, Dict, Optional, Union + import numpy as np import pytorch_lightning as pl import torch import torch.nn as nn -from contextlib import contextmanager from einops import rearrange, repeat -from functools import partial from omegaconf import DictConfig, OmegaConf, open_dict from pytorch_lightning import Trainer from pytorch_lightning.accelerators import CPUAccelerator @@ -28,7 +30,6 @@ from torch.optim.lr_scheduler import LambdaLR from torchvision.utils import make_grid from tqdm import tqdm -from typing import Any, Dict, Optional, Union from nemo.collections.multimodal.data.stable_diffusion.stable_diffusion_dataset import ( build_train_valid_datasets, @@ -157,16 +158,16 @@ def __init__(self, cfg): if self.learn_logvar: self.logvar = nn.Parameter(self.logvar, requires_grad=True) - self.rng = torch.Generator(device=torch.cuda.current_device(), ) + self.rng = torch.Generator(device=torch.cuda.current_device(),) def register_schedule( - self, - given_betas=None, - beta_schedule="linear", - timesteps=1000, - linear_start=1e-4, - linear_end=2e-2, - cosine_s=8e-3, + self, + given_betas=None, + beta_schedule="linear", + timesteps=1000, + linear_start=1e-4, + linear_end=2e-2, + cosine_s=8e-3, ): if exists(given_betas): betas = given_betas @@ -199,7 +200,7 @@ def register_schedule( # calculations for posterior q(x_{t-1} | x_t, x_0) posterior_variance = (1 - self.v_posterior) * betas * (1.0 - alphas_cumprod_prev) / ( - 1.0 - alphas_cumprod + 1.0 - alphas_cumprod ) + self.v_posterior * betas # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) self.register_buffer('posterior_variance', to_torch(posterior_variance)) @@ -214,7 +215,7 @@ def register_schedule( if self.parameterization == "eps": lvlb_weights = self.betas ** 2 / ( - 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod) + 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod) ) elif self.parameterization == "x0": lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2.0 * 1 - torch.Tensor(alphas_cumprod)) @@ -275,26 +276,26 @@ def q_mean_variance(self, x_start, t): def predict_start_from_noise(self, x_t, t, noise): return ( - extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - - extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise + extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t + - extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise ) def predict_start_from_z_and_v(self, x_t, t, v): return ( - extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * x_t - - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * v + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * x_t + - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * v ) def predict_eps_from_z_and_v(self, x_t, t, v): return ( - extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * v - + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * x_t + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * v + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * x_t ) def q_posterior(self, x_start, x_t, t): posterior_mean = ( - extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start - + extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t + extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start + + extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t ) posterior_variance = extract_into_tensor(self.posterior_variance, t, x_t.shape) posterior_log_variance_clipped = extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape) @@ -348,14 +349,14 @@ def sample(self, batch_size=16, return_intermediates=False): def q_sample(self, x_start, t, noise=None): noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) return ( - extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start - + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise ) def get_v(self, x, noise, t): return ( - extract_into_tensor(self.sqrt_alphas_cumprod, t, x.shape) * noise - - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x.shape) * x + extract_into_tensor(self.sqrt_alphas_cumprod, t, x.shape) * noise + - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x.shape) * x ) def get_loss(self, pred, target, mean=True): @@ -518,7 +519,7 @@ def __init__(self, cfg): self.first_stage_model = self.first_stage_model.to(memory_format=torch.channels_last) self.model = self.model.to(memory_format=torch.channels_last) - def make_cond_schedule(self, ): + def make_cond_schedule(self,): self.cond_ids = torch.full(size=(self.num_timesteps,), fill_value=self.num_timesteps - 1, dtype=torch.long) ids = torch.round(torch.linspace(0, self.num_timesteps - 1, self.num_timesteps_cond)).long() self.cond_ids[: self.num_timesteps_cond] = ids @@ -536,13 +537,13 @@ def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): logging.info("### USING STD-RESCALING ###") def register_schedule( - self, - given_betas=None, - beta_schedule="linear", - timesteps=1000, - linear_start=1e-4, - linear_end=2e-2, - cosine_s=8e-3, + self, + given_betas=None, + beta_schedule="linear", + timesteps=1000, + linear_start=1e-4, + linear_end=2e-2, + cosine_s=8e-3, ): super().register_schedule(given_betas, beta_schedule, timesteps, linear_start, linear_end, cosine_s) @@ -711,14 +712,14 @@ def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo load once @torch.no_grad() def get_input( - self, - batch, - k, - return_first_stage_outputs=False, - force_c_encode=False, - cond_key=None, - return_original_cond=False, - bs=None, + self, + batch, + k, + return_first_stage_outputs=False, + force_c_encode=False, + cond_key=None, + return_original_cond=False, + bs=None, ): if self.first_stage_key.endswith('encoded'): gaussian_parameters = batch[self.first_stage_key] @@ -995,8 +996,8 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): z_list = [z[:, :, :, :, i] for i in range(z.shape[-1])] if ( - self.cond_stage_key in ["image", "LR_image", "segmentation", 'bbox_img'] - and self.model.conditioning_key + self.cond_stage_key in ["image", "LR_image", "segmentation", 'bbox_img'] + and self.model.conditioning_key ): # todo check for completeness c_key = next(iter(cond.keys())) # get key c = next(iter(cond.values())) # get value @@ -1010,7 +1011,7 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): elif self.cond_stage_key == 'coordinates_bbox': assert ( - 'original_image_size' in self.split_input_params + 'original_image_size' in self.split_input_params ), 'BoudingBoxRescaling is missing original_image_size' # assuming padding of unfold is always 0 and its dilation is always 1 @@ -1084,7 +1085,7 @@ def apply_model(self, x_noisy, t, cond, return_ids=False): def _predict_eps_from_xstart(self, x_t, t, pred_xstart): return ( - extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart + extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart ) / extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) def _prior_bpd(self, x_start): @@ -1139,16 +1140,16 @@ def p_losses(self, x_start, cond, t, noise=None): return loss, loss_dict def p_mean_variance( - self, - x, - c, - t, - clip_denoised: bool, - return_codebook_ids=False, - quantize_denoised=False, - return_x0=False, - score_corrector=None, - corrector_kwargs=None, + self, + x, + c, + t, + clip_denoised: bool, + return_codebook_ids=False, + quantize_denoised=False, + return_x0=False, + score_corrector=None, + corrector_kwargs=None, ): t_in = t model_out = self.apply_model(x, t_in, c, return_ids=return_codebook_ids) @@ -1181,19 +1182,19 @@ def p_mean_variance( @torch.no_grad() def p_sample( - self, - x, - c, - t, - clip_denoised=False, - repeat_noise=False, - return_codebook_ids=False, - quantize_denoised=False, - return_x0=False, - temperature=1.0, - noise_dropout=0.0, - score_corrector=None, - corrector_kwargs=None, + self, + x, + c, + t, + clip_denoised=False, + repeat_noise=False, + return_codebook_ids=False, + quantize_denoised=False, + return_x0=False, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, ): b, *_, device = *x.shape, x.device outputs = self.p_mean_variance( @@ -1230,23 +1231,23 @@ def p_sample( @torch.no_grad() def progressive_denoising( - self, - cond, - shape, - verbose=True, - callback=None, - quantize_denoised=False, - img_callback=None, - mask=None, - x0=None, - temperature=1.0, - noise_dropout=0.0, - score_corrector=None, - corrector_kwargs=None, - batch_size=None, - x_T=None, - start_T=None, - log_every_t=None, + self, + cond, + shape, + verbose=True, + callback=None, + quantize_denoised=False, + img_callback=None, + mask=None, + x0=None, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + batch_size=None, + x_T=None, + start_T=None, + log_every_t=None, ): if not log_every_t: log_every_t = self.log_every_t @@ -1316,20 +1317,20 @@ def progressive_denoising( @torch.no_grad() def p_sample_loop( - self, - cond, - shape, - return_intermediates=False, - x_T=None, - verbose=True, - callback=None, - timesteps=None, - quantize_denoised=False, - mask=None, - x0=None, - img_callback=None, - start_T=None, - log_every_t=None, + self, + cond, + shape, + return_intermediates=False, + x_T=None, + verbose=True, + callback=None, + timesteps=None, + quantize_denoised=False, + mask=None, + x0=None, + img_callback=None, + start_T=None, + log_every_t=None, ): if not log_every_t: @@ -1382,18 +1383,18 @@ def p_sample_loop( @torch.no_grad() def sample( - self, - cond, - batch_size=16, - return_intermediates=False, - x_T=None, - verbose=True, - timesteps=None, - quantize_denoised=False, - mask=None, - x0=None, - shape=None, - **kwargs, + self, + cond, + batch_size=16, + return_intermediates=False, + x_T=None, + verbose=True, + timesteps=None, + quantize_denoised=False, + mask=None, + x0=None, + shape=None, + **kwargs, ): if shape is None: shape = (batch_size, self.channels, self.image_size, self.image_size) @@ -1434,20 +1435,20 @@ def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): @torch.no_grad() def log_images( - self, - batch, - N=8, - n_row=4, - sample=True, - ddim_steps=200, - ddim_eta=1.0, - return_keys=None, - quantize_denoised=True, - inpaint=True, - plot_denoise_rows=False, - plot_progressive_rows=True, - plot_diffusion_rows=True, - **kwargs, + self, + batch, + N=8, + n_row=4, + sample=True, + ddim_steps=200, + ddim_eta=1.0, + return_keys=None, + quantize_denoised=True, + inpaint=True, + plot_denoise_rows=False, + plot_progressive_rows=True, + plot_diffusion_rows=True, + **kwargs, ): use_ddim = ddim_steps is not None @@ -1512,9 +1513,9 @@ def log_images( log["denoise_row"] = denoise_grid if ( - quantize_denoised - and not isinstance(self.first_stage_model, AutoencoderKL) - and not isinstance(self.first_stage_model, IdentityFirstStage) + quantize_denoised + and not isinstance(self.first_stage_model, AutoencoderKL) + and not isinstance(self.first_stage_model, IdentityFirstStage) ): # also display when quantizing x0 while sampling with self.ema_scope("Plotting Quantized Denoised"): @@ -1536,7 +1537,7 @@ def log_images( b, h, w = z.shape[0], z.shape[2], z.shape[3] mask = torch.ones(N, h, w) # zeros will be filled in - mask[:, h // 4: 3 * h // 4, w // 4: 3 * w // 4] = 0.0 + mask[:, h // 4 : 3 * h // 4, w // 4 : 3 * w // 4] = 0.0 mask = mask[:, None, ...] with self.ema_scope("Plotting Inpaint"): samples, _ = self.sample_log( @@ -1776,7 +1777,7 @@ def process_batch(batch): # SD has more dedicated structure for encoding, so we enable autocasting here as well with torch.cuda.amp.autocast( - self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, + self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, ): x, c = self.model.get_input(batch, self.cfg.first_stage_key) @@ -1984,12 +1985,12 @@ def parameters(self): class DiffusionWrapper(pl.LightningModule, Serialization): def __init__( - self, - diff_model_config, - conditioning_key, - inductor: bool = False, - inductor_cudagraphs: bool = False, - capture_cudagraph_iters: int = -1, + self, + diff_model_config, + conditioning_key, + inductor: bool = False, + inductor_cudagraphs: bool = False, + capture_cudagraph_iters: int = -1, ): super().__init__() self.diffusion_model = DiffusionWrapper.from_config_dict(diff_model_config) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py index 970c4ee4800e..e4f12d68d371 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py @@ -11,14 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import kornia -import open_clip import os import tempfile +from functools import partial + +import kornia +import open_clip import torch import torch.nn as nn from einops import rearrange, repeat -from functools import partial from omegaconf import OmegaConf from torch.utils.checkpoint import checkpoint from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer @@ -26,10 +27,10 @@ from nemo.collections.multimodal.data.clip.clip_dataset import get_preprocess_fns from nemo.collections.multimodal.models.clip.megatron_clip_models import CLIPModel -from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import Encoder from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import ( TransformerWrapper, # TODO: can we directly rely on lucidrains code and simply add this as a reuirement? --> test ) +from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import Encoder from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector @@ -118,14 +119,14 @@ class BERTEmbedder(AbstractEncoder): """Uses the BERT tokenizr model and add some transformer encoder layers""" def __init__( - self, - n_embed, - n_layer, - vocab_size=30522, - max_seq_len=77, - device="cuda", - use_tokenizer=True, - embedding_dropout=0.0, + self, + n_embed, + n_layer, + vocab_size=30522, + max_seq_len=77, + device="cuda", + use_tokenizer=True, + embedding_dropout=0.0, ): super().__init__() self.use_tknz_fn = use_tokenizer @@ -181,8 +182,7 @@ class FrozenCLIPEmbedder(AbstractEncoder): """Uses the CLIP transformer encoder for text (from Hugging Face)""" def __init__( - self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77, - capture_cudagraph_iters: int = -1 + self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77, capture_cudagraph_iters: int = -1 ): super().__init__() self.tokenizer = CLIPTokenizer.from_pretrained(version) @@ -263,14 +263,14 @@ class FrozenOpenCLIPEmbedder(AbstractEncoder): ] def __init__( - self, - arch="ViT-H-14", - version="laion2b_s32b_b79k", - device="cuda", - max_length=77, - freeze=True, - layer="last", - use_fp16=False, + self, + arch="ViT-H-14", + version="laion2b_s32b_b79k", + device="cuda", + max_length=77, + freeze=True, + layer="last", + use_fp16=False, ): super().__init__() assert layer in self.LAYERS @@ -384,7 +384,7 @@ def build_tokenizer(self, cfg): legacy=legacy, ) - _, self.text_transform = get_preprocess_fns(cfg, self.tokenizer, is_train=False, ) + _, self.text_transform = get_preprocess_fns(cfg, self.tokenizer, is_train=False,) self.max_length = cfg.text.get("max_position_embeddings") def load_model(self, cfg, state_dict): From 30879a51b284c5f3aadd506611f892456b7a94aa Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Wed, 5 Jul 2023 16:04:59 -0700 Subject: [PATCH 152/512] Add nemo clip export for DB --- .../dreambooth/dreambooth_export.py | 71 +++++++++++++++---- .../generative/stable_diffusion/sd_export.py | 2 + 2 files changed, 59 insertions(+), 14 deletions(-) diff --git a/examples/multimodal/generative/dreambooth/dreambooth_export.py b/examples/multimodal/generative/dreambooth/dreambooth_export.py index b1838b837fd3..ae1c85edfbb5 100644 --- a/examples/multimodal/generative/dreambooth/dreambooth_export.py +++ b/examples/multimodal/generative/dreambooth/dreambooth_export.py @@ -14,14 +14,18 @@ import gc import os import time +from typing import Dict, List, Optional import torch import torch.nn as nn from omegaconf.omegaconf import OmegaConf, open_dict from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion, MegatronLatentDiffusion +from nemo.collections.multimodal.modules.stable_diffusion.encoders.modules import FrozenCLIPEmbedder from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.classes.exportable import Exportable from nemo.core.config import hydra_runner +from nemo.core.neural_types import ChannelType, NeuralType from nemo.utils.trt_utils import build_engine @@ -138,21 +142,60 @@ def forward(self, input_ids): outputs = self.model(input_ids=input_ids) return outputs.last_hidden_state - input_names = ["tokens"] - output_names = ["logits"] + class OpenCLIPWrapper(nn.Module, Exportable): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, input_ids): + outputs = self.model.encode_with_transformer(input_ids) + return outputs + + def input_example(self, max_text=64): + sample = next(self.parameters()) + tokens = torch.randint(high=10, size=(1, self.model.max_length)).to(sample.device) + return (tokens,) + + @property + def input_types(self) -> Optional[Dict[str, NeuralType]]: + return { + "tokens": NeuralType(('H', 'D'), ChannelType()), + } + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + return {"logits": NeuralType(('B', 'H'), ChannelType())} + + @property + def input_names(self) -> List[str]: + return ['tokens'] + + @property + def output_names(self) -> List[str]: + return ['logits'] + + openai_clip = isinstance(model.cond_stage_model, FrozenCLIPEmbedder) tokens = torch.randint(high=10, size=(1, model.cond_stage_model.max_length), device="cuda") - torch.onnx.export( - CLIPWrapper(model.cond_stage_model.transformer), - (tokens,), - f"{output_dir}/onnx/clip/clip.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"tokens": {0: 'B'}, "logits": {0: 'B'}}, - opset_version=17, - do_constant_folding=True, - export_params=True, - ) + + if openai_clip: + input_names = ["tokens"] + output_names = ["logits"] + torch.onnx.export( + CLIPWrapper(model.cond_stage_model.transformer), + (tokens,), + f"{output_dir}/onnx/clip/clip.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"tokens": {0: 'B'}, "logits": {0: 'B'}}, + opset_version=17, + do_constant_folding=True, + export_params=True, + ) + else: + clip_model = OpenCLIPWrapper(model.cond_stage_model) + clip_model.export(f"{output_dir}/onnx/clip/clip.onnx") + input_profile_clip = {} input_profile_clip["tokens"] = [(batch_size, *(tokens.shape[1:]))] * 3 deployment_conf.clip.tokens = input_profile_clip["tokens"][0] diff --git a/examples/multimodal/generative/stable_diffusion/sd_export.py b/examples/multimodal/generative/stable_diffusion/sd_export.py index e51f57101eaf..ec47f9b13bdd 100644 --- a/examples/multimodal/generative/stable_diffusion/sd_export.py +++ b/examples/multimodal/generative/stable_diffusion/sd_export.py @@ -46,6 +46,8 @@ def model_cfg_modifier(model_cfg): model_cfg.ckpt_path = None model_cfg.inductor = False model_cfg.unet_config.use_flash_attention = False + model_cfg.unet_config.from_pretrained = None + model_cfg.first_stage_config.from_pretrained = None trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier From 877519eeb384affd117d8a35495aeae612d6a699 Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Wed, 5 Jul 2023 18:30:03 -0700 Subject: [PATCH 153/512] Fix ins pix2pix --- .../instruct_pix2pix/sd_edit_export.py | 76 +++++++++++++++---- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py b/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py index 6125f5e32723..8c83f213edaf 100644 --- a/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py +++ b/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py @@ -17,6 +17,7 @@ import random import sys from argparse import ArgumentParser +from typing import Dict, List, Optional import einops import numpy as np @@ -28,9 +29,12 @@ from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit from nemo.collections.multimodal.models.stable_diffusion.samplers.k_diffusion import DiscreteEpsDDPMDenoiser +from nemo.collections.multimodal.modules.stable_diffusion.encoders.modules import FrozenCLIPEmbedder from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector +from nemo.core.classes.exportable import Exportable from nemo.core.config import hydra_runner +from nemo.core.neural_types import ChannelType, NeuralType from nemo.utils import logging from nemo.utils.trt_utils import build_engine @@ -186,8 +190,9 @@ def __init__(self, model): self.model = model def forward(self, z): - outputs = self.model.decode(z=z) - return outputs + h = self.model.post_quant_conv(z) + dec = self.model.decoder(h) + return dec input_names = ["z"] output_names = ["logits"] @@ -218,21 +223,60 @@ def forward(self, input_ids): outputs = self.model(input_ids=input_ids) return outputs.last_hidden_state - input_names = ["tokens"] - output_names = ["logits"] + class OpenCLIPWrapper(nn.Module, Exportable): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, input_ids): + outputs = self.model.encode_with_transformer(input_ids) + return outputs + + def input_example(self, max_text=64): + sample = next(self.parameters()) + tokens = torch.randint(high=10, size=(1, self.model.max_length)).to(sample.device) + return (tokens,) + + @property + def input_types(self) -> Optional[Dict[str, NeuralType]]: + return { + "tokens": NeuralType(('H', 'D'), ChannelType()), + } + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + return {"logits": NeuralType(('B', 'H'), ChannelType())} + + @property + def input_names(self) -> List[str]: + return ['tokens'] + + @property + def output_names(self) -> List[str]: + return ['logits'] + + openai_clip = isinstance(model.cond_stage_model, FrozenCLIPEmbedder) tokens = torch.randint(high=10, size=(1, model.cond_stage_model.max_length), device="cuda") - torch.onnx.export( - CLIPWrapper(model.cond_stage_model.transformer), - (tokens,), - f"{output_dir}/onnx/clip/clip.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"tokens": {0: 'B'}, "logits": {0: 'B'}}, - opset_version=17, - do_constant_folding=True, - export_params=True, - ) + + if openai_clip: + input_names = ["tokens"] + output_names = ["logits"] + torch.onnx.export( + CLIPWrapper(model.cond_stage_model.transformer), + (tokens,), + f"{output_dir}/onnx/clip/clip.onnx", + verbose=False, + input_names=input_names, + output_names=output_names, + dynamic_axes={"tokens": {0: 'B'}, "logits": {0: 'B'}}, + opset_version=17, + do_constant_folding=True, + export_params=True, + ) + else: + clip_model = OpenCLIPWrapper(model.cond_stage_model) + clip_model.export(f"{output_dir}/onnx/clip/clip.onnx") + input_profile_clip = {} input_profile_clip["tokens"] = [(1, *(tokens.shape[1:]))] * 3 deployment_conf.clip.tokens = input_profile_clip["tokens"][0] From 6ebb8be1cbca28b3dcc8ed0e4a1d4d811672698e Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Wed, 5 Jul 2023 20:24:50 -0700 Subject: [PATCH 154/512] fix sd2 config --- .../multimodal/generative/stable_diffusion/conf/sd2_train.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml index fd579ef19487..47a1ded9fbb2 100644 --- a/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml +++ b/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml @@ -107,7 +107,7 @@ model: - 2 - 4 - 4 - num_heads: 8 + num_head_channels: 64 use_spatial_transformer: true use_linear_in_transformer: true transformer_depth: 1 From e3777e7b1e3cb53424aa06ede32188b2bcb98f66 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Thu, 13 Jul 2023 11:31:34 -0700 Subject: [PATCH 155/512] [Mingyuan Ma] BF16 and SD conversion script --- .../generative/convert_hf_ckpt_to_nemo.py | 165 ++++++++++++++++++ .../stable_diffusion/ldm/autoencoder.py | 13 ++ .../models/stable_diffusion/ldm/ddpm.py | 3 + .../diffusionmodules/openaimodel.py | 10 +- 4 files changed, 189 insertions(+), 2 deletions(-) create mode 100644 examples/multimodal/generative/convert_hf_ckpt_to_nemo.py diff --git a/examples/multimodal/generative/convert_hf_ckpt_to_nemo.py b/examples/multimodal/generative/convert_hf_ckpt_to_nemo.py new file mode 100644 index 000000000000..8391e3cb1580 --- /dev/null +++ b/examples/multimodal/generative/convert_hf_ckpt_to_nemo.py @@ -0,0 +1,165 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Usage example: + python /opt/NeMo/examples/multimodal/generative/stable_diffusion/convert_hf_ckpt_to_nemo.py + --ckpt_path=path/to/hf.ckpt + --hparams_file=path/to/saved.yaml + --nemo_file_path=hf2sd.nemo + +Additionally, provide a NeMo hparams file with the correct model architecture arguments. Refer to examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml. +""" + +import os +from argparse import ArgumentParser + +import torch +from omegaconf import OmegaConf +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.trainer import Trainer +from pytorch_lightning.utilities.cloud_io import load as pl_load + +from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.utils import AppState, logging +from nemo.utils.distributed import initialize_distributed + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +def get_args(): + parser = ArgumentParser() + parser.add_argument("--ckpt_path", type=str, default=None, required=True, help="Path to checkpoint.") + + parser.add_argument( + "--hparams_file", + type=str, + default=None, + required=False, + help="Path config for restoring. It's created during training and may need to be modified during restore if restore environment is different than training. Ex: /raid/nemo_experiments/megatron_gpt/hparams.yaml", + ) + parser.add_argument("--nemo_file_path", type=str, default=None, required=True, help="Path to output .nemo file.") + parser.add_argument("--gpus_per_node", type=int, required=False, default=1) + parser.add_argument("--tensor_model_parallel_size", type=int, required=False, default=1) + parser.add_argument("--pipeline_model_parallel_size", type=int, required=False, default=1) + parser.add_argument( + "--pipeline_model_parallel_split_rank", + type=int, + required=False, + default=None, + help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", + ) + parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) + parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") + parser.add_argument("--model_type", type=str, required=False, default="stable_diffusion") + + args = parser.parse_args() + return args + + +def mapping_hf_state_dict(hf_state_dict, model): + nemo_state = model.state_dict() + new_state_dict = {} + for k, v in hf_state_dict.items(): + k = 'model.' + k + # This is not necessary when you turn off model.inductor in config file + # if 'diffusion_model' in k: + # k = k.replace('diffusion_model', 'diffusion_model._orig_mod') + if 'in_layers' in k or 'out_layers' in k: + s = k.split('.') + idx = int(s[-2]) + if idx != 0: + k = ".".join(s[:-2] + [str(int(idx - 1))] + [s[-1]]) + if k in nemo_state: + new_state_dict[k] = v + + return new_state_dict + + +def convert(local_rank, rank, world_size, args): + app_state = AppState() + app_state.data_parallel_rank = 0 + num_nodes = world_size // args.gpus_per_node + if args.bcp: + trainer = Trainer( + devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu', plugins=[TorchElasticEnvironment()] + ) + else: + trainer = Trainer(devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu') + + app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = args.tensor_model_parallel_size + + # no use atm, use to split ranks in encoder/decoder models. + if args.pipeline_model_parallel_size > 1 and args.model_type in []: + if args.pipeline_model_parallel_split_rank is not None: + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_split_rank + else: + if args.pipeline_model_parallel_size % 2 != 0: + raise ValueError( + f"Pipeline model parallel size {args.pipeline_model_parallel_size} must be even if split rank is not specified." + ) + else: + # If split rank is not set, then we set it to be pipeline_model_parallel_size // 2 - this is because in most cases we have the same number of enc/dec layers. + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_size // 2 + else: + app_state.pipeline_model_parallel_split_rank = None + + app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size + + parallel_state.initialize_model_parallel( + tensor_model_parallel_size=app_state.tensor_model_parallel_size, + pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, + ) + + app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() + app_state.tensor_model_parallel_rank = parallel_state.get_tensor_model_parallel_rank() + + checkpoint = pl_load(args.ckpt_path, map_location='cpu') + if 'state_dict' in checkpoint.keys(): + checkpoint = checkpoint['state_dict'] + cfg = OmegaConf.load(args.hparams_file) + if args.model_type == 'stable_diffusion': + model = MegatronLatentDiffusion(cfg.model, trainer) + elif args.model_type == 'controlnet': + model = MegatronControlNet(cfg.model, trainer) + + state_dict = mapping_hf_state_dict(checkpoint, model) + + model.load_state_dict(state_dict) + + model._save_restore_connector = NLPSaveRestoreConnector() + + if torch.distributed.is_initialized(): + torch.distributed.barrier() + + model.save_to(args.nemo_file_path) + + logging.info(f'NeMo model saved to: {args.nemo_file_path}') + + +if __name__ == '__main__': + args = get_args() + local_rank, rank, world_size = initialize_distributed(args) + convert(local_rank, rank, world_size, args) diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py index 7443dc014f94..241d2ae46ef0 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py @@ -417,6 +417,19 @@ def _find_mismatched_keys( del state_dict[checkpoint_key] return mismatched_keys + if state_dict['encoder.mid.attn_1.q.weight'].shape == torch.Size([512, 512]): + for key in [ + 'encoder.mid.attn_1.q.weight', + 'decoder.mid.attn_1.q.weight', + 'encoder.mid.attn_1.v.weight', + 'decoder.mid.attn_1.v.weight', + 'encoder.mid.attn_1.k.weight', + 'decoder.mid.attn_1.k.weight', + 'encoder.mid.attn_1.proj_out.weight', + 'decoder.mid.attn_1.proj_out.weight', + ]: + state_dict[key] = state_dict[key].unsqueeze(2).unsqueeze(3) + if state_dict is not None: # Whole checkpoint mismatched_keys = _find_mismatched_keys( diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py index 47c02c18bf91..96f7ab659fe8 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -491,6 +491,7 @@ def __init__(self, cfg): ignore_keys = cfg.ignore_keys cfg.conditioning_key = conditioning_key super().__init__(cfg=cfg) + self.precision = cfg.precision self.concat_mode = cfg.concat_mode self.cond_stage_trainable = cfg.cond_stage_trainable self.cond_stage_key = cfg.cond_stage_key @@ -1119,6 +1120,8 @@ def p_losses(self, x_start, cond, t, noise=None): else: raise NotImplementedError() + if (self.precision == 'bf16') or (int(self.precision) == 16): + model_output = model_output.type(torch.float32) loss_simple = self.get_loss(model_output, target, mean=False).mean([1, 2, 3]) loss_dict.update({f'{prefix}/loss_simple': loss_simple.mean()}) self.logvar = self.logvar.cuda(non_blocking=True) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py index bb0bc5f05cfe..6fdc59c3e421 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py @@ -719,12 +719,16 @@ def __init__( if from_pretrained is not None: if from_NeMo: state_dict = torch.load(from_pretrained, map_location='cpu') - self._load_pretrained_model(state_dict['state_dict'], from_NeMo=True) + missing_key, _, _, _ = self._load_pretrained_model(state_dict['state_dict'], from_NeMo=True) else: state_dict = load_state_dict(from_pretrained) if 'state_dict' in state_dict.keys(): state_dict = state_dict['state_dict'] - self._load_pretrained_model(state_dict) + missing_key, _, _, _ = self._load_pretrained_model(state_dict) + if len(missing_key) > 0: + print( + 'Following keys are missing during loading unet weights, which may lead to compromised image quality for a resumed training. Please check the checkpoint you provided.' + ) def _input_blocks_mapping(self, input_dict): res_dict = {} @@ -916,6 +920,8 @@ def _strip_unet_key_prefix(self, state_dict): re_state_dict[key_.replace('model._orig_mod.diffusion_model.', '')] = value_ if key_.startswith('model.model._orig_mod.diffusion_model.'): re_state_dict[key_.replace('model.model._orig_mod.diffusion_model.', '')] = value_ + if key_.startswith('model.model.diffusion_model._orig_mod.'): + re_state_dict[key_.replace('model.model.diffusion_model._orig_mod.', '')] = value_ return re_state_dict def _load_state_dict_into_model(self, state_dict): From 1b672ce17e8d0a4cb7fd33197eb74560f2634833 Mon Sep 17 00:00:00 2001 From: Ao Tang Date: Wed, 19 Jul 2023 09:22:29 -0700 Subject: [PATCH 156/512] [Imagen] NHWC Feature --- .../generative/imagen/conf/base64-2b.yaml | 2 ++ .../generative/imagen/conf/base64-500m.yaml | 3 ++- .../generative/imagen/conf/sr1024-600m.yaml | 3 ++- .../generative/imagen/conf/sr256-400m.yaml | 3 ++- .../generative/imagen/conf/sr256-600m.yaml | 3 ++- .../multimodal/models/imagen/imagen.py | 20 ++++++++++++++++++- .../modules/imagen/diffusionmodules/layers.py | 13 +++++++++++- 7 files changed, 41 insertions(+), 6 deletions(-) diff --git a/examples/multimodal/generative/imagen/conf/base64-2b.yaml b/examples/multimodal/generative/imagen/conf/base64-2b.yaml index a5c643533eec..2005ec8daee2 100644 --- a/examples/multimodal/generative/imagen/conf/base64-2b.yaml +++ b/examples/multimodal/generative/imagen/conf/base64-2b.yaml @@ -55,6 +55,8 @@ model: inductor: False inductor_cudagraphs: False unet_type: base + channels_last: True + unet: embed_dim: 512 image_size: 64 diff --git a/examples/multimodal/generative/imagen/conf/base64-500m.yaml b/examples/multimodal/generative/imagen/conf/base64-500m.yaml index 9c2b8436c572..2cd20ec75e62 100644 --- a/examples/multimodal/generative/imagen/conf/base64-500m.yaml +++ b/examples/multimodal/generative/imagen/conf/base64-500m.yaml @@ -55,7 +55,8 @@ model: inductor: False inductor_cudagraphs: False unet_type: base - + channels_last: True + unet: embed_dim: 256 image_size: 64 diff --git a/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml b/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml index 974aba83c076..92a4c918e0a2 100644 --- a/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml +++ b/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml @@ -53,8 +53,9 @@ model: global_batch_size: 64 # will use more micro batches to reach global batch size inductor: False inductor_cudagraphs: False - unet_type: sr + channels_last: True + unet: embed_dim: 128 image_size: 1024 diff --git a/examples/multimodal/generative/imagen/conf/sr256-400m.yaml b/examples/multimodal/generative/imagen/conf/sr256-400m.yaml index ce0dc88f2abc..e8be690cf6ae 100644 --- a/examples/multimodal/generative/imagen/conf/sr256-400m.yaml +++ b/examples/multimodal/generative/imagen/conf/sr256-400m.yaml @@ -55,7 +55,8 @@ model: global_batch_size: 16 # will use more micro batches to reach global batch size inductor: False inductor_cudagraphs: False - + channels_last: True + unet_type: sr-unet unet: embed_dim: 128 diff --git a/examples/multimodal/generative/imagen/conf/sr256-600m.yaml b/examples/multimodal/generative/imagen/conf/sr256-600m.yaml index 59141a595cd1..cf615d767b12 100644 --- a/examples/multimodal/generative/imagen/conf/sr256-600m.yaml +++ b/examples/multimodal/generative/imagen/conf/sr256-600m.yaml @@ -55,7 +55,8 @@ model: global_batch_size: 64 # will use more micro batches to reach global batch size inductor: False inductor_cudagraphs: False - + channels_last: True + unet_type: sr unet: embed_dim: 128 diff --git a/nemo/collections/multimodal/models/imagen/imagen.py b/nemo/collections/multimodal/models/imagen/imagen.py index 076e5a96971c..48c395d6bece 100644 --- a/nemo/collections/multimodal/models/imagen/imagen.py +++ b/nemo/collections/multimodal/models/imagen/imagen.py @@ -49,9 +49,16 @@ HAVE_MEGATRON_CORE = True except (ImportError, ModuleNotFoundError): - HAVE_MEGATRON_CORE = False +try: + from group_norm import GroupNormOpt + + OPT_GROUP_NORM = True +except Exception: + print('Fused optimized group norm has not been installed.') + OPT_GROUP_NORM = False + DUMMY_TENSOR = torch.tensor([1.0]) @@ -76,6 +83,12 @@ def __init__(self, cfg): else: raise NotImplemented(f'{self.unet_type} UNet is not implemented.') + self.channels_last = cfg.get('channels_last', False) + if self.channels_last: + assert OPT_GROUP_NORM, 'Training in channels last format requires optmized group norm implementation.' + logging.info('Training in torch channels last format.') + unet = unet.to(memory_format=torch.channels_last) + # Preconditioning self.preconditioning_type = cfg.get('preconditioning_type', 'DDPM') if self.preconditioning_type == 'DDPM': @@ -119,6 +132,11 @@ def forward(self, x_start, text_embed, text_mask, x_lowres=None): else: assert x_lowres[0].dim() not in [0, 1], 'SR model should have low-resolution conditioning' + if self.channels_last: + x_start = x_start.to(memory_format=torch.channels_last) + if x_lowres is not None: + x_lowres = x_lowres.to(memory_format=torch.channels_last) + # Apply random dropout to text embedding text_embed = random_dropout(text_embed, drop_rate=self.text_drop_rate) # UNet Forward Pass diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py index 3d9cac44e820..a5cb19444057 100644 --- a/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py @@ -44,6 +44,14 @@ import torch.nn as nn import torch.nn.functional as F +try: + from group_norm import GroupNormOpt + + OPT_GROUP_NORM = True +except Exception: + print('Fused optimized group norm has not been installed.') + OPT_GROUP_NORM = False + def conv_nd(dims, *args, **kwargs): """ @@ -116,13 +124,16 @@ def mean_flat(tensor): return tensor.mean(dim=list(range(1, len(tensor.shape)))) -def normalization(channels): +def normalization(channels, act=""): """ Make a standard normalization layer. :param channels: number of input channels. :return: an nn.Module for normalization. """ + if OPT_GROUP_NORM: + return GroupNormOpt(32, channels, act=act) + return nn.GroupNorm(32, channels) From 47f442b914145fdd2d2be1d3526128f71365df26 Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Wed, 19 Jul 2023 11:25:05 -0700 Subject: [PATCH 157/512] Fix .nemo loading issue for NeMo CLIP in SD --- .../models/stable_diffusion/ldm/ddpm.py | 10 +++++++++ .../stable_diffusion/encoders/modules.py | 22 +++++++++++++------ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py index 96f7ab659fe8..686de294fe7d 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -1985,6 +1985,16 @@ def parameters(self): else: return self.model.parameters() + def save_to(self, save_path: str): + # Replace .nemo path in config for NeMo CLIP + cfg = self._cfg + if cfg.get('cond_stage_config').get('restore_from_path'): + with open_dict(cfg): + cfg.cond_stage_config.restore_from_path = None + cfg.cond_stage_config.cfg = self.model.cond_stage_model.cfg + self._cfg = cfg + super().save_to(save_path) + class DiffusionWrapper(pl.LightningModule, Serialization): def __init__( diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py index e4f12d68d371..fe27a2a639ca 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py @@ -324,9 +324,16 @@ def encode(self, text): class FrozenMegatronCLIPEmbedder(AbstractEncoder): - def __init__(self, restore_from_path, device="cuda", layer="last", freeze=True, use_fp16=False): + def __init__(self, restore_from_path, device="cuda", layer="last", freeze=True, cfg=None, use_fp16=False): super().__init__() - cfg, state_dict = self.load_config_and_state_from_nemo(restore_from_path) + if restore_from_path is not None: + cfg, state_dict = self.load_config_and_state_from_nemo(restore_from_path) + elif cfg is not None: + state_dict = None + else: + raise ValueError("Either restore_from_path or cfg should not be None") + + self.cfg = cfg self.build_tokenizer(cfg) self.load_model(cfg, state_dict) @@ -400,11 +407,12 @@ def load_model(self, cfg, state_dict): post_process=cfg.text.post_process, ) - clip_state_dict = {} - for key, value in state_dict.items(): - key = key[6:] - clip_state_dict[key] = value - model.load_state_dict(clip_state_dict) + if state_dict is not None: + clip_state_dict = {} + for key, value in state_dict.items(): + key = key[6:] + clip_state_dict[key] = value + model.load_state_dict(clip_state_dict) del model.vision_encoder self.model = model.text_encoder From 6cf322b1af02d5629b63de892141395ad9f81b51 Mon Sep 17 00:00:00 2001 From: Yu Yao Date: Wed, 2 Aug 2023 11:27:05 -0700 Subject: [PATCH 158/512] NeMo r1.20.0 Multimodal Merge --- .github/workflows/codeql.yml | 1 + .github/workflows/config/codeql.yml | 9 + .github/workflows/import-test.yml | 62 +- Dockerfile | 13 +- Jenkinsfile | 1274 ++++++++---- README.rst | 96 +- docs/source/_static/css/custom.css | 2 +- docs/source/asr/configs.rst | 39 + docs/source/asr/data/benchmark_en.csv | 10 +- .../asr/data/scores/en/conformer_en.csv | 14 + docs/source/asr/datasets.rst | 25 +- docs/source/asr/models.rst | 46 +- .../data/speaker_results.csv | 1 + docs/source/conf.py | 5 +- docs/source/core/core.rst | 2 +- docs/source/core/export.rst | 31 + docs/source/index.rst | 1 + docs/source/nlp/api.rst | 199 +- docs/source/nlp/megatron_onnx_export.rst | 47 + docs/source/nlp/models.rst | 1 + docs/source/nlp/nlp_all.bib | 9 + .../nlp/spellchecking_asr_customization.rst | 128 ++ docs/source/starthere/intro.rst | 4 +- docs/source/starthere/tutorials.rst | 26 +- docs/source/tools/comparison_tool.rst | 31 +- docs/source/tools/images/scr_10.png | Bin 0 -> 42125 bytes docs/source/tools/images/scr_11.png | Bin 0 -> 50431 bytes ...ech_to_text_cache_aware_streaming_infer.py | 20 +- examples/asr/asr_vad/README.md | 39 +- .../asr/asr_vad/speech_to_text_with_vad.py | 116 +- .../conformer_ctc_bpe_streaming.yaml | 8 +- .../conformer_transducer_bpe_streaming.yaml | 10 +- .../conformer_multiblank_transducer_bpe.yaml | 1 + .../conf/conformer/tdt/conformer_tdt_bpe.yaml | 281 +++ .../tdt/conformer_tdt_bpe_stateless.yaml | 278 +++ .../fastconformer_ctc_bpe_streaming.yaml | 9 +- .../fastconformer_ctc_char_streaming.yaml | 10 +- ...astconformer_transducer_bpe_streaming.yaml | 15 +- ...stconformer_transducer_char_streaming.yaml | 16 +- .../fastconformer/fast-conformer_ctc_bpe.yaml | 16 + .../fast-conformer_transducer_bpe.yaml | 16 + ...r_hybrid_transducer_ctc_bpe_streaming.yaml | 15 +- ..._hybrid_transducer_ctc_char_streaming.yaml | 15 +- .../conf/marblenet/marblenet_3x2x64_20ms.yaml | 209 ++ .../ssl/fastconformer/fast-conformer.yaml | 235 +++ .../conf/vad/frame_vad_infer_postprocess.yaml | 38 + examples/asr/speech_classification/README.md | 114 +- .../speech_classification/frame_vad_infer.py | 199 ++ .../speech_to_frame_label.py | 70 + examples/asr/speech_to_text_eval.py | 5 + examples/asr/transcribe_speech.py | 15 +- examples/asr/transcribe_speech_parallel.py | 20 +- .../conf/megatron_bert_config.yaml | 3 +- .../conf/megatron_gpt_config.yaml | 16 +- .../conf/megatron_gpt_export.yaml | 25 + .../conf/megatron_gpt_inference.yaml | 7 +- .../conf/megatron_gpt_validate_config.yaml | 22 + .../conf/megatron_model_base_config.yaml | 3 +- .../megatron_bert_pretraining.py | 5 +- .../megatron_change_num_partitions.py | 12 +- .../nlp/language_modeling/megatron_export.py | 175 ++ .../language_modeling/megatron_gpt_eval.py | 44 +- .../megatron_gpt_prompt_learning_eval.py | 16 +- .../megatron_gpt_validate.py | 155 ++ .../nlp/language_modeling/megatron_t5_eval.py | 10 +- .../conf/megatron_gpt_peft_eval_config.yaml | 5 +- .../tuning/conf/megatron_gpt_sft.yaml | 2 +- .../conf/megatron_t5_lora_inference.yaml | 36 + .../conf/megatron_t5_lora_tuning_config.yaml | 99 + .../tuning/megatron_gpt_adapter_eval.py | 15 +- .../tuning/megatron_gpt_ia3_eval.py | 15 +- .../tuning/megatron_gpt_peft_eval.py | 58 +- .../tuning/megatron_gpt_peft_tuning.py | 4 +- .../tuning/megatron_gpt_sft.py | 11 +- .../tuning/megatron_t5_lora_eval.py | 160 ++ .../tuning/megatron_t5_lora_tuning.py | 107 + .../spellchecking_asr_customization/README.md | 32 + .../checkpoint_to_nemo.py | 38 + ...pellchecking_asr_customization_config.yaml | 97 + .../convert_data_to_tarred.sh | 50 + .../create_custom_vocab_index.py | 72 + .../create_tarred_dataset.py | 99 + .../helpers.py | 86 + .../postprocess_and_update_manifest.py | 79 + .../prepare_input_from_manifest.py | 129 ++ .../run_infer.sh | 99 + .../run_training.sh | 56 + .../run_training_tarred.sh | 63 + .../spellchecking_asr_customization_infer.py | 123 ++ .../spellchecking_asr_customization_train.py | 66 + .../extract_giza_alignments.py | 215 +-- .../eval_utils/evaluator.py | 2 +- examples/tts/conf/fastpitch/fastpitch.yaml | 256 +++ .../conf/fastpitch_align_44100_adapter.yaml | 3 + examples/tts/conf/hifigan/hifigan_data.yaml | 133 ++ .../tts/conf/hifigan/sample/sample_22050.yaml | 3 + .../tts/conf/hifigan/sample/sample_44100.yaml | 3 + examples/tts/conf/text/normalizer_en.yaml | 3 + examples/tts/fastpitch_finetune_adapters.py | 12 + nemo/collections/asr/data/audio_to_label.py | 22 +- .../asr/data/audio_to_label_dataset.py | 6 +- .../asr/data/audio_to_text_dataset.py | 145 +- nemo/collections/asr/data/feature_to_label.py | 185 +- .../asr/data/feature_to_label_dataset.py | 26 +- nemo/collections/asr/data/feature_to_text.py | 91 +- .../asr/data/feature_to_text_dataset.py | 4 + nemo/collections/asr/losses/rnnt.py | 144 +- nemo/collections/asr/losses/rnnt_pytorch.py | 143 +- nemo/collections/asr/metrics/rnnt_wer.py | 115 +- nemo/collections/asr/metrics/rnnt_wer_bpe.py | 11 +- nemo/collections/asr/metrics/wer.py | 2 +- nemo/collections/asr/metrics/wer_bpe.py | 2 +- nemo/collections/asr/models/asr_model.py | 72 +- .../asr/models/classification_models.py | 16 +- .../asr/models/confidence_ensemble.py | 17 +- nemo/collections/asr/models/ctc_bpe_models.py | 16 +- nemo/collections/asr/models/ctc_models.py | 8 +- .../asr/models/hybrid_asr_tts_models.py | 2 + .../asr/models/hybrid_rnnt_ctc_bpe_models.py | 30 +- .../asr/models/hybrid_rnnt_ctc_models.py | 14 + nemo/collections/asr/models/label_models.py | 13 +- .../collections/asr/models/rnnt_bpe_models.py | 16 +- nemo/collections/asr/models/rnnt_models.py | 27 +- nemo/collections/asr/models/slu_models.py | 6 +- nemo/collections/asr/models/ssl_models.py | 15 +- .../asr/modules/audio_preprocessing.py | 2 +- .../asr/modules/conformer_encoder.py | 380 ++-- .../asr/modules/squeezeformer_encoder.py | 1 - .../asr/parts/k2/graph_transducer.py | 483 +++++ nemo/collections/asr/parts/k2/topologies.py | 8 +- nemo/collections/asr/parts/k2/w_transducer.py | 340 ++++ nemo/collections/asr/parts/mixins/mixins.py | 28 + .../asr/parts/numba/rnnt_loss/__init__.py | 6 +- .../asr/parts/numba/rnnt_loss/rnnt.py | 129 +- .../asr/parts/numba/rnnt_loss/rnnt_numpy.py | 5 + .../asr/parts/numba/rnnt_loss/rnnt_pytorch.py | 231 ++- .../rnnt_loss/utils/cpu_utils/cpu_rnnt.py | 8 +- .../rnnt_loss/utils/cuda_utils/gpu_rnnt.py | 315 ++- .../utils/cuda_utils/gpu_rnnt_kernel.py | 531 +++++ .../numba/rnnt_loss/utils/rnnt_helper.py | 3 +- .../asr/parts/preprocessing/features.py | 4 +- .../asr/parts/preprocessing/segment.py | 31 +- .../multi_head_attention_adapter_module.py | 16 +- .../asr/parts/submodules/causal_convs.py | 31 +- .../asr/parts/submodules/conformer_modules.py | 70 +- .../parts/submodules/multi_head_attention.py | 59 +- .../parts/submodules/rnnt_greedy_decoding.py | 561 ++++++ .../asr/parts/submodules/subsampling.py | 151 +- .../asr/parts/utils/transcribe_utils.py | 18 +- nemo/collections/asr/parts/utils/vad_utils.py | 443 ++++- nemo/collections/common/data/__init__.py | 2 +- nemo/collections/common/data/dataset.py | 375 +++- .../metrics/global_average_loss_metric.py | 9 +- nemo/collections/common/metrics/perplexity.py | 8 +- .../common/parts/preprocessing/collections.py | 3 + .../text_to_speech/tokenizer_utils.py | 7 +- .../text_to_speech/tts_tokenizers.py | 35 +- .../models/clip/megatron_clip_models.py | 5 + .../models/multimodal_base_model.py | 71 +- .../megatron/data_samplers.py | 3 +- .../language_modeling/megatron/gpt_dataset.py | 2 +- .../megatron/gpt_prompt_learning_dataset.py | 10 +- .../megatron/gpt_sft_chat_dataset.py | 150 +- .../megatron/gpt_sft_dataset.py | 10 +- .../megatron/indexed_dataset.py | 5 +- .../language_modeling/text_memmap_dataset.py | 208 +- .../__init__.py | 20 + .../bert_example.py | 593 ++++++ .../dataset.py | 521 +++++ .../spellchecking_asr_customization/utils.py | 929 +++++++++ .../text_normalization_as_tagging/utils.py | 196 ++ .../nlp/metrics/sequence_perplexity.py | 9 +- nemo/collections/nlp/models/__init__.py | 1 + .../models/language_modeling/bert_lm_model.py | 2 +- .../language_modeling/megatron/bert_model.py | 2 + .../language_modeling/megatron/gpt_model.py | 8 +- .../language_modeling/megatron_base_model.py | 108 +- .../megatron_base_prompt_learning_model.py | 2 +- .../language_modeling/megatron_bert_model.py | 152 +- .../megatron_finetune_model.py | 8 +- .../language_modeling/megatron_gpt_model.py | 260 ++- .../megatron_gpt_peft_models.py | 18 +- .../megatron_gpt_prompt_learning_model.py | 5 +- .../megatron_gpt_sft_model.py | 12 +- .../megatron_lm_encoder_decoder_model.py | 22 +- .../megatron_retrieval_model.py | 4 +- .../megatron_t5_adapter_model.py | 129 ++ nemo/collections/nlp/models/nlp_model.py | 12 +- .../__init__.py | 18 + .../spellchecking_model.py | 527 +++++ .../models/text2sparql/text2sparql_model.py | 2 +- .../nlp/modules/common/chatbot_component.py | 22 +- .../megatron/adapters/parallel_adapters.py | 88 +- .../nlp/modules/common/megatron/attention.py | 353 ++-- .../modules/common/megatron/fused_softmax.py | 2 +- .../modules/common/megatron/language_model.py | 106 +- .../modules/common/megatron/layer_norm_1p.py | 30 +- .../common/megatron/megatron_decoders.py | 6 +- .../common/megatron/megatron_encoders.py | 6 +- .../modules/common/megatron/megatron_init.py | 2 + .../megatron/megatron_transformer_decoder.py | 4 + .../megatron/megatron_transformer_encoder.py | 14 +- .../nlp/modules/common/megatron/module.py | 14 +- .../megatron/position_embedding/__init__.py | 31 + .../alibi_relative_position_embedding.py | 50 +- .../kerple_relative_position_embedding.py | 93 + .../rotary_position_embedding.py} | 3 +- .../sandwich_relative_position_embedding.py | 75 + .../t5_relative_position_embedding.py | 9 +- .../xpos_position_embedding.py | 78 + .../common/megatron/retrieval_transformer.py | 2 +- .../megatron/token_level_encoder_decoder.py | 27 +- .../modules/common/megatron/transformer.py | 102 +- .../nlp/modules/common/megatron/utils.py | 31 +- .../nlp/modules/common/megatron_web_server.py | 409 ++-- .../nlp/modules/common/prompt_encoder.py | 5 +- .../common/text_generation_strategy.py | 58 +- .../modules/common/text_generation_utils.py | 126 +- nemo/collections/nlp/parts/nlp_overrides.py | 41 +- .../tts/data/text_to_speech_dataset.py | 79 +- nemo/collections/tts/data/vocoder_dataset.py | 202 ++ nemo/collections/tts/g2p/models/i18n_ipa.py | 2 +- .../tts/g2p/models/zh_cn_pinyin.py | 122 +- nemo/collections/tts/losses/aligner_loss.py | 11 +- nemo/collections/tts/models/aligner.py | 20 +- nemo/collections/tts/models/base.py | 12 + nemo/collections/tts/models/fastpitch.py | 68 +- nemo/collections/tts/models/hifigan.py | 195 +- nemo/collections/tts/models/mixer_tts.py | 12 +- nemo/collections/tts/models/radtts.py | 12 +- .../tts/models/spectrogram_enhancer.py | 20 +- nemo/collections/tts/models/tacotron2.py | 12 +- nemo/collections/tts/models/vits.py | 18 +- nemo/collections/tts/modules/fastpitch.py | 27 +- nemo/collections/tts/modules/submodules.py | 16 +- .../tts/parts/preprocessing/audio_trimming.py | 18 +- nemo/collections/tts/parts/utils/callbacks.py | 428 ++++ nemo/collections/tts/parts/utils/helpers.py | 29 + .../megatron_vit_classification_models.py | 36 + .../vision/models/vision_base_model.py | 82 +- .../common/megatron/vision_transformer.py | 4 + .../vision/modules/vit/vit_backbone.py | 2 + nemo/core/classes/exportable.py | 16 +- nemo/core/classes/mixins/adapter_mixins.py | 22 +- nemo/core/optim/distributed_adam.py | 56 +- nemo/core/optim/lr_scheduler.py | 1 + nemo/core/optim/optimizer_with_main_params.py | 4 +- nemo/core/optim/optimizers.py | 1 - nemo/core/utils/k2_guard.py | 11 +- nemo/core/utils/k2_utils.py | 24 + nemo/core/utils/numba_utils.py | 37 + nemo/package_info.py | 2 +- nemo/utils/app_state.py | 17 + nemo/utils/decorators/experimental.py | 18 +- nemo/utils/export_utils.py | 3 +- nemo/utils/model_utils.py | 3 +- requirements/requirements.txt | 2 +- requirements/requirements_asr.txt | 1 - requirements/requirements_common.txt | 2 + requirements/requirements_lightning.txt | 1 - requirements/requirements_nlp.txt | 5 +- requirements/requirements_tts.txt | 1 - .../ngram_lm/create_lexicon_from_arpa.py | 155 +- .../ngram_lm/install_beamsearch_decoders.sh | 18 +- .../confidence_ensembles/build_ensemble.py | 6 +- .../test_confidence_ensembles.py | 2 +- .../prompt_learning_squad_preprocessing.py | 36 +- .../spoken_wikipedia/run.sh | 2 +- .../tts/compute_feature_stats.py | 6 +- .../tts/create_speaker_map.py | 4 +- .../preprocess_audio.py | 72 +- .../dataset_processing/tts/preprocess_text.py | 131 ++ scripts/export.py | 27 +- scripts/installers/Dockerfile.ngramtools | 4 +- scripts/installers/setup_os2s_decoders.py | 1 + .../convert_mpt_7b_hf_to_nemo.py | 212 ++ tests/collections/asr/conftest.py | 355 ++++ .../asr/decoding/test_rnnt_decoding.py | 2 +- .../asr/k2/test_graph_transducer.py | 263 +++ tests/collections/asr/k2/test_w_transducer.py | 260 +++ .../asr/numba/rnnt_loss/test_rnnt_pytorch.py | 197 +- .../rnnt_loss/utils/test_gpu_rnnt_kernel.py | 251 ++- .../asr/numba/rnnt_loss/utils/test_reduce.py | 18 +- .../numba/rnnt_loss/utils/test_rnnt_helper.py | 75 +- .../asr/test_asr_classification_model.py | 47 +- .../asr/test_asr_interctc_models.py | 11 +- .../asr/test_asr_rnnt_encdec_model.py | 44 + tests/collections/asr/test_asr_subsampling.py | 61 + tests/collections/asr/utils/test_vad_utils.py | 126 ++ tests/collections/common/pl_utils.py | 8 +- tests/collections/nlp/test_flash_attention.py | 353 ++++ tests/collections/nlp/test_huggingface.py | 19 +- tests/collections/nlp/test_mem_map_dataset.py | 117 ++ .../nlp/test_position_embedding.py | 211 ++ .../collections/nlp/test_retrieval_module.py | 2 +- .../nlp/test_retrieval_module_inference.py | 2 +- .../test_spellchecking_asr_customization.py | 1102 +++++++++++ .../preprocessing}/test_audio_trimming.py | 2 +- tests/collections/tts/test_tts_exportables.py | 3 +- .../ctc_segmentation/scripts/prepare_data.py | 2 +- .../customization_dataset_preparation.py | 7 +- .../test_customization_dataset_preparation.py | 26 +- tools/nemo_forced_aligner/README.md | 69 +- tools/nemo_forced_aligner/align.py | 186 +- tools/nemo_forced_aligner/requirements.txt | 3 +- .../test_add_t_start_end_to_boundary_info.py | 121 -- .../tests/test_add_t_start_end_to_utt_obj.py | 288 +++ .../tests/test_get_utt_obj.py | 344 ++++ .../test_get_y_and_boundary_info_for_utt.py | 158 -- .../tests/test_restore_token_case.py | 36 + tools/nemo_forced_aligner/utils/constants.py | 2 +- tools/nemo_forced_aligner/utils/data_prep.py | 752 +++++-- .../utils/make_ass_files.py | 428 ++++ .../utils/make_ctm_files.py | 114 ++ .../utils/make_output_files.py | 209 -- .../utils/make_output_manifest.py | 35 + .../utils/viterbi_decoding.py | 70 +- tools/speech_data_explorer/data_explorer.py | 532 ++++- tutorials/00_NeMo_Primer.ipynb | 6 +- tutorials/01_NeMo_Models.ipynb | 2 +- tutorials/02_NeMo_Adapters.ipynb | 2 +- tutorials/AudioTranslationSample.ipynb | 4 +- ...blish_NeMo_Model_On_Hugging_Face_Hub.ipynb | 2 +- tutorials/VoiceSwapSample.ipynb | 4 +- .../asr/ASR_CTC_Language_Finetuning.ipynb | 4 +- .../ASR_Example_CommonVoice_Finetuning.ipynb | 10 +- tutorials/asr/ASR_TTS_Tutorial.ipynb | 846 ++++++++ tutorials/asr/ASR_for_telephony_speech.ipynb | 4 +- tutorials/asr/ASR_with_NeMo.ipynb | 6 +- .../asr/ASR_with_Subword_Tokenization.ipynb | 4 +- tutorials/asr/ASR_with_Transducers.ipynb | 4 +- .../asr/Buffered_Transducer_Inference.ipynb | 2 +- ..._Transducer_Inference_with_LCS_Merge.ipynb | 2 +- tutorials/asr/Intro_to_Transducers.ipynb | 4 +- tutorials/asr/Multilang_ASR.ipynb | 8 +- tutorials/asr/Offline_ASR.ipynb | 20 +- .../Offline_ASR_with_VAD_for_CTC_models.ipynb | 31 +- .../asr/Online_ASR_Microphone_Demo.ipynb | 2 +- tutorials/asr/Online_Noise_Augmentation.ipynb | 4 +- .../Online_Offline_Microphone_VAD_Demo.ipynb | 4 +- .../Online_Offline_Speech_Commands_Demo.ipynb | 4 +- .../asr/Self_Supervised_Pre_Training.ipynb | 10 +- tutorials/asr/Speech_Commands.ipynb | 2 +- tutorials/asr/Streaming_ASR.ipynb | 4 +- tutorials/asr/Voice_Activity_Detection.ipynb | 54 +- .../asr/asr_adapters/ASR_with_Adapters.ipynb | 6 +- ...netuning_at_Scale_with_AWS_SageMaker.ipynb | 12 +- .../cloud/aws/SageMaker_ASR_Training.ipynb | 6 +- ...Language_Models_for_Downstream_Tasks.ipynb | 12 +- tutorials/nlp/02_NLP_Tokenizers.ipynb | 4 +- ...a_Preprocessing_and_Cleaning_for_NMT.ipynb | 6 +- tutorials/nlp/Dialogue.ipynb | 2 +- tutorials/nlp/Entity_Linking_Medical.ipynb | 4 +- tutorials/nlp/GLUE_Benchmark.ipynb | 2 +- tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb | 12 +- ...Joint_Intent_and_Slot_Classification.ipynb | 2 +- tutorials/nlp/MegatronBert_export.ipynb | 2 +- ...on_Synthetic_Tabular_Data_Generation.ipynb | 25 +- .../nlp/Multitask_Prompt_and_PTuning.ipynb | 4 +- .../nlp/Punctuation_and_Capitalization.ipynb | 6 +- ...ion_and_Capitalization_Lexical_Audio.ipynb | 6 +- tutorials/nlp/Question_Answering.ipynb | 2 +- .../nlp/Relation_Extraction-BioMegatron.ipynb | 2 +- ...pellMapper_English_ASR_Customization.ipynb | 1412 ++++++++++++++ ...xt_Classification_Sentiment_Analysis.ipynb | 2 +- .../Token_Classification-BioMegatron.ipynb | 2 +- ...ssification_Named_Entity_Recognition.ipynb | 4 +- .../nlp/Zero_Shot_Intent_Recognition.ipynb | 2 +- .../spellmapper_customization_vocabulary.png | Bin 0 -> 39243 bytes .../images/spellmapper_data_preparation.png | Bin 0 -> 75265 bytes .../images/spellmapper_inference_pipeline.png | Bin 0 -> 146148 bytes tutorials/nlp/lora.ipynb | 1720 +++++++++++++++++ .../ASR_with_SpeakerDiarization.ipynb | 6 +- .../Speaker_Diarization_Inference.ipynb | 12 +- .../Speaker_Diarization_Training.ipynb | 8 +- .../Speaker_Identification_Verification.ipynb | 10 +- .../tools/CTC_Segmentation_Tutorial.ipynb | 8 +- tutorials/tools/Multispeaker_Simulator.ipynb | 4 +- .../tts/Aligner_Inference_Examples.ipynb | 4 +- .../Evaluation_MelCepstralDistortion.ipynb | 6 +- .../tts/FastPitch_Adapter_Finetuning.ipynb | 182 +- .../tts/FastPitch_ChineseTTS_Training.ipynb | 8 +- tutorials/tts/FastPitch_Finetuning.ipynb | 4 +- .../tts/FastPitch_GermanTTS_Training.ipynb | 8 +- .../tts/FastPitch_MixerTTS_Training.ipynb | 2 +- .../FastPitch_MultiSpeaker_Pretraining.ipynb | 12 +- .../tts/FastPitch_Speaker_Interpolation.ipynb | 2 +- .../tts/Inference_DurationPitchControl.ipynb | 4 +- tutorials/tts/Inference_ModelSelect.ipynb | 2 +- tutorials/tts/NeMo_TTS_Primer.ipynb | 2 +- .../tts/Pronunciation_customization.ipynb | 12 +- tutorials/tts/Tacotron2_Training.ipynb | 2 +- tutorials/tts/Vits_Training.ipynb | 2 +- 393 files changed, 27791 insertions(+), 3878 deletions(-) create mode 100644 .github/workflows/config/codeql.yml create mode 100644 docs/source/nlp/megatron_onnx_export.rst create mode 100644 docs/source/nlp/spellchecking_asr_customization.rst create mode 100644 docs/source/tools/images/scr_10.png create mode 100644 docs/source/tools/images/scr_11.png create mode 100644 examples/asr/conf/conformer/tdt/conformer_tdt_bpe.yaml create mode 100644 examples/asr/conf/conformer/tdt/conformer_tdt_bpe_stateless.yaml create mode 100644 examples/asr/conf/marblenet/marblenet_3x2x64_20ms.yaml create mode 100644 examples/asr/conf/ssl/fastconformer/fast-conformer.yaml create mode 100644 examples/asr/conf/vad/frame_vad_infer_postprocess.yaml create mode 100644 examples/asr/speech_classification/frame_vad_infer.py create mode 100644 examples/asr/speech_classification/speech_to_frame_label.py create mode 100644 examples/nlp/language_modeling/conf/megatron_gpt_export.yaml create mode 100644 examples/nlp/language_modeling/conf/megatron_gpt_validate_config.yaml create mode 100644 examples/nlp/language_modeling/megatron_export.py create mode 100644 examples/nlp/language_modeling/megatron_gpt_validate.py create mode 100644 examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml create mode 100644 examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml create mode 100644 examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py create mode 100644 examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py create mode 100644 examples/nlp/spellchecking_asr_customization/README.md create mode 100644 examples/nlp/spellchecking_asr_customization/checkpoint_to_nemo.py create mode 100644 examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml create mode 100644 examples/nlp/spellchecking_asr_customization/convert_data_to_tarred.sh create mode 100644 examples/nlp/spellchecking_asr_customization/create_custom_vocab_index.py create mode 100644 examples/nlp/spellchecking_asr_customization/create_tarred_dataset.py create mode 100644 examples/nlp/spellchecking_asr_customization/helpers.py create mode 100644 examples/nlp/spellchecking_asr_customization/postprocess_and_update_manifest.py create mode 100644 examples/nlp/spellchecking_asr_customization/prepare_input_from_manifest.py create mode 100644 examples/nlp/spellchecking_asr_customization/run_infer.sh create mode 100644 examples/nlp/spellchecking_asr_customization/run_training.sh create mode 100644 examples/nlp/spellchecking_asr_customization/run_training_tarred.sh create mode 100644 examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py create mode 100644 examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_train.py create mode 100644 examples/tts/conf/fastpitch/fastpitch.yaml create mode 100644 examples/tts/conf/hifigan/hifigan_data.yaml create mode 100644 examples/tts/conf/hifigan/sample/sample_22050.yaml create mode 100644 examples/tts/conf/hifigan/sample/sample_44100.yaml create mode 100644 examples/tts/conf/text/normalizer_en.yaml create mode 100644 nemo/collections/asr/parts/k2/graph_transducer.py create mode 100644 nemo/collections/asr/parts/k2/w_transducer.py create mode 100644 nemo/collections/nlp/data/spellchecking_asr_customization/__init__.py create mode 100644 nemo/collections/nlp/data/spellchecking_asr_customization/bert_example.py create mode 100644 nemo/collections/nlp/data/spellchecking_asr_customization/dataset.py create mode 100644 nemo/collections/nlp/data/spellchecking_asr_customization/utils.py create mode 100644 nemo/collections/nlp/models/spellchecking_asr_customization/__init__.py create mode 100644 nemo/collections/nlp/models/spellchecking_asr_customization/spellchecking_model.py create mode 100644 nemo/collections/nlp/modules/common/megatron/position_embedding/__init__.py rename nemo/collections/nlp/modules/common/megatron/{ => position_embedding}/alibi_relative_position_embedding.py (73%) create mode 100644 nemo/collections/nlp/modules/common/megatron/position_embedding/kerple_relative_position_embedding.py rename nemo/collections/nlp/modules/common/megatron/{rotary_pos_embedding.py => position_embedding/rotary_position_embedding.py} (96%) create mode 100644 nemo/collections/nlp/modules/common/megatron/position_embedding/sandwich_relative_position_embedding.py rename nemo/collections/nlp/modules/common/megatron/{ => position_embedding}/t5_relative_position_embedding.py (95%) create mode 100644 nemo/collections/nlp/modules/common/megatron/position_embedding/xpos_position_embedding.py create mode 100644 nemo/collections/tts/data/vocoder_dataset.py create mode 100644 nemo/collections/tts/parts/utils/callbacks.py create mode 100644 nemo/core/utils/k2_utils.py mode change 100644 => 100755 scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh rename scripts/dataset_processing/tts/{audio_processing => }/preprocess_audio.py (76%) create mode 100644 scripts/dataset_processing/tts/preprocess_text.py create mode 100644 scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py create mode 100644 tests/collections/asr/conftest.py create mode 100644 tests/collections/asr/k2/test_graph_transducer.py create mode 100644 tests/collections/asr/k2/test_w_transducer.py create mode 100644 tests/collections/asr/test_asr_subsampling.py create mode 100644 tests/collections/asr/utils/test_vad_utils.py create mode 100644 tests/collections/nlp/test_flash_attention.py create mode 100644 tests/collections/nlp/test_mem_map_dataset.py create mode 100644 tests/collections/nlp/test_position_embedding.py create mode 100644 tests/collections/nlp/test_spellchecking_asr_customization.py rename tests/collections/tts/{data => parts/preprocessing}/test_audio_trimming.py (98%) delete mode 100644 tools/nemo_forced_aligner/tests/test_add_t_start_end_to_boundary_info.py create mode 100644 tools/nemo_forced_aligner/tests/test_add_t_start_end_to_utt_obj.py create mode 100644 tools/nemo_forced_aligner/tests/test_get_utt_obj.py delete mode 100644 tools/nemo_forced_aligner/tests/test_get_y_and_boundary_info_for_utt.py create mode 100644 tools/nemo_forced_aligner/tests/test_restore_token_case.py create mode 100644 tools/nemo_forced_aligner/utils/make_ass_files.py create mode 100644 tools/nemo_forced_aligner/utils/make_ctm_files.py delete mode 100644 tools/nemo_forced_aligner/utils/make_output_files.py create mode 100644 tools/nemo_forced_aligner/utils/make_output_manifest.py create mode 100644 tutorials/asr/ASR_TTS_Tutorial.ipynb create mode 100644 tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb create mode 100644 tutorials/nlp/images/spellmapper_customization_vocabulary.png create mode 100644 tutorials/nlp/images/spellmapper_data_preparation.png create mode 100644 tutorials/nlp/images/spellmapper_inference_pipeline.png create mode 100644 tutorials/nlp/lora.ipynb diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 673687412096..d82e99872853 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -51,6 +51,7 @@ jobs: # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs queries: security-and-quality # security-extended, + config-file: ./.github/workflows/config/codeql.yml # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). diff --git a/.github/workflows/config/codeql.yml b/.github/workflows/config/codeql.yml new file mode 100644 index 000000000000..5482696a2c9f --- /dev/null +++ b/.github/workflows/config/codeql.yml @@ -0,0 +1,9 @@ +name: "CodeQL config" + +paths: + - nemo/ + - tests/ + - tools/ + - scripts/ + - examples/ + - .github/ diff --git a/.github/workflows/import-test.yml b/.github/workflows/import-test.yml index 5fc34347710d..e9b10e1e34af 100644 --- a/.github/workflows/import-test.yml +++ b/.github/workflows/import-test.yml @@ -6,25 +6,24 @@ on: paths: - "**" +# Check https://hub.docker.com/r/pytorch/pytorch/tags for latest tags jobs: - ci-import-check: - runs-on: ubuntu-latest - # Check https://hub.docker.com/r/pytorch/pytorch/tags for latest tags + test-asr-imports: + runs-on: ubuntu-latest container: - image: pytorch/pytorch:1.11.0-cuda11.3-cudnn8-runtime - + image: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime steps: - - uses: actions/checkout@v2 - + - name: Checkout repo + uses: actions/checkout@v2 - name: Update base dependencies run: | apt-get update && apt-get install -y build-essential apt-get install -y libsndfile1 make - - name: Install nemo dependencies id: nemo-wheel run: | + pip install Cython # install test requirements pip install -r requirements/requirements_test.txt # Build nemo as a wheel @@ -33,7 +32,6 @@ jobs: # Preserve wheel location DIST_FILE=$(find ./dist -name "*.whl" | head -n 1) echo "::set-output name=DIST_FILE::${DIST_FILE}" - - name: Test ASR Domain Imports run: | # Install NeMo Domain @@ -43,6 +41,29 @@ jobs: # Uninstall NeMo pip uninstall -y nemo_toolkit + test-tts-imports: + runs-on: ubuntu-latest + container: + image: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime + steps: + - name: Checkout repo + uses: actions/checkout@v2 + - name: Update base dependencies + run: | + apt-get update && apt-get install -y build-essential + apt-get install -y libsndfile1 make + - name: Install nemo dependencies + id: nemo-wheel + run: | + pip install Cython + # install test requirements + pip install -r requirements/requirements_test.txt + # Build nemo as a wheel + pip install build + python -m build --no-isolation --wheel + # Preserve wheel location + DIST_FILE=$(find ./dist -name "*.whl" | head -n 1) + echo "::set-output name=DIST_FILE::${DIST_FILE}" - name: Test TTS Domain Imports run: | # Install NeMo Domain @@ -52,6 +73,29 @@ jobs: # Uninstall NeMo pip uninstall -y nemo_toolkit + test-nlp-imports: + runs-on: ubuntu-latest + container: + image: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime + steps: + - name: Checkout repo + uses: actions/checkout@v2 + - name: Update base dependencies + run: | + apt-get update && apt-get install -y build-essential + apt-get install -y libsndfile1 make + - name: Install nemo dependencies + id: nemo-wheel + run: | + pip install Cython + # install test requirements + pip install -r requirements/requirements_test.txt + # Build nemo as a wheel + pip install build + python -m build --no-isolation --wheel + # Preserve wheel location + DIST_FILE=$(find ./dist -name "*.whl" | head -n 1) + echo "::set-output name=DIST_FILE::${DIST_FILE}" - name: Test NLP Domain Imports run: | # Install NeMo Domain diff --git a/Dockerfile b/Dockerfile index 82d16a561886..2e6b617087bc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.04-py3 +ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.06-py3 # build an image that includes only the nemo dependencies, ensures that dependencies # are included first for optimal caching, and useful for building a development @@ -45,11 +45,11 @@ RUN apt-get update && \ WORKDIR /workspace/ WORKDIR /tmp/ -# TODO: Remove once this Apex commit (2/24/23) is included in PyTorch +# TODO: Remove once this Apex commit (5/12/23) is included in PyTorch # container RUN git clone https://github.com/NVIDIA/apex.git && \ cd apex && \ - git checkout 57057e2fcf1c084c0fcc818f55c0ff6ea1b24ae2 && \ + git checkout 8b7a1ff183741dd8f9b87e7bafd04cfde99cea28 && \ pip3 install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_layer_norm" --global-option="--distributed_adam" --global-option="--deprecated_fused_adam" ./ # uninstall stuff from base container @@ -72,6 +72,11 @@ WORKDIR /tmp/nemo COPY requirements . RUN for f in $(ls requirements*.txt); do pip3 install --disable-pip-version-check --no-cache-dir -r $f; done +# install flash attention dependencies +RUN pip install flash-attn +# pinned triton version for flash-attention https://github.com/HazyResearch/flash-attention/blob/main/flash_attn/flash_attn_triton.py#L3 +RUN pip install triton==2.0.0.dev20221202 + # install k2, skip if installation fails COPY scripts /tmp/nemo/scripts/ RUN INSTALL_MSG=$(/bin/bash /tmp/nemo/scripts/speech_recognition/k2/setup.sh); INSTALL_CODE=$?; \ @@ -89,7 +94,7 @@ COPY . . # start building the final container FROM nemo-deps as nemo -ARG NEMO_VERSION=1.19.0 +ARG NEMO_VERSION=1.20.0 # Check that NEMO_VERSION is set. Build will fail without this. Expose NEMO and base container # version information as runtime environment variable for introspection purposes diff --git a/Jenkinsfile b/Jenkinsfile index 780e3e4b43c4..1e66f7f6bf8e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,7 +1,7 @@ pipeline { agent { docker { - image 'nvcr.io/nvidia/pytorch:23.04-py3' + image 'nvcr.io/nvidia/pytorch:23.06-py3' args '--device=/dev/nvidia0 --gpus all --user 0:128 -v /home/TestData:/home/TestData -v $HOME/.cache:/root/.cache --shm-size=8g --env TRANSFORMERS_OFFLINE=1' } } @@ -57,6 +57,24 @@ pipeline { } } + stage('Megatron Core installation') { + steps { + // commit points to core 23.05 ToT + sh 'git clone https://github.com/NVIDIA/Megatron-LM.git && \ + cd Megatron-LM && \ + git checkout 060415572f4365a2e895f8036c4e37dad0efbdf5 && \ + pip install -e .' + } + } + + stage('Flash Attention installation') { + steps { + // pinned triton version for flash-attention https://github.com/HazyResearch/flash-attention/blob/main/flash_attn/flash_attn_triton.py#L3 + sh 'pip install flash-attn && \ + pip install triton==2.0.0.dev20221202' + } + } + stage('PyTorch Lightning version') { steps { sh 'python -c "import pytorch_lightning; print(pytorch_lightning.__version__)"' @@ -85,8 +103,8 @@ pipeline { stage('L0: Unit Tests CPU') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } steps { @@ -97,8 +115,8 @@ pipeline { stage('L2: ASR dev run') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -183,8 +201,8 @@ pipeline { stage('L2: ASR dev run - part two') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -213,8 +231,8 @@ pipeline { stage('L2: Speech to Text EMA') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } steps { @@ -234,8 +252,8 @@ pipeline { stage('L2: Speaker dev run') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -357,8 +375,8 @@ pipeline { // stage('L2: ASR DALI dev run') { // when { // anyOf { - // branch 'r1.19.0' - // changeRequest target: 'r1.19.0' + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' // } // } // failFast true @@ -425,8 +443,8 @@ pipeline { // stage('L2: ASR RNNT dev run') { // when { // anyOf { - // branch 'r1.19.0' - // changeRequest target: 'r1.19.0' + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' // } // } // failFast true @@ -487,8 +505,8 @@ pipeline { // stage('L2: Hybrid ASR RNNT-CTC dev run') { // when { // anyOf { - // branch 'r1.19.0' - // changeRequest target: 'r1.19.0' + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' // } // } // failFast true @@ -517,8 +535,8 @@ pipeline { stage('L2: ASR Multi-dataloader dev run') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -565,8 +583,8 @@ pipeline { stage('L2: ASR Adapters') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -607,101 +625,102 @@ pipeline { } } - stage('L2: Megatron T5 Adapter PP=2') { - when { - anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' - } - } - failFast true - parallel{ - stage('T5 Adapter tuning & inference TP=1 PP=2') { - steps { - sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py \ - --config-name=megatron_t5_adapter_tuning_config \ - name='test_tp1_pp2' \ - exp_manager.exp_dir='examples/adapter_tuning' \ - trainer.devices=2 \ - trainer.max_steps=1 \ - trainer.val_check_interval=1 \ - trainer.max_epochs=null \ - model.data.num_workers=1 \ - model.tensor_model_parallel_size=1 \ - model.pipeline_model_parallel_size=2 \ - model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ - model.existing_tasks=[] \ - model.new_tasks=['rte'] \ - model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.global_batch_size=4" - sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py \ - --config-name=megatron_t5_adapter_inference \ - adapter_model_file='examples/adapter_tuning/test_tp1_pp2.nemo' \ - language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ - trainer.devices=2 \ - data.num_workers=1 \ - tensor_model_parallel_size=1 \ - pipeline_model_parallel_size=2 \ - data.global_batch_size=2 \ - data.micro_batch_size=2 \ - data.test_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - pred_file_path='examples/adapter_tuning/test_tp1_pp2/preds.txt'" - sh "rm -rf examples/adapter_tuning/test_tp1_pp2.nemo" - sh "rm -rf examples/adapter_tuning/test_tp1_pp2" - } - } - } - } - stage('L2: Megatron T5 Adapter TP=2') { - when { - anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' - } - } - failFast true - parallel{ - stage('T5 Adapter tuning & inference TP=2 PP=1') { - steps { - sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py \ - --config-name=megatron_t5_adapter_tuning_config \ - name='test_tp2_pp1' \ - exp_manager.exp_dir='examples/adapter_tuning' \ - trainer.devices=2 \ - trainer.max_steps=1 \ - trainer.val_check_interval=1 \ - trainer.max_epochs=null \ - model.data.num_workers=1 \ - model.tensor_model_parallel_size=2 \ - model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo' \ - model.existing_tasks=[] \ - model.new_tasks=['rte'] \ - model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.global_batch_size=4" - sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py \ - --config-name=megatron_t5_adapter_inference \ - adapter_model_file='examples/adapter_tuning/test_tp2_pp1.nemo' \ - language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo' \ - trainer.devices=2 \ - tensor_model_parallel_size=2 \ - data.global_batch_size=2 \ - data.micro_batch_size=2 \ - data.num_workers=1 \ - data.test_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - pred_file_path='examples/adapter_tuning/test_tp2_pp1/preds.txt'" - sh "rm -rf examples/adapter_tuning/test_tp2_pp1.nemo" - sh "rm -rf examples/adapter_tuning/test_tp2_pp1" - } - } - } - } + // commented out temporarily to save time on github ci + //stage('L2: Megatron T5 Adapter PP=2') { + // when { + // anyOf { + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' + // } + // } + // failFast true + // parallel{ + // stage('T5 Adapter tuning & inference TP=1 PP=2') { + // steps { + // sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py \ + // --config-name=megatron_t5_adapter_tuning_config \ + // name='test_tp1_pp2' \ + // exp_manager.exp_dir='examples/adapter_tuning' \ + // trainer.devices=2 \ + // trainer.max_steps=1 \ + // trainer.val_check_interval=1 \ + // trainer.max_epochs=null \ + // model.data.num_workers=1 \ + // model.tensor_model_parallel_size=1 \ + // model.pipeline_model_parallel_size=2 \ + // model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ + // model.existing_tasks=[] \ + // model.new_tasks=['rte'] \ + // model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.global_batch_size=4" + // sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py \ + // --config-name=megatron_t5_adapter_inference \ + // adapter_model_file='examples/adapter_tuning/test_tp1_pp2.nemo' \ + // language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ + // trainer.devices=2 \ + // data.num_workers=1 \ + // tensor_model_parallel_size=1 \ + // pipeline_model_parallel_size=2 \ + // data.global_batch_size=2 \ + // data.micro_batch_size=2 \ + // data.test_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // pred_file_path='examples/adapter_tuning/test_tp1_pp2/preds.txt'" + // sh "rm -rf examples/adapter_tuning/test_tp1_pp2.nemo" + // sh "rm -rf examples/adapter_tuning/test_tp1_pp2" + // } + // } + // } + //} + //stage('L2: Megatron T5 Adapter TP=2') { + // when { + // anyOf { + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' + // } + // } + // failFast true + // parallel{ + // stage('T5 Adapter tuning & inference TP=2 PP=1') { + // steps { + // sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py \ + // --config-name=megatron_t5_adapter_tuning_config \ + // name='test_tp2_pp1' \ + // exp_manager.exp_dir='examples/adapter_tuning' \ + // trainer.devices=2 \ + // trainer.max_steps=1 \ + // trainer.val_check_interval=1 \ + // trainer.max_epochs=null \ + // model.data.num_workers=1 \ + // model.tensor_model_parallel_size=2 \ + // model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo' \ + // model.existing_tasks=[] \ + // model.new_tasks=['rte'] \ + // model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.global_batch_size=4" + // sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py \ + // --config-name=megatron_t5_adapter_inference \ + // adapter_model_file='examples/adapter_tuning/test_tp2_pp1.nemo' \ + // language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo' \ + // trainer.devices=2 \ + // tensor_model_parallel_size=2 \ + // data.global_batch_size=2 \ + // data.micro_batch_size=2 \ + // data.num_workers=1 \ + // data.test_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // pred_file_path='examples/adapter_tuning/test_tp2_pp1/preds.txt'" + // sh "rm -rf examples/adapter_tuning/test_tp2_pp1.nemo" + // sh "rm -rf examples/adapter_tuning/test_tp2_pp1" + // } + // } + // } + //} stage('L2: Megatron T5 IA3 PP=2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -746,8 +765,8 @@ pipeline { stage('L2: Megatron T5 IA3 TP=2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -790,8 +809,8 @@ pipeline { stage('L2: Megatron GPT Adapter TP=2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -830,55 +849,56 @@ pipeline { } } } - stage('L2: Megatron GPT Adapter PP=2') { - when { - anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' - } - } - failFast true - parallel{ - stage('GPT Adapter tuning & inference TP=1 PP=2') { - steps { - sh "python examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py \ - --config-name=megatron_gpt_adapter_tuning_config \ - name='test_tp1_pp2' \ - exp_manager.exp_dir='examples/adapter_tuning' \ - trainer.devices=2 \ - trainer.max_steps=1 \ - trainer.val_check_interval=1 \ - trainer.max_epochs=null \ - model.data.num_workers=1 \ - model.tensor_model_parallel_size=1 \ - model.pipeline_model_parallel_size=2 \ - model.language_model_path='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ - model.existing_tasks=[] \ - model.new_tasks=['rte'] \ - model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.global_batch_size=4" - sh "python examples/nlp/language_modeling/tuning/megatron_gpt_adapter_eval.py \ - --config-name=megatron_gpt_adapter_inference \ - adapter_model_file='examples/adapter_tuning/test_tp1_pp2.nemo' \ - gpt_model_file='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ - inference.greedy=True \ - inference.add_BOS=False \ - trainer.devices=2 \ - num_workers=1 \ - tensor_model_parallel_size=2 \ - data_paths=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl']" - sh "rm -rf examples/adapter_tuning/test_tp1_pp2.nemo" - sh "rm -rf examples/adapter_tuning/test_tp1_pp2" - } - } - } - } + // commented out to save time on github ci @adithyare + //stage('L2: Megatron GPT Adapter PP=2') { + // when { + // anyOf { + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' + // } + // } + // failFast true + // parallel{ + // stage('GPT Adapter tuning & inference TP=1 PP=2') { + // steps { + // sh "python examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py \ + // --config-name=megatron_gpt_adapter_tuning_config \ + // name='test_tp1_pp2' \ + // exp_manager.exp_dir='examples/adapter_tuning' \ + // trainer.devices=2 \ + // trainer.max_steps=1 \ + // trainer.val_check_interval=1 \ + // trainer.max_epochs=null \ + // model.data.num_workers=1 \ + // model.tensor_model_parallel_size=1 \ + // model.pipeline_model_parallel_size=2 \ + // model.language_model_path='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ + // model.existing_tasks=[] \ + // model.new_tasks=['rte'] \ + // model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.global_batch_size=4" + // sh "python examples/nlp/language_modeling/tuning/megatron_gpt_adapter_eval.py \ + // --config-name=megatron_gpt_adapter_inference \ + // adapter_model_file='examples/adapter_tuning/test_tp1_pp2.nemo' \ + // gpt_model_file='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ + // inference.greedy=True \ + // inference.add_BOS=False \ + // trainer.devices=2 \ + // num_workers=1 \ + // tensor_model_parallel_size=2 \ + // data_paths=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl']" + // sh "rm -rf examples/adapter_tuning/test_tp1_pp2.nemo" + // sh "rm -rf examples/adapter_tuning/test_tp1_pp2" + // } + // } + // } + //} stage('L2: Speech Transcription') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -898,8 +918,8 @@ pipeline { stage('L2: Transducer alignment') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -915,8 +935,8 @@ pipeline { stage('L2: Segmentation Tool') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } stages { @@ -971,8 +991,8 @@ pipeline { stage('L2: G2P Models') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -1053,8 +1073,8 @@ pipeline { // stage('L2: Multi-GPU Megatron finetuning') { // when { // anyOf { - // branch 'r1.19.0' - // changeRequest target: 'r1.19.0' + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' // } // } // failFast true @@ -1080,8 +1100,8 @@ pipeline { stage('L2: STS-b') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -1140,8 +1160,8 @@ pipeline { stage('L2: Dialogue Classification') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -1311,8 +1331,8 @@ pipeline { stage('L2: Dialogue Generation') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -1377,8 +1397,8 @@ pipeline { // stage('L2: Dialogue Generation Part 2') { // when { // anyOf { -// branch 'r1.19.0' -// changeRequest target: 'r1.19.0' +// branch 'r1.20.0' +// changeRequest target: 'r1.20.0' // } // } // failFast true @@ -1407,8 +1427,8 @@ pipeline { stage('L2: COPY') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -1437,8 +1457,8 @@ pipeline { stage('L2: Duplex Text Normalization') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -1470,13 +1490,13 @@ pipeline { } } } - // Runs out of memory on the 12G TITAN V (GPU 0 on r1.19.0 CI) + // Runs out of memory on the 12G TITAN V (GPU 0 on main CI) // TODO: add when megatron bert is supported again in NeMo // stage('L2: MegaBERT Token Classification') { // when { // anyOf { - // branch 'r1.19.0' - // changeRequest target: 'r1.19.0' + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' // } // } // failFast true @@ -1501,8 +1521,8 @@ pipeline { stage('L2: BERT Text Classification') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -1530,8 +1550,8 @@ pipeline { stage('L2: Parallel BERT Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -1589,8 +1609,8 @@ pipeline { stage('L2: Parallel BART Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -1650,8 +1670,8 @@ pipeline { stage('L2: Parallel GPT2 Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -1711,8 +1731,8 @@ pipeline { stage('L2: Intent and Slot Classification Tasks') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -1751,8 +1771,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Text Classification') { // when { // anyOf{ - // branch 'r1.19.0' - // changeRequest target: 'r1.19.0' + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' // } // } // failFast true @@ -1780,8 +1800,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Autoresume') { // when { // anyOf{ - // branch 'r1.19.0' - // changeRequest target: 'r1.19.0' + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' // } // } // failFast true @@ -1811,8 +1831,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Evaluation from .nemo') { // when { // anyOf{ - // branch 'r1.19.0' - // changeRequest target: 'r1.19.0' + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' // } // } // failFast true @@ -1832,8 +1852,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Train from .nemo') { // when { // anyOf{ - // branch 'r1.19.0' - // changeRequest target: 'r1.19.0' + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' // } // } // failFast true @@ -1855,8 +1875,8 @@ pipeline { stage('L2: Parallel NLP Examples 2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -1980,8 +2000,8 @@ pipeline { stage('Punctuation & Capitalization tarred dataset') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -2039,8 +2059,8 @@ pipeline { stage('Punctuation & Capitalization, Different ways of passing labels to model') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -2147,8 +2167,8 @@ pipeline { stage('Punctuation & Capitalization inference') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -2173,8 +2193,8 @@ pipeline { stage('L2: Parallel Pretraining BERT pretraining from Text/Preprocessed') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -2235,8 +2255,8 @@ pipeline { stage('L2: Entity Linking') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -2263,8 +2283,8 @@ pipeline { stage('L2: NMT Attention is All You Need Training') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -2386,8 +2406,8 @@ pipeline { stage('L2: NMT Attention is All You Need Inference') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -2422,8 +2442,8 @@ pipeline { stage('L2: NMT Attention is All You Need Finetuning') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -2457,8 +2477,8 @@ pipeline { stage('L2: NMT Tarred Dataset Creation') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -2511,8 +2531,8 @@ pipeline { stage('L2: Megatron NMT Training TP=2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -2610,8 +2630,8 @@ pipeline { // stage('L2: NMT Bottleneck Fallback') { // when { // anyOf { - // branch 'r1.19.0' - // changeRequest target: 'r1.19.0' + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' // } // } // failFast true @@ -2657,8 +2677,8 @@ pipeline { // stage('L2: NMT Bottleneck Architecture') { // when { // anyOf { - // branch 'r1.19.0' - // changeRequest target: 'r1.19.0' + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' // } // } // failFast true @@ -2740,8 +2760,8 @@ pipeline { // stage('L2: NMT Bottleneck LVM') { // when { // anyOf { - // branch 'r1.19.0' - // changeRequest target: 'r1.19.0' + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' // } // } // failFast true @@ -2823,8 +2843,8 @@ pipeline { stage('L2: Megatron Bert Pretraining and Resume Training with Pipeline Paralleism') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -2893,8 +2913,8 @@ pipeline { stage('L2: Megatron Bert Pretraining and Resume Training') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -2964,8 +2984,8 @@ pipeline { stage('L2: Megatron RETRO Pretraining and Resume Training') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -3036,8 +3056,8 @@ pipeline { stage('L2: Megatron RETRO muTransfer Pretraining Performance') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -3119,8 +3139,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: BioMegatron Bert NER Task') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -3137,8 +3157,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -3163,8 +3183,6 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.max_position_embeddings=128 \ model.encoder_seq_length=128 \ model.data.seq_length=128 \ - model.position_embedding_type=rope \ - model.rotary_percentage=0.5 \ model.normalization=rmsnorm \ model.bias=False \ model.bias_activation_fusion=False \ @@ -3200,6 +3218,140 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.max_position_embeddings=128 \ model.encoder_seq_length=128 \ model.data.seq_length=128 \ + model.normalization=rmsnorm \ + model.bias=False \ + model.bias_activation_fusion=False \ + model.bias_dropout_add_fusion=False \ + model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + model.num_layers=8 \ + model.hidden_size=256 \ + model.num_attention_heads=8 \ + model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ + model.activations_checkpoint_num_layers=1 \ + model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" + sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results" + sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" + } + } + stage('L2: Megatron GPT with Rope Pretraining and Resume Training TP=2') { + when { + anyOf { + branch 'r1.20.0' + changeRequest target: 'r1.20.0' + } + } + failFast true + steps { + sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + trainer.devices=2 \ + trainer.accelerator=gpu \ + trainer.log_every_n_steps=1 \ + trainer.val_check_interval=2 \ + trainer.limit_val_batches=2 \ + trainer.accumulate_grad_batches=1 \ + trainer.max_steps=3 \ + trainer.precision=16 \ + trainer.gradient_clip_val=1.0 \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + model.tensor_model_parallel_size=2 \ + model.optim.name=fused_adam \ + model.optim.lr=2e-4 \ + model.optim.sched.warmup_steps=1 \ + model.optim.sched.constant_steps=1 \ + model.optim.sched.min_lr=8e-5 \ + model.max_position_embeddings=128 \ + model.encoder_seq_length=128 \ + model.data.seq_length=128 \ + model.position_embedding_type=rope \ + model.rotary_percentage=0.5 \ + model.normalization=rmsnorm \ + model.bias=False \ + model.bias_activation_fusion=False \ + model.bias_dropout_add_fusion=False \ + model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + model.num_layers=8 \ + model.hidden_size=256 \ + model.num_attention_heads=8 \ + model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ + model.activations_checkpoint_num_layers=1 \ + model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" + // commented out to save time on github ci @adithyare + //sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + //trainer.devices=2 \ + //trainer.accelerator=gpu \ + //trainer.log_every_n_steps=1 \ + //trainer.val_check_interval=2 \ + //trainer.limit_val_batches=1 \ + //trainer.accumulate_grad_batches=1 \ + //trainer.max_steps=6 \ + //trainer.precision=16 \ + //trainer.gradient_clip_val=1.0 \ + //exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + //exp_manager.resume_if_exists=True \ + //model.tensor_model_parallel_size=2 \ + //model.optim.name=fused_adam \ + //model.optim.lr=2e-4 \ + //model.optim.sched.warmup_steps=2 \ + //model.optim.sched.constant_steps=2 \ + //model.optim.sched.min_lr=8e-5 \ + //model.max_position_embeddings=128 \ + //model.encoder_seq_length=128 \ + //model.data.seq_length=128 \ + //model.position_embedding_type=rope \ + //model.rotary_percentage=0.5 \ + //model.normalization=rmsnorm \ + //model.bias=False \ + //model.bias_activation_fusion=False \ + //model.bias_dropout_add_fusion=False \ + //model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + //model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + //model.num_layers=8 \ + //model.hidden_size=256 \ + //model.num_attention_heads=8 \ + //model.activations_checkpoint_method='block' \ + //model.activations_checkpoint_granularity='full' \ + //model.activations_checkpoint_num_layers=1 \ + //model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + //model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" + sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results" + sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" + } + } + stage('L2: Megatron GPT with Rope Pretraining using Flash Attention and Resume Training TP=2') { + when { + anyOf { + branch 'r1.20.0' + changeRequest target: 'r1.20.0' + } + } + failFast true + steps { + sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + trainer.devices=2 \ + trainer.accelerator=gpu \ + trainer.log_every_n_steps=1 \ + trainer.val_check_interval=2 \ + trainer.limit_val_batches=2 \ + trainer.accumulate_grad_batches=1 \ + trainer.max_steps=3 \ + trainer.precision=16 \ + trainer.gradient_clip_val=1.0 \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + model.tensor_model_parallel_size=2 \ + model.optim.name=fused_adam \ + model.optim.lr=2e-4 \ + model.optim.sched.warmup_steps=1 \ + model.optim.sched.constant_steps=1 \ + model.optim.sched.min_lr=8e-5 \ + model.max_position_embeddings=128 \ + model.encoder_seq_length=128 \ + model.data.seq_length=128 \ model.position_embedding_type=rope \ model.rotary_percentage=0.5 \ model.normalization=rmsnorm \ @@ -3215,7 +3367,217 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.activations_checkpoint_granularity='full' \ model.activations_checkpoint_num_layers=1 \ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings \ + model.use_flash_attention=True" + // commented out to save time on github ci @adithyare + //sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + //trainer.devices=2 \ + //trainer.accelerator=gpu \ + //trainer.log_every_n_steps=1 \ + //trainer.val_check_interval=2 \ + //trainer.limit_val_batches=1 \ + //trainer.accumulate_grad_batches=1 \ + //trainer.max_steps=6 \ + //trainer.precision=16 \ + //trainer.gradient_clip_val=1.0 \ + //exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + //exp_manager.resume_if_exists=True \ + //model.tensor_model_parallel_size=2 \ + //model.optim.name=fused_adam \ + //model.optim.lr=2e-4 \ + //model.optim.sched.warmup_steps=2 \ + //model.optim.sched.constant_steps=2 \ + //model.optim.sched.min_lr=8e-5 \ + //model.max_position_embeddings=128 \ + //model.encoder_seq_length=128 \ + //model.data.seq_length=128 \ + //model.position_embedding_type=rope \ + //model.rotary_percentage=0.5 \ + //model.normalization=rmsnorm \ + //model.bias=False \ + //model.bias_activation_fusion=False \ + //model.bias_dropout_add_fusion=False \ + //model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + //model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + //model.num_layers=8 \ + //model.hidden_size=256 \ + //model.num_attention_heads=8 \ + //model.activations_checkpoint_method='block' \ + //model.activations_checkpoint_granularity='full' \ + //model.activations_checkpoint_num_layers=1 \ + //model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + //model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings \ + //model.use_flash_attention=True" + sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results" + sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" + } + } + stage('L2: Megatron GPT with ALiBi Pretraining and Resume Training TP=2') { + when { + anyOf { + branch 'r1.20.0' + changeRequest target: 'r1.20.0' + } + } + failFast true + steps { + sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + trainer.devices=2 \ + trainer.accelerator=gpu \ + trainer.log_every_n_steps=1 \ + trainer.val_check_interval=2 \ + trainer.limit_val_batches=2 \ + trainer.accumulate_grad_batches=1 \ + trainer.max_steps=3 \ + trainer.precision=16 \ + trainer.gradient_clip_val=1.0 \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + model.tensor_model_parallel_size=2 \ + model.optim.name=fused_adam \ + model.optim.lr=2e-4 \ + model.optim.sched.warmup_steps=1 \ + model.optim.sched.constant_steps=1 \ + model.optim.sched.min_lr=8e-5 \ + model.max_position_embeddings=128 \ + model.encoder_seq_length=128 \ + model.data.seq_length=128 \ + model.position_embedding_type=alibi \ + model.normalization=rmsnorm \ + model.bias=False \ + model.bias_activation_fusion=False \ + model.bias_dropout_add_fusion=False \ + model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + model.num_layers=8 \ + model.hidden_size=256 \ + model.num_attention_heads=8 \ + model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ + model.activations_checkpoint_num_layers=1 \ + model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" + // not testing resume functionality to save time on ci @adithyare + //sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + //trainer.devices=2 \ + //trainer.accelerator=gpu \ + //trainer.log_every_n_steps=1 \ + //trainer.val_check_interval=2 \ + //trainer.limit_val_batches=1 \ + //trainer.accumulate_grad_batches=1 \ + //trainer.max_steps=6 \ + //trainer.precision=16 \ + //trainer.gradient_clip_val=1.0 \ + //exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + //exp_manager.resume_if_exists=True \ + //model.tensor_model_parallel_size=2 \ + //model.optim.name=fused_adam \ + //model.optim.lr=2e-4 \ + //model.optim.sched.warmup_steps=2 \ + //model.optim.sched.constant_steps=2 \ + //model.optim.sched.min_lr=8e-5 \ + //model.max_position_embeddings=128 \ + //model.encoder_seq_length=128 \ + //model.data.seq_length=128 \ + //model.position_embedding_type=alibi \ + //model.normalization=rmsnorm \ + //model.bias=False \ + //model.bias_activation_fusion=False \ + //model.bias_dropout_add_fusion=False \ + //model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + //model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + //model.num_layers=8 \ + //model.hidden_size=256 \ + //model.num_attention_heads=8 \ + //model.activations_checkpoint_method='block' \ + //model.activations_checkpoint_granularity='full' \ + //model.activations_checkpoint_num_layers=1 \ + //model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + //model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" + sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results" + sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" + } + } + stage('L2: Megatron GPT with KERPLE Pretraining and Resume Training TP=2') { + when { + anyOf { + branch 'r1.20.0' + changeRequest target: 'r1.20.0' + } + } + failFast true + steps { + sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + trainer.devices=2 \ + trainer.accelerator=gpu \ + trainer.log_every_n_steps=1 \ + trainer.val_check_interval=2 \ + trainer.limit_val_batches=2 \ + trainer.accumulate_grad_batches=1 \ + trainer.max_steps=3 \ + trainer.precision=16 \ + trainer.gradient_clip_val=1.0 \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + model.tensor_model_parallel_size=2 \ + model.optim.name=fused_adam \ + model.optim.lr=2e-4 \ + model.optim.sched.warmup_steps=1 \ + model.optim.sched.constant_steps=1 \ + model.optim.sched.min_lr=8e-5 \ + model.max_position_embeddings=128 \ + model.encoder_seq_length=128 \ + model.data.seq_length=128 \ + model.position_embedding_type=kerple \ + model.normalization=rmsnorm \ + model.bias=False \ + model.bias_activation_fusion=False \ + model.bias_dropout_add_fusion=False \ + model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + model.num_layers=8 \ + model.hidden_size=256 \ + model.num_attention_heads=8 \ + model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ + model.activations_checkpoint_num_layers=1 \ + model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" + // commented out to save time on github ci @adithyare + //sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + //trainer.devices=2 \ + //trainer.accelerator=gpu \ + //trainer.log_every_n_steps=1 \ + //trainer.val_check_interval=2 \ + //trainer.limit_val_batches=1 \ + //trainer.accumulate_grad_batches=1 \ + //trainer.max_steps=6 \ + //trainer.precision=16 \ + //trainer.gradient_clip_val=1.0 \ + //exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + //exp_manager.resume_if_exists=True \ + //model.tensor_model_parallel_size=2 \ + //model.optim.name=fused_adam \ + //model.optim.lr=2e-4 \ + //model.optim.sched.warmup_steps=2 \ + //model.optim.sched.constant_steps=2 \ + //model.optim.sched.min_lr=8e-5 \ + //model.max_position_embeddings=128 \ + //model.encoder_seq_length=128 \ + //model.data.seq_length=128 \ + //model.position_embedding_type=kerple \ + //model.normalization=rmsnorm \ + //model.bias=False \ + //model.bias_activation_fusion=False \ + //model.bias_dropout_add_fusion=False \ + //model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + //model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + //model.num_layers=8 \ + //model.hidden_size=256 \ + //model.num_attention_heads=8 \ + //model.activations_checkpoint_method='block' \ + //model.activations_checkpoint_granularity='full' \ + //model.activations_checkpoint_num_layers=1 \ + //model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + //model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results" sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" } @@ -3223,8 +3585,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Pretraining and Resume Training PP=2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -3307,8 +3669,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Finetuning PP=2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -3372,11 +3734,95 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' sh "rm -rf examples/nlp/language_modeling/gpt_sft_results" } } + stage('L2: Megatron GPT PEFT Lora PP=2') { + when { + anyOf { + branch 'r1.20.0' + changeRequest target: 'r1.20.0' + } + } + failFast true + steps { + sh "rm -rf examples/nlp/language_modeling/gpt_peft_lora_results_pp2" + sh "python examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py \ + trainer.devices=2 \ + trainer.log_every_n_steps=1 \ + trainer.max_epochs=9999 \ + trainer.max_steps=3 \ + trainer.val_check_interval=3 \ + ++trainer.limit_val_batches=2 \ + trainer.precision=16 \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_peft_lora_results_pp2 \ + model.pipeline_model_parallel_size=2 \ + model.tensor_model_parallel_size=1 \ + model.restore_from_path=/home/TestData/nlp/megatron_gpt/PP2/gpt_pp2_tp1.nemo \ + model.peft.peft_scheme='lora' \ + model.answer_only_loss=True \ + model.micro_batch_size=1 \ + model.global_batch_size=4 \ + model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.train_ds.concat_sampling_probabilities=[1.0] \ + model.data.train_ds.num_workers=0 \ + model.data.validation_ds.num_workers=0 \ + model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.validation_ds.names=[quarel]" + sh "rm -rf examples/nlp/language_modeling/gpt_peft_lora_results_pp2" + } + } + stage('L2: Megatron GPT PEFT Lora TP=2') { + when { + anyOf { + branch 'r1.20.0' + changeRequest target: 'r1.20.0' + } + } + failFast true + steps { + sh "rm -rf /home/TestData/nlp/lora_tuning_tp2" + sh "python examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py \ + trainer.devices=2 \ + trainer.log_every_n_steps=1 \ + trainer.max_epochs=9999 \ + trainer.max_steps=3 \ + trainer.val_check_interval=3 \ + ++trainer.limit_val_batches=2 \ + trainer.precision=16 \ + exp_manager.exp_dir=/home/TestData/nlp/lora_tuning_tp2 \ + model.pipeline_model_parallel_size=1 \ + model.tensor_model_parallel_size=2 \ + model.restore_from_path=/home/TestData/nlp/megatron_gpt/TP2/megatron_gpt_tp2.nemo \ + model.peft.peft_scheme='lora' \ + model.answer_only_loss=True \ + model.micro_batch_size=1 \ + model.global_batch_size=4 \ + model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.train_ds.concat_sampling_probabilities=[1.0] \ + model.data.train_ds.num_workers=0 \ + model.data.validation_ds.num_workers=0 \ + model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.validation_ds.names=[quarel]" + sh "python examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py \ + model.restore_from_path=/home/TestData/nlp/megatron_gpt/TP2/megatron_gpt_tp2.nemo \ + model.peft.restore_from_path=/home/TestData/nlp/lora_tuning_tp2/megatron_gpt_peft_tuning/checkpoints/megatron_gpt_peft_tuning.nemo \ + model.peft.restore_from_ckpt_name=null \ + model.peft.restore_from_hparams_path=null \ + trainer.devices=2 \ + model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel_4.jsonl] \ + model.data.test_ds.names=['quarel4'] \ + model.data.test_ds.global_batch_size=1 \ + model.data.test_ds.micro_batch_size=1 \ + model.data.test_ds.tokens_to_generate=10 \ + inference.greedy=True \ + inference.repetition_penalty=1.0 \ + inference.outfile_path='/home/TestData/nlp/lora_tuning_tp2/out.jsonl'" + sh "rm -rf /home/TestData/nlp/lora_tuning_tp2" + } + } stage('L2: Megatron GPT Eval') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -3392,8 +3838,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Eval PP2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -3407,47 +3853,71 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' trainer.num_nodes=1" } } - - stage('L2: Megatron GPT Prompt Tuning TP1 PP1') { + stage('L2: Megatron GPT SFT Eval (inference seq len > training seq len)') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true - parallel{ - stage('GPT Prompt Learning TP=1 PP=1') { - steps { - sh "python examples/nlp/language_modeling/megatron_gpt_prompt_learning.py \ - --config-name=megatron_gpt_prompt_learning_config \ - name='/home/TestData/nlp/prompt_learning/prompt_tuning_test' \ - trainer.devices=1 \ - trainer.max_steps=1 \ - trainer.val_check_interval=1 \ - trainer.max_epochs=null \ - model.data.num_workers=1 \ - model.tensor_model_parallel_size=1 \ - model.virtual_prompt_style='p-tuning' \ - model.p_tuning.encoder_type='embedding' \ - model.language_model_path='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp1.nemo' \ - model.existing_tasks=[] \ - model.new_tasks=['rte'] \ - model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.global_batch_size=4" - sh "rm -rf /home/TestData/nlp/prompt_learning/prompt_tuning_test" - sh "rm -rf /home/TestData/nlp/prompt_learning/prompt_tuning_test.nemo" - } - } + steps{ + sh "python examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py \ + model.restore_from_path=/home/TestData/nlp/megatron_gpt_sft/megatron_gpt_rope_sft.nemo \ + model.peft.restore_from_path=null \ + model.data.test_ds.file_names=['/home/TestData/nlp/megatron_gpt_sft/sample.jsonl'] \ + model.data.test_ds.names=['test'] \ + model.data.test_ds.global_batch_size=1 \ + model.data.test_ds.micro_batch_size=1 \ + model.data.test_ds.tokens_to_generate=30 \ + model.data.test_ds.max_seq_length=6000 \ + inference.greedy=True \ + inference.repetition_penalty=1.0 \ + inference.outfile_path='examples/nlp/language_modeling/out.jsonl' && \ + rm -rf examples/nlp/language_modeling/out.jsonl" } } + // commented out to save time we are testing tp>1 and pp>1 anyway. @adithyare + //stage('L2: Megatron GPT Prompt Tuning TP1 PP1') { + // when { + // anyOf { + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' + // } + // } + // failFast true + // parallel{ + // stage('GPT Prompt Learning TP=1 PP=1') { + // steps { + // sh "python examples/nlp/language_modeling/megatron_gpt_prompt_learning.py \ + // --config-name=megatron_gpt_prompt_learning_config \ + // name='/home/TestData/nlp/prompt_learning/prompt_tuning_test' \ + // trainer.devices=1 \ + // trainer.max_steps=1 \ + // trainer.val_check_interval=1 \ + // trainer.max_epochs=null \ + // model.data.num_workers=1 \ + // model.tensor_model_parallel_size=1 \ + // model.virtual_prompt_style='p-tuning' \ + // model.p_tuning.encoder_type='embedding' \ + // model.language_model_path='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp1.nemo' \ + // model.existing_tasks=[] \ + // model.new_tasks=['rte'] \ + // model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.global_batch_size=4" + // sh "rm -rf /home/TestData/nlp/prompt_learning/prompt_tuning_test" + // sh "rm -rf /home/TestData/nlp/prompt_learning/prompt_tuning_test.nemo" + // } + // } + // } + //} stage('L2: Megatron GPT Prompt Tuning TP2 PP1') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -3486,57 +3956,56 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' } } - // TODO: add when https://github.com/NVIDIA/apex/pull/1596 is merged - // stage('L2: Megatron GPT Prompt Tuning TP1 PP2') { - // when { - // anyOf { - // branch 'r1.19.0' - // changeRequest target: 'r1.19.0' - // } - // } - // failFast true - // parallel{ - // stage('GPT Prompt Learning TP=1 PP=2') { - // steps { - // sh "python examples/nlp/language_modeling/megatron_gpt_prompt_learning.py \ - // --config-name=megatron_gpt_prompt_learning_config \ - // name='/home/TestData/nlp/prompt_learning/p_tuning_test_pp' \ - // trainer.devices=2 \ - // trainer.max_steps=1 \ - // trainer.val_check_interval=1 \ - // trainer.max_epochs=null \ - // model.optim.name=fused_adam \ - // model.data.num_workers=1 \ - // model.pipeline_model_parallel_size=2 \ - // model.language_model_path='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ - // model.existing_tasks=[] \ - // model.new_tasks=['boolq'] \ - // model.data.train_ds=['/home/TestData/nlp/prompt_learning/boolq_CI_test.jsonl'] \ - // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/boolq_CI_test.jsonl'] \ - // model.global_batch_size=4" - // sh "rm -rf /home/TestData/nlp/prompt_learning/p_tuning_test_pp" - // sh "python examples/nlp/language_modeling/megatron_gpt_prompt_learning_eval.py \ - // virtual_prompt_model_file='/home/TestData/nlp/prompt_learning/p_tuning_test_pp.nemo' \ - // gpt_model_file='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ - // inference.greedy=True \ - // inference.add_BOS=False \ - // trainer.devices=2 \ - // pipeline_model_parallel_size=2 \ - // pred_file_path=/home/TestData/nlp/prompt_learning/p_tuning_test_pp_preds.txt \ - // data_paths=['/home/TestData/nlp/prompt_learning/boolq_CI_test.jsonl']" - // sh "rm -rf /home/TestData/nlp/prompt_learning/p_tuning_test_pp.nemo" - // sh "rm -rf /home/TestData/nlp/prompt_learning/p_tuning_test_pp_preds.txt" - // } - // } - // } - // } + stage('L2: Megatron GPT Prompt Tuning TP1 PP2') { + when { + anyOf { + branch 'r1.20.0' + changeRequest target: 'r1.20.0' + } + } + failFast true + parallel{ + stage('GPT Prompt Learning TP=1 PP=2') { + steps { + sh "python examples/nlp/language_modeling/megatron_gpt_prompt_learning.py \ + --config-name=megatron_gpt_prompt_learning_config \ + name='/home/TestData/nlp/prompt_learning/p_tuning_test_pp' \ + trainer.devices=2 \ + trainer.max_steps=1 \ + trainer.val_check_interval=1 \ + trainer.max_epochs=null \ + model.optim.name=fused_adam \ + model.data.num_workers=1 \ + model.pipeline_model_parallel_size=2 \ + model.language_model_path='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ + model.existing_tasks=[] \ + model.new_tasks=['boolq'] \ + model.data.train_ds=['/home/TestData/nlp/prompt_learning/boolq_CI_test.jsonl'] \ + model.data.validation_ds=['/home/TestData/nlp/prompt_learning/boolq_CI_test.jsonl'] \ + model.global_batch_size=4" + sh "rm -rf /home/TestData/nlp/prompt_learning/p_tuning_test_pp" + sh "python examples/nlp/language_modeling/megatron_gpt_prompt_learning_eval.py \ + virtual_prompt_model_file='/home/TestData/nlp/prompt_learning/p_tuning_test_pp.nemo' \ + gpt_model_file='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ + inference.greedy=True \ + inference.add_BOS=False \ + trainer.devices=2 \ + pipeline_model_parallel_size=2 \ + pred_file_path=/home/TestData/nlp/prompt_learning/p_tuning_test_pp_preds.txt \ + data_paths=['/home/TestData/nlp/prompt_learning/boolq_CI_test.jsonl']" + sh "rm -rf /home/TestData/nlp/prompt_learning/p_tuning_test_pp.nemo" + sh "rm -rf /home/TestData/nlp/prompt_learning/p_tuning_test_pp_preds.txt" + } + } + } + } // TODO: Add this test back. Test was failing on CI machines due to HW error // stage('L2: Megatron GPT Convert from Megatron-LM checkpoing and Eval') { // when { // anyOf { - // branch 'r1.19.0' - // changeRequest target: 'r1.19.0' + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' // } // } // failFast true @@ -3562,8 +4031,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron Change Partitions') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -3609,8 +4078,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -3705,8 +4174,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 with ALiBi Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -3801,8 +4270,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 with KERPLE Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -3897,8 +4366,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Pretraining and Resume Training PP=2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -3967,8 +4436,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 w/ Mixture of Expert Pretraining') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -4009,52 +4478,53 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' } } - stage('L2: Megatron T5 Prompt Learning TP1 PP1') { - when { - anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' - } - } - failFast true - parallel{ - stage('T5 Prompt Learning TP=1 PP=1') { - steps { - sh "python examples/nlp/language_modeling/megatron_t5_prompt_learning.py \ - --config-name=megatron_t5_prompt_learning \ - name='/home/TestData/nlp/prompt_learning/t5_p_tuning_test' \ - trainer.devices=1 \ - trainer.max_steps=1 \ - trainer.val_check_interval=1 \ - trainer.max_epochs=null \ - model.data.num_workers=1 \ - model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m-refactor.nemo' \ - model.existing_tasks=[] \ - model.new_tasks=['squad'] \ - model.data.train_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ - model.data.validation_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ - model.global_batch_size=4 \ - model.micro_batch_size=4" - sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test" - sh "python examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py \ - virtual_prompt_model_file='/home/TestData/nlp/prompt_learning/t5_p_tuning_test.nemo' \ - language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m-refactor.nemo' \ - data.test_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ - pred_file_path='/home/TestData/nlp/prompt_learning/t5_p_tuning_test_preds.txt' \ - data.global_batch_size=4 \ - data.micro_batch_size=4" - sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test.nemo" - sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test_preds.txt" - } - } - } - } + // commented out to save time in github ci, we have tp>1 and pp>1 tests anyway @adithyare + //stage('L2: Megatron T5 Prompt Learning TP1 PP1') { + // when { + // anyOf { + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' + // } + // } + // failFast true + // parallel{ + // stage('T5 Prompt Learning TP=1 PP=1') { + // steps { + // sh "python examples/nlp/language_modeling/megatron_t5_prompt_learning.py \ + // --config-name=megatron_t5_prompt_learning \ + // name='/home/TestData/nlp/prompt_learning/t5_p_tuning_test' \ + // trainer.devices=1 \ + // trainer.max_steps=1 \ + // trainer.val_check_interval=1 \ + // trainer.max_epochs=null \ + // model.data.num_workers=1 \ + // model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m-refactor.nemo' \ + // model.existing_tasks=[] \ + // model.new_tasks=['squad'] \ + // model.data.train_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ + // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ + // model.global_batch_size=4 \ + // model.micro_batch_size=4" + // sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test" + // sh "python examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py \ + // virtual_prompt_model_file='/home/TestData/nlp/prompt_learning/t5_p_tuning_test.nemo' \ + // language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m-refactor.nemo' \ + // data.test_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ + // pred_file_path='/home/TestData/nlp/prompt_learning/t5_p_tuning_test_preds.txt' \ + // data.global_batch_size=4 \ + // data.micro_batch_size=4" + // sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test.nemo" + // sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test_preds.txt" + // } + // } + // } + //} stage('L2: Megatron T5 Prompt Learning TP2 PP1') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -4098,8 +4568,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' // stage('L2: Megatron T5 Prompt Learning TP1 PP2') { // when { // anyOf { - // branch 'r1.19.0' - // changeRequest target: 'r1.19.0' + // branch 'r1.20.0' + // changeRequest target: 'r1.20.0' // } // } // failFast true @@ -4142,8 +4612,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron UL2 Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -4222,8 +4692,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Eval') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -4239,8 +4709,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron BART Pretraining and Resume Training, TP=2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -4308,8 +4778,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron BART Pretraining and Resume Training, PP=2') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -4381,8 +4851,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 GLUE/XNLI Finetuning') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -4454,8 +4924,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron Mock Data Generation') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true @@ -4491,8 +4961,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: TTS Fast dev runs 1') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } parallel { @@ -4637,8 +5107,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L??: Speech Checkpoints tests') { when { anyOf { - branch 'r1.19.0' - changeRequest target: 'r1.19.0' + branch 'r1.20.0' + changeRequest target: 'r1.20.0' } } failFast true diff --git a/README.rst b/README.rst index 841509dfec5f..6fbe9047d0c4 100644 --- a/README.rst +++ b/README.rst @@ -5,9 +5,9 @@ :target: http://www.repostatus.org/#active :alt: Project Status: Active – The project has reached a stable, usable state and is being actively developed. -.. |documentation| image:: https://readthedocs.com/projects/nvidia-nemo/badge/?version=r1.19.0 +.. |documentation| image:: https://readthedocs.com/projects/nvidia-nemo/badge/?version=main :alt: Documentation - :target: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/ + :target: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/ .. |license| image:: https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg :target: https://github.com/NVIDIA/NeMo/blob/master/LICENSE @@ -25,7 +25,7 @@ :target: https://pepy.tech/project/nemo-toolkit :alt: PyPi total downloads -.. |codeql| image:: https://github.com/nvidia/nemo/actions/workflows/codeql.yml/badge.svg?branch=r1.19.0&event=push +.. |codeql| image:: https://github.com/nvidia/nemo/actions/workflows/codeql.yml/badge.svg?branch=main&event=push :target: https://github.com/nvidia/nemo/actions/workflows/codeql.yml :alt: CodeQL @@ -33,7 +33,7 @@ :target: https://github.com/psf/black :alt: Code style: black -.. _r1.19.0-readme: +.. _main-readme: **NVIDIA NeMo** =============== @@ -61,7 +61,7 @@ We have extensive `tutorials `_. For advanced users that want to train NeMo models from scratch or finetune existing NeMo models -we have a full suite of `example scripts `_ that support multi-GPU/multi-node training. +we have a full suite of `example scripts `_ that support multi-GPU/multi-node training. For scaling NeMo LLM training on Slurm clusters or public clouds, please see the `NVIDIA NeMo Megatron Launcher `_. The NM launcher has extensive recipes, scripts, utilities, and documentation for training NeMo LLMs and also has an `Autoconfigurator `_ @@ -74,7 +74,7 @@ Key Features * Speech processing * `HuggingFace Space for Audio Transcription (File, Microphone and YouTube) `_ - * `Automatic Speech Recognition (ASR) `_ + * `Automatic Speech Recognition (ASR) `_ * Supported ASR models: ``_ * Jasper, QuartzNet, CitriNet, ContextNet * Conformer-CTC, Conformer-Transducer, FastConformer-CTC, FastConformer-Transducer @@ -84,46 +84,46 @@ Key Features * CTC * Transducer/RNNT * Hybrid Transducer/CTC - * NeMo Original `Multi-blank Transducers `_ + * NeMo Original `Multi-blank Transducers `_ and `Token-and-Duration Transducers (TDT) `_ * Streaming/Buffered ASR (CTC/Transducer) - `Chunked Inference Examples `_ - * Cache-aware Streaming Conformer - ``_ + * Cache-aware Streaming Conformer with multiple lookaheads - ``_ * Beam Search decoding - * `Language Modelling for ASR `_: N-gram LM in fusion with Beam Search decoding, Neural Rescoring with Transformer - * `Support of long audios for Conformer with memory efficient local attention `_ - * `Speech Classification, Speech Command Recognition and Language Identification `_: MatchboxNet (Command Recognition), AmberNet (LangID) + * `Language Modelling for ASR (CTC and RNNT) `_: N-gram LM in fusion with Beam Search decoding, Neural Rescoring with Transformer + * `Support of long audios for Conformer with memory efficient local attention `_ + * `Speech Classification, Speech Command Recognition and Language Identification `_: MatchboxNet (Command Recognition), AmberNet (LangID) * `Voice activity Detection (VAD) `_: MarbleNet * ASR with VAD Inference - `Example `_ - * `Speaker Recognition `_: TitaNet, ECAPA_TDNN, SpeakerNet - * `Speaker Diarization `_ + * `Speaker Recognition `_: TitaNet, ECAPA_TDNN, SpeakerNet + * `Speaker Diarization `_ * Clustering Diarizer: TitaNet, ECAPA_TDNN, SpeakerNet * Neural Diarizer: MSDD (Multi-scale Diarization Decoder) - * `Speech Intent Detection and Slot Filling `_: Conformer-Transformer + * `Speech Intent Detection and Slot Filling `_: Conformer-Transformer * `Pretrained models on different languages. `_: English, Spanish, German, Russian, Chinese, French, Italian, Polish, ... * `NGC collection of pre-trained speech processing models. `_ * Natural Language Processing * `NeMo Megatron pre-training of Large Language Models `_ - * `Neural Machine Translation (NMT) `_ - * `Punctuation and Capitalization `_ - * `Token classification (named entity recognition) `_ - * `Text classification `_ - * `Joint Intent and Slot Classification `_ - * `Question answering `_ - * `GLUE benchmark `_ - * `Information retrieval `_ - * `Entity Linking `_ - * `Dialogue State Tracking `_ - * `Prompt Learning `_ + * `Neural Machine Translation (NMT) `_ + * `Punctuation and Capitalization `_ + * `Token classification (named entity recognition) `_ + * `Text classification `_ + * `Joint Intent and Slot Classification `_ + * `Question answering `_ + * `GLUE benchmark `_ + * `Information retrieval `_ + * `Entity Linking `_ + * `Dialogue State Tracking `_ + * `Prompt Learning `_ * `NGC collection of pre-trained NLP models. `_ * `Synthetic Tabular Data Generation `_ -* `Speech synthesis (TTS) `_ +* `Speech synthesis (TTS) `_ * Spectrogram generation: Tacotron2, GlowTTS, TalkNet, FastPitch, FastSpeech2, Mixer-TTS, Mixer-TTS-X * Vocoders: WaveGlow, SqueezeWave, UniGlow, MelGAN, HiFiGAN, UnivNet * End-to-end speech generation: FastPitch_HifiGan_E2E, FastSpeech2_HifiGan_E2E, VITS * `NGC collection of pre-trained TTS models. `_ * `Tools `_ - * `Text Processing (text normalization and inverse text normalization) `_ - * `CTC-Segmentation tool `_ - * `Speech Data Explorer `_: a dash-based tool for interactive exploration of ASR/TTS datasets + * `Text Processing (text normalization and inverse text normalization) `_ + * `CTC-Segmentation tool `_ + * `Speech Data Explorer `_: a dash-based tool for interactive exploration of ASR/TTS datasets * `Speech Data Processor `_ @@ -132,17 +132,17 @@ Built for speed, NeMo can utilize NVIDIA's Tensor Cores and scale out training t Requirements ------------ -1) Python 3.8 or above -2) Pytorch 1.10.0 or above +1) Python 3.9 or above +2) Pytorch 1.13.1 or above 3) NVIDIA GPU for training Documentation ------------- -.. |r1.19.0| image:: https://readthedocs.com/projects/nvidia-nemo/badge/?version=r1.19.0 +.. |main| image:: https://readthedocs.com/projects/nvidia-nemo/badge/?version=main :alt: Documentation Status :scale: 100% - :target: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/ + :target: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/ .. |stable| image:: https://readthedocs.com/projects/nvidia-nemo/badge/?version=stable :alt: Documentation Status @@ -152,7 +152,7 @@ Documentation +---------+-------------+------------------------------------------------------------------------------------------------------------------------------------------+ | Version | Status | Description | +=========+=============+==========================================================================================================================================+ -| Latest | |r1.19.0| | `Documentation of the latest (i.e. main) branch. `_ | +| Latest | |main| | `Documentation of the latest (i.e. main) branch. `_ | +---------+-------------+------------------------------------------------------------------------------------------------------------------------------------------+ | Stable | |stable| | `Documentation of the stable (i.e. most recent release) branch. `_ | +---------+-------------+------------------------------------------------------------------------------------------------------------------------------------------+ @@ -263,7 +263,7 @@ packaging is also needed: .. code-block:: bash - pip install -y packaging + pip install packaging Transformer Engine @@ -280,19 +280,37 @@ It is highly recommended to use the NVIDIA PyTorch or NeMo container if having i Transformer Engine requires PyTorch to be built with CUDA 11.8. + +Flash Attention +~~~~~~~~~~~~~~~~~~~~ +Transformer Engine already supports Flash Attention for GPT models. If you want to use Flash Attention for non-causal models or use with attention bias (introduced from position encoding, e.g. Alibi), please install `flash-attn `_. + +.. code-block:: bash + + pip install flash-attn + pip install triton==2.0.0.dev20221202 + +NLP inference UI +~~~~~~~~~~~~~~~~~~~~ +To launch the inference web UI server, please install the gradio `gradio `_. + +.. code-block:: bash + + pip install gradio==3.34.0 + NeMo Text Processing ~~~~~~~~~~~~~~~~~~~~ NeMo Text Processing, specifically (Inverse) Text Normalization, is now a separate repository `https://github.com/NVIDIA/NeMo-text-processing `_. Docker containers: ~~~~~~~~~~~~~~~~~~ -We release NeMo containers alongside NeMo releases. For example, NeMo ``r1.16.0`` comes with container ``nemo:23.01``, you may find more details about released containers in `releases page `_. +We release NeMo containers alongside NeMo releases. For example, NeMo ``r1.19.0`` comes with container ``nemo:23.04``, you may find more details about released containers in `releases page `_. To use built container, please run .. code-block:: bash - docker pull nvcr.io/nvidia/nemo:23.01 + docker pull nvcr.io/nvidia/nemo:23.04 To build a nemo container with Dockerfile from a branch, please run @@ -301,13 +319,13 @@ To build a nemo container with Dockerfile from a branch, please run DOCKER_BUILDKIT=1 docker build -f Dockerfile -t nemo:latest . -If you chose to work with main branch, we recommend using NVIDIA's PyTorch container version 23.04-py3 and then installing from GitHub. +If you chose to work with main branch, we recommend using NVIDIA's PyTorch container version 23.06-py3 and then installing from GitHub. .. code-block:: bash docker run --gpus all -it --rm -v :/NeMo --shm-size=8g \ -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit \ - stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:23.04-py3 + stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:23.06-py3 Examples -------- diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css index da134a02d86a..cf0ad0ff2d7f 100644 --- a/docs/source/_static/css/custom.css +++ b/docs/source/_static/css/custom.css @@ -255,7 +255,7 @@ article ul { } } -@media (min-width: 1400px) { +@media (min-width: none) { body { font-size: 18px; } diff --git a/docs/source/asr/configs.rst b/docs/source/asr/configs.rst index 120969ee9dfa..f9a4ea9970b1 100644 --- a/docs/source/asr/configs.rst +++ b/docs/source/asr/configs.rst @@ -237,6 +237,45 @@ For example, a decoder config corresponding to a sub-word tokenization model sho vocabulary: [] # filled with vocabulary from tokenizer at runtime +On-the-fly Code Switching +------------------------- + +Nemo supports creating code-switched synthetic utterances on-the-fly during training/validation/testing. This allows you to create ASR models which +support intra-utterance code switching. If you have Nemo formatted audio data on disk (either JSON manifests or tarred audio data), you +can easily mix as many of these audio sources together as desired by adding some extra parameters to your `train_ds`, `validation_ds`, and `test_ds`. + +Please note that this allows you to mix any kind of audio sources together to create synthetic utterances which sample from all sources. The most +common use case for this is blending different languages together to create a multilingual code-switched model, but you can also blend +together different audio sources from the same languages (or language families), to create noise robust data, or mix fast and slow speech from the +same language. + +For multilingual code-switched models, we recommend using AggTokenizer for your Tokenizer if mixing different languages. + +The following example shows how to mix 3 different languages: English (en), German (de), and Japanese (ja) added to the `train_ds` model block, however +you can add similar logic to your `validation_ds` and `test_ds` blocks for on-the-fly code-switched validation and test data too. This example mixes +together 3 languages, but you can use as many as you want. However, be advised that the more languages you add, the higher your `min_duration` and `max_duration` +need to be set to ensure all languages are sampled into each synthetic utterance, and setting these hyperparameters higher will use more VRAM per mini-batch during +training and evaluation. + +.. code-block:: yaml + + model: + train_ds: + manifest_filepath: [/path/to/EN/tarred_manifest.json, /path/to/DE/tarred_manifest.json, /path/to/JA/tarred_manifest.json] + tarred_audio_filepaths: ['/path/to/EN/tars/audio__OP_0..511_CL_.tar', '/path/to/DE/tars/audio__OP_0..1023_CL_.tar', '/path/to/JA/tars/audio__OP_0..2047_CL_.tar'] + is_code_switched: true + is_tarred: true + shuffle: true + code_switched: # add this block for code-switching + min_duration: 12 # the minimum number of seconds for each synthetic code-switched utterance + max_duration: 20 # the maximum number of seconds for each synthetic code-switched utterance + min_monolingual: 0.3 # the minimum percentage of utterances which will be pure monolingual (0.3 = 30%) + probs: [0.25, 0.5, 0.25] # the probability to sample each language (matches order of `language` above) if not provided, assumes uniform distribution + force_monochannel: true # if your source data is multi-channel, then setting this to True will force the synthetic utterances to be mono-channel + sampling_scales: 0.75 # allows you to down/up sample individual languages. Can set this as an array for individual languages, or a scalar for all languages + seed: 123 # add a seed for replicability in future runs (highly useful for `validation_ds` and `test_ds`) + + Model Architecture Configurations --------------------------------- diff --git a/docs/source/asr/data/benchmark_en.csv b/docs/source/asr/data/benchmark_en.csv index 5f68e9ca22ce..b41c675f423c 100644 --- a/docs/source/asr/data/benchmark_en.csv +++ b/docs/source/asr/data/benchmark_en.csv @@ -28,4 +28,12 @@ stt_en_conformer_transducer_xlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/ca stt_en_conformer_transducer_xxlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_xxlarge" stt_en_fastconformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_large" stt_en_fastconformer_ctc_large,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_large" -stt_en_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_pc" \ No newline at end of file +stt_en_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_pc" +stt_en_fastconformer_transducer_xlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_xlarge" +stt_en_fastconformer_ctc_xlarge,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_xlarge" +stt_en_fastconformer_transducer_xxlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_xxlarge" +stt_en_fastconformer_ctc_xxlarge,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_xxlarge" +stt_en_fastconformer_hybrid_large_streaming_80ms,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_streaming_80ms" +stt_en_fastconformer_hybrid_large_streaming_480ms,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_streaming_480ms" +stt_en_fastconformer_hybrid_large_streaming_1040ms,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_streaming_1040ms" +stt_en_fastconformer_hybrid_large_streaming_multi,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_streaming_multi" diff --git a/docs/source/asr/data/scores/en/conformer_en.csv b/docs/source/asr/data/scores/en/conformer_en.csv index 905bdf2ebedc..2b31a07b842a 100644 --- a/docs/source/asr/data/scores/en/conformer_en.csv +++ b/docs/source/asr/data/scores/en/conformer_en.csv @@ -12,3 +12,17 @@ stt_en_conformer_transducer_large,en,,,1.6,3.5,1.7,3.7,,,,,,,,,,,, stt_en_conformer_transducer_large_ls,en,,,2.1,5.0,2.3,5.1,,,,,,,,,,,, stt_en_conformer_transducer_xlarge,en,,,1.48 %,2.95 %,1.62 %,3.01 %,,6.46 %,4.59 %,5.32 %,5.70 %,6.47 %,21.32 %,,,,2.05 %,1.17 % stt_en_conformer_transducer_xxlarge,en,,,1.52 %,3.09 %,1.72 %,3.14 %,,,5.29 %,5.85 %,6.64 %,,,,,,2.42 %,1.49 % +stt_en_fastconformer_hybrid_large_streaming_80ms (CTC),en,,,,,3.5 %,8.1 %,,,10.2 %,7.2 %,,,,,,,3.5 %,2.3 % +stt_en_fastconformer_hybrid_large_streaming_480ms (CTC),en,,,,,3.6 %,7.5 %,,,9.8 %,7.0 %,,,,,,,3.5 %,2.1 % +stt_en_fastconformer_hybrid_large_streaming_1040ms (CTC),en,,,,,2.7 %,6.4 %,,,9.0 %,7.0 %,,,,,,,3.2 %,1.9 % +stt_en_fastconformer_hybrid_large_streaming_80ms (RNNT),en,,,,,2.7 %,6.5 %,,,9.1 %,6.9 %,,,,,,,3.2 %,1.9 % +stt_en_fastconformer_hybrid_large_streaming_480ms (RNNT),en,,,,,2.7 %,6.1 %,,,8.5 %,6.7 %,,,,,,,3.1 %,1.8 % +stt_en_fastconformer_hybrid_large_streaming_1040ms (RNNT),en,,,,,2.3 %,5.5 %,,,8.0 %,6.6 %,,,,,,,2.9 %,1.6 % +stt_en_fastconformer_hybrid_large_streaming_multi (RNNT - 0ms),en,,,,,,7.0 %,,,,,,,,,,,, +stt_en_fastconformer_hybrid_large_streaming_multi (RNNT - 80ms),en,,,,,,6.4 %,,,,,,,,,,,, +stt_en_fastconformer_hybrid_large_streaming_multi (RNNT - 480),en,,,,,,5.7 %,,,,,,,,,,,, +stt_en_fastconformer_hybrid_large_streaming_multi (RNNT - 1040),en,,,,,,5.4 %,,,,,,,,,,,, +stt_en_fastconformer_hybrid_large_streaming_multi (CTC - 0ms),en,,,,,,8.4 %,,,,,,,,,,,, +stt_en_fastconformer_hybrid_large_streaming_multi (CTC - 80ms),en,,,,,,7.8 %,,,,,,,,,,,, +stt_en_fastconformer_hybrid_large_streaming_multi (CTC - 480),en,,,,,,6.7 %,,,,,,,,,,,, +stt_en_fastconformer_hybrid_large_streaming_multi (CTC - 1040),en,,,,,,6.2 %,,,,,,,,,,,, diff --git a/docs/source/asr/datasets.rst b/docs/source/asr/datasets.rst index 5f74510bd054..05278ecb2437 100644 --- a/docs/source/asr/datasets.rst +++ b/docs/source/asr/datasets.rst @@ -126,7 +126,7 @@ AN4 Dataset This is a small dataset recorded and distributed by Carnegie Mellon University. It consists of recordings of people spelling out addresses, names, etc. Information about this dataset can be found on the `official CMU site `_. -#. `Download and extract the dataset `_ (which is labeled "NIST's Sphere audio (.sph) format (64M)". +#. `Download and extract the dataset `_ (which is labeled "NIST's Sphere audio (.sph) format (64M)". #. Convert the ``.sph`` files to ``.wav`` using sox, and build one training and one test manifest. @@ -216,7 +216,7 @@ of filepaths, e.g. ``['/data/shard1.tar', '/data/shard2.tar']``, or in a single tag ``_CL_``. For SLURM based tasks, we suggest the use of the special tags for ease of use. As with non-tarred datasets, the manifest file should be passed in ``manifest_filepath``. The dataloader assumes that the length -of the manifest after filtering is the correct size of the dataset for reporting training progress. +of the manifest after filtering is the correct size of the dataset for reporting training progress. The ``tarred_shard_strategy`` field of the config file can be set if you have multiple shards and are running an experiment with multiple workers. It defaults to ``scatter``, which preallocates a set of shards per worker which do not change during runtime. @@ -237,6 +237,18 @@ see the corresponding class APIs in the `Datasets <./api.html#Datasets>`__ secti applied such that each worker ends up with the same number of files. We currently do not check for this in any dataloader, but the user's program may hang if the shards are uneven. +Sharded Manifests +~~~~~~~~~~~~~~~~~ +If your dataset / manifest is large, you may wish to use sharded manifest files instead of a single manifest file. The naming convention +is identical to the audio tarballs and there should be a 1:1 relationship between a sharded audio tarfile and its manifest shard; e.g. +``'/data/sharded_manifests/manifest__OP_1..64_CL_'`` in the above example. Using sharded manifests improves job startup times and +decreases memory usage, as each worker only loads manifest shards for the corresponding audio shards instead of the entire manifest. + +To enable sharded manifest filename expansion, set the ``shard_manifests`` field of the config file to true. In addition, the +``defer_setup`` flag needs to be true as well, so that the dataloader will be initialized after the DDP and its length can be collected from +the distributed workers. + + Conversion to Tarred Datasets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -266,13 +278,20 @@ The files in the target directory should look similar to the following: ├── audio_2.tar ├── ... ├── metadata.yaml - └── tarred_audio_manifest.json + ├── tarred_audio_manifest.json + ├── sharded_manifests/ + ├── manifest_1.json + ├── ... + └── manifest_N.json + Note that file structures are flattened such that all audio files are at the top level in each tarball. This ensures that filenames are unique in the tarred dataset and the filepaths do not contain "-sub" and forward slashes in each ``audio_filepath`` are simply converted to underscores. For example, a manifest entry for ``/data/directory1/file.wav`` would be ``_data_directory1_file.wav`` in the tarred dataset manifest, and ``/data/directory2/file.wav`` would be converted to ``_data_directory2_file.wav``. +Sharded manifests are generated by default; this behavior can be toggled via the ``no_shard_manifests`` flag. + Bucketing Datasets ------------------ diff --git a/docs/source/asr/models.rst b/docs/source/asr/models.rst index 80a0fd90f0fb..713dd222eef9 100644 --- a/docs/source/asr/models.rst +++ b/docs/source/asr/models.rst @@ -175,7 +175,7 @@ We support the following three right context modeling: * fully causal model with zero look-ahead: tokens would not see any future tokens. convolution layers are all causal and right tokens are masked for self-attention. It gives zero latency but with limited accuracy. -To train such a model, you need to set `encoder.att_context_size=[left_context, 0]` and `encoder.conv_context_size=causal` in the config. +To train such a model, you need to set `model.encoder.att_context_size=[left_context,0]` and `model.encoder.conv_context_size=causal` in the config. * regular look-ahead: convolutions would be able to see few future frames, and self-attention would also see the same number of future tokens. @@ -186,13 +186,11 @@ For example for a model of 17 layers with 4x downsampling and 10ms window shift, For example, in a model which chunk size of 20 tokens, tokens at the first position of each chunk would see all the next 19 tokens while the last token would see zero future tokens. This approach is more efficient than regular look-ahead in terms of computations as the activations for most of the look-ahead part would be cached and there is close to zero duplications in the calculations. -In terms of accuracy, this approach gives similar or even better results in term of accuracy than regular look-ahead as each token in each layer have access to more tokens on average. That is why we recommend to use this approach for streaming. - +In terms of accuracy, this approach gives similar or even better results in term of accuracy than regular look-ahead as each token in each layer have access to more tokens on average. That is why we recommend to use this approach for streaming. Therefore we recommend to use the chunk-aware for cache-aware models. ** Note: Latencies are based on the assumption that the forward time of the network is zero and it just estimates the time needed after a frame would be available until it is passed through the model. -Approaches with non-zero look-ahead can give significantly better accuracy by sacrificing latency. The latency can get controlled by the left context size. Increasing the right context would help the accuracy to a limit but would increase the compuation time. - +Approaches with non-zero look-ahead can give significantly better accuracy by sacrificing latency. The latency can get controlled by the left context size. Increasing the right context would help the accuracy to a limit but would increase the computation time. In all modes, left context can be controlled by the number of tokens to be visible in the self-attention and the kernel size of the convolutions. For example, if left context of self-attention in each layer is set to 20 tokens and there are 10 layers of Conformer, then effective left context is 20*10=200 tokens. @@ -202,18 +200,39 @@ Left context of convolutions is dependent to the their kernel size while it can Self-attention left context of around 6 secs would give close result to have unlimited left context. For a model with 4x downsampling and shift window of 10ms in the preprocessor, each token corresponds to 4*10=40ms. If striding approach is used for downsampling, all the convolutions in downsampling would be fully causal and don't see future tokens. -You may use stacking for downsampling in the streaming models which is significantly faster and uses less memory. -It also does not some of the the limitations with striding and vggnet and you may use any downsampling rate. -You may find the example config files of cache-aware streaming Conformer models at -``/examples/asr/conf/conformer/streaming/conformer_transducer_bpe_streaming.yaml`` for Transducer variant and -at ``/examples/asr/conf/conformer/streaming/conformer_ctc_bpe.yaml`` for CTC variant. +* Multiple Look-aheads +We support multiple look-aheads for cahce-aware models. You may specify a list of context sizes for att_context_size. +During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. +For example you may enable multiple look-aheads by setting `model.encoder.att_context_size=[[70,13],[70,6],[70,1],[70,0]]` for the training. +The first item in the list would be the default during test/validation/inference. To switch between different look-aheads, you may use the method `asr_model.encoder.set_default_att_context_size(att_context_size)` or set the att_context_size like the following when using the script `speech_transcribe.py`: + +.. code-block:: bash + + python [NEMO_GIT_FOLDER]/examples/asr/transcribe_speech.py \ + pretrained_name="stt_en_fastconformer_hybrid_large_streaming_multi" \ + audio_dir="" \ + att_context_size=[70,0] + +.. + +You may find the example config files for cache-aware streaming FastConformer models at +``/examples/asr/conf/fastconformer/cache_aware_streaming/conformer_transducer_bpe_streaming.yaml`` for Transducer variant and +at ``/examples/asr/conf/conformer/cache_aware_streaming/conformer_ctc_bpe.yaml`` for CTC variant. It is recommended to use FastConformer as they are more than 2X faster in both training and inference than regular Conformer. +The hybrid versions of FastConformer can be found here: ``/examples/asr/conf/conformer/hybrid_cache_aware_streaming/`` + +Examples for regular Conformer can be found at +``/examples/asr/conf/conformer/cache_aware_streaming/conformer_transducer_bpe_streaming.yaml`` for Transducer variant and +at ``/examples/asr/conf/conformer/cache_aware_streaming/conformer_ctc_bpe.yaml`` for CTC variant. To simulate cache-aware streaming, you may use the script at ``/examples/asr/asr_cache_aware_streaming/speech_to_text_cache_aware_streaming_infer.py``. It can simulate streaming in single stream or multi-stream mode (in batches) for an ASR model. This script can be used for models trained offline with full-context but the accuracy would not be great unless the chunk size is large enough which would result in high latency. It is recommended to train a model in streaming model with limited context for this script. More info can be found in the script. -You may find FastConformer variants of cache-aware streaming models under ``/examples/asr/conf/fastconformer/``. +Note cache-aware streaming models are being exported without caching support by default. +To include caching support, `model.set_export_config({'cache_support' : 'True'})` should be called before export. +Or, if ``/scripts/export.py`` is being used: +`python export.py cache_aware_conformer.nemo cache_aware_conformer.onnx --export-config cache_support=True` .. _LSTM-Transducer_model: @@ -291,6 +310,11 @@ Similar example configs for FastConformer variants of Hybrid models can be found ``/examples/asr/conf/fastconformer/hybrid_transducer_ctc/`` ``/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/`` +Note Hybrid models are being exported as RNNT (encoder and decoder+joint parts) by default. +To export as CTC (single encoder+decoder graph), `model.set_export_config({'decoder_type' : 'ctc'})` should be called before export. +Or, if ``/scripts/export.py`` is being used: +`python export.py hybrid_transducer.nemo hybrid_transducer.onnx --export-config decoder_type=ctc` + .. _Conformer-HAT_model: Conformer-HAT (Hybrid Autoregressive Transducer) diff --git a/docs/source/asr/speaker_recognition/data/speaker_results.csv b/docs/source/asr/speaker_recognition/data/speaker_results.csv index a0e865c9c487..c92c971e4939 100644 --- a/docs/source/asr/speaker_recognition/data/speaker_results.csv +++ b/docs/source/asr/speaker_recognition/data/speaker_results.csv @@ -1,4 +1,5 @@ Model Name,Model Base Class,Model Card titanet_large,EncDecSpeakerLabelModel,"https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/titanet_large" +titanet_small,EncDecSpeakerLabelModel,"https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/titanet_small" speakerverification_speakernet,EncDecSpeakerLabelModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:speakerverification_speakernet" ecapa_tdnn,EncDecSpeakerLabelModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:ecapa_tdnn" \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index a78ba3528048..0765f8940ab0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,6 @@ sys.path.insert(0, os.path.abspath("../..")) sys.path.insert(0, os.path.abspath("../../nemo")) -sys.path.insert(0, os.path.abspath("../../nemo_text_processing")) from package_info import __version__ @@ -47,7 +46,6 @@ 'hydra', # hydra-core in requirements, hydra during import 'dateutil', # part of core python 'transformers.tokenization_bert', # has ., troublesome for this regex - 'megatron', # megatron-lm in requirements, megatron in import 'sklearn', # scikit_learn in requirements, sklearn in import 'nemo_text_processing.inverse_text_normalization', # Not installed automatically 'nemo_text_processing.text_normalization', # Not installed automatically @@ -55,10 +53,13 @@ 'torchmetrics', # inherited from PTL 'lightning_utilities', # inherited from PTL 'apex', + 'megatron.core', + 'transformer_engine', 'joblib', # inherited from optional code 'IPython', 'ipadic', 'psutil', + 'regex', ] _skipped_autodoc_mock_imports = ['wrapt', 'numpy'] diff --git a/docs/source/core/core.rst b/docs/source/core/core.rst index 4f5589653172..7b2edfa0f5c4 100644 --- a/docs/source/core/core.rst +++ b/docs/source/core/core.rst @@ -38,7 +38,7 @@ To see all available pretrained models for a specific NeMo model, use the ``list .. code-block:: Python - nemo_asr.model.EncDecCTCModel.list_available_models() + nemo_asr.models.EncDecCTCModel.list_available_models() For detailed information on the available pretrained models, refer to the collections documentation: diff --git a/docs/source/core/export.rst b/docs/source/core/export.rst index 0e598e215dbf..202099b13d66 100644 --- a/docs/source/core/export.rst +++ b/docs/source/core/export.rst @@ -177,6 +177,37 @@ Another common requirement for models that are being exported is to run certain # call base method for common set of modifications Exportable._prepare_for_export(self, **kwargs) +Some models that require control flow, need to be exported in multiple parts. Typical examples are RNNT nets. +To facilitate that, the hooks below are provided. To export, for example, 'encoder' and 'decoder' subnets of the model, overload list_export_subnets to return ['encoder', 'decoder']. + +.. code-block:: Python + + def get_export_subnet(self, subnet=None): + """ + Returns Exportable subnet model/module to export + """ + + + def list_export_subnets(self): + """ + Returns default set of subnet names exported for this model + First goes the one receiving input (input_example) + """ + +Some nertworks may be exported differently according to user-settable options (like ragged batch support for TTS or cache support for ASR). To facilitate that - `set_export_config()` method is provided by Exportable to set key/value pairs to predefined model.export_config dictionary, to be used during the export: + +.. code-block:: Python + def set_export_config(self, args): + """ + Sets/updates export_config dictionary + """ +Also, if an action hook on setting config is desired, this method may be overloaded by `Exportable` descendants to include one. +An example can be found in ``/nemo/collections/asr/models/rnnt_models.py``. + +Here is example on now `set_export_config()` call is being tied to command line arguments in ``/scripts/export.py`` : + +.. code-block:: Python + python scripts/export.py hybrid_conformer.nemo hybrid_conformer.onnx --export-config decoder_type=ctc Exportable Model Code ~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/index.rst b/docs/source/index.rst index ee1d3fba805a..dcf2ff30e9c5 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -45,6 +45,7 @@ NVIDIA NeMo User Guide nlp/machine_translation/machine_translation nlp/text_normalization/intro nlp/api + nlp/megatron_onnx_export nlp/models diff --git a/docs/source/nlp/api.rst b/docs/source/nlp/api.rst index 46efb0851d4e..b13dedca300f 100755 --- a/docs/source/nlp/api.rst +++ b/docs/source/nlp/api.rst @@ -1,99 +1,152 @@ -NeMo NLP collection API +NeMo Megatron API ======================= -Model Classes -------------- +Pretraining Model Classes +------------------------- + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_base_model.MegatronBaseModel + :show-inheritance: + :no-members: + :members: __init__, configure_optimizers + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_model.MegatronGPTModel + :show-inheritance: + :no-members: + :members: generate, training_step, validation_step, build_train_valid_test_datasets, setup, on_save_checkpoint, on_load_checkpoint + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_bert_model.MegatronBertModel + :show-inheritance: + :no-members: + :members: training_step, validation_step, build_train_valid_test_datasets, build_LDDL_data, setup, on_save_checkpoint, on_load_checkpoint + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_bart_model.MegatronBARTModel + :show-inheritance: + :no-members: + :members: training_step, validation_step, build_train_valid_test_datasets, setup, on_save_checkpoint, on_load_checkpoint + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_retrieval_model.MegatronRetrievalModel + :show-inheritance: + :no-members: + :members: generate, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_t5_model.MegatronT5Model + :show-inheritance: + :no-members: + :members: complete, encode, decode, add_special_tokens_to_tokenizer, training_step, validation_step, build_train_valid_test_datasets, setup + +Customization Model Classes +--------------------------- + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model.MegatronGPTSFTModel + :show-inheritance: + :no-members: + :members: generate, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_adapter_model.MegatronGPTAdapterLearningModel + :show-inheritance: + :no-members: + :members: __init__, state_dict, generate, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_adapter_model.MegatronGPTInfusedAdapterModel + :show-inheritance: + :no-members: + :members: __init__, state_dict, generate, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_prompt_learning_model.MegatronGPTPromptLearningModel + :show-inheritance: + :no-members: + :members: built_virtual_prompt_dataset, generate, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model.MegatronT5AdapterLearningModel + :show-inheritance: + :no-members: + :members: __init__, state_dict, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model.MegatronT5AdapterLearningModel + :show-inheritance: + :no-members: + :members: _add_adapters_to_component, __init__, state_dict, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model.MegatronT5InfusedAdapterModel + :show-inheritance: + :no-members: + :members: _add_adapters_to_component, __init__, state_dict, training_step, validation_step, build_train_valid_test_datasets, setup -.. autoclass:: nemo.collections.nlp.models.TextClassificationModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact, classifytext +Modules +------- -.. autoclass:: nemo.collections.nlp.models.GLUEModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact +.. autoclass:: nemo.collections.nlp.modules.common.megatron.module.MegatronModule + :show-inheritance: -.. autoclass:: nemo.collections.nlp.models.PunctuationCapitalizationModel - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.modules.common.megatron.module.Float16Module + :show-inheritance: -.. autoclass:: nemo.collections.nlp.models.TokenClassificationModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact - -.. autoclass:: nemo.collections.nlp.models.QAModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, inference, validation_epoch_end, test_epoch_end -.. autoclass:: nemo.collections.nlp.models.DuplexTaggerModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, inference, validation_epoch_end, test_epoch_end +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron.gpt_model.GPTModel + :show-inheritance: + :no-members: + :members: forward -.. autoclass:: nemo.collections.nlp.models.DuplexDecoderModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, inference, validation_epoch_end, test_epoch_end +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron.bert_model.BertModel + :show-inheritance: + :no-members: + :members: forward -.. autoclass:: nemo.collections.nlp.models.BERTLMModel - :show-inheritance: - :members: setup_training_data, setup_optimization +.. autoclass:: nemo.collections.nlp.modules.common.megatron.token_level_encoder_decoder.MegatronTokenLevelEncoderDecoderModule + :show-inheritance: + :no-members: + :members: forward -Modules -------- +.. autoclass:: nemo.collections.nlp.modules.common.megatron.retrieval_token_level_encoder_decoder.MegatronRetrievalTokenLevelEncoderDecoderModule + :show-inheritance: + :no-members: + :members: forward -.. autoclass:: nemo.collections.nlp.modules.BertModule - :show-inheritance: - :members: -.. autoclass:: nemo.collections.nlp.modules.AlbertEncoder - :show-inheritance: - :members: +Datasets +-------- -.. autoclass:: nemo.collections.nlp.modules.BertEncoder - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.blendable_dataset.BlendableDataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.modules.DistilBertEncoder - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_dataset.GPTDataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.modules.RobertaEncoder - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_dataset.MockGPTDataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.modules.SequenceClassifier - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.bert_dataset.BertDataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.modules.SequenceRegression - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.base_prompt_learning_dataset.BasePromptLearningDataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.modules.SequenceTokenClassifier - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset.GPTSFTDataset + :show-inheritance: -.. autofunction:: nemo.collections.nlp.modules.get_lm_model +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_chat_dataset.GPTSFTChatDataset + :show-inheritance: -.. autofunction:: nemo.collections.nlp.modules.get_pretrained_lm_models_list +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.retro_dataset.RETRODataset + :show-inheritance: -.. autofunction:: nemo.collections.nlp.modules.common.megatron.get_megatron_lm_models_list +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.t5_dataset.T5Dataset + :show-inheritance: + :exclude-members: MAX_SEQ_LENGTH_DELTA -Datasets --------- +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.t5_prompt_learning_dataset.T5PromptLearningDataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.data.token_classification.punctuation_capitalization_dataset.BertPunctuationCapitalizationDataset - :show-inheritance: - :members: - :special-members: __getitem__ +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.ul2_dataset.UL2Dataset + :show-inheritance: -.. autofunction:: nemo.collections.nlp.data.token_classification.punctuation_capitalization_tarred_dataset.create_tarred_dataset +Exportable Model Classes +------------------------- -.. autoclass:: nemo.collections.nlp.data.token_classification.punctuation_capitalization_tarred_dataset.BertPunctuationCapitalizationTarredDataset - :show-inheritance: - :members: - :special-members: __iter__ - :exclude-members: reinforce_type +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_model.MegatronGPTExportableModel + :show-inheritance: -.. autoclass:: nemo.collections.nlp.data.token_classification.punctuation_capitalization_infer_dataset.BertPunctuationCapitalizationInferDataset - :show-inheritance: - :members: - :special-members: __getitem__ +.. toctree:: + :maxdepth: 1 + + megatron_onnx_export \ No newline at end of file diff --git a/docs/source/nlp/megatron_onnx_export.rst b/docs/source/nlp/megatron_onnx_export.rst new file mode 100644 index 000000000000..ee6138d1f912 --- /dev/null +++ b/docs/source/nlp/megatron_onnx_export.rst @@ -0,0 +1,47 @@ +.. _megatron_onnx_export: + +ONNX Export of Megatron Models +==================================== + +This guide demonstrates the usage of the ONNX export functionality for Megatron models. + +Requirements +----------------- +Set up the development environment by launching the latest `NeMo container `_ + +The minimum version requirements for NeMo and TransformerEngine are below + +.. code-block:: bash + + nemo > 1.19 + transformer_engine > 0.10 + +Export to ONNX +----------------- +The export script supports the ONNX export of models with .nemo and .ckpt file extensions. The script also supports the export of the following types of models: GPT, T5, BERT, BART, NMT, RETRO. +Commands for both file formats are discussed in the following sections. The model type used for the examples is GPT. + + +Export using .nemo file +^^^^^^^^^^^^^^^^^^^^^^^^ +A model with .nemo file extension can be exported using the command below + +.. code-block:: bash + + python3 examples/nlp/language_modeling/megatron_export.py \ + model_type=gpt \ + onnx_model_file=gpt_126m.onnx \ + gpt_model_file=gpt_126m.nemo + +Export using .ckpt file +^^^^^^^^^^^^^^^^^^^^^^^^ +A model with .ckpt file extension can be exported using the command below + +.. code-block:: bash + + python3 examples/nlp/language_modeling/megatron_export.py \ + model_type=gpt \ + onnx_model_file=gpt_126m.onnx \ + checkpoint_dir=./gpt_126m/ \ + checkpoint_name=model_weights.ckpt \ + hparams_file=./gpt_126m/hparams.yaml \ No newline at end of file diff --git a/docs/source/nlp/models.rst b/docs/source/nlp/models.rst index 932be201bfb2..ad50d976db9f 100755 --- a/docs/source/nlp/models.rst +++ b/docs/source/nlp/models.rst @@ -9,6 +9,7 @@ NeMo's NLP collection supports provides the following task-specific models: :maxdepth: 1 punctuation_and_capitalization_models + spellchecking_asr_customization token_classification joint_intent_slot text_classification diff --git a/docs/source/nlp/nlp_all.bib b/docs/source/nlp/nlp_all.bib index fd0f15f6d1da..48a53240e52b 100644 --- a/docs/source/nlp/nlp_all.bib +++ b/docs/source/nlp/nlp_all.bib @@ -216,3 +216,12 @@ @article{jegou2022faiss pages={ascl--2210}, year={2022} } + +@misc{antonova2023spellmapper, + title={SpellMapper: A non-autoregressive neural spellchecker for ASR customization with candidate retrieval based on n-gram mappings}, + author={Alexandra Antonova and Evelina Bakhturina and Boris Ginsburg}, + year={2023}, + eprint={2306.02317}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} diff --git a/docs/source/nlp/spellchecking_asr_customization.rst b/docs/source/nlp/spellchecking_asr_customization.rst new file mode 100644 index 000000000000..c6666c4e338c --- /dev/null +++ b/docs/source/nlp/spellchecking_asr_customization.rst @@ -0,0 +1,128 @@ +.. _spellchecking_asr_customization: + +SpellMapper (Spellchecking ASR Customization) Model +===================================================== + +`SpellMapper `__ :cite:`nlp-ner-antonova2023spellmapper` is a non-autoregressive model for postprocessing of ASR output. It gets as input a single ASR hypothesis (text) and a custom vocabulary and predicts which fragments in the ASR hypothesis should be replaced by which custom words/phrases if any. Unlike traditional spellchecking approaches, which aim to correct known words using language models, SpellMapper's goal is to correct highly specific user terms, out-of-vocabulary (OOV) words or spelling variations (e.g., "John Koehn", "Jon Cohen"). + +This model is an alternative to word boosting/shallow fusion approaches: + +- does not require retraining ASR model; +- does not require beam-search/language model (LM); +- can be applied on top of any English ASR model output; + +Model Architecture +------------------ +Though SpellMapper is based on `BERT `__ :cite:`nlp-ner-devlin2018bert` architecture, it uses some non-standard tricks that make it different from other BERT-based models: + +- ten separators (``[SEP]`` tokens) are used to combine the ASR hypothesis and ten candidate phrases into a single input; +- the model works on character level; +- subword embeddings are concatenated to the embeddings of each character that belongs to this subword; + + .. code:: + + Example input: [CLS] a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o [SEP] d i d i e r _ s a u m o n [SEP] a s t r o n o m i e [SEP] t r i s t a n _ g u i l l o t [SEP] ... + Input segments: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 + Example output: 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 3 3 3 3 3 3 3 3 3 3 3 3 3 0 ... + +The model calculates logits for each character x 11 labels: + +- ``0`` - character doesn't belong to any candidate, +- ``1..10`` - character belongs to candidate with this id. + +At inference average pooling is applied to calculate replacement probability for the whole fragments. + +Quick Start Guide +----------------- + +We recommend you try this model in a Jupyter notebook (need GPU): +`NeMo/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb `__. + +A pretrained English checkpoint can be found at `HuggingFace `__. + +An example inference pipeline can be found here: `NeMo/examples/nlp/spellchecking_asr_customization/run_infer.sh `__. + +An example script on how to train the model can be found here: `NeMo/examples/nlp/spellchecking_asr_customization/run_training.sh `__. + +An example script on how to train on large datasets can be found here: `NeMo/examples/nlp/spellchecking_asr_customization/run_training_tarred.sh `__. + +The default configuration file for the model can be found here: `NeMo/examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml `__. + +.. _dataset_spellchecking_asr_customization: + +Input/Output Format at Inference stage +-------------------------------------- +Here we describe input/output format of the SpellMapper model. + +.. note:: + + If you use `inference pipeline `__ this format will be hidden inside and you only need to provide an input manifest and user vocabulary and you will get a corrected manifest. + +An input line should consist of 4 tab-separated columns: + 1. text of ASR-hypothesis + 2. texts of 10 candidates separated by semicolon + 3. 1-based ids of non-dummy candidates, separated by space + 4. approximate start/end coordinates of non-dummy candidates (correspond to ids in third column) + +Example input (in one line): + +.. code:: + + t h e _ t a r a s i c _ o o r d a _ i s _ a _ p a r t _ o f _ t h e _ a o r t a _ l o c a t e d _ i n _ t h e _ t h o r a x + h e p a t i c _ c i r r h o s i s;u r a c i l;c a r d i a c _ a r r e s t;w e a n;a p g a r;p s y c h o m o t o r;t h o r a x;t h o r a c i c _ a o r t a;a v f;b l o c k a d e d + 1 2 6 7 8 9 10 + CUSTOM 6 23;CUSTOM 4 10;CUSTOM 4 15;CUSTOM 56 62;CUSTOM 5 19;CUSTOM 28 31;CUSTOM 39 48 + +Each line in SpellMapper output is tab-separated and consists of 4 columns: + 1. ASR-hypothesis (same as in input) + 2. 10 candidates separated by semicolon (same as in input) + 3. fragment predictions, separated by semicolon, each prediction is a tuple (start, end, candidate_id, probability) + 4. letter predictions - candidate_id predicted for each letter (this is only for debug purposes) + +Example output (in one line): + +.. code:: + + t h e _ t a r a s i c _ o o r d a _ i s _ a _ p a r t _ o f _ t h e _ a o r t a _ l o c a t e d _ i n _ t h e _ t h o r a x + h e p a t i c _ c i r r h o s i s;u r a c i l;c a r d i a c _ a r r e s t;w e a n;a p g a r;p s y c h o m o t o r;t h o r a x;t h o r a c i c _ a o r t a;a v f;b l o c k a d e d + 56 62 7 0.99998;4 20 8 0.95181;12 20 8 0.44829;4 17 8 0.99464;12 17 8 0.97645 + 8 8 8 0 8 8 8 8 8 8 8 8 8 8 8 8 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 7 7 7 7 7 + +Training Data Format +-------------------- + +For training, the data should consist of 5 files: + +- ``config.json`` - BERT config +- ``label_map.txt`` - labels from 0 to 10, do not change +- ``semiotic_classes.txt`` - currently there are only two classes: ``PLAIN`` and ``CUSTOM``, do not change +- ``train.tsv`` - training examples +- ``test.tsv`` - validation examples + +Note that since all these examples are synthetic, we do not reserve a set for final testing. Instead, we run `inference pipeline `__ and compare resulting word error rate (WER) to the WER of baseline ASR output. + +One (non-tarred) training example should consist of 4 tab-separated columns: + 1. text of ASR-hypothesis + 2. texts of 10 candidates separated by semicolon + 3. 1-based ids of correct candidates, separated by space, or 0 if none + 4. start/end coordinates of correct candidates (correspond to ids in third column) + +Example (in one line): + +.. code:: + + a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o + d i d i e r _ s a u m o n;a s t r o n o m i e;t r i s t a n _ g u i l l o t;t r i s t e s s e;m o n a d e;c h r i s t i a n;a s t r o n o m e r;s o l o m o n;d i d i d i d i d i;m e r c y + 1 3 + CUSTOM 12 23;CUSTOM 28 41 + +For data preparation see `this script `__ + + +References +---------- + +.. bibliography:: nlp_all.bib + :style: plain + :labelprefix: NLP-NER + :keyprefix: nlp-ner- diff --git a/docs/source/starthere/intro.rst b/docs/source/starthere/intro.rst index 2e0e272c93f4..70426d3fe4a0 100644 --- a/docs/source/starthere/intro.rst +++ b/docs/source/starthere/intro.rst @@ -34,9 +34,9 @@ Prerequisites Before you begin using NeMo, it's assumed you meet the following prerequisites. -#. You have Python version 3.6, 3.7 or 3.8. +#. You have Python version 3.9, 3.10. -#. You have Pytorch version 1.8.1. +#. You have Pytorch version 1.13.1 or 2.0+. #. You have access to an NVIDIA GPU for training. diff --git a/docs/source/starthere/tutorials.rst b/docs/source/starthere/tutorials.rst index e819337cf012..3859cb630d02 100644 --- a/docs/source/starthere/tutorials.rst +++ b/docs/source/starthere/tutorials.rst @@ -106,6 +106,9 @@ To run a tutorial: * - ASR - Multi-lingual ASR - `Multi-lingual ASR `_ + * - ASR + - Hybrid ASR-TTS Models Tutorial + - `Multi-lingual ASR `_ * - NLP - Using Pretrained Language Models for Downstream Tasks - `Pretrained Language Models for Downstream Tasks `_ @@ -130,6 +133,9 @@ To run a tutorial: * - NLP - Punctuation and Capitalization - `Punctuation and Capitalization `_ + * - NLP + - Spellchecking ASR Customization - SpellMapper + - `Spellchecking ASR Customization - SpellMapper `_ * - NLP - Entity Linking - `Entity Linking `_ @@ -147,34 +153,34 @@ To run a tutorial: - `Synthetic Tabular Data Generation `_ * - TTS - NeMo TTS Primer - - `NeMo TTS Primer `_ + - `NeMo TTS Primer `_ * - TTS - TTS Speech/Text Aligner Inference - - `TTS Speech/Text Aligner Inference `_ + - `TTS Speech/Text Aligner Inference `_ * - TTS - FastPitch and MixerTTS Model Training - - `FastPitch and MixerTTS Model Training `_ + - `FastPitch and MixerTTS Model Training `_ * - TTS - FastPitch Finetuning - - `FastPitch Finetuning `_ + - `FastPitch Finetuning `_ * - TTS - FastPitch and HiFiGAN Model Training for German - - `FastPitch and HiFiGAN Model Training for German `_ + - `FastPitch and HiFiGAN Model Training for German `_ * - TTS - Tacotron2 Model Training - - `Tacotron2 Model Training `_ + - `Tacotron2 Model Training `_ * - TTS - FastPitch Duration and Pitch Control - - `FastPitch Duration and Pitch Control `_ + - `FastPitch Duration and Pitch Control `_ * - TTS - FastPitch Speaker Interpolation - - `FastPitch Speaker Interpolation `_ + - `FastPitch Speaker Interpolation `_ * - TTS - Inference and Model Selection - - `TTS Inference and Model Selection `_ + - `TTS Inference and Model Selection `_ * - TTS - Pronunciation_customization - - `TTS Pronunciation_customization `_ + - `TTS Pronunciation_customization `_ * - Tools - CTC Segmentation - `CTC Segmentation `_ diff --git a/docs/source/tools/comparison_tool.rst b/docs/source/tools/comparison_tool.rst index 1e28621704a6..6e5d28a0feb3 100644 --- a/docs/source/tools/comparison_tool.rst +++ b/docs/source/tools/comparison_tool.rst @@ -1,7 +1,7 @@ Comparison tool for ASR Models ============================== -The Comparison Tool (CT) allows to compare predictions of different ASR models at word accuracy level. +The Comparison Tool (CT) allows to compare predictions of different ASR models at word accuracy and utterance level. +--------------------------------------------------------------------------------------------------------------------------+ | **Comparison tool features:** | @@ -12,6 +12,10 @@ The Comparison Tool (CT) allows to compare predictions of different ASR models a +--------------------------------------------------------------------------------------------------------------------------+ | visual comparison of predictions of different models | +--------------------------------------------------------------------------------------------------------------------------+ +| visual comparison of utterances by their WER/CER | ++--------------------------------------------------------------------------------------------------------------------------+ +| listening selected utterance | ++--------------------------------------------------------------------------------------------------------------------------+ Getting Started --------------- @@ -151,3 +155,28 @@ In this case, all points lying above the diagonal have higher accuracy with the Points marked with circles should be explored first. Words in the first quarter were well recognized by both models, and conversely, words in the third quarter were poorly recognized by both models. + +To compare models at utterance level, pick it at top dropdown field. + +At the next field you could choose metric: WER or CER + + .. image:: images/scr_10.png + :align: center + :width: 800px + :alt: Switch mode + +When an utterance level is selected, it is possible to click on a point on the graph, and the corresponding utterance will be automatically selected. + +If audio files are available, there will be an option to listen to the audio recording and view its waveform. + + .. image:: images/scr_11.png + :align: center + :width: 800px + :alt: Audio player + +In this mode, filtering is still available as well. + +**Limitations** + +To ensure efficient processing and avoid issues with memory limitations and slow performance, it is recommended to keep the manifests within the limits of 320 hours or around 170,000 utterances. +Exceeding these limits may result in both memory constraints and slower processing. \ No newline at end of file diff --git a/docs/source/tools/images/scr_10.png b/docs/source/tools/images/scr_10.png new file mode 100644 index 0000000000000000000000000000000000000000..71c378efe57bbb800029b40a3c764d366ce6a930 GIT binary patch literal 42125 zcmcG#2UOEf*Di{Jq9P(9C>^OHO?q#N^rE659ciI=0tr=6QF`x1q=`U6Zy}%}H9&v} zp*LwU5G0|8asvPNJ?E_Ne&0R!u6sY$VoiQCzs&5}Gy9o6&))g?R9}Po8q+m0GBRo{ zO||D_WaJ;o$S%@dxlF40T4eH*R9x_RuJMQrGRT4Fq1)q&9T`~>rq-dFdEPEs zap$7rl-1PjMd``6uU^r&OX{)fwf)jdd#{!?8oQfsYWA_>?WY}v4{lla?>sZH`D|qV zwCGySlZyLQcj-Mo@Dfj30ZQV4L0R!tIUivdw3KZ(M|QRUpr2Gr03lb!^BKMI{_l6F zNFtJvbtQy!C{dn$y6#P!Xg~Wvc9Z+!Sut|u)`hdOt?TM7G_bF~7`738Q$9^wV~L0@ zvYi%e_J|BNxfHAe(CRiNa+(#`e|+Ax2bMD+v?q(`eMhEtExBQzV``yieMmH1m`nQd z<{6=hp0g78gTrTKJ=UA}Yhm|gosII@@YdEk5}NCn9G*R)4GGmrr;+tKDW~W>kUQIV z1|jJLpHf&pB6qgBPv5s_mF_4ggVu0W#~J0?+|vfEkaX%hn00T4)*FJi73Dpk_Cif8 z?kydmR%XAaHwTj-DY+SXtDzW9P+cX@@l4L#ueu_FCgJspa)|U^pzQP}$Z04IrIhTx zw<#!FT4~lIEV=0isjW=XnOS}ag(ID!*ZT*HF$WZ(0(`kyH0fH#dn~f0eM0qT6H7|; z)Uj!-E9TASa4S>@+0v%dtVoFNC%oA6t63iN4f7q0NM7>Fb!XG*8c1}JBoqDKSMR1* z^HaZFlR?(Um;9bQd$4(~x3bk&uK}?iN_uA8D5GYKo?DAfS6%(oH$FIv8$jcz%_Pn# z8&Zo9HOP|ollSTO+I7{M32x4r8x|^ql!-lx5IdEY?vvy{9$4`1RS=0E(@O8n$DZ!W zXU^5uQvJcWCNAH}-pd6Ir0b|pN?)f=W8ZX49D@x6)*?Mi6erWX(>vOB{VH!~u9j$^ zHPSDa7hn_~PwFbwH>`OBPS{BE4C_kqOc>{6Ma_`e{p{7_{g!zT@FzNPwKv6U&<YCD_nST*fS=FpxJK?ir3{klkh^ZC{)YW6f(^qv`WkLm*@V&CCsT=NcH6~rYQ^E<$BQDtO zmR6Q%yG-dhR-NTE2x9s3y2|~S&z@i{v>Jg?I8az2;1X}
F|a(A7cDinjq)cJ=+_c9uG8 zSZCWEQ2EEC%(++gt5Y@4OMHCFvGdzbJ^Ar|B#dw>!0iJHxOu~3K$&2-X9M zyx^7b8>1sfpQ(fV`e^-8W|T;~F683S?^A%==)q zSqAd6uS99+aGsUl{%4tRzY3L8)*~OQewaB?Q5HHUn;+>7;QXH~vPR&8=wTKH1+!Jx?aTqJwi? zn?r|wG0Y(D>%*m+UOVUo~Y&V+gtVy(P%7c zDe1dXn&nv@NaYDUU>OCA%jpqfa;d#o7UYJHJq%OxNK(KTATEy^h+gk--Eh zBCHIjRTET=X7x`LOb0m}K05f$Ix{lt-CWBysXLo1I~6j!WZ;7E^FAgW4)SNKayk5M z#7_oH2CiW#R;Ia^LPmT^H&_&}kSR8pDX3V^poJa9d|K_tpg$H_rnPk&MFr%eF?1sS z`RlKQ5N?PxR911N%L?JZDS9Hv375AqXh2>+fnn&9pu2v6LMB9;>V2!X$S+Oj`m-mV zZrfz;S?yLYsFF??h9Hj8nw>0mr8V?ZvAN$SivleJo@O}Z3SwK#AzR^61HUn`pERfW6>CezB0BKGjN4vt3cHQQ7m5_AB~W zYwM6n8)N#(sO&8J&^=+N`eZ7M*`#n;x!X5t()X=Z%0LNUgx*?Mq74fHTP%qybUA6= z8tmq|d2M|_Y1Ror+m=;Les z+-SBJ+jb{#IVc$&S1(Y62u{l{K~SWZW^?n!Qe1j<{zxP&g@tEnVtPXU5mf+Oy4`N^L`UnZwN1+uZAMbC*XUG}I zNr7h6r+6fV4Xlv0e2BXqWK6X=@&4N-FlZd=y8{pryy0UP1GHk5eF@oupst4?2OkVr zcCQteEArU3a*u*g*G8k9nlKBTE_$YD#Mww2is>%DR3S)(&VQ^CLA zX5KyEC`C^QrBqvx7c(!bn4KMKWTQhFXl|*Pc_H3u4fDpBo;La?+is#~EuRAF{sn0N zF8^DAjT$4pP8bjV`<=4JZAxmQ77)+9FMc-u!~4{XVrp?Ri&tbZMafIw$Z6^f1(F>T zn(Pq_vg=hHOHIGyY7NicB=Z9Aj2bWJ`p+hc+IEfer)t%~${ILsah&Z$6>o`^k#Pax zkC#9Cik`WpVN0Eifuf#gyNrb4|2a5YSzP`IJwC$XJN$OvDR3ztVNVm#>*e6BNo45# z(`rQ9m!~;|v);qBhm!t5|Ivqw6={XozrbRaN)q?=}$@f3EtppQ1v2;OLA-(t+o8z z$NUF}NUD0<%Wa$e##U1YG9#4*EkUzEsLJBA8C?E6)81z09ykvoKG^e9{0d9^S$MjX zZ+9@4FJOfOYv&zemDAfn&ILgSwLW$ShNs-2;Fzn8?k9PgFwiDHA;7wB+Wn$V_hRG> zM*6gjCAdGoPZ>izT)WjgLxpceW?-ng&H4OgRI@JUimPq!+Tsf|IH~Jup-Rvm5d+gCt^e3FB`mt&Q9!y zpFnSHsxK@~*1Ie>Ot~Wh*6N&{zPa3L7o>t4$pB}Jy zgKC8AR!d$>oRU|a!Y*7s3VRLQ@5p$J?capOr6Geu$`yhL9h^eEjuPs*L6GU@hXF#| zq#?uJ2#$A3jPKOl6nR(y4Ey4Imzzx}RL-_;&To4m86^~=k?&LB>USA3{BR@Ld*QCf zJQE=|`%a%@JHlcAlbF7-!EqbtG*Bc?cqqwnWg_pSRorucF_a5&8sgGI<jC zN>KSr#9VO7S$-wxUEuca7glJ;w1W;J9Bvj1-@LAhcG|@TIn#1{y?6XRm`#2| z+AR9rxiNj`9vm;f0M$B!EHdtQ9JFWUJeTJA^ZviDV`>pU?>&A$4mfKJEBeV8$iu|V z=Jk^n|EC-}X?$MWvyU#5?@YP(!coHl5vW|1V=Ig{z(S8~>k+^j|fffi7r0F96@Zehz6WNhkB-`3=Yi zBp5qK4M#y==`&0s`}gGjtDgQf_y0q4+qptvG)ns_4Q^AsfxGL86rtx6x?D_BSr4rS z%RRna-z1iR`8Pmk`&-nv8eJziBUynNEdJA8z_Y798HYZ!?eJ?WVNhPoq5rOYvDinT z^eBVc#d8H{#&wx91$Ocbb0i9voV}gUDq|~h8FGG4{;%dxZjsQ0i^P{@=U0pT632HK z1Y4ZgrUyGI_z+5$BC)=QrAvkvi!l_07=Tu1xP`_B`jgN&`vOrj<214Mgsw z_ZQXK465bim3FF9m{2&jE3apcvgB;{s=)tV?6e{O zS?_}Gb8DPQ+Zu8jeU9RIHrVvEtwJbr(m4QsA29MI`3YOd5#vP5GQxX-aj-}eC;1U! zA4fv+hvz3>nA}o52UsZTT~6@s#9&AAv7A#q^({BV!Uk1i-@DbOjj%H~=y}{YmDXv) zA`}WHJfi()nV$l}Ep#I+45+p)b~sah_}ZoVdCGhL)nD)!A4YpJ{L6(!ExufCO16Mcxu%(F5$F3_#rE|))7cwg|1T~4|CwF>N3&%f@fcs- zn(~Z4+F6X5VUi#`FO(1dsK3@S5yAnS4oI@P{$qe zLb%nrp+7tLUH>7<$9@UsV($egoo;OVVPpR9&1)h{ZY+wqsD^rp9nOY!Xol9@p< zxw}Lq6EA`?LS$Cn*r8D`k#6bn#P)prGOk5~|u zq~H1yw6{@N1pg~nXQ`CgDs@lkw(Q>?FFZiJU69Vv;oMfDUY=(CJXY_yv>HbTf*VWW z#;$tw=0zZrFrfsY<^`B>&(iDQniZUVO>e8uDjMGZ*@M@o=j{{;(|L$wcbyBzB zyr7M#6@pvngVK#DZsp?%x*83@IWT_F)VmCk{d*J6w)Hr14qG1IS1PPvrcT5DAd&^& zzGMiFPcdK>#D7Qa_4sd2aBElvy$cH5okKM8x}=Xd ziYPa!xPu&_L4Rsa1?Emj_5B8u=T~hq`tnB!`vtAUv8fus-f2%4Um8N_v`U3-)<3KN zNI5@+d;k_WJ7n}E@$n6%r%D0k#%yy56G;I*_Z~m#_m|$B6<|M_4LYer5Bwru3X9MU zYj&wT;$058aVFu@=HpeSUC1z22M2oSO>4Eo3ws!yWjo36Sq0ttSU7vYvvxL8R`}x zCMj`;pjMv9>ANv1(K5>ZC&Qv~~vtRDpshPv%r*?$; z6izqUq_3N{KR3cD=<{Z7IwBVRbCDG(#_`$QrP&j%O$2oi5Y0fC2_=-4QTvA&hm65= z#FDElBz zS6uAagHXdZ z*@L@`$}?_Pe?bbO;Yi;QLv~I2X%!WWQ#3lZ0YORI8rSJh>H;3 z+46>>8XTQfcI$GM`{xvdCxxTkjDJ}OG{8`jtQVMZ=7O+o;^FM}vzqd!$k87AziFqN zOMS|6>fpR;@yVTv?U+_g-uK+>XA76~A{$o{G9+m!jKA2t1;{jRma3St4zFi+3j-Aj zO4Cm0^QFnDm+0Ogs(+N=m?{>EkP5SK4e^L%u3!~E_?#f&mQb31wTPHu(35#7ym`&O z$Na>0Owr|$QF0Lgyp$ns^GfP$ocqrn$GHEgdjP)RuqyrAh<5jh!qN~mpEV2>)zw*R zRr7Y0HtW` znbG>OJtLnyt5ezeS1c;%qgC?xjX~4W9L>$k^ba$`L^Y##?)LoKhX14Y^7R%sE|^FS z!VdV`ZEmi=o!dzDpCt~pJq+241I-1kY`_M{32U(1@wR3rrTfRG6qdv$7T`40 z;#{RR^93o#)18rH^)I~n8Z|Gj%gGm={n)ru^c#Pg04f40`yIkVY1BqSP%UR`B33h@LnpuMzI#ES|-5z^= zSdom)wkksU20dPOjdwziu@tU33be%O5aBjZb9L*eCjRrl0IhdX$ zaR-6VI)E(%BJ3mem4k{{WP#AuddG->t(O^()uj;kx;I3K`9rQsbAs%$)4JUWPd@YRG!L>1xmg}%}Qq{P|Y z(p2$QU3?bGQF?Nm>b(sxNGKeb+X_{FlA~UAOp_eX0&2SMrF47a+vE4W-y_-XKUNa! z4+_a(scG+FRut*k3?Y7Ae|7k!mAFD#v)4Sci{e8Jr{@WGsOJdig%T;fzvO0NE`hM1 z8Ca%BOXqO2D|Q~6#~sI~ap`_4{|+JCEu$4V6aQEK*X!nV>Bls3D!Zpg`F&;L zC-FIl8&Jf4Z^wpw9{Ey2N2EGMm^)x>F1vdZxPH@(rT_2j)X67VBD=(k_aWbp-XG+~ z`yEy0fU|--CQmQpy8Qw4IyV zj{Rx#`{29}jd@_BB~*R_S1THM^1R{1J9*t~ifY6r#Z9r_!GVyBYGjVanP4UT}`W3H-c-6M|E$zHA%=qd%s51B+kZ?`-eG>9MEVwuj`!^!O)oo%JvAL3Q#Urt}Kx>A(EK zq$mG5`}%K}?Y|&r{}0V`XA!;l(f8EM{~}#Ry5iYA&fTu96hr-L=!3llT1xEP)@7*!0)DTwU+F2_v!yWrWf%YCEfdSZ?i$f z9P8eqp0+Mw&>vVDa^$<`g#zylPJ;9ODrvemgmRtdHhbwrn?e+L#f+mln zDl!G&cg8W|42WwUa-8dFJhdg!(+evT1Z42z!J%H>yRMt?ANpoI$!SQx2%ndcAl-9i zU*|PY=H@2=Uj(t6qH8Xj`65SpV*bE# zwOK)vMyX1t#(sPpY3ph$`1A1J;?9rrI?YZCU`Y|SQR7{QhF1fxT+N!NDZg7E4Q4k! zNa$|tsk7Z+_WN*7oZY>xu9ciI|1>{$2b4I)py}+I9CLS+dsQgt1D5)?=Q>3ux6g97 ziG{jGj)bX^h!t1xt{xr#j=X2=!cy?=w%mYixEh(` z?ZKhtJz|G*pir2*i#51K*|%BzN0xt=Sb&j?qQu_Y7=hK~BBy4pwEd3IK)}PyMat9M7=Sy>QutqfSv}5v5*Q0~!3(HH=O)fiV>ksuD=FQ5hHfy&&Ah1M z70h=m&9?VzqahRJUuB>zAXYgRuXm$o>b`}HDd29x=K7US57W1WIL9=J-Y^L6uF%F{ zHL~8bxkn-QM0l6%;ia4^5}1();F9FcTkYUrm6Nd@Xpnz(9NAwOD~f#7M$J z;to*9!g;iyRyg`t{sSp6uhNk9aI1_;?sHx+(*rx1-3|EY zA2urMr(X|+IInGVmm(D3%=toiB#2vY9weHn={{6#3uY3Qj^BB^WPJupv5u?9-q=SQ z7wxT9hiKQwmVkjH*M_oQ)|kTnI->KwS|n#U&E)#b(C&OzyWXwz)hS#s=;uaB3yv3} zMRBw7!7S(J`H<-%`$w8+?OJxIB0LV-kKOwk5dEQzc&Fz_D<5|U;T7t+516_l)I@#f zYzACgEsN$&+PsQ6Y@$N_jB`U+3*JeH{K^80tA6?fIQBiTvbo9_(q`L2T==T#k2y#Z z9Gv43u#wrNwaO46NWnm!E-0I_KRl^qm)|^?=KQ>N9I#t?yY;)#v^z0Y;J{2RAl+03 zT_w-etp2gbHaYFP``MAu?cDL6+Pjaxblf;KaN*q@n_R0ACLwr~f<6f)D=t{^ZdWXF zk=?~m>XCk##WcA0w=5T+=FnX4wBP^(p*1GLKCwqLHh7FUmT$At- z_)0b5H;q>E)sPts_+$8thKn2=OQ_4P)Pu$;WwFsuWgB>XP3@M8!bDL$*t}u>9PB?U zVm!ky=0q^xGu2i`ekRCdq)!fvDSCayKJZj@;Mj~VaAH#qX$^kKXnKyO7h;Et=^BTa zp9Q6&e)DXuHG-QpdNd(e_pJK@rQ=w!@<|lm9TGPo5mnpv3}y7QZ%r?|d@kw=nP0I3 zaqGKyi?3*5KJO3qH-1ey()Qs+4ItZ#ODkUcnh-E7bsOLk`wyz=-nEeP8sJ-|+4a`& zo)O+j=_J_!iSFR|zBZy|CZg<)%?7J=cn117i zj1cKGg%9~R9?i!oWJKO@e~3MK6&YagB6r2F*#^oELx`|F@h2GEZOj+m> z&=-tH(AAxl-!;Q>tuP@saGK1_` zW6&je6_B|q$r2fMCV6HlsAYWv8Y*^uv#a`W zPe{d$>sJ;UnJDm|T(fqwbtmDzU=U-ouDd|Y9t<;<@vVm856*7Hf+=^8ltgwq<7=8@ zbh}m`(x-3m>fqssQ+(N!Su7r!vB7B`1j@`~y9-uMv1qtREYMu>|Ir}dDEUxPF}Fd* zats4Z_c(dI+>yD`DA$4*K1hH;e8cW;o`hhK*MXg>&FfB?_Oq_cgL6zQM5eoEdxRqJ zvT@|kUCP^@NZjtsCN<2B?Hp-```(3EdX${f8G1=j4oo%lDZrg-<0?u!dNwj4nm~FmcI;G0oB`;1_Hj>6Tid4X!4jr?ko&CXcID1(VCH zM_sP~4YjCPqvk4Zc?1MQf4tl%*vImmMhVlUt`@s)N)9g>3#?0JG{-K{SI^|GmVU?P zp;e|VKK#Wm$o*!He$wxa`4|WC^M*xqq2{~WE~}7`n_9%vL1O9pHi+)4Y>NuxQEdmV zLlLo=X+7eR&+GhXHgW9{Apig^T8Gh7Eve=(*<9I`V~c7pIWX4Gyw4>fIUlD1X6B#$ zHjPtgUhZfs=nrX5i%7bDojPQ_+=t?$?3m8VeW$}i-$Nl=vLRu)?=Es*)u80o#EPIyJ>X*0>pWn*VQMr#c|VenYuVd#r~rD})^#@=9um0- zbvQ|Fuu$Wd7KuO<%GMmC8x&7ozZ3oO0sLRz;NAL(CC!F zZ)k|lzy_ULBkyKWTG=N{8RM*6VL*rdWNKmY@ekii3o%r>os-xS!%p&$!AlBo{?X)E!q*M+;^&I;sU7q302V()X|aQA;r zpb=un^+I>6-P5ONt(tzAD0w8%Z-@!b;>A4mN-it!N`5{MmzGmAOZD)V=Ga)h#J#&? zdH;0?xPg1<)C@-Fm{3Z4;;*(cvEmr}%h=o+W|nbH3;ebM;~kG;w=|=j5l@-4=YSLR z4E(cOd}z3fiyM_va#u<{lFAz-GpYR`*?dy%5eM0WMBam~ipc)Aoh!8~93H?=8!KT8 zEB;P>`p}YP{Y0&ji+?KcSAFbbQSW$KgLFfdm`q+G3&K0kP<2TR;zq@e-NW zatAy-0{iz}<=`zj8)BlR3iP&5-TWoRtnuUg>G@CuU6}o<*O*d;skRQ3Eq^sCsB!=J z>8(d7^THzm^#C@shUNQ@f^_6wu=2#Dk?dzQRjsxzf8jAsav6zZ1JiOlef-L)T8Q?H z(UBbA+|Lc#TF|lY;pkri?U1`k-TKo75gxKc=0+v%Q@cN&Ry3AoP62)|h&4C$2>V;V zH_H3252dNGPV`_4E4WgG{vtWviV27WrARc=iv}(UZcHx5e%)!!8}L>)WSR~yOa&Dt zJu$VFN)Z?Ui>fYauU`LaG?C0ZSW1#UF!&k=}# ze&e+nSr~xb&-KEFRtofJImV$hHSe=VWqC@8WI}`We7k3AS#%P(lSDEQouF%B!xziu;N~dVR(`%-^Y4Wyi#KH zwTt?b){WV=3Ip)1Mty-a>cmi0znt`EmluKPp3my7lDF6`z*RlT-IEp#c^b{E8LEr= z@C1*#G3ROi&PA*Q{!7L6vIF((o;h?rasC}%dv2ftCW?=69ENOFC zo|6wR>Gr$-Zab$>{D;e^dODk?Hq#Shk>Rq=`;DE<8LBp(6h)gKbn)5uFBTeLhMIuj zrTb1`#q`V)^2rLIy1~eBVqIY5l)}1iGRk2;sikzSv+TQ>>rYiGQ}9xWM6UbDt3ukE zkwV{0)`1jm^`2<6fP*jewN{DmvIneYs-yWM^i^Kk98kqTjJCh{*^4_7GO?@_{|IycK_!HN#6FSJl4&Igg^Q0~SgBuY|DNyAp+0dtq_)

?FkxDtA2MG7#8Pl8aRv@)gE{(COd|$ z*bXy0xB?&t{VD^ivhkH~Z+F#hybya}Q%QJ5lR`p_eE+qhu)_y*of`&pT0WPstv4HB zoK8<#`V+4d49j-K_{ro3Wg6P&W3wTD4Q%!I1gw(ryn;@6L!{!Qd)d ze1L;u_h+^yYA@j?Vu^ao-Fbdh0FM#EP^4lYME(}QGjAK7G>ycn%IwPt*(%KjIw*bn zbgOyeKDE+%U5wj+OTBlah$411XSD6iT{F+Xd7!<-ovz@RTWjN<5o2PGRk|4Gw(-Rt z$H@|U2$GvNcNze~G+Ztzr5wR5HS2|1D>F1G3uWP4;GoNcLnUT>BL}35QF0m)<<#>E zIu+=K4N>u&J;6@|!%dWD1LZ<3-VbI^g90}}?b-aV7`X6V*=#a#U*<)5o91Xv^vAd7 z>-v!zsin7m*37thqY57P1;T0j^y0ae{9|K+uCNGnG)i(+=&Dt;SBWiDC>!$TAm@DB zVKKCoR|>G=bw~Cd{@;T}E)4T7O@}JGn*Hq?yLGCV+ioHyzNw$BzX$g{vAV4Z0a=Cay)x^rzR81OJ^HcB2xk2nSbj(Ex0(rNTKEY~9>y%yZf=p!B)S;B zcZ~=$BS$w20@b_xN~0@|>W#?yzbm*p?uzY}IyvMGQn$M01ys^H!H4iEsk7lW6Szf|C}p3(zaBG%AUdy# ze)zKCi_9L=(0A@8OS;p}E0u)NR;kcjZ1%`>@WtD*6|{;QMwpWU5amTw5Vwm+W+8rRIb)raqVtx|{{$JG%R`uNMIF{TZP z1=aDvu^&0AuW7?N-wSLrkU4ZO<_ZbyUDuq08h_Nyv_{!n>=h_QcXn2*91l&!P?*YI z1o&cYG(As&oZyYe;G5yo!CmfvAMErdD73!Z}SNEHjLyQ%y*h? zpo}}m=L=DWnAp2gtvSbK2CJA=tH~A(-Q^#%E9!(z5xsXK2;XAnrp%jDn zPSutWmX;bRe6&CjiuZF)|VN?^)yPdB7JXF(0I;)%M zsR;^HbGW*f(z%vz9oKBFY)RWUJixuW0DxH076+(=J;-o3b6V#rv@tb!9KH&qN8EO- z9F8)o~HP?WD`~<7_O}*y8A_E zLHKw=1+50Yjz^~O5%m?A>|x}17gDZJRXS`YlguGuWAeeduRi=u^|KW7R4s+}-4`?0l#u8v9)+IBq#}C%#{u-MJmCabqmXcuxXv$>G*5#ir&F}+ zPd~O;Jp{OFT{_IP;`I9KDg7}w^^2U56g14ca!w^Ib1j~AF}+`YQZ^%2{HpP?P8K}n z^*N4>rA!v-)wsdtq`mv>qOJi>je>oPF<)RD0OqTMhGwB5(|O8H1>pzI+cWJd3=G%8 zAFnCN#WcKK>s3{BP@A3PU6fMFBp%m&!3?%7=_j`LJC1(1Z(#q?2}Ur>d~lPrfB|u- zqBCMTku5hoP18LPa_>7|oY+h-^dAlkeVektTocjQ2i_4Bz!r-SC0VxDQF;E9{G#Rj z*VanQ*<8bUnq$~9dp?gxGv0L-Sa8KM`=*zhCH<2bo5DIXHOD)b30|cFab@Ouz2(nCtSNTd4DF|e9bc1P!WTB3l@6b_H6m{d+PXB`)AKC zA|Ql1zUb|y0<=O)yDk2A+`MSfD~DO|Ms#kY&%i5L3$6hYddO5o4x*ZshJtv5U6{oO zkP#oI_9>J$2N6c=oF(#aR)2i+%{EwAE>oy>^{>9(nSTt!w3Z#cD%+v)xxTw%ffuT+ zSQmzyUWp^qaV+a@l6Ljbzx zWr!6(hZ35htQtc6;X>}}XyaCV_%SAvy~o%s5mu`eVu#lg5Suk|5kXG-Nem-(l;d+j zmmr~ArIdtMuxm?m!wm8gUJu>Gaj!&bH8Op%$KnI}Ef%HnSY4WFTFl7u^$?|}Z6o%B zf77{4`8)VER;;R~!3TAUG6UC$L9|ZSAMn6&hk=?|x}CF%k5?-@65P6ffyD5W3&BcF zY}e8wK8=i+v1@DljE7Y8^&xF7prX^epRUIm32WWiSlkW*2V$UhnC0caK4JRT#X5kc_C?kVf_C5EP*sl{Gyi(JW_}r z{B3Pn@;mlJi({h1K*S!y-IiNQAuY*eIBFc~&Ni&_Lv%FD#$ZKVRU#qgFKvpxzepC4 zP41YlMqfk?iN>2Bg?^LV0RicvuoI{Ydf@V2JvS?Oo> zzt>nhp`??cWWTV*{z{A*L#vc}gtk8`px*mk%+oa7pA_@_qv<0p>rg z^M^*VK*-cbrqt^<6d9i`iZy-xK@NL;;{ zy7%{)Q|KA*{-@EpDgG~(dfi%d(zIa$Ml(6D5cnBAxaTfC`~pQk@(WhduPTR+nYTCV z2`}7#nVL81#O|hnMj6SD(oj$sCLilXZCUnV7=`#|B^v%rg$1&4R5 zDhZ_E>6JnqKFBc1sc%)aA?blpcPK$LJgHF?!^i*#7f{s3d@JGdE@lI1E)}7cTT3jP z{hFty>gs6wuEoz6158?<9X+t6Bo5;;Iy^zZ^_f&8l>Uu9Z~8-EWQP1hyR zl1KBej^3=^kij7C=03D}Ax8RXZFE+Qu;Q2NG0)s7UnHynoluWlH2i%I&88rrtl{s^ zl(^Ifecr}?TDrdATB(ew!D% zB+*?HKSFK~;<7PH>s~%}e}9Yc>A~_NfsX}+7NM$3C)2}CgtC{iW7qyfQE6``Dcx^? zJ)FQ!o6OPK)yL_6XG}|T z&{v+8ypEm9P3%BwBTtn>fL0S2>bU(cV(LGIrK||U3N(zdb)OaDG5}@T6PI;#Jamb( z*IuS{OUmq~rp#vasT7l}+uzra>~6~TJP@o7{osqJ3Kp@;QofnOX1$|ZOfVKnGRS`k z(JnV{oAaBc%3h(qwk(7wAu&$2z^C=A?0EvKPCQomXzWg%Y33H*%uIRRrE;IBNPDr8 z6LSHs&HPGmAHLlZlZV?o@?rzRohRK}lr7rG?Ik5F4nfJgJS8=`@~0F5SUKBZgLKB{ zcG9O`)0wHSyMf6sjDf_}bvzPhj{N>g9fx?@?M(nMQeP|ia9Hh2&{1v5a-OAfu2*@} z-k(%6#5qi@pQdS>;qU|E6)|$CqYZ?f?ds!$#_!)NzsCnt$8d<6J&tF)Vm)6&&O;wO zb@RB@A3^Ig)T(E8O}BW6k>kNSkpF7*k%ghwHD>3TAj?Rn_Lnr<2B$xrRQHGPbUj(Q z5M_^_YEJn9A*a@K{d5Z~&u3VzO#`x;bX)}Uhr{B;C@<<5_(v}Kk5$uxn2V}jFY7(L zD;3n*@nX!!-QmmO#3&+5AM6G1lNe_q2H>7I00r74@(?YGx+=BEGRBf z7t}~iinPt$!xzV6gVR^W0GI9|-AuO+)3tb2WOoEO_dM~!Y*}+BZh6IkHXm85-R(kB6aVE^S+T335k z9u%ZPuxhl`5>@vt)4#ZtXpQA6RJffDmD|rLLa3@bz&@C@iG4-SM)PC6V%s0eNbXld zFOYJXR5}o$Qwap#7e2mk#NJ-7&y)IIG^f-_Rz;!=01%0(v0JRYldCxxksxMn58u`4 zh2Ocoqx6o7ebYFJPcvDgQH&$G4S#;XvgHO3TP3o!{XN56O9hJceDwda{cz!dya@0& zq)&prNxt#XUqtxXteEpIqMjwi&f1aWKINW~)#}gt+#c8Aeg7x%8n7$R*r-z$HR6t{ z!IgX*30xM!7YaOSyWI-p=A(Hk0jOKJRB%Cid?P1k3_i8kCe=3^I^IBGhf?7>lG4PG zVhEA(P!X;+&v!j;RNVGfutuGzD|i3=z~Ivjjbs}e@d0S|KZPlfF0H;^FXJerisZn+8haFy=QiHV%TJhn#cXzYdOw10ly}w# z#d`jWl2w#7#MIY#ah%U)+wJl8WNq`vx-UJ8FbcvVv~j0I;EF71MoBgvsL%iKt-}8s z-wLM!=9g<-LX^%_)d`N*Q9O_+X!Ma2@X6S zrL99md4=z2#U=i)u(HPy!nL@K@{wZ7@ec+s{0-ADTMxK+lkfeIAlq8Ww)QvF5Iu0- z1)Wq<)THnDW-4&y(zx5Vjz~oIi+_&!^A98I!Ph=e14hU=x|bQnopG_8?r#zk)ODbo z@yQx&e9>Zwq_hkWxd?o6Kx}r3{za5nVg;#{X$VwN89E{DfObCtSNfmPkc+dNow64` zp?8m8@Cu#Utn9Sw`y&_4Iw`%`uv(D6W*r%P_dpeVk8)MN=LBctxJsjsP^6>)#q~6q z&dpPaf$58+gtuR1)`+AAvNT$biyt(Wdh&Q>Ui)HREiubV3&y*M_qjQp<@u}huUG9f z*u2=iQb_Lk3iQeSVT&&wzvKmv_tyC*w?aLn+ft9p9elQsp&?`Q7+Bz2sJA@4|e zyW6p$ol~Y3NzvSIpXg9e*FbAe4w|{*(+**~EmGO#;SBJVst1(ZQRj6BZuq0P03nxDIx4PSA;H~7pU~a;CH|OU;tDA-SBC>mm zeKJjqaAlUy?;FDd&$8s5URVoQiy({ zcH^CUc4Y&PQV@_?lz#1nA$$p_lA|#FwzWp zo@^mYs6yD4?93<|gQ0bV;DFB+M}x;BrChdIXtJl*sFm96c6D$kL(GocLHW0XvE&^T zaqv4$D4Zzq7rp+xe@nj<6y1+tqjiX_6KQPJe%N09m)3u-9#?V+7+Q+j*m>g+Iyo+3 zjWYW^zKq{2U`msI6SPMernRTuE8B%pI}P|9#BE(&w`aM~&lOy8^~}$YU?oBQugo?# zGyAU8VqddsIhO`nUgJT8HYS;U27hRapR^)(a`f6rEsoabsW5-;vl7JU8!{A?6xVeV@inCnJa6VPx{XEw{C@X>k1g z^%x@R-J97v6Vg9DUdW?m_%6d+%ha5@o35UKS-nOaO*Uc{#Z?G0mr0T4r+|h%NHsF5 z7FU89j18En{nRYv!2>$vHStpAn%!3zdQ?V(tE9A6$q?QPAW8;%B7aaMbiXE<*Qdd? zzi13Z9o5;s=We~`wP88I_JJqcz}sxD&JS9ZP%y-bEhZ!)@8BGqWx1ATjQV<9p0%6t z`?p!5?~&b~^OW_7vh$r1xxYR}Sv@p+%Tjt&t^iHE1nCuwX1(`67<=!yr2cq+yt1<0 zCrwQ&ElbUvduz+gg}L{rMBD=h;>^s3np+TOx%U=S99fwIm7J)En2H-Lab!4t$GV?; zfA{ye_xt_vC+ax}Z_ar?U(dDCktbsM6f;@asywoMJ&Kd{n$w?G`!MK9xzg;f4n3r0 z+FX7G_4FAP)eo@j&}MZ{GdkNGy?r7@4sy%q^T|iMVTT6>Z)9woZGgB2R37(ue;D?W z`K&wj@l@FM%UySQVub8%v%4+fbDU(el{43Ee8vx7yXVQ1KDjkZ6o{DkeJ%3pW0!jj zE_3Qi%?qYN`^#d3ElQ>ald6NtFTzREw zmTL)KjO%V^ecS~kx^)P4-Mghyx1$G#2UKX%y+k2uU)p#EIcQ%g46o?Zh$={#67v_W zKR3-fKncAw{}V#6pE$0rhbXVVQ*nRzZos{@(MehGz-Yq&t;ex(g6^3(ks7Sq+^a1S zR&gB94QrmWX{~UYW50Bu^(*bszwfQ_xYt4BoNsx%B_CZ@{6y_HaIB(ee@^sxd@czO zLc+;cI|(m4QaZ+R$nG%u&rDAKsOETEw{MFvT<-G{&&qp*r9WDpIanCwY-&P+hwN?> z#hUqIxKdjx-28-}yv+dt5+YnIrDTsPcs&0Sz5YwThcS6q?ZP(H_k|}Q(*hjq{nVA@ zq+pAStV4B}SoDp+S0vNQi&$`125+c&1bcaEF__y_;e@~+e<@dnRQHmQc^Qc_W9o|w z`c#ZavyEc-){*pYH*?l2Cb743_$2Oqxs@~Q`n|ZQQ2AVpCu3cAjW5&;n)5|@lQ6!H(~&#geZX%M`5HHUOTMSLY$Gt8l6y>sAnu2W`nmb>Sd$5$V;^Jpo>R_Z^@ zuygYI{c8Q#LmoF*efX72-#5|xsCf#Q6Rtd6!>ZoY-$?B&?Q^SO&(~nZt37p5zC@C~ z{tjWI4j>F=5IdL`{&!LQT>+*#0UH+eeyr~=`l>u(i|gnbl$L7z<7v5eAk6|KDTfT# z3v+J~@AMrA*?`A?`rWjB{ul0ph}J!_hQi=EMJmg7hl_=M zE?q0wN~~c+wjt!J`DPuYW`{x5!x8@kogDf&?06>>JnFg2|X?gb1t= zhvmAH2|Hz0-Y&2`bkn=7u4L2wJKX*bAFN8sqD{#H_{DXys@Q2_%Z>eSKhJJ|Jx{2o zj{@$}61wX4R$2?cEp|<7`}OOb`6iF$R@^(={ZtgW?yrAVI`ivk7MSw&EmCM!X~1`5 z_*xqyeO_kOHqprFOCqoyCSx8c#%8^xaLLKDEpqZq5A?hLig%R<(*8@;>oTPL zR}_Y7BmQ+j#U1FcfXDylgc9%X|7)ulzCc*W2ZAHK_zc=r{x5prw#<(IiE1;^<1lC0)G& zl}l{!J6b$vdT!hpJ$q%;+2{Qa1fP_@9?zMjs3`nMy7y}vmG7)))Jh06RBd(*x5-gXT zfz%wtXwy2qXXPXQC;it!gpkcf4ViYBJU3Ra0Ck&G9`q%PNs5=ecuyJAX;CeT1P$%< zu3OHE$?eWHms`@074lQ5!eA|oH8ZIfs%>Yf(walWza|!(!x%>8F4ibbeYy*^v;BVc z$2y~J^_VU5xwWb*Jy-Lt&Ssy#UDU*AmQVJJ!&$5DhWrTRtYGb&u?yH$mTi^tUNlg1 zn_Pt$ErPL zVK;0^FzN)ZCJ3I64eF}uD2e$fp@*9-?rLYo{@%7s+f_^twDM+hST!r4E4FSQ-J*kW zis(twO&pIm0ix;K7jH=M6L)jqcBA_3s_W{R`3D=|dcb?@9ZCUUi%%Ub)5k{K4szlQ zEFbDB6#A;v#1%8Y-LKguKvw`0@51%hk2-WQIHYAO-`4*8XUUy~oY{}p8ME&)#7Fhm zpPu?s)+|YxxR?A@8N!5}f8?Bvovvk>q`2K|Tg*QsChYS##yUzmwp#CVv@1e_WMfwk zLioybjmFO{#tc^nO;`^&Sem%pg(*L2tvDPjxBGJLx0=gnjdOgh4Hd1te6C1a=Y2IP zxWeRXdU{2vi$az-(ox~WK!$&U=z>0-un1(wWyLxp=p=~xVxViDa-$&I=eDm#S1gV& zcOKdPhN)EvevucLAVOf8?4;XA`V&*rWE*}dOYumpe!Ls<_M&_zJ0Ja;>5rSSMLyO* z43ZjA)R6nk6}v_q#rcLoCD~p*+Q&q%IN>W_wtV`K4y~Vf`|bvPX-Is)ZRJMTa)-~} ztzvB4TpJx;n)RAHnXExH8L!PdKe8B^1h>ja@II1!4#qwA{Gk)+A8@oeWm9uQkx(oP7Ua(pP+^%R-8#LtEGyS>OA`F27`2}w>5qmXB!w+ z97E|XgtYG(Z6{37mfq{UoHzI3evSC>a@}X?t-p7v${6VJxEAf|YUH^_di3Kf;-Z?u$2XIdxm@o) z+KIMMm>%D5cZ%1c_uX^2Vm09ljZv^$GFNF0GJ0oXGvVXJSEWd~9ikF7-+2jp;|%1; zL`hn1uOPkvbxOZ;@Q3xDk}w6*ZP%JLMjH6mz%(aR(x)-3*hX-7_8kjnkxsZZ-zS6R48%Q z&y*o~Ap`Oqe#Q6|i`^zOMmmjLykOR3DeEB_%B{U>S#9YeV|QypyIkJlbu&MO`#w_l z%m_rg$lE#9b$Kn1SNq6euc7s&&N980m0S%yt_i*}lAxC!dHC?ES+;<-BQYBxAtQJO7E+EG6KkerPtc)tRCeXEF|_;dZT#gVJQL%eU! z`?c%3opRDFS86LK&3$*Z40~>eRS=UGeQ2@*FJ-HJ+LubiAMMUVUplU5lWWQ1%Tp>a zdl1MqS8rcHSWztbrmT(mwB9`GB1|YBM}DZ3?0-(o;CbW1StD8(^J7=bHyKSDZqa!# zYi`f|8u`KBX|+y$F%iLk$v2Z@Me*|2E3AfzZUJ@kpFVv&xht=iixo(fkstc4OD}7C zUi(t9AYPSS4*?E4{-$uT>vWNxA3oj*BwR0AgKjSu`_+*E=cO(x22z(%+r}!7R`#1O z-vsRr;*~;&>m>xb)D!!XVdP-Fa|-Jxlw`7et6dF$LiSFbPA@t`ad(tLme+*Bl#5YP(AiI*w_f<^ob-}sBvMWoRI3L^# zT5Vn}?UH5Bb=lX&dIf&x@GZ6?~oG(ol6q|MP!ebBTyu$N#tx!{Kl-5TPgO?>j(^}FO&oMIYY@b1-+5ML6HoQOtg95@AE6K4!ISM$IiIDVdd&F4J z_E8Lez&=;uS?lbs&AMWb)%+&i9B$kFTkN49H}l{WRpT=*`$8RACVkbOS~J}{GZEy& z{X2~lj@ZkHrkqG>U7^s|yVhSDPn)CD7vV`s9T%Y84M<4a#y>it$EF{7_-&s+ zL`+rNBjVTE6C7OL>^+-l{{vADT!*&c0ev0cIIia7`q&ib)6?bF!O)>M4xD?fAWT+w zQf5{r#q8EI>{1c#X?YONwt`Ev_g_3e+u*eKSkc08fJ>QP0mr!%S{g)b&v7QdmS_Ttg z7E+SRNUQID*(VTa`HkY3OJ#CABXg2PZRf{*_+lQ?9)}PeAQFX(jI{iy<6yFAyW*m= zOF~qipY39p4-mCVi#qd}l2HCGVJTJU#!6Da;GmOVW+PKw?2b|hwq}FJ;_b!rPBz6cp}z@gAr zhMhS}O!uZ)@P4K$5^k{J3HGPkWzIAYt}>k@akDIXj5%kAH@#4IS43w>0)0nCN^|q* zqaQEtDQtYGOuRd<>YW74C?%U0;=20KZ1@VE4SU8wl^jXaW6TpaIGJRV71LOAQ8F{#(C)xAfDOC@v7r~U)nN!0yKDT zF*QzPIH-N6T0T`6oh6%6x|QDq;zkD*v=w_ko4?9CiE;e}H<@^w70p!JtCum-?=;vj zr1F>_cI^zl#__v=cr8nc7Yl!R_ro{e);I)HZ0MDg!eVgtmxDcYJ<+s4iN`}pTO@X7 zjFyc))*C2}-?*UN(osoL7sH^obZV~t#+eyqUSz?y$S2Xq*qxSX2sA=|(2C#C&m$hY zQndnIY?QHf$FKfax_FtgS@fAYWKq+Gg+(_@*Jr&ghVVZk%olg9`D;XFObOMg=1v4m z_yXv9x$$Jls_}y@;|sp;#a1yoEn37+JB-x>G?rE#tyn$RQPvxhT4L>941_$abqhl$ zygIr*yykW;sF!wB{_&BC(kn-!zP@XA{Om`J9Lb-R^JP*8AgNd(51Y093}mjwGpE^WZkk(=e%^#5`%A!SMr*avNS|!sdw?)|9jZ2t9>|;; z+_8;sg<2;0Z?=nNn7%hA;iK3We}J?`s_P75-%hUng_ZAe^31gqy1#-5kPFM=p~0QE z+mbV-TZ5fzGVCrIl7)@!ui}ynJsS7QrOJ-Zq-LycmM$g7z)y~RKUD}oC=YT=`l0@u zu&4cZvrQ&CdW%aLx;Wnft`m+s;SP4cQ8gum_QGX@)gE&RNHj;6^r}u8Y^2BaA^--M z43Y=ZvWRS>Wxp&pqivU*NgoKR3(&>G5JYaeRGjcEZy?(B%aHlp6nT2iBT(z>MYwB} z>2IA?>4uVz0l{NNPt|3a`z{F(3?7@2qnhW)!;3Zj?X^c*vP@YsKqdMv&#lP1$ zhOxiU)_rgBrZwx5*s~5_ZD*`hU2&~C&YFxwUAtE1n;jVE)*RsMcK+AcdTB}TjQ6-s zSY2Q1FXj1b{vmCd*OQ~VYpepM+mD(~hBSa z?ZA7=dFYSIk-GK3C}X@#Iud=;88-A({&JSe!r7x-(Z})Yx;X6JMhImBpLS{^AE$F? z3jOhy()F5WzL^4}XN6K}Q*|XzJej1GE)(r(?h_K}ZEwT9e+*?rbm3?Y4eew&^IE;j zD`~>b<}j16x5AO5n&Jr*{y3;=1?k}tl9luKesoe=#aw*90gq5qkRwdH|yZJFv^aIX77(I}0D;K`W&Ct&4NX6Z<6!GQ7`U>rM^z5D;c65tuE6!HK{Pc&&$9s6X>UF!$BvRrN_8#Z zpzX3?fH!S-cX!jg95xBpcy;covn{0tgg|BZ1VK19qN`>X8<#TMQW|_RcCo~XJjX5ASn1b2ll9Mbl{wL^Wcb7iw#ouJ`(3=S{QLb zK<=1D#JXz=LVrHu37&79o&791N*lMg?FBMu{sT=%chrltGU9l&R zqiv8EsgssEQ4h~S0%iV%Gn(A;&Cn_IK(g4YV6T4o9Qq&e(9%!4O*LhyOf6f4F8b4e zsHCoCu{obaSbkFe_wENwz{XXKb)%A$Tv!|IwV)S4wn>jEX~7C!05o^QU(FL>r1jza~t;7uNHuc(m2r7bFY= zf4B0aJ8+XjyLCc*HTR=GN{5Ch2G1qzS<_0Y)kQ;-3Or8a_G`jYu~w!VWJYmDT%Ate zntkBXt8dEP1w+!WyUfo0K^A>VZPI~*LaZ7A4B*tO8Jv$QFtl*W4VPe}>?CQ(EZ7ev zy=+tzApWaFP1*@Am?`8Y^_^@!$sonOoR`Oh9cz8!lXXPf(RuP1kL^74eF zZLg&Mk{^c&{{Y;1jm)?_==GU(%MWRDj!_BbEw5(T^5bZj;3Q@t3U-#owfMli&bH~M zQ%V$?n&g00xt-$5XX4zB6BrTZT9qs3LdwO3IBa*e>Rzm{OTE#p^)=+S(!Oiv>whb(_c7sdR=2n1Q6abv+p;`daJVBoNRiXZu54n=bvaGXAmE=_p1P z`H5~EFX?B0z^dF``Q-C#VNOi-U(Atb4q%RA=>Y@OnrLXVbFFmZzu71Mv3LJNpBylY z(>mV6*tB9+J>`DRrnzBLTVGmYVfb(~myD3)ot|8h(6Z8W++qP=h3TwHvGFCF7qw_ZXLUcbLUw zlv()~KxR|%E7(goQ+8#f)qKcawk4enaH$S@ayuISzOg^jq>?9ki5Xx$v83}z=>n2IuTaj8>4j|lO z6hE(lXm`(grIF5`clX-`{;A+4Po**_05at0>Eg)fTGl0QqV+VFJJzLEQa}3RPZ`gh z(Q(rmt=yzZ@a;A zTpOjYbCqi(qlEjW^YeNj(4A8?s6Lug>S1bXYHmY=`6kgrO~6y6Yn)9zz3lA8^0<3@ zZ!iW;MpOpzbf@N4(bNr;hqV)tJaeFy||er zFX312?}dLBmEWwDZa$)USN=V`(gH*&dfCfrOB3#?{MP8BnF`omYay z*cLkl&opFr>)OMGw0=*SPQ=LgSFWEEY1H-Y4&Lii?P7A=3pP%fy4!C1FtHq^jeI2% zXt&p1tHt&iAI>`uuwwd&-H#k13OyT6%$bMPD&W}7_ya|{Y~uw*tov_;5PKEMIrEZ} zwUQc1oUsu}IhEDfgNnUB8@^C)w)&&7nV`hYyeGu_!R|0nfK-diNSdg4o{9Tu;T)Z& z6p**#Hz6;cj9a?6ASb_=hPm$fNkU2teJ`9#w49a$eyuP)`f{7Ue|e69oP z$TwGki!MOHgrrgm^#~M#P zi4}%EV(QqO;`TlDvly?oPJ^GoVKv#2$}SXMPL1Gi$Pd8-Zrjj-OgM z*(gtFL*i&%$Im+TuDEOy{R}s99bi zjCILJQ2i%r5L`{mZDm4AooD!U{j8nyau=Y5kZIXGT&2m8^dJujwg@4r>qMmf4d2+o zL{TKenvmhv{XOdiOHtfV0r( z-rHys53)bd##AgI?*|kc`DqjK+yl}q=Td^zlNFd-G@)UltiZK0VT5ZI+{m^d`Ib@8 zm+s#{(sK+c5RWJn+W(a&5H*o#nU~B4ab`P)u1HLq?N1t8%I=PAIUzzm#$g)Tnl3Kh z%o+vVw0EM8>}u?~!zvoYGqeg9GW&*ja<`8P$otpgN^EQ8+wO|~f)^!2AS7gcte*{E z!K!*MW_S_N8)w4=5K}h0)I%%XUz+fS6-1rzWRu=BKJ9Kop4{fggVVgr9 zWh-gO(U{kuRHfJ~d+X(fdvt2%-S8OzK&TVKHv6DQdd3NVF4CBRTjL3ZB>kxG7r8?< zNTcZpq1FU@A+bJskW!T0AOmO^Te+4YmQd`H*QvavqLSmMO*;B)v8KH}y{)1#?y4Rk z4=w!m?BwaX*>>^DOo8%S<-$pR7o0S^WZFhlTD7Ul@te+9Uanhw_i10avI>{!qnU&g zdh>=|bUk^xvUG%yn9$p7g2dD*<$IRTU2-kpMP+w@s;@FcgynoK+iGf3*+=WfPEhaX zqs-5^twe+KpJq17w}iPA1Gf>Ue}>8899^q*`dLI$KmRrI+O*=>gfcG4i}>QnF|3;G zNL{7LWciz)@LEwmj7NBus(%P-Qv8EGyRWV~YEZM6mk-wK6Z)F@9dK z;>V6~KAS{j2uxq`njFJ>9)1RS^cFRnx_P{3f=68!|3QEbFf|*VzTICA+1$``TkZo? z2*4heT4S}^M2|UhRdJ}L8g&l{ULlh}hhv1Z@uQlK?Ro0L z5{p-B0)y}PQ*6j-=&nq@Au0Dg^C$60Ly|A_(6H3_qEs*lgQ&-AGz9&{Q}h|M1hZZRx^tTbyZ~ia3uy+Vh6(>7v;L*h{ zK^04f!hj(fG#$)Uty|i znW~*zdV6kb#-!T!{d5vK7SyQYsG6~gIp-7=3wzPtD}2@ydV~oQta099rWQ1pM0XA_ zHz3iV@}kutCCwx8?8Zf^DDiz>iwmb`ON2-3r)l_%#Oo62wf&O$VRoDEb@T^6FWqUR z;epA4oangLUqeK8>pyhpP#5<@JX7I8`+`#2!+g6^M+SGc7uQkRzhMm(-RyNc5vX;C z-IjmR&p%fA-|*vG=@fCBZ6^@UoH^w8i{E8 zq`cZI$Z#^>=0eS_;d~T(Bj}Qrj+}Mi&%qB?j6lZ*O;6G!M-0FIt{_nRbdiZ6^SkQ-og&`b=y*p8@@)@vIY>^p7qu8Useu%_ z(W`!9F46XN&uIR{>sFD?X1ncfGi1cbGlaq5W#GQ{Fi*6!&pPIXQ%sMVF62SO$u2^V zq$~FrF~5mMe|(Kxx~yY+CSYyCu75O;UbHFsTC7G*~3)@=I2w`jFL`0!Q%9U zP1?G(6p9gm5lU-#Ann3!BYv7RHyU>4uXRQ;TXUHTeV!D{ibvYdoQrK> zC%n-7FY{8U_8*|>-~3Ct2O1Ss_-A!bJ|AcNsW*tL{W;S?gd}T9KZ+KP7ou(RmHnSY zoc|Xv1>iN?&&+TC?YA5N3MMzCo!o(5IKRjhGa;$)io<8_07aPTW1C8ZF@lpyhC^eE zoI7Fy^BE)(PYeAtXWbGPoUE~B4hr~;EN~d{Q*`PkohO351`b4Q&M}JP9Iv_cc##zW z=}>kPP?RWZ;~8U7sbm0F`E`@Uv`x4D3UJ25k^`PZYai4PggTEDl7?pWHdcuZyIT&( z++Op9+!AkIPjDTpRu2da{bMv?mAF@{cCT=J)|dH)WsDz= z3T_DE!-T>~W8)g01&b#9r&e7PXN`|Q{pQrUPZW#KzUL~vw2<>Pd2}j3v@S?K_NMR~ z8=GT;#?ZZEgu9KlN%V_^02@#=DPTYtbUaCE?qmdEZ^3j40XWhin1_ zSiIdYX$%i)a*VrNtVfpO1gOHBv!j0DnaPL5xMZH0NTllBXWTX!*B*=0x#3Jd6Jn|0 z9|KAH9&gLH9-b~Sd_A;Pe684>Tc+8@NafMcAupi$Ml$G^C#RG{w6u(jrnh&^3Wgj4 znlekM6Smjxh(4m!b|>sa(Ji%UH))&e17|-fx8lhu#pZ9u{{>;aG}n#(gJ!)U?VOXT zJn6IC&wG8GEY0d_1APQ)U7uonYDZ6UlNLPO(8=KeivKunI|6DsS+#KHVfG5iXWTNX zZBr&Jq*@V9ppLH>D|IsIJl%hyG@72KSGn4*gF*VmFR!*tfKJ@Hz8|1ysWZ0fr}l>o zDu6A`!$ue9^eq}Rd5oZbEkwh$h59*Og42(wq}jb|*Nq`Z_Wx%+2`^ZF^}IX zfu0Iao%4PQUuvK)2UcNnp~xC>leSYjR|FI#g&wp?W8vN)ON@^-*Q5IjYbd

hkY_ zwfCVLyB~iKYFt^A8AyJ!Qf(T9^31!3Gu=r}sz)o$RK@MYVs(IlzJiU8>HERtQM-mt z5~w@jho9-N$qMI-8I&w2_xfvJNyaTXCUYwYP&3#@L-nlyL-*Q<8V1qTk>7fiAov<1F_RX<#AE;;4FXz+C=vENt< zd>dJqmIhYoNOSM*PC;+_Z=RO`E_=U#D_*Bp$5nyX+yR9d(*y5+0`+J~a^#>vkl>fn zmA|Zw?5)68w@X=wDjEi?q)*vwt70CnS<@#QA0f7tvX44F(d?B}dgGw<;izcuG&hTm z=agIJpS^uqqmRHvzpu>@;|iai61Vqk&K=I?soVuq<=+QRmzR|aa@@12+s(s*^m(Rk?NF(Drc#q zu8Y$u>AJE1KxcGKy?PnnxBqi)yUM7Nq*Y)dJE$ef8dILkOP6+Yu(1}N(26uSC8W;8 zq|Ldb*6gk%C;6k53~%L%%8d2m{@^eh{>S`J$7x1{a9)DTcu!NKE1!vQaY$b60WY*g zEq4ar!zlFPP*bz|%c=tYxkXr~!4H;@Q`8Ozyq7Tfi1z;%51)&h_;b zL!e7l)59R>2_4*UvrPSfAPs?+ED;;S*qQce=S;%o(i>Wxk)6s|+ae+JQj_=KU z;ucC*PM!9SvV>Oxy{0lU*-EQGLoy?BI)0{ix5&I zHMoGF6$I2|#jTs1sno`Psi4~Ct8%116!o}l^5@!7?+H(gc$urp(lIvxQwB*M&U1%# zix7Rbq;7!Rp>)Z3$qVJ_%9}z#lEvnI` z{e`vWQugIL?~|WSNAhCL`6eP$^t!e&kMhUZkH{VJ!!Fdb$tY#jTeND;8QZPoBqb`y zNLY#0*Ao^G_t$7S*X+$qAG^4zzkO2aa;g0Jvxin@Q|u9hxJhB(Tcg8eE#<7y{{*5Q zrE=JhYfO;nEZ;oA*$){D>zk_P2=K8h?V0b(XcA)0`&gTCDW;~UmKR)%%~>PIO*}0fqE`7#uKo}3M}QO|#2tXvTl2B@we)fj zvxe-`{7$)AK^{x`u>_d(F0N)wN?;)+kxHL)7YFW{*|M3dXrokxMU)Jx+gEulp9RPc zsC^b&gBB~nZVa!pds*KtdypKPisn5|naxs3TU?zz=q_b~yxFkhYoOe(s5`klkSOBj zE7*c-7!2+f0pw-oa_Yy{e{uoUzhl+KhKMeE>|PDxGwxYIRH~qo3zdAe(LIt#xCz4O zR#@hWe+{mh3-cCDqGhpzjo521KL!(iHj?r);PRrg`9~Iaeb<_PLn=SZi~58R>`MSv zX4e(gFu|^fDRI4%ll=DO@}1<9WDNpZ=Rx{=-x1jQd*M2TuSJDGLQXT}+z!j*y2kiG zw@OnDg0|3viM_C}?TzK3tXtVF$bUp%kNJ92s}gA6KY$-<)qic$()osX1IY>=QN`7I z&-d-Mw5T$K426=H$XHlKaI&C$tF?CdQdZSX7`I}x(@Muc?B#%-q0GHi<^oj5R+eO< zt@VJ~joZChS)ii6eDZ=*VMl-N!2caY+*CaF7i=l%%>t}5&dU76bKL-8wSO^_&5i$L~&R!D?p3mBi zK@wu7f(91*%;eNvk(`UYW86m;{g+;qlJPfORP$U7+aDDu@QPmz-redG$-A;NA5J#? z4!0_{Z(% zjLXK+%`7iBf--ICV$rU!|FX0+0sa!>Rrg|$^3Mm5AE~Cc!NU!D+2>58oQDbz6kaxI z#^l3*FZ30aTiU%vnlR(|3E*3<18jtm1~)vHRb(lcC01?EU*XqOI92J;^EH2_D>9$# zST{5(z(xCEkAXf2o8A|HJYjcsfw1udy5`?Pzm^9Ye_INTG^}#cMrPSBQS3%1{v_uA zkB%Eak){Lt)6Dvx_AzkWeLdgl%SiiCVx9NrJcAr%vF*yE7z12XSzJawN_+^Gc*1Z>FLF;x3 zwAyva&V#E_!SYdS2C$rxd_c$JI-Usa{Sh|~i{LMjK<@e98aVZkda*K*IQ-KX& z>+Yuak&c8?>+6<^4xOk?aQ&TQ3c1<)Z%i*Wvt|6)%vvdqgjlO^+J zpp0?(KyH&)O)s%Xf%O-)hC;%eC*%*`n<-*}U(F?>$GFaWkL) zDtkUaQHp>G{PeTGG`vaGH8p(sv$!w{{$2U0Lc_(LnmdX&x*vS6hN4oQ#uW~rv5L1? zU15lf)x0R37veQp_*QWd6Mxj3Zd_qC%O`UU(SZ??sGDd5JvK|z(vPHFXEo1L5a}Fu zdoh!rKpA>#L6xxOaYG-@HPH9P4TRv5K4_~eR>{mh<+F?ZxHA{kjJVl)T6^5gk;7?(K{mHT=NR-lLmEB)p6HRKJK zs~2m6F1&zWY3bwbelWBiO|AL3mh>TkTCUoClxY3xRCjqnisXf4B;RUOl+}6QtP1x$ z)7A)gi@*c>P5DaJn_8j*?YVNzVmX_f?S=Lvdm*9T%s1N_$@^z6;HclWB6VQ>i@hkw zTgDjgT6G~sR;zpFNbzI>pnFnP0LsOvB|77czvj|B#!Zi{09EK6y$RY6#|f0%`hg`X zfzW&2QLga*hYMkgkiM8zS5U3*z=txv8hmBNX)~fAMjAG+NpNpi9kE$Vv69f9@qLE z+BK}ZXFla(s0(e83I+hH{hl&HHOR7yH>?Pf4yKG`LqlS;GXAw@rg3i)R`N zjd4#$P;{VqT=*L_yNsn6;EdJ^L(#VQTql&tG*|b_}G|I~?I>(yY(I-1ywp_SjG>tN5gK=xj=8*@oTOkAf9uFty@|AL| z81x>Y`xjb`qbrp+(O#kZ7iCvFDW}V~BuDNarg*@7AM*#f!debit>?iTC%M@U4i+%= zPgSwm?hKesQn&u9>{PWZ!icw4O{X@0RP4-}Tvxq8k55DIw1Nrdn>Xg^#}GuJBnLg< zZLBBXs-I{)7di3zV%p>m-3JHy$#}j(5pi!&Q7>pXVeFwP-RhJ@37F4-*)M$iuxYHi zY(JLo<6=u$?@8`V^3LJ)!G!MoWfc1AE_h|gZ~~(wYE5Y@Hg|XtnXc+GvpKfa2OYob z--IUzH>8vMuP&Sh2|wMoK^S2IuNGv&)%K_96Ur)quY79PyXP*I<8Hn6RsmRK+Bt=A zNJa9`a`m?&_p=-Uah0fmctIn$r)yZAmEtZHFe0<1c#n9BHNc^_i6E)*RcB~T2_Rct zG?A36f@1>%A_DalTAr_Joicq2dbmd5ez>)cY77bZTn=n_eaeIBF8MuBcxZ+jCPnHE zr_W`=GRzHi89egDlqORJV7dei9xLmC4P8zZ{v^Y%J^%6%CQ(`v>j926;sMfm4PGy# zF(L_I1wtgi$J`Kz6`tV8QZcu{&S{8e?PUr+dq%|tHBk#8#$gCfh$S_N`+$_l!>}yK z?)ksA4h6mv&W+_;<;Ly`D~lqYx=*dZ*@aZGm;(=wwGNoKiV8vo8wyoMdku%Z0P_lo z<3>4%tGQ?*@}V-Ax9~9uH!zbV-DS3#lHpt0cU9?Av%4tP*C2LyUJe;jds@f{vu?kK zz?KgZ`j!;wb5;Fg%nAfa2h0Fi95%+gbk1w&wGha%VZc}0v_&!QLJuwoXMNVa^ipf# zBft5+jNRbi?~w1cY+nEBus-2cBhpno`+R0Ik9lRo=cBy>Qq@|&^ck?@a^;q{NR&wV zs2@giGHCny{pW3SVF?T~!V0esU6mI>ynl6W-pfTcsGgD_uj0Dtd(@cae&%dYr` zud-0xBKJArw$spyAdfGbY*4hcfCrP)$$SZKfcy{-;ui%t2BQCW-^(&87M@olDxy9O zIQ8x9rc=j5#WT*Mz`1WmUf5+OHJm+8mzS~*BjYilXVWq@n&7TSarwRBE97q!tpZ(w zw#nr2xu3gy4yg3%gyB>`mE{#kKRTFJ6_bNrzJofEmFa#ARh8SJEM5gJdLD_!rXo+G zu~uy+HFr>AcfAfi0c|n|#FZf73Oj}(pT?6*cdCJ)%z{+>;s!FFLtW&)=HjsO*`($L za`m!mcDnNz9mz+AYTQbhn+m8szuVjc&5pg(fKTfH6e>cd@nlWm^bUP`PkQFI5N8ah4x2M5P_|`{svXs)!M@JXaDVJ1eg+r<2AS!k46P#eVK_>vpJ{w%Z~A<+x5MP zGyBayom5Nb|B9+U{#%p9r}#JP;l1KC7fHJ6ugCqH|NpmS^sh_3c;P_fjqlFcx*Th; z)I{$+-9Ozue-n3)kHt;%ilqIC*pxpNk#V^XELmg<;WO{#R)me5M=PJ)`z(Iw(4A*F zd{saD|2BGElKx+6kW1X-e%Z-B!WUhntydcR(; z$MgKKnyer&pGK~A9MCTn^(l7kBy(Xz|Aq146(7b4vod~=iR0o_UST6_V+O8khUDk9<@53}AZilvDZ)IU@4GJ}?S5 zF&bFTT>fvSeI3UVrvA&furo2pqH-bnvl;u!7hCwyQ^k(6q)B~9CN^rw9F}pwKvMaR z73EBPMqMUF#9RxSb`mh@D@I;5nSNAB>%f+(`cLP+e1|{mb_=~B*DJ($-q_{nUm@J- z92t8)wz>Uyaym8qMw=_~Y2TJ|8SX*8DM0yk;!?n#3M=o(dwXneR&eq2wJi&;23e@3 zO--oCR}SmrGVPBmsk5o$lIE-8$uw6%b_AmLFLgn7w}S5r{!dG{tcbzkkwZ$DV5s7Y zILiOB0dAgmU^DNe(v{%$fkPKAQYeRlS@vblwEn)0K&km~?ne9l_uGn%i zAO0p#0|?nNzUcFJa5thjsLdgA^KpeosQDdcsIp*s%UPcSp{#VrY-(oX-Frq;7usDuZlhZPF*F*hD$qmQuQ=>L>QMs`=B*R7Q+hj#A=xk%pF zi&=O=S{rRmNkOYX5B=6VvwE=eV-DxdR8knkE$Nr{W0z5<7RJK;3Lx|uY(7`8WB~%OYGEimqKI4(Xx+00QB67?l_m(r*Lkxk9ox)U@r9WHt z9~^>cgJ`)r>7tP6`80SCQp6uT98GPC|RA zuQJBzaUj?`1vyjRy!1pxC?_N|A9b|OEtWe4CHWhG!0n_Laqd_LGhBmfYnmCw&iznC z1mNBQ!~?aBs2n5F8%}fHQQ7HJg~rccl@7Zyn|F}y88tdPCZEIK%$2kRXnhm8nlawY zCMwq?sJs{XvrU9mmPo(GiwypG{KXBEzK2Ds~5fSdjZ*)pw;Nw#dQ`w?NC zbc_1ADmo=X<(xJo5itf?5!`j`-EqqDAv0#*%tahr zt+4U*Ot@(Tgxe?BT;q&BuXravU-_wM^jYW>x(O7+WLxUN8*I^In3&X_Yj;6(b- zU;N2u`DQATpkWc0)2tqp@^X`R z8Ta^kmb<*;rSKBZhqqP!nd)5#q^ zv{@ioFsJX_)e-iskM7*M_!3yS`+v5cy&S)ZzV(U9vDuP8J-=UBJaYkx%xR^L9c4jQ z%O{o>9GVtfjwGxxq22yUcQa)CB&kN1dI@KIE#C?KLlHCwLE_0pz8%gfE4bO|Mxxs5 zj(Q+lP!*n=KQsawjL2El&br_L30v{d%KkRsXt3}4Abs0Ye{#X$N=5TFz)zz0W6h@5 z-2>(c|6v!!<=jYbQ*001_9*Zd^V!|5l!H!geb;8Y3XeQOXd_NtQCppH*E02s&{2DI z>b}Fh2;lrK+CfEf9fj3rHq30OtXE&68F_BT%wU7s%;>|(M{nqd(8JPvi{&Gbj@~oD zx(OoEMKSB>vN%D!Jk;1dy3Gp6%LKNt!>`<@g_k8SWu6VC{q~E$Y@5^#`2!lD|0ZBs z{Zq8YzO_Yf^8ByO)ZvPyIGW!1db9BGt@YKVmFrUnW+{#evK!=QQa)9qn!kTOFl(jt z4@vN^L8YWD|M~x~x$oa>bpOUF{4ZGl|6BMrxOF_k2oe#S{)r|cN3VTQBz*i`}Ke!)^|f3y1nJ{?85VX~Zu!aU{D*>0k$F36vCSm;$`%M41)1^^he%vk%x~Y})qys~ zxGb;n?gA`>z!%Lylj&QaOf=EIPUl$sdQ)J7(!)oz5AThZzsEQIumtX%bDab|))*;5 zU=(molG9jy@0oNp8L&Zw_sW6H-vnyA9NvBf$W)~6dmmOgH1{%TSiQPKwqb{@``UJf zm$*Cnbm4D?oAS9>AztVl@_OMGyU12`Di$Vr9JZl#Lu#$bbH zBg18L@VqH4$|dWVpb|6ZKwQNHk2mfUnHT;XY8=kVwkABf*2Z6KS-CE{SqcT-_{*#2 zUGh-0C_-3`gtaau0E{>_>vL~9r^i{WI&XoY(N*aaroWB9TQW17d47vN#wbYtT|4O{ z))Y*;9)uqfy)x$1z|iAJ|0eEO!$Gotf`-Ke-cCZ!g^Gv*44tIjz}_py7$-N;xC z`!;sYTIy|k?>{}M;?R(qCvjaHS`ScE7Ff#GjbQ0d%fh(4!IAemg~&kx5t!MNx-Xp=_j~~b!>WeP z7CYU$)_vAQMf3POmI(t;tE5isk$stEvu{DGswAmVM0p#!$RKCVV+mFXsPN~C(n9tsQ1perhLo&8FKj z!nHwscr6bfW?dmMSJ66xyRfYh33?3PKe+0QJeLJRTp|aV^=;ozva;6#-eOy`O?*3! zh61V{VOuqqetz0;-**5NpwxqCdOfU=+}(TQKUS!W!JeyjckeaGo6xvdaG`P;9$H?k z`$6%XtRgShMpoWd4c)4+H*immU0Er;mAg z`-0-gA2&ds$n2XPaO5;?SFqc$C-cC~zaL%)8!rQ=R@CzV<5{X3y^Ifkg!=6Xq4jBRw?b=trl})RL8sj|6$l;+Gg!>c5fexBDBPPeqZ#xL36G< z^CWADe=Vy#LOH^5TG5%3W5K0MSQ0!W-flZg3|JNj1} z5S2;ZM9qz=fwye{q_z-Ru3oY{wMpCK(m+>3P$gBjP^byKV^Qkx`IOlK#q+Xzc}qeX zf9T_*=5kPikY(B~ySq2uDhC+g#GWr#{}=5D%H04vKw5e5%P8`1QVRZwt?6*y4?+_o zI2y(lemq;L+mUk6dO8IF`BYog#ffmw%Spg6_oMzEZb`g+IpyU(p<0V)y7q2}L&9#0 zN5b}E-53c8i1+OV4!P9SPF0pei7N0xq?U-aaHRvfu6qwHeiJpTZp`^?u{CRNQ4|~B z;%2$})0$yM-;mw@NN{6Y`mNfsGS-g<)(zCged9=HSd)^n4iY!Y45p}JsUFo&9@qr` z*y(MSoPjSJdl5UR>OE`@1ly-2(g2&9 z&EM(kI|4qg$L?L{w+0ouUH)*&FIXu;8-Y-{qeYw&8tYmEkMU}a0hBP#B$kJxYR$94 z#_BJ`{!~gF!rF82)dczu!G{tFNFrL-uj_Pj1c~%>>XA5)Dqv;;!XuGjd(`PG*K z+JqL|_fSmR#2MkH$84o-SPPQ*bf^0SV;+6R%{9R%a0W~T5uU5*y8PW5PWaQ<^mT14 z&b{CSfy;w6bXQ|(+Ir?@o?8c3TNu8vjH85I1f&F1O!COEa3=QV{9V6g+?Ofm=9@7^ zk!$GgrIYV!0#!99^4r)^cjI~x%OHM}OYFC41GH*aDI7-}B%9=*9P@h6gR!P}a$|9% zjJC1Su7$jjE>iS^rEHD1ZX{ntu-1B2%!q_Eot;5W!RYfe0`$s<2S`Fg#ie# zUaNEknjw5HryW*sDw*MnbJL6RZ7cPJ7qZ7%Jv|$ZA6dAMnirzh_VM4g5IE0aEY?&6iAI)_vF)e5tE18D zy27J?t>~FKWt2^*yXr};cQ=`X5vH#qsM*q_| z#o{E=@{gj|Lxdb$i?6e*XF|=bxA2Or^Dfj@B4^&+cWROE%MbIju<#G6@cX78BA?A)ezg7BI z1N(yk4OBEIn=_K}xO`V0$B#Hngw~`+I0p*sV30|qX;91Sp@M4!O3ll8P7b@)vxSVo zoAsGYmyPRVn=QmM2btntaP*ne&b-)_NH3p?P9%ED9@a#{WGdx2;P0Bu8wa_M)sLOQ z5vGFE%>tGe#sUWe%OM!2TPrIIU1W`YCLmgj$9SGNe+m^nnT?oE)(PAmW8bVqGYDP& z9r!{N_;5$d-82j6YzonYkBOtngf>Jy^6zyQ28c7NL(zwfe~*Rq{9XYK zz#|g>gpU}|XC68fu&*D+NTRD6Xv@hQXDHBRO%N&U=KG313Vmsn8W#;nFxxI)Q<}a# zyUndO&k(2?k_Bte7JtrZ3}3W);}T|pN}NEx4`-etgbDEO)17_i9gIQ;b^A!kuy5LE zs#jx;Sp|_eUeH6dPu5dVruSx?2epXToUv|<=ki}WMhWtkwg5K@Xi0NPgpwh>bb%b{ zmBFS;;I9xzL^V2!U44yUXW#igN8edlZWC|kv<$)QTCQ_&n!@e~TDo)sY1Sn<<;#P@ z$D((YE`Y^JvVdZQA64aVvJ2`bQE^|eNgK7nvo#!x*BKIN+Z44nGAT=fA7qTSix7f@-Di7BGk^TgpYAy zJ4?;?BXa;EzKFa#8q`A^l8Io+5A)7F_9S1YuBAJJ4@V!VM^|NXUI&zGwIFMI-q1z& zK-Rkro=HGl#cg>r)U-@eatdUv=fjNPgQ0grPWE>2g`DWv9wfSblOKmH<3q-v$m`Vg zfV>G_T8`g?zszPH5>*=(H?E?RRbX0>ktdmi`7-BAN}Jd^hJD-RKQ0sd#FN-* z+vb`s1G)qhi4)gA3Xi)*oY9jhjCp-#3A45CUCD)N8B`icb# zCZNDJPyh0O>jyITS?16rpMOUmLVrgJvGr6UN4hN5&Klu)Hh2kBKrByo8}NC@c&&z(C*@=zZ7_}n?% z>~rUEUJ&4ecSM9$dcZ&DpFNhle=e_^;TQPff`zn_^tp3|kwk}2@W9WP9OQMLojZ5A z5&P$St3Ar}+_|Q=524Z;?gmTa7mYMzjxVmQmTrF4y-)Glfa(g>c`8a9)cg04_r_97 zo+%T_TdP?XcZfKI-y(4cIS_~!gd7CgKVQfPy)<|%FFbl||D(&?`q~$}-)~xX0-|r( z7vHMFpkH>47Oi!^9Y)JR z*~R|v2P7&O@$?g`oezWlL3m3^nS_6%oKt9Pv!L#DF)z(@9Mz)P!JJ5D$LPNO$=B#v zGq0}&2O;*%;pK+w6@h^=R#2Mdnt)XWhRP%V`ML64co6A4m0wqfoW#q+LmUrrN2&2o ze0)4BK?vo$XA`Liz2LPoCdR-?qufLlmlR%Gy7X8Msi36Pk{O*6Tedo!%lOJFm38D~ zXH7}FWSiF|TfW39BTc$`hJug-&!dWiwcarM5ZuvH zc9G~=m6HQa@7i6F&4mf+?dBbp8fG#A`_V#rsU9!gh)I`@+j|~RXXCS2GKD~fDC&(p z1no!UOCGV`xkHwHG87gN^vq$oz<;R9mge!B=-0yqOs?k3GOGsq-H z|Cc45gsO7d{w7VBCH8qq209kTMmlX;P_w%4=n?Q%D$;kUGDiL#ZLE)G+(Gyth4oW^G zgp}hB-5775W9}6)+Lx#39HnDfnBE!K4!3R|iZl6dv~O@cAk0Ub{OnlbAd8Yd!{_*5 zZ7&WJMOf=M8mj;Criv+aR=tCm^S`$cZ9ySnmQ5=)(=N9nLYc^-)c%|xQ2td&N#^lZ z26B6+aq%$|JA|L`Y^71$Vo`y2Cm`BH&k&{)YWhItzQzEDIGtMDc6s>&2*l(pv+z?X z7>O8yktyPtb5tYy??DjP`Z5O+Tha$sV-alxe}T$WZcd8(>)U1R{RPQjL|ENf&?rL1 z`m>9W91q~fHJV_u8vex6rXmXNhF%!jheBKa#vMQX0!AQh{nq{w2ps>jQG2-i?89-6 zWsVRCn!kgiVyfL7G-R#w-|3*6`raje$k@sQ*(?Pwy>WK3D6&6CBfxBIWfToaLUR>7 z^h$&O-2ya3D;a%HJwIT7!Q^R#K=C7}tnq2eG^K`2QHt-je?rwV4)At3s{N1aK!r8( zBw}n~am6YiAkOl{jhT&*CKNAENoNtb??1C0{4>6Hw;zPa-*duW)9!$PymIy#b0pq7 ze$E9_!LV(msYA987PmM^OBC6@+uvZcp|? zf$wlc==uLFbMyZKX-DeCrtK4Yk&67n!)1Eu~4Rg+CgZoVFY;h zANKH*E)=&dX10%^#)d`84NGS|Ss?uQXJ?6i9jv+>;Je)azs>!1QyvXdSs8^;0W;25 zHEq~A2t13O;I5zlKUgOF9_izssrj*#}Pp!R`MRlw#97l}yf^@M?4Om@{NUAlfvbnu4NI z@MFpU!Z=XRVu4844Xg{U|DK?6JC>?6CHG7an#+E!LT#+o&j+Fo`6vS~0YbkJZLK0x z`SLSyD}a>-qb+wE&$-U-`_(_fO9nI#qoVy=jK^YwslOZ)N-bOkS5}*&82;*h%nIQT zX>Kj+jIiJFzPm|%dpi~P({pD=1NYlRX{n6QJx4y6&E8;jv~lZDkZOY58N-ttPN*#>*=mvf(Q?0A*L zQk#avxyRltk*|&YU7l;Fa^2NSbCH4!CW)WjIz7fuq|)=HX=v2*eM)B$Q+-@dr%cDi zy!+y#lHU72aX$GS;@Cu7tt2FI3Y`+XX4!elEb!2B=BnZn=Yu}YQ^ezfJU%`9^zuM% z;%m{32iM0vL@p2aVJzp+Q8NgI0{G^r51K|?cbrKcyEgHs1*c5lL5@2|FPh#;-FBHV z4Lk5=bW)=@;&pm56AC}6Sl1pXlNym>WCbI<&`@Ja7SdgL6{W8guAwd|Ax+mq&c$1M zPM~I&sT&fv#LRuOi6~#C`CVm7i=04>f9q_pm@HI-sWT@wt`(1+L0-ceo?cKG(3$f% z>Gm@dFj9b{SZw{!-X+mH>JDhuAuu$4^C8kgk3ajOR^8sBPk}ZU{Qa-cic-v?f0O!V zr`2)qeec7)vZ2b8D=SsM9s$;8blhO5nr%+^+08ao#oQ+-FBxuf&@Dk`Vhb0F*^!27 zP-Cyo&813(49CtlW_JDB^z5yIl_5fgt~h`6)3z|w?iDZY&^HNhUS{(!Kfe)A(oayp zYE24QFiXIwEXe|dT?88}Y96Pl4oRWl%VbnspV)h~+u9!I@ng*S;)1#v3VOC?gT-`H zCKV$A^verDIceZ>F&9t4NbJdwaGqR4C#l(}&%m7`Va8P7@P7Ax&oomd{skHf<>DQt zdr0aeS$b9o2k9y1JKrz(&fq^Asxoyaw~UVORy`m()sQvxO{wY!y6D*tWCPq)>9 z%g<^I!s1#~8VwWOkS-k=YOM%FdjLHb?3!HxbiIK@uk(xp<}l^Z4>XLHvgUZXHIBZI zq6^np7SR?f!09&I$u?NB+48zp*eZagNiNT*Dn=1}c^GVUeIGor4j;&LQ0R|2u6r{< z6h?;YL{mp{sA8kj?(Ms|qxBve$!n!nxw0zP;0{^y4{$wthxU)v zAjtOCQ|f@0ur}Z!XRo=`>h{cRQY$+mM+GXG-xd-gl!=Te$tU?oxL{rel_V+V>>qyq z&c?I_D}0B}eOB+)v{vaDA3O$Z<}69TbW3Au>q__CuHC|0d4`YfrAX`y33-WXd(%#? zoB9_VQE;cjHSVQJ9}>eZxYf%|J(LtPda!W&-`%6xv{Ij`InOe#30r@VmH&AE$XaC@ zwmenS78^TPj{3(s&?FrnaOJ(-7|3f5HMV`8{wlg56+;vwZLVt2Ke=-;HT^D=GZ6s3 zKhH|i5!JZ%X%T@W;?-iMl)jMQ$bn_uXG};f#=@}X>VS{ev_uxwSapwD8f9`@P)p|R zvVT+tmN2_GT)?YKoI08=`}19uWbQdhodbovePatF2qf(+_|acrAGMBnpKr7ju9*qO z`iSAv>MjhcN^mal&Mjpx=KdpNusg95!}6ZxTDJa?Vhdp5T_W0ZFSsaH+ejS3qb!vFV7b z8_$&Trd8@EXnu49+U)XpR&|m2U*hLTHtF@&q0tv+Sh15ZOPrDG32!k7rT$CJ)Y2dA z+R)`BiZmoVgNCc>1$Uy_5baB-pKojbM#N2ZWCN6k5;@1Kf$PZ~H*JnMlXRRkYGHSiJY`Y8u5ury^4|_taPQD1C3c)miEQ(!nGu`u! ztWWk$hX(U55Jof%zK;^SPsQn8{nUuW%5hTa$13x@=wH5~Hl}0adX1Z;n@j}f%Pp(7 z90^0sTIO=qNY;Jy_Ri68-(#jYFre2?=jW!K4gDbNlWq1Qedp=9qKtORB&u+&e?%^X zXtCQ?{?L^0uB{k~jQ>SvSUa*Phws1DWNh9wfoC86<7Uk?2le85YWoziEsT{RH?2R+W%Xfg8WdiM;<+=Tr1Nk zd)~E@j?F~p)5VrlHW*t50H-tVq!vFskoUr8Sfyy0&LJu%i*W7fNps=P{{o=B>9~XZ zdgl@4glE!kT_L<8WZ}Sn%#=eSm2`#4;{95S__VD@5#2{LG*5&oUM(MebhP&Q?HO;u zj(9}Jo5X;o;MwV28}!nF6(&{)CP8iJ;3|mXT`Xc61W^B{Zi?t|5@b&K!9D%?&`Rh& z3})#nSXy)J|8H?XOQ|}ZjMm^yyrOkT^{JstM{|lQ(A_ND0+V~j#gRpR;-uSw8zovq z-W960s4!IR%hk(z12qhQ7!mYHf*e5kNap6svx^OPeq44>5RCIdU*`Iqn!-r37&MqJ z$^-w$K%2QP?y#%uFiAqIo*c^F5~5=il8QRJk2f1sF0*0r_=%ZK#_>R zGPR5)b)g@90@>?;Sf7^YP-mQ?rSALR#Al60H`%>Q7I7{GFA%`|Jk14tu_$mFw95Ck zK_(dAaCV26sIi(-J82mfHrg{-&P;EE5F;d?2yUI{g%2Oh7!(8_^%}NdK z^G||8_cP{+Ord7WtJeM&^0k!MNW*F=h_;u9^tUR4BA!?3stjjPylf9^AEDPZ{G)Jy zf{wN|Q~k+!!R6Q_*0qfEY&EeDyHN9fLqTj#BR;L&+pcr*#7&@d=w$}S@eRWhJX4jg zLRir>hDlZ~t$!20Db7HbAIp;;K}E}AZtrt`HKolJTr;W%9Z2ZaQs`Jp<8kig#n~X63R^wsAT3@y;ZiX zUsQT9kRnSfKH>PU7DLhft{9hpCwVC%J=EydT|z@yb(fdQ&Q8@?ow4}2Qd?Nn16w1X zmT5?z2B>w@BIG3x2|a+ar@;AWn;Dq!m=}O-s!h{jjUPLwrtf$lhRl zOV;gwnt+DV&_E8fA(Od2t`{19C6J=xt2@o#vKfufCATq$|47DU<$0!(m(U~1hj}95 z>QS_s{~oHL>BqxM%vAv|?+BPp>el+_1V5rqA|vT}9b4D~ri=J=&QKhjPW6K~DY`l3 z(C3jR(Tj>li%MqPk{5Oq{PYC>qBe@KC|~E&=qr~s4C<2B2@&RI@A|JhYK77HDJ
IwtHB}y5oH{Wng(~#Vv)7je|L!cx%s8Q_uh`WHm*y z945&(q1N_ZIJ~uY^tXwKLQQqUC_o_zi@-CiFSL~8@{sE%P`it=LFxI?rSmL0&P{z_ zs1o$0OQGLkF-Tpz6+une{<~pOo$p6p<~q>0r`Fy3bAu7a773kJJ$B0uR)eKKUAHZ{ z4A+C`fX<43QDvZ0;k6?({Z4$!_M6^zu<;FkeZ@i6;Y6sl#A{>k42`bXC zYW(={g_#~D@dEc@x=rz7`U#I;%mt&iL!X2c#uE@XT{9B!$BK2mu!^*=ZiO_F_F%Re zD6r6tP7YG!lZ+P+5~$YL($8f*T>#H;Azugs=NCs)!9sm?A{J!^bJeZA`4`2#*SA#K zI(Y9TYAO|;l`H2%n_ryOD~4y6A<(qOD7NaC5kGkR##chDH|2K=t5XsJf#N6xaRFA0 zgWE!eEMkTe?kZT$v*BoVQqNkx`9jiiyy%v&-9)g@0cqnbnMgm|by1B_#7#-XVsluI z8kIE+4-0fibW6<=yH4f)cF;kXZ;_G)UxPqe;oIj_*22gq0b-V3P6B$OQ^a>j${)%e z&DdCNPvFX%);)qcnxdc9fs_J^GMV5S@;lNoNO^@qQd!8AkovoRayp!aR4xo|yxo2$ z_Dr%SsrCX9_0bEvWZg8cGS_z-TgDaZ-$1$3p2JC|R;B$(3M%dQ3Z2?hqxVuS)kkX! z$FBSb$T`?<7)4Uy!q5!xs#^nzo<49?7KY!8PTg=k-?(`Ab!>tj1By^HJJ6ql z3`U@gdjrJbxow$^=@k!rP-UUWknND)NXWZ>$$hsyZVOO>$Y%_bl%F+pFa&_C@-c+g z#<}!AcpB(I`O^RBDoJ?^1T1H;sHXL z(DY-9B()Is(b;+~XifIJ>XQ4fSh(nH?_&Q=m>k9@Aibe$v0ybh6UOWMFjqY7c*{>q z0+fW!LfNfR6MHyejAPPUhucr;gQEFX()P4HR5#0JB9n*(sqK-Y{JjtlsqN4w0PNFg zPSRDjsFa-;>VLwmO%GN?NIMeRM2~GOs|W#^c&Ec1j)`8!k)^iKWO zvl|f0NfM$}*9r}fjxef!)-&_#68zjjQ>eDE(Us508~cXe8=7d?O~#(h7UtOded?1+ z2GzY~fh}IQ{BR(hu!ZA$4)4liO$>xzd2Q>lNj@eSRG4)dea`~~av<71oBs5#-)czH zJmD21%;d%ku1}Qk)N^Q1yX*Qo9u`7jJ_7_0i>!) zL5Er3LU!e(H$FA@5M}P`M zb?b3Q-{2^aWN*t9c<##c^j5Lt}{*pTR+)Jx%7N>Ebv(@jqHE{aMX5!7hZ3_rxgsF=wF3vUb4FK#~27zWX9fs&)l7ITf1kZ%guUFPQZ+xO0bM-@4 zUsI5&Imf<8lRundyxw^X-LBrEYfC_VCXZK(A`6$}27`;wCsP3b>1@39S*d7yw0g%# zJ{5tuNtazNsFbD%VjvLA$Hd{_!>zc}ZW3Qt0_Fo~mHP&?wrABMseOQM28FVL?0DXK zn+Fs+LD?sPR&P-W8y3*H{k|bH)hUq$fD9nA_yBLqr9|1a)pdL6k>ZrzHasmd!9?HLpFL z2nN|1ul&ZEL-yev44Tav@{G__-eqq2ZyB3>#l|l$AQMJk?zOGS3jM)K{=rjL@TX@M zU%%cz_L*UR;1lmYmy4HW(J=1zoQ{oOAciWu+`vlv))pYnXYIN^C$4uyt&x)r2_+B0 zuf!*HhM?gZ_5WM>iLS0{RMzYNxja*3(s=&P%CCNsK)_kc90i8i`D@>Yp)@-rd;<2( zBtWTgUHw9KP~`U+;ATiGD3ITl;of(#N>4KxrFIHKAUbc&-(z`ImiW>0p1v28FTJOf zE3Y1&eIxNTHW(v|7~gJjdeK{FM$9*+L4d1jrx9vp>%5bNP8Sz&;T)heow64>%#%eZ zbNSn3)n@XiKXwonLh*)P@kj!i8%9$5XsJWyr-(b(4R6tl)%X)Hv8fD>f(hskfZmaT zOdyX=YV2qIf#OK#hM7a^o1KFeqgRqGLj;i0!@a+;N`l660H%XyDKxOSWg5g=!#59m z6YL@z1D*yKB9IG;H}b4PrG*C5yxLs_9lmF&T`PtuVIX2;d&TMXwBMXIE9?L>Hw9(@ zNC3x+zKM!qSXO+qc#A^6t#=;_6b{B@M4YofX%gZ7bwMMEmV4YqBT`qO z8VK<-3BY!Pj?Edae7)OkT~HpfmdYJDo%6wV&b{TK&~1yd8itoYo4)tFZux+%H1PRB z2)g1PTegUxGY_WcW3a4Fv28xUq+79EMVvx1NHY7!v7<%DwXNjTpbp92VEOOTTb-8P zV1%O-1xU(WvNNs$KOrl#g4N7@ss8?%% zZ&M}_jC4jkewBbVZ_&9YM*GR{OQF^fLrll?|v^bJiF*!vVbCk5T+M> z;O9iITgY{co;Km$nbSF>_En-&Ru(?YJJQ`Ii@Ky;W-kp&t1qS2Q*1>ykmLv`sy(D7 zEfy;$f%`lBo!B=AhpE|5#0F*>Bq18^8(c|2pv0oZkCi=%QetxfD~gF|e@$&6YXa64 zOtMcAm%1k57(&;d9WGY#Eh^6N`Tb0F(EU`~G8wHPnFwofi1FvrERe4#U0ZL$xA~^{4#_(ND&f()J(eY+nip(Rq!Z@c;KMP> z4w`}kpsC9MF1ga1n>=Rp#SjX>37z#4Dk?XLV`!B?cMbXNw*_=otsL78G)(~3l3cW3 zI2t9BS_wuA#$2jI|3tS)wKSB6iNhMg5ot;LO&iQ8pJFr?))zXq51g)h3!X>m=!RcW z^Jp_gorq;Q0y|jPV-VngB3~awDepR5_9Ur#xI(~XM!k*Xlm<=gt{IVl;tr^c(VigtDH!%knF3;GBZO9(6z2 zwZcgEuJu|yM;K@FgT_C4kZdggPJl(B631@dMJ>sgS@Ceh^mUFyVgjF8c<22udZ>6V{T2 zI=KF)l1FRHZA6LA*$&b8vQ`@J^OC_nf)RD1KA{8EI4)N3MiQEh9@13NDxFrLEJ#9% z6x#4Y5Yt~~C^&y+e*lgy$JPfYjp&lZetA?kBmBvjz|K|ATueZPj03fYM*eTpekIv$ ze;f>}04h1<`Gb*-;Ld5N$6ecOzaW6*00P6`QJkCz(y%i3(A~QH@x~}D-~0D~ZIhu& zZH8MD=iR8QB>6{=9IIsGTrKx@bTQ{Z%Mx8045g^Zn87Gj$dOBOP zXP!F<1)N50uJEMvLW-vC^wUvkFnM!C-4TSn3s}Db+ItMFBZRT>fo)cf{=C6CkUEQ$ zp!k=*guox6I$W{Hk5%GOV+=$!FHP2W3QU((S)rzPV;Ex&fvp5fdXx(Gv2ft|zrn$y z=Li&J_pY6eS7PaF`dfSaiS3ePv?_Wrw*|4blQGD6g`-8zLINKRqQibP5r>4~0Dds_g0728qykaB`yhv(eWAuk}t&^>X^# zWNLU$od*mnJedtzp_NDEFd=?gG(Ylq$x_iL_aPAK03iIZ1H_1`qh^y`M?6T6WapQU zX(9rRw4{ApjqvMjIbz$#=5Bz81=`u|LTGSo<2q)(;HZWYv~-?-*ARLHs_8a_X9=li ztVTW%lJ|F8|AAC99G0iX1_|K}Exu>}@ngNmZ1UQGJ!&TyW%XO8Bdi4uX68vk=R(q+ z!gn~VkhS3E;KVj`y+r}AKi7erWchdNA8aa{w$zthhi@R!O6Cua2IaRbF(eqA?$=9}71%l9>%s?TrDO(d8i?%O zEUZ&2j<4w0Kx=s(6Vm?LQzm!IjIp108SA3~(PFd(oRU;3=c(s_TM|PCfpBoQE~yw9 z3NptwUYxe=eH2cvwnhGy1vdPR-0i&NndJ8*K`2|nPd^AG^DnpRkSTlzURbq)gR_jN z8&KkI!H5Ul^hiRSO*S@N{+mX-oB0!I?Nzhue(wV^Y+#7O=UY*6z1R%k8efAMx znCyVhr{G(Lh-hPyPv zuPeDTPI!V|G8xSJI1Ct=77Rn-6E)?Rfh9|gYlNBQmJxh&)^Z4W`kE~?MsYDU6Os*!UY2Pp+Hxae>b&AG)Zh3HFfR2%= zS{ghMOjvytzv@5wc^$EHvw2IMLNbx#fLJ5A6Iv^Yu0pJsVkNDt*o1$O;JE( zE56hn){?fHt5-}`S+p~KEY~{ScZW#ryOW9B08@XKJ}z}AUN~L9jIv$Lf2TRO)Ir1g zWC6OI?|V(*KGd8F0CdUE%k$r-U!1p?3S@7Au+;kpN*!H}Y|W0i4~C_g0yNH$6wfQ? zos?&`2tmV5G{#w!(MW_2~hPpG*g70Z*60|)P|iX|_4v-Sjpq9T6Whj_JhUT>~) zh6DF|`74Zk2ieSX);adnJaRjEw!PK5;+=b*dX>Z*I&i^I(9X{?X@>wfuv;#;!EpW~o(L zwn21AVbBUwpl)6z@y^Mz6|hL4u&2!%xtN{mQ$cWqAKCnpbpAak@V=HzTIvu13owZ6 zpHO4nTKrhtgDG1KoJiv$&?Cj3>^2moHT$rX=eMs%TqIL}9vw|?wro+dU;>4%YyLk| zW+X+B&zJk6=03!rhYfH*8Okq!#!E(C&XI;46=5xgh`+qx&KCH!a|)A1g9m7dr;LG` zus66FDHC5$tL7(;db@`ejTEZ^CHndDb!t%47yJ4e<2&A$FJhS{F4P(Fdw;V8Rpbsc z%#*_`@-v=R5kMIU8GkO7upYkf%Xil!9V_pPAD|7VyKL~?%2Kd*WK%!fOa3SetAMux zDyLg!6#}SSV-Wl+hp>*A!y18zHUy$xoV%|yup+QukMVnLL(v#BH@x6|GG6*H{0i9~o|*jY<5QGq+ra5#1OMrd4ECS&S;p7-dT^Ll@%9rA{Hypo3 z7Ihv=+k1ZL!&Kb1zHR`IFG8`|bN4nmdlf#mR835|0QxQq1Y~LjmczYgtWW?P)V8!V z*%esk;^i(t2WS9N-qTz|5a%>Q%JTimkS&jp6II+qD%GN7z<9oJ6OdoC6Oea9d1~qM zBL$VzckeyU=TxO`6{K2DbfKF%eaVIFw?^p!@)3c;e0BIIfpMUYT?( z5V2S9J5Pl&H6R+7xaISJa>L;6Mmyyu{axw@ymt}-_eAagE;pOYRXfGyE=$M(6xvF| z{eVsZbp5d4D#fiI2vmSrahy>2W(CNkFgcL!qBf7+hh2B>Qq+8;1U547c2~U-t_shpVf=k zMRdqZ>VPDzkvyb%jC1`u(y|TXkm?%?Fo}acRg1Hp_OYK-6)?7fmbu1((N+=Bf$?;X zWBGJKXh`PyP^Ure#(r+VvNxbfxzY-C9{ zz;Yx@E`QlILZSeX8o0viqWlD!9`*iXV7N)0DqEr?cOe@u zP}$jL^nUnTm6cU>KK6AEJ&ZKTQc7fIs?JNPv8<~Ot&CSO!fXpe^(TKC0JPFVhu9fKeVz2-7Z!f3j? zumIk*6LIXrYBAl4mk1->UC-=@D*8cF&O?iqy_qmI4>#XP*!@1=i`Ei4Q`1)^V1Ct! zwS;8~Quco}!^)z5dB8WOXwmGinX$t0PmZ@S7?o@^yInW zTR~nKV$z+az2+;-<2`*z(Q}VuDmrk8$|dZk@X=^3899oun&0rvsxv*esQZCMsrfta%LT#$7luEnLxUEOY^v2yn7RZ_- zcMldwHSjDxC`7=-kHajJ2-SgJ2TZwtB;h5MEJEFNIc0ph(N7uN6x?riO#J%!kVD@Y z9nqSottbBaW(Ssst7s~#zI|LYOxU{Qo=5_wqe!EJp=Qs!+Uk7#_FWrSOp|WV=jlzB z4$RHKQN4y|R--KeXcHB)_W?q==&x?gX zj-F?^NQW-;owu7JW&AsYh}Hy{g!Yl@ma$g81rb)5y{fK)Yd3sXV7n_uv!}USXR*L( zz*JG+M}s~q_?V{R_ImS|t2<-vT#KX3%g6|_X7gR8Oq6C_0sZIt>D$|WqnioAmKe28 z76Ku=mMa*J#K<`PO!$XRO4LZHeaoB3%EpTd^6xort8(iEbPD$8CFXdZKS_2kv?6O( zIZ+q^XOc1$RVhK!oOr57MG}1Km;~wQQAyWn7gL;RpII|^{G;sszXHC&K*oJ{AY=PO zeW+7;Y{DwftLHQc{(d%E#Uir*uJ%~?V@9As$`^Sr*EwVPo*mUpUk>wphBVm@d-2--4NqnuqX(E6Piu-=}-Q>kmJ+5CnW+X#RI z8DsCz;cp|Lyr6(UV4kb#b?E*V>)eogt~31=5$~}u$#_)kv2i@A* zX9G5!ghm0c=L^ha>62gM%vlBxrBRdXk)cLgpJ)I>1Ua9Nr2OlzrUjQo)8#U+)%w?x zKoVHrdo4QqKvWWAcpLvBD1>4jTu5f4Vi_r~C8!6+Z4+#Ps_qkH;3WLFsRiHUv?07r z*2|~uW>Fva?q=pOIpD^QEI54j%K1KQz*C>M0_?`;GKP(^g{6T@-2!b=I2khhAoR)@c2`WsB~XEenHnBbkY^~pVB^&SxgI7NML-2C z#lR1LY6JsZ<86qwS_YaaOshCYMh&P!Sph>lXl*j9Y}sR0E=(;^~MS zV*~OLQadC`{@uRhk^=RYnzARaa=>N2Z=km_;M@WqPrXmZAx1Dgv@8(QLM>JEg~6sJ zIRoGGZn!sqVxeC~STo`>C7G5F@-UhZX(rd93&@8F_{h@!Xr&enF`X-Kiia8D+2IE! zLQ_uleYxylV zY7Ikb=@JDWpg4$v370x>Qd_4qQuEa*Gyv*#p{adGeLC%Gdyy2UVfbU6;o1|rOF;LF zp}$3@CN#UX_Z_b8k*kqQru&D)6`Q;K%3myS7!a+XC_q_s9;aAq{(8C5`3|pqYcmo? zdbQ7oW5Q?20+Ys{HFzaZWa7MiaV*fsP;{^B&9q|$f1EZJis>EVF6U~H%0E(sTIBLt zAt}DR;VnaJ*5!x{w!5|e+67_^1na`0WC*NO*V_GNjRG@<3EGWb-O$ls)IE8(2oFYZ zyu0OwyCSr9oahHp>mkWR=MB$PAk7l#d(n9ThKY6iM0?owUhz(mkd4)-PmKXe3UN-@u#grOLe1R1T+c-f|gt$x^T(1(pJn^IIItf|)Yn57AtHY3O z;I+e=8GpWfGXD9}#6F3t;CH%muPsdg8s_A0$zmMM?_$*tto@n&sjsj3T)*X#WLP^ zH;Tn9j>)mS9c*Nc#je`dv(Ol`-)S9bDg9FWJ@#msm@lQ~cyA_rsV4e0K%6)u5wVQn z=|DRs?laZQtXe0~|8{fS~;JTVAYHR1`z1irkE%k63A$45to|992I(S7*oRnIg_Y zcR1X22t%NL1Mgi(^b=Co0Jio!N7Y@d8lfBsb8AoH2^|asWC1)UYai08fh=qoy!aSw z@dC|JY!w7rDb&{gFxa?hr4C9p-Ge!BaBZ=^Ix;7rI4ndM+4K2cM5?RfGw*j2gu!2g zhciP=MoVddk_99sqP&h*SnQ;TlA@>U0V+RnpH0DaP~gc_a5^Nze*vG))JmPkG`^cV zjYz7Lektjac*rw;!v}fWRTR<-G zRci(5xwcj?cmc1*LoE#AA#Qj~o?nK?jDk7(d5PSv&8vP`z#H?Ln+J1IKc34TVpoN` z(wzyqDoBcCnNa@5qO`e{Kb5YRvo%c-eZfx(*hi|+VlYCc=N=Rba|0~IkyFvxR$z?F5H26T!& zfq|rrBo?}C*!V~zs^yS%e@mQ~saLuLXN-rJr^}rtP9*+0XF@IS=!Vq3I%E2`qN5U~ zfiVX65m#uh%mdJTAfpO>0~8#vrkjBthYcOP4rEfK)Wz2L7CV>x72i5 zmw6J~(1TPdxh{Q}c!u8@?;mI!ZHnZr9!DBCGc=FqP=P*Qqfa z^a>u-H7WPFy6<22-ek(L*22kpnn9oeVB5f5Q!uv14;%0CR$W&4{vy+#)WzGzpUbSZ zD-5cd-FY384>x<(`>t&(m(mC{=64qc0?JIZusz9EimHkJ`QoM_EjNHX=hPK(^U$Is ze=c_+9Umd+whvgd|=AIDS=zFPdJ*Q2~fABcu z!4?N~X87r53e>$#_NMHNHcrxzv$4eFK}VgZO1ra zOWXbnu$)l4NT%0JysA`^JxmmjAJL;UGps8WFJ00I3>CQVH@WUQTlbX3e_=RFcYWMN zrLiC!@YyAn8tiGN;n4A zB?PGem$=;#MtX%^q(eDoc~*X*>bwMv{T^@P?&>Qi>jRM2$6wafJP zabdNjDJM!Lp#B$#Vh&e%0vv8wlP{)y+CVxm(ITYgAgMy18fFOrI&sDb`+{aCNH7%T#?hZ)9sly8#@kIK

#x~4rcmt2U%|komUzih^@q2{#=?Y z_Nefxr!GzBun)yIE3xlFMpAkr=n1GkN--4$!WW@+!rMZ5mod<&QbS1ps2 z={Uybt!j<&=u7$yU(?`BG(!#0HqrKfc+1QsZ?)^08nZ$R0T6=V?-H-jn zD7b+176%Qli!pZVS^8(g&o>t5xxD#i6ePN_MxLC+&8IJjCYe9FAP^kP0@+j#YeCv$6Q2*Lpi@rz=%Nta5**K3jr7>3J(Czkj}u1C!Q^HE(~=QKGroll2qG zljnjdWWN3kIOz0+zfYicObodhmwZ<``vJ+*os@09{bkM?g5DM#w8iP!d%%hEbI3W# z%v!LM4mK^oXanW+he}soFTm`XYYQKNn%KSPK88cWQ72_7*MISK>_F4S*ep$y{Upu# zXLH5h=U71In3kycu2jVFNV-O$)5~PZ+NFH1_Qd~7U+tUa zG`v9f>8-4*&j$N;fsJ6zox@L-mMoqo0LGsrN_M%+n{@C~!V9l`zgH}@tJ7IzlTZn1 zO*fhDZ>dcp`I6WCctuk9Nd}$wRgVp7k~R15^}$}{!+LG}a$sT`;$G*9Ru$tHr<_CeziN zH`CC2)J*S=_m{h^lFcOi+`mUOH*`g&X zad5VZJTp3RWy+~6p0WkTt6Vz%gy5%bVEY=Rn?iD3CA%8UV{dqnKU9MoRA@DRBdpx% z8iVVRUeTYuej;<${qK34oHphFce;C9JDtP2`=tvSb~Lv0SPDaUh5h%3GFP=~6LC#V z=iaa`w2Mu3zK#EnWb15$x?OH9wB}sFcP8oHaFc1Q_{T9hrZ1AMSywwLng0#f4Q$i; zkZkW=uk$$f)A`gmk57PqH<_TFdUc0hsxnkiCi97kT>0lVJ3v3Jm6PLgyz zJI!NQ#AKyAacvvMe^^K^Zy4NAo|OFj z?M3(R0tT^jhRdG|Y!7jPqa28H*~ux02NlYgw1o)pB^g)u+!qFQ!np-1o;?(ntn|zy z76>q$iYc4SQ86J-xvLrx)3mr%ucHCnz2ABcSs)xF_}Nsbq5#aH9!inQ=rkL+hi$9*MS8<~+dLPeU{g|N@{#21h(2qV3S@+D+cRLpiD^d`+pMPuLW19p` zWeCJrG<<}aJQS~E(vG-eMIT57*|H|{Tn;Ag*$$dG#b{wW; z)~clJq&;&lfV&T=M_0UK)yD90rUfNJmNq%PYIP=WT)q3t8v}aidP(>;RpqgoVhN8{zmkuqXBX;k!i%c2zr5#Qeu@zbqRD7l#sZ=e4-S9MjQQm-V8#elwCp{+e%zf`y6e&EH@dMDBrUWK)x z^Pk)w09c!m zF_ps=-+wiD>OcdVU2YU^?^L5@)^D$-yOh_ix9-|YooJxjVcJuVbLA^b830ZMZBwH( zKOPOQSFY&$+C}B4Je&v+U*rzzB1bTcw-UVT;MjE{0PTZEBdMnOnwvS6n~w)(L51+7 zz>55c$1)54q5U%v_pdh}EC_avV>kl_+9h?^Gv8L3KdpdG2|Z5A%VW6IoBvDf@P|6qR__bG1_X8H^9u%} z=e!(4yXC}AR5qChrwkubyp^ORE)P9;{$-7P;HP?ira>cwn!r8R9WWGAnd$r|=} z4bEC*0i0Qa7GzmKkF1qVmKNOFgmr`lbj~|%YDM# z=F$&Ecu;imuL3y_k-bAN@tMN|G4LHX`=jsU+r(#@UNQ$+`!B3d8j_#Lo#gfqi<0NK zKdqm0Bw5NCEvq{|aH7-~Q&hgl)PKw^XsL%Q5F7Eh3{iWe<54~rMtjv)MT@wyGLbH~ zChwOxo~{T}gS9}=)LVge-!470XE}XAg~rbKb2qN;mau8#OQE~7yx8xl5m_C)8mBT1 z92BPD3$LgUgF3msO>ZYH?T+8`jwIQRxfq7~{x7cHGaRlyYWu!IkPt+)=m`M z5z)dZqb5Y}(Q6RVdlJ2iVe~ROqxa6}y)$|lj56jp@ksM28{h+BOPS!9xN7ViXQg zu%Rcl4l>eLeE3cx1XKb4;QS`y+t=^UzK<1K_-(rChbZzcsxjT8G?R%;jH%Zp7Pg}$ zaD{{>`g;xqR+J)3hUp@?tx#z|8K)N)8|B$5z4z; zOD|jlSz|>EVpUrvhIa+rwQ;w7Lyy0;0v=X`-^JBQ^;Wz-xSk(b1U(0QNy=1}T3X=xV zVDg524Q}QVMa}_0PFS@UTEf?uuD{2f2qz?tNXho7JE$n=a@HK`@zQS!L%Jg2gY6*S z0?~NZ#M4vm1k^D;Vl2X^hUFL^pJ)58AtJb#(PxvqOl)0*%FdkoJr?}b*YJ067}8D- zusIx77B%#plC9O+cl3~gRN_-Mnpu1!G#gb7mGq;`_HP8&_HGZ#Z3Cui%j=f*%l=JM zf?nA+&-(UujwVKw5jX3A;f=xHbg*r_l~q~5MQT31Z|KD2e3s-!>@;VcaYx)SK?5<(Zv(iL@g-tOd%Fylg7`Obd;UquqZOLT&AK!6thU}&eo1?1mA5X6g( zp=)*6JDX{HRT#85;3~UB$!k4b3aW{C#_MW_al+~3tkFX)`hKo0EXW@4rH^)wMMR9i zv@-HnAGp+7Ev33v^*JzIuMT|s8l98^cGO%(g_-7Qz7XPem6mxCpm?;AYJdeq;c$0J z-I2V&IjWXm*Q9UJ#^>7t$V_^Aww6{OQ8m2EE7{4u&wM)xMi*k78}xtOvY_hJOWXdI zG;%`QEs*QWN58Jc&W~luNn&y^0;eSscdoioy7R@K|C$z+R#FsofpWEu@Dr$Dhs~mB zIRP%e7$PWc-Z$j1JQ2xIV6a~6CexuDPlwpWt_f{72*0;t7A61DMJ`pkJk22odt;+2 z#^!VTJOudDPdBu*fRp@(Og-?Bq)xTFziaS=Oae($Kpshd@xPw;B$>EBF#9%aNXtF3 zCm<5jLJs#mLr|t&WVUE;Gy80o%Z&+w;KF@E6M_HxTd67xn3$eWKXueMLl6Xa;VQ1c z{AS6i^UyYWO)9VtX6Cc!Jg-+db&e~W7j9Qw-L^%Em6eAfSXTlAc_Dn<)|nol$^1s3 z^l7F<+J(Cj0asPi&LD$f=rDJ72VgspMKiu0b&1)d&kQQJa4%dsn?NwJfB<;9*k@r1ek3Q9U7XBCi)kJexmF}_{@cg7w z%WZ~1{z%^7I)bd^w0GLh-Njf>z~;N)8=&V-&=Cj$0eTSo_*k`f0T|I%h6T!8Jd6$C z&j#9?`<~1=&@ibPe_cKZMLdJlR8HitUG0`t9j#wnSC@-oP3oM_<~Q2Uoa5VUk5(e( z%fDu@1y3>_;cJ`_i%RuU7r?OPu6?DwZccBx=Kp-5x*gfVEBa=Kl+k~6iUakJ`RjeM=iCr_?jqYCC^t+^dUpRQBcE;I{Ht{ohsxJI9tu%@hax z_S|{R5rfG!`%)4-7FQ3(ZKM5NAVRHpI!q&;-3rGpUjoASm7%oy$sj|3hHx zeI#^~m%0aH>7K3T)+~zE_t0Vi6vQW-b>p&arUI|OZa&b>f`61t0Y+ z4_*kCtmFp3z0S^oiSv$h%w#4&U4`bF$*pPuHCDh~m4J)`>XG6aSePY9@gCwH2TpXK2&&f;)QyG)eS_SYIrjtWZl@>jrI%0~P^Z`Ce- z_2%yad8G@vq`d6v&3bcntt}yN@U8)F`?RpC{N?~X^jub$#fCIVgvc1qq7;Ee44*cH zt#s+OnyLctgv<7A^@duS5+Kp&?jtlYKn;}Q;P9_fKI%}4l7n9>f;NZpeqNoO&;fr( zr((h3bwV@sz`Sl`_dk5pBsu%HtED@#)iyWJIWU%D#a1-kb)j@YH|Mp!W|BVX)rwV7 ze}{s%tgJot)tWxq>u9GaKLU%>&+5a*AH23*czjBuS2vK@yRybd4RlN-RGdzG7Q1); z(5Hn-{qbUU?a-|HuS+vo()t3H^m}LljUv^tGz4GfvZVJ{nB_J_1V$YeS)?;N zc;)^l75cDh#eL+DoXYNqq)r>UTrlu|(K!`>tcd29O|CHfdpVzeJ7MPTsV(+5ZZ`r? z*qfA(im{%rPmi%>FKrQ{0;Mm2qtWV?zP2_ddTp%4kj8rX7u?=Rt7%8p!*~)@_1hOJ;16;RT|qca$POB-tDTTd1O9`;yYg>;HJSHb{+UcL#(@NRCHslVm+lTYsg2GA4t`KxmQ z+`giaMx)@AVrU8q9)XEu-!qu^!kIaXNTZ7U{XoMzQE9p1!m-9PJzI`_QX+BeXez(r z^T5ecn7kHL4RA47uSm?f0V5M2GUP8Cd!qrPK@YX2UgYnHhT|q%;6B)r$fWsr)>sr_ ziIbyaU322ccL+2?)vn0v|Hq=8t97bNAJny965|uy;&yzIdR_5d>%HsahGfVNM9_eKgAJ6kgr9gOlRW zqu}N`0D|J>8M+u)Q%;47Z* zu?iHU|3B1exABL;b+u7(vmi}a3&-qz--6U-%Exf@i`#f{AR&m*R5PU4{L6$uLZzpS zL;ltqffVN(>K9F7%2pZ?tY8Qs`z5b90C)zoB~sXhivM^ZCM)``(zMj4&b7z9{JIyRX45^X-Lu)XqHy zn>vaR))8#F63UtvCv=E0l^nzUCK8OuI>Fi=`#7EerrP^eA^1(T`G#r3X?36_+0&z_ z{ihyUMmC!nxpr-PMB>VrRKchEL)Jf%lXl|2lk5m8bkxy#f>h3BQ4xnVjZY^y>?J0{ z{|Fq-s7M1tLT{R=IBtq-pD70#Abbfm>EWhq3z6&FEL%P)wN(4Fb8j_XKLl54%-*51 z{x~pnktJD;H zP^0k}87HC)bBdqTj?xGts4g7yZ-46Qvb6Hiror`ui2K{fOsxd z#(pBjAcl9A1j!m}?zH@s_1XU4(5;R~Ej#rAn|)(H+yW`B=FxMH^o0JU5T63^pEHZte$t4E)4D-i`XO?-1Lwl0basg>%uX4Z*v68TwE9$B0m{)3(?(iiv$< z{fYy69d4^o2Wcz>$M_7?6Kv+lnr{Cfkb|`?8cOM&Iq@xkBPjQEay&BYW~}lxn>|#D zEoISM2b!nveRZ0?(QY(}p>v{VI37bG;<(r)`^VUQD@8 zxe<+R-7YErw>5Lau7CkqKh~H9#itoNLH|ip9^wlCFtSKRd!y8GZ0>FQvN*j3fEDkW zU7qP670fL;_}2Bciv%AfCqoVXp7qRBHo@@iQNi%{vG0GY)3p7!?FxZgu89)X%5;zt z1gN?BF{;E(lMIcx0_qa;P5SON#a9{#0&-4m8_5Qq4ezIkwR_$cigQ9u41;~N3Lwmoz-PuSU@-i_<$Zea{2-V#9u{_+Ffh~vf^BIF+l>E91)Qbh zHn0q~Sl%T#@;-vxi=QUpaNE}rm-x(U;I<;mLG${7{{KeP0oLvRvgy9eqC53$ebyc! z@RiDPC`b>gRL=vLe0$Umzl<1U0v zH=zHar$dq7-YWY#?a#lDz||XD#onF&9Y$$t5L#@u<(F9B9S}5xexg4#)E>-I-{&l@ z1xx3E)GF&ZM>Iqnw&5io_TETk#ce{GXuHVtG9` zX+?pu>63-{P+eSALcfhR{O#d)lR#tM(W%D*$=?m%{J8>1cYSr*Gzv^m2zXoFIyV_% z)2E27|2y9pJ<)MQsM+P!3k0cz15c60-&%cxaLZG_M;d_w1QlN~WiR={`Y6ET&ux z{d`{D^n6uX2XA(#Fv>lj^4HTm&95SO`0FzUKtc8a$Y^A39hkDZBiaB@-tXzZRHP=g z1sJoM!XwXeeG;xdj!U1o^*~!D0hLGdu=4;GUrf) zIMOZlXog@Giiiy3o-ccUTP){-ek`4=@@ua(8$Bck6u%^pCp;EhwlH(x0)fcg-#t+k zr*Amt;3X+ElcY0yW&_dZ@pd8f|Abh;>l#vCZMzcgF>YX=oQs_{DSF@#dS zZa|nIfOJyh%ZkjSJ0m8Lx7E8=tJ|brqze27P0rNra&mIH^j2N!dO^Av7>&VNAWmV= z7k%@D#-{EJ&HJGE1uBUvcku$>nTSr7NsUI;%_z$Puqa6>x-UAY4k#Bmkwj36VF5Ke z-{jeQhMV`Bs%g?hi!P4E$d`wk{A(H$5BmuTG}vRS{UL$LvOcz#i$(yQPk#i_z8;9W zGx7u4IhmH?rd?Nac(uJn!MVLc*u(P1<@3fJ^GmnN?`OdstYKp-A!x~x8vEMP?Dxc2 zk`7un(ZFuAwI0M zUgwyhrV>?R9`_V|Rmw8;dg~_&$cJ(@I83~zFW%v$3gA6EVA0Hny@P=9D!`Xq*JY1- zy>umUH%%H?ku98y>hgf-uY~cW6+XMXx`mJ3>HZp*z<4`RT>SHQ%fKCP1p;@lr%6uJ z0^d|BrO!`KUP#}k1g$!2B(+tb<7y<03Ey+^CHjH})VAiA*ZORt~gE2q>SrW0g3wwUwn3;S8v4#m;g z^sH6sXW2Cx97;iVU0ClXFs4!CWD8@TX(TSYWlyMR4xD>3AxBE`JN`nEX`!7Qc2Pk( zGl?9%bH4!TeQYBFRB_Uw!$^PAr*1Z4)2M>6XE?sTNZRX*byI;SJw?jy#IVGNX17P! z69orS%wMYK4FZx@2Vm%R!>pbyH5FJ^GJ3}jO8$EJd{_X|+8yNatkK0tYq|W%TX(M0 zX5pTl)^1Ta6BZVq9`aIdsnmcTx znxspXq|P3e7*Kf6Th&m_mayMaXWJno^-oZ%X@Anq&&j>)>flQI1>mm7U)8qieGNl3 zgx~c7#~n2LtI$=?@DQDW6*H=4GzO{giOEBi9xcXk0*ET&2@G2Mm<=fgAX3O3H`wi;AuKdjt?->tb+>_1k+J#jqfa0wq+_TXWJgX^|G2UaVWXP&5E(zAAaYv_Xzwgqlbi%qK3mV zFf7w>-9Vr+Bab(-!ko}37g+uN`_*Hqf4le0=oF(&pfqc_@pX|lR)$+S_uUHfT1#EUr~y#IqXmkYl6 zZM4%^`$WOA(n)z=gK|^Pr+f$J<81%p$DriXvvd0QAFJ-% z8eYLdsb{HPcc`>f@tr(YX6_q&;hN`9+wai2%_edfR&9Q->r$(-qpbv`)i@d^NT_}B z%*rI6yKH4_(CJ*@Mmz1)zp=(jyW9Jtrvr1Y1Z8cSlib8v!uYx|f}q>svQ$zYJ~mnY zvrjY$R8rVpa&YT6?RjQxR@x;=^@FRC?=|B$$M$oUaPL))6n7m01iMb<;$?@$Y_$3m zeQjDZ|DrAgB|Y2pwLW*JyNcnNRIjM~k!Q#muMox+iPews3fRH%O82g}RFj4NJNTW` z?&0`9@nFS;_oe)TTr&Y$+#HurjK3m^fww5;n)Y8$Lw{jX`|&}xUGB*lOUyk53vo%S zXiJKg_SxtFs?mhhSHr|JdH`@b~Q-R?8z+&e6T+Ynvy8sfk`h7AG`7$Lx z^4DPT?<){{>;tVJ%%(;osVpjLx?FOB)dex9pJ(;(Bf?p`@Nmy5-@s`MV~3L)eyfny zenW3{!C9_n0mak9pb~js`#MbiF8#BLDQ^@z)?goPDfB{w!VN#TjRgQtF!9Z94!n## zKx_K=ob#(F{fP!WW7gBx+5#aJc-5bZrpUBvFPxKLPzl2FjeViey_i! z8)kzubFZ(ot}p7+ev|n%EE;XR=bqYi9kVqDM*n-0qjmEOGB z)WwQls6!pG(lG-UgwNsGB>^sil7Gf<1=WlGad6KF|AVgFG1fU#qOi50y+}%zX#7Ao zGJo|EfLNT5V00RN*TqO4fi;m|p3zoBA>H>A9xyYK-bvoYjKrV3GT)|mhx!~kP4i~W zPHx5Pu@?C{UFpG`G&#}r1*?y0QbvUSJZUL9y!jEK&8weYm-3}7n+R+2IV@M3hfjA= zW~=v-GJDY%JKnR_3-wI=YTpt;P?NLmdEwyn?2WC|%g0Xo^RJlK=H|O&vv7_zb=nsS zBj_GJXP*8dYgyDhZWJ{eG+Wusozmv=n1Dzx;=Z9#&}79Y%S`D{Y%fy;6;WE=jRbsY z*ZNgArjlh?in(;YGpMMODoa0yXDKcgdtO?n&V?<tOoH@ z@j51o-AC5BzY>h3G6l8jw)>;X?98?_kOiV2|6mvZYR+nA(WiDp!>U&R^5s=0K-%9a z$?cS+6M#G`_2)pXXtee?Q(c4o*AxfqT#2EK;o_F~+Pf~F2n}uxF@eCddEeXc07(dk zh$Q7M*kqH^vGwroL#+`C#K1wMF zyTucJ>g0||1bDHhWOWnOvZl_+sXgI?NSE@PbtLk2mc>?Ni?`L5POq#8`m4Ez$WCTd z0_fcrn2tfjU!BM6QTst6+PZDXvkEG_*0BUDBse+&GF5`-=XfC6spG zOto{k2u6H)dey6zI2o0$?MK5a*(#Mh5bX_5XR!sa40_HdvlZ`&GtRGL0;C916Y6Kf z($A;t5A$v{Tz?EiIsPimbw28&Iqdnyesmkx_x0z@!5`d;vXWP?30)zxA$$Ap_?1D} zZ^f0lQ!X>dLOQ53+Q8rGtxfuf?N8rt?X}M=$I@DzFfSkOZ;3q{JL0@_EC70PkP0Iu z+rri_D`l7{>dstp_aU!Wx7!sh=gcz=m+R_JSBVrRBuHRIwl`~N!Dk|}OYqJ}&%Zei zq03XJb8~kY1{``wKMT7%j_>5ATJ}A2-ly@+_$kmkSt)0@$Tr16k$5i+Gl**~LOs>J z9X%9CN6+%4_s}W*q*wH#uS1}9J*Qu@M`w%VMRsZJ&OM<9H_^d7s)SZrj%+ji6baGO zV^R6a1Lh>Wq4c80SE_VMU&{4ijH%x`dZ6U`CtS`z1v2mo>8qmAlkaD-(2{nu`vqt+ zpv4@=@C%>^?Yw)IQyL48@%o{5gX%caOO zJaamaUNlrVP&=4TD*)efZ5U=*32ar0N+(L*o095s<~|1~p`lwJRx>BobZhFr;7r~m zNCCj7IIjn2i6Odo&Bb?~4C(?Mc1zX`&x&CtzT@kzhi1cEu-}=_76Ui!F!VOcHs60kGT9MrpcGFV>vL|=5Kf@lGB#oD`uH@e{gnu5U+Z)j)NMju>zZ5%MEgYyhsg*2c z(OmF!1tK9FUd7iFC*uHZKhN<|Va&=KOUE9*;19-at@Ccn{+H-WnG`PLLNTK5cKx?@ z?|_N(OMr#x4mJXH=Oz2^V*iDwS$y@}U8&VRx7wnf?c}st_Lb5P-7LPQ@V$OIMFv%y z!hOF|%{q}QcQQA~ZsJm#V+2$DaB5NNnQE(dd2C_st%;o@@UfMWyQHJvOw;1t_GfDs zNS-dvoQI)`*ZDC6lyWCzPdX~GkqlE$IFyp(ac`^wJNbib8)e(Sn8m-oA7ahPKESy4 zb6;4^oYjh?a6y6{ElCDgts+`1aiF_K&U-`CZrjI!P`nxrgNAhHh@Ic_K$iqkJpYV( zufv^KK|VBhTI9nucDl7l<_e#qJa@j;F#U*}dN-*nS%4E0mv)I0XK*VU@z0sn-_~#1 zh6JKDKIV@v_atvzUC6k|o|TTMhW(;`it1)fxU^s6H(U-*s$<_5@S2IeNK1Ml=X>w4 zV`Lusw|=?B!2Ig=?jC}vQ)UZn!ON}%+IwE*s6eq#lch3CAvZTqHz_rxeirU* zfE0>Jr<_W#-WWv==TSlEwOm0w+TO_N^rKtFKEQenujXp5ji>c&6x;46DIC;loN}g( zzRT-=r0#yQkoRtHs25Npae{_I@rEAgc~)|?VuRj>7ok~chUT6E@}?ohZnIKqE;$}_ zxok!$k>Mu*r8Ko&zOT?`0t(bQv>;h4DNu~+Ms65&p|;{_AHGZ*|5JLvsILcM4!x8= z2b!WEX7O1kqpZBDHodB zQrLLrdW^W;o>V~WXrn<(Dbg}5TmXu4)JrForu>L*~cR zUX^6C^{S~D$uAl)zCzXKb$aOh&|&}wKb+R0jHfZcw+4@2Z4A@-+~7)m`!FZ?`q<-x z&*P@FTZkpqD#kWsQ4DqD@(mLOVjq`?_bvu^is-eDUw_l3W>|}>6~{y!0Pj;KA>Y4X zC&t4NAsZOx+APlT90vG-?f}K*-jGEnV^_7FbI>TMsapU)0^dAA!?Bq;*wv;_S}z-P zAJQUwxTs6OsZD-G6B44aO;IrSR{Q+{#Z4@iPhzBAe~$&r?6jKTTYS`7WagHag*pQJ z_N?pgmPpp$osRwCrdVwEsC&qa0CIXx5z;LeSzq;xa0i%1Fss&IV2xb7xJr|KiB^m z5wnxAwq41rPYRo@O@xxOUJNXnf}3~Zvqn?nC4KShFcsNz_ofVacSXt{*fBh~CK5}h zx^%W3NTL2z>~+RZRT&zapn2@9)|YLYt$_%2S|}BSehm)-CLQ3IT_R&l4!^A$KsxDU z@3QGnKY5ZZC2#9lQw>r2^E;GVUMc>u&_**)JTz>80{*g)9&x;2N;-lf`YyHVT*NXP*}alQsH;7j0^lVDBOt05+O2H0m54dFzT< zfS{oiiHXAGJ61|}aApZmdbQ4fXOAX{FTT9+W#_q?7OG}t}gQJ1+wOk)2uJV*Mi8>o=z9k$L4hb~vUcEm@O z#H>0O5ea)6j4K^wsy&G|CH5BGxA!*)rE0OpNUGPVHzAzsVY9N@~9y!S{Ff@)eXR1)+v75i;U`r~pQEmEG zurmVuR6xml92p}OKbB>OD`11nGd9ib+gaMrt~vT;QGT-B1Noq#g)Zb~WixQk6gr>NZNG zP{zFl@a19_bcRdgmfb&}a&`a$ih;m&b^7h%)-;e}tJ2T)WV6H(B}Ql-er5Iz$n^rY zh9Lkdv1KPZ{9J>5sRJi|C5NcXpg@OO$TgkIm6oDT%<^|a?Bd3R$|0k+zHht#b;f$c zRfFuYxG;~W%=pKzrcj){Y?jYFV!Fh*dtiI;EOb+o6jlY|Z+#I`GeIhRm@MXm*RSRr zZ*)EqSC)DBLI6we!pJPeRqK@1Q8Cx9rQP$-4$Hx7JTGO=2Sll8anxT)VOO`^_+Yoa zrgUH=J3R19GzEycA8dGLOCtirC|lY~uiyEc`yUrd^1VWv2Ct7;ym?8VpnrT!48TI| z7R@iqTog|8^UhIG-jw5v5D0KCe>lk@p+YRZL1fBmaa#A7Tc~udDw$>dWahm}_|0(R z9S1+gZpoNZUSxpC5@_IH=*1qo7{zg`9K6zxb2X|iz@;GLS7oY;1>PXxpE*O}rZtCe zKu=v=Pg4{7*u+)ICvF;O0$bofa!wWymtoln28Lb1fEK}#)(C=qIhk*dsCmAcy|Uu$ zS?0d(-+HRDaj z#c7&aKdY-Zv5D(Hb$At$`#G8VV&G2zPT*C{I;)JNp1JvCMFBfO(h4#q((yy-()+4a zU?gO6rHvc8mjS-#E-5jq8_PO`2?Z=crXqWYZ?ZhMRyFs-aH=wdY&()`ifpz4?lI*hcDuLsxLCvO zGj)3lL-r&br0cUqstbCW>2HT#V^J_sC(+XSG`gW%0hy{WeDQ7r!(*HiZqGb|PBZ;L zlsAoqRJK+z$hmLK-HG(J;!&L81?bEvD*PHBjwuEFd7P}UH47sf<*>QAycYqyi=Q~> z$ErDpu_oX^hXdB!3~gOud+Z9V07u zN)qiLE5PBmw9Fqh_>8IeRcJ(SzZKg;bGp)X9u(68LxK^*ri)Fx9rL`{mrs75rs`yS z{d}N}SIyBXu!kiD(Y;E_y8o^34KMGih)JKhwJW56{`E?gt(!CAG1arhL=n34>LfqN zP%6u;UF4(G&g90+2f5-$VRNcm#rbB&?@$76?(3@Jyf#embCm=N=6 zV#On4QEA?(-)ctCP781q2-t$8VUc^&Ws5-14zat^1UlI6a2ZLB6gpWKjR_vq%}m-H zZOP5KSiwK?&cd7U2EYEHEh8|Iz$(+DdA~yw;V2kar60_%28?;x*ru$MkvqMvJ<`p7cC0q!K;~yK1QFAZu_K zimJ)IB%6-f@LkmZIq1!&L7X=!tmGqchLkMMt{vf90y1%3c4XANT7zMLmF6r_8AunZ znWrCzCPvcfc&b8N2t^k1-s-Qd;i75)zYTc+D6YoCS}4(*?CoE8Fkbb~gQ@t=SKSV) zY5m(ZCEF~=Cjns(RA6H<4f_5ap%r76D=C%W@{}~{?oarPMoO?h*G^B6IyY9}z~M2& zS7yJqVav&^slJZIkHGT!hBP+Z-R(EnW(7>5BzdN_@8De%3J-aw#E`9~htRO{(4`Sf z<7L=`?^;8Bt%v;z$AT>>I9y(+9ZLkZq^FC(4r6UizbA@VWIw%%mP?1ABm2@T#fLTB zK1zk=1$^`1*tSC#u9q*T$WlbX9R@hl8QJ#E)D?B{NXNV&gDFw!^_8*%)cIg3`$6UR z!8R<$j9K=8*7v2OdIU26>oK1~xc;wju$T1fy}(a zr)D2yUCx3h38Ok#!X5;-g$R_4%(<$Z3e>W@&b&r0`dRc6?#f5!1nwNr3KJS`{MbMA zY}>Qp*$ih*aJ}X4mq21qOYTHm`^*nFRm)hlKb$!@$3Y*=fVNsv;wO4tN|SkWR|*P~ z0fp(^LrkCd;dsf+q@9a9*M{>&@6& z$KUv6)VXH?eKeQr4dKmoj*O79#nQW|pJ0o}+7<5a2~18J=&9P#5L(Fihvpx<1tYh9 z731&UYy{dI%}%17^fMKYUBoAyM4!Ms^YXn<#Wu4v6OP90tug$^pI$TfNa!64pmqdC zgi_FqA2O(|0c6y%_g8dE(z7dOR#s9P_9FrLg%TCT;@MXqf-F0^RCG_d3Si=6S~w@O z^XYvu$K`71H9nw!$agzaPAYZXfM23=L8e=6_P_ff{xyuvG>{hQb~Y_Dk;J6SUNMnf zu**0&1A+mhz|p0Hoy}==hJqo_;7#)W*^#fI9)Nt%Q0%o_6j1J%F)F2T^R~ST7ATMm z!;{D|(F|>kBvC^Jc9WNTs^rs7-}F?p=~8AscIaY48fy0-;b@etk#sH6 z!F$Ba93alNX5CV}T9&HqyR@~LxYcy`^e3FLh;|9C)8Klpq@zm05$>wV)j+4|#@t}vnDyT7A$lAi?vUbB+?t2jMF8rr;<%@ja?7!);A z%FS{r`{wO0!aUT)JHs6Q3S6%FIEu=fu?DRyJZQ@VM{lPS=bJN{+a=a5PgzC87-2o; zSDv3`djIen(AFfsV_{wWa>JeB!v1qmwbJ=FgAwB|KroZum?v*aV401-oD5kn%MfP8 zi#8oRzsD1-BLl!?d5m}ObxQgRdfPC(=|6KT3kj_(n~niVeXecX zW#~M~^}I4>oj)IooQi2AbbmveY)N(fIaXw7l_I(MznkaGo6#v|&6%@lACP4FK^~{& z-(0T`55#pg&ESny$Wz9%?<2D!?~}<~dS4A(E?vBJbS`(WF=BHq{Vf&0pHzwHSlHDA z2G|+vZh#CJisztAKBsH~cC*6qBmFeb3- zlz_lPupagAi_-|d=%(=`oY-{a~h0S@DK(OIW* zpZ2Ed$35v;=$~4cwU!pF|G9_=I{AePoU%3-@Fcq|Z>DJjnK!?qfNID|tg*~85N$iF z4X}=jbXwg?z8~;+h-y!lIN-0)>L!gWcqcBNS^Hl+L$hC^_@}*14t@aWs(=&`#?3>1DU%(_S}aF0+(EQ;A#MC~?z~C(L@iBS4AEhuMbML>ffddymn+00oIy^RHP>)yg1H6JAVQ! z;us+L>8{aK{ko6K5xgWLp5SiZdM~+QVJmu8^z$hbWLqeB#V&CS5TUaW{uDh#HIKWsAYVm2T>1NB`7@KR#`k9#nsaq* z-fOkwT{K|rnUTHqjG^F%XrGc-RyVj(xD<*8za@5xtv*S%$9$S1Eqr>G-mM>cGbc;$ z1vJaUuG$1C;|hyZwc-o)duG?SmoC<4>)puFs&`7`dFek`v>r|Pa5-ncCe}wWPjJoy zq_DD?$x?nIWu5^7X3`>9cr8L8B<18$3%DpdR2HpUQdp6PQ@R1s(dMm1sVZ;g2&eS! zd}LM>rmH+i(7Y1koO9 zB*35Au{)d0^w*~Q#c&YAIeG=pvQS{_G9s>v?Zd+JY)CW=W&_GoqLCw|vE3rL#QPpq4bFwmg=))q!Nn)t})} zp?w#+lm)sn0HX2x!q#gET*|0Qu3SDI?FWsnD0zH|mu+Xw`~wf#n#4SP7;JT_;b3-Z z7paSJ|CtZ4m*?)Bhn@Y;?~d5k;BV=|joUEzuZog=V2?Dg$4=?XhE>euzIa;T!43J@ z!)N|IbwoFTPW}TyF5r|k_}bS>#l{>kcKxlXg`Z{3(szNLCvJXeg-MK_$m(xbB-VzM zwE;&~o?I=V{<-$LryO=&GjHP2%Hk6yFH=7IB|LC$*CL}Dw`Xi)Q*QFD!!I4tcSO0) zGWZ=V1BI&_YPaO@%53)N{p8SBj9ae6tr*Js;rxyB^ycj0-W3gq+o-b*-g9-0iXcol zYsa59IBqd@<{wRRA93v&SfEHeLd}+Me7_?+w?kaNuV49Op(rtKs&IZ_iyg3{4e^4j zmZvoLlis)S9kJVo8{q|KFh{{XR95htVtq?2QX8-A8i&(3tT% zKA&dWm#|-4Ik^t?_aC%}6%vhCh^$4n>5g}H(C>MEtk4as<^pd^{!mKOyIAbnOK2eU zwYmtjX0bF>D7`E+Pa~=<$z05J;QVbth?cxRLYn?Omm(~7^P6!=^*@0faXeKy&OemLl_evcG2wIR#DUg1WE^Vq2(;bVHez|hq6I4KM{IrllFJgq^@nRA z(hh!0uxZj1X=f&bo9+Xiqy5rBDF$Ny6qXhvhNWA{P~&16x)b;$%H6cD8`(Eu5zPpB z86+mQ>MVX%eR**?Yg3aqFnd?hB2q-T(Tf$RbRMRiQF<-F(?W7LiL6U!nq#bD2!Oq(1`#yLOBHN$wt_G|Dqp=M~W2&c3HGw<#;^${b4vHE+ zoMEAbs7k<@6a%d8XuvQH!k!8Fn9|oy11;yuH)mk%^bbS59Y^E+8@DD9@(^Q?3uZp4 zX}B;n^9C0!PB5Ud&cDChinWzLPl{X_6=ppg7hWM!#KQ#=nICdm)s0l|`Apjxh0S%8 z)=C^Za;?mnsSL^|%Vi=t82&?4%JrX;$+##ebgI;2omaS(;8avkm(A+FP%Iw^WsJI& zASS8{jVf@9b;$G2*luQ|+)L-$C7vOs=ZHS*AjacTQO}rSCnyz{4^)zm8d9 z-1g;{@|k=tJr7I4;;kl{Y_YXIM|VXQu%(~n+I@|s=-G%tLZ*O5A>z~a5*&|8_fLe} zKVeR*l}QhQJ$wx?&vu%`v$++vcC-4_3wx~?4)<6155av*rUN4UNV%g@zY5pPXNeV=9gK^(xDZd!+P!e5N7|>njwR$Y=Wc zEw-oF=7Wq=>xi<3<3K)&n+tZg@Sznr2=~46qbvhI6cnjcle=)tm*D(;z{*GzQJ$j4 z=7@X?#eCu$36|EhI_1!h$ccrF?mOc|o+dQi6}QsKtds+{J4_Cce|c3!^*0_)QgrpdrRnkNOb%dJSUytHDg z^p7dOaxvo0MJ_MyD^1#%xOSIcq()pUhi$bis2MJ~qLK zgkg|DsHyd;1r#L8y;$`gw{Y124oIn;t)vG;GGm&VQIieI++~XQ2|VjVno>uoi%F0KIWnb}Xh__xZ2RB+D)Q+h8sLJa zp-w5hK~UpVDsKi)S^}Ty+5g!6c~)Z7r2)IMfgNaIxeU)Hfy`Fgz(i(ebY&7SDCvf! z0QHj$AdFYf;s!B2R=rI$CIqrnW@-BUX5F8vPvY@bg;kH^KkWapKC52F%}qiW$N9e8 zTq**!nxn(G$+>xvWyk`=uaUvo8(b!cyiUfc5o7ujF8-GH1VI0Ih`XG5(qGP5lovnP ze;4&AtId~T^J^0o&qN-`XonxDHJT>)m0@)%z^!@Lva$}zEOsCm3MZ_o6Eu;VU#EQ;StUxRosYfGi;=2Ejzc%_v>%MW!X!K-a! zqV%S&BN7b$&N>rOgho#ot(d6{W6fP>^c`QHcBVf?Y+-XG)Tp6OzG25#IpFR|a#WVd zD|=JedS8A_RNIrt`U`}o;j+XxC?48{y@L=g-hMgAi?6sentcznuvw&GvU@8uo&cQw zZy>=h25(F2Qcn#2e1wbd@Jso3F@HDHA@F9v16pgL;Rz zLD+}%t3?yzzqZC8=t*s+J_A=Jfvj{~JGU~*!6xz3SgsZ#k9Q@3JMmDQECsd?iU_PJ zxU>CpHD-O}^M{20UwQ8t)@0MQjmCls2;72*QbnamReG<|d+#7Z5D2}58iJyrNUzeR z_fA45BGP*ap@$9u0#ZT`?07%#v-h#TAK(7|@BB%QD|5}PX=~24X4W}e{s7Jbxv;d; zMLxo=rB4(desN%+#d) zDnt2jS*3q0`3^B&>a&AM?*3jh&cPI~R2PdbeE#7&fIwXN<*z>)ePkLGp__cr@Na+q zL6)gR`0<_N1VT)(hI0svGyiuUGcrt}5FGc*lVF(Vn<6$n~S01BOmT^aEpDeHhENw|=fW z$Qo;U@&Gw53M;}7WOKhFSS4X|NR!B<;V!0=WyE`EzJC+y4)mK5-16%=_fc_6^p!8O z<*6q-WxQOW#$7r$1rU-1c~+*#r`)=BHo^e$aUJelOxm;VIz(~l=o5+i0& z7m<((3dZd}Pb6HpgD2X;7BNjh;8LOt$`GChx$~HIc9e+&$IwHR8c@^0rrAg^To9&!mPEW>^6Cjt7l3qW?1sKYS^4Ai4@9Z~xTH z(3RY($TWN53GNo-1xBtVNsyk*^v-0K)c(ScFDPOkiTS&eHiPZtp6-XlkLS0ai;$Z8 zZtt@^{=WZc?wu?j!Lyq*_jZ&`$mHSm(cq)!sxPCzssFt|CP#@+A1hFl_3_PA%d>q-V#!Xe?PD^<&W<(Evc(uLE{g1r;0l5+kvw#_&7F6Q^wzaEX8#Y3)Uvb zXW>=dy6n-!kvpQNM!9FoTvMObL6o#J49YN*5s}f*c&Fae(+^fiZrEe}3otoIc9Tcz zfkzW9-+by_%OyT?$YM!{^r40v3os2h~2a@~N zn@O(>HV+^49kzTM0s+=zh?SO?pHime!}$Ae)&jTPI(u*} zI0}53yR9m+aU6%*CV}ucepOQpwF_3W*+`n=YbOu}Fcb?Gx?d>$#~-!?JFC+k2$O8P zT&@{5`Vn=r(kCU~bmXQ2QY0$fhKoW&Lz<*;{DZs_gYWf;*nt!kavamu)bHkapt%FS z3$D|EPd&e>*`7^Q_AU%r1gTkD} zeV~?gOyab#(+fdX&x9k0Vu`55FT*{!+R<8Vs4!YX@iAD<0#(ZEh(}eSX*Elx>NAo# z4aUx+Syv%@+b@LeRVRZOb?Odb_w_q0(skgNIz#RpXb>u~XjI!OB;de=%odSzxdQVf zNY5~wnNJ{NuoYS!czKY0E~p!FMZgCHiot)mD&AdW_jsp%oN{8Hij~9X#Xo{48Qiul zA1f=u+=~c+Pdb!l{mn}@?B(0NMi`(MM%R0a;UBV@@$S~=ai2n)CVqH40dJMbW@jf$T z5tJLQ&1FIQi=@@^q5WEsq0p-TqPy9M5VJ{A*YT&K538nH#-u zp}QiJZI<8CB0tJ7T?`j5A4CDY(Ata|AasEgA3uk5m*uy=x2OI+oysP7uZ)81q~pxh zx}$Y`G#hh6Fo!cvFYp-_+&I!?$!t0~79#XF#;w;vxc7BC!deG%@n8;yBM{&rOOj|L zw=v!I>sw3qZIAv*&Vvhjc@~7++u_(@Ycs1Zbyc}+KcK$e4K2-*Fu>hxX&?0`7UEej zDyWa+H{C>>EM8-1rH;T1utB(bs7^$kZ=7x zV3dQ8ot9Z*vbfo<9rs+|6=n0L;&UNF1&V7m=RfIeTzw^Na#czenl|1ZYNtU625X43 z_ig&Ct@0qC4~2}fia|;56hcDp{+U01V*;nI>Cb>hukpI_kpKA6knk|!kv-aCklkcR zEa`J3k3w6?)?O^~-V;!LV#_Vtirh zRAUVLIg(A}oP+P18DX&2a?Y;g<{lf8QZX!@%SKs!^malR!0O^Nhd)N6@;lZ^=hi=j z9#%a=5PN8Uazg9SA2(|A_J$eu5{sC>?QDvSoZ!AiTc{wt_I0GL?oEvOY+P`B8Kx*B zF2YM>q|5Xc?$=}jW8YZeedzHyZ9{tY32)7hJxRDaF8D5ihbBxb^=qMKMhGwCxul~V z>;zW+>itP%!%1T8{$koj-?2$@`ll3YJ-_j~JvQ0^;s;fOI%6;F2?+6ks`=v1HyD9o zoDluR8oNT`bhI+UXn37y*r}-8J{CVG(u$K6d63e0bhnJK6VDcE5Ixn!?efVe37O;^ zOFvk*;dtWI?iq0B9Z*(p3far0AnvQHd-GUaEr(GNFJ)XylE0 zc&Fg-aSZmVLsJp0X4ZE+U}V%UW^AIhOB3NzNYG4g_MZ;@^0yOMwYpE>BmP&L#(iwI zPjW6VHXZdYF27q7MzYg0L@J@sL^xuNk8F#Z){u55QL8g{w=1`$msI>HfZmWzs{bcm z2y{|PR46zWUNvm=WsVYjLTx2`yR+T#=?(94V!rPW#feB+urIjUDy5;*r9M&FdERBW zT%AJxhFp1^J)c+oPD_8*D5`VQHQNCtMu;eM*4xx<{a3(Y&6zWue?!;3x`BjSUTiEZi42e_lcTMnNFGB2|B(=o4&;qRA~FK=SfP&>eYMR z(TpMEhHmsEP|Nd~Cj)2w?*-zw+o^aZu%eJ`zQyXjW%fT-``9Edf2={)Bs5Y3=+ z^Yjy%A|9Je-Al@4r*z&!yxhP$| zG-i`0`x@cGSWp7(A!~i-rwL<|*BdPy(u#I0Cb^npnk97Gwh;Mp36XO_;YlhwW1RVP z^e-5EJcgD|ABjXiVgEcoqXPe0;zvBYah_u`wpZrj!oJXn{#&E6^!tr=%NQ5-t*<3~ zXR>RX4dj5}$$!2k1@%9_9)$NglNR2RA$edp{zgMSmkd&!D33iB5K=~wvkC~=^$oR- zR)Otw#@9>Ur@3Qma})cduzai6I6p(F3Qjj03Da_h)Y;xly0j-GN~x z@$W4fZaWI8?=VeJ=0a`Fey%!MK|@1jCaUX#Mp(8c2VLq_i$)H}MO;3pF)(5^G~P(H zIdi(Vg{gZuU?BRg8o0TjT{;EiA}POaF#i~J0}!JMXQd@?HiL@n!#|JDOWb%$eXrC( zF0IFL(=jXd31%i`sE}|a%q?ApLs}N;W}TtEX5KMK*XpQ-xX0Q`s?#ho?aUinXEy0g z&%C!Qxe)Lpd!sbz;TveE0xx@AA&@S6T= zWn-iN2vt#c1G#6K<5_uJK>-LpdOIwR?Ash|$cK|W{Y((!neg*7T(GCP0w3Er>4z*) zm*n&r&t&WgwmjPPnc4S7`5?q0$MmVnU18Rer^BUzE;Fd7pQn41HVtH33z{|7QXMkX zm2uFIise%b_m*5~~BnoQK};^=cO!Eygau>-VsSJhT&YT)4DJTlmhQ zm^b$81=cmYYb!ODne_I+ zZl}LK8#=HBrqEG};=e&wi?{eHm~H@+IfJ7CW zbor%6(@+fnGLkA?D9jtHhjJ*`*!r`9i zHcP8DZJ>J7nt2n*ygPR-Sw&K1E{4uEYka+r3N4P2z`la=vsg!43maqjq+s9V3!1h0 z7IA;Uc_%84( zm1HV9lm2IBSZZ+k7>LdQYkyVv-qCUJA}QrF6V&?&G0mf>@xrHPuH{`@|BTZc8wr{u z;xd6oJ|O@g92hWJ|P807hp};ogreR zrj1qTbjS2GZY*EE{9zO48F*;MZWw=g7W!@V8tiNB?kcYwwT&N@MaQY0?<9}4;rMn= zO&jyjQW)E$@$?na?L3fO67dh0Hy>?ju3E3EnGZtxMm?_kmJUKRa-F3C0Fm445L8>% zc6r-fruJ8WJW4|CAHqS({=ivge2%oYi;KP zC{JlaayL^0gA0Mp*08C8cLw92LFv4WCe|}hM+J%m*fH6D$}-DAKXGT4)kbB>bZQ=s;zP~*s?y*M(M)1wL!N4Q-`Sr zuk+}BPh-wdFVwtJwW6z$A565m0r<&0x)<}DdhGkITj+mOXpV2NkII52Yuzz_HqLka z%;@GW&vw4mCS>~gXVw9sz7#yTPq1SlewZ4Y_-)u6w3llLnNr&VdFGW8=MZ6O2c27w zc;lx?zc@#0pKqO3y9lfwiEUawM&~;pT9wYQPN{lC+S3J+q zQSM^?aV&Y<^VY!?Z*KUfliqGhNDD0#w)vQ8MmxXiprZo=kAo+YbJ{J|lF;>Y31JOdE}QFGpb@QQvq#$Htb*rU z%m%Lkx~852axr|cMc7@jf?z9{J#fNWp7PwDL3>IAVMM}jxr{F~dWXJRCUB%w9tHlw z`332D$8XD*WI)GqYYJJ<#n_=M=sfg$v1aBvgA7s+;B3VyLe#!lw^3RgKAd#f;w0?l zn{)-t4aARm2z#xD-rr`EhF~~^Zv)D|^=;`?D_LF`B${9SX+4;TuAol2Xmaw zhksb^7$iE;o1(pwhr$VkuKFS3W+2?+lYk$<7EcY_L&kt@pDpjv7`_DXv%PbCX6>4ks;?kVgQ?q74QclS& zO{*`&oe{lpW}x`Jg9>8m=9#Xlog&byVRkMip&hWzii7ur>twT7iyrO3tXYojN?;b5 zchX5mRzr4?oWsU>Y6%5fmQ_&Cg^*Zv$yT%bGR-PtH~P9aP0#cF3o@up+iUIJ%l%Fl z_C}@!ys+QS2f|OwiWdk~k09A7uJhleEsQ-^GybCaEBA#T?p<~B#E%LKmXGX~7sr!| zUC)tq1~_0N$6Ae!NJ;D3q_Df7<8PrFFrNk~K$e|gz;=`z&^u%<@F8a@OUHSy4KJCG zJYb}$sEm;X7CYHC9XETamB`3Xc7Chqb=$R0F;noBfA@uV3RmSbl$UgFAUCgph5H!vs}Z84kX3WwW(#0!ngw6DWe=V)0n((+-)C{^XAI0w3nmw%+Ay>ZcjJVe8KAK zc#M|NG;b4;n8V7L8}j~5%)>0LL3Rrf^D+_JY(cUW2plP)eGT@UV$*m7hR8ZEmJPBJ zQC6oTWC{@MP`d~2GzmEed*a3ioqlD>hw~U#5kRdb9~$cI{Ti0yXpRpwd3`+yjc@Br zub)YBA9D0}INT^}Ym@DQxL07tPlzEQ;x)6`9q0YSMP)aYA8?c^DP)^pu8H}1bXBg! zh6S-grLW(KPHQ-D7m{IsV7u{ozrA2}jAet8*s5=~3EH0|u7zUP#Xg3i;9LI5;PBD< zG+T!_o1o!__7T0&^-Z-Dl}To+IK;lAwt?g?jkLmDQa@(V(Y$W8wX*&wA9)=bs|`U< zLxv8G;a2?IkCoMlcEUWty9^Fi@NzsPVGt-PiN6Dm=MVEoZX5Hq32VAQx%{Y>lkOM@ z9;a#W9(eUzYzPp{ok6`j&22@qSt4$pc6CvROV5_=Z%W!kZUPvZs(tFQE<2YK#V zA_<^a7KpQVEQ&XEzOzG;Q0^!A799%Rjj@_*)#mF+@i?iqs$2PG#~^iyc~=awVo%(| z#!1eC@+)3*HJ`i>SNt^-s(3h=r}0%`i;gbe)2_fa?WduI6`S{Z<0tj2Iz(=2;!C>Y zop>=%L>y^!a{VKx1oAUc-Ix>>rky$FjDXeNMbC@AUipBYh`lRh=)4+_l*)pM@kE8^^|E&XF7?W-^o3eTkkm%X`hqC9ElGW~7(tzX@H#Dx|GuZ?rwLt1+m4S9Yl&ae05<~?ufsb&HLp=6Cu@ZxoD38(X9qVC;C%SEDH0Z z;}Q5ax!o+tnQSFIZNSEdH?EdV6rHAM*;xsbMv%NC=(ry` zZ11;ui8c~e27?RkxBN(^3;8kC=3Uuxb->YB;}nd4ciWjM5qUH=$yo}$?!VKB8QT~P zaO=6Ff#+O7s!+I|rd#9JHh3dn*#Lgv9vDVv>whvp)`9|x&-2ba8Ap%*Sc|k)Nsp6W z@VDm48@KN3N-A{z*2I5#k9c^vys>mY%TonQT^j{fB<6ps^&1`a%Dfxq308fGUN&x@>=2zt>R>L2Xw_ST6GMN3IFHcx~Z?0{A8}&_!HJ@_WuaC4YgF5D2-_H$5>P7nGnT@ zF&`>@LCAIYOlH7z%}|S>rdq=l*!-<{Ntb(=AD*%mPO}Evp(B&k|I>#aP_R=L_nF_c z%RPV$keVP7;2)8{jZ0|s#ed5M06xh7Z7=}fHo5ixBrqpCeLT|!LTrO9GDtjh;ZF2) zXMQHSwe<^R+2uCS0-EFZ{wix%14AV4f^Pv|WdHmAU@)PR{$>ai9Gu{RDz*t9GEB#L z=FGjaPB%ASW&J^B!mR~)9~C&lf+okcd8N0a?lbmEVpPu-xLL-;$2ApW z1oitaMN3y(ws~S)z`nIDm$_LzmxA89ZbKrC(1EXGn#aGfvnqDzV{7ysy$;5+i}6a; zT{5FS8S!V+AXp5d!}%AZ-%6~*!9<@}tR`C@tiA!Sk1vsUg{?f0g|ywjIIdqiABJm5 zkpo7=1j?RSl#yHyOICgo2JK~jUb=ub2(%1GDa{@P`JQW`eegpiqDv~gP^W;C)-&VN ztNaUR>5oo&r^uA(u3_|I&l zi+&A?0~$6P?2YsVOt8oy?&QVcHcn`@IU z&oMX+W;X~HZ8wo%h^sTqBTt z?hNv;eGoqWy*HcgWQ~IB2o3sr3d1dNieVPiG#6S-z#yo6e%W*?ZzK5T3_ZR}n%_)Qq%GNL+Kg@f9GA?^wD+gL zz@RTlVvk<5j=NQQTm|lgI~R7IdB$*<*V~5w1et}uXfebiI^d~&=p~uba$lV`>H7FN z|L#{lWWfsujXY+yYPrX*Ng8PyA{t_Z?lx`sshc`|bdlBboCzG~Vun)f)`?$T*E?;6 z3%yrpoRfey-`_D|<4j)bAEba7+*x-@I^z!_9Bq5lmt=6}ft5t1y6;}yD^m5tent0R z<|mF_lv#V=HCV<1_DK9zvr%u(zm(1Srp`f7L}RVY*61Mgj+qqM-4RB=4dJze0VcyQ z>E3PT;^%67Zjd;XBO_r{gi;^;`SafPnwm}J136ey&*il5WnAqg>Hsfo?_|N5b_XKQ z*wD~C!AelE_6(npUUdjfNB&xtWX^E71FU)bMZNrG*xXF39li8p^g42vx|V4^xTDRv zIj+_4Onfd>@H?bXBgp2&qQ)H%Y!mfY69WME=!Z=Hmzn?nsT%$_31mzImevq`*KMt1 zWH4vYq(R?`*7YR8odC!aCMdqeFPk(sXBZ-a9RB2HNpBF$HUN1<70IHWGrA2gB;$;( z?vHe6d}=7>4e@FvN5(pZmGVfV!FSWTX*8uxt`4_z=e7HFbnXBa#|bORV$}t}vuuk5Dn=OsW^7{%Kf*Bkwi+`e2g-4oqB`mC1J&ztG5 z6Lj#c(Y;@j70iIgX~Mbd#PM=_LFbRDo25D`uQMkmc3>;1ToMHDi*!P*8IKA@D;ePb3R*iI}M%% zIs1grU@5Nv^q~|e7n1A!VYOcYCzGLU^B2ax7&E3Rcf={~8~FS(*|I+4pnDrqD0Rsx z9{9BSa2gIN%s?z``9fNTlrVhI)7tmW2u;+L^DMZNTOK95hoa9&hNSzosKOV55JNtb zbJ7ayG~#%fBdO}`T#IG+7f5CwRNZrXJbel%D69pYZo0laV{r*v|Lx+L6?K@Uwp$#<{7K7h{B?c&#KE; zF?@BY^+(vm8qX7+fNRTc@7fbl{lW4RV!VHEetrn>Fv%G%y1leDUK(Z*8b1H%{Bpsx ztxt*&KcE%#8s##~ZRXSCkR?63u%V5SfTgC0Xw3($bVtTWBdXOEE!?x4efl?rq~HH> zaM@gZcVbi)2-(dLCm{c}8Pl`3NU`R5Z-}?_=D}ab6qPmgPiu_6@l(kr?m7S7gh zEpJbMm!TDIDkIty_zsoO@jjN&F))StmEzcl$1B>mVAPHVt()NKyyt8~4_{``4o|J7 z1s2aKJ+r*)ZJ%o9>YRDrO{A9B>^Gz|_Rydsndf<|6oS`6!Z_)2pC%RvV&9$2*Is}? zVa=%QQ9nz&o8V;$!9Zy-&rEZ#g{LVOvxtU(`Un@#`Hk+6L9gADOb{rmz!x=b{yba& zz^T`>vYR4KetwO|h9TQuJ0W>K!@IHJ>%e^Zyt&)V zg(rXb7AK(xg(O|td?w@o2{Nsw_n00$4!Sz4p~>_NO}HL1W6J3MFl1t2Iz4%Tfsgzt zJfshF@XF*gDvu9f^#A3O%WLTiC+j$;r>d9tyez31(h|R(GeVB}2`Di^JAoU0-=Wt* zwpU>_88-kj{=^PuZ&4J04?mcl+v%=jDlL3I(~ZM1ou5>P#m9wM2!K?kpMY!fA`H<- z>Ep(d(c7%`*C^Zpl&M0x#2VHJ#2;mwMXP?9AhD_8s1SNNd8im|XN-I$eT863>f1Uk z7!r=eA6VlB)StET?QzEkJ?%FN-0bq@VKRK?136*s1^H|aHWG9iIrV?hQhM$GXw$#w z4gh%S_Al}S1poXOn*sng!~Uyo|NoI7U3+ zF2TV)djIj?@BafN`oCFj{@*!#`W3pZRXpp`Jc#?ZjHvv(Oo1afBBLL_9L=WQ{!KiR zq>(D1QlOEx9{TZ{-Os;|{x^Melisitc)WRG8++zMpsdhk2zXKySqrr7QRiQIr19is zt#H%+-70%kWyPW5<2wT@N_J=j|kZ-M{-J#bK~gYf?7N~alIdf0^S zl8wHZp@4DnHDO=|YgI0WB0Kr>on(p=8rnr5U8;Vkvv{cQi3(Y*l(es!VrJoXsLbmnkMo0A-I?y4ToB)jqwpSmVHJlzbR49u zEA|~+CWbjyKkVJ;96eHfoD2pnSvNG1-`5#@o@>O^o@7-Sb_o-w_sv!22zWU0+|Sr1 z(x9RY7-D*90KSxaA^;8y91*Cb@zig=G5jz-4uAG0dY|9v`%zbN|CE%A*GC0C#-w*X zh|jnBcA7#31Bck7%)&%z)oFPDNm*KNw9XsI66u!s_8a>Lem_IboPcFy`iX?fc>IAW z15K|T^aTgP)~6`is;M~(L! z;|=2l%HD8V=)nd+ES0PVZ+6qP2WJ{&)-(J|6WU(u`bTB3`J~=v1qW{ z)+Z-OLJRK;c6>ExzjyfEK5;O>vT3)qukU&PAy>==KHPKQII_I!tlwAwm@ptTD*!f6 zxI1yJx(=p9t4-hebq&E=YBj`K~a9SzJA zVk+t;@D>ForvK@<7&HYbe>05#wIpr!(_e zQXpeZz<|xA=1IS-`z<_+Zon1AEk6k|el-h$)d%%*jEZZu|y#=%COd|Buc~ODQd*k>Vtx!@CN<4EbnZ>;?BD^p< z_MP}(?h^PwyT0tZj8V0JO#;q)#ilLhOKr8>f=4T1Lo8%q(2jrDxJmQb{vch+hI(>+ zMU!%_Up&^WaF(3WGg^r|-3({G_@?Zb8u6Ba^r}==1v`;uj#H&AVIT8V%#!+ozej$8&te#!WvzjhXOO zsN758pmL_NwNZ9Z*VCm^T#rt04@rZ+uL!cSj9VkGW||CRloM3xsQQk5^_yFc6S7ZT z!QR}ugC79XSIEBTJq%os(4rF?1iQiy%GHA>N1QJNZBGq49~{(OH$)Q8_E&1AYPT)& z)DBi)F|6#o_mgnmi6d=5zF`T9*B2^%{@wbvtumfNf}@^t-gKMrfiKfZO+4pw1?rEf zB*CHG0=Y;#SVx*@Pr(w5+QZs5w!R)*KAv|ao?X(iPVtAigcZf975G*fP(=Zcb`$r^f6ttI1Wc0{G@;pRzhX18~00*HH$=*sa|E%eUWGPp;ew3 z#es_Oz@I41v>^_&#E1yt56nVluurA;8ECTjpT%*bDh}_*?yt)Sj{O)__e4E^4b%}> zd?7jfnvz4^C&t;&RviGoF#sNpOngS=wjm^>is5Vu-1~NSUNMqiF4j5E%S=cyv!i`j z&&A{)z7IR1(9D%QV|hc@T7Il3(&aL;^WZr6-OHMGi5b%$n-WwJFkRYa!r|8)@Z8Op zNM{Brx!lAqkI_55ohh}zbBkrcHKFK}v{*Kwez;Xj!<8{hazmvC<%w0^%SxMHd6W9h zv#O2>hOV6qA7*S(aDgv@vev&{!qPNjfK3c@FF?x0_xWbuj?z)<23meiAg3udeg}`2 zG@Ac}Yoq2Hl?+;H-F*#F1?G{#zS(|Ick<^VxCNg&-{vjj$$Uu&0dP~`T#$GP?Vte8 zIPnEeA(QO+)%XjolD>X_>X#~npfA<O&C!qOzw3RcHa4S`zyDnR~$Wg@eIhX_O|kpie{FvUcp$U59q{(M(Ah-Gkal!oh?2 zB;wu$sd}T!&jA`BZi{I7`*Jh%CcO97jjAN=n6%hy2V0!=W;XhPAIzl9L{a#Enz2rv z-c)0(_O)Q=vAKxOlU{9SR;b~CjmC!XbFKT!$Hr2x;RVGm1}ljOba&DWEOE~U0X2!av)`3gH1eL|-P#KdGXCE@*2h5{Fm>m`oAgGtOH$M^jcjqraM_?CtwDh;?Iu!0XC4&Fd@N(?6n$!2oAE6pev_nwd83S@Dr{C<(BZpj|f z=O3_wB}%Fbeaays{S~UD z=HyFW-PjX{Jv;~zm9@QSBD{50gV~%UccC^mKL6+w zIy*RlbQKw__pXPb+|WP+I%8YUZD{Zp*xx4lui5oM?ESy)YPTH>?&C+ESO0kqVEVtE z(Es&H|M!LeV_V{X^^0q=J2hM-m>!(u6HadtK5krOQWsn(8uJgte|`J}YuoJbGyti+zr0**9sZDV|S^N8@ih|~w3c0r*|1X%i-8KLK literal 0 HcmV?d00001 diff --git a/examples/asr/asr_cache_aware_streaming/speech_to_text_cache_aware_streaming_infer.py b/examples/asr/asr_cache_aware_streaming/speech_to_text_cache_aware_streaming_infer.py index 75912f1c03c1..7726c2b2740e 100644 --- a/examples/asr/asr_cache_aware_streaming/speech_to_text_cache_aware_streaming_infer.py +++ b/examples/asr/asr_cache_aware_streaming/speech_to_text_cache_aware_streaming_infer.py @@ -46,10 +46,13 @@ It may result in slightly different outputs from the sub-sampling module compared to offline mode for some techniques like striding and sw_striding. Enabling it would make it easier to export the model to ONNX. -# Hybrid ASR models +## Hybrid ASR models For Hybrid ASR models which have two decoders, you may select the decoder by --set_decoder DECODER_TYPE, where DECODER_TYPE can be "ctc" or "rnnt". If decoder is not set, then the default decoder would be used which is the RNNT decoder for Hybrid ASR models. +## Multi-lookahead models +For models which support multiple lookaheads, the default is the first one in the list of model.encoder.att_context_size. To change it, you may use --att_context_size, for example --att_context_size [70,1]. + ## Evaluate a model trained with full context for offline mode @@ -58,7 +61,7 @@ The accuracy of the model on the borders of chunks would not be very good. To use a model trained with full context, you need to pass the chunk_size and shift_size arguments. -If shift_size is not passed, chunk_size would be use as the shift_size too. +If shift_size is not passed, chunk_size would be used as the shift_size too. Also argument online_normalization should be enabled to simulate a realistic streaming. The following command would simulate cache-aware streaming on a pretrained model from NGC with chunk_size of 100, shift_size of 50 and 2 left chunks as left context. The chunk_size of 100 would be 100*4*10=4000ms for a model with 4x downsampling and 10ms shift in feature extraction. @@ -273,6 +276,13 @@ def main(): help="Selects the decoder for Hybrid ASR models which has both the CTC and RNNT decoder. Supported decoders are ['ctc', 'rnnt']", ) + parser.add_argument( + "--att_context_size", + type=str, + default=None, + help="Sets the att_context_size for the models which support multiple lookaheads", + ) + args = parser.parse_args() if (args.audio_file is None and args.manifest_file is None) or ( args.audio_file is not None and args.manifest_file is not None @@ -293,6 +303,12 @@ def main(): else: raise ValueError("Decoder cannot get changed for non-Hybrid ASR models.") + if args.att_context_size is not None: + if hasattr(asr_model.encoder, "set_default_att_context_size"): + asr_model.encoder.set_default_att_context_size(att_context_size=json.loads(args.att_context_size)) + else: + raise ValueError("Model does not support multiple lookaheads.") + global autocast if ( args.use_amp diff --git a/examples/asr/asr_vad/README.md b/examples/asr/asr_vad/README.md index 03c7efa146b9..f39b9735b20f 100644 --- a/examples/asr/asr_vad/README.md +++ b/examples/asr/asr_vad/README.md @@ -8,10 +8,16 @@ There are two types of input - A manifest passed to `manifest_filepath`, - A directory containing audios passed to `audio_dir` and also specify `audio_type` (default to `wav`). -The input manifest must be a manifest json file, where each line is a Python dictionary. The fields ["audio_filepath", "offset", "duration", "text"] are required. An example of a manifest file is: +The input manifest must be a manifest json file, where each line is a Python dictionary. The fields ["audio_filepath", "offset", "duration"] are required. An example of a manifest file is: ```json -{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000, "text": "a b c d e"} -{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 10000, "text": "f g h i j"} +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000} +{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 10000} +``` + +If you want to calculate WER, provide `text` in manifest as groundtruth. An example of a manifest file is: +```json +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000, "text": "hello world"} +{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 10000, "text": "hello world"} ``` ## Output @@ -25,23 +31,30 @@ To run the code with ASR+VAD default settings: ```bash python speech_to_text_with_vad.py \ manifest_filepath=/PATH/TO/MANIFEST.json \ - vad_model=vad_multilingual_marblenet \ + vad_model=vad_multilingual_frame_marblenet \ asr_model=stt_en_conformer_ctc_large \ - vad_config=../conf/vad/vad_inference_postprocess.yaml + vad_config=../conf/vad/frame_vad_infer_postprocess.yaml ``` -To use only ASR and disable VAD, set `vad_model=None` and `use_rttm=False`. +- To use only ASR and disable VAD, set `vad_model=None` and `use_rttm=False`. -To use only VAD, set `asr_model=None` and specify both `vad_model` and `vad_config`. +- To use only VAD, set `asr_model=None` and specify both `vad_model` and `vad_config`. -To enable profiling, set `profiling=True`, but this will significantly slow down the program. +- To enable profiling, set `profiling=True`, but this will significantly slow down the program. -To use or disable feature masking, set `use_rttm` to `True` or `False`. +### Using RTTM to handle non-speech audio segments +- To use or disable RTTM usage, set `use_rttm` to `True` or `False`. There are two options to use RTTM files, as specified by the parameter `rttm_mode`, which must be one of `mask` or `drop`. For `mask`, the RTTM file will be used to mask the non-speech features. For `drop`, the RTTM file will be used to drop the non-speech features. -To normalize feature before masking, set `normalize=pre_norm`, -and set `normalize=post_norm` for masking before normalization. +- It's recommended that for `rttm_mode='drop'`, use larger `pad_onset` and `pad_offset` to avoid dropping speech features. -To use a specific value for feature masking, set `feat_mask_val` to the desired value. +- To use a specific value for feature masking, set `feat_mask_val` to the desired value. Default is `feat_mask_val=None`, where -16.530 (zero log mel-spectrogram value) will be used for `post_norm` and 0 (same as SpecAugment) will be used for `pre_norm`. -See more options in the `InferenceConfig` class. +- To normalize feature before masking, set `normalize=pre_norm`, and set `normalize=post_norm` for masking before normalization. + +### Frame-VAD and Segment-VAD +- By default, `speech_to_text_with_vad.py` and `vad_config=../conf/vad/frame_vad_infer_postprocess.yaml` will use a frame-VAD model, which generates a speech/non-speech prediction for each audio frame of 20ms. +- To use segment-VAD, use `speech_to_text_with_vad.py vad_type='segment' vad_config=../conf/vad/vad_inference_postprocessing.yaml` instead. In segment-VAD, the audio is split into segments and VAD is performed on each segment. The segments are then stitched together to form the final output. The segment size and stride can be specified by `window_length_in_sec` and `shift_length_in_sec` in the VAD config (e.g., `../conf/vad/vad_inference_postprocessing.yaml`) respectively. The default values are 0.63 seconds and 0.08 seconds respectively. + +### More options +- See more options in the `InferenceConfig` data class. diff --git a/examples/asr/asr_vad/speech_to_text_with_vad.py b/examples/asr/asr_vad/speech_to_text_with_vad.py index b22ff709c344..ecdfac42f665 100644 --- a/examples/asr/asr_vad/speech_to_text_with_vad.py +++ b/examples/asr/asr_vad/speech_to_text_with_vad.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,9 +29,9 @@ ```bash python speech_to_text_with_vad.py \ manifest_filepath=/PATH/TO/MANIFEST.json \ - vad_model=vad_multilingual_marblenet \ + vad_model=vad_multilingual_frame_marblenet\ asr_model=stt_en_conformer_ctc_large \ - vad_config=../conf/vad/vad_inference_postprocess.yaml + vad_config=../conf/vad/frame_vad_inference_postprocess.yaml ``` To use only ASR and disable VAD, set `vad_model=None` and `use_rttm=False`. @@ -40,13 +40,15 @@ To enable profiling, set `profiling=True`, but this will significantly slow down the program. -To use or disable feature masking, set `use_rttm` to `True` or `False`. +To use or disable feature masking/droping based on RTTM files, set `use_rttm` to `True` or `False`. +There are two ways to use RTTM files, either by masking the features (`rttm_mode=mask`) or by dropping the features (`rttm_mode=drop`). +For audios that have long non-speech audios between speech segments, dropping frames is recommended. To normalize feature before masking, set `normalize=pre_norm`, and set `normalize=post_norm` for masking before normalization. To use a specific value for feature masking, set `feat_mask_val` to the desired value. -Default is `feat_mask_val=None`, where -16.530 will be used for `post_norm` and 0 will be used for `pre_norm`. +Default is `feat_mask_val=None`, where -16.635 will be used for `post_norm` and 0 will be used for `pre_norm`. See more options in the `InferenceConfig` class. """ @@ -72,10 +74,10 @@ from nemo.collections.asr.models import ASRModel, EncDecClassificationModel from nemo.collections.asr.parts.utils.manifest_utils import read_manifest, write_manifest from nemo.collections.asr.parts.utils.vad_utils import ( - extract_audio_features, generate_overlap_vad_seq, generate_vad_segment_table, get_vad_stream_status, + init_frame_vad_model, init_vad_model, ) from nemo.core.config import hydra_runner @@ -97,15 +99,16 @@ class InferenceConfig: vad_model: Optional[str] = None # Path to a .nemo file or a pretrained NeMo model on NGC vad_config: Optional[str] = None # Path to a yaml file containing VAD post-processing configs manifest_filepath: Optional[str] = None # Path to dataset's JSON manifest - audio_dir: Optional[str] = None + audio_dir: Optional[str] = None # Path to a directory containing audio files, use this if no manifest is provided use_rttm: bool = True # whether to use RTTM + rttm_mode: str = "mask" # how to use RTTM files, choices=[`mask`, `drop`] feat_mask_val: Optional[float] = None # value used to mask features based on RTTM, set None to use defaults normalize: Optional[ str - ] = "post_norm" # whether and where to normalize feature, choices=[None, `pre_norm`, `post_norm`] + ] = "post_norm" # whether and where to normalize audio feature, choices=[None, `pre_norm`, `post_norm`] normalize_type: str = "per_feature" # how to determine mean and std used for normalization - use_pure_noise: bool = False # whether input is pure noise or not. + normalize_audio_db: Optional[float] = None # set to normalize RMS DB of audio before extracting audio features profiling: bool = False # whether to enable pytorch profiling @@ -113,13 +116,13 @@ class InferenceConfig: batch_size: int = 1 # batch size for ASR. Feature extraction and VAD only support single sample per batch. num_workers: int = 8 sample_rate: int = 16000 - frame_unit_time_secs: float = 0.01 # unit time per frame in seconds, equal to `window_stride` in ASR configs. + frame_unit_time_secs: float = 0.01 # unit time per frame in seconds, equal to `window_stride` in ASR configs, typically 10ms. audio_type: str = "wav" # Output settings, no need to change output_dir: Optional[str] = None # will be automatically set by the program output_filename: Optional[str] = None # will be automatically set by the program - pred_name_postfix: Optional[str] = None # If you need to use another model name, rather than standard one. + pred_name_postfix: Optional[str] = None # If you need to use another model name, other than the standard one. # Set to True to output language ID information compute_langs: bool = False @@ -130,6 +133,9 @@ class InferenceConfig: # Decoding strategy for RNNT models rnnt_decoding: RNNTDecodingConfig = RNNTDecodingConfig(fused_batch_size=-1) + # VAD model type + vad_type: str = "frame" # which type of VAD to use, choices=[`frame`, `segment`] + @hydra_runner(config_name="InferenceConfig", schema=InferenceConfig) def main(cfg): @@ -243,7 +249,10 @@ def extract_audio_features(manifest_filepath: str, cfg: DictConfig, record_fn: C out_dir.mkdir(parents=True, exist_ok=True) torch.set_grad_enabled(False) - vad_model = EncDecClassificationModel.from_pretrained("vad_multilingual_marblenet") + if cfg.vad_model: + vad_model = init_frame_vad_model(cfg.vad_model) + else: + vad_model = EncDecClassificationModel.from_pretrained("vad_multilingual_marblenet") device = torch.device("cuda" if torch.cuda.is_available() else "cpu") vad_model = vad_model.to(device) vad_model.eval() @@ -256,6 +265,7 @@ def extract_audio_features(manifest_filepath: str, cfg: DictConfig, record_fn: C 'labels': ['infer',], 'num_workers': cfg.num_workers, 'shuffle': False, + 'normalize_audio_db': cfg.normalize_audio_db, } ) @@ -284,7 +294,13 @@ def extract_audio_features(manifest_filepath: str, cfg: DictConfig, record_fn: C def run_vad_inference(manifest_filepath: str, cfg: DictConfig, record_fn: Callable) -> str: logging.info("Start VAD inference pipeline...") - vad_model = init_vad_model(cfg.vad_model) + if cfg.vad_type == "segment": + vad_model = init_vad_model(cfg.vad_model) + elif cfg.vad_type == "frame": + vad_model = init_frame_vad_model(cfg.vad_model) + else: + raise ValueError(f"Unknown VAD type: {cfg.vad_type}, supported types: ['segment', 'frame']") + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") vad_model = vad_model.to(device) vad_model.eval() @@ -358,8 +374,6 @@ def run_vad_inference(manifest_filepath: str, cfg: DictConfig, record_fn: Callab logging.info(f"Generating segment tables with postprocessing params: {vad_cfg.vad.parameters.postprocessing}") segment_dir_name = "vad_rttm" for key, val in vad_cfg.vad.parameters.postprocessing.items(): - if key == "use_rttm": - continue segment_dir_name = segment_dir_name + "-" + str(key) + str(val) segment_dir = Path(cfg.output_dir) / Path(segment_dir_name) @@ -368,13 +382,13 @@ def run_vad_inference(manifest_filepath: str, cfg: DictConfig, record_fn: Callab else: segment_dir.mkdir(parents=True) t0 = time.time() - vad_cfg.vad.parameters.postprocessing.use_rttm = True segment_dir = generate_vad_segment_table( vad_pred_dir=pred_dir, postprocessing_params=vad_cfg.vad.parameters.postprocessing, frame_length_in_sec=frame_length_in_sec, num_workers=cfg.num_workers, out_dir=segment_dir, + use_rttm=True, ) t1 = time.time() logging.info(f"Time elapsed: {t1 - t0: .2f} seconds") @@ -432,9 +446,14 @@ def generate_vad_frame_pred( with record_fn("vad_infer_other"): probs = torch.softmax(log_probs, dim=-1) + if len(probs.shape) == 3: + # squeeze the batch dimension, since batch size is 1 + probs = probs.squeeze(0) # [1,T,C] -> [T,C] pred = probs[:, 1] - if status[i] == 'start': + if window_length_in_sec == 0: + to_save = pred + elif status[i] == 'start': to_save = pred[:-trunc] elif status[i] == 'next': to_save = pred[trunc:-trunc_l] @@ -443,11 +462,13 @@ def generate_vad_frame_pred( else: to_save = pred + to_save = to_save.cpu().tolist() all_len += len(to_save) + outpath = os.path.join(out_dir, data[i] + ".frame") with open(outpath, "a", encoding='utf-8') as fout: - for f in range(len(to_save)): - fout.write('{0:0.4f}\n'.format(to_save[f])) + for p in to_save: + fout.write(f'{p:0.4f}\n') del test_batch if status[i] == 'end' or status[i] == 'single': @@ -476,18 +497,30 @@ def run_asr_inference(manifest_filepath, cfg, record_fn) -> str: # Setup decoding strategy decode_function = None - if hasattr(asr_model, 'change_decoding_strategy'): - # Check if ctc or rnnt model - if hasattr(asr_model, 'joint'): # RNNT model + decoder_type = cfg.get("decoder_type", None) + if not hasattr(asr_model, 'change_decoding_strategy'): + raise ValueError(f"ASR model {cfg.asr_model} does not support decoding strategy.") + if decoder_type is not None: # Hybrid model + if decoder_type == 'rnnt': cfg.rnnt_decoding.fused_batch_size = -1 cfg.rnnt_decoding.compute_langs = cfg.compute_langs - asr_model.change_decoding_strategy(cfg.rnnt_decoding) + asr_model.change_decoding_strategy(cfg.rnnt_decoding, decoder_type=decoder_type) decode_function = asr_model.decoding.rnnt_decoder_predictions_tensor - else: - asr_model.change_decoding_strategy(cfg.ctc_decoding) + elif decoder_type == 'ctc': + asr_model.change_decoding_strategy(cfg.ctc_decoding, decoder_type=decoder_type) decode_function = asr_model.decoding.ctc_decoder_predictions_tensor + else: + raise ValueError( + f"Unknown decoder type for hybrid model: {decoder_type}, supported types: ['rnnt', 'ctc']" + ) + elif hasattr(asr_model, 'joint'): # RNNT model + cfg.rnnt_decoding.fused_batch_size = -1 + cfg.rnnt_decoding.compute_langs = cfg.compute_langs + asr_model.change_decoding_strategy(cfg.rnnt_decoding) + decode_function = asr_model.decoding.rnnt_decoder_predictions_tensor else: - raise ValueError(f"Only support CTC or RNNT models that have `change_decoding_strategy()` implemented.") + asr_model.change_decoding_strategy(cfg.ctc_decoding) + decode_function = asr_model.decoding.ctc_decoder_predictions_tensor # Compute output filename if cfg.output_filename is None: @@ -499,7 +532,10 @@ def run_asr_inference(manifest_filepath, cfg, record_fn) -> str: if cfg.use_rttm: vad_tag = Path(manifest_filepath).stem vad_tag = vad_tag[len("temp_manifest_vad_rttm_") :] - tag += f"-mask{cfg.feat_mask_val}-{vad_tag}" + if cfg.rttm_mode == "mask": + tag += f"-mask{cfg.feat_mask_val}-{vad_tag}" + else: + tag += f"-dropframe-{vad_tag}" cfg.output_filename = cfg.manifest_filepath.replace('.json', f'-{Path(cfg.asr_model).stem}-{tag}.json') cfg.output_filename = Path(cfg.output_dir) / Path(cfg.output_filename).name @@ -509,10 +545,12 @@ def run_asr_inference(manifest_filepath, cfg, record_fn) -> str: "normalize": cfg.normalize, "normalize_type": cfg.normalize_type, "use_rttm": cfg.use_rttm, + "rttm_mode": cfg.rttm_mode, "feat_mask_val": cfg.feat_mask_val, "frame_unit_time_secs": cfg.frame_unit_time_secs, } - logging.info(f"use_rttm = {cfg.use_rttm}") + logging.info(f"use_rttm = {cfg.use_rttm}, rttm_mode = {cfg.rttm_mode}, feat_mask_val = {cfg.feat_mask_val}") + if hasattr(asr_model, "tokenizer"): dataset = feature_to_text_dataset.get_bpe_dataset(config=data_config, tokenizer=asr_model.tokenizer) else: @@ -542,10 +580,13 @@ def run_asr_inference(manifest_filepath, cfg, record_fn) -> str: processed_signal=test_batch[0].to(device), processed_signal_length=test_batch[1].to(device), ) + with record_fn("asr_infer_other"): logits, logits_len = outputs[0], outputs[1] current_hypotheses, all_hyp = decode_function(logits, logits_len, return_hypotheses=False,) + if isinstance(current_hypotheses, tuple) and len(current_hypotheses) == 2: + current_hypotheses = current_hypotheses[0] # handle RNNT output hypotheses += current_hypotheses if all_hyp is not None: @@ -562,9 +603,16 @@ def run_asr_inference(manifest_filepath, cfg, record_fn) -> str: # Save output to manifest input_manifest_data = read_manifest(manifest_filepath) manifest_data = read_manifest(cfg.manifest_filepath) + + if "text" not in manifest_data[0]: + has_groundtruth = False + else: + has_groundtruth = True + groundtruth = [] for i in range(len(manifest_data)): - groundtruth.append(manifest_data[i]["text"]) + if has_groundtruth: + groundtruth.append(manifest_data[i]["text"]) manifest_data[i]["pred_text"] = hypotheses[i] manifest_data[i]["feature_file"] = input_manifest_data[i]["feature_file"] if "rttm_file" in input_manifest_data[i]: @@ -572,19 +620,19 @@ def run_asr_inference(manifest_filepath, cfg, record_fn) -> str: write_manifest(cfg.output_filename, manifest_data) - if cfg.use_pure_noise: + if not has_groundtruth: hypotheses = " ".join(hypotheses) words = hypotheses.split() chars = "".join(words) logging.info("-----------------------------------------") - logging.info(f"Number of hallucinated characters={len(chars)}") - logging.info(f"Number of hallucinated words={len(words)}") - logging.info(f"Concatenated predictions: {hypotheses}") + logging.info(f"Number of generated characters={len(chars)}") + logging.info(f"Number of generated words={len(words)}") logging.info("-----------------------------------------") else: wer_score = word_error_rate(hypotheses=hypotheses, references=groundtruth) + cer_score = word_error_rate(hypotheses=hypotheses, references=groundtruth, use_cer=True) logging.info("-----------------------------------------") - logging.info(f"WER={wer_score*100:.2f}") + logging.info(f"WER={wer_score:.4f}, CER={cer_score:.4f}") logging.info("-----------------------------------------") logging.info(f"ASR output saved at {cfg.output_filename}") diff --git a/examples/asr/conf/conformer/cache_aware_streaming/conformer_ctc_bpe_streaming.yaml b/examples/asr/conf/conformer/cache_aware_streaming/conformer_ctc_bpe_streaming.yaml index 98f23458cd86..32afd919a454 100644 --- a/examples/asr/conf/conformer/cache_aware_streaming/conformer_ctc_bpe_streaming.yaml +++ b/examples/asr/conf/conformer/cache_aware_streaming/conformer_ctc_bpe_streaming.yaml @@ -103,10 +103,16 @@ model: # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one - # for chunked_limited you may calculate the look-ahead or right context by the following formula: # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 27*4*0.01=1.08s + + # For multi-lookahead models, you may specify a list of context sizes. During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. + # The first item in the list would be the default during test/validation/inference. + # An example of settings for multi-lookahead: + # att_context_size: [[140,27],[140,13],[140,2],[140,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [140, 27] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null xscaling: true # scales up the input embeddings by sqrt(d_model) untie_biases: true # unties the biases of the TransformerXL layers diff --git a/examples/asr/conf/conformer/cache_aware_streaming/conformer_transducer_bpe_streaming.yaml b/examples/asr/conf/conformer/cache_aware_streaming/conformer_transducer_bpe_streaming.yaml index 9d6e3a54d9fe..d55e5f927b2e 100644 --- a/examples/asr/conf/conformer/cache_aware_streaming/conformer_transducer_bpe_streaming.yaml +++ b/examples/asr/conf/conformer/cache_aware_streaming/conformer_transducer_bpe_streaming.yaml @@ -113,10 +113,16 @@ model: # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one - # for chunked_limited you may calculate the look-ahead or right context by the following formula: - # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 27*4*0.01=1.08s + # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 13*8*0.01=1.04s + + # For multi-lookahead models, you may specify a list of context sizes. During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. + # The first item in the list would be the default during test/validation/inference. + # An example of settings for multi-lookahead: + # att_context_size: [[140,27],[140,13],[140,2],[140,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [140, 27] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null xscaling: true # scales up the input embeddings by sqrt(d_model) untie_biases: true # unties the biases of the TransformerXL layers diff --git a/examples/asr/conf/conformer/multiblank/conformer_multiblank_transducer_bpe.yaml b/examples/asr/conf/conformer/multiblank/conformer_multiblank_transducer_bpe.yaml index 84d767e4a3b5..51e57e72e2ad 100644 --- a/examples/asr/conf/conformer/multiblank/conformer_multiblank_transducer_bpe.yaml +++ b/examples/asr/conf/conformer/multiblank/conformer_multiblank_transducer_bpe.yaml @@ -179,6 +179,7 @@ model: decoding: strategy: "greedy_batch" # can be greedy, greedy_batch, beam, tsd, alsd. + model_type: "multiblank" # this must not be None in order to use the multi-blank specific decoding method. # you could set this to [1, 1, 1] so that big blanks are treated the same diff --git a/examples/asr/conf/conformer/tdt/conformer_tdt_bpe.yaml b/examples/asr/conf/conformer/tdt/conformer_tdt_bpe.yaml new file mode 100644 index 000000000000..0210bd5a2dad --- /dev/null +++ b/examples/asr/conf/conformer/tdt/conformer_tdt_bpe.yaml @@ -0,0 +1,281 @@ +# This file contains the default values for training a Conformer-TDT ASR model, large size (~120M) with sub-word encoding. + +# You can find detailed info about TDT models at https://arxiv.org/abs/2304.06795. + +# Architecture and training config: +# Default learning parameters in this config are set for effective batch size of 2K. To train it with smaller effective +# batch sizes, you may need to re-tune the learning parameters or use higher accumulate_grad_batches. +# Here are the recommended configs for different variants of Conformer-Transducer, other parameters are the same as in this config file. + +# Note: the added duration outputs from the joiner make TDT models slightly larger than corresponding conventional RNN-T models, +# although the difference is tiny -- the added number of params is roughly num-durations X (joint_hidden + pred_hidden), typically in the +# order of thousands of params. This is negligible even with the "Small" config with around 14 million params. +# Recommended duraction config is [0, 1, 2, ... , n] where optimal n is usually between 4 and 8 depending on the dataset. + +# +--------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+ +# | Model | d_model | n_heads | n_layers | conv_kernel_size | weight_decay | pred_hidden/joint_hidden | pred_rnn_layers | +# +==============+=========+========+===========+==================+==============+==========================+=================+ +# | Small (14M)| 176 | 4 | 16 | 31 | 0.0 | 320 | 1 | +# +--------------+---------+--------+-----------+------------------+--------------+--------------------------+-----------------+ +# | Medium (32M)| 256 | 4 | 16 | 31 | 1e-3 | 640 | 1 | +# +--------------+---------+--------+-----------+------------------+--------------+--------------------------+-----------------+ +# | Large (120M)| 512 | 8 | 17 | 31 | 1e-3 | 640 | 1 | +# +--------------+---------+--------+-----------+------------------+--------------+--------------------------+-----------------+ +# | XLarge (644M)| 1024 | 8 | 24 | 5 | 1e-3 | 640 | 2 | +# +--------------+---------+--------+-----------+------------------+--------------+--------------------------+-----------------+ + +# Default learning parameters in this config are set for global batch size of 2K while you may use lower values. +# To increase the global batch size with limited number of GPUs, you may use higher accumulate_grad_batches. +# However accumulate_grad_batches is better to be avoided as long as the global batch size is large enough and training is stable. + +name: "Conformer-TDT-BPE" + +model: + sample_rate: 16000 + compute_eval_loss: false # eval samples can be very long and exhaust memory. Disable computation of transducer loss during validation/testing with this flag. + log_prediction: true # enables logging sample predictions in the output during training + skip_nan_grad: false + + model_defaults: + enc_hidden: ${model.encoder.d_model} + pred_hidden: 640 + joint_hidden: 640 + + # variables for TDT configs. + tdt_durations: [0, 1, 2, 3, 4] + num_tdt_durations: 5 + + + train_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 # you may increase batch_size if your memory allows + shuffle: true + num_workers: 8 + pin_memory: true + use_start_end_token: false + trim_silence: false + max_duration: 16.7 # it is set for LibriSpeech, you may need to update it for your dataset + min_duration: 0.1 + # tarred datasets + is_tarred: false + tarred_audio_filepaths: null + shuffle_n: 2048 + # bucketing params + bucketing_strategy: "synced_randomized" + bucketing_batch_size: null + + validation_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 + shuffle: false + num_workers: 8 + pin_memory: true + use_start_end_token: false + + test_ds: + manifest_filepath: null + sample_rate: ${model.sample_rate} + batch_size: 16 + shuffle: false + num_workers: 8 + pin_memory: true + use_start_end_token: false + + # You may find more detail on how to train a tokenizer at: /scripts/tokenizers/process_asr_text_tokenizer.py + tokenizer: + dir: ??? # path to directory which contains either tokenizer.model (bpe) or vocab.txt (for wpe) + type: bpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer) + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + sample_rate: ${model.sample_rate} + normalize: "per_feature" + window_size: 0.025 + window_stride: 0.01 + window: "hann" + features: 80 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + pad_to: 0 + + spec_augment: + _target_: nemo.collections.asr.modules.SpectrogramAugmentation + freq_masks: 2 # set to zero to disable it + time_masks: 10 # set to zero to disable it + freq_width: 27 + time_width: 0.05 + + encoder: + _target_: nemo.collections.asr.modules.ConformerEncoder + feat_in: ${model.preprocessor.features} + feat_out: -1 # you may set it if you need different output size other than the default d_model + n_layers: 17 + d_model: 512 + + # Sub-sampling params + subsampling: striding # vggnet, striding, stacking or stacking_norm, dw_striding + subsampling_factor: 4 # must be power of 2 for striding and vggnet + subsampling_conv_channels: -1 # set to -1 to make it equal to the d_model + causal_downsampling: false + + # Feed forward module's params + ff_expansion_factor: 4 + + # Multi-headed Attention Module's params + self_attention_model: rel_pos # rel_pos or abs_pos + n_heads: 8 # may need to be lower for smaller d_models + # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention + att_context_size: [-1, -1] # -1 means unlimited context + att_context_style: regular # regular or chunked_limited + xscaling: true # scales up the input embeddings by sqrt(d_model) + untie_biases: true # unties the biases of the TransformerXL layers + pos_emb_max_len: 5000 + + # Convolution module's params + conv_kernel_size: 31 + conv_norm_type: 'batch_norm' # batch_norm or layer_norm or groupnormN (N specifies the number of groups) + # conv_context_size can be"causal" or a list of two integers while conv_context_size[0]+conv_context_size[1]+1==conv_kernel_size + # null means [(kernel_size-1)//2, (kernel_size-1)//2], and 'causal' means [(kernel_size-1), 0] + conv_context_size: null + + ### regularization + dropout: 0.1 # The dropout used in most of the Conformer Modules + dropout_emb: 0.0 # The dropout used for embeddings + dropout_att: 0.1 # The dropout for multi-headed attention modules + + decoder: + _target_: nemo.collections.asr.modules.RNNTDecoder + normalization_mode: null # Currently only null is supported for export. + random_state_sampling: false # Random state sampling: https://arxiv.org/pdf/1910.11455.pdf + blank_as_pad: true # This flag must be set in order to support exporting of RNNT models + efficient inference. + + prednet: + pred_hidden: ${model.model_defaults.pred_hidden} + pred_rnn_layers: 1 + t_max: null + dropout: 0.2 + + joint: + _target_: nemo.collections.asr.modules.RNNTJoint + log_softmax: null # 'null' would set it automatically according to CPU/GPU device + preserve_memory: false # dramatically slows down training, but might preserve some memory + + # Fuses the computation of prediction net + joint net + loss + WER calculation + # to be run on sub-batches of size `fused_batch_size`. + # When this flag is set to true, consider the `batch_size` of *_ds to be just `encoder` batch size. + # `fused_batch_size` is the actual batch size of the prediction net, joint net and transducer loss. + # Using small values here will preserve a lot of memory during training, but will make training slower as well. + # An optimal ratio of fused_batch_size : *_ds.batch_size is 1:1. + # However, to preserve memory, this ratio can be 1:8 or even 1:16. + # Extreme case of 1:B (i.e. fused_batch_size=1) should be avoided as training speed would be very slow. + fuse_loss_wer: true + fused_batch_size: 16 + + jointnet: + joint_hidden: ${model.model_defaults.joint_hidden} + activation: "relu" + dropout: 0.2 + num_extra_outputs: ${model.model_defaults.num_tdt_durations} + + decoding: + # Using greedy decoding is highly recommended for TDT models. Using greedy-batch will give very bad results + # if omega is 0; even if omega is non-zero, greedy-batch results are still going to be inaccurate. + strategy: "greedy" + + model_type: "tdt" + + # this must not be None in order to use the TDT specific decoding method. + durations: ${model.model_defaults.tdt_durations} + + # greedy strategy config + greedy: + max_symbols: 10 + + # beam strategy config + beam: + beam_size: 2 + return_best_hypothesis: False + score_norm: true + tsd_max_sym_exp: 50 # for Time Synchronous Decoding + alsd_max_target_len: 2.0 # for Alignment-Length Synchronous Decoding + + loss: + # This is the main different between a TDT model and a conventional RNNT model -- the loss function. + loss_name: "tdt" + + tdt_kwargs: + # FastEmit regularization: https://arxiv.org/abs/2010.11148 + # You may enable FastEmit to reduce the latency of the model for streaming + fastemit_lambda: 0.001 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. + + # refer to https://arxiv.org/abs/2304.06795 for the meaning of the following three configs. + durations: ${model.model_defaults.tdt_durations} + sigma: 0.05 # hyper-param for under-normalization. + omega: 0.1 # weight for regular RNN-T loss. + + # Adds Gaussian noise to the gradients of the decoder to avoid overfitting + variational_noise: + start_step: 0 + std: 0.0 + + optim: + name: adamw + lr: 5.0 + # optimizer arguments + betas: [0.9, 0.98] + weight_decay: 1e-3 + + # scheduler setup + sched: + name: NoamAnnealing + d_model: ${model.encoder.d_model} + # scheduler config override + warmup_steps: 10000 + warmup_ratio: null + min_lr: 1e-6 + +trainer: + devices: -1 # number of GPUs, -1 would use all available GPUs + num_nodes: 1 + max_epochs: 500 + max_steps: -1 # computed at runtime if not set + val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations + accelerator: auto + strategy: ddp + accumulate_grad_batches: 1 + gradient_clip_val: 0.0 + precision: 32 # Should be set to 16 for O1 and O2 to enable the AMP. + log_every_n_steps: 10 # Interval of logging. + enable_progress_bar: True + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 0 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + check_val_every_n_epoch: 1 # number of evaluations on validation every n epochs + sync_batchnorm: true + enable_checkpointing: False # Provided by exp_manager + logger: false # Provided by exp_manager + benchmark: false # needs to be false for models with variable-length speech input as it slows down training + + +exp_manager: + exp_dir: null + name: ${name} + create_tensorboard_logger: true + create_checkpoint_callback: true + checkpoint_callback_params: + # in case of multiple validation sets, first one is used + monitor: "val_wer" + mode: "min" + save_top_k: 5 + always_save_nemo: True # saves the checkpoints as nemo files instead of PTL checkpoints + resume_if_exists: false + resume_ignore_no_checkpoint: false + + create_wandb_logger: false + wandb_logger_kwargs: + name: null + project: null + diff --git a/examples/asr/conf/conformer/tdt/conformer_tdt_bpe_stateless.yaml b/examples/asr/conf/conformer/tdt/conformer_tdt_bpe_stateless.yaml new file mode 100644 index 000000000000..fefbd6f8f56c --- /dev/null +++ b/examples/asr/conf/conformer/tdt/conformer_tdt_bpe_stateless.yaml @@ -0,0 +1,278 @@ +# This file contains the default values for training an TDT Conformer-Transducer ASR model, large size (~120M) with sub-word encoding. + +# You can find detailed info about TDT models at https://arxiv.org/abs/2304.06795. + +# Architecture and training config: +# Default learning parameters in this config are set for effective batch size of 2K. To train it with smaller effective +# batch sizes, you may need to re-tune the learning parameters or use higher accumulate_grad_batches. +# Here are the recommended configs for different variants of Conformer-Transducer, other parameters are the same as in this config file. + +# Note: the added duration outputs from the joiner make TDT models slightly larger than corresponding conventional RNN-T models, +# although the difference is tiny -- the added number of params is roughly num-durations X (joint_hidden + pred_hidden), typically in the +# order of thousands of params. This is negligible even with the "Small" config with around 14 million params. +# Recommended duraction config is [0, 1, 2, ... , n] where optimal n is usually between 4 and 8 depending on the dataset. + +# +--------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+ +# | Model | d_model | n_heads | n_layers | conv_kernel_size | weight_decay | pred_hidden/joint_hidden | decoder_context | +# +==============+=========+========+===========+==================+==============+==========================+=================+ +# | Large (117M)| 512 | 8 | 17 | 31 | 1e-3 | 640 | 2 | +# +--------------+---------+--------+-----------+------------------+--------------+--------------------------+-----------------+ + +# Default learning parameters in this config are set for global batch size of 2K while you may use lower values. +# To increase the global batch size with limited number of GPUs, you may use higher accumulate_grad_batches. +# However accumulate_grad_batches is better to be avoided as long as the global batch size is large enough and training is stable. + + +name: "Conformer-TDT-BPE-Stateless" + +model: + sample_rate: 16000 + compute_eval_loss: false # eval samples can be very long and exhaust memory. Disable computation of transducer loss during validation/testing with this flag. + log_prediction: true # enables logging sample predictions in the output during training + skip_nan_grad: false + + model_defaults: + enc_hidden: ${model.encoder.d_model} + pred_hidden: 640 + joint_hidden: 640 + + # variables for TDT configs. + tdt_durations: [0, 1, 2, 3, 4] + num_tdt_durations: 5 + + + train_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 # you may increase batch_size if your memory allows + shuffle: true + num_workers: 8 + pin_memory: true + use_start_end_token: false + trim_silence: false + max_duration: 16.7 # it is set for LibriSpeech, you may need to update it for your dataset + min_duration: 0.1 + # tarred datasets + is_tarred: false + tarred_audio_filepaths: null + shuffle_n: 2048 + # bucketing params + bucketing_strategy: "synced_randomized" + bucketing_batch_size: null + + validation_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 + shuffle: false + num_workers: 8 + pin_memory: true + use_start_end_token: false + + test_ds: + manifest_filepath: null + sample_rate: ${model.sample_rate} + batch_size: 16 + shuffle: false + num_workers: 8 + pin_memory: true + use_start_end_token: false + + # You may find more detail on how to train a tokenizer at: /scripts/tokenizers/process_asr_text_tokenizer.py + tokenizer: + dir: ??? # path to directory which contains either tokenizer.model (bpe) or vocab.txt (for wpe) + type: bpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer) + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + sample_rate: ${model.sample_rate} + normalize: "per_feature" + window_size: 0.025 + window_stride: 0.01 + window: "hann" + features: 80 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + pad_to: 0 + + spec_augment: + _target_: nemo.collections.asr.modules.SpectrogramAugmentation + freq_masks: 2 # set to zero to disable it + time_masks: 10 # set to zero to disable it + freq_width: 27 + time_width: 0.05 + + encoder: + _target_: nemo.collections.asr.modules.ConformerEncoder + feat_in: ${model.preprocessor.features} + feat_out: -1 # you may set it if you need different output size other than the default d_model + n_layers: 17 + d_model: 512 + + # Sub-sampling params + subsampling: striding # vggnet, striding, stacking or stacking_norm, dw_striding + subsampling_factor: 4 # must be power of 2 for striding and vggnet + subsampling_conv_channels: -1 # set to -1 to make it equal to the d_model + causal_downsampling: false + + # Feed forward module's params + ff_expansion_factor: 4 + + # Multi-headed Attention Module's params + self_attention_model: rel_pos # rel_pos or abs_pos + n_heads: 8 # may need to be lower for smaller d_models + # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention + att_context_size: [-1, -1] # -1 means unlimited context + att_context_style: regular # regular or chunked_limited + xscaling: true # scales up the input embeddings by sqrt(d_model) + untie_biases: true # unties the biases of the TransformerXL layers + pos_emb_max_len: 5000 + + # Convolution module's params + conv_kernel_size: 31 + conv_norm_type: 'batch_norm' # batch_norm or layer_norm or groupnormN (N specifies the number of groups) + # conv_context_size can be"causal" or a list of two integers while conv_context_size[0]+conv_context_size[1]+1==conv_kernel_size + # null means [(kernel_size-1)//2, (kernel_size-1)//2], and 'causal' means [(kernel_size-1), 0] + conv_context_size: null + + ### regularization + dropout: 0.1 # The dropout used in most of the Conformer Modules + dropout_emb: 0.0 # The dropout used for embeddings + dropout_att: 0.1 # The dropout for multi-headed attention modules + + decoder: + _target_: nemo.collections.asr.modules.StatelessTransducerDecoder + context_size: 2 # The Stateless decoder uses 2 words as context by default. + normalization_mode: layer # This helps stabilize training for Stateless decoders. + + prednet: + pred_hidden: ${model.model_defaults.pred_hidden} + pred_rnn_layers: 1 + t_max: null + dropout: 0.2 + + joint: + _target_: nemo.collections.asr.modules.RNNTJoint + log_softmax: null # 'null' would set it automatically according to CPU/GPU device + preserve_memory: false # dramatically slows down training, but might preserve some memory + + # Fuses the computation of prediction net + joint net + loss + WER calculation + # to be run on sub-batches of size `fused_batch_size`. + # When this flag is set to true, consider the `batch_size` of *_ds to be just `encoder` batch size. + # `fused_batch_size` is the actual batch size of the prediction net, joint net and transducer loss. + # Using small values here will preserve a lot of memory during training, but will make training slower as well. + # An optimal ratio of fused_batch_size : *_ds.batch_size is 1:1. + # However, to preserve memory, this ratio can be 1:8 or even 1:16. + # Extreme case of 1:B (i.e. fused_batch_size=1) should be avoided as training speed would be very slow. + fuse_loss_wer: true + fused_batch_size: 16 + + jointnet: + joint_hidden: ${model.model_defaults.joint_hidden} + activation: "relu" + dropout: 0.2 + + # this variable is non-zero for this TDT model, as well as multi-blank models. It represents the number of + # additional outputs from the joiner, besides all tokens in the BPE vocab plus the (standard) blank symbol. + num_extra_outputs: ${model.model_defaults.num_tdt_durations} + + decoding: + # Using greedy decoding is highly recommended for TDT models. Using greedy-batch will give very bad results + # if omega is 0; even if omega is non-zero, greedy-batch results are still going to be inaccurate. + strategy: "greedy" + + model_type: "tdt" + + # this must not be None in order to use the TDT specific decoding method. + durations: ${model.model_defaults.tdt_durations} + + # greedy strategy config + greedy: + max_symbols: 10 + + # beam strategy config + beam: + beam_size: 2 + return_best_hypothesis: False + score_norm: true + tsd_max_sym_exp: 50 # for Time Synchronous Decoding + alsd_max_target_len: 2.0 # for Alignment-Length Synchronous Decoding + + loss: + # This is the main different between a TDT model and a conventional RNNT model -- the loss function. + loss_name: "tdt" + + tdt_kwargs: + # FastEmit regularization: https://arxiv.org/abs/2010.11148 + # You may enable FastEmit to reduce the latency of the model for streaming + fastemit_lambda: 0.001 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. + + # refer to https://arxiv.org/abs/2304.06795 for the meaning of the following three configs. + durations: ${model.model_defaults.tdt_durations} + sigma: 0.05 # hyper-param for under-normalization. + omega: 0.1 # weight for regular RNN-T loss. + + # Adds Gaussian noise to the gradients of the decoder to avoid overfitting + variational_noise: + start_step: 0 + std: 0.0 + + optim: + name: adamw + lr: 5.0 + # optimizer arguments + betas: [0.9, 0.98] + weight_decay: 1e-3 + + # scheduler setup + sched: + name: NoamAnnealing + d_model: ${model.encoder.d_model} + # scheduler config override + warmup_steps: 10000 + warmup_ratio: null + min_lr: 1e-6 + +trainer: + devices: -1 # number of GPUs, -1 would use all available GPUs + num_nodes: 1 + max_epochs: 500 + max_steps: -1 # computed at runtime if not set + val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations + accelerator: auto + strategy: ddp + accumulate_grad_batches: 1 + gradient_clip_val: 0.0 + precision: 32 # Should be set to 16 for O1 and O2 to enable the AMP. + log_every_n_steps: 10 # Interval of logging. + enable_progress_bar: True + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 0 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + check_val_every_n_epoch: 1 # number of evaluations on validation every n epochs + sync_batchnorm: true + enable_checkpointing: False # Provided by exp_manager + logger: false # Provided by exp_manager + benchmark: false # needs to be false for models with variable-length speech input as it slows down training + + +exp_manager: + exp_dir: null + name: ${name} + create_tensorboard_logger: true + create_checkpoint_callback: true + checkpoint_callback_params: + # in case of multiple validation sets, first one is used + monitor: "val_wer" + mode: "min" + save_top_k: 5 + always_save_nemo: True # saves the checkpoints as nemo files instead of PTL checkpoints + resume_if_exists: false + resume_ignore_no_checkpoint: false + + create_wandb_logger: false + wandb_logger_kwargs: + name: null + project: null + diff --git a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_bpe_streaming.yaml b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_bpe_streaming.yaml index c68b30a33d5a..749216b1925d 100644 --- a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_bpe_streaming.yaml +++ b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_bpe_streaming.yaml @@ -97,10 +97,17 @@ model: # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one - # for chunked_limited you may calculate the look-ahead or right context by the following formula: # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 13*8*0.01=1.04s + + # For multi-lookahead models, you may specify a list of context sizes. During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. + # The first item in the list would be the default during test/validation/inference. + # An example of settings for multi-lookahead: + # att_context_size: [[70,13],[70,6],[70,1],[70,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [70, 13] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null + xscaling: true # scales up the input embeddings by sqrt(d_model) pos_emb_max_len: 5000 diff --git a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_char_streaming.yaml b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_char_streaming.yaml index 654895ec065d..17345119c529 100644 --- a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_char_streaming.yaml +++ b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_char_streaming.yaml @@ -100,11 +100,19 @@ model: n_heads: 8 # may need to be lower for smaller d_models # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention - # for att_context_style=regular, the right context is recommended to be a small number around 0 to 2 as multiple-layers may increase the effective right context too large + # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 13*8*0.01=1.04s + + # For multi-lookahead models, you may specify a list of context sizes. During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. + # The first item in the list would be the default during test/validation/inference. + # An example of settings for multi-lookahead: + # att_context_size: [[70,13],[70,6],[70,1],[70,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [70, 13] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null + xscaling: true # scales up the input embeddings by sqrt(d_model) pos_emb_max_len: 5000 diff --git a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_bpe_streaming.yaml b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_bpe_streaming.yaml index 5f223061a420..dbd036458cb8 100644 --- a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_bpe_streaming.yaml +++ b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_bpe_streaming.yaml @@ -102,10 +102,17 @@ model: # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one - # for chunked_limited you may calculate the look-ahead or right context by the following formula: # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 13*8*0.01=1.04s + + # For multi-lookahead models, you may specify a list of context sizes. During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. + # The first item in the list would be the default during test/validation/inference. + # An example of settings for multi-lookahead: + # att_context_size: [[70,13],[70,6],[70,1],[70,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [70, 13] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null + xscaling: true # scales up the input embeddings by sqrt(d_model) pos_emb_max_len: 5000 @@ -191,9 +198,9 @@ model: loss_name: "default" warprnnt_numba_kwargs: # FastEmit regularization: https://arxiv.org/abs/2010.11148 - # You may enable FastEmit to reduce the latency of the model for streaming - # It also helps to improve the accuracy of the model in streaming mode - fastemit_lambda: 1e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + # You may enable FastEmit to increase the accuracy and reduce the latency of the model for streaming + # You may set it to lower values like 1e-3 for models with larger right context + fastemit_lambda: 5e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. optim: diff --git a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_char_streaming.yaml b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_char_streaming.yaml index 68a78ba60aac..50f73d35ca75 100644 --- a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_char_streaming.yaml +++ b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_char_streaming.yaml @@ -106,11 +106,19 @@ model: n_heads: 8 # may need to be lower for smaller d_models # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention - # for att_context_style=regular, the right context is recommended to be a small number around 0 to 2 as multiple-layers may increase the effective right context too large + # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 13*8*0.01=1.04s + + # For multi-lookahead models, you may specify a list of context sizes. During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. + # The first item in the list would be the default during test/validation/inference. + # An example of settings for multi-lookahead: + # att_context_size: [[70,13],[70,6],[70,1],[70,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [70, 13] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null + xscaling: true # scales up the input embeddings by sqrt(d_model) pos_emb_max_len: 5000 @@ -196,9 +204,9 @@ model: loss_name: "default" warprnnt_numba_kwargs: # FastEmit regularization: https://arxiv.org/abs/2010.11148 - # You may enable FastEmit to reduce the latency of the model for streaming - # It also helps to improve the accuracy of the model in streaming mode - fastemit_lambda: 1e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + # You may enable FastEmit to increase the accuracy and reduce the latency of the model for streaming + # You may set it to lower values like 1e-3 for models with larger right context + fastemit_lambda: 5e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. optim: diff --git a/examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml b/examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml index 8c7561381299..41a8abd93758 100644 --- a/examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml +++ b/examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml @@ -17,6 +17,22 @@ # | bf16 | 32GB | 64 | # | | 80GB | 128 | # +-----------+------------+------------+ +# Here are the recommended configs for different variants of FastConformer-CTC-BPE, other parameters are the same as in this config file. +# +# +--------------+---------+---------+----------+----------------+--------------+--------------------------+-----------------+------------+ +# | Model | d_model | n_heads | n_layers |conv_kernel_size| weight_decay | pred_hidden/joint_hidden | pred_rnn_layers | xscaling | +# +==============+=========+========+===========+================+==============+==========================+=================+============+ +# | Small (14M) | 176 | 4 | 16 | 9 | 0.0 | 320 | 1 | True | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | Medium (32M) | 256 | 4 | 16 | 9 | 1e-3 | 640 | 1 | True | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | Large (120M) | 512 | 8 | 17 | 9 | 1e-3 | 640 | 1 | True | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | XLarge (616M)| 1024 | 8 | 24 | 9 | 1e-3 | 640 | 2 | False | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | XXLarge(1.2B)| 1024 | 8 | 42 | 5 | 1e-3 | 640 | 2 | False | +# +--------------------------------------------------------------+--------------+--------------------------+-----------------+------------+ + # Note: They are based on the assumption of max_duration of 20. If you have longer or shorter max_duration, then batch sizes may need to get updated accordingly. # Default learning parameters in this config are set for global batch size of 2K while you may use lower values. diff --git a/examples/asr/conf/fastconformer/fast-conformer_transducer_bpe.yaml b/examples/asr/conf/fastconformer/fast-conformer_transducer_bpe.yaml index 0b0ec78e077d..9e3da8d3545f 100644 --- a/examples/asr/conf/fastconformer/fast-conformer_transducer_bpe.yaml +++ b/examples/asr/conf/fastconformer/fast-conformer_transducer_bpe.yaml @@ -17,6 +17,22 @@ # | bf16 | 32GB | 64 | # | | 80GB | 128 | # +-----------+------------+------------+ +# Here are the recommended configs for different variants of FastConformer-Transducer-BPE, other parameters are the same as in this config file. +# +# +--------------+---------+---------+----------+----------------+--------------+--------------------------+-----------------+------------+ +# | Model | d_model | n_heads | n_layers |conv_kernel_size| weight_decay | pred_hidden/joint_hidden | pred_rnn_layers | xscaling | +# +==============+=========+========+===========+================+==============+==========================+=================+============+ +# | Small (14M) | 176 | 4 | 16 | 9 | 0.0 | 320 | 1 | True | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | Medium (32M) | 256 | 4 | 16 | 9 | 1e-3 | 640 | 1 | True | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | Large (120M) | 512 | 8 | 17 | 9 | 1e-3 | 640 | 1 | True | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | XLarge (616M)| 1024 | 8 | 24 | 9 | 1e-3 | 640 | 2 | True | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | XXLarge(1.2B)| 1024 | 8 | 42 | 5 | 1e-3 | 640 | 2 | False | +# +--------------------------------------------------------------+--------------+--------------------------+-----------------+------------+ + # Note: They are based on the assumption of max_duration of 20. If you have longer or shorter max_duration, then batch sizes may need to get updated accordingly. # Default learning parameters in this config are set for global batch size of 2K while you may use lower values. diff --git a/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_bpe_streaming.yaml b/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_bpe_streaming.yaml index 8b7a2ce7b39d..26dabaa039fe 100644 --- a/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_bpe_streaming.yaml +++ b/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_bpe_streaming.yaml @@ -8,6 +8,8 @@ # FastConformer-CTC's architecture config: NeMo/examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml # FastConformer-Transducer's architecture config, along with the optimal batch size and precision: NeMo/examples/asr/conf/fastconformer/fast-conformer_transducer_bpe.yaml +# Note: if training loss does not converge, you may increase warm-up to 20K. + name: "FastConformer-Hybrid-Transducer-CTC-BPE-Streaming" model: @@ -106,8 +108,15 @@ model: # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 13*8*0.01=1.04s + + # For multi-lookahead models, you may specify a list of context sizes. During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. + # The first item in the list would be the default during test/validation/inference. + # An example of settings for multi-lookahead: + # att_context_size: [[70,13],[70,6],[70,1],[70,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [70, 13] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null xscaling: true # scales up the input embeddings by sqrt(d_model) pos_emb_max_len: 5000 @@ -206,9 +215,9 @@ model: loss_name: "default" warprnnt_numba_kwargs: # FastEmit regularization: https://arxiv.org/abs/2010.11148 - # You may enable FastEmit to reduce the latency of the model for streaming - # It also helps to improve the accuracy of the model in streaming mode - fastemit_lambda: 1e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + # You may enable FastEmit to increase the accuracy and reduce the latency of the model for streaming + # You may set it to lower values like 1e-3 for models with larger right context + fastemit_lambda: 5e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. optim: diff --git a/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_char_streaming.yaml b/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_char_streaming.yaml index a24829b50788..d8362636f04a 100644 --- a/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_char_streaming.yaml +++ b/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_char_streaming.yaml @@ -8,6 +8,8 @@ # FastConformer-CTC's architecture config: NeMo/examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml # FastConformer-Transducer's architecture config, along with the optimal batch size and precision: NeMo/examples/asr/conf/fastconformer/fast-conformer_transducer_bpe.yaml +# Note: if training loss does not converge, you may increase warm-up to 20K. + name: "FastConformer-Hybrid-Transducer-CTC-Char-Streaming" model: @@ -111,8 +113,15 @@ model: # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 13*8*0.01=1.04s + + # For multi-lookahead models, you may specify a list of context sizes. During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. + # The first item in the list would be the default during test/validation/inference. + # An example of settings for multi-lookahead: + # att_context_size: [[70,13],[70,6],[70,1],[70,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [70, 13] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null xscaling: true # scales up the input embeddings by sqrt(d_model) pos_emb_max_len: 5000 @@ -211,9 +220,9 @@ model: loss_name: "default" warprnnt_numba_kwargs: # FastEmit regularization: https://arxiv.org/abs/2010.11148 - # You may enable FastEmit to reduce the latency of the model for streaming - # It also helps to improve the accuracy of the model in streaming mode - fastemit_lambda: 1e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + # You may enable FastEmit to increase the accuracy and reduce the latency of the model for streaming + # You may set it to lower values like 1e-3 for models with larger right context + fastemit_lambda: 5e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. optim: diff --git a/examples/asr/conf/marblenet/marblenet_3x2x64_20ms.yaml b/examples/asr/conf/marblenet/marblenet_3x2x64_20ms.yaml new file mode 100644 index 000000000000..2c98c210eb0e --- /dev/null +++ b/examples/asr/conf/marblenet/marblenet_3x2x64_20ms.yaml @@ -0,0 +1,209 @@ +name: &name "MarbleNet-3x2x64" + +model: + sample_rate: 16000 + repeat: 2 + dropout: 0.0 + kernel_size_factor: 1.0 + + labels: ['0', '1'] + + train_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + labels: ${model.labels} + batch_size: 128 + shuffle: True + # tarred datasets + is_tarred: false + tarred_audio_filepaths: null + tarred_shard_strategy: "scatter" + shuffle_n: 2048 + num_workers: 8 + pin_memory: true + # bucketing params + bucketing_strategy: "synced_randomized" + bucketing_batch_size: null + bucketing_weights: null + augmentor: + white_noise: + prob: 0.9 + min_level: -90 + max_level: -46 + gain: + prob: 0.5 + min_gain_dbfs: -10.0 + max_gain_dbfs: 10.0 + noise: + prob: 0.6 + manifest_path: /manifests/vad_noise/freesound_nonspeech_train_FL200.json + min_snr_db: 0 + max_snr_db: 20 + max_gain_db: 300.0 + + validation_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + labels: ${model.labels} + batch_size: 128 + shuffle: False + num_workers: 8 + pin_memory: true + val_loss_idx: 0 + + test_ds: + manifest_filepath: null + sample_rate: ${model.sample_rate} + labels: ${model.labels} + batch_size: 128 + shuffle: False + num_workers: 8 + pin_memory: true + test_loss_idx: 0 + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + normalize: "None" + window_size: 0.025 + sample_rate: ${model.sample_rate} + window_stride: 0.01 + window: "hann" + features: &n_mels 80 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + stft_conv: false + pad_to: 2 + + spec_augment: + _target_: nemo.collections.asr.modules.SpectrogramAugmentation + freq_masks: 2 # set to zero to disable it + time_masks: 10 # set to zero to disable it + freq_width: 27 + time_width: 0.05 + + encoder: + _target_: nemo.collections.asr.modules.ConvASREncoder + feat_in: *n_mels + activation: relu + conv_mask: true + + jasper: + - filters: 128 + repeat: 1 + kernel: [11] + stride: [2] + dilation: [1] + dropout: ${model.dropout} + residual: false + separable: true + kernel_size_factor: ${model.kernel_size_factor} + + - filters: 64 + repeat: ${model.repeat} + kernel: [13] + stride: [1] + dilation: [1] + dropout: ${model.dropout} + residual: true + separable: true + kernel_size_factor: ${model.kernel_size_factor} + + - filters: 64 + repeat: ${model.repeat} + kernel: [15] + stride: [1] + dilation: [1] + dropout: ${model.dropout} + residual: true + separable: true + kernel_size_factor: ${model.kernel_size_factor} + + - filters: 64 + repeat: ${model.repeat} + kernel: [17] + stride: [1] + dilation: [1] + dropout: ${model.dropout} + residual: true + separable: true + kernel_size_factor: ${model.kernel_size_factor} + + - filters: 128 + repeat: 1 + kernel: [29] + stride: [1] + dilation: [2] + dropout: ${model.dropout} + residual: false + separable: true + kernel_size_factor: ${model.kernel_size_factor} + + - filters: &enc_filters 128 + repeat: 1 + kernel: [1] + stride: [1] + dilation: [1] + dropout: ${model.dropout} + residual: false + + decoder: + _target_: nemo.collections.common.parts.MultiLayerPerceptron + hidden_size: *enc_filters + num_classes: -1 + num_layers: 1 + activation: 'relu' + log_softmax: false + + optim: + name: sgd + lr: 0.01 + # optimizer arguments + weight_decay: 0.001 + momentum: 0.9 + + # scheduler setup + sched: + name: PolynomialHoldDecayAnnealing + # Scheduler params + power: 2.0 + warmup_ratio: 0.05 + hold_ratio: 0.45 + min_lr: 0.001 + last_epoch: -1 + +trainer: + devices: -1 # number of gpus, -1 to use all gpus + max_epochs: 100 + max_steps: -1 # computed at runtime if not set + num_nodes: 1 + accelerator: auto + strategy: ddp + accumulate_grad_batches: 1 + enable_checkpointing: False # Provided by exp_manager + logger: False # Provided by exp_manager + log_every_n_steps: 10 # Interval of logging. + val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations + check_val_every_n_epoch: 1 + benchmark: false # needs to be false for models with variable-length speech input as it slows down training + +exp_manager: + exp_dir: null + name: *name + create_tensorboard_logger: True + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: "val_acc_macro" + mode: "max" + save_top_k: 3 + always_save_nemo: true # saves the checkpoints as nemo files instead of PTL checkpoints + save_best_model: true + + # you need to set these two to True to continue the training + resume_if_exists: true + resume_ignore_no_checkpoint: true + + create_wandb_logger: False + wandb_logger_kwargs: + name: null + project: null diff --git a/examples/asr/conf/ssl/fastconformer/fast-conformer.yaml b/examples/asr/conf/ssl/fastconformer/fast-conformer.yaml new file mode 100644 index 000000000000..5a4483613a5e --- /dev/null +++ b/examples/asr/conf/ssl/fastconformer/fast-conformer.yaml @@ -0,0 +1,235 @@ +# This config contains the default values for self-supervised pre-training of a Conformer ASR model, large size (~120M). + +# Architecture and training config: +# Default learning parameters in this config are set for effective batch size of 2K. To train it with smaller effective +# batch sizes, you may need to re-tune the learning parameters or use higher accumulate_grad_batches. +# Here are the recommended configs for different variants of Conformer-CTC, other parameters are the same as in this config file. +# One extra layer (compared to original paper) is added to the medium and large variants to compensate for replacing the LSTM decoder with a linear one. +# +# +-------------+---------+---------+----------+------------+-----+ +# | Model | d_model | n_heads | n_layers | time_masks | lr | +# +=============+=========+========+===========+============+=====+ +# | Large (121M)| 512 | 8 | 17 | 10 | 2.0 | +# +---------------------------------------------------------------+ +# +# If you do not want to train with AMP, you may use weight decay of 0.0 or reduce the number of time maskings to 2 +# with time_width=100. It may help when you want to train for fewer epochs and need faster convergence. +# With weight_decay=0.0, learning rate may need to get reduced to 2.0. + +name: "FastConformer-SSL" + +model: + sample_rate: 16000 + + train_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 # you may increase batch_size if your memory allows + shuffle: true + num_workers: 8 + pin_memory: false + use_start_end_token: true + trim_silence: false + max_duration: 16.7 + min_duration: 8.0 + # tarred datasets + is_tarred: false + tarred_audio_filepaths: null + shuffle_n: 2048 + # bucketing params + bucketing_strategy: "synced_randomized" + bucketing_batch_size: null + + validation_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 # you may increase batch_size if your memory allows + shuffle: false + num_workers: 8 + pin_memory: true + use_start_end_token: false + min_duration: 8.0 + + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + sample_rate: ${model.sample_rate} + normalize: "per_feature" + window_size: 0.025 + window_stride: 0.01 + window: "hann" + features: 80 + n_fft: 512 + log: true + frame_splicing: 1 + dither: 0.00001 + pad_to: 16 + pad_value: 0.0 + + spec_augment: + _target_: nemo.collections.asr.modules.MaskedPatchAugmentation + freq_masks: 3 + freq_width: 20 + patch_size: 48 + mask_patches: 0.5 + + encoder: + _target_: nemo.collections.asr.modules.ConformerEncoder + feat_in: ${model.preprocessor.features} + feat_out: -1 # you may set it if you need different output size other than the default d_model + n_layers: 17 + d_model: 512 + + # Sub-sampling params + subsampling: dw_striding # vggnet, striding, stacking or stacking_norm, dw_striding + subsampling_factor: 8 # must be power of 2 for striding and vggnet + subsampling_conv_channels: 256 # -1 sets it to d_model + causal_downsampling: false + + # Reduction parameters: Can be used to add another subsampling layer at a given position. + # Having a 2x reduction will speedup the training and inference speech while keeping similar WER. + # Adding it at the end will give the best WER while adding it at the beginning will give the best speedup. + reduction: null # pooling, striding, or null + reduction_position: null # Encoder block index or -1 for subsampling at the end of encoder + reduction_factor: 1 + + # Feed forward module's params + ff_expansion_factor: 4 + + # Multi-headed Attention Module's params + self_attention_model: rel_pos # rel_pos or abs_pos + n_heads: 8 # may need to be lower for smaller d_models + # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention + att_context_size: [-1, -1] # -1 means unlimited context + xscaling: true # scales up the input embeddings by sqrt(d_model) + untie_biases: true # unties the biases of the TransformerXL layers + pos_emb_max_len: 5000 + + # Convolution module's params + conv_kernel_size: 9 + conv_norm_type: 'batch_norm' # batch_norm or layer_norm or groupnormN (N specifies the number of groups) + # conv_context_size can be"causal" or a list of two integers while conv_context_size[0]+conv_context_size[1]+1==conv_kernel_size + # null means [(kernel_size-1)//2, (kernel_size-1)//2], and 'causal' means [(kernel_size-1), 0] + conv_context_size: null + + ### regularization + dropout: 0.1 # The dropout used in most of the Conformer Modules + dropout_pre_encoder: 0.1 # The dropout used before the encoder + dropout_emb: 0.0 # The dropout used for embeddings + dropout_att: 0.1 # The dropout for multi-headed attention modules + + # set to non-zero to enable stochastic depth + stochastic_depth_drop_prob: 0.0 + stochastic_depth_mode: linear # linear or uniform + stochastic_depth_start_layer: 1 + + decoder_out: 256 + + loss_list: + contrastive: + is_active: true # indicates whether to use this loss + decoder: + _target_: nemo.collections.asr.modules.ConvASRDecoderReconstruction + feat_in: ${model.encoder.d_model} + feat_hidden: ${model.decoder_out} + # features in hidden layer of decoder + feat_out: ${model.decoder_out} + stride_layers: 0 + # if loss.combine_time_steps is less than the encoder stride, then a corresponding amount of stride_layers needs to + # be added to the decoder (here stride and combine_time_steps are both 4) + non_stride_layers: 0 + loss: + _target_: nemo.collections.asr.losses.ContrastiveLoss + in_dim: ${model.preprocessor.features} + proj_dim: ${model.decoder_out} + combine_time_steps: 8 # how many spectrogram time steps are used for one target/representation for contrastive task + quantized_targets: false # should quantizer or linear layer be used + # (quantizer is required to extract pseudo-labels for other losses) + codebook_size: 300 # number of vectors in the quantization codebook per group + num_groups: 2 # number of groups in the quantizer codebook + num_negatives: 100 # number of sampled negatives for each target + sample_from_same_utterance_only: true # should negatives be sampled only from the same utterance + sample_from_non_masked: false # should negatives be sampled from non-masked steps + + mlm: + is_active: false # indicates whether to use this loss + decoder: + _target_: nemo.collections.asr.modules.ConvASRDecoder + feat_in: ${model.encoder.d_model} + num_classes: 90000 + # set this to be equal to codebook_size^groups in the contrastive loss + loss: + _target_: nemo.collections.asr.losses.MLMLoss + combine_time_steps: 4 + targets_from_loss: "contrastive" + # since this loss requires targets, we can either get them from a manifest or from a quantized contrastive loss + loss_alpha: 1000. + # multiplier applied to this loss relative to others + transpose_encoded: false + # transposing input may be necessary depending on which layer is used as input to decoder + start_step: 0 + # determines what global step this loss starts being used at; + # this can be set to a higher number if your training is long enough, + # which may increase early training stability + output_from_layer: null + # if we wanted to use outputs from non-final encoder layer as input to this decoder, + # the layer name should be specified here + + + optim: + name: adamw + lr: 5.0 + # optimizer arguments + betas: [0.9, 0.98] + weight_decay: 1e-3 + + # scheduler setup + sched: + name: NoamAnnealing + d_model: ${model.encoder.d_model} + # scheduler config override + warmup_steps: 25000 + warmup_ratio: null + min_lr: 1e-6 + +trainer: + devices: -1 # number of GPUs, -1 would use all available GPUs + num_nodes: 1 + max_epochs: 1000 + max_steps: -1 # computed at runtime if not set + val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations + accelerator: auto + strategy: ddp + accumulate_grad_batches: 1 + gradient_clip_val: 1.0 + precision: 32 # Should be set to 16 for O1 and O2 to enable the AMP. + log_every_n_steps: 10 # Interval of logging. + enable_progress_bar: True + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 0 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + check_val_every_n_epoch: 1 # number of evaluations on validation every n epochs + sync_batchnorm: true + enable_checkpointing: False # Provided by exp_manager + logger: false # Provided by exp_manager + benchmark: false # needs to be false for models with variable-length speech input as it slows down training + +exp_manager: + exp_dir: null + name: ${name} + create_tensorboard_logger: true + create_checkpoint_callback: true + checkpoint_callback_params: + # in case of multiple validation sets, first one is used + monitor: "val_loss" + mode: "min" + save_top_k: 3 + + # you need to set these two to True to continue the training + resume_if_exists: false + resume_ignore_no_checkpoint: false + + # You may use this section to create a W&B logger + create_wandb_logger: false + wandb_logger_kwargs: + name: null + project: null diff --git a/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml b/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml new file mode 100644 index 000000000000..d759a809ec37 --- /dev/null +++ b/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml @@ -0,0 +1,38 @@ +name: &name "vad_inference_postprocessing" + +dataset: null # Path of json file of evaluation data. Audio files should have unique names +num_workers: 12 +sample_rate: 16000 +evaluate: False # whether to get AUROC and DERs, the manifest must contains groundtruth if enabled + +prepare_manifest: + auto_split: True # whether to automatically split manifest entry by split_duration to avoid potential CUDA out of memory issue. + split_duration: 400 # try smaller number if you still have CUDA memory issue + +vad: + model_path: "vad_multilingual_frame_marblenet" #.nemo local model path or pretrained model name or none + use_rttm: True # set True to output as RTTM format + parameters: # Parameters not tuned on large datasets, please use default parameters with caution + normalize_audio_db: null # set to non null value to normalize RMS DB of audio before preprocessing + window_length_in_sec: 0.0 # window length in sec for VAD context input, must be 0 for frame-VAD + shift_length_in_sec: 0.02 # frame-length in seconds for frame-VAD, must be 0.02 for the pretrained NeMo VAD model + smoothing: False # Deprecated for Frame-VAD. false or type of smoothing method (eg: median, mean) + overlap: 0.875 # Deprecated for Frame-VAD. overlap ratio for overlapped mean/median smoothing filter. If smoothing=False, ignore this value. + postprocessing: + onset: 0.3 # onset threshold for detecting the beginning and end of a speech + offset: 0.3 # offset threshold for detecting the end of a speech. + pad_onset: 0.2 # adding durations before each speech segment + pad_offset: 0.2 # adding durations after each speech segment + min_duration_on: 0.2 # threshold for short speech deletion + min_duration_off: 0.2 # threshold for short non-speech segment deletion + filter_speech_first: True + +prepared_manifest_vad_input: null # if not specify, it will automatically generated be "manifest_vad_input.json" +frame_out_dir: "vad_frame_outputs" +smoothing_out_dir: null # if not specify, it will automatically generated be frame_out_dir + "/overlap_smoothing_output" + "_" + smoothing_method + "_" + str(overlap) +rttm_out_dir: null # if not specify, it will automatically be frame_out_dir + "/seg_output_" + key and value in postprocessing params +out_manifest_filepath: null # if not specify it will automatically be "manifest_vad_out.json" + + +# json manifest line example +# {"audio_filepath": "/path/to/audio_file.wav", "offset": 0, "duration": 1.23, "label": "infer", "text": "-"} diff --git a/examples/asr/speech_classification/README.md b/examples/asr/speech_classification/README.md index 86bba3dc65a4..bdd3aead8db1 100644 --- a/examples/asr/speech_classification/README.md +++ b/examples/asr/speech_classification/README.md @@ -1,25 +1,105 @@ # Speech Classification -This directory contains example scripts to train speech classification and voice activity detection models. +This directory contains example scripts to train speech classification and voice activity detection models. There are two types of VAD models: Frame-VAD and Segment-VAD. -# Model execution overview +## Frame-VAD -The training scripts in this directory execute in the following order. When preparing your own training-from-scratch / fine-tuning scripts, please follow this order for correct training/inference. +The frame-level VAD model predicts for each frame of the audio whether it has speech or not. For example, with the default config file (`../conf/marblenet/marblenet_3x2x64_20ms.yaml`), the model provides a probability for each frame of 20ms length. -```mermaid +### Training +```sh +python speech_to_label.py \ + --config-path= + --config-name= \ + model.train_ds.manifest_filepath="[,]" \ + model.validation_ds.manifest_filepath=["",""] \ + trainer.devices=-1 \ + trainer.accelerator="gpu" \ + strategy="ddp" \ + trainer.max_epochs=100 +``` + +The input manifest must be a manifest json file, where each line is a Python dictionary. The fields ["audio_filepath", "offset", "duration", "label"] are required. An example of a manifest file is: +``` +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000, "label": "0 1 0 0 1"} +{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 10000, "label": "0 0 0 1 1 1 1 0 0"} +``` +For example, if you have a 1s audio file, you'll need to have 50 frame labels in the manifest entry like "0 0 0 0 1 1 0 1 .... 0 1". +However, shorter label strings are also supported for smaller file sizes. For example, you can prepare the `label` in 40ms frame, and the model will properly repeat the label for each 20ms frame. + + +### Inference +python frame_vad_infer.py \ + --config-path="../conf/vad" --config-name="frame_vad_infer_postprocess" \ + dataset= + +The manifest json file should have the following format (each line is a Python dictionary): +``` +{"audio_filepath": "/path/to/audio_file1.wav", "offset": 0, "duration": 10000} +{"audio_filepath": "/path/to/audio_file2.wav", "offset": 0, "duration": 10000} +``` + +#### Evaluation +If you want to evaluate tne model's AUROC and DER performance, you need to set `evaluate: True` in config yaml (e.g., `../conf/vad/frame_vad_infer_postprocess.yaml`), and also provide groundtruth in label strings: +``` +{"audio_filepath": "/path/to/audio_file1.wav", "offset": 0, "duration": 10000, "label": "0 1 0 0 0 1 1 1 0"} +``` +or RTTM files: +``` +{"audio_filepath": "/path/to/audio_file1.wav", "offset": 0, "duration": 10000, "rttm_filepath": "/path/to/rttm_file1.rttm"} +``` + + +## Segment-VAD + +Segment-level VAD predicts a single label for each segment of audio (e.g., 0.63s by default). + +### Training +```sh +python speech_to_label.py \ + --config-path= \ + --config-name= \ + model.train_ds.manifest_filepath="[,]" \ + model.validation_ds.manifest_filepath=["",""] \ + trainer.devices=-1 \ + trainer.accelerator="gpu" \ + strategy="ddp" \ + trainer.max_epochs=100 +``` + +The input manifest must be a manifest json file, where each line is a Python dictionary. The fields ["audio_filepath", "offset", "duration", "label"] are required. An example of a manifest file is: +``` +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 0.63, "label": "0"} +{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 0.63, "label": "1"} +``` + + +### Inference +```sh +python vad_infer.py \ + --config-path="../conf/vad" \ + --config-name="vad_inference_postprocessing.yaml" + dataset= +``` +The manifest json file should have the following format (each line is a Python dictionary): +``` +{"audio_filepath": "/path/to/audio_file1.wav", "offset": 0, "duration": 10000} +{"audio_filepath": "/path/to/audio_file2.wav", "offset": 0, "duration": 10000} +``` + + +## Visualization + +To visualize the VAD outputs, you can use the `nemo.collections.asr.parts.utils.vad_utils.plot_sample_from_rttm` function, which takes an audio file and an RTTM file as input, and plots the audio waveform and the VAD labels. Since the VAD inference script will output a json manifest `manifest_vad_out.json` by default, you can create a Jupyter Notebook with the following script and fill in the paths using the output manifest: +```python +from nemo.collections.asr.parts.utils.vad_utils import plot_sample_from_rttm -graph TD - A[Hydra Overrides + Yaml Config] --> B{Config} - B --> |Init| C[Trainer] - C --> D[ExpManager] - B --> D[ExpManager] - C --> E[Model] - B --> |Init| E[Model] - E --> |Constructor| F(Change Labels) - F --> G(Setup Train + Validation + Test Data loaders) - G --> H(Setup Optimization) - H --> I[Maybe init from pretrained] - I --> J["trainer.fit(model)"] +plot_sample_from_rttm( + audio_file="/path/to/audio_file.wav", + rttm_file="/path/to/rttm_file.rttm", + offset=0.0, + duration=1000, + save_path="vad_pred.png" +) ``` -During restoration of the model, you may pass the Trainer to the restore_from / from_pretrained call, or set it after the model has been initialized by using `model.set_trainer(Trainer)`. \ No newline at end of file diff --git a/examples/asr/speech_classification/frame_vad_infer.py b/examples/asr/speech_classification/frame_vad_infer.py new file mode 100644 index 000000000000..f716eb45bb64 --- /dev/null +++ b/examples/asr/speech_classification/frame_vad_infer.py @@ -0,0 +1,199 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This script peforms VAD on each 20ms frames of the input audio files. +Postprocessing is also performed to generate speech segments and store them as RTTM files. +Long audio files will be splitted into smaller chunks to avoid OOM issues, but the frames close +to the split points might have worse performance due to truncated context. + +## Usage: +python frame_vad_infer.py \ + --config-path="../conf/vad" --config-name="frame_vad_infer_postprocess" \ + dataset= + +The manifest json file should have the following format (each line is a Python dictionary): +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000} +{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 10000} + +If you want to evaluate tne model's AUROC and DER performance, you need to set `evaluate=True` in config yaml, +and also provide groundtruth in either RTTM files or label strings: +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000, "label": "0 1 0 0 0 1 1 1 0"} +or +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000, "rttm_filepath": "/path/to/rttm_file1.rttm"} + +""" + +import os +from pathlib import Path + +import torch + +from nemo.collections.asr.parts.utils.manifest_utils import write_manifest +from nemo.collections.asr.parts.utils.vad_utils import ( + frame_vad_eval_detection_error, + frame_vad_infer_load_manifest, + generate_overlap_vad_seq, + generate_vad_frame_pred, + generate_vad_segment_table, + init_frame_vad_model, + prepare_manifest, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + +@hydra_runner(config_path="../conf/vad", config_name="frame_vad_infer_postprocess") +def main(cfg): + if not cfg.dataset: + raise ValueError("You must input the path of json file of evaluation data") + + # each line of dataset should be have different audio_filepath and unique name to simplify edge cases or conditions + logging.info(f"Loading manifest file {cfg.dataset}") + manifest_orig, key_labels_map, key_rttm_map = frame_vad_infer_load_manifest(cfg) + + # Prepare manifest for streaming VAD + manifest_vad_input = cfg.dataset + if cfg.prepare_manifest.auto_split: + logging.info("Split long audio file to avoid CUDA memory issue") + logging.debug("Try smaller split_duration if you still have CUDA memory issue") + config = { + 'input': manifest_vad_input, + 'window_length_in_sec': cfg.vad.parameters.window_length_in_sec, + 'split_duration': cfg.prepare_manifest.split_duration, + 'num_workers': cfg.num_workers, + 'prepared_manifest_vad_input': cfg.prepared_manifest_vad_input, + } + manifest_vad_input = prepare_manifest(config) + else: + logging.warning( + "If you encounter CUDA memory issue, try splitting manifest entry by split_duration to avoid it." + ) + + torch.set_grad_enabled(False) + vad_model = init_frame_vad_model(cfg.vad.model_path) + + # setup_test_data + vad_model.setup_test_data( + test_data_config={ + 'batch_size': 1, + 'sample_rate': 16000, + 'manifest_filepath': manifest_vad_input, + 'labels': ['infer'], + 'num_workers': cfg.num_workers, + 'shuffle': False, + 'normalize_audio_db': cfg.vad.parameters.normalize_audio_db, + } + ) + + vad_model = vad_model.to(device) + vad_model.eval() + + if not os.path.exists(cfg.frame_out_dir): + logging.info(f"Frame predictions do not exist at {cfg.frame_out_dir}, generating frame prediction.") + os.mkdir(cfg.frame_out_dir) + extract_frame_preds = True + else: + logging.info(f"Frame predictions already exist at {cfg.frame_out_dir}, skipping frame prediction generation.") + extract_frame_preds = False + + if extract_frame_preds: + logging.info("Generating frame-level prediction ") + pred_dir = generate_vad_frame_pred( + vad_model=vad_model, + window_length_in_sec=cfg.vad.parameters.window_length_in_sec, + shift_length_in_sec=cfg.vad.parameters.shift_length_in_sec, + manifest_vad_input=manifest_vad_input, + out_dir=cfg.frame_out_dir, + ) + logging.info(f"Finish generating VAD frame level prediction. You can find the prediction in {pred_dir}") + else: + pred_dir = cfg.frame_out_dir + + frame_length_in_sec = cfg.vad.parameters.shift_length_in_sec + + # overlap smoothing filter + if cfg.vad.parameters.smoothing: + # Generate predictions with overlapping input segments. Then a smoothing filter is applied to decide the label for a frame spanned by multiple segments. + # smoothing_method would be either in majority vote (median) or average (mean) + logging.info("Generating predictions with overlapping input segments") + smoothing_pred_dir = generate_overlap_vad_seq( + frame_pred_dir=pred_dir, + smoothing_method=cfg.vad.parameters.smoothing, + overlap=cfg.vad.parameters.overlap, + window_length_in_sec=cfg.vad.parameters.window_length_in_sec, + shift_length_in_sec=cfg.vad.parameters.shift_length_in_sec, + num_workers=cfg.num_workers, + out_dir=cfg.smoothing_out_dir, + ) + logging.info( + f"Finish generating predictions with overlapping input segments with smoothing_method={cfg.vad.parameters.smoothing} and overlap={cfg.vad.parameters.overlap}" + ) + pred_dir = smoothing_pred_dir + + # postprocessing and generate speech segments + logging.info("Converting frame level prediction to RTTM files.") + rttm_out_dir = generate_vad_segment_table( + vad_pred_dir=pred_dir, + postprocessing_params=cfg.vad.parameters.postprocessing, + frame_length_in_sec=frame_length_in_sec, + num_workers=cfg.num_workers, + use_rttm=cfg.vad.use_rttm, + out_dir=cfg.rttm_out_dir, + ) + logging.info( + f"Finish generating speech semgents table with postprocessing_params: {cfg.vad.parameters.postprocessing}" + ) + + logging.info("Writing VAD output to manifest") + key_pred_rttm_map = {} + manifest_new = [] + for entry in manifest_orig: + key = Path(entry['audio_filepath']).stem + entry['rttm_filepath'] = Path(os.path.join(rttm_out_dir, key + ".rttm")).absolute().as_posix() + if not Path(entry['rttm_filepath']).is_file(): + logging.warning(f"Not able to find {entry['rttm_filepath']} for {entry['audio_filepath']}") + entry['rttm_filepath'] = "" + manifest_new.append(entry) + key_pred_rttm_map[key] = entry['rttm_filepath'] + + if not cfg.out_manifest_filepath: + out_manifest_filepath = "manifest_vad_output.json" + else: + out_manifest_filepath = cfg.out_manifest_filepath + write_manifest(out_manifest_filepath, manifest_new) + logging.info(f"Finished writing VAD output to manifest: {out_manifest_filepath}") + + if cfg.get("evaluate", False): + logging.info("Evaluating VAD results") + auroc, report = frame_vad_eval_detection_error( + pred_dir=pred_dir, + key_labels_map=key_labels_map, + key_rttm_map=key_rttm_map, + key_pred_rttm_map=key_pred_rttm_map, + frame_length_in_sec=frame_length_in_sec, + ) + DetER = report.iloc[[-1]][('detection error rate', '%')].item() + FA = report.iloc[[-1]][('false alarm', '%')].item() + MISS = report.iloc[[-1]][('miss', '%')].item() + logging.info(f"AUROC: {auroc:.4f}") + logging.info(f"DetER={DetER:0.4f}, False Alarm={FA:0.4f}, Miss={MISS:0.4f}") + logging.info(f"with params: {cfg.vad.parameters.postprocessing}") + logging.info("Done!") + + +if __name__ == "__main__": + main() # pylint: disable=no-value-for-parameter diff --git a/examples/asr/speech_classification/speech_to_frame_label.py b/examples/asr/speech_classification/speech_to_frame_label.py new file mode 100644 index 000000000000..04fcbdd1b61c --- /dev/null +++ b/examples/asr/speech_classification/speech_to_frame_label.py @@ -0,0 +1,70 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +The script trains a model that peforms classification on each frame of the input audio. +The default config (i.e., marblenet_3x2x64_20ms.yaml) outputs 20ms frames. + +## Training +```sh +python speech_to_label.py \ + --config-path= + --config-name= \ + model.train_ds.manifest_filepath="" \ + model.validation_ds.manifest_filepath=["",""] \ + trainer.devices=2 \ + trainer.accelerator="gpu" \ + strategy="ddp" \ + trainer.max_epochs=200 +``` + +The input manifest must be a manifest json file, where each line is a Python dictionary. The fields ["audio_filepath", "offset", "duration", "label"] are required. An example of a manifest file is: +``` +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000, "label": "0 1 0 0 1"} +{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 10000, "label": "0 0 0 1 1 1 1 0 0"} +``` +For example, if you have a 1s audio file, you'll need to have 50 frame labels in the manifest entry like "0 0 0 0 1 1 0 1 .... 0 1". +However, shorter label strings are also supported for smaller file sizes. For example, you can prepare the `label` in 40ms frame, and the model will properly repeat the label for each 20ms frame. + +""" + +import pytorch_lightning as pl +from omegaconf import OmegaConf +from nemo.collections.asr.models.classification_models import EncDecFrameClassificationModel + +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="../conf/marblenet", config_name="marblenet_3x2x64_20ms") +def main(cfg): + logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}') + + trainer = pl.Trainer(**cfg.trainer) + exp_manager(trainer, cfg.get("exp_manager", None)) + model = EncDecFrameClassificationModel(cfg=cfg.model, trainer=trainer) + + # Initialize the weights of the model from another model, if provided via config + model.maybe_init_from_pretrained_checkpoint(cfg) + + trainer.fit(model) + + if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None: + if model.prepare_test(trainer): + trainer.test(model) + + +if __name__ == '__main__': + main() # noqa pylint: disable=no-value-for-parameter diff --git a/examples/asr/speech_to_text_eval.py b/examples/asr/speech_to_text_eval.py index f4d2a66ffec0..452aa8202660 100644 --- a/examples/asr/speech_to_text_eval.py +++ b/examples/asr/speech_to_text_eval.py @@ -76,6 +76,11 @@ class EvaluationConfig(transcribe_speech.TranscriptionConfig): dataset_manifest: str = MISSING output_filename: Optional[str] = "evaluation_transcripts.json" + # decoder type: ctc or rnnt, can be used to switch between CTC and RNNT decoder for Joint RNNT/CTC models + decoder_type: Optional[str] = None + # att_context_size can be set for cache-aware streaming models with multiple look-aheads + att_context_size: Optional[list] = None + use_cer: bool = False tolerance: Optional[float] = None diff --git a/examples/asr/transcribe_speech.py b/examples/asr/transcribe_speech.py index 4ed3d92a6305..f0a1f3667162 100644 --- a/examples/asr/transcribe_speech.py +++ b/examples/asr/transcribe_speech.py @@ -130,6 +130,8 @@ class TranscriptionConfig: # Set to True to output greedy timestamp information (only supported models) compute_timestamps: bool = False + # set to True if need to return full alignment information + preserve_alignment: bool = False # Set to True to output language ID information compute_langs: bool = False @@ -151,8 +153,10 @@ class TranscriptionConfig: # Decoding strategy for RNNT models rnnt_decoding: RNNTDecodingConfig = RNNTDecodingConfig(fused_batch_size=-1) - # decoder type: ctc or rnnt, can be used to switch between CTC and RNNT decoder for Joint RNNT/CTC models + # decoder type: ctc or rnnt, can be used to switch between CTC and RNNT decoder for Hybrid RNNT/CTC models decoder_type: Optional[str] = None + # att_context_size can be set for cache-aware streaming models with multiple look-aheads + att_context_size: Optional[list] = None # Use this for model-specific changes before transcription model_change: ModelChangeConfig = ModelChangeConfig() @@ -230,6 +234,8 @@ def main(cfg: TranscriptionConfig) -> Union[TranscriptionConfig, List[Hypothesis # we will adjust this flag if the model does not support it compute_timestamps = cfg.compute_timestamps compute_langs = cfg.compute_langs + # has to be True if timestamps are required + preserve_alignment = True if cfg.compute_timestamps else cfg.preserve_alignment # Check whether model and decoder type match if isinstance(asr_model, EncDecCTCModel): @@ -242,6 +248,9 @@ def main(cfg: TranscriptionConfig) -> Union[TranscriptionConfig, List[Hypothesis if cfg.decoder_type and cfg.decoder_type != 'rnnt': raise ValueError('RNNT model only support rnnt decoding!') + if cfg.decoder_type and hasattr(asr_model.encoder, 'set_default_att_context_size'): + asr_model.encoder.set_default_att_context_size(cfg.att_context_size) + # Setup decoding strategy if hasattr(asr_model, 'change_decoding_strategy'): if cfg.decoder_type is not None: @@ -252,7 +261,7 @@ def main(cfg: TranscriptionConfig) -> Union[TranscriptionConfig, List[Hypothesis decoding_cfg = cfg.rnnt_decoding if cfg.decoder_type == 'rnnt' else cfg.ctc_decoding decoding_cfg.compute_timestamps = cfg.compute_timestamps # both ctc and rnnt support it if 'preserve_alignments' in decoding_cfg: - decoding_cfg.preserve_alignments = cfg.compute_timestamps + decoding_cfg.preserve_alignments = preserve_alignment if 'compute_langs' in decoding_cfg: decoding_cfg.compute_langs = cfg.compute_langs if hasattr(asr_model, 'cur_decoder'): @@ -267,7 +276,7 @@ def main(cfg: TranscriptionConfig) -> Union[TranscriptionConfig, List[Hypothesis cfg.rnnt_decoding.compute_langs = cfg.compute_langs if 'preserve_alignments' in cfg.rnnt_decoding: - cfg.rnnt_decoding.preserve_alignments = cfg.compute_timestamps + cfg.rnnt_decoding.preserve_alignments = preserve_alignment asr_model.change_decoding_strategy(cfg.rnnt_decoding) else: diff --git a/examples/asr/transcribe_speech_parallel.py b/examples/asr/transcribe_speech_parallel.py index 74019d7668f0..a57922f20d29 100644 --- a/examples/asr/transcribe_speech_parallel.py +++ b/examples/asr/transcribe_speech_parallel.py @@ -32,6 +32,15 @@ predict_ds.batch_size=16 \ output_path=/tmp/ +Example for Hybrid-CTC/RNNT models with non-tarred datasets: + +python transcribe_speech_parallel.py \ + model=stt_en_fastconformer_hybrid_large \ + decoder_type=ctc \ + predict_ds.manifest_filepath=/dataset/manifest_file.json \ + predict_ds.batch_size=16 \ + output_path=/tmp/ + Example for tarred datasets: python transcribe_speech_parallel.py \ @@ -73,7 +82,7 @@ from nemo.collections.asr.data.audio_to_text_dataset import ASRPredictionWriter from nemo.collections.asr.metrics.rnnt_wer import RNNTDecodingConfig from nemo.collections.asr.metrics.wer import word_error_rate -from nemo.collections.asr.models import ASRModel +from nemo.collections.asr.models import ASRModel, EncDecHybridRNNTCTCModel from nemo.collections.asr.models.configs.asr_models_config import ASRDatasetConfig from nemo.core.config import TrainerConfig, hydra_runner from nemo.utils import logging @@ -92,6 +101,12 @@ class ParallelTranscriptionConfig: # decoding strategy for RNNT models rnnt_decoding: RNNTDecodingConfig = RNNTDecodingConfig() + + # decoder type: ctc or rnnt, can be used to switch between CTC and RNNT decoder for Hybrid RNNT/CTC models + decoder_type: Optional[str] = None + # att_context_size can be set for cache-aware streaming models with multiple look-aheads + att_context_size: Optional[list] = None + trainer: TrainerConfig = TrainerConfig(devices=-1, accelerator="gpu", strategy="ddp") @@ -137,6 +152,9 @@ def main(cfg: ParallelTranscriptionConfig): ) model = ASRModel.from_pretrained(model_name=cfg.model, map_location="cpu") + if isinstance(model, EncDecHybridRNNTCTCModel) and cfg.decoder_type is not None: + model.change_decoding_strategy(decoder_type=cfg.decoder_type) + trainer = ptl.Trainer(**cfg.trainer) cfg.predict_ds.return_sample_id = True diff --git a/examples/nlp/language_modeling/conf/megatron_bert_config.yaml b/examples/nlp/language_modeling/conf/megatron_bert_config.yaml index cbc0562e2904..4e53ded4a453 100644 --- a/examples/nlp/language_modeling/conf/megatron_bert_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_bert_config.yaml @@ -50,6 +50,7 @@ model: # model architecture encoder_seq_length: 512 max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: 'learned_absolute' # Position embedding type. Options ['learned_absolute', 'rope', 'alibi', 'kerple' , 'xpos', 'sandwich'] xpos and sandwich are experimental. num_layers: 12 hidden_size: 768 ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. @@ -133,7 +134,7 @@ model: seq_length: ${model.encoder_seq_length} skip_warmup: True num_workers: 0 - dataloader_type: single # cyclic + dataloader_type: single # cyclic, LDDL reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token eod_mask_loss: False # Mask loss for the end of document tokens diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml index d502f255bd8e..e588e94a6720 100755 --- a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml @@ -63,7 +63,7 @@ model: attention_dropout: 0.1 # Dropout probability for attention ffn_dropout: 0.0 # Dropout probability in the feed-forward layer. kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + apply_query_key_layer_scaling: False # scale Q * K^T by 1 / layer-number. normalization: 'layernorm' # Normalization layer to use. Options are 'layernorm', 'rmsnorm' layernorm_epsilon: 1e-5 do_layer_norm_weight_decay: False # True means weight decay on all params @@ -77,7 +77,7 @@ model: transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] openai_gelu: False # Use OpenAI's GELU instead of the default GeLU normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. - position_embedding_type: 'learned_absolute' # Position embedding type. Options ['learned_absolute', 'rope'] + position_embedding_type: 'learned_absolute' # Position embedding type. Options ['learned_absolute', 'rope', 'alibi', 'kerple' , 'xpos', 'sandwich'] xpos and sandwich are experimental. rotary_percentage: 1.0 # If using position_embedding_type=rope, then the per head dim is multiplied by this. attention_type: 'multihead' # Attention type. Options ['multihead'] share_embeddings_and_output_weights: True # Share embedding and output layer weights. @@ -166,7 +166,17 @@ model: fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history reduce_amax: True # Perform reduction to sync amax tensors across GPUs after every iteration use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. - + ub_tp_comm_overlap: False + # Use userbuffer backend to overlap tensor-parallel communications with computes. + # This feature is only available with Transformer Engine and squence parallelism enabled and, currently, supports only GPT models. + ub_tp_comm_overlap_cfg: null + # A yaml file with userbuffer communicator configurations. This file should provide `method`, `dtype`, `num_sm`, `num_splits`, + # `cga_size`, `num_splits`, `set_sm_margin`, and `aggregate` for the communicators to use custom settings. + # If the configuration file is not provided a default setting is used for all communicators. + + ## Flash Attention + use_flash_attention: False # Use flash attention in self-attention module, this config does nothing when transformer_engine=True + data: # Path to data must be specified by the user. # Supports List, String and Dictionary diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_export.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_export.yaml new file mode 100644 index 000000000000..24d0c1548e69 --- /dev/null +++ b/examples/nlp/language_modeling/conf/megatron_gpt_export.yaml @@ -0,0 +1,25 @@ +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: bf16 # 16, 32, or bf16 + +model_type: gpt +tensor_model_parallel_size: 1 +pipeline_model_parallel_size: 1 +pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) +gpt_model_file: null # GPT nemo file path +onnx_model_file: null # ONNX file path +checkpoint_dir: null # Checkpoint directory +checkpoint_name: null # Checkpoint name +hparams_file: null # hparams filepath + +export_options: + runtime_check: False + verbose: False + onnx_opset: 17 + do_constant_folding: True + cache_support: False + device: 'cuda' + check_tolerance: 0.01 diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml index 6bd1be905a97..53d4e9b7e82b 100644 --- a/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml @@ -35,4 +35,9 @@ share: False # whether create a public URL username: test # user name for web client password: test2 # password for web client web_port: 9889 # the port number of the web server -chat: False # use the chat interface \ No newline at end of file +chat: False # use the chat interface +chatbot_config: + value: False # whether to inject the value attributes + user: User + assistant: Assistant + system: "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n" diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_validate_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_validate_config.yaml new file mode 100644 index 000000000000..39b0c7ed2176 --- /dev/null +++ b/examples/nlp/language_modeling/conf/megatron_gpt_validate_config.yaml @@ -0,0 +1,22 @@ +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + log_every_n_steps: 1 + limit_val_batches: 10 + limit_test_batches: 50 + max_steps: 100 # needed to setup dataloaders + max_epochs: null + replace_sampler_ddp: False + +tensor_model_parallel_size: ??? # should be set the same as the pretrained model that is being restored from +pipeline_model_parallel_size: ??? # should be set the same as the pretrained model that is being restored from +micro_batch_size: null # limited by GPU memory, defaults to pretrained model config +global_batch_size: null # will use more micro batches to reach global batch size, defaults to pretrained model config +virtual_pipeline_model_parallel_size: null +gpt_model_file: null # GPT nemo file path +checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the GPT training +checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading +hparams_file: null # model configuration file, only used for PTL checkpoint loading diff --git a/examples/nlp/language_modeling/conf/megatron_model_base_config.yaml b/examples/nlp/language_modeling/conf/megatron_model_base_config.yaml index d3feb97ea9b4..e98ebae6da63 100644 --- a/examples/nlp/language_modeling/conf/megatron_model_base_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_model_base_config.yaml @@ -36,4 +36,5 @@ megatron_legacy: False # Whether to use the legacy Megatron model. This affects normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. num_moe_experts: 1 # When >1, FFNs are changed to MoE layers moe_frequency: 1 # every Nth ffn layer will be made MoE -moe_dropout: 0.0 # Dropout value for MoE layers \ No newline at end of file +moe_dropout: 0.0 # Dropout value for MoE layers +use_flash_attention: false # Use flash attention in self-attention module \ No newline at end of file diff --git a/examples/nlp/language_modeling/megatron_bert_pretraining.py b/examples/nlp/language_modeling/megatron_bert_pretraining.py index 1f8fff9e92a0..3f1bd4a5bff3 100644 --- a/examples/nlp/language_modeling/megatron_bert_pretraining.py +++ b/examples/nlp/language_modeling/megatron_bert_pretraining.py @@ -29,11 +29,12 @@ from nemo.utils import logging from nemo.utils.exp_manager import exp_manager -mp.set_start_method("spawn", force=True) - @hydra_runner(config_path="conf", config_name="megatron_bert_config") def main(cfg) -> None: + if cfg.model.data.dataloader_type != "LDDL": + mp.set_start_method("spawn", force=True) + logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') diff --git a/examples/nlp/language_modeling/megatron_change_num_partitions.py b/examples/nlp/language_modeling/megatron_change_num_partitions.py index 2938a16098a1..72655089e0ee 100644 --- a/examples/nlp/language_modeling/megatron_change_num_partitions.py +++ b/examples/nlp/language_modeling/megatron_change_num_partitions.py @@ -199,7 +199,7 @@ def compute_tp_splits( # alias the global index to idx idx = global_idx - swiglu_activation = 'swiglu' in str(model_cfg.get('activation', '')).lower() + fast_glu_activation = str(model_cfg.get('activation', '')).lower() in ['fast-geglu', 'fast-swiglu', 'fast-reglu'] if param.shape == partitions[0][idx].shape: split = [partitions[0][idx].data] * tp_size @@ -230,8 +230,8 @@ def compute_tp_splits( for i in range(tp_size): tp_qkv = torch.cat([tp_qkv_splits[item] for item in range(i, tp_size * 2, tp_size)]) split.append(tp_qkv) - elif 'dense_h_to_4h.weight' in param_name and swiglu_activation: - # For Megatron GPT model with Swiglu activation + elif 'dense_h_to_4h.weight' in param_name and fast_glu_activation: + # For Megatron GPT model with Fast Glu activation # Handle gated linear units # concat all the first halves ('W's) and all the second halves ('V's) w_split, k_split = torch.chunk(partitions[0][idx].data, 2, dim=0) @@ -261,7 +261,7 @@ def compute_tp_merge(idx, name, param, partitions_pp, model_cfg): Returns: The concatenated parameter for TP 1 PP 1. """ - swiglu_activation = 'swiglu' in str(model_cfg.get('activation', '')).lower() + fast_glu_activation = str(model_cfg.get('activation', '')).lower() in ['fast-geglu', 'fast-swiglu', 'fast-reglu'] # Logic from original TP rank change if param.shape == partitions_pp[0][idx].shape: @@ -271,8 +271,8 @@ def compute_tp_merge(idx, name, param, partitions_pp, model_cfg): else: concated = torch.cat([partitions_pp[i][idx].data for i in range(len(partitions_pp))], dim=0) - # Logic for Swiglu activation - if 'dense_h_to_4h.weight' in name and swiglu_activation: + # Logic for Fast Glu activation + if 'dense_h_to_4h.weight' in name and fast_glu_activation: # concat all the first halves ('W's) and all the second halves ('V's) wk_splits = [] for tpr in range(len(partitions_pp)): diff --git a/examples/nlp/language_modeling/megatron_export.py b/examples/nlp/language_modeling/megatron_export.py new file mode 100644 index 000000000000..bf9157884bfc --- /dev/null +++ b/examples/nlp/language_modeling/megatron_export.py @@ -0,0 +1,175 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright 2017 Johns Hopkins University (Shinji Watanabe) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer + +from nemo.collections.nlp.models.language_modeling.megatron_bart_model import MegatronBARTModel +from nemo.collections.nlp.models.language_modeling.megatron_bert_model import MegatronBertModel +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.models.language_modeling.megatron_retrieval_model import MegatronRetrievalModel +from nemo.collections.nlp.models.language_modeling.megatron_t5_model import MegatronT5Model +from nemo.collections.nlp.models.machine_translation.megatron_nmt_model import MegatronNMTModel +from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector +from nemo.core import ModelPT +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.app_state import AppState +from nemo.utils.model_utils import inject_model_parallel_rank + + +def get_model_class(cfg): + if cfg.model_type == 'gpt': + return MegatronGPTModel + elif cfg.model_type == 'bert': + return MegatronBertModel + elif cfg.model_type == 't5': + return MegatronT5Model + elif cfg.model_type == 'bart': + return MegatronBARTModel + elif cfg.model_type == 'nmt': + return MegatronNMTModel + elif cfg.model_type == 'retro': + return MegatronRetrievalModel + else: + raise ValueError("Invalid Model Type") + + +@hydra_runner(config_path="conf", config_name="megatron_gpt_export") +def nemo_export(cfg): + """Convert a nemo model into .onnx ONNX format.""" + nemo_in = None + if cfg.gpt_model_file: + nemo_in = cfg.gpt_model_file + elif cfg.checkpoint_dir: + nemo_in = os.path.join(cfg.checkpoint_dir, cfg.checkpoint_name) + assert nemo_in is not None, "NeMo model not provided. Please provide the path to the .nemo or .ckpt file" + + onnx_out = cfg.onnx_model_file + + trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer) + assert ( + cfg.trainer.devices * cfg.trainer.num_nodes + == cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size + ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" + + logging.info("Restoring NeMo model from '{}'".format(nemo_in)) + try: + if cfg.gpt_model_file: + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.gpt_model_file): + save_restore_connector.model_extracted_dir = cfg.gpt_model_file + + pretrained_cfg = ModelPT.restore_from( + restore_path=cfg.gpt_model_file, + trainer=trainer, + return_config=True, + save_restore_connector=save_restore_connector, + ) + OmegaConf.set_struct(pretrained_cfg, True) + with open_dict(pretrained_cfg): + pretrained_cfg.sequence_parallel = False + pretrained_cfg.activations_checkpoint_granularity = None + pretrained_cfg.activations_checkpoint_method = None + pretrained_cfg.precision = trainer.precision + if trainer.precision == "16": + pretrained_cfg.megatron_amp_O2 = False + model = ModelPT.restore_from( + restore_path=cfg.gpt_model_file, + trainer=trainer, + override_config_path=pretrained_cfg, + save_restore_connector=save_restore_connector, + ) + elif cfg.checkpoint_dir: + app_state = AppState() + if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: + app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = cfg.tensor_model_parallel_size + app_state.pipeline_model_parallel_size = cfg.pipeline_model_parallel_size + ( + app_state.tensor_model_parallel_rank, + app_state.pipeline_model_parallel_rank, + app_state.model_parallel_size, + app_state.data_parallel_size, + app_state.pipeline_model_parallel_split_rank, + app_state.virtual_pipeline_model_parallel_rank, + ) = fake_initialize_model_parallel( + world_size=app_state.model_parallel_size, + rank=trainer.global_rank, + tensor_model_parallel_size_=cfg.tensor_model_parallel_size, + pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank, + ) + checkpoint_path = inject_model_parallel_rank(os.path.join(cfg.checkpoint_dir, cfg.checkpoint_name)) + model_cls = get_model_class(cfg) + model = model_cls.load_from_checkpoint(checkpoint_path, hparams_file=cfg.hparams_file, trainer=trainer) + else: + raise ValueError("need at least a nemo file or checkpoint dir") + except Exception as e: + logging.error( + "Failed to restore model from NeMo file : {}. Please make sure you have the latest NeMo package installed with [all] dependencies.".format( + nemo_in + ) + ) + raise e + + logging.info("Model {} restored from '{}'".format(model.__class__.__name__, nemo_in)) + + # Export + check_trace = cfg.export_options.runtime_check + + try: + model.to(device=cfg.export_options.device).freeze() + model.eval() + model.export( + onnx_out, + onnx_opset_version=cfg.export_options.onnx_opset, + do_constant_folding=cfg.export_options.do_constant_folding, + dynamic_axes={ + 'input_ids': {0: "sequence", 1: "batch"}, + 'position_ids': {0: "sequence", 1: "batch"}, + 'logits': {0: "sequence", 1: "batch"}, + }, + check_trace=check_trace, + check_tolerance=cfg.export_options.check_tolerance, + verbose=cfg.export_options.verbose, + ) + except Exception as e: + logging.error( + "Export failed. Please make sure your NeMo model class ({}) has working export() and that you have the latest NeMo package installed with [all] dependencies.".format( + model.__class__ + ) + ) + raise e + + +if __name__ == '__main__': + nemo_export() diff --git a/examples/nlp/language_modeling/megatron_gpt_eval.py b/examples/nlp/language_modeling/megatron_gpt_eval.py index 14cdbf8a760c..2a6890e1a9b4 100644 --- a/examples/nlp/language_modeling/megatron_gpt_eval.py +++ b/examples/nlp/language_modeling/megatron_gpt_eval.py @@ -15,6 +15,7 @@ import asyncio import os import threading +from functools import partial import torch from omegaconf import OmegaConf, open_dict @@ -23,7 +24,6 @@ from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel -from nemo.collections.nlp.modules.common.megatron_web_server import get_chatbot_demo, get_demo from nemo.collections.nlp.modules.common.text_generation_server import MegatronServer from nemo.collections.nlp.modules.common.text_generation_utils import generate from nemo.collections.nlp.modules.common.transformer.text_generation import LengthParam, SamplingParam @@ -154,6 +154,15 @@ def __getitem__(self, idx): return self.sentences[idx] +def remove_padded_prompts(response, nb_paddings): + result = {} + for k, v in response.items(): + if v != None and (type(v) is list or type(v) is torch.Tensor): + v = v[:-nb_paddings] + result[k] = v + return result + + @hydra_runner(config_path="conf", config_name="megatron_gpt_inference") def main(cfg) -> None: @@ -165,8 +174,14 @@ def main(cfg) -> None: or cfg.pipeline_model_parallel_size < 0 or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 ): + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.gpt_model_file): + save_restore_connector.model_extracted_dir = cfg.gpt_model_file model_config = MegatronGPTModel.restore_from( - restore_path=cfg.gpt_model_file, trainer=trainer, return_config=True, + restore_path=cfg.gpt_model_file, + trainer=trainer, + return_config=True, + save_restore_connector=save_restore_connector, ) with open_dict(cfg): @@ -254,32 +269,51 @@ def main(cfg) -> None: "compute_logprob": cfg.inference.compute_logprob, } + fp8_enabled = hasattr(model.cfg, "fp8") and (model.cfg.fp8 == True) + if fp8_enabled: + nb_paddings = 0 + while len(cfg.prompts) % 8 != 0: + cfg.prompts.append("") + nb_paddings += 1 + # First method of running text generation, call model.generate method response = model.generate( inputs=OmegaConf.to_container(cfg.prompts), length_params=length_params, sampling_params=sampling_params ) + if fp8_enabled: + response = remove_padded_prompts(response, nb_paddings) print("***************************") print(response) print("***************************") - # Second method of running text generation, call trainer.predict + # Second method of running text generation, call trainer.predict [recommended] + bs = 8 if fp8_enabled else 2 ds = RequestDataSet(OmegaConf.to_container(cfg.prompts)) - request_dl = DataLoader(dataset=ds, batch_size=2) + request_dl = DataLoader(dataset=ds, batch_size=bs) config = OmegaConf.to_container(cfg.inference) model.set_inference_config(config) response = trainer.predict(model, request_dl) + if fp8_enabled: + response[-1] = remove_padded_prompts(response[-1], nb_paddings) print("***************************") print(response) print("***************************") # Third method of running text generation, use inference server if cfg.server: + from nemo.collections.nlp.modules.common.megatron_web_server import get_chatbot_demo, get_demo + if parallel_state.is_pipeline_first_stage() and parallel_state.get_tensor_model_parallel_rank() == 0: if cfg.web_server: if cfg.chat: - web_ui = get_chatbot_demo + defaults = { + 'user': cfg.chatbot_config.user, + 'assistant': cfg.chatbot_config.assistant, + 'system': cfg.chatbot_config.system, + } + web_ui = partial(get_chatbot_demo, defaults=defaults, value=cfg.chatbot_config.value) else: web_ui = get_demo loop = asyncio.new_event_loop() diff --git a/examples/nlp/language_modeling/megatron_gpt_prompt_learning_eval.py b/examples/nlp/language_modeling/megatron_gpt_prompt_learning_eval.py index 3a490b3532f1..fd10dacf6ee5 100644 --- a/examples/nlp/language_modeling/megatron_gpt_prompt_learning_eval.py +++ b/examples/nlp/language_modeling/megatron_gpt_prompt_learning_eval.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os + import torch import torch.multiprocessing as mp from megatron.core import parallel_state @@ -19,11 +21,12 @@ from omegaconf.omegaconf import open_dict from pytorch_lightning.trainer.trainer import Trainer +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel from nemo.collections.nlp.models.language_modeling.megatron_gpt_prompt_learning_model import ( MegatronGPTPromptLearningModel, ) from nemo.collections.nlp.modules.common.transformer.text_generation import LengthParam, SamplingParam -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector from nemo.core.config import hydra_runner mp.set_start_method("spawn", force=True) @@ -86,8 +89,14 @@ def main(cfg) -> None: or cfg.pipeline_model_parallel_size < 0 or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 ): - model_config = MegatronGPTPromptLearningModel.restore_from( - restore_path=cfg.gpt_model_file, trainer=trainer, return_config=True, + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.gpt_model_file): + save_restore_connector.model_extracted_dir = cfg.gpt_model_file + model_config = MegatronGPTModel.restore_from( + restore_path=cfg.gpt_model_file, + trainer=trainer, + return_config=True, + save_restore_connector=save_restore_connector, ) with open_dict(cfg): @@ -114,6 +123,7 @@ def main(cfg) -> None: # Load prompt tuned model, virtual_prompt_model_file must be provided in config # Now load prompt learning model with frozen gpt model base + model = MegatronGPTPromptLearningModel.restore_from( restore_path=cfg.virtual_prompt_model_file, trainer=trainer, override_config_path=prompt_learning_cfg, ) diff --git a/examples/nlp/language_modeling/megatron_gpt_validate.py b/examples/nlp/language_modeling/megatron_gpt_validate.py new file mode 100644 index 000000000000..b5a61e627a14 --- /dev/null +++ b/examples/nlp/language_modeling/megatron_gpt_validate.py @@ -0,0 +1,155 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import tempfile + +from omegaconf import OmegaConf, open_dict +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel +from nemo.collections.nlp.parts.nlp_overrides import ( + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + NLPSaveRestoreConnector, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.app_state import AppState +from nemo.utils.model_utils import inject_model_parallel_rank + +""" Example script showing how to run validation on a MegatronGPT model. + + Sample usage: + + From nemo model: + + python megatron_gpt_validate.py \ + trainer.devices=4 \ + trainer.num_nodes=1 \ + trainer.limit_val_batches=10 \ + trainer.max_steps=100 \ + tensor_model_parallel_size=1 \ + pipeline_model_parallel_size=4 \ + trainer.precision=bf16 \ + gpt_model_file=/path/to/megatron_gpt_tp_1_pp4.nemo + + from PTL checkpoint: + python megatron_gpt_validate.py \ + trainer.devices=4 \ + trainer.num_nodes=1 \ + trainer.limit_val_batches=10 \ + trainer.max_steps=100 \ + tensor_model_parallel_size=1 \ + pipeline_model_parallel_size=4 \ + virtual_pipeline_model_parallel_size=4 \ + trainer.precision=bf16 \ + checkpoint_dir='/path/to/experiment/checkpoints' \ + checkpoint_name='megatron_gpt--val_loss=7.78-step=100-consumed_samples=6336.0-last.ckpt' \ + hparams_file='/path/to/experiment/hparams.yaml + +""" + + +def modify_pretrained_cfg(pretrained_cfg, trainer, cfg): + with open_dict(pretrained_cfg): + OmegaConf.set_struct(pretrained_cfg, True) + pretrained_cfg.sequence_parallel = False + pretrained_cfg.activations_checkpoint_granularity = None + pretrained_cfg.activations_checkpoint_method = None + pretrained_cfg.precision = trainer.precision + if cfg.micro_batch_size is not None: + pretrained_cfg.micro_batch_size = cfg.micro_batch_size + if cfg.global_batch_size is not None: + pretrained_cfg.global_batch_size = cfg.global_batch_size + if trainer.precision == "16": + pretrained_cfg.megatron_amp_O2 = False + return pretrained_cfg + + +@hydra_runner(config_path="conf", config_name="megatron_gpt_validate_config") +def main(cfg) -> None: + + trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer) + + assert ( + cfg.trainer.devices * cfg.trainer.num_nodes + == cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size + ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" + + if cfg.gpt_model_file: + logging.info(f"Restoring model from {cfg.gpt_model_file}") + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.gpt_model_file): + save_restore_connector.model_extracted_dir = cfg.gpt_model_file + + pretrained_cfg = MegatronGPTModel.restore_from( + restore_path=cfg.gpt_model_file, + trainer=trainer, + return_config=True, + save_restore_connector=save_restore_connector, + ) + pretrained_cfg = modify_pretrained_cfg(pretrained_cfg, trainer, cfg) + model = MegatronGPTModel.restore_from( + restore_path=cfg.gpt_model_file, + trainer=trainer, + override_config_path=pretrained_cfg, + save_restore_connector=save_restore_connector, + map_location=f'cuda:{trainer.local_rank}', # map_location is needed for converted models + ) + elif cfg.checkpoint_dir: + logging.info( + f"Restoring model from checkpoint_dir: {cfg.checkpoint_dir} with checkpoint name: {cfg.checkpoint_name}" + ) + app_state = AppState() + if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: + app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = cfg.tensor_model_parallel_size + app_state.pipeline_model_parallel_size = cfg.pipeline_model_parallel_size + app_state.virtual_pipeline_model_parallel_size = cfg.virtual_pipeline_model_parallel_size + ( + app_state.tensor_model_parallel_rank, + app_state.pipeline_model_parallel_rank, + app_state.model_parallel_size, + app_state.data_parallel_size, + app_state.pipeline_model_parallel_split_rank, + app_state.virtual_pipeline_model_parallel_rank, + ) = fake_initialize_model_parallel( + world_size=app_state.model_parallel_size, + rank=trainer.global_rank, + tensor_model_parallel_size_=cfg.tensor_model_parallel_size, + pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, + virtual_pipeline_model_parallel_size_=cfg.virtual_pipeline_model_parallel_size, + ) + checkpoint_path = inject_model_parallel_rank(os.path.join(cfg.checkpoint_dir, cfg.checkpoint_name)) + pretrained_cfg = OmegaConf.load(cfg.hparams_file) + pretrained_cfg = modify_pretrained_cfg(pretrained_cfg.cfg, trainer, cfg) + with tempfile.NamedTemporaryFile(suffix='.yaml') as f: + OmegaConf.save(config=pretrained_cfg, f=f.name) + model = MegatronGPTModel.load_from_checkpoint( + checkpoint_path=checkpoint_path, trainer=trainer, hparams_file=f.name, + ) + else: + raise ValueError("need at least a nemo file or checkpoint dir") + + logging.info("\n\n************** Model configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(model.cfg)}') + + trainer.validate(model=model) + + +if __name__ == '__main__': + main() # noqa pylint: disable=no-value-for-parameter diff --git a/examples/nlp/language_modeling/megatron_t5_eval.py b/examples/nlp/language_modeling/megatron_t5_eval.py index 0282f9fb2913..0b6ea54b6b99 100644 --- a/examples/nlp/language_modeling/megatron_t5_eval.py +++ b/examples/nlp/language_modeling/megatron_t5_eval.py @@ -13,6 +13,7 @@ # limitations under the License. +import os from argparse import ArgumentParser import torch @@ -61,8 +62,15 @@ def main(): or args.pipeline_model_parallel_size < 0 or args.pipeline_model_parallel_split_rank < 0 ): + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(args.model_file): + save_restore_connector.model_extracted_dir = args.model_file + model_config = MegatronT5Model.restore_from( - restore_path=args.model_file, trainer=Trainer(strategy=NLPDDPStrategy()), return_config=True, + restore_path=args.model_file, + trainer=Trainer(strategy=NLPDDPStrategy()), + return_config=True, + save_restore_connector=save_restore_connector, ) args.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml index d7ebd69f31be..8c21117969ab 100755 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml +++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml @@ -70,6 +70,8 @@ model: peft: peft_scheme: "adapter" # can be either adapter,ia3, or ptuning restore_from_path: null + restore_from_ckpt_name: null + restore_from_hparams_path: null # Used for adapter peft training adapter_tuning: @@ -127,4 +129,5 @@ inference: repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - outfile_path: /home/adithyare/exp/foo.txt \ No newline at end of file + outfile_path: output.txt + compute_attention_mask: True \ No newline at end of file diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml index 678851db3b01..f8a8e6b9dbc0 100644 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml +++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml @@ -29,7 +29,7 @@ exp_manager: monitor: validation_${model.data.validation_ds.metric.name} save_top_k: 2 mode: max - save_nemo_on_train_end: False # Should be false, correct prompt learning model file is saved at model.nemo_path set below, + save_nemo_on_train_end: False filename: 'megatron_gpt_sft--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}-{consumed_samples}' model_parallel_size: ${model.tensor_model_parallel_size} save_best_model: True diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml new file mode 100644 index 000000000000..008241d19389 --- /dev/null +++ b/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml @@ -0,0 +1,36 @@ +inference: + greedy: True # Whether or not to use sampling ; use greedy decoding otherwise + top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. + top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. + temperature: 1.0 # sampling temperature + add_BOS: True # add the bos token at the begining of the prompt + tokens_to_generate: 30 # The minimum length of the sequence to be generated. + all_probs: False # whether return the log prob for all the tokens in vocab + repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. + min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. + compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False + + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +data: + test_ds: ??? + num_workers: 1 + global_batch_size: 4 + micro_batch_size: 4 + +tensor_model_parallel_size: -1 +pipeline_model_parallel_size: -1 +pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) +language_model_path: ??? # GPT nemo file path # used when starting from a .nemo file +adapter_model_file: ??? # .nemo file saved during training (using megatron_t5_lora_tuning.py) +pred_file_path: null # save predictions to this file +checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the GPT training +checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading +hparams_file: null # model configuration file, only used for PTL checkpoint loading +batch_size: 8 diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml new file mode 100644 index 000000000000..6663df58c823 --- /dev/null +++ b/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml @@ -0,0 +1,99 @@ +name: adapter_tuning_${model.new_tasks[0]}_max_epochs${trainer.max_epochs}_lora_dim${model.lora_tuning.kqv_adapter_dim} + +trainer: + devices: 1 + accelerator: gpu + num_nodes: 1 + precision: 16 + logger: False + enable_checkpointing: False + replace_sampler_ddp: False + max_epochs: 10 + max_steps: 1000 + log_every_n_steps: 1 + val_check_interval: 2 + accumulate_grad_batches: 1 + gradient_clip_val: 0.0 + resume_from_checkpoint: null + benchmark: False + +exp_manager: + explicit_log_dir: null + exp_dir: nemo-lora-mt0-tr + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 1 + mode: min + save_nemo_on_train_end: True # Should be false, correct prompt learning model file is saved at model.virtual_prompt_save_path set below + filename: "megatron_t5_adapter_tune--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}" + model_parallel_size: ${model.tensor_model_parallel_size} + save_best_model: True + +model: + seed: 1234 + nemo_path: ${exp_manager.exp_dir}/${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved + virtual_prompt_style: 'no-prompts' #'prompt-tuning' # adapter tuning requires no virtual prompts + encoder_seq_length: 2048 + gradient_as_bucket_view: false + tensor_model_parallel_size: 1 + pipeline_model_parallel_size: 1 + global_batch_size: 4 + micro_batch_size: 4 + validation_global_batch_size: ${model.global_batch_size} + validation_micro_batch_size: ${model.micro_batch_size} + validation_drop_last: False + report_validation_metric: False + validation_metric: accuracy + + restore_path: null # Path to an existing p-tuned/prompt tuned .nemo model you wish to add new tasks to or run inference with + language_model_path: ??? # Path to the pretrained T5 language model .nemo file, always required + existing_tasks: [] + new_tasks: ["taskname"] + + task_templates: + - taskname: "taskname" # The task name + prompt_template: "{prompt} {completion}" # Prompt template for task, specify virtual prompt positions with <|VIRTUAL_PROMPT_#|> + total_virtual_tokens: 0 # Sum of tokens in virtual_token_splits must add to this number. Can differ between new and existing tasks, but must match across all new tasks being tuned at the same time. + virtual_token_splits: [] # number of virtual tokens to be inserted at each VIRTUAL PROMPT location, must add to total_virtual_tokens + truncate_field: "prompt" # The {field} in the prompt template whose text will be truncated if the input is too long, if null, inputs that are too long will just be skipped. + answer_field: "completion" + + lora_tuning: + kqv_adapter_dim: 24 + kv_adapter_dim: 16 + q_adapter_dim: 8 + adapter_dropout: 0.1 + column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal + row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal + + data: + train_ds: ??? + validation_ds: ??? + shuffle: True + num_workers: 0 + pin_memory: True + add_eos: True + + + optim: + name: fused_adam + lr: 1e-3 + weight_decay: 0.01 + betas: + - 0.9 + - 0.98 + sched: + name: CosineAnnealing + warmup_steps: 50 + constant_steps: 0 + min_lr: 0.0 + monitor: val_loss + reduce_on_plateau: false diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_eval.py b/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_eval.py index a4408b7b1c3d..8036a2be7319 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_eval.py @@ -13,6 +13,8 @@ # limitations under the License. +import os + import torch import torch.multiprocessing as mp from megatron.core import parallel_state @@ -21,7 +23,8 @@ from pytorch_lightning.trainer.trainer import Trainer from nemo.collections.nlp.models.language_modeling.megatron_gpt_adapter_model import MegatronGPTAdapterLearningModel -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector from nemo.core.config import hydra_runner mp.set_start_method("spawn", force=True) @@ -54,8 +57,14 @@ def main(cfg) -> None: or cfg.pipeline_model_parallel_size < 0 or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 ): - model_config = MegatronGPTAdapterLearningModel.restore_from( - restore_path=cfg.gpt_model_file, trainer=trainer, return_config=True, + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.gpt_model_file): + save_restore_connector.model_extracted_dir = cfg.gpt_model_file + model_config = MegatronGPTModel.restore_from( + restore_path=cfg.gpt_model_file, + trainer=trainer, + return_config=True, + save_restore_connector=save_restore_connector, ) with open_dict(cfg): diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py index b780ff821c47..a30818f29fb3 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. - +import os import torch import torch.multiprocessing as mp from megatron.core import parallel_state @@ -21,7 +21,8 @@ from pytorch_lightning.trainer.trainer import Trainer from nemo.collections.nlp.models.language_modeling.megatron_gpt_adapter_model import MegatronGPTInfusedAdapterModel -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector from nemo.core.config import hydra_runner mp.set_start_method("spawn", force=True) @@ -54,8 +55,14 @@ def main(cfg) -> None: or cfg.pipeline_model_parallel_size < 0 or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 ): - model_config = MegatronGPTInfusedAdapterModel.restore_from( - restore_path=cfg.gpt_model_file, trainer=trainer, return_config=True, + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.gpt_model_file): + save_restore_connector.model_extracted_dir = cfg.gpt_model_file + model_config = MegatronGPTModel.restore_from( + restore_path=cfg.gpt_model_file, + trainer=trainer, + return_config=True, + save_restore_connector=save_restore_connector, ) with open_dict(cfg): diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py index b45f5da69e89..e2294e4e2cce 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py @@ -74,7 +74,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f"\n{OmegaConf.to_yaml(cfg)}") assert cfg.model.restore_from_path is not None - megatron_amp_o2 = cfg.model.get("megatron_amp_O2", False) + megatron_amp_O2 = cfg.model.get("megatron_amp_O2", False) with_distributed_adam = False plugins = [] @@ -94,7 +94,7 @@ def main(cfg) -> None: if cfg.model.pipeline_model_parallel_size > 1 else True, # turn off the grad scale for pipeline parallel LM model ) - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device="cuda", scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device="cuda", scaler=scaler)) @@ -104,9 +104,17 @@ def main(cfg) -> None: trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) if cfg.model.peft.restore_from_path: - peft_model_cfg = MegatronGPTPEFTModel.restore_from( - restore_path=cfg.model.peft.restore_from_path, trainer=trainer, return_config=True, - ) + if cfg.model.peft.restore_from_path.endswith(".nemo"): + peft_model_cfg = MegatronGPTPEFTModel.restore_from( + restore_path=cfg.model.peft.restore_from_path, trainer=trainer, return_config=True, + ) + elif cfg.model.peft.restore_from_hparams_path: # not a .nemo model we expect a hparams.yaml file + peft_model_cfg = OmegaConf.to_container(OmegaConf.load(cfg.model.peft.restore_from_hparams_path).cfg) + peft_model_cfg = OmegaConf.create(peft_model_cfg) + # extract dict inside cfg key and convert it to DictConfig + # this allows interpolation to work the same way as config from the .restore_from method + else: + raise RuntimeError("This script requires a .nemo peft model or path to hparams.yaml (and a ckpt path).") else: peft_model_cfg = MegatronGPTSFTModel.restore_from( restore_path=cfg.model.restore_from_path, trainer=trainer, return_config=True, @@ -127,15 +135,27 @@ def main(cfg) -> None: cfg.inference.tokens_to_generate = peft_model_cfg.data.test_ds.tokens_to_generate if cfg.model.peft.restore_from_path: - save_restore_connector = PEFTSaveRestoreConnector( - peft_model_nemo_path=cfg.model.peft.restore_from_path, peft_model_ckpt_path=None, - ) + if cfg.model.peft.restore_from_path.endswith(".nemo"): + save_restore_connector = PEFTSaveRestoreConnector( + peft_model_nemo_path=cfg.model.peft.restore_from_path, peft_model_ckpt_path=None, + ) + else: + # attempting to load a ckpt peft model. + if cfg.model.peft.restore_from_ckpt_name: + ckpt_name = cfg.model.peft.restore_from_ckpt_name + else: + ckpt_name = "model_weights.ckpt" + save_restore_connector = PEFTSaveRestoreConnector( + peft_model_nemo_path=None, + peft_model_ckpt_path=cfg.model.peft.restore_from_path, + peft_model_ckpt_name=ckpt_name, + ) else: save_restore_connector = NLPSaveRestoreConnector() if os.path.isdir(cfg.model.restore_from_path): save_restore_connector.model_extracted_dir = cfg.model.restore_from_path - model = NLPModel.restore_from( + model = MegatronGPTSFTModel.restore_from( restore_path=cfg.model.restore_from_path, trainer=trainer, override_config_path=peft_model_cfg, @@ -160,15 +180,17 @@ def main(cfg) -> None: for batch in response: batch_sentences = [s for s in batch['sentences']] batch_tokens = [s for s in batch['tokens']] - batch_logprob = [s.tolist() for s in batch['logprob']] - for s, t, l in zip(batch_sentences, batch_tokens, batch_logprob): - if cfg.inference.get("verbose", False): - d = { - 'sentence': s, - 'tokens_with_logprobs': ', '.join([f"{_t} {_l:.4f}" for _t, _l in zip(t, l)]), - } - f.write(json.dumps(d, sort_keys=True, indent=2) + '\n') - else: + if cfg.inference.compute_logprob: + batch_logprob = [s.tolist() for s in batch['logprob']] + for s, t, l in zip(batch_sentences, batch_tokens, batch_logprob): + if cfg.inference.get("verbose", False): + d = { + 'sentence': s, + 'tokens_with_logprobs': ', '.join([f"{_t} {_l:.4f}" for _t, _l in zip(t, l)]), + } + f.write(json.dumps(d, sort_keys=True, indent=2) + '\n') + else: + for s in batch_sentences: d = {'sentence': s} f.write(json.dumps(d) + '\n') print("predictions saved to {}".format(cfg.inference.outfile_path)) diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py index bf2705aa99e1..a6d5561a0ec2 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py @@ -170,7 +170,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' plugins = [] @@ -190,7 +190,7 @@ def main(cfg) -> None: if cfg.model.pipeline_model_parallel_size > 1 else True, # turn off the grad scale for pipeline parallel LM model ) - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py index b2b8786df8c1..93b793c5475b 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py @@ -61,6 +61,8 @@ def _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False): gpt_cfg.hidden_dropout = cfg.model.get('hidden_dropout', 0.0) gpt_cfg.attention_dropout = cfg.model.get('attention_dropout', 0.0) gpt_cfg.ffn_dropout = cfg.model.ffn_dropout + sft_cls = MegatronGPTSFTModel + gpt_cfg.target = f"{sft_cls.__module__}.{sft_cls.__name__}" # This is needed when modifying a hparam file directly to load `.ckpt` files. # This is not needed to modify the cfg in `.nemo` files. @@ -130,7 +132,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name', 'fused_adam') == 'distributed_fused_adam' plugins = [] strategy = NLPDDPStrategy( @@ -146,7 +148,7 @@ def main(cfg) -> None: growth_interval=cfg.model.get('native_amp_growth_interval', 1000), hysteresis=cfg.model.get('hysteresis', 2), ) - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) @@ -167,6 +169,10 @@ def main(cfg) -> None: trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint) + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + if cfg.model.restore_from_path: save_restore_connector = NLPSaveRestoreConnector() if os.path.isdir(cfg.model.restore_from_path): @@ -177,6 +183,7 @@ def main(cfg) -> None: return_config=True, save_restore_connector=save_restore_connector, ) + gpt_cfg = _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False) model = load_from_nemo(MegatronGPTSFTModel, cfg, trainer, gpt_cfg, modify_confg_fn=_modify_config) else: validate_checkpoint_loading_args(cfg.model.pretrained_checkpoint) diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py b/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py new file mode 100644 index 000000000000..d9de94843071 --- /dev/null +++ b/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py @@ -0,0 +1,160 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import torch +import torch.multiprocessing as mp +from megatron.core import parallel_state +from omegaconf import OmegaConf +from omegaconf.omegaconf import open_dict +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5LoraModel +from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy +from nemo.core.config import hydra_runner +from nemo.utils.app_state import AppState + +mp.set_start_method("spawn", force=True) + +""" +This is the script to run an Adapter Tuned GPT Model for text generation. + +Usage: + Assume the model has TP=1, PP=1 in the following use cases. + a. run greedy inference using a base gpt nemo file, and an adapter nemo file: + python megatron_gpt_ia3_eval.py \ + gpt_model_file=PATH TO GPT MODEL NEMO FILE \ + adapter_model_file=PATH TO ADAPTER MODEL NEMO FILE (generated by training script: ./megatron_gpt_ia3_tuning.py) \ + data_paths=[PATH TO A JSONL FILE CONTAINING PROMPTS], \ + pred_file_path=PATH TO OUTPUT FILE TO DUMP PREDICTIONS +""" + +if not torch.cuda.is_available(): + raise EnvironmentError("GPU is needed for the inference") + + +@hydra_runner(config_path="conf", config_name="megatron_t5_adapter_inference") +def main(cfg) -> None: + + # trainer required for restoring model parallel models + trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer) + + if ( + cfg.tensor_model_parallel_size < 0 + or cfg.pipeline_model_parallel_size < 0 + or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 + ): + model_config = MegatronT5LoraModel.restore_from( + restore_path=cfg.language_model_path, trainer=trainer, return_config=True, + ) + + with open_dict(cfg): + cfg.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) + cfg.pipeline_model_parallel_size = model_config.get('pipeline_model_parallel_size', 1) + cfg.pipeline_model_parallel_split_rank = model_config.get('pipeline_model_parallel_split_rank', 0) + + app_state = AppState() + if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: + app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size + ( + app_state.tensor_model_parallel_rank, + app_state.pipeline_model_parallel_rank, + app_state.model_parallel_size, + app_state.data_parallel_size, + app_state.pipeline_model_parallel_split_rank, + app_state.virtual_pipeline_model_parallel_rank, + ) = fake_initialize_model_parallel( + world_size=app_state.model_parallel_size, + rank=trainer.global_rank, + tensor_model_parallel_size_=cfg.tensor_model_parallel_size, + pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank, + ) + + # Load an adapter model, must be provided in config + if cfg.get("adapter_model_file", None) is not None and cfg.get("language_model_path", None) is not None: + # Update frozen GPT model path in case it has changed + adapter_tuning_cfg = MegatronT5LoraModel.restore_from( + cfg.adapter_model_file, trainer=trainer, return_config=True + ) + with open_dict(adapter_tuning_cfg): + adapter_tuning_cfg.language_model_path = cfg.language_model_path + adapter_tuning_cfg.pretrained_language_model_path = cfg.language_model_path + adapter_tuning_cfg.micro_batch_size = cfg.data.micro_batch_size + adapter_tuning_cfg.global_batch_size = cfg.data.global_batch_size + + # Now load prompt learning model with frozen gpt model base + model = MegatronT5LoraModel.restore_from( + restore_path=cfg.adapter_model_file, trainer=trainer, override_config_path=adapter_tuning_cfg + ) + + # Or load regular GPT model + else: + raise NotImplementedError( + "This script is meant for inference from an Infused Adapter Tuned T5 Model, config should contain an adapter_model_file and a language_model_path" + ) + + # check whether the DDP is initialized + if parallel_state.is_unitialized(): + + def dummy(): + return + + if trainer.strategy.launcher is not None: + trainer.strategy.launcher.launch(dummy, trainer=trainer) + trainer.strategy.setup_environment() + + model.freeze() + + # Have to turn off activations_checkpoint_method for inference + try: + model.model.language_model.encoder.activations_checkpoint_method = None + except AttributeError: + pass + + try: + model.frozen_model.model.language_model.encoder.activations_checkpoint_method = None + except AttributeError: + pass + + test_ds, test_dl = model.build_virtual_prompt_dataset( + dataset_paths=cfg.data.test_ds, + batch_size=cfg.data.global_batch_size, + for_train=False, + drop_last=False, + shuffle=False, + num_workers=cfg.data.num_workers, + pin_memory=True, + ) + + config = OmegaConf.to_container(cfg.inference) + model.set_inference_config(config) + response = trainer.predict(model, test_dl) + print("***************************") + if cfg.pred_file_path is not None: + with open(cfg.pred_file_path, "w", encoding="utf-8") as f: + for batch in response: + for inp, pred in zip(batch['input_text'], batch['preds_text']): + inp = ' '.join(inp.split('\n')) + pred = ' '.join(pred.split('\n')) + f.write(f'{inp} {pred}\n') + print("predictions saved to {}".format(cfg.pred_file_path)) + else: + print(response) + print("***************************") + + +if __name__ == '__main__': + main() # noqa pylint: disable=no-value-for-parameter diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py new file mode 100644 index 000000000000..458887d97bef --- /dev/null +++ b/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py @@ -0,0 +1,107 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import torch.multiprocessing as mp +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment + +from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5LoraModel +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + NLPSaveRestoreConnector, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + +mp.set_start_method("spawn", force=True) + +""" +This is the script to train an Adapter infused GPT Model for text generation. +A base GPT Model is required as a starting point. This script will then insert +Adapters into each Transformer layer and will train/update only these adapters +during training. The base GPT Model weights will remain frozen. + +During training this script will only save the newly trained Adapter weights +in checkpoints. At the end of training a .nemo file of Adapter weights will +be saved. + +Usage: + Assuming the base model is a 125m GPT Model, with TP=1, PP=1: + a. run a training run for a base gpt nemo file: + python megatron_gpt_adapter_tuning.py \ + "model.data.train_ds=[PATH TO TRAINING JSONL FILE]", + "model.data.validation_ds=[PATH TO VALIDATION JSONL FILE]", + model.language_model_path="PATH TO BASE GPT MODEL .nemo FILE" + name="NAME OF TRAINING RUN" + exp_manager.exp_dir="DIR TO SAVE CHECKPOINTS and .nemo FILE", + trainer.max_epochs=2 +""" + + +@hydra_runner(config_path="conf", config_name="megatron_t5_lora_tuning_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + ) + if megatron_amp_O2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + exp_manager(trainer, cfg.exp_manager) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + # load existing or init new soft prompt GPT model + if cfg.model.get("restore_path", None): + model = MegatronT5LoraModel.restore_from( + cfg.model.restore_path, cfg.model, trainer=trainer, save_restore_connector=NLPSaveRestoreConnector() + ) + else: + model = MegatronT5LoraModel(cfg.model, trainer=trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/nlp/spellchecking_asr_customization/README.md b/examples/nlp/spellchecking_asr_customization/README.md new file mode 100644 index 000000000000..9d2063eff181 --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/README.md @@ -0,0 +1,32 @@ +# SpellMapper - spellchecking model for ASR Customization +Paper: https://arxiv.org/abs/2306.02317 +This model was partly inspired by Microsoft's paper https://arxiv.org/pdf/2203.00888.pdf. +The goal is to build a model that gets as input a single ASR hypothesis (text) and a vocabulary of custom words/phrases and predicts which fragments in the ASR hypothesis should be replaced by which custom words/phrases if any. +Our model is non-autoregressive (NAR) based on transformer architecture (BERT with multiple separators). + +As initial data we use about 5 mln entities from [YAGO corpus](https://www.mpi-inf.mpg.de/departments/databases-and-information-systems/research/yago-naga/yago/downloads/). These entities are short phrases from Wikipedia headings. +In order to get misspelled predictions we feed these data to TTS model and then to ASR model. +Having a "parallel" corpus of "correct + misspelled" phrases, we use statistical machine translation techniques to create a dictionary of possible ngram mappings with their respective frequencies. +We create an auxiliary algorithm that takes as input a sentence (ASR hypothesis) and a large custom dictionary (e.g. 5000 phrases) and selects top 10 candidate phrases that are probably contained in this sentence in a misspelled way. +The task of our final neural model is to predict which fragments in the ASR hypothesis should be replaced by which of top-10 candidate phrases if any. + +The pipeline consists of multiple steps: + +1. Download or generate training data. + See `https://github.com/bene-ges/nemo_compatible/tree/main/scripts/nlp/en_spellmapper/dataset_preparation` + +2. [Optional] Convert training dataset to tarred files. + `convert_dataset_to_tarred.sh` + +3. Train spellchecking model. + `run_training.sh` + or + `run_training_tarred.sh` + +4. Run evaluation. + - [test_on_kensho.sh](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/evaluation/test_on_kensho.sh) + - [test_on_userlibri.sh](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/evaluation/test_on_kensho.sh) + - [test_on_spoken_wikipedia.sh](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/evaluation/test_on_kensho.sh) + +5. Run inference. + `python run_infer.sh` diff --git a/examples/nlp/spellchecking_asr_customization/checkpoint_to_nemo.py b/examples/nlp/spellchecking_asr_customization/checkpoint_to_nemo.py new file mode 100644 index 000000000000..c2f514f3e67e --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/checkpoint_to_nemo.py @@ -0,0 +1,38 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This script converts checkpoint .ckpt to .nemo file. + +This script uses the `examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml` +config file by default. The other option is to set another config file via command +line arguments by `--config-name=CONFIG_FILE_PATH'. +""" + +from omegaconf import DictConfig, OmegaConf + +from nemo.collections.nlp.models import SpellcheckingAsrCustomizationModel +from nemo.core.config import hydra_runner +from nemo.utils import logging + + +@hydra_runner(config_path="conf", config_name="spellchecking_asr_customization_config") +def main(cfg: DictConfig) -> None: + logging.debug(f'Config Params: {OmegaConf.to_yaml(cfg)}') + SpellcheckingAsrCustomizationModel.load_from_checkpoint(cfg.checkpoint_path).save_to(cfg.target_nemo_path) + + +if __name__ == "__main__": + main() diff --git a/examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml b/examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml new file mode 100644 index 000000000000..c98915cdfc6f --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml @@ -0,0 +1,97 @@ +name: &name spellchecking +lang: ??? # e.g. 'ru', 'en' + +# Pretrained Nemo Models +pretrained_model: null + +trainer: + devices: 1 # the number of gpus, 0 for CPU + num_nodes: 1 + max_epochs: 3 # the number of training epochs + enable_checkpointing: false # provided by exp_manager + logger: false # provided by exp_manager + accumulate_grad_batches: 1 # accumulates grads every k batches + gradient_clip_val: 0.0 + precision: 32 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + strategy: ddp + log_every_n_steps: 1 # Interval of logging. + val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + +model: + do_training: true + label_map: ??? # path/.../label_map.txt + semiotic_classes: ??? # path/.../semiotic_classes.txt + max_sequence_len: 128 + lang: ${lang} + hidden_size: 768 + + optim: + name: adamw + lr: 3e-5 + weight_decay: 0.1 + + sched: + name: WarmupAnnealing + + # pytorch lightning args + monitor: val_loss + reduce_on_plateau: false + + # scheduler config override + warmup_ratio: 0.1 + last_epoch: -1 + + language_model: + pretrained_model_name: bert-base-uncased # For ru, try DeepPavlov/rubert-base-cased | For de or multilingual, try bert-base-multilingual-cased + lm_checkpoint: null + config_file: null # json file, precedence over config + config: null + + tokenizer: + tokenizer_name: ${model.language_model.pretrained_model_name} # or sentencepiece + vocab_file: null # path to vocab file + tokenizer_model: null # only used if tokenizer is sentencepiece + special_tokens: null + +exp_manager: + exp_dir: nemo_experiments # where to store logs and checkpoints + name: training # name of experiment + create_tensorboard_logger: True + create_checkpoint_callback: True + checkpoint_callback_params: + save_top_k: 3 + monitor: "val_loss" + mode: "min" + +tokenizer: + tokenizer_name: ${model.transformer} # or sentencepiece + vocab_file: null # path to vocab file + tokenizer_model: null # only used if tokenizer is sentencepiece + special_tokens: null + +# Data +data: + train_ds: + data_path: ??? # provide the full path to the file + batch_size: 8 + shuffle: true + num_workers: 3 + pin_memory: false + drop_last: false + + validation_ds: + data_path: ??? # provide the full path to the file. + batch_size: 8 + shuffle: false + num_workers: 3 + pin_memory: false + drop_last: false + + +# Inference +inference: + from_file: null # Path to the raw text, no labels required. Each sentence on a separate line + out_file: null # Path to the output file + batch_size: 16 # batch size for inference.from_file diff --git a/examples/nlp/spellchecking_asr_customization/convert_data_to_tarred.sh b/examples/nlp/spellchecking_asr_customization/convert_data_to_tarred.sh new file mode 100644 index 000000000000..d4265eb4beb6 --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/convert_data_to_tarred.sh @@ -0,0 +1,50 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Path to NeMo repository +NEMO_PATH=NeMo + +DATA_PATH="data_folder" + +## data_folder_example +## ├── tarred_data +## | └── (output) +## ├── config.json +##   ├── label_map.txt +##   ├── semiotic_classes.txt +## ├── test.tsv +## ├── 1.tsv +## ├── ... +## └── 200.tsv + +## Each of {1-200}.tsv input files are 110'000 examples subsets of all.tsv (except for validation part), +## generated by https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/dataset_preparation/build_training_data.sh +## Note that in this example we use 110'000 as input and only pack 100'000 of them to tar file. +## This is because some input examples, e.g. too long, can be skipped during preprocessing, and we want all tar files to contain fixed equal number of examples. + +for part in {1..200} +do + python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/create_tarred_dataset.py \ + lang="en" \ + data.train_ds.data_path=${DATA_PATH}/${part}.tsv \ + data.validation_ds.data_path=${DATA_PATH}/test.tsv \ + model.max_sequence_len=256 \ + model.language_model.pretrained_model_name=huawei-noah/TinyBERT_General_6L_768D \ + model.language_model.config_file=${DATA_PATH}/config.json \ + model.label_map=${DATA_PATH}/label_map.txt \ + model.semiotic_classes=${DATA_PATH}/semiotic_classes.txt \ + +output_tar_file=${DATA_PATH}/tarred_data/part${part}.tar \ + +take_first_n_lines=100000 +done diff --git a/examples/nlp/spellchecking_asr_customization/create_custom_vocab_index.py b/examples/nlp/spellchecking_asr_customization/create_custom_vocab_index.py new file mode 100644 index 000000000000..68c55ff51a4f --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/create_custom_vocab_index.py @@ -0,0 +1,72 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This script is used to create an index of custom vocabulary and save it to file. +See "examples/nlp/spellchecking_asr_customization/run_infer.sh" for the whole inference pipeline. +""" + +from argparse import ArgumentParser + +from nemo.collections.nlp.data.spellchecking_asr_customization.utils import get_index, load_ngram_mappings + +parser = ArgumentParser(description="Create an index of custom vocabulary and save it to file") + +parser.add_argument( + "--input_name", required=True, type=str, help="Path to input file with custom vocabulary (plain text)" +) +parser.add_argument( + "--ngram_mappings", required=True, type=str, help="Path to input file with n-gram mapping vocabulary" +) +parser.add_argument("--output_name", required=True, type=str, help="Path to output file with custom vocabulary index") +parser.add_argument("--min_log_prob", default=-4.0, type=float, help="Threshold on log probability") +parser.add_argument( + "--max_phrases_per_ngram", + default=500, + type=int, + help="Threshold on number of phrases that can be stored for one n-gram key in index. Keys with more phrases are discarded.", +) +parser.add_argument( + "--max_misspelled_freq", default=125000, type=int, help="Threshold on maximum frequency of misspelled n-gram" +) + +args = parser.parse_args() + +# Load custom vocabulary +custom_phrases = set() +with open(args.input_name, "r", encoding="utf-8") as f: + for line in f: + phrase = line.strip() + custom_phrases.add(" ".join(list(phrase.replace(" ", "_")))) +print("Size of customization vocabulary:", len(custom_phrases)) + +# Load n-gram mappings vocabulary +ngram_mapping_vocab, ban_ngram = load_ngram_mappings(args.ngram_mappings, max_misspelled_freq=args.max_misspelled_freq) + +# Generate index of custom phrases +phrases, ngram2phrases = get_index( + custom_phrases, + ngram_mapping_vocab, + ban_ngram, + min_log_prob=args.min_log_prob, + max_phrases_per_ngram=args.max_phrases_per_ngram, +) + +# Save index to file +with open(args.output_name, "w", encoding="utf-8") as out: + for ngram in ngram2phrases: + for phrase_id, begin, size, logprob in ngram2phrases[ngram]: + phrase = phrases[phrase_id] + out.write(ngram + "\t" + phrase + "\t" + str(begin) + "\t" + str(size) + "\t" + str(logprob) + "\n") diff --git a/examples/nlp/spellchecking_asr_customization/create_tarred_dataset.py b/examples/nlp/spellchecking_asr_customization/create_tarred_dataset.py new file mode 100644 index 000000000000..d0bdc2c9bd30 --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/create_tarred_dataset.py @@ -0,0 +1,99 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This script is used to create a tarred dataset for SpellcheckingAsrCustomizationModel. + +This script uses the `/examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml` +config file by default. The other option is to set another config file via command +line arguments by `--config-name=CONFIG_FILE_PATH'. Probably it is worth looking +at the example config file to see the list of parameters used for training. + +USAGE Example: +1. Obtain a processed dataset +2. Run: + python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/create_tarred_dataset.py \ + lang=${LANG} \ + data.train_ds.data_path=${DATA_PATH}/train.tsv \ + model.language_model.pretrained_model_name=${LANGUAGE_MODEL} \ + model.label_map=${DATA_PATH}/label_map.txt \ + +output_tar_file=tarred/part1.tar \ + +take_first_n_lines=100000 + +""" +import pickle +import tarfile +from io import BytesIO + +from helpers import MODEL, instantiate_model_and_trainer +from omegaconf import DictConfig, OmegaConf + +from nemo.core.config import hydra_runner +from nemo.utils import logging + + +@hydra_runner(config_path="conf", config_name="spellchecking_asr_customization_config") +def main(cfg: DictConfig) -> None: + logging.info(f'Config Params: {OmegaConf.to_yaml(cfg)}') + logging.info("Start creating tar file from " + cfg.data.train_ds.data_path + " ...") + _, model = instantiate_model_and_trainer( + cfg, MODEL, True + ) # instantiate model like for training because we may not have pretrained model + dataset = model._train_dl.dataset + archive = tarfile.open(cfg.output_tar_file, mode="w") + max_lines = int(cfg.take_first_n_lines) + for i in range(len(dataset)): + if i >= max_lines: + logging.info("Reached " + str(max_lines) + " examples") + break + ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + labels_mask, + labels, + spans, + ) = dataset[i] + + # do not store masks as they are just arrays of 1 + content = { + "input_ids": input_ids, + "input_mask": input_mask, + "segment_ids": segment_ids, + "input_ids_for_subwords": input_ids_for_subwords, + "input_mask_for_subwords": input_mask_for_subwords, + "segment_ids_for_subwords": segment_ids_for_subwords, + "character_pos_to_subword_pos": character_pos_to_subword_pos, + "labels_mask": labels_mask, + "labels": labels, + "spans": spans, + } + b = BytesIO() + pickle.dump(content, b) + b.seek(0) + tarinfo = tarfile.TarInfo(name="example_" + str(i) + ".pkl") + tarinfo.size = b.getbuffer().nbytes + archive.addfile(tarinfo=tarinfo, fileobj=b) + + archive.close() + logging.info("Tar file " + cfg.output_tar_file + " created!") + + +if __name__ == '__main__': + main() diff --git a/examples/nlp/spellchecking_asr_customization/helpers.py b/examples/nlp/spellchecking_asr_customization/helpers.py new file mode 100644 index 000000000000..2db11b0e7d96 --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/helpers.py @@ -0,0 +1,86 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import os +from typing import Tuple + +import pytorch_lightning as pl +from omegaconf import DictConfig + +from nemo.collections.nlp.models import SpellcheckingAsrCustomizationModel +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.utils import logging + +__all__ = ["MODEL", "MODEL_NAMES", "instantiate_model_and_trainer"] + +MODEL = "spellchecking" +MODEL_NAMES = [MODEL] + + +def instantiate_model_and_trainer( + cfg: DictConfig, model_name: str, do_training: bool +) -> Tuple[pl.Trainer, SpellcheckingAsrCustomizationModel]: + """ Function for instantiating a model and a trainer + Args: + cfg: The config used to instantiate the model and the trainer. + model_name: A str indicates the model direction, currently only 'itn'. + do_training: A boolean flag indicates whether the model will be trained or evaluated. + + Returns: + trainer: A PyTorch Lightning trainer + model: A SpellcheckingAsrCustomizationModel + """ + + if model_name not in MODEL_NAMES: + raise ValueError(f"{model_name} is unknown model type") + + # Get configs for the corresponding models + trainer_cfg = cfg.get("trainer") + model_cfg = cfg.get("model") + pretrained_cfg = cfg.get("pretrained_model", None) + trainer = pl.Trainer(**trainer_cfg) + if not pretrained_cfg: + logging.info(f"Initializing {model_name} model") + if model_name == MODEL: + model = SpellcheckingAsrCustomizationModel(model_cfg, trainer=trainer) + else: + raise ValueError(f"{model_name} is unknown model type") + elif os.path.exists(pretrained_cfg): + logging.info(f"Restoring pretrained {model_name} model from {pretrained_cfg}") + save_restore_connector = NLPSaveRestoreConnector() + model = SpellcheckingAsrCustomizationModel.restore_from( + pretrained_cfg, save_restore_connector=save_restore_connector + ) + else: + logging.info(f"Loading pretrained model {pretrained_cfg}") + if model_name == MODEL: + if pretrained_cfg not in SpellcheckingAsrCustomizationModel.get_available_model_names(): + raise ( + ValueError( + f"{pretrained_cfg} not in the list of available Tagger models." + f"Select from {SpellcheckingAsrCustomizationModel.list_available_models()}" + ) + ) + model = SpellcheckingAsrCustomizationModel.from_pretrained(pretrained_cfg) + else: + raise ValueError(f"{model_name} is unknown model type") + + # Setup train and validation data + if do_training: + model.setup_training_data(train_data_config=cfg.data.train_ds) + model.setup_validation_data(val_data_config=cfg.data.validation_ds) + + logging.info(f"Model {model_name} -- Device {model.device}") + return trainer, model diff --git a/examples/nlp/spellchecking_asr_customization/postprocess_and_update_manifest.py b/examples/nlp/spellchecking_asr_customization/postprocess_and_update_manifest.py new file mode 100644 index 000000000000..871d5e5c0c0c --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/postprocess_and_update_manifest.py @@ -0,0 +1,79 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This script is used to postprocess SpellMapper results and generate an updated nemo ASR manifest. +See "examples/nlp/spellchecking_asr_customization/run_infer.sh" for the whole inference pipeline. +""" + +from argparse import ArgumentParser + +from nemo.collections.nlp.data.spellchecking_asr_customization.utils import ( + update_manifest_with_spellmapper_corrections, +) + +parser = ArgumentParser(description="Postprocess SpellMapper results and generate an updated nemo ASR manifest") + +parser.add_argument("--input_manifest", required=True, type=str, help="Path to input nemo ASR manifest") +parser.add_argument( + "--field_name", default="pred_text", type=str, help="Name of json field with original ASR hypothesis text" +) +parser.add_argument( + "--short2full_name", + required=True, + type=str, + help="Path to input file with correspondence between sentence fragments and full sentences", +) +parser.add_argument( + "--spellmapper_results", required=True, type=str, help="Path to input file with SpellMapper inference results" +) +parser.add_argument("--output_manifest", required=True, type=str, help="Path to output nemo ASR manifest") +parser.add_argument("--min_prob", default=0.5, type=float, help="Threshold on replacement probability") +parser.add_argument( + "--use_dp", + action="store_true", + help="Whether to use additional replacement filtering by using dynamic programming", +) +parser.add_argument( + "--replace_hyphen_to_space", + action="store_true", + help="Whether to use space instead of hyphen in replaced fragments", +) +parser.add_argument( + "--ngram_mappings", type=str, required=True, help="File with ngram mappings, only needed if use_dp=true" +) +parser.add_argument( + "--min_dp_score_per_symbol", + default=-1.5, + type=float, + help="Minimum dynamic programming sum score averaged by hypothesis length", +) + +args = parser.parse_args() + +update_manifest_with_spellmapper_corrections( + input_manifest_name=args.input_manifest, + short2full_name=args.short2full_name, + output_manifest_name=args.output_manifest, + spellmapper_results_name=args.spellmapper_results, + min_prob=args.min_prob, + replace_hyphen_to_space=args.replace_hyphen_to_space, + field_name=args.field_name, + use_dp=args.use_dp, + ngram_mappings=args.ngram_mappings, + min_dp_score_per_symbol=args.min_dp_score_per_symbol, +) + +print("Resulting manifest saved to: ", args.output_manifest) diff --git a/examples/nlp/spellchecking_asr_customization/prepare_input_from_manifest.py b/examples/nlp/spellchecking_asr_customization/prepare_input_from_manifest.py new file mode 100644 index 000000000000..6fd5e524390a --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/prepare_input_from_manifest.py @@ -0,0 +1,129 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This script contains an example on how to prepare input for SpellMapper inference from a nemo ASR manifest. +It splits sentences to shorter fragments, runs candidate retrieval and generates input in the required format. +It produces two output files: + 1. File with correspondence between sentence fragments and full sentences. + 2. File that will serve as input for SpellMapper inference. + +See "examples/nlp/spellchecking_asr_customization/run_infer.sh" for the whole inference pipeline. +""" + +from argparse import ArgumentParser + +from nemo.collections.nlp.data.spellchecking_asr_customization.utils import ( + extract_and_split_text_from_manifest, + get_candidates, + load_index, +) + +parser = ArgumentParser(description="Prepare input for SpellMapper inference from a nemo ASR manifest") +parser.add_argument("--manifest", required=True, type=str, help="Path to input manifest file") +parser.add_argument( + "--custom_vocab_index", required=True, type=str, help="Path to input file with custom vocabulary index" +) +parser.add_argument( + "--big_sample", + required=True, + type=str, + help="Path to input file with big sample of phrases to sample dummy candidates if there less than 10 are found by retrieval", +) +parser.add_argument( + "--short2full_name", + required=True, + type=str, + help="Path to output file with correspondence between sentence fragments and full sentences", +) +parser.add_argument( + "--output_name", + required=True, + type=str, + help="Path to output file that will serve as input for SpellMapper inference", +) +parser.add_argument("--field_name", default="pred_text", type=str, help="Name of json field with ASR hypothesis text") +parser.add_argument("--len_in_words", default=16, type=int, help="Maximum fragment length in words") +parser.add_argument( + "--step_in_words", + default=8, + type=int, + help="Step in words for moving to next fragment. If less than len_in_words, fragments will intersect", +) + +args = parser.parse_args() + +# Split ASR hypotheses to shorter fragments, because SpellMapper can't handle arbitrarily long sequences. +# The correspondence between short and original fragments is saved to a file and will be used at post-processing. +extract_and_split_text_from_manifest( + input_name=args.manifest, + output_name=args.short2full_name, + field_name=args.field_name, + len_in_words=args.len_in_words, + step_in_words=args.step_in_words, +) + +# Load index of custom vocabulary from file +phrases, ngram2phrases = load_index(args.custom_vocab_index) + +# Load big sample of phrases to sample dummy candidates if there less than 10 are found by retrieval +big_sample_of_phrases = set() +with open(args.big_sample, "r", encoding="utf-8") as f: + for line in f: + phrase, freq = line.strip().split("\t") + if int(freq) > 50: # do not want to use frequent phrases as dummy candidates + continue + if len(phrase) < 6 or len(phrase) > 15: # do not want to use too short or too long phrases as dummy candidates + continue + big_sample_of_phrases.add(phrase) + +big_sample_of_phrases = list(big_sample_of_phrases) + +# Generate input for SpellMapper inference +out = open(args.output_name, "w", encoding="utf-8") +with open(args.short2full_name, "r", encoding="utf-8") as f: + for line in f: + short_sent, _ = line.strip().split("\t") + sent = "_".join(short_sent.split()) + letters = list(sent) + candidates = get_candidates(ngram2phrases, phrases, letters, big_sample_of_phrases) + if len(candidates) == 0: + continue + if len(candidates) != 10: + raise ValueError("expect 10 candidates, got: ", len(candidates)) + + # We add two columns with targets and span_info. + # They have same format as during training, but start and end positions are APPROXIMATE, they will be adjusted when constructing BertExample. + targets = [] + span_info = [] + for idx, c in enumerate(candidates): + if c[1] == -1: + continue + targets.append(str(idx + 1)) # targets are 1-based + start = c[1] + # ensure that end is not outside sentence length (it can happen because c[2] is candidate length used as approximation) + end = min(c[1] + c[2], len(letters)) + span_info.append("CUSTOM " + str(start) + " " + str(end)) + out.write( + " ".join(letters) + + "\t" + + ";".join([x[0] for x in candidates]) + + "\t" + + " ".join(targets) + + "\t" + + ";".join(span_info) + + "\n" + ) +out.close() diff --git a/examples/nlp/spellchecking_asr_customization/run_infer.sh b/examples/nlp/spellchecking_asr_customization/run_infer.sh new file mode 100644 index 000000000000..b4bbdc4da375 --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/run_infer.sh @@ -0,0 +1,99 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## RUN INFERENCE ON NEMO MANIFEST AND CUSTOM VOCABULARY + +## Path to NeMo repository +NEMO_PATH=NeMo + +## Download model repo from Hugging Face (if clone doesn't work, run "git lfs install" and try again) +git clone https://huggingface.co/bene-ges/spellmapper_asr_customization_en +## Download repo with test data +git clone https://huggingface.co/datasets/bene-ges/spellmapper_en_evaluation + +## Files in model repo +PRETRAINED_MODEL=spellmapper_asr_customization_en/training_10m_5ep.nemo +NGRAM_MAPPINGS=spellmapper_asr_customization_en/replacement_vocab_filt.txt +BIG_SAMPLE=spellmapper_asr_customization_en/big_sample.txt + +## Override these two files if you want to test on your own data +## File with input nemo ASR manifest +INPUT_MANIFEST=spellmapper_en_evaluation/medical_manifest_ctc.json +## File containing custom words and phrases (plain text) +CUSTOM_VOCAB=spellmapper_en_evaluation/medical_custom_vocab.txt + +## Other files will be created +## File with index of custom vocabulary +INDEX="index.txt" +## File with short fragments and corresponding original sentences +SHORT2FULL="short2full.txt" +## File with input for SpellMapper inference +SPELLMAPPER_INPUT="spellmapper_input.txt" +## File with output of SpellMapper inference +SPELLMAPPER_OUTPUT="spellmapper_output.txt" +## File with output nemo ASR manifest +OUTPUT_MANIFEST="out_manifest.json" + + +# Create index of custom vocabulary +python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/create_custom_vocab_index.py \ + --input_name ${CUSTOM_VOCAB} \ + --ngram_mappings ${NGRAM_MAPPINGS} \ + --output_name ${INDEX} \ + --min_log_prob -4.0 \ + --max_phrases_per_ngram 600 + +# Prepare input for SpellMapper inference +python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/prepare_input_from_manifest.py \ + --manifest ${INPUT_MANIFEST} \ + --custom_vocab_index ${INDEX} \ + --big_sample ${BIG_SAMPLE} \ + --short2full_name ${SHORT2FULL} \ + --output_name ${SPELLMAPPER_INPUT} \ + --field_name "pred_text" \ + --len_in_words 16 \ + --step_in_words 8 + +# Run SpellMapper inference +python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py \ + pretrained_model=${PRETRAINED_MODEL} \ + model.max_sequence_len=512 \ + inference.from_file=${SPELLMAPPER_INPUT} \ + inference.out_file=${SPELLMAPPER_OUTPUT} \ + inference.batch_size=16 \ + lang=en + +# Postprocess and create output corrected manifest +python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/postprocess_and_update_manifest.py \ + --input_manifest ${INPUT_MANIFEST} \ + --short2full_name ${SHORT2FULL} \ + --output_manifest ${OUTPUT_MANIFEST} \ + --spellmapper_result ${SPELLMAPPER_OUTPUT} \ + --replace_hyphen_to_space \ + --field_name "pred_text" \ + --use_dp \ + --ngram_mappings ${NGRAM_MAPPINGS} \ + --min_dp_score_per_symbol -1.5 + +# Check WER of initial manifest +python ${NEMO_PATH}/examples/asr/speech_to_text_eval.py \ + dataset_manifest=${INPUT_MANIFEST} \ + use_cer=False \ + only_score_manifest=True + +# Check WER of corrected manifest +python ${NEMO_PATH}/examples/asr/speech_to_text_eval.py \ + dataset_manifest=${OUTPUT_MANIFEST} \ + use_cer=False \ + only_score_manifest=True diff --git a/examples/nlp/spellchecking_asr_customization/run_training.sh b/examples/nlp/spellchecking_asr_customization/run_training.sh new file mode 100644 index 000000000000..85dddbb2a038 --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/run_training.sh @@ -0,0 +1,56 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## TRAIN WITH NON-TARRED DATA + +# Path to NeMo repository +NEMO_PATH=NeMo + +## Download repo with training data (very small example) +## If clone doesn't work, run "git lfs install" and try again +git clone https://huggingface.co/datasets/bene-ges/spellmapper_en_train_micro + +DATA_PATH=spellmapper_en_train_micro + +## Example of all files needed to run training with non-tarred data: +## spellmapper_en_train_micro +## ├── config.json +##   ├── label_map.txt +##   ├── semiotic_classes.txt +## ├── test.tsv +## └── train.tsv + +## To generate files config.json, label_map.txt, semiotic_classes.txt - run generate_configs.sh +## Files "train.tsv" and "test.tsv" contain training examples. +## For data preparation see https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/dataset_preparation/build_training_data.sh + +## Note that training with non-tarred data only works on single gpu. It makes sense if you use 1-2 million examples or less. + +python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_train.py \ + lang="en" \ + data.validation_ds.data_path=${DATA_PATH}/test.tsv \ + data.train_ds.data_path=${DATA_PATH}/train.tsv \ + data.train_ds.batch_size=32 \ + data.train_ds.num_workers=8 \ + model.max_sequence_len=512 \ + model.language_model.pretrained_model_name=huawei-noah/TinyBERT_General_6L_768D \ + model.language_model.config_file=${DATA_PATH}/config.json \ + model.label_map=${DATA_PATH}/label_map.txt \ + model.semiotic_classes=${DATA_PATH}/semiotic_classes.txt \ + model.optim.lr=3e-5 \ + trainer.devices=[1] \ + trainer.num_nodes=1 \ + trainer.accelerator=gpu \ + trainer.strategy=ddp \ + trainer.max_epochs=5 diff --git a/examples/nlp/spellchecking_asr_customization/run_training_tarred.sh b/examples/nlp/spellchecking_asr_customization/run_training_tarred.sh new file mode 100644 index 000000000000..655c3e23e610 --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/run_training_tarred.sh @@ -0,0 +1,63 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## TRAIN WITH TARRED DATA + +# Path to NeMo repository +NEMO_PATH=NeMo + +DATA_PATH=data_folder + +## data_folder_example +## ├── train_tarred +## | ├── part1.tar +## | ├── ... +## | └── part200.tar +## ├── config.json +##   ├── label_map.txt +##   ├── semiotic_classes.txt +## └── test.tsv +## To generate files config.json, label_map.txt, semiotic_classes.txt, run generate_configs.sh +## To prepare data, see ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/dataset_preparation/build_training_data.sh +## To convert data to tarred format, split all.tsv to pieces of 110'000 examples (except for validation part) and use ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/dataset_preparation/convert_data_to_tarred.sh +## To run training with tarred data, use ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/run_training_tarred.sh + +## ATTENTION: How to calculate model.optim.sched.max_steps: +## Suppose, you have 2'000'000 training examples, and want to train for 5 epochs on 4 gpus with batch size 32. +## 5 (epochs) * 32 (bs) * 4 (gpus) +## 1 step consumes 128 examples (32(bs) * 4(gpus)) +## 1 epoch makes 2000000/128=15625 steps (updates) +## 5 epochs make 5*15625=78125 steps + +python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_train.py \ + lang="en" \ + data.validation_ds.data_path=${DATA_PATH}/test.tsv \ + data.train_ds.data_path=${DATA_PATH}/train_tarred/part_OP_1..100_CL_.tar \ + data.train_ds.batch_size=32 \ + data.train_ds.num_workers=16 \ + +data.train_ds.use_tarred_dataset=true \ + data.train_ds.shuffle=false \ + data.validation_ds.batch_size=16 \ + model.max_sequence_len=512 \ + model.language_model.pretrained_model_name=huawei-noah/TinyBERT_General_6L_768D \ + model.language_model.config_file=${DATA_PATH}/config.json \ + model.label_map=${DATA_PATH}/label_map.txt \ + model.semiotic_classes=${DATA_PATH}/semiotic_classes.txt \ + model.optim.sched.name=CosineAnnealing \ + +model.optim.sched.max_steps=195313 \ + trainer.devices=8 \ + trainer.num_nodes=1 \ + trainer.accelerator=gpu \ + trainer.strategy=ddp \ + trainer.max_epochs=5 diff --git a/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py b/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py new file mode 100644 index 000000000000..593264f14a5d --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py @@ -0,0 +1,123 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This script contains an example on how to run inference with the SpellcheckingAsrCustomizationModel. + +An input line should consist of 4 tab-separated columns: + 1. text of ASR-hypothesis + 2. texts of 10 candidates separated by semicolon + 3. 1-based ids of non-dummy candidates + 4. approximate start/end coordinates of non-dummy candidates (correspond to ids in third column) + +Example input (in one line): + t h e _ t a r a s i c _ o o r d a _ i s _ a _ p a r t _ o f _ t h e _ a o r t a _ l o c a t e d _ i n _ t h e _ t h o r a x + h e p a t i c _ c i r r h o s i s;u r a c i l;c a r d i a c _ a r r e s t;w e a n;a p g a r;p s y c h o m o t o r;t h o r a x;t h o r a c i c _ a o r t a;a v f;b l o c k a d e d + 1 2 6 7 8 9 10 + CUSTOM 6 23;CUSTOM 4 10;CUSTOM 4 15;CUSTOM 56 62;CUSTOM 5 19;CUSTOM 28 31;CUSTOM 39 48 + +Each line in SpellMapper output is tab-separated and consists of 4 columns: + 1. ASR-hypothesis (same as in input) + 2. 10 candidates separated with semicolon (same as in input) + 3. fragment predictions, separated with semicolon, each prediction is a tuple (start, end, candidate_id, probability) + 4. letter predictions - candidate_id predicted for each letter (this is only for debug purposes) + +Example output (in one line): + t h e _ t a r a s i c _ o o r d a _ i s _ a _ p a r t _ o f _ t h e _ a o r t a _ l o c a t e d _ i n _ t h e _ t h o r a x + h e p a t i c _ c i r r h o s i s;u r a c i l;c a r d i a c _ a r r e s t;w e a n;a p g a r;p s y c h o m o t o r;t h o r a x;t h o r a c i c _ a o r t a;a v f;b l o c k a d e d + 56 62 7 0.99998;4 20 8 0.95181;12 20 8 0.44829;4 17 8 0.99464;12 17 8 0.97645 + 8 8 8 0 8 8 8 8 8 8 8 8 8 8 8 8 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 7 7 7 7 7 + + +USAGE Example: +1. Train a model, or use a pretrained checkpoint. +2. Run on a single file: + python nemo/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py \ + pretrained_model=${PRETRAINED_NEMO_CHECKPOINT} \ + model.max_sequence_len=512 \ + inference.from_file=input.txt \ + inference.out_file=output.txt \ + inference.batch_size=16 \ + lang=en +or on multiple files: + python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py \ + pretrained_model=${PRETRAINED_NEMO_CHECKPOINT} \ + model.max_sequence_len=512 \ + +inference.from_filelist=filelist.txt \ + +inference.output_folder=output_folder \ + inference.batch_size=16 \ + lang=en + +This script uses the `/examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml` +config file by default. The other option is to set another config file via command +line arguments by `--config-name=CONFIG_FILE_PATH'. +""" + + +import os + +from helpers import MODEL, instantiate_model_and_trainer +from omegaconf import DictConfig, OmegaConf + +from nemo.core.config import hydra_runner +from nemo.utils import logging + + +@hydra_runner(config_path="conf", config_name="spellchecking_asr_customization_config") +def main(cfg: DictConfig) -> None: + logging.debug(f'Config Params: {OmegaConf.to_yaml(cfg)}') + + if cfg.pretrained_model is None: + raise ValueError("A pre-trained model should be provided.") + _, model = instantiate_model_and_trainer(cfg, MODEL, False) + + if cfg.model.max_sequence_len != model.max_sequence_len: + model.max_sequence_len = cfg.model.max_sequence_len + model.builder._max_seq_length = cfg.model.max_sequence_len + input_filenames = [] + output_filenames = [] + + if "from_filelist" in cfg.inference and "output_folder" in cfg.inference: + filelist_file = cfg.inference.from_filelist + output_folder = cfg.inference.output_folder + with open(filelist_file, "r", encoding="utf-8") as f: + for line in f: + path = line.strip() + input_filenames.append(path) + folder, name = os.path.split(path) + output_filenames.append(os.path.join(output_folder, name)) + else: + text_file = cfg.inference.from_file + logging.info(f"Running inference on {text_file}...") + if not os.path.exists(text_file): + raise ValueError(f"{text_file} not found.") + input_filenames.append(text_file) + output_filenames.append(cfg.inference.out_file) + + dataloader_cfg = { + "batch_size": cfg.inference.get("batch_size", 8), + "num_workers": cfg.inference.get("num_workers", 4), + "pin_memory": cfg.inference.get("num_workers", False), + } + for input_filename, output_filename in zip(input_filenames, output_filenames): + if not os.path.exists(input_filename): + logging.info(f"Skip non-existing {input_filename}.") + continue + model.infer(dataloader_cfg, input_filename, output_filename) + logging.info(f"Predictions saved to {output_filename}.") + + +if __name__ == "__main__": + main() diff --git a/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_train.py b/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_train.py new file mode 100644 index 000000000000..7ea9314d196d --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_train.py @@ -0,0 +1,66 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This script contains an example on how to train SpellMapper (SpellcheckingAsrCustomizationModel). +It uses the `examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml` +config file by default. The other option is to set another config file via command +line arguments by `--config-name=CONFIG_FILE_PATH'. Probably it is worth looking +at the example config file to see the list of parameters used for training. + +USAGE Example: + See `examples/nlp/spellchecking_asr_customization/run_training.sh` for training on non-tarred data. + and + `examples/nlp/spellchecking_asr_customization/run_training_tarred.sh` for training on tarred data. + +One (non-tarred) training example should consist of 4 tab-separated columns: + 1. text of ASR-hypothesis + 2. texts of 10 candidates separated by semicolon + 3. 1-based ids of correct candidates, or 0 if none + 4. start/end coordinates of correct candidates (correspond to ids in third column) +Example (in one line): + a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o + d i d i e r _ s a u m o n;a s t r o n o m i e;t r i s t a n _ g u i l l o t;t r i s t e s s e;m o n a d e;c h r i s t i a n;a s t r o n o m e r;s o l o m o n;d i d i d i d i d i;m e r c y + 1 3 + CUSTOM 12 23;CUSTOM 28 41 +""" + +from helpers import MODEL, instantiate_model_and_trainer +from omegaconf import DictConfig, OmegaConf + +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="conf", config_name="spellchecking_asr_customization_config") +def main(cfg: DictConfig) -> None: + logging.info(f'Config Params: {OmegaConf.to_yaml(cfg)}') + + # Train the model + if cfg.model.do_training: + logging.info( + "================================================================================================" + ) + logging.info('Start training...') + trainer, model = instantiate_model_and_trainer(cfg, MODEL, True) + spellchecking_exp_manager = cfg.get('exp_manager', None) + exp_manager(trainer, spellchecking_exp_manager) + trainer.fit(model) + logging.info('Training finished!') + + +if __name__ == '__main__': + main() diff --git a/examples/nlp/text_normalization_as_tagging/dataset_preparation/extract_giza_alignments.py b/examples/nlp/text_normalization_as_tagging/dataset_preparation/extract_giza_alignments.py index e2ae48a37a0b..f5a53b1f331d 100644 --- a/examples/nlp/text_normalization_as_tagging/dataset_preparation/extract_giza_alignments.py +++ b/examples/nlp/text_normalization_as_tagging/dataset_preparation/extract_giza_alignments.py @@ -19,9 +19,14 @@ import re from argparse import ArgumentParser -from typing import List, Tuple -import numpy as np +from nemo.collections.nlp.data.text_normalization_as_tagging.utils import ( + check_monotonicity, + fill_alignment_matrix, + get_targets, + get_targets_from_back, +) + parser = ArgumentParser(description='Extract final alignments from GIZA++ alignments') parser.add_argument('--mode', type=str, required=True, help='tn or itn') @@ -34,211 +39,13 @@ args = parser.parse_args() -def fill_alignment_matrix( - fline2: str, fline3: str, gline2: str, gline3: str -) -> Tuple[np.ndarray, List[str], List[str]]: - """Parse Giza++ direct and reverse alignment results and represent them as an alignment matrix - - Args: - fline2: e.g. "_2 0 1 4_" - fline3: e.g. "NULL ({ }) twenty ({ 1 }) fourteen ({ 2 3 4 })" - gline2: e.g. "twenty fourteen" - gline3: e.g. "NULL ({ }) _2 ({ 1 }) 0 ({ }) 1 ({ }) 4_ ({ 2 })" - - Returns: - matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment - the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the - words were aligned only in one direction, 0 - no alignment. - srctokens: e.g. ["twenty", "fourteen"] - dsttokens: e.g. ["_2", "0", "1", "4_"] - - For example, the alignment matrix for the above example may look like: - [[3, 0, 0, 0] - [0, 2, 2, 3]] - """ - if fline2 is None or gline2 is None or fline3 is None or gline3 is None: - raise ValueError(f"empty params") - srctokens = gline2.split() - dsttokens = fline2.split() - pattern = r"([^ ]+) \(\{ ([^\(\{\}\)]*) \}\)" - src2dst = re.findall(pattern, fline3.replace("({ })", "({ })")) - dst2src = re.findall(pattern, gline3.replace("({ })", "({ })")) - if len(src2dst) != len(srctokens) + 1: - raise ValueError( - "length mismatch: len(src2dst)=" - + str(len(src2dst)) - + "; len(srctokens)" - + str(len(srctokens)) - + "\n" - + gline2 - + "\n" - + fline3 - ) - if len(dst2src) != len(dsttokens) + 1: - raise ValueError( - "length mismatch: len(dst2src)=" - + str(len(dst2src)) - + "; len(dsttokens)" - + str(len(dsttokens)) - + "\n" - + fline2 - + "\n" - + gline3 - ) - matrix = np.zeros((len(srctokens), len(dsttokens))) - for i in range(1, len(src2dst)): - token, to_str = src2dst[i] - if to_str == "": - continue - to = list(map(int, to_str.split())) - for t in to: - matrix[i - 1][t - 1] = 2 - - for i in range(1, len(dst2src)): - token, to_str = dst2src[i] - if to_str == "": - continue - to = list(map(int, to_str.split())) - for t in to: - matrix[t - 1][i - 1] += 1 - - return matrix, srctokens, dsttokens - - -def check_monotonicity(matrix: np.ndarray) -> bool: - """Check if alignment is monotonous - i.e. the relative order is preserved (no swaps). - - Args: - matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment - the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the - words were aligned only in one direction, 0 - no alignment. - """ - is_sorted = lambda k: np.all(k[:-1] <= k[1:]) - - a = np.argwhere(matrix == 3) - b = np.argwhere(matrix == 2) - c = np.vstack((a, b)) - d = c[c[:, 1].argsort()] # sort by second column (less important) - d = d[d[:, 0].argsort(kind="mergesort")] - return is_sorted(d[:, 1]) - - -def get_targets(matrix: np.ndarray, dsttokens: List[str]) -> List[str]: - """Join some of the destination tokens, so that their number becomes the same as the number of input words. - Unaligned tokens tend to join to the left aligned token. - - Args: - matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment - the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the - words were aligned only in one direction, 0 - no alignment. - dsttokens: e.g. ["_2", "0", "1", "4_"] - Returns: - targets: list of string tokens, with one-to-one correspondence to matrix.shape[0] - - Example: - If we get - matrix=[[3, 0, 0, 0] - [0, 2, 2, 3]] - dsttokens=["_2", "0", "1", "4_"] - it gives - targets = ["_201", "4_"] - Actually, this is a mistake instead of ["_20", "14_"]. That will be further corrected by regular expressions. - """ - targets = [] - last_covered_dst_id = -1 - for i in range(len(matrix)): - dstlist = [] - for j in range(last_covered_dst_id + 1, len(dsttokens)): - # matrix[i][j] == 3: safe alignment point - if matrix[i][j] == 3 or ( - j == last_covered_dst_id + 1 - and np.all(matrix[i, :] == 0) # if the whole line does not have safe points - and np.all(matrix[:, j] == 0) # and the whole column does not have safe points, match them - ): - if len(targets) == 0: # if this is first safe point, attach left unaligned columns to it, if any - for k in range(0, j): - if np.all(matrix[:, k] == 0): # if column k does not have safe points - dstlist.append(dsttokens[k]) - else: - break - dstlist.append(dsttokens[j]) - last_covered_dst_id = j - for k in range(j + 1, len(dsttokens)): - if np.all(matrix[:, k] == 0): # if column k does not have safe points - dstlist.append(dsttokens[k]) - last_covered_dst_id = k - else: - break - - if len(dstlist) > 0: - if args.mode == "tn": - targets.append("_".join(dstlist)) - else: - targets.append("".join(dstlist)) - else: - targets.append("") - return targets - - -def get_targets_from_back(matrix: np.ndarray, dsttokens: List[str]) -> List[str]: - """Join some of the destination tokens, so that their number becomes the same as the number of input words. - Unaligned tokens tend to join to the right aligned token. - - Args: - matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment - the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the - words were aligned only in one direction, 0 - no alignment. - dsttokens: e.g. ["_2", "0", "1", "4_"] - Returns: - targets: list of string tokens, with one-to-one correspondence to matrix.shape[0] - - Example: - If we get - matrix=[[3, 0, 0, 0] - [0, 2, 2, 3]] - dsttokens=["_2", "0", "1", "4_"] - it gives - targets = ["_2", "014_"] - Actually, this is a mistake instead of ["_20", "14_"]. That will be further corrected by regular expressions. - """ - - targets = [] - last_covered_dst_id = len(dsttokens) - for i in range(len(matrix) - 1, -1, -1): - dstlist = [] - for j in range(last_covered_dst_id - 1, -1, -1): - if matrix[i][j] == 3 or ( - j == last_covered_dst_id - 1 and np.all(matrix[i, :] == 0) and np.all(matrix[:, j] == 0) - ): - if len(targets) == 0: - for k in range(len(dsttokens) - 1, j, -1): - if np.all(matrix[:, k] == 0): - dstlist.append(dsttokens[k]) - else: - break - dstlist.append(dsttokens[j]) - last_covered_dst_id = j - for k in range(j - 1, -1, -1): - if np.all(matrix[:, k] == 0): - dstlist.append(dsttokens[k]) - last_covered_dst_id = k - else: - break - if len(dstlist) > 0: - if args.mode == "tn": - targets.append("_".join(list(reversed(dstlist)))) - else: - targets.append("".join(list(reversed(dstlist)))) - else: - targets.append("") - return list(reversed(targets)) - - def main() -> None: g = open(args.giza_dir + "/GIZA++." + args.giza_suffix, "r", encoding="utf-8") f = open(args.giza_dir + "/GIZA++reverse." + args.giza_suffix, "r", encoding="utf-8") + target_inner_delimiter = "" if args.mode == "tn": g, f = f, g + target_inner_delimiter = "_" out = open(args.giza_dir + "/" + args.out_filename, "w", encoding="utf-8") cache = {} good_count, not_mono_count, not_covered_count, exception_count = 0, 0, 0, 0 @@ -277,8 +84,8 @@ def main() -> None: else: matrix[matrix <= 2] = 0 # leave only 1-to-1 alignment points if check_monotonicity(matrix): - targets = get_targets(matrix, dsttokens) - targets_from_back = get_targets_from_back(matrix, dsttokens) + targets = get_targets(matrix, dsttokens, delimiter=target_inner_delimiter) + targets_from_back = get_targets_from_back(matrix, dsttokens, delimiter=target_inner_delimiter) if len(targets) != len(srctokens): raise ValueError( "targets length doesn't match srctokens length: len(targets)=" diff --git a/examples/slu/speech_intent_slot/eval_utils/evaluator.py b/examples/slu/speech_intent_slot/eval_utils/evaluator.py index 7475e9fc48ed..e56711edf215 100644 --- a/examples/slu/speech_intent_slot/eval_utils/evaluator.py +++ b/examples/slu/speech_intent_slot/eval_utils/evaluator.py @@ -42,7 +42,7 @@ def parse_semantics_str2dict(semantics_str: Union[List[str], str, Dict]) -> Tupl "entities": [], } invalid = True - except SyntaxError: # need this if the output is not a valid dict + except Exception: # need this if the output is not a valid dict _dict = { "scenario": "none", "action": "none", diff --git a/examples/tts/conf/fastpitch/fastpitch.yaml b/examples/tts/conf/fastpitch/fastpitch.yaml new file mode 100644 index 000000000000..1d552d058d76 --- /dev/null +++ b/examples/tts/conf/fastpitch/fastpitch.yaml @@ -0,0 +1,256 @@ +# This config contains the default values for training an English FastPitch model. +# If you want to train a model on other dataset, you can change config values according to your dataset. +# Most dataset-specific arguments are in the head of the config file, see below. + +name: FastPitch + +defaults: + - feature: ??? + +max_epochs: ??? +batch_size: 32 +weighted_sampling_steps_per_epoch: null + +n_speakers: ??? +speaker_path: null +feature_stats_path: null + +train_ds_meta: ??? +val_ds_meta: ??? +log_ds_meta: ??? + +phoneme_dict_path: ??? +heteronyms_path: ??? + +log_dir: ??? +vocoder_type: ??? +vocoder_name: null +vocoder_checkpoint_path: null + +model: + learn_alignment: true + bin_loss_warmup_epochs: 100 + + n_speakers: ${n_speakers} + n_mel_channels: ${feature.mel_feature.mel_dim} + min_token_duration: 1 + max_token_duration: 75 + symbols_embedding_dim: 384 + pitch_embedding_kernel_size: 3 + energy_embedding_kernel_size: 3 + speaker_emb_condition_prosody: true + speaker_emb_condition_aligner: true + use_log_energy: false + dur_loss_scale: 0.1 + pitch_loss_scale: 0.1 + energy_loss_scale: 0.1 + aligner_loss_scale: 0.1 + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + features: ${feature.mel_feature.mel_dim} + lowfreq: ${feature.mel_feature.lowfreq} + highfreq: ${feature.mel_feature.highfreq} + n_fft: ${feature.win_length} + n_window_size: ${feature.win_length} + window_size: false + n_window_stride: ${feature.hop_length} + window_stride: false + pad_to: 1 + pad_value: 0 + sample_rate: ${feature.sample_rate} + window: hann + normalize: null + preemph: null + dither: 0.0 + frame_splicing: 1 + log: true + log_zero_guard_type: add + log_zero_guard_value: 1.0 + mag_power: 1.0 + mel_norm: null + + text_tokenizer: + _target_: nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers.IPATokenizer + punct: true + apostrophe: true + pad_with_space: true + g2p: + _target_: nemo.collections.tts.g2p.models.i18n_ipa.IpaG2p + phoneme_dict: ${phoneme_dict_path} + heteronyms: ${heteronyms_path} + phoneme_probability: 0.8 + ignore_ambiguous_words: false + use_chars: true + use_stresses: true + + pitch_processor: + _target_: nemo.collections.tts.parts.preprocessing.feature_processors.MeanVarianceSpeakerNormalization + field: pitch + stats_path: ${feature_stats_path} + + energy_processor: + _target_: nemo.collections.tts.parts.preprocessing.feature_processors.MeanVarianceSpeakerNormalization + field: energy + stats_path: ${feature_stats_path} + + train_ds: + dataset: + _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset + dataset_meta: ${train_ds_meta} + weighted_sampling_steps_per_epoch: ${weighted_sampling_steps_per_epoch} + sample_rate: ${feature.sample_rate} + speaker_path: ${speaker_path} + align_prior_hop_length: ${feature.hop_length} + featurizers: ${feature.featurizers} + feature_processors: + pitch: ${model.pitch_processor} + energy: ${model.energy_processor} + min_duration: 0.1 + max_duration: 10.0 + + dataloader_params: + batch_size: ${batch_size} + num_workers: 4 + + validation_ds: + dataset: + _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset + dataset_meta: ${val_ds_meta} + sample_rate: ${feature.sample_rate} + speaker_path: ${speaker_path} + align_prior_hop_length: ${feature.hop_length} + featurizers: ${feature.featurizers} + feature_processors: + pitch: ${model.pitch_processor} + energy: ${model.energy_processor} + + dataloader_params: + batch_size: ${batch_size} + num_workers: 2 + + log_config: + log_dir: ${log_dir} + log_epochs: [10, 50] + epoch_frequency: 100 + log_tensorboard: false + log_wandb: false + + generators: + - _target_: nemo.collections.tts.parts.utils.callbacks.FastPitchArtifactGenerator + log_spectrogram: true + log_alignment: true + audio_params: + _target_: nemo.collections.tts.parts.utils.callbacks.LogAudioParams + log_audio_gta: true + vocoder_type: ${vocoder_type} + vocoder_name: ${vocoder_name} + vocoder_checkpoint_path: ${vocoder_checkpoint_path} + + dataset: + _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset + text_tokenizer: ${model.text_tokenizer} + sample_rate: ${feature.sample_rate} + speaker_path: ${speaker_path} + align_prior_hop_length: ${feature.hop_length} + featurizers: ${feature.featurizers} + + feature_processors: + pitch: ${model.pitch_processor} + energy: ${model.energy_processor} + + dataset_meta: ${log_ds_meta} + + dataloader_params: + batch_size: 8 + num_workers: 2 + + input_fft: + _target_: nemo.collections.tts.modules.transformer.FFTransformerEncoder + n_layer: 6 + n_head: 2 + d_model: ${model.symbols_embedding_dim} + d_head: 64 + d_inner: 1536 + kernel_size: 3 + dropout: 0.1 + dropatt: 0.1 + dropemb: 0.0 + d_embed: ${model.symbols_embedding_dim} + + output_fft: + _target_: nemo.collections.tts.modules.transformer.FFTransformerDecoder + n_layer: 6 + n_head: 1 + d_model: ${model.symbols_embedding_dim} + d_head: 64 + d_inner: 1536 + kernel_size: 3 + dropout: 0.1 + dropatt: 0.1 + dropemb: 0.0 + + alignment_module: + _target_: nemo.collections.tts.modules.aligner.AlignmentEncoder + n_text_channels: ${model.symbols_embedding_dim} + + duration_predictor: + _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor + input_size: ${model.symbols_embedding_dim} + kernel_size: 3 + filter_size: 256 + dropout: 0.1 + n_layers: 2 + + pitch_predictor: + _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor + input_size: ${model.symbols_embedding_dim} + kernel_size: 3 + filter_size: 256 + dropout: 0.1 + n_layers: 2 + + energy_predictor: + _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor + input_size: ${model.symbols_embedding_dim} + kernel_size: 3 + filter_size: 256 + dropout: 0.1 + n_layers: 2 + + optim: + name: adamw + lr: 1e-3 + betas: [0.9, 0.999] + weight_decay: 1e-6 + + sched: + name: NoamAnnealing + warmup_steps: 1000 + last_epoch: -1 + d_model: 1 # Disable scaling based on model dim + +trainer: + num_nodes: 1 + devices: 1 + accelerator: gpu + strategy: ddp + precision: 16 + max_epochs: ${max_epochs} + accumulate_grad_batches: 1 + gradient_clip_val: 10.0 + enable_checkpointing: false # Provided by exp_manager + logger: false # Provided by exp_manager + log_every_n_steps: 100 + check_val_every_n_epoch: 10 + benchmark: false + +exp_manager: + exp_dir: null + name: ${name} + create_tensorboard_logger: true + create_checkpoint_callback: true + checkpoint_callback_params: + monitor: val_loss + resume_if_exists: false + resume_ignore_no_checkpoint: false diff --git a/examples/tts/conf/fastpitch_align_44100_adapter.yaml b/examples/tts/conf/fastpitch_align_44100_adapter.yaml index b2957b057d28..3c41cf3e55e5 100644 --- a/examples/tts/conf/fastpitch_align_44100_adapter.yaml +++ b/examples/tts/conf/fastpitch_align_44100_adapter.yaml @@ -32,6 +32,9 @@ phoneme_dict_path: "scripts/tts_dataset_files/cmudict-0.7b_nv22.10" heteronyms_path: "scripts/tts_dataset_files/heteronyms-052722" model: + unfreeze_aligner: false + unfreeze_duration_predictor: false + unfreeze_pitch_predictor: false learn_alignment: true bin_loss_warmup_epochs: 100 diff --git a/examples/tts/conf/hifigan/hifigan_data.yaml b/examples/tts/conf/hifigan/hifigan_data.yaml new file mode 100644 index 000000000000..fde2f169aa8d --- /dev/null +++ b/examples/tts/conf/hifigan/hifigan_data.yaml @@ -0,0 +1,133 @@ +# This config contains the default values for training a HiFi-GAN model. +# If you want to train model on other dataset, you can change config values according to your dataset. +# Most dataset-specific arguments are in the head of the config file, see below. + +name: "HifiGan" + +defaults: + - feature: ??? + - sample: ??? + - model/generator: ??? + +max_epochs: ??? +batch_size: 16 +weighted_sampling_steps_per_epoch: null + +train_ds_meta: ??? +val_ds_meta: ??? +log_ds_meta: ??? + +log_dir: ??? + +model: + + max_epochs: ${max_epochs} + steps_per_epoch: ${weighted_sampling_steps_per_epoch} + l1_loss_factor: 60 + + preprocessor: + _target_: nemo.collections.asr.parts.preprocessing.features.FilterbankFeatures + nfilt: ${feature.mel_feature.mel_dim} + lowfreq: ${feature.mel_feature.lowfreq} + highfreq: ${feature.mel_feature.highfreq} + n_fft: ${feature.win_length} + n_window_size: ${feature.win_length} + n_window_stride: ${feature.hop_length} + pad_to: 0 + pad_value: 0 + exact_pad: true + sample_rate: ${feature.sample_rate} + window: hann + normalize: null + preemph: null + dither: 0.0 + frame_splicing: 1 + log: true + log_zero_guard_type: add + log_zero_guard_value: 1.0 + mag_power: 1.0 + mel_norm: null + use_grads: false + + train_ds: + dataset: + _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset + weighted_sampling_steps_per_epoch: ${weighted_sampling_steps_per_epoch} + sample_rate: ${feature.sample_rate} + n_samples: ${sample.train_n_samples} + min_duration: 0.4 + max_duration: null + dataset_meta: ${train_ds_meta} + + dataloader_params: + batch_size: ${batch_size} + num_workers: 4 + + validation_ds: + dataset: + _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset + sample_rate: ${feature.sample_rate} + n_samples: ${sample.val_n_samples} + min_duration: 3.0 + max_duration: null + dataset_meta: ${val_ds_meta} + + dataloader_params: + batch_size: ${batch_size} + num_workers: 2 + + log_config: + log_dir: ${log_dir} + log_epochs: [10, 50] + epoch_frequency: 100 + log_tensorboard: false + log_wandb: false + + generators: + - _target_: nemo.collections.tts.parts.utils.callbacks.VocoderArtifactGenerator + + dataset: + _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset + sample_rate: ${feature.sample_rate} + n_samples: null + min_duration: null + max_duration: null + dataset_meta: ${log_ds_meta} + + dataloader_params: + batch_size: 4 + num_workers: 2 + + optim: + _target_: torch.optim.AdamW + lr: 2e-4 + betas: [0.8, 0.99] + weight_decay: 1e-6 + sched: + name: ExponentialLR + gamma: 0.999 + +trainer: + num_nodes: 1 + devices: 1 + accelerator: gpu + strategy: ddp + precision: 16 + max_epochs: ${max_epochs} + accumulate_grad_batches: 1 + enable_checkpointing: False # Provided by exp_manager + logger: false # Provided by exp_manager + log_every_n_steps: 100 + check_val_every_n_epoch: 10 + benchmark: false + +exp_manager: + exp_dir: null + name: ${name} + create_tensorboard_logger: true + create_checkpoint_callback: true + create_wandb_logger: false + checkpoint_callback_params: + monitor: val_loss + resume_if_exists: false + resume_ignore_no_checkpoint: false diff --git a/examples/tts/conf/hifigan/sample/sample_22050.yaml b/examples/tts/conf/hifigan/sample/sample_22050.yaml new file mode 100644 index 000000000000..18bc206e2566 --- /dev/null +++ b/examples/tts/conf/hifigan/sample/sample_22050.yaml @@ -0,0 +1,3 @@ +# Audio dataset sampling config for 22.05khz sampling rate +train_n_samples: 8192 +val_n_samples: 66048 diff --git a/examples/tts/conf/hifigan/sample/sample_44100.yaml b/examples/tts/conf/hifigan/sample/sample_44100.yaml new file mode 100644 index 000000000000..d8315623bbbe --- /dev/null +++ b/examples/tts/conf/hifigan/sample/sample_44100.yaml @@ -0,0 +1,3 @@ +# Audio dataset sampling config for 44.1khz sampling rate +train_n_samples: 16384 +val_n_samples: 131072 diff --git a/examples/tts/conf/text/normalizer_en.yaml b/examples/tts/conf/text/normalizer_en.yaml new file mode 100644 index 000000000000..aef142544a78 --- /dev/null +++ b/examples/tts/conf/text/normalizer_en.yaml @@ -0,0 +1,3 @@ +_target_: nemo_text_processing.text_normalization.normalize.Normalizer +lang: en +input_case: cased \ No newline at end of file diff --git a/examples/tts/fastpitch_finetune_adapters.py b/examples/tts/fastpitch_finetune_adapters.py index 396552b0f4fd..1361d63fb4cf 100644 --- a/examples/tts/fastpitch_finetune_adapters.py +++ b/examples/tts/fastpitch_finetune_adapters.py @@ -107,6 +107,18 @@ def main(cfg): if adapter_global_cfg is not None: add_global_adapter_cfg(model, adapter_global_cfg) + if cfg.model.get("unfreeze_aligner", False): + for name, param in model.fastpitch.aligner.named_parameters(): + param.requires_grad = True + + if cfg.model.get("unfreeze_duration_predictor", False): + for name, param in model.fastpitch.duration_predictor.named_parameters(): + param.requires_grad = True + + if cfg.model.get("unfreeze_pitch_predictor", False): + for name, param in model.fastpitch.pitch_predictor.named_parameters(): + param.requires_grad = True + # Add adapters model.add_adapter(name=adapter_name, cfg=cfg.model.adapter) assert model.is_adapter_available() diff --git a/nemo/collections/asr/data/audio_to_label.py b/nemo/collections/asr/data/audio_to_label.py index 7585e4d7ea4f..f00f961b4c81 100644 --- a/nemo/collections/asr/data/audio_to_label.py +++ b/nemo/collections/asr/data/audio_to_label.py @@ -505,7 +505,7 @@ class _TarredAudioLabelDataset(IterableDataset): - `replicate`: Optional shard strategy, where each node gets all of the set of shards available in the tarred dataset, which are permanently pre-allocated and never changed at runtime. The benefit of replication is that it allows each node to sample data points from the entire - dataset independently of other nodes, and reduces dependence on value of `shuffle_n`. + dataset independently of other nodes, and reduces dependence on the value of `shuffle_n`. .. warning:: Replicated strategy allows every node to sample the entire set of available tarfiles, @@ -894,9 +894,8 @@ class AudioToMultiLabelDataset(Dataset): Defaults to False. cal_labels_occurrence (bool): Whether to calculate occurrence of labels Defaults to False. - delimiter (Optional[str]): Delimiter to use when spliting the label string, default to None. - normalize_audio_db (bool): Whether to normalize audio signal to a target db, default to False. - normalize_audio_db_target (float): Target db to normalize audio signal, default to -20. + delimiter (Optional[str]): Delimiter to use when splitting the label string, default to None. + normalize_audio_db (Optional[float]): normalize audio signal to a target db, default to None. """ @property @@ -942,8 +941,7 @@ def __init__( is_regression_task: bool = False, cal_labels_occurrence: Optional[bool] = False, delimiter: Optional[str] = None, - normalize_audio_db: bool = False, - normalize_audio_db_target: float = -20.0, + normalize_audio_db: Optional[float] = None, ): super().__init__() if isinstance(manifest_filepath, str): @@ -951,7 +949,6 @@ def __init__( self.delimiter = delimiter self.normalize_audio_db = normalize_audio_db - self.normalize_audio_db_target = normalize_audio_db_target self.collection = collections.ASRSpeechLabel( manifests_files=manifest_filepath, @@ -1022,7 +1019,6 @@ def __getitem__(self, index): duration=sample.duration, trim=self.trim, normalize_db=self.normalize_audio_db, - normalize_db_target=self.normalize_audio_db_target, ) f, fl = features, torch.tensor(features.size(0)).long() @@ -1104,9 +1100,8 @@ class TarredAudioToMultiLabelDataset(IterableDataset): or test datasets. global_rank (int): Worker rank, used for partitioning shards. Defaults to 0. world_size (int): Total number of processes, used for partitioning shards. Defaults to 0. - delimiter (Optional[str]): Delimiter to use when spliting the label string, default to None. - normalize_audio_db (bool): Whether to normalize audio signal to a target db, default to False. - normalize_audio_db_target (float): Target db to normalize audio signal, default to -20. + delimiter (Optional[str]): Delimiter to use when splitting the label string, default to None. + normalize_audio_db (Optional[float]): normalize audio signal to a target db, default to None. """ def __init__( @@ -1127,8 +1122,7 @@ def __init__( global_rank: int = 0, world_size: int = 0, delimiter: Optional[str] = None, - normalize_audio_db: bool = False, - normalize_audio_db_target: float = -20.0, + normalize_audio_db: Optional[float] = None, ): super().__init__() if isinstance(manifest_filepath, str): @@ -1138,7 +1132,6 @@ def __init__( self.is_regression_task = is_regression_task self.delimiter = delimiter self.normalize_audio_db = normalize_audio_db - self.normalize_audio_db_target = normalize_audio_db_target self.collection = collections.ASRSpeechLabel( manifests_files=manifest_filepath, @@ -1278,7 +1271,6 @@ def _build_sample(self, tup): duration=manifest_entry.duration, trim=self.trim, normalize_db=self.normalize_audio_db, - normalize_db_target=self.normalize_audio_db_target, ) audio_filestream.close() diff --git a/nemo/collections/asr/data/audio_to_label_dataset.py b/nemo/collections/asr/data/audio_to_label_dataset.py index a242308d4042..dcead6df94b8 100644 --- a/nemo/collections/asr/data/audio_to_label_dataset.py +++ b/nemo/collections/asr/data/audio_to_label_dataset.py @@ -240,8 +240,7 @@ def get_audio_multi_label_dataset(cfg: DictConfig) -> audio_to_label.AudioToMult is_regression_task=cfg.get("is_regression_task", False), cal_labels_occurrence=cfg.get("cal_labels_occurrence", False), delimiter=cfg.get("delimiter", None), - normalize_audio_db=cfg.get("normalize_audio_db", False), - normalize_audio_db_target=cfg.get("normalize_audio_db_target", -20), + normalize_audio_db=cfg.get("normalize_audio_db", None), ) return dataset @@ -294,8 +293,7 @@ def get_tarred_audio_multi_label_dataset( shard_strategy=cfg.get('tarred_shard_strategy', 'scatter'), global_rank=global_rank, world_size=world_size, - normalize_audio_db=cfg.get("normalize_audio_db", False), - normalize_audio_db_target=cfg.get("normalize_audio_db_target", -20), + normalize_audio_db=cfg.get("normalize_audio_db", None), ) if bucketing_weights: diff --git a/nemo/collections/asr/data/audio_to_text_dataset.py b/nemo/collections/asr/data/audio_to_text_dataset.py index 14e8dea19651..3234b617cc9c 100644 --- a/nemo/collections/asr/data/audio_to_text_dataset.py +++ b/nemo/collections/asr/data/audio_to_text_dataset.py @@ -19,14 +19,14 @@ from typing import Any, List, Optional, Union import torch -from omegaconf import DictConfig, open_dict +from omegaconf import DictConfig, OmegaConf, open_dict from omegaconf.listconfig import ListConfig from pytorch_lightning.callbacks import BasePredictionWriter from torch.utils.data import ChainDataset from nemo.collections.asr.data import audio_to_text, audio_to_text_dali from nemo.collections.asr.parts.preprocessing.perturb import process_augmentations -from nemo.collections.common.data.dataset import ConcatDataset +from nemo.collections.common.data.dataset import CodeSwitchedDataset, ConcatDataset from nemo.utils import logging @@ -398,6 +398,88 @@ def get_tarred_dataset( return get_chain_dataset(datasets=datasets, ds_config=config, rank=global_rank) +def get_code_switched_dataset( + config: dict, + shuffle_n: int, + global_rank: int, + world_size: int, + tokenizer: Optional['TokenizerSpec'] = None, + augmentor: Optional['AudioAugmentor'] = None, +) -> CodeSwitchedDataset: + + if 'manifest_filepath' not in config: + raise ValueError("`manifest_filepath` must be provided in the dataset config if `is_code_switched=True`") + if 'code_switched' not in config: + raise ValueError("`code_switched` param group must be in the dataset config if `is_code_switched=True`") + + manifest_filepaths = config['manifest_filepath'] + tarred_audio_filepaths = config.get('tarred_audio_filepaths', None) + + cs_config = OmegaConf.to_container(config['code_switched']) + + # needed to support validation Datasets that arrive here as + # [[dataset1,dataset2]] otherwise ModelPT would interfere + if len(manifest_filepaths) == 1 and not isinstance(manifest_filepaths[0], str): + manifest_filepaths = config['manifest_filepath'][0] + if tarred_audio_filepaths is None: + tarred_audio_filepaths = [None] * len(manifest_filepaths) + + if len(manifest_filepaths) != len(tarred_audio_filepaths): + raise ValueError( + f"manifest_filepaths (length={len(manifest_filepaths)}) and tarred_audio_filepaths (length={len(tarred_audio_filepaths)}) need to have the same number of items." + ) + + datasets = [] + for dataset_idx, (tarred_audio_filepath, manifest_filepath) in enumerate( + zip(tarred_audio_filepaths, manifest_filepaths) + ): + conf = copy.deepcopy(config) + conf['manifest_filepath'] = manifest_filepath + with open_dict(conf): + conf['tarred_audio_filepaths'] = tarred_audio_filepath + if tarred_audio_filepath is None or len(tarred_audio_filepath) == 0: + if tokenizer is None: + dataset = get_char_dataset(config=conf, augmentor=None) + else: + dataset = get_bpe_dataset(config=conf, tokenizer=tokenizer, augmentor=None) + else: + dataset = get_tarred_dataset( + config=conf, + tokenizer=tokenizer, + shuffle_n=shuffle_n, + global_rank=global_rank, + world_size=world_size, + augmentor=None, + ) + datasets.append(dataset) + + config = OmegaConf.to_container(config) + + dataset = CodeSwitchedDataset( + datasets, + shuffle=cs_config.get('shuffle', True), + min_duration=cs_config.get('min_duration', 4), + max_duration=cs_config.get('max_duration', 20), + min_monolingual=cs_config.get('min_monolingual', 0.3), + lang_probs=cs_config.get('probs', None), + db_norm=cs_config.get('db_norm', -25.0), + pause_start=cs_config.get('pause_start', 0), + pause_join=cs_config.get('pause_join', 0), + pause_end=cs_config.get('pause_end', 0), + sampling_scales=cs_config.get('sampling_scales', None), + seed=cs_config.get('seed', None), + global_rank=global_rank, + world_size=world_size, + pure_random=cs_config.get('pure_random', False), + force_monochannel=cs_config.get('force_monochannel', True), + infinity_mode=cs_config.get('infinity_mode', False), + sample_rate=config['sample_rate'], + augmentor=augmentor, + ) + + return dataset + + def get_dali_char_dataset( config: dict, shuffle: bool, @@ -546,8 +628,35 @@ def get_audio_to_text_char_dataset_from_config( ) return dataset + # Instantiate a code-switched dataset if config is present + if config.get('is_code_switched', False): + if 'manifest_filepath' in config and config['manifest_filepath'] is None: + logging.warning(f"Could not load dataset as `manifest_filepath` was None. Provided config : {config}") + return None + if not ('code_switched' in config and config['code_switched'] is not None): + logging.warning( + f"Code switched dataset requires `*_ds.code_switched.*` dict but it was not provided. Config: {config}" + ) + return None + if ( + ('probs' in config['code_switched']) + and (config['code_switched']['probs'] is not None) + and (not isclose(sum(config['code_switched']['probs']), 1, abs_tol=1e-6)) + ): + logging.warning(f"`.code_switched.probs` need to sum to 1. Config: {config['code_switched']}") + return None + + shuffle_n = config.get('shuffle_n', 4 * config['batch_size']) if shuffle else 0 + dataset = get_code_switched_dataset( + config=config, + shuffle_n=shuffle_n, + global_rank=global_rank, + world_size=world_size, + tokenizer=None, + augmentor=augmentor, + ) # Instantiate tarred dataset loader or normal dataset loader - if config.get('is_tarred', False): + elif config.get('is_tarred', False): if ('tarred_audio_filepaths' in config and config['tarred_audio_filepaths'] is None) or ( 'manifest_filepath' in config and config['manifest_filepath'] is None ): @@ -645,8 +754,35 @@ def get_audio_to_text_bpe_dataset_from_config( ) return dataset + # Instantiate a code-switched dataset if config is present + if config.get('is_code_switched', False): + if 'manifest_filepath' in config and config['manifest_filepath'] is None: + logging.warning(f"Could not load dataset as `manifest_filepath` was None. Provided config : {config}") + return None + if not ('code_switched' in config and config['code_switched'] is not None): + logging.warning( + f"Code switched dataset requires `*_ds.code_switched.*` dict but it was not provided. Config: {config}" + ) + return None + if ( + ('probs' in config['code_switched']) + and (config['code_switched']['probs'] is not None) + and (not isclose(sum(config['code_switched']['probs']), 1, abs_tol=1e-6)) + ): + logging.warning(f"`.code_switched.probs` need to sum to 1. Config: {config['code_switched']}") + return None + + shuffle_n = config.get('shuffle_n', 4 * config['batch_size']) if shuffle else 0 + dataset = get_code_switched_dataset( + config=config, + shuffle_n=shuffle_n, + global_rank=global_rank, + world_size=world_size, + tokenizer=tokenizer, + augmentor=augmentor, + ) # Instantiate tarred dataset loader or normal dataset loader - if config.get('is_tarred', False): + elif config.get('is_tarred', False): if ('tarred_audio_filepaths' in config and config['tarred_audio_filepaths'] is None) or ( 'manifest_filepath' in config and config['manifest_filepath'] is None ): @@ -713,6 +849,7 @@ def write_on_batch_end( item = {} sample = self.dataset.get_manifest_sample(sample_id) item["audio_filepath"] = sample.audio_file + item["offset"] = sample.offset item["duration"] = sample.duration item["text"] = sample.text_raw item["pred_text"] = transcribed_text diff --git a/nemo/collections/asr/data/feature_to_label.py b/nemo/collections/asr/data/feature_to_label.py index 673f50374581..058d0157fcbd 100644 --- a/nemo/collections/asr/data/feature_to_label.py +++ b/nemo/collections/asr/data/feature_to_label.py @@ -262,14 +262,20 @@ class FeatureToLabelDataset(Dataset): Dataset that loads tensors via a json file containing paths to feature files and their labels. Each new line is a different sample. Example below: and their target labels. JSON files should be of the following format: - {"feature_filepath": "/path/to/audio_feature.pt", "label": "1"} \ + {"feature_filepath": "/path/to/audio_feature.pt", "label": "1"} ... {"feature_filepath": "/path/to/audio_feature.pt", "label": "0"} Args: - manifest_filepath (str): Dataset parameter. Path to JSON containing data. - labels (Optional[list]): Dataset parameter. List of unique labels collected from all samples. + manifest_filepath (str): Path to JSON containing data. + labels (Optional[list]): List of unique labels collected from all samples. augmentor (Optional): feature augmentation - + window_length_in_sec (float): Window length in seconds. + shift_length_in_sec (float): Shift length in seconds. + is_regression_task (bool): if True, the labels are treated as for a regression task. + cal_labels_occurrence (bool): if True, the labels occurrence will be calculated. + zero_spec_db_val (float): Value to replace non-speech signals in log-melspectrogram. + min_duration (float): Minimum duration of the audio file in seconds. + max_duration (float): Maximum duration of the audio file in seconds. """ ZERO_LEVEL_SPEC_DB_VAL = -16.635 # Log-Melspectrogram value for zero signal @@ -296,22 +302,53 @@ def __init__( augmentor: 'nemo.collections.asr.parts.perturb.AudioAugmentor' = None, window_length_in_sec: float = 0.63, shift_length_in_sec: float = 0.01, + is_regression_task: bool = False, + cal_labels_occurrence: Optional[bool] = False, + zero_spec_db_val: float = -16.635, + min_duration: Optional[float] = None, + max_duration: Optional[float] = None, ): super().__init__() self.window_length_in_sec = window_length_in_sec self.shift_length_in_sec = shift_length_in_sec - self.collection = collections.ASRFeatureLabel(manifests_files=manifest_filepath.split(','),) + self.zero_spec_db_val = zero_spec_db_val + + if isinstance(manifest_filepath, str): + manifest_filepath = manifest_filepath.split(',') + + self.collection = collections.ASRFeatureLabel( + manifests_files=manifest_filepath, + is_regression_task=is_regression_task, + cal_labels_occurrence=cal_labels_occurrence, + min_duration=min_duration, + max_duration=max_duration, + ) self.feature_loader = ExternalFeatureLoader(augmentor=augmentor) self.labels = labels if labels else self.collection.uniq_labels - self.label2id, self.id2label = {}, {} - for label_id, label in enumerate(self.labels): - self.label2id[label] = label_id - self.id2label[label_id] = label + self.is_regression_task = is_regression_task - for idx in range(len(self.labels[:5])): - logging.debug(" label id {} and its mapped label {}".format(idx, self.id2label[idx])) + if not is_regression_task: + self.labels = labels if labels else self.collection.uniq_labels + self.num_classes = len(self.labels) if self.labels is not None else 1 + self.label2id, self.id2label = {}, {} + self.id2occurrence, self.labels_occurrence = {}, [] + + for label_id, label in enumerate(self.labels): + self.label2id[label] = label_id + self.id2label[label_id] = label + if cal_labels_occurrence: + self.id2occurrence[label_id] = self.collection.labels_occurrence[label] + + if cal_labels_occurrence: + self.labels_occurrence = [self.id2occurrence[k] for k in sorted(self.id2occurrence)] + + for idx in range(len(self.labels[:5])): + logging.debug(" label id {} and its mapped label {}".format(idx, self.id2label[idx])) + else: + self.labels = [] + self.num_classes = 1 def __len__(self): return len(self.collection) @@ -328,9 +365,133 @@ def __getitem__(self, index): return f, fl, t, tl def _collate_fn(self, batch): - return _audio_feature_collate_fn(batch, self.ZERO_LEVEL_SPEC_DB_VAL, 0) + return _audio_feature_collate_fn(batch, self.zero_spec_db_val, 0) def _vad_segment_collate_fn(self, batch): return _vad_feature_segment_collate_fn( batch, self.window_length_in_sec, self.shift_length_in_sec, self.FRAME_UNIT_TIME_SECS ) + + +class FeatureToMultiLabelDataset(Dataset): + """ + Dataset that loads tensors via a json file containing paths to feature files and their labels. + Each new line is a different sample. Example below: + and their target labels. JSON files should be of the following format: + {"feature_filepath": "/path/to/audio_feature.pt", "label": "1 1 0 0 1"} + ... + {"feature_filepath": "/path/to/audio_feature.pt", "label": "0 1 0 0"} + Args: + manifest_filepath (str): Path to JSON containing data. + labels (Optional[list]): List of unique labels collected from all samples. + augmentor (Optional): feature augmentation + delimiter (str): delimiter to split the labels. + is_regression_task (bool): if True, the labels are treated as for a regression task. + cal_labels_occurrence (bool): if True, the labels occurrence will be calculated. + zero_spec_db_val (float): Value to replace non-speech signals in log-melspectrogram. + min_duration (float): Minimum duration of the audio file in seconds. + max_duration (float): Maximum duration of the audio file in seconds. + """ + + ZERO_LEVEL_SPEC_DB_VAL = -16.635 # Log-Melspectrogram value for zero signal + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + """Returns definitions of module output ports. + """ + output_types = { + 'audio_feat': NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()), + 'feat_length': NeuralType(tuple('B'), LengthsType()), + 'labels': NeuralType(('B', 'T'), LabelsType()), + 'labels_length': NeuralType(tuple('B'), LengthsType()), + } + + return output_types + + def __init__( + self, + *, + manifest_filepath: str, + labels: List[str] = None, + augmentor: 'nemo.collections.asr.parts.perturb.AudioAugmentor' = None, + delimiter: Optional[str] = None, + is_regression_task: bool = False, + cal_labels_occurrence: Optional[bool] = False, + zero_spec_db_val: float = -16.635, + min_duration: Optional[float] = None, + max_duration: Optional[float] = None, + ): + super().__init__() + self.delimiter = delimiter + self.zero_spec_db_val = zero_spec_db_val + + if isinstance(manifest_filepath, str): + manifest_filepath = manifest_filepath.split(',') + + self.collection = collections.ASRFeatureLabel( + manifests_files=manifest_filepath, + is_regression_task=is_regression_task, + cal_labels_occurrence=cal_labels_occurrence, + delimiter=delimiter, + min_duration=min_duration, + max_duration=max_duration, + ) + + self.is_regression_task = is_regression_task + self.feature_loader = ExternalFeatureLoader(augmentor=augmentor) + self.labels = labels if labels else self.collection.uniq_labels + + self.label2id, self.id2label = {}, {} + if not is_regression_task: + self.labels = labels if labels else self._get_label_set() + self.num_classes = len(self.labels) if self.labels is not None else 1 + self.label2id, self.id2label = {}, {} + for label_id, label in enumerate(self.labels): + self.label2id[label] = label_id + self.id2label[label_id] = label + if cal_labels_occurrence: + self.id2occurrence[label_id] = self.collection.labels_occurrence[label] + self.labels_occurrence.append(self.id2occurrence[label_id]) + + for idx in range(len(self.labels[:5])): + logging.debug(" label id {} and its mapped label {}".format(idx, self.id2label[idx])) + else: + self.labels = [] + self.num_classes = 1 + + def _get_label_set(self): + labels = [] + for sample in self.collection: + label_str = sample.label + if label_str: + label_str_list = label_str.split(self.delimiter) if self.delimiter else label_str.split() + labels.extend(label_str_list) + return sorted(set(labels)) + + def _label_str_to_tensor(self, label_str: str): + labels = label_str.split(self.delimiter) if self.delimiter else label_str.split() + + if self.is_regression_task: + labels = [float(s) for s in labels] + labels = torch.tensor(labels).float() + else: + labels = [self.label2id[s] for s in labels] + labels = torch.tensor(labels).long() + return labels + + def __len__(self): + return len(self.collection) + + def __getitem__(self, index): + sample = self.collection[index] + + features = self.feature_loader.process(sample.feature_file) + f, fl = features, torch.tensor(features.shape[1]).long() + + t = self._label_str_to_tensor(sample.label) + tl = torch.tensor(t.size(0)).long() + + return f, fl, t, tl + + def _collate_fn(self, batch): + return _audio_feature_collate_fn(batch, self.zero_spec_db_val, 0) diff --git a/nemo/collections/asr/data/feature_to_label_dataset.py b/nemo/collections/asr/data/feature_to_label_dataset.py index dabe06aa62bb..08803f43ce8d 100644 --- a/nemo/collections/asr/data/feature_to_label_dataset.py +++ b/nemo/collections/asr/data/feature_to_label_dataset.py @@ -34,13 +34,35 @@ def get_feature_seq_speakerlabel_dataset( def get_feature_label_dataset( - config: dict, augmentor: Optional['AudioAugmentor'] = None + config: dict, augmentor: Optional['FeatureAugmentor'] = None ) -> feature_to_label.FeatureToLabelDataset: dataset = feature_to_label.FeatureToLabelDataset( manifest_filepath=config['manifest_filepath'], labels=config['labels'], augmentor=augmentor, window_length_in_sec=config.get("window_length_in_sec", 0.63), - shift_length_in_sec=config.get("shift_length_in_sec", 0.01), + shift_length_in_sec=config.get("shift_length_in_sec", 0.08), + is_regression_task=config.get("is_regression_task", False), + cal_labels_occurrence=config.get("cal_labels_occurrence", False), + zero_spec_db_val=config.get("zero_spec_db_val", -16.635), + max_duration=config.get('max_duration', None), + min_duration=config.get('min_duration', None), + ) + return dataset + + +def get_feature_multi_label_dataset( + config: dict, augmentor: Optional['FeatureAugmentor'] = None +) -> feature_to_label.FeatureToMultiLabelDataset: + dataset = feature_to_label.FeatureToMultiLabelDataset( + manifest_filepath=config['manifest_filepath'], + labels=config['labels'], + augmentor=augmentor, + delimiter=config.get('delimiter', None), + is_regression_task=config.get("is_regression_task", False), + cal_labels_occurrence=config.get("cal_labels_occurrence", False), + zero_spec_db_val=config.get("zero_spec_db_val", -16.635), + max_duration=config.get('max_duration', None), + min_duration=config.get('min_duration', None), ) return dataset diff --git a/nemo/collections/asr/data/feature_to_text.py b/nemo/collections/asr/data/feature_to_text.py index eaec7b3afba5..a7e295051ae8 100644 --- a/nemo/collections/asr/data/feature_to_text.py +++ b/nemo/collections/asr/data/feature_to_text.py @@ -86,30 +86,32 @@ class _FeatureTextDataset(Dataset): {"feature_filepath": "/path/to/audio_feature.pt", "text": "the transcription", "offset": 301.75, "duration": 0.82, "utt": "utterance_id", "ctm_utt": "en_4156", "side": "A"} Args: - manifest_filepath: Path to manifest json as described above. Can be comma-separated paths. + manifest_filepath (str): Path to manifest json as described above. Can be comma-separated paths. parser: Str for a language specific preprocessor or a callable. - normalize: whether and where to normalize feature, must be one of [None, "post_norm", "pre_norm"] + normalize (bool): whether and where to normalize feature, must be one of [None, "post_norm", "pre_norm"] normalize_type (Union[str, dict]): how to normalize feature, see `nemo.collections.asr.parts.preprocessing.features.normalize_batch` - use_rttm: whether to use RTTM files if there is any, default to False + use_rttm (bool): whether to use RTTM files if there is any, default to False + rttm_mode (str): how to use RTTM files, must be one of ['mask', 'drop'], default to 'mask' + feat_min_len (int): minimum length of feature when rttm_mode=deop, default to 4. feat_mask_val (Optional[float]): value used to mask features with RTTM files, default to None to use zero mel-spectralgram frame_unit_time_secs (float): time in seconds for each frame sample_rate (int): Sample rate to resample loaded audio to int_values (bool): If true, load samples as 32-bit integers. Defauts to False. - augmentor (nemo.collections.asr.parts.perturb.AudioAugmentor): An AudioAugmentor object used to augment loaded - audio - max_duration: If audio exceeds this length, do not include in dataset - min_duration: If audio is less than this length, do not include in dataset - max_utts: Limit number of utterances - trim: whether or not to trim silence. Defaults to False - bos_id: Id of beginning of sequence symbol to append if not None - eos_id: Id of end of sequence symbol to append if not None - pad_id: Id of pad symbol. Defaults to 0 + augmentor (nemo.collections.asr.parts.perturb.AudioAugmentor): An AudioAugmentor object used to augment loaded audio + max_duration (float): If audio exceeds this length, do not include in dataset + min_duration (float): If audio is less than this length, do not include in dataset + max_utts (int): Limit number of utterances + trim (bool): whether or not to trim silence. Defaults to False + bos_id (int): Id of beginning of sequence symbol to append if not None + eos_id (int): Id of end of sequence symbol to append if not None + pad_id (int): Id of pad symbol. Defaults to 0 return_sample_id (bool): whether to return the sample_id as a part of each sample channel_selector (int | Iterable[int] | str): select a single channel or a subset of channels from multi-channel audio. If set to `'average'`, it performs averaging across channels. Disabled if set to `None`. Defaults to `None`. Uses zero-based indexing. """ ZERO_LEVEL_SPEC_DB_VAL = -16.635 # Log-Melspectrogram value for zero signal NORM_MODES = ["pre_norm", "post_norm"] + RTTM_MODES = ["mask", "drop"] @property def output_types(self) -> Optional[Dict[str, NeuralType]]: @@ -130,6 +132,8 @@ def __init__( normalize: Optional[str] = "post_norm", normalize_type: Union[str, dict] = "per_feature", use_rttm: bool = False, + rttm_mode: str = "mask", + feat_min_len: int = 4, feat_mask_val: Optional[float] = None, frame_unit_time_secs: float = 0.01, sample_rate: Optional[int] = 16000, @@ -151,6 +155,11 @@ def __init__( self.normalize = normalize self.normalize_type = normalize_type self.use_rttm = use_rttm + self.rttm_mode = rttm_mode + if self.use_rttm and self.rttm_mode not in self.RTTM_MODES: + raise ValueError(f"`rttm_mode` must be one of {self.RTTM_MODES}, got `{rttm_mode}` instead") + + self.feat_min_len = feat_min_len if feat_mask_val is not None: self.feat_mask_val = feat_mask_val elif normalize == "pre_norm": @@ -197,17 +206,18 @@ def __getitem__(self, index): # Feature normalization if self.normalize is None: if self.use_rttm and sample.rttm_file: - f = self.mask_features_from_rttm(f, offset, sample.rttm_file, self.feat_mask_val) + f = self.process_features_with_rttm(f, offset, sample.rttm_file, self.feat_mask_val) elif self.normalize == "post_norm": # (Optional) Masking based on RTTM file if self.use_rttm and sample.rttm_file: - f = self.mask_features_from_rttm(f, offset, sample.rttm_file, self.feat_mask_val) + f = self.process_features_with_rttm(f, offset, sample.rttm_file, self.feat_mask_val) + f = self.normalize_feature(f) else: # pre-norm f = self.normalize_feature(f) # (Optional) Masking based on RTTM file if self.use_rttm and sample.rttm_file: - f = self.mask_features_from_rttm(f, offset, sample.rttm_file, self.feat_mask_val) + f = self.process_features_with_rttm(f, offset, sample.rttm_file, self.feat_mask_val) if self.return_sample_id: output = f, fl, torch.tensor(t).long(), torch.tensor(tl).long(), index @@ -216,17 +226,32 @@ def __getitem__(self, index): return output - def mask_features_from_rttm(self, features, offset, rttm_file, mask_val): + def process_features_with_rttm(self, features, offset, rttm_file, mask_val): segments = load_speech_segments_from_rttm(rttm_file) - sid = 0 + new_features = features.clone() + sid, fid = 0, 0 for i in range(features.size(1)): t = offset + i * self.frame_unit_time_secs while sid < len(segments) - 1 and segments[sid][1] < t: sid += 1 if segments[sid][1] == 0 or t < segments[sid][0] or t > segments[sid][1]: - features[:, i] = mask_val - - return features + # not in speech segment + if self.rttm_mode == "drop": + # drop the frame + continue + else: + # mask the frame with specified value + new_features[:, i] = mask_val + fid += 1 + else: + # in speech segment + new_features[:, fid] = features[:, i] + fid += 1 + + if fid < self.feat_min_len and self.rttm_mode == "drop": + new_features[:, : self.feat_min_len] = mask_val + return new_features[:, : self.feat_min_len] + return new_features[:, :fid] def __len__(self): return len(self.manifest_processor.collection) @@ -259,12 +284,14 @@ class FeatureToCharDataset(_FeatureTextDataset): "utterance_id", "ctm_utt": "en_4156", "side": "A"} Args: - manifest_filepath: Path to manifest json as described above. Can + manifest_filepath (str): Path to manifest json as described above. Can be comma-separated paths. - labels: String containing all the possible characters to map to - normalize: how to normalize feature, must be one of [None, "post_norm", "pre_norm"] + labels (str): String containing all the possible characters to map to + normalize (str): how to normalize feature, must be one of [None, "post_norm", "pre_norm"] normalize_type (Union[str, dict]): how to normalize feature, see `nemo.collections.asr.parts.preprocessing.features.normalize_batch` - use_rttm: whether to use RTTM files if there is any, default to False + use_rttm (bool): whether to use RTTM files if there is any, default to False + rttm_mode (str): how to use RTTM files, must be one of ['mask', 'drop'], default to 'mask' + feat_min_len (int): minimum length of feature, default to 4 feat_mask_val (Optional[float]): value used to mask features with RTTM files, default to None to use zero mel-spectralgram frame_unit_time_secs: time in seconds for each frame sample_rate (int): Sample rate to resample loaded audio to @@ -290,6 +317,8 @@ def __init__( normalize: Optional[str] = "post_norm", normalize_type: Union[str, dict] = "per_feature", use_rttm: bool = False, + rttm_mode: str = "mask", + feat_min_len: int = 4, feat_mask_val: Optional[float] = None, frame_unit_time_secs: float = 0.01, sample_rate: Optional[int] = 16000, @@ -319,6 +348,8 @@ def __init__( normalize=normalize, normalize_type=normalize_type, use_rttm=use_rttm, + rttm_mode=rttm_mode, + feat_min_len=feat_min_len, feat_mask_val=feat_mask_val, frame_unit_time_secs=frame_unit_time_secs, sample_rate=sample_rate, @@ -352,14 +383,16 @@ class FeatureToBPEDataset(_FeatureTextDataset): the manifest. Args: - manifest_filepath: Path to manifest json as described above. Can + manifest_filepath (str): Path to manifest json as described above. Can be comma-separated paths. tokenizer: A subclass of the Tokenizer wrapper found in the common collection, nemo.collections.common.tokenizers.TokenizerSpec. ASR Models support a subset of all available tokenizers. - normalize: how to normalize feature, must be one of [None, "post_norm", "pre_norm"] + normalize (str): how to normalize feature, must be one of [None, "post_norm", "pre_norm"] normalize_type (Union[str, dict]): how to normalize feature, see `nemo.collections.asr.parts.preprocessing.features.normalize_batch` - use_rttm: whether to use RTTM files if there is any, default to False + use_rttm (bool): whether to use RTTM files if there is any, default to False + rttm_mode (str): how to use RTTM files, must be one of ['mask', 'drop'], default to 'mask' + feat_min_len (int): minimum length of feature, default to 4 feat_mask_val (Optional[float]): value used to mask features with RTTM files, default to None to use zero mel-spectralgram frame_unit_time_secs: time in seconds for each frame sample_rate (int): Sample rate to resample loaded audio to @@ -384,6 +417,8 @@ def __init__( normalize: Optional[str] = "post_norm", normalize_type: Union[str, dict] = "per_feature", use_rttm: bool = False, + rttm_mode: str = "mask", + feat_min_len: int = 4, feat_mask_val: Optional[float] = None, frame_unit_time_secs: float = 0.01, sample_rate: Optional[int] = 16000, @@ -435,6 +470,8 @@ def __call__(self, *args): normalize=normalize, normalize_type=normalize_type, use_rttm=use_rttm, + rttm_mode=rttm_mode, + feat_min_len=feat_min_len, feat_mask_val=feat_mask_val, frame_unit_time_secs=frame_unit_time_secs, sample_rate=sample_rate, diff --git a/nemo/collections/asr/data/feature_to_text_dataset.py b/nemo/collections/asr/data/feature_to_text_dataset.py index 7efd3be3cd24..6bc03bc0b33d 100644 --- a/nemo/collections/asr/data/feature_to_text_dataset.py +++ b/nemo/collections/asr/data/feature_to_text_dataset.py @@ -38,6 +38,8 @@ def get_char_dataset(config: dict, augmentor: Optional['FeatureAugmentor'] = Non normalize=config.get('normalize', 'post_norm'), normalize_type=config.get('normalize_type', 'per_feature'), use_rttm=config.get('use_rttm', False), + rttm_mode=config.get('rttm_mode', 'mask'), + feat_min_len=config.get('feat_min_len', 4), feat_mask_val=config.get('feat_mask_val', None), frame_unit_time_secs=config.get('frame_unit_time_secs', 0.01), sample_rate=config.get('sample_rate', 16000), @@ -75,6 +77,8 @@ def get_bpe_dataset( normalize=config.get('normalize', 'post_norm'), normalize_type=config.get('normalize_type', 'per_feature'), use_rttm=config.get('use_rttm', False), + rttm_mode=config.get('rttm_mode', 'mask'), + feat_min_len=config.get('feat_min_len', 4), feat_mask_val=config.get('feat_mask_val', None), frame_unit_time_secs=config.get('frame_unit_time_secs', 0.01), sample_rate=config.get('sample_rate', 16000), diff --git a/nemo/collections/asr/losses/rnnt.py b/nemo/collections/asr/losses/rnnt.py index ee89cb9e0f8e..894be6319c99 100644 --- a/nemo/collections/asr/losses/rnnt.py +++ b/nemo/collections/asr/losses/rnnt.py @@ -27,18 +27,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import inspect import operator from dataclasses import dataclass -from typing import List, Optional +from typing import Any, Callable, Dict, List, Optional, Set import torch from omegaconf import DictConfig, OmegaConf -from nemo.collections.asr.losses.rnnt_pytorch import MultiblankRNNTLossPytorch, RNNTLossPytorch +from nemo.collections.asr.losses.rnnt_pytorch import MultiblankRNNTLossPytorch, RNNTLossPytorch, TDTLossPytorch from nemo.core.classes import Loss, typecheck from nemo.core.neural_types import LabelsType, LengthsType, LogprobsType, LossType, NeuralType +from nemo.core.utils import numba_utils +from nemo.core.utils.k2_utils import K2_INSTALLATION_MESSAGE from nemo.core.utils.numba_utils import NUMBA_INSTALLATION_MESSAGE -from nemo.utils import logging, model_utils +from nemo.utils import logging, logging_mode, model_utils try: import warprnnt_pytorch as warprnnt @@ -48,12 +51,19 @@ WARP_RNNT_AVAILABLE = False try: - from nemo.collections.asr.parts.numba.rnnt_loss import MultiblankRNNTLossNumba, RNNTLossNumba + from nemo.collections.asr.parts.numba.rnnt_loss import MultiblankRNNTLossNumba, RNNTLossNumba, TDTLossNumba NUMBA_RNNT_AVAILABLE = True except (ImportError, ModuleNotFoundError): NUMBA_RNNT_AVAILABLE = False +try: + from nemo.collections.asr.parts.k2.graph_transducer import GraphRnntLoss + from nemo.collections.asr.parts.k2.w_transducer import GraphWTransducerLoss + + K2_AVAILABLE = True +except (ImportError, ModuleNotFoundError): + K2_AVAILABLE = False WARP_RNNT_INSTALLATION_MESSAGE = ( "Could not import `warprnnt_pytorch`.\n" @@ -71,6 +81,7 @@ class RNNTLossConfig: is_available: bool = False installation_msg: str = "" min_version: Optional[str] = None + force_float32: bool = True # default True for now for all losses except graph-based # Resolved list of available RNNT losses @@ -80,6 +91,7 @@ class RNNTLossConfig: lib_name="warprnnt_pytorch", is_available=WARP_RNNT_AVAILABLE, installation_msg=WARP_RNNT_INSTALLATION_MESSAGE, + force_float32=True, ), "warprnnt_numba": RNNTLossConfig( loss_name="warprnnt_numba", @@ -87,6 +99,7 @@ class RNNTLossConfig: min_version='0.53.0', is_available=NUMBA_RNNT_AVAILABLE, installation_msg=NUMBA_INSTALLATION_MESSAGE, + force_float32=False, # This is only temporarily false, will be dynamically updated during resolution ), "pytorch": RNNTLossConfig( loss_name="pytorch", @@ -94,6 +107,7 @@ class RNNTLossConfig: min_version='0.0', is_available=True, installation_msg="Pure Pytorch implementation of RNN-T loss. Slow and for debugging purposes only.", + force_float32=True, ), "multiblank_rnnt": RNNTLossConfig( loss_name="multiblank_rnnt", @@ -101,6 +115,7 @@ class RNNTLossConfig: min_version='0.53.0', is_available=NUMBA_RNNT_AVAILABLE, installation_msg=NUMBA_INSTALLATION_MESSAGE, + force_float32=True, ), "multiblank_rnnt_pytorch": RNNTLossConfig( loss_name="pytorch", @@ -108,6 +123,35 @@ class RNNTLossConfig: min_version='0.0', is_available=True, installation_msg="Pure Pytorch implementation of Multiblank RNN-T loss. Slow and for debugging purposes only.", + force_float32=True, + ), + "graph_w_transducer": RNNTLossConfig( + loss_name="graph_w_transducer", + lib_name="k2", + is_available=K2_AVAILABLE, + installation_msg=K2_INSTALLATION_MESSAGE, + force_float32=False, + ), + "graph_rnnt": RNNTLossConfig( + loss_name="graph_rnnt", + lib_name="k2", + is_available=K2_AVAILABLE, + installation_msg=K2_INSTALLATION_MESSAGE, + force_float32=False, + ), + "tdt": RNNTLossConfig( + loss_name="tdt", + lib_name="numba", + min_version='0.53.0', + is_available=NUMBA_RNNT_AVAILABLE, + installation_msg=NUMBA_INSTALLATION_MESSAGE, + ), + "tdt_pytorch": RNNTLossConfig( + loss_name="tdt_pytorch", + lib_name="torch", + min_version='0.0', + is_available=True, + installation_msg="Pure Pytorch implementation of TDT loss. Slow and for debugging purposes only.", ), } @@ -123,6 +167,38 @@ def _warn_unused_additional_kwargs(loss_name, kwargs): ) +def _clean_kwargs( + loss_name: str, kwargs: Optional[Dict[str, Any]], init_method: Callable, ignore_params: Optional[Set[str]] = None +) -> Dict[str, Any]: + """ + Cleans kwargs for the given loss function. Warn if there are unused kwargs. + + Args: + loss_name: name of the loss function + kwargs: kwargs to clean + init_method: LossClass.__init__ method + ignore_params: set of argument names for init_method to ignore + + Returns: + only used kwargs for the given `init_method` + """ + if not kwargs: + return {} + init_params = set(inspect.signature(init_method).parameters.keys()) - {"self"} + if ignore_params is not None: + init_params -= ignore_params + unused_kwargs = dict() + used_kwargs = dict() + for key, value in kwargs.items(): + if key not in init_params: + unused_kwargs[key] = value + else: + used_kwargs[key] = value + if len(unused_kwargs) > 0: + _warn_unused_additional_kwargs(loss_name, unused_kwargs) + return used_kwargs + + def resolve_rnnt_default_loss_name() -> str: return RNNT_LOSS_RESOLVER['default'].loss_name @@ -182,6 +258,9 @@ def resolve_rnnt_loss(loss_name: str, blank_idx: int, loss_kwargs: dict = None) _warn_unused_additional_kwargs(loss_name, loss_kwargs) elif loss_name == 'warprnnt_numba': + # Update loss config's forced float32 flag if set to None + loss_config.force_float32 = not numba_utils.is_numba_cuda_fp16_supported() + fastemit_lambda = loss_kwargs.pop('fastemit_lambda', 0.0) clamp = loss_kwargs.pop('clamp', -1.0) loss_func = RNNTLossNumba(blank=blank_idx, reduction='none', fastemit_lambda=fastemit_lambda, clamp=clamp) @@ -214,6 +293,35 @@ def resolve_rnnt_loss(loss_name: str, blank_idx: int, loss_kwargs: dict = None) ) _warn_unused_additional_kwargs(loss_name, loss_kwargs) + elif loss_name == 'tdt': + fastemit_lambda = loss_kwargs.pop('fastemit_lambda', 0.0) + clamp = loss_kwargs.pop('clamp', -1.0) + durations = loss_kwargs.pop('durations', None) + sigma = loss_kwargs.pop('sigma', 0.0) + omega = loss_kwargs.pop('omega', 0.0) + loss_func = TDTLossNumba( + blank=blank_idx, + durations=durations, + reduction='none', + fastemit_lambda=fastemit_lambda, + clamp=clamp, + sigma=sigma, + omega=omega, + ) + _warn_unused_additional_kwargs(loss_name, loss_kwargs) + + elif loss_name == 'tdt_pytorch': + durations = loss_kwargs.pop('durations', None) + sigma = loss_kwargs.pop('sigma', 0.0) + loss_func = TDTLossPytorch(blank=blank_idx, durations=durations, reduction='none', sigma=sigma) + _warn_unused_additional_kwargs(loss_name, loss_kwargs) + + elif loss_name == "graph_rnnt": + loss_kwargs = _clean_kwargs(loss_name, loss_kwargs, GraphRnntLoss.__init__, ignore_params={"blank"}) + loss_func = GraphRnntLoss(blank=blank_idx, **loss_kwargs) + elif loss_name == "graph_w_transducer": + loss_kwargs = _clean_kwargs(loss_name, loss_kwargs, GraphWTransducerLoss.__init__, ignore_params={"blank"}) + loss_func = GraphWTransducerLoss(blank=blank_idx, **loss_kwargs) else: raise ValueError( f"Invalid value of `loss_name`: {loss_name}. Allowed loss names are :" f"{loss_function_names}" @@ -279,7 +387,13 @@ def __init__(self, num_classes, reduction: str = 'mean_batch', loss_name: str = Args: num_classes: Number of target classes for the joint network to predict. - (Excluding the RNN-T blank token). + In all cases (conventional RNNT, multi-blank RNNT, and TDT model), this equals the token-id + for the standard "blank" symbol. In particular, say V is the number of non-blank tokens in + the vocabulary, then in the case of, + standard RNNT: num_classes = V + multiblank RNNT: num_classes = V + number-big-blanks (since we store big-blanks before + standard blank, and the standard blank is the last symbol in the vocab) + TDT: num_classes = V. Note, V here does not include any of the "duration outputs". reduction: Type of reduction to perform on loss. Possible values are `mean_batch`, 'mean_volume`, `mean`, `sum` or None. @@ -302,6 +416,8 @@ def __init__(self, num_classes, reduction: str = 'mean_batch', loss_name: str = self._blank = num_classes self.reduction = reduction self._loss = resolve_rnnt_loss(loss_name, blank_idx=self._blank, loss_kwargs=loss_kwargs) + self._force_float32 = RNNT_LOSS_RESOLVER[loss_name].force_float32 + self._fp16_compat_checked = False def reduce(self, losses, target_lengths): @@ -331,8 +447,22 @@ def forward(self, log_probs, targets, input_lengths, target_lengths): max_targets_len = target_lengths.max() # Force cast joint to float32 - # TODO: Remove once Numba supports FP16 - if log_probs.dtype != torch.float32: + if not self._force_float32 and numba_utils.is_numba_cuda_fp16_supported(): + # Execute the kernel in fp16 + pass + elif self._force_float32 and log_probs.dtype != torch.float32: + # Log just once if fp16 tensor was passed and fp16 Numba CUDA loss could not be used. + if log_probs.dtype == torch.float16 and not self._fp16_compat_checked: + _, reason = numba_utils.is_numba_cuda_fp16_supported(return_reason=True) + logging.warning( + f"Provided RNNT Joint tensor is of dtype {log_probs.dtype}, but RNNT loss could not be calculated " + f"in fp16 due to following reason stated below. Loss will be calculated in fp32. \n\n" + f"{reason}", + mode=logging_mode.ONCE, + ) + self._fp16_compat_checked = True + + # Upcast the activation tensor and compute loss and grads in fp32 logits_orig = log_probs log_probs = log_probs.float() del logits_orig # save memory *before* computing the loss diff --git a/nemo/collections/asr/losses/rnnt_pytorch.py b/nemo/collections/asr/losses/rnnt_pytorch.py index ab0b5cf4f630..c8eee90a2eb5 100644 --- a/nemo/collections/asr/losses/rnnt_pytorch.py +++ b/nemo/collections/asr/losses/rnnt_pytorch.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import List + import torch from nemo.core.classes import Loss @@ -45,7 +47,12 @@ def __init__(self, blank, reduction): self.reduction = reduction def forward(self, acts, labels, act_lens, label_lens): + # CPU patch for FP16 + if not acts.is_cuda and acts.dtype == torch.float16: + acts = acts.float() + acts = torch.log_softmax(acts, -1) + forward_logprob = self.compute_forward_prob(acts, labels, act_lens, label_lens) losses = -forward_logprob if self.reduction == 'mean_batch': @@ -112,6 +119,136 @@ def compute_forward_prob(self, acts, labels, act_lens, label_lens): return log_prob +class TDTLossPytorch(Loss): + """ + Pure Python implementation of TDT loss (https://arxiv.org/pdf/2304.06795.pdf) + """ + + @property + def input_types(self): + """Input types definitions for CTCLoss. + """ + return { + "acts": NeuralType(('B', 'T', 'T', 'D'), LogprobsType()), + "labels": NeuralType(('B', 'T'), LabelsType()), + "act_lens": NeuralType(tuple('B'), LengthsType()), + "label_lens": NeuralType(tuple('B'), LengthsType()), + } + + @property + def output_types(self): + """Output types definitions for CTCLoss. + loss: + NeuralType(None) + """ + return {"loss": NeuralType(elements_type=LossType())} + + def __init__(self, blank: int, durations: List[int] = [], reduction: str = 'sum', sigma: float = 0.0): + super().__init__() + self.blank = blank + self.durations = durations + self.n_durations = len(durations) + self.reduction = reduction + self.sigma = sigma + + def forward(self, acts, labels, act_lens, label_lens): + label_acts = acts[:, :, :, : -self.n_durations] + duration_acts = acts[:, :, :, -self.n_durations :] + + # the - self.sigma here is for logit-undernormalization. Check the paper for details. + label_acts = torch.log_softmax(label_acts, -1) - self.sigma + + duration_acts = torch.log_softmax(duration_acts, -1) + + forward_logprob, _ = self.compute_forward_prob(label_acts, duration_acts, labels, act_lens, label_lens) + losses = -forward_logprob + if self.reduction == 'mean_batch': + losses = losses.mean() # global batch size average + elif self.reduction == 'mean': + losses = torch.div(losses, label_lens).mean() + elif self.reduction == 'sum': + losses = losses.sum() + elif self.reduction == 'mean_volume': + losses = losses.sum() / label_lens.sum() # same as above but longer samples weigh more + + return losses + + def logsumexp(self, a, b): + ret = torch.logsumexp(torch.stack([a, b]), dim=0) + return ret + + def compute_forward_prob(self, acts, duration_acts, labels, act_lens, label_lens): + """This function implements Equation 7 in the TDT paper https://arxiv.org/pdf/2304.06795.pdf, + Simply put, for each alpha(t, u), it sums over the contribution from all incoming blank arcs and non-blank arcs. + """ + B, T, U, _ = acts.shape + + log_alpha = torch.zeros(B, T, U) + log_alpha = log_alpha.cuda() + for b in range(B): + for t in range(T): + for u in range(U): + if u == 0: + if t == 0: + # both t and u are 0, this is the base case for alphas. + log_alpha[b, t, u] = 0.0 + else: + # u = 0 and t != 0: only considers blank emissions. + log_alpha[b, t, u] = -1000.0 + for n, l in enumerate(self.durations): + if ( + t - l >= 0 and l > 0 + ): # checking conditions for blank emission, l has to be at least 1 + tmp = ( + log_alpha[b, t - l, u] + + acts[b, t - l, u, self.blank] + + duration_acts[b, t - l, u, n] + ) + log_alpha[b, t, u] = self.logsumexp(tmp, 1.0 * log_alpha[b, t, u]) + + else: + # u != 0 here, need to consider both blanks and non-blanks. + log_alpha[b, t, u] = -1000.0 + for n, l in enumerate(self.durations): + if t - l >= 0: + if l > 0: # for blank emissions. Need to ensure index is not out-of-bound. + tmp = ( + log_alpha[b, t - l, u] + + acts[b, t - l, u, self.blank] + + duration_acts[b, t - l, u, n] + ) + log_alpha[b, t, u] = self.logsumexp(tmp, 1.0 * log_alpha[b, t, u]) + + # non-blank emissions. + tmp = ( + log_alpha[b, t - l, u - 1] + + acts[b, t - l, u - 1, labels[b, u - 1]] + + duration_acts[b, t - l, u - 1, n] + ) + log_alpha[b, t, u] = self.logsumexp(tmp, 1.0 * log_alpha[b, t, u]) + + log_probs = [] + for b in range(B): + tt = torch.Tensor([-1000.0]).cuda()[0] + + # need to loop over all possible ways that blank with different durations contributes to the final loss. + for n, l in enumerate(self.durations): + if act_lens[b] - l >= 0 and l > 0: + bb = ( + log_alpha[b, act_lens[b] - l, label_lens[b]] + + acts[b, act_lens[b] - l, label_lens[b], self.blank] + + duration_acts[b, act_lens[b] - l, label_lens[b], n] + ) + + tt = self.logsumexp(bb, 1.0 * tt) + + log_probs.append(tt) + + log_prob = torch.stack(log_probs) + + return log_prob, log_alpha + + class MultiblankRNNTLossPytorch(Loss): """ Pure Python implementation of multi-blank transducer loss (https://arxiv.org/pdf/2211.03541.pdf) @@ -136,7 +273,7 @@ def output_types(self): """ return {"loss": NeuralType(elements_type=LossType())} - def __init__(self, blank, big_blank_durations, reduction, sigma): + def __init__(self, blank, big_blank_durations, reduction: str = "sum", sigma: float = 0.0): super().__init__() self.blank = blank self.big_blank_durations = big_blank_durations @@ -145,7 +282,7 @@ def __init__(self, blank, big_blank_durations, reduction, sigma): def forward(self, acts, labels, act_lens, label_lens): acts = torch.log_softmax(acts, -1) - self.sigma - forward_logprob = self.compute_forward_prob(acts, labels, act_lens, label_lens) + forward_logprob, _ = self.compute_forward_prob(acts, labels, act_lens, label_lens) losses = -forward_logprob if self.reduction == 'mean_batch': @@ -234,4 +371,4 @@ def compute_forward_prob(self, acts, labels, act_lens, label_lens): log_probs.append(to_append) log_prob = torch.stack(log_probs) - return log_prob + return log_prob, log_alpha diff --git a/nemo/collections/asr/metrics/rnnt_wer.py b/nemo/collections/asr/metrics/rnnt_wer.py index 1ccc2d0ac6fc..7e5636191a1d 100644 --- a/nemo/collections/asr/metrics/rnnt_wer.py +++ b/nemo/collections/asr/metrics/rnnt_wer.py @@ -204,6 +204,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): self.blank_id = blank_id self.num_extra_outputs = joint.num_extra_outputs self.big_blank_durations = self.cfg.get("big_blank_durations", None) + self.durations = self.cfg.get("durations", None) self.compute_hypothesis_token_set = self.cfg.get("compute_hypothesis_token_set", False) self.compute_langs = decoding_cfg.get('compute_langs', False) self.preserve_alignments = self.cfg.get('preserve_alignments', None) @@ -211,9 +212,21 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): self.compute_timestamps = self.cfg.get('compute_timestamps', None) self.word_seperator = self.cfg.get('word_seperator', ' ') - if self.big_blank_durations is not None: + if self.durations is not None: # this means it's a TDT model. + if blank_id == 0: + raise ValueError("blank_id must equal len(non_blank_vocabs) for TDT models") + if self.big_blank_durations is not None: + raise ValueError("duration and big_blank_durations can't both be not None") + if self.cfg.strategy not in ['greedy', 'greedy_batch']: + raise ValueError("currently only greedy and greedy_batch inference is supported for TDT models") + + if self.big_blank_durations is not None: # this means it's a multi-blank model. if blank_id == 0: raise ValueError("blank_id must equal len(vocabs) for multi-blank RNN-T models") + if self.cfg.strategy not in ['greedy', 'greedy_batch']: + raise ValueError( + "currently only greedy and greedy_batch inference is supported for multi-blank models" + ) possible_strategies = ['greedy', 'greedy_batch', 'beam', 'tsd', 'alsd', 'maes'] if self.cfg.strategy not in possible_strategies: @@ -254,17 +267,33 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): if self.cfg.strategy == 'greedy': if self.big_blank_durations is None: - self.decoding = greedy_decode.GreedyRNNTInfer( - decoder_model=decoder, - joint_model=joint, - blank_index=self.blank_id, - max_symbols_per_step=( - self.cfg.greedy.get('max_symbols', None) or self.cfg.greedy.get('max_symbols_per_step', None) - ), - preserve_alignments=self.preserve_alignments, - preserve_frame_confidence=self.preserve_frame_confidence, - confidence_method_cfg=self.confidence_method_cfg, - ) + if self.durations is None: + self.decoding = greedy_decode.GreedyRNNTInfer( + decoder_model=decoder, + joint_model=joint, + blank_index=self.blank_id, + max_symbols_per_step=( + self.cfg.greedy.get('max_symbols', None) + or self.cfg.greedy.get('max_symbols_per_step', None) + ), + preserve_alignments=self.preserve_alignments, + preserve_frame_confidence=self.preserve_frame_confidence, + confidence_method_cfg=self.confidence_method_cfg, + ) + else: + self.decoding = greedy_decode.GreedyTDTInfer( + decoder_model=decoder, + joint_model=joint, + blank_index=self.blank_id, + durations=self.durations, + max_symbols_per_step=( + self.cfg.greedy.get('max_symbols', None) + or self.cfg.greedy.get('max_symbols_per_step', None) + ), + preserve_alignments=self.preserve_alignments, + preserve_frame_confidence=self.preserve_frame_confidence, + confidence_method_cfg=self.confidence_method_cfg, + ) else: self.decoding = greedy_decode.GreedyMultiblankRNNTInfer( decoder_model=decoder, @@ -281,17 +310,34 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): elif self.cfg.strategy == 'greedy_batch': if self.big_blank_durations is None: - self.decoding = greedy_decode.GreedyBatchedRNNTInfer( - decoder_model=decoder, - joint_model=joint, - blank_index=self.blank_id, - max_symbols_per_step=( - self.cfg.greedy.get('max_symbols', None) or self.cfg.greedy.get('max_symbols_per_step', None) - ), - preserve_alignments=self.preserve_alignments, - preserve_frame_confidence=self.preserve_frame_confidence, - confidence_method_cfg=self.confidence_method_cfg, - ) + if self.durations is None: + self.decoding = greedy_decode.GreedyBatchedRNNTInfer( + decoder_model=decoder, + joint_model=joint, + blank_index=self.blank_id, + max_symbols_per_step=( + self.cfg.greedy.get('max_symbols', None) + or self.cfg.greedy.get('max_symbols_per_step', None) + ), + preserve_alignments=self.preserve_alignments, + preserve_frame_confidence=self.preserve_frame_confidence, + confidence_method_cfg=self.confidence_method_cfg, + ) + else: + self.decoding = greedy_decode.GreedyBatchedTDTInfer( + decoder_model=decoder, + joint_model=joint, + blank_index=self.blank_id, + durations=self.durations, + max_symbols_per_step=( + self.cfg.greedy.get('max_symbols', None) + or self.cfg.greedy.get('max_symbols_per_step', None) + ), + preserve_alignments=self.preserve_alignments, + preserve_frame_confidence=self.preserve_frame_confidence, + confidence_method_cfg=self.confidence_method_cfg, + ) + else: self.decoding = greedy_decode.GreedyBatchedMultiblankRNNTInfer( decoder_model=decoder, @@ -481,12 +527,12 @@ def decode_hypothesis(self, hypotheses_list: List[Hypothesis]) -> List[Union[Hyp # RNN-T sample level is already preprocessed by implicit RNNT decoding # Simply remove any blank and possibly big blank tokens - if self.blank_id != 0: - num_extra_outputs = 0 - if self.big_blank_durations is not None: - num_extra_outputs += len(self.big_blank_durations) + if self.big_blank_durations is not None: # multi-blank RNNT + num_extra_outputs = len(self.big_blank_durations) prediction = [p for p in prediction if p < self.blank_id - num_extra_outputs] - else: + elif self.durations is not None: # TDT model. + prediction = [p for p in prediction if p < self.blank_id] + else: # standard RNN-T prediction = [p for p in prediction if p != self.blank_id] # De-tokenize the integer tokens; if not computing timestamps @@ -1058,9 +1104,12 @@ class RNNTDecoding(AbstractRNNTDecoding): def __init__( self, decoding_cfg, decoder, joint, vocabulary, ): - blank_id = ( - len(vocabulary) + joint.num_extra_outputs - ) # we need to ensure blank is the last token in the vocab. This is needed for multi-blank RNN-T models. + # we need to ensure blank is the last token in the vocab for the case of RNNT and Multi-blank RNNT. + blank_id = len(vocabulary) + joint.num_extra_outputs + + if hasattr(decoding_cfg, 'model_type') and decoding_cfg.model_type == 'tdt': + blank_id = len(vocabulary) + self.labels_map = dict([(i, vocabulary[i]) for i in range(len(vocabulary))]) super(RNNTDecoding, self).__init__( @@ -1175,7 +1224,7 @@ def validation_epoch_end(self, outputs): def __init__( self, decoding: RNNTDecoding, batch_dim_index=0, use_cer=False, log_prediction=True, dist_sync_on_step=False ): - super(RNNTWER, self).__init__(dist_sync_on_step=dist_sync_on_step, compute_on_step=False) + super(RNNTWER, self).__init__(dist_sync_on_step=dist_sync_on_step) self.decoding = decoding self.batch_dim_index = batch_dim_index self.use_cer = use_cer @@ -1239,7 +1288,9 @@ def compute(self): @dataclass class RNNTDecodingConfig: + model_type: str = "rnnt" # one of "rnnt", "multiblank" or "tdt" strategy: str = "greedy_batch" + compute_hypothesis_token_set: bool = False # preserve decoding alignments diff --git a/nemo/collections/asr/metrics/rnnt_wer_bpe.py b/nemo/collections/asr/metrics/rnnt_wer_bpe.py index 99c71daebaa9..d2e2c3cc5923 100644 --- a/nemo/collections/asr/metrics/rnnt_wer_bpe.py +++ b/nemo/collections/asr/metrics/rnnt_wer_bpe.py @@ -196,11 +196,16 @@ class RNNTBPEDecoding(AbstractRNNTDecoding): """ def __init__(self, decoding_cfg, decoder, joint, tokenizer: TokenizerSpec): - blank_id = tokenizer.tokenizer.vocab_size + blank_id = tokenizer.tokenizer.vocab_size # RNNT or TDT models. + + # multi-blank RNNTs + if hasattr(decoding_cfg, 'model_type') and decoding_cfg.model_type == 'multiblank': + blank_id = tokenizer.tokenizer.vocab_size + joint.num_extra_outputs + self.tokenizer = tokenizer super(RNNTBPEDecoding, self).__init__( - decoding_cfg=decoding_cfg, decoder=decoder, joint=joint, blank_id=blank_id + joint.num_extra_outputs + decoding_cfg=decoding_cfg, decoder=decoder, joint=joint, blank_id=blank_id ) if isinstance(self.decoding, rnnt_beam_decoding.BeamRNNTInfer): @@ -354,7 +359,7 @@ def __init__( log_prediction: bool = True, dist_sync_on_step=False, ): - super(RNNTBPEWER, self).__init__(dist_sync_on_step=dist_sync_on_step, compute_on_step=False) + super(RNNTBPEWER, self).__init__(dist_sync_on_step=dist_sync_on_step) self.decoding = decoding self.batch_dim_index = batch_dim_index self.use_cer = use_cer diff --git a/nemo/collections/asr/metrics/wer.py b/nemo/collections/asr/metrics/wer.py index 7f7f853d307d..4d90810cc3df 100644 --- a/nemo/collections/asr/metrics/wer.py +++ b/nemo/collections/asr/metrics/wer.py @@ -1125,7 +1125,7 @@ def __init__( fold_consecutive=True, dist_sync_on_step=False, ): - super().__init__(dist_sync_on_step=dist_sync_on_step, compute_on_step=False) + super().__init__(dist_sync_on_step=dist_sync_on_step) self.decoding = decoding self.use_cer = use_cer diff --git a/nemo/collections/asr/metrics/wer_bpe.py b/nemo/collections/asr/metrics/wer_bpe.py index 762acf172a16..8a92e4745a1b 100644 --- a/nemo/collections/asr/metrics/wer_bpe.py +++ b/nemo/collections/asr/metrics/wer_bpe.py @@ -247,7 +247,7 @@ def __init__( fold_consecutive=True, dist_sync_on_step=False, ): - super().__init__(dist_sync_on_step=dist_sync_on_step, compute_on_step=False) + super().__init__(dist_sync_on_step=dist_sync_on_step) self.decoding = decoding self.tokenizer = self.decoding.tokenizer self.blank_id = self.decoding.tokenizer.tokenizer.vocab_size diff --git a/nemo/collections/asr/models/asr_model.py b/nemo/collections/asr/models/asr_model.py index c0f4c1cd0a70..7e03d587139f 100644 --- a/nemo/collections/asr/models/asr_model.py +++ b/nemo/collections/asr/models/asr_model.py @@ -161,7 +161,7 @@ def output_module(self): @property def output_names(self): otypes = self.output_module.output_types - if hasattr(self.input_module, 'export_cache_support') and self.input_module.export_cache_support: + if getattr(self.input_module, 'export_cache_support', False): in_types = self.input_module.output_types otypes = {n: t for (n, t) in list(otypes.items())[:1]} for (n, t) in list(in_types.items())[1:]: @@ -174,7 +174,6 @@ def forward_for_export( """ This forward is used when we need to export the model to ONNX format. Inputs cache_last_channel and cache_last_time are needed to be passed for exporting streaming models. - When they are passed, it just passes the inputs through the encoder part and currently the ONNX conversion does not fully work for this case. Args: input: Tensor that represents a batch of raw audio signals, of shape [B, T]. T here represents timesteps. @@ -187,49 +186,26 @@ def forward_for_export( Returns: the output of the model """ - if hasattr(self.input_module, 'forward_for_export'): - if cache_last_channel is None and cache_last_time is None: - encoder_output = self.input_module.forward_for_export(audio_signal=input, length=length) - else: - encoder_output = self.input_module.forward_for_export( - audio_signal=input, - length=length, - cache_last_channel=cache_last_channel, - cache_last_time=cache_last_time, - cache_last_channel_len=cache_last_channel_len, - ) + enc_fun = getattr(self.input_module, 'forward_for_export', self.input_module.forward) + if cache_last_channel is None: + encoder_output = enc_fun(audio_signal=input, length=length) + if isinstance(encoder_output, tuple): + encoder_output = encoder_output[0] else: - if cache_last_channel is None and cache_last_time is None: - encoder_output = self.input_module(audio_signal=input, length=length) - else: - encoder_output = self.input_module( - audio_signal=input, - length=length, - cache_last_channel=cache_last_channel, - cache_last_time=cache_last_time, - cache_last_channel_len=cache_last_channel_len, - ) - if isinstance(encoder_output, tuple): - decoder_input = encoder_output[0] - else: - decoder_input = encoder_output - if hasattr(self.output_module, 'forward_for_export'): - if cache_last_channel is None and cache_last_time is None: - ret = self.output_module.forward_for_export(encoder_output=decoder_input) - else: - ret = self.output_module.forward_for_export(encoder_output=decoder_input) - else: - if cache_last_channel is None and cache_last_time is None: - ret = self.output_module(encoder_output=decoder_input) - else: - ret = self.output_module(encoder_output=decoder_input) - if cache_last_channel is None and cache_last_time is None: - pass - else: - if isinstance(ret, tuple): - ret = (ret[0], encoder_output[1], encoder_output[2], encoder_output[3], encoder_output[4]) - else: - ret = (ret, encoder_output[1], encoder_output[2], encoder_output[3], encoder_output[4]) + encoder_output, length, cache_last_channel, cache_last_time, cache_last_channel_len = enc_fun( + audio_signal=input, + length=length, + cache_last_channel=cache_last_channel, + cache_last_time=cache_last_time, + cache_last_channel_len=cache_last_channel_len, + ) + + dec_fun = getattr(self.output_module, 'forward_for_export', self.output_module.forward) + ret = dec_fun(encoder_output=encoder_output) + if isinstance(ret, tuple): + ret = ret[0] + if cache_last_channel is not None: + ret = (ret, length, cache_last_channel, cache_last_time, cache_last_channel_len) return cast_all(ret, from_dtype=torch.float16, to_dtype=torch.float32) @property @@ -239,3 +215,11 @@ def disabled_deployment_input_names(self): @property def disabled_deployment_output_names(self): return self.encoder.disabled_deployment_output_names + + def set_export_config(self, args): + if 'cache_support' in args: + enable = bool(args['cache_support']) + self.encoder.export_cache_support = enable + logging.info(f"Caching support enabled: {enable}") + self.encoder.setup_streaming_params() + super().set_export_config(args) diff --git a/nemo/collections/asr/models/classification_models.py b/nemo/collections/asr/models/classification_models.py index a7b55e49d754..432674225f5a 100644 --- a/nemo/collections/asr/models/classification_models.py +++ b/nemo/collections/asr/models/classification_models.py @@ -174,7 +174,11 @@ def setup_training_data(self, train_data_config: Optional[Union[DictConfig, Dict # Need to set this because if using an IterableDataset, the length of the dataloader is the total number # of samples rather than the number of batches, and this messes up the tqdm progress bar. # So we set the number of steps manually (to the correct number) to fix this. - if 'is_tarred' in train_data_config and train_data_config['is_tarred']: + if ( + self._train_dl is not None + and hasattr(self._train_dl, 'dataset') + and isinstance(self._train_dl.dataset, torch.utils.data.IterableDataset) + ): # We also need to check if limit_train_batches is already set. # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, # and don't change it. Otherwise, adjust batches accordingly if it's a float (including 1.0). @@ -845,6 +849,7 @@ def output_types(self) -> Optional[Dict[str, NeuralType]]: def __init__(self, cfg: DictConfig, trainer: Trainer = None): self.num_classes = len(cfg.labels) self.eval_loop_cnt = 0 + self.ratio_threshold = cfg.get('ratio_threshold', 0.2) super().__init__(cfg=cfg, trainer=trainer) @classmethod @@ -1063,8 +1068,9 @@ def reshape_labels(self, logits, labels, logits_len, labels_len): Reshape labels to match logits shape. For example, each label is expected to cover a 40ms frame, while each frme prediction from the model covers 20ms. If labels are shorter than logits, labels are repeated, otherwise labels are folded and argmax is applied to obtain the label of each frame. When lengths of labels and logits are not factors of each other, labels are truncated or padded with zeros. - The threshold 0.2 is used to determine whether to pad or truncate labels, where the value 0.2 is not important as in real cases the ratio - is very close to either ceil(ratio) or floor(ratio). We use 0.2 here for easier unit-testing. + The ratio_threshold=0.2 is used to determine whether to pad or truncate labels, where the value 0.2 is not important as in real cases the ratio + is very close to either ceil(ratio) or floor(ratio). We use 0.2 here for easier unit-testing. This implementation does not allow frame length + and label length that are not multiples of each other. Args: logits: logits tensor with shape [B, T1, C] labels: labels tensor with shape [B, T2] @@ -1080,7 +1086,7 @@ def reshape_labels(self, logits, labels, logits_len, labels_len): if logits_max_len < labels_max_len: ratio = labels_max_len // logits_max_len res = labels_max_len % logits_max_len - if ceil(ratio) - ratio < 0.2: # e.g., ratio is 1.99 + if ceil(ratio) - ratio < self.ratio_threshold: # e.g., ratio is 1.99 # pad labels with zeros until labels_max_len is a multiple of logits_max_len labels = labels.cpu().tolist() if len(labels) % ceil(ratio) != 0: @@ -1101,7 +1107,7 @@ def reshape_labels(self, logits, labels, logits_len, labels_len): elif logits_max_len > labels_max_len: ratio = logits_max_len / labels_max_len res = logits_max_len % labels_max_len - if ceil(ratio) - ratio < 0.2: # e.g., ratio is 1.99 + if ceil(ratio) - ratio < self.ratio_threshold: # e.g., ratio is 1.99 # repeat labels for ceil(ratio) times, and DROP additional labels based on logits_max_len labels = labels.repeat_interleave(ceil(ratio), dim=1).long() labels = labels[:, :logits_max_len] diff --git a/nemo/collections/asr/models/confidence_ensemble.py b/nemo/collections/asr/models/confidence_ensemble.py index 0a5441a1cd52..dd52d9a7010a 100644 --- a/nemo/collections/asr/models/confidence_ensemble.py +++ b/nemo/collections/asr/models/confidence_ensemble.py @@ -86,9 +86,10 @@ def get_filtered_logprobs(hypothesis: Hypothesis, exclude_blank: bool) -> torch. filtered_logprobs = [] for alignment in hypothesis.alignments: for align_elem in alignment: - if exclude_blank and align_elem[1].item() != align_elem[0].shape[-1] - 1: + if not exclude_blank: + filtered_logprobs.append(align_elem[0]) + elif align_elem[1].item() != align_elem[0].shape[-1] - 1: filtered_logprobs.append(align_elem[0]) - filtered_logprobs.append(align_elem[0]) if not filtered_logprobs: # for the edge-case of all blanks filtered_logprobs.append(align_elem[0]) filtered_logprobs = torch.stack(filtered_logprobs) @@ -101,8 +102,15 @@ def get_filtered_logprobs(hypothesis: Hypothesis, exclude_blank: bool) -> torch. if exclude_blank: # filtering blanks labels = logprobs.argmax(dim=-1) filtered_logprobs = logprobs[labels != logprobs.shape[1] - 1] + if filtered_logprobs.shape[0] == 0: # for the edge-case of all blanks + filtered_logprobs = logprobs[:1] else: filtered_logprobs = logprobs + + # need to make sure logprobs are always normalized, so checking if they sum up to 1 + if not torch.allclose(filtered_logprobs[0].exp().sum(), torch.tensor(1.0)): + filtered_logprobs = torch.log_softmax(filtered_logprobs, dim=1) + return filtered_logprobs @@ -136,6 +144,7 @@ def compute_confidence(hypothesis: Hypothesis, confidence_cfg: ConfidenceConfig) conf_func = get_confidence_measure_bank()[conf_type] conf_value = aggr_func(conf_func(filtered_logprobs, v=vocab_size, t=alpha)).cpu().item() + return conf_value @@ -213,10 +222,6 @@ def update_decoding_parameters(self, decoding_cfg: DictConfig): with open_dict(decoding_cfg): decoding_cfg.temperature = self.cfg.temperature decoding_cfg.preserve_alignments = True - if 'confidence_cfg' in decoding_cfg: - decoding_cfg.confidence_cfg.preserve_frame_confidence = True - else: - decoding_cfg.confidence_cfg = ConfidenceConfig(preserve_frame_confidence=True) def setup_training_data(self, train_data_config: Union[DictConfig, Dict]): """Pass-through to the ensemble models. diff --git a/nemo/collections/asr/models/ctc_bpe_models.py b/nemo/collections/asr/models/ctc_bpe_models.py index a74c7f3de5c2..aa26f27c29ab 100644 --- a/nemo/collections/asr/models/ctc_bpe_models.py +++ b/nemo/collections/asr/models/ctc_bpe_models.py @@ -106,7 +106,7 @@ def _setup_dataloader_from_config(self, config: Optional[Dict]): return dataset shuffle = config['shuffle'] - if config.get('is_tarred', False): + if isinstance(dataset, torch.utils.data.IterableDataset): shuffle = False if hasattr(dataset, 'collate_fn'): @@ -606,4 +606,18 @@ def list_available_models(cls) -> List[PretrainedModelInfo]: ) results.append(model) + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_ctc_xlarge", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_xlarge", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_ctc_xlarge/versions/1.20.0/files/stt_en_fastconformer_ctc_xlarge.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_ctc_xxlarge", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_xxlarge", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_ctc_xxlarge/versions/1.20.1/files/stt_en_fastconformer_ctc_xxlarge.nemo", + ) + results.append(model) + return results diff --git a/nemo/collections/asr/models/ctc_models.py b/nemo/collections/asr/models/ctc_models.py index 1446e1ce871f..d995544513de 100644 --- a/nemo/collections/asr/models/ctc_models.py +++ b/nemo/collections/asr/models/ctc_models.py @@ -365,7 +365,7 @@ def _setup_dataloader_from_config(self, config: Optional[Dict]): return dataset shuffle = config['shuffle'] - if config.get('is_tarred', False): + if isinstance(dataset, torch.utils.data.IterableDataset): shuffle = False if hasattr(dataset, 'collate_fn'): @@ -413,7 +413,11 @@ def setup_training_data(self, train_data_config: Optional[Union[DictConfig, Dict # Need to set this because if using an IterableDataset, the length of the dataloader is the total number # of samples rather than the number of batches, and this messes up the tqdm progress bar. # So we set the number of steps manually (to the correct number) to fix this. - if 'is_tarred' in train_data_config and train_data_config['is_tarred']: + if ( + self._train_dl is not None + and hasattr(self._train_dl, 'dataset') + and isinstance(self._train_dl.dataset, torch.utils.data.IterableDataset) + ): # We also need to check if limit_train_batches is already set. # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, # and don't change it. Otherwise, adjust batches accordingly if it's a float (including 1.0). diff --git a/nemo/collections/asr/models/hybrid_asr_tts_models.py b/nemo/collections/asr/models/hybrid_asr_tts_models.py index 8486f956c3b7..8494a093b29d 100644 --- a/nemo/collections/asr/models/hybrid_asr_tts_models.py +++ b/nemo/collections/asr/models/hybrid_asr_tts_models.py @@ -311,8 +311,10 @@ def from_pretrained_models( ) ) else: + cfg = copy.deepcopy(cfg) # copy to avoid modifying original config cfg.tts_model_path = f"{tts_model_path}" cfg.asr_model_path = f"{asr_model_path}" + cfg.enhancer_model_path = f"{enhancer_model_path}" if enhancer_model_path is not None else None return ASRWithTTSModel(cfg, trainer=trainer) def __setattr__(self, name, value): diff --git a/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py b/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py index b88669a1fbc0..7f1a22a9b2b8 100644 --- a/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py +++ b/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py @@ -144,7 +144,7 @@ def _setup_dataloader_from_config(self, config: Optional[Dict]): return dataset shuffle = config['shuffle'] - if config.get('is_tarred', False): + if isinstance(dataset, torch.utils.data.IterableDataset): shuffle = False if hasattr(dataset, 'collate_fn'): @@ -523,4 +523,32 @@ def list_available_models(cls) -> List[PretrainedModelInfo]: ) results.append(model) + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_hybrid_large_streaming_80ms", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_streaming_80ms", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_hybrid_large_streaming_80ms/versions/1.20.0/files/stt_en_fastconformer_hybrid_large_streaming_80ms.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_hybrid_large_streaming_480ms", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_streaming_480ms", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_hybrid_large_streaming_480ms/versions/1.20.0/files/stt_en_fastconformer_hybrid_large_streaming_480ms.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_hybrid_large_streaming_1040ms", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_streaming_1040ms", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_hybrid_large_streaming_1040ms/versions/1.20.0/files/stt_en_fastconformer_hybrid_large_streaming_1040ms.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_hybrid_large_streaming_multi", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_streaming_multi", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_hybrid_large_streaming_multi/versions/1.20.0/files/stt_en_fastconformer_hybrid_large_streaming_multi.nemo", + ) + results.append(model) + return results diff --git a/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py b/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py index 5ca6124ecfd7..11c616b1257f 100644 --- a/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py +++ b/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py @@ -645,6 +645,20 @@ def multi_test_epoch_end(self, outputs, dataloader_idx: int = 0): self.finalize_interctc_metrics(metrics, outputs, prefix="test_") return metrics + # EncDecRNNTModel is exported in 2 parts + def list_export_subnets(self): + if self.cur_decoder == 'rnnt': + return ['encoder', 'decoder_joint'] + else: + return ['self'] + + @property + def output_module(self): + if self.cur_decoder == 'rnnt': + return self.decoder + else: + return self.ctc_decoder + @classmethod def list_available_models(cls) -> Optional[PretrainedModelInfo]: """ diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index aefa8743826b..1a284aca609d 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -92,6 +92,13 @@ def list_available_models(cls) -> List[PretrainedModelInfo]: ) result.append(model) + model = PretrainedModelInfo( + pretrained_model_name="titanet_small", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:titanet_small", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/titanet_small/versions/1.19.0/files/titanet-s.nemo", + ) + result.append(model) + return result def __init__(self, cfg: DictConfig, trainer: Trainer = None): @@ -268,7 +275,11 @@ def setup_training_data(self, train_data_layer_config: Optional[Union[DictConfig # Need to set this because if using an IterableDataset, the length of the dataloader is the total number # of samples rather than the number of batches, and this messes up the tqdm progress bar. # So we set the number of steps manually (to the correct number) to fix this. - if 'is_tarred' in train_data_layer_config and train_data_layer_config['is_tarred']: + if ( + self._train_dl is not None + and hasattr(self._train_dl, 'dataset') + and isinstance(self._train_dl.dataset, torch.utils.data.IterableDataset) + ): # We also need to check if limit_train_batches is already set. # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, # and don't change it. Otherwise, adjust batches accordingly if it's a float (including 1.0). diff --git a/nemo/collections/asr/models/rnnt_bpe_models.py b/nemo/collections/asr/models/rnnt_bpe_models.py index 6fed8be9d410..c72d18a8023b 100644 --- a/nemo/collections/asr/models/rnnt_bpe_models.py +++ b/nemo/collections/asr/models/rnnt_bpe_models.py @@ -253,6 +253,20 @@ def list_available_models(cls) -> List[PretrainedModelInfo]: ) results.append(model) + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_transducer_xlarge", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_xlarge", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_transducer_xlarge/versions/1.20.1/files/stt_en_fastconformer_transducer_xlarge.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_transducer_xxlarge", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_xxlarge", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_transducer_xxlarge/versions/1.20.1/files/stt_en_fastconformer_transducer_xxlarge.nemo", + ) + results.append(model) + return results def __init__(self, cfg: DictConfig, trainer: Trainer = None): @@ -480,7 +494,7 @@ def _setup_dataloader_from_config(self, config: Optional[Dict]): return dataset shuffle = config['shuffle'] - if config.get('is_tarred', False): + if isinstance(dataset, torch.utils.data.IterableDataset): shuffle = False if hasattr(dataset, 'collate_fn'): diff --git a/nemo/collections/asr/models/rnnt_models.py b/nemo/collections/asr/models/rnnt_models.py index 84e08635834d..0c1da97c5012 100644 --- a/nemo/collections/asr/models/rnnt_models.py +++ b/nemo/collections/asr/models/rnnt_models.py @@ -28,7 +28,7 @@ from nemo.collections.asr.data.audio_to_text_dali import AudioToCharDALIDataset, DALIOutputs from nemo.collections.asr.losses.rnnt import RNNTLoss, resolve_rnnt_default_loss_name from nemo.collections.asr.metrics.rnnt_wer import RNNTWER, RNNTDecoding, RNNTDecodingConfig -from nemo.collections.asr.models.asr_model import ASRModel +from nemo.collections.asr.models.asr_model import ASRModel, ExportableEncDecModel from nemo.collections.asr.modules.rnnt import RNNTDecoderJoint from nemo.collections.asr.parts.mixins import ASRModuleMixin from nemo.collections.asr.parts.utils.audio_utils import ChannelSelectorType @@ -39,7 +39,7 @@ from nemo.utils import logging -class EncDecRNNTModel(ASRModel, ASRModuleMixin, Exportable): +class EncDecRNNTModel(ASRModel, ASRModuleMixin, ExportableEncDecModel): """Base class for encoder decoder RNNT-based models.""" def __init__(self, cfg: DictConfig, trainer: Trainer = None): @@ -71,8 +71,13 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): # Setup RNNT Loss loss_name, loss_kwargs = self.extract_rnnt_loss_cfg(self.cfg.get("loss", None)) + num_classes = self.joint.num_classes_with_blank - 1 # for standard RNNT and multi-blank + + if loss_name == 'tdt': + num_classes = num_classes - self.joint.num_extra_outputs + self.loss = RNNTLoss( - num_classes=self.joint.num_classes_with_blank - 1, + num_classes=num_classes, loss_name=loss_name, loss_kwargs=loss_kwargs, reduction=self.cfg.get("rnnt_reduction", "mean_batch"), @@ -470,7 +475,7 @@ def _setup_dataloader_from_config(self, config: Optional[Dict]): return dataset shuffle = config['shuffle'] - if config.get('is_tarred', False): + if isinstance(dataset, torch.utils.data.IterableDataset): shuffle = False if hasattr(dataset, 'collate_fn'): @@ -518,7 +523,11 @@ def setup_training_data(self, train_data_config: Optional[Union[DictConfig, Dict # Need to set this because if using an IterableDataset, the length of the dataloader is the total number # of samples rather than the number of batches, and this messes up the tqdm progress bar. # So we set the number of steps manually (to the correct number) to fix this. - if 'is_tarred' in train_data_config and train_data_config['is_tarred']: + if ( + self._train_dl is not None + and hasattr(self._train_dl, 'dataset') + and isinstance(self._train_dl.dataset, torch.utils.data.IterableDataset) + ): # We also need to check if limit_train_batches is already set. # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, # and don't change it. Otherwise, adjust batches accordingly if it's a float (including 1.0). @@ -951,6 +960,14 @@ def list_export_subnets(self): def decoder_joint(self): return RNNTDecoderJoint(self.decoder, self.joint) + def set_export_config(self, args): + if 'decoder_type' in args: + if hasattr(self, 'change_decoding_strategy'): + self.change_decoding_strategy(decoder_type=args['decoder_type']) + else: + raise Exception("Model does not have decoder type option") + super().set_export_config(args) + @classmethod def list_available_models(cls) -> List[PretrainedModelInfo]: """ diff --git a/nemo/collections/asr/models/slu_models.py b/nemo/collections/asr/models/slu_models.py index 2062397c511c..6df907334662 100644 --- a/nemo/collections/asr/models/slu_models.py +++ b/nemo/collections/asr/models/slu_models.py @@ -436,7 +436,11 @@ def setup_training_data(self, train_data_config: Optional[Union[DictConfig, Dict # Need to set this because if using an IterableDataset, the length of the dataloader is the total number # of samples rather than the number of batches, and this messes up the tqdm progress bar. # So we set the number of steps manually (to the correct number) to fix this. - if 'is_tarred' in train_data_config and train_data_config['is_tarred']: + if ( + self._train_dl is not None + and hasattr(self._train_dl, 'dataset') + and isinstance(self._train_dl.dataset, torch.utils.data.IterableDataset) + ): # We also need to check if limit_train_batches is already set. # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, # and don't change it. Otherwise, adjust batches accordingly if it's a float (including 1.0). diff --git a/nemo/collections/asr/models/ssl_models.py b/nemo/collections/asr/models/ssl_models.py index 3433a52f3090..8de713ca948d 100644 --- a/nemo/collections/asr/models/ssl_models.py +++ b/nemo/collections/asr/models/ssl_models.py @@ -94,6 +94,9 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): # need to be separate for moduledict for decoder_loss_name, decoder_loss_cfg in self._cfg.loss_list.items(): + if not decoder_loss_cfg.get("is_active", True): # active by default + continue + new_decoder_loss = { 'decoder': SpeechEncDecSelfSupervisedModel.from_config_dict(decoder_loss_cfg.decoder), 'loss': SpeechEncDecSelfSupervisedModel.from_config_dict(decoder_loss_cfg.loss), @@ -231,7 +234,11 @@ def setup_training_data(self, train_data_config: Optional[Union[DictConfig, Dict # Need to set this because if using an IterableDataset, the length of the dataloader is the total number # of samples rather than the number of batches, and this messes up the tqdm progress bar. # So we set the number of steps manually (to the correct number) to fix this. - if 'is_tarred' in train_data_config and train_data_config['is_tarred']: + if ( + self._train_dl is not None + and hasattr(self._train_dl, 'dataset') + and isinstance(self._train_dl.dataset, torch.utils.data.IterableDataset) + ): # We also need to check if limit_train_batches is already set. # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, # and don't change it. Otherwise, adjust batches accordingly if it's a float (including 1.0). @@ -267,7 +274,11 @@ def setup_validation_data(self, val_data_config: Optional[Union[DictConfig, Dict # Need to set this because if using an IterableDataset, the length of the dataloader is the total number # of samples rather than the number of batches, and this messes up the tqdm progress bar. # So we set the number of steps manually (to the correct number) to fix this. - if 'is_tarred' in val_data_config and val_data_config['is_tarred']: + if ( + self._validation_dl is not None + and hasattr(self._validation_dl, 'dataset') + and isinstance(self._validation_dl.dataset, torch.utils.data.IterableDataset) + ): # We also need to check if limit_train_batches is already set. # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, # and don't change it. Otherwise, adjust batches accordingly if it's a float (including 1.0). diff --git a/nemo/collections/asr/modules/audio_preprocessing.py b/nemo/collections/asr/modules/audio_preprocessing.py index fbd05cb1809b..91c0c10b9604 100644 --- a/nemo/collections/asr/modules/audio_preprocessing.py +++ b/nemo/collections/asr/modules/audio_preprocessing.py @@ -608,7 +608,7 @@ def forward(self, input_spec, length): for idx in range(input_spec.shape[0]): cur_len = length[idx] - patches = range(cur_len // self.patch_size - 1) + patches = range(cur_len // self.patch_size) masked_patches = random.sample(patches, mask_patches) for mp in masked_patches: diff --git a/nemo/collections/asr/modules/conformer_encoder.py b/nemo/collections/asr/modules/conformer_encoder.py index df5b8f5c69ed..323ea3341000 100644 --- a/nemo/collections/asr/modules/conformer_encoder.py +++ b/nemo/collections/asr/modules/conformer_encoder.py @@ -13,6 +13,7 @@ # limitations under the License. import math +import random from collections import OrderedDict from dataclasses import dataclass from typing import List, Optional, Set @@ -46,6 +47,7 @@ from nemo.core.classes.mixins import AccessMixin, adapter_mixins from nemo.core.classes.module import NeuralModule from nemo.core.neural_types import AcousticEncodedRepresentation, ChannelType, LengthsType, NeuralType, SpectrogramType +from nemo.utils import logging __all__ = ['ConformerEncoder'] @@ -67,6 +69,8 @@ class ConformerEncoder(NeuralModule, StreamingEncoder, Exportable, AccessMixin): Defaults to striding. subsampling_factor (int): the subsampling factor which should be power of 2 Defaults to 4. + subsampling_conv_chunking_factor(int): optionally, force chunk inputs (helpful for large inputs) + Should be power of 2, 1 (auto-chunking, default), or -1 (no chunking) subsampling_conv_channels (int): the size of the convolutions in the subsampling module Defaults to -1 which would set it to d_model. reduction (str, Optional): the method of reduction, choices=['pooling', 'striding']. If no value @@ -87,9 +91,13 @@ class ConformerEncoder(NeuralModule, StreamingEncoder, Exportable, AccessMixin): Defaults to 5000 n_heads (int): number of heads in multi-headed attention layers Defaults to 4. - att_context_size (List[int]): List of 2 ints corresponding to left and right attention context sizes, - or None for full context. - Defaults to None. + att_context_size (List[Union[List[int],int]]): specifies the context sizes on each side. Each context size should be a list of two integers like [100,100]. + A list of context sizes like [[100,100],[100,50]] can also be passed. -1 means unlimited context. + Defaults to [-1,-1] + att_context_probs (List[float]): a list of probabilities of each one of the att_context_size when a list of them is passed. If not specified, uniform distribution is being used. + Defaults to None + att_context_style (str): 'regular' or 'chunked_limited'. + Defaults to 'regular' xscaling (bool): enables scaling the inputs to the multi-headed attention layers by sqrt(d_model) Defaults to True. untie_biases (bool): whether to not share (untie) the bias weights between layers of Transformer-XL @@ -98,6 +106,11 @@ class ConformerEncoder(NeuralModule, StreamingEncoder, Exportable, AccessMixin): Defaults to 31. conv_norm_type (str): the type of the normalization in the convolutional modules Defaults to 'batch_norm'. + conv_context_size (list): it can be"causal" or a list of two integers while conv_context_size[0]+conv_context_size[1]+1==conv_kernel_size. + None means [(conv_kernel_size-1)//2, (conv_kernel_size-1)//2], and 'causal' means [(conv_kernel_size-1), 0]. + Defaults to None. + conv_dual_mode (bool): specifies if convolution should be dual mode when dual_offline mode is being used. When enables, the left half of the convolution kernel would get masked in streaming cases. + Defaults to False dropout (float): the dropout rate used in all layers except the attention layers Defaults to 0.1. dropout_pre_encoder (float): the dropout rate used before the encoder @@ -245,6 +258,7 @@ def __init__( causal_downsampling=False, subsampling='striding', subsampling_factor=4, + subsampling_conv_chunking_factor=1, subsampling_conv_channels=-1, reduction=None, reduction_position=None, @@ -253,6 +267,7 @@ def __init__( self_attention_model='rel_pos', n_heads=4, att_context_size=None, + att_context_probs=None, att_context_style='regular', xscaling=True, untie_biases=True, @@ -276,60 +291,28 @@ def __init__( self.d_model = d_model self.n_layers = n_layers self._feat_in = feat_in - self.scale = math.sqrt(self.d_model) self.att_context_style = att_context_style self.subsampling_factor = subsampling_factor + self.subsampling_conv_chunking_factor = subsampling_conv_chunking_factor self.self_attention_model = self_attention_model self.global_tokens = global_tokens self.global_attn_separate = global_attn_separate self.global_tokens_spacing = global_tokens_spacing - if att_context_size: - self.att_context_size = list(att_context_size) - else: - self.att_context_size = [-1, -1] - - if isinstance(conv_context_size, ListConfig): - conv_context_size = list(conv_context_size) - - if conv_context_size is not None: - if ( - not isinstance(conv_context_size, list) - and not isinstance(conv_context_size, str) - and not isinstance(conv_context_size, ListConfig) - ): - raise ValueError( - f"Invalid conv_context_size! It should be the string 'causal' or a list of two integers." - ) - if conv_context_size == "causal": - conv_context_size = [conv_kernel_size - 1, 0] - else: - if conv_context_size[0] + conv_context_size[1] + 1 != conv_kernel_size: - raise ValueError(f"Invalid conv_context_size: {self.conv_context_size}!") - else: - conv_context_size = [(conv_kernel_size - 1) // 2, (conv_kernel_size - 1) // 2] - self.conv_context_size = conv_context_size - - if att_context_style == "chunked_limited": - # the left context for self-attention in chunked_limited mode should be dividable by the right context - # right context=att_context_size[1]+1, and left_context=self.att_context_size[0] - if self.att_context_size[0] > 0 and self.att_context_size[0] % (self.att_context_size[1] + 1) > 0: - raise ValueError("att_context_size[0] % (att_context_size[1] + 1) should be zero!") - if self.att_context_size[1] < 0: - raise ValueError("Right context can not be unlimited for chunked_limited style!") - self.chunk_size = self.att_context_size[1] + 1 - - # left_chunks_num specifies the number of chunks to be visible by each chunk on the left side - if self.att_context_size[0] >= 0: - self.left_chunks_num = self.att_context_size[0] // self.chunk_size - else: - self.left_chunks_num = 100000 - - elif att_context_style == "regular": - self.chunk_size = None - else: - raise ValueError("Invalid att_context_style!") + # Setting up the att_context_size + ( + self.att_context_size_all, + self.att_context_size, + self.att_context_probs, + self.conv_context_size, + ) = self._calc_context_sizes( + att_context_style=att_context_style, + att_context_size=att_context_size, + att_context_probs=att_context_probs, + conv_context_size=conv_context_size, + conv_kernel_size=conv_kernel_size, + ) if xscaling: self.xscale = math.sqrt(d_model) @@ -355,6 +338,7 @@ def __init__( feat_in=feat_in, feat_out=d_model, conv_channels=subsampling_conv_channels, + subsampling_conv_chunking_factor=subsampling_conv_chunking_factor, activation=nn.ReLU(True), is_causal=causal_downsampling, ) @@ -374,6 +358,7 @@ def __init__( self._feat_out = d_model + # Biases for relative positional encoding if not untie_biases and self_attention_model == "rel_pos": d_head = d_model // n_heads pos_bias_u = nn.Parameter(torch.Tensor(n_heads, d_head)) @@ -384,8 +369,8 @@ def __init__( pos_bias_u = None pos_bias_v = None + # Positional encodings self.pos_emb_max_len = pos_emb_max_len - self.att_mask = None if self_attention_model == "rel_pos": self.pos_enc = RelPositionalEncoding( d_model=d_model, @@ -453,51 +438,6 @@ def __init__( # will be set in self.forward() if defined in AccessMixin config self.interctc_capture_at_layers = None - def update_max_seq_length(self, seq_length: int, device): - # Find global max audio length across all nodes - if torch.distributed.is_initialized(): - global_max_len = torch.tensor([seq_length], dtype=torch.float32, device=device) - - # Update across all ranks in the distributed system - torch.distributed.all_reduce(global_max_len, op=torch.distributed.ReduceOp.MAX) - - seq_length = global_max_len.to(torch.int64).item() - - if seq_length > self.max_audio_length: - self.set_max_audio_length(seq_length) - - def set_max_audio_length(self, max_audio_length): - """ - Sets maximum input length. - Pre-calculates internal seq_range mask. - """ - self.max_audio_length = max_audio_length - device = next(self.parameters()).device - self.pos_enc.extend_pe(max_audio_length, device) - - if self.self_attention_model != "rel_pos_local_attn": - att_mask = torch.ones(1, max_audio_length, max_audio_length, dtype=torch.bool, device=device) - if self.chunk_size is None: - if self.att_context_size[0] >= 0: - att_mask = att_mask.triu(diagonal=-self.att_context_size[0]) - if self.att_context_size[1] >= 0: - att_mask = att_mask.tril(diagonal=self.att_context_size[1]) - else: - chunk_idx = torch.arange(0, max_audio_length, dtype=torch.int64, device=att_mask.device) - chunk_idx = torch.div(chunk_idx, self.chunk_size, rounding_mode="trunc") - diff_chunks = chunk_idx.unsqueeze(1) - chunk_idx.unsqueeze(0) - chunked_limited_mask = torch.logical_and( - torch.le(diff_chunks, self.left_chunks_num), torch.ge(diff_chunks, 0) - ) - att_mask = torch.logical_and(att_mask, chunked_limited_mask.unsqueeze(0)) - - if hasattr(self, 'att_mask'): - self.att_mask = att_mask - else: - self.register_buffer('att_mask', att_mask, persistent=False) - else: - self.att_mask = None - def forward_for_export( self, audio_signal, length, cache_last_channel=None, cache_last_time=None, cache_last_channel_len=None ): @@ -560,17 +500,19 @@ def forward_internal( self, audio_signal, length, cache_last_channel=None, cache_last_time=None, cache_last_channel_len=None ): self.update_max_seq_length(seq_length=audio_signal.size(2), device=audio_signal.device) - max_audio_length = audio_signal.size(-1) if length is None: length = audio_signal.new_full( - (audio_signal.size(0),), max_audio_length, dtype=torch.int64, device=audio_signal.device + (audio_signal.size(0),), audio_signal.size(-1), dtype=torch.int64, device=audio_signal.device ) - if cache_last_time is not None: - cache_last_time_next = torch.zeros_like(cache_last_time) + # select a random att_context_size with the distribution specified by att_context_probs during training + # for non-validation cases like test, validation or inference, it uses the first mode in self.att_context_size + if self.training and len(self.att_context_size_all) > 1: + cur_att_context_size = random.choices(self.att_context_size_all, weights=self.att_context_probs)[0] else: - cache_last_time_next = None + cur_att_context_size = self.att_context_size + audio_signal = torch.transpose(audio_signal, 1, 2) if isinstance(self.pre_encode, nn.Linear): @@ -583,15 +525,13 @@ def forward_internal( audio_signal = audio_signal[:, self.streaming_cfg.drop_extra_pre_encoded :, :] length = (length - self.streaming_cfg.drop_extra_pre_encoded).clamp(min=0) - max_audio_length = audio_signal.size(1) - if self.reduction_position is not None and cache_last_channel is not None: raise ValueError("Caching with reduction feature is not supported yet!") + max_audio_length = audio_signal.size(1) if cache_last_channel is not None: cache_len = self.streaming_cfg.last_channel_cache_size cache_keep_size = max_audio_length - self.streaming_cfg.cache_drop_size - cache_last_channel_next = torch.zeros_like(cache_last_channel) max_audio_length = max_audio_length + cache_len padding_length = length + cache_len offset = torch.neg(cache_last_channel_len) + cache_len @@ -601,31 +541,47 @@ def forward_internal( cache_len = 0 offset = None - if self.self_attention_model == 'abs_pos': - audio_signal, pos_emb = self.pos_enc(x=audio_signal) - else: - audio_signal, pos_emb = self.pos_enc(x=audio_signal, cache_len=cache_len) + audio_signal, pos_emb = self.pos_enc(x=audio_signal, cache_len=cache_len) # Create the self-attention and padding masks - pad_mask, att_mask = self._create_masks(max_audio_length, padding_length, offset, audio_signal.device) + pad_mask, att_mask = self._create_masks( + att_context_size=cur_att_context_size, + padding_length=padding_length, + max_audio_length=max_audio_length, + offset=offset, + device=audio_signal.device, + ) if cache_last_channel is not None: pad_mask = pad_mask[:, cache_len:] - if self.att_mask is not None: + if att_mask is not None: att_mask = att_mask[:, cache_len:] + # Convert caches from the tensor to list + cache_last_time_next = [] + cache_last_channel_next = [] for lth, (drop_prob, layer) in enumerate(zip(self.layer_drop_probs, self.layers)): original_signal = audio_signal + if cache_last_channel is not None: + cache_last_channel_cur = cache_last_channel[lth] + cache_last_time_cur = cache_last_time[lth] + else: + cache_last_channel_cur = None + cache_last_time_cur = None audio_signal = layer( x=audio_signal, att_mask=att_mask, pos_emb=pos_emb, pad_mask=pad_mask, - cache_last_channel=cache_last_channel, - cache_last_time=cache_last_time, - cache_last_channel_next=cache_last_channel_next, - cache_last_time_next=cache_last_time_next, + cache_last_channel=cache_last_channel_cur, + cache_last_time=cache_last_time_cur, ) + + if cache_last_channel_cur is not None: + (audio_signal, cache_last_channel_cur, cache_last_time_cur) = audio_signal + cache_last_channel_next.append(cache_last_channel_cur) + cache_last_time_next.append(cache_last_time_cur) + # applying stochastic depth logic from https://arxiv.org/abs/2102.03216 if self.training and drop_prob > 0.0: should_drop = torch.rand(1) < drop_prob @@ -645,7 +601,13 @@ def forward_internal( # Don't update the audio_signal here because then it will again scale the audio_signal # and cause an increase in the WER _, pos_emb = self.pos_enc(x=audio_signal, cache_len=cache_len) - pad_mask, att_mask = self._create_masks(max_audio_length, length, offset, audio_signal.device) + pad_mask, att_mask = self._create_masks( + att_context_size=cur_att_context_size, + padding_length=length, + max_audio_length=max_audio_length, + offset=offset, + device=audio_signal.device, + ) # saving tensors if required for interctc loss if self.is_access_enabled(): @@ -672,6 +634,8 @@ def forward_internal( length = length.to(dtype=torch.int64) if cache_last_channel is not None: + cache_last_channel_next = torch.stack(cache_last_channel_next, dim=0) + cache_last_time_next = torch.stack(cache_last_time_next, dim=0) return ( audio_signal, length, @@ -682,7 +646,60 @@ def forward_internal( else: return audio_signal, length - def _create_masks(self, max_audio_length, padding_length, offset, device): + def update_max_seq_length(self, seq_length: int, device): + # Find global max audio length across all nodes + if torch.distributed.is_initialized(): + global_max_len = torch.tensor([seq_length], dtype=torch.float32, device=device) + + # Update across all ranks in the distributed system + torch.distributed.all_reduce(global_max_len, op=torch.distributed.ReduceOp.MAX) + + seq_length = global_max_len.int().item() + + if seq_length > self.max_audio_length: + self.set_max_audio_length(seq_length) + + def set_max_audio_length(self, max_audio_length): + """ + Sets maximum input length. + Pre-calculates internal seq_range mask. + """ + self.max_audio_length = max_audio_length + device = next(self.parameters()).device + self.pos_enc.extend_pe(max_audio_length, device) + + def _create_masks(self, att_context_size, padding_length, max_audio_length, offset, device): + if self.self_attention_model != "rel_pos_local_attn": + att_mask = torch.ones(1, max_audio_length, max_audio_length, dtype=torch.bool, device=device) + + if self.att_context_style == "regular": + if att_context_size[0] >= 0: + att_mask = att_mask.triu(diagonal=-att_context_size[0]) + if att_context_size[1] >= 0: + att_mask = att_mask.tril(diagonal=att_context_size[1]) + elif self.att_context_style == "chunked_limited": + # When right context is unlimited, just the left side of the masking need to get updated + if att_context_size[1] == -1: + if att_context_size[0] >= 0: + att_mask = att_mask.triu(diagonal=-att_context_size[0]) + else: + chunk_size = att_context_size[1] + 1 + # left_chunks_num specifies the number of chunks to be visible by each chunk on the left side + if att_context_size[0] >= 0: + left_chunks_num = att_context_size[0] // chunk_size + else: + left_chunks_num = 10000 + + chunk_idx = torch.arange(0, max_audio_length, dtype=torch.int, device=att_mask.device) + chunk_idx = torch.div(chunk_idx, chunk_size, rounding_mode="trunc") + diff_chunks = chunk_idx.unsqueeze(1) - chunk_idx.unsqueeze(0) + chunked_limited_mask = torch.logical_and( + torch.le(diff_chunks, left_chunks_num), torch.ge(diff_chunks, 0) + ) + att_mask = torch.logical_and(att_mask, chunked_limited_mask.unsqueeze(0)) + else: + att_mask = None + # pad_mask is the masking to be used to ignore paddings pad_mask = torch.arange(0, max_audio_length, device=device).expand( padding_length.size(0), -1 @@ -692,24 +709,19 @@ def _create_masks(self, max_audio_length, padding_length, offset, device): pad_mask_off = torch.arange(0, max_audio_length, device=device).expand( padding_length.size(0), -1 ) >= offset.unsqueeze(-1) - pad_mask = pad_mask_off.logical_and(pad_mask) - if self.att_mask is not None: + if att_mask is not None: # pad_mask_for_att_mask is the mask which helps to ignore paddings pad_mask_for_att_mask = pad_mask.unsqueeze(1).repeat([1, max_audio_length, 1]) pad_mask_for_att_mask = torch.logical_and(pad_mask_for_att_mask, pad_mask_for_att_mask.transpose(1, 2)) # att_mask is the masking to be used by the MHA layers to ignore the tokens not supposed to be visible - att_mask = self.att_mask[:, :max_audio_length, :max_audio_length] + att_mask = att_mask[:, :max_audio_length, :max_audio_length] # paddings should also get ignored, so pad_mask_for_att_mask is used to ignore their corresponding scores att_mask = torch.logical_and(pad_mask_for_att_mask, att_mask.to(pad_mask_for_att_mask.device)) - att_mask = ~att_mask - else: - att_mask = None pad_mask = ~pad_mask - return pad_mask, att_mask def enable_pad_mask(self, on=True): @@ -718,8 +730,69 @@ def enable_pad_mask(self, on=True): self.use_pad_mask = on return mask + def _calc_context_sizes( + self, att_context_size, att_context_probs, att_context_style, conv_context_size, conv_kernel_size + ): + # convert att_context_size to a standard list of lists + if att_context_size: + att_context_size_all = list(att_context_size) + if isinstance(att_context_size_all[0], int): + att_context_size_all = [att_context_size_all] + for i, att_cs in enumerate(att_context_size_all): + if isinstance(att_cs, ListConfig): + att_context_size_all[i] = list(att_cs) + if att_context_style == "chunked_limited": + if att_cs[0] > 0 and att_cs[0] % (att_cs[1] + 1) > 0: + raise ValueError(f"att_context_size[{i}][0] % (att_context_size[{i}][1] + 1) should be zero!") + if att_cs[1] < 0 and len(att_context_size_all) <= 1: + raise ValueError( + f"Right context (att_context_size[{i}][1]) can not be unlimited for chunked_limited style!" + ) + else: + att_context_size_all = [[-1, -1]] + + if att_context_probs: + if len(att_context_probs) != len(att_context_size_all): + raise ValueError("The size of the att_context_probs should be the same as att_context_size.") + att_context_probs = list(att_context_probs) + if sum(att_context_probs) != 1: + raise ValueError( + "The sum of numbers in att_context_probs should be equal to one to be a distribution." + ) + else: + att_context_probs = [1.0 / len(att_context_size_all)] * len(att_context_size_all) + + if conv_context_size is not None: + if isinstance(conv_context_size, ListConfig): + conv_context_size = list(conv_context_size) + if not isinstance(conv_context_size, list) and not isinstance(conv_context_size, str): + raise ValueError( + f"Invalid conv_context_size! It should be the string 'causal' or a list of two integers." + ) + if conv_context_size == "causal": + conv_context_size = [conv_kernel_size - 1, 0] + else: + if conv_context_size[0] + conv_context_size[1] + 1 != conv_kernel_size: + raise ValueError(f"Invalid conv_context_size: {self.conv_context_size}!") + else: + conv_context_size = [(conv_kernel_size - 1) // 2, (conv_kernel_size - 1) // 2] + return att_context_size_all, att_context_size_all[0], att_context_probs, conv_context_size + + def set_default_att_context_size(self, att_context_size): + if att_context_size not in self.att_context_size_all: + logging.warning( + f"att_context_size={att_context_size} is not among the list of the supported look-aheads: {self.att_context_size_all}" + ) + if att_context_size is not None: + self.att_context_size = att_context_size + def setup_streaming_params( - self, chunk_size: int = None, shift_size: int = None, left_chunks: int = None, max_context: int = 10000 + self, + chunk_size: int = None, + shift_size: int = None, + left_chunks: int = None, + att_context_size: list = None, + max_context: int = 10000, ): """ This function sets the needed values and parameters to perform streaming. The configuration would be stored in self.streaming_cfg. @@ -732,25 +805,28 @@ def setup_streaming_params( Defaults to -1 (means feat_out is d_model) """ streaming_cfg = CacheAwareStreamingConfig() + + # When att_context_size is not specified, it uses the default_att_context_size + if att_context_size is None: + att_context_size = self.att_context_size + if chunk_size is not None: if chunk_size < 1: raise ValueError("chunk_size needs to be a number larger or equal to one.") lookahead_steps = chunk_size - 1 streaming_cfg.cache_drop_size = chunk_size - shift_size elif self.att_context_style == "chunked_limited": - lookahead_steps = self.att_context_size[1] + lookahead_steps = att_context_size[1] streaming_cfg.cache_drop_size = 0 elif self.att_context_style == "regular": - lookahead_steps = self.att_context_size[1] * self.n_layers + self.conv_context_size[1] * self.n_layers + lookahead_steps = att_context_size[1] * self.n_layers + self.conv_context_size[1] * self.n_layers streaming_cfg.cache_drop_size = lookahead_steps else: streaming_cfg.cache_drop_size = 0 lookahead_steps = None if chunk_size is None: - streaming_cfg.last_channel_cache_size = ( - self.att_context_size[0] if self.att_context_size[0] >= 0 else max_context - ) + streaming_cfg.last_channel_cache_size = att_context_size[0] if att_context_size[0] >= 0 else max_context else: if left_chunks is None: raise ValueError("left_chunks can not be None when chunk_size is set.") @@ -799,20 +875,12 @@ def setup_streaming_params( else: streaming_cfg.drop_extra_pre_encoded = streaming_cfg.pre_encode_cache_size // self.subsampling_factor - # counting the number of the layers need caching - streaming_cfg.last_channel_num = 0 - streaming_cfg.last_time_num = 0 for m in self.layers.modules(): if hasattr(m, "_max_cache_len"): if isinstance(m, MultiHeadAttention): - m._cache_id = streaming_cfg.last_channel_num m.cache_drop_size = streaming_cfg.cache_drop_size - streaming_cfg.last_channel_num += 1 - if isinstance(m, CausalConv1D): - m._cache_id = streaming_cfg.last_time_num m.cache_drop_size = streaming_cfg.cache_drop_size - streaming_cfg.last_time_num += 1 self.streaming_cfg = streaming_cfg @@ -825,19 +893,12 @@ def get_initial_cache_state(self, batch_size=1, dtype=torch.float32, device=None create_tensor = torch.zeros last_time_cache_size = self.conv_context_size[0] cache_last_channel = create_tensor( - ( - self.streaming_cfg.last_channel_num, - batch_size, - self.streaming_cfg.last_channel_cache_size, - self.d_model, - ), + (len(self.layers), batch_size, self.streaming_cfg.last_channel_cache_size, self.d_model,), device=device, dtype=dtype, ) cache_last_time = create_tensor( - (self.streaming_cfg.last_time_num, batch_size, self.d_model, last_time_cache_size), - device=device, - dtype=dtype, + (len(self.layers), batch_size, self.d_model, last_time_cache_size), device=device, dtype=dtype, ) if max_dim > 0: cache_last_channel_len = torch.randint( @@ -873,9 +934,9 @@ def change_attention_model( 'rel_pos_local_attn': relative positional embedding and Transformer-XL with local attention using overlapping windows. Attention context is determined by att_context_size parameter. 'abs_pos': absolute positional embedding and Transformer - If None is provided, the self_attention_model isn't changed. Defauts to None. + If None is provided, the self_attention_model isn't changed. Defaults to None. att_context_size (List[int]): List of 2 ints corresponding to left and right attention context sizes, - or None to keep as it is. Defauts to None. + or None to keep as it is. Defaults to None. update_config (bool): Whether to update the config or not with the new attention model. Defaults to True. device (torch.device): If provided, new layers will be moved to the device. @@ -884,19 +945,16 @@ def change_attention_model( if att_context_size: att_context_size = list(att_context_size) - elif hasattr(self._cfg, "att_context_size"): - att_context_size = self._cfg.att_context_size else: att_context_size = self.att_context_size if self_attention_model is None: - self_attention_model = self._cfg.self_attention_model + self_attention_model = self.self_attention_model if self_attention_model == 'rel_pos_local_attn' and max(att_context_size) <= 0: raise ValueError("When using local attention, context size must be set > 0") if self_attention_model == "rel_pos": - self.att_mask = None new_pos_enc = RelPositionalEncoding( d_model=self._cfg.d_model, dropout_rate=self._cfg.dropout, @@ -933,7 +991,6 @@ def change_attention_model( for name, m in self.named_modules(): if type(m) == ConformerLayer: - if self_attention_model == 'rel_pos': new_attn = RelPositionMultiHeadAttention( n_head=self._cfg.n_heads, @@ -977,6 +1034,25 @@ def change_attention_model( self._cfg.self_attention_model = self_attention_model self._cfg.att_context_size = att_context_size + def change_subsampling_conv_chunking_factor(self, subsampling_conv_chunking_factor: int): + """ + Update the conv_chunking_factor (int) + Default is 1 (auto) + Set it to -1 (disabled) or to a specific value (power of 2) if you OOM in the conv subsampling layers + + + Args: + subsampling_conv_chunking_factor (int) + """ + + if not hasattr(self.pre_encode, "change_subsampling_conv_chunking_factor"): + logging.info("Model pre_encoder doesn't have a change_subsampling_conv_chunking_factor method ") + return + + self.pre_encode.change_subsampling_conv_chunking_factor( + subsampling_conv_chunking_factor=subsampling_conv_chunking_factor + ) + class ConformerEncoderAdapter(ConformerEncoder, adapter_mixins.AdapterModuleMixin): diff --git a/nemo/collections/asr/modules/squeezeformer_encoder.py b/nemo/collections/asr/modules/squeezeformer_encoder.py index 952c9b53d233..a887abd19ebb 100644 --- a/nemo/collections/asr/modules/squeezeformer_encoder.py +++ b/nemo/collections/asr/modules/squeezeformer_encoder.py @@ -149,7 +149,6 @@ def __init__( d_ff = d_model * ff_expansion_factor self.d_model = d_model self._feat_in = feat_in - self.scale = math.sqrt(self.d_model) if att_context_size: self.att_context_size = att_context_size else: diff --git a/nemo/collections/asr/parts/k2/graph_transducer.py b/nemo/collections/asr/parts/k2/graph_transducer.py new file mode 100644 index 000000000000..5de8064224a1 --- /dev/null +++ b/nemo/collections/asr/parts/k2/graph_transducer.py @@ -0,0 +1,483 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import abc +from contextlib import nullcontext +from typing import ContextManager +import torch +import torch.nn.functional as F + +from nemo.core.classes.loss import Loss +from nemo.core.utils.k2_guard import k2 + + +def force_float32_context() -> ContextManager: + """Get context manager to force float32 precision in autocast mode.""" + if torch.is_autocast_enabled(): + return torch.cuda.amp.autocast(dtype=torch.float32) + return nullcontext() + + +class GraphTransducerLossBase(Loss): + """ + Base class for graph transducer losses. + Implementation of the approach described in "Powerful and Extensible WFST Framework for RNN-Transducer Losses" + https://ieeexplore.ieee.org/document/10096679 + + Compose-Transducer: compose the unit (target text) and temporal schemas (graphs) into lattice. + Subclass should implement `get_unit_schema` and `get_temporal_schema` methods. + Grid-Transducer: construct the RNN-T lattice (grid) directly in code. + Subclass should implement `get_grid` method. + """ + + def __init__( + self, use_grid_implementation: bool, connect_composed=False, double_scores=False, cast_to_float32=False + ): + """ + + Args: + use_grid_implementation: Whether to use the grid implementation (Grid-Transducer). + connect_composed: Connect graph after composing unit and temporal schemas (only for Compose-Transducer). + `connect` operation is slow, it is useful for visualization, but not necessary for loss computation. + double_scores: Use calculation of loss in double precision (float64) in the lattice. + Does not significantly affect memory usage since the lattice is ~V/2 times smaller + than the joint tensor. + cast_to_float32: Force cast joint tensor to float32 before log-softmax calculation. + """ + super().__init__() + self.use_grid_implementation = use_grid_implementation + self.connect_composed = connect_composed + self.double_scores = double_scores + self.cast_to_float32 = cast_to_float32 + + @abc.abstractmethod + def get_unit_schema(self, units_tensor: torch.Tensor, vocab_size: int) -> "k2.Fsa": + """ + Get unit schema (target text) graph for Compose-Transducer. + + Args: + units_tensor: tensor with target text + vocab_size: number of labels (including blank). Needed to construct additional eps-arcs (in some cases). + + Returns: + unit schema graph (k2.Fsa). + Labels: :: (k2.Fsa: labels, aux_labels, unit_positions) + """ + pass + + @abc.abstractmethod + def get_temporal_schema(self, num_frames: int, vocab_size: int, device: torch.device) -> "k2.Fsa": + """ + Get temporal schema graph for Compose-Transducer. + + Args: + num_frames: length of the sequence (in frames) + vocab_size: number of labels (including blank) + device: device for tensor to construct + + Returns: + temporal schema graph (k2.Fsa). + Labels: :. is a unit from vocab + special units (e.g., additional eps). + """ + pass + + @abc.abstractmethod + def get_grid(self, units_tensor: torch.Tensor, num_frames: int, vocab_size: int) -> "k2.Fsa": + """ + Construct the transducer lattice (grid) directly for Grid-Transducer. + + Args: + units_tensor: tensor with target text + num_frames: length of the sequence (in frames) + vocab_size: number of labels (including blank) + + Returns: + transducer lattice (k2.Fsa). + Labels: :: (k2.Fsa: labels, aux_labels, unit_positions) + """ + pass + + def get_composed_lattice(self, units_tensor: torch.Tensor, num_frames: int, vocab_size: int) -> "k2.Fsa": + """ + Get composed lattice (unit and temporal schemas) for Compose-Transducer. Useful for visualization. + Should be equivalent to the lattice from `get_grid` method. + + Args: + units_tensor: tensor with target text + num_frames: length of the sequence (in frames) + vocab_size: vocab size (including blank) + + Returns: + composed lattice (k2.Fsa) from unit and temporal schemas + """ + fsa_text = self.get_unit_schema(units_tensor, vocab_size) + fsa_temporal = self.get_temporal_schema(num_frames, vocab_size, units_tensor.device) + composed = k2.compose(fsa_text, fsa_temporal, treat_epsilons_specially=False) + if self.connect_composed: + composed = k2.connect(composed) + return composed + + def get_graphs_batched( + self, logits_lengths: torch.Tensor, targets: torch.Tensor, target_lengths: torch.Tensor, vocab_size: int + ) -> "k2.Fsa": + """ + Get batched lattice (grid or composed) for the batch of sequences. + + Args: + logits_lengths: tensor with lengths of logits + targets: tensor with target units + target_lengths: tensor with lengths of targets + vocab_size: vocab size (including blank) + + Returns: + batched lattice - FsaVec (k2.Fsa) + """ + batch_size = logits_lengths.shape[0] + with torch.no_grad(): + if self.use_grid_implementation: + return k2.create_fsa_vec( + [ + self.get_grid( + units_tensor=targets[i, : target_lengths[i].item()], + num_frames=logits_lengths[i].item(), + vocab_size=vocab_size, + ) + for i in range(batch_size) + ] + ) + + # composed version + text_fsas = [ + self.get_unit_schema(units_tensor=targets[i, : target_lengths[i].item()], vocab_size=vocab_size,) + for i in range(batch_size) + ] + temporal_fsas = [ + self.get_temporal_schema( + num_frames=logits_lengths[i].item(), vocab_size=vocab_size, device=targets.device + ) + for i in range(batch_size) + ] + target_fsas_vec = k2.compose( + k2.create_fsa_vec(text_fsas), k2.create_fsa_vec(temporal_fsas), treat_epsilons_specially=False + ) + if self.connect_composed: + k2.connect(target_fsas_vec) + return target_fsas_vec + + def get_logits_indices(self, target_fsas_vec: k2.Fsa, logits_shape: torch.Size) -> torch.Tensor: + """ + Get indices of flatten logits for each arc in the lattices. + + Args: + target_fsas_vec: batch of target FSAs with lattices + logits_shape: shape of the logits tensor + + Returns: + 1d tensor with indices + """ + # logits_shape: B x Time x Text+1 x Labels + batch_size = logits_shape[0] + device = target_fsas_vec.device + scores_to_batch_i = torch.repeat_interleave( + torch.arange(batch_size, device=device, dtype=torch.int64), + torch.tensor( + [target_fsas_vec.arcs.index(0, i)[0].values().shape[0] for i in range(batch_size)], device=device, + ), + ) + indices = ( + scores_to_batch_i * logits_shape[1] * logits_shape[2] * logits_shape[3] # Batch + + target_fsas_vec.aux_labels.to(torch.int64) * logits_shape[2] * logits_shape[3] # Time indices + + target_fsas_vec.unit_positions.to(torch.int64) * logits_shape[3] # Units (text) indices + + target_fsas_vec.labels.to(torch.int64) # Labels + ) + return indices + + +class GraphRnntLoss(GraphTransducerLossBase): + """ + RNN-T loss implementation based on WFST according + to "Powerful and Extensible WFST Framework for RNN-Transducer Losses" + https://ieeexplore.ieee.org/document/10096679 + """ + + def __init__( + self, + blank: int, + use_grid_implementation=True, + connect_composed=False, + double_scores=False, + cast_to_float32=False, + ): + """ + Init method + + Args: + blank: blank label index + use_grid_implementation: Whether to use the grid implementation (Grid-Transducer). + connect_composed: Connect graph after composing unit and temporal schemas (only for Compose-Transducer). + `connect` operation is slow, it is useful for visualization, but not necessary for loss computation. + double_scores: Use calculation of loss in double precision (float64) in the lattice. + Does not significantly affect memory usage since the lattice is ~V/2 times smaller than the joint tensor. + cast_to_float32: Force cast joint tensor to float32 before log-softmax calculation. + """ + super().__init__( + use_grid_implementation=use_grid_implementation, + connect_composed=connect_composed, + double_scores=double_scores, + cast_to_float32=cast_to_float32, + ) + self.blank = blank + + def get_unit_schema(self, units_tensor: torch.Tensor, vocab_size: int) -> "k2.Fsa": + """ + Get unit schema (target text) graph for RNN-T loss (Compose-Transducer). + Forward arcs represent text labels. + + Example graph: text [1, 2], blank=0. + + graph:: + + 0:0:0 0:0:1 0:0:2 + +-------+ +-------+ +-------+ + v | v | v | + +-----------+ 1:1:0 +-----------+ 2:2:1 +-----------+ -1:-1:-1 #===# + | 0 | -------> | 1 | -------> | 2 | ---------> H 3 H + +-----------+ +-----------+ +-----------+ #===# + + Args: + units_tensor: 1d tensor with text units + vocab_size: number of total labels (vocab size including blank) + + Returns: + unit schema graph (k2.Fsa). + Labels: :: (k2.Fsa: labels, aux_labels, unit_positions) + """ + + blank_id = self.blank + device = units_tensor.device + text_len = units_tensor.shape[0] + + # arcs + # text_len + 1 states, in every state - self-loops (blank) and forward (text label / last forward -1) + arcs = torch.zeros(((text_len + 1) * 2, 4), dtype=torch.int32, device=device) + text_indices = torch.arange(0, text_len + 1, dtype=torch.int32, device=device) + # blank labels + arcs[::2, 0] = text_indices # from state + arcs[::2, 1] = text_indices # to state + arcs[::2, 2] = blank_id + + # text labels + arcs[1::2, 0] = text_indices # from state + arcs[1::2, 1] = text_indices + 1 # to state + arcs[1:-1:2, 2] = units_tensor # labels: text + + arcs[-1, 2] = -1 # last transition to final state, ilabel=-1 (special for k2) + olabels = arcs[:, 2].detach().clone() # same as ilabels + + fsa_text = k2.Fsa(arcs, olabels) + fsa_text.unit_positions = text_indices.expand(2, -1).transpose(0, 1).flatten() + fsa_text.unit_positions[-1] = -1 # last transition to final state + return fsa_text + + def get_temporal_schema(self, num_frames: int, vocab_size: int, device: torch.device) -> "k2.Fsa": + """ + Get temporal schema graph for RNN-T loss (Compose-Transducer). + Forward arc - blank, self-loops - all labels excluding blank + + Example graph: blank=0, num_frames=3, vocab_size=3. + Labels: :. is a unit from vocab. + + graph:: + + 1:0 1:1 1:2 + +-----+ +-----+ +-----+ + v | v | v | + +---------+ 0:0 +---------+ 0:1 +---------+ 0:2 +---+ -1:-1 #===# + | 0 | -----> | 1 | -----> | 2 | -----> | 3 | -------> H 4 H + +---------+ +---------+ +---------+ +---+ #===# + ^ 2:0 | ^ 2:1 | ^ 2:2 | + +-----+ +-----+ +-----+ + + Args: + num_frames: length of the sequence (in frames) + vocab_size: number of labels (including blank) + device: device for tensor to construct + + Returns: + temporal schema graph (k2.Fsa). + Labels: :. is a unit from vocab. + """ + blank_id = self.blank + + fsa_temporal_arcs = torch.zeros((num_frames * vocab_size + 1, 4), dtype=torch.int32, device=device) + sequence_states = torch.arange(0, num_frames, dtype=torch.int32, device=device) + # for every state - vocab_size arcs, [0, 1, ..., vocab_size-1, 0, 1, ..., vocab_size-1, ...] + start_states = sequence_states.expand(vocab_size, num_frames).transpose(0, 1).flatten() + # first: make all arcs - self-loops + fsa_temporal_arcs[:-1, 0] = start_states # from + fsa_temporal_arcs[:-1, 1] = start_states # to + fsa_temporal_arcs[:-1, 2] = ( + torch.arange(0, vocab_size, dtype=torch.int32, device=device).expand(num_frames, vocab_size).flatten() + ) + + # blank-arcs: forward + fsa_temporal_arcs[blank_id:-1:vocab_size, 1] = sequence_states + 1 # blanks + + # transition to last final state + fsa_temporal_arcs[-1, :3] = torch.tensor((num_frames, num_frames + 1, -1), dtype=torch.int32, device=device) + + # output symbols: position in the sequence, same as start states for arcs + olabels = fsa_temporal_arcs[:, 0].detach().clone() + olabels[-1] = -1 # last arc to final state + + fsa_temporal = k2.Fsa(fsa_temporal_arcs, olabels) + fsa_temporal = k2.arc_sort(fsa_temporal) # need for compose + return fsa_temporal + + @staticmethod + def relabel_states(states: torch.Tensor, n: int, m: int) -> torch.Tensor: + """ + Relabel states to be in topological order: by diagonals + + Args: + states: tensor with states + n: number of rows + m: number of columns + + Returns: + tensor with relabeled states (same shape as `states`) + """ + i = states % n + j = torch.div(states, n, rounding_mode='floor') # states // n, torch.div to avoid pytorch warnings + min_mn = min(m, n) + max_mn = max(m, n) + diag = i + j + anti_diag = m + n - 1 - diag + max_idx = n * m - 1 + cur_diag_idx = i if m > n else m - j - 1 + states = ( + diag.lt(min_mn) * ((diag * (diag + 1) >> 1) + i) + + torch.logical_and(diag.ge(min_mn), diag.lt(max_mn)) + * ((min_mn * (min_mn + 1) >> 1) + (diag - min_mn) * min_mn + cur_diag_idx) + + diag.ge(max_mn) * (max_idx - (anti_diag * (anti_diag + 1) >> 1) + m - j) + ) + return states + + def get_grid(self, units_tensor: torch.Tensor, num_frames: int, vocab_size: int) -> "k2.Fsa": + """ + Construct the RNN-T lattice directly (Grid-Transducer). + + Args: + units_tensor: 1d tensor with text units + num_frames: length of the sequence (number of frames) + vocab_size: number of total labels (vocab size including blank) + + Returns: + transducer lattice (k2.Fsa). + Labels: :: (k2.Fsa: labels, aux_labels, unit_positions) + """ + blank_id = self.blank + text_length = units_tensor.shape[0] + device = units_tensor.device + num_grid_states = num_frames * (text_length + 1) + num_forward_arcs = (num_frames - 1) * (text_length + 1) + num_text_arcs = text_length * num_frames + arcs = torch.zeros((num_forward_arcs + num_text_arcs + 2, 4), dtype=torch.int32, device=device) + # blank transitions + # i, i+, 0 , i / , i % + from_states = torch.arange(num_forward_arcs, device=device) + to_states = from_states + (text_length + 1) + arcs[:num_forward_arcs, 0] = from_states + arcs[:num_forward_arcs, 1] = to_states + arcs[:num_forward_arcs, 2] = blank_id + + # text arcs + from_states = ( + torch.arange(num_grid_states, dtype=torch.int32, device=device) + .reshape(num_frames, text_length + 1)[:, :-1] + .flatten() + ) + to_states = from_states + 1 + ilabels = units_tensor.expand(num_frames, -1).flatten() + arcs[num_forward_arcs:-2, 0] = from_states + arcs[num_forward_arcs:-2, 1] = to_states + arcs[num_forward_arcs:-2, 2] = ilabels + + # last 2 states + arcs[-2, :3] = torch.tensor((num_grid_states - 1, num_grid_states, blank_id), dtype=torch.int32, device=device) + arcs[-1, :3] = torch.tensor((num_grid_states, num_grid_states + 1, -1), dtype=torch.int32, device=device) + + # sequence indices, time indices + olabels = torch.div(arcs[:, 0], (text_length + 1), rounding_mode="floor") # arcs[:, 0] // (text_length + 1) + unit_positions = arcs[:, 0] % (text_length + 1) + # last state: final + olabels[-1] = -1 + unit_positions[-1] = -1 + + # relabel + # instead of using top sort (extremely expensive) k2.top_sort(rnnt_graph) + arcs[:-2, 0] = self.relabel_states(arcs[:-2, 0], text_length + 1, num_frames) + arcs[:-3, 1] = self.relabel_states(arcs[:-3, 1], text_length + 1, num_frames) + + # sort by start state - required in k2 + # TODO: maybe it is more optimal to avoid sort, construct arcs in ascending order + _, indices = torch.sort(arcs[:, 0], dim=0) + sorted_arcs = arcs[indices] + olabels = olabels[indices] + unit_positions = unit_positions[indices] + + rnnt_graph = k2.Fsa(sorted_arcs, olabels) + rnnt_graph.unit_positions = unit_positions + return rnnt_graph + + def forward( + self, acts: torch.Tensor, labels: torch.Tensor, act_lens: torch.Tensor, label_lens: torch.Tensor, + ) -> torch.Tensor: + """ + Compute forward method for RNN-T. + + Args: + acts: activations (joint tensor). NB: raw logits, not after log-softmax + labels: target labels + act_lens: lengths of activations + label_lens: length of labels sequences + + Returns: + batch of RNN-T scores (loss) + """ + # argument names are consistent with NeMo, see RNNTLoss.forward: + # self._loss(acts=log_probs, labels=targets, act_lens=input_lengths, label_lens=target_lengths) + logits, targets, logits_lengths, target_lengths = acts, labels, act_lens, label_lens + + # logits: B x Time x Text+1 x C + vocab_size = logits.shape[-1] + target_fsas_vec = self.get_graphs_batched(logits_lengths, targets, target_lengths, vocab_size) + + cast_context = force_float32_context() if self.cast_to_float32 else nullcontext() + with cast_context: + log_probs = F.log_softmax(logits, dim=-1) + with torch.no_grad(): + indices = self.get_logits_indices(target_fsas_vec, logits.shape) + # transition to the last state + # use 0 index (for valid index_select) and manually assign score after index_select for this case + indices[target_fsas_vec.labels == -1] = 0 + + # NB: do not assign scores -> modify, k2 will not update all scores correctly (modify -> assign) + scores = log_probs.flatten().index_select(-1, indices) + # fix weights for the arcs to the last state + scores[target_fsas_vec.labels == -1] = 0 + + target_fsas_vec.scores = scores + scores = -1 * target_fsas_vec.get_tot_scores(use_double_scores=self.double_scores, log_semiring=True) + return scores diff --git a/nemo/collections/asr/parts/k2/topologies.py b/nemo/collections/asr/parts/k2/topologies.py index c892b2643332..a3b6fcf0fef7 100644 --- a/nemo/collections/asr/parts/k2/topologies.py +++ b/nemo/collections/asr/parts/k2/topologies.py @@ -46,9 +46,11 @@ def build_topo(name: str, tokens: List[int], blank_num: int, with_self_loops: bo else: raise ValueError(f"Unknown topo name: {name}") if blank_num != 0: - blank_mask = ans.labels == 0 - ans.labels[(ans.labels != -1) & (ans.labels <= blank_num)] -= 1 - ans.labels[blank_mask] = blank_num + labels = ans.labels + blank_mask = labels == 0 + labels[(labels != -1) & (labels <= blank_num)] -= 1 + labels[blank_mask] = blank_num + ans.labels = labels # force update ans.labels property to notify FSA about modifications, required by k2 ans = k2.arc_sort(ans) return ans diff --git a/nemo/collections/asr/parts/k2/w_transducer.py b/nemo/collections/asr/parts/k2/w_transducer.py new file mode 100644 index 000000000000..b38a6c560fcd --- /dev/null +++ b/nemo/collections/asr/parts/k2/w_transducer.py @@ -0,0 +1,340 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from contextlib import nullcontext +from typing import Union + +import torch +import torch.nn.functional as F + +from nemo.collections.asr.parts.k2.graph_transducer import GraphRnntLoss, force_float32_context +from nemo.core.utils.k2_guard import k2 +from nemo.utils.enum import PrettyStrEnum + + +class GraphWTransducerLoss(GraphRnntLoss): + """ + W-Transducer loss: RNN-T loss modification for training RNN-T model for the case + when some text at the beginning/end of the utterance is missing. + The resulting model behaves like the RNN-T model (no modification for decoding is required). + For details see "Powerful and Extensible WFST Framework for RNN-Transducer Losses" paper + https://ieeexplore.ieee.org/document/10096679 + """ + + class LastBlankMode(PrettyStrEnum): + ALLOW_IGNORE = "allow_ignore" + FORCE_FINAL = "force_final" + + def __init__( + self, + blank: int, + eps_weight: float = 0.0, + last_blank_mode: Union[LastBlankMode, str] = LastBlankMode.FORCE_FINAL, + use_grid_implementation=True, + connect_composed=False, + double_scores=False, + cast_to_float32=False, + ): + """ + Init method + + Args: + blank: blank label index + eps_weight: weight of epsilon transitions, 0 means no penalty (default) + last_blank_mode: allow to skip last blank in the prediction (default) or force it + use_grid_implementation: Whether to use the grid implementation (Grid-Transducer). + connect_composed: Connect graph after composing unit and temporal schemas + (only for Compose-Transducer). `connect` operation is slow, it is useful for visualization, + but not necessary for loss computation. + double_scores: Use calculation of loss in double precision (float64) in the lattice. + Does not significantly affect memory usage since the lattice is ~V/2 times smaller than the joint tensor. + cast_to_float32: Force cast joint tensor to float32 before log-softmax calculation. + """ + super().__init__( + blank=blank, + use_grid_implementation=use_grid_implementation, + connect_composed=connect_composed, + double_scores=double_scores, + cast_to_float32=cast_to_float32, + ) + self.eps_weight = eps_weight + self.last_blank_mode = self.LastBlankMode(last_blank_mode) + + def get_unit_schema(self, units_tensor: torch.Tensor, vocab_size: int) -> "k2.Fsa": + """ + Get unit schema (target text) graph for W-Transducer loss (Compose-Transducer). + Forward arcs represent text labels. + + Example graph: text [1, 2], blank=0. Eps ids: 3, 4. + + graph:: + + 3:3:0 0:0:1 0:0:2 + +-------+ +-------+ +-------+ + v | v | v | + +-----------+ 1:1:0 +-----------+ 2:2:1 +-----------+ -1:-1:-1 #===# + | 0 | -------> | 1 | -------> | 2 | ---------> H 3 H + +-----------+ +-----------+ +-----------+ #===# + ^ 0:0:0 | ^ 4:4:2 | + +-------+ +-------+ + + Args: + units_tensor: 1d tensor with text units + vocab_size: number of total labels (vocab size including blank) + + Returns: + unit schema graph (k2.Fsa). + Labels: :: (k2.Fsa: labels, aux_labels, unit_positions) + """ + + blank_id = self.blank + start_eps_id = vocab_size + end_eps_id = vocab_size + 1 + device = units_tensor.device + text_len = units_tensor.shape[0] + + # arcs: scr, dest, label, score + arcs = torch.zeros(((text_len + 1) * 2 + 2, 4), dtype=torch.int32, device=device) + text_indices = torch.arange(0, text_len + 1, dtype=torch.int32, device=device) + # eps + arcs[0, 2] = start_eps_id + # blank labels + arcs[1:-1:2, 0] = text_indices # from state + arcs[1:-1:2, 1] = text_indices # to state + arcs[1:-1:2, 2] = blank_id + + # text labels + arcs[2:-1:2, 0] = text_indices # from state + arcs[2:-1:2, 1] = text_indices + 1 # to state + arcs[2:-2:2, 2] = units_tensor # labels: text + + arcs[-1] = arcs[-2] + arcs[-2, 1] = text_len + arcs[-2, 2] = end_eps_id + arcs[-1, 2] = -1 # last transition to final state, ilabel=-1 (special for k2) + olabels = arcs[:, 2].detach().clone() # same as ilabels + + fsa_text = k2.Fsa(arcs, olabels) + fsa_text.unit_positions = torch.zeros_like(olabels) + fsa_text.unit_positions[1:-1] = text_indices.expand(2, -1).transpose(0, 1).flatten() + fsa_text.unit_positions[-1] = -1 + return fsa_text + + def get_temporal_schema(self, num_frames: int, vocab_size: int, device: torch.device) -> "k2.Fsa": + """ + Get temporal schema graph for W-Transducer loss (Compose-Transducer). + + Example graph: blank=0, num_frames=3, vocab_size=3, last_blank_mode="force_final". + Labels: :. is a unit from vocab + special eps ids `vocab_size`, `vocab_size+1`. + + graph for force_final:: + + 4:0 + +--------------------------------------------+ + | 4:1 | + | +--------------------+ | + 1:0 | 1:1 | 1:2 | | + +-----+ | +-----+ | +-----+ | | + v | | v | | v | v v + +--------------+ 0:0 +------------+ 0:1 +------------+ 0:2 +---+ -1:-1 #===# + | 0 | ----> | 1 | -----> | 2 | -----> | 3 | -------> H 4 H + +--------------+ +------------+ +------------+ +---+ #===# + ^ 2:0 | | | ^ 2:1 | ^ ^ 2:2 | ^ + +-----+ | | +-----+ | +-----+ | + | | 3:0 | | + | +------------------+ 3:0 | + +-------------------------------------------+ + + + Args: + num_frames: length of the sequence (in frames) + vocab_size: number of labels (including blank) + device: device for tensor to construct + + Returns: + temporal schema graph (k2.Fsa). + Labels: :. is a unit from vocab + special units (e.g., additional eps). + """ + blank_id = self.blank + start_eps_id = vocab_size + end_eps_id = vocab_size + 1 + num_eps = 2 + + num_sequence_arcs = num_frames * vocab_size + (num_frames - 1) * num_eps + 1 + fsa_temporal_arcs = torch.zeros((num_sequence_arcs, 4), dtype=torch.int32, device=device) + sequence_states = torch.arange(0, num_frames, dtype=torch.int32, device=device) + sequence_states_next = sequence_states + 1 + # for every state - vocab_size+1 arcs, [0, 1, ..., vocab_size-1, eps, 0, 1, ..., vocab_size-1, eps, ...] + start_states = sequence_states.expand(vocab_size + num_eps, num_frames).transpose(0, 1).flatten() + + # self-loops - all, make forward arcs later + fsa_temporal_arcs[:num_sequence_arcs, 0] = start_states[:-1] # from + fsa_temporal_arcs[:num_sequence_arcs, 1] = start_states[:-1] # to + fsa_temporal_arcs[:num_sequence_arcs, 2] = ( + torch.arange(0, vocab_size + num_eps, dtype=torch.int32, device=device) + .expand(num_frames, vocab_size + num_eps) + .flatten()[:-1] + ) + # forward arcs + fsa_temporal_arcs[blank_id : num_sequence_arcs : vocab_size + num_eps, 1] = sequence_states_next # blanks + # eps arcs + fsa_temporal_arcs[start_eps_id : num_sequence_arcs : vocab_size + num_eps, 0] = 0 + fsa_temporal_arcs[start_eps_id : num_sequence_arcs : vocab_size + num_eps, 1] = sequence_states + 1 + fsa_temporal_arcs[end_eps_id : num_sequence_arcs : vocab_size + num_eps, 0] = sequence_states[:-1] + fsa_temporal_arcs[end_eps_id : num_sequence_arcs : vocab_size + num_eps, 1] = ( + num_frames - 1 if self.last_blank_mode == self.LastBlankMode.FORCE_FINAL else num_frames + ) + + # transition to last final state + fsa_temporal_arcs[-1, :3] = torch.tensor((num_frames, num_frames + 1, -1), dtype=torch.int32, device=device) + + # need to sort arcs + _, indices = torch.sort(fsa_temporal_arcs[:, 0], dim=0) + fsa_temporal_arcs = fsa_temporal_arcs[indices] + + # output symbols: position in the sequence, same as start states for arcs + olabels = fsa_temporal_arcs[:, 0].detach().clone() + olabels[-1] = -1 # transition to the last final state + + fsa_temporal = k2.Fsa(fsa_temporal_arcs, olabels) + fsa_temporal = k2.arc_sort(fsa_temporal) # need for compose + return fsa_temporal + + def get_grid(self, units_tensor: torch.Tensor, num_frames: int, vocab_size: int) -> "k2.Fsa": + """ + Construct W-Transducer lattice directly (Grid-Transducer). + + Args: + units_tensor: 1d tensor with text units + num_frames: length of the sequence (number of frames) + vocab_size: number of total labels (vocab size including blank) + + Returns: + transducer lattice (k2.Fsa). + Labels: :: (k2.Fsa: labels, aux_labels, unit_positions) + """ + blank_id = self.blank + eps_id = vocab_size # beyond vocabulary + text_length = units_tensor.shape[0] + device = units_tensor.device + num_grid_states = num_frames * (text_length + 1) + num_forward_arcs_base = (num_frames - 1) * (text_length + 1) + num_forward_arcs_additional = (num_frames - 1) * 2 + num_forward_arcs = num_forward_arcs_base + num_forward_arcs_additional + num_text_arcs = text_length * num_frames + arcs = torch.zeros((num_forward_arcs + num_text_arcs + 2, 4), dtype=torch.int32, device=device) + # blank transitions + # i, i+, 0 , i / , i % + from_states = torch.arange(num_forward_arcs_base, device=device) + to_states = from_states + (text_length + 1) + arcs[:num_forward_arcs_base, 0] = from_states + arcs[:num_forward_arcs_base, 1] = to_states + arcs[:num_forward_arcs_base, 2] = blank_id + + from_states = torch.cat( + [ + torch.arange(num_frames - 1, device=device) * (text_length + 1), + text_length + torch.arange(num_frames - 1, device=device) * (text_length + 1), + ] + ) + to_states = from_states + (text_length + 1) + arcs[num_forward_arcs_base : num_forward_arcs_base + (num_frames - 1) * 2, 0] = from_states + arcs[num_forward_arcs_base : num_forward_arcs_base + (num_frames - 1) * 2, 1] = to_states + arcs[num_forward_arcs_base : num_forward_arcs_base + (num_frames - 1), 2] = eps_id + arcs[num_forward_arcs_base + (num_frames - 1) : num_forward_arcs_base + (num_frames - 1) * 2, 2] = eps_id + 1 + + arcs[num_forward_arcs_base : num_forward_arcs_base + (num_frames - 1), 0] = 0 + arcs[num_forward_arcs_base + (num_frames - 1) : num_forward_arcs_base + (num_frames - 1) * 2, 1] = ( + num_grid_states - 1 + ) # if other mode - fix later + # last eps ark - after relabel + + # text arcs + from_states = ( + torch.arange(num_grid_states, dtype=torch.int32, device=device) + .reshape(num_frames, text_length + 1)[:, :-1] + .flatten() + ) + to_states = from_states + 1 + ilabels = units_tensor.expand(num_frames, -1).flatten() + arcs[num_forward_arcs:-2, 0] = from_states + arcs[num_forward_arcs:-2, 1] = to_states + arcs[num_forward_arcs:-2, 2] = ilabels + + # last 2 states + arcs[-2, :3] = torch.tensor((num_grid_states - 1, num_grid_states, blank_id), dtype=torch.int32, device=device) + arcs[-1, :3] = torch.tensor((num_grid_states, num_grid_states + 1, -1), dtype=torch.int32, device=device) + + # sequence indices, time indices + olabels = torch.div(arcs[:, 0], (text_length + 1), rounding_mode="floor") # arcs[:, 0] // (text_length + 1) + unit_positions = arcs[:, 0] % (text_length + 1) + # last state: final + olabels[-1] = -1 + unit_positions[-1] = -1 + + # relabel + # instead of using top sort (extremely expensive) k2.top_sort(rnnt_graph) + arcs[:-2, 0] = self.relabel_states(arcs[:-2, 0], text_length + 1, num_frames) + arcs[:-3, 1] = self.relabel_states(arcs[:-3, 1], text_length + 1, num_frames) + + if self.last_blank_mode == self.LastBlankMode.ALLOW_IGNORE: + arcs[ + num_forward_arcs_base + (num_frames - 1) : num_forward_arcs_base + (num_frames - 1) * 2, 1 + ] = num_grid_states + + # sort by start state - required in k2 + # TODO: maybe it is more optimal to avoid sort, construct arcs in ascending order + _, indices = torch.sort(arcs[:, 0], dim=0) + arcs = arcs[indices] + olabels = olabels[indices] + unit_positions = unit_positions[indices] + + rnnt_graph = k2.Fsa(arcs, olabels) + rnnt_graph.unit_positions = unit_positions + return rnnt_graph + + def forward( + self, acts: torch.Tensor, labels: torch.Tensor, act_lens: torch.Tensor, label_lens: torch.Tensor, + ): + """ + Forward method is similar to RNN-T Graph-Transducer forward method, + but we need to assign eps weight to eps-transitions. + """ + # argument names are consistent with NeMo, see RNNTLoss.forward: + # self._loss(acts=log_probs, labels=targets, act_lens=input_lengths, label_lens=target_lengths) + logits, targets, logits_lengths, target_lengths = acts, labels, act_lens, label_lens + + # logits: B x Time x Text+1 x C + vocab_size = logits.shape[-1] + target_fsas_vec = self.get_graphs_batched(logits_lengths, targets, target_lengths, vocab_size) + + cast_context = force_float32_context() if self.cast_to_float32 else nullcontext() + with cast_context: + log_probs = F.log_softmax(logits, dim=-1) + with torch.no_grad(): + indices = self.get_logits_indices(target_fsas_vec, logits.shape) + # transition to the last state + eps-transitions + # use 0 index (for valid index_select) and manually assign score after index_select for this case + indices[target_fsas_vec.labels == -1] = 0 + indices[target_fsas_vec.labels >= vocab_size] = 0 # eps + + # NB: do not assign scores -> modify, k2 will not update all scores correctly (modify -> assign) + scores = log_probs.flatten().index_select(-1, indices) + # fix weights for the arcs to the last state + eps-transitions + scores[target_fsas_vec.labels == -1] = 0 + scores[target_fsas_vec.labels >= vocab_size] = self.eps_weight # eps + + target_fsas_vec.scores = scores + scores = -1 * target_fsas_vec.get_tot_scores(use_double_scores=self.double_scores, log_semiring=True) + return scores diff --git a/nemo/collections/asr/parts/mixins/mixins.py b/nemo/collections/asr/parts/mixins/mixins.py index eba896d0478d..4c43960ac9d2 100644 --- a/nemo/collections/asr/parts/mixins/mixins.py +++ b/nemo/collections/asr/parts/mixins/mixins.py @@ -432,6 +432,34 @@ def change_attention_model( self.cfg.encoder.self_attention_model = self_attention_model self.cfg.encoder.att_context_size = att_context_size + def change_subsampling_conv_chunking_factor( + self, subsampling_conv_chunking_factor: int, update_config: bool = True + ): + """ + Update the conv_chunking_factor (int) if function is available in encoder. + Default is 1 (auto) + Set it to -1 (disabled) or to a specific value (power of 2) if you OOM in the conv subsampling layers + + Args: + conv_chunking_factor (int) + """ + + if not hasattr(self, 'encoder'): + logging.info( + "Could not call the change_subsampling_conv_chunking_factor method in encoder " + "since the model provided does not contain an `encoder` module in its config." + ) + return + + if not hasattr(self.encoder, "change_subsampling_conv_chunking_factor"): + logging.info("Model encoder doesn't have a change_subsampling_conv_chunking_factor method ") + return + + self.encoder.change_subsampling_conv_chunking_factor(subsampling_conv_chunking_factor) + if update_config: + with open_dict(self.cfg): + self.cfg.encoder.subsampling_conv_chunking_factor = subsampling_conv_chunking_factor + def conformer_stream_step( self, processed_signal: torch.Tensor, diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/__init__.py b/nemo/collections/asr/parts/numba/rnnt_loss/__init__.py index 66e30c77590a..055d7aeb5fd9 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/__init__.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/__init__.py @@ -13,4 +13,8 @@ # limitations under the License. from nemo.collections.asr.parts.numba.rnnt_loss.rnnt import rnnt_loss_cpu, rnnt_loss_gpu -from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import MultiblankRNNTLossNumba, RNNTLossNumba +from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import ( + MultiblankRNNTLossNumba, + RNNTLossNumba, + TDTLossNumba, +) diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt.py b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt.py index 64c8955006ed..046aea425e20 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt.py @@ -186,7 +186,7 @@ def rnnt_loss_gpu( # Select GPU index cuda.select_device(acts.device.index) - gpu_workspace = torch.zeros(gpu_size, device=acts.device, dtype=acts.dtype, requires_grad=False) + gpu_workspace = torch.zeros(gpu_size, device=acts.device, dtype=torch.float32, requires_grad=False) ### VIEW TENSORS AS VECTORS FOR POINTER INDEXING ### acts, acts_shape = rnnt_helper.flatten_tensor(acts) @@ -236,6 +236,133 @@ def rnnt_loss_gpu( return True +def tdt_loss_gpu( + label_acts: torch.Tensor, + duration_acts: torch.Tensor, + labels: torch.Tensor, + input_lengths: torch.Tensor, + label_lengths: torch.Tensor, + costs: torch.Tensor, + label_grads: torch.Tensor, + duration_grads: torch.Tensor, + blank_label: int, + durations: list, + fastemit_lambda: float, + clamp: float, + num_threads: int, + sigma: float, + omega: float, +): + """ + Wrapper method for accessing GPU TDT loss (https://arxiv.org/abs/2304.06795). + + CUDA implementation ported from [HawkAaron/warp-transducer](https://github.com/HawkAaron/warp-transducer). + + Args: + label_acts: Activation tensor of shape [B, T, U, V], where V includes the blank symbol. + duration_acts: Activation tensor of shape [B, T, U, D], where D is the number of durations. + labels: Ground truth labels of shape [B, U]. + input_lengths: Lengths of the acoustic sequence as a vector of ints [B]. + label_lengths: Lengths of the target sequence as a vector of ints [B]. + costs: Zero vector of length [B] in which costs will be set. + label_grads: Zero tensor of shape [B, T, U, V] where the gradient to label_acts will be set. + duration_grads: Zero tensor of shape [B, T, U, D] where the gradient to duration_acts will be set. + blank_label: Index of the standard blank token in the vocabulary. + durations: A list of supported durations for TDT. Must include 0 and 1. + fastemit_lambda: Float scaling factor for FastEmit regularization. Refer to + FastEmit: Low-latency Streaming ASR with Sequence-level Emission Regularization. + clamp: Float value. When set to value >= 0.0, will clamp the gradient to [-clamp, clamp]. + num_threads: Number of threads for OpenMP. + sigma: logit-undernormalization weight used in the multi-blank model. Refer to + the multi-blank paper https://arxiv.org/abs/2304.06795 for detailed explanations. + omega: weight for regular RNN-T loss + """ + minibatch_size = label_acts.shape[0] + maxT = label_acts.shape[1] + maxU = label_acts.shape[2] + alphabet_size = label_acts.shape[3] + + if hasattr(cuda, 'external_stream'): + stream = cuda.external_stream(torch.cuda.current_stream(label_acts.device).cuda_stream) + else: + stream = cuda.default_stream() + + if num_threads < 0: + num_threads = multiprocessing.cpu_count() + + num_threads = max(1, num_threads) # have to use at least 1 thread + + gpu_size, status = rnnt_helper.get_workspace_size(maxT, maxU, minibatch_size, gpu=True) + + if status != global_constants.RNNTStatus.RNNT_STATUS_SUCCESS: + raise RuntimeError("Invalid parameter passed when calculating working space memory") + + # Select GPU index + cuda.select_device(label_acts.device.index) + gpu_workspace = torch.zeros(gpu_size, device=label_acts.device, dtype=label_acts.dtype, requires_grad=False) + + tdt_workspace = torch.zeros(len(durations), device=label_acts.device, dtype=torch.long, requires_grad=False) + + for i in range(0, len(durations)): + tdt_workspace[i] = durations[i] + + ### VIEW TENSORS AS VECTORS FOR POINTER INDEXING ### + label_acts, label_acts_shape = rnnt_helper.flatten_tensor(label_acts) + duration_acts, duration_acts_shape = rnnt_helper.flatten_tensor(duration_acts) + + wrapper = gpu_rnnt.GPUTDT( + minibatch=minibatch_size, + maxT=maxT, + maxU=maxU, + alphabet_size=alphabet_size, + workspace=gpu_workspace, + tdt_workspace=tdt_workspace, + num_durations=len(durations), + blank=blank_label, + fastemit_lambda=fastemit_lambda, + clamp=clamp, + num_threads=num_threads, + stream=stream, + sigma=sigma, + omega=omega, + ) + + if label_grads is None: + status = wrapper.score_forward( + label_acts=label_acts.data, + duration_acts=duration_acts.data, + costs=costs.data, + pad_labels=labels.data, + label_lengths=label_lengths.data, + input_lengths=input_lengths.data, + ) + + if status != global_constants.RNNTStatus.RNNT_STATUS_SUCCESS: + raise RuntimeError("Could not calculate forward scores") + + else: + ### FLATTEN GRAD TENSOR ### + label_grads, label_grads_shape = rnnt_helper.flatten_tensor(label_grads) + duration_grads, duration_grads_shape = rnnt_helper.flatten_tensor(duration_grads) + + status = wrapper.cost_and_grad( + label_acts=label_acts.data, + duration_acts=duration_acts.data, + label_grads=label_grads.data, + duration_grads=duration_grads.data, + costs=costs.data, + pad_labels=labels.data, + label_lengths=label_lengths.data, + input_lengths=input_lengths.data, + ) + + if status != global_constants.RNNTStatus.RNNT_STATUS_SUCCESS: + raise RuntimeError("Could not calculate forward scores") + + del gpu_workspace, tdt_workspace, wrapper + return True + + def multiblank_rnnt_loss_gpu( acts: torch.Tensor, labels: torch.Tensor, diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_numpy.py b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_numpy.py index eaa6d332a0fc..58508970aa83 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_numpy.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_numpy.py @@ -344,10 +344,15 @@ def forward(self, acts, labels, act_lens, label_lens): _assert_no_grad(label_lens) certify_inputs(acts, labels, act_lens, label_lens) + # CPU Patch for fp16 - force cast to fp32 + if not acts.is_cuda and acts.dtype == torch.float16: + acts = acts.float() + if self.clamp > 0.0: acts = LogSoftmaxGradModification.apply(acts, self.clamp) acts = torch.nn.functional.log_softmax(acts, -1) + return self.rnnt(acts, labels, act_lens, label_lens, self.blank, self.fastemit_lambda) diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py index 3ed9b82bf996..5960d5ab6b18 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py @@ -34,7 +34,7 @@ from nemo.collections.asr.parts.numba.rnnt_loss import rnnt from nemo.collections.asr.parts.numba.rnnt_loss.utils.cpu_utils import cpu_rnnt -__all__ = ['rnnt_loss', 'RNNTLossNumba', 'MultiblankRNNTLossNumba'] +__all__ = ['rnnt_loss', 'RNNTLossNumba', 'MultiblankRNNTLossNumba', 'TDTLossNumba'] class _RNNTNumba(Function): @@ -57,7 +57,7 @@ def forward(ctx, acts, labels, act_lens, label_lens, blank, reduction, fastemit_ loss_func = rnnt.rnnt_loss_gpu if is_cuda else rnnt.rnnt_loss_cpu grads = torch.zeros_like(acts) if acts.requires_grad else None minibatch_size = acts.size(0) - costs = torch.zeros(minibatch_size, device=acts.device, dtype=acts.dtype) + costs = torch.zeros(minibatch_size, device=acts.device, dtype=torch.float32) loss_func( acts, @@ -91,6 +91,110 @@ def backward(ctx, grad_output): return ctx.grads.mul_(grad_output), None, None, None, None, None, None, None +class _TDTNumba(Function): + """ + Numba class for Token-and-Duration Transducer (TDT) loss (https://arxiv.org/abs/2304.06795) + """ + + @staticmethod + def forward( + ctx, + label_acts, + duration_acts, + labels, + act_lens, + label_lens, + blank, + durations, + reduction, + fastemit_lambda, + clamp, + sigma, + omega, + ): + """ + log_probs: Tensor of (batch x seqLength x labelLength x outputDim) containing output from network + labels: 2 dimensional Tensor containing all the targets of the batch with zero padded + act_lens: Tensor of size (batch) containing size of each output sequence from the network + label_lens: Tensor of (batch) containing label length of each example + fastemit_lambda: Float scaling factor for FastEmit regularization. Refer to + FastEmit: Low-latency Streaming ASR with Sequence-level Emission Regularization. + durations: list of durations for TDT model, must include 0 and 1, e.g. + [0, 1, 2, 3, 4]. + sigma: hyper-parameter for logit under-normalization method for training + TDT models. Recommended value 0.05. + omega: probability for sampling the standard RNN-T loss. + Refer to https://arxiv.org/abs/2304.06795 for detailed explanations for + the above parameters; + """ + is_cuda = label_acts.is_cuda + + certify_inputs(label_acts, labels, act_lens, label_lens) + if clamp < 0: + raise ValueError("`clamp` must be 0.0 or positive float value.") + + if is_cuda: + loss_func = rnnt.tdt_loss_gpu + else: + raise ValueError("TDT is not yet implemented for non CUDA computation.") + + label_grads = torch.zeros_like(label_acts) if label_acts.requires_grad else None + duration_grads = torch.zeros_like(duration_acts) if duration_acts.requires_grad else None + minibatch_size = label_acts.size(0) + costs = torch.zeros(minibatch_size, device=label_acts.device, dtype=label_acts.dtype) + + loss_func( + label_acts, + duration_acts, + labels=labels, + input_lengths=act_lens, + label_lengths=label_lens, + costs=costs, + label_grads=label_grads, + duration_grads=duration_grads, + blank_label=blank, + durations=durations, + fastemit_lambda=fastemit_lambda, + clamp=clamp, + sigma=sigma, + omega=omega, + num_threads=0, + ) + + if reduction in ['sum', 'mean']: + costs = costs.sum().unsqueeze_(-1) + if reduction == 'mean': + costs /= minibatch_size + + if label_grads is not None: + label_grads /= minibatch_size + duration_grads /= minibatch_size + + ctx.label_grads = label_grads + ctx.duration_grads = duration_grads + + return costs + + @staticmethod + def backward(ctx, grad_output): + if grad_output is not None and ctx.label_grads is not None: + grad_output = grad_output.view(-1, 1, 1, 1).to(ctx.label_grads) + return ( + ctx.label_grads.mul_(grad_output), + ctx.duration_grads.mul_(grad_output), + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + ) + + class _MultiblankRNNTNumba(Function): """ Numba class for multi-blank transducer loss (https://arxiv.org/pdf/2211.03541.pdf) @@ -237,6 +341,52 @@ def multiblank_rnnt_loss( ) +def tdt_loss( + acts, + labels, + act_lens, + label_lens, + blank, + durations=[], + reduction='mean', + fastemit_lambda: float = 0.0, + clamp: float = 0.0, +): + """ + TDT RNN Transducer (https://arxiv.org/abs/2304.06795) Loss (functional form) + Args: + acts: Tensor of (batch x seqLength x labelLength x outputDim) containing output from network + labels: 2 dimensional Tensor containing all the targets of the batch with zero padded + act_lens: Tensor of size (batch) containing size of each output sequence from the network + label_lens: Tensor of (batch) containing label length of each example + blank (int): standard blank label. + durations: list of durations for TDT model, e.g. + [0,1,2,3,4]. + sigma: hyper-parameter for logit under-normalization method for training + multi-blank transducers. Recommended value 0.05. + Refer to https://arxiv.org/abs/2304.06795 for detailed explanations for + the last two params. + reduction (string, optional): Specifies the reduction to apply to the output: + 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, + 'mean': the output losses will be divided by the target lengths and + then the mean over the batch is taken. Default: 'mean' + """ + if not acts.is_cuda: + # Since CPU requires log_softmax to be computed explicitly, we need to perform grad clipping + # *after* we have obtained the gradients of loss(logsoftmax()). + # This is highly wasteful since it requires a copy of the entire joint tensor which is expensive. + # CUDA version is much more efficient since it performs an inplace logsoftmax, and therefore + # can inplace clamp the gradient. + if clamp > 0.0: + acts = cpu_rnnt.LogSoftmaxGradModification.apply(acts, clamp) + + # NOTE manually done log_softmax for CPU version, + # log_softmax is computed within GPU version. + acts = torch.nn.functional.log_softmax(acts, -1) + + return _TDTNumba.apply(acts, labels, act_lens, label_lens, blank, durations, reduction, fastemit_lambda, clamp) + + class RNNTLossNumba(Module): """ Parameters: @@ -266,6 +416,10 @@ def forward(self, acts, labels, act_lens, label_lens): label_lens: Tensor of (batch) containing label length of each example """ if not acts.is_cuda: + # Force FP32 until log_softmax() is implemented for fp16 on CPU + if acts.dtype == torch.float16: + acts = acts.float() + # Since CPU requires log_softmax to be computed explicitly, we need to perform grad clipping # *after* we have obtained the gradients of loss(logsoftmax()). # This is highly wasteful since it requires a copy of the entire joint tensor which is expensive. @@ -354,6 +508,79 @@ def forward(self, acts, labels, act_lens, label_lens): ) +class TDTLossNumba(Module): + """ + Parameters: + blank (int): standard blank label. + durations: list of durations for TDT model, e.g. + [0, 1, 2, 3, 4]. + sigma: hyper-parameter for logit under-normalization method for training + TDT. Recommended value 0.05. + omega: hyper-parameter for RNN-T loss for loss combination. + Refer to https://arxiv.org/abs/2304.06795 for detailed explanations for + the above parameters; + + reduction (string, optional): Specifies the reduction to apply to the output: + 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, + 'mean': the output losses will be divided by the target lengths and + then the mean over the batch is taken. Default: 'mean' + fastemit_lambda: Float scaling factor for FastEmit regularization. Refer to + FastEmit: Low-latency Streaming ASR with Sequence-level Emission Regularization. + clamp: Float value. When set to value >= 0.0, will clamp the gradient to [-clamp, clamp]. + """ + + def __init__( + self, + blank, + durations=None, + reduction='mean', + fastemit_lambda: float = 0.0, + clamp: float = -1, + sigma: float = 0.0, + omega: float = 0.0, + ): + super(TDTLossNumba, self).__init__() + self.blank = blank + self.durations = durations if durations is not None else [] + self.fastemit_lambda = fastemit_lambda + self.clamp = float(clamp) if clamp > 0 else 0.0 + self.reduction = reduction + self.loss = _TDTNumba.apply + self.sigma = sigma + self.omega = omega + + def forward(self, acts, labels, act_lens, label_lens): + """ + log_probs: Tensor of (batch x seqLength x labelLength x outputDim) containing output from network + labels: 2 dimensional Tensor containing all the targets of the batch with zero padded + act_lens: Tensor of size (batch) containing size of each output sequence from the network + label_lens: Tensor of (batch) containing label length of each example + """ + + # TODO(hainan): in the future, we could further optimize this so that we don't need to + # make contiguous copies of the acts tensor. + label_acts, duration_acts = torch.split( + acts, [acts.shape[-1] - len(self.durations), len(self.durations)], dim=-1 + ) + label_acts = label_acts.contiguous() + duration_acts = torch.nn.functional.log_softmax(duration_acts, dim=-1).contiguous() + + return self.loss( + label_acts, + duration_acts, + labels, + act_lens, + label_lens, + self.blank, + self.durations, + self.reduction, + self.fastemit_lambda, + self.clamp, + self.sigma, + self.omega, + ) + + def check_type(var, t, name): if var.dtype is not t: raise TypeError("{} must be {}".format(name, t)) diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/utils/cpu_utils/cpu_rnnt.py b/nemo/collections/asr/parts/numba/rnnt_loss/utils/cpu_utils/cpu_rnnt.py index 1528606716e1..3feb7b513a50 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/utils/cpu_utils/cpu_rnnt.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/utils/cpu_utils/cpu_rnnt.py @@ -231,8 +231,8 @@ def cost_and_grad_kernel( ) # Scale llForward by FastEmit lambda - llForward *= 1.0 + self.fastemit_lambda_ - llBackward *= 1.0 + self.fastemit_lambda_ + llForward += llForward * self.fastemit_lambda_ + llBackward += llBackward * self.fastemit_lambda_ diff = (llForward - llBackward).abs() if diff > 0.1: @@ -300,6 +300,10 @@ def compute_betas_and_grads( Returns: Loglikelihood of the forward variable and inplace updates the grad tensor. """ + # Patch for CPU + fp16 + if log_probs.dtype == torch.float16 and not log_probs.is_cuda: + log_probs = log_probs.float() + idx = CpuRNNT_index(U, self.maxU_, self.minibatch_, self.alphabet_size_, self.batch_first) betas[idx(T - 1, U - 1)] = log_probs[idx(T - 1, U - 1) * 2] diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt.py b/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt.py index dca4e732c062..70ffb459cb97 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt.py @@ -27,6 +27,7 @@ # limitations under the License. import multiprocessing +import random from typing import Optional, Tuple import numba @@ -499,24 +500,306 @@ def _prepare_workspace(self) -> (int, Tuple[torch.Tensor]): An int, representing the offset of the used workspace (practically, the slice of the workspace consumed) A tuple of tensors representing the shared workspace. """ - used_offset = 0 + used_offset, (denom, alphas, betas, llForward, llBackward) = super()._prepare_workspace() - # // denom - denom = self.gpu_workspace[used_offset : used_offset + self.maxT_ * self.maxU_ * self.minibatch_] - used_offset += self.maxT_ * self.maxU_ * self.minibatch_ + bigblank_durations = self.big_blank_workspace[: self.num_big_blanks] - # // alphas & betas - alphas = self.gpu_workspace[used_offset : used_offset + self.maxT_ * self.maxU_ * self.minibatch_] - used_offset += self.maxT_ * self.maxU_ * self.minibatch_ - betas = self.gpu_workspace[used_offset : used_offset + self.maxT_ * self.maxU_ * self.minibatch_] - used_offset += self.maxT_ * self.maxU_ * self.minibatch_ + return used_offset, (denom, alphas, betas, llForward, llBackward, bigblank_durations) - # // logllh - llForward = self.gpu_workspace[used_offset : used_offset + self.minibatch_] - used_offset += self.minibatch_ - llBackward = self.gpu_workspace[used_offset : used_offset + self.minibatch_] - used_offset += self.minibatch_ - bigblank_durations = self.big_blank_workspace[: self.num_big_blanks] +class GPUTDT(GPURNNT): + def __init__( + self, + sigma: float, + omega: float, + num_durations: int, + minibatch: int, + maxT: int, + maxU: int, + alphabet_size: int, + workspace, + tdt_workspace, + blank: int, + fastemit_lambda: float, + clamp: float, + num_threads: int, + stream, + ): + """ + Helper class to launch the CUDA Kernels to compute TDT Loss (https://arxiv.org/pdf/2211.03541). - return used_offset, (denom, alphas, betas, llForward, llBackward, bigblank_durations) + Args: + sigma: Hyper-parameter related to the logit-normalization method in training tdt transducers. + omega: Hyper-parameter related to the sampled training. + num_durations: Number of durations the model supports. + minibatch: Int representing the batch size. + maxT: The maximum possible acoustic sequence length. Represents T in the logprobs tensor. + maxU: The maximum possible target sequence length. Represents U in the logprobs tensor. + alphabet_size: The vocabulary dimension V + 1 + num-big-blanks + workspace: An allocated chunk of memory that will be sliced off and reshaped into required + blocks used as working memory. + tdt_workspace: An allocated chunk of memory that will be sliced off and reshaped into required + blocks used as working memory specifically for the tdt related computations. + blank: Index of the blank token in the vocabulary. Must be the last token in the vocab. + fastemit_lambda: Float scaling factor for FastEmit regularization. Refer to + FastEmit: Low-latency Streaming ASR with Sequence-level Emission Regularization. + clamp: Float value. When set to value >= 0.0, will clamp the gradient to [-clamp, clamp]. + num_threads: Number of OMP threads to launch. + stream: Numba Cuda Stream. + """ + super().__init__( + minibatch, maxT, maxU, alphabet_size, workspace, blank, fastemit_lambda, clamp, num_threads, stream + ) + self.tdt_workspace = cuda.as_cuda_array( + tdt_workspace + ) # a flat vector of integer numbers that represents allocated memory slices + + self.num_durations = num_durations + self.sigma = sigma + self.omega = omega + + def compute_cost_and_score( + self, + label_acts: torch.Tensor, + duration_acts: torch.Tensor, + label_grads: Optional[torch.Tensor], + duration_grads: Optional[torch.Tensor], + costs: torch.Tensor, + labels: torch.Tensor, + label_lengths: torch.Tensor, + input_lengths: torch.Tensor, + ) -> global_constants.RNNTStatus: + """ + Compute both the loss and the gradients. + + Args: + label_acts: A flattened tensor of shape [B, T, U, V] representing the activation matrix for tokens. + duration_acts: A flattened tensor of shape [B, T, U, D] representing the activation matrix for durations. + label_grad: A flattented zero tensor of same shape as label_acts. + duration_grad: A flattented zero tensor of same shape as duration_acts. + costs: A zero vector of length B which will be updated inplace with the log probability costs. + flat_labels: A flattened matrix of labels of shape [B, U] + label_lengths: A vector of length B that contains the original lengths of the acoustic sequence. + input_lengths: A vector of length B that contains the original lengths of the target sequence. + + Updates: + This will launch kernels that will update inline the following variables: + - *_grads: Gradients of the activation matrix wrt the costs vector. + - costs: Negative log likelihood of the forward variable. + + Returns: + An enum that either represents a successful RNNT operation or failure. + """ + training = label_grads is not None + + if training: + label_grads *= 0.0 # zero grads + duration_grads *= 0.0 # zero grads + + _, (denom, alphas, betas, llForward, llBackward, durations) = self._prepare_workspace() + + ######## START EXECUTION ######## + self.log_softmax(label_acts, denom) + + r = random.uniform(0, 1) + if r < self.omega: + # Compute alphas + gpu_rnnt_kernel.compute_alphas_kernel[self.minibatch_, self.maxU_, self.stream_, 0]( + label_acts, + denom, + alphas, + llForward, + input_lengths, + label_lengths, + labels, + self.minibatch_, + self.maxT_, + self.maxU_, + self.alphabet_size_, + self.blank_, + ) + else: + # Compute alphas + gpu_rnnt_kernel.compute_tdt_alphas_kernel[self.minibatch_, self.maxU_, self.stream_, 0]( + label_acts, + duration_acts, + denom, + self.sigma, + alphas, + llForward, + input_lengths, + label_lengths, + labels, + self.minibatch_, + self.maxT_, + self.maxU_, + self.alphabet_size_, + self.blank_, + durations, + self.num_durations, + ) + + if training: + # Compute betas + if r < self.omega: + gpu_rnnt_kernel.compute_betas_kernel[self.minibatch_, self.maxU_, self.stream_, 0]( + label_acts, + denom, + betas, + llBackward, + input_lengths, + label_lengths, + labels, + self.minibatch_, + self.maxT_, + self.maxU_, + self.alphabet_size_, + self.blank_, + ) + + # Compute gradient + grad_blocks_per_grid = self.minibatch_ * self.maxT_ * self.maxU_ + grad_threads_per_block = gpu_rnnt_kernel.GPU_RNNT_THREAD_SIZE + gpu_rnnt_kernel.compute_grad_kernel[grad_blocks_per_grid, grad_threads_per_block, self.stream_, 0]( + label_grads, + label_acts, + denom, + alphas, + betas, + llForward, + input_lengths, + label_lengths, + labels, + self.minibatch_, + self.maxT_, + self.maxU_, + self.alphabet_size_, + self.blank_, + self.fastemit_lambda_, + self.clamp_, + ) + else: + gpu_rnnt_kernel.compute_tdt_betas_kernel[self.minibatch_, self.maxU_, self.stream_, 0]( + label_acts, + duration_acts, + denom, + self.sigma, + betas, + llBackward, + input_lengths, + label_lengths, + labels, + self.minibatch_, + self.maxT_, + self.maxU_, + self.alphabet_size_, + self.blank_, + durations, + self.num_durations, + ) + + # Compute gradient + grad_blocks_per_grid = self.minibatch_ * self.maxT_ * self.maxU_ + grad_threads_per_block = gpu_rnnt_kernel.GPU_RNNT_THREAD_SIZE + gpu_rnnt_kernel.compute_tdt_grad_kernel[grad_blocks_per_grid, grad_threads_per_block, self.stream_, 0]( + label_grads, + duration_grads, + label_acts, + duration_acts, + denom, + self.sigma, + alphas, + betas, + llForward, + input_lengths, + label_lengths, + labels, + self.minibatch_, + self.maxT_, + self.maxU_, + self.alphabet_size_, + self.blank_, + durations, + self.num_durations, + self.fastemit_lambda_, + self.clamp_, + ) + + # // cost copy, negate (for log likelihood) and update with additional regularizers + # This needs to be done via CUDA, because we used temporary memory llForward + # passed to alpha, which was updated with log likelihoods. + # But copying this data into a pytorch pointer is more difficult (numba api is one way) + # Therefore launch a pointwise CUDA kernel to update the costs inplace from data of llForward + # Then negate to compute the loglikelihood. + threadsperblock = min(costs.shape[0], 32) + blockspergrid = (costs.shape[0] + (threadsperblock - 1)) // threadsperblock + rnnt_helper.compute_costs_data[blockspergrid, threadsperblock, self.stream_, 0]( + llForward, costs, self.fastemit_lambda_ + ) + self.stream_.synchronize() + + return global_constants.RNNTStatus.RNNT_STATUS_SUCCESS + + def cost_and_grad( + self, + label_acts: torch.Tensor, + duration_acts: torch.Tensor, + label_grads: torch.Tensor, + duration_grads: torch.Tensor, + costs: torch.Tensor, + pad_labels: torch.Tensor, + label_lengths: torch.Tensor, + input_lengths: torch.Tensor, + ): + if ( + duration_acts is None + or label_acts is None + or label_grads is None + or duration_grads is None + or costs is None + or pad_labels is None + or label_lengths is None + or input_lengths is None + ): + return global_constants.RNNTStatus.RNNT_STATUS_INVALID_VALUE + + return self.compute_cost_and_score( + label_acts, duration_acts, label_grads, duration_grads, costs, pad_labels, label_lengths, input_lengths + ) + + def score_forward( + self, + label_acts: torch.Tensor, + duration_acts: torch.Tensor, + costs: torch.Tensor, + pad_labels: torch.Tensor, + label_lengths: torch.Tensor, + input_lengths: torch.Tensor, + ): + if ( + label_acts is None + or duration_acts is None + or costs is None + or pad_labels is None + or label_lengths is None + or input_lengths is None + ): + return global_constants.RNNTStatus.RNNT_STATUS_INVALID_VALUE + + return self.compute_cost_and_score( + label_acts, duration_acts, None, None, costs, pad_labels, label_lengths, input_lengths + ) + + def _prepare_workspace(self) -> (int, Tuple[torch.Tensor]): + """ + Helper method that uses the workspace and constructs slices of it that can be used. + + Returns: + An int, representing the offset of the used workspace (practically, the slice of the workspace consumed) + A tuple of tensors representing the shared workspace. + """ + used_offset, (denom, alphas, betas, llForward, llBackward) = super()._prepare_workspace() + + durations = self.tdt_workspace[: self.num_durations] + + return used_offset, (denom, alphas, betas, llForward, llBackward, durations) diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt_kernel.py b/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt_kernel.py index dbeb1544e7e3..4153af060941 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt_kernel.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt_kernel.py @@ -35,6 +35,8 @@ GPU_RNNT_THREAD_SIZE = 256 +INF = 10000.0 + @cuda.jit(device=True, inline=True) def logp( @@ -62,6 +64,12 @@ def logp( return denom[col] + acts[col * alphabet_size + v] +@cuda.jit(device=True, inline=True) +def logp_duration(acts: torch.Tensor, maxT: int, maxU: int, num_durations: int, mb: int, t: int, u: int, v: int): + col = (mb * maxT + t) * maxU + u + return acts[col * num_durations + v] + + @cuda.jit() def compute_alphas_kernel( acts: torch.Tensor, @@ -875,3 +883,526 @@ def compute_multiblank_grad_kernel( # update internal index through the thread_buffer; # until idx < V + 1, such that entire vocabulary has been updated. idx += GPU_RNNT_THREAD_SIZE + + +@cuda.jit() +def compute_tdt_alphas_kernel( + acts: torch.Tensor, + duration_acts: torch.Tensor, + denom: torch.Tensor, + sigma: float, + alphas: torch.Tensor, + llForward: torch.Tensor, + xlen: torch.Tensor, + ylen: torch.Tensor, + mlabels: torch.Tensor, # [B] + minibatch: int, + maxT: int, + maxU: int, + alphabet_size: int, + blank_: int, + durations: torch.Tensor, + num_durations: int, +): + """ + Compute alpha (forward variable) probabilities over the transduction step. + + Args: + acts: Tensor of shape [B, T, U, V] flattened. Represents the logprobs activation tensor for tokens. + duration_acts: Tensor of shape [B, T, U, D] flattened. Represents the logprobs activation tensor for duration. + denom: Tensor of shape [B, T, U] flattened. Represents the denominator of the logprobs activation tensor for tokens. + + alphas: Zero tensor of shape [B, T, U]. Will be updated inside the kernel with the forward variable + probabilities. + llForward: Zero tensor of shape [B]. Represents the log-likelihood of the forward pass. + Returned as the forward pass loss that is reduced by the optimizer. + xlen: Vector of length B which contains the actual acoustic sequence lengths in the padded + activation tensor. + ylen: Vector of length B which contains the actual target sequence lengths in the padded + activation tensor. + mlabels: Matrix of shape [B, U+1] (+1 here is due to token - usually the RNNT blank). + The matrix contains the padded target transcription that must be predicted. + minibatch: Int representing the batch size. + maxT: The maximum possible acoustic sequence length. Represents T in the logprobs tensor. + maxU: The maximum possible target sequence length. Represents U in the logprobs tensor. + alphabet_size: The vocabulary dimension V+1 (inclusive of RNNT blank). + blank_: Index of the TDT blank token in the vocabulary. Must be the last token in the vocab. + + Updates: + Kernel inplace updates the following inputs: + - alphas: forward variable scores. + - llForward: log-likelihood of forward variable. + """ + # // launch B blocks, each block has U threads + b = cuda.blockIdx.x # // batch id + u = cuda.threadIdx.x # label id, u + T = xlen[b] # select AM length of current sample + U = ylen[b] + 1 # select target length of current sample, +1 for the blank token + + labels: torch.Tensor = mlabels[b] # mb label start point, equivalent to mlabels + b * (maxU - 1) + offset = b * maxT * maxU # pointer indexing offset + + # alphas += offset # pointer offset, ignored since we explicitly add offset + + # Initilize alpha[b, t=0, u=0] for all b in B + if u == 0: + alphas[offset] = 0 + + # sync until all alphas are initialized + cuda.syncthreads() + + # Ordinary alpha calculations, broadcast across B=b and U=u + # Look up forward variable calculation from rnnt_numpy.forward_pass() + for n in range(1, T + U - 1): + t = n - u + + if u == 0: + # when u == 0, we only consider blank emissions. + if t > 0 and t < T: + alphas[offset + t * maxU + u] = -INF + + for i in range(1, num_durations): # skip 0 since blank emission has to advance by at least one + if t >= durations[i]: + alphas[offset + t * maxU + u] = rnnt_helper.log_sum_exp( + alphas[offset + t * maxU + u], # the current alpha value + alphas[offset + (t - durations[i]) * maxU + u] # alpha(t - duration, u) + + logp( + denom, acts, maxT, maxU, alphabet_size, b, t - durations[i], u, blank_ + ) # logp of blank emission + - sigma # logit under-normalization + + logp_duration( + duration_acts, maxT, maxU, num_durations, b, t - durations[i], u, i + ), # logp of duration + ) + else: + break # since durations are in ascending order, when we encounter a duration that is too large, then + # there is no need to check larger durations after that. + + elif u < U: + # when t == 0, we only consider the non-blank emission. + if t == 0: + alphas[offset + u] = ( + alphas[offset + u - 1] # alpha(t, u - 1) + + logp( + denom, acts, maxT, maxU, alphabet_size, b, t, u - 1, labels[u - 1] + ) # logp of token emission + - sigma # logit under-normalization + + logp_duration( + duration_acts, maxT, maxU, num_durations, b, t, u - 1, 0 + ) # t = 0, so it must be duration = 0. Therefore the last argument passed to logp_duration() is 0. + ) + + # now we have t != 0 and u != 0, and we need to consider both non-blank and blank emissions. + elif t > 0 and t < T: + no_emit = -INF # no_emit stores the score for all blank emissions. + for i in range(1, num_durations): + if t >= durations[i]: + no_emit = rnnt_helper.log_sum_exp( + no_emit, # current score + alphas[offset + (t - durations[i]) * maxU + u] # alpha(t - duration, u) + + logp( + denom, acts, maxT, maxU, alphabet_size, b, t - durations[i], u, blank_ + ) # logp of blank emission + - sigma # logit under-normalization + + logp_duration( + duration_acts, maxT, maxU, num_durations, b, t - durations[i], u, i + ), # logp of duration + ) + else: + break # we can exit the loop early here, same as the case for u == 0 above. + + emit = -INF # emit stores the score for non-blank emissions. + for i in range(0, num_durations): + if t >= durations[i]: + emit = rnnt_helper.log_sum_exp( + emit, # current score + alphas[offset + (t - durations[i]) * maxU + u - 1] # alpha(t - duration, u - 1) + + logp( + denom, acts, maxT, maxU, alphabet_size, b, t - durations[i], u - 1, labels[u - 1] + ) # logp of non-blank emission + - sigma # logit under-normalization + + logp_duration( + duration_acts, maxT, maxU, num_durations, b, t - durations[i], u - 1, i + ), # logp of duration + ) + else: + break # we can exit the loop early here, same as the case for u == 0 above. + + # combining blank and non-blank emissions. + alphas[offset + t * maxU + u] = rnnt_helper.log_sum_exp(emit, no_emit) + + # sync across all B=b and U=u + cuda.syncthreads() + + # After final sync, the forward log-likelihood can be computed as the summataion of + # alpha(T - duration, U - 1) + logp(blank, duration | t - duration, U - 1), over different durations. + if u == 0: + # first we consider duration = 1 + loglike = ( + alphas[offset + (T - 1) * maxU + U - 1] + + logp(denom, acts, maxT, maxU, alphabet_size, b, T - 1, U - 1, blank_) + - sigma + + logp_duration(duration_acts, maxT, maxU, num_durations, b, T - 1, U - 1, 1) + ) + + # then we add the scores for duration > 1, if such durations are possible given the audio lengths. + for i in range(2, num_durations): + if T >= durations[i]: + big_blank_loglike = ( + alphas[offset + (T - durations[i]) * maxU + U - 1] + + logp(denom, acts, maxT, maxU, alphabet_size, b, T - durations[i], U - 1, blank_) + - sigma + + logp_duration(duration_acts, maxT, maxU, num_durations, b, T - durations[i], U - 1, i) + ) + loglike = rnnt_helper.log_sum_exp(loglike, big_blank_loglike) + else: + break + + llForward[b] = loglike + + +@cuda.jit() +def compute_tdt_betas_kernel( + acts: torch.Tensor, + duration_acts: torch.Tensor, + denom: torch.Tensor, + sigma: float, + betas: torch.Tensor, + llBackward: torch.Tensor, + xlen: torch.Tensor, + ylen: torch.Tensor, + mlabels: torch.Tensor, # [B, U] + minibatch: int, + maxT: int, + maxU: int, + alphabet_size: int, + blank_: int, + durations: torch.Tensor, + num_durations: int, +): + """ + Compute beta (backward variable) probabilities over the transduction step. + + Args: + acts: Tensor of shape [B, T, U, V] flattened. Represents the logprobs activation tensor for tokens. + duration_acts: Tensor of shape [B, T, U, D] flattened. Represents the logprobs activation tensor for duations. + denom: Tensor of shape [B, T, U] flattened. Represents the denominator of the logprobs activation tensor + across entire vocabulary. + betas: Zero tensor of shape [B, T, U]. Will be updated inside the kernel with the backward variable + probabilities. + llBackward: Zero tensor of shape [B]. Represents the log-likelihood of the backward pass. + Returned as the backward pass loss that is reduced by the optimizer. + xlen: Vector of length B which contains the actual acoustic sequence lengths in the padded + activation tensor. + ylen: Vector of length B which contains the actual target sequence lengths in the padded + activation tensor. + mlabels: Matrix of shape [B, U+1] (+1 here is due to token - usually the RNNT blank). + The matrix contains the padded target transcription that must be predicted. + minibatch: Int representing the batch size. + maxT: The maximum possible acoustic sequence length. Represents T in the logprobs tensor. + maxU: The maximum possible target sequence length. Represents U in the logprobs tensor. + alphabet_size: The vocabulary dimension V+1 (inclusive of RNNT blank). + blank_: Index of the RNNT blank token in the vocabulary. Generally the first or last token in the vocab. + + Updates: + Kernel inplace updates the following inputs: + - betas: backward variable scores. + - llBackward: log-likelihood of backward variable. + """ + # // launch B blocks, each block has U threads + b = cuda.blockIdx.x # // batch id + u = cuda.threadIdx.x # label id, u + T = xlen[b] # select AM length of current sample + U = ylen[b] + 1 # select target length of current sample, +1 for the blank token + + labels: torch.Tensor = mlabels[b] # mb label start point, equivalent to mlabels + b * (maxU - 1) + offset = b * maxT * maxU # pointer indexing offset + + # betas += offset # pointer offset, ignored since we explicitly add offset + + # Initilize beta[b, t=T-1, u=U-1] for all b in B with log_probs[b, t=T-1, u=U-1, blank] + if u == 0: + betas[offset + (T - 1) * maxU + U - 1] = ( + logp(denom, acts, maxT, maxU, alphabet_size, b, T - 1, U - 1, blank_) + - sigma + + logp_duration(duration_acts, maxT, maxU, num_durations, b, T - 1, U - 1, 1) + ) + + # sync until all betas are initialized + cuda.syncthreads() + + # Ordinary beta calculations, broadcast across B=b and U=u + # Look up backward variable calculation from rnnt_numpy.backward_pass() + for n in range(T + U - 2, -1, -1): + t = n - u + + if u == U - 1: + # u == U - 1, we only consider blank emissions. + if t >= 0 and t + 1 < T: + betas[offset + t * maxU + U - 1] = -INF + for i in range(1, num_durations): + # although similar, the computation for beta's is slightly more complex for boundary cases. + # the following two cases correspond to whether t is exactly certain duration away from T. + # and they have slightly different update rules. + + if t + durations[i] < T: + betas[offset + t * maxU + U - 1] = rnnt_helper.log_sum_exp( + betas[offset + t * maxU + U - 1], + betas[ + offset + (t + durations[i]) * maxU + U - 1 + ] # beta[t, U - 1] depends on the value beta[t + duration, U - 1] here. + + logp(denom, acts, maxT, maxU, alphabet_size, b, t, U - 1, blank_) # log prob of blank + + logp_duration( + duration_acts, maxT, maxU, num_durations, b, t, U - 1, i + ) # log prob of duration (durations[i]) + - sigma, # for logit undernormalization + ) + elif t + durations[i] == T: + betas[offset + t * maxU + U - 1] = rnnt_helper.log_sum_exp( + betas[offset + t * maxU + U - 1], + # here we have one fewer term than the "if" block above. This could be seen as having "0" here since + # beta[t + duration, U - 1] isn't defined because t + duration is out of bound. + logp(denom, acts, maxT, maxU, alphabet_size, b, t, U - 1, blank_) # log prob of blank + + logp_duration( + duration_acts, maxT, maxU, num_durations, b, t, U - 1, i + ) # log prob of duration (durations[i]) + - sigma, # for logit undernormalization. Basically every time sigma shows up is because of logit undernormalization. + ) + + elif u < U - 1: + if t == T - 1: + # t == T - 1, so we only consider non-blank with duration 0. (Note, we can't have blank emissions with duration = 0) + betas[offset + (T - 1) * maxU + u] = ( + betas[offset + (T - 1) * maxU + u + 1] + + logp(denom, acts, maxT, maxU, alphabet_size, b, T - 1, u, labels[u]) # non-blank log prob + + logp_duration(duration_acts, maxT, maxU, num_durations, b, T - 1, u, 0) # log prob of duration 0 + - sigma + ) + + elif t >= 0 and t < T - 1: + # now we need to consider both blank andnon-blanks. Similar to alphas, we first compute them separately with no_emit and emit. + no_emit = -INF + for i in range(1, num_durations): + if t + durations[i] < T: + no_emit = rnnt_helper.log_sum_exp( + no_emit, + betas[offset + (t + durations[i]) * maxU + u] + + logp(denom, acts, maxT, maxU, alphabet_size, b, t, u, blank_) + + logp_duration(duration_acts, maxT, maxU, num_durations, b, t, u, i) + - sigma, + ) + + emit = -INF + for i in range(0, num_durations): + if t + durations[i] < T: + emit = rnnt_helper.log_sum_exp( + emit, + betas[offset + (t + durations[i]) * maxU + u + 1] + + logp(denom, acts, maxT, maxU, alphabet_size, b, t, u, labels[u]) + + logp_duration(duration_acts, maxT, maxU, num_durations, b, t, u, i) + - sigma, + ) + + # combining all blank emissions and all non-blank emissions. + betas[offset + t * maxU + u] = rnnt_helper.log_sum_exp(emit, no_emit) + + # sync across all B=b and U=u + cuda.syncthreads() + + # After final sync, betas[b, 0, 0] gives log-likelihood of backward pass, same with conventional Transducers. + if u == 0: + llBackward[b] = betas[offset] + + +@cuda.jit() +def compute_tdt_grad_kernel( + label_grads: torch.Tensor, + duration_grads: torch.Tensor, + acts: torch.Tensor, + duration_acts: torch.Tensor, + denom: torch.Tensor, + sigma: float, + alphas: torch.Tensor, + betas: torch.Tensor, + logll: torch.Tensor, + xlen: torch.Tensor, + ylen: torch.Tensor, + mlabels: torch.Tensor, # [B, U] + minibatch: int, + maxT: int, + maxU: int, + alphabet_size: int, + blank_: int, + durations: torch.Tensor, + num_durations: int, + fastemit_lambda: float, + clamp: float, +): + """ + Compute gradients over the transduction step. + + Args: + grads: Zero Tensor of shape [B, T, U, V] to store gradients for tokens. + duration_grads: Zero Tensor of shape [B, T, U, D] to store gradients for durations. + + acts: Tensor of shape [B, T, U, V] flattened. Represents the logprobs activation tensor for tokens. + duration_acts: Tensor of shape [B, T, U, D] flattened. Represents the logprobs activation tensor for durations. + denom: Tensor of shape [B, T, U] flattened. Represents the denominator of the logprobs activation tensor + across entire vocabulary. + alphas: Alpha variable, contains forward probabilities. A tensor of shape [B, T, U]. + betas: Beta varoable, contains backward probabilities. A tensor of shape [B, T, U]. + logll: Log-likelihood of the forward variable, represented as a vector of shape [B]. + Represents the log-likelihood of the forward pass. + xlen: Vector of length B which contains the actual acoustic sequence lengths in the padded + activation tensor. + ylen: Vector of length B which contains the actual target sequence lengths in the padded + activation tensor. + mlabels: Matrix of shape [B, U+1] (+1 here is due to token - usually the RNNT blank). + The matrix contains the padded target transcription that must be predicted. + minibatch: Int representing the batch size. + maxT: The maximum possible acoustic sequence length. Represents T in the logprobs tensor. + maxU: The maximum possible target sequence length. Represents U in the logprobs tensor. + alphabet_size: The vocabulary dimension V+1 (inclusive of RNNT blank). + blank_: Index of the RNNT blank token in the vocabulary. Generally the first or last token in the vocab. + fastemit_lambda: Float scaling factor for FastEmit regularization. Refer to + FastEmit: Low-latency Streaming ASR with Sequence-level Emission Regularization. + clamp: Float value. When set to value >= 0.0, will clamp the gradient to [-clamp, clamp]. + + Updates: + Kernel inplace updates the following inputs: + - grads: Gradients with respect to the log likelihood (logll). + """ + # Kernel call: + # blocks_per_grid = minibatch (b) * maxT (t) * maxU (u) + # threads_per_block = constant buffer size of parallel threads (v :: Constant) + tid = cuda.threadIdx.x # represents v, taking steps of some constant size + idx = tid # index of v < V+1; in steps of constant buffer size + col = cuda.blockIdx.x # represents a fused index of b * t * u + + # Decompose original indices from fused `col` + u = col % maxU # (b * t * u) % u = u + bt = (col - u) // maxU # (b * t * u - u) // U = b * t + t = bt % maxT # (b * t) % t = t + mb = (bt - t) // maxT # (b * t - t) // T = b + + # constants + T = xlen[mb] # select AM length of current sample + U = ylen[mb] + 1 # select target length of current sample, +1 for the blank token + labels: torch.Tensor = mlabels[mb] # labels = mlabels + mb * (maxU - 1); + + # Buffered gradient calculations, broadcast across B=b, T=t and U=u, looped over V with some constant stride. + # Look up gradient calculation from rnnt_numpy.compute_gradient() + + if t < T and u < U: + logpk_blank = ( + denom[col] + acts[col * alphabet_size + blank_] - sigma + ) # whenever sigma is used, it is for logit under-normalization. + + if idx < num_durations: + grad = 0.0 + if t + durations[idx] < T and u < U - 1: # for label + logpk_label = denom[col] + acts[col * alphabet_size + labels[u]] - sigma + grad -= math.exp(alphas[col] + betas[col + 1 + durations[idx] * maxU] + logpk_label - logll[mb]) + + if t + durations[idx] < T and idx > 0: # for blank in the middle + grad -= math.exp(alphas[col] + betas[col + durations[idx] * maxU] + logpk_blank - logll[mb]) + + if t + durations[idx] == T and idx >= 1 and u == U - 1: # for blank as the last symbol + grad -= math.exp(alphas[col] + logpk_blank - logll[mb]) + + grad = grad * math.exp(duration_acts[col * num_durations + idx]) + duration_grads[col * num_durations + idx] = grad + + # For cuda kernels, maximum number of threads per block is limited to some value. + # However, it may be the case that vocabulary size is larger than this limit + # To work around this, an arbitrary thread buffer size is chosen such that, + # 1) each element within the thread pool operates independently of the other + # 2) An inner while loop moves the index of each buffer element by the size of the buffer itself, + # such that all elements of the vocabulary size are covered in (V + 1 // thread_buffer) number of steps. + # As such, each thread will perform the while loop at least (V + 1 // thread_buffer) number of times + while idx < alphabet_size: + # remember, `col` represents the tri-index [b, t, u] + # therefore; logpk = denom[b, t, u] + acts[b, t, u, v] + logpk = denom[col] + acts[col * alphabet_size + idx] + # initialize the grad of the sample acts[b, t, u, v] + grad = math.exp(alphas[col] + betas[col] + logpk - logll[mb]) + + # If FastEmit regularization is enabled, calculate the gradeint of probability of predicting the next label + # at the current timestep. + # The formula for this is Equation 9 in https://arxiv.org/abs/2010.11148, multiplied by the log probability + # of the current step (t, u), normalized by the total log likelihood. + # Once the gradient has been calculated, scale it by `fastemit_lambda`, as in Equation 10. + if fastemit_lambda > 0.0 and u < U - 1: + fastemit_grad = 0.0 + + for i in range(0, num_durations): + if t + durations[i] < T: + fastemit_grad += fastemit_lambda * math.exp( + alphas[col] # alphas(t, u) + + (denom[col] + acts[col * alphabet_size + labels[u]]) # log prob of token emission + + duration_acts[col * num_durations + i] # duration log-prob + + betas[col + 1 + durations[i] * maxU] # betas(t, u+1) + + logpk # log Pr(k|t, u) + - sigma # for logit under-normalization + - logll[mb] # total log likelihood for normalization + ) + else: + fastemit_grad = 0.0 + + # Update the gradient of act[b, t, u, v] with the gradient from FastEmit regularization + grad = grad + fastemit_grad + + # grad to last blank transition + # grad[b, T-1, U-1, v=blank] -= exp(alphas[b, t, u] + logpk - sigma - logll[b] + logp(duration) for all possible non-zero durations. + if idx == blank_ and u == U - 1: + for i in range(1, num_durations): + if t == T - durations[i]: + grad -= math.exp( + alphas[col] + logpk - sigma - logll[mb] + duration_acts[col * num_durations + i] + ) + + # grad of blank across t < T; + # grad[b, t 0.0: + g = label_grads[col * alphabet_size + idx] + g = min(g, clamp) + g = max(g, -clamp) + label_grads[col * alphabet_size + idx] = g + + # update internal index through the thread_buffer; + # until idx < V + 1, such that entire vocabulary has been updated. + idx += GPU_RNNT_THREAD_SIZE diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/utils/rnnt_helper.py b/nemo/collections/asr/parts/numba/rnnt_loss/utils/rnnt_helper.py index b579b7315ef2..6ca7cd237264 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/utils/rnnt_helper.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/utils/rnnt_helper.py @@ -30,6 +30,7 @@ import math from typing import Optional, Tuple +import numba import torch from numba import cuda @@ -112,7 +113,7 @@ def compute_costs_data(source: torch.Tensor, dest: torch.Tensor, fastemit_lambda if idx < length: copy_data_1d(source, dest, idx) dest[idx] *= -1.0 - dest[idx] *= 1.0 + fastemit_lambda + dest[idx] *= numba.float32(1.0 + fastemit_lambda) def get_workspace_size( diff --git a/nemo/collections/asr/parts/preprocessing/features.py b/nemo/collections/asr/parts/preprocessing/features.py index c2e84b04e981..531cd3105c04 100644 --- a/nemo/collections/asr/parts/preprocessing/features.py +++ b/nemo/collections/asr/parts/preprocessing/features.py @@ -181,8 +181,7 @@ def process( trim_hop_length=512, orig_sr=None, channel_selector=None, - normalize_db=False, - normalize_db_target=-20.0, + normalize_db=None, ): audio = AudioSegment.from_file( file_path, @@ -198,7 +197,6 @@ def process( orig_sr=orig_sr, channel_selector=channel_selector, normalize_db=normalize_db, - normalize_db_target=normalize_db_target, ) return self.process_segment(audio) diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py index af6034f9af3a..d586137d5ff2 100644 --- a/nemo/collections/asr/parts/preprocessing/segment.py +++ b/nemo/collections/asr/parts/preprocessing/segment.py @@ -36,6 +36,7 @@ import math import os import random +from typing import Optional import librosa import numpy as np @@ -78,8 +79,8 @@ def __init__( trim_hop_length=512, orig_sr=None, channel_selector=None, - normalize_db=False, - normalize_db_target=-20.0, + normalize_db: Optional[float] = None, + ref_channel: Optional[int] = None, ): """Create audio segment from samples. Samples are convert float32 internally, with int scaled to [-1, 1]. @@ -114,8 +115,11 @@ def __init__( self._samples = samples self._sample_rate = sample_rate self._orig_sr = orig_sr if orig_sr is not None else sample_rate - if normalize_db: - self.normalize_db(normalize_db_target) + self._ref_channel = ref_channel + self._normalize_db = normalize_db + + if normalize_db is not None: + self.normalize_db(normalize_db, ref_channel) def __eq__(self, other): """Return whether two objects are equal.""" @@ -185,8 +189,8 @@ def from_file( trim_hop_length=512, orig_sr=None, channel_selector=None, - normalize_db=False, - normalize_db_target=-20.0, + normalize_db=None, + ref_channel=None, ): """ Load a file supported by librosa and return as an AudioSegment. @@ -207,8 +211,8 @@ def from_file( :param channel selector: string denoting the downmix mode, an integer denoting the channel to be selected, or an iterable of integers denoting a subset of channels. Channel selector is using zero-based indexing. If set to `None`, the original signal will be used. - :param normalize_db (bool): if true, normalize the audio signal to a target RMS value - :param normalize_db_target (float): the target RMS value in decibels + :param normalize_db (Optional[float]): if not None, normalize the audio signal to a target RMS value + :param ref_channel (Optional[int]): channel to use as reference for normalizing multi-channel audio, set None to use max RMS across channels :return: AudioSegment instance """ samples = None @@ -226,6 +230,8 @@ def from_file( trim_hop_length=trim_hop_length, orig_sr=orig_sr, channel_selector=channel_selector, + normalize_db=normalize_db, + ref_channel=ref_channel, ) if not isinstance(audio_file, str) or os.path.splitext(audio_file)[-1] in sf_supported_formats: @@ -283,7 +289,7 @@ def from_file( orig_sr=orig_sr, channel_selector=channel_selector, normalize_db=normalize_db, - normalize_db_target=normalize_db_target, + ref_channel=ref_channel, ) @classmethod @@ -417,6 +423,7 @@ def segment_from_file( samples = f.read(dtype='float32') except RuntimeError as e: logging.error(f"Loading {audio_file} via SoundFile raised RuntimeError: `{e}`.") + raise e features = cls( samples, sample_rate, target_sr=target_sr, trim=trim, orig_sr=orig_sr, channel_selector=channel_selector @@ -464,10 +471,14 @@ def orig_sr(self): def gain_db(self, gain): self._samples *= 10.0 ** (gain / 20.0) - def normalize_db(self, target_db=-20): + def normalize_db(self, target_db=-20, ref_channel=None): """Normalize the signal to a target RMS value in decibels. + For multi-channel audio, the RMS value is determined by the reference channel (if not None), + otherwise it will be the maximum RMS across all channels. """ rms_db = self.rms_db + if self.num_channels > 1: + rms_db = max(rms_db) if ref_channel is None else rms_db[ref_channel] gain = target_db - rms_db self.gain_db(gain) diff --git a/nemo/collections/asr/parts/submodules/adapters/multi_head_attention_adapter_module.py b/nemo/collections/asr/parts/submodules/adapters/multi_head_attention_adapter_module.py index 169dde48602f..563d4219baa7 100644 --- a/nemo/collections/asr/parts/submodules/adapters/multi_head_attention_adapter_module.py +++ b/nemo/collections/asr/parts/submodules/adapters/multi_head_attention_adapter_module.py @@ -147,18 +147,18 @@ def __init__( # reset parameters for Q to be identity operation self.reset_parameters() - def forward(self, query, key, value, mask, pos_emb=None, cache=None, cache_next=None): + def forward(self, query, key, value, mask, pos_emb=None, cache=None): """Compute 'Scaled Dot Product Attention'. Args: query (torch.Tensor): (batch, time1, size) key (torch.Tensor): (batch, time2, size) value(torch.Tensor): (batch, time2, size) mask (torch.Tensor): (batch, time1, time2) - cache (torch.Tensor) : (cache_nums, batch, time_cache, size) - cache_next (torch.Tensor) : (cache_nums, batch, time_cache_next, size) + cache (torch.Tensor) : (batch, time_cache, size) returns: output (torch.Tensor): transformed `value` (batch, time1, d_model) weighted by the query dot key attention + cache (torch.Tensor) : (batch, time_cache_next, size) """ # Need to perform duplicate computations as at this point the tensors have been # separated by the adapter forward @@ -166,7 +166,7 @@ def forward(self, query, key, value, mask, pos_emb=None, cache=None, cache_next= key = self.pre_norm(key) value = self.pre_norm(value) - return super().forward(query, key, value, mask, pos_emb, cache=cache, cache_next=cache_next) + return super().forward(query, key, value, mask, pos_emb, cache=cache) def reset_parameters(self): with torch.no_grad(): @@ -242,7 +242,7 @@ def __init__( # reset parameters for Q to be identity operation self.reset_parameters() - def forward(self, query, key, value, mask, pos_emb, cache=None, cache_next=None): + def forward(self, query, key, value, mask, pos_emb, cache=None): """Compute 'Scaled Dot Product Attention' with rel. positional encoding. Args: query (torch.Tensor): (batch, time1, size) @@ -250,10 +250,10 @@ def forward(self, query, key, value, mask, pos_emb, cache=None, cache_next=None) value(torch.Tensor): (batch, time2, size) mask (torch.Tensor): (batch, time1, time2) pos_emb (torch.Tensor) : (batch, time1, size) - cache (torch.Tensor) : (cache_nums, batch, time_cache, size) - cache_next (torch.Tensor) : (cache_nums, batch, time_cache_next, size) + cache (torch.Tensor) : (batch, time_cache, size) Returns: output (torch.Tensor): transformed `value` (batch, time1, d_model) weighted by the query dot key attention + cache_next (torch.Tensor) : (batch, time_cache_next, size) """ # Need to perform duplicate computations as at this point the tensors have been # separated by the adapter forward @@ -261,7 +261,7 @@ def forward(self, query, key, value, mask, pos_emb, cache=None, cache_next=None) key = self.pre_norm(key) value = self.pre_norm(value) - return super().forward(query, key, value, mask, pos_emb, cache=cache, cache_next=cache_next) + return super().forward(query, key, value, mask, pos_emb, cache=cache) def reset_parameters(self): with torch.no_grad(): diff --git a/nemo/collections/asr/parts/submodules/causal_convs.py b/nemo/collections/asr/parts/submodules/causal_convs.py index 25f841802154..32f08a8d2feb 100644 --- a/nemo/collections/asr/parts/submodules/causal_convs.py +++ b/nemo/collections/asr/parts/submodules/causal_convs.py @@ -45,7 +45,6 @@ def __init__( raise ValueError("Argument padding should be set to None for CausalConv2D.") self._left_padding = kernel_size - 1 self._right_padding = stride - 1 - self._cache_id = None padding = 0 super(CausalConv2D, self).__init__( @@ -113,7 +112,6 @@ def __init__( raise ValueError(f"Invalid padding param: {padding}!") self._max_cache_len = self._left_padding - self._cache_id = None super(CausalConv1D, self).__init__( in_channels=in_channels, @@ -129,21 +127,24 @@ def __init__( dtype=dtype, ) - def update_cache(self, x, cache=None, cache_next=None): + def update_cache(self, x, cache=None): if cache is None: new_x = F.pad(x, pad=(self._left_padding, self._right_padding)) + next_cache = cache else: new_x = F.pad(x, pad=(0, self._right_padding)) - new_x = torch.cat([cache[self._cache_id], new_x], dim=-1) - # todo: we should know input_x.size(-1) at config time - if cache_next is not None: - cache_keep_size = torch.tensor(x.size(-1) - self.cache_drop_size, dtype=torch.int64, device=x.device) - cache_keep_size = torch.clip(cache_keep_size, min=1, max=cache_next.size(-1)) - cache_next[self._cache_id, :, :, :-cache_keep_size] = cache[self._cache_id, :, :, cache_keep_size:] - cache_next[self._cache_id, :, :, -cache_keep_size:] = x[:, :, :cache_keep_size] - return new_x - - def forward(self, x, cache=None, cache_next=None): - x = self.update_cache(x, cache=cache, cache_next=cache_next) + new_x = torch.cat([cache, new_x], dim=-1) + if self.cache_drop_size > 0: + next_cache = new_x[:, :, : -self.cache_drop_size] + else: + next_cache = new_x + next_cache = next_cache[:, :, -cache.size(-1) :] + return new_x, next_cache + + def forward(self, x, cache=None): + x, cache = self.update_cache(x, cache=cache) x = super().forward(x) - return x + if cache is None: + return x + else: + return x, cache diff --git a/nemo/collections/asr/parts/submodules/conformer_modules.py b/nemo/collections/asr/parts/submodules/conformer_modules.py index 579b78a8f5a8..677d2acd9f2e 100644 --- a/nemo/collections/asr/parts/submodules/conformer_modules.py +++ b/nemo/collections/asr/parts/submodules/conformer_modules.py @@ -138,29 +138,19 @@ def __init__( self.dropout = nn.Dropout(dropout) self.norm_out = LayerNorm(d_model) - def forward( - self, - x, - att_mask=None, - pos_emb=None, - pad_mask=None, - cache_last_channel=None, - cache_last_time=None, - cache_last_channel_next=None, - cache_last_time_next=None, - ): + def forward(self, x, att_mask=None, pos_emb=None, pad_mask=None, cache_last_channel=None, cache_last_time=None): """ Args: x (torch.Tensor): input signals (B, T, d_model) att_mask (torch.Tensor): attention masks(B, T, T) pos_emb (torch.Tensor): (L, 1, d_model) pad_mask (torch.tensor): padding mask - cache_last_channel (torch.tensor) : cache for MHA layers (N, B, T_cache, d_model) - cache_last_time (torch.tensor) : cache for convolutional layers (N, B, d_model, T_cache) - cache_last_channel_next (torch.tensor) : next cache for MHA layers (N, B, T_cache, d_model) - cache_last_time_next (torch.tensor) : next cache for convolutional layers (N, B, d_model, T_cache) + cache_last_channel (torch.tensor) : cache for MHA layers (B, T_cache, d_model) + cache_last_time (torch.tensor) : cache for convolutional layers (B, d_model, T_cache) Returns: x (torch.Tensor): (B, T, d_model) + cache_last_channel (torch.tensor) : next cache for MHA layers (B, T_cache, d_model) + cache_last_time (torch.tensor) : next cache for convolutional layers (B, d_model, T_cache) """ residual = x x = self.norm_feed_forward1(x) @@ -169,31 +159,17 @@ def forward( x = self.norm_self_att(residual) if self.self_attention_model == 'rel_pos': - x = self.self_attn( - query=x, - key=x, - value=x, - mask=att_mask, - pos_emb=pos_emb, - cache=cache_last_channel, - cache_next=cache_last_channel_next, - ) + x = self.self_attn(query=x, key=x, value=x, mask=att_mask, pos_emb=pos_emb, cache=cache_last_channel) elif self.self_attention_model == 'rel_pos_local_attn': - x = self.self_attn( - query=x, - key=x, - value=x, - pad_mask=pad_mask, - pos_emb=pos_emb, - cache=cache_last_channel, - cache_next=cache_last_channel_next, - ) + x = self.self_attn(query=x, key=x, value=x, pad_mask=pad_mask, pos_emb=pos_emb, cache=cache_last_channel) elif self.self_attention_model == 'abs_pos': - x = self.self_attn( - query=x, key=x, value=x, mask=att_mask, cache=cache_last_channel, cache_next=cache_last_channel_next - ) + x = self.self_attn(query=x, key=x, value=x, mask=att_mask, cache=cache_last_channel) else: x = None + + if x is not None and cache_last_channel is not None: + (x, cache_last_channel) = x + residual = residual + self.dropout(x) if self.is_adapter_available(): @@ -208,7 +184,9 @@ def forward( residual = pack_ip['x'] x = self.norm_conv(residual) - x = self.conv(x, pad_mask=pad_mask, cache=cache_last_time, cache_next=cache_last_time_next) + x = self.conv(x, pad_mask=pad_mask, cache=cache_last_time) + if cache_last_time is not None: + (x, cache_last_time) = x residual = residual + self.dropout(x) x = self.norm_feed_forward2(residual) @@ -228,8 +206,10 @@ def forward( if self.is_access_enabled() and self.access_cfg.get('save_encoder_tensors', False): self.register_accessible_tensor(name='encoder', tensor=x) - - return x + if cache_last_channel is None: + return x + else: + return x, cache_last_channel, cache_last_time def forward_single_enabled_adapter_( self, @@ -355,7 +335,7 @@ def __init__( in_channels=dw_conv_input_dim, out_channels=d_model, kernel_size=1, stride=1, padding=0, bias=True ) - def forward(self, x, pad_mask=None, cache=None, cache_next=None): + def forward(self, x, pad_mask=None, cache=None): x = x.transpose(1, 2) x = self.pointwise_conv1(x) @@ -368,10 +348,9 @@ def forward(self, x, pad_mask=None, cache=None, cache_next=None): if pad_mask is not None: x = x.float().masked_fill(pad_mask.unsqueeze(1), 0.0) + x = self.depthwise_conv(x, cache=cache) if cache is not None: - x = self.depthwise_conv(x, cache=cache, cache_next=cache_next) - else: - x = self.depthwise_conv(x) + x, cache = x if self.norm_type == "layer_norm": x = x.transpose(1, 2) @@ -383,7 +362,10 @@ def forward(self, x, pad_mask=None, cache=None, cache_next=None): x = self.activation(x) x = self.pointwise_conv2(x) x = x.transpose(1, 2) - return x + if cache is None: + return x + else: + return x, cache def reset_parameters_conv(self): pw1_max = pw2_max = self.d_model ** -0.5 diff --git a/nemo/collections/asr/parts/submodules/multi_head_attention.py b/nemo/collections/asr/parts/submodules/multi_head_attention.py index 40baf1141bd3..a0253524419e 100644 --- a/nemo/collections/asr/parts/submodules/multi_head_attention.py +++ b/nemo/collections/asr/parts/submodules/multi_head_attention.py @@ -73,7 +73,6 @@ def __init__(self, n_head, n_feat, dropout_rate, max_cache_len=0): self.dropout = nn.Dropout(p=dropout_rate) self._max_cache_len = max_cache_len - self._cache_id = None def forward_qkv(self, query, key, value): """Transforms query, key and value. @@ -119,20 +118,20 @@ def forward_attention(self, value, scores, mask): return self.linear_out(x) # (batch, time1, d_model) - def forward(self, query, key, value, mask, pos_emb=None, cache=None, cache_next=None): + def forward(self, query, key, value, mask, pos_emb=None, cache=None): """Compute 'Scaled Dot Product Attention'. Args: query (torch.Tensor): (batch, time1, size) key (torch.Tensor): (batch, time2, size) value(torch.Tensor): (batch, time2, size) mask (torch.Tensor): (batch, time1, time2) - cache (torch.Tensor) : (cache_nums, batch, time_cache, size) - cache_next (torch.Tensor) : (cache_nums, batch, time_cache_next, size) + cache (torch.Tensor) : (batch, time_cache, size) returns: output (torch.Tensor): transformed `value` (batch, time1, d_model) weighted by the query dot key attention + cache (torch.Tensor) : (batch, time_cache_next, size) """ - key, value, query = self.update_cache(key=key, value=value, query=query, cache=cache, cache_next=cache_next) + key, value, query, cache = self.update_cache(key=key, value=value, query=query, cache=cache) if torch.is_autocast_enabled(): query, key, value = query.to(torch.float32), key.to(torch.float32), value.to(torch.float32) @@ -142,17 +141,17 @@ def forward(self, query, key, value, mask, pos_emb=None, cache=None, cache_next= q, k, v = self.forward_qkv(query, key, value) scores = torch.matmul(q, k.transpose(-2, -1)) / self.s_d_k out = self.forward_attention(v, scores, mask) + if cache is None: + return out + else: + return out, cache - return out - - def update_cache(self, key, value, query, cache, cache_next): + def update_cache(self, key, value, query, cache): if cache is not None: - key = value = torch.cat([cache[self._cache_id], key], dim=1) + key = value = torch.cat([cache, key], dim=1) q_keep_size = query.shape[1] - self.cache_drop_size - if cache_next is not None: - cache_next[self._cache_id, :, :-q_keep_size, :] = cache[self._cache_id, :, q_keep_size:, :] - cache_next[self._cache_id, :, -q_keep_size:, :] = query[:, :q_keep_size, :] - return key, value, query + cache = torch.cat([cache[:, q_keep_size:, :], query[:, :q_keep_size, :]], dim=1) + return key, value, query, cache class RelPositionMultiHeadAttention(MultiHeadAttention): @@ -195,7 +194,7 @@ def rel_shift(self, x): x = x[:, :, 1:].view(b, h, qlen, pos_len) # (b, h, t1, t2) return x - def forward(self, query, key, value, mask, pos_emb, cache=None, cache_next=None): + def forward(self, query, key, value, mask, pos_emb, cache=None): """Compute 'Scaled Dot Product Attention' with rel. positional encoding. Args: query (torch.Tensor): (batch, time1, size) @@ -203,12 +202,13 @@ def forward(self, query, key, value, mask, pos_emb, cache=None, cache_next=None) value(torch.Tensor): (batch, time2, size) mask (torch.Tensor): (batch, time1, time2) pos_emb (torch.Tensor) : (batch, time1, size) - cache (torch.Tensor) : (cache_nums, batch, time_cache, size) - cache_next (torch.Tensor) : (cache_nums, batch, time_cache_next, size) + cache (torch.Tensor) : (batch, time_cache, size) + Returns: output (torch.Tensor): transformed `value` (batch, time1, d_model) weighted by the query dot key attention + cache (torch.Tensor) : (batch, time_cache_next, size) """ - key, value, query = self.update_cache(key=key, value=value, query=query, cache=cache, cache_next=cache_next) + key, value, query, cache = self.update_cache(key=key, value=value, query=query, cache=cache) if torch.is_autocast_enabled(): query, key, value = query.to(torch.float32), key.to(torch.float32), value.to(torch.float32) @@ -244,7 +244,10 @@ def forward(self, query, key, value, mask, pos_emb, cache=None, cache_next=None) out = self.forward_attention(v, scores, mask) - return out + if cache is None: + return out + else: + return out, cache class RelPositionMultiHeadAttentionLongformer(RelPositionMultiHeadAttention): @@ -298,7 +301,7 @@ def __init__( self.global_k = nn.Linear(n_feat, n_feat) self.global_v = nn.Linear(n_feat, n_feat) - def forward(self, query, key, value, pad_mask, pos_emb, cache=None, cache_next=None): + def forward(self, query, key, value, pad_mask, pos_emb, cache=None): """Compute Scaled Dot Product Local Attention with rel. positional encoding. using overlapping chunks Args: query (torch.Tensor): (batch, time, size) @@ -306,13 +309,13 @@ def forward(self, query, key, value, pad_mask, pos_emb, cache=None, cache_next=N value(torch.Tensor): (batch, time, size) pad_mask (torch.Tensor): (batch, time) pos_emb (torch.Tensor) : (batch, 2w + 1, size) - cache (torch.Tensor) : (cache_nums, batch, time_cache, size) - cache_next (torch.Tensor) : (cache_nums, batch, time_cache_next, size) + cache (torch.Tensor) : (batch, time_cache, size) Returns: output (torch.Tensor): transformed `value` (batch, time1, d_model) weighted by the query dot key attention + cache (torch.Tensor) : (batch, time_cache_next, size) """ - key, value, query = self.update_cache(key=key, value=value, query=query, cache=cache, cache_next=cache_next) + key, value, query, cache = self.update_cache(key=key, value=value, query=query, cache=cache) if torch.is_autocast_enabled(): query, key, value = query.to(torch.float32), key.to(torch.float32), value.to(torch.float32) @@ -453,7 +456,11 @@ def forward(self, query, key, value, pad_mask, pos_emb, cache=None, cache_next=N out[is_index_global_attn_nonzero] += out_global_to_all - return self.linear_out(out.reshape(n_batch, -1, self.h * self.d_k)[:, :T]) + ret = self.linear_out(out.reshape(n_batch, -1, self.h * self.d_k)[:, :T]) + if cache is None: + return ret + else: + return ret, cache def _get_global_attn_indices(self, is_index_global_attn: torch.Tensor) -> Tuple: """ @@ -888,17 +895,19 @@ def extend_pe(self, length, device): positions = torch.arange(0, length, dtype=torch.float32, device=device).unsqueeze(1) self.create_pe(positions=positions) - def forward(self, x: torch.Tensor): + def forward(self, x: torch.Tensor, cache_len=0): """Adds positional encoding. Args: x (torch.Tensor): Input. Its shape is (batch, time, feature_size) + cache_len (int): the size of the cache which is used to shift positions Returns: x+pos_emb (torch.Tensor): Its shape is (batch, time, feature_size) pos_emb (torch.Tensor): Its shape is (1, time, feature_size) """ + input_len = x.size(1) + cache_len if self.xscale: x = x * self.xscale - pos_emb = self.pe[:, : x.size(1)] + pos_emb = self.pe[:, :input_len] if self.dropout_emb: pos_emb = self.dropout_emb(pos_emb) x = x + pos_emb diff --git a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py index 5e98b03f2fe2..42b14fd7b8bf 100644 --- a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py @@ -2202,3 +2202,564 @@ class GreedyBatchedRNNTInferConfig: preserve_alignments: bool = False preserve_frame_confidence: bool = False confidence_method_cfg: Optional[ConfidenceMethodConfig] = None + + +class GreedyTDTInfer(_GreedyRNNTInfer): + """A greedy TDT decoder. + + Sequence level greedy decoding, performed auto-repressively. + + Args: + decoder_model: rnnt_utils.AbstractRNNTDecoder implementation. + joint_model: rnnt_utils.AbstractRNNTJoint implementation. + blank_index: int index of the blank token. Must be len(vocabulary) for TDT models. + durations: a list containing durations for TDT. + max_symbols_per_step: Optional int. The maximum number of symbols that can be added + to a sequence in a single time step; if set to None then there is + no limit. + preserve_alignments: Bool flag which preserves the history of alignments generated during + greedy decoding (sample / batched). When set to true, the Hypothesis will contain + the non-null value for `alignments` in it. Here, `alignments` is a List of List of + Tuple(Tensor (of length V + 1 + num-big-blanks), Tensor(scalar, label after argmax)). + The length of the list corresponds to the Acoustic Length (T). + Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more targets from a vocabulary. + U is the number of target tokens for the current timestep Ti. + preserve_frame_confidence: Bool flag which preserves the history of per-frame confidence scores generated + during greedy decoding (sample / batched). When set to true, the Hypothesis will contain + the non-null value for `frame_confidence` in it. Here, `frame_confidence` is a List of List of floats. + The length of the list corresponds to the Acoustic Length (T). + Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. + U is the number of target tokens for the current timestep Ti. + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame + confidence scores. + name: The method name (str). + Supported values: + - 'max_prob' for using the maximum token probability as a confidence. + - 'entropy' for using normalized entropy of a log-likelihood vector. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. + Supported values: + - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). + Note that for this entropy, the temperature should comply the following inequality: + 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. + Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), + where α is a parameter. When α == 1, it works like the Gibbs entropy. + More: https://en.wikipedia.org/wiki/Tsallis_entropy + - 'renui' for the Rényi entropy. + Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), + where α is a parameter. When α == 1, it works like the Gibbs entropy. + More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy + temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the temperature equals one, scaling is not applied to 'max_prob', + and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) + entropy_norm: A mapping of the entropy value to the interval [0,1]. + Supported values: + - 'lin' for using the linear mapping. + - 'exp' for using exponential mapping with linear shift. + """ + + def __init__( + self, + decoder_model: rnnt_abstract.AbstractRNNTDecoder, + joint_model: rnnt_abstract.AbstractRNNTJoint, + blank_index: int, + durations: list, + max_symbols_per_step: Optional[int] = None, + preserve_alignments: bool = False, + preserve_frame_confidence: bool = False, + confidence_method_cfg: Optional[DictConfig] = None, + ): + super().__init__( + decoder_model=decoder_model, + joint_model=joint_model, + blank_index=blank_index, + max_symbols_per_step=max_symbols_per_step, + preserve_alignments=preserve_alignments, + preserve_frame_confidence=preserve_frame_confidence, + confidence_method_cfg=confidence_method_cfg, + ) + self.durations = durations + + @typecheck() + def forward( + self, + encoder_output: torch.Tensor, + encoded_lengths: torch.Tensor, + partial_hypotheses: Optional[List[rnnt_utils.Hypothesis]] = None, + ): + """Returns a list of hypotheses given an input batch of the encoder hidden embedding. + Output token is generated auto-repressively. + Args: + encoder_output: A tensor of size (batch, features, timesteps). + encoded_lengths: list of int representing the length of each sequence + output sequence. + Returns: + packed list containing batch number of sentences (Hypotheses). + """ + # Preserve decoder and joint training state + decoder_training_state = self.decoder.training + joint_training_state = self.joint.training + + with torch.inference_mode(): + # Apply optional preprocessing + encoder_output = encoder_output.transpose(1, 2) # (B, T, D) + + self.decoder.eval() + self.joint.eval() + + hypotheses = [] + # Process each sequence independently + with self.decoder.as_frozen(), self.joint.as_frozen(): + for batch_idx in range(encoder_output.size(0)): + inseq = encoder_output[batch_idx, :, :].unsqueeze(1) # [T, 1, D] + logitlen = encoded_lengths[batch_idx] + + partial_hypothesis = partial_hypotheses[batch_idx] if partial_hypotheses is not None else None + hypothesis = self._greedy_decode(inseq, logitlen, partial_hypotheses=partial_hypothesis) + hypotheses.append(hypothesis) + + # Pack results into Hypotheses + packed_result = pack_hypotheses(hypotheses, encoded_lengths) + + self.decoder.train(decoder_training_state) + self.joint.train(joint_training_state) + + return (packed_result,) + + @torch.no_grad() + def _greedy_decode( + self, x: torch.Tensor, out_len: torch.Tensor, partial_hypotheses: Optional[rnnt_utils.Hypothesis] = None + ): + # x: [T, 1, D] + # out_len: [seq_len] + + # Initialize blank state and empty label set in Hypothesis + hypothesis = rnnt_utils.Hypothesis(score=0.0, y_sequence=[], dec_state=None, timestep=[], last_token=None) + + if partial_hypotheses is not None: + hypothesis.last_token = partial_hypotheses.last_token + hypothesis.y_sequence = ( + partial_hypotheses.y_sequence.cpu().tolist() + if isinstance(partial_hypotheses.y_sequence, torch.Tensor) + else partial_hypotheses.y_sequence + ) + if partial_hypotheses.dec_state is not None: + hypothesis.dec_state = self.decoder.batch_concat_states([partial_hypotheses.dec_state]) + hypothesis.dec_state = _states_to_device(hypothesis.dec_state, x.device) + + if self.preserve_alignments: + # Alignments is a 2-dimensional dangling list representing T x U + hypothesis.alignments = [[]] + + if self.preserve_frame_confidence: + hypothesis.frame_confidence = [[]] + + time_idx = 0 + while time_idx < out_len: + # Extract encoder embedding at timestep t + # f = x[time_idx, :, :].unsqueeze(0) # [1, 1, D] + f = x.narrow(dim=0, start=time_idx, length=1) + + # Setup exit flags and counter + not_blank = True + symbols_added = 0 + + need_loop = True + # While blank is not predicted, or we dont run out of max symbols per timestep + while need_loop and (self.max_symbols is None or symbols_added < self.max_symbols): + # In the first timestep, we initialize the network with RNNT Blank + # In later timesteps, we provide previous predicted label as input. + if hypothesis.last_token is None and hypothesis.dec_state is None: + last_label = self._SOS + else: + last_label = label_collate([[hypothesis.last_token]]) + + # Perform prediction network and joint network steps. + g, hidden_prime = self._pred_step(last_label, hypothesis.dec_state) + # If preserving per-frame confidence, log_normalize must be true + logits = self._joint_step(f, g, log_normalize=False) + logp = logits[0, 0, 0, : -len(self.durations)] + if self.preserve_frame_confidence: + logp = torch.log_softmax(logp, -1) + + duration_logp = torch.log_softmax(logits[0, 0, 0, -len(self.durations) :], dim=-1) + del g + + # torch.max(0) op doesnt exist for FP 16. + if logp.dtype != torch.float32: + logp = logp.float() + + # get index k, of max prob + v, k = logp.max(0) + k = k.item() # K is the label at timestep t_s in inner loop, s >= 0. + + d_v, d_k = duration_logp.max(0) + d_k = d_k.item() + + skip = self.durations[d_k] + + if self.preserve_alignments: + # insert logprobs into last timestep + hypothesis.alignments[-1].append((logp.to('cpu'), torch.tensor(k, dtype=torch.int32))) + + if self.preserve_frame_confidence: + # insert confidence into last timestep + hypothesis.frame_confidence[-1].append(self._get_confidence(logp)) + + del logp + + # If blank token is predicted, exit inner loop, move onto next timestep t + if k == self._blank_index: + not_blank = False + + # this rarely happens, but we manually increment the `skip` number + # if blank is emitted and duration=0 is predicted. This prevents possible + # infinite loops. + if skip == 0: + skip = 1 + + if self.preserve_alignments: + # convert Ti-th logits into a torch array + hypothesis.alignments.append([]) # blank buffer for next timestep + + if self.preserve_frame_confidence: + hypothesis.frame_confidence.append([]) # blank buffer for next timestep + else: + # Append token to label set, update RNN state. + hypothesis.y_sequence.append(k) + hypothesis.score += float(v) + hypothesis.timestep.append(time_idx) + hypothesis.dec_state = hidden_prime + hypothesis.last_token = k + + # Increment token counter. + symbols_added += 1 + time_idx += skip + need_loop = skip == 0 + + if symbols_added == self.max_symbols: + time_idx += 1 + + # Remove trailing empty list of Alignments + if self.preserve_alignments: + if len(hypothesis.alignments[-1]) == 0: + del hypothesis.alignments[-1] + + # Remove trailing empty list of per-frame confidence + if self.preserve_frame_confidence: + if len(hypothesis.frame_confidence[-1]) == 0: + del hypothesis.frame_confidence[-1] + + # Unpack the hidden states + hypothesis.dec_state = self.decoder.batch_select_state(hypothesis.dec_state, 0) + + return hypothesis + + +class GreedyBatchedTDTInfer(_GreedyRNNTInfer): + """A batch level greedy TDT decoder. + Batch level greedy decoding, performed auto-repressively. + Args: + decoder_model: rnnt_utils.AbstractRNNTDecoder implementation. + joint_model: rnnt_utils.AbstractRNNTJoint implementation. + blank_index: int index of the blank token. Must be len(vocabulary) for TDT models. + durations: a list containing durations. + max_symbols_per_step: Optional int. The maximum number of symbols that can be added + to a sequence in a single time step; if set to None then there is + no limit. + preserve_alignments: Bool flag which preserves the history of alignments generated during + greedy decoding (sample / batched). When set to true, the Hypothesis will contain + the non-null value for `alignments` in it. Here, `alignments` is a List of List of + Tuple(Tensor (of length V + 1 + num-big-blanks), Tensor(scalar, label after argmax)). + The length of the list corresponds to the Acoustic Length (T). + Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more targets from a vocabulary. + U is the number of target tokens for the current timestep Ti. + preserve_frame_confidence: Bool flag which preserves the history of per-frame confidence scores generated + during greedy decoding (sample / batched). When set to true, the Hypothesis will contain + the non-null value for `frame_confidence` in it. Here, `frame_confidence` is a List of List of floats. + The length of the list corresponds to the Acoustic Length (T). + Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. + U is the number of target tokens for the current timestep Ti. + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame + confidence scores. + name: The method name (str). + Supported values: + - 'max_prob' for using the maximum token probability as a confidence. + - 'entropy' for using normalized entropy of a log-likelihood vector. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. + Supported values: + - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). + Note that for this entropy, the temperature should comply the following inequality: + 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. + Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), + where α is a parameter. When α == 1, it works like the Gibbs entropy. + More: https://en.wikipedia.org/wiki/Tsallis_entropy + - 'renui' for the Rényi entropy. + Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), + where α is a parameter. When α == 1, it works like the Gibbs entropy. + More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy + temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the temperature equals one, scaling is not applied to 'max_prob', + and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) + entropy_norm: A mapping of the entropy value to the interval [0,1]. + Supported values: + - 'lin' for using the linear mapping. + - 'exp' for using exponential mapping with linear shift. + """ + + def __init__( + self, + decoder_model: rnnt_abstract.AbstractRNNTDecoder, + joint_model: rnnt_abstract.AbstractRNNTJoint, + blank_index: int, + durations: List[int], + max_symbols_per_step: Optional[int] = None, + preserve_alignments: bool = False, + preserve_frame_confidence: bool = False, + confidence_method_cfg: Optional[DictConfig] = None, + ): + super().__init__( + decoder_model=decoder_model, + joint_model=joint_model, + blank_index=blank_index, + max_symbols_per_step=max_symbols_per_step, + preserve_alignments=preserve_alignments, + preserve_frame_confidence=preserve_frame_confidence, + confidence_method_cfg=confidence_method_cfg, + ) + self.durations = durations + + # Depending on availability of `blank_as_pad` support + # switch between more efficient batch decoding technique + if self.decoder.blank_as_pad: + self._greedy_decode = self._greedy_decode_blank_as_pad + else: + self._greedy_decode = self._greedy_decode_masked + + @typecheck() + def forward( + self, + encoder_output: torch.Tensor, + encoded_lengths: torch.Tensor, + partial_hypotheses: Optional[List[rnnt_utils.Hypothesis]] = None, + ): + """Returns a list of hypotheses given an input batch of the encoder hidden embedding. + Output token is generated auto-repressively. + Args: + encoder_output: A tensor of size (batch, features, timesteps). + encoded_lengths: list of int representing the length of each sequence + output sequence. + Returns: + packed list containing batch number of sentences (Hypotheses). + """ + # Preserve decoder and joint training state + decoder_training_state = self.decoder.training + joint_training_state = self.joint.training + + with torch.inference_mode(): + # Apply optional preprocessing + encoder_output = encoder_output.transpose(1, 2) # (B, T, D) + logitlen = encoded_lengths + + self.decoder.eval() + self.joint.eval() + + with self.decoder.as_frozen(), self.joint.as_frozen(): + inseq = encoder_output # [B, T, D] + hypotheses = self._greedy_decode( + inseq, logitlen, device=inseq.device, partial_hypotheses=partial_hypotheses + ) + + # Pack the hypotheses results + packed_result = pack_hypotheses(hypotheses, logitlen) + + self.decoder.train(decoder_training_state) + self.joint.train(joint_training_state) + + return (packed_result,) + + def _greedy_decode_blank_as_pad( + self, + x: torch.Tensor, + out_len: torch.Tensor, + device: torch.device, + partial_hypotheses: Optional[List[rnnt_utils.Hypothesis]] = None, + ): + if partial_hypotheses is not None: + raise NotImplementedError("`partial_hypotheses` support is not supported") + + with torch.inference_mode(): + # x: [B, T, D] + # out_len: [B] + # device: torch.device + + # Initialize list of Hypothesis + batchsize = x.shape[0] + hypotheses = [ + rnnt_utils.Hypothesis(score=0.0, y_sequence=[], timestep=[], dec_state=None) for _ in range(batchsize) + ] + + # Initialize Hidden state matrix (shared by entire batch) + hidden = None + + # If alignments need to be preserved, register a danling list to hold the values + if self.preserve_alignments: + # alignments is a 3-dimensional dangling list representing B x T x U + for hyp in hypotheses: + hyp.alignments = [[]] + + # If confidence scores need to be preserved, register a danling list to hold the values + if self.preserve_frame_confidence: + # frame_confidence is a 3-dimensional dangling list representing B x T x U + for hyp in hypotheses: + hyp.frame_confidence = [[]] + hyp.y_3best = [[]] + hyp.frame_confidence_3best = [[[]]] + hyp.logp = [[]] + + # Last Label buffer + Last Label without blank buffer + # batch level equivalent of the last_label + last_label = torch.full([batchsize, 1], fill_value=self._blank_index, dtype=torch.long, device=device) + + # Mask buffers + blank_mask = torch.full([batchsize], fill_value=0, dtype=torch.bool, device=device) + + # Get max sequence length + max_out_len = out_len.max() + + # skip means the number of frames the next decoding step should "jump" to. When skip == 1 + # it means the next decoding step will just use the next input frame. + skip = 1 + for time_idx in range(max_out_len): + if skip > 1: # if skip > 1 at the current step, we decrement it and skip the current frame. + skip -= 1 + continue + f = x.narrow(dim=1, start=time_idx, length=1) # [B, 1, D] + + # need_to_stay is a boolean indicates whether the next decoding step should remain in the same frame. + need_to_stay = True + symbols_added = 0 + + # Reset blank mask + blank_mask.mul_(False) + + # Update blank mask with time mask + # Batch: [B, T, D], but Bi may have seq len < max(seq_lens_in_batch) + # Forcibly mask with "blank" tokens, for all sample where current time step T > seq_len + blank_mask = time_idx >= out_len + + # Start inner loop + while need_to_stay and (self.max_symbols is None or symbols_added < self.max_symbols): + # Batch prediction and joint network steps + # If very first prediction step, submit SOS tag (blank) to pred_step. + # This feeds a zero tensor as input to AbstractRNNTDecoder to prime the state + if time_idx == 0 and symbols_added == 0 and hidden is None: + g, hidden_prime = self._pred_step(self._SOS, hidden, batch_size=batchsize) + else: + # Perform batch step prediction of decoder, getting new states and scores ("g") + g, hidden_prime = self._pred_step(last_label, hidden, batch_size=batchsize) + + # Batched joint step - Output = [B, V + 1 + num-big-blanks] + # Note: log_normalize must not be True here since the joiner output is contanetation of both token logits and duration logits, + # and they need to be normalized independently. + joined = self._joint_step(f, g, log_normalize=None) + logp = joined[:, 0, 0, : -len(self.durations)] + duration_logp = joined[:, 0, 0, -len(self.durations) :] + + if logp.dtype != torch.float32: + logp = logp.float() + duration_logp = duration_logp.float() + + # get the max for both token and duration predictions. + v, k = logp.max(1) + dv, dk = duration_logp.max(1) + + # here we set the skip value to be the minimum of all predicted durations, hense the "torch.min(dk)" call there. + # Please refer to Section 5.2 of our paper https://arxiv.org/pdf/2304.06795.pdf for explanation of this. + skip = self.durations[int(torch.min(dk))] + + # this is a special case: if all batches emit blanks, we require that skip be at least 1 + # so we don't loop forever at the current frame. + if blank_mask.all(): + if skip == 0: + skip = 1 + + need_to_stay = skip == 0 + del g + + # Update blank mask with current predicted blanks + # This is accumulating blanks over all time steps T and all target steps min(max_symbols, U) + k_is_blank = k == self._blank_index + blank_mask.bitwise_or_(k_is_blank) + + del k_is_blank + del logp, duration_logp + + # If all samples predict / have predicted prior blanks, exit loop early + # This is equivalent to if single sample predicted k + if not blank_mask.all(): + # Collect batch indices where blanks occurred now/past + blank_indices = (blank_mask == 1).nonzero(as_tuple=False) + + # Recover prior state for all samples which predicted blank now/past + if hidden is not None: + hidden_prime = self.decoder.batch_copy_states(hidden_prime, hidden, blank_indices) + + elif len(blank_indices) > 0 and hidden is None: + # Reset state if there were some blank and other non-blank predictions in batch + # Original state is filled with zeros so we just multiply + # LSTM has 2 states + hidden_prime = self.decoder.batch_copy_states(hidden_prime, None, blank_indices, value=0.0) + + # Recover prior predicted label for all samples which predicted blank now/past + k[blank_indices] = last_label[blank_indices, 0] + + # Update new label and hidden state for next iteration + last_label = k.clone().view(-1, 1) + hidden = hidden_prime + + # Update predicted labels, accounting for time mask + # If blank was predicted even once, now or in the past, + # Force the current predicted label to also be blank + # This ensures that blanks propogate across all timesteps + # once they have occured (normally stopping condition of sample level loop). + for kidx, ki in enumerate(k): + if blank_mask[kidx] == 0: + hypotheses[kidx].y_sequence.append(ki) + hypotheses[kidx].timestep.append(time_idx) + hypotheses[kidx].score += float(v[kidx]) + + symbols_added += 1 + + # Remove trailing empty list of alignments at T_{am-len} x Uj + if self.preserve_alignments: + for batch_idx in range(batchsize): + if len(hypotheses[batch_idx].alignments[-1]) == 0: + del hypotheses[batch_idx].alignments[-1] + + # Remove trailing empty list of confidence scores at T_{am-len} x Uj + if self.preserve_frame_confidence: + for batch_idx in range(batchsize): + if len(hypotheses[batch_idx].frame_confidence[-1]) == 0: + del hypotheses[batch_idx].frame_confidence[-1] + del hypotheses[batch_idx].y_3best[-1] + del hypotheses[batch_idx].frame_confidence_3best[-1] + del hypotheses[batch_idx].logp[-1] + + # Preserve states + for batch_idx in range(batchsize): + hypotheses[batch_idx].dec_state = self.decoder.batch_select_state(hidden, batch_idx) + + return hypotheses + + def _greedy_decode_masked( + self, + x: torch.Tensor, + out_len: torch.Tensor, + device: torch.device, + partial_hypotheses: Optional[List[rnnt_utils.Hypothesis]] = None, + ): + raise NotImplementedError("masked greedy-batched decode is not supported for TDT models.") diff --git a/nemo/collections/asr/parts/submodules/subsampling.py b/nemo/collections/asr/parts/submodules/subsampling.py index 4358d09977fe..23bd625108c7 100644 --- a/nemo/collections/asr/parts/submodules/subsampling.py +++ b/nemo/collections/asr/parts/submodules/subsampling.py @@ -19,6 +19,7 @@ from torch.nn import LayerNorm from nemo.collections.asr.parts.submodules.causal_convs import CausalConv2D +from nemo.utils import logging class StackingSubsampling(torch.nn.Module): @@ -65,6 +66,8 @@ class ConvSubsampling(torch.nn.Module): Args: subsampling (str): The subsampling technique from {"vggnet", "striding"} subsampling_factor (int): The subsampling factor which should be a power of 2 + subsampling_conv_chunking_factor (int): Input chunking factor which can be -1 (no chunking) + 1 (auto) or a power of 2. Default is 1 feat_in (int): size of the input features feat_out (int): size of the output features conv_channels (int): Number of channels for the convolution layers. @@ -72,7 +75,15 @@ class ConvSubsampling(torch.nn.Module): """ def __init__( - self, subsampling, subsampling_factor, feat_in, feat_out, conv_channels, activation=nn.ReLU(), is_causal=False + self, + subsampling, + subsampling_factor, + feat_in, + feat_out, + conv_channels, + subsampling_conv_chunking_factor=1, + activation=nn.ReLU(), + is_causal=False, ): super(ConvSubsampling, self).__init__() self._subsampling = subsampling @@ -86,6 +97,14 @@ def __init__( self.subsampling_factor = subsampling_factor self.is_causal = is_causal + if ( + subsampling_conv_chunking_factor != -1 + and subsampling_conv_chunking_factor != 1 + and subsampling_conv_chunking_factor % 2 != 0 + ): + raise ValueError("subsampling_conv_chunking_factor should be -1, 1, or a power of 2") + self.subsampling_conv_chunking_factor = subsampling_conv_chunking_factor + in_channels = 1 layers = [] @@ -264,7 +283,32 @@ def forward(self, x, lengths): ) x = x.unsqueeze(1) - x = self.conv(x) + # split inputs if chunking_factor is set + if self.subsampling_conv_chunking_factor != -1: + if self.subsampling_conv_chunking_factor == 1: + # if subsampling_conv_chunking_factor is 1, we split only if needed + # avoiding a bug / feature limiting indexing of tensors to 2**31 + # see https://github.com/pytorch/pytorch/issues/80020 + x_ceil = 2 ** 31 / self._conv_channels * self._stride * self._stride + if torch.numel(x) > x_ceil: + need_to_split = True + else: + need_to_split = False + else: + # if subsampling_conv_chunking_factor > 1 we always split + need_to_split = True + + if need_to_split: + x, success = self.conv_split_by_batch(x) + if not success: # if unable to split by batch, try by channel + if self._subsampling == 'dw_striding': + x = self.conv_split_by_channel(x) + else: + x = self.conv(x) # try anyway + else: + x = self.conv(x) + else: + x = self.conv(x) b, c, t, f = x.size() x = self.out(x.transpose(1, 2).reshape(b, t, -1)) @@ -293,6 +337,109 @@ def reset_parameters(self): torch.nn.init.uniform_(self.out.weight, -fc_scale, fc_scale) torch.nn.init.uniform_(self.out.bias, -fc_scale, fc_scale) + def conv_split_by_batch(self, x): + """ Tries to split input by batch, run conv and concat results """ + b, _, _, _ = x.size() + if b == 1: # can't split if batch size is 1 + return x, False + + if self.subsampling_conv_chunking_factor > 1: + cf = self.subsampling_conv_chunking_factor + logging.debug(f'using manually set chunking factor: {cf}') + else: + # avoiding a bug / feature limiting indexing of tensors to 2**31 + # see https://github.com/pytorch/pytorch/issues/80020 + x_ceil = 2 ** 31 / self._conv_channels * self._stride * self._stride + p = math.ceil(math.log(torch.numel(x) / x_ceil, 2)) + cf = 2 ** p + logging.debug(f'using auto set chunking factor: {cf}') + + new_batch_size = b // cf + if new_batch_size == 0: # input is too big + return x, False + + logging.debug(f'conv subsampling: using split batch size {new_batch_size}') + return torch.cat([self.conv(chunk) for chunk in torch.split(x, new_batch_size, 0)]), True + + def conv_split_by_channel(self, x): + """ For dw convs, tries to split input by time, run conv and concat results """ + x = self.conv[0](x) # full conv2D + x = self.conv[1](x) # activation + + for i in range(self._sampling_num - 1): + _, c, t, _ = x.size() + + if self.subsampling_conv_chunking_factor > 1: + cf = self.subsampling_conv_chunking_factor + logging.debug(f'using manually set chunking factor: {cf}') + else: + # avoiding a bug / feature limiting indexing of tensors to 2**31 + # see https://github.com/pytorch/pytorch/issues/80020 + p = math.ceil(math.log(torch.numel(x) / 2 ** 31, 2)) + cf = 2 ** p + logging.debug(f'using auto set chunking factor: {cf}') + + new_c = int(c // cf) + if new_c == 0: + logging.warning(f'chunking factor {cf} is too high; splitting down to one channel.') + new_c = 1 + + new_t = int(t // cf) + if new_t == 0: + logging.warning(f'chunking factor {cf} is too high; splitting down to one timestep.') + new_t = 1 + + logging.debug(f'conv dw subsampling: using split C size {new_c} and split T size {new_t}') + x = self.channel_chunked_conv(self.conv[i * 3 + 2], new_c, x) # conv2D, depthwise + + # splitting pointwise convs by time + x = torch.cat([self.conv[i * 3 + 3](chunk) for chunk in torch.split(x, new_t, 2)], 2) # conv2D, pointwise + x = self.conv[i * 3 + 4](x) # activation + return x + + def channel_chunked_conv(self, conv, chunk_size, x): + """ Performs channel chunked convolution""" + + ind = 0 + out_chunks = [] + for chunk in torch.split(x, chunk_size, 1): + step = chunk.size()[1] + + if self.is_causal: + chunk = nn.functional.pad( + chunk, pad=(self._kernel_size - 1, self._stride - 1, self._kernel_size - 1, self._stride - 1) + ) + ch_out = nn.functional.conv2d( + chunk, + conv.weight[ind : ind + step, :, :, :], + bias=conv.bias[ind : ind + step], + stride=self._stride, + padding=0, + groups=step, + ) + else: + ch_out = nn.functional.conv2d( + chunk, + conv.weight[ind : ind + step, :, :, :], + bias=conv.bias[ind : ind + step], + stride=self._stride, + padding=self._left_padding, + groups=step, + ) + out_chunks.append(ch_out) + ind += step + + return torch.cat(out_chunks, 1) + + def change_subsampling_conv_chunking_factor(self, subsampling_conv_chunking_factor: int): + if ( + subsampling_conv_chunking_factor != -1 + and subsampling_conv_chunking_factor != 1 + and subsampling_conv_chunking_factor % 2 != 0 + ): + raise ValueError("subsampling_conv_chunking_factor should be -1, 1, or a power of 2") + self.subsampling_conv_chunking_factor = subsampling_conv_chunking_factor + def calc_length(lengths, all_paddings, kernel_size, stride, ceil_mode, repeat_num=1): """ Calculates the output length of a Tensor passed through a convolution or max pooling layer""" diff --git a/nemo/collections/asr/parts/utils/transcribe_utils.py b/nemo/collections/asr/parts/utils/transcribe_utils.py index 60f936306d05..f8a69fbe817d 100644 --- a/nemo/collections/asr/parts/utils/transcribe_utils.py +++ b/nemo/collections/asr/parts/utils/transcribe_utils.py @@ -23,8 +23,7 @@ from tqdm.auto import tqdm import nemo.collections.asr as nemo_asr -from nemo.collections.asr.models import ASRModel -from nemo.collections.asr.models.ctc_models import EncDecCTCModel +from nemo.collections.asr.models import ASRModel, EncDecHybridRNNTCTCModel from nemo.collections.asr.parts.utils import rnnt_utils from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchASR from nemo.collections.common.parts.preprocessing.manifest import get_full_path @@ -190,11 +189,6 @@ def setup_model(cfg: DictConfig, map_location: torch.device) -> Tuple[ASRModel, asr_model = imported_class.restore_from( restore_path=cfg.model_path, map_location=map_location, ) # type: ASRModel - if hasattr(cfg, "model_change"): - asr_model.change_attention_model( - self_attention_model=cfg.model_change.conformer.get("self_attention_model", None), - att_context_size=cfg.model_change.conformer.get("att_context_size", None), - ) model_name = os.path.splitext(os.path.basename(cfg.model_path))[0] else: # restore model by name @@ -203,6 +197,12 @@ def setup_model(cfg: DictConfig, map_location: torch.device) -> Tuple[ASRModel, ) # type: ASRModel model_name = cfg.pretrained_name + if hasattr(cfg, "model_change"): + asr_model.change_attention_model( + self_attention_model=cfg.model_change.conformer.get("self_attention_model", None), + att_context_size=cfg.model_change.conformer.get("att_context_size", None), + ) + return asr_model, model_name @@ -388,7 +388,7 @@ def transcribe_partial_audio( decode_function = ( asr_model.decoding.rnnt_decoder_predictions_tensor if decoder_type == 'rnnt' - else asr_model.decoding.ctc_decoder_predictions_tensor + else asr_model.ctc_decoding.ctc_decoder_predictions_tensor ) elif hasattr(asr_model, 'joint'): # RNNT model decode_function = asr_model.decoding.rnnt_decoder_predictions_tensor @@ -421,6 +421,8 @@ def transcribe_partial_audio( input_signal=test_batch[0].to(device), input_signal_length=test_batch[1].to(device) ) logits, logits_len = outputs[0], outputs[1] + if isinstance(asr_model, EncDecHybridRNNTCTCModel) and decoder_type == "ctc": + logits = asr_model.ctc_decoder(encoder_output=logits) if logprobs: # dump log probs per file for idx in range(logits.shape[0]): diff --git a/nemo/collections/asr/parts/utils/vad_utils.py b/nemo/collections/asr/parts/utils/vad_utils.py index d35d5466a523..68dfaf3d6c76 100644 --- a/nemo/collections/asr/parts/utils/vad_utils.py +++ b/nemo/collections/asr/parts/utils/vad_utils.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import glob import json import math @@ -18,8 +19,9 @@ import os import shutil from itertools import repeat +from math import ceil, floor from pathlib import Path -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple, Union import IPython.display as ipd import librosa @@ -27,12 +29,15 @@ import numpy as np import pandas as pd import torch +from omegaconf import DictConfig from pyannote.core import Annotation, Segment from pyannote.metrics import detection +from sklearn.metrics import roc_auc_score from sklearn.model_selection import ParameterGrid from tqdm import tqdm -from nemo.collections.asr.models import EncDecClassificationModel +from nemo.collections.asr.models import EncDecClassificationModel, EncDecFrameClassificationModel +from nemo.collections.common.parts.preprocessing.manifest import get_full_path from nemo.utils import logging try: @@ -78,6 +83,7 @@ def prepare_manifest(config: dict) -> str: 'label': 'infer', 'split_duration': config['split_duration'], 'window_length_in_sec': config['window_length_in_sec'], + 'manifest_dir': Path(config['input']).parent if type(config['input']) == str else '', } if config.get('num_workers') is not None and config['num_workers'] > 1: @@ -138,6 +144,12 @@ def write_vad_infer_manifest(file: dict, args_func: dict) -> list: in_duration = file.get('duration', None) in_offset = file.get('offset', 0) + # if filepath is not found, try to find it in the dir of manifest + if not Path(filepath).is_file(): + new_filepath = Path(args_func['manifest_dir']) / filepath + if new_filepath.is_file(): + filepath = new_filepath.absolute().as_posix() + try: sr = 16000 x, _sr = librosa.load(filepath, sr=sr, offset=in_offset, duration=in_duration) @@ -263,7 +275,9 @@ def generate_overlap_vad_seq( if out_dir: overlap_out_dir = out_dir else: - overlap_out_dir = frame_pred_dir + "/overlap_smoothing_output" + "_" + smoothing_method + "_" + str(overlap) + overlap_out_dir = os.path.join( + frame_pred_dir, "overlap_smoothing_output" + "_" + smoothing_method + "_" + str(overlap) + ) if not os.path.exists(overlap_out_dir): os.mkdir(overlap_out_dir) @@ -692,7 +706,12 @@ def generate_vad_segment_table_per_file(pred_filepath: str, per_args: dict) -> s def generate_vad_segment_table( - vad_pred_dir: str, postprocessing_params: dict, frame_length_in_sec: float, num_workers: int, out_dir: str = None, + vad_pred_dir: str, + postprocessing_params: dict, + frame_length_in_sec: float, + num_workers: int, + out_dir: str = None, + use_rttm: bool = False, ) -> str: """ Convert frame level prediction to speech segment in start and end times format. @@ -706,27 +725,26 @@ def generate_vad_segment_table( out_dir (str): output dir of generated table/csv file. num_workers(float): number of process for multiprocessing Returns: - table_out_dir(str): directory of the generated table. + out_dir(str): directory of the generated table. """ suffixes = ("frame", "mean", "median") vad_pred_filepath_list = [os.path.join(vad_pred_dir, x) for x in os.listdir(vad_pred_dir) if x.endswith(suffixes)] - if out_dir: - table_out_dir = out_dir - else: - table_out_dir_name = "table_output_tmp_" + if not out_dir: + out_dir_name = "seg_output" for key in postprocessing_params: - table_out_dir_name = table_out_dir_name + str(key) + str(postprocessing_params[key]) + "_" + out_dir_name = out_dir_name + "-" + str(key) + str(postprocessing_params[key]) - table_out_dir = os.path.join(vad_pred_dir, table_out_dir_name) + out_dir = os.path.join(vad_pred_dir, out_dir_name) - if not os.path.exists(table_out_dir): - os.mkdir(table_out_dir) + if not os.path.exists(out_dir): + os.mkdir(out_dir) per_args = { "frame_length_in_sec": frame_length_in_sec, - "out_dir": table_out_dir, + "out_dir": out_dir, + "use_rttm": use_rttm, } per_args = {**per_args, **postprocessing_params} num_workers = None @@ -741,12 +759,11 @@ def generate_vad_segment_table( leave=True, ) ) - else: for vad_pred_filepath in tqdm(vad_pred_filepath_list, desc='creating speech segments', leave=True): generate_vad_segment_table_per_file(vad_pred_filepath, per_args) - return table_out_dir + return out_dir def generate_vad_segment_table_per_file_star(args): @@ -955,33 +972,50 @@ def pred_rttm_map(vad_pred: str, groundtruth_RTTM: str, vad_pred_method: str = " def plot( path2audio_file: str, - path2_vad_pred: str, - path2ground_truth_label: str = None, + path2_vad_pred: Optional[str] = None, + path2groundtruth_rttm: Optional[str] = None, + groundtruth_labels: Optional[str] = None, + sample_rate: int = 16000, offset: float = 0, duration: float = None, threshold: float = None, per_args: dict = None, + unit_frame_len: float = 0.01, + label_repeat: int = 1, + xticks_step: int = 5, ) -> ipd.Audio: """ - Plot VAD outputs for demonstration in tutorial + Plot Audio and/or VAD output and/or groundtruth labels for visualization Args: path2audio_file (str): path to audio file. path2_vad_pred (str): path to vad prediction file, - path2ground_truth_label(str): path to groundtruth label file. + path2groundtruth_rttm(str): path to groundtruth RTTM file. + ground_truth_labels(str): a list of groundtruth label. + sample_rate (int): sample rate of audio file. + offset (float): offset in seconds. + duration (float): duration in seconds. threshold (float): threshold for prediction score (from 0 to 1). per_args(dict): a dict that stores the thresholds for postprocessing. + unit_frame_len (float): unit frame length in seconds for VAD predictions. + label_repeat (int): repeat the label for this number of times to match different frame lengths in preds and labels. + xticks_step (int): step size for xticks. """ plt.figure(figsize=[20, 2]) - UNIT_FRAME_LEN = 0.01 - audio, sample_rate = librosa.load(path=path2audio_file, sr=16000, mono=True, offset=offset, duration=duration) + audio, sample_rate = librosa.load( + path=path2audio_file, sr=sample_rate, mono=True, offset=offset, duration=duration + ) dur = librosa.get_duration(y=audio, sr=sample_rate) - time = np.arange(offset, offset + dur, UNIT_FRAME_LEN) - frame, _ = load_tensor_from_file(path2_vad_pred) - frame_snippet = frame[int(offset / UNIT_FRAME_LEN) : int((offset + dur) / UNIT_FRAME_LEN)] + time = np.arange(offset, offset + dur, unit_frame_len) + len_pred = int(dur / unit_frame_len) + 1 + + frame_snippet = None + if path2_vad_pred: + frame, _ = load_tensor_from_file(path2_vad_pred) + frame_snippet = frame[int(offset / unit_frame_len) : int((offset + dur) / unit_frame_len)] + len_pred = len(frame_snippet) - len_pred = len(frame_snippet) ax1 = plt.subplot() ax1.plot(np.arange(audio.size) / sample_rate, audio, 'gray') ax1.set_xlim([0, int(dur) + 1]) @@ -995,27 +1029,41 @@ def plot( if not threshold and not per_args: raise ValueError("One and only one of threshold and per_args must have been used!") - if threshold: + if threshold and frame_snippet is not None: pred_snippet = np.where(frame_snippet >= threshold, 1, 0) - if per_args: + elif per_args and frame_snippet is not None: _, per_args_float = prepare_gen_segment_table( frame, per_args ) # take whole frame here for calculating onset and offset speech_segments = generate_vad_segment_table_per_tensor(frame, per_args_float) pred = gen_pred_from_speech_segments(speech_segments, frame) - pred_snippet = pred[int(offset / UNIT_FRAME_LEN) : int((offset + dur) / UNIT_FRAME_LEN)] + pred_snippet = pred[int(offset / unit_frame_len) : int((offset + dur) / unit_frame_len)] + else: + pred_snippet = None + + if path2groundtruth_rttm and path2groundtruth_rttm.endswith('.rttm'): + label = extract_labels(path2groundtruth_rttm, time) + elif groundtruth_labels: + label = [float(x) for x in groundtruth_labels] + if label_repeat > 1: + label = np.repeat(label, label_repeat) + label = label[int(offset / unit_frame_len) : int((offset + dur) / unit_frame_len)] + else: + label = None - if path2ground_truth_label: - label = extract_labels(path2ground_truth_label, time) - ax2.plot(np.arange(len_pred) * UNIT_FRAME_LEN, label, 'r', label='label') + if label is not None: + ax2.plot(np.arange(len_pred) * unit_frame_len, label, 'r', label='label') + if pred_snippet is not None: + ax2.plot(np.arange(len_pred) * unit_frame_len, pred_snippet, 'b', label='pred') + if frame_snippet is not None: + ax2.plot(np.arange(len_pred) * unit_frame_len, frame_snippet, 'g--', label='speech prob') - ax2.plot(np.arange(len_pred) * UNIT_FRAME_LEN, pred_snippet, 'b', label='pred') - ax2.plot(np.arange(len_pred) * UNIT_FRAME_LEN, frame_snippet, 'g--', label='speech prob') ax2.tick_params(axis='y', labelcolor='r') ax2.legend(loc='lower right', shadow=True) ax2.set_ylabel('Preds and Probas') ax2.set_ylim([-0.1, 1.1]) - return ipd.Audio(audio, rate=16000) + ax2.set_xticks(np.arange(0, int(dur) + 1, xticks_step)) + return ipd.Audio(audio, rate=sample_rate) def gen_pred_from_speech_segments( @@ -1038,11 +1086,11 @@ def gen_pred_from_speech_segments( def extract_labels(path2ground_truth_label: str, time: list) -> list: """ Extract ground-truth label for given time period. - path2ground_truth_label (str): path of groundtruth label file + path2ground_truth_label (str): path of groundtruth RTTM file time (list) : a list of array representing time period. """ - data = pd.read_csv(path2ground_truth_label, sep=" ", delimiter=None, header=None) + data = pd.read_csv(path2ground_truth_label, sep="\s+", delimiter=None, header=None) data = data.rename(columns={3: "start", 4: "dur", 7: "speaker"}) labels = [] for pos in time: @@ -1086,9 +1134,14 @@ def generate_vad_frame_pred( else: log_probs = vad_model(input_signal=test_batch[0], input_signal_length=test_batch[1]) probs = torch.softmax(log_probs, dim=-1) + if len(probs.shape) == 3 and probs.shape[0] == 1: + # squeeze the batch dimension, since batch size is 1 for frame-VAD + probs = probs.squeeze(0) # [1,T,C] -> [T,C] pred = probs[:, 1] - if status[i] == 'start': + if window_length_in_sec == 0: + to_save = pred + elif status[i] == 'start': to_save = pred[:-trunc] elif status[i] == 'next': to_save = pred[trunc:-trunc_l] @@ -1097,6 +1150,7 @@ def generate_vad_frame_pred( else: to_save = pred + to_save = to_save.cpu().tolist() all_len += len(to_save) outpath = os.path.join(out_dir, data[i] + ".frame") with open(outpath, "a", encoding='utf-8') as fout: @@ -1125,6 +1179,21 @@ def init_vad_model(model_path: str): return vad_model +def init_frame_vad_model(model_path: str): + """ + Initiate VAD model with model path + """ + if model_path.endswith('.nemo'): + logging.info(f"Using local VAD model from {model_path}") + vad_model = EncDecFrameClassificationModel.restore_from(restore_path=model_path) + elif model_path.endswith('.ckpt'): + vad_model = EncDecFrameClassificationModel.load_from_checkpoint(checkpoint_path=model_path) + else: + logging.info(f"Using NGC cloud VAD model {model_path}") + vad_model = EncDecFrameClassificationModel.from_pretrained(model_name=model_path) + return vad_model + + def stitch_segmented_asr_output( segmented_output_manifest: str, speech_segments_tensor_dir: str = "speech_segments", @@ -1238,32 +1307,6 @@ def construct_manifest_eval( return aligned_vad_asr_output_manifest -def extract_audio_features(vad_model: EncDecClassificationModel, manifest_vad_input: str, out_dir: str) -> str: - """ - Extract audio features and write to out_dir - """ - - file_list = [] - with open(manifest_vad_input, 'r', encoding='utf-8') as fin: - for line in fin.readlines(): - file_list.append(Path(json.loads(line)['audio_filepath']).stem) - - logging.info(f"Extracting features on {len(file_list)} audio files/json lines!") - - for i, test_batch in enumerate(tqdm(vad_model.test_dataloader(), total=len(vad_model.test_dataloader()))): - test_batch = [x.to(vad_model.device) for x in test_batch] - with autocast(): - processed_signal, processed_signal_length = vad_model.preprocessor( - input_signal=test_batch[0], length=test_batch[1], - ) - processed_signal = processed_signal.squeeze(0)[:, :processed_signal_length] - processed_signal = processed_signal.cpu() - outpath = os.path.join(out_dir, file_list[i] + ".pt") - torch.save(processed_signal, outpath) - del test_batch - return out_dir - - def load_rttm_file(filepath: str) -> pd.DataFrame: """ Load rttm file and extract speech segments @@ -1321,7 +1364,7 @@ def load_speech_overlap_segments_from_rttm(rttm_file: str) -> Tuple[List[List[fl Returns: merged (List[List[float]]): merged speech intervals without overlaps - overlaps (List[List[float]]): intervals without overlap speech + overlaps (List[List[float]]): intervals with overlap speech """ speech_segments = list(load_rttm_file(rttm_file)['segment']) speech_segments = [list(x) for x in speech_segments] @@ -1367,7 +1410,9 @@ def get_nonspeech_segments( return nonspeech_segments -def get_frame_labels(segments: List[List[float]], frame_length: float, offset: float, duration: float) -> str: +def get_frame_labels( + segments: List[List[float]], frame_length: float, offset: float, duration: float, as_str: bool = True +) -> str: """ Generate frame-level binary labels for audio, '0' for non-speech and '1' for speech @@ -1379,30 +1424,39 @@ def get_frame_labels(segments: List[List[float]], frame_length: float, offset: f """ labels = [] n_frames = int(np.ceil(duration / frame_length)) - sid = 0 for i in range(n_frames): t = offset + i * frame_length while sid < len(segments) - 1 and segments[sid][1] < t: sid += 1 - if segments[sid][0] <= t <= segments[sid][1]: - labels.append('1') + if segments[sid][1] != 0 and segments[sid][0] <= t <= segments[sid][1]: + labels.append(1) else: - labels.append('0') - return ' '.join(labels) + labels.append(0) + if as_str: + return ' '.join([str(x) for x in labels]) + return [float(x) for x in labels] def plot_sample_from_rttm( - audio_file: str, rttm_file: str, max_duration: Optional[float] = None, save_path: str = "", show: bool = True + audio_file: str, + rttm_file: str, + max_duration: Optional[float] = None, + save_path: str = "", + show: bool = True, + offset: float = 0.0, + unit_frame_len: float = 0.01, ): + """ + Plot audio signal and frame-level labels from RTTM file + """ plt.figure(figsize=[20, 2]) - UNIT_FRAME_LEN = 0.01 - audio, sample_rate = librosa.load(path=audio_file, sr=16000, mono=True, offset=0, duration=max_duration) + audio, sample_rate = librosa.load(path=audio_file, sr=16000, mono=True, offset=offset, duration=max_duration) dur = librosa.get_duration(y=audio, sr=sample_rate) segments = load_speech_segments_from_rttm(rttm_file) - labels = get_frame_labels(segments, UNIT_FRAME_LEN, 0.0, dur) + labels = get_frame_labels(segments, unit_frame_len, offset, dur) labels = [float(x) for x in labels.split()] length = len(labels) @@ -1415,7 +1469,7 @@ def plot_sample_from_rttm( ax1.set_ylim([-1, 1]) ax2 = ax1.twinx() - ax2.plot(np.arange(length) * UNIT_FRAME_LEN, labels, 'r', label='label') + ax2.plot(np.arange(length) * unit_frame_len, labels, 'r', label='label') ax2.tick_params(axis='y', labelcolor='r') ax2.legend(loc='lower right', shadow=True) ax2.set_ylabel('Labels') @@ -1425,3 +1479,240 @@ def plot_sample_from_rttm( if save_path: plt.savefig(save_path) return ipd.Audio(audio, rate=16000) + + +def align_labels_to_frames(probs, labels, threshold=0.2): + """ + Aligns labels to frames when the frame length (e.g., 10ms) is different from the label length (e.g., 20ms). + The threshold 0.2 is not important, since the actual ratio will always be close to an integer unless using frame/label + lengths that are not multiples of each other (e.g., 15ms frame length and 20ms label length), which is not valid. + The value 0.2 here is just for easier unit testing. + Args: + probs (List[float]): list of probabilities + labels (List[int]): list of labels + threshold (float): threshold for rounding ratio to integer + Returns: + labels (List[int]): list of labels aligned to frames + """ + frames_len = len(probs) + labels_len = len(labels) + probs = torch.tensor(probs).float() + labels = torch.tensor(labels).long() + + if frames_len < labels_len: + # pad labels with zeros until labels_len is a multiple of frames_len + ratio = labels_len / frames_len + res = labels_len % frames_len + if ( + ceil(ratio) - ratio < threshold + ): # e.g., ratio = 2.9, ceil(ratio) = 3, then we pad labels to make it a multiple of 3 + # pad labels with zeros until labels_max_len is a multiple of logits_max_len + labels = labels.tolist() + if len(labels) % ceil(ratio) != 0: + labels += [0] * (ceil(ratio) - len(labels) % ceil(ratio)) + labels = torch.tensor(labels).long() + labels = labels.view(-1, ceil(ratio)).amax(1) + return align_labels_to_frames(probs.tolist(), labels.long().tolist()) + # otherwise, truncate additional labels until labels_max_len is a multiple of logits_max_len + if res > 0: + labels = labels[:-res] + labels = labels.view(-1, floor(ratio)).amax(1) + return labels.long().tolist() + elif frames_len > labels_len: + # repeat labels until labels_len is a multiple of frames_len + ratio = frames_len / labels_len + res = frames_len % labels_len + if ceil(ratio) - ratio < threshold: + # e.g., ratio is 1.83, ceil(ratio) = 2, then we repeat labels to make it a multiple of 2, and discard the redundant labels + labels = labels.repeat_interleave(ceil(ratio), dim=0).long().tolist() + labels = labels[:frames_len] + else: + # e.g., ratio is 2.02, floor(ratio) = 2, then we repeat labels to make it a multiple of 2 and add additional labels + labels = labels.repeat_interleave(floor(ratio), dim=0).long().tolist() + if res > 0: + labels += labels[-res:] + return labels + else: + return labels.long().tolist() + + +def read_rttm_as_pyannote_object(rttm_file: str, speaker_override: Optional[str] = None) -> Annotation: + """ + Read rttm file and construct a Pyannote object. + Args: + rttm_file(str) : path of rttm file. + speaker_override(str) : if not None, all speakers will be replaced by this value. + Returns: + annotation(pyannote.Annotation): annotation object + """ + annotation = Annotation() + data = pd.read_csv(rttm_file, sep="\s+", delimiter=None, header=None) + data = data.rename(columns={3: "start", 4: "dur", 7: "speaker"}) + for index, row in data.iterrows(): + if speaker_override is not None: + annotation[Segment(row['start'], row['start'] + row['dur'])] = speaker_override + else: + annotation[Segment(row['start'], row['start'] + row['dur'])] = row['speaker'] + return annotation + + +def convert_labels_to_speech_segments(labels: List[float], frame_length_in_sec: float = 0.01): + """ + Convert a list of labels to a list of speech segments. + Args: + labels (List[float]): list of labels + frame_length_in_sec (float): frame length in seconds + Returns: + segments (List[Tuple[float, float]]): list of speech segments + """ + segments = [] + start = -1 + for i, label in enumerate(labels): + if label == 1: + if start == -1: + start = i * frame_length_in_sec + else: + if start > -1: + segments.append([start, (i - 1) * frame_length_in_sec]) + start = -1 + if start != -1: + segments.append([start, (len(labels) - 1) * frame_length_in_sec]) + return segments + + +def frame_vad_construct_pyannote_object_per_file( + prediction: Union[str, List[float]], groundtruth: Union[str, List[float]], frame_length_in_sec: float = 0.01 +) -> Tuple[Annotation, Annotation]: + """ + Construct a Pyannote object for evaluation. + Args: + prediction (str) : path of VAD predictions stored as RTTM or CSV-like txt. + groundtruth (str): path of groundtruth rttm file. + frame_length_in_sec(float): frame length in seconds + Returns: + reference(pyannote.Annotation): groundtruth + hypothesis(pyannote.Annotation): prediction + """ + + hypothesis = Annotation() + if isinstance(groundtruth, str) and prediction.endswith('.rttm'): + hypothesis = read_rttm_as_pyannote_object(prediction, speaker_override='speech') + elif isinstance(groundtruth, str) and prediction.endswith('.txt'): + pred = pd.read_csv(prediction, sep=" ", header=None) + for index, row in pred.iterrows(): + hypothesis[Segment(float(row[0]), float(row[0]) + float(row[1]))] = 'speech' + elif isinstance(groundtruth, list): + segments = convert_labels_to_speech_segments(prediction, frame_length_in_sec) + for segment in segments: + hypothesis[Segment(segment[0], segment[1])] = 'speech' + else: + raise ValueError('prediction must be a path to rttm file or a list of frame labels.') + + reference = Annotation() + if isinstance(groundtruth, str) and groundtruth.endswith('.rttm'): + reference = read_rttm_as_pyannote_object(groundtruth, speaker_override='speech') + elif isinstance(groundtruth, list): + segments = convert_labels_to_speech_segments(groundtruth, frame_length_in_sec) + for segment in segments: + reference[Segment(segment[0], segment[1])] = 'speech' + else: + raise ValueError('groundtruth must be a path to rttm file or a list of frame labels.') + return reference, hypothesis + + +def frame_vad_infer_load_manifest(cfg: DictConfig): + """ + Load manifest file and prepare label/rttm mapping + Args: + cfg: config file + Returns: + manifest_orig (List[Dict]): original manifest data + key_labels_map (Dict): mapping from unique_audio_name to its labels + key_rttm_map (Dict): mapping from unique_audio_name to its rttm file + """ + unique_audio_names = set() + key_labels_map = {} + key_rttm_map = {} + manifest_orig = [] + manifest_file = Path(cfg.dataset).absolute().as_posix() + with open(manifest_file, 'r') as fin: + for line in fin.readlines(): + entry = json.loads(line.strip()) + audio_filepath = get_full_path(audio_file=entry['audio_filepath'], manifest_file=manifest_file) + entry['audio_filepath'] = str(audio_filepath) + uniq_audio_name = Path(audio_filepath).stem + + if uniq_audio_name in unique_audio_names: + raise ValueError("Please make sure each line is with different audio_filepath! ") + else: + unique_audio_names.add(uniq_audio_name) + + manifest_orig.append(entry) + + # always prefer RTTM labels if exist + if "label" not in entry and ("rttm_filepath" in entry or "rttm_file" in entry): + rttm_key = "rttm_filepath" if "rttm_filepath" in entry else "rttm_file" + segments = load_speech_segments_from_rttm(entry[rttm_key]) + label_str = get_frame_labels( + segments=segments, + frame_length=cfg.vad.parameters.shift_length_in_sec, + duration=entry['duration'], + offset=entry['offset'], + ) + key_rttm_map[uniq_audio_name] = entry[rttm_key] + key_labels_map[uniq_audio_name] = [float(x) for x in label_str.split()] + elif entry.get("label", None) is not None: + key_labels_map[uniq_audio_name] = [float(x) for x in entry["label"].split()] + elif cfg.evaluate: + raise ValueError("Must have either `label` or `rttm_filepath` in manifest when evaluate=True") + + return manifest_orig, key_labels_map, key_rttm_map + + +def frame_vad_eval_detection_error( + pred_dir: str, key_labels_map: dict, key_rttm_map: dict, key_pred_rttm_map: dict, frame_length_in_sec: float +): + """ + Perform evaluation on frame-VAD results + Args: + pred_dir: directory of frame-VAD prediction files with in `.frame` format + key_labels_map: dictionary of mapping each to its labels + key_rttm_map: dictionary of mapping each to its GROUNDTRUTH rttm file + key_pred_rttm_map: dictionary of mapping each to its PREDICTED rttm file + frame_length_in_sec: frame length in seconds, e.g. 0.02s + Returns: + auroc: AUROC score in 0~100% + report: Pyannote detection.DetectionErrorRate() report + """ + all_probs = [] + all_labels = [] + metric = detection.DetectionErrorRate() + key_probs_map = {} + predictions_list = list(Path(pred_dir).glob("*.frame")) + for frame_pred in tqdm(predictions_list, desc="Evaluating VAD results", total=len(predictions_list)): + pred_probs = [] + with frame_pred.open("r") as fin: + for line in fin.readlines(): + line = line.strip() + if not line: + continue + pred_probs.append(float(line)) + key = frame_pred.stem + key_probs_map[key] = pred_probs + key_labels_map[key] = align_labels_to_frames(probs=pred_probs, labels=key_labels_map[key]) + all_probs.extend(key_probs_map[key]) + all_labels.extend(key_labels_map[key]) + + if key in key_rttm_map: + groundtruth = key_rttm_map[key] + else: + groundtruth = key_labels_map[key] + + reference, hypothesis = frame_vad_construct_pyannote_object_per_file( + prediction=key_pred_rttm_map[key], groundtruth=groundtruth, frame_length_in_sec=frame_length_in_sec, + ) + metric(reference, hypothesis) + + auroc = roc_auc_score(y_true=all_labels, y_score=all_probs) + report = metric.report(display=False) + return auroc, report diff --git a/nemo/collections/common/data/__init__.py b/nemo/collections/common/data/__init__.py index afb12338e548..ecc67ef05ea5 100644 --- a/nemo/collections/common/data/__init__.py +++ b/nemo/collections/common/data/__init__.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.collections.common.data.dataset import ConcatDataset, ConcatMapDataset +from nemo.collections.common.data.dataset import CodeSwitchedDataset, ConcatDataset, ConcatMapDataset diff --git a/nemo/collections/common/data/dataset.py b/nemo/collections/common/data/dataset.py index 030e997802bc..5b4fba5ef24a 100644 --- a/nemo/collections/common/data/dataset.py +++ b/nemo/collections/common/data/dataset.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import io import logging -from typing import Any, List, Optional, Tuple +from typing import Any, List, Optional, Tuple, Union import numpy as np +import torch import torch.utils.data as pt_data from torch.utils.data import Dataset, IterableDataset -__all__ = ['ConcatDataset', 'ConcatMapDataset'] +__all__ = ['ConcatDataset', 'ConcatMapDataset', 'CodeSwitchedDataset'] class ConcatDataset(IterableDataset): @@ -286,3 +288,372 @@ def __len__(self): def __getitem__(self, idx): dataset_id, dataset_index = self.indices[idx] return self.datasets[dataset_id][dataset_index] + + +class CodeSwitchedDataset(IterableDataset): + """ + A dataset that accepts as argument multiple sub-datasets (usually from different languages, but that's not required) and then + samples from them in order to create synthetic code-switched samples of up to N different sub-datasets + Args: + datasets (list): A list of datasets + lang_probs (list): A list of probabilities (which must sum to 1) corresponding to the sampling probability for each dataset + shuffle (bool): Whether to shuffle individual datasets. Only works with non-iterable datasets. + Defaults to True. + min_duration (int): the minimum duration (secs) of each synthetic code-switched sample. Will draw randomly until this is hit. + Defaults to 4 + max_duration (int): the maximum duration (secs) of each synthetic code-switched sample. + Defaults to 20 + min_monolingual (float): this percentage of the dataset will be original monolingual samples + Defaults to 0.3 - means 30% + db_norm (float): will normalise the composite CS sample to this DB level + Defaults to -25.0 + pause_start (int): inserts silence equal to this value (msecs) at the start of each CS sample + Defaults to 0 + pause_join (int): inserts silence equal to this value (msecs) between all language changes in the CS sample + Defaults to 0 + pause_end (int): terminates all CS samples with silence equal to this value (msecs) + Defaults to 0 + sampling_scales (list or float): gives you the ability to upsample/downsample each individual dataset + seed: Optional value to seed the numpy RNG. + global_rank (int): Worker rank, used for partitioning map style datasets. Defaults to 0. + world_size (int): Total number of processes, used for partitioning map style datasets. Defaults to 1. + pure_random (bool): If true, then always draw random sample from lang_probs. If false, you only draw from those datasets + which you haven't sampled from yet for the composite sample + force_monochannel (bool): If true, then all output audio will be mono-channel + infinity_mode (bool): If true, then the dataset iterable will generate an infinite amount of samples + sample_rate (int): the sample rate of all audio being sent to this Dataset + augmentor (AudioAugmentor): The any perturbations you wish to have applied on the CS samples + """ + + def __init__( + self, + datasets: List[Any], + lang_probs: Optional[List[float]] = None, + shuffle: bool = True, + min_duration: int = 4, + max_duration: int = 20, + min_monolingual: float = 0.3, + db_norm: float = -25.0, + pause_start: int = 0, + pause_join: int = 0, + pause_end: int = 0, + sampling_scales: Optional[Union[float, List[float]]] = None, + seed: Optional[int] = None, + global_rank: int = 0, + world_size: int = 1, + pure_random: bool = False, + force_monochannel: bool = True, + infinity_mode: bool = False, + sample_rate: int = 16000, + augmentor: Optional['AudioAugmentor'] = None, + ): + super().__init__() + + if len(datasets) == 0: + raise ValueError("CodeSwitchedDataset must receive a non-zero length datasets dict object") + + self.datasets = datasets + self.langs = list(range(len(datasets))) + self.langs_set = set(self.langs) + self.lang_iterables = {k: None for k in self.langs} + self.lang_kind = {k: None for k in self.langs} + self.shuffle = shuffle + self.min_duration = min_duration + self.max_duration = max_duration + self.min_monolingual = min_monolingual + self.db_norm = db_norm + self.pause_start = pause_start + self.pause_join = pause_join + self.pause_end = pause_end + self.pure_random = pure_random + self.force_monochannel = force_monochannel + self.infinity_mode = infinity_mode + self.global_rank = global_rank + self.world_size = world_size + self.augmentor = augmentor + self.sample_rate = sample_rate + self.length = 0 + if lang_probs is None: + self.prob_dict = {l: 1.0 / len(self.langs) for l in self.langs} + else: + assert len(self.langs) == len( + lang_probs + ), "Size mismatch between languages and respective probs in CodeSwitchedDataset" + self.prob_dict = {l: lang_probs[l] for l in self.langs} + self.lang_probs = np.array(list(self.prob_dict.values())) + if sampling_scales is not None and not isinstance(sampling_scales, list): + self.sampling_scales = {k: sampling_scales for k in self.langs} + elif ( + sampling_scales is not None + and isinstance(sampling_scales, list) + and len(sampling_scales) == len(self.langs) + ): + self.sampling_scales = {k: v for k, v in zip(self.langs, sampling_scales)} + else: + self.sampling_scales = {k: 1 for k in self.langs} + + for lang, dataset in enumerate(self.datasets): + isiterable = isinstance(dataset, IterableDataset) + + if isiterable: + self.lang_kind[lang] = 'iterable' + self.length += int(len(dataset) * self.sampling_scales[lang]) + else: + self.lang_kind[lang] = 'map' + self.length += int((len(dataset) // world_size) * self.sampling_scales[lang]) + + if seed is not None: + np.random.seed(seed) + + # set this to ensure compatibility with models searching for the collate_fn + # since this class stores datasets as a dict, not list + # self.collate_fn = self.datasets[self.langs[0]].collate_fn + if hasattr(self.datasets[self.langs[0]], 'collate_fn'): + self.collate_fn = self.datasets[self.langs[0]].collate_fn + elif ( + hasattr(self.datasets[self.langs[0]], 'datasets') + and isinstance(self.datasets[self.langs[0]].datasets, list) + and len(self.datasets[self.langs[0]].datasets) > 0 + and hasattr(self.datasets[self.langs[0]].datasets[0], 'collate_fn') + ): + # support datasets that are lists of entries + self.collate_fn = self.datasets[self.langs[0]].datasets[0].collate_fn + elif ( + hasattr(self.datasets[self.langs[0]], 'datasets') + and isinstance(self.datasets[self.langs[0]].datasets, list) + and len(self.datasets[self.langs[0]].datasets) > 0 + and hasattr(self.datasets[self.langs[0]].datasets[0], 'datasets') + and isinstance(self.datasets[self.langs[0]].datasets[0].datasets, list) + and len(self.datasets[self.langs[0]].datasets[0].datasets) > 0 + and hasattr(self.datasets[self.langs[0]].datasets[0].datasets[0], 'collate_fn') + ): + # support datasets that are lists of lists + self.collate_fn = self.datasets[self.langs[0]].datasets[0].datasets[0].collate_fn + else: + raise RuntimeError("CodeSwitchedDataset could not locate a valid dataset collate_fn to bind to") + + # this method returns an iterator object for a given language ID + # it correctly handles whether the underlying dataset is IterableDataset or mappable + def get_iterable_by_lang(self, lang): + dataset = self.datasets[lang] + + if isinstance(dataset, IterableDataset): + return dataset.__iter__() + else: + indices = np.arange(len(dataset)) + if self.shuffle: + np.random.shuffle(indices) + return iter(indices) + + # this method is the main function which builds and returns a composite, synthetic code-switched + # utterance on the fly. It automatically works with all of the class-based variables stored to create + # the synthetic utterance + def build_single_CS_sample(self): + # get_sample_from_language returns a LongTensor for the transcripts so we create a LongTensor to hold + # all returned transcripts + comp_text = torch.LongTensor([]) + created_sample_duration_sec = 0 + created_sample_langs = [] + created_sample_audios = [] + + # if min_monolingual fires, it means we will just return a single, original monolingual utterance + # from one of our languages based on that language's probability + pure_mono = np.random.rand() <= self.min_monolingual + + # we continue to add to the composite utterance until we hit the min_duration + while created_sample_duration_sec < self.min_duration: + # we sample from only those languages which haven't already been sampled for this particular + # synthetic utterance, unless pure_random=True, in which case, you just sample with replacement + # every time + if (self.pure_random and not pure_mono) or ( + len(set(created_sample_langs)) == 0 or len(set(created_sample_langs)) == len(self.langs) + ): + lang_id = np.random.choice(self.langs, p=self.lang_probs) + # elif pure_mono: + # use this approach if you want synthetic utterances which are all monolingual + # lang_id = created_sample_langs[0] + else: + # this code is for when we need to sample from only those languages which haven't been sampled + # yet for this utterance + p = np.array(list(map(self.prob_dict.get, list(self.langs_set - set(created_sample_langs))))) + p = p / p.sum() + lang_id = np.random.choice(list(self.langs_set - set(created_sample_langs)), p=p) + + audio, audio_len, labels, labels_len, *_ = self.get_sample_from_language(lang_id) + + # in case you get an audio which is all silence we keep sampling + if audio.count_nonzero().item() == 0: + continue + + sample_duration = len(audio) / self.sample_rate + if (created_sample_duration_sec + sample_duration) > self.max_duration: + continue + + if comp_text.device != labels.device: + comp_text = comp_text.to(labels.device) + + if audio.ndim > 1 and self.force_monochannel: + audio = audio.mean(dim=-1) + + created_sample_duration_sec += sample_duration + created_sample_langs.append(lang_id) + # need to use numpy instead of torch here because we need numpy's trim_zeros function + created_sample_audios.append(audio.cpu().numpy()) + comp_text = torch.cat([comp_text, labels], dim=0) + + # we want a real, non-synth pure_mono sample so we break soon as we have one + if pure_mono: + break + + # check that all samples have the same number of channels + sample_channels = list(set([s.ndim for s in created_sample_audios])) + if len(sample_channels) > 1: + raise RuntimeError( + "Mixture of audios with different number of channels in CodeSwitchedDataset. All sources must be same number of channels." + ) + + multichannel = sample_channels[0] > 1 + + # we start with pause_start amount of silence (zero array) which needs the correct shape for multi/mono channel + if multichannel: + comp_audio = np.zeros( + shape=(int(self.pause_start * self.sample_rate / 1000.0), created_sample_audios[0].shape[-1]), + dtype=created_sample_audios[0].dtype, + ) + else: + comp_audio = np.zeros( + shape=(int(self.pause_start * self.sample_rate / 1000.0),), dtype=created_sample_audios[0].dtype + ) + + # iterate over all mono-lingual samples to build the final composite + for idx, wav in enumerate(created_sample_audios): + if not multichannel: + # this function only works if mono-channel + wav = np.trim_zeros(wav) + + # normalise to provided DB level + wav_norm = wav * (10.0 ** (self.db_norm / 20.0) / np.maximum(0.01, (wav ** 2).mean(axis=0) ** 0.5)) + + # this part appends the normed waveform to the existing waveform, and inserts pause_join amount of silence + # if necessary, otherwise just a straight append + if idx < len(created_sample_audios) - 1: + if multichannel: + wav_norm = np.append( + wav_norm, + np.zeros( + shape=( + int(self.pause_join * self.sample_rate / 1000.0), + created_sample_audios[0].shape[-1], + ), + dtype=comp_audio.dtype, + ), + axis=0, + ) + else: + wav_norm = np.append( + wav_norm, + np.zeros(shape=(int(self.pause_join * self.sample_rate / 1000.0),), dtype=comp_audio.dtype), + axis=0, + ) + + # this is the penultimate composite wavform, just need to add pause_end silence + comp_audio = np.append(comp_audio, wav_norm, axis=0) + + # here we add the pause_end amount of silence, in correct channel shape + if multichannel: + comp_audio = np.append( + comp_audio, + np.zeros( + shape=(int(self.pause_end * self.sample_rate / 1000.0), created_sample_audios[0].shape[-1]), + dtype=comp_audio.dtype, + ), + axis=0, + ) + else: + comp_audio = np.append( + comp_audio, + np.zeros(shape=(int(self.pause_end * self.sample_rate / 1000.0),), dtype=comp_audio.dtype), + axis=0, + ) + + # we only want augmentation to happen on the final, synthetic utterance, and not on any of the individual + # languages, which is why we set augmentor=None when building the individual language datasets in audio_to_text_dataset.get_code_switched_dataset + # here we now apply augmentation to the final, synthetic utterance only + # all of this logic here happens in-memory, nothing is written to disk + if self.augmentor is not None: + # import here to avoid circular import error + # import here because otherwise CI test-nlp-imports fails since soundfile is only in requirements_asr and not in requirements_common + import soundfile as sf + + from nemo.collections.asr.parts.preprocessing import AudioSegment + + mb = io.BytesIO() + sf.write(mb, comp_audio, self.sample_rate, format='WAV') + mb.seek(0) + comp_audio_as = AudioSegment.from_file(mb, target_sr=self.sample_rate) + self.augmentor.perturb(comp_audio_as) + comp_audio = comp_audio_as.samples + + return ( + torch.tensor(comp_audio, dtype=audio.dtype, device=audio.device), + torch.tensor(len(comp_audio), device=audio_len.device).long(), + comp_text, + torch.tensor(len(comp_text), device=labels_len.device).long(), + ) + + # this is a helper method which prepares all of the iterator objects for all languages + # based on whether that language's underlying dataset is a map or an IterableDataset + def prep_underlying_datasets(self): + worker_info = pt_data.get_worker_info() + if worker_info is None: + max_elements = self.length + wid = 0 + wnum = 1 + else: + wid = worker_info.id + wnum = worker_info.num_workers + max_elements = len(range(wid, self.length, wnum)) + + for lang in self.langs: + if self.lang_kind[lang] == 'map': + start_idx = (len(self.datasets[lang]) // self.world_size) * self.global_rank + end_idx = start_idx + (len(self.datasets[lang]) // self.world_size) + if self.global_rank == self.world_size - 1: + end_idx = len(self.datasets[lang]) + indices = range(start_idx + wid, end_idx, wnum) + self.datasets[lang] = pt_data.Subset(self.datasets[lang], indices) + + self.lang_iterables[lang] = self.get_iterable_by_lang(lang) + + return max_elements + + # returns a sample (audio and transcript) from any underlying language stored by the class on instantiation + # the sample returned is a tensor for the audio and a tensor of ints for the transcript + # this method automatically handles StopIteration errors for the underyling language and rebuilds + # the iterator if necessary + def get_sample_from_language(self, lang): + while True: + try: + val = next(self.lang_iterables[lang]) + if self.lang_kind[lang] == 'map': + val = self.datasets[lang][val] + return val + except StopIteration: + self.lang_iterables[lang] = self.get_iterable_by_lang(lang) + + def __iter__(self): + # we create primed iterators for all languages and return the grand total of samples for each + # underlying language as a sum + max_elements = self.prep_underlying_datasets() + + if self.infinity_mode: + while True: + yield self.build_single_CS_sample() + else: + n = 0 + while n < max_elements: + yield self.build_single_CS_sample() + n += 1 + + def __len__(self): + return self.length diff --git a/nemo/collections/common/metrics/global_average_loss_metric.py b/nemo/collections/common/metrics/global_average_loss_metric.py index fae1dbfea5e8..3bbd4d13abf4 100644 --- a/nemo/collections/common/metrics/global_average_loss_metric.py +++ b/nemo/collections/common/metrics/global_average_loss_metric.py @@ -28,9 +28,6 @@ class GlobalAverageLossMetric(Metric): See :doc:`PyTorch Lightning Metrics` for the metric usage instruction. Args: - compute_on_step: - The method :meth:`forward` only calls ``update()`` and returns ``None`` if this is set to ``False``. - default: ``True`` dist_sync_on_step: Synchronize metric state across processes at each method :meth:`forward` call before returning the value at the step @@ -44,10 +41,8 @@ class GlobalAverageLossMetric(Metric): full_state_update = True - def __init__(self, compute_on_step=True, dist_sync_on_step=False, process_group=None, take_avg_loss=True): - super().__init__( - compute_on_step=compute_on_step, dist_sync_on_step=dist_sync_on_step, process_group=process_group - ) + def __init__(self, dist_sync_on_step=False, process_group=None, take_avg_loss=True): + super().__init__(dist_sync_on_step=dist_sync_on_step, process_group=process_group) self.add_state("loss_sum", torch.tensor(0.0, dtype=torch.float64), dist_reduce_fx='sum') self.add_state("num_measurements", torch.tensor(0, dtype=torch.int64), dist_reduce_fx='sum') self.take_avg_loss = take_avg_loss diff --git a/nemo/collections/common/metrics/perplexity.py b/nemo/collections/common/metrics/perplexity.py index 1158e3408611..9e1c21737ec8 100644 --- a/nemo/collections/common/metrics/perplexity.py +++ b/nemo/collections/common/metrics/perplexity.py @@ -29,8 +29,6 @@ class Perplexity(Metric): See `PyTorch Lightning Metrics `_ for the metric usage instructions. Args: - compute_on_step: - Forward only calls ``update()`` and returns ``None`` if this is set to ``False``. default: ``True`` dist_sync_on_step: Synchronize metric state across processes at each ``forward()`` before returning the value at the step. @@ -44,10 +42,8 @@ class Perplexity(Metric): full_state_update = True - def __init__(self, compute_on_step=True, dist_sync_on_step=False, process_group=None, validate_args=True): - super().__init__( - compute_on_step=compute_on_step, dist_sync_on_step=dist_sync_on_step, process_group=process_group - ) + def __init__(self, dist_sync_on_step=False, process_group=None, validate_args=True): + super().__init__(dist_sync_on_step=dist_sync_on_step, process_group=process_group) self.validate_args = validate_args self.add_state('perplexities_sum', torch.tensor(0.0, dtype=torch.float64), dist_reduce_fx='sum') # Total number of distributions seen since last reset diff --git a/nemo/collections/common/parts/preprocessing/collections.py b/nemo/collections/common/parts/preprocessing/collections.py index 4616f95e1a4f..ed9e53ae6ffe 100644 --- a/nemo/collections/common/parts/preprocessing/collections.py +++ b/nemo/collections/common/parts/preprocessing/collections.py @@ -159,6 +159,9 @@ def __init__( if hasattr(parser, "is_aggregate") and parser.is_aggregate and isinstance(text, str): if lang is not None: text_tokens = parser(text, lang) + # for future use if want to add language bypass to audio_to_text classes + # elif hasattr(parser, "lang") and parser.lang is not None: + # text_tokens = parser(text, parser.lang) else: raise ValueError("lang required in manifest when using aggregate tokenizers") else: diff --git a/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py b/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py index 2644e487d585..92a3e0fb49e0 100644 --- a/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py +++ b/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py @@ -185,10 +185,9 @@ def any_locale_word_tokenize(text: str) -> List[Tuple[List[str], bool]]: return _word_tokenize(words) -# TODO @xueyang: deprecate language-specific text preprocessing and use any_locale_text_preprocessing. -def spanish_text_preprocessing(text): +def spanish_text_preprocessing(text: str) -> str: return text.lower() -def chinese_text_preprocessing(text): - return text.lower() +def chinese_text_preprocessing(text: str) -> str: + return text diff --git a/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py b/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py index 9c231696ca88..abcbdb1661b9 100644 --- a/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py +++ b/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py @@ -673,12 +673,6 @@ def set_phone_prob(self, prob): class ChinesePhonemesTokenizer(BaseTokenizer): # fmt: off - PRONUNCIATION_LIST = ['#' + i for i in ['^', 'A', 'AI', 'AN', 'ANG', 'AO', 'B', 'C', 'CH', 'D', - 'E', 'EI', 'EN', 'ENG', 'ER', 'F', 'G', 'H', 'I', 'IE', - 'IN', 'ING', 'IU', 'J', 'K', 'L', 'M', 'N', 'O', 'ONG', - 'OU', 'P', 'Q', 'R', 'S', 'SH', 'T', 'U', 'UI', 'UN', - 'V', 'VE', 'VN', 'W', 'X', 'Y', 'Z', 'ZH']] - TONES_LIST = ['#1', '#2', '#3', '#4', '#5'] PUNCT_LIST = ( # Derived from LJSpeech and "/" additionally ',', '.', '!', '?', '-', ':', ';', '/', '"', '(', @@ -698,7 +692,7 @@ def __init__( sep='|', # To be able to distinguish between 2/3 letters codes. add_blank_at=None, pad_with_space=False, - text_preprocessing_func=lambda text: chinese_text_preprocessing(text), + text_preprocessing_func=chinese_text_preprocessing, ): """Chinese phoneme-based tokenizer. Args: @@ -722,9 +716,15 @@ def __init__( if silence is not None: self.silence, tokens = len(tokens), tokens + [silence] # Silence - tokens.extend(self.PRONUNCIATION_LIST) - tokens.extend(self.TONES_LIST) - tokens.extend(string.ascii_lowercase) + self.phoneme_list = g2p.phoneme_list + self.tone_list = g2p.tone_list + self.ascii_letter_list = g2p.ascii_letter_list + + tokens.extend(self.phoneme_list) + tokens.extend(self.tone_list) + tokens.extend(self.ascii_letter_list) + + self.text_preprocessing_func = text_preprocessing_func if apostrophe: tokens.append("'") # Apostrophe @@ -740,15 +740,12 @@ def __init__( self.punct = punct self.pad_with_space = pad_with_space - - self.text_preprocessing_func = text_preprocessing_func self.g2p = g2p - def encode(self, text): + def encode(self, text: str) -> List[int]: """See base class for more information.""" - text = self.text_preprocessing_func(text) - g2p_text = self.g2p(text) # TODO: handle infer + g2p_text = self.g2p(text) return self.encode_from_g2p(g2p_text, text) def encode_from_g2p(self, g2p_text: List[str], raw_text: Optional[str] = None): @@ -765,15 +762,15 @@ def encode_from_g2p(self, g2p_text: List[str], raw_text: Optional[str] = None): # Add space if last one isn't one if p == space and len(ps) > 0 and ps[-1] != space: ps.append(p) - # Add next phoneme or char (if chars=True) - elif (p.isalnum() or p == "'" or p in self.PRONUNCIATION_LIST or p in self.TONES_LIST) and p in tokens: + # Add next phoneme or tone or ascii letter or apostrophe. + elif (p.isalnum() or p == "'" or p in self.phoneme_list + self.tone_list + self.ascii_letter_list) and p in tokens: ps.append(p) - # Add punct + # Add punctuation elif (p in self.PUNCT_LIST) and self.punct: ps.append(p) # Warn about unknown char/phoneme elif p != space: - message = f"Text: [{''.join(g2p_text)}] contains unknown char/phoneme: [{p}]." + message = f"Text: [{' '.join(g2p_text)}] contains unknown char/phoneme: [{p}]." if raw_text is not None: message += f"Original text: [{raw_text}]. Symbol will be skipped." logging.warning(message) diff --git a/nemo/collections/multimodal/models/clip/megatron_clip_models.py b/nemo/collections/multimodal/models/clip/megatron_clip_models.py index 1653faaf6900..c423c05ff601 100644 --- a/nemo/collections/multimodal/models/clip/megatron_clip_models.py +++ b/nemo/collections/multimodal/models/clip/megatron_clip_models.py @@ -187,6 +187,7 @@ def __init__(self, model_cfg, padded_vocab_size, pre_process=True, post_process= reduce_amax=model_cfg.get('reduce_amax', True), use_emha=model_cfg.use_emha, activation=model_cfg.get('activation', 'gelu'), + use_flash_attention=model_cfg.get('flash_attention', False), ) self.initialize_word_embeddings( @@ -364,6 +365,8 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): grad_accum_steps = cfg.get('global_batch_size') // (cfg.get('micro_batch_size') * data_parallel_world_size) self._nsys_profile_start_step *= grad_accum_steps self._nsys_profile_end_step *= grad_accum_steps + self.get_attention_mask_from_fusion = self.cfg.get('get_attention_mask_from_fusion', True) + self.initialize_ub = self.cfg.get('ub_tp_comm_overlap', False) def get_module_list(self): if isinstance(self.model, list): @@ -503,6 +506,8 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): no_sync_func=no_sync_func, grad_sync_func=grad_sync_func, param_sync_func=param_sync_func, + overlap_p2p_comm=self.cfg.get('overlap_p2p_comm', False), + batch_p2p_comm=self.cfg.get('batch_p2p_comm', True), ) # only the last stages of the pipeline return losses diff --git a/nemo/collections/multimodal/models/multimodal_base_model.py b/nemo/collections/multimodal/models/multimodal_base_model.py index b34506671011..a953bbbbbaf9 100644 --- a/nemo/collections/multimodal/models/multimodal_base_model.py +++ b/nemo/collections/multimodal/models/multimodal_base_model.py @@ -32,6 +32,7 @@ from pytorch_lightning.utilities.migration import pl_legacy_patch from transformers import TRANSFORMERS_CACHE +from nemo.collections.nlp.modules.common.megatron.attention import HAVE_FLASH_ATTENTION from nemo.collections.nlp.modules.common.megatron.clip_grads import ( clip_grad_norm_distributed_optimizer, clip_grad_norm_fp32, @@ -244,6 +245,12 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): if trainer is None: raise ValueError(f"Trainer cannot be None for Megatron-based models. Please provide a PTL trainer object.") + if cfg.get('use_flash_attention', False) and not HAVE_FLASH_ATTENTION: + raise ImportError( + "flash_attn was not found. Please see the installation instructions: https://github.com/HazyResearch/flash-attention." + "If you use flash_attn with triton. Please install triton==2.0.0.dev20221202." + ) + # this prevents base constructor from initializing tokenizer self.tokenizer = None super().__init__(cfg, trainer=trainer) @@ -284,6 +291,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): global_batch_size=cfg.get('global_batch_size'), rampup_batch_size=cfg.get('rampup_batch_size'), use_fp8=cfg.get('fp8', False), + init_mpi_proc_group=cfg.get('ub_tp_comm_overlap', False), seed=self.cfg.get('seed', 1234), apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30), ) @@ -307,6 +315,14 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): "default_on_epoch": False, } + self.gc_interval = cfg.get('gc_interval', 0) + assert self.gc_interval >= 0, "gc_interval should be an integer value larger than or equal to 0." + # If gc_interval > 0, memory garbage collection is manually controlled. + # The automatic garbage collector sould be disabled before training starts. + if self.gc_interval > 0: + gc.disable() + self.validation_global_step = 1 + def _enable_nvidia_optimizations(self): "These optimizations are present in NVIDIA NGC PyTorch Containers" @@ -354,13 +370,32 @@ def _build_tokenizer(self): self.tokenizer = get_nmt_tokenizer( library=self._cfg.tokenizer.library, model_name=self._cfg.tokenizer.type, - tokenizer_model=self.register_artifact("tokenizer.model", self._cfg.tokenizer.model), - vocab_file=self.register_artifact("tokenizer.vocab_file", self._cfg.tokenizer.vocab_file), - merges_file=self.register_artifact("tokenizer.merge_file", self._cfg.tokenizer.merge_file), + tokenizer_model=self.register_artifact("tokenizer.model", self._cfg.tokenizer.get('model', None)), + vocab_file=self.register_artifact("tokenizer.vocab_file", self._cfg.tokenizer.get('vocab_file', None)), + merges_file=self.register_artifact("tokenizer.merge_file", self._cfg.tokenizer.get('merge_file', None)), + use_fast=self.cfg.tokenizer.get('use_fast', False), delimiter=self.cfg.tokenizer.get('delimiter', None), legacy=legacy, ) + if self._cfg.tokenizer.get('additional_special_tokens', None) is not None: + tokens_list = omegaconf.OmegaConf.to_object(self._cfg.tokenizer.additional_special_tokens) + self.tokenizer.add_special_tokens({'additional_special_tokens': tokens_list}) + + def on_train_start(self) -> None: + super().on_train_start() + self.init_global_step = self.trainer.global_step + + def on_validation_start(self) -> None: + super().on_validation_start() + if self.gc_interval > 0: + gc.collect() + + def on_validation_end(self) -> None: + super().on_validation_end() + if self.gc_interval > 0: + gc.collect() + def _build_vocab(self): """ Manipulate vocabulary (e.g., pad vocabulary for increased performance)/ @@ -385,18 +420,17 @@ def _vocab_size_with_padding(self, orig_vocab_size, make_vocab_size_divisible_by ) return after - def on_train_start(self) -> None: - super().on_train_start() - self.init_global_step = self.trainer.global_step - - def _get_parameters(self): + def get_parameters_with_grad(self): """ - private method to load all the trainable parameters from optimizer param groups + Get all parameters with grad from optimizer param groups """ params = [] for param_group in self._optimizer_param_groups: for param in param_group['params']: - params.append(param) + if ( + param.grad is not None + ): # (@adithyare) adapter training with pp>1 can result in params with no grads + params.append(param) return params def configure_gradient_clipping(self, *args, **kwargs): @@ -420,9 +454,9 @@ def configure_gradient_clipping(self, *args, **kwargs): else: if self.megatron_amp_O2: # grep fp32 master parameters for gradient clipping - parameters = self._optimizer.get_parameters() + parameters = self._optimizer.get_parameters_with_grad() else: - parameters = self._get_parameters() + parameters = self.get_parameters_with_grad() grad_norm = clip_grad_norm_fp32(parameters=parameters, max_norm=clip_val) self.log('grad_norm', grad_norm, rank_zero_only=True, batch_size=1) @@ -451,7 +485,7 @@ def allreduce_gradients(self): for buf, synced in zip(grads, torch._utils._unflatten_dense_tensors(coalesced, grads)): buf.copy_(synced) - def reduce_overlap_gradients(self): + def reduce_overlap_gradients(self, params=None): """Reduce grads if overlapped grad sync is enabled Used for pipeline parallelism with the distributed Adam @@ -506,6 +540,17 @@ def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unus # accumulated gradient updates. grad_scaler.optimizer_update_skipped = None + if self.gc_interval > 0 and (self.trainer.global_step % self.gc_interval == 0): + gc.collect() + + def on_validation_batch_end(self, outputs, batch: Any, batch_idx: int, dataloader_idx: int) -> None: + super().on_validation_batch_end(outputs, batch, batch_idx, dataloader_idx) + + if self.gc_interval > 0: + if self.validation_global_step % self.gc_interval == 0: + gc.collect() + self.validation_global_step += 1 + def setup_optimization( self, optim_config: Optional[Union[DictConfig, Dict]] = None, optim_kwargs: Optional[Dict[str, Any]] = None, ): diff --git a/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py b/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py index b3d167419494..3ca4767e3480 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py @@ -33,6 +33,7 @@ def __init__( data_parallel_size: int, drop_last: bool = True, global_batch_size: Optional[int] = None, + rampup_batch_size: Optional[list] = None, pad_samples_to_global_batch_size: Optional[bool] = False, ) -> None: # Sanity checks. @@ -48,7 +49,7 @@ def __init__( data_parallel_rank, data_parallel_size ) ) - if global_batch_size is not None: + if global_batch_size is not None and rampup_batch_size is None: if global_batch_size % (micro_batch_size * data_parallel_size) != 0: raise RuntimeError( f"`global_batch_size` ({global_batch_size}) is not divisible by " diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py index cf1de245d0e7..d7113e7cdde3 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py @@ -601,7 +601,7 @@ def _build_index_mappings( last_epoch_num_samples = num_samples - num_samples_from_epochs_minus_one assert last_epoch_num_samples >= 0, 'last epoch number of samples should be non-negative.' num_samples_per_epoch = (tokens_per_epoch - add_extra_token) // seq_length - assert last_epoch_num_samples < ( + assert last_epoch_num_samples <= ( num_samples_per_epoch + 1 ), 'last epoch number of samples exceeded max value.' # If we have less than 80% of the samples for the last epoch, diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_prompt_learning_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_prompt_learning_dataset.py index 15edc673b7cc..4b1b4f61d439 100755 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_prompt_learning_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_prompt_learning_dataset.py @@ -327,6 +327,9 @@ def __len__(self): def __getitem__(self, idx): return self.examples[idx] + def _ceil_to_nearest(self, n, m): + return (n + m - 1) // m * m + def collate_fn(self, batch, tp_workers=0): """ Prepares input_ids, labels, loss mask, attention_mask, and position ids for global batch """ taskname_ids, input_ids, answer_starts = zip(*batch) @@ -350,11 +353,16 @@ def collate_fn(self, batch, tp_workers=0): else: resi_padding = 0 batch_max += resi_padding + ceil_batch_max = self._ceil_to_nearest( + batch_max, 8 + ) # @adithyare this padding does not conflict with the tp_workers padding above + # since tp_workers is always a multiple of 2. the padding to multiple of 8 is to ensure an mem-optimized softmax is used. + batch_max = ceil_batch_max + 1 input_ids, loss_mask = self.pad_batch_and_build_loss_mask(input_ids, batch_max, answer_starts) # Should be a label for every token in batch, label is the next token labels = input_ids[:, 1:].contiguous() input_ids = input_ids[:, :-1].contiguous() - batch_max -= 1 + batch_max -= 1 # @adithyare I *think* this negatition is done to account for the above 2 lines which removes one item from the input_ids seq. # Loss mask should align with labels loss_mask = loss_mask[:, 1:].contiguous() diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py index 2c896c2e61af..d6c2257ebabb 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py @@ -16,6 +16,7 @@ import torch +from nemo.collections.common.tokenizers.sentencepiece_tokenizer import SentencePieceTokenizer from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec from nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset import GPTSFTDataset from nemo.utils import logging @@ -29,25 +30,65 @@ SYSTEM_TOKEN = "System\n" TURN_TOKEN = "" -GUARD_RAIL_INSTRUCTION = { - "TEXT_TO_CANONICAL_FORM": "Given a dialogue, for each turn you need to generate a short summary called a canonical form. Generate the canonical form for the last turn in the dialogue.", - "CANONICAL_FORM_TO_TEXT": "Given a dialogue, for each turn we also have a short summary called a canonical form. Generate the canonical form given the last turn message and canonical form. Then generate the message.", +TYPE_INSTRUCTION = { + 'TEXT_TO_VALUE': "", + 'VALUE_TO_TEXT': '', } -def _mask_targets(target, tokenized_lens, speakers, header_len, s_ids, tokenizer, mask_role): +def _mask_targets( + target, + tokenized_lens, + speakers, + header_len, + s_ids, + tokenizer, + mask_role, + gtype, + extra_id_2_token_id, + new_line_token_id, +): + """ This function masks the tokens so the loss is computed only on the non-masked role's responses. + For 'TEXT_TO_VALUE' type, the loss is computed on the value attributes. + + Args: + target (Tensor): input ids + tokenized_lens (List[int]): array of lengths of each turns + speakers (List[str]): array of speakers of each turns + header_len (int): the system prompt length + s_ids (List[Tensor]): array of tokenized ids of each turns + tokenizer (TokenizerSpec): tokenizer object + mask_role (str): the speaker id to be masked from loss computation + gtype (str): either 'TEXT_TO_VALUE' or 'VALUE_TO_TEXT' + extra_id_2_token_id (int): token id + new_line_token_id (int): new line token id + + """ cur_idx = header_len tgt_len = target.shape[0] for i, (tokenized_len, speaker, s_id) in enumerate(zip(tokenized_lens, speakers, s_ids)): - # note, sentence piece will add extra empty token in front. s_id has that extra token too - skip_name_len = len(tokenizer.text_to_ids(TURN_TOKEN + speaker + END_NAME_SIGNAL)) + # note, sentence piece will add extra empty token in front. has to compute the diff + id1 = tokenizer.text_to_ids("") + id2 = tokenizer.text_to_ids("" + TURN_TOKEN + speaker + END_NAME_SIGNAL) + skip_name_len = len(id2) - len(id1) + if extra_id_2_token_id is None: + raise ValueError("extra_id_2 is not in the vocabulary") + if (s_id == extra_id_2_token_id).any().item(): + if gtype == 'VALUE_TO_TEXT': + # if contains the token + assert skip_name_len == torch.where((s_id == extra_id_2_token_id))[0].item() + # find new line token id 14 + more_skip_len = torch.where((s_id[skip_name_len:] == new_line_token_id))[0][0].item() + 1 + skip_name_len += more_skip_len + elif gtype == 'TEXT_TO_VALUE': + skip_name_len = torch.where((s_id == extra_id_2_token_id))[0].item() + 1 if cur_idx >= tgt_len: break elif cur_idx + tokenized_len < tgt_len: # Check whether the mask is applied to the correct position, the first token is turn token: # s_id[2:] skips the artifact empty token and the turn token # target[cur_idx + 1:cur_idx + tokenized_len] skip the turn token - if not torch.equal(target[cur_idx + 1 : cur_idx + tokenized_len], s_id[2:]): + if not torch.equal(target[cur_idx + 1 : cur_idx + tokenized_len], s_id[1:]): logging.warning("a sentence mismatches the corresponding piece " "in the conversation") if i == 0: # mask the first turn completely to provide at least one turn as context @@ -57,7 +98,7 @@ def _mask_targets(target, tokenized_lens, speakers, header_len, s_ids, tokenizer target[cur_idx + 1 : cur_idx + tokenized_len] = IGNORE_INDEX else: # mask up to the name end, need to remove one as skip name has an extra artifact empty token - target[cur_idx : cur_idx + skip_name_len - 1] = IGNORE_INDEX + target[cur_idx : cur_idx + skip_name_len] = IGNORE_INDEX cur_idx += tokenized_len @@ -65,6 +106,13 @@ def cannonical_form_formater(cannoical_form): return f'{cannoical_form}\n' +def response_value_formater(label): + if isinstance(label, str): + return '' + label + '\n' + else: + raise ValueError(f'Unknown label type {type(label)}, only str type is supported') + + def _add_speaker_and_signal(header, source, mask_role, gtype): """Add speaker and start/end signal on each round.""" BEGIN_SIGNAL = "" @@ -76,28 +124,30 @@ def _add_speaker_and_signal(header, source, mask_role, gtype): sentence["value"] = ( BEGIN_SIGNAL + role_token + sentence_from + END_NAME_SIGNAL + sentence["value"] + END_SIGNAL ) - elif gtype == "TEXT_TO_CANONICAL_FORM": + elif gtype == "VALUE_TO_TEXT": sentence["value"] = ( BEGIN_SIGNAL + role_token + sentence_from + END_NAME_SIGNAL + + (response_value_formater(sentence['label']) if 'label' in sentence else '') + sentence["value"] + END_SIGNAL - + cannonical_form_formater(sentence['canonical_form']) ) - elif gtype == "CANONICAL_FORM_TO_TEXT": + elif gtype == "TEXT_TO_VALUE": sentence["value"] = ( BEGIN_SIGNAL + role_token + sentence_from + END_NAME_SIGNAL - + cannonical_form_formater(sentence['canonical_form']) + sentence["value"] + END_SIGNAL + + (response_value_formater(sentence['label']) if 'label' in sentence else '') ) else: - raise ValueError(f"source type {gtype} not supported") + raise ValueError( + f"source type {gtype} not supported, only 'VALUE_TO_TEXT' and 'TEXT_TO_VALUE' are supported" + ) conversation += sentence["value"] # if the last turn is not masked, add next token start token to the end, which will be included for loss calculation if sentence_from != mask_role and i == len(source) - 1: @@ -105,9 +155,7 @@ def _add_speaker_and_signal(header, source, mask_role, gtype): return conversation -def preprocess( - source: dict, tokenizer: TokenizerSpec, -): +def preprocess(source: dict, tokenizer: TokenizerSpec, extra_id_2_token_id: int, new_line_token_id: int): """ Given a conversation list. This transform: 1. Add signal '### ' at the beginning each sentence, with end signal '\n'; @@ -115,17 +163,18 @@ def preprocess( 3. Tokenize the concatenated conversation; 4. Make a deepcopy as the target. Mask human words with IGNORE_INDEX. """ - canonical_type = None + data_type = None if 'type' in source: - canonical_type = source['type'] - assert canonical_type in GUARD_RAIL_INSTRUCTION, f"source type {canonical_type} not supported" + data_type = source['type'] + assert data_type in TYPE_INSTRUCTION, f"source type {data_type} not supported" # add end signal and concatenate together conversation = source['system'] - if canonical_type is not None: - conversation = conversation + '\n' + GUARD_RAIL_INSTRUCTION[canonical_type] + if data_type is not None: + if TYPE_INSTRUCTION[data_type] != '': + conversation = conversation + '\n' + TYPE_INSTRUCTION[data_type] mask_role = source.get('mask', 'User') - header = f"{SYSTEM_TOKEN}{conversation}\n\n" - conversation = _add_speaker_and_signal(header, source['conversations'], mask_role, canonical_type) + header = f"{SYSTEM_TOKEN}{conversation}" + conversation = _add_speaker_and_signal(header, source['conversations'], mask_role, data_type) # tokenize conversations input_ids = tokenizer.text_to_ids(conversation) target = copy.deepcopy(input_ids) @@ -134,10 +183,16 @@ def preprocess( ids = [] tokenized_lens = [] for s in source['conversations']: - tokenized_sentence = tokenizer.text_to_ids(s["value"]) - ids.append(torch.tensor(tokenized_sentence)) - # remove one token as it adds an empty token in front - tokenized_lens.append(len(tokenized_sentence) - 1) + if isinstance(tokenizer, SentencePieceTokenizer): + tokenized_sentence = tokenizer.text_to_ids(s["value"]) + ids.append(torch.tensor(tokenized_sentence)[1:]) + # remove one token as it adds an empty token in front + tokenized_lens.append(len(tokenized_sentence) - 1) + else: + tokenized_sentence = tokenizer.text_to_ids(s["value"]) + ids.append(torch.tensor(tokenized_sentence)) + # remove one token as it adds an empty token in front + tokenized_lens.append(len(tokenized_sentence)) speakers = [sentence["from"] for sentence in source['conversations']] assert mask_role in speakers, "mask role not in the conversation" target = torch.LongTensor(target) @@ -145,18 +200,51 @@ def preprocess( target[:header_len] = IGNORE_INDEX input_ids = torch.LongTensor(input_ids) - _mask_targets(target, tokenized_lens, speakers, header_len, ids, tokenizer, mask_role) + _mask_targets( + target, + tokenized_lens, + speakers, + header_len, + ids, + tokenizer, + mask_role, + data_type, + extra_id_2_token_id, + new_line_token_id, + ) mask = (target != IGNORE_INDEX).bool() assert mask.sum().item() != 0, "mask is empty" return dict(input_ids=input_ids, mask=mask) +def _check_token_in_vocab(tokenizer, token): + ids = tokenizer.text_to_ids(token) + if isinstance(tokenizer, SentencePieceTokenizer): + return len(ids) == 2 + else: + return len(ids) == 1 + + class GPTSFTChatDataset(GPTSFTDataset): def _build_samples_mapping(self): super()._build_samples_mapping() assert hasattr(self.tokenizer, "vocab"), "tokenizer should have vocab property, not supported" - assert '' in self.tokenizer.vocab, " not in the tokenizer vocab. not supported" - assert '' in self.tokenizer.vocab, " not in the tokenizer vocab. not supported" + assert _check_token_in_vocab( + self.tokenizer, '' + ), " not in the tokenizer vocab. not supported" + assert _check_token_in_vocab( + self.tokenizer, '' + ), " not in the tokenizer vocab. not supported" + # calcuilate id value + if _check_token_in_vocab(self.tokenizer, ''): + ids_1 = self.tokenizer.text_to_ids('') + ids_2 = self.tokenizer.text_to_ids('') + self.extra_id_2_token_id = ids_1[len(ids_2) :][0] + else: + self.extra_id_2_token_id = None + ids_1 = self.tokenizer.text_to_ids('\n') + ids_2 = self.tokenizer.text_to_ids('') + self.new_line_token_id = ids_1[len(ids_2) :][0] def _process_example(self, example): """ @@ -164,7 +252,7 @@ def _process_example(self, example): Truncation is carried out when needed, but it is performed only on the prompt side. BOS, EOS, and SEP, are added if specified. """ - result = preprocess(example, self.tokenizer) + result = preprocess(example, self.tokenizer, self.extra_id_2_token_id, self.new_line_token_id) return result diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py index 24b7fe8d3d6d..94c4b3c54c63 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py @@ -51,7 +51,7 @@ def __init__( file_path: Path to a JSONL GPT supervised fine-tuning dataset. Data is formatted as multiple JSON lines with each line formatted as follows. {'input': 'John von Neumann\nVon Neumann made fundamental contributions .... Q: What did the math of artificial viscosity do?', 'output': 'smoothed the shock transition without sacrificing basic physics'} tokenizer: Tokenizer for the dataset. Instance of a class that inherits TokenizerSpec (ex: YTTM, SentencePiece). max_seq_length (int): maximum sequence length for each dataset examples. Examples will either be truncated to fit this length or dropped if they cannot be truncated. - min_seq_length (int): min length of each data example in the dataset. Data examples will be dropped if they do not meet the min length requirements. + min_seq_length (int): min length of each data example in the dataset. Data examples will be dropped if they do not meet the min length requirements. add_bos (bool): Whether to add a beginning of sentence token to each data example add_eos (bool): Whether to add an end of sentence token to each data example add_sep (bool): Whether to add a separation token to each data example (goes between prompt and answer) @@ -93,7 +93,9 @@ def __init__( self.prompt_template = self.prompt_template.encode('utf-8').decode('unicode_escape') assert self.truncation_field in ["answer", "context"] - self.indexed_dataset = JSONLMemMapDataset(dataset_paths=[file_path], tokenizer=None, header_lines=0) + self.indexed_dataset = JSONLMemMapDataset( + dataset_paths=[file_path], tokenizer=None, header_lines=0, index_mapping_dir=index_mapping_dir + ) # Will be None after this call if `max_num_samples` is None self._build_samples_mapping() @@ -169,7 +171,9 @@ def _process_example(self, example): tokenized_text = pre_pad + self.tokenizer.text_to_ids(text) context_ids = pre_pad + self.tokenizer.text_to_ids(context) answer_ids = tokenized_text[len(context_ids) :] - total_ids = len(context_ids) + len(answer_ids) + + # for the long context cases, collate_fn includes self.tokens_to_generate for padding + total_ids = len(context_ids) + max(len(answer_ids), self.tokens_to_generate) if self.add_bos: total_ids += 1 if self.add_sep: diff --git a/nemo/collections/nlp/data/language_modeling/megatron/indexed_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/indexed_dataset.py index 0fffb5b64a23..fe71e7f78019 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/indexed_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/indexed_dataset.py @@ -513,6 +513,8 @@ def _do_init(self, path, skip_warmup=True, delay_data_mmap=False): self._create_data_mmap(skip_warmup) else: logging.info(" skip creating data numpy buffer of mmap...") + self._bin_buffer_mmap = None + self._bin_buffer = None def _create_data_mmap(self, skip_warmup): if not skip_warmup: @@ -524,7 +526,8 @@ def _create_data_mmap(self, skip_warmup): self._bin_buffer = memoryview(self._bin_buffer_mmap) def __del__(self): - self._bin_buffer_mmap._mmap.close() + if self._bin_buffer_mmap is not None: + self._bin_buffer_mmap._mmap.close() del self._bin_buffer_mmap del self._index diff --git a/nemo/collections/nlp/data/language_modeling/text_memmap_dataset.py b/nemo/collections/nlp/data/language_modeling/text_memmap_dataset.py index b26f213282bb..05d10b42e115 100644 --- a/nemo/collections/nlp/data/language_modeling/text_memmap_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/text_memmap_dataset.py @@ -19,27 +19,28 @@ import pickle import time from functools import partial +from typing import Callable, List, Optional, Type import numpy as np import torch from nemo.core import Dataset -from nemo.utils import logging +from nemo.utils import AppState, logging -__all__ = ['TextMemMapDataset', 'CSVMemMapDataset', 'build_index_files'] -__idx_version__ = '0.2' # index file version -__idx_suffix__ = 'idx' # index file suffix +__all__ = ["TextMemMapDataset", "CSVMemMapDataset", "build_index_files"] +__idx_version__ = "0.2" # index file version +__idx_suffix__ = "idx" # index file suffix def _build_index_from_memdata(fn, newline_int): """ Build index of delimiter positions between samples in memmap. Can be provided externally. - + Returns a 1D array of ints. """ # use memmap to read file - mdata = np.memmap(fn, dtype=np.uint8, mode='r') + mdata = np.memmap(fn, dtype=np.uint8, mode="r") # find newline positions midx = np.where(mdata == newline_int)[0] midx_dtype = midx.dtype @@ -68,17 +69,28 @@ class TextMemMapDataset(Dataset): def __init__( self, - dataset_paths, - newline_int=10, - header_lines=0, - workers=None, - tokenizer=None, - sort_dataset_paths=True, - build_index_fn=_build_index_from_memdata, + dataset_paths: List[str], + newline_int: Optional[int] = 10, + header_lines: Optional[int] = 0, + workers: Optional[int] = None, + tokenizer: Optional[Type["TokenizerSpec"]] = None, + build_index_fn: Optional[Callable[[str, Optional[int]], bool]] = _build_index_from_memdata, + sort_dataset_paths: Optional[bool] = True, + index_mapping_dir: Optional[str] = None, ): """ - build_index_fn - a callable build_index_fn(fn, newline_int) -> midx [np.array] that returns the index of newlines in a file fn - must be pickleable (to be used in multiprocessing.Pool.map) + Args: + dataset_paths: list of JSONL file paths. + newline_int: ASCII code to use to interpret newlines in file. + header_lines: number of header lines in JSON files. + workers: number of workers to use for creating index files. + tokenizer: tokenizer to use to convert text to tokens. + build_index_fn: a callable build_index_fn(fn, newline_int) -> midx [np.array] + that returns the index of newlines in a file fn must be pickleable + (to be used in multiprocessing.Pool.map). + sort_dataset_paths: whether to sort datasets by paths. + index_mapping_dir: directory to save the index mapping to. + If None, will write to the same folder as the dataset. """ super().__init__() self.mdata_midx_list = [] @@ -103,19 +115,51 @@ def __init__( logging.info(f"Building data files") # load all files into memmap - is_ditributed = torch.distributed.is_available() and torch.distributed.is_initialized() + is_distributed = torch.distributed.is_available() and torch.distributed.is_initialized() + + if not is_distributed or (is_distributed and torch.distributed.get_rank() == 0): + # Create index files on global rank 0. + build_index_files( + dataset_paths, + newline_int, + workers=self._worker, + build_index_fn=build_index_fn, + index_mapping_dir=index_mapping_dir, + ) + + if is_distributed: + torch.distributed.barrier() - if not is_ditributed or (is_ditributed and torch.distributed.get_rank() == 0): - build_index_files(dataset_paths, newline_int, workers=self._worker, build_index_fn=build_index_fn) + if is_distributed and AppState().local_rank == 0: + # If we are in a distributed multi-node set-up and index files are not stored on + # a shared filesystem, then the index files created on global rank 0 are only + # accessible to the workers on that node. + # + # Two cases may occur here: + # + # 1. case of a shared filesystem, or global_rank==0: the index files are present in + # the locally available filesystem, calling build_index_files() again is a no-op. + # 2. case of a non-shared filesystem, and global_rank>0: the index files are not + # present in the locally available filesystem, calling build_index_files() again + # will create them. + # + # Outcome in all cases: all nodes have access to the index files in their filesystem. + build_index_files( + dataset_paths, + newline_int, + workers=self._worker, + build_index_fn=build_index_fn, + index_mapping_dir=index_mapping_dir, + ) - if is_ditributed: + if is_distributed: torch.distributed.barrier() logging.info(f"Loading data files") start_time = time.time() - mdata_midx_list = [self.load_file(fn) for fn in self._files_list] + mdata_midx_list = [self.load_file(fn, index_mapping_dir) for fn in self._files_list] logging.info( - f'Time loading {len(mdata_midx_list)} mem-mapped files: {datetime.timedelta(seconds=time.time() - start_time)}' + f"Time loading {len(mdata_midx_list)} mem-mapped files: {datetime.timedelta(seconds=time.time() - start_time)}" ) logging.info("Computing global indices") @@ -193,7 +237,7 @@ def _build_data_from_text(self, text): return data - def load_file(self, fn): + def load_file(self, fn, index_mapping_dir: Optional[str] = None): """ Loads a text file as np.int8. @@ -203,37 +247,37 @@ def load_file(self, fn): size - number of lines in file """ logging.info(f"Loading {fn}") - idx_fn = f"{fn}.{__idx_suffix__}" + idx_fn = _index_fn(fn, index_mapping_dir) # create data map - mdata = np.memmap(fn, dtype=np.uint8, mode='r') + mdata = np.memmap(fn, dtype=np.uint8, mode="r") if _index_file_exists(idx_fn): # load index file into memory map - midx = np.load(idx_fn + ".npy", allow_pickle=True, mmap_mode='r') + midx = np.load(idx_fn + ".npy", allow_pickle=True, mmap_mode="r") # test for header if len(midx) < self._header_lines: raise RuntimeError(f"Missing header, expected {self._header_lines} header lines") # load meta info - idx_info_dict = pickle.load(open(idx_fn + ".info", 'rb')) + idx_info_dict = pickle.load(open(idx_fn + ".info", "rb")) # test for mismatch in expected newline_int - if 'newline_int' in idx_info_dict: - newline_int = idx_info_dict['newline_int'] + if "newline_int" in idx_info_dict: + newline_int = idx_info_dict["newline_int"] if self._newline_int != newline_int: logging.warning( f"Mismatch in newline_int, expected = {self._newline_int} but loaded {newline_int}" ) # test for version mismatch (useful to force recreation of index files) - idx_version = idx_info_dict.get('version', '0.0') + idx_version = idx_info_dict.get("version", "0.0") if __idx_version__ != idx_version: raise RuntimeError( f"Version mismatch: Please delete existing '.{__idx_suffix__}' files. Expected version = {__idx_version__}, but file version = {idx_version}. File path = {idx_fn}" ) else: raise ValueError( - f'Memory Map for {fn} is not found, missing one or more of files: {idx_fn}.{{.npy,.info}}' + f"Memory Map for {fn} is not found, missing one or more of files: {idx_fn}.{{.npy,.info}}" ) return (mdata, midx) @@ -246,15 +290,29 @@ class CSVMemMapDataset(TextMemMapDataset): def __init__( self, - dataset_paths, - newline_int=10, - header_lines=1, - workers=None, - tokenizer=None, - sort_dataset_paths=True, + dataset_paths: List[str], + newline_int: Optional[int] = 10, + header_lines: Optional[int] = 0, + workers: Optional[int] = None, + tokenizer: Optional[Type["TokenizerSpec"]] = None, + sort_dataset_paths: Optional[bool] = True, data_col=1, - data_sep=',', + data_sep=",", + index_mapping_dir: Optional[str] = None, ): + """ + Args: + dataset_paths: list of JSONL file paths. + newline_int: ASCII code to use to interpret newlines in file. + header_lines: number of header lines in JSON files. + workers: number of workers to use for creating index files. + tokenizer: tokenizer to use to convert text to tokens. + sort_dataset_paths: whether to sort datasets by paths. + data_col: index of data column. + data_sep: data separator. + index_mapping_dir: directory to save the index mapping to. + If None, will write to the same folder as the dataset. + """ super().__init__( dataset_paths=dataset_paths, newline_int=newline_int, @@ -262,6 +320,7 @@ def __init__( workers=workers, tokenizer=tokenizer, sort_dataset_paths=sort_dataset_paths, + index_mapping_dir=index_mapping_dir, ) self._data_col = data_col self._data_sep = data_sep @@ -280,8 +339,26 @@ class JSONLMemMapDataset(TextMemMapDataset): """ def __init__( - self, dataset_paths, newline_int=10, header_lines=1, workers=None, tokenizer=None, sort_dataset_paths=True, + self, + dataset_paths: List[str], + newline_int: Optional[int] = 10, + header_lines: Optional[int] = 0, + workers: Optional[int] = None, + tokenizer: Optional[Type["TokenizerSpec"]] = None, + sort_dataset_paths: Optional[bool] = True, + index_mapping_dir: Optional[str] = None, ): + """ + Args: + dataset_paths: list of JSONL file paths. + newline_int: ASCII code to use to interpret newlines in file. + header_lines: number of header lines in JSON files. + workers: number of workers to use for creating index files. + tokenizer: tokenizer to use to convert text to tokens. + sort_dataset_paths: whether to sort datasets by paths. + index_mapping_dir: directory to save the index mapping to. + If None, will write to the same folder as the dataset. + """ super().__init__( dataset_paths=dataset_paths, newline_int=newline_int, @@ -289,6 +366,7 @@ def __init__( workers=workers, tokenizer=tokenizer, sort_dataset_paths=sort_dataset_paths, + index_mapping_dir=index_mapping_dir, ) def _build_data_from_text(self, text): @@ -304,9 +382,48 @@ def _index_file_exists(idx_fn): return False -def _build_memmap_index_files(newline_int, build_index_fn, fn): +def _index_fn(fn: str, index_mapping_dir: str) -> str: + """Return base file name of index files. + + This returns the base file name associated with specified index + files. This base name is the base on top of which suffixes + like .npy or .info are added. + + The parent directory is created if it does not already exist. + + fn may be specified in multiple ways: + 1. file name: data.jsonl, + 2. relative path to a file: relative/path/to/data.jsonl, + 3. absolute path to a file: /absolute/path/to/data.jsonl. + + This function returns paths in the pattern of: + 1. /path/to/input_mapping_dir/data.jsonl.idx + 2. /path/to/input_mapping_dir/relative/path/to/data.jsonl.idx + 3. /path/to/input_mapping_dir/absolute/path/to/data.jsonl.idx + + Args: + fn: filename to get base name for. + index_mapping_dir: directory to save the index mapping to. + If None, will write to the same folder as the dataset. + """ + if index_mapping_dir: + # Remove leading "/" and "..". + while fn.startswith(("/", "..")): + if fn.startswith(".."): + fn = fn.lstrip("..") + if fn.startswith("/"): + fn = fn.lstrip("/") + idx_fn = f"{os.path.join(index_mapping_dir, fn)}.{__idx_suffix__}" + # Create parent directory if needed. + os.makedirs(os.path.dirname(idx_fn), exist_ok=True) + else: + idx_fn = f"{fn}.{__idx_suffix__}" + return idx_fn + + +def _build_memmap_index_files(newline_int, build_index_fn, fn, index_mapping_dir: str): """Helper function to build an index file""" - idx_fn = f"{fn}.{__idx_suffix__}" + idx_fn = _index_fn(fn, index_mapping_dir) # create data map if _index_file_exists(idx_fn): @@ -332,7 +449,9 @@ def _build_memmap_index_files(newline_int, build_index_fn, fn): return True -def build_index_files(dataset_paths, newline_int, workers=None, build_index_fn=_build_index_from_memdata): +def build_index_files( + dataset_paths, newline_int, workers=None, build_index_fn=_build_index_from_memdata, index_mapping_dir: str = None, +): """Auxiliary method to build multiple index files""" if len(dataset_paths) < 1: raise ValueError("files_list must contain at leat one file name") @@ -344,8 +463,11 @@ def build_index_files(dataset_paths, newline_int, workers=None, build_index_fn=_ # load all files into memmap start_time = time.time() with mp.Pool(workers) as p: - build_status = p.map(partial(_build_memmap_index_files, newline_int, build_index_fn), dataset_paths) + build_status = p.map( + partial(_build_memmap_index_files, newline_int, build_index_fn, index_mapping_dir=index_mapping_dir,), + dataset_paths, + ) logging.info( - f'Time building {sum(build_status)} / {len(build_status)} mem-mapped files: {datetime.timedelta(seconds=time.time() - start_time)}' + f"Time building {sum(build_status)} / {len(build_status)} mem-mapped files: {datetime.timedelta(seconds=time.time() - start_time)}" ) diff --git a/nemo/collections/nlp/data/spellchecking_asr_customization/__init__.py b/nemo/collections/nlp/data/spellchecking_asr_customization/__init__.py new file mode 100644 index 000000000000..4e786276108c --- /dev/null +++ b/nemo/collections/nlp/data/spellchecking_asr_customization/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from nemo.collections.nlp.data.spellchecking_asr_customization.dataset import ( + SpellcheckingAsrCustomizationDataset, + SpellcheckingAsrCustomizationTestDataset, + TarredSpellcheckingAsrCustomizationDataset, +) diff --git a/nemo/collections/nlp/data/spellchecking_asr_customization/bert_example.py b/nemo/collections/nlp/data/spellchecking_asr_customization/bert_example.py new file mode 100644 index 000000000000..803d0eaf8aed --- /dev/null +++ b/nemo/collections/nlp/data/spellchecking_asr_customization/bert_example.py @@ -0,0 +1,593 @@ +# Copyright 2019 The Google Research Authors. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from collections import OrderedDict +from os import path +from typing import Dict, List, Optional, Tuple, Union + +from transformers import PreTrainedTokenizerBase + +"""Build BERT Examples from asr hypothesis, customization candidates, target labels, span info. +""" + + +class BertExample(object): + """Class for training and inference examples for BERT. + + Attributes: + features: Feature dictionary. + """ + + def __init__( + self, + input_ids: List[int], + input_mask: List[int], + segment_ids: List[int], + input_ids_for_subwords: List[int], + input_mask_for_subwords: List[int], + segment_ids_for_subwords: List[int], + character_pos_to_subword_pos: List[int], + fragment_indices: List[Tuple[int, int, int]], + labels_mask: List[int], + labels: List[int], + spans: List[Tuple[int, int, int]], + default_label: int, + ) -> None: + """Inputs to the example wrapper + + Args: + input_ids: indices of single characters (treated as subwords) + input_mask: list of bools with 0s in place of input_ids to be masked + segment_ids: list of ints from 0 to 10 to denote the text segment type ( + 0 - for tokens of ASR hypothesis, + 1 - for tokens of the first candidate + ... + 10 - for tokens of the tenth candidate + ) + input_ids_for_subwords: indices of real subwords (as tokenized by bert tokenizer) + input_mask_for_subwords: list of bools with 0s in place of input_ids_for_subwords to be masked + segment_ids_for_subwords: same as segment_ids but for input_ids_for_subwords + character_pos_to_subword_pos: list of size=len(input_ids), value=(position of corresponding subword in input_ids_for_subwords) + fragment_indices: list of tuples (start_position, end_position, candidate_id), end is exclusive, candidate_id can be -1 if not set + labels_mask: bool tensor with 0s in place of label tokens to be masked + labels: indices of semiotic classes which should be predicted from each of the + corresponding input tokens + spans: list of tuples (class_id, start_position, end_position), end is exclusive, class is always 1(CUSTOM) + default_label: The default label + """ + input_len = len(input_ids) + if not ( + input_len == len(input_mask) + and input_len == len(segment_ids) + and input_len == len(labels_mask) + and input_len == len(labels) + and input_len == len(character_pos_to_subword_pos) + ): + raise ValueError("All feature lists should have the same length ({})".format(input_len)) + + input_len_for_subwords = len(input_ids_for_subwords) + if not ( + input_len_for_subwords == len(input_mask_for_subwords) + and input_len_for_subwords == len(segment_ids_for_subwords) + ): + raise ValueError( + "All feature lists for subwords should have the same length ({})".format(input_len_for_subwords) + ) + + self.features = OrderedDict( + [ + ("input_ids", input_ids), + ("input_mask", input_mask), + ("segment_ids", segment_ids), + ("input_ids_for_subwords", input_ids_for_subwords), + ("input_mask_for_subwords", input_mask_for_subwords), + ("segment_ids_for_subwords", segment_ids_for_subwords), + ("character_pos_to_subword_pos", character_pos_to_subword_pos), + ("fragment_indices", fragment_indices), + ("labels_mask", labels_mask), + ("labels", labels), + ("spans", spans), + ] + ) + self._default_label = default_label + + +class BertExampleBuilder(object): + """Builder class for BertExample objects.""" + + def __init__( + self, + label_map: Dict[str, int], + semiotic_classes: Dict[str, int], + tokenizer: PreTrainedTokenizerBase, + max_seq_length: int, + ) -> None: + """Initializes an instance of BertExampleBuilder. + + Args: + label_map: Mapping from tags to tag IDs. + semiotic_classes: Mapping from semiotic classes to their ids. + tokenizer: Tokenizer object. + max_seq_length: Maximum sequence length. + """ + self._label_map = label_map + self._semiotic_classes = semiotic_classes + self._tokenizer = tokenizer + self._max_seq_length = max_seq_length + # one span usually covers one or more words and it only exists for custom phrases, so there are much less spans than characters. + self._max_spans_length = max(4, int(max_seq_length / 20)) + self._pad_id = self._tokenizer.pad_token_id + self._default_label = 0 + + def build_bert_example( + self, hyp: str, ref: str, target: Optional[str] = None, span_info: Optional[str] = None, infer: bool = False + ) -> Optional[BertExample]: + """Constructs a BERT Example. + + Args: + hyp: Hypothesis text. + ref: Candidate customization variants divided by ';' + target: + if infer==False, string of labels (each label is 1-based index of correct candidate) or 0. + if infer==True, it can be None or string of labels (each label is 1-based index of some candidate). In inference this can be used to get corresponding fragments to fragment_indices. + span_info: + string of format "CUSTOM 6 20;CUSTOM 40 51", number of parts corresponds to number of targets. Can be empty if target is 0. + If infer==False, numbers are correct start and end(exclusive) positions of the corresponding target candidate in the text. + If infer==True, numbers are EXPECTED positions in the text. In inference this can be used to get corresponding fragments to fragment_indices. + infer: inference mode + Returns: + BertExample, or None if the conversion from text to tags was infeasible + + Example (infer=False): + hyp: "a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o" + ref: "d i d i e r _ s a u m o n;a s t r o n o m i e;t r i s t a n _ g u i l l o t;t r i s t e s s e;m o n a d e;c h r i s t i a n;a s t r o n o m e r;s o l o m o n;d i d i d i d i d i;m e r c y" + target: "1 3" + span_info: "CUSTOM 12 23;CUSTOM 28 41" + """ + if not ref.count(";") == 9: + raise ValueError("Expect 10 candidates: " + ref) + + span_info_parts = [] + targets = [] + + if len(target) > 0 and target != "0": + span_info_parts = span_info.split(";") + targets = list(map(int, target.split(" "))) + if len(span_info_parts) != len(targets): + raise ValueError( + "len(span_info_parts)=" + + str(len(span_info_parts)) + + " is different from len(target_parts)=" + + str(len(targets)) + ) + + tags = [0 for _ in hyp.split()] + if not infer: + for p, t in zip(span_info_parts, targets): + c, start, end = p.split(" ") + start = int(start) + end = int(end) + tags[start:end] = [t for i in range(end - start)] + + # get input features for characters + (input_ids, input_mask, segment_ids, labels_mask, labels, _, _,) = self._get_input_features( + hyp=hyp, ref=ref, tags=tags + ) + + # get input features for words + hyp_with_words = hyp.replace(" ", "").replace("_", " ") + ref_with_words = ref.replace(" ", "").replace("_", " ") + ( + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + _, + _, + _, + _, + ) = self._get_input_features(hyp=hyp_with_words, ref=ref_with_words, tags=None) + + # used in forward to concatenate subword embeddings to character embeddings + character_pos_to_subword_pos = self._map_characters_to_subwords(input_ids, input_ids_for_subwords) + + fragment_indices = [] + if infer: + # used in inference to take argmax over whole fragments instead of separate characters to get more consistent predictions + fragment_indices = self._get_fragment_indices(hyp, targets, span_info_parts) + + spans = [] + if not infer: + # during training spans are used in validation step to calculate accuracy on whole custom phrases instead of separate characters + spans = self._get_spans(span_info_parts) + + if len(input_ids) > self._max_seq_length or len(spans) > self._max_spans_length: + print( + "Max len exceeded: len(input_ids)=", + len(input_ids), + "; _max_seq_length=", + self._max_seq_length, + "; len(spans)=", + len(spans), + "; _max_spans_length=", + self._max_spans_length, + ) + return None + + example = BertExample( + input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + input_ids_for_subwords=input_ids_for_subwords, + input_mask_for_subwords=input_mask_for_subwords, + segment_ids_for_subwords=segment_ids_for_subwords, + character_pos_to_subword_pos=character_pos_to_subword_pos, + fragment_indices=fragment_indices, + labels_mask=labels_mask, + labels=labels, + spans=spans, + default_label=self._default_label, + ) + return example + + def _get_spans(self, span_info_parts: List[str]) -> List[Tuple[int, int, int]]: + """ Converts span_info string into a list of (class_id, start, end) where start, end are coordinates of starting and ending(exclusive) tokens in input_ids of BertExample + + Example: + span_info_parts: ["CUSTOM 37 41", "CUSTOM 47 52", "CUSTOM 42 46", "CUSTOM 0 7"] + result: [(1, 38, 42), (1, 48, 53), (1, 43, 47), (1, 1, 8)] + """ + result_spans = [] + + for p in span_info_parts: + if p == "": + break + c, start, end = p.split(" ") + if c not in self._semiotic_classes: + raise KeyError("class=" + c + " not found in self._semiotic_classes") + cid = self._semiotic_classes[c] + # +1 because this should be indexing on input_ids which has [CLS] token at beginning + start = int(start) + 1 + end = int(end) + 1 + result_spans.append((cid, start, end)) + return result_spans + + def _get_fragment_indices( + self, hyp: str, targets: List[int], span_info_parts: List[str] + ) -> Tuple[List[Tuple[int, int, int]]]: + """ Build fragment indices for real candidates. + This is used only at inference. + After external candidate retrieval we know approximately, where the candidate is located in the text (from the positions of matched n-grams). + In this function we + 1) adjust start/end positions to match word borders (possibly in multiple ways). + 2) generate content for fragment_indices tensor (it will be used during inference to average all predictions inside each fragment). + + Args: + hyp: ASR-hypothesis where space separates single characters (real space is replaced to underscore). + targets: list of candidate ids (only for real candidates, not dummy) + span_info_parts: list of strings of format like "CUSTOM 12 25", corresponding to each of targets, with start/end coordinates in text. + Returns: + List of tuples (start, end, target) where start and end are positions in ASR-hypothesis, target is candidate_id. + Note that returned fragments can be unsorted and can overlap, it's ok. + Example: + hyp: "a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o" + targets: [1 2 3 4 6 7 9] + span_info_parts: ["CUSTOM 12 25", "CUSTOM 0 10", "CUSTOM 27 42", ...], where numbers are EXPECTED start/end positions of corresponding target candidates in the text. These positions will be adjusted in this functuion. + fragment_indices: [(1, 12, 2), (13, 24, 1), (13, 28, 1), ..., (29, 42, 3)] + """ + + fragment_indices = [] + + letters = hyp.split() + + for target, p in zip(targets, span_info_parts): + _, start, end = p.split(" ") + start = int(start) + end = min(int(end), len(hyp)) # guarantee that end is not outside length + + # Adjusting strategy 1: expand both sides to the nearest space. + # Adjust start by finding the nearest left space or beginning of text. If start is already some word beginning, it won't change. + k = start + while k > 0 and letters[k] != '_': + k -= 1 + adjusted_start = k if k == 0 else k + 1 + + # Adjust end by finding the nearest right space. If end is already space or sentence end, it won't change. + k = end + while k < len(letters) and letters[k] != '_': + k += 1 + adjusted_end = k + + # +1 because this should be indexing on input_ids which has [CLS] token at beginning + fragment_indices.append((adjusted_start + 1, adjusted_end + 1, target)) + + # Adjusting strategy 2: try to shrink to the closest space (from left or right or both sides). + # For example, here the candidate "shippers" has a matching n-gram covering part of previous word + # a b o u t _ o u r _ s h i p e r s _ b u t _ y o u _ k n o w + # 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 + expanded_fragment = "".join(letters[adjusted_start:adjusted_end]) + left_space_position = expanded_fragment.find("_") + right_space_position = expanded_fragment.rfind("_") + is_left_shrink = False + is_right_shrink = False + if left_space_position > -1 and left_space_position < len(expanded_fragment) / 2: + # +1 because of CLS token, another +1 to put start position after found space + fragment_indices.append((adjusted_start + 1 + left_space_position + 1, adjusted_end + 1, target)) + is_left_shrink = True + if right_space_position > -1 and right_space_position > len(expanded_fragment) / 2: + fragment_indices.append((adjusted_start + 1, adjusted_start + 1 + right_space_position, target)) + is_right_shrink = True + if is_left_shrink and is_right_shrink: + fragment_indices.append( + (adjusted_start + 1 + left_space_position + 1, adjusted_start + 1 + right_space_position, target) + ) + + return fragment_indices + + def _map_characters_to_subwords(self, input_ids: List[int], input_ids_for_subwords: List[int]) -> List[int]: + """ Maps each single character to the position of its corresponding subword. + + Args: + input_ids: List of character token ids. + input_ids_for_subwords: List of subword token ids. + Returns: + List of subword positions in input_ids_for_subwords. Its length is equal to len(input_ids) + + Example: + input_ids: [101, 1037, 1055, 1056, 1054, 1051, 1050, ..., 1051, 102, 1040, ..., 1050, 102, 1037, ..., 1041, 102, ..., 102] + input_ids_for_subwords: [101, 26357, 2106, 2666, 2061, 8202, 1998, 13012, 16643, 2319, 1043, 7174, 102, 2106, 3771, 7842, 2819, 2239, 102, ..., 102] + result: [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, ... , 45, 46, 46, 46, 46, 46, 47] + """ + character_pos_to_subword_pos = [0 for _ in input_ids] + + ## '[CLS]', 'a', 's', 't', 'r', 'o', 'n', 'o', 'm', 'e', 'r', 's', '_', 'd', 'i', ..., 'l', 'o', '[SEP]', 'd', 'i', 'd', 'i', 'e', 'r', '_', 's', 'a', 'u', 'm', 'o', 'n', ..., '[SEP]' + tokens = self._tokenizer.convert_ids_to_tokens(input_ids) + ## '[CLS]', 'astronomers', 'did', '##ie', 'so', '##mon', 'and', 'tri', '##sti', '##an', 'g', '##llo', '[SEP]', 'did', '##ier', 'sa', '##um', '##on', '[SEP]', 'astro', '##no', '##mie', '[SEP]', 'tristan', 'gui', '##llo', '##t', '[SEP]', ..., '[SEP]', 'mercy', '[SEP]'] + tokens_for_subwords = self._tokenizer.convert_ids_to_tokens(input_ids_for_subwords) + j = 0 # index for tokens_for_subwords + j_offset = 0 # current letter index within subword + for i in range(len(tokens)): + character = tokens[i] + subword = tokens_for_subwords[j] + if character == "[CLS]" and subword == "[CLS]": + character_pos_to_subword_pos[i] = j + j += 1 + continue + if character == "[SEP]" and subword == "[SEP]": + character_pos_to_subword_pos[i] = j + j += 1 + continue + if character == "[CLS]" or character == "[SEP]" or subword == "[CLS]" or subword == "[SEP]": + raise IndexError( + "character[" + + str(i) + + "]=" + + character + + "; subword[" + + str(j) + + ";=" + + subword + + "subwords=" + + str(tokens_for_subwords) + ) + # At this point we expect that + # subword either 1) is a normal first token of a word or 2) starts with "##" (not first word token) + # character either 1) is a normal character or 2) is a space character "_" + if character == "_": + character_pos_to_subword_pos[i] = j - 1 # space is assigned to previous subtoken + continue + if j_offset < len(subword): + if character == subword[j_offset]: + character_pos_to_subword_pos[i] = j + j_offset += 1 + else: + raise IndexError( + "character mismatch:" + + "i=" + + str(i) + + "j=" + + str(j) + + "j_offset=" + + str(j_offset) + + "; len(tokens)=" + + str(len(tokens)) + + "; len(subwords)=" + + str(len(tokens_for_subwords)) + ) + # if subword is finished, increase j + if j_offset >= len(subword): + j += 1 + j_offset = 0 + if j >= len(tokens_for_subwords): + break + if tokens_for_subwords[j].startswith("##"): + j_offset = 2 + # check that all subword tokens are processed + if j < len(tokens_for_subwords): + raise IndexError( + "j=" + + str(j) + + "; len(tokens)=" + + str(len(tokens)) + + "; len(subwords)=" + + str(len(tokens_for_subwords)) + ) + return character_pos_to_subword_pos + + def _get_input_features( + self, hyp: str, ref: str, tags: List[int] + ) -> Tuple[List[int], List[int], List[int], List[int], List[int], List[str], List[int]]: + """Converts given ASR-hypothesis(hyp) and candidate string(ref) to features(token ids, mask, segment ids, etc). + + Args: + hyp: Hypothesis text. + ref: Candidate customization variants divided by ';' + tags: List of labels corresponding to each token of ASR-hypothesis or None when building an example during inference. + Returns: + Features (input_ids, input_mask, segment_ids, labels_mask, labels, hyp_tokens, token_start_indices) + + Note that this method is called both for character-based example and for word-based example (to split to subwords). + + Character-based example: + hyp: "a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o" + ref: "d i d i e r _ s a u m o n;a s t r o n o m i e;t r i s t a n _ g u i l l o t;t r i s t e s s e;m o n a d e;c h r i s t i a n;a s t r o n o m e r;s o l o m o n;d i d i d i d i d i;m e r c y" + tags: "0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 3 3 3 3 3 3 3 3 3 3 3 3 3" + + resulting token sequence: + '[CLS]', 'a', 's', 't', 'r', 'o', 'n', 'o', 'm', 'e', 'r', 's', '_', 'd', 'i', ..., 'l', 'o', '[SEP]', 'd', 'i', 'd', 'i', 'e', 'r', '_', 's', 'a', 'u', 'm', 'o', 'n', ..., '[SEP]' + + Word-based example: + hyp: "astronomers didie somon and tristian gllo" + ref: "didier saumon;astronomie;tristan guillot;tristesse;monade;christian;astronomer;solomon;dididididi;mercy" + tags: None (not used for word-based case) + + resulting token sequence: + '[CLS]', 'astronomers', 'did', '##ie', 'so', '##mon', 'and', 'tri', '##sti', '##an', 'g', '##llo', '[SEP]', 'did', '##ier', 'sa', '##um', '##on', '[SEP]', 'astro', '##no', '##mie', '[SEP]', 'tristan', 'gui', '##llo', '##t', '[SEP]', ..., '[SEP]', 'mercy', '[SEP]'] + """ + + labels_mask = [] + labels = [] + if tags is None: + hyp_tokens, token_start_indices = self._split_to_wordpieces(hyp.split()) + else: + hyp_tokens, labels, token_start_indices = self._split_to_wordpieces_with_labels(hyp.split(), tags) + references = ref.split(";") + all_ref_tokens = [] + all_ref_segment_ids = [] + for i in range(len(references)): + ref_tokens, _ = self._split_to_wordpieces(references[i].split()) + all_ref_tokens.extend(ref_tokens + ["[SEP]"]) + all_ref_segment_ids.extend([i + 1] * (len(ref_tokens) + 1)) + + input_tokens = ["[CLS]"] + hyp_tokens + ["[SEP]"] + all_ref_tokens # ends with [SEP] + input_ids = self._tokenizer.convert_tokens_to_ids(input_tokens) + input_mask = [1] * len(input_ids) + segment_ids = [0] + [0] * len(hyp_tokens) + [0] + all_ref_segment_ids + if len(input_ids) != len(segment_ids): + raise ValueError( + "len(input_ids)=" + + str(len(input_ids)) + + " is different from len(segment_ids)=" + + str(len(segment_ids)) + ) + + if tags: + labels_mask = [0] + [1] * len(labels) + [0] + [0] * len(all_ref_tokens) + labels = [0] + labels + [0] + [0] * len(all_ref_tokens) + return (input_ids, input_mask, segment_ids, labels_mask, labels, hyp_tokens, token_start_indices) + + def _split_to_wordpieces_with_labels( + self, tokens: List[str], labels: List[int] + ) -> Tuple[List[str], List[int], List[int]]: + """Splits tokens (and the labels accordingly) to WordPieces. + + Args: + tokens: Tokens to be split. + labels: Labels (one per token) to be split. + + Returns: + 3-tuple with the split tokens, split labels, and the indices of starting tokens of words + """ + bert_tokens = [] # Original tokens split into wordpieces. + bert_labels = [] # Label for each wordpiece. + # Index of each wordpiece that starts a new token. + token_start_indices = [] + for i, token in enumerate(tokens): + # '+ 1' is because bert_tokens will be prepended by [CLS] token later. + token_start_indices.append(len(bert_tokens) + 1) + pieces = self._tokenizer.tokenize(token) + bert_tokens.extend(pieces) + bert_labels.extend([labels[i]] * len(pieces)) + return bert_tokens, bert_labels, token_start_indices + + def _split_to_wordpieces(self, tokens: List[str]) -> Tuple[List[str], List[int]]: + """Splits tokens to WordPieces. + + Args: + tokens: Tokens to be split. + + Returns: + tuple with the split tokens, and the indices of the WordPieces that start a token. + """ + bert_tokens = [] # Original tokens split into wordpieces. + # Index of each wordpiece that starts a new token. + token_start_indices = [] + for i, token in enumerate(tokens): + # '+ 1' is because bert_tokens will be prepended by [CLS] token later. + token_start_indices.append(len(bert_tokens) + 1) + pieces = self._tokenizer.tokenize(token) + bert_tokens.extend(pieces) + return bert_tokens, token_start_indices + + def read_input_file( + self, input_filename: str, infer: bool = False + ) -> Union[List['BertExample'], Tuple[List['BertExample'], Tuple[str, str]]]: + """Reads in Tab Separated Value file and converts to training/inference-ready examples. + + Args: + example_builder: Instance of BertExampleBuilder + input_filename: Path to the TSV input file. + infer: If true, input examples do not contain target info. + + Returns: + examples: List of converted examples (BertExample). + or + (examples, hyps_refs): If infer==true, returns h + """ + + if not path.exists(input_filename): + raise ValueError("Cannot find file: " + input_filename) + examples = [] # output list of BertExample + hyps_refs = [] # output list of tuples (ASR-hypothesis, candidate_str) + with open(input_filename, 'r') as f: + for line in f: + if len(examples) % 1000 == 0: + logging.info("{} examples processed.".format(len(examples))) + if infer: + parts = line.rstrip('\n').split('\t') + hyp, ref, target, span_info = parts[0], parts[1], None, None + if len(parts) == 4: + target, span_info = parts[2], parts[3] + try: + example = self.build_bert_example(hyp, ref, target=target, span_info=span_info, infer=infer) + except Exception as e: + logging.warning(str(e)) + logging.warning(line) + continue + if example is None: + logging.info("cannot create example: ") + logging.info(line) + continue + hyps_refs.append((hyp, ref)) + examples.append(example) + else: + hyp, ref, target, semiotic_info = line.rstrip('\n').split('\t') + try: + example = self.build_bert_example( + hyp, ref, target=target, span_info=semiotic_info, infer=infer + ) + except Exception as e: + logging.warning(str(e)) + logging.warning(line) + continue + if example is None: + logging.info("cannot create example: ") + logging.info(line) + continue + examples.append(example) + logging.info(f"Done. {len(examples)} examples converted.") + if infer: + return examples, hyps_refs + return examples diff --git a/nemo/collections/nlp/data/spellchecking_asr_customization/dataset.py b/nemo/collections/nlp/data/spellchecking_asr_customization/dataset.py new file mode 100644 index 000000000000..69705ec21b9d --- /dev/null +++ b/nemo/collections/nlp/data/spellchecking_asr_customization/dataset.py @@ -0,0 +1,521 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pickle +from io import BytesIO +from typing import Dict, List, Optional, Tuple + +import braceexpand +import numpy as np +import torch +import webdataset as wd + +from nemo.collections.nlp.data.spellchecking_asr_customization.bert_example import BertExampleBuilder +from nemo.core.classes.dataset import Dataset, IterableDataset +from nemo.core.neural_types import ChannelType, IntType, LabelsType, MaskType, NeuralType +from nemo.utils import logging + +__all__ = [ + "SpellcheckingAsrCustomizationDataset", + "SpellcheckingAsrCustomizationTestDataset", + "TarredSpellcheckingAsrCustomizationDataset", +] + + +def collate_train_dataset( + batch: List[ + Tuple[ + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + ] + ], + pad_token_id: int, +) -> Tuple[ + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, +]: + """collate batch of training items + Args: + batch: A list of tuples of (input_ids, input_mask, segment_ids, input_ids_for_subwords, input_mask_for_subwords, segment_ids_for_subwords, character_pos_to_subword_pos, labels_mask, labels, spans). + pad_token_id: integer id of padding token (to use in padded_input_ids, padded_input_ids_for_subwords) + """ + max_length = 0 + max_length_for_subwords = 0 + max_length_for_spans = 1 # to avoid empty tensor + for ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + labels_mask, + labels, + spans, + ) in batch: + if len(input_ids) > max_length: + max_length = len(input_ids) + if len(input_ids_for_subwords) > max_length_for_subwords: + max_length_for_subwords = len(input_ids_for_subwords) + if len(spans) > max_length_for_spans: + max_length_for_spans = len(spans) + + padded_input_ids = [] + padded_input_mask = [] + padded_segment_ids = [] + padded_input_ids_for_subwords = [] + padded_input_mask_for_subwords = [] + padded_segment_ids_for_subwords = [] + padded_character_pos_to_subword_pos = [] + padded_labels_mask = [] + padded_labels = [] + padded_spans = [] + for ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + labels_mask, + labels, + spans, + ) in batch: + if len(input_ids) < max_length: + pad_length = max_length - len(input_ids) + padded_input_ids.append(np.pad(input_ids, pad_width=[0, pad_length], constant_values=pad_token_id)) + padded_input_mask.append(np.pad(input_mask, pad_width=[0, pad_length], constant_values=0)) + padded_segment_ids.append(np.pad(segment_ids, pad_width=[0, pad_length], constant_values=0)) + padded_labels_mask.append(np.pad(labels_mask, pad_width=[0, pad_length], constant_values=0)) + padded_labels.append(np.pad(labels, pad_width=[0, pad_length], constant_values=0)) + padded_character_pos_to_subword_pos.append( + np.pad(character_pos_to_subword_pos, pad_width=[0, pad_length], constant_values=0) + ) + else: + padded_input_ids.append(input_ids) + padded_input_mask.append(input_mask) + padded_segment_ids.append(segment_ids) + padded_labels_mask.append(labels_mask) + padded_labels.append(labels) + padded_character_pos_to_subword_pos.append(character_pos_to_subword_pos) + + if len(input_ids_for_subwords) < max_length_for_subwords: + pad_length = max_length_for_subwords - len(input_ids_for_subwords) + padded_input_ids_for_subwords.append( + np.pad(input_ids_for_subwords, pad_width=[0, pad_length], constant_values=pad_token_id) + ) + padded_input_mask_for_subwords.append( + np.pad(input_mask_for_subwords, pad_width=[0, pad_length], constant_values=0) + ) + padded_segment_ids_for_subwords.append( + np.pad(segment_ids_for_subwords, pad_width=[0, pad_length], constant_values=0) + ) + else: + padded_input_ids_for_subwords.append(input_ids_for_subwords) + padded_input_mask_for_subwords.append(input_mask_for_subwords) + padded_segment_ids_for_subwords.append(segment_ids_for_subwords) + + if len(spans) < max_length_for_spans: + padded_spans.append(np.ones((max_length_for_spans, 3), dtype=int) * -1) # pad value is [-1, -1, -1] + if len(spans) > 0: + padded_spans[-1][: spans.shape[0], : spans.shape[1]] = spans # copy actual spans to the beginning + else: + padded_spans.append(spans) + + return ( + torch.LongTensor(np.array(padded_input_ids)), + torch.LongTensor(np.array(padded_input_mask)), + torch.LongTensor(np.array(padded_segment_ids)), + torch.LongTensor(np.array(padded_input_ids_for_subwords)), + torch.LongTensor(np.array(padded_input_mask_for_subwords)), + torch.LongTensor(np.array(padded_segment_ids_for_subwords)), + torch.LongTensor(np.array(padded_character_pos_to_subword_pos)), + torch.LongTensor(np.array(padded_labels_mask)), + torch.LongTensor(np.array(padded_labels)), + torch.LongTensor(np.array(padded_spans)), + ) + + +def collate_test_dataset( + batch: List[Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]], + pad_token_id: int, +) -> Tuple[ + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, +]: + """collate batch of test items + Args: + batch: A list of tuples of (input_ids, input_mask, segment_ids, input_ids_for_subwords, input_mask_for_subwords, segment_ids_for_subwords, character_pos_to_subword_pos, fragment_indices). + pad_token_id: integer id of padding token (to use in padded_input_ids, padded_input_ids_for_subwords) + """ + max_length = 0 + max_length_for_subwords = 0 + max_length_for_fragment_indices = 1 # to avoid empty tensor + for ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + fragment_indices, + ) in batch: + if len(input_ids) > max_length: + max_length = len(input_ids) + if len(input_ids_for_subwords) > max_length_for_subwords: + max_length_for_subwords = len(input_ids_for_subwords) + if len(fragment_indices) > max_length_for_fragment_indices: + max_length_for_fragment_indices = len(fragment_indices) + + padded_input_ids = [] + padded_input_mask = [] + padded_segment_ids = [] + padded_input_ids_for_subwords = [] + padded_input_mask_for_subwords = [] + padded_segment_ids_for_subwords = [] + padded_character_pos_to_subword_pos = [] + padded_fragment_indices = [] + for ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + fragment_indices, + ) in batch: + if len(input_ids) < max_length: + pad_length = max_length - len(input_ids) + padded_input_ids.append(np.pad(input_ids, pad_width=[0, pad_length], constant_values=pad_token_id)) + padded_input_mask.append(np.pad(input_mask, pad_width=[0, pad_length], constant_values=0)) + padded_segment_ids.append(np.pad(segment_ids, pad_width=[0, pad_length], constant_values=0)) + padded_character_pos_to_subword_pos.append( + np.pad(character_pos_to_subword_pos, pad_width=[0, pad_length], constant_values=0) + ) + else: + padded_input_ids.append(input_ids) + padded_input_mask.append(input_mask) + padded_segment_ids.append(segment_ids) + padded_character_pos_to_subword_pos.append(character_pos_to_subword_pos) + + if len(input_ids_for_subwords) < max_length_for_subwords: + pad_length = max_length_for_subwords - len(input_ids_for_subwords) + padded_input_ids_for_subwords.append( + np.pad(input_ids_for_subwords, pad_width=[0, pad_length], constant_values=pad_token_id) + ) + padded_input_mask_for_subwords.append( + np.pad(input_mask_for_subwords, pad_width=[0, pad_length], constant_values=0) + ) + padded_segment_ids_for_subwords.append( + np.pad(segment_ids_for_subwords, pad_width=[0, pad_length], constant_values=0) + ) + else: + padded_input_ids_for_subwords.append(input_ids_for_subwords) + padded_input_mask_for_subwords.append(input_mask_for_subwords) + padded_segment_ids_for_subwords.append(segment_ids_for_subwords) + + if len(fragment_indices) < max_length_for_fragment_indices: + # we use [0, 1, 0] as padding value for fragment_indices, it corresponds to [CLS] token, which is ignored and won't affect anything + p = np.zeros((max_length_for_fragment_indices, 3), dtype=int) + p[:, 1] = 1 + p[:, 2] = 0 + padded_fragment_indices.append(p) + if len(fragment_indices) > 0: + padded_fragment_indices[-1][ + : fragment_indices.shape[0], : fragment_indices.shape[1] + ] = fragment_indices # copy actual fragment_indices to the beginning + else: + padded_fragment_indices.append(fragment_indices) + + return ( + torch.LongTensor(np.array(padded_input_ids)), + torch.LongTensor(np.array(padded_input_mask)), + torch.LongTensor(np.array(padded_segment_ids)), + torch.LongTensor(np.array(padded_input_ids_for_subwords)), + torch.LongTensor(np.array(padded_input_mask_for_subwords)), + torch.LongTensor(np.array(padded_segment_ids_for_subwords)), + torch.LongTensor(np.array(padded_character_pos_to_subword_pos)), + torch.LongTensor(np.array(padded_fragment_indices)), + ) + + +class SpellcheckingAsrCustomizationDataset(Dataset): + """ + Dataset as used by the SpellcheckingAsrCustomizationModel for training and validation pipelines. + + Args: + input_file (str): path to tsv-file with data + example_builder: instance of BertExampleBuilder + """ + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + """Returns definitions of module output ports. + """ + return { + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), MaskType()), + "segment_ids": NeuralType(('B', 'T'), ChannelType()), + "input_ids_for_subwords": NeuralType(('B', 'T'), ChannelType()), + "input_mask_for_subwords": NeuralType(('B', 'T'), MaskType()), + "segment_ids_for_subwords": NeuralType(('B', 'T'), ChannelType()), + "character_pos_to_subword_pos": NeuralType(('B', 'T'), ChannelType()), + "labels_mask": NeuralType(('B', 'T'), MaskType()), + "labels": NeuralType(('B', 'T'), LabelsType()), + "spans": NeuralType(('B', 'T', 'C'), IntType()), + } + + def __init__(self, input_file: str, example_builder: BertExampleBuilder) -> None: + self.example_builder = example_builder + self.examples = self.example_builder.read_input_file(input_file, infer=False) + self.pad_token_id = self.example_builder._pad_id + + def __len__(self): + return len(self.examples) + + def __getitem__(self, idx: int): + example = self.examples[idx] + input_ids = np.array(example.features["input_ids"], dtype=np.int16) + input_mask = np.array(example.features["input_mask"], dtype=np.int8) + segment_ids = np.array(example.features["segment_ids"], dtype=np.int8) + input_ids_for_subwords = np.array(example.features["input_ids_for_subwords"], dtype=np.int16) + input_mask_for_subwords = np.array(example.features["input_mask_for_subwords"], dtype=np.int8) + segment_ids_for_subwords = np.array(example.features["segment_ids_for_subwords"], dtype=np.int8) + character_pos_to_subword_pos = np.array(example.features["character_pos_to_subword_pos"], dtype=np.int16) + labels_mask = np.array(example.features["labels_mask"], dtype=np.int8) + labels = np.array(example.features["labels"], dtype=np.int8) + spans = np.array(example.features["spans"], dtype=np.int16) + return ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + labels_mask, + labels, + spans, + ) + + def _collate_fn(self, batch): + """collate batch of items + Args: + batch: A list of tuples of (input_ids, input_mask, segment_ids, input_ids_for_subwords, input_mask_for_subwords, segment_ids_for_subwords, character_pos_to_subword_pos, labels_mask, labels, spans). + """ + return collate_train_dataset(batch, pad_token_id=self.pad_token_id) + + +class TarredSpellcheckingAsrCustomizationDataset(IterableDataset): + """ + This Dataset loads training examples from tarred tokenized pickle files. + If using multiple processes the number of shards should be divisible by the number of workers to ensure an + even split among workers. If it is not divisible, logging will give a warning but training will proceed. + Additionally, please note that the len() of this DataLayer is assumed to be the number of tokens + of the text data. Shard strategy is scatter - each node gets a unique set of shards, which are permanently + pre-allocated and never changed at runtime. + Args: + text_tar_filepaths: a string (can be brace-expandable). + shuffle_n (int): How many samples to look ahead and load to be shuffled. + See WebDataset documentation for more details. + Defaults to 0. + global_rank (int): Worker rank, used for partitioning shards. Defaults to 0. + world_size (int): Total number of processes, used for partitioning shards. Defaults to 1. + pad_token_id: id of pad token (used in collate_fn) + """ + + def __init__( + self, + text_tar_filepaths: str, + shuffle_n: int = 1, + global_rank: int = 0, + world_size: int = 1, + pad_token_id: int = -1, # use real value or get error + ): + super(TarredSpellcheckingAsrCustomizationDataset, self).__init__() + if pad_token_id < 0: + raise ValueError("use non-negative pad_token_id: " + str(pad_token_id)) + + self.pad_token_id = pad_token_id + + # Replace '(', '[', '<' and '_OP_' with '{' + brace_keys_open = ['(', '[', '<', '_OP_'] + for bkey in brace_keys_open: + if bkey in text_tar_filepaths: + text_tar_filepaths = text_tar_filepaths.replace(bkey, "{") + + # Replace ')', ']', '>' and '_CL_' with '}' + brace_keys_close = [')', ']', '>', '_CL_'] + for bkey in brace_keys_close: + if bkey in text_tar_filepaths: + text_tar_filepaths = text_tar_filepaths.replace(bkey, "}") + + # Brace expand + text_tar_filepaths = list(braceexpand.braceexpand(text_tar_filepaths)) + + logging.info("Tarred dataset shards will be scattered evenly across all nodes.") + if len(text_tar_filepaths) % world_size != 0: + logging.warning( + f"Number of shards in tarred dataset ({len(text_tar_filepaths)}) is not divisible " + f"by number of distributed workers ({world_size}). " + f"Some shards will not be used ({len(text_tar_filepaths) % world_size})." + ) + begin_idx = (len(text_tar_filepaths) // world_size) * global_rank + end_idx = begin_idx + (len(text_tar_filepaths) // world_size) + logging.info('Begin Index : %d' % (begin_idx)) + logging.info('End Index : %d' % (end_idx)) + text_tar_filepaths = text_tar_filepaths[begin_idx:end_idx] + logging.info( + "Partitioning tarred dataset: process (%d) taking shards [%d, %d)", global_rank, begin_idx, end_idx + ) + + self.tarpath = text_tar_filepaths + + # Put together WebDataset + self._dataset = wd.WebDataset(urls=text_tar_filepaths, nodesplitter=None) + + if shuffle_n > 0: + self._dataset = self._dataset.shuffle(shuffle_n, initial=shuffle_n) + else: + logging.info("WebDataset will not shuffle files within the tar files.") + + self._dataset = self._dataset.rename(pkl='pkl', key='__key__').to_tuple('pkl', 'key').map(f=self._build_sample) + + def _build_sample(self, fname): + # Load file + pkl_file, _ = fname + pkl_file = BytesIO(pkl_file) + data = pickle.load(pkl_file) + pkl_file.close() + input_ids = data["input_ids"] + input_mask = data["input_mask"] + segment_ids = data["segment_ids"] + input_ids_for_subwords = data["input_ids_for_subwords"] + input_mask_for_subwords = data["input_mask_for_subwords"] + segment_ids_for_subwords = data["segment_ids_for_subwords"] + character_pos_to_subword_pos = data["character_pos_to_subword_pos"] + labels_mask = data["labels_mask"] + labels = data["labels"] + spans = data["spans"] + + return ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + labels_mask, + labels, + spans, + ) + + def __iter__(self): + return self._dataset.__iter__() + + def _collate_fn(self, batch): + """collate batch of items + Args: + batch: A list of tuples of (input_ids, input_mask, segment_ids, input_ids_for_subwords, input_mask_for_subwords, segment_ids_for_subwords, character_pos_to_subword_pos, labels_mask, labels, spans). + """ + return collate_train_dataset(batch, pad_token_id=self.pad_token_id) + + +class SpellcheckingAsrCustomizationTestDataset(Dataset): + """ + Dataset for inference pipeline. + + Args: + sents: list of strings + example_builder: instance of BertExampleBuilder + """ + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + """Returns definitions of module output ports. + """ + return { + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), MaskType()), + "segment_ids": NeuralType(('B', 'T'), ChannelType()), + "input_ids_for_subwords": NeuralType(('B', 'T'), ChannelType()), + "input_mask_for_subwords": NeuralType(('B', 'T'), MaskType()), + "segment_ids_for_subwords": NeuralType(('B', 'T'), ChannelType()), + "character_pos_to_subword_pos": NeuralType(('B', 'T'), ChannelType()), + "fragment_indices": NeuralType(('B', 'T', 'C'), IntType()), + } + + def __init__(self, input_file: str, example_builder: BertExampleBuilder) -> None: + self.example_builder = example_builder + self.examples, self.hyps_refs = self.example_builder.read_input_file(input_file, infer=True) + self.pad_token_id = self.example_builder._pad_id + + def __len__(self): + return len(self.examples) + + def __getitem__(self, idx: int): + example = self.examples[idx] + input_ids = np.array(example.features["input_ids"]) + input_mask = np.array(example.features["input_mask"]) + segment_ids = np.array(example.features["segment_ids"]) + input_ids_for_subwords = np.array(example.features["input_ids_for_subwords"]) + input_mask_for_subwords = np.array(example.features["input_mask_for_subwords"]) + segment_ids_for_subwords = np.array(example.features["segment_ids_for_subwords"]) + character_pos_to_subword_pos = np.array(example.features["character_pos_to_subword_pos"], dtype=np.int64) + fragment_indices = np.array(example.features["fragment_indices"], dtype=np.int16) + return ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + fragment_indices, + ) + + def _collate_fn(self, batch): + """collate batch of items + Args: + batch: A list of tuples of (input_ids, input_mask, segment_ids, input_ids_for_subwords, input_mask_for_subwords, segment_ids_for_subwords, character_pos_to_subword_pos). + """ + return collate_test_dataset(batch, pad_token_id=self.pad_token_id) diff --git a/nemo/collections/nlp/data/spellchecking_asr_customization/utils.py b/nemo/collections/nlp/data/spellchecking_asr_customization/utils.py new file mode 100644 index 000000000000..7385f19b414a --- /dev/null +++ b/nemo/collections/nlp/data/spellchecking_asr_customization/utils.py @@ -0,0 +1,929 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import json +import math +import random +import re +from collections import defaultdict, namedtuple +from typing import Dict, List, Set, Tuple, Union + +import numpy as np +from numba import jit + +"""Utility functions for Spellchecking ASR Customization.""" + + +def replace_diacritics(text): + text = re.sub(r"[éèëēêęěė]", "e", text) # latin + text = re.sub(r"[ё]", "е", text) # cyrillic + text = re.sub(r"[ãâāáäăàąåạảǎ]", "a", text) + text = re.sub(r"[úūüùưûů]", "u", text) + text = re.sub(r"[ôōóöõòőø]", "o", text) + text = re.sub(r"[ćçč]", "c", text) + text = re.sub(r"[ïīíîıì]", "i", text) + text = re.sub(r"[ñńňņ]", "n", text) + text = re.sub(r"[țťţ]", "t", text) + text = re.sub(r"[łľļ]", "l", text) + text = re.sub(r"[żžź]", "z", text) + text = re.sub(r"[ğ]", "g", text) + text = re.sub(r"[ďđ]", "d", text) + text = re.sub(r"[ķ]", "k", text) + text = re.sub(r"[ř]", "r", text) + text = re.sub(r"[ý]", "y", text) + text = re.sub(r"[æ]", "ae", text) + text = re.sub(r"[œ]", "oe", text) + text = re.sub(r"[șşšś]", "s", text) + return text + + +def load_ngram_mappings(input_name: str, max_misspelled_freq: int = 1000000000) -> Tuple[defaultdict, Set]: + """Loads n-gram mapping vocabularies in form required by dynamic programming + Args: + input_name: file with n-gram mappings + max_misspelled_freq: threshold on misspelled n-gram frequency + Returns: + vocab: dict {key=original_ngram, value=dict{key=misspelled_ngram, value=frequency}} + ban_ngram: set of banned misspelled n-grams + + Input format: + u t o u+i t o 49 8145 114 + u t o t e 63 8145 16970 + u t o o+_ t o 42 8145 1807 + """ + vocab = defaultdict(dict) + ban_ngram = set() + + with open(input_name, "r", encoding="utf-8") as f: + for line in f: + orig, misspelled, joint_freq, orig_freq, misspelled_freq = line.strip().split("\t") + if orig == "" or misspelled == "": + raise ValueError("Empty n-gram: orig=" + orig + "; misspelled=" + misspelled) + misspelled = misspelled.replace("", "=") + if misspelled.replace("=", "").strip() == "": # skip if resulting ngram doesn't contain any real character + continue + if int(misspelled_freq) > max_misspelled_freq: + ban_ngram.add(misspelled + " ") # space at the end is required within get_index function + vocab[orig][misspelled] = int(joint_freq) / int(orig_freq) + return vocab, ban_ngram + + +def load_ngram_mappings_for_dp(input_name: str) -> Tuple[defaultdict, defaultdict, defaultdict, int]: + """Loads n-gram mapping vocabularies in form required by dynamic programming + Args: + input_name: file with n-gram mappings + Returns: + joint_vocab: dict where key=(original_ngram, misspelled_ngram), value=frequency + orig_vocab: dict where key=original_ngram, value=frequency + misspelled_vocab: dict where key=misspelled_ngram, value=frequency + max_len: maximum n-gram length seen in vocabulary + + Input format: original \t misspelled \t joint_freq \t original_freq \t misspelled_freq + u t o u+i t o 49 8145 114 + u t o t e 63 8145 16970 + u t o o+_ t o 42 8145 1807 + """ + joint_vocab = defaultdict(int) + orig_vocab = defaultdict(int) + misspelled_vocab = defaultdict(int) + max_len = 0 + with open(input_name, "r", encoding="utf-8") as f: + for line in f: + orig, misspelled, joint_freq, _, _ = line.strip().split("\t") + if orig == "" or misspelled == "": + raise ValueError("Emty n-gram: orig=" + orig + "; misspelled=" + misspelled) + misspelled = misspelled.replace("", " ").replace("+", " ") + misspelled = " ".join(misspelled.split()) + if misspelled == "": # skip if resulting ngram doesn't contain any real character + continue + max_len = max(max_len, orig.count(" ") + 1, misspelled.count(" ") + 1) + joint_vocab[(orig, misspelled)] += int(joint_freq) + orig_vocab[orig] += int(joint_freq) + misspelled_vocab[misspelled] += int(joint_freq) + return joint_vocab, orig_vocab, misspelled_vocab, max_len + + +def get_alignment_by_dp( + ref_phrase: str, hyp_phrase: str, dp_data: Tuple[defaultdict, defaultdict, defaultdict, int] +) -> List[Tuple[str, str, float, float, int, int, int]]: + """Get best alignment path between a reference and (possibly) misspelled phrase using n-gram mappings vocabulary. + Args: + ref_phrase: candidate reference phrase (letters separated by space, real space replaced by underscore) + hyp_phrase: (possibly) misspelled phrase (letters separated by space, real space replaced by underscore) + dp_data: n-gram mapping vocabularies used by dynamic programming + Returns: + list of tuples (hyp_ngram, ref_ngram, logprob, sum_logprob, joint_freq, orig_freq, misspelled_freq) + This is best alignment path. + + Example: + ref_phrase: "a n h y d r i d e" + hyp_phrase: "a n d _ h y d r o d" + + Result: + [("*", "*", 0.0, 0.0, 0, 0, 0) + ("a n d _ h", "a n h", -2.34, -2.34, 226, 2338, 2203) + ("y d r o", "y d r i", -2.95, -5.29, 11, 211, 1584) + ("d", "d e", -1.99, -7.28, 60610, 444714, 2450334) + ] + Final path score is in path[-1][3]: -7.28 + Note that the order of ref_phrase and hyp_phrase matters, because n-gram mappings vocabulary is not symmetrical. + """ + joint_vocab, orig_vocab, misspelled_vocab, max_len = dp_data + hyp_letters = ["*"] + hyp_phrase.split() + ref_letters = ["*"] + ref_phrase.split() + DpInfo = namedtuple( + "DpInfo", ["hyp_pos", "ref_pos", "best_hyp_ngram_len", "best_ref_ngram_len", "score", "sum_score"] + ) + history = defaultdict(DpInfo) + history[(0, 0)] = DpInfo( + hyp_pos=0, ref_pos=0, best_hyp_ngram_len=1, best_ref_ngram_len=1, score=0.0, sum_score=0.0 + ) + for hyp_pos in range(len(hyp_letters)): + for ref_pos in range(len(ref_letters)): + if hyp_pos == 0 and ref_pos == 0: # cell (0, 0) is already defined + continue + # consider cell (hyp_pos, ref_pos) and find best path to get there + best_hyp_ngram_len = 0 + best_ref_ngram_len = 0 + best_ngram_score = float("-inf") + best_sum_score = float("-inf") + # loop over paths ending on non-empty ngram mapping + for hyp_ngram_len in range(1, 1 + min(max_len, hyp_pos + 1)): + hyp_ngram = " ".join(hyp_letters[(hyp_pos - hyp_ngram_len + 1) : (hyp_pos + 1)]) + for ref_ngram_len in range(1, 1 + min(max_len, ref_pos + 1)): + ref_ngram = " ".join(ref_letters[(ref_pos - ref_ngram_len + 1) : (ref_pos + 1)]) + if (ref_ngram, hyp_ngram) not in joint_vocab: + continue + joint_freq = joint_vocab[(ref_ngram, hyp_ngram)] + orig_freq = orig_vocab.get(ref_ngram, 1) + ngram_score = math.log(joint_freq / orig_freq) + previous_cell = (hyp_pos - hyp_ngram_len, ref_pos - ref_ngram_len) + if previous_cell not in history: + print("cell ", previous_cell, "does not exist") + continue + previous_score = history[previous_cell].sum_score + sum_score = ngram_score + previous_score + if sum_score > best_sum_score: + best_sum_score = sum_score + best_ngram_score = ngram_score + best_hyp_ngram_len = hyp_ngram_len + best_ref_ngram_len = ref_ngram_len + # loop over two variants with deletion of one character + deletion_score = -6.0 + insertion_score = -6.0 + if hyp_pos > 0: + previous_cell = (hyp_pos - 1, ref_pos) + previous_score = history[previous_cell].sum_score + sum_score = deletion_score + previous_score + if sum_score > best_sum_score: + best_sum_score = sum_score + best_ngram_score = deletion_score + best_hyp_ngram_len = 1 + best_ref_ngram_len = 0 + + if ref_pos > 0: + previous_cell = (hyp_pos, ref_pos - 1) + previous_score = history[previous_cell].sum_score + sum_score = insertion_score + previous_score + if sum_score > best_sum_score: + best_sum_score = sum_score + best_ngram_score = insertion_score + best_hyp_ngram_len = 0 + best_ref_ngram_len = 1 + + if best_hyp_ngram_len == 0 and best_ref_ngram_len == 0: + raise ValueError("best_hyp_ngram_len = 0 and best_ref_ngram_len = 0") + + # save cell to history + history[(hyp_pos, ref_pos)] = DpInfo( + hyp_pos=hyp_pos, + ref_pos=ref_pos, + best_hyp_ngram_len=best_hyp_ngram_len, + best_ref_ngram_len=best_ref_ngram_len, + score=best_ngram_score, + sum_score=best_sum_score, + ) + # now trace back on best path starting from last positions + path = [] + hyp_pos = len(hyp_letters) - 1 + ref_pos = len(ref_letters) - 1 + cell_info = history[(hyp_pos, ref_pos)] + path.append(cell_info) + while hyp_pos > 0 or ref_pos > 0: + hyp_pos -= cell_info.best_hyp_ngram_len + ref_pos -= cell_info.best_ref_ngram_len + cell_info = history[(hyp_pos, ref_pos)] + path.append(cell_info) + + result = [] + for info in reversed(path): + hyp_ngram = " ".join(hyp_letters[(info.hyp_pos - info.best_hyp_ngram_len + 1) : (info.hyp_pos + 1)]) + ref_ngram = " ".join(ref_letters[(info.ref_pos - info.best_ref_ngram_len + 1) : (info.ref_pos + 1)]) + joint_freq = joint_vocab.get((ref_ngram, hyp_ngram), 0) + orig_freq = orig_vocab.get(ref_ngram, 0) + misspelled_freq = misspelled_vocab.get(hyp_ngram, 0) + result.append((hyp_ngram, ref_ngram, info.score, info.sum_score, joint_freq, orig_freq, misspelled_freq)) + return result + + +def get_index( + custom_phrases: List[str], + vocab: defaultdict, + ban_ngram_global: Set[str], + min_log_prob: float = -4.0, + max_phrases_per_ngram: int = 100, +) -> Tuple[List[str], Dict[str, List[Tuple[int, int, int, float]]]]: + """Given a restricted vocabulary of replacements, + loops through custom phrases, + generates all possible conversions and creates index. + + Args: + custom_phrases: list of all custom phrases, characters should be split by space, real space replaced to underscore. + vocab: n-gram mappings vocabulary - dict {key=original_ngram, value=dict{key=misspelled_ngram, value=frequency}} + ban_ngram_global: set of banned misspelled n-grams + min_log_prob: minimum log probability, after which we stop growing this n-gram. + max_phrases_per_ngram: maximum phrases that we allow to store per one n-gram. N-grams exceeding that quantity get banned. + + Returns: + phrases - list of phrases. Position in this list is used as phrase_id. + ngram2phrases - resulting index, i.e. dict where key=ngram, value=list of tuples (phrase_id, begin_pos, size, logprob) + """ + + ban_ngram_local = set() # these ngrams are banned only for given custom_phrases + ngram_to_phrase_and_position = defaultdict(list) + + for custom_phrase in custom_phrases: + inputs = custom_phrase.split(" ") + begin = 0 + index_keys = [{} for _ in inputs] # key - letter ngram, index - beginning positions in phrase + + for begin in range(len(inputs)): + for end in range(begin + 1, min(len(inputs) + 1, begin + 5)): + inp = " ".join(inputs[begin:end]) + if inp not in vocab: + continue + for rep in vocab[inp]: + lp = math.log(vocab[inp][rep]) + + for b in range(max(0, end - 5), end): # try to grow previous ngrams with new replacement + new_ngrams = {} + for ngram in index_keys[b]: + lp_prev = index_keys[b][ngram] + if len(ngram) + len(rep) <= 10 and b + ngram.count(" ") == begin: + if lp_prev + lp > min_log_prob: + new_ngrams[ngram + rep + " "] = lp_prev + lp + index_keys[b].update(new_ngrams) # join two dictionaries + # add current replacement as ngram + if lp > min_log_prob: + index_keys[begin][rep + " "] = lp + + for b in range(len(index_keys)): + for ngram, lp in sorted(index_keys[b].items(), key=lambda item: item[1], reverse=True): + if ngram in ban_ngram_global: # here ngram ends with a space + continue + real_length = ngram.count(" ") + ngram = ngram.replace("+", " ").replace("=", " ") + ngram = " ".join(ngram.split()) # here ngram doesn't end with a space anymore + if ngram + " " in ban_ngram_global: # this can happen after deletion of + and = + continue + if ngram in ban_ngram_local: + continue + ngram_to_phrase_and_position[ngram].append((custom_phrase, b, real_length, lp)) + if len(ngram_to_phrase_and_position[ngram]) > max_phrases_per_ngram: + ban_ngram_local.add(ngram) + del ngram_to_phrase_and_position[ngram] + continue + + phrases = [] # id to phrase + phrase2id = {} # phrase to id + ngram2phrases = defaultdict(list) # ngram to list of tuples (phrase_id, begin, length, logprob) + + for ngram in ngram_to_phrase_and_position: + for phrase, b, length, lp in ngram_to_phrase_and_position[ngram]: + if phrase not in phrase2id: + phrases.append(phrase) + phrase2id[phrase] = len(phrases) - 1 + ngram2phrases[ngram].append((phrase2id[phrase], b, length, lp)) + + return phrases, ngram2phrases + + +def load_index(input_name: str) -> Tuple[List[str], Dict[str, List[Tuple[int, int, int, float]]]]: + """ Load index from file + Args: + input_name: file with index + Returns: + phrases: List of all phrases in custom vocabulary. Position corresponds to phrase_id. + ngram2phrases: dict where key=ngram, value=list of tuples (phrase_id, begin_pos, size, logprob) + """ + phrases = [] # id to phrase + phrase2id = {} # phrase to id + ngram2phrases = defaultdict(list) # ngram to list of tuples (phrase_id, begin_pos, size, logprob) + with open(input_name, "r", encoding="utf-8") as f: + for line in f: + ngram, phrase, b, size, lp = line.split("\t") + b = int(b) + size = int(size) + lp = float(lp) + if phrase not in phrase2id: + phrases.append(phrase) + phrase2id[phrase] = len(phrases) - 1 + ngram2phrases[ngram].append((phrase2id[phrase], b, size, lp)) + return phrases, ngram2phrases + + +def search_in_index( + ngram2phrases: Dict[str, List[Tuple[int, int, int, float]]], phrases: List[str], letters: Union[str, List[str]] +) -> Tuple[np.ndarray, List[Set[str]]]: + """ Function used to search in index + + Args: + ngram2phrases: dict where key=ngram, value=list of tuples (phrase_id, begin_pos, size, logprob) + phrases: List of all phrases in custom vocabulary. Position corresponds to phrase_id. + letters: list of letters of ASR-hypothesis. Should not contain spaces - real spaces should be replaced with underscores. + + Returns: + phrases2positions: a matrix of size (len(phrases), len(letters)). + It is filled with 1.0 (hits) on intersection of letter n-grams and phrases that are indexed by these n-grams, 0.0 - elsewhere. + It is used later to find phrases with many hits within a contiguous window - potential matching candidates. + position2ngrams: positions in ASR-hypothesis mapped to sets of ngrams starting from that position. + It is used later to check how well each found candidate is covered by n-grams (to avoid cases where some repeating n-gram gives many hits to a phrase, but the phrase itself is not well covered). + """ + + if " " in letters: + raise ValueError("letters should not contain space: " + str(letters)) + + phrases2positions = np.zeros((len(phrases), len(letters)), dtype=float) + # positions mapped to sets of ngrams starting from that position + position2ngrams = [set() for _ in range(len(letters))] + + begin = 0 + for begin in range(len(letters)): + for end in range(begin + 1, min(len(letters) + 1, begin + 7)): + ngram = " ".join(letters[begin:end]) + if ngram not in ngram2phrases: + continue + for phrase_id, b, size, lp in ngram2phrases[ngram]: + phrases2positions[phrase_id, begin:end] = 1.0 + position2ngrams[begin].add(ngram) + return phrases2positions, position2ngrams + + +@jit(nopython=True) # Set "nopython" mode for best performance, equivalent to @njit +def get_all_candidates_coverage(phrases, phrases2positions): + """Get maximum hit coverage for each phrase - within a moving window of length of the phrase. + Args: + phrases: List of all phrases in custom vocabulary. Position corresponds to phrase_id. + phrases2positions: a matrix of size (len(phrases), len(ASR-hypothesis)). + It is filled with 1.0 (hits) on intersection of letter n-grams and phrases that are indexed by these n-grams, 0.0 - elsewhere. + Returns: + candidate2coverage: list of size len(phrases) containing coverage (0.0 to 1.0) in best window. + candidate2position: list of size len(phrases) containing starting position of best window. + """ + candidate2coverage = [0.0] * len(phrases) + candidate2position = [-1] * len(phrases) + + for i in range(len(phrases)): + phrase_length = phrases[i].count(" ") + 1 + all_coverage = np.sum(phrases2positions[i]) / phrase_length + # if total coverage on whole ASR-hypothesis is too small, there is no sense in using moving window + if all_coverage < 0.4: + continue + moving_sum = np.sum(phrases2positions[i, 0:phrase_length]) + max_sum = moving_sum + best_pos = 0 + for pos in range(1, phrases2positions.shape[1] - phrase_length + 1): + moving_sum -= phrases2positions[i, pos - 1] + moving_sum += phrases2positions[i, pos + phrase_length - 1] + if moving_sum > max_sum: + max_sum = moving_sum + best_pos = pos + + coverage = max_sum / (phrase_length + 2) # smoothing + candidate2coverage[i] = coverage + candidate2position[i] = best_pos + return candidate2coverage, candidate2position + + +def get_candidates( + ngram2phrases: Dict[str, List[Tuple[int, int, int, float]]], + phrases: List[str], + letters: Union[str, List[str]], + pool_for_random_candidates: List[str], + min_phrase_coverage: float = 0.8, +) -> List[Tuple[str, int, int, float, float]]: + """Given an index of custom vocabulary and an ASR-hypothesis retrieve 10 candidates. + Args: + ngram2phrases: dict where key=ngram, value=list of tuples (phrase_id, begin_pos, size, logprob) + phrases: List of all phrases in custom vocabulary. Position corresponds to phrase_id. + letters: list of letters of ASR-hypothesis. Should not contain spaces - real spaces should be replaced with underscores. + pool_for_random_candidates: large list of strings, from which to sample random candidates in case when there are less than 10 real candidates + min_phrase_coverage: We discard candidates which are not covered by n-grams to at least to this extent + (to avoid cases where some repeating n-gram gives many hits to a phrase, but the phrase itself is not well covered). + Returns: + candidates: list of tuples (candidate_text, approximate_begin_position, length, coverage of window in ASR-hypothesis, coverage of phrase itself). + """ + phrases2positions, position2ngrams = search_in_index(ngram2phrases, phrases, letters) + candidate2coverage, candidate2position = get_all_candidates_coverage(phrases, phrases2positions) + + # mask for each custom phrase, how many which symbols are covered by input ngrams + phrases2coveredsymbols = [[0 for x in phrases[i].split(" ")] for i in range(len(phrases))] + candidates = [] + k = 0 + for idx, coverage in sorted(enumerate(candidate2coverage), key=lambda item: item[1], reverse=True): + begin = candidate2position[idx] # this is most likely beginning of this candidate + phrase_length = phrases[idx].count(" ") + 1 + for pos in range(begin, begin + phrase_length): + # we do not know exact end of custom phrase in text, it can be different from phrase length + if pos >= len(position2ngrams): + break + for ngram in position2ngrams[pos]: + for phrase_id, b, size, lp in ngram2phrases[ngram]: + if phrase_id != idx: + continue + for ppos in range(b, b + size): + if ppos >= phrase_length: + break + phrases2coveredsymbols[phrase_id][ppos] = 1 + k += 1 + if k > 100: + break + real_coverage = sum(phrases2coveredsymbols[idx]) / len(phrases2coveredsymbols[idx]) + if real_coverage < min_phrase_coverage: + continue + candidates.append((phrases[idx], begin, phrase_length, coverage, real_coverage)) + + # no need to process this sentence further if it does not contain any real candidates + if len(candidates) == 0: + print("WARNING: no real candidates", candidates) + return [] + + while len(candidates) < 10: + dummy = random.choice(pool_for_random_candidates) + dummy = " ".join(list(dummy.replace(" ", "_"))) + candidates.append((dummy, -1, dummy.count(" ") + 1, 0.0, 0.0)) + + candidates = candidates[:10] + random.shuffle(candidates) + if len(candidates) != 10: + print("WARNING: cannot get 10 candidates", candidates) + return [] + + return candidates + + +def read_spellmapper_predictions(filename: str) -> List[Tuple[str, List[Tuple[int, int, str, float]], List[int]]]: + """Read results of SpellMapper inference from file. + Args: + filename: file with SpellMapper results + Returns: + list of tuples (sent, list of fragment predictions, list of letter predictions) + One fragment prediction is a tuple (begin, end, replacement_text, prob) + """ + results = [] + with open(filename, "r", encoding="utf-8") as f: + for line in f: + text, candidate_str, fragment_predictions_str, letter_predictions_str = line.strip().split("\t") + text = text.replace(" ", "").replace("_", " ") + candidate_str = candidate_str.replace(" ", "").replace("_", " ") + candidates = candidate_str.split(";") + letter_predictions = list(map(int, letter_predictions_str.split())) + if len(candidates) != 10: + raise IndexError("expect 10 candidates, got: ", len(candidates)) + if len(text) != len(letter_predictions): + raise IndexError("len(text)=", len(text), "; len(letter_predictions)=", len(letter_predictions)) + replacements = [] + if fragment_predictions_str != "": + for prediction in fragment_predictions_str.split(";"): + begin, end, candidate_id, prob = prediction.split(" ") + begin = int(begin) + end = int(end) + candidate_id = int(candidate_id) + prob = float(prob) + replacements.append((begin, end, candidates[candidate_id - 1], prob)) + replacements.sort() # it will sort by begin, then by end + results.append((text, replacements, letter_predictions)) + return results + + +def substitute_replacements_in_text( + text: str, replacements: List[Tuple[int, int, str, float]], replace_hyphen_to_space: bool +) -> str: + """Substitute replacements to the input text, iterating from end to beginning, so that indexing does not change. + Note that we expect intersecting replacements to be already filtered. + Args: + text: sentence; + replacements: list of replacements, each is a tuple (begin, end, text, probability); + replace_hyphen_to_space: if True, hyphens in replacements will be converted to spaces; + Returns: + corrected sentence + """ + replacements.sort() + last_begin = len(text) + 1 + corrected_text = text + for begin, end, candidate, prob in reversed(replacements): + if end > last_begin: + print("WARNING: skip intersecting replacement [", candidate, "] in text: ", text) + continue + if replace_hyphen_to_space: + candidate = candidate.replace("-", " ") + corrected_text = corrected_text[:begin] + candidate + corrected_text[end:] + last_begin = begin + return corrected_text + + +def apply_replacements_to_text( + text: str, + replacements: List[Tuple[int, int, str, float]], + min_prob: float = 0.5, + replace_hyphen_to_space: bool = False, + dp_data: Tuple[defaultdict, defaultdict, defaultdict, int] = None, + min_dp_score_per_symbol: float = -99.9, +) -> str: + """Filter and apply replacements to the input sentence. + Args: + text: input sentence; + replacements: list of proposed replacements (probably intersecting), each is a tuple (begin, end, text, probability); + min_prob: threshold on replacement probability; + replace_hyphen_to_space: if True, hyphens in replacements will be converted to spaces; + dp_data: n-gram mapping vocabularies used by dynamic programming, if None - dynamic programming is not used; + min_dp_score_per_symbol: threshold on dynamic programming sum score averaged by hypothesis length + Returns: + corrected sentence + """ + # sort replacements by positions + replacements.sort() + # filter replacements + # Note that we do not skip replacements with same text, otherwise intersecting candidates with lower probability can win + filtered_replacements = [] + for j in range(len(replacements)): + replacement = replacements[j] + begin, end, candidate, prob = replacement + fragment = text[begin:end] + candidate_spaced = " ".join(list(candidate.replace(" ", "_"))) + fragment_spaced = " ".join(list(fragment.replace(" ", "_"))) + # apply penalty if candidate length is bigger than fragment length + # to avoid cases like "forward-looking" replacing "looking" in "forward looking" resulting in "forward forward looking" + if len(candidate) > len(fragment): + penalty = len(fragment) / len(candidate) + prob *= penalty + # skip replacement with low probability + if prob < min_prob: + continue + # skip replacements with some predefined templates, e.g. "*'s" => "*s" + if check_banned_replacements(fragment, candidate): + continue + if dp_data is not None: + path = get_alignment_by_dp(candidate_spaced, fragment_spaced, dp_data) + # path[-1][3] is the sum of logprobs for best path of dynamic programming: divide sum_score by length + if path[-1][3] / (len(fragment)) < min_dp_score_per_symbol: + continue + + # skip replacement if it intersects with previous replacement and has lower probability, otherwise remove previous replacement + if len(filtered_replacements) > 0 and filtered_replacements[-1][1] > begin: + if filtered_replacements[-1][3] > prob: + continue + else: + filtered_replacements.pop() + filtered_replacements.append((begin, end, candidate, prob)) + + return substitute_replacements_in_text(text, filtered_replacements, replace_hyphen_to_space) + + +def update_manifest_with_spellmapper_corrections( + input_manifest_name: str, + short2full_name: str, + output_manifest_name: str, + spellmapper_results_name: str, + min_prob: float = 0.5, + replace_hyphen_to_space: bool = True, + field_name: str = "pred_text", + use_dp: bool = True, + ngram_mappings: Union[str, None] = None, + min_dp_score_per_symbol: float = -1.5, +) -> None: + """Post-process SpellMapper predictions and write corrected sentence to the specified field of nemo manifest. + The previous content of this field will be copied to "*_before_correction" field. + If the sentence was split into fragments before running SpellMapper, all replacements will be first gathered together and then applied to the original long sentence. + Args: + input_manifest_name: input nemo manifest; + short2full_name: text file with two columns: short_sent \t full_sent; + output_manifest_name: output nemo manifest; + spellmapper_results_name: text file with SpellMapper inference results; + min_prob: threshold on replacement probability; + replace_hyphen_to_space: if True, hyphens in replacements will be converted to spaces; + field_name: name of json field whose text we want to correct; + use_dp: bool = If True, additional replacement filtering will be applied using dynamic programming (works slow); + ngram_mappings: file with n-gram mappings, only needed if use_dp=True + min_dp_score_per_symbol: threshold on dynamic programming sum score averaged by hypothesis length + """ + short2full_sent = defaultdict(list) + sent2corrections = defaultdict(dict) + with open(short2full_name, "r", encoding="utf-8") as f: + for line in f: + s = line.strip() + short_sent, full_sent = s.split("\t") + short2full_sent[short_sent].append(full_sent) + sent2corrections[full_sent] = [] + + spellmapper_results = read_spellmapper_predictions(spellmapper_results_name) + dp_data = None + if use_dp: + dp_data = load_ngram_mappings_for_dp(ngram_mappings) + + for text, replacements, _ in spellmapper_results: + short_sent = text + if short_sent not in short2full_sent: + continue + # it can happen that one short sentence occurred in multiple full sentences + for full_sent in short2full_sent[short_sent]: + offset = full_sent.find(short_sent) + for begin, end, candidate, prob in replacements: + sent2corrections[full_sent].append((begin + offset, end + offset, candidate, prob)) + + out = open(output_manifest_name, "w", encoding="utf-8") + with open(input_manifest_name, "r", encoding="utf-8") as f: + for line in f: + record = json.loads(line.strip()) + sent = record[field_name] + record[field_name + "_before_correction"] = record[field_name] + if sent in sent2corrections: + record[field_name] = apply_replacements_to_text( + sent, + sent2corrections[sent], + min_prob=min_prob, + replace_hyphen_to_space=replace_hyphen_to_space, + dp_data=dp_data, + min_dp_score_per_symbol=min_dp_score_per_symbol, + ) + out.write(json.dumps(record) + "\n") + out.close() + + +def extract_and_split_text_from_manifest( + input_name: str, output_name: str, field_name: str = "pred_text", len_in_words: int = 16, step_in_words: int = 8 +) -> None: + """Extract text of the specified field in nemo manifest and split it into fragments (possibly with intersection). + The result is saved to a text file with two columns: short_sent \t full_sent. + This is useful if we want to process shorter sentences and then apply the results to the original long sentence. + Args: + input_name: input nemo manifest, + output_name: output text file, + field_name: name of json field from which we extract the sentence text, + len_in_words: maximum number of words in a fragment, + step_in_words: on how many words we move at each step. + For example, if the len_in_words=16 and step_in_words=8 the fragments will be intersected by half. + """ + short2full_sent = set() + with open(input_name, "r", encoding="utf-8") as f: + for line in f: + record = json.loads(line.strip()) + sent = record[field_name] + if " " in sent: + raise ValueError("found multiple space in: " + sent) + words = sent.split() + for i in range(0, len(words), step_in_words): + short_sent = " ".join(words[i : i + len_in_words]) + short2full_sent.add((short_sent, sent)) + + with open(output_name, "w", encoding="utf-8") as out: + for short_sent, full_sent in short2full_sent: + out.write(short_sent + "\t" + full_sent + "\n") + + +def check_banned_replacements(src: str, dst: str) -> bool: + """This function is used to check is a pair of words/phrases is matching some common template that we don't want to replace with one another. + Args: + src: first phrase + dst: second phrase + Returns True if this replacement should be banned. + """ + # customers' => customer's + if src.endswith("s'") and dst.endswith("'s") and src[0:-2] == dst[0:-2]: + return True + # customer's => customers' + if src.endswith("'s") and dst.endswith("s'") and src[0:-2] == dst[0:-2]: + return True + # customers => customer's + if src.endswith("s") and dst.endswith("'s") and src[0:-1] == dst[0:-2]: + return True + # customer's => customers + if src.endswith("'s") and dst.endswith("s") and src[0:-2] == dst[0:-1]: + return True + # customers => customers' + if src.endswith("s") and dst.endswith("s'") and src[0:-1] == dst[0:-2]: + return True + # customers' => customers + if src.endswith("s'") and dst.endswith("s") and src[0:-2] == dst[0:-1]: + return True + # utilities => utility's + if src.endswith("ies") and dst.endswith("y's") and src[0:-3] == dst[0:-3]: + return True + # utility's => utilities + if src.endswith("y's") and dst.endswith("ies") and src[0:-3] == dst[0:-3]: + return True + # utilities => utility + if src.endswith("ies") and dst.endswith("y") and src[0:-3] == dst[0:-1]: + return True + # utility => utilities + if src.endswith("y") and dst.endswith("ies") and src[0:-1] == dst[0:-3]: + return True + # group is => group's + if src.endswith(" is") and dst.endswith("'s") and src[0:-3] == dst[0:-2]: + return True + # group's => group is + if src.endswith("'s") and dst.endswith(" is") and src[0:-2] == dst[0:-3]: + return True + # trex's => trex + if src.endswith("'s") and src[0:-2] == dst: + return True + # trex => trex's + if dst.endswith("'s") and dst[0:-2] == src: + return True + # increases => increase (but trimass => trimas is ok) + if src.endswith("s") and (not src.endswith("ss")) and src[0:-1] == dst: + return True + # increase => increases ((but trimas => trimass is ok)) + if dst.endswith("s") and (not dst.endswith("ss")) and dst[0:-1] == src: + return True + # anticipate => anticipated + if src.endswith("e") and dst.endswith("ed") and src[0:-1] == dst[0:-2]: + return True + # anticipated => anticipate + if src.endswith("ed") and dst.endswith("e") and src[0:-2] == dst[0:-1]: + return True + # blocks => blocked + if src.endswith("s") and dst.endswith("ed") and src[0:-1] == dst[0:-2]: + return True + # blocked => blocks + if src.endswith("ed") and dst.endswith("s") and src[0:-2] == dst[0:-1]: + return True + # lives => lived + if src.endswith("es") and dst.endswith("ed") and src[0:-2] == dst[0:-2]: + return True + # lived => lives + if src.endswith("ed") and dst.endswith("es") and src[0:-2] == dst[0:-2]: + return True + # regarded => regard + if src.endswith("ed") and src[0:-2] == dst: + return True + # regard => regarded + if dst.endswith("ed") and dst[0:-2] == src: + return True + # regardeding => regard + if src.endswith("ing") and src[0:-3] == dst: + return True + # regard => regarding + if dst.endswith("ing") and dst[0:-3] == src: + return True + # longer => long + if src.endswith("er") and src[0:-2] == dst: + return True + # long => longer + if dst.endswith("er") and dst[0:-2] == src: + return True + # discussed => discussing + if src.endswith("ed") and dst.endswith("ing") and src[0:-2] == dst[0:-3]: + return True + # discussing => discussed + if src.endswith("ing") and dst.endswith("ed") and src[0:-3] == dst[0:-2]: + return True + # live => living + if src.endswith("e") and dst.endswith("ing") and src[0:-1] == dst[0:-3]: + return True + # living => live + if src.endswith("ing") and dst.endswith("e") and src[0:-3] == dst[0:-1]: + return True + # discussion => discussing + if src.endswith("ion") and dst.endswith("ing") and src[0:-3] == dst[0:-3]: + return True + # discussing => discussion + if src.endswith("ing") and dst.endswith("ion") and src[0:-3] == dst[0:-3]: + return True + # alignment => aligning + if src.endswith("ment") and dst.endswith("ing") and src[0:-4] == dst[0:-3]: + return True + # aligning => alignment + if src.endswith("ing") and dst.endswith("ment") and src[0:-3] == dst[0:-4]: + return True + # dispensers => dispensing + if src.endswith("ers") and dst.endswith("ing") and src[0:-3] == dst[0:-3]: + return True + # dispensing => dispensers + if src.endswith("ing") and dst.endswith("ers") and src[0:-3] == dst[0:-3]: + return True + # integrate => integrity + if src.endswith("ate") and dst.endswith("ity") and src[0:-3] == dst[0:-3]: + return True + # integrity => integrate + if src.endswith("ity") and dst.endswith("ate") and src[0:-3] == dst[0:-3]: + return True + # discussion => discussed + if src.endswith("ion") and dst.endswith("ed") and src[0:-3] == dst[0:-2]: + return True + # discussed => discussion + if src.endswith("ed") and dst.endswith("ion") and src[0:-2] == dst[0:-3]: + return True + # anticipation => anticipate + if src.endswith("ion") and dst.endswith("e") and src[0:-3] == dst[0:-1]: + return True + # anticipate => anticipation + if src.endswith("e") and dst.endswith("ion") and src[0:-1] == dst[0:-3]: + return True + # incremental => increment + if src.endswith("ntal") and dst.endswith("nt") and src[0:-4] == dst[0:-2]: + return True + # increment => incremental + if src.endswith("nt") and dst.endswith("ntal") and src[0:-2] == dst[0:-4]: + return True + # national => nation + if src.endswith("nal") and dst.endswith("n") and src[0:-3] == dst[0:-1]: + return True + # nation => national + if src.endswith("n") and dst.endswith("nal") and src[0:-1] == dst[0:-3]: + return True + # significantly => significant + if src.endswith("ntly") and dst.endswith("nt") and src[0:-4] == dst[0:-2]: + return True + # significant => significantly + if src.endswith("nt") and dst.endswith("ntly") and src[0:-2] == dst[0:-4]: + return True + # delivery => deliverer + if src.endswith("ery") and dst.endswith("erer") and src[0:-3] == dst[0:-4]: + return True + # deliverer => delivery + if src.endswith("erer") and dst.endswith("ery") and src[0:-4] == dst[0:-3]: + return True + # deliver => deliverer + if src.endswith("er") and dst.endswith("erer") and src[0:-2] == dst[0:-4]: + return True + # deliverer => deliver + if src.endswith("erer") and dst.endswith("er") and src[0:-4] == dst[0:-2]: + return True + # comparably => comparable + if src.endswith("bly") and dst.endswith("ble") and src[0:-3] == dst[0:-3]: + return True + # comparable => comparably + if src.endswith("ble") and dst.endswith("bly") and src[0:-3] == dst[0:-3]: + return True + # comparably => comparability + if src.endswith("bly") and dst.endswith("bility") and src[0:-3] == dst[0:-6]: + return True + # comparability => comparably + if src.endswith("bility") and dst.endswith("bly") and src[0:-6] == dst[0:-3]: + return True + # beautiful => beautifully + if src.endswith("l") and dst.endswith("lly") and src[0:-1] == dst[0:-3]: + return True + # beautifully => beautiful + if src.endswith("lly") and dst.endswith("l") and src[0:-3] == dst[0:-1]: + return True + # active => actively + if src.endswith("e") and dst.endswith("ely") and src[0:-1] == dst[0:-3]: + return True + # actively => active + if src.endswith("ely") and dst.endswith("e") and src[0:-3] == dst[0:-1]: + return True + # america => american + if src.endswith("a") and dst.endswith("an") and src[0:-1] == dst[0:-2]: + return True + # american => america + if src.endswith("an") and dst.endswith("a") and src[0:-2] == dst[0:-1]: + return True + # reinvesting => investing + if src.startswith("re") and src[2:] == dst: + return True + # investing => reinvesting + if dst.startswith("re") and dst[2:] == src: + return True + # unchanged => changed + if src.startswith("un") and src[2:] == dst: + return True + # changed => unchanged + if dst.startswith("un") and dst[2:] == src: + return True + # disrespected => respected + if src.startswith("dis") and src[3:] == dst: + return True + # respected => disrespected + if dst.startswith("dis") and dst[3:] == src: + return True + # outperformance => performance + if src.startswith("out") and src[3:] == dst: + return True + # performance => outperformance + if dst.startswith("out") and dst[3:] == src: + return True + return False diff --git a/nemo/collections/nlp/data/text_normalization_as_tagging/utils.py b/nemo/collections/nlp/data/text_normalization_as_tagging/utils.py index 253f7a41c703..9d5f5b7b23ad 100644 --- a/nemo/collections/nlp/data/text_normalization_as_tagging/utils.py +++ b/nemo/collections/nlp/data/text_normalization_as_tagging/utils.py @@ -17,6 +17,8 @@ from itertools import groupby from typing import Dict, List, Tuple +import numpy as np + """Utility functions for Thutmose Tagger.""" @@ -305,3 +307,197 @@ def get_src_and_dst_for_alignment( ) return written_str, spoken, " ".join(same_begin), " ".join(same_end) + + +def fill_alignment_matrix( + fline2: str, fline3: str, gline2: str, gline3: str +) -> Tuple[np.ndarray, List[str], List[str]]: + """Parse Giza++ direct and reverse alignment results and represent them as an alignment matrix + + Args: + fline2: e.g. "_2 0 1 4_" + fline3: e.g. "NULL ({ }) twenty ({ 1 }) fourteen ({ 2 3 4 })" + gline2: e.g. "twenty fourteen" + gline3: e.g. "NULL ({ }) _2 ({ 1 }) 0 ({ }) 1 ({ }) 4_ ({ 2 })" + + Returns: + matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment + the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the + words were aligned only in one direction, 0 - no alignment. + srctokens: e.g. ["twenty", "fourteen"] + dsttokens: e.g. ["_2", "0", "1", "4_"] + + For example, the alignment matrix for the above example may look like: + [[3, 0, 0, 0] + [0, 2, 2, 3]] + """ + if fline2 is None or gline2 is None or fline3 is None or gline3 is None: + raise ValueError(f"empty params") + srctokens = gline2.split() + dsttokens = fline2.split() + pattern = r"([^ ]+) \(\{ ([^\(\{\}\)]*) \}\)" + src2dst = re.findall(pattern, fline3.replace("({ })", "({ })")) + dst2src = re.findall(pattern, gline3.replace("({ })", "({ })")) + if len(src2dst) != len(srctokens) + 1: + raise ValueError( + "length mismatch: len(src2dst)=" + + str(len(src2dst)) + + "; len(srctokens)" + + str(len(srctokens)) + + "\n" + + gline2 + + "\n" + + fline3 + ) + if len(dst2src) != len(dsttokens) + 1: + raise ValueError( + "length mismatch: len(dst2src)=" + + str(len(dst2src)) + + "; len(dsttokens)" + + str(len(dsttokens)) + + "\n" + + fline2 + + "\n" + + gline3 + ) + matrix = np.zeros((len(srctokens), len(dsttokens))) + for i in range(1, len(src2dst)): + token, to_str = src2dst[i] + if to_str == "": + continue + to = list(map(int, to_str.split())) + for t in to: + matrix[i - 1][t - 1] = 2 + + for i in range(1, len(dst2src)): + token, to_str = dst2src[i] + if to_str == "": + continue + to = list(map(int, to_str.split())) + for t in to: + matrix[t - 1][i - 1] += 1 + + return matrix, srctokens, dsttokens + + +def check_monotonicity(matrix: np.ndarray) -> bool: + """Check if alignment is monotonous - i.e. the relative order is preserved (no swaps). + + Args: + matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment + the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the + words were aligned only in one direction, 0 - no alignment. + """ + is_sorted = lambda k: np.all(k[:-1] <= k[1:]) + + a = np.argwhere(matrix == 3) + b = np.argwhere(matrix == 2) + c = np.vstack((a, b)) + d = c[c[:, 1].argsort()] # sort by second column (less important) + d = d[d[:, 0].argsort(kind="mergesort")] + return is_sorted(d[:, 1]) + + +def get_targets(matrix: np.ndarray, dsttokens: List[str], delimiter: str) -> List[str]: + """Join some of the destination tokens, so that their number becomes the same as the number of input words. + Unaligned tokens tend to join to the left aligned token. + + Args: + matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment + the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the + words were aligned only in one direction, 0 - no alignment. + dsttokens: e.g. ["_2", "0", "1", "4_"] + Returns: + targets: list of string tokens, with one-to-one correspondence to matrix.shape[0] + + Example: + If we get + matrix=[[3, 0, 0, 0] + [0, 2, 2, 3]] + dsttokens=["_2", "0", "1", "4_"] + it gives + targets = ["_201", "4_"] + Actually, this is a mistake instead of ["_20", "14_"]. That will be further corrected by regular expressions. + """ + targets = [] + last_covered_dst_id = -1 + for i in range(len(matrix)): + dstlist = [] + for j in range(last_covered_dst_id + 1, len(dsttokens)): + # matrix[i][j] == 3: safe alignment point + if matrix[i][j] == 3 or ( + j == last_covered_dst_id + 1 + and np.all(matrix[i, :] == 0) # if the whole line does not have safe points + and np.all(matrix[:, j] == 0) # and the whole column does not have safe points, match them + ): + if len(targets) == 0: # if this is first safe point, attach left unaligned columns to it, if any + for k in range(0, j): + if np.all(matrix[:, k] == 0): # if column k does not have safe points + dstlist.append(dsttokens[k]) + else: + break + dstlist.append(dsttokens[j]) + last_covered_dst_id = j + for k in range(j + 1, len(dsttokens)): + if np.all(matrix[:, k] == 0): # if column k does not have safe points + dstlist.append(dsttokens[k]) + last_covered_dst_id = k + else: + break + + if len(dstlist) > 0: + targets.append(delimiter.join(dstlist)) + else: + targets.append("") + return targets + + +def get_targets_from_back(matrix: np.ndarray, dsttokens: List[str], delimiter: str) -> List[str]: + """Join some of the destination tokens, so that their number becomes the same as the number of input words. + Unaligned tokens tend to join to the right aligned token. + + Args: + matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment + the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the + words were aligned only in one direction, 0 - no alignment. + dsttokens: e.g. ["_2", "0", "1", "4_"] + Returns: + targets: list of string tokens, with one-to-one correspondence to matrix.shape[0] + + Example: + If we get + matrix=[[3, 0, 0, 0] + [0, 2, 2, 3]] + dsttokens=["_2", "0", "1", "4_"] + it gives + targets = ["_2", "014_"] + Actually, this is a mistake instead of ["_20", "14_"]. That will be further corrected by regular expressions. + """ + + targets = [] + last_covered_dst_id = len(dsttokens) + for i in range(len(matrix) - 1, -1, -1): + dstlist = [] + for j in range(last_covered_dst_id - 1, -1, -1): + if matrix[i][j] == 3 or ( + j == last_covered_dst_id - 1 and np.all(matrix[i, :] == 0) and np.all(matrix[:, j] == 0) + ): + if len(targets) == 0: + for k in range(len(dsttokens) - 1, j, -1): + if np.all(matrix[:, k] == 0): + dstlist.append(dsttokens[k]) + else: + break + dstlist.append(dsttokens[j]) + last_covered_dst_id = j + for k in range(j - 1, -1, -1): + if np.all(matrix[:, k] == 0): + dstlist.append(dsttokens[k]) + last_covered_dst_id = k + else: + break + if len(dstlist) > 0: + targets.append(delimiter.join(list(reversed(dstlist)))) + else: + targets.append("") + return list(reversed(targets)) diff --git a/nemo/collections/nlp/metrics/sequence_perplexity.py b/nemo/collections/nlp/metrics/sequence_perplexity.py index 688f9db87ea6..339f062f7cc1 100644 --- a/nemo/collections/nlp/metrics/sequence_perplexity.py +++ b/nemo/collections/nlp/metrics/sequence_perplexity.py @@ -31,8 +31,6 @@ class SequencePerplexity(Metric): See :doc:`PyTorch Lightning Metrics` for the metric usage instructions. Args: - compute_on_step: - Forward only calls ``update()`` and returns ``None`` if this is set to ``False``. default: ``True`` dist_sync_on_step: Synchronize metric state across processes at each ``forward()`` before returning the value at the step. process_group: @@ -43,12 +41,9 @@ class SequencePerplexity(Metric): to perform the allgather. """ - def __init__(self, compute_on_step=True, dist_sync_on_step=False, process_group=None, dist_sync_fn=None): + def __init__(self, dist_sync_on_step=False, process_group=None, dist_sync_fn=None): super().__init__( - compute_on_step=compute_on_step, - dist_sync_on_step=dist_sync_on_step, - process_group=process_group, - dist_sync_fn=dist_sync_fn, + dist_sync_on_step=dist_sync_on_step, process_group=process_group, dist_sync_fn=dist_sync_fn, ) # Total sum of exponentiated average negative log likelihoods diff --git a/nemo/collections/nlp/models/__init__.py b/nemo/collections/nlp/models/__init__.py index 90e692a238a6..75b48f64df13 100644 --- a/nemo/collections/nlp/models/__init__.py +++ b/nemo/collections/nlp/models/__init__.py @@ -30,6 +30,7 @@ from nemo.collections.nlp.models.language_modeling.transformer_lm_model import TransformerLMModel from nemo.collections.nlp.models.machine_translation import MTEncDecModel from nemo.collections.nlp.models.question_answering.qa_model import QAModel +from nemo.collections.nlp.models.spellchecking_asr_customization import SpellcheckingAsrCustomizationModel from nemo.collections.nlp.models.text2sparql.text2sparql_model import Text2SparqlModel from nemo.collections.nlp.models.text_classification import TextClassificationModel from nemo.collections.nlp.models.text_normalization_as_tagging import ThutmoseTaggerModel diff --git a/nemo/collections/nlp/models/language_modeling/bert_lm_model.py b/nemo/collections/nlp/models/language_modeling/bert_lm_model.py index 4c9d43c20d54..5cf509e77846 100644 --- a/nemo/collections/nlp/models/language_modeling/bert_lm_model.py +++ b/nemo/collections/nlp/models/language_modeling/bert_lm_model.py @@ -116,7 +116,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): # create extra bias # setup to track metrics - self.validation_perplexity = Perplexity(compute_on_step=False) + self.validation_perplexity = Perplexity() self.setup_optimization(cfg.optim) diff --git a/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py b/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py index 132f900298a6..cbbef2d56a15 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py @@ -188,6 +188,7 @@ def __init__( add_binary_head=True, megatron_legacy=False, sequence_parallel=False, + position_embedding_type='learned_absolute', ): super(BertModel, self).__init__() # args = get_args() @@ -234,6 +235,7 @@ def __init__( onnx_safe=onnx_safe, megatron_legacy=megatron_legacy, sequence_parallel=sequence_parallel, + position_embedding_type=position_embedding_type, ) self.initialize_word_embeddings( diff --git a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py index e890e6ae4807..8e28b6cab362 100755 --- a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py @@ -114,7 +114,7 @@ def __init__( num_layers, num_attention_heads, ffn_hidden_size, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, num_tokentypes=0, parallel_output=True, @@ -151,6 +151,7 @@ def __init__( gradient_accumulation_fusion=False, persist_layer_norm=False, openai_gelu=False, + megatron_legacy=False, onnx_safe=False, sequence_parallel=False, transformer_engine=False, @@ -163,6 +164,8 @@ def __init__( fp8_amax_compute_algo='most_recent', reduce_amax=True, use_emha=False, + ub_tp_comm_overlap=False, + use_flash_attention=False, ): super(GPTModel, self).__init__(share_token_embeddings=share_embeddings_and_output_weights) @@ -232,6 +235,7 @@ def __init__( persist_layer_norm=persist_layer_norm, openai_gelu=openai_gelu, onnx_safe=onnx_safe, + megatron_legacy=megatron_legacy, sequence_parallel=sequence_parallel, transformer_engine=transformer_engine, fp8=fp8, @@ -243,6 +247,8 @@ def __init__( fp8_amax_compute_algo=fp8_amax_compute_algo, reduce_amax=reduce_amax, use_emha=use_emha, + ub_tp_comm_overlap=ub_tp_comm_overlap, + use_flash_attention=use_flash_attention, ) if self.share_embeddings_and_output_weights: diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index 1237491fa39c..f553f32c1665 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -17,6 +17,7 @@ import re from typing import Any, Dict, Optional, Union +import omegaconf import torch from omegaconf import open_dict from omegaconf.dictconfig import DictConfig @@ -25,6 +26,7 @@ from pytorch_lightning.trainer.trainer import Trainer from nemo.collections.nlp.models.nlp_model import NLPModel +from nemo.collections.nlp.modules.common.megatron.attention import HAVE_FLASH_ATTENTION from nemo.collections.nlp.modules.common.megatron.clip_grads import ( clip_grad_norm_distributed_optimizer, clip_grad_norm_fp32, @@ -60,18 +62,19 @@ class MegatronBaseModel(NLPModel): """ - Megatron base class - It does the following things: - 1. Initialize the model parallel for nemo given the model parallel parameters. - 2. Turn on all the nvidia optimizations. - 3. If `cfg.tokenizer` is available, it loads the tokenizer and pad the vocab to the correct size for tensor model parallelism. - 4. If using distributed optimizer, configure to be compatible with - O2-level optimizations and/or model parallelism. - 5. Perform gradient clipping: `grad_clip_pl_default` triggers the - PyTorch Lightning default implementation, `with_distributed_adam` - triggers the distributed optimizer's implementation, - `megatron_amp_o2` triggers gradient clipping on the main grads, - and otherwise gradient clipping is performed on the model grads. + Megatron base class. All NeMo Megatron models inherit from this class. + + - Initialize the model parallel world for nemo. + - Turn on all of the nvidia optimizations. + - If `cfg.tokenizer` is available, it loads the tokenizer and pad the vocab to the + correct size for tensor model parallelism. + - If using distributed optimizer, configure to be compatible + with O2 level optimizations and/or model parallelism. + - Perform gradient clipping: `grad_clip_pl_default` triggers + the PyTorch Lightning default implementation, `with_distributed_adam` triggers + the distributed optimizer's implementation, `megatron_amp_O2` triggers gradient clipping on the main grads, + and otherwise gradient clipping is performed on the model grads. + """ def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True): @@ -84,6 +87,12 @@ def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True): if trainer is None: raise ValueError(f"Trainer cannot be None for Megatron-based models. Please provide a PTL trainer object.") + if cfg.get('use_flash_attention', False) and not HAVE_FLASH_ATTENTION: + raise ImportError( + "flash_attn was not found. Please see the installation instructions: https://github.com/HazyResearch/flash-attention." + "If you use flash_attn with triton. Please install triton==2.0.0.dev20221202." + ) + # this prevents base constructor from initializing tokenizer self.tokenizer = None @@ -123,6 +132,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True): global_batch_size=cfg.get('global_batch_size'), rampup_batch_size=cfg.get('rampup_batch_size'), use_fp8=cfg.get('fp8', False), + init_mpi_proc_group=cfg.get('ub_tp_comm_overlap', False), seed=self.cfg.get('seed', 1234), apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30), ) @@ -155,6 +165,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True): # The automatic garbage collector sould be disabled before training starts. if self.gc_interval > 0: gc.disable() + self.validation_global_step = 1 def _enable_nvidia_optimizations(self): "These optimizations are present in NVIDIA NGC PyTorch Containers" @@ -205,17 +216,32 @@ def _build_tokenizer(self): self.tokenizer = get_nmt_tokenizer( library=self._cfg.tokenizer.library, model_name=self._cfg.tokenizer.type, - tokenizer_model=self.register_artifact("tokenizer.model", self._cfg.tokenizer.model), - vocab_file=self.register_artifact("tokenizer.vocab_file", self._cfg.tokenizer.vocab_file), - merges_file=self.register_artifact("tokenizer.merge_file", self._cfg.tokenizer.merge_file), + tokenizer_model=self.register_artifact("tokenizer.model", self._cfg.tokenizer.get('model', None)), + vocab_file=self.register_artifact("tokenizer.vocab_file", self._cfg.tokenizer.get('vocab_file', None)), + merges_file=self.register_artifact("tokenizer.merge_file", self._cfg.tokenizer.get('merge_file', None)), + use_fast=self.cfg.tokenizer.get('use_fast', False), delimiter=self.cfg.tokenizer.get('delimiter', None), legacy=legacy, ) + if self._cfg.tokenizer.get('additional_special_tokens', None) is not None: + tokens_list = omegaconf.OmegaConf.to_object(self._cfg.tokenizer.additional_special_tokens) + self.tokenizer.add_special_tokens({'additional_special_tokens': tokens_list}) + def on_train_start(self) -> None: super().on_train_start() self.init_global_step = self.trainer.global_step + def on_validation_start(self) -> None: + super().on_validation_start() + if self.gc_interval > 0: + gc.collect() + + def on_validation_end(self) -> None: + super().on_validation_end() + if self.gc_interval > 0: + gc.collect() + def _build_vocab(self): """ Manipulate vocabulary (e.g., pad vocabulary for increased performance)/ @@ -240,14 +266,16 @@ def _vocab_size_with_padding(self, orig_vocab_size, make_vocab_size_divisible_by ) return after - def _get_parameters(self): + def get_parameters_with_grad(self): """ - private method to load all the trainable parameters from optimizer param groups + Get all parameters with grad from optimizer param groups """ params = [] for param_group in self._optimizer_param_groups: for param in param_group['params']: - if param.requires_grad: # (@adithyare) adapter training with pp>1 can result in params with no grads + if ( + param.grad is not None + ): # (@adithyare) adapter training with pp>1 can result in params with no grads params.append(param) return params @@ -270,11 +298,11 @@ def configure_gradient_clipping(self, *args, **kwargs): if self.with_distributed_adam: grad_norm = clip_grad_norm_distributed_optimizer(self._optimizer, clip_val) else: - if self.megatron_amp_o2: + if self.megatron_amp_O2: # grep fp32 master parameters for gradient clipping - parameters = self._optimizer.get_parameters() + parameters = self._optimizer.get_parameters_with_grad() else: - parameters = self._get_parameters() + parameters = self.get_parameters_with_grad() grad_norm = clip_grad_norm_fp32(parameters=parameters, max_norm=clip_val) self.log('grad_norm', grad_norm, rank_zero_only=True, batch_size=1) @@ -362,6 +390,14 @@ def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unus if self.gc_interval > 0 and (self.trainer.global_step % self.gc_interval == 0): gc.collect() + def on_validation_batch_end(self, outputs, batch: Any, batch_idx: int, dataloader_idx: int) -> None: + super().on_validation_batch_end(outputs, batch, batch_idx, dataloader_idx) + + if self.gc_interval > 0: + if self.validation_global_step % self.gc_interval == 0: + gc.collect() + self.validation_global_step += 1 + def setup_optimization( self, optim_config: Optional[Union[DictConfig, Dict]] = None, optim_kwargs: Optional[Dict[str, Any]] = None, ): @@ -383,7 +419,7 @@ def setup_optimization( # Match param allgather with model dtype model_dtype = torch.float32 - if self.megatron_amp_o2 and hasattr(self, 'autocast_dtype'): + if self.megatron_amp_O2 and hasattr(self, 'autocast_dtype'): model_dtype = self.autocast_dtype optim_kwargs['param_sync_dtype'] = model_dtype @@ -402,7 +438,7 @@ def configure_optimizers(self): self.setup_optimization() # Wrap the baseline optimizer with the optimizer class with master parameters - if self.megatron_amp_o2 and not self.with_distributed_adam and self._optimizer is not None: + if self.megatron_amp_O2 and not self.with_distributed_adam and self._optimizer is not None: if self.cfg.precision == 'bf16': fp32_grad_accum = True contiguous_grad_bucket = True @@ -479,10 +515,20 @@ def configure_optimizers(self): def compute_consumed_samples(self, steps_since_resume=0): app_state = AppState() - consumed_samples = ( - self.init_consumed_samples - + steps_since_resume * app_state.data_parallel_size * self.cfg.micro_batch_size * get_num_microbatches() - ) + + if self.cfg.get('rampup_batch_size', None): + from apex.transformer.pipeline_parallel.utils import _GLOBAL_NUM_MICROBATCHES_CALCULATOR + + current_global_batch_size = getattr(_GLOBAL_NUM_MICROBATCHES_CALCULATOR, 'current_global_batch_size', 1) + consumed_samples = self.prev_consumed_samples + self.if_first_step * current_global_batch_size + else: + consumed_samples = ( + self.init_consumed_samples + + steps_since_resume + * app_state.data_parallel_size + * self.cfg.micro_batch_size + * get_num_microbatches() + ) return int(consumed_samples) def _extract_consumed_samples_from_ckpt(self, ckpt_path): @@ -538,6 +584,14 @@ def _validate_and_override_config(self): 'Make sure the number of model chunks is the same across all pipeline stages.' ) + if self.cfg.get('ub_tp_comm_overlap', False): + if not self.cfg.get('transformer_engine', False) or not self.cfg.get('sequence_parallel', False): + logging.info( + "Userbuffer tensor-parallel communication overlap is available with both Transformer Engine and sequence-parallelism." + ) + with open_dict(self.cfg): + self.cfg.ub_tp_comm_overlap = False + def is_data_parallel_rank_zero(self): if is_global_rank_zero(): return True diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_prompt_learning_model.py index 88da586832df..134cc0d91af6 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_prompt_learning_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_prompt_learning_model.py @@ -147,7 +147,7 @@ def init_model(self, cfg: DictConfig, trainer: Trainer): self.prompt_encoder = None self.enable_autocast = ( - True if (not self.megatron_amp_o2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + True if (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False ) # define validation metric diff --git a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py index 64430a669269..e7bd05f475b5 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py @@ -40,6 +40,7 @@ from nemo.core.neural_types import ChannelType, MaskType, NeuralType from nemo.utils import AppState, logging + try: from apex.transformer.pipeline_parallel.utils import get_num_microbatches @@ -49,6 +50,14 @@ HAVE_APEX = False +try: + import logging + from lddl.torch_mp import get_bert_pretrain_data_loader + + HAVE_LDDL = True +except (ImportError, ModuleNotFoundError): + HAVE_LDDL = False + try: from megatron.core import parallel_state from megatron.core.pipeline_parallel.schedules import get_forward_backward_func @@ -71,10 +80,10 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): raise ImportError( "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." ) - self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False) + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) self.cfg = cfg - if not self.megatron_amp_o2 and self.cfg.get('virtual_pipeline_model_parallel_size', None): + if not self.megatron_amp_O2 and self.cfg.get('virtual_pipeline_model_parallel_size', None): raise ValueError('Virtual pipeline model parallel is only supported when using megatron_amp_O2') super().__init__(cfg, trainer=trainer, no_lm_init=False) @@ -91,7 +100,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): raise ValueError('precision must be in [32, 16, "bf16"]') self.enable_autocast = ( - True if (not self.megatron_amp_o2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + True if (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False ) # used in NVIDIA NGC PyTorch containers @@ -110,7 +119,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): if self.cfg.get('virtual_pipeline_model_parallel_size', None) is None: self.model = self.model[0] - if self.megatron_amp_o2: + if self.megatron_amp_O2: if not self.with_distributed_adam: # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type @@ -173,6 +182,7 @@ def model_provider_func(self, pre_process, post_process): add_binary_head=cfg.bert_binary_head, megatron_legacy=cfg.get('megatron_legacy', False), sequence_parallel=self.cfg.get('sequence_parallel', False), + position_embedding_type=self.cfg.get("position_embedding_type", "learned_absolute"), ) return model @@ -300,7 +310,12 @@ def training_step(self, dataloader_iter, batch_idx): for param in module.embedding.parameters(): param.data_ptr() - tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + if self.cfg.data.dataloader_type == "LDDL": + # this is of type bert dataset + seq_length = dataloader_iter.iterator.loaders.get_seqlen() + tensor_shape = [seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + else: + tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] # run forward and backwards passes for an entire global batch # we do this inside training_step to support pipeline parallelism @@ -324,7 +339,10 @@ def training_step(self, dataloader_iter, batch_idx): loss_tensor = torch.vstack(loss_tensors_list) loss_mean = loss_tensor.mean(axis=0) else: - loss_mean = torch.tensor([0.0, 0.0]).cuda() + if self.cfg.bert_binary_head == True: + loss_mean = torch.tensor([0.0, 0.0, 0.0]).cuda() + else: + loss_mean = torch.tensor([0.0, 0.0]).cuda() # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): @@ -335,7 +353,7 @@ def training_step(self, dataloader_iter, batch_idx): # note: not necessary, but reduces performance degradation # from multiple simultaneous NCCL calls self._optimizer._finish_bucket_grad_sync() - elif self.megatron_amp_o2: + elif self.megatron_amp_O2: if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) self._optimizer.allreduce_main_grads() @@ -396,7 +414,7 @@ def allreduce_first_last_embeddings(self): module = self.model if module.share_token_embeddings: word_embeddings_weight = module.word_embeddings_weight() - if self.megatron_amp_o2: + if self.megatron_amp_O2: # O2 recipe stores a "main" copy of weights and grads grad = word_embeddings_weight.main_grad else: @@ -404,7 +422,12 @@ def allreduce_first_last_embeddings(self): torch.distributed.all_reduce(grad, group=parallel_state.get_embedding_group()) def validation_step(self, dataloader_iter, batch_idx): - tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + + if self.cfg.data.dataloader_type == "LDDL": + seq_length = dataloader_iter.iterator.get_seqlen() + tensor_shape = [seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + else: + tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] fwd_bwd_function = get_forward_backward_func() @@ -476,6 +499,95 @@ def loss_func(self, loss_mask, sentence_order, output_tensor): # [lm_loss]) # return loss, {'lm loss': averaged_losses[0]} + def build_LDDL_data(self, cfg): + if not HAVE_LDDL: + raise ImportError( + "LDDL was not found. Please see the LDDL README for installation instructions: https://github.com/NVIDIA/LDDL#installation." + ) + logging.info(f'Starting building LDDL Dataloaders') + self._train_ds = None + self._validation_ds = None + self._test_ds = None + data_parallel_size = parallel_state.get_data_parallel_world_size() + num_micro_batches = self.cfg.global_batch_size // (self.cfg.micro_batch_size * data_parallel_size) + global_batch_size_on_this_data_parallel_rank = num_micro_batches * self.cfg.micro_batch_size + samples_consumed_dploader = self.compute_consumed_samples(0) // data_parallel_size + # We run under the assumption that the datapath is the prefix if LDDL dataloader + train_lddl_data_path = self.cfg.data.data_prefix[0] + self._train_dl = get_bert_pretrain_data_loader( + train_lddl_data_path, + dp_rank=parallel_state.get_data_parallel_rank(), + local_rank=self.local_rank, + shuffle_buffer_size=16384, + shuffle_buffer_warmup_factor=16, + vocab_file=self.cfg.tokenizer.vocab_file, + data_loader_kwargs={ + 'batch_size': global_batch_size_on_this_data_parallel_rank, + 'num_workers': self.cfg.data.num_workers, + 'prefetch_factor': 2, + }, + mlm_probability=0.15, + base_seed=self.cfg.seed, + log_level=logging.CRITICAL, + log_dir="/tmp/log", + return_raw_samples=False, + start_epoch=0, + sequence_length_alignment=8, + ignore_index=-1, + samples_seen=samples_consumed_dploader, + micro_batch_size=self.cfg.micro_batch_size, + ) + logging.info(f'Completed build train LDDL Dataloader') + if len(self.cfg.data.data_prefix) > 1: + val_lddl_data_path = self.cfg.data.data_prefix[1] + self._validation_dl = get_bert_pretrain_data_loader( + val_lddl_data_path, + dp_rank=parallel_state.get_data_parallel_rank(), + local_rank=self.local_rank, + shuffle_buffer_size=16384, + shuffle_buffer_warmup_factor=16, + vocab_file=self.cfg.tokenizer.vocab_file, + data_loader_kwargs={ + 'batch_size': global_batch_size_on_this_data_parallel_rank, + 'num_workers': self.cfg.data.num_workers, + 'prefetch_factor': 2, + }, + mlm_probability=0.15, + base_seed=self.cfg.seed, + log_level=logging.CRITICAL, + log_dir="/tmp/log", + return_raw_samples=False, + start_epoch=0, + sequence_length_alignment=8, + ignore_index=-1, + micro_batch_size=self.cfg.micro_batch_size, + ) + if len(self.cfg.data.data_prefix) > 2: + test_lddl_data_path = self.cfg.data.data_prefix[2] + self._test_dl = get_bert_pretrain_data_loader( + test_lddl_data_path, + dp_rank=parallel_state.get_data_parallel_rank(), + local_rank=self.local_rank, + shuffle_buffer_size=16384, + shuffle_buffer_warmup_factor=16, + vocab_file=self.cfg.tokenizer.vocab_file, + data_loader_kwargs={ + 'batch_size': global_batch_size_on_this_data_parallel_rank, + 'num_workers': self.cfg.data.num_workers, + 'prefetch_factor': 2, + }, + mlm_probability=0.15, + base_seed=self.cfg.seed, + log_level=logging.CRITICAL, + log_dir="/tmp/log", + return_raw_samples=False, + start_epoch=0, + sequence_length_alignment=8, + ignore_index=-1, + micro_batch_size=self.cfg.micro_batch_size, + ) + logging.info(f'Finished building LDDL Dataloaders') + def build_train_valid_test_datasets(self): logging.info('Building Bert datasets.') if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): @@ -543,7 +655,7 @@ def _append_sequence_parallel_module_grads(self, module, grads): for param in module.parameters(): sequence_parallel_param = getattr(param, 'sequence_parallel', False) if sequence_parallel_param: - if self.megatron_amp_o2: + if self.megatron_amp_O2: grad = param.main_grad else: grad = param.grad @@ -581,10 +693,14 @@ def setup(self, stage=None): else: # TODO: consider adding a ModelPT guard to check if model is being restored. # allowing restored models to optionally setup datasets - self.build_train_valid_test_datasets() - self.setup_training_data(self.cfg.data) - self.setup_validation_data(self.cfg.data) - self.setup_test_data(self.cfg.data) + if self.cfg.data.dataloader_type == "LDDL": + self.build_LDDL_data(self.cfg.data) + torch.distributed.barrier() + else: + self.build_train_valid_test_datasets() + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) # when using pipeline model parallel the final stage need to initialize word embeddings if parallel_state.get_pipeline_model_parallel_world_size() > 1: @@ -748,7 +864,7 @@ def configure_optimizers(self): module = self.model if module.share_token_embeddings: param = module.word_embeddings_weight() - param._disable_greedy_grad_copy = not self.megatron_amp_o2 + param._disable_greedy_grad_copy = not self.megatron_amp_O2 param._disable_overlap_grad_sync = True if parallel_state.is_pipeline_last_stage(ignore_virtual=True): if isinstance(self.model, list): @@ -757,20 +873,20 @@ def configure_optimizers(self): module = self.model if module.share_token_embeddings: param = module.word_embeddings_weight() - param._disable_greedy_grad_copy = not self.megatron_amp_o2 + param._disable_greedy_grad_copy = not self.megatron_amp_O2 param._disable_overlap_grad_sync = True # Disable overlapped grad sync for layer norm grads when # sequence parallelism is enabled for param in self.parameters(): if getattr(param, 'sequence_parallel', False): - param._disable_greedy_grad_copy = not self.megatron_amp_o2 + param._disable_greedy_grad_copy = not self.megatron_amp_O2 param._disable_overlap_grad_sync = True # sequence parallelism is enabled for param in self.parameters(): if getattr(param, 'sequence_parallel', False): - param._disable_greedy_grad_copy = not self.megatron_amp_o2 + param._disable_greedy_grad_copy = not self.megatron_amp_O2 param._disable_overlap_grad_sync = True # Initialize parameter buckets for overlapped grad and param syncs diff --git a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py b/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py index 4ed71756e60e..32024deb19b4 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py @@ -49,7 +49,6 @@ HAVE_MEGATRON_CORE = False - __all__ = ['MegatronT5FinetuneModel'] @@ -204,7 +203,7 @@ def on_train_epoch_start(self) -> None: return super().on_train_epoch_start() def cast_for_metric(self, pred, label, metric_name, class_labels=None, labels_are_strings=False): - if metric_name == 'exact_string_match': + if metric_name == 'exact_string_match' or 'rouge': return pred, label pred = pred.replace(' ', '') label = label.replace(' ', '') @@ -445,6 +444,8 @@ def inference_epoch_end(self, outputs, mode, data_cfg): self.val_metric[dataloader_idx] if mode == 'validation' else self.test_metric[dataloader_idx] ) metric = metric_object.compute() + if metric_name == 'rouge': + metric = metric['rouge1_fmeasure'] # Handle logging of GLUE/XNLI separately here. XNLI has a separate metric per language. if isinstance(metric, dict): # GLUE case: @@ -458,7 +459,8 @@ def inference_epoch_end(self, outputs, mode, data_cfg): if k != 'acc' and 'total' not in k: self.log(metric_log_key + f'_{k}', v, batch_size=1) logging.info(f"{mode} {metric_name} lang {k} : {v}") - metric = metric['acc'] + if metric_name != 'rouge': + metric = metric['acc'] else: self.log(metric_log_key, metric, batch_size=1) logging.info(f"{metric_log_key}: {metric}") diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 66fa0ed2716e..41baccb1aaf1 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -14,14 +14,14 @@ import itertools import queue +import warnings from functools import partial -from typing import Any, Iterator, List, Optional, Union +from typing import Any, Dict, Iterator, List, Optional, Union import numpy as np import torch from omegaconf.dictconfig import DictConfig from pytorch_lightning.accelerators import CPUAccelerator -from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin from pytorch_lightning.trainer.trainer import Trainer from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( @@ -36,6 +36,7 @@ from nemo.collections.nlp.modules.common.megatron.utils import ( average_losses_across_data_parallel_group, get_all_params_for_weight_decay_optimization, + get_ltor_masks_and_position_ids, get_params_for_weight_decay_optimization, ) from nemo.collections.nlp.modules.common.text_generation_utils import ( @@ -51,9 +52,10 @@ SamplingParam, TextGeneration, ) -from nemo.collections.nlp.parts.nlp_overrides import GradScaler from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.core.classes import Exportable from nemo.core.classes.common import PretrainedModelInfo +from nemo.core.neural_types import ChannelType, NeuralType from nemo.utils import logging try: @@ -81,6 +83,7 @@ try: import transformer_engine + from transformer_engine.pytorch import module as te_module HAVE_TE = True @@ -88,6 +91,99 @@ HAVE_TE = False +class MegatronGPTExportableModel(torch.nn.Module, Exportable): + """ + Megatron GPT Wrapper for ONNX export + """ + + def __init__(self, model): + super().__init__() + self.model = model + self.fp8_enabled = model.cfg.get('fp8', False) + self.fp8_recipe = None + if self.fp8_enabled and HAVE_TE: + self.fp8_recipe = transformer_engine.common.recipe.DelayedScaling( + margin=0, interval=1, fp8_format=transformer_engine.common.recipe.Format.E4M3 + ) + + self.dtype = None + if model.cfg['precision'] == 'bf16': + self.dtype = torch.bfloat16 + elif int(model.cfg['precision']) == 32: + self.dtype = torch.float + elif int(model.cfg['precision']) == 16: + self.dtype = torch.float16 + else: + raise ValueError(f"precision: {model.cfg['precision']} is not supported.") + + def forward(self, tokens, position_ids, attention_mask): + if self.fp8_enabled and HAVE_TE: + with transformer_engine.pytorch.onnx_export(self.fp8_enabled), transformer_engine.pytorch.fp8_autocast( + enabled=self.fp8_enabled, fp8_recipe=self.fp8_recipe + ), torch.no_grad(), torch.inference_mode(), torch.autocast( + 'cuda', dtype=self.dtype + ), warnings.catch_warnings(): + warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning, module=r'.*') + assert tokens.shape == position_ids.shape + assert attention_mask.shape[2] == attention_mask.shape[3] == tokens.shape[1] == position_ids.shape[1] + output_tensor = self.model.forward( + tokens=tokens.cuda(), + text_position_ids=position_ids.cuda(), + attention_mask=attention_mask.cuda(), + labels=None, + ) + else: + with torch.no_grad(), torch.inference_mode(), torch.autocast( + 'cuda', dtype=self.dtype + ), warnings.catch_warnings(): + warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning, module=r'.*') + assert tokens.shape == position_ids.shape + assert attention_mask.shape[2] == attention_mask.shape[3] == tokens.shape[1] == position_ids.shape[1] + output_tensor = self.model.forward( + tokens=tokens.cuda(), + text_position_ids=position_ids.cuda(), + attention_mask=attention_mask.cuda(), + labels=None, + ) + + return output_tensor + + def freeze(self): + for param in self.parameters(): + param.requires_grad = False + + def input_example(self, max_batch=1, max_dim=768, seq_len=6): + ids = [self.model.tokenizer.text_to_ids(text) for text in ["how is the weather on Sunday"]] + id_tensors = [torch.unsqueeze(torch.LongTensor(id_list), dim=0) for id_list in ids] + masks_and_position_ids = [ + get_ltor_masks_and_position_ids(id_tensor, self.model.tokenizer.eos_id, False, False, False) + for id_tensor in id_tensors + ] + for tokens, attn_mask_and_pos_ids in zip(id_tensors, masks_and_position_ids): + attn_mask, _, pos_ids = attn_mask_and_pos_ids + return tokens, pos_ids, attn_mask + + @property + def input_types(self) -> Optional[Dict[str, NeuralType]]: + return { + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "position_ids": NeuralType(('B', 'T'), ChannelType()), + "attention_mask": NeuralType(('D', 'D', 'T', 'T'), ChannelType()), + } + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + return {"logits": NeuralType(('B', 'T', 'D'), ChannelType())} + + @property + def input_names(self) -> List[str]: + return ['input_ids', 'position_ids', 'attention_mask'] + + @property + def output_names(self) -> List[str]: + return ['logits'] + + class MegatronGPTModel(MegatronBaseModel, TextGeneration): """ Megatron GPT pretraining @@ -109,9 +205,15 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self._validate_trainer() - self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False) + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) + + self.rampup_batch_size = self.cfg.get('rampup_batch_size', None) + if self.rampup_batch_size: + self.prev_consumed_samples = 0 + self.if_first_step = 0 + self.prev_global_batch_size = None - if not self.megatron_amp_o2 and self.cfg.get('virtual_pipeline_model_parallel_size', None): + if not self.megatron_amp_O2 and self.cfg.get('virtual_pipeline_model_parallel_size', None): raise ValueError('Virtual pipeline model parallel is only supported when using megatron_amp_O2') # build_model returns a list of modules which are used for interleaved pipeline parallelism @@ -133,7 +235,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): if self.cfg.get('virtual_pipeline_model_parallel_size', None) is None: self.model = self.model[0] - if self.megatron_amp_o2: + if self.megatron_amp_O2: if not self.with_distributed_adam: # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type @@ -162,7 +264,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): raise ValueError('precision must be in [32, 16, "bf16"]') self.enable_autocast = ( - True if (not self.megatron_amp_o2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + True if (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False ) self.transformer_engine = cfg.get('transformer_engine', False) @@ -179,6 +281,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self._nsys_profile_end_step *= grad_accum_steps self.get_attention_mask_from_fusion = self.cfg.get('get_attention_mask_from_fusion', True) + self.initialize_ub = self.cfg.get('ub_tp_comm_overlap', False) def get_gpt_module_list(self): if isinstance(self.model, list): @@ -197,7 +300,7 @@ def get_inference_config(self): def model_provider_func(self, pre_process, post_process): """Model depends on pipeline paralellism.""" model = GPTModel( - vocab_size=self.padded_vocab_size, + vocab_size=self.cfg.get('override_vocab_size', self.padded_vocab_size), hidden_size=self.cfg.hidden_size, max_position_embeddings=self.cfg.max_position_embeddings, num_layers=self.cfg.num_layers, @@ -254,6 +357,9 @@ def model_provider_func(self, pre_process, post_process): fp8_amax_compute_algo=self.cfg.get('fp8_amax_compute_algo', 'most_recent'), reduce_amax=self.cfg.get('reduce_amax', True), use_emha=self.cfg.get('use_emha', False), + ub_tp_comm_overlap=self.cfg.get('ub_tp_comm_overlap', False), + use_flash_attention=self.cfg.get('use_flash_attention', False), + megatron_legacy=self.cfg.get('megatron_legacy', False), ) return model @@ -283,7 +389,7 @@ def configure_optimizers(self): module = self.model if module.share_token_embeddings: param = module.word_embeddings_weight() - param._disable_greedy_grad_copy = not self.megatron_amp_o2 + param._disable_greedy_grad_copy = not self.megatron_amp_O2 param._disable_overlap_grad_sync = True if parallel_state.is_pipeline_last_stage(ignore_virtual=True): if isinstance(self.model, list): @@ -292,14 +398,14 @@ def configure_optimizers(self): module = self.model if module.share_token_embeddings: param = module.word_embeddings_weight() - param._disable_greedy_grad_copy = not self.megatron_amp_o2 + param._disable_greedy_grad_copy = not self.megatron_amp_O2 param._disable_overlap_grad_sync = True # Disable overlapped grad sync for layer norm grads when # sequence parallelism is enabled for param in self.parameters(): if getattr(param, 'sequence_parallel', False): - param._disable_greedy_grad_copy = not self.megatron_amp_o2 + param._disable_greedy_grad_copy = not self.megatron_amp_O2 param._disable_overlap_grad_sync = True # Initialize parameter buckets for overlapped grad and param syncs @@ -348,7 +454,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): grad_sync_func = None param_sync_func = None if not forward_only and self.with_distributed_adam: - no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_o2,) + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) grad_sync_func = self.reduce_overlap_gradients param_sync_func = self.sync_overlap_parameters @@ -358,7 +464,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): # TODO @akhattar: add num_micro_batches_with_partial_activation_checkpoints when ready losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(), + forward_step_func=self.get_forward_output_and_loss_func(forward_only), data_iterator=self._make_data_iterator_list(dataloader_iter), model=self.model, num_microbatches=get_num_microbatches(), @@ -371,6 +477,8 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): no_sync_func=no_sync_func, grad_sync_func=grad_sync_func, param_sync_func=param_sync_func, + overlap_p2p_comm=self.cfg.get('overlap_p2p_comm', False), + batch_p2p_comm=self.cfg.get('batch_p2p_comm', True), ) # only the last stages of the pipeline return losses @@ -402,12 +510,45 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): return loss_mean + def initialize_ub_func(self): + input_shape = [ + self.cfg.get('encoder_seq_length') * self.cfg.get('micro_batch_size'), + self.cfg.get('hidden_size'), + ] + ub_cfg_file_name = self.cfg.get('ub_tp_comm_overlap_cfg', None) + ub_cfgs = None + if ub_cfg_file_name is not None: + try: + import yaml + + with open(ub_cfg_file_name, 'r') as ub_cfg_file: + ub_cfgs = yaml.safe_load(ub_cfg_file) + except (ImportError, TypeError): + logging.error(f"Fail to read ub_tp_comm_overlap config file: {ub_cfg_file_name}.") + te_module.base.initialize_ub( + shape=input_shape, + tp_size=self.cfg.get('tensor_model_parallel_size'), + use_fp8=self.cfg.get('fp8'), + ub_cfgs=ub_cfgs, + ) + self.initialize_ub = False + def training_step(self, dataloader_iter, batch_idx): """ We pass the dataloader iterator function to the micro-batch scheduler. The input batch to each micro-batch is fetched using the dataloader function in the micro-batch fwd function. """ + # Initialize userbuffer communicators. + if self.initialize_ub: + self.initialize_ub_func() + + if self.rampup_batch_size: + num_microbatch_calculator = apex.transformer.pipeline_parallel.utils._GLOBAL_NUM_MICROBATCHES_CALCULATOR + current_global_batch_size = num_microbatch_calculator.current_global_batch_size + # do validation and save the checkpoint when gbs is changed + if self.prev_global_batch_size != current_global_batch_size and self.prev_global_batch_size: + self.trainer.should_stop = True # we zero grads here because we also call backward in the megatron-core fwd/bwd functions self._optimizer.zero_grad() @@ -441,7 +582,7 @@ def training_step(self, dataloader_iter, batch_idx): # note: not necessary, but reduces performance degradation # from multiple simultaneous NCCL calls self._optimizer._finish_bucket_grad_sync() - elif self.megatron_amp_o2: + elif self.megatron_amp_O2: # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): # main grads are stored in the MainParamsOptimizer wrapper @@ -481,16 +622,15 @@ def training_step(self, dataloader_iter, batch_idx): 'consumed_samples', consumed_samples, prog_bar=True, rank_zero_only=True, batch_size=1, ) - if self.cfg.get('rampup_batch_size', None): - micro_batch_size = self.cfg.get('micro_batch_size', 1) - total_gpus_number = self.trainer.num_devices * self.trainer.num_nodes - current_global_batch_size = get_num_microbatches() * micro_batch_size * total_gpus_number - self.log('global_batch_size', current_global_batch_size, prog_bar=True, rank_zero_only=True, batch_size=1) - - num_microbatch_calculator = apex.transformer.pipeline_parallel.utils._GLOBAL_NUM_MICROBATCHES_CALCULATOR + if self.rampup_batch_size: + self.prev_global_batch_size = current_global_batch_size + self.prev_consumed_samples = consumed_samples num_microbatch_calculator.update( - consumed_samples=consumed_samples, consistency_check=True, + consumed_samples=consumed_samples, consistency_check=False, ) + current_global_batch_size = num_microbatch_calculator.current_global_batch_size + self.log('global_batch_size', current_global_batch_size, prog_bar=True, rank_zero_only=True, batch_size=1) + self.if_first_step = 1 return loss_mean @@ -517,7 +657,7 @@ def _append_sequence_parallel_module_grads(self, module, grads): # perform all_reduce when grad is None. # grad can be None when performing PeFT training. if sequence_parallel_param and param.requires_grad: - if self.megatron_amp_o2: + if self.megatron_amp_O2: grad = param.main_grad else: grad = param.grad @@ -567,7 +707,7 @@ def allreduce_first_last_embeddings(self): # (@adithyare) adapter training now extends MegatronGPTModel so we have to add this check here to ensure we do not perform all_reduce when grad is None. # grad can be None when performing PeFT training. if word_embeddings_weight.requires_grad: - if self.megatron_amp_o2: + if self.megatron_amp_O2: # O2 recipe stores a "main" copy of weights and grads grad = word_embeddings_weight.main_grad else: @@ -654,7 +794,6 @@ def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_ if self.get_attention_mask_from_fusion: required_keys.remove('attention_mask') batch = {key: val.cuda(non_blocking=True) if key in required_keys else None for key, val in batch.items()} - # Model forward pass output_tensor = model( batch['tokens'], @@ -711,9 +850,10 @@ def fwd_output_only_func(dataloader_iter, model): inference_max_sequence_len, ) = batch tokens = tokens.cuda() - attention_mask = attention_mask.cuda() position_ids = position_ids.cuda() - attention_mask = attention_mask[0:1] + if attention_mask is not None: + attention_mask = attention_mask.cuda() + attention_mask = attention_mask[0:1] extra_arg['set_inference_key_value_memory'] = set_inference_key_value_memory[0].item() extra_arg['inference_max_sequence_len'] = inference_max_sequence_len[0].item() output_tensor = model(tokens, position_ids, attention_mask, **extra_arg) @@ -732,6 +872,10 @@ def validation_step(self, dataloader_iter, batch_idx): from the dataloader to produce a list of microbatches. The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. """ + # Initialize userbuffer communicators. + if self.initialize_ub: + self.initialize_ub_func() + if isinstance(self.model, list): for model_module in self.model: model_module.eval() @@ -837,6 +981,7 @@ def build_pretraining_data_loader( data_parallel_size=parallel_state.get_data_parallel_world_size(), drop_last=drop_last, global_batch_size=self.cfg.global_batch_size, + rampup_batch_size=self.cfg.rampup_batch_size, pad_samples_to_global_batch_size=pad_samples_to_global_batch_size, ) elif self.cfg.data.dataloader_type == 'cyclic': @@ -887,28 +1032,15 @@ def setup(self, stage=None): self.init_consumed_samples = init_consumed_samples self.init_global_step = self.trainer.global_step - rampup_batch_size = self.cfg.get('rampup_batch_size', None) - if rampup_batch_size: - start_batch_size = rampup_batch_size[0] - batch_size_increment = rampup_batch_size[1] - total_gpus_number = self.trainer.num_devices * self.trainer.num_nodes - - assert start_batch_size % (total_gpus_number) == 0, ( - 'expected' - ' start batch size ({}) to be divisible by total number of GPUs' - ' ({})'.format(start_batch_size, total_gpus_number) - ) + if self.rampup_batch_size: + optimizer = self.cfg.optim.get('name', None) + assert ( + optimizer == 'fused_adam' + ), f'{optimizer} optimizer is not supported yet with rampup batch size. Please, use fused_adam optimizer instead.' - micro_batch_size = self.cfg.get('micro_batch_size', 1) - tensor_model_parallel_size = self.cfg.get('tensor_model_parallel_size', 1) - pipeline_model_parallel_size = self.cfg.get('pipeline_model_parallel_size', 1) - total_data_parallel_size = total_gpus_number // (tensor_model_parallel_size * pipeline_model_parallel_size) - - assert batch_size_increment % (micro_batch_size * total_data_parallel_size) == 0, ( - 'expected' - ' batch size increment ({}) to be divisible by micro_batch_size ({}) times total data parallel size' - ' ({})'.format(batch_size_increment, micro_batch_size, total_data_parallel_size) - ) + num_microbatch_calculator = apex.transformer.pipeline_parallel.utils._GLOBAL_NUM_MICROBATCHES_CALCULATOR + num_microbatch_calculator.update(self.init_consumed_samples, consistency_check=False) + self.prev_consumed_samples = self.init_consumed_samples if stage == 'predict': return @@ -920,17 +1052,18 @@ def setup(self, stage=None): self.setup_validation_data(self.cfg.data) self.setup_test_data(self.cfg.data) - # when using pipeline model parallel the final stage need to initialize word embeddings - if parallel_state.get_pipeline_model_parallel_world_size() > 1: - if isinstance(self.model, list): - for i, module in enumerate(self.model): - parallel_state.set_virtual_pipeline_model_parallel_rank(i) + if stage == 'fit': + # when using pipeline model parallel the final stage need to initialize word embeddings + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if isinstance(self.model, list): + for i, module in enumerate(self.model): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + if self.cfg.get('share_embeddings_and_output_weights', True): + module.sync_initial_word_embeddings() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + else: if self.cfg.get('share_embeddings_and_output_weights', True): - module.sync_initial_word_embeddings() - parallel_state.set_virtual_pipeline_model_parallel_rank(0) - else: - if self.cfg.get('share_embeddings_and_output_weights', True): - self.model.sync_initial_word_embeddings() + self.model.sync_initial_word_embeddings() if self.cfg.get('transformer_engine', False): self.setup_transformer_engine_tp_groups() @@ -1012,7 +1145,6 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] inference_config = inference_config.copy() compute_logprob = inference_config['compute_logprob'] if compute_logprob: - del inference_config['compute_logprob'] inference_config['inputs'] = batch inference_config['tokens_to_generate'] = 1 inference_config['all_probs'] = True @@ -1022,7 +1154,6 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] compute_prob_response = get_computeprob_response(self.tokenizer, response, batch) return compute_prob_response else: - del inference_config['compute_logprob'] inference_config['inputs'] = batch return generate(self, **inference_config) @@ -1113,6 +1244,13 @@ def parameters(self): else: return self.model.parameters() + @property + def mgpt_wrapper(self): + return MegatronGPTExportableModel(self) + + def list_export_subnets(self): + return ['mgpt_wrapper'] + def _reset_activation_checkpointing_args(self): """ Disables activation checkpointing completely and saves the values so that _restore_activation_checkpointing_args can restore them later. This function must always be @@ -1176,7 +1314,7 @@ def _reset_sequence_parallelism_args(self): for module in self.get_gpt_module_list(): for mod in module.modules(): if hasattr(mod, "sequence_parallel"): - mod.sequence_parallel = self.last_sequence_parallel + mod.sequence_parallel = False def _restore_sequence_parallelism_args(self): """ Restores the sequence parallelism parameters using the values saved by diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py index 930bfbc8cf25..73579114234d 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py @@ -225,6 +225,12 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self.name_key_to_cfg = {AdapterName.PTUNING_ADAPTER: adapter_cfg} super().__init__(cfg, trainer) self.virtual_tokens = cfg.peft.p_tuning.virtual_tokens + self.trainable_keys = self.adapter_keys - set( + [ + "model.language_model.adapter_layer.ptuning_adapter.inference_table.prompt_table.taskname.prompt_embeddings.weight" + ] + ) + # we exclude the above parameter from training because it is present for backward compatibility for inference using FasterTransformer (@adithyare) def init_peft_modules(self,): """ @@ -268,7 +274,15 @@ def load_state_dict(self, state_dict, strict: bool = True): def setup_optimizer_param_groups(self): if self.first_stage_of_pipeline(): - super().setup_optimizer_param_groups() + # super().setup_optimizer_param_groups() + self.freeze() # Freeze the entire model + opt_params = [] + for n, p in self.named_parameters(): + if n in self.trainable_keys: + p.requires_grad = True + opt_params.append(p) + + self._optimizer_param_groups = ({"params": opt_params},) else: self.freeze() # Freeze the entire model self._optimizer_param_groups = ({"params": []},) @@ -332,7 +346,7 @@ def __init__( AdapterName.LORA_KQV_ADAPTER, ] lora_cfg = cfg.peft.lora_tuning - if cfg.kv_channels is None: + if cfg.get("kv_channels", None) is None: assert ( cfg.hidden_size % cfg.num_attention_heads == 0 ), 'hidden_size must be divisible by num_attention_heads if kv_channels is None' diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py index 95448e67bd11..86beee3b22f1 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py @@ -136,7 +136,7 @@ def init_model(self, cfg: DictConfig, trainer: Trainer): override_config_path=frozen_model_cfg, ).to(dtype=self.autocast_dtype) - self.megatron_amp_o2 = self.cfg.get('megatron_amp_O2', False) + self.megatron_amp_O2 = self.cfg.get('megatron_amp_O2', False) self.pipeline_parallel = self.cfg.get('pipeline_model_parallel_size', 1) > 1 self.tokenizer = self.frozen_model.tokenizer self.hidden_size = self.frozen_model.cfg.hidden_size @@ -151,7 +151,7 @@ def init_model(self, cfg: DictConfig, trainer: Trainer): self.model_type = ModelType.encoder_or_decoder self.enable_autocast = ( - True if (not self.megatron_amp_o2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + True if (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False ) if self.pipeline_parallel: @@ -753,6 +753,7 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] "add_BOS": inference_config["add_BOS"], "all_probs": inference_config["all_probs"], "compute_logprob": inference_config["compute_logprob"], + "compute_attention_mask": inference_config.get("compute_attention_mask", True), } task_ids, processed_inputs = batch diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index 61b491d4af1d..9507a01d01f0 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -35,6 +35,7 @@ LengthParam, SamplingParam, generate, + get_computeprob_response, megatron_gpt_generate, ) from nemo.utils import AppState, logging @@ -235,7 +236,7 @@ def _build_dataset(self, data_cfg, is_train=True): num_train_samples_per_dataset = [[None]] * len(data_cfg.file_names) for file_path, num_samples in zip(data_cfg.file_names, num_train_samples_per_dataset): - if self.cfg.data.chat: + if self.cfg.data.get("chat", False): dataset_cls = GPTSFTChatDataset else: dataset_cls = GPTSFTDataset @@ -539,7 +540,6 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] inference_config = inference_config.copy() compute_logprob = inference_config['compute_logprob'] if compute_logprob: - del inference_config['compute_logprob'] inference_config['inputs'] = batch inference_config['tokens_to_generate'] = 1 inference_config['all_probs'] = True @@ -549,8 +549,12 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] compute_prob_response = get_computeprob_response(self.tokenizer, response, batch) return compute_prob_response else: - del inference_config['compute_logprob'] - inference_config['inputs'] = (batch['contexts'].cuda(), batch['context_lengths'].cuda()) + # for megatron_gpt_eval.py + if isinstance(batch, list): + inference_config['inputs'] = batch + else: + # peft_eval.py + inference_config['inputs'] = (batch['contexts'].cuda(), batch['context_lengths'].cuda()) return generate(self, **inference_config) def write_predictions_to_file(self, outputs, output_file_path_prefix): diff --git a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py index 217b707f5014..b32bb8dc29fe 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py @@ -115,9 +115,9 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): # We don't need to call it explicitly? Since it is a pytorch lightning hook function # self.setup_optimizer_param_groups() - self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False) + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) - if self.megatron_amp_o2: + if self.megatron_amp_O2: if not self.with_distributed_adam: # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type @@ -136,7 +136,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): raise ValueError('precision must be in [32, 16, "bf16"]') self.enable_autocast = ( - True if (not self.megatron_amp_o2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + True if (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False ) self.enc_dec_model.model_type = ModelType.encoder_and_decoder @@ -192,7 +192,7 @@ def configure_optimizers(self): # Disable async grad reductions for params that are # synchronized for pipeline parallelism for param in model_parallel_params: - param._disable_greedy_grad_copy = not self.megatron_amp_o2 + param._disable_greedy_grad_copy = not self.megatron_amp_O2 param._disable_overlap_grad_sync = True return super().configure_optimizers() @@ -369,7 +369,7 @@ def training_step(self, dataloader_iter, batch_idx): # note: not necessary, but reduces performance degradation # from multiple simultaneous NCCL calls self._optimizer._finish_bucket_grad_sync() - elif self.megatron_amp_o2: + elif self.megatron_amp_O2: # when using pipeline parallelism grads must be reduced after the pipeline (not asynchronously) if self.cfg.get('pipeline_model_parallel_size', 1) > 1: # main grads are stored in the MainParamsOptimizer wrapper @@ -469,7 +469,7 @@ def allreduce_word_and_position_embeddings(self): 'share_decoder_tokens_head_embeddings', True ): word_embeddings_weight = self.enc_dec_model.word_embeddings_weight() - if self.megatron_amp_o2: + if self.megatron_amp_O2: # O2 recipe stores a "main" copy of weights and grads grad = word_embeddings_weight.main_grad else: @@ -490,7 +490,7 @@ def allreduce_word_and_position_embeddings(self): ): if self.cfg.get('share_token_embeddings', True): position_embeddings_weight = self.enc_dec_model.position_embeddings_weight() - if self.megatron_amp_o2: + if self.megatron_amp_O2: grad = position_embeddings_weight.main_grad else: grad = position_embeddings_weight.grad @@ -509,7 +509,7 @@ def allreduce_word_and_position_embeddings(self): and parallel_state.get_pipeline_model_parallel_split_rank() > 1 ): position_embeddings_weight = self.enc_dec_model.encoder_relative_position_embeddings_weight() - if self.megatron_amp_o2: + if self.megatron_amp_O2: grad = position_embeddings_weight.main_grad else: grad = position_embeddings_weight.grad @@ -523,7 +523,7 @@ def allreduce_word_and_position_embeddings(self): and parallel_state.is_rank_in_decoder_relative_position_embedding_group() ): position_embeddings_weight = self.enc_dec_model.decoder_relative_position_embeddings_weight() - if self.megatron_amp_o2: + if self.megatron_amp_O2: grad = position_embeddings_weight.main_grad else: grad = position_embeddings_weight.grad @@ -536,7 +536,7 @@ def allreduce_word_and_position_embeddings(self): position_embeddings_weight = ( self.enc_dec_model.decoder_cross_attention_relative_position_embeddings_weight() ) - if self.megatron_amp_o2: + if self.megatron_amp_O2: grad = position_embeddings_weight.main_grad else: grad = position_embeddings_weight.grad @@ -594,7 +594,7 @@ def _kwargs_to_arg_idx(self): Computed on first call, and then cached. """ # build mapping of kwargs to arg index at first run - module = self.enc_dec_model.forward if not self.megatron_amp_o2 else self.enc_dec_model.module.forward + module = self.enc_dec_model.forward if not self.megatron_amp_O2 else self.enc_dec_model.module.forward args_name = inspect.getfullargspec(module)[0][1:] kwargs_to_arg_idx = {k: v for k, v in zip(args_name, range(len(args_name)))} diff --git a/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py b/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py index 31c361b29d44..24439237eec3 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py @@ -106,7 +106,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self.model.model_type = ModelType.encoder_and_decoder self.enable_autocast = ( - True if (not self.megatron_amp_o2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + True if (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False ) if hasattr(self.cfg, "shape_file"): @@ -464,7 +464,6 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] inference_config = inference_config.copy() compute_logprob = inference_config['compute_logprob'] if compute_logprob: - del inference_config['compute_logprob'] inference_config['inputs'] = batch inference_config['tokens_to_generate'] = 1 inference_config['all_probs'] = True @@ -474,7 +473,6 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] compute_prob_response = get_computeprob_response(self.tokenizer, response, batch) return compute_prob_response else: - del inference_config['compute_logprob'] inference_config['inputs'] = batch return generate(self, **inference_config, strategy=self.inference_strategy) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py index 31c147022486..03bc11cc3d3c 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py @@ -35,6 +35,9 @@ from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import ( AdapterName, InfusedAdapterConfig, + LoraKQVAdapterConfig, + LoraKVAdapterConfig, + LoraQAdapterConfig, MLPInfusedAdapterConfig, ParallelLinearAdapterConfig, ) @@ -420,6 +423,132 @@ def list_available_models(cls): pass +class MegatronT5LoraModel(MegatronT5BaseAdapterModel): + """ + TODO (@adithyare) + """ + + def __init__(self, cfg: DictConfig, trainer: Trainer): + super().__init__(cfg, trainer) + # assert cfg.lora_tuning.get('adapter_dim', 0) > 0, "adapter_dim has not been set." + # assert ( + # cfg.lora_tuning.adapter_dim % cfg.tensor_model_parallel_size == 0 + # ), "The adapter dim should be divisible by tensor_model_parallel_size." + + encoder_adapter_name_keys = [AdapterName.LORA_KQV_ADAPTER] + decoder_adapter_name_keys = [ + AdapterName.LORA_KQV_ADAPTER, + AdapterName.LORA_KV_ADAPTER, + AdapterName.LORA_Q_ADAPTER, + ] + + # add adapter keys to the list -> to update state dict + self.adapter_name_keys = encoder_adapter_name_keys + decoder_adapter_name_keys + + frozen_model_cfg = MegatronT5Model.restore_from( + cfg.get('language_model_path'), trainer=trainer, return_config=True + ) + for _, layer in self.frozen_model.named_modules(): + if hasattr(layer, 'activations_checkpoint_method'): + layer.activations_checkpoint_method = ( + None # (@adithyare) adapter learning does not support activations checkpointing atm. + ) + + self.frozen_model.freeze() + logging.info(f'Before adding adapters:\n{self.frozen_model.summarize()}') + encoder = self.frozen_model.enc_dec_model.enc_dec_model.encoder + decoder = self.frozen_model.enc_dec_model.enc_dec_model.decoder + + if encoder: + encoder_cfg = self._get_component_cfg('encoder', frozen_model_cfg, cfg) + self._add_adapters_to_component(encoder, encoder_cfg, encoder_adapter_name_keys) + logging.info(f'Adding encoder adapters:\n{self.frozen_model.summarize()}') + + if decoder: + decoder_cfg = self._get_component_cfg('decoder', frozen_model_cfg, cfg) + self._add_adapters_to_component(decoder, decoder_cfg, decoder_adapter_name_keys) + logging.info(f'Adding decoder adapters:\n{self.frozen_model.summarize()}') + + def _add_adapters_to_component(self, component, component_cfg, adapter_name_keys): + for _, module in component.named_modules(): + if isinstance(module, adapter_mixins.AdapterModuleMixin): + for adapter_key in adapter_name_keys: + adapter_cfg = self._get_adapter_cfg(component_cfg, adapter_key) + if model_utils.import_class_by_path(adapter_cfg._target_) in module.get_accepted_adapter_types(): + module.add_adapter(name=adapter_key, cfg=adapter_cfg) + print(f"in adding {adapter_key}") + + def _get_component_cfg(self, component_name, frozen_model_cfg, cfg): + if component_name in frozen_model_cfg: + component_cfg = frozen_model_cfg.get(component_name) + with open_dict(component_cfg): + component_cfg.tensor_model_parallel_size = frozen_model_cfg.tensor_model_parallel_size + component_cfg.lora_tuning = cfg.lora_tuning + else: + component_cfg = frozen_model_cfg + with open_dict(component_cfg): + component_cfg.lora_tuning = cfg.lora_tuning + return component_cfg + + def _get_adapter_cfg(self, component_cfg, adapter_key): + if component_cfg.kv_channels is None: + assert ( + component_cfg.hidden_size % component_cfg.num_attention_heads == 0 + ), 'hidden_size must be divisible by num_attention_heads if kv_channels is None' + kv_channels = component_cfg.hidden_size // component_cfg.num_attention_heads + else: + kv_channels = component_cfg.kv_channels + projection_size = kv_channels * component_cfg.num_attention_heads + + if adapter_key == AdapterName.LORA_KQV_ADAPTER: + adapter_cfg = LoraKQVAdapterConfig( + in_features=component_cfg.hidden_size, + out_features=3 * projection_size, + dim=component_cfg.lora_tuning.kqv_adapter_dim, + norm_position="none", + norm_type="none", + activation="identity", + column_init_method=component_cfg.lora_tuning.get("column_init_method", "normal"), + row_init_method=component_cfg.lora_tuning.get("row_init_method", "zero"), + gather_output=False, + dropout=0.0, + ) + elif adapter_key == AdapterName.LORA_KV_ADAPTER: + adapter_cfg = LoraKVAdapterConfig( + in_features=component_cfg.hidden_size, + out_features=2 * projection_size, + dim=component_cfg.lora_tuning.kv_adapter_dim, + norm_position="none", + norm_type="none", + activation="identity", + column_init_method=component_cfg.lora_tuning.get("column_init_method", "normal"), + row_init_method=component_cfg.lora_tuning.get("row_init_method", "zero"), + gather_output=False, + dropout=0.0, + ) + elif adapter_key == AdapterName.LORA_Q_ADAPTER: + adapter_cfg = LoraQAdapterConfig( + in_features=component_cfg.hidden_size, + out_features=1 * projection_size, + dim=component_cfg.lora_tuning.q_adapter_dim, + norm_position="none", + norm_type="none", + activation="identity", + column_init_method=component_cfg.lora_tuning.get("column_init_method", "normal"), + row_init_method=component_cfg.lora_tuning.get("row_init_method", "zero"), + gather_output=False, + dropout=0.0, + ) + else: + raise RuntimeError("Unexpected adapter key name..") + + return adapter_cfg + + @classmethod + def list_available_models(cls): + pass + + class MegatronT5InfusedAdapterModel(MegatronT5BaseAdapterModel): """ MegatronGPTInfusedAdapterModel is a model that combines a base model (GPTModel) with a "Infused Adapter that can Inhibiting and Amplify Inner Activations", known as IA3. diff --git a/nemo/collections/nlp/models/nlp_model.py b/nemo/collections/nlp/models/nlp_model.py index 032a7449c27e..d739efa88485 100644 --- a/nemo/collections/nlp/models/nlp_model.py +++ b/nemo/collections/nlp/models/nlp_model.py @@ -16,7 +16,7 @@ import hashlib import json import os -from typing import Any, Optional +from typing import Any, Mapping, Optional from omegaconf import DictConfig, OmegaConf from pytorch_lightning import Trainer @@ -385,3 +385,13 @@ def load_from_checkpoint( finally: cls._set_model_restore_state(is_being_restored=False) return checkpoint + + def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True): + # starting with trasformers v4.31.0, buffer for position_ids is persistent=False + if ( + self.bert_model is not None + and "position_ids" not in self.bert_model.embeddings._modules + and "bert_model.embeddings.position_ids" in state_dict + ): + del state_dict["bert_model.embeddings.position_ids"] + super(NLPModel, self).load_state_dict(state_dict, strict=strict) diff --git a/nemo/collections/nlp/models/spellchecking_asr_customization/__init__.py b/nemo/collections/nlp/models/spellchecking_asr_customization/__init__.py new file mode 100644 index 000000000000..5e94de32e9aa --- /dev/null +++ b/nemo/collections/nlp/models/spellchecking_asr_customization/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from nemo.collections.nlp.models.spellchecking_asr_customization.spellchecking_model import ( + SpellcheckingAsrCustomizationModel, +) diff --git a/nemo/collections/nlp/models/spellchecking_asr_customization/spellchecking_model.py b/nemo/collections/nlp/models/spellchecking_asr_customization/spellchecking_model.py new file mode 100644 index 000000000000..15ffb2dd1bcd --- /dev/null +++ b/nemo/collections/nlp/models/spellchecking_asr_customization/spellchecking_model.py @@ -0,0 +1,527 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from time import perf_counter +from typing import Dict, Optional + +import torch +from omegaconf import DictConfig +from pytorch_lightning import Trainer + +from nemo.collections.common.losses import CrossEntropyLoss +from nemo.collections.nlp.data.spellchecking_asr_customization import ( + SpellcheckingAsrCustomizationDataset, + SpellcheckingAsrCustomizationTestDataset, + TarredSpellcheckingAsrCustomizationDataset, + bert_example, +) +from nemo.collections.nlp.data.text_normalization_as_tagging.utils import read_label_map +from nemo.collections.nlp.metrics.classification_report import ClassificationReport +from nemo.collections.nlp.models.nlp_model import NLPModel +from nemo.collections.nlp.modules.common.token_classifier import TokenClassifier +from nemo.collections.nlp.parts.utils_funcs import tensor2list +from nemo.core.classes.common import PretrainedModelInfo, typecheck +from nemo.core.neural_types import LogitsType, NeuralType +from nemo.utils import logging +from nemo.utils.decorators import experimental + +__all__ = ["SpellcheckingAsrCustomizationModel"] + + +@experimental +class SpellcheckingAsrCustomizationModel(NLPModel): + """ + https://arxiv.org/abs/2306.02317 + BERT-based model for Spellchecking ASR Customization. + It takes as input ASR hypothesis and candidate customization entries. + It labels the hypothesis with correct entry index or 0. + Example input: [CLS] a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o [SEP] d i d i e r _ s a u m o n [SEP] a s t r o n o m i e [SEP] t r i s t a n _ g u i l l o t [SEP] ... + Input segments: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 + Example output: 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 3 3 3 3 3 3 3 3 3 3 3 3 3 0 ... + """ + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + return { + "logits": NeuralType(('B', 'T', 'D'), LogitsType()), + } + + @property + def input_module(self): + return self + + @property + def output_module(self): + return self + + def __init__(self, cfg: DictConfig, trainer: Trainer = None) -> None: + super().__init__(cfg=cfg, trainer=trainer) + + # Label map contains 11 labels: 0 for nothing, 1..10 for target candidate ids + label_map_file = self.register_artifact("label_map", cfg.label_map, verify_src_exists=True) + + # Semiotic classes for this model consist only of classes CUSTOM(means fragment containing custom candidate) and PLAIN (any other single-character fragment) + # They are used only during validation step, to calculate accuracy for CUSTOM and PLAIN classes separately + semiotic_classes_file = self.register_artifact( + "semiotic_classes", cfg.semiotic_classes, verify_src_exists=True + ) + self.label_map = read_label_map(label_map_file) + self.semiotic_classes = read_label_map(semiotic_classes_file) + + self.num_labels = len(self.label_map) + self.num_semiotic_labels = len(self.semiotic_classes) + self.id_2_tag = {tag_id: tag for tag, tag_id in self.label_map.items()} + self.id_2_semiotic = {semiotic_id: semiotic for semiotic, semiotic_id in self.semiotic_classes.items()} + self.max_sequence_len = cfg.get('max_sequence_len', self.tokenizer.tokenizer.model_max_length) + + # Setup to track metrics + # We will have (len(self.semiotic_classes) + 1) labels. + # Last one stands for WRONG (span in which the predicted tags don't match the labels) + # This is needed to feed the sequence of classes to classification_report during validation + label_ids = self.semiotic_classes.copy() + label_ids["WRONG"] = len(self.semiotic_classes) + self.tag_classification_report = ClassificationReport( + len(self.semiotic_classes) + 1, label_ids=label_ids, mode='micro', dist_sync_on_step=True + ) + + self.hidden_size = cfg.hidden_size + + # hidden size is doubled because in forward we concatenate embeddings for characters and embeddings for subwords + self.logits = TokenClassifier( + self.hidden_size * 2, num_classes=self.num_labels, num_layers=1, log_softmax=False, dropout=0.1 + ) + + self.loss_fn = CrossEntropyLoss(logits_ndim=3) + + self.builder = bert_example.BertExampleBuilder( + self.label_map, self.semiotic_classes, self.tokenizer.tokenizer, self.max_sequence_len + ) + + @typecheck() + def forward( + self, + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + ): + """ + Same BERT-based model is used to calculate embeddings for sequence of single characters and for sequence of subwords. + Then we concatenate subword embeddings to each character corresponding to this subword. + We return logits for each character x 11 labels: 0 - character doesn't belong to any candidate, 1..10 - character belongs to candidate with this id. + + # Arguments + input_ids: token_ids for single characters; .shape = [batch_size, char_seq_len]; .dtype = int64 + input_mask: mask for input_ids(1 - real, 0 - padding); .shape = [batch_size, char_seq_len]; .dtype = int64 + segment_ids: segment types for input_ids (0 - ASR-hypothesis, 1..10 - candidate); .shape = [batch_size, char_seq_len]; .dtype = int64 + input_ids_for_subwords: token_ids for subwords; .shape = [batch_size, subword_seq_len]; .dtype = int64 + input_mask_for_subwords: mask for input_ids_for_subwords(1 - real, 0 - padding); .shape = [batch_size, subword_seq_len]; .dtype = int64 + segment_ids_for_subwords: segment types for input_ids_for_subwords (0 - ASR-hypothesis, 1..10 - candidate); .shape = [batch_size, subword_seq_len]; .dtype = int64 + character_pos_to_subword_pos: tensor mapping character position in the input sequence to subword position; .shape = [batch_size, char_seq_len]; .dtype = int64 + """ + + # src_hiddens.shape = [batch_size, char_seq_len, bert_hidden_size]; .dtype=float32 + src_hiddens = self.bert_model(input_ids=input_ids, token_type_ids=segment_ids, attention_mask=input_mask) + # src_hiddens_for_subwords.shape = [batch_size, subword_seq_len, bert_hidden_size]; .dtype=float32 + src_hiddens_for_subwords = self.bert_model( + input_ids=input_ids_for_subwords, + token_type_ids=segment_ids_for_subwords, + attention_mask=input_mask_for_subwords, + ) + + # Next three commands concatenate subword embeddings to each character embedding of the corresponding subword + # index.shape = [batch_size, char_seq_len, bert_hidden_size]; .dtype=int64 + index = character_pos_to_subword_pos.unsqueeze(-1).expand((-1, -1, src_hiddens_for_subwords.shape[2])) + # src_hiddens_2.shape = [batch_size, char_seq_len, bert_hidden_size]; .dtype=float32 + src_hiddens_2 = torch.gather(src_hiddens_for_subwords, 1, index) + # src_hiddens.shape = [batch_size, char_seq_len, bert_hidden_size * 2]; .dtype=float32 + src_hiddens = torch.cat((src_hiddens, src_hiddens_2), 2) + + # logits.shape = [batch_size, char_seq_len, num_labels]; num_labels=11: ids from 0 to 10; .dtype=float32 + logits = self.logits(hidden_states=src_hiddens) + return logits + + # Training + def training_step(self, batch, batch_idx): + """ + Lightning calls this inside the training loop with the data from the training dataloader + passed in as `batch`. + """ + + ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + labels_mask, + labels, + _, + ) = batch + logits = self.forward( + input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + input_ids_for_subwords=input_ids_for_subwords, + input_mask_for_subwords=input_mask_for_subwords, + segment_ids_for_subwords=segment_ids_for_subwords, + character_pos_to_subword_pos=character_pos_to_subword_pos, + ) + loss = self.loss_fn(logits=logits, labels=labels, loss_mask=labels_mask) + lr = self._optimizer.param_groups[0]['lr'] + self.log('train_loss', loss) + self.log('lr', lr, prog_bar=True) + return {'loss': loss, 'lr': lr} + + # Validation and Testing + def validation_step(self, batch, batch_idx): + """ + Lightning calls this inside the validation loop with the data from the validation dataloader + passed in as `batch`. + """ + ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + labels_mask, + labels, + spans, + ) = batch + logits = self.forward( + input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + input_ids_for_subwords=input_ids_for_subwords, + input_mask_for_subwords=input_mask_for_subwords, + segment_ids_for_subwords=segment_ids_for_subwords, + character_pos_to_subword_pos=character_pos_to_subword_pos, + ) + tag_preds = torch.argmax(logits, dim=2) + + # Update tag classification_report + for input_mask_seq, segment_seq, prediction_seq, label_seq, span_seq in zip( + input_mask.tolist(), segment_ids.tolist(), tag_preds.tolist(), labels.tolist(), spans.tolist() + ): + # Here we want to track whether the predicted output matches ground truth labels for each whole span. + # We construct the special input for classification report, for example: + # span_labels = [PLAIN, PLAIN, PLAIN, PLAIN, CUSTOM, CUSTOM] + # span_predictions = [PLAIN, WRONG, PLAIN, PLAIN, WRONG, CUSTOM] + # Note that the number of PLAIN and CUSTOM occurrences in the report is not comparable, + # because PLAIN is for characters, and CUSTOM is for phrases. + span_labels = [] + span_predictions = [] + plain_cid = self.semiotic_classes["PLAIN"] + wrong_cid = self.tag_classification_report.num_classes - 1 + + # First we loop through all predictions for input characters with label=0, they are regarded as separate spans with PLAIN class. + # It either stays as PLAIN if the model prediction is 0, or turns to WRONG. + for i in range(len(segment_seq)): + if input_mask_seq[i] == 0: + continue + if segment_seq[i] > 0: # token does not belong to ASR-hypothesis => it's over + break + if label_seq[i] == 0: + span_labels.append(plain_cid) + if prediction_seq[i] == 0: + span_predictions.append(plain_cid) + else: + span_predictions.append(wrong_cid) + # if label_seq[i] != 0 then it belongs to CUSTOM span and will be handled later + + # Second we loop through spans tensor which contains only spans for CUSTOM class. + # It stays as CUSTOM if all predictions for the whole span are equal to the labels, otherwise it turns to WRONG. + for cid, start, end in span_seq: + if cid == -1: + break + span_labels.append(cid) + if prediction_seq[start:end] == label_seq[start:end]: + span_predictions.append(cid) + else: + span_predictions.append(wrong_cid) + + if len(span_labels) != len(span_predictions): + raise ValueError( + "Length mismatch: len(span_labels)=" + + str(len(span_labels)) + + "; len(span_predictions)=" + + str(len(span_predictions)) + ) + self.tag_classification_report( + torch.tensor(span_predictions).to(self.device), torch.tensor(span_labels).to(self.device) + ) + + val_loss = self.loss_fn(logits=logits, labels=labels, loss_mask=labels_mask) + return {'val_loss': val_loss} + + def validation_epoch_end(self, outputs): + """ + Called at the end of validation to aggregate outputs. + :param outputs: list of individual outputs of each validation step. + """ + avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean() + + # Calculate metrics and classification report + # Note that in our task recall = accuracy, and the recall column is the per class accuracy + _, tag_accuracy, _, tag_report = self.tag_classification_report.compute() + + logging.info("Total tag accuracy: " + str(tag_accuracy)) + logging.info(tag_report) + + self.log('val_loss', avg_loss, prog_bar=True) + self.log('tag accuracy', tag_accuracy) + + self.tag_classification_report.reset() + + def test_step(self, batch, batch_idx): + """ + Lightning calls this inside the test loop with the data from the test dataloader + passed in as `batch`. + """ + return self.validation_step(batch, batch_idx) + + def test_epoch_end(self, outputs): + """ + Called at the end of test to aggregate outputs. + :param outputs: list of individual outputs of each test step. + """ + return self.validation_epoch_end(outputs) + + # Functions for inference + + @torch.no_grad() + def infer(self, dataloader_cfg: DictConfig, input_name: str, output_name: str) -> None: + """ Main function for Inference + + Args: + dataloader_cfg: config for dataloader + input_name: Input file with tab-separated text records. Each record consists of 2 items: + - ASR hypothesis + - candidate phrases separated by semicolon + output_name: Output file with tab-separated text records. Each record consists of 2 items: + - ASR hypothesis + - candidate phrases separated by semicolon + - list of possible replacements with probabilities (start, pos, candidate_id, prob), separated by semicolon + - list of labels, predicted for each letter (for debug purposes) + + Returns: None + """ + mode = self.training + device = "cuda" if torch.cuda.is_available() else "cpu" + + try: + # Switch model to evaluation mode + self.eval() + self.to(device) + logging_level = logging.get_verbosity() + logging.set_verbosity(logging.WARNING) + infer_datalayer = self._setup_infer_dataloader(dataloader_cfg, input_name) + + all_tag_preds = ( + [] + ) # list(size=number of sentences) of lists(size=number of letters) of tag predictions (best candidate_id for each letter) + all_possible_replacements = ( + [] + ) # list(size=number of sentences) of lists(size=number of potential replacements) of tuples(start, pos, candidate_id, prob) + for batch in iter(infer_datalayer): + ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + fragment_indices, + ) = batch + + # tag_logits.shape = [batch_size, char_seq_len, num_labels]; num_labels=11: ids from 0 to 10; .dtype=float32 + tag_logits = self.forward( + input_ids=input_ids.to(self.device), + input_mask=input_mask.to(self.device), + segment_ids=segment_ids.to(self.device), + input_ids_for_subwords=input_ids_for_subwords.to(self.device), + input_mask_for_subwords=input_mask_for_subwords.to(self.device), + segment_ids_for_subwords=segment_ids_for_subwords.to(self.device), + character_pos_to_subword_pos=character_pos_to_subword_pos.to(self.device), + ) + + # fragment_indices.shape=[batsh_size, num_fragments, 3], where last dimension is [start, end, label], where label is candidate id from 1 to 10 + # Next we want to convert predictions for separate letters to probabilities for each whole fragment from fragment_indices. + # To achieve this we first sum the letter logits in each fragment and divide by its length. + # (We use .cumsum and then difference between end and start to get sum per fragment). + # Then we convert logits to probs with softmax and for each fragment extract only the prob for given label. + # Finally we get a list of tuples (start, end, label, prob) + indices_len = fragment_indices.shape[1] + # this padding adds a row of zeros (size=num_labels) as first element of sequence in second dimension. This is needed for cumsum operations. + padded_logits = torch.nn.functional.pad(tag_logits, pad=(0, 0, 1, 0)) + ( + batch_size, + seq_len, + num_labels, + ) = padded_logits.shape # seq_len is +1 compared to that of tag_logits, because of padding + # cumsum.shape=[batch_size, seq_len, num_labels] + cumsum = padded_logits.cumsum(dim=1) + # the size -1 is inferred from other dimensions. We get rid of batch dimension. + cumsum_view = cumsum.view(-1, num_labels) + word_index = ( + torch.ones((batch_size, indices_len), dtype=torch.long) + * torch.arange(batch_size).reshape((-1, 1)) + * seq_len + ).view(-1) + lower_index = (fragment_indices[..., 0]).view(-1) + word_index + higher_index = (fragment_indices[..., 1]).view(-1) + word_index + d_index = (higher_index - lower_index).reshape((-1, 1)).to(self.device) # word lengths + dlog = cumsum_view[higher_index, :] - cumsum_view[lower_index, :] # sum of logits + # word_logits.shape=[batch_size, indices_len, num_labels] + word_logits = (dlog / d_index.float()).view(batch_size, indices_len, num_labels) + # convert logits to probs, same shape + word_probs = torch.nn.functional.softmax(word_logits, dim=-1).to(self.device) + # candidate_index.shape=[batch_size, indices_len] + candidate_index = fragment_indices[:, :, 2].to(self.device) + # candidate_probs.shape=[batch_size, indices_len] + candidate_probs = torch.take_along_dim(word_probs, candidate_index.unsqueeze(2), dim=-1).squeeze(2) + for i in range(batch_size): + possible_replacements = [] + for j in range(indices_len): + start, end, candidate_id = ( + int(fragment_indices[i][j][0]), + int(fragment_indices[i][j][1]), + int(fragment_indices[i][j][2]), + ) + if candidate_id == 0: # this is padding + continue + prob = round(float(candidate_probs[i][j]), 5) + if prob < 0.01: + continue + # -1 because in the output file we will not have a [CLS] token + possible_replacements.append( + str(start - 1) + " " + str(end - 1) + " " + str(candidate_id) + " " + str(prob) + ) + all_possible_replacements.append(possible_replacements) + + # torch.argmax(tag_logits, dim=-1) gives a tensor of best predicted labels with shape [batch_size, char_seq_len], .dtype = int64 + # character_preds is list of lists of predicted labels + character_preds = tensor2list(torch.argmax(tag_logits, dim=-1)) + all_tag_preds.extend(character_preds) + + if len(all_possible_replacements) != len(all_tag_preds) or len(all_possible_replacements) != len( + infer_datalayer.dataset.examples + ): + raise IndexError( + "number of sentences mismatch: len(all_possible_replacements)=" + + str(len(all_possible_replacements)) + + "; len(all_tag_preds)=" + + str(len(all_tag_preds)) + + "; len(infer_datalayer.dataset.examples)=" + + str(len(infer_datalayer.dataset.examples)) + ) + # save results to file + with open(output_name, "w", encoding="utf-8") as out: + for i in range(len(infer_datalayer.dataset.examples)): + hyp, ref = infer_datalayer.dataset.hyps_refs[i] + num_letters = hyp.count(" ") + 1 + tag_pred_str = " ".join(list(map(str, all_tag_preds[i][1 : (num_letters + 1)]))) + possible_replacements_str = ";".join(all_possible_replacements[i]) + out.write(hyp + "\t" + ref + "\t" + possible_replacements_str + "\t" + tag_pred_str + "\n") + + except Exception as e: + raise ValueError("Error processing file " + input_name) + + finally: + # set mode back to its original value + self.train(mode=mode) + logging.set_verbosity(logging_level) + + # Functions for processing data + def setup_training_data(self, train_data_config: Optional[DictConfig]): + if not train_data_config or not train_data_config.data_path: + logging.info( + f"Dataloader config or file_path for the train is missing, so no data loader for train is created!" + ) + self._train_dl = None + return + self._train_dl = self._setup_dataloader_from_config(cfg=train_data_config, data_split="train") + + def setup_validation_data(self, val_data_config: Optional[DictConfig]): + if not val_data_config or not val_data_config.data_path: + logging.info( + f"Dataloader config or file_path for the validation is missing, so no data loader for validation is created!" + ) + self._validation_dl = None + return + self._validation_dl = self._setup_dataloader_from_config(cfg=val_data_config, data_split="val") + + def setup_test_data(self, test_data_config: Optional[DictConfig]): + if not test_data_config or test_data_config.data_path is None: + logging.info( + f"Dataloader config or file_path for the test is missing, so no data loader for test is created!" + ) + self._test_dl = None + return + self._test_dl = self._setup_dataloader_from_config(cfg=test_data_config, data_split="test") + + def _setup_dataloader_from_config(self, cfg: DictConfig, data_split: str): + start_time = perf_counter() + logging.info(f'Creating {data_split} dataset') + if cfg.get("use_tarred_dataset", False): + dataset = TarredSpellcheckingAsrCustomizationDataset( + cfg.data_path, + shuffle_n=cfg.get("tar_shuffle_n", 100), + global_rank=self.global_rank, + world_size=self.world_size, + pad_token_id=self.builder._pad_id, + ) + else: + input_file = cfg.data_path + dataset = SpellcheckingAsrCustomizationDataset(input_file=input_file, example_builder=self.builder) + dl = torch.utils.data.DataLoader( + dataset=dataset, batch_size=cfg.batch_size, shuffle=cfg.shuffle, collate_fn=dataset.collate_fn + ) + running_time = perf_counter() - start_time + logging.info(f'Took {running_time} seconds') + return dl + + def _setup_infer_dataloader(self, cfg: DictConfig, input_name: str) -> 'torch.utils.data.DataLoader': + """ + Setup function for a infer data loader. + Args: + cfg: config dictionary containing data loader params like batch_size, num_workers and pin_memory + input_name: path to input file. + Returns: + A pytorch DataLoader. + """ + dataset = SpellcheckingAsrCustomizationTestDataset(input_name, example_builder=self.builder) + return torch.utils.data.DataLoader( + dataset=dataset, + batch_size=cfg["batch_size"], + shuffle=False, + num_workers=cfg.get("num_workers", 0), + pin_memory=cfg.get("pin_memory", False), + drop_last=False, + collate_fn=dataset.collate_fn, + ) + + @classmethod + def list_available_models(cls) -> Optional[PretrainedModelInfo]: + return None diff --git a/nemo/collections/nlp/models/text2sparql/text2sparql_model.py b/nemo/collections/nlp/models/text2sparql/text2sparql_model.py index 5290209b0c95..50046aef0344 100644 --- a/nemo/collections/nlp/models/text2sparql/text2sparql_model.py +++ b/nemo/collections/nlp/models/text2sparql/text2sparql_model.py @@ -100,7 +100,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): decoder=cfg.language_model.pretrained_decoder_model_name, ) - self.validation_perplexity = Perplexity(compute_on_step=False) + self.validation_perplexity = Perplexity() self.setup_optimization(cfg.optim) diff --git a/nemo/collections/nlp/modules/common/chatbot_component.py b/nemo/collections/nlp/modules/common/chatbot_component.py index 548458df7e29..afc86d9defec 100644 --- a/nemo/collections/nlp/modules/common/chatbot_component.py +++ b/nemo/collections/nlp/modules/common/chatbot_component.py @@ -19,9 +19,29 @@ """ from __future__ import annotations -from gradio.components import * +import warnings + from markdown2 import Markdown +try: + from typing import Any, Callable, Dict, List, Literal, Tuple + + from gradio.components import ( + Changeable, + Component, + Enum, + EventListenerMethod, + IOComponent, + JSONSerializable, + Selectable, + document, + processing_utils, + ) + + GRADIO_AVAILABLE = True +except (ImportError, ModuleNotFoundError): + GRADIO_AVAILABLE = False + class _Keywords(Enum): NO_VALUE = "NO_VALUE" # Used as a sentinel to determine if nothing is provided as a argument for `value` in `Component.update()` diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py index b26b971a38ba..fe339c6f9a8b 100644 --- a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py +++ b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py @@ -27,6 +27,7 @@ from nemo.collections.common.parts.utils import activation_registry from nemo.collections.nlp.modules.common.megatron.fused_bias_gelu import fused_bias_gelu from nemo.collections.nlp.modules.common.megatron.utils import init_method_const, init_method_normal +from nemo.collections.nlp.modules.common.prompt_encoder import InferenceTable from nemo.core.classes.mixins import adapter_mixin_strategies try: @@ -65,13 +66,11 @@ class AdapterName(str, enum.Enum): class InfusedAdapter(nn.Module, AdapterModuleUtil): - def __init__( - self, in_features: int, adapter_strategy: adapter_mixin_strategies.ResidualAddAdapterStrategyConfig = None, - ) -> None: + def __init__(self, in_features: int,) -> None: super().__init__() self.scalers = nn.Parameter(torch.ones(in_features)) # Setup adapter strategy - self.setup_adapter_strategy(adapter_strategy) + self.setup_adapter_strategy(adapter_mixin_strategies.ReturnResultAdapterStrategy()) def forward(self, x): x = x * self.scalers[None, None, :] @@ -90,7 +89,6 @@ class MLPInfusedAdapter(InfusedAdapter): @dataclass class InfusedAdapterConfig: in_features: int - adapter_strategy: Optional[Any] = adapter_mixin_strategies.ResidualAddAdapterStrategyConfig() _target_: str = "{0}.{1}".format(InfusedAdapter.__module__, InfusedAdapter.__name__) @@ -112,7 +110,6 @@ def __init__( row_init_method: str = 'zero', # TODO: (@adithyare) should rename this to output_init_method to be more precise. gather_output: bool = True, dropout: float = 0.0, - adapter_strategy: adapter_mixin_strategies.ResidualAddAdapterStrategyConfig = None, ): super().__init__() if not HAVE_APEX: @@ -153,7 +150,7 @@ def __init__( self.dropout = None # Setup adapter strategy - self.setup_adapter_strategy(adapter_strategy) + self.setup_adapter_strategy(adapter_mixin_strategies.ReturnResultAdapterStrategy()) def _get_init_fn(self, init_method: str): if init_method == 'xavier': @@ -196,13 +193,30 @@ class ParallelLinearAdapterConfig: row_init_method: str = 'zero' gather_output: bool = True dropout: float = 0.0 - adapter_strategy: Optional[Any] = adapter_mixin_strategies.ResidualAddAdapterStrategyConfig() _target_: str = "{0}.{1}".format(ParallelLinearAdapter.__module__, ParallelLinearAdapter.__name__) class LoraKQVAdapter(ParallelLinearAdapter): """ - Lora Adapters are the same arch as regualr adapters but with potentially different input and output feature sizes + Lora Adapters are the same arch as regular adapters but with potentially different input and output feature sizes + and they do not use an bottleneck activation function + """ + + pass + + +class LoraKVAdapter(ParallelLinearAdapter): + """ + Lora Adapters are the same arch as regular adapters but with potentially different input and output feature sizes + and they do not use an bottleneck activation function + """ + + pass + + +class LoraQAdapter(ParallelLinearAdapter): + """ + Lora Adapters are the same arch as regular adapters but with potentially different input and output feature sizes and they do not use an bottleneck activation function """ @@ -214,6 +228,16 @@ class LoraKQVAdapterConfig(ParallelLinearAdapterConfig): _target_: str = "{0}.{1}".format(LoraKQVAdapter.__module__, LoraKQVAdapter.__name__) +@dataclass +class LoraQAdapterConfig(ParallelLinearAdapterConfig): + _target_: str = "{0}.{1}".format(LoraQAdapter.__module__, LoraQAdapter.__name__) + + +@dataclass +class LoraKVAdapterConfig(ParallelLinearAdapterConfig): + _target_: str = "{0}.{1}".format(LoraKVAdapter.__module__, LoraKVAdapter.__name__) + + class PromptEncoderAdapter(nn.Module, AdapterModuleUtil): """ The Tensor Parallel MLP prompt encoder network that is used to generate the virtual @@ -222,13 +246,7 @@ class PromptEncoderAdapter(nn.Module, AdapterModuleUtil): """ def __init__( - self, - virtual_tokens: int, - bottleneck_dim: int, - embedding_dim: int, - init_std: float, - output_dim: int, - adapter_strategy: adapter_mixin_strategies.ResidualAddAdapterStrategyConfig = None, + self, virtual_tokens: int, bottleneck_dim: int, embedding_dim: int, init_std: float, output_dim: int, ): """ Initializes the Tensor Model parallel MLP PromptEncoderMLP module. @@ -250,6 +268,7 @@ def __init__( # (@adithyare) the persistent=False will not pollute the indices into the state_dict of this module. self.register_buffer("indices", torch.LongTensor(list(range(self.virtual_tokens))), persistent=False) self.embedding = torch.nn.Embedding(self.virtual_tokens, self.embedding_dim) + self.inference_table = InferenceTable("taskname", self.embedding_dim, self.virtual_tokens) self.first = ColumnParallelLinear( self.embedding_dim, self.bottleneck_dim, @@ -273,15 +292,47 @@ def __init__( gradient_accumulation_fusion=gradient_accumulation_fusion, ) # Setup adapter strategy - self.setup_adapter_strategy(adapter_strategy) + self.setup_adapter_strategy(adapter_mixin_strategies.ReturnResultAdapterStrategy()) + + def set_inference_table(self, prompt_representation: torch.Tensor): + """ + This method caches the output representation from the Encoder and saves it inside `self.inference_table`. + """ + prompt_representation = prompt_representation.detach().clone() + self.inference_table.set_prompt_table(prompt_representation) - def forward(self, batch_size): + def clear_inference_table(self,): + self.inference_table.clear_prompt_table() + + def get_inference_table(self,): + return self.inference_table.get_prompt_table() + + def inner_forward(self,): input_embeds = self.embedding(self.indices).unsqueeze(0) intermediate_parallel, bias_parallel = self.first(input_embeds) intermediate_parallel = fused_bias_gelu(intermediate_parallel, bias_parallel) output_embeds, bias_parallel = self.second(intermediate_parallel) output_embeds = output_embeds + bias_parallel output_embeds = output_embeds.transpose(0, 1) + return output_embeds + + def forward(self, batch_size: int, use_cached_reps: bool = False) -> torch.Tensor: + """ + Forward pass through the encoder with caching of prompt representations + """ + if use_cached_reps: + output_embeds = self.get_inference_table().unsqueeze(1) + else: + if self.training: + if self.inference_table.is_inference_ready: + self.clear_inference_table() + output_embeds = self.inner_forward() + else: + if not self.inference_table.is_inference_ready: + output_embeds = self.inner_forward() + self.set_inference_table(output_embeds.squeeze(1)) + output_embeds = self.get_inference_table().unsqueeze(1) + output_embeds = output_embeds.expand(self.virtual_tokens, batch_size, self.output_dim) return output_embeds @@ -293,5 +344,4 @@ class PromptEncoderAdapterConfig: embedding_dim: int init_std: float output_dim: int - adapter_strategy: Optional[Any] = adapter_mixin_strategies.ResidualAddAdapterStrategyConfig() _target_: str = "{0}.{1}".format(PromptEncoderAdapter.__module__, PromptEncoderAdapter.__name__) diff --git a/nemo/collections/nlp/modules/common/megatron/attention.py b/nemo/collections/nlp/modules/common/megatron/attention.py index bb28ba630e34..6025b31c0bd5 100644 --- a/nemo/collections/nlp/modules/common/megatron/attention.py +++ b/nemo/collections/nlp/modules/common/megatron/attention.py @@ -22,11 +22,20 @@ AdapterName, InfusedAdapterConfig, LoraKQVAdapterConfig, + LoraKVAdapterConfig, + LoraQAdapterConfig, ) from nemo.collections.nlp.modules.common.megatron.fused_softmax import MatchedScaleMaskSoftmax from nemo.collections.nlp.modules.common.megatron.module import MegatronModule -from nemo.collections.nlp.modules.common.megatron.rotary_pos_embedding import apply_rotary_pos_emb -from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults, attention_mask_func +from nemo.collections.nlp.modules.common.megatron.position_embedding import XPOSPositionEmbedding +from nemo.collections.nlp.modules.common.megatron.position_embedding.rotary_position_embedding import ( + apply_rotary_pos_emb, +) +from nemo.collections.nlp.modules.common.megatron.utils import ( + ApexGuardDefaults, + _cast_if_autocast_enabled, + attention_mask_func, +) from nemo.collections.nlp.parts import utils_funcs from nemo.core import adapter_mixins @@ -53,6 +62,20 @@ HAVE_MEGATRON_CORE = False +try: + from flash_attn.bert_padding import pad_input, unpad_input + from flash_attn.flash_attn_interface import flash_attn_unpadded_func + from flash_attn.flash_attn_triton import flash_attn_func + + HAVE_FLASH_ATTENTION = True + +except (ImportError, ModuleNotFoundError): + + HAVE_FLASH_ATTENTION = False + + flash_attn_unpadded_func, flash_attn_func = None, None + unpad_input, pad_input = None, None + """ We use the following notation throughout this file: h: hidden size n: number of attention heads @@ -86,7 +109,7 @@ def __init__( attention_type=AttnType.self_attn, attn_mask_type=AttnMaskType.padding, precision=16, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, use_cpu_initialization=False, megatron_amp_O2=False, @@ -102,9 +125,9 @@ def __init__( sequence_parallel=False, gradient_accumulation_fusion=False, normalize_attention_scores=True, + use_flash_attention=False, ): super(ParallelAttention, self).__init__() - self.layer_number = max(1, layer_number) self.attention_type = attention_type self.attn_mask_type = attn_mask_type @@ -115,7 +138,14 @@ def __init__( self.megatron_legacy = megatron_legacy self.dtype = utils_funcs.dtype_from_precision(precision, megatron_amp_O2) - self.set_accepted_adapter_types([InfusedAdapterConfig._target_, LoraKQVAdapterConfig._target_]) + self.set_accepted_adapter_types( + [ + InfusedAdapterConfig._target_, + LoraKQVAdapterConfig._target_, + LoraQAdapterConfig._target_, + LoraKVAdapterConfig._target_, + ] + ) if kv_channels is None: assert ( @@ -192,6 +222,8 @@ def __init__( multi_query_attention=multi_query_attention, sequence_parallel=sequence_parallel, normalize_attention_scores=normalize_attention_scores, + position_embedding_type=position_embedding_type, + use_flash_attention=use_flash_attention, ) # Output. @@ -283,14 +315,14 @@ def custom_forward(*inputs): return hidden_states - def _allocate_memory(self, inference_max_sequence_len, batch_size, dtype): + def _allocate_memory(self, inference_max_sequence_len, batch_size, dtype, device): return torch.empty( inference_max_sequence_len, batch_size, self.num_attention_heads_per_partition, self.hidden_size_per_attention_head, dtype=dtype, - device=torch.cuda.current_device(), + device=device, ) def _transpose_last_dim(self, mixed_layer, num_splits, num_splits_first): @@ -348,10 +380,10 @@ def forward( if set_inference_key_value_memory: assert inference_max_sequence_len and inference_max_sequence_len > 0 self.inference_key_memory = self._allocate_memory( - inference_max_sequence_len, hidden_states.size(1), hidden_states.dtype + inference_max_sequence_len, hidden_states.size(1), hidden_states.dtype, hidden_states.device ) self.inference_value_memory = self._allocate_memory( - inference_max_sequence_len, hidden_states.size(1), hidden_states.dtype + inference_max_sequence_len, hidden_states.size(1), hidden_states.dtype, hidden_states.device ) self.inference_current_sequence_len = 0 @@ -395,6 +427,11 @@ def forward( else: # Attention heads [sk, b, h] --> [sk, b, (np * 2 * hn)] mixed_kv_layer, _ = self.key_value(encoder_output) + if self.is_adapter_available(): + lora_kv_adapter = self.get_adapter_module(AdapterName.LORA_KV_ADAPTER) + if lora_kv_adapter: + lora_mixed_kv_layer = lora_kv_adapter(encoder_output) + mixed_kv_layer = mixed_kv_layer + lora_mixed_kv_layer # [sk, b, (np * 2 * hn)] --> [sk, b, np, 2 * hn] new_tensor_shape = mixed_kv_layer.size()[:-1] + ( @@ -412,6 +449,11 @@ def forward( # Attention head [sq, b, h] --> [sq, b, hp] query_layer, _ = self.query(hidden_states) + if self.is_adapter_available(): + lora_q_adapter = self.get_adapter_module(AdapterName.LORA_Q_ADAPTER) + if lora_q_adapter: + lora_q_layer = lora_q_adapter(hidden_states) + query_layer = query_layer + lora_q_layer # [sq, b, hp] --> [sq, b, np, hn] new_tensor_shape = query_layer.size()[:-1] + ( self.num_attention_heads_per_partition, @@ -450,7 +492,8 @@ def forward( key_layer = self.inference_key_memory[:end, ...] value_layer = self.inference_value_memory[:end, ...] # Adjust attention mask - attention_mask = attention_mask[..., start:end, :end] + if attention_mask is not None: + attention_mask = attention_mask[..., start:end, :end] # adjust the key rotary positional embedding if rotary_pos_emb is not None: q_pos_emb, k_pos_emb = rotary_pos_emb @@ -521,7 +564,7 @@ def __init__( num_attention_heads, hidden_size, precision=16, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, use_cpu_initialization=False, megatron_amp_O2=False, @@ -685,21 +728,28 @@ def __init__( attention_type=AttnType.self_attn, attn_mask_type=AttnMaskType.padding, precision=16, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, masked_softmax_fusion=True, attention_dropout=0.1, sequence_parallel=False, normalize_attention_scores=True, multi_query_attention=False, + position_embedding_type='learned_absolute', + use_flash_attention=False, ): super(CoreAttention, self).__init__() self.precision = precision - self.fp16 = precision == 16 or precision == '16' - self.bf16 = precision == 'bf16' + self.fp16 = False + self.bf16 = False + if precision == 'bf16': + self.bf16 = True + elif int(precision) == 16: + self.fp16 = True self.multi_query_attention = multi_query_attention + self.position_embedding_type = position_embedding_type self.apply_query_key_layer_scaling = apply_query_key_layer_scaling self.attention_softmax_in_fp32 = False @@ -749,8 +799,17 @@ def __init__( # Dropout. Note that for a single iteration, this layer will generate # different outputs on different number of parallel partitions but # on average it should not be partition dependent. + self.attention_dropout_p = attention_dropout self.attention_dropout = torch.nn.Dropout(attention_dropout) + if use_flash_attention: + self.attn_fn = self.flash_attention + else: + self.attn_fn = self.torch_attention + + if position_embedding_type.lower() == 'xpos': + self.xpos = XPOSPositionEmbedding(kv_channels) + def forward( self, query_layer, @@ -763,19 +822,43 @@ def forward( relative_position_bias=None, headscale_tensor=None, ): + b, np, sq, sk, hn = ( + query_layer.size(1), + query_layer.size(2), + query_layer.size(0), + key_layer.size(0), + query_layer.size(3), + ) - # =================================== - # Raw attention scores. [b, np, s, s] - # =================================== + # ================================================== + # Update attention mask for inference. [b, np, sq, sk] + # ================================================== + if get_key_value: + with torch.no_grad(): + if layer_past is not None: + attention_mask = attention_mask[..., sq - 1, :sk].unsqueeze(2) + else: + attention_mask = attention_mask[..., :sq, :sk] - # [b, np, sq, sk] - output_size = (query_layer.size(1), query_layer.size(2), query_layer.size(0), key_layer.size(0)) + # ================================================== + # Update attention bias. [b, np, sq, sk] + # ================================================== + if relative_position_bias is not None: + relative_position_bias = relative_position_bias[ + :, + self.num_attention_heads_partition_offset : self.num_attention_heads_partition_offset + + self.num_attention_heads_per_partition, + -sq:, + -sk:, + ] + # ================================================== + # Update query_layer, key_layer, value_layer + # ================================================== # TODO: figure out how to do this # apply relative positional encoding (rotary embedding) if rotary_pos_emb is not None: q_pos_emb, k_pos_emb = rotary_pos_emb - query_layer = apply_rotary_pos_emb(query_layer, q_pos_emb) key_layer = apply_rotary_pos_emb(key_layer, k_pos_emb) # TODO, can apply positional embedding to value_layer so it has @@ -783,88 +866,68 @@ def forward( # otherwise, only relative positional embedding takes effect # value_layer = apply_rotary_pos_emb(value_layer, k_pos_emb) - if self.multi_query_attention: - # [sq, b, np, hn] -> [b, np * sq, hn] - query_layer = query_layer.permute([1, 2, 0, 3]).reshape( - output_size[0], output_size[1] * output_size[2], -1 - ) + if self.position_embedding_type.lower() == 'xpos': + query_layer = self.xpos(query_layer, offset=key_layer.shape[-2] - query_layer.shape[-2], downscale=False) + key_layer = self.xpos(key_layer, offset=0, downscale=True) - # [sk, b, 1, hn] -> [b, hn, sk] - key_layer = key_layer.squeeze(2).permute(1, 2, 0) + # ================================================== + # query_layer [sq, b, np, hn] + # key_layer [sk, b, np, hn] + # value_layer [sk, b, np, hn] + # attention_mask [b, 1, sq, sk] or [b, s] + # relative_position_bias [b, np, sq, sk] + # context_layer [b, np, sq, hn] + # ================================================== + context_layer = self.attn_fn(query_layer, key_layer, value_layer, attention_mask, relative_position_bias) - # preallocting input tensor: [b * np, sq, sk] - matmul_input_buffer = torch.empty( - output_size[0] * output_size[1], - output_size[2], - output_size[3], - dtype=query_layer.dtype, - device=torch.cuda.current_device(), - ) + if headscale_tensor is not None: + context_layer = context_layer * headscale_tensor - # Raw attention scores. [b * np, sq, sk] - matmul_result = torch.baddbmm( - matmul_input_buffer, - query_layer, # [b * np, sq, hn] - key_layer, # [b * np, hn, sk] - beta=0.0, - alpha=(1.0 / self.norm_factor), - ) - else: - # [sq, b, np, hn] -> [sq, b * np, hn] - query_layer = query_layer.view(output_size[2], output_size[0] * output_size[1], -1) - # [sk, b, np, hn] -> [sk, b * np, hn] - key_layer = key_layer.view(output_size[3], output_size[0] * output_size[1], -1) - - # preallocting input tensor: [b * np, sq, sk] - matmul_input_buffer = torch.empty( - output_size[0] * output_size[1], - output_size[2], - output_size[3], - dtype=query_layer.dtype, - device=torch.cuda.current_device(), - ) + # [b, np, sq, hn] --> [sq, b, np, hn] + context_layer = context_layer.permute(2, 0, 1, 3).contiguous() - # Raw attention scores. [b * np, sq, sk] - matmul_result = torch.baddbmm( - matmul_input_buffer, - query_layer.transpose(0, 1), # [b * np, sq, hn] - key_layer.transpose(0, 1).transpose(1, 2), # [b * np, hn, sk] - beta=0.0, - alpha=(1.0 / self.norm_factor) if self.normalize_attention_scores else 1.0, - ) + # [sq, b, np, hn] --> [sq, b, hp] + new_context_layer_shape = context_layer.size()[:-2] + (self.hidden_size_per_partition,) + context_layer = context_layer.view(*new_context_layer_shape) - # change view to [b, np, sq, sk] - attention_scores = matmul_result.view(*output_size) + return context_layer - if relative_position_bias is not None: - attention_scores += relative_position_bias[ - :, - self.num_attention_heads_partition_offset : self.num_attention_heads_partition_offset - + self.num_attention_heads_per_partition, - : attention_scores.size(2), - : attention_scores.size(3), - ] + def torch_attention(self, query_layer, key_layer, value_layer, attention_mask, attention_bias): + sq, b, np, hn = query_layer.shape + sk = key_layer.shape[0] - # ================================================== - # Update attention mask for inference. [b, np, sq, sk] - # ================================================== + if self.multi_query_attention: + query_layer = rearrange(query_layer, 'sq b np hn -> b (np sq) hn') + key_layer = rearrange(key_layer, 'sk b 1 hn -> b hn sk') + value_layer = rearrange(value_layer, 'sv b np hn -> (b np) sv hn') + else: + query_layer = rearrange(query_layer, 'sq b np hn -> (b np) sq hn') + key_layer = rearrange(key_layer, 'sk b np hn -> (b np) hn sk') + value_layer = rearrange(value_layer, 'sv b np hn -> (b np) sv hn') + + matmul_input_buffer = torch.empty( + query_layer.shape[0], + query_layer.shape[1], + key_layer.shape[2], + dtype=query_layer.dtype, + device=query_layer.device, + ) - if get_key_value: - with torch.no_grad(): - if layer_past is not None: - attention_mask = attention_mask[ - ..., attention_scores.size(3) - 1, : attention_scores.size(3) - ].unsqueeze(2) - else: - attention_mask = attention_mask[..., : attention_scores.size(3), : attention_scores.size(3)] + matmul_result = torch.baddbmm( + matmul_input_buffer, + query_layer, + key_layer, + beta=0.0, + alpha=(1.0 / self.norm_factor) if self.normalize_attention_scores else 1.0, + ) - # =========================== - # Attention probs and dropout - # =========================== + # change view to [b, np, sq, sk] + attention_scores = matmul_result.view(b, np, sq, sk) - # attention scores and attention mask [b, np, sq, sk] - attention_probs = self.scale_mask_softmax(attention_scores, attention_mask) + if attention_bias is not None: + attention_scores += attention_bias + attention_probs = self.scale_mask_softmax(attention_scores, attention_mask) # This is actually dropping out entire tokens to attend to, which might # seem a bit unusual, but is taken from the original Transformer paper. @@ -874,36 +937,100 @@ def forward( else: attention_probs = self.attention_dropout(attention_probs) - # ========================= - # Context layer. [sq, b, hp] - # ========================= + # change view [b * np, sq, sk] + attention_probs = rearrange(attention_probs, 'b np sq sk -> (b np) sq sk') - # value_layer -> context layer. - # [sk, b, np, hn] --> [b, np, sq, hn] + # matmul: [b * np, sq, hn] + context_layer = torch.bmm(attention_probs, value_layer) - # context layer shape: [b, np, sq, hn] - output_size = (value_layer.size(1), value_layer.size(2), query_layer.size(0), value_layer.size(3)) + # change view [b, np, sq, hn] + context_layer = rearrange(context_layer, '(b np) sq hn -> b np sq hn', np=np) - # change view [sk, b * np, hn] - value_layer = value_layer.view(value_layer.size(0), output_size[0] * output_size[1], -1) + return context_layer - # change view [b * np, sq, sk] - attention_probs = attention_probs.view(output_size[0] * output_size[1], output_size[2], -1) + def flash_attention(self, query_layer, key_layer, value_layer, attention_mask, attention_bias): + query_layer = rearrange(query_layer, 'sq b np hn -> b sq np hn') + key_layer = rearrange(key_layer, 'sk b np hn -> b sk np hn') + value_layer = rearrange(value_layer, 'sv b np hn -> b sv np hn') - # matmul: [b * np, sq, hn] - context_layer = torch.bmm(attention_probs, value_layer.transpose(0, 1)) + # Use to ensure dtype cast to fp16 or bf16 + query_layer = _cast_if_autocast_enabled(query_layer) + key_layer = _cast_if_autocast_enabled(key_layer) + value_layer = _cast_if_autocast_enabled(value_layer) + attention_mask = _cast_if_autocast_enabled(attention_mask) + attention_bias = _cast_if_autocast_enabled(attention_bias) - # change view [b, np, sq, hn] - context_layer = context_layer.view(*output_size) + if attention_bias is not None: + return self.flash_attention_triton(query_layer, key_layer, value_layer, attention_mask, attention_bias,) + else: + return self.flash_attention_cuda(query_layer, key_layer, value_layer, attention_mask,) + + def flash_attention_cuda(self, query_layer, key_layer, value_layer, attention_mask): + batch_size, seqlen, nheads, _ = query_layer.shape + + # True: attend / False: not attend + if attention_mask is None: + attention_mask_q = torch.ones(batch_size, query_layer.shape[1], device=query_layer.device).bool() + attention_mask_kv = torch.ones(batch_size, key_layer.shape[1], device=query_layer.device).bool() + elif len(attention_mask.shape) == 4: + # [b, 1, sq, sk] -> [b, sq] / [b, sk] + attention_mask_q = torch.any(torch.eq(attention_mask, False), dim=3).squeeze(1) + attention_mask_kv = torch.any(torch.eq(attention_mask, False), dim=2).squeeze(1) + else: + assert len(attention_mask.shape) == 2 + attention_mask_q = attention_mask + attention_mask_kv = attention_mask + + q, indices_q, cu_seqlens_q, max_seqlen_q = unpad_input(query_layer, attention_mask_q) + k, _, cu_seqlens_k, max_seqlen_k = unpad_input(key_layer, attention_mask_kv) + v, _, _, _ = unpad_input(value_layer, attention_mask_kv) + is_causal = self.attn_mask_type == AttnMaskType.causal and query_layer.shape[1] == key_layer.shape[1] + context_layer = flash_attn_unpadded_func( + q, + k, + v, + cu_seqlens_q, + cu_seqlens_k, + max_seqlen_q, + max_seqlen_k, + dropout_p=self.attention_dropout_p if self.training else 0.0, + causal=is_causal, + ) - if headscale_tensor is not None: - context_layer = context_layer * headscale_tensor + # [b, sq, np, hn] + context_layer = pad_input(context_layer, indices_q, batch_size, seqlen) - # [b, np, sq, hn] --> [sq, b, np, hn] - context_layer = context_layer.permute(2, 0, 1, 3).contiguous() + # [b, sq, np, hn] -> [b, np, sq, hn] + context_layer = context_layer.permute(0, 2, 1, 3) + return context_layer - # [sq, b, np, hn] --> [sq, b, hp] - new_context_layer_shape = context_layer.size()[:-2] + (self.hidden_size_per_partition,) - context_layer = context_layer.view(*new_context_layer_shape) + def flash_attention_triton(self, query_layer, key_layer, value_layer, attention_mask, attention_bias): + if self.attention_dropout_p > 0.0: + raise NotImplementedError(f'attention_dropout not implemented for flash_attention with attention bias') + + if attention_mask is not None: + if len(attention_mask.shape) == 4: + # [b, 1, sq, sk] -> [b, 1, sq, 1] / [b, 1, 1, sk] + attention_mask_q = torch.any(torch.eq(attention_mask, False), dim=3).unsqueeze(3) + attention_mask_kv = torch.any(torch.eq(attention_mask, False), dim=2).unsqueeze(2) + else: + # [b, s] -> [b, 1, s, 1] / [b, 1, 1, s] + assert len(attention_mask.shape) == 2 + attention_mask_q = attention_mask.unsqueeze(1).unsqueeze(3) + attention_mask_kv = attention_mask.unsqueeze(1).unsqueeze(2) + + if attention_bias.shape[2] == attention_mask_q.shape[2]: + attention_bias = attention_bias.masked_fill(~attention_mask_q, torch.finfo(query_layer.dtype).min) + if attention_bias.shape[3] == attention_mask_kv.shape[3]: + attention_bias = attention_bias.masked_fill(~attention_mask_kv, torch.finfo(query_layer.dtype).min) + + is_causal = self.attn_mask_type == AttnMaskType.causal and query_layer.shape[1] == key_layer.shape[1] + context_layer = flash_attn_func(query_layer, key_layer, value_layer, attention_bias, is_causal,) + + # [b, sq, np, hn] -> [b, np, sq, hn] + context_layer = context_layer.permute(0, 2, 1, 3) + + if attention_mask is not None: + context_layer = context_layer * attention_mask_q return context_layer diff --git a/nemo/collections/nlp/modules/common/megatron/fused_softmax.py b/nemo/collections/nlp/modules/common/megatron/fused_softmax.py index 2c914a67dd12..3da56e597751 100644 --- a/nemo/collections/nlp/modules/common/megatron/fused_softmax.py +++ b/nemo/collections/nlp/modules/common/megatron/fused_softmax.py @@ -53,7 +53,7 @@ def forward_torch_softmax(self, input, mask): probs = torch.nn.Softmax(dim=-1)(mask_output) if mask is not None: all_k_masked = mask.all(axis=-1) - zero_attention_mask = (1.0 - all_k_masked.float())[:, :, :, None] + zero_attention_mask = (1.0 - all_k_masked.type(probs.type()))[:, :, :, None] probs = probs * zero_attention_mask if self.input_in_float16 and self.softmax_in_fp32: diff --git a/nemo/collections/nlp/modules/common/megatron/language_model.py b/nemo/collections/nlp/modules/common/megatron/language_model.py index 36eacc43327a..277749189b3f 100755 --- a/nemo/collections/nlp/modules/common/megatron/language_model.py +++ b/nemo/collections/nlp/modules/common/megatron/language_model.py @@ -21,7 +21,12 @@ ) from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType from nemo.collections.nlp.modules.common.megatron.module import MegatronModule -from nemo.collections.nlp.modules.common.megatron.rotary_pos_embedding import RotaryEmbedding +from nemo.collections.nlp.modules.common.megatron.position_embedding import ( + ALiBiRelativePositionEmbedding, + KERPLERelativePositionEmbedding, + RotaryEmbedding, + SandwichRelativePositionEmbedding, +) from nemo.collections.nlp.modules.common.megatron.transformer import ParallelTransformer from nemo.collections.nlp.modules.common.megatron.utils import ( ApexGuardDefaults, @@ -65,7 +70,7 @@ def get_language_model( vocab_size, num_attention_heads, encoder_attn_mask_type, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, init_method=None, scaled_init_method=None, @@ -116,6 +121,8 @@ def get_language_model( fp8_amax_compute_algo='most_recent', reduce_amax=True, use_emha=False, + ub_tp_comm_overlap=False, + use_flash_attention=False, ): """Build language model and return along with the key to save.""" @@ -191,6 +198,8 @@ def get_language_model( fp8_amax_compute_algo=fp8_amax_compute_algo, reduce_amax=reduce_amax, use_emha=use_emha, + ub_tp_comm_overlap=ub_tp_comm_overlap, + use_flash_attention=use_flash_attention, ) # key used for checkpoints. language_model_key = 'language_model' @@ -506,6 +515,8 @@ def __init__( fp8_amax_compute_algo='most_recent', reduce_amax=True, use_emha=False, + ub_tp_comm_overlap=False, + use_flash_attention=False, ): super(TransformerLanguageModel, self).__init__(share_token_embeddings=share_embeddings_and_output_weights) @@ -527,7 +538,6 @@ def __init__( self.share_embeddings_and_output_weights = share_embeddings_and_output_weights self.sequence_parallel = sequence_parallel self.dtype = utils_funcs.dtype_from_precision(precision, megatron_amp_O2) - if kv_channels is None: assert ( @@ -560,6 +570,40 @@ def __init__( rotary_dim = int(rotary_dim * rotary_percentage) self.rotary_pos_emb = RotaryEmbedding(rotary_dim) + elif position_embedding_type == 'alibi': + # TODO: If this is used for encoder-decodemax_position_embeddingsr model, implement proper logic and following + # addition for decoder. Currently it is only used for decoder model only. + # Encoder-decoder model, such as T5 is implemented in token_level_encoder_decoder.py + self.encoder_relative_position_embedding = ALiBiRelativePositionEmbedding( + bidirectional=encoder_attn_mask_type != AttnMaskType.causal, + num_attention_heads=num_attention_heads, + layer_type=LayerType.encoder, + num_attention_heads_alibi=None, + max_seq_len=max_position_embeddings, + ) + + elif position_embedding_type == 'kerple': + # TODO: If this is used for encoder-decodemax_position_embeddingsr model, implement proper logic and following + # addition for decoder. Currently it is only used for decoder model only. + # Encoder-decoder model, such as T5 is implemented in token_level_encoder_decoder.py + self.encoder_relative_position_embedding = KERPLERelativePositionEmbedding( + bidirectional=encoder_attn_mask_type != AttnMaskType.causal, + num_attention_heads=num_attention_heads, + layer_type=LayerType.encoder, + num_attention_heads_kerple=None, + max_seq_len=max_position_embeddings, + ) + assert use_flash_attention == False # flash-attention not supported with kerple at this point + + elif position_embedding_type == 'sandwich': + self.encoder_relative_position_embedding = SandwichRelativePositionEmbedding( + bidirectional=encoder_attn_mask_type != AttnMaskType.causal, + num_attention_heads=num_attention_heads, + layer_type=LayerType.encoder, + hidden_size=self.hidden_size // num_attention_heads if kv_channels is None else kv_channels, + max_seq_len=max_position_embeddings, + ) + # Transformer. self.encoder = ParallelTransformer( init_method=self.init_method, @@ -611,6 +655,9 @@ def __init__( fp8_amax_compute_algo=fp8_amax_compute_algo, reduce_amax=reduce_amax, use_emha=use_emha, + ub_tp_comm_overlap=ub_tp_comm_overlap, + position_embedding_type=position_embedding_type, + use_flash_attention=use_flash_attention, ) self._encoder_key = 'encoder' @@ -651,6 +698,8 @@ def __init__( activations_checkpoint_granularity=activations_checkpoint_granularity, activations_checkpoint_layers_per_pipeline=activations_checkpoint_layers_per_pipeline, transformer_engine=transformer_engine, + position_embedding_type=position_embedding_type, + use_flash_attention=use_flash_attention, ) self._decoder_key = 'decoder' @@ -710,10 +759,7 @@ def forward( ptuning_adapter = self.get_adapter_module(AdapterName.PTUNING_ADAPTER) v = ptuning_adapter.virtual_tokens if ptuning_adapter and _sq >= v: # The sequence should be longer the v to insert virtual embeddings. - strategy = ptuning_adapter.adapter_strategy - virtual_embeddings = self.forward_single_enabled_adapter_( - _bs, ptuning_adapter, adapter_name=AdapterName.PTUNING_ADAPTER, adapter_strategy=strategy, - ) + virtual_embeddings = ptuning_adapter(_bs) encoder_input = encoder_input[ v:, :, : ] # the first v tokens are pads so that they can be swapped out with virtual embeddings. @@ -722,26 +768,35 @@ def forward( pass # enc_attn_mask: [1, 1, s, s] - - if self.position_embedding_type == 'rope': - if inference_max_sequence_len is not None: - rotary_pos_emb = self.rotary_pos_emb(inference_max_sequence_len) - elif self.encoder.input_tensor is not None: - if self.sequence_parallel: - rotary_pos_emb = self.rotary_pos_emb( - self.encoder.input_tensor.size(0) * parallel_state.get_tensor_model_parallel_world_size() - ) - else: - rotary_pos_emb = self.rotary_pos_emb(self.encoder.input_tensor.size(0)) + if inference_max_sequence_len is not None: + enc_seq_length = inference_max_sequence_len + elif self.encoder.input_tensor is not None: + if self.sequence_parallel: + enc_seq_length = ( + self.encoder.input_tensor.size(0) * parallel_state.get_tensor_model_parallel_world_size() + ) else: - if self.sequence_parallel: - rotary_pos_emb = self.rotary_pos_emb( - encoder_input.size(0) * parallel_state.get_tensor_model_parallel_world_size() - ) - else: - rotary_pos_emb = self.rotary_pos_emb(encoder_input.size(0)) + enc_seq_length = self.encoder.input_tensor.size(0) else: - rotary_pos_emb = None + if self.sequence_parallel: + enc_seq_length = encoder_input.size(0) * parallel_state.get_tensor_model_parallel_world_size() + else: + enc_seq_length = encoder_input.size(0) + + rotary_pos_emb = None + encoder_self_attention_relative_position_bias = None + if self.position_embedding_type == 'rope': + rotary_pos_emb = self.rotary_pos_emb(enc_seq_length) + elif ( + self.position_embedding_type == 'alibi' + or self.position_embedding_type == 'sandwich' + or self.position_embedding_type == 'kerple' + ): + encoder_self_attention_relative_position_bias = self.encoder_relative_position_embedding( + query_seq_length=enc_seq_length, key_seq_length=enc_seq_length, + ) + # causal attention bias: [1, head, 1, k] + # non-causal attention bias: [1, head, q, k] # encoder. if enc_hidden_states is None: @@ -756,6 +811,7 @@ def forward( rotary_pos_emb=(rotary_pos_emb, None, None) if rotary_pos_emb is not None else None, # This assumes that this being used as a GPT/BERT model only (no cross-attention) + self_attention_relative_position_bias=encoder_self_attention_relative_position_bias, ) else: encoder_output = enc_hidden_states.to(encoder_input.dtype) diff --git a/nemo/collections/nlp/modules/common/megatron/layer_norm_1p.py b/nemo/collections/nlp/modules/common/megatron/layer_norm_1p.py index ca59bcc8850a..4a94b37aae7b 100644 --- a/nemo/collections/nlp/modules/common/megatron/layer_norm_1p.py +++ b/nemo/collections/nlp/modules/common/megatron/layer_norm_1p.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from torch import nn +import torch +from nemo.collections.nlp.modules.common.megatron.utils import _cast_if_autocast_enabled try: from apex.contrib.layer_norm.layer_norm import FastLayerNorm as OrigFastLayerNorm @@ -35,8 +36,8 @@ def __init__(self, *args, **kwargs): ), 'LayerNorm1P implemented only as an apex.contrib.layer_norm.FastLayerNorm extension' def reset_parameters(self): - nn.init.zeros_(self.weight) - nn.init.zeros_(self.bias) + torch.nn.init.zeros_(self.weight) + torch.nn.init.zeros_(self.bias) def forward(self, x): return _fast_layer_norm(x, self.weight + 1, self.bias, self.epsilon) @@ -44,6 +45,27 @@ def forward(self, x): else: - class LayerNorm1P(nn.Module): + class LayerNorm1P(torch.nn.Module): def __init__(self, *args, **kwargs): raise NotImplementedError('LayerNorm1P available only with apex installed') + + +class LPLayerNorm(torch.nn.LayerNorm): + def __init__(self, normalized_shape, eps=1e-05, elementwise_affine=True, device=None, dtype=None): + super().__init__( + normalized_shape=normalized_shape, + eps=eps, + elementwise_affine=elementwise_affine, + device=device, + dtype=dtype, + ) + + def forward(self, x): + module_device = x.device + downcast_x = _cast_if_autocast_enabled(x) + downcast_weight = _cast_if_autocast_enabled(self.weight) if self.weight is not None else self.weight + downcast_bias = _cast_if_autocast_enabled(self.bias) if self.bias is not None else self.bias + with torch.autocast(enabled=False, device_type=module_device.type): + return torch.nn.functional.layer_norm( + downcast_x, self.normalized_shape, downcast_weight, downcast_bias, self.eps + ) diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py b/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py index 28eb39e630fc..20f25a25179a 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py @@ -44,7 +44,7 @@ def get_decoder_model( ffn_hidden_size, num_layers, num_attention_heads, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, init_method=None, scaled_init_method=None, @@ -88,6 +88,8 @@ def get_decoder_model( moe_dropout=0.0, turn_off_rop=False, # turn off the RoP positional embedding version=1, + position_embedding_type='learned_absolute', + use_flash_attention=False, ): """Build language model and return along with the key to save.""" @@ -145,6 +147,8 @@ def get_decoder_model( num_moe_experts=num_moe_experts, moe_frequency=moe_frequency, moe_dropout=moe_dropout, + position_embedding_type=position_embedding_type, + use_flash_attention=use_flash_attention, ) elif arch == "retro": decoder = MegatronRetrievalTransformerDecoderModule( diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py b/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py index 4005ffbd879e..b98aa26b1b23 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py @@ -45,7 +45,7 @@ def get_encoder_model( ffn_hidden_size, num_layers, num_attention_heads, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, init_method=None, scaled_init_method=None, @@ -90,6 +90,8 @@ def get_encoder_model( moe_dropout=0.0, turn_off_rop=False, # turn off the RoP positional embedding version=1, # model version + position_embedding_type='learned_absolute', + use_flash_attention=False, ): """Build language model and return along with the key to save.""" @@ -147,6 +149,8 @@ def get_encoder_model( num_moe_experts=num_moe_experts, moe_frequency=moe_frequency, moe_dropout=moe_dropout, + position_embedding_type=position_embedding_type, + use_flash_attention=use_flash_attention, ) elif arch == "retro": encoder = MegatronRetrievalTransformerEncoderModule( diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_init.py b/nemo/collections/nlp/modules/common/megatron/megatron_init.py index e0551fad5d16..7431bffad26c 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_init.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_init.py @@ -67,6 +67,7 @@ def initialize_model_parallel_for_nemo( global_batch_size=None, rampup_batch_size=None, use_fp8=False, + init_mpi_proc_group=False, seed=1234, apex_transformer_log_level=30, ): @@ -83,6 +84,7 @@ def initialize_model_parallel_for_nemo( app_state.pipeline_model_parallel_size = pipeline_model_parallel_size app_state.virtual_pipeline_model_parallel_size = virtual_pipeline_model_parallel_size app_state.use_fp8 = use_fp8 + app_state.init_mpi_proc_group = init_mpi_proc_group ( app_state.tensor_model_parallel_rank, app_state.pipeline_model_parallel_rank, diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py index c3cb1fd05c3b..f2c42597eb83 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py @@ -85,6 +85,8 @@ def __init__( num_moe_experts=1, moe_frequency=1, moe_dropout=0.0, + position_embedding_type='learned_absolute', + use_flash_attention=False, ): super(MegatronTransformerDecoderModule, self).__init__() @@ -149,6 +151,8 @@ def __init__( num_moe_experts=num_moe_experts, moe_frequency=moe_frequency, moe_dropout=moe_dropout, + position_embedding_type=position_embedding_type, + use_flash_attention=use_flash_attention, ) self._model_key = 'model' diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py index 2eacf8aad672..60c347338105 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py @@ -82,6 +82,8 @@ def __init__( num_moe_experts=1, moe_frequency=1, moe_dropout=0.0, + position_embedding_type='learned_absolute', + use_flash_attention=False, ): super(MegatronTransformerEncoderModule, self).__init__() @@ -96,6 +98,7 @@ def __init__( self.parent_model_type = parent_model_type self.normalization = normalization self.transformer_block_type = transformer_block_type + self.use_flash_attention = use_flash_attention if kv_channels is None: @@ -147,6 +150,8 @@ def __init__( num_moe_experts=num_moe_experts, moe_frequency=moe_frequency, moe_dropout=moe_dropout, + position_embedding_type=position_embedding_type, + use_flash_attention=use_flash_attention, ) self._model_key = 'model' @@ -163,9 +168,12 @@ def forward( enc_self_attention_relative_position_bias=None, ): # convert to Megatron mask - enc_attn_mask_3d = build_attention_mask_3d( - source_mask=enc_attn_mask, target_mask=enc_attn_mask, attn_mask_type=self.model_attn_mask_type, - ) + if self.use_flash_attention: + enc_attn_mask_3d = enc_attn_mask < 0.5 + else: + enc_attn_mask_3d = build_attention_mask_3d( + source_mask=enc_attn_mask, target_mask=enc_attn_mask, attn_mask_type=self.model_attn_mask_type, + ) # transformer encoder enc_output = self.model( diff --git a/nemo/collections/nlp/modules/common/megatron/module.py b/nemo/collections/nlp/modules/common/megatron/module.py index 49759bfa7a91..22a223013fd2 100644 --- a/nemo/collections/nlp/modules/common/megatron/module.py +++ b/nemo/collections/nlp/modules/common/megatron/module.py @@ -262,17 +262,17 @@ def __init__(self, module, precision): super().__init__() self.precision = precision - if precision == 16 or precision == '16': - self.add_module('module', module.half()) + if precision == 'bf16': + self.add_module('module', module.bfloat16()) def float16_converter(val): - return val.half() + return val.bfloat16() - elif precision == 'bf16': - self.add_module('module', module.bfloat16()) + elif int(precision) == 16: + self.add_module('module', module.half()) def float16_converter(val): - return val.bfloat16() + return val.half() else: raise Exception( @@ -290,7 +290,7 @@ def forward(self, *inputs, **kwargs): if getattr(self.module, 'pre_process', True): inputs = fp32_to_float16(inputs, self.float16_converter) outputs = self.module(*inputs, **kwargs) - if parallel_state.is_pipeline_last_stage(): + if parallel_state.is_pipeline_last_stage() and self.training: outputs = float16_to_fp32(outputs) return outputs diff --git a/nemo/collections/nlp/modules/common/megatron/position_embedding/__init__.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/__init__.py new file mode 100644 index 000000000000..fdbbed86cb2c --- /dev/null +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/__init__.py @@ -0,0 +1,31 @@ +# coding=utf-8 +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo.collections.nlp.modules.common.megatron.position_embedding.alibi_relative_position_embedding import ( + ALiBiRelativePositionEmbedding, +) +from nemo.collections.nlp.modules.common.megatron.position_embedding.kerple_relative_position_embedding import ( + KERPLERelativePositionEmbedding, +) +from nemo.collections.nlp.modules.common.megatron.position_embedding.rotary_position_embedding import RotaryEmbedding +from nemo.collections.nlp.modules.common.megatron.position_embedding.sandwich_relative_position_embedding import ( + SandwichRelativePositionEmbedding, +) +from nemo.collections.nlp.modules.common.megatron.position_embedding.t5_relative_position_embedding import ( + T5RelativePositionEmbedding, +) +from nemo.collections.nlp.modules.common.megatron.position_embedding.xpos_position_embedding import ( + XPOSPositionEmbedding, +) diff --git a/nemo/collections/nlp/modules/common/megatron/alibi_relative_position_embedding.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/alibi_relative_position_embedding.py similarity index 73% rename from nemo/collections/nlp/modules/common/megatron/alibi_relative_position_embedding.py rename to nemo/collections/nlp/modules/common/megatron/position_embedding/alibi_relative_position_embedding.py index 4f5abd96743b..6425e288f277 100644 --- a/nemo/collections/nlp/modules/common/megatron/alibi_relative_position_embedding.py +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/alibi_relative_position_embedding.py @@ -42,20 +42,31 @@ def build_slopes(num_attention_heads, num_attention_heads_alibi): """ Builds a slopes tensor. """ - slopes = torch.Tensor( - get_slopes(num_attention_heads_alibi) + [0] * (num_attention_heads - num_attention_heads_alibi) - ).cuda() - return slopes.unsqueeze(-1).unsqueeze(-1) + slopes = ( + torch.Tensor(get_slopes(num_attention_heads_alibi) + [0] * (num_attention_heads - num_attention_heads_alibi)) + .unsqueeze(-1) + .unsqueeze(-1) + ) + if torch.cuda.is_available(): + slopes = slopes.to(torch.cuda.current_device()) -def build_relative_position(query_length, key_length, num_attention_heads): - context_position = torch.arange(query_length)[:, None].cuda() - memory_position = torch.arange(key_length)[None, :].cuda() - # shape (query_length, key_length, num_heads) - relative_position = memory_position - context_position + return slopes + + +def build_relative_position(max_seq_len, full=True): + """ + full=True: shape (max_seq_len, max_seq_len) + full=False: shape (max_seq_len) + """ + relative_position = torch.arange(1 - max_seq_len, 1)[None, :].mul(-1) # (1, max_seq_len) + + if full: + memory_position = torch.arange(1 - max_seq_len, 1)[:, None].mul(-1) + relative_position = torch.abs(memory_position - relative_position) # (max_seq_len, max_seq_len) - # shape (num_attention_heads, max_seq_len, max_seq_len) - relative_position = torch.abs(relative_position).unsqueeze(0).expand(num_attention_heads, -1, -1) + if torch.cuda.is_available(): + relative_position = relative_position.to(torch.cuda.current_device()) return relative_position @@ -68,7 +79,7 @@ class ALiBiRelativePositionEmbedding(torch.nn.Module): """ def __init__( - self, bidirectional, num_attention_heads, layer_type, num_attention_heads_alibi=None, max_seq_len=512 + self, bidirectional, num_attention_heads, layer_type, num_attention_heads_alibi=None, max_seq_len=512, ): """ Args: @@ -101,20 +112,25 @@ def __init__( # cache the slopes self.slopes = build_slopes(num_attention_heads, num_attention_heads_alibi) # cache the relative position bias. shape (num_attention_heads, max_seq_len, max_seq_len) - self.relative_position = build_relative_position(max_seq_len, max_seq_len, num_attention_heads) + # if we use causal attention (not bidrectional), we can use singleton relative position + self.relative_position = ( + build_relative_position(max_seq_len, full=bidirectional).unsqueeze(0).expand(num_attention_heads, -1, -1) + ) def forward(self, query_seq_length, key_seq_length): # used cached relative position if possible max_seq_len = max(query_seq_length, key_seq_length) if max_seq_len > self.max_seq_len: - relative_position = build_relative_position(max_seq_len, max_seq_len, self.num_attention_heads) + relative_position = ( + build_relative_position(max_seq_len, full=self.bidirectional) + .unsqueeze(0) + .expand(self.num_attention_heads, -1, -1) + ) else: relative_position = self.relative_position # shape (num_attention_heads, query_seq_length, key_seq_length) - relative_position = relative_position[:, :query_seq_length, :key_seq_length] + relative_position = relative_position[:, -query_seq_length:, -key_seq_length:] # if not bidirectional, mask out the future positions - if not self.bidirectional: - relative_position = torch.tril(relative_position) # shape (1, num_heads, query_length, key_length) return -relative_position.unsqueeze(0) * self.slopes diff --git a/nemo/collections/nlp/modules/common/megatron/position_embedding/kerple_relative_position_embedding.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/kerple_relative_position_embedding.py new file mode 100644 index 000000000000..fc0c837da556 --- /dev/null +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/kerple_relative_position_embedding.py @@ -0,0 +1,93 @@ +# coding=utf-8 +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import torch + +from nemo.collections.nlp.modules.common.megatron.position_embedding.alibi_relative_position_embedding import ( + build_relative_position, + build_slopes, +) + +__all__ = ['KERPLERelativePositionEmbedding'] + + +class KERPLERelativePositionEmbedding(torch.nn.Module): + """ + kerple (Attention with Linear Biases) relative position embedding for auto-regressive decoder + and joint encoder (symmetric for forward and backward distance). + Based on https://arxiv.org/bas/2108.12409 + """ + + def __init__( + self, bidirectional, num_attention_heads, layer_type, num_attention_heads_kerple=None, max_seq_len=512, + ): + """ + Args: + bidirectional: Whether to use bidirectional relative position embedding + num_attention_heads: Number of attention heads + layer_type: Layer type. Can be one of [LayerType.encoder or LayerType.decoder]. Willdetermine the bias construction + num_attention_heads_kerple: Number of attention heads for which kerple bias will be used + max_seq_len: Maximum sequence length for precomputed relative positions. Larger sizes will result in more memory usage by computing kerple mask on-the-fly. + """ + super().__init__() + + if (num_attention_heads_kerple is None) or (num_attention_heads_kerple <= 0): + num_attention_heads_kerple = num_attention_heads + + if num_attention_heads_kerple > num_attention_heads: + raise ValueError( + f"num_attention_heads_kerple ({num_attention_heads_kerple}) cannot be larger than num_attention_heads ({num_attention_heads})" + ) + + self.bidirectional = bidirectional + self.num_attention_heads = num_attention_heads + # LayerType.encoder or LayerType.decoder. Is only needed to determine the group for the all_reduce + self.layer_type = layer_type + # define the size of pre-computed relative position slopes. + # define the number of attention heads for which kerple mask will be pre-computed (the rest are disabled). + self.num_attention_heads_kerple = num_attention_heads_kerple + # Larger sizes will result in more memory usage by computing kerple mask on-the-fly. + self.max_seq_len = max_seq_len + + # initialize the slopes + self.kerple_b = torch.nn.Parameter(build_slopes(num_attention_heads, num_attention_heads_kerple)) + self.kerple_a = torch.nn.Parameter(torch.ones_like(self.kerple_b)) + self.kerple_p = torch.nn.Parameter(torch.ones_like(self.kerple_b)) + + # cache the relative position bias. shape (num_attention_heads, max_seq_len, max_seq_len) + # if we use causal attention (not bidrectional), we can use singleton relative position + self.relative_position = ( + build_relative_position(max_seq_len, full=True).unsqueeze(0).expand(num_attention_heads, -1, -1) + ) + + def forward(self, query_seq_length, key_seq_length): + # used cached relative position if possible + max_seq_len = max(query_seq_length, key_seq_length) + if max_seq_len > self.max_seq_len: + relative_position = ( + build_relative_position(max_seq_len, full=True).unsqueeze(0).expand(self.num_attention_heads, -1, -1) + ) + else: + relative_position = self.relative_position + # shape (num_attention_heads, query_seq_length, key_seq_length) + relative_position = relative_position[:, -query_seq_length:, -key_seq_length:] + # if not bidirectional, mask out the future positions + if not self.bidirectional: + relative_position = torch.tril(relative_position) + + # shape (1, num_heads, query_length, key_length) + return -self.kerple_b * torch.log(1 + self.kerple_a * relative_position.unsqueeze(0).pow(self.kerple_p)) diff --git a/nemo/collections/nlp/modules/common/megatron/rotary_pos_embedding.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/rotary_position_embedding.py similarity index 96% rename from nemo/collections/nlp/modules/common/megatron/rotary_pos_embedding.py rename to nemo/collections/nlp/modules/common/megatron/position_embedding/rotary_position_embedding.py index 191601054ef8..5a8d6d7dd333 100644 --- a/nemo/collections/nlp/modules/common/megatron/rotary_pos_embedding.py +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/rotary_position_embedding.py @@ -38,7 +38,8 @@ def forward(self, max_seq_len, offset=0): def _rotate_half(x): """ - change sign so the last dimension becomes [-odd, +even] + change sign so the last dimension + [A, B, C, D] -> [-C, -D, A, B] """ x = rearrange(x, '... (j d) -> ... j d', j=2) x1, x2 = x.unbind(dim=-2) diff --git a/nemo/collections/nlp/modules/common/megatron/position_embedding/sandwich_relative_position_embedding.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/sandwich_relative_position_embedding.py new file mode 100644 index 000000000000..0e2dfd7d2ef6 --- /dev/null +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/sandwich_relative_position_embedding.py @@ -0,0 +1,75 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch + +from nemo.collections.nlp.modules.common.megatron.position_embedding.alibi_relative_position_embedding import ( + build_relative_position, +) +from nemo.utils.decorators import experimental + +__all__ = ['SandwichRelativePositionEmbedding'] + + +@experimental +class SandwichRelativePositionEmbedding(torch.nn.Module): + """ + Dissecting Transformer Length Extrapolation via the Lens of Receptive Field Analysis + Based on https://arxiv.org/abs/2212.10356 + """ + + def __init__( + self, bidirectional, num_attention_heads, layer_type, hidden_size, max_seq_len=512, + ): + """ + Args: + num_attention_heads: Number of attention heads + hidden_size: Hidden size per attention head + """ + super().__init__() + self.bidirectional = bidirectional + self.layer_type = layer_type + self.num_attention_heads = num_attention_heads + self.hidden_size = hidden_size + self.max_seq_len = max_seq_len + self.relative_position = build_relative_position(max_seq_len, full=True) + + def forward(self, query_seq_length, key_seq_length): + # used cached relative position if possible + max_seq_len = max(query_seq_length, key_seq_length) + if max_seq_len > self.max_seq_len: + relative_position = build_relative_position(max_seq_len, full=True) + else: + relative_position = self.relative_position + + # shape (query_seq_length, key_seq_length) + relative_position = relative_position[-query_seq_length:, -key_seq_length:] + # if not bidirectional, mask out the future positions + if not self.bidirectional: + relative_position = torch.tril(relative_position) + + inv_freq = 1.0 / ( + 10000 + ** (2 * torch.arange(1, self.hidden_size / 2 + 1, device=relative_position.device) / self.hidden_size) + ) + + _bias = torch.sum((relative_position[:, :, None].repeat(1, 1, len(inv_freq)) * inv_freq).cos(), axis=2) + bias = _bias.repeat(self.num_attention_heads, 1, 1) + + _bias_scales = torch.arange(1, self.num_attention_heads + 1, 1, device=relative_position.device) + bias_scales = _bias_scales[:, None, None] + + scaled_bias = (bias - self.hidden_size / 2) / (bias_scales * 8 / self.num_attention_heads).unsqueeze(0) + + return scaled_bias diff --git a/nemo/collections/nlp/modules/common/megatron/t5_relative_position_embedding.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/t5_relative_position_embedding.py similarity index 95% rename from nemo/collections/nlp/modules/common/megatron/t5_relative_position_embedding.py rename to nemo/collections/nlp/modules/common/megatron/position_embedding/t5_relative_position_embedding.py index c2a0c8661acf..4566d9aa7876 100644 --- a/nemo/collections/nlp/modules/common/megatron/t5_relative_position_embedding.py +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/t5_relative_position_embedding.py @@ -43,9 +43,7 @@ def __init__( # Relative position Embedding # Relative Position embedding (all attention layers). - self.relative_position_embedding = torch.nn.Embedding( - self.relative_position_num_buckets, num_attention_heads - ).to(torch.cuda.current_device()) + self.relative_position_embedding = torch.nn.Embedding(self.relative_position_num_buckets, num_attention_heads) self._relative_position_embedding_key = 'relative_position_embedding' init_method(self.relative_position_embedding.weight) @@ -104,8 +102,9 @@ def _compute_relative_position_bucket(self, query_length, key_length): """ """Compute binned relative position bias""" - context_position = torch.arange(query_length, dtype=torch.long, device=torch.cuda.current_device())[:, None] - memory_position = torch.arange(key_length, dtype=torch.long, device=torch.cuda.current_device())[None, :] + device = self.relative_position_embedding.weight.device + context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None] + memory_position = torch.arange(key_length, dtype=torch.long, device=device)[None, :] relative_position = memory_position - context_position # shape (query_length, key_length) relative_position_bucket_tensor = self._relative_position_bucket( diff --git a/nemo/collections/nlp/modules/common/megatron/position_embedding/xpos_position_embedding.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/xpos_position_embedding.py new file mode 100644 index 000000000000..ef59234790c5 --- /dev/null +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/xpos_position_embedding.py @@ -0,0 +1,78 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +from einops import rearrange +from nemo.utils.decorators import experimental + + +def fixed_pos_embedding(x): + seq_len, dim = x.shape + inv_freq = 1.0 / (10000 ** (torch.arange(0, dim) / dim)) + sinusoid_inp = torch.einsum("i , j -> i j", torch.arange(0, seq_len, dtype=torch.float), inv_freq).to(x) + return torch.sin(sinusoid_inp), torch.cos(sinusoid_inp) + + +def rotate_every_two(x): + x1 = x[:, :, ::2] + x2 = x[:, :, 1::2] + x = torch.stack((-x2, x1), dim=-1) + return x.flatten(-2) # in einsum notation: rearrange(x, '... d j -> ... (d j)')\ + + +def duplicate_interleave(m): + """ + A simple version of `torch.repeat_interleave` for duplicating a matrix while interleaving the copy. + """ + dim0 = m.shape[0] + m = m.view(-1, 1) # flatten the matrix + m = m.repeat(1, 2) # repeat all elements into the 2nd dimension + m = m.view(dim0, -1) # reshape into a matrix, interleaving the copy + return m + + +def apply_rotary_pos_emb(x, sin, cos, scale=1): + sin, cos = map(lambda t: duplicate_interleave(t * scale), (sin, cos)) + # einsum notation for lambda t: repeat(t[offset:x.shape[1]+offset,:], "n d -> () n () (d j)", j=2) + return (x * cos) + (rotate_every_two(x) * sin) + + +@experimental +class XPOSPositionEmbedding(nn.Module): + def __init__(self, head_dim, scale_base=2048): + super().__init__() + self.head_dim = head_dim + self.scale_base = scale_base + self.register_buffer("scale", (torch.arange(0, head_dim, 2) + 0.4 * head_dim) / (1.4 * head_dim)) + + def forward(self, x, offset=0, downscale=False): + length, b = x.shape[0], x.shape[1] + x = rearrange(x, 's b np hn -> (b np) s hn') + min_pos = -(length + offset) // 2 + max_pos = length + offset + min_pos + scale = self.scale ** torch.arange(min_pos, max_pos, 1).to(self.scale).div(self.scale_base)[:, None] + sin, cos = fixed_pos_embedding(scale) + + if scale.shape[0] > length: + scale = scale[-length:] + sin = sin[-length:] + cos = cos[-length:] + + if downscale: + scale = 1 / scale + + x = apply_rotary_pos_emb(x, sin, cos, scale) + x = rearrange(x, '(b np) s hn -> s b np hn', b=b) + return x diff --git a/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py b/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py index 73c41cee6c6f..83dea362c3e1 100644 --- a/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py @@ -19,7 +19,7 @@ from einops import rearrange, repeat from nemo.collections.nlp.modules.common.megatron.module import MegatronModule -from nemo.collections.nlp.modules.common.megatron.rotary_pos_embedding import RotaryEmbedding +from nemo.collections.nlp.modules.common.megatron.position_embedding import RotaryEmbedding from nemo.collections.nlp.modules.common.megatron.transformer import ParallelTransformer from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults, build_attention_mask_3d diff --git a/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py b/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py index 229a9af48048..fc16295020fb 100644 --- a/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py +++ b/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py @@ -15,12 +15,6 @@ import torch from omegaconf import DictConfig -from nemo.collections.nlp.modules.common.megatron.alibi_relative_position_embedding import ( - ALiBiRelativePositionEmbedding, -) -from nemo.collections.nlp.modules.common.megatron.kerple_relative_position_embedding import ( - KERPLERelativePositionEmbedding, -) from nemo.collections.nlp.modules.common.megatron.language_model import Embedding from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType from nemo.collections.nlp.modules.common.megatron.megatron_decoders import get_decoder_model @@ -29,7 +23,11 @@ ) from nemo.collections.nlp.modules.common.megatron.megatron_encoders import get_encoder_model from nemo.collections.nlp.modules.common.megatron.module import MegatronModule -from nemo.collections.nlp.modules.common.megatron.t5_relative_position_embedding import T5RelativePositionEmbedding +from nemo.collections.nlp.modules.common.megatron.position_embedding import ( + ALiBiRelativePositionEmbedding, + KERPLERelativePositionEmbedding, + T5RelativePositionEmbedding, +) from nemo.collections.nlp.modules.common.megatron.utils import ( ApexGuardDefaults, build_position_ids, @@ -197,6 +195,11 @@ def __init__( else: self.encoder_relative_position_embedding = None + if encoder_cfg.get('use_flash_attention', False) and encoder_cfg.get( + 'position_embedding_type', 'learned_absolute' + ) in ['relative', 'kerple']: + raise ValueError('flash-attention not supported with relative or kerple at this point') + encoder = get_encoder_model( arch=encoder_cfg.arch, hidden_size=encoder_cfg.hidden_size, @@ -243,6 +246,8 @@ def __init__( num_moe_experts=encoder_cfg.get('num_moe_experts', 1), moe_frequency=encoder_cfg.get('moe_frequency', 1), moe_dropout=encoder_cfg.get('moe_dropout', 0.0), + position_embedding_type=encoder_cfg.get('position_embedding_type', 'learned_absolute'), + use_flash_attention=encoder_cfg.get('use_flash_attention', False), ) if add_decoder: @@ -307,6 +312,7 @@ def __init__( ): self.decoder_cross_attention_relative_position_embeddings_weight().data.fill_(0) self.decoder_cross_attention_relative_position_embeddings_weight().shared = True + elif self.decoder_cfg.get('position_embedding_type', 'learned_absolute') == 'alibi': self.decoder_relative_position_embedding = ALiBiRelativePositionEmbedding( bidirectional=False, @@ -328,6 +334,11 @@ def __init__( else: self.decoder_relative_position_embedding = None + if decoder_cfg.get('use_flash_attention', False) and decoder_cfg.get( + 'position_embedding_type', 'learned_absolute' + ) in ['relative', 'kerple']: + raise ValueError('flash-attention not supported with relative or kerple at this point') + decoder = get_decoder_model( arch=decoder_cfg.arch, hidden_size=decoder_cfg.hidden_size, @@ -373,6 +384,8 @@ def __init__( num_moe_experts=decoder_cfg.get('num_moe_experts', 1), moe_frequency=decoder_cfg.get('moe_frequency', 1), moe_dropout=decoder_cfg.get('moe_dropout', 0.0), + position_embedding_type=decoder_cfg.get('position_embedding_type', 'learned_absolute'), + use_flash_attention=decoder_cfg.get('use_flash_attention', False), ) self.enc_dec_model = MegatronTransformerEncoderDecoderModule( diff --git a/nemo/collections/nlp/modules/common/megatron/transformer.py b/nemo/collections/nlp/modules/common/megatron/transformer.py index 9a09a9f9aa0b..652a3e6f4e3a 100644 --- a/nemo/collections/nlp/modules/common/megatron/transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/transformer.py @@ -18,6 +18,7 @@ from typing import Any, Callable, Optional import torch +import torch.nn as nn from einops import rearrange from nemo.collections.common.parts.adapter_modules import LinearAdapterConfig @@ -33,7 +34,7 @@ dropout_add, ) from nemo.collections.nlp.modules.common.megatron.fused_layer_norm import get_layer_norm -from nemo.collections.nlp.modules.common.megatron.layer_norm_1p import LayerNorm1P +from nemo.collections.nlp.modules.common.megatron.layer_norm_1p import LayerNorm1P, LPLayerNorm from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType from nemo.collections.nlp.modules.common.megatron.mlp import ParallelMLP, SwitchMLP from nemo.collections.nlp.modules.common.megatron.module import MegatronModule @@ -115,6 +116,12 @@ def _dropout_add(x, bias, residual, prob): return _dropout_add +def remove_bias_from_layernorm(layer): + for module in layer.modules(): + if hasattr(module, 'bias') and isinstance(module.bias, nn.Parameter): + module.register_parameter('bias', None) + + class ParallelTransformerLayer_(MegatronModule, adapter_mixins.AdapterModuleMixin): """A single transformer layer. @@ -134,7 +141,7 @@ def __init__( self_attn_mask_type=AttnMaskType.padding, fp32_residual_connection=False, precision=16, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, layernorm_epsilon=1e-5, hidden_dropout=0.1, @@ -164,6 +171,7 @@ def __init__( num_moe_experts=1, moe_frequency=1, moe_dropout=0.0, + use_flash_attention=False, ): super(ParallelTransformerLayer_, self).__init__() @@ -187,7 +195,9 @@ def __init__( 'bias_dropout_add_fusion=True requires bias=True, found bias=False. Either set both to True or both to False.' ) - if normalization not in ['layernorm', 'layernorm1p', 'rmsnorm']: + # the low_precision_layernorm does not require a bias term, whereas layernorm1p from apex + # does require a bias, so it cannot be used for bias-less low precision LN such as in MPT-7B + if normalization not in ['layernorm', 'layernorm1p', 'rmsnorm', 'low_precision_layernorm']: raise ValueError(f'normalization must be "layernorm", "layernorm1p" or "rmsnorm", found {normalization}') if transformer_block_type not in ['pre_ln', 'post_ln', 'normformer']: @@ -212,8 +222,16 @@ def __init__( self.input_layernorm = LayerNorm1P( hidden_size, layernorm_epsilon, sequence_parallel_enabled=sequence_parallel ) + elif normalization == 'low_precision_layernorm': + self.input_layernorm = LPLayerNorm(hidden_size, layernorm_epsilon) else: self.input_layernorm = MixedFusedRMSNorm(hidden_size, layernorm_epsilon) + # for architectures such as MPT, there is no bias term even on the layernorms + # this code allows us to remove the bias terms from the layernorm module + # so that we can support MPT. However, certain apex-based LNs don't support + # removing bias, so we also have to check for that + if not bias and normalization not in ['layernorm', 'layernorm1p']: + remove_bias_from_layernorm(self.input_layernorm) self.self_attention = ParallelAttention( init_method=init_method, @@ -240,6 +258,7 @@ def __init__( sequence_parallel=sequence_parallel, gradient_accumulation_fusion=gradient_accumulation_fusion, normalize_attention_scores=normalize_attention_scores, + use_flash_attention=use_flash_attention, ) if transformer_block_type == 'normformer': @@ -261,8 +280,12 @@ def __init__( self.post_attention_layernorm = LayerNorm1P( hidden_size, layernorm_epsilon, sequence_parallel_enabled=sequence_parallel ) + elif normalization == 'low_precision_layernorm': + self.post_attention_layernorm = LPLayerNorm(hidden_size, layernorm_epsilon) else: self.post_attention_layernorm = MixedFusedRMSNorm(hidden_size, layernorm_epsilon) + if not bias and normalization not in ['layernorm', 'layernorm1p']: + remove_bias_from_layernorm(self.post_attention_layernorm) if self.layer_type == LayerType.decoder_pre_mlp: # skip MLP and cross attention @@ -280,8 +303,12 @@ def __init__( self.post_attention_layernorm = LayerNorm1P( hidden_size, layernorm_epsilon, sequence_parallel_enabled=sequence_parallel ) + elif normalization == 'low_precision_layernorm': + self.post_attention_layernorm = LPLayerNorm(hidden_size, layernorm_epsilon) else: self.post_attention_layernorm = MixedFusedRMSNorm(hidden_size, layernorm_epsilon) + if not bias and normalization not in ['layernorm', 'layernorm1p']: + remove_bias_from_layernorm(self.post_attention_layernorm) if self.layer_type == LayerType.decoder or self.layer_type == LayerType.retrieval_encoder: self.inter_attention = ParallelAttention( @@ -522,13 +549,9 @@ def forward( if self.is_adapter_available(): adapter_1 = self.get_adapter_module(AdapterName.PRE_ATTN_ADAPTER) if adapter_1: - strategy = adapter_1.adapter_strategy - attention_output = self.forward_single_enabled_adapter_( - attention_output, - adapter_1, - adapter_name=AdapterName.PRE_ATTN_ADAPTER, - adapter_strategy=strategy, - ) + attention_output = ( + adapter_1(attention_output) + attention_output + ) # simple adapter call with residual connection layernorm_input = bias_dropout_add_func(attention_output, attention_bias, residual, self.hidden_dropout) # print(f"Layer: {self.layer_number} Attention checksum {layernorm_input.sum()}") @@ -599,15 +622,12 @@ def forward( layernorm_input = normalization_output # MLP. mlp_output, mlp_bias = self.mlp(normalization_output) - if ( - self.is_adapter_available() - ): # TODO: (@adithyre) was able to move adapter_2 back to the end of the transformer after ptl 1.7 update. + if self.is_adapter_available(): + # TODO: (@adithyre) was able to move adapter_2 back to the end of the transformer after ptl 1.7 update. adapter_2 = self.get_adapter_module(AdapterName.POST_ATTN_ADAPTER) if adapter_2: - strategy = adapter_2.adapter_strategy - mlp_output = self.forward_single_enabled_adapter_( - mlp_output, adapter_2, adapter_name=AdapterName.POST_ATTN_ADAPTER, adapter_strategy=strategy - ) + mlp_output = adapter_2(mlp_output) + mlp_output # simple adapter call with residual connection + residual = layernorm_input bias_dropout_add_func = self._get_bias_droput_add_func( @@ -639,7 +659,7 @@ def __init__( self_attn_mask_type=AttnMaskType.padding, fp32_residual_connection=False, precision=16, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, layernorm_epsilon=1e-5, hidden_dropout=0.1, @@ -669,6 +689,7 @@ def __init__( num_moe_experts=1, moe_frequency=1, moe_dropout=0.0, + use_flash_attention=False, ): super(ParallelTransformerLayer, self).__init__( init_method=init_method, @@ -711,6 +732,7 @@ def __init__( num_moe_experts=num_moe_experts, moe_frequency=moe_frequency, moe_dropout=moe_dropout, + use_flash_attention=use_flash_attention, ) # Dtype for forward pass - ignore amp O2 @@ -782,7 +804,7 @@ def __init__( params_dtype: torch.dtype = torch.float32, get_rng_state_tracker: Optional[Callable] = None, fuse_wgrad_accumulation: bool = False, - apply_query_key_layer_scaling: bool = True, + apply_query_key_layer_scaling: bool = False, attention_softmax_in_fp32: bool = False, seq_length: Optional[int] = None, micro_batch_size: Optional[int] = None, @@ -792,6 +814,7 @@ def __init__( layer_type: str = "encoder", drop_path_rate: float = 0, use_emha: bool = False, + ub_tp_comm_overlap: bool = False, autocast_dtype: Any = 16, zero_centered_gamma: bool = False, ) -> None: @@ -824,6 +847,7 @@ def __init__( set_parallel_mode=tp_size > 1, fuse_qkv_params=True, zero_centered_gamma=zero_centered_gamma, + ub_tp_comm_overlap=ub_tp_comm_overlap, ) # use_emha=use_emha, @@ -873,7 +897,7 @@ def __init__( hidden_size, ffn_hidden_size, num_attention_heads, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, layer_type=LayerType.encoder, # it can be a list of types or single type self_attn_mask_type=AttnMaskType.padding, @@ -919,11 +943,13 @@ def __init__( fp8_amax_compute_algo='most_recent', reduce_amax=True, use_emha=False, + ub_tp_comm_overlap=False, normalize_attention_scores=True, multi_query_attention=False, num_moe_experts=1, moe_frequency=1, moe_dropout=0.0, + use_flash_attention=False, ): super(ParallelTransformer, self).__init__() @@ -945,6 +971,9 @@ def __init__( self.position_embedding_type = position_embedding_type self.multi_query_attention = multi_query_attention + self.inference_current_sequence_len = 0 + self.inference_params = None + self.activations_checkpoint_method = activations_checkpoint_method self.activations_checkpoint_num_layers = activations_checkpoint_num_layers self.activations_checkpoint_granularity = activations_checkpoint_granularity @@ -1058,6 +1087,7 @@ def build_layer(layer_number): apply_residual_connection_post_layernorm=False, autocast_dtype=precision, use_emha=use_emha, + ub_tp_comm_overlap=ub_tp_comm_overlap, zero_centered_gamma=normalization == 'layernorm1p', ) else: @@ -1101,6 +1131,7 @@ def build_layer(layer_number): num_moe_experts=num_moe_experts, moe_frequency=moe_frequency, moe_dropout=moe_dropout, + use_flash_attention=use_flash_attention, ) if parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None: @@ -1151,8 +1182,16 @@ def build_layer(layer_number): self.final_layernorm = LayerNorm1P( hidden_size, layernorm_epsilon, sequence_parallel_enabled=sequence_parallel ) + elif normalization == 'low_precision_layernorm': + self.final_layernorm = LPLayerNorm(hidden_size, layernorm_epsilon) else: self.final_layernorm = MixedFusedRMSNorm(hidden_size, layernorm_epsilon) + # for architectures such as MPT, there is no bias term even on the layernorms + # this code allows us to remove the bias terms from the layernorm module + # so that we can support MPT. However, certain apex-based LNs don't support + # removing bias, so we also have to check for that + if not bias and normalization not in ['layernorm', 'layernorm1p']: + remove_bias_from_layernorm(self.final_layernorm) def _get_layer(self, layer_number): return self.layers[layer_number] @@ -1451,6 +1490,20 @@ def forward( if get_key_value: presents = [] + if self.transformer_engine: + # Pass key value information to TE through inference_params to pre-allocate memory + if set_inference_key_value_memory: + self.inference_params = type('', (), {})() + self.inference_params.max_sequence_len = inference_max_sequence_len + self.inference_params.max_batch_size = hidden_states.size(1) + self.inference_params.batch_size_offset = 0 + self.inference_params.key_value_memory_dict = {} + self.inference_params.sequence_len_offset = 0 + self.inference_current_sequence_len = 0 + + if self.inference_params != None: + self.inference_params.sequence_len_offset = self.inference_current_sequence_len + for index in range(self.num_layers): layer = self._get_layer(index) past = None @@ -1479,19 +1532,15 @@ def forward( checkpoint_core_attention = False if self.transformer_engine: - - inference_params = None - hidden_states = layer( hidden_states, attention_mask, encoder_output=encoder_output, enc_dec_attn_mask=enc_dec_attn_mask, - inference_params=inference_params, + inference_params=self.inference_params, is_first_microbatch=self.is_first_microbatch, checkpoint_core_attention=checkpoint_core_attention, ) - else: hidden_states = layer( hidden_states, @@ -1507,6 +1556,9 @@ def forward( cross_attention_relative_position_bias=cross_attention_relative_position_bias, checkpoint_core_attention=checkpoint_core_attention, ) + # Update current sequence length outside of the loops + if self.transformer_engine: + self.inference_current_sequence_len += hidden_states.size(0) # Skip counter update for eval and activation checkpointing if torch.is_grad_enabled() and self.training: diff --git a/nemo/collections/nlp/modules/common/megatron/utils.py b/nemo/collections/nlp/modules/common/megatron/utils.py index 919990065057..b9e49eff7829 100644 --- a/nemo/collections/nlp/modules/common/megatron/utils.py +++ b/nemo/collections/nlp/modules/common/megatron/utils.py @@ -189,7 +189,9 @@ def average_losses_across_data_parallel_group(losses): return averaged_losses -def get_ltor_masks_and_position_ids(data, eod_token, reset_position_ids, reset_attention_mask, eod_mask_loss): +def get_ltor_masks_and_position_ids( + data, eod_token, reset_position_ids, reset_attention_mask, eod_mask_loss, compute_attention_mask=True +): """Build masks and position id for left to right model.""" # Extract batch size and sequence length. @@ -200,9 +202,12 @@ def get_ltor_masks_and_position_ids(data, eod_token, reset_position_ids, reset_a att_mask_batch = micro_batch_size else: att_mask_batch = 1 - attention_mask = torch.tril(torch.ones((att_mask_batch, seq_length, seq_length), device=data.device)).view( - att_mask_batch, 1, seq_length, seq_length - ) + + attention_mask = None + if compute_attention_mask: + attention_mask = torch.tril(torch.ones((att_mask_batch, seq_length, seq_length), device=data.device)).view( + att_mask_batch, 1, seq_length, seq_length + ) # Loss mask. loss_mask = torch.ones(data.size(), dtype=torch.float, device=data.device) @@ -238,8 +243,9 @@ def get_ltor_masks_and_position_ids(data, eod_token, reset_position_ids, reset_a position_ids[b, (i + 1) :] -= i + 1 - prev_index prev_index = i + 1 - # Convert attention mask to binary: - attention_mask = attention_mask < 0.5 + if compute_attention_mask: + # Convert attention mask to binary: + attention_mask = attention_mask < 0.5 return attention_mask, loss_mask, position_ids @@ -391,3 +397,16 @@ def get_iterator_k_split(batch: List[torch.Tensor], num_microbatches: int) -> It microbatches = [[elem[i] for elem in split_batch] for i in range(num_microbatches)] return itertools.chain(microbatches) + + +def _cast_if_autocast_enabled(tensor): + if torch.is_autocast_enabled(): + if isinstance(tensor, torch.Tensor): + if tensor.device.type == 'cuda': + dtype = torch.get_autocast_gpu_dtype() + elif tensor.device.type == 'cpu': + dtype = torch.get_autocast_cpu_dtype() + else: + raise NotImplementedError() + return tensor.to(dtype=dtype) + return tensor diff --git a/nemo/collections/nlp/modules/common/megatron_web_server.py b/nemo/collections/nlp/modules/common/megatron_web_server.py index 884f7abe5f01..7c04ef201927 100644 --- a/nemo/collections/nlp/modules/common/megatron_web_server.py +++ b/nemo/collections/nlp/modules/common/megatron_web_server.py @@ -14,10 +14,14 @@ import asyncio -import gradio as gr +try: + import gradio as gr + + GRADIO_AVAILABLE = True +except (ImportError, ModuleNotFoundError): + GRADIO_AVAILABLE = False from nemo.collections.nlp.modules.common.chat_css import CSS -from nemo.collections.nlp.modules.common.chatbot_component import Chatbot from nemo.collections.nlp.modules.common.megatron.retrieval_services.util import ( convert_retrieved_to_md, request_data, @@ -28,95 +32,124 @@ TURN_TOKEN = '' -DEFAULT_SYSTEM = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n" -SYSTEM_TOKEN = 'System\n' -# HUMAN_TOKEN = 'Human:' -# ASSITANT_TOKEN = 'Assistant:' +PROMPT_PRESETS = { + "DIALOGUE": { + "SYSTEM_TURN_TOKEN": '', + "USER_TURN_TOKEN": '', + "BOT_TURN_TOKEN": '', + "END_OF_NAME": '', + "END_OF_TURN": '\n', + }, + "DIALOGUE2": { + "SYSTEM_TURN_TOKEN": 'System\n', + "USER_TURN_TOKEN": '', + "BOT_TURN_TOKEN": '', + "END_OF_NAME": '\n', + "END_OF_TURN": '\n', + }, +} + + +PRESETS = { + "K1-Greedy": {"temperature": 1.0, "top_p": 0.9, "top_k": 1, "repetition_penalty": 1.0,}, + "K50": {"temperature": 0.75, "top_p": 0.95, "top_k": 50, "repetition_penalty": 1.0,}, + "K50-Creative": {"temperature": 0.85, "top_p": 0.95, "top_k": 50, "repetition_penalty": 1.0,}, + "K50-Precise": {"temperature": 0.1, "top_p": 0.95, "top_k": 50, "repetition_penalty": 1.0,}, + "K50-Original": {"temperature": 0.9, "top_p": 0.95, "top_k": 50, "repetition_penalty": 1.0,}, + "Nucleus9": {"temperature": 0.8, "top_p": 0.9, "top_k": 10000, "repetition_penalty": 1.0,}, + "Custom": {"temperature": 0.75, "top_p": 0.95, "top_k": 50, "repetition_penalty": 1.0,}, +} + + +def check_gradio_import(): + if not GRADIO_AVAILABLE: + msg = ( + f"could not find the gradio library.\n" + f"****************************************************************\n" + f"To install it, please follow the steps below:\n" + f"pip install gradio==3.34.0\n" + ) + raise ImportError(msg) def create_gen_function(port=5555, chat=False): - if chat: - - def get_generation( - prompt, preamble, greedy, add_BOS, token_to_gen, min_tokens, temp, top_p, top_k, repetition, end_strings - ): - if preamble is not None and preamble != '': - prompt = SYSTEM_TOKEN + preamble + prompt - data = { - "sentences": [prompt], - "tokens_to_generate": int(token_to_gen), - "temperature": temp, - "add_BOS": add_BOS, - "top_k": top_k, - "top_p": top_p, - "greedy": greedy, - "all_probs": False, - "repetition_penalty": repetition, - "min_tokens_to_generate": int(min_tokens), - "end_strings": [i.strip() for i in end_strings.split(',') if len(i) != 0], - } - response = text_generation(data, port=port) - sentences = response['sentences'] - bot_message = sentences[0] - bot_message = bot_message[len(prompt) :] - return bot_message - - else: - - def get_generation( - prompt, greedy, add_BOS, token_to_gen, min_tokens, temp, top_p, top_k, repetition, end_strings - ): - data = { - "sentences": [prompt], - "tokens_to_generate": int(token_to_gen), - "temperature": temp, - "add_BOS": add_BOS, - "top_k": top_k, - "top_p": top_p, - "greedy": greedy, - "all_probs": False, - "repetition_penalty": repetition, - "min_tokens_to_generate": int(min_tokens), - "end_strings": [i.strip() for i in end_strings.split(',') if len(i) != 0], - } - response = text_generation(data, port=port) - sentences = response['sentences'] - bot_message = sentences[0] - bot_message = bot_message[len(prompt) :] - return bot_message + def get_generation(prompt, greedy, add_BOS, token_to_gen, min_tokens, temp, top_p, top_k, repetition, end_strings): + data = { + "sentences": [prompt], + "tokens_to_generate": int(token_to_gen), + "temperature": temp, + "add_BOS": add_BOS, + "top_k": top_k, + "top_p": top_p, + "greedy": greedy, + "all_probs": False, + "repetition_penalty": repetition, + "min_tokens_to_generate": int(min_tokens), + "end_strings": [i.strip() for i in end_strings.split(',') if len(i) != 0], + } + response = text_generation(data, port=port) + sentences = response['sentences'] + bot_message = sentences[0] + if bot_message.find(' token + prompt = prompt.replace('', '').replace('', '').replace('', '') + bot_message = bot_message[len(prompt) :] + return bot_message return get_generation def get_demo(share, username, password, server_port=5555, web_port=9889, loop=None): + check_gradio_import() asyncio.set_event_loop(loop) - with gr.Blocks() as demo: + with gr.Blocks(css=CSS) as demo: with gr.Row(): with gr.Column(scale=2, width=200): - greedy_flag = gr.Checkbox(label="Greedy") - add_BOS = gr.Checkbox(label="Add BOS token", value=False) + # store the mutliple turn conversation token_to_gen = gr.Number(label='Number of Tokens to generate', value=300, type=int) min_token_to_gen = gr.Number(label='Min number of Tokens to generate', value=1, type=int) - temperature = gr.Slider(minimum=0.0, maximum=10.0, value=1.0, label='Temperature', step=0.1) - top_p = gr.Slider(minimum=0.0, maximum=1.0, step=0.02, value=0.9, label='Top P') - top_k = gr.Slider(minimum=0, maximum=10000, step=2, value=0, label='Top K') + seed = gr.Number(label='Random seed', value=0, type=int) + end_strings = gr.Textbox(label="End strings (comma separated)", value=",", lines=1,) + add_BOS = gr.Checkbox(label="Add BOS token", value=False) + sampling_method = gr.Dropdown( + list(PRESETS.keys()), label='Sampling Presets', default='K50', value='K50' + ) + temperature = gr.Slider(minimum=0.0, maximum=5.0, value=0.75, label='Temperature', step=0.1) + top_p = gr.Slider(minimum=0.0, maximum=1.0, step=0.02, value=0.95, label='Top P') + top_k = gr.Slider(minimum=0, maximum=1024, step=2, value=50, label='Top K') + repetition_penality = gr.Slider( - minimum=1.0, maximum=5.0, step=0.02, value=1.2, label='Repetition penalty' + minimum=1.0, maximum=5.0, step=0.02, value=1.0, label='Repetition penalty' ) - end_strings = gr.Textbox(label="End strings (comma separated)", value="<|endoftext|>,", lines=1,) - with gr.Column(scale=1, min_width=800): - input_prompt = gr.Textbox( - label="Input", - value="Ariel was playing basketball. 1 of her shots went in the hoop. 2 of her shots did not go in the hoop. How many shots were there in total?", - lines=5, + + def set_sampling(x): + return list(PRESETS[x].values()) + + sampling_method.change( + set_sampling, inputs=[sampling_method], outputs=[temperature, top_p, top_k, repetition_penality] ) - output_box = gr.Textbox(value="", label="Output") - btn = gr.Button(value="Submit") - btn.click( - create_gen_function(server_port, chat=False), - inputs=[ - input_prompt, - greedy_flag, + + with gr.Column(scale=1, min_width=900): + text = gr.Textbox(label="Playground", value="", lines=60, placeholder="Type something here...",) + submit_btn = gr.Button("Generate") + clear = gr.Button("Clear") + + def on_submit( + prompt_text, + token_to_gen, + temperature, + top_p, + top_k, + repetition_penality, + seed, + end_strings, + add_BOS, + min_token_to_gen, + ): + + output = create_gen_function(server_port)( + prompt_text, + False, add_BOS, token_to_gen, min_token_to_gen, @@ -125,38 +158,142 @@ def get_demo(share, username, password, server_port=5555, web_port=9889, loop=No top_k, repetition_penality, end_strings, + ) + print(output) + print('-------------------') + return prompt_text + output + + def clear_fun(): + return '' + + submit_btn.click( + on_submit, + [ + text, + token_to_gen, + temperature, + top_p, + top_k, + repetition_penality, + seed, + end_strings, + add_BOS, + min_token_to_gen, ], - outputs=[output_box], + [text], + queue=False, ) - demo.launch(share=share, server_port=web_port, server_name='0.0.0.0', auth=(username, password)) + clear.click(clear_fun, None, text, queue=False) + demo.queue(concurrency_count=16).launch( + share=share, server_port=web_port, server_name='0.0.0.0', auth=(username, password) + ) + +def get_chatbot_demo( + share, username, password, server_port=5555, web_port=9889, loop=None, value=False, defaults=None +): + check_gradio_import() + from nemo.collections.nlp.modules.common.chatbot_component import Chatbot -def get_chatbot_demo(share, username, password, server_port=5555, web_port=9889, loop=None): asyncio.set_event_loop(loop) with gr.Blocks(css=CSS) as demo: - # store the mutliple turn conversation with gr.Row(): with gr.Column(scale=2, width=200): # store the mutliple turn conversation session_state = gr.State(value=[]) - greedy_flag = gr.Checkbox(label="Greedy", value=True) - add_BOS = gr.Checkbox(label="Add BOS token", value=False) token_to_gen = gr.Number(label='Number of Tokens to generate', value=300, type=int) - min_token_to_gen = gr.Number(label='Min number of Tokens to generate', value=1, type=int) - temperature = gr.Slider(minimum=0.0, maximum=10.0, value=1.0, label='Temperature', step=0.1) - top_p = gr.Slider(minimum=0.0, maximum=1.0, step=0.02, value=0.9, label='Top P') - top_k = gr.Slider(minimum=0, maximum=10000, step=2, value=0, label='Top K') - repetition_penality = gr.Slider( - minimum=1.0, maximum=5.0, step=0.02, value=1.2, label='Repetition penalty' + seed = gr.Number(label='Random seed', value=0, type=int) + prompt_presets = gr.Dropdown( + list(PROMPT_PRESETS.keys()), label='Template Presets', default='DIALOGUE2', value='DIALOGUE2' ) - end_strings = gr.Textbox( - label="End strings (comma separated)", value=f"<|endoftext|>,,", lines=1, + sampling_method = gr.Dropdown( + list(PRESETS.keys()), label='Sampling Presets', default='K50', value='K50' + ) + with gr.Accordion("Sampling Parameters", open=False): + temperature = gr.Slider( + minimum=0.0, maximum=5.0, value=0.75, label='Temperature', step=0.1, interactive=False + ) + top_p = gr.Slider( + minimum=0.0, maximum=1.0, step=0.02, value=0.95, label='Top P', interactive=False + ) + top_k = gr.Slider(minimum=0, maximum=1024, step=2, value=50, label='Top K', interactive=False) + repetition_penality = gr.Slider( + minimum=1.0, maximum=5.0, step=0.02, value=1.0, label='Repetition penalty', interactive=False + ) + + with gr.Accordion("Value Parameters", open=True, visible=value): + keys = ['quality', 'toxicity', 'humor', 'creativity', 'violence', 'helpfulness', 'not_appropriate'] + quality_value = gr.Slider( + minimum=0, maximum=9, step=1, value=9, label='Quality', interactive=True, visible=True + ) + toxicity_value = gr.Slider( + minimum=0, maximum=9, step=1, value=0, label='Toxicity', interactive=True, visible=True + ) + humor_value = gr.Slider( + minimum=0, maximum=9, step=1, value=0, label='Humor', interactive=True, visible=True + ) + creativity_value = gr.Slider( + minimum=0, maximum=9, step=1, value=0, label='Creativity', interactive=True, visible=True + ) + violence_value = gr.Slider( + minimum=0, maximum=9, step=1, value=0, label='Violence', interactive=True, visible=True + ) + helpfulness_value = gr.Slider( + minimum=0, maximum=9, step=1, value=9, label='Helpfulness', interactive=True, visible=True + ) + not_appropriate_value = gr.Slider( + minimum=0, maximum=9, step=1, value=0, label='Not Appropriate', interactive=True, visible=True + ) + used_value = gr.CheckboxGroup(keys, value=keys) + + def change_visibility(x): + values = [] + for key in keys: + if key in x: + values.append(gr.update(visible=True)) + else: + values.append(gr.update(visible=False)) + return values + + used_value.change( + change_visibility, + inputs=[used_value], + outputs=[ + quality_value, + toxicity_value, + humor_value, + creativity_value, + violence_value, + helpfulness_value, + not_appropriate_value, + ], + ) + + def set_sampling(x): + if x == 'Custom': + values = [gr.update(value=v, interactive=True) for v in PRESETS[x].values()] + return values + else: + values = [gr.update(value=v, interactive=False) for v in PRESETS[x].values()] + return values + + sampling_method.change( + set_sampling, inputs=[sampling_method], outputs=[temperature, top_p, top_k, repetition_penality] ) - gr.HTML("


") - human_name = gr.Textbox(label="Human Name", value="User", line=1,) - assistant_name = gr.Textbox(label="Assistant Name", value="Assistant", line=1,) - preamble = gr.Textbox(label="System", value=DEFAULT_SYSTEM, lines=2,) - with gr.Column(scale=1, min_width=800): + + gr.HTML("
") + human_name = gr.Textbox(label="Human Name", value=defaults['user'], line=1,) + assistant_name = gr.Textbox(label="Assistant Name", value=defaults['assistant'], line=1,) + preamble = gr.Textbox(label="System", value=defaults['system'], lines=2,) + + def set_prompt(x): + if x == "DIALOGUE": + return '', '' + return defaults['user'], defaults['assistant'] + + prompt_presets.change(set_prompt, inputs=[prompt_presets], outputs=[human_name, assistant_name]) + + with gr.Column(scale=1, min_width=900): chatbot = Chatbot(elem_id="chatbot").style(height=800) msg = gr.Textbox(label="User", value="", lines=1,) clear = gr.Button("Clear") @@ -166,45 +303,86 @@ def user(user_message, history, session_state): user_message = user_message.replace('\n', '
') return "", history + [[user_message, None]] + def get_value_str(values_array, used_value): + if len(used_value) == 0: + return '' + assert len(values_array) == len(keys) + value_str = '' + elements = [] + for i, key in enumerate(keys): + if key in used_value: + elements.append(f'{key}:{values_array[i]}') + value_str += ','.join(elements) + '\n' + return value_str + def bot( history, preamble, - greedy_flag, - add_BOS, token_to_gen, - min_token_to_gen, temperature, top_p, top_k, repetition_penality, - end_strings, + seed, human_name, assistant_name, session_state, + prompts_presets, + quality_value, + toxicity_value, + humor_value, + creativity_value, + violence_value, + helpfulness_value, + not_appropriate_value, + used_value, ): + + values_array = [ + quality_value, + toxicity_value, + humor_value, + creativity_value, + violence_value, + helpfulness_value, + not_appropriate_value, + ] + if value: + value_str = get_value_str(values_array, used_value) + else: + value_str = '' + + prompt_preset = PROMPT_PRESETS[prompts_presets] prompt_text = '' names = [human_name, assistant_name] + turn_tokens = [prompt_preset['USER_TURN_TOKEN'], prompt_preset['BOT_TURN_TOKEN']] for i, meg in enumerate(session_state): name = names[i % 2] - prompt_text += TURN_TOKEN + name + '\n' + meg + '\n' - prompt_text += TURN_TOKEN + assistant_name + '\n' - bot_message = create_gen_function(server_port, chat=True)( + turn = turn_tokens[i % 2] + prompt_text += turn + name + prompt_preset['END_OF_NAME'] + meg + prompt_preset['END_OF_TURN'] + prompt_text += ( + prompt_preset['BOT_TURN_TOKEN'] + assistant_name + prompt_preset['END_OF_NAME'] + value_str + ) + prompt_text = prompt_preset['SYSTEM_TURN_TOKEN'] + preamble + prompt_text + bot_message = create_gen_function(server_port)( prompt_text, - preamble, - greedy_flag, - add_BOS, + False, + False, token_to_gen, - min_token_to_gen, + 1, temperature, top_p, top_k, repetition_penality, - end_strings, + '', ) if bot_message.endswith(TURN_TOKEN): bot_message = bot_message[: -len(TURN_TOKEN)] history[-1][1] = bot_message - session_state.append(bot_message.strip()) + print(prompt_text) + print(bot_message) + print('-------------------') + session_state.append(value_str + bot_message.strip()) return history msg.submit(user, [msg, chatbot, session_state], [msg, chatbot], queue=False).then( @@ -212,20 +390,26 @@ def bot( [ chatbot, preamble, - greedy_flag, - add_BOS, token_to_gen, - min_token_to_gen, temperature, top_p, top_k, repetition_penality, - end_strings, + seed, human_name, assistant_name, session_state, + prompt_presets, + quality_value, + toxicity_value, + humor_value, + creativity_value, + violence_value, + helpfulness_value, + not_appropriate_value, + used_value, ], - chatbot, + [chatbot], ) def clear_fun(session_state): @@ -294,6 +478,7 @@ def reset_index(self): return request_data(data, self.combo_service_ip, self.combo_service_port) def run_demo(self, share, username, password, port): + check_gradio_import() with gr.Blocks(css="table, th, td { border: 1px solid blue; table-layout: fixed; width: 100%; }") as demo: with gr.Row(): with gr.Column(scale=2, width=200): diff --git a/nemo/collections/nlp/modules/common/prompt_encoder.py b/nemo/collections/nlp/modules/common/prompt_encoder.py index 282ad053bc86..283608367b62 100644 --- a/nemo/collections/nlp/modules/common/prompt_encoder.py +++ b/nemo/collections/nlp/modules/common/prompt_encoder.py @@ -70,7 +70,7 @@ def __init__( self.prompt_embeddings.weight.requires_grad = False # Set fixed indicies for forward pass - self.register_buffer('indices', torch.LongTensor(list(range(self.total_virtual_tokens)))) + self.register_buffer("indices", torch.LongTensor(list(range(self.total_virtual_tokens))), persistent=False) def clear_prompt_embedding_weights(self,): """ @@ -104,9 +104,10 @@ def __init__(self, taskname, hidden_size, total_virtual_tokens, is_inference_rea self.total_virtual_tokens = total_virtual_tokens self.prompt_table = torch.nn.ModuleDict() self.prompt_table[self.taskname] = PromptEmbedding(self.hidden_size, self.total_virtual_tokens) - self.prompt_table[self.taskname].prompt_embeddings.weight.requires_grad = False self.prompt_table[self.taskname].clear_prompt_embedding_weights() self.is_inference_ready = is_inference_ready + for p in self.prompt_table.parameters(): + p.requires_grad = False def set_prompt_table(self, prompt_representation: torch.Tensor): """ diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py index 16935be1cc2d..573bdc80735e 100644 --- a/nemo/collections/nlp/modules/common/text_generation_strategy.py +++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py @@ -53,7 +53,6 @@ def __init__(self, model): def forward_step(self, batch, tensor_shape): fwd_bwd_function = get_forward_backward_func() - output_tensor = fwd_bwd_function( forward_step_func=self.model.get_forward_output_only_func(), data_iterator=iter([batch,]), @@ -98,10 +97,11 @@ def clip_max_len(self, maxlen: int) -> int: pass @abc.abstractclassmethod - def init_batch(self, context_tokens: torch.Tensor, context_length: int): + def init_batch(self, context_tokens: torch.Tensor, context_length: int, compute_attention_mask: bool): """initialize the batch data before the inference steps. It will save the intermediate results as object attributes context_length (int): the context token length + compute_attention_mask: bool: set to True to compute attention mask (not needed for FA) Args: context_tokens (torch.Tensor): The padded context tokens including the space for tokens to be generated """ @@ -153,15 +153,19 @@ def end_of_generation_condition( else: tokenizer = self.model.tokenizer conditions = [] + end_tokens = set() + end_tokens.add(eod_id) + for end_string in end_strings: + ids_1 = tokenizer.text_to_ids(f'{end_string}') + ids_2 = tokenizer.text_to_ids('') + if len(ids_1) <= len(ids_2): + continue + token_id = ids_1[len(ids_2) :][0] + end_tokens.add(token_id) for p, token_item in zip(prev, tokens): text = tokenizer.ids_to_text(token_item.tolist()) conditions.append( - any( - [ - p.item() == eod_id if end_string == END_OF_SEQ else text.endswith(end_string) - for end_string in end_strings - ] - ) + any([text.endswith(end_string) for end_string in end_strings] + [p.item() in end_tokens]) ) return torch.tensor(conditions, dtype=torch.bool, device=tokens.device) @@ -181,11 +185,14 @@ def __init__(self, model): def clip_max_len(self, maxlen: int) -> int: """ clip the max len based on the LM model max sequence length""" - if maxlen > self.model.cfg.encoder_seq_length + 1: - maxlen = self.model.cfg.encoder_seq_length + 1 + + # for positional embedding types that allow length extrapolation, don't clip the max length + if self.model.cfg.get("position_embedding_type", "learned_absolute") == "learned_absolute": + if maxlen > self.model.cfg.encoder_seq_length + 1: + maxlen = self.model.cfg.encoder_seq_length + 1 return maxlen - def init_batch(self, context_tokens: torch.Tensor, context_length: int): + def init_batch(self, context_tokens: torch.Tensor, context_length: int, compute_attention_mask: bool): """initialize the batch data before the inference steps.""" # Move to GPU. tokenizer = self.model.tokenizer @@ -197,10 +204,17 @@ def init_batch(self, context_tokens: torch.Tensor, context_length: int): self.model.cfg.get('reset_position_ids', False), self.model.cfg.get('reset_attention_mask', False), self.model.cfg.get('eod_mask_loss', False), + compute_attention_mask=compute_attention_mask, ) def prepare_batch_at_step( - self, tokens: torch.Tensor, maxlen: int, micro_batch_size: int, step: int, context_length: int + self, + tokens: torch.Tensor, + maxlen: int, + micro_batch_size: int, + step: int, + context_length: int, + compute_attention_mask: bool = True, ) -> Tuple[List[torch.Tensor], List[int]]: """ generate the batch used in inference for each of the steps @@ -224,7 +238,10 @@ def prepare_batch_at_step( # types2use = type_ids[:, context_length - 1].view(batch_size, -1) """Prepare batch for each of the inference steps""" - attention_mask_repeat = torch.concat([self.attention_mask for _ in range(micro_batch_size)]) + attention_mask_repeat = None + if compute_attention_mask: + attention_mask_repeat = torch.concat([self.attention_mask for _ in range(micro_batch_size)]) + setkey_value_array = torch.tensor( [set_inference_key_value_memory] * micro_batch_size, device=torch.cuda.current_device() ) @@ -241,7 +258,7 @@ def __init__(self, model, task_ids): self.task_ids = task_ids self.forward_model = self.model - def init_batch(self, context_tokens: torch.Tensor, context_length: int): + def init_batch(self, context_tokens: torch.Tensor, context_length: int, compute_attention_mask: bool): """initialize the batch data before the inference steps.""" # Move to GPU. tokenizer = self.model.tokenizer @@ -253,6 +270,7 @@ def init_batch(self, context_tokens: torch.Tensor, context_length: int): self.model.cfg.get('reset_position_ids', False), self.model.cfg.get('reset_attention_mask', False), self.model.cfg.get('eod_mask_loss', False), + compute_attention_mask=compute_attention_mask, ) def clip_max_len(self, maxlen: int) -> int: @@ -262,7 +280,13 @@ def clip_max_len(self, maxlen: int) -> int: return maxlen def prepare_batch_at_step( - self, tokens: torch.Tensor, maxlen: int, micro_batch_size: int, step: int, context_length: int + self, + tokens: torch.Tensor, + maxlen: int, + micro_batch_size: int, + step: int, + context_length: int, + compute_attention_mask: bool, ) -> Tuple[List[torch.Tensor], List[int]]: # types2use = None if step == 0: @@ -283,7 +307,9 @@ def prepare_batch_at_step( # types2use = type_ids[:, context_length - 1].view(batch_size, -1) """Prepare batch for each of the inference steps""" - attention_mask_repeat = torch.concat([self.attention_mask for _ in range(micro_batch_size)]) + attention_mask_repeat = None + if compute_attention_mask: + attention_mask_repeat = torch.concat([self.attention_mask for _ in range(micro_batch_size)]) setkey_value_array = torch.tensor( [set_inference_key_value_memory] * micro_batch_size, device=torch.cuda.current_device() ) diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py index 8cfb02c5e321..3a41901f76ce 100644 --- a/nemo/collections/nlp/modules/common/text_generation_utils.py +++ b/nemo/collections/nlp/modules/common/text_generation_utils.py @@ -97,6 +97,7 @@ def megatron_gpt_generate(model, inputs, tokenizer, length_params, sampling_para inputs=inputs, tokens_to_generate=length_params['max_length'], all_probs=sampling_params['all_probs'], + compute_logprob=sampling_params['compute_logprob'], temperature=sampling_params['temperature'], add_BOS=sampling_params['add_BOS'], top_k=sampling_params['top_k'], @@ -104,6 +105,7 @@ def megatron_gpt_generate(model, inputs, tokenizer, length_params, sampling_para greedy=sampling_params['use_greedy'], repetition_penalty=sampling_params['repetition_penalty'], min_tokens_to_generate=length_params['min_length'], + compute_attention_mask=sampling_params.get("compute_attention_mask", True), **strategy_args, ) compute_prob_response = get_computeprob_response(tokenizer, response, inputs) @@ -116,6 +118,7 @@ def megatron_gpt_generate(model, inputs, tokenizer, length_params, sampling_para inputs=inputs, tokens_to_generate=length_params['max_length'], all_probs=sampling_params['all_probs'], + compute_logprob=sampling_params['compute_logprob'], temperature=sampling_params['temperature'], add_BOS=sampling_params['add_BOS'], top_k=sampling_params['top_k'], @@ -269,6 +272,7 @@ def send_generate_info( context_length_tensor, tokens_to_generate, all_probs, + compute_logprob, temperature, top_k, top_p, @@ -288,6 +292,7 @@ def send_generate_info( context_tokens_tensor.size(1), # seq_len tokens_to_generate, all_probs, + compute_logprob, # whether to compute log probabilities matrix temperature, top_k, top_p, @@ -317,18 +322,19 @@ def receive_generate_info(): """ model_parallel_group = parallel_state.get_model_parallel_group() src = get_model_parallel_src_rank() - input_info_tensor = torch.empty(10, dtype=torch.float32, device=torch.cuda.current_device()) + input_info_tensor = torch.empty(11, dtype=torch.float32, device=torch.cuda.current_device()) torch.distributed.broadcast(input_info_tensor, src, model_parallel_group) batch_size = int(input_info_tensor[0].item()) seq_len = int(input_info_tensor[1].item()) tokens_to_generate = int(input_info_tensor[2].item()) all_probs = bool(input_info_tensor[3].item()) - temperature = float(input_info_tensor[4].item()) - top_k = int(input_info_tensor[5].item()) - top_p = float(input_info_tensor[6].item()) - greedy = bool(input_info_tensor[7].item()) - repetition_penalty = float(input_info_tensor[8].item()) - min_tokens_to_generate = int(input_info_tensor[9].item()) + compute_logprob = bool(input_info_tensor[4].item()) # whether to compute log probabilities matrix + temperature = float(input_info_tensor[5].item()) + top_k = int(input_info_tensor[6].item()) + top_p = float(input_info_tensor[7].item()) + greedy = bool(input_info_tensor[8].item()) + repetition_penalty = float(input_info_tensor[9].item()) + min_tokens_to_generate = int(input_info_tensor[10].item()) context_length_tensor = torch.empty(batch_size, dtype=torch.int64, device=torch.cuda.current_device()) context_tokens_tensor = torch.empty(batch_size, seq_len, dtype=torch.int64, device=torch.cuda.current_device()) @@ -349,6 +355,7 @@ def receive_generate_info(): context_tokens_tensor, tokens_to_generate, all_probs, + compute_logprob, temperature, top_k, top_p, @@ -370,6 +377,8 @@ def synced_generate( top_k=0, top_p=0.0, greedy=False, + compute_attention_mask=True, + compute_logprob=False, repetition_penalty=1.2, min_tokens_to_generate=0, end_strings=[], @@ -384,6 +393,7 @@ def synced_generate( context_length_tensor, tokens_to_generate, all_probs, + compute_attention_mask=compute_attention_mask, temperature=temperature, ) else: @@ -394,6 +404,8 @@ def synced_generate( context_length_tensor, tokens_to_generate, all_probs, + compute_attention_mask=compute_attention_mask, + compute_logprob=compute_logprob, temperature=temperature, end_strings=end_strings, extra={ @@ -411,7 +423,8 @@ def synced_generate( if parallel_state.is_pipeline_last_stage(): src = parallel_state.get_pipeline_model_parallel_last_rank() group = parallel_state.get_embedding_group() - torch.distributed.broadcast(output_logits, src, group) + if compute_logprob: + torch.distributed.broadcast(output_logits, src, group) if all_probs: src = parallel_state.get_pipeline_model_parallel_last_rank() group = parallel_state.get_embedding_group() @@ -421,10 +434,19 @@ def synced_generate( if parallel_state.is_pipeline_first_stage(): src = parallel_state.get_pipeline_model_parallel_last_rank() group = parallel_state.get_embedding_group() - output_logits = torch.empty( - tokens.size(0), context_length - 1, dtype=torch.float32, device=torch.device("cuda") - ) - torch.distributed.broadcast(output_logits, src, group) + + if compute_logprob: + precision = model._trainer.precision + if precision in [16, "16"]: + dtype = torch.float16 + elif precision == "bf16": + dtype = torch.bfloat16 + else: + dtype = torch.float32 + output_logits = torch.empty( + tokens.size(0), context_length - 1, dtype=dtype, device=torch.device("cuda") + ) + torch.distributed.broadcast(output_logits, src, group) if all_probs: src = parallel_state.get_pipeline_model_parallel_last_rank() @@ -433,7 +455,7 @@ def synced_generate( tokens.size(0), context_length - 1, model.padded_vocab_size, - dtype=torch.float32, + dtype=dtype, device=torch.device("cuda"), ) torch.distributed.broadcast(full_logits, src, group) @@ -451,6 +473,8 @@ def generate( top_k=0, top_p=0.0, greedy=False, + compute_attention_mask=True, + compute_logprob=False, repetition_penalty=1.0, min_tokens_to_generate=0, end_strings=['<|endoftext|>'], @@ -498,6 +522,7 @@ def generate( context_length_tensor, tokens_to_generate, all_probs, + compute_logprob, temperature, top_k, top_p, @@ -512,6 +537,7 @@ def generate( context_tokens_tensor, tokens_to_generate, all_probs, + compute_logprob, temperature, top_k, top_p, @@ -529,6 +555,8 @@ def generate( tokens_to_generate, all_probs, temperature, + compute_attention_mask=compute_attention_mask, + compute_logprob=compute_logprob, top_k=top_k, top_p=top_p, greedy=greedy, @@ -613,6 +641,8 @@ def sample_sequence_batch( context_lengths, tokens_to_generate, all_probs=False, + compute_attention_mask=True, + compute_logprob=False, type_ids=None, temperature=None, end_strings=['<|endoftext|>'], @@ -643,7 +673,7 @@ def sample_sequence_batch( # initialize the batch with torch.no_grad(): context_length = context_lengths.min().item() - inference_strategy.init_batch(context_tokens, context_length) + inference_strategy.init_batch(context_tokens, context_length, compute_attention_mask) # added eos_id to support the function generate_samples_eval that passes # eos_id as an argument and needs termination when that id id found. eod_id = tokenizer.eos_id @@ -662,16 +692,23 @@ def sample_sequence_batch( lengths = torch.ones([batch_size]).long().cuda() * maxlen while context_length < maxlen: batch, tensor_shape = inference_strategy.prepare_batch_at_step( - tokens, maxlen, micro_batch_size, counter, context_length + tokens, maxlen, micro_batch_size, counter, context_length, compute_attention_mask ) output = inference_strategy.forward_step(batch, tensor_shape) if parallel_state.is_pipeline_last_stage(): - output = output[0]['logits'].float() - output = tensor_parallel.gather_from_tensor_model_parallel_region(output) - assert output is not None - output = output.float() - logits = output[:, -1].view(batch_size, -1).contiguous() + + if compute_logprob: + output = output[0]['logits'] + output = tensor_parallel.gather_from_tensor_model_parallel_region(output) + assert output is not None + logits = output[:, -1].view(batch_size, -1).contiguous() + + else: + logits = output[0]['logits'][:, -1].contiguous() + logits = tensor_parallel.gather_from_tensor_model_parallel_region(logits) + assert logits is not None + logits = logits.view(batch_size, -1) # make sure it will generate at least min_length min_length = extra.get('min_tokens_to_generate', 0) @@ -683,6 +720,7 @@ def sample_sequence_batch( logits[:, tokenizer.vocab_size :] = -float('Inf') # started indicates whether the current token step passes the context_length, so we make sure not to overwrite the context tokens + started = context_lengths <= context_length if extra.get('greedy', False): prev = torch.argmax(logits, dim=-1).view(-1) @@ -710,23 +748,25 @@ def sample_sequence_batch( # Insert either new predicted or next prompt token tokens[:, context_length] = new_tokens - if output_logits is None: - output = F.log_softmax(output[:, :context_length, :], 2) - indices = torch.unsqueeze(tokens[:, 1 : context_length + 1], 2) - output_logits = torch.gather(output, 2, indices).squeeze(2) - all_generated_indices = indices[:, :, 0] - if all_probs: - full_logits = output - else: - output = F.log_softmax(output, 2) - indices = torch.unsqueeze(new_tokens, 1).unsqueeze(2) - new_output_logits = torch.gather(output, 2, indices).squeeze(2) + if compute_logprob: + if output_logits is None: + output = F.log_softmax(output[:, :context_length, :], 2) - # TODO(rprenger) we're copying output_logits every time. Should pre-allocate - output_logits = torch.cat([output_logits, new_output_logits], 1) - all_generated_indices = torch.cat([all_generated_indices, indices[:, :, 0]], 1) - if all_probs: - full_logits = torch.cat([full_logits, output], 1) + indices = torch.unsqueeze(tokens[:, 1 : context_length + 1], 2) + output_logits = torch.gather(output, 2, indices).squeeze(2) + all_generated_indices = indices[:, :, 0] + if all_probs: + full_logits = output + else: + output = F.log_softmax(output, 2) + indices = torch.unsqueeze(new_tokens, 1).unsqueeze(2) + new_output_logits = torch.gather(output, 2, indices).squeeze(2) + + # TODO(rprenger) we're copying output_logits every time. Should pre-allocate + output_logits = torch.cat([output_logits, new_output_logits], 1) + all_generated_indices = torch.cat([all_generated_indices, indices[:, :, 0]], 1) + if all_probs: + full_logits = torch.cat([full_logits, output], 1) src = parallel_state.get_pipeline_model_parallel_last_rank() group = parallel_state.get_embedding_group() @@ -746,10 +786,13 @@ def sample_sequence_batch( src = parallel_state.get_pipeline_model_parallel_last_rank() group = parallel_state.get_pipeline_model_parallel_group() torch.distributed.broadcast(done, src, group) - if all_probs: - yield tokens, lengths, output_logits, full_logits + if compute_logprob: + if all_probs: + yield tokens, lengths, output_logits, full_logits + else: + yield tokens, lengths, output_logits, None else: - yield tokens, lengths, output_logits, None + yield tokens, lengths, None, None else: if parallel_state.is_pipeline_first_stage(): @@ -780,6 +823,7 @@ def tab_sample_sequence_batch( context_lengths, tokens_to_generate, all_probs=True, + compute_attention_mask=True, type_ids=None, temperature=None, ): @@ -803,7 +847,7 @@ def tab_sample_sequence_batch( # initialize the batch with torch.no_grad(): context_length = context_lengths.min().item() - inference_strategy.init_batch(context_tokens, context_length) + inference_strategy.init_batch(context_tokens, context_length, compute_attention_mask) context = context_tokens[:, :context_length] # the context may start in the middle of the row, # calculate the offset according to the position of '\n' or '<|endoftext|>' @@ -837,7 +881,7 @@ def tab_sample_sequence_batch( while context_length < maxlen: batch, tensor_shape = inference_strategy.prepare_batch_at_step( - tokens, maxlen, micro_batch_size, counter, context_length + tokens, maxlen, micro_batch_size, counter, context_length, compute_attention_mask ) output = inference_strategy.forward_step(batch, tensor_shape) diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index c1938e7dca41..c043dd7ba7ab 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -168,6 +168,7 @@ def init_model_parallel(self, global_rank: int, world_size: int) -> None: pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, virtual_pipeline_model_parallel_size=app_state.virtual_pipeline_model_parallel_size, pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, + use_fp8=app_state.use_fp8, ) # assert that fake tp and pp rank match after model parallel init @@ -180,6 +181,10 @@ def init_model_parallel(self, global_rank: int, world_size: int) -> None: app_state.data_parallel_size = parallel_state.get_data_parallel_world_size() app_state.pipeline_model_parallel_group = parallel_state.get_pipeline_model_parallel_group() + # create MPI process group for UCX-based communication APIs + if app_state.init_mpi_proc_group: + torch.distributed.new_group(backend='mpi') + def save_checkpoint( self, checkpoint: Dict[str, Any], filepath: Union[str, Path], storage_options: Optional[Any] = None ) -> None: @@ -349,28 +354,6 @@ def modify_state_dict(self, conf, state_dict): new_state_dict[new_key] = state_dict[key] state_dict = new_state_dict - # compatibility for inductor in inference - if not conf.get('inductor', False): - new_state_dict = {} - for key in state_dict.keys(): - new_key = key.replace('._orig_mod', '', 1) - new_state_dict[new_key] = state_dict[key] - state_dict = new_state_dict - - # Modify state key for Dreambooth inference - if ( - conf.get('target') - == 'nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion' - ): - new_state_dict = {} - for key in state_dict.keys(): - new_key = key.replace('unet', 'model.diffusion_model') - new_key = new_key.replace('vae', 'first_stage_model') - new_key = new_key.replace('text_encoder', 'cond_stage_model') - new_key = new_key.replace('.noise_scheduler', '') - new_state_dict[new_key] = state_dict[key] - state_dict = new_state_dict - return state_dict def restore_from( @@ -426,14 +409,20 @@ class PEFTSaveRestoreConnector(NLPSaveRestoreConnector): Args: peft_model_nemo_path: Used to provide the .nemo file corresponding to a PEFT model (which will only contain a small set of params) - peft_model_ckpt_path: Used to provide the path to .ckpt files of a PEFt model. This is required when no .nemo is available (yet) such as during resumed training. - If both are provided the peft_model_ckpt_path takes precedence. + peft_model_ckpt_path: Used to provide the path to .ckpt files of a PEFT model. This is required when no .nemo is available (yet) such as during resumed training. + peft_model_ckpt_name: The filename of the ckpt file inside the peft_model_ckpt_path folder + If both are provided the peft_model_ckpt_path takes precedence. If neither are provided, PEFT params are initialized at random (not loaded from any external source). """ - def __init__(self, peft_model_nemo_path: Optional[str] = None, peft_model_ckpt_path: Optional[str] = None) -> None: + def __init__( + self, + peft_model_nemo_path: Optional[str] = None, + peft_model_ckpt_path: Optional[str] = None, + peft_model_ckpt_name: Optional[str] = "model_weights.ckpt", + ) -> None: super().__init__() - self.peft_model_ckpt_name = "model_weights.ckpt" + self.peft_model_ckpt_name = peft_model_ckpt_name if peft_model_ckpt_path: # First we will try to load a adapter ckpt path # this is given priority over loading from nemo path to make resumption of training possible diff --git a/nemo/collections/tts/data/text_to_speech_dataset.py b/nemo/collections/tts/data/text_to_speech_dataset.py index f6230fa3493a..23ddb50346a2 100644 --- a/nemo/collections/tts/data/text_to_speech_dataset.py +++ b/nemo/collections/tts/data/text_to_speech_dataset.py @@ -25,7 +25,6 @@ from nemo.collections.tts.parts.preprocessing.feature_processors import FeatureProcessor from nemo.collections.tts.parts.preprocessing.features import Featurizer from nemo.collections.tts.parts.utils.tts_dataset_utils import ( - BetaBinomialInterpolator, beta_binomial_prior_distribution, filter_dataset_by_duration, get_abs_rel_paths, @@ -55,12 +54,6 @@ class DatasetSample: speaker_index: int = None -@dataclass -class AlignPriorConfig: - hop_length: int - use_beta_binomial_interpolator: bool = False - - @experimental class TextToSpeechDataset(Dataset): """ @@ -71,15 +64,16 @@ class TextToSpeechDataset(Dataset): sample_rate: Sample rate to load audio as. If the audio is stored at a different sample rate, then it will be resampled. text_tokenizer: Tokenizer to apply to the text field. - weighted_sample_steps: Optional int, If provided, then data will be sampled (with replacement) based on + weighted_sampling_steps_per_epoch: Optional int, If provided, then data will be sampled (with replacement) based on the sample weights provided in the dataset metadata. If None, then sample weights will be ignored. speaker_path: Optional, path to JSON file with speaker indices, for multi-speaker training. Can be created with scripts.dataset_processing.tts.create_speaker_map.py featurizers: Optional, list of featurizers to load feature data from. Should be the same config provided when running scripts.dataset_processing.tts.compute_features.py before training. feature_processors: Optional, list of feature processors to run on training examples. - align_prior_config: Optional, if provided alignment prior will be calculated and included in - batch output. + align_prior_hop_length: Optional int, hop length of audio features. + If provided alignment prior will be calculated and included in batch output. Must match hop length + of audio features used for training. min_duration: Optional float, if provided audio files in the training manifest shorter than 'min_duration' will be ignored. max_duration: Optional float, if provided audio files in the training manifest longer than 'max_duration' @@ -88,14 +82,14 @@ class TextToSpeechDataset(Dataset): def __init__( self, - dataset_meta: Dict[str, DatasetMeta], + dataset_meta: Dict, sample_rate: int, text_tokenizer: BaseTokenizer, - weighted_sample_steps: Optional[int] = None, + weighted_sampling_steps_per_epoch: Optional[int] = None, speaker_path: Optional[Path] = None, featurizers: Optional[Dict[str, Featurizer]] = None, feature_processors: Optional[Dict[str, FeatureProcessor]] = None, - align_prior_config: Optional[AlignPriorConfig] = None, + align_prior_hop_length: Optional[int] = None, min_duration: Optional[float] = None, max_duration: Optional[float] = None, ): @@ -103,7 +97,9 @@ def __init__( self.sample_rate = sample_rate self.text_tokenizer = text_tokenizer - self.weighted_sample_steps = weighted_sample_steps + self.weighted_sampling_steps_per_epoch = weighted_sampling_steps_per_epoch + self.align_prior_hop_length = align_prior_hop_length + self.include_align_prior = self.align_prior_hop_length is not None if speaker_path: self.include_speaker = True @@ -115,26 +111,21 @@ def __init__( if featurizers: logging.info(f"Found featurizers {featurizers.keys()}") - self.featurizers = featurizers.values() + self.featurizers = list(featurizers.values()) else: self.featurizers = [] if feature_processors: logging.info(f"Found featurize processors {feature_processors.keys()}") - self.feature_processors = feature_processors.values() + self.feature_processors = list(feature_processors.values()) else: self.feature_processors = [] - self.align_prior_config = align_prior_config - if self.align_prior_config.use_beta_binomial_interpolator: - self.beta_binomial_interpolator = BetaBinomialInterpolator() - else: - self.beta_binomial_interpolator = None - self.data_samples = [] self.sample_weights = [] - for dataset_name, dataset in dataset_meta.items(): - samples, weights = self._process_dataset( + for dataset_name, dataset_info in dataset_meta.items(): + dataset = DatasetMeta(**dataset_info) + samples, weights = self._preprocess_manifest( dataset_name=dataset_name, dataset=dataset, min_duration=min_duration, @@ -145,15 +136,15 @@ def __init__( self.sample_weights += weights def get_sampler(self, batch_size: int) -> Optional[torch.utils.data.Sampler]: - if not self.weighted_sample_steps: + if not self.weighted_sampling_steps_per_epoch: return None sampler = get_weighted_sampler( - sample_weights=self.sample_weights, batch_size=batch_size, num_steps=self.weighted_sample_steps + sample_weights=self.sample_weights, batch_size=batch_size, num_steps=self.weighted_sampling_steps_per_epoch ) return sampler - def _process_dataset( + def _preprocess_manifest( self, dataset_name: str, dataset: DatasetMeta, @@ -169,8 +160,8 @@ def _process_dataset( logging.info(dataset_name) logging.info(f"Original # of files: {len(entries)}") logging.info(f"Filtered # of files: {len(filtered_entries)}") - logging.info(f"Original duration: {total_hours} hours") - logging.info(f"Filtered duration: {filtered_hours} hours") + logging.info(f"Original duration: {total_hours:.2f} hours") + logging.info(f"Filtered duration: {filtered_hours:.2f} hours") samples = [] sample_weights = [] @@ -190,8 +181,8 @@ def _process_dataset( sample = DatasetSample( manifest_entry=entry, - audio_dir=dataset.audio_dir, - feature_dir=dataset.feature_dir, + audio_dir=Path(dataset.audio_dir), + feature_dir=Path(dataset.feature_dir), text=text, speaker=speaker, speaker_index=speaker_index, @@ -208,26 +199,21 @@ def __getitem__(self, index): data = self.data_samples[index] audio_filepath = Path(data.manifest_entry["audio_filepath"]) - audio_path, _ = get_abs_rel_paths(input_path=audio_filepath, base_path=data.audio_dir) + audio_filepath_abs, audio_filepath_rel = get_abs_rel_paths(input_path=audio_filepath, base_path=data.audio_dir) - audio, _ = librosa.load(audio_path, sr=self.sample_rate) + audio, _ = librosa.load(audio_filepath_abs, sr=self.sample_rate) tokens = self.text_tokenizer(data.text) - example = {"audio": audio, "tokens": tokens} + example = {"audio_filepath": audio_filepath_rel, "audio": audio, "tokens": tokens} if data.speaker is not None: example["speaker"] = data.speaker example["speaker_index"] = data.speaker_index - if self.align_prior_config: + if self.include_align_prior: text_len = len(tokens) - spec_len = 1 + librosa.core.samples_to_frames( - audio.shape[0], hop_length=self.align_prior_config.hop_length - ) - if self.beta_binomial_interpolator: - align_prior = self.beta_binomial_interpolator(w=spec_len, h=text_len) - else: - align_prior = beta_binomial_prior_distribution(phoneme_count=text_len, mel_count=spec_len) + spec_len = 1 + librosa.core.samples_to_frames(audio.shape[0], hop_length=self.align_prior_hop_length) + align_prior = beta_binomial_prior_distribution(phoneme_count=text_len, mel_count=spec_len) align_prior = torch.tensor(align_prior, dtype=torch.float32) example["align_prior"] = align_prior @@ -243,7 +229,7 @@ def __getitem__(self, index): return example def collate_fn(self, batch: List[dict]): - + audio_filepath_list = [] audio_list = [] audio_len_list = [] token_list = [] @@ -252,6 +238,8 @@ def collate_fn(self, batch: List[dict]): prior_list = [] for example in batch: + audio_filepath_list.append(example["audio_filepath"]) + audio_tensor = torch.tensor(example["audio"], dtype=torch.float32) audio_list.append(audio_tensor) audio_len_list.append(audio_tensor.shape[0]) @@ -263,7 +251,7 @@ def collate_fn(self, batch: List[dict]): if self.include_speaker: speaker_list.append(example["speaker_index"]) - if self.align_prior_config: + if self.include_align_prior: prior_list.append(example["align_prior"]) batch_audio_len = torch.IntTensor(audio_len_list) @@ -276,6 +264,7 @@ def collate_fn(self, batch: List[dict]): batch_tokens = stack_tensors(token_list, max_lens=[token_max_len], pad_value=self.text_tokenizer.pad) batch_dict = { + "audio_filepaths": audio_filepath_list, "audio": batch_audio, "audio_lens": batch_audio_len, "text": batch_tokens, @@ -285,7 +274,7 @@ def collate_fn(self, batch: List[dict]): if self.include_speaker: batch_dict["speaker_id"] = torch.IntTensor(speaker_list) - if self.align_prior_config: + if self.include_align_prior: spec_max_len = max([prior.shape[0] for prior in prior_list]) text_max_len = max([prior.shape[1] for prior in prior_list]) batch_dict["align_prior_matrix"] = stack_tensors(prior_list, max_lens=[text_max_len, spec_max_len],) diff --git a/nemo/collections/tts/data/vocoder_dataset.py b/nemo/collections/tts/data/vocoder_dataset.py new file mode 100644 index 000000000000..9bb115ba2448 --- /dev/null +++ b/nemo/collections/tts/data/vocoder_dataset.py @@ -0,0 +1,202 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import traceback +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import librosa +import torch.utils.data + +from nemo.collections.asr.parts.preprocessing.segment import AudioSegment +from nemo.collections.asr.parts.utils.manifest_utils import read_manifest +from nemo.collections.tts.parts.preprocessing.feature_processors import FeatureProcessor +from nemo.collections.tts.parts.utils.tts_dataset_utils import ( + filter_dataset_by_duration, + get_abs_rel_paths, + get_weighted_sampler, + stack_tensors, +) +from nemo.core.classes import Dataset +from nemo.utils import logging +from nemo.utils.decorators import experimental + + +@dataclass +class DatasetMeta: + manifest_path: Path + audio_dir: Path + sample_weight: float = 1.0 + + +@dataclass +class DatasetSample: + manifest_entry: dict + audio_dir: Path + + +@experimental +class VocoderDataset(Dataset): + """ + Class for processing and loading Vocoder training examples. + + Args: + dataset_meta: Dict of dataset names (string) to dataset metadata. + sample_rate: Sample rate to load audio as. If the audio is stored at a different sample rate, then it will + be resampled. + n_samples: Optional int, if provided then n_samples samples will be randomly sampled from the full + audio file. + weighted_sampling_steps_per_epoch: Optional int, If provided, then data will be sampled (with replacement) based on + the sample weights provided in the dataset metadata. If None, then sample weights will be ignored. + feature_processors: Optional, list of feature processors to run on training examples. + min_duration: Optional float, if provided audio files in the training manifest shorter than 'min_duration' + will be ignored. + max_duration: Optional float, if provided audio files in the training manifest longer than 'max_duration' + will be ignored. + num_audio_retries: Number of read attempts to make when sampling audio file, to avoid training failing + from sporadic IO errors. + """ + + def __init__( + self, + dataset_meta: Dict, + sample_rate: int, + n_samples: Optional[int] = None, + weighted_sampling_steps_per_epoch: Optional[int] = None, + feature_processors: Optional[Dict[str, FeatureProcessor]] = None, + min_duration: Optional[float] = None, + max_duration: Optional[float] = None, + num_audio_retries: int = 5, + ): + super().__init__() + + self.sample_rate = sample_rate + self.n_samples = n_samples + self.weighted_sampling_steps_per_epoch = weighted_sampling_steps_per_epoch + self.num_audio_retries = num_audio_retries + self.load_precomputed_mel = False + + if feature_processors: + logging.info(f"Found feature processors {feature_processors.keys()}") + self.feature_processors = list(feature_processors.values()) + else: + self.feature_processors = [] + + self.data_samples = [] + self.sample_weights = [] + for dataset_name, dataset_info in dataset_meta.items(): + dataset = DatasetMeta(**dataset_info) + samples, weights = self._preprocess_manifest( + dataset_name=dataset_name, dataset=dataset, min_duration=min_duration, max_duration=max_duration, + ) + self.data_samples += samples + self.sample_weights += weights + + def get_sampler(self, batch_size: int) -> Optional[torch.utils.data.Sampler]: + if not self.weighted_sampling_steps_per_epoch: + return None + + sampler = get_weighted_sampler( + sample_weights=self.sample_weights, batch_size=batch_size, num_steps=self.weighted_sampling_steps_per_epoch + ) + return sampler + + def _segment_audio(self, audio_filepath: Path) -> AudioSegment: + # Retry file read multiple times as file seeking can produce random IO errors. + for _ in range(self.num_audio_retries): + try: + audio_segment = AudioSegment.segment_from_file( + audio_filepath, target_sr=self.sample_rate, n_segments=self.n_samples, + ) + return audio_segment + except Exception: + traceback.print_exc() + + raise ValueError(f"Failed to read audio {audio_filepath}") + + def _sample_audio(self, audio_filepath: Path) -> Tuple[torch.Tensor, torch.Tensor]: + if not self.n_samples: + audio_array, _ = librosa.load(audio_filepath, sr=self.sample_rate) + else: + audio_segment = self._segment_audio(audio_filepath) + audio_array = audio_segment.samples + audio = torch.tensor(audio_array) + audio_len = torch.tensor(audio.shape[0]) + return audio, audio_len + + @staticmethod + def _preprocess_manifest( + dataset_name: str, dataset: DatasetMeta, min_duration: float, max_duration: float, + ): + entries = read_manifest(dataset.manifest_path) + filtered_entries, total_hours, filtered_hours = filter_dataset_by_duration( + entries=entries, min_duration=min_duration, max_duration=max_duration + ) + + logging.info(dataset_name) + logging.info(f"Original # of files: {len(entries)}") + logging.info(f"Filtered # of files: {len(filtered_entries)}") + logging.info(f"Original duration: {total_hours:.2f} hours") + logging.info(f"Filtered duration: {filtered_hours:.2f} hours") + + samples = [] + sample_weights = [] + for entry in filtered_entries: + sample = DatasetSample(manifest_entry=entry, audio_dir=Path(dataset.audio_dir),) + samples.append(sample) + sample_weights.append(dataset.sample_weight) + + return samples, sample_weights + + def __len__(self): + return len(self.data_samples) + + def __getitem__(self, index): + data = self.data_samples[index] + + audio_filepath = Path(data.manifest_entry["audio_filepath"]) + audio_filepath_abs, audio_filepath_rel = get_abs_rel_paths(input_path=audio_filepath, base_path=data.audio_dir) + + audio, audio_len = self._sample_audio(audio_filepath_abs) + + example = {"audio_filepath": audio_filepath_rel, "audio": audio, "audio_len": audio_len} + + for processor in self.feature_processors: + processor.process(example) + + return example + + def collate_fn(self, batch: List[dict]): + audio_filepath_list = [] + audio_list = [] + audio_len_list = [] + + for example in batch: + audio_filepath_list.append(example["audio_filepath"]) + audio_list.append(example["audio"]) + audio_len_list.append(example["audio_len"]) + + batch_audio_len = torch.IntTensor(audio_len_list) + audio_max_len = int(batch_audio_len.max().item()) + + batch_audio = stack_tensors(audio_list, max_lens=[audio_max_len]) + + batch_dict = { + "audio_filepaths": audio_filepath_list, + "audio": batch_audio, + "audio_lens": batch_audio_len, + } + + return batch_dict diff --git a/nemo/collections/tts/g2p/models/i18n_ipa.py b/nemo/collections/tts/g2p/models/i18n_ipa.py index 82164802f620..d4289f217cb5 100644 --- a/nemo/collections/tts/g2p/models/i18n_ipa.py +++ b/nemo/collections/tts/g2p/models/i18n_ipa.py @@ -42,7 +42,7 @@ class IpaG2p(BaseG2p): def __init__( self, - phoneme_dict: Union[str, pathlib.Path, dict], + phoneme_dict: Union[str, pathlib.Path, Dict[str, List[List[str]]]], locale: str = "en-US", apply_to_oov_word: Optional[Callable[[str], str]] = None, ignore_ambiguous_words: bool = True, diff --git a/nemo/collections/tts/g2p/models/zh_cn_pinyin.py b/nemo/collections/tts/g2p/models/zh_cn_pinyin.py index 4b69afb2a28c..aab57c925c82 100644 --- a/nemo/collections/tts/g2p/models/zh_cn_pinyin.py +++ b/nemo/collections/tts/g2p/models/zh_cn_pinyin.py @@ -14,29 +14,51 @@ import pathlib from collections import defaultdict -from typing import Optional +from typing import Dict, List, Optional, Union +from nemo.collections.common.tokenizers.text_to_speech.ipa_lexicon import get_grapheme_character_set from nemo.collections.tts.g2p.models.base import BaseG2p +from nemo.collections.tts.g2p.utils import set_grapheme_case from nemo.utils import logging class ChineseG2p(BaseG2p): def __init__( self, - phoneme_dict=None, + phoneme_dict: Union[str, pathlib.Path, Dict[str, List[str]]], + phoneme_prefix: str = "#", + phoneme_case: str = "upper", + tone_prefix: str = "#", + ascii_letter_prefix: str = "", + ascii_letter_case: str = "lower", word_tokenize_func=None, apply_to_oov_word=None, mapping_file: Optional[str] = None, word_segmenter: Optional[str] = None, ): - """Chinese G2P module. This module first converts Chinese characters into pinyin sequences using pypinyin, then pinyin sequences would - be further converted into phoneme sequences using pinyin_dict_nv_22.10.txt dict file. For Chinese and English bilingual sentences, the English words - would be converted into letters. + """ + Chinese G2P module. This module first converts Chinese characters into pinyin sequences using pypinyin, then + pinyin sequences would be further converted into phoneme sequences by looking them up in the `phoneme_dict`. + This G2P module also works with Chinese/English bilingual sentences where English words would be converted + into letters. It is advised to attach prefix symbols for Chinese phonemes and tones to discriminate them + from English letters to avoid any potential symbol set overlaps. Args: - phoneme_dict (str, Path, Dict): Path to pinyin_dict_nv_22.10.txt dict file. + phoneme_dict (str, Path, Dict): Path to pinyin_dict_nv_22.10.txt dict file or a dict object. + phoneme_prefix (str): Prepend a special symbol to any phonemes in order to distinguish phonemes from + graphemes because there may be overlaps between the two sets. Phoneme dictionary typically applies + uppercase initials and finals. It is suggested to choose a prefix that + is not used or preserved somewhere else. Default to "#". + phoneme_case (str): Specify the case chosen from `"lower"`, `"upper"`, or `"mixed"`, and process the + cases of Chinese phonemes. Default to `"upper"`. + tone_prefix (str): Prepend a special symbol to any tone digits. Default to "#". + ascii_letter_prefix (str): Prepend a special symbol to any ASCII letters. Default to "". + ascii_letter_case (str): Specify the case chosen from `"lower"`, `"upper"`, or `"mixed"`, and process the + cases of non-Chinese words. Default to `"lower"`. word_tokenize_func: Function for tokenizing text to words. - It has to return List[Tuple[Union[str, List[str]], bool]] where every tuple denotes word representation and flag whether to leave unchanged or not. - It is expected that unchangeable word representation will be represented as List[str], other cases are represented as str. + It has to return List[Tuple[Union[str, List[str]], bool]] where every tuple denotes word representation + and flag whether to leave unchanged or not. + It is expected that unchangeable word representation will be represented as List[str], other cases are + represented as str. It is useful to mark word as unchangeable which is already in phoneme representation. apply_to_oov_word: Function that will be applied to out of phoneme_dict word. word_segmenter: method that will be applied to segment utterances into words for better polyphone disambiguation. @@ -47,11 +69,31 @@ def __init__( 'jieba', ], f"{word_segmenter} is not supported now. Please choose correct word_segmenter." + if phoneme_prefix is None: + phoneme_prefix = "" + if tone_prefix is None: + tone_prefix = "" + if ascii_letter_prefix is None: + ascii_letter_prefix = "" + + # phonemes phoneme_dict = ( - self._parse_as_pinyin_dict(phoneme_dict) + self._parse_as_pinyin_dict(phoneme_dict, phoneme_prefix, phoneme_case) if isinstance(phoneme_dict, str) or isinstance(phoneme_dict, pathlib.Path) else phoneme_dict ) + self.phoneme_list = sorted({pron for prons in phoneme_dict.values() for pron in prons}) + + # tones + self.tone_dict = {str(x): tone_prefix + str(x) for x in range(1, 6)} + self.tone_list = sorted(self.tone_dict.values()) + + # ascii letters + self.ascii_letter_dict = { + x: ascii_letter_prefix + x for x in get_grapheme_character_set(locale="en-US", case=ascii_letter_case) + } + self.ascii_letter_list = sorted(self.ascii_letter_dict) + self.ascii_letter_case = ascii_letter_case if apply_to_oov_word is None: logging.warning( @@ -67,7 +109,6 @@ def __init__( apply_to_oov_word=apply_to_oov_word, mapping_file=mapping_file, ) - self.tones = {'1': '#1', '2': '#2', '3': '#3', '4': '#4', '5': '#5'} if word_segmenter == "jieba": try: @@ -93,32 +134,50 @@ def __init__( self._Style = Style @staticmethod - def _parse_as_pinyin_dict(phoneme_dict_path): + def _parse_as_pinyin_dict( + phoneme_dict_path: Union[str, pathlib.Path], phoneme_prefix: str, phoneme_case: str + ) -> Dict[str, List[str]]: """Loads pinyin dict file, and generates a set of all valid symbols.""" g2p_dict = defaultdict(list) with open(phoneme_dict_path, 'r') as file: for line in file: + # skip empty lines and comment lines starting with `;;;`. + if line.startswith(";;;") or len(line.strip()) == 0: + continue + parts = line.split('\t') - # let the key be lowercased, since pypinyin would give lower representation - pinyin = parts[0].lower() - pronunciation = parts[1].split() - pronunciation_with_sharp = ['#' + pron for pron in pronunciation] - g2p_dict[pinyin] = pronunciation_with_sharp + # Convert the cases of Chinese syllables loaded from the dictionary to lowercase to match the lowercase + # Chinese syllable outputs generated by the function `pypinyin.lazy_pinyin`. Note that the function + # `pypinyin.lazy_pinyin` preserves the cases of ASCII letters. + syllable = parts[0].lower() + pronunciation = set_grapheme_case(parts[1], case=phoneme_case).split() + + # add a prefix to distinguish phoneme symbols from non-phoneme symbols. + pronunciation_with_prefix = [phoneme_prefix + pron for pron in pronunciation] + g2p_dict[syllable] = pronunciation_with_prefix + return g2p_dict - def __call__(self, text): + def __call__(self, text: str) -> List[str]: """ - errors func handle below is to process the bilingual situation, - where English words would be split into letters. - e.g. 我今天去了Apple Store, 买了一个iPhone。 - would return a list - ['wo3', 'jin1', 'tian1', 'qu4', 'le5', 'A', 'p', 'p', 'l', 'e', - ' ', 'S', 't', 'o', 'r', 'e', ',', ' ', 'mai3', 'le5', 'yi2', - 'ge4', 'i', 'P', 'h', 'o', 'n', 'e', '。'] + This forward pass function translates Chinese characters into pinyin sequences and then converts the pinyin + into phonemes. It is primarily designed to process texts containing with Chinese characters, but we have + extended its support to handle texts that include both Chinese and English. This extension was mainly + necessitated by the limited availability of bilingual datasets. The `errors` argument used in the + `pypinyin.lazy_pinyin` function below is used to process non-Chinese words, where each English word is split + into letters. + + For example, The text "我今天去了Apple Store, 买了一个iPhone。" would be converted as a list, + `['wo3', 'jin1', 'tian1', 'qu4', 'le5', 'A', 'p', 'p', 'l', 'e', ' ', 'S', 't', 'o', 'r', 'e', ',', ' ', 'mai3', + 'le5', 'yi2', 'ge4', 'i', 'P', 'h', 'o', 'n', 'e', '。']` """ + text = set_grapheme_case(text, case=self.ascii_letter_case) + pinyin_seq = [] words_list = self.word_segmenter(text) + # TODO @xueyang: add a g2p process for non-pinyin words by customizing a function for `errors` argument. For + # example, add a dict look up for English words. for word in words_list: pinyin_seq += self._lazy_pinyin( word, @@ -128,13 +187,18 @@ def __call__(self, text): ) phoneme_seq = [] for pinyin in pinyin_seq: - if pinyin[-1] in self.tones: - assert pinyin[:-1] in self.phoneme_dict, pinyin[:-1] - phoneme_seq += self.phoneme_dict[pinyin[:-1]] - phoneme_seq.append(self.tones[pinyin[-1]]) + # only pinyin has tones while non-pinyin doesn't. + tone_hyp = pinyin[-1] + if tone_hyp in self.tone_dict: + syllable = pinyin[:-1] + assert syllable in self.phoneme_dict, f"Syllable <{syllable}> does not exist in the dictionary." + phoneme_seq += self.phoneme_dict[syllable] + phoneme_seq.append(self.tone_dict[tone_hyp]) # All pinyin would end up with a number in 1-5, which represents tones of the pinyin. - # For symbols which are not pinyin, e.g. English letters, Chinese puncts, we directly + # For symbols which are not pinyin, such as English letters and Chinese punctuations, we directly # use them as inputs. + elif tone_hyp in self.ascii_letter_dict: + phoneme_seq.append(self.ascii_letter_dict[tone_hyp]) else: phoneme_seq.append(pinyin) return phoneme_seq diff --git a/nemo/collections/tts/losses/aligner_loss.py b/nemo/collections/tts/losses/aligner_loss.py index 1a666d750521..792125a25edb 100644 --- a/nemo/collections/tts/losses/aligner_loss.py +++ b/nemo/collections/tts/losses/aligner_loss.py @@ -22,11 +22,12 @@ class ForwardSumLoss(Loss): - def __init__(self, blank_logprob=-1): + def __init__(self, blank_logprob=-1, loss_scale=1.0): super().__init__() self.log_softmax = torch.nn.LogSoftmax(dim=-1) self.ctc_loss = torch.nn.CTCLoss(zero_infinity=True) self.blank_logprob = blank_logprob + self.loss_scale = loss_scale @property def input_types(self): @@ -67,13 +68,15 @@ def forward(self, attn_logprob, in_lens, out_lens): # Evaluate CTC loss cost = self.ctc_loss(attn_logprob, target_seqs, input_lengths=query_lens, target_lengths=key_lens) + cost *= self.loss_scale return cost class BinLoss(Loss): - def __init__(self): + def __init__(self, loss_scale=1.0): super().__init__() + self.loss_scale = loss_scale @property def input_types(self): @@ -91,4 +94,6 @@ def output_types(self): @typecheck() def forward(self, hard_attention, soft_attention): log_sum = torch.log(torch.clamp(soft_attention[hard_attention == 1], min=1e-12)).sum() - return -log_sum / hard_attention.sum() + loss = -log_sum / hard_attention.sum() + loss *= self.loss_scale + return loss diff --git a/nemo/collections/tts/models/aligner.py b/nemo/collections/tts/models/aligner.py index 49301afc1591..9aeb5fbe23ca 100644 --- a/nemo/collections/tts/models/aligner.py +++ b/nemo/collections/tts/models/aligner.py @@ -24,7 +24,12 @@ from torch import nn from nemo.collections.tts.losses.aligner_loss import BinLoss, ForwardSumLoss -from nemo.collections.tts.parts.utils.helpers import binarize_attention, get_mask_from_lengths, plot_alignment_to_numpy +from nemo.collections.tts.parts.utils.helpers import ( + binarize_attention, + g2p_backward_compatible_support, + get_mask_from_lengths, + plot_alignment_to_numpy, +) from nemo.core.classes import ModelPT from nemo.core.classes.common import PretrainedModelInfo from nemo.utils import logging, model_utils @@ -99,11 +104,14 @@ def _setup_tokenizer(self, cfg): text_tokenizer_kwargs = {} if "g2p" in cfg.text_tokenizer: # for backward compatibility - if self._is_model_being_restored() and cfg.text_tokenizer.g2p.get('_target_', None): - cfg.text_tokenizer.g2p['_target_'] = cfg.text_tokenizer.g2p['_target_'].replace( - "nemo_text_processing.g2p", "nemo.collections.tts.g2p" + if ( + self._is_model_being_restored() + and (cfg.text_tokenizer.g2p.get('_target_', None) is not None) + and cfg.text_tokenizer.g2p["_target_"].startswith("nemo_text_processing.g2p") + ): + cfg.text_tokenizer.g2p["_target_"] = g2p_backward_compatible_support( + cfg.text_tokenizer.g2p["_target_"] ) - logging.warning("This checkpoint support will be dropped after NeMo 1.18.0.") g2p_kwargs = {} @@ -126,7 +134,7 @@ def forward(self, *, spec, spec_len, text, text_len, attn_prior=None): attn_soft, attn_logprob = self.alignment_encoder( queries=spec, keys=self.embed(text).transpose(1, 2), - mask=get_mask_from_lengths(text_len).unsqueeze(-1), + mask=get_mask_from_lengths(text_len).unsqueeze(-1) == 0, attn_prior=attn_prior, ) diff --git a/nemo/collections/tts/models/base.py b/nemo/collections/tts/models/base.py index 8ef147b9b145..fe19ae75a3b3 100644 --- a/nemo/collections/tts/models/base.py +++ b/nemo/collections/tts/models/base.py @@ -68,6 +68,18 @@ def list_available_models(cls) -> 'List[PretrainedModelInfo]': list_of_models.extend(subclass_models) return list_of_models + def set_export_config(self, args): + for k in ['enable_volume', 'enable_ragged_batches']: + if k in args: + self.export_config[k] = bool(args[k]) + args.pop(k) + if 'num_speakers' in args: + self.export_config['num_speakers'] = int(args['num_speakers']) + args.pop('num_speakers') + if 'emb_range' in args: + raise Exception('embedding range is not user-settable') + super().set_export_config(args) + class Vocoder(ModelPT, ABC): """ diff --git a/nemo/collections/tts/models/fastpitch.py b/nemo/collections/tts/models/fastpitch.py index 281a7c2891b3..82b997e03e5d 100644 --- a/nemo/collections/tts/models/fastpitch.py +++ b/nemo/collections/tts/models/fastpitch.py @@ -13,6 +13,7 @@ # limitations under the License. import contextlib from dataclasses import dataclass +from pathlib import Path from typing import List, Optional import torch @@ -27,8 +28,10 @@ from nemo.collections.tts.models.base import SpectrogramGenerator from nemo.collections.tts.modules.fastpitch import FastPitchModule from nemo.collections.tts.parts.mixins import FastPitchAdapterModelMixin +from nemo.collections.tts.parts.utils.callbacks import LoggingCallback from nemo.collections.tts.parts.utils.helpers import ( batch_from_ragged, + g2p_backward_compatible_support, plot_alignment_to_numpy, plot_spectrogram_to_numpy, process_batch, @@ -115,6 +118,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): super().__init__(cfg=cfg, trainer=trainer) self.bin_loss_warmup_epochs = cfg.get("bin_loss_warmup_epochs", 100) + self.log_images = cfg.get("log_images", False) self.log_train_images = False loss_scale = 0.1 if self.learn_alignment else 1.0 @@ -135,9 +139,10 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): self.aligner = None if self.learn_alignment: + aligner_loss_scale = cfg.aligner_loss_scale if "aligner_loss_scale" in cfg else 1.0 self.aligner = instantiate(self._cfg.alignment_module) - self.forward_sum_loss_fn = ForwardSumLoss() - self.bin_loss_fn = BinLoss() + self.forward_sum_loss_fn = ForwardSumLoss(loss_scale=aligner_loss_scale) + self.bin_loss_fn = BinLoss(loss_scale=aligner_loss_scale) self.preprocessor = instantiate(self._cfg.preprocessor) input_fft = instantiate(self._cfg.input_fft, **input_fft_kwargs) @@ -154,6 +159,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): speaker_emb_condition_prosody = cfg.get("speaker_emb_condition_prosody", False) speaker_emb_condition_decoder = cfg.get("speaker_emb_condition_decoder", False) speaker_emb_condition_aligner = cfg.get("speaker_emb_condition_aligner", False) + min_token_duration = cfg.get("min_token_duration", 0) use_log_energy = cfg.get("use_log_energy", True) if n_speakers > 1 and "add" not in input_fft.cond_input.condition_types: input_fft.cond_input.condition_types.append("add") @@ -178,6 +184,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): cfg.pitch_embedding_kernel_size, energy_embedding_kernel_size, cfg.n_mel_channels, + min_token_duration, cfg.max_token_duration, use_log_energy, ) @@ -190,6 +197,8 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): if self.fastpitch.speaker_emb is not None: self.export_config["num_speakers"] = cfg.n_speakers + self.log_config = cfg.get("log_config", None) + # Adapter modules setup (from FastPitchAdapterModelMixin) self.setup_adapters() @@ -223,13 +232,15 @@ def _setup_tokenizer(self, cfg): text_tokenizer_kwargs = {} if "g2p" in cfg.text_tokenizer: - # for backward compatibility - if self._is_model_being_restored() and cfg.text_tokenizer.g2p.get('_target_', None): - cfg.text_tokenizer.g2p['_target_'] = cfg.text_tokenizer.g2p['_target_'].replace( - "nemo_text_processing.g2p", "nemo.collections.tts.g2p" + if ( + self._is_model_being_restored() + and (cfg.text_tokenizer.g2p.get('_target_', None) is not None) + and cfg.text_tokenizer.g2p["_target_"].startswith("nemo_text_processing.g2p") + ): + cfg.text_tokenizer.g2p["_target_"] = g2p_backward_compatible_support( + cfg.text_tokenizer.g2p["_target_"] ) - logging.warning("This checkpoint support will be dropped after NeMo 1.18.0.") g2p_kwargs = {} @@ -462,7 +473,7 @@ def training_step(self, batch, batch_idx): self.log("t_bin_loss", bin_loss) # Log images to tensorboard - if self.log_train_images and isinstance(self.logger, TensorBoardLogger): + if self.log_images and self.log_train_images and isinstance(self.logger, TensorBoardLogger): self.log_train_images = False self.tb_logger.add_image( @@ -571,7 +582,7 @@ def validation_epoch_end(self, outputs): _, _, _, _, _, spec_target, spec_predict = outputs[0].values() - if isinstance(self.logger, TensorBoardLogger): + if self.log_images and isinstance(self.logger, TensorBoardLogger): self.tb_logger.add_image( "val_mel_target", plot_spectrogram_to_numpy(spec_target[0].data.cpu().float().numpy()), @@ -658,6 +669,31 @@ def setup_test_data(self, cfg): """Omitted.""" pass + def configure_callbacks(self): + if not self.log_config: + return [] + + sample_ds_class = self.log_config.dataset._target_ + if sample_ds_class != "nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset": + raise ValueError(f"Logging callback only supported for TextToSpeechDataset, got {sample_ds_class}") + + data_loader = self._setup_test_dataloader(self.log_config) + + generators = instantiate(self.log_config.generators) + log_dir = Path(self.log_config.log_dir) if self.log_config.log_dir else None + log_callback = LoggingCallback( + generators=generators, + data_loader=data_loader, + log_epochs=self.log_config.log_epochs, + epoch_frequency=self.log_config.epoch_frequency, + output_dir=log_dir, + loggers=self.trainer.loggers, + log_tensorboard=self.log_config.log_tensorboard, + log_wandb=self.log_config.log_wandb, + ) + + return [log_callback] + @classmethod def list_available_models(cls) -> 'List[PretrainedModelInfo]': """ @@ -742,6 +778,20 @@ def list_available_models(cls) -> 'List[PretrainedModelInfo]': ) list_of_models.append(model) + # en, multi speaker, LibriTTS, 16000 Hz + # stft 25ms 10ms matching ASR params + # for use during Enhlish ASR training/adaptation + model = PretrainedModelInfo( + pretrained_model_name="tts_en_fastpitch_for_asr_finetuning", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/tts_en_fastpitch_spectrogram_enhancer_for_asr_finetuning/versions/1.20.0/files/tts_en_fastpitch_for_asr_finetuning.nemo", + description="This model is trained on LibriSpeech, train-960 subset." + " STFT parameters follow those commonly used in ASR: 25 ms window, 10 ms hop." + " This model is supposed to be used with its companion SpetrogramEnhancer for " + " ASR fine-tuning. Usage for regular TTS tasks is not advised.", + class_=cls, + ) + list_of_models.append(model) + return list_of_models # Methods for model exportability diff --git a/nemo/collections/tts/models/hifigan.py b/nemo/collections/tts/models/hifigan.py index b7ab37e6589e..bf2eef33cdcf 100644 --- a/nemo/collections/tts/models/hifigan.py +++ b/nemo/collections/tts/models/hifigan.py @@ -13,6 +13,7 @@ # limitations under the License. import itertools +from pathlib import Path import torch import torch.nn.functional as F @@ -23,12 +24,13 @@ from nemo.collections.tts.losses.hifigan_losses import DiscriminatorLoss, FeatureMatchingLoss, GeneratorLoss from nemo.collections.tts.models.base import Vocoder from nemo.collections.tts.modules.hifigan_modules import MultiPeriodDiscriminator, MultiScaleDiscriminator +from nemo.collections.tts.parts.utils.callbacks import LoggingCallback from nemo.collections.tts.parts.utils.helpers import get_batch_size, get_num_workers, plot_spectrogram_to_numpy from nemo.core.classes import Exportable from nemo.core.classes.common import PretrainedModelInfo, typecheck from nemo.core.neural_types.elements import AudioSignal, MelSpectrogramType from nemo.core.neural_types.neural_type import NeuralType -from nemo.core.optim.lr_scheduler import CosineAnnealing, compute_max_steps +from nemo.core.optim.lr_scheduler import compute_max_steps, prepare_lr_scheduler from nemo.utils import logging, model_utils HAVE_WANDB = True @@ -47,6 +49,7 @@ def __init__(self, cfg: DictConfig, trainer: 'Trainer' = None): # Convert to Hydra 1.0 compatible DictConfig cfg = model_utils.convert_model_config_to_dict_config(cfg) cfg = model_utils.maybe_update_config_version(cfg) + self.ds_class = cfg.train_ds.dataset._target_ super().__init__(cfg=cfg, trainer=trainer) @@ -69,9 +72,22 @@ def __init__(self, cfg: DictConfig, trainer: 'Trainer' = None): if self._train_dl: self.input_as_mel = self._train_dl.dataset.load_precomputed_mel + self.log_audio = cfg.get("log_audio", False) + self.log_config = cfg.get("log_config", None) + self.lr_schedule_interval = None self.automatic_optimization = False - def _get_max_steps(self): + @property + def max_steps(self): + if "max_steps" in self._cfg: + return self._cfg.get("max_steps") + + if "max_epochs" not in self._cfg: + raise ValueError("Must specify 'max_steps' or 'max_epochs'.") + + if "steps_per_epoch" in self._cfg: + return self._cfg.max_epochs * self._cfg.steps_per_epoch + return compute_max_steps( max_epochs=self._cfg.max_epochs, accumulate_grad_batches=self.trainer.accumulate_grad_batches, @@ -84,16 +100,13 @@ def _get_max_steps(self): @staticmethod def get_warmup_steps(max_steps, warmup_steps, warmup_ratio): - if warmup_steps is not None and warmup_ratio is not None: - raise ValueError(f'Either use warmup_steps or warmup_ratio for scheduler') - if warmup_steps is not None: return warmup_steps if warmup_ratio is not None: return warmup_ratio * max_steps - raise ValueError(f'Specify warmup_steps or warmup_ratio for scheduler') + return None def configure_optimizers(self): optim_config = self._cfg.optim.copy() @@ -102,42 +115,47 @@ def configure_optimizers(self): sched_config = optim_config.pop("sched", None) OmegaConf.set_struct(optim_config, True) - optim_g = instantiate(optim_config, params=self.generator.parameters(),) - optim_d = instantiate(optim_config, params=itertools.chain(self.msd.parameters(), self.mpd.parameters()),) - - # Backward compatibility - if sched_config is None and 'sched' in self._cfg: - sched_config = self._cfg.sched - - if sched_config is not None: - max_steps = self._cfg.get("max_steps", None) - if max_steps is None or max_steps < 0: - max_steps = self._get_max_steps() - - warmup_steps = HifiGanModel.get_warmup_steps( - max_steps=max_steps, - warmup_steps=sched_config.get("warmup_steps", None), - warmup_ratio=sched_config.get("warmup_ratio", None), - ) - - scheduler_g = CosineAnnealing( - optimizer=optim_g, max_steps=max_steps, min_lr=sched_config.min_lr, warmup_steps=warmup_steps, - ) # Use warmup to delay start - sch1_dict = { - 'scheduler': scheduler_g, - 'interval': 'step', - } - - scheduler_d = CosineAnnealing(optimizer=optim_d, max_steps=max_steps, min_lr=sched_config.min_lr,) - sch2_dict = { - 'scheduler': scheduler_d, - 'interval': 'step', - } - - return [optim_g, optim_d], [sch1_dict, sch2_dict] - else: + gen_params = self.generator.parameters() + disc_params = itertools.chain(self.msd.parameters(), self.mpd.parameters()) + optim_g = instantiate(optim_config, params=gen_params) + optim_d = instantiate(optim_config, params=disc_params) + + if sched_config is None: return [optim_g, optim_d] + max_steps = self.max_steps + warmup_steps = self.get_warmup_steps( + max_steps=max_steps, + warmup_steps=sched_config.get("warmup_steps", None), + warmup_ratio=sched_config.get("warmup_ratio", None), + ) + + OmegaConf.set_struct(sched_config, False) + sched_config["max_steps"] = max_steps + if warmup_steps: + sched_config["warmup_steps"] = warmup_steps + sched_config.pop("warmup_ratio", None) + OmegaConf.set_struct(sched_config, True) + + scheduler_g = prepare_lr_scheduler( + optimizer=optim_g, scheduler_config=sched_config, train_dataloader=self._train_dl + ) + + scheduler_d = prepare_lr_scheduler( + optimizer=optim_d, scheduler_config=sched_config, train_dataloader=self._train_dl + ) + + self.lr_schedule_interval = scheduler_g["interval"] + + return [optim_g, optim_d], [scheduler_g, scheduler_d] + + def update_lr(self, interval="step"): + schedulers = self.lr_schedulers() + if schedulers is not None and self.lr_schedule_interval == interval: + sch1, sch2 = schedulers + sch1.step() + sch2.step() + @typecheck() def forward(self, *, spec): """ @@ -153,12 +171,7 @@ def convert_spectrogram_to_audio(self, spec: 'torch.tensor') -> 'torch.tensor': return self(spec=spec).squeeze(1) def training_step(self, batch, batch_idx): - if self.input_as_mel: - # Pre-computed spectrograms will be used as input - audio, audio_len, audio_mel = batch - else: - audio, audio_len = batch - audio_mel, _ = self.audio_to_melspec_precessor(audio, audio_len) + audio, audio_len, audio_mel, _ = self._process_batch(batch) # Mel as input for L1 mel loss audio_trg_mel, _ = self.trg_melspec_fn(audio, audio_len) @@ -196,12 +209,7 @@ def training_step(self, batch, batch_idx): self.manual_backward(loss_g) optim_g.step() - # Run schedulers - schedulers = self.lr_schedulers() - if schedulers is not None: - sch1, sch2 = schedulers - sch1.step() - sch2.step() + self.update_lr() metrics = { "g_loss_fm_mpd": loss_fm_mpd, @@ -218,18 +226,13 @@ def training_step(self, batch, batch_idx): self.log_dict(metrics, on_step=True, sync_dist=True) self.log("g_l1_loss", loss_mel, prog_bar=True, logger=False, sync_dist=True) + def training_epoch_end(self, outputs) -> None: + self.update_lr("epoch") + def validation_step(self, batch, batch_idx): - if self.input_as_mel: - audio, audio_len, audio_mel = batch - audio_mel_len = [audio_mel.shape[1]] * audio_mel.shape[0] - else: - audio, audio_len = batch - audio_mel, audio_mel_len = self.audio_to_melspec_precessor(audio, audio_len) - audio_pred = self(spec=audio_mel) + audio, audio_len, audio_mel, audio_mel_len = self._process_batch(batch) - # Perform bias denoising - pred_denoised = self._bias_denoise(audio_pred, audio_mel).squeeze(1) - pred_denoised_mel, _ = self.audio_to_melspec_precessor(pred_denoised, audio_len) + audio_pred = self(spec=audio_mel) if self.input_as_mel: gt_mel, gt_mel_len = self.audio_to_melspec_precessor(audio, audio_len) @@ -239,7 +242,11 @@ def validation_step(self, batch, batch_idx): self.log_dict({"val_loss": loss_mel}, on_epoch=True, sync_dist=True) # Plot audio once per epoch - if batch_idx == 0 and isinstance(self.logger, WandbLogger) and HAVE_WANDB: + if self.log_audio and batch_idx == 0 and isinstance(self.logger, WandbLogger) and HAVE_WANDB: + # Perform bias denoising + pred_denoised = self._bias_denoise(audio_pred, audio_mel).squeeze(1) + pred_denoised_mel, _ = self.audio_to_melspec_precessor(pred_denoised, audio_len) + clips = [] specs = [] for i in range(min(5, audio.shape[0])): @@ -284,6 +291,21 @@ def validation_step(self, batch, batch_idx): self.logger.experiment.log({"audio": clips, "specs": specs}) + def _process_batch(self, batch): + if self.input_as_mel: + audio, audio_len, audio_mel = batch + audio_mel_len = [audio_mel.shape[1]] * audio_mel.shape[0] + return audio, audio_len, audio_mel, audio_mel_len + + if self.ds_class == "nemo.collections.tts.data.vocoder_dataset.VocoderDataset": + audio = batch.get("audio") + audio_len = batch.get("audio_lens") + else: + audio, audio_len = batch + + audio_mel, audio_mel_len = self.audio_to_melspec_precessor(audio, audio_len) + return audio, audio_len, audio_mel, audio_mel_len + def _bias_denoise(self, audio, mel): def stft(x): comp = torch.stft(x.squeeze(1), n_fft=1024, hop_length=256, win_length=1024, return_complex=True) @@ -311,6 +333,19 @@ def istft(mags, phase): return audio_denoised + def _setup_train_dataloader(self, cfg): + dataset = instantiate(cfg.dataset) + sampler = dataset.get_sampler(cfg.dataloader_params.batch_size) + data_loader = torch.utils.data.DataLoader( + dataset, collate_fn=dataset.collate_fn, sampler=sampler, **cfg.dataloader_params + ) + return data_loader + + def _setup_test_dataloader(self, cfg): + dataset = instantiate(cfg.dataset) + data_loader = torch.utils.data.DataLoader(dataset, collate_fn=dataset.collate_fn, **cfg.dataloader_params) + return data_loader + def __setup_dataloader_from_config(self, cfg, shuffle_should_be: bool = True, name: str = "train"): if "dataset" not in cfg or not isinstance(cfg.dataset, DictConfig): raise ValueError(f"No dataset for {name}") @@ -333,14 +368,44 @@ def __setup_dataloader_from_config(self, cfg, shuffle_should_be: bool = True, na return torch.utils.data.DataLoader(dataset, collate_fn=dataset.collate_fn, **cfg.dataloader_params) def setup_training_data(self, cfg): - self._train_dl = self.__setup_dataloader_from_config(cfg) + if self.ds_class == "nemo.collections.tts.data.vocoder_dataset.VocoderDataset": + self._train_dl = self._setup_train_dataloader(cfg) + else: + self._train_dl = self.__setup_dataloader_from_config(cfg) def setup_validation_data(self, cfg): - self._validation_dl = self.__setup_dataloader_from_config(cfg, shuffle_should_be=False, name="validation") + if self.ds_class == "nemo.collections.tts.data.vocoder_dataset.VocoderDataset": + self._validation_dl = self._setup_test_dataloader(cfg) + else: + self._validation_dl = self.__setup_dataloader_from_config(cfg, shuffle_should_be=False, name="validation") def setup_test_data(self, cfg): pass + def configure_callbacks(self): + if not self.log_config: + return [] + + sample_ds_class = self.log_config.dataset._target_ + if sample_ds_class != "nemo.collections.tts.data.vocoder_dataset.VocoderDataset": + raise ValueError(f"Sample logging only supported for VocoderDataset, got {sample_ds_class}") + + data_loader = self._setup_test_dataloader(self.log_config) + generators = instantiate(self.log_config.generators) + log_dir = Path(self.log_config.log_dir) if self.log_config.log_dir else None + log_callback = LoggingCallback( + generators=generators, + data_loader=data_loader, + log_epochs=self.log_config.log_epochs, + epoch_frequency=self.log_config.epoch_frequency, + output_dir=log_dir, + loggers=self.trainer.loggers, + log_tensorboard=self.log_config.log_tensorboard, + log_wandb=self.log_config.log_wandb, + ) + + return [log_callback] + @classmethod def list_available_models(cls) -> 'Optional[Dict[str, str]]': list_of_models = [] diff --git a/nemo/collections/tts/models/mixer_tts.py b/nemo/collections/tts/models/mixer_tts.py index 38efd5a147a0..1a44cd5b31c8 100644 --- a/nemo/collections/tts/models/mixer_tts.py +++ b/nemo/collections/tts/models/mixer_tts.py @@ -36,6 +36,7 @@ from nemo.collections.tts.modules.fastpitch import average_features, regulate_len from nemo.collections.tts.parts.utils.helpers import ( binarize_attention_parallel, + g2p_backward_compatible_support, get_mask_from_lengths, plot_pitch_to_numpy, plot_spectrogram_to_numpy, @@ -149,11 +150,14 @@ def _setup_tokenizer(self, cfg): text_tokenizer_kwargs = {} if "g2p" in cfg.text_tokenizer: # for backward compatibility - if self._is_model_being_restored() and cfg.text_tokenizer.g2p.get('_target_', None): - cfg.text_tokenizer.g2p['_target_'] = cfg.text_tokenizer.g2p['_target_'].replace( - "nemo_text_processing.g2p", "nemo.collections.tts.g2p" + if ( + self._is_model_being_restored() + and (cfg.text_tokenizer.g2p.get('_target_', None) is not None) + and cfg.text_tokenizer.g2p["_target_"].startswith("nemo_text_processing.g2p") + ): + cfg.text_tokenizer.g2p["_target_"] = g2p_backward_compatible_support( + cfg.text_tokenizer.g2p["_target_"] ) - logging.warning("This checkpoint support will be dropped after NeMo 1.18.0.") g2p_kwargs = {} diff --git a/nemo/collections/tts/models/radtts.py b/nemo/collections/tts/models/radtts.py index 98bfbb4c2a18..7d0860ab9cf9 100644 --- a/nemo/collections/tts/models/radtts.py +++ b/nemo/collections/tts/models/radtts.py @@ -24,6 +24,7 @@ from nemo.collections.tts.models.base import SpectrogramGenerator from nemo.collections.tts.parts.utils.helpers import ( batch_from_ragged, + g2p_backward_compatible_support, plot_alignment_to_numpy, regulate_len, sample_tts_input, @@ -333,11 +334,14 @@ def _setup_tokenizer(self, cfg): text_tokenizer_kwargs = {} if "g2p" in cfg.text_tokenizer: # for backward compatibility - if self._is_model_being_restored() and cfg.text_tokenizer.g2p.get('_target_', None): - cfg.text_tokenizer.g2p['_target_'] = cfg.text_tokenizer.g2p['_target_'].replace( - "nemo_text_processing.g2p", "nemo.collections.tts.g2p" + if ( + self._is_model_being_restored() + and (cfg.text_tokenizer.g2p.get('_target_', None) is not None) + and cfg.text_tokenizer.g2p["_target_"].startswith("nemo_text_processing.g2p") + ): + cfg.text_tokenizer.g2p["_target_"] = g2p_backward_compatible_support( + cfg.text_tokenizer.g2p["_target_"] ) - logging.warning("This checkpoint support will be dropped after NeMo 1.18.0.") g2p_kwargs = {} diff --git a/nemo/collections/tts/models/spectrogram_enhancer.py b/nemo/collections/tts/models/spectrogram_enhancer.py index bcc7e69a10bf..ca2fe6122230 100644 --- a/nemo/collections/tts/models/spectrogram_enhancer.py +++ b/nemo/collections/tts/models/spectrogram_enhancer.py @@ -56,7 +56,7 @@ HingeLoss, ) from nemo.collections.tts.parts.utils.helpers import mask_sequence_tensor, to_device_recursive -from nemo.core import Exportable, ModelPT, typecheck +from nemo.core import Exportable, ModelPT, PretrainedModelInfo, typecheck from nemo.core.neural_types import LengthsType, MelSpectrogramType, NeuralType from nemo.core.neural_types.elements import BoolType from nemo.utils import logging @@ -277,7 +277,23 @@ def setup_validation_data(self, val_data_config): @classmethod def list_available_models(cls): - return [] + list_of_models = [] + + # en, multi speaker, LibriTTS, 16000 Hz + # stft 25ms 10ms matching ASR params + # for use during Enhlish ASR training/adaptation + model = PretrainedModelInfo( + pretrained_model_name="tts_en_spectrogram_enhancer_for_asr_finetuning", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/tts_en_fastpitch_spectrogram_enhancer_for_asr_finetuning/versions/1.20.0/files/tts_en_spectrogram_enhancer_for_asr_finetuning.nemo", + description="This model is trained to add details to synthetic spectrograms." + " It was trained on pairs of real-synthesized spectrograms generated by FastPitch." + " STFT parameters follow ASR with 25 ms window and 10 ms hop." + " It is supposed to be used in conjunction with that model for ASR training/adaptation.", + class_=cls, + ) + list_of_models.append(model) + + return list_of_models def log_illustration(self, target_spectrograms, input_spectrograms, enhanced_spectrograms, lengths): if self.global_rank != 0: diff --git a/nemo/collections/tts/models/tacotron2.py b/nemo/collections/tts/models/tacotron2.py index 37880a0eae6f..37e00bd26cae 100644 --- a/nemo/collections/tts/models/tacotron2.py +++ b/nemo/collections/tts/models/tacotron2.py @@ -27,6 +27,7 @@ from nemo.collections.tts.losses.tacotron2loss import Tacotron2Loss from nemo.collections.tts.models.base import SpectrogramGenerator from nemo.collections.tts.parts.utils.helpers import ( + g2p_backward_compatible_support, get_mask_from_lengths, tacotron2_log_to_tb_func, tacotron2_log_to_wandb_func, @@ -333,11 +334,14 @@ def _setup_tokenizer(self, cfg): text_tokenizer_kwargs = {} if "g2p" in cfg.text_tokenizer and cfg.text_tokenizer.g2p is not None: # for backward compatibility - if self._is_model_being_restored() and cfg.text_tokenizer.g2p.get('_target_', None): - cfg.text_tokenizer.g2p['_target_'] = cfg.text_tokenizer.g2p['_target_'].replace( - "nemo_text_processing.g2p", "nemo.collections.tts.g2p" + if ( + self._is_model_being_restored() + and (cfg.text_tokenizer.g2p.get('_target_', None) is not None) + and cfg.text_tokenizer.g2p["_target_"].startswith("nemo_text_processing.g2p") + ): + cfg.text_tokenizer.g2p["_target_"] = g2p_backward_compatible_support( + cfg.text_tokenizer.g2p["_target_"] ) - logging.warning("This checkpoint support will be dropped after NeMo 1.18.0.") g2p_kwargs = {} diff --git a/nemo/collections/tts/models/vits.py b/nemo/collections/tts/models/vits.py index 78614fa6264b..319221d04ee0 100644 --- a/nemo/collections/tts/models/vits.py +++ b/nemo/collections/tts/models/vits.py @@ -28,7 +28,12 @@ from nemo.collections.tts.losses.vits_losses import DiscriminatorLoss, FeatureMatchingLoss, GeneratorLoss, KlLoss from nemo.collections.tts.models.base import TextToWaveform from nemo.collections.tts.modules.vits_modules import MultiPeriodDiscriminator -from nemo.collections.tts.parts.utils.helpers import clip_grad_value_, plot_spectrogram_to_numpy, slice_segments +from nemo.collections.tts.parts.utils.helpers import ( + clip_grad_value_, + g2p_backward_compatible_support, + plot_spectrogram_to_numpy, + slice_segments, +) from nemo.collections.tts.torch.tts_data_types import SpeakerID from nemo.core.classes.common import PretrainedModelInfo, typecheck from nemo.core.neural_types.elements import AudioSignal, FloatType, Index, IntType, TokenIndex @@ -113,11 +118,14 @@ def _setup_tokenizer(self, cfg): text_tokenizer_kwargs = {} if "g2p" in cfg.text_tokenizer and cfg.text_tokenizer.g2p is not None: # for backward compatibility - if self._is_model_being_restored() and cfg.text_tokenizer.g2p.get('_target_', None): - cfg.text_tokenizer.g2p['_target_'] = cfg.text_tokenizer.g2p['_target_'].replace( - "nemo_text_processing.g2p", "nemo.collections.tts.g2p" + if ( + self._is_model_being_restored() + and (cfg.text_tokenizer.g2p.get('_target_', None) is not None) + and cfg.text_tokenizer.g2p["_target_"].startswith("nemo_text_processing.g2p") + ): + cfg.text_tokenizer.g2p["_target_"] = g2p_backward_compatible_support( + cfg.text_tokenizer.g2p["_target_"] ) - logging.warning("This checkpoint support will be dropped after NeMo 1.18.0.") g2p_kwargs = {} diff --git a/nemo/collections/tts/modules/fastpitch.py b/nemo/collections/tts/modules/fastpitch.py index b26aafa72e32..f7601302d81e 100644 --- a/nemo/collections/tts/modules/fastpitch.py +++ b/nemo/collections/tts/modules/fastpitch.py @@ -80,6 +80,12 @@ def average_features(pitch, durs): return pitch_avg +def log_to_duration(log_dur, min_dur, max_dur, mask): + dur = torch.clamp(torch.exp(log_dur) - 1.0, min_dur, max_dur) + dur *= mask.squeeze(2) + return dur + + class ConvReLUNorm(torch.nn.Module, adapter_mixins.AdapterModuleMixin): def __init__(self, in_channels, out_channels, kernel_size=1, dropout=0.0, condition_dim=384, condition_types=[]): super(ConvReLUNorm, self).__init__() @@ -163,6 +169,7 @@ def __init__( pitch_embedding_kernel_size: int, energy_embedding_kernel_size: int, n_mel_channels: int = 80, + min_token_duration: int = 0, max_token_duration: int = 75, use_log_energy: bool = True, ): @@ -188,8 +195,8 @@ def __init__( else: self.speaker_emb = None + self.min_token_duration = min_token_duration self.max_token_duration = max_token_duration - self.min_token_duration = 0 self.pitch_emb = torch.nn.Conv1d( 1, @@ -294,7 +301,9 @@ def forward( # Predict duration log_durs_predicted = self.duration_predictor(enc_out, enc_mask, conditioning=spk_emb) - durs_predicted = torch.clamp(torch.exp(log_durs_predicted) - 1, 0, self.max_token_duration) + durs_predicted = log_to_duration( + log_dur=log_durs_predicted, min_dur=self.min_token_duration, max_dur=self.max_token_duration, mask=enc_mask + ) attn_soft, attn_hard, attn_hard_dur, attn_logprob = None, None, None, None if self.learn_alignment and spec is not None: @@ -398,8 +407,8 @@ def infer( # Predict duration and pitch log_durs_predicted = self.duration_predictor(enc_out, enc_mask, conditioning=spk_emb) - durs_predicted = torch.clamp( - torch.exp(log_durs_predicted) - 1.0, self.min_token_duration, self.max_token_duration + durs_predicted = log_to_duration( + log_dur=log_durs_predicted, min_dur=self.min_token_duration, max_dur=self.max_token_duration, mask=enc_mask ) pitch_predicted = self.pitch_predictor(enc_out, enc_mask, conditioning=spk_emb) + pitch pitch_emb = self.pitch_emb(pitch_predicted.unsqueeze(1)) @@ -444,6 +453,7 @@ def __init__( symbols_embedding_dim: int, pitch_embedding_kernel_size: int, n_mel_channels: int = 80, + min_token_duration: int = 0, max_token_duration: int = 75, ): super().__init__() @@ -453,8 +463,8 @@ def __init__( self.duration_predictor = duration_predictor self.pitch_predictor = pitch_predictor + self.min_token_duration = min_token_duration self.max_token_duration = max_token_duration - self.min_token_duration = 0 if self.pitch_predictor is not None: self.pitch_emb = torch.nn.Conv1d( @@ -497,7 +507,12 @@ def forward(self, *, enc_out=None, enc_mask=None, durs=None, pitch=None, pace=1. log_durs_predicted, durs_predicted = None, None if self.duration_predictor is not None: log_durs_predicted = self.duration_predictor(enc_out, enc_mask) - durs_predicted = torch.clamp(torch.exp(log_durs_predicted) - 1, 0, self.max_token_duration) + durs_predicted = log_to_duration( + log_dur=log_durs_predicted, + min_dur=self.min_token_duration, + max_dur=self.max_token_duration, + mask=enc_mask, + ) # Predict pitch pitch_predicted = None diff --git a/nemo/collections/tts/modules/submodules.py b/nemo/collections/tts/modules/submodules.py index 6efccf18eeea..408ab02dead2 100644 --- a/nemo/collections/tts/modules/submodules.py +++ b/nemo/collections/tts/modules/submodules.py @@ -758,15 +758,11 @@ def forward(self, batch_size=None, speaker=None, reference_spec=None, reference_ embs = self.lookup_module(speaker) # Get GST based speaker embedding - if self.gst_module is not None: - if reference_spec is None or reference_spec_lens is None: - raise ValueError( - "You should add `reference_audio` in sup_data_types or remove `speaker_encoder`in config." - ) - out = self.gst_module(reference_spec, reference_spec_lens) - embs = out if embs is None else embs + out - - elif self.gst_module is None and reference_spec is not None and reference_spec_lens is not None: - logging.warning("You may add `gst_module` in speaker_encoder to use reference_audio.") + if reference_spec is not None and reference_spec_lens is not None: + if self.gst_module is not None: + out = self.gst_module(reference_spec, reference_spec_lens) + embs = out if embs is None else embs + out + else: + logging.warning("You may add `gst_module` in speaker_encoder to use reference_audio.") return embs diff --git a/nemo/collections/tts/parts/preprocessing/audio_trimming.py b/nemo/collections/tts/parts/preprocessing/audio_trimming.py index ead2fe023074..71a10e4a5cc6 100644 --- a/nemo/collections/tts/parts/preprocessing/audio_trimming.py +++ b/nemo/collections/tts/parts/preprocessing/audio_trimming.py @@ -101,6 +101,8 @@ def trim_audio(self, audio: np.array, sample_rate: int, audio_id: str = "") -> T start_frame, end_frame = get_start_and_end_of_speech_frames( is_speech=speech_frames, speech_frame_threshold=self.speech_frame_threshold, audio_id=audio_id, ) + if not start_frame and not end_frame: + return np.array([]), 0, 0 start_sample = librosa.core.frames_to_samples(start_frame, hop_length=self.trim_hop_length) end_sample = librosa.core.frames_to_samples(end_frame, hop_length=self.trim_hop_length) @@ -170,6 +172,9 @@ def __init__( self.volume_norm = volume_norm def _detect_speech(self, audio: np.array) -> np.array: + if audio.shape[0] < self.trim_win_length: + return np.array([]) + # [num_frames, win_length] audio_frames = librosa.util.frame( audio, frame_length=self.trim_win_length, hop_length=self.trim_hop_length @@ -214,6 +219,8 @@ def trim_audio(self, audio: np.array, sample_rate: int, audio_id: str = "") -> T start_frame, end_frame = get_start_and_end_of_speech_frames( is_speech=speech_frames, speech_frame_threshold=self.speech_frame_threshold, audio_id=audio_id, ) + if not start_frame and not end_frame: + return np.array([]), 0, 0 if start_frame == 0: start_sample = 0 @@ -276,13 +283,10 @@ def get_start_and_end_of_speech_frames( end_frame = i break - if start_frame is None: - logging.warning(f"Could not find start of speech for '{audio_id}'") - start_frame = 0 - - if end_frame is None: - logging.warning(f"Could not find end of speech for '{audio_id}'") - end_frame = num_frames + if start_frame is None or end_frame is None: + # Algorithm is symmetric, so if the start is not found then the end should also not be found. + logging.warning(f"Could not find start or end of speech for '{audio_id}'") + return 0, 0 return start_frame, end_frame diff --git a/nemo/collections/tts/parts/utils/callbacks.py b/nemo/collections/tts/parts/utils/callbacks.py new file mode 100644 index 000000000000..2320e5b21a7c --- /dev/null +++ b/nemo/collections/tts/parts/utils/callbacks.py @@ -0,0 +1,428 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from abc import ABC, abstractmethod +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Optional, Tuple, Type + +import librosa +import numpy as np +import soundfile as sf +import torch +from einops import rearrange +from pytorch_lightning import Callback, LightningModule, Trainer +from pytorch_lightning.loggers import TensorBoardLogger +from pytorch_lightning.loggers.logger import Logger +from pytorch_lightning.loggers.wandb import WandbLogger + +from nemo.collections.tts.parts.utils.helpers import create_plot +from nemo.utils.decorators import experimental + +HAVE_WANDB = True +try: + import wandb +except ModuleNotFoundError: + HAVE_WANDB = False + + +def _get_logger(loggers: List[Logger], logger_type: Type[Logger]): + for logger in loggers: + if isinstance(logger, logger_type): + if hasattr(logger, "experiment"): + return logger.experiment + else: + return logger + raise ValueError(f"Could not find {logger_type} logger in {loggers}.") + + +def _load_vocoder(model_name: Optional[str], checkpoint_path: Optional[str], type: str): + assert (model_name is None) != ( + checkpoint_path is None + ), f"Must provide exactly one of vocoder model_name or checkpoint: ({model_name}, {checkpoint_path})" + + checkpoint_path = str(checkpoint_path) + if type == "hifigan": + from nemo.collections.tts.models import HifiGanModel + + model_type = HifiGanModel + elif type == "univnet": + from nemo.collections.tts.models import UnivNetModel + + model_type = UnivNetModel + else: + raise ValueError(f"Unknown vocoder type '{type}'") + + if model_name is not None: + vocoder = model_type.from_pretrained(model_name) + elif checkpoint_path.endswith(".nemo"): + vocoder = model_type.restore_from(checkpoint_path) + else: + vocoder = model_type.load_from_checkpoint(checkpoint_path) + + return vocoder.eval() + + +@dataclass +class AudioArtifact: + id: str + data: np.ndarray + sample_rate: int + filename: str + + +@dataclass +class ImageArtifact: + id: str + data: np.ndarray + filename: str + x_axis: str + y_axis: str + + +@dataclass +class LogAudioParams: + vocoder_type: str + vocoder_name: str + vocoder_checkpoint_path: str + log_audio_gta: bool = False + + +def create_id(filepath: Path) -> str: + path_prefix = str(filepath.with_suffix("")) + file_id = path_prefix.replace(os.sep, "_") + return file_id + + +class ArtifactGenerator(ABC): + @abstractmethod + def generate_artifacts( + self, model: LightningModule, batch_dict: Dict + ) -> Tuple[List[AudioArtifact], List[ImageArtifact]]: + """ + Create artifacts for the input model and test batch. + + Args: + model: Model instance being trained to use for inference. + batch_dict: Test batch to generate artifacts for. + + Returns: + List of audio and image artifacts to log. + """ + + +@experimental +class LoggingCallback(Callback): + """ + Callback which can log artifacts (eg. model predictions, graphs) to local disk, Tensorboard, and/or WandB. + + Args: + generators: List of generators to create and log artifacts from. + data_loader: Data to log artifacts for. + log_epochs: Optional list of specific training epoch numbers to log artifacts for. + epoch_frequency: Frequency with which to log + output_dir: Optional local directory. If provided, artifacts will be saved in output_dir. + loggers: Optional list of loggers to use if logging to tensorboard or wandb. + log_tensorboard: Whether to log artifacts to tensorboard. + log_wandb: Whether to log artifacts to WandB. + """ + + def __init__( + self, + generators: List[ArtifactGenerator], + data_loader: torch.utils.data.DataLoader, + log_epochs: Optional[List[int]] = None, + epoch_frequency: int = 1, + output_dir: Optional[Path] = None, + loggers: Optional[List[Logger]] = None, + log_tensorboard: bool = False, + log_wandb: bool = False, + ): + self.generators = generators + self.data_loader = data_loader + self.log_epochs = log_epochs if log_epochs else [] + self.epoch_frequency = epoch_frequency + self.output_dir = Path(output_dir) if output_dir else None + self.loggers = loggers if loggers else [] + self.log_tensorboard = log_tensorboard + self.log_wandb = log_wandb + + if log_tensorboard: + self.tensorboard_logger = _get_logger(self.loggers, TensorBoardLogger) + else: + self.tensorboard_logger = None + + if log_wandb: + if not HAVE_WANDB: + raise ValueError("Wandb not installed.") + self.wandb_logger = _get_logger(self.loggers, WandbLogger) + else: + self.wandb_logger = None + + def _log_audio(self, audio: AudioArtifact, log_dir: Path, step: int): + if log_dir: + filepath = log_dir / audio.filename + sf.write(file=filepath, data=audio.data, samplerate=audio.sample_rate) + + if self.tensorboard_logger: + self.tensorboard_logger.add_audio( + tag=audio.id, snd_tensor=audio.data, global_step=step, sample_rate=audio.sample_rate, + ) + + if self.wandb_logger: + wandb_audio = (wandb.Audio(audio.data, sample_rate=audio.sample_rate, caption=audio.id),) + self.wandb_logger.log({audio.id: wandb_audio}) + + def _log_image(self, image: ImageArtifact, log_dir: Path, step: int): + if log_dir: + filepath = log_dir / image.filename + else: + filepath = None + + image_plot = create_plot(output_filepath=filepath, data=image.data, x_axis=image.x_axis, y_axis=image.y_axis) + + if self.tensorboard_logger: + self.tensorboard_logger.add_image( + tag=image.id, img_tensor=image_plot, global_step=step, dataformats="HWC", + ) + + if self.wandb_logger: + wandb_image = (wandb.Image(image_plot, caption=image.id),) + self.wandb_logger.log({image.id: wandb_image}) + + def on_train_epoch_end(self, trainer: Trainer, model: LightningModule): + epoch = 1 + model.current_epoch + if (epoch not in self.log_epochs) and (epoch % self.epoch_frequency != 0): + return + + if self.output_dir: + log_dir = self.output_dir / f"epoch_{epoch}" + log_dir.mkdir(parents=True, exist_ok=True) + else: + log_dir = None + + audio_list = [] + image_list = [] + for batch_dict in self.data_loader: + for key, value in batch_dict.items(): + if isinstance(value, torch.Tensor): + batch_dict[key] = value.to(model.device) + + for generator in self.generators: + audio, images = generator.generate_artifacts(model=model, batch_dict=batch_dict) + audio_list += audio + image_list += images + + for audio in audio_list: + self._log_audio(audio=audio, log_dir=log_dir, step=model.global_step) + + for image in image_list: + self._log_image(image=image, log_dir=log_dir, step=model.global_step) + + +class VocoderArtifactGenerator(ArtifactGenerator): + """ + Generator for logging Vocoder model outputs. + """ + + def generate_artifacts( + self, model: LightningModule, batch_dict: Dict + ) -> Tuple[List[AudioArtifact], List[ImageArtifact]]: + + audio_artifacts = [] + + audio_filepaths = batch_dict.get("audio_filepaths") + audio_ids = [create_id(p) for p in audio_filepaths] + + audio = batch_dict.get("audio") + audio_len = batch_dict.get("audio_lens") + + spec, spec_len = model.audio_to_melspec_precessor(audio, audio_len) + + with torch.no_grad(): + audio_pred = model.forward(spec=spec) + audio_pred = rearrange(audio_pred, "B 1 T -> B T") + + for i, audio_id in enumerate(audio_ids): + audio_pred_i = audio_pred[i][: audio_len[i]].cpu().numpy() + audio_artifact = AudioArtifact( + id=f"audio_{audio_id}", data=audio_pred_i, filename=f"{audio_id}.wav", sample_rate=model.sample_rate, + ) + audio_artifacts.append(audio_artifact) + + return audio_artifacts, [] + + +class FastPitchArtifactGenerator(ArtifactGenerator): + """ + Generator for logging FastPitch model outputs. + + Args: + log_spectrogram: Whether to log predicted spectrograms. + log_alignment: Whether to log alignment graphs. + audio_params: Optional parameters for saving predicted audio. + Requires a vocoder model checkpoint for generating audio from predicted spectrograms. + """ + + def __init__( + self, + log_spectrogram: bool = False, + log_alignment: bool = False, + audio_params: Optional[LogAudioParams] = None, + ): + self.log_spectrogram = log_spectrogram + self.log_alignment = log_alignment + + if not audio_params: + self.log_audio = False + self.log_audio_gta = False + self.vocoder = None + else: + self.log_audio = True + self.log_audio_gta = audio_params.log_audio_gta + self.vocoder = _load_vocoder( + model_name=audio_params.vocoder_name, + checkpoint_path=audio_params.vocoder_checkpoint_path, + type=audio_params.vocoder_type, + ) + + def _generate_audio(self, mels, mels_len, hop_length): + voc_input = mels.to(self.vocoder.device) + with torch.no_grad(): + audio_pred = self.vocoder.convert_spectrogram_to_audio(spec=voc_input) + + mels_len_array = mels_len.cpu().numpy() + audio_pred_lens = librosa.core.frames_to_samples(mels_len_array, hop_length=hop_length) + return audio_pred, audio_pred_lens + + def _generate_predictions(self, model: LightningModule, audio_ids: List[str], batch_dict: Dict): + audio_artifacts = [] + image_artifacts = [] + + text = batch_dict.get("text") + text_lens = batch_dict.get("text_lens") + speaker = batch_dict.get("speaker_id", None) + + with torch.no_grad(): + # [B, C, T_spec] + mels_pred, mels_pred_len, *_ = model.forward(text=text, input_lens=text_lens, speaker=speaker,) + + if self.log_spectrogram: + for i, audio_id in enumerate(audio_ids): + spec_i = mels_pred[i][:, : mels_pred_len[i]].cpu().numpy() + spec_artifact = ImageArtifact( + id=f"spec_{audio_id}", + data=spec_i, + filename=f"{audio_id}_spec.png", + x_axis="Audio Frames", + y_axis="Channels", + ) + image_artifacts.append(spec_artifact) + + if self.log_audio: + # [B, T_audio] + audio_pred, audio_pred_lens = self._generate_audio( + mels=mels_pred, mels_len=mels_pred_len, hop_length=model.preprocessor.hop_length + ) + for i, audio_id in enumerate(audio_ids): + audio_pred_i = audio_pred[i][: audio_pred_lens[i]].cpu().numpy() + audio_artifact = AudioArtifact( + id=f"audio_{audio_id}", + data=audio_pred_i, + filename=f"{audio_id}.wav", + sample_rate=self.vocoder.sample_rate, + ) + audio_artifacts.append(audio_artifact) + + return audio_artifacts, image_artifacts + + def _generate_gta_predictions(self, model: LightningModule, audio_ids: List[str], batch_dict: Dict): + audio_artifacts = [] + image_artifacts = [] + + audio = batch_dict.get("audio") + audio_lens = batch_dict.get("audio_lens") + text = batch_dict.get("text") + text_lens = batch_dict.get("text_lens") + attn_prior = batch_dict.get("align_prior_matrix", None) + pitch = batch_dict.get("pitch", None) + energy = batch_dict.get("energy", None) + speaker = batch_dict.get("speaker_id", None) + + mels, spec_len = model.preprocessor(input_signal=audio, length=audio_lens) + with torch.no_grad(): + mels_pred, mels_pred_len, _, _, _, attn, _, _, _, _, _, _ = model.forward( + text=text, + input_lens=text_lens, + pitch=pitch, + energy=energy, + speaker=speaker, + spec=mels, + mel_lens=spec_len, + attn_prior=attn_prior, + ) + + if self.log_alignment: + attn = rearrange(attn, "B 1 T_spec T_text -> B T_text T_spec") + for i, audio_id in enumerate(audio_ids): + attn_i = attn[i][: text_lens[i], : mels_pred_len[i]].cpu().numpy() + alignment_artifact = ImageArtifact( + id=f"align_{audio_id}", + data=attn_i, + filename=f"{audio_id}_align.png", + x_axis="Audio Frames", + y_axis="Text Tokens", + ) + image_artifacts.append(alignment_artifact) + + if self.log_audio_gta: + # [B, T_audio] + audio_pred, audio_pred_lens = self._generate_audio( + mels=mels_pred, mels_len=mels_pred_len, hop_length=model.preprocessor.hop_length + ) + for i, audio_id in enumerate(audio_ids): + audio_pred_i = audio_pred[i][: audio_pred_lens[i]].cpu().numpy() + audio_artifact = AudioArtifact( + id=f"audio_gta_{audio_id}", + data=audio_pred_i, + filename=f"{audio_id}_gta.wav", + sample_rate=self.vocoder.sample_rate, + ) + audio_artifacts.append(audio_artifact) + + return audio_artifacts, image_artifacts + + def generate_artifacts( + self, model: LightningModule, batch_dict: Dict + ) -> Tuple[List[AudioArtifact], List[ImageArtifact]]: + + audio_artifacts = [] + image_artifacts = [] + audio_filepaths = batch_dict.get("audio_filepaths") + audio_ids = [create_id(p) for p in audio_filepaths] + + if self.log_audio or self.log_spectrogram: + audio_pred, spec_pred = self._generate_predictions(model=model, batch_dict=batch_dict, audio_ids=audio_ids) + audio_artifacts += audio_pred + image_artifacts += spec_pred + + if self.log_audio_gta or self.log_alignment: + audio_gta_pred, alignments = self._generate_gta_predictions( + model=model, batch_dict=batch_dict, audio_ids=audio_ids + ) + audio_artifacts += audio_gta_pred + image_artifacts += alignments + + return audio_artifacts, image_artifacts diff --git a/nemo/collections/tts/parts/utils/helpers.py b/nemo/collections/tts/parts/utils/helpers.py index 3109a9658ba3..b9ea0854e48c 100644 --- a/nemo/collections/tts/parts/utils/helpers.py +++ b/nemo/collections/tts/parts/utils/helpers.py @@ -54,6 +54,7 @@ from nemo.collections.tts.torch.tts_data_types import DATA_STR2DATA_CLASS, MAIN_DATA_TYPES, WithLens from nemo.utils import logging +from nemo.utils.decorators import deprecated HAVE_WANDB = True try: @@ -484,6 +485,23 @@ def plot_spectrogram_to_numpy(spectrogram): return data +def create_plot(data, x_axis, y_axis, output_filepath=None): + fig, ax = plt.subplots(figsize=(12, 3)) + im = ax.imshow(data, aspect="auto", origin="lower", interpolation="none") + plt.colorbar(im, ax=ax) + plt.xlabel(x_axis) + plt.ylabel(y_axis) + plt.tight_layout() + + if output_filepath: + plt.savefig(output_filepath, format="png") + + fig.canvas.draw() + data = save_figure_to_numpy(fig) + plt.close() + return data + + def plot_gate_outputs_to_numpy(gate_targets, gate_outputs): fig, ax = plt.subplots(figsize=(12, 3)) ax.scatter( @@ -808,3 +826,14 @@ def sample_tts_input( 0, export_config["num_speakers"], (max_batch,), device=device, dtype=torch.int64 ) return inputs + + +@deprecated( + explanation="But it will not be removed until a further notice. G2P object root directory " + "`nemo_text_processing.g2p` has been replaced with `nemo.collections.tts.g2p`. " + "Please use the latter instead as of NeMo 1.18.0." +) +def g2p_backward_compatible_support(g2p_target: str) -> str: + # for backward compatibility + g2p_target_new = g2p_target.replace("nemo_text_processing.g2p", "nemo.collections.tts.g2p") + return g2p_target_new diff --git a/nemo/collections/vision/models/megatron_vit_classification_models.py b/nemo/collections/vision/models/megatron_vit_classification_models.py index 5c6da1508062..c70b1e408d7d 100644 --- a/nemo/collections/vision/models/megatron_vit_classification_models.py +++ b/nemo/collections/vision/models/megatron_vit_classification_models.py @@ -23,6 +23,7 @@ from pytorch_lightning.trainer.trainer import Trainer from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import MegatronPretrainingSampler +from nemo.collections.nlp.modules.common.megatron.attention import HAVE_FLASH_ATTENTION from nemo.collections.nlp.modules.common.megatron.build_model import build_model from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule from nemo.collections.nlp.modules.common.megatron.utils import ( @@ -191,6 +192,8 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): grad_accum_steps = cfg.get('global_batch_size') // (cfg.get('micro_batch_size') * data_parallel_world_size) self._nsys_profile_start_step *= grad_accum_steps self._nsys_profile_end_step *= grad_accum_steps + self.get_attention_mask_from_fusion = self.cfg.get('get_attention_mask_from_fusion', True) + self.initialize_ub = self.cfg.get('ub_tp_comm_overlap', False) def get_module_list(self): if isinstance(self.model, list): @@ -323,6 +326,8 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): no_sync_func=no_sync_func, grad_sync_func=grad_sync_func, param_sync_func=param_sync_func, + overlap_p2p_comm=self.cfg.get('overlap_p2p_comm', False), + batch_p2p_comm=self.cfg.get('batch_p2p_comm', True), ) # only the last stages of the pipeline return losses @@ -349,6 +354,30 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): return loss_mean, accuracy_mean + def initialize_ub_func(self): + raise NotImplementedError("userbuffer is not implemented!") + input_shape = [ + self.cfg.get('encoder_seq_length') * self.cfg.get('micro_batch_size'), + self.cfg.get('hidden_size'), + ] + ub_cfg_file_name = self.cfg.get('ub_tp_comm_overlap_cfg', None) + ub_cfgs = None + if ub_cfg_file_name is not None: + try: + import yaml + + with open(ub_cfg_file_name, 'r') as ub_cfg_file: + ub_cfgs = yaml.safe_load(ub_cfg_file) + except (ImportError, TypeError): + logging.error(f"Fail to read ub_tp_comm_overlap config file: {ub_cfg_file_name}.") + te_module.initialize_ub( + shape=input_shape, + tp_size=self.cfg.get('tensor_model_parallel_size'), + use_fp8=self.cfg.get('fp8'), + ub_cfgs=ub_cfgs, + ) + self.initialize_ub = False + def training_step(self, dataloader_iter, batch_idx): """ Our dataloaders produce a micro-batch and then we fetch @@ -358,6 +387,9 @@ def training_step(self, dataloader_iter, batch_idx): Microbatches are then moved to GPU during the pipeline. The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. """ + # Initialize userbuffer communicators. + if self.initialize_ub: + self.initialize_ub_func() # we zero grads here because we also call backward in the megatron-core fwd/bwd functions self._optimizer.zero_grad() @@ -515,6 +547,10 @@ def validation_step(self, dataloader_iter, batch_idx): from the dataloader to produce a list of microbatches. The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. """ + # Initialize userbuffer communicators. + if self.initialize_ub: + self.initialize_ub_func() + loss, accuracy = self.fwd_bwd_step(dataloader_iter, batch_idx, True) return loss, accuracy diff --git a/nemo/collections/vision/models/vision_base_model.py b/nemo/collections/vision/models/vision_base_model.py index aefa5287bf83..988d030c9e58 100644 --- a/nemo/collections/vision/models/vision_base_model.py +++ b/nemo/collections/vision/models/vision_base_model.py @@ -28,6 +28,7 @@ from pytorch_lightning.utilities.migration import pl_legacy_patch from transformers import TRANSFORMERS_CACHE +from nemo.collections.nlp.modules.common.megatron.attention import HAVE_FLASH_ATTENTION from nemo.collections.nlp.modules.common.megatron.clip_grads import ( clip_grad_norm_distributed_optimizer, clip_grad_norm_fp32, @@ -165,17 +166,17 @@ def load_from_checkpoint( class MegatronVisionModel(VisionModel): """ Megatron vision base class - It does the following things: - 1. Initialize the model parallel for nemo given the model parallel parameters. - 2. Turn on all the nvidia optimizations. - 3. If `cfg.tokenizer` is available, it loads the tokenizer and pad the vocab to the correct size for tensor model parallelism. - 4. If using distributed optimizer, configure to be compatible with - O2-level optimizations and/or model parallelism. - 5. Perform gradient clipping: `grad_clip_pl_default` triggers the - PyTorch Lightning default implementation, `with_distributed_adam` - triggers the distributed optimizer's implementation, - `megatron_amp_o2` triggers gradient clipping on the main grads, - and otherwise gradient clipping is performed on the model grads. + + - Initialize the model parallel world for nemo. + - Turn on all of the nvidia optimizations. + - If `cfg.tokenizer` is available, it loads the tokenizer and pad the vocab to the + correct size for tensor model parallelism. + - If using distributed optimizer, configure to be compatible + with O2 level optimizations and/or model parallelism. + - Perform gradient clipping: `grad_clip_pl_default` triggers + the PyTorch Lightning default implementation, `with_distributed_adam` triggers + the distributed optimizer's implementation, `megatron_amp_O2` triggers gradient clipping on the main grads, + and otherwise gradient clipping is performed on the model grads. """ def __init__(self, cfg: DictConfig, trainer: Trainer): @@ -188,6 +189,12 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): if trainer is None: raise ValueError(f"Trainer cannot be None for Megatron-based models. Please provide a PTL trainer object.") + if cfg.get('use_flash_attention', False) and not HAVE_FLASH_ATTENTION: + raise ImportError( + "flash_attn was not found. Please see the installation instructions: https://github.com/HazyResearch/flash-attention." + "If you use flash_attn with triton. Please install triton==2.0.0.dev20221202." + ) + super().__init__(cfg, trainer=trainer) self.with_distributed_adam = cfg.optim.get('name') == 'distributed_fused_adam' @@ -224,6 +231,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): global_batch_size=cfg.get('global_batch_size'), rampup_batch_size=cfg.get('rampup_batch_size'), use_fp8=cfg.get('fp8', False), + init_mpi_proc_group=cfg.get('ub_tp_comm_overlap', False), seed=self.cfg.get('seed', 1234), apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30), ) @@ -241,6 +249,14 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): "default_on_epoch": False, } + self.gc_interval = cfg.get('gc_interval', 0) + assert self.gc_interval >= 0, "gc_interval should be an integer value larger than or equal to 0." + # If gc_interval > 0, memory garbage collection is manually controlled. + # The automatic garbage collector sould be disabled before training starts. + if self.gc_interval > 0: + gc.disable() + self.validation_global_step = 1 + def _enable_nvidia_optimizations(self): "These optimizations are present in NVIDIA NGC PyTorch Containers" @@ -278,14 +294,27 @@ def on_train_start(self) -> None: super().on_train_start() self.init_global_step = self.trainer.global_step - def _get_parameters(self): + def on_validation_start(self) -> None: + super().on_validation_start() + if self.gc_interval > 0: + gc.collect() + + def on_validation_end(self) -> None: + super().on_validation_end() + if self.gc_interval > 0: + gc.collect() + + def get_parameters_with_grad(self): """ - private method to load all the trainable parameters from optimizer param groups + Get all parameters with grad from optimizer param groups """ params = [] for param_group in self._optimizer_param_groups: for param in param_group['params']: - params.append(param) + if ( + param.grad is not None + ): # (@adithyare) adapter training with pp>1 can result in params with no grads + params.append(param) return params def configure_gradient_clipping(self, *args, **kwargs): @@ -309,9 +338,9 @@ def configure_gradient_clipping(self, *args, **kwargs): else: if self.megatron_amp_O2: # grep fp32 master parameters for gradient clipping - parameters = self._optimizer.get_parameters() + parameters = self._optimizer.get_parameters_with_grad() else: - parameters = self._get_parameters() + parameters = self.get_parameters_with_grad() grad_norm = clip_grad_norm_fp32(parameters=parameters, max_norm=clip_val) self.log('grad_norm', grad_norm, rank_zero_only=True, batch_size=1) @@ -340,7 +369,7 @@ def allreduce_gradients(self): for buf, synced in zip(grads, torch._utils._unflatten_dense_tensors(coalesced, grads)): buf.copy_(synced) - def reduce_overlap_gradients(self): + def reduce_overlap_gradients(self, params=None): """Reduce grads if overlapped grad sync is enabled Used for pipeline parallelism with the distributed Adam @@ -395,6 +424,17 @@ def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unus # accumulated gradient updates. grad_scaler.optimizer_update_skipped = None + if self.gc_interval > 0 and (self.trainer.global_step % self.gc_interval == 0): + gc.collect() + + def on_validation_batch_end(self, outputs, batch: Any, batch_idx: int, dataloader_idx: int) -> None: + super().on_validation_batch_end(outputs, batch, batch_idx, dataloader_idx) + + if self.gc_interval > 0: + if self.validation_global_step % self.gc_interval == 0: + gc.collect() + self.validation_global_step += 1 + def setup_optimization( self, optim_config: Optional[Union[DictConfig, Dict]] = None, optim_kwargs: Optional[Dict[str, Any]] = None, ): @@ -573,6 +613,14 @@ def _validate_and_override_config(self): 'Make sure the number of model chunks is the same across all pipeline stages.' ) + if self.cfg.get('ub_tp_comm_overlap', False): + if not self.cfg.get('transformer_engine', False) or not self.cfg.get('sequence_parallel', False): + logging.info( + "Userbuffer tensor-parallel communication overlap is available with both Transformer Engine and sequence-parallelism." + ) + with open_dict(self.cfg): + self.cfg.ub_tp_comm_overlap = False + def is_data_parallel_rank_zero(self): if is_global_rank_zero(): return True diff --git a/nemo/collections/vision/modules/common/megatron/vision_transformer.py b/nemo/collections/vision/modules/common/megatron/vision_transformer.py index b1ab196ada78..a04111d8201c 100644 --- a/nemo/collections/vision/modules/common/megatron/vision_transformer.py +++ b/nemo/collections/vision/modules/common/megatron/vision_transformer.py @@ -129,6 +129,7 @@ def __init__( sequence_parallel=False, gradient_accumulation_fusion=False, normalize_attention_scores=True, + use_flash_attention=False, ): kwargs = locals() for key in ["self", "__class__"]: @@ -394,6 +395,8 @@ def __init__( sequence_parallel=False, gradient_accumulation_fusion=False, normalize_attention_scores=True, + ub_tp_comm_overlap=False, + use_flash_attention=False, ): kwargs = locals() for key in ["self", "__class__"]: @@ -452,6 +455,7 @@ def build_layer(layer_number): sequence_parallel=sequence_parallel, gradient_accumulation_fusion=gradient_accumulation_fusion, normalize_attention_scores=normalize_attention_scores, + use_flash_attention=use_flash_attention, ) if parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None: diff --git a/nemo/collections/vision/modules/vit/vit_backbone.py b/nemo/collections/vision/modules/vit/vit_backbone.py index 661eeec8b3f3..37bb97a818ea 100644 --- a/nemo/collections/vision/modules/vit/vit_backbone.py +++ b/nemo/collections/vision/modules/vit/vit_backbone.py @@ -302,6 +302,8 @@ def __init__( activations_checkpoint_granularity=model_cfg.activations_checkpoint_granularity, gradient_accumulation_fusion=model_cfg.gradient_accumulation_fusion, activation=model_cfg.get('activation', 'gelu'), + ub_tp_comm_overlap=model_cfg.get('ub_tp_comm_overlap', False), + use_flash_attention=model_cfg.get('use_flash_attention', False), ) def set_input_tensor(self, input_tensor): diff --git a/nemo/core/classes/exportable.py b/nemo/core/classes/exportable.py index 38b8e1c1e31b..8469e80219d6 100644 --- a/nemo/core/classes/exportable.py +++ b/nemo/core/classes/exportable.py @@ -204,7 +204,7 @@ def _export( check_trace=check_trace, check_tolerance=check_tolerance, ) - jitted_model = torch.jit.optimize_for_inference(torch.jit.freeze(jitted_model)) + jitted_model = torch.jit.freeze(jitted_model) if verbose: logging.info(f"JIT code:\n{jitted_model.code}") jitted_model.save(output) @@ -302,3 +302,17 @@ def list_export_subnets(self): First goes the one receiving input (input_example) """ return ['self'] + + def get_export_config(self): + """ + Returns export_config dictionary + """ + return getattr(self, 'export_config', {}) + + def set_export_config(self, args): + """ + Sets/updates export_config dictionary + """ + ex_config = self.get_export_config() + ex_config.update(args) + self.export_config = ex_config diff --git a/nemo/core/classes/mixins/adapter_mixins.py b/nemo/core/classes/mixins/adapter_mixins.py index 3d789be7dc61..557c43fb5813 100644 --- a/nemo/core/classes/mixins/adapter_mixins.py +++ b/nemo/core/classes/mixins/adapter_mixins.py @@ -143,7 +143,9 @@ class AdapterModuleMixin(ABC): - `adapter_metadata_cfg_key`: A str representing a key in the model config that is used to preserve the metadata of the adapter config. - **Note**: This module is **not** responsible for maintaining its config. Subclasses must ensure config is updated + .. note:: + + This module is **not** responsible for maintaining its config. Subclasses must ensure config is updated or preserved as needed. It is the responsibility of the subclasses to propagate the most up to date config to lower layers. """ @@ -435,8 +437,6 @@ def forward_enabled_adapters(self, input: 'torch.Tensor'): Utilizes the implicit merge strategy of each adapter when computing the adapter's output, and how that output will be merged back with the original input. - **Note**: - Args: input: The output tensor of the calling module is the input to the first adapter, whose output is then chained to the next adapter until all adapters are consumed. @@ -519,7 +519,9 @@ def forward_single_enabled_adapter_( """ Perform the forward step of a single adapter module on some input data. - **Note**: Subclasses can override this method to accommodate more complicate adapter forward steps. + .. note:: + + Subclasses can override this method to accommodate more complicate adapter forward steps. Args: input: input: The output tensor of the calling module is the input to the first adapter, whose output @@ -756,8 +758,10 @@ def save_adapters(self, filepath: str, name: str = None): Utility method that saves only the adapter module(s), and not the entire model itself. This allows the sharing of adapters which are often just a fraction of the size of the full model, enabling easier deliver. + + .. note:: - Note: The saved file is a pytorch compatible pickle file, containing the state dicts of the adapter(s), + The saved file is a pytorch compatible pickle file, containing the state dicts of the adapter(s), as well as a binary representation of the adapter config. Args: @@ -835,7 +839,9 @@ def load_adapters(self, filepath: str, name: str = None, map_location: str = Non This allows the sharing of adapters which are often just a fraction of the size of the full model, enabling easier deliver. - Note: During restoration, assumes that the model does not currently already have an adapter with + .. note:: + + During restoration, assumes that the model does not currently already have an adapter with the name (if provided), or any adapter that shares a name with the state dict's modules (if name is not provided). This is to ensure that each adapter name is globally unique in a model. @@ -964,7 +970,9 @@ def adapter_module_names(self) -> List[str]: """ List of valid adapter modules that are supported by the model. - **Note**: Subclasses should override this property and return a list of str names, of all the modules + .. note:: + + Subclasses should override this property and return a list of str names, of all the modules that they support, which will enable users to determine where to place the adapter modules. Returns: diff --git a/nemo/core/optim/distributed_adam.py b/nemo/core/optim/distributed_adam.py index 1f2ce90f3ff7..706bc48774e3 100644 --- a/nemo/core/optim/distributed_adam.py +++ b/nemo/core/optim/distributed_adam.py @@ -19,6 +19,7 @@ from apex.contrib.optimizers.distributed_fused_adam import ( DistributedFusedAdam, _coalescing_manager, + _coalescing_manager_append_work, _disable_pre_forward_hook, ) from megatron.core import parallel_state @@ -76,24 +77,27 @@ def __init__(self, params, disable_distributed_parameters=False, **kwargs): distopt_param_groups = param_groups dtype = kwargs['dtype'] if 'dtype' in kwargs else torch.float32 grad_sync_dtype = kwargs['grad_sync_dtype'] if 'grad_sync_dtype' in kwargs else dtype - needs_fp32_optimizer = any( - getattr(param, '_with_fp32_optimizer', False) - for param in itertools.chain.from_iterable(param_group['params'] for param_group in param_groups) - ) - if (dtype != torch.float32 or grad_sync_dtype != torch.float32) and needs_fp32_optimizer: + needs_fp32_optimizer = dtype != torch.float32 or grad_sync_dtype != torch.float32 + if needs_fp32_optimizer: + needs_fp32_optimizer = any( + any(getattr(param, '_with_fp32_optimizer', False) for param in param_group['params']) + for param_group in param_groups + ) + if needs_fp32_optimizer: # Find params that require explicit FP32 optimizer distopt_param_groups = [] fp32_param_groups = [] self._fp32_optim_main_params = collections.OrderedDict() for param_group in param_groups: - distopt_param_group = {key: val for key, val in param_group.items() if key != 'params'} + distopt_param_group = param_group.copy() distopt_param_group['params'] = [] - fp32_param_group = {key: val for key, val in param_group.items() if key != 'params'} + fp32_param_group = param_group.copy() fp32_param_group['params'] = [] for model_param in param_group['params']: if getattr(model_param, '_with_fp32_optimizer', False): main_param = model_param.detach().clone().float() + model_param.main_grad = main_param.grad fp32_param_group['params'].append(main_param) self._fp32_optim_main_params[model_param] = main_param else: @@ -101,6 +105,9 @@ def __init__(self, params, disable_distributed_parameters=False, **kwargs): distopt_param_groups.append(distopt_param_group) fp32_param_groups.append(fp32_param_group) + # Add callback hook so grads accumulate into FP32 buffer + self._fp32_register_post_backward_hooks() + # Construct explicit FP32 optimizer adamw_kwargs = {} for name in ('lr', 'betas', 'eps', 'weight_decay', 'amsgrad'): @@ -112,6 +119,30 @@ def __init__(self, params, disable_distributed_parameters=False, **kwargs): # Construct distributed optimizer super().__init__(distopt_param_groups, **kwargs) + def _fp32_register_post_backward_hooks(self): + """Attach hooks for FP32 gradients""" + + # Helper function to avoid issues with late binding closures + def make_post_backward_hook(param): + def post_backward_hook(*unused): + self._fp32_optim_grad_sync_needed = True + if hasattr(param, 'main_grad'): + with torch.no_grad(): + if param.grad is not None: + param.main_grad += param.grad + param.grad = None + + return post_backward_hook + + # Construct hooks and register with params + self._fp32_grad_accs = [] + for param in self._fp32_optim_main_params.keys(): + param_tmp = param.expand_as(param) + grad_acc = param_tmp.grad_fn.next_functions[0][0] + hook = make_post_backward_hook(param) + grad_acc.register_hook(hook) + self._fp32_grad_accs.append(grad_acc) + def _make_post_backward_hook(self, param, param_group_id, param_id): def hook(*unused): if getattr(param, '_pre_forward_hook_is_enabled', False): @@ -173,16 +204,15 @@ def _fp32_optim_grad_sync(self): for model_param, main_param in self._fp32_optim_main_params.items(): if model_param.grad is not None: main_param.grad += model_param.grad.detach() - sync_requests = [] - with _coalescing_manager(self.process_group, self.device, sync_requests): + with _coalescing_manager(self.process_group, self.device, async_ops=True) as cm: for main_param in self._fp32_optim_main_params.values(): - sync_requests.append( + _coalescing_manager_append_work( + cm, torch.distributed.all_reduce( main_param.grad, op=torch.distributed.ReduceOp.AVG, group=self.process_group, async_op=True, - ) + ), ) - for req in sync_requests: - req.wait() + cm.wait() self._fp32_optim_grad_sync_needed = False def zero_grad(self, *args, **kwargs): diff --git a/nemo/core/optim/lr_scheduler.py b/nemo/core/optim/lr_scheduler.py index c454e6290477..73ad1e18a94b 100644 --- a/nemo/core/optim/lr_scheduler.py +++ b/nemo/core/optim/lr_scheduler.py @@ -975,5 +975,6 @@ def compute_max_steps( } EPOCH_SCHEDULERS = { + 'ExponentialLR': pt_scheduler.ExponentialLR, 'ReduceLROnPlateau': pt_scheduler.ReduceLROnPlateau, } diff --git a/nemo/core/optim/optimizer_with_main_params.py b/nemo/core/optim/optimizer_with_main_params.py index 6f0a9b513337..44d54a0e63ff 100644 --- a/nemo/core/optim/optimizer_with_main_params.py +++ b/nemo/core/optim/optimizer_with_main_params.py @@ -488,11 +488,11 @@ def async_master_grads_allreudce(self): def fp32_grad_accumulation(self): return self._fp32_grad_accum - def get_parameters(self): + def get_parameters_with_grad(self): params = [] for param_group in self.optimizer.param_groups: for param in param_group['params']: - if param is not None: + if param.grad is not None: # (@adithyare) added to enable pp>1 training for adapters params.append(param) return params diff --git a/nemo/core/optim/optimizers.py b/nemo/core/optim/optimizers.py index 76e47e20e0cc..9473ef0af969 100644 --- a/nemo/core/optim/optimizers.py +++ b/nemo/core/optim/optimizers.py @@ -51,7 +51,6 @@ AVAILABLE_OPTIMIZERS['fused_adam'] = FusedAdam except ModuleNotFoundError: HAVE_APEX = False - logging.warning("Apex was not found. Using the lamb or fused_adam optimizer will error out.") HAVE_APEX_DISTRIBUTED_ADAM = False if HAVE_APEX: diff --git a/nemo/core/utils/k2_guard.py b/nemo/core/utils/k2_guard.py index df4a01b03963..a9f64ce39c6b 100644 --- a/nemo/core/utils/k2_guard.py +++ b/nemo/core/utils/k2_guard.py @@ -20,25 +20,16 @@ """ import textwrap -from typing import Tuple from packaging.version import Version from pytorch_lightning.utilities.imports import package_available +from nemo.core.utils.k2_utils import K2_INSTALLATION_MESSAGE __K2_MINIMUM_MAJOR_VERSION = 1 __K2_MINIMUM_MINOR_VERSION = 14 __K2_MINIMUM_VERSION = Version(f"{__K2_MINIMUM_MAJOR_VERSION}.{__K2_MINIMUM_MINOR_VERSION}") -K2_INSTALLATION_MESSAGE = ( - "Could not import `k2`.\n" - "Please install k2 in one of the following ways:\n" - "1) Run `bash scripts/speech_recognition/k2/setup.sh`\n" - "2) (not recommended) Use any approach from https://k2-fsa.github.io/k2/installation/index.html " - "if your your cuda and pytorch versions are supported.\n" - "It is advised to always install k2 using setup.sh only, " - "as different versions of k2 may not interact with the NeMo code as expected." -) if not package_available("k2"): raise ModuleNotFoundError("Module k2 is not available.\n" + K2_INSTALLATION_MESSAGE) diff --git a/nemo/core/utils/k2_utils.py b/nemo/core/utils/k2_utils.py new file mode 100644 index 000000000000..3dff6a35d3e3 --- /dev/null +++ b/nemo/core/utils/k2_utils.py @@ -0,0 +1,24 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +K2_INSTALLATION_MESSAGE = ( + "Could not import `k2`.\n" + "Please install k2 in one of the following ways:\n" + "1) (recommended) Run `bash scripts/speech_recognition/k2/setup.sh`\n" + "2) Use any approach from https://k2-fsa.github.io/k2/installation/index.html " + "if your your cuda and pytorch versions are supported.\n" + "It is advised to always install k2 using setup.sh only, " + "as different versions of k2 may not interact with the NeMo code as expected." +) diff --git a/nemo/core/utils/numba_utils.py b/nemo/core/utils/numba_utils.py index 6e1a8cb247d6..9117b2ea1010 100644 --- a/nemo/core/utils/numba_utils.py +++ b/nemo/core/utils/numba_utils.py @@ -17,6 +17,8 @@ import operator import os +from typing import Tuple, Union + from nemo.utils import model_utils # Prevent Numba CUDA logs from showing at info level @@ -26,6 +28,8 @@ __NUMBA_DEFAULT_MINIMUM_VERSION__ = "0.53.0" __NUMBA_MINIMUM_VERSION__ = os.environ.get("NEMO_NUMBA_MINVER", __NUMBA_DEFAULT_MINIMUM_VERSION__) +__NUMBA_MINIMUM_VERSION_FP16_SUPPORTED__ = "0.57.0" + NUMBA_INSTALLATION_MESSAGE = ( "Could not import `numba`.\n" @@ -148,6 +152,39 @@ def numba_cuda_is_supported(min_version: str) -> bool: return False +def is_numba_cuda_fp16_supported(return_reason: bool = False) -> Union[bool, Tuple[bool, str]]: + """ + Utility method that returns a bool, stating if FP16 is supported for numba cuda kernels or not. + + Returns: + bool, whether Numba CUDA will support fp16 or not. + """ + reason = "" + use_nvidia_binding = os.environ.get('NUMBA_CUDA_USE_NVIDIA_BINDING', None) + if use_nvidia_binding is not None: + use_nvidia_binding = use_nvidia_binding.lower() == "1" + reason += "Env variable `NUMBA_CUDA_USE_NVIDIA_BINDING` is available and set to `1`. " + else: + use_nvidia_binding = False + reason += "Env variable `NUMBA_CUDA_USE_NVIDIA_BINDING` is not available or has not set to `1`." + + numba_fp16_version_correct = model_utils.check_lib_version( + 'numba', __NUMBA_MINIMUM_VERSION_FP16_SUPPORTED__, operator=operator.ge + )[0] + + if numba_fp16_version_correct: + reason += f"Numba CUDA FP16 is supported in installed numba version." + else: + reason += f"Numba CUDA FP16 is not supported in installed numba version." + + result = use_nvidia_binding and numba_fp16_version_correct + + if return_reason: + return result, reason + else: + return result + + def skip_numba_cuda_test_if_unsupported(min_version: str): """ Helper method to skip pytest test case if numba cuda is not supported. diff --git a/nemo/package_info.py b/nemo/package_info.py index d77e3046359b..fc465c89cfdb 100644 --- a/nemo/package_info.py +++ b/nemo/package_info.py @@ -14,7 +14,7 @@ MAJOR = 1 -MINOR = 19 +MINOR = 20 PATCH = 0 PRE_RELEASE = '' diff --git a/nemo/utils/app_state.py b/nemo/utils/app_state.py index c3ead0bff48f..d06e1ac32e36 100644 --- a/nemo/utils/app_state.py +++ b/nemo/utils/app_state.py @@ -55,6 +55,7 @@ def __init__(self): self._data_parallel_group = None self._megatron_checkpoint_version = None self._use_fp8 = False + self._init_mpi_proc_gruop = False self._random_seed = None @@ -363,6 +364,22 @@ def use_fp8(self, use_fp8): """ self._use_fp8 = use_fp8 + @property + def init_mpi_proc_group(self): + """ Property sets the initialization of mpi process group. + Returns: + Initialize mpi process group. + """ + return self._init_mpi_proc_group + + @init_mpi_proc_group.setter + def init_mpi_proc_group(self, init_mpi_proc_group): + """ Property sets the initialization of mpi process group. + Args: + init_mpi_proc_group: Initialize mpi process group. + """ + self._init_mpi_proc_group = init_mpi_proc_group + @property def random_seed(self): """ Property returns the random seed. diff --git a/nemo/utils/decorators/experimental.py b/nemo/utils/decorators/experimental.py index 35b26fb8690d..de62dbaf9ffb 100644 --- a/nemo/utils/decorators/experimental.py +++ b/nemo/utils/decorators/experimental.py @@ -15,19 +15,13 @@ __all__ = ['experimental'] -from nemo.utils import logging - -def experimental(cls): - """ Decorator which indicates that module is experimental. - Use it to mark experimental or research modules. - """ +import wrapt - def wrapped(cls): - logging.warning( - f'Module {cls} is experimental, not ready for production and is not fully supported. Use at your own risk.' - ) +from nemo.utils import logging - return cls - return wrapped(cls=cls) +@wrapt.decorator +def experimental(wrapped, instance, args, kwargs): + logging.warning(f"`{wrapped}` is experimental and not ready for production yet. Use at your own risk.") + return wrapped(*args, **kwargs) diff --git a/nemo/utils/export_utils.py b/nemo/utils/export_utils.py index 9fa2bc239eb8..60203ed6b9f0 100644 --- a/nemo/utils/export_utils.py +++ b/nemo/utils/export_utils.py @@ -309,7 +309,8 @@ def replace_FusedScaleMaskSoftmax(n: nn.Module) -> Optional[nn.Linear]: Equivalent LayerNorm module """ if not isinstance(n, FusedScaleMaskSoftmax): - raise ValueError("This function can only change the FusedScaleMaskSoftmax module.") + logging.warning("This function can only change the FusedScaleMaskSoftmax module.") + return n # disable the fusion only mod = FusedScaleMaskSoftmax( diff --git a/nemo/utils/model_utils.py b/nemo/utils/model_utils.py index 211ffdcdf11e..42a0b108944d 100644 --- a/nemo/utils/model_utils.py +++ b/nemo/utils/model_utils.py @@ -13,6 +13,7 @@ # limitations under the License. import copy +import importlib import os from dataclasses import dataclass, is_dataclass from enum import Enum @@ -554,7 +555,7 @@ def check_lib_version(lib_name: str, checked_version: str, operator) -> Tuple[Op if '.' in lib_name: mod = import_class_by_path(lib_name) else: - mod = __import__(lib_name) + mod = importlib.import_module(lib_name) if hasattr(mod, '__version__'): lib_ver = version.Version(mod.__version__) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 9d4fab43186b..7481e337c999 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,6 +1,6 @@ huggingface_hub numba -numpy>=1.22 +numpy>=1.22,<1.24 onnx>=1.7.0 python-dateutil ruamel.yaml diff --git a/requirements/requirements_asr.txt b/requirements/requirements_asr.txt index fdeaeb2d450d..011862ad723b 100644 --- a/requirements/requirements_asr.txt +++ b/requirements/requirements_asr.txt @@ -1,7 +1,6 @@ braceexpand editdistance g2p_en -inflect ipywidgets jiwer kaldi-python-io diff --git a/requirements/requirements_common.txt b/requirements/requirements_common.txt index 29d8ac4dd49b..a4d343a32d1a 100644 --- a/requirements/requirements_common.txt +++ b/requirements/requirements_common.txt @@ -1,4 +1,6 @@ +inflect pandas +pydantic<2 # remove after inflect supports Pydantic 2.0+ sacremoses>=0.0.43 sentencepiece<1.0.0 youtokentome>=1.0.5 diff --git a/requirements/requirements_lightning.txt b/requirements/requirements_lightning.txt index 100216aebc54..9c41c355e8cd 100644 --- a/requirements/requirements_lightning.txt +++ b/requirements/requirements_lightning.txt @@ -1,7 +1,6 @@ hydra-core>=1.2.0,<1.3 omegaconf>=2.2,<2.3 pytorch-lightning>=1.9.0,<=1.9.4 -pyyaml<6 # Pinned until omegaconf works with pyyaml>=6 torchmetrics>=0.11.0 transformers>=4.0.1 wandb diff --git a/requirements/requirements_nlp.txt b/requirements/requirements_nlp.txt index d88280b363c2..68d8b8985748 100644 --- a/requirements/requirements_nlp.txt +++ b/requirements/requirements_nlp.txt @@ -5,16 +5,13 @@ fasttext flask_restful ftfy gdown -gradio==3.28.3 h5py ijson -inflect jieba markdown2 matplotlib>=3.3.2 -megatron_core==0.1.0 +megatron_core==0.2.0 nltk>=3.6.5 -numpy opencc pangu rapidfuzz diff --git a/requirements/requirements_tts.txt b/requirements/requirements_tts.txt index 20484871ee4b..bb330aaf2e58 100644 --- a/requirements/requirements_tts.txt +++ b/requirements/requirements_tts.txt @@ -1,6 +1,5 @@ attrdict einops -inflect jieba kornia librosa diff --git a/scripts/asr_language_modeling/ngram_lm/create_lexicon_from_arpa.py b/scripts/asr_language_modeling/ngram_lm/create_lexicon_from_arpa.py index 6e992f5348ae..a38c33de05af 100644 --- a/scripts/asr_language_modeling/ngram_lm/create_lexicon_from_arpa.py +++ b/scripts/asr_language_modeling/ngram_lm/create_lexicon_from_arpa.py @@ -1,76 +1,79 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Use this file to create a lexicon file for Flashlight decoding from an existing KenLM arpa file -# A lexicon file is required for Flashlight decoding in most cases, as it acts as a map from the words -# in you arpa file to the representation used by your ASR AM. -# For more details, see: https://github.com/flashlight/flashlight/tree/main/flashlight/app/asr#data-preparation -# -# Usage: python create_lexicon_from_arpa.py --arpa /path/to/english.arpa --model /path/to/model.nemo --lower -# -# - - -import argparse -import os -import re - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Utility script for generating lexicon file from a KenLM arpa file") - parser.add_argument("--arpa", required=True, help="path to your arpa file") - parser.add_argument("--dst", help="directory to store generated lexicon", default=None) - parser.add_argument("--lower", action='store_true', help="Whether to lowercase the arpa vocab") - parser.add_argument("--model", default=None, help="path to Nemo model for its tokeniser") - - args = parser.parse_args() - - if not os.path.exists(args.arpa): - print("ARPA file not detected on disk, aborting!", flush=True) - exit(255) - - if args.dst is not None: - save_path = args.dst - else: - save_path = os.path.dirname(args.arpa) - os.makedirs(save_path, exist_ok=True) - - tokenizer = None - if args.model is not None: - from nemo.collections.asr.models import ASRModel - - model = ASRModel.restore_from(restore_path=args.model, map_location='cpu') - if hasattr(model, 'tokenizer'): - tokenizer = model.tokenizer - else: - print('WARNING: supplied Nemo model does not contain a tokenizer', flush=True) - - lex_file = os.path.join(save_path, os.path.splitext(os.path.basename(args.arpa))[0] + '.lexicon') - print(f"Writing Lexicon file - {lex_file}...", flush=True) - with open(lex_file, "w", encoding='utf_8', newline='\n') as f: - with open(args.arpa, "r", encoding='utf_8') as arpa: - for line in arpa: - # verify if the line corresponds to unigram - if not re.match(r"[-]*[0-9\.]+\t\S+\t*[-]*[0-9\.]*$", line): - continue - word = line.split("\t")[1] - word = word.strip().lower() if args.lower else word.strip() - if word == "" or word == "" or word == "" or word == "": - continue - - if tokenizer is None: - f.write("{w}\t{s}\n".format(w=word, s=" ".join(word))) - else: - f.write("{w}\t{s}\n".format(w=word, s=" ".join(tokenizer.text_to_tokens(word)))) - - print("Done!", flush=True) +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Use this file to create a lexicon file for Flashlight decoding from an existing KenLM arpa file +# A lexicon file is required for Flashlight decoding in most cases, as it acts as a map from the words +# in you arpa file to the representation used by your ASR AM. +# For more details, see: https://github.com/flashlight/flashlight/tree/main/flashlight/app/asr#data-preparation +# +# Usage: python create_lexicon_from_arpa.py --arpa /path/to/english.arpa --model /path/to/model.nemo --lower +# +# + + +import argparse +import os +import re + +from nemo.utils import logging + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Utility script for generating lexicon file from a KenLM arpa file") + parser.add_argument("--arpa", required=True, help="path to your arpa file") + parser.add_argument("--dst", help="directory to store generated lexicon", default=None) + parser.add_argument("--lower", action='store_true', help="Whether to lowercase the arpa vocab") + parser.add_argument("--model", default=None, help="path to Nemo model for its tokeniser") + + args = parser.parse_args() + + if not os.path.exists(args.arpa): + logging.critical(f"ARPA file [ {args.arpa} ] not detected on disk, aborting!") + exit(255) + + if args.dst is not None: + save_path = args.dst + else: + save_path = os.path.dirname(args.arpa) + os.makedirs(save_path, exist_ok=True) + + tokenizer = None + if args.model is not None: + from nemo.collections.asr.models import ASRModel + + model = ASRModel.restore_from(restore_path=args.model, map_location='cpu') + if hasattr(model, 'tokenizer'): + tokenizer = model.tokenizer + else: + logging.warning('Supplied Nemo model does not contain a tokenizer') + + lex_file = os.path.join(save_path, os.path.splitext(os.path.basename(args.arpa))[0] + '.lexicon') + + logging.info(f"Writing Lexicon file to: {lex_file}...") + with open(lex_file, "w", encoding='utf_8', newline='\n') as f: + with open(args.arpa, "r", encoding='utf_8') as arpa: + for line in arpa: + # verify if the line corresponds to unigram + if not re.match(r"[-]*[0-9\.]+\t\S+\t*[-]*[0-9\.]*$", line): + continue + word = line.split("\t")[1] + word = word.strip().lower() if args.lower else word.strip() + if word == "" or word == "" or word == "" or word == "": + continue + + if tokenizer is None: + f.write("{w}\t{s}\n".format(w=word, s=" ".join(word))) + else: + w_ids = tokenizer.text_to_ids(word) + if tokenizer.unk_id not in w_ids: + f.write("{w}\t{s}\n".format(w=word, s=" ".join(tokenizer.text_to_tokens(word)))) diff --git a/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh b/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh old mode 100644 new mode 100755 index c1a94df53a41..3ba337a6afd3 --- a/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh +++ b/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh @@ -14,6 +14,8 @@ # limitations under the License. # Use this script to install KenLM, OpenSeq2Seq decoder, Flashlight decoder +shopt -s expand_aliases + NEMO_PATH=/workspace/nemo # Path to NeMo folder: /workspace/nemo if you use NeMo/Dockerfile if [ "$#" -eq 1 ] then @@ -22,7 +24,17 @@ fi KENLM_MAX_ORDER=10 # Maximum order of KenLM model, also specified in the setup_os2s_decoders.py cd $NEMO_PATH -apt-get update && apt-get upgrade -y && apt-get install -y liblzma-dev && rm -rf /var/lib/apt/lists/* # needed for flashlight decoder + +if [ $(id -u) -eq 0 ]; then + alias aptupdate='apt-get update' + alias b2install='./b2' + else + alias aptupdate='sudo apt-get update' + alias b2install='sudo ./b2' +fi + +aptupdate && apt-get upgrade -y && apt-get install -y liblzma-dev && rm -rf /var/lib/apt/lists/* # liblzma needed for flashlight decoder + git clone https://github.com/NVIDIA/OpenSeq2Seq cd OpenSeq2Seq @@ -31,14 +43,12 @@ cd .. mv OpenSeq2Seq/decoders . rm -rf OpenSeq2Seq cd decoders -# patch setup code to support the recent distutils -sed -i 's/, distutils/, distutils\nimport distutils.ccompiler/g' setup.py cp $NEMO_PATH/scripts/installers/setup_os2s_decoders.py ./setup.py ./setup.sh # install Boost package for KenLM -wget https://boostorg.jfrog.io/artifactory/main/release/1.80.0/source/boost_1_80_0.tar.bz2 --no-check-certificate && tar --bzip2 -xf $NEMO_PATH/decoders/boost_1_80_0.tar.bz2 && cd boost_1_80_0 && ./bootstrap.sh && ./b2 --layout=tagged link=static,shared threading=multi,single install -j4 || echo FAILURE +wget https://boostorg.jfrog.io/artifactory/main/release/1.80.0/source/boost_1_80_0.tar.bz2 --no-check-certificate && tar --bzip2 -xf $NEMO_PATH/decoders/boost_1_80_0.tar.bz2 && cd boost_1_80_0 && ./bootstrap.sh && b2install --layout=tagged link=static,shared threading=multi,single install -j4 || echo FAILURE export BOOST_ROOT=$NEMO_PATH/decoders/boost_1_80_0 # install KenLM diff --git a/scripts/confidence_ensembles/build_ensemble.py b/scripts/confidence_ensembles/build_ensemble.py index 07ceccb8b3d5..e953dec02b7a 100644 --- a/scripts/confidence_ensembles/build_ensemble.py +++ b/scripts/confidence_ensembles/build_ensemble.py @@ -458,7 +458,7 @@ def find_best_confidence( return best_conf_spec.to_confidence_config(), best_pipe -@hydra_runner(schema=BuildEnsembleConfig) +@hydra_runner(config_name="BuildEnsembleConfig", schema=BuildEnsembleConfig) def main(cfg: BuildEnsembleConfig): # silencing all messages from nemo/ptl to avoid dumping tons of configs to the stdout logging.getLogger('pytorch_lightning').setLevel(logging.CRITICAL) @@ -471,12 +471,10 @@ def main(cfg: BuildEnsembleConfig): pl.seed_everything(cfg.random_seed) cfg.transcription.random_seed = None # seed is already applied cfg.transcription.return_transcriptions = True - # that sets preserve_alignment to True - cfg.transcription.compute_timestamps = True + cfg.transcription.preserve_alignment = True cfg.transcription.ctc_decoding.temperature = cfg.temperature cfg.transcription.rnnt_decoding.temperature = cfg.temperature # this ensures that generated output is after log-softmax for consistency with CTC - cfg.transcription.rnnt_decoding.confidence_cfg.preserve_frame_confidence = True train_confidences = [] dev_confidences = [] diff --git a/scripts/confidence_ensembles/test_confidence_ensembles.py b/scripts/confidence_ensembles/test_confidence_ensembles.py index b665375c0c33..fa537529ab6b 100644 --- a/scripts/confidence_ensembles/test_confidence_ensembles.py +++ b/scripts/confidence_ensembles/test_confidence_ensembles.py @@ -113,4 +113,4 @@ def test_confidence_ensemble(tmp_path, build_args): ) results = speech_to_text_eval.main(eval_cfg) - assert results.metric_value < 0.15 # relaxed check for better than 15% WER + assert results.metric_value < 0.20 # relaxed check for better than 20% WER diff --git a/scripts/dataset_processing/nlp/squad/prompt_learning_squad_preprocessing.py b/scripts/dataset_processing/nlp/squad/prompt_learning_squad_preprocessing.py index 785f8d59a80f..5803efd58fc2 100644 --- a/scripts/dataset_processing/nlp/squad/prompt_learning_squad_preprocessing.py +++ b/scripts/dataset_processing/nlp/squad/prompt_learning_squad_preprocessing.py @@ -56,6 +56,7 @@ def main(): parser.add_argument("--save-name-base", type=str, default="squad") parser.add_argument("--include-topic-name", action='store_true') parser.add_argument("--random-seed", type=int, default=1234) + parser.add_argument("--sft-format", action='store_true') args = parser.parse_args() train_data_dict = json.load(open(f"{args.data_dir}/{args.train_file}")) @@ -65,21 +66,21 @@ def main(): save_name_base = f"{args.data_dir}/{args.save_name_base}" - process_data(train_data, val_data, save_name_base, args.include_topic_name) + process_data(train_data, val_data, save_name_base, args.include_topic_name, args.sft_format) -def process_data(train_data, val_data, save_name_base, include_topic): - train_set = extract_questions(train_data, include_topic, split="train") - val_set = extract_questions(val_data, include_topic, split="val") - test_set = extract_questions(val_data, include_topic, split="test") +def process_data(train_data, val_data, save_name_base, include_topic, sft_format): + train_set = extract_questions(train_data, include_topic, sft_format, split="train") + val_set = extract_questions(val_data, include_topic, sft_format, split="val") + test_set = extract_questions(val_data, include_topic, sft_format, split="test") - gen_file(train_set, save_name_base, 'train') - gen_file(val_set, save_name_base, 'val') - gen_file(test_set, save_name_base, 'test', make_ground_truth=True) - gen_file(test_set, save_name_base, 'test', make_ground_truth=False) + gen_file(train_set, save_name_base, 'train', sft_format) + gen_file(val_set, save_name_base, 'val', sft_format) + gen_file(test_set, save_name_base, 'test', sft_format, make_ground_truth=True) + gen_file(test_set, save_name_base, 'test', sft_format, make_ground_truth=False) -def extract_questions(data, include_topic, split): +def extract_questions(data, include_topic, sft_format, split): processed_data = [] # Iterate over topics, want to keep them seprate in train/val/test splits @@ -109,7 +110,13 @@ def extract_questions(data, include_topic, split): except IndexError: continue - example_json = {"taskname": "squad", "context": context, "question": question, "answer": answers} + if sft_format: + example_json = { + "input": f"User: Context:{context} Question:{question}\n\nAssistant:", + "output": answers, + } + else: + example_json = {"taskname": "squad", "context": context, "question": question, "answer": answers} if include_topic: example_json["topic"] = topic @@ -120,7 +127,7 @@ def extract_questions(data, include_topic, split): return processed_data -def gen_file(data, save_name_base, split_type, make_ground_truth=False): +def gen_file(data, save_name_base, split_type, sft_format, make_ground_truth=False): save_path = f"{save_name_base}_{split_type}.jsonl" if make_ground_truth: @@ -133,7 +140,10 @@ def gen_file(data, save_name_base, split_type, make_ground_truth=False): # Dont want labels in the test set if split_type == "test" and not make_ground_truth: - del example_json["answer"] + if sft_format: + example_json["output"] = "" + else: + del example_json["answer"] save_file.write(json.dumps(example_json) + '\n') diff --git a/scripts/dataset_processing/spoken_wikipedia/run.sh b/scripts/dataset_processing/spoken_wikipedia/run.sh index 2894eb1dc55e..5ae447c9a1a4 100644 --- a/scripts/dataset_processing/spoken_wikipedia/run.sh +++ b/scripts/dataset_processing/spoken_wikipedia/run.sh @@ -102,7 +102,7 @@ ${NEMO_PATH}/tools/ctc_segmentation/run_segmentation.sh \ --MODEL_NAME_OR_PATH=${MODEL_FOR_SEGMENTATION} \ --DATA_DIR=${INPUT_DIR}_prepared \ --OUTPUT_DIR=${OUTPUT_DIR} \ ---MIN_SCORE=${MIN_SCORE} +--MIN_SCORE=${THRESHOLD} # Thresholds for filtering CER_THRESHOLD=20 diff --git a/scripts/dataset_processing/tts/compute_feature_stats.py b/scripts/dataset_processing/tts/compute_feature_stats.py index 6774563810d9..3b8b3509bfc1 100644 --- a/scripts/dataset_processing/tts/compute_feature_stats.py +++ b/scripts/dataset_processing/tts/compute_feature_stats.py @@ -98,7 +98,9 @@ def get_args(): help="Path to output JSON file with dataset feature statistics.", ) parser.add_argument( - "--overwrite", default=False, type=bool, help="Whether to overwrite the output stats file if it exists.", + "--overwrite", + action=argparse.BooleanOptionalAction, + help="Whether to overwrite the output stats file if it exists.", ) args = parser.parse_args() @@ -132,7 +134,7 @@ def main(): if not feature_dir.exists(): raise ValueError( - f"Feature directory {audio_dir} does not exist. " + f"Feature directory {feature_dir} does not exist. " f"Please check that the path is correct and that you ran compute_features.py" ) diff --git a/scripts/dataset_processing/tts/create_speaker_map.py b/scripts/dataset_processing/tts/create_speaker_map.py index 027a5c6e3e35..ab8dd7b0828b 100644 --- a/scripts/dataset_processing/tts/create_speaker_map.py +++ b/scripts/dataset_processing/tts/create_speaker_map.py @@ -54,7 +54,9 @@ def get_args(): "--speaker_map_path", required=True, type=Path, help="Path for output speaker index JSON", ) parser.add_argument( - "--overwrite", default=False, type=bool, help="Whether to overwrite the output speaker file if it exists.", + "--overwrite", + action=argparse.BooleanOptionalAction, + help="Whether to overwrite the output speaker file if it exists.", ) args = parser.parse_args() return args diff --git a/scripts/dataset_processing/tts/audio_processing/preprocess_audio.py b/scripts/dataset_processing/tts/preprocess_audio.py similarity index 76% rename from scripts/dataset_processing/tts/audio_processing/preprocess_audio.py rename to scripts/dataset_processing/tts/preprocess_audio.py index c1121dae7f71..1912d45d4bed 100644 --- a/scripts/dataset_processing/tts/audio_processing/preprocess_audio.py +++ b/scripts/dataset_processing/tts/preprocess_audio.py @@ -24,7 +24,7 @@ Most of these can also be done by the TTS data loader at training time, but doing them ahead of time lets us implement more complex processing, validate the correctness of the output, and save on compute time. -$ python /scripts/dataset_processing/tts/audio_processing/preprocess_audio.py \ +$ python /scripts/dataset_processing/tts/preprocess_audio.py \ --input_manifest="/manifest.json" \ --output_manifest="/manifest_processed.json" \ --input_audio_dir="/audio" \ @@ -72,6 +72,16 @@ def get_args(): parser.add_argument( "--output_audio_dir", required=True, type=Path, help="Path to output directory for audio files.", ) + parser.add_argument( + "--overwrite_audio", + action=argparse.BooleanOptionalAction, + help="Whether to reprocess and overwrite existing audio files in output_audio_dir.", + ) + parser.add_argument( + "--overwrite_manifest", + action=argparse.BooleanOptionalAction, + help="Whether to overwrite the output manifest file if it exists.", + ) parser.add_argument( "--num_workers", default=1, type=int, help="Number of parallel threads to use. If -1 all CPUs are used." ) @@ -110,6 +120,7 @@ def _process_entry( entry: dict, input_audio_dir: Path, output_audio_dir: Path, + overwrite_audio: bool, audio_trimmer: AudioTrimmer, output_sample_rate: int, volume_level: float, @@ -120,30 +131,34 @@ def _process_entry( output_path = output_audio_dir / audio_path_rel output_path.parent.mkdir(exist_ok=True, parents=True) - audio_path = str(audio_path) - output_path = str(output_path) - - audio, sample_rate = librosa.load(audio_path, sr=None) - - if audio_trimmer is not None: - audio, start_i, end_i = audio_trimmer.trim_audio(audio=audio, sample_rate=sample_rate, audio_id=audio_path) - - if output_sample_rate: - audio = librosa.resample(y=audio, orig_sr=sample_rate, target_sr=output_sample_rate) - sample_rate = output_sample_rate - - if volume_level: - audio = normalize_volume(audio, volume_level=volume_level) - - sf.write(file=output_path, data=audio, samplerate=sample_rate) - - original_duration = librosa.get_duration(filename=audio_path) - output_duration = librosa.get_duration(filename=output_path) + if output_path.exists() and not overwrite_audio: + original_duration = librosa.get_duration(path=audio_path) + output_duration = librosa.get_duration(path=output_path) + else: + audio, sample_rate = librosa.load(audio_path, sr=None) + original_duration = librosa.get_duration(y=audio, sr=sample_rate) + if audio_trimmer is not None: + audio, start_i, end_i = audio_trimmer.trim_audio( + audio=audio, sample_rate=int(sample_rate), audio_id=str(audio_path) + ) + + if output_sample_rate: + audio = librosa.resample(y=audio, orig_sr=sample_rate, target_sr=output_sample_rate) + sample_rate = output_sample_rate + + if volume_level: + audio = normalize_volume(audio, volume_level=volume_level) + + if audio.size > 0: + sf.write(file=output_path, data=audio, samplerate=sample_rate) + output_duration = librosa.get_duration(y=audio, sr=sample_rate) + else: + output_duration = 0.0 entry["duration"] = round(output_duration, 2) if os.path.isabs(audio_filepath): - entry["audio_filepath"] = output_path + entry["audio_filepath"] = str(output_path) return entry, original_duration, output_duration @@ -155,6 +170,8 @@ def main(): output_manifest_path = args.output_manifest input_audio_dir = args.input_audio_dir output_audio_dir = args.output_audio_dir + overwrite_audio = args.overwrite_audio + overwrite_manifest = args.overwrite_manifest num_workers = args.num_workers max_entries = args.max_entries output_sample_rate = args.output_sample_rate @@ -163,6 +180,12 @@ def main(): max_duration = args.max_duration filter_file = args.filter_file + if output_manifest_path.exists(): + if overwrite_manifest: + print(f"Will overwrite existing manifest path: {output_manifest_path}") + else: + raise ValueError(f"Manifest path already exists: {output_manifest_path}") + if args.trim_config_path: audio_trimmer_config = OmegaConf.load(args.trim_config_path) audio_trimmer = instantiate(audio_trimmer_config) @@ -181,6 +204,7 @@ def main(): entry=entry, input_audio_dir=input_audio_dir, output_audio_dir=output_audio_dir, + overwrite_audio=overwrite_audio, audio_trimmer=audio_trimmer, output_sample_rate=output_sample_rate, volume_level=volume_level, @@ -195,7 +219,11 @@ def main(): for output_entry, original_duration, output_duration in job_outputs: original_durations += original_duration - if (min_duration and output_duration < min_duration) or (max_duration and output_duration > max_duration): + if ( + output_duration == 0.0 + or (min_duration and output_duration < min_duration) + or (max_duration and output_duration > max_duration) + ): if output_duration != original_duration: output_entry["original_duration"] = original_duration filtered_entries.append(output_entry) diff --git a/scripts/dataset_processing/tts/preprocess_text.py b/scripts/dataset_processing/tts/preprocess_text.py new file mode 100644 index 000000000000..8b9bdebe940d --- /dev/null +++ b/scripts/dataset_processing/tts/preprocess_text.py @@ -0,0 +1,131 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This script is used to preprocess text before TTS model training. This is needed mainly for text normalization, +which is slow to rerun during training. + +The output manifest will be the same as the input manifest but with final text stored in the 'normalized_text' field. + +$ python /scripts/dataset_processing/tts/preprocess_text.py \ + --input_manifest="/manifest.json" \ + --output_manifest="/manifest_processed.json" \ + --normalizer_config_path="/examples/tts/conf/text/normalizer_en.yaml" \ + --lower_case=True \ + --num_workers=1 +""" + +import argparse +from pathlib import Path + +from hydra.utils import instantiate +from joblib import Parallel, delayed +from nemo_text_processing.text_normalization.normalize import Normalizer +from omegaconf import OmegaConf +from tqdm import tqdm + +from nemo.collections.asr.parts.utils.manifest_utils import read_manifest, write_manifest + + +def get_args(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Process and normalize text data.", + ) + parser.add_argument( + "--input_manifest", required=True, type=Path, help="Path to input training manifest.", + ) + parser.add_argument( + "--output_manifest", required=True, type=Path, help="Path to output training manifest with processed text.", + ) + parser.add_argument( + "--overwrite", + action=argparse.BooleanOptionalAction, + help="Whether to overwrite the output manifest file if it exists.", + ) + parser.add_argument( + "--lower_case", default=False, type=bool, help="Whether to convert the final text to lower case.", + ) + parser.add_argument( + "--normalizer_config_path", + required=False, + type=Path, + help="Path to config file for nemo_text_processing.text_normalization.normalize.Normalizer.", + ) + parser.add_argument( + "--num_workers", default=1, type=int, help="Number of parallel threads to use. If -1 all CPUs are used." + ) + parser.add_argument( + "--max_entries", default=0, type=int, help="If provided, maximum number of entries in the manifest to process." + ) + + args = parser.parse_args() + return args + + +def _process_entry(entry: dict, normalizer: Normalizer, lower_case: bool, lower_case_norm: bool) -> dict: + text = entry["text"] + + if normalizer is not None: + if lower_case_norm: + text = text.lower() + text = normalizer.normalize(text, punct_pre_process=True, punct_post_process=True) + + if lower_case: + text = text.lower() + + entry["normalized_text"] = text + + return entry + + +def main(): + args = get_args() + + input_manifest_path = args.input_manifest + output_manifest_path = args.output_manifest + lower_case = args.lower_case + num_workers = args.num_workers + max_entries = args.max_entries + overwrite = args.overwrite + + if output_manifest_path.exists(): + if overwrite: + print(f"Will overwrite existing manifest path: {output_manifest_path}") + else: + raise ValueError(f"Manifest path already exists: {output_manifest_path}") + + if args.normalizer_config_path: + normalizer_config = OmegaConf.load(args.normalizer_config_path) + normalizer = instantiate(normalizer_config) + lower_case_norm = normalizer.input_case == "lower_cased" + else: + normalizer = None + lower_case_norm = False + + entries = read_manifest(input_manifest_path) + if max_entries: + entries = entries[:max_entries] + + output_entries = Parallel(n_jobs=num_workers)( + delayed(_process_entry)( + entry=entry, normalizer=normalizer, lower_case=lower_case, lower_case_norm=lower_case_norm + ) + for entry in tqdm(entries) + ) + + write_manifest(output_path=output_manifest_path, target_manifest=output_entries, ensure_ascii=False) + + +if __name__ == "__main__": + main() diff --git a/scripts/export.py b/scripts/export.py index 80cbcf3dc666..8fa44bb305f9 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -62,6 +62,15 @@ def get_args(argv): ) parser.add_argument("--device", default="cuda", help="Device to export for") parser.add_argument("--check-tolerance", type=float, default=0.01, help="tolerance for verification") + parser.add_argument( + "--export-config", + metavar="KEY=VALUE", + nargs='+', + help="Set a number of key-value pairs to model.export_config dictionary " + "(do not put spaces before or after the = sign). " + "Note that values are always treated as strings.", + ) + args = parser.parse_args(argv) return args @@ -130,10 +139,18 @@ def nemo_export(argv): in_args["max_dim"] = args.max_dim max_dim = args.max_dim - if args.cache_support and hasattr(model, "encoder") and hasattr(model.encoder, "export_cache_support"): - model.encoder.export_cache_support = True - logging.info("Caching support is enabled.") - model.encoder.setup_streaming_params() + if args.cache_support: + model.set_export_config({"cache_support": "True"}) + + if args.export_config: + kv = {} + for key_value in args.export_config: + lst = key_value.split("=") + if len(lst) != 2: + raise Exception("Use correct format for --export_config: k=v") + k, v = lst + kv[k] = v + model.set_export_config(kv) autocast = nullcontext if args.autocast: @@ -158,7 +175,7 @@ def nemo_export(argv): check_trace=check_trace, check_tolerance=args.check_tolerance, onnx_opset_version=args.onnx_opset, - verbose=args.verbose, + verbose=bool(args.verbose), ) except Exception as e: diff --git a/scripts/installers/Dockerfile.ngramtools b/scripts/installers/Dockerfile.ngramtools index 49d3c12b3529..fad6716a1874 100644 --- a/scripts/installers/Dockerfile.ngramtools +++ b/scripts/installers/Dockerfile.ngramtools @@ -17,9 +17,9 @@ # How to use? Build it from NeMo root folder: # 1. git clone https://github.com/NVIDIA/NeMo.git && cd NeMo -# 2. DOCKER_BUILDKIT=1 docker build -t nemo:23.01.1 -f ./scripts/installers/Dockerfile.ngramtools . +# 2. DOCKER_BUILDKIT=1 docker build -t nemo:23.03.1 -f ./scripts/installers/Dockerfile.ngramtools . -from nvcr.io/nvidia/nemo:23.01 +from nvcr.io/nvidia/nemo:23.03 WORKDIR /workspace/nemo diff --git a/scripts/installers/setup_os2s_decoders.py b/scripts/installers/setup_os2s_decoders.py index 6dfe1bef54e8..9728829aaa44 100644 --- a/scripts/installers/setup_os2s_decoders.py +++ b/scripts/installers/setup_os2s_decoders.py @@ -22,6 +22,7 @@ from __future__ import absolute_import, division, print_function import argparse +import distutils.ccompiler import glob import multiprocessing.pool import os diff --git a/scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py b/scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py new file mode 100644 index 000000000000..14d7b6ae54ea --- /dev/null +++ b/scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py @@ -0,0 +1,212 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +A script to convert the Mosaic MPT-7B checkpoint on HuggingFace to Megatron GPTModel +This script is hardcoded specifically for the MPT-7B pretrained model only, and is not +generalisable to any other models. + +This script will load and convert the model entirely on CPU for OOM safety, but there +is an option to put the model onto GPU before the save down, which sets the map_location +to cuda for the restore_from call. You can do this by adding --cuda to this script call. + +This script requires that you have downloaded the 2 .bin weight files for MPT-7B from +HuggingFace located here: https://huggingface.co/mosaicml/mpt-7b/tree/main +These files MUST have the following file names and be saved somewhere where this script +can read them: + pytorch_model-00001-of-00002.bin + pytorch_model-00002-of-00002.bin + +This script will generate a Megatron model with TP=1 and PP=1. If you need different TP/PP +values, then after running this script, please use the script located below to set whatever +TP/PP values you want: + NeMo/examples/nlp/language_modeling/megatron_change_num_partitions.py + + +Here is an example usage command: + +```python +python scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py -i /path/to/mpt_7b -o /path/to/save +``` + +""" + + +import argparse +import os + +import pytorch_lightning as pl +import torch +from omegaconf import OmegaConf + +from nemo.collections.nlp.models.language_modeling.megatron import GPTModel +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.utils import logging + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '-i', '--input', required=True, type=str, help='path to the two MPT-7B .bin weight files from HuggingFace' + ) + parser.add_argument( + '-o', '--output', required=False, default=None, type=str, help='path to dir where to store output .nemo file' + ) + parser.add_argument('--cuda', action='store_true', help='put Nemo model onto GPU prior to savedown') + + args = parser.parse_args() + + if not os.path.exists(args.input): + logging.critical(f'Input directory [ {args.input} ] does not exist or cannot be found. Aborting.') + exit(255) + + model_dict = { + 'micro_batch_size': 4, + 'global_batch_size': 8, + 'rampup_batch_size': None, + 'tensor_model_parallel_size': 1, + 'pipeline_model_parallel_size': 1, + 'virtual_pipeline_model_parallel_size': None, + 'megatron_amp_O2': True, + 'transformer_engine': False, + 'use_cpu_initialization': True, + 'hidden_size': 4096, + 'max_position_embeddings': 2048, + 'num_layers': 32, + 'num_attention_heads': 32, + 'ffn_hidden_size': 4 * 4096, + 'precision': 'bf16', + 'pre_process': True, + 'post_process': True, + 'num_tokentypes': 0, + 'apply_query_key_layer_scaling': False, + 'parallel_output': False, + 'bias': False, + 'bias_dropout_add_fusion': False, + 'bias_activation_fusion': False, + 'transformer_block_type': 'pre_ln', + 'normalization': 'low_precision_layernorm', + 'fp32_residual_connection': False, + 'hidden_dropout': 0, + 'attention_dropout': 0, + 'ffn_dropout': 0, + 'megatron_legacy': True, + 'share_embeddings_and_output_weights': True, + 'sequence_parallel': False, + 'position_embedding_type': 'alibi', + 'normalize_attention_scores': True, + 'use_flash_attention': False, + 'override_vocab_size': 50432, + } + tokeniser_dict = { + 'library': 'huggingface', + 'type': 'EleutherAI/gpt-neox-20b', + 'use_fast': True, + } + optim_dict = { + 'name': 'fused_adam', + 'lr': 2e-4, + 'weight_decay': 0.01, + } + trainer_dict = { + 'devices': 1, + 'num_nodes': 1, + 'accelerator': 'gpu' if args.cuda else 'cpu', + 'precision': 'bf16', + 'logger': False, # logger provided by exp_manager + 'enable_checkpointing': False, + 'replace_sampler_ddp': False, + 'max_epochs': -1, # PTL default. In practice, max_steps will be reached first. + 'max_steps': 100000, # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + 'log_every_n_steps': 10, + 'val_check_interval': 100, + 'limit_val_batches': 50, + 'limit_test_batches': 500, + 'accumulate_grad_batches': 1, + 'gradient_clip_val': 1.0, + 'benchmark': False, + 'enable_model_summary': False, + } + + model_dict['tokenizer'] = tokeniser_dict + model_dict['optim'] = optim_dict + + omega_cfg = OmegaConf.create(model_dict) + + trainer = pl.Trainer(**trainer_dict) + + model = MegatronGPTModel(omega_cfg, trainer) + + model_keys = list(model.state_dict().keys()) + model_dtypes = list(set([model.state_dict()[x].dtype for x in model_keys])) + + if not (len(model_dtypes) == 1 and model_dtypes[0] is torch.bfloat16): + model = model.bfloat16() + + if args.cuda: + model = model.cuda() + + mpt_1 = torch.load(os.path.join(args.input, 'pytorch_model-00001-of-00002.bin'), map_location="cpu") + mpt_2 = torch.load(os.path.join(args.input, 'pytorch_model-00002-of-00002.bin'), map_location="cpu") + mpt_dict = {**mpt_1, **mpt_2} + del mpt_1, mpt_2 + + def convert_state_dict(state_dict, amp=False): + def get_new_key(old_key): + if old_key == 'transformer.wte.weight': + return 'language_model.embedding.word_embeddings.weight' + elif old_key == 'transformer.norm_f.weight': + return 'language_model.encoder.final_layernorm.weight' + else: + p1 = old_key.replace('transformer.blocks.', 'language_model.encoder.layers.') + p2 = p1.replace('norm_1.weight', 'input_layernorm.weight') + p3 = p2.replace('attn.Wqkv.weight', 'self_attention.query_key_value.weight') + p4 = p3.replace('attn.out_proj.weight', 'self_attention.dense.weight') + p5 = p4.replace('norm_2.weight', 'post_attention_layernorm.weight') + p6 = p5.replace('ffn.up_proj.weight', 'mlp.dense_h_to_4h.weight') + p7 = p6.replace('ffn.down_proj.weight', 'mlp.dense_4h_to_h.weight') + + return p7 + + new_dict = {} + + for old_key, val in state_dict.items(): + new_key = get_new_key(old_key) + if amp: + new_key = 'module.' + new_key + + new_dict[new_key] = val + + return new_dict + + convert_dict = convert_state_dict(mpt_dict, amp=model_dict['megatron_amp_O2']) + + if model_dict['megatron_amp_O2']: + missing_keys, unexpected_keys = model.model.load_state_dict(convert_dict, strict=True) + else: + missing_keys, unexpected_keys = super(GPTModel, model.model).load_state_dict(convert_dict, strict=True) + + if len(missing_keys) > 0: + logging.critical('Missing keys were detected during the load, something has gone wrong. Aborting.') + logging.critical(f'Missing keys: \n{missing_keys}') + exit(255) + + if len(unexpected_keys) > 0: + logging.warning('Unexpected keys were detected which should not happen. Please investigate.') + logging.warning(f'Unexpected keys: \n{unexpected_keys}') + + if args.output is None: + args.output = os.path.dirname(os.path.abspath(__file__)) + + model.save_to(os.path.join(args.output, 'megatron_mpt_7b_base_tp1_pp1.nemo')) diff --git a/tests/collections/asr/conftest.py b/tests/collections/asr/conftest.py new file mode 100644 index 000000000000..dba29f949fb0 --- /dev/null +++ b/tests/collections/asr/conftest.py @@ -0,0 +1,355 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import Optional, Type + +import numpy as np +import pytest +import torch + + +class RNNTTestHelper: + @staticmethod + def wrap_and_call(fn, acts, labels, device, input_lengths=None, target_lengths=None): + if not torch.is_tensor(acts): + acts = torch.FloatTensor(acts) + + if 'cuda' in device: + acts = acts.cuda() + + if not acts.requires_grad: + acts.requires_grad = True + + labels = torch.LongTensor(labels) + + if input_lengths is None: + lengths = [acts.shape[1]] * acts.shape[0] + lengths = torch.LongTensor(lengths) + else: + lengths = input_lengths + + if target_lengths is None: + label_lengths = [len(l) for l in labels] + label_lengths = torch.LongTensor(label_lengths) + else: + label_lengths = target_lengths + + if 'cuda' in device: + labels = labels.cuda() + lengths = lengths.cuda() + label_lengths = label_lengths.cuda() + + costs = fn(acts, labels, lengths, label_lengths) + cost = torch.sum(costs) + cost.backward() + + if 'cuda' in device: + torch.cuda.synchronize() + + if acts.grad is not None: + grad = acts.grad.data.cpu().numpy() + else: + grad = None + + return costs.data.cpu().numpy(), grad + + +@dataclass +class RnntLossSampleData: + vocab_size: int + blank_id: int + + logits: torch.Tensor + targets: torch.Tensor + input_lengths: torch.Tensor + target_lengths: torch.Tensor + + expected_cost: Optional[torch.Tensor] = None + expected_grads: Optional[torch.Tensor] = None + + @classmethod + def get_sample_small(cls) -> "RnntLossSampleData": + activations = np.array( + [ + [ + [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1], [0.1, 0.1, 0.2, 0.8, 0.1]], + [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.2, 0.1, 0.1], [0.7, 0.1, 0.2, 0.1, 0.1]], + ] + ] + ) + labels = np.asarray([[1, 2]]) + + expected_cost = [4.495666] + expected_grads = np.array( + [ + [ + [ + [-0.13116688, -0.3999269, 0.17703125, 0.17703125, 0.17703125], + [-0.18572757, 0.12247056, -0.18168412, 0.12247056, 0.12247056], + [-0.32091254, 0.06269141, 0.06928472, 0.12624499, 0.06269141], + ], + [ + [0.05456069, -0.21824276, 0.05456069, 0.05456069, 0.05456069], + [0.12073959, 0.12073959, -0.48295835, 0.12073959, 0.12073959], + [-0.6925882, 0.16871116, 0.18645467, 0.16871116, 0.16871116], + ], + ] + ] + ) + return RnntLossSampleData( + vocab_size=3, + blank_id=0, + logits=torch.from_numpy(activations).to(torch.float32), + targets=torch.from_numpy(labels), + input_lengths=torch.tensor([2]), + target_lengths=torch.tensor([2]), + expected_cost=torch.tensor(expected_cost).to(torch.float32), + expected_grads=torch.from_numpy(expected_grads), + ) + + @classmethod + def get_sample_small_blank_last(cls) -> "RnntLossSampleData": + activations = np.array( + [ + [ + [[0.0, 1.0, 3.0], [0.0, 2.0, 3.0], [1.0, 1.0, 3.0], [2.0, 3.0, 2.0]], + [[0.0, 0.0, 1.0], [0.0, 1.0, 1.0], [1.0, 0.0, 1.0], [2.0, 2.0, 0.0]], + [[0.0, 2.0, 5.0], [0.0, 3.0, 5.0], [1.0, 2.0, 5.0], [2.0, 4.0, 4.0]], + [[0.0, 3.0, 4.0], [0.0, 4.0, 4.0], [1.0, 3.0, 4.0], [2.0, 5.0, 3.0]], + [[2.0, 2.0, 1.0], [2.0, 3.0, 1.0], [3.0, 2.0, 1.0], [4.0, 4.0, 0.0]], + ] + ] + ) + labels = np.array([[0, 1, 0]]) + + expected_cost = [6.789285182952881] + expected_grads = np.array( + [ + [ + [ + [-0.03551076725125313, 0.11419519782066345, -0.07868456840515137], + [0.0027224558871239424, 0.00704305712133646, -0.009765520691871643], + [0.0013856772566214204, 0.0013924005907028913, -0.0027780719101428986], + [1.4249643527364242e-06, 3.873454716085689e-06, -5.298420546751004e-06], + ], + [ + [-0.1934257447719574, 0.19551163911819458, -0.0020859241485595703], + [0.07043898105621338, 0.05738453567028046, -0.12782356142997742], + [0.061031512916088104, 0.02286236733198166, -0.08389391005039215], + [0.0005252412520349026, 0.0005252412520349026, -0.0010504829697310925], + ], + [ + [-0.007841046899557114, 0.025142310187220573, -0.017301201820373535], + [0.0019501042552292347, 0.0005148053169250488, -0.0024650096893310547], + [0.0027856370434165, 0.008609085343778133, -0.01139475405216217], + [9.526080975774676e-05, 0.0007038871408440173, -0.000799147819634527], + ], + [ + [-0.01533521432429552, 0.1386115401983261, -0.12327653169631958], + [0.002850571647286415, -0.006693005561828613, 0.003842458128929138], + [0.009236274287104607, 0.08995233476161957, -0.0991886705160141], + [0.0001865450612967834, 0.0037468576338142157, -0.003933403175324202], + ], + [ + [-0.2888762652873993, 0.211185485124588, 0.07769080251455307], + [0.15952755510807037, -0.2182144820690155, 0.05868690833449364], + [-0.3332723379135132, 0.2436419129371643, 0.0896308496594429], + [0.4954628646373749, 0.4954628646373749, -0.9909257292747498], + ], + ] + ] + ) + return RnntLossSampleData( + vocab_size=3, + blank_id=2, + logits=torch.from_numpy(activations).to(torch.float32), + targets=torch.from_numpy(labels), + input_lengths=torch.tensor([5]), + target_lengths=torch.tensor([3]), + expected_cost=torch.tensor(expected_cost).to(torch.float32), + expected_grads=torch.from_numpy(expected_grads), + ) + + @classmethod + def get_sample_medium(cls) -> "RnntLossSampleData": + # minibatch x T x U x alphabet_size + activations = [ + [ + [ + [0.06535690384862791, 0.7875301411923206, 0.08159176605666074], + [0.5297155426466327, 0.7506749639230854, 0.7541348379087998], + [0.6097641124736383, 0.8681404965673826, 0.6225318186056529], + ], + [ + [0.6685222872103057, 0.8580392805336061, 0.16453892311765583], + [0.989779515236694, 0.944298460961015, 0.6031678586829663], + [0.9467833543605416, 0.666202507295747, 0.28688179752461884], + ], + [ + [0.09418426230195986, 0.3666735970751962, 0.736168049462793], + [0.1666804425271342, 0.7141542198635192, 0.3993997272216727], + [0.5359823524146038, 0.29182076440286386, 0.6126422611507932], + ], + [ + [0.3242405528768486, 0.8007644367291621, 0.5241057606558068], + [0.779194617063042, 0.18331417220174862, 0.113745182072432], + [0.24022162381327106, 0.3394695622533106, 0.1341595066017014], + ], + ], + [ + [ + [0.5055615569388828, 0.051597282072282646, 0.6402903936686337], + [0.43073311517251, 0.8294731834714112, 0.1774668847323424], + [0.3207001991262245, 0.04288308912457006, 0.30280282975568984], + ], + [ + [0.6751777088333762, 0.569537369330242, 0.5584738347504452], + [0.08313242153985256, 0.06016544344162322, 0.10795752845152584], + [0.7486153608562472, 0.943918041459349, 0.4863558118797222], + ], + [ + [0.4181986264486809, 0.6524078485043804, 0.024242983423721887], + [0.13458171554507403, 0.3663418070512402, 0.2958297395361563], + [0.9236695822497084, 0.6899291482654177, 0.7418981733448822], + ], + [ + [0.25000547599982104, 0.6034295486281007, 0.9872887878887768], + [0.5926057265215715, 0.8846724004467684, 0.5434495396894328], + [0.6607698886038497, 0.3771277082495921, 0.3580209022231813], + ], + ], + ] + + expected_cost = [4.2806528590890736, 3.9384369822503591] + expected_grads = [ + [ + [ + [-1.86843902e-01, -6.25548810e-02, 2.49398798e-01], + [-2.03376666e-01, 2.02399328e-01, 9.77333169e-04], + [-1.41016081e-01, 7.91234672e-02, 6.18926100e-02], + ], + [ + [-1.15517676e-02, -8.12802389e-02, 9.28319991e-02], + [-1.54257029e-01, 2.29432687e-01, -7.51756504e-02], + [-2.46593088e-01, 1.46404594e-01, 1.00188486e-01], + ], + [ + [-1.29182907e-02, -6.15932420e-02, 7.45115355e-02], + [-5.59857301e-02, 2.19830811e-01, -1.63845062e-01], + [-4.97626871e-01, 2.09239945e-01, 2.88386941e-01], + ], + [ + [1.36048580e-02, -3.02196294e-02, 1.66147724e-02], + [1.13924511e-01, 6.27811998e-02, -1.76705718e-01], + [-6.67078257e-01, 3.67658824e-01, 2.99419403e-01], + ], + ], + [ + [ + [-3.56343776e-01, -5.53474613e-02, 4.11691219e-01], + [-9.69219357e-02, 2.94591039e-02, 6.74628317e-02], + [-6.35175705e-02, 2.76544970e-02, 3.58630717e-02], + ], + [ + [-1.54499024e-01, -7.39420280e-02, 2.28441030e-01], + [-1.66789949e-01, -8.78955179e-05, 1.66877866e-01], + [-1.72369644e-01, 1.05565332e-01, 6.68043196e-02], + ], + [ + [2.38748826e-02, -1.18255816e-01, 9.43809375e-02], + [-1.04707085e-01, -1.08934477e-01, 2.13641584e-01], + [-3.69844258e-01, 1.80118099e-01, 1.89726159e-01], + ], + [ + [2.57137045e-02, -7.94617534e-02, 5.37480488e-02], + [1.22328237e-01, -2.38788679e-01, 1.16460443e-01], + [-5.98686993e-01, 3.02203178e-01, 2.96483815e-01], + ], + ], + ] + activations = np.array(activations) + labels = np.array([[1, 2], [1, 1]]) + expected_grads = np.array(expected_grads) + + return RnntLossSampleData( + vocab_size=3, + blank_id=0, + logits=torch.from_numpy(activations).to(torch.float32), + targets=torch.from_numpy(labels), + input_lengths=torch.tensor([4, 4]), + target_lengths=torch.tensor([2, 2]), + expected_cost=torch.tensor(expected_cost).to(torch.float32), + expected_grads=torch.from_numpy(expected_grads), + ) + + @classmethod + def get_sample_small_random(cls, blank_first: bool, device=torch.device("cpu")) -> "RnntLossSampleData": + vocab_size = 4 + blank_id = 0 if blank_first else vocab_size - 1 + num_frames = 4 + text_len = 2 + if blank_first: + text = np.asarray([1, 3]) + else: + text = np.asarray([0, 2]) + + targets = torch.from_numpy(text).unsqueeze(0).to(device) + logits = torch.rand([1, num_frames, text_len + 1, vocab_size], requires_grad=True, device=device) + input_lengths = torch.tensor([num_frames], device=device) + target_lengths = torch.tensor([text_len], device=device) + return RnntLossSampleData( + vocab_size=vocab_size, + blank_id=blank_id, + logits=logits, + targets=targets, + input_lengths=input_lengths, + target_lengths=target_lengths, + ) + + @classmethod + def get_sample_medium_random_var_size(cls, blank_first: bool, device=torch.device("cpu")) -> "RnntLossSampleData": + vocab_size = 32 + blank_id = 0 if blank_first else vocab_size - 1 + num_frames = 32 + text_len = 27 + min_symbol = 1 if blank_first else 0 + max_symbol = vocab_size if blank_first else vocab_size - 1 + batch_size = 4 + + rs = np.random.RandomState(2021) + text = rs.randint(min_symbol, max_symbol, size=(batch_size, text_len)) + targets = torch.from_numpy(text).to(device) + + logits = torch.rand([batch_size, num_frames, text_len + 1, vocab_size], requires_grad=True, device=device) + input_lengths = torch.tensor([num_frames, num_frames // 2, text_len, text_len // 2], device=device).long() + target_lengths = torch.tensor([text_len, text_len - 1, text_len - 3, text_len - 10], device=device) + return RnntLossSampleData( + vocab_size=vocab_size, + blank_id=blank_id, + logits=logits, + targets=targets, + input_lengths=input_lengths, + target_lengths=target_lengths, + ) + + +@pytest.fixture(scope="session") +def rnnt_test_helper() -> Type[RNNTTestHelper]: + return RNNTTestHelper + + +@pytest.fixture(scope="session") +def rnn_loss_sample_data() -> Type[RnntLossSampleData]: + return RnntLossSampleData diff --git a/tests/collections/asr/decoding/test_rnnt_decoding.py b/tests/collections/asr/decoding/test_rnnt_decoding.py index 9dd955c24a70..ac90e62036e0 100644 --- a/tests/collections/asr/decoding/test_rnnt_decoding.py +++ b/tests/collections/asr/decoding/test_rnnt_decoding.py @@ -130,7 +130,7 @@ def test_constructor(self): @pytest.mark.unit def test_constructor_subword(self, tmp_tokenizer): - cfg = RNNTBPEDecodingConfig() + cfg = RNNTDecodingConfig() vocab = tmp_tokenizer.vocab decoder = get_rnnt_decoder(vocab_size=len(vocab)) joint = get_rnnt_joint(vocab_size=len(vocab)) diff --git a/tests/collections/asr/k2/test_graph_transducer.py b/tests/collections/asr/k2/test_graph_transducer.py new file mode 100644 index 000000000000..5879226e782d --- /dev/null +++ b/tests/collections/asr/k2/test_graph_transducer.py @@ -0,0 +1,263 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List + +import numpy as np +import pytest +import torch + +from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_numpy import RNNTLoss as RNNTLoss_Numpy + +try: + from nemo.collections.asr.parts.k2.graph_transducer import GraphRnntLoss + from nemo.core.utils.k2_guard import k2 +except (ImportError, ModuleNotFoundError): + pytest.skip("k2 is not installed, skipping Graph-RNNT tests.", allow_module_level=True) + +EPS_SM_INPUT = 1e-6 +EPS_L_INPUT = 1e-4 + +DEVICES = ['cpu'] + +if torch.cuda.is_available() and k2.with_cuda: + DEVICES.append('cuda') + + +class TestGraphRnnt: + @pytest.mark.unit + @pytest.mark.parametrize("device", DEVICES) + @pytest.mark.parametrize("blank_first", [True, False]) + @pytest.mark.parametrize("num_frames", [1, 3, 6]) + @pytest.mark.parametrize("vocab_size", [3]) + def test_temporal_schema(self, device, blank_first, num_frames, vocab_size): + blank_id = 0 if blank_first else vocab_size - 1 + loss = GraphRnntLoss(blank=blank_id) + temporal_schema = loss.get_temporal_schema( + num_frames=num_frames, vocab_size=vocab_size, device=torch.device(device) + ) + + etalon_schema_fst: List[List[int]] = [] + for time_i in range(num_frames): + for label_i in range(vocab_size): + if label_i == blank_id: + # transition to the next state + etalon_schema_fst.append([time_i, time_i + 1, label_i, time_i, 0]) + else: + # self-loop + etalon_schema_fst.append([time_i, time_i, label_i, time_i, 0]) + etalon_schema_fst.append([num_frames, num_frames + 1, -1, -1, 0]) # transition to final state + etalon_schema_fst.append([num_frames + 1]) # final state + etalon_schema_fst = sorted(etalon_schema_fst) # required for k2.Fsa.from_str + etalon_schema_fst_str = "\n".join([" ".join(map(str, line)) for line in etalon_schema_fst]) + etalon_temporal_schema = k2.Fsa.from_str(etalon_schema_fst_str, num_aux_labels=1) + + assert temporal_schema.num_arcs == etalon_temporal_schema.num_arcs + assert temporal_schema.shape == etalon_temporal_schema.shape # (num_states, None) + assert k2.is_rand_equivalent( + temporal_schema, etalon_temporal_schema, log_semiring=True, treat_epsilons_specially=False + ), "Temporal schema mismatch" + assert k2.is_rand_equivalent( + temporal_schema.invert(), + etalon_temporal_schema.invert(), + log_semiring=True, + treat_epsilons_specially=False, + ), "Temporal schema output labels mismatch" + + @pytest.mark.unit + @pytest.mark.parametrize("device", DEVICES) + @pytest.mark.parametrize("blank_first", [True, False]) + def test_unit_schema(self, device, blank_first): + vocab_size = 3 + blank_id = 0 if blank_first else vocab_size - 1 + if blank_first: + labels = [1, 1, 2, 1] + else: + labels = [1, 1, 0, 1] + loss = GraphRnntLoss(blank=blank_id) + unit_schema = loss.get_unit_schema( + units_tensor=torch.tensor(labels, device=torch.device(device)), vocab_size=vocab_size + ) + + etalon_schema_fst: List[List[int]] = [] + for label_i, label in enumerate(labels): + etalon_schema_fst.append([label_i, label_i + 1, label, label, label_i, 0]) # forward: label + etalon_schema_fst.append([label_i, label_i, blank_id, blank_id, label_i, 0]) # self-loop: blank + etalon_schema_fst.append([len(labels), len(labels), blank_id, blank_id, len(labels), 0]) + etalon_schema_fst.append([len(labels), len(labels) + 1, -1, -1, -1, 0]) # transition to final state + etalon_schema_fst.append([len(labels) + 1]) # final state + etalon_schema_fst = sorted(etalon_schema_fst) # required for k2.Fsa.from_str + etalon_schema_fst_str = "\n".join([" ".join(map(str, line)) for line in etalon_schema_fst]) + etalon_unit_schema = k2.Fsa.from_str(etalon_schema_fst_str, aux_label_names=["aux_labels", "unit_positions"]) + + assert unit_schema.num_arcs == etalon_unit_schema.num_arcs + assert unit_schema.shape == etalon_unit_schema.shape # (num_states, None) + assert k2.is_rand_equivalent( + unit_schema, etalon_unit_schema, log_semiring=True, treat_epsilons_specially=False + ), "Unit schema input labels mismatch" + assert k2.is_rand_equivalent( + unit_schema.invert(), etalon_unit_schema.invert(), log_semiring=True, treat_epsilons_specially=False + ), "Unit schema output labels mismatch" + + # swap aux_labels and unit positions to test unit_positions + unit_schema.aux_labels, unit_schema.unit_positions = unit_schema.unit_positions, unit_schema.aux_labels + etalon_unit_schema.aux_labels, etalon_unit_schema.unit_positions = ( + etalon_unit_schema.unit_positions, + etalon_unit_schema.aux_labels, + ) + assert k2.is_rand_equivalent( + unit_schema.invert(), etalon_unit_schema.invert(), log_semiring=True, treat_epsilons_specially=False + ), "Unit schema unit positions mismatch" + + @pytest.mark.unit + @pytest.mark.parametrize("device", DEVICES) + @pytest.mark.parametrize("blank_first", [True, False]) + def test_grid_schema(self, device, blank_first): + vocab_size = 3 + blank_id = 0 if blank_first else vocab_size - 1 + if blank_first: + labels = [1, 1, 2, 1] + else: + labels = [1, 1, 0, 1] + text_length = len(labels) + num_frames = 5 + loss = GraphRnntLoss(blank=blank_id) + grid_schema = loss.get_grid( + units_tensor=torch.tensor(labels, device=torch.device(device)), + num_frames=num_frames, + vocab_size=vocab_size, + ) + + etalon_schema_fst: List[List[int]] = [] + for frame_i in range(num_frames): + for label_i in range(text_length + 1): + state = frame_i * (text_length + 1) + label_i + if label_i < text_length: + next_state_label = state + 1 + # next unit + etalon_schema_fst.append([state, next_state_label, labels[label_i], frame_i, label_i, 0]) + if frame_i < num_frames - 1: + next_state_frame = (frame_i + 1) * (text_length + 1) + label_i + # next time frame (blank) + etalon_schema_fst.append([state, next_state_frame, blank_id, frame_i, label_i, 0]) + + last_grid_state = num_frames * (text_length + 1) - 1 + etalon_schema_fst.append([last_grid_state, last_grid_state + 1, blank_id, num_frames - 1, text_length, 0]) + etalon_schema_fst.append( + [last_grid_state + 1, last_grid_state + 2, -1, -1, -1, 0] + ) # transition to final state + etalon_schema_fst.append([last_grid_state + 2]) # final state + etalon_schema_fst = sorted(etalon_schema_fst) # required for k2.Fsa.from_str + etalon_schema_fst_str = "\n".join([" ".join(map(str, line)) for line in etalon_schema_fst]) + etalon_grid_schema = k2.Fsa.from_str(etalon_schema_fst_str, aux_label_names=["aux_labels", "unit_positions"]) + + assert grid_schema.num_arcs == etalon_grid_schema.num_arcs + assert grid_schema.shape == etalon_grid_schema.shape # (num_states, None) + assert k2.is_rand_equivalent( + grid_schema, etalon_grid_schema, log_semiring=True, treat_epsilons_specially=False + ), "Grid schema input labels mismatch" + assert k2.is_rand_equivalent( + grid_schema.invert(), etalon_grid_schema.invert(), log_semiring=True, treat_epsilons_specially=False + ), "Grid schema output labels mismatch" + + # swap aux_labels and unit positions to test unit_positions + grid_schema.aux_labels, grid_schema.unit_positions = grid_schema.unit_positions, grid_schema.aux_labels + etalon_grid_schema.aux_labels, etalon_grid_schema.unit_positions = ( + etalon_grid_schema.unit_positions, + etalon_grid_schema.aux_labels, + ) + assert k2.is_rand_equivalent( + grid_schema.invert(), etalon_grid_schema.invert(), log_semiring=True, treat_epsilons_specially=False + ), "Grid schema unit positions mismatch" + + @pytest.mark.unit + @pytest.mark.parametrize("device", DEVICES) + @pytest.mark.parametrize("connect_composed", [True, False]) + @pytest.mark.parametrize("blank_first", [True, False]) + def test_small_compose_transducer( + self, device, connect_composed, blank_first, rnnt_test_helper, rnn_loss_sample_data + ): + if blank_first: + sample_data = rnn_loss_sample_data.get_sample_small() + else: + sample_data = rnn_loss_sample_data.get_sample_small_blank_last() + graph_rnnt = GraphRnntLoss( + blank=sample_data.blank_id, connect_composed=connect_composed, use_grid_implementation=False + ) + graph_cost, graph_grads = rnnt_test_helper.wrap_and_call( + graph_rnnt, sample_data.logits, sample_data.targets, device + ) + assert np.allclose(graph_cost, sample_data.expected_cost.numpy(), rtol=EPS_SM_INPUT), "costs mismatch." + assert np.allclose(graph_grads, sample_data.expected_grads.numpy(), atol=1e-6), "gradient mismatch." + + @pytest.mark.unit + @pytest.mark.parametrize("device", DEVICES) + def test_small_grid_transducer(self, device, rnnt_test_helper, rnn_loss_sample_data): + sample_data = rnn_loss_sample_data.get_sample_small() + graph_rnnt = GraphRnntLoss(blank=0, use_grid_implementation=True) + graph_cost, graph_grads = rnnt_test_helper.wrap_and_call( + graph_rnnt, sample_data.logits, sample_data.targets, device + ) + assert np.allclose(graph_cost, sample_data.expected_cost.numpy(), rtol=EPS_SM_INPUT), "costs mismatch." + assert np.allclose(graph_grads, sample_data.expected_grads.numpy(), atol=1e-6), "gradient mismatch." + + @pytest.mark.unit + @pytest.mark.parametrize("device", DEVICES) + def test_medium_grid_transducer(self, device, rnnt_test_helper, rnn_loss_sample_data): + sample_data = rnn_loss_sample_data.get_sample_medium() + graph_rnnt = GraphRnntLoss(blank=0, use_grid_implementation=True) + graph_cost, graph_grads = rnnt_test_helper.wrap_and_call( + graph_rnnt, sample_data.logits, sample_data.targets, device + ) + assert np.allclose(graph_cost, sample_data.expected_cost.numpy(), rtol=EPS_SM_INPUT), "costs mismatch." + assert np.allclose(graph_grads, sample_data.expected_grads.numpy(), atol=1e-6), "gradient mismatch." + + @pytest.mark.unit + @pytest.mark.parametrize("device", DEVICES) + def test_medium_random_var_size(self, device, rnnt_test_helper, rnn_loss_sample_data): + sample_data = rnn_loss_sample_data.get_sample_medium_random_var_size(blank_first=True) + graph_rnnt = GraphRnntLoss(blank=0, use_grid_implementation=True) + graph_cost, graph_grads = rnnt_test_helper.wrap_and_call( + graph_rnnt, + sample_data.logits.detach(), + sample_data.targets, + device, + input_lengths=sample_data.input_lengths, + target_lengths=sample_data.target_lengths, + ) + etalon_rnnt = RNNTLoss_Numpy(blank=0) + etalon_cost, etalon_grads = rnnt_test_helper.wrap_and_call( + etalon_rnnt, + sample_data.logits.detach(), + sample_data.targets, + device, + input_lengths=sample_data.input_lengths, + target_lengths=sample_data.target_lengths, + ) + assert np.allclose(graph_cost.sum(), etalon_cost, rtol=EPS_SM_INPUT), "costs mismatch." + assert np.allclose(graph_grads, etalon_grads, atol=1e-4), "gradient mismatch." + + @pytest.mark.unit + @pytest.mark.parametrize("device", DEVICES) + @pytest.mark.parametrize("blank_first", [True, False]) + def test_small_random_grid_compose_equivalent(self, device: torch.device, blank_first: bool, rnn_loss_sample_data): + sample_data = rnn_loss_sample_data.get_sample_small_random(blank_first, device=device) + criterion = GraphRnntLoss(blank=sample_data.blank_id, connect_composed=True, use_grid_implementation=False) + text_tensor = sample_data.targets[0] + num_frames = sample_data.logits.shape[1] + graph_grid = criterion.get_grid(text_tensor, num_frames, sample_data.vocab_size) + graph_composed = criterion.get_composed_lattice(text_tensor, num_frames, sample_data.vocab_size) + assert k2.is_rand_equivalent( + graph_grid, graph_composed, log_semiring=True, treat_epsilons_specially=False + ), "Grid and composed graphs are not equivalent." diff --git a/tests/collections/asr/k2/test_w_transducer.py b/tests/collections/asr/k2/test_w_transducer.py new file mode 100644 index 000000000000..cee469a89b7c --- /dev/null +++ b/tests/collections/asr/k2/test_w_transducer.py @@ -0,0 +1,260 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List + +import numpy as np +import pytest +import torch + +try: + from nemo.collections.asr.parts.k2.w_transducer import GraphWTransducerLoss + from nemo.core.utils.k2_guard import k2 +except (ImportError, ModuleNotFoundError): + pytest.skip("k2 is not installed, skipping Graph-W-Transducer tests.", allow_module_level=True) + +DEVICES = ['cpu'] + +if torch.cuda.is_available() and k2.with_cuda: + DEVICES.append('cuda') + + +class TestGraphWTransducerLoss: + @pytest.mark.unit + @pytest.mark.parametrize("device", DEVICES) + @pytest.mark.parametrize("blank_first", [True, False]) + @pytest.mark.parametrize("num_frames", [1, 3, 6]) + @pytest.mark.parametrize("vocab_size", [3]) + @pytest.mark.parametrize("last_blank_mode", ["force_final", "allow_ignore"]) + def test_temporal_schema(self, device, blank_first, num_frames, vocab_size, last_blank_mode): + blank_id = 0 if blank_first else vocab_size - 1 + loss = GraphWTransducerLoss(blank=blank_id, last_blank_mode=last_blank_mode) + temporal_schema = loss.get_temporal_schema( + num_frames=num_frames, vocab_size=vocab_size, device=torch.device(device) + ) + + etalon_schema_fst: List[List[int]] = [] + for time_i in range(num_frames): + for label_i in range(vocab_size): + if label_i == blank_id: + # transition to the next state + etalon_schema_fst.append([time_i, time_i + 1, label_i, time_i, 0]) + else: + # self-loop + etalon_schema_fst.append([time_i, time_i, label_i, time_i, 0]) + + # eps transitions from the first state + eps_from_first_state = vocab_size + for time_i in range(1, num_frames): + etalon_schema_fst.append([0, time_i, eps_from_first_state, 0, 0]) + + # eps transitions to the last state + eps_to_last_state = vocab_size + 1 + last_state_eps = num_frames - 1 if last_blank_mode == "force_final" else num_frames + for time_i in range(0, num_frames - 1): + etalon_schema_fst.append([time_i, last_state_eps, eps_to_last_state, time_i, 0]) + + # transition to the final state + etalon_schema_fst.append([num_frames, num_frames + 1, -1, -1, 0]) + # final state + etalon_schema_fst.append([num_frames + 1]) + + etalon_schema_fst = sorted(etalon_schema_fst) # required for k2.Fsa.from_str + etalon_schema_fst_str = "\n".join([" ".join(map(str, line)) for line in etalon_schema_fst]) + etalon_temporal_schema = k2.Fsa.from_str(etalon_schema_fst_str, num_aux_labels=1) + + assert temporal_schema.num_arcs == etalon_temporal_schema.num_arcs + assert temporal_schema.shape == etalon_temporal_schema.shape # (num_states, None) + assert k2.is_rand_equivalent( + temporal_schema, etalon_temporal_schema, log_semiring=True, treat_epsilons_specially=False + ), "Temporal schema mismatch" + assert k2.is_rand_equivalent( + temporal_schema.invert(), + etalon_temporal_schema.invert(), + log_semiring=False, + treat_epsilons_specially=False, + ), "Temporal schema output labels mismatch" + + @pytest.mark.unit + @pytest.mark.parametrize("device", DEVICES) + @pytest.mark.parametrize("blank_first", [True, False]) + def test_unit_schema(self, device, blank_first): + vocab_size = 3 + blank_id = 0 if blank_first else vocab_size - 1 + if blank_first: + labels = [1, 1, 2, 1] + else: + labels = [1, 1, 0, 1] + loss = GraphWTransducerLoss(blank=blank_id) + unit_schema = loss.get_unit_schema( + units_tensor=torch.tensor(labels, device=torch.device(device)), vocab_size=vocab_size + ) + + etalon_schema_fst: List[List[int]] = [] + for label_i, label in enumerate(labels): + etalon_schema_fst.append([label_i, label_i + 1, label, label, label_i, 0]) # forward: label + etalon_schema_fst.append([label_i, label_i, blank_id, blank_id, label_i, 0]) # self-loop: blank + etalon_schema_fst.append([len(labels), len(labels), blank_id, blank_id, len(labels), 0]) + # eps-transitions + etalon_schema_fst.append([0, 0, vocab_size, vocab_size, 0, 0]) + etalon_schema_fst.append([len(labels), len(labels), vocab_size + 1, vocab_size + 1, len(labels), 0]) + + etalon_schema_fst.append([len(labels), len(labels) + 1, -1, -1, -1, 0]) # transition to final state + etalon_schema_fst.append([len(labels) + 1]) # final state + etalon_schema_fst = sorted(etalon_schema_fst) # required for k2.Fsa.from_str + etalon_schema_fst_str = "\n".join([" ".join(map(str, line)) for line in etalon_schema_fst]) + etalon_unit_schema = k2.Fsa.from_str(etalon_schema_fst_str, aux_label_names=["aux_labels", "unit_positions"]) + + assert unit_schema.num_arcs == etalon_unit_schema.num_arcs + assert unit_schema.shape == etalon_unit_schema.shape # (num_states, None) + assert k2.is_rand_equivalent( + unit_schema, etalon_unit_schema, log_semiring=True, treat_epsilons_specially=False + ), "Unit schema input labels mismatch" + assert k2.is_rand_equivalent( + unit_schema.invert(), etalon_unit_schema.invert(), log_semiring=True, treat_epsilons_specially=False + ), "Unit schema output labels mismatch" + + # swap aux_labels and unit positions to test unit_positions + unit_schema.aux_labels, unit_schema.unit_positions = unit_schema.unit_positions, unit_schema.aux_labels + etalon_unit_schema.aux_labels, etalon_unit_schema.unit_positions = ( + etalon_unit_schema.unit_positions, + etalon_unit_schema.aux_labels, + ) + assert k2.is_rand_equivalent( + unit_schema.invert(), etalon_unit_schema.invert(), log_semiring=True, treat_epsilons_specially=False + ), "Unit schema unit positions mismatch" + + @pytest.mark.unit + @pytest.mark.parametrize("device", DEVICES) + @pytest.mark.parametrize("blank_first", [True, False]) + @pytest.mark.parametrize("last_blank_mode", ["force_final", "allow_ignore"]) + def test_grid_schema(self, device, blank_first, last_blank_mode): + vocab_size = 3 + blank_id = 0 if blank_first else vocab_size - 1 + if blank_first: + labels = [1, 1, 2, 1] + else: + labels = [1, 1, 0, 1] + text_length = len(labels) + num_frames = 5 + loss = GraphWTransducerLoss(blank=blank_id, last_blank_mode=last_blank_mode) + grid_schema = loss.get_grid( + units_tensor=torch.tensor(labels, device=torch.device(device)), + num_frames=num_frames, + vocab_size=vocab_size, + ) + + etalon_schema_fst: List[List[int]] = [] + for frame_i in range(num_frames): + for label_i in range(text_length + 1): + state = frame_i * (text_length + 1) + label_i + if label_i < text_length: + next_state_label = state + 1 + # next unit + etalon_schema_fst.append([state, next_state_label, labels[label_i], frame_i, label_i, 0]) + if frame_i < num_frames - 1: + next_state_frame = (frame_i + 1) * (text_length + 1) + label_i + # next time frame (blank) + etalon_schema_fst.append([state, next_state_frame, blank_id, frame_i, label_i, 0]) + + # start eps-transition + for frame_i in range(1, num_frames): + etalon_schema_fst.append([0, frame_i * (text_length + 1), vocab_size, 0, 0, 0]) + + last_grid_state = num_frames * (text_length + 1) - 1 + + # end eps-transitions + if last_blank_mode == "force_final": + last_eps_state = last_grid_state + else: + assert last_blank_mode == "allow_ignore" + last_eps_state = last_grid_state + 1 + + for frame_i in range(num_frames - 1): + etalon_schema_fst.append( + [(frame_i + 1) * (text_length + 1) - 1, last_eps_state, vocab_size + 1, frame_i, text_length, 0] + ) + + etalon_schema_fst.append([last_grid_state, last_grid_state + 1, blank_id, num_frames - 1, text_length, 0]) + etalon_schema_fst.append( + [last_grid_state + 1, last_grid_state + 2, -1, -1, -1, 0] + ) # transition to final state + etalon_schema_fst.append([last_grid_state + 2]) # final state + etalon_schema_fst = sorted(etalon_schema_fst) # required for k2.Fsa.from_str + etalon_schema_fst_str = "\n".join([" ".join(map(str, line)) for line in etalon_schema_fst]) + etalon_grid_schema = k2.Fsa.from_str(etalon_schema_fst_str, aux_label_names=["aux_labels", "unit_positions"]) + + assert grid_schema.num_arcs == etalon_grid_schema.num_arcs + assert grid_schema.shape == etalon_grid_schema.shape # (num_states, None) + assert k2.is_rand_equivalent( + grid_schema, etalon_grid_schema, log_semiring=True, treat_epsilons_specially=False + ), "Grid schema input labels mismatch" + assert k2.is_rand_equivalent( + grid_schema.invert(), etalon_grid_schema.invert(), log_semiring=True, treat_epsilons_specially=False + ), "Grid schema output labels mismatch" + + # swap aux_labels and unit positions to test unit_positions + grid_schema.aux_labels, grid_schema.unit_positions = grid_schema.unit_positions, grid_schema.aux_labels + etalon_grid_schema.aux_labels, etalon_grid_schema.unit_positions = ( + etalon_grid_schema.unit_positions, + etalon_grid_schema.aux_labels, + ) + assert k2.is_rand_equivalent( + grid_schema.invert(), etalon_grid_schema.invert(), log_semiring=True, treat_epsilons_specially=False + ), "Grid schema unit positions mismatch" + + @pytest.mark.unit + @pytest.mark.parametrize("device", DEVICES) + @pytest.mark.parametrize("blank_first", [True, False]) + @pytest.mark.parametrize("last_blank_mode", ["allow_ignore", "force_final"]) + def test_small_random_grid_compose_equivalent( + self, device: torch.device, blank_first: bool, last_blank_mode, rnn_loss_sample_data + ): + sample_data = rnn_loss_sample_data.get_sample_small_random(blank_first, device=device) + criterion = GraphWTransducerLoss( + blank=sample_data.blank_id, + last_blank_mode=last_blank_mode, + connect_composed=True, + use_grid_implementation=False, + ) + text_tensor = sample_data.targets[0] + num_frames = sample_data.logits.shape[1] + graph_grid = criterion.get_grid(text_tensor, num_frames, sample_data.vocab_size) + graph_composed = criterion.get_composed_lattice(text_tensor, num_frames, sample_data.vocab_size) + assert k2.is_rand_equivalent( + graph_grid, graph_composed, log_semiring=True, treat_epsilons_specially=False + ), "Grid and composed graphs are not equivalent." + + @pytest.mark.unit + @pytest.mark.parametrize("device", DEVICES) + @pytest.mark.parametrize("last_blank_mode", ["allow_ignore", "force_final"]) + @pytest.mark.parametrize("use_grid_implementation", [True, False]) + def test_small_grid_transducer_inf_penalty( + self, device, last_blank_mode, use_grid_implementation, rnnt_test_helper, rnn_loss_sample_data + ): + """ + With -inf eps penalty W-Transducer loss should be equivalent to RNN-T loss. + """ + sample_data = rnn_loss_sample_data.get_sample_small() + graph_rnnt = GraphWTransducerLoss( + blank=0, + eps_weight=-100.0, + last_blank_mode=last_blank_mode, + use_grid_implementation=use_grid_implementation, + ) + graph_cost, graph_grads = rnnt_test_helper.wrap_and_call( + graph_rnnt, sample_data.logits, sample_data.targets, device + ) + assert np.allclose(graph_cost, sample_data.expected_cost.numpy(), rtol=1e-6), "costs mismatch." + assert np.allclose(graph_grads, sample_data.expected_grads.numpy(), atol=1e-6), "gradient mismatch." diff --git a/tests/collections/asr/numba/rnnt_loss/test_rnnt_pytorch.py b/tests/collections/asr/numba/rnnt_loss/test_rnnt_pytorch.py index 7764649bf1fa..1a29a14f540d 100644 --- a/tests/collections/asr/numba/rnnt_loss/test_rnnt_pytorch.py +++ b/tests/collections/asr/numba/rnnt_loss/test_rnnt_pytorch.py @@ -18,9 +18,13 @@ import pytest import torch -from nemo.collections.asr.losses.rnnt import MultiblankRNNTLossPytorch, RNNTLossPytorch +from nemo.collections.asr.losses.rnnt import MultiblankRNNTLossPytorch, RNNTLossPytorch, TDTLossPytorch from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_numpy import RNNTLoss as RNNTLoss_Numpy -from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import MultiblankRNNTLossNumba, RNNTLossNumba +from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import ( + MultiblankRNNTLossNumba, + RNNTLossNumba, + TDTLossNumba, +) from nemo.core.utils import numba_utils from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__ @@ -30,9 +34,14 @@ DEVICES.append('cuda') +DTYPES = [np.float32] +if numba_utils.is_numba_cuda_fp16_supported(): + DTYPES.append(np.float16) + + def wrap_and_call(fn, acts, labels, device): if not torch.is_tensor(acts): - acts = torch.FloatTensor(acts) + acts = torch.tensor(acts) if 'cuda' in device: acts = acts.cuda() @@ -68,7 +77,8 @@ def wrap_and_call(fn, acts, labels, device): class TestRNNTLossPytorch: @pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) - def test_case_small(self, device): + @pytest.mark.parametrize('dtype', DTYPES) + def test_case_small(self, device, dtype): if device == 'cuda': numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) @@ -79,9 +89,13 @@ def test_case_small(self, device): [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.2, 0.1, 0.1], [0.7, 0.1, 0.2, 0.1, 0.1]], ] ] - ) + ).astype(dtype) labels = [[1, 2]] + cost_threshold = 1e-8 if dtype == np.float32 else 5e-4 + grad_threshold = 1e-8 if dtype == np.float32 else 1e-4 + rtol = 1e-5 if dtype == np.float32 else 1e-3 + fn_pt = RNNTLossNumba(blank=0, reduction='sum') pt_cost, pt_grads = wrap_and_call(fn_pt, acts, labels, device) @@ -109,23 +123,28 @@ def test_case_small(self, device): ] ) - assert np.allclose(pt_cost, expected_cost, rtol=1e-6), "small_test costs mismatch." - assert np.allclose(pt_grads, expected_grads), "small_test gradient mismatch." + assert np.allclose(pt_cost, expected_cost, atol=cost_threshold, rtol=1e-6), "small_test costs mismatch." + assert np.allclose(pt_grads, expected_grads, atol=grad_threshold, rtol=rtol), "small_test gradient mismatch." - assert np.allclose(pt_cost, np_cost, rtol=1e-6), "small_test costs mismatch." - assert np.allclose(pt_grads, np_grads), "small_test gradient mismatch." + assert np.allclose(pt_cost, np_cost, atol=cost_threshold, rtol=rtol), "small_test costs mismatch." + assert np.allclose(pt_grads, np_grads, atol=grad_threshold, rtol=rtol), "small_test gradient mismatch." - assert np.allclose(ag_cost, np_cost, rtol=1e-6), "small_test costs mismatch." - assert np.allclose(ag_grads, np_grads), "small_test gradient mismatch." + assert np.allclose(ag_cost, np_cost, atol=cost_threshold, rtol=rtol), "small_test costs mismatch." + assert np.allclose(ag_grads, np_grads, atol=cost_threshold, rtol=rtol), "small_test gradient mismatch." @pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) - def test_case_small_random(self, device): + @pytest.mark.parametrize('dtype', DTYPES) + def test_case_small_random(self, device, dtype): if device == 'cuda': numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) + cost_threshold = 1e-8 if dtype == np.float32 else 5e-4 + grad_threshold = 1e-8 if dtype == np.float32 else 1e-4 + rtol = 1e-5 if dtype == np.float32 else 1e-3 + rng = np.random.RandomState(0) - acts = rng.randn(1, 4, 3, 3) + acts = rng.randn(1, 4, 3, 3).astype(dtype) labels = [[1, 2]] fn_pt = RNNTLossNumba(blank=0, reduction='sum') @@ -137,16 +156,17 @@ def test_case_small_random(self, device): fn_ag = RNNTLossPytorch(blank=0, reduction='sum') # ag for automatic gradient computation ag_cost, ag_grads = wrap_and_call(fn_ag, acts, labels, device) - assert np.allclose(pt_cost, np_cost, rtol=1e-6), "small_random_test costs mismatch." - assert np.allclose(pt_grads, np_grads), "small_random_test gradient mismatch." + assert np.allclose(pt_cost, np_cost, atol=cost_threshold, rtol=rtol), "small_random_test costs mismatch." + assert np.allclose(pt_grads, np_grads, atol=grad_threshold, rtol=rtol), "small_random_test gradient mismatch." - assert np.allclose(pt_cost, ag_cost, rtol=1e-6), "small_random_test costs mismatch." - assert np.allclose(pt_grads, ag_grads), "small_random_test gradient mismatch." + assert np.allclose(pt_cost, ag_cost, atol=cost_threshold, rtol=rtol), "small_random_test costs mismatch." + assert np.allclose(pt_grads, ag_grads, atol=grad_threshold, rtol=rtol), "small_random_test gradient mismatch." @pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) + @pytest.mark.parametrize('dtype', DTYPES) @pytest.mark.parametrize('fastemit_lambda', [1.0, 0.01, 0.00001]) - def test_case_small_random_fastemit_reg(self, device, fastemit_lambda): + def test_case_small_random_fastemit_reg(self, device, dtype, fastemit_lambda): if device == 'cuda': numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) @@ -161,11 +181,12 @@ def test_case_small_random_fastemit_reg(self, device, fastemit_lambda): np_cost, np_grads = wrap_and_call(fn_np, acts, labels, device) assert np.allclose(pt_cost, np_cost, rtol=1e-6), "small_random_test costs mismatch." - assert np.allclose(pt_grads, np_grads, atol=1e-5, rtol=1e-5), "small_random_test gradient mismatch." + assert np.allclose(pt_grads, np_grads, rtol=1e-5), "small_random_test gradient mismatch." @pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) - def test_case_big_tensor(self, device): + @pytest.mark.parametrize('dtype', DTYPES) + def test_case_big_tensor(self, device, dtype): if device == 'cuda': numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) @@ -265,9 +286,13 @@ def test_case_big_tensor(self, device): ], ] - activations = np.array(activations) + activations = np.array(activations).astype(dtype) labels = [[1, 2], [1, 1]] + cost_threshold = 1e-8 if dtype == np.float32 else 5e-4 + grad_threshold = 1e-8 if dtype == np.float32 else 1e-4 + rtol = 1e-3 if dtype == np.float32 else 0.1 + fn_pt = RNNTLossNumba(blank=0, reduction='sum') pt_costs, pt_grads = wrap_and_call(fn_pt, activations, labels, device) @@ -277,23 +302,30 @@ def test_case_big_tensor(self, device): fn_ag = RNNTLossPytorch(blank=0, reduction='sum') ag_costs, ag_grads = wrap_and_call(fn_ag, activations, labels, device) - assert np.allclose(pt_costs, sum(expected_costs)), "big_test average costs mismatch." - assert np.allclose(pt_grads, expected_grads, rtol=1e-3), "big_test grads for average cost mismatch." + assert np.allclose(pt_costs, sum(expected_costs), atol=cost_threshold), "big_test average costs mismatch." + assert np.allclose( + pt_grads, expected_grads, atol=grad_threshold, rtol=1e-3 + ), "big_test grads for average cost mismatch." - assert np.allclose(pt_costs, np_costs), "big_test average costs mismatch." - assert np.allclose(pt_grads, np_grads, rtol=1e-3), "big_test grads for average cost mismatch." + assert np.allclose(pt_costs, np_costs, atol=cost_threshold, rtol=rtol), "big_test average costs mismatch." + assert np.allclose( + pt_grads, np_grads, atol=grad_threshold, rtol=rtol + ), "big_test grads for average cost mismatch." - assert np.allclose(pt_costs, ag_costs), "big_test average costs mismatch." - assert np.allclose(pt_grads, ag_grads, rtol=1e-3), "big_test grads for average cost mismatch." + assert np.allclose(pt_costs, ag_costs, atol=cost_threshold, rtol=rtol), "big_test average costs mismatch." + assert np.allclose( + pt_grads, ag_grads, atol=grad_threshold, rtol=rtol + ), "big_test grads for average cost mismatch." @pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) - def test_case_large_random(self, device): + @pytest.mark.parametrize('dtype', DTYPES) + def test_case_large_random(self, device, dtype): if device == 'cuda': numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) rng = np.random.RandomState(0) - acts = rng.randn(4, 8, 11, 5) + acts = rng.randn(4, 8, 11, 5).astype(dtype) labels = [ [1, 2, 4, 3, 2, 2, 1, 1, 1, 1], [3, 2, 2, 3, 4, 1, 1, 1, 1, 1], @@ -301,6 +333,10 @@ def test_case_large_random(self, device): [1, 1, 2, 1, 2, 3, 3, 1, 1, 1], ] + cost_threshold = 1e-8 if dtype == np.float32 else 5e-4 + grad_threshold = 1e-8 if dtype == np.float32 else 1e-4 + rtol = 1e-3 if dtype == np.float32 else 5e-2 + fn_pt = RNNTLossNumba(blank=0, reduction='sum') pt_cost, pt_grads = wrap_and_call(fn_pt, acts, labels, device) @@ -310,14 +346,15 @@ def test_case_large_random(self, device): fn_ag = RNNTLossPytorch(blank=0, reduction='sum') ag_cost, ag_grads = wrap_and_call(fn_ag, acts, labels, device) - assert np.allclose(pt_cost, np_cost, atol=1e-5, rtol=1e-3), "large_random_test costs mismatch." - assert np.allclose(ag_cost, np_cost, atol=1e-5, rtol=1e-3), "large_random_test costs mismatch." - assert np.allclose(pt_grads, np_grads, atol=1e-5, rtol=1e-3), "large_random_test gradient mismatch." - assert np.allclose(ag_grads, np_grads, atol=1e-5, rtol=1e-3), "large_random_test gradient mismatch." + assert np.allclose(pt_cost, np_cost, atol=cost_threshold, rtol=rtol), "large_random_test costs mismatch." + assert np.allclose(ag_cost, np_cost, atol=cost_threshold, rtol=rtol), "large_random_test costs mismatch." + assert np.allclose(pt_grads, np_grads, atol=grad_threshold, rtol=rtol), "large_random_test gradient mismatch." + assert np.allclose(ag_grads, np_grads, atol=grad_threshold, rtol=rtol), "large_random_test gradient mismatch." @pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) - def test_case_small_clamp(self, device): + @pytest.mark.parametrize('dtype', DTYPES) + def test_case_small_clamp(self, device, dtype): if device == 'cuda': numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) @@ -329,9 +366,13 @@ def test_case_small_clamp(self, device): [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.2, 0.1, 0.1], [0.7, 0.1, 0.2, 0.1, 0.1]], ] ] - ) + ).astype(dtype) labels = [[1, 2]] + cost_threshold = 1e-8 if dtype == np.float32 else 5e-4 + grad_threshold = 1e-8 if dtype == np.float32 else 5e-5 + rtol = 1e-5 if dtype == np.float32 else 1e-3 + fn_pt = RNNTLossNumba(blank=0, reduction='sum', clamp=GRAD_CLAMP) pt_cost, pt_grads = wrap_and_call(fn_pt, acts, labels, device) @@ -356,16 +397,17 @@ def test_case_small_clamp(self, device): ] ) - assert np.allclose(pt_cost, expected_cost, rtol=1e-6), "small_test costs mismatch." - assert np.allclose(pt_grads, expected_grads), "small_test gradient mismatch." + assert np.allclose(pt_cost, expected_cost, atol=cost_threshold, rtol=rtol), "small_test costs mismatch." + assert np.allclose(pt_grads, expected_grads, atol=grad_threshold, rtol=rtol), "small_test gradient mismatch." - assert np.allclose(pt_cost, np_cost, rtol=1e-6), "small_test costs mismatch." - assert np.allclose(pt_grads, np_grads), "small_test gradient mismatch." + assert np.allclose(pt_cost, np_cost, atol=cost_threshold, rtol=rtol), "small_test costs mismatch." + assert np.allclose(pt_grads, np_grads, atol=grad_threshold, rtol=rtol), "small_test gradient mismatch." @pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) + @pytest.mark.parametrize('dtype', DTYPES) @pytest.mark.parametrize('fastemit_lambda', [1.0, 0.01, 0.00001]) - def test_case_small_fastemit_clamp(self, device, fastemit_lambda): + def test_case_small_fastemit_clamp(self, device, dtype, fastemit_lambda): if device == 'cuda': numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) @@ -377,9 +419,13 @@ def test_case_small_fastemit_clamp(self, device, fastemit_lambda): [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.2, 0.1, 0.1], [0.7, 0.1, 0.2, 0.1, 0.1]], ] ] - ) + ).astype(dtype) labels = [[1, 2]] + cost_threshold = 1e-8 if dtype == np.float32 else 1e-3 + grad_threshold = 1e-8 if dtype == np.float32 else 5e-4 + rtol = 1e-5 if dtype == np.float32 else 1e-3 + fn_pt = RNNTLossNumba(blank=0, reduction='sum', fastemit_lambda=fastemit_lambda, clamp=GRAD_CLAMP) pt_cost, pt_grads = wrap_and_call(fn_pt, acts, labels, device) @@ -389,9 +435,9 @@ def test_case_small_fastemit_clamp(self, device, fastemit_lambda): expected_cost = 4.495666 expected_cost += expected_cost * fastemit_lambda - assert np.allclose(pt_cost, expected_cost, rtol=1e-6), "small_test costs mismatch." - assert np.allclose(pt_cost, np_cost, rtol=1e-6), "small_test costs mismatch." - assert np.allclose(pt_grads, np_grads), "small_test gradient mismatch." + assert np.allclose(pt_cost, expected_cost, atol=cost_threshold, rtol=rtol), "small_test costs mismatch." + assert np.allclose(pt_cost, np_cost, atol=cost_threshold, rtol=rtol), "small_test costs mismatch." + assert np.allclose(pt_grads, np_grads, atol=grad_threshold, rtol=rtol), "small_test gradient mismatch." @pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) @@ -494,5 +540,68 @@ def test_case_randomized_act_label(self, device): assert np.allclose(pt_grads, ag_grads, rtol=1e-2), "multi-blank gradient mismatch." +class TestTDTLoss: + @pytest.mark.unit + @pytest.mark.parametrize('device', DEVICES) + def test_case_randomized_act_label(self, device): + if device == 'cuda': + numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) + + B, T, U, V = 4, 8, 4, 8 # here V is number of non blank labels + durations = [0, 1, 2, 3, 4, 5] + sigma = 0.05 + + acts = torch.rand([B, T, U, V + 1 + len(durations)]) + labels = [[random.randrange(0, V) for i in range(U - 1)] for j in range(B)] + + fn_pt = TDTLossNumba(blank=V, reduction='sum', durations=durations, sigma=sigma) + pt_cost, pt_grads = wrap_and_call(fn_pt, acts, labels, device) + + fn_ag = TDTLossPytorch( + blank=V, reduction='sum', durations=durations, sigma=sigma + ) # ag for automatic gradient computation + ag_cost, ag_grads = wrap_and_call(fn_ag, acts, labels, device) + + assert np.allclose(pt_cost, ag_cost, rtol=1e-6), "tdt costs mismatch." + assert np.allclose(pt_grads, ag_grads, rtol=1e-2), "td gradient mismatch." + + @pytest.mark.unit + @pytest.mark.parametrize('device', DEVICES) + def test_case_fixed_case_act_label(self, device): + if device == 'cuda': + numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) + + B, T, U, V = 1, 3, 2, 3 # here V is number of non blank labels + durations = [0, 1, 2] + sigma = 0.05 + + acts = torch.zeros([B, T, U, V + 1 + len(durations)]) + labels = [[(i + j) % (V - 1) for i in range(U - 1)] for j in range(B)] + + fn_pt = TDTLossNumba(blank=V, reduction='sum', durations=durations, sigma=sigma) + pt_cost, pt_grads = wrap_and_call(fn_pt, acts, labels, device) + + expected_cost = 4.155739 + expected_grads = [ + [ + [ + [-0.64962804, 0.25, 0.25, 0.14962798, 0.2672583, -0.16792619, -0.09933221], + [0.01651875, 0.01651875, 0.01651875, -0.04955626, 0.022025, -0.01227201, -0.009753], + ], + [ + [-0.04892651, 0.01714851, 0.01714851, 0.01462949, -0.01143234, -0.01143234, 0.02286467], + [0.12531489, 0.12531489, 0.12531489, -0.37594467, 0.16708651, 0.13027048, -0.29735702], + ], + [ + [-0.02572276, 0.00857425, 0.00857425, 0.00857425, -0.02286468, 0.01143234, 0.01143234], + [0.13388914, 0.13388914, 0.13388914, -0.40166742, 0.17851885, -0.35703772, 0.17851885], + ], + ] + ] + + assert np.allclose(pt_cost, expected_cost, rtol=1e-6), "tdt costs mismatch." + assert np.allclose(pt_grads, expected_grads, rtol=1e-2), "td gradient mismatch." + + if __name__ == "__main__": pytest.main([__file__]) diff --git a/tests/collections/asr/numba/rnnt_loss/utils/test_gpu_rnnt_kernel.py b/tests/collections/asr/numba/rnnt_loss/utils/test_gpu_rnnt_kernel.py index acab5963fa72..cb5a9816e237 100644 --- a/tests/collections/asr/numba/rnnt_loss/utils/test_gpu_rnnt_kernel.py +++ b/tests/collections/asr/numba/rnnt_loss/utils/test_gpu_rnnt_kernel.py @@ -17,6 +17,7 @@ import torch from numba import cuda +from nemo.collections.asr.losses.rnnt_pytorch import MultiblankRNNTLossPytorch, TDTLossPytorch from nemo.collections.asr.parts.numba.rnnt_loss import rnnt_numpy from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import certify_inputs from nemo.collections.asr.parts.numba.rnnt_loss.utils.cuda_utils import gpu_rnnt_kernel, reduce @@ -24,8 +25,14 @@ from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__ +DTYPES = [torch.float32] +if numba_utils.is_numba_cuda_fp16_supported(): + DTYPES.append(torch.float16) + + def log_softmax(x, axis=-1): x = torch.from_numpy(x) # zero-copy + x = x.float() x = torch.log_softmax(x, dim=axis) x = x.numpy() return x @@ -41,12 +48,14 @@ def log_softmax_grad(x, axis=-1): class TestRNNTCUDAKernels: @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") @pytest.mark.unit - def test_compute_alphas_kernel(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_compute_alphas_kernel(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) random = np.random.RandomState(0) original_shape = [1, 5, 11, 3] B, T, U, V = original_shape + threshold = 1e-5 if dtype == torch.float32 else 3e-4 # Numpy kernel x = random.randn(*original_shape) @@ -66,7 +75,7 @@ def test_compute_alphas_kernel(self): else: stream = cuda.default_stream() - x_c = torch.tensor(x, device=device, dtype=torch.float32) + x_c = torch.tensor(x, device=device, dtype=dtype) labels_c = torch.tensor(labels, device=device, dtype=torch.int64) # Allocate workspace memory @@ -99,22 +108,24 @@ def test_compute_alphas_kernel(self): alphas = alphas.view([B, T, U]) diff = ground_alphas - alphas[0].cpu().numpy() - assert np.abs(diff).mean() <= 1e-5 - assert np.square(diff).mean() <= 1e-10 + assert np.abs(diff).mean() <= threshold + assert np.square(diff).mean() <= (threshold ** 2) ll_diff = ground_log_likelihood - llForward[0].cpu().numpy() - assert np.abs(ll_diff).mean() <= 1e-5 - assert np.square(ll_diff).mean() <= 1e-10 + assert np.abs(ll_diff).mean() <= threshold + assert np.square(ll_diff).mean() <= (threshold ** 2) @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") @pytest.mark.unit - def test_compute_betas_kernel(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_compute_betas_kernel(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) random = np.random.RandomState(0) original_shape = [1, 5, 11, 3] B, T, U, V = original_shape + threshold = 1e-5 if dtype == torch.float32 else 3e-4 # Numpy kernel x = random.randn(*original_shape) @@ -134,7 +145,7 @@ def test_compute_betas_kernel(self): else: stream = cuda.default_stream() - x_c = torch.tensor(x, device=device, dtype=torch.float32) + x_c = torch.tensor(x, device=device, dtype=dtype) labels_c = torch.tensor(labels, device=device, dtype=torch.int64) # Allocate workspace memory @@ -167,17 +178,18 @@ def test_compute_betas_kernel(self): betas = betas.view([B, T, U]) diff = ground_alphas - betas[0].cpu().numpy() - assert np.abs(diff).mean() <= 1e-5 - assert np.square(diff).mean() <= 1e-10 + assert np.abs(diff).mean() <= threshold + assert np.square(diff).mean() <= (threshold ** 2) ll_diff = ground_log_likelihood - llBackward[0].cpu().numpy() - assert np.abs(ll_diff).mean() <= 1e-5 - assert np.square(ll_diff).mean() <= 1e-10 + assert np.abs(ll_diff).mean() <= threshold + assert np.square(ll_diff).mean() <= (threshold ** 2) @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") @pytest.mark.unit - def test_compute_grads_kernel(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_compute_grads_kernel(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) fastemit_lambda = 0.0 @@ -186,6 +198,7 @@ def test_compute_grads_kernel(self): random = np.random.RandomState(0) original_shape = [1, 5, 11, 3] B, T, U, V = original_shape + threshold = 1e-5 if dtype == torch.float32 else 3e-5 # Numpy kernel x = random.randn(*original_shape) @@ -219,7 +232,7 @@ def test_compute_grads_kernel(self): else: stream = cuda.default_stream() - x_c = torch.tensor(x, device=device, dtype=torch.float32) + x_c = torch.tensor(x, device=device, dtype=dtype) labels_c = labels.clone().to(device=device, dtype=torch.int64) # Allocate workspace memory @@ -282,12 +295,13 @@ def test_compute_grads_kernel(self): grads = grads.view([B, T, U, V]) diff = true_grads - grads[0].cpu().numpy() - assert np.abs(diff).mean() <= 1e-5 - assert np.square(diff).mean() <= 1e-10 + assert np.abs(diff).mean() <= threshold + assert np.square(diff).mean() <= (threshold ** 2) * 5.0 @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") @pytest.mark.unit - def test_compute_grads_kernel_fastemit(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_compute_grads_kernel_fastemit(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) fastemit_lambda = 0.001 @@ -296,6 +310,7 @@ def test_compute_grads_kernel_fastemit(self): random = np.random.RandomState(0) original_shape = [1, 5, 11, 3] B, T, U, V = original_shape + threshold = 1e-5 if dtype == torch.float32 else 3e-5 # Numpy kernel x = random.randn(*original_shape) @@ -329,7 +344,7 @@ def test_compute_grads_kernel_fastemit(self): else: stream = cuda.default_stream() - x_c = torch.tensor(x, device=device, dtype=torch.float32) + x_c = torch.tensor(x, device=device, dtype=dtype) labels_c = labels.clone().to(device=device, dtype=torch.int64) # Allocate workspace memory @@ -392,12 +407,13 @@ def test_compute_grads_kernel_fastemit(self): grads = grads.view([B, T, U, V]) diff = true_grads - grads[0].cpu().numpy() - assert np.abs(diff).mean() <= 1e-5 - assert np.square(diff).mean() <= 1e-10 + assert np.abs(diff).mean() <= threshold + assert np.square(diff).mean() <= (threshold ** 2) * 5 @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") @pytest.mark.unit - def test_compute_grads_kernel_clamp(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_compute_grads_kernel_clamp(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) fastemit_lambda = 0.0 @@ -406,6 +422,7 @@ def test_compute_grads_kernel_clamp(self): random = np.random.RandomState(0) original_shape = [1, 5, 11, 3] B, T, U, V = original_shape + threshold = 1e-5 if dtype == torch.float32 else 3e-5 # Numpy kernel x = random.randn(*original_shape) @@ -439,7 +456,7 @@ def test_compute_grads_kernel_clamp(self): else: stream = cuda.default_stream() - x_c = torch.tensor(x, device=device, dtype=torch.float32) + x_c = torch.tensor(x, device=device, dtype=dtype) labels_c = labels.clone().to(device=device, dtype=torch.int64) # Allocate workspace memory @@ -502,5 +519,191 @@ def test_compute_grads_kernel_clamp(self): grads = grads.view([B, T, U, V]) diff = true_grads - grads[0].cpu().numpy() - assert np.abs(diff).mean() <= 1e-5 - assert np.square(diff).mean() <= 1e-10 + assert np.abs(diff).mean() <= threshold + assert np.square(diff).mean() <= (threshold ** 2) * 5 + + +class TestTDTCUDAKernels: + @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") + @pytest.mark.unit + def test_compute_alphas_kernel(self): + numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) + + random = np.random.RandomState(0) + original_shape = [1, 15, 11, 3] + durations = [0, 1, 2] + B, T, U, V = original_shape + Vd = len(durations) + + duration_act_shape = [B, T, U, Vd] + sigma = 0.05 + + # for passing into the kernel function -- it expected unnormalized logits + x = random.randn(*original_shape) + # for passing into the pytorch function -- it expected normalized logits + normalized_x = log_softmax(x, axis=-1) - 0.05 + + xd = random.randn(*duration_act_shape) + # duration logits are normalized before passing into the loss computation. + xd = log_softmax(xd, axis=-1) + + labels = np.array([[1, 1, 1, 1, 0, 0, 1, 0, 0, 1]]) # [1, 10] + blank_idx = V - 1 + + pytorch_tdt_loss = TDTLossPytorch(blank_idx, durations, sigma=sigma) + + # Pytorch kernel + device = torch.device('cuda') + if hasattr(cuda, 'external_stream'): + stream = cuda.external_stream(torch.cuda.current_stream(device).cuda_stream) + else: + stream = cuda.default_stream() + + x = torch.tensor(x, device=device, dtype=torch.float32) + normalized_x = torch.tensor(normalized_x, device=device, dtype=torch.float32) + xd = torch.tensor(xd, device=device, dtype=torch.float32) + labels = torch.tensor(labels, device=device, dtype=torch.long) + durations = torch.tensor(durations, device=device, dtype=torch.long) + + # Allocate workspace memory + denom = torch.zeros(B * T * U, device=device, dtype=x.dtype) + alphas = torch.zeros(B * T * U, device=device, dtype=x.dtype) + llForward = torch.zeros(B, device=device, dtype=x.dtype) + input_lengths = torch.tensor([T], dtype=torch.long, device=device) + label_lengths = torch.tensor([U - 1], dtype=torch.long, device=device) + + ground_log_likelihood, ground_alphas = pytorch_tdt_loss.compute_forward_prob( + normalized_x, xd, labels, input_lengths, label_lengths + ) + + # certify input data + certify_inputs(x, labels, input_lengths, label_lengths) + + # flatten activation tensor (for pointer based indexing) + x = x.view([-1]) + xd = xd.view([-1]) + + # call kernel + # log softmax reduction + reduce.reduce_max(x, denom, rows=V, cols=B * T * U, minus=False, stream=stream) + reduce.reduce_exp(x, denom, rows=V, cols=B * T * U, minus=True, stream=stream) + + # alpha kernel + gpu_rnnt_kernel.compute_tdt_alphas_kernel[B, U, stream, 0]( + x, + xd, + denom, + sigma, + alphas, + llForward, + input_lengths, + label_lengths, + labels, + B, + T, + U, + V, + blank_idx, + durations, + Vd, + ) + + # sync kernel + stream.synchronize() + + # reshape alphas + alphas = alphas.view([B, T, U]) + diff = torch.norm(ground_alphas - alphas) + ll_diff = torch.norm(ground_log_likelihood - llForward) + + assert diff <= 1e-3 + assert ll_diff <= 1e-3 + + +class TestMultiblankRNNTCUDAKernels: + @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") + @pytest.mark.unit + def test_compute_alphas_kernel(self): + numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) + + random = np.random.RandomState(0) + original_shape = [1, 15, 11, 6] + big_blank_durations = [2, 3, 4] + B, T, U, V = original_shape + num_big_blanks = len(big_blank_durations) + + sigma = 0.05 + + # for passing into the kernel function -- it expected unnormalized logits + x = random.randn(*original_shape) + # for passing into the pytorch function -- it expected normalized logits + normalized_x = log_softmax(x, axis=-1) - sigma + + labels = np.array([[1, 1, 1, 1, 0, 0, 1, 0, 0, 1]]) # [1, 10] + blank_idx = V - 1 + + pytorch_multiblank_loss = MultiblankRNNTLossPytorch(blank_idx, big_blank_durations, sigma=sigma) + + # Pytorch kernel + device = torch.device('cuda') + if hasattr(cuda, 'external_stream'): + stream = cuda.external_stream(torch.cuda.current_stream(device).cuda_stream) + else: + stream = cuda.default_stream() + + x = torch.tensor(x, device=device, dtype=torch.float32) + normalized_x = torch.tensor(normalized_x, device=device, dtype=torch.float32) + labels = torch.tensor(labels, device=device, dtype=torch.long) + big_blank_durations = torch.tensor(big_blank_durations, device=device, dtype=torch.long) + + # Allocate workspace memory + denom = torch.zeros(B * T * U, device=device, dtype=x.dtype) + alphas = torch.zeros(B * T * U, device=device, dtype=x.dtype) + llForward = torch.zeros(B, device=device, dtype=x.dtype) + input_lengths = torch.tensor([T], dtype=torch.long, device=device) + label_lengths = torch.tensor([U - 1], dtype=torch.long, device=device) + + ground_log_likelihood, ground_alphas = pytorch_multiblank_loss.compute_forward_prob( + normalized_x, labels, input_lengths, label_lengths + ) + + # certify input data + certify_inputs(x, labels, input_lengths, label_lengths) + + # flatten activation tensor (for pointer based indexing) + x = x.view([-1]) + + # call kernel + # log softmax reduction + reduce.reduce_max(x, denom, rows=V, cols=B * T * U, minus=False, stream=stream) + reduce.reduce_exp(x, denom, rows=V, cols=B * T * U, minus=True, stream=stream) + + # alpha kernel + gpu_rnnt_kernel.compute_multiblank_alphas_kernel[B, U, stream, 0]( + x, + denom, + sigma, + alphas, + llForward, + input_lengths, + label_lengths, + labels, + B, + T, + U, + V, + blank_idx, + big_blank_durations, + num_big_blanks, + ) + + # sync kernel + stream.synchronize() + + # reshape alphas + alphas = alphas.view([B, T, U]) + diff = torch.norm(ground_alphas - alphas) + ll_diff = torch.norm(ground_log_likelihood - llForward) + + assert diff <= 1e-3 + assert ll_diff <= 1e-3 diff --git a/tests/collections/asr/numba/rnnt_loss/utils/test_reduce.py b/tests/collections/asr/numba/rnnt_loss/utils/test_reduce.py index 7c2ba6a41208..5994d53e1d8f 100644 --- a/tests/collections/asr/numba/rnnt_loss/utils/test_reduce.py +++ b/tests/collections/asr/numba/rnnt_loss/utils/test_reduce.py @@ -20,17 +20,22 @@ from nemo.core.utils import numba_utils from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__ +DTYPES = [np.float32] +if numba_utils.is_numba_cuda_fp16_supported(): + DTYPES.append(np.float16) + class TestRNNTCUDAReductions: @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") @pytest.mark.unit - def test_reduce_max(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_reduce_max(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) random = np.random.RandomState(0) original_shape = [1, 5, 4, 3] - x = random.randn(*original_shape).reshape([-1]) - dx = random.randn(*x.shape) + x = random.randn(*original_shape).reshape([-1]).astype(dtype) + dx = random.randn(*x.shape).astype(dtype) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -53,13 +58,14 @@ def test_reduce_max(self): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") @pytest.mark.unit - def test_reduce_exp(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_reduce_exp(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) random = np.random.RandomState(0) original_shape = [1, 5, 4, 2] - x = random.randn(*original_shape).reshape([-1]) - dx = np.zeros_like(x) + x = random.randn(*original_shape).reshape([-1]).astype(dtype) + dx = np.zeros_like(x).astype(dtype) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) diff --git a/tests/collections/asr/numba/rnnt_loss/utils/test_rnnt_helper.py b/tests/collections/asr/numba/rnnt_loss/utils/test_rnnt_helper.py index 243fe727e172..08f12da8324d 100644 --- a/tests/collections/asr/numba/rnnt_loss/utils/test_rnnt_helper.py +++ b/tests/collections/asr/numba/rnnt_loss/utils/test_rnnt_helper.py @@ -20,11 +20,16 @@ from nemo.core.utils import numba_utils from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__ +DTYPES = [np.float32] +if numba_utils.is_numba_cuda_fp16_supported(): + DTYPES.append(np.float16) + class TestRNNTHelper: @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_log_sum_exp(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_log_sum_exp(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -34,8 +39,9 @@ def _kernel(x, y): if x_pos < x.shape[0] and x_pos < y.shape[0]: x[x_pos] = rnnt_helper.log_sum_exp(x[x_pos], y[x_pos]) - x = np.zeros([8]) # np.random.rand(8192) - y = np.ones([8]) # np.random.rand(8192) + x = np.zeros([8]).astype(dtype) # np.random.rand(8192) + y = np.ones([8]).astype(dtype) # np.random.rand(8192) + threshold = 1e-5 if dtype == np.float32 else 2e-3 stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -52,11 +58,12 @@ def _kernel(x, y): x_new = x_c.copy_to_host(stream=stream) del x_c, y_c - assert (x_new.sum() - 10.506093500145782) <= 1e-5 + assert (x_new.sum() - 10.506093500145782) <= threshold @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_log_sum_exp_neg_inf(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_log_sum_exp_neg_inf(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -66,8 +73,8 @@ def _kernel(x, y): if x_pos < x.shape[0] and x_pos < y.shape[0]: x[x_pos] = rnnt_helper.log_sum_exp(x[x_pos], y[x_pos]) - x = np.asarray([global_constants.FP32_NEG_INF] * 8) - y = np.ones([len(x)]) + x = np.asarray([global_constants.FP32_NEG_INF] * 8).astype(dtype) + y = np.ones([len(x)]).astype(dtype) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -88,7 +95,8 @@ def _kernel(x, y): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_div_up(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_div_up(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -98,8 +106,8 @@ def _kernel(x, y): if x_pos < x.shape[0] and x_pos < y.shape[0]: x[x_pos] = rnnt_helper.div_up(x[x_pos], y[x_pos]) - x = np.full([8], fill_value=10) # np.random.rand(8192) - y = np.full([8], fill_value=2) # np.random.rand(8192) + x = np.full([8], fill_value=10).astype(dtype) # np.random.rand(8192) + y = np.full([8], fill_value=2).astype(dtype) # np.random.rand(8192) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -121,7 +129,8 @@ def _kernel(x, y): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_add(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_add(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -131,8 +140,8 @@ def _kernel(x, y): if x_pos < x.shape[0] and x_pos < y.shape[0]: x[x_pos] = rnnt_helper.add(x[x_pos], y[x_pos]) - x = np.full([8], fill_value=10) # np.random.rand(8192) - y = np.full([8], fill_value=2) # np.random.rand(8192) + x = np.full([8], fill_value=10).astype(dtype) # np.random.rand(8192) + y = np.full([8], fill_value=2).astype(dtype) # np.random.rand(8192) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -154,7 +163,8 @@ def _kernel(x, y): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_maximum(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_maximum(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -164,8 +174,8 @@ def _kernel(x, y): if x_pos < x.shape[0] and x_pos < y.shape[0]: x[x_pos] = rnnt_helper.maximum(x[x_pos], y[x_pos]) - x = np.full([8], fill_value=10) # np.random.rand(8192) - y = np.full([8], fill_value=2) # np.random.rand(8192) + x = np.full([8], fill_value=10).astype(dtype) # np.random.rand(8192) + y = np.full([8], fill_value=2).astype(dtype) # np.random.rand(8192) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -187,7 +197,8 @@ def _kernel(x, y): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_identity(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_identity(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -197,7 +208,7 @@ def _kernel(x): if x_pos < x.shape[0]: x[x_pos] = rnnt_helper.identity(x[x_pos]) - x = np.full([8], fill_value=10) # np.random.rand(8192) + x = np.full([8], fill_value=10).astype(dtype) # np.random.rand(8192) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -218,7 +229,8 @@ def _kernel(x): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_negate(self): + @pytest.mark.parametrize('dtype', [np.float32, np.float16]) + def test_negate(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -228,7 +240,7 @@ def _kernel(x): if x_pos < x.shape[0]: x[x_pos] = rnnt_helper.negate(x[x_pos]) - x = np.full([8], fill_value=10) # np.random.rand(8192) + x = np.full([8], fill_value=10).astype(dtype) # np.random.rand(8192) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -249,7 +261,8 @@ def _kernel(x): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_exponential(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_exponential(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -259,7 +272,7 @@ def _kernel(x): if x_pos < x.shape[0]: x[x_pos] = rnnt_helper.exponential(x[x_pos]) - x = np.random.rand(8) + x = np.random.rand(8).astype(dtype) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -281,7 +294,8 @@ def _kernel(x): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_log_plus(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_log_plus(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -291,8 +305,8 @@ def _kernel(x, y): if x_pos < x.shape[0] and x_pos < y.shape[0]: x[x_pos] = rnnt_helper.log_plus(x[x_pos], y[x_pos]) - x = np.full([8], fill_value=10.0) # np.random.rand(8192) - y = np.full([8], fill_value=2.0) # np.random.rand(8192) + x = np.full([8], fill_value=10.0).astype(dtype) # np.random.rand(8192) + y = np.full([8], fill_value=2.0).astype(dtype) # np.random.rand(8192) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -317,12 +331,15 @@ def _kernel(x, y): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.parametrize('batch_size', [8, 128, 256]) @pytest.mark.parametrize('fastemit_lambda', [0.0, 0.001]) + @pytest.mark.parametrize('dtype', DTYPES) @pytest.mark.unit - def test_compute_costs_data(self, batch_size, fastemit_lambda): + def test_compute_costs_data(self, batch_size, fastemit_lambda, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) + np.random.seed(0) x = np.full([batch_size], fill_value=0.0) # np.random.rand(8192) - y = np.random.randn(batch_size) # np.random.rand(8192) + y = np.random.randn(batch_size).astype(dtype) # np.random.rand(8192) + threshold = 1e-5 if dtype == np.float32 else 1e-5 stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -340,11 +357,11 @@ def test_compute_costs_data(self, batch_size, fastemit_lambda): x_new = x_c.copy_to_host(stream=stream) del x_c, y_c - res = -(y.copy()) + res = -(y.astype(np.float32).copy()) res *= 1.0 + fastemit_lambda for i in range(len(x_new)): - assert x_new[i] == res[i], f"index failed {i}" + assert abs(x_new[i] - res[i]) < threshold, f"index failed {i}" if __name__ == '__main__': diff --git a/tests/collections/asr/test_asr_classification_model.py b/tests/collections/asr/test_asr_classification_model.py index 44125de92b3d..876bb6073a38 100644 --- a/tests/collections/asr/test_asr_classification_model.py +++ b/tests/collections/asr/test_asr_classification_model.py @@ -255,52 +255,13 @@ def test_EncDecClassificationDatasetConfig_for_AudioToSpeechLabelDataset(self): class TestEncDecFrameClassificationModel(TestEncDecClassificationModel): + @pytest.mark.parametrize(["logits_len", "labels_len"], [(20, 10), (21, 10), (19, 10), (20, 9), (20, 11)]) @pytest.mark.unit - def test_reshape_labels(self, frame_classification_model): + def test_reshape_labels(self, frame_classification_model, logits_len, labels_len): model = frame_classification_model.eval() - logits = torch.ones(4, 20, 2) - labels = torch.ones(4, 10) - logits_len = torch.tensor([6, 7, 8, 9]) - labels_len = torch.tensor([5, 6, 7, 8]) - labels_new, labels_len_new = model.reshape_labels( - logits=logits, labels=labels, logits_len=logits_len, labels_len=labels_len - ) - assert labels_new.size(1) == logits.size(1) - assert torch.equal(labels_len_new, torch.tensor([6, 7, 8, 9])) - - logits = torch.ones(4, 21, 2) - labels = torch.ones(4, 10) - logits_len = torch.tensor([6, 7, 8, 9]) - labels_len = torch.tensor([5, 6, 7, 8]) - labels_new, labels_len_new = model.reshape_labels( - logits=logits, labels=labels, logits_len=logits_len, labels_len=labels_len - ) - assert labels_new.size(1) == logits.size(1) - assert torch.equal(labels_len_new, torch.tensor([6, 7, 8, 9])) - - logits = torch.ones(4, 19, 2) - labels = torch.ones(4, 10) - logits_len = torch.tensor([6, 7, 8, 9]) - labels_len = torch.tensor([5, 6, 7, 8]) - labels_new, labels_len_new = model.reshape_labels( - logits=logits, labels=labels, logits_len=logits_len, labels_len=labels_len - ) - assert labels_new.size(1) == logits.size(1) - assert torch.equal(labels_len_new, torch.tensor([6, 7, 8, 9])) - - logits = torch.ones(4, 20, 2) - labels = torch.ones(4, 9) - logits_len = torch.tensor([6, 7, 8, 9]) - labels_len = torch.tensor([5, 6, 7, 8]) - labels_new, labels_len_new = model.reshape_labels( - logits=logits, labels=labels, logits_len=logits_len, labels_len=labels_len - ) - assert labels_new.size(1) == logits.size(1) - assert torch.equal(labels_len_new, torch.tensor([6, 7, 8, 9])) - - logits = torch.ones(4, 20, 2) - labels = torch.ones(4, 11) + logits = torch.ones(4, logits_len, 2) + labels = torch.ones(4, labels_len) logits_len = torch.tensor([6, 7, 8, 9]) labels_len = torch.tensor([5, 6, 7, 8]) labels_new, labels_len_new = model.reshape_labels( diff --git a/tests/collections/asr/test_asr_interctc_models.py b/tests/collections/asr/test_asr_interctc_models.py index 6225eecf9660..848267c4ada3 100644 --- a/tests/collections/asr/test_asr_interctc_models.py +++ b/tests/collections/asr/test_asr_interctc_models.py @@ -199,11 +199,18 @@ def __len__(self): def __getitem__(self, idx): return self.values + # this sometimes results in all zeros in the output which breaks tests + # so using this only for the ptl calls in the bottom, but using + # processed signal directly initially to remove the chance of + # this edge-case input_signal = torch.randn(size=(1, 512)) input_length = torch.randint(low=161, high=500, size=[1]) target = torch.randint(size=(1, input_length[0]), low=0, high=28) target_length = torch.tensor([input_length[0]]) + processed_signal = torch.randn(size=([1, 64, 12])) + processed_length = torch.tensor([8]) + if len(apply_at_layers) != len(loss_weights): # has to throw an error here with pytest.raises( @@ -216,7 +223,9 @@ def __getitem__(self, idx): asr_model = model_class(cfg=model_config) asr_model.train() AccessMixin.set_access_enabled(access_enabled=True) - logprobs, *_ = asr_model.forward(input_signal=input_signal, input_signal_length=input_length) + logprobs, *_ = asr_model.forward( + processed_signal=processed_signal, processed_signal_length=processed_length + ) captured_tensors = asr_model.get_captured_interctc_tensors() AccessMixin.reset_registry(asr_model) assert len(captured_tensors) == len(apply_at_layers) diff --git a/tests/collections/asr/test_asr_rnnt_encdec_model.py b/tests/collections/asr/test_asr_rnnt_encdec_model.py index 5b30489f846c..68f1e38f797b 100644 --- a/tests/collections/asr/test_asr_rnnt_encdec_model.py +++ b/tests/collections/asr/test_asr_rnnt_encdec_model.py @@ -363,6 +363,50 @@ def test_multiblank_rnnt_greedy_decoding(self, greedy_class): with torch.no_grad(): _ = greedy(encoder_output=enc_out, encoded_lengths=enc_len) + @pytest.mark.skipif( + not NUMBA_RNNT_LOSS_AVAILABLE, reason='RNNTLoss has not been compiled with appropriate numba version.', + ) + @pytest.mark.unit + @pytest.mark.parametrize( + "greedy_class", [greedy_decode.GreedyMultiblankRNNTInfer, greedy_decode.GreedyBatchedMultiblankRNNTInfer], + ) + def test_multiblank_rnnt_greedy_decoding(self, greedy_class): + token_list = [" ", "a", "b", "c"] + vocab_size = len(token_list) + big_blank_durations = [2, 4] + + encoder_output_size = 4 + decoder_output_size = 4 + joint_output_shape = 4 + + prednet_cfg = {'pred_hidden': decoder_output_size, 'pred_rnn_layers': 1} + jointnet_cfg = { + 'encoder_hidden': encoder_output_size, + 'pred_hidden': decoder_output_size, + 'joint_hidden': joint_output_shape, + 'activation': 'relu', + } + + decoder = RNNTDecoder(prednet_cfg, vocab_size) + joint_net = RNNTJoint( + jointnet_cfg, vocab_size, vocabulary=token_list, num_extra_outputs=len(big_blank_durations) + ) + + greedy = greedy_class( + decoder, + joint_net, + blank_index=len(token_list), + big_blank_durations=big_blank_durations, + max_symbols_per_step=5, + ) + + # (B, D, T) + enc_out = torch.randn(1, encoder_output_size, 30) + enc_len = torch.tensor([30], dtype=torch.int32) + + with torch.no_grad(): + _ = greedy(encoder_output=enc_out, encoded_lengths=enc_len) + @pytest.mark.skipif( not NUMBA_RNNT_LOSS_AVAILABLE, reason='RNNTLoss has not been compiled with appropriate numba version.', ) diff --git a/tests/collections/asr/test_asr_subsampling.py b/tests/collections/asr/test_asr_subsampling.py new file mode 100644 index 000000000000..fe5295be11f1 --- /dev/null +++ b/tests/collections/asr/test_asr_subsampling.py @@ -0,0 +1,61 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +import torch + +from nemo.collections.asr.models import ASRModel + + +class TestASRSubsamplingConvChunking: + @pytest.mark.with_downloads() + @pytest.mark.unit + def test_forward(self): + asr_model = ASRModel.from_pretrained("stt_en_fastconformer_ctc_large") + asr_model = asr_model.eval() + asr_model.preprocessor.featurizer.dither = 0.0 + asr_model.preprocessor.featurizer.pad_to = 0 + + len = 512 + + input_signal_batch1 = torch.randn(size=(1, len), device=asr_model.device) + length_batch1 = torch.randint(low=161, high=500, size=[1], device=asr_model.device) + + input_signal_batch4 = torch.randn(size=(4, len), device=asr_model.device) + length_batch4 = torch.randint(low=161, high=500, size=[4], device=asr_model.device) + + with torch.no_grad(): + # regular inference + logprobs_batch1_nosplit, _, _ = asr_model.forward( + input_signal=input_signal_batch1, input_signal_length=length_batch1 + ) + logprobs_batch4_nosplit, _, _ = asr_model.forward( + input_signal=input_signal_batch4, input_signal_length=length_batch4 + ) + + # force chunking to 2 + asr_model.change_subsampling_conv_chunking_factor(subsampling_conv_chunking_factor=2) + + # chunked inference by channels as batch is 1 + logprobs_batch1_split, _, _ = asr_model.forward( + input_signal=input_signal_batch1, input_signal_length=length_batch1 + ) + # chunked inference by batch as it is 4 [> 1] + logprobs_batch4_split, _, _ = asr_model.forward( + input_signal=input_signal_batch4, input_signal_length=length_batch4 + ) + + diff = torch.mean(torch.abs(logprobs_batch1_split - logprobs_batch1_nosplit)) + assert diff <= 1e-6 + diff = torch.max(torch.abs(logprobs_batch4_split - logprobs_batch4_nosplit)) + assert diff <= 1e-6 diff --git a/tests/collections/asr/utils/test_vad_utils.py b/tests/collections/asr/utils/test_vad_utils.py new file mode 100644 index 000000000000..a7672e1aa43d --- /dev/null +++ b/tests/collections/asr/utils/test_vad_utils.py @@ -0,0 +1,126 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pytest +from pyannote.core import Annotation, Segment + +from nemo.collections.asr.parts.utils.vad_utils import ( + align_labels_to_frames, + convert_labels_to_speech_segments, + frame_vad_construct_pyannote_object_per_file, + get_frame_labels, + get_nonspeech_segments, + load_speech_overlap_segments_from_rttm, + load_speech_segments_from_rttm, + read_rttm_as_pyannote_object, +) + + +def get_simple_rttm_without_overlap(rttm_file="test1.rttm"): + line = "SPEAKER 1 0 2 speech \n" + speech_segments = [[0.0, 2.0]] + with open(rttm_file, "w") as f: + f.write(line) + return rttm_file, speech_segments + + +def get_simple_rttm_with_overlap(rttm_file="test2.rttm"): + speech_segments = [[0.0, 3.0]] + overlap_segments = [[1.0, 2.0]] + with open(rttm_file, "w") as f: + f.write("SPEAKER 1 0 2 speech \n") + f.write("SPEAKER 1 1 2 speech \n") + return rttm_file, speech_segments, overlap_segments + + +def get_simple_rttm_with_silence(rttm_file="test3.rttm"): + line = "SPEAKER 1 1 2 speech \n" + speech_segments = [[1.0, 2.0]] + silence_segments = [[0.0, 1.0]] + with open(rttm_file, "w") as f: + f.write(line) + return rttm_file, speech_segments, silence_segments + + +class TestVADUtils: + @pytest.mark.parametrize(["logits_len", "labels_len"], [(20, 10), (20, 11), (20, 9), (10, 21), (10, 19)]) + @pytest.mark.unit + def test_align_label_logits(self, logits_len, labels_len): + logits = np.arange(logits_len).tolist() + labels = np.arange(labels_len).tolist() + labels_new = align_labels_to_frames(probs=logits, labels=labels) + + assert len(labels_new) == len(logits) + + @pytest.mark.unit + def test_load_speech_segments_from_rttm(self, test_data_dir): + rttm_file, speech_segments = get_simple_rttm_without_overlap(test_data_dir + "/test1.rttm") + speech_segments_new = load_speech_segments_from_rttm(rttm_file) + assert speech_segments_new == speech_segments + + @pytest.mark.unit + def test_load_speech_overlap_segments_from_rttm(self, test_data_dir): + rttm_file, speech_segments, overlap_segments = get_simple_rttm_with_overlap(test_data_dir + "/test2.rttm") + speech_segments_new, overlap_segments_new = load_speech_overlap_segments_from_rttm(rttm_file) + assert speech_segments_new == speech_segments + assert overlap_segments_new == overlap_segments + + @pytest.mark.unit + def test_get_nonspeech_segments(self, test_data_dir): + rttm_file, speech_segments, silence_segments = get_simple_rttm_with_silence(test_data_dir + "/test3.rttm") + speech_segments_new = load_speech_segments_from_rttm(rttm_file) + silence_segments_new = get_nonspeech_segments(speech_segments_new) + assert silence_segments_new == silence_segments + + @pytest.mark.unit + def test_get_frame_labels(self, test_data_dir): + rttm_file, speech_segments = get_simple_rttm_without_overlap(test_data_dir + "/test4.rttm") + speech_segments_new = load_speech_segments_from_rttm(rttm_file) + frame_labels = get_frame_labels(speech_segments_new, 0.02, 0.0, 3.0, as_str=False) + assert frame_labels[0] == 1 + assert len(frame_labels) == 150 + + @pytest.mark.unit + def test_convert_labels_to_speech_segments(self, test_data_dir): + rttm_file, speech_segments = get_simple_rttm_without_overlap(test_data_dir + "/test5.rttm") + speech_segments_new = load_speech_segments_from_rttm(rttm_file) + frame_labels = get_frame_labels(speech_segments_new, 0.02, 0.0, 3.0, as_str=False) + speech_segments_new = convert_labels_to_speech_segments(frame_labels, 0.02) + assert speech_segments_new == speech_segments + + @pytest.mark.unit + def test_read_rttm_as_pyannote_object(self, test_data_dir): + rttm_file, speech_segments = get_simple_rttm_without_overlap(test_data_dir + "/test6.rttm") + pyannote_object = read_rttm_as_pyannote_object(rttm_file) + pyannote_object_gt = Annotation() + pyannote_object_gt[Segment(0.0, 2.0)] = 'speech' + assert pyannote_object == pyannote_object_gt + + @pytest.mark.unit + def test_frame_vad_construct_pyannote_object_per_file(self, test_data_dir): + rttm_file, speech_segments = get_simple_rttm_without_overlap(test_data_dir + "/test7.rttm") + # test for rttm input + ref, hyp = frame_vad_construct_pyannote_object_per_file(rttm_file, rttm_file) + pyannote_object_gt = Annotation() + pyannote_object_gt[Segment(0.0, 2.0)] = 'speech' + assert ref == hyp == pyannote_object_gt + + # test for list input + speech_segments = load_speech_segments_from_rttm(rttm_file) + frame_labels = get_frame_labels(speech_segments, 0.02, 0.0, 3.0, as_str=False) + speech_segments_new = convert_labels_to_speech_segments(frame_labels, 0.02) + assert speech_segments_new == speech_segments + ref, hyp = frame_vad_construct_pyannote_object_per_file(frame_labels, frame_labels, 0.02) + assert ref == hyp == pyannote_object_gt diff --git a/tests/collections/common/pl_utils.py b/tests/collections/common/pl_utils.py index 395c8cef5969..a2e9609c8492 100644 --- a/tests/collections/common/pl_utils.py +++ b/tests/collections/common/pl_utils.py @@ -90,7 +90,7 @@ def _class_test( calculated across devices for each batch (and not just at the end) """ # Instanciate lightning metric - metric = metric_class(compute_on_step=True, dist_sync_on_step=dist_sync_on_step, **metric_args) + metric = metric_class(dist_sync_on_step=dist_sync_on_step, **metric_args) # verify metrics work after being loaded from pickled state pickled_metric = pickle.dumps(metric) @@ -303,7 +303,7 @@ def _perplexity_class_test( calculated across devices for each batch (and not just at the end) """ # Instanciate lightning metric - perplexity = Perplexity(compute_on_step=True, dist_sync_on_step=dist_sync_on_step, **metric_args) + perplexity = Perplexity(dist_sync_on_step=dist_sync_on_step, **metric_args) if (probs is None) == (logits is None): with pytest.raises(ValueError): perplexity(probs, logits) @@ -464,9 +464,7 @@ def _loss_class_test( calculated across devices for each batch (and not just at the end) """ # Instantiate lightning metric - loss_metric = GlobalAverageLossMetric( - compute_on_step=True, dist_sync_on_step=dist_sync_on_step, take_avg_loss=take_avg_loss - ) + loss_metric = GlobalAverageLossMetric(dist_sync_on_step=dist_sync_on_step, take_avg_loss=take_avg_loss) # verify loss works after being loaded from pickled state pickled_metric = pickle.dumps(loss_metric) diff --git a/tests/collections/nlp/test_flash_attention.py b/tests/collections/nlp/test_flash_attention.py new file mode 100644 index 000000000000..727742fdffb5 --- /dev/null +++ b/tests/collections/nlp/test_flash_attention.py @@ -0,0 +1,353 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random + +import pytest +import torch +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.nlp.modules.common.megatron.attention import CoreAttention +from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.collections.nlp.modules.common.megatron.utils import build_attention_mask_3d +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy + +try: + from apex.transformer.enums import AttnMaskType + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + +try: + import flash_attn + + HAVE_FA = True +except (ImportError, ModuleNotFoundError): + HAVE_FA = False + +try: + import triton + + HAVE_TRITON = True +except (ImportError, ModuleNotFoundError): + HAVE_TRITON = False + +import pynvml + + +def HAVE_AMPERE_GPU(): + pynvml.nvmlInit() + handle = pynvml.nvmlDeviceGetHandleByIndex(0) + device_arch = pynvml.nvmlDeviceGetArchitecture(handle) + pynvml.nvmlShutdown() + return device_arch == pynvml.NVML_DEVICE_ARCH_AMPERE + + +@pytest.mark.run_only_on('GPU') +@pytest.mark.skipif(not HAVE_APEX, reason="apex is not installed") +class TestFlashAttention: + @classmethod + def setup_class(cls): + if not torch.cuda.is_available(): + return + + GPUS = 1 + TP_SIZE = GPUS + PP_SIZE = 1 + MB_SIZE = 4 + GB_SIZE = 8 + SEED = 1234 + trainer = Trainer(strategy=NLPDDPStrategy(), devices=GPUS, accelerator='gpu', num_nodes=1, logger=None,) + + initialize_model_parallel_for_nemo( + world_size=trainer.world_size, + global_rank=trainer.global_rank, + local_rank=trainer.local_rank, + tensor_model_parallel_size=TP_SIZE, + pipeline_model_parallel_size=PP_SIZE, + micro_batch_size=MB_SIZE, + global_batch_size=GB_SIZE, + seed=SEED, + apex_transformer_log_level=30, + ) + + @pytest.fixture() + def cfg(self): + cfg = { + 'bz': random.randint(1, 7), + 'sq': random.randint(2, 7), + 'sk': random.randint(2, 7), + 'head': random.randint(1, 7), + 'layer_number': random.randint(1, 7), + 'device': torch.cuda.current_device(), + } + # flash attention requires head dimensions are multiples of 8 + head_dim = random.randint(1, 7) * 8 + cfg['hidden'] = cfg['head'] * head_dim + + return cfg + + @pytest.mark.skipif(not HAVE_FA, reason="flash-attention is not installed") + @pytest.mark.unit + def test_flash_self_attention(self, cfg): + device = cfg['device'] + layer_number = cfg['layer_number'] + bz, sl, np, h = cfg['bz'], cfg['sq'], cfg['head'], cfg['hidden'] + hn = h // np + + q = torch.rand(sl, bz, np, hn, device=device).half() + k = torch.rand(sl, bz, np, hn, device=device).half() + v = torch.rand(sl, bz, np, hn, device=device).half() + + attention_mask_2d = torch.arange(sl, device=device).unsqueeze(0) < torch.randint( + 1, sl, (bz,), device=device + ).unsqueeze(1) + + attention_mask_padding_3d = build_attention_mask_3d( + source_mask=attention_mask_2d, target_mask=attention_mask_2d, attn_mask_type=AttnMaskType.padding + ).unsqueeze(1) + + attention_mask_causal_3d = build_attention_mask_3d( + source_mask=attention_mask_2d, target_mask=attention_mask_2d, attn_mask_type=AttnMaskType.causal + ).unsqueeze(1) + + # Non-causal + attention = CoreAttention( + layer_number=layer_number, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + ) + + attention_fa = CoreAttention( + layer_number=layer_number, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + use_flash_attention=True, + ) + + out = attention(q, k, v, attention_mask_padding_3d) + out_fa = attention_fa(q, k, v, attention_mask_padding_3d) + torch.testing.assert_close(out, out_fa) + out_fa = attention_fa(q, k, v, attention_mask_2d) + torch.testing.assert_close(out, out_fa) + + # Causal + attention = CoreAttention( + layer_number=layer_number, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.causal, + attention_dropout=0.0, + apply_query_key_layer_scaling=False, + ) + + attention_fa = CoreAttention( + layer_number=layer_number, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.causal, + attention_dropout=0.0, + use_flash_attention=True, + ) + + out = attention(q, k, v, attention_mask_causal_3d) + out_fa = attention_fa(q, k, v, attention_mask_causal_3d) + torch.testing.assert_close(out, out_fa) + out_fa = attention_fa(q, k, v, attention_mask_2d) + torch.testing.assert_close(out, out_fa) + + @pytest.mark.skipif(not HAVE_FA, reason="flash-attention is not installed") + @pytest.mark.unit + def test_flash_cross_attention(self, cfg): + device = cfg['device'] + layer_number = cfg['layer_number'] + bz, sq, sk, np, h = cfg['bz'], cfg['sq'], cfg['sk'], cfg['head'], cfg['hidden'] + hn = h // np + + q = torch.rand(sq, bz, np, hn, device=device).half() + k = torch.rand(sk, bz, np, hn, device=device).half() + v = torch.rand(sk, bz, np, hn, device=device).half() + + attention_mask_2d_q = torch.arange(sq, device=device).unsqueeze(0) < torch.randint( + 1, sq, (bz,), device=device + ).unsqueeze(1) + + attention_mask_2d_k = torch.arange(sk, device=device).unsqueeze(0) < torch.randint( + 1, sk, (bz,), device=device + ).unsqueeze(1) + + attention_mask_padding_3d = build_attention_mask_3d( + source_mask=attention_mask_2d_q, target_mask=attention_mask_2d_k, attn_mask_type=AttnMaskType.padding + ).unsqueeze(1) + + attention = CoreAttention( + layer_number=layer_number, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + apply_query_key_layer_scaling=False, + ) + + attention_fa = CoreAttention( + layer_number=layer_number, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + use_flash_attention=True, + ) + + out = attention(q, k, v, attention_mask_padding_3d) + out_fa = attention_fa(q, k, v, attention_mask_padding_3d) + torch.testing.assert_close(out, out_fa) + + @pytest.mark.skipif(not HAVE_FA, reason="flash-attention is not installed") + @pytest.mark.skipif(not HAVE_TRITON, reason="triton is not installed") + @pytest.mark.skipif( + not HAVE_AMPERE_GPU(), + reason="should only run on AMPERE GPU. Please see https://github.com/HazyResearch/flash-attention/issues/245", + ) + @pytest.mark.unit + def test_flash_self_attention_triton(self, cfg): + device = cfg['device'] + layer_number = cfg['layer_number'] + bz, sl, np, h = cfg['bz'], cfg['sq'], cfg['head'], cfg['hidden'] + hn = h // np + + q = torch.rand(sl, bz, np, hn, device=device).half() + k = torch.rand(sl, bz, np, hn, device=device).half() + v = torch.rand(sl, bz, np, hn, device=device).half() + + attention_mask_2d = torch.arange(sl, device=device).unsqueeze(0) < torch.randint( + 1, sl, (bz,), device=device + ).unsqueeze(1) + + attention_mask_padding_3d = build_attention_mask_3d( + source_mask=attention_mask_2d, target_mask=attention_mask_2d, attn_mask_type=AttnMaskType.padding + ).unsqueeze(1) + + attention_mask_causal_3d = build_attention_mask_3d( + source_mask=attention_mask_2d, target_mask=attention_mask_2d, attn_mask_type=AttnMaskType.causal + ).unsqueeze(1) + + attention_bias = torch.rand(bz, np, sl, sl, device=device) + + # Non-causal + attention = CoreAttention( + layer_number=layer_number, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + apply_query_key_layer_scaling=False, + ) + + attention_fa = CoreAttention( + layer_number=layer_number, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + use_flash_attention=True, + ) + + out = attention(q, k, v, attention_mask_padding_3d, relative_position_bias=attention_bias) + out_fa = attention_fa(q, k, v, attention_mask_padding_3d, relative_position_bias=attention_bias) + torch.testing.assert_close(out, out_fa, rtol=1e-3, atol=1e-3) + out_fa = attention_fa(q, k, v, attention_mask_2d, relative_position_bias=attention_bias) + torch.testing.assert_close(out, out_fa, rtol=1e-3, atol=1e-3) + + # Causal + attention = CoreAttention( + layer_number=layer_number, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.causal, + attention_dropout=0.0, + apply_query_key_layer_scaling=False, + ) + + attention_fa = CoreAttention( + layer_number=layer_number, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.causal, + attention_dropout=0.0, + use_flash_attention=True, + ) + + out = attention(q, k, v, attention_mask_causal_3d, relative_position_bias=attention_bias) + out_fa = attention_fa(q, k, v, attention_mask_causal_3d, relative_position_bias=attention_bias) + torch.testing.assert_close(out, out_fa, rtol=1e-3, atol=1e-3) + out_fa = attention_fa(q, k, v, attention_mask_2d, relative_position_bias=attention_bias) + torch.testing.assert_close(out, out_fa, rtol=1e-3, atol=1e-3) + + @pytest.mark.skipif(not HAVE_FA, reason="flash-attention is not installed") + @pytest.mark.skipif(not HAVE_TRITON, reason="triton is not installed") + @pytest.mark.skipif( + not HAVE_AMPERE_GPU(), + reason="should only run on AMPERE GPU. Please see https://github.com/HazyResearch/flash-attention/issues/245", + ) + @pytest.mark.unit + def test_flash_cross_attention_triton(self, cfg): + device = cfg['device'] + layer_number = cfg['layer_number'] + bz, sq, sk, np, h = cfg['bz'], cfg['sq'], cfg['sk'], cfg['head'], cfg['hidden'] + hn = h // np + + q = torch.rand(sq, bz, np, hn, device=device).half() + k = torch.rand(sk, bz, np, hn, device=device).half() + v = torch.rand(sk, bz, np, hn, device=device).half() + + attention_mask_2d_q = torch.arange(sq, device=device).unsqueeze(0) < torch.randint( + 1, sq, (bz,), device=device + ).unsqueeze(1) + + attention_mask_2d_k = torch.arange(sk, device=device).unsqueeze(0) < torch.randint( + 1, sk, (bz,), device=device + ).unsqueeze(1) + + attention_mask_padding_3d = build_attention_mask_3d( + source_mask=attention_mask_2d_q, target_mask=attention_mask_2d_k, attn_mask_type=AttnMaskType.padding + ).unsqueeze(1) + + attention_bias = torch.rand(bz, np, sq, sk, device=device) + + attention = CoreAttention( + layer_number=layer_number, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + apply_query_key_layer_scaling=False, + ) + + attention_fa = CoreAttention( + layer_number=layer_number, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + use_flash_attention=True, + ) + + out = attention(q, k, v, attention_mask_padding_3d, relative_position_bias=attention_bias) + out_fa = attention_fa(q, k, v, attention_mask_padding_3d, relative_position_bias=attention_bias) + torch.testing.assert_close(out, out_fa, rtol=1e-3, atol=1e-3) diff --git a/tests/collections/nlp/test_huggingface.py b/tests/collections/nlp/test_huggingface.py index cfe2845caa9b..0ad7b5850475 100644 --- a/tests/collections/nlp/test_huggingface.py +++ b/tests/collections/nlp/test_huggingface.py @@ -85,12 +85,13 @@ def test_get_pretrained_chinese_bert_wwm_model(self): tokenizer = get_tokenizer(tokenizer_name=model_name) assert isinstance(tokenizer, AutoTokenizer) - @pytest.mark.with_downloads() - @pytest.mark.unit - def test_get_pretrained_arabic_model(self): - model_name = 'asafaya/bert-base-arabic' - self.omega_conf.language_model.pretrained_model_name = model_name - model = nemo_nlp.modules.get_lm_model(cfg=self.omega_conf) - assert isinstance(model, nemo_nlp.modules.BertModule) - tokenizer = get_tokenizer(tokenizer_name=model_name) - assert isinstance(tokenizer, AutoTokenizer) + # model is not on HF anymore + # @pytest.mark.with_downloads() + # @pytest.mark.unit + # def test_get_pretrained_arabic_model(self): + # model_name = 'asafaya/bert-base-arabic' + # self.omega_conf.language_model.pretrained_model_name = model_name + # model = nemo_nlp.modules.get_lm_model(cfg=self.omega_conf) + # assert isinstance(model, nemo_nlp.modules.BertModule) + # tokenizer = get_tokenizer(tokenizer_name=model_name) + # assert isinstance(tokenizer, AutoTokenizer) diff --git a/tests/collections/nlp/test_mem_map_dataset.py b/tests/collections/nlp/test_mem_map_dataset.py new file mode 100644 index 000000000000..1e21b6d270c9 --- /dev/null +++ b/tests/collections/nlp/test_mem_map_dataset.py @@ -0,0 +1,117 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import csv +import json +import os + +import pytest + +from nemo.collections.nlp.data.language_modeling import text_memmap_dataset + + +@pytest.fixture +def jsonl_file(tmp_path): + # Create a temporary file path + file_path = tmp_path / "data.jsonl" + + # Generate data to write to the JSONL file + data = [ + {"name": "John", "age": 30}, + {"name": "Jane", "age": 25}, + {"name": "Bob", "age": 35}, + ] + + # Write data to the JSONL file + with open(file_path, mode="w") as file: + for item in data: + json.dump(item, file) + file.write("\n") + + # Provide the file path to the test function + yield str(file_path) + + # Optional: Clean up the temporary file after the test + file_path.unlink() + + +@pytest.fixture +def csv_file(tmp_path): + # Create a temporary file path + file_path = tmp_path / "data.csv" + + # Generate data to write to the CSV file + data = [["ID", "Name"], [1, "John"], [2, "Jane"], [3, "Bob"]] + + # Write data to the CSV file + with open(file_path, mode="w", newline="") as file: + writer = csv.writer(file) + writer.writerows(data) + + # Provide the file path to the test function + yield str(file_path) + + # Optional: Clean up the temporary file after the test + file_path.unlink() + + +def test_jsonl_mem_map_dataset(jsonl_file): + """Test for JSONL memory-mapped datasets.""" + + indexed_dataset = text_memmap_dataset.JSONLMemMapDataset(dataset_paths=[jsonl_file], header_lines=0) + assert indexed_dataset[0] == {"name": "John", "age": 30} + assert indexed_dataset[1] == {"name": "Jane", "age": 25} + assert indexed_dataset[2] == {"name": "Bob", "age": 35} + + +def test_csv_mem_map_dataset(csv_file): + """Test for CSV memory-mapped datasets.""" + + indexed_dataset = text_memmap_dataset.CSVMemMapDataset(dataset_paths=[csv_file], data_col=1, header_lines=1) + assert indexed_dataset[0].strip() == "John" + assert indexed_dataset[1].strip() == "Jane" + assert indexed_dataset[2].strip() == "Bob" + + +@pytest.mark.parametrize( + "dataset_class", [text_memmap_dataset.JSONLMemMapDataset, text_memmap_dataset.CSVMemMapDataset], +) +@pytest.mark.parametrize("use_alternative_index_mapping_dir", [True, False]) +@pytest.mark.parametrize("relative_index_fn", [True, False]) +def test_mem_map_dataset_index_mapping_dir( + tmp_path, dataset_class, jsonl_file, use_alternative_index_mapping_dir, relative_index_fn, +): + """Test for index_mapping_dir.""" + if relative_index_fn: + jsonl_file = os.path.relpath(jsonl_file) + else: + jsonl_file = os.path.abspath(jsonl_file) + + if use_alternative_index_mapping_dir: + index_mapping_dir = tmp_path / "subdir" + dataset_class(dataset_paths=[jsonl_file], header_lines=0, index_mapping_dir=str(index_mapping_dir)) + # Index files should not be created in default location. + assert not os.path.isfile(f"{jsonl_file}.idx.npy") + assert not os.path.isfile(f"{jsonl_file}.idx.info") + if relative_index_fn: + # Remove leading ".." sequences. + while jsonl_file.startswith(("../")): + jsonl_file = jsonl_file.lstrip("../") + idx_fn = f"{str(index_mapping_dir)}/{jsonl_file}.idx" + assert os.path.isfile(f"{idx_fn}.npy") + assert os.path.isfile(f"{idx_fn}.info") + else: + text_memmap_dataset.JSONLMemMapDataset(dataset_paths=[jsonl_file], header_lines=0) + assert os.path.isfile(f"{jsonl_file}.idx.npy") + assert os.path.isfile(f"{jsonl_file}.idx.info") diff --git a/tests/collections/nlp/test_position_embedding.py b/tests/collections/nlp/test_position_embedding.py new file mode 100644 index 000000000000..263ca8669d81 --- /dev/null +++ b/tests/collections/nlp/test_position_embedding.py @@ -0,0 +1,211 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random + +import pytest +import torch + +from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType +from nemo.collections.nlp.modules.common.megatron.position_embedding import ( + ALiBiRelativePositionEmbedding, + KERPLERelativePositionEmbedding, + RotaryEmbedding, + SandwichRelativePositionEmbedding, + T5RelativePositionEmbedding, + XPOSPositionEmbedding, +) +from nemo.collections.nlp.modules.common.megatron.position_embedding.rotary_position_embedding import ( + apply_rotary_pos_emb, +) +from nemo.collections.nlp.modules.common.megatron.utils import init_method_normal + + +@pytest.fixture() +def cfg(): + cfg = { + 'max_seq_len': 8, + 'num_attention_heads': 2, + 'layer_type': LayerType.encoder, + 'hidden_size': 4, + 'rpe_init_method_std': 0.02, + 'rpe_num_buckets': 6, + 'rpe_max_distance': 16, + } + return cfg + + +@pytest.mark.unit +def test_alibi(cfg): + # non-causal + PE_nc = ALiBiRelativePositionEmbedding( + bidirectional=True, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + max_seq_len=cfg['max_seq_len'], + ) + + # causal + PE_c = ALiBiRelativePositionEmbedding( + bidirectional=False, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + max_seq_len=cfg['max_seq_len'], + ) + + q_len = k_len = random.randint(1, cfg['max_seq_len'] * 2) + + bias_nc = PE_nc(q_len, k_len) + assert bias_nc.shape == (1, cfg['num_attention_heads'], q_len, k_len) + assert torch.equal(bias_nc, bias_nc.transpose(2, 3)) + + bias_c = PE_c(q_len, k_len) + assert bias_c.shape == (1, cfg['num_attention_heads'], 1, k_len) + assert torch.equal(bias_c, bias_nc[:, :, -1:, :]) + + +@pytest.mark.unit +def test_sandwich(cfg): + # non-causal + PE_nc = SandwichRelativePositionEmbedding( + bidirectional=True, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + max_seq_len=cfg['max_seq_len'], + hidden_size=cfg['hidden_size'], + ) + + # causal + PE_c = SandwichRelativePositionEmbedding( + bidirectional=False, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + max_seq_len=cfg['max_seq_len'], + hidden_size=cfg['hidden_size'], + ) + + q_len = k_len = random.randint(1, cfg['max_seq_len'] * 2) + + bias_nc = PE_nc(q_len, k_len) + assert bias_nc.shape == (1, cfg['num_attention_heads'], q_len, k_len) + assert torch.equal(bias_nc, bias_nc.transpose(2, 3)) + + bias_c = PE_c(q_len, k_len) + assert bias_c.shape == (1, cfg['num_attention_heads'], q_len, k_len) + assert torch.all(torch.triu(bias_c, diagonal=0) == 0) + + +@pytest.mark.unit +def test_kerple(cfg): + # non-causal + PE_nc = KERPLERelativePositionEmbedding( + bidirectional=True, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + max_seq_len=cfg['max_seq_len'], + ) + + # causal + PE_c = KERPLERelativePositionEmbedding( + bidirectional=False, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + max_seq_len=cfg['max_seq_len'], + ) + + q_len = k_len = random.randint(1, cfg['max_seq_len'] * 2) + + bias_nc = PE_nc(q_len, k_len) + assert bias_nc.shape == (1, cfg['num_attention_heads'], q_len, k_len) + assert torch.equal(bias_nc, bias_nc.transpose(2, 3)) + + bias_c = PE_c(q_len, k_len) + assert bias_c.shape == (1, cfg['num_attention_heads'], q_len, k_len) + assert torch.all(torch.triu(bias_c, diagonal=0) == 0) + + +@pytest.mark.unit +def test_t5relative(cfg): + # non-causal + PE_nc = T5RelativePositionEmbedding( + bidirectional=True, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + init_method=init_method_normal(cfg['rpe_init_method_std']), + relative_position_num_buckets=cfg['rpe_num_buckets'], + relative_position_max_distance=cfg['rpe_max_distance'], + ) + + # causal + PE_c = T5RelativePositionEmbedding( + bidirectional=False, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + init_method=init_method_normal(cfg['rpe_init_method_std']), + relative_position_num_buckets=cfg['rpe_num_buckets'], + relative_position_max_distance=cfg['rpe_max_distance'], + ) + + q_len = k_len = random.randint(1, cfg['max_seq_len'] * 2) + + bias_nc = PE_nc(q_len, k_len) + assert bias_nc.shape == (1, cfg['num_attention_heads'], q_len, k_len) + + bias_c = PE_c(q_len, k_len) + assert bias_c.shape == (1, cfg['num_attention_heads'], q_len, k_len) + assert ( + len(torch.triu(bias_c, diagonal=0).unique()) == cfg['num_attention_heads'] + 1 + if q_len > 1 + else cfg['num_attention_heads'] + ) + + +@pytest.mark.unit +def test_rotary(cfg): + PE = RotaryEmbedding(dim=cfg['hidden_size']) + rotary_embedding = PE(cfg['max_seq_len']) + + x = torch.rand(cfg['max_seq_len'], 1, cfg['num_attention_heads'], cfg['hidden_size']) + x_rotary = apply_rotary_pos_emb(x, rotary_embedding) + assert x_rotary.shape == x.shape + + hd = cfg['hidden_size'] // 2 + x_rotary_test = torch.cat( + ( + x[..., :hd] * rotary_embedding[..., :hd].cos() + x[..., hd:] * rotary_embedding[..., hd:].sin() * -1, + x[..., :hd] * rotary_embedding[..., :hd].sin() + x[..., hd:] * rotary_embedding[..., hd:].cos(), + ), + dim=-1, + ) + assert torch.equal(x_rotary, x_rotary_test) + + offset = random.choice(range(1, cfg['max_seq_len'])) + rotary_embedding_offset = PE(cfg['max_seq_len'], offset=offset) + x_rotary = apply_rotary_pos_emb(x[: offset + 1], rotary_embedding[: offset + 1]) + x_rotary_offset = apply_rotary_pos_emb(x[offset : offset + 1], rotary_embedding_offset[:1]) + assert torch.equal(x_rotary[-1], x_rotary_offset[0]) + + +@pytest.mark.unit +def test_xpos(cfg): + PE = XPOSPositionEmbedding(head_dim=cfg['hidden_size']) + x = torch.rand(cfg['max_seq_len'], 1, cfg['num_attention_heads'], cfg['hidden_size']) + + x_rotary = PE(x) + assert x_rotary.shape == x.shape + + offset = random.choice(range(1, cfg['max_seq_len'])) + x_rotary = PE(x[: offset + 1]) + x_rotary_offset = PE(x[offset : offset + 1], offset=offset) + assert torch.equal(x_rotary[-1], x_rotary_offset[0]) diff --git a/tests/collections/nlp/test_retrieval_module.py b/tests/collections/nlp/test_retrieval_module.py index 3a2d46f4fed2..08425964e566 100644 --- a/tests/collections/nlp/test_retrieval_module.py +++ b/tests/collections/nlp/test_retrieval_module.py @@ -21,6 +21,7 @@ from nemo.collections.nlp.modules.common.megatron.attention import ParallelChunkedCrossAttention from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.collections.nlp.modules.common.megatron.position_embedding import RotaryEmbedding from nemo.collections.nlp.modules.common.megatron.retrieval_token_level_encoder_decoder import ( MegatronRetrievalTokenLevelEncoderDecoderModule, ) @@ -28,7 +29,6 @@ MegatronRetrievalTransformerDecoderModule, MegatronRetrievalTransformerEncoderModule, ) -from nemo.collections.nlp.modules.common.megatron.rotary_pos_embedding import RotaryEmbedding from nemo.collections.nlp.modules.common.megatron.utils import ( build_attention_mask_3d, init_method_normal, diff --git a/tests/collections/nlp/test_retrieval_module_inference.py b/tests/collections/nlp/test_retrieval_module_inference.py index 16e7e556bd10..a9aa002815b2 100644 --- a/tests/collections/nlp/test_retrieval_module_inference.py +++ b/tests/collections/nlp/test_retrieval_module_inference.py @@ -22,6 +22,7 @@ from nemo.collections.nlp.modules.common.megatron.attention import ParallelChunkedCrossAttention from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.collections.nlp.modules.common.megatron.position_embedding import RotaryEmbedding from nemo.collections.nlp.modules.common.megatron.retrieval_token_level_encoder_decoder import ( MegatronRetrievalTokenLevelEncoderDecoderModule, ) @@ -29,7 +30,6 @@ MegatronRetrievalTransformerDecoderModule, MegatronRetrievalTransformerEncoderModule, ) -from nemo.collections.nlp.modules.common.megatron.rotary_pos_embedding import RotaryEmbedding from nemo.collections.nlp.modules.common.megatron.utils import ( build_attention_mask_3d, init_method_normal, diff --git a/tests/collections/nlp/test_spellchecking_asr_customization.py b/tests/collections/nlp/test_spellchecking_asr_customization.py new file mode 100644 index 000000000000..8e4d6e9a7b8f --- /dev/null +++ b/tests/collections/nlp/test_spellchecking_asr_customization.py @@ -0,0 +1,1102 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from transformers import AutoTokenizer + +from nemo.collections.nlp.data.spellchecking_asr_customization.bert_example import BertExampleBuilder +from nemo.collections.nlp.data.spellchecking_asr_customization.utils import ( + apply_replacements_to_text, + substitute_replacements_in_text, +) + + +@pytest.mark.unit +def test_substitute_replacements_in_text(): + text = "we began the further diversification of our revenue base with the protterra supply agreement and the navastar joint development agreement" + replacements = [(66, 75, 'pro-terra', 0.99986), (101, 109, 'navistar', 0.996)] + gold_text = "we began the further diversification of our revenue base with the pro-terra supply agreement and the navistar joint development agreement" + corrected_text = substitute_replacements_in_text(text, replacements, replace_hyphen_to_space=False) + assert corrected_text == gold_text + + gold_text_no_hyphen = "we began the further diversification of our revenue base with the pro terra supply agreement and the navistar joint development agreement" + corrected_text = substitute_replacements_in_text(text, replacements, replace_hyphen_to_space=True) + assert corrected_text == gold_text_no_hyphen + + +@pytest.mark.unit +def test_apply_replacements_to_text(): + + # min_prob = 0.5 + # dp_data = None, + # min_dp_score_per_symbol: float = -99.9 + + # test more than one fragment to replace, test multiple same replacements + text = "we began the further diversification of our revenue base with the protterra supply agreement and the navastar joint development agreement" + replacements = [ + (66, 75, 'proterra', 0.99986), + (66, 75, 'proterra', 0.9956), + (101, 109, 'navistar', 0.93), + (101, 109, 'navistar', 0.91), + (101, 109, 'navistar', 0.92), + ] + gold_text = "we began the further diversification of our revenue base with the proterra supply agreement and the navistar joint development agreement" + corrected_text = apply_replacements_to_text( + text, replacements, min_prob=0.5, replace_hyphen_to_space=False, dp_data=None + ) + assert corrected_text == gold_text + + # test that min_prob works + gold_text = "we began the further diversification of our revenue base with the proterra supply agreement and the navastar joint development agreement" + corrected_text = apply_replacements_to_text( + text, replacements, min_prob=0.95, replace_hyphen_to_space=False, dp_data=None + ) + assert corrected_text == gold_text + + +@pytest.fixture() +def bert_example_builder(): + tokenizer = AutoTokenizer.from_pretrained("huawei-noah/TinyBERT_General_6L_768D") + label_map = {"0": 0, "1": 1, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7, "8": 8, "9": 9, "10": 10} + semiotic_classes = {"PLAIN": 0, "CUSTOM": 1} + max_seq_len = 256 + builder = BertExampleBuilder(label_map, semiotic_classes, tokenizer, max_seq_len) + return builder + + +@pytest.mark.skip("Doesn't work download when testing on github, for unknown reason") +@pytest.mark.with_downloads +@pytest.mark.unit +def test_creation(bert_example_builder): + assert bert_example_builder._tokenizer is not None + + +@pytest.mark.skip("Doesn't work download when testing on github, for unknown reason") +@pytest.mark.with_downloads +@pytest.mark.unit +def test_builder_get_spans(bert_example_builder): + span_info_parts = ["CUSTOM 37 41", "CUSTOM 47 52", "CUSTOM 42 46", "CUSTOM 0 7"] + gold_sorted_spans = [(1, 1, 8), (1, 38, 42), (1, 43, 47), (1, 48, 53)] + spans = bert_example_builder._get_spans(span_info_parts) + spans.sort() + assert spans == gold_sorted_spans + + +@pytest.mark.skip("Doesn't work download when testing on github, for unknown reason") +@pytest.mark.with_downloads +@pytest.mark.unit +def test_builder_get_fragment_indices(bert_example_builder): + hyp = "a b o u t _ o u r _ s h i p e r s _ b u t _ y o u _ k n o w" + targets = [1] + # a b o u t _ o u r _ s h i p e r s _ b u t _ y o u _ k n o w + # 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 + span_info_parts = ["CUSTOM 8 17"] + gold_sorted_fragment_indices = [(7, 18, 1), (11, 18, 1)] + fragment_indices = bert_example_builder._get_fragment_indices(hyp, targets, span_info_parts) + fragment_indices.sort() + assert fragment_indices == gold_sorted_fragment_indices + + # a b o u t _ o u r _ s h i p e r s _ b u t _ y o u _ k n o w + # 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + span_info_parts = ["CUSTOM 10 16"] + gold_sorted_fragment_indices = [(11, 18, 1)] + fragment_indices = bert_example_builder._get_fragment_indices(hyp, targets, span_info_parts) + fragment_indices.sort() + assert fragment_indices == gold_sorted_fragment_indices + + +@pytest.mark.skip("Doesn't work download when testing on github, for unknown reason") +@pytest.mark.with_downloads +@pytest.mark.unit +def test_builder_get_input_features(bert_example_builder): + hyp = "a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o" + ref = "d i d i e r _ s a u m o n;a s t r o n o m i e;t r i s t a n _ g u i l l o t;t r i s t e s s e;m o n a d e;c h r i s t i a n;a s t r o n o m e r;s o l o m o n;d i d i d i d i d i;m e r c y" + targets = [1, 3] + span_info_parts = ["CUSTOM 12 23", "CUSTOM 28 41"] + + gold_tags = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + ] + gold_input_ids = [ + 101, + 1037, + 1055, + 1056, + 1054, + 1051, + 1050, + 1051, + 1049, + 1041, + 1054, + 1055, + 1035, + 1040, + 1045, + 1040, + 1045, + 1041, + 1035, + 1055, + 1051, + 1049, + 1051, + 1050, + 1035, + 1037, + 1050, + 1040, + 1035, + 1056, + 1054, + 1045, + 1055, + 1056, + 1045, + 1037, + 1050, + 1035, + 1043, + 1048, + 1048, + 1051, + 102, + 1040, + 1045, + 1040, + 1045, + 1041, + 1054, + 1035, + 1055, + 1037, + 1057, + 1049, + 1051, + 1050, + 102, + 1037, + 1055, + 1056, + 1054, + 1051, + 1050, + 1051, + 1049, + 1045, + 1041, + 102, + 1056, + 1054, + 1045, + 1055, + 1056, + 1037, + 1050, + 1035, + 1043, + 1057, + 1045, + 1048, + 1048, + 1051, + 1056, + 102, + 1056, + 1054, + 1045, + 1055, + 1056, + 1041, + 1055, + 1055, + 1041, + 102, + 1049, + 1051, + 1050, + 1037, + 1040, + 1041, + 102, + 1039, + 1044, + 1054, + 1045, + 1055, + 1056, + 1045, + 1037, + 1050, + 102, + 1037, + 1055, + 1056, + 1054, + 1051, + 1050, + 1051, + 1049, + 1041, + 1054, + 102, + 1055, + 1051, + 1048, + 1051, + 1049, + 1051, + 1050, + 102, + 1040, + 1045, + 1040, + 1045, + 1040, + 1045, + 1040, + 1045, + 1040, + 1045, + 102, + 1049, + 1041, + 1054, + 1039, + 1061, + 102, + ] + gold_input_mask = [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + ] + gold_segment_ids = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 10, + 10, + 10, + 10, + 10, + 10, + ] + gold_labels_mask = [ + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + gold_input_ids_for_subwords = [ + 101, + 26357, + 2106, + 2666, + 2061, + 8202, + 1998, + 13012, + 16643, + 2319, + 1043, + 7174, + 102, + 2106, + 3771, + 7842, + 2819, + 2239, + 102, + 28625, + 3630, + 9856, + 102, + 9822, + 26458, + 7174, + 2102, + 102, + 13012, + 13473, + 11393, + 102, + 13813, + 3207, + 102, + 3017, + 102, + 15211, + 102, + 9168, + 102, + 2106, + 28173, + 4305, + 4305, + 102, + 8673, + 102, + ] + gold_input_mask_for_subwords = [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + ] + gold_segment_ids_for_subwords = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 2, + 2, + 2, + 2, + 3, + 3, + 3, + 3, + 3, + 4, + 4, + 4, + 4, + 5, + 5, + 5, + 6, + 6, + 7, + 7, + 8, + 8, + 9, + 9, + 9, + 9, + 9, + 10, + 10, + ] + gold_character_pos_to_subword_pos = [ + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 2, + 2, + 2, + 3, + 3, + 3, + 4, + 4, + 5, + 5, + 5, + 5, + 6, + 6, + 6, + 6, + 7, + 7, + 7, + 8, + 8, + 8, + 9, + 9, + 9, + 10, + 11, + 11, + 11, + 12, + 13, + 13, + 13, + 14, + 14, + 14, + 14, + 15, + 15, + 16, + 16, + 17, + 17, + 18, + 19, + 19, + 19, + 19, + 19, + 20, + 20, + 21, + 21, + 21, + 22, + 23, + 23, + 23, + 23, + 23, + 23, + 23, + 23, + 24, + 24, + 24, + 25, + 25, + 25, + 26, + 27, + 28, + 28, + 28, + 29, + 29, + 29, + 30, + 30, + 30, + 31, + 32, + 32, + 32, + 32, + 33, + 33, + 34, + 35, + 35, + 35, + 35, + 35, + 35, + 35, + 35, + 35, + 36, + 37, + 37, + 37, + 37, + 37, + 37, + 37, + 37, + 37, + 37, + 38, + 39, + 39, + 39, + 39, + 39, + 39, + 39, + 40, + 41, + 41, + 41, + 42, + 42, + 42, + 43, + 43, + 44, + 44, + 45, + 46, + 46, + 46, + 46, + 46, + 47, + ] + + tags = [0 for _ in hyp.split()] + for p, t in zip(span_info_parts, targets): + c, start, end = p.split(" ") + start = int(start) + end = int(end) + tags[start:end] = [t for i in range(end - start)] + + # get input features for characters + (input_ids, input_mask, segment_ids, labels_mask, labels, _, _,) = bert_example_builder._get_input_features( + hyp=hyp, ref=ref, tags=tags + ) + + # get input features for words + hyp_with_words = hyp.replace(" ", "").replace("_", " ") + ref_with_words = ref.replace(" ", "").replace("_", " ") + ( + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + _, + _, + _, + _, + ) = bert_example_builder._get_input_features(hyp=hyp_with_words, ref=ref_with_words, tags=None) + + character_pos_to_subword_pos = bert_example_builder._map_characters_to_subwords(input_ids, input_ids_for_subwords) + + assert tags == gold_tags + assert input_ids == gold_input_ids + assert input_mask == gold_input_mask + assert segment_ids == gold_segment_ids + assert labels_mask == gold_labels_mask + assert input_ids_for_subwords == gold_input_ids_for_subwords + assert input_mask_for_subwords == gold_input_mask_for_subwords + assert segment_ids_for_subwords == gold_segment_ids_for_subwords + assert character_pos_to_subword_pos == gold_character_pos_to_subword_pos diff --git a/tests/collections/tts/data/test_audio_trimming.py b/tests/collections/tts/parts/preprocessing/test_audio_trimming.py similarity index 98% rename from tests/collections/tts/data/test_audio_trimming.py rename to tests/collections/tts/parts/preprocessing/test_audio_trimming.py index 4e5b01b8a11a..14a79c5823e5 100644 --- a/tests/collections/tts/data/test_audio_trimming.py +++ b/tests/collections/tts/parts/preprocessing/test_audio_trimming.py @@ -47,7 +47,7 @@ def test_get_start_and_end_of_speech_frames_not_frames_found(self): ) assert start_frame == 0 - assert end_frame == 4 + assert end_frame == 0 @pytest.mark.run_only_on('CPU') @pytest.mark.unit diff --git a/tests/collections/tts/test_tts_exportables.py b/tests/collections/tts/test_tts_exportables.py index 05b23e6afb1b..67f016b0c2af 100644 --- a/tests/collections/tts/test_tts_exportables.py +++ b/tests/collections/tts/test_tts_exportables.py @@ -54,8 +54,7 @@ def radtts_model(): model = RadTTSModel(cfg=cfg.model) app_state.is_model_being_restored = False model.eval() - model.export_config['enable_ragged_batches'] = True - model.export_config['enable_volume'] = True + model.set_export_config({'enable_ragged_batches': 'True', 'enable_volume': 'True'}) return model diff --git a/tools/ctc_segmentation/scripts/prepare_data.py b/tools/ctc_segmentation/scripts/prepare_data.py index 429b642d5ba0..c6ea024273fb 100644 --- a/tools/ctc_segmentation/scripts/prepare_data.py +++ b/tools/ctc_segmentation/scripts/prepare_data.py @@ -151,7 +151,7 @@ def split_text( ) # end of quoted speech - to be able to split sentences by full stop - transcript = re.sub(r"([\.\?\!])([\"\'])", r"\g<2>\g<1> ", transcript) + transcript = re.sub(r"([\.\?\!])([\"\'”])", r"\g<2>\g<1> ", transcript) # remove extra space transcript = re.sub(r" +", " ", transcript) diff --git a/tools/customization_dataset_preparation/customization_dataset_preparation.py b/tools/customization_dataset_preparation/customization_dataset_preparation.py index 9a83f61f60e6..071c06e20803 100644 --- a/tools/customization_dataset_preparation/customization_dataset_preparation.py +++ b/tools/customization_dataset_preparation/customization_dataset_preparation.py @@ -92,13 +92,8 @@ def recommend_hyperparameters(df, model=None): """ Makes recommendations on the batch_size to use for training, based on the dataset size - All hyperparameters except batch_size, max_batch_size and max_seq_length are hardcoded based on API defaults for now """ potential_batch_sizes = [2, 4, 8, 12, 16, 32, 64, 128] - bs = 2 - for potential_bs in potential_batch_sizes: - if 0.002 * len(df) > potential_bs: - bs = potential_bs max_bs = 128 if len(df) < 128: @@ -107,6 +102,8 @@ def recommend_hyperparameters(df, model=None): if potential_bs < len(df) * 0.9: max_bs = potential_bs + bs = min(max_bs, 32) + df_char_length = df.apply(lambda x: len(x.prompt) + len(x.completion), axis=1) length_by_chars = sorted(list(df_char_length)) n_samples_under_99p5_limit = math.ceil(len(df_char_length) * 0.995) diff --git a/tools/customization_dataset_preparation/tests/test_customization_dataset_preparation.py b/tools/customization_dataset_preparation/tests/test_customization_dataset_preparation.py index 6fbb239d9765..7bc9b701672e 100644 --- a/tools/customization_dataset_preparation/tests/test_customization_dataset_preparation.py +++ b/tools/customization_dataset_preparation/tests/test_customization_dataset_preparation.py @@ -39,39 +39,43 @@ def test_recommend_hyperparameters(): df_100 = pd.DataFrame({'prompt': ['prompt'] * 100, 'completion': ['completion'] * 100}) assert recommend_hyperparameters(df_100) == { - 'batch_size': 2, + 'batch_size': 32, 'max_batch_size': 64, 'num_virtual_tokens': 10, - 'lr': 0.0001, - 'epochs': 25, + 'encoder_hidden_size': 1024, + 'lr': 0.005, + 'epochs': 10, 'max_seq_length': 104, } df_1000 = pd.DataFrame({'prompt': ['prompt'] * 1000, 'completion': ['completion'] * 1000}) assert recommend_hyperparameters(df_1000) == { - 'batch_size': 2, + 'batch_size': 32, 'max_batch_size': 128, 'num_virtual_tokens': 10, - 'lr': 0.0001, - 'epochs': 25, + 'encoder_hidden_size': 2048, + 'lr': 0.001, + 'epochs': 10, 'max_seq_length': 104, } df_10000 = pd.DataFrame({'prompt': ['prompt'] * 10000, 'completion': ['completion'] * 10000}) assert recommend_hyperparameters(df_10000) == { - 'batch_size': 16, + 'batch_size': 32, 'max_batch_size': 128, 'num_virtual_tokens': 10, - 'lr': 0.0001, - 'epochs': 25, + 'encoder_hidden_size': 4096, + 'lr': 0.0005, + 'epochs': 10, 'max_seq_length': 104, } df_100000 = pd.DataFrame({'prompt': ['prompt'] * 100000, 'completion': ['completion'] * 100000}) assert recommend_hyperparameters(df_100000) == { - 'batch_size': 128, + 'batch_size': 32, 'max_batch_size': 128, 'num_virtual_tokens': 10, + 'encoder_hidden_size': 4096, 'lr': 0.0001, - 'epochs': 25, + 'epochs': 10, 'max_seq_length': 104, } diff --git a/tools/nemo_forced_aligner/README.md b/tools/nemo_forced_aligner/README.md index 35ee78ffecb0..423c76878db6 100644 --- a/tools/nemo_forced_aligner/README.md +++ b/tools/nemo_forced_aligner/README.md @@ -7,7 +7,6 @@ A tool for doing Forced Alignment using Viterbi decoding of NeMo CTC-based model ``` bash python /tools/nemo_forced_aligner/align.py \ pretrained_name="stt_en_citrinet_1024_gamma_0_25" \ - model_downsample_factor=8 \ manifest_filepath= \ output_dir= ``` @@ -23,34 +22,44 @@ Call the `align.py` script, specifying the parameters as follows: * `model_path`: string specifying the local filepath to a CTC NeMo ASR model which will be used to generate the log-probs which we will use to do alignment. If `pretrained_name` is specified, `model_path` must not be specified. >Note: NFA can only use CTC models (not Transducer models) at the moment. If you want to transcribe a long audio file (longer than ~5-10 mins), do not use Conformer CTC model as that will likely give Out Of Memory errors. -* `model_downsample_factor`: the downsample factor of the ASR model. It should be 2 if your model is QuartzNet, 4 if it is Conformer CTC, 8 if it is Citrinet. - * `manifest_filepath`: The path to the manifest of the data you want to align, containing `'audio_filepath'` and `'text'` fields. The audio filepaths need to be absolute paths. -* `output_dir`: The folder where to save CTM files containing the generated alignments and new JSON manifest containing paths to those CTM files. There will be one CTM file per utterance (ie one CTM file per line in the manifest). The files will be called `/{tokens,words,additional_segments}/.ctm` and each line in each file will start with ``. By default, `utt_id` will be the stem of the audio_filepath. This can be changed by overriding `audio_filepath_parts_in_utt_id`. The new JSON manifest will be at `/_with_ctm_paths.json`. +* `output_dir`: The folder where to save the output files (e.g. CTM, ASS) containing the generated alignments and new JSON manifest containing paths to those CTM/ASS files. The CTM file will be called `/ctm/{tokens,words,segments}/.ctm` and each line in each file will start with ``. By default, `utt_id` will be the stem of the audio_filepath. This can be changed by overriding `audio_filepath_parts_in_utt_id`. The new JSON manifest will be at `/_with_ctm_paths.json`. The ASS files will be at `/ass/{tokens,words}/.ass`. You can adjust which files should be saved by adjusting the parameter `save_output_file_formats`. + +### Optional parameters: + +* `align_using_pred_text`: if True, will transcribe the audio using the ASR model (specified by `pretrained_name` or `model_path`) and then use that transcription as the reference text for the forced alignment. The `"pred_text"` will be saved in the output JSON manifest at `/{original manifest name}_with_ctm_paths.json`. To avoid over-writing other transcribed texts, if there are already `"pred_text"` entries in the original manifest, the program will exit without attempting to generate alignments. (Default: False). + +* `transcribe_device`: The device that will be used for generating log-probs (i.e. transcribing). If None, NFA will set it to 'cuda' if it is available (otherwise will set it to 'cpu'). If specified `transcribe_device` needs to be a string that can be input to the `torch.device()` method. (Default: `None`). + +* `viterbi_device`: The device that will be used for doing Viterbi decoding. If None, NFA will set it to 'cuda' if it is available (otherwise will set it to 'cpu'). If specified `transcribe_device` needs to be a string that can be input to the `torch.device()` method.(Default: `None`). -* **[OPTIONAL]** `align_using_pred_text`: if True, will transcribe the audio using the ASR model (specified by `pretrained_name` or `model_path`) and then use that transcription as the 'ground truth' for the forced alignment. The `"pred_text"` will be saved in the output JSON manifest at `/{original manifest name}_with_ctm_paths.json`. To avoid over-writing other transcribed texts, if there are already `"pred_text"` entries in the original manifest, the program will exit without attempting to generate alignments. (Default: False). +* `batch_size`: The batch_size that will be used for generating log-probs and doing Viterbi decoding. (Default: 1). -* **[OPTIONAL]** `transcribe_device`: The device that will be used for generating log-probs (i.e. transcribing). If None, NFA will set it to 'cuda' if it is available (otherwise will set it to 'cpu'). If specified `transcribe_device` needs to be a string that can be input to the `torch.device()` method. (Default: `None`). +* `use_local_attention`: boolean flag specifying whether to try to use local attention for the ASR Model (will only work if the ASR Model is a Conformer model). If local attention is used, we will set the local attention context size to [64,64]. -* **[OPTIONAL]** `viterbi_device`: The device that will be used for doing Viterbi decoding. If None, NFA will set it to 'cuda' if it is available (otherwise will set it to 'cpu'). If specified `transcribe_device` needs to be a string that can be input to the `torch.device()` method.(Default: `None`). +* `additional_segment_grouping_separator`: an optional string used to separate the text into smaller segments. If this is not specified, then the whole text will be treated as a single segment. (Default: `None`. Cannot be empty string or space (" "), as NFA will automatically produce word-level timestamps for substrings separated by spaces). +> Note: the `additional_segment_grouping_separator` will be removed from the reference text and all the output files, ie it is treated as a marker which is not part of the reference text. The separator will essentially be treated as a space, and any additional spaces around it will be amalgamated into one, i.e. if `additional_segment_grouping_separator="|"`, the following texts will be treated equivalently: `“abc|def”`, `“abc |def”`, `“abc| def”`, `“abc | def"`. -* **[OPTIONAL]** `batch_size`: The batch_size that will be used for generating log-probs and doing Viterbi decoding. (Default: 1). +* `remove_blank_tokens_from_ctm`: a boolean denoting whether to remove tokens from token-level output CTMs. (Default: False). -* **[OPTIONAL]** `additional_ctm_grouping_separator`: the string used to separate CTM segments if you want to obtain CTM files at a level that is not the token level or the word level. NFA will always produce token-level and word-level CTM files in: `/tokens/.ctm` and `/words/.ctm`. If `additional_ctm_grouping_separator` is specified, an additional folder `/{tokens/words/additional_segments}/.ctm` will be created containing CTMs for `addtional_ctm_grouping_separator`-separated segments. (Default: `None`. Cannot be empty string or space (" "), as space-separated word-level CTMs will always be saved in `/words/.ctm`.) -> Note: the `additional_ctm_grouping_separator` will be removed from the ground truth text and all the output CTMs, ie it is treated as a marker which is not part of the ground truth. The separator will essentially be treated as a space, and any additional spaces around it will be amalgamated into one, i.e. if `additional_ctm_grouping_separator="|"`, the following texts will be treated equivalently: `“abc|def”`, `“abc |def”`, `“abc| def”`, `“abc | def"`. +* `audio_filepath_parts_in_utt_id`: This specifies how many of the 'parts' of the audio_filepath we will use (starting from the final part of the audio_filepath) to determine the utt_id that will be used in the CTM files. (Default: 1, i.e. utt_id will be the stem of the basename of audio_filepath). Note also that any spaces that are present in the audio_filepath will be replaced with dashes, so as not to change the number of space-separated elements in the CTM files. -* **[OPTIONAL]** `remove_blank_tokens_from_ctm`: a boolean denoting whether to remove tokens from token-level output CTMs. (Default: False). +* `minimum_timestamp_duration`: a float indicating a minimum duration (in seconds) for timestamps in the CTM. If any line in the CTM has a duration lower than the `minimum_timestamp_duration`, it will be enlarged from the middle outwards until it meets the minimum_timestamp_duration, or reaches the beginning or end of the audio file. Note that this may cause timestamps to overlap. (Default: 0, i.e. no modifications to predicted duration). -* **[OPTIONAL]** `audio_filepath_parts_in_utt_id`: This specifies how many of the 'parts' of the audio_filepath we will use (starting from the final part of the audio_filepath) to determine the utt_id that will be used in the CTM files. (Default: 1, i.e. utt_id will be the stem of the basename of audio_filepath). Note also that any spaces that are present in the audio_filepath will be replaced with dashes, so as not to change the number of space-separated elements in the CTM files. +* `use_buffered_chunked_streaming`: a flag to indicate whether to do buffered chunk streaming. Notice only CTC models (e.g., stt_en_citrinet_1024_gamma_0_25)with `per_feature` preprocessor are supported. The below two params are needed if this option set to `True`. -* **[OPTIONAL]** `minimum_timestamp_duration`: a float indicating a minimum duration (in seconds) for timestamps in the CTM. If any line in the CTM has a duration lower than the `minimum_timestamp_duration`, it will be enlarged from the middle outwards until it meets the minimum_timestamp_duration, or reaches the beginning or end of the audio file. Note that this may cause timestamps to overlap. (Default: 0, i.e. no modifications to predicted duration). +* `chunk_len_in_secs`: the chunk size for buffered chunked streaming inference. Default is 1.6 seconds. -* **[OPTIONAL]** `use_buffered_chunked_streaming`: a flag to indicate whether to do buffered chunk streaming. Notice only CTC models (e.g., stt_en_citrinet_1024_gamma_0_25)with `per_feature` preprocessor are supported. The below two params are needed if this option set to `True`. +* `total_buffer_in_secs`: the buffer size for buffered chunked streaming inference. Default is 4.0 seconds. -* **[OPTIONAL]** `chunk_len_in_secs`: the chunk size for buffered chunked streaming inference. Default is 1.6 seconds. +* `simulate_cache_aware_streaming`: a flag to indicate whether to use cache aware streaming to do get the logits for alignment. Default: `False`. -* **[OPTIONAL]** `total_buffer_in_secs`: the buffer size for buffered chunked streaming inference. Default is 4.0 seconds. +* `save_output_file_formats`: list of file formats to use for saving the output. Default: `["ctm", "ass"]` (these are all the available ones currently). + +* `ctm_file_config`: `CTMFileConfig` to specify the configuration of the output CTM files. + +* `ass_file_config`: `ASSFileConfig` to specify the configuration of the output ASS files. # Input manifest file format By default, NFA needs to be provided with a 'manifest' file where each line specifies the absolute "audio_filepath" and "text" of each utterance that you wish to produce alignments for, like the format below: @@ -58,25 +67,35 @@ By default, NFA needs to be provided with a 'manifest' file where each line spec {"audio_filepath": "/absolute/path/to/audio.wav", "text": "the transcription of the utterance"} ``` -You can omit the `"text"` field from the manifest if you specify `align_using_pred_text=true`. In that case, any `"text"` fields in the manifest will be ignored: the ASR model at `pretrained_name` or `model_path` will be used to transcribe the audio and obtain `"pred_text"`, which will be used as the 'ground truth' for the forced alignment process. The `"pred_text"` will also be saved in the output manifest JSON file at `/_with_ctm_paths.json`. To remove the possibility of overwriting `"pred_text"`, NFA will raise an error if `align_using_pred_text=true` and there are existing `"pred_text"` fields in the original manifest. +You can omit the `"text"` field from the manifest if you specify `align_using_pred_text=true`. In that case, any `"text"` fields in the manifest will be ignored: the ASR model at `pretrained_name` or `model_path` will be used to transcribe the audio and obtain `"pred_text"`, which will be used as the reference text for the forced alignment process. The `"pred_text"` will also be saved in the output manifest JSON file at `/_with_output_file_paths.json`. To remove the possibility of overwriting `"pred_text"`, NFA will raise an error if `align_using_pred_text=true` and there are existing `"pred_text"` fields in the original manifest. -> Note: NFA does not require `"duration"` fields in the manifest, and can align long audio files without running out of memory. Depending on your machine specs, you can align audios up to 5-10 minutes on Conformer CTC models, up to around 1.5 hours for QuartzNet models, and up to several hours for Citrinet models. NFA will also produce better alignments the more accurate the ground-truth `"text"` is. +> Note: NFA does not require `"duration"` fields in the manifest, and can align long audio files without running out of memory. The duration of audio file you can align will depend on the amount of memory on your machine. NFA will also produce better alignments the more accurate the reference text in `"text"` is. # Output CTM file format For each utterance specified in a line of `manifest_filepath`, several CTM files will be generated: -* a CTM file containing token-level alignments at `/tokens/.ctm`, -* a CTM file containing word-level alignments at `/words/.ctm`, -* if `additional_ctm_grouping_separator` is specified, there will also be a CTM file containing those segments at `output_dir/additional_segments`. +* a CTM file containing token-level alignments at `/ctm/tokens/.ctm`, +* a CTM file containing word-level alignments at `/ctm/words/.ctm`, +* a CTM file containing segment-level alignments at `/ctm/segments/.ctm`. If `additional_segment_grouping_separator` is specified, the segments will be parts of the text separated by `additonal_segment_grouping_separator`. If it is not specified, the entire text will be treated as a single segment. + Each CTM file will contain lines of the format: ` 1 `. Note the second item in the line (the 'channel ID', which is required by the CTM file format) is always 1, as NFA operates on single channel audio. +# Output ASS file format +NFA will produce the following ASS files, which you can use to generate subtitle videos: +* ASS files with token-level highlighting will be at `/ass/tokens/.ass,` +* ASS files with word-level highlighting will be at `/ass/words/.ass`. +All words belonging to the same segment 'segments' will appear at the same time in the subtitles generated with the ASS files. If you find that your segments are not the right size, you can use set `ass_file_config.resegment_text_to_fill_space=true` and specify some number of `ass_file_config.max_lines_per_segment`. + + # Output JSON manifest file format -A new manifest file will be saved at `/_with_ctm_paths.json`. It will contain the same fields as the original manifest, and additionally: -* `"token_level_ctm_filepath"` -* `"word_level_ctm_filepath"` -* `"additonal_segment_level_ctm_filepath"` (if `additional_ctm_grouping_separator` is specified) +A new manifest file will be saved at `/_with_output_file_paths.json`. It will contain the same fields as the original manifest, and additionally: +* `"token_level_ctm_filepath"` (if `save_output_file_formats` contains `ctm`) +* `"word_level_ctm_filepath"` (if `save_output_file_formats` contains `ctm`) +* `"segment_level_ctm_filepath"` (if `save_output_file_formats` contains `ctm`) +* `"token_level_ass_filepath"` (if `save_output_file_formats` contains `ass`) +* `"word_level_ass_filepath"` (if `save_output_file_formats` contains `ass`) * `"pred_text"` (if `align_using_pred_text=true`) diff --git a/tools/nemo_forced_aligner/align.py b/tools/nemo_forced_aligner/align.py index ed3ca3e45b5b..296c4a009cc4 100644 --- a/tools/nemo_forced_aligner/align.py +++ b/tools/nemo_forced_aligner/align.py @@ -15,22 +15,27 @@ import copy import math import os -from dataclasses import dataclass, is_dataclass -from typing import Optional +from dataclasses import dataclass, field, is_dataclass +from pathlib import Path +from typing import List, Optional import torch from omegaconf import OmegaConf from utils.data_prep import ( + add_t_start_end_to_utt_obj, get_batch_starts_ends, - get_batch_tensors_and_boundary_info, + get_batch_variables, get_manifest_lines_batch, is_entry_in_all_lines, is_entry_in_any_lines, ) -from utils.make_output_files import make_ctm, make_new_manifest +from utils.make_ass_files import make_ass_files +from utils.make_ctm_files import make_ctm_files +from utils.make_output_manifest import write_manifest_out_line from utils.viterbi_decoding import viterbi_decoding from nemo.collections.asr.models.ctc_models import EncDecCTCModel +from nemo.collections.asr.models.hybrid_rnnt_ctc_models import EncDecHybridRNNTCTCModel from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchASR from nemo.collections.asr.parts.utils.transcribe_utils import setup_model from nemo.core.config import hydra_runner @@ -48,16 +53,11 @@ log-probs which we will use to do alignment. Note: NFA can only use CTC models (not Transducer models) at the moment. Note: if a model_path is provided, it will override the pretrained_name. - model_downsample_factor: an int indicating the downsample factor of the ASR model, ie the ratio of input - timesteps to output timesteps. - If the ASR model is a QuartzNet model, its downsample factor is 2. - If the ASR model is a Conformer CTC model, its downsample factor is 4. - If the ASR model is a Citirnet model, its downsample factor is 8. manifest_filepath: filepath to the manifest of the data you want to align, containing 'audio_filepath' and 'text' fields. output_dir: the folder where output CTM files and new JSON manifest will be saved. align_using_pred_text: if True, will transcribe the audio using the specified model and then use that transcription - as the 'ground truth' for the forced alignment. + as the reference text for the forced alignment. transcribe_device: None, or a string specifying the device that will be used for generating log-probs (i.e. "transcribing"). The string needs to be in a format recognized by torch.device(). If None, NFA will set it to 'cuda' if it is available (otherwise will set it to 'cpu'). @@ -65,12 +65,11 @@ The string needs to be in a format recognized by torch.device(). If None, NFA will set it to 'cuda' if it is available (otherwise will set it to 'cpu'). batch_size: int specifying batch size that will be used for generating log-probs and doing Viterbi decoding. - additional_ctm_grouping_separator: the string used to separate CTM segments if you want to obtain CTM files at a - level that is not the token level or the word level. NFA will always produce token-level and word-level CTM - files in: `/tokens/.ctm` and `/words/.ctm`. - If `additional_ctm_grouping_separator` is specified, an additional folder - `/{tokens/words/additional_segments}/.ctm` will be created containing CTMs - for `addtional_ctm_grouping_separator`-separated segments. + use_local_attention: boolean flag specifying whether to try to use local attention for the ASR Model (will only + work if the ASR Model is a Conformer model). If local attention is used, we will set the local attention context + size to [64,64]. + additional_segment_grouping_separator: an optional string used to separate the text into smaller segments. + If this is not specified, then the whole text will be treated as a single segment. remove_blank_tokens_from_ctm: a boolean denoting whether to remove tokens from token-level output CTMs. audio_filepath_parts_in_utt_id: int specifying how many of the 'parts' of the audio_filepath we will use (starting from the final part of the audio_filepath) to determine the @@ -80,11 +79,6 @@ e.g. if audio_filepath is "/a/b/c/d/e 1.wav" and audio_filepath_parts_in_utt_id is 1 => utt_id will be "e1" e.g. if audio_filepath is "/a/b/c/d/e 1.wav" and audio_filepath_parts_in_utt_id is 2 => utt_id will be "d_e1" e.g. if audio_filepath is "/a/b/c/d/e 1.wav" and audio_filepath_parts_in_utt_id is 3 => utt_id will be "c_d_e1" - minimum_timestamp_duration: a float indicating a minimum duration (in seconds) for timestamps in the CTM. If any - line in the CTM has a duration lower than the `minimum_timestamp_duration`, it will be enlarged from the - middle outwards until it meets the minimum_timestamp_duration, or reaches the beginning or end of the audio - file. Note that this may cause timestamps to overlap. - use_buffered_infer: False, if set True, using streaming to do get the logits for alignment This flag is useful when aligning large audio file. However, currently the chunk streaming inference does not support batch inference, @@ -96,15 +90,39 @@ which will cut one audio into segments and do inference on chunk_batch_size segments at a time simulate_cache_aware_streaming: False, if set True, using cache aware streaming to do get the logits for alignment + + save_output_file_formats: List of strings specifying what type of output files to save (default: ["ctm", "ass"]) + ctm_file_config: CTMFileConfig to specify the configuration of the output CTM files + ass_file_config: ASSFileConfig to specify the configuration of the output ASS files """ +@dataclass +class CTMFileConfig: + remove_blank_tokens: bool = False + # minimum duration (in seconds) for timestamps in the CTM.If any line in the CTM has a + # duration lower than this, it will be enlarged from the middle outwards until it + # meets the minimum_timestamp_duration, or reaches the beginning or end of the audio file. + # Note that this may cause timestamps to overlap. + minimum_timestamp_duration: float = 0 + + +@dataclass +class ASSFileConfig: + fontsize: int = 20 + marginv: int = 20 + # if resegment_text_to_fill_space is True, the ASS files will use new segments + # such that each segment will not take up more than (approximately) max_lines_per_segment + # when the ASS file is applied to a video + resegment_text_to_fill_space: bool = False + max_lines_per_segment: int = 2 + + @dataclass class AlignmentConfig: # Required configs pretrained_name: Optional[str] = None model_path: Optional[str] = None - model_downsample_factor: Optional[int] = None manifest_filepath: Optional[str] = None output_dir: Optional[str] = None @@ -113,9 +131,8 @@ class AlignmentConfig: transcribe_device: Optional[str] = None viterbi_device: Optional[str] = None batch_size: int = 1 - additional_ctm_grouping_separator: Optional[str] = None - remove_blank_tokens_from_ctm: bool = False - minimum_timestamp_duration: float = 0 + use_local_attention: bool = True + additional_segment_grouping_separator: Optional[str] = None audio_filepath_parts_in_utt_id: int = 1 # Buffered chunked streaming configs @@ -127,6 +144,11 @@ class AlignmentConfig: # Cache aware streaming configs simulate_cache_aware_streaming: Optional[bool] = False + # Output file configs + save_output_file_formats: List[str] = field(default_factory=lambda: ["ctm", "ass"]) + ctm_file_config: CTMFileConfig = CTMFileConfig() + ass_file_config: ASSFileConfig = ASSFileConfig() + @hydra_runner(config_name="AlignmentConfig", schema=AlignmentConfig) def main(cfg: AlignmentConfig): @@ -143,9 +165,6 @@ def main(cfg: AlignmentConfig): if cfg.model_path is not None and cfg.pretrained_name is not None: raise ValueError("One of cfg.model_path and cfg.pretrained_name must be None") - if cfg.model_downsample_factor is None: - raise ValueError("cfg.model_downsample_factor must be specified") - if cfg.manifest_filepath is None: raise ValueError("cfg.manifest_filepath must be specified") @@ -155,10 +174,10 @@ def main(cfg: AlignmentConfig): if cfg.batch_size < 1: raise ValueError("cfg.batch_size cannot be zero or a negative number") - if cfg.additional_ctm_grouping_separator == "" or cfg.additional_ctm_grouping_separator == " ": + if cfg.additional_segment_grouping_separator == "" or cfg.additional_segment_grouping_separator == " ": raise ValueError("cfg.additional_grouping_separator cannot be empty string or space character") - if cfg.minimum_timestamp_duration < 0: + if cfg.ctm_file_config.minimum_timestamp_duration < 0: raise ValueError("cfg.minimum_timestamp_duration cannot be a negative number") # Validate manifest contents @@ -179,18 +198,18 @@ def main(cfg: AlignmentConfig): if not is_entry_in_all_lines(cfg.manifest_filepath, "text"): raise RuntimeError( "At least one line in cfg.manifest_filepath does not contain a 'text' entry. " - "NFA requires all lines to contain a 'text' entry when cfg.align_using_pred_text=True." + "NFA requires all lines to contain a 'text' entry when cfg.align_using_pred_text=False." ) # init devices if cfg.transcribe_device is None: - transcribe_device = torch.device("cuda" if torch.cuda.is_available else "cpu") + transcribe_device = torch.device("cuda" if torch.cuda.is_available() else "cpu") else: transcribe_device = torch.device(cfg.transcribe_device) logging.info(f"Device to be used for transcription step (`transcribe_device`) is {transcribe_device}") if cfg.viterbi_device is None: - viterbi_device = torch.device("cuda" if torch.cuda.is_available else "cpu") + viterbi_device = torch.device("cuda" if torch.cuda.is_available() else "cpu") else: viterbi_device = torch.device(cfg.viterbi_device) logging.info(f"Device to be used for viterbi step (`viterbi_device`) is {viterbi_device}") @@ -205,15 +224,24 @@ def main(cfg: AlignmentConfig): model, _ = setup_model(cfg, transcribe_device) model.eval() - if not isinstance(model, EncDecCTCModel): + if isinstance(model, EncDecHybridRNNTCTCModel): + model.change_decoding_strategy(decoder_type="ctc") + + if cfg.use_local_attention: + logging.info( + "Flag use_local_attention is set to True => will try to use local attention for model if it allows it" + ) + model.change_attention_model(self_attention_model="rel_pos_local_attn", att_context_size=[64, 64]) + + if not (isinstance(model, EncDecCTCModel) or isinstance(model, EncDecHybridRNNTCTCModel)): raise NotImplementedError( - f"Model {cfg.model_name} is not an instance of NeMo EncDecCTCModel." - " Currently only instances of EncDecCTCModels are supported" + f"Model is not an instance of NeMo EncDecCTCModel or ENCDecHybridRNNTCTCModel." + " Currently only instances of these models are supported" ) - if cfg.minimum_timestamp_duration > 0: + if cfg.ctm_file_config.minimum_timestamp_duration > 0: logging.warning( - f"cfg.minimum_timestamp_duration has been set to {cfg.minimum_timestamp_duration} seconds. " + f"cfg.ctm_file_config.minimum_timestamp_duration has been set to {cfg.ctm_file_config.minimum_timestamp_duration} seconds. " "This may cause the alignments for some tokens/words/additional segments to be overlapping." ) @@ -255,84 +283,48 @@ def main(cfg: AlignmentConfig): # get start and end line IDs of batches starts, ends = get_batch_starts_ends(cfg.manifest_filepath, cfg.batch_size) - if cfg.align_using_pred_text: - # record pred_texts to save them in the new manifest at the end of this script - pred_text_all_lines = [] - else: - pred_text_all_lines = None + # init output_timestep_duration = None and we will calculate and update it during the first batch + output_timestep_duration = None + + # init f_manifest_out + os.makedirs(cfg.output_dir, exist_ok=True) + tgt_manifest_name = str(Path(cfg.manifest_filepath).stem) + "_with_output_file_paths.json" + tgt_manifest_filepath = str(Path(cfg.output_dir) / tgt_manifest_name) + f_manifest_out = open(tgt_manifest_filepath, 'w') # get alignment and save in CTM batch-by-batch for start, end in zip(starts, ends): manifest_lines_batch = get_manifest_lines_batch(cfg.manifest_filepath, start, end) - ( - log_probs_batch, - y_batch, - T_batch, - U_batch, - token_info_batch, - word_info_batch, - segment_info_batch, - pred_text_batch, - ) = get_batch_tensors_and_boundary_info( + (log_probs_batch, y_batch, T_batch, U_batch, utt_obj_batch, output_timestep_duration,) = get_batch_variables( manifest_lines_batch, model, - cfg.additional_ctm_grouping_separator, + cfg.additional_segment_grouping_separator, cfg.align_using_pred_text, + cfg.audio_filepath_parts_in_utt_id, + output_timestep_duration, cfg.simulate_cache_aware_streaming, cfg.use_buffered_chunked_streaming, buffered_chunk_params, ) - if cfg.align_using_pred_text: - pred_text_all_lines.extend(pred_text_batch) - alignments_batch = viterbi_decoding(log_probs_batch, y_batch, T_batch, U_batch, viterbi_device) - make_ctm( - token_info_batch, - alignments_batch, - manifest_lines_batch, - model, - cfg.model_downsample_factor, - os.path.join(cfg.output_dir, "tokens"), - cfg.remove_blank_tokens_from_ctm, - cfg.audio_filepath_parts_in_utt_id, - cfg.minimum_timestamp_duration, - ) + for utt_obj, alignment_utt in zip(utt_obj_batch, alignments_batch): - make_ctm( - word_info_batch, - alignments_batch, - manifest_lines_batch, - model, - cfg.model_downsample_factor, - os.path.join(cfg.output_dir, "words"), - False, # dont try to remove blank tokens because we dont expect them to be there anyway - cfg.audio_filepath_parts_in_utt_id, - cfg.minimum_timestamp_duration, - ) + utt_obj = add_t_start_end_to_utt_obj(utt_obj, alignment_utt, output_timestep_duration) + + if "ctm" in cfg.save_output_file_formats: + utt_obj = make_ctm_files(utt_obj, cfg.output_dir, cfg.ctm_file_config,) + + if "ass" in cfg.save_output_file_formats: + utt_obj = make_ass_files(utt_obj, cfg.output_dir, cfg.ass_file_config) - if cfg.additional_ctm_grouping_separator: - make_ctm( - segment_info_batch, - alignments_batch, - manifest_lines_batch, - model, - cfg.model_downsample_factor, - os.path.join(cfg.output_dir, "additional_segments"), - False, # dont try to remove blank tokens because we dont expect them to be there anyway - cfg.audio_filepath_parts_in_utt_id, - cfg.minimum_timestamp_duration, + write_manifest_out_line( + f_manifest_out, utt_obj, ) - make_new_manifest( - cfg.output_dir, - cfg.manifest_filepath, - cfg.additional_ctm_grouping_separator, - cfg.audio_filepath_parts_in_utt_id, - pred_text_all_lines, - ) + f_manifest_out.close() return None diff --git a/tools/nemo_forced_aligner/requirements.txt b/tools/nemo_forced_aligner/requirements.txt index 3af8ebf1b488..9daa6d2f2496 100644 --- a/tools/nemo_forced_aligner/requirements.txt +++ b/tools/nemo_forced_aligner/requirements.txt @@ -1,2 +1,3 @@ nemo_toolkit[all] -pytest +prettyprinter # for testing +pytest # for testing diff --git a/tools/nemo_forced_aligner/tests/test_add_t_start_end_to_boundary_info.py b/tools/nemo_forced_aligner/tests/test_add_t_start_end_to_boundary_info.py deleted file mode 100644 index 406c4be1fb70..000000000000 --- a/tools/nemo_forced_aligner/tests/test_add_t_start_end_to_boundary_info.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest -from utils.make_output_files import add_t_start_end_to_boundary_info - -ALIGNMENT = [ - 1, - 1, - 3, - 3, - 4, - 5, - 7, - 7, - 9, - 10, - 11, - 12, - 13, - 15, - 17, - 17, - 19, - 21, - 23, - 23, -] - -INPUT_TOKEN_INFO = [ - {'text': '', 's_start': 0, 's_end': 0}, - {'text': 'h', 's_start': 1, 's_end': 1}, - {'text': '', 's_start': 2, 's_end': 2}, - {'text': 'i', 's_start': 3, 's_end': 3}, - {'text': '', 's_start': 4, 's_end': 4}, - {'text': '', 's_start': 5, 's_end': 5}, - {'text': '', 's_start': 6, 's_end': 6}, - {'text': 'w', 's_start': 7, 's_end': 7}, - {'text': '', 's_start': 8, 's_end': 8}, - {'text': 'o', 's_start': 9, 's_end': 9}, - {'text': '', 's_start': 10, 's_end': 10}, - {'text': 'r', 's_start': 11, 's_end': 11}, - {'text': '', 's_start': 12, 's_end': 12}, - {'text': 'l', 's_start': 13, 's_end': 13}, - {'text': '', 's_start': 14, 's_end': 14}, - {'text': 'd', 's_start': 15, 's_end': 15}, - {'text': '', 's_start': 16, 's_end': 16}, - {'text': '', 's_start': 17, 's_end': 17}, - {'text': '', 's_start': 18, 's_end': 18}, - {'text': 'h', 's_start': 19, 's_end': 19}, - {'text': '', 's_start': 20, 's_end': 20}, - {'text': 'e', 's_start': 21, 's_end': 21}, - {'text': '', 's_start': 22, 's_end': 22}, - {'text': 'y', 's_start': 23, 's_end': 23}, - {'text': '', 's_start': 24, 's_end': 24}, -] - -EXPECTED_OUTPUT_TOKEN_INFO = [ - {'text': 'h', 's_start': 1, 's_end': 1, 't_start': 0, 't_end': 1}, - {'text': 'i', 's_start': 3, 's_end': 3, 't_start': 2, 't_end': 3}, - {'text': '', 's_start': 4, 's_end': 4, 't_start': 4, 't_end': 4}, - {'text': '', 's_start': 5, 's_end': 5, 't_start': 5, 't_end': 5}, - {'text': 'w', 's_start': 7, 's_end': 7, 't_start': 6, 't_end': 7}, - {'text': 'o', 's_start': 9, 's_end': 9, 't_start': 8, 't_end': 8}, - {'text': '', 's_start': 10, 's_end': 10, 't_start': 9, 't_end': 9}, - {'text': 'r', 's_start': 11, 's_end': 11, 't_start': 10, 't_end': 10}, - {'text': '', 's_start': 12, 's_end': 12, 't_start': 11, 't_end': 11}, - {'text': 'l', 's_start': 13, 's_end': 13, 't_start': 12, 't_end': 12}, - {'text': 'd', 's_start': 15, 's_end': 15, 't_start': 13, 't_end': 13}, - {'text': '', 's_start': 17, 's_end': 17, 't_start': 14, 't_end': 15}, - {'text': 'h', 's_start': 19, 's_end': 19, 't_start': 16, 't_end': 16}, - {'text': 'e', 's_start': 21, 's_end': 21, 't_start': 17, 't_end': 17}, - {'text': 'y', 's_start': 23, 's_end': 23, 't_start': 18, 't_end': 19}, -] - - -INPUT_WORD_INFO = [ - {'text': 'hi', 's_start': 1, 's_end': 3}, - {'text': 'world', 's_start': 7, 's_end': 15}, - {'text': 'hey', 's_start': 19, 's_end': 23}, -] - -EXPECTED_OUTPUT_WORD_INFO = [ - {'text': 'hi', 's_start': 1, 's_end': 3, 't_start': 0, 't_end': 3}, - {'text': 'world', 's_start': 7, 's_end': 15, 't_start': 6, 't_end': 13}, - {'text': 'hey', 's_start': 19, 's_end': 23, 't_start': 16, 't_end': 19}, -] - -INPUT_SEGMENT_INFO = [ - {'text': 'hi world', 's_start': 1, 's_end': 15}, - {'text': 'hey', 's_start': 19, 's_end': 23}, -] - -EXPECTED_OUTPUT_SEGMENT_INFO = [ - {'text': 'hi world', 's_start': 1, 's_end': 15, 't_start': 0, 't_end': 13}, - {'text': 'hey', 's_start': 19, 's_end': 23, 't_start': 16, 't_end': 19}, -] - - -@pytest.mark.parametrize( - "input_boundary_info_utt,alignment_utt,expected_output_boundary_info_utt", - [ - (INPUT_TOKEN_INFO, ALIGNMENT, EXPECTED_OUTPUT_TOKEN_INFO), - (INPUT_WORD_INFO, ALIGNMENT, EXPECTED_OUTPUT_WORD_INFO), - (INPUT_SEGMENT_INFO, ALIGNMENT, EXPECTED_OUTPUT_SEGMENT_INFO), - ], -) -def test_add_t_start_end_to_boundary_info(input_boundary_info_utt, alignment_utt, expected_output_boundary_info_utt): - output_boundary_info_utt = add_t_start_end_to_boundary_info(input_boundary_info_utt, alignment_utt) - assert output_boundary_info_utt == expected_output_boundary_info_utt diff --git a/tools/nemo_forced_aligner/tests/test_add_t_start_end_to_utt_obj.py b/tools/nemo_forced_aligner/tests/test_add_t_start_end_to_utt_obj.py new file mode 100644 index 000000000000..62092d5afaeb --- /dev/null +++ b/tools/nemo_forced_aligner/tests/test_add_t_start_end_to_utt_obj.py @@ -0,0 +1,288 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy + +import pytest +from utils.data_prep import Segment, Token, Utterance, Word, add_t_start_end_to_utt_obj + +OUTPUT_TIMESTEP_DURATION = 0.04 + +ALIGNMENT = [ + 1, + 1, + 3, + 3, + 4, + 5, + 7, + 7, + 9, + 10, + 11, + 12, + 13, + 15, + 17, + 17, + 19, + 21, + 23, + 23, +] + +EXPECTED_OUTPUT_UTTERANCE = Utterance( + text='hi world | hey', + token_ids_with_blanks=[ + 28, + 8, + 28, + 9, + 28, + 0, + 28, + 23, + 28, + 15, + 28, + 18, + 28, + 12, + 28, + 4, + 28, + 0, + 28, + 8, + 28, + 5, + 28, + 25, + 28, + ], + segments_and_tokens=[ + Token(text='', text_cased='', s_start=0, s_end=0, t_start=-1, t_end=-1), + Segment( + text="hi world", + s_start=1, + s_end=15, + t_start=0 * OUTPUT_TIMESTEP_DURATION, + t_end=14 * OUTPUT_TIMESTEP_DURATION, + words_and_tokens=[ + Word( + text="hi", + s_start=1, + s_end=3, + t_start=0 * OUTPUT_TIMESTEP_DURATION, + t_end=4 * OUTPUT_TIMESTEP_DURATION, + tokens=[ + Token( + text='h', + text_cased='h', + s_start=1, + s_end=1, + t_start=0 * OUTPUT_TIMESTEP_DURATION, + t_end=2 * OUTPUT_TIMESTEP_DURATION, + ), + Token(text='', text_cased='', s_start=2, s_end=2, t_start=-1, t_end=-1), + Token( + text='i', + text_cased='i', + s_start=3, + s_end=3, + t_start=2 * OUTPUT_TIMESTEP_DURATION, + t_end=4 * OUTPUT_TIMESTEP_DURATION, + ), + ], + ), + Token( + text='', + text_cased='', + s_start=4, + s_end=4, + t_start=4 * OUTPUT_TIMESTEP_DURATION, + t_end=5 * OUTPUT_TIMESTEP_DURATION, + ), + Token( + text='', + text_cased='', + s_start=5, + s_end=5, + t_start=5 * OUTPUT_TIMESTEP_DURATION, + t_end=6 * OUTPUT_TIMESTEP_DURATION, + ), + Token(text='', text_cased='', s_start=6, s_end=6, t_start=-1, t_end=-1), + Word( + text="world", + s_start=7, + s_end=15, + t_start=6 * OUTPUT_TIMESTEP_DURATION, + t_end=14 * OUTPUT_TIMESTEP_DURATION, + tokens=[ + Token( + text='w', + text_cased='w', + s_start=7, + s_end=7, + t_start=6 * OUTPUT_TIMESTEP_DURATION, + t_end=8 * OUTPUT_TIMESTEP_DURATION, + ), + Token(text='', text_cased='', s_start=8, s_end=8, t_start=-1, t_end=-1), + Token( + text='o', + text_cased='o', + s_start=9, + s_end=9, + t_start=8 * OUTPUT_TIMESTEP_DURATION, + t_end=9 * OUTPUT_TIMESTEP_DURATION, + ), + Token( + text='', + text_cased='', + s_start=10, + s_end=10, + t_start=9 * OUTPUT_TIMESTEP_DURATION, + t_end=10 * OUTPUT_TIMESTEP_DURATION, + ), + Token( + text='r', + text_cased='r', + s_start=11, + s_end=11, + t_start=10 * OUTPUT_TIMESTEP_DURATION, + t_end=11 * OUTPUT_TIMESTEP_DURATION, + ), + Token( + text='', + text_cased='', + s_start=12, + s_end=12, + t_start=11 * OUTPUT_TIMESTEP_DURATION, + t_end=12 * OUTPUT_TIMESTEP_DURATION, + ), + Token( + text='l', + text_cased='l', + s_start=13, + s_end=13, + t_start=12 * OUTPUT_TIMESTEP_DURATION, + t_end=13 * OUTPUT_TIMESTEP_DURATION, + ), + Token(text='', text_cased='', s_start=14, s_end=14, t_start=-1, t_end=-1), + Token( + text='d', + text_cased='d', + s_start=15, + s_end=15, + t_start=13 * OUTPUT_TIMESTEP_DURATION, + t_end=14 * OUTPUT_TIMESTEP_DURATION, + ), + ], + ), + ], + ), + Token(text='', text_cased='', s_start=16, s_end=16, t_start=-1, t_end=-1), + Token( + text='', + text_cased='', + s_start=17, + s_end=17, + t_start=14 * OUTPUT_TIMESTEP_DURATION, + t_end=16 * OUTPUT_TIMESTEP_DURATION, + ), + Token(text='', text_cased='', s_start=18, s_end=18, t_start=-1, t_end=-1), + Segment( + text="hey", + s_start=19, + s_end=23, + t_start=16 * OUTPUT_TIMESTEP_DURATION, + t_end=20 * OUTPUT_TIMESTEP_DURATION, + words_and_tokens=[ + Word( + text="hey", + s_start=19, + s_end=23, + t_start=16 * OUTPUT_TIMESTEP_DURATION, + t_end=20 * OUTPUT_TIMESTEP_DURATION, + tokens=[ + Token( + text='h', + text_cased='h', + s_start=19, + s_end=19, + t_start=16 * OUTPUT_TIMESTEP_DURATION, + t_end=17 * OUTPUT_TIMESTEP_DURATION, + ), + Token(text='', text_cased='', s_start=20, s_end=20, t_start=-1, t_end=-1), + Token( + text='e', + text_cased='e', + s_start=21, + s_end=21, + t_start=17 * OUTPUT_TIMESTEP_DURATION, + t_end=18 * OUTPUT_TIMESTEP_DURATION, + ), + Token(text='', text_cased='', s_start=22, s_end=22, t_start=-1, t_end=-1), + Token( + text='y', + text_cased='y', + s_start=23, + s_end=23, + t_start=18 * OUTPUT_TIMESTEP_DURATION, + t_end=20 * OUTPUT_TIMESTEP_DURATION, + ), + ], + ) + ], + ), + Token(text='', text_cased='', s_start=24, s_end=24, t_start=-1, t_end=-1), + ], +) + + +@pytest.mark.parametrize( + "alignment,expected_output_utterance, output_timestep_duration", + [(ALIGNMENT, EXPECTED_OUTPUT_UTTERANCE, OUTPUT_TIMESTEP_DURATION),], +) +def test_add_t_start_end_to_utt_obj(alignment, expected_output_utterance, output_timestep_duration): + input_utterance = copy.deepcopy(expected_output_utterance) + + # set all t_start and t_end to None in input_utterance + for segment_or_token in input_utterance.segments_and_tokens: + if type(segment_or_token) is Segment: + segment = segment_or_token + segment.t_start = None + segment.t_end = None + + for word_or_token in segment.words_and_tokens: + if type(word_or_token) is Word: + word = word_or_token + word.t_start = None + word.t_end = None + + for token in word.tokens: + token.t_start = None + token.t_end = None + else: + token = word_or_token + token.t_start = None + token.t_end = None + + else: + token = segment_or_token + token.t_start = None + token.t_end = None + + output_utterance = add_t_start_end_to_utt_obj(input_utterance, alignment, output_timestep_duration) + assert output_utterance == expected_output_utterance diff --git a/tools/nemo_forced_aligner/tests/test_get_utt_obj.py b/tools/nemo_forced_aligner/tests/test_get_utt_obj.py new file mode 100644 index 000000000000..31dd978263c0 --- /dev/null +++ b/tools/nemo_forced_aligner/tests/test_get_utt_obj.py @@ -0,0 +1,344 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import prettyprinter +import pytest +from prettyprinter import pretty_call, register_pretty +from utils.data_prep import Segment, Token, Utterance, Word, get_utt_obj + +from nemo.collections.asr.models import ASRModel + + +def get_utt_obj_pp_string(utt_obj): + @register_pretty(Word) + def pretty_utterance(value, ctx): + return pretty_call( + ctx, + Word, + text=value.text, + s_start=value.s_start, + s_end=value.s_end, + t_start=value.t_start, + t_end=value.t_end, + tokens=value.tokens, + ) + + @register_pretty(Segment) + def pretty_utterance(value, ctx): + return pretty_call( + ctx, + Segment, + text=value.text, + s_start=value.s_start, + s_end=value.s_end, + t_start=value.t_start, + t_end=value.t_end, + words_and_tokens=value.words_and_tokens, + ) + + @register_pretty(Utterance) + def pretty_utterance(value, ctx): + return pretty_call( + ctx, + Utterance, + text=value.text, + token_ids_with_blanks=value.token_ids_with_blanks, + segments_and_tokens=value.segments_and_tokens, + audio_filepath=value.audio_filepath, + utt_id=value.utt_id, + ) + + return prettyprinter.pformat(utt_obj) + + +T_FOR_TEST = 999 +AUDIO_FILEPATH_FOR_TEST = "arbitrary_string.wav" +UTT_ID_FOR_TEST = "arbitrary_string" + +EN_TEXT = "hi world | hey" + +EN_CN_EXPECTED_UTTERANCE = Utterance( + text='hi world | hey', + token_ids_with_blanks=[1024, 317, 1024, 472, 1024, 25, 1024, 20, 1024], + segments_and_tokens=[ + Token(text='', text_cased='', s_start=0, s_end=0, t_start=None, t_end=None), + Segment( + text='hi world', + s_start=1, + s_end=3, + t_start=None, + t_end=None, + words_and_tokens=[ + Word( + text='hi', + s_start=1, + s_end=1, + t_start=None, + t_end=None, + tokens=[Token(text='▁hi', text_cased='▁hi', s_start=1, s_end=1, t_start=None, t_end=None)], + ), + Token(text='', text_cased='', s_start=2, s_end=2, t_start=None, t_end=None), + Word( + text='world', + s_start=3, + s_end=3, + t_start=None, + t_end=None, + tokens=[Token(text='▁world', text_cased='▁world', s_start=3, s_end=3, t_start=None, t_end=None)], + ), + ], + ), + Token(text='', text_cased='', s_start=4, s_end=4, t_start=None, t_end=None), + Segment( + text='hey', + s_start=5, + s_end=7, + t_start=None, + t_end=None, + words_and_tokens=[ + Word( + text='hey', + s_start=5, + s_end=7, + t_start=None, + t_end=None, + tokens=[ + Token(text='▁he', text_cased='▁he', s_start=5, s_end=5, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=6, s_end=6, t_start=None, t_end=None), + Token(text='y', text_cased='y', s_start=7, s_end=7, t_start=None, t_end=None), + ], + ) + ], + ), + Token(text='', text_cased='', s_start=8, s_end=8, t_start=None, t_end=None), + ], + audio_filepath=AUDIO_FILEPATH_FOR_TEST, + utt_id=UTT_ID_FOR_TEST, +) + +EN_QN_EXPECTED_UTTERANCE = Utterance( + text='hi world | hey', + token_ids_with_blanks=[ + 28, + 8, + 28, + 9, + 28, + 0, + 28, + 23, + 28, + 15, + 28, + 18, + 28, + 12, + 28, + 4, + 28, + 0, + 28, + 8, + 28, + 5, + 28, + 25, + 28, + ], + segments_and_tokens=[ + Token(text='', text_cased='', s_start=0, s_end=0, t_start=None, t_end=None), + Segment( + text="hi world", + s_start=1, + s_end=15, + t_start=None, + t_end=None, + words_and_tokens=[ + Word( + text="hi", + s_start=1, + s_end=3, + t_start=None, + t_end=None, + tokens=[ + Token(text='h', text_cased='h', s_start=1, s_end=1, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=2, s_end=2, t_start=None, t_end=None), + Token(text='i', text_cased='i', s_start=3, s_end=3, t_start=None, t_end=None), + ], + ), + Token(text='', text_cased='', s_start=4, s_end=4, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=5, s_end=5, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=6, s_end=6, t_start=None, t_end=None), + Word( + text="world", + s_start=7, + s_end=15, + t_start=None, + t_end=None, + tokens=[ + Token(text='w', text_cased='w', s_start=7, s_end=7, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=8, s_end=8, t_start=None, t_end=None), + Token(text='o', text_cased='o', s_start=9, s_end=9, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=10, s_end=10, t_start=None, t_end=None), + Token(text='r', text_cased='r', s_start=11, s_end=11, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=12, s_end=12, t_start=None, t_end=None), + Token(text='l', text_cased='l', s_start=13, s_end=13, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=14, s_end=14, t_start=None, t_end=None), + Token(text='d', text_cased='d', s_start=15, s_end=15, t_start=None, t_end=None), + ], + ), + ], + ), + Token(text='', text_cased='', s_start=16, s_end=16, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=17, s_end=17, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=18, s_end=18, t_start=None, t_end=None), + Segment( + text="hey", + s_start=19, + s_end=23, + t_start=None, + t_end=None, + words_and_tokens=[ + Word( + text="hey", + s_start=19, + s_end=23, + t_start=None, + t_end=None, + tokens=[ + Token(text='h', text_cased='h', s_start=19, s_end=19, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=20, s_end=20, t_start=None, t_end=None), + Token(text='e', text_cased='e', s_start=21, s_end=21, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=22, s_end=22, t_start=None, t_end=None), + Token(text='y', text_cased='y', s_start=23, s_end=23, t_start=None, t_end=None), + ], + ) + ], + ), + Token(text='', text_cased='', s_start=24, s_end=24, t_start=None, t_end=None), + ], + audio_filepath=AUDIO_FILEPATH_FOR_TEST, + utt_id=UTT_ID_FOR_TEST, +) + + +ZH_TEXT = "人工 智能|技术" + +ZH_CN_EXPECTED_UTTERANCE = Utterance( + text='人工 智能|技术', + token_ids_with_blanks=[ + 5206, + 125, + 5206, + 1329, + 5206, + 0, + 5206, + 2029, + 5206, + 3668, + 5206, + 0, + 5206, + 1695, + 5206, + 2075, + 5206, + ], + segments_and_tokens=[ + Token(text='', text_cased='', s_start=0, s_end=0, t_start=None, t_end=None), + Segment( + text='人工 智能', + s_start=1, + s_end=9, + t_start=None, + t_end=None, + words_and_tokens=[ + Word( + text='人工', + s_start=1, + s_end=3, + t_start=None, + t_end=None, + tokens=[ + Token(text='人', text_cased='人', s_start=1, s_end=1, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=2, s_end=2, t_start=None, t_end=None), + Token(text='工', text_cased='工', s_start=3, s_end=3, t_start=None, t_end=None), + ], + ), + Token(text='', text_cased='', s_start=4, s_end=4, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=5, s_end=5, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=6, s_end=6, t_start=None, t_end=None), + Word( + text='智能', + s_start=7, + s_end=9, + t_start=None, + t_end=None, + tokens=[ + Token(text='智', text_cased='智', s_start=7, s_end=7, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=8, s_end=8, t_start=None, t_end=None), + Token(text='能', text_cased='能', s_start=9, s_end=9, t_start=None, t_end=None), + ], + ), + ], + ), + Token(text='', text_cased='', s_start=10, s_end=10, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=11, s_end=11, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=12, s_end=12, t_start=None, t_end=None), + Segment( + text='技术', + s_start=13, + s_end=15, + t_start=None, + t_end=None, + words_and_tokens=[ + Word( + text='技术', + s_start=13, + s_end=15, + t_start=None, + t_end=None, + tokens=[ + Token(text='技', text_cased='技', s_start=13, s_end=13, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=14, s_end=14, t_start=None, t_end=None), + Token(text='术', text_cased='术', s_start=15, s_end=15, t_start=None, t_end=None), + ], + ) + ], + ), + Token(text='', text_cased='', s_start=16, s_end=16, t_start=None, t_end=None), + ], + audio_filepath=AUDIO_FILEPATH_FOR_TEST, + utt_id=UTT_ID_FOR_TEST, +) + + +@pytest.mark.parametrize( + "text,model_pretrained_name,separator,expected_utterance", + [ + (EN_TEXT, "stt_en_citrinet_256_gamma_0_25", "|", EN_CN_EXPECTED_UTTERANCE), + (EN_TEXT, "stt_en_quartznet15x5", "|", EN_QN_EXPECTED_UTTERANCE), + (ZH_TEXT, "stt_zh_citrinet_512", "|", ZH_CN_EXPECTED_UTTERANCE), + ], +) +def test_token_info(text, model_pretrained_name, separator, expected_utterance): + model = ASRModel.from_pretrained(model_pretrained_name) + utt_obj = get_utt_obj( + text, model, separator, T=T_FOR_TEST, audio_filepath=AUDIO_FILEPATH_FOR_TEST, utt_id=UTT_ID_FOR_TEST + ) + print(f"expected utterance object: {get_utt_obj_pp_string(expected_utterance)}\n") + print(f"output utterance object in test: {get_utt_obj_pp_string(utt_obj)}\n") + + assert utt_obj == expected_utterance diff --git a/tools/nemo_forced_aligner/tests/test_get_y_and_boundary_info_for_utt.py b/tools/nemo_forced_aligner/tests/test_get_y_and_boundary_info_for_utt.py deleted file mode 100644 index f5bc722d5a1c..000000000000 --- a/tools/nemo_forced_aligner/tests/test_get_y_and_boundary_info_for_utt.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest -from utils.data_prep import get_y_and_boundary_info_for_utt - -from nemo.collections.asr.models import ASRModel - -EN_TEXT = "hi world | hey" - -EN_QN_EXPECTED_TOKEN_INFO = [ - {'text': '', 's_start': 0, 's_end': 0}, - {'text': 'h', 's_start': 1, 's_end': 1}, - {'text': '', 's_start': 2, 's_end': 2}, - {'text': 'i', 's_start': 3, 's_end': 3}, - {'text': '', 's_start': 4, 's_end': 4}, - {'text': '', 's_start': 5, 's_end': 5}, - {'text': '', 's_start': 6, 's_end': 6}, - {'text': 'w', 's_start': 7, 's_end': 7}, - {'text': '', 's_start': 8, 's_end': 8}, - {'text': 'o', 's_start': 9, 's_end': 9}, - {'text': '', 's_start': 10, 's_end': 10}, - {'text': 'r', 's_start': 11, 's_end': 11}, - {'text': '', 's_start': 12, 's_end': 12}, - {'text': 'l', 's_start': 13, 's_end': 13}, - {'text': '', 's_start': 14, 's_end': 14}, - {'text': 'd', 's_start': 15, 's_end': 15}, - {'text': '', 's_start': 16, 's_end': 16}, - {'text': '', 's_start': 17, 's_end': 17}, - {'text': '', 's_start': 18, 's_end': 18}, - {'text': 'h', 's_start': 19, 's_end': 19}, - {'text': '', 's_start': 20, 's_end': 20}, - {'text': 'e', 's_start': 21, 's_end': 21}, - {'text': '', 's_start': 22, 's_end': 22}, - {'text': 'y', 's_start': 23, 's_end': 23}, - {'text': '', 's_start': 24, 's_end': 24}, -] - -EN_QN_EXPECTED_WORD_INFO = [ - {'text': 'hi', 's_start': 1, 's_end': 3}, - {'text': 'world', 's_start': 7, 's_end': 15}, - {'text': 'hey', 's_start': 19, 's_end': 23}, -] - -EN_QN_EXPECTED_SEGMENT_INFO = [ - {'text': 'hi world', 's_start': 1, 's_end': 15}, - {'text': 'hey', 's_start': 19, 's_end': 23}, -] - -EN_CN_EXPECTED_TOKEN_INFO = [ - {'text': '', 's_start': 0, 's_end': 0}, - {'text': '▁hi', 's_start': 1, 's_end': 1}, - {'text': '', 's_start': 2, 's_end': 2}, - {'text': '▁world', 's_start': 3, 's_end': 3}, - {'text': '', 's_start': 4, 's_end': 4}, - {'text': '▁he', 's_start': 5, 's_end': 5}, - {'text': '', 's_start': 6, 's_end': 6}, - {'text': 'y', 's_start': 7, 's_end': 7}, - {'text': '', 's_start': 8, 's_end': 8}, -] - -EN_CN_EXPECTED_WORD_INFO = [ - {'text': 'hi', 's_start': 1, 's_end': 1}, - {'text': 'world', 's_start': 3, 's_end': 3}, - {'text': 'hey', 's_start': 5, 's_end': 7}, -] - -EN_CN_EXPECTED_SEGMENT_INFO = [ - {'text': 'hi world', 's_start': 1, 's_end': 3}, - {'text': 'hey', 's_start': 5, 's_end': 7}, -] - - -ZH_TEXT = "人工 智能|技术" - -ZH_EXPECTED_TOKEN_INFO = [ - {'text': '', 's_start': 0, 's_end': 0}, - {'text': '人', 's_start': 1, 's_end': 1}, - {'text': '', 's_start': 2, 's_end': 2}, - {'text': '工', 's_start': 3, 's_end': 3}, - {'text': '', 's_start': 4, 's_end': 4}, - {'text': '', 's_start': 5, 's_end': 5}, - {'text': '', 's_start': 6, 's_end': 6}, - {'text': '智', 's_start': 7, 's_end': 7}, - {'text': '', 's_start': 8, 's_end': 8}, - {'text': '能', 's_start': 9, 's_end': 9}, - {'text': '', 's_start': 10, 's_end': 10}, - {'text': '', 's_start': 11, 's_end': 11}, - {'text': '', 's_start': 12, 's_end': 12}, - {'text': '技', 's_start': 13, 's_end': 13}, - {'text': '', 's_start': 14, 's_end': 14}, - {'text': '术', 's_start': 15, 's_end': 15}, - {'text': '', 's_start': 16, 's_end': 16}, -] - -ZH_EXPECTED_WORD_INFO = [ - {'text': '人工', 's_start': 1, 's_end': 3}, - {'text': '智能', 's_start': 7, 's_end': 9}, - {'text': '技术', 's_start': 13, 's_end': 15}, -] - -ZH_EXPECTED_SEGMENT_INFO = [ - {'text': '人工 智能', 's_start': 1, 's_end': 9}, - {'text': '技术', 's_start': 13, 's_end': 15}, -] - - -@pytest.mark.parametrize( - "text,model_pretrained_name,separator,expected_token_info", - [ - (EN_TEXT, "stt_en_quartznet15x5", "|", EN_QN_EXPECTED_TOKEN_INFO), - (EN_TEXT, "stt_en_citrinet_256_gamma_0_25", "|", EN_CN_EXPECTED_TOKEN_INFO), - (ZH_TEXT, "stt_zh_citrinet_512", "|", ZH_EXPECTED_TOKEN_INFO), - ], -) -def test_token_info(text, model_pretrained_name, separator, expected_token_info): - model = ASRModel.from_pretrained(model_pretrained_name) - _, token_info, *_ = get_y_and_boundary_info_for_utt(text, model, separator) - assert token_info == expected_token_info - - -@pytest.mark.parametrize( - "text,model_pretrained_name,separator,expected_word_info", - [ - (EN_TEXT, "stt_en_quartznet15x5", "|", EN_QN_EXPECTED_WORD_INFO), - (EN_TEXT, "stt_en_citrinet_256_gamma_0_25", "|", EN_CN_EXPECTED_WORD_INFO), - (ZH_TEXT, "stt_zh_citrinet_512", "|", ZH_EXPECTED_WORD_INFO), - ], -) -def test_word_info(text, model_pretrained_name, separator, expected_word_info): - model = ASRModel.from_pretrained(model_pretrained_name) - _, _, word_info, _ = get_y_and_boundary_info_for_utt(text, model, separator) - assert word_info == expected_word_info - - -@pytest.mark.parametrize( - "text,model_pretrained_name,separator,expected_segment_info", - [ - (EN_TEXT, "stt_en_quartznet15x5", "|", EN_QN_EXPECTED_SEGMENT_INFO), - (EN_TEXT, "stt_en_citrinet_256_gamma_0_25", "|", EN_CN_EXPECTED_SEGMENT_INFO), - (ZH_TEXT, "stt_zh_citrinet_512", "|", ZH_EXPECTED_SEGMENT_INFO), - ], -) -def test_segment_info(text, model_pretrained_name, separator, expected_segment_info): - model = ASRModel.from_pretrained(model_pretrained_name) - *_, segment_info = get_y_and_boundary_info_for_utt(text, model, separator) - assert segment_info == expected_segment_info diff --git a/tools/nemo_forced_aligner/tests/test_restore_token_case.py b/tools/nemo_forced_aligner/tests/test_restore_token_case.py new file mode 100644 index 000000000000..6217dfc0ba94 --- /dev/null +++ b/tools/nemo_forced_aligner/tests/test_restore_token_case.py @@ -0,0 +1,36 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from utils.data_prep import restore_token_case + + +@pytest.mark.parametrize( + "word,word_tokens,expected_word_tokens_cased", + [ + ("HEY!", ['▁he', 'y', '!'], ['▁HE', 'Y', '!']), + ("BabABa▁", ['▁b', 'a', 'b', 'a', 'b', 'a'], ['▁B', 'a', 'b', 'A', 'B', 'a']), + ("BabAB▁a", ['▁b', 'a', 'b', 'a', 'b', '_a'], ['▁B', 'a', 'b', 'A', 'B', '_a']), + ("Bab▁AB▁a", ['▁b', 'a', 'b', '▁a', 'b', '▁a'], ['▁B', 'a', 'b', '▁A', 'B', '▁a']), + ("▁Bab▁AB▁a", ['▁b', 'a', 'b', '▁a', 'b', '▁a'], ['▁B', 'a', 'b', '▁A', 'B', '▁a']), + ("▁Bab▁AB▁▁a", ['▁b', 'a', 'b', '▁a', 'b', '▁a'], ['▁B', 'a', 'b', '▁A', 'B', '▁a']), + ("▁▁BabAB▁a", ['▁b', 'a', 'b', 'a', 'b', '▁a'], ['▁B', 'a', 'b', 'A', 'B', '▁a']), + ("m²", ['▁', 'm', '2'], ['▁', 'm', '2']), + ("²", ['▁', '2'], ['▁', '2']), + ], +) +def test_restore_token_case(word, word_tokens, expected_word_tokens_cased): + word_tokens_cased = restore_token_case(word, word_tokens) + assert word_tokens_cased == expected_word_tokens_cased diff --git a/tools/nemo_forced_aligner/utils/constants.py b/tools/nemo_forced_aligner/utils/constants.py index 894f880401cb..51ce934be479 100644 --- a/tools/nemo_forced_aligner/utils/constants.py +++ b/tools/nemo_forced_aligner/utils/constants.py @@ -16,4 +16,4 @@ SPACE_TOKEN = "" -V_NEGATIVE_NUM = -1e30 +V_NEGATIVE_NUM = -3.4e38 # this is just above the most negative number in torch.float32 diff --git a/tools/nemo_forced_aligner/utils/data_prep.py b/tools/nemo_forced_aligner/utils/data_prep.py index 852be91d78c4..20f401389c4e 100644 --- a/tools/nemo_forced_aligner/utils/data_prep.py +++ b/tools/nemo_forced_aligner/utils/data_prep.py @@ -13,13 +13,24 @@ # limitations under the License. import json -import os +from dataclasses import dataclass, field +from pathlib import Path +from typing import List, Union import soundfile as sf import torch from tqdm.auto import tqdm from utils.constants import BLANK_TOKEN, SPACE_TOKEN, V_NEGATIVE_NUM +from nemo.utils import logging + + +def _get_utt_id(audio_filepath, audio_filepath_parts_in_utt_id): + fp_parts = Path(audio_filepath).parts[-audio_filepath_parts_in_utt_id:] + utt_id = Path("_".join(fp_parts)).stem + utt_id = utt_id.replace(" ", "-") # replace any spaces in the filepath with dashes + return utt_id + def get_batch_starts_ends(manifest_filepath, batch_size): """ @@ -70,10 +81,16 @@ def is_entry_in_all_lines(manifest_filepath, entry): def get_manifest_lines_batch(manifest_filepath, start, end): manifest_lines_batch = [] - with open(manifest_filepath, "r") as f: + with open(manifest_filepath, "r", encoding="utf-8-sig") as f: for line_i, line in enumerate(f): if line_i >= start and line_i <= end: - manifest_lines_batch.append(json.loads(line)) + data = json.loads(line) + if "text" in data: + # remove any BOM, any duplicated spaces, convert any + # newline chars to spaces + data["text"] = data["text"].replace("\ufeff", "") + data["text"] = " ".join(data["text"].split()) + manifest_lines_batch.append(data) if line_i == end: break @@ -91,42 +108,138 @@ def get_char_tokens(text, model): return tokens -def get_y_and_boundary_info_for_utt(text, model, separator): +def is_sub_or_superscript_pair(ref_text, text): + """returns True if ref_text is a subscript or superscript version of text""" + sub_or_superscript_to_num = { + "⁰": "0", + "¹": "1", + "²": "2", + "³": "3", + "⁴": "4", + "⁵": "5", + "⁶": "6", + "⁷": "7", + "⁸": "8", + "⁹": "9", + "₀": "0", + "₁": "1", + "₂": "2", + "₃": "3", + "₄": "4", + "₅": "5", + "₆": "6", + "₇": "7", + "₈": "8", + "₉": "9", + } + + if text in sub_or_superscript_to_num: + if sub_or_superscript_to_num[text] == ref_text: + return True + return False + + +def restore_token_case(word, word_tokens): + + # remove repeated "▁" and "_" from word as that is what the tokenizer will do + while "▁▁" in word: + word = word.replace("▁▁", "▁") + + while "__" in word: + word = word.repalce("__", "_") + + word_tokens_cased = [] + word_char_pointer = 0 + + for token in word_tokens: + token_cased = "" + + for token_char in token: + if token_char == word[word_char_pointer]: + token_cased += token_char + word_char_pointer += 1 + + else: + if token_char.upper() == word[word_char_pointer] or is_sub_or_superscript_pair( + token_char, word[word_char_pointer] + ): + token_cased += token_char.upper() + word_char_pointer += 1 + else: + if token_char == "▁" or token_char == "_": + if word[word_char_pointer] == "▁" or word[word_char_pointer] == "_": + token_cased += token_char + word_char_pointer += 1 + elif word_char_pointer == 0: + token_cased += token_char + + else: + raise RuntimeError( + f"Unexpected error - failed to recover capitalization of tokens for word {word}" + ) + + word_tokens_cased.append(token_cased) + + return word_tokens_cased + + +@dataclass +class Token: + text: str = None + text_cased: str = None + s_start: int = None + s_end: int = None + t_start: float = None + t_end: float = None + + +@dataclass +class Word: + text: str = None + s_start: int = None + s_end: int = None + t_start: float = None + t_end: float = None + tokens: List[Token] = field(default_factory=list) + + +@dataclass +class Segment: + text: str = None + s_start: int = None + s_end: int = None + t_start: float = None + t_end: float = None + words_and_tokens: List[Union[Word, Token]] = field(default_factory=list) + + +@dataclass +class Utterance: + token_ids_with_blanks: List[int] = field(default_factory=list) + segments_and_tokens: List[Union[Segment, Token]] = field(default_factory=list) + text: str = None + pred_text: str = None + audio_filepath: str = None + utt_id: str = None + saved_output_files: dict = field(default_factory=dict) + + +def get_utt_obj( + text, model, separator, T, audio_filepath, utt_id, +): """ - Get y_token_ids_with_blanks, token_info, word_info and segment_info for the text provided, tokenized - by the model provided. - y_token_ids_with_blanks is a list of the indices of the text tokens with the blank token id in between every - text token. - token_info, word_info and segment_info are lists of dictionaries containing information about - where the tokens/words/segments start and end. - For example, 'hi world | hey ' with separator = '|' and tokenized by a BPE tokenizer can have token_info like: - token_info = [ - {'text': '', 's_start': 0, 's_end': 0}, - {'text': '▁hi', 's_start': 1, 's_end': 1}, - {'text': '', 's_start': 2, 's_end': 2}, - {'text': '▁world', 's_start': 3, 's_end': 3}, - {'text': '', 's_start': 4, 's_end': 4}, - {'text': '▁he', 's_start': 5, 's_end': 5}, - {'text': '', 's_start': 6, 's_end': 6}, - {'text': 'y', 's_start': 7, 's_end': 7}, - {'text': '', 's_start': 8, 's_end': 8}, - ] - 's_start' and 's_end' indicate where in the sequence of tokens does each token start and end. - - The word_info will be as follows: - word_info = [ - {'text': 'hi', 's_start': 1, 's_end': 1}, - {'text': 'world', 's_start': 3, 's_end': 3}, - {'text': 'hey', 's_start': 5, 's_end': 7}, - ] - 's_start' and 's_end' indicate where in the sequence of tokens does each word start and end. - - segment_info will be as follows: - segment_info = [ - {'text': 'hi world', 's_start': 1, 's_end': 3}, - {'text': 'hey', 's_start': 5, 's_end': 7}, - ] - 's_start' and 's_end' indicate where in the sequence of tokens does each segment start and end. + Function to create an Utterance object and add all necessary information to it except + for timings of the segments / words / tokens according to the alignment - that will + be done later in a different function, after the alignment is done. + + The Utterance object has a list segments_and_tokens which contains Segment objects and + Token objects (for blank tokens in between segments). + Within the Segment objects, there is a list words_and_tokens which contains Word objects and + Token objects (for blank tokens in between words). + Within the Word objects, there is a list tokens tokens which contains Token objects for + blank and non-blank tokens. + We will be building up these lists in this function. This data structure will then be useful for + generating the various output files that we wish to save. """ if not separator: # if separator is not defined - treat the whole text as one segment @@ -137,157 +250,429 @@ def get_y_and_boundary_info_for_utt(text, model, separator): # remove any spaces at start and end of segments segments = [seg.strip() for seg in segments] + utt = Utterance(text=text, audio_filepath=audio_filepath, utt_id=utt_id,) + + # build up lists: token_ids_with_blanks, segments_and_tokens. + # The code for these is different depending on whether we use char-based tokens or not if hasattr(model, 'tokenizer'): if hasattr(model, 'blank_id'): BLANK_ID = model.blank_id else: - BLANK_ID = len(model.decoder.vocabulary) # TODO: check + BLANK_ID = len(model.tokenizer.vocab) # TODO: check - y_token_ids_with_blanks = [BLANK_ID] - token_info = [{"text": BLANK_TOKEN, "s_start": 0, "s_end": 0,}] - word_info = [] - segment_info = [] + utt.token_ids_with_blanks = [BLANK_ID] + + # check for text being 0 length + if len(text) == 0: + return utt + + # check for # tokens + token repetitions being > T + all_tokens = model.tokenizer.text_to_ids(text) + n_token_repetitions = 0 + for i_tok in range(1, len(all_tokens)): + if all_tokens[i_tok] == all_tokens[i_tok - 1]: + n_token_repetitions += 1 + + if len(all_tokens) + n_token_repetitions > T: + logging.info( + f"Utterance {utt_id} has too many tokens compared to the audio file duration." + " Will not generate output alignment files for this utterance." + ) + return utt + + # build up data structures containing segments/words/tokens + utt.segments_and_tokens.append(Token(text=BLANK_TOKEN, text_cased=BLANK_TOKEN, s_start=0, s_end=0,)) segment_s_pointer = 1 # first segment will start at s=1 because s=0 is a blank word_s_pointer = 1 # first word will start at s=1 because s=0 is a blank for segment in segments: + # add the segment to segment_info and increment the segment_s_pointer + segment_tokens = model.tokenizer.text_to_tokens(segment) + utt.segments_and_tokens.append( + Segment( + text=segment, + s_start=segment_s_pointer, + # segment_tokens do not contain blanks => need to muliply by 2 + # s_end needs to be the index of the final token (including blanks) of the current segment: + # segment_s_pointer + len(segment_tokens) * 2 is the index of the first token of the next segment => + # => need to subtract 2 + s_end=segment_s_pointer + len(segment_tokens) * 2 - 2, + ) + ) + segment_s_pointer += ( + len(segment_tokens) * 2 + ) # multiply by 2 to account for blanks (which are not present in segment_tokens) + words = segment.split(" ") # we define words to be space-separated sub-strings - for word in words: + for word_i, word in enumerate(words): word_tokens = model.tokenizer.text_to_tokens(word) - word_ids = model.tokenizer.text_to_ids(word) - for token, id_ in zip(word_tokens, word_ids): - # add the text token and the blank that follows it - # to our token-based variables - y_token_ids_with_blanks.extend([id_, BLANK_ID]) - token_info.extend( - [ - { - "text": token, - "s_start": len(y_token_ids_with_blanks) - 2, - "s_end": len(y_token_ids_with_blanks) - 2, - }, - { - "text": BLANK_TOKEN, - "s_start": len(y_token_ids_with_blanks) - 1, - "s_end": len(y_token_ids_with_blanks) - 1, - }, - ] - ) + word_token_ids = model.tokenizer.text_to_ids(word) + word_tokens_cased = restore_token_case(word, word_tokens) # add the word to word_info and increment the word_s_pointer - word_info.append( - { - "text": word, - "s_start": word_s_pointer, - "s_end": word_s_pointer + (len(word_tokens) - 1) * 2, # TODO check this, - } + utt.segments_and_tokens[-1].words_and_tokens.append( + # word_tokens do not contain blanks => need to muliply by 2 + # s_end needs to be the index of the final token (including blanks) of the current word: + # word_s_pointer + len(word_tokens) * 2 is the index of the first token of the next word => + # => need to subtract 2 + Word(text=word, s_start=word_s_pointer, s_end=word_s_pointer + len(word_tokens) * 2 - 2) ) - word_s_pointer += len(word_tokens) * 2 # TODO check this + word_s_pointer += ( + len(word_tokens) * 2 + ) # multiply by 2 to account for blanks (which are not present in word_tokens) + + for token_i, (token, token_id, token_cased) in enumerate( + zip(word_tokens, word_token_ids, word_tokens_cased) + ): + # add the text tokens and the blanks in between them + # to our token-based variables + utt.token_ids_with_blanks.extend([token_id, BLANK_ID]) + # adding Token object for non-blank token + utt.segments_and_tokens[-1].words_and_tokens[-1].tokens.append( + Token( + text=token, + text_cased=token_cased, + # utt.token_ids_with_blanks has the form [...., , ] => + # => if do len(utt.token_ids_with_blanks) - 1 you get the index of the final + # => we want to do len(utt.token_ids_with_blanks) - 2 to get the index of + s_start=len(utt.token_ids_with_blanks) - 2, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 2, + ) + ) - # add the segment to segment_info and increment the segment_s_pointer - segment_tokens = model.tokenizer.text_to_tokens(segment) - segment_info.append( - { - "text": segment, - "s_start": segment_s_pointer, - "s_end": segment_s_pointer + (len(segment_tokens) - 1) * 2, - } + # adding Token object for blank tokens in between the tokens of the word + # (ie do not add another blank if you have reached the end) + if token_i < len(word_tokens) - 1: + utt.segments_and_tokens[-1].words_and_tokens[-1].tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form [...., ] => + # => if do len(utt.token_ids_with_blanks) -1 you get the index of this + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, + ) + ) + + # add a Token object for blanks in between words in this segment + # (but only *in between* - do not add the token if it is after the final word) + if word_i < len(words) - 1: + utt.segments_and_tokens[-1].words_and_tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form [...., ] => + # => if do len(utt.token_ids_with_blanks) -1 you get the index of this + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, + ) + ) + + # add the blank token in between segments/after the final segment + utt.segments_and_tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form [...., ] => + # => if do len(utt.token_ids_with_blanks) -1 you get the index of this + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, + ) ) - segment_s_pointer += len(segment_tokens) * 2 - return y_token_ids_with_blanks, token_info, word_info, segment_info + return utt elif hasattr(model.decoder, "vocabulary"): # i.e. tokenization is simply character-based BLANK_ID = len(model.decoder.vocabulary) # TODO: check this is correct SPACE_ID = model.decoder.vocabulary.index(" ") - y_token_ids_with_blanks = [BLANK_ID] - token_info = [{"text": BLANK_TOKEN, "s_start": 0, "s_end": 0,}] - word_info = [] - segment_info = [] + utt.token_ids_with_blanks = [BLANK_ID] + + # check for text being 0 length + if len(text) == 0: + return utt + + # check for # tokens + token repetitions being > T + all_tokens = get_char_tokens(text, model) + n_token_repetitions = 0 + for i_tok in range(1, len(all_tokens)): + if all_tokens[i_tok] == all_tokens[i_tok - 1]: + n_token_repetitions += 1 + + if len(all_tokens) + n_token_repetitions > T: + logging.info( + f"Utterance {utt_id} has too many tokens compared to the audio file duration." + " Will not generate output alignment files for this utterance." + ) + return utt + + # build up data structures containing segments/words/tokens + utt.segments_and_tokens.append(Token(text=BLANK_TOKEN, text_cased=BLANK_TOKEN, s_start=0, s_end=0,)) segment_s_pointer = 1 # first segment will start at s=1 because s=0 is a blank word_s_pointer = 1 # first word will start at s=1 because s=0 is a blank for i_segment, segment in enumerate(segments): - words = segment.split(" ") # we define words to be space-separated characters + # add the segment to segment_info and increment the segment_s_pointer + segment_tokens = get_char_tokens(segment, model) + utt.segments_and_tokens.append( + Segment( + text=segment, + s_start=segment_s_pointer, + # segment_tokens do not contain blanks => need to muliply by 2 + # s_end needs to be the index of the final token (including blanks) of the current segment: + # segment_s_pointer + len(segment_tokens) * 2 is the index of the first token of the next segment => + # => need to subtract 2 + s_end=segment_s_pointer + len(segment_tokens) * 2 - 2, + ) + ) + + # for correct calculation: multiply len(segment_tokens) by 2 to account for blanks (which are not present in segment_tokens) + # and + 2 to account for [, ] + segment_s_pointer += len(segment_tokens) * 2 + 2 + + words = segment.split(" ") # we define words to be space-separated substrings for i_word, word in enumerate(words): # convert string to list of characters word_tokens = list(word) # convert list of characters to list of their ids in the vocabulary - word_ids = get_char_tokens(word, model) - for token, id_ in zip(word_tokens, word_ids): - # add the text token and the blank that follows it + word_token_ids = get_char_tokens(word, model) + + # add the word to word_info and increment the word_s_pointer + utt.segments_and_tokens[-1].words_and_tokens.append( + # note for s_end: + # word_tokens do not contain blanks => need to muliply by 2 + # s_end needs to be the index of the final token (including blanks) of the current word: + # word_s_pointer + len(word_tokens) * 2 is the index of the first token of the next word => + # => need to subtract 2 + Word(text=word, s_start=word_s_pointer, s_end=word_s_pointer + len(word_tokens) * 2 - 2) + ) + + # for correct calculation: multiply len(word_tokens) by 2 to account for blanks (which are not present in word_tokens) + # and + 2 to account for [, ] + word_s_pointer += len(word_tokens) * 2 + 2 + + for token_i, (token, token_id) in enumerate(zip(word_tokens, word_token_ids)): + # add the text tokens and the blanks in between them # to our token-based variables - y_token_ids_with_blanks.extend([id_, BLANK_ID]) - token_info.extend( - [ - { - "text": token, - "s_start": len(y_token_ids_with_blanks) - 2, - "s_end": len(y_token_ids_with_blanks) - 2, - }, - { - "text": BLANK_TOKEN, - "s_start": len(y_token_ids_with_blanks) - 1, - "s_end": len(y_token_ids_with_blanks) - 1, - }, - ] + utt.token_ids_with_blanks.extend([token_id]) + utt.segments_and_tokens[-1].words_and_tokens[-1].tokens.append( + Token( + text=token, + text_cased=token, + # utt.token_ids_with_blanks has the form [..., ] + # => do len(utt.token_ids_with_blanks) - 1 to get the index of this non-blank token + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, + ) ) - # add space token (and the blank after it) unless this is the final word in the final segment - if not (i_segment == len(segments) - 1 and i_word == len(words) - 1): - y_token_ids_with_blanks.extend([SPACE_ID, BLANK_ID]) - token_info.extend( - ( - { - "text": SPACE_TOKEN, - "s_start": len(y_token_ids_with_blanks) - 2, - "s_end": len(y_token_ids_with_blanks) - 2, - }, - { - "text": BLANK_TOKEN, - "s_start": len(y_token_ids_with_blanks) - 1, - "s_end": len(y_token_ids_with_blanks) - 1, - }, + if token_i < len(word_tokens) - 1: # only add blank tokens that are in the middle of words + utt.token_ids_with_blanks.extend([BLANK_ID]) + utt.segments_and_tokens[-1].words_and_tokens[-1].tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form [..., ] + # => do len(utt.token_ids_with_blanks) - 1 to get the index of this blank token + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, + ) + ) + + # add space token (and the blanks around it) unless this is the final word in a segment + if i_word < len(words) - 1: + utt.token_ids_with_blanks.extend([BLANK_ID, SPACE_ID, BLANK_ID]) + utt.segments_and_tokens[-1].words_and_tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form + # [..., , , , ] + # => do len(utt.token_ids_with_blanks) - 3 to get the index of the blank token before the space token + s_start=len(utt.token_ids_with_blanks) - 3, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 3, + ) + ) + utt.segments_and_tokens[-1].words_and_tokens.append( + Token( + text=SPACE_TOKEN, + text_cased=SPACE_TOKEN, + # utt.token_ids_with_blanks has the form + # [..., , , , ] + # => do len(utt.token_ids_with_blanks) - 2 to get the index of the space token + s_start=len(utt.token_ids_with_blanks) - 2, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 2, + ) + ) + utt.segments_and_tokens[-1].words_and_tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form + # [..., , , , ] + # => do len(utt.token_ids_with_blanks) - 1 to get the index of the blank token after the space token + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, ) ) - # add the word to word_info and increment the word_s_pointer - word_info.append( - { - "text": word, - "s_start": word_s_pointer, - "s_end": word_s_pointer + len(word_tokens) * 2 - 2, # TODO check this, - } - ) - word_s_pointer += len(word_tokens) * 2 + 2 # TODO check this - # add the segment to segment_info and increment the segment_s_pointer - segment_tokens = get_char_tokens(segment, model) - segment_info.append( - { - "text": segment, - "s_start": segment_s_pointer, - "s_end": segment_s_pointer + (len(segment_tokens) - 1) * 2, - } + # add a blank to the segment, and add a space after if this is not the final segment + utt.token_ids_with_blanks.extend([BLANK_ID]) + utt.segments_and_tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form [..., ] + # => do len(utt.token_ids_with_blanks) - 1 to get the index of this blank token + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, + ) ) - segment_s_pointer += len(segment_tokens) * 2 + 2 - return y_token_ids_with_blanks, token_info, word_info, segment_info + if i_segment < len(segments) - 1: + utt.token_ids_with_blanks.extend([SPACE_ID, BLANK_ID]) + utt.segments_and_tokens.append( + Token( + text=SPACE_TOKEN, + text_cased=SPACE_TOKEN, + # utt.token_ids_with_blanks has the form + # [..., , ] + # => do len(utt.token_ids_with_blanks) - 2 to get the index of the space token + s_start=len(utt.token_ids_with_blanks) - 2, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 2, + ) + ) + utt.segments_and_tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form + # [..., , ] + # => do len(utt.token_ids_with_blanks) - 1 to get the index of the blank token + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, + ) + ) + + return utt else: raise RuntimeError("Cannot get tokens of this model.") -def get_batch_tensors_and_boundary_info( +def add_t_start_end_to_utt_obj(utt_obj, alignment_utt, output_timestep_duration): + """ + Function to add t_start and t_end (representing time in seconds) to the Utterance object utt_obj. + Args: + utt_obj: Utterance object to which we will add t_start and t_end for its + constituent segments/words/tokens. + alignment_utt: a list of ints indicating which token does the alignment pass through at each + timestep (will take the form [0, 0, 1, 1, ..., ]). + output_timestep_duration: a float indicating the duration of a single output timestep from + the ASR Model. + + Returns: + utt_obj: updated Utterance object. + """ + + # General idea for the algorithm of how we add t_start and t_end + # the timestep where a token s starts is the location of the first appearance of s_start in alignment_utt + # the timestep where a token s ends is the location of the final appearance of s_end in alignment_utt + # We will make dictionaries num_to_first_alignment_appearance and + # num_to_last_appearance and use that to update all of + # the t_start and t_end values in utt_obj. + # We will put t_start = t_end = -1 for tokens that are skipped (should only be blanks) + + num_to_first_alignment_appearance = dict() + num_to_last_alignment_appearance = dict() + + prev_s = -1 # use prev_s to keep track of when the s changes + for t, s in enumerate(alignment_utt): + if s > prev_s: + num_to_first_alignment_appearance[s] = t + + if prev_s >= 0: # dont record prev_s = -1 + num_to_last_alignment_appearance[prev_s] = t - 1 + prev_s = s + # add last appearance of the final s + num_to_last_alignment_appearance[prev_s] = len(alignment_utt) - 1 + + # update all the t_start and t_end in utt_obj + for segment_or_token in utt_obj.segments_and_tokens: + if type(segment_or_token) is Segment: + segment = segment_or_token + segment.t_start = num_to_first_alignment_appearance[segment.s_start] * output_timestep_duration + segment.t_end = (num_to_last_alignment_appearance[segment.s_end] + 1) * output_timestep_duration + + for word_or_token in segment.words_and_tokens: + if type(word_or_token) is Word: + word = word_or_token + word.t_start = num_to_first_alignment_appearance[word.s_start] * output_timestep_duration + word.t_end = (num_to_last_alignment_appearance[word.s_end] + 1) * output_timestep_duration + + for token in word.tokens: + if token.s_start in num_to_first_alignment_appearance: + token.t_start = num_to_first_alignment_appearance[token.s_start] * output_timestep_duration + else: + token.t_start = -1 + + if token.s_end in num_to_last_alignment_appearance: + token.t_end = ( + num_to_last_alignment_appearance[token.s_end] + 1 + ) * output_timestep_duration + else: + token.t_end = -1 + else: + token = word_or_token + if token.s_start in num_to_first_alignment_appearance: + token.t_start = num_to_first_alignment_appearance[token.s_start] * output_timestep_duration + else: + token.t_start = -1 + + if token.s_end in num_to_last_alignment_appearance: + token.t_end = (num_to_last_alignment_appearance[token.s_end] + 1) * output_timestep_duration + else: + token.t_end = -1 + + else: + token = segment_or_token + if token.s_start in num_to_first_alignment_appearance: + token.t_start = num_to_first_alignment_appearance[token.s_start] * output_timestep_duration + else: + token.t_start = -1 + + if token.s_end in num_to_last_alignment_appearance: + token.t_end = (num_to_last_alignment_appearance[token.s_end] + 1) * output_timestep_duration + else: + token.t_end = -1 + + return utt_obj + + +def get_batch_variables( manifest_lines_batch, model, separator, align_using_pred_text, + audio_filepath_parts_in_utt_id, + output_timestep_duration, simulate_cache_aware_streaming=False, use_buffered_chunked_streaming=False, buffered_chunk_params={}, @@ -296,10 +681,9 @@ def get_batch_tensors_and_boundary_info( Returns: log_probs, y, T, U (y and U are s.t. every other token is a blank) - these are the tensors we will need during Viterbi decoding. - token_info_list, word_info_list, segment_info_list - these are lists of dictionaries which we will need - for writing the CTM files with the human-readable alignments. - pred_text_list - this is a list of the transcriptions from our model which we will save to our output JSON - file if align_using_pred_text is True. + utt_obj_batch: a list of Utterance objects for every utterance in the batch. + output_timestep_duration: a float indicating the duration of a single output timestep from + the ASR Model. """ # get hypotheses by calling 'transcribe' @@ -320,6 +704,11 @@ def get_batch_tensors_and_boundary_info( hypotheses = model.transcribe_simulate_cache_aware_streaming( audio_filepaths_batch, return_hypotheses=True, batch_size=B ) + + # if hypotheses form a tuple (from Hybrid model), extract just "best" hypothesis + if type(hypotheses) == tuple and len(hypotheses) == 2: + hypotheses = hypotheses[0] + for hypothesis in hypotheses: log_probs_list_batch.append(hypothesis.y_sequence) T_list_batch.append(hypothesis.y_sequence.shape[0]) @@ -341,30 +730,52 @@ def get_batch_tensors_and_boundary_info( # token_info_batch, word_info_batch, segment_info_batch y_list_batch = [] U_list_batch = [] - token_info_batch = [] - word_info_batch = [] - segment_info_batch = [] + utt_obj_batch = [] for i_line, line in enumerate(manifest_lines_batch): if align_using_pred_text: - gt_text_for_alignment = pred_text_batch[i_line] + gt_text_for_alignment = " ".join(pred_text_batch[i_line].split()) else: gt_text_for_alignment = line["text"] - y_utt, token_info_utt, word_info_utt, segment_info_utt = get_y_and_boundary_info_for_utt( - gt_text_for_alignment, model, separator + utt_obj = get_utt_obj( + gt_text_for_alignment, + model, + separator, + T_list_batch[i_line], + audio_filepaths_batch[i_line], + _get_utt_id(audio_filepaths_batch[i_line], audio_filepath_parts_in_utt_id), ) - y_list_batch.append(y_utt) - U_list_batch.append(len(y_utt)) - token_info_batch.append(token_info_utt) - word_info_batch.append(word_info_utt) - segment_info_batch.append(segment_info_utt) + # update utt_obj.pred_text or utt_obj.text + if align_using_pred_text: + utt_obj.pred_text = pred_text_batch[i_line] + if len(utt_obj.pred_text) == 0: + logging.info( + f"'pred_text' of utterance {utt_obj.utt_id} is empty - we will not generate" + " any output alignment files for this utterance" + ) + if "text" in line: + utt_obj.text = line["text"] # keep the text as we will save it in the output manifest + else: + utt_obj.text = line["text"] + if len(utt_obj.text) == 0: + logging.info( + f"'text' of utterance {utt_obj.utt_id} is empty - we will not generate" + " any output alignment files for this utterance" + ) + + y_list_batch.append(utt_obj.token_ids_with_blanks) + U_list_batch.append(len(utt_obj.token_ids_with_blanks)) + utt_obj_batch.append(utt_obj) # turn log_probs, y, T, U into dense tensors for fast computation during Viterbi decoding T_max = max(T_list_batch) U_max = max(U_list_batch) # V = the number of tokens in the vocabulary + 1 for the blank token. - V = len(model.decoder.vocabulary) + 1 + if hasattr(model, 'tokenizer'): + V = len(model.tokenizer.vocab) + 1 + else: + V = len(model.decoder.vocabulary) + 1 T_batch = torch.tensor(T_list_batch) U_batch = torch.tensor(U_list_batch) @@ -383,13 +794,40 @@ def get_batch_tensors_and_boundary_info( U_utt = U_batch[b] y_batch[b, :U_utt] = torch.tensor(y_utt) + # calculate output_timestep_duration if it is None + if output_timestep_duration is None: + if not 'window_stride' in model.cfg.preprocessor: + raise ValueError( + "Don't have attribute 'window_stride' in 'model.cfg.preprocessor' => cannot calculate " + " model_downsample_factor => stopping process" + ) + + if not 'sample_rate' in model.cfg.preprocessor: + raise ValueError( + "Don't have attribute 'sample_rate' in 'model.cfg.preprocessor' => cannot calculate start " + " and end time of segments => stopping process" + ) + + with sf.SoundFile(audio_filepaths_batch[0]) as f: + audio_dur = f.frames / f.samplerate + n_input_frames = audio_dur / model.cfg.preprocessor.window_stride + model_downsample_factor = round(n_input_frames / int(T_batch[0])) + + output_timestep_duration = ( + model.preprocessor.featurizer.hop_length * model_downsample_factor / model.cfg.preprocessor.sample_rate + ) + + logging.info( + f"Calculated that the model downsample factor is {model_downsample_factor}" + f" and therefore the ASR model output timestep duration is {output_timestep_duration}" + " -- will use this for all batches" + ) + return ( log_probs_batch, y_batch, T_batch, U_batch, - token_info_batch, - word_info_batch, - segment_info_batch, - pred_text_batch, + utt_obj_batch, + output_timestep_duration, ) diff --git a/tools/nemo_forced_aligner/utils/make_ass_files.py b/tools/nemo_forced_aligner/utils/make_ass_files.py new file mode 100644 index 000000000000..f1beea838573 --- /dev/null +++ b/tools/nemo_forced_aligner/utils/make_ass_files.py @@ -0,0 +1,428 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This file contains functions for make ASS-format subtitle files based on the generated alignment. +ASS files can be generated highlighting token-level alignments or word-level alignments. +In both cases, 'segment' boundaries will be used to determine which parts of the text will appear +at the same time. +For the token-level ASS files, the text will be highlighted token-by-token, with the timings determined +by the NFA alignments. +For the word-level ASS files, the text will be highlighted word-by-word, with the timings determined +by the NFA alignemtns. +""" + +import os + +from utils.constants import BLANK_TOKEN, SPACE_TOKEN +from utils.data_prep import Segment, Token, Word + +PLAYERRESX = 384 +PLAYERRESY = 288 +MARGINL = 10 +MARGINR = 10 + + +def seconds_to_ass_format(seconds_float): + seconds_float = float(seconds_float) + mm, ss_decimals = divmod(seconds_float, 60) + hh, mm = divmod(mm, 60) + + hh = str(round(hh)) + if len(hh) == 1: + hh = '0' + hh + + mm = str(round(mm)) + if len(mm) == 1: + mm = '0' + mm + + ss_decimals = f"{ss_decimals:.2f}" + if len(ss_decimals.split(".")[0]) == 1: + ss_decimals = "0" + ss_decimals + + srt_format_time = f"{hh}:{mm}:{ss_decimals}" + + return srt_format_time + + +def make_ass_files( + utt_obj, output_dir_root, ass_file_config, +): + + # don't try to make files if utt_obj.segments_and_tokens is empty, which will happen + # in the case of the ground truth text being empty or the number of tokens being too large vs audio duration + if not utt_obj.segments_and_tokens: + return utt_obj + + if ass_file_config.resegment_text_to_fill_space: + utt_obj = resegment_utt_obj(utt_obj, ass_file_config) + + utt_obj = make_word_level_ass_file(utt_obj, output_dir_root, ass_file_config,) + utt_obj = make_token_level_ass_file(utt_obj, output_dir_root, ass_file_config,) + + return utt_obj + + +def _get_word_n_chars(word): + n_chars = 0 + for token in word.tokens: + if token.text != BLANK_TOKEN: + n_chars += len(token.text) + return n_chars + + +def _get_segment_n_chars(segment): + n_chars = 0 + for word_or_token in segment.words_and_tokens: + if word_or_token.text == SPACE_TOKEN: + n_chars += 1 + elif word_or_token.text != BLANK_TOKEN: + n_chars += len(word_or_token.text) + return n_chars + + +def resegment_utt_obj(utt_obj, ass_file_config): + + # get list of just all words and tokens + all_words_and_tokens = [] + for segment_or_token in utt_obj.segments_and_tokens: + if type(segment_or_token) is Segment: + all_words_and_tokens.extend(segment_or_token.words_and_tokens) + else: + all_words_and_tokens.append(segment_or_token) + + # figure out how many chars will fit into one 'slide' and thus should be the max + # size of a segment + approx_chars_per_line = (PLAYERRESX - MARGINL - MARGINR) / ( + ass_file_config.fontsize * 0.6 + ) # assume chars 0.6 as wide as they are tall + approx_lines_per_segment = (PLAYERRESY - ass_file_config.marginv) / ( + ass_file_config.fontsize * 1.15 + ) # assume line spacing is 1.15 + if approx_lines_per_segment > ass_file_config.max_lines_per_segment: + approx_lines_per_segment = ass_file_config.max_lines_per_segment + + max_chars_per_segment = int(approx_chars_per_line * approx_lines_per_segment) + + new_segments_and_tokens = [] + all_words_and_tokens_pointer = 0 + for word_or_token in all_words_and_tokens: + if type(word_or_token) is Token: + new_segments_and_tokens.append(word_or_token) + all_words_and_tokens_pointer += 1 + else: + break + + new_segments_and_tokens.append(Segment()) + + while all_words_and_tokens_pointer < len(all_words_and_tokens): + word_or_token = all_words_and_tokens[all_words_and_tokens_pointer] + if type(word_or_token) is Word: + + # if this is going to be the first word in the segment, we definitely want + # to add it to the segment + if not new_segments_and_tokens[-1].words_and_tokens: + new_segments_and_tokens[-1].words_and_tokens.append(word_or_token) + + else: + # if not the first word, check what the new length of the segment will be + # if short enough - add this word to this segment; + # if too long - add to a new segment + this_word_n_chars = _get_word_n_chars(word_or_token) + segment_so_far_n_chars = _get_segment_n_chars(new_segments_and_tokens[-1]) + if this_word_n_chars + segment_so_far_n_chars < max_chars_per_segment: + new_segments_and_tokens[-1].words_and_tokens.append(word_or_token) + else: + new_segments_and_tokens.append(Segment()) + new_segments_and_tokens[-1].words_and_tokens.append(word_or_token) + + else: # i.e. word_or_token is a token + # currently this breaks the convention of tokens at the end/beginning + # of segments being listed as separate tokens in segment.word_and_tokens + # TODO: change code so we follow this convention + new_segments_and_tokens[-1].words_and_tokens.append(word_or_token) + + all_words_and_tokens_pointer += 1 + + utt_obj.segments_and_tokens = new_segments_and_tokens + + return utt_obj + + +def make_word_level_ass_file( + utt_obj, output_dir_root, ass_file_config, +): + + default_style_dict = { + "Name": "Default", + "Fontname": "Arial", + "Fontsize": str(ass_file_config.fontsize), + "PrimaryColour": "&Hffffff", + "SecondaryColour": "&Hffffff", + "OutlineColour": "&H0", + "BackColour": "&H0", + "Bold": "0", + "Italic": "0", + "Underline": "0", + "StrikeOut": "0", + "ScaleX": "100", + "ScaleY": "100", + "Spacing": "0", + "Angle": "0", + "BorderStyle": "1", + "Outline": "1", + "Shadow": "0", + "Alignment": "2", + "MarginL": str(MARGINL), + "MarginR": str(MARGINR), + "MarginV": str(ass_file_config.marginv), + "Encoding": "0", + } + + output_dir = os.path.join(output_dir_root, "ass", "words") + os.makedirs(output_dir, exist_ok=True) + output_file = os.path.join(output_dir, f"{utt_obj.utt_id}.ass") + + with open(output_file, 'w') as f: + default_style_top_line = "Format: " + ", ".join(default_style_dict.keys()) + default_style_bottom_line = "Style: " + ",".join(default_style_dict.values()) + + f.write( + ( + "[Script Info]\n" + "ScriptType: v4.00+\n" + f"PlayResX: {PLAYERRESX}\n" + f"PlayResY: {PLAYERRESY}\n" + "\n" + "[V4+ Styles]\n" + f"{default_style_top_line}\n" + f"{default_style_bottom_line}\n" + "\n" + "[Events]\n" + "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n\n" + ) + ) + + # write first set of subtitles for text before speech starts to be spoken + words_in_first_segment = [] + for segment_or_token in utt_obj.segments_and_tokens: + if type(segment_or_token) is Segment: + first_segment = segment_or_token + + for word_or_token in first_segment.words_and_tokens: + if type(word_or_token) is Word: + words_in_first_segment.append(word_or_token) + break + + text_before_speech = r"{\c&c7c1c2&}" + " ".join([x.text for x in words_in_first_segment]) + r"{\r}" + subtitle_text = ( + f"Dialogue: 0,{seconds_to_ass_format(0)},{seconds_to_ass_format(words_in_first_segment[0].t_start)},Default,,0,0,0,," + + text_before_speech.rstrip() + ) + + f.write(subtitle_text + '\n') + + for segment_or_token in utt_obj.segments_and_tokens: + if type(segment_or_token) is Segment: + segment = segment_or_token + + words_in_segment = [] + for word_or_token in segment.words_and_tokens: + if type(word_or_token) is Word: + words_in_segment.append(word_or_token) + + for word_i, word in enumerate(words_in_segment): + + text_before = " ".join([x.text for x in words_in_segment[:word_i]]) + if text_before != "": + text_before += " " + text_before = r"{\c&H3d2e31&}" + text_before + r"{\r}" + + if word_i < len(words_in_segment) - 1: + text_after = " " + " ".join([x.text for x in words_in_segment[word_i + 1 :]]) + else: + text_after = "" + text_after = r"{\c&c7c1c2&}" + text_after + r"{\r}" + + aligned_text = r"{\c&H09ab39&}" + word.text + r"{\r}" + aligned_text_off = r"{\c&H3d2e31&}" + word.text + r"{\r}" + + subtitle_text = ( + f"Dialogue: 0,{seconds_to_ass_format(word.t_start)},{seconds_to_ass_format(word.t_end)},Default,,0,0,0,," + + text_before + + aligned_text + + text_after.rstrip() + ) + f.write(subtitle_text + '\n') + + # add subtitles without word-highlighting for when words are not being spoken + if word_i < len(words_in_segment) - 1: + last_word_end = float(words_in_segment[word_i].t_end) + next_word_start = float(words_in_segment[word_i + 1].t_start) + if next_word_start - last_word_end > 0.001: + subtitle_text = ( + f"Dialogue: 0,{seconds_to_ass_format(last_word_end)},{seconds_to_ass_format(next_word_start)},Default,,0,0,0,," + + text_before + + aligned_text_off + + text_after.rstrip() + ) + f.write(subtitle_text + '\n') + + utt_obj.saved_output_files[f"words_level_ass_filepath"] = output_file + + return utt_obj + + +def make_token_level_ass_file( + utt_obj, output_dir_root, ass_file_config, +): + + default_style_dict = { + "Name": "Default", + "Fontname": "Arial", + "Fontsize": str(ass_file_config.fontsize), + "PrimaryColour": "&Hffffff", + "SecondaryColour": "&Hffffff", + "OutlineColour": "&H0", + "BackColour": "&H0", + "Bold": "0", + "Italic": "0", + "Underline": "0", + "StrikeOut": "0", + "ScaleX": "100", + "ScaleY": "100", + "Spacing": "0", + "Angle": "0", + "BorderStyle": "1", + "Outline": "1", + "Shadow": "0", + "Alignment": "2", + "MarginL": str(MARGINL), + "MarginR": str(MARGINR), + "MarginV": str(ass_file_config.marginv), + "Encoding": "0", + } + + output_dir = os.path.join(output_dir_root, "ass", "tokens") + os.makedirs(output_dir, exist_ok=True) + output_file = os.path.join(output_dir, f"{utt_obj.utt_id}.ass") + + with open(output_file, 'w') as f: + default_style_top_line = "Format: " + ", ".join(default_style_dict.keys()) + default_style_bottom_line = "Style: " + ",".join(default_style_dict.values()) + + f.write( + ( + "[Script Info]\n" + "ScriptType: v4.00+\n" + f"PlayResX: {PLAYERRESX}\n" + f"PlayResY: {PLAYERRESY}\n" + "ScaledBorderAndShadow: yes\n" + "\n" + "[V4+ Styles]\n" + f"{default_style_top_line}\n" + f"{default_style_bottom_line}\n" + "\n" + "[Events]\n" + "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n\n" + ) + ) + + # write first set of subtitles for text before speech starts to be spoken + tokens_in_first_segment = [] + for segment_or_token in utt_obj.segments_and_tokens: + if type(segment_or_token) is Segment: + for word_or_token in segment_or_token.words_and_tokens: + if type(word_or_token) is Token: + if word_or_token.text != BLANK_TOKEN: + tokens_in_first_segment.append(word_or_token) + else: + for token in word_or_token.tokens: + if token.text != BLANK_TOKEN: + tokens_in_first_segment.append(token) + + break + + for token in tokens_in_first_segment: + token.text_cased = token.text_cased.replace( + "▁", " " + ) # replace underscores used in subword tokens with spaces + token.text_cased = token.text_cased.replace(SPACE_TOKEN, " ") # space token with actual space + + text_before_speech = r"{\c&c7c1c2&}" + "".join([x.text_cased for x in tokens_in_first_segment]) + r"{\r}" + subtitle_text = ( + f"Dialogue: 0,{seconds_to_ass_format(0)},{seconds_to_ass_format(tokens_in_first_segment[0].t_start)},Default,,0,0,0,," + + text_before_speech.rstrip() + ) + + f.write(subtitle_text + '\n') + + for segment_or_token in utt_obj.segments_and_tokens: + if type(segment_or_token) is Segment: + segment = segment_or_token + + tokens_in_segment = [] # make list of (non-blank) tokens + for word_or_token in segment.words_and_tokens: + if type(word_or_token) is Token: + if word_or_token.text != BLANK_TOKEN: + tokens_in_segment.append(word_or_token) + else: + for token in word_or_token.tokens: + if token.text != BLANK_TOKEN: + tokens_in_segment.append(token) + + for token in tokens_in_segment: + token.text_cased = token.text_cased.replace( + "▁", " " + ) # replace underscores used in subword tokens with spaces + token.text_cased = token.text_cased.replace(SPACE_TOKEN, " ") # space token with actual space + + for token_i, token in enumerate(tokens_in_segment): + + text_before = "".join([x.text_cased for x in tokens_in_segment[:token_i]]) + text_before = r"{\c&H3d2e31&}" + text_before + r"{\r}" + + if token_i < len(tokens_in_segment) - 1: + text_after = "".join([x.text_cased for x in tokens_in_segment[token_i + 1 :]]) + else: + text_after = "" + text_after = r"{\c&c7c1c2&}" + text_after + r"{\r}" + + aligned_text = r"{\c&H09ab39&}" + token.text_cased + r"{\r}" + aligned_text_off = r"{\c&H3d2e31&}" + token.text_cased + r"{\r}" + + subtitle_text = ( + f"Dialogue: 0,{seconds_to_ass_format(token.t_start)},{seconds_to_ass_format(token.t_end)},Default,,0,0,0,," + + text_before + + aligned_text + + text_after.rstrip() + ) + f.write(subtitle_text + '\n') + + # add subtitles without word-highlighting for when words are not being spoken + if token_i < len(tokens_in_segment) - 1: + last_token_end = float(tokens_in_segment[token_i].t_end) + next_token_start = float(tokens_in_segment[token_i + 1].t_start) + if next_token_start - last_token_end > 0.001: + subtitle_text = ( + f"Dialogue: 0,{seconds_to_ass_format(last_token_end)},{seconds_to_ass_format(next_token_start)},Default,,0,0,0,," + + text_before + + aligned_text_off + + text_after.rstrip() + ) + f.write(subtitle_text + '\n') + + utt_obj.saved_output_files[f"tokens_level_ass_filepath"] = output_file + + return utt_obj diff --git a/tools/nemo_forced_aligner/utils/make_ctm_files.py b/tools/nemo_forced_aligner/utils/make_ctm_files.py new file mode 100644 index 000000000000..f0326c07cf8f --- /dev/null +++ b/tools/nemo_forced_aligner/utils/make_ctm_files.py @@ -0,0 +1,114 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import soundfile as sf +from utils.constants import BLANK_TOKEN, SPACE_TOKEN +from utils.data_prep import Segment, Word + + +def make_ctm_files( + utt_obj, output_dir_root, ctm_file_config, +): + """ + Function to save CTM files for all the utterances in the incoming batch. + """ + + # don't try to make files if utt_obj.segments_and_tokens is empty, which will happen + # in the case of the ground truth text being empty or the number of tokens being too large vs audio duration + if not utt_obj.segments_and_tokens: + return utt_obj + + # get audio file duration if we will need it later + if ctm_file_config.minimum_timestamp_duration > 0: + with sf.SoundFile(utt_obj.audio_filepath) as f: + audio_file_duration = f.frames / f.samplerate + else: + audio_file_duration = None + + utt_obj = make_ctm("tokens", utt_obj, output_dir_root, audio_file_duration, ctm_file_config,) + utt_obj = make_ctm("words", utt_obj, output_dir_root, audio_file_duration, ctm_file_config,) + utt_obj = make_ctm("segments", utt_obj, output_dir_root, audio_file_duration, ctm_file_config,) + + return utt_obj + + +def make_ctm( + alignment_level, utt_obj, output_dir_root, audio_file_duration, ctm_file_config, +): + output_dir = os.path.join(output_dir_root, "ctm", alignment_level) + os.makedirs(output_dir, exist_ok=True) + + boundary_info_utt = [] + for segment_or_token in utt_obj.segments_and_tokens: + if type(segment_or_token) is Segment: + segment = segment_or_token + if alignment_level == "segments": + boundary_info_utt.append(segment) + + for word_or_token in segment.words_and_tokens: + if type(word_or_token) is Word: + word = word_or_token + if alignment_level == "words": + boundary_info_utt.append(word) + + for token in word.tokens: + if alignment_level == "tokens": + boundary_info_utt.append(token) + + else: + token = word_or_token + if alignment_level == "tokens": + boundary_info_utt.append(token) + + else: + token = segment_or_token + if alignment_level == "tokens": + boundary_info_utt.append(token) + + with open(os.path.join(output_dir, f"{utt_obj.utt_id}.ctm"), "w") as f_ctm: + for boundary_info_ in boundary_info_utt: # loop over every token/word/segment + + # skip if t_start = t_end = negative number because we used it as a marker to skip some blank tokens + if not (boundary_info_.t_start < 0 or boundary_info_.t_end < 0): + text = boundary_info_.text + start_time = boundary_info_.t_start + end_time = boundary_info_.t_end + + if ( + ctm_file_config.minimum_timestamp_duration > 0 + and ctm_file_config.minimum_timestamp_duration > end_time - start_time + ): + # make the predicted duration of the token/word/segment longer, growing it outwards equal + # amounts from the predicted center of the token/word/segment + token_mid_point = (start_time + end_time) / 2 + start_time = max(token_mid_point - ctm_file_config.minimum_timestamp_duration / 2, 0) + end_time = min( + token_mid_point + ctm_file_config.minimum_timestamp_duration / 2, audio_file_duration + ) + + if not ( + text == BLANK_TOKEN and ctm_file_config.remove_blank_tokens + ): # don't save blanks if we don't want to + # replace any spaces with so we dont introduce extra space characters to our CTM files + text = text.replace(" ", SPACE_TOKEN) + + f_ctm.write(f"{utt_obj.utt_id} 1 {start_time:.2f} {end_time - start_time:.2f} {text}\n") + + utt_obj.saved_output_files[f"{alignment_level}_level_ctm_filepath"] = os.path.join( + output_dir, f"{utt_obj.utt_id}.ctm" + ) + + return utt_obj diff --git a/tools/nemo_forced_aligner/utils/make_output_files.py b/tools/nemo_forced_aligner/utils/make_output_files.py deleted file mode 100644 index a2d8c80a6580..000000000000 --- a/tools/nemo_forced_aligner/utils/make_output_files.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import os -from pathlib import Path - -import soundfile as sf -from utils.constants import BLANK_TOKEN, SPACE_TOKEN - - -def _get_utt_id(audio_filepath, audio_filepath_parts_in_utt_id): - fp_parts = Path(audio_filepath).parts[-audio_filepath_parts_in_utt_id:] - utt_id = Path("_".join(fp_parts)).stem - utt_id = utt_id.replace(" ", "-") # replace any spaces in the filepath with dashes - return utt_id - - -def add_t_start_end_to_boundary_info(boundary_info_utt, alignment_utt): - """ - We use the list of alignments to add the timesteps where each token/word/segment is predicted to - start and end. - boundary_info_utt can be any one of the variables referred to as `token_info`, `word_info`, `segment_info` - in other parts of the code. - - e.g. the input boundary info could be - boundary_info_utt = [ - {'text': 'hi', 's_start': 1, 's_end': 3}, - {'text': 'world', 's_start': 7, 's_end': 15}, - {'text': 'hey', 's_start': 19, 's_end': 23}, - ] - - and the alignment could be - alignment_utt = [ 1, 1, 3, 3, 4, 5, 7, 7, 9, 10, 11, 12, 13, 15, 17, 17, 19, 21, 23, 23] - - in which case the output would be: - boundary_info_utt = [ - {'text': 'hi', 's_start': 1, 's_end': 3, 't_start': 0, 't_end': 3}, - {'text': 'world', 's_start': 7, 's_end': 15, 't_start': 6, 't_end': 13}, - {'text': 'hey', 's_start': 19, 's_end': 23, 't_start': 16, 't_end': 19}, - ] - """ - # first remove boundary_info of any items that are not in the alignment - # the only items we expect not to be in the alignment are blanks that the alignment chooses to skip - # we will iterate boundary_info in reverse order for this to make popping the items simple - s_in_alignment = set(alignment_utt) - for boundary_info_pointer in range(len(boundary_info_utt) - 1, -1, -1): - s_in_boundary_info = set( - range( - boundary_info_utt[boundary_info_pointer]["s_start"], - boundary_info_utt[boundary_info_pointer]["s_end"] + 1, - ) - ) - item_not_in_alignment = True - for s_ in s_in_boundary_info: - if s_ in s_in_alignment: - item_not_in_alignment = False - - if item_not_in_alignment: - boundary_info_utt.pop(boundary_info_pointer) - - # now update boundary_info with t_start and t_end - boundary_info_pointer = 0 - for t, s_at_t in enumerate(alignment_utt): - if s_at_t == boundary_info_utt[boundary_info_pointer]["s_start"]: - if "t_start" not in boundary_info_utt[boundary_info_pointer]: - # we have just reached the start of the word/token/segment in the alignment => update t_start - boundary_info_utt[boundary_info_pointer]["t_start"] = t - - if t < len(alignment_utt) - 1: # this if is to avoid accessing an index that is not in the list - if alignment_utt[t + 1] > boundary_info_utt[boundary_info_pointer]["s_end"]: - if "t_end" not in boundary_info_utt[boundary_info_pointer]: - boundary_info_utt[boundary_info_pointer]["t_end"] = t - - boundary_info_pointer += 1 - else: # i.e. t == len(alignment) - 1, i.e. we are a the final element in alignment - # add final t_end if we haven't already - if "t_end" not in boundary_info_utt[boundary_info_pointer]: - boundary_info_utt[boundary_info_pointer]["t_end"] = t - - if boundary_info_pointer == len(boundary_info_utt): - # we have finished populating boundary_info with t_start and t_end, - # but we might have some final remaining elements (blanks) in the alignment which we dont care about - # => break, so as not to cause issues trying to access boundary_info[boundary_info_pointer] - break - - return boundary_info_utt - - -def make_ctm( - boundary_info_batch, - alignments_batch, - manifest_lines_batch, - model, - model_downsample_factor, - output_dir, - remove_blank_tokens_from_ctm, - audio_filepath_parts_in_utt_id, - minimum_timestamp_duration, -): - """ - Function to save CTM files for all the utterances in the incoming batch. - """ - - assert len(boundary_info_batch) == len(alignments_batch) == len(manifest_lines_batch) - # we also assume that utterances are in the same order in boundary_info_batch, alignments_batch - # and manifest_lines_batch - this should be the case unless there is a strange bug upstream in the - # code - - os.makedirs(output_dir, exist_ok=True) - - # the ratio to convert from timesteps (the units of 't_start' and 't_end' in boundary_info_utt) - # to the number of samples ('samples' in the sense of 16000 'samples' per second) - timestep_to_sample_ratio = model.preprocessor.featurizer.hop_length * model_downsample_factor - - for boundary_info_utt, alignment_utt, manifest_line in zip( - boundary_info_batch, alignments_batch, manifest_lines_batch - ): - - boundary_info_utt = add_t_start_end_to_boundary_info(boundary_info_utt, alignment_utt) - - # get utt_id that will be used for saving CTM file as .ctm - utt_id = _get_utt_id(manifest_line['audio_filepath'], audio_filepath_parts_in_utt_id) - - # get audio file duration if we will need it later - if minimum_timestamp_duration > 0: - with sf.SoundFile(manifest_line["audio_filepath"]) as f: - audio_file_duration = f.frames / f.samplerate - - with open(os.path.join(output_dir, f"{utt_id}.ctm"), "w") as f_ctm: - for boundary_info_ in boundary_info_utt: # loop over every token/word/segment - text = boundary_info_["text"] - start_sample = boundary_info_["t_start"] * timestep_to_sample_ratio - end_sample = (boundary_info_["t_end"] + 1) * timestep_to_sample_ratio - 1 - - start_time = start_sample / model.cfg.sample_rate - end_time = end_sample / model.cfg.sample_rate - - if minimum_timestamp_duration > 0 and minimum_timestamp_duration > end_time - start_time: - # make the predicted duration of the token/word/segment longer, growing it outwards equal - # amounts from the predicted center of the token/word/segment - token_mid_point = (start_time + end_time) / 2 - start_time = max(token_mid_point - minimum_timestamp_duration / 2, 0) - end_time = min(token_mid_point + minimum_timestamp_duration / 2, audio_file_duration) - - if not (text == BLANK_TOKEN and remove_blank_tokens_from_ctm): # don't save blanks if we don't want to - # replace any spaces with so we dont introduce extra space characters to our CTM files - text = text.replace(" ", SPACE_TOKEN) - - f_ctm.write(f"{utt_id} 1 {start_time:.2f} {end_time - start_time:.2f} {text}\n") - - return None - - -def make_new_manifest( - output_dir, - original_manifest_filepath, - additional_ctm_grouping_separator, - audio_filepath_parts_in_utt_id, - pred_text_all_lines, -): - """ - Function to save a new manifest with the same info as the original manifest, but also the paths to the - CTM files for each utterance and the "pred_text" if it was used for the alignment. - """ - if pred_text_all_lines: - with open(original_manifest_filepath, 'r') as f: - num_lines_in_manifest = sum(1 for _ in f) - - if not num_lines_in_manifest == len(pred_text_all_lines): - raise RuntimeError( - f"Number of lines in the original manifest ({num_lines_in_manifest}) does not match " - f"the number of pred_texts we have ({len(pred_text_all_lines)}). Something has gone wrong." - ) - - tgt_manifest_name = str(Path(original_manifest_filepath).stem) + "_with_ctm_paths.json" - tgt_manifest_filepath = str(Path(output_dir) / tgt_manifest_name) - - with open(original_manifest_filepath, 'r') as fin, open(tgt_manifest_filepath, 'w') as fout: - for i_line, line in enumerate(fin): - data = json.loads(line) - - utt_id = _get_utt_id(data["audio_filepath"], audio_filepath_parts_in_utt_id) - - data["token_level_ctm_filepath"] = str(Path(output_dir) / "tokens" / f"{utt_id}.ctm") - data["word_level_ctm_filepath"] = str(Path(output_dir) / "words" / f"{utt_id}.ctm") - - if additional_ctm_grouping_separator: - data["additional_segment_level_ctm_filepath"] = str( - Path(output_dir) / "additional_segments" / f"{utt_id}.ctm" - ) - - if pred_text_all_lines: - data['pred_text'] = pred_text_all_lines[i_line] - - new_line = json.dumps(data) - - fout.write(f"{new_line}\n") diff --git a/tools/nemo_forced_aligner/utils/make_output_manifest.py b/tools/nemo_forced_aligner/utils/make_output_manifest.py new file mode 100644 index 000000000000..7ee3fc77f7ab --- /dev/null +++ b/tools/nemo_forced_aligner/utils/make_output_manifest.py @@ -0,0 +1,35 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + + +def write_manifest_out_line( + f_manifest_out, utt_obj, +): + + data = {"audio_filepath": utt_obj.audio_filepath} + if not utt_obj.text is None: + data["text"] = utt_obj.text + + if not utt_obj.pred_text is None: + data["pred_text"] = utt_obj.pred_text + + for key, val in utt_obj.saved_output_files.items(): + data[key] = val + + new_line = json.dumps(data) + f_manifest_out.write(f"{new_line}\n") + + return None diff --git a/tools/nemo_forced_aligner/utils/viterbi_decoding.py b/tools/nemo_forced_aligner/utils/viterbi_decoding.py index bc9a45dda527..78336f800e14 100644 --- a/tools/nemo_forced_aligner/utils/viterbi_decoding.py +++ b/tools/nemo_forced_aligner/utils/viterbi_decoding.py @@ -36,6 +36,7 @@ def viterbi_decoding(log_probs_batch, y_batch, T_batch, U_batch, viterbi_device) Looks like: [[0, 0, 1, 2, 2, 3, 3, ..., ], ..., [0, 1, 2, 2, 2, 3, 4, ....]]. Each list inside alignments_batch is of length T_batch[location of utt in batch]. """ + B, T_max, _ = log_probs_batch.shape U_max = y_batch.shape[1] @@ -50,15 +51,14 @@ def viterbi_decoding(log_probs_batch, y_batch, T_batch, U_batch, viterbi_device) # make log_probs_padded tensor of shape (B, T_max, V +1 ) where all of # log_probs_padded[:,:,-1] is the 'V_NEGATIVE_NUM' log_probs_padded = torch.cat((log_probs_batch, padding_for_log_probs), dim=2) - # make log_probs_reordered tensor of shape (B, T_max, U_max) - # it contains the log_probs for only the tokens that are in the Ground Truth, and in the order - # that they occur - log_probs_reordered = torch.gather(input=log_probs_padded, dim=2, index=y_batch.unsqueeze(1).repeat(1, T_max, 1)) - # initialize tensors of viterbi probabilies and backpointers - v_matrix = V_NEGATIVE_NUM * torch.ones_like(log_probs_reordered) - backpointers = -999 * torch.ones_like(v_matrix) - v_matrix[:, 0, :2] = log_probs_reordered[:, 0, :2] + # initialize v_prev - tensor of previous timestep's viterbi probabilies, of shape (B, U_max) + v_prev = V_NEGATIVE_NUM * torch.ones((B, U_max), device=viterbi_device) + v_prev[:, :2] = torch.gather(input=log_probs_padded[:, 0, :], dim=1, index=y_batch[:, :2]) + + # initialize backpointers_rel - which contains values like 0 to indicate the backpointer is to the same u index, + # 1 to indicate the backpointer pointing to the u-1 index and 2 to indicate the backpointer is pointing to the u-2 index + backpointers_rel = -99 * torch.ones((B, T_max, U_max), dtype=torch.int8, device=viterbi_device) # Make a letter_repetition_mask the same shape as y_batch # the letter_repetition_mask will have 'True' where the token (including blanks) is the same @@ -70,24 +70,23 @@ def viterbi_decoding(log_probs_batch, y_batch, T_batch, U_batch, viterbi_device) letter_repetition_mask[:, :2] = 1 # make sure dont apply mask to first 2 tokens letter_repetition_mask = letter_repetition_mask == 0 - # bp_absolute_template is a tensor we will need during the Viterbi decoding to convert our argmaxes from indices between 0 and 2, - # to indices in the range (0, U_max-1) indicating from which token the mostly path up to that point came from. - # it is a tensor of shape (B, U_max) that looks like - # bp_absolute_template = [ - # [0, 1, 2, ...,, U_max] - # [0, 1, 2, ...,, U_max] - # [0, 1, 2, ...,, U_max] - # ... rows repeated so there are B number of rows in total - # ] - bp_absolute_template = torch.arange(U_max, device=viterbi_device).unsqueeze(0).repeat(B, 1) - for t in range(1, T_max): # e_current is a tensor of shape (B, U_max) of the log probs of every possible token at the current timestep - e_current = log_probs_reordered[:, t, :] + e_current = torch.gather(input=log_probs_padded[:, t, :], dim=1, index=y_batch) + + # apply a mask to e_current to cope with the fact that we do not keep the whole v_matrix and continue + # calculating viterbi probabilities during some 'padding' timesteps + t_exceeded_T_batch = t >= T_batch - # v_prev is a tensor of shape (B, U_max) of the viterbi probabilities 1 timestep back and in the same token position - v_prev = v_matrix[:, t - 1, :] + U_can_be_final = torch.logical_or( + torch.arange(0, U_max, device=viterbi_device).unsqueeze(0) == (U_batch.unsqueeze(1) - 0), + torch.arange(0, U_max, device=viterbi_device).unsqueeze(0) == (U_batch.unsqueeze(1) - 1), + ) + + mask = torch.logical_not(torch.logical_and(t_exceeded_T_batch.unsqueeze(1), U_can_be_final,)).long() + + e_current = e_current * mask # v_prev_shifted is a tensor of shape (B, U_max) of the viterbi probabilities 1 timestep back and 1 token position back v_prev_shifted = torch.roll(v_prev, shifts=1, dims=1) @@ -111,26 +110,27 @@ def viterbi_decoding(log_probs_batch, y_batch, T_batch, U_batch, viterbi_device) # candidates_v_current are our candidate viterbi probabilities for every token position, from which # we will pick the max and record the argmax candidates_v_current = v_prev_dup + e_current.unsqueeze(2) - v_current, bp_relative = torch.max(candidates_v_current, dim=2) - - # convert our argmaxes from indices between 0 and 2, to indices in the range (0, U_max-1) indicating - # from which token the mostly path up to that point came from - bp_absolute = bp_absolute_template - bp_relative + # we straight away save results in v_prev instead of v_current, so that the variable v_prev will be ready for the + # next iteration of the for-loop + v_prev, bp_relative = torch.max(candidates_v_current, dim=2) - # update our tensors containing all the viterbi probabilites and backpointers - v_matrix[:, t, :] = v_current - backpointers[:, t, :] = bp_absolute + backpointers_rel[:, t, :] = bp_relative - # trace backpointers TODO: parallelize over batch_size + # trace backpointers alignments_batch = [] for b in range(B): T_b = int(T_batch[b]) U_b = int(U_batch[b]) - final_state = int(torch.argmax(v_matrix[b, T_b - 1, U_b - 2 : U_b])) + U_b - 2 - alignment_b = [final_state] - for t in range(T_b - 1, 0, -1): - alignment_b.insert(0, int(backpointers[b, t, alignment_b[0]])) + if U_b == 1: # i.e. we put only a blank token in the reference text because the reference text is empty + current_u = 0 # set initial u to 0 and let the rest of the code block run as usual + else: + current_u = int(torch.argmax(v_prev[b, U_b - 2 : U_b])) + U_b - 2 + alignment_b = [current_u] + for t in range(T_max - 1, 0, -1): + current_u = current_u - int(backpointers_rel[b, t, current_u]) + alignment_b.insert(0, current_u) + alignment_b = alignment_b[:T_b] alignments_batch.append(alignment_b) return alignments_batch diff --git a/tools/speech_data_explorer/data_explorer.py b/tools/speech_data_explorer/data_explorer.py index 23ea375fa608..65eafc5c9d49 100755 --- a/tools/speech_data_explorer/data_explorer.py +++ b/tools/speech_data_explorer/data_explorer.py @@ -59,6 +59,7 @@ 'contains ': 'contains', } comparison_mode = False + # parse table filter queries def split_filter_part(filter_part): for op in filter_operators: @@ -125,6 +126,7 @@ def parse_args(): # automaticly going in comparison mode, if there is names_compared argument if args.names_compared is not None: comparison_mode = True + logging.error("comparison mod set to true") else: comparison_mode = False @@ -549,6 +551,7 @@ def absolute_audio_filepath(audio_filepath, audio_base_path): name_1, name_2 = args.names_compared print(name_1, name_2) + print('Loading data...') if not comparison_mode: data, wer, cer, wmr, mwa, num_hours, vocabulary, alphabet, metrics_available = load_data( @@ -930,8 +933,7 @@ def update_wordstable(page_current, sort_by, filter_query): wordstable_columns_tool = [{'name': 'Word', 'id': 'word'}, {'name': 'Count', 'id': 'count'}] wordstable_columns_tool.append({'name': 'Accuracy_1, %', 'id': 'accuracy_1'}) wordstable_columns_tool.append({'name': 'Accuracy_2, %', 'id': 'accuracy_2'}) -# wordstable_columns_tool.append({'name': 'Accuracy_' + name_1 + ', %', 'id': 'accuracy_1'}) -# wordstable_columns_tool.append({'name': 'Accuracy_' + name_2 + ', %', 'id': 'accuracy_2'}) + if comparison_mode: model_name_1, model_name_2 = name_1, name_2 @@ -939,6 +941,47 @@ def update_wordstable(page_current, sort_by, filter_query): for i in range(len(vocabulary_1)): vocabulary_1[i].update(vocabulary_2[i]) + def _wer_(grnd, pred): + grnd_words = grnd.split() + pred_words = pred.split() + edit_distance = editdistance.eval(grnd_words, pred_words) + wer = edit_distance / len(grnd_words) + return wer + + def metric(a, b, met=None): + cer = editdistance.distance(a, b) / len(a) + wer = _wer_(a, b) + return round(float(wer) * 100, 2), round(float(cer) * 100, 2) + + def write_metrics(data, Ox, Oy): + da = pd.DataFrame.from_records(data) + gt = da['text'] + tt_1 = da[Ox] + tt_2 = da[Oy] + + wer_tt1_c, cer_tt1_c = [], [] + wer_tt2_c, cer_tt2_c = [], [] + + for j in range(len(gt)): + wer_tt1, cer_tt1 = metric(gt[j], tt_1[j]) # first model + wer_tt2, cer_tt2 = metric(gt[j], tt_2[j]) # second model + wer_tt1_c.append(wer_tt1) + cer_tt1_c.append(cer_tt1) + wer_tt2_c.append(wer_tt2) + cer_tt2_c.append(cer_tt2) + + da['wer_' + Ox] = pd.Series(wer_tt1_c, index=da.index) + da['wer_' + Oy] = pd.Series(wer_tt2_c, index=da.index) + da['cer_' + Ox] = pd.Series(cer_tt1_c, index=da.index) + da['cer_' + Oy] = pd.Series(cer_tt2_c, index=da.index) + return da.to_dict('records') + + data_with_metrics = write_metrics(data, model_name_1, model_name_2) + if args.show_statistics is not None: + textdiffstyle = {'border': 'none', 'width': '100%', 'height': '100%'} + else: + textdiffstyle = {'border': 'none', 'width': '1%', 'height': '1%', 'display': 'none'} + def prepare_data(df, name1=model_name_1, name2=model_name_2): res = pd.DataFrame() tmp = df['word'] @@ -1050,6 +1093,33 @@ def read_query(query): return "No filter query" return dcc.Markdown('`filter_query = "{}"`'.format(query)) + ############ + @app.callback( + Output('filter-query-input-2', 'style'), + Output('filter-query-output-2', 'style'), + Input('filter-query-read-write', 'value'), + ) + def query_input_output(val): + input_style = {'width': '100%'} + output_style = {} + input_style.update(display='inline-block') + output_style.update(display='none') + return input_style, output_style + + @app.callback(Output('datatable-advanced-filtering-2', 'filter_query'), Input('filter-query-input-2', 'value')) + def write_query(query): + if query is None: + return '' + return query + + @app.callback(Output('filter-query-output-2', 'children'), Input('datatable-advanced-filtering-2', 'filter_query')) + def read_query(query): + if query is None: + return "No filter query" + return dcc.Markdown('`filter_query = "{}"`'.format(query)) + + ############ + def display_query(query): if query is None: return '' @@ -1070,48 +1140,324 @@ def display_query(query): comparison_layout = [ html.Div( - [dcc.Markdown("model 1:" + ' ' + model_name_1[10:]), dcc.Markdown("model 2:" + ' ' + model_name_2[10:])] + [ + dcc.Markdown("model 1:" + ' ' + model_name_1[10:]), + dcc.Markdown("model 2:" + ' ' + model_name_2[10:]), + dcc.Dropdown( + ['word level', 'utterance level'], + 'word level', + placeholder="choose comparison lvl", + id='lvl_choose', + ), + ] ), html.Hr(), html.Div( [ - dcc.Dropdown(for_col_names.columns[::], 'accuracy_model_' + model_name_1, id='xaxis-column'), - dcc.Dropdown(for_col_names.columns[::], 'accuracy_model_' + model_name_2, id='yaxis-column'), - dcc.Dropdown( - for_col_names.select_dtypes(include='number').columns[::], - placeholder='Select what will encode color of points', - id='color-column', + html.Div( + [ + dcc.Dropdown(for_col_names.columns[::], 'accuracy_model_' + model_name_1, id='xaxis-column'), + dcc.Dropdown(for_col_names.columns[::], 'accuracy_model_' + model_name_2, id='yaxis-column'), + dcc.Dropdown( + for_col_names.select_dtypes(include='number').columns[::], + placeholder='Select what will encode color of points', + id='color-column', + ), + dcc.Dropdown( + for_col_names.select_dtypes(include='number').columns[::], + placeholder='Select what will encode size of points', + id='size-column', + ), + dcc.Dropdown( + ['yes', 'no'], + placeholder='if you want to enable dot spacing', + id='dot_spacing', + style={'width': '200%'}, + ), + dcc.Input(id='radius', placeholder='Enter radius of spacing (std is 0.01)'), + html.Hr(), + dcc.Input(id='filter-query-input', placeholder='Enter filter query',), + ], + style={'width': '200%', 'display': 'inline-block', 'float': 'middle'}, ), - dcc.Dropdown( - for_col_names.select_dtypes(include='number').columns[::], - placeholder='Select what will encode size of points', - id='size-column', + html.Hr(), + html.Div(id='filter-query-output'), + dash_table.DataTable( + id='datatable-advanced-filtering', + columns=wordstable_columns_tool, + data=vocabulary_1, + editable=False, + page_action='native', + page_size=5, + filter_action="native", ), - dcc.Dropdown(['yes', 'no'], placeholder='if you want to enable dot spacing', id='dot_spacing'), - dcc.Input(id='radius', placeholder='Enter radius of spacing (std is 0.01)'), html.Hr(), - dcc.Input(id='filter-query-input', placeholder='Enter filter query'), + html.Div(id='datatable-query-structure', style={'whitespace': 'pre'}), + html.Hr(), + dbc.Row(dbc.Col(dcc.Graph(id='voc_graph'),),), + html.Hr(), ], - style={'width': '50%', 'display': 'inline-block', 'float': 'middle'}, + id='wrd_lvl', + style={'display': 'block'}, ), - html.Hr(), - html.Div(id='filter-query-output'), - dash_table.DataTable( - id='datatable-advanced-filtering', - columns=wordstable_columns_tool, - data=vocabulary_1, - editable=False, - page_action='native', - page_size=5, - filter_action="native", + html.Div( + [ + html.Div( + [ + dcc.Dropdown(['WER', 'CER'], 'WER', placeholder="Choose metric", id="choose_metric"), + dbc.Row(dbc.Col(html.H5('Data'), class_name='text-secondary'), class_name='mt-3'), + html.Hr(), + html.Hr(), + dcc.Input( + id='filter-query-input-2', placeholder='Enter filter query', style={'width': '100%'} + ), + html.Div(id='filter-query-output-2'), + dbc.Row( + dbc.Col( + [ + dash_table.DataTable( + id='datatable-advanced-filtering-2', + columns=[ + {'name': k.replace('_', ' '), 'id': k, 'hideable': True} + for k in data_with_metrics[0] + ], + data=data_with_metrics, + editable=False, + page_action='native', + page_size=5, + row_selectable='single', + selected_rows=[0], + page_current=0, + filter_action="native", + style_cell={ + 'overflow': 'hidden', + 'textOverflow': 'ellipsis', + 'maxWidth': 0, + 'textAlign': 'center', + }, + style_header={ + 'color': 'text-primary', + 'text_align': 'center', + 'height': 'auto', + 'whiteSpace': 'normal', + }, + css=[ + { + 'selector': '.dash-spreadsheet-menu', + 'rule': 'position:absolute; bottom: 8px', + }, + {'selector': '.dash-filter--case', 'rule': 'display: none'}, + {'selector': '.column-header--hide', 'rule': 'display: none'}, + ], + ), + dbc.Row(dbc.Col(html.Audio(id='player-1', controls=True),), class_name='mt-3'), + ] + ) + ), + ] + + [ + dbc.Row( + [ + dbc.Col( + html.Div(children=k.replace('_', '-')), + width=2, + class_name='mt-1 bg-light font-monospace text-break small rounded border', + ), + dbc.Col( + html.Div(id='__' + k), + class_name='mt-1 bg-light font-monospace text-break small rounded border', + ), + ] + ) + for k in data_with_metrics[0] + ] + ), + ], + id='unt_lvl', + ), + ] + [ + html.Div( + [ + html.Div( + [ + dbc.Row(dbc.Col(dcc.Graph(id='utt_graph'),),), + html.Hr(), + dcc.Input(id='clicked_aidopath', style={'width': '100%'}), + html.Hr(), + dcc.Input(id='my-output-1', style={'display': 'none'}), # we do need this + ] + ), + html.Div([dbc.Row(dbc.Col(dcc.Graph(id='signal-graph-1')), class_name='mt-3'),]), + ], + id='down_thing', + style={'display': 'block'}, + ) + ] + +if args.show_statistics is not None: + comparison_layout += [ + html.Div( + [ + dbc.Row( + [ + dbc.Col( + html.Div(children='text diff'), + width=2, + class_name='mt-1 bg-light font-monospace text-break small rounded border', + ), + dbc.Col( + html.Iframe( + id='__diff', + sandbox='', + srcDoc='', + style=textdiffstyle, + className='bg-light font-monospace text-break small', + ), + class_name='mt-1 bg-light font-monospace text-break small rounded border', + ), + ], + id="text_diff_div", + ) + ], + id='mid_thing', + style={'display': 'block'}, ), - html.Hr(), - html.Div(id='datatable-query-structure', style={'whitespace': 'pre'}), - html.Hr(), - dbc.Row(dbc.Col(dcc.Graph(id='voc_graph'),),), - html.Hr(), ] + @app.callback( + [ + Output(component_id='wrd_lvl', component_property='style'), + Output(component_id='unt_lvl', component_property='style'), + Output(component_id='mid_thing', component_property='style'), + Output(component_id='down_thing', component_property='style'), + Input(component_id='lvl_choose', component_property='value'), + ] + ) + def show_hide_element(visibility_state): + if visibility_state == 'word level': + return ( + {'width': '50%', 'display': 'inline-block', 'float': 'middle'}, + {'width': '50%', 'display': 'none', 'float': 'middle'}, + {'display': 'none'}, + {'display': 'none'}, + ) + else: + return ( + {'width': '100%', 'display': 'none', 'float': 'middle'}, + {'width': '100%', 'display': 'inline-block', 'float': 'middle'}, + {'display': 'block'}, + {'display': 'block'}, + ) + + +if args.show_statistics is None: + + @app.callback( + [ + Output(component_id='wrd_lvl', component_property='style'), + Output(component_id='unt_lvl', component_property='style'), + Output(component_id='down_thing', component_property='style'), + Input(component_id='lvl_choose', component_property='value'), + ] + ) + def show_hide_element(visibility_state): + if args.show_statistics is not None: + a = {'border': 'none', 'width': '100%', 'height': '100%', 'display': 'block'} + else: + a = {'border': 'none', 'width': '100%', 'height': '100%', 'display': 'none'} + if visibility_state == 'word level': + return ( + {'width': '50%', 'display': 'inline-block', 'float': 'middle'}, + {'width': '50%', 'display': 'none', 'float': 'middle'}, + {'display': 'none'}, + ) + else: + return ( + {'width': '100%', 'display': 'none', 'float': 'middle'}, + {'width': '100%', 'display': 'inline-block', 'float': 'middle'}, + {'display': 'block'}, + ) + + +store = [] + + +@app.callback( + [Output('datatable-advanced-filtering-2', 'page_current'), Output('my-output-1', 'value')], + [Input('utt_graph', 'clickData'),], +) +def real_select_click(hoverData): + if hoverData is not None: + path = str(hoverData['points'][0]['customdata'][-1]) + for t in range(len(data_with_metrics)): + if data_with_metrics[t]['audio_filepath'] == path: + ind = t + s = t #% 5 + sel = s + pg = math.ceil(ind // 5) + return pg, sel + else: + return 0, 0 + + +@app.callback( + [Output('datatable-advanced-filtering-2', 'selected_rows')], [Input('my-output-1', 'value')], +) +def real_select_click(num): + s = num + return [[s]] + + +CALCULATED_METRIC = [False, False] + + +@app.callback( + [ + Output('utt_graph', 'figure'), + Output('clicked_aidopath', 'value'), + Input('choose_metric', 'value'), + Input('utt_graph', 'clickData'), + Input('datatable-advanced-filtering-2', 'derived_virtual_data'), + ], +) +def draw_table_with_metrics(met, hoverData, data_virt): + Ox = name_1 + Oy = name_2 + if met == "WER": + cerower = 'wer_' + else: + cerower = 'cer_' + da = pd.DataFrame.from_records(data_virt) + + c = da + fig = px.scatter( + c, + x=cerower + Ox, + y=cerower + Oy, + width=1000, + height=900, + color='num_words', + hover_data={ + 'text': True, + Ox: True, + Oy: True, + 'wer_' + Ox: True, + 'wer_' + Oy: True, + 'cer_' + Ox: True, + 'cer_' + Oy: True, + 'audio_filepath': True, + }, + ) #'numwords': True, + fig.add_shape(type="line", x0=0, y0=0, x1=100, y1=100, line=dict(color="Red", width=1, dash="dot",)) + fig.update_layout(clickmode='event+select') + fig.update_traces(marker_size=10) + path = None + + if hoverData is not None: + path = str(hoverData['points'][0]['customdata'][-1]) + + return fig, path + @app.callback( [Output('datatable', 'data'), Output('datatable', 'page_count')], @@ -1219,6 +1565,18 @@ def show_item(idx, data): return [data[idx[0]][k] for k in data[0]] +if comparison_mode: + + @app.callback( + [Output('__' + k, 'children') for k in data_with_metrics[0]], + [Input('datatable-advanced-filtering-2', 'selected_rows'), Input('datatable-advanced-filtering-2', 'data')], + ) + def show_item(idx, data): + if len(idx) == 0: + raise PreventUpdate + return [data[idx[0]][k] for k in data_with_metrics[0]] + + @app.callback(Output('_diff', 'srcDoc'), [Input('datatable', 'selected_rows'), Input('datatable', 'data'),]) def show_diff( idx, data, @@ -1245,6 +1603,35 @@ def show_diff( return diff_html +@app.callback( + Output('__diff', 'srcDoc'), + [Input('datatable-advanced-filtering-2', 'selected_rows'), Input('datatable-advanced-filtering-2', 'data'),], +) +def show_diff( + idx, data, +): + if len(idx) == 0: + raise PreventUpdate + orig_words = data[idx[0]]['text'] + orig_words = '\n'.join(orig_words.split()) + '\n' + + pred_words = data[idx[0]][fld_nm] + pred_words = '\n'.join(pred_words.split()) + '\n' + + diff = diff_match_patch.diff_match_patch() + diff.Diff_Timeout = 0 + orig_enc, pred_enc, enc = diff.diff_linesToChars(orig_words, pred_words) + diffs = diff.diff_main(orig_enc, pred_enc, False) + diff.diff_charsToLines(diffs, enc) + diffs_post = [] + for d in diffs: + diffs_post.append((d[0], d[1].replace('\n', ' '))) + + diff_html = diff.diff_prettyHtml(diffs_post) + + return diff_html + + @app.callback(Output('signal-graph', 'figure'), [Input('datatable', 'selected_rows'), Input('datatable', 'data')]) def plot_signal(idx, data): if len(idx) == 0: @@ -1298,6 +1685,62 @@ def plot_signal(idx, data): return figs +@app.callback( + Output('signal-graph-1', 'figure'), + [Input('datatable-advanced-filtering-2', 'selected_rows'), Input('datatable-advanced-filtering-2', 'data')], +) +def plot_signal(idx, data): + if len(idx) == 0: + raise PreventUpdate + figs = make_subplots(rows=2, cols=1, subplot_titles=('Waveform', 'Spectrogram')) + try: + filename = absolute_audio_filepath(data[idx[0]]['audio_filepath'], args.audio_base_path) + audio, fs = librosa.load(path=filename, sr=None) + if 'offset' in data[idx[0]]: + audio = audio[ + int(data[idx[0]]['offset'] * fs) : int((data[idx[0]]['offset'] + data[idx[0]]['duration']) * fs) + ] + time_stride = 0.01 + hop_length = int(fs * time_stride) + n_fft = 512 + # linear scale spectrogram + s = librosa.stft(y=audio, n_fft=n_fft, hop_length=hop_length) + s_db = librosa.power_to_db(S=np.abs(s) ** 2, ref=np.max, top_db=100) + figs.add_trace( + go.Scatter( + x=np.arange(audio.shape[0]) / fs, + y=audio, + line={'color': 'green'}, + name='Waveform', + hovertemplate='Time: %{x:.2f} s
Amplitude: %{y:.2f}
', + ), + row=1, + col=1, + ) + figs.add_trace( + go.Heatmap( + z=s_db, + colorscale=[[0, 'rgb(30,62,62)'], [0.5, 'rgb(30,128,128)'], [1, 'rgb(30,255,30)'],], + colorbar=dict(yanchor='middle', lenmode='fraction', y=0.2, len=0.5, ticksuffix=' dB'), + dx=time_stride, + dy=fs / n_fft / 1000, + name='Spectrogram', + hovertemplate='Time: %{x:.2f} s
Frequency: %{y:.2f} kHz
Magnitude: %{z:.2f} dB', + ), + row=2, + col=1, + ) + figs.update_layout({'margin': dict(l=0, r=0, t=20, b=0, pad=0), 'height': 500}) + figs.update_xaxes(title_text='Time, s', row=1, col=1) + figs.update_yaxes(title_text='Amplitude', row=1, col=1) + figs.update_xaxes(title_text='Time, s', row=2, col=1) + figs.update_yaxes(title_text='Frequency, kHz', row=2, col=1) + except Exception as ex: + app.logger.error(f'ERROR in plot signal: {ex}') + + return figs + + @app.callback(Output('player', 'src'), [Input('datatable', 'selected_rows'), Input('datatable', 'data')]) def update_player(idx, data): if len(idx) == 0: @@ -1320,5 +1763,30 @@ def update_player(idx, data): return '' +@app.callback( + Output('player-1', 'src'), + [Input('datatable-advanced-filtering-2', 'selected_rows'), Input('datatable-advanced-filtering-2', 'data')], +) +def update_player(idx, data): + if len(idx) == 0: + raise PreventUpdate + try: + filename = absolute_audio_filepath(data[idx[0]]['audio_filepath'], args.audio_base_path) + signal, sr = librosa.load(path=filename, sr=None) + if 'offset' in data[idx[0]]: + signal = signal[ + int(data[idx[0]]['offset'] * sr) : int((data[idx[0]]['offset'] + data[idx[0]]['duration']) * sr) + ] + with io.BytesIO() as buf: + # convert to PCM .wav + sf.write(buf, signal, sr, format='WAV') + buf.seek(0) + encoded = base64.b64encode(buf.read()) + return 'data:audio/wav;base64,{}'.format(encoded.decode()) + except Exception as ex: + app.logger.error(f'ERROR in audio player: {ex}') + return '' + + if __name__ == '__main__': app.run_server(host='0.0.0.0', port=args.port, debug=args.debug) diff --git a/tutorials/00_NeMo_Primer.ipynb b/tutorials/00_NeMo_Primer.ipynb index 2eff9c596b7f..1e484c14f607 100644 --- a/tutorials/00_NeMo_Primer.ipynb +++ b/tutorials/00_NeMo_Primer.ipynb @@ -42,7 +42,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", @@ -1146,7 +1146,7 @@ "\n", "NeMo constantly adds new models and new tasks to these examples, such that these examples serve as the basis to train and evaluate models from scratch with the provided config files.\n", "\n", - "NeMo Examples directory can be found here - https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples" + "NeMo Examples directory can be found here - https://github.com/NVIDIA/NeMo/tree/main/examples" ] }, { @@ -1251,7 +1251,7 @@ "\n", "While the tutorials are a great example of the simplicity of NeMo, please note for the best performance when training on real datasets, we advice the use of the example scripts instead of the tutorial notebooks. \n", "\n", - "NeMo Tutorials directory can be found here - https://github.com/NVIDIA/NeMo/tree/r1.19.0/tutorials" + "NeMo Tutorials directory can be found here - https://github.com/NVIDIA/NeMo/tree/main/tutorials" ] } ], diff --git a/tutorials/01_NeMo_Models.ipynb b/tutorials/01_NeMo_Models.ipynb index 2a65509bd8cd..748b24be7eea 100644 --- a/tutorials/01_NeMo_Models.ipynb +++ b/tutorials/01_NeMo_Models.ipynb @@ -37,7 +37,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/02_NeMo_Adapters.ipynb b/tutorials/02_NeMo_Adapters.ipynb index e6874d14169f..41747c4fa5d9 100644 --- a/tutorials/02_NeMo_Adapters.ipynb +++ b/tutorials/02_NeMo_Adapters.ipynb @@ -25,7 +25,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/AudioTranslationSample.ipynb b/tutorials/AudioTranslationSample.ipynb index b5c9d13a5c6d..02fa0325121c 100644 --- a/tutorials/AudioTranslationSample.ipynb +++ b/tutorials/AudioTranslationSample.ipynb @@ -38,7 +38,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n" ] }, @@ -249,7 +249,7 @@ "* [Speech Synthesis](https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/tts/Inference_ModelSelect.ipynb)\n", "\n", "\n", - "You can find scripts for training and fine-tuning ASR, NLP and TTS models [here](https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples). " + "You can find scripts for training and fine-tuning ASR, NLP and TTS models [here](https://github.com/NVIDIA/NeMo/tree/main/examples). " ] } ], diff --git a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb index ede417d3583c..13b61626e746 100644 --- a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb +++ b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb @@ -41,7 +41,7 @@ "!pip install text-unidecode\n", "\n", "### Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/VoiceSwapSample.ipynb b/tutorials/VoiceSwapSample.ipynb index ea8356981908..10abc42e2fb9 100644 --- a/tutorials/VoiceSwapSample.ipynb +++ b/tutorials/VoiceSwapSample.ipynb @@ -39,7 +39,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n" ] }, @@ -283,7 +283,7 @@ "* [Speech Synthesis](https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/tts/Inference_ModelSelect.ipynb)\n", "\n", "\n", - "You can find scripts for training and fine-tuning ASR, NLP and TTS models [here](https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples). " + "You can find scripts for training and fine-tuning ASR, NLP and TTS models [here](https://github.com/NVIDIA/NeMo/tree/main/examples). " ] }, { diff --git a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb index fac120e1b699..eaeb93bb92ff 100644 --- a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb +++ b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb @@ -40,7 +40,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -71,7 +71,7 @@ "\n", "For this tutorial (and limited by the compute and storage available on Colab environments), we will attempt to fine-tune an English ASR model onto the [Mozilla Common Voice](https://commonvoice.mozilla.org/en) dataset for Japanese. This dataset will also allow us to discuss a few details for fine-tuning low-resource languages. The methods discussed here can also be applied to languages with several thousand hours of data!\n", "\n", - "**Note**: It is advised to review the execution flow diagram for ASR models in order to correctly setup the model prior to fine-tuning - [ASR CTC Examples](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/asr/asr_ctc/README.md)\n" + "**Note**: It is advised to review the execution flow diagram for ASR models in order to correctly setup the model prior to fine-tuning - [ASR CTC Examples](https://github.com/NVIDIA/NeMo/blob/main/examples/asr/asr_ctc/README.md)\n" ] }, { diff --git a/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb b/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb index c0af01bd27c2..5293f85044fc 100644 --- a/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb +++ b/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb @@ -10,7 +10,7 @@ "NOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\n", "\n", - "Training an ASR model for a new language can be challenging, especially for low-resource languages (see [example](https://github.com/NVIDIA/NeMo/blob/r1.19.0/docs/source/asr/examples/kinyarwanda_asr.rst) for Kinyarwanda CommonVoice ASR model).\n", + "Training an ASR model for a new language can be challenging, especially for low-resource languages (see [example](https://github.com/NVIDIA/NeMo/blob/main/docs/source/asr/examples/kinyarwanda_asr.rst) for Kinyarwanda CommonVoice ASR model).\n", "\n", "This example describes all basic steps required to build ASR model for Esperanto:\n", "\n", @@ -160,7 +160,7 @@ "\n", "The tarred dataset allows storing the dataset as large *.tar files instead of small separate audio files. It may speed up the training and minimizes the load when data is moved from storage to GPU nodes.\n", "\n", - "The NeMo toolkit provides a [script]( https://github.com/NVIDIA/NeMo/blob/r1.19.0/scripts/speech_recognition/convert_to_tarred_audio_dataset.py) to get tarred dataset.\n", + "The NeMo toolkit provides a [script]( https://github.com/NVIDIA/NeMo/blob/main/scripts/speech_recognition/convert_to_tarred_audio_dataset.py) to get tarred dataset.\n", "\n", "```bash\n", "\n", @@ -207,11 +207,11 @@ "source": [ "## Training hyper-parameters\n", "\n", - "The training parameters are defined in the [config file](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/asr/conf/conformer/conformer_ctc_bpe.yaml) (general description of the [ASR configuration file](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/configs.html)). As an encoder, the [Conformer model](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/models.html#conformer-ctc) is used here, the training parameters for which are already well configured based on the training English models. However, the set of optimal parameters may differ for a new language. In this section, we will look at the set of simple parameters that can improve recognition quality for a new language without digging into the details of the Conformer model too much.\n", + "The training parameters are defined in the [config file](https://github.com/NVIDIA/NeMo/blob/main/examples/asr/conf/conformer/conformer_ctc_bpe.yaml) (general description of the [ASR configuration file](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/configs.html)). As an encoder, the [Conformer model](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/models.html#conformer-ctc) is used here, the training parameters for which are already well configured based on the training English models. However, the set of optimal parameters may differ for a new language. In this section, we will look at the set of simple parameters that can improve recognition quality for a new language without digging into the details of the Conformer model too much.\n", "\n", "### Select Training Batch Size\n", "\n", - "We trained model on server with 16 V100 GPUs with 32 GB. We use a local batch size = 32 per GPU V100), so global batch size is 32x16=512. In general, we observed, that global batch between 512 and 2048 works well for Conformer-CTC-Large model. One can use the [accumulate_grad_batches](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/asr/conf/conformer/conformer_ctc_bpe.yaml#L173) parameter to increase the size of the global batch, which is equal to *local_batch * num_gpu * accumulate_grad_batches*.\n", + "We trained model on server with 16 V100 GPUs with 32 GB. We use a local batch size = 32 per GPU V100), so global batch size is 32x16=512. In general, we observed, that global batch between 512 and 2048 works well for Conformer-CTC-Large model. One can use the [accumulate_grad_batches](https://github.com/NVIDIA/NeMo/blob/main/examples/asr/conf/conformer/conformer_ctc_bpe.yaml#L173) parameter to increase the size of the global batch, which is equal to *local_batch * num_gpu * accumulate_grad_batches*.\n", "\n", "### Selecting Optimizer and Learning Rate Scheduler\n", "\n", @@ -327,7 +327,7 @@ "+init_from_pretrained_model=${PRETRAINED_MODEL_NAME}\n", "```\n", "\n", - "If the size of the vocabulary differs from the one presented in the pretrained model, you need to change the vocabulary manually as done in the [finetuning tutorial](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb).\n", + "If the size of the vocabulary differs from the one presented in the pretrained model, you need to change the vocabulary manually as done in the [finetuning tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb).\n", "\n", "```python\n", "model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(f\"nvidia/{PRETRAINED_MODEL_NAME}\", map_location='cpu')\n", diff --git a/tutorials/asr/ASR_TTS_Tutorial.ipynb b/tutorials/asr/ASR_TTS_Tutorial.ipynb new file mode 100644 index 000000000000..d2d682d4b93c --- /dev/null +++ b/tutorials/asr/ASR_TTS_Tutorial.ipynb @@ -0,0 +1,846 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a3570803-9bfa-4e97-9891-5ae0759eb8ca", + "metadata": {}, + "source": [ + "# Hybrid ASR-TTS Models Tutorial" + ] + }, + { + "cell_type": "markdown", + "id": "50fc294f-f319-4465-8f90-a28b49843e60", + "metadata": {}, + "source": [ + "This tutorial is intended to introduce you to using ASR-TTS Hybrid Models, also known as `ASRWithTTSModel`, to finetune existing ASR models using an integrated text-to-mel-spectrogram generator. " + ] + }, + { + "cell_type": "markdown", + "id": "d2a01ca5-bd48-4d82-a97d-5b07a7b27ca0", + "metadata": {}, + "source": [ + "## ASR-TTS Models: Description" + ] + }, + { + "cell_type": "markdown", + "id": "b32467a9-c458-4590-aff7-e8d1e91b0870", + "metadata": {}, + "source": [ + "### Problem\n", + "\n", + "Adapting ASR models to a new text domain is a challenging task. Modern end-to-end systems can require several hundreds and thousands of hours to perform recognition with high accuracy. Acquiring audio-text paired data for a specific domain can be prohibitively expensive. Text-only data, on the other side, is widely available. \n", + "\n", + "One of the approaches for efficient adaptation is synthesizing audio data from text and using such data for training the ASR model conventionally. We modify this approach, incorporating TTS and ASR systems into a single model. We use only a lightweight multi-speaker text-to-mel-spectrogram generator (without vocoder) with an optional enhancer that mitigates the mismatch between natural and synthetic spectrograms.\n", + "\n", + "### Architecture\n", + "\n", + "\"ASR-TTS\n", + "\n", + "`ASRWithTTSModel` is a transparent wrapper for three models:\n", + "- ASR model (`EncDecCTCModelBPE`, `EncDecRNNTBPEModel` or `EncDecHybridRNNTCTCBPEModel` are supported)\n", + "- frozen text-to-mel-spectrogram model (currently, only `FastPitch` model is supported)\n", + "- optional frozen enhancer model\n", + "\n", + "The architecture is shown in the figure. \n", + "\n", + "The model can take text or audio as input during training. In the case of audio input, a mel spectrogram is extracted as usual and passed to the ASR neural network. In the case of textual input, the mel spectrogram generator produces a spectrogram on the fly from the text. The spectrogram is improved by the enhancer (if present) and fed into the ASR model. \n", + "\n", + "### Capabilities and Limitations\n", + "\n", + "This approach can be used to finetune the pretrained ASR model using text-only data. Training new models from scratch is also possible. The text should contain phrases and sentences and be split into sentences (~45 words maximum, corresponding to ~16.7 seconds of synthesized audio). Using only separate words is not recommended since this doesn't allow to adapt ASR model adapts to recognize new words in context. \n", + "\n", + "Mixing audio-text pairs with text-only data from the original domain is recommended to preserve performance on the original data. \n", + "Also, fusing BatchNorm (see parameters below) is recommended for the best performance when using a large proportion of text compared to the amount of audio-text pairs in finetuning process.\n", + "\n", + "\n", + "### Implementation Details and Experiments\n", + "\n", + "Further details about implementation and experiments can be found in the paper [Text-only domain adaptation for end-to-end ASR using integrated text-to-mel-spectrogram generator](https://arxiv.org/abs/2302.14036)\n" + ] + }, + { + "cell_type": "markdown", + "id": "2702d081-c675-4a96-8263-6059e310d048", + "metadata": {}, + "source": [ + "## Example: Finetuning ASR Model Using Text-Only Data" + ] + }, + { + "cell_type": "markdown", + "id": "30fe41a3-f36c-4803-a7f0-4260fb111478", + "metadata": {}, + "source": [ + "In this example, we will finetune a pretrained small Conformer-CTC model using text-only data from the AN4 dataset. [AN4 dataset](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/datasets.html#an4-dataset) is a small dataset that consists of sentences of people spelling out addresses, names, and other entities.\n", + "\n", + "The model is pretrained on LibriSpeech data and performs poorly on AN4 data (`~17.7%` WER on test data).\n", + "We will use only text from the train part to construct text-only training data for our model and will achieve a good performance on the test part of the AN4 dataset (`~2%` WER)." + ] + }, + { + "cell_type": "markdown", + "id": "923819bb-7822-412a-8f9b-98c76c70e0bb", + "metadata": {}, + "source": [ + "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", + "\n", + "Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. Run the following cell to set up dependencies.\n", + "\n", + "NOTE: The user is responsible for checking the content of datasets and the applicable licenses and determining if they are suitable for the intended use." + ] + }, + { + "cell_type": "markdown", + "id": "4685a9da-b3f8-4b95-ba74-64a114223233", + "metadata": {}, + "source": [ + "### Install Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d22d241-6c46-492c-99db-3bd69777243c", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " import google.colab\n", + "\n", + " IN_COLAB = True\n", + "except (ImportError, ModuleNotFoundError):\n", + " IN_COLAB = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc38a961-8822-4685-89ae-ab6f591f9c28", + "metadata": {}, + "outputs": [], + "source": [ + "BRANCH = 'r1.20.0'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd60b1c4-7b1d-421d-9d63-95d7458bbcbd", + "metadata": {}, + "outputs": [], + "source": [ + "# If you're using Google Colab and not running locally, run this cell.\n", + "\n", + "if IN_COLAB:\n", + " ## Install dependencies\n", + " !pip install wget\n", + " !apt-get install sox libsndfile1 ffmpeg\n", + " !pip install text-unidecode\n", + "\n", + " ## Install NeMo\n", + " !python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" + ] + }, + { + "cell_type": "markdown", + "id": "08f99618-6f83-44b3-bc8e-f7df04fc471c", + "metadata": {}, + "source": [ + "### Import necessary libraries and utils" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74f780b1-9b72-4acf-bcf0-64e1ce84e76d", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "import string\n", + "import tempfile\n", + "\n", + "from omegaconf import OmegaConf\n", + "import pytorch_lightning as pl\n", + "import torch\n", + "from tqdm.auto import tqdm\n", + "import wget\n", + "\n", + "from nemo.collections.asr.models import EncDecCTCModelBPE\n", + "from nemo.collections.asr.models.hybrid_asr_tts_models import ASRWithTTSModel\n", + "from nemo.collections.asr.parts.utils.manifest_utils import read_manifest, write_manifest\n", + "from nemo.collections.tts.models import FastPitchModel, SpectrogramEnhancerModel\n", + "from nemo.utils.notebook_utils import download_an4\n", + "\n", + "from nemo_text_processing.text_normalization.normalize import Normalizer" + ] + }, + { + "cell_type": "markdown", + "id": "ca928d36-fb0d-439b-bac0-299e98a72d02", + "metadata": {}, + "source": [ + "### Prepare Data" + ] + }, + { + "cell_type": "markdown", + "id": "702e8e92-17b2-4f34-a2d9-c72b94501bf5", + "metadata": {}, + "source": [ + "Download and preprocess AN4 data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62c7cfec-aa98-4fc5-8b31-23ee1d59f311", + "metadata": {}, + "outputs": [], + "source": [ + "DATASETS_DIR = Path(\"./datasets\") # directory for data\n", + "CHECKPOINTS_DIR = Path(\"./checkpoints/\") # directory for checkpoints" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "659db73e-dcd7-455c-8140-20e104d6ac00", + "metadata": {}, + "outputs": [], + "source": [ + "# create directories if necessary\n", + "DATASETS_DIR.mkdir(parents=True, exist_ok=True)\n", + "CHECKPOINTS_DIR.mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36830e7f-5293-4401-8c56-780127b47385", + "metadata": {}, + "outputs": [], + "source": [ + "download_an4(data_dir=f\"{DATASETS_DIR}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e77f5062-9acb-4f39-b811-a5b11dd6f76f", + "metadata": {}, + "outputs": [], + "source": [ + "AN4_DATASET = DATASETS_DIR / \"an4\"" + ] + }, + { + "cell_type": "markdown", + "id": "403b63b0-8aab-43aa-a455-31f588d1772f", + "metadata": {}, + "source": [ + "### Construct text-only training data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35654ee1-3869-4289-bd52-15818c0ccf69", + "metadata": {}, + "outputs": [], + "source": [ + "# read original training data\n", + "an4_train_data = read_manifest(AN4_DATASET / \"train_manifest.json\")" + ] + }, + { + "cell_type": "markdown", + "id": "a17f583c-2a5c-4faf-84bd-eb04c2921e01", + "metadata": {}, + "source": [ + "Text-only manifest should contain three fields:\n", + "- `text`: target text for the ASR model\n", + "- `tts_text`: text to use as a source for the TTS model (unnormalized)\n", + "- `tts_text_normalized`: text to use as a source for TTS model (normalized)\n", + "\n", + "If `tts_text_normalized` is not present, `tts_text` will be used, and normalization will be done when loading the dataset.\n", + "It is highly recommended to normalize the text and manually create the `tts_text_normalized` field since current normalizers are unsuitable for processing a large amount of text on the fly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5938a8c2-e239-4a45-a716-dc11a981aec7", + "metadata": {}, + "outputs": [], + "source": [ + "# fill `text` and `tts_text` fields with the source data\n", + "textonly_data = []\n", + "for record in an4_train_data:\n", + " text = record[\"text\"]\n", + " textonly_data.append({\"text\": text, \"tts_text\": text})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f6a5735-a5c2-4a8b-8116-bfc535a2c299", + "metadata": {}, + "outputs": [], + "source": [ + "WHITELIST_URL = (\n", + " \"https://raw.githubusercontent.com/NVIDIA/NeMo-text-processing/main/\"\n", + " \"nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv\"\n", + ")\n", + "\n", + "\n", + "def get_normalizer() -> Normalizer:\n", + " with tempfile.TemporaryDirectory() as data_dir:\n", + " whitelist_path = Path(data_dir) / \"lj_speech.tsv\"\n", + " if not whitelist_path.exists():\n", + " wget.download(WHITELIST_URL, out=str(data_dir))\n", + "\n", + " normalizer = Normalizer(\n", + " lang=\"en\",\n", + " input_case=\"cased\",\n", + " whitelist=str(whitelist_path),\n", + " overwrite_cache=True,\n", + " cache_dir=None,\n", + " )\n", + " return normalizer" + ] + }, + { + "cell_type": "markdown", + "id": "dd0253aa-d7f1-47ee-a142-099b71241270", + "metadata": {}, + "source": [ + "Сonstruct the `tts_text_normalized` field by applying an English normalizer to the text.\n", + "\n", + "AN4 data doesn't contain numbers, currency, and other entities, so the normalizer is used here only for demonstration purposes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27bb29d5-d44d-4026-98f8-5f0b1241b39a", + "metadata": {}, + "outputs": [], + "source": [ + "normalizer = get_normalizer()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9400e6d3-ba92-442a-8dd4-117e95dce2ea", + "metadata": {}, + "outputs": [], + "source": [ + "for record in tqdm(textonly_data):\n", + " record[\"tts_text_normalized\"] = normalizer.normalize(\n", + " record[\"tts_text\"], verbose=False, punct_pre_process=True, punct_post_process=True\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "30a934b0-9b58-4bad-bb9a-ab78d81c3859", + "metadata": {}, + "source": [ + "Save manifest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1833ac15-1750-4468-88bc-2343fbabe4d8", + "metadata": {}, + "outputs": [], + "source": [ + "write_manifest(AN4_DATASET / \"train_text_manifest.json\", textonly_data)" + ] + }, + { + "cell_type": "markdown", + "id": "fa3a2371-8c78-4dd1-9605-a668adf52b4a", + "metadata": {}, + "source": [ + "### Save pretrained checkpoints" + ] + }, + { + "cell_type": "markdown", + "id": "7eb14117-8b8b-4170-ab8c-ce496522a361", + "metadata": {}, + "source": [ + "Firstly we will load pretrained models from NGC and save them as `nemo` checkpoints. \n", + "Our hybrid model will be constructed from these checkpoints.\n", + "We will use:\n", + "- small Conformer-CTC ASR model trained on LibriSpeech data (for finetuning)\n", + "- multi-speaker TTS FastPitch model is trained on LibriTTS data. Spectrogram parameters for this model are the same as those used in the ASR model\n", + "- enhancer, which is trained adversarially on the output of the TTS model and natural spectrograms" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43c5c75a-b6e0-4b3c-ad26-a07b483d84e6", + "metadata": {}, + "outputs": [], + "source": [ + "ASR_MODEL_PATH = CHECKPOINTS_DIR / \"stt_en_conformer_ctc_small_ls.nemo\"\n", + "TTS_MODEL_PATH = CHECKPOINTS_DIR / \"fastpitch.nemo\"\n", + "ENHANCER_MODEL_PATH = CHECKPOINTS_DIR / \"enhancer.nemo\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40976e22-7a7b-42b2-86a1-9eaaef4c1c22", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# asr model: stt_en_conformer_ctc_small_ls\n", + "asr_model = EncDecCTCModelBPE.from_pretrained(model_name=\"stt_en_conformer_ctc_small_ls\")\n", + "asr_model.save_to(f\"{ASR_MODEL_PATH}\")\n", + "\n", + "# tts model: tts_en_fastpitch_for_asr_finetuning\n", + "tts_model = FastPitchModel.from_pretrained(model_name=\"tts_en_fastpitch_for_asr_finetuning\")\n", + "tts_model.save_to(f\"{TTS_MODEL_PATH}\")\n", + "\n", + "# enhancer model: tts_en_spectrogram_enhancer_for_asr_finetuning\n", + "enhancer_model = SpectrogramEnhancerModel.from_pretrained(model_name=\"tts_en_spectrogram_enhancer_for_asr_finetuning\")\n", + "enhancer_model.save_to(f\"{ENHANCER_MODEL_PATH}\")" + ] + }, + { + "cell_type": "markdown", + "id": "32d1e242-0ab0-43bf-aaa0-997d284c2c1b", + "metadata": {}, + "source": [ + "### Construct hybrid ASR-TTS model " + ] + }, + { + "cell_type": "markdown", + "id": "2210eb07-6d44-44e0-a0ad-866f1e89873a", + "metadata": {}, + "source": [ + "#### Config Parameters\n", + "\n", + "`Hybrid ASR-TTS model` consists of three parts:\n", + "\n", + "* ASR model (``EncDecCTCModelBPE``, ``EncDecRNNTBPEModel`` or ``EncDecHybridRNNTCTCBPEModel``)\n", + "* TTS Mel Spectrogram Generator (currently, only `FastPitch` model is supported)\n", + "* Enhancer model (optional)\n", + "\n", + "Also, the config allows to specify a text-only dataset.\n", + "\n", + "Main parts of the config:\n", + "\n", + "* ASR model\n", + " * ``asr_model_path``: path to the ASR model checkpoint (`.nemo`) file, loaded only once, then the config of the ASR model is stored in the ``asr_model`` field\n", + " * ``asr_model_type``: needed only when training from scratch. ``rnnt_bpe`` corresponds to ``EncDecRNNTBPEModel``, ``ctc_bpe`` to ``EncDecCTCModelBPE``, ``hybrid_rnnt_ctc_bpe`` to ``EncDecHybridRNNTCTCBPEModel``\n", + " * ``asr_model_fuse_bn``: fusing BatchNorm in the pretrained ASR model, can improve quality in finetuning scenario\n", + "* TTS model\n", + " * ``tts_model_path``: path to the pretrained TTS model checkpoint (`.nemo`) file, loaded only once, then the config of the model is stored in the ``tts_model`` field\n", + "* Enhancer model\n", + " * ``enhancer_model_path``: optional path to the enhancer model. Loaded only once, the config is stored in the ``enhancer_model`` field\n", + "* ``train_ds``\n", + " * ``text_data``: properties related to text-only data\n", + " * ``manifest_filepath``: path (or paths) to text-only dataset manifests\n", + " * ``speakers_filepath``: path (or paths) to the text file containing speaker ids for the multi-speaker TTS model (speakers are sampled randomly during training)\n", + " * ``min_words`` and ``max_words``: parameters to filter text-only manifests by the number of words\n", + " * ``tokenizer_workers``: number of workers for initial tokenization (when loading the data). ``num_CPUs / num_GPUs`` is a recommended value.\n", + " * ``asr_tts_sampling_technique``, ``asr_tts_sampling_temperature``, ``asr_tts_sampling_probabilities``: sampling parameters for text-only and audio-text data (if both specified). Correspond to ``sampling_technique``, ``sampling_temperature``, and ``sampling_probabilities`` parameters of the `nemo.collections.common.data.dataset.ConcatDataset`.\n", + " * all other components are similar to conventional ASR models\n", + "* ``validation_ds`` and ``test_ds`` correspond to the underlying ASR model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d6dd499-d388-4ee3-9a01-d739b16e6ad7", + "metadata": {}, + "outputs": [], + "source": [ + "# load config\n", + "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/asr_tts/hybrid_asr_tts.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6701dc8-cb3b-44cc-aab5-fb6e2c1dadb5", + "metadata": {}, + "outputs": [], + "source": [ + "config = OmegaConf.load(\"./configs/hybrid_asr_tts.yaml\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c13b3c96-4074-415f-95d2-17569886bfcd", + "metadata": {}, + "outputs": [], + "source": [ + "NUM_EPOCHS = 10" + ] + }, + { + "cell_type": "markdown", + "id": "4d090c5d-44a7-401a-a753-b8779b1c1e0b", + "metadata": {}, + "source": [ + "We will use all available speakers (sampled uniformly)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c41e5e8-d926-4b83-8725-bae5a82121cf", + "metadata": {}, + "outputs": [], + "source": [ + "TTS_SPEAKERS_PATH = Path(\"./checkpoints/speakers.txt\")\n", + "\n", + "with open(TTS_SPEAKERS_PATH, \"w\", encoding=\"utf-8\") as f:\n", + " for speaker_id in range(tts_model.cfg.n_speakers):\n", + " print(speaker_id, file=f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c07c07c-cb15-4a1c-80bf-20eaffaa65d9", + "metadata": {}, + "outputs": [], + "source": [ + "config.model.asr_model_path = ASR_MODEL_PATH\n", + "config.model.tts_model_path = TTS_MODEL_PATH\n", + "config.model.enhancer_model_path = ENHANCER_MODEL_PATH\n", + "\n", + "# fuse BathNorm automatically in Conformer for better performance\n", + "config.model.asr_model_fuse_bn = True\n", + "\n", + "# training data\n", + "# constructed dataset\n", + "config.model.train_ds.text_data.manifest_filepath = str(AN4_DATASET / \"train_text_manifest.json\")\n", + "# speakers for TTS model\n", + "config.model.train_ds.text_data.speakers_filepath = f\"{TTS_SPEAKERS_PATH}\"\n", + "config.model.train_ds.manifest_filepath = None # audio-text pairs - we don't use them here\n", + "config.model.train_ds.batch_size = 8\n", + "\n", + "# validation data\n", + "config.model.validation_ds.manifest_filepath = str(AN4_DATASET / \"test_manifest.json\")\n", + "config.model.validation_ds.batch_size = 8\n", + "\n", + "config.trainer.max_epochs = NUM_EPOCHS\n", + "\n", + "config.trainer.devices = 1\n", + "config.trainer.strategy = None # use 1 device, no need for ddp strategy\n", + "\n", + "OmegaConf.resolve(config)" + ] + }, + { + "cell_type": "markdown", + "id": "8ae6cb2e-f571-4b53-8897-bb8ba0fc1146", + "metadata": {}, + "source": [ + "#### Construct trainer and ASRWithTTSModel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac4ae885-dec4-4ce9-8f69-a1f35d04b08c", + "metadata": {}, + "outputs": [], + "source": [ + "trainer = pl.Trainer(**config.trainer)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f815762-b08d-4d3c-8fd3-61afa511eab4", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "hybrid_model = ASRWithTTSModel(config.model)" + ] + }, + { + "cell_type": "markdown", + "id": "ca2c1bf2-28a9-4902-9c73-d96e04b21a46", + "metadata": {}, + "source": [ + "#### Validate the model\n", + "\n", + "Expect `~17.7%` WER on the AN4 test data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffa5f5c6-0609-4f46-aa0c-747319035417", + "metadata": {}, + "outputs": [], + "source": [ + "trainer.validate(hybrid_model)" + ] + }, + { + "cell_type": "markdown", + "id": "701ee9c7-91a1-4917-bf7d-ab26b625c7bf", + "metadata": {}, + "source": [ + "#### Train the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f79761c9-b882-4f14-911f-4a960ff81554", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "trainer.fit(hybrid_model)" + ] + }, + { + "cell_type": "markdown", + "id": "eac18c7c-bdcb-40ad-9c50-37f89fb4aa2a", + "metadata": {}, + "source": [ + "#### Validate the model after training\n", + "\n", + "Expect `~2%` WER on the AN4 test data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd927e87-13fb-4b61-8b4a-a6850780f605", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "trainer.validate(hybrid_model)" + ] + }, + { + "cell_type": "markdown", + "id": "6d25a77d-35ed-44b5-9ef5-318afa321acf", + "metadata": {}, + "source": [ + "### Save final model. Extract pure ASR model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f53ebd3-b89a-47e4-a0a5-ed3a3572f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# save full model: the model can be further used for finetuning\n", + "hybrid_model.save_to(\"checkpoints/finetuned_hybrid_model.nemo\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0560c2c-af28-4d8f-b36d-c18ec6a482a8", + "metadata": {}, + "outputs": [], + "source": [ + "# extract the resulting ASR model from the hybrid model\n", + "hybrid_model.save_asr_model_to(\"checkpoints/finetuned_asr_model.nemo\")" + ] + }, + { + "cell_type": "markdown", + "id": "2de58fbb-50be-42cd-9095-01cacfdb6931", + "metadata": {}, + "source": [ + "## Using Scripts (examples)" + ] + }, + { + "cell_type": "markdown", + "id": "86655198-b1fc-4615-958c-7c01f3cbd024", + "metadata": {}, + "source": [ + "`/examples/asr/asr_with_tts/` contains scripts for finetuning existing models and training new models from scratch." + ] + }, + { + "cell_type": "markdown", + "id": "b5837536-8280-475c-a581-caaee00edfca", + "metadata": {}, + "source": [ + "### Finetuning Existing Model" + ] + }, + { + "cell_type": "markdown", + "id": "84df9aeb-3b5e-41fc-a8d0-dfc660e71375", + "metadata": {}, + "source": [ + "To finetune existing ASR model using text-only data use `/examples/asr/asr_with_tts/speech_to_text_bpe_with_text_finetune.py` script with the corresponding config `/examples/asr/conf/asr_tts/hybrid_asr_tts.yaml`.\n", + "\n", + "Please specify paths to all the required models (ASR, TTS, and Enhancer checkpoints), along with `train_ds.text_data.manifest_filepath` and `train_ds.text_data.speakers_filepath`." + ] + }, + { + "cell_type": "markdown", + "id": "78b9028c-02ce-4af4-b510-a431f4a2f62b", + "metadata": {}, + "source": [ + "```shell\n", + "python speech_to_text_bpe_with_text_finetune.py \\\n", + " model.asr_model_path= \\\n", + " model.tts_model_path= \\\n", + " model.enhancer_model_path= \\\n", + " model.asr_model_fuse_bn= \\\n", + " model.train_ds.manifest_filepath= \\\n", + " model.train_ds.text_data.manifest_filepath= \\\n", + " model.train_ds.text_data.speakers_filepath= \\\n", + " model.train_ds.text_data.tokenizer_workers=4 \\\n", + " model.validation_ds.manifest_filepath= \\\n", + " model.train_ds.batch_size=\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0b17c097-a3b1-49a3-8f54-f07b94218d0b", + "metadata": {}, + "source": [ + "### Training a New Model from Scratch" + ] + }, + { + "cell_type": "markdown", + "id": "6d75b928-57b3-4180-bd09-37e018eef7ef", + "metadata": {}, + "source": [ + "```shell\n", + "python speech_to_text_bpe_with_text.py \\\n", + " # (Optional: --config-path= --config-name=) \\\n", + " ++asr_model_type= \\\n", + " ++tts_model_path= \\\n", + " ++enhancer_model_path= \\\n", + " model.tokenizer.dir= \\\n", + " model.tokenizer.type=\"bpe\" \\\n", + " model.train_ds.manifest_filepath= \\\n", + " ++model.train_ds.text_data.manifest_filepath= \\\n", + " ++model.train_ds.text_data.speakers_filepath= \\\n", + " ++model.train_ds.text_data.min_words=1 \\\n", + " ++model.train_ds.text_data.max_words=45 \\\n", + " ++model.train_ds.text_data.tokenizer_workers=4 \\\n", + " model.validation_ds.manifest_filepath= \\\n", + " model.train_ds.batch_size= \\\n", + " trainer.max_epochs= \\\n", + " trainer.num_nodes= \\\n", + " trainer.accumulate_grad_batches= \\\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "01c17712-ae8d-49cb-ade1-ded168676e27", + "metadata": {}, + "source": [ + "## Training TTS Models for ASR Finetuning" + ] + }, + { + "cell_type": "markdown", + "id": "422dc3b2-d29f-4ed0-b4d2-6d32b35dfb7b", + "metadata": {}, + "source": [ + "### TTS Model (FastPitch)\n", + "\n", + "TTS model for the purpose of ASR model finetuning should be trained with the same mel spectrogram parameters as used in the ASR model. The typical parameters are `10ms` hop length, `25ms` window length, and the highest band of 8kHz (for 16kHz data). Other parameters are the same as for common multi-speaker TTS models.\n", + "\n", + "Mainly we observed two differences specific to TTS models for ASR:\n", + "- adding more speakers and more data improves the final ASR model quality (but not the perceptual quality of the TTS model)\n", + "- training for more epochs can also improve the quality of the ASR system (but MSE loss used for the TTS model can be higher than optimal on validation data)\n", + "\n", + "Use script `/examples/tts/fastpitch.py` to train a FastPitch model.\n", + "More details about the FastPitch model can be found in the [documentation](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/tts/models.html#fastpitch). \n", + "\n", + "### Enhancer\n", + "Use script `/examples/tts/spectrogram_enhancer.py` to train an Enhancer model. More details can be found in the \n", + "[documentation](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/tts/models.html).\n", + "\n", + "### Models Used in This Tutorial\n", + "\n", + "Some details about the models used in this tutorial can be found on [NGC](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/tts_en_fastpitch_spectrogram_enhancer_for_asr_finetuning).\n", + "\n", + "The system is also described in detail in the paper in the paper [Text-only domain adaptation for end-to-end ASR using integrated text-to-mel-spectrogram generator](https://arxiv.org/abs/2302.14036)." + ] + }, + { + "cell_type": "markdown", + "id": "9a9a6cd3-4bdc-4b6e-b4b1-3bfd50fd01b3", + "metadata": {}, + "source": [ + "## Summary" + ] + }, + { + "cell_type": "markdown", + "id": "e2890c61-e4b7-47aa-a086-bc483ae7141f", + "metadata": {}, + "source": [ + "The tutorial demonstrated the main concepts related to hybrid ASR-TTS models to finetune ASR models and train new ones from scratch. \n", + "The ability to achieve good text-only adaptation results is demonstrated by finetuning a small Conformer model on text-only data from the AN4 dataset." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ml38", + "language": "python", + "name": "ml38" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/asr/ASR_for_telephony_speech.ipynb b/tutorials/asr/ASR_for_telephony_speech.ipynb index 48be4b4db737..ed1fd1157f1d 100644 --- a/tutorials/asr/ASR_for_telephony_speech.ipynb +++ b/tutorials/asr/ASR_for_telephony_speech.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", @@ -103,7 +103,7 @@ "# Download the dataset. This will take a few moments...\n", "print(\"******\")\n", "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\n", - " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz' # for the original source, please visit http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz \n", + " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz'\n", " an4_path = wget.download(an4_url, data_dir)\n", " print(f\"Dataset downloaded at: {an4_path}\")\n", "else:\n", diff --git a/tutorials/asr/ASR_with_NeMo.ipynb b/tutorials/asr/ASR_with_NeMo.ipynb index c1f62a871a91..bab9299f57d4 100644 --- a/tutorials/asr/ASR_with_NeMo.ipynb +++ b/tutorials/asr/ASR_with_NeMo.ipynb @@ -54,7 +54,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -189,7 +189,7 @@ "# Download the dataset. This will take a few moments...\n", "print(\"******\")\n", "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\n", - " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz' # for the original source, please visit http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz \n", + " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz'\n", " an4_path = wget.download(an4_url, data_dir)\n", " print(f\"Dataset downloaded at: {an4_path}\")\n", "else:\n", @@ -588,7 +588,7 @@ "\n", "if not os.path.exists(config_path):\n", " # Grab the config we'll use in this example\n", - " BRANCH = 'r1.19.0'\n", + " BRANCH = 'r1.20.0'\n", " !mkdir configs\n", " !wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/config.yaml\n", "\n", diff --git a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb index cf4d8442fe0f..fdb83d9a0dac 100644 --- a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb +++ b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb @@ -41,7 +41,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", @@ -372,7 +372,7 @@ "# Download the dataset. This will take a few moments...\r\n", "print(\"******\")\r\n", "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\r\n", - " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz' # for the original source, please visit http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz \r\n", + " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz'\r\n", " an4_path = wget.download(an4_url, data_dir)\r\n", " print(f\"Dataset downloaded at: {an4_path}\")\r\n", "else:\r\n", diff --git a/tutorials/asr/ASR_with_Transducers.ipynb b/tutorials/asr/ASR_with_Transducers.ipynb index 7846a1468d98..8a302a8c7130 100644 --- a/tutorials/asr/ASR_with_Transducers.ipynb +++ b/tutorials/asr/ASR_with_Transducers.ipynb @@ -29,7 +29,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", @@ -137,7 +137,7 @@ "# Download the dataset. This will take a few moments...\n", "print(\"******\")\n", "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\n", - " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz' # for the original source, please visit http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz \n", + " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz'\n", " an4_path = wget.download(an4_url, data_dir)\n", " print(f\"Dataset downloaded at: {an4_path}\")\n", "else:\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference.ipynb b/tutorials/asr/Buffered_Transducer_Inference.ipynb index bc1209a80410..a6519e73f6ec 100644 --- a/tutorials/asr/Buffered_Transducer_Inference.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb index fad96a6097b0..29ded2c98fa4 100644 --- a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb @@ -46,7 +46,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Intro_to_Transducers.ipynb b/tutorials/asr/Intro_to_Transducers.ipynb index c82d7ed86dcd..4420085f319f 100644 --- a/tutorials/asr/Intro_to_Transducers.ipynb +++ b/tutorials/asr/Intro_to_Transducers.ipynb @@ -44,7 +44,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ], "execution_count": null, @@ -225,7 +225,7 @@ "id": "0W12xF_CqcVF" }, "source": [ - "![](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/asr/images/transducer.png?raw=true)" + "![](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/images/transducer.png?raw=true)" ] }, { diff --git a/tutorials/asr/Multilang_ASR.ipynb b/tutorials/asr/Multilang_ASR.ipynb index 431dc515a459..3ce77648a60e 100644 --- a/tutorials/asr/Multilang_ASR.ipynb +++ b/tutorials/asr/Multilang_ASR.ipynb @@ -104,7 +104,7 @@ "\n", "## Install NeMo\n", "## We are using the main branch but you might want to adjust that too\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -204,7 +204,7 @@ "outputs": [], "source": [ "if not os.path.exists(\"get_librispeech_data.py\"):\n", - " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/dataset_processing/get_librispeech_data.py" + " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/dataset_processing/get_librispeech_data.py" ] }, { @@ -296,7 +296,7 @@ "outputs": [], "source": [ "if not os.path.exists(\"get_commonvoice_data.py\"):\n", - " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/dataset_processing/get_commonvoice_data.py" + " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/dataset_processing/get_commonvoice_data.py" ] }, { @@ -800,7 +800,7 @@ "outputs": [], "source": [ "if not os.path.exists(\"process_asr_text_tokenizer.py\"):\n", - " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/tokenizers/process_asr_text_tokenizer.py" + " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/tokenizers/process_asr_text_tokenizer.py" ] }, { diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb index 2d963a6b77d3..751eedba1519 100644 --- a/tutorials/asr/Offline_ASR.ipynb +++ b/tutorials/asr/Offline_ASR.ipynb @@ -3,9 +3,7 @@ "nbformat_minor": 0, "metadata": { "colab": { - "name": "Offline_ASR.ipynb", "provenance": [], - "collapsed_sections": [], "toc_visible": true }, "kernelspec": { @@ -31,7 +29,9 @@ "\n", "You may find more info on how to train and use language models for ASR models here:\n", "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/asr/asr_language_modeling.html\n", - "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n" + "\n", + "\n", + "NOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n" ] }, { @@ -41,7 +41,7 @@ }, "source": [ "## Installation\n", - "NeMo can be installed via simple pip command. \n", + "NeMo can be installed via simple pip command.\n", "\n", "Optional CTC beam search decoder might require restart of Colab runtime after installation." ] @@ -52,7 +52,7 @@ "id": "I9eIxAyKHREB" }, "source": [ - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "try:\n", " # Import NeMo Speech Recognition collection\n", " import nemo.collections.asr as nemo_asr\n", @@ -77,12 +77,14 @@ " import ctc_decoders\n", "except ModuleNotFoundError:\n", " # install beam search decoder\n", + " import os\n", " !apt-get update && apt-get install -y swig\n", " !git clone https://github.com/NVIDIA/NeMo -b \"$BRANCH\"\n", - " !cd NeMo && bash scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh\n", + " pwd = !pwd\n", + " NEMO_PATH = os.path.join(pwd[0], \"NeMo\")\n", + " !cd NeMo && bash scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh $NEMO_PATH\n", " print('Restarting Colab runtime to successfully import built module.')\n", " print('Please re-run the notebook.')\n", - " import os\n", " os.kill(os.getpid(), 9)" ], "execution_count": null, @@ -110,7 +112,7 @@ }, "source": [ "## Instantiate pre-trained NeMo model\n", - "``from_pretrained(...)`` API downloads and initializes model directly from the cloud. \n", + "``from_pretrained(...)`` API downloads and initializes model directly from the cloud.\n", "\n", "Alternatively, ``restore_from(...)`` allows loading a model from a disk.\n", "\n", @@ -403,7 +405,7 @@ "source": [ "## Offline inference with beam search decoder and N-gram language model re-scoring\n", "\n", - "It is possible to use an external [KenLM](https://kheafield.com/code/kenlm/)-based N-gram language model to rescore multiple transcription candidates. \n", + "It is possible to use an external [KenLM](https://kheafield.com/code/kenlm/)-based N-gram language model to rescore multiple transcription candidates.\n", "\n", "Let's download and preprocess LibriSpeech 3-gram language model." ] diff --git a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb index 9d4f66b82599..07149d752e5f 100644 --- a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb +++ b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb @@ -15,7 +15,9 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", - "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", + "\n", + "\n", + "NOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", @@ -23,7 +25,7 @@ "!pip install wget\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -48,6 +50,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -55,6 +58,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -70,6 +74,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -130,6 +135,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -137,6 +143,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -152,6 +159,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -180,6 +188,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -196,6 +205,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -213,6 +223,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -237,6 +248,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -253,6 +265,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -260,6 +273,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -287,6 +301,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -311,6 +326,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -318,6 +334,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -364,6 +381,16 @@ "metric_value = word_error_rate(hypotheses=predicted_text, references=ground_truth_text, use_cer=False)\n", "print(f\"WER is {metric_value}\")" ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Further Reading\n", + "\n", + "There are two ways to incorporate VAD into ASR pipeline. The first strategy is to drop the frames that are predicted as `non-speech` by VAD, as already discussed in this tutorial. The second strategy is to keep all the frames and mask the `non-speech` frames with zero-signal values. Also, instead of using segment-VAD as shown in this tutorial, we can use frame-VAD model for faster inference and better accuracy. For more information, please refer to the script [speech_to_text_with_vad.py](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr/asr_vad/speech_to_text_with_vad.py)." + ] } ], "metadata": { diff --git a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb index 6a1ac0bb1079..1ffe81dff02f 100644 --- a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb +++ b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb @@ -27,7 +27,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Online_Noise_Augmentation.ipynb b/tutorials/asr/Online_Noise_Augmentation.ipynb index b2fbf1a2b17d..a4c192f7f7e5 100644 --- a/tutorials/asr/Online_Noise_Augmentation.ipynb +++ b/tutorials/asr/Online_Noise_Augmentation.ipynb @@ -32,7 +32,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -135,7 +135,7 @@ "# Download the dataset. This will take a few moments...\n", "print(\"******\")\n", "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\n", - " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz' # for the original source, please visit http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz \n", + " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz'\n", " an4_path = wget.download(an4_url, data_dir)\n", " print(f\"Dataset downloaded at: {an4_path}\")\n", "else:\n", diff --git a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb index e642fd4f6961..d11640844e60 100644 --- a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb @@ -27,7 +27,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -67,7 +67,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This notebook requires the `torchaudio` library to be installed for MarbleNet. Please follow the instructions available at the [torchaudio installer](https://github.com/NVIDIA/NeMo/blob/r1.19.0/scripts/installers/install_torchaudio_latest.sh) and [torchaudio Github page](https://github.com/pytorch/audio#installation) to install the appropriate version of torchaudio.\n" + "This notebook requires the `torchaudio` library to be installed for MarbleNet. Please follow the instructions available at the [torchaudio installer](https://github.com/NVIDIA/NeMo/blob/main/scripts/installers/install_torchaudio_latest.sh) and [torchaudio Github page](https://github.com/pytorch/audio#installation) to install the appropriate version of torchaudio.\n" ] }, { diff --git a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb index 23e31e5b0da0..18f2a8139973 100644 --- a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb @@ -29,7 +29,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -59,7 +59,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This notebook requires the `torchaudio` library to be installed for MatchboxNet. Please follow the instructions available at the [torchaudio installer](https://github.com/NVIDIA/NeMo/blob/r1.19.0/scripts/installers/install_torchaudio_latest.sh) and [torchaudio Github page](https://github.com/pytorch/audio#installation) to install the appropriate version of torchaudio.\n" + "This notebook requires the `torchaudio` library to be installed for MatchboxNet. Please follow the instructions available at the [torchaudio installer](https://github.com/NVIDIA/NeMo/blob/main/scripts/installers/install_torchaudio_latest.sh) and [torchaudio Github page](https://github.com/pytorch/audio#installation) to install the appropriate version of torchaudio.\n" ] }, { diff --git a/tutorials/asr/Self_Supervised_Pre_Training.ipynb b/tutorials/asr/Self_Supervised_Pre_Training.ipynb index fe47a62e2f27..e13ca31e8195 100644 --- a/tutorials/asr/Self_Supervised_Pre_Training.ipynb +++ b/tutorials/asr/Self_Supervised_Pre_Training.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -51,7 +51,7 @@ "\n", "The approach we will use for pre-training our models is represented in the following diagram:\n", "\n", - " ![SSL diagram](https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/tutorials/asr/images/contrastive_ssl.png)\n", + " ![SSL diagram](https://raw.githubusercontent.com/NVIDIA/NeMo/main/tutorials/asr/images/contrastive_ssl.png)\n", "\n", "We first mask parts of our input using SpecAugment. The model is then trained to solve a contrastive task of distinguishing the latent representation of the masked time steps from several sampled distractors. Since our encoders also contain stride blocks which reduce the length of the inputs, in order to obtain target representations we combine several consecutive time steps. They are then passed through a quantizer, which has been found to help with contrastive pre-training." ] @@ -272,8 +272,8 @@ "source": [ "## Grab the configs we'll use in this example\n", "!mkdir configs\n", - "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/conf/ssl/citrinet/citrinet_ssl_1024.yaml\n", - "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/conf/citrinet/citrinet_1024.yaml\n" + "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/ssl/citrinet/citrinet_ssl_1024.yaml\n", + "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/citrinet/citrinet_1024.yaml\n" ] }, { @@ -482,7 +482,7 @@ "outputs": [], "source": [ "!mkdir scripts\n", - "!wget -P scripts/ https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/tokenizers/process_asr_text_tokenizer.py\n", + "!wget -P scripts/ https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/tokenizers/process_asr_text_tokenizer.py\n", "\n", "!python ./scripts/process_asr_text_tokenizer.py \\\n", " --manifest=\"{data_dir}/an4/train_manifest.json\" \\\n", diff --git a/tutorials/asr/Speech_Commands.ipynb b/tutorials/asr/Speech_Commands.ipynb index b26cba7da0b3..7e04e3e6cd68 100644 --- a/tutorials/asr/Speech_Commands.ipynb +++ b/tutorials/asr/Speech_Commands.ipynb @@ -61,7 +61,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Streaming_ASR.ipynb b/tutorials/asr/Streaming_ASR.ipynb index d90bf363370a..19c998120d9d 100644 --- a/tutorials/asr/Streaming_ASR.ipynb +++ b/tutorials/asr/Streaming_ASR.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", @@ -62,7 +62,7 @@ "* Real-time or close to real-time inference for live transcriptions\n", "* Offline transcriptions of very long audio\n", "\n", - "In this tutorial, we will mainly focus on streaming for handling long form audio and close to real-time inference with CTC based models. For training ASR models we usually use short segments of audio (<20s) that may be smaller chunks of a long audio that is aligned with the transcriptions and segmented into smaller chunks (see [tools/](https://github.com/NVIDIA/NeMo/tree/r1.19.0/tools) for some great tools to do this). For running inference on long audio files we are restricted by the available GPU memory that dictates the maximum length of audio that can be transcribed in one inference call. We will take a look at one of the ways to overcome this restriction using NeMo's Conformer-CTC ASR model." + "In this tutorial, we will mainly focus on streaming for handling long form audio and close to real-time inference with CTC based models. For training ASR models we usually use short segments of audio (<20s) that may be smaller chunks of a long audio that is aligned with the transcriptions and segmented into smaller chunks (see [tools/](https://github.com/NVIDIA/NeMo/tree/main/tools) for some great tools to do this). For running inference on long audio files we are restricted by the available GPU memory that dictates the maximum length of audio that can be transcribed in one inference call. We will take a look at one of the ways to overcome this restriction using NeMo's Conformer-CTC ASR model." ] }, { diff --git a/tutorials/asr/Voice_Activity_Detection.ipynb b/tutorials/asr/Voice_Activity_Detection.ipynb index b4c7e33f7bab..ed4893d83a84 100644 --- a/tutorials/asr/Voice_Activity_Detection.ipynb +++ b/tutorials/asr/Voice_Activity_Detection.ipynb @@ -18,7 +18,9 @@ "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", - "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", + "\n", + "\n", + "NOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", @@ -28,7 +30,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -39,6 +41,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -79,6 +82,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab": {}, @@ -96,6 +100,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -170,6 +175,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -203,6 +209,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -242,6 +249,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -270,6 +278,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -334,6 +343,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -360,6 +370,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -389,6 +400,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -467,6 +479,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -520,6 +533,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -546,6 +560,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -583,6 +598,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -590,6 +606,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -626,6 +643,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -650,6 +668,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -669,6 +688,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -697,6 +717,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -709,6 +730,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -721,6 +743,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -749,6 +772,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -833,6 +857,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -916,6 +941,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -941,6 +967,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -967,6 +994,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -1066,6 +1094,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -1102,6 +1131,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -1115,6 +1145,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -1124,6 +1155,25 @@ "# Inference and more\n", "If you are interested in **pretrained** model and **streaming inference**, please have a look at our [VAD inference tutorial](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb) and script [vad_infer.py](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr/speech_classification/vad_infer.py)\n" ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Frame-VAD: More Effective and Efficient VAD for More Fine-grained Timestamps\n", + "\n", + "In this notebook, we are using the segment-VAD model, which predicts a single label for each short segment (0.63s), which is not optimal for some applications that require very precise timestamps. \n", + "\n", + "To get more precise timestamps, we can use a frame-VAD model, which predicts a label for each input frame (20ms). To prepare manifest for frame-VAD, you'll need to have `label` field in each manifest entry, which is a string of labels for each frame. For example, if you have a 1s audio file, you'll need to have 50 frame labels in the manifest entry like \"0 0 0 0 1 1 0 1 .... 0 1\".\n", + "However, shorter label strings are also supported for smaller file sizes. For example, you can prepare the `label` in 40ms frame, and the model will properly repeat the label for each 20ms frame. \n", + "\n", + "The Frame-VAD model shares the same MarbleNet architecture as the segment-VAD model, but with a different input/output resolution and loss function. The frame-VAD model is trained with more data than segment-VAD and achieves better performance, as shown in the [NGC model card](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/vad_multilingual_frame_marblenet). \n", + "\n", + "During inference, since frame-VAD model doesn't require splicing input into overlapping segments, it is more efficient than segment-VAD model, with 8x less GPU memory consumption.\n", + "\n", + "For more information on the frame-VAD model, please refer to the [README.md](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr/speech_classification/README.md). For training and running inference on frame-VAD, please refer to [speech_to_frame_label.py](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr/speech_classification/speech_to_frame_label.py) and [frame_vad_infer.py](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr/speech_classification/frame_vad_infer.py)." + ] } ], "metadata": { diff --git a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb index 80cf4ecacc1d..957ecdb49985 100644 --- a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb +++ b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb @@ -50,7 +50,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", @@ -190,7 +190,7 @@ "# Download the dataset. This will take a few moments...\n", "print(\"******\")\n", "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\n", - " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz' # for the original source, please visit http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz \n", + " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz'\n", " an4_path = wget.download(an4_url, data_dir)\n", " print(f\"Dataset downloaded at: {an4_path}\")\n", "else:\n", @@ -1297,7 +1297,7 @@ "source": [ "# Further reading\n", "\n", - "For efficient scripts to add, train, and evaluate adapter augmented models, please refer to the [Adapters example section](https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples/asr/asr_adapters).\n", + "For efficient scripts to add, train, and evaluate adapter augmented models, please refer to the [Adapters example section](https://github.com/NVIDIA/NeMo/tree/main/examples/asr/asr_adapters).\n", "\n", "Please follow the following articles that discuss the use of adapters in ASR - \n", "- [Exploiting Adapters for Cross-lingual Low-resource Speech Recognition](https://arxiv.org/abs/2105.11905)\n", diff --git a/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb b/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb index 97697781dddd..bda6338dd0bf 100644 --- a/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb +++ b/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb @@ -70,7 +70,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -193,17 +193,17 @@ "config_path = str(config_dir / \"config.yaml\")\n", "\n", "# download scripts to format the data source.\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/speech_recognition/convert_hf_dataset_to_nemo.py\", str(code_dir))\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/speech_recognition/convert_to_tarred_audio_dataset.py\",\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/speech_recognition/convert_hf_dataset_to_nemo.py\", str(code_dir))\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/speech_recognition/convert_to_tarred_audio_dataset.py\",\n", " str(code_dir))\n", "\n", "# download scripts to run training\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/conf/conformer/conformer_ctc_bpe.yaml\", config_path)\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/asr_ctc/speech_to_text_ctc_bpe.py\",\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/conformer/conformer_ctc_bpe.yaml\", config_path)\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/asr_ctc/speech_to_text_ctc_bpe.py\",\n", " str(code_dir))\n", "\n", "# download script to create tokenizer\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/tokenizers/process_asr_text_tokenizer.py\",\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/tokenizers/process_asr_text_tokenizer.py\",\n", " str(code_dir))" ] }, diff --git a/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb b/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb index 078e76d55ba7..88ddb5189cc4 100644 --- a/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb +++ b/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb @@ -55,7 +55,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -173,8 +173,8 @@ "outputs": [], "source": [ "config_path = str(config_dir / \"config.yaml\")\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/conf/conformer/conformer_ctc_char.yaml\", config_path)\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/asr_ctc/speech_to_text_ctc.py\", str(code_dir))" + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/conformer/conformer_ctc_char.yaml\", config_path)\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/asr_ctc/speech_to_text_ctc.py\", str(code_dir))" ] }, { diff --git a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb index c18ebbac596b..75428c686e7e 100644 --- a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb +++ b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb @@ -26,7 +26,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, @@ -152,7 +152,7 @@ "id": "jEgEo0aPj3Ws" }, "source": [ - "All NeMo [NLP models](https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples/nlp) have an associated config file. As an example, let's examine the config file for the Named Entity Recognition (NER) model (more details about the model and the NER task could be found [here](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb))." + "All NeMo [NLP models](https://github.com/NVIDIA/NeMo/tree/main/examples/nlp) have an associated config file. As an example, let's examine the config file for the Named Entity Recognition (NER) model (more details about the model and the NER task could be found [here](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb))." ] }, { @@ -261,7 +261,7 @@ "id": "EVp4zvxPatga" }, "source": [ - "and then start the training as usual (please see [tutorials/nlp](https://github.com/NVIDIA/NeMo/tree/r1.19.0/tutorials/nlp) for more details about training of a particular model). \n", + "and then start the training as usual (please see [tutorials/nlp](https://github.com/NVIDIA/NeMo/tree/main/tutorials/nlp) for more details about training of a particular model). \n", "\n", "You can also provide a pretrained language model checkpoint and a configuration file if available.\n", "\n", @@ -349,7 +349,7 @@ "model.language_model.lm_checkpoint= \\\n", "model.language_model.config_file=`\n", "\n", - "The general Megatron-LM model names are used to download the correct vocabulary file needed to setup the model correctly. Note, the data preprocessing and model training is done in NeMo. Megatron-LM has its own set of training arguments (including tokenizer) that are ignored during finetuning in NeMo. Please see downstream task [config files and training scripts](https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples/nlp) for all NeMo supported arguments.\n", + "The general Megatron-LM model names are used to download the correct vocabulary file needed to setup the model correctly. Note, the data preprocessing and model training is done in NeMo. Megatron-LM has its own set of training arguments (including tokenizer) that are ignored during finetuning in NeMo. Please see downstream task [config files and training scripts](https://github.com/NVIDIA/NeMo/tree/main/examples/nlp) for all NeMo supported arguments.\n", "\n", "## Download pretrained model\n", "\n", @@ -373,7 +373,7 @@ "source": [ "# Using any HuggingFace Pretrained Model\n", "\n", - "Currently, there are 4 HuggingFace language models that have the most extensive support in [NeMo](https://github.com/NVIDIA/NeMo/tree/r1.19.0/nemo/collections/nlp/modules/common/huggingface): \n", + "Currently, there are 4 HuggingFace language models that have the most extensive support in [NeMo](https://github.com/NVIDIA/NeMo/tree/main/nemo/collections/nlp/modules/common/huggingface): \n", "\n", "* BERT\n", "* RoBERTa\n", @@ -383,7 +383,7 @@ "As was mentioned before, just set `model.language_model.pretrained_model_name` to the desired model name in your config and get_lm_model() will take care of the rest.\n", "\n", "If you want to use another language model from [https://huggingface.co/models](https://huggingface.co/models), use HuggingFace API directly in NeMo.\n", - "More details on model training could be found at [tutorials](https://github.com/NVIDIA/NeMo/tree/r1.19.0/tutorials)." + "More details on model training could be found at [tutorials](https://github.com/NVIDIA/NeMo/tree/main/tutorials)." ] } ], diff --git a/tutorials/nlp/02_NLP_Tokenizers.ipynb b/tutorials/nlp/02_NLP_Tokenizers.ipynb index 5c909fe73432..ce6334f9712d 100644 --- a/tutorials/nlp/02_NLP_Tokenizers.ipynb +++ b/tutorials/nlp/02_NLP_Tokenizers.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.19.0'" + "BRANCH = 'r1.20.0'" ] }, { @@ -35,7 +35,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb index 28d5330ac3b2..a1aa3cdae55b 100644 --- a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb +++ b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb @@ -217,7 +217,7 @@ "print()\n", "\n", "\n", - "!wget https://raw.github.com/NVIDIA/NeMo/r1.19.0/scripts/neural_machine_translation/filter_langs_nmt.py \\\n", + "!wget https://raw.github.com/NVIDIA/NeMo/main/scripts/neural_machine_translation/filter_langs_nmt.py \\\n", " -O filter_langs_nmt.py\n", "\n", "!python filter_langs_nmt.py \\\n", @@ -300,7 +300,7 @@ "\n", "## Install NeMo\n", "\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "!pip uninstall -y sacrebleu\n", @@ -760,7 +760,7 @@ "metadata": {}, "outputs": [], "source": [ - "!wget https://raw.github.com/NVIDIA/NeMo/r1.19.0/examples/nlp/machine_translation/create_tarred_parallel_dataset.py \\\n", + "!wget https://raw.github.com/NVIDIA/NeMo/main/examples/nlp/machine_translation/create_tarred_parallel_dataset.py \\\n", " -O create_tarred_parallel_dataset.py\n", "\n", "!python create_tarred_parallel_dataset.py \\\n", diff --git a/tutorials/nlp/Dialogue.ipynb b/tutorials/nlp/Dialogue.ipynb index cf0392da9c36..e5ec1941b032 100644 --- a/tutorials/nlp/Dialogue.ipynb +++ b/tutorials/nlp/Dialogue.ipynb @@ -27,7 +27,7 @@ "outputs": [], "source": [ "import os \n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!apt-get update && apt-get install -y libsndfile1 ffmpeg\n", "!git clone https://github.com/NVIDIA/NeMo --branch $BRANCH\n", "os.chdir('NeMo')\n", diff --git a/tutorials/nlp/Entity_Linking_Medical.ipynb b/tutorials/nlp/Entity_Linking_Medical.ipynb index 892eb881b528..54debaaac416 100644 --- a/tutorials/nlp/Entity_Linking_Medical.ipynb +++ b/tutorials/nlp/Entity_Linking_Medical.ipynb @@ -17,7 +17,7 @@ "\"\"\"\n", "\n", "## Install NeMo if using google collab or if its not installed locally\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, @@ -68,7 +68,7 @@ "#### Task Description\n", "[Entity linking](https://en.wikipedia.org/wiki/Entity_linking) is the process of connecting concepts mentioned in natural language to their canonical forms stored in a knowledge base. For example, say a knowledge base contained the entity 'ID3452 influenza' and we wanted to process some natural language containing the sentence \"The patient has flu like symptoms\". An entity linking model would match the word 'flu' to the knowledge base entity 'ID3452 influenza', allowing for disambiguation and normalization of concepts referenced in text. Entity linking applications range from helping automate data ingestion to assisting in real time dialogue concept normalization. We will be focusing on entity linking in the medical domain for this demo, but the entity linking model, dataset, and training code within NVIDIA NeMo can be applied to other domains like finance and retail.\n", "\n", - "Within NeMo and this tutorial we use the entity linking approach described in Liu et. al's NAACL 2021 \"[Self-alignment Pre-training for Biomedical Entity Representations](https://arxiv.org/abs/2010.11784v2)\". The r1.19.0 idea behind this approach is to reshape an initial concept embedding space such that synonyms of the same concept are pulled closer together and unrelated concepts are pushed further apart. The concept embeddings from this reshaped space can then be used to build a knowledge base embedding index. This index stores concept IDs mapped to their respective concept embeddings in a format conducive to efficient nearest neighbor search. We can link query concepts to their canonical forms in the knowledge base by performing a nearest neighbor search- matching concept query embeddings to the most similar concepts embeddings in the knowledge base index. \n", + "Within NeMo and this tutorial we use the entity linking approach described in Liu et. al's NAACL 2021 \"[Self-alignment Pre-training for Biomedical Entity Representations](https://arxiv.org/abs/2010.11784v2)\". The main idea behind this approach is to reshape an initial concept embedding space such that synonyms of the same concept are pulled closer together and unrelated concepts are pushed further apart. The concept embeddings from this reshaped space can then be used to build a knowledge base embedding index. This index stores concept IDs mapped to their respective concept embeddings in a format conducive to efficient nearest neighbor search. We can link query concepts to their canonical forms in the knowledge base by performing a nearest neighbor search- matching concept query embeddings to the most similar concepts embeddings in the knowledge base index. \n", "\n", "In this tutorial we will be using the [faiss](https://github.com/facebookresearch/faiss) library to build our concept index." ] diff --git a/tutorials/nlp/GLUE_Benchmark.ipynb b/tutorials/nlp/GLUE_Benchmark.ipynb index 1c60b95bcc8c..516cd9b9811e 100644 --- a/tutorials/nlp/GLUE_Benchmark.ipynb +++ b/tutorials/nlp/GLUE_Benchmark.ipynb @@ -44,7 +44,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.19.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'r1.20.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb b/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb index 50ec879b7761..6b1c7f7f2583 100644 --- a/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb +++ b/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb @@ -21,7 +21,7 @@ "import os\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "\n", "GITHUB_ACCOUNT = 'NVIDIA' # change this if using a fork\n", "\n", @@ -284,7 +284,7 @@ "id": "miXYxOv_mNVo" }, "source": [ - "The script [prepare_corpora_for_alignment.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/text_normalization_as_tagging/dataset_preparation/prepare_corpora_for_alignment.py) prepares the described parallel corpora. It extracts all unique ITN phrase-pairs from the Google TN dataset, tokenizes them as described above and stores in separate folders for each semiotic class. It also generates a bash script for running the alignment. At the end it prints how many examples it has found:\n", + "The script [prepare_corpora_for_alignment.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/text_normalization_as_tagging/dataset_preparation/prepare_corpora_for_alignment.py) prepares the described parallel corpora. It extracts all unique ITN phrase-pairs from the Google TN dataset, tokenizes them as described above and stores in separate folders for each semiotic class. It also generates a bash script for running the alignment. At the end it prints how many examples it has found:\n", "```\n", "content/alignment/punct has 920953 instances\n", "content/alignment/date has 150499 instances\n", @@ -405,7 +405,7 @@ { "cell_type": "markdown", "source": [ - "GIZA++ will generate many files in our class folders, but we need only two files with final alignments, those with suffixes `A3.final`. The two files correspond to the alignments produced by two GIZA++ runs - direct and reverse (switching source and target corpus). This is a common practice, it allows us to find safer alignment points - tokens that were aligned to one another in both runs. The script [extract_giza_alignments.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/text_normalization_as_tagging/dataset_preparation/extract_giza_alignments.py) heuristically combines these two GIZA++ alignments. It also applies a bunch of regular expressions to correct some alignment mistakes." + "GIZA++ will generate many files in our class folders, but we need only two files with final alignments, those with suffixes `A3.final`. The two files correspond to the alignments produced by two GIZA++ runs - direct and reverse (switching source and target corpus). This is a common practice, it allows us to find safer alignment points - tokens that were aligned to one another in both runs. The script [extract_giza_alignments.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/text_normalization_as_tagging/dataset_preparation/extract_giza_alignments.py) heuristically combines these two GIZA++ alignments. It also applies a bunch of regular expressions to correct some alignment mistakes." ], "metadata": { "id": "ueJYVF0cU3ic" @@ -1016,11 +1016,11 @@ "\n", "See also the scripts for the whole pipeline:\n", "\n", - "> [prepare_dataset_en.sh](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/text_normalization_as_tagging/prepare_dataset_en.sh)\n", + "> [prepare_dataset_en.sh](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/text_normalization_as_tagging/prepare_dataset_en.sh)\n", "\n", - "> [normalization_as_tagging_train.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/text_normalization_as_tagging/normalization_as_tagging_train.py)\n", + "> [normalization_as_tagging_train.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/text_normalization_as_tagging/normalization_as_tagging_train.py)\n", "\n", - "> [run_infer.sh](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/text_normalization_as_tagging/run_infer.sh)\n", + "> [run_infer.sh](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/text_normalization_as_tagging/run_infer.sh)\n", "\n" ], "metadata": { diff --git a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb index c656fdd7088a..1c75afc67352 100644 --- a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb +++ b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/MegatronBert_export.ipynb b/tutorials/nlp/MegatronBert_export.ipynb index c435d6e76d54..4e88195c9635 100644 --- a/tutorials/nlp/MegatronBert_export.ipynb +++ b/tutorials/nlp/MegatronBert_export.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='r1.19.0'" + "BRANCH='r1.20.0'" ] }, { diff --git a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb index a92317b17320..cc158388feb5 100644 --- a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb +++ b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "c3217a15", "metadata": {}, @@ -15,6 +16,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8c72dc42", "metadata": {}, @@ -25,6 +27,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "79154a9e", "metadata": {}, @@ -62,7 +65,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "DATA_PATH='.'\n", "TRANSACTIONS=DATA_PATH+'/card_transaction.v1.csv'\n", "#CHECKPOINTS='/chk_points'\n", @@ -73,6 +76,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "7e0bbc89", "metadata": {}, @@ -92,6 +96,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1ff1d46f", "metadata": {}, @@ -141,6 +146,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "aa356012", "metadata": {}, @@ -239,6 +245,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "02bff63f", "metadata": {}, @@ -267,6 +274,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "89e1e5b3", "metadata": {}, @@ -339,6 +347,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "05ebadc3", "metadata": {}, @@ -347,6 +356,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "2fe38a29", "metadata": {}, @@ -381,6 +391,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "678f65ef", "metadata": {}, @@ -411,6 +422,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8af66b4a", "metadata": {}, @@ -464,6 +476,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6ecec681", "metadata": {}, @@ -472,6 +485,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "58a3d4fa", "metadata": {}, @@ -543,6 +557,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "45ac928f", "metadata": {}, @@ -557,6 +572,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "158a4bbe", "metadata": {}, @@ -586,7 +602,7 @@ "outputs": [], "source": [ "CHECKPONT_FILE_NAME = megatron_gpt--val_loss=1.17-step=10047-consumed_samples=80376.0-last.ckpt # change it to your checkpoint file name\n", - "!python -m torch.distributed.launch --nproc_per_node=1 megatron_ckpt_to_nemo.py \\\n", + "!torchrun --nproc_per_node=1 megatron_ckpt_to_nemo.py \\\n", " --checkpoint_folder=gpt_creditcard_results/megatron_gpt/checkpoints/ \\\n", " --checkpoint_name={CHECKPONT_FILE_NAME} \\\n", " --nemo_file_path=tabular.nemo \\\n", @@ -597,6 +613,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "fa16378e", "metadata": {}, @@ -605,6 +622,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "ed056ec6", "metadata": {}, @@ -630,6 +648,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a62b48dc", "metadata": {}, @@ -685,6 +704,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "cccd54d9", "metadata": {}, @@ -790,6 +810,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "0f2f6e3a", "metadata": {}, diff --git a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb index 7ccf33826157..87bfc5c9b30d 100644 --- a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb +++ b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='r1.19.0'" + "BRANCH='r1.20.0'" ] }, { @@ -723,7 +723,7 @@ "- `length_params`\n", "- `sampling_params`\n", "\n", - "as arguments. More information about the [text generation API can be found here](https://github.com/NVIDIA/NeMo/blob/r1.19.0/nemo/collections/nlp/modules/common/transformer/text_generation.py).\n", + "as arguments. More information about the [text generation API can be found here](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/nlp/modules/common/transformer/text_generation.py).\n", "\n", "If `length_params` and `sampling_params` are set to `None`, the model generates output with a greedy decoding strategy and generates up to `30` new tokens. Most predictive downstream tasks (not text generation tasks), use greedy sampling. To see other ways to run inference with your prompt learning model and more details on how to define various inference parameters, visit `examples/nlp/language_modeling/megatron_gpt_eval.py`.\n", "\n", diff --git a/tutorials/nlp/Punctuation_and_Capitalization.ipynb b/tutorials/nlp/Punctuation_and_Capitalization.ipynb index ea6dc45ef273..1d5c73255f68 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.19.0'" + "BRANCH = 'r1.20.0'" ] }, { @@ -293,7 +293,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As you see, `get_tatoeba_data.py` script provides not only downloads Tatoeba but also creates labels. If you wish to preprocess your own data, use [examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py) script:\n", + "As you see, `get_tatoeba_data.py` script provides not only downloads Tatoeba but also creates labels. If you wish to preprocess your own data, use [examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py) script:\n", "\n", "```\n", "NEMO_ROOT = \"\"\n", @@ -950,7 +950,7 @@ "source": [ "## Training Script\n", "\n", - "If you have NeMo installed locally, you can also train the model with [nlp/token_classification/punctuation_capitalization_train_evaluate.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/token_classification/punctuation_capitalization_train_evaluate.py).\n", + "If you have NeMo installed locally, you can also train the model with [nlp/token_classification/punctuation_capitalization_train_evaluate.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/punctuation_capitalization_train_evaluate.py).\n", "\n", "To run training script, use:\n", "\n", diff --git a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb index 62b3255d119b..2ce757d57ac9 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.19.0'" + "BRANCH = 'r1.20.0'" ] }, { @@ -369,7 +369,7 @@ } }, "source": [ - "As you see, `get_libritts_data.py` script provides not only downloads LibriTTS but also creates labels. If you wish to preprocess your own data, use [examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py) script." + "As you see, `get_libritts_data.py` script provides not only downloads LibriTTS but also creates labels. If you wish to preprocess your own data, use [examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py) script." ] }, { @@ -913,7 +913,7 @@ "source": [ "## Training Script\n", "\n", - "If you have NeMo installed locally, you can also train the model with [nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py).\n", + "If you have NeMo installed locally, you can also train the model with [nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py).\n", "\n", "To run training script, use:\n", "\n", diff --git a/tutorials/nlp/Question_Answering.ipynb b/tutorials/nlp/Question_Answering.ipynb index b337c569425d..211c78d45342 100644 --- a/tutorials/nlp/Question_Answering.ipynb +++ b/tutorials/nlp/Question_Answering.ipynb @@ -74,7 +74,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.19.0'" + "BRANCH = 'r1.20.0'" ] }, { diff --git a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb index 0cd718e71754..3d428235aa03 100644 --- a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb +++ b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.19.0'" + "BRANCH = 'r1.20.0'" ] }, { diff --git a/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb b/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb new file mode 100644 index 000000000000..c554bbbeb82a --- /dev/null +++ b/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb @@ -0,0 +1,1412 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "PiRuohn_FQco" + }, + "source": [ + "# Overview\n", + "This tutorial demonstrates how to run inference with [SpellMapper](https://arxiv.org/abs/2306.02317) - a model for Spellchecking ASR (Automatic Speech Recognition) Customization.\n", + "\n", + "Estimated time: 10-15 min.\n", + "\n", + "SpellMapper is a non-autoregressive (NAR) model based on transformer architecture ([BERT](https://arxiv.org/pdf/1810.04805.pdf) with multiple separators).\n", + "It gets as input a single ASR hypothesis (text) and a **custom vocabulary** and predicts which fragments in the ASR hypothesis should be replaced by which custom words/phrases if any.\n", + "\n", + "This model is an alternative to word boosting/shallow fusion approaches:\n", + " - does not require retraining ASR model;\n", + " - does not require beam-search/language model(LM);\n", + " - can be applied on top of any English ASR model output;" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qm5wmxVEGXgH" + }, + "source": [ + "## What is custom vocabulary?\n", + "**Custom vocabulary** is a list of words/phrases that are important for a particular user. For example, user's contact names, playlist, selected terminology and so on. The size of the custom vocabulary can vary from several hundreds to **several thousand entries** - but this is not an equivalent to ngram language model.\n", + "\n", + "![Scope of customization with user vocabulary](images/spellmapper_customization_vocabulary.png)\n", + "\n", + "Note that unlike traditional spellchecking approaches, which aim to correct known words using language models, the goal of contextual spelling correction is to correct highly specific user terms, most of which can be 1) out-of-vocabulary (OOV) words, 2) spelling variations (e.g., \"John Koehn\", \"Jon Cohen\") and language models cannot help much with that." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D5_XwuXDOKho" + }, + "source": [ + "## Tutorial Plan\n", + "\n", + "1. Create a sample custom vocabulary using some medical terminology.\n", + "2. Study what customization does - a detailed analysis of a small example.\n", + "3. Run a bigger example:\n", + " * Create sample ASR results by running TTS (text-to-speech synthesis) + ASR on some medical paper abstracts.\n", + " * Run SpellMapper inference and show how it can improve ASR results using custom vocabulary.\n", + "\n", + "TL;DR We reduce WER from `14.3%` to `11.4%` by correcting medical terms, e.g.\n", + "* `puramesin` => `puromycin`\n", + "* `parromsin` => `puromycin`\n", + "* `and hydrod` => `anhydride`\n", + "* `lesh night and` => `lesch-nyhan`\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "agz8B2CxXBBG" + }, + "source": [ + "# Preparation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "koRPpYISNPuH" + }, + "source": [ + "## Installing NeMo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HCnnz3cgVc4Q" + }, + "outputs": [], + "source": [ + "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", + "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", + "GITHUB_ACCOUNT = \"NVIDIA\"\n", + "BRANCH = 'r1.20.0'\n", + "!python -m pip install git+https://github.com/{GITHUB_ACCOUNT}/NeMo.git@{BRANCH}#egg=nemo_toolkit[all]\n", + "\n", + "# Download local version of NeMo scripts. If you are running locally and want to use your own local NeMo code,\n", + "# comment out the below lines and set NEMO_DIR to your local path.\n", + "NEMO_DIR = 'nemo'\n", + "!git clone -b {BRANCH} https://github.com/{GITHUB_ACCOUNT}/NeMo.git $NEMO_DIR" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_M92gCn_NW1_" + }, + "source": [ + "## Additional installs\n", + "We will use `sentence_splitter` to split abstracts to sentences." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ddyJA3NtGl9C" + }, + "outputs": [], + "source": [ + "!pip install sentence_splitter" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qVa91rGkeFje" + }, + "source": [ + "Clone the SpellMapper model from HuggingFace.\n", + "Note that we will need not only the checkpoint itself, but also the ngram mapping vocabulary `replacement_vocab_filt.txt` from the same folder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JiI9dkEm5cpW" + }, + "outputs": [], + "source": [ + "!git clone https://huggingface.co/bene-ges/spellmapper_asr_customization_en" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8saqFOePVfFf" + }, + "source": [ + "## Imports\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tAJyiYn_VnrF" + }, + "outputs": [], + "source": [ + "import IPython.display as ipd\n", + "import json\n", + "import random\n", + "import re\n", + "import soundfile as sf\n", + "import torch\n", + "\n", + "from collections import Counter, defaultdict\n", + "from difflib import SequenceMatcher\n", + "from matplotlib.pyplot import imshow\n", + "from matplotlib import pyplot as plt\n", + "from sentence_splitter import SentenceSplitter\n", + "from typing import List, Set, Tuple\n", + "\n", + "from nemo.collections.tts.models import FastPitchModel\n", + "from nemo.collections.tts.models import HifiGanModel\n", + "\n", + "from nemo.collections.asr.parts.utils.manifest_utils import read_manifest\n", + "\n", + "from nemo.collections.nlp.data.spellchecking_asr_customization.utils import (\n", + " get_all_candidates_coverage,\n", + " get_index,\n", + " load_ngram_mappings,\n", + " search_in_index,\n", + " get_candidates,\n", + " read_spellmapper_predictions,\n", + " apply_replacements_to_text,\n", + " load_ngram_mappings_for_dp,\n", + " get_alignment_by_dp,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mfAaOdAWUGUV" + }, + "source": [ + "Use seed to get a reproducible behaviour." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UlGnNKTuT_6A" + }, + "outputs": [], + "source": [ + "random.seed(0)\n", + "torch.manual_seed(0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RPPHI7Zd_fDz" + }, + "source": [ + "## Download data\n", + "\n", + "File `pubmed23n0009.xml` taken from public ftp server of https://www.ncbi.nlm.nih.gov/pmc/ contains information about 5593 medical papers, from which we extract only their abstracts. We will feed sentences from there to TTS + ASR to get initial ASR results.\n", + "\n", + "File `wordlist.txt` contains 100k **single-word** medical terms.\n", + "\n", + "File `valid_adam.txt` contains 24k medical abbreviations with their full forms. We will use those full forms as examples of **multi-word** medical terms.\n", + "\n", + "File `count_1w.txt` contains 330k single words with their frequencies from Google Ngrams corpus. We will use this file to filter out frequent words from our custom vocabulary.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mX6cvE8xw2n1" + }, + "outputs": [], + "source": [ + "!wget https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/pubmed23n0009.xml.gz\n", + "!gunzip pubmed23n0009.xml.gz\n", + "!grep \"AbstractText\" pubmed23n0009.xml > abstract.txt\n", + "\n", + "!wget https://raw.githubusercontent.com/McGill-NLP/medal/master/toy_data/valid_adam.txt\n", + "!wget https://raw.githubusercontent.com/glutanimate/wordlist-medicalterms-en/master/wordlist.txt\n", + "!wget https://norvig.com/ngrams/count_1w.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mBm9BeqNaRlC" + }, + "source": [ + "## Auxiliary functions\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kVUKhSh48Ypi" + }, + "outputs": [], + "source": [ + "CHARS_TO_IGNORE_REGEX = re.compile(r\"[\\.\\,\\?\\:!;()«»…\\]\\[/\\*–‽+&_\\\\½√>€™$•¼}{~—=“\\\"”″‟„]\")\n", + "\n", + "\n", + "def get_medical_vocabulary() -> Tuple[Set[str], Set[str]]:\n", + " \"\"\"This function builds a vocabulary of medical terms using downloaded sources:\n", + " wordlist.txt - 100k single-word medical terms.\n", + " valid_adam.txt - 24k medical abbreviations with their full forms. We use those full forms as examples of multi-word medical terms.\n", + " count_1w.txt - 330k single words with their frequencies from Google Ngrams corpus. We will use this file to filter out frequent words from our custom vocabulary.\n", + " \"\"\"\n", + " common_words = set()\n", + " with open(\"count_1w.txt\", \"r\", encoding=\"utf-8\") as f:\n", + " for line in f:\n", + " word, freq = line.strip().casefold().split(\"\\t\")\n", + " if int(freq) < 500000:\n", + " break\n", + " common_words.add(word)\n", + " print(\"Size of common words vocabulary:\", len(common_words))\n", + "\n", + " abbreviations = defaultdict(set)\n", + " medical_vocabulary = set()\n", + " with open(\"valid_adam.txt\", \"r\", encoding=\"utf-8\") as f:\n", + " lines = f.readlines()\n", + " # first line is header\n", + " for line in lines[1:]:\n", + " abbrev, _, phrase = line.strip().split(\"\\t\")\n", + " # skip phrases longer than 3 words because some of them are long explanations\n", + " if phrase.count(\" \") > 2:\n", + " continue\n", + " if phrase in common_words:\n", + " continue\n", + " medical_vocabulary.add(phrase)\n", + " abbrev = abbrev.lower()\n", + " abbreviations[abbrev].add(phrase)\n", + "\n", + " with open(\"wordlist.txt\", \"r\", encoding=\"utf-8\") as f:\n", + " for line in f:\n", + " word = line.strip().casefold()\n", + " # skip words contaning digits\n", + " if re.match(r\".*\\d.*\", word):\n", + " continue\n", + " if re.match(r\".*[\\[\\]\\(\\)\\+\\,\\.].*\", word):\n", + " continue\n", + " if word in common_words:\n", + " continue\n", + " medical_vocabulary.add(word)\n", + "\n", + " print(\"Size of medical vocabulary:\", len(medical_vocabulary))\n", + " print(\"Size of abbreviation vocabulary:\", len(abbreviations))\n", + " return medical_vocabulary, abbreviations\n", + "\n", + "\n", + "def read_abstracts(medical_vocabulary: Set[str]) -> Tuple[List[str], Set[str], Set[str]]:\n", + " \"\"\"This function reads the downloaded medical abstracts, and extracts sentences containing any word/phrase from the medical vocabulary.\n", + " Args:\n", + " medical_vocabulary: set of known medical words or phrases\n", + " Returns:\n", + " sentences: list of extracted sentences\n", + " all_found_singleword: set of single words from medical vocabulary that occurred at least in one sentence\n", + " all_found_multiword: set of multi-word phrases from medical vocabulary that occurred at least in one sentence\n", + " \"\"\"\n", + " splitter = SentenceSplitter(language='en')\n", + "\n", + " all_sentences = []\n", + " all_found_singleword = set()\n", + " all_found_multiword = set()\n", + " with open(\"abstract.txt\", \"r\", encoding=\"utf-8\") as f:\n", + " for line in f:\n", + " text = line.strip().replace(\"\", \"\").replace(\"\", \"\")\n", + " sents = splitter.split(text)\n", + " found_singleword = set()\n", + " found_multiword = set()\n", + " for sent in sents:\n", + " # remove anything in brackets from text\n", + " sent = re.sub(r\"\\(.+\\)\", r\"\", sent)\n", + " # remove quotes from text\n", + " sent = sent.replace(\"\\\"\", \"\")\n", + " # skip sentences contaning digits because normalization is out of scope of this tutorial\n", + " if re.match(r\".*\\d.*\", sent):\n", + " continue\n", + " # skip sentences contaning abbreviations with period inside the sentence (for the same reason)\n", + " if \". \" in sent:\n", + " continue\n", + " # skip long sentences as they may cause OOM issues\n", + " if len(sent) > 150:\n", + " continue\n", + " # replace all punctuation to space and convert to lowercase\n", + " sent_clean = CHARS_TO_IGNORE_REGEX.sub(\" \", sent).lower()\n", + " sent_clean = \" \".join(sent_clean.split(\" \"))\n", + " words = sent_clean.split(\" \")\n", + "\n", + " found_phrases = set()\n", + " for begin in range(len(words)):\n", + " for end in range(begin + 1, min(begin + 4, len(words))):\n", + " phrase = \" \".join(words[begin:end])\n", + " if phrase in medical_vocabulary:\n", + " found_phrases.add(phrase)\n", + " if end - begin == 1:\n", + " found_singleword.add(phrase)\n", + " else:\n", + " found_multiword.add(phrase)\n", + " if len(found_phrases) > 0:\n", + " all_sentences.append((sent, \";\".join(found_phrases)))\n", + " all_found_singleword = all_found_singleword.union(found_singleword)\n", + " all_found_multiword = all_found_multiword.union(found_multiword)\n", + "\n", + " print(\"Sentences:\", len(all_sentences))\n", + " print(\"Unique single-word terms found:\", len(all_found_singleword))\n", + " print(\"Unique multi-word terms found:\", len(all_found_multiword))\n", + " print(\"Examples of multi-word terms\", str(list(all_found_multiword)[0:10]))\n", + " \n", + " return all_sentences, all_found_singleword, all_found_multiword" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XU3xeCBVpWOL" + }, + "outputs": [], + "source": [ + "def get_fragments(i_words: List[str], j_words: List[str]) -> List[Tuple[str, str, str, int, int, int, int]]:\n", + " \"\"\"This function is used to compare two word sequences to find minimal fragments that differ.\n", + " Args:\n", + " i_words: list of words in first sequence\n", + " j_words: list of words in second sequence\n", + " Returns:\n", + " list of tuples (difference_type, fragment1, fragment2, begin_of_fragment1, end_of_fragment1, begin_of_fragment2, end_of_fragment2)\n", + " \"\"\"\n", + " s = SequenceMatcher(None, i_words, j_words)\n", + " result = []\n", + " for tag, i1, i2, j1, j2 in s.get_opcodes():\n", + " result.append((tag, \" \".join(i_words[i1:i2]), \" \".join(j_words[j1:j2]), i1, i2, j1, j2))\n", + " result = sorted(result, key=lambda x: x[3])\n", + " return result" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2ydXp_pFYmYu" + }, + "source": [ + "## Read medical data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WAeauax0SV1-" + }, + "outputs": [], + "source": [ + "medical_vocabulary, _ = get_medical_vocabulary()\n", + "sentences, found_singleword, found_multiword = read_abstracts(medical_vocabulary)\n", + "# in case if we need random candidates from a big sample - we will use full medical vocabulary for that purpose.\n", + "big_sample = list(medical_vocabulary)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FRli7-Kx7sOO" + }, + "outputs": [], + "source": [ + "for sent, phrases in sentences[0:10]:\n", + " print(sent, \"\\t\", phrases)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rL1VqH2_dk93" + }, + "source": [ + "# SpellMapper ASR Customization\n", + "\n", + "SpellMapper model relies on two offline preparation steps:\n", + "1. Collecting n-gram mappings from a large corpus (this mappings vocabulary had been collected once on a large corpus and is supplied with the model).\n", + "2. Indexing of user vocabulary by n-grams.\n", + "\n", + "![Offline data preparation](images/spellmapper_data_preparation.png)\n", + "\n", + "At inference time we take as input an ASR hypothesis and an n-gram-indexed user vocabulary and perform following steps:\n", + "1. Retrieve the top 10 candidate phrases from the user vocabulary that are likely to be contained in the given ASR-hypothesis, possibly in a misspelled form.\n", + "2. Run the neural model that tags the input characters with correct candidate labels or 0 if no match is found.\n", + "3. Do post-processing to combine results.\n", + "\n", + "![Inference pipeline](images/spellmapper_inference_pipeline.png)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OeJpsMwslmrd" + }, + "source": [ + "## N-gram mappings\n", + "Note that n-gram mappings vocabulary had been collected from a large corpus and is supplied with the model. It is supposed to be \"universal\" for English language.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uH6p0mOd12pi" + }, + "source": [ + "Let's see what n-gram mappings are like, for example, for an n-gram `l u c`.\n", + "Note that n-grams in `replacement_vocab_filt.txt` preserve one-to-one correspondence between original letters and misspelled fragments (this additional markup is handled during loading). \n", + "* `+` means that adjacent letters are concatenated and correspond to a single source letter. \n", + "* `` means that the original letter is deleted. \n", + "This auxiliary markup will be removed automatically during loading.\n", + "\n", + "`_` is used instead of real space symbol.\n", + "\n", + "Last three columns are:\n", + "* joint frequency\n", + "* frequency of original n-gram\n", + "* frequency of misspelled n-gram\n", + "\n", + "$$\\frac{JointFrequency}{SourceFrequency}=TranslationProbability$$\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qul163dB1sKp" + }, + "outputs": [], + "source": [ + "!awk 'BEGIN {FS=\"\\t\"} ($1==\"l u c\"){print $0}' < spellmapper_asr_customization_en/replacement_vocab_filt.txt | sort -t$'\\t' -k3nr" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eWxcrVWZ3Pfq" + }, + "source": [ + "Now we read n-gram mappings from the file. Parameter `max_misspelled_freq` controls maximum frequency of misspelled n-grams. N-grams more frequent than that are put in the list of banned n-grams and won't be used in indexing." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WHKhE945-N7o" + }, + "outputs": [], + "source": [ + "print(\"load n-gram mappings...\")\n", + "ngram_mapping_vocab, ban_ngram = load_ngram_mappings(\"spellmapper_asr_customization_en/replacement_vocab_filt.txt\", max_misspelled_freq=125000)\n", + "# CAUTION: entries in ban_ngram end with a space and can contain \"+\" \"=\"\n", + "print(\"Size of ngram mapping vocabulary:\", len(ngram_mapping_vocab))\n", + "print(\"Size of banned ngrams:\", len(ban_ngram))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "49IcMBfllvXN" + }, + "source": [ + "## Indexing of custom vocabulary" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b1K6paeee2Iu" + }, + "source": [ + "As we mentioned earlier, this model pipeline is intended to work with custom vocabularies up to several thousand entries. Since the whole medical vocabulary contains 110k entries, we restrict our custom vocabulary to 5000+ terms that occurred in given corpus of abstracts.\n", + "\n", + "The goal of indexing our custom vocabulary is to build an index where key is a letter n-gram and value is the whole phrase. The keys are n-grams in the given user phrase and their misspelled variants taken from our collection of n-\n", + "gram mappings (see Index of custom vocabulary in Fig. 1)\n", + "\n", + "*Though it is possible to index and search the whole 110k vocabulary, it will require additional optimizations and is beyond the scope of this tutorial.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xWb0jGqw6Woi" + }, + "outputs": [], + "source": [ + "custom_phrases = []\n", + "for phrase in medical_vocabulary:\n", + " if phrase not in found_singleword and phrase not in found_multiword:\n", + " continue\n", + " custom_phrases.append(\" \".join(list(phrase.replace(\" \", \"_\"))))\n", + "print(\"Size of customization vocabulary:\", len(custom_phrases))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UHWor5pD2Eyb" + }, + "source": [ + "Now we build the index for our custom phrases.\n", + "\n", + "Parameter `min_log_prob` controls minimum log probability, after which we stop growing this n-gram.\n", + "\n", + "Parameter `max_phrases_per_ngram` controls maximum number of phrases that can be indexed by one ngram. N-grams exceeding this limit are also banned and not used in indexing.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hs4RDXj0-xW9" + }, + "outputs": [], + "source": [ + "phrases, ngram2phrases = get_index(custom_phrases, ngram_mapping_vocab, ban_ngram, min_log_prob=-4.0, max_phrases_per_ngram=600)\n", + "print(\"Size of phrases:\", len(phrases))\n", + "print(\"Size of ngram2phrases:\", len(ngram2phrases))\n", + "\n", + "# Save index to file - later we will use it in other script\n", + "with open(\"index.txt\", \"w\", encoding=\"utf-8\") as out:\n", + " for ngram in ngram2phrases:\n", + " for phrase_id, begin, size, logprob in ngram2phrases[ngram]:\n", + " phrase = phrases[phrase_id]\n", + " out.write(ngram + \"\\t\" + phrase + \"\\t\" + str(begin) + \"\\t\" + str(size) + \"\\t\" + str(logprob) + \"\\n\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RV1sdQ9rvar8" + }, + "source": [ + "## Small detailed example\n", + "\n", + "Let's consider, for example, one custom phrase `thoracic aorta` and an incorrect ASR-hypothesis `the tarasic oorda is a part of the aorta located in the thorax`, containing a misspelled phrase `tarasic_oorda`. \n", + "\n", + "We will see \n", + "1. How this custom phrase is indexed.\n", + "2. How candidate retrieval works, given ASR-hypothesis.\n", + "3. How inference and post-processing work.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kGBTTJXixnrG" + }, + "source": [ + "### N-grams in index" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ryfUlqNMl4vQ" + }, + "source": [ + "Let's look, for example, by what n-grams a custom phrase `thoracic aorta` is indexed. \n", + "Columns: \n", + "1. n-gram\n", + "2. beginning position in the phrase\n", + "3. length\n", + "4. log probability\n", + "\n", + "Note that many n-grams are not from n-gram mappings file. Those are derived by growing previous n-grams with new replacements. In this case log probabilities are summed up. Growing stops, when minimum log prob is exceeded.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "x0ZVsXGBo8pt" + }, + "outputs": [], + "source": [ + "for ngram in ngram2phrases:\n", + " for phrase_id, b, length, lprob in ngram2phrases[ngram]:\n", + " if phrases[phrase_id] == \"t h o r a c i c _ a o r t a\":\n", + " print(ngram.ljust(16) + \"\\t\" + str(b).rjust(4) + \"\\t\" + str(length).rjust(4) + \"\\t\" + str(lprob))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "20ov23ze4xeQ" + }, + "source": [ + "### Candidate retrieval\n", + "Candidate retrieval tasks are:\n", + " - Given an input sentence and an index of custom vocabulary find all n-grams from the index matching the sentence. \n", + " - Find which sentence fragments and which custom phrases have most \"hits\" - potential candidates.\n", + " - Find approximate starting position for each candidate phrase. \n", + "\n", + "\n", + "Let's look at the hits, that phrase \"thoracic aorta\" gets by searching all ngrams in the input text. We can see some hits in different part of the sentence, but a moving window can find a fragment with most hits." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "t_rhKQ3Xqa8A" + }, + "outputs": [], + "source": [ + "sent = \"the_tarasic_oorda_is_a_part_of_the_aorta_located_in_the_thorax\"\n", + "phrases2positions, position2ngrams = search_in_index(ngram2phrases, phrases, sent)\n", + "print(\" \".join(list(sent)))\n", + "print(\" \".join(list(map(str, phrases2positions[phrases.index(\"t h o r a c i c _ a o r t a\")].astype(int)))))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "orkRapbjF4aZ" + }, + "source": [ + "`phrases2positions` is a matrix of size (len(phrases), len(ASR_hypothesis)).\n", + "It is filled with 1.0 (hits) on intersection of letter n-grams and phrases that are indexed by these n-grams, 0.0 - elsewhere.\n", + "It is used to find phrases with many hits within a contiguous window - potential matching candidates.\n", + "\n", + "`position2ngrams` is a list of sets of ngrams. List index is the starting position in the ASR-hypothesis.\n", + "It is used later to check how well each found candidate is covered by n-grams (to avoid cases where some repeating n-gram gives many hits to a phrase, but the phrase itself is not well covered)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JF7u4_iiHLyI" + }, + "outputs": [], + "source": [ + "candidate2coverage, candidate2position = get_all_candidates_coverage(phrases, phrases2positions)\n", + "print(\"Coverage=\", candidate2coverage[phrases.index(\"t h o r a c i c _ a o r t a\")])\n", + "print(\"Starting position=\", candidate2position[phrases.index(\"t h o r a c i c _ a o r t a\")])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "45mvKg8ZyNbr" + }, + "source": [ + "`candidate2coverage` is a list of size len(phrases) containing coverage (0.0 to 1.0) in best window.\n", + "Coverage is a smoothed percentage of hits in the window of size of the given phrase.\n", + "\n", + "`candidate2position` is a list of size len(phrases) containing starting position of best window.\n", + "\n", + "Starting position is approximate, it's ok. If it is not at the beginning of some word, SpellMapper will try to adjust it later. In this particular example we get 5 as starting position instead of 4, missing the first letter." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Sjyn9I98udL9" + }, + "source": [ + "### Inference\n", + "\n", + "Now let's generate input for SpellMapper inference. \n", + "An input line should consist of 4 tab-separated columns:\n", + " - text of ASR-hypothesis\n", + " - texts of 10 candidates separated by semicolon\n", + " - 1-based ids of non-dummy candidates\n", + " - approximate start/end coordinates of non-dummy candidates (correspond to ids)\n", + "Note that candidate retrieval is done inside the function `get_candidates`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cJnusVfBRhRX" + }, + "outputs": [], + "source": [ + "out = open(\"spellmapper_input.txt\", \"w\", encoding=\"utf-8\")\n", + "letters = list(sent)\n", + "candidates = get_candidates(ngram2phrases, phrases, letters, big_sample)\n", + "# We add two columns with targets and span_info. \n", + "# They have same format as during training, but start and end positions are APPROXIMATE, they will be adjusted when constructing BertExample.\n", + "targets = []\n", + "span_info = []\n", + "for idx, c in enumerate(candidates):\n", + " if c[1] == -1:\n", + " continue\n", + " targets.append(str(idx + 1)) # targets are 1-based\n", + " start = c[1]\n", + " end = min(c[1] + c[2], len(letters)) # ensure that end is not outside sentence length (it can happen because c[2] is candidate length used as approximation)\n", + " span_info.append(\"CUSTOM \" + str(start) + \" \" + str(end))\n", + "\n", + "out.write(\" \".join(letters) + \"\\t\" + \";\".join([x[0] for x in candidates]) + \"\\t\" + \" \".join(targets) + \"\\t\" + \";\".join(span_info) + \"\\n\")\n", + "out.close()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Qpei5o89SmaU" + }, + "outputs": [], + "source": [ + "!cat spellmapper_input.txt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9rAmO15SS6go" + }, + "outputs": [], + "source": [ + "!python nemo/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py \\\n", + " pretrained_model=spellmapper_asr_customization_en/training_10m_5ep.nemo \\\n", + " model.max_sequence_len=512 \\\n", + " inference.from_file=spellmapper_input.txt \\\n", + " inference.out_file=spellmapper_output.txt \\\n", + " inference.batch_size=16 \\\n", + " lang=en\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wd2aq4T1N5cs" + }, + "source": [ + "Each line in SpellMapper output is tab-separated and consists of 4 columns:\n", + "1. ASR-hypothesis (same as in input)\n", + "2. 10 candidates separated with semicolon (same as in input)\n", + "3. fragment predictions, separated with semicolon, each prediction is a tuple (start, end, candidate_id, probability)\n", + "4. letter predictions - candidate_id predicted for each letter (this is only for debug purposes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ravgEX8cTFty" + }, + "outputs": [], + "source": [ + "!cat spellmapper_output.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "az26364-PHb2" + }, + "source": [ + "We can use some utility functions to apply found replacements and get actual corrected text." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lPtFa_EhK8pb" + }, + "outputs": [], + "source": [ + "spellmapper_results = read_spellmapper_predictions(\"spellmapper_output.txt\")\n", + "text, replacements, _ = spellmapper_results[0]\n", + "corrected_text = apply_replacements_to_text(text, replacements, replace_hyphen_to_space=False)\n", + "print(\"Text before correction:\\n\", text)\n", + "print(\"Text after correction:\\n\", corrected_text)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "efF7O-D91FLX" + }, + "source": [ + "# Bigger customization example\n", + "\n", + "Let's test customization on more data. The plan is\n", + " * Get baseline ASR transcriptions by running TTS + ASR on some medical paper abstracts.\n", + " * Run SpellMapper inference and show how it can improve ASR results using custom vocabulary.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r_EFPnyDcXZt" + }, + "source": [ + "## Run TTS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i9F5SBhmr8rk" + }, + "outputs": [], + "source": [ + "# create a folder for wav files (TTS output)\n", + "!rm -r audio\n", + "!mkdir audio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JMbkNVt7YBAO" + }, + "outputs": [], + "source": [ + "if torch.cuda.is_available():\n", + " device = \"cuda\"\n", + "else:\n", + " device = \"cpu\"\n", + "\n", + "# Load FastPitch from HuggingFace\n", + "spectrogram_generator = FastPitchModel.from_pretrained(\"nvidia/tts_en_fastpitch\").eval().to(device)\n", + "# Load HifiGan vocoder from HuggingFace\n", + "vocoder = HifiGanModel.from_pretrained(model_name=\"nvidia/tts_hifigan\").eval().to(device)\n", + "\n", + "# Write sentences that we want to feed to TTS\n", + "with open(\"tts_input.txt\", \"w\", encoding=\"utf-8\") as out:\n", + " for sent, _ in sentences[0:100]:\n", + " out.write(sent + \"\\n\")\n", + "\n", + "out_manifest = open(\"manifest.json\", \"w\", encoding=\"utf-8\")\n", + "i = 0\n", + "with open(\"tts_input.txt\", \"r\", encoding=\"utf-8\") as inp:\n", + " for line in inp:\n", + " text = line.strip()\n", + " text_clean = CHARS_TO_IGNORE_REGEX.sub(\" \", text).lower() #replace all punctuation to space and convert to lowercase\n", + " text_clean = \" \".join(text_clean.split())\n", + "\n", + " parsed = spectrogram_generator.parse(text, normalize=True)\n", + "\n", + " spectrogram = spectrogram_generator.generate_spectrogram(tokens=parsed)\n", + " audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)\n", + "\n", + " # Note that vocoder return a batch of audio. In this example, we just take the first and only sample.\n", + " filename = \"audio/\" + str(i) + \".wav\"\n", + " sf.write(filename, audio.to('cpu').detach().numpy()[0], 16000)\n", + " out_manifest.write(\n", + " \"{\\\"audio_filepath\\\": \\\"\" + filename + \"\\\", \\\"text\\\": \\\"\" + text_clean + \"\\\", \\\"orig_text\\\": \\\"\" + text + \"\\\"}\\n\"\n", + " )\n", + " i += 1\n", + "\n", + " # display some examples\n", + " if i < 10:\n", + " print(f'\"{text}\"\\n')\n", + " ipd.display(ipd.Audio(audio.to('cpu').detach(), rate=22050))\n", + "\n", + "out_manifest.close()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9T3CZcCAmxCz" + }, + "source": [ + "Now we have a folder with generated audios `audio/*.wav` and a nemo manifest with json records like `{\"audio_filepath\": \"audio/0.wav\", \"text\": \"no renal auditory or vestibular toxicity was observed\", \"orig_text\": \"No renal, auditory, or vestibular toxicity was observed.\"}`.", + "\n", + "Note that TTS model may mispronounce some unknown words, for example, abbreviations like `tRNAs`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pR_T1HnttVjm" + }, + "outputs": [], + "source": [ + "lines = []\n", + "with open(\"manifest.json\", \"r\", encoding=\"utf-8\") as f:\n", + " lines = f.readlines()\n", + "\n", + "for line in lines:\n", + " try:\n", + " data = json.loads(line.strip())\n", + " except:\n", + " print(line)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bt2TMLLvdUHm" + }, + "source": [ + "Free GPU memory to avoid OOM." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZwEpAOCaRH7s" + }, + "outputs": [], + "source": [ + "del spectrogram_generator\n", + "del vocoder\n", + "torch.cuda.empty_cache()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HrensakWdLkt" + }, + "source": [ + "## Run baseline ASR" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IQNIo2M_mqJc" + }, + "source": [ + "Next we transcribe our .wav files with a general domain [ASR model](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_en_conformer_ctc_large). It will generate an output file `ctc_baseline_transcript.json` where the predicted transcriptions are stored in the field `pred_text` of each record.\n", + "\n", + "Note that this ASR model was not trained or fine-tuned on medical domain, so we expect it to make mistakes on medical terms." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NMN63ux1mJiG" + }, + "outputs": [], + "source": [ + "!python nemo/examples/asr/transcribe_speech.py \\\n", + " pretrained_name=\"stt_en_conformer_ctc_large\" \\\n", + " dataset_manifest=manifest.json \\\n", + " output_filename=ctc_baseline_transcript_tmp.json \\\n", + " batch_size=2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L3swQ8uqqgnp" + }, + "source": [ + "ATTENTION: SpellMapper relies on words to be separated by _single_ space\n", + "\n", + "There is a bug with multiple space, observed in ASR results produced by Conformer-CTC, probably connected to this issue: https://github.com/NVIDIA/NeMo/issues/4034.\n", + "\n", + "So we need to correct the manifests to ensure that all spaces are single." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "z17sxkmXrXpJ" + }, + "outputs": [], + "source": [ + "test_data = read_manifest(\"ctc_baseline_transcript_tmp.json\")\n", + "\n", + "for i in range(len(test_data)):\n", + " # if there are multiple spaces in the string they will be merged to one\n", + " test_data[i][\"pred_text\"] = \" \".join(test_data[i][\"pred_text\"].split())\n", + "\n", + "with open(\"ctc_baseline_transcript.json\", \"w\", encoding=\"utf-8\") as out:\n", + " for d in test_data:\n", + " line = json.dumps(d)\n", + " out.write(line + \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PuKtfhbVkVJY" + }, + "outputs": [], + "source": [ + "!head -n 4 ctc_baseline_transcript.json" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aCJw9NEXqRg8" + }, + "source": [ + "### Calculating WER of baseline transcript\n", + "We use the standard script from NeMo to calculate WER and CER of our baseline transcript. Internally it compares the text in `pred_text` (predicted transcript) to `text` (reference transcript). " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZmNEGVWQsGo2" + }, + "outputs": [], + "source": [ + "!python nemo/examples/asr/speech_to_text_eval.py \\\n", + " dataset_manifest=ctc_baseline_transcript.json \\\n", + " only_score_manifest=True\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AvPwJr0ZqdkN" + }, + "source": [ + "### See fragments that differ\n", + "We use SequenceMatcher to see fragments that differ. (Another option is to use a more powerful analytics tool [Speech Data Explorer](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tools/speech_data_explorer.html))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RAeaVCpMv78y" + }, + "outputs": [], + "source": [ + "test_data = read_manifest(\"ctc_baseline_transcript.json\")\n", + "pred_text = [data['pred_text'] for data in test_data]\n", + "ref_text = [data['text'] for data in test_data]\n", + "audio_filepath = [data['audio_filepath'] for data in test_data]\n", + "\n", + "diff_vocab = Counter()\n", + "\n", + "for i in range(len(test_data)):\n", + " ref_sent = \" \" + ref_text[i] + \" \"\n", + " pred_sent = \" \" + pred_text[i] + \" \"\n", + "\n", + " pred_words = pred_sent.strip().split()\n", + " ref_words = ref_sent.strip().split()\n", + "\n", + " for tag, hyp_fragment, ref_fragment, i1, i2, j1, j2 in get_fragments(pred_words, ref_words):\n", + " if tag != \"equal\":\n", + " diff_vocab[(tag, hyp_fragment, ref_fragment)] += 1\n", + "\n", + "sum_ = 0\n", + "print(\"PRED vs REF\")\n", + "for k, v in diff_vocab.most_common(1000000):\n", + " sum_ += v\n", + " print(k, v, \"sum=\", sum_)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dUSOF7iD1w_9" + }, + "source": [ + "## Run SpellMapper" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x39BQhYB6_Fr" + }, + "source": [ + "Now we run retrieval on our input manifest and prepare input for SpellMapper inference. Note that we use index of custom vocabulary (file `index.txt` that we saved earlier)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y8x-yT5WqfFz" + }, + "outputs": [], + "source": [ + "!python nemo/examples/nlp/spellchecking_asr_customization/prepare_input_from_manifest.py \\\n", + " --manifest ctc_baseline_transcript.json \\\n", + " --custom_vocab_index index.txt \\\n", + " --big_sample spellmapper_asr_customization_en/big_sample.txt \\\n", + " --short2full_name short2full.txt \\\n", + " --output_name spellmapper_input.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ueq_JAPWGs_Y" + }, + "source": [ + "Run the inference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zgkqiiZtJjcB" + }, + "outputs": [], + "source": [ + "!python nemo/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py \\\n", + " pretrained_model=spellmapper_asr_customization_en/training_10m_5ep.nemo \\\n", + " model.max_sequence_len=512 \\\n", + " inference.from_file=spellmapper_input.txt \\\n", + " inference.out_file=spellmapper_output.txt \\\n", + " inference.batch_size=16 \\\n", + " lang=en\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RPQWJX8dFLfX" + }, + "source": [ + "Now we postprocess SpellMapper output and create output corrected manifest." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3eFU515yKvXP" + }, + "outputs": [], + "source": [ + "!python nemo/examples/nlp/spellchecking_asr_customization/postprocess_and_update_manifest.py \\\n", + " --input_manifest ctc_baseline_transcript.json \\\n", + " --short2full_name short2full.txt \\\n", + " --output_manifest ctc_corrected_transcript.json \\\n", + " --spellmapper_result spellmapper_output.txt \\\n", + " --replace_hyphen_to_space \\\n", + " --field_name pred_text \\\n", + " --ngram_mappings \"\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hRoIhhGh17tp" + }, + "source": [ + "### Calculating WER of corrected transcript." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qIT957bGo9AY" + }, + "outputs": [], + "source": [ + "!python nemo/examples/asr/speech_to_text_eval.py \\\n", + " dataset_manifest=ctc_corrected_transcript.json \\\n", + " only_score_manifest=True\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NYXIPusupqOQ" + }, + "outputs": [], + "source": [ + "test_data = read_manifest(\"ctc_corrected_transcript.json\")\n", + "pred_text = [data['pred_text'] for data in test_data]\n", + "ref_text = [data['pred_text_before_correction'] for data in test_data]\n", + "\n", + "diff_vocab = Counter()\n", + "\n", + "for i in range(len(test_data)):\n", + " ref_sent = \" \" + ref_text[i] + \" \"\n", + " pred_sent = \" \" + pred_text[i] + \" \"\n", + "\n", + " pred_words = pred_sent.strip().split()\n", + " ref_words = ref_sent.strip().split()\n", + "\n", + " for tag, hyp_fragment, ref_fragment, i1, i2, j1, j2 in get_fragments(pred_words, ref_words):\n", + " if tag != \"equal\":\n", + " diff_vocab[(tag, hyp_fragment, ref_fragment)] += 1\n", + "\n", + "sum_ = 0\n", + "print(\"Corrected vs baseline\")\n", + "for k, v in diff_vocab.most_common(1000000):\n", + " sum_ += v\n", + " print(k, v, \"sum=\", sum_)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DJtXlqXbTD6M" + }, + "source": [ + "### Filtering by Dynamic Programming(DP) score\n", + "\n", + "What else can be done?\n", + "Given a fragment and its potential replacement, we can apply **dynamic programming** to find the most probable \"translation\" path between them. We will use the same n-gram mapping vocabulary, because its frequencies give us \"translation probability\" of each n-gram pair. The final path score can be calculated as maximum sum of log probabilities of matching n-grams along this path.\n", + "Let's look at an example. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "05Qf9wgHU_UR" + }, + "outputs": [], + "source": [ + "joint_vocab, orig_vocab, misspelled_vocab, max_len = load_ngram_mappings_for_dp(\"spellmapper_asr_customization_en/replacement_vocab_filt.txt\")\n", + "\n", + "fragment = \"and hydrod\"\n", + "replacement = \"anhydride\"\n", + "fragment_spaced = \" \".join(list(fragment.replace(\" \", \"_\")))\n", + "replacement_spaced = \" \".join(list(replacement.replace(\" \", \"_\")))\n", + "path = get_alignment_by_dp(\n", + " replacement_spaced,\n", + " fragment_spaced,\n", + " dp_data=(joint_vocab, orig_vocab, misspelled_vocab, max_len)\n", + ")\n", + "print(\"Dynamic Programming path:\")\n", + "for fragment_ngram, replacement_ngram, score, sum_score, joint_freq, orig_freq, misspelled_freq in path:\n", + " print(\n", + " \"\\t\",\n", + " \"frag=\",\n", + " fragment_ngram,\n", + " \"; repl=\",\n", + " replacement_ngram,\n", + " \"; score=\",\n", + " score,\n", + " \"; sum_score=\",\n", + " sum_score,\n", + " \"; joint_freq=\",\n", + " joint_freq,\n", + " \"; orig_freq=\",\n", + " orig_freq,\n", + " \"; misspelled_freq=\",\n", + " misspelled_freq,\n", + " )\n", + "\n", + "print(\"Final path score is in path[-1][3]: \", path[-1][3])\n", + "print(\"Dynamic programming(DP) score per symbol is final score divided by len(fragment): \", path[-1][3] / (len(fragment)))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hgfKPKckaLnc" + }, + "source": [ + "The idea is that we can skip replacements whose average DP score per symbol is below some predefined minimum, say -1.5.\n", + "Note that dynamic programming works slow because of quadratic complexity, but it allows to get rid of some false positives. Let's apply it on the same test set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UhSXh7ht_JRn" + }, + "outputs": [], + "source": [ + "!python nemo/examples/nlp/spellchecking_asr_customization/postprocess_and_update_manifest.py \\\n", + " --input_manifest ctc_baseline_transcript.json \\\n", + " --short2full_name short2full.txt \\\n", + " --output_manifest ctc_corrected_transcript_dp.json \\\n", + " --spellmapper_result spellmapper_output.txt \\\n", + " --replace_hyphen_to_space \\\n", + " --field_name pred_text \\\n", + " --use_dp \\\n", + " --ngram_mappings spellmapper_asr_customization_en/replacement_vocab_filt.txt \\\n", + " --min_dp_score_per_symbol -1.5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "u8R5YHB3vPC8" + }, + "outputs": [], + "source": [ + "!python nemo/examples/asr/speech_to_text_eval.py \\\n", + " dataset_manifest=ctc_corrected_transcript_dp.json \\\n", + " only_score_manifest=True" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "upvTbkFAeYtR" + }, + "source": [ + "# Final notes\n", + "1. Bash-script with example of inference pipeline [run_infer.sh](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/spellchecking_asr_customization/run_infer.sh)\n", + "\n", + "2. Check our paper: [SpellMapper: A non-autoregressive neural spellchecker for ASR customization with candidate retrieval based on n-gram mappings](https://arxiv.org/abs/2306.02317)\n", + "\n", + "3. To reproduce evaluation experiments from this paper see these scripts:\n", + " - [test_on_kensho.sh](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/evaluation/test_on_kensho.sh)\n", + " - [test_on_userlibri.sh](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/evaluation/test_on_kensho.sh)\n", + " - [test_on_spoken_wikipedia.sh](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/evaluation/test_on_kensho.sh)\n", + "\n", + "4. To reproduce creation of training data see [README.md](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/README.md)\n", + "\n", + "5. To run training see [run_training.sh](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/spellchecking_asr_customization/run_training.sh)\n", + "\n", + "6. Promising future research directions would be:\n", + " - add a simple trainable classifier on top of SpellMapper predictions instead of using multiple thresholds\n", + " - retrain with adding more various false positives to the training data" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb index 3296acd05919..e6df8b71c294 100644 --- a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb +++ b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb @@ -20,7 +20,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n", "\n" ] diff --git a/tutorials/nlp/Token_Classification-BioMegatron.ipynb b/tutorials/nlp/Token_Classification-BioMegatron.ipynb index c3f95bff841a..3ac90f354fef 100644 --- a/tutorials/nlp/Token_Classification-BioMegatron.ipynb +++ b/tutorials/nlp/Token_Classification-BioMegatron.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='r1.19.0'" + "BRANCH='r1.20.0'" ] }, { diff --git a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb index 9b8007751f55..73438b590827 100644 --- a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb +++ b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb @@ -30,7 +30,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.19.0'" + "BRANCH = 'r1.20.0'" ] }, { @@ -53,7 +53,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.19.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'r1.20.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb index a1b0c4fd8561..b4a9315f9670 100644 --- a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb +++ b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/images/spellmapper_customization_vocabulary.png b/tutorials/nlp/images/spellmapper_customization_vocabulary.png new file mode 100644 index 0000000000000000000000000000000000000000..1ecd7ab5add501b7e2889142a4df67442318b161 GIT binary patch literal 39243 zcmd42WmHw&7e0Cb0g+M>kyNC+4j|niT>{eGDczt*cXvxj!=Y2@?oNS2!yyix|Bb%C zalhP;_ug^W7!3D5?mhQfbImp9T=RKWu!5Wf<}-q4AP@*s@}sB{2!y-}0wIt-MFf6v z!N0Wy{y}h1k`M-!4HNAEH&0B3WQ9PW%1HEEePrMs&F-VR0|#4|vaqG`uwQqVzUk6U{= z)BGz>ungr(3n^#Y`xI)&z$29}OIgv+A`my%y==!xm*yiR?s)~wABBQp29+-opDp=R& zE?yG})c8A!RAg2U)6EEngM$NNI^qqDmZ)d{_h#0@Y4)3;WH!^Hq9VBrUN_hx7vsQe zg?>k`EZOfNBtf;yjYM4nLPFc+=ARQg_4G)7sMy};bEaE^$%cl8Y&MG@81eo|YScLF zF^eGKa}UX$vS^?WN#S)n!AQlAjC4)I?5`m4I#sk`0XZV2%elZeWjS<21UQyIWh(l> zhd37vLQ*)aQ!ovO)3~%=|C8nzy&N{?c{ZhnicP}nbq(gZIh~M~PiF0!pHEptz#`_3 z8+ziqd^RC3Ff}=u#^<40syZ`0U6l;q8quvb8>=>-nEg*Tu68XrxeczzYhRI2TW?RK zV=}lL^3&6o;%Jn+BZvnWk$qct7Hr%2JkIwk4SQN|U_7jQ!-7cEfBWIfI5xqO(%s#S zgo;JP>v}Yt&VxfG=ji0*k(dS~;WAv!9_ zcbdVLTT(MJ?ty8OOXV0^Snz27jt0E;<~gHoYqQ4%OgfQqD2>ZusnLm-BvSLAwn4zEeK8bo1PJv6Ev&2-TfIDFWruL&d5@vEw+jbHx ze9R1-QdmeG6D5`RGWa_%D_?2(_*#mJLdcih_e!}OHY<|KoW^n(1|( z`EVOZetUO^$Dq+^@~}R;B*K8Y_X7omg}({D(_{N5 zH;cG!Jzslke-5(WGRJe+><7a9)zR18P5JJfFL_Q*4lpNu{}am7_(2@rfi2K{vFt%! z>3MZ>yxs#z1|2=Ox3fDsI(irU;|Dnh>c6qgk9DyS*t4Aj_Q>~e3m@UTAWB3AVHbCW z;*DeqJS;XiBqt~T&$g?x=?uYdJ&%3^|GPDmTIaZ<#E1?evGMlyo}Qi##3H7prv6j) zPw=la#?Ud(lRheAV`Hd^)kc5(qZBa)%~~s9yiG2L{O+gJz^DGUQb*aa{A))iZ}0n9 zdHx2^D|n+)G_KbZ*OmZFzm(v z1pFf6Pu8!-hd1FGrLAAqyC;7tOGFU$0m2psfPvF9FvtU5a&Ma9e}g3<7gyHS zmL(~WOYa{>1UYtQ3HjyArshxV03Nj-P|{;_je3LW;o}bk6~}~{8g@6pHInoEr>#z} zZnH9-7WdV5WNGOE$ymzAK@%HVT1u11r*n4!GXJ(OnJfh7n;~~BM4&8X5}(`SpUMRm zH5%dK9YEh68}BKMO}rzb?;G%y#Y}aGNwkEcp1_}Fa1e;B6!U(N#l|118Y*S77hn}Ua)kr8&fGhsPhlFVUE0Icm%Nz;*xr7Gi4 zPQL*}y~ReS;;~t#w%c|Fq&L(km;~VV*`w8t24i5}jw*zK-0E?H^|oh#Y~^~dyzP3I z?!*1Xtw^z2XwH0uIY0=RYE9Qz<(t&tCwLaY6G&VMtslM zn^VBM+V^28Q@?`l{_g8D2fQQT)di`hW@Zu-5{C8c)Q_HDZl>DC#KsB$9V6tlnO#_D zyIc!pYB?SMAsHKhf!|(=a5M_3s7T^=Dhdr9P?qNtGbt`Eo{Jg+Jp5fJj(o-5-+p~3 z0aEp>z)vbFU0q$;^>$wuEdu=gkx@{FGx$<9Q9z}G={y(mKGzdPD#axwCA~grKYsi$ z?1}g?0KBgE?orhs_x5NqiDjP$uXH(1pZnX23g&EuUIvh_^H2(VsI{K=-gN2Ysicfi^s*o-O9t?*u@<6*Mlr==SK2# z1D;g!cKz*x>t?d)tPH4LKu%6BI5?QR63}~55iJJ}ARh(e(K%!_K{q$OkGt^5j}zN& z{d?8{VMp)i>FH;MxK?}~?sj)A{ymNI_i4?zgoMq_&6v&@3h7zW?+97afdAhFRD~ni z+1;HX;FB@*1O)O$06lISw%viWd%nSeO7a#MgTz}fNE{AXs@zJnu)CQ$+KXYA*EMz?2CvY z+~uEMndY?+ukpJyFO4Uq?V>Qg1#C|X$UGYywmu@V5cra?*)H>*e7xUN$t-&eRiLcQ zT~wBofRHd>f!QmY-k)Pt_@0({BUxvam4rKOqZk8{Sm^8WGJqI&nlw?cqbVBwu@e~ih+gsd zC$(G>;CI1?={iGjqJ8`A+8n>YWh+hI!%&1a;~G54%`jntJbNJ&J$JSV(OS+i<(514 z$+rFZi?fomdURyw6ZnXLnu?10=g)&_T(Q9XlN|3uMl{sb)A_wL3Pg+z02A~sVd^of z<{}ifpFNj7ALHS?QrBeN;ZP5mE-0YaiN6Jbm-Q$v0=}jEq(4&R^VWUxzL;D6SXuja^G2Y`&oX59+ zjO9wi_5kZNARUnFdtW%xZRTivb!-*o|MqpG=nCBt-?7$fl!ef{b|q)edL!<{nGm_+ zLD^7@IBv8#(zt^~U98ACikWJ5@;L7jV;kNblyFZqnR>hnt855ZPdniDwxv6^aX|sD zt}2r>xs>6S5xq(S2RX zgcxP673?XJaBIbh;g~l;3tPvikdbe_=hZULwW%F*dsh(6%h%vG6;>CQAHmToB;OI`ryw@Z*`6sPg?W{Lz_~F-rO_WB1}k7Y3I|_EbV>xFg~-an383- zcb`g<(r;chF@997{fuJafhDh$rf+2S`5I$2`bU*ok9BX61H^+@L}d8IzVM{`{B-%Qsw zs?%r`Rm^CysXxWU5jnbG0haKFRSY8Mbvv^OJV%)WG~T&CtX^*4%)=>p2LL zqWKADeN(_j9s5fa5$C~>Clq36d? z1SnQ~>B9;Q4dwB=K22e_Y%DkYXl>)m{2^0rD1zbs{bWL)ryyqFd*Vc6W>Unjy1Nmd zbu|Zm4aI2bXcV(0n;N)>`5PIlxVdv6^k2^n5zXBX?+53PfU&mTLxxx|-)x|1H4nVgjWAdW&P4O?{m3EO}LW;?_He z7ECa|BeYC~od*=nZ04%EQhyr@`8WfC&DYmgg2W996bGPHpe$2=fB)mFTOS||S>$~T zAe>&+B(h6+tc>i&`yUw3d_Sq9S-E~agvcgTEm8}L9N~$YKG;6N!5GiK-HDyWXNdTUvTuGCw7X~ zJu!Rg)yJM`T)B$o{n@=p(l_7_8ZW1~gEqA?1(n5Y=biFF#MW?@t^2hH3Ic%vv+hes zo1GWoZ*aQ(mO%ZBM)K5i({lD_qt87Pg9b~?Z2p`^u0z6x_0jEiB2PVYSH2j>-l%BNu?3V_2@q!Kd;H+Q8hxH{o*m5clA}}s|IBsbMY?Au z2;&K`M<3l2k0c5IyP3PwcpE;p4XbiFJy71gv0;zgGfv2+>ytM=4AGt`h1=i`>m8*n z2Ynv?(XYH=Rac(>?OXF8-;6;-C{wD^5{H$d1HXVJUg|TA3)AQ?p5nGS@}1%w;J9cN ztMfv>#QG!#JHPX?nu6lb;4Y5Ao~)2C(hR$6PYz?Piks}Xf=VIG<3>)eLEIp!XTeA7 zuSWRa2lHXa(W*=rX}=uW*FX8rM$`b^ub>F1$^7tbN6QPB z!FX0}*oE7o;q$)1kS=U%PCRn*+YXl2uDb}8ybje06$nYV1Cu+++|QZhp1A*E}Dgh8x8m@ z`gCjX`exL;DLYg)LxY9Vk;`xRTK$3@Q?~WKi5*a9Y~vhuwJ%~G;uDSHFMOaMA@Tkw z*c&emC7dOz#b&Y;bxDCY2QJt5R{3FUyT@IhrR_@n%Y;@&hG(hf`Fs+{ zdhBV^%g){{!jZKn*`is}RnOGr#4Thl2b6w7qPX0g%XAP{CGq$5wk~o*bHx7j`AA`> z*gD;csD4@HrJ<8?6l`Lxu%dYeA@)eS7CP=GFDuGtsgPp*=%G zWtZNQ5IV-m=|ntOkXA1Puz`7O+umNaBYFe=(hd5#HK`Nl_z@|d0vgwnie3{cr9QCI zhE;iQS2OR;^b_@WuVvRYQ{L|GcpoDha41d=|E_lA zGyG-0&!#2+yN9XTgJihq91=YDt#WDc^G66BD|u=#`!U6s+wS~zNV~iXxwwA-q+V>+7fF@X9PPb+9ftagcVW|JYII{O21E&p91Q z$*oD6s=bKHaqjvuymD$rg(XK>%}FKmx3U$56@| z0+|^?>}Y=1gYukeD1&T%d3+1c+)qcp+vUL7eY}uRvoPX$@lxz`Rto-bX@L)#k{Mi|5*pYFmG2#tQ4%@Y9gZ;!*LkQ6rm2zzup3XXS0wdDxC9Ko8oJCFn!`lgOT-A9 z8sl&tT)a^KE3|@^%7N_$wMCA+g=%$DaH$z~S3S2>$HWHvwa~XPuz6fVAL+2ecj;JM zlP2laV80bpB>wp{J#)b#VM|u~D7n~&^U(26rL`K_)SGUN_Uw8jVS19Ezzbbrtor`8 zj&k9W7@IbJ_QmBl1YAx!KdDtmoN-)of+MFsmmQANEEyyAyWm0v-LcUeLkL0t-RN+MGl$*5Okp{am_` zk~**SeDyhoIFyo5d1+ZPpHtrn^n()0*#^{}t?IA=vZF<6=@WwvM50!;-C=4Dy_~+4 zceum$F#%o7gu#&*j&+2#cu~V*Xr8qc&MIn6yiGx;)>>8{E7?2EtI|F6jChq#l9+!^ zl=Q*I>}yAJ3eykkohdn3j8`)~=a42hOR8|VZ^S7_Q)Q*Y(Jt&?$~j=p6^$x+{(c1` zpY$~4aGt_{ox>SpIAG+H~ z^=XJ{=AcXPazo$5w(;I;UI`jJp4|TRrBv+iZafVOk$CQ1&`c`R&+qhRXhwOD8va8+ z>jPU!a_%J8_ z?7=Br(M{RyS#q_seAP6b6i#jm-3ctoSi|?oqF+kY%#(iFJTbr@u4jo9T}20HjY*Cv zI8d6bsJr+4vRylVw!^eSEyK=%1`ugIexb*UO$d}yrQWp*SC8lXrmQRbZBsQ+fT!VG8h?* zNy|TqS~%~_g%IyieEW^4iM9Y`@L4g{e!lu25MkCs)^>MJn7Ch2UtbS^3grOQ02CGe z7BJKKJe&s-7?uGf=Nhs*F;)4c66*2QlgCbdbS0ijlOe2tdgL^g$1?+ZlEp;!wg{T* z1mLItCCztvkEIgclK$9I4Twu3ig^A8tt^l4p)TWw1f!$9Mne96hfs-@;)dVc8)+!;eZ4OhFt{kI_^7lvCHHhFz)8s0m02ltcHT|5an zky5iYZCYrPx3(@Q&pbj6a0g>UGJWo^0H{Z!^gQKjK4mA_TZ`APUjsY*RrnS33)t`k z+6RE2PwL))uE~Mo;3I^=_UlwIjMCuHpR=BHj#|p4riff%@9jd;i@QsH&GUlm6}29n zmOU%gU&X7&@?t-Hh!{wQHqJ%F zXWy>WkFHHU5<`;g>Atd2`To*tRSZlQ`QbsW`lXhJk_4c|S=>k{{`34}

v%~iH zO23{k0f-+Wq=|m?B=jdUP?+zQc{sNzFkt}_F?Pz`{)>SY{+}I#5CZY zAX0!h|2@xMa3uE#KCKfZ=mEYRl}Hc7TmYCGA0J;^TLZxVj#5;R7t-G@<7S-?=tco!c^v;?d2L0G^d4Aae7Ole*7p5Ncje}V}jv3qj$uj zTM*Pw$-G7u&8Hi5(eP9&9<)|h-loS~iOMr;S+K9=f%>M=X`y7x7@Ta!D9p!%^$NQ1 zQ2NcKE9u5ynT_^NkRiRU8ZpVpNLleD&nK`riEUpxNLD#mLYg*CqAC3v06z(HLRh?3 zb%uS7HfJ!EJd~0HwSeDkz2tgnET>dInauT8m`XSHz+vm_&s$-rf5lcf9~uM9P4%#W z*nvu`TnMf{FlQqF?bdS-o@c}vttFQzF=ijI6C0&CA3THlr!_itx2?Nb1Xk%I;PUy? zAL^3})WFq9%|AsXFzeBG{cf<)AY8IsHI1Ptox?`yR>jFK9?d99OkV|_eT(JnA# z-Bg>~V)qtK0w`6A#jfyerMvXGC{xGNhn~5-Y(%Xq`B|!JloufF4~q^QJe&?|;*3kp zGbry31eNj*8mwEVc)dPvmGq{NQ zPLI6g_WX{vH~GM2&B}2~-5MRfHqT(qwXiRFRO52GRK$vNm0P&5LKoZ8HJ-pGlli?@ zfI9%nZ>JBMSJBdH29R7r%y0yt?4wISng;%^2^z(p{wAS&QG5DI^Y&zb31ELnN=dB& zD2(l2zRxw!Z^v3D{SAD-8^d%1zb4zp82YotB<-TI((>|_EXJXT=%c=no!0Ts{>dJP zr%6c!J+m!&yXasib?4LfjRH8b?n!%aX+tbbLyj)`VtN_0?2DoyEJ;S@LcTBBB@iH%}Yj&B&{r3Cd8!Wb-h2TXH%~8yohJ>8Um>_F^+`(m9tb*^2W@|XC z^-dUtwT$tUV+-dVFyKB}$P%R*H`~EOtwonrG-SrIeVkfY{8ZSQ}WG7Z? zSe%?o5q$I1MC8n0aj-CWSml;;!i|!r>VkKHC$uro6Da;+GH#&EPQ--W4|MB56;Gzf&#uY zfG!5Mup>*oouFAlpTxw(6cnB(qJvtV{RK%bT*-E(dYvfekkO%`FEhC_R`k#Wt; z9@Yb}Zyb3NBa~P3(2@RmOvLS1Tr7?Y8OStMlkdhv^Pl>;-Olj`_f^W|R#=DNIFd-$ z@MJJDQ%DYuM4Z1yOKL&ezcYY8OOEgk+j(`9vtun?+p8ojiil1*@y&KGJ*hf+xpsFY zWV!|M*lPmaMdD`GVc|7HWc&t|)HcOQy*BYPGsh)$b`fA>bIZnxD`slk!$@W7DRe9kx%e!e zzb}7lG5+#%Q|-OEEH|VQINL7Otb0WD00Ngg_Ft!a7`{H?B!dB_K8e|BI!8Vf^+=`9 zd9plqI9iEoZJ*Izee;}2{#cZ;uhK5=k`>L{u3{(uNi95?T3k_OqO#WVf7n{2om${X$aBDVgAb=wS1_9yW~(&dTMUQ-^|ahy`A38FED z{)z3ZXFk^3&nB`7koghl>W$D>Usi4Ks_#D}*(^A>dM}jHKI3A1o?6DoK%El2W5zkK zfc>Geala%boAzU3ESit?Dd9Qg`nO}z)v*_+bo3+CU`#$Yo3L{h@8YBVE}1{UjgG`E zbY8V)+PW}W4C%R2s`(kOCcOPJ_1z^j~%D64}Dq9~fAusCi9 zI*_bH&~$*M8FPsf!L*cGkj}0#nXjloVPnYEQ?tq#uky@{alLgeWY$-_FPOk&zKoHl zxHMg@FIhv3j@*Eaa4Q%OWmGv{%b4H^fz{@jxl=2su^ZrhIPcF2y+WquOM&^E~ZGe8V9F`<;Cw|m3y;y!~) zi||=KjOa9giFk2jaMAmb{f7f`C;8XS9UEp@MXjFNUBSGq9Sdjw1+*m*0A9eO^Qn3f zs@)*4mqACwd8*`Uk&yGlB+1JGUvyJMHEv@xVg}Su7Ia}>tx1qkE1g|PLOK36eYQ2+ z+9kZzP8Gpj!o~)+F_35~gI~4gL$Xx`Y78{%R=~y!g>SLnoFA+MW!s*=ERnnP-Ahsv zZjZGOCi9^LSY6F;67sSyW8Ne;{;fpZ#NA2ja%O54H0=cHyhOI9Yt1Sx|FEbP@F71? z+W}@j;h~t1?qScl3OGP?e*f_G zMujCm-jogLxqqH%8Lo@SAq{bMoX;<;mbi>#S26!kf*mJ9XWVTc90s*HTUhKtkS_#|`JK8{NDAN@}n8t#Bv+&G&1 z*lxdtXARuUO`F_ODtD-prm z;)Ea=DZ=-N($~Q(PFAq=-NQEmZ7>XO5KKOYv?K||3WSD^u!oPE*{`qjlPP<5j{9S) z;{IS?R-lAN>uQ;>-d1fY%-13^F166p_pnaf8d7$M?S0bZJb;c5S!wIMhz@pkm>-5% z$+5`}OY~_eMVkJ$zWJd1<)>IIEkoW^?O8v+a#AZwqJ0eKXxDmqd=kFGF5WH`V^`w} z#G;WHV#O!C{UX0QbzOj-GkAPEj^=pqIV>jYfyLDU;WQ4QR5jMtI-`SQaAG*HX2za@3}$se93nn=J=PZ5B$g6;g@c zW6H#4PVM%-KOuFE4Za zP9iVt0v_I{VxGD!ldc+%bI|nu>vA(S|9rVq3#l*6<|@>_?GC{Ml`ndKF0Y;!8MLzgEa8^9vHI8V97RBGy5FD)tQmamPq z>79vcf<^7O%f^(IiHbnZ^w#9gcNi@Hvzr@jgDV5wDH+SBnrIIzAF;!bTf%0`_x}`GoGEO!6^*(Ky zVBJSzCdYqOBT3KWdldkvY01t0TE@-c(Er@|bykl|`qB0%XS zxozH5|B;1DGtn)**q}1743z=EV41j|=wzP6NhJQ^pBu&Tij0Hl_)^u*L#3EL_2#IB z5GP@{`I`%N+6xi6Nl7JT&Ia&MyfnEPD=W=0l6TMFEC^)O3z5~Km-d2|LQHE)530M` zIUHToyOulOG2I7dC22LEMGW!DC@H7HxaBzBc+-vt$6?-{i1`_2>vU zqF20c!lj)*YiTX+OcczNsIytmgBgnl{*^aybar+R1|#)Bd?o)}BINgQF z0l+#2XuHQIKs6*2V6a4lhf=JdLl?ddd<$HRPxDoGOr;#L-X&hXq&!4}io|-UkZSUl%J zAMFt57jL_Gq>BS|l*pYr7J_tDl_*>s%6M+{oq%<4c5LEbnncN*6!cm@@@$>ja=>AH z8o7RfEg9I}G6hm?(bgQeY!pt=I>dJ90+yOV=9mybwLv{Mw9{jb)yr-MU5TztD2G1T zR$Jxf2Y;!{&i~F>H>_`?cZnmL@QL0L(L5bD8UU=}XV7olXWod-j(*O_0VIAFhqrD+($5JQCX^M42ijMm&_0s&rK^ zg%|yuS4&Zx4`?y-%n{^b)0?+*X!|wKb7<6{K14WG3Lkpt)Xn)xMza?8c}x>LJ_;w? z2Sab|dXgN1`^?a}1jdxE8wgoXq%G><9y&i~<6pN<4v*1M7KAHLvS5Zh`I*~dYXt9` znI6?Vf7wVyvva^PW#m7H*0PP4xNzZSeBTq@dS15bwmAZUS;>XX5S=4idkaLVN%({q zoGftbnCQl4VlzL;DQFu*2KVM6oe7;|oV+zxXXCa*D=OQsq7=XQ=mWef&~~B?`_lW! zToQ>e6!#v(j_sRHfQMKC0gW17>xPNP5}q5_rF?6k+FS^_Yd0|DjA;u>@V@WmZW0z& zQn6lqZ|NM+05Po5Xu9+tAHh6ZcUK{I=~7?K`0~tTQC6h!1e)g)w$)9a$HNo$jwk#x zjeaBJq!Dsc;BC_#3mFU*qU?NxUJDm?FMu*tH^8Ms#uGZrU3oImpI494#3nTM><(Cx zp=pey<%kW!hfV$IG3K08)TcFKS?3n`d`SksQC3_SWh{7B7)6?sX0-NgT;Sz56n)g7 zmCcdpuh4n*N(jr;E9(1!a7<1d8lAzjPh5LC7%O?^PBCb*s(MDR@g&WKhQ~uANX@m_ ziSA5Nu@h+QehL3fMvNuX?%oe2t>E-#i*jji#4riw()LUsfD)!izklvl8K`HszXd0I zcs_nT`wwx_d$(iDl0wJjgK0uYckETGS{T`@#1Rk>5Z$)jdEk#*gUhq6uWH^u>t$?W z8wFomhtvRgyj4vjn2936+s3MYH3S_sr@_=>;@&H$Z5IL+*X(9HK$j zO&0+oEOamH3s8(zKN0uIg&D|=!IWw=Udv)LrNnleYAs0ld3J=jaxAlSF>QpR;|97g zk8EIZc(f!f|5SZ(Xi6C~g@AwjL+@OOr@vUrg-SfOBUoyXUx`w41k18$=*R`LZ#qPH zTzUXQ&R$&~JkwIfn>+g9wKIi)%)|H*p4dRIWxvhzksey(FP4+NL3j6}L6>^64QYc1 z>4?ccLw4Ws18QZpy%OY%x_kz$hWHzggvC~!&!5j4G8qS^QuL4{GD>LwJFw-8P?FhY z1_p=_mCqlE5Vj62_yvy+5>^1wqov_@(CwY@RQUwl(WtOKED{ zbahrzjOEi6!OG8Hf&|N5$4w>pF#Wh6P0>-|A!H0}cBtXe7$T-@)#2GGo*ihyyLx(M zCFRryQhpHglF9;gg%rY-Wt70-X;+;MSN7ncj{*(d{0 zMq^`cEVcI(m+Y5})XjOH^0VIkLR*&Xp8i)EIfM--h$1}*z0tpXCt0EaBm;D^{ z9ssj6xQnVi{qN`I*V5=YaI0XL9uBs7!Tc`>=eCl!yCI*Hz640)yDROl(>-qp;Zuv4 ztXU9wWO52hOsW*&2~66I6vm_W&Pc7ba{iS(;60mIZTmGU+5K_$uGxpJ+$ zNMwXlebO*doB}=@S;AZ|5uH=x4jjKWY?!Pwu~XHPxR@|~G2gmlUNA;$`qZS{ch;V( zTwGskE>woK>F4!>8qPN)R@Hb$eQoUx)C2Iid>mdNWj-?VxV$)~Au! zufucK3S*|SC5-u+4ME$s*SeX%H+KlCP2*1%!;&g+k2gIu%uRU4wJ1_*!JY;Bbg~LS zVauPFI)tj)8e%Mi*Hmm-|3m4OQCB$}^M*6gG;4L2^ZG=iPHmY>N2|(T)gb_rU*S_c zaFC{^w2cUPrb2ABwTlv+hX;H`KNn8cXq)eJy zLf=YU&+|y(oT@x>xTwy&$h4$d4$LAQZ~2i>uYXi zHQQ>!X0!S+t<~;tUhJt&Z8Q_r6X>=B@WI_{h!L0UK7>E7qPg|l%3R+>s#8kd&ox{% znjCY4UD@gJ{PCY<@C=qpY~Q3>xl@hdksD@$0QYjKX^3O36{T(Ir9(EHs#*}*2YB~68e8ZF!lDe6Wo)p}>cp?I=; zr`7w{0h?=^g<6`XCJ++0VwdTvo5s7PuF}KJ9k_$;K)vhnYlz@THL23Z7x&syf%S8# zNo)=KFzH0^LHZ?=JtlePUCU%1!!_)r2JEWjsb}XE3{CaK#7xGU7>>|ia|{!y?@?zx z{}^r5JtIjO&Ojg`xT6)%Rkk__-9nMUl4Ro7Ea!h8o7Wu>fP6*&JT(89p(%U!ozI#h z3`^53KV9`x3PBn6yj+v_)S?Ny`kwHtV%}9~jxm+x-T=Ds0#0s+@EvdG+8p8lN7Ljg zddb;Fsd;&`sm8_p#CWwtUPEWzHS0jZDYDIl0!@*4SB+H_E^ik{2tiNgpbSmvuIGYc zq%?aE;Fw9cq?^a4-($CJ%kpfy%I49#zqm-*Jmg-4MI-(~YKpSJv3m z_sH5gWc)E1(rS;W%$XR*5$E6>Y|C4^bt(i}e(z~S}+;3#Y9LW$i zmKlrd5z~#i=`KTVqekL#F)JIPc)q0Pp5EX$nODH^-DF<8=!`uQCEO?j)>*5I7iL;( zVm?dKAP$rnr zb3Bc!I}by)ZmYkTn%aENTSv{_l`0WzWZe`a3q7QRVP}K(=3bsF$nVpcd+d{IH+&=9 z39(Ne!tCr!WfKUJ-M~w%Q9&&br{RQ;JKm{}_bMB<>n1cV^nkH3ZGt!ziC$bpB&E}| zhJ=7Y=|7b7uerIJBskQhpMYGcms+PpWexa8KCh z5&P&23qu=_qmhl;UWev3qt|&Hx_a@pG`4g^9GvbKsSiSaQqDEG*?KL5hxAiAv{*96(@q}XV@m5G%-^jFreSaIHOfZTRf;}v!#>qD{aQVv4T)QQGJ{ve z0q3HM(bcHFgV%ZpXRrsxbzY8G=8{F0-LPIXv@9C4i{05BmyYI&&Cu%v!)Hj23)IN` z=`}7QS>ZE8nnf<}; zMLv4IqsEe=OASMD+8^bx7<{Y^nrk(N9(879TMePPX(>g_S$fJkYuFRk4LA4HYvA2P zsw7|8n<>F=d$o|$5+cldYE_JZ?b=(l!92`kE#8h<5;y+GdZ+HP*H4oHOPcc#y|y5z zPvnSsam>fq_u|x!CoxjG=R@1KvkTfkH}Vv_M9Jxr(Xo|f`>`Khjs$32T2e$@UxeqS zqPi3|P219PT-12cm@4OdlP26bfq+X^_o_<_{7*dR+tBA-3EA20YCdcC?3TX*W%+#y z!8}Q^Fz#ktuW)i>Ro13_xAiQMWi(6pIp{PkpZhw;d^b}!87!x3YV_l5u&p$or#_m- z;*cdox#s%NQNG)O|N2iu*ZQf7rTFKTmyk;`7Ik-C*1C*=Ne2(Lldlr+UoyO_N{O9a zGD`G)VM$;)pBB#XK2nxAIjd5RC1D;LHE-f8AqK9r!67WUjs}^c=#*F6C*FfOop~ro z!h94)kzYKzyrztHZol$~xoxy?2>HbHeHCb$4j`9PSvP&k{7C)=&LOL2mfZuOF>pRT z6FpsGcmX&c*UME>TH4~(BtcgU9G(gX*lHZV|FYG*UTLdsh~AWa)LRf9BgqnZO;$UQ zs#!sP*x2W zDHAIA;$}j|g%v`C@R02~&ln?fnOJeI=a^w!sU@>)3jV>K>e% zuA7R$l=G9&<4k-3xRSTg^O@3R`4$1t9%Df9@jjlt%DZHGH#$u6m)nG#C zr$jTshe#PI2gW3@Y88?Gi9=}@d2LyB`V_gzywy0Ha>FN7uL3MIJ`E}L<`e8n#gYDR zl9qm(rP{w8=M43>?efW2*q0;^6D5o{Us>IlXTif5hDAdTgMos_N=` zIbUOGxNh)pHR1!Cg9L_h3z|m36M$oh!}_A4o&E820@vGQH5x z>+T7(a>q00a~d7ab*wn3xNTu2q!zHICNA?&P!;_AAr zpAaQj5~Ohw2=4A4v~dj{+`VxN!GgOq?(Ptv(Gc7lcXto&@Llpe@61%qS2g`7)zy73 zee0Zk_FC(=2U(A6h22M`2cwSA)k+*)TZ{{?7^DY%$c$s%#QX3xT3EB#*<8jmgHK2- zq){{Zxd8)49IZwpjwaTr@pxWQ&m{Jh_J>7OO%A<**8M8fbm@eb3do?2u(CUZ!y=^4x^>)dvoH+ai(@MS4u4D0%zi z-f3;Sy4akD*9*J4)pn-1ql$-Si)q5p4JFq(LX_iK_2`WGM#zOmY|NEC4arLK(hY(?MasD=c2mcCJ z2~k0Q{w1)qgcD>nw3Sy@CUV#kyWsP>o+dErKgNDbpAk3#YY>s=r8mLz+|ts65z_jn zem*{JKu*xw*tn~7RQh_dPR4ymnmbFYF3HgHCl3iFnbET=aG&0R{uP}kW zhBt=0Z6;9|r{|?{!iTP;)I}~Je)8#XAE=L1AuEGX_koJHBPR+xP!S`@k=px9iriDgBYshqnHF-OH3| z9n76$&rf$C(sjDyLk%Jh8|>G|$H!4oQGlKAyq|ff@wk`T>#9%ZyN^#(Q3zNC z91l&!FYP^0T!QBpt=RY#W8~VTZH(N6V-o=5#@ZC*54Pggah^j*@GJ z5LQEe&da8f^l|bt6Wi+M{yFBnA3Um^Q3#;sO-hD4?+oZK=AS|y9K4P>c~kzgWYbxj zy+2}_azK(!pK0F8DO$iBSzK;VfpCxoWmr^ftZ|<2)ppuot>aPff=YgOXl(U!RErlS z)FofO+~lZeQpD$2BOi|u(VRf+QWY!v=#(tbFroY9u=qc@SM3|hP;S*z36;%`YbkT; z8eK~Dq!JZSE^zeQp9V$pfinu29880Tm`l9)7~wySdw$PsHvL92oTl8Jol%dl1U+ye z!0GcxQuDZ#BaLc;VDaQgJqWs$&A3|3(Rn8J2-|8!pBzO?<=;zIMCg`0AQHLm7}Dz|(me)38^fxtL=KJUY;J8f2Ey8il03Gje_P z@&@&e(gYiNa>oWCV^e2^u*&tT>s7;DLItH;A)^|HA#TwQbS@XaM^&U`<~3fNoJQ0; zoGq&^gV!k@B%_!EyLj}(BE9UqN>8>dNxH(*&A%#5>R1$cO-Cyqc*737#exs$=@^WD z7dka5i7mhhk5scICg=#|Rb9=_il9Za+hvmiT1~KVPhE1XxPiOhkfh)jX|#9gakwP? zb38-&g>Q=LsXpzDbuj)8f1@(KcXs*pjHiBE5#Ff0a)bT6PrQtXQjA+l^sr9-Ny=yXS3oY8ib??31a@B;wjN?EDQy7wO`f1GQzSs-r zVY$1d!KJwk#HR9w-R3GNXRN({l7glyQ0~5KMeXN>f5E9tCO(=?$n>gzY=-~VXeS?H zrLayK4yM#gD{m*b_AyMA8LUb(z~y##*!(eJuVI?id)K6$;?~T9F*FA=qb>AA0jcPbc7}FfiD(JjfWIYbQAf}fatZSyq4qO)0fXRUZGt-q-A!0c+)C}WtnAP zebpshzuv6Swl*cXr=Gcav3$;cwd_@X;=wJfJ+Pr%Xd zNM`l-{Wx;6J2lJlIYF(DAv*XJ6eqM!n)zc(P^3hOTXR9Y62Ue(NptB_h;6!JfR)A z+hea0idHcTD(eBxzwMW=LVf4@oOZg|#~sElLrsxF>NAFmXn2Kd*VMVl;q^sktQVor z0z3g&MKcmAs!Gu~z=iPGEven;hqlpvJ75G*7-zea{CB6LKY#wr$jF#1Wo%n=`gyMs zo(sw!X`K5D4A{t@X=Iulv*CvXl?-?kU*w~kj`%5&t>0C0CM9Xem35TT`?RN+^9Es2~DgK_pB+;H| z)fWb8SEL8D$~qs$a-bIWs)n~ZjuYkP)zYe|_?0w1?zF(pVi*fD`+0U2Ki)Ab)Pq$` zdPF?|B_b|=rerGkRBe;8mycrIro(UM`g#V?ITXKdC6qB;t-p1Zx2L%@;F?uuzGh%H zfBSS2v#V}5I9@B4tfPqZRULbx)s2#eLK>IbVm8$JIX;ixdHzY>)@p9)W#nA;IFbt!Emo%mDLYPe*xi`m z=wddWp<4?vI(b@;5mgN&L*pl+Ia*J581dZ1AgfNt-8ty zq=@uyl#QWk6M<14Yl==aCA1<1YgBC!VgrG9r|qikVf|hYds#>i(eJB&C0a}TDdEj@ zvN+B<=|c)js-b1g=)FXmkK|lr*Q()F;u($-qr0TNtXZg-RvRV`8Zl~#Rvs~W-NcJ| ztW;lQH#@cVncvNuG(6Ke4#BOZUDlC-G};)$@dH_=qJ=AO=$u)3N3_&x^vY59Q@S7B ze91DZxIAh2$7mu&$J3&>){c92k>B`$06w~i=V4ABO*+VA(EKrhyL$H-9V+5(-ZRsG z%5dEv5=$l)@iJW&XY=D=jJn+9*u8o1xzcW8S5%Yjz25Jn7l`8!RT?kTZC6L%6y`fG zmz$b0)EzIXW-TYE#^^8c%>sFSik*$hxeeYOnDpJh2Y@G!LU#eNQx8a0OTxWl%E}UI|J$hYO z*}G`I`+ApFID!A3c-ns`sgsj8ubW{f$gnh^jaq-}X<%bAj8*K1J$o|t^5X8G znEDE%Bi)NH>uW`pM+{!rqfot((5lU+KgA>d@?PXY2~5F$^TwESct14(q03hy#R$su8Igqn*CBmgtC#M!mYH;NfF9 zyc`a93z&CM?n=;cTEBF$_9WOwFL)jbw)>UxcCW8np8Wah zf1>KKe>XPV7eI&I{_o4Lf<$J2*^HwZmD5D&o1!_Ef;(UHyDFYe)ST7wAD)*J&B7X84Ed zdkMs})i<(f)tl#7G>bRaqY$OtZa@V-mt;F(dSJC?9G&myFh>HPG$}K=9{$Q`p{AlD z4yRTr#%A`vOG9q@+Vj4?=3==mprPhr=`Ze-Zd%vY);3QzO}olypxV@^RNz%E0OfG_ z$mn8u_VL4?xrEgBWIIUja8-m&)i3cFuPl|j(^hb9bS3i@7b)p^LI5OGgJB<*^~?Ad zcWP(bJ5zOEj++FU3F?YV?fAfEM5XE9E9pN;c5hBx(dgXRz|18(@;tvE8!Z2JE&(C} z+p`VJa%*dbI5D1a83TwI{%i6|O1gKhK77W#(a?+(o>5s`CuL7k@^3irD6egbnww_T z3!|`b@fp(l#l-_CZ(okOQL}7Oi$#OzQ2O;+f-c!s{E&Fs=}`4R8Xjp^FZ<(b zLaT;_pcd;gY4ML{DM(=5dt8u@VU2HcjSM}6`8Y$8Pu=NoVPHVmtJ5>TdF?u3Y@kko zNFY1{ll3m|QcN@NO!^VJWM@|?{^{E5;qbv$4GzA)@On@!P>KTSE&uk@oOmY8n;3>e z`(|Dy+&2Y#f=8SgCQyUbaP60C6rWl$H5~s4WKi4+Q*cVT_<2{Nha8ZhFeM&Z9}aY* zro)%Wz>_q!Uzi?tg=D|4b@heL7movf{^XMF!Om4k5yN zrNX~*b-bij{~ndH$=iu>o{;Mxut`3;;P~^{q~O%B08+H=JQ2s{+4>YB zbB9(%mj=TFbA?%2tzLSJ9pjVg8dM&Z9UhQG%Y@g@uRlYZ2LImj?+L2Q#vff&q+5UB zj+MH6*Rr`H{=EnF^s|QDEe-Qxu>|zQnoND zYdoU#oFuX51j12C)Y>xc#mQ1v<{nsjWBkDN@oir_)T0Ar;%ih#lafaoS&E0v-~ZZT zSLm;SFT_k{1ML$i!)Al#lvLtOE&8F5;wdtTt69fxRumMJ8UGs`E0|3y1~~H5kFW#^Vqp?{850RXTwIpI@73Qn=T|k( zc*&1L^=DXe{AhxEs3vV@$0xek1Bw?#Ms7dQGd3|!n3O7y4iczQIkxS+J8s{t5rJ!= z;(=rpqp<$iiKBA*-KLN62g?ToIJ$J}ddGcSm(L-&hhOzs^2LBmG8(8k#W%1}PAQ>% zn8Wugr_OJdr3?!C08WBEF~i>9{8uASQPz!g8UTzM60?-XTiz$B9-prdKD^^qatwhS ztQ`Tp;#G2P+R%7&N^jMb1!+E758UuL-%jQ%t9WBy9{Nu&pghxqY;w{CW|-rdR<&tuqXhIpGs<44f~$*_ zkMk>g*JCJa7Fco$!<$p%ykF z1@ImEwoQ#i=QrG}fL50%nx6E?1YMk#hW+PqiV8Wc1j2Rl8eQ9zSGXM?A8|9@o2PE(IZ&__*M3q0 zz}QdVYLCw|@DkAYpN^h%y#KI3H|8&I^EnC)9Jb`u2mnUillfQw0>2ro*Fq4z5zLZD zotG)-0`jRTu=Sb@yUkcv*q%JmLi+bzCT(>EL_L$5y~*s8&c0rNLlX7hZv?*dzo_3Y zt9Q!qi))b)c^o2>jOS@37uece`*wyLG+!3UG{q367G(bzDCLo1H?QQ=mKueIKb>n8MxVv*-WDBz`9A9Tt_5cbF5esPU)E;RT81Ka><@oRv+z1;aM?Q#5U-4H z)^L#1cgMh=7ZDlXnPi6ASyi1`YrS`DdEvHmP>3Yi$kUBD9cL4z@si;e;7xw#&8cqH z>{kj9zqB_}HD`VqqPztX9Cn&dIaTYf_+I~Wli!{WZ1lu;%8f-L<%r&OX;R-+2TIjY znW3~{vCB;Mb{kAT=e7?``i~ZFDL54Jj~0$<6t}Is25Q=pFaLN^d5z2KR&hM$5AO%@ zk2sEZ%Yo2(33%Qt%ywVR$oM4aGT0Cri{FQd9n=U zp|;1w;hSM8Z21-nE-uvQ!WoV_r3~FFs-q4D+2{JAl_#P!SYj?9tBZxBYiHNN1>G)3 zSI2Q0ql{FGO_+pjU-h`TXQ|qW{aN9v?QdHQlbW{aV*7#Mx;E zRAW0rIGXgUo-6~I^g5U?&+^D78&`BY>=`Cx;DfruxCV}N%z!=ga1bMG={TP;nyqe%F{`(F0s*VzLAEN*|@XxZ}0!JFYi}n18p9wA=xd zbiB_Rq|`8E=Pwk|391X4+*e`0xL^H(i{JDVb$=>TSu6{#q0j+yB4vH|v%)x&Fr@`G zT$SE`VHM$&b;Ru&Ttp+^-AI3J-frOLs-RVbmd^8fAsx0R?|JYwRQDANiT2&MQiSMZ z1W;W8(3lDvvxLEW*68ftEXML^H)xjVZgQ6NBL8_iLK!UtBu(8HPfssCl7V2T{+IIJ+sZYRCu_Pf^Zg=n08$5gi0HCK=Hf*%A{)xug7m1n5-m`v$L z+bUrE(Vk&X3GEW+$}tfk?5*24?}+Vb30MP&=lw>y!_8@oe{_`Ud3t!NClsv~;@68N z`c3K6UctME_>C}lMkY^6C?{&1JockL;RT-8oXIzq;JdieQ@ zZpqC-70#V?cpZ@kgCA$>vxg0x{)SZmo7+;-nmD_8RIE;eAgc;Rb<6;SJ!#`Mw?xd0 zvatF1wMZzGE3N0Suak4&YDwolUZ%Jn((PpZo2MYXz{EOoDr+mYDSVbHY4@EGlBI2G&h`*Ok;7Q)u!~kY-%Q+D*2q3Wg_DvO_T;&((Vdmb?}70T`mH_sgziJ27_I z#Vp~J_E&x@<0P#9A2aw6|6R!gzJ2W{#xugm+v#%)le6a~hZ8hxo$R9O6!3WM%dv{M zgl-<+WHvaE(}QBQRjMkm39aX1jufopn*wh6-ztv%ETpw_+@lDVP~7VAD%^#tJu#WR zvg{Y1txk@ylw_V4-NtBGS;JVbr7R%*&irKw9eq84ApY2?gDXiqaS%QETs3e}}j_4u%q}p-p4=;vAzc zvJUaf0n9}jk#|Xpk#b7+{`jl+8{ZXb0Rxi7!M8%e3i4Eo;F#&5T$f85@T_v+AzVx0 z?C&W6*ot|OFHb<~axt#E0h>V-_eFyPD2bO=3Vg~Q8`1cd??g%*2>E0B4kfTwx4fbG z!IQ{t2)X$hq2nOfr`7nNWKE}sx$#$_KJ-4tdwW}^!>kAQcBNTu96^OQOGUhk6*w>pyy$bmm#Vw zB~RU9pCBGAN*Ql2of9IhR_ALWnPfd>7nIo<#$uM4Baay&Mil5{3oXzUmS(#CJJ=+> z#f*b~=nH7cO7`M;mm^8M$}dfFYt4)mU$fy4EgRwR0DLIdNn7u}g6WW*f zj1M4k%{<6Ut(DlSS*vjg!q$@hy~!_KK?B18c%zD7iSh14B$v9{>5K#miudP-nojTe zCi&ki5X*z!I32(MzQZW`MCuW!6oG%xVdb;O!UM~3CJU}RP8dzUq9I|5pyqT#UEiGL zk}S;NYB|%b32^bw9^A9#l4y&v@8yOhBimt^642xQ5@2##=(kmhmhx=P797>g+qS3N z-w2@!N?hf-S>XK{g5NQ>u36mT91J}~AlXYu7(TbuA!zK}kgMILBX3lT_~mHVzYL?z z^r06q6~V)x<-b_W=R6ylH!yq08cw*LguTN;EiZX6HF#AY{Pqy^>Catftx9v=O5+FY?JcoJhd7d>=OOC#B!hmd!piBkPkYUcy6gO(}Tm zgEXNArD)Nul#KlQp{s!=v=b34y~@bd{s)uXqQnms1hRkJh!{!ZRpH!lWeO&W-l15e zA`|-cXtAd)XCLg{Tal0RSL#z3!A`3$n|iia6ZMDh{u1xuM649=v%}7tBLRCYO#LUW zp|)s9OY~|bODxo^er*2`_%*YwjR6v?D&gsMq!$F45%x9%kAWQAaqtmQZ7yzz`h-6$ z=FNPx6G0w57NK}vP>G^dToA~=9r!&XmHSSZ7vkPLBs40T)_lOdnmrJ5^$&qMAV3w+8=={t2x6aiT_Rwxq^G8D&jbhbqcN zNK`3iMP*s`0=sQWn#LO%y7jc#Iv+$>-I7^@w2nR^`5jaQxe*xyS)E{f5vkouKLcEp zH6(@)C_j__P(@?cT6{;3UYe3}jzyWKA+(juenb$>A9~>+JCoJXngiiA4UhUU7IHb*=<_y1p91H-`wg@f8c^fVY5IHNAi}K4!encW)+6NxlRw- zNpM~n#y{wfxn9Z)y2MLgWUG{$@d7>bliw=sOfS?hB0UqR#dXc{FBRO~xoX3nrZ7>q zvujB_LOimkexA+DAjMXGp?|6sy|w-}E4ailyQ_VSqjP5{s-A}1_1zs(3>k0#*;Ni> zfWM1X|3;iLR<3Wvcf}b1n2XRh^@EHW&bkpXQTJ)DSKZ}wOYX6Ma++(;DSf9v zL5A=E^$k#<4pVf{pI`4}apj5F&AqyIEu$&yL17g_Z`g+8`&Z(1vRgmO(*i^-pt zYC{@-tqyGWk6`(1zH}{k!?U2nHg4+dF&x>KQ(HXOxakptQ~U#Jz+!Ou=SUuUU&s=# z9NZ;4G8V;Z9gDzVM%O&?38N zDAwnw_S-$6run8ZP0v=#&=IV)?&(lERp{~X^-bjmTQ8&Pa9-87!^dBymocsrJFwZE zpOOX*_J(Q61Px@pJfR^E60Yr2z=~K0h$Nzs`86)NUiBnOUUC0ntQ6swZ1AjCN!O4_ z{iaQ#ICi5?ndXQ+^McR>=1gPa+~YtKV|l8Ai9|aikt| zYVbSUpiD=juA4xzs1ijUus4TAfCUnaE_=<3J**B2r#knLIvlW8*zpjvul!FE=Ka z#7VTTB|#r`v(qJxOC(YTBkQdm(hXMSOFtQ~m}}2JDq1*GAjnW{R$97~&$l_CKr+pQ zT834p)}L#k`6X+gJ|6s@#|w~G;cX(4p734%s`aPDZjdI=`lF;__|lo<$YL@#FB&QJ z_|i$;ldG~uOM>tMN1Gl>ns6xb$`n9I2UT^)ml!llgkl4313qP%p{d9$hA(C}H>5R6 z&BV0=+EUY!7D;$wh(>S{o1*iW1nsdD`e{{x(%ny0B`{?BT;?nfV7*UpuUOo=k;nVn zTwHud*Av}yY*6tqddoXlkuu~ z4v(!pRhUedbPFIfkUR$Q+j5_oP^4zX#6Wx1Yx#C9PW%LY?CjTa^{jILMRed1E>4Kh zIKI=Uy*{uWTG~i#q?PKT5j!w zW7TG9YDt}&M^5*Xa4uWexs6ai_vA%x1!)j2`*38c3FX0YKtYpKaYRCc%}OAMNf%U~ z${d|>B|5&KTVuv6uh`1MeiB=)ImJrS)f1BIvZAmnIqbUk8UrKtcJlJJO0*>l5zEit z9{`?$e)4$(bue0dAfJTB)`5@xf-Cvv`;^wIVDZHRa`=WXw``3DMQpRw{oH;otP6hy z$>Q>*PD-4f6F(c~DgVlZPC?^UlpNXe_D8mSn!YeZ5qWpnb940}g-ToFY9t-lwCdK{ z;GWXXns{v|R$2H&Wo;MzK3;p$ z)zkk&=1A>DZz3BPInX$E=1`*xn#U1wBEqkv%knR1BmN^vNjD^;Nx7`0KoNu)1diG7 z{S2*Vu#J>2ZXfB0E}f8k@zZRpeQDs`=s@nL_uN_M{d);=x9h|{>MY0rk+C$>Mx#0{ zHg3VRqSMY(PA|3o4{r}IF$+toNr_Y>XxEs(IQMe~hZg$?YsDXN*K$GjP7n0I;2Sh8 z)pj?|8Cn$C*&I0~G;*$Ev?egSIUyXaf=pZkhj_F6kcckr)`}aA&#~ z_jG*Z%~Z6jYiSr|$xyUW2d8m8{?gBGUb|j?-#5DU%6f*si?p5N=97p=q9GIc+_INW zd35@0-H)5?1&)gU@?4Wg5|6)2LV<{IRg49(?4VJnB?AL@bzQSIfqv=xgRjlq+{EiP zFRCVke(whg(sjCd0QRmXitmKo>6p!6iVIB#)AQU9!zSKCj!ac;VvEeSM81E;AMV0x z)tnwub-+w>7mRcYay(f=@Y*z+;>+JK8qz8_>A3g14g@=hpXy!{XVbnrpEL82AxDc2 z(ksQCtz6g>Nqh=6A3EF4*#YX1j_KL=!c8StF*RiD{)F01jU*oDz06@x^I( zofWhf{<@11-~?CHV7D@mcKjk8G~-g&uK5CM)ZWC@vMZ!Lw)W7bZ)-j!UDCLoQ#0Ng zcV!uTd*A*!n9sb7`BAH50J~9$?yP@G-bix%PK1GJz;#IsdHMH|8~Q^trA~5{dlXEj ztlld_=aFVXngM|e{QL+d=uYpLPrRF!B6ur|UP7=HSd;yIzrO}518o{G*dN54m3c4v zid%{Cp_>tK?Mvn^QjSyOv3-e!;4SSFzDe#q2EB0IPL7&Ews2cWL9mkih`1OT#S7P zBH`CpW&FDRK~18pK3TC}3V7nz2&wU(N0plUkNqc?jk!dSHFS4R(yOIxognZqPG z3yG;J!YDYeD4cDJ63(_|{SNYLqpBNpw`$+JUGw=_G|X}W!91U7*-KWd zXND63JEwry5wF77+1U=sc81JrI1k0&QlSMn$J~QP-C9%#4-d`QU5*VhqFFfj9>i=v zBP*h82y(>$ZoaZqvo)=n)$X740I27O_&4nYaos)t45bzOSrrH312o` zl-HXU7S}>iS;bH<`s`8exO*bI*iKgtH}uJk4CfGRcN#ttjJN)FO9dYEw{f-d<48+SF)CP@fapZ# z2oh%HR;*s`(L2(ztuFyg8-I)fjEE>@!jH`t>NfxG4OZT;G=D(-1OAFQ8t-dOt4)~V z`Q;-R#frC4jy!mvN-I|DeDQQ^ow)P@%WWwA!eHl_Ez-cfsG;tDym|(o)C6es85ljA zL0b@zyn7u3X4DxtdE5S*MpL@B$Jzf6jHZzB-Ovnw(_VL&N&wEXe%I+c{UGBeo*W@9 zaqSNkq4ykG-HMgey_Leoi$x_PpCwkF0(OKiZIz7l(0bzW=)R}lQdM2tf1JgAoah3> zu1e+dv?Z;pzRD5+1@B@_E+zO7dnm@yt7493Q!{JN-D>v*<4{NDTK-m~Rs0e)Y*C!*f?ZIjB^+C~xE&Hx54*UQt0DSG#@F>_~Jz3%d zCK-~5hI}=Cdi~mRc+hgaH^lIux#Eh-@|jk6j)caEn)x{7A2xo?@cAsOgLfk<6@rfB}X>gbVeR;UzJSF`ScSzLs$6-@?l_w z5Fcs87k=M;j6-tKwgtt<0t@Z?9kQ-|GXUvbB*|f57XNIzp7c!9FM4}9lI(R#T<2O^ zUOuHKzg{ww{+0be@2N|11)t#qX&{gJsG#E4k}hbDX1$rO`lxLr&hpNEzGbs3w5>nrS{) zsRGI24vwR{OVAz#x;;BZD&`Yp3@^a>Ya3@k;D;?~v6sGH@=A5!l}XLiW7z-ch*<%V z8M_#xxoB!pgM%&WNla~6!ugertu21j4shk5$bn>N)ds%aru+N(^zK_fGhw z`FRDvm;}A};45D2G*cr^k@wZ6V!E8FZ`I6-)4*jI&!1dZWH_QDbjgjqbBL7C(cstQ zO8}EGgJyU6Rjo|fTT)F4l&a6Z?py96TG@+f7W34ysu=ClE}jt72Bns6`Dg~@1hB1X ztDq4bZWxG+YcAXidhf3Ycbc>HW-^gq_pxtkH{XptXWvBpt?{#RUgThK8ZoxSt#<65 znMn~}gS5!NiluZMv>sPov2jlHA+1FJ$a)s0*;Q>&RyzN{c$xX?eI#*3j z0~p0i{Ti3x!=iikIa`%xS;f|pArs#$f zn#;%jiD!1D@Ljp}er9!CfeH;P%~?SgMd9}UIMaVk?5(k0$Hs3uw>!e*F)EemkJE@-5&huYh9D>m8|{A#c{>`m*t zV^$5xW68UHaIZeaYQWvklsZUtgjt3B$!b*Q@AE)nz5@con@wz~^||LdZjA~$^m%qS ztvRsJG0lKR5_7Y2S3n?EAyCWt!ke0D#%_^sro6sBml~|d-1E|W0`6tl$9GJ2LGTk# zC>#@K@A~N$DjAjqg(WU|Y<{ngGBlf~nRT3+IR^_;!O!F5qaiJ8o3R3QA&L~H*2xJ? z;R?eA1bj{i2HENgV_T3|VqYbRf!^C4ge?%Hkj1*_GQyFkntn{4TBk@RM?~y?@#6A@R$Yq@9I2*H8A))iw zc-7xhBa5>~E2^$|ynSVg&oUaWllIXC%Shf;t14bz4wi#bZfgmML_t%Dg^^zP2lb!S z#uYSEm;#I6rYV=cn+on=?lgTvJfETyLcf7^f%NFLY4UOR2cjev-4P%Ea@!$zd7#u3 zu)N4CdOQ}V5lk=7piG*~_SJw&objEOzQlaS|Acm*(TA10k{TU}=~bRO8(8ti7c?~T zZsWragCy17nq;9#AyVhXF!lw6QH&DV>UNtv3XQ7B_-I(oe`q%ngNQ?2_?cGFLaW5oOT9kHko3vAB48bBl zTUq)Yg;KH_ErfZ&o^z>%;&({$ciS$QVYd_b9UyVZxRt&*_~JAuGjxvtGzzi_s|Ww#08C;WnZ8A6q{Ov@=FCf}h8odqP%Zk+y5r z6|r89?p_AY+&ooHbr4N1QHkFeenEFv$S{*iOgf#1mblZ)ubGyfeaS~DMyG^tG$F4; z;cl4#l%9=V!0gf|u3ZC3?89(GV8aLiy5{4Zbujcb4tj>o8;Q?~BtnZ}{t<{GcHCA?A9B1Cl+ zg-}A4Zu0I|f6bDtYY^Dhc~wV?&s)lHeNu`VHW&Ub*hk!DqYlaZQ{z-Z$+g0sbjL2H6BuwUDq*Wi8 z3oZBS-jq!>P*?Eqhg!?+1*%4_baq`$V`Dg3u5hVZ)>FU*U^U$&?P|osd$n9+3t~VMm{d}? zx&0|nI`s8g)?K`lwxJeA->)V`)U?uV?r+M$1* zhf3D?q*9fZh$?@emVtY$)$u{WLYc55#-@_}s0zx;M)rJ_BN00>qhgv%GCLDuoyYzo_hnm}N!OnPE&H{AK0E|w zOP`>cIQCG`kQ@Jq-Jf>dPDR5*%$>7}>HA(@>`Ji2_>ncnxEc7l8NO8l)?w+T5izY$gbN!=X>H9u8! zdyY^)oVLSOReJt@ezWY`Vtt=Q^VgRyYbv{*TtNrAQz{IRDf?x>L}+22o4u;Fqkfzk znRibFHp>}tH8e&|xPcpkfLg}{tUc`WI%k9%wmT)QPDA4&>s{?@6WlfC1A9%EIN6>4PO-){?r5nRFVCx&In=y%VaajIUJ1X>wx7Y%{uIaODUB5dGb`$~ z-TKCGQGaarn7}vvo_Urp-3j4$Q0ri0aCn^SJ@^I~^S8uzdp>KuHrn?4x88=-_4~(} zt*+09&R!f3fZ3hu9w;oo=t~f4^`{pvIkyu zG_JVr*3_&=hKCdEy`$K_&`-Kw>;Xb^zh{)OWs={_T28Ud~;bNu_<#*iH}5a)jDT6 zGT@8f2HY_Pe4T0HPH(bw)dg@z8B)&diX8X}MJADW6pcAto%tbYjER8nG8~8bF|NEb zUi3IAqzf2Gyba-c4vgh};yNYXWm>egA+u8g5zgyTaLXz+-WW{c3?rny0kTb{?*El- z(xlOwx%|b50Z94$Iu)zztfK_iUuzq@4#*c7>}#zinG_WlS(R)k(Px4eZ7IV_&!e^C zdsyHujeLcAjDyfr%|;`4G?$`^>(r7ANJ};Dev;!&!}TC*HZJRS><1>!pS(vH^LgPA z1R)kU8s%!qkA#g@=<9{(_x$xBZOhOJ4i)obqC58hQ!AkB0h13z0s zsVF)k-7}lFuJ4ZL+2{^WdY))KSEt&P&bQ8|&(b~zJ2f5#=GX#r*T13aB}oti14Kgb zMRa)BSx$=%1-^N&cZT9&VeOxOLJ*KANsVhZ5+vZP*JT%>O>Isf{8Q5*m&Zw|fV89j z7zHA1>@-xU7ggI6^s3jGbHQsWsgqjp{dK0;6RP=pR%F)v)XBMp1=9mU!4p46&I8)& zzqR5UG~=6(+tFNml}9yQhc@r^$F?lT@@&{`(93c-)$H!27u?G0V#(jEUTi_NeSp6` zyj=epKHa=uv>~)m+K~LmNDx7-%0uz8XshR_do9>uPX;2L0~0~g;k9CO9Ia_R=|Ekj z*!jSlIqIuSpKuO5|3qK7i$7Vw;g5cE%d@UjH<)y37rs{dzsZh4?Rmt zD)ag4;++kBGdVIp5iEP=yWe+RCKeV6vYJ4vBRzV`C<+**tZS_M6`R=z*`@LR&H*@} zxh?>4#pzb|r!#EM&CFVA{;y9H<_ZIVJ-9*pu;nWd!=38`RN!#H0@5E?#jjmo75BdX z)@0M2-Dl8b>nKSZPu5hXX`{m7$ER6dhS?dTB)4NfuGaphB!UgQT7z3eyW4oum@lz;+f-xMM$38NNn0D4*=a3h4@ct@N!nfh z^X6)>ARDQtlv60VVY&Tvo{GnA0xqNJKELl;KA1mBI(dM{>7310NqJtcp4~8L$)=LmUEyIsb}KYV^lb_(_zJ& zU&ETcFHplcRnfU7t}z)B%*Jh^EWt||u z1%@%x9d)iaMzP*I1S<;;ul<%=WmCE$?ea`BI#BEcS_-#=a>zvvg2WO=^stkQ)w|~S zbGn%=UP0%gnk{kKj#3wKVvisVA8f8kLEH8lAH!%rsn7cFs|JtuqbB#arZab3T}Le1 z6O;oD4u@V08fJ#-7h}=eQ}e9gu}(TquI3tHU9wr*C!hKL8V?QwIX6ywFB6ly&?sO! zTbvROLEc+@ge1Zk2Och8p_ZOCf}bF=@4VGN`$jUv8mfbqv26s|3!= zCDi~2E%S@C^hNL>4ejiXKE5*~jQn-l>qy-W>Hla8M75Nm z>$mCcJPyll@7tu+=zh%Kq=vKVWYMVLooee7#6%L*3#6B*P|(d2njK4wa`Fsc3pr>P zr`?q&zV};!%!^Nch7$8Szp1U$abBxk^jwWfiBC?x9aq)4y1cA|!5-&*@Avli(@;~N z0AOMUxmzF?s#r9xtE(%N5dqAbPPN+^EAYBMNkc_NHJ>P6{x4JaP1LTw0(-U2ZW5$2 zzXWWbXGF!LZK_OU`s6=BoANWke*#G|VDRcjLp&R~9Cp+u)YC3tC|Xm6KU)&3zSF$n zh(btMxM%fXzF~U$73AT}VltDcXRZvkmKOL2QI0`@gf6blw zKikcW^VrmdxDidsu8jfUx>F0FOiixx>xYDzWs zQ2Q=TF{&uBM#WM~go+^dF?a60|HA$Ll90TTe9w8#bDnd)-_PfrD_arlV0BgkdcNxD%(#HA-mSm z(P2Y0^YQX}{3%gr{~%Hy*0Y>lh9)t}%ZSR0j^+YACn%caBzg6qtFs%&P2GXvUc4OY z#??KMWZu_&7TLOSf?~Z@HiRJzKWOo2iBsUUVz=yad&oy55hW9#4RXP%c6E3nUSNV| z+~Q4hx;^HHsEq&=h_4{<47r79Q1^3$4X#OyNh-&mE04grlM7`(WQB=JKS4g>TvWAD zsQa4cJ3<}_W1f#p-^o(nk&(B5=3NtM;Asw3hUwFctFrXr+4eah<+GHfb(~B*|I7wO zVTMuQBbp#W_tJ$6x4CQ_8jZd>7{0IZ5lXBOYWZQbh5I-B{8*J{rE zJigkn>z74*kf+n>hvh!}nLi4|UG?M5*1zX99{b}@#BZ0um|H_z8)fL-51f0drYxy{ zWKjwN!ew%DlG)wz2xTfOEZpQl3Ne6-MO|kqm8>i`w!YQMME|62lh$ACjs#5tsoZa* z2K-dU;sM~>V~eHZ>Jc~}pK~Y;4UO5Enf`-NAouq6x_4%%iUDoxRA`BEpD%n|wGa^L zhyOGu1$JCVBqrV>X)KxqtA6+?pIfvT9b}Q9*=j? z=uY6{nWvx7@7;5057690{Pl235DE+5H%JFakgd)@!_{ZDdJ_3~2Wg4#K>u|p{-wHaaWTU5OBdxTGm#)-;oNBDci_N* zfy=BQPzy0B;%?6!@Afg7%o6W`ACpdgWVB2ifaM}hH>lqgAF*j9#V zn7+IujB>^ptVj4Eqx@5X&pd>)cZy3VwEnrT-0qn$K4pBC*p^jXtY<7v*cIkT*j$I% z9M6Dh!xlxv&-?&lnV?ObK@M+YthTs#=NluW-Ob-$cQ9JhUO29^^N=tU-@zAe0gopa8Ph?hOUcX z@=ikgE4!>VHv$37!>i+BlUJr?C8cC#J>1>xuU>tuD_LjB+IC5hlvItL$_V>Fr^nJC zM$t1*JZ>|1Xq7ck=5qid&GF_)4S`wdzp=Ko&5DM2^$+LsMt{zQPlHIi}o#@YyfDIB77gov(Q|3XZZ*g3R{{3Vol*Yox2cG(i$EZ0o6@odL5&&sHF&E@m_Sb!Q7F_a zeA?JAl@WV>n-3lz>TI=TWS9{u)6*5;oF#~z>hU1*yAHx6O5si_2j{Lhq*P{V5t4%T z8wU6UjYb0ux%#>1O*N^oJ5KBD?4;9GqxoDS-z98MRFquU$ghr$gw?r$R6X}3za55( z2vpZI4f`@*#_7F4gYvM&jBd_{)E-Xm$^F3JV5ZdDE{gtZ|4;}Cy^0rjqt8>d-)qv zK=pV$a26q(u=;f{1c(MnqN1X|*4#CEA%-;a^IMoAMr5dfXU}a+<-9(^gp`w$v$htR z6bc3x-rnB*XJzXg92_9)o!y(d6CJ^QMi*j9^E-UjwxX(ZsyTZH+jg&OuI@adqXW7v z8A{>y2sa*7-DVMb&Gl0{&3JRs`_!MFo`f*F@gc2U3%ux6^z@dY+29(%c2~9asw9io z-Std23bcCN3%tQykrt41d_hCyuM04|;uo8` z+A^dwkeMg?ZZe+8>0K(g(u^~AUSa-jo5b@PpUx$s(7p$g#ZBjdI6w?vpMLAoC@3V< zemI#|R|$diMBd=E%3c77aMzY5nBq5JE05{etR?hY_?!Ivx!-Ngj36-p4bi^PDONFv zY!IMsy*| zLxQ0x30~6;4HhfS#M;`WS(5{(Wbwtit(wg!9gT=XV(cZ08q&a7*2E4$0V)X-w(5QH zVz0Q)yShmWn+@J^E38xDO!j!)Jt1ad_Q0(>cDcf&k-WS- zE>F4j6uq+<5$uLPxPj_^7#zJ%h z_JPpwPz-+PnnP8v52G>&99N`l$&o1eW3DUNOC z*u8Rc*4l4!Y|^2izM?ITZ-OKX<6jNN>28m~Qar}05vYrp&_$&?*riYtdwY8k5s{KC zA(o6b)aXDi2cMV3lG4%@$X{t`Y5gY<3QI6C;&8Y`KY^q&_xBm5FC2Fu)NIm@*?my* zM5D`rw1iYNOLN?TEG{Y0XFF~*y*kM!wz?JPX4)JY%8k*hjF@7)8{p6&xTwAi%uw9Q zd8qX9Zf+C(6gMC!x!>h)ej6sj1ohM(1V}1#Gk~`)H;^jnul{YcH-HlR z2TUVa@;BD#Nf_-F#vuz1wc*t@H5n#Jyu*e)_4Q-iDdNIB-~* z#q#j;!@>r|$mZ_yPhDb6>CBRnl6UVmz^jrOcb~RX_qdr$TlISW<>IvTbh}1ZL{;e0 zb`#!Vwn+sj(sAbYp&8wJ6OJ6J>^U#*VsWJYly={wO&bW^}GSxJ%H$2s+!78#WUxg1;tGeXD3G&EkfDn_KiKkL$sLuRJs zmEPB`<1gSVkWg*2Slw9#R+nQB8U%cNWoH?+>QL^y`M2g0w(oRD$!c2gxP2U7`0JLa zzn+u6{YQBV+wHu+1$poaivSht-^YJtv+y@war-W%e;?JrR{s0x{{+APe;;ocv(6tH XKcsO>wuQ9)bFEL<{Pf1++Wmh49aE4n literal 0 HcmV?d00001 diff --git a/tutorials/nlp/images/spellmapper_data_preparation.png b/tutorials/nlp/images/spellmapper_data_preparation.png new file mode 100644 index 0000000000000000000000000000000000000000..24df8a8e0525ad6deaf7156fd2f5dc79f9ec9a39 GIT binary patch literal 75265 zcmY(qb9h|c_Xip_wj0}4W7}4fG`4Nq$;3&6##V)j2YW{;D@q~3e}xAF14ELL7FPuWgSZ9*18;FTlk+MVsTeP6~WoA|`s_&!;QWT7SXRLpc^ zh+?H!R8{}kdcL)wd!tp!S0VI#9mbCo>Toh&&YSV>yfrY z(0Ps+*NkwBB34IXLI`c&G&a3k#qk zv|K&vJ8OPpAnpIc_`m3obt32LHNGe-!j+^G;?|NSf~#$)vHcV9!` zBV#Fv4((1lY&^x68Q(m^I zJA4=v6UrJD`B1gEVsfRp;$-0Hj%$QO3+gE%& z+Q|uSwkAspm4tfL2{aWnm4$$WtS6W+^4yDMBigiBME~UjjGWqFx`R;7nkx$@V=gNe z0vzkDqGrqCY$cQ1Z0zcZdPK0%8yVRw##*#IOtBiZWgXT^c;bpzy8^2 zxINYB4|VnCvmDxlsr|e8fr|57R+JWt_`fp@hVFK6@C#ONxw6n^x^Odd@yb>}p0}Fv zr-okS5Lm)mrc;e*#>$8fX>O}a7PdLKVaMdL?)oKE)jJby-_K55tB}T~syYO7If=(W;jq!!Oj$eGWrAl0m&{cqb)RJIue+fzfB*aJ zD>?ad|EotbX1eF&{rQ-h9_qqU*VO~Iz;BMnp&@aMc~w8M5T^#z-Mxt87<!&UgJq#^y^N)a@hKfA;6pZ;!|0NqZq|Zoa;g}9N^;WG)A{;{RQl`|uxim$r?mf0 zmF*|#2PD~WAKQm~5=mgh zlD=Z7n5qELS@lnA*A}mVKqN_%>H4euf|_Q6bgWvc+KxbIhc_n`_qn;v54susC0ME3 z72y6vXK4!ipOt+-;)4iVx70;c(r-+EXU}~sHu8+K)iGtCS)U`~Y6=RQY|Y)5Mo~ZG z5}5p9Kl;)kdt#!Zb*%8E<)o8;>A4>ZT_EF$@-dyVlttBZ!sq== zJ;leXsw}Eus*RjD?Fhgl%jXZV)g#SnE7O*AUz$0&*>@l=Y(Z0(gnbU@;#vOo7E*g< z@6j$9yn-QX4VEsldpO#jG1s6a;D?a|yx7R83tdTZASU#?2NNl(m~UXqd-k`rwvB`B z;v=hDW^8|pOsYkc<3$$bQCU~&4X`%1jf-KS=fkJAdC67l(#rSn{9&yrd1*cJF=tfv z;_sFnXSp&ht*O4#i<#4|mUq3Z1qvK(SJi>$ znObiCjxhz@8tSGM2g~%E?xS2*(cS{a{eX zEV=7958yeUwt34L=TZ;3y;x++-4)*3*D21&EALL`Za=nb!c3g+qvst7)~1%bL=S$` zi(&`P3=+>zTdj(XQ3{>X(J|M)?vT~c7&2f>u7p9vxclQMZ)}9|HH>%KW7m1tMB?Qn z6ckhf{!dRY=fJu8Ta~%`Or7BAka~_oc~q;#A_GauBT%YqI`FprSh6fw+N1DAezz*O z%cme;uC-UYo7^i7lbI?egxQyr?}}XN>x;F$5B~D2+sv;F^?Oj;=Jgq0yE&)W2`+lH zd1tDX9g-L2(=np;+1Ds%W5>Px%`@B0{9Flr9T_Y13I=+ip%yo3--j1q&Cr09q^72; zb753C(`o!?EL;SOYP#0M^mx$L0CRBEsGT791A$3d#qe{x)Ldhlz=tAlZw#m!=KGno zh>JgHaG1BA#p%}xX#)}oV+WP+Q|k)vXYta37;*+@XPNn2V_ZBPbweu(3Cz1kjVod>GKs#gR7Hp)qD%o z$u!cFSHAAKxW$wdyt-?JPmYshX=EZt7sDgLl{JkTkj~iON~=0ngP)L1rfta!`o7EM zbwiADU2!yfaX;OWLuap6aHA7>dQm&nb7BD^R+}}F)G`9zVFB}LTl)-64>B~f%F0Bm zW!=$LZv)w*^sjJ?d=*2|^9d))t~zxE<@v$HLR|Ut&6wX>t-j}{t^Jk=S;t^%{+_gv z$BIT+ZF_#Q*hh%UJw;{#2Dl&UW?Dk3sI)$#D|f548qJVJ;f1Te+apHxeeR{lu&7=2 zxDa)=t|;hnk64Nef4t^5*#IlG8jNwDk4W+5ZE84QuAVBb?WyDGUm8oPvh=TL*sL_Rharv`-ZFt8zT|Cx5;H z!Wh*BVZ(K71cEz|b%hvBnl6y6;81Q#-q7Wt@&awqbQiA(STkui}m>x=C^yzl#b_6qp5K_a6m zOf7cAtjyF{pT-$6;nr{mX%XV|+U_#&%pu49d{Pe+KOLi8^xJ(sKwY_l&v1J;KW$qe z%myrLWNQ{357};IL|zSL&hc<3G?^Z%fKkCl?2E<|hOWCh9OpefSMu*O)wErXKPxPn z%cS(%Vm)g7(o8uH&wPjpt`sX^Zr9V z*y*VoA@k^q$H+VFY55Lt{42L$2LPxY|Lz~+q_B*(VM~LRgs@B`pVjy!pqW!+6zRPE zx6{KATKB7m!+eWZhE}&{Xd7G?gL})XU{t?-I+subsX=tb01Tj z_OChuRwH(Z6MO~p`-KUbPR{P4DFlTF*^k!>=Q~E>ZgZV;pjYBNoYT__|hV zOMHzIvIN7PsY{CU(Hb~esw=fZ9$fNy9AVM-_9&3Gy{?;5?>zw)E8}sN@_BLEFv%hWcr_NV&o`)C#~_%LnBTuUb+TeSpK`D>#6$-_X!luOfY`J*#jmz{I8t zZuh{+|9Snk`RFb+1JikBY3Ab_xqrwSBel{Tk_$`{lPF2Nv!Sm@R-_fbG>9la_?`$Z zXazTZW-ZUn$w!T^n`nyw%d{Do5&4+Fbin5xl1-5`=}S>m%D+E>9Ej1KpF4Pum)*m# zlAvmL!kPtK5E~008vQXSDi@#+R7ohU$=;ss7Z*EzNnqyWa4pb9h0^FQ+9z>V)~~`J zAfsifsV-ApRkB=F(b%$r-w~uwOG^yK9tls%!uIh|O;xkBwyiF$D(LH8+nZ0`hVngs z@qsSY!_Z8$>}p~uKV`Fn6zyf$=>2tFq*>BD4)NWzoqDT@dz4v!Rxrm;zFekL3A*aB+f1enO_TLDukmL(Y zPh+_j3%+eQmhXQ}Ndq88$Na@c!yLTK0Wi@}Wc7Ij^(eDgQt@doMR4=-+`gbNQ;j%$ zS#sd%wF`KN9FmZjt1I9?n48=#&Ij_$zGONZaSu_#M@fE#31e?4yicr;<6~w0Ad0ej zS-@Yja&{Ss5>`@8yt6nyAfc+28T^6J7hpf*-e=GA0Bq*tOB81HsV;%nB6-Gn_g-lGzj7xvs=l?12roV%G z?^MU)yceLu>=xM2b4y)_U1)M^3y~E>>t_+&#IHQhusoS?hLIK9`{lIhh;fMW*T`k< zfik4o!QWrcaoO3=qEM(f)&}W2FU*hwa#X{8Ft#D6rqWDOu>WS5g9q1UET?iI9{h+;rHOWz(s% zm)?%A&RY1y!@WDq7BWGPh1uQ`djCN4Fnv3b-iHzld!rCyqQqMr$g{W+rNaBMzuSre z>Jb-Cj?hq0aCB}P5Iw`iRFl%u$vhcCg**vXGN^JsG2SNvzn}&7cmFtD8v`O@`7p&f zDLK8ay2ChThQ<(hH^gs>%?|>oX+!IvrfZx6#()yW8zqi;D(PBlw-;HLC8w4}#IKHu z@PFUq==*|&>uGbShiJOsW`=*1S10HZb2*ZcrR*Wl2B@p@Y0NoeRg(Uew~BmAQn$&zT-C&;qbIJ3{iLH4zH^9phFsYUXLp(@YV;(@U#R!LD!YW zvMHugff)x%=Td{k1bQA7a|NR-ibnEQ#mdL((GreDV8u@tJ03+SmGc;qS)5+27zI{WE(XPL&O!dp-0Q z1!h^B9l*Z7jB_d?z0lZOGM<(Ld^()MC$VV)>zT`g;Km-r!C~2OF#KX~GBI0!nwIXS z#gcYW^~}G}vn(&@tr(Gbzh!&J+u0c4_yFVJnZ|+ivN#)hW46g$=FJ|!>p78}0rFW71)^j&exNUL`oN0L$8A$sVAUO3yCgCEF%??^4@ z*3e{j%oXUQz`L8z1^mig?8SXD2NcVPeF?KT%&?VwW_@OV0eD>VsLs1EP_0G76ZvT_I71Am-pe{Y8q4 zWkAJ9-zgc*Ib_NReEUItW#`e3oivsyS6iGP8*jdw(ggx5H~4*opFOR7!N}bm2r__c z7Dkc8Q*$$^B#hfxI}p)YQc)*YyHXhdH6ex$x4onp7)5Vcr*{A2(aJA!<{URu=Ju zdP&lGb6=cdPS6Ryg38?cBAu<$(FW~HF4TH*5XueyQiccYyH+N8hVy8#cC;CY9-r!< za~_7bM>x?=`35!ur_fP#!Vlm->L=X*`YQ#)axS=`^0w=6o9TFYzW*A$^6E?>>2H5J z<>WxM$2p-ws@O)yTbg5+l#!+xTD>PWQ2v5y;PY4MrR9VqI&>Yu9<7$Hk0S=dfq3)& zN%&ToVgom~Gwwvc6Al?F;{NEkbxMf|N9TjQ_Bvun$|MAIdxQ9S#99-7)BL6H`Zz^8 zjQ-Wf8K|mH&|DVhMHuM}1rl3!!JOnq9|IxrOt#Tr+#GihzyFE+^8jI&hL)Bo?@%W`GlBLDObxM49E2m@yTBo z5V_F*s3PFd&^5px6h6emno|2>!7*J8xlUi=W^Fc=$rzYY{m^uG_ar+mF{F65!(`j6 zs-kr!Bl6Fj3GkiMniI}D9qig%9$^X_@_N4XgUdVAbOH%#*F4TWQlm%B)e3lORxTfZ zfB7gGcvx z4@-zNm_YU#Z%G)+#&Cmg8@G{@Z0(A?2r{g`T>K5VCX%A2E=07{>XCrzM%nAah)38` zzyCyu>l*OiZ3%&mlq>=431_feb#11981Ad3pvktRZ@wh>F;ILvA!2CrdU#6rIDk{I zHvgi_*Ps9*$zvZl9>M#+RM0)y8ybDdg0;4VV(ihQZahS8TLjyKU82vy4KUf*Xg{8_ zBh;9$NE5mdn1z^1b+nLoyh&1|cN|T6KVBY-5IVN$`55n7|aGhP`OjR|n>GkK!Nr>Pz10b;~89M6%LAxiV zi1K#4BK_HK?<4*<8s4(8w>ee2woZ+APenn&U@I5l_L-NBKXb4YcJ23V6IWnXTz)H;dhBkaq74&aUsQmJJPJh@cWs-C?Mf)o}U@84|F4f3{ zoJcQ2nyR5MIyd$M_D_Exg+UA0#b(tTnj_HZC| z(@P?t89*oax_fu0PnbRMb7E+O`ikGzs6R^pM$y3U>2C-S7+9FgeBX3&mTQs7w#nOq zDn0>=khS%4`Stb#GSn1`#Ii9eC&})xju6f(! zF7`1V*~X9^e6gxp*urm=vB+&h8{^|qU-U8Q{whl4MT-2l%2d54MguZ5-fpAZXG*e7z^U<| zbnMieGNQ`^EOwz81l11k#cC?U8ltsKuQn|n^#nbD&lv8HVZSe zb?&n9=lh$R)pZ@YmiB-LqP5<0_)?itlqw0FQb(=zwVa|bK)d&7u|@il+9)zb+MnWr zg7%u^td6cdkeHl+_;az>-o+KM!NrB>)xT@^@iTRT2Ew?Y*r8fK_r4mk@(T>iw4?R0 zFcsE|k`}c?te&EQ6uexdTbK8Zem--{Y7$`RVFAlSF-gx_gk0g^ z)Cq*s=y_2F3Go{CPqJZHj`;qB`g$6{l+FrlZrD_XwFvU*B%q=b3jo)4%ML;>{iw99 zj~nSMd&m_cV5V$!2eX*z!P=Lu$G+YX&@e*_pv>_7;V4xwbbTIskUVJFyE@%!#=jS& zlS*)uI4&^N5jE%uea~_=(xRVVi?4po=jtL4?veVEZGEO=eM(MzCnHK_ZFTREz@d=Q z4NvX!vQZ!VspJm@RlDI^)X{TQxN3rg1my>W5qgS^b(Lc$m(J|jT#v`!N{2%YyvWH( zAQkP3h1{(y&WD&n8lTSOOim3Wl7RKl)jJi{1jZ{W{vKoMElpi1k1wSAi`$kbtjeNE z1TqFNi_M(&k>p~L7@x~zcG(*neqS z^e8brM1+!{ut@pNn5?Yc;W94P!1hyBlrFo&zH=3AcUGc4&~Nv7}m_mBGHP)UBWHYJr3i1ufaYm>9YQ0CPa8a zbP}^p2_KUEyr(xhcX)V}NZh}Oh-|7>TdxpTt&#z2Tyd zMe@l?J@4L3TyR?7$Mbj&gmd?^RuO=3ymuzV3#IpwBAa+AeLFV{)pBibA!mbu}eR9BtM#p^7Mm&CKqI38;5MDJb4Qhv{X*p!JoZy^ee~ zAw(DD@jwraePT`a|NG&QZ9wHqkvBXt71Xq<*DX3fZv2-W<#S^2c zO!o~P-PoFLfbr$FbwM603tGP54HD{$i+Qb0*-X3xZzkS;j!f6wuHG-JFMgYZ!xk5@G7|Fm@UD*$ zFBezl6#Hk1XD`?ORLVZ0At#TqYkFT=;)M1UhKK?y0K@vzCgU;!ApZH2_@-6oK5b}n zvO0UYAdky_P2@?j05jBgDvLKHVodM2QCYKGco8v7l$Mi8pMNy|TU&egT7wtibr}pk ze1x6$SR=T&pw}L3>9 zqWV}$=pB)pP6%&oBy4)>Yj1|Kn%3uYf5=7~%u7OO% zvFnwR&zRX~>pj^-BuJ}9TYRtXrH%)3D`bz6C7VEqspUr(+$4+>56|%5$46=fHYbBh zcGt34($A14bkY9?Oz8ND1bZVTMSS(=a9xk98Ug9hHwh4^$MSyz53=9%+3fI|dez&x z;8A=$%UECMMuLqMT~qDa#WrZ^dH+%eEq1aO<}y}eW@^f9zgBN?jfNiQsiLUAY0bvQ zMsaeI1POzj8;URFj=M(!6S~#ml}qsg(CD3xk-Bpz9HWZde>`dER&LQHIbJVlb-Xd|h1RLL-f(gAocIF7v4zarlw!x{Waz zrHH}s(B$L>KNGQ09!|}w1$SJMV)B#Y1Nex4Ycy1?ceWI`BIS)>NM;p=?7qf9!9lNt zhq)5h2W2xPmz7(|XYw}Mv=T`}D~K7VVMC|Jp@fR6Lx21oevUET46n8P6q&U4N+@KU9|%o$xYw6we4BvSwyO!s|!;2 zy`8jL8qIEX!#QlSned3ps_N?(OCbCTLx{s>T)LPO6Ain60UC?_UwZ9J8m73;?|SAH z2E6U4)cQk?+~aG@Itdp!!qeglvyZ&K_4+Kk%L}I-|817De46E8`w$8`^7(r(x8W5d z4aHGRE*Xym4^LJgfsTa*{yyQ^CJZGE&+=20Rz?@o=dNh(;*`pM=mwmXRYe|=>B}P0 zdXv?RC)5`WR-?V99>0!6+n15Cd`3^HHU>sS&pb42eAAf8_1_D@w=NMdX(re-}YL_kbgk;1z(r=)Gmfiqh zo(1-`^gqa(0nT6EweIrqa!v>XuXx%$LILyATnA``ux=D>B4HS%l%K+c-ql`pG`tV zrYx9!XIrXzSvYv2Djc7xVmWUtD5LC1PcxRv^**~t`HU@spxsB*dT!D#_hC>*c@V}h zJVheoqZDOjP5H6FT>G1wn$*-jKG_+nqbcu0S4wc;!FX#Odci>xE;6%RLR1vudJxyk z%d1C}W7Zck6U#CI#~*Q6^p!Iiqz+cJbW~~Cxv50x$!OSAbfiXV ztw$BmDVb%#B8&2rV15(uD>~xk23>#i%gV-{5umv@8jovJHJ2k)!TYOS20TkpWc#|Y z-WA!t1Z97-uj)cMu0x91`C3SUX}}e7wwTx1fyx&se~fP>8j5f{lQ@DhY`@lKZ)^;$ z%HG%G)Odfalw{|e;fsY}vZeJy7MafR_X^xA@Y0n2c<$G0Fn6AQ=VP1CF_n)&9jpIE z-E*7v9%D{&k;LK3%EFO;D$JqOXq7tM@tz!U+2fOkHqPeDvS(sCEMx;7Jj5gx+jomR10`mUPG z-cQ5M%X>68m}j&6?rN~#>#ebq#^}HOhIznaNQFl5Z<&tlX`(l0p7Ir}?(UTBi2((mM0 zbTAe{2Z^l*mL5hR)DNZ1^Yo-3b7;HTWWPIb2#Q!Fk6jiyy!=y4Tu6k{fMNw&JPixS z3w1p4h@ovGGZD_uGyx_|TVRddMY+YJ?6yDT$G6ijUoUculhZs(9YyscKnD2URJ+=- zz4&+0w_C+>qgu+RVCu6(0QMYc#+)s6<8+NC5Q@qq+NCR2zc8^xk~)w1W)ALxjiaHh z4pCA6Vcc%iDH&U?vg)$Uyd4OmW7KsNk1exwH#Q#7YwgSB^Jmce>UAh9sZA4=`FDQ% zaktU*=V)B8GGz}h9$s$r;E+zJC-CmCcE_JvCCY6eLFN)hQBoE#c@`ythZbe$bEDkwAmU zG(;J5{L7Q(i7E=)&TQt;LzHq8V4ISa{cIP0{%lk5; z9Br7Z-q1J?YFAX`DJfZ5fA1|eT$YOxh%@W>H|9Bs*{ni4vQQm(Gzu)(EseblA0p5# zuvmpVPu>}obtK%De#^j$LN)9`)9cUm{1ga4Vmx{GX)P!pXWKgGvX#gSoPxsa!4V-D zz-!1l2qZM}h(Ng_FF=(vO-0y)o`s1V;lq@e0CNga2k%{8Ik{$8S@q&!lY#%LDMEu{@;8MHD2f~rYhBP()aM9lg1%w@)43n6gRJ1^_$4g*gMLC z8e?8sTBoDX)Qs4hNDyeuJX!PA}3($hM^~02V+?>f8y(d*y&Nm!2r0 zZN+jq@HiF&@NIjSVX6gVW4~(x;{lB9>=82twBM0EYLwjFiy8Q%73g|VpOclfSoO^{ z=So$D8}{~G{`?Ui9;O6Dvo9!9Xh=*AId8k~>$kwxYDx zxZe=Lp5Nt;PsZ%nC<08G0q~;s_V(X>y`f!;JmI3=KtqzIPfm`e3KS~azPUBc0597e%w8blPk@j#70a4uX}cPjUZ}xK#B0*;@FYc3c%iANI6*^O33U4sPzO849Myw-%Mi_Zr)dax zq~c+OH^ni>D~HL_eDDZ+*IUhTRo}$3RzM(naO#lhScw6Z-j+(@ zN&+<>QnT71U>C@1VyaWhx=&>+wL<6~{Y@My5vy_s_2n%L-xf+!qW(1uT1rGSX}}52 znIRtpAXYd#alSR=W8guGavX%mxt+Jv6%UhMuX`X4@0<4}>DcNf^Q= zQ5*-yjbuaq8xGyKOtd7pt84U)4S1KUYt%Ry5(*8=M}CmB2{F5Pj~mb~tTjWAaB}7a z5zI87rf~k8wowGYE^hPwseBPT{{EBJ88|>R!VIQyxC-KW|LAuB)_<#8i~kJ9pJAJz zV&5iJB+lK);mp(zZus8BQ>Cza8?E*_90SoA&g?9fpD$i)hCNwALg);FXeq^IrL@XL z#3=dr1WV_-BetGzU9laFXvXB|CgfDK+;YBo5Nf~|M}u)X0;d)3qLM$5Lp*zt^}m@8 zeL^T9BQ|zxNJbI? z=PaI}oX;03Pwjd|WqKoc8iEN?2MHrM7WOwgtH4sB3v07FJmYK zuQhR3tZQ4IbMhO(EMu2&lioF?6*5v7E=5+Na1EWN zzfWp)E!bEXlT#BD<6}F&+Xp3eb?wcmL|t5**-VTZ)S`I`DrCzt0Si@t1^Kgzc=Gcy z)d}J)^FR){B47g5?m-kei!mPsMFt04*CotYHg?_kTw&r9gM+BZkNJUHF8{7ej9Q$4 z{XOGxF4=g9n>29utuL{yi>8yB_Kd{wlw@SzH9=}LSy_4IeYK7T>*?e>#pQ&?IS42Q zBJHg_c90~Kq@sq}J1w@OJjCc1!e^uwEz&236yl^3Z>-M^@SjRX24i}6v zBH)v7!?Zd6+Wak<-6@@E+n~!4xt(7+DKQZk($mqgXusK|h^`7RGBT??y+MEv=dkVX zkwxtPgm}yc@b~X=Aqc23%p)RTNY2fD*cH1wo}uWe1FaED$w|%x$m?S{r`{5#1%PoJ zTrd(Luh%7R1+-pugZ?u-+~SF#)Z6X%b;r|w zodBM!rr)L$b>+2ia&l&~IDM_hfdf)FixctS3}+jJP_$V>2D}@;efJbP;j>=w%-L4O zDgPu=&C=a-cFwG?uUE=DC@N;vWWqBlh`PF3Q7+R`l9z1LD$_n;_yMB7@WKRRlau%E zj`c_gXhDN=R7|ln|0rQV0pc6Q2;_=6-eqJ*9%9!F9zC zsGcMGmh4udDbB3VT97VgUHb?8>97>O>9ReEJ%EJ`Io&cPzR$G!DyOYw-KAlFnI*W; zRV$X>Qd)-(&eb=ek-L*djQ2CAf^EB5Yih8Vcb{qGvPvR;C_V_uD2pt!v^SU36$Ufe zC4D@exi~O|Go$pPw;7=T>64&Bpt`dUli>Y`&+>D-k8CVaD#@zA_F0gDnW1RdZ1X_o zS7u=H?&~{#mb~T!sv8j9(wa$rs6X_Jqemu@HJoI4){7vp&Z{}@cVH8Z^f;9UTvcI%K3tnSCM$cnCCLD1r9 zDM2#`_g(?PXY1fO#!ee5=L`6(EHu<}O3oaU^HTb{Pv|D6Fl0%kUK~f}%H#i!zB$Z8 zn`{f}OZ&)VaogUQBagOZ9j@MqbPCtkLpwU+4nSc-mm3t3X~;V7ypNmUV=o{^D_}zI zr3{EQ6Yx2u3D`tJ(uzR^Dd*Sx(@iD8h=u&7&RA^wCXtcRr6{CD3T&OX_@gF)VI`~l z4juI%@ez)DMG}<@HIrM9tuY&_AL}}-oKAWn4&Pr6J0h1S<}x-ivX5|maQbIdz&r2V z*`lEy7j?ib*<3XE`7_k4ihUp?3?Qmp!%>k~!FxT|$5CZg#j&LupOgZdqt)lAx zwm<TnR72$H)2p3QkCCqOvjJ2D{|+T`r%4q* zLTm%d`07C7pJa%>&2E!HqalqwOjb8+2*bUy1Kv%$ZMAhc&X z+6xDLS$tzcOXOM z(}Mp+L@J98VVKBDccDC~Wf05==V?kt4nZh@sedXio&!z<1W&5%tR+Xq+5MSro$AFW ze&8FM4GdX<7#J!H1)xtB&z8IdBqD-va#DWRL1+3vyW2f)ZRkLJ9p44O6FP$7M@4O| z14!!HT$-Q9s9=ErVdP(9W07$i*E9VfUKFl0q5NUzDfFV_L_nJ@fB1I7P=pANqR?ki zi&^7HD^?C)^!yx}%R}hB*QSgtlbARzDqT^NCGx8>KYy&Xqg}*Jj9qP-Ei4SIT%;(L z1at+WfzHAHdwTQ`nGYVgL!MA#Dk>@p3gp-BcykqQdUs#oi>;HBlXbueY4xnFu|d?U z?ZS4NJTo@d_8%pq;VSo)O{Jt~Vw&3&=$}%ciJ*`kGbN??kLlE57)R>$+GAm8dIKDp z%W2D_qM|@;DmG8F{N~{3h^p9BQ&=iGJ$b^R7L$pd4<<}$X6&d@3qHa)WujDhA`S%e zjZKUtk%=uGwgFX0!W+J&Z`8z?*~!-unu?k#D)kIsLO@A=S63NmcP&9L-+Tzo!u3{b8;*)$$5tgoq&QejE_q;!% z^jS>SiuoV;5M6p1Nr>|#H5=no7?SIY_bjC*cLp}RB*Si_CFz0bwWtN>;r?1 zYSAwZdLW8bQeqzOg3V#~J^keN|79TVb57rL(|UV>B1~%9r~x2NNT0V^(nWD8JComv zgFwe1Oq)b=MLv2XhGfjq(0EUuxvO_D|3Ch50jY5@28xJIBq4IrjA5GynP*$CM39Ot zW=PjtPDVzCNA?N&-WX}3owH?*v*Ruw*&V6z-87D{APX4)C`Q-C1KW@;8XqPo@B!?j zRJmaP*K`8&Uce%Vs@!p+lZ%R#DDxy%G#n*_=ghYkc2Gc1 z(vfIBNnE?}MhuyDMpJ>4Uo4kF!c<9d@l@*yI;Kn#JxeMDBQvz$YJUEkkzv!UQ{25d zjqL~k#Ni1Q%EHb9`2y!Bp6NsTT(PhZ*F9bsEFZ1bd2>x!d!(MrIyPn`Jwl9ggg3ix$Ml z!OjcpziBIDtt(ImdDnMZp`xOzf1$eg4x1ZqB%eoVTAA@_I4#Q#cR+yUtMh>)2rlV6 zfcBuO!_M-9_~Ckf2kHK&g2+BZ7l5Lg1QQ^1aXf7}vtD-_+)d2q#C3fWN@6vMY+2sv zVgCh6GE9ofrKh~zhYv#;QyC7D(EIDxuPIvC<}*&t?grC>+|Vmq?JoBqlLl6z_;R~7 z1-Oc9n%Gj(m7ZRz|I2r2ivgX?O!-HH2PR~+V_zTXx5*Js`5T;d3aSdiEu30G5YEIv zIGKFsDgw6lb(J=RIzF!Aspb{^xY##k6K}S-P6DB?WaYL1jR^p?4jf!N9v%q8f{<*i zjP!q7X8{va=UOx45uNpMM@wrRR1ZjSbOeRyD>yibnp&+{rN=OP&?{F9?f5EIk^CSp zf!o0#nIuq?!0x5%B?hlIk;KL4wPc0%wh$K?Squbeqks!^1@=)Ce7ghUnCo zbj+l*oHB^0;z9`)`-&ip@>vJQfp`aG05nT~SUkdiH#t)XPmtyW2}c^wox!~MzN8@J zRnYG)5{{nE?hs;vfrFz6a@yKCy{96V{Rq9&Yoaw4v7-VIo1h{|U#UQzT4q zk7wLFKH*Rfq~r=9i|3F9+05J;4HRYI8yvKl6=|iBJhLA>e=Zj>(2ma zHb`?2@dC|4EC+^K+=Uwb<;lqukd@}hAMk;BEu9h`zVv7#fA4;}*WSg9kqTX5Z9QA7 z+!ve2H~#hTz|S;HZm+!(VO7!%$@L^^4~@B)1pXEIu--%{Hy6_5>NhNc+5cTpnCR{g zi)Gy4quYI$*xuNrq@-M~DW}^39@D)!5U@>S(4Pu*I$f)lvb2PS1G%#Sw`YS>_*%!# zhRneOmh%%t?(VrPwvB4(kpqFlv%(<^JrZm|Kj_#nA1=AUI>{x!0TOdf6-Z`Yq-osa zWKHt6RNxEiu9bUy^w0^#^8f8B2SA)i-uuxD@>21N)WP}vOFYVP;_jP1!(LpJe_Gzo z&W{l_aAgg(`9FUo%W(tz8*RcMka!x02@&_IJA5t1(xC2Cu`*MdoaT{xa(2U#LZ{tb z**H0U-X1ukBwJ)q&wm&h_8K=Hx`~H@dJhRoWu%xXg%O1RAbt=x>7m1ajE5F_Hm(O` zr2NafTa$?4(P4Cl-G5mJpa|elx7=Jc;Olryp^CqIq&*TJW*hi_y_q5vi=ZU_@gFFm zV9eJx#*r;v^J_!`ujtHW`khxUS~%#Sj)=8xx7I{0V%86mI8Gh_0r+rOSet0%^4zv{ zEA5lU%l7f>?USyTyqiXYXhY(3*#F~!V4G%JDv-7W{J!WE4k z+j*pnHxeHmd~~JCOKz(a)AWkv0%T{99fxV*!@<5j@ln%@nf%S!Czjk=8~HJUSRC%Y z=X{q_YzA9cxECEQ8}}7YK9|Zu^X-wkatPF300H3>cV;2c!Xk!TWtgz9W+CtrNrqft znL0ebv5O%>jtoV|YKJiI%pgOo<_{SdoY~n^dVOU)&(ofR8c7O_H#=nA-S4=#3ir@K zswec-1~~b91NeUe8+bCh@q&ggbrpaXDonSzs8VBcYC7J<58L+!EQt*azaIv9sJ)E@ z77aCSB`oYe!&`M|c$hB#RiA5`7bkT(Y4pa}IJM!1hO7087Vt?w4u6F*$Geh|4qHKR@kq*69}V zoAxZscubTbkJ1lePcbu3&rB;!OiU>#tTuOi$&5iOG}$B6{Ytp@Rom@RyY(q2JD1+j zxO_V_xQ$k||DOT@1p^i4Y-6+Hsr>SUY9{30LJIue7m9f$)NDOn`h(KaVfC&QV@oYX z%@dc$o$-kKhO1RM^3Cd!xA^h96NwRd8on^rM#!jzV#nT4(+hQnH|j)#LaYD#(X!Z)#zw0dH1 z!R9I^F0I;j(e^vyv|s}_iMzESV6%+O43)3tYL1h(A{0t#^&#H zRpNt;7T*9p z5elTt8(T+p2dkD zf!7iSuitrm0#}ihsoK29jupN8%fEs2EQzcD&{|j;CIpRo(~h)wCZSBd+`y<;5H(6X z+mNfpRlDVBwn1tGq$x3x!6$s4oY|Wxg{2kcSQZ2m;hRrZ7V;K{D!ZsAZf+*r+&pBN zS^Rl#(Zj+2jK}4cOF+k(q0HGM1HKIiVN%kO1^~d#}6T@olTBs@B)}WpLo_afd0^NQ z7j$-w$L!rHP$4a(!kXd2LrAs$x!Jz;QBeui-nBHyxwbaf-;X3N{;pzP%&=4ZUeBGLj0xo%QSI) zlZZy6s2&=iZvJB+APRY(d~S}OYV4`bdU)J7>#W=BOyu2gjRNp^xs249t!!^1%qOK(KJE;+9MDRtTIQ(xHqh)JohL-2_*DLJ}RrJ>xQ zGmt=#pjM9{DOr8BY3U-);^J|=N;6q?QCP*$cW1hmW#yM1=?2~FG_BT)I4k=kmc{Xs zgyMH{k}^3-`telGe${jJOi2cC90LAd6w z!^e&X!5j=+Z1|h4BSTS$DI4yLG^DacU2%nZnL-HhT1SiI}LQUI!&FW+Y++ z0}|!HPy7APq%@7h3`VrYxdh<>V!Y;GT2MW?C7=g#l!Eyr+ z>43pK@@F9&s$etc{kt638&p(eWs@;EqutLGxSD#sADTaZf<%0oWKqQZ4Eucj+PwK$ zsZ-}0DRy8Q!4`&a|J|a-^x4Urjdm_2qVEpwH}980p8%7#H(fcou}4Rt=ybKSHP~?2 zrybyV)yabX7lse2HrRN4m&7?9R>LX^2?FQLPp`CVX(JJHYPG6h6-rK^DR^-@_O$(o zO^u@N7@iv%@Ah>A9hJ~pH%7XS~2_l|c|W@byK zQ%THLt*+hB+S)74<%ON=S6ji^+4=&UoJe*HwHEE|?UJ9-d3oC$j^+ttW)c&VIoa57 zc-^^3Nc@77e{44OHXk#f^XO{@ltd6l*93~{=&atuV$VD7dJ;^+o9~-{8(zW>m419 z=n|CR)$0m$Wa*lWnUnt$N0Y=l z1B`}C)6;SEHj&Zs0s34TSj?t585zHo6%717M&b-{`NH_VK>-9ErMJ1cxrU}@ZwM}@ z^FT~S2Ep<0pP#VzB`yH$7Zn% z3B1*Jdou}T>d!5SGuDqxR$Wt5XSr~_e`Q9Bl&j(NcxWMM?z-V$1Mn3;1)4q~==Fc`Whvr}YH8J5XLEv`<5J@Da4^K=?e1CH=(_m**K9cSuz#mvau(`R}6N($$CzHm- zCqJYDLrOrP)6o?a!LzsLvE$-GJCgSP{d-t!7C?@NJ>6o4>2=(m6;znblmTZWqCVkb zi=)+Q2H)+UTTq$7g#nbTi9{(PfwJk7@Mgz(XxF)=X$ zf1mY*!`*LB*DiKOg6Ora1uYh8Q<-e^5$p_1Ra=IVdy~m&xg~2H%KyF#$U}yLPKPEDVhP<9k^LMKThKv1;8Tu(}Hi zV{gY5O_nwIRoSU{cn&Z3PGC~p%j}=`n*BN7-!g>&zZPlHDT>ID_2$&>7#R5U#>Yc~ zV5p;#lAvo2jKA-7Bv5{+x8t_A`6?^>WuXLkDOEh=i7HwVXkx#S^mxdcMO z_xgKzc@^y9n|;kL3keOeMge~fDb$gRiq_Q70Eq(_VPj)*ln?uDcKOmn$*dmF>+e@K ziNE+aUS3|RPzZR=ET&vgAoua)~A0 z&Z6^$_>E1IEC!Z!Tyk*O)@36V5r8NRA3m@rCep(n_qN^Z3p4(6s3C+_K#SQ-hNvyn zMwyy|X#~uJzE6%(ZpD;fulJJg(qN}LfWC4frKB8qI8dgNkf+=FA|c83fq={CwbIk$|AaumY+G4nhJpQuiu`jPzt?5^iUFbe&AAAmnk14hmP_*U zjh0PFstkvqVWIXW^5Hk=EPhSY0bh;rvWBwqOqs3#Jbd^_IxnMc8=;*~bE|7V=c=D~ zrV}~wM!v#l0F-t^Fb8-&lZuLJYilbi=o@Rg#;@p}^?=2+Nq~s6DNianjlB3C8UG|JT4VQIYsip7Um%xd+9nQOxJ)#%KbW~bT+<*o9U`Kl0vqa8LzPj!bmE(nv22lt>jR^O zjP^$RgRJaqv6SWrh*EQ}e92U3qUXxEPx&1UE>5@i_w&r=kaH0K+(Fi3n1$xaBE^H- z3jI+gN=i!QGA&#dS0^8TSfin&*b`s8<)o6leC2%0C1$f}6;;(z^`=_?3VP%qx$B%9 z>$HmgSO;x~gXvPXs#~v&U~jYE1M~BPi>{NqW5>tGu8bq=>+253^L{Jn>yAg$M_ob4 zcW}_FtPktmFq@vP5!jpOoM|OoTwGZ5b62}_(KmXXRydd)VX{4!WOve9r1F9nWf#=| zY{`i}b~`K=H=ZKBkt5p~NpJgcgaz$yTmVZN0wMQm(be1A+sQ01btd8uRen_%E>4I*_Pj@{}MTjFCh>@tM8y+7foDU z$zX}2(UzWm;y1t{S`iaezL+jm_X+se{m#qZaRMXMb&ba~@CA=Mwu&7$TiPkGnYT!{BUfNiR^%$99p z(cgMQ_p(n5>X-0;s!kGnK#hovkX;pNIhgip9*~w6gQr~(5*LTNd%9@!e%A$#=9x0h zg%XpA{%F!Ix5kUhJ$W;1Y!+m0w*1NxE z3%6Ig^@gRj{8&2`^BpH4BwPc}_Tgt_ua7L;&i70w+31RjQ$fgNt*&S5*QM%=hJ&6Z z>@LR|0b#f{58F)==Vhz{X!Cn>*}WcQGW_b9Aj+a|F zZN_r|_Mvb8^a-%qqa4G-!+U#s^>_x~zRi@Wv)WCU6!5<1n4bhk?&ZM>KEwR<^mI?p zhxnBYKA%%$(iz2;A6JIM30}#ah4T4*T?~)h9>+M}2LbFs92m+=AW-jv*(}M-^!kIU zeD(bFpu6@ohN{XRhfF2|P!9Tg332fODi{^5R#zxp-&&<|HRi`JuCA^rXw)h-KbX@q zf0K6rT#EY{l)r6+vR=QX=F2USvjbBp(c#R5mO6$0||$h?Y-j8zA$ zmLWLoQ8yV83*e}!SOFea95&Te)6Igf57$TfMki~#eH<#XpADAE0|Nsw8I8(R8lq?@ zp{srjB~@(nJl$Wx#0n^7c{|#tb?R1BS9VB!P=okhFA}STyOd*>ru2^IK*#JiOy(2aDG5 zFOrgycs#CsXihh@!;WL8@ii&zZdlARpLX7BVlEEM>5{&|H19aN?G48d*^?nAAsKUL z8%jU9Kg8tl`&FV^)iW^>?Th3zlg#eC{?1@4yEmNo88bj`&iP_{NX=*mfb4bZl?FHL zQVCgE1X|Ja4?NFl>@L^GEiMB79~)eDh8yg5XtnBB&o2veb4gAnsjB0Z%QU=9@#*#6 zWf4*dHrd`)HZDC=-`||TUrEmEgM9{^0l4=!J1(%YefXvlQ214?Sh6oL~7jWw7-5#QVH`52njV>LU3q32yi74`&f;yi6N@Yz+)yvm)Hh zR_O0ye5S8=a=Y_>zynROL^uOV;1pA@v${B9TAe8$%ND+Keeh4c-kX4ULKx1{fPxYU z$6>!(@whEqKY4b009$7m4!5{c`>UJdl{SYbj^odDmdl&mZUq8GM z>zN;9;hTlr9*&n18IAlu;z?3Mhn=Z;N{^J}Sh~Af1D4+L5*0i8_<0n6z8CW3XWXROkQ;K{qyjVJKJR+vo zN^Fk5e{~uj4$CPgV66gw3!>t!i6jwlm21m9UrQBrhWDv>hFXdGehWL2 zz=P`u(K-`Eg}ZAinaJ>M)${cz4&KQMaOwcRT2S(l-HwG^s)7gx6 zuyOn+f-YPeJ}*J6`N0gjK^8Io8zs#t;{J&VkM5tps*Iv|oNbjxVBmUQ2%^9s1?r#K zhPLD5-VP@}TOH}Bc<_#p^t-R(qmcgYE1tIdkUSz87G{^_DODLt?S8u!M}7Brnv$Ao z!#Y>5>(kQ0t^c(1yS&k|<;O&x%uY)rqIGuR*X$20KQ%^$RAh_m@z1!YKmC5w^w8ue z_=CSB!)UQiyX_a372_JM7%}OJJSz_Ys|^)c;?jKbIOMU2)nYjn#5(Gd3Mez55!;Q~ zN2|&;n`*DqCmwQgbHMq734gQiEhi%IXXdZ)g^%yb)8niS1eL@esEyv0dK>-OU%Dw) zEur|~gJQB3d~ZYiG2^%LhLYP{pTw{I+-Krw)US`1%znEWQaC}q)=ow7O{iyU5(TwU zz4ur9?;jugIqTl;hfa~dVkg36xkX!yijUtpYT#qwU;K#0_RIP!s4k?zc^=km0z0B8 z-YkI!v0%V2%_fJ_bp`kPOQ?>(@-*Pc4Kb zjz}bIEr05R%V(jp!Ce;}7a8dldS6=$=0~&foc?51t3Hy;p#^Ma4YD>URH08gQ;N)8 z1_nTY>2r7tMsI((u~QmRKL~+!3Thz+Qz1f@UBIGXL{}4@BjXad0l`8V`q0M?Y@O6-BF&TfWE@- zP15_YP-j$D_H=*E?Y58dpc2fcq9a1SPLP+Bqy~$OEK0kmA*!a;C+AW>Ru%kZ^F6L{ zrPgrnpZL9vL`r@+2COh*8mS?E+K&jn?z(d0UT-0au(2xPA=fw@_TO>)K65;HBETYH zTHUXmXrI%QzB!hV&zHt!x83xEL7%U+FtgBSbn1)vqg-~XmFpeAc+P zBC`Fr5-t*uBK>Gn^YcGC@2_SPn9b0x$D4(Mx2TkyTwKr@EFduy%Hbk6H#U5ryR@ut ziWRH!05|?teE}Yyr>nQuu6lB31c5u({h)Y&e6q}=dh6-4<&E(hG!d1R|>)CQ3v*1UtgYm zB%w2yE(!{Qy3je5nVgz(_INNcF$r4ChfJ)eg>nBTOm3~qS+2VAe7v+hl-vZU1x%Gh zhFrUNTJVP$gy^VQA_VtxzYEq|9WFK1DVJ-3(cM>Yp~dAUHyHhllO$X~1+X_bI5>;T z&ET}}+U$?kRc+H1NUpVTJe=j*XMy-OFfed)Mx)b~5Zu`-gM9;i*gZCe=vejia8qYF zVR#lra)^`K9Fcz|%Q!YRhM=|Em-&a1mR4(J8ldtzcX#*5uU{ccHH($R;>fc%g%mV2 z@_yKt^0-MVl}@KKC}ygJw?b##nhKX%sCTm4O?h~9eQ7lu+OIM!tU3A8j0&r9pDhxa zMDNCt#PrD3c(Y4yJ(}_Q?FXjHIzZp)Sc~?p3kA;x{4^R#<#gIbh|*|vb>97@8`yua zJ(wup7_2Vvtljt>>OFZgzN+x5!>expmTaH#OnnhH93s;^n`lyGQfa}+lBul2>w zkiJKhan1;cLfkC3gyC{v z@7ozd;pS=`ht)FIqd9<1IOymB%kc9O`b62?!RP|>;TZJZxKE)wVSS(AXw3vS-qIMa zh12`Je-N?@)_z#I-^zH!=Bn_ffqfk#y`gcPrO|v(?$YVB3Oz@sKC_^x2#x(M;+Fv@ zCnqM;Nzp1#zbIX>GiJ~_9*T#jr& zn;uM;p8#$wQy{=;g>^WX<7Qr3LSkbG;wBUakIr_qRLKHR)8;sjNXe{bV5tZ1?ivSL zsl21hT+R6C>iCOJyKUXj*w`TF+(k{G;_Uf)XlAAkO#J1pYn;|Nxc3sx(QBvJB1qqh zttTTFZ*&QThlYkOaP@=@HxU`6N$%rrO|l<}A&t$WV#*;3e)=Ze;J27-s?U4TM$$hxyfaJ!E3&){;h%1$ zl3BPN?O|b|&aTebJ)R%IWOa3UnSh~TXu~V55TUQlMOsdd{)}gv_-60R@};H9`bz?QeJeZ zO!F(XOmK?rf?mev#AbnaqbjY=7lx;+?Q|Y6+T=l?H$z8%aCeMjem+|yTzI4I?{BX_ z@0rXmOu$InXX8-n1rFxy-muK{FU_yo*fc8Q%Qc*@_E9-Iun`bWrxudkBmwyJd(*MC zUt@NtqYB2PFgS)R`(mY0gf12H(UmWxmR2kGe;>uD$*%#u=LE0u_3PMt&j-KWkwlsg z&|j7dN%1?PV`2`cYp5ot{6nz;8}@q|esgb7wF+>7fxfqi>MCmMzG-YW-hsTYIIqKC z0dcO^kG?opQd0W$6&+pqeOr2agb@oM1-O4)k{g5)IPy;74O4GqLP`Sllds9qEuEG1 z^45kt;INvBLQ1N};V1;1Mu~bg(Jd0P)DXy}TZ2P(c6NIB+|Y{^+-Ka3h=fGcDBV9@ zw!I11VI(Nh;!nUkKXSRe2*7yc0fsi2RF0`ufdH^M1B#)|KAGkD`MFu@C2!CEf{O#E zOPuYxFxiY{!_&B3u8wIaN%K10LdmJ9U@{#~mav&^4Th4Kbt@{XaahbT8X@v!(rJyG zp`HS0Q#l2s30VzbUbwZ`Kk?4-dW@exCk#F5n0i1!FkFW|>2>z@8Ubb?CzwPO$`5oT z=#8XtTg=xADfJ;-`RKQ65hN#Iko@lH$pajsOghieXy#jJD5I77GjQTsl$6TL%l9AR zP*4<}ZS>Tb%~*mJnuEc{$HoaJWD)u#Rx{yB)6tCfETLcm2u{a6{(XpCA=f}&H#awO zc+rU0aBytsqJf)z?`#lW&t*-FH#u5G$+Uj4owMQeLWp5g?Sizn?MF%Hs4>$=?@FKp ze12YD9#BRwKU=SYub_G*jQF$Uwmzl0S|e>GvcQ<0>^Eepq`16 z5XdN5xu`$PN7Qey&)r0VRA2j!ygeHt={Z(wkMx| zcyz?;{@GmYEs!xj`YH2<6|e~cZ_%VrUgv)>UQ1CUU3LJ~h7dsmaw}6r~Fvyubxe!8p$Ii(3Vopq%!IDhRoE(+r|4PqELGgT5{9BU~ zc^UPhm?!HHxw&~zy>{p#c*Nm|S2_yh3Trz(OKxvHL$S|WLug2e^nsk_TWNcHXP0f+ zTxem@s@p9|TJp~6$K9}c>y*j7wFDz&6gld9s})asPMMEKEA|Bf$~_9#|97`kQOX~A z$V(&jvB1#K)WIjTg|wxnztgvDxx@+u_)dJQc(LmTj8ZiC&Jc*%i#b6lek7YPo0Qb- zyGDqN18^Uwnf4m-3VZ_8?&H zDPjS3#2?6{wrO4D6kFQ{`A_$#Hg}aPQQcs3I1@`;Cj`Xk_4RPbPn7s*`GZlu>D}<1 zlz*%RUzmEtU*b8?eo@KEn7FtXr)%Ng{=8u+33q!eK#5_7^pmb>nwR^N}xc7=IrU=G9#n<_itZE zt2IAFK1t%2AEbbxpr|;K!t;b4-@Q2V@{5b=#RG-vwO)WpkBJE|t67Q0w^sY)?y@E4RFFs#_n<`*`u94>34U zKU|ku)|g&wkH~Da`gyj^`oDKTT3g90!nLYsaJ^$NH38E|7dAFFr}6130EBmUcZe{# z=7`9s@zK$YtTTsGFqJ({{c=ZMZ@+z%II1G+PI;e_92qIpruhYYey)hHa1S8+#|Dt- zA}L~X8-N)4aDHVbEe*F(u7@BtBu*l(CFkmzb+RmVx1E}r0CJL)xuvD!B@JsS4S8lW zd!g9nlb#o17f}S&N&S%UBhu1nsOad?aeDtef)w=Vbeg6snVsN#o0ymYE)qg6F4vwA zOm3HJ*T2u)z-aPJ3}QjsYd)YSq>aw{T)T?&{GNF?`EU{iWa|Am?7xeIF)cLb-Vds} zYD*&FWKKz{efd&3V?^)loa=vCN&@a61#>m1)TyZn{Rm4!OpGBD; zB#L;ufv;cBVjF(F{=<#)g}cQ<-1ZR( zcM5lB7Y;vi#snTq!Q45P&eoRU78@5^rWr+Ch=3u9@l!@#ktFyFl*vNvQNYa!nPdV1 zE$wguCDhNKKRF!_>)nr%Dk2fVuLmtOJSYiZGFGvYVmkfXg{-WsxuBqaEiGW_NyK1b<>dt>C8uXq`iE)p!6`UHRq?PlAIjqTghl~0oM_l{YTF1iY=1b5a>6{3GmN@fI#2o z7eEXZlf@$M`?GE(>cn&=2Oj*>K#a$!Ow+s&F)=ZA_eZDnRyTVvXIIwZ@#=PfSy-HH zA!cx{`TJhfOgtS>TKQ`Yx3{x}!-tdYEG^5*%TFq|{$S6~5sE#);Q|>76?e#kZlJHP z-u_^El~5o89*1SsFA{27&H$%1-cn_Jd|Wz(ZLCI})tAL=nQP_E=6f3ip!_2(>9 zt+$}JjV-7^-GJHoeADsxn9WV{*KbHUD?&oTLlQ5_Mz;qCB_%XuVTxC~7}3nbKlFQb z1vIpH7-?C+{_iP;{e(sf$?eW{7mSWvUJpP~6yR`coF_(VhX2im@tI^0&?$F{rt>6c zDv)=4$~ME;pD<$lHURJOcTB&BjYh@u$V5J$#cFwRYRVn}6yowL%x$K>i{Box*Mh8Z;6y?!4AB zBRCxnM@L5qy0u#Mx*nHijc#SCIYS++-;ckH}q<~ zrE@c^{I;O;Ke4Ii=f04VOd1a^lksS5v9e#I^t(dVL%qe@Ga%kY#Kj4xgbWV8C^(u; zaZq_Ep1@$85?Da)u8fI@7-Y2~ywseChVc7cJVqwySIlywaw$k0S>}E57b~BgJ)q)Y zPPEo=Z{)|k#>B)VlS%vT=(tj2x6=)S4ulmaoxjf)JHvbX`*@r`P@e2bwQQi^;k6GA zT~Cges*HwL!GD^-z5t-e_JzxtjEu|(xDJyAUpG4LE|h7t4YS_|_?oY@a)0t>oP-3_ zlp~-O2z?+{TI?Wffb`|~FU=3+kLjgOfm60SZ+-yp8oAtlVtpwh3&#iZ>pIXyBJ}3w z<|K1?pnt^0`8&plL_{mBmYN#?bHIY;fio7)`vaWJGoomkEh@(m>c};SrbS9kv zRhYEC+GM4uBF=xlp&0(f!m8Y0^Y=Zis772!y4(i?LqjZV%uIGb%!ejm*i|tAX|Jv> zfDICsX8V={=EoY41xVO8tN@Y}0qlEPU2-xuwQolB_HN8ZK@sfU|S+^Ut?SlrTtS+NK!TG8nvC5)#2qk*?=CV!&(Q zaj`9Bpzs`CjL++Q4*0za{T?QZ`4|*+adFe#2twfNq9sOsZQqgC(avHqT>=hD@Mpt$ zz%$BmZ~T40#l%Xw$~h7qntvM_8rm##g-(<5NE$GZ!F)8xbB0EO0J6MP%4fc(n+3Vq zhrvWf8_Zw5Z&aOui1A|MD%FC_pwAYmGLlCCx4=02;pW8gc+nXQM7FjmP^rHsC%=#9 zh#4fyL7vfRSuc;Kv9jKdwu62syQt_~Pu|__S(4Qo@UH4U{{7lSCQNUiS)K$S+^9;fuo5jA7u{ z(t2@70ZL6yQSqB(?$7!J43la|BqSt&?K^rLd3hlV!LWVfc!I@Z?75vHNI-zagrGJQ zo|YE)0cKvu&Fx7Z3})Ppv@9%fA1%HgflVEB@e5lcL!ppq8#I7^o}{ECrMS4Xw6u0U zK45DHt|BmT@p&)-uinir3My*mtH;O3PaU0@^6sZoK}_>@6cn)b_9P@EJzajy_Na=# zJ{Kt#qZJwNlxaq|87xCp;<8%egw+Ac%DeY<%9}G3s4pA9LeYl@gOcC-a083QG&=Q^ z9Sk4o^@G4DnjSU8^J@F=g%NnnCrc~O@#z=`o}9zT{oYa_yPp2Oc`xn7SxeSz|3_Rj zLgA}ot(Ey_i$*ycUR6yT=IrTAAMxlskY!V?sIVE)WXCt;xYB+sj{nb38%HUC+c;gx z;&OK#eG;zM+AfqClXBXZARm8RV>N|=POP65L$aG!zVa(m{pPrFs;C`vroiTWQ;f8v zdTey`S7oKRloUwj`X>d2g~f^R(u@#zgR7-`wNH%I3GIwlpud(_m?za4R{-CI zm6g>pX1RA4B0?ZGFc#$GP&tJgX`^c=_`2Mr7I)92B~5N zwOGi1|KrD^V9YvEW>(gGWvp62*m;~1QiQ+8`^4nvRI1gl;W^8v5q zbjVO_`f&t49`{8=G%~6Ky0Qd1UDn2Op*$I>ut{`eV&d@AY_x0!)e0CWUkG!ju&^+& zuY$E>wUjrsP9iZoqx&w1h@c?u(3=^77#w+V-roG#!l7#00o5R%Q$r2PV!(iE?Q*fb z8H5~7W_P?0kpmckr3!tB2dsE-Io|gs^dl1{bAFrc>hfjp;QgwsyyH?GO^2GZ{OlZq z#ZW2BT%=`HC?c2BR?rxB|7)2l@BLQA(#`d|Yuh8qWPu2Xzm{X_-%9#@toRDe51JajcYaukBtkuyiC2*K z`8^mCs;iWg$jWNHGt8!}%=48HrGqy)G8Q#9X0S(wt8MP^KdV+O%QPDE^TLj~d4c2p zc*~H>$nEWu-W&25hK;J5aFPSKX4eNXc}@GjaU_P!MU1awDt!k73V>+I`4$DV0Bob& zs50Ttv9mY(V1aWM;KB-BQ@u}s&&kO#F!^?My~GI)b|f!PyvK=rnF?UI_A)LkEVSDm zG?}m7i!av}d*|t2tG&`XOMtBTK1d!x6b1tx-MFxPdD&Rw>hcN`86H8BoR?|N1s=YY zFgb-cyzbC*663SZmlgM?4)>8U-~=nxHDAnjxdAIw^tegKyNl2o`pKvQDA`-r*QKDe zAg-3Bxv{Yk7MscX=CtqOxq(1m^4;r$p~1nAey&g3uqz9D;9r3p;VaO}J}_EeUeS(KZZS6ZkhB_c1AU&3J#QzZP|+|}4g0igTmsG6{(*{M`!lfbMuZm3l$ zvR}W_muM|@Eh&NkB`o|QN!>hxoz=UG0S>3HP{Kbzs!n62O` zR$DwcK$2eDf39u?x{UB0ID>{+bL%F*DkxOfR?Gnu3iL!W>4Yy@s{H-^O}pododA=r z0pnJnk6!eT;NW1O4n=@o3}68fh5a)0eSgwqYJV!3#Y$^+Q?QyY>=KAo>b2&H^!gvz zGlf2hw(t#9n@;TmqPbo=PcqS)F_>+o)lGoX5-;Wru!~_h>?^E4bW9VGL-7|UR%^db z+3Ur^f@ZcjFT%tCd<$cC7O)`3!d5bj2jlj-{YAW;oq|2Cs#`I#A`RrH(END${lnGK zV)a)|dHvzH7UB(F*GXFQr8H`AbaZspieS%9=k<`jy9g4Z4-5^)=FxlS zY?d?;*4U)~mNFAibj18%^`g_Ly^{c3C>ZroJOu?Lwl_C(va@~IBG}&I;EdMu8v+U# zvNW|IK}A+JB0XI@KMWV(L>iqG@6nl=@X$~pf#5fy5@LLd1cY+1KsC=8VJG}c5{y2Q z$muwHx&{!jVCdHNCIR91S7Cl`<@JZ2Oqjfui(oDTwm294%O6S4 zOpzr<;iRE~jn9qECF%E-Ljbvy&Eoe+l$7GISOZB(;zdRLH1F`aUS)&dc@8S5kQc^v zr%lvXi4hD6Kz~wXN-nN8hmt~m*cAI&PIn060;WgvnO{<#Cs?fniO&9@OkjPm-VnJA zU317@>)8F#9wWyuFQBNbF0U>jr5^2$c6;K<0@dPBP2U@aYv=ZiLlfZR(>Y78pT^?` z3fx>z%C*&*tirJ2kky@lR<&3mpDzFbzuW>m7*sG%F4g5AWMG~)yE~T+QT@jwUwWz4 z4Lg(^s{sh*aCl6Cj;Ao1*HgtxHRfaMlY0{&V<5yiUvINvqu+ZCCfry!@2-AydHD&N zWL93M)78bh3!s2|UGu;8hs7<*%IfIqgU9IxZc{pqdh4@s5@5qoz}+{!OrW1uQX(m& zZy%eQT57YxhqgYtw?}#PGe@`6|4%%P85mxiKN9KS=%}>ZgImDO#Wk#5@7Z=Y{c7L( zY`q)6Umk!_fM;IpstmYG`}Vg4B7O+G4#Q)Av-Ld)8=}JuNRhz&>sG(vAJZ|myR2Cw zB;i2&vw&^qbwYDZ=(VR8$e6Afji9-#23@$u&JA`pe_P;cyEUKLZoBL@FenPeq=KhN zx>1fyPCh)swY`;ydGBwcqC$;2m#(7%d?1d74tv@7@dj-SJoO;_?Z$ z3D>)^-EqZIH_kbkF&NfZ<= z-6ls{k8;xlKyK}AJELE$Be@h#d^m>#EF`mv%dlluEiT-ZThV`r`x-jLpe?yF? zpGx!T$_jvi9RFZFN|`L@3Azt3WFXhM-kPuU@n@l-%J`c%U@Th|Ka)uKmyLWT5hNjD zvog-l-FEYU1o_B7$TnIQIuMTekFE53>6itbXSC%Rvc+in>wKApG&cYAPnZ+N_Za>g zW@g6%3JVqzfBq|FYXbuVmJ;p1r)vTn?{D!50QV3QXbsr4sp(RZXICN_(6tCk#%qoaV>!ouO;m7Dv?9gGCG&*hz z{KDVJjG>IuLoyQK(AD28qzT%i=n_Rtfs3m}PWo=hMUoBeK1V>!*5#T$fv=H6W0@^GEyGcOXK9kEemy7k%ZYFo46L{)Bae<5g zfPGl|`{(EHuXn(9(k-$)pvEsn!^+5bzs&tNmI}mkh7a5=mSg@c3lcAS(Kl-#;7Tg= zzFlE5Zqm>I$VJdJF2K*w(E$uG)I%Zvd^UKRiYPZZr9hCnaOIDIzZv~h2)IL_w<{0> zyoZ2tC`qEIxHI8J0@+;~sX1`xC_mF~8L-*gPH*EEbp;mz@n>~)HBgwg zaE+7+@bd$Ye3)f=T@0w5G%Qc&3=SZhHD&aoQc z!06OZb_#Eb-U#uxi=G3YIiRj08GgT5@x{pfT?kASmcN;r+f?seBui zskOCrqbHmf48gVFbWV#(;xIf^F?jAw_vj9er_+W5PWi-$b!UHx4uw%Hp*e+o<$zU)Vd(C#k&I$K1K})&(1G2^|w*w}Z&vSQs)mO7Rl70*gsxv!` zEK5nhvi!uPdK5GcL#flHR&FiE_#V4i^h*nHuoJsn_%+s`nk7{4^@ll0mDHFy4+e^uE5I@ zvcM_m-^h{kdy+z4$17FPv(_^T^z%zw%Z`GfR#8zASaR@qYz?{s5kHsNJNP%b9lThu z!*Dsb?;qL4-s8SJp)ZoIh)8_COs0b6a{bw5KeAze5bxQ=#ldpR!-h!sa2mVa)j%vZ zh+vd!{{?s)gg)yXt|*&-5q$@yNLdBo&K?;a7N*>drBW_4U#akkd1nLM=hj>2-|Xxz z0Ka&5dmAwb1$J{j_LHTSU}Hzi6@OPgC14X2REnTrGr(bk!;keW!T=sUFe}ToxZNBB z7Ej;#<^-8^x z&G@G5rQF@~@%9+M&pZ}=6wSzg7jG0 z6a}Q-e`A`$rLn-2M}2FruDvoTDwTX@SHb)_Ha5nPOM^pgZhEZPYhrAm+M#>KG)7zy zR)3>A7;UOhUL>SR^j!o3dOE#+&oA9h6cm&|Au+K+;5wPTUGJt|c|{L8K)}`K2*IJ} z2bColG}P1}^TAi^Y%40ATu>N9BV5pp+9~Ec?myI z^t;ZXd?T606hs5H*#~^&zyYzexESd7s%5;^Lf!U-N0cS*+keR_+Ujby_XP&#hySsk!n*R1R5A4R*P@lI+6A5Z0+2L ze0cp7mKw8SI4U(j2@~Ld`EWoogz@?D^zR)b7gw`ksoMVHoIAC_y8w848yh9c*ETjb zn3%&I$bh{Q966VZN>F(~9mM-a(?(URMr8|{wB&2%-5|Yw&Ccgi3 z?iu&qFAT=mu=ieT%{ia?g>lw&GzE$sk^ZNH>t5$J-#)P)r6u{R6|SEHq1*KBfysEo z`0db={x{ZQ5pC}Su`!p#!^Tm*gN+|)<(Nt|pixq*qS6TeL;_`N`mv0us@L83{IpI1({4LXe16k zdkm>@u~V9nelsaiKvw}59Q+nXxVo`%wc3{-IQV3k*_urMAKdOP1QLe`=A;zg$Hv@Z zwwlURksY5~hR|emB4aZ(HIoGFT7Vx9E_yIMlMP(7%Y~JctV93I8r-eGxyh>b6EV)& zeJGxeAri|e{Bm)sT-r6a-$bJT*6MpZ8aA3=pWZ-G5OGTVXxwqBiq^k1`|f)zj)4AW z=ENgxNMb$HMc7=qSz)cY?wC?cB7qs@%8;;QaC|Z#lc2-~0bUN+cL$E{2xndJON_{> z|AK=DHgv|SnBwAMwx7!D0_7WTi&-%f!uQIMET3{i=j${~Vq18Xc}mHREj4;}mIvW- z&ab+|bUX!2rUP8jY}BKym-kL*T&gMlCQh%u-b%Tu7Pd4@0@8-12{3KgZQU{Rs!EjHl9C7$)I0?*X!}bg{-nRN`|jY&W8wP!v5q2} zvX3h5g-r1GiHT>PG$8n^#--+RIPWZg@=(7Wunq=cQ}M*!rKhFUkG^toxkx~vgG~O{ zOpSw?nVMSc^7A|3w8drCPKb-^7Ry&E#}qG3uRJ^iLUkae-_Y~)vIVJA*N6QMzo|jqYKYtYNOvMB;HlPBB&Pj6$!X%}5 z)U=QQC$ACDuqevSd}9um=&)bHY%O`aX(`1G*o9^oVyuVSBfh{8kJ6|}R8DP1keY}P zLk4;uHU;cZhWd*d4E)#W8QSaC`K4Ne!WwS*DeO2&z%N{X(ehn;oz6whul&%Q+V_Vi zGU2jm)_Y_2OMV|7QnGsWlFYVWYjC9|L*&f5*BWtMw4ov(+}8d3@42l{jPbHEj#+f# zZr;<*UkYG>BOyLZ`2}`h5`w{f@U)8BMwT{9ji)DdNO{&!T2h3ltLr5I%7d8I>bKUp z(&iRRBG?I9Ve%R5Wk&Y{<(iwAS%!{E4)0D=;Naomua_ERQ&pcH+%8861h@c;%yLg~ zCk5v%6bM1Pmxqk;~WDQ!y5E1!uiOD4!xMPA1IU z9@|d7bMQy9vD8e>I*bMJtqT#j%(+yFvZc8yF2z7Z(8+ z#U;YDAF?6owNABy@OYf3rP<>AY7AD7<8yAf*8PlKUdZZDdmrpSB6Wl<`ol_($r(C7 zF^JfE`vzhmY|pLc6DJbWXi{F8R14}3l}bpZ2sC7_+s9@^#}P@uGPZjxQ~c&7--^1b z(*1jXLx!qDLA^-{JVc-H}_hx%g;Cfg_?@uA?Yr$j+iObUn(x}2hW2jf5^a3n&;ligs=c|8mu&YelX|2dfh_Eto+-TYqW~pK5E6&)W zx@505Wb&OpH2>bI@tBL{7UaI;$nt>r=&45+*w{w3-zb__}(kA0o)Z-S{%T1CZhl>_-7oewipu) z&A|4;Gsr&?5mpr|k<)e&B+;Rjfs#?HtE-t!He^dw2z|e2%BBGfgwuAF=*`$o^b2f2 zLf{HSc;$O@Mpc6-4q&gkI$-gGSrp0RBaK3qe+Y#HQSH)F)BKlQ|7*?gsLgu9eJejB z1*@X2;dP9sZkOKp~KRE%$7j05oNoNHmWuDuk3z^O3E9uribG|}g2wr3&7 zZMfE9xMI%L&-gT$%>b=AxTKy8?a1P2MsULgwF+X~$30%1Ma({bm{ z@YwGm!6}ceM+L-+`dnSvf#D(J=Ol&+Y*R;`Z6#)AGGXCv^|ky0y{BhseH4d_WM>Dt ztIIR@umg@6$cG9#A}X@N!7t%{O>>bYx6c`sxAN_Z<4_UspvaHYY6%NDintkr+a5tF zDNs#nVN0csbQEC=dH`_AK?!&q95JvATpWR%oo-bIx3{;>yJObVjeoj@0|AwQ$MsOl z?C3QTV0S=T!jh6^z`z?cue-o#4DAgsxlYw;N88nJVF?;pI7i3Rc+fU@&3g%Oj$;{I z$8%kRms156iic8aa&o&9c-awRva%yHGk-yO2Lq2r#O2%mG?Clfg$h*10Wk$&UUi!+ z**$cy%^7r7V(aFzEw|UmP0WSl3#kkIfu6WzL4( zr4b_{e;Be`ddq8~y1cPO8aE9~gNU&=_0l5pfHJ(L=p?2a_|We*{jvX6M8iZM0qq@$ zC?m65DXZ*32ECd5G&es(sV%(+GT)i)WYYCz=uQ?ESO%_x48VseEtYso*4tBK}~hXlOfhc)UwP+^-+ug>@9-jdBs zi0>~qjZgxe?6NX?$-bn-3@A`#3OL?hcSFGdM*hmm3YfRVyy0(k>Wd)kf0od$i)HyB zSx>-bURYAn*4!*5En~OT>H=(GDC^b!-#FUAg~+6t)&}cXB7sq^*b*r)%ibN!T?5!z zpdtaVatYr85)!8N)hp26@P(N?E0o-b{v~|^VZ++W3BG~uiTEcA7&4Flp;S6k0l&;P zjlsxoo-MQV8skNPUpy{iA!v$QqVO?w6mismjV7K_kHE@jbeLl2I?W zr{cA3y36iC8-rbwfr3h_opPj%s{2nhO~$+oz)D}J_I>CHeI+l)9XpH&8p;+5xjSr_ zm6er^b}P3RBY1`+xdi~vXS+Dl-@gOcA8{DN!%z0`fcBKVGvRlzgG-3%^mu;*U_ufD z4#Whtq!tjx6nt|zJ&!l%PLkj@2?ccwTSH_FJ5CVjV)ReOonJPTew1)`@7~~f(p~b~ zgh&R7~#D&fSc%SLbIsv1X<=aQc$jbKiS7#%`M%0|Xs88w; zq0wgQY|g*<9W8UsbYpG7RK1}nxGV{s;AQ&}p4Hv#Xc;4* zgCb>V9XT>$QBo|i)`E3+?Fg7c2C2V~a&QJbs5Qogf^FP@Y;#X~4k$smOMPY8W#Q>F ze&0&c&>VUA9*E{uEuA?1nXWj29Va8@q5Sz5m!G})TqrWsYyIkPr5ua$&v6Y@J9#aZ z7Hv&&%W=YD+O#12bEFQL;d2s$QeB(%7yQtX1}YEzyIe??bP1c7CWpMfx}Q$gy9wfQ z&|`8%{fK>iUbUF*?g*U=$B=#!Nn{F3&q#lakfh9;+CRtyJrb5L1}KB_g%>YRyEZQ1 zt#+mc@GP~{v$zVQ7%+^4c@bo7VQj^dFL5u@d*PfR7Z2#)Ovx;WhK8^>XvxG076v*= zh6w7}>V=eOD&S3H(25)u*_%K$?CkXve5(0kYI^z^J!iH*1JcTbRFc!VzP{}r!6>LL zaUX#I9JC5GHv^_1ZNDK>JR29D=}oJZ$w0d8Jh$H%^ipdu<9>GPW)%BpAj?;Y+#)f} z)-}ktt|)fYViy+bhN0DkNZy9iVoJV8WcRECZu zEYS!C-&2|dltvF0UT@hGkGJ{4uHMYW9{M@ba0?K+y0g{Rd6Nfdt_gbmo^Z<`^&IS0 zvNG@g!+h*eEH&wxT2XP7ErCfHgqKcQ@m28~LyJ#<7%gnpskSECx(b=9>-v{g2QvBA z>bDGJxEAP4B_L;V6Wa*Ieb=*$?c|9?`SbWoRr%9gUvf@ z@?A|j?tE|Ax)WCW1TUs6>&)EQMaV5D_xU9F+Vq~<0u6bV( zt18_{t}d2Fnw46Qw6^2b-v-4hd}{#6cu0Bs9_4C;0r9MYuh9f)>nSmQS`+nuI0A-H zCntXmnynVrW zInWF3_ewmA`Ui(tnbV8(;*^QZuY~%oGd79Z{^c4D4dGx-$ zQ@|pcn^iop7tCG6Rk|9@fCeM;s{4I^0Xy$&DlvV1pT(K7S4V5FP?0>2_FKIdSi$%U zCo!!#2!`g{uPWQw5-;6;HJxCo{U?Ipl2@WfJ z#Q&Pc(^MM-?g>#tyUD9j)hAgqYQPJ^z03P=1cp-?eV$|n&CVzu8Dr-po@1ZBggqs_!;@bbJwQRujq9>0N`%1<~6`nL17}pw%qK2PrHG z&E|_g9DBN~XfG-nuigOKV(6~`F#tie(4sQK;!8;nEMR`P%^xNu{hgcy8gM`KKGNp1 zAPu89var-Q7ya)2`Qjmk^@Z-o#OqD9H)N{$0dV2R{$Go@`+n;?)$ zMoCG{tkW@lHBcp>uY2)Z>u-q=T~hhb3Qg67frme8NqT`UhpkXla&SP-*ozJ%EF7y} zqhmvWLlNzF%6Hm>fWQ6K77UY;n`!`QL=m)?-Jb*iarP5}K+@A^#rWvH0HuX4ApmX2YF5zq_)Xp<})hy+FFQ_O~iQoB0cp z!l;(;jU!YP`s^gStTeByDN#}JJ}Su9%JS$pk^;enf*!RZf8NKj?vatj&wV4y03hi& z)}JCw6il5!Ov;??)vKINp!|DOsq_VSnl&leuIK+;V0c zSwcg@EF-m-k8897kJmtv&s(y;qAZdbV7S44_c!_HoP~_& z;FvrLazwi~UEdsDrxURy@J*ihLLqMkqflG?e)o=9`DfW*7EzYCWPoK|_r9u!sO`s7 z3$(XC=oz%OwY%fxp3LF0fQNy`m^t5QW3|-#jcKbY3!8PSH#Dn7T)|TZcx7zu8d=C4 zUCG6_;a$G-g`$lCQ02y-1bkW|;&5jGn-^Y3TU}Zj6j9q;NyJIo`9mRB;c-)Zc2p2X zosBA}JY56`8b-u~M@EE4#)J<@$k6WwR$up?gBEVL`=suKGFNHv&3J~9Gd z;)@bFjMXGG=838oEP)_v z8Bpz`#U2(61?%!GB_%(hs7?=#MBPW38B8d_OJ>noEx}Qrp*RydIvzBu9ywiP40d@3 zSb!x_FCH#nzi5vuibzY3PbILNzMu%0-@YHtoli^qjK<%AH-#6X?a;TzaJwXYkD(q0iaeO3jdWF1X}d`zPFy8(+NEYSI^E}+z)Ty zd3l*FmbMmR6qpTsq{3oB=M*!6A^`ZApzJE1HzoxiZ|8=pQFvX0KLy@makI`occ?K+mOg?^;a}s*CvtBQJ_Z>PgQQYYR+s1S@xCD_hbO(VV zfYZ;;rhp98z!h>+$k$Bd04%4prPY4)8%>iN+kYxFzj~Gmku?I|F7q6 z_xLxKMl(}W8o8{U?d>6%eqFArr6sQ5BQgUqAj;K|P|JFX`^psh6gGbN8lW74Rg9$m z$)J$}mA^d7WrQ@?LT)Q+Fz*MfMxc32o}ZsUNI2!`=`asGY@d<7-C|!~;1d!eJl^)X z`%YJSJj#~U0w?aNIbb%fCv>(Vivz_fA}k!QI7$E}1Z)i-`23*dVa!xZ?0otsf z)Qzd937+otgKa4-IkL_-5sYWc<=qKb&E*M1QSu3W%ec6V_)R@!=2wZmyjO# zcDZiDfco0`54$See-^xgf>Lm|>r#LS*(FXJ1&iFE8 z2jEn~nEA`Y55+X{baNH~G79@ogZ>98=&OCL7$Gpn#7S1tpe}fSub_$dJo8c^=TQBL z2w1ZwoF2n3`wcmhCJg8q{`0~eRq&vLXJxGgrbB|hCm-(hzF;tC3HG?dzBuh@=hNi1 zS5Yr``x?mU%81jjk+Xh(&YsRXR{*Rdz#?Ih ze%K@N-H`)ervL4ab+2j#zw+tRuk4*M@;chv%RT7hoDNA+jvc2tF{8{?;O&-oDvGJq z`pK%pYmSpW2(oQ3Mf(Gq*Rsw?*t?MYU>LgvPeJ)nUrv6EXqQ01O`0bH@Iw~yVV@FgPx2FA7Zo`Ppm@#-?A;hi{ezID zJiJ{Jo>vJ-!KBz*T`D*|qaGo2$S(W9y(`^z(Wb`QjSLOQOG-0PQmL>3Y$8zC@AJ)# z_@DoQBuR~d$<@J2!phkSyyxul`5@T91aV?8zK1OI6^+k*+WO88??C)9TcYuRRIJUm)b@rKAG4!-SaszQt9)JLIA!mSh{4vepDc z)}HoFb#(SaE-DaS5g?&{2LivkoK@Dp+Nla(Y*lq*-^>opr{P=dZUaR$O?6chIUN58 zRr=dmX{dBZkF}Mmqo`1k8%ay8m3uH2B$yCHIM$@`P_A#7Jvt`+_NDt2vSUs|Q*wRX z+ae%)I%WY0@^wjo;f;aeavT3Ulk}v>wecG3^Q17WmV0PEDXY|vxVR6!ls9^^+Sss~HHMs=YbMfreho!%>3?5Kw}E(9JZBK))cKp6 zfH%WKKmdTaqOl}TN9=--!42;sxl_O#QwI1x`{irG5zREPg}B{_S*$+omCsbCk#qxz zBn?Ib?%k>FrH%L@2e6+n{!CMG(}o3|MTY}HW4q;g3etBGZnRd71=Eq5B$z-id@QSe z(F2QBH~~cKy04@JKa@0-#bF__h=XM8>_JWOqKSD`lI31TUDy2-eb z1gmCfF~RdL#Z5p`SO!+L?i1K_IBj@YjzlQKiXy`C0ACZTr}=yUE7As>(bGIHvkX%S zF$ukv!zKse_3>5B)^CaLkg3qndANJa z^~hU5^KpOj{n3hB(i?4mZ(+8*gSUSr$^Xk1r~_38QO5+65Rk}w2m0QRYYX*(8KGF2=@KL?|pgh~mXVB#qL%{n2ELj^Qz(o zE+1Ql?X;KmY2XYWu{9Xp4uCf+}?zb4AUD+2lS=E*_f!x8XD%FLJ;0>0(;%4UB!Ur*nl%?K(Rgcigk+$mGui1uB0Yqw_Z(9 z?wg+u1^R^*VFWyU%XAnE$R9#fgTGkHP-?88P7}cte3;|1@Oe2*5v# zGJK~*pA>!S@O}GpWY8dW5<(HVEAC7}VzWr*&V2kQkZ=SOARV@MC_bx+lRa&d2S@e@JIk zp7VR%TUM%*Ic8B&lm2r}k6TE5-e-8;>40XC4&@ioWIqNdsZIknB?BLtY|WJlCN%Ut zYPqHbpBWO`XSW^HiSN9nmcfA98-`6$9BAu zvbo$gqKs>6Q>YhuvhNi!0Z^ysMePKjZ zRaW-%3mI0QaEGrCoUJXsX`fX4W#RRgKLn7?M&mx?MxV3uwqqWzkcNvZBjz)&jBH!v zkzkY^hi!%McTSMz3+s*`>WG|CHzBA`I633}amlQkif?ev>8Mb8Bfi=eeAVS-G5U_> zgdon$&Brp4!7UjpS54yzbZ$@jd|Y9JWK@@RWAsBqCO%#nEA5^NvxXT*32b^Ndy@2mjY?wjJlHs`CpA`-V00 zEupqalvt!OelcRual63f`~j7eZhfuR!z9*jypTg&?l>oVGARL7{Da0^|NZ)B{%gu; zBI6)CVUw1pb!EP6zF=LEW!DiPS=MVgt-t#!lBv1@PhR)YEl-; zM6oGhi)OD;@$W-_Oc&0HCW0%4h!1}-K@b1%fxEGds5HeU zU^&zbz>lj!mi|osMp!w|Qk!5HIikZ>cZL)FXYQ?66PXf>4!u<^YweknlS}lNe8WY$ zH4zbmjn$rJAnJXW+Z)z`ho{j`FF(YR3LN088%~-zOx%vaHF~RIC&w`)Y+i|s4C?$Q zU_9Ip!Iw9CJm_bXuh5mZUyH?$6iwXMK&PTbyf2TCSvla!oX_a$*zoWFy8DSpO?5)c zKm!de-A3;r)Y8xPE1}K4U|x|K|8g*?O}X^}#gppNWeeu@q4`NjlJMl^^S|axc^r49trGjWD5xB|IM6@)BC3%3jxY5FPGKcKh;j zRuC-WxAsZ~Q*mWOMGXUChyAPgq&cW=f-4*f1l}M*&kag4*yU7Pmd*B z*PqI@Um3QrF(0oz{dHAtBHNdt|Nb+N-;F_JtD|!F-5d4j%B(}{w0CBeaCS|xT7XUHC`9@2Z37}Acp#uNvd6;Q(d)K67Q7EeBRrt)hnXdL_N zba;k>8IBoi9)Sa7*L0je?)DgcIL7kEvZMtELeoxQd^KOEY5lcaN7Tp6!QXwH_%=Yc z=0)uVy6yc)L7nYOIm$W~> z-KBD^ysztVvASD~j(f5&T+Ma&Z7S1dJo_8VvVrBzDcWZ9hMmCpEkEypJ|s1ym?M5B zn)E4LAQNsOFUhcc$Do@Y7q_@TVx*!>{L%US#*w~y#bPmqq==?dLdv0`;l>6lE&201CF*L98q)U^wDg}=Z(wK)mlw|0*M38eb-8O{C4*buXw6#h zZqm^-y3QY3)_L31HFOE{3&rz2-lI)Hakg3oto(u_h2J!P>Un!$mxU|~ajPyeH$Uur zI{sPGG>&lHS8{qKuBNWWI*IONIPikeEmRbpm98(or<^;f}i=Wag&~>N*{*(2Orp^wa4iVu`|dt|nnz`NR$N z>#X^`s5SiaS{w8E{8%hLKvI7oXymJ6PM%=-aWJlM_(%$7wQOAQJ+`MyfAxwNL#9!r zX6C&D7X##!1$P4S{S(4~-(?e559e;&9e9vq5ZYC@qjR+Q(HeC63v;}1abRi zJty74Q!`^~v1@L=t)wXvqvoW6`_+ zbYH{iPgh&ktU|>l#XJ>T-7E?#O8sLnG;c_F>?Q7=$cWRAO{diiG?-ZUFRa$tYCFU2 zCPd3>r3%cW;9t(zt{EsRDrzWi#&1ng@Nm2pAPa1razF~dUkiIdoF_XIzNIpK-GPX` zCp9GZbocWD_WNJml%-?Cl^b+_YCUCxJlziSx1Ok>?}eww^>F(lt4KsP^Vh+qHF)ZB zY=c*|sOsGGY`XpAHV~!5U!YnF>kZ-4)O zCxJ;`lc8KwtyG~ZDWSp1MoxY+iVkcjkC${BGfgET4-Ho2O&rzoIkeN_)PU zAjhEbpKdC6y5iRBPu!xBH|^#r`M#H#l^dm2EUPmrxUug_fB8CRP2KtR4?KJovQu9+ zCB=TGZyqfz@d5(jvEqln2Slzi%h)F$%p;Ii>?P!ivlo4D#;~ZButE5 z?NT1O(TBIDt?-o(6MoMuDj4sMT-2`2ag_-b$GY!waHg9CJt(gjd%~0QdskE9Zp^=Z zRPK@QnfrDl9c8`XV^)QM5Yp;aHFxM2trKIr@J0pxR9M-tED10hUr(|O7vCH7KvJ0n znRIcv&76GkARA0)Le`FPT4SIS04XdyaT`@-N?*$WeJ*l1`7x_9dODt8m2EXo?p3Q4 z)_a*}3yJ{Y?t z!@r+Wllv`|$Q?x0pW}CNlAF3oOm6<=8%JS!oha(-Hykdo)2wW3;yygLe=lXnV_F4K zJa}&Q7w;40x29Dwr>|9g=&)1JTrytP*xTp#J%y+VAQM`WTi`*=%!A+XrQOExT(^)G z;#($Yjg|hYpRrp|nO)S8HWZ8=7>U@d!ayhIddqhD80h#)P>Dm8hZcsT&Nq9GzN*NX z53e><6?tRVt;=mfVSGHh(RNKzp*tmjJXb~L*~?O?9P!yKlA>ULi)PT;3YTXOH>t0k zK%7EUo|l(ORFr^+M_pF-XtDfwypv!w?KLeni+?+}+Xg)rdT?Oi^intaIiU(Mt|ILj zT~aPeA*-*UI$CFPmg+KlCrMp;I4ra?q(uMgWkXo$G|p)OII ze>Ju|I-2Ik9NTi4a5<)iHf&xvG;#d+W=%KvZ$@ulKad4?Uv2MGxf=5a-V&**Ohfj)keAoBdhFpzrw7uV7n2(~Gp%AuzXYz?goed8pVHxEhNMB$7g7)VPMlAIo zNVo~P?XRkH5eP%A?*Rc!`(JbR0%=<<4in%0@U2Q3vkjUd`kvPPVexHP z>v-a0Av5a#8{Yf|-X2CY=8kBCGbmWFXiBs^p7D_suL+|EzRuIGTS#*ztucm)9zFF# zH%d4f3d*(&>I`R4OmHxjG!*=RPy4A#+6eTASSdYn&UX@2G)|XrLctTcyPeKgWX9c! zp;z&5J$pUC{>xL639K9}?WKvudbp=_P$b@B+VMpyv$2vutZ!q~sgH`@KE4_2E4Jfz zt<1Bi5&%;|!%lB9VG(bU>1_9&@5VmN!AyNS?IO$qo!om}F+r~l%B;I-Vh%uHrnMi5BMe_%7O zbr4Hs4dIsu>ELm-VfU=1OZ=G z35k#}L&SKzJtWx0_BHhh1vrt}D48GwS>ACekB-FXCa#s0|0_!NCE?$oRd}V;Dr{U>-9w@GVk#mOU^X zfKj|LQxdJ&dD0mY!S@YZzvMZUoeccKqGBNGw0E$_W~6-LPBI1@7NFsPz%vJmGmIVQ zx(*Kq5*nHa+Vdx=tadle%@!%Lgnm6;3+IK%<0FycPgxtW{PN=R zvNAe$Rw}W0AN@GG4$mtSi&=-2j0V5*($a`?!0j#(DsIAazIbsXEUu0cEP^gOW*QPI zPJ9)9b7gp4x3vXM1&o}Q^MXLA(ZLS0AwVdd^q0@zi$+RC0~hQhtK{%S6otl>H3D16 zYJQQ^^Yoq$3-?8|`|?00(%>Mwo$)}ng=n+y!@u55qWI$Dl5N7 zEHuT?%NWdP_Rl#Tw|wpnsoCF9;%gkfXY~fbV!~eAqcPUHj8IcEXl+8aoV9c%Q;|jn zft`}wgAQ|^_}sh93)?>Tc*W--lg#!Pl`?{R1H(q+ue*l?rpSH%kNoJ%;I;+p}i(q6{|J~|MY|c{7W)f#er_O!6KBx~yK`T%Z`TS+ee* zWo9H&kM4rVWr9c*GFngO^(|*f>qY5CAv!L8{vW&ZgU+w^47!tHJoimm^1PrTkrq(! zQ8P<+N44vA6QS^6X%z4+4SE9Fj7h#KXlc>D4wOI1C_=9agvnWYLSm^CmnL62 zKkyl7=T=8A<92!^aAW(OQ_jf6>$2AbdGF}AU9LR_N`}t|q^GICd*7UG519zg@bF6K zjvgCn(k0*xD=dWu{t9YFLf*-(h&qhtR|sbz4Hgfe49i;Xcp@5xM1n8Ml(Yx)Kuw0ezUL@6j$*!TLicvV=>}O(<36Ws+@MuX-zc(?5 zqy1>rU8T|^Qsr#aVHd0#lmSqVsjBM0Eq-w4koI}M7a`w#9)EP*JrzJk%0Ld{&ZNOn zkhaZHyT+A|Qm*-oztZTFTgNudaB;MNq_{C2^|`am+JuRHO04YfU|H$MCY$hyPsGRP zw-*?a9b}AyMKfQ%xUVgN-g$XLWl~fj5gNEv06#c9{80TWCnYfOV$mf91^1oNbC@%u zme0XOUyqeYMbe~==u&nMx5=v3$P(?qqJFROdWgS zrWdZFtz6frF*7?REgjx*X85>;G9}DTCRD=6XXIq8tPC_#vOwdy>~Deg zbH*${qe%3&okjw(E+tGsCzwqR?mDJa^rn4|n0YsLt+=Z6XE%#c?e9UOvAOSNe>EC` zeR-CZk`WTX%j>p3na9TB@3TQ6-z*EM-~3UgWtdRzpH*I7nQyG%q%!4r?O;qsUo ziQK8T`g;w_J=5FYWXqWu8PI|Y3@`~r&_2@T&>q?g9xXgh^yOAAVsdIzALo?}^eKRx z5tsaMl`)1D5JBd@y}zlP*OH5GW6b5SwbJO{r$c=`K z__$z_eiKU2PLlnL-z!&fF2Z;90n7Tq$Y-epA}!*ML7a-@4+IV$TZS$*DF-?GKl-QQ zwt8R^@j>Y61WptAb)?9ZFoW%X291)!w*)ACn3XuKh-Bb4#xvbecM45uv|GybD;)a% zJ)A)ay7^%-`tC4;`jieL3$3(kBfZe%pRs!sB~B--vf4f#6RkcuHD&iRApABYDbg*wxi?a

JquC=K0@(A-p>T(WipyoB9+qN@3j1xx}$GULpc!CEJ zsM5_m+sFC0E&w2c{#BApz?x_&?nk`r#y=3?O7b!~>h_h=Hv@hk33j@NMQ&DDfXK1+ zajklB(l81hEx|Lxal1GvBrBLUv*aFI>Z*BC$TwV-^S}7MA?EHCdbE~$`N;3*8}+3` z5IkK9cJ{O0(-YCqu;@?NdFH^Z`MT#N=wD7LusycGqQu;<#}K6wpp(cxrWxYE8xCoC zog&Pg2PU;7UehKMdwB4O_sMKZg=6CcZIu-$2zQ31G&v>SZ}#n?9t4_uz$#>8a+{sC z5ljny)gt!0<8;>6iAdw3prL8AS#*G7{0d55ikLgofdOkCZjcw74-EL|HAdCE^|!?E z*fv^ysTMrwtSFP2d$BzAWzd6dlI5)iLk^cci#D8&iqKdXpIz|};oUJMdMMdcriX{C zwRC^lR@XnC*C$3k51U74l4X3wXM^yh$>7^|@i(03C4fDWT{ zc5a^2g)D~($<5CD*<3R_W%lcU3;&gCqN;`lJ8L7zvq>!)yn~B7OcB&oK$c`@XP}@! zsQuRxGbf_`-r3C2<{`$hC_Oig45RK0WwbCyAz2Z>Yo*~wKX0?-vo_dvbKt)K*r9l z85u9`K4Imt#Q!CEl_5Fm`JL|anvz&An5t$Yrm#DZlsr6oG72}La#CjGJ_Ul|Fo=_h zb(*VUsx5X9t8**+FAO*>X z3nppGOiTOKz%oE{kH(nw&9F;kbl#`!c-8M)S(!As_Pi;LVmca30v31PpPo|&vGG$q z{MT4)#`7OcalWS$=a@d(D_yHbFv#wG-g#<{DBekHg*L zCnESjy^t@^JwHU?8d$at(-SG-UdTGN4a$!VvADS7HOq{KZ4v8f>N+I7DkFk~nj*!t z!TFLYl5zAOSwhW+vOY(*yd-o`7rpQ+7y6`3mK=Bj zv})dzX^}~1`|A0*h8lA0e#A}+X!md}g*1xIFdxh$^lTV1>TQ>-~UoQIzFz&CpY7VZtgewJkc%(~1%fy;Jwsh3gC@~@(x|nqgkap;waf4LAJeT{kqbKjEHH5%(T^`G9E;Kc*{8>$g^_C2CqEf-y3L^OQZsSZ0ls@6% zWs_nv0I@2ysQ+1PzBuE6nd(auz0$$m=8dZEl7xuMRGz3sV=AIrm4Uu`G|T#}EK<8+ zKKDPosesYArHr(K?@dDZroR+eP^15iGr_??!|(CKE~aGl3lz_oP@OauPjYU1i_zfj z1TOAh@6xH@fxC|e1@Ks3o>6k*FOdaHkm8$RL0zo;+_WU4{rx*FhRCeLH*g^+98xGf z&0_LZqf(5LiABN;O=m%c@MV6r5VHseD-G9H~CZj<9C?z3U&iRyEKnKOVG zFT+Kt2&Eva%!-uJno39%!?q8Pj7;0CE*t-{SwVX<*p0@ANRFBM;RV|cJW{;G*l(Ak zLK`b&I4r-rU!iMw@E*+wEfd~PcOtMD;j-^O$c^CY^LGd{*@45;d`ahi?j(!9XYtmn z3pf1)1ABB7C1m>toO82N2{=#yYi2>6&y@e2wyd0SgkpMfuq>!c8ylk=?G}22;AFGB z|C`EhB#0_btZ(#q9UiMq!LKL6%EIymyd95oKh6={U6L((&wbxa8jteW8FQ0`V?hqE zNJFr`u&Kj0#?Ugel((CGrcZb$rCM6}7bHUPuIs|+&rj`h9=lYf`H5P?;xCYjOfq2z z+^>9==KJqrg;+H&*VkZSqk~(Z7PukJu~$Q?cn$Rpx;z|4O*jO=Sm?H4)PTAAaJC}X zhYi8lpX9x1EY_uks;{5cN7{uF*cc*`2?2%Dyli$49q?#cU|&lPXdIH;f=Ec>aL zG0Z@*(9ElF1_)hnQg96|-2bBMtE0N=+BQ+T8|m(nmhSHEP+GdXyBp~aX`~yZTTog` zTBJK9X7fDn%zW=!Gk+2be&?Kh_P+1yz9Qt?5~z;t*1Lts1Z!$y@4$;DZMQ^mRZw&O zR|V?Y%?=r^T;M^;{U$vcYE&Zy%N9cC0a{s796{(H z$jasAw177`i2b<5=p*?tIvy-W85A)wJk5`T(lq6J^pp^&HhXi;3oZz>hl2V$dAk&L0ymt-D8DDm!feT+^V$U*PGDg>+OzXXipVgSeRXNtz^id=7r^1AIP2ZFbYGS zutg%8k(QH#hu%BjAWX^J8(=S4?(*A`p{UE^H5(fOf)k7vh$%sk*UpD=X-aMKvuQ&Y zFP@IQZ?CvD?F%Bfa`BT!*zQ4x@gTn$dhmp3RiQ;?5x$q$CG+2X+ou znecl{aLqIbF0-&m&|-QUiDZ{|rvJkB{fJ{8MRj(jNA((n|NCEhD{>ftUbxde!}ZwK zGUPN$2HPitU!6lc+lQk|rG9Wt)mn$kvlNNx?UE zH}L!Q$5KM8(s;YsS}-F9Xtw%YysE+BcpW;HAv8r46hPVc0(+`62CBcsgZ2jCPzNR* zvtvg{G}Om>ozUp0sO`7_G4_pW)>p5lm6TN?Ec!MBIU=C~X~|S!ghdmp$eRGH+?ENi z1RA~E9#Z0CH5p!r7H5VK)j%+Z*e)&EF*6&rwzA?ts~bcn59MMiRi{o;BsSTBb_w`o z<8>KVJ(fr&E|12E*0NPQp{J#-0HHJ=yj-WpS_s}> z-hmLl9^w`;bH{b)rJZcogwF~1rU<0pO(V#L0?V{z>)Ej*r$Rpjcj5P*YY}Eo$(Qn~14Yje~)j+Up1b zQb0q1NW-=*CIVZIT}-S$?QLNG_XQ0JpNA&{JbuF!84@^rsEvB!`=P_-96TJDR580* z`A8I_It~g(q!w#4Dsq^!I{)f%1mx%kXGkbynW(pB%(+U)9;pu75J3?kDe!tvut`zc zb>dPIFs$wP2#QdZ92smKoXwtWCxmrpD zg@AcmS2xkaGP(ZDBPb&`5dt_v!FHke(EJ9_%0BGdMhdI+ED4y-~0%#Se7J{sMED zo+7>oXi?EKmF_|{vSRmWR4WBk5KPjhnBJ1eujZs>D=JE(f`IeGz`#&~TuPEeS9kSJ z-}>ZdMb6(d;f$6TZr77ou}FBwzQ;WRIfi}OUog`bs{#AOQf^$!Mt)l zW5Ni_|J>sogl~4gGr2yl;POw*Sjb$No&8ye}Gd0XD?qao>_d|2SCBVym5_WXg{jXbx^#(>6W zkc4f&ywzZIV#Q`3p=jNlR4BvVaERoQ?Ln^hz!Aoj>DU z+q1J23pFVUQK))n;qAeg=lOio*a&nt3~$Z%<_{Nf8|FqHPU@skKkN69t3wVto}aHv zwbE(;aE3eMQ|70Xl;YaYQz0cgpoTgOai4E;vU&}zQ!k*325UY4B;zFx9wrWRdcR=b z)QO(~fpZxG1|u@56sat!LI@XmIQ0Ze1Cg7v0oK$YSL=vV>*xxS>NVO+|EQHcus znY)k4)8jiDP7Os3Y}IIK`dAp`?YY^1>n6HXlz#Z+9d!*sl;hg@S;GZaIAabz*Y5=U zmlp%9`+@caCZ?ulsf<>l0^%nt_;oOB#)(ny_oTLToEymdJd47uZw{+hi{2T#JiQYx zs=oOAA^41nE0YV8wE`J`V`zF1(YOm$BkQK4FfuYb3W2{o{!z44kp-7eq=@M`FY)P1 z;#iZFKjExWZ)@gLSzUyRyA`y^h1t)P?d2BvU8ba&X=N6Uf!MRpN+Dieqzat9C%XNk zJ!IY_;V7!^4zol9Cyfh#ejo?!UbH6S5Ca1l3QL95!X9rbu;34$pqm6L=3@2Ip-z7kgBocn416Kg#gXysyUgt`0Dd z*T-X`$deF=UpA^3^pYfFj>)3k0+aUOJJm1V*auG*49KAF7r?2yL8*ds_)GLk+1G4B=v}S zt+@R#Fmb?74!AO^SK;WyjIoenC+2NMDHGYCOi-sXVY^&mkStyECX?f>zr(sXBCP)T1)aeR4vnMY&OB#)Z#-BxxwI;UOnBO%1WBY<*h61) zse)_kFANm&R#6HMGqc!2mEpIX8o5vSkwWO$>uF%`^YHq%jgSdAw<4Gh7FsFD9^|=t z6Ua2)*MBa4^%}UfGk^SriHRYtEI)FsF6!);nDKC-8#Ovfu+X|QSov_$BPZT1tj^Qu z5cI9g+C(v|ebG#+%iGP^Su}-ViDIm8B|thOARrCYL4=Dv1bL?v{|!No8To}mZMYMh zGjOOq!7AjkLE1wp1tP1ccp%Wf$~gQ!iT5$-^L8Gd0v+{sHg!g4Cx34)G*#TCRoKg5 z7m~xn(C`3EnMh;N2;*-OtGdY=q`t!yKpSiQh`KBq%;fZb0z<4taYjbQB~B5LCNxs8 zZ=7~W4r7b!zBP;iH8nW1l>Jg><$W{#u>#>KtSFtp^yYHuhgs!fc@qo4_9mCtA(T4w z(thGBQd^X)F#nmm<;e&REOrNAz8*j5bDM?{eH1lE>z_wrCEZWP8W?arRkrgyqdyZ} zT59!3T`4H3UcTNX`55FxC6^EY9=r9s;|A-@0dcLg9;YY;sDvwpL-zpHq2f-@9Bd+r z&!wd{y7U$#*v(q$BdZq!7P{hwI>)(EU<_I@a_pH3QLuVJi-j{-(e9lKVE-`RzveWQ z;)Hye`09)HVQfnR@!-C^w89u*PJGqjo~EI!T%kE>bB=Y;ba2jBxx?^&2)vOma^!yo z&IYDCJ3A?60?Nq~q8>E+@f9#l8Gp{=GOdsFWHH5|jM@4BtMkBZaEg zs~>mcwU6Ety@2F3zV*z0umUybh3oWgbLb76^ z_N}PU_HJpXA5ptM$t9$yS%EnqWXns%YR=A#1Fz&BZ-Ay`w0#Yf-_9V!)grgr9jtU; z@O@nC+LMqN7MWzJ1OG%^No!fzs6|FB3&oa@aiU~NG^!#a>a_-YN>Ew7)liwge~#Oc znW@=~kWU2o;AdwPzX>LX6dH4~)ve}|M#rOjT-ncO>9jwEW`emDi<{%*_+;$mnTTe( zwhER%&gs>%--6F$F26srQ06O4ibt!Vd(H|F67k?LEflKybccb1hk<`5@r#X(eP?@{ z)RMQ+^@}y7&#l-7Am^y$TZO5jX{b%o;^AqK2#JhNvz&b|`XUCKv1p(Ez$;(rc`;E| zR?F9HAu0JOCv#Lm36;--1wdVx3{jh98Plg_)!x3uLWs)B$_`fZUq`z|d?uEnW1C;0#%_-z$;h5ML0bhS`yCnkWce2VdQU;p5 ztX%4P5~%+HCdurbslGNGEw8lZbbntco1(xM$EaT?VQ9E!@?+X>by<_WFjzKaOvb=E z8ovK|p{r;{YE3Imr2%rS+k#oykCL0aOw3#Act28A_?ueFCd42{I952}Fl}-FVlqYa zti>T+>Ybc8vx=`IoqYm(2R@f+AvftiNDsBFD5WNQ1t*gkHdceNF-I0h`Nr`F zHX9E=i^E+Q00hY_emOdk(>j?gv+H!7tAcU0W(5mtXRyhh2F~4Mz%;@Oa6_#P&I1SLis&trvb{z z{sE;m1glK2DCM2g%uMp5(XN?=n5eeL#E+A;U^tjZW&`U97eJ@!4XB#_^cr~i07kK- zw)W6MZD`vUmY0tJ``t z%xZBsN;PkT7kSBA8iwxJ^(j{-2zguGM@cRc@tu!`e+F!s2QyIM85DFp{QJ~LYPeNJI-SC|hSENlv~S-@mFsc_=L(Fzj}yrg94jpa z$Pg||X%a(G zJKa1vD<~*XH)D}bUV!;z`j?mqK+Rjp=gx;e`966EG$8WYl0#<7_IApI2_p}l$9(Q2 zcDAJhN-vp`X>CZtCKf(Fx@PHyN3;91t|Fr=R zH+S#uN@yjev?TFS7EhI76Ovi)!9}(JBnols^Hayb7IeB-bMH*_# ziF857rKlzr;okVago(f!NCsZWiTmMn1B_zdu_q6_%tjm|2c*LXtV%gN?s$Jc6}_<~ z%yO}riHQUrx5SU$&F|P*+YfHC;k9I@u?nDnHv5w3YGMQrO;}%r<>&WZ2cx}a(i9GO zeyIh%<8b7G>IrjmOZp4rQo9+ZX?TM(4acWqoypp-Bd3_mk%WPwIl|gdP@Mw+qoE9i z${1f4zR;hhf>3eYK7HC@nBUNmPx*!8vuocYCU{X}@Ynviw`iN`t9hyI>%7j|;OXxS1cJgpYa1Y$L1uBsldt3RxDtgTTe0e~#wK7cF{a4$mTK!TNr zHariHzz6WAxDzI)NL5+N`@6IGDwIqf3WqUMZVxnZN-%Q+?Z|3#&`sa}dCzb#_ z=U`m@o1K%sJ=667D7qB-Xtf3y5>F!%0p$068u36+hKZ>L_+a>)R^(1N4kDc%PqXvQ ziPHm#C`7!`+0gnG14Bdi0-g5O%V<4MOQL`VrLdin79^ar(&~ua@=FpUxHy3{iAu!r zmi+gMsJBuOqcX-+uAuqrbzzCgtQPG*8Sh4DDCzJz$clJ?NwlxPBZ$j-*oq9zFE20u zJnXXV{K#lAm2G21^Kg-lm8TB<$~C=5kro?0axMlWOUeHl*^=f(XR(^fz$+Fw+& zdR=KWT(ABvH8MKdnSiZW?X%*^`i$$Bl=Enl2AB$ZdW|-3YQU01jNW`=_;n`e^cTjD zysqwhVj}wWBw?!pwR%vI2?GEH$}oL4acVk+gQP#fM+}297toWm6GilaMyj`0Bkzy3{fS(h77j96P(a=H|OQ5gi44;Ox!F z$hegm3mHc+y|~JHx3RGy->M&UNR~W8kBKAU<#m6^;~YgXD=I3gS#7x5CiezYP3?f! zH4KT!3xs9O`dqXs%z7r=UFGaigofy&Z_Ou=3BSch!cnH%%+V$y$xuNww zfu21)yor1#4M=(@-4J+b#7i_42q7W?0Z0l86OK7iB2MasV>~_t@vn?Jq$MCOHL|kv z6Y^4Te9Hj?t8{`Wa%$#Rd9GDs_u;dv-Z%HxM|{@ThapgYFk@us=;$ChR*X7NztM5*SFl z7mH~VE)>8{`Mq1Mmtahp)MuDU;^YLSORn>%eD*Np0Y)nk-!Cd9%&$UX% zQR~PQA+@z9AeEO?HjY@p>+gJZ(b^hK9LbIM?eyyEBZy}*G&BsA765j|KsUXKLw0jh zAXcq+=KbCOZYM3Q@Y<)-`Q{je*ll>8{(2QJ)sYtZedc@L-d-&_rS_a~5#PvQ?DF;2 z=T1Q7o1f-uKHns3y7k-4fgXxTGWpGa=`HmHn{hD zIL$R6lV{#Yh8mUqI6Eqj`s8B3MJ$sDt$@biYCEw27{;$g0oW+BDT7oQn2`J;>D+zT zv`W0trG);Gtams44wzq;i-muqwEXbF6Hp*4kIv&c_V@7|u4vC=+R6P`d)>bRo=b-@ zB4?ZV)j64%REm`p){ccrAB95320GBBo@e5Z%?)DrB&yWSRmybE8J^WOHFa$rbZo~9 zsr#*@e0czy#oa=lf3AS@P`$YVb%l7KDvv{gf5JYnPF!xX9ltL6)wVSpO90Y_fi){J zzmGKeAT0jQ?>ZA;PE5k-RGgG_`oR$FI3SH?GK*91^6!a=h{!4X8-({$YhA&sD-Vy4 z84j=XpKgrOHkjs8^SSRxjaJE3Rc$l zZ|FiT8pJHq*wxh5LtDNp-M;>!K7{y+D=6%^$#T+eiZ9*v^Y4JBlE6KRTFrTvc8$#+bQfZN9O98V^b&e$yEq}K!?Gxs_(6f&zbVS|iEzD$!t0PfyetB;qh$j?d@$3 zm%ZI;DKhib;oQNm-e7)W`>$UEtMD@J5#C^9yPj=IfFL$)E_Vm*dKWr;rzB~=Q%10h z&)4V;&&9Ratq*naxgVSMy?%N;5fT?o`gZC=C>7UQa-+{ ztP+XnH{qbJUJyS!TxtNU9FXz7lg0*7Ps+&Qa=g+eJ{AGV=CD1ISP4A3&d1*pW#?pQ zqtMQRdqF`?4rtu(t*m}dtUZAYFW`{=o>>j5to;0Wl8XAgg|P@Qi)4cV7YbaVc2$G4mgm!%kTdA})Hw?)CDUtb zv@YO%gF+(sa9$$Epj8#n;jz*8H4qYHBA$5fp_r&+CA?XaiZ^_^SqBe#?d(bb*iZ|7 zR{FcvZ3}#UaMD=sB|&E_YxQT2LF>Kq;UX)4jov~{8nb~HHmTPPyRSeO3^INv3f%4a z6+@(qw6w=lc28>n0em&&pvgkx$5@|>yV#hRArPkXp)Uj^JK+C5=xA$G$l%XlBqH~@ zy=7%*XZQa^{N_t5BLzjU3-$i~r-@E!%U%KRYfyE?;B$@v#cG|xY# zi@lI{2CX-Vv+gJV7zS`AW8dZLhAr&6A@e0fq%j_<@UtnRegG-=|D8N?-dh#XjWU_p%hez|_+f#1ttPqOBd z(S_$hloL#%@BOuz$w(Oqo|g#)U>v<>_o-*K6AK#{d9?>+9u5mWK!2*l^V%AK|LI@e=p(-q*c;)#a9%SnK|;H(z=0!k3alR^8c z;&uAhcjr<`yF64SU)xYXYSqXFqW?3n)b8x;1QDrRR^(WclI3Hjd)q*Lsg5Ego06*GDa$eJY$$2&Ms|GIhM1|#E9$P2wr*zerkTuY zhx#o$dzSO9HN0wHL%<9F?t8w-ALKC?d!w>IinTQmbw<+=urU(vdIq351yKJPc6s?7 zsgmOo5K!3LuXoCTjYL+~>tS04#OZ)5*_iIUt4w?Q{Raq<;rBg<3d%QJZJ!V*i3H5W z#P>@1(_dIHAVKO;mDR%TaR3d9ii!%@dSl|_N%-7sp8Obb{QaM$;z=6%<)PRXFE5wa z-PCqLU_p3jSiRkKl}-e|_bw7agV!8A95n+&CD>!@6ZN?4!|()`a8D?tT;wd#VpYwH zi_HgVO#iw31WH0BB3BtYjk96Bf8i)_+q3H;`^o7dz|;9T)jktxG^ zJs__9di?FDi#L`n4k>S%q zzanCy#zx-zsi$&-VDH-yhgWlzS~R{aAZXTRt@AE895pyNSg6wg?86qS4Y{xjQ?Y9 z4g_N7H#cu|dsCvFg1&pN3s?#DF z>%)KyhtGh<3if?o|EEqD>tYjQV?dQ5CLs~>y8QhUI9%2h257@0AmFomU8X9@}EnTs05Np3eK!{w)%RR zsa#*(I)1>>obK`#YqkljPt?+=Hr3zW~eD&PCydYT66|6E)f7j9t9l!0I$}^EOUa$p`S#>5ui%UyPdfgA8K3`et zE(?@Pd)rU(l6`!EjCi?OXEID01~M!btF0G;CWL_jimj26iHQj;Z1{uuw6ke%+R4#! zN!{555N3gLPD(Kg6gpUAN@O&goD__VTL%k$`r5ca4oWGG#A#v!W|hgG5IFnA&!$We zD;x#u=t=E(Ja$RF zaQ{Uh%V*{LI6VeZdr7L#H~PanTu+Yw{NZ(&Cf!3J>h^hzGEZlvCqBd=UcSF-^JT%o z!AZb6B{fu2OBRoU3I&O~c3|#NJer)?5B%U*-V_i1{COWQ^i*bN#S22oxonro71ZB@ z$PWSzn^q8D_brDno`|nGUyVm7JS3ZETxjzdgm@H5#t+5bt>$(;^jnTNU2@s}{N)&= z=MTXdFSStL=<|Co&40ogATy?OyqL@ZFUMvKqwJeLkQfE(R|#S`i2`-FO=A<-j!UMq z{$kpcAJjW@l~V`ve!6Xi|B4KyrN3KADr#tGs<6IgXV@EV>dsI}1d4&B$jA?WufvYl zG_HBW21iHte+5DY8yJ9<3mW<$YUk%}U%97SNMuuxjP%oQsD>5^8M(z~sor!Hqj9~( z*LUW}4}Ktw>aok|Z8@@tir1*t1&vs+CJTDZ3cB=ONLW~yZ-aU0r30F>yt}(Q*4Xoo z+;#TI<}|2n!7gRUprt!sb$7Yc0B+>q@UYlCp@5gmO^rdQywF&v;>5SGJ+Pg$%27tb z!+Uyqo{Ec$|KhA8cnz;I(^uFQ99&!b7GY4_SIoe`Kub%jt}K?K;Nm^C)25<}3y0tw zq6bGwkkk2c;&%t~tm}V8wvn%HNO$s}*>s5O6LWK$x8v`&69#1=zPpk4KpvKec0)#( zB1bv@p!`vAfBsqRvOM{(SIf&Z=078m)feNDI=-n)i+GN{=JKvx6+QZF%;K|$bCI`@ZdAP<=`Xr;d- zdjB@@7trM0F_0MLz5pTXOKl*2=I>%~H&x-t+X6I=i&+rKNTk8Quq5iPHa1;Oak?qq-jIOz;fg6hh)4#ffvv_Gj}+dpk@jS(^tR#u>2xtO&hKg^wvxBMk*~rEt80Q#v`V^7J#dC9W zClUwo zddhwd+fYBPMC_?I2%?KdY)~ZTaBkDx&G@9Qno5I_dtfGGx>E|ax zd>p7oz2{Dr#bq8AE;dZL(nfK0&1`%#nct+Pxoe*@%=@sInu?8%`ngK~c2WCoU~KG9 zSxd{pSn4g2KZv*mUFY@w>fgohMHEso%F6E*2TEQfZJDMV8!znlENu?pN1Q1mBSWd( z*7h&aT?O}r=+2xNu$4DiI5bP3B7H{_EiInc4GGTi)|_{wf@F4gQLdCGmdzGk9&2Yi zCznhp)2|K|&Uer&j}{Y22T1#gcwL5QwCW0oPknK@d^j+1gk{5*TkN*pW;0SfJm&H7 zHgC?8%@U7w0OcCBJRQ7haL}gR`q=p7Tw{w?lH0onAd>(NF+dK{`dJQVKgzl6W=cXq z!S?oau1*5wu(n*(gQ;B z15f6=(f}EnZ<|adEmHdO>au`hM%7=L9jG-yxDogjC}`L(P~{%s9Ng1;FkiWXGW6AkS7R@%XqTcDBXSuxG0t8La%j1%i zSWz(0ysf0&k%r5G1|}h6=;Q>=i;KVM*?y=82{TV*wcrLxJ{Uqt=K;CtYTI8lhR8LT zu=Is5@0rsGQHfV};tV5GzNFAYhctASN*Zcv)-+1NtGpOMjG=pZ!Mu9)B#Oy7koQR? zRmbR1U`2V#u=1TWhnkdXmGv?=pF6L|`>A=RENP^3{*Rlqn{VK|kr3ZZ^fJk}Rs|3q z&UY=mumn%v1qDGW$jk5Z8_E3tP>_i^`9?J zbkMSveR8IT(P+617GL5aKp3+N(4O16e;2j3v;^>E5)>Jhx#z+$JNt|z4mD1SUAc3z4#oUgs|BEaoq{z1*eP-C~$nV2&)jsuIOW13%6bKKun;q37zLi#baL;P(dvg{iWOmm_DJr<{^KOSzj)# zp1cSnj~^$~OisTq7#3Y{*uz@$b0^CCDyq-Flaaab5uap<<5EdjO(}qHnL&%sB z%w(xV6(t7Co&$8{KAp!SXg4&Me*WC7Cb#?ZUA{@h<9YPX12oVG?#$ihWU2~FWp-aY zJ^lsd-15?L+L_}L0hf(-m-*xpyG7FiQR&Ed_WAQtFSjSdaY3zF^ zr=@AAX_&rFMLCvER^`N;Nu&T$#XA~U9||A}yuQEs?QF3T9>@B7POiZblc#riN*(b0x>}v5&^7S$6A}_WJzOZQ^SL+MudlTzkt~vi z0<6OVv|d+#UgDLPxojn4i4A%T1&-Sk%hnd`*A7=3_L^(oj^Z8@aabl~X6oweuYlgb zeywvf9U)!7dOik^Jskp8^W$U|-y0?V-K~queKpgm$w}MWFCPGk7L)TU5|x;LvCVoR zhtD10AMLJ3ydWccIG$uGmpvR?evl5Uva7zK;pS*P9wB6SY02|X3LP>&M?`GwYJ>Y* zJE^dv!$W;#b&(IFU0AH;^;V_%oC?_JFFYzV4O4|=$^NjpFr?JPwCGd8%*(^S)FChe z-utc_C#h*!85!sIt}Anu`b1)7h1QwVeqsZcBrIQthll(63PCOrh<%$WReClVO`4rm z$JL&%+81;-_wnJ!*R2=gwQBFM+Eo5D~ip)^bx@Ti@%Iwih5;2JZM*u=(2G-`_#6Y0&8s zIhnq<(l%o;5fK#yVvPT+RTJbc*O^G+#^du~Jb`&alD7l~8X81(!w(SjtI?!Usi{m1 zpbEVKRfJ=01u??07M9bO~}E; zMchw%sK&5ubK^L!`5E98R#sLZ7QEm2=;+p>QS|aF>PLxV>`^d`yh27gzP%^sY;Cak z2D&lguEW7>*Hx>1E6Y}%;CGSwxnhKMQ%_4H?1`6>+{6p ziyzZ#MTF2k8X&tXqV)f*>A?vO za18!8ea@c^8;yZw)-thPl zRul$b+)xFD7S+_C2n;}(ngSwXKI94vDzj-l3)}VaD&O|lu3n`Q3k%CF&`l>|dU{4$ z1eRA+IOV}Lk-be|qD&F{FEDs0YUWM+WO$vudg}ndgP>X zJUl$jR|jIFK-{TnS^&VEsZc@@2}Om%fM35zwbBHI4sMxb6KO3!0+M2GkL%hy3;sm~ zMI9c0{_MGPY(RgR$iYzE0J(4Yr-wEmhMqRn?e0s3LDuqP0w8FR;;#2998T!a0Y z$gl@l&&O*W3jlXw(reD-p87)cI$oqodj)Cm0pi`T+U2Y4RBTnA!;R&XpH0>AciDq9 zm``}XvtqGw=OsMUJQ!*d5V)Dbbt39LAq z$TfBQUkeV?-sV_HX(upZw6MHe1n$f)zRjwlCY~#wzdEu*j7%fh!6QG8%~vPHawkkA zERUk0%HZW4QgP`tb1l}RjW9WsJUT7G_C{7Qck0}pMd`2?RH=>yfV(5yU_QA!mG=y` z>0<$}%i|e~FImfAzRT-!2<8&LaBs>GqzU07z%;M}34gZRfNw>$T9 zDLOIn<)ZCZS~tM{zUa8MC@CP++}sQaivmsna{y_-z#0y5ii%)-(Nb+B#KaU!q-*uF zaUWDP#QYx9)!y!=@OTGHm7u|Aa6AD^WzkFonSd)KCcWqVD;u}t6&do9{*@IUF!(q- zJL^FqnEz~V#9wFKVrN*XsWV&FtBnP6V+#c_hfZcWpeJ z#11OBh=>S)aE6A5ceJqw0qsos-T7cp z-xC-~VRgEntOW)JN`DEgW-=xG6Ie%!%ZexgaKO`pJ3virJUDbg04nrH*m$Qwk-Kd` z1XWQ<6#ww`vD>muoL#g+tG1?MNE)*+VR}0`-{o?jkCcZea+%=OCg$d+T5Tz|u&A8< zjmV$WX#*eDyK`DYv7MzY<`3_bi2o+Ozk5}(R`*y7WG28lD$JbxC!#6_YXps}=m5$0 zv}wHon40uC#l)bod~*n0M{uAB;k@@QAYhQ_0SnfH&IR)c%a=y#uHu&?o%eFziS`bav>)-so1TVurDe4RG1ghid!r zicU-d;Q4W;6T`!wL6tAc&`;*&_jsj+3)SWQ&hNp4`dSJU5efb;6GT^6FtZ)OO_HF_ z`0^_fEcBH&6IfMm;HCr$QY$?jiO1G0IY)%l=SblPQ0|xo^gKKJ6c$j4ydDSy4*74U zdh*_=p?o}9as09UQC?V-6tUg)#O#CJ*Eo0SSu=8se+iY1vm~=I)wW`w{G=u&6Ndtb z51Gho)du_BuTW#iV00BY6qKUVZiBt-8`=__{$e|If?M*SEHjG~x&vPJZASV}fcAoV zvM#2+|9Cr<18^X=k$fb_e5KxeSZqtxpv9&GCulKy$(^)PCpf4m_`76B>DUm zGy(!5v{MW`lM4H8lWV<#j#obOSV_qn|I- zgTbZ0>7wPr99FYXd7{Y_j0(Z%xv0{cBW~=4dZR2n^xQ@XMiB- zaGHQye~s+Q>DQ;Ow;YDXacDzX-~F*PoTf!nuVv$Hs+pE0AO%v%W{Awmn~7Pjos}ok zNW~J$++38XW3?VBgEI+?%1sF{nvZBy1GpQRZ5a9%Cv|F%U(TCR$_`mP))Az7@#a_&`9xt@56^0g3wYEZ(SD^eW@#lZnrZD97lyn( zEA&MN_vwuqnzw=@Lswa-t#X^A_teLKyaGtwWq6>bSN~3he(F|}C0G*u1T|&Y`2G2+ zLtzM-S)Dl>7!t+)S;G-c1%N~dJ!c0=BUNExyHlCKh>dVMks6_7 zMa4`i^9OsqvlIPjYfO74xuDuieN;=2`wk@&)1-6UaIQdyv_o&7L~iayWDh+l4NYZ5 z;a4bnA|dPI$O^oF{s55Lh+^X6{{Hg^SaTp!Ge!Zm*2^eHK0p<(z^W9XJfpR>6=3f3 zw7X!_$^pVlOFiy2U3zAr9RPbg=l}787xfjou{?eh?!d4`HT;7UeaKYueaiHUmQ2+GSV04kFU z*s?);zEEkj-mXaG0lKG*w6r&%asxFWaM1Z$YEf$ldUYT>xZ@V^;D}oFjgT++F7MJM=C|w-+Faq%~7~}9pS-?g9!3w$s^Gt(A57yDJ=;(2O zt(xuKT?_(IkcmA`?Sb`m;rAI(X!{_?v^6AlgNScKc7Vo#vdx!~Tg0d)3ZX!@g z{BtEGBFzV1BV)9%b`jW!?^VC;=ny2oNt(QsP#rG3&$HfpK$K>D}Y$Be`bJT$a~>}-C(1#8RAH5EX=1VC{(DV&RoOF~d! zU@thojk>(%z!V)cVg&g3eK7Z1fW8Tej1M+8F#t%hMUO1r+}bMP5~~5#hZsy&z6mv; zI%SQ3;M5Q(Q-Tbi^X-(!CM1p`SyY)H~;J|pgyu4glS?+s#t0Ds!H!?CJ zhJJu0O3TMA!ZVpC^gOZD0I;U%g@s{*5lBH`cp+%F!VMiN+c7F)7WC~kv&cxV0DyaC zzD(pd1dNA)f$QDH3NTyH3glX^Y;6eeB>SM}b`ihLn4Odf9pfJ?-wHt;@7Tm*VV*yh zp~cRjqLN zYQ3Ad+LOqw;f)0IleMl7&Gyev+1V%0{(~IjQ#l>(FAoN^&La~5P(q8W5?w+I2521k zO@g}~mosuC<-!j4V!9f$f2sa zBA88J;C$#AC4?S9cHKC>8Gzwxc_NKs@d;lYjwh*sdp}=%GY?It;Db_W2<9Ees2my0 zAWuzwaW~~o-pFrlU@t?y-W6;}H!!D3|0(5_bU`z@5jOVbEl=dr6Xz#A7Vn^-xZ@|R zyHnA>DzXUt&{KVwXZo6|sc;lwsNpfBepFN?FX&u-qxiBA04k7?kumH|OtuaW2?5wh zj4omfu*Gm7@3gx74G@V$T&K7Ey`Xp?OO#fwHSB`l8a_BUNG3f5N57q|?IS>i%6?m7 zzz=|1b^4)S9|oCRKoT&xj(#>bS^rGA*;fPbpfUIcvq3i@Iy$S@Y#Fse=2wVBmZz&U zqjs15zMf|S^o@;MaTyuaf%RH@BQ>>|1hgl|RxH_6$3wZc zaa_@2D0@-8F-#b{u~i0RXY3 zRZK{ENA}EBFw*j1`72RGn0%0UJ_Kp({%jJ`Nalic>wJwr3UoraUwxXv=IJ`t+I}@? zS~ky?)iTkQwy3hr&F1XO(zVPGo!wm~Vv>XrP^C+GL_gl-xy6T;MT3^&>*M3&>x;43 zc~eZWle&K7i-rB@__$Rb_Mydx5tRMj;0F#gq`jQqNT~PgO{ljr-_-N*4$A4k3^?H` zc0x>%3NY%-%os5&n0TX4f$@z`SqxV@X(TB|bA6UzeF@%C*S{_RRLY=TOb1GMt%W6v zQ-{C@L@O&Rt8(}*<>*pBXbubhhWemdiC9k;J!c=kgw2^Q#v-XdBJ*i3hQTQy8~vs} zHDjSC^Wa zI_6A`^?uXx~EJZ85&}%zQ>M zqi*s%X9NUOP{*H7Yh;NAoSs6+@HGy#*Pz)Z;$^g8`Gk97Hl*bDtkP-ueG2m!?lVbM z9}wspe6@Ugt}wXMhS>kR9FD_V1jI<(1U&xEiIaSJiEpD!We(_=F=zG`r?g6=*(pM= z==EDuCoe)(kT@^=91P*7xHfkA`l1# z1qC9HXpvmDq=jD3-Qjzhp^NKt)F{11zQZuf>YZc(uTDFxI~4MEB52-)ROaxY=0eD? z;AL_m&&oe!ywOR1&B#KAE=Rq_@4u_$0^4oC7y;y7JRW~Rfh(bmSEn-sNf1*Hyr%Fw zP-}M@c=67&)38A3GT1bXt(|r*eiF9}7`b#hUg^h;e-$9pI=?e6w%SNtQv)B2N{JQ9 zF0nCiV(x}Possny0A(MYd24huCiJ5?R9ZIuCe&4(i@;m0$;k|Yn1jRCnu#-S3u-$t zYIAH~=IJtyEW6`eTxM1F8)%oT&$uzJ{6%bD^QSOHt_;lNccKbiYWvFVI0J6!tIHD$ zVBTFB*9aRZhF8b7JO#3R&pW-C<2OhoWp#DP5O@|DAWIyk7+qbuC6FuYkV3(3aYg0j zq*}SN)swAJELg@?-MKS8EAMpl+lF#3nvCC(zW~^salk_RV43x_cM`enx@Kl(F6r}& zC$SZy`5O>8 z)&E$2SNT|*tqIDMN_i@T2^cP2iJqB3HC#>Pg^*npQm?}^{92ZAa#u^FW)Ro+l8nBJ zu`g7tLOQG9SLa`^S=lw_mHw;^hveCKw3%-9E_8Or+THVLcH*ekeN_&NupA7Ohl(zr zqdJEwoel=a80^eUP7nUFuC!wZRVV82h*bsz?HQvK692e40W6;jFYnE*tgvRk`bpy1 ziV1iGOA5WCwrhC!GWfO@RlOV>#vF5UBE#1#EiKcMQc#(Y_hqyAx=j)u6!lI38vV&_ z{Ax5Vi7$JA&377dzuwcN*eP{-yql{l$qF;043E+}vM;H;pZ0fFR#sg2Rk4JgpWXpp zSNC({2^^dzy)Aj{0@4~{;I}QfG-2>y$zT@nh_Y+;Srl?AtxIsVn?q2>;|Y)NZgK^U z&$wH7;RBIV-ap)bzOcC1-QS&~AEYlvPB^bPQ{%7(>HVw7FSDuX>Cp8)sjW?Xx%?MI zZvG>f;mlP`GjnrSMk;UB)O^aRbb`xb$sthOv&Fs#?jA$`=#mo8NT>0LwPhsF;sgGu zwR{Lb*{~WKk_o?)lG2`)KFnKvl`Af}=q(t8ecee_@D>xK)f5kkg5#x@U3D9SR+om7 z&t&%8`nhTi>-i&#t!*zM8t6ao)RdJyc9Ic>K3w_SG32lUhdA<%HW(Ipdsu7|`pp?k z>Y$gHtn7H5mzAbwlSS0lSXE>M1Q8U@HMP-8ig6mnbh?LElwvECM`Iy#w>3kfMcz-) zpZ_!lE-k2EQ@VQMCdHG$tag2-?AC#e{1%##{G9b|O>}W~q|mL?%{cwl=yldj_DJ=B z#Zee><=Kb)XIy{7im5=IDy)1V?zmZ_v`zs-cq{+!;ewpnZuXbf0o60pbS3yIjfub6;?c2;(_xTKyJp>}IE{+Dh zX^GswUuTt>PoXT1w*2My4lf~b2h=UC$iKNg3<|gGAKP1`ww^%KZwm;R4VyazLR^nJ zE&#iOoZ(dHAPLBY=l|*ObY^lA4Ck)6C(lPm+patkFHm;By8?S~z|t{;Q<)B=6$D~G zO1BLfx^c6;l$T{s*NI0NzKzSx&e*xD{vo=vNXF<1D?#f+v65=JiXN|%VD9#Y3nAg% zo{Pb`oHX?=a$GWo`TEGHcG-v%mM z|ERhM1_e@+-eF=eiJaCR_|me@agUYPOBJrl)dK_ii);cxO3(BkW}jl}ZF_mNb6nZS2h0YKrF#--6QrMQnD z1COSAx%D!r>lDHOB`;-yp#G$mM7XB}0 zZ7zE|zMCjK&77FMKfu3{bt)8(XQH@@%`;Gh-3)~MlP(@06^tz`0RmcLDCDltW%t5j zLV{A{>jj?2_hn}-l$Zi9hCdA==M1zXKgVf}*`O}uoAzc)SrNZ4H9|}y4VqmkBH(^i z1n)FFW@b5ZA}Zna%%kn(BnF^08W%NB1dK*+HJkZyrG7T|xeW$UTB#+TOKAEnPy9oO=H@kKn#gh`^r0qx*i;IT$u69bHDxI;- zS1PHkwCU1}%sl8RKj)A+?t3?B-Q2AI04vIW7zxp`KL2#Bh%1?sZSl$2xjv~aAi>|R zNUEK2Qxe)oO1S+t*!^#*naHfu-E5pg+`MRS0OakBs(2m5dXe0>bR=Q_`|j)>1tt?XlOi3muZXCA2wJ zXIq!>17Hh7is^n0XfprQ#yuK)r$Wau)RiS>a7Vif<>x`q-dHRo^~~}pMapxc*C&^GUi z=$-#q8bMTC*1o=VOf?pu5;Dgd)S+pp>SMZg6GR<5?$gY;+%ocxcjtPmf2hAmPix!?OM!O`uIu_f+w?lPsHY%5Uv3{e*fz`*vC)IN)AJ+W8f9US+zQyyj$0U;eiglO`KBk722330J GWB&~oY(3`y literal 0 HcmV?d00001 diff --git a/tutorials/nlp/images/spellmapper_inference_pipeline.png b/tutorials/nlp/images/spellmapper_inference_pipeline.png new file mode 100644 index 0000000000000000000000000000000000000000..07d85d2e2295c4b4005ab33d26fc10cd56936a39 GIT binary patch literal 146148 zcmd?Qg;!hMw>BDDqJE$;3PrBGZ#pg?i=;_mLnifeFpm*Vd3#hu_TH+_HSJ?A@j zeD^Q7Ym6i#J1c9iJ=Zhmn)0krMR`e7Btj$r0Dvm>Ra_YWc((}vyvcg^270BYeLfue z0b{Q$DF!GXBR+tBfi)4869oXOqLH8U;Go|Te|%N92LRBz|Gr@QZ3+zmfY&@JaZwc) z?c-HMftdqRgtJ9VX$-S`VAa7l>1<`WT2swjF&a8=NpVVZ?nN|*QggNDpL11obU1@k zOO;hXC6%1W{K$NJDvZ1cmDlxi6!wuuNIN8b{^lv;eiV|?31J_<^tisiH2FL2|J6uJ z@E-8~*A)Q32Mv}L=l^wWAND;E=6_%Fkq7+$x|@#Y%M$_4{|H6Ixv%Wu?f`4$@PMby z-J5c)fLpcj;p}`%lIP-&%WjUIIWmK;z$Kp~)2T^a5QpH*Q}Cqt6y1 z`DDl!>*(BzzBS5y1e*ceU`^#_JF_@(08fA4vfw`E|3xeS@JFU+pz+$KvwKr5ua~ky z2Y2f=SQzzpVo^mdtYalHNZb17iim5IFJBFZBw9iC9$-J{WB#Eo6b`@9_2~d2$Re_T zVGRKM#tXQt-o8jEt)*9g#Ss(*3B1iiK#7ezC5H!V`!n~p+^(qQ8R00M&=N>!j7MmVPmjKpjF)Up1J`aer_~ zSN#@Y05$$sVq8oj(Lw<_tjZr5mGe_iTPbzh!6P+Fqmkffo0WFSIR*R@4wGL&V+^Cd zY;Ok$0nKnBMSS?sJDmALZ!q!ZUr0|}!UblA$~>hqlqe8$%RQ%>9n!V6KZKRS0A^gC zBJ7`cS4UD`?ZRX#2S2H}YL)~-Wiem^v;Xja7lepH-)<`(vD-u6r~u-*kXucGJn+@{ zY9M=`7vS@bw78^V{Rz)~`)Rm`N^b9ncPS}G0cN6C^#3k_4@XYco%tsSm3~u&8*|3% zo`P<$>gWK|oPA{4&_T5-zo9!;FaEi>1b~Cld!QprND<%<0u*2Wv%pya{Y=(jWt@k}DBq&gD z-2ENK)K`07=O2UsL=I+7*LDm@PP2opG3wx zcT{=H*6;m*rF<_(q~A*w839=*E~iLtV@%q{*0m#D@1U4V5Gn8`L4oR&&wAqIfsXCP z+P7B((sfKMa9<&0okuQ;=GHKCO`zHCd4Y0H4y^(9@zZok0~^Tc)I)ICtlAVFpaJu* z_D$FyBMZFv0R=1vbO#CnrqmWEqgAh&JaIo=LPh8P&gzehvPWj3lHLbFU17=Ij3biE zyZQl~{^UXCkO-v91M;-Ej`$Pvj4Wef;~FkIx;OOt7^(!eN*p2Fo30{%1;;a z^s<#*onWOer6z`yf5+w<_}n*lE$KiU+JCrS-zWIxI;-u6O;BFIkBI;ug^2*{TGam* z=qDzf!LFB%g>TL)^GJrMfGok036(qrwQeM{o>a8IqmkrquYs>p@?}Yd@;hAohqBV+ zcYD@EzW|mHp{nywyMk@H{jf69M9rS{>fnf@0$pV8ZK9KlgyZ63bm*V}4mtG;Rh%VV z+9i(hZ8nuzHzV2%V3iS?>bbmGOCWT`0+p{J(+(;WSEQ#MCPqt>AhSm~vF`=6U=4r< z927bKG`Ba^pni^Pn+KWeJG~N^eGe>Z4?zjr9{U@aY8-OtUDS>4Ysj9~yCipT#NkL! zYlz{=8N(Nk3qv(;LIPhkd+16i=2%EZE@Fwyu(;S7m&pZc6tjdDqg>Nhk z?Q$G&4WAl?_XB&+l{C^WgCVpS#+0eoKkqmZOu} z_H{ZMgdNrhYt_$$#b}1K)14|wFdwbumUOCrwFTbWyNZM={WAwfBw-r+))*o$Ivr7GIB-Q30sR2|HC zyd<@uZ6lSW2Aj|mL@5rtZ~?D2>{(3djCihiB|RaWz31RCKrQCKn(kmjD(vN<9`Op| z{i+%S-4Gp67Jcs;D#LXUSLXiuJ|gpH=aYFgOkVc~oCi@^Ew8zjZQmlO?gSKg>d2b! zFj6qD>Rlie^9GXvG?3FElI~x>u~%#Bic4Mk%YldrlN5nu+4|Y}xq6+~8UZfzL;QU_T(B_h+>XBBccM9<{g+MqowzxswVT?2 zW155D3evcNI$Q(?FP@KF_Z1+3i002kqLAlTBgEQ&{GH}3=Ym&^7_M_e3Zr?{IO&Su zo(_!3p|G=}`o%a!8{pO|TO;bY|KpW0WMeqme2$6VD$rV3rO#7<#1mEUvFPlI*XiU9 zLZl9>ByIu`(2;(PyuQjwN#LMLstR36U0%rxn=WiZM*G@A1vV?;Umr^+fZsWpsO?$M z15KsXJ-XN}OpWr6=E&bL+rS0Dm(ohS7J#7v^erzbrMb^7XbFAj=~S-mR0g7!3ZVD1 zo|6)isU)%j$y&8gBl9;d4WdJrf}DuZ`wXirI;M2`PyB)BypG+c7;y`PpXW3!=r@?v zl(_9z!`ptjyrCTqUxt`}Rok#q_)1tnC^$Q227v{jBw1`Nrx zIGjil5eL8ggC#2z;~x{iob3;)pX7r}cdS`mL; zDj`r`#5u+IowyZn9QjKRLs_rqF9Lg)HWJ8GZv~}29^~hKsT=<=Bdu-^?4xxp+ZM^; z(!Bx|tIUMuo6|2@GT7_Ji2){s%@XQi3Zt63hq3SUxm|~1TPH0*rO|jlL8bA3QozA& zMk}6Vu=3JU@?Cmssc-n}Z;rZ^!w6mQHXFAIL?PH7>?miwRN-E?NEz7D>Uzox67;or zJ^ZX2+T=&a8xO-a4mI-p&2)^v`NX|_c#0`o>1Bt%e8IdFlV|h(F)Pz$*EE<%UNRN6 zkYT4fSI+JO^jvu|z>~Hh&(p?~K;G*;)d{V5-XD_DJV5(A!QQBLUo~&Begejouh~W~ z*4`m)iFl1VzqzR|~g?oQKTY_W7Z<{UtR^y+Be+FI*y$N^2 zbNMyD_=o#4YGb*T&MEu%JMCX%Jr-5{82Cv@wFLhH8*hWsmh@h4GR_mGe=z047S3es zXX}qwcL*YqNAqgR#X9TI?Y3h0uFno3%i_;2No_`v{)rn)n3BjEyNOY6;QhBr6GHga zatb=w_*Jy}RNjd89?E?~4P55&KPxga?`|Eq6sI`OWxXQk;8fJA&%D`&>1m7-Jx3}z z-QcO$$#ZDsVYYIkYdTSCo|Esi7Q+1S6;`X(HSRNH4KRrUY@!P`#J8@y5?9Iuqnt8} z47azj9C_n-gj&dbK}ooAYkL17@z4>t*aYm)B&&CVM&$Ab4+o^U+B+|5xVvM-4;xv?I?K1wB)gcX`_DAfD zw@DP0B~8tIz%DLd-yLo^`&_k-z>i&hcdAQam=KAJu!Ootx>kQEx-n{}0-x3WZ(!H{ z&QO+9Rcd4Cdbxun{1d%9JQ`sVQv2vT5$ccYZoL&*>%tSg@f5C_Zd6;hjgm*p5Af;I z0(eAN+dH`xAvX6|4~E$21w6{Wtw6abGNzSzk($4!kp7N+m9|oe z6@X@^6kpPOF!zfATNrXF7!s~A1c|!#l!~pOX_Xxj zkM5q3+t$%cXxx_(sMe;x%u&S5zYXrftF0ukjnw8%)*C(+hlm}mEEqhuf!@ER)b^{q ze%U@S`|2F|o=~z%Uc>c6MPjDjHEX@ZD}M-~0j+$4hJPFCIhIu}Z%ScKB~$6VPAj*J zA@$sXRI>hVVi2Q<=yYGx4OwSpTmv715#k_;JKwz^eMb`~xLL6C0av3zj4 zfcsvIN?N#=(y@6yPSx=|!#IHIaBFR3w}T?cV*LF#Zh8IIIUbcBK}OFg>BblPeI|cJ zg4@%B4919`OLDP4Sd!;-WvP6UyXQe419Pmt;t62$oSU)#y5v5~iQnA;V**Iv|H~ym z`$9uioo05pJ}`_bfR{=9YC@>K{3WRH{)HOJPHl$&gccfE$-;&u&+dcuAYF+5MokljD|1tjA z?Qm@RL!3l>vN7WMbYr^>a$s$evC307j>Rv!Alm*CfpHu_B7r7p`fVyJrW0L7+n)WEw4MvBCU7Ldjx4f}&gaR=g zm#=u7*X7eA*ILkB zC|Zll?aLP(Pd1LRi}MFLl@^+d-2KLMHUYoPdXe@5t;VxW(Re)T=r;wpcBvUwu)4PH|zTBta#HZ0=!2Y}cN|^XCzgbKP z5)kc?}ZTKOky2ja7j9gYS1B-qsA7b*|%`>ysS&RNn%(+g<`MO|m5LL+~Tp zspzJBUsIprS7=DtRI``*#H}(UamdYGDX{S#;NcIkjO4FAOuAx@a(v{RvU0OBAgr3q zAG5}qgbmCw#kw(RoDEpoqvX&MT%B0GBzi?MQ=ei?ph#adCmut=J~qS6 zs#k-S%*i3CZg-{4_I`35ZEO5Ii8>Ig>+nZ|;!iW88id8+k&ac>I=|2R6;fYk4FG^UeTqEo zReKg<`c}x~Xs;Uu9VqyY`!w1jvQL;d!rqY7hwqer4-rx&H#y~ssy-yuPO0yUCi=(- zOU@cmJm%q`rqa*NHepJkUjB*K@fW4Kw9Cq8FS5kdui<1eGFB0aCKgW=oU3m<#c%Pb zzN^D;QK(l}u@yW2JpXM2?wA+jm{j2%j1ku)JuJSr57?EB#j3-_2u$Ng56mQa_SCaZ zu}V~$b$lH2)XpKJk^;@KuD%~@Y_H%q!OTDihs=fr&|1{)cg*NZDl%=Q_T5vFv# zbHG{1KRe^>iV$3BI3Mrd{k|2jcKLPo=J9GL*!hLBr^uzd_`#OKXdL5Kq>mCeJVOe?t6 zPRuEm&FQG>I)X@HCHCHAz5Sp4lldP02isI;5x4u{QMFyiZvzmBG-MB|F7HBK*++%< zVb$}88pb18C#E4YQS%jnC+}kLAet3 z`xeg^=Yh+YQFku#$WO;Hk0@}TB7&I*#7KbD?*>OSj20S;>R1)zCt_(G3ffpB{Ho1W zW*s=pHEmK^tb-2&D$Id<*_e9ZarpL|K!%n9)6+Ii=uKiFq znoj*~yq|%5rggR?9iLRNQ;aLkmrH#6K%fS9U2tjM_gnC-*4RP#(_6#yR(Hbp)DwBS znniBZXCHvSaPV7@ebD|*I^L1Bx`un})u}OGAy@W_wb2$ETte{XRsx_YXT`1W9(E%8 zW~D3^gkCZZm8~I~TUo=(wdbgHO21_+`!F=0ZKdx{UKr|sa#0@Vz*c8*1XQF}HXFj9 zODuSU;5~yleKbY6DXe{<@awWn zo*U6u%!eFrix7DlUIo`GEUT;#A1pPjjvZBwquj63Ii2wf@w+BoI`J=W;aWP}Z!i-y zOkMs!vtg)0V#j^+2mR5mr^d(BD&+!kj1ZI3vnz?r#9RjxtB!wkv?RB|ND7`xF%7Bt+)>kmy89?K-MkZI#hwF-%cV^~MuH}#!kG`a0ekJGy4+WdM&~a|G$E)&7-HJ5>ws#^?8o5}|@h+mZNue(N>S5s|5B7|i&tyfrXok5u=T-YE5F8Vq z_7(R-yJ=Q3Xpvq!9I&J0NZ1Sz;?ycpI;~tPoz*ybr~N8qUSk^=6e;0P!RfNbABitF z?O>qz@Of@je0_({Bk_B_jS3+zpN6_}fHSUAms|sXlG|2iNs0p_x63ED{M4U6H#w8_ zI&g)ah2nUu+HJlv(R0UXp~k1zKAO}2aZz7bQ%3|BavEt&7*9HuJSeA{f_1X_4Y zKY$ibWel^L5^*v0n}yknbK59~{^`mu(9zF35)o-t8Zud;Ae`Wxsz;7?{)JzSz0DMePPz*^ldIxcU=aWx z{C_1Hv-S+I%8ItX)WWlHrhLvU?{ih(Bz4%Frp9=S`xXGTcAkK~*JLW%PcPGasWyGF ztKCY{E`l%T(I*JaoJ%YY7LS~X^Y$q{hRuxX(n$R2v`;rKYf9_ghJ!&uI4N_S0erS- zNJMA|)bB#Qp`m!1ATu~LKc{#!$XIefM zj>I=<6euGBRe#?ukP&;;e)RGI4iM!+uf?hC0il1dWGOK;tcBf z{#EY4$d3Bv*j^q}Zs&YwWfKk-U|MN~G&YZW_h!JY;kRE76N#-ZDv1+=$)fIe#`VEL zBDFv@=FlDF1h@xy>swdW7`n4`a6Tg}n} zhF(#)kx&vTHN5Xhh;~JRbQ83Hn*UQF7SuT}Q9gdHhL$}CA4=u+C}r&DZ6OYzH>mD{%5NA0`DV=?ZjN8(((+os1 z372A;p*A^j9+E8_{w5aMk^VW${HX;DXg2ijvtC5l^~Tc2$YRb6;#V2bmZ zZtQ{k{vyrCo~j9~F2z}6u)X-uN{2Y2_sN7PP)-0#yq?z5#@zT6*RVFGa%rq)?J+TK z)1W~pX`eL8xM47;LlocC;#ozNX^H*I9Ue!I(RPEi_Qu4Is96Qqro{6(o0K!0Xzk~q z*mO6$%V;f{og)vM-xq;f_u5RF4UzNVtp<+pqnLSkqo&jPuj+4I1Y?m3o+1yc(!=Yr zDRK5G{tcQkAe0zDRAL&o{KI86{XsI%r9YxlMQVe#>VK<)XW7ke{S_V0&*P5JV!Loz zjS@Z+%Yqc`Vv2MIHxt@E7aeV&nW+4sK|!+HY1CKUTvmKDL~6nGI>&V8KMFubfswlu z{jai4*NYtKG}Up=1oCM`77dfs`)8e5-5ICwbOv==IUz*1D&c2YAP>(9m<@?kE3qn4 z>ZL#-;OZg*e=I_qmSaKG^mVIZ0EuJ+CtuIyXqN8b&Y)&t%MEsO9JQC9$M*-K z10~aZ>+(Vzwdtbf(;LcQ&mok@%psMxf0Mn2o924C;=w1{HYQnOL}fX`++>MKZ8BLJ z$J)}`CD@0=L@m4A-#$ffY3?jd$ly5g<&|MN`X)xh-G53W*v*xn=W?KC%E($hDyvJN zVp<&jC`LCbN8PsPIT=Z~LlrSq9{T;^YVL#b9`_RrIOw(<(M2LM``kG(7$xsuzDKsr z?I`+}9massS3AwNb9iO)`DR8wd76y6%k8(L%>UQ7iN}F_w}kJ z^9k#fPGOd?xL7jt?BTkOm}^LD-rQYT2`Uy^a$%f;pqE?Rd>_s-18wACp)D=iaPi&*u&IT}?N`vWhtS7t+IY!X7S(J0_;Jnl2?IhXU(`y7QsRrm+QV z+&yDx=mG&bliUHpR#O;u76KN`qrVZDEv1cYMDAfoog&&WVAKJEIHeQfIEBo3EG=_!vBt6TQNYe~sj zTV$RCUamp;Q7&IG*1rWux}V%8_fiBWeaIPj^oox~8VMtDM`hp13C-Pe@m#jzQ5Ste zXa2m=EMV!fOhJAtHv=}^B5fL%ZgK+9S3n72!2;WlU+YCdMarA+~3Sv(d;AFFMOwX@v?KCZ36J-$K^8e-#6zc#12vh6anvQ`OdAF1?t zN1C^Ag%qXak};d_y)1Ou1s=#BeZRs!T>k@;^`+TH7WB~UG46Y|>y5~xRU8RuWZxedJ0BESG{Y%%=8p`s)X=TD638|Zqp2g^?+7hgYnAj^=;r3^&> zhHg>0>sb111c~gSD}MR1^C?gc#fXFNZsn4t$Zu%*XwR*hh6C&~!-C9lhC}M)cZAXs zXBn1bu}!J-{J=Wjrd!i{L_i$oH$O{c(QE`{%x1-jiX*TWAY(gK=o91$WYe(k6kw-U zV{e;WV5N~cpV6`V`K1o=k$>WAmXgBg#!}K*bfWR|rON17^Lq@}ClxlZ&*y9!Q>Gd2 z8e6W+0v~3MLHBYq#ZRM$w-zrFZw4h#^>92h$&eiNCPP4ba*H{$8yY9NC$(3y=nvSl?JEp-e_h=B**_?qUGg(6|ym?lyN>btw{ybFMj@I>EeP0t|TOISSF)lW5Bc+|o)@8I8+ZxZZ{?yR+gkcvD zjj9KFdiG$JxzMWi5a2aC1ZV^-c|$2n_yYNEukYmm6Lp@Htg!mUd~0fU za=%)JQRCMdJV}bpCk7W}q`XZc@Fgx+cllftx$jUc=aa%*m-Ld|b1IqD=}pb)BS4&! z?33&&W_p?YB%$_eqN9{C3^rlj_457 z-%2X154CidrLPjZxqIDys4YrKLUKTffog#c#ou^@5n%8i7Mjn8sLN{DNAKscQgL;2 zHs`}_M1_nhba=_^VrD#QZjh-&pq^14d(Io~h4J)K<95Mw)?zFE6$WpTI=${7wCM{e zMoGskj8c5+vg`%XR7;GE=wd)2G zIp)@%NqN&e4v1!19+my1IF^9N)v@l^tD|aev|%|s<_XCS{)-;GWWsXY|`P?Py>dASVoeF8%g-Mj#*YqraPV<16$Ao zI{si=m=*Fd>CBYOtL-D|33)yk?$yhPeHXacAAimhuWx*#h3fDy-wB-}l%iGO{g-h! z_aACc)@MPFE9?ZmxunT%r6RL<2~^GMchPlXQTE&57G(=Vl4fPvvrl4!{T~E8-J%Hj_(mJUTsny8)%!99Sr5)!(_R zEPsSDELv_}h0cj~bUap+aQ4x%I6(;hb1IIN@+tC;a$;C=1x?YmlSM=v(r}T!P$~SM zvS{-Fg*|(pW%+I!;+Y6U3z_6UwA@0_VI^Y>YJHj|qVvGYpNGPgbDXSMxaSD@Eb(8G z%q&ma1rL#e!!s(BGOCT(SkZ?HSwfi%QQa8$BGly3N67PsllK~!rzv+c z%`7|E;T*q)&?=lKhmIHJy9cC!c;uaA{}{X5lW^KO#yO6pbCEVBscA=MRCG2q`lgvQ z0~yxgXA|+PQvtMwu~C+LPO?4FA+#fsR0~BjsHXq1IVJq{?%8hqss9J17-FB zJpQ4Y3KBzE7iF+zMPH#S?m({9!I_ZPCaHd(y8a~ac!lv9q?MsFF}UK6ZY-DD3iS(A zW)mFjEa;?fPWAc_LSoNc>5`N*f7$#Q|8=KmQilBQCTlob>V-2*?D~n`gt4NabFb=# zumr2BEBbtV6@j{0a{;+@faN29PrS$HnGvb;5U6d7E5uS6yX)0+997hGjL<%GJ~k;~ zy}R9sX_4F=Ta~BKF4j(b(Z~~1nM?E(8FfK#mpyD|PyXQ04i{elsd5(lsPBoS`qJ-R zud$H zMk)}Ew1{}PVin_CqLMdZh(33%3Dhz+JP~>y^~_gu=5B@cnM7q7*83&hmhJ{KWnZK` zN;Swn71R6eMk9z+*war_kfgvFQql8x)rSNk74mv|C;7qS0Lo4!qn)(!8bkm%VEnUP z)M?3Hvlxr-k>`;isAzJRfWXLjygh zUU(X3uj|G7RQy4duxtn%wxf=8#fle~{6-jVuskh^ZVbT7sh~q;3UrZl>&)2tg zp1%E6e)n$#$)A%Ajj&dsjBAlQUXDp$%Q9Szl#$O^|7>1a1^!9JoOBgdsl4|6j7Ua9 z6%E#Z%|#oJfnYD$AzYw7DUeWq3g?tUh_^#maG8DykuU#MnG+XSwRzB)IA}BZq6^u} z7mp7xw~_^6Z#$aeEo#X_OMFSiG6$!uAhQ)NqHun3+qCI!L5c2A{ma?NqQ=`BxhB4s z>NY(|)P$)QR*>yVtnN+zXjD&h&F({vL9q$4K#-*U0(c?57)bOsKB%0F2&E3!Qt3LKW7n}f>C!u7x{*2@R(2L+)($K z9ryRH3yV`-6z_VNX%0OpofFFL$+~TntmY?@S#i9P@{ST+^^0_=lrlDp+8EwMGZxwl zzVT9*+|v7vP^Z_4qPuG>g?k*Gf*h2TU5Y!gN4!l2v4k`LAD5hV!aVrt+5D zgccE~h#*u>w!o*8rOuqw53Ct2g2>Ef3-G7QwIM6I0gkqp=8r!v)R`?kbC%2yCu^&U z%;%#Tj4!Vpxrl)+FBdIUE!<^jl7l%vQ42$hTq?EBRylM)DNnl&yzIu1huQ6o9K}+x z%V(53=W>L@A=X(^se|Cp@_K=J`Rk*p?CGV)LvOkb%GaA)Y)@Vt>I$;|Lxr7~hSJ0N zY=GX1bC>R-1e?jqcQxe{+8}(RY(vi=G1hmkNv0KFM`nvoVPz~IA>YolhwGbf;hK}F z>hrteK1o0JsSJ$Xp(4{jDdOaUxtz1pJ(1F8d{+2C!IlkH+VdkcS)(Q4d^!O-@8gbN zOr#pX^FP5cQzKhGnJWQOCwM@dAR^jDN@W^$r?s!ErJ=9&%E?t0yAg~HG@nazY`&zR ze=<2@aXw|^c6+3cBy6|xCn-J@DqkBAbbAs|JzLu6FeoNeU&L{cmhENStE|>{vRRCB ziv*{@_8=t9ajkdS)}W7YulaM=PMw7a*U8s-z1rZQWUAvzo?%OlLk?|ACJh;jp!g6S z=ROID3@OZlLYnIcvl0I9;&7@^P?z*(i%S>>{M~M{zy^`*{w%Rnm1S`?qb2t6S7p;|OG@ulyfT3#pXTcpMjMD=7)Qu%uk_Bvsm<5^Dk zr*7GYPUPiQW+Culighn>4}?l#@vy;nppYhhK2)+{rR#QZR#ZhBjY;+M_9!GiNzV?6UniK*i zYfu~qmDif+J+1}R4>k+8_@;UEJ5^5?Vkifc?GhsA8(6k0Ak*s#DW#2bB zg%u|jgj9iZuU9+=pYs&u`zfFt`Q{Ln3T=r%B^He;w4E?#AS8tF1*x_G$m$(SkZ=-v zze)-2YGrcwfA;=y`$RD5=mAeswmZ<>i2w**gGL1thVOxOb0S-eFvG!$G}+nbfraJ~ z&tsnKmBQO^_0Kg${;{~}JC2sW(Hm&vKM{GY^1SsdICt8w*jNw_`XsFBC|v{5C(9=k zC=53)kJK>C60|?eoj2ME7L#vxeMFn}8tlV&OCPw~8{zu==|KJi`A-J?u@r{tW$r_k z_-P8{MV$b`}nVy@Xw1{ucNO+2i(4>iB;PspurO^&(xY-k8At z^Wdg1Aw?QCAqP&v$2OHc{;ZruGw}&tsnkI06aGYvQ1gE}@vW3iv}NDDQ;}@gzoE9E z1A@KVB(g{DTnii#IHLceYkuLi<(hkkB$1i;u}=RLdX;1pERuJR;RV@G1O~P#v!YNWGP1b7)>iKrhQhVilGEK;p_k@p zvb^v&%Tlvkl&OMBr+qAkS9NKl@5oI+ta+DkVrgPC1{vmhcB^$WLa`8VB88Cksx3y3 z5l&7-1fPZe=c0_-YPt&K!Pb6D6>~nsz)<+Yf$cvKw+TIH7$cG1TTun#%D*y;8yK?Q za9m6+Mkj?ILfI#mKy!fq=c(J@v2cNwX`V*E4xaDs&UB@(=h)DxX+Mab^9zHMrru+H z(t)LC9B5CmV@!>4kM1o0{jv|1nrzS1s~U4E51WwZcF`7=3P*|-%zULN%LyZ6D&gKd zI$$?unEEC6jY!Bt$7l2%2hr&U;>q=7vZ)x2`l}}f(=|NV5SoqvdpVLE{^>T~ruUpm z4Mbc5o^-&)BazI=tWqztEPtYEu4GX?CT5Sk=JI?L9HLMV`MTzj41T$LU>BBi0P=-J zpX=}5X~yflwhZbakWG_GpQ{x`ByH;+Z5N7K?4!qp1=~(i%NMNkO11}Z`IWbgbbq&R ze!J@q{ILB3_xB)9Gw9{n?= zp3Z!UOT3qxnhRN#9{%}(bQ~@LQ_VCUAA3HBb!ucU35V!0q0rDfr@5}EpuiQ{X9bV# zhgYiw%aDq;^00oXy>9ZU!6O)c2XZQfv?6Xb6Wt|q;gr|FK*rV`vG>HO#j($~!sq>L zNr`kNwAifIdP7tW2JD9Xh9zIy?ap@|6S0T(VY=%$ zcX`HCNf~MFEQ|LD9`pNzzhE^|Maf;i*venLghx!^O>kou?!@=CElq#>x|BI#0bZ6&!a=kzLwZ6 zt_+H*wy~}mzcrvf+L<}9ku9vzGoeh5K{YB4>*ci2 z<6&m$jgWjjOTzIWBGEG_dKcRd!5 zn-vs~l>EZ9f{f(cN76$4Sl+@LkbKIyuJwbmClB3qsds`zo^Rayv!my}-0VtBn^|#j z&6cc?toIvUYK$Y1=VLTkAGJ0^E~f$v>bY7I89Btk?-OteNEJ|`X&4x>iK=#6u7%}6 z#h0Ibh6QA!rFS*8fc^Phzlfjb?*sZ3@Do7Ttc$!C(kbgBNy17u<34A4^1)?y!ihTK zUZfsD#1cBN>N*8y30=)W?oI~Jx#I0GEl}qS=hjiw&Uz`b>o}tSnBggszMR)EC%GJ4 zR{fJ*vj%B1>$=04UVw&E@Q~3=HD>ZA`!&Tm7v@9F@_o9+B~zM;;T1_$E1BqPPJRB zAG5oSSXiGr&~}@tqd0?48QAU4l}$H6mS>az#fW6=&wWOA0OY0i|Kk9uesR4Wy~sSv z6vg%7Qc4G`Hg&V?045znJL^*oM-^u;EQ8XA9T>9>T_Y;t@d(6+d>cB-IRa6cP~-PVsh zxP!@z7|NSv!N`Epbk8LNqm>b>x%*CpTn04tqlma(?Qftb7Dio#GjJe`o_HT{9qh1P zH;H{zS2EH!yAoUC=0<%!?=u{Bd+$JwH@@KV1~ z1{v}I|7hLS+rNp#*mBNaj*?^+T|ji_N--XOnEUX!4T2_>zIWBKnFy%XGVZ%+DiPv2qS0ukRQ zu*`zguQW}+*2f;8Ur8Cj|FPI?cXi|3(ti*3D37dJT4l_SWZKpn2~m{B(l3scQ9`gn z&gD+kU{WOzN@@TW7$}f>R@nX?6mtjgY7+g9)~5L^Hhh6lCIvPMg*dBrc#jOBgg4!3 zn$?9|oj$M1h-Ta-aH|uHl(zXk?TU`&X(bMo9lvs`ozb~v*qWMiogh>lciE0<+e^rF zw6<=X8G+o_fXEJdwT-0c+lDMZ`J}P(vM`TBL>EBBdt35w`B4^pn z_Y1s~hWpg<3DabnPwKe(s0OivA&K9Nr5%XFOe(H($M~!HBKI7B%^gZV1KD2SUbAmn zBRgJEnMLJDqf)HNMW2d8mR#GvdTT~(QRs0GY2MuXv{^L}XW^eza?RlTb|e&+d@!NT z6h3pu9kngcoXxJX!T`>?ICUE+Z_ba(lCyC7ioY%wY0mFXrQv;JRw4fMMNcxgJW_eZ zR@<34rK){C^af!nf-t&>?&4+1+}3}yS~s#6mXgj^^ap+B_FC9VvRkB*++ywK6S|nY zTlZzcYYM(UH2!nhoh`GfnSN{>nMnp0vf5?EqqMt%O1s!QBzmjzEkildD2|s2dqf|N zOM17!qOrRj`{wkw{T0D#%v0X{p7ETQin`NN>G_3B^5024B&6p(PiCaLL#*7dDO_Kh z2C9?Y8MMpQuU5{Z+BR71RF#%E^NSho81tC)KTEYw6q9vzoj3}&pu%7KZqMjgqgsG!(dq_eL=Ycej;%brhh zw)`A)UVSQ@6_N6VNVV_&*TG_#{i}*HS@raX?<_Hdrug3q;0Zc}9xP|v!UN;tO~s+H zOFT%U(b&6Dwf%9jRzD}7>1IdJ7AMsiqEitGjazbO1`dz~4Fq0~KL$IPpT3t?P&xWiA->MR23GkXnJJToKC@pVA@d#850jWrt0 zbX!A-K#W8}PP0koN3Q1?-J|ARy;(1{JA0void?sGL_Mqj3j47G@8 z9;{v=I1j1O_=V6@iW2(2&usSi#LJY%!xw65zBF-qiFi%Wj`Z>M;HahLqE6anchvHq=WG&M@-*=#vMH+(OmE%AHl{ zhwywX==5bhwjH|blr}67B%7Iqr}MYmYNt~D3Js#xY(~c`E-U{Zvd%KB&2HV+Z7FSW zC{P?)+}*XfJHg%EUE5OJ-Mv_FcPMVf-GaNjh2*5)THo3GI)@+mM*=hRo$@?m+@pt$ z7TGSZvOBkl!mRJQ;)-_+s@)%R_dG8r2K#Lh=bwAXF;Yl=aiY1diNdOl&^G(y^wMSE z)!$FoP^uR^FlF@Wg{D?3`cF5^3P4^oBpD~EVAJy<_sdhp2~qz?4>1I2_mXeYg8RE1 zT>ri-+6t>gzm&?NxI3jt3qMe}}v)zH_rPFdV@=FU+@K zHf4eBkLQxX>0q|&kf{b z0$PJzG34K)z&R{vHOCIl6o$_e! z*7#3RvolCpPb+4)jz8y&3(s(vgZF-^*4f3YywCv1IIFmTdP85AQ)%yzAip=H^A(Mu zzljuPf(RW;o%XUrBq+hSSuwO?+#Y*M%YAw*Hs%*xd4XBBn#W=F+Aro zGD5b5${SM2+HItUS?tyEaS9T|R|T_f>c+~wnFs~^oNqUu+o!~qCq59#7EVV(#ZcX+ z$|pja7J4$*m(Mi0HQxm|cP4r+yDRlc=vXU`t)>L%-K#fOx9^znZMCBVM@)0^9 z#8?)`QblWUoz10YD~vN+;mczg9Frq>dV?e_-=#o6zu#;;)rVkEK-HSD(1#Y<9k{%M z_W7TJ!lWGxZ&G+8jDqqL*J#H~v&II-rU(yTcysE{oZuh+i7*vd@n_M|sN?GhsT@FC z#JI3?+;wV&vC7Ug%a2rH(PL)}*-h*P1zV$d_*2l8fw$|0Sb-OcYZ$wIWx~Uw>cSD> z=a%no=ZBa03}LY@hpyAyz31Y@DobAPc*SvM@ClzH@+~^=OIoUpN{;ZYT9yyoei`ia zhA&_P+|GU|c<`^ZelJBoTiY=J?$fJ$Y*_beXm|*@wGTR4RSgKJp2F{Kdr?kF9HAfN zY-RG|qI+}*(ppEANVr9QP?vpif-M-~>y}9J7D)!-{P!n*US@7tGr#QHi$TBD<}}!F zb>-dMrc%;ZceEmq7$J`z>`{2xyZ;B|=jgs>RI(h|zFAa?-IW<( zqOP7eCvckJzevx`4A7XqXy|lIZ=9?|WI-r|ody9GP^UoS$EZ*R97$Z>#=(iDqOwkR zh(yxvFX?h5RLWd$W>mv7kyEM>g>;Fr=a1#MD+*wIG`{ysF-QY3nBCw5 zwy2I?Z6A*#XJICN{7y+b@gzQy7;j47SNwCFyypq+#=MWJU3!w7bfXbw=sbYCqz}%@ zvSanfuH{rLu_HEI>O*H2aH0SuiIv`A6VCcOXLEgCt$YO)+Y*Pbo7M&j%Ea^Flt9W} zL9fPh6m!#6LY~My!wy3c58}HRTY32Z2pxW>yfX&ci(0pCI|S(5^E3_0dn~;P6Gy@- z*TAn_Kf_FmUGrbhOir>B@jyazsV%!1;Ns4YYmNtgd>+YCx3?6BWDPEc zbs@0YpG}>9|A=k9-DPk1ieOkeJpx#JCFY?Jghx#`=h&)L03|Q(6CLgQ=*K`<@#_;q zY_;Z7Rzk+{CT6jYPoFh5fBW^-@g~YkKd-IpN~CGGadxFTF&QgwPoxrtz?FhaJB?|F zhB9t+Ob6+Yz6DHV;Q8*t@ONz)nPK*kv0M-HpS?^Ok`5r|9-D~%s%QW34o^tljVxD6Er{17q!9-oFPd1;_sblC+)kb{ zkH?0l1-W*;E1eihe|vE2Eub{VEON}35f5mxa-)kP42y1&8nG@a!6HeZf4YZLIFdGQ zX7F9D(aL|qjrkCPF-2SsmLc`#>36q&pEby&{Fu?Sa{tc4eBWQdyLoN;aYSaVZByF{ z4^mdc|6h5v0WOSuB#zB@8ktitBjbj|@7P1)$yzggdCzD04#P4b(tjDUGIoHpdX1wQ z*VTXGsLdLG+GJih(V%d$Q+#9xZ}v?M&LhOh*pE6Xv9;oN`r+wHQ{wliN~z50Bc{1+ z`E=(k&U{s}8>3SkIitFjv!WlkcJaIESpnkN{p0jpPdf%7?slAnk2fAE=cooY312jT zHBfx}Cerz)3Vio=+h5{8HKC1oQrB?q>1V8{QH$-uJ0_AlEHVhiNi66jEK4jAa_Zs^ zYuBbJ>zFxNQ}*L$GgcE!XEmW?6&?&~8fq%Po;c&hZax(_(vVDwA$3%FD#j(-UhEj_ zFa~F(jH~H7TCJmUiLUf>IOIFn!k&?PzBR-76=c97l^j_GIYnV-IRUYncrS_iTWH;* zZ_UDEfWQwjJHoMdb!hP1r?3m65L2v%Vmz8;g~5r%m-E14%S97l&dk<^-~O_7Xle4} z#g~}rt^l9Ckc#PFW5jQ@j31$h0HMDd@ zTVt6LEDv}J?sTYLMC(?p7H}VL-<6X|$t_gy82H@vBjy}BmB)CHYR$S%&5+C9IvH!| z*rPj})78S=XNj4OYN--{1yL+(V8AaypFoG*y%++vk%4I`r&;D3v8LPBOiOu+^%;MO zfA+3#!LZKbCakoP#;8LCcAq|~cve;1_>LJTCnk2j(jr{!<)KVbXt`Z~nvbf}e&zA} zs9=H8MhY?rzbeX)`v81`6OW&k>X}W2ygaMwbG7Ev%9QEI4UNoDTO-}WX3W#2>htXv zN=9+jX{|0MN4L_P!zs=O^LE@j5_eR5B`uJ;z&LWz>?x+=q|eqnvt#MeVHLFlo6EkE z?hu_oh8Jz|^79R&qwz9<6Hw-Z0T`~^BR4gXyl~5p%p*cbU6q_Vid-)0kZvwTMSn?Z zJ}4_bSi8A_$-Re|;MPwo5LvGmO%iYc67=|SFImB*Y$MW?Do+~i*E4&{Ru^sTP8A86 za+8vpPNZ56uH+2Bu7fP}C=eD!WMSFyJ?l2E&T@P&Q*6KJQDr}IvRms^suRcL^Z1o7 z=%+^Gug((E@t!YCW*`66$2Q!g}@_JlOB$mMrDPlha*D(tD zy(BSleM=+iD^Dp=YEzq+okeH$jO3GmEt&6Wl#L>vx+1Bm@$*$S&iAba2U`dg2%5g; zE2+t?;S|Ike%roUD)+};QaKq^>gKjBZ6q_?K$-af7{ zJyVGHJ!xv~_nMq>(hcO(MoQvgZQmzErf=)@Y|Z*U$xc=@=E9Bgv@Ne5e%R)L(l%$( zNuDrh495E8Zf=^59p@{R20uX_-&uMI~{2yrlqz1&(nvGSleBcMWt_WEIuPo!tW@lfS-ey-%ZoUY zZ2advO2ev=kmpaC3bG>LFW;2gdZfrf!Hs7gSNnnfF8Fl2C?!0qf_u!ZwX z7x6brosz2Bj|^G2_1%!LMKnkOz4}q25hAcQ#@>~&=7DN)MnW`<60ou%EvIX}@uBe2 z?iQf6B+W}!J(2d#_+&bN{EvBSB;<)xwamS^r~yE~CI@K3jsz6dLJE8ddCk-ht;OKDlX zGCn|Gth3s-U!Ks_{C=0Nyz=Ox)wR8|+jk#-=UV1$9@`sF29uOAgg>mtjYa9)D7S)A zEk3^qb$UJ`xtLVs0ks878*(6s5lunbw6SLDV0Rak{yes*u^OJRH8r;NY$*sxiUZ+; zZt>FB44>^n^+g93k#Z`7I$lnm(V%20no(S~#4yim# zEDcOMF2+{!UYNRIrL;qC4l(cG@;%N$f1NU~4vI#uMfd|Ujy=WR`;?iSN zZYe_z%K4a}Dk2V*tQp-W34bDnJl`Puq6cw)=5!iv@*`y>4t<(;=wG9gq*HE5+Mzs# z{%*OE3<3WNNOQ83nU#-IR$A{2R4NT#lw)WW8%T1U;(O*lzFKm6x8Bs6G-Ju}f(@mB ze&NO65p4gcEMO5QQ(Rv?q$thU(8P=%M0sr89A)4V$zC@f#vdSU+6vxLu_Vg^a%Lo& zF37>jY>P1PyNLUf%HCpkEP485xhDlYsvjcSiMkrpX9w#TH}bJWJ*v_aL$NQXV)R5% z{AKgk!p1b&0pQ77oBV*qTFgrG#iJv-{9(ojTX_n$+=|xpQ*Bps1(xX?fS#To_s;A} z__8q6in7y$Ju0w<(3bWmVjq7#=7!s;4nXQY8Y4B$K4YJ2S znlqowGN#p+_c6YA?}WU;x_;9S*vY}O+0%jz57^m5J-`2|alLuK{+ z%4AjqUk2E2zScAYz(t{@+pe+(1cC*+dnA>C{?7i#=ZOY{Zp)wTWR+_Ldzmn8CE_gW9IA}v0R_ncVQ`yrOH&j3ufuA2n%kXhw_&mR)F#qxNJWoP zi1EF6cuI|T2g_WB!?!1Glf%i>o%bSSH6uR4Qy$3lH++W?e`?ujDW1b7L3gy3glk(% zhJ1g_4~2ReO9CF|n+HYWqr=N{nk2fC?Y1+vSVgH0RUsx}p^UV=3{Is_Ql;Ue9e0TG zh`>LBehstk%g7)i17X;_rb3eqg6eLlH$nD;2;DyKy4OseK4kD_D3G6$6jYt{yxBFbX1SDo?Kq!49W=l>rVhgc;Vkt{M)vbH=+4 zmvje*E((ts^B&79;!-1#YLtSUSnoJ*q=C%q_Sifm93g=F;&Um{Dz-G9%hy6}^o0|o z`#pb!>;sL*jSoQs#n^qzv<23$5hLC(W-Xc7lncM3U6e((bAM#qlMVd!3p)$tn&%#(OEauIiFwCaBX}kXV_Zs zy2L|;5_p;J-~Hx`@$!%b>I)4k${@Sw)w!P3ilWs2ux52aS$XN2R0wE-DCr7Yja4|? zcCl{zqIj&b@zpQXMzzY2RCck=A9OVwH@!7qI6-X`g*7aV)T61M91y|MYO)RHM?%Ag zKiX`+&B&w-)xY69Hw6F;Ll_yIpFI@blN_GA5=zcDY6FskW~Q+>ed&7CYG$JC_0#zF z|2#EHJuo8P zS<0_+DMCA4`))3<&FEqn>~K=0XwtH)V^N`Q3j?xTB#Bqr#%f(m0a`cvh3F9mib7M4 z`pP~LrEb7;9&oD}N~0`zG1jY89g=6IKgzx0jQv#;Bo=5s!g`IhVf*Bi3It`Ab4uiNfW^j`La6~I6PMj0)yW|~{^BqFs~heT`brrpnowGuin`(_U8t1f~G_pc)Y5GjaR8`ktp&jME4 z#XEqUU-h9OrI`#EA(;A!`PVvn-Cj;qn>l`!{-0C>CtQtjoCxP}4T)19ZqgUpVhWNSTXmZ`%WIkc$KAO10qRNIZ7IfD8G zS-~iW?aHrLQtLs<^_dnE*+Qci89JEE2wy7N7+yFzSL!m=*u@e)BLIWLPCs?ASqv4% zxk%(7fEtF)O7nti>ksHtc}MT|Hy-3bvLb$V@LQNx0##)wS#nU=;ffzV$(hcZM%u4`D^3F@(Ih+~V-k#nF|&#;%Wy45C)Qo(yP= zxKRsfq(6vL=hWOEqobo>l9uWzUEdt_e+h1?5X!-6j$leaH(}82vCmUXPegwEt zrlaW5=LXsDtzIt?fPpu+e@Zd!=uvzYEodAJfeKK+zfb9_SFZI{f&q}o@Z5q#bE@nQ z(QQvuZEr~AWpw9l^T$PcTdvt9P}?!-YZ$(#O0L@2%qrq@WGy6RUu$~#DEIL$2WZsJ zs8p^Q@f)pIY<0Nj#VwI*_ZnO=b|yI9*xrQk6+$;vw`Q!!C~d|#(o0L(qA4!`(?6;m z_{TA?KiuE;bU>HG2_H+-vtWuiK~_GYiZ|=chxM218B8v-BnC`oMv|2VR)Y)S#;uPK zR~w32-OP5ght2fymLtOV+5cA{6%uf4pd0kBMv5C6S!h+M;NRW;>}gns{ZW@r`wvO8!Sh-H31?APk$vMA(--on_ z;7fHdjkNgQoYUbLbOwYmxcqjsi{go{o0F7Pu^({Ij1~Sv%1Jdfm^x&>4#qn=os{au zufhJLbv}z5y>eHTJWc(o28AeHc0?%lei*3~Yo*7Ck}I6%dzAJXgYUrHmQ(el_CXK6glo9j*j@c!J4LifzE>drSc zex8Vk+ZEsiBViFddR+pUS-?4L#YnP9mo!de-^nNyCUfqi22L>8yZH(W5e9ATLes=B zn>bJ);>_4()Cw_A$KaNpkUkD-L8A>%0k_kB%v-?wS`(2h!r>xyQNnX>Jk(y`MD~m>?2ADdnmDH3 zFbqOJpk8U*;S=%z{R#Arh3gVuA7MCPnwyvzQ!u&WDrqdp3{~KEqBM&8K28c}2O}7_ zZDx8Xxk&fP3JnN(R2>>#j5a)P)PUc|+udM5&TQO@l^t2r%bMW5`E*Y>d<|<6%ePuuobMhT*zBQqJl^(S`k_s$F$5;K% z^Z`%x#l~TgX!*VCQtHuCM2fpjNAUEH9~WN%Fnep^_fR`H#2LEWlY?)!ykNspK;?}N-B9K6NXwXC8q2fEr=3)Ro$agl z2Gz`lbu6?=X!5CBCG@Z6a9BA_WD^TLkC=Fd@*~{^-R5+vqCO#hy>_#dyFDB$D9@Bl zAu;FoA0%((Ib83`Ne0Frg7Ik8I&+eA_b%;dzO+MT0OrT12Kx7W(EuK4`!6ssa;M7E zjgGy^7IOuNt58-8uXG7vA;KZqpZwVqW`H+7LH9bvvvz5~db;nX%^vU`IL84Y2q`i> z68YfREI2erGx6`MD6m{(-(&E^dQ^c%Sf_zASW)$}i6KaKM$ms)(5k-Z^iBxN(R!5H zi^f9jMnx|!v@cczNWNtK6y$1>w3>fG(112R2a+{8?yQuH1ENk+TL!hImS7k9hUFqI zs#n_oCLUPIxp9cQ?JP`&0FGk?Ah+$PS^k3PaVb#84*-x&foicS%(q8;{xsGOV`j%F z6B?OVEt0{y@=J#~?fkxch`tE4qd6FQnplFgRa~fW_gbidamoFCk~TJb?H_z&hPmbE{v&+cAm;~+^z=_;PqG@BAX%$%NmDC930l29RxjFp`Q7@G_`G! zToVN2CCoYD*GI0Vx0_a1kc*awo*c@q4cHqflB5+D)<|;^`SuXqu@QHO&iR61CTke& z`F^d;Be4lSrKwe%SFA2REHb=gurc0bd23P^V``MS@qE{i%A z%0@KpzwqR9ZLb{$gcAr-*6deuwO2~wam}n8q`sci{Vxq<*7Kv_oJE}PsY#mKktose z1_S0sqtt0}WpP^$fqVj;!w3bn9%s!+6;b=XhdHwp}@3sVJ3^ z8Etxa*X0Y~)pA>HB*`kday~$K)rmaay26?x-~%~vZ!Mq&z9H&y?F7MGW;HzyR9nPY z0x*be@YYA?3y}wf4-Z@a18*m`WJjKBz#p?*A6reqJ~?I?PoPjHdbOtErO-nJODk|$ zG3?Ndpag}a#sem6Yogb<_1j|lG=pA?0c?`^d%I-2XB2&s^9h+=FwPZK=NDFv+E}|Q z;mKwNJ6uZ>yWA4zMKEE}0eLS&ePvA#Qggc?N&mN7AxX^P{1e6eU2|2 zZ1m3 z-dpQ7+jS@|Ewf)aU{@-#gFL}NXhoW4Hi9n8JFv$c>ds$zA`cHdg@nsqD1`198UA&d zMU{^DrALOp1#pm2sFi;aG#`(Cr8CN6S(~&Ie5oz82|7iSY1MhckZ%}>=!w)Q2|X@8 zF#IYCW35<9Q1Bq@{0FepfvN4dyu);b*1gky>Z zl@1qP2$`+IqmJNP^fb(If932qCC2B81>4TjA}Fb`UMS{u+z6gQi%7wPjqS+n!miy- zS^C97KjVK&B{VNX&gwrgf=v33)dDkM_DT+jKa`o)1S@jS`VXKW49N~|`X`6FURV}E zw*)3ETOk=lktbhU-B8u>DT0!@7cK_BEZ5mTcYAlHByuwVvDBrRK1-uTPT1_%cu}-k z#2POgZ^SDiG+0o{WG_nWZ-1lU_x#$BZ;|0QNvVG{jB78xNqTtYA~n%H!U#m=VEL}j z_%#n(AT&JNe$D0Y7_TkOG=i&ZkN{;kq|gp>e9pVJEg18c?V}gc5qV#1 zm{u*)SV9(7(n=Hw>0y0TT8TYI5LH4dg=Mh!B0{CiVkV|Q!cdifb8Y^8+mQl=zTn*N z5jOI$eyE=mV+KYoX#6X@L34O(+A-*-Pn4ftZEIxO$IVk~OZKQ$^3wky-k%rGJ3W7{ zdEIJsA39u{zmuMXs-B52D|jtEv#iin+u(QQwgNGWQrMk2^3pj(f5CN ztp0DbJ*8q4AhHD1b0IHqU=_oe)GqS>Qtj*KDH(D9#o7m*noq)@@)-}}yLkO29eEQm z&)ig-JHjX0tYsZNmt-d9U4kE0;Eh6R==+QJdNNvf4DCC-QU~P^X?KuiTuE*izn+O} zUN?^L1$hSZPqjB}Q$0BnCY*DuT39IyGWtH(;n=8cNExmh)i!>j#M?nTQOIZMquyU|@${_v z{az-(;`2DIA&T>&AW<@_YxjFWP5N-ejv@4x;@5kG)g+n#(4tQE`m0i20uU|?z`*-& zwoC3>oB#xcn7ulqi0|=FKGAbebf2A22pO@T$<>n1%(wpwIFNDBrWiBDQv zE>i)I(?rV8F#;n#HsV_Y4|$*8QZXZX-g8FOGD^N}NuM4pT@2d!ATQdC{b1*cS{$jP zjJ^wGo%%h0Z%P^A8J>iVvuDNYRw?_2w+NR7&&PEVGL@pm%ml-a|5@yEbQa?a0n?LSmt5F*>6aJMlT z8e2-Tp8J5K8?*N=ZBiboF$HTDw0Jy^VtRW*_Q9AItzs;f-HT#hV)zTIzxA&Z;Fh*h zzso`bInn)*`(WTul)vR)n->~AN}(v98Edv>agj^ZS(e(?P8yY1V}t7jW{jx@FZN82j}x1*ypp)aVR)$ zCbc}fVb5ADw|E;bd@Q&_9E?HU7OCcHL^>)GzIFeuqpsw#S);<5dW$NHM->*9+;3hx z^0smZ(Vf+yOw+8euU{_H6104Yd1@k_BVE}PSwr9hhu=yPy|IErViAaE{>&ECB|PHX za{?GpVC!42Yma!}J3SU?7mv9* z6cAp@vdvl!vmklxKHX0VwaY4)q#h!c8Cjk^SMnP~pk5aDz<` zQ`28TOsOZ|N7R(%KplHb< zslYMp{_>dyzIX{xk@{r46D&yK?#iuBucTB;O*#A{zeV~}ShHRiq{WWzugpZfDDm4&a);5JcyUfb zTWs||PsvSj<3bsTq9o zDf_$`H4&bFaw?XiH2DoAdW#cs!T~BEtD1ffnDf?YpJTeT_wl=NEPI>0iq<*%+WQ~M z5A2BWwT(mn^f&Q=O9F^lXx&rqS6uN%N)h5JvNM@&Z$gK=p z$@jYlsq~2AGc*TBT`-vYNdt_sju+r*6+cl_-sB!`Xf)B*zoQ>@xwdoEZe?KVZ5<1r zrndVaQ9Nh21^b%pwgVYH26oZzoA2l|+18BP_`aApnCr#VlHrVn-7V`CugO*XJg`I! zl34#%!9KT+hJI*mc#UXEAx$g+!1^Pzcc@}v)w6Pgu_Y$k$Hbn$8E=O< zOT@@U>@>qtcfkMN@hVGj?etF_LGhiJ@gNG{1`MY9BdfZ1V3Qr{lDp?>s4+AbFmZZa z%Q~Gm<8~vTRgIbD_{M-fDE_bmQ)slOuip>{7qVI&h*Wc$N3gVkqwn2Z2cov3F=iBt7s zqOQ-LU$FW;GlDtl`|#5`g^01{7|1Co5EP@uM{WdD<{y>qS77-#e7S0E3B;9V9o!eO zr!*FT(Jv*aUZcFpx{)LDV-s7>OtDvsPl0aK{vCU0b5$bB`fkMhPsW5s+e^x|gi2;+ORsbkkA z#+K>LDS}$a6Q2hdfk_0rqI+CwE2-4qH8%gljgPO|6+7plsvUbj=DOZrBL9X- zqx#}%mMj8;>nzmXa{9GQ{P-zVZP&{ z9~V^UQlc{F8jj0U1PC0&YI28O9Joh_Zrbk=E{%BYe@9*wo4D~dZr`}o;%Dqa<7%Cx z$IwgWeMiw(Qt`Mn!%e>t4TB94;$kcD-V6j{7-{V3!e1Hwnc~tDax_pHMJ-1`&gP7d zXs;~|o80FX58foJH{lk{8@=KO`o7Rxe*5*dGnuqu%P`MELT#bk=eoV!9At~i0 z{sLiIZ@2PgkzxwVQ$1i@(a2`~h+9p&Eq!fF+hSA;W(vr@Kfs_xENQ)EevOSFb5)>( z@QwK2{I$t5{d@)J+jA~gJF5kIcoM!CY)F%9cR0nsKPx)Tab|g{A_1_q;FiUfqUxz@cQ%ZQWs;9#e^p)NC-h&8w)A9Z96S)OmT195FKMMu z?R)*G!pinD$NqH&%7Y&q4nt@R5!|N+xKl*2Tvs*5F+m)e`Gwc`wTTJ7vSLWMA-OLw zTE2|}bGm(NEPag(e!JiLxOEIzg@A0fg;YpdzUeCo8H^)uhKJm4#WgT6^5NETNKwS4 zpbKaDg>qCnn*4{{|3{Z^LY#(W1Qac~<6)blqnzVoB-~i2Z}&z@Pjdq$8{l?l6aNs3Ahyr+RR zeQPm+Lvb%2TBH4k93<*6a(ZN5oLxB*24VA>MdB$giR`;w#f#?LtX%mKsE!72(2EHBxAk9(C8?z#14+tU!07yH$xds|;x;Hf(7 z(s<7DRTaoN*p4mt>159`?BuhIQ>~y(^|bSGy^qK{RIED`%%f_A5^^D>#hzywMAU~E ze#T~D5Wf10Kaf=!UBo#*8B@!6=hKHSBqrZNEPU(+mga76;ZMBqW#~~DW?~wc#Qn-o zD&7SuszxEv^tfYbL+ktzF~1CF>Ia;{&ti+WGwy9A@I@EN?(wWs20W?3u?ktA$=PhM z!w#S9#d71D77lcLzZE9$$+E&7H`!SGAo-!%V_Du(6mw<>c`+}aT77X0rBuiYk!j{u zw*#l-;J~6$I20scS!v0@!k0H{eQVU{Q+QjED0MkX>JLax97c}_6`I>)bq!9V-W`6(C~8K2D;Yp`nyA8^A++vWLe z>hBh#nI#6gPMXdI^R;co+|+gTGL~Xq7&D!qUa|#~YY)=AR15H}{2(fg%~X7VoW- z98IIB-j6enB}iXj8m9CCI=V`|FpuuL!9u#j1^c+9H7S;<3;1_M);6}EnB4PGKE@?& z1WzUoiJ&&YJq?@S2D8V09zxzidAtIaYheV51-RZPU)Qkly=<$;G7L~BBrY@d zl$QUYK*E&F8<42D|9`3Z)89kX;?LZt6BKkm2RVT#VMKX30nzwWlxQyX0l5f-OII!~ z_8Im3zsP)vwyC<)GW#v+RN=d#Sv#BgHYVEH%abW0wH`cqLMHl%LK@#dnP&btyS}IS zQDK{c-E70=DPQ~m53fYD!tcU0E*d0LPW<`XpN`|z{*b%mYS(JBy`9NwvPH{r^o2Ck zoZ+40pPg47G2AoU>p4qg4?;A_n$|Bg;^<_y }Mrm#Bq5g)Zx|H21h8>Coq zKn+5Q2MPbYQH+r4)IwSY#hJ0Ut1hCt=6Sm>gx~J(VX2ihOAG&Q^ddW?M*#vWW5<@C zFv57_2U?5e0m`K)PMS@ySAc3dzwV4>+wit>4V;v`2cOGnW)%v)T^S6EJOhic8n(4z zuk*idjb}H&EUaNe$r3=5&{Hs3#RTyf#pLBKu2vHHP+n?X-`TJeFCY_X@`VGSOGl#w z;Lc2LfPNRYP^g5}pGK?$eYa**Jws#$L6Y^^w~?aeCyw}_zM3L6^JA}ro(Y*rlYdeB z%S3j}AW?Z3be8Cm+YR!u<*ta&n;@(O6ScqT#~UQx5366s zm!Evn@91&h7nOteM#Qtu;P3!akS+8*FEZ4A=D#3g1`3VLU)`^ z!nV2ikejs9m@Rs7dcT$Y0u87Zzr(ZQC{z$4sR~{!PxgIZMs-wN>+F;}{_Wils#~oa zY_^AddIYl&lp_gJ{cmHw-fcs{GFyHGhbovpkC3EoczjP~YUg)PIze4d6v0;=Ieo>r z@9mX1tRINxExrW?nr%4m@|L~IrGoWs{uUfx)we}eHp1|_d{^Hq)YK(rJ#d2}_)`*N zeG3&yYAS@d{bh5)tzm`b49!zR9z7G|;p3}q3y3dMEmH|&2u#bygvJ6`iBH_S(HYNTLjk9{rt7x}s115u|_h<0g_kmyKP zO#13uAWN@Afp8)|KT(#noJ5-a<|kjad;3U&qj${4s-D_o#X9!&ikf4ax#QRTZfy7e zfcA|ry^DNs#kME7FrBZ1{6g!}T736MEM2*L1M9Q&;nd12Fz;~B(j;e+PxUZ`qT-V{ z(zs3+vy&j4Kxa35AGDlCrwQW#TmLkWDToXfwpo#f-<*)CWLA zo_DwCj8j}+Pu&>CSC&4U(}6jVjJ&6A1S(<}H0DW2m9I}qzFdtm+(arllu1FIZlZi_ z6?w{9OGap~8&PJux_&<421TI;BRp5Y(mtP5oqv~XHeAQxi(=xUM|@^v(5oBU`uGw& z54|o~|GJxQc7Ifn&#T+_;PI-S>0i`l8v#osue5keFL6@ei`LS2k@t=`j9|5%!un&F zjoAKq%4+^*xa8Bfy8-DXW|zfDqu$v+yu z^4WH?%W%n}6S+iGR$zE@Z#P1Wa?Lv~I?|grv?i3+Q8ppTK5J3sKa$giqXpbW9j*2E3+iySZ*?` zD90mS?iY!XzS7C?)w2HDgnh3lxEwkT$L=eOWCwu3Mke=34fJVhM4f)D*1|Hg4-PBU zl0AYt(5PUE8LX!TQxi87t}9>`baBAy7PAOa7iH7I z$ej;KIwAO+?&o&jQfANZ$mH}78XR#XP=UL-{4}c@iF1B(UgYf#I@+6 zp5naZ+^E)6G-tVnXsiL_sE>y_ijR577(eUMiPf3zs z^u6TW0ncUOpJh1coc6B{dxKGkIJKUsA1S~C zPB|8XZ|xho7L#+Zv;S|X5t!y zg_@fuB&8aa>tEU&Nd)4=w|ffhr)ImqV%5SoDfUQR$P1sR7vCTV&Pm#-ec{Io%xM2vtwNC0cr zZ7t>($4#J1k1HJ{GaU02a>@*DAPYX^L(uXWR7%}99qa;Vb;IEKh@zJQJ)E%kOsX`8 z-Vz(%Q#EBgm_YZ{zYCFKptdSU-b|ODq=NbGDglr05NznFBC$`Xh-X!^1DpdJll=@4 zm!v|prhcxd&y|ZgL|EYulz9YG=^I_ySHrZ#io{L^l@-?D887pYHSim|Tbs{w9mX8)aKK10N&Wcyrl$M6)PoTQndNOIj7-t@PH0G&DPU+c6Ac?z!##-sNs{j+0QQLmwJGbFQ zSHvRy?Dn8m6N{N)Od)$gMhUhMJg- z_)AvfqAWUf(g&IjTMn&iI7yW=JrqsBtNiC5TzACQJ4D3H({REI&0MJ$*ZMSg8X!+L4w|Jt*+k?%dz`- zZ1#pjSG(3dOLqA@(~!2*Dj~%_{ajS>DEZb0je*kblys|$fUZ{64r7Y&lR|XYFEC;d z+mroW$k7p|1=P3OE6AGQ8u(VIOw{sfls=O8%80vZ$zwq{A+r9%6Wc(VLL;{k8y<$R zU^YlF`?)t^vgsX`9k`0J)#?gmf98ywgNvNUN00fhpzPOtj3p=UI;9bJ!S3q`b%&1M5`9p6vi( zU(Cv8Xt{`O9T;}PMiMs0j+Kp_v1PD8@}l<>JXI`}c+9+! z(9aT89E>H}&cJMiqMc;V5Gsr=YPlvpj6Ke@b7z1X3ZgiH!GtB5fdQY|ClRB^&g#O3 zuIP)rDGhOo%**r$O1{LOdjlmxzESx21Rqw~FH$iHI0`4(VXGAKzqgg_;Px3>nBo|_ z>8Mu$`giO7StFwp;Hri@E)>sMQpPWIN0l(V*Gbc-Cr{fj%vE+3U8(Pv+>Q-2+=Xhq zrh~~CZ_-q!E;Vwnf4y6@MxRjPW?{_8Nb~o@+c`K^Fy#-pqoK6!D1TUyRSznSByS#Vi;s*}9k{vz*A5QM@$#4G0!UQFjmRqSAtd zKkR*OTG8-Gf^MWZV-(v?w^sYSp3^_Rdnj4MW>FYOJxP#Io*FL0s`F*eD+#vv?TW0K z+D;%+1`yKJ8wI7!2zs6G*U+kM({dy@%%)SS-PRtRa1=@g0g-72_7VRK#J=3nTC1nh z12*-0D=8&;^>&A~v&f990%^I0hwdT#pK^6Ipk+s*oa{*zC~(=8amvA6pU4wYmAnkf z0la_q(S{sfPFEPq1sFyE9QDjw^&u>VFtq?1Tq|j1kK*Fm;w@f`JJ`kO2u=?oapYih z@84}7;_uYO&r56^U96}u%;0r$p9K7(TsR;Y^SU<&8`~P*w&1l1iRX=AbbJ;Sg~nrE zfrvY;ytVl^e*?x%EW<(DDsXu^Tmi$%Pj(_HNoRa2)_3GwWVSVJO%aQgdIDAbk_PVM z#Ec8LS>He10<<>|hc*s^2d2(N=+{41u#F$6A|*$?xi*v!iwOpLG4)g3}vV9fR|9=s8! zV2&yt`!#&0ybGbN#?;nO^2p<5-S!?=&KLD3#+CWEOZBlZuH!auLEYhl09gg95qw1f zUa~X2_dKUgV-tpW4h5qkbh~lv9>7N2zJn7Sb2iKTDvo-cqEfdMC~S6ZwF#>@bmcF2 z38@qFOULMpm1`fd|CVfQ{iTO=i&d#O&sVIzPL>B(Hmn{HXoH%cG#;kGE@nvR?N=Ee zXtQRM+}G+_HCQm~h6`yL&BQfXDE^4~l^{zOx4%Qd&ttX9;GxSd+ZIc&tQL91rOtw^1)Tpb$Xw9Fsk$1ryqLk=7a zLb|PbwNAYWD1_;{O?ST?s6L1pU5ON&u~d~~d$&|!q+Vks`x=v}=4foTs7Buf7q$1& z{Hh}m_%o~s8Y!Ce{~^MSe#aL!5FBC;75fK>ujdWuXYG&9`Td+4itvl$4rJyMLUKx& zA>v)Kae!_!QeLlM@$fC z0nTFSzVuPCJ=YsDe`%beHJAF4I17xyr*msBK{Ym49I=l9tW%f%X zjybs7hFC(g@W~SkyyWCRH}}5*qL03M{Ksoon;uFaRH2LwVc2x(CaMuPhe| zYlo1Z?dzo&TDC-d`K(PRVG*IHJP(=)yt^@L)rR1RL^v#n`ZiHRYBA71mY2*51KA&9 zH^X*I&W^NHpH~vJU>*sW7(`eM^~b&JucC8kh$-tY+`6`0xlk#qVI+YG*wYjrbc^`J z@1REWFlKss%x*(1-JjP=QtgqY9k)K0)4I(6n|(vw$7wQ_O|^Id>4z@aZ-Pl5dc?mn zE<$KPrIxg~%lA(uK}j+1NJc^1su5N()Jail_eL){Q}kUUC|kBDq)?F3@KSp+2qdv| z;yp+fqS*X+k=nI%V3Iwa8p0dn)cvLlH zgnn}lijRoZA{XF-f|(JDjWmZM9v?vc$BJbJx9; zG=+5{YzVr8la1YMEIIqm*zIW-SmQSRua?2&;%X)a0D$(T?bK}rl7LuDzLqE%=lhGK z(xnTogw4>gi14oni-$4sv$DXN1t%gNg0ZOXRfM+euQRIauqJr4UL zyKwfEP;64Yo=FD9{-|sSPKf`$3a;mv7@@MF)2Bz>+pI0LA(N?lHw)bOpK+OOO3IaT zgZ$t5+DzYjS5E(IEo|E`%*;E)ciR-ayV&VpCi zcV29=g<3pH-Lrfr@=`-HmZc6Nk(omDi0|jkR9j3Li}TZT9`dXZ=+qb3c(?p2%2qMW z2t|4pM-f=<;7yjTH^q>zxM$Z=29S%$-h6M7?1y*8qRiKAWbTwnpfDl0--3IkN(2;b zTVpNK&3^Pqf^?cK1}Y-Cwb4RLHRxT#`ELl6ueG|9@uDkT04{IvZeAaF^1~N`N?9PD3%dW;or_hfLSTg#vH6PWe z0CI&?LSp5F)z27Xa!UF6xNvd$i@^v9nu45aDvDw;lFSe8gX_~Zh4509tW7oJG)8Y# z6@7()=pm{-AwwIFClWa^ygr0ojH~ zfJ-j224i9}9}j1aN>_+epBnb>QZ$SPQsw17kayfXEJ!-57h((18bj#X&vEeI=%WS5bO0@tXG)#DIK>_ zQCdG3BNfw$h#YON^+z&1vN;PIIpx{Di$hzmZjRX{``4TAdVMVyzIy!5K}DUS>K-yi z0q^D#_V%0ywGT8zed~jHG9Pi7J4>`Jk>b){)!$z*y^cJ zb_nT}SoBIN2%>xOTkrWIrHvPvz@`^!Esd+}@bKZCO&}c#ci#d`0QMQ7_%(ZCPe|3A zx+gL{Ywc?C4kn~wz$}SX9gLKNm1FKt%}VOU{~=a$jEsjcQ>pg24b=0WaSknzPUlzF zFE`rJ&q!*nDX?;UCIA;2{%)DiUoCzYQG#*Db7DW9@QG_D#9+mB+sCA*$vH_WQdRiN z<4~J?%I;$?#vY;_ENt8q8p2(s7D!{9ViP&ETU)sF75FhtE!lZCw6ZG+z6c2}<7XUZHJIZRKj8JB$H#*M z7YP1-dY|+k8cKD*MFc5X3OZ*D_z%08!RaON_4&nC#H2uF;UMGsh+->3lH4qrA48X@ z-AJdfT;ru&zKUhg8VWO>AqDjuvFqGBlzibj_z?;FUMe|d!l-fAb-AocfRMdABV^a2 zENAub`g|%AvVhYyJme)Hve{RQ;zb_wY|i10T|B4C8|k8z!U?x}#Mc_F+=6ALsTk|I zm#fdFD6`N(Xlgb`fFy30U&yB)h*~=ZW^|d1|W0f|_ zW?R+UhC>6VYw~IoANpAp9nYBxo2|7|a6i4kbTq$v3vq>8p*@3`S2H&FATdzb%=x{Z z>6@bRc>ozUhbna3A};&cJ~%{x*n%@uxTr3~ffJWYn!+imjH(oW(Fcrk5*FNzQniuNnsy6_%5t4{6>?GKUfYM_}@o>8x7j;91_Bh(J5_vc~9 zbPRf*?@M}6-Jd8`v&MlJ(@g*{yse_N-1YViKHg2?cJ>8k)wi^Q^*539=NG5XFd#he zlS}=F=ZYglLOaHvVD4p{-y1^>8ZVNwG+#|j%}AgPOwxIwkD_$g$tLaKIz(?(VV0Jo zdyM=sL$1@~=1Fao*bDVSu|?0$wMah2z)n4uDK7U;)kZ|buy!NA^KRtnTl+f@&5Oup z(Ja%4fpV? zl}{J$I;sX$diKcwt~@{f6XA^zqZ)#KxEejeyQ&2$oh*F#@;rE$DkK?xhf7(TMQ0Jl}dEv z5NB7Pj${xXVd#nEo)CR&`qhe>ba#hERTp-J$)mBhmE_+r?s}M&uLRySO6)p%#$Pgs z82h3(3U|Tli4t8wQm{rr>;6Lt@i+C_@8|IhEnA6Y#%Xnym-iRBIFFfgjPut&6s zmQ#jmNUBL9e!qC=8fRv@KLLnVJuFxCHMNzx$S~O2LfU4Cb8%q(SL=mmbrOxDIk6%Y zJ>*FH+LMrh2#G(<#A`$CNn}tDe=;1^s`qRUMb%dn#fQbM3mAmKqE=PtZAB~7*6PiG znc=Fg1=lNH!v3Ov6-@VEe*x^Np~UR76(+i}2`R=#Ct!|!mS`rsnlLOvHc+yXqa=!& zh#kA)wWpR7+s2eTo9M8T&^vaVh77X12nizY9OXoh4+0}@F{HEGD!38l1kud&j`CvR zM-DknJ9bmpn^LokgfMiY$!e2C7v~c$HYZM(k_iP%WikxEFT1rKg@v*b_!E-M7Pm%M z#;2hdQU3jenap-c{XG8#%GmjW`pU8Lz1Tila~8$Ig_{Xnjk?7_YxWC?AIxR)4_7&a zsl(*e{gJF{R5LlZ*1S%Aq`|XGb^xRDtgzHENJiZt9-S)v3TYJM;gbWvt2ELZ4o=if z;yfEO%Rn&BER85x1u#L?vV(EY4l7vtKPBT7Dn49Ck30-679NZI#m{EOoQQR!s5cF z?DGej(uXc9HtqJ1I3%Cfvvt_8gLD{g=OkvcrB!FMxbX~>zRvUR9nkA<&=I1RCs<8* z4BIfu!EkUhV~qJ8d)Agi1(AkXI~*J3;GvA!)Sl14KA?WpvCdveuKGhI}j1IkV9jw?TmM>hUBLA(9#{pf6{;I2kv z7F>+%x4!p$-Gv+lJCXNeKSGKg_3iUp-$e|&6u_kt;alHt9Vb@KbHD7_BTDv8!Mh8GfgV8W?eqSKw6oVZA~^g)Ur&IWt#n&D-fJR{~M^`XLlsx+~`U26N2Up z{otBISeFK(hFM0^+Bj=2i|!pDN|^|o&7{5}75`$kV)Z?u`0eu{PlT3LD`!4injCOi z`Dd=|{QU=GDbg8|&%B+EB+2H{A;<3|!d&KrxfOTR49wan(Z)Q5d2O10FZ}V*w6%=~ zO9iwXzUi4%DRkPLauHp|WF+ZhWZ3(%tgMA<3jnl`-&V$ftUepbV<*Gbifh)30{6JV z3F|NW$pgg7Eoq8VI!`Q939o*)V|&Q2mT6>i!&B>vNBEGN^InE)9lnUP zPE~8X;ju>PcllOb7~%4?-=Mlp;)PC-)eOz^#PecxMYc%*#zolJ)0Ue-kYEZwKY2=g zx^9=|d%{&avYVhr63K5iFz0TMB@KWa)(=km3L-=6J0bn-)o0szF)%v1b6};7Z-Od~ zhRzaxZCn(N8>B@$G9B9i`1E0hysgZJw+OmS74tp2kT0Ygz8 z{?i#_vG9=(Ts?9ctN@FAqJPXk>1_Cb@&{HD1bfKcuUw zgE%ahs18e=OypCNw~nu zU1)lKKU9cT`ZRuAW1}DWW0WkbiVif*=_chWfX`^pRpgd3Rsf?aLz11c+l}~5@QQO4 zQ)z!bJ1hju>+m}ME05?CtV9#2&qOTMqIb}g%1XQ0W;#&$C42X#oJC6*GNtvR#k6yBYnIi;23Su zWw8|4YS9F<7yfCcr2no65D-Lifmw)~*g|O)>U>K-F>9vjdHA0F)~LW5n7$(Wx9@KW zAJpOb93}$a@rf!AjJFYmO0aHZAW9G!jR1wv92csypSQ$eQjY~EsDm;T?{%;+;YB9_ zU{;7T!CL(7(k5(}6wwe9{+N?DB5_BRnT?JobI6=vVsgsZj8c##yS1o&>%A#2puV&v z>88Gqp&@_(I@c}HZXV*}B)`0CHk(8X$sLi+_HukCOu*J?!S$Rz~Er} z=9yC+h|p;-Z#t2{+e&c2hwi%g|23}vx%~HEAne~@D8vd5m}M{%6>NY5XZS6O5uwY9 zAlB#7%Zk93S;p$!cuPZ%;Oh(M6z9_s5+NC697&o}i|o_>J8g`FpV#x2YA*FM%X=K` zldk}`1H1d2>PRI)t=AV6gFxWbUg4pn-2Pf%Alpg*ioo}|7-4=G7)EP6PG zxoJ2y|A_3?)>7a|m=s6jMp6LuOd+eSfPN!;IaX&1VFau%E+9y&=L-RUCxT~gy2+i*6qMtB#fnw3S&3z1 zq|De)WKSqOT;)a8wdI2xd3|DA@~t$ zBM=kS$Wp^{3X-b30802WQKSyNP~COg5bVl@xkHN_xT>c-ApU z8Y+eK>P5=L=`dHcYE@*TWP2FgW#N?;sLUgBB}%x3jt^z|Th=PK?(ubyV~{Z^7>^vx z>6wC*I4`ah!EABkyC;PlbEX=H@o8o)OWqluKBsHhsI9!=V^py{ib`p?)0gy5t|fQI zW>&)fzYw7=k+~R7LD5bd-&Oo@`=UPy3j5z*T>v-|Kv#>(h^pvo!iV64}{j=7}sBkY-ZW9G2JW-GB9Q)$Rf&^48)?B zMV_-sEJmTyPuJnL!s={QL;e2#dP7Qd;Gpc&f@$=cLO}nnMZqvo(R0Mx=2T|Wh?yQl z&m-Im-Rc>8w(e2}DHYr~=y1?LN?w)LZw<9lOhiGiq!E!11bp8KD;?e9z4 z+3&x5aj?oMAaKwcedi?tftZKnqQ<9E7M|b}W+u>P=@1-I{|(*$ecJuWo&pN2i?tCt z&*c6LQRLNCDoGKcW!dm5R!@-fO2};x?T*dlw9nOi?tEZjm~9fF7XT~Vso3ojz(~ca zHnY|GTH``*E2m`f+DTp$kj*a_SiAN9rCY)$(I-(_*C**C_Q%4)Kz#938&D=$l?#MQm=|@5C(0`?@GUiP}Q8Qu-7RWib`hUp>{BM>;>)cf7P`JV%*S z@FaL@xc5Des7q*m)n9p8f12EUTz%RMJyy=`X-)&J?-|h^pI`c;i#XEZn{TiL_=UL_ zq%_#HD0!;a9FtyB3X}M?zc>5GE&tQ={gp;k+%nODyQaokF=)D>agkDoSyjN}E%eZ!sPPivXTUaA<=&vd2oL} zu!KAz#OD)#kSc(?ymrx@Q4V>V#dDoZOp^o8c_#GuX=mq7NTEneP|5%*bc*w&y}Fvl z?u`S#2&Ouhn~^4AFT^wa+>mt1TE}HVg@qA)tjf5zz_-PkCo}q(^9J|?z8Q~w2l~?x zK>zbjmQ0OHf0F?1GMJ3W!;XZx|KUVq6m2;CiM}@ACrFdYyR4-NHX&&vS>3Ip#{075 zGMq!5;#EU{GXi`JzD#+#eQPEpEW=DrS^`y@Y>tGl^9 z!oRi;1Ot#|dZj$;b+u7VWgLyg{HF?<7zyc2Bo>VjOx|XG*oy|S0t*9Fcx=NkNi>n! zHNT7wWqHOpc^R2`$)$Oni-^Gei>wP-u+hmrx>lxzDXyniakFYCt}LR67pY9SlO)%9 zHAI`_l-FxA%j~@{+3Y^9Oih2sVfEsgsHlS@sw%I9eqBL9d-FCF3iGLwCMojX!zj@x z%?uDWojIAuJ5g%?X1z|jY&e0N%M~*iv*Rhh$xVvqadYY2^@M#_Hf%>;)(-}*5c|+@`FJ5!$kvhkuX64!A%dW@6qXw^zI&+| zyw$};Y4-`Q0`06BU{Q9~j_~`k86kRjAanA~V}g#IR$OwkUHcU>Eaf<;KOW zC}GJLVPXJ@x&FLunIkYy9aO7xYH@wX-k0knmGAXR$M>Re>(sPCUL5K1nE9}p6YqXw zkzal$YjOAPD*5dQQx4q?ESpYJ`5FXR;^D$U2zEx?4@-6B`DG=+kR0~x5 z0%%E;^9GX7*aRx2#Sdl6N;;Kfi#u z9x6Z&c{WLJG2EVxKK%p-+`(;sY5sA`<;=`^1PNm0tK4lUsA25`LWbwqIhH&m@Yaqz zFi)qj;(4RlaIaOm1!|}cmP1qRQNR;m=le(@4=r_J9N71u&cKz%ZdZR!>}`?XnA5y( z^ahNfWdZ6kylb?yq~09ftdB@2Q|UO^%{{d4CJ+ndp1)x04aLY^f7!?He%8G$ z4(*Ur_Q+d^EIMG}V)=N2^q+ALKxAZMSm(W~d(#c>1v48Dw?5|NZ_LS+z|-4EzuL9( z<mWrO|pI!ZEctl}ihmdjD_44lq6 z4a5ekrn{n+Z;WB)s0c9#BbHYT;?VG)f&lEPq@F{tR|~ z%z%Qo`)_U&zZV5Cqkqgya8sp5w}QX5t0&Mi!?RYJ*l1exZrsb7{!3lX}y<%Qc*~h?l~|eQ?|YIn3AArP<9vUX_Q6 zCirLM%MRJC_bY$^P28d5?y3#_m3|?)&xYE?_wNPjlI(Gv!qj4& z&{(YFDYY$~n7?qg_AcpeJ=}vyz-)DEO*ZsGeIe|4yrXr2=HhZ`0UkEFD8=M{itOi4 z1K#B*!LW?t8hTUK`0VEwt(S;tpuxzU7?RhN`N7rkRI(b~5IyVdBG6{F?a(NN)TLJW z?pNM;3ANaW>xNKCA7OGjLH>*-DC#a-I4~&otbO^TN}1NQdIt{>x8i7UFWJ%k>Ct5_ z5f7-Ap2GM`f%u2r-cvX+_T#T9=Wj81@SbRYL=ytSb5(R|W4dEaaYfhUO{Tk|PKVHQ zsPt>*ZF3wtuV<;dtYlvj3YNm>m~>I(W-xm3XyNSPy!fy{p6?*XV6n|%W5si&Fq*L$ z5BX=d8qp!I<7Eoug7TaC+7IKF2fU5d=}mLqIpVYJ1n0MuOmc;7c@6{dEQ9E5-Ezb{ zNiyy!t8F?2ZTaMOEg-^@D?V}l(Pki4|!{#_79b?eOO zG;f#~hQvayid7v6Eo0cK>V;K4|B148?cTiI)XF|giL}%11-ZQblctVCC{K<>0reR( zwOR=<=3b7&@{XO#*f7wGvh?r>Qa{{i9lz{=?<>#i}I?DXb$}MJxYS@n|g6 z{kx&;TqaLg^4^{8&Vw9C_;08)N@1%dLYs0KVXaVc)|ou&L@T0A0lWPR?< zJ4C;Qi0)>E(zo|w=eb~m$0ekU64orTnwq7Ue+vKA)IgiHfrN0Ypk$bLgva}N)y8sc z>#NmH)p&f~%uC-$6^%}fxn;DG;TCVOUzmu9d<4n%@M$%SJm75iQ`1I?J#VqR^2@*# z%w3o5=-A;GqlBA`3Zu27h77~KsmZ+pkK4o>)kUusrHeTv2l^F#dxN1x3Cav`%@VU# zkDX&J3n8HuiH?P08J7uZOS`V}I?_(sWm6VG9Qvy?M%~OnhiNUoAiG;AKU1eU>0UO2+_^X|eKiB|cuu^WmT%74HD< zJ@9CG6tx}e=T~colqTrX@Q4sCbcPg}(G^kn3|{g>?v_F+wEHuzK*Q*ICXa+nCAq2z zAnhB!e4_dW1kV$6LW$%i^IWysvLkA^ODD#${ zR#8F@RM^mI)NpRaPOFZWueQ?&wYLC@3*o3;b1Nv?CT-Y{(%`o%gArKu6+f#biTJZzy@q(caTliFL^w7+3W*Gx5ht`I2f!Bxp{fmt4lSIqFN6$cabm%)QJ zk^-fLEC8Q5tBu(Sq zACwEn?xQHX7` zU!1;%l**UrDK8~Ty2&~4EteLn)}QKJsnfnXe%(~E+iO_dl1?(?jqX_lZt3v({%0{# z&}E-Jrhj!2iI7C+Z_q1AxwKFaX=CuEeX}oxO%hBhv;Z&9%3FryrsC(Vpn7+O38bl;;1-WC>dSZ*5-Wx zXnK~%h8yS-X?~w%NCoLHo$?5dx7!)0ZfuLbbOm_%aZ~5{Y%6n3CD9<)#XElu-fNE{ z>6gC_T{MUu&q?Iz+dwPT?lGi3Q4Ss&57sA6N|x75+#fbB9?}P|L#yK|1n@U%MhWxg z!>39I{$cz#c{^%y8q)h*pL*2P>?4Xk`?^haHBuZlO?wr#HqIrC+Q-?54bM%%>#t4a zu5}2Hx<47%(XKgZa3zm$_c?U)g(R-9%&|J}1^~wS>O(m~vn&mAjLph!aPSWoL67!( zk1eWfMNR@%7^Vd~$=Fw=j}j-vML7j8N;a4I$en zC&$3%jH89aK4l&s?LF(&DlN-CQ)kU@vHUgE6k8YCRZW}M(zy#FI253o`YQH*EtmZF zCgyC3jP>V>l$_|KrjMaF%pp(2Ei9<-!Q=GWrUf=$Ptum6yQ@weYigX$xHD-2JGUL? zPNx+rJWf(6+~~~crVwYc%Or1xSqyEmGyIhw0=qJvt8AfSlqp3qj-Ep{K1)trEF%`i zc?yiJsv$1YOQf|C1ZQj2Vq2)u49AfKHYKy${4&yI=M(J|j0TlsB@NPvd$<-P^wr~V zLHg5?Fs^V~d1jeDTdwLdjc+6Zo{5Lz5ERDQv9Joci33IjL2m`I)W@^!M)!KjNGiwU zOsshFZ*3FZPPpTci>@8pZDN7`2vc-kWjW|7Yty-FeCS#{FUUV6SBTewQ!bty47J%q z6Uum3t|D@bC8*+}PT_g4@yc_G;^D5B5p0UmIh=RvjXXS&ycQD)8u;hj_dDxAd21>q zpT5?8T_zn~h4st>UGB`iA7pKr0fIwcuBR&$R_!(XVDxLcVob{~-0-{A%Iuk-WLdC%&evzI|#Qrh14A^shN)}e%%(8~C=bA_sO_4Iv=l@ES? zqiXCgVfbgKJmC+=%A`GwF-f!XsR@SFQ5_jA5v}j%tgSSsxtW;x<+ymm{c0ENV7b~# z6R!q!4d4?hW2gMTeJ{s3ikx!g$13Mi&~LL0OXBvc>OKDXZf`NpB{z1bAOYwN_+>LX zon*pPnSR5D8tdt)<)Xl0Zlt{6q#qHYKa%s9pSzt#cc3cm|M)>n`#IOT2-Sm`{qK%>cQ|S>) ziJ$Sfr*-ktixs{`T7V3rSG1vDy4$3F*g-KV{({Cz{&|K{Me(5R$&LItFQ2>@wAo?7(%X2c8SH@KiO&lCK^?|0(&xB2>PqI!{jH5`+jtkk z4V2wq$ad*fi*I~Ly@Ph)xOwyo-6p1%P0DKfaJ9ChY_oi`SR@uqSGo5YzUen zv5H=-dKAVZroD;>;m1pEmmJ0s2nt%bI`aFzsB;!0VQd|Ufme4Oz~(sLD+%FmKMvx` zNG!v{SyS__IL!&Dv%n<|0VMS*dUZNV|IyH1qJ?uqr>^kT#zAJ{{PD~?mR!nZg?|m? zyrz+~SyEFZRc3lH|1(CBfxe2S+X78vpfL^I6qO&zk%hhP3F3e{Od0v?p}8^@Zdrle zYf}RBesPDY#DHq&gM9#99jdb8~%} zS^L+vP)xZ^g4t-^P1D1+o*5f>)gGA zhU&Kk3?e`(w$~25SxY_Ukk*+k4Du(QuET(rr zv(p_LUQkruzFzwGHzn$)GanyfF`|e6ya-@IE2Bg5#O1grB`u#)oGSt=@N8+41T8Y;{Y!g>9j&oB!2plsXg8&{(3p_fK z8K6EB@4mBOP`T_Y%6?Hnal%w#W>mVh*kcb&N>P6i@U)UpFe;UB(YbQ<&1^hAnj708 z69xxjq)N7_b)Z0L_VDu$beFVVqjb)ow8f-R4V3DuM8>3YOp&=7571T*(K+@Be{ z9Rk8C|FEAs51yheOSblr1#oD*Tq1U`(va!C79pl%!ODbdYoYu{u}=Ok%MHH#J||=hy_9 z>TN+GqH(cWGLfHGAdE|;q6@iNOTHv^BDNTPigTlwm0?IX85p_YfQB~YF2;!T-z+#2 zGh`HYD5UlAI&OeR&oDPGf^OTN_U|tFI{w#7=vb(sjecl&cqk(#2Q8pL>^CO9a-3WI zu+O^1h0*XdSbBGb;Q_D1_Ax}K7i?73xUu_RKf9&bRh zv72Tq;ph6yc(B_|QGDr4>(%S70$Y1+1%?)Tx^xc(qAn)*! z!#V}}i02fgvjC>Xd*D(M7oeqbBDiqWY$n;WXN6Wwjk8Tl4T1UloFyBIP0&bV_C+=l z`sA}$Iq%Y9obQ23sR>L|^iQ$`OASNj5l6qQ107*ljGpa=mIlF+rtjN6Oxr>&7-2gK z^j!M$jR)FQfRodzaefAyI6PD_c0c&u<|TByLyIB$8nr5GLOx`s8Tu$JYjFwB!GrOI zeJg4Qw)OM7m<3CP`>3r+H!}ICU={bax-tMS&1hgn37UKFP{T zUEYO-m)vm`#ab1EtxA+87kWWsGzLC}VM*eO$k$UcNl4)91&_h2vzv}BGP7s%#MFgQ zHN*qz-_SXE&K?4v)bdsSa9GRj5!>mP^R=3UfwvH*X^RA>r=lYx^zHn;S>G=O62?r1 z-#ol!J`=Sc)<{REO5`OaA>*}^@o-G6d3H6~7{o>#cbS1BHwNIGw`Kbke?oBa5c)`x ztIfh)pLXogCYn>r2DYQ!zPR4k-}_9KcGdS( z8}P>Ewpy6+k+ZmYE4Sy0ev^VyqtsA_8LMJpt#M3?%V%GkI>4ujW0w|mkEnD0AX_Gx zZaRJ_RKaytoSCpxcIz#*5<6h+Owpo4Ro~Qc`Y8?ItA6Pr*J}Gc@401AK7ul`Hpx8T z=xTOtFe=*G4ny`#C~ZVTnLe3Q{G%YBTqUp}MaKAayE0MKInMl&y+uvN1yK`6K&6VO z>zpHg3s-YJ#0dxPKBDqbssiAO9sLK9{8 zh5Of{fle-69ex*$1$_Bv@zt`@+xnEoMRPgs>*NXd$z1lrPfqGiS0z0@D$pJ>hS0Mm z@{S3MuQIVj)$C4iV(}u(C%pTtusnVkcZ$95k(!}JG!o9-) zGy@c-d?7X>469L(J^?S}gLQ9@0dr`-2uguokO9O&bvMppyF!Vhk~61$G>XRI29^xWN(nj=$@5I+#ve)k=0BSHB!zu# z7=`<-+)&9kCg^(y<-B|^ELVMP*y$OR9WOM?ZSxuXfr&EYA0)$BC*RYQ6V@Q^Zryh$ z-oK2Du+~>TDHhvi#Xz-0x^6+i=b*@Fbzq*^=$Y#FkhesTGt91bLkk<=i9-9bug1-> zob+V0DQFe}_@!teU%IxtKn}287|_#il-kl_q6kqxMgr6h-(bL_(s^kl;S=n#E7Xml zIl%mqeEnJI7yGI_gc!vgkDV|~;_7IBAnU>D>1L6w~HOGn03q!yfEehZ>l$ zOHGEQe{wl}U^L&s85bk(7*Mn`GFtAs$RBY7eMm`*Vqw0$bNGaG_90c`2FYggxU=xX zVAHZ`#f2squiuLaiA`5~R8cpZ+0Im#C1k~Ev>1Q1(@m|fw^(ANN*0gxfqxu6OdLJI z(}8AE%mpNQoqw7$h;MPoXaNNQj7h({9On~z|Hnprp8KJL6EYT`&OuFEQm8MXjc>y1 z7!)=5b5AM&8-X2*X}Kry;kKy2ahd7OenO%6#gtZWF-9pd_rx}Df8`@tJz<3F(=d}hD? zllIZoZPJtz+AATM)WHEh67Vaa_J^l&bkCXvtbE+s6GP_D@MTeA7f6QVM+Jwgfr$Uw zgC7Jz1gFY63D2&#FOht5Q#?-RkKKXW$PmwI8#UwS0V(Igz$x1p!tTTa^rH(yM*cOw z|JTp_c2`hE6`YvCs)FTrQugHEWC-{KKi~I1UlAu)%TMrB#ZNNhX-SJ#_=;f&{Xhb~ zJ`)c*s^iZf{{6G^9$X^zZ+n2P%|A2-m-}VHf)8fm{hK2KpWP?-dVP-Mvn?sWLBPK1 zQTTB%7UJc72UB;n@XhDV5iH`~K|uxyGXzB0kAGree}D1{0t^!W_rvf1JP`kTKQL76 zC<7b1583alPJCv6=bir3fbahM0KX6V;QzOye=fR$_*Yy0`;|JN? zzKVapz>oB=cKG)*7{=k>{KCHn{{MIcnWy``3=+scci10(p^ox$X^9_k%wfzCX}>u{ zV|oYPH*!je4IXeLrgvJA!(c)<03AAomoimch5vWOL82vKna<4?-5o;3+ zoKys%Zd5!P4RAaKeG|)lm*y_lf@{MkL9vzCd`2sI!Ak7!!!pyE_w)Gu`?YBDoNueH za|0t+q+1J6XllwnI+DtDsc@sm$|obLd`pR13=qHX_aDK$LbXh$0-brM=`seOIkmZT z^Jc#Yf#;H2Fk)VjwF%TBWKfZ=wfX%U9ej;}Xlv#)Q~{VM|2kwuNn~L27pa-g6%}0_9pq%K54xSgdynZKSJoEE#Sq}^Wem8BV+Q*VrfVW%aK?}F5u^woZPkw2!$Q*mmFA7;QmKY zt~l!&N<>Ls+m*k5+vmLwQAc5bgHhqWwPRGRfV5PEo+1TZCCJqD`kz#Gq9 zl8s?_ZQG2Z-IDBhay%CZ=bP&8zJYjn_rDV*`QP+(#dB7Bt7@${@H>VTc%>xx{}%oI z3^aC-{OkjODd8T%T618p#7v+c_XyBSA~+h&Onv}vGP`>ja>PKgfD@)X{F9CucRDEM&9O#{ z4IycjL!wCa)X~LGZldfe9jXajx2Gb#XxkZX!+9dhpcT*auvUc0>_XB9KwjbCS_e3X z%8WQ0RP}ZosI^&4$I${#+wzpW<0)XsVL3=&zEz2WaVTcOD8V=5Al1~A2$?|xR2ilf zdG5z$v3evBc{96f?(flY7J7}2WfhJM2fE5T5n^c3?3?b4KyZzjg9RzMn$A+#%{W*9(`Cd@bYKWv_tc#hgJqS>1ugfb@*%y zN1E~~<8rCN7wOhbqKP!|Ok28k<|U+NRcF%3G!gV?BX6p(hi)GiG&lo$NJ2@Kif5+3 zJ{TpLA9w=N52HZEVI{|hW`!z>F;4k6dg+=LRyc>{HU0ix*6mNcIL>dO+PeFtZsFEq z$mBX&TE*>|I5-Cu+(I-Q({J2(w`(h_aK~;jojyQ3u>8*u;UrOz=sF;KTBPi4e~L>^ zPjf$x@%!0id@PS>QhzQB&Sq)aU1{P|en(_-x`%0#h%`F8A|`#Qx}r%Ud4rg%`Xl@E zq=j@?ET=dsI8vgo0yR>@?6QeKEOD){-R~;AH?bhnE+Ok!9pv_WwLQ=O>F{n=Tpu)V zEZ>?WcdXG*r?<<2+hG-c3RbZ8>cyK4H*j{oq=1-M4|E~bIhD?12&r7Jn2mWWE;5B zr5tK%0Z%%O4Ws6VvXC4XEt?_Odegw!5eg1X6A;8sDh_Vf-_6Vw{89zw8x+;CS?4mtU-Tw4sq02X2c}Q}pKaKDu6)71kzvlh>|ck#@r+ zlYA0}y@Ns|-YV86Yl@qvN=4;-cihfG)hS(H-$>3EvaN-N^eZB+aV&$CMqc0GZ8@~^ z3wR7VJ-dSPgioRR#Lp{9FpX|dLgYFqXKDT1xgdW6*3 zg{)1#^E6Gd;5<#oupnM8Z;6?)*Tr7>N)cGyXe-B4r&@`JC>?7j0W~fwLY@-+tES}& z=F6liTIo1y6&igP&GaTNnxZA*D2L~<)u|Uh61R`~p$RZ^?PktL;&fqPMyAZVQk;yq z*~ue9tY7@sLfBV{)p>FMjL&T7}DU@%n3 zYCRW})Nxu<07~KyCx0)&fou46SABTis9@W+G(&635Xqm9K8Q!}i^))V)px{vZLZ(` z#x4TcCE9;d9xM9ZLH~!gw~VS|i@F6#2qb6{GVp_dKLS&5YhsI<_Mtv7 zd#Q~4OvFg;kZ5Q?J3vMn8NNf?=UAh4{VpYCIN?zXB*( z`C@}gxN~JDsTyRYQd?NSIl>rTVOc#rzZ7)WKgi9DTme_e%kRi9z{(R~6c<6jjME{%>{r>P@=_!Ycu zq{Q(Ml^ewmyX?LvDvRAaG1tMZG3Q-yG`PJ&<+ARU;@u8acO`^|NXy#$T=0!S;ty} zwkKJQ7aF+UU`Zrm}6qx}@bEv@rXy-nwh|=`*fbwCxUfUPb&+ zOY4&WDht)hLdYF1@D93!S1~tD+fu^-`J6Y0v#TJN5$-6BqjiX5f3M2{&iMkk7ylWQ zPgYD+G>5MOEY@>6Uwt~YIdfy8KOsm8JeIn*FHXne!{edqWT>`#8_1#N2x8`$giHS# zlur?oi6qctM-5JN;({2a?^8E_90C^)(JU_DA}k%740RCO1&zw8TqaQjQ8U2*nhrJNXq{{>~Ie(_E;?4bgQ6Q*P$+PY%iLa#1x6oh*uwa3ntGT$yv) zG&sYV?YWeG7uWc>7Baa9NGDtA!}Bl97w;4KIf@i7Lka`lZ{6=unW!|IX#Q*!$5Zj8 zwz5u5cC0v6lvy3yDZx0MK~Y)kXme6|hp}-a7By9=ji^tmx;EQ8Vw6aO!H|{z6vGb) zqy7`dJ@iSZvW&azuCM4Kn|+Z7Pw9+48io`)Z8ddP$w%(DS9Y@KEi|_?)sI}$Au6k3 z)V=$zdm=~&9%ISe6DVqFC@a=H6s!dPH{;s(%@ z&l(k0)`np)`1W)fBFU3tN4_qISAj8z3dddC(0@R^4Y(&Y|6$4$zu4=L+d3K=Kvo0s z$}EarB!aY%w&Ol?R#Dfk-1AVPQoaebr_z&OjX)P4%%kP)KQXY!W9Q6;_x|V76Lv?% z>%FXAkV{D8&60Kpl1v0FRB?%p0ZoWR{shtO3}kmF@!@;Fk#hP)nGPhomm68=BC;Mo z2?qpBbZI*U5JV#QlTonfyIfi5KBA2eB;%?W`=A2Pf3O9pTp9rgi*~s}8JAfR*){ zFOT^p6${K+LnCP)7!R@h4F9V z$hq1y`$Gs}QRqhyhH`T);g~0!KJi+Xu1`P$A-b7muzD7)2&Aw^^n1jz{1XTXi1+nq zGUM_77xdqu?BEeq`kBhjvwd5!@PF(}jf{%KqY#eXh@Ux@=Z$nx(BM}tdYRNJo-d~y zZtkmvoa>#IeAp#0Vkh|xJ?>(24?EZiS5P&QT8W@vbgCCPPA<-v;DT{0{hNfRa}^+$ zrXZ#Yyj}keutiwI9I-><{&KomI$2pA_xm$Rr-YJjP9XBsRu7b9L|4qgqG0-McFa_!sK!Tp&@Ad9r?NPKinNyEm?n|bNhdZVbJyr1Cw#4E@Ee|CgDW7{;Ub9*6(w$-IXlJZ&oa0pe0eh01I_Ky+ESMmUnJT=aGVC za`YPntd!yZ*h>KG<~feltbR?GNH2nW3JV#u`@REXIM*0@*XwlqNKI1NymV%|EFXYZ zg`S?7m@{ed82nM^EduP<{NFXMS8J>%e%&QNvB%IjurOF^JigZ^20T%g2d51PA^4-| zi~=%g>v>bO&7>E|`rbj?M<@qAsqRaw0t-0mUsMG6+x_?z%*zBz%YOShD9mOb)kpyd z0R$+HjiAWq37IQ}i?4KJu1xTSGxR-osjewk5=RjlAt}!|NE6+FwIe*iFC=Ldv!&JN z6VMrirMA;tHHbh989yZbwi397dWN{T!z3>5aQXw+!`miTF|wsm<#@iT0SXh&Tfw~# z02oG8Ax?Ey#!fr#WXDFB=XE`46cNNrr@~sqf_din<4GLWaujfHXh81Hed%HMZzZIM zo39CSwAEqF`qz#^5672mbh4r}_U{JI-v*5IQr&IN2=&VFXm(T|hczC{-4smBAU}E~ zz`Mwb(^S7>z5&f7I26j-QG&CO0V~eyKlogF+OOl+Fd!#*miPn6T;j7+#@vfP3(_|6 zbLf426Ss`b$(vYs$9^#pHNtU+gnWPc^J>{$C=nX6fk%p6+mxT!`~{fG-v#~mZqLum zZPh=qyT_7#&ytkMhJBwn#)W*M@z}uH%q-(d81~NALvEjRaOUrE4E|U8JKA{dZ=V0K zLQFeu_roMMAp{!`SZe^}a2;-=gu$Ho(dE@s+N@?TEEqRnFMRzFCR8^$?h5%9TdxgE z90IQ2|1C~tqrivlREJe4zFZ7(0M`Ye+UH9GbBd#)iZz=}DU^S_Y-pPTgm1EQtbj^n zKZq*$Bi-Vv^NrLMa%*_PRD|o5tKT-4{JWn_7A;$*HSfUEMeqas-6y(0Bi^%e*vz*<-f(_M^LZRO!GBVEmpp z4^OziBc&NAF$fHRP2il|oI05Ek(b$++|%P(mU;R)y)IQrNT^R0x^5N>TV0MWgz{@& zFj$8_u^xA6_Xg*~O5#VQJR`29=CtDgVW%5txtByZhA*a&a8L43wuZsXx2^ z1#m}+?{&L0w^~>#AKOcz3NlyE;zW#&?C$PxqPRFT+%{A(icTW2}Z?y>D;{ObB`PfTYmw(u1mOa3IWTKRpA)4j zaMlO#Up?K@d3C^~cyIOs8IwyPI>9xOVqc-9u&%~rIRYSJ*$vo0H;r7Rf5w8r-&x1G z31Y&W_Jfe-S#lrA({mX{xkT6!Bey$Z%ang)T(-VE)jty<{oNbef0QF&=N!YOvqGgX zj@@nP*Y`@!a-8@%9H8w5B@v_9=gAP3IK%sUm(hQq>8RXUDLRo|3R*>Ag0$x1YqBf?MCq^|nB z%Wu{?^&(6PJ_v@patb)fKhmzJNFe>VeF1Jk?e8HL1kRx9mHd^2v2 zegOP;b7ROP^Yg7lC0W)r08cbv6xDcFA}U9ahWNRL7qiW}=`a z?;DvPK4YuSkd`dpQ!+k|13HBUR*Ac>L(Xc4<9g3yLnf9v20O8Lj!rpd{Y#$ZI7(fj zoVx@?EVBfy!N43JgLHywKeFsgkj~2bfsWwAhQvAQXZxREHVF01@(jC^pWAm4vkiIv zlhADYyKD~K-YuiZw(n%ZJh9q%QS7H%lq}95UG-62yxnYJIm01;6n!3;RrDklqwebn z0Ms23f^6%h$$tGt@&V<-qlk^>64c3jyMKez46I?Cw8S8_a1i(QpD z6gV+O`J>HQkjg~jNkA*}sMy5sQcKtZdJC$SF^AxFV(^(-yJcQcoh|Axy}aqQ0TG&v zCUv4=B)>a)j>E`#xb^oEYX2v4K^I$gYpdofj$PPJvV0ML5wd?s4h#ifSA&qX?7D?k z&!O=n*rat{x3Ddi$EEXURm$A3lv@0=+w(@LlS`3qrb`ddWM!d9FgL) z1o=~JQj(#gqfW==e5zib7k>*IG{QW7UgtbN<7cS2;{fu98)S`}n6ySR)M?67r9>bK z${A$-oIGbNqO#v8_bO)IbkAgm>-4VQf6zq^&bD^8 zywQf;gZ)Y{W}n!gg>F<~(4cs)@w_}rO;VL#AlcE>5I&5c@yFjC8NtF+HO5%s2niV zxt|CTA?jK&sI-p)lGv_6-+=0!kkB_)wRF0H%oR~Z>dUi40=c5E<>K7eu?KQBdo?VTLZ(bYHM2V(JM6vYa-7I-+a}{g^E6wD@_U z&piH=_?(|62A73*4meeQ)y#u@<5Ke@501rfiq?cTY76Rnlno<?W3I7pF~!@x22kpL5zcYo=6*f&5V znIby+9nJR2l^Vl<58p7#BeGdy8?ql9;Y^KpX+U5^lwKoCG96_3q}o z)n{YXFu%e74qiO_+~mS0JF=-52}ohx%*Pz#QgHNOVpgbE6ZHRIs0u~q|6iyIv)@$3 zJ`17%sIRNJW&cLDg^;2ZZYW+AkFA~|c208wJLpq1=C=#$yeaFN=|CUdC&>$)rD7Ge zDQ=6k>u$3>iD*LA?@a5}3$%fRBhcSa3eZ*H=&GeRR&3TnECh@Z!P2jI|0ENTfj~&W zsfI`pf7$dgjYw!_gU zrZjTOTArc&q;K&-MBYsDE>jE=3;mk|AQIb4nTu7dSI0MaY#2%HH+;#E`tj4oSKzIe zCKS4HDL^ran_aG+2DrP1^c##6(#CE`*5B0{EJ;l9gL2m`GQ6ma;*Hm~K@Ye!iXr^b zLQfkY*!I7R(%(Vki)hKs+X=9}NIrK*me+=c(NW)&Yd>Je|u+4i5@sOG!?} zK$vJ+KX=s1b?;z5w-IQ9_-Qa}3R3yLm52!p;HXKv97TXvp!-sl1MkJDqx1Hmn=NA^ z2ERqp_`J%v%;-J_ONUJD-sOne&}70~(*BNd6my_3KtG@{HrOJTga*-}oT8k$0+72* z7RRJ%KT0Msm&_)e5J$>YMlBV=$R~@Fe#Gr!#iLup*EB%{w~IB=Swl`r!A_4LEoS~y z@DX?lJ^e-bVVR5g;+hUFS&_4`AK7bviq(Y6URk?7=a@kHLmEXS2!eC6)Eh|I2O5m8 zG?G`w(`M?-$7nn}IOnTZMxazQ1IDU1N4|D`X!bCe(lqNQN({ch;hPyKDKSWb4DA>r zFH3+szLBKo>Gs`!+jLO3AAb=?BieNY5j*H}wpf00w_o=%D&+^kp|f^=bNmR_=JD}K zGRBS;YmE1`O)fhfBT1={Ng=l0W-W&|R);8Mz*|Sx=xW_IN|}HrK72pY{}&d8)G+A9 zbl{EigY#s*M2Mc!Ospbii^vgXTp)g3l+f*h+)*cfaM2Wl+?ew%!0T0MxcT{Qwu~-O z#2YXzus@0|Fi6TY$I8dYT3>l-k_fVx*5-rD+uc#ZeLNsj)fjSUa^~;udTLC4>Ko=p zt%*?$VeTI4B5r9i8!AN4eu8~th{&~hmdV9pe=?tD?4GYA;mfy-x)q76)%85s)~5xZU`(QnbLj{ZF5cxL#{gX-q|J>3Jm<+pZJqHy}tEiZmtY0Dx({w zdF$2d>u_L2ec@+v{W8G#sFoUA5|mt}V~Jzd4>|h1E_dpXr`(^s6qz``;RTZU`|2E- z=*;bzP+HcXH$>P2Z!Bw9Fs{ZRMK%&t=?Z7dYE+PL9!rvH^OS_e6Mcqd5}>D0i9X?` z$X@cYry>S?F)Ug97=fZ3{<|ulfLqr4W_{?*OD4ab|x=sbgz3l%F_txfN$T>S12(>;hE4ghRl+_y) z#_^qB5$4stcLw1V`rOowhJQj+n}Ip=MZ@yVBNZHhakDB@E0K)UT?tXzH=(|s zji-f2k@%TS3X!<5uziL4hDI#T3C9msVQ# z)oo5rxij*2LM$A4#DDJUaEO#f*S=F&p`2hEyA5fRZ*B+o2W2XU9x)ay=) zB2<>pYK^iGv=`rWQ4`QOR5b^X)8m!JbsO!AB+kp>vad3zT|#qq;D-qK{Yh{pg(F|S zqEe<*!HB}8j1&$eSgqPlCp{dH8Vz!6S+Qogi(5O^OzqA^-f&)cCFVOp69R8tgHtZi zk7Ok{bP1ysb{7!NwAgW_w^5`aO)VBrwRP;!tm~5@M$A zJ83|6VD;eOQJbV)HjPC0rKB66*xOe6vzh^!Ay{C-^is#U7IDo^z`dwUH@3D-FtwAA z)g$udVQYfsJ)w*bcwE8@*W$QNC=rcpLGh#S*RTbLZbBX% zSb97l8Y{!IG^|tUY;LZ*Lec7lc7yFO?#tbilVFQP(AGtlrKfxUCfFMo8xR@ogI>V9;}%d&9-Q zPHG678s`Q%kbuqP(-!Jr-MH#H=JM(d)ag<== z$K;?ow9$7m4R6FsdZek2$3lAOBNVYKj`MxtKVbUN;*C9Zr8M|-_$>X|$=pS6+ZWTd zRQnAhvlN=JVkmXXXXeJpZ>@PkLEQDa7hV&?{(7w18VwiTNea3}sW6+FuMXgH^cDyA z^Vi&%2ayjjUZhhw|EC-g7+&Z4S4eQJPmiww^u1?;)99W(B|!SQG!UoUcND@{K&;0v zIs0W&`1ia_b(kq$MT0{ zjho;$Joe9)kN;?+fy4Y2HP7+K?&#yh3K^)3nCGp_hePBMx$}K-kc~)=-qqoxveVT0 zsmIa$k{5V4(=#6dxI_NB3g8DfmEq>COi5KSH4WpSMQ#+t{OgZH0ExGb{Vplqb!V;~ zc0C_JL@)u5$UnPm6OSeqCS@=H3S{6zfKvGry8}Q{n#1*W7*|BjuAn!hWvWZ$Ve$5l zEUw44)YlGPXA@dB%Mf5@f3v4QNBgfH`8ELfu50Sy#Vf;6L@8p=@sRKf8F?5?-+NzW){;IqK70SyRP5`hKpcdtM;xVw_E#zu{FM`^hA4e2+m zR6>q$nrL0`(?Kre&076B&^8nH{al-P|LeEtlBk*_6pj6{udj!aj#!dH?kNBFLzZZ| zn_W97^>>LOOL5+;W6-OEqY~s4@v##o#VwMAortp^ zCiOpj9fRMJSD2P=_nY)Ig?j!R3{0%uGn7vGbY*Ae^2zj|-spwFg`ZKDudkgN|AZVV+N%9`yXc1{YgFMa_5DOk$i>)*6WY z^?k4}iL^%S7n+b{%=Q_<6whDwpnS&mVL#e(J;_Ro=M`vFSL=xV{&J%3)4*4hUo4IX z<#TET`_Z=x^=L`1iGBEf<75*k%6#6^(RN#5C)_bhTvg8$9Fa+%=avWMGwB5TQ8W1| z@b*f9jXvksjPvNxdRmiqy1}sf#nv>u1cn&3$Nlv<$L?~u0aZoiuLFYJ#-9nvJWVY$ z3=Kc;_FEtT8My53{`u~^lzbaty7lYYfn~^_sl(^?D)(9E{(RQwClxFb*J88O1KWaL zubdQ|!Jo#QJKh$wfA>RwIv(c`Sfm23om&jVY+AhYhHt!pMwXBikFsZ8ZUes?(qJ+i z==#xT8TJQ~b*?dvM>r-L1H<39RE1Gz=H2dcQte?`CHUw60OpPK(MNA>(|!xUe~>q% z$0G4DIp^oc$||IbFA<=2t&r>`cw1$O3SKQ`u4sD)KgyA}_+~+Z9e?(gZRW0SjCDsQn)#yKA zNwcwqQiSlME6B@d6l)h*g~E~yJ&|G4(>y)D8N>>|yud$C0_Gw4Z%oboBp2Ijhq9HR zCLua!v`_^BSkq|A1rbU{v5tl~-C&D3A>oMCMw4e}YszYbYIg2B8dqq((_D%e9MhtV zaecC&yyC&{+l(Q^hElSH8qWW+TLK^Z)BZF6RfqPqL2^>-p$jPPVw0bg0 z$ImWrv``4}7OSQ!LXT78mTWegDsb%VSFc0}6(GpQz0KxQNcS=O@5D>J8E6H->E42k z;`}Fk9fd*rOeLm|Kyb0q1z5AeSLp*3+BWDv{r7wq{!?ed-6}(_F$~YMjwvRpvykvU z;9mrqqM)Ok*De|3Hv&y}8shx9Nq$l3qhox=Lo5OoA3CY=DV^_1s$4v-HSEFGHPlsd z{?xXiv>UC5tH~8bHh~>Hhgj}23T>&W6jkoAgtwQDnC)SQ*D2Usu7Or)e#IQv^rfd< zJYyr%TfwGpF*3SZqc9~c7hO_3qBVg7&o4Jtt{1t?Ara)m1d6P(M810V> znz)c{ilikzm4NPilGLt3CZM&y%6`BOAxqy8tzdFaWpW2|Je+C!l)Kh1nfXgEZ`P1j zpgY}^sJ_%aMRj3`^xDY$Vi$PZBYuW+kRUn1;85G#efQ78>pFVN3Sbh&YLVrtp^Bt9f}aNl$~KgcV|w`hC$ z?JM&s*=~fX@I)LTrQ58NB)82j6qF-F=)GtJ0iDK&^?hC|3NvN$-Qx|YW-~!3_Jrx^ zLRs2VvcA6cZTmbJ7aan7kpv=rj)z05dR}D!h%gWV$(kC-FgWx6@eZ>=*|atA@}Hv{ zQ#bc2(BKU*vk287M3CLm25%(Rzcm(a_L9ikaR75P2VKW5T^VBtDEG?_SdKS$TxByX zjsVVJatoPB`z8LEcJ{j`u20xUaA3C_|Cu&${~uv!r^TV5RI0%|i!{KPs*?yiS3@)U z1M<4@@Jeq`CX8?t(jOeU4B|iX__y-Q1R%V|atUL8?eVO$bHgv#)$t`0ApR=mRFTQ} zSqj)72^5+h5`0Q8mfQ&w(HdV<<_vp@JxPIPIzskJKUOFTgV&(Izz~fUlXl&>7=R?w z$rnyuDqI3`H? zCVNnnKNIdNsb>!qlAyBrf1{^WKy>XAcNGcvssr=tJz6%T%y>cAqELAkAbCRggRTJ6 z_rd>!pV-qQkthro!XRo}nN~SITvj5o_!FtWZY`OTY%+Q?o_HMvp9}7ESf6+mw;K_K zuZVKp5ptdd%$Ze&hC2ZKrtEB$-~=UZ8MFI1BKhmmrn>HlP(^ytOoi;>$A1wQydpMBqeCyLM*E6$H1FqKJ~GX55!)ua@?y;>WydOgEkIy*=MI4jlD zJ+%536>kgmRz{vB*&yWG5sC4q2yQ-Fc4>;hA0U;yfe8?eNhU{%WM2<>6_L7hIVY+d z-GUp`Cv!ve$i@UXcac}89zOnfE!q`fio$qw4nob3sl_mB8@wd@t)&mm{H3LryqpXy z+rC!o%ju8S-zS;~WbFJl8NdO^=yO|U!tf{i9TwDH2K4-7unU+dfF*50(QYq|8lJzaOq_F$wc52n;3 zk`YVbS~AtXw7u1SvR6G_CYkPGco{iB$+TG$o2^kS(=0Y*D`HnTYQp{`!pJwCTaAtr z?@DUfW$Jcz);6jw^Km{jFaJLA(K3903j5C9E6rf{#|*CCgNmI=e-LF9JS>~w%|@#| z4aU(ePxSPc+;7xns_&6I_{W;gQcg8eO7YCZ$i^WXn0DA5N~Qs6#wI@f2vH&TAjVr#-q)R$LL zh9Dw*XSMoH&r^fXZ<{knyk}6)o4lFce!|tbaHJA0+f?n~f{|&xg_bTnWH`Ed)CLYL zefi&X2W)P5f$P$PYaFsjF%sz~t6TBMFG<2GMdmZ$Ao}jA18^JV8zD$)Q0eDWvLCTv zM>|=?Sm1U6qU&!(ES-DB_}N~*iuJpX6P#^5+&mD;ZNm~+e_?hR?D-ypBnC^F2DxMf z#`J4tjWWED2VO zT=2PHgmD8P+z&;*F<9njW;Qa%H3dz1Q$5d-I&>vLFb zQg}_KufHX$=Yq$IBI~ zCs7CD+crV7@M=W6F~8jIn6`=JcJ$+9GriP^EBaXI8Wa8$r~?vxFCtjj;@O``Ne#~Q zF6ZP>4WoZl_J)zh4J56sQ5}LW#y`yf`Gw|lqObL_fc#qmr}28e4CAl}a<(vtb<&Ww z`pf$HH!sO;-zIBKP|*db8e*H0LY?N8!I9@x;|gcNWOlyXPg!s>@EWTZ++p?8qs`4n z386avg{uonjAcBCSMER5m$*J81>?Ry==3Z`AX1)A&o7CzU-AsscbcX8oI*lY7)vf+ zI+tGOta*m@s;RqM0ZXF@a+aalD5krf9S#ia>0E@LNB;1lxj5O#gx-GBB&)lzbS6Q= zB?W+(WnJjCW8%Q2G-0ThK180fIBx2k#fsNfbUd|$bgK9MAH@7ee%a_?+)U476KHyA z6wOm*xTe|TQ&&SnCEz-O?wg6|b~s&MIyE_0TSF_N<}>ngIj2M32xgH1>|Fi152}T1 zuV&%6RHqAC!HzOv@Jj?1B@~<3bUaI?MsFv^SY;;^e|3vJ-5(UXan(t##-vcSD!*{H zM?Zvr(5}~j&Zt-r#|9JLf*x&Q`?HM5713}5vi-I2n6W?Z;wrm2U|aG{do5S0kA&D4 zi?}s~m|Qp%-U{(^=<0asNK+S5X=k@4cYF(D#5xQq=rg(8*qn+#VyR5nlcv>XNGPoMV)t<|#@8dwf|g)=w_`cULJV(V4&w85 zRJq@=A;d!pCrCPAoTqMVy$kBp4u7^bqHoAPiXd_CjrhrE7qSR?4zlDv8LPVWuiaO>o3Stt;EuoX9MCa1Vd;(%oIM!F#spu zYhqwc&w9qm4Mf*};p73%t64zJt&pkiQQsAgqND_4lhCot+Wm~pn<8WreI{q*yWmr- z;fWXh@8o*urK$`sGIwG<@5Gqd<*+NO0F2o!9BYpum3Rc&-4!V+O*gk-c!|^e9EnK$ zJ#wE7LD}E}xYvwCxFN)hRNk;^gRBp8X!T`qU!3(d%!7zdt(c`5FK3|6RV6!B_x#wH z>(1^B$3!CaXS2-9t1{4jMa9TpJ*QOQL`P+m3o( zdbZuEUQ8LM;pY;OF)k_?y^Zrq{!lLjog24$bhkm%vZ+K5Uh;$zU4a>0|1*bfkjrkf+Gdd-RBb`d|^)@m9_ea=aB%_}Ie1N!%UvxeA z5{v1p2>A153`bHnUT}`ZCXU6I-&X~cw&7NU^b3mp1^1V(LQIh zfU6WB?Ma@WTC6`6FRjA#MRwU9*|0a=as0kI0(bb1B{O301T zsG!FxLNGUw3zz$>y|6F#fr>t-uPCDLyLmE{BN%t zAuyFa6!%NUEj`E;MFfNdMSHW4_q&B*rC0Gy{6|L!=iykIhvoDl52~+PD6Cn`nij0G zNe7nS8>}@-)b6EB0`^3F{tEt7IsCUUM6@Z{f67S8C5k@)?k=2nh*M+Oh5WKJ~Ez6*zROdoAA|$BVI(C_7Ok3zYOjrsJyxN^>?wQN84Ge zvoq<6sV{$V)|+Zr^UL0|fg>uTTwqNyX=uE>^z*a7ORqva;P1lZ14nxAn-KXOswltq zBU==2b5PWBpwGE=EmilOD!ubiPbu<6g*76j;^tUSG<)};d2TCpec8R5S6Ua08f&GF z@t}U}m!If>{|e3sPWK>FmQ&jY{ASfTr|Q*e>yFBJ)BKO04!<6jn;NhtqhlarU?Fb` z*_n#;uX2Y_zEg&@E3$%-%mGUEBA)Kz*`%HKKkMuFL%-GyeNpL&>2_wbss+AD65kMJ zU@wN>JYVueid3Q?-CsXH@g7!rTTkdz0ezB~O*H+9i`J!d!fQXp)4DI9y*3H;L*tk} z5*;9iX5u*6N^_3{6!e;R4HZsiHVZy+-TZzJF-W%2YDn~sj_`=5YIs+NZ&KUk+|^6( zYCh`T-F~?B>u5-xC%1ht01+?rX&5)M$Au=AD(oD}V5@GonOW?(l}3mw-`O4(*;cny z*I+3&IA`}L5{+3}O(_^y1gNEoqEpSfsl_9Xewri*!PqEI37A@6fU4EcR5UJTFo%kJ z+XT9O%dlr|d+2%5!92MrvxP! zpG3*Xk=s?2tfHhuS+94Mh^#6ZRYV%r6E%=9qidmDVJJ4;-%k*xj4@kMdwph)Ia4+& z>KayDqEcmcaf)<$^0VUMK0|;)BO@c~qHA|l$~55Ve7BaVR={7XqUxhS^Ad-^_oLP) z9)t9~-*%BtLeeLdvIYC#;AfYPV`k4aJa232*$7W~hAJ(QlylQuKX7oPTRhYQ_rXzl z+vy(@MnH!Ci&oHJfJiV64b-i+T7HZHN^Jsvh*7rpZHd9Zf-(3|eO}4$q0!wtcDClz zY_3;AuQVB>Z%`hR*T*4huol{E0i$yrcRI z>RoucNn(yJc@&jT9)F@WO~HGQUEg8E*%oowGQ_MpgEY1}{7x4k z2z!B~ms{$C_~R7?i)N-)77>yHT3Rkm#17f(XS{m*m;KC_JU^!fs(rfx;ly9TC;Sfx zJ(4qjtDTd@_niCbds(K*uK24JIiPwjDk@~cV$DLQ)!sfyPM*CeNredxf&n(oDEs8Y za`|`yvKX#M$(SidNxSoaZ!CBbwFVq)Wg(|y`vfbrI9q+^hy_@H6~cp$=}rpcl44P* zFq{JDl5?nJs7TH=UMUq@W^`XmfuL+HS!g{{z_y}e;>w#HGoB&i?MbfqMK6xiWFK^> zseIaphP*CynrpPMyj|>+to!Ol3%({bF{1;$Sb1QrZmF8oQ?Eu?DWbbJnLw>S=eu=ORc%B6;(D6=$_{ zZZluZX=MnqPe~yusF8C4l)Lw>TTte%X%eGrs=ph2u~4O;pt7{BBKeKMTXEH<(oBli zIld+`XS%rL!pf}^-u2wotFq#h`r}N6oF!EzudDl38Vi~wVi^>a-VL?G*}SUy&2|=2t7ezPz_=6r5wLlPY_oqmu+X^0p==Y(pmwE-hybd~qW< zZ;+26qdKO-Q-+9ITThOyCO=R~2hGAV2_XcECc$6*sA%X{3wf)*d#=kFty*Gp9_OFk z?IfBSqiPxV$s!lAMThiAejQsRlX8=D{j2mieHetiK572cUCR*X)kb9Y*ULmfrK$X!*&1Q)d*cgch_vqEcaTur|#Rvme~ z1haum)#CX$URUelpVan4td3AwUYa0cRIY`D1&Ft_ZjorMVvb@3yrv#i007+Smp17@ z&ouKf@<`5um_lOHVadGBb3CfWOZX(XqIU3)>I#uaOVnu&^LiE%D$L%NMc?Xv?rey0#Q4I zhm~lS)pToWAX`@&vUQ7+I+eB~8EQ)xy))xIhYa5qkDSkTp)tE{0#R3AZ5$+Ac@4Z}~7&p88HY*JZ|G)071d z_b!E{FOHS1CYeRrEeVar)$9A}w`_cbg5A`*;%Ju93DR2i%rm>nRmvok2(iQ6W~@c& zk&K&=?$;5B-MNpbXA43&LnXzSa!|q(NuoBhTUs6FijUQ-E|)155013f#y|6H7v6f7 zhUoH}2Ti}e4bjHyZh@R{tiR@IV7yWF4Sy0;=VaG~T!-fjo@5(~ z%#RvPCjc7nsLsDBkFUA*mRB%L4QoY;5>pNl=SjgGdY(l>HDfqRINMG6j3kHQd}LW% zhc&?xr>!YG^jA2v6r6jn@ zWS&`|N5eDXp^q>d$h3C0syvFvo;}yf1N5BH1><2i(y9C98^wnleD3gbGMdUFDS(xUOIkf+I(~8 z(vaEwgu|GVS1hc$sm19yHTEsu2gv=pF{sl}gRqF{izE&T;0OJrzUpyCPAq%n7h$@F zKwp$u9*3|@Mu5_*wty-1r9PtOi%*7867g0vIc8|z{x&|_V#4xdU+H?z{%5R;wxRi` zR1^r%QcPlx+PMToJKE}E%Lk$NVChoNFk5;`ouZu^L39&Ni|Sm-Q{;eTsV2RlE2XuV zFB8wluShctk8bt!lkPX>i>=480UemGt26U@>_KQS-dxwzyWVfTw_rZaI14c(ybnFv zr!?ttm356Cs}rc?lwMTKU5?gJZGSb{Tv#a*R9oZ*DwWWvQ)muxQN2~xBP7i!2;#Hf ztG`u_ywZq zMLiK|OLf9C$VK*=4Hf}CvX_eqL6D6)u(J{R15e5_S7w`5hHvfOrQ}YxB@X6fh&Z8} zNZX#lg^(lE*L-Q=w3SiWEmzB?yR)kO`EVm949fn3Wuh!tH_ldmL!t4baT~gL-KL#J z`Ug9TSp!nhf^ml3*KAU;*q$Ae@CkTD%9BCmTOP0-&CrJRn@Ym{D#wfzr-O@fgzNAD zr9;w-_NnxHm5xhD{)nKQ2Eh(F4bG%~!O+x~uArbdsf(FIgAJhb3IYMeBkH)97wg59 zZ?zm9Z>>PatNE6TJB1AeQB4i;{EAr;BH1t_rjP-WeJZu)qC1!RL2@&CFw5p{)RlZ* z55m3_i>^FC@yYDTMk#|EJoG(pg6oF{=?k=e`APR0o4J}Tazz2p+X3(kBrBTf;>BC8 zI*i4Bjb}aY*Od99Zu=H5oSIT2Z>b5$6 zQFN>)Fxpwz=$TZKR#%w|yu_3SywAF-p+#BjRjrt{724Q`_SkwBc&Nv@2srUu_mgyS zhSq9x6rK%Cew72w4RjK6e-T?917TYf%DxB1RW5wzQs`efHUP1yIAs-6?`^LXCtKGJ zgqFxXylJoz(=$G6tU|tvjampWYg_7AUCm<4Mi3pIV@~Th@*Q&6OPkFOa?>d6(MmJaqFWaWU(4U z0g4u@Q}XI5JhoaGd(ey&ifDJ&uLgFsZS}^=8E#B@bxZt0)ZnhxlJ3+8U}DBNzNoo#oj-m|Ay{ zaX4OM*ss}&j^4k(llrvv`C7)e-9pvY%7oalJr=O=5&u$4#L8{kTAefzcNn@L99m%Q zp?cMFcUVSPaGs!x5iTv?mCDGnWiov5_9L=7WgzNKzMMUbPHaU_&=%ex8Tl~FyT(vX zdJ{&RUKUN;KunVx!=oeCMFlQuDdnN$GxiZIU!@%tyQ=*C?#T-27uMzLZE7~B zs-~zoWAPVJ&`~Pg9~EWWL|#ujK-g)JSjJ`9ixV8r<8prt9Y_AZ*n7*UD8KJ-SW!f! z6zNm}rMpw41Oz08PKhChj-dq+rEAEcbB3;=K|pHg?(U&m`X2p$f4}>Gul2lmUOvxS z_j$ou%sSU~ookd#+gmM>}gm1p4xF@1>Y(zgSb)FN)MktwNif1UFVL zPKva)fbil0=dE&Pj(ABvbm5V3XQV5V$^l7D~6qT3YV%mN}l5)vn2bDCaeS3R8H>m_rQ`fM4c_0DVMmwYcFJ zP4t-+!ha>QzKV&@=`V0WU0IjIBuz+as(&+GO(m&0N_xL`QJfov6(iyVOBf|_bT5sT z!pLP`_g86DZ4^D<4K{IQA8@kkzJS)?CsG|P;lW!qu7UfW7}Y8^N0O3e!$+Sys!K^$ z*`HRpdi~t#z<2$WmO+p8fhxWOCB8jawgS&=>W@| zqn6tP?QcBmjcX@neQ}ohF2kL*C3EGtciM-)?y)wIZfUNh)tU)G`w+8M3HGWxK>9i%rFZAz6!9I&1P zW#Q0|b^mPrQX?+X8=(V9tO)D9DAX5ZBAS87V&fO9pGHe@mgCNo?aG*ke@_Pa=B$!l z^~ce4e4DYdy|x%g5q^;HAvI~lrnxFnJvK6SRKV7IVWHdnD0=Rs{&-HTOAg<6)|@8n zOnb4wNoN|-;v#?)0=pOw4{KAy39~llCj%oI*=H}@Q-y=YuYTKNpMo}$YNe%1)W$lq zF~SZlN#!l4H4q+?MS}K=`--yuoaIl%p3(GDdV;NzYT6fytJ9!OfSsd5py$mfB+&X7 zw=pekyy8a43k{E{quM?nWu%iq6;mWG^HVmr?rD{=JLlQ=%;j98x^+^wPC;GN>8zM%_&6YqXbY8Fk1Z_Y{&sPM!!a_=H-{}omx~$9Ou{B`mf_*9@Ekl30{n+!@h~BLsMQ=e%L7vlZo-~KjEW+RmKjM^7tG^d{h&@*;&{$w9F}KW zK+`1>L^MuHg6o-e<33Vv+`T1ecLJwlTZvIFvyO;ullB=On^mZh8bx*_DA3TVp%p ztJ&C?ihIqUYSc&`mGTP-LPrUTl3$g#enjMijCsCN+{b%%T}{ktigB^$l;U7lQ28?I zdfwIex9S`iI}6rA)0|hYjgNMpc)Hlo4tT3@JP|JdIXacn&M@3Wzz=_212WWHN(%Ki z*W>Eb3>Fw-t3+!?~eI|0{A!d$IgJ7*5?XuO5)0sjn-%_e1Ngm8@=nW zr2T3H^M|=g&RmD-6b~KV>!4knmaiJu|rW_7s zhJ{B$7as_Eo>E@2nNfg^@)i^n@%bmlx&3B^Vtu^>c3!;)t2CsD>S0S>yQDGb$(B4j zb9FNQQkl~dC*+S_cY=%bTqDGK*22-8kS<@N0Hy5u%jCX;B+2)6{*+To<0o1aPiDI; z`=j_o2bdiRrnVn+9<_ZCji;jB0EGQ0UU6oA%{w(^;}|f;C9t}?ey&=pvA-)`Dl@iN zkjU1EaV+XeXxQYvadsi;fheGzc3>R;T+jAazcVM)#A3VpBgEBaYu-Dev^+&wEG;IO z-Kr#M$C~earDiaqXs#V|SomYQ2BTny&)MPZZe6)q-dxfHVG`}ln!N@E$Ja+s#~`%| zmOhr2wISImUSs<#Iyo5zkUOiKB>#_y7411lY)4u}AcT$q@M&7;Y?{IM=7|oc!n+8hkrjx zhMt9;fTWZ08BV@#q({4h81Y4z{~Z!JH6?6*~WQd~>R-pGE{q>I~S z%aRKl77II_K>pTzQ7YSpRbrRTYZ6&;o4D#t;-w)EJbqv3<+yPSTQRc#O__v3RX`_g zir<>Sh^J(!iAd)7R&C-;)(OB@%uv7LCi#Xm&Dhjfy4Vkbe8l%seF_s#_MkX<9{a0# zCUf4k&Wg7MSo%qA!=|s$1V|!1wf;cV9&?Wlt<Al3Gep%OB5@$g-p345T< zJ$J8Z>3G03oN1u?(+#v))u;K8-lxVj7`L$aMz*4!Z+A<4qAMk39M5jvziyghH8%)S z)G=N)ztUM$1Q?C^K7>B>6+oiC8ETyE>S(vGW#^beU%ip#t@4Jk;Rev86~c-pn(#|h z^D6WVJj0}PVj)wCZc$e9tmAG4Ui$sJGj2R@r^4XT&_?Uo$r;}x^a)eaO`$409KkE7 zqdbv--`nl+keT&&%0q`i@iUYqr=(ZDBY+L7}u@tCa|4+lAbU4WAv)@ z(sg3iHudL3;ui}g#*(;H)@M09#Rl@mn&zq}JF3*mE)iNZ#b5T0Wf=?1%dtnT9~bIO zZ+1wLtHplSfzko~Y>)u|oeak(R^Ij+1=owYJwQK!))suDjf_8gz>=I+{cuhoKc z;wb9}#>#ac%$HZG;u}MBGX?!2dOE%9^6uDqs6L4EHx0UzIdYuS^ zXrQcs5UV0XJ_yE0AnOyb+EWNxMFv^o}j3plj(&9$L1r8a<8TdA_bbJ3Pk&&ndE4 zxgSW4roTC_OM@iT*?J5f2-k0*t0cYtnDA)vcSN%q7WA?NXd4Ym@Fw$12|f z`aU3I33)1}w(u<2ER7oy&B%?j!o!gCaEn|sV0CqFkfJC~uv zxBi0C{Nj=&Y|wo~eb~$bYYec_!RiLJ5u0DarAae;9#*o@f{z`jH&(jiFd-Z83bk$G zS?6gxaz8NjaArS(JT{@u?(*hbd{R^yQ<2!XmFM2uHM=6xdV+iJDE_GIlrZ2n30h?@ zLEvKktC_ekR@{Rk{D_4^gKxjTXsmpgv~xl69X(jalPY&JSe@z#Ow^z1l@iyDf96W{ zw7RLeRQ>JPV5SM^Ca)(heX5B*(vkGAu5G50-{&Msb7`H@{(r!OX=YPfjJ%foQ$#7;S$ z75~JUw*ZtRG)6>KJ=hv8%`DY(kt<;^qZk8EYEw7}=y}F}GLNH;{?Og7G&URDc9_O# zX=DoH5h?ao4N!#~TA#ANm#O#is@e2|!`NIBOT?O7YCBh9{XH>k&O26i#71UHt$c%5 zL<-tvcHP$fJqXHu_~Fl3h+GI&N*rilFYbQl$sxdgd-wNrU@? ztkxXgJkH(jWXdg_3WPjusB=1NbN{Z?p$CZ3qzUV6Z6nvFxd^XUM0$pyeCE!!jby95A`+yma=p_mg z?*6h%{wWpwrSTv?S=Rvrx{K8zv1%`K#QNb^ZOL4{eu(7W{y_y6Y=6x`3a@;Lf$5+*~C-rpGQa|7P0f-QTp(lRsHV z=>-SCd{CIf>cq6zyC13d$ienUgDMP5zya2T|n_eZ0~Y1y|@Cj4xgJ7ix5X2*U>HF4=&?fJSPtq z{qrE8&BsiQf+qQnmi68t()WSFztqRbSEhI4_9dK&seLhiFo8y0Wjk_dXYw8JbzJLj zn0tIZ^+$zw4C3>Op9j~WWD-F{mJ~6YyRK6zURA4u>-X?=9cc!8$NTQu8U^DOmWg$o zQDNM3vACjx4iP!J_k=lwWt*-l{O1>(2GOMiFC^qVht(T~;Cik%CHk?k1x>MX8Z0OJ z`{8q8J4Ei+4sgR=U+=6OeFIH>iQ){AZHKVx4B606bKlvEKv(-Wo{LXsNxTZC$0+%C zS%@O;2!5bY^^6lm*yv(m!`-#?Yc9~bBLalDIJ@pxo~{W~LHWTpIf_Y79v&7{pMJQy zb^h^U6^KD8=YUO1j%;G13)9EcAl_A4Ce!xt@e%;i!#*WOtDc^w3V+ShiJ+*`SMPK7 zZI9N3(o0G{!SH&hADNat?}r1TcXu-;X}BLzg3{D582X&%KYCi05au4!Tg7Bszx98L zz7|k)*;oeI;z((Z>r)ghEK#m~+O6V9(S?*aoyuz=YN_OJ-^qXUDo)@LBO&$ zdha*bMiw#D8XOuoAcrWvVh)yCT|o|mpl4OBKi^eVGs9i> z*vu@=%@wE~g;VcJWQH#)yS~|Yr?b>9Y^X4tpk+PCcuRNmOzuouTN;*7#K+dZTeHY_ zi^wWbar5a~TK#@>s_(<>q$c;Rt=&l`nL^Eyv(g1%hO*`*NYp^U*?{8Ak%&88*_r0g zqN>%DOY`0HlMve$M20QHt+o9qB9eZXOk7xm?hMmv0v*l z*2%xe<~55XpMGSgimM8!<25&|##cQp5?FDU)>qDXhEjOp3Kyb=?z353rA)hWJD%yu z%@@DqjUUaJx6TeZ_u_+p|1kivKBk=4vUY@P28-GG)Xg)wi#uz}bUL~`3IJd!=N9m; z7VQ)zpO~6@Fc#lz!(U|EZ)=Fsrgw#R{#q@~u6kfVmNvU3Ua}Mi#SX>{> z;Q0t@uQG6>)HPidkoCTp^Z1Zyy=IpAJ8ac`!0>k*8AX+Dik^$QBJT(vg6$PtM)0!9 zrBWuj`7`esBfMf^QOHv71Vm6BBXmVSGDcak*SeP=Kv4bB&}C6bfS5={|nJ2zK zN$Ge=cZx8sqg-KUVLDydl{0$UCfM<5-JZGMZeJ&5*RV9U%K17I)i3Cl=X@&O#tSds z(||G1*E{VwC)Hp|#auNIIVh0Wu@8Fm)DvY|7+Q(b*_PDE1Pc{Dn6cn_`%=EDye~~x zGm#1%TP97a%=zdxL!_ujQ6vG^a2ttS)WD!|eITzUxj;HV!|O;WIwf#*$zqg;jxYAk z%BNgqJALxpy1}Hb@75QY>2lMrbFm>VUMxEQ5C)rkI}XmOawfA16YG)i* z-<=+zT6;$;`t}s~=0!^;8&Nl*1thFjsr*5$u2QmFPX4nlwziP6l_vQ&yJ9Pq=!0%f7e^&VyYufq=7Q?3zro?M z{`jM!!9fpTB> zOl@+5V*{{moIAT;P)e$Gdp}JD+Gri)+#e_^6Bp{imPN6y#7!nByPg;$CcsA@3i#vpO+fbrkGghalm?@0E1!PX z$ibc5P;&i@AGWfpvI1lU?w37vr=1L{483zzFSLqeQrW!3Ls(nV7n~0}ePvU0&Ao<8 zsrD)*$|b*!>rWCH1Db$jvHFmquV5t|KjswRJC6sLL3X8Ksr?ZQvRTO@`kp#zo$g125C`wBc z-+1~VGnNpmP~y$bf&HR#0#T|Q)tmWH7f%y_9F;z~ZZlx-ERmxxKoqGl^7-iuqJrEo zsNJdB`sl^8J-RRO0j)TQ@<@kq(XV7Ql<+xp_A!=uj5joYpig@%n@lzSijUjB34>p$ zX9cho5v||DOY+1ic}KS3{A`at&4bbs>Dc)MjvUjHU=S(!+6NcrjHq%HCOYfzEk zsUqX&79gS!-b{NA^WS{^gDeR6?jY>IrN{8(qnIOw2Qt9<=gsMA#H zgTI*e!N26I)16ELqRhyw!|F*Ci@$nG_u`X3F7nIsb$+?AP`z>LYZlO}E2eVQ%PkaV z37Fd-e{IKQM9{rHV;6aH=T+!)o-{mLImemIH>{>-1#)1tJZ&{}CYKL3*XHP4fv0o+dF894+J78@99z^J%V%&g=1q`P}2(NA)iDzuFb;d~HpC@*7HIO8$L ze=$IRJnvFjt3q+Pjvo2|MR>G;rIjprYf>sO@AJ4bq?PZltc8m`61>24Qsb zs-I4^?7hBJg~rsX4h}c1+XD+#bC?-==|jMXGKm=!<^<@Q)gX0;Fk+lemAl+^@M8k+ zu0u6$FR@Rdync+Ln8Jjmfda`$KV1n!S9-}$@|a5_nAo%K*f(+TxO}v^8)4-m6^+bs zsL`Zmo4;~p7MEls_OFGjuL0y_{NcHg!L3d|Zj~<=ijsCIiuK(uw-iC}>L@v<1Zxt} zU8O!1--+c!@-5*P)5k<1+ciozgjR+>5~>u1T8o(%*!MR3bBY%1^>HwxP##i^Er)a- zAp~4geM^Gj!*7M&SdG;lm!m=qg3x7Qa_bi_L zH7VL(Jbas0!gw?Ar3P}J8UuTbigEto7bBm8a7k$Z8o?exTei@iHJ zLyXR9m;Kop)bHzXA{kG-_mWf_lP;}KiR2|!kK>ulMLmq0-b?q3;b!=QJob31n?Xfu zTbgq#!WT`L_1)%PbNo&-)nPcrW>F~%>sqx?8pHs8KndWHr%qp|TZN*@2>xOU*>2y<*{-Z`2mFtvf8fa#!GkiDP7{+zaZZAIW$f zF@5u}D?F5`(EoP$@J*ePfK}LrmwPO{`$=-Z27fNW)&x{ycL1W|aOD(Svi?vf@-wSm z!8x9fN=+JEJB;MZ#rD~{9-ACqY395wN4&Num1iNZwG#JpDRbg9v3CG0fM_Pu^J8L? z@ZuK<_O-oN=-8=2b`G-)wvNh8Z3pZAKxcE+4^|gUHsMBJ+0FM7D{Ehh*0DmE`)MBA z!Uh;=J6&W$Lh!tvEy|=SP%)x@Xq(YvX)5KU0*yqX8Tm(!_jVhEsv3ZWBH}t-&@-=i zi+FRnSmMfq@*)A^h(^K!dqbU?fTrT&W9wN<(EVh*I6|OwpVcf@-+?~gS}fI^1OV5O zm#@O*c)@E^kF%Dr;_ZV0{{k0njjR!(cC7J>zLx{J;%?(R_zCl^OJjl7`&7H{$D@JI zMZ8syh1#4Bj?{eX1ldRV^Xb;gG~I7!Je6tY3X2^^Ngi0mm@Eu+c~<2*;;zJO_v2*o z=`V)LMtYyRBV2{3tt^&EI-&DPkdO;U0!XBqMl{jZWNykp)upErz+v0~UdO9JyuNy2 zWT#}=opHXo4P}$jK!bpog7G*P%DdL0WB^X6`pzJlx0E9KI!9$r*vup0C0ZwH!3q_d zwr}4)5FCINAj2<+J64@}`78!D#KMYY?;5>$*tERmEAl!smJ8?|uvtTEBop{aGp{n| zkticmlDg(qu2W3jN#zxcb{}BY0)ybBlCpK4hK+PR{@+8LpTU0$V{T3~6&D&%Q1LJ2 za7G#X(o2ixI-fH{usSays-2tT`_EK4i~w)?lG<@T?KfntM$SL7I$G(md7n7MJ3nPe zTte=QiROWpTaf|Vtxt-VKpV~zT8ywS<>UosfcWIq44%YT@#J)YO< zwLUs0V#VpszOIw%mC?}f;<5ivCqFcM4VtRN&%hK4xJRLGLjYooWu78aln1w*(f7QV z{IaNzF>kT|^6ozu+!OuFqwi<%tINSSo0S4>BZ*Ae)CDR3?#TX1XFg3@_Oq?U>u6cv zNlf-KnBL~hni4?$pNrEY{$g9x`ImHxKKA@L`V^)BP-7B|Cakce<)$|yj7Ls7a#!cC zw0kaqjDP>3Q2E>Z=JEa*xO+fTo&YkXuD_b=-n*C|w)RIC@a4irnuj%lPg%%2Qa=KT@CbdIAh#zF}dhCVziPANk|Qc<_h4X|!2<9#Hx({|)QaR`D{dC7?5u zS`J;k6a7vVe>(=24LN-q5IbI+_;wX+_~H5AclM(`-@CiE4j2v<5AOX_5a1ojpM29t z&qkiN<@@^n-l7)%I|KL92k_nY)$YQ{!q7g(Ki&Ss8y`Ijx$DfkZFIkQ`k!tQu@@0- zHSrz)jAoDs9ftC||LdX(HrkA?4}kJ12L4N_lazJa^X{f|iR?iDnVRiC!*RZk280>i zD=Q-h@?V&t#G!`r%>NGb9rvg&S^k|suYD<*IoDUhKQH$0Scwr5V{8^yizSR6tm%D#_vy&2tJ15`QcGPj~Y0G^RLj&21xV zQ7X~Dgka8`e+Kds?3RIbYNu_+dzz;F&u#Acx$J$u@a>N!r4X&=>WcVh9PxiP$|K5x zAM6+GPz6kMXzv<%H7j%Z-F{xlx*xQ^ruT`&FJ%pcB{2k`?9+MH3Z&@}jWkl3a> zhvD0kmufaO8Qm`H(f3j17LS;3x|_Khn4kM84OqK-Kr>8`#z{jyjO1hu;%Tp> zRPA|X-CjsJtkLC4G{aXmjWNSIdIOw9JVNLi)zeSE_{|(3{{RtMHu_GDjMan%QZ}04 z_1Np7+LG_y&AjJ&Zwl5UDez^3pe-*@w4dl%VzK~eJcSLsoiz*plxjU+ zO#1vZ=zgPC=04TH0&l{4cQM7ql`ziE9rL4)o<^e(9_3-{-`<=Z%v#H+E?=o&=67>1 zH_)pO^(H?zS4IxwUZs4!ysyVy*1$Lk(Ma2uo06@;4DU|77OEL6EUfCR#J}%6_!$Z> zj9!lAnXA2$<;!!*3$KnK7qdlvWlIqr;_B197=m*H;SenQr7G)*Tx+1^z~ScMNS<;U z`ud`A`uK;ZH4TkN_wK#qAVq^*BPm6$x2I2&CqR?s=Xnp1EPtP2u~odk;I_OE3aJ5q z2s&#)7MkO=sgsa>gWZ!tU*+fVlY{7ze|9*lbj04A zhbw+m-kgxL#j7ke%5N|>E*!P{-#I>F-aGX({*Q+DBgXhYZd)UDLGyP2|NdEg-HLwD z|7biI=)3%n|KHL6Y(xV;|7kkkQC5^&wfX-3*~y2c4;`uB5oBA_PM{N|ei|qIJstk1 zC;5H0%fGa2|M?;8UK`rW_rKfV<3A()AMO8k_(vH3R}TL_lY_m))JR{3M6X{WSjmjo zkEB1da04ilbSalZ&%o0w;WcR4yd~BgO5#LkNjOR8ZfdaeM-0M2g6!sPs%cTGx zPT7km7o2*803tkah|um^a{JygdJ8eZl~OTr?xX&$P3wM@d0G&)sDDy@Y6i6#XLBr+ntp{R5@NW$O)m!e+S0 zKW56`#v_8(D|$Qt>4Bi3oDG6w=2&nkC%ah|bg)RR& zCd!5BtzoC|r+N<*mnH#)p#>j9Do%1@SxTK?OlL&Q;JW6E4NC~GXW2|jFqg)t&r%ya z3_DAvw_gRT+WAIt-NF1=AVvbbQ`tmmjpi+X>vR$@c~s1Cs=3bHUj-Y=on#4B$J{Fo zKmDNQ!>1f?*C6Agqp{xgqqck^aR9 zt;~}iD)^pWr)j`j>l|N&scLt0N{dPcNP)GjZ^E7diUv~a!=mGQi7YuK+qdZ+ZOeA# zXzjL_XoX%z*5Teii z0aRL>y)T?EDDYADBPpo?`M&AR!khJXz!Yg_;(m1v99-^!k9r`NR6y!UG1BsigZ6YQ zG0IKmB<7BMWPniccq_CaMXbcs+*8hg&)FQ0O5aUK4LFDzrcqpY(=bv&bXKLkbqthU zAcOg&rVqA6eeubBDm#DlEAhH9@SAl1hu*Gg%^ERv>G_qDx{NYrh_)an$}n0{Ywt8P z@u{qj%yMYg_xs4tC$OR`?|+#!(e0EsLiyubUv~ai*XxeM%i_G{w-Ydy1K( z`drY=6&3utBvI?6pev2LH*Wf~^Y46 z{#$wb)Os8jU*rHA(cm9)E!xx4G6@)hA(U-1p|g79OUxcr~JD77i-r3L4ePe$Y+Ej+&K^ z^ebo{z@qX3^+YJn*Z-)CXoBpF|zx#FK?r4F3XDM;-22J3!hMylz=Pd$4HMIRpo_VxBadxG{%BkIn4l6B7=9 zHUmGI&YRtOn@dQOXV%Ejci8CRtB_QrY5-?(D(4y?xxsEfie`hHxnfzy_~-NJzFkir_?oMlT*H9iHs5ZK+_UY&Jt3d&VFgisXPW9 z(k@48Jy%NJsnSYZ)&#RE1|JPXd&Lt=&XyBQ&!4K+Ig2zZ>7BhY(^$e-i`deqP%1ca z5sM^D+pbUdSYe7&O?B5y$YxmvlKJW-|M33GoinBE(I@Uby8OAika-|XGj+JVRZ{leYIWC}%VJaVix1 zZk}0uqpeDW9x*FUaHKL@X?6ps_jcsH&Ire_&1NC<)i|myaIDD+pefl@d_~E|Gp5VI3L{VyoELSSyC`cloy_ft$iVdoF1Saytgwqf~ukrUp}Q z_7UDLmkEWfy|7~1yZ)-JMBVHcVwBb|Mvij?L0|$(>Z^qPSFg-Vc)fEJkt258Bu%uw zaT%O)oNapG=}t=VF#yvjdU+7D@~-!)D>Y?Qqd{mXjNl4h%NoQ>uI2S{d@f~D8nYd; zPbxTFd!bJrn6H=pI@vigBVNPso>2`7d@G-(n5Mq?fzQ)hzbRCmjte`!om9&k=X=3{ zZaK0%&!fj?A|xnpGWf+eB=>e}?Q0#ux!tnL9HMUSg(&Xev9*>A5N{w+#JO!^%!Oxu zxtR6$>!60xgjaid>|*G&b_1vmVpPV&(R``0^{eMFjJiybm>6zynr=~jt@T-+t^7^c zNw0=%e`uG*(nHxCt29>TjHmBUI1{B4Ij`S`ytUCT(eBY|HCFUeoQ@XKSHFH;QShst zWb1N0RKZ*KN1cYRCPiv3yKruVi1Jcb(y+3n_-RMJ9C5`J#esJ5-(Ko#p=dAS;_s(swcOVoGCW~LW7`cnCjQW-@ zJL0&zMx93(8{a^kRZNk#T?Y3!3pG&24*Q5)^F4Rht}XWh@0mPW8C~to97b~Ubmp+I z%ZX}Hq@qco>LyixXXv$^sj4%X!1B+4%*2~3Ef&N_Q{2EPy z(tTWw8$aIM_eSX}WUaoIlJOae0dj>{E(HT&c?2!z;K zYcw|$vOzU+V7DMt3U{zaHGxaKc`Ie#+oz@p@N)YvT%V0~7DZ#2#T7n<<9Xsx zKRkl(Nj5EP9B3gQ4QMKi;61yQXhin|g!2&it7H<9|hl;qZjY93y|Oqk-_k-$h7& zcD7T|pyVAf7t^aZzSh#xAay}G9Qr7EAPH&uCJC0N00 z)bkBiM2KQG?e7b)wCCDa5;1X?u_s$dQ(O~W^I*sBUk9-y1g*j<)A}DqXXzc}J;3Jk zKJ-%-=-tg6-6t9=3-wWL(0Nv|lcYD7k}N^vWbrxEe(rsgu_~qahYj(<${Up5M4=$D zBJPtk(vn$ED{$&5M~=uzn0$|0{Y;$6+N_pZzOlWHRa2XC`=aCL_W=)g>(N#c{dyNk zXAG#7&S1LuR_jaecZz)0p%z+`Wk`!T@Q|%6E-=|7RwWXOU$i%-ysVeTYRILWmrT7@ z%lpGp!Z%rr2U+DYRb$E%=qY9 z&QyT|3W^7gSBb}}#?0#BX4_e`qfTPv9mLj5^w(yN7Mu6vS7U#Zo*o{;kU?NMj_QL=E^=oIdp{lv! z53*HqQkT6h`A`&)=dJl}zGfr2g`D~pP}Nt6+TYIBDf53#RJZG*qp zBFhxWnua2%)^F%&2|nX!vJY6y%-C-@jw#Qa$+uRQzW@d8r_{$jk2=t=rJnJZ1^wXq zgsv|m3SE^37|)Aa#4`2oto|EDH%p6?4T9n*C{V^g*pGAgE7o z%MNh!vI0yQ*j=^W4?Tg5bL|6$Es5YiJWZz>iXCA`%`|-HHWRp2`5Pi@YlUvUcUMJI zNK3CLaSmeNGOxKv$AmxHGBa=ZB9O+0l8lXl4RHiTcurH48(@E8(Zpp^b$;}8UcKoqxFljFi6(vZwOQZ0*bN3vsS~govnD1d`&>?M0pOx| zmT-y7Zy2)1XKgUwxgn}%03|93U)nq|*e88E`?EXd_o2RSr<>xTTC#jRqoi{Z1xu}g zYdA0$b8wv_IcAJlTB~pz@twIxqo3|*cGjhlA28?C!7I)&&1G1P+GiJMwq;ZSu~xi7 z4OQyZ56tAFX_}7Jqxm#Idsm%nmnfP09A<%u*vq}NFl;z~c$h*zBW&eXxdb{d-5a1wJ`x66eV-Mf)*6|i)8XpM;DoPgfFy2&D#XJ-V9%COQ%J8b|TIirD=ua?wCxGG|aQucqn zEjeQ#5It$?Nn!l8&5A^|bEwp=#IuR_px_zT!M^isF{CdWU%i(b3lZb_D8~y6}8d0^P}CbntBmBcPUM8p0YrMdH-HIBP?Hw+3lBGye1R^yyu=jUBkg8 z^4itsn+vUrV9Hq)nGgZ zWD!gc09)*)7IzkS^x!VY}ZIIDZ2y(%M&W{QSIP1ru|;`{M(IH4mM1~nB1P&0aY zxXM`lADzF@MtJ8TF_jUmw**y}^|2}(YOtrp%Rs+uB?02(?S>Al0LI_ZD<`KNODH~+ zKjDI1YzHiAcUW)W;P%cLt9xUzC#vnTHT=hGRr!=O*I#I4e&EjEp-@RVuQ4S<4l}tT zCN~M^6>e>oS{0vgU`M`lT#huR0!;`PYWp`95=n8kO514AxYk$gQs1T`in*<*7Q@xd z%3N&*N2kJ>x3ly<*>UCds;(%F6IKGm70`4HFD$V7CS^bKHl&AC)PKP`pxkykiZwMt zL9+%FDuxyiz1_$9)as*0$Fyb|h6`naYC*M8A^ zYKsp?f}<$SH?y*xZT0Ev;!tj_XlDNcLl)d|d8py3+ejPfsg}-OF%oSfdXW9htc}QB zd4_gBRto%a%mqH;ydgMd|0Vr6DK89+G=PY`smP~uQhDDBU9AMrn?V>=PHRIL^mrHs z&;lA%N4tqIviWf|R1x}SBsuuol|_K?rDR}xBKuG3aBq`uj*)IkUWG!0&mT+rp0X6Y z_H4t`a~`39jCyoRljFn&;)KcRswHGYU80OxpPFKCdukzA$ejOmJEriqWX3SH2&%r?tEHw@{VHqG=yWbquSL%|>Z)_D5`GXqR0E0C+~FLC@R0$Q0!vnV^#y;x?w5 z2{}D&3nlSRfVb%W@BsLh_@A1@IEIYsdNC%W<&#biv-jFQhJ%XYOf<`eosx)w&(GoZ zY>*H!FOfOW)U&Ft!m|MiDkL{rFg!%)mGh1)h$)YE)1aqpd}ZUzMKi}0A}Lg*sY(uv zz1z-16?)LsIeC`Ue}ae=H<$3sFw_&f2;-qK5y$j&BJTxTw|ko=W!;4n-{fc6nZ3oB zR^Y^o0H`u^J9#h6_9Ui7DKXyI9NWwDk~SJAMAsR$Dh?qR+l9e_xLQZmYuO{4`nx10 z*qqAhpyAbPS%Z`g7!#%IX(+LY#iAx{ENLc<#%(?bEaPf2*T5>!Dw0yZ5Lergi%$LP zj;Fy5*~M*_&fPWudY zA`c0jzn1W2Qr25FWXH^(BnXQZsOOfRTbd`}Cutrq?!!bMTVIOW{KqOZYzevg(X!>2z?mDwc2$qf@f?&VX?nXl<%!Vkp}Tq*T^v+7n6$U&@avI60Dh|kK{~4%KWmU zaSN|ZSWyfPR>aM(J8ug&?+?M8HJwV72y@txpN4lzMh|0uMILf`UFG^tYE9T-^C_#a ziCgrPn->Al>izc9Zu5KAB)h}|e0Ja?!Ff3)bs4u-lg?oF7nGa^H@_~NHZE%vVoapp zW6V)10FOLn;;U9M!glo|H8T6h7OJ^U4V;&LKjB4x)e44jO3)P|&`l8oZN?gjvK)p$ zOv(`(tS{C#1lu2+1=(sVOOs=D=Jr=#kSD`vy?;+K$WIaHneGtYX$5+>@8l0Tg~SG( zno^SGQAQi2MwfT&S|FTuTc4oa(rKM{Z7amTn74~V!=O>dFO6vSHE|9ZN^3b7yxq;tbmHVMVmx}H}Sr*N%mFFb?7=Yyp6wm@3`-c zcm8DT!QN}Fx#s-N&zx&-jkdMS);VRoP2k0m{9gL6jC#yGdG$${V~T@c^H7%IFs?zb zr6Skn6?)LA$Yb-9(AN8dgF?M};MGMN`IMxYQKn`c%4F((2mOYek&zo+{f5-$aq<5B1Sl7-p1pe2g%KGoxR2jx zTR0;$Xi1et5gQ$VW)FMSa^4DPxSvrU85!+YW$z|5 z-pDx=&PLGch>5Gzig~rpfqX}^dl#JF`X3*E**o9rkU9?M8qK1>&{fPWTv1Nom%ALd zJpW^-7E0hP5vQ0|w=rCt-P@h`VX(1ml(zSroq!pmTe>1j9G6s!h3$JI2R|MZqi+AW z_{rsX2P6Eu!QCtSd5SBetq!f9HCArv(+|HQ=A(@joF66(y(6P=)?}^i&pG0;jZl95 zt&XC$CjaBffSB44k|R3h_Gwm*$qQ9*(SZWVzRO~03%3RvtMc0p1>RFm(&e1o%6yak z?Z`^M_J9*r_?1e1ZgzvI8e8heD2dPf%ld)tklZNrhg7hU%u1WME&oPuzfZJLUi6#{ zmhzF@no0a>$H5QfwT@4pe$?=9#_brVU`26R&t!!6AZ8i(%h$$>zQ9zilD#yc;TMV; zN1;b1>+n~^;c=EUi9@0$@CoVh%rCyqYj)1Z>;VPQJmj&t!KPGV&4mktA|MK%A3A&g4?lX{X}UijW5?@W%y0e6Pa zay}`$&MB2LboJv&`Stq^!OFr{Mgm1_cO%@1NAg2avFco7)3FIhjzdj*9v-i8HnQvb z@K!I~$H)i9@MAWkqZ&k10Dg5VIM|SDq}%X(BimJG2F!KHJetEcy>%)#u>z_9*BIpp znJYDW&pe5_{21?de*fI~v4Su~EU?=n8>Z01be0h0hjF|40jt{%GRTo475sZXkU7!C92}Q=0~~GdG(JE*lzZ8pecEoN=9B zw5qv%lAZ%HbMk4MilS6k$Qa$JCo=2Z^iRv0k-$@OsJ@Qojq(7E!QeYjvmhOfDSN(G z!!6;GC|?X3FGhgqi;kUlxO9`gJ~|J)+M}iFT5FWoeaEeVT3hX>`(8oR=G#jzGryf^ zJR>tzq}2NIOBCDMkCN0?wY3K~7#n^eEFyi+=9D8r=#2-uy-yaDz20*+uI~&-J*R}~ z_l7`@FF1^@FJWRQB|>0f9%(-M@Y_l_0wq3my0juYT+d&VC zJ_zSGm+-<^<~)-YlNm+2hIpSK`Fz?c0k0ji?jUp&oA?KSS2K%nE_OLv7#&Awx+y}t z>>9nSav^@BOGPZwlU}=dOW{VYRUS0(|0-!5U3m6hEA^KCE?8R+*)%Z;O{FBQu65P@ zK6UWHCpDq*c)rWSvnVm5&~d-&YVom(G3-@WG3axWg_AJ&0}^Jr&B+#PQHu>0%bG`;SQ?Y*IIHS{c*S!Iw3?Q`Bn7jO>G7f>I0b zUzRlY2nlQ4Eg0Db-!5W?c-*fSd4LZ_`ER=%;6uDc_g)lg#&T=kVIpA{N=c822zSjA zm$WF8VIO@qOCQ@ALa+r%wCW7h@bQ>v$_8g5Rd@14@O^V{a770sJwLm3=#caaCE&4) zP|U(Lqo}ugM9F2f4iU~zFPWqLPPZG$@oq7BD4BY6Hl3~sv=+I;JV&D{3+GqGxD<wB(S0VALvkeh6Pe|3~Nz+tg;x8OrgB3;VUy=!YZSK*+N9s%-M zu4pMbcftB0aO^$7&1_5s zZ9v}xRfI8%PR)N!V1wA}|FH<~nGy?R&pvE;D3_`e@ZEgo=_wmCw~aHGmp(*Qa-ZMB zs-VkwG}S?1=4HRRF^hR^cz&C6%BbX##;~(L?{c5{8u5Debkj41K=(Fyt~R}zXxl*F z;{m%cMvqA=VNM1__kt1@X9kB;wW$~Fu7_1fnZxWRuZQCVEw zRXw)CRkA%cVfjFd9pc?YR9WdAUH|nr`J{P7yZunzc^0UBOwSs9gC0AY{bWAu2S(-- z&bxNQsmr$blhzKUTgCbsnP;(Czqmj2h;p8i@G`KwCgUj7COgTFBa=6#5Rt{NUk|h& zm)jX#oy>P(cINfOf6R`iBBo;LTU#?oi z)OU)-34PbO#FgL=EK5ll#_=Lv(Fg<7v%IUI%@pEE5tXE?teMrraeEvIk?k%zqlSn6 z?D?1-vXPPhR2imh#?#;XHG4hOX7jF;$Pm-WrsIcZou(21I~#btP(MLxwzg7u-hA}_ zb=Co~vEqxZi(UHQ*boNxfiHIChWV%OqQ4j7P^()`13U0BdKy->&xtWqluqGl{Nu5 zUZ2lIiW?L|{?!WcIe!jHw?3dx%9#|@lov5n}MBXmmg{N{X(|*R<`kJ zNf#yG)2s1ft(d`rIStU_i$oh(c)^>Pt$J&cy?s>4EiPHr6#SgRegPZZ5^`7ATYGum z3$0+cm68v2SfCw)nAs`}6(ZUK`n;em8{C!q%r}u3!d$&HN$DG`%(s}JI)8fy==$rS z*K1zirKm5UC)u0|tgO}-zv+mpg-&n!0OGIN_UQby+0wm$oJMR+1waBu4dWcG?9;&Q2ETvNI*B3A!V^Xk0$U23udL&UWTZ?ZH)@0~BcfyF=>Uq}rS{ zxfId~J|-)_8o|xU;{12s=_jLO_-j&bM5L1A%<=K}));VwqvaRb*o7nG-~T2_xfTnE zGe5%PaiGHyzy?vO>y4Rv^}DG-!AJG>kG)UGrG!SzU3v70Z`>4_zQv5Ai1^!wwvC^D z=fNSsP)>@a4Lys4v7D@^|J&e_cr4P|<6zFx4}hH1o05@my7@T81>@1t00Iz4<4t4ezC1CV2)% zn|vn3Y@vlcr>?Y|>YIwsu7fx~MqYkJP3tvd*G!B(&)+>CJ84W^+L+eK&7$)ok5*~> z_I^(q{y`6V^kv~qh2j9ijmTk6?d8pQn?b~m+NZ(7_|DSp*pt0;il@1Dd*#Z~qE2yB z9G^c1a%eZkrC8atCHGZL6|JIesWrR}F zE_Du$xfs0j7c15hbDd}CE}1;-w$x>(SJd+O&I8&@sy^=+eQ-0F zuZTe^M#0D9R9hrGSYpjWKswG!Q$o>vv8v!xn*UuxHppiCp>3sU(J}C z-mRiem%jRovxFunqVaD=n9Tcq=FQFo39KImAUO4d>yUC&Ex~cNb;c=ki>7E1i7PhVs)ds(dsl-8zpBYZ#L|qtc%2?r@(=KszoTJxN z0bqJQL_j45cyPlV*1q*r!$6=9RZ_$P+^k=U8ypLHg zY#p_$!T$tzEA~g#f{&x^kZ)uyQd~$|e~F)?{+8TocZ_0}qNXA4(Rdg-(PJ7PtD~u* z78gyy^?B#wy|W>07;k5>>Cp=MKg8S3<`R@YfKvBBmbWD{ryG`h#H6#UXyf&sb?QJD zRhwQgCDkom0A4486E(@DwP6NWF$y6u(nCyjH4NumeKD|5f@7$|-6qVlwH>N!= zTGLP3$1miQlk2^$E}W0a(|tZ1PgwDezO+FVr668ay^k`7sc!OR8JbmusW<`^DlzKHTMXeW@9`K?NVQd&|U|E5S9(8-)PWmot;*$O}|BOU1_})(h`z3p^68t8Z}Qhiql0P60L8dB##F7LOfqIMVdVk<=FZl z_LQn~rA*q~$hi10(VhR4Ym_lN)rv|!&wsd!Bt5iXhI8tfgYJMIn^My=@_;;zL^IZt zKQ-EwIt zY^t>TH~(twN(@936H?(+4>uVPO%0skR7|Iji!l!%Mq6plH&=v*U9-$ihO{E7Q;J)f z#}>W(2zP$`O#QajpIgxmch726hxTk;1Vt@vCg*ePqbfTU?F$9OA}%FjF7We{CdV~M zoi@I!g%!2mAR|PS%ElNsZlc+vf;rC*{hZLZ>up*xGb5r{`u`uoC>v!?va(5HR6C4t zu#1_(&mR;hVjmcsP8X^lrtQrUf#x1Z*|B*rj&Wt?I~lhp=vUk(M{I^Ha*G5j4|-=A zyI~Whl$r<;;NFCV$66QIPaqGKZB>L7UZIJ=eG)^oQu@ZVlKoDx9@Rz>a0CEYQXDex zjPH+=!QvEqUzz73bzCE;W1vFd;R;-X1P!3|D&y?^>b}US=am8y;`E%3IakTD{bmXG zNGvIf7Z?o)R}E{P`@hj)bRv8_*48k|F4W6ax|gixJJZ&zAO0;C`OhxaK=7mvDLMa} zHae=+lK;gGt(rN*Ym134mEGv5)$;5FbP57K3`ncl&74Np;mf;weY&1?!?bvBPX*^& z*$K_Ko7F=x@sjSflkSX{`>un_K21^cSy*c`dAgf1Ppu}l$gWuK7gdh&TKD>RK(tb} zKc0xN#sVgP*_yGjB|cxq?bNUIKMFZpo-&8azOf8Tm?Po&huq%Adutj__wqw((`x;^ z^G~ZaH{1@*Vmfdv(@AB{H4V4L)M(K`?H3nW69G=Kcb~It#S;F zl>fnbY!E%~H?)58{bXboYM(*?>q?s_KnAL48TIv>^K}UoCKiK9c~|>23HkX6y}h@$ zRn7|!+x=blgQKHKV>#I-Gn|%Rs)hcY->`$dbImHJXBZ>zN*Gw{-xTjnx9+LU&h8zR z)hp+mil|eF&#qEv91Gpz?83m)W-sKEm;0;{6~ReAI1WkX8U3(E6;sG*M5wy*#pwrG35dj7oA>9z`YCj6VjCa)B}w~HFTw%N@t z$iDa*W;KSI=!HbXdExRagpeYtu!xXufvj@(gH8-8IabG4oVUVLzCHG!iFgafE4 zI|jt=C*C3|gM{J_4?7Z(q9cT*e(0&cca)fQzpr1nHRr9jspYJw%?wm6HGmYe4~?qV186|m~GkB^5~ z#JI!_K{fJ$F=~h*Fq)zLp9+K5Z{rI4qR+!4KMW2zI$0p zRL_~VUtxhJ##LrxJL|%#8#rVkn)=|$bD=`&?d7g?->kA$8VBG?F$expWD&xSHqq|Y`F=pD{(R?HkN26m5FE&_CxTf~rhTK#J z?AxCXllZS%=62F6G)77aTl}&eQN<^b;OBkJ)~UIRv~XBJp85i#Mn35%D4wd1gm@rFtS~QxEXpt8%I^~f!?F(P3E|*HxFlbjwP?;YdSmRkZdq?kW{2#27 zGBvtnn39sDp|V*@m+1ZsweN6X`Ht$ne{Kf~LZ?n zGfJ;qL(o|E)2NO%0DX*(F_G+X4bk#jR%8Ii5k6~vfy@+~<=t3U^`Rc?81*4xE)OK7 zlgO*AZ5MTPY@rrkkbK0BoPJ0-o?kS08=fL4zcF=*F(9Rp{>Lx1G-wk{ zOqdKM&8$ohrI3m&CnP@CZo^BIK@5Y6JKl3Vmi82F#`Drr{BIlLslBhS%1nq^?-ih0 z^gmG`BuJWQWvd`TMV&^y%>A7$NY9;RV?of%SY)JJLK_Z-^@D(e`8tEHu!S$3EwbT@bIdG^%(f=s?m1tDB)O?sH51l$`bNffRU z=ot!!KNWpw-J}$_Ef~eY6{PVXL^7py=>U`e6b|#W^sf$DmU)8o%(se!U_7*YO<5{D z`Rfpc;XsISD6jwtIR9>k3JdRmnlD*&QJHz$wEI}7Uw2v*8}7&=(sWrmt7OZHiB$Bg@*PYrc+Z3bq7++m0Z;W9gLrj z4KkZWMMv_{Jl^yrds*em@1`w~2%ip$%waCjPJXEwSMmgD0QsNXdbgN|*PvnN0rToR zc-kBnI)%MwkHU(_@9-+I{3=KNq2Iuj@$sf)&m39De|#V$9Oc-CC%_GEEzv06J~6d& zkuyPC#O{{*#k`)_#n7hWkrnJ$i;g5#q^>2-ipfK_HApFF8=P{rv5X5ol?X}KLpD|W zH{!};?3kB?UGg36 zEG51&6j9xpt60estl*he?teQ8F!9*0pvjgr#c*-w7uH=Ve~RzB!o^SJUxl1%X1RwwUK{m?f*FT7wtxcH6~J)0ssE)S5!`q0h|Cps*QyfblSMqY0?ARy zuNj`I2>JTb{h}IFyT0rzaXu7YhG!h>l0Aw+pZW~8XQ}0B*qwFu7JM?zMpQ1*DRMOj z^O?e*KLD1m$yDa~m8J%VWs=al%Q(DFss7i2T7wjR%TuJkIAwKTX&}c#)fVjTbhonw z#aR32W9(80ew?~0)=DBIZv21G|3v4=e>K{x;lS;lBbgC&$y*jW%dRegw?Qk z)S%^`R8TDM&0dq>^yF$c@X_rl(vN#Dd!kKt{dOmVc-MpsG&O}cS#)7HAfowqdfO7H zULaLI*Ds2l``+7|znUC|y+8YlirYsR0;^D&6G1|{q^ZUmH_f%`?xYkQA0I#4o8^Z= zQjhMzWn0ZN=b2ikt-h11ASBb)f7aZ7q#H$IXJRq6c*kh7QA&@mPp9jwn#;*O0sVys z|ExOSUUql0y&vYu4!if3rVjJf4L*8gZ(=^V>@5=e;5}!m#ipAs?0q)!>8;?AA0-AM zmN$zla7V>&`eaC&m~s=wI1`k=v8p4+Oa@6x&>Ep8H3jaD&QX-}?#sj*8s^kMHtB&o zhrZ~b0-p0yl+QNL99#IN${^!ip+yB9t$cTe4tSY=5s-1EkpX?NE5BIebS@r6hI1w| zT>XH;x$osvo(juswc{@p6copUX+6YUb3>t(R<%~`TZ^g_^fU9@=&Pm2Ky{?B$FC^m z>o?yrt-my|Nm_!9wewPFwcY}F`4-N}JHkURUqd< z^h03C2;2j({FX!vB0E%r9D@hqxxKykBQJ1S{M7kD$0)>li7{D`mQ5D;@OZlU{ukUlj{$2YaeQCCuXPNB+)av$Q6mb?9 z+We0TFfZDt{cga|DUg3CF(prArTkHxN9y2cdcl_5G;RD~UE}sU@IpWxNvyAvSrEf@ zYOO(7w1|~WZ;0eZyM+oHK9`vNP-Al$hBWwVR-=0NMY&c*U2$Edyo z6Crgbo+K4iAD3VMTmRvw3L}WiS|fg!LantL%RSn42r%79O-IE~4n-AJz}{yYQ|`$D z#Rn4$b{6jw$I2FroR>I%vR;nV4~R5SaHIGTNfwT?u;qWYuKbXgmEIz=s zY)Jl+VVO0-e6%FRflcX?&ikE@zz(D0?q<}NRd9OGT}rwU+wQQNT(Aab@p7TqcVLMD z3)e=EB;kn|zW<}v=%hm=xg2?^I-bkez-r9*JNp`(ybgvy*&XXj3^g1~UL&GJ+#Vf7 z!#h;L^>@{Je5LJo-d{vKvLrm@YMMVj?#bn7=+G(RwB|7_seS^K*Q5y>P){v!8TIGXc<-=#RAV zSHMN2OKK3xuDeo_pmTls$i2VcJ zFQ`Ex&4@3gVXk5+9;BF6*a}{6 z=SJeD{pTq$3vE6uypb+_>8`XjSjI40zt+-Dd_i?-^mhX6bu=kt&&b1pvY_s4VaVO@6_j74>jz`whO$&7?i zOM;xA7GqO#FDDG~SuNErcwL{x>*?J^)4$O2YZh6%LSNK#KF#tPl-_?+%1DO9Lw%|znN^IRV<0UZ}Tia4P- za!S5(GqN#Xxjorm4~<-G^SVu^jovkT)LkX&xZ^k+y!6;06pjKLgET`#*M%ThNNJ7| z?k|e44slAK`MD)+?}v*%RF=9NRe|Z{8*h{+arD$t(tkHFOzC(U&6E%}AOxGQ^bV7u zda*iAc^1e-Kk;s9khs}lMlZ=wgT0MmhwqR2+Lic(UA7o@G;}W*+3MO&w;RvhyAJLK z7ewzYn~YuzsOZLI^8=<<^vr_WrM1zoN7L%ATeCgo%6CpD+jC> zOmBAnvg-&s!ZO?g&nO!4!*V*}0WcX{<3BD}gn`5j8Anb#K*p#lY1PjxTAwIX-Rtr5 zPMOWLpf}@KBwrDZOiR-Wz`TXeQDajGlvf!}{Tz8?3axyX`Q<18X;@pa?=QXt^Koad z$1Zfg?bqD{@4l{?$b{zwg66WkuNZB=ZgbbhOXSRArUhbyg%hb89AhYMCNKL`tnbr4 zn!<~Pa31pN`>i`|ME}wn-I7D2xTb#sf!Eg$R(b-@YRFBuPn$EWnr}6gyw2KLf0Td| zaIZ9I|Jj}j7N6|L{TDf|-HP~Y{M1Nk9cx&xPXE8xO!h1Xqjofd%FuDhKM5*CeK5YaC!f0v3k#1GV3mGH$uu2P>YU!$Xk#2P<@ z3s+pSd%v)Dj#sJDI6{b<(iYbWWob9&<#eLDJC>~4 z^P=tap+|q{cH_c1ym9Bb8R;ZT>5<#lw+y*bpJTlm&mptYPIoTL*G-M?7w}@C4)D4e z;MqEfR3zoZ8iNW>E2rYug&ru7#40k)@OILZ-a8}qoNf;GX&Y(|rmJppo?0SwaN7>* z_BSouAuF08^@`)WClr~hcTD%?aNUg2>@~A1cIm!%Cl;Gl)7QtuiC(u?#Jp?OxRTFt z_V_G^9l>z-dfiC+7eHL8#7l5e@Yt@h`XGolKzS%X9CeLj?z|fNBjDkvCU%-DeDR^d zy({#0ioSS93b9v0-mFUb)?vv5wT$f5LLBKuxbwN;^vU?I^jCo3m&2d1&fg^PfUM2e zvGdK|A9uR%T~#*U=RHI>M$roI@+9WD?YHqzdtdR$u(<4{CkBvYcYZ(IIYNBbZ8>#B zN;6uvdQ!ziATbwV!Rp3yYJj-afgaI&i;8RLj(vEy3YS$J&Og_R@py%m^kB95v?j5& z3RRiA;Pe?1cf=_Pbd}j0F9q}(ij1WBZfG-XNG~WMq5OxG< zxr4(DD$lo^u(-m9NkZx+N#ag_+i|6`&W{QJRlG#OA^$(3yYWb-Y`HdKx$NTNIxqMq z3zw4WZ=WJ=k4o=*yi=t6q}9LYyjq2qM^FADrS=}gGq9mLnG(GNKFE=|)BiFB$%js& zXb`U~?38YGz7D+9+oMB|^pC}-S*-ObG16#sxwzPC7!dX(U5Al;XJY(;PhvyZgiI_IfAa`Czm}S7N!FE zF^YQvBry!9?av)8ZwLdtSVbMyx6>iafW&GEfW{@ZKF8!jklQ7vds8fG$ z7>jo?W17G7afG=v?LOY*2&FbHZ7o_=Yu`Os24TrMVH3Vd%m)C0iV#>84BDj z|7zN6M)lr-}c;EmD};ac&^IbKZW(D7LQZoe)Vvu+SDzHV1X- zF*UuT%^f`bNj5Pt0kL78F=S?306R&LB}6>W9lU`10@KqCShF=2A9iE$5@5N~tn_bq zBVYyXZt?^kKEb~1e`B`_`u0l+#`)JMd8oJ}W7fB`eSNF%mM?)R;`O|_1H{NWJ&+hq z(bVdgxWc5D3T}$pP;saoJOPvv?G%UlhVF%e*cgr19kL{8YYu_L(8`7pwa4`Y5n}z$ z`9nYK0((b;X0wUyZolP`KEVRx0ujOx=O}S%dEQ5e3j?CX_rFQE9y5))c+o zMVE)}sUFAYe0%YzT}ucc0|M4jLTT-aV3Mzx_kB&+bYYykH_b)mGy>hSj`F1qK zzcuvLJM8^M90@*O>$*f54tBuV8I=qkw4BvRL2S=zxcSy(yuSXFgN=WlB=kf0V@vGT zrNzlIB@c%j4&$X}=utwB{6lBGtrSyO#m-7*7fx)&SsCcoK^@Q;2YYq!HF^aAYr*4c zQnh~7+AbfpJPlYJen=2Wmrg81^M~p`A@l3nv|oFeAyU{wd))d~>RPqunL`-O&U+1& zR&%0D4-IHF*q|80j|t8)+|5Q%$DM?>gEy+pw{8x$lkoipM0usreD3zrubsul!~z4! z$N!o^c=9l#Wk1L*rDAcL>-fF1b%}&_I?7iS4u^B)c;A=~_yfuu;a2 zEbB38hRSt3Q@u<`*k9VeJ{ogOte(3nIN!w=U@3eU`0?CfXSm5e2XHgh%~4ga03r5s zmZbiOKScp|ku<{#@6Zm&Vw8?lxwp?}R4^B1GM%(!OBo?DKbo7is86nQfS{9=mWH2` zAC?@!hO_+s+c%k>;vzvO)f6@VeN;q)K(A%JegA-R+cs(Fd%uuRr;VFeJX_}>CFd_i zMP$Y#rMr`V76~>zxD0?H-W8Dp2i~gNZIK5QoXccJ9BOf{TR*AaW;|hs`A+@G2jzHc zC7t*9ThX#C6u5srOU^u`C!xs ziHX#ro9oP>W~K#jX9@r}j*piVwr9(JoDnepCK!JoDEG{p(wr1VDRqCJQx0D0TVQRr zY%Y*EY*wUoIh5ndVQhANrSksc_kLuEi!P7@xT=6t1>Kz-#aGNN zM!rL^E9e6P&b`0gT0I4{SB#;$r(Vk(QHk(NBG^rf7K#4=4*K=}UL7s-=>u53-}qTg zgXOCF*uozuvzt?_+Ez*1!%KQ-3r-1Xc6?C(v*srx&4#Hx>U$fV1LD#B zJvSXHJIM!&=jYA-JhWi#MO97zl5W7G1HwjEOY}Oz7{rkq@Ii--V}ST;%hb8FrPoZ9 z-@HC^BX`QOndT?$k8MK$WHN1OpHwh)pSeG%PR_1Al(9kxH+Wyi50IMgc^}p)K@*>w zC0bCy>%X-GE$IvG$G=K}xEB4SlblxhmE3^vDzDzQzj`&OdYyvb6`FYVap1jj}b6u>PcD{ZfY=2$MWuV%bc!xq^n|%T;%f5$?`4$3Ex!L>w_o= zRL%wlCm_~rY%?9kp=2VLi?*51$#0nZOv%2r;twx#FF{g3B|&Pfr>7@Ld3lVYZnxw& zDf@_pxxEJO7#sPS-IJGQKIUgfh*%xCW^A-BJhgO~!haJbkyX|clj!yxcVe&iXII9$ zPwF*$>Wy9%jSkd_BYBcHEHIP|;4+El?zC3N!q18qd%-nw$F_DpnXN=FG9UtHun zKH#Ujimu)i=FME3vM=o!%C%EXexLxV6$8A82J_Xp#DO-{ktd@Og_lZ-eXn?NxgnlB zj4pj*Xy|T~bPTyCHC(Bd)Aeo(n?IV>N=X4RMB$OF2TZP(zrW7NsolmzJ7bRPbnRv0 z$XlTtAS4j82PFI~2?&jQ{>Nl6unr8Wa6i0fUD~^KyBUlS7L~SlsAqQ9^*XewnRlOi zVMf{{P(V38b2J8fSBY+;+`j3D;w|nPT~d8`dl@3P)J^=_Si)Ub#4v5VHO_w_4Mx|< z;%z%M1va~r^Ia1U}%OYJZ&L2Oj*9;S_I>7DN1PjjRL+G+2TO`>2CAL8tXfo z-y4_uePIfdo{f@=Q+Eq-iEB{V?fLA;>Mz7ozW&))gZdy*w3%akdZ7H`x_8-Au{AhC zajy~rw%z;CF@jInHFNepQ2^&oT2z$!HYzz#?i;SM|7=nhv_1R9wb-`oqEO<_DkgXD_@^=45i0Mz*WM!; z(6_+EDT(2e20ycr(kN)1Z8V@)AM3))7QOSxT8%rYl<&Hx`f&rXKa9ieT`>w%MK@Q^ zRXqkwz_OPNp9AJ3K~h(ISJJI^wvRLC6q6!i?{5s5t=?bW#Vka3j&7h<&alssr<@zB zSYBOJFZ0-)$S z;j@NS>Gy@*h@ScZ(Y=MGhlIt7967T0X(blKnmV7*n_ef0Hkn-#$Czw8h#ok5H5LK& zzs<9w{YoMmbeFy~{xDgY6EHUVtn*R#)YAU!?Lx7YV0MTf@0;A_fv3>|DPKVx zG+BURv%W*zNJ=pQsn;O%@nyGniJUOx^I@7=%3XI%J_PmrLqZBovW|vauCk(x1Pw3y z8G$MhFClN_Xhfh@$TZrDO#W_4p+IAYZv8|P@X?kt#VaK8ew*vogU~y zLz$D!cG@l*E+0OM0?7C1S*5;+f1XQrcA6$DeN59u59$>}cd_a;a_oQ{cM_uEQ__0H zWX0fwG%RhJAXuhqIQCakyTHrkra#eQDD9IjX4Bghv`?~6kt!`y2GUG?YewyE?OhSG zB{dKyvpM?vmz4bGwdD{wcT=HD&2J*6_(yqk`2{szH=f5eOEz~00gX4wqny6jE&J=K zE6$d4P{$E(Zf5z>c^qW%vJy4{IvqoRb+JevMH6yxUHcXQ6eMm z{wJYrm^(FQ{U`C2@l^Pz>1t1}fu-AO6b1cIvOV>@IZ9S)Qr{YBufYW2Lc;s~r z_h^CQyt*rYnCQ=%}hh6*0EzJ89ZnQv|+}9Py&|tYMAQyiAU4jCtFOD%B;i2PtJvU=7 zh$yAYIGO%pPLYN9UMxACm3Gfd4=Wu}!E_xg*^sL7sIZ@NZm^_n=nbceHO`-{v6!l= zPyzX}H>kv;h*UQ0jj~Z{=sE&54yrfmwR4*a~*tW>c6sVJS!}m+MwaXY4Ylq^a0gW~(Iai>Yq>ane|zqIcqZrms;)L|WBzU}6I6D};MJ zi&en#a-ucKJLAhVAWpOWT65a_Hj{0YO^c@B^Er|=VGl3oLm`R^54UJo`f}e<70JE1 z5I0r`i3cfeW7t+ypxMjgBsBE=;$nWclW~qKlV{}3Gj#yeuy*UkgdyN)25&llk?iz>eN1VK3<;G~{qx+}L{T>KfL_Yv^Qy;p=c~y+t9XotKwwNgUDtt__2MQWg zi<8Hl!xp|dS3oxKV$mApE7$RJOZGnt7*V1oKlV15W-z^S)}P%~)O#W}QT?PEmrsOi zS3I?05^Juz22ZHQOgjCbZrYjO?!LzBF2-g$5^#?=)YCTGf4wsONgABs%bFp9JCi+J zs4ive;uR)ZNLAyo+F0<{fO=S&piaVYiX|w*uDhucVK!GVVodm}^QH`gV`fi>YT?SJ$ z(#iQ@!G*Uq(6R2z6Q)rA(AjgxcS7y-w-%pjHO*DIcHt}uW;I~?2Vm7Y&*%Q(!%`;t z+ET+<)R8lMU5f)!O2}uJQ0EnPwq8QsYWtG67k``?Aj;vGQI52N66iOrezZJf2bA*D zj812-m3$RC0(9}Q!fIBcUKl>|E~Lt@u#U0HU?J{?T7tI>+nMZ5_d}>xEUt3{k||gA zp+gwa+O?BSA2TR6K}|!eUe^*`h!%XC23J6)HaPFUWfeni3jFv-nht|0*P}Ot4OkIE z{$6vDjk(XVN8E`g%NF0C59y_>h>N?DS8v)(m+$7J&V-J%*;L+IhyFPT$e9sixeOkD z<)X2X9%FJWG^p2k?;K8wYrA&d%jwZ!96(G}_CBZxIa*+Vf;XG?Dr&8+kvk^{c1_wNpOP(WVJdm8l{~OP2uH$4ASU<>r9w&A^t{ zFVrHGt#D!`^mG*ySZo950|vryx(cj7!KJ@<)Kc7`9_V3(!JEqZ1(3CC85e(Zgj_2A z;(JKb6mJA^{#0-ke6^LOCE=l_ot=kv%X-WL$W0b~TYVaWx(()2fW7vmj9k#`X(*FIQ>`ZUxxi%2J^y$?mv}2t%nz51&{*EwJSus1Bz3<)b;RAp#oIvUi zeib>4L!7YrBf2*5pNwbO0D_?znp8+yWA+?TEEo*J;ZflD=dXko&j3KtvD{EAbO{2A z2(cZs59;^GGEH&s?6jOvI9JNe;kgv(lNAG-KO1c&H19L;_GU(Yqm*fD`mM(OCKrhUmag< zzCiE2$-iu7K9zrP+B_(O&UlwH3}CoR&wr~#A5W}hRX@^a>1vKCx?Q>+pq%|bOucnf zl->J1jDd&7~bIuie?~6d%zR}L_O+(eg(0A3(Q)VegS{QMT-62-h&nN{LYFZd2 zth!pv+65BTB!eeM57Ndq&JBIm~t?<&~XDu=^L!t3Mt`0G#6zhF#U%1?P70a3*s(-qwBK#G3uw(e&!-1 zi{zJ;6}1zAXJBZQah~mQQ$`dV?x`0aDXt!5Io)#md7V3$XJjEOItWE0%Jkbr-Di|e z*BJL2&9{tm<1hRu-VT}~AELib%3v$ZNX{P&@i2gspM1$G`(2RoSEyUy+#n5Q8rdTa zOQu_iAEt~&EV#8`5sKmcF(g8#ki$)BcJA-fh+&po_VGJNKgt$D0H@@y@;q_Xm(cjIZy$x zH^L5i!A%eSi!Z`1efbWs=xm*}%mEUFaoQoKohe(A7h*f>9}H`qb@0yEUD5enS4gJf zaVSAf#?^J37pgyO=12Ps{@8}&o|9e|Wf<>U!dTSvaizhxDtmo%H9Ma#Ry?IHJ*F0c zXcV_Z26;NYblp+FB@1MzYiM@tVvj* zh7vf_ZoiKp>2@lhvVI?~a6XwG0?Wcb*ZNTNWCgvhFPr_|^=ML5!~GI=AOw~nsY#&Ha0Y0Nw4*PHm zr!WN>Jc#n&Kglqb*k)Vi>|*|!?C6%^^nJ0+DkVx2nb~^vlF<2m2bnsj3beAw zDrgwmvi1}I=DMu|$h}^SfA8~*Zk#DwFb5e_G0f4Y6lBp(@(5^|ja#Pdesr2|TD{3{ zU4R=mw@DveWlx(M!XZqxsYLZygEbPm350= zfLm}TJgE+F@7^&;a^33!@kpMV?_ZRI_+)nSa;8S|vq#4a9c~ z?q&HHDGzS%oAc?r@-Bf-cQL%L-)MHNF>LOEZkcJ`HW2ep{-#98Y+Z0U-`K|E<9~IQ z?`ow=BQ##f!KIdg}JW0eQZzRtlv9i~DLd3@eH# znir~)R!{hhrE8+Rg4Bda|JDEOi3J;%X-Z!l2y)BM@vvFX^x*G9n)Xn11Uz0YVI{H4 z+JSXUD%m;`zWN6&g-w^|m1Lj{J#`SsHFM?|z0n+xe}A1D`I!&xebC+Y2J{S0GbH{A zlrj9F`%UZNLvphCQz2%vrmw>-+DMt})?_ahP#uU{%G0suEdMJnE?g4x+c>vCO^`G4 zamJhkx?AOUP~tMEFo7jk0`o(SUDifrMdDCpF!HX75GkSBD>GW`BiEQwGs7OavE$gR zEcd#fREcopPZ$DXFVj|HyvqRdJTe<4HxP7B{+41xX&(cu^L#BG*#9gyBKyyUK*2kt z=C6h13d0@ zMMpcTcUGBvbxV1?w&gcHz>&D49=a=VE|-k=J6U}W)$Whh94rPDhakJE4m(7|CZ%t99-n4y_8TP9Fl(l zO(j8kBJP}=y~u6J|6oa@>by7qYbhZ0s0)*zUEoAdE9`{&WO1usPk8+#iFI1*AyC4* z3!KoLD))Sb)c6+E3*ULMNT3DGXl3#mBb?Dqn1~)L%}L0XQez{%*d%hsrjXvOqth}yvmRH2a;!7b4$C6bLejjn~{d`Ncy}QPhcVqg`87 z!gqS9bDa2#_{5Zimk$=-f`&o%{R0n}|1Bw=frJX9-VUMfHbs5&@u=&BP`CKm&h3R6d6 zLc7_$nCs+h$+}*wUbm>V&5q`qhu@s_?(^=?YnW&u-q*Ra`PItRJj~RL*o4xgc)nvy zEZ}{52s@~OiK=k7z~!aqQb1#cZNy>46?&1#bVFSG%>&`t@(qTcxO(YY)0y6k{1%J3 zh)})IBHUf_U*pPKlCl00qGV8 zCS=->lJs+7q{>tGF`?|%07)Ngrx2JuTsfq?f&BNOgsH5zw(<2y3NcXoUho*p(m%UV z?c)JXzYI*)1$hlMH&S<*)X?D(Qj{tRok@wVnSBJP6?G5CyHX0qsmvTznJhF(Joe>P z{#3^E+WjbA^J|waetd*>~vgRCqunIc}43z2@ccQp!F%k18$ zG6aHVv!Ka9uQ2e2CSPz_^BH7xa5{g{p->4y8P{eUC{$D9^G=v)pa~7$3j2L*JIoEc zImWc7sZDX7Lq+$Yn~mS z$Ndi|<3(NMBxU@npVt)BJVe?KRlZaiHgiEWPaaX=Mah9kB!hW7BL#Vv-5SD!P{X&Y z3Jvp(uhN9*0_OMKYK6Iga6Sb6b^hpJ+HuLaVq_B7k5`V(Gqn$wDC2Kd!alZ(P%g2> zACL|2Sl4A)9awa|kd?)M%OzYB`0%72=Y>#@uKXeJc_@rq_$tFNoK}%oNJ-$L?qL2A zd5x$wpVBKoC=Ac~P|#vgKyctdth*!`+BCu;wvZs%^e=D&fFJ%Xmcf9xbH{*Z*&g6& z?bBbK;;;k2RsuK(Wl}IoDh@sgfZOtKP%nz0j4djeRFM5+srOFJ$I6(SYkk4L5B&bl zyg+LnMl-^vu_{QQKy#<-&7kS*2*sHfJCfbnG~=GN$G@azO9;pY{X7u@ggYfd}j2(!}hNdFNr_b510?3@^BuP|{98=TG5ZtPO|dmE`T19Q}nU zBEzYH14wz0O>`TPE_zj28zITGupW|K_FLpGtlLb%Qg$F>;EqgL*JZ~TxmLA=-BU?J zkbv`X1d!%L*lr^vrETXVfjnzfgYmIbv$0r$ZF0-p4u_JTM6nTvrUcG-QltwO!Y)DuJR_gJ2#v$JO3lQOi>_YwK&o z?MG?4CK-T*+;t!VXbr|+6C&_oW?WsgiqE&lpP%pXG3l+W2EA~|RZO5YU~^AaT3ieT zAh9~eAG<=KZl#70HnXZ9*L>vNUk8`rD zG$7kiE@5a*cXx$uzIsp*HDBj7sk#P0KUFDJ;VR5IEM`4j(;pxI@Km5<`0872YMq1S zm69#D4y_FEMuQ5^pv6U^$9CPSJ-Do_tb^@KC8T;^)?fTk{QuZyKrGZnB0w~|`NXqz z4gYN~tCj=s6C2Q*e)ZzF+yc`AygFV|VBX7NPWXtFTZl$kR$$_(4isSj=pl-x?TYV& z4ho)&2`?B_m0a$K=35>by!E<$AIT_Mj7;aO+jpFUM!8ATy&e24rHR0#1ySBPP~v*7 z^|A1^#_&@g%9Hx?E_+j1`mbAC?x}4T>+ibHBTP-hq(-YH=DKfj@L_<^9B=qpmu+{D zu1Mu+kpQp57;0~1lu#RNKU+R-Ry^`hO(t@+X=CY+!)TF~{nGwuokK#9T>Ahq>3@cM zpfKhQ{VV>_WnOCMUfo|Pz4SX{XXmL%AZiF@Q_};;CrLY&0%4=tj9plA`5{9Knn22! zByT>!1UU~^y&E7r)_E#@aZ)et+|a|A!77+vSiT5%MZy$gNa?0|^FRc;Qq}B?St~TE z?Zf21sZ0#IvVyuIs z4GR`?JHGKAYW9~|p)SOrU7)<>E3q+KQp+6ngdAWN{~G{_j&~@?XG&~gc>5ASW}?pf z`TxK*|$=SJ_x7@EzIp7Ju`p`M?nR`q5TE14fb^AFb17%?wIv7p3PO9L3lFhW9 z3ZI!_%8PX45l0#(PNe)*yY)L%Ui-VQNU=|fZQ+>v?l>RzG5`qWIC*B)b#mFMQ{;$N zXrUa^K53GBi?!7NdLv}&zYl?T(}fU7J)(alCCyf7*K@klN5;yTZ!s>_7!M6EBSR7} zY!zvZ-ZFbT_~F5G9T*Dr*l^43?3{KZDp7H%bt$m`LTYY4PPQ`3aostk zX(pSzwB?zBV03lbL_hg60Er%v&8}P6=H5{qs-kbwH&c7kY;6DM-Nn(@CmHqe$ii+s zDjJxX)~1(AUe+&LG-R73#*`xr3LJf5MI;G~<}vxE92~@2QeG+l-~Kp=Im?%9ngmz_ zAIDY;I4X13L!06{Ryb4LoLw%t8{fqJGNa9MUkS~e!hcXq&#w~<022=eVjuJb6zH|( zp{*YPw;8Q8!}v>2f2DuDhgSxE&6}fF2|Gz(&gmN8i2CZH{}wo_ zTrl01vO`J_sLRg0=n=94Y1GJJL6H~F6la#fGxnQ!d`zgk26<(#+gTJV6JPLQ&stJ8 z-0obRT8>r8D++3VFi3(vL*26=u?y0RK4gh_T%9xun9-*|pPUzO+KH%nEeqSe{Jr=s zpcrl5qe?z^R^u>c`9=@IEY0pE1^Nf2Ci=ET3k&2D)Ye=CdY{RFdjLqQL~|bAl_}{i!P6!{4sSNOCx-puj80!mAo9!M`nfv&6=WYaewWWIaRpCAki7D~ zH}Uuq9-)vfaHi~^?O7oo?m>vTUmU)sEx(Ir^r2<+>%8C2%0HK9srz-e~%Z)n_?nJI(yZ$_EVKU&y-^`2Li9DYvsLcS;;6! zW0Up?VqOR<5(Ef$SrbpT@rlHS<;SwJ9Mi=s6*!`mDf+cX*aBB+ax4cbLkNM2UBr87+=EVS(2pJskRYo9!&!93%uV7@+Z-{Z_n<>DsDhTHTP@}*L;Y%fAW8D zUrRpLo&vVvF~tVXm`zdz^xw)Ak}ak~|N8G_J#$YkR{C{^_Q+)8SG84%U9or_>E5*L zwP^Z}aG;%)DoA|%$2|kLrre`O*~MCvymSgum2{>CirR2y@5yT>{C2@E{02B{5Cpus zdtWXM30#KOzpfs=bG4F|_Q(c!fpQe;r7`5ANxZ}$?S?HOCTKRkMw5sh7 z(lr06_38EhU7a$4g7h|92GtpLPX)L(M@p8&&)1G5RR3_j1{?IRMy5f@G(P9;6p$uS<|GmMcoN}R1 z=+sbTTX#348@67985zgWG~p?0IBuQBkH@W6rc#iQ8#NI(9473ezB;toUSo9e(k0f z$#nh4RN?-qY;08Ff9^8g-RR5d@foPeS)=PlfN#4?a1il9P5l3=DbWwBJD(u+g^!F=Ixmk)ztVf^2$0Ey>l4M6^&I8TeD==^Kw4}4CWmh;1$ zck4fFhSq;rkN~IL*G7P@5;RrypnmQ~Owwm3g3kCNZ2pe5SfPJzShTLmKjEnWN!lxW zC7`A_bCXh1)>4Y7Vh`@0)9mHu$9KwNpBAS4pF_K)xTA}~)xq{WITb}^^krvWpn%66 zxQ&NBF!0v{#4eQ9h}Iuk#3noSN;X&*ws_xD`j~zf|71HC^0`BAlSCxIh4dHU@J1o+ z5iN%z0BF~{PA8{%E*X-@QwS~YKWTnw6J|gzxCA*S|L^sT6Q0y~5AXZ2t8~Fd_P>l_ zCxcyrUBar~;U4Gnhc7q28G%Oq)BqR|h42HPH?p-~Gt9bt-VmC^7`Qr^UC1y0a*B7C zSn862%ihWP$wDbt`Sgj&sX9a~^!`$&k5jiEM@y;!%Oa2gKOf57b>x z8-Cul-V6$w-I#snDFnlJuPd4wlJ9C>A{Q*EeJoI5AX)Z}fnUN^P`rfe?gll$#A+3h z(|%j3Xr^se$Y28%Y!ku^ibyf{iZtAp7DCV?MG{BWqTnLX! zOMMQZ^lb`k5{gU*jfPBt8EPlM)*eB&h>V1~rZ7uobKg5MAonb@{UQO@1Ohy&fpvNx zkNs(7Z#)LufUA7eF|z+oAm#2)k|_eg58diXsi*L+@DvkIyS;!&{jQ+hB+Z~)tm(4v z)N$9Qxkb+@J1L$)%`lIyyX};M_p=-W`HAo#Te6`wits6>2Prcthg>UP{DLE6s6-jq z!f&mJ{t2?6#n0@-e6yejzIBAB^h*j^#Q?Kp$>QUm5jWMO9tCP^rfAkqjDy2CcvbF{ zv<>eR(&^1Nt;mN#|Ff{q8D1i(a$}@cvI8|P@f0;wTS8ux&tZ5H8m(DAZ?cPI$yaXs zl}&y}-yzM=HG!S?1i|o!)x8>5+6OYIfnRYys#X}}seDlVi0V!tODG3a&s1NX0(wz{ z2izNbhPn^`PlrYw5O-A}gvlc749rN>!z%uY7p?@bgCF9xtW1Q+l`2Y{SNnPYZIpeE zN=SzO9k?UNWzxjvHA5CGJ&;6@K?=OgUvJwp*NN`P!g}ujEtp z=cfT^!M}mXgjNp)BkkAO2+rg8qctN)fk@*o1&qbadW)RsKq*ey1QP|tqq@y&ZkC0+ z7gQB`6uzE3^>+Y}e|@fF!jRHG5G0My9o>`k^nV_*_2(3mdg|ZmQV}ybKv@CrFA-2! z^^^#fCQ4(mYPly#^jY@sCyMjD`uEqF4pJ*$$=^dkyrg(44`6v_=?d2l_~ZZQfZ(Ov zhTrs!0oC7Uk5ayM1|#nR(d!f7Tf_aX7p(U(2{YIX&$V2wQwhNL@KQ)jIK8#=Z&{ke zZn8^-CgT4iZUbZZczCbS3Qs28+V%(a-*6UK7hZ$4poQAQgHV3OK~4bO?dU^^m-5U+ zjeVKsn7rm+EBgQb5cm>E`GpZNnMX4OFh_&@CP3p$l*Iox?SE4T4L^2J0oYpEMJ-%e z3jZ`h{(nO-h~?FXT5(P>!FPPj14V=Qc-F73bO)Qtr=ESA5e-jhgxmWY8@O2RJzsZj zOMC>&W^k&u=n9hQQ-Sr9-*`Kf=cYUyJ5nSo2yQ( zfH?|pUBG#SlQy?Sq+{y%O9JQuTzZ{PvW`_S;CF96qW+sBb9G0(If^KMbWDQ8graYM zLZxffn`C{)vaCZ)`X{537Wc!_$mtLMn4s}g6(eUE?}tN4nBX@yOKX`+Y%$k%{=V`j zlR?dVO`VgCmNCNn7m1T7p)aPzIKCMRp%2TqE$-ELXiqj~Uu)L0#o!^pnMKM0uNzjw zgMX=U2=KIc)Jrr5Uw6}kuhAqHBr&Yl1qkRKt!ijTCsZ?8X9rr}E2H<0xiS_PR;t{vPfG+XT;WFXMP>NRNS$G+CNa)Qfs4vtiVhcFX(LfNNixz0twbM0f>x3Np$ zJW&zLe8jhC^)UDKloL&z*y@}O#@OuaSR#>P0>6@{qBviC+LSM?WOd`d*t+|g=2frk za$`n*UwdyN4e3ntD%_7xOD6Nr7%oXpZBo8j){Bhi6~9;B{PTJ$kJo7K2x1r^TK-cvoEUi~d6Ve{l>WC54+UH7_tx5iKxij4l(vk$ea zjr!Wb${V!}%e$53eK3XFcv?cn6m`uq_-;tgPM0=oL1A*eNFevpUEqz?&ZKB!k5$ns z^Qn+4W(yM7t$!SSgBq{_r^eyVLjwNvc1@TdC~OpVNl&5eykq9hP$?yY?GSE%r8aet z{feEaLVrcMQFTUi)csw`!_tY(8iaAqUryY&h`XyY0yp~C^7Q3uv| zZyx`g74)x@{@}<$VBt7P#M1KHY9j>Hv-kvyY)3Vl_%59f-<#8A4>kAxljQij2bF#= z?O`I8U2t^%g8Hb?t!k=`#iU5$USL=*HezSyasMid!<)2Y<@)+R^;ibByT5-jgFgqd z52#SnmJYguV$ZzRLXzyRgepM=4A%GgRH8-LH=DN2zxi@y)u!8lSU(u+tvLf=*N^j~ z##>hXRcx-QU*4Xl?Acr-jnloS&XKs+&~V!t;zn^%&9wi%to4>AsTNHSv*N_fwu(~< zr5-PQg(0f*EkX5$-lHyfhbtJvz-)R}i1 zwHSb=0-p{&9&t>+5>*jXujK`az7-b*Ei61vuuz2-2tCw*y|cqGwUkTBXzH8%Oft*H zPzWPxTRwL0f4*f<%3;_4-7WuCO4?65UR}Cp+4P^O`1#p&Z3WJ6Sxh;*#=?AWQL|h+ z!8rzY8`N90*2A7XxDTtQ zP46Jn>EjMESdWXe#-BJG?XVy`4hI_AAxuo>&S~&u{tidk{QPGDz!=6(Px9m~-RLj! z^k$G*>||rLUNwaC*{XW9&n?Cf8OyXk|8qzWX^jSgf`2ZMJ89ivQp%pgWAHlofIzT^+#f%0Y?T$nZ8mzd!CYW%KaTx zp;+2jie}8HmCBDL?>0g;s~Jh&;F7}6YXul2wb*NZm$znHM^EPh42#*_JOUin-@|uc zEzq-VRbfKbr*+uUs=^0M3+B61l;BnsFVdHu_;&PoVgG$IbWo6wN-IuD= zGLYG=V|RQ#9nK`ywk9s1B;wiDFDRMT&Uljqhc z_I*AcVW(W`7>n06Tf%jb#3PZUl7SPRL@f>EIdm!RBbw_}TXP}x9)0-oyY@VPdC~)>; zuYvm-W}jf#Gw9>E*>ugi&a9eP9^jbRIJLW4z&$wsSubOKA-k**Ci(d6H}^4>6Qk3w zo0V@W;ePXUeb@U{YEA5z&9=hr#gpG;FxcYOzEgUVm`L@hTdntZljjpAXoE?Q#>w;x zKkftr&E;NS_qtiyaTgc@qj`iYT>!Kv@lBz}Lkdwj<`BcIRjx?H0?Vb#d?5pSZ_@#ru4e>qWSLh{1|MQ{8 zhL?=K$eFCZ;%`{0t1&Pii+|zRP^8;cv+X%Na_0;7s+Oo(%AQ-5p`xr0Ty@ZWwSS#^ z#1Mu%jMk@NOgy(eeyTiPuSm0EbcQpBd@j2>kj6v?WHa*yRx3`zuZ}unYi#}yH~F>` zG5Rqm0M+Y-l>!WXN zmsL6rR#9XoVeP@fxKs24zuJ!lF}t(4nQtOszZa?tDe?8Ge)cL?{OcuYp-`~rY~t2+ zb@kcl`q&nzXl^2!U_edBen&}hdun$8=Z7qFAziLc$BF%N-;9d41oTMm4IyHE=);8@ z#A>D%o}|Vb3{1B4rd{pCS9}okwh1)a{jFeGAyHc6w>cN@Ih|~PTogRv4mBKBL@Eu< zpjOR=ds;WI^+)<6KbIS?8jRbCU~^o>^muii2${yC@vDMVNV`)Ljuj|?cEtf{X|p=k zOT4VKofjvko;*5YaV}U4okiVVgZa=^jO3$*)S;)B=V_!*7q^n8-|MV|4)wd<`f=06 z$Pjy$8g{~P(Mw-#VuCH7@c4*c>2;1N8}WnCBOyvNU$!JW`)M`Yr4f>0@Kw9au-zbS z?7>j@K3u?RlBwS@ejK9ex6_vDPoL*v!L^Hf>h5=r#4mSe^m6V?x${LZ>Ew6-Wr3k& zGxo3R5#coNw)76ym=}-q%Y-X)=+%)pm^4r6Rwyl~O3)PvYrIiJo|YeWCY>~GW2;L$u4H$QI&^cd zRQhcgXJG;EG@`&LRGyaBz;wiuv4~5ItBW(mdR0- z`iqW#KXjnkh=`~P$}12tPfuCx4&-e@NlrgF%Y9awe$6{IIkh{aEJ(*>!WrphTh#JN zWu_j#k++LRZH+maIu}p4D3UaVn98aSk*Ssj2e_@fbl-74GHU(}P0o zm9SnXQ&+izoL3l(f{F0pVp><>v>Uo2!~&mL=_@z4pZJVX0u8;CfO$a=tz^#VbY?C{ zXkeFylsMHvfaMc==t8B`kY>VT(SW+C)NoD?`%Tb`m8oz&%5}=({g$=rnsa#F`Qunu zA#+slO1^;nx?N0Si-7BN+R?P1qKdwAO3^Dsd@{O}%_pG4zFBm2E`{GP#d~W0fJW2= zzIBQkVxXsO)}%U-bNuCl3+M~)0|o=IbW>pyWx+@$7fiEF% z6>+lW^$mdhPBO*gFKWe&tB!eBZ(5*Rr;3Xe0ncIZt*Gu1$+}jTr7}q0yvKY`#r-k! z70oFy)^tvKUkf$0<<5iF!-;!H>g(Z#1ji?3ufuL_77-9zjxr zn#yf@2*fS{n<4GLzkpud+o-x3QXGO9Ia_Z#XEJ@dl7cvIftMtyDbM!5W9g5*rTuxY z4`a0(tA{YUdoBanc2<^E_={JZbLn%KcCxcJJ-+2#fSPU)Cpaj4^94OgFV=KjYeVE2 zWAFoV5Q(L7oM}D4g=jCnjftKWu=*oaUx`VFfR7egE+fHfZJEb^wNKH1x?8dNNuBS@ z+e*ghFOL!kSWsIG$(TQ4-V2$!`I7Tj&7qZ~H$8j4c9XjPDf;Fu;}ZQ=DVf~WQgn|G zqj@|uObE;D2EOitB|MKq#vP>Eg7fA=qE&0oVvu^ukc!gRoTA@q4FFf4l&071Vx8# zUriY#rn#wKI1b+-V%>B@QXRB${@E(awp;fj2ZKyZeF6TM^lfmB37Y&10vX`S}m^G?gj_G~DtSAfUpMV-P<5wGU{$Y!*?kHc&(_}F(RH`7y3 zfOTUrnalm&={n1N(t@`KN|!*|-~7^obN>coH`mg-EOz$xvKv)x!Jql5&ox|_=ez&M z{Jo5}8&+^YoV4BzyKcFjA2-bEI9`~xD2 zlk(Yskc=B^*nxAOW}FbesBIzk6Ft7?9yE{ExMU{P!!P;ubCy>7H*zSeEyQto#~?nc zxIVYj@{`wvsJZKW>K<8~3Pe#Sd@gmu6S!W-=z#0>XDq7r^J(TI>4mJZ)HsH+YZR9@ z(21r(sv8iP&A&{}DR}SyExB*8{Hb2^>8^ad7L!=pBJOv=IAX0?iKTi!X$Z3y?LFW~YAMZAIt5|J(!|on3ur z6&Y-lv_znoEEI+4afnr_NOADtRzRAseJaX46Xk_l9mzG}Vj~jn!g8+%^IEq#4J9&@ zhVHBG`zN7sDIxrc9uSUz^9_LH&qb1Y>UgUxwRt(ex7hQ>?<|d&xHX2u*{>q&#&}=* z`>)f!w->Iv28Zq7a?d<$0Qr?GIuAd-%;;4pd{>SIk+hUhf2}mNnAX;c_y|hV{I)jz zBX7_tXA3Uk1{d$9qH5w(0dEg8Bq>4YBa|@5+X8Hb&$^o>5jY@1Ed3y|*zjIC6`$$&w1mOr8h& zMt^Wz9E+YVD*6&Vt~0H zjl}r7+ff=Z-0N1%BEIruN*?P_?uhE|f?LLn{9Ui5*hQpA9+Rausr*7`A7Tp3l(+7V z$t?(U<43N3&?1o^bOkFDp3EovR-5%$kQ%Odm^HtA{nF1?sol2PedR8%2n>`zl>CuC z9q77YHqMzDN8yf1+X3`{q5yEG+eGOIxT(LTx}2? z+F^r2eH#AL2JK-sMMb$|Rxu2TS;`3GN6gpPb@v9aU4;8xR`1%B$ST@i3zhr5egED> zXpV;2xVZ4d@j6Tud$Qa`c3$@G`lUFrI5=Gv@=>Y{vCueZ1)xDEcz*U`LJE@XRP*UlVfNUB(#nxaKU zo-z8B^2*$=dV9amlXix!^q>2DuOmySTGIugtXFosK58FQ$Z)6*f<2==WrL$npGY_9 zjof?UVxsh~OMAK*RUIL_XMH^kxEMpekF#~#0$VXQ7cm5FUXW3aB}GCJ`0+tx6Dx+Q zYjM1psBEFm7%5AJJ4FQ9jr~mWmLzeP+FBY^dXO2NQliXbLJ6yJ`M@e^*Wi*w4PsEK z^)4BWP7q)MZzQ%ziJP{JlsRX_85@x63Uj{jcwfn1Pbk*pdU%JA$X%e`?`ZA&YZmfb zar<7}(kEviTyX3@jmf~}->({f_WBKp_Z+D+5BkI2EOhu%ema+A#F&X+3=t(#apJJW z=Z?${yaK=$mzDLqT1yU>J{Fn&JQ5P4lEWmWfMM#$u`DR>t==swbh&0>dhMqwtFHny z*{~29Bt>t~;M9<5O1OlV)xlc+zhmyTF_*QRtX3cqejS4HMH{+7FI>?XA_8u~lDotZ2pK z<+~7MSIsJ&)+Cw5TakNeF}4TJxf)Xx)r{`jZ^QJ?xZX@&4QJ1);uB3DA~h|`NYXKj z;xTG#;M%Qbf2LJ6cu$e|QX+tpggi0lHrKo9seLh*!iz;mwd+@B8crRc<9UZ>um4t! z+x+1vJunIw$nA^SSGyWGznWUeX#9Be%;r_yz_7H`#fB4afq%K=<}TKUsSeHHuYY!~ zc#}bk@b#hht%x3@i@dZL^e)N!`Vi1$H&?k=~NSI{_KE2FOO zotoHM3H$mY>pe4@eRi);KP3u(kek5nb(!+!z40n|>lkRzw^(DYV80gLIjZyiDr}1( z^JRmg*WgOfyz?aCnb*|>tk`=;Wy>9gXa~!d*tErCy5sM zLKgUa=63W#E-0>y0kPW%PqXU0G{LSG?h|%?)~OOT4Q}h@kg+^P56S zkvGnnqon&{FE-a%PuiY3Hu$HkGQ!WjCKJ>gym$kOtCn68!aH8!VlOgV=sj!e9=rdl>FgsDCAQ#oI zvwqws`faA`^P=HV(_b)^Uu26SRs zI}=$|k~K-gv}OT|n_=2sb=PTBpPplXfZ&Yo((EfvfmPF7bU5Gl$S5yXNzeKtb@kY@ z8fH%=pHVi()Rs7Q8wHn(_%}|hx5Elr5(dTgWc~K`-!x_aQS?mgySP= zH2fTW?aV7Fb>w0Z*6cB*YWPFAOmMj|5cZfnSzg+nb7VpIh-LY8r1Jh8RCU)Igy!j& zkM*C)G})^9xYtz@-(#Sw;u3?FlgTBP6CH+hY2QA@rU(slm<~hJrkD9(4c$rWxNJIf=dCNY%Sm(U zy!G$wbiv|9v#(`;P|sV$bXtb?cp$45J@RI1C0!uXr++QqjP{T8$>`du-F0-Ku~82O z&Ag_MJ1wpEa?qNmzbnXNq~I@d9YtR__-5)viNPL z?Z|cAwDYyYmD*8rFcsF{KpF-fZ5nX$K01M%Vy%w&VbTwrP77Ks@)*;{xwB}Hy$J7ncxsFYT0~NPzpX zY4*bG*m^tKYR%sDep~P%59`}aP*=GYDy_aaCka)27W0)FWu7V`!1*&wZN|p?W&+*Q zlJXC<6MW$L_gcZ3vRz5kW5SBYqLL<4^WiLZ_^V#+nwEaGYPjHm3JE7_VYND&l&B1 zuu8xlgIL-m)<^o=cpoq<^%RSfeDyCiRa+LT*vf>tdA}Ajl6c>1%s9ZfO1me(&&&wF zn33coyKc2?K41qq8J?iF?zQggCYfCOYzf|x<1u~_2k+1HDLd+2*IWEXrFP?dWGp2n zQsq(qrpM&)@(GfW{A_y$^uFU2<&}Of}pMh)cFQdmsHN0_`84NOr(a98+jT zwBL(-2Twr`kn*Hbul~%%C}@pREoCPy&1r4xCbP2(OiX>&4c;7K(yU){{@wFVxosND zi(}Y?q5m2iPX11lqMud;(;F>}379)I{HYe-+iUSFT>-79S$T8#l<|Mhw^S8f$GutD7AnhG>F zFnzyF?xnpP`smcONb4qoHM>uuW&i?Y^`X8SHL#)gkN`_JueMWN<3P@kE0?kpH|CXA(3>phXT7>6QEb=p&OdnT^u>K4~vHf>Dl zxjoSb8LYedb`_`Z(qryjSzfg?E6K@g#-upV-t9Sod~gip447>&n4l%9s790 z>P+3auc=AA{M!>et+*&W?uz{rwVWvKR82KUcSOG9k<0L2Sqygs2H)8Cd@*0#r(WJ{ z*o}O;V7y$rU;#ZNuBU0^nx1@C076wy*Nx|YdGocTO zykwuBc#h@yi^z8kCY6nY`6WR=_-Zvs*#Hd2vFhBnPEV4vC7R8r!W;)z;K{w?`Z=Ag zJrbBjMVaw=VY9oFW6ADH%E;N(MWb5qrlj{so1+(GB$88jSJVruH};yh(6qLA@m+7j zT9E%b^WA~#Bwfq1xort{BL^WQmd4YhmByUiz&$+5+PGE8v~5(}kc9TQXf7TpxG z-5Ys|Co_G|7%dr6lIj~xN%+Co)=eL~;e~_9{5U>GeVcXo%8mu0^Xi9LNl1av#1oAH zhY3WdQJiKvb5b3tkm@_F0M61FZ=lil8a}OR%La5&q;K1Tj#h7%ri(Mvmt_ zr{k2{79nA2(Mb^D-l=!wKMJx3bZV(xPvc}7(s-n$lw|wn^I4V+j`}i7LOKPHH0!2O zDlK9S<702kinqQiP8%5k>996Qm<()wq3%UL98Ab~GM|s#KsC~#3va;D*ygrK z#EX@P4|U<-}5_(H1ZLysJT_nD0tO9=mHMYV~5J^Jt9iIyfvc=5oi=l$DnJ$tK%voSDMPQ9pYY=P%lqI*1`;>%vvOlh&4$MY!mWBvQ``0M)<_e z9BJK_`@0x>M`zI99yegNvSoh!B%{{!w9S<=33MXs*1-wL>)r^*;!S)uTvHk%06wt|Us+%flA-FMG1U9+6e@%y#>7@1cXn+exrw*4%f%NL`uatozm)6s{VXuOq%8f zzl2#{fw__6>6QGt($l>H7Pot~lc|*QgY^lc+KyJLV!O*8&OtPu`licIy|K@$#Jf`( zqXU9W^@7aA%$L|MB;*D{oU0Q7WaNg(R&Du_Omhjr7Ts1qqn&CKkb!aDf_PYhr#yDZ zDpGGMF8-r47owN6^w2yaLg@tjSaE-u(|{eGzL|OB;2^yd{qBQK@sbOPM{n*e)WbgZ z`cg=%1+{xr!-6USq~87-;&GXnqMURyaVq)g|Imbo%sgDAJ8={g)gUlYy${K}STk6`s1_m3&>3r0J zxCr7g7_ivRZMuNHl(#X?!@!C*Kd$7{8|p=*pbBz>Iw$eh3uPB1_AE())yNyVv)+~@ z^(_=mX>2}o=1(9BYu9 zvUD_L#T}KII^1v-jP+8jEXi5VpA-2ov8JGcdQ-DD7B`erJ#OaM6VQyZMH$;e>VWg` z{YM0^bNC{{LXu+>fb0e(H~=%#$`jF#B@M#yLxO~tR>>SLVjhR}EV=5f%lMgpO7geKZ zn4H1|dJ|GP;WiV259JfzCDg^cjdv;<6CYu-CZW1zv1TOu;f5T_9(4(y=esm*bzZp^ zPF@?!?&k0xz4r-HkzMVie16h5Q;;ayP9@?pOWW)6wF2y` zQ@+(1%E&sE%Kd-fG=MKY`U5S zS{J+ENvk(*$iT*{YL=vq)a}bX0IMxR6drAips$)UoV%^=H3H06vFm$e-UesSmUe~O z4Q86iom7S1`)*cpAWvVPUUj<{mG!4x5zSj=RZQGA#cTX9Uwvw_80ttaw}_{=o+9BV zPD6azeBu=(z?7nPWADPSX)4DQoI z`Hw1t>&i{$tNBrR_k9P}Gu+u*_2L-oCn8rQoi0dQTrY_Msj&*+-j5>Df;G{FlW-Dz z_LFOR!Rr|q*NyAx!PykbSt_ndc^X>5%v_J0&)06-Z&%!SW`DF5KB4V@G9y!b?=o;i z;vJOo6$%y-S6U^w41Rhwt(SHNp^|s-Y;PnupbZyIOZ2|8$BD<_M@V% z8n30VPO}?em1aMp@GOY_)g+Ox13he^FRrYcYM_p;)${U!q@*xav$l6p z&(4#_iIKin0-l@L_zE8MQ>Oe|H}n><2V92(!~y#^EP3@ls%sVvYytsUZKWSIrx;&A z>rg|#;aS+Ow`cjA$-j3W%^xas<#j#TY2bl(t;-F!hj?26X5m>nE#-WrI$pvz`bxps zv$m5!o?Tc};`-h!)D-VO}B&%&BaEA{_Z7BMI6d&tRU9YQj z-yopZn<+!v>yHZGh|X*7Y3a$oru+bphCG_(7SJr zcYB?6n7np2OV-RZ)4zGx>;)L$w?)TqRGyK! zJX4XTvC(lUad(Y)YUxQ~l6!ivbx;2uZdlD`lx4fV+}zF+mBJF&Kmr2#%Yw~vd}x8r z(Fn}pQICk)FymEXevGZLb#0v%{;FSyFQVeW%3iq6UiJcDq)8)Pd)Vf`)=IROy1TK! zL(OEDqv}4dC?7ZDix4-&`*6>Nw~GSL#(TK#<0iWdKgcu^+WrSC^FI%d&?&Aw{XRuy?(%APJKiId2ml?+yV=m?ff* zHyN0eSpbKfSpF$QD;q2b2@ZAu1 z-CPi#Zq@rg?dGdk8T`^}Z5oJS)>9D+#+2uA`rD_=kb3ltAxjamjP|0rGY5>$K#e0~ z^u+GKDiN*S!hMTTd8<1BL_bH-lD06Y>|&q>9f>82G17 zBWz2Q;px(b7xf=duy5Q(;WGeU&#|;hjXA~LWoJYvIi-RJGM`}{&k>A$T$Ex)_c5yA zg^eaBMc~YIv0QRPeDi6YaoVea*@<;rCb&QrDTnTw-DBPjH+P<+A4?G^Z5ZoD7sR?n zrd#+LV`YGQdh&c2C9Cn##=OKzf)R$9s*T$%G}sRmednudw>2GN+Y7fX6V+fMibM>j zZ{)(c^=$cMb_gUR3Hw8F5%u?~E9Y2x>jp5ZzHygx4fkifD6)taaX`Am+&>J*OA!%+ zeFO5EAcqE3B}vVv(fh;;X`VOfk#fnmMMfM_9Z`sLXjl%HDkrpYRPCadYU_3$VW)QI8Dx@@#VRpn znfbu=Mjt=wn{f$xN5jr9s=Q*SPnWu6VupHJ9290VVdCS!JFJ_T7Eoi_qy(K0ZybA$ zwK?Dvo4=FQVa}sX)Nk>DzQU1prADduiwiR1)VpincRuJ0Ziva-2d}fE>fA0|Pn=`A z+%;&l&g*eVc84MMP#a#PyKysC(r&xPPnD0NSt)lJDy{hKdLW+97G#gs?Ciy#Vu*;6 z9o}YQ-C1#EZvkmWR3n>e)BBvyvl*4CJS5*XT-#$jTVH8=@-w96!nSkwRyg)#iG}I* z`~eQ37e(DUjANWUExaUgrE%AqIhGB9l0C!A#|BWIhK(2MM6DZjN7>JyZzJp?BLLNf zJe!VVPCZT2-mf$N^ldc8LBvNsGF(lo?p55L>`oo=d%)l#oiWwZhJXiUE?w$ z^7DG9U`TsJN)h}m&xZ!IP|$X*dUScXQx_I_)1=WPf4H7lf3ce^9-+F?cE0T#P(`gj z3|c3!=yuz&uCCl6kZW}sq?Q1v8-0U{Oj7+8)X-2hY-ipq6XL7iFG!$%h zY=>)n8C zumShcnQ4INkJrpG1s?)&ZOMyJ4So`xgfKp&ev=Q>#>^`WSu|+A5Wnl8btysZwA91a zYx$ge_T|Q}vSEi+JVh#dYqi;RJVkxraUi8rD&V)G=sQnUDES`iZ`0-v<}RNWkuCbi ze~$ZUH)+m<6E&mY=L_FljWTGQA%aGYyWLfuMyIGzFJfp4TN-=m#$nbUt>M5)FeAc@ z;ZX2o;d*-CqNwF|0~8rAZ!#Qc#V$9$kjuLi?0-gHfF!cKZuMBZ&R>j)8+kZ77R3Hh zL6TW=Y(Co$EWh=wJ^OhuNy*gHx01BsGsKoIyn-ZUU?Pj{Y95WdHTaD%u?Ux_## zxC~3mTbT*Ka5;vhc~^H<+sH?b2XQEFH-zO%dEO;-Pi+Q${{6$FXN@d;AWRn9g?0@; zKvDOaO}k3QEK5MQl-V$@aSe};4=X$Kz@#F$w2{l;wDc;Vy~?AG0aoVUBj9b1u&lIjFC^`t}Sv>tGC5!;C}<#3<3x?WT| z;93^vZ|ry)-!R8Tssk{qiji|DR_}06-9r6mWK95mbklEWZJ*0xeb8duPwuV1vklhG zpw{qDEJ{28xxaNo4v!Bhh?U++bR|px^=Cgm^er;w3!Ax*5{B9ZEV|lCA2TGl^ z=1m~(xA=aLkP_=IO9;BWR)9`d2aTO(7S_Y-KDzNDvvZDxUs&6Vgr=@`bLd@iH&Alm7j0Inhh2w4d}r~18ZD? z?&U9fEhZFD*PW7868g?Qi0uBu!!ycxF8kS`W`ir>NCWvBUcL-b&-0^>cy^J-{iCad zW)YFFp|kL%p625FOLDGeI~Vdhq`?*3QZUC!uj{=_lRO{u z(r%VFe;;3Ce|wtBR{yf5aqP15;1Dj9I3!QOu~df(7kLchYUnj(bSf8cv=XxKfk9hi zDAyD+E@k&ST#J^jxS8gOxj9QUhO7?V5i;M(a2l9JjvA9>nzfP)dimrX8m$LpKVg;G}QWczKeHotDXgJ z$%&nlUTtU-nen+6(hq*@*KsE5xpFwmXm3-s$D!A7%DisfrqN0jG(F#>Tb{Ij)Y}Yi z_x9=RWjFAUe#fJJAvv7fGnKv=KbW{>%{0hc0N~q6hpcN<3a`|JihCjE7uHNNaF8y$(42^<3$EvO#nq`__ zd4vT3_*2cx?Ug!po=~i|2Mb6?>1gBv=T6yPP3%TIg*YpuYHk%sy?fixMBZOAGGCpG zqPUVx9Y{ayxLOOKBo)%25q+>%})Ft{3_ewlfI|j<4HJHogo-5?WNK zBS&hZFzrg9NS6oaZ)ImZ4>M4H125eKJN^u`?w(Vf#A9Onf2%TW7zif_N_>&hS)Bm6}dIL>C8VrImt%~Eama{l<)eM1(nt9Z4Iy|qGEmhG1w{G@+zl#{qs_}X=Mp)5RV!dr` zG;6atbgdHD;8?d|M$T(xw{Y)JiQ(i?olV6T<(G)>y+K?J8dL5D!?~ECX==7yQ`@=Pf~Q2c-flMI^KK})^^bcx`7kOAgN%SZ#axSM1zi@`hn>Ph%}7XsgXg~DiRz)& zoI9U*Z3|D>8i^@|I|8w?N;pgJ4SG3jqc^#PuUuVOvX#n}PVB7Iw6Im1?SB0Ws@FHv z_FC9|TO6irO1O>na&r4?GnCo;Xx)Aqkm&bO;!Lk*FLwu=c+IpEAE{DZQ8rbVm0kC= z3S8YnZ-Ja$$hy)AeM0LBFx7dUd%GifwdlDpeTc);+v1 zHP4Y~BG|w>oohJz+$s|^BVeEZov?o6&xmH>>AEd6U4-5I^EnD?wo{tIZ8Dr->niH5 zpW!B`UM=JuXQ(CEo*bDp#xb+fXfCCuC%gl; zoZ24VqF$HN+Wt9n!{h6t2~1!oc3tpQn>Mhy`h_0tjVNyVUc?X!eDEfk5#As{CD!rVook;j8#FRC*L_Ga5 zov&Ll{8ysKm>j~9qvZ?@<<1uo z($H_U`WM83X%>X>1k?IWZWfeAyBU(^^;;?lh!Meh=^161?&9IF%VF+tr7{h(jVL^o zjw;$w3eZqJcxVet&KWL_waZ|bRc@D)V-sw>v~7fXx{)jAmYiC6yCh8?Obl^gcFjCP zt+tP>PM3lA-?Ciu`9Gog=O>}^( zaBJw&_=vt9p!N__ZCN9SvYBAe3Tsl+EbIefJi)p9W8zM)_$3xk@?TKv_(s9D*UeAw zFGh-z?q>GjGcZ1}L9M;(QOl0oL*BYlK>p72l{J#&4z8v%KWHv=h;Yr>X zp*i>i14FP+_Jf#)2MoJAJs=MgLp1R6aTa)~jUM4H#F>5T{Z6b}{EM`kIC~NE!u45_ zdtvv(Snh}QH%_lQgJVs5k8cI>P1Ntb|J3rZ)5xpNIe z2)LyAqt}^{ikKqbtBODU*}wRWzSU8Ro&r*t3SBUNvst${q<5`}&SX^H75f}y!}P@E zXX@X%RlPDDuZp~>?SXtE*Yj5?e-cl@1iyU#4M2b95JstB1^(_FFb%VT$SF^2z~=kFs$ zuP!mr2V;OgsU6acm&$aI&7ZZ2v~DQV-ZLO)_b|UEv!NC`Y_V51ReXcB(E&L-VJPsi zq-V4rVzv5BjYEY6w$P;{k2ZdTW$&<9S<-z9GfH}z5$u7|i`nnNn8opRe(ZVA_s{xm zTHD0=G7ed<(ergZ%rHXd(A?^Ajllr# z4}#fl&SCjL+O_bez1uzSJxDn1FUp5-DAwi9yBLi3=o`CRj>!^rNufMT*}3oDgq_3t zgzS6&_+ntRtWDphW%;&0dw|hGZMokm=C{B7`6m`W>fa1be-Fj@0{CLb^zY-p^~QsZ z|1(nxde{HgA8bHf4=bnbe9#2+`(>{8=mgj3#Q{4TZB7pG9KFm8em3qtHg0MT#p0SM zcZdnkPITd6=W7zbj-2&=p5C(6^xcnJ=Eg#QR-Ac_~n|U7dnP)<*t5m5@6F?++n?r);VaDF#^Gx3^bB-6x;}$OGYPE%z z24d_fgYISNx1!Eca6->Hk!YG6!{l4v#wCWs0-^fQ0Qj7d_eFeFE;)&P>brG+G?NP? z>u=iCuY3J=#3+qNfC(CXkii&h=xZ+_nf^H1EIqn9<9C~xBy{yRBQ(s>By&Wk?kY&S zcOfX!J4jj7thCQ*5+v66h!eYcS_rPeo6gOx;24$n<_8bSskWT{7){ zupuylM_A2jQt++P$UXcdJo}4*Aqo_{uK#2-7KyMTNY_$3NKnta-8z9F> z+8}mnA!p;9RW(#bhdHlmX>7pjt4pMFq}AfI-Lpr;3<21Af@fra9?QBWq8F-ssiBE2oYY!YI1vUn2L=MPzl8?8#GDTOxs&IkDphx!D&ojca!d*NNZ*1vB$zIW-&k#!mw z)VqWIX;wz)=3u=^qDe=+#0_Mr{htl0pq4_at*aM5dslBsg*BU-$|j-@oMvu7Vh&cP zLF)_5w_jPL9vp~)ya+OXnQK&Nd`A>v7shh`Se^GqOJVAwa!en^e0#4|nlZezzM&@* zQ^YE_y=A8QLbNS~)+%|D z@pbFasHfSEEOSHML*TuY<6JPI{u;l z3S*`0mj&?@cZ^HwOZDuQZ%Q+p)~k3973A~P0~J@r0V6d#-h?w>mAngiw^vr?+w)(f zKtN6=irFh%@<~qB(2ZFxbFh;7zilz^=ZP7Aym8*r*;# zS= zDWI>YN&@7?2>7q@L@sL5+_{JfIf|}IEv`q-?|)8=J5&TOeYIP(^`dQ+a@=57r+C99 z?B=oJlXH8uPl?&FWExP&7FTd1z@_g)eWc3SX|m2^FMMI$#jyTzzYiz!&Kcdgq3!bX ze~!8>l=gmL9k&RmMA5S~ikAYh({C)_$vn(?&u5b2T^KNzMpUVS*L>VIhqrTJW5X{7 zO_s?c=DPp@;@Zs>0$VcDr?KLFX{SoWbk1CzuI4%liD_kAYEqb@YOG}yhT_@-^nS<1 zmL^iRGxHVsA3%^`W$#bI7v&T5rg@`0OAoQts*B>8SWyOB zFNe)B>lCQ-fYeL$!QD-V6M-_-g!08|VrKF&8>C<{ul!TVg}F>XO)-zv{AF7^jx0BX=nQDP1L*48B3YIXPhp% zkB+7AI~RL!rlIgI2}VU};J6o}~}sQZ;Xt{+;L z>dIeXc;CUC?Pru-g^A0ZkffkU@@2pC-xu0EobQ*>&=0=CT3Tt;tH8L$+2;VCT7$QN@|vTk z%5MO8LkFAMTO%pOSnXj4NCr~Y+kzrviH|R8T6d+buXk*goew&!tQGdjY1jNIOu6S8 z=9-#YH2d%l9PtmZJ`&fKOg=L9P55KS`M|3q5w0;ce*EK2Vc-YSkeEY*V5N>1kNXd8 z%e>Y4Q`xve1a}HIU;eO0L~E$1I7QcZc6iB^D#%BW6K|!FkW$GE&zA7O$TMp#iq*`9 zgXn>Fe1d`kimDoG{n_6p^(z!IpOY|uHd!}gthWxc2!HBp>E-DT{@GuYwsc*S zU8R$M<8d#%JCDzax^t6_&TUyrp^&?9W9s^+sw`HAbc_bEK3inYZ_{}hte$ri z75Hg z+ojjGcuR{lUQ4QCs@@yN7qPtsYZ*SEvWT=!iRO`hK@?$}j#$Ato9DR7)>m~9-VA@Zx1z4CzhA{+3EbNUJyrj6@QX-~o~8%!c@ z?t$~KqIo-|ik(a|YLO;8Ux|#|)cqA0-#k#vRK;eLGk2g9EFK1hu`JTl7Vk@$n9&Ck zycBqxv3>Xx+scONcb{-xqy43Yj`x8%gS0^SLJf0cJpF1o37Pa+pSQnvh5*}L{T zk!Bv$3hqp;)gh#*DPcL$g3JxgaoD{q+^eR9s>DeaCY5~h`lLu{ZB?zN>?k2y!i?vR zBRR$c`%OyrmWev1k3lK{ z@-sC+DF}ZV#(ZFGpwQKed51)30NbGge_sX>^MzbBM^1kZL%u1@7pWC*M_#c%S9)VL zQWZH;OfadXWFGSiiMD>;&54Vi?{{N~8^4|(WAm04>u2;4C$DRjTso53Ij0-Z!_Ot(2a619gM+@ zeJX9Uz?r)1IXmAle%m@d;INfL4o(=#i<(phHY z_P$LiU;dshd@?rB+z7Ip(`0F^`_^27T%W~hq^{D`H(?n}Wqz_=i-4pO zQm`{lBtMldrde4FpyzC&DWzvnMP-TK7PHWmz$sV1Gc;>`tY>Zl+hn~AjcERSsCQ`3 zpw;V>wu?jY!O+F6eOytgAnY{0{7q#N4*jiL54v^1|N0l4uisTPTM)iyH^u=~<*W17 zyYLDf@pIKPHCl?f4nTZw_Fp?!eXK>#Li3zs!EES@4=4raaHe>&CSJ$UkoOrH@+OCl ze+Y@@oUyT&4?cg$3$!Xh-jf#d3zVYW$PL{(DQ({CJo1(@JG4FfAXqTU7z@aw)l4CG z{+VI(aOyDac}3Wn2p8{#Lv6|D+c`p!kdbcGk+cxk0wK?K6^_qp31ozpBf~}c2ULc9 zB+|(&tQokShTGQcv7H54prLThe*@U10-(NWrm|1=403K>GJYW!B~10<};q|rsN=M~1lc(LYm!hS;3E4PqBtk(Eh()b{_ zbXBB5d|7*HhAZO=t*H{`>o3z!$+Wkm-D~8t8ArrUw&_;Hmm{NE*U}sXZ+;&A6FOo4j+5^q9VKwua%Dog(Nmk}`b!<#m+{u*!+VjP z!G%MGLpTk-{_;2{m^h_gFt~8P(z-eUlKH6MQ|Y6>b@t_J-&pUn%*m*N>52_){D)gz zV~@G&zF$=tiolv)Vw@g0y5!TErYlb;S{$`NdV^f0EGQM0*(zqUl#E|*M5Ac|@WVid zT(xTVsjWmcPi4u1UE`k=BBO8~7Gb*{L`%v}BxOeYxo+!NZpNd)KUDL<2ZOh|kXu~2 z1O0*P@laFWGduhX)OoT!BE3W*|0zN^q2P$W8V$nQ&z#YtoiDn@JZk~ohoN)fL-_4m z|JvkF=B^R7zS#-{YjD8h`J+NRCO$rm85~JuEmmpQ{Q!HT7lb=*O@E{S-DA643)lL3 zb-v7WAzO$gP&?>rklI#cw5gE(K|oAjBv(!SmcmP*d#=jg#7^ic2|xA0L7h|4@Lhds z20_WL&TIGN4wx8_VSvqCanR-SU7L6}BI$T~nBg1e%Ur5!KB6WQ>qJwZ&$Q0M-6*jA zo{#tMgznTqkv&U?3m?#u5|%}8)1lLdU4d1n;L2VP`z=2^!U$-jPR`&g?0@(2KcoN_ zi|b~*(F&-WDFnYe)sN`a+KEli=1pV`Yl8txi!QmQ@$N|U#MKeqNymto{+Lg1BVXogT>s%9JX1xtlQRw(Z-Uh*IHOI$JU3$=TR(5UH@MfSO#BdiQlLT>QA=6SB_TmC z%$oJ|cFo6B$0E-oQdE{)0Mr~o+w$eF)!@ljv6RXs@id5v=KicaH{jEpCM&Ta+G>{g zys6KJzX3~5k_xug1nVjf}3 zi$j(sMZw8yhsu=N-@SptF-rP6OoSe;RqV*H@{wxh`9gxogNh}*0}_l!>V9vbSvN-e z?JtNxWS%?WpDaLj42g3lg1 zq%SJv*LHYs1@+!pf>cE>K!FDE zO8y_B!b4lv_H`*K$(sXo6Qn#-vKss#JJMR)nxzTwYU;p>v#+ok>1Ui8%?U(wS8t=+ zphx%h6i?WgJXzp%VklYL@(5DGi~V;o$7$vDUifrjuf$>ue_9KcamnS#mCJ({hpv!xKSw& z9S}T6Y}OqctjFqppY<7onrAL{m=TAg&Kd(Y-zmL2gkB&c$*>?1a6e zfp5{$VLmm(4}y$Bf|@zA1I-)?&jw#=F`tcA+Q|C)AgG^9kPqC!rDc=A^J6v8T}y?o zw`Y`XrT;N8!YP!~J)_x}g1vPgjXB9Nu7Pt(f^Bp*f= z+bjp@yxSD9Zn0>0MVs9EJUd*aUQh#|SCR`<%wpRx4IPL%>wD-POVYLfy;+14sTN&3 zKl}qzmr<0vv>7(5J0#8eQW48`#a|*;tWmDth%Us<``PDt7Ny0t?eyw9@v1JwEfqk_ zNvF=Cl&n3UPrMVQ2I0}Ei_4bmxBK=XT0OR*YG)?AoXwJGknB(^m_#>j+aso~I%QH$ zf>5@h+WIIT_kxpSrRQ194~^%ojFlVQvJD(|aV0St5#X!aqVuYdhEeV-46S2NSniL=BN!@#_;4$gJxK7z!knh4_^g z*zOHo&aDVdDIp;|b4*haNm22)M5rW*DW8EEl0@fF1Wf_(aQr)JPiA(Yn0SNq{rP17_(5AIUz;f{znR%f zVq2{h3M*XmK!|48P&WaG0 z0_fIXk|^of?)XCLVS=6^Dny1khCR81FCBg|USLXS_Y~Oi=*l$33|A{>@Eb5NjW}P? z_I$c|ns1r;)-k`HwR%cI57HbA4xwVa^-h)RFys@v9DP0w(!>%$Q!1m7;T&&14$n-P)IDG3&;P?3{>vQwZ)^AiQ$8Ujj|*@s>}QOOE-h=Oe&U4-J4#;Q))J{XoqCPdk->lvl#A zc4WeSvsT9xxiG-Rr?vvi8^>c~im;wn+d zX>cLq!h3oM@7s!6TU5ko$cj`;8Pm;xjTQZot?28S>g{+@HhzMmHe{mm_Uy8ozm8#=oNbNJA0|7iMQe}RrM(wlD--HWkuch&SZj7flJx*i2d*Ps zdMI@>qs@Tanl|92{#8vsTBEY?Lev;Fn%7WXe&E~vlUd$1BuB}rhP#~aP5NZ<)D}ye zWP?_|5HdxQgt@vlqFY0!3DiL8EFw~+%{>uUmhdE3x8av6NGy%tIUO?ean326Wf^)w zP$e?$;wmD`^^c2R0r|8-Xr)B7JN3+Rp><*KGS0C(Ss4Aw6VAV~uvk>7 z4K@pdu(7dNzJ+A7V|lBS!5r_ewUPMv*E^?IZ?;@9V>UrCTyiY136G?jCClKlleyI} z#i+8&)AS)Ux4FolJn(m zEMHU0C+F{$P1&v&sj#teoLQ=oS#JkufS&P__pPNtOP^r+ z1T#MlWQ*;0M2^i0yCF(wt|T?Y%{~b&m=YEkXwD?#lCQo~seI-pTwd`N2MH?uhxOk9 zCFcz~i=88_`PTy`*bQEd$Epe0zM$k*QbCse@^{h{IfC5R>z$Nyo{#BxU75LM&`M`;~tI9 zuyj6Kc`G1>8uxmDjd89q`X|uu5nO@*qvdq8Eu)4cBwja61@~VJ{&zj9d;#Q}#4NWq zYcVx;Y8c`8Z*+C{?U(nfg>a3Y9nZf+jGc7%zex1o;a|S2p}9;&!4qpghvFdV@qc^& ztw$HnrTHdja#pk40=k%3;^^rn4!m3-r}uSAXiCj9oQ+%Ka=3E>(<;ehLZLBq|MSpa z_L%UWj(*Mml0`i+{oCV@k|DrA%?2%oHy@og$)NL_fOh_N`CHLrVeA+GW8?#AF-8?J zF}_wTi5|P_-#@O;cE@9=%nBu&hN|=V(qVn^W!3J;xq)XI6-W5hp`rn!jnmKD4OFfi&uue^N`%a zX2eO76o5Ttb+vHdEJ=9SYK##^K%>tr#Kz&%0$nzi;JRbN;GYZ>P7kvGZ*@RLn&@eF zh5JGn<(1TCBw8*|acz0o%+G$bDOw2j`Z{ztI#Q>Zf`$vA-DrHgSOJR9>CX=wBV9rF zH_y6TNsW`o+{S)*Ujf_Xf2?U(Z&yS(nzCO5mBda zxn2ke5mde@I_bJ$Wp<+PRdO^BBfA1YOW@MNmuhIS_kTnL0-RD2L0)DzXw7kEU+V6U zJIRFMr|v@KAC*%7siI4@w#@?0QUY=Hvxj|~KNT5pan)bbGJ>Z~qeO|AL`YcYELx2% zrUpH{R4r?p2c%wpGG+5()c+AL$5`QL+N_nEu@J+@rmtZ`zsJQz9NMv>F=696%p*OO z^4WNBD`-D41Tt(@8Y3Mjr>NY2ToDgvSRsGr51`}YMvDpl+ihkVUv<%Ob~VV`%0?jk zE0VbNVxhhg8p8*1&Og|$VxN$QE%)AR%C?iu3{>xpM7u+r*A7Ww>;8uq%D0~L-#0fa z?U_Y(ttf}$ozc~-ICC%}!fC!GUx_GYM)r}xjnS2j<_*sIF(>vyeRw*UOmdg}%2Zbp z>>K+O^;&Lz_iv#UrMHLS!d%r#2Wk7BYmR(twWRfJ@2DK7PJ8yX6L;W7OMU0g{inwq z2zVo62_e?xsOGauW8$oe|=P|Esz43Tt9p`?y^J5he5@ z(u)ETgwRo>DP6kKgwTsX1VhIHxIsck0s;b|gertYYEYyIQiRZ}^xhF^!Z!ijy7xI3 z=klAI%rj3iS((gw+wc9)dTkX3U#whZ`D}h8zPa{`3H_y<8)xbT_TdCxrMlUrFGG}P z^jj`VPhnjl2_WKa^^-@&C%{MKXeFbs_0NDV?r}>q;m^-gb3$fStXw~z_jAYbY_V#g zMikwtkR$a?3W5r>uezr7mY+8;orM+hc&=!6wmo|>0#yeJ^Z7U7VB>uk($`Hc?{phZ z?-oJRWMC(dgcu7XD*o|v2}=&-*jH$Hy~69}tS-UP2yx>?0m>CJwvFkcG>P2Y%5%sk zIjW450D;UuXnaXG_l#JnSJ`*M&(5B`?#Q%LCu=^I6cT??NI>eDnc=c+sh-wPd_Ff5A#`O(q*l$V)ofjc-2K z2aBMe!$JEz7r|!&#ENGc^zpkexq%mdjrcLqt~gJ}w3_~eS(R+dPo_bSbL9#HGjUl! z22ilG`2P6G@sL)zN^qm|9HL3oM3iy>r`tE&G=Mu6`}eB*)iHdxXhpCrd&kwg5ii z;j{a(vA1~Q247UNtkg~jmQcOK2J17p+NG{8^QOqJe{=<^?nR6{PkDO#!lu|e9F418 zR0{L|o@7Ap-nQJg$J$5;ccA0TF2*9s8(v~Pk5x(yZ{OHPkh3{DP#X?dHwO!I@=`>K zJ2E93vStq><@R+&W{MQ+4GIjWi)gP%kD)8gB6lK~O*No83Htv;_Y@2~xs>2F}t z_=734{p@adbE8e=)yMDh`LX<9qSsfAxrY5xZpet3nH7@K*bLWTE zv~l5YJWD6~M#cC>;u_Q-O+C5< zS|OviQb)8lJ}|d)+xX2o`b7L0Q2B_|tw^DQj+>Fp202w-&h3=+u7S(uFY3Jc9o5rP zYQVAJ`uJo=d?YMqN*y9jYbEL<@a< z5*|$}U~*ksXWDpZh!5IrvjtHgXRJ1NG$aJ_413}bP9+Bfu162={2I38rh*w-y%uc7wG_u~G^ZJV0yY zf-e64y!bD)TJQ_4Zv6wTewRDAS{kPgEwx121KH@SqzLZHh;QGcxY1>!&%#3QV6>+L zZ@JE8r!0^0k8$!E*I%7QEPPO}MF^D&vshoYeRZ5@Qx{L`czcrU*d0bz$3DoeD?(yA zOJu|)ZnJ=;3z{^&KB4z)EN?M%mxzJg)ALoPC~eK9?j;G*V4Ob(*=DQQ@OE@iRHTboeC0- zDjRjK*Q7EQ&dMu%n#v|epWwjTqRfi@qDp|8+q;Z=e^yf$xNZMPB_t;JazjNCh3hBY z?D+iVSmF%i9;S`*@jkY#c;zR)N1uIuj^zwaQ9EFEt7o2ok>T@xNQaz>Am*C-m15Ri zKM45GVb&(B3^*2qgJF@`y^)vA53C5%tf7YghMs8$e6{jJu*M^EUbzJyH=JH4O(@4o2%c>Z~7Q&|1PSG7sgy4veLv|DGni4M0+4*zai+PP4 zOo0l>mAvvvd1Nxi;PE(h)NrJqWTC|6ps|*3x5gE$X|D~ALmO>Odr4P=8_ZT8Srezx zMZ5sl&gQZ8(3{&ng$0T|-LlLEUto#@Addu}vL$3;Lr|J7_0Dk8bXzk7Fzzf1O>m8l zclF@)h@4rOs2X^Ow2PE%V~o&hej!YBcXhPz`Km`4yd%b3hpQYsL?dC@9u$qbi?O)S z9kobukp7Y7ZVoWy=3F}Z!LlP`PJH&Zv6>0FMjx$hz~?3lG{3}0VsK9gJ#JrYT+_Rl zu-6doKDSThL^m*`6jxVCWXokhWBDn?3ic;RyU`q=H!#39?wTm?HkN(qta|gmkvW^o zEh-jGqkLXz}h_=1p^(bL$EMz-a_l$bo{76!-F+BKK)#tYceh|6;m{a zx4#}x;6B(sfba_$;A}lU(Cgqq#KjNP0!>|lx-+||xuB>-@dOtXdGIKSFdMaf=U#h` zqBCgqYGNxSm)|3F3Zjt+jComg`Ql>D=7b;r;QpC(xPYbai~_jI>wVC!?`{)hO{aKG2^Wt`>8q-(1rgOM+bX^D1q?@E5f04Bk< zZ`WyeHEhj%54$1K=>o3TzP=;HyOE@^hf(nPMxeRdX50FYJ5SGZVx1SUd3 z{Q@_lNVL$vaD&*03{(;}R*p|IK3(52Hw}GP;5YCmY#KUIuPZ1Fwl*Hnbm_q;y~hg6 zg(GF^G`&Cc4^ag(#76H?F5TS1vvVv6{klTyq+Olo4oB*ryu1Q5=O>@PFrrEAR0iUo zOrsJ(^kYrI@M+BpmbmLvHu@wT76}5$!1#`NQ5eO|;d7{A+8w(WK0gVG5B)Bxrid~%(A35H}f9Q za?{~oP0!qrc3vb?xrhwbEO1vWI3Ht1C5UX-NrsZ<^*W%$*7>=NOeoXE8#mMAH1s5K z%<)#57hcz=W!upupw+vtW0Q-B$xH`XEtG}DU*?h~G5$L@_iB(dKPxaQ0@kyvm_WK$#_(_byz)HX@5w}!cKEf?2B!AH~rFZPm$$5Ru>i={} zV>AMKmbJ~rd{VI1ZXk1{X@W30Ui%VG&A@zHIJE6;`Y33#cp>w@2M05O|G`S0Dx9p? z+TXhq$|Ah#8@Vp`o2xMWn&+M;=YG4n&5aa4DHjCF0o$%T95`X#VLVe_f3BNNq$uZbiyxC z()u~N-q#(rit7Q`VbOe6^fN_G72h&mdtnxz*XZLS99b7tJoS&DPR2|UMHLETiV8%4 zcqu$!s(nzM40$n_;>!xjcoF;8T!G>g&q1&4kQAeb;p!E$O}ImLIIjn=76!@KowUyI ztC2&-k&uM^%0;1DqV<|SZ%}=O@wg1Gj2Y$N%MdUkvp>y3$`IRYqv0PQ?vawC&`#PX zb3I3Z8>5;PQS0XDKDL_4VRIqQMuw0YqfUMG&a+$2&~>A`#vh&Ulo>~rW84m-fGr0F z4A<>ksqr`cl7PS9tK(e@F0c81#7asPTn|x6+t*SK+T!2|a*v#v^ZYn>sfPQ8&35T2 zf1w^2Vz=4@y^(Q+XikoYXf|(clQ!E(CyN0dT~0LW#xOgeh=%cvd6LQ%;al$2|t3%IIR}B5;!9l-}Y8m zwymbpZEYLg_%R$uYD&Md+cz4o7`gZ@+qPd4v$wH{z!J*p4nmW1Ur;i>8qYApo961| z*(Ovkgq|?a+P>%ZTEq+rAR|FhaaepXxTxb51^;}L3tffc-0p3wg?M>Og(<#^hcom9 zBCH(E!xvX2m&25ImkNr`Zo=U*p3aHFbR!*SDJdJ`A5W!8DeUPry0T4+yNWCmv-IU>B>Rc6zTJso+9WVWx14L2G%2D=Vw+8Slg zh6gx!iH0l=){OhGIr={9|B_Bj7G~1_Vb2xnzLj(cqS%W^ONdV!r~;PYYVhu-P{LjF zng}C9)a)*8Pk?65(3IKV+t|-=xf&Y=zh>XLcrqh7KXR+tEs)usTZtcXPNr{X5?uI^VDYltQ<4~?$b$&>1M>NPQ#jb8^GXA){g^Fc zO#c3^X|FrxI>pUzQHh64y`lzGR&(h@DX20QE!G6$c9O|a83!P>Xw z=XdHmHBi;Rh&sCrt3K@_*AQGyd0XvdvuW2Cp1TG*OK^|8l>6-VU#9Q(7e$e`)W(VE z&CKCLten3Jys9a4=4WGbPfWTQ36xem*^KAQTV60HX~s%}^s{d`Jo{+IRD12+!@4XxlVLk&O+-Xso?g3i)hNkF_EV?VM% zc#>SvyWqCMF5gdX48AORKmL~FxTY6HORw`gl{!$+kY6l-6*PJ$5?y<_yXY?^i{f9l zZYh%Wp6Vw-4eL{6U(x{Yd43-_fC9399u5WGYS~w~p1rzXlgOj|0JGSEbpLYwkIKLG zV;(*S?|ZWkyeNEnZ42|YF=(3~fxT(_7Wq(xu38lf$XJbOF)y9KLT`!RNdEQ?< zP*58d5o(B5CzDfBZI3ybu{+_j`|x`phn4q#wW4)~WEFQ~1uO)5lJwvIzbx-{R~4 z^6q#Kg9LnYxqOM6WO}p?y9RNvzH{QODIhEj(|kXuKzZcfzi}qZt{<*ySP=~U@=NZj!5%0o`9Du0~h*iAvQGMBM49wf&u`ukR_FHPpC zaU6HfInONRy*ceOSGvA!)v9ERig*MgrwH9f0;k?lYn2b^S?5~z+}dG`}OGmNw6 zCw^=IBtBj)J;;zMR>!{{woASB(Gw literal 0 HcmV?d00001 diff --git a/tutorials/nlp/lora.ipynb b/tutorials/nlp/lora.ipynb new file mode 100644 index 000000000000..01d3c99bc516 --- /dev/null +++ b/tutorials/nlp/lora.ipynb @@ -0,0 +1,1720 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "b7a434f4", + "metadata": {}, + "outputs": [], + "source": [ + "BRANCH='r1.20.0'\n", + "import os\n", + "import wget" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "developmental-gibraltar", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", + "\n", + "Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. Run this cell to set up dependencies.\n", + "\"\"\"\n", + "# If you're using Google Colab and not running locally, run this cell\n", + "\n", + "# install NeMo\n", + "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "42daf8bf", + "metadata": {}, + "source": [ + "### Introduction\n", + "\n", + "In this notebook we demonstrate how to use NeMo's implementation of LoRA (Low Rank Adaptation) for fine-tuning large language models. Our implementation is based on the [paper](https://openreview.net/pdf?id=nZeVKeeFYf9) by Hu et al.\n", + "\n", + "We are going to show you how to:\n", + " \n", + " 1. Train a LoRA model on a simple Extractive QA task.\n", + " 2. Inspect the trained LoRA model showing the parameters it contains.\n", + " 3. Run inference with the based model with the LoRA parameters.\n", + " 4. Merge the LoRA parameters into the base model and run inference again on the merged model.\n", + "\n", + "In this tutorial we will be focusing on LoRA, but the training and evaluation methods described here will be applicable for other Parameter-efficient Fine tuning (PEFT) methods in NeMo." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "0bfc7709", + "metadata": {}, + "source": [ + "### Tasks and Datasets\n", + "We will be using LoRA to teach our GPT model to do Extractive Question Answering.\n", + "\n", + "We will be using the [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) reading comprehension dataset, consisting of questions posed by crowd workers on a set of Wikipedia articles, where the answer to every question is a segment of text. More information on [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) can be found on their website or in their paper by Rajpurkar et. al \"[Know What You Don’t Know: Unanswerable Questions for SQuAD](https://arxiv.org/pdf/1806.03822.pdf)\".\n", + "\n", + "LoRA (and all PEFT tuning) models expect at least two fields in the jsonl files. The `input` field should contain all the tokens necessary for the model to generate the `output`. For example for extractive QA, the `input` should contain the context text as well as the question.\n", + "\n", + "```\n", + "[\n", + " {\"input\": \"User: Context: [CONTEXT_1] Question: [QUESTION_1]\\n\\nAssistant:\", \"output\": [ANSWER_1]},\n", + " {\"input\": \"User: Context: [CONTEXT_2] Question: [QUESTION_2]\\n\\nAssistant:\", \"output\": [ANSWER_2]},\n", + " {\"input\": \"User: Context: [CONTEXT_3] Question: [QUESTION_3]\\n\\nAssistant:\", \"output\": [ANSWER_3]},\n", + "]\n", + "```\n", + "Note that we use keywords in the input like `Context:`, `Question:` to separate the text representing the context and question. We also use the keyword `User:` and end each of the input with `\\n\\nAssistant:` tokens. These are recommended because NeMo's instruction-tuned models are trained with a prefix of `User:` and suffix `\\n\\nAssistant:`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "0dbd41fd", + "metadata": {}, + "outputs": [], + "source": [ + "# You can replace DATA_DIR and NEMO_DIR with your own locations\n", + "DATA_DIR = \"data\"\n", + "NEMO_DIR = \".\"\n", + "os.makedirs(DATA_DIR, exist_ok=True)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "504a7b40", + "metadata": {}, + "source": [ + "\n", + "For each dataset we have preprocessing scripts pre-written in NeMo's example directory located in `examples/nlp`. Let's download those now. " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e72a1dc1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File ‘prompt_learning_squad_preprocessing.py’ already there; not retrieving.\n", + "\n" + ] + } + ], + "source": [ + "# download the preprocessing scripts from github for the purpose of this tutorial\n", + "! wget -nc https://raw.githubusercontent.com/NVIDIA/NeMo/{BRANCH}/scripts/dataset_processing/nlp/squad/prompt_learning_squad_preprocessing.py" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "71813919", + "metadata": {}, + "source": [ + "Now let's down load and process the dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "fa16d8ac", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2023-05-30 14:07:23-- https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json\n", + "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.109.153, 185.199.111.153, 185.199.108.153, ...\n", + "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.109.153|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 30288272 (29M) [application/json]\n", + "Saving to: ‘train-v1.1.json’\n", + "\n", + "train-v1.1.json 100%[===================>] 28.88M 84.3MB/s in 0.3s \n", + "\n", + "2023-05-30 14:07:25 (84.3 MB/s) - ‘train-v1.1.json’ saved [30288272/30288272]\n", + "\n", + "--2023-05-30 14:07:26-- https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json\n", + "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.110.153, 185.199.108.153, 185.199.111.153, ...\n", + "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.110.153|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 4854279 (4.6M) [application/json]\n", + "Saving to: ‘dev-v1.1.json’\n", + "\n", + "dev-v1.1.json 100%[===================>] 4.63M --.-KB/s in 0.1s \n", + "\n", + "2023-05-30 14:07:27 (43.8 MB/s) - ‘dev-v1.1.json’ saved [4854279/4854279]\n", + "\n" + ] + } + ], + "source": [ + "SQUAD_DIR = os.path.join(DATA_DIR, \"SQuAD\")\n", + "os.makedirs(SQUAD_DIR, exist_ok=True)\n", + "\n", + "# Download the SQuAD dataset\n", + "!wget -nc https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json\n", + "!wget -nc https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json\n", + "!mv train-v1.1.json {SQUAD_DIR}\n", + "!mv dev-v1.1.json {SQUAD_DIR}" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "64e3e25b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving train split to data/SQuAD/squad_train.jsonl\n", + "100%|█████████████████████████████████| 87599/87599 [00:00<00:00, 204336.27it/s]\n", + "Saving val split to data/SQuAD/squad_val.jsonl\n", + "100%|█████████████████████████████████| 10570/10570 [00:00<00:00, 158654.55it/s]\n", + "Saving test split to data/SQuAD/squad_test_ground_truth.jsonl\n", + "100%|█████████████████████████████████| 10570/10570 [00:00<00:00, 183040.92it/s]\n", + "Saving test split to data/SQuAD/squad_test.jsonl\n", + "100%|█████████████████████████████████| 10570/10570 [00:00<00:00, 196367.94it/s]\n" + ] + } + ], + "source": [ + "# Preprocess squad data\n", + "!python prompt_learning_squad_preprocessing.py --sft-format --data-dir {SQUAD_DIR}" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b562d1de", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"input\": \"User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\\u201310 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \\\"golden anniversary\\\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \\\"Super Bowl L\\\"), so that the logo could prominently feature the Arabic numerals 50. Question:Which NFL team represented the AFC at Super Bowl 50?\\n\\nAssistant:\", \"output\": \"Denver Broncos\"}\n", + "{\"input\": \"User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\\u201310 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \\\"golden anniversary\\\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \\\"Super Bowl L\\\"), so that the logo could prominently feature the Arabic numerals 50. Question:Which NFL team represented the NFC at Super Bowl 50?\\n\\nAssistant:\", \"output\": \"Carolina Panthers\"}\n", + "{\"input\": \"User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\\u201310 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \\\"golden anniversary\\\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \\\"Super Bowl L\\\"), so that the logo could prominently feature the Arabic numerals 50. Question:Where did Super Bowl 50 take place?\\n\\nAssistant:\", \"output\": \"Santa Clara, California\"}\n", + "{\"input\": \"User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\\u201310 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \\\"golden anniversary\\\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \\\"Super Bowl L\\\"), so that the logo could prominently feature the Arabic numerals 50. Question:Which NFL team won Super Bowl 50?\\n\\nAssistant:\", \"output\": \"Denver Broncos\"}\n", + "{\"input\": \"User: Context:Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \\\"Venite Ad Me Omnes\\\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary. Question:To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?\\n\\nAssistant:\", \"output\": \"Saint Bernadette Soubirous\"}\n", + "{\"input\": \"User: Context:Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \\\"Venite Ad Me Omnes\\\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary. Question:What is in front of the Notre Dame Main Building?\\n\\nAssistant:\", \"output\": \"a copper statue of Christ\"}\n", + "{\"input\": \"User: Context:Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \\\"Venite Ad Me Omnes\\\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary. Question:The Basilica of the Sacred heart at Notre Dame is beside to which structure?\\n\\nAssistant:\", \"output\": \"the Main Building\"}\n", + "{\"input\": \"User: Context:Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \\\"Venite Ad Me Omnes\\\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary. Question:What is the Grotto at Notre Dame?\\n\\nAssistant:\", \"output\": \"a Marian place of prayer and reflection\"}\n" + ] + } + ], + "source": [ + "# What the squad dataset looks like after processing\n", + "! head -200 $SQUAD_DIR/squad_train.jsonl > $SQUAD_DIR/squad_short_train.jsonl\n", + "! head -20 $SQUAD_DIR/squad_val.jsonl > $SQUAD_DIR/squad_short_val.jsonl\n", + "! head -4 $SQUAD_DIR/squad_short_val.jsonl\n", + "! head -4 $SQUAD_DIR/squad_short_train.jsonl" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2e19c8dc", + "metadata": {}, + "source": [ + "### Model Config Setup\n", + "Now we will begin setting up the config file needed for PEFT tuning. We use a single config for all supported PEFT methods (LoRA, Adapter and P-Tuning). All PEFT methods use classes defined in [megatron_gpt_peft_models.py](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py). All PEFT Classes inherit from `MegatronGPTSFTModel` which is the class that governs instruction tuning." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5749c387", + "metadata": {}, + "outputs": [], + "source": [ + "from omegaconf import OmegaConf\n", + "\n", + "CONFIG_DIR = os.path.join(NEMO_DIR, \"conf\")\n", + "os.makedirs(CONFIG_DIR, exist_ok=True)\n", + "\n", + "# Download the example config file\n", + "wget.download(f'https://raw.githubusercontent.com/NVIDIA/NeMo/{BRANCH}/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml', CONFIG_DIR)\n", + "\n", + "# Load the example config file so we can start editing it\n", + "CONFIG_PATH = os.path.join(CONFIG_DIR, \"megatron_gpt_peft_tuning_config.yaml\")\n", + "config = OmegaConf.load(CONFIG_PATH)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "ce966bcf", + "metadata": {}, + "source": [ + "The `config` contains several attributes required by the `MegatronGPTPEFTModel`. First we will set the training data path and the validation data path in the config.\n", + "The `config` allows us to set a list of `jsonl` files as training files and sample examples from each file with different probabilities. For simplicity we are going to use just one training file and thus the sampling probability is set to `1.0`\n", + "\n", + "We can also monitor validation loss from multiple validation files during training. Again for simplicity we will use just one validation file." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6bb1590f", + "metadata": {}, + "outputs": [], + "source": [ + "config.model.data.train_ds.file_names = [f\"{SQUAD_DIR}/squad_short_train.jsonl\"]\n", + "config.model.data.train_ds.concat_sampling_probabilities=[1.0]\n", + "config.model.data.validation_ds.file_names = [f\"{SQUAD_DIR}/squad_short_val.jsonl\"]\n", + "config.model.data.validation_ds.names=[\"squad_val\"]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "f6b7831a", + "metadata": {}, + "source": [ + "### PEFT Config\n", + "The attribute [config.model.peft](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml#L78) contains settings that control the PEFT training method and its related hyperpameters. We currently support `lora`, `adapters`, `ptuning` and `ia3`. We can instruct the training script to use one of these methods by setting the config.model.peft.peft_scheme attribute.\n", + "\n", + "The other hyperparams associated with lora tuning are present in the [config.model.peft.lora_tuning](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml#L92) attribute." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72c9f966", + "metadata": {}, + "outputs": [], + "source": [ + "config.model.peft.peft_scheme=\"lora\" # we can also set this to adapter or ptuning or ia3\n", + "print(OmegaConf.to_yaml(config.model.peft.lora_tuning))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "c32e73c3", + "metadata": {}, + "source": [ + "**Note:** In the original LoRA paper each attention projection (`K`, `Q`, `V` and `O`) can have their own Low-Rank projections. However, NeMo's attention implementation fuses `KQV` into a single projection and thus our LoRA implementation learns a single Low-Rank projection for `KQV` in a combined fashion. We do not support LoRA for the `O` matrix at this point." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4e021b24", + "metadata": {}, + "source": [ + "### Prompt Formatting\n", + "The `config.model.data.train_ds.prompt_template` attribute allows us to further tweak the format of the input and output if needed. In this example, we have \"encoding\" our format inside the `jsonl` file directly. So we can keep the `prompt_template` in the config simple.(See previous section on Data Preparation). " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "1b6aa5c7", + "metadata": {}, + "outputs": [], + "source": [ + "config.model.data.train_ds.prompt_template =\"{input} {output}\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a0d5017e", + "metadata": {}, + "source": [ + "### Setting the Pretrained GPT Model\n", + "Next we will set the \"base language model\" upon which we will perform LoRA tuning. Obviously, larger base models will have better performance on downstream tasks but for the purposes of this tutorial we will use a small 345M parameter GPT model." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "48cdf868", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[NeMo W 2023-05-30 14:08:23 experimental:27] Module is experimental, not ready for production and is not fully supported. Use at your own risk.\n", + "[NeMo W 2023-05-30 14:08:24 experimental:27] Module is experimental, not ready for production and is not fully supported. Use at your own risk.\n" + ] + }, + { + "data": { + "text/plain": [ + "'https://api.ngc.nvidia.com/v2/models/nvidia/nemo/megatron_gpt_345m/versions/1/files/megatron_gpt_345m.nemo'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check what GPT .nemo models we have available on NGC\n", + "from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel\n", + "megatron_gpt_345m_nemo_url = MegatronGPTModel.list_available_models()[0].location\n", + "megatron_gpt_345m_nemo_url # should point to the 345m megatron gpt model '.nemo' file" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "ede350ed", + "metadata": {}, + "source": [ + "If we wanted to use the GPT model class directly, we could instantiate a trainer then download the model by calling running \n", + "`gpt_model = MegatronGPTModel.from_pretrained(model_name=\"megatron_gpt_345m\", trainer=trainer).cuda()`. But we just need the `.nemo` file in our working NeMo directory in this tutorial, so we will download it using `wget`. " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "364439a1", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File ‘./megatron_gpt_345m.nemo’ already there; not retrieving.\n" + ] + } + ], + "source": [ + "# Download the model from NGC\n", + "gpt_file_name = \"megatron_gpt_345m.nemo\"\n", + "!wget -nc --content-disposition {megatron_gpt_345m_nemo_url} -O {NEMO_DIR}/{gpt_file_name}" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "1d6a8a67", + "metadata": {}, + "source": [ + "Now that we have a `.nemo` GPT file to work with. We need to add its path in our prompt learning config. " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "2778a5fa", + "metadata": {}, + "outputs": [], + "source": [ + "# Set GPT model path on prompt learning config\n", + "config.model.restore_from_path = gpt_file_name" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "943a9c83", + "metadata": {}, + "source": [ + "Next, we will set where we want to save all the intermediate training logs and checkpoints. As well as other training settings such as: number of training steps, batch size and validation check interval, and num_workers for data processing." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "a278cbdf", + "metadata": {}, + "outputs": [], + "source": [ + "config.exp_manager.exp_dir=f\"{NEMO_DIR}/peft_lora\"\n", + "config.exp_manager.explicit_log_dir=\"training_info\"\n", + "config.trainer.max_steps=100\n", + "config.model.micro_batch_size=1\n", + "config.model.global_batch_size=4\n", + "config.trainer.val_check_interval=50\n", + "config.model.data.train_ds.num_workers=0 # 0 is recommended which just uses the main thread to process training examples\n", + "config.model.data.validation_ds.num_workers=0 # 0 is recommended which just uses the main thread to process the validation examples" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a988d16e", + "metadata": {}, + "source": [ + "Let's have a look at all the values we've set in the model config. You can change any of these values in the same manner we've been using above. " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "12a37ada", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "seed: 1234\n", + "tensor_model_parallel_size: 1\n", + "pipeline_model_parallel_size: 1\n", + "global_batch_size: 4\n", + "micro_batch_size: 1\n", + "restore_from_path: megatron_gpt_345m.nemo\n", + "resume_from_checkpoint: null\n", + "save_nemo_on_validation_end: false\n", + "sync_batch_comm: false\n", + "megatron_amp_O2: false\n", + "sequence_parallel: false\n", + "activations_checkpoint_granularity: null\n", + "activations_checkpoint_method: null\n", + "activations_checkpoint_num_layers: null\n", + "answer_only_loss: true\n", + "gradient_as_bucket_view: false\n", + "hidden_dropout: 0.0\n", + "attention_dropout: 0.0\n", + "ffn_dropout: 0.0\n", + "peft:\n", + " peft_scheme: adapter\n", + " restore_from_path: null\n", + " adapter_tuning:\n", + " type: parallel_adapter\n", + " adapter_dim: 32\n", + " adapter_dropout: 0.0\n", + " norm_position: pre\n", + " column_init_method: xavier\n", + " row_init_method: zero\n", + " norm_type: mixedfusedlayernorm\n", + " lora_tuning:\n", + " adapter_dim: 32\n", + " adapter_dropout: 0.0\n", + " column_init_method: xavier\n", + " row_init_method: zero\n", + " p_tuning:\n", + " virtual_tokens: 10\n", + " bottleneck_dim: 1024\n", + " embedding_dim: 1024\n", + " init_std: 0.023\n", + "data:\n", + " train_ds:\n", + " file_names:\n", + " - data/SQuAD/squad_short_train.jsonl\n", + " global_batch_size: ${model.global_batch_size}\n", + " micro_batch_size: ${model.micro_batch_size}\n", + " shuffle: true\n", + " num_workers: 0\n", + " pin_memory: true\n", + " max_seq_length: 2048\n", + " min_seq_length: 1\n", + " drop_last: true\n", + " concat_sampling_probabilities:\n", + " - 1.0\n", + " context_key: input\n", + " label_key: output\n", + " add_eos: true\n", + " add_sep: false\n", + " add_bos: false\n", + " separate_prompt_and_response_with_newline: false\n", + " truncation_field: context\n", + " index_mapping_dir: null\n", + " prompt_template: '{input} {output}'\n", + " validation_ds:\n", + " file_names:\n", + " - data/SQuAD/squad_short_val.jsonl\n", + " names:\n", + " - squad_val\n", + " global_batch_size: ${model.global_batch_size}\n", + " micro_batch_size: ${model.micro_batch_size}\n", + " shuffle: false\n", + " num_workers: 0\n", + " pin_memory: true\n", + " max_seq_length: 2048\n", + " min_seq_length: 1\n", + " drop_last: false\n", + " context_key: input\n", + " label_key: output\n", + " add_eos: ${model.data.train_ds.add_eos}\n", + " add_sep: ${model.data.train_ds.add_sep}\n", + " add_bos: ${model.data.train_ds.add_bos}\n", + " separate_prompt_and_response_with_newline: ${model.data.train_ds.separate_prompt_and_response_with_newline}\n", + " write_predictions_to_file: false\n", + " output_file_path_prefix: null\n", + " truncation_field: context\n", + " index_mapping_dir: null\n", + " prompt_template: ${model.data.train_ds.prompt_template}\n", + " metric:\n", + " name: loss\n", + " average: null\n", + " num_classes: null\n", + "test_ds:\n", + " file_names: null\n", + " names: null\n", + " global_batch_size: ${model.global_batch_size}\n", + " micro_batch_size: ${model.micro_batch_size}\n", + " shuffle: false\n", + " num_workers: 4\n", + " pin_memory: true\n", + " max_seq_length: 2048\n", + " min_seq_length: 1\n", + " drop_last: false\n", + " context_key: input\n", + " label_key: output\n", + " add_eos: ${model.data.train_ds.add_eos}\n", + " add_sep: ${model.data.train_ds.add_sep}\n", + " add_bos: ${model.data.train_ds.add_bos}\n", + " separate_prompt_and_response_with_newline: ${model.data.train_ds.separate_prompt_and_response_with_newline}\n", + " write_predictions_to_file: false\n", + " output_file_path_prefix: null\n", + " truncation_field: context\n", + " index_mapping_dir: null\n", + " prompt_template: ${model.data.train_ds.prompt_template}\n", + " metric:\n", + " name: loss\n", + " average: null\n", + " num_classes: null\n", + "optim:\n", + " name: fused_adam\n", + " lr: 0.0001\n", + " weight_decay: 0.01\n", + " betas:\n", + " - 0.9\n", + " - 0.98\n", + " sched:\n", + " name: CosineAnnealing\n", + " warmup_steps: 50\n", + " min_lr: 0.0\n", + " constant_steps: 0\n", + " monitor: val_loss\n", + " reduce_on_plateau: false\n", + "\n" + ] + } + ], + "source": [ + "# Final model config\n", + "print(OmegaConf.to_yaml(config.model))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4c048852", + "metadata": {}, + "source": [ + "### Building the PyTorch Lightning Trainer\n", + "NeMo models are primarily PyTorch Lightning modules - and therefore are entirely compatible with the PyTorch Lightning ecosystem.\n", + "\n", + "Let's first instantiate a Trainer object" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "90f85b2a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using 16bit None Automatic Mixed Precision (AMP)\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Trainer config - \n", + "\n", + "devices: 1\n", + "accelerator: gpu\n", + "num_nodes: 1\n", + "precision: 16\n", + "logger: false\n", + "enable_checkpointing: false\n", + "replace_sampler_ddp: false\n", + "max_epochs: 4\n", + "max_steps: 100\n", + "log_every_n_steps: 10\n", + "val_check_interval: 1.0\n", + "gradient_clip_val: 1.0\n", + "\n" + ] + } + ], + "source": [ + "import torch\n", + "import pytorch_lightning as pl\n", + "from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy\n", + "from pytorch_lightning.plugins.environments import TorchElasticEnvironment\n", + "\n", + "# let's modify some trainer configs\n", + "# check if we have GPU available and uses it\n", + "accelerator = 'gpu' if torch.cuda.is_available() else 'cpu'\n", + "config.trainer.accelerator = accelerator\n", + "config.trainer.devices = 1\n", + "config.trainer.max_epochs = 4\n", + "config.trainer.val_check_interval = 1.0\n", + "\n", + "# for PyTorch Native AMP set precision=16\n", + "config.trainer.precision = 16 if torch.cuda.is_available() else 32\n", + "\n", + "# setup cluster environment parameters\"\n", + "# use torch elastic cluster environment so `create_process_externally` is True\n", + "# the launcher is set to None. It will not try to spawn new processes.\n", + "# It won't create the misconfiguration error because of the `interactive session`\n", + "os.environ[\"LOCAL_RANK\"] = '0'\n", + "os.environ[\"RANK\"] = '0'\n", + "os.environ[\"WORLD_SIZE\"] = '1'\n", + "\n", + "strategy = NLPDDPStrategy(find_unused_parameters=False, no_ddp_communication_hook=True)\n", + "plugins = [TorchElasticEnvironment()]\n", + "trainer = pl.Trainer(plugins= plugins, strategy=strategy, **config.trainer)\n", + "\n", + "print(\"Trainer config - \\n\")\n", + "print(OmegaConf.to_yaml(config.trainer))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "890f0dc5", + "metadata": {}, + "outputs": [], + "source": [ + "print(OmegaConf.to_yaml(config.exp_manager))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4d0124c1", + "metadata": {}, + "source": [ + "### Setting up a NeMo Experiment\n", + "\n", + "NeMo has an experiment manager that handles logging and checkpointing for us, so let's use it:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "f2c943ba", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[NeMo E 2023-05-30 14:09:17 exp_manager:646] exp_manager received explicit_log_dir: training_info and at least one of exp_dir: ./peft_lora, or version: None. Please note that exp_dir, name, and version will be ignored.\n", + "[NeMo W 2023-05-30 14:09:17 exp_manager:651] Exp_manager is logging to training_info, but it already exists.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2023-05-30 14:09:17 exp_manager:374] Experiments will be logged at training_info\n", + "[NeMo I 2023-05-30 14:09:17 exp_manager:797] TensorboardLogger has been set up\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[NeMo W 2023-05-30 14:09:17 exp_manager:893] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 100. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training_info\n" + ] + } + ], + "source": [ + "from nemo.utils.exp_manager import exp_manager\n", + "\n", + "# Set name of the experiment \n", + "config.name = 'lora_example_tuning'\n", + "config.exp_manager.resume_if_exists = False\n", + "\n", + "# Init the experiment manager and view the exp_dir\n", + "exp_dir = exp_manager(trainer, config.get(\"exp_manager\", None))\n", + "exp_dir = str(exp_dir)\n", + "print(exp_dir)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "298b3dce", + "metadata": {}, + "source": [ + "### LoRA Training\n", + "We now set up the process for training a LoRA model. We first require a config that contains details about the base language model upon which we will train our LoRA model. So we first extract the `base_model_cfg`" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "edb38445", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[NeMo W 2023-05-30 14:09:30 experimental:27] Module is experimental, not ready for production and is not fully supported. Use at your own risk.\n" + ] + } + ], + "source": [ + "from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTModel\n", + "from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector, PEFTSaveRestoreConnector\n", + "base_model_save_restore_connector = NLPSaveRestoreConnector()\n", + "base_model_cfg = MegatronGPTModel.restore_from(\n", + " restore_path=config.model.restore_from_path,\n", + " trainer=trainer,\n", + " return_config=True,\n", + " save_restore_connector=base_model_save_restore_connector,\n", + " )" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "16bace39", + "metadata": {}, + "source": [ + "Next, we update the `base_model_cfg` with any new settings we employ in our current (LoRA) `config`." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "fd350dbc", + "metadata": {}, + "outputs": [], + "source": [ + "from omegaconf.omegaconf import open_dict\n", + "from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import MegatronGPTLoRAModel\n", + "OmegaConf.set_struct(base_model_cfg, True)\n", + "OmegaConf.resolve(config)\n", + "with open_dict(base_model_cfg):\n", + " base_model_cfg.megatron_amp_O2 = config.model.get('megatron_amp_O2', False)\n", + " base_model_cfg.micro_batch_size = config.model.data.train_ds.micro_batch_size\n", + " base_model_cfg.global_batch_size = config.model.data.train_ds.global_batch_size\n", + " base_model_cfg.sequence_parallel = config.model.get(\"sequence_parallel\", False)\n", + " base_model_cfg.data = config.model.data\n", + " base_model_cfg.optim = config.model.optim\n", + " base_model_cfg.precision = config.trainer.precision\n", + " base_model_cfg.answer_only_loss = config.model.answer_only_loss\n", + " base_model_cfg.restore_from_path = config.model.restore_from_path\n", + " base_model_cfg.resume_from_checkpoint = config.model.resume_from_checkpoint\n", + " base_model_cfg.save_nemo_on_validation_end = config.model.save_nemo_on_validation_end\n", + " base_model_cfg.peft = config.model.peft\n", + " base_model_cfg.target = f\"{MegatronGPTLoRAModel.__module__}.{MegatronGPTLoRAModel.__name__}\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "dfc55a1c", + "metadata": {}, + "source": [ + "Next, we instantiate the LoRA model class" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "a81d8741", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2023-05-30 14:09:39 megatron_init:232] Rank 0 has data parallel group: [0]\n", + "[NeMo I 2023-05-30 14:09:39 megatron_init:235] All data parallel group ranks: [[0]]\n", + "[NeMo I 2023-05-30 14:09:39 megatron_init:236] Ranks 0 has data parallel rank: 0\n", + "[NeMo I 2023-05-30 14:09:39 megatron_init:244] Rank 0 has model parallel group: [0]\n", + "[NeMo I 2023-05-30 14:09:39 megatron_init:245] All model parallel group ranks: [[0]]\n", + "[NeMo I 2023-05-30 14:09:39 megatron_init:255] Rank 0 has tensor model parallel group: [0]\n", + "[NeMo I 2023-05-30 14:09:39 megatron_init:259] All tensor model parallel group ranks: [[0]]\n", + "[NeMo I 2023-05-30 14:09:39 megatron_init:260] Rank 0 has tensor model parallel rank: 0\n", + "[NeMo I 2023-05-30 14:09:39 megatron_init:274] Rank 0 has pipeline model parallel group: [0]\n", + "[NeMo I 2023-05-30 14:09:39 megatron_init:286] Rank 0 has embedding group: [0]\n", + "[NeMo I 2023-05-30 14:09:39 megatron_init:292] All pipeline model parallel group ranks: [[0]]\n", + "[NeMo I 2023-05-30 14:09:39 megatron_init:293] Rank 0 has pipeline model parallel rank 0\n", + "[NeMo I 2023-05-30 14:09:39 megatron_init:294] All embedding group ranks: [[0]]\n", + "[NeMo I 2023-05-30 14:09:39 megatron_init:295] Rank 0 has embedding rank: 0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[NeMo W 2023-05-30 14:09:39 modelPT:244] You tried to register an artifact under config key=tokenizer.vocab_file but an artifact for it has already been registered.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2023-05-30 14:09:39 tokenizer_utils:204] Getting Megatron tokenizer for pretrained model name: megatron-gpt-345m, custom vocab file: /tmp/tmp1qljai9b/bfcdca5e44814366bdb5dcd651325152_gpt2-vocab.json, and merges file: /tmp/tmp1qljai9b/315a11fd68be49d6abdb34363e8c4997_gpt2-merge.txt\n", + "[NeMo I 2023-05-30 14:09:39 tokenizer_utils:130] Getting HuggingFace AutoTokenizer with pretrained_model_name: gpt2, vocab_file: /tmp/tmp1qljai9b/bfcdca5e44814366bdb5dcd651325152_gpt2-vocab.json, merges_files: /tmp/tmp1qljai9b/315a11fd68be49d6abdb34363e8c4997_gpt2-merge.txt, special_tokens_dict: {}, and use_fast: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using sep_token, but it is not set yet.\n", + "Using cls_token, but it is not set yet.\n", + "Using pad_token, but it is not set yet.\n", + "Using mask_token, but it is not set yet.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2023-05-30 14:09:40 megatron_base_model:238] Padded vocab_size: 50304, original vocab_size: 50257, dummy tokens: 47.\n", + "[NeMo I 2023-05-30 14:09:41 megatron_gpt_peft_models:56] Before adding PEFT params:\n", + " | Name | Type | Params\n", + " -----------------------------------\n", + " 0 | model | GPTModel | 354 M \n", + " -----------------------------------\n", + " 354 M Trainable params\n", + " 0 Non-trainable params\n", + " 354 M Total params\n", + " 1,419.485 Total estimated model params size (MB)\n", + "[NeMo I 2023-05-30 14:09:41 megatron_gpt_peft_models:65] After adding PEFT params:\n", + " | Name | Type | Params\n", + " -----------------------------------\n", + " 0 | model | GPTModel | 358 M \n", + " -----------------------------------\n", + " 358 M Trainable params\n", + " 0 Non-trainable params\n", + " 358 M Total params\n", + " 1,432.068 Total estimated model params size (MB)\n", + "[NeMo I 2023-05-30 14:09:42 nlp_overrides:491] Model MegatronGPTLoRAModel was successfully restored from /home/adithyare/NeMo/tutorials/nlp/megatron_gpt_345m.nemo.\n" + ] + } + ], + "source": [ + "from nemo.collections.nlp.parts.nlp_overrides import PEFTSaveRestoreConnector\n", + "peft_save_restore_connector = PEFTSaveRestoreConnector(\n", + " peft_model_nemo_path=None, peft_model_ckpt_path=None\n", + " )\n", + "model = MegatronGPTLoRAModel.restore_from(\n", + " restore_path=config.model.restore_from_path,\n", + " trainer=trainer,\n", + " override_config_path=base_model_cfg,\n", + " save_restore_connector=peft_save_restore_connector,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "2d99f433", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[NeMo W 2023-05-30 14:09:46 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/configuration_validator.py:175: UserWarning: The `batch_idx` argument in `MegatronGPTLoRAModel.on_train_batch_start` hook may not match with the actual batch index when using a `dataloader_iter` argument in your `training_step`.\n", + " rank_zero_warn(\n", + " \n", + "[NeMo W 2023-05-30 14:09:46 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/configuration_validator.py:175: UserWarning: The `batch_idx` argument in `MegatronGPTLoRAModel.on_train_batch_end` hook may not match with the actual batch index when using a `dataloader_iter` argument in your `training_step`.\n", + " rank_zero_warn(\n", + " \n", + "[NeMo W 2023-05-30 14:09:46 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/lightning_fabric/plugins/environments/torchelastic.py:36: UserWarning: MASTER_ADDR environment variable is not defined. Set as localhost\n", + " rank_zero_warn(\"MASTER_ADDR environment variable is not defined. Set as localhost\")\n", + " \n", + "[NeMo W 2023-05-30 14:09:46 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/lightning_fabric/plugins/environments/torchelastic.py:44: UserWarning: MASTER_PORT environment variable is not defined. Set as 12910\n", + " rank_zero_warn(\"MASTER_PORT environment variable is not defined. Set as 12910\")\n", + " \n", + "Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1\n", + "----------------------------------------------------------------------------------------------------\n", + "distributed_backend=nccl\n", + "All distributed processes registered. Starting with 1 processes\n", + "----------------------------------------------------------------------------------------------------\n", + "\n", + "You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n", + "[NeMo W 2023-05-30 14:09:46 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:613: UserWarning: Checkpoint directory /home/adithyare/NeMo/tutorials/nlp/training_info/checkpoints exists and is not empty.\n", + " rank_zero_warn(f\"Checkpoint directory {dirpath} exists and is not empty.\")\n", + " \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2023-05-30 14:09:46 megatron_gpt_sft_model:634] Building GPT SFT validation datasets.\n", + "[NeMo I 2023-05-30 14:09:46 text_memmap_dataset:104] Building data files\n", + "[NeMo I 2023-05-30 14:09:46 text_memmap_dataset:343] Processing 1 data files using 12 workers\n", + "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:349] Time building 0 / 1 mem-mapped files: 0:00:00.360761\n", + "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:114] Loading data files\n", + "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:205] Loading data/SQuAD/squad_short_val.jsonl\n", + "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:117] Time loading 1 mem-mapped files: 0:00:00.002361\n", + "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:121] Computing global indices\n", + "[NeMo I 2023-05-30 14:09:47 megatron_gpt_sft_model:637] Length of val dataset: 20\n", + "[NeMo I 2023-05-30 14:09:47 megatron_gpt_sft_model:648] Building GPT SFT traing datasets.\n", + "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:104] Building data files\n", + "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:343] Processing 1 data files using 12 workers\n", + "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:349] Time building 0 / 1 mem-mapped files: 0:00:00.299554\n", + "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:114] Loading data files\n", + "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:205] Loading data/SQuAD/squad_short_train.jsonl\n", + "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:117] Time loading 1 mem-mapped files: 0:00:00.001065\n", + "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:121] Computing global indices\n", + "[NeMo I 2023-05-30 14:09:47 dataset_utils:1341] > loading indexed mapping from data/SQuAD/squad_short_train.jsonl_squad_short_train.jsonl_indexmap_402mns_2046msl_0.00ssp_1234s.npy\n", + "[NeMo I 2023-05-30 14:09:47 dataset_utils:1344] loaded indexed file in 0.001 seconds\n", + "[NeMo I 2023-05-30 14:09:47 dataset_utils:1345] total number of samples: 600\n", + "make: Entering directory '/home/adithyare/NeMo/nemo/collections/nlp/data/language_modeling/megatron'\n", + "make: Nothing to be done for 'default'.\n", + "make: Leaving directory '/home/adithyare/NeMo/nemo/collections/nlp/data/language_modeling/megatron'\n", + "[NeMo I 2023-05-30 14:09:47 blendable_dataset:67] > elapsed time for building blendable dataset indices: 0.09 (sec)\n", + "> building indices for blendable datasets ...\n", + " > sample ratios:\n", + " dataset 0, input: 1, achieved: 1\n", + "[NeMo I 2023-05-30 14:09:47 megatron_gpt_sft_model:650] Length of train dataset: 402\n", + "[NeMo I 2023-05-30 14:09:47 megatron_gpt_sft_model:655] Building dataloader with consumed samples: 0\n", + "[NeMo I 2023-05-30 14:09:47 megatron_gpt_sft_model:655] Building dataloader with consumed samples: 0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2023-05-30 14:09:47 nlp_overrides:124] Configuring DDP for model parallelism.\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", + "[NeMo I 2023-05-30 14:09:47 megatron_gpt_peft_models:130] Optimizer groups set:\n", + " | Name | Type | Params\n", + " -----------------------------------\n", + " 0 | model | GPTModel | 358 M \n", + " -----------------------------------\n", + " 3.1 M Trainable params\n", + " 354 M Non-trainable params\n", + " 358 M Total params\n", + " 716.034 Total estimated model params size (MB)\n", + "[NeMo I 2023-05-30 14:09:47 modelPT:721] Optimizer config = FusedAdam (\n", + " Parameter Group 0\n", + " betas: [0.9, 0.98]\n", + " bias_correction: True\n", + " eps: 1e-08\n", + " lr: 0.0001\n", + " weight_decay: 0.01\n", + " )\n", + "[NeMo I 2023-05-30 14:09:47 lr_scheduler:910] Scheduler \"\" \n", + " will be used during training (effective maximum steps = 100) - \n", + " Parameters : \n", + " (warmup_steps: 50\n", + " min_lr: 0.0\n", + " constant_steps: 0\n", + " max_steps: 100\n", + " )\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + " | Name | Type | Params\n", + "-----------------------------------\n", + "0 | model | GPTModel | 358 M \n", + "-----------------------------------\n", + "3.1 M Trainable params\n", + "354 M Non-trainable params\n", + "358 M Total params\n", + "716.034 Total estimated model params size (MB)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3cb87a7b9d4b46e4a0fb0f0670351fbd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[NeMo W 2023-05-30 14:09:48 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:224: PossibleUserWarning: The dataloader, val_dataloader 0, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 24 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n", + " rank_zero_warn(\n", + " \n", + "[NeMo W 2023-05-30 14:09:48 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/loops/dataloader/evaluation_loop.py:401: UserWarning: Found `dataloader_iter` argument in the `validation_step`. Note that the support for this signature is experimental and the behavior is subject to change.\n", + " rank_zero_warn(\n", + " \n", + "[NeMo W 2023-05-30 14:09:48 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/apex/transformer/pipeline_parallel/utils.py:81: UserWarning: This function is only for unittest\n", + " warnings.warn(\"This function is only for unittest\")\n", + " \n", + "[NeMo W 2023-05-30 14:09:49 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:536: PossibleUserWarning: It is recommended to use `self.log('val_loss', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.\n", + " warning_cache.warn(\n", + " \n", + "[NeMo W 2023-05-30 14:09:49 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:536: PossibleUserWarning: It is recommended to use `self.log('validation_loss_squad_val', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.\n", + " warning_cache.warn(\n", + " \n", + "[NeMo W 2023-05-30 14:09:49 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:536: PossibleUserWarning: It is recommended to use `self.log('validation_loss', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.\n", + " warning_cache.warn(\n", + " \n", + "[NeMo W 2023-05-30 14:09:49 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:224: PossibleUserWarning: The dataloader, train_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 24 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n", + " rank_zero_warn(\n", + " \n", + "[NeMo W 2023-05-30 14:09:49 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:344: UserWarning: Found `dataloader_iter` argument in the `training_step`. Note that the support for this signature is experimental and the behavior is subject to change.\n", + " rank_zero_warn(\n", + " \n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c7a473adeca64c828d2a1338dab1e76b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[NeMo W 2023-05-30 14:09:51 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:232: UserWarning: You called `self.log('global_step', ...)` in your `training_step` but the value needs to be floating point. Converting it to torch.float32.\n", + " warning_cache.warn(\n", + " \n", + "[NeMo W 2023-05-30 14:09:51 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:232: UserWarning: You called `self.log('consumed_samples', ...)` in your `training_step` but the value needs to be floating point. Converting it to torch.float32.\n", + " warning_cache.warn(\n", + " \n", + "[NeMo W 2023-05-30 14:09:51 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/torch/optim/lr_scheduler.py:139: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", + " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", + " \n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a0606700c7ab495eb08ed88c16949569", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 0, global step 100: 'validation_loss' reached 0.30823 (best 0.30823), saving model to '/home/adithyare/NeMo/tutorials/nlp/training_info/checkpoints/lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-v2.ckpt' as top 1\n", + "Metric val_loss improved. New best score: 0.308\n", + "`Trainer.fit` stopped: `max_steps=100` reached.\n", + "Restoring states from the checkpoint path at /home/adithyare/NeMo/tutorials/nlp/training_info/checkpoints/lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-v2.ckpt\n", + "Restored all states from the checkpoint file at /home/adithyare/NeMo/tutorials/nlp/training_info/checkpoints/lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-v2.ckpt\n" + ] + } + ], + "source": [ + "# Training set to 2 epochs by default in a cell above\n", + "trainer.fit(model)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "b8210d6d", + "metadata": {}, + "source": [ + "Once training is completed you should see a saved '.nemo' file in this folder `{config.exp_manager.explicit_log_dir}/checkpoints`" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "e4e19e65", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 230M\n", + "-rw-rw-r-- 1 adithyare adithyare 14M May 30 14:10 lora_example_tuning.nemo\n", + "-rw-rw-r-- 1 adithyare adithyare 37M May 27 09:47 'lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0.ckpt'\n", + "-rw-rw-r-- 1 adithyare adithyare 37M May 27 09:47 'lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-last.ckpt'\n", + "-rw-rw-r-- 1 adithyare adithyare 37M May 30 11:12 'lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-last-v1.ckpt'\n", + "-rw-rw-r-- 1 adithyare adithyare 37M May 30 14:10 'lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-last-v2.ckpt'\n", + "-rw-rw-r-- 1 adithyare adithyare 37M May 30 11:12 'lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-v1.ckpt'\n", + "-rw-rw-r-- 1 adithyare adithyare 37M May 30 14:10 'lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-v2.ckpt'\n", + "training_info\n" + ] + } + ], + "source": [ + "# The trained '.nemo' model is saved in the location below:\n", + "! ls -lh {config.exp_manager.explicit_log_dir}/checkpoints\n", + "print(config.exp_manager.explicit_log_dir)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "6aab09d4", + "metadata": {}, + "source": [ + "### Inference\n", + "The model object from `trainer.fit(model)` is also capable of doing inference. But for the tutorial we will re-load the saved `.nemo` lora model along with a `.nemo` base language model to simulate a more realistic scenario (where training does not happen right before inference).\n", + "\n", + "First, we will load and modify a config file that will be used for inference." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "41ab98a9", + "metadata": {}, + "outputs": [], + "source": [ + "# Download the example config file\n", + "wget.download(f'https://raw.githubusercontent.com/NVIDIA/NeMo/{BRANCH}/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml', CONFIG_DIR)\n", + "\n", + "# Load the example config file so we can start editing it\n", + "CONFIG_EVAL_PATH = os.path.join(CONFIG_DIR, \"megatron_gpt_peft_eval_config.yaml\")\n", + "config_eval = OmegaConf.load(CONFIG_EVAL_PATH)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "36c58c18", + "metadata": {}, + "source": [ + "We are going to modify the `config_eval` object that we created above. We will set the base language model as the `345m` model we downloaded earlier.\n", + "\n", + "Additionally, we will also set the `model.peft.restore_from_path` with the lora model we just trained. For the tutorial we will just use the validation data for inference as well." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "64a4e71a", + "metadata": {}, + "outputs": [], + "source": [ + "config_eval.model.restore_from_path=\"megatron_gpt_345m.nemo\"\n", + "config_eval.model.peft.restore_from_path=\"./training_info/checkpoints/lora_example_tuning.nemo\"\n", + "config_eval.model.data.test_ds.file_names=[f\"{SQUAD_DIR}/squad_short_val.jsonl\"]\n", + "config_eval.model.data.test_ds.names=[\"test_set\"]\n", + "config_eval.model.data.test_ds.global_batch_size=1\n", + "config_eval.model.data.test_ds.micro_batch_size=1\n", + "config_eval.model.data.test_ds.tokens_to_generate=30\n", + "config_eval.inference.greedy=True" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "d8ace8f9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using 16bit None Automatic Mixed Precision (AMP)\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "strategy_eval = NLPDDPStrategy(find_unused_parameters=False, no_ddp_communication_hook=True)\n", + "plugins_eval = [TorchElasticEnvironment()]\n", + "# notice the plugins, strategy and config.trainer args are the same as is training portion of this tutorial\n", + "# we just create a new object with no overlap from the training section of this tutorial\n", + "trainer_eval = pl.Trainer(plugins= plugins_eval, strategy=strategy_eval, **config_eval.trainer) " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "e745ac5e", + "metadata": {}, + "source": [ + "The `config_eval` object is the hydra config at \"inference/test time\". This means it should contain information relevant for inference/test time. But we still need to know some properties that were set at training time. For example, was the training done with `BOS` enabled or not? And other model specific attributes.\n", + "\n", + "So we extract the `peft_model_cfg` from the '.nemo' file of the lora model we just trained." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "e04a2201", + "metadata": {}, + "outputs": [], + "source": [ + "from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import MegatronGPTPEFTModel\n", + "peft_model_cfg = MegatronGPTPEFTModel.restore_from(\n", + " restore_path=\"./training_info/checkpoints/lora_example_tuning.nemo\", trainer=trainer_eval, return_config=True,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "79a17ac7", + "metadata": {}, + "source": [ + "We modify `peft_model_cfg` to include attributes from the `config_eval` that are specific to inference time." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "0e0a17aa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'file_names': ['data/SQuAD/squad_short_val.jsonl'], 'names': ['test_set'], 'global_batch_size': 1, 'micro_batch_size': 1, 'shuffle': False, 'num_workers': 0, 'pin_memory': True, 'max_seq_length': 2048, 'min_seq_length': 1, 'drop_last': False, 'context_key': '${data.train_ds.context_key}', 'label_key': '${data.train_ds.label_key}', 'add_eos': '${data.train_ds.add_eos}', 'add_sep': '${data.train_ds.add_sep}', 'add_bos': '${data.train_ds.add_bos}', 'separate_prompt_and_response_with_newline': '${data.train_ds.separate_prompt_and_response_with_newline}', 'write_predictions_to_file': False, 'output_file_path_prefix': None, 'truncation_field': '${data.train_ds.truncation_field}', 'index_mapping_dir': None, 'prompt_template': '${data.train_ds.prompt_template}', 'tokens_to_generate': 30, 'metric': {'name': 'loss', 'average': None, 'num_classes': None}}\n" + ] + } + ], + "source": [ + "with open_dict(peft_model_cfg):\n", + " # update the model config of the trained model with params we want to set at inference time.\n", + " peft_model_cfg.precision = config_eval.trainer.precision\n", + " peft_model_cfg.data.test_ds = config_eval.model.data.test_ds\n", + " peft_model_cfg.activations_checkpoint_granularity = None\n", + " peft_model_cfg.activations_checkpoint_method = None\n", + "\n", + "with open_dict(config_eval):\n", + " # update the config with the trained model config\n", + " # required for hydra interpolation to work inside cfg.inference\n", + " config_eval.inference.add_BOS = peft_model_cfg.data.test_ds.add_bos\n", + " config_eval.inference.tokens_to_generate = peft_model_cfg.data.test_ds.tokens_to_generate\n", + "\n", + "print(peft_model_cfg.data.test_ds)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "132ae378", + "metadata": {}, + "source": [ + "Next, we load the base language model as well as the lora model we just trained." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "b19cd0ce", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2023-05-30 14:11:11 megatron_init:232] Rank 0 has data parallel group: [0]\n", + "[NeMo I 2023-05-30 14:11:11 megatron_init:235] All data parallel group ranks: [[0]]\n", + "[NeMo I 2023-05-30 14:11:11 megatron_init:236] Ranks 0 has data parallel rank: 0\n", + "[NeMo I 2023-05-30 14:11:11 megatron_init:244] Rank 0 has model parallel group: [0]\n", + "[NeMo I 2023-05-30 14:11:11 megatron_init:245] All model parallel group ranks: [[0]]\n", + "[NeMo I 2023-05-30 14:11:11 megatron_init:255] Rank 0 has tensor model parallel group: [0]\n", + "[NeMo I 2023-05-30 14:11:11 megatron_init:259] All tensor model parallel group ranks: [[0]]\n", + "[NeMo I 2023-05-30 14:11:11 megatron_init:260] Rank 0 has tensor model parallel rank: 0\n", + "[NeMo I 2023-05-30 14:11:11 megatron_init:274] Rank 0 has pipeline model parallel group: [0]\n", + "[NeMo I 2023-05-30 14:11:11 megatron_init:286] Rank 0 has embedding group: [0]\n", + "[NeMo I 2023-05-30 14:11:11 megatron_init:292] All pipeline model parallel group ranks: [[0]]\n", + "[NeMo I 2023-05-30 14:11:11 megatron_init:293] Rank 0 has pipeline model parallel rank 0\n", + "[NeMo I 2023-05-30 14:11:11 megatron_init:294] All embedding group ranks: [[0]]\n", + "[NeMo I 2023-05-30 14:11:11 megatron_init:295] Rank 0 has embedding rank: 0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[NeMo W 2023-05-30 14:11:11 modelPT:244] You tried to register an artifact under config key=tokenizer.vocab_file but an artifact for it has already been registered.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2023-05-30 14:11:11 tokenizer_utils:204] Getting Megatron tokenizer for pretrained model name: megatron-gpt-345m, custom vocab file: /tmp/tmp5lxz3z8d/bfcdca5e44814366bdb5dcd651325152_gpt2-vocab.json, and merges file: /tmp/tmp5lxz3z8d/315a11fd68be49d6abdb34363e8c4997_gpt2-merge.txt\n", + "[NeMo I 2023-05-30 14:11:11 tokenizer_utils:130] Getting HuggingFace AutoTokenizer with pretrained_model_name: gpt2, vocab_file: /tmp/tmp5lxz3z8d/bfcdca5e44814366bdb5dcd651325152_gpt2-vocab.json, merges_files: /tmp/tmp5lxz3z8d/315a11fd68be49d6abdb34363e8c4997_gpt2-merge.txt, special_tokens_dict: {}, and use_fast: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using sep_token, but it is not set yet.\n", + "Using cls_token, but it is not set yet.\n", + "Using pad_token, but it is not set yet.\n", + "Using mask_token, but it is not set yet.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2023-05-30 14:11:12 megatron_base_model:238] Padded vocab_size: 50304, original vocab_size: 50257, dummy tokens: 47.\n", + "[NeMo I 2023-05-30 14:11:12 build_model:143] > number of parameters on (tensor, pipeline) model parallel rank (0, 0): 354871296\n", + "[NeMo I 2023-05-30 14:11:12 megatron_gpt_peft_models:56] Before adding PEFT params:\n", + " | Name | Type | Params\n", + " -----------------------------------\n", + " 0 | model | GPTModel | 354 M \n", + " -----------------------------------\n", + " 354 M Trainable params\n", + " 0 Non-trainable params\n", + " 354 M Total params\n", + " 1,419.485 Total estimated model params size (MB)\n", + "[NeMo I 2023-05-30 14:11:12 megatron_gpt_peft_models:65] After adding PEFT params:\n", + " | Name | Type | Params\n", + " -----------------------------------\n", + " 0 | model | GPTModel | 358 M \n", + " -----------------------------------\n", + " 358 M Trainable params\n", + " 0 Non-trainable params\n", + " 358 M Total params\n", + " 1,432.068 Total estimated model params size (MB)\n", + "[NeMo I 2023-05-30 14:11:13 nlp_overrides:491] Model MegatronGPTLoRAModel was successfully restored from /home/adithyare/NeMo/tutorials/nlp/megatron_gpt_345m.nemo.\n" + ] + } + ], + "source": [ + "save_restore_connector = PEFTSaveRestoreConnector(\n", + " peft_model_nemo_path=config_eval.model.peft.restore_from_path, peft_model_ckpt_path=None,\n", + ")\n", + "from nemo.collections.nlp.models.nlp_model import NLPModel\n", + "model_eval = MegatronGPTPEFTModel.restore_from(\n", + " restore_path=config_eval.model.restore_from_path,\n", + " trainer=trainer,\n", + " override_config_path=peft_model_cfg,\n", + " save_restore_connector=save_restore_connector,\n", + ")\n", + "\n", + "model_eval.freeze()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "012439d9", + "metadata": {}, + "source": [ + "Next, we prepare the dataset and the dataloader objects that the model will perform inference on." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "12c390f8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2023-05-30 14:11:18 text_memmap_dataset:104] Building data files\n", + "[NeMo I 2023-05-30 14:11:18 text_memmap_dataset:343] Processing 1 data files using 12 workers\n", + "[NeMo I 2023-05-30 14:11:18 text_memmap_dataset:349] Time building 0 / 1 mem-mapped files: 0:00:00.706630\n", + "[NeMo I 2023-05-30 14:11:18 text_memmap_dataset:114] Loading data files\n", + "[NeMo I 2023-05-30 14:11:18 text_memmap_dataset:205] Loading data/SQuAD/squad_short_val.jsonl\n", + "[NeMo I 2023-05-30 14:11:18 text_memmap_dataset:117] Time loading 1 mem-mapped files: 0:00:00.001054\n", + "[NeMo I 2023-05-30 14:11:18 text_memmap_dataset:121] Computing global indices\n" + ] + } + ], + "source": [ + "_test_ds = model_eval._build_dataset(peft_model_cfg.data.test_ds, is_train=False)\n", + "from torch.utils.data import DataLoader\n", + "request_dl = DataLoader(\n", + " dataset=_test_ds[0],\n", + " batch_size=peft_model_cfg.data.test_ds.global_batch_size,\n", + " collate_fn=_test_ds[0].collate_fn,\n", + ")\n", + "config_inference = OmegaConf.to_container(config_eval.inference, resolve=True)\n", + "model_eval.set_inference_config(config_inference)\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "76592a1e", + "metadata": {}, + "source": [ + "And finally, we call `trainer.predict` which triggers the inference process. The `response` object contains the outputs of the model." + ] + }, + { + "cell_type": "markdown", + "id": "733c172c", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "5ba6a70c", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n", + "[NeMo W 2023-05-30 14:11:30 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:224: PossibleUserWarning: The dataloader, predict_dataloader 0, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 24 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n", + " rank_zero_warn(\n", + " \n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ddcc3ce26ed74665a8429953b929a037", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Predicting: 100it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[NeMo W 2023-05-30 14:11:30 nemo_logging:349] /home/adithyare/NeMo/nemo/collections/nlp/modules/common/text_generation_utils.py:306: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /opt/conda/conda-bld/pytorch_1678402379298/work/torch/csrc/utils/tensor_numpy.cpp:206.)\n", + " string_tensor = torch.as_tensor(\n", + " \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:Which NFL team represented the AFC at Super Bowl 50?\n", + "\n", + "Assistant: Denver Broncos\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:Which NFL team represented the NFC at Super Bowl 50?\n", + "\n", + "Assistant: Denver Broncos\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:Where did Super Bowl 50 take place?\n", + "\n", + "Assistant: Santa Clara, California\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:Which NFL team won Super Bowl 50?\n", + "\n", + "Assistant: Denver Broncos\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What color was used to emphasize the 50th anniversary of the Super Bowl?\n", + "\n", + "Assistant: gold\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What was the theme of Super Bowl 50?\n", + "\n", + "Assistant: \"Gold\"\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What day was the game played on?\n", + "\n", + "Assistant: February 7, 2016\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What is the AFC short for?\n", + "\n", + "Assistant: Super Bowl 50\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What was the theme of Super Bowl 50?\n", + "\n", + "Assistant: \"Gold\"\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What does AFC stand for?\n", + "\n", + "Assistant: Super Bowl L\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What day was the Super Bowl played on?\n", + "\n", + "Assistant: February 7, 2016\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:Who won Super Bowl 50?\n", + "\n", + "Assistant: Denver Broncos\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What venue did Super Bowl 50 take place in?\n", + "\n", + "Assistant: Levi's Stadium\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What city did Super Bowl 50 take place in?\n", + "\n", + "Assistant: San Francisco\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:If Roman numerals were used, what would Super Bowl 50 have been called?\n", + "\n", + "Assistant: Super Bowl L\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:Super Bowl 50 decided the NFL champion for what season?\n", + "\n", + "Assistant: 2015\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What year did the Denver Broncos secure a Super Bowl title for the third time?\n", + "\n", + "Assistant: 2015\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What city did Super Bowl 50 take place in?\n", + "\n", + "Assistant: San Francisco\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What stadium did Super Bowl 50 take place in?\n", + "\n", + "Assistant: Levi's Stadium\n", + "\n", + "\n", + "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What was the final score of Super Bowl 50? \n", + "\n", + "Assistant: 24–10\n", + "\n", + "\n" + ] + } + ], + "source": [ + "response = trainer.predict(model_eval, request_dl)\n", + "for batch in response:\n", + " for s in batch['sentences']:\n", + " print(f\"{s}\\n\\n\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb index 0c0b8163622c..90dacd5f6b8d 100644 --- a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb +++ b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb @@ -30,7 +30,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -58,7 +58,7 @@ "For detailed parameter setting and execution of speaker diarization, refer to this [Diarization Inference](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb) tutorial.\n", "\n", "\n", - "An example script that runs ASR and speaker diarization together can be found at [ASR with Diarization](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/speaker_tasks/diarization/clustering_diarizer/offline_diar_with_asr_infer.py).\n", + "An example script that runs ASR and speaker diarization together can be found at [ASR with Diarization](https://github.com/NVIDIA/NeMo/blob/main/examples/speaker_tasks/diarization/clustering_diarizer/offline_diar_with_asr_infer.py).\n", "\n", "### Speaker diarization in ASR pipeline\n", "\n", @@ -196,7 +196,7 @@ "DOMAIN_TYPE = \"meeting\" # Can be meeting or telephonic based on domain type of the audio file\n", "CONFIG_FILE_NAME = f\"diar_infer_{DOMAIN_TYPE}.yaml\"\n", "\n", - "CONFIG_URL = f\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/speaker_tasks/diarization/conf/inference/{CONFIG_FILE_NAME}\"\n", + "CONFIG_URL = f\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/speaker_tasks/diarization/conf/inference/{CONFIG_FILE_NAME}\"\n", "\n", "if not os.path.exists(os.path.join(data_dir,CONFIG_FILE_NAME)):\n", " CONFIG = wget.download(CONFIG_URL, data_dir)\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb index 93ff3ed97b2e..6caaa35e0765 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb @@ -23,7 +23,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -62,9 +62,9 @@ "* **with oracle VAD**: use ground-truth speech/non-speech labels. \n", "* **with system VAD**: use speech/non-speech labels generated by an actual VAD model. \n", "\n", - "We will first demonstrate how to perform diarization with a oracle VAD timestamps (we assume we already have speech timestamps) and pretrained speaker embedding extractor model which can be found in tutorial for [Speaker Identification and Verification in NeMo](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb).\n", + "We will first demonstrate how to perform diarization with a oracle VAD timestamps (we assume we already have speech timestamps) and pretrained speaker embedding extractor model which can be found in tutorial for [Speaker Identification and Verification in NeMo](https://github.com/NVIDIA/NeMo/blob/main/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb).\n", "\n", - "In the following section, we will also show how to perform VAD and then diarization if ground truth timestamp speech were not available (non-oracle VAD). We also have tutorials for [VAD training in NeMo](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/asr/Voice_Activity_Detection.ipynb) and [online offline microphone inference](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb), where you can custom your model and training/finetuning on your own data.\n", + "In the following section, we will also show how to perform VAD and then diarization if ground truth timestamp speech were not available (non-oracle VAD). We also have tutorials for [VAD training in NeMo](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/Voice_Activity_Detection.ipynb) and [online offline microphone inference](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb), where you can custom your model and training/finetuning on your own data.\n", "\n", "For demonstration purposes we would be using simulated audio from [an4 dataset](http://www.speech.cs.cmu.edu/databases/an4/)." ] @@ -140,7 +140,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - " We use a default multi-scale setting in [diar_infer_telephonic.yaml](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/speaker_tasks/diarization/conf/inference/diar_infer_telephonic.yaml) which has 5 scales from 1.5 s to 0.5 s, 50% overlap and equal weights. Note that only the ratio between numbers in `multiscale_weights` since the fused affinity matrix is normalized. For example, \\[1,1,1,1,1\\] and \\[0.5,0.5,0.5,0.5,0.5\\] will lead to the exactly same result." + " We use a default multi-scale setting in [diar_infer_telephonic.yaml](https://github.com/NVIDIA/NeMo/blob/main/examples/speaker_tasks/diarization/conf/inference/diar_infer_telephonic.yaml) which has 5 scales from 1.5 s to 0.5 s, 50% overlap and equal weights. Note that only the ratio between numbers in `multiscale_weights` since the fused affinity matrix is normalized. For example, \\[1,1,1,1,1\\] and \\[0.5,0.5,0.5,0.5,0.5\\] will lead to the exactly same result." ] }, { @@ -191,7 +191,7 @@ "MSDD models employ pairwise (two-speaker) unit-model for both training and inference. While training, pairwise model is trained on data samples with two speakers or two-speaker subset from data samples with more than two speakers. \n", "In inference mode, we retrieve all possible pairs from the estimated number of speakers and average the results. For example, if there are four speakers `(A, B, C, D)`, we extract 6 pairs: `(A,B)`, `(A,C)`, `(A,D)`, `(B,C)`, `(B,D)`, `(C,D)`. Finally, the sigmoid outputs are averaged. In this way, MSDD can deal with flexible number of speakers using a pairwise model. \n", "\n", - "The detailed information on MSDD model and model training can be found in tutorial on [Speaker Diarization Training](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb). " + "The detailed information on MSDD model and model training can be found in tutorial on [Speaker Diarization Training](https://github.com/NVIDIA/NeMo/blob/main/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb). " ] }, { @@ -399,7 +399,7 @@ "from omegaconf import OmegaConf\n", "MODEL_CONFIG = os.path.join(data_dir,'diar_infer_telephonic.yaml')\n", "if not os.path.exists(MODEL_CONFIG):\n", - " config_url = \"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/speaker_tasks/diarization/conf/inference/diar_infer_telephonic.yaml\"\n", + " config_url = \"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/speaker_tasks/diarization/conf/inference/diar_infer_telephonic.yaml\"\n", " MODEL_CONFIG = wget.download(config_url,data_dir)\n", "\n", "config = OmegaConf.load(MODEL_CONFIG)\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb index ab5cab58bc69..7ecbc63af67d 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "\n", "! git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", @@ -197,9 +197,9 @@ "\n", "- Please skip this section and go directly to [Prepare Training data for MSDD](#Prepare-Training-data-for-MSDD) section if you have your own speaker diarization dataset. \n", "\n", - "In this tutorial, we use [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tools/Multispeaker_Simulator.ipynb) and the Librispeech corpus to generate a toy training dataset for demonstration purpose. You can replace the simulated dataset with your own datasets if you have proper speaker annotations (RTTM files) for the dataset. If you do not have access to any speaker diarization datasets, you can use [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tools/Multispeaker_Simulator.ipynb) by generating a good amount of data samples to meet your needs. \n", + "In this tutorial, we use [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tools/Multispeaker_Simulator.ipynb) and the Librispeech corpus to generate a toy training dataset for demonstration purpose. You can replace the simulated dataset with your own datasets if you have proper speaker annotations (RTTM files) for the dataset. If you do not have access to any speaker diarization datasets, you can use [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tools/Multispeaker_Simulator.ipynb) by generating a good amount of data samples to meet your needs. \n", "\n", - "For more details regarding data simulator, please follow the descriptions in [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tools/Multispeaker_Simulator.ipynb) and we will not cover configurations and detailed process of data simulation in this tutorial. \n" + "For more details regarding data simulator, please follow the descriptions in [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tools/Multispeaker_Simulator.ipynb) and we will not cover configurations and detailed process of data simulation in this tutorial. \n" ] }, { @@ -232,7 +232,7 @@ "source": [ "import os\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "\n", "# download scripts if not already there \n", "if not os.path.exists('NeMo/scripts'):\n", diff --git a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb index f956334b892c..2cd3ded24e8b 100644 --- a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb +++ b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb @@ -27,7 +27,7 @@ "!pip install text-unidecode\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "# Install TorchAudio\n", @@ -58,7 +58,7 @@ "source": [ "In this tutorial, we shall first train these embeddings on speaker-related datasets, and then get speaker embeddings from a pretrained network for a new dataset. Since Google Colab has very slow read-write speeds, I'll be demonstrating this tutorial using [an4](http://www.speech.cs.cmu.edu/databases/an4/). \n", "\n", - "Instead, if you'd like to try on a bigger dataset like [hi-mia](https://arxiv.org/abs/1912.01231) use the [get_hi-mia-data.py](https://github.com/NVIDIA/NeMo/tree/r1.19.0/scripts/dataset_processing/speaker_tasks/get_hi-mia_data.py) script to download the necessary files, extract them, and resample to 16Khz if any of these samples are not at 16Khz. " + "Instead, if you'd like to try on a bigger dataset like [hi-mia](https://arxiv.org/abs/1912.01231) use the [get_hi-mia-data.py](https://github.com/NVIDIA/NeMo/tree/main/scripts/dataset_processing/speaker_tasks/get_hi-mia_data.py) script to download the necessary files, extract them, and resample to 16Khz if any of these samples are not at 16Khz. " ] }, { @@ -85,7 +85,7 @@ "# Download the dataset. This will take a few moments...\n", "print(\"******\")\n", "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\n", - " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz' # for the original source, please visit http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz \n", + " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz'\n", " an4_path = wget.download(an4_url, data_dir)\n", " print(f\"Dataset downloaded at: {an4_path}\")\n", "else:\n", @@ -276,7 +276,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note: All the following steps are just for explanation of each section, but one can use the provided [training script](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/speaker_tasks/recognition/speaker_reco.py) to launch training in the command line." + "Note: All the following steps are just for explanation of each section, but one can use the provided [training script](https://github.com/NVIDIA/NeMo/blob/main/examples/speaker_tasks/recognition/speaker_reco.py) to launch training in the command line." ] }, { @@ -760,7 +760,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note: You may use [finetune-script](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/speaker_tasks/recognition/speaker_reco_finetune.py) to launch training in the command line. Following is just a demonstration of the script" + "Note: You may use [finetune-script](https://github.com/NVIDIA/NeMo/blob/main/examples/speaker_tasks/recognition/speaker_reco_finetune.py) to launch training in the command line. Following is just a demonstration of the script" ] }, { diff --git a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb index 5f1ffd27ea05..770d9e7894c9 100644 --- a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb +++ b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb @@ -35,7 +35,7 @@ "id": "d4KCUoxSpdoZ" }, "source": [ - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "\n", "\"\"\"\n", "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", @@ -126,7 +126,7 @@ "id": "S1DZk-inQGTI" }, "source": [ - "`TOOLS_DIR` contains scripts that we are going to need during the next steps, all necessary scripts could be found [here](https://github.com/NVIDIA/NeMo/tree/r1.19.0/tools/ctc_segmentation/scripts)." + "`TOOLS_DIR` contains scripts that we are going to need during the next steps, all necessary scripts could be found [here](https://github.com/NVIDIA/NeMo/tree/main/tools/ctc_segmentation/scripts)." ] }, { @@ -280,7 +280,7 @@ "* `max_length` argument - max number of words in a segment for alignment (used only if there are no punctuation marks present in the original text. Long non-speech segments are better for segments split and are more likely to co-occur with punctuation marks. Random text split could deteriorate the quality of the alignment.\n", "* out-of-vocabulary words will be removed based on pre-trained ASR model vocabulary, and the text will be changed to lowercase \n", "* sentences for alignment with the original punctuation and capitalization will be stored under `$OUTPUT_DIR/processed/*_with_punct.txt`\n", - "* numbers will be converted from written to their spoken form with `num2words` package. For English, it's recommended to use NeMo normalization tool use `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. See [https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n", + "* numbers will be converted from written to their spoken form with `num2words` package. For English, it's recommended to use NeMo normalization tool use `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`https://github.com/NVIDIA/NeMo-text-processing/blob/main/tutorials/Text_(Inverse)_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo-text-processing/blob/main/tutorials/Text_(Inverse)_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. See [https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n", "\n", "### Audio preprocessing:\n", "* non '.wav' audio files will be converted to `.wav` format\n", @@ -699,7 +699,7 @@ "source": [ "# Next Steps\n", "\n", - "- Check out [NeMo Speech Data Explorer tool](https://github.com/NVIDIA/NeMo/tree/r1.19.0/tools/speech_data_explorer#speech-data-explorer) to interactively evaluate the aligned segments.\n", + "- Check out [NeMo Speech Data Explorer tool](https://github.com/NVIDIA/NeMo/tree/main/tools/speech_data_explorer#speech-data-explorer) to interactively evaluate the aligned segments.\n", "- Try Audio-based normalization tool." ] }, diff --git a/tutorials/tools/Multispeaker_Simulator.ipynb b/tutorials/tools/Multispeaker_Simulator.ipynb index 8264854dfd59..2841b1df62d1 100644 --- a/tutorials/tools/Multispeaker_Simulator.ipynb +++ b/tutorials/tools/Multispeaker_Simulator.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "\n", "! git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", @@ -326,7 +326,7 @@ "outputs": [], "source": [ "if not os.path.exists(\"multispeaker_data_analysis.py\"):\n", - " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/speaker_tasks/multispeaker_data_analysis.py\n", + " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/speaker_tasks/multispeaker_data_analysis.py\n", "\n", "from multispeaker_data_analysis import run_multispeaker_data_analysis\n", "\n", diff --git a/tutorials/tts/Aligner_Inference_Examples.ipynb b/tutorials/tts/Aligner_Inference_Examples.ipynb index e113af5565ae..3653b9e3ed8d 100644 --- a/tutorials/tts/Aligner_Inference_Examples.ipynb +++ b/tutorials/tts/Aligner_Inference_Examples.ipynb @@ -39,7 +39,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", @@ -700,7 +700,7 @@ "## Resources\n", "\n", "- For more information about the Aligner architecture, check out the [RAD-TTS Aligner paper](https://arxiv.org/abs/2108.10447).\n", - "- If you would like to run disambiguation on a large batch of sentences, try out the [Aligner disambiguation example script](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/tts/aligner_heteronym_disambiguation.py)." + "- If you would like to run disambiguation on a large batch of sentences, try out the [Aligner disambiguation example script](https://github.com/NVIDIA/NeMo/blob/main/examples/tts/aligner_heteronym_disambiguation.py)." ] }, { diff --git a/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb b/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb index 0f501f89a90e..647242294197 100644 --- a/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb +++ b/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb @@ -57,7 +57,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "# If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !pip install librosa numpy matplotlib" ] @@ -601,9 +601,9 @@ "source": [ "## Additional NeMo Resources\n", "\n", - "If you are unsure where to begin for training a TTS model, you may want to start with the [FastPitch and Mixer-TTS Training notebook](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) or the [NeMo TTS Primer notebook](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/NeMo_TTS_Primer.ipynb). For fine-tuning, there is also the [FastPitch Fine-Tuning notebook](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_Finetuning.ipynb).\n", + "If you are unsure where to begin for training a TTS model, you may want to start with the [FastPitch and Mixer-TTS Training notebook](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) or the [NeMo TTS Primer notebook](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/NeMo_TTS_Primer.ipynb). For fine-tuning, there is also the [FastPitch Fine-Tuning notebook](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_Finetuning.ipynb).\n", "\n", - "For some guidance on how to load a trained model and perform inference to generate mels or waveforms, check out how it's done in the [Inference notebook](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/Inference_ModelSelect.ipynb). Important functions to know are include `from_pretrained()` (if loading from an NGC model) and `restore_from()` (if loading a `.nemo` file). See the [NeMo Primer notebook](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/00_NeMo_Primer.ipynb) for more general information about model training, saving, and loading." + "For some guidance on how to load a trained model and perform inference to generate mels or waveforms, check out how it's done in the [Inference notebook](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/Inference_ModelSelect.ipynb). Important functions to know are include `from_pretrained()` (if loading from an NGC model) and `restore_from()` (if loading a `.nemo` file). See the [NeMo Primer notebook](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/00_NeMo_Primer.ipynb) for more general information about model training, saving, and loading." ] } ], diff --git a/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb index 95bc3805030c..81d817f11016 100644 --- a/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb +++ b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "ea49c0e5", + "id": "ed07e3c2", "metadata": {}, "source": [ "# FastPitch Adapter Finetuning\n", @@ -16,14 +16,14 @@ "2. **Fine-tune HiFiGAN on adaptation data**: fine-tune a vocoder for the fine-tuned multi-speaker FastPitch\n", "* Dataset Preparation: extract mel-spectrograms from fine-tuned FastPitch.\n", "* Training: fine-tune HiFiGAN with fine-tuned adaptation data.\n", - "3. **Inference**: generate speech from adpated FastPitch\n", + "3. **Inference**: generate speech from adapted FastPitch\n", "* Load Model: load pre-trained multi-speaker FastPitch with **fine-tuned adapters**.\n", "* Output Audio: generate audio files." ] }, { "cell_type": "markdown", - "id": "37259555", + "id": "772e7404", "metadata": {}, "source": [ "# License\n", @@ -46,7 +46,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d61cbea5", + "id": "8f799aa0", "metadata": {}, "outputs": [], "source": [ @@ -59,7 +59,7 @@ "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", - "# BRANCH = 'r1.19.0'\n", + "# BRANCH = 'r1.20.0'\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget unidecode pynini==2.1.4 scipy==1.7.3\n", "# !python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", @@ -73,7 +73,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fef9aba9", + "id": "0a4d3371", "metadata": {}, "outputs": [], "source": [ @@ -83,7 +83,7 @@ { "cell_type": "code", "execution_count": null, - "id": "49bc38ab", + "id": "25d94e3a", "metadata": {}, "outputs": [], "source": [ @@ -95,7 +95,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9459f9dc", + "id": "79cb9932", "metadata": {}, "outputs": [], "source": [ @@ -113,7 +113,7 @@ { "cell_type": "code", "execution_count": null, - "id": "eb26f54d", + "id": "ec7fed4e", "metadata": {}, "outputs": [], "source": [ @@ -131,7 +131,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12b28329", + "id": "f815deff", "metadata": {}, "outputs": [], "source": [ @@ -149,7 +149,7 @@ }, { "cell_type": "markdown", - "id": "30996769", + "id": "539e8f0d", "metadata": {}, "source": [ "# 1. Fine-tune FastPitch on adaptation data" @@ -157,17 +157,17 @@ }, { "cell_type": "markdown", - "id": "2f5f5945", + "id": "270ed53f", "metadata": {}, "source": [ "## a. Data Preparation\n", - "For our tutorial, we use small part of VCTK dataset with a new target speaker (p267). Usually, the audios should have total duration more than 15 mintues." + "For our tutorial, we use small part of VCTK dataset with a new target speaker (p267). Usually, the audios should have total duration more than 15 minutes." ] }, { "cell_type": "code", "execution_count": null, - "id": "8047f988", + "id": "21ce4a34", "metadata": {}, "outputs": [], "source": [ @@ -177,7 +177,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b8242769", + "id": "2d5edbe5", "metadata": {}, "outputs": [], "source": [ @@ -188,7 +188,7 @@ { "cell_type": "code", "execution_count": null, - "id": "79cf8539", + "id": "c1de2249", "metadata": {}, "outputs": [], "source": [ @@ -198,7 +198,7 @@ }, { "cell_type": "markdown", - "id": "35c3b97b", + "id": "e657c830", "metadata": {}, "source": [ "## b. Preprocessing" @@ -206,17 +206,17 @@ }, { "cell_type": "markdown", - "id": "ba3a7c3a", + "id": "4d0076d4", "metadata": {}, "source": [ "### Add absolute file path in manifest\n", - "We use absoluate path for audio_filepath to get the audio during training." + "We use absolute path for audio_filepath to get the audio during training." ] }, { "cell_type": "code", "execution_count": null, - "id": "8bc485b5", + "id": "7ccb5fb6", "metadata": {}, "outputs": [], "source": [ @@ -226,7 +226,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f9cb8ef5", + "id": "23dc1ba6", "metadata": {}, "outputs": [], "source": [ @@ -241,18 +241,18 @@ }, { "cell_type": "markdown", - "id": "f92054d5", + "id": "b852072b", "metadata": {}, "source": [ "### Extract Supplementary Data\n", "\n", - "As mentioned in the [FastPitch and MixerTTS training tutorial](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) - To accelerate and stabilize our training, we also need to extract pitch for every audio, estimate pitch statistics (mean, std, min, and max). To do this, all we need to do is iterate over our data one time, via `extract_sup_data.py` script." + "As mentioned in the [FastPitch and MixerTTS training tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) - To accelerate and stabilize our training, we also need to extract pitch for every audio, estimate pitch statistics (mean, std, min, and max). To do this, all we need to do is iterate over our data one time, via `extract_sup_data.py` script." ] }, { "cell_type": "code", "execution_count": null, - "id": "0adc618b", + "id": "f6bdd226", "metadata": {}, "outputs": [], "source": [ @@ -267,7 +267,7 @@ }, { "cell_type": "markdown", - "id": "96dd5fe1", + "id": "fdae4e4e", "metadata": {}, "source": [ "After running the above command line, you will observe a new folder NeMoTTS_sup_data/pitch and printouts of pitch statistics like below. Specify these values to the FastPitch training configurations. We will be there in the following section.\n", @@ -280,7 +280,7 @@ { "cell_type": "code", "execution_count": null, - "id": "23703c76", + "id": "ac8fae15", "metadata": {}, "outputs": [], "source": [ @@ -295,7 +295,7 @@ }, { "cell_type": "markdown", - "id": "7c70e5db", + "id": "c9f98c86", "metadata": {}, "source": [ "## c. Model Setting\n", @@ -305,7 +305,7 @@ { "cell_type": "code", "execution_count": null, - "id": "439f2f82", + "id": "fd8c66fb", "metadata": {}, "outputs": [], "source": [ @@ -318,7 +318,7 @@ { "cell_type": "code", "execution_count": null, - "id": "30f865cb", + "id": "ff535c8f", "metadata": {}, "outputs": [], "source": [ @@ -350,7 +350,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e92910b5", + "id": "4f457111", "metadata": {}, "outputs": [], "source": [ @@ -360,7 +360,7 @@ }, { "cell_type": "markdown", - "id": "7f03219f", + "id": "ef40def3", "metadata": {}, "source": [ "### Precompute Speaker Embedding\n", @@ -370,7 +370,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c2a35241", + "id": "30664bcb", "metadata": {}, "outputs": [], "source": [ @@ -405,7 +405,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5fa1b309", + "id": "43001c75", "metadata": {}, "outputs": [], "source": [ @@ -417,7 +417,7 @@ }, { "cell_type": "markdown", - "id": "3b77e95f", + "id": "42915e02", "metadata": {}, "source": [ "## d. Training" @@ -426,21 +426,21 @@ { "cell_type": "code", "execution_count": null, - "id": "9e8c3740", + "id": "884bc2d0", "metadata": {}, "outputs": [], "source": [ "phoneme_dict_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"cmudict-0.7b_nv22.10\"))\n", "heteronyms_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"heteronyms-052722\"))\n", "\n", - "# Copy and Paste the PITCH_MEAN and PITCH_STD from previous steps (train_manifest) to overide pitch_mean and pitch_std configs below.\n", + "# Copy and Paste the PITCH_MEAN and PITCH_STD from previous steps (train_manifest) to override pitch_mean and pitch_std configs below.\n", "PITCH_MEAN=175.48513793945312\n", "PITCH_STD=42.3786735534668" ] }, { "cell_type": "markdown", - "id": "19bb6d8b", + "id": "6f04fc86", "metadata": {}, "source": [ "### Important notes\n", @@ -451,13 +451,16 @@ "* Other optional arguments based on your preference:\n", " * batch_size\n", " * exp_manager\n", - " * trainer" + " * trainer\n", + " * model.unfreeze_aligner=true\n", + " * model.unfreeze_duration_predictor=true\n", + " * model.unfreeze_pitch_predictor=true" ] }, { "cell_type": "code", "execution_count": null, - "id": "8c8cbea2", + "id": "7ae8383a", "metadata": {}, "outputs": [], "source": [ @@ -476,9 +479,11 @@ "~model.speaker_encoder.gst_module \\\n", "model.train_ds.dataloader_params.batch_size=8 \\\n", "model.validation_ds.dataloader_params.batch_size=8 \\\n", + "+model.text_tokenizer.add_blank_at=True \\\n", "model.optim.name=adam \\\n", - "model.optim.lr=2e-4 \\\n", - "~model.optim.sched \\\n", + "model.optim.lr=1e-3 \\\n", + "model.optim.sched.warmup_steps=0 \\\n", + "+model.optim.sched.min_lr=1e-4 \\\n", "exp_manager.exp_dir={logs_dir} \\\n", "+exp_manager.create_wandb_logger=True \\\n", "+exp_manager.wandb_logger_kwargs.name=\"tutorial-FastPitch-finetune-adaptation\" \\\n", @@ -495,7 +500,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fe5c7b2f", + "id": "39d3074c", "metadata": {}, "outputs": [], "source": [ @@ -510,7 +515,7 @@ }, { "cell_type": "markdown", - "id": "75856d0e", + "id": "9e9a1f45", "metadata": {}, "source": [ "# 3. Fine-tune HiFiGAN on adaptation data" @@ -518,7 +523,7 @@ }, { "cell_type": "markdown", - "id": "3444698f", + "id": "deec135f", "metadata": {}, "source": [ "## a. Dataset Preparation\n", @@ -528,7 +533,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bb2fd64d", + "id": "1aecaa68", "metadata": {}, "outputs": [], "source": [ @@ -554,7 +559,7 @@ { "cell_type": "code", "execution_count": null, - "id": "da69cb66", + "id": "6a153ea0", "metadata": {}, "outputs": [], "source": [ @@ -564,7 +569,7 @@ }, { "cell_type": "markdown", - "id": "fa2cbb02", + "id": "b05cd550", "metadata": {}, "source": [ "## b. Training" @@ -573,7 +578,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ffdce5d5", + "id": "e5d5f281", "metadata": {}, "outputs": [], "source": [ @@ -601,7 +606,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9e6376cf", + "id": "9c1c42f3", "metadata": {}, "outputs": [], "source": [ @@ -613,7 +618,7 @@ }, { "cell_type": "markdown", - "id": "e5076e51", + "id": "0665ac78", "metadata": {}, "source": [ "# 4. Inference" @@ -622,7 +627,7 @@ { "cell_type": "code", "execution_count": null, - "id": "52358549", + "id": "5f4afb24", "metadata": {}, "outputs": [], "source": [ @@ -633,7 +638,7 @@ }, { "cell_type": "markdown", - "id": "9e96ee13", + "id": "0d9ff309", "metadata": {}, "source": [ "## a. Load Model" @@ -642,17 +647,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2cb5d524", - "metadata": {}, - "outputs": [], - "source": [ - "wave_model = WaveformFeaturizer(sample_rate=sample_rate)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "32dbd30c", + "id": "81e4dee0", "metadata": {}, "outputs": [], "source": [ @@ -668,7 +663,7 @@ { "cell_type": "code", "execution_count": null, - "id": "74a7ad03", + "id": "1eaef8be", "metadata": {}, "outputs": [], "source": [ @@ -678,7 +673,7 @@ }, { "cell_type": "markdown", - "id": "4f882975", + "id": "837bdbab", "metadata": {}, "source": [ "## b. Output Audio" @@ -687,26 +682,14 @@ { "cell_type": "code", "execution_count": null, - "id": "2178a8ef", + "id": "fef139cb", "metadata": {}, "outputs": [], "source": [ - "def gt_spectrogram(audio_path, wave_model, spec_gen_model):\n", - " features = wave_model.process(audio_path, trim=False)\n", - " audio, audio_length = features, torch.tensor(features.shape[0]).long()\n", - " audio = audio.unsqueeze(0).to(device=spec_gen_model.device)\n", - " audio_length = audio_length.unsqueeze(0).to(device=spec_gen_model.device)\n", - " with torch.no_grad():\n", - " spectrogram, spec_len = spec_gen_model.preprocessor(input_signal=audio, length=audio_length)\n", - " return spectrogram, spec_len\n", - "\n", - "def gen_spectrogram(text, spec_gen_model, reference_spec, reference_spec_lens):\n", + "def gen_spectrogram(text, spec_gen_model):\n", " parsed = spec_gen_model.parse(text)\n", " with torch.no_grad(): \n", - " spectrogram = spec_gen_model.generate_spectrogram(tokens=parsed, \n", - " reference_spec=reference_spec, \n", - " reference_spec_lens=reference_spec_lens)\n", - "\n", + " spectrogram = spec_gen_model.generate_spectrogram(tokens=parsed)\n", " return spectrogram\n", " \n", "def synth_audio(vocoder_model, spectrogram): \n", @@ -720,16 +703,10 @@ { "cell_type": "code", "execution_count": null, - "id": "766154e3", + "id": "b98ac280", "metadata": {}, "outputs": [], "source": [ - "# Reference Audio\n", - "with open(train_manifest, \"r\") as f:\n", - " for i, line in enumerate(f):\n", - " reference_record = json.loads(line)\n", - " break\n", - " \n", "# Validatation Audio\n", "num_val = 3\n", "val_records = []\n", @@ -743,27 +720,19 @@ { "cell_type": "code", "execution_count": null, - "id": "dfa71ca6", + "id": "b17446f9", "metadata": {}, "outputs": [], "source": [ "for i, val_record in enumerate(val_records):\n", - " reference_spec, reference_spec_lens = gt_spectrogram(reference_record['audio_filepath'], wave_model, spec_model)\n", - " reference_spec = reference_spec.to(spec_model.device)\n", - " spec_pred = gen_spectrogram(val_record['text'], spec_model,\n", - " reference_spec=reference_spec, \n", - " reference_spec_lens=reference_spec_lens)\n", - "\n", + " spec_pred = gen_spectrogram(val_record['text'], spec_model)\n", " audio_gen = synth_audio(vocoder_model, spec_pred)\n", - " \n", - " audio_ref = ipd.Audio(reference_record['audio_filepath'], rate=sample_rate)\n", + "\n", " audio_gt = ipd.Audio(val_record['audio_filepath'], rate=sample_rate)\n", " audio_gen = ipd.Audio(audio_gen, rate=sample_rate)\n", " \n", " print(\"------\")\n", " print(f\"Text: {val_record['text']}\")\n", - " print('Reference Audio')\n", - " ipd.display(audio_ref)\n", " print('Ground Truth Audio')\n", " ipd.display(audio_gt)\n", " print('Synthesized Audio')\n", @@ -775,18 +744,7 @@ { "cell_type": "code", "execution_count": null, - "id": "51d9d176", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"Pretraind FastPitch: {pretrained_fastpitch_checkpoint}\")\n", - "print(f\"Finetuned Adapter: {finetuned_adapter_checkpoint}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6180a7d2", + "id": "f8f525d1", "metadata": {}, "outputs": [], "source": [ @@ -797,7 +755,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5b33263b", + "id": "66e8ab7d", "metadata": {}, "outputs": [], "source": [] @@ -819,7 +777,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.8.13" } }, "nbformat": 4, diff --git a/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb b/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb index 6685eca56251..6ca7a8975b2d 100644 --- a/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb @@ -61,7 +61,7 @@ "# !pip install wget text-unidecode matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@${BRANCH}#egg=nemo_toolkit[all]\"\n", "\n", "## Install pynini\n", @@ -134,10 +134,10 @@ "\n", "FastPitch is non-autoregressive model for mel-spectrogram generation based on FastSpeech, conditioned on fundamental frequency contours. For more details about model, please refer to the original [paper](https://ieeexplore.ieee.org/abstract/document/9413889). Original [FastPitch model](https://ieeexplore.ieee.org/abstract/document/9413889) uses an external Tacotron 2 model trained on LJSpeech-1.1 to extract training alignments and estimate durations of input symbols. This implementation of FastPitch is based on [Deep Learning Examples](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/FastPitch), which uses an alignment mechanism proposed in [RAD-TTS](https://openreview.net/pdf?id=0NQwnnwAORi) and extended in [TTS Aligner](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9747707).\n", "\n", - "For more information on training a basic FastPitch model, please refer to [FastPitch_MixerTTS_Training.ipynb](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) tutorial.\n", + "For more information on training a basic FastPitch model, please refer to [FastPitch_MixerTTS_Training.ipynb](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) tutorial.\n", "\n", "### HiFi-GAN\n", - "HiFi-GAN is a generative adversarial network (GAN) model that generates audio from mel spectrograms. The generator uses transposed convolutions to upsample mel spectrograms to audio. For more details about the model, please refer to the original [paper](https://arxiv.org/abs/2010.05646). NeMo re-implementation of HiFi-GAN can be found [here](https://github.com/NVIDIA/NeMo/blob/r1.19.0/nemo/collections/tts/models/hifigan.py)." + "HiFi-GAN is a generative adversarial network (GAN) model that generates audio from mel spectrograms. The generator uses transposed convolutions to upsample mel spectrograms to audio. For more details about the model, please refer to the original [paper](https://arxiv.org/abs/2010.05646). NeMo re-implementation of HiFi-GAN can be found [here](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/tts/models/hifigan.py)." ] }, { @@ -780,7 +780,7 @@ "- Finetuning with #1 has artifacts from the original audio (noise) that get passed on as input to the vocoder resulting in artifacts in vocoder output in the form of noise.\n", "- On the other hand, #2.1 (i.e. `Mel spectrogram predicted from FastPitch with groundtruth alignment and duration`) gives the best results because it enables HiFi-GAN to learn mel spectrograms generated by FastPitch as well as duration distributions closer to the real world (i.e. ground truth) durations. \n", "\n", - "From implementation perspective - we follow the same process described in [Finetuning FastPitch for a new speaker](FastPitch_Finetuning.ipynb) - i.e. take the latest checkpoint from FastPitch training and predict spectrograms for each of the input records in `train_manifest.json`, `test_manifest.json` and `val_manifest.json`. NeMo provides an efficient script, [scripts/dataset_processing/tts/generate_mels.py](https://raw.githubusercontent.com/nvidia/NeMo/r1.19.0/scripts/dataset_processing/tts/generate_mels.py), to generate Mel-spectrograms in the directory `NeMoChineseTTS/mels` and also create new JSON manifests with a suffix `_mel` by adding a new key `\"mel_filepath\"`. For example, `train_manifest.json` corresponds to `train_manifest_mel.json` saved in the same directory. You can run the following CLI to obtain the new JSON manifests." + "From implementation perspective - we follow the same process described in [Finetuning FastPitch for a new speaker](FastPitch_Finetuning.ipynb) - i.e. take the latest checkpoint from FastPitch training and predict spectrograms for each of the input records in `train_manifest.json`, `test_manifest.json` and `val_manifest.json`. NeMo provides an efficient script, [scripts/dataset_processing/tts/generate_mels.py](https://raw.githubusercontent.com/nvidia/NeMo/main/scripts/dataset_processing/tts/generate_mels.py), to generate Mel-spectrograms in the directory `NeMoChineseTTS/mels` and also create new JSON manifests with a suffix `_mel` by adding a new key `\"mel_filepath\"`. For example, `train_manifest.json` corresponds to `train_manifest_mel.json` saved in the same directory. You can run the following CLI to obtain the new JSON manifests." ] }, { diff --git a/tutorials/tts/FastPitch_Finetuning.ipynb b/tutorials/tts/FastPitch_Finetuning.ipynb index cf94862263cd..dd5ed0983a58 100755 --- a/tutorials/tts/FastPitch_Finetuning.ipynb +++ b/tutorials/tts/FastPitch_Finetuning.ipynb @@ -57,7 +57,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode \n", @@ -627,7 +627,7 @@ "id": "843674e7", "metadata": {}, "source": [ - "We can then finetune hifigan similarly to fastpitch using NeMo's [hifigan_finetune.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/tts/hifigan_finetune.py) and [hifigan.yaml](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/tts/conf/hifigan/hifigan.yaml):\n", + "We can then finetune hifigan similarly to fastpitch using NeMo's [hifigan_finetune.py](https://github.com/NVIDIA/NeMo/blob/main/examples/tts/hifigan_finetune.py) and [hifigan.yaml](https://github.com/NVIDIA/NeMo/blob/main/examples/tts/conf/hifigan/hifigan.yaml):\n", "\n", "```bash\n", "python examples/tts/hifigan_finetune.py \\\n", diff --git a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb index 512ec8249694..2a05c8dace35 100644 --- a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb @@ -61,7 +61,7 @@ "# !pip install wget text-unidecode matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@${BRANCH}#egg=nemo_toolkit[all]\"\n", "\n", "## Install pynini\n", @@ -133,10 +133,10 @@ "\n", "FastPitch is non-autoregressive model for mel-spectrogram generation based on FastSpeech, conditioned on fundamental frequency contours. For more details about model, please refer to the original [paper](https://ieeexplore.ieee.org/abstract/document/9413889). Original [FastPitch model](https://ieeexplore.ieee.org/abstract/document/9413889) uses an external Tacotron 2 model trained on LJSpeech-1.1 to extract training alignments and estimate durations of input symbols. This implementation of FastPitch is based on [Deep Learning Examples](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/FastPitch), which uses an alignment mechanism proposed in [RAD-TTS](https://openreview.net/pdf?id=0NQwnnwAORi) and extended in [TTS Aligner](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9747707).\n", "\n", - "For more information on training a basic FastPitch model, please refer to [FastPitch_MixerTTS_Training.ipynb](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) tutorial.\n", + "For more information on training a basic FastPitch model, please refer to [FastPitch_MixerTTS_Training.ipynb](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) tutorial.\n", "\n", "### HiFiGAN\n", - "HiFiGAN is a generative adversarial network (GAN) model that generates audio from mel spectrograms. The generator uses transposed convolutions to upsample mel spectrograms to audio. For more details about the model, please refer to the original [paper](https://arxiv.org/abs/2010.05646). NeMo re-implementation of HiFi-GAN can be found [here](https://github.com/NVIDIA/NeMo/blob/r1.19.0/nemo/collections/tts/models/hifigan.py)." + "HiFiGAN is a generative adversarial network (GAN) model that generates audio from mel spectrograms. The generator uses transposed convolutions to upsample mel spectrograms to audio. For more details about the model, please refer to the original [paper](https://arxiv.org/abs/2010.05646). NeMo re-implementation of HiFi-GAN can be found [here](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/tts/models/hifigan.py)." ] }, { @@ -649,7 +649,7 @@ "- Finetuning with #1 has artifacts from the original audio (noise) that get passed on as input to the vocoder resulting in artifacts in vocoder output in the form of noise.\n", "- On the other hand, #2.1 (i.e. `Mel spectrogram predicted from FastPitch with groundtruth alignment and duration`) gives the best results because it enables HiFi-GAN to learn mel spectrograms generated by FastPitch as well as duration distributions closer to the real world (i.e. ground truth) durations. \n", "\n", - "From implementation perspective - we follow the same process described in [Finetuning FastPitch for a new speaker](FastPitch_Finetuning.ipynb) - i.e. take the latest checkpoint from FastPitch training and predict spectrograms for each of the input records in `train_manifest_text_normed.json`, `test_manifest_text_normed.json` and `val_manifest_text_normed.json`. NeMo provides an efficient script, [scripts/dataset_processing/tts/generate_mels.py](https://raw.githubusercontent.com/nvidia/NeMo/r1.19.0/scripts/dataset_processing/tts/generate_mels.py), to generate Mel-spectrograms in the directory `NeMoGermanTTS/mels` and also create new JSON manifests with a suffix `_mel` by adding a new key `\"mel_filepath\"`. For example, `train_manifest_text_normed.json` corresponds to `train_manifest_text_normed_mel.json` saved in the same directory. You can run the following CLI to obtain the new JSON manifests." + "From implementation perspective - we follow the same process described in [Finetuning FastPitch for a new speaker](FastPitch_Finetuning.ipynb) - i.e. take the latest checkpoint from FastPitch training and predict spectrograms for each of the input records in `train_manifest_text_normed.json`, `test_manifest_text_normed.json` and `val_manifest_text_normed.json`. NeMo provides an efficient script, [scripts/dataset_processing/tts/generate_mels.py](https://raw.githubusercontent.com/nvidia/NeMo/main/scripts/dataset_processing/tts/generate_mels.py), to generate Mel-spectrograms in the directory `NeMoGermanTTS/mels` and also create new JSON manifests with a suffix `_mel` by adding a new key `\"mel_filepath\"`. For example, `train_manifest_text_normed.json` corresponds to `train_manifest_text_normed_mel.json` saved in the same directory. You can run the following CLI to obtain the new JSON manifests." ] }, { diff --git a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb index 403faa965534..9eac34c499d8 100644 --- a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb @@ -50,7 +50,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode scipy==1.7.3\n", diff --git a/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb b/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb index a67744ef0f58..ba85e6025978 100644 --- a/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb +++ b/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb @@ -56,7 +56,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", - "# BRANCH = 'r1.19.0'\n", + "# BRANCH = 'r1.20.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget unidecode pynini==2.1.4 scipy==1.7.3\n", @@ -195,8 +195,8 @@ "id": "cae8567d", "metadata": {}, "source": [ - "### Add absoluate audio path in manifest\n", - "We use absoluate path for `audio_filepath` to get the audio during training." + "### Add absolute audio path in manifest\n", + "We use absolute path for `audio_filepath` to get the audio during training." ] }, { @@ -258,7 +258,7 @@ "source": [ "### Extract Supplementary Data\n", "\n", - "As mentioned in the [FastPitch and MixerTTS training tutorial](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) - To accelerate and stabilize our training, we also need to extract pitch for every audio, estimate pitch statistics (mean, std, min, and max). To do this, all we need to do is iterate over our data one time, via `extract_sup_data.py` script." + "As mentioned in the [FastPitch and MixerTTS training tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) - To accelerate and stabilize our training, we also need to extract pitch for every audio, estimate pitch statistics (mean, std, min, and max). To do this, all we need to do is iterate over our data one time, via `extract_sup_data.py` script." ] }, { @@ -337,7 +337,7 @@ "phoneme_dict_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"cmudict-0.7b_nv22.10\"))\n", "heteronyms_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"heteronyms-052722\"))\n", "\n", - "# Copy and Paste the PITCH_MEAN and PITCH_STD from previous steps (train_manifest) to overide pitch_mean and pitch_std configs below.\n", + "# Copy and Paste the PITCH_MEAN and PITCH_STD from previous steps (train_manifest) to override pitch_mean and pitch_std configs below.\n", "PITCH_MEAN=140.84278869628906\n", "PITCH_STD=65.4063949584961" ] @@ -727,7 +727,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.8.13" } }, "nbformat": 4, diff --git a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb index 5a7f56dc201d..7ab6d1d58ac8 100644 --- a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb +++ b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb @@ -94,7 +94,7 @@ "source": [ "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/tts/Inference_DurationPitchControl.ipynb b/tutorials/tts/Inference_DurationPitchControl.ipynb index d4e1b1ba0678..b7acbd364507 100644 --- a/tutorials/tts/Inference_DurationPitchControl.ipynb +++ b/tutorials/tts/Inference_DurationPitchControl.ipynb @@ -46,7 +46,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", @@ -202,7 +202,7 @@ "\n", "Let's see the `pitch_predicted` for a sample text. You can run the below cell. You should get an image that looks like the following for the input `Hey, what is my pitch?`:\n", "\n", - "\n", + "\n", "\n", "Notice that the last word `pitch` has an increase in pitch to stress that it is a question." ] diff --git a/tutorials/tts/Inference_ModelSelect.ipynb b/tutorials/tts/Inference_ModelSelect.ipynb index abdda3e16747..4c39e591f203 100644 --- a/tutorials/tts/Inference_ModelSelect.ipynb +++ b/tutorials/tts/Inference_ModelSelect.ipynb @@ -46,7 +46,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/NeMo_TTS_Primer.ipynb b/tutorials/tts/NeMo_TTS_Primer.ipynb index 497552a9ac43..dea4a8936053 100644 --- a/tutorials/tts/NeMo_TTS_Primer.ipynb +++ b/tutorials/tts/NeMo_TTS_Primer.ipynb @@ -25,7 +25,7 @@ "source": [ "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/tts/Pronunciation_customization.ipynb b/tutorials/tts/Pronunciation_customization.ipynb index 6185610fe4ab..be99231ec9eb 100644 --- a/tutorials/tts/Pronunciation_customization.ipynb +++ b/tutorials/tts/Pronunciation_customization.ipynb @@ -26,7 +26,7 @@ "4. Run this cell to set up dependencies.\n", "\"\"\"\n", "\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode \n", @@ -128,7 +128,7 @@ "metadata": {}, "source": [ "#### Expected results if you run the tutorial:\n", - " \n", + " \n", "\n", "\n", "During preprocessing, unambiguous dictionary words are converted to phonemes, while OOV and words with multiple entries are kept as graphemes. For example, **paracetamol** is missing from the phoneme dictionary, and **can** has 2 forms." @@ -186,7 +186,7 @@ "metadata": {}, "source": [ "#### Expected results if you run the tutorial:\n", - " \n", + " \n", "\n", "\n", "## Dictionary customization\n", @@ -212,7 +212,7 @@ "if os.path.exists(ipa_cmu_dict):\n", " ! rm $ipa_cmu_dict\n", "\n", - "! wget https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/tts_dataset_files/$ipa_cmu_dict\n", + "! wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/tts_dataset_files/$ipa_cmu_dict\n", "\n", "with open(ipa_cmu_dict, \"a\") as f:\n", " f.write(f\"PARACETAMOL {new_pronunciation}\\n\")\n", @@ -267,7 +267,7 @@ "metadata": {}, "source": [ "#### Expected results if you run the tutorial:\n", - " " + " " ] }, { @@ -276,7 +276,7 @@ "source": [ "# Resources\n", "* [TTS pipeline customization](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tts/tts-custom.html#tts-pipeline-configuration)\n", - "* [Overview of TTS in NeMo](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/NeMo_TTS_Primer.ipynb)\n", + "* [Overview of TTS in NeMo](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/NeMo_TTS_Primer.ipynb)\n", "* [G2P models in NeMo](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tts/g2p.html)\n", "* [Riva TTS documentation](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tts/tts-overview.html)" ] diff --git a/tutorials/tts/Tacotron2_Training.ipynb b/tutorials/tts/Tacotron2_Training.ipynb index a696ee26e8a7..e9615b734ae4 100644 --- a/tutorials/tts/Tacotron2_Training.ipynb +++ b/tutorials/tts/Tacotron2_Training.ipynb @@ -54,7 +54,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/Vits_Training.ipynb b/tutorials/tts/Vits_Training.ipynb index dbe4e9362cc1..1cdc31117709 100644 --- a/tutorials/tts/Vits_Training.ipynb +++ b/tutorials/tts/Vits_Training.ipynb @@ -63,7 +63,7 @@ "# !pip install wget text-unidecode matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.19.0'\n", + "BRANCH = 'r1.20.0'\n", "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@${BRANCH}#egg=nemo_toolkit[all]\"\n", "\n", "## Install pynini\n", From a3c96435d385bc6ed5ec105a0d0d2d066120b8a5 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Mon, 7 Aug 2023 16:57:44 -0700 Subject: [PATCH 159/512] fix the inductor issue in inference --- .../multimodal/models/multimodal_base_model.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/nemo/collections/multimodal/models/multimodal_base_model.py b/nemo/collections/multimodal/models/multimodal_base_model.py index a953bbbbbaf9..646605960223 100644 --- a/nemo/collections/multimodal/models/multimodal_base_model.py +++ b/nemo/collections/multimodal/models/multimodal_base_model.py @@ -154,6 +154,9 @@ def load_from_checkpoint( cfg.unet_config.from_pretrained = None if cfg.get('first_stage_config') and cfg.get('first_stage_config').get('from_pretrained'): cfg.first_stage_config.from_pretrained = None + ## Now when we covert ckpt to nemo, let's always get rid of those _orig_mod + if cfg.get('inductor'): + cfg.inductor = False ## Append some dummy configs that DB didn't support if not cfg.get('channels_last'): cfg.channels_last = True @@ -192,14 +195,6 @@ def load_from_checkpoint( new_state_dict[new_key] = checkpoint['state_dict'][key] checkpoint['state_dict'] = new_state_dict - # compatibility for inductor in inference - if not cfg.get('inductor', False): - new_state_dict = {} - for key in checkpoint['state_dict'].keys(): - new_key = key.replace('._orig_mod', '', 1) - new_state_dict[new_key] = checkpoint['state_dict'][key] - checkpoint['state_dict'] = new_state_dict - if cfg.get('megatron_amp_O2', False): new_state_dict = {} for key in checkpoint['state_dict'].keys(): From 7367a793b90f583bb32e59eb97ca52e6575a0453 Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Tue, 8 Aug 2023 18:17:02 -0700 Subject: [PATCH 160/512] Fix inductor loading .nemo issue --- nemo/collections/nlp/parts/nlp_overrides.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index c043dd7ba7ab..43db0096bc3a 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -354,6 +354,14 @@ def modify_state_dict(self, conf, state_dict): new_state_dict[new_key] = state_dict[key] state_dict = new_state_dict + # compatibility for inductor in inference + if not conf.get('inductor', False): + new_state_dict = {} + for key in state_dict.keys(): + new_key = key.replace('._orig_mod', '', 1) + new_state_dict[new_key] = state_dict[key] + state_dict = new_state_dict + return state_dict def restore_from( From d5dabd978c86ab90e0104d2da72336d893806dcb Mon Sep 17 00:00:00 2001 From: Yu Yao Date: Fri, 18 Aug 2023 13:03:36 -0700 Subject: [PATCH 161/512] Add Neva Model Support --- examples/multimodal/convert_ckpt_to_nemo.py | 10 + .../mllm/kosmos/conf/kosmos_config.yaml | 316 +++++ .../multimodal/mllm/kosmos/kosmos_pretrain.py | 87 ++ .../mllm/neva/conf/neva_config.yaml | 201 +++ .../mllm/neva/conf/neva_export.yaml | 18 + .../mllm/neva/conf/neva_finetune.yaml | 200 +++ .../mllm/neva/conf/neva_inference.yaml | 47 + .../multimodal/mllm/neva/conf/neva_peft.yaml | 206 +++ .../multimodal/mllm/neva/neva_evaluation.py | 324 +++++ examples/multimodal/mllm/neva/neva_export.py | 232 ++++ .../multimodal/mllm/neva/neva_finetune.py | 99 ++ examples/multimodal/mllm/neva/neva_peft.py | 100 ++ .../multimodal/mllm/neva/neva_pretrain.py | 90 ++ .../megatron_change_num_partitions.py | 5 + .../multimodal/data/common/data_samplers.py | 18 +- .../multimodal/data/common/webdataset.py | 71 +- .../multimodal/data/common/webdataset_s3.py | 237 ++++ .../multimodal/data/kosmos/__init__.py | 0 .../multimodal/data/kosmos/kosmos_dataset.py | 366 ++++++ .../multimodal/data/neva/__init__.py | 0 .../multimodal/data/neva/conversation.py | 401 ++++++ .../multimodal/data/neva/neva_dataset.py | 401 ++++++ .../models/clip/megatron_clip_models.py | 11 +- .../multimodal/models/kosmos/__init__.py | 0 .../models/kosmos/megatron_kosmos_model.py | 1153 +++++++++++++++++ .../models/kosmos/perceiver_resampler.py | 131 ++ .../multimodal/models/neva/neva_model.py | 909 +++++++++++++ .../models/neva/neva_peft_models.py | 60 + nemo/collections/multimodal/parts/utils.py | 50 +- .../language_modeling/megatron_base_model.py | 9 +- .../language_modeling/megatron_gpt_model.py | 6 +- .../megatron_gpt_peft_models.py | 13 +- .../megatron_gpt_sft_model.py | 5 +- nemo/collections/nlp/models/nlp_model.py | 2 +- .../megatron/adapters/parallel_adapters.py | 18 + .../modules/common/megatron/language_model.py | 11 +- .../modules/common/megatron/transformer.py | 18 + .../common/text_generation_strategy.py | 154 +++ .../modules/common/text_generation_utils.py | 86 +- .../vision/modules/vit/vit_backbone.py | 2 +- nemo/core/optim/optimizer_with_main_params.py | 3 +- 41 files changed, 6032 insertions(+), 38 deletions(-) create mode 100644 examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml create mode 100644 examples/multimodal/mllm/kosmos/kosmos_pretrain.py create mode 100644 examples/multimodal/mllm/neva/conf/neva_config.yaml create mode 100644 examples/multimodal/mllm/neva/conf/neva_export.yaml create mode 100644 examples/multimodal/mllm/neva/conf/neva_finetune.yaml create mode 100644 examples/multimodal/mllm/neva/conf/neva_inference.yaml create mode 100644 examples/multimodal/mllm/neva/conf/neva_peft.yaml create mode 100644 examples/multimodal/mllm/neva/neva_evaluation.py create mode 100644 examples/multimodal/mllm/neva/neva_export.py create mode 100644 examples/multimodal/mllm/neva/neva_finetune.py create mode 100644 examples/multimodal/mllm/neva/neva_peft.py create mode 100644 examples/multimodal/mllm/neva/neva_pretrain.py create mode 100644 nemo/collections/multimodal/data/common/webdataset_s3.py create mode 100644 nemo/collections/multimodal/data/kosmos/__init__.py create mode 100644 nemo/collections/multimodal/data/kosmos/kosmos_dataset.py create mode 100644 nemo/collections/multimodal/data/neva/__init__.py create mode 100644 nemo/collections/multimodal/data/neva/conversation.py create mode 100644 nemo/collections/multimodal/data/neva/neva_dataset.py create mode 100644 nemo/collections/multimodal/models/kosmos/__init__.py create mode 100644 nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py create mode 100644 nemo/collections/multimodal/models/kosmos/perceiver_resampler.py create mode 100644 nemo/collections/multimodal/models/neva/neva_model.py create mode 100644 nemo/collections/multimodal/models/neva/neva_peft_models.py diff --git a/examples/multimodal/convert_ckpt_to_nemo.py b/examples/multimodal/convert_ckpt_to_nemo.py index 3ae6cd1ba117..e5f1f628c430 100644 --- a/examples/multimodal/convert_ckpt_to_nemo.py +++ b/examples/multimodal/convert_ckpt_to_nemo.py @@ -36,6 +36,8 @@ from nemo.collections.multimodal.models.dreambooth.dreambooth import MegatronDreamBooth from nemo.collections.multimodal.models.imagen.imagen import MegatronImagen from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit +from nemo.collections.multimodal.models.kosmos.megatron_kosmos_model import MegatronKosmosModel +from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector from nemo.utils import AppState, logging @@ -164,6 +166,14 @@ def convert(local_rank, rank, world_size, args): model = MegatronControlNet.load_from_checkpoint( checkpoint_path, hparams_file=args.hparams_file, trainer=trainer ) + elif args.model_type == 'kosmos': + model = MegatronKosmosModel.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) + elif args.model_type == 'neva': + model = MegatronNevaModel.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) else: raise ValueError(f"Unrecognized model_type {args.model_type}.") diff --git a/examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml b/examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml new file mode 100644 index 000000000000..177cc79261d2 --- /dev/null +++ b/examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml @@ -0,0 +1,316 @@ +name: nemo_kosmos +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: bf16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + replace_sampler_ddp: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 375000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + val_check_interval: 100 + check_val_every_n_epoch: null + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: nemo_kosmos + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + precision: ${trainer.precision} + + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + + # Batch size guideline for different types of dataset + micro_batch_size: 1 # limited by GPU memory + global_batch_size: 256 # will use more micro batches to reach global batch size + + media_start_token: "" + media_end_token: "" + + enabled_data_types: ["text", "image_caption"] + per_type_micro_batch_size: + text: 1 + image_caption: 32 + image_interleaved: 1 + per_type_sequence_length: + text: 2048 # placeholder + image_caption: 128 + image_interleaved: 2048 + per_type_loss_weights: + text: 1 + image_caption: 1 + image_interleaved: 1 + + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_pretrained: null # used in fine-tuning + # multimodal configs + num_media_latents: 64 # each media is encoded and sampled into `num_media_latents` LM embeddings + + llm: + precision: ${trainer.precision} + + # model architecture + encoder_seq_length: 2048 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: learned_absolute + num_layers: 12 + hidden_size: 2048 + ffn_hidden_size: 5440 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 16 + init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm1p # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + bias: True # Whether to use bias terms in all weight matrices. + activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] + headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. + transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] + normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. + rotary_percentage: 0.5 # If using position_embedding_type=rope, then the per head dim is multiplied by this. + attention_type: 'multihead' # Attention type. Options ['multihead'] + share_embeddings_and_output_weights: False # Share embedding and output layer weights. + overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + + vision: + from_pretrained: /path/to/clip_model.nemo + precision: ${trainer.precision} + # vision configs + patch_dim: 14 + img_h: 224 + img_w: 224 + image_mean: null + image_std: null + num_channels: 3 + drop_patch_rate: 0.0 + drop_path_rate: 0.0 + global_average_pool: False + class_token_length: 1 + preprocess_layernorm: True # apply layer norm to embedded tokens + + # model architecture + encoder_seq_length: 196 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: learned_parameters + num_layers: 24 + hidden_size: 1024 + ffn_hidden_size: 4096 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 16 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add final layer norm + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + # Megatron O2-style half-precision + megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'megatron' + type: 'GPT2BPETokenizer' + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + + data: + # TEXT DATA + # ================================================================================== + # Path to data must be specified by the user. + # Supports List, String and Dictionary + # List : can override from the CLI: "model.data.data_prefix=[.5,/raid/data/pile/my-gpt3_00_text_document,.5,/raid/data/pile/my-gpt3_01_text_document]", + # Or see example below: + # data_prefix: + # - .5 + # - /raid/data/pile/my-gpt3_00_text_document + # - .5 + # - /raid/data/pile/my-gpt3_01_text_document + # Dictionary: can override from CLI "model.data.data_prefix"={"train":[1.0, /path/to/data], "validation":/path/to/data, "test":/path/to/test} + # Or see example below: + # "model.data.data_prefix: {train:[1.0,/path/to/data], validation:[/path/to/data], test:[/path/to/test]}" + num_workers: 2 + data_prefix: ??? + index_mapping_dir: null # path to save index mapping .npy files, by default will save in the same location as data_prefix + data_impl: mmap + splits_string: 950,50,0 + seq_length: ${model.llm.encoder_seq_length} + skip_warmup: True + dataloader_type: single # cyclic + reset_position_ids: False # Reset position ids after end-of-document token + reset_attention_mask: False # Reset attention mask after end-of-document token + eod_mask_loss: False # Mask loss for the end of document tokens + validation_drop_last: True # Set to false if the last partial validation samples is to be consumed + no_seqlen_plus_one_input_tokens: False # Set to True to disable fetching (sequence length + 1) input tokens, instead get (sequence length) input tokens and mask the last token + pad_samples_to_global_batch_size: False # Set to True if you want to pad the last partial batch with -1's to equal global batch size + shuffle_documents: True # Set to False to disable documents shuffling. Sample index will still be shuffled + + # ================================================================================= + # MEDIA DATA + image_caption: + num_workers: 8 + train: + dataset_path: # List of paths to pkl files or tar files + - /datasets/coyo/test.pkl + validation: # List of paths to pkl files or tar files + dataset_path: + - /datasets/coyo/test.pkl + webdataset: + infinite_sampler: False + local_root_path: /datasets/coyo + + image_interleaved: + num_workers: 8 + train: + dataset_path: # List of paths to pkl files or tar files + - /datasets/coyo/test.pkl + validation: # List of paths to pkl files or tar files + dataset_path: + - /datasets/coyo/test.pkl + webdataset: + chunk_size: 190 + infinite_sampler: False + local_root_path: /datasets/coyo +# boto3: +# credentials_file: /lustre/fsw/joc/yuya/kosmos/s3_cred +# bucket: webdataset + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 2e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + sched: + name: CosineAnnealing + warmup_steps: 2000 + constant_steps: 0 + min_lr: 2e-5 \ No newline at end of file diff --git a/examples/multimodal/mllm/kosmos/kosmos_pretrain.py b/examples/multimodal/mllm/kosmos/kosmos_pretrain.py new file mode 100644 index 000000000000..42fe67442633 --- /dev/null +++ b/examples/multimodal/mllm/kosmos/kosmos_pretrain.py @@ -0,0 +1,87 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector + +from nemo.collections.multimodal.models.kosmos.megatron_kosmos_model import MegatronKosmosModel +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="conf", config_name="kosmos_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + ) + if megatron_amp_O2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + exp_manager(trainer, cfg.exp_manager) + + # update resume from checkpoint found by exp_manager + if cfg.model.resume_from_checkpoint is not None: + resume_from_checkpoint = cfg.model.resume_from_checkpoint + else: + resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path + + logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}') + + trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + model = MegatronKosmosModel(cfg.model, trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/mllm/neva/conf/neva_config.yaml b/examples/multimodal/mllm/neva/conf/neva_config.yaml new file mode 100644 index 000000000000..90c737e64dd2 --- /dev/null +++ b/examples/multimodal/mllm/neva/conf/neva_config.yaml @@ -0,0 +1,201 @@ +name: nemo_neva +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: bf16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + replace_sampler_ddp: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 4650 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + val_check_interval: 100 + check_val_every_n_epoch: null + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: nemo_neva + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + precision: ${trainer.precision} + + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + + # Batch size guideline for different types of dataset + micro_batch_size: 16 # limited by GPU memory + global_batch_size: 128 # will use more micro batches to reach global batch size + + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_pretrained: null # used in fine-tuning + + # Multimodal configs + mm_cfg: + llm: + from_pretrained: null # path to nemo checkpoint + freeze: True + vision_encoder: + from_pretrained: "" # path or name + from_hf: True + patch_dim: 14 + hidden_size: 1024 # could be found from model but tricky in code + vision_select_layer: -2 # default to the last layer + class_token_length: 1 + freeze: True + pretrain_mm_mlp_adapter: null # path to pretrained mm adapter + use_im_start_end: True # only support True now + + + # LLM configs + # model architecture + encoder_seq_length: 2048 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: rope + num_layers: 24 + hidden_size: 2048 + ffn_hidden_size: 5440 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 16 + init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm1p # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + bias: False # Whether to use bias terms in all weight matrices. + activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] + headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. + transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] + normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. + rotary_percentage: 0.5 # If using position_embedding_type=rope, then the per head dim is multiplied by this. + attention_type: 'multihead' # Attention type. Options ['multihead'] + share_embeddings_and_output_weights: False # Share embedding and output layer weights. + overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: False # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + + # Megatron O2-style half-precision + megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'megatron' + type: 'GPT2BPETokenizer' + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + + data: + num_workers: 8 + dataloader_type: cyclic + data_path: + lazy_preprocess: True + is_multimodal: True + sep_image_conv_front: False + image_token_len: 0 + image_folder: null + image_aspect_ratio: 'square' + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 2e-3 + weight_decay: 0. + betas: + - 0.9 + - 0.95 + sched: + name: CosineAnnealing + warmup_steps: 140 + constant_steps: 0 + min_lr: 2e-5 \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/conf/neva_export.yaml b/examples/multimodal/mllm/neva/conf/neva_export.yaml new file mode 100644 index 000000000000..9867b8bf7bfc --- /dev/null +++ b/examples/multimodal/mllm/neva/conf/neva_export.yaml @@ -0,0 +1,18 @@ +name: nemo_neva +infer: + out_dir: ./out/ + vision: + min_batch_size: 1 + opt_batch_size: 1 + max_batch_size: 1 + clip: openai/clip-vit-large-patch14 + llm: + tekit_path: /tekit + tensor_parallelism: 1 + max_input_len: 2048 + max_output_len: 1024 + max_batch_size: 1 + +model: + precision: bf16 + restore_from_path: /path/to/pretrained.nemo \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/conf/neva_finetune.yaml b/examples/multimodal/mllm/neva/conf/neva_finetune.yaml new file mode 100644 index 000000000000..4fa4faf862e8 --- /dev/null +++ b/examples/multimodal/mllm/neva/conf/neva_finetune.yaml @@ -0,0 +1,200 @@ +name: nemo_neva +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: bf16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + replace_sampler_ddp: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 4900 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + val_check_interval: 100 + check_val_every_n_epoch: null + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: nemo_neva + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + precision: ${trainer.precision} + + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + + # Batch size guideline for different types of dataset + micro_batch_size: 4 # limited by GPU memory + global_batch_size: 32 # will use more micro batches to reach global batch size + + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_pretrained: null # used in fine-tuning + + # Multimodal configs + mm_cfg: + llm: + from_pretrained: null # path to nemo checkpoint + freeze: False + vision_encoder: + from_pretrained: "" # path or name + from_hf: True + hidden_size: 1024 # could be found from model but tricky in code + vision_select_layer: -2 # default to the last layer + class_token_length: 1 + freeze: True + pretrain_mm_mlp_adapter: null # path to pretrained mm adapter + use_im_start_end: True # only support True now + + + # LLM configs + # model architecture + encoder_seq_length: 2048 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: rope + num_layers: 24 + hidden_size: 2048 + ffn_hidden_size: 5440 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 16 + init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm1p # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + bias: False # Whether to use bias terms in all weight matrices. + activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] + headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. + transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] + normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. + rotary_percentage: 0.5 # If using position_embedding_type=rope, then the per head dim is multiplied by this. + attention_type: 'multihead' # Attention type. Options ['multihead'] + share_embeddings_and_output_weights: False # Share embedding and output layer weights. + overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: False # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + + # Megatron O2-style half-precision + megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'megatron' + type: 'GPT2BPETokenizer' + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + + data: + num_workers: 8 + dataloader_type: cyclic + data_path: + lazy_preprocess: True + is_multimodal: True + sep_image_conv_front: False + image_token_len: 0 + image_folder: null + image_aspect_ratio: 'square' + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 2e-5 + weight_decay: 0. + betas: + - 0.9 + - 0.95 + sched: + name: CosineAnnealing + warmup_steps: 200 + constant_steps: 0 + min_lr: 2e-7 \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/conf/neva_inference.yaml b/examples/multimodal/mllm/neva/conf/neva_inference.yaml new file mode 100644 index 000000000000..bb874ba1d44c --- /dev/null +++ b/examples/multimodal/mllm/neva/conf/neva_inference.yaml @@ -0,0 +1,47 @@ +inference: + greedy: True # Whether or not to use sampling ; use greedy decoding otherwise + top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. + top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. + temperature: 0.2 # sampling temperature + add_BOS: False # add the bos token at the begining of the prompt + tokens_to_generate: 40 # The minimum length of the sequence to be generated. + all_probs: False # whether return the log prob for all the tokens in vocab + repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. + min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. + compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False + + +trainer: + devices: 8 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: bf16 # 16, 32, or bf16 + +cluster_type: BCP +tensor_model_parallel_size: 8 +pipeline_model_parallel_size: 1 +pipeline_model_parallel_split_rank: 0 # used for encoder and decoder model (0 for others) +neva_model_file: /pwd/nemo_multimodal/nemo_experiments/nemo_llava.nemo #neva_22b_tp8_finetuned_v1.nemo neva_8b_tp4_finetuned_v1.nemo +checkpoint_dir: null #/pwd/nemo_multimodal/nemo_experiments/nemo_llava_finetune/checkpoints # checkpoint file dir. This is used to load the PTL checkpoint generated during the Kosmos training +checkpoint_name: null #megatron_clip--val_loss=0.41-step=13499-consumed_samples=431904.0.ckpt # PTL checkpoint file name, only used for PTL checkpoint loading +hparams_file: null #/pwd/nemo_multimodal/nemo_experiments/nemo_llava_finetune/version_0/hparams.yaml # model configuration file, only used for PTL checkpoint loading +quality: 9 +toxicity: 0 +humor: 6 +creativity: 6 +violence: 0 +helpfulness: 6 +not_appropriate: 0 + +# MORE THAN ONE INFERENCE IS NOT RUNNING PROPERLY NEED TO CHECK WHY SECOND IS OUTPUTING JUNK N +prompts: + - " What is unusual about this Image?" + +server: False # whether launch the API server +port: 5555 # the port number for the inference server +web_server: False # whether launch the web inference server +share: False # whether create a public URL +username: test # user name for web client +password: test2 # password for web client +web_port: 9889 # the port number of the web server \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/conf/neva_peft.yaml b/examples/multimodal/mllm/neva/conf/neva_peft.yaml new file mode 100644 index 000000000000..02dff49607cf --- /dev/null +++ b/examples/multimodal/mllm/neva/conf/neva_peft.yaml @@ -0,0 +1,206 @@ +name: nemo_neva +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: bf16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + replace_sampler_ddp: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 4900 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + val_check_interval: 100 + check_val_every_n_epoch: null + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: nemo_neva + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + precision: ${trainer.precision} + + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + + # Batch size guideline for different types of dataset + micro_batch_size: 4 # limited by GPU memory + global_batch_size: 32 # will use more micro batches to reach global batch size + + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_pretrained: null # used in fine-tuning + + # Multimodal configs + mm_cfg: + llm: + from_pretrained: null # path to nemo checkpoint + freeze: True # Set this to True in adapter learning! + vision_encoder: + from_pretrained: "" # path or name + from_hf: True + hidden_size: 1024 # could be found from model but tricky in code + vision_select_layer: -2 # default to the last layer + class_token_length: 1 + freeze: True + pretrain_mm_mlp_adapter: null # path to pretrained mm adapter + use_im_start_end: True # only support True now + + peft: + lora_tuning: + adapter_dim: 32 + adapter_dropout: 0.0 + column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal + row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal + + # LLM configs + # model architecture + encoder_seq_length: 2048 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: rope + num_layers: 24 + hidden_size: 2048 + ffn_hidden_size: 5440 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 16 + init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm1p # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + bias: False # Whether to use bias terms in all weight matrices. + activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] + headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. + transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] + normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. + rotary_percentage: 0.5 # If using position_embedding_type=rope, then the per head dim is multiplied by this. + attention_type: 'multihead' # Attention type. Options ['multihead'] + share_embeddings_and_output_weights: False # Share embedding and output layer weights. + overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: False # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + + # Megatron O2-style half-precision + megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'megatron' + type: 'GPT2BPETokenizer' + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + + data: + num_workers: 8 + dataloader_type: cyclic + data_path: + lazy_preprocess: True + is_multimodal: True + sep_image_conv_front: False + image_token_len: 0 + image_folder: null + image_aspect_ratio: 'square' + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 2e-5 + weight_decay: 0. + betas: + - 0.9 + - 0.95 + sched: + name: CosineAnnealing + warmup_steps: 200 + constant_steps: 0 + min_lr: 2e-7 \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/neva_evaluation.py b/examples/multimodal/mllm/neva/neva_evaluation.py new file mode 100644 index 000000000000..2f6de74379ef --- /dev/null +++ b/examples/multimodal/mllm/neva/neva_evaluation.py @@ -0,0 +1,324 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +import os +import re +import threading + +import torch +from omegaconf import OmegaConf, open_dict +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.trainer import Trainer +from torch.utils.data import DataLoader, Dataset + +import nemo.collections.multimodal.data.neva.conversation as conversation_lib +from nemo.collections.multimodal.models.neva.neva_peft_models import MegatronNevaModel +from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel +from nemo.collections.nlp.modules.common.megatron_web_server import get_demo +from nemo.collections.nlp.modules.common.text_generation_server import MegatronServer +from nemo.collections.nlp.modules.common.text_generation_utils import generate +from nemo.collections.nlp.modules.common.transformer.text_generation import LengthParam, SamplingParam +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector +from nemo.core.config import hydra_runner +from nemo.utils.app_state import AppState +from nemo.utils.model_utils import inject_model_parallel_rank + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + +""" +This is the script to run GPT text generation. + +Usage: + Assume the model has TP=1, PP=1 in the following use cases. + a. run greedy inference from a nemo file: + python neva_evaluation.py \ + neva_model_file=PATH_TO_MODEL \ + inference.greedy=True \ + inference.add_BOS=True \ + trainer.devices=1 \ + trainer.num_nodes=1 \ + tensor_model_parallel_size=-1 \ + pipeline_model_parallel_size=-1 \ + prompts=[prompt1,prompt2] + + b. run greedy inference from a PTL checkpoint file: + python neva_evaluation.py \ + checkpoint_dir=PATH_TO_CHECKPOINT_FILE \ + checkpoint_name=CHECKPOINT_FILE_NAME \ + hparams_file=HPARAMS_FILE \ + inference.greedy=True \ + inference.add_BOS=True \ + trainer.devices=1 \ + trainer.num_nodes=1 \ + tensor_model_parallel_size=-1 \ + pipeline_model_parallel_size=-1 \ + prompts=[prompt1,prompt2] + + c. run top_p inference from a nemo file: + python neva_evaluation.py \ + neva_model_file=PATH_TO_MODEL \ + inference.greedy=False \ + inference.top_k=0 \ + inference.top_p=0.9 \ + inference.repetition_penalty=1.2 \ + inference.add_BOS=True \ + trainer.devices=1 \ + trainer.num_nodes=1 \ + tensor_model_parallel_size=-1 \ + pipeline_model_parallel_size=-1 \ + prompts=[prompt1,prompt2] + + d. If you don't need to generate tokens and need model to compute logprobs: + python neva_evaluation.py \ + neva_model_file=PATH_TO_MODEL \ + inference.compute_logprob=True \ + trainer.devices=1 \ + trainer.num_nodes=1 \ + tensor_model_parallel_size=-1 \ + pipeline_model_parallel_size=-1 \ + prompts=[text to get logprob] + + e. Launch the inference server + python neva_evaluation.py \ + neva_model_file=PATH_TO_MODEL \ + trainer.devices=1 \ + trainer.num_nodes=1 \ + tensor_model_parallel_size=-1 \ + pipeline_model_parallel_size=-1 \ + server=True + + To send a request to the server, here is one example code: + ```python + import json + import requests + + batch_size = 8 + port_num = 5555 + headers = {"Content-Type": "application/json"} + + + def request_data(data): + resp = requests.put('http://localhost:{}/generate'.format(port_num), + data=json.dumps(data), + headers=headers) + sentences = resp.json()['sentences'] + return sentences + + + data = { + "sentences": [""] * batch_size, + "images" : [] * batch_size, + "tokens_to_generate": 300, + "temperature": 1.0, + "add_BOS": True, + "top_k": 0, + "top_p": 0.9, + "greedy": False, + "all_probs": False, + "repetition_penalty": 1.2, + "min_tokens_to_generate": 2, + } + + sentences = request_data(data) + ``` +""" + +if not torch.cuda.is_available(): + raise EnvironmentError("GPU is needed for the inference") + + +class RequestDataSet(Dataset): + def __init__(self, sentences): + super().__init__() + self.sentences = sentences + + def __len__(self,): + return len(self.sentences) + + def __getitem__(self, idx): + return self.sentences[idx] + + +@hydra_runner(config_path="conf", config_name="neva_inference") +def main(cfg) -> None: + + plugins = [] + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + # trainer required for restoring model parallel models + trainer = Trainer(plugins=plugins, strategy=NLPDDPStrategy(), **cfg.trainer) + + if ( + cfg.tensor_model_parallel_size < 0 + or cfg.pipeline_model_parallel_size < 0 + or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 + ): + model_config = MegatronNevaModel.restore_from( + restore_path=cfg.neva_model_file, trainer=trainer, return_config=True, + ) + + with open_dict(cfg): + cfg.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) + cfg.pipeline_model_parallel_size = model_config.get('pipeline_model_parallel_size', 1) + cfg.pipeline_model_parallel_split_rank = model_config.get('pipeline_model_parallel_split_rank', 0) + + assert ( + cfg.trainer.devices * cfg.trainer.num_nodes + == cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size + ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" + + if cfg.neva_model_file: + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.neva_model_file): + save_restore_connector.model_extracted_dir = cfg.neva_model_file + + pretrained_cfg = MegatronNevaModel.restore_from( + restore_path=cfg.neva_model_file, + trainer=trainer, + return_config=True, + save_restore_connector=save_restore_connector, + ) + OmegaConf.set_struct(pretrained_cfg, True) + with open_dict(pretrained_cfg): + pretrained_cfg.sequence_parallel = False + pretrained_cfg.activations_checkpoint_granularity = None + pretrained_cfg.activations_checkpoint_method = None + pretrained_cfg.precision = trainer.precision + pretrained_cfg.mm_cfg.llm.from_pretrained = None + # pretrained_cfg.mm_cfg.vision_encoder.from_pretrained = None + + model = MegatronNevaModel.restore_from( + restore_path=cfg.neva_model_file, + trainer=trainer, + override_config_path=pretrained_cfg, + save_restore_connector=save_restore_connector, + ) + + elif cfg.checkpoint_dir: + app_state = AppState() + if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: + app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = cfg.tensor_model_parallel_size + app_state.pipeline_model_parallel_size = cfg.pipeline_model_parallel_size + ( + app_state.tensor_model_parallel_rank, + app_state.pipeline_model_parallel_rank, + app_state.model_parallel_size, + app_state.data_parallel_size, + app_state.pipeline_model_parallel_split_rank, + app_state.virtual_pipeline_model_parallel_rank, + ) = fake_initialize_model_parallel( + world_size=app_state.model_parallel_size, + rank=trainer.global_rank, + tensor_model_parallel_size_=cfg.tensor_model_parallel_size, + pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank, + ) + checkpoint_path = inject_model_parallel_rank(os.path.join(cfg.checkpoint_dir, cfg.checkpoint_name)) + # TODO: This wont work properly (We need to set model.llm.from_pretrained model.vision.from_pretrained to nul) + model = MegatronNevaModel.load_from_checkpoint(checkpoint_path, hparams_file=cfg.hparams_file, trainer=trainer) + else: + raise ValueError("need at least a nemo file or checkpoint dir") + + model.freeze() + + # Have to turn off activations_checkpoint_method for inference + model.model.module.language_model.encoder.activations_checkpoint_method = None + length_params: LengthParam = { + "max_length": cfg.inference.tokens_to_generate, + "min_length": cfg.inference.min_tokens_to_generate, + } + + sampling_params: SamplingParam = { + "use_greedy": cfg.inference.greedy, + "temperature": cfg.inference.temperature, + "top_k": cfg.inference.top_k, + "top_p": cfg.inference.top_p, + "repetition_penalty": cfg.inference.repetition_penalty, + "add_BOS": cfg.inference.add_BOS, + "all_probs": cfg.inference.all_probs, + "compute_logprob": cfg.inference.compute_logprob, + } + + original_input_prompts = OmegaConf.to_container(cfg.prompts) + modified_input_prompts = [] + conv = conversation_lib.conv_nvgpt.copy() + modified_input_prompts = [] + pattern = r"()" + for prompt in original_input_prompts: + conv.messages = [("User", prompt), ("Assistant", "")] + base_prompt = conv.get_prompt() + img_path = re.findall(pattern, base_prompt)[0] + edited_img_path = "" + img_path + "\n" + split_prompt = base_prompt.split(img_path) + final_prompt = split_prompt[0] + edited_img_path + split_prompt[1] + """ + if not final_prompt.endswith("\n"): + final_prompt = final_prompt + "\n" + labels=f"quality:{cfg.quality},toxicity:{cfg.toxicity},humor:{cfg.humor},creativity:{cfg.creativity},violence:{cfg.violence},helpfulness:{cfg.helpfulness},not_appropriate:{cfg.not_appropriate}" + final_prompt = final_prompt + "" + labels + "\n" + """ + modified_input_prompts.append(final_prompt) + + response = model.generate( + input_prompts=modified_input_prompts, length_params=length_params, sampling_params=sampling_params + ) + if torch.cuda.current_device() == 0: + print(modified_input_prompts) + print("***************************") + print(response) + print("***************************") + """ + # Second method of running text generation, call trainer.predict + ds = RequestDataSet(modified_input_prompts) + request_dl = DataLoader(dataset=ds, batch_size=2) + config = OmegaConf.to_container(cfg.inference) + model.set_inference_config(config) + response = trainer.predict(model, request_dl) + + print("***************************") + print(response) + print("***************************") + """ + # Third method of running text generation, use inference server + if cfg.server: + if parallel_state.is_pipeline_first_stage() and parallel_state.get_tensor_model_parallel_rank() == 0: + if cfg.web_server: + loop = asyncio.new_event_loop() + thread = threading.Thread( + target=get_demo, + daemon=True, + args=(cfg.share, cfg.username, cfg.password, cfg.port, cfg.web_port, loop), + ) + thread.start() + server = MegatronServer(model.cuda()) + server.run("0.0.0.0", port=cfg.port) + + while True: + choice = torch.cuda.LongTensor(1) + torch.distributed.broadcast(choice, 0) + if choice[0].item() == 0: + generate(model.cuda()) + + +if __name__ == '__main__': + main() # noqa pylint: disable=no-value-for-parameter diff --git a/examples/multimodal/mllm/neva/neva_export.py b/examples/multimodal/mllm/neva/neva_export.py new file mode 100644 index 000000000000..017af8deabb3 --- /dev/null +++ b/examples/multimodal/mllm/neva/neva_export.py @@ -0,0 +1,232 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import torch +from omegaconf.omegaconf import OmegaConf +from polygraphy.backend.trt import CreateConfig, Profile, engine_from_network, network_from_onnx_path, save_engine +from polygraphy.logger import G_LOGGER + +from nemo.core.classes.exportable import Exportable +from nemo.core.config import hydra_runner +from nemo.core.neural_types import ChannelType, LogitsType, NeuralType + +G_LOGGER.module_severity = G_LOGGER.EXTRA_VERBOSE +import logging +import os +import shutil +import subprocess +import tempfile +from pathlib import Path + +import torch +from omegaconf import OmegaConf +from transformers import CLIPImageProcessor, CLIPVisionModel + +from nemo.core.classes.exportable import Exportable +from nemo.core.connectors.save_restore_connector import SaveRestoreConnector +from nemo.core.neural_types import ChannelType, LogitsType, NeuralType + +LOGGER = logging.getLogger(__name__) + + +def build_vision_encoder(model_path, clip_path, precision, bs_min, bs_opt, bs_max, out_dir): + torch_precision = torch.bfloat16 if precision == 'bf16' else torch.float16 + + with tempfile.TemporaryDirectory() as temp: + LOGGER.info('Extracting model') + connector = SaveRestoreConnector() + connector._unpack_nemo_file(path2file=model_path, out_folder=temp) + config_yaml = os.path.join(temp, connector.model_config_yaml) + config = OmegaConf.load(config_yaml) + if config.tensor_model_parallel_size > 1: + path = os.path.join(temp, 'mp_rank_00', connector.model_weights_ckpt) + else: + path = os.path.join(temp, connector.model_weights_ckpt) + state_dict = connector._load_state_dict_from_disk(path) + LOGGER.info('Done') + + vision_connector = torch.nn.Linear(config.vision.hidden_size, config.llm.hidden_size, bias=True,) + vision_encoder = CLIPVisionModel.from_pretrained(clip_path, torch_dtype=torch_precision) + image_size = vision_encoder.vision_model.config.image_size + + new_state_dict = { + 'weight': state_dict['model.vision_connector.weight'], + 'bias': state_dict['model.vision_connector.bias'], + } + vision_connector.load_state_dict(new_state_dict) + vision_connector = vision_connector.to(dtype=torch_precision) + + class VisionEncoderWrapper(torch.nn.Module, Exportable): + def __init__(self, encoder, connector): + super().__init__() + self.encoder = encoder + self.connector = connector + + def forward(self, images): + vision_x = self.encoder(images, output_hidden_states=True) + vision_x = vision_x.hidden_states[-2] + vision_x = vision_x[:, 1:] + vision_x = self.connector(vision_x) + return vision_x + + # For onnx export + def input_example(self, max_batch=8): + sample = next(self.parameters()) + images = torch.randn(max_batch, 3, image_size, image_size, device=sample.device, dtype=sample.dtype) + return (images,) + + @property + def input_types(self): + return {'images': NeuralType(('B', 'C', 'H', 'W'), ChannelType())} + + @property + def output_types(self): + return {'features': NeuralType(('B', 'S', 'D'), LogitsType())} + + @property + def input_names(self): + return ['images'] + + @property + def output_names(self): + return ['features'] + + wrapper = VisionEncoderWrapper(vision_encoder, vision_connector) + + os.makedirs(f'./onnx/', exist_ok=True) + dynamic_axes = {'images': {0: 'B'}} + + LOGGER.info('Exporting ONNX') + wrapper.export(f'./onnx/vision_encoder.onnx', dynamic_axes=dynamic_axes, onnx_opset_version=17) + LOGGER.info('Done') + + bsmin_example = wrapper.input_example(max_batch=bs_min) + bsopt_example = wrapper.input_example(max_batch=bs_opt) + bsmax_example = wrapper.input_example(max_batch=bs_max) + + input_profile = {} + input_profile['images'] = [ + tuple(bsmin_example[0].shape), + tuple(bsopt_example[0].shape), + tuple(bsmax_example[0].shape), + ] + + p = Profile() + if input_profile: + for name, dims in input_profile.items(): + assert len(dims) == 3 + p.add(name, min=dims[0], opt=dims[1], max=dims[2]) + + LOGGER.info('Exporting TRT') + engine = engine_from_network( + network_from_onnx_path('./onnx/vision_encoder.onnx'), + config=CreateConfig(fp16=precision == 16, bf16=precision == 'bf16', profiles=[p],), + ) + save_engine(engine, path=os.path.join(out_dir, 'vision_encoder.plan')) + + processor = CLIPImageProcessor.from_pretrained(clip_path) + processor.save_pretrained(out_dir) + LOGGER.info('Done') + + +def build_trtllm_engines( + tekit_path, in_file, out_dir, tensor_parallelism, precision, max_input_len, max_output_len, max_batch_size +): + with tempfile.TemporaryDirectory() as temp_dir: + gpt_example_path = f'{tekit_path}/examples/gpt' + build_precision = 'bfloat16' if precision == 'bf16' else 'float16' + LOGGER.info('Converting model weights') + convert_command = [ + 'python3', + 'nemo_ckpt_convert.py', + f'--out-dir={temp_dir}', + f'--in-file={in_file}', + f'--tensor-parallelism={tensor_parallelism}', + f'--storage-type={build_precision}', + '--verbose', + ] + convert_process = subprocess.Popen( + convert_command, cwd=gpt_example_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + stdout, stderr = convert_process.communicate() + print(stdout.decode()) + assert convert_process.returncode == 0, stderr.decode() + LOGGER.info('Done') + + shutil.copy(os.path.join(temp_dir, f'{tensor_parallelism}-gpu/tokenizer.model'), out_dir) + + LOGGER.info('Building TRT-LLM engines') + build_command = [ + 'python3', + 'build.py', + f'--model_dir={temp_dir}/{tensor_parallelism}-gpu', + f'--dtype={build_precision}', + f'--output_dir={os.path.abspath(out_dir)}', + f'--use_gpt_attention_plugin={build_precision}', + f'--world_size={tensor_parallelism}', + f'--max_input_len={max_input_len}', + f'--max_output_len={max_output_len}', + f'--max_batch_size={max_batch_size}', + f'--use_layernorm_plugin={build_precision}', + f'--use_gemm_plugin={build_precision}', + '--parallel_build', + '--embeddings_override', + '--enable_context_fmha', + '--remove_input_padding', + '--log_level=verbose', + ] + build_process = subprocess.Popen( + build_command, cwd=gpt_example_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + stdout, stderr = build_process.communicate() + print(stdout.decode()) + assert build_process.returncode == 0, stderr.decode() + LOGGER.info('Done') + os.remove(os.path.join(out_dir, 'model.cache')) + + +@hydra_runner(config_path='conf', config_name='neva_export') +def main(cfg): + precision = cfg.model.get('precision', 16) + assert precision != 32, 'FP32 export not supported' + + os.makedirs(cfg.infer.out_dir, exist_ok=True) + LOGGER.info('Building TRT-LLM engines') + build_trtllm_engines( + cfg.infer.llm.tekit_path, + cfg.model.restore_from_path, + cfg.infer.out_dir, + cfg.infer.llm.get('tensor_parallelism', 1), + precision, + cfg.infer.llm.get('max_input_len', 2048), + cfg.infer.llm.get('max_output_len', 2048), + cfg.infer.llm.get('max_batch_size', 1), + ) + + LOGGER.info('Building vision TRT engine') + build_vision_encoder( + cfg.model.restore_from_path, + cfg.infer.vision.clip, + precision, + cfg.infer.vision.get('min_batch_size', 1), + cfg.infer.vision.get('opt_batch_size', 1), + cfg.infer.vision.get('max_batch_size', 1), + cfg.infer.out_dir, + ) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/mllm/neva/neva_finetune.py b/examples/multimodal/mllm/neva/neva_finetune.py new file mode 100644 index 000000000000..878ad3679baf --- /dev/null +++ b/examples/multimodal/mllm/neva/neva_finetune.py @@ -0,0 +1,99 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch.multiprocessing as mp +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector + +from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + NLPSaveRestoreConnector, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + +mp.set_start_method("spawn", force=True) + + +@hydra_runner(config_path="conf", config_name="neva_finetune") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + ) + if megatron_amp_O2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + exp_manager(trainer, cfg.exp_manager) + + # update resume from checkpoint found by exp_manager + if cfg.model.resume_from_checkpoint is not None: + resume_from_checkpoint = cfg.model.resume_from_checkpoint + else: + resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path + + logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}') + + trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + if cfg.model.restore_from_pretrained is None: + model = MegatronNevaModel(cfg.model, trainer) + else: + model = MegatronNevaModel.restore_from( + restore_path=cfg.model.restore_from_pretrained, + trainer=trainer, + override_config_path=cfg.model, + save_restore_connector=NLPSaveRestoreConnector(), + strict=False, + ) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/mllm/neva/neva_peft.py b/examples/multimodal/mllm/neva/neva_peft.py new file mode 100644 index 000000000000..b92070e91a7d --- /dev/null +++ b/examples/multimodal/mllm/neva/neva_peft.py @@ -0,0 +1,100 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch.multiprocessing as mp +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector + +from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel +from nemo.collections.multimodal.models.neva.neva_peft_models import MegatronNevaLoRAModel +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + NLPSaveRestoreConnector, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + +# mp.set_start_method("spawn", force=True) + + +@hydra_runner(config_path="conf", config_name="neva_peft") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + ) + if megatron_amp_O2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + exp_manager(trainer, cfg.exp_manager) + + # update resume from checkpoint found by exp_manager + if cfg.model.resume_from_checkpoint is not None: + resume_from_checkpoint = cfg.model.resume_from_checkpoint + else: + resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path + + logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}') + + trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + if cfg.model.restore_from_pretrained is None: + model = MegatronNevaLoRAModel(cfg.model, trainer) + else: + model = MegatronNevaLoRAModel.restore_from( + restore_path=cfg.model.restore_from_pretrained, + trainer=trainer, + override_config_path=cfg.model, + save_restore_connector=NLPSaveRestoreConnector(), + strict=False, + ) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/mllm/neva/neva_pretrain.py b/examples/multimodal/mllm/neva/neva_pretrain.py new file mode 100644 index 000000000000..517f096b41bd --- /dev/null +++ b/examples/multimodal/mllm/neva/neva_pretrain.py @@ -0,0 +1,90 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import torch.multiprocessing as mp +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector + +from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + +mp.set_start_method("spawn", force=True) + + +@hydra_runner(config_path="conf", config_name="neva_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + ) + if megatron_amp_O2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + exp_manager(trainer, cfg.exp_manager) + + # update resume from checkpoint found by exp_manager + if cfg.model.resume_from_checkpoint is not None: + resume_from_checkpoint = cfg.model.resume_from_checkpoint + else: + resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path + + logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}') + + trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + model = MegatronNevaModel(cfg.model, trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/nlp/language_modeling/megatron_change_num_partitions.py b/examples/nlp/language_modeling/megatron_change_num_partitions.py index 72655089e0ee..dd0e23a3f7ee 100644 --- a/examples/nlp/language_modeling/megatron_change_num_partitions.py +++ b/examples/nlp/language_modeling/megatron_change_num_partitions.py @@ -1022,6 +1022,8 @@ def main(): save_restore_connector=save_restore_connector, return_config=True, ) + if "neva" in args.model_class: + tmp_cfg.mm_cfg.llm.from_pretrained = None # Force model onto CPU tmp_cfg, restore_dict = force_cpu_model(tmp_cfg) @@ -1269,6 +1271,8 @@ def main(): save_restore_connector=save_restore_connector, return_config=True, ) + if "neva" in args.model_class: + tmp_cfg.mm_cfg.llm.from_pretrained = None tmp_cfg, restore_dict = force_cpu_model(tmp_cfg) @@ -1277,6 +1281,7 @@ def main(): trainer=trainer, map_location=torch.device("cpu"), save_restore_connector=save_restore_connector, + override_config_path=tmp_cfg, ) model.to(dtype=dtype) diff --git a/nemo/collections/multimodal/data/common/data_samplers.py b/nemo/collections/multimodal/data/common/data_samplers.py index 7944087fee72..1cfd3d046a76 100644 --- a/nemo/collections/multimodal/data/common/data_samplers.py +++ b/nemo/collections/multimodal/data/common/data_samplers.py @@ -15,6 +15,7 @@ import torch from webdataset.pytorch import IterableDataset +from nemo.utils import logging class SharedEpoch: @@ -37,6 +38,7 @@ def __init__( consumed_samples: int, data_parallel_rank: int, data_parallel_size: int, + num_workers: int, drop_last: bool, data_sharding: bool, ): @@ -57,9 +59,13 @@ def __init__( self.urls = urls self.total_urls = total_urls self.chunk_size = chunk_size - self.consumed_samples = consumed_samples - assert consumed_samples % data_parallel_size == 0 - self.consumed_urls = consumed_samples // data_parallel_size // chunk_size * data_parallel_size + + if consumed_samples % data_parallel_size == 0: + logging.warning("Multimodal data resuming will be approximate!") + self.consumed_urls = ( + consumed_samples // (data_parallel_size * num_workers) // chunk_size * (data_parallel_size * num_workers) + ) + self.consumed_samples = self.consumed_urls * chunk_size self.data_parallel_rank = data_parallel_rank self.data_parallel_size = data_parallel_size @@ -76,12 +82,16 @@ def __len__(self): return (self.total_urls + self.data_parallel_size - 1) // self.data_parallel_size def __iter__(self): + worker_id, num_workers = 0, 1 worker_info = torch.utils.data.get_worker_info() if worker_info is not None: worker_id, num_workers = worker_info.id, worker_info.num_workers self.consumed_urls = ( - self.consumed_samples // self.data_parallel_size // self.chunk_size * self.data_parallel_size + self.consumed_samples + // (self.data_parallel_size * num_workers) + // self.chunk_size + * (self.data_parallel_size * num_workers) ) if self.drop_last or self.remaining_urls == 0: diff --git a/nemo/collections/multimodal/data/common/webdataset.py b/nemo/collections/multimodal/data/common/webdataset.py index 5ce77d6f3ba7..785f834b34f9 100644 --- a/nemo/collections/multimodal/data/common/webdataset.py +++ b/nemo/collections/multimodal/data/common/webdataset.py @@ -13,21 +13,24 @@ # limitations under the License. import io import itertools +import json import os import pickle import random import re from typing import Callable, List, Union +import boto3 import torch.distributed as dist import webdataset as wds from botocore.config import Config from PIL import Image -from webdataset import WebDataset +from webdataset import WebDataset, warn_and_continue from webdataset.filters import _shuffle from webdataset.utils import pytorch_worker_info from nemo.collections.multimodal.data.common.data_samplers import SharedEpoch, WDSUrlsRandomSampler +from nemo.collections.multimodal.data.common.webdataset_s3 import WebDataset as WebDatasetS3 from nemo.core.classes import IterableDataset as NeMoIterableDataset from nemo.utils import logging @@ -124,7 +127,6 @@ def __init__( self.consumed_samples = consumed_samples self.local_root_path = self.webdata_cfg.local_root_path - logging.info(f'Read Webdataset locally. Data stores at {self.local_root_path}') if is_train: dataset_path = dataset_cfg.train.dataset_path self.augmentations = dataset_cfg.train.get("augmentations", None) @@ -134,6 +136,22 @@ def __init__( self.augmentations = dataset_cfg.validation.get("augmentations", None) self.filterings = dataset_cfg.validation.get("filterings", None) + if "boto3" in dataset_cfg: + logging.info(f'Init boto3 using credentials file at {dataset_cfg.boto3.credentials_file}') + self.use_boto3 = True + assert dataset_cfg.boto3.credentials_file is not None + with open(dataset_cfg.boto3.credentials_file) as fin: + self.credentials = json.load(fin) + config = Config(connect_timeout=30, signature_version="s3", retries={"max_attempts": 999999}) + self.s3 = boto3.client('s3', **self.credentials, config=config) + self.bucket = dataset_cfg.boto3.bucket + self.local_root_path = "" + else: + logging.info(f'Read Webdataset locally. Data stores at {self.local_root_path}') + self.use_boto3 = False + self.s3 = None + self.bucket = None + # wdinfo in a dict containing webdata information self.wdinfo = dict() if dataset_path[0].endswith(".pkl"): @@ -155,6 +173,19 @@ def __init__( train_info['chunk_size'] = self.webdata_cfg.get("chunk_size", 1000) train_info['total_key_count'] = train_info['chunk_size'] * len(train_info['tar_files']) + self.data_parallel_size = parallel_state.get_data_parallel_world_size() + chunk_size = train_info['chunk_size'] + + num_workers = dataset_cfg.get("num_workers") or 1 + self.consumed_urls = ( + consumed_samples + // (self.data_parallel_size * num_workers) + // chunk_size + * (self.data_parallel_size * num_workers) + ) + self.consumed_samples = self.consumed_urls * chunk_size + self.skip_ahead = consumed_samples - self.consumed_samples + decode_fn = pil_loader if decode_fn is None else decode_fn shards_train_list = train_info["tar_files"] num_shards = len(shards_train_list) @@ -169,8 +200,8 @@ def __init__( logging.info(f'Estimated {self.filterings.estimated_portion} will be remaining after filtering') train_info["total_key_count"] = int(train_info["total_key_count"] * self.filterings.estimated_portion) - from webdataset import warn_and_continue - + # WDS Dataset Pipeline + # DetShuffle -> Decode -> Filter -> Map -> Compose train_dataset, epoch = self._get_webdataset_and_epoch() train_dataset = train_dataset.compose(detshuffle2(bufsize=shuffle_buffer_size, epoch=epoch)) train_dataset = train_dataset.decode(decode_fn, handler=warn_and_continue) @@ -179,8 +210,7 @@ def __init__( if self.filterings.resolution is not None: train_dataset = train_dataset.select(filter_fn) - # traindataset.to_tuple("").map_tuple(fns) - train_dataset = train_dataset.map(map_fn) + train_dataset = train_dataset.map(map_fn, handler=warn_and_continue) if not isinstance(compose_fn, list): compose_fn = [compose_fn] for fn in compose_fn: @@ -220,20 +250,37 @@ def _get_webdataset_and_epoch(self): consumed_samples=self.consumed_samples, data_parallel_rank=parallel_state.get_data_parallel_rank(), data_parallel_size=parallel_state.get_data_parallel_world_size(), + num_workers=self.dataset_cfg.get("num_workers") or 1, drop_last=True, data_sharding=self.dataset_cfg.train.get("data_sharding", True), ) epoch = shards_train_list.epoch - train_dataset = WebDataset( - shards_train_list, handler=warn_and_continue, resampled=self.infinite_sampler or False, - ) + if self.use_boto3: + train_dataset = WebDatasetS3( + shards_train_list, + handler=warn_and_continue, + resampled=self.infinite_sampler or False, + load_from_object_store=self.use_boto3, + s3_client=self.s3, + s3_bucket_name=self.bucket, + ) + else: + train_dataset = WebDataset( + shards_train_list, handler=warn_and_continue, resampled=self.infinite_sampler or False, + ) return train_dataset, epoch def __iter__(self): - return self._dataset.__iter__() + ds_iter = self._dataset.__iter__() + while self.skip_ahead > 0 and not self.infinite_sampler: + try: + _ = next(ds_iter) + self.skip_ahead -= self.data_parallel_size * self.num_workers + except StopIteration: + self.skip_ahead = 0 + return ds_iter def __len__(self): - world_size = get_world_size() - return self._dataset.total_images // world_size + return self._dataset.total_images diff --git a/nemo/collections/multimodal/data/common/webdataset_s3.py b/nemo/collections/multimodal/data/common/webdataset_s3.py new file mode 100644 index 000000000000..d1cc7c9278e3 --- /dev/null +++ b/nemo/collections/multimodal/data/common/webdataset_s3.py @@ -0,0 +1,237 @@ +import io +import os +import sys +from urllib.parse import urlparse + +import webdataset.gopen as gopen_webdata +import yaml +from webdataset import cache, filters, shardlists +from webdataset.compat import FluidInterface +from webdataset.handlers import reraise_exception +from webdataset.pipeline import DataPipeline +from webdataset.pytorch import IterableDataset +from webdataset.tariterators import group_by_keys, tar_file_expander + +# Number of attempts to read aws objects. +_NUM_OBJECT_STORE_READ_ATTEMPTS = 10 + + +def gopen(url, mode="rb", bufsize=8192, **kw): + r"""Open the URL. + This uses the `gopen_schemes` dispatch table to dispatch based + on scheme. + Support for the following schemes is built-in: pipe, file, + http, https, sftp, ftps, scp. + When no scheme is given the url is treated as a file. + You can use the OPEN_VERBOSE argument to get info about + files being opened. + + This implementation is based on webdataset's gopen, + with the modification of supporting reading from s3 object_store: + https://webdataset.github.io/webdataset/api/webdataset/gopen.html#gopen + Args: + url (list[str]): the source URL + mode (str): the mode ("rb", "r") + bufsize (int): the buffer size + """ + global fallback_gopen + verbose = int(os.environ.get("GOPEN_VERBOSE", 0)) + if verbose: + print("GOPEN", url, gopen_webdata.info, file=sys.stderr) + + assert mode in ["rb", "wb"], mode + if url == "-": + if mode == "rb": + return sys.stdin.buffer + elif mode == "wb": + return sys.stdout.buffer + else: + raise ValueError(f"unknown mode {mode}") + + # If we specify 'object_store' in keyword arguments, + # then we would load from AWS. + # In this case, you also need to specify s3_client and s3_bucket_name + # in arguments. + if 'object_store' in kw and kw['object_store']: + # Load from object store + attempt = 0 + + while attempt < _NUM_OBJECT_STORE_READ_ATTEMPTS: + try: + s3_response_object = kw['s3_client'].get_object(Bucket=kw['s3_bucket_name'], Key=url) + object_content = s3_response_object['Body'].read() + + # This is a check to verify is the object is fully read. + full_read = s3_response_object['ContentLength'] == len(object_content) + if full_read: + return io.BytesIO(object_content) + else: + attempt += 1 + except Exception as e: # noqa + # If there is an exception (usually connectivity error or protocol error), read again + attempt += 1 + print(e) + print('Retrying tar file download, attempt {}'.format(attempt)) + continue + raise ConnectionError('Unable to read {} from PBSS. {} attempts tried.'.format(url, attempt)) + + # Append root path to the url if dataset is stored on local disk system + elif 'local_root_path' in kw and kw['local_root_path'] is not None: + url = os.path.join(kw['local_root_path'], url) + + # For all other gopen schemes, use the native webdataset gopen functions. + pr = urlparse(url) + if pr.scheme == "": + bufsize = int(os.environ.get("GOPEN_BUFFER", -1)) + return open(url, mode, buffering=bufsize) + if pr.scheme == "file": + bufsize = int(os.environ.get("GOPEN_BUFFER", -1)) + return open(pr.path, mode, buffering=bufsize) + handler = gopen_webdata.gopen_schemes["__default__"] + handler = gopen_webdata.gopen_schemes.get(pr.scheme, handler) + return handler(url, mode, bufsize, **kw) + + +def url_opener(data, handler=reraise_exception, **kw): + r"""Given a stream of url names (packaged in `dict(url=url)`), yield opened streams. + + Args: + data: Iterator of dictionaires containing url paths. + handler: Exception handler. + """ + for sample in data: + assert isinstance(sample, dict), sample + assert "url" in sample + url = sample["url"] + try: + stream = gopen(url, **kw) + sample.update(stream=stream) + yield sample + except Exception as exn: + exn.args = exn.args + (url,) + if handler(exn): + continue + else: + break + + +# Define a new tarfile_samples +def tarfile_samples( + src, + handler=reraise_exception, + load_from_object_store=False, + s3_client=None, + s3_bucket_name=None, + local_root_path=None, +): + r""" + Given an iterator of filenames, this function opens the URL streams + and groups data by keys. + + Args: + src: Iterator of data dictionaires containing URL names. + handler: Exception handler. + load_from_object_store (bool): A boolean flag to specify whether to load from + object store. + s3_client: If loading from object store, specify S3 client. + s3_bucket_name: If loading from object store, specify S3 bucket name. + local_root_path: If loading from local (or mounted) disk system, + specify the root path of the dataset. + """ + streams = url_opener( + src, + handler=handler, + object_store=load_from_object_store, + s3_client=s3_client, + s3_bucket_name=s3_bucket_name, + local_root_path=local_root_path, + ) + files = tar_file_expander(streams, handler=handler) + samples = group_by_keys(files, handler=handler) + return samples + + +tarfile_to_samples = filters.pipelinefilter(tarfile_samples) + + +class WebDataset(DataPipeline, FluidInterface): + r"""Webdataset class modified to support loading from object store.""" + + def __init__( + self, + urls, + handler=reraise_exception, + resampled=False, + shardshuffle=None, + cache_size=-1, + cache_dir=None, + detshuffle=False, + nodesplitter=shardlists.single_node_only, + verbose=False, + load_from_object_store=False, + s3_client=None, + s3_bucket_name=None, + local_root_path=None, + ): + r""" + Args: + urls: An iterator containing a list of url names. + handler: Exception handler. + resampled: If true, sample shards from shard list with replacement. + shardshuffle: If true, shuffles the entire shard list. + cache_size: Size of cache. + cache_dir: Path to store cache. + detshuffle: Whether to use deterministic shuffling when shardshuffle is True. + nodesplitter: Function for splitting urls among nodes. + verbose: If True, prints logs. + load_from_object_store (bool): A boolean flag to specify whether to load from + object store. + s3_client: If loading from object store, specify S3 client. + s3_bucket_name: If loading from object store, specify S3 bucket name. + local_root_path: If loading from local (or mounted) disk system, + specify the root path of the dataset. + """ + super().__init__() + if isinstance(urls, IterableDataset): + assert not resampled + self.append(urls) + elif isinstance(urls, str) and (urls.endswith(".yaml") or urls.endswith(".yml")): + with (open(urls)) as stream: + spec = yaml.safe_load(stream) + assert "datasets" in spec + self.append(shardlists.MultiShardSample(spec)) + elif isinstance(urls, dict): + assert "datasets" in urls + self.append(shardlists.MultiShardSample(urls)) + elif resampled: + self.append(shardlists.ResampledShards(urls)) + else: + self.append(shardlists.SimpleShardList(urls)) + self.append(nodesplitter) + self.append(shardlists.split_by_worker) + if shardshuffle is True: + shardshuffle = 100 + if shardshuffle is not None: + if detshuffle: + self.append(filters.detshuffle(shardshuffle)) + else: + self.append(filters.shuffle(shardshuffle)) + if cache_dir is None or cache_size == 0: + self.append( + tarfile_to_samples( + handler=handler, + load_from_object_store=load_from_object_store, + s3_client=s3_client, + s3_bucket_name=s3_bucket_name, + local_root_path=local_root_path, + ) + ) + else: + + # We dont use cache. + assert cache_size == -1 or cache_size > 0 + self.append( + cache.cached_tarfile_to_samples( + handler=handler, verbose=verbose, cache_size=cache_size, cache_dir=cache_dir, + ) + ) diff --git a/nemo/collections/multimodal/data/kosmos/__init__.py b/nemo/collections/multimodal/data/kosmos/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/data/kosmos/kosmos_dataset.py b/nemo/collections/multimodal/data/kosmos/kosmos_dataset.py new file mode 100644 index 000000000000..78b736ca36b8 --- /dev/null +++ b/nemo/collections/multimodal/data/kosmos/kosmos_dataset.py @@ -0,0 +1,366 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import io +import json +import re +from functools import partial +from typing import Any, Dict, List, Optional, Union + +import torch +from einops import rearrange +from PIL import Image +from torch.utils.data import Dataset, default_collate + +from nemo.collections.multimodal.data.clip.augmentations.augmentations import image_transform +from nemo.collections.multimodal.data.clip.imagenet_zeroshot_data import imagenet_classnames, openai_imagenet_template +from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( + MegatronPretrainingRandomSampler, + MegatronPretrainingSampler, +) +from nemo.collections.nlp.data.language_modeling.megatron.gpt_dataset import _create_ltor_masks_and_position_ids +from nemo.collections.vision.data.megatron.image_folder import ImageFolder +from nemo.collections.vision.data.megatron.vit_dataset import RandomSeedDataset + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + +MIN_KB = 10 +MAX_NUM_IMAGES = 6 +Image.MAX_IMAGE_PIXELS = 933120000 +_IMG_EXTENSIONS = "jpg jpeg png ppm pgm pbm pnm".split() +_DATASET_TYPES = ["image_caption", "image_interleaved"] + + +def pil_loader(key, data): + r""" + Function to load an image. + If the image is corrupt, it returns a black image. + Args: + key: Image key. + data: Image data stream. + """ + extension = re.sub(r".*[.]", "", key) + if extension.lower() not in _IMG_EXTENSIONS: + return None + if len(data) // 1000 <= MIN_KB: + return None + + with io.BytesIO(data) as stream: + img = Image.open(stream) + img.load() + img = img.convert("RGB") + + return img + + +def tokenize_and_insert_media_tokens( + texts: Union[str, List[str]], + tokenizer: Any, + context_length: int, + num_media_tokens: int, + add_extra_token: int, + media_start_id: str, + media_end_id: str, +) -> torch.LongTensor: + """ + Returns the tokenized representation of given input string(s) with media tokens inserted. + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize. + tokenizer : Any + A tokenizer to be used for tokenization. + context_length : int + The context length to be used for the output tensor. + num_media_tokens : int + The number of media latents to insert between media tokens. + + Returns + ------- + torch.LongTensor + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length]. + """ + assert add_extra_token == 0 or add_extra_token == 1, "`add_extra_token` should be either 0 or 1." + + texts_is_str = False + if isinstance(texts, str): + texts = [texts] + texts_is_str = True + + # bos token is never used + # bos_id = tokenizer.bos_id + eos_id = tokenizer.eos_id + + all_tokens = [] + for text in texts: + tokens = tokenizer.text_to_ids(text) + media_positions = [i for i, x in enumerate(tokens) if x == media_start_id] + for media_pos in media_positions[::-1]: + tokens[media_pos : media_pos + 1] = [media_start_id] + [-1] * num_media_tokens + [media_end_id] + tokens = tokens + [eos_id] + all_tokens.append(tokens) + + # truncate and padding + result = torch.zeros(len(all_tokens), context_length + add_extra_token, dtype=torch.long) + + for i, tokens in enumerate(all_tokens): + if len(tokens) > context_length + add_extra_token: + tokens = tokens[: context_length + add_extra_token] # Truncate + result[i, : len(tokens)] = torch.tensor(tokens) + + if texts_is_str: + result = result[0] + return result + + +def get_preprocess_fns( + model_cfg, data_type, tokenizer=None, is_train=True, add_extra_token=1, media_start_id=None, media_end_id=None, +): + assert ( + media_start_id is not None and media_end_id is not None + ), "`media_start_id` and `media_end_id` should be provided." + + # Define transforms + img_size = (model_cfg.vision.get("img_h"), model_cfg.vision.get("img_w")) + img_mean = model_cfg.vision.get("img_mean") + img_std = model_cfg.vision.get("img_std") + img_transform = image_transform(img_size, is_train=is_train, mean=img_mean, std=img_std,) + + text_transform = lambda x: x + if tokenizer is not None: + text_transform = partial( + tokenize_and_insert_media_tokens, + tokenizer=tokenizer, + context_length=model_cfg.per_type_sequence_length[data_type], + num_media_tokens=model_cfg.num_media_latents, + add_extra_token=add_extra_token, + media_start_id=media_start_id, + media_end_id=media_end_id, + ) + else: + raise ValueError("tokenizer should not be None here!") + + return img_transform, text_transform + + +def transform_fn_for_image_caption(sample, img_transform, text_transform, media_start_token): + image, text = sample["jpg"], sample["txt"] + caption_template = lambda x: f"{media_start_token}{x.strip()}" + text = caption_template(text) + return img_transform(image), text_transform(text) + + +def transform_fn_for_image_interleaved(sample, img_transform, text_transform, media_start_token, sim_threshold=0.3): + info = sample["json"] + sentences = info["text_list"] + + images, sentence_ixs = [], [] + for sample_image in info["image_info"]: + image = sample[sample_image["image_name"]] + # filter to images >= 10KB + if isinstance(image, bytes): + continue + if sample_image["matched_sim"] < sim_threshold: + continue + + images.append(image) + sentence_ixs.append(sample_image["matched_text_index"]) + + if len(images) == 0: + raise ValueError("No images in sample") + + keep_ixs = min(len(images), MAX_NUM_IMAGES) + images = images[:keep_ixs] + sentence_ixs = sentence_ixs[:keep_ixs] + + def interleaved_template(sentences, sentence_ixs): + for ix in sentence_ixs: + sentences[ix] = f"{media_start_token}{sentences[ix]}" + text = " ".join(sentences) + return text + + text = interleaved_template(sentences, sentence_ixs) + images_tensors = torch.stack([img_transform(image) for image in images]) + image_size = images_tensors.shape[1:] + if len(images_tensors) < MAX_NUM_IMAGES: + zero_padding = torch.zeros((MAX_NUM_IMAGES - len(images_tensors), *image_size), dtype=torch.float) + images_tensors = torch.cat((images_tensors, zero_padding), dim=0) + + return images_tensors, text_transform(text) + + +def compose_batch(inp, model_cfg, tokenizer, add_extra_token, media_start_id, media_end_id, newline_id): + pad_id = tokenizer.pad_id + for input in inp: + media = input[0] + + # vision_x should be of shape (b, T_img, F, C, H, W) + if len(media.shape) == 3: # image_caption + media = rearrange(media, "c h w -> 1 1 c h w") + elif len(media.shape) == 4: # image_interleaved + media = rearrange(media, "T c h w -> T 1 c h w") + else: + raise ValueError(f"Media shape length is not expected: {media.shape}.") + + tokens = input[1] + if add_extra_token: + tokens = input[1][:-1].contiguous() + labels = input[1][1:].contiguous().clone().detach() + else: + labels = torch.roll(tokens, shifts=-1, dims=0) + labels[-1] = -1 + + labels[labels == media_start_id] = newline_id + labels[labels == media_end_id] = -1 + labels[labels == pad_id] = -1 + + attention_mask, loss_mask, position_ids = _create_ltor_masks_and_position_ids( + tokens=tokens, + eod_token=tokenizer.eos_id, + eod_mask_loss=model_cfg.data.get("eod_mask_loss", False), + reset_attention_mask=False, + reset_position_ids=False, + ) + + loss_mask[labels == -1] = 0.0 + tokens[tokens == -1] = 0 + labels[labels == -1] = 0 + + yield { + 'tokens': tokens, + 'labels': labels, + 'attention_mask': attention_mask, + 'loss_mask': loss_mask, + 'position_ids': position_ids, + 'media': media, + } + + +def build_train_valid_datasets( + model_cfg, consumed_samples, tokenizer=None, data_type='image_caption', +): + assert data_type in _DATASET_TYPES, f"`data_type={data_type}` is not available: {_DATASET_TYPES}." + + media_start_token = model_cfg.media_start_token + media_end_token = model_cfg.media_end_token + assert ( + media_start_token in tokenizer.vocab and media_end_token in tokenizer.vocab + ), f"Cannot find media tokens in tokenizer vocab: {media_start_token} {media_end_token}" + media_start_id = tokenizer.token_to_id(media_start_token) + media_end_id = tokenizer.token_to_id(media_end_token) + newline_id = tokenizer.text_to_ids("\n")[-1] + + data_cfg = model_cfg.data.get(data_type) + + no_seqlen_plus_one_input_tokens = model_cfg.data.get('no_seqlen_plus_one_input_tokens', False) + add_extra_token = 0 if no_seqlen_plus_one_input_tokens else 1 + + compose_fn = compose_batch + if data_type == 'image_caption': + transform_fn = transform_fn_for_image_caption + elif data_type == 'image_interleaved': + transform_fn = transform_fn_for_image_interleaved + + train_img_transform, text_transform = get_preprocess_fns( + model_cfg, + data_type=data_type, + tokenizer=tokenizer, + is_train=True, + add_extra_token=add_extra_token, + media_start_id=media_start_id, + media_end_id=media_end_id, + ) + train_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + decode_fn=pil_loader if data_type == 'interleaved' else None, + map_fn=partial( + transform_fn, + img_transform=train_img_transform, + text_transform=text_transform, + media_start_token=media_start_token, + ), + compose_fn=partial( + compose_fn, + model_cfg=model_cfg, + tokenizer=tokenizer, + add_extra_token=add_extra_token, + media_start_id=media_start_id, + media_end_id=media_end_id, + newline_id=newline_id, + ), + is_train=True, + ) + + val_data = None + if data_cfg.get("validation") is not None and data_cfg.validation.get("dataset_path"): + val_img_transform, text_transform = get_preprocess_fns( + model_cfg, + data_type=data_type, + tokenizer=tokenizer, + is_train=False, + add_extra_token=add_extra_token, + media_start_id=media_start_id, + media_end_id=media_end_id, + ) + val_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=0, + decode_fn=pil_loader if data_type == 'interleaved' else None, + map_fn=partial( + transform_fn, + img_transform=train_img_transform, + text_transform=text_transform, + media_start_token=media_start_token, + ), + compose_fn=partial( + compose_fn, + model_cfg=model_cfg, + tokenizer=tokenizer, + add_extra_token=add_extra_token, + media_start_id=media_start_id, + media_end_id=media_end_id, + newline_id=newline_id, + ), + is_train=False, + ) + + return train_data, val_data + + +class MergedKosmosDataLoader: + def __init__(self, dataloaders): + self.dataloaders = dataloaders + self.dataloader_iters = {type: iter(dataloader) for type, dataloader in dataloaders.items()} + self.lengths = {type: len(dataloader) for type, dataloader in dataloaders.items()} + self.min_length = min(self.lengths.values()) + + def __iter__(self): + while True: + try: + batch = {type: next(iter) for type, iter in self.dataloader_iters.items()} + except StopIteration: + return + yield batch + + def __len__(self): + return self.min_length diff --git a/nemo/collections/multimodal/data/neva/__init__.py b/nemo/collections/multimodal/data/neva/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/data/neva/conversation.py b/nemo/collections/multimodal/data/neva/conversation.py new file mode 100644 index 000000000000..ef8947290645 --- /dev/null +++ b/nemo/collections/multimodal/data/neva/conversation.py @@ -0,0 +1,401 @@ +import dataclasses +import re +from enum import Enum, auto +from typing import List, Tuple + + +class SeparatorStyle(Enum): + """Different separator style.""" + + SINGLE = auto() + TWO = auto() + MPT = auto() + NVGPT = auto() + + +@dataclasses.dataclass +class Conversation: + """A class that keeps all conversation history.""" + + system: str + roles: List[str] + messages: List[List[str]] + offset: int + sep_style: SeparatorStyle = SeparatorStyle.SINGLE + sep: str = "###" + sep2: str = None # sep before system + version: str = "Unknown" + skip_next: bool = False + + def get_prompt(self): + if self.sep_style == SeparatorStyle.SINGLE: + ret = self.system + self.sep + for role, message in self.messages: + if message: + if type(message) is tuple: + message, _, _ = message + ret += role + ": " + message + self.sep + else: + ret += role + ":" + return ret + elif self.sep_style == SeparatorStyle.TWO: + seps = [self.sep, self.sep2] + ret = self.system + seps[0] + for i, (role, message) in enumerate(self.messages): + if message: + if type(message) is tuple: + message, _, _ = message + ret += role + ": " + message + seps[i % 2] + else: + ret += role + ":" + return ret + if self.sep_style == SeparatorStyle.MPT: + ret = self.system + self.sep + for role, message in self.messages: + if message: + if type(message) is tuple: + message, _, _ = message + ret += role + message + self.sep + else: + ret += role + return ret + if self.sep_style == SeparatorStyle.NVGPT: + ret = self.sep2 + self.system + self.sep + for role, message in self.messages: + if message: + if type(message) is tuple: + message, _, _ = message + ret += role + '\n' + message + '\n' + self.sep + else: + ret += role + '\n' + return ret + else: + raise ValueError(f"Invalid style: {self.sep_style}") + + def append_message(self, role, message): + self.messages.append([role, message]) + + def get_images(self, return_pil=False): + images = [] + for i, (role, msg) in enumerate(self.messages[self.offset :]): + if i % 2 == 0: + if type(msg) is tuple: + import base64 + from io import BytesIO + + from PIL import Image + + msg, image, image_process_mode = msg + if image_process_mode == "Pad": + + def expand2square(pil_img, background_color=(122, 116, 104)): + width, height = pil_img.size + if width == height: + return pil_img + elif width > height: + result = Image.new(pil_img.mode, (width, width), background_color) + result.paste(pil_img, (0, (width - height) // 2)) + return result + else: + result = Image.new(pil_img.mode, (height, height), background_color) + result.paste(pil_img, ((height - width) // 2, 0)) + return result + + image = expand2square(image) + elif image_process_mode == "Crop": + pass + elif image_process_mode == "Resize": + image = image.resize((224, 224)) + else: + raise ValueError(f"Invalid image_process_mode: {image_process_mode}") + max_hw, min_hw = max(image.size), min(image.size) + aspect_ratio = max_hw / min_hw + max_len, min_len = 800, 400 + shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw)) + longest_edge = int(shortest_edge * aspect_ratio) + W, H = image.size + if H > W: + H, W = longest_edge, shortest_edge + else: + H, W = shortest_edge, longest_edge + image = image.resize((W, H)) + if return_pil: + images.append(image) + else: + buffered = BytesIO() + image.save(buffered, format="JPEG") + img_b64_str = base64.b64encode(buffered.getvalue()).decode() + images.append(img_b64_str) + return images + + def to_gradio_chatbot(self): + ret = [] + for i, (role, msg) in enumerate(self.messages[self.offset :]): + if i % 2 == 0: + if type(msg) is tuple: + import base64 + from io import BytesIO + + msg, image, image_process_mode = msg + max_hw, min_hw = max(image.size), min(image.size) + aspect_ratio = max_hw / min_hw + max_len, min_len = 800, 400 + shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw)) + longest_edge = int(shortest_edge * aspect_ratio) + W, H = image.size + if H > W: + H, W = longest_edge, shortest_edge + else: + H, W = shortest_edge, longest_edge + image = image.resize((W, H)) + # image = image.resize((224, 224)) + buffered = BytesIO() + image.save(buffered, format="JPEG") + img_b64_str = base64.b64encode(buffered.getvalue()).decode() + img_str = f'user upload image' + msg = msg.replace('', img_str) + ret.append([msg, None]) + else: + ret[-1][-1] = msg + return ret + + def copy(self): + return Conversation( + system=self.system, + roles=self.roles, + messages=[[x, y] for x, y in self.messages], + offset=self.offset, + sep_style=self.sep_style, + sep=self.sep, + sep2=self.sep2, + ) + + def dict(self): + if len(self.get_images()) > 0: + return { + "system": self.system, + "roles": self.roles, + "messages": [[x, y[0] if type(y) is tuple else y] for x, y in self.messages], + "offset": self.offset, + "sep": self.sep, + "sep2": self.sep2, + } + return { + "system": self.system, + "roles": self.roles, + "messages": self.messages, + "offset": self.offset, + "sep": self.sep, + "sep2": self.sep2, + } + + +# . . +# NVGPT +# . . + +conv_nvgpt = Conversation( + system="""A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n\n""", + roles=("User", "Assistant"), + version="nvgpt", + messages=(), + offset=0, + sep_style=SeparatorStyle.NVGPT, + sep="", + sep2="System\n", +) +conv_nvgpt_text = conv_nvgpt +simple_conv_nvgpt_multimodal = conv_nvgpt + + +conv_v1 = Conversation( + system="A chat between a curious human and an artificial intelligence assistant. " + "The assistant gives helpful, detailed, and polite answers to the human's questions.", + roles=("Human", "Assistant"), + messages=( + ("Human", "Give three tips for staying healthy."), + ( + "Assistant", + "Sure, here are three tips for staying healthy:\n" + "1. Exercise regularly: Regular physical activity can help improve your overall health and wellbeing. " + "It can also help reduce your risk of chronic conditions such as obesity, diabetes, heart disease, " + "and certain cancers. Aim for at least 150 minutes of moderate-intensity aerobic exercise or " + "75 minutes of vigorous-intensity aerobic exercise per week, along with muscle-strengthening " + "activities at least two days per week.\n" + "2. Eat a balanced diet: Eating a balanced diet that is rich in fruits, " + "vegetables, whole grains, lean proteins, and healthy fats can help support " + "your overall health. Try to limit your intake of processed and high-sugar foods, " + "and aim to drink plenty of water throughout the day.\n" + "3. Get enough sleep: Getting enough quality sleep is essential for your physical " + "and mental health. Adults should aim for seven to nine hours of sleep per night. " + "Establish a regular sleep schedule and try to create a relaxing bedtime routine to " + "help improve the quality of your sleep.", + ), + ), + offset=2, + sep_style=SeparatorStyle.SINGLE, + sep="###", +) + +conv_v1_2 = Conversation( + system="A chat between a curious human and an artificial intelligence assistant. " + "The assistant gives helpful, detailed, and polite answers to the human's questions.", + roles=("Human", "Assistant"), + messages=( + ("Human", "What are the key differences between renewable and non-renewable energy sources?"), + ( + "Assistant", + "Renewable energy sources are those that can be replenished naturally in a relatively " + "short amount of time, such as solar, wind, hydro, geothermal, and biomass. " + "Non-renewable energy sources, on the other hand, are finite and will eventually be " + "depleted, such as coal, oil, and natural gas. Here are some key differences between " + "renewable and non-renewable energy sources:\n" + "1. Availability: Renewable energy sources are virtually inexhaustible, while non-renewable " + "energy sources are finite and will eventually run out.\n" + "2. Environmental impact: Renewable energy sources have a much lower environmental impact " + "than non-renewable sources, which can lead to air and water pollution, greenhouse gas emissions, " + "and other negative effects.\n" + "3. Cost: Renewable energy sources can be more expensive to initially set up, but they typically " + "have lower operational costs than non-renewable sources.\n" + "4. Reliability: Renewable energy sources are often more reliable and can be used in more remote " + "locations than non-renewable sources.\n" + "5. Flexibility: Renewable energy sources are often more flexible and can be adapted to different " + "situations and needs, while non-renewable sources are more rigid and inflexible.\n" + "6. Sustainability: Renewable energy sources are more sustainable over the long term, while " + "non-renewable sources are not, and their depletion can lead to economic and social instability.\n", + ), + ), + offset=2, + sep_style=SeparatorStyle.SINGLE, + sep="###", +) + +conv_vicuna_v1_1 = Conversation( + system="A chat between a curious user and an artificial intelligence assistant. " + "The assistant gives helpful, detailed, and polite answers to the user's questions.", + roles=("USER", "ASSISTANT"), + version="v1", + messages=(), + offset=0, + sep_style=SeparatorStyle.TWO, + sep=" ", + sep2="", +) + +conv_mpt = Conversation( + system="""<|im_start|>system +- You are a helpful language and vision assistant. +- You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language. +- You should follow the instructions carefully and explain your answers in detail.""", + roles=("<|im_start|>user\n", "<|im_start|>assistant\n"), + version="mpt", + messages=(), + offset=0, + sep_style=SeparatorStyle.MPT, + sep="<|im_end|>", +) + +conv_mpt_text = Conversation( + system="""<|im_start|>system +- You are a helpful assistant chatbot trained by MosaicML. +- You answer questions. +- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user. +- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.""", + roles=("<|im_start|>user\n", "<|im_start|>assistant\n"), + version="mpt", + messages=(), + offset=0, + sep_style=SeparatorStyle.MPT, + sep="<|im_end|>", +) + +conv_bair_v1 = Conversation( + system="BEGINNING OF CONVERSATION:", + roles=("USER", "GPT"), + messages=(), + offset=0, + sep_style=SeparatorStyle.TWO, + sep=" ", + sep2="", +) + +simple_conv = Conversation( + system="A chat between a curious human and an artificial intelligence assistant. " + "The assistant gives helpful, detailed, and polite answers to the human's questions.", + roles=("Human", "Assistant"), + messages=(("Human", "Hi!"), ("Assistant", "Hi there! How can I help you today?")), + offset=2, + sep_style=SeparatorStyle.SINGLE, + sep="###", +) + +simple_conv_multimodal = Conversation( + system="You are LLaVA, a large language and vision assistant trained by UW Madison WAIV Lab." + "You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language." + "Follow the instructions carefully and explain your answers in detail.", + roles=("Human", "Assistant"), + messages=(("Human", "Hi!"), ("Assistant", "Hi there! How can I help you today?\n")), + offset=2, + sep_style=SeparatorStyle.SINGLE, + sep="###", +) + +simple_conv_mpt_multimodal = Conversation( + system="""<|im_start|>system +- You are LLaVA, a large language and vision assistant trained by UW Madison WAIV Lab. +- You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language. +- You should follow the instructions carefully and explain your answers in detail.""", + roles=("<|im_start|>user\n", "<|im_start|>assistant\n"), + version="mpt", + messages=(), + offset=0, + sep_style=SeparatorStyle.MPT, + sep="<|im_end|>", +) + +simple_conv_legacy = Conversation( + system="You are LLaVA, a large language model trained by UW Madison WAIV Lab." + "You are designed to assist human with a variety of tasks using natural language." + "Follow the instructions carefully.", + roles=("Human", "Assistant"), + messages=(("Human", "Hi!\n\n### Response:"), ("Assistant", "Hi there! How can I help you today?\n")), + offset=2, + sep_style=SeparatorStyle.SINGLE, + sep="###", +) + +conv_neva_v1 = Conversation( + system="You are LLaVA, a large language and vision assistant trained by UW Madison WAIV Lab." + "You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language." + "Follow the instructions carefully and explain your answers in detail.", + roles=("USER", "ASSISTANT"), + version="v1", + messages=(), + offset=0, + sep_style=SeparatorStyle.TWO, + sep=" ", + sep2="", +) + +default_conversation = conv_v1_2 +conv_templates = { + "default": conv_v1_2, + "simple": simple_conv, + "simple_legacy": simple_conv_legacy, + "multimodal": simple_conv_multimodal, + "mpt_multimodal": simple_conv_mpt_multimodal, + "nvgpt_multimodal": simple_conv_nvgpt_multimodal, + "neva_v1": conv_neva_v1, + # fastchat + "v1": conv_v1_2, + "bair_v1": conv_bair_v1, + "vicuna_v1_1": conv_vicuna_v1_1, + "mpt": conv_mpt, + "mpt_text": conv_mpt_text, + "nvgpt": conv_nvgpt, + "nvgpt_text": conv_nvgpt_text, +} + + +if __name__ == "__main__": + print(default_conversation.get_prompt()) diff --git a/nemo/collections/multimodal/data/neva/neva_dataset.py b/nemo/collections/multimodal/data/neva/neva_dataset.py new file mode 100644 index 000000000000..f46670b66303 --- /dev/null +++ b/nemo/collections/multimodal/data/neva/neva_dataset.py @@ -0,0 +1,401 @@ +import copy +import json +import logging +import os +import pathlib +import re +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional, Sequence, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +import transformers +from einops import rearrange +from omegaconf import DictConfig +from PIL import Image +from torch.utils.data import Dataset, default_collate +from transformers import CLIPImageProcessor + +import nemo.collections.multimodal.data.neva.conversation as conversation_lib +from nemo.collections.multimodal.data.kosmos.kosmos_dataset import tokenize_and_insert_media_tokens +from nemo.collections.nlp.modules.common.megatron.utils import get_ltor_masks_and_position_ids + +IGNORE_INDEX = -1 +DEFAULT_PAD_TOKEN = "" +DEFAULT_EOS_TOKEN = "" +DEFAULT_BOS_TOKEN = "" +DEFAULT_UNK_TOKEN = "" +DEFAULT_IMAGE_TOKEN = "" +DEFAULT_SYSTEM_TOKEN = "" +DEFAULT_SEPARATOR_TOKEN = "" +DEFAULT_LABELS_TOKEN = "" +DEFAULT_IMAGE_PATCH_TOKEN = "" +DEFAULT_IM_START_TOKEN = "" +DEFAULT_IM_END_TOKEN = "" +CLIP_MODEL = os.environ.get("CLIP_MODEL", "openai/clip-vit-large-patch14") + + +def tokenize( + texts: Union[str, List[str]], tokenizer: Any, context_length: int, add_extra_token: int, +) -> torch.LongTensor: + """ + Returns the tokenized representation of given input string(s). If the list of tokens exceeds the context + length plus the number of extra tokens, it gets truncated. If it's smaller, it gets padded with zeros. + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize. + tokenizer : Any + A tokenizer to be used for tokenization. + context_length : int + The context length to be used for the output tensor. + add_extra_token : int + Number of extra tokens to add, should be either 0 or 1. + + Returns + ------- + torch.LongTensor + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length + add_extra_token]. + """ + assert add_extra_token == 0 or add_extra_token == 1, "`add_extra_token` should be either 0 or 1." + + texts_is_str = False + if isinstance(texts, str): + texts = [texts] + texts_is_str = True + tokens = tokenizer.text_to_ids(texts) + max_len = max([len(token) for token in tokens]) + context_length = min(max_len - add_extra_token, context_length) + # truncate and padding + result = torch.zeros(len(tokens), context_length + add_extra_token, dtype=torch.long) + + for i, token in enumerate(tokens): + if len(token) > context_length + add_extra_token: + token = token[: context_length + add_extra_token] # Truncate + result[i, : len(token)] = torch.tensor(token) + if texts_is_str: + result = result[0] + return result + + +def preprocess_multimodal(sources: dict, multimodal_cfg: dict, cur_token_len: int,) -> Dict: + is_multimodal = multimodal_cfg['is_multimodal'] + image_token_len = cur_token_len + if not is_multimodal: + return sources + + for source in sources: + conversation = source['conversations'] + if multimodal_cfg['sep_image_conv_front']: + assert DEFAULT_IMAGE_TOKEN in conversation[0]['value'] + conversation[0]['value'] = conversation[0]['value'].replace(DEFAULT_IMAGE_TOKEN, '').strip() + conversation[0]['value'] = ( + DEFAULT_IMAGE_TOKEN + + conversation_lib.default_conversation.sep + + conversation_lib.default_conversation.roles[0] + + ": " + + conversation[0]['value'] + ) + for turn in conversation: + replace_token = DEFAULT_IMAGE_PATCH_TOKEN * image_token_len + if multimodal_cfg['use_im_start_end']: + replace_token = DEFAULT_IM_START_TOKEN + replace_token + DEFAULT_IM_END_TOKEN + turn["value"] = turn["value"].replace(DEFAULT_IMAGE_TOKEN, replace_token) + + return sources + + +def preprocess(sources: dict, tokenizer: transformers.PreTrainedTokenizer, cfg,) -> Dict: + """ + Given a record this transform: + 1. Add signal '<>' at the beginning each sentence, with end signal '\n'; + 2. Concatenate conversations together; + 3. Tokenize the concatenated conversation; + 4. Make a deepcopy as the target. Mask human words with IGNORE_INDEX. + """ + + conv = conversation_lib.conv_nvgpt.copy() + + # Apply prompt templates + conversations = [] + for source in sources: + conv.messages = [] + conv.system = source['system'] + if len(source['conversations']) >= 2: + conv.roles = (source['conversations'][0]['from'], source['conversations'][1]['from']) + + for turn in source['conversations']: + if 'label' in turn: + value = DEFAULT_LABELS_TOKEN + turn['label'] + '\n' + turn['value'] + conv.append_message(turn['from'], value) + else: + conv.append_message(turn['from'], turn['value']) + context = conv.get_prompt() + conversations.append(context) + + add_extra_token = cfg.get("add_extra_token") + # Tokenize conversations + tokens = tokenize( + texts=conversations, + tokenizer=tokenizer, + context_length=cfg.get("context_length"), + add_extra_token=add_extra_token, + ) + assert conv.sep_style == conversation_lib.SeparatorStyle.NVGPT + + labels = tokens.clone().detach() + + # Mask targets + sep = conv.sep + conv.roles[1] + "\n" + labels_str_regexp = re.compile(f"{DEFAULT_LABELS_TOKEN}quality:.*\n") + for conversation, target in zip(conversations, labels): + rounds = conversation.split(conv.sep) + re_rounds = [conv.sep.join(rounds[:3])] # system + user + gpt + + for conv_idx in range(3, len(rounds), 2): + re_rounds.append(conv.sep.join(rounds[conv_idx : conv_idx + 2])) # user + gpt + + cur_len = 0 + for i, rou in enumerate(re_rounds): + if rou == "": + break + parts = rou.split(sep) + if len(parts) != 2: + break + + # Match the pattern + match = labels_str_regexp.search(parts[1]) + labels_str = match.group() if match else "" + + instruction_len = len(tokenizer.text_to_ids(parts[0] + sep + labels_str)) + round_len = len(tokenizer.text_to_ids(rou + conv.sep)) + target[cur_len : cur_len + instruction_len] = IGNORE_INDEX + + cur_len += round_len + target[cur_len:] = IGNORE_INDEX + + if add_extra_token: + tokens = tokens[:, :-1].contiguous() + labels = labels[:, 1:].contiguous() + else: + labels = torch.roll(labels, shifts=-1, dims=-1) + labels[:, -1] = IGNORE_INDEX + + return dict(tokens=tokens, labels=labels,) + + +class SupervisedDataset(Dataset): + """Dataset for supervised fine-tuning.""" + + def __init__(self, data_path, tokenizer, multimodal_cfg=None): + super(SupervisedDataset, self).__init__() + logging.warning("Loading data...") + list_data_dict = json.load(open(data_path, "r")) + + logging.warning("Formatting inputs...") + sources = [example["conversations"] for example in list_data_dict] + data_dict = preprocess(sources, tokenizer) + + self.tokens = data_dict["tokens"] + self.labels = data_dict["labels"] + + def __len__(self): + return len(self.tokens) + + def __getitem__(self, i) -> Dict[str, torch.Tensor]: + return dict(tokens=self.tokens[i], labels=self.labels[i]) + + +class LazySupervisedDataset(Dataset): + """Dataset for supervised fine-tuning.""" + + def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer, multimodal_cfg: dict): + super(LazySupervisedDataset, self).__init__() + logging.warning("Loading data...") + if data_path is not None: + logging.warning("Loading data...") + list_data_dict = json.load(open(data_path, "r")) + else: + list_data_dict = [] + logging.warning("Formatting inputs...Skip in lazy mode") + self.tokenizer = tokenizer + self.list_data_dict = list_data_dict + self.multimodal_cfg = multimodal_cfg + self.processor = multimodal_cfg["image_processor"] + + def __len__(self): + return len(self.list_data_dict) + + def __getitem__(self, i) -> Dict[str, torch.Tensor]: + sources = self.list_data_dict[i] + processor = self.processor + if isinstance(i, int): + sources = [sources] + assert len(sources) == 1, "Don't know why it is wrapped to a list" # FIXME + if 'image' in sources[0]: + image_file = self.list_data_dict[i]['image'] + image_folder = self.multimodal_cfg['image_folder'] + image = Image.open(os.path.join(image_folder, image_file)).convert('RGB') + if self.multimodal_cfg['image_aspect_ratio'] == 'keep': + max_hw, min_hw = max(image.size), min(image.size) + aspect_ratio = max_hw / min_hw + max_len, min_len = 448, 224 + shortest_edge = int(min(max_len / aspect_ratio, min_len)) + image = processor.preprocess( + image, return_tensors='pt', do_center_crop=False, size={"shortest_edge": shortest_edge} + )['pixel_values'][0] + elif self.multimodal_cfg['image_aspect_ratio'] == 'pad': + + def expand2square(pil_img, background_color): + width, height = pil_img.size + if width == height: + return pil_img + elif width > height: + result = Image.new(pil_img.mode, (width, width), background_color) + result.paste(pil_img, (0, (width - height) // 2)) + return result + else: + result = Image.new(pil_img.mode, (height, height), background_color) + result.paste(pil_img, ((height - width) // 2, 0)) + return result + + image = expand2square(image, tuple(int(x * 255) for x in processor.image_mean)) + image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0] + else: + image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0] + cur_token_len = (image.shape[1] // 14) * (image.shape[2] // 14) # FIXME: 14 is hardcoded patch size + sources = preprocess_multimodal(copy.deepcopy(sources), self.multimodal_cfg, cur_token_len) + else: + sources = copy.deepcopy(sources) + + data_dict = preprocess(sources, self.tokenizer, self.multimodal_cfg,) + + if isinstance(i, int): + data_dict = dict(tokens=data_dict["tokens"][0], labels=data_dict["labels"][0]) + + # image exist in the data + if 'image' in self.list_data_dict[i]: + data_dict['image'] = image + elif self.multimodal_cfg['is_multimodal']: + # image does not exist in the data, but the model is multimodal + crop_size = self.processor.crop_size + data_dict['image'] = torch.zeros(3, crop_size['height'], crop_size['width']) + return data_dict + + +class SteerLMDataset(LazySupervisedDataset): + """Dataset for supervised fine-tuning.""" + + def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer, multimodal_cfg: dict): + + super(SteerLMDataset, self).__init__(None, tokenizer, multimodal_cfg) + logging.warning("Loading image inputs from SteerLM Dataset") + image_folder = multimodal_cfg['image_folder'] + for line in open(data_path, "r"): + record = json.loads(line) + + # This currently supports only a single image + # search for tag + for turn in record['conversations']: + # TODO (yuya): this is required? + if "image" not in record: + matches = re.finditer('', DEFAULT_IMAGE_TOKEN, turn['value']) + self.list_data_dict.append(record) + + +@dataclass +class DataCollatorForSupervisedDataset(object): + """Collate examples for supervised fine-tuning.""" + + model_cfg: DictConfig + tokenizer: transformers.PreTrainedTokenizer + + def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]: + max_len = max(instance['tokens'].shape[0] for instance in instances) + max_len = (max_len - 1) // 4 * 4 + 4 + for instance in instances: + pad_len = max_len - instance['tokens'].shape[0] + instance['tokens'] = F.pad(instance['tokens'], (0, pad_len), 'constant', 0) + instance['labels'] = F.pad(instance['labels'], (0, pad_len), 'constant', -1) + + batch = default_collate(instances) + tokenizer = self.tokenizer + model_cfg = self.model_cfg + + tokens = batch['tokens'] + labels = batch['labels'] + media = batch.get('image') + + attention_mask, loss_mask, position_ids = get_ltor_masks_and_position_ids( + data=tokens, + eod_token=tokenizer.eos_id, + eod_mask_loss=model_cfg.data.get("eod_mask_loss", False), + reset_attention_mask=False, + reset_position_ids=False, + ) + + loss_mask[labels == -1] = 0.0 + tokens[tokens == -1] = 0 + labels[labels == -1] = 0 + + if media is None: + raise NotImplementedError + else: + media = rearrange(media, "b c h w -> b 1 1 c h w") + + batch = { + 'tokens': tokens, + 'labels': labels, + 'attention_mask': attention_mask, + 'loss_mask': loss_mask, + 'position_ids': position_ids, + 'media': media, + } + return batch + + +def make_supervised_data_module(tokenizer, model_cfg) -> Dict: + """Make dataset and collator for supervised fine-tuning.""" + data_cfg = model_cfg.data + mm_cfg = model_cfg.mm_cfg + add_extra_token = 1 + if getattr(model_cfg, 'no_seqlen_plus_one_input_tokens', False): + add_extra_token = 0 + if mm_cfg.vision_encoder.from_hf: + image_processor = CLIPImageProcessor.from_pretrained( + mm_cfg.vision_encoder.from_pretrained, torch_dtype=torch.bfloat16 + ) + else: + image_processor = CLIPImageProcessor.from_pretrained( + "openai/clip-vit-large-patch14", torch_dtype=torch.bfloat16 + ) + train_dataset = SteerLMDataset( + tokenizer=tokenizer, + data_path=data_cfg.data_path, + multimodal_cfg=dict( + is_multimodal=data_cfg.is_multimodal, + sep_image_conv_front=data_cfg.sep_image_conv_front, + image_token_len=data_cfg.image_token_len, + image_folder=data_cfg.image_folder, + image_aspect_ratio=data_cfg.image_aspect_ratio, + use_im_start_end=getattr(model_cfg.mm_cfg, 'use_im_start_end', False), + image_processor=image_processor, + add_extra_token=add_extra_token, + context_length=model_cfg.encoder_seq_length, + ), + ) + # data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer) + return dict(train_dataset=train_dataset, eval_dataset=train_dataset) diff --git a/nemo/collections/multimodal/models/clip/megatron_clip_models.py b/nemo/collections/multimodal/models/clip/megatron_clip_models.py index c423c05ff601..e4a45312aa19 100644 --- a/nemo/collections/multimodal/models/clip/megatron_clip_models.py +++ b/nemo/collections/multimodal/models/clip/megatron_clip_models.py @@ -71,7 +71,7 @@ class CLIPVisionTransformer(MegatronModule): """Vision Transformer Model.""" - def __init__(self, model_cfg, pre_process=True, post_process=True): + def __init__(self, model_cfg, pre_process=True, post_process=True, skip_head=False): super(CLIPVisionTransformer, self).__init__() scaled_init_method = ( @@ -81,10 +81,10 @@ def __init__(self, model_cfg, pre_process=True, post_process=True): ) self.hidden_size = model_cfg.hidden_size - self.output_dim = model_cfg.output_dim self.global_average_pool = model_cfg.global_average_pool self.pre_process = pre_process self.post_process = post_process + self.skip_head = skip_head if model_cfg.get("class_token_length") is None or model_cfg.get("class_token_length") <= 0: class_token = False @@ -100,7 +100,8 @@ def __init__(self, model_cfg, pre_process=True, post_process=True): single_token_output=False, ) - if self.post_process: + if self.post_process and not skip_head: + self.output_dim = model_cfg.output_dim self.head = torch.nn.Linear(self.hidden_size, self.output_dim, bias=False,) def set_input_tensor(self, input_tensor): @@ -110,7 +111,7 @@ def set_input_tensor(self, input_tensor): def forward(self, input): hidden_states = self.backbone(input) - if self.post_process: + if self.post_process and not self.skip_head: if self.global_average_pool: hidden_states = hidden_states.mean(dim=1) else: @@ -126,7 +127,6 @@ class CLIPTextTransformer(MegatronModule): def __init__(self, model_cfg, padded_vocab_size, pre_process=True, post_process=True): super(CLIPTextTransformer, self).__init__() - self.output_dim = model_cfg.output_dim self.pre_process = pre_process self.post_process = post_process self.fp16_lm_cross_entropy = model_cfg.fp16_lm_cross_entropy @@ -202,6 +202,7 @@ def __init__(self, model_cfg, padded_vocab_size, pre_process=True, post_process= self.position_ids = torch.arange(model_cfg.max_position_embeddings).expand(1, -1).cuda() if self.post_process: + self.output_dim = model_cfg.output_dim self.head = torch.nn.Linear(model_cfg.hidden_size, self.output_dim, bias=False,) self.attn_mask = self.build_attention_mask(model_cfg.max_position_embeddings) diff --git a/nemo/collections/multimodal/models/kosmos/__init__.py b/nemo/collections/multimodal/models/kosmos/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py b/nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py new file mode 100644 index 000000000000..e2c4e5b47eac --- /dev/null +++ b/nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py @@ -0,0 +1,1153 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools +import math +import os +import random +import tempfile +from functools import partial +from typing import Any, List, Optional, Union + +import numpy as np +import pandas as pd +import torch +from einops import rearrange, repeat +from omegaconf.dictconfig import DictConfig +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning.accelerators import CPUAccelerator +from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.multimodal.data.kosmos.kosmos_dataset import MAX_NUM_IMAGES, MergedKosmosDataLoader +from nemo.collections.multimodal.data.kosmos.kosmos_dataset import ( + build_train_valid_datasets as build_media_train_valid_datasets, +) +from nemo.collections.multimodal.models.clip.megatron_clip_models import CLIPVisionTransformer +from nemo.collections.multimodal.models.kosmos.perceiver_resampler import PerceiverResampler +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( + MegatronPretrainingRandomSampler, + MegatronPretrainingSampler, +) +from nemo.collections.nlp.data.language_modeling.megatron.gpt_dataset import ( + build_train_valid_test_datasets as build_text_train_valid_test_datasets, +) +from nemo.collections.nlp.models.language_modeling.megatron.gpt_model import GPTModel, post_language_model_processing +from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.modules.common.megatron.build_model import build_model +from nemo.collections.nlp.modules.common.megatron.language_model import get_language_model +from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule +from nemo.collections.nlp.modules.common.megatron.utils import ( + ApexGuardDefaults, + average_losses_across_data_parallel_group, + get_all_params_for_weight_decay_optimization, + get_params_for_weight_decay_optimization, + init_method_normal, + parallel_lm_logits, + scaled_init_method_normal, +) +from nemo.collections.nlp.modules.common.text_generation_utils import ( + generate, + get_computeprob_response, + get_default_length_params, + get_default_sampling_params, + megatron_gpt_generate, +) +from nemo.collections.nlp.modules.common.transformer.text_generation import ( + LengthParam, + OutputType, + SamplingParam, + TextGeneration, +) +from nemo.collections.nlp.parts.nlp_overrides import GradScaler, NLPSaveRestoreConnector +from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone +from nemo.core.classes.common import PretrainedModelInfo +from nemo.utils import logging + +try: + import apex.transformer.pipeline_parallel.utils + from apex.transformer.enums import AttnMaskType + from apex.transformer.pipeline_parallel.utils import get_num_microbatches + + HAVE_APEX = True + +except (ImportError, ModuleNotFoundError): + + HAVE_APEX = False + +try: + from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + +try: + import transformer_engine + + HAVE_TE = True + +except (ImportError, ModuleNotFoundError): + HAVE_TE = False + + +class FrozenCLIPVisionTransformer(CLIPVisionTransformer): + def __init__(self, model_cfg, pre_process=True, post_process=True): + super().__init__( + model_cfg, pre_process=pre_process, post_process=post_process, skip_head=True, + ) + self.frozen = False + + def train(self, mode): + if self.frozen: + return self + + super().train(mode) + return self + + def forward(self, input): + assert self.training == False + hidden_states = self.backbone(input) + # Do not add header after backbone + return hidden_states + + def freeze(self) -> None: + for param in self.parameters(): + param.requires_grad = False + + self.eval() + self.frozen = True + + +class KosmosModel(MegatronModule): + def __init__( + self, model_cfg, vocab_size, media_start_id=None, media_end_id=None, pre_process=True, post_process=True, + ): + super(KosmosModel, self).__init__() + + llm_cfg = model_cfg.llm + vision_cfg = model_cfg.vision + + self.parallel_output = True # TODO (yuya): Fix this hard-code + self.media_start_id = media_start_id + self.media_end_id = media_end_id + self.pre_process = pre_process + self.post_process = post_process + self.fp16_lm_cross_entropy = llm_cfg.get('fp16_lm_cross_entropy', False) + self.sequence_parallel = llm_cfg.sequence_parallel + self.gradient_accumulation_fusion = llm_cfg.gradient_accumulation_fusion + self.share_embeddings_and_output_weights = llm_cfg.share_embeddings_and_output_weights + self.position_embedding_type = llm_cfg.get('position_embedding_type', 'learned_absolute') + + use_scaled_init_method = llm_cfg.get('use_scaled_init_method', True) + kv_channels = llm_cfg.get('kv_channels', None) + hidden_size = llm_cfg.hidden_size + num_attention_heads = llm_cfg.num_attention_heads + num_layers = llm_cfg.num_layers + init_method_std = llm_cfg.init_method_std + + if kv_channels is None: + assert ( + hidden_size % num_attention_heads == 0 + ), 'hidden_size must be divisible by num_attention_heads if kv_channels is None' + kv_channels = hidden_size // num_attention_heads + + scaled_init_method = ( + scaled_init_method_normal(init_method_std, num_layers) + if use_scaled_init_method + else init_method_normal(init_method_std) + ) + self.language_model, self._language_model_key = get_language_model( + vocab_size=vocab_size, + hidden_size=llm_cfg.hidden_size, + max_position_embeddings=llm_cfg.max_position_embeddings, + num_layers=llm_cfg.num_layers, + num_attention_heads=llm_cfg.num_attention_heads, + apply_query_key_layer_scaling=llm_cfg.get('apply_query_key_layer_scaling', True), + kv_channels=kv_channels, + ffn_hidden_size=llm_cfg.ffn_hidden_size, + num_tokentypes=0, + add_pooler=False, + encoder_attn_mask_type=AttnMaskType.causal, + pre_process=pre_process, + post_process=post_process, + init_method_std=llm_cfg.get('init_method_std', 0.02), + scaled_init_method=scaled_init_method, + use_cpu_initialization=llm_cfg.get('use_cpu_initialization', False), + hidden_dropout=llm_cfg.get('hidden_dropout', 0.1), + attention_dropout=llm_cfg.get('attention_dropout', 0.1), + ffn_dropout=llm_cfg.get('ffn_dropout', 0.0), + precision=llm_cfg.get('precision', 16), + fp32_residual_connection=llm_cfg.get('fp32_residual_connection', False), + activations_checkpoint_granularity=llm_cfg.get('activations_checkpoint_granularity', None), + activations_checkpoint_method=llm_cfg.get('activations_checkpoint_method', None), + activations_checkpoint_num_layers=llm_cfg.get('activations_checkpoint_num_layers', 1), + activations_checkpoint_layers_per_pipeline=llm_cfg.get('activations_checkpoint_layers_per_pipeline', None), + normalization=llm_cfg.get('normalization', 'layernorm'), + layernorm_epsilon=llm_cfg.get('layernorm_epsilon', 1e-5), + onnx_safe=llm_cfg.get('onnx_safe', False), + bias=llm_cfg.get('bias', True), + bias_activation_fusion=llm_cfg.get('bias_activation_fusion', True), + bias_dropout_add_fusion=llm_cfg.get('bias_dropout_add_fusion', True), + activation=llm_cfg.get('activation', 'gelu'), + headscale=llm_cfg.get('headscale', False), + transformer_block_type=llm_cfg.get('transformer_block_type', 'pre_ln'), + openai_gelu=llm_cfg.get('openai_gelu', False), + normalize_attention_scores=llm_cfg.get('normalize_attention_scores', True), + position_embedding_type=llm_cfg.get('position_embedding_type', 'learned_absolute'), + rotary_percentage=llm_cfg.get('rotary_percentage', 1.0), + share_embeddings_and_output_weights=llm_cfg.get('share_embeddings_and_output_weights', True), + attention_type=llm_cfg.get('attention_type', 'multihead'), + masked_softmax_fusion=llm_cfg.get('masked_softmax_fusion', True), + gradient_accumulation_fusion=llm_cfg.get('gradient_accumulation_fusion', False), + persist_layer_norm=llm_cfg.get('persist_layer_norm', False), + sequence_parallel=llm_cfg.get('sequence_parallel', False), + transformer_engine=llm_cfg.get('transformer_engine', False), + fp8=llm_cfg.get('fp8', False), + fp8_e4m3=llm_cfg.get('fp8_e4m3', False), + fp8_hybrid=llm_cfg.get('fp8_hybrid', False), + fp8_margin=llm_cfg.get('fp8_margin', 0), + fp8_interval=llm_cfg.get('fp8_interval', 1), + fp8_amax_history_len=llm_cfg.get('fp8_amax_history_len', 1), + fp8_amax_compute_algo=llm_cfg.get('fp8_amax_compute_algo', 'most_recent'), + reduce_amax=llm_cfg.get('reduce_amax', True), + use_emha=llm_cfg.get('use_emha', False), + ) + + if self.share_embeddings_and_output_weights: + self.initialize_word_embeddings( + init_method=init_method_normal(init_method_std), vocab_size=vocab_size, hidden_size=hidden_size + ) + + # TODO (yuya): check when PP is added + self.vision_encoder = FrozenCLIPVisionTransformer( + vision_cfg, pre_process=vision_cfg.pre_process, post_process=vision_cfg.post_process, + ) + if vision_cfg.from_pretrained is not None: + logging.info(f"Loading CLIP vision encoder weights from checkpoint {vision_cfg.from_pretrained}") + self.load_vision_encoder_weights(vision_cfg.from_pretrained) + self.perceiver = PerceiverResampler(dim=vision_cfg.hidden_size, num_latents=model_cfg.num_media_latents) + self.vision_connector = torch.nn.Linear(vision_cfg.hidden_size, llm_cfg.hidden_size, bias=False,) + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + self.language_model.set_input_tensor(input_tensor) + + def encode_vision_x(self, vision_x: torch.Tensor): + """ + Compute media tokens from vision input by passing it through vision encoder and conditioning language model. + Args: + vision_x (torch.Tensor): Vision input + shape (B, T_img, F, C, H, W) + Images in the same chunk are collated along T_img, and frames are collated along F + Currently only F=1 is supported (single-frame videos) + + rearrange code based on https://github.com/dhansmair/flamingo-mini + """ + + assert vision_x.ndim == 6, "vision_x should be of shape (b, T_img, F, C, H, W)" + b, T, F = vision_x.shape[:3] + assert F == 1, "Only single frame supported" + + vision_x = rearrange(vision_x, "b T F c h w -> (b T F) c h w") + with torch.no_grad(): + vision_x = self.vision_encoder(vision_x) + vision_x = rearrange(vision_x, "(b T F) v d -> b T F v d", b=b, T=T, F=F) + vision_x = self.perceiver(vision_x) # reshapes to (b, T, n, d) + vision_x = self.vision_connector(vision_x) + return vision_x + + def replace_media_embeddings(self, input_ids, inputs_embeds, media=None): + if media is None: + return inputs_embeds + + batch_size, sequence_length, hidden_size = inputs_embeds.shape + + # calculate media features without gradients + with torch.no_grad(): + media_features = self.encode_vision_x(media) + num_images_per_sample = media_features.size(1) + num_patches = media_features.size(2) + + # flatten patches + media_features = media_features.view(batch_size, -1, hidden_size) + + # create an indices matrix used in torch.scatter + padded_media_indices = torch.ones( + (batch_size, num_images_per_sample), dtype=torch.long, device=input_ids.device + ) + padded_media_indices *= sequence_length + for idx, input_id in enumerate(input_ids): + media_end_positions = torch.where(input_id == self.media_end_id)[0] + # locate the first media token positions + padded_media_indices[idx, : len(media_end_positions)] = media_end_positions - num_patches + + # use indices to create a span + padded_media_indices = padded_media_indices.unsqueeze(-1) + torch.arange( + num_patches, device=padded_media_indices.device + ).repeat(*padded_media_indices.shape, 1) + padded_media_indices = padded_media_indices.reshape(batch_size, -1) + padded_media_indices = repeat(padded_media_indices, 'b s -> b s h', h=hidden_size) + + # concat placeholder + updated_input_embeds = torch.cat( + (inputs_embeds, torch.zeros((batch_size, num_patches, hidden_size), device=inputs_embeds.device)), dim=1 + ) + updated_input_embeds = updated_input_embeds.type(media_features.dtype) + # scatter media_features + updated_input_embeds.scatter_(1, padded_media_indices, media_features) + + # chop off placeholder + updated_input_embeds = updated_input_embeds[:, :sequence_length] + + return updated_input_embeds + + def forward( + self, + input_ids, + position_ids, + attention_mask, + labels=None, + media=None, + token_type_ids=None, + layer_past=None, + get_key_value=False, + forward_method_parallel_output=None, + encoder_input=None, + set_inference_key_value_memory=False, + inference_max_sequence_len=None, + checkpoint_activations_all_layers=None, + ): + # input_ids: [b, s] + # position_ids: [b, s] + # attention_mask: [1, 1, s, s] + + # Multimodal uses different forward pass. Vision tower must be inserted. + enc_input_ids, enc_position_ids, enc_attn_mask = input_ids, position_ids, attention_mask + + # Embeddings. + if self.pre_process and encoder_input is None: + embedding_module = self.language_model.embedding + + words_embeddings = embedding_module.word_embeddings(enc_input_ids) + words_embeddings = self.replace_media_embeddings(enc_input_ids, words_embeddings, media=media) + + if self.position_embedding_type == 'learned_absolute': + assert position_ids is not None + position_embeddings = embedding_module.position_embeddings(position_ids) + embeddings = words_embeddings + position_embeddings + elif self.position_embedding_type == 'learned_parameters': + embeddings = words_embeddings + embedding_module.position_embeddings + else: + embeddings = words_embeddings + + if token_type_ids is not None: + assert embedding_module.tokentype_embeddings is not None + embeddings = embeddings + embedding_module.tokentype_embeddings(token_type_ids) + else: + assert embedding_module.tokentype_embeddings is None + + # Data format change to avoid explicit tranposes : [b s h] --> [s b h]. + if embedding_module.transpose_batch_sequence: + embeddings = embeddings.transpose(0, 1).contiguous() + + # If the input flag for fp32 residual connection is set, convert for float. + if embedding_module.fp32_residual_connection: + embeddings = embeddings.float() + + # Dropout. + if self.sequence_parallel: + embeddings = tensor_parallel.mappings.scatter_to_sequence_parallel_region(embeddings) + with tensor_parallel.random.get_cuda_rng_tracker().fork(): + embeddings = embedding_module.embedding_dropout(embeddings) + else: + embeddings = embedding_module.embedding_dropout(embeddings) + + encoder_input = embeddings + else: + pass + + # enc_attn_mask: [1, 1, s, s] + + if self.position_embedding_type == 'rope': + if inference_max_sequence_len is not None: + rotary_pos_emb = self.language_model.rotary_pos_emb(inference_max_sequence_len) + elif self.language_model.encoder.input_tensor is not None: + if self.sequence_parallel: + rotary_pos_emb = self.language_model.rotary_pos_emb( + self.language_model.encoder.input_tensor.size(0) + * parallel_state.get_tensor_model_parallel_world_size() + ) + else: + rotary_pos_emb = self.language_model.rotary_pos_emb(self.encoder.input_tensor.size(0)) + else: + if self.sequence_parallel: + rotary_pos_emb = self.language_model.rotary_pos_emb( + encoder_input.size(0) * parallel_state.get_tensor_model_parallel_world_size() + ) + else: + rotary_pos_emb = self.language_model.rotary_pos_emb(encoder_input.size(0)) + else: + rotary_pos_emb = None + + # encoder but decoder for GPT + encoder_output = self.language_model.encoder( + encoder_input, + enc_attn_mask, + layer_past=layer_past, + get_key_value=get_key_value, + set_inference_key_value_memory=set_inference_key_value_memory, + inference_max_sequence_len=inference_max_sequence_len, + checkpoint_activations_all_layers=checkpoint_activations_all_layers, + rotary_pos_emb=(rotary_pos_emb, None, None) + if rotary_pos_emb is not None + else None, # This assumes that this being used as a GPT/BERT model only (no cross-attention) + ) + + lm_output = encoder_output + + if self.post_process: + return post_language_model_processing( + lm_output, + labels, + self.language_model.output_layer.weight + if not self.share_embeddings_and_output_weights + else self.word_embeddings_weight(), + get_key_value, + self.parallel_output, + forward_method_parallel_output, + self.fp16_lm_cross_entropy, + return_logits=False, + sequence_parallel=self.sequence_parallel, + gradient_accumulation_fusion=self.gradient_accumulation_fusion, + ) + else: + return lm_output + + def load_vision_encoder_weights(self, nemo_path): + if torch.cuda.is_available(): + map_location = torch.device('cuda') + else: + map_location = torch.device('cpu') + save_restore_connector = NLPSaveRestoreConnector() + cwd = os.getcwd() + + with tempfile.TemporaryDirectory() as tmpdir: + try: + save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) + + # Change current working directory to + os.chdir(tmpdir) + config_yaml = os.path.join(tmpdir, save_restore_connector.model_config_yaml) + cfg = OmegaConf.load(config_yaml) + + model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) + state_dict = save_restore_connector._load_state_dict_from_disk( + model_weights, map_location=map_location + ) + finally: + os.chdir(cwd) + + new_state_dict = {} + for k, v in state_dict.items(): + if k.startswith("model.vision_encoder."): + new_k = k.lstrip("model.vision_encoder.") + new_state_dict[new_k] = v + + missing, unexpected = self.vision_encoder.load_state_dict(new_state_dict, strict=False) + print(f"Restored from {nemo_path} with {len(missing)} missing and {len(unexpected)} unexpected keys") + if len(missing) > 0: + print(f"Missing Keys: {missing}") + if len(unexpected) > 0: + print(f"Unexpected Keys: {unexpected}") + + def state_dict_for_save_checkpoint(self, destination=None, prefix='', keep_vars=False): + + state_dict_ = {} + state_dict_[self._language_model_key] = self.language_model.state_dict_for_save_checkpoint( + destination, prefix, keep_vars + ) + # Save word_embeddings. + if self.post_process and not self.pre_process: + state_dict_[self._word_embeddings_for_head_key] = self.word_embeddings.state_dict( + destination, prefix, keep_vars + ) + return state_dict_ + + def load_state_dict(self, state_dict, strict=True): + """Customized load.""" + + # Load word_embeddings. + if self.post_process and not self.pre_process: + self.word_embeddings.load_state_dict(state_dict[self._word_embeddings_for_head_key], strict=strict) + if self._language_model_key in state_dict: + state_dict = state_dict[self._language_model_key] + self.language_model.load_state_dict(state_dict, strict=strict) + + +class MegatronKosmosModel(MegatronGPTModel): + """ + Megatron Kosmos pretraining + """ + + def __init__(self, cfg: DictConfig, trainer: Trainer): + super().__init__(cfg, trainer) + + self.image_size = (self.cfg.vision.img_h, self.cfg.vision.img_w) + self.megatron_amp_O2 = getattr(self, 'megatron_amp_O2', False) + self.enabled_data_types = self.cfg.get("enabled_data_types", []) + logging.info(f"Data types enabled in Kosmos training: {self.enabled_data_types}") + self.per_type_micro_batch_size = self.cfg.per_type_micro_batch_size + self.per_type_global_batch_size = {} + self.per_type_loss_weights = {} + for data_type in self.enabled_data_types: + self.per_type_global_batch_size[data_type] = ( + self.per_type_micro_batch_size[data_type] * self.cfg.global_batch_size // self.cfg.micro_batch_size + ) + self.per_type_loss_weights[data_type] = self.cfg.per_type_loss_weights[data_type] + + def get_gpt_module_list(self): + if isinstance(self.model, list): + return [model.module if isinstance(model, Float16Module) else model for model in self.model] + elif isinstance(self.model, Float16Module): + return [self.model.module] + else: + return [self.model] + + def set_inference_config(self, inference_config): + self._inference_config = inference_config + + def get_inference_config(self): + return self._inference_config + + def model_provider_func(self, pre_process, post_process): + """Model depends on pipeline paralellism.""" + media_start_id = self.tokenizer.token_to_id(self.cfg.media_start_token) + media_end_id = self.tokenizer.token_to_id(self.cfg.media_end_token) + + model = KosmosModel( + model_cfg=self.cfg, + vocab_size=self.padded_vocab_size, + media_start_id=media_start_id, + media_end_id=media_end_id, + pre_process=pre_process, + post_process=post_process, + ) + + # Freeze vit + model.vision_encoder.freeze() + + logging.info( + f"Kosmos model initialized with {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable parameters" + ) + + return model + + def forward(self, tokens, text_position_ids, attention_mask, labels, media=None): + output_tensor = self.model(tokens, text_position_ids, attention_mask, labels=labels, media=media) + return output_tensor + + def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): + + tensor_shape = [self.cfg.llm.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.llm.hidden_size] + + # handle asynchronous grad reduction + no_sync_func = None + grad_sync_func = None + param_sync_func = None + if not forward_only and self.with_distributed_adam: + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) + grad_sync_func = self.reduce_overlap_gradients + param_sync_func = self.sync_overlap_parameters + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + fwd_bwd_function = get_forward_backward_func() + + # TODO @akhattar: remove sync related stuff from config, add num_micro_batches_with_partial_activation_checkpoints when ready + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), + forward_only=forward_only, + tensor_shape=tensor_shape, + dtype=self.autocast_dtype, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=self.enable_autocast, + no_sync_func=no_sync_func, + grad_sync_func=grad_sync_func, + param_sync_func=param_sync_func, + ) + + # only the last stages of the pipeline return losses + loss_dict = {} + if losses_reduced_per_micro_batch: + # average loss across micro batches + loss_tensors_list = [loss_reduced['avg'] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_mean = loss_tensor.mean() + for data_type in self.enabled_data_types: + loss_tensors_list = [loss_reduced[data_type] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_dict[data_type] = loss_tensor.mean() + else: + loss_mean = torch.tensor(0.0).cuda() + + return loss_mean, loss_dict + + def training_step(self, dataloader_iter, batch_idx): + """ + We pass the dataloader iterator function to the micro-batch scheduler. + The input batch to each micro-batch is fetched using the dataloader function + in the micro-batch fwd function. + """ + + # we zero grads here because we also call backward in the megatron-core fwd/bwd functions + self._optimizer.zero_grad() + + if self.with_distributed_adam: + # hack to enable overlapping param sync and forward compute + # note: the distributed optimizer monkey-patches each + # parameter's __getattribute__ function so that it can + # launch parameter all-gathers the first time the + # parameter is accessed after the optimizer step. However, + # PyTorch directly passes embedding parameters into a C++, + # bypassing this process. A quick-and-dirty hack is to + # manually interact with the parameter. + modules = self.model if isinstance(self.model, list) else [self.model] + for module in modules: + if isinstance(module, Float16Module): + module = module.module + module = module.language_model + if hasattr(module, 'embedding'): + for param in module.embedding.parameters(): + param.data_ptr() + + loss_mean, loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, False) + + # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced + if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): + self.allreduce_sequence_parallel_gradients() + + if self.with_distributed_adam: + # synchronize asynchronous grad reductions + # note: not necessary, but reduces performance degradation + # from multiple simultaneous NCCL calls + self._optimizer._finish_bucket_grad_sync() + elif self.megatron_amp_O2: + # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) + if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): + # main grads are stored in the MainParamsOptimizer wrapper + self._optimizer.allreduce_main_grads() + else: + # async grad allreduce is not currently implemented for O1/autocasting mixed precision training + # so we all-reduce gradients after the pipeline + self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) + + if self.cfg.get('pipeline_model_parallel_size', 1) > 1 and self.cfg.get( + 'share_embeddings_and_output_weights', True + ): + # when using pipeline parallelism the first and last stage must keep embeddings in sync + self.allreduce_first_last_embeddings() + + ## logging + # we can only log on one rank if it is rank zero so we broadcast from last rank + # we can avoid this broadcast by updating the PTL log function to accept specific ranks + torch.distributed.broadcast(loss_mean, get_last_rank()) + + if self.cfg.precision == 16: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log('loss_scale', loss_scale, batch_size=1) + + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log_dict({'train/' + k: v for k, v in loss_dict.items()}, rank_zero_only=True, batch_size=1) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, rank_zero_only=True, batch_size=1) + self.log( + 'global_step', self.trainer.global_step, prog_bar=True, rank_zero_only=True, batch_size=1, + ) + + consumed_samples = self.compute_consumed_samples(self.trainer.global_step - self.init_global_step) + # TODO: make sure compute_consumed_samples works for pipeline parallelism + self.log( + 'consumed_samples', consumed_samples, prog_bar=True, rank_zero_only=True, batch_size=1, + ) + + if self.cfg.get('rampup_batch_size', None): + micro_batch_size = self.cfg.get('micro_batch_size', 1) + total_gpus_number = self.trainer.num_devices * self.trainer.num_nodes + current_global_batch_size = get_num_microbatches() * micro_batch_size * total_gpus_number + self.log('global_batch_size', current_global_batch_size, prog_bar=True, rank_zero_only=True, batch_size=1) + + num_microbatch_calculator = apex.transformer.pipeline_parallel.utils._GLOBAL_NUM_MICROBATCHES_CALCULATOR + num_microbatch_calculator.update( + consumed_samples=consumed_samples, consistency_check=True, + ) + + return loss_mean + + def get_forward_output_and_loss_func(self, validation_step=False): + def loss_func(output_tensors, loss_masks): + loss_list = [] + loss_for_ub = 0 + for data_type in self.enabled_data_types: + output_tensor = output_tensors[data_type] + loss_mask = loss_masks[data_type] + # Loss for a micro-batch (ub) + loss_list.append(self.loss_func(loss_mask, output_tensor)) + loss_for_ub += loss_list[-1] * self.per_type_loss_weights[data_type] + loss_for_ub /= sum(self.per_type_loss_weights.values()) + + if validation_step and not self.cfg.data.get('validation_drop_last', True): + raise NotImplementedError(f"`validation_drop_last=False` is not implemented in Kosmos!") + # num_valid_tokens_in_ub = loss_mask.sum() + # if loss_for_ub.isnan(): + # assert loss_mask.count_nonzero() == 0, 'Got NaN loss with non-empty input' + # loss_sum_for_ub = torch.zeros_like(num_valid_tokens_in_ub) + # else: + # loss_sum_for_ub = num_valid_tokens_in_ub * loss_for_ub + # + # loss_sum_and_ub_size_all_gpu = torch.cat( + # [ + # loss_sum_for_ub.clone().detach().view(1), + # torch.tensor([num_valid_tokens_in_ub]).cuda().clone().detach(), + # ] + # ) + # # Could potentially reduce num_valid_samples_in_microbatch and use that to aggregate instead of len(self._validation_ds) + # torch.distributed.all_reduce( + # loss_sum_and_ub_size_all_gpu, group=parallel_state.get_data_parallel_group() + # ) + # return loss_for_ub, {'loss_sum_and_ub_size': loss_sum_and_ub_size_all_gpu} + else: + reduced_loss = average_losses_across_data_parallel_group([loss_for_ub] + loss_list) + loss_dict = {data_type: reduced_loss[i + 1] for i, data_type in enumerate(self.enabled_data_types)} + loss_dict['avg'] = reduced_loss[0] + return loss_for_ub, loss_dict + + def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_layers=None): + output_tensors = {} + loss_masks = {} + combined_batch = next(dataloader_iter) + for data_type in self.enabled_data_types: + if parallel_state.get_pipeline_model_parallel_world_size() == 1: + batch = combined_batch[data_type] + for k in batch.keys(): + if self.get_attention_mask_from_fusion: + batch[k] = batch[k].cuda(non_blocking=True) if k not in ['attention_mask'] else None + else: + batch[k] = batch[k].cuda(non_blocking=True) + else: + if parallel_state.is_pipeline_first_stage(): + batch = combined_batch[data_type] + # First pipeline stage needs tokens, position_ids, and attention_mask + for k in batch.keys(): + if self.get_attention_mask_from_fusion: + batch[k] = ( + batch[k].cuda(non_blocking=True) + if k in ['tokens', 'position_ids', 'media'] + else None + ) + else: + batch[k] = ( + batch[k].cuda(non_blocking=True) + if k in ['tokens', 'position_ids', 'attention_mask', 'media'] + else None + ) + elif parallel_state.is_pipeline_last_stage(): + batch = combined_batch[data_type] + # Last pipeline stage needs the labels, loss_mask, and attention_mask + for k in batch.keys(): + if self.get_attention_mask_from_fusion: + batch[k] = batch[k].cuda(non_blocking=True) if k in ['labels', 'loss_mask'] else None + else: + batch[k] = ( + batch[k].cuda(non_blocking=True) + if k in ['labels', 'loss_mask', 'attention_mask'] + else None + ) + else: + # Intermediate pipeline stage doesn't need any inputs + batch = {k: None for k in ['tokens', 'position_ids', 'attention_mask', 'labels', 'media']} + + output_tensor = model( + batch['tokens'], + batch['position_ids'], + batch['attention_mask'], + batch['labels'], + batch.get('media'), + checkpoint_activations_all_layers=checkpoint_activations_all_layers, + ) + output_tensors[data_type] = output_tensor + loss_masks[data_type] = batch['loss_mask'] + + return output_tensors, partial(loss_func, loss_masks=loss_masks) + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(batch, model): + extra_arg = {} + if len(batch) == 3: + batch = [x.cuda() for x in batch] + tokens, attention_mask, position_ids = batch + attention_mask = attention_mask[0:1] + else: + ( + tokens, + attention_mask, + position_ids, + set_inference_key_value_memory, + inference_max_sequence_len, + ) = batch + tokens = tokens.cuda() + attention_mask = attention_mask.cuda() + position_ids = position_ids.cuda() + attention_mask = attention_mask[0:1] + extra_arg['set_inference_key_value_memory'] = set_inference_key_value_memory[0].item() + extra_arg['inference_max_sequence_len'] = inference_max_sequence_len[0].item() + output_tensor = model(tokens, position_ids, attention_mask, **extra_arg) + + def id_func(output_tensor): + return output_tensor, {'logits': output_tensor} + + return output_tensor, id_func + + return fwd_output_only_func + + def validation_step(self, dataloader_iter, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. + """ + loss_mean, loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, True) + loss_dict['avg'] = loss_mean + return loss_dict + + def validation_epoch_end(self, outputs): + loss_dict = {} + if parallel_state.is_pipeline_last_stage(): + # only the last pipeline parallel stages return loss with their batch size + if self.cfg.data.get('validation_drop_last', True): + averaged_loss = torch.stack([loss['avg'] for loss in outputs]).mean() + for data_type in self.enabled_data_types: + loss_dict[data_type] = torch.stack([loss[data_type] for loss in outputs]).mean() + else: + # Compute the avg loss by total_loss across all samples / total number of samples + # total_loss_and_total_samples = torch.vstack(outputs).sum(axis=0) + # avg_loss = total_loss_and_total_samples[0] / total_loss_and_total_samples[1] + # averaged_loss = avg_loss.type(torch.float32).cuda() + raise NotImplementedError("`validation_drop_last=False` is not supported!") + else: + averaged_loss = torch.tensor(0.0, dtype=torch.float32).cuda() + for data_type in self.enabled_data_types: + loss_dict[data_type] = torch.tensor(0.0, dtype=torch.float32).cuda() + + # we can only log on one rank if it is rank zero so we broadcast from last rank + torch.distributed.broadcast(averaged_loss, get_last_rank()) + for data_type in self.enabled_data_types: + torch.distributed.broadcast(loss_dict[data_type], get_last_rank()) + + self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log_dict({'val/' + k: v for k, v in loss_dict.items()}, rank_zero_only=True, batch_size=1) + + return averaged_loss + + def test_step(self, batch, batch_idx): + return self.validation_step(batch, batch_idx) + + def test_epoch_end(self, outputs): + averaged_loss = average_losses_across_data_parallel_group(outputs) + logging.info(f'test_loss: {averaged_loss[0]}') + + def loss_func(self, loss_mask, output_tensor): + losses = output_tensor.float() + loss_mask = loss_mask.view(-1).float() + # TODO: add nemo version here + loss = torch.sum(losses.view(-1) * loss_mask) / loss_mask.sum() # sequence level nll + return loss + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + num_parameters_on_device, total_num_parameters = self._get_total_params_across_model_parallel_groups_gpt_bert( + self.model + ) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer._checkpoint_connector.resume_from_checkpoint_fit_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + rampup_batch_size = self.cfg.get('rampup_batch_size', None) + if rampup_batch_size: + start_batch_size = rampup_batch_size[0] + batch_size_increment = rampup_batch_size[1] + total_gpus_number = self.trainer.num_devices * self.trainer.num_nodes + + assert start_batch_size % (total_gpus_number) == 0, ( + 'expected' + ' start batch size ({}) to be divisible by total number of GPUs' + ' ({})'.format(start_batch_size, total_gpus_number) + ) + + micro_batch_size = self.cfg.get('micro_batch_size', 1) + tensor_model_parallel_size = self.cfg.get('tensor_model_parallel_size', 1) + pipeline_model_parallel_size = self.cfg.get('pipeline_model_parallel_size', 1) + total_data_parallel_size = total_gpus_number // (tensor_model_parallel_size * pipeline_model_parallel_size) + + assert batch_size_increment % (micro_batch_size * total_data_parallel_size) == 0, ( + 'expected' + ' batch size increment ({}) to be divisible by micro_batch_size ({}) times total data parallel size' + ' ({})'.format(batch_size_increment, micro_batch_size, total_data_parallel_size) + ) + + if stage == 'predict': + return + else: + # TODO: consider adding a ModelPT guard to check if model is being restored. + # allowing restored models to optionally setup datasets + self.build_train_valid_test_datasets() + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + + # when using pipeline model parallel the final stage need to initialize word embeddings + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if isinstance(self.model, list): + for i, module in enumerate(self.model): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + if self.cfg.get('share_embeddings_and_output_weights', True): + module.sync_initial_word_embeddings() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + else: + if self.cfg.get('share_embeddings_and_output_weights', True): + self.model.sync_initial_word_embeddings() + + if self.cfg.get('transformer_engine', False): + self.setup_transformer_engine_tp_groups() + + def build_train_valid_test_datasets(self): + logging.info('Building Kosmos datasets.') + + if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): + raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") + + global_batch_size = self.cfg.global_batch_size + max_train_steps = self.trainer.max_steps + eval_iters = (max_train_steps // self.trainer.val_check_interval + 1) * self.trainer.limit_val_batches + test_iters = self.trainer.limit_test_batches + + train_valid_test_num_samples = [ + max_train_steps * global_batch_size, + eval_iters * global_batch_size, + test_iters * global_batch_size, + ] + + if self.trainer.limit_val_batches <= 1.0 and isinstance(self.trainer.limit_val_batches, float): + train_valid_test_num_samples[ + 1 + ] = 1 # This is to make sure we only have one epoch on every validation iteration + + self._train_ds, self._validation_ds, self._test_ds = {}, {}, {} + + for data_type in self.enabled_data_types: + if data_type == "text": + ( + self._train_ds[data_type], + self._validation_ds[data_type], + self._test_ds[data_type], + ) = build_text_train_valid_test_datasets( + cfg=self.cfg, + trainer=self.trainer, + data_prefix=self.cfg.data.data_prefix, + data_impl=self.cfg.data.data_impl, + splits_string=self.cfg.data.splits_string, + train_valid_test_num_samples=train_valid_test_num_samples, + seq_length=self.cfg.data.seq_length, + seed=self.cfg.seed, + skip_warmup=self.cfg.data.get('skip_warmup', True), + tokenizer=self.tokenizer, + ) + + if data_type in ["image_caption", "image_interleaved"]: + self._train_ds[data_type], self._validation_ds[data_type] = build_media_train_valid_datasets( + model_cfg=self.cfg, + consumed_samples=self.compute_consumed_samples(0) + * self.per_type_micro_batch_size[data_type] + // self.cfg.micro_batch_size, + tokenizer=self.tokenizer, + data_type=data_type, + ) + self._test_ds[data_type] = None + + data = [] + for ds_name, ds in [("Train", self._train_ds), ("Validation", self._validation_ds), ("Test", self._test_ds)]: + for key in self.enabled_data_types: + # Append the name of the dataset, the key, and the length of the data under that key to the list + if ds_name == "Train": + consumed_samples = ( + self.compute_consumed_samples(0) + * self.per_type_micro_batch_size[key] + // self.cfg.micro_batch_size + ) + else: + consumed_samples = 0 + data.append([ds_name, key, len(ds[key]) if ds[key] is not None else 0, consumed_samples]) + + df = pd.DataFrame(data, columns=["Dataset", "Type", "Length", "Consumed"]) + df['Length'] = df['Length'].apply(lambda x: "{:,}".format(x)) + df['Consumed'] = df['Consumed'].apply(lambda x: "{:,}".format(x)) + + logging.info(f"\nFinished Building Kosmos Dataset:\n{df}") + return self._train_ds, self._validation_ds, self._test_ds + + def build_pretraining_text_data_loader( + self, + dataset, + consumed_samples, + micro_batch_size, + global_batch_size, + drop_last=True, + pad_samples_to_global_batch_size=False, + ): + """Buld dataloader given an input dataset.""" + + logging.info(f'Building dataloader with consumed samples: {consumed_samples}') + # Megatron sampler + if hasattr(self.cfg.data, 'dataloader_type') and self.cfg.data.dataloader_type is not None: + if self.cfg.data.dataloader_type == 'single': + batch_sampler = MegatronPretrainingSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=micro_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=drop_last, + global_batch_size=global_batch_size, + pad_samples_to_global_batch_size=pad_samples_to_global_batch_size, + ) + elif self.cfg.data.dataloader_type == 'cyclic': + batch_sampler = MegatronPretrainingRandomSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=micro_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=self.cfg.get('drop_last', True), + global_batch_size=global_batch_size, + ) + else: + raise ValueError('cfg.data.dataloader_type must be "single" or "cyclic"') + else: + raise ValueError('cfg.data.dataloader_type not found. Must be "single" or "cyclic"') + + return torch.utils.data.DataLoader( + dataset, + batch_sampler=batch_sampler, + num_workers=self.cfg.data.num_workers, + pin_memory=True, + persistent_workers=True if self.cfg.data.num_workers > 0 else False, + ) + + @classmethod + def list_available_models(cls) -> Optional[PretrainedModelInfo]: + """ + This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud. + Returns: + List of available pre-trained models. + """ + return [] + + def setup_training_data(self, cfg): + consumed_samples = self.compute_consumed_samples(0) + + train_dls = {} + for data_type in self.enabled_data_types: + if hasattr(self, '_train_ds') and self._train_ds.get(data_type) is not None: + if data_type == "text": + train_dls[data_type] = self.build_pretraining_text_data_loader( + self._train_ds[data_type], + consumed_samples=consumed_samples + * self.per_type_micro_batch_size[data_type] + // self.cfg.micro_batch_size, + micro_batch_size=self.per_type_micro_batch_size[data_type], + global_batch_size=self.per_type_global_batch_size[data_type], + ) + elif data_type in ["image_caption", "image_interleaved"]: + train_dls[data_type] = torch.utils.data.DataLoader( + self._train_ds[data_type], + batch_size=self.per_type_micro_batch_size[data_type], + num_workers=cfg.get(data_type).num_workers, + pin_memory=True, + drop_last=True, + persistent_workers=True, + ) + else: + raise ValueError(f"Unrecognized dataset type {data_type}") + + self._train_dl = MergedKosmosDataLoader(train_dls) + + def setup_validation_data(self, cfg): + consumed_samples = 0 + + validation_dls = {} + for data_type in self.enabled_data_types: + if hasattr(self, '_validation_ds') and self._validation_ds.get(data_type) is not None: + if data_type == "text": + validation_dls[data_type] = self.build_pretraining_text_data_loader( + self._validation_ds[data_type], + consumed_samples=consumed_samples, + micro_batch_size=self.per_type_micro_batch_size[data_type], + global_batch_size=self.per_type_global_batch_size[data_type], + ) + elif data_type in ["image_caption", "image_interleaved"]: + validation_dls[data_type] = torch.utils.data.DataLoader( + self._validation_ds[data_type], + batch_size=self.per_type_micro_batch_size[data_type], + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=True, + persistent_workers=True, + ) + else: + raise ValueError(f"Unrecognized dataset type {data_type}") + + self._validation_dl = MergedKosmosDataLoader(validation_dls) + + def setup_test_data(self, cfg): + pass diff --git a/nemo/collections/multimodal/models/kosmos/perceiver_resampler.py b/nemo/collections/multimodal/models/kosmos/perceiver_resampler.py new file mode 100644 index 000000000000..14bdedc5324f --- /dev/null +++ b/nemo/collections/multimodal/models/kosmos/perceiver_resampler.py @@ -0,0 +1,131 @@ +""" +Taken from https://github.com/lucidrains/flamingo-pytorch +""" + +""" +# Usage: +perceive = PerceiverResampler( + dim = 1024, + depth = 2, + dim_head = 64, + heads = 8, + num_latents = 64, # the number of latents to shrink your media sequence to, perceiver style + num_time_embeds = 4 # say you have 4 images maximum in your dialogue +) + +medias = torch.randn(1, 2, 256, 1024) # (batch, time, sequence length, dimension) +perceived = perceive(medias) # (1, 2, 64, 1024) - (batch, time, num latents, dimension) +""" + +import torch +from einops import rearrange, repeat +from einops_exts import rearrange_many +from torch import einsum, nn + + +def exists(val): + return val is not None + + +def FeedForward(dim, mult=4): + inner_dim = int(dim * mult) + return nn.Sequential( + nn.LayerNorm(dim), nn.Linear(dim, inner_dim, bias=False), nn.GELU(), nn.Linear(inner_dim, dim, bias=False), + ) + + +class PerceiverAttention(nn.Module): + def __init__(self, *, dim, dim_head=64, heads=8): + super().__init__() + self.scale = dim_head ** -0.5 + self.heads = heads + inner_dim = dim_head * heads + + self.norm_media = nn.LayerNorm(dim) + self.norm_latents = nn.LayerNorm(dim) + + self.to_q = nn.Linear(dim, inner_dim, bias=False) + self.to_kv = nn.Linear(dim, inner_dim * 2, bias=False) + self.to_out = nn.Linear(inner_dim, dim, bias=False) + + def forward(self, x, latents): + """ + Args: + x (torch.Tensor): image features + shape (b, T, n1, D) + latent (torch.Tensor): latent features + shape (b, T, n2, D) + """ + x = self.norm_media(x) + latents = self.norm_latents(latents) + + h = self.heads + + q = self.to_q(latents) + kv_input = torch.cat((x, latents), dim=-2) + k, v = self.to_kv(kv_input).chunk(2, dim=-1) + q, k, v = rearrange_many((q, k, v), "b t n (h d) -> b h t n d", h=h) + q = q * self.scale + + # attention + sim = einsum("... i d, ... j d -> ... i j", q, k) + sim = sim - sim.amax(dim=-1, keepdim=True).detach() + attn = sim.softmax(dim=-1) + + out = einsum("... i j, ... j d -> ... i d", attn, v) + out = rearrange(out, "b h t n d -> b t n (h d)", h=h) + return self.to_out(out) + + +class PerceiverResampler(nn.Module): + def __init__( + self, + *, + dim, + depth=6, + dim_head=64, + heads=8, + num_latents=64, + max_num_media=None, + max_num_frames=None, + ff_mult=4, + ): + super().__init__() + self.latents = nn.Parameter(torch.randn(num_latents, dim)) + self.frame_embs = nn.Parameter(torch.randn(max_num_frames, dim)) if exists(max_num_frames) else None + self.media_time_embs = nn.Parameter(torch.randn(max_num_media, 1, dim)) if exists(max_num_media) else None + + self.layers = nn.ModuleList([]) + for _ in range(depth): + self.layers.append( + nn.ModuleList( + [PerceiverAttention(dim=dim, dim_head=dim_head, heads=heads), FeedForward(dim=dim, mult=ff_mult),] + ) + ) + + self.norm = nn.LayerNorm(dim) + + def forward(self, x): + """ + Args: + x (torch.Tensor): image features + shape (b, T, F, v, D) + Returns: + shape (b, T, n, D) where n is self.num_latents + """ + b, T, F, v = x.shape[:4] + + # frame and media time embeddings + if exists(self.frame_embs): + frame_embs = repeat(self.frame_embs[:F], "F d -> b T F v d", b=b, T=T, v=v) + x = x + frame_embs + x = rearrange(x, "b T F v d -> b T (F v) d") # flatten the frame and spatial dimensions + if exists(self.media_time_embs): + x = x + self.media_time_embs[:T] + + # blocks + latents = repeat(self.latents, "n d -> b T n d", b=b, T=T) + for attn, ff in self.layers: + latents = attn(x, latents) + latents + latents = ff(latents) + latents + return self.norm(latents) diff --git a/nemo/collections/multimodal/models/neva/neva_model.py b/nemo/collections/multimodal/models/neva/neva_model.py new file mode 100644 index 000000000000..939bc9bf6602 --- /dev/null +++ b/nemo/collections/multimodal/models/neva/neva_model.py @@ -0,0 +1,909 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools +import math +import os +import random +import re +import tempfile +from functools import partial +from typing import Any, List, Optional, Union + +import numpy as np +import pandas as pd +import torch +from einops import rearrange, repeat +from omegaconf.dictconfig import DictConfig +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning.accelerators import CPUAccelerator +from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.trainer.trainer import Trainer +from transformers import CLIPVisionModel + +from nemo.collections.multimodal.data.kosmos.kosmos_dataset import MAX_NUM_IMAGES +from nemo.collections.multimodal.data.neva.neva_dataset import ( + CLIP_MODEL, + DEFAULT_IM_END_TOKEN, + DEFAULT_IM_START_TOKEN, + DataCollatorForSupervisedDataset, + make_supervised_data_module, +) +from nemo.collections.multimodal.models.clip.megatron_clip_models import CLIPVisionTransformer, MegatronCLIPModel +from nemo.collections.multimodal.models.kosmos.perceiver_resampler import PerceiverResampler +from nemo.collections.multimodal.parts.utils import extend_instance +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( + MegatronPretrainingRandomSampler, + MegatronPretrainingSampler, +) +from nemo.collections.nlp.data.language_modeling.megatron.gpt_dataset import ( + build_train_valid_test_datasets as build_text_train_valid_test_datasets, +) +from nemo.collections.nlp.models.language_modeling.megatron.gpt_model import GPTModel, post_language_model_processing +from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import MegatronGPTPEFTModel +from nemo.collections.nlp.models.nlp_model import NLPModel +from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import AdapterName, MMLinearAdapterConfig +from nemo.collections.nlp.modules.common.megatron.build_model import build_model +from nemo.collections.nlp.modules.common.megatron.language_model import Embedding, get_language_model +from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule +from nemo.collections.nlp.modules.common.megatron.utils import ( + ApexGuardDefaults, + average_losses_across_data_parallel_group, + get_all_params_for_weight_decay_optimization, + get_params_for_weight_decay_optimization, + init_method_normal, + parallel_lm_logits, + scaled_init_method_normal, +) +from nemo.collections.nlp.modules.common.text_generation_utils import ( + generate, + get_computeprob_response, + get_default_length_params, + get_default_sampling_params, + megatron_neva_generate, +) +from nemo.collections.nlp.modules.common.transformer.text_generation import ( + LengthParam, + OutputType, + SamplingParam, + TextGeneration, +) +from nemo.collections.nlp.parts.nlp_overrides import GradScaler, NLPSaveRestoreConnector +from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone +from nemo.core import adapter_mixins +from nemo.core.classes.common import PretrainedModelInfo +from nemo.utils import AppState, logging + +try: + import apex.transformer.pipeline_parallel.utils + from apex.transformer.enums import AttnMaskType + from apex.transformer.pipeline_parallel.utils import get_num_microbatches + + HAVE_APEX = True + +except (ImportError, ModuleNotFoundError): + + HAVE_APEX = False + +try: + from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + +try: + import transformer_engine + + HAVE_TE = True + +except (ImportError, ModuleNotFoundError): + HAVE_TE = False + + +class FrozenCLIPVisionTransformer(CLIPVisionTransformer): + def __init__(self, model_cfg, pre_process=True, post_process=True): + super().__init__( + model_cfg, pre_process=pre_process, post_process=post_process, skip_head=True, + ) + self.frozen = False + + def train(self, mode): + if self.frozen: + return self + + super().train(mode) + return self + + def forward(self, input): + assert self.training == False + hidden_states = self.backbone(input) + # Do not add header after backbone + return hidden_states + + def freeze(self) -> None: + for param in self.parameters(): + param.requires_grad = False + + self.eval() + self.frozen = True + + +class NevaEmbeddingMixin(torch.nn.Module, adapter_mixins.AdapterModuleMixin): + def init_vision(self, vision_encoder, media_start_id, media_end_id, vision_select_layer=-1, class_token_length=1): + self.vision_encoder = vision_encoder + self.from_hf = isinstance(vision_encoder, CLIPVisionModel) + self.media_start_id = media_start_id + self.media_end_id = media_end_id + self.class_token_length = class_token_length + self.vision_select_layer = vision_select_layer + self.media = None + self.set_accepted_adapter_types([MMLinearAdapterConfig._target_]) + + def set_media(self, media): + self.media = media + + def forward(self, input_ids, **kwargs): + media = self.media # avoid change the signature of embedding forward function + words_embeddings = super().forward(input_ids, **kwargs) + return self.replace_media_embeddings(input_ids, words_embeddings, media) + + def encode_vision_x(self, vision_x: torch.Tensor): + """ + Compute media tokens from vision input by passing it through vision encoder and conditioning language model. + Args: + vision_x (torch.Tensor): Vision input + shape (B, T_img, F, C, H, W) + Images in the same chunk are collated along T_img, and frames are collated along F + Currently only F=1 is supported (single-frame videos) + + rearrange code based on https://github.com/dhansmair/flamingo-mini + """ + + assert vision_x.ndim == 6, "vision_x should be of shape (b, T_img, F, C, H, W)" + b, T, F = vision_x.shape[:3] + assert F == 1, "Only single frame supported" + + vision_x = rearrange(vision_x, "b T F c h w -> (b T F) c h w") + with torch.no_grad(): + if self.from_hf: + vision_x = self.vision_encoder(vision_x, output_hidden_states=True) + vision_x = vision_x.hidden_states[self.vision_select_layer] + else: + self.vision_encoder.backbone.transformer.return_select_layer = self.vision_select_layer + vision_x = self.vision_encoder(vision_x) + vision_x = rearrange(vision_x, "(b T F) v d -> b T F v d", b=b, T=T, F=F) + vision_x = vision_x[:, :, :, self.class_token_length :] + assert self.is_adapter_available(), "Cannot find multimodal vision adapter!" + vision_connector = self.get_adapter_module(AdapterName.MM_LINEAR_ADAPTER) + vision_x = vision_connector(vision_x) + return vision_x + + def replace_media_embeddings(self, input_ids, inputs_embeds, media): + if media is None: + return inputs_embeds + + batch_size, sequence_length, hidden_size = inputs_embeds.shape + + # calculate media features without gradients + media_features = self.encode_vision_x(media) # b T F S(eq) H(idden) + num_images_per_sample = media_features.size(1) + num_patches = media_features.size(3) + + # flatten patches + media_features = media_features.view(batch_size, -1, hidden_size) + + # create an indices matrix used in torch.scatter + padded_media_indices = torch.ones( + (batch_size, num_images_per_sample), dtype=torch.long, device=input_ids.device + ) + padded_media_indices *= sequence_length + for idx, input_id in enumerate(input_ids): + media_end_positions = torch.where(input_id == self.media_end_id)[0] + # locate the first media token positions + padded_media_indices[idx, : len(media_end_positions)] = media_end_positions - num_patches + assert (input_id[padded_media_indices[idx, : len(media_end_positions)] - 1] == self.media_start_id).all() + + # use indices to create a span + padded_media_indices = padded_media_indices.unsqueeze(-1) + torch.arange( + num_patches, device=padded_media_indices.device + ).repeat(*padded_media_indices.shape, 1) + padded_media_indices = padded_media_indices.reshape(batch_size, -1) + padded_media_indices = repeat(padded_media_indices, 'b s -> b s h', h=hidden_size) + + # concat placeholder + updated_input_embeds = torch.cat( + (inputs_embeds, torch.zeros((batch_size, num_patches, hidden_size), device=inputs_embeds.device)), dim=1 + ) + updated_input_embeds = updated_input_embeds.type(media_features.dtype) + # scatter media_features + updated_input_embeds.scatter_(1, padded_media_indices, media_features) + + # chop off placeholder + updated_input_embeds = updated_input_embeds[:, :sequence_length] + + return updated_input_embeds + + +class NevaModel(GPTModel): + def __init__( + self, mm_cfg, media_start_id, media_end_id, **kwargs, + ): + super(NevaModel, self).__init__(**kwargs,) + + self.mm_cfg = mm_cfg + self.media_start_id = media_start_id + self.media_end_id = media_end_id + + if mm_cfg.llm.from_pretrained is not None: + logging.info(f"Loading LLM weights from checkpoint {mm_cfg.llm.from_pretrained}") + self.load_llm_weights(self.language_model, mm_cfg.llm.from_pretrained) + if mm_cfg.llm.freeze: + for param in self.language_model.parameters(): + param.requires_grad = False + self.language_model = self.language_model.eval() + + # Initialize vision encoder and freeze it + if mm_cfg.vision_encoder.from_hf: + vision_encoder = CLIPVisionModel.from_pretrained( + mm_cfg.vision_encoder.from_pretrained, torch_dtype=torch.bfloat16, + ).cuda() + vision_encoder = vision_encoder.to(torch.bfloat16) + if mm_cfg.vision_encoder.freeze: + for param in vision_encoder.parameters(): + param.requires_grad = False + vision_encoder = vision_encoder.eval() + else: + vision_cfg = MegatronCLIPModel.restore_from( + mm_cfg.vision_encoder.from_pretrained, return_config=True + ).vision + vision_encoder = FrozenCLIPVisionTransformer(vision_cfg) + self.load_vision_encoder_weights(vision_encoder, mm_cfg.vision_encoder.from_pretrained) + if mm_cfg.vision_encoder.freeze: + vision_encoder.freeze() + + # Monkey patch embedding + if kwargs.get("pre_process", True): + extend_instance(self.language_model.embedding.word_embeddings, NevaEmbeddingMixin) + self.language_model.embedding.word_embeddings.init_vision( + vision_encoder, + media_start_id, + media_end_id, + vision_select_layer=mm_cfg.vision_encoder.get("vision_select_layer", -2), + class_token_length=mm_cfg.vision_encoder.get("class_token_length", 1), + ) + + def forward( + self, *args, **kwargs, + ): + media = args[-1] + self.language_model.embedding.word_embeddings.set_media(media) + return super().forward(*args[:-1], **kwargs) + + def _load_model_weights(self, nemo_path): + """ + Shared method to load model weights from a given nemo_path. + """ + if torch.cuda.is_available(): + map_location = torch.device('cuda') + else: + map_location = torch.device('cpu') + + save_restore_connector = NLPSaveRestoreConnector() + cwd = os.getcwd() + app_state = AppState() + + with tempfile.TemporaryDirectory() as tmpdir: + try: + if os.path.isfile(nemo_path): + save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) + else: + tmpdir = nemo_path + os.chdir(tmpdir) + if app_state.model_parallel_size is not None and app_state.model_parallel_size > 1: + model_weights = save_restore_connector._inject_model_parallel_rank_for_ckpt( + tmpdir, save_restore_connector.model_weights_ckpt + ) + else: + model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) + + state_dict = save_restore_connector._load_state_dict_from_disk( + model_weights, map_location=map_location + ) + finally: + os.chdir(cwd) + + return state_dict + + def load_vision_encoder_weights(self, vision_encoder, nemo_path): + state_dict = self._load_model_weights(nemo_path) + + new_state_dict = {} + for k, v in state_dict.items(): + if k.startswith("model.vision_encoder."): + new_k = k.replace("model.vision_encoder.", "") + new_state_dict[new_k] = v + + missing, unexpected = vision_encoder.load_state_dict(new_state_dict, strict=False) + print(f"Restored from {nemo_path} with {len(missing)} missing and {len(unexpected)} unexpected keys") + if len(missing) > 0: + print(f"Missing Keys: {missing}") + if len(unexpected) > 0: + print(f"Unexpected Keys: {unexpected}") + + def load_llm_weights(self, language_model, nemo_path): + state_dict = self._load_model_weights(nemo_path) + + new_state_dict = {} + for k, v in state_dict.items(): + if k.startswith("model.language_model."): + new_k = k.replace("model.language_model.", "", 1) + module_key, param_key = new_k.split(".", 1) + if module_key not in new_state_dict: + new_state_dict[module_key] = {} + new_state_dict[module_key][param_key] = v + + language_model.load_state_dict(new_state_dict, strict=True) + print(f"Restored LLM weights from {nemo_path}.") + + +class MegatronNevaModel(MegatronGPTPEFTModel): + """ + Megatron Neva pretraining + """ + + def __init__(self, cfg: DictConfig, trainer: Trainer): + if getattr(self, "peft_name_keys", None) is None: + self.peft_name_keys = [] + self.name_key_to_cfg = {} + + self.peft_name_keys += [AdapterName.MM_LINEAR_ADAPTER] + adapter_cfg = MMLinearAdapterConfig( + in_features=cfg.mm_cfg.vision_encoder.hidden_size, out_features=cfg.hidden_size, bias=True, + ) + self.name_key_to_cfg.update( + {AdapterName.MM_LINEAR_ADAPTER: adapter_cfg,} + ) + super().__init__(cfg, trainer) + + def get_gpt_module_list(self): + if isinstance(self.model, list): + return [model.module if isinstance(model, Float16Module) else model for model in self.model] + elif isinstance(self.model, Float16Module): + return [self.model.module] + else: + return [self.model] + + def model_provider_func(self, pre_process, post_process): + """Model depends on pipeline paralellism.""" + media_start_id = self.tokenizer.token_to_id(DEFAULT_IM_START_TOKEN) + media_end_id = self.tokenizer.token_to_id(DEFAULT_IM_END_TOKEN) + + model = NevaModel( + mm_cfg=self.cfg.mm_cfg, + media_start_id=media_start_id, + media_end_id=media_end_id, + vocab_size=self.cfg.get('override_vocab_size', self.padded_vocab_size), + hidden_size=self.cfg.hidden_size, + max_position_embeddings=self.cfg.max_position_embeddings, + num_layers=self.cfg.num_layers, + num_attention_heads=self.cfg.num_attention_heads, + apply_query_key_layer_scaling=self.cfg.get('apply_query_key_layer_scaling', True), + kv_channels=self.cfg.get('kv_channels', None), + ffn_hidden_size=self.cfg.ffn_hidden_size, + num_tokentypes=0, + parallel_output=True, + pre_process=pre_process, + post_process=post_process, + init_method_std=self.cfg.get('init_method_std', 0.02), + use_scaled_init_method=self.cfg.get('use_scaled_init_method', True), + fp16_lm_cross_entropy=self.cfg.get('fp16_lm_cross_entropy', False), + use_cpu_initialization=self.cfg.get('use_cpu_initialization', False), + megatron_amp_O2=self.cfg.get('megatron_amp_O2', False), + hidden_dropout=self.cfg.get('hidden_dropout', 0.1), + attention_dropout=self.cfg.get('attention_dropout', 0.1), + ffn_dropout=self.cfg.get('ffn_dropout', 0.0), + precision=self.cfg.get('precision', 16), + fp32_residual_connection=self.cfg.get('fp32_residual_connection', False), + activations_checkpoint_granularity=self.cfg.get('activations_checkpoint_granularity', None), + activations_checkpoint_method=self.cfg.get('activations_checkpoint_method', None), + activations_checkpoint_num_layers=self.cfg.get('activations_checkpoint_num_layers', 1), + activations_checkpoint_layers_per_pipeline=self.cfg.get( + 'activations_checkpoint_layers_per_pipeline', None + ), + normalization=self.cfg.get('normalization', 'layernorm'), + layernorm_epsilon=self.cfg.get('layernorm_epsilon', 1e-5), + onnx_safe=self.cfg.get('onnx_safe', False), + bias=self.cfg.get('bias', True), + bias_activation_fusion=self.cfg.get('bias_activation_fusion', True), + bias_dropout_add_fusion=self.cfg.get('bias_dropout_add_fusion', True), + activation=self.cfg.get('activation', 'gelu'), + headscale=self.cfg.get('headscale', False), + transformer_block_type=self.cfg.get('transformer_block_type', 'pre_ln'), + openai_gelu=self.cfg.get('openai_gelu', False), + normalize_attention_scores=self.cfg.get('normalize_attention_scores', True), + position_embedding_type=self.cfg.get('position_embedding_type', 'learned_absolute'), + rotary_percentage=self.cfg.get('rotary_percentage', 1.0), + share_embeddings_and_output_weights=self.cfg.get('share_embeddings_and_output_weights', True), + attention_type=self.cfg.get('attention_type', 'multihead'), + masked_softmax_fusion=self.cfg.get('masked_softmax_fusion', True), + gradient_accumulation_fusion=self.cfg.get('gradient_accumulation_fusion', False), + persist_layer_norm=self.cfg.get('persist_layer_norm', False), + sequence_parallel=self.cfg.get('sequence_parallel', False), + transformer_engine=self.cfg.get('transformer_engine', False), + fp8=self.cfg.get('fp8', False), + fp8_e4m3=self.cfg.get('fp8_e4m3', False), + fp8_hybrid=self.cfg.get('fp8_hybrid', False), + fp8_margin=self.cfg.get('fp8_margin', 0), + fp8_interval=self.cfg.get('fp8_interval', 1), + fp8_amax_history_len=self.cfg.get('fp8_amax_history_len', 1), + fp8_amax_compute_algo=self.cfg.get('fp8_amax_compute_algo', 'most_recent'), + reduce_amax=self.cfg.get('reduce_amax', True), + use_emha=self.cfg.get('use_emha', False), + ub_tp_comm_overlap=self.cfg.get('ub_tp_comm_overlap', False), + use_flash_attention=self.cfg.get('use_flash_attention', False), + megatron_legacy=self.cfg.get('megatron_legacy', False), + ) + + logging.info( + f"Neva model initialized with {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable parameters" + ) + + return model + + def setup_optimizer_param_groups(self): + """ModelPT override. Optimizer will get self._optimizer_param_groups""" + if self.cfg.mm_cfg.llm.freeze: + super().setup_optimizer_param_groups() + else: + MegatronGPTModel.setup_optimizer_param_groups(self) + + # filter out params doesn't have grad + for param_group in self._optimizer_param_groups: + params_with_grad = [param for param in param_group['params'] if param.requires_grad] + param_group['params'] = params_with_grad + + def forward(self, tokens, text_position_ids, attention_mask, labels, media=None): + output_tensor = self.model(tokens, text_position_ids, attention_mask, labels, media) + return output_tensor + + def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): + + tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + + # handle asynchronous grad reduction + no_sync_func = None + grad_sync_func = None + param_sync_func = None + if not forward_only and self.with_distributed_adam: + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) + grad_sync_func = self.reduce_overlap_gradients + param_sync_func = self.sync_overlap_parameters + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + fwd_bwd_function = get_forward_backward_func() + + # TODO @akhattar: remove sync related stuff from config, add num_micro_batches_with_partial_activation_checkpoints when ready + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), + forward_only=forward_only, + tensor_shape=tensor_shape, + dtype=self.autocast_dtype, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=self.enable_autocast, + no_sync_func=no_sync_func, + grad_sync_func=grad_sync_func, + param_sync_func=param_sync_func, + ) + + # only the last stages of the pipeline return losses + if losses_reduced_per_micro_batch: + # average loss across micro batches + loss_tensors_list = [loss_reduced['avg'] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_mean = loss_tensor.mean() + else: + loss_mean = torch.tensor(0.0).cuda() + + return loss_mean + + def training_step(self, dataloader_iter, batch_idx): + """ + We pass the dataloader iterator function to the micro-batch scheduler. + The input batch to each micro-batch is fetched using the dataloader function + in the micro-batch fwd function. + """ + return MegatronGPTModel.training_step(self, dataloader_iter, batch_idx) + + def get_forward_output_and_loss_func(self, validation_step=False): + def loss_func(output_tensor, loss_mask): + loss_for_ub = self.loss_func(loss_mask, output_tensor) + if validation_step and not self.cfg.data.get('validation_drop_last', True): + raise NotImplementedError(f"`validation_drop_last=False` is not implemented in Neva!") + else: + reduced_loss = average_losses_across_data_parallel_group([loss_for_ub]) + return loss_for_ub, dict(avg=reduced_loss[0]) + + def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_layers=None): + batch = next(dataloader_iter) + if parallel_state.get_pipeline_model_parallel_world_size() == 1: + for k in batch.keys(): + if self.get_attention_mask_from_fusion: + batch[k] = batch[k].cuda(non_blocking=True) if k not in ['attention_mask'] else None + else: + batch[k] = batch[k].cuda(non_blocking=True) + else: + if parallel_state.is_pipeline_first_stage(): + # First pipeline stage needs tokens, position_ids, and attention_mask + for k in batch.keys(): + if self.get_attention_mask_from_fusion: + batch[k] = ( + batch[k].cuda(non_blocking=True) if k in ['tokens', 'position_ids', 'media'] else None + ) + else: + batch[k] = ( + batch[k].cuda(non_blocking=True) + if k in ['tokens', 'position_ids', 'attention_mask', 'media'] + else None + ) + elif parallel_state.is_pipeline_last_stage(): + # Last pipeline stage needs the labels, loss_mask, and attention_mask + for k in batch.keys(): + if self.get_attention_mask_from_fusion: + batch[k] = batch[k].cuda(non_blocking=True) if k in ['labels', 'loss_mask'] else None + else: + batch[k] = ( + batch[k].cuda(non_blocking=True) + if k in ['labels', 'loss_mask', 'attention_mask'] + else None + ) + else: + # Intermediate pipeline stage doesn't need any inputs + batch = {k: None for k in ['tokens', 'position_ids', 'attention_mask', 'labels', 'media']} + + output_tensor = model( + batch['tokens'], + batch['position_ids'], + batch['attention_mask'], + batch['labels'], + batch.get('media'), + checkpoint_activations_all_layers=checkpoint_activations_all_layers, + ) + + return output_tensor, partial(loss_func, loss_mask=batch['loss_mask']) + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(dataloader_iter, model): + batch = next(dataloader_iter) + extra_arg = {} + ( + tokens, + attention_mask, + position_ids, + media, + set_inference_key_value_memory, + inference_max_sequence_len, + ) = batch + tokens = tokens.cuda() + attention_mask = attention_mask.cuda() + position_ids = position_ids.cuda() + attention_mask = attention_mask[0:1] + if media is not None: + media = media.cuda() + labels = None + extra_arg['set_inference_key_value_memory'] = set_inference_key_value_memory[0].item() + extra_arg['inference_max_sequence_len'] = inference_max_sequence_len[0].item() + # TODO : Should I add labels ? + output_tensor = model(tokens, position_ids, attention_mask, labels, media, **extra_arg) + + def id_func(output_tensor): + return output_tensor, {'logits': output_tensor} + + return output_tensor, id_func + + return fwd_output_only_func + + def validation_step(self, dataloader_iter, batch_idx): + loss_mean = self.fwd_bwd_step(dataloader_iter, batch_idx, True) + return loss_mean + + def validation_epoch_end(self, outputs): + if parallel_state.is_pipeline_last_stage(): + # only the last pipeline parallel stages return loss with their batch size + if self.cfg.data.get('validation_drop_last', True): + averaged_loss = torch.stack(outputs).mean() + else: + # Compute the avg loss by total_loss across all samples / total number of samples + # total_loss_and_total_samples = torch.vstack(outputs).sum(axis=0) + # avg_loss = total_loss_and_total_samples[0] / total_loss_and_total_samples[1] + # averaged_loss = avg_loss.type(torch.float32).cuda() + raise NotImplementedError("`validation_drop_last=False` is not supported!") + else: + averaged_loss = torch.tensor(0.0, dtype=torch.float32).cuda() + + # we can only log on one rank if it is rank zero so we broadcast from last rank + torch.distributed.broadcast(averaged_loss, get_last_rank()) + self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True, batch_size=1) + return averaged_loss + + def on_validation_epoch_start(self): + pass + + def on_validation_epoch_end(self): + pass + + def test_step(self, batch, batch_idx): + return self.validation_step(batch, batch_idx) + + def test_epoch_end(self, outputs): + averaged_loss = average_losses_across_data_parallel_group(outputs) + logging.info(f'test_loss: {averaged_loss[0]}') + + def loss_func(self, loss_mask, output_tensor): + losses = output_tensor.float() + loss_mask = loss_mask.view(-1).float() + # TODO: add nemo version here + loss = torch.sum(losses.view(-1) * loss_mask) / loss_mask.sum() # sequence level nll + return loss + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + num_parameters_on_device, total_num_parameters = self._get_total_params_across_model_parallel_groups_gpt_bert( + self.model + ) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer._checkpoint_connector.resume_from_checkpoint_fit_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + rampup_batch_size = self.cfg.get('rampup_batch_size', None) + if rampup_batch_size: + start_batch_size = rampup_batch_size[0] + batch_size_increment = rampup_batch_size[1] + total_gpus_number = self.trainer.num_devices * self.trainer.num_nodes + + assert start_batch_size % (total_gpus_number) == 0, ( + 'expected' + ' start batch size ({}) to be divisible by total number of GPUs' + ' ({})'.format(start_batch_size, total_gpus_number) + ) + + micro_batch_size = self.cfg.get('micro_batch_size', 1) + tensor_model_parallel_size = self.cfg.get('tensor_model_parallel_size', 1) + pipeline_model_parallel_size = self.cfg.get('pipeline_model_parallel_size', 1) + total_data_parallel_size = total_gpus_number // (tensor_model_parallel_size * pipeline_model_parallel_size) + + assert batch_size_increment % (micro_batch_size * total_data_parallel_size) == 0, ( + 'expected' + ' batch size increment ({}) to be divisible by micro_batch_size ({}) times total data parallel size' + ' ({})'.format(batch_size_increment, micro_batch_size, total_data_parallel_size) + ) + + if stage == 'predict': + return + else: + # TODO: consider adding a ModelPT guard to check if model is being restored. + # allowing restored models to optionally setup datasets + self.build_train_valid_test_datasets() + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + + # when using pipeline model parallel the final stage need to initialize word embeddings + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if isinstance(self.model, list): + for i, module in enumerate(self.model): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + if self.cfg.get('share_embeddings_and_output_weights', True): + module.sync_initial_word_embeddings() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + else: + if self.cfg.get('share_embeddings_and_output_weights', True): + self.model.sync_initial_word_embeddings() + + if self.cfg.get('transformer_engine', False): + self.setup_transformer_engine_tp_groups() + + if self.cfg.mm_cfg.llm.freeze: + self.setup_complete = True + + def build_train_valid_test_datasets(self): + logging.info('Building Neva datasets.') + ds_dict = make_supervised_data_module(tokenizer=self.tokenizer, model_cfg=self.cfg,) + self._train_ds = ds_dict["train_dataset"] + self._validation_ds = ds_dict["eval_dataset"] + + return self._train_ds, self._validation_ds + + def build_pretraining_data_loader( + self, dataset, consumed_samples, dataset_type=None, drop_last=True, pad_samples_to_global_batch_size=False + ): + """Buld dataloader given an input dataset.""" + + logging.info(f'Building dataloader with consumed samples: {consumed_samples}') + # Megatron sampler + if hasattr(self.cfg.data, 'dataloader_type') and self.cfg.data.dataloader_type is not None: + if self.cfg.data.dataloader_type == 'single': + batch_sampler = MegatronPretrainingSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=self.cfg.micro_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=drop_last, + global_batch_size=self.cfg.global_batch_size, + pad_samples_to_global_batch_size=pad_samples_to_global_batch_size, + ) + elif self.cfg.data.dataloader_type == 'cyclic': + batch_sampler = MegatronPretrainingRandomSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=self.cfg.micro_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=self.cfg.get('drop_last', True), + ) + else: + raise ValueError('cfg.data.dataloader_type must be "single" or "cyclic"') + else: + raise ValueError('cfg.data.dataloader_type not found. Must be "single" or "cyclic"') + + collate_func = DataCollatorForSupervisedDataset(self.cfg, self.tokenizer) + return torch.utils.data.DataLoader( + dataset, + batch_sampler=batch_sampler, + collate_fn=collate_func, + num_workers=self.cfg.data.num_workers, + pin_memory=True, + persistent_workers=True if self.cfg.data.num_workers > 0 else False, + ) + + @classmethod + def list_available_models(cls) -> Optional[PretrainedModelInfo]: + """ + This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud. + Returns: + List of available pre-trained models. + """ + return [] + + def setup_test_data(self, cfg): + pass + + def state_dict(self, destination=None, prefix='', keep_vars=False): + # Get the original state dictionary + original_state_dict = super().state_dict(destination=destination, prefix=prefix, keep_vars=keep_vars) + keys_to_keep = list(self.adapter_keys) + # TODO(yuya): maybe not hard-code vision_encoder keys here + if self.megatron_amp_O2: + vision_encoder_keys = [ + k.replace("model.module.", "model.", 1) for k in self.base_keys if "vision_encoder" in k + ] + llm_keys = [k.replace("model.module.", "model.", 1) for k in self.base_keys if "vision_encoder" not in k] + else: + vision_encoder_keys = [k for k in self.base_keys if "vision_encoder" in k] + llm_keys = [k for k in self.base_keys if "vision_encoder" not in k] + if not self.cfg.mm_cfg.llm.freeze: + keys_to_keep += llm_keys + if not self.cfg.mm_cfg.vision_encoder.freeze: + keys_to_keep += vision_encoder_keys + return {k: original_state_dict[k] for k in keys_to_keep if k in original_state_dict} + + def load_state_dict(self, state_dict, strict=False): + logging.warning('Loading state dict for MegatronNevaModel...') + missing_keys, unexpected_keys = NLPModel.load_state_dict(self, state_dict, strict=False) + + if len(missing_keys) > 0: + logging.warning('Missing keys were detected during the load. Please double check.') + logging.warning(f'Missing keys: \n{missing_keys}') + if len(unexpected_keys) > 0: + logging.critical('Unexpected keys were detected during the load. Please double check.') + logging.critical(f'Unexpected keys: \n{unexpected_keys}') + + def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any: + inference_config = self.get_inference_config() + + image_list = [] + for prompt in batch: + image_paths_in_prompts = re.findall(r'', prompt) + image_list.append(image_paths_in_prompts) + + if inference_config is None: + return None + else: + # need to overwrite some configuration, make it immutable + inference_config = inference_config.copy() + compute_logprob = inference_config['compute_logprob'] + if compute_logprob: + inference_config['inputs'] = batch + inference_config['tokens_to_generate'] = 1 + inference_config['all_probs'] = True + inference_config["add_BOS"] = False + inference_config['greedy'] = True + inference_config['image_list'] = image_list + response = generate(self, **inference_config) + compute_prob_response = get_computeprob_response(self.tokenizer, response, batch) + return compute_prob_response + else: + inference_config['inputs'] = batch + inference_config['image_list'] = image_list + return generate(self, **inference_config) + + def generate( + self, + input_prompts: Union[List[str], torch.Tensor, List[dict]], + length_params: LengthParam, + sampling_params: SamplingParam = None, + ) -> OutputType: + + # check whether the DDP is initialized + if parallel_state.is_unitialized(): + + def dummy(): + return + + import os + + if self.trainer.strategy.launcher is not None: + self.trainer.strategy.launcher.launch(dummy, trainer=self.trainer) + self.trainer.strategy.setup_environment() + + # set the default sampling params if it is None. + # default do greedy sampling + if sampling_params is None: + sampling_params = get_default_sampling_params() + + # set the default length params if it is None. + # default do greedy sampling + if length_params is None: + length_params = get_default_length_params() + + import time + + start = time.time() + # Supports only one prompt at a time + result = megatron_neva_generate(self.cuda(), input_prompts, self.tokenizer, length_params, sampling_params) + end = time.time() + print(f'Time taken {end - start}') + + return result diff --git a/nemo/collections/multimodal/models/neva/neva_peft_models.py b/nemo/collections/multimodal/models/neva/neva_peft_models.py new file mode 100644 index 000000000000..ac03b5983430 --- /dev/null +++ b/nemo/collections/multimodal/models/neva/neva_peft_models.py @@ -0,0 +1,60 @@ +from omegaconf.dictconfig import DictConfig +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel +from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import ( + AdapterName, + InfusedAdapterConfig, + LoraKQVAdapterConfig, + MLPInfusedAdapterConfig, + ParallelLinearAdapterConfig, + PromptEncoderAdapterConfig, +) +from nemo.core.classes.mixins import adapter_mixins +from nemo.utils import logging, model_utils + + +class MegatronNevaLoRAModel(MegatronNevaModel): + """ + MegatronNevaLoRAModel is a model that combines a base model (MegatronNevaModel) with a low-rank adapters. + The lora adapters will be added in `nemo/collections/nlp/modules/common/megatron/attention.py` + The implementation is based on Hu et al. nemo/collections/nlp/modules/common/megatron/attention.py + + A single low-rank feedfowrad layer is used in parallel with the KQV projection layer. + TODO: Add support to also include an option to adda low-rank adapter in the output projection layer. + """ + + def __init__( + self, cfg: DictConfig, trainer: Trainer, + ): + self.peft_name_keys = [ + AdapterName.LORA_KQV_ADAPTER, + ] + lora_cfg = cfg.peft.lora_tuning + if cfg.get("kv_channels", None) is None: + assert ( + cfg.hidden_size % cfg.num_attention_heads == 0 + ), 'hidden_size must be divisible by num_attention_heads if kv_channels is None' + kv_channels = cfg.hidden_size // cfg.num_attention_heads + else: + kv_channels = cfg.kv_channels + projection_size = kv_channels * cfg.num_attention_heads + + adapter_cfg = LoraKQVAdapterConfig( + in_features=cfg.hidden_size, + out_features=3 * projection_size, + dim=lora_cfg.adapter_dim, + norm_position="none", + norm_type="none", + activation="identity", + column_init_method=lora_cfg.get("column_init_method", "normal"), + row_init_method=lora_cfg.get("row_init_method", "zero"), + gather_output=False, + dropout=lora_cfg.adapter_dropout, + ) + + self.name_key_to_cfg = {} + for k in self.peft_name_keys: + self.name_key_to_cfg[k] = adapter_cfg + + super().__init__(cfg, trainer) diff --git a/nemo/collections/multimodal/parts/utils.py b/nemo/collections/multimodal/parts/utils.py index 54dcc7816728..5db718009013 100644 --- a/nemo/collections/multimodal/parts/utils.py +++ b/nemo/collections/multimodal/parts/utils.py @@ -50,6 +50,50 @@ def randn_like(x, generator=None): return torch.randn(x.shape, dtype=x.dtype, device=x.device, generator=generator) +def extend_instance(obj, mixin): + """Apply mixins to a class instance after creation""" + base_cls = obj.__class__ + base_cls_name = obj.__class__.__name__ + obj.__class__ = type( + base_cls_name, (mixin, base_cls), {} + ) # mixin needs to go first for our forward() logic to work + + +def getattr_recursive(obj, att): + """ + Return nested attribute of obj + Example: getattr_recursive(obj, 'a.b.c') is equivalent to obj.a.b.c + """ + if att == "": + return obj + i = att.find(".") + if i < 0: + return getattr(obj, att) + else: + return getattr_recursive(getattr(obj, att[:i]), att[i + 1 :]) + + +def setattr_recursive(obj, att, val): + """ + Set nested attribute of obj + Example: setattr_recursive(obj, 'a.b.c', val) is equivalent to obj.a.b.c = val + """ + if "." in att: + obj = getattr_recursive(obj, ".".join(att.split(".")[:-1])) + setattr(obj, att.split(".")[-1], val) + + +def apply_with_stopping_condition(module, apply_fn, apply_condition=None, stopping_condition=None, **other_args): + if stopping_condition(module): + return + if apply_condition(module): + apply_fn(module, **other_args) + for child in module.children(): + apply_with_stopping_condition( + child, apply_fn, apply_condition=apply_condition, stopping_condition=stopping_condition, **other_args + ) + + def setup_trainer_and_models_for_inference( model_provider: Any, cfg: DictConfig, model_cfg_modifier: Callable, ): @@ -160,8 +204,8 @@ def setup_trainer_and_model_for_inference( # Check if we need to use the TorchElasticEnvironment plugin for the trainer. plugins = [] - if cfg.get('cluster_type', None) == 'BCP': - plugins.append(TorchElasticEnvironment()) + # if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) # Use the NLPDDPStrategy for the distributed data parallel strategy. # We don't use DDP for async grad allreduce and don't find unused parameters. @@ -173,7 +217,7 @@ def setup_trainer_and_model_for_inference( # Create the NLPSaveRestoreConnector object for model saving and restoring. save_restore_connector = NLPSaveRestoreConnector() - if cfg.model.restore_from_path.endswith(".nemo"): + if cfg.model.restore_from_path.endswith(".nemo") or os.path.isdir(cfg.model.restore_from_path): # Set the model_extracted_dir attribute if the restore path is a directory. if os.path.isdir(cfg.model.restore_from_path): save_restore_connector.model_extracted_dir = cfg.model.restore_from_path diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index f553f32c1665..0563287c7292 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -452,8 +452,12 @@ def configure_optimizers(self): # if using tensor parallel only, we automatically use async grad all-reduce # if using pipeline parallel or sequence parallel or gradient accumulation fusion, then we disable it - if self.cfg.get('pipeline_model_parallel_size', 1) == 1 and not ( - self.cfg.get('sequence_parallel', False) or self.cfg.get('gradient_accumulation_fusion', False) + if ( + self.cfg.get('pipeline_model_parallel_size', 1) == 1 + and not ( + self.cfg.get('sequence_parallel', False) or self.cfg.get('gradient_accumulation_fusion', False) + ) + and self.cfg.get('async_grad_allreduce', True) ): async_grad_allreduce = True else: @@ -627,6 +631,7 @@ def _get_total_params_across_model_parallel_groups_gpt_bert(self, model): parallel_state.get_pipeline_model_parallel_world_size() > 1 and parallel_state.is_pipeline_last_stage(ignore_virtual=True) and self.cfg.get('share_embeddings_and_output_weights', True) + and ("llm" in self.cfg and self.cfg.llm.get('share_embeddings_and_output_weights', True)) ): # substract the embedding weights on the last stage num_word_embedding_parameters = sum([p.nelement() for p in model.word_embeddings_weight()]) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 41baccb1aaf1..0fe1c957a413 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -584,7 +584,11 @@ def training_step(self, dataloader_iter, batch_idx): self._optimizer._finish_bucket_grad_sync() elif self.megatron_amp_O2: # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) - if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): + if ( + self.cfg.get('pipeline_model_parallel_size', 1) > 1 + or self.cfg.get('sequence_parallel', False) + or (not self.cfg.get('async_grad_allreduce', True)) + ): # main grads are stored in the MainParamsOptimizer wrapper self._optimizer.allreduce_main_grads() else: diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py index 73579114234d..c0f9f7c7d276 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py @@ -40,6 +40,8 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self.base_keys = self.get_all_keys() self.init_peft_modules() self.adapter_keys = self.get_all_keys() - self.base_keys + if self.megatron_amp_O2: + self.adapter_keys = set(key.replace("model.module.", "model.", 1) for key in self.adapter_keys) def first_stage_of_pipeline(self): if hasattr(self, "model") and hasattr(self.model, "pre_process"): @@ -62,6 +64,9 @@ def init_peft_modules(self): module.add_adapter( name=peft_key, cfg=peft_cfg, ) + if self.megatron_amp_O2: + for adapter_name in getattr(module, 'adapter_layer', []): + module.adapter_layer[adapter_name] = module.adapter_layer[adapter_name].to(self.autocast_dtype) logging.info(f"After adding PEFT params:\n{self.summarize()}") return True @@ -69,7 +74,7 @@ def setup(self, stage=None): super().setup(stage) self.setup_complete = True - def get_all_keys(self,): + def get_all_keys(self,): # TODO (yuya): why just state_dict? """ Returns all the keys in the model """ @@ -103,7 +108,11 @@ def load_state_dict(self, state_dict, strict: bool = True): # setting strict=False will ignore the missing keys (which are not being updated anyway) # explicitly check if state_dict.keys matches all the expected self.adapter_keys since we don't have the # safety in strict=True anymore. - assert set(state_dict.keys()) == self.adapter_keys + if self.megatron_amp_O2: + adapter_keys = set(key.replace("model.", "model.module.", 1) for key in self.adapter_keys) + else: + adapter_keys = self.adapter_keys + assert set(state_dict.keys()) == adapter_keys super().load_state_dict(state_dict, strict=False) else: super().load_state_dict(state_dict, strict=True) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index 9507a01d01f0..e1847ac92c19 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -77,8 +77,9 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): ) super().__init__(cfg, trainer=trainer) self.sep_id = cfg.get('sep_id', 49704) - self.val_metric, self.val_metric_name = self.setup_metric(self.cfg.data.validation_ds) - self.val_metric = torch.nn.ModuleList(self.val_metric) if self.val_metric is not None else None + if hasattr(self.cfg.data, "validation_ds"): + self.val_metric, self.val_metric_name = self.setup_metric(self.cfg.data.validation_ds) + self.val_metric = torch.nn.ModuleList(self.val_metric) if self.val_metric is not None else None if hasattr(self.cfg.data, "test_ds"): self.test_metric, self.test_metric_name = self.setup_metric(self.cfg.data.test_ds) self.test_metric = torch.nn.ModuleList(self.test_metric) if self.test_metric is not None else None diff --git a/nemo/collections/nlp/models/nlp_model.py b/nemo/collections/nlp/models/nlp_model.py index d739efa88485..71bfd7b3a175 100644 --- a/nemo/collections/nlp/models/nlp_model.py +++ b/nemo/collections/nlp/models/nlp_model.py @@ -394,4 +394,4 @@ def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True): and "bert_model.embeddings.position_ids" in state_dict ): del state_dict["bert_model.embeddings.position_ids"] - super(NLPModel, self).load_state_dict(state_dict, strict=strict) + return super(NLPModel, self).load_state_dict(state_dict, strict=strict) diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py index fe339c6f9a8b..5d9e20913882 100644 --- a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py +++ b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py @@ -63,6 +63,7 @@ class AdapterName(str, enum.Enum): LORA_KQV_ADAPTER = "lora_kqv_adapter" LORA_KV_ADAPTER = "lora_kv_adapter" LORA_Q_ADAPTER = "lora_q_adapter" + MM_LINEAR_ADAPTER = "mm_linear_adapter" class InfusedAdapter(nn.Module, AdapterModuleUtil): @@ -345,3 +346,20 @@ class PromptEncoderAdapterConfig: init_std: float output_dim: int _target_: str = "{0}.{1}".format(PromptEncoderAdapter.__module__, PromptEncoderAdapter.__name__) + + +class MMLinearAdapter(nn.Module, AdapterModuleUtil): + def __init__(self, in_features: int, out_features: int, bias: bool) -> None: + super().__init__() + self.linear = torch.nn.Linear(in_features, out_features, bias,) + + def forward(self, x): + return self.linear(x) + + +@dataclass +class MMLinearAdapterConfig: + in_features: int + out_features: int + bias: bool + _target_: str = "{0}.{1}".format(MMLinearAdapter.__module__, MMLinearAdapter.__name__) diff --git a/nemo/collections/nlp/modules/common/megatron/language_model.py b/nemo/collections/nlp/modules/common/megatron/language_model.py index 277749189b3f..e71e3ceaf473 100755 --- a/nemo/collections/nlp/modules/common/megatron/language_model.py +++ b/nemo/collections/nlp/modules/common/megatron/language_model.py @@ -897,7 +897,9 @@ def load_state_dict(self, state_dict, strict=True): # for backward compatibility. state_dict_ = {} for key in state_dict.keys(): - if 'transformer.' in key: + if self._encoder_key + '.' in key: + state_dict_[key.split(self._encoder_key + '.')[1]] = state_dict[key] + elif 'transformer.' in key: state_dict_[key.split('transformer.')[1]] = state_dict[key] # for backward compatibility. @@ -916,6 +918,13 @@ def load_state_dict(self, state_dict, strict=True): if self.add_pooler: assert 'pooler' in state_dict, 'could not find data for pooler in the checkpoint' self.pooler.load_state_dict(state_dict[self._pooler_key], strict=strict) + if not self.share_embeddings_and_output_weights: + # import pdb; pdb.set_trace() + assert ( + self._output_layer_key in state_dict + ), 'could not find data for output embedding layer in the checkpoint' + self.output_layer.load_state_dict(state_dict[self._output_layer_key], strict=strict) + # decoder if self.add_decoder: assert 'decoder' in state_dict, 'could not find data for pooler in the checkpoint' diff --git a/nemo/collections/nlp/modules/common/megatron/transformer.py b/nemo/collections/nlp/modules/common/megatron/transformer.py index 652a3e6f4e3a..545e9e1eae5e 100644 --- a/nemo/collections/nlp/modules/common/megatron/transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/transformer.py @@ -1193,6 +1193,9 @@ def build_layer(layer_number): if not bias and normalization not in ['layernorm', 'layernorm1p']: remove_bias_from_layernorm(self.final_layernorm) + # Hacky set up for vision encoder select layer, won't support PP + self.return_select_layer = 0 + def _get_layer(self, layer_number): return self.layers[layer_number] @@ -1504,6 +1507,14 @@ def forward( if self.inference_params != None: self.inference_params.sequence_len_offset = self.inference_current_sequence_len + if self.return_select_layer < 0: + assert ( + parallel_state.get_pipeline_model_parallel_world_size() == 1 + ), f"##{parallel_state.get_pipeline_model_parallel_world_size}" + if self.num_layers + self.return_select_layer < 0: + logging.warning("Returning embeddings states only!") + return hidden_states + for index in range(self.num_layers): layer = self._get_layer(index) past = None @@ -1560,6 +1571,13 @@ def forward( if self.transformer_engine: self.inference_current_sequence_len += hidden_states.size(0) + if self.return_select_layer < 0: + assert ( + parallel_state.get_pipeline_model_parallel_world_size() == 1 + ), f"##{parallel_state.get_pipeline_model_parallel_world_size}" + if index == self.num_layers + self.return_select_layer: + return hidden_states + # Skip counter update for eval and activation checkpointing if torch.is_grad_enabled() and self.training: self.microbatch_count += 1 diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py index 573bdc80735e..e2a122d95cbb 100644 --- a/nemo/collections/nlp/modules/common/text_generation_strategy.py +++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py @@ -13,6 +13,8 @@ # limitations under the License. import abc +import os +import re from typing import List, Tuple import torch @@ -252,6 +254,155 @@ def prepare_batch_at_step( return batch, tensor_shape +class NevaModelTextGenerationStrategy(TextGenerationStrategy): + def __init__(self, model): + super().__init__(model) + self.forward_model = self.model.model.module + self.num_media_latents = 576 # TODO: Need to obtain this from the config ideally + self.tokenizer = self.model.tokenizer + self.image_paths = [] + self.data_cfg = model.cfg.data + from transformers import CLIPImageProcessor + + CLIP_MODEL = os.environ.get("CLIP_MODEL", "openai/clip-vit-large-patch14") + self.processor = CLIPImageProcessor.from_pretrained(CLIP_MODEL, torch_dtype=torch.bfloat16) + self.model = model + + def clip_max_len(self, maxlen: int) -> int: + """ clip the max len based on the LM model max sequence length""" + if maxlen > self.model.cfg.encoder_seq_length + 1: + maxlen = self.model.cfg.encoder_seq_length + 1 + return maxlen + + def init_batch(self, context_tokens: torch.Tensor, context_length: int, compute_attention_mask: bool): + """initialize the batch data before the inference steps.""" + # Move to GPU. + tokenizer = self.model.tokenizer + tokens = context_tokens.contiguous().cuda() + # Get the attention mask and postition ids. + self.attention_mask, _, self.position_ids = get_ltor_masks_and_position_ids( + tokens, + eod_token=tokenizer.eos_id, + eod_mask_loss=False, + reset_attention_mask=False, + reset_position_ids=False, + compute_attention_mask=compute_attention_mask, + ) + + def tokenize_batch(self, sentences, max_len, add_BOS): + pattern = r"()" + context_tokens = [] + # TODO : Should get this from config + image_tokens = [self.tokenizer.token_to_id("")] * self.num_media_latents + for sentence in sentences: + img_path = re.findall(pattern, sentence)[0] + split_sentence = sentence.split(img_path) + if add_BOS: + output_tokens = [self.tokenizer.bos_id] + else: + output_tokens = [] + + output_tokens = ( + self.tokenizer.text_to_ids(split_sentence[0]) + + image_tokens + + self.tokenizer.text_to_ids(split_sentence[1]) + ) + context_tokens.append(output_tokens) + + context_tokens, context_lengths = pad_batch(context_tokens, self.tokenizer.eos_id, max_len) + context_tokens_tensor = torch.cuda.LongTensor(context_tokens) + context_length_tensor = torch.cuda.LongTensor(context_lengths) + return context_tokens_tensor, context_length_tensor + + def get_media_tensor(self, image_path): + from PIL import Image + + image = Image.open(image_path).convert('RGB') + + if self.data_cfg.image_aspect_ratio == 'keep': + max_hw, min_hw = max(image.size), min(image.size) + aspect_ratio = max_hw / min_hw + max_len, min_len = 448, 224 + shortest_edge = int(min(max_len / aspect_ratio, min_len)) + image = self.processor.preprocess( + image, return_tensors='pt', do_center_crop=False, size={"shortest_edge": shortest_edge} + )['pixel_values'][0] + elif self.data_cfg.image_aspect_ratio == 'pad': + + def expand2square(pil_img, background_color): + width, height = pil_img.size + if width == height: + return pil_img + elif width > height: + result = Image.new(pil_img.mode, (width, width), background_color) + result.paste(pil_img, (0, (width - height) // 2)) + return result + else: + result = Image.new(pil_img.mode, (height, height), background_color) + result.paste(pil_img, ((height - width) // 2, 0)) + return result + + image = expand2square(image, tuple(int(x * 255) for x in self.processor.image_mean)) + image = self.processor.preprocess(image, return_tensors='pt')['pixel_values'][0] + else: + image = self.processor.preprocess(image, return_tensors='pt')['pixel_values'][0] + + model_cfg = self.model.cfg + + if model_cfg.precision == 16: + media = image.type(torch.float16) + elif model_cfg.precision == 32: + media = image.type(torch.float32) + else: + media = image.type(torch.bfloat16) + + return media.unsqueeze(dim=0).unsqueeze(dim=0).unsqueeze(dim=0) + + def prepare_batch_at_step( + self, + tokens: torch.Tensor, + maxlen: int, + micro_batch_size: int, + step: int, + context_length: int, + compute_attention_mask: bool = True, + media=None, + ) -> Tuple[List[torch.Tensor], List[int]]: + """ + generate the batch used in inference for each of the steps + """ + # types2use = None + if step == 0: + # Allocate memory for the entire context. + set_inference_key_value_memory = True + tokens2use = tokens[:, :context_length] + positions2use = self.position_ids[:, :context_length] + # not using type2use. uncomment it if it is used + # if type_ids is not None: + # types2use = type_ids[:, :context_length] + else: + # Set this to false so the memory is not reallocated. + set_inference_key_value_memory = False + tokens2use = tokens[:, context_length - 1].view(micro_batch_size, -1) + positions2use = self.position_ids[:, context_length - 1].view(micro_batch_size, -1) + # not using type2use. uncomment it if it is used + # if type_ids is not None: + # types2use = type_ids[:, context_length - 1].view(batch_size, -1) + + """Prepare batch for each of the inference steps""" + attention_mask_repeat = None + if compute_attention_mask: + attention_mask_repeat = torch.concat([self.attention_mask for _ in range(micro_batch_size)]) + + setkey_value_array = torch.tensor( + [set_inference_key_value_memory] * micro_batch_size, device=torch.cuda.current_device() + ) + len_array = torch.tensor([maxlen] * micro_batch_size, device=torch.cuda.current_device()) + batch = [tokens2use, attention_mask_repeat, positions2use, media, setkey_value_array, len_array] + tensor_shape = [tokens2use.shape[1], micro_batch_size, self.model.cfg.encoder_seq_length] + return batch, tensor_shape + + class PromptLearningModelTextGenerationStrategy(TextGenerationStrategy): def __init__(self, model, task_ids): super().__init__(model) @@ -334,6 +485,7 @@ def post_process(self, tokens: torch.Tensor, new_tokens: torch.Tensor, context_l def model_inference_strategy_dispatcher(model, **args): + from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel from nemo.collections.nlp.models.language_modeling.megatron_gpt_prompt_learning_model import ( MegatronGPTPromptLearningModel, @@ -345,6 +497,8 @@ def model_inference_strategy_dispatcher(model, **args): RetroQAModelTextGenerationStrategy, ) + if isinstance(model, MegatronNevaModel): + return NevaModelTextGenerationStrategy(model) if isinstance(model, MegatronGPTPromptLearningModel): return PromptLearningModelTextGenerationStrategy(model, **args) elif isinstance(model, MegatronGPTModel): diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py index 3a41901f76ce..617d736fef73 100644 --- a/nemo/collections/nlp/modules/common/text_generation_utils.py +++ b/nemo/collections/nlp/modules/common/text_generation_utils.py @@ -51,6 +51,7 @@ "get_default_sampling_params", "get_default_length_params", "megatron_gpt_generate", + "megatron_neva_generate", "get_computeprob_response", "generate", "sample_token_greedy", @@ -137,6 +138,70 @@ def megatron_gpt_generate(model, inputs, tokenizer, length_params, sampling_para raise NotImplementedError("unknown type is not implemented") +def megatron_neva_generate(model, input_prompts, tokenizer, length_params, sampling_params, **strategy_args): + # reproduce the old compute_prob method + # a very special case + import re + + image_list = [] + for prompt in input_prompts: + image_paths_in_prompts = re.findall(r'', prompt) + image_list.append(image_paths_in_prompts) + + if sampling_params['compute_logprob']: + # need to overwrite some configuration, make it immutable + sampling_params = sampling_params.copy() + length_params = length_params.copy() + length_params['max_length'] = 1 + sampling_params['all_probs'] = True + sampling_params["add_BOS"] = False + sampling_params['greedy'] = True + + response = generate( + model, + inputs=input_prompts, + tokens_to_generate=length_params['max_length'], + all_probs=sampling_params['all_probs'], + temperature=sampling_params['temperature'], + add_BOS=sampling_params['add_BOS'], + top_k=sampling_params['top_k'], + top_p=sampling_params['top_p'], + greedy=sampling_params['use_greedy'], + repetition_penalty=sampling_params['repetition_penalty'], + min_tokens_to_generate=length_params['min_length'], + image_list=image_list ** strategy_args, + ) + + # TODO: Check if this is the correct way to do it ? Or should we remove the Image and the prompts we add before it + compute_prob_response = get_computeprob_response(tokenizer, response, input_prompts) + return compute_prob_response + + if isinstance(input_prompts, (list, tuple)): + if isinstance(input_prompts[0], (str, torch.Tensor)): + output = generate( + model, + inputs=input_prompts, + tokens_to_generate=length_params['max_length'], + all_probs=sampling_params['all_probs'], + temperature=sampling_params['temperature'], + add_BOS=sampling_params['add_BOS'], + top_k=sampling_params['top_k'], + top_p=sampling_params['top_p'], + greedy=sampling_params['use_greedy'], + repetition_penalty=sampling_params['repetition_penalty'], + min_tokens_to_generate=length_params['min_length'], + image_list=image_list, + **strategy_args, + ) + return output + elif isinstance(input_prompts[0], dict): + raise NotImplementedError("json object not implemented") + else: + raise NotImplementedError("unknown type is not implemented") + else: + raise NotImplementedError("unknown type is not implemented") + + def get_computeprob_response(tokenizer, response, inputs): if parallel_state.is_pipeline_first_stage() or parallel_state.is_pipeline_last_stage(): # we only have a response on the first and last pipeline stages @@ -382,6 +447,7 @@ def synced_generate( repetition_penalty=1.2, min_tokens_to_generate=0, end_strings=[], + image_list=None, ): context_length = context_length_tensor.min().item() tokenizer = model.tokenizer @@ -408,6 +474,7 @@ def synced_generate( compute_logprob=compute_logprob, temperature=temperature, end_strings=end_strings, + image_list=image_list, extra={ "top_p": top_p, "top_k": top_k, @@ -478,6 +545,7 @@ def generate( repetition_penalty=1.0, min_tokens_to_generate=0, end_strings=['<|endoftext|>'], + image_list=None, **strategy_args, ) -> OutputType: """ @@ -563,6 +631,7 @@ def generate( repetition_penalty=repetition_penalty, min_tokens_to_generate=min_tokens_to_generate, end_strings=end_strings, + image_list=image_list, ) special_tokens = set() if hasattr(tokenizer, 'pad_token') and tokenizer.pad_token is not None: @@ -646,6 +715,7 @@ def sample_sequence_batch( type_ids=None, temperature=None, end_strings=['<|endoftext|>'], + image_list=None, extra={}, ): # Importing here to avoid circular import errors @@ -690,10 +760,20 @@ def sample_sequence_batch( maxlen = inference_strategy.clip_max_len(maxlen) lengths = torch.ones([batch_size]).long().cuda() * maxlen + + if image_list is not None: + # Note this will just be one image that comes here + media_tensor = inference_strategy.get_media_tensor(image_list[0][0]) + while context_length < maxlen: - batch, tensor_shape = inference_strategy.prepare_batch_at_step( - tokens, maxlen, micro_batch_size, counter, context_length, compute_attention_mask - ) + if media_tensor is not None: + batch, tensor_shape = inference_strategy.prepare_batch_at_step( + tokens, maxlen, micro_batch_size, counter, context_length, compute_attention_mask, media_tensor + ) + else: + batch, tensor_shape = inference_strategy.prepare_batch_at_step( + tokens, maxlen, micro_batch_size, counter, context_length, compute_attention_mask + ) output = inference_strategy.forward_step(batch, tensor_shape) if parallel_state.is_pipeline_last_stage(): diff --git a/nemo/collections/vision/modules/vit/vit_backbone.py b/nemo/collections/vision/modules/vit/vit_backbone.py index 37bb97a818ea..839434494e86 100644 --- a/nemo/collections/vision/modules/vit/vit_backbone.py +++ b/nemo/collections/vision/modules/vit/vit_backbone.py @@ -136,7 +136,7 @@ def twod_interpolate_position_embeddings_hook( key = prefix + "weight" - assert key in state_dict + assert key in state_dict, f"{key} not in {state_dict.keys()}" if key in state_dict: input_param = state_dict[key] diff --git a/nemo/core/optim/optimizer_with_main_params.py b/nemo/core/optim/optimizer_with_main_params.py index 44d54a0e63ff..5725b4fe070b 100644 --- a/nemo/core/optim/optimizer_with_main_params.py +++ b/nemo/core/optim/optimizer_with_main_params.py @@ -254,6 +254,7 @@ def __init__( fp32_from_float16_params_this_group = [] # For all the parameters in this group: for j, param in enumerate(param_group['params']): + main_param = None if param.requires_grad: # float16 params: if param.type() in ['torch.cuda.HalfTensor', 'torch.cuda.BFloat16Tensor']: @@ -297,7 +298,7 @@ def __init__( ) # Add gradient accumulation hook for fp32 grad accumulation - if self._fp32_grad_accum and param.requires_grad: + if main_param is not None and self._fp32_grad_accum and param.requires_grad: # Expand so we get access to grad_fn param_tmp = param.expand_as(param) # Get the gradient accumulator function. From 2e1ab68d6b4cd791f32e10965921db55fa556e8c Mon Sep 17 00:00:00 2001 From: Ao Tang Date: Thu, 24 Aug 2023 09:08:03 -0700 Subject: [PATCH 162/512] Imagen Optimizations --- .../generative/imagen/conf/base64-2b.yaml | 7 +- .../generative/imagen/conf/base64-500m.yaml | 11 +- .../generative/imagen/conf/sr1024-600m.yaml | 3 +- .../generative/imagen/conf/sr256-400m.yaml | 3 +- .../generative/imagen/conf/sr256-600m.yaml | 3 +- .../generative/imagen/imagen_training.py | 16 ++- .../multimodal/models/imagen/imagen.py | 135 +++++++++++------- .../models/imagen/imagen_pipeline.py | 15 +- .../models/multimodal_base_model.py | 9 -- .../modules/imagen/diffusionmodules/nets.py | 2 +- nemo/core/classes/modelPT.py | 4 +- 11 files changed, 128 insertions(+), 80 deletions(-) diff --git a/examples/multimodal/generative/imagen/conf/base64-2b.yaml b/examples/multimodal/generative/imagen/conf/base64-2b.yaml index 2005ec8daee2..d0697f2cd169 100644 --- a/examples/multimodal/generative/imagen/conf/base64-2b.yaml +++ b/examples/multimodal/generative/imagen/conf/base64-2b.yaml @@ -50,9 +50,9 @@ model: precision: ${trainer.precision} # specify micro_batch_size, global_batch_size, and model parallelism # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 16 # limited by GPU memory - global_batch_size: 16 # will use more micro batches to reach global batch size - inductor: False + micro_batch_size: 32 # limited by GPU memory + global_batch_size: 32 # will use more micro batches to reach global batch size + inductor: True inductor_cudagraphs: False unet_type: base channels_last: True @@ -85,6 +85,7 @@ model: resume_from_checkpoint: null # manually set the checkpoint file to load from apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + ddp_overlap: True # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce preconditioning_type: EDM preconditioning: diff --git a/examples/multimodal/generative/imagen/conf/base64-500m.yaml b/examples/multimodal/generative/imagen/conf/base64-500m.yaml index 2cd20ec75e62..0398b33e0f01 100644 --- a/examples/multimodal/generative/imagen/conf/base64-500m.yaml +++ b/examples/multimodal/generative/imagen/conf/base64-500m.yaml @@ -50,9 +50,9 @@ model: precision: ${trainer.precision} # specify micro_batch_size, global_batch_size, and model parallelism # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 64 # limited by GPU memory - global_batch_size: 64 # will use more micro batches to reach global batch size - inductor: False + micro_batch_size: 128 # limited by GPU memory + global_batch_size: 128 # will use more micro batches to reach global batch size + inductor: True inductor_cudagraphs: False unet_type: base channels_last: True @@ -85,6 +85,7 @@ model: resume_from_checkpoint: null # manually set the checkpoint file to load from apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + ddp_overlap: False # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce preconditioning_type: EDM preconditioning: @@ -126,10 +127,6 @@ model: infinite_sampler: False local_root_path: /datasets verbose: False - pbss_checkpoint_saving: - enable: False - pbss_credentials_file: pbss_credentials_joc.secret - save_frequency: 1000 optim: # We need weight decay for large-scale odel diff --git a/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml b/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml index 92a4c918e0a2..b77f47df0a23 100644 --- a/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml +++ b/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml @@ -51,7 +51,7 @@ model: # gradient accumulation will be done automatically based on data_parallel_size micro_batch_size: 64 # limited by GPU memory global_batch_size: 64 # will use more micro batches to reach global batch size - inductor: False + inductor: True inductor_cudagraphs: False unet_type: sr channels_last: True @@ -82,6 +82,7 @@ model: resume_from_checkpoint: null # manually set the checkpoint file to load from apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + ddp_overlap: True # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce noise_cond_aug: True preconditioning_type: EDM diff --git a/examples/multimodal/generative/imagen/conf/sr256-400m.yaml b/examples/multimodal/generative/imagen/conf/sr256-400m.yaml index e8be690cf6ae..2eaa691ed304 100644 --- a/examples/multimodal/generative/imagen/conf/sr256-400m.yaml +++ b/examples/multimodal/generative/imagen/conf/sr256-400m.yaml @@ -53,7 +53,7 @@ model: # gradient accumulation will be done automatically based on data_parallel_size micro_batch_size: 16 # limited by GPU memory global_batch_size: 16 # will use more micro batches to reach global batch size - inductor: False + inductor: True inductor_cudagraphs: False channels_last: True @@ -87,6 +87,7 @@ model: resume_from_checkpoint: null # manually set the checkpoint file to load from apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + ddp_overlap: True # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce noise_cond_aug: True preconditioning_type: EDM diff --git a/examples/multimodal/generative/imagen/conf/sr256-600m.yaml b/examples/multimodal/generative/imagen/conf/sr256-600m.yaml index cf615d767b12..122090b8225d 100644 --- a/examples/multimodal/generative/imagen/conf/sr256-600m.yaml +++ b/examples/multimodal/generative/imagen/conf/sr256-600m.yaml @@ -53,7 +53,7 @@ model: # gradient accumulation will be done automatically based on data_parallel_size micro_batch_size: 64 # limited by GPU memory global_batch_size: 64 # will use more micro batches to reach global batch size - inductor: False + inductor: True inductor_cudagraphs: False channels_last: True @@ -84,6 +84,7 @@ model: resume_from_checkpoint: null # manually set the checkpoint file to load from apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + ddp_overlap: True # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce noise_cond_aug: True preconditioning_type: EDM diff --git a/examples/multimodal/generative/imagen/imagen_training.py b/examples/multimodal/generative/imagen/imagen_training.py index 641dde746590..446acd9b008d 100644 --- a/examples/multimodal/generative/imagen/imagen_training.py +++ b/examples/multimodal/generative/imagen/imagen_training.py @@ -49,10 +49,17 @@ def main(cfg) -> None: torch.backends.cudnn.allow_tf32 = allow_tf32 plugins = [] + ddp_overlap = cfg.model.get('ddp_overlap', True) + if ddp_overlap: + logging.info('Enable DDP Overlap.') + else: + logging.info('Use Megatron default configuration for async grad allreduce') + strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + no_ddp_communication_hook=not ddp_overlap, # we don't use DDP for async grad allreduce gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, - find_unused_parameters=False, + find_unused_parameters=True, + bucket_cap_mb=256, ) if cfg.trainer.precision in [16, 'bf16']: @@ -92,9 +99,6 @@ def main(cfg) -> None: model = MegatronImagen(cfg.model, trainer) - assert not cfg.model.get( - "inductor", False - ), 'Inductor is currently under investigation of its impact on convergence.' if cfg.model.get("inductor", False): # Temporary hack to get rid of TorchDynamo issue with DDP # TODO: remove these if https://github.com/pytorch/pytorch/issues/94574 fixed @@ -110,7 +114,7 @@ def main(cfg) -> None: # TorchInductor with CUDA graph can lead to OOM inductor_config.triton.cudagraphs = cfg.model.inductor_cudagraphs - model.model.unet = torch.compile(model.model.unet) + model.model.model.unet = torch.compile(model.model.model.unet) trainer.fit(model) diff --git a/nemo/collections/multimodal/models/imagen/imagen.py b/nemo/collections/multimodal/models/imagen/imagen.py index 48c395d6bece..c072cff7856c 100644 --- a/nemo/collections/multimodal/models/imagen/imagen.py +++ b/nemo/collections/multimodal/models/imagen/imagen.py @@ -13,6 +13,7 @@ # limitations under the License. import itertools from datetime import datetime +from functools import partial from typing import Any import numpy as np @@ -202,6 +203,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self.online_encoding = cfg.conditioning.get("online_encoding", False) self.text_encoder_path = cfg.conditioning.get("encoder_path", None) + self.enable_autocast = (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) def model_provider_func(self, pre_process=True, post_process=True): """Model depends on pipeline paralellism.""" @@ -318,35 +320,31 @@ def setup_test_data(self, cfg): self._test_ds, batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, ) - def training_step(self, dataloader_iter, batch_idx): - """ - Our dataloaders produce a micro-batch and then we fetch - a number of microbatches depending on the global batch size and model parallel size - from the dataloader to produce a list of microbatches. - Batch should be a list of microbatches and those microbatches should on CPU. - Microbatches are then moved to GPU during the pipeline. - The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. - """ - tensor_shape = None # Placeholder + def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): + tensor_shape = None - # we zero grads here because we also call backward in the megatron-core fwd/bwd functions - self._optimizer.zero_grad() + # handle asynchronous grad reduction + no_sync_func = None + if not forward_only and self.with_distributed_adam: + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) # run forward and backwards passes for an entire global batch # we do this inside training_step to support pipeline parallelism fwd_bwd_function = get_forward_backward_func() + # TODO @akhattar: add num_micro_batches_with_partial_activation_checkpoints when ready losses_reduced_per_micro_batch = fwd_bwd_function( forward_step_func=self.get_forward_output_and_loss_func(), data_iterator=dataloader_iter, - model=[self.model], + model=self.model, num_microbatches=get_num_microbatches(), - forward_only=False, + forward_only=forward_only, tensor_shape=tensor_shape, dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None, + grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, + enable_autocast=self.enable_autocast, + no_sync_func=no_sync_func, ) # losses_reduced_per_micro_batch is a list of dictionaries @@ -354,14 +352,39 @@ def training_step(self, dataloader_iter, batch_idx): # only the last stages of the pipeline return losses loss_dict = {} if losses_reduced_per_micro_batch: - # average loss across micro batches - for key in losses_reduced_per_micro_batch[0]: - loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.stack(loss_tensors_list) - loss_dict[key] = loss_tensor.mean() - loss_mean = loss_dict["train/loss"] + if (not forward_only) or self.cfg.data.get('validation_drop_last', True): + # average loss across micro batches + for key in losses_reduced_per_micro_batch[0]: + loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_dict[key] = loss_tensor.mean() + loss_mean = loss_dict["train/loss"] + else: + # Get the total loss since micro batches sizes are not uniform + raise NotImplementedError("Losses of micro batches sizes must be uniform!") else: - loss_mean = torch.tensor(0.0, device=torch.cuda.current_device()) + # we're not on the last pipeline stage so no losses + if forward_only: + loss_mean = [] + else: + loss_mean = torch.tensor(0.0).cuda() + + return loss_mean, loss_dict + + def training_step(self, dataloader_iter, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + Batch should be a list of microbatches and those microbatches should on CPU. + Microbatches are then moved to GPU during the pipeline. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + + # we zero grads here because we also call backward in the megatron-core fwd/bwd functions + self._optimizer.zero_grad() + + loss_mean, loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, False) torch.distributed.broadcast(loss_mean, get_last_rank()) @@ -370,15 +393,17 @@ def training_step(self, dataloader_iter, batch_idx): self.allreduce_sequence_parallel_gradients() if self.with_distributed_adam: - # gradients are reduced internally in distributed optimizer - pass + # synchronize asynchronous grad reductions + # note: not necessary, but reduces performance degradation + # from multiple simultaneous NCCL calls + self._optimizer._finish_bucket_grad_sync() elif self.megatron_amp_O2: # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): # # main grads are stored in the MainParamsOptimizer wrapper # self._optimizer.allreduce_main_grads() self._optimizer.allreduce_main_grads() - else: + elif not self.cfg.get('ddp_overlap', True): # async grad allreduce is not currently implemented for O1/autocasting mixed precision training # so we all-reduce gradients after the pipeline self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) @@ -428,31 +453,16 @@ def _append_sequence_parallel_module_grads(self, module, grads): grads.append(grad.data) def validation_step(self, dataloader_iter, batch_idx): - tensor_shape = None # Placeholder - fwd_bwd_function = get_forward_backward_func() - - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(), - data_iterator=dataloader_iter, - model=[self.model], - num_microbatches=get_num_microbatches(), - forward_only=True, - tensor_shape=tensor_shape, - dtype=self.autocast_dtype, - sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, - ) + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. """ - # only the last stages of the pipeline return losses - val_loss_dict = {} - if losses_reduced_per_micro_batch: - # average loss across micro batches - for key in losses_reduced_per_micro_batch[0]: - loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.stack(loss_tensors_list) - val_loss_dict[key] = loss_tensor.mean() + loss, val_loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, True) self.log_dict(val_loss_dict, prog_bar=False, logger=True, on_step=False, on_epoch=True, batch_size=1) + return loss def setup(self, stage=None): """ PTL hook that is executed after DDP spawns. @@ -537,6 +547,35 @@ def on_save_checkpoint(self, checkpoint) -> None: for k in frozen_weights_keys: del checkpoint['state_dict'][k] + def on_load_checkpoint(self, checkpoint) -> None: + # make sure inductor naming is consistent with checkpoint's + inductor_enabled = self.cfg.get('inductor', False) + state_dict = checkpoint['state_dict'] + inductor_checkpoint = False + for k, v, in state_dict.items(): + if '_orig_mod' in k: + inductor_checkpoint = True + break + + if inductor_enabled and not inductor_checkpoint: + # ckpt needs to be converted to inductor-format weights (add .orig_mod) + logging.info('Add .orig_mod to all weight keys.') + new_state_dict = {} + for k, v in state_dict.items(): + idx = k.find('._orig_mod') + new_key = k[:idx] + k[idx + len('._orig_mod') :] + new_state_dict[new_key] = v + checkpoint['state_dict'] = new_state_dict + elif not inductor_enabled and inductor_checkpoint: + # ckpt needs to be converted to non-inductor-format weights (remove .orig_mod) + logging.info('Remove .orig_mod to all weight keys.') + new_state_dict = {} + for k, v in state_dict.items(): + new_key = k.replace("._orig_mod", "") + new_state_dict[new_key] = v + checkpoint['state_dict'] = new_state_dict + super().on_load_checkpoint(checkpoint) + def on_fit_start(self) -> None: if self.online_encoding: # if encoding text online, set up text_encoder here (after loading checkpoints) instead of in __init__. diff --git a/nemo/collections/multimodal/models/imagen/imagen_pipeline.py b/nemo/collections/multimodal/models/imagen/imagen_pipeline.py index 3e3f5206cb96..15916907dbfc 100644 --- a/nemo/collections/multimodal/models/imagen/imagen_pipeline.py +++ b/nemo/collections/multimodal/models/imagen/imagen_pipeline.py @@ -82,7 +82,20 @@ def _load_model(model_ckpt: str, model_cfg: str, eval_mode: bool = True, trainer model_cfg.model.global_batch_size = 1 model = MegatronImagen(cfg=model_cfg.model, trainer=trainer) checkpoint = torch.load(model_ckpt, map_location=lambda storage, loc: storage) - model.load_state_dict(checkpoint['state_dict'], strict=True) + + # Change weight keys if training using TorchInductor + state_dict = checkpoint['state_dict'] + del_keys = [] + for k, v in state_dict.items(): + if '._orig_mod' in k: + del_keys.append(k) + if len(del_keys) != 0: + print('ckpt was saved with TorchInductor. Renaming weights..') + for k in del_keys: + new_k = k.replace("._orig_mod", "") + state_dict[new_k] = state_dict[k] + del state_dict[k] + model.load_state_dict(state_dict, strict=True) else: raise Exception('Invalid ckpt type. Should be either .nemo or .ckpt with cfg') diff --git a/nemo/collections/multimodal/models/multimodal_base_model.py b/nemo/collections/multimodal/models/multimodal_base_model.py index 646605960223..a9def70957c2 100644 --- a/nemo/collections/multimodal/models/multimodal_base_model.py +++ b/nemo/collections/multimodal/models/multimodal_base_model.py @@ -552,19 +552,10 @@ def setup_optimization( optim_kwargs = {} if optim_kwargs is None else optim_kwargs.copy() if self.with_distributed_adam: - # Allocate contiguous buffers to avoid extra copies - optim_kwargs['contiguous_grad_buffer'] = True - optim_kwargs['contiguous_param_buffer'] = True - # Make sure optimizer state is in FP32 optim_dtype = torch.float32 optim_kwargs['dtype'] = optim_dtype - # Make sure embedding grad reductions are in FP32 - for name, param in self.named_parameters(): - if 'word_embedding' in name or 'position_embedding' in name: - param._with_fp32_optimizer = True - # Match param allgather with model dtype model_dtype = torch.float32 if self.megatron_amp_O2 and hasattr(self, 'autocast_dtype'): diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py index cf4a2e213b55..0ce1a46a5884 100644 --- a/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py @@ -602,7 +602,7 @@ def forward( for level in range(self.n_levels - 1, -1, -1): level_key = str(level) if level < self.n_levels - 1: - x += feats[level_key] + x = x + feats[level_key] x = self.UBlocks[level_key](x, t, context_emb, context_mask) return self.out(x) diff --git a/nemo/core/classes/modelPT.py b/nemo/core/classes/modelPT.py index 01cf1611f7a4..b10b794ca53e 100644 --- a/nemo/core/classes/modelPT.py +++ b/nemo/core/classes/modelPT.py @@ -772,7 +772,7 @@ def setup_optimizer_param_groups(self): raise ValueError(f"{group} not found in model.") elif hasattr(module, "parameters"): known_groups.append(group) - new_group = {"params": module.parameters()} + new_group = {"params": list(module.parameters())} for k, v in group_cfg.items(): new_group[k] = v param_groups.append(new_group) @@ -791,7 +791,7 @@ def setup_optimizer_param_groups(self): if len(other_params): param_groups = [{"params": other_params}] + param_groups else: - param_groups = [{"params": self.parameters()}] + param_groups = [{"params": list(self.parameters())}] self._optimizer_param_groups = param_groups From 74a07301fca40cd28971bd843d553165175859d8 Mon Sep 17 00:00:00 2001 From: Shanmugam Ramasamy Date: Thu, 24 Aug 2023 16:04:24 -0700 Subject: [PATCH 163/512] Neva inference code --- .../mllm/neva/conf/neva_inference.yaml | 10 +-- .../multimodal/mllm/neva/neva_evaluation.py | 72 ++++++----------- .../multimodal/models/neva/neva_model.py | 28 +++---- .../common/text_generation_strategy.py | 80 ++++++++++++++----- .../modules/common/text_generation_utils.py | 63 +++------------ 5 files changed, 111 insertions(+), 142 deletions(-) diff --git a/examples/multimodal/mllm/neva/conf/neva_inference.yaml b/examples/multimodal/mllm/neva/conf/neva_inference.yaml index bb874ba1d44c..13f1c078eb25 100644 --- a/examples/multimodal/mllm/neva/conf/neva_inference.yaml +++ b/examples/multimodal/mllm/neva/conf/neva_inference.yaml @@ -4,12 +4,12 @@ inference: top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. temperature: 0.2 # sampling temperature add_BOS: False # add the bos token at the begining of the prompt - tokens_to_generate: 40 # The minimum length of the sequence to be generated. + tokens_to_generate: 10 # The minimum length of the sequence to be generated. all_probs: False # whether return the log prob for all the tokens in vocab repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - + images_base_path: /pwd/images trainer: devices: 8 @@ -22,7 +22,7 @@ cluster_type: BCP tensor_model_parallel_size: 8 pipeline_model_parallel_size: 1 pipeline_model_parallel_split_rank: 0 # used for encoder and decoder model (0 for others) -neva_model_file: /pwd/nemo_multimodal/nemo_experiments/nemo_llava.nemo #neva_22b_tp8_finetuned_v1.nemo neva_8b_tp4_finetuned_v1.nemo +neva_model_file: /pwd/nemo_experiments/nemo_llava.nemo #neva_22b_tp8_finetuned_v1.nemo neva_8b_tp4_finetuned_v1.nemo checkpoint_dir: null #/pwd/nemo_multimodal/nemo_experiments/nemo_llava_finetune/checkpoints # checkpoint file dir. This is used to load the PTL checkpoint generated during the Kosmos training checkpoint_name: null #megatron_clip--val_loss=0.41-step=13499-consumed_samples=431904.0.ckpt # PTL checkpoint file name, only used for PTL checkpoint loading hparams_file: null #/pwd/nemo_multimodal/nemo_experiments/nemo_llava_finetune/version_0/hparams.yaml # model configuration file, only used for PTL checkpoint loading @@ -35,8 +35,8 @@ helpfulness: 6 not_appropriate: 0 # MORE THAN ONE INFERENCE IS NOT RUNNING PROPERLY NEED TO CHECK WHY SECOND IS OUTPUTING JUNK N -prompts: - - " What is unusual about this Image?" +prompt_file: /pwd/nemo_multimodal/sample.jsonl +output_file: /pwd/nemo_multimodal/results.jsonl server: False # whether launch the API server port: 5555 # the port number for the inference server diff --git a/examples/multimodal/mllm/neva/neva_evaluation.py b/examples/multimodal/mllm/neva/neva_evaluation.py index 2f6de74379ef..b1eba489566c 100644 --- a/examples/multimodal/mllm/neva/neva_evaluation.py +++ b/examples/multimodal/mllm/neva/neva_evaluation.py @@ -13,6 +13,7 @@ # limitations under the License. import asyncio +import json import os import re import threading @@ -24,7 +25,7 @@ from torch.utils.data import DataLoader, Dataset import nemo.collections.multimodal.data.neva.conversation as conversation_lib -from nemo.collections.multimodal.models.neva.neva_peft_models import MegatronNevaModel +from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel from nemo.collections.nlp.modules.common.megatron_web_server import get_demo from nemo.collections.nlp.modules.common.text_generation_server import MegatronServer @@ -259,38 +260,36 @@ def main(cfg) -> None: "compute_logprob": cfg.inference.compute_logprob, } - original_input_prompts = OmegaConf.to_container(cfg.prompts) - modified_input_prompts = [] - conv = conversation_lib.conv_nvgpt.copy() - modified_input_prompts = [] - pattern = r"()" - for prompt in original_input_prompts: - conv.messages = [("User", prompt), ("Assistant", "")] - base_prompt = conv.get_prompt() - img_path = re.findall(pattern, base_prompt)[0] - edited_img_path = "" + img_path + "\n" - split_prompt = base_prompt.split(img_path) - final_prompt = split_prompt[0] + edited_img_path + split_prompt[1] - """ - if not final_prompt.endswith("\n"): - final_prompt = final_prompt + "\n" - labels=f"quality:{cfg.quality},toxicity:{cfg.toxicity},humor:{cfg.humor},creativity:{cfg.creativity},violence:{cfg.violence},helpfulness:{cfg.helpfulness},not_appropriate:{cfg.not_appropriate}" - final_prompt = final_prompt + "" + labels + "\n" - """ - modified_input_prompts.append(final_prompt) - - response = model.generate( - input_prompts=modified_input_prompts, length_params=length_params, sampling_params=sampling_params + with open(cfg.prompt_file, 'r') as f: + lines = f.readlines() + + final_prompts = [] + for line in lines: + prompt_dict = json.loads(line) + final_prompts.append(prompt_dict) + + responses = model.generate( + input_prompts=final_prompts, length_params=length_params, sampling_params=sampling_params, inference_config=cfg ) + if torch.cuda.current_device() == 0: - print(modified_input_prompts) print("***************************") - print(response) + print(responses) print("***************************") + + results = [] + for response, prompt in zip(responses, final_prompts): + prompt['response'] = response + results.append(prompt) + + with open(cfg.output_file, 'w') as f: + for result in results: + f.write(json.dumps(result) + '\n') + """ # Second method of running text generation, call trainer.predict - ds = RequestDataSet(modified_input_prompts) - request_dl = DataLoader(dataset=ds, batch_size=2) + ds = RequestDataSet(final_prompts) + request_dl = DataLoader(dataset=ds, batch_size=1) config = OmegaConf.to_container(cfg.inference) model.set_inference_config(config) response = trainer.predict(model, request_dl) @@ -299,25 +298,6 @@ def main(cfg) -> None: print(response) print("***************************") """ - # Third method of running text generation, use inference server - if cfg.server: - if parallel_state.is_pipeline_first_stage() and parallel_state.get_tensor_model_parallel_rank() == 0: - if cfg.web_server: - loop = asyncio.new_event_loop() - thread = threading.Thread( - target=get_demo, - daemon=True, - args=(cfg.share, cfg.username, cfg.password, cfg.port, cfg.web_port, loop), - ) - thread.start() - server = MegatronServer(model.cuda()) - server.run("0.0.0.0", port=cfg.port) - - while True: - choice = torch.cuda.LongTensor(1) - torch.distributed.broadcast(choice, 0) - if choice[0].item() == 0: - generate(model.cuda()) if __name__ == '__main__': diff --git a/nemo/collections/multimodal/models/neva/neva_model.py b/nemo/collections/multimodal/models/neva/neva_model.py index 939bc9bf6602..850f06dd6940 100644 --- a/nemo/collections/multimodal/models/neva/neva_model.py +++ b/nemo/collections/multimodal/models/neva/neva_model.py @@ -32,9 +32,7 @@ from pytorch_lightning.trainer.trainer import Trainer from transformers import CLIPVisionModel -from nemo.collections.multimodal.data.kosmos.kosmos_dataset import MAX_NUM_IMAGES from nemo.collections.multimodal.data.neva.neva_dataset import ( - CLIP_MODEL, DEFAULT_IM_END_TOKEN, DEFAULT_IM_START_TOKEN, DataCollatorForSupervisedDataset, @@ -206,7 +204,6 @@ def replace_media_embeddings(self, input_ids, inputs_embeds, media): media_features = self.encode_vision_x(media) # b T F S(eq) H(idden) num_images_per_sample = media_features.size(1) num_patches = media_features.size(3) - # flatten patches media_features = media_features.view(batch_size, -1, hidden_size) @@ -278,7 +275,6 @@ def __init__( self.load_vision_encoder_weights(vision_encoder, mm_cfg.vision_encoder.from_pretrained) if mm_cfg.vision_encoder.freeze: vision_encoder.freeze() - # Monkey patch embedding if kwargs.get("pre_process", True): extend_instance(self.language_model.embedding.word_embeddings, NevaEmbeddingMixin) @@ -843,37 +839,31 @@ def load_state_dict(self, state_dict, strict=False): def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any: inference_config = self.get_inference_config() - image_list = [] - for prompt in batch: - image_paths_in_prompts = re.findall(r'', prompt) - image_list.append(image_paths_in_prompts) - if inference_config is None: return None else: # need to overwrite some configuration, make it immutable + image = os.path.join(inference_config['images_base_path'], batch['image'][0]) + prompt = batch['prompt'][0] inference_config = inference_config.copy() compute_logprob = inference_config['compute_logprob'] if compute_logprob: - inference_config['inputs'] = batch + inference_config['inputs'] = prompt inference_config['tokens_to_generate'] = 1 inference_config['all_probs'] = True inference_config["add_BOS"] = False inference_config['greedy'] = True - inference_config['image_list'] = image_list + inference_config['image_list'] = image response = generate(self, **inference_config) - compute_prob_response = get_computeprob_response(self.tokenizer, response, batch) + compute_prob_response = get_computeprob_response(self.tokenizer, response, prompt) return compute_prob_response else: - inference_config['inputs'] = batch - inference_config['image_list'] = image_list + inference_config['inputs'] = prompt + inference_config['image_list'] = image return generate(self, **inference_config) def generate( - self, - input_prompts: Union[List[str], torch.Tensor, List[dict]], - length_params: LengthParam, - sampling_params: SamplingParam = None, + self, input_prompts, inference_config, length_params: LengthParam, sampling_params: SamplingParam = None, ) -> OutputType: # check whether the DDP is initialized @@ -902,7 +892,7 @@ def dummy(): start = time.time() # Supports only one prompt at a time - result = megatron_neva_generate(self.cuda(), input_prompts, self.tokenizer, length_params, sampling_params) + result = megatron_neva_generate(self.cuda(), input_prompts, length_params, sampling_params, inference_config) end = time.time() print(f'Time taken {end - start}') diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py index e2a122d95cbb..3f76051bdebd 100644 --- a/nemo/collections/nlp/modules/common/text_generation_strategy.py +++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py @@ -261,11 +261,19 @@ def __init__(self, model): self.num_media_latents = 576 # TODO: Need to obtain this from the config ideally self.tokenizer = self.model.tokenizer self.image_paths = [] + self.cfg = model.cfg self.data_cfg = model.cfg.data from transformers import CLIPImageProcessor - CLIP_MODEL = os.environ.get("CLIP_MODEL", "openai/clip-vit-large-patch14") - self.processor = CLIPImageProcessor.from_pretrained(CLIP_MODEL, torch_dtype=torch.bfloat16) + if self.cfg.mm_cfg.vision_encoder.from_hf: + self.processor = CLIPImageProcessor.from_pretrained( + self.cfg.mm_cfg.vision_encoder.from_pretrained, torch_dtype=torch.bfloat16 + ) + else: + self.processor = CLIPImageProcessor.from_pretrained( + "openai/clip-vit-large-patch14", torch_dtype=torch.bfloat16 + ) + self.model = model def clip_max_len(self, maxlen: int) -> int: @@ -289,26 +297,56 @@ def init_batch(self, context_tokens: torch.Tensor, context_length: int, compute_ compute_attention_mask=compute_attention_mask, ) - def tokenize_batch(self, sentences, max_len, add_BOS): - pattern = r"()" - context_tokens = [] - # TODO : Should get this from config - image_tokens = [self.tokenizer.token_to_id("")] * self.num_media_latents - for sentence in sentences: - img_path = re.findall(pattern, sentence)[0] - split_sentence = sentence.split(img_path) - if add_BOS: - output_tokens = [self.tokenizer.bos_id] - else: - output_tokens = [] - - output_tokens = ( - self.tokenizer.text_to_ids(split_sentence[0]) - + image_tokens - + self.tokenizer.text_to_ids(split_sentence[1]) - ) - context_tokens.append(output_tokens) + def process_prompts(self, prompt): + from nemo.collections.multimodal.data.neva.neva_dataset import DEFAULT_IMAGE_TOKEN, preprocess + + list_data_dict = [] + + record = { + 'system': 'A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user\'s questions.\n\n', + 'conversations': [ + {'from': 'User', 'value': prompt,}, + { + 'from': 'Assistant', + 'value': '', + 'label': 'quality:8,toxicity:0,humor:0,creativity:0,violence:0,helpfulness:8,not_appropriate:0', + }, + ], + } + + for turn in record['conversations']: # + if turn.get('value') is not None: + turn['value'] = re.sub('', f'{DEFAULT_IMAGE_TOKEN}\n', turn['value']) + list_data_dict.append(record) + + add_extra_token = 1 + if getattr(self.model.cfg, 'no_seqlen_plus_one_input_tokens', False): + add_extra_token = 0 + data_cfg = self.model.cfg.data + model_cfg = self.model.cfg + + multimodal_cfg = dict( + is_multimodal=data_cfg.is_multimodal, + sep_image_conv_front=data_cfg.sep_image_conv_front, + image_token_len=data_cfg.image_token_len, + image_folder=data_cfg.image_folder, + image_aspect_ratio=data_cfg.image_aspect_ratio, + use_im_start_end=getattr(model_cfg.mm_cfg, 'use_im_start_end', False), + image_processor=self.processor, + add_extra_token=add_extra_token, + context_length=model_cfg.encoder_seq_length, + ) + + import copy + + from nemo.collections.multimodal.data.neva.neva_dataset import preprocess_multimodal + + sources = preprocess_multimodal(copy.deepcopy(list_data_dict), multimodal_cfg, 576) # HARDCODED FOR NOW + data_dict = preprocess(sources, self.tokenizer, multimodal_cfg) + return data_dict['tokens'].tolist() + def tokenize_batch(self, prompt, max_len, add_BOS): + context_tokens = self.process_prompts(prompt) context_tokens, context_lengths = pad_batch(context_tokens, self.tokenizer.eos_id, max_len) context_tokens_tensor = torch.cuda.LongTensor(context_tokens) context_length_tensor = torch.cuda.LongTensor(context_lengths) diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py index 617d736fef73..6a44d0861c89 100644 --- a/nemo/collections/nlp/modules/common/text_generation_utils.py +++ b/nemo/collections/nlp/modules/common/text_generation_utils.py @@ -14,6 +14,7 @@ """Utilities for generating text.""" +import os import pickle from collections.abc import Iterable from functools import partial @@ -138,28 +139,14 @@ def megatron_gpt_generate(model, inputs, tokenizer, length_params, sampling_para raise NotImplementedError("unknown type is not implemented") -def megatron_neva_generate(model, input_prompts, tokenizer, length_params, sampling_params, **strategy_args): - # reproduce the old compute_prob method - # a very special case - import re - - image_list = [] - for prompt in input_prompts: - image_paths_in_prompts = re.findall(r'', prompt) - image_list.append(image_paths_in_prompts) - - if sampling_params['compute_logprob']: - # need to overwrite some configuration, make it immutable - sampling_params = sampling_params.copy() - length_params = length_params.copy() - length_params['max_length'] = 1 - sampling_params['all_probs'] = True - sampling_params["add_BOS"] = False - sampling_params['greedy'] = True +def megatron_neva_generate(model, prompt_dict_list, length_params, sampling_params, inference_config, **strategy_args): + final_response = [] + for prompt_dict in prompt_dict_list: + img = os.path.join(inference_config.inference.images_base_path, prompt_dict['image']) response = generate( model, - inputs=input_prompts, + inputs=prompt_dict['prompt'], tokens_to_generate=length_params['max_length'], all_probs=sampling_params['all_probs'], temperature=sampling_params['temperature'], @@ -169,37 +156,11 @@ def megatron_neva_generate(model, input_prompts, tokenizer, length_params, sampl greedy=sampling_params['use_greedy'], repetition_penalty=sampling_params['repetition_penalty'], min_tokens_to_generate=length_params['min_length'], - image_list=image_list ** strategy_args, + image_list=img, + **strategy_args, ) - - # TODO: Check if this is the correct way to do it ? Or should we remove the Image and the prompts we add before it - compute_prob_response = get_computeprob_response(tokenizer, response, input_prompts) - return compute_prob_response - - if isinstance(input_prompts, (list, tuple)): - if isinstance(input_prompts[0], (str, torch.Tensor)): - output = generate( - model, - inputs=input_prompts, - tokens_to_generate=length_params['max_length'], - all_probs=sampling_params['all_probs'], - temperature=sampling_params['temperature'], - add_BOS=sampling_params['add_BOS'], - top_k=sampling_params['top_k'], - top_p=sampling_params['top_p'], - greedy=sampling_params['use_greedy'], - repetition_penalty=sampling_params['repetition_penalty'], - min_tokens_to_generate=length_params['min_length'], - image_list=image_list, - **strategy_args, - ) - return output - elif isinstance(input_prompts[0], dict): - raise NotImplementedError("json object not implemented") - else: - raise NotImplementedError("unknown type is not implemented") - else: - raise NotImplementedError("unknown type is not implemented") + final_response.append(response) + return final_response def get_computeprob_response(tokenizer, response, inputs): @@ -762,8 +723,8 @@ def sample_sequence_batch( lengths = torch.ones([batch_size]).long().cuda() * maxlen if image_list is not None: - # Note this will just be one image that comes here - media_tensor = inference_strategy.get_media_tensor(image_list[0][0]) + # + media_tensor = inference_strategy.get_media_tensor(image_list) while context_length < maxlen: if media_tensor is not None: From 071b8202ef3a8705e401a19cff1f0de3afee0b07 Mon Sep 17 00:00:00 2001 From: Ao Tang Date: Fri, 25 Aug 2023 14:48:17 -0700 Subject: [PATCH 164/512] NeMo TOT 1.21 to Internal/main --- .github/labeler.yml | 8 + Dockerfile | 10 +- Jenkinsfile | 507 +++--- PUBLICATIONS.md | 49 +- README.rst | 20 +- docs/source/asr/api.rst | 5 + docs/source/asr/configs.rst | 2 +- .../asr/data/benchmark_code_switching.csv | 3 + docs/source/asr/data/benchmark_fr.csv | 3 +- .../asr/data/benchmark_multilingual.csv | 5 + docs/source/asr/data/benchmark_ru.csv | 1 + docs/source/asr/intro.rst | 2 +- docs/source/asr/models.rst | 36 +- docs/source/asr/results.rst | 20 + .../nlp/nemo_megatron/retro/retro_model.rst | 446 ++++- .../wfst/wfst_text_normalization.rst | 18 +- docs/source/starthere/tutorials.rst | 15 +- docs/source/tools/intro.rst | 1 + docs/source/tools/nemo_forced_aligner.rst | 164 ++ docs/source/tools/speech_data_processor.rst | 162 +- docs/source/tts/api.rst | 23 +- .../helpers/convert_nemo_asr_hybrid_to_ctc.py | 184 +++ .../asr/conf/asr_adapters/asr_adaptation.yaml | 1 - .../asr/conf/conformer/conformer_ctc_bpe.yaml | 1 - .../conformer_hybrid_transducer_ctc_bpe.yaml | 1 - .../fast-conformer_transformer.yaml | 218 +++ .../squeezeformer/squeezeformer_ctc_bpe.yaml | 1 - examples/asr/conf/ssl/wav2vec/wav2vec_ci.yaml | 1 - .../conf/vad/frame_vad_infer_postprocess.yaml | 3 +- .../speech_classification/frame_vad_infer.py | 24 +- .../speech_to_text_transformer.py | 70 + .../speech_translation/translate_speech.py | 210 +++ examples/asr/transcribe_speech.py | 1 - .../audio_to_audio_eval.py | 0 .../conf/beamforming.yaml} | 8 +- examples/audio_tasks/conf/masking.yaml | 126 ++ .../process_audio.py | 0 .../speech_enhancement.py | 4 +- .../clip/conf/megatron_clip_config.yaml | 3 +- .../clip/megatron_clip_imagenet_zeroshot.py | 8 +- .../foundation/clip/megatron_clip_infer.py | 8 +- .../foundation/clip/megatron_clip_pretrain.py | 48 +- .../controlnet/conf/controlnet_v1-5.yaml | 4 +- .../generative/controlnet/controlnet_train.py | 43 +- .../dreambooth/conf/dreambooth.yaml | 4 +- .../generative/dreambooth/dreambooth.py | 51 +- .../generative/imagen/conf/base64-2b.yaml | 1 + .../imagen/conf/base64-500m-edm.yaml | 1 + .../generative/imagen/conf/base64-500m.yaml | 3 +- .../conf/base64-500m_online_encoding.yaml | 1 + .../generative/imagen/conf/sr1024-600m.yaml | 1 + .../imagen/conf/sr256-400m-edm.yaml | 1 + .../generative/imagen/conf/sr256-400m.yaml | 1 + .../imagen/conf/sr256-450m-edm.yaml | 1 + .../imagen/conf/sr256-600m-edm-noise.yaml | 1 + .../imagen/conf/sr256-600m-edm.yaml | 1 + .../generative/imagen/conf/sr256-600m.yaml | 1 + .../generative/imagen/imagen_training.py | 58 +- .../instruct_pix2pix/conf/sd_finetune.yaml | 3 +- .../instruct_pix2pix/sd_edit_cli.py | 8 +- .../instruct_pix2pix/sd_finetune.py | 51 +- .../stable_diffusion/conf/sd2_train.yaml | 3 +- .../stable_diffusion/conf/sd_train.yaml | 7 +- .../generative/stable_diffusion/sd_train.py | 53 +- .../mllm/kosmos/conf/kosmos_config.yaml | 3 +- .../multimodal/mllm/kosmos/kosmos_pretrain.py | 40 +- .../mllm/neva/conf/neva_config.yaml | 10 +- .../mllm/neva/conf/neva_finetune.yaml | 9 +- .../multimodal/mllm/neva/conf/neva_peft.yaml | 9 +- .../multimodal/mllm/neva/neva_finetune.py | 50 +- examples/multimodal/mllm/neva/neva_peft.py | 54 +- .../multimodal/mllm/neva/neva_pretrain.py | 49 +- .../nlp/dialogue/conf/dialogue_config.yaml | 1 - .../conf/duplex_tn_config.yaml | 1 - .../self_alignment_pretraining.py | 4 + .../intent_slot_classification_config.yaml | 1 - ...bel_intent_slot_classification_config.yaml | 1 - ..._pretraining_from_preprocessed_config.yaml | 2 +- .../conf/megatron_bart_config.yaml | 2 +- .../conf/megatron_bert_config.yaml | 2 +- .../conf/megatron_gpt_config.yaml | 11 +- .../conf/megatron_gpt_inference.yaml | 51 +- .../megatron_gpt_prompt_learning_config.yaml | 3 +- .../conf/megatron_gpt_validate_config.yaml | 2 +- .../conf/megatron_hiddens_base_config.yaml | 43 + .../conf/megatron_model_base_config.yaml | 2 +- .../conf/megatron_retro_config.yaml | 2 +- .../conf/megatron_retro_finetune_config.yaml | 2 +- .../conf/megatron_retro_mutransfer.yaml | 2 +- .../conf/megatron_t0_config.yaml | 2 +- .../conf/megatron_t5_config.yaml | 2 +- .../megatron_t5_config_finetune_eval.yaml | 2 +- ...megatron_t5_config_finetune_glue_eval.yaml | 2 +- ...megatron_t5_config_finetune_glue_mnli.yaml | 2 +- ...megatron_t5_config_finetune_glue_xnli.yaml | 2 +- .../conf/megatron_t5_finetune.yaml | 4 +- .../megatron_t5_lm_adaptation_finetune.yaml | 2 +- .../conf/megatron_t5_prompt_learning.yaml | 3 +- .../conf/megatron_ul2_config.yaml | 2 +- ...b_cfg_a100_h12288_tp4_mbs1_seqlen2048.yaml | 53 + ...b_cfg_a100_h12288_tp4_mbs2_seqlen2048.yaml | 53 + ...b_cfg_h100_h12288_tp4_mbs1_seqlen2048.yaml | 59 + ...b_cfg_h100_h12288_tp8_mbs2_seqlen2048.yaml | 59 + .../megatron_bart_pretraining.py | 21 +- .../megatron_bert_pretraining.py | 45 +- .../megatron_change_num_partitions.py | 5 - .../megatron_gpt_continue_training.py | 193 +++ .../language_modeling/megatron_gpt_eval.py | 8 +- .../megatron_gpt_pretraining.py | 48 +- .../megatron_gpt_prompt_learning.py | 18 +- .../megatron_gpt_prompt_learning_eval.py | 7 + .../language_modeling/megatron_gpt_test.py | 4 +- .../megatron_lm_ckpt_to_nemo.py | 2 +- .../megatron_retro_cal_shape.py | 13 +- .../megatron_retro_fine_tune.py | 22 +- .../megatron_retro_mutransfer_pretrain.py | 21 +- .../megatron_retro_pretraining.py | 21 +- .../megatron_t5_lm_adaptation_finetune.py | 22 +- .../megatron_t5_pretraining.py | 46 +- .../megatron_t5_prompt_learning.py | 5 +- .../megatron_t5_seq2seq_eval.py | 14 +- .../megatron_t5_seq2seq_finetune.py | 37 +- .../megatron_gpt_adapter_tuning_config.yaml | 3 +- .../conf/megatron_gpt_ia3_tuning_config.yaml | 2 +- .../conf/megatron_gpt_peft_eval_config.yaml | 5 +- .../conf/megatron_gpt_peft_tuning_config.yaml | 23 +- .../tuning/conf/megatron_gpt_sft.yaml | 42 +- .../megatron_t5_adapter_tuning_config.yaml | 3 +- .../conf/megatron_t5_ia3_tuning_config.yaml | 3 +- .../conf/megatron_t5_lora_tuning_config.yaml | 2 +- .../tuning/megatron_gpt_adapter_eval.py | 7 + .../tuning/megatron_gpt_adapter_tuning.py | 19 +- .../tuning/megatron_gpt_ia3_eval.py | 8 + .../tuning/megatron_gpt_ia3_tuning.py | 18 +- .../tuning/megatron_gpt_peft_eval.py | 53 +- .../tuning/megatron_gpt_peft_tuning.py | 39 +- .../tuning/megatron_gpt_sft.py | 42 +- .../tuning/megatron_t5_adapter_tuning.py | 12 +- .../tuning/megatron_t5_ia3_tuning.py | 13 +- .../tuning/megatron_t5_lora_tuning.py | 12 +- .../conf/aayn_base_megatron.yaml | 2 +- .../nlp/machine_translation/enc_dec_nmt.py | 3 +- .../megatron_nmt_training.py | 23 +- .../nlp/question_answering/conf/qa_conf.yaml | 1 - .../conf/text_classification_config.yaml | 1 - .../punctuation_capitalization_config.yaml | 3 - .../conf/token_classification_config.yaml | 1 - examples/tts/audio_codec.py | 34 + examples/tts/conf/audio_codec/encodec.yaml | 164 ++ .../tts/conf/fastpitch/fastpitch_22050.yaml | 122 +- .../tts/conf/fastpitch/fastpitch_44100.yaml | 286 ++++ .../tts/conf/fastpitch_align_ipa_adapter.yaml | 324 ++++ examples/tts/conf/feature/feature_22050.yaml | 20 +- examples/tts/conf/feature/feature_44100.yaml | 18 +- examples/tts/conf/hifigan/hifigan.yaml | 2 +- examples/tts/conf/hifigan/hifigan_44100.yaml | 2 +- .../conf/hifigan_dataset/hifigan_22050.yaml | 151 ++ .../conf/hifigan_dataset/hifigan_44100.yaml | 151 ++ examples/tts/conf/univnet/univnet.yaml | 2 +- .../tts/conf/zh/fastpitch_align_22050.yaml | 17 +- ...ronym_classification_train_and_evaluate.py | 4 + examples/tts/vits.py | 2 +- .../megatron_vit_classification_config.yaml | 2 +- .../megatron_vit_classification_evaluate.py | 8 +- .../megatron_vit_classification_finetune.py | 51 +- .../megatron_vit_classification_infer.py | 8 +- .../megatron_vit_classification_pretrain.py | 50 +- nemo/collections/asr/data/audio_to_audio.py | 2 +- nemo/collections/asr/data/data_simulation.py | 916 +++++++---- .../asr/metrics/multi_binary_acc.py | 12 +- nemo/collections/asr/metrics/rnnt_wer.py | 80 +- nemo/collections/asr/metrics/rnnt_wer_bpe.py | 31 +- nemo/collections/asr/metrics/wer.py | 156 +- nemo/collections/asr/metrics/wer_bpe.py | 30 +- nemo/collections/asr/models/__init__.py | 1 + .../asr/models/classification_models.py | 57 +- .../asr/models/confidence_ensemble.py | 22 +- nemo/collections/asr/models/ctc_models.py | 16 +- .../asr/models/enhancement_models.py | 15 +- .../asr/models/hybrid_asr_tts_models.py | 19 +- .../asr/models/hybrid_rnnt_ctc_bpe_models.py | 44 +- .../asr/models/hybrid_rnnt_ctc_models.py | 18 +- .../asr/models/transformer_bpe_models.py | 614 +++++++ nemo/collections/asr/modules/audio_modules.py | 104 +- .../asr/modules/conformer_encoder.py | 2 +- .../transformer/transformer_generators.py | 12 +- .../asr/parts/preprocessing/perturb.py | 2 +- .../parts/submodules/ctc_greedy_decoding.py | 55 +- .../parts/submodules/multi_head_attention.py | 66 +- .../parts/submodules/rnnt_greedy_decoding.py | 274 ++-- .../asr/parts/submodules/subsampling.py | 2 +- .../asr_confidence_benchmarking_utils.py | 183 +++ .../asr/parts/utils/asr_confidence_utils.py | 251 ++- .../asr/parts/utils/audio_utils.py | 8 +- .../asr/parts/utils/confidence_metrics.py | 202 ++- .../asr/parts/utils/transcribe_utils.py | 7 +- nemo/collections/common/callbacks/ema.py | 3 +- .../common/metrics/classification_accuracy.py | 69 +- .../metrics/metric_string_to_torchmetric.py | 3 +- .../collections/common/parts/ptl_overrides.py | 4 +- .../tokenizers/sentencepiece_tokenizer.py | 2 +- .../text_to_speech/tts_tokenizers.py | 5 +- .../multimodal/data/neva/conversation.py | 233 +-- .../multimodal/data/neva/neva_dataset.py | 263 ++- .../models/clip/megatron_clip_models.py | 101 +- .../models/controlnet/controlnet.py | 111 +- .../multimodal/models/controlnet/util.py | 2 +- .../models/dreambooth/dreambooth.py | 237 ++- .../multimodal/models/imagen/__init__.py | 13 + .../multimodal/models/imagen/imagen.py | 46 +- .../models/instruct_pix2pix/ldm/ddpm_edit.py | 2 +- .../models/kosmos/megatron_kosmos_model.py | 3 +- .../models/multimodal_base_model.py | 3 +- .../multimodal/models/neva/__init__.py | 13 + .../multimodal/models/neva/neva_model.py | 82 +- .../stable_diffusion/ldm/autoencoder.py | 3 +- .../models/stable_diffusion/ldm/ddpm.py | 233 ++- .../diffusionmodules/openaimodel.py | 3 +- .../stable_diffusion/diffusionmodules/util.py | 3 + .../stable_diffusion/encoders/modules.py | 12 + nemo/collections/multimodal/parts/utils.py | 6 - .../megatron/data_samplers.py | 25 +- .../megatron/gpt_sft_chat_dataset.py | 37 +- .../megatron/gpt_sft_dataset.py | 94 +- .../language_modeling/text_memmap_dataset.py | 8 +- .../nlp/metrics/classification_report.py | 4 +- .../dialogue_gpt_classification_model.py | 18 +- .../dialogue/dialogue_gpt_generation_model.py | 18 +- .../dialogue_nearest_neighbour_model.py | 18 +- .../dialogue/dialogue_s2s_generation_model.py | 18 +- .../dialogue_zero_shot_intent_model.py | 17 +- .../intent_slot_classification_model.py | 22 +- .../nlp/models/dialogue/sgdqa_model.py | 10 + .../duplex_tagger.py | 6 +- .../entity_linking/entity_linking_model.py | 14 +- .../information_retrieval/base_ir_model.py | 13 +- .../intent_slot_classification_model.py | 24 +- ..._label_intent_slot_classification_model.py | 4 +- .../models/language_modeling/bert_lm_model.py | 11 +- .../language_modeling/megatron/bert_model.py | 22 +- .../language_modeling/megatron/gpt_model.py | 21 +- .../language_modeling/megatron_base_model.py | 177 +- .../megatron_base_prompt_learning_model.py | 18 +- .../language_modeling/megatron_bert_model.py | 73 +- .../megatron_finetune_model.py | 264 +-- .../language_modeling/megatron_gpt_model.py | 517 ++++-- .../megatron_gpt_peft_models.py | 191 ++- .../megatron_gpt_prompt_learning_model.py | 64 +- .../megatron_gpt_sft_model.py | 505 +++--- .../megatron_lm_encoder_decoder_model.py | 261 ++- .../megatron_retrieval_model.py | 35 +- .../language_modeling/megatron_t0_model.py | 2 +- .../megatron_t5_adapter_model.py | 46 +- .../megatron_t5_prompt_learning_model.py | 39 +- .../language_modeling/transformer_lm_model.py | 12 +- .../machine_translation/megatron_nmt_model.py | 157 +- .../machine_translation/mt_enc_dec_model.py | 23 +- nemo/collections/nlp/models/nlp_model.py | 5 +- .../question_answering/qa_bert_model.py | 31 +- .../models/question_answering/qa_gpt_model.py | 30 +- .../nlp/models/question_answering/qa_model.py | 13 +- .../models/question_answering/qa_s2s_model.py | 30 +- .../models/text2sparql/text2sparql_model.py | 4 +- .../text_classification_model.py | 40 +- .../thutmose_tagger.py | 6 +- .../punctuation_capitalization_model.py | 22 +- .../token_classification_model.py | 18 +- .../megatron/adapters/parallel_adapters.py | 230 ++- .../nlp/modules/common/megatron/attention.py | 108 +- .../common/megatron/fused_layer_norm.py | 3 +- .../modules/common/megatron/language_model.py | 97 +- .../common/megatron/megatron_decoders.py | 21 +- .../megatron/megatron_encoder_decoder.py | 65 +- .../common/megatron/megatron_encoders.py | 23 +- .../megatron/megatron_perceiver_encoders.py | 22 +- .../megatron/megatron_transformer_decoder.py | 18 +- .../megatron/megatron_transformer_encoder.py | 26 +- .../modules/common/megatron/megatron_utils.py | 19 +- .../nlp/modules/common/megatron/mlp.py | 49 +- .../nlp/modules/common/megatron/module.py | 18 +- .../rotary_position_embedding.py | 17 +- .../retrieval_token_level_encoder_decoder.py | 15 +- .../common/megatron/retrieval_transformer.py | 31 +- .../megatron/token_level_encoder_decoder.py | 63 +- .../megatron/transformations/__init__.py | 16 + .../transformations/megatron_hidden_loss.py | 189 +++ .../megatron_hidden_transform.py | 170 ++ .../transformations/megatron_hiddens.py | 310 ++++ .../modules/common/megatron/transformer.py | 95 +- .../nlp/modules/common/megatron/utils.py | 12 +- .../nlp/modules/common/megatron_web_server.py | 76 +- .../nlp/modules/common/prompt_encoder.py | 27 +- .../modules/common/text_generation_server.py | 18 +- .../common/text_generation_strategy.py | 5 +- .../modules/common/text_generation_utils.py | 7 +- .../common/transformer/text_generation.py | 2 + .../transformer/transformer_generators.py | 12 +- .../nlp/parts/megatron_trainer_builder.py | 105 ++ nemo/collections/nlp/parts/nlp_overrides.py | 78 +- nemo/collections/nlp/parts/utils_funcs.py | 57 +- nemo/collections/tts/data/vocoder_dataset.py | 11 + nemo/collections/tts/g2p/models/ctc.py | 19 +- .../g2p/models/heteronym_classification.py | 35 +- nemo/collections/tts/g2p/models/t5.py | 4 +- .../tts/losses/audio_codec_loss.py | 294 ++++ nemo/collections/tts/models/__init__.py | 2 + nemo/collections/tts/models/audio_codec.py | 407 +++++ nemo/collections/tts/models/fastpitch.py | 29 +- nemo/collections/tts/models/fastpitch_ssl.py | 2 +- nemo/collections/tts/models/hifigan.py | 2 +- nemo/collections/tts/models/radtts.py | 13 +- nemo/collections/tts/models/ssl_tts.py | 4 +- nemo/collections/tts/models/tacotron2.py | 25 +- nemo/collections/tts/models/two_stages.py | 2 +- nemo/collections/tts/models/waveglow.py | 11 +- nemo/collections/tts/modules/aligner.py | 92 +- .../tts/modules/audio_codec_modules.py | 161 ++ nemo/collections/tts/modules/common.py | 27 + .../tts/modules/encodec_modules.py | 812 ++++++++++ nemo/collections/tts/modules/submodules.py | 2 +- .../tts/modules/vector_quantization.py | 30 + nemo/collections/tts/parts/utils/callbacks.py | 87 + .../tts/parts/utils/distributed.py | 64 + nemo/collections/tts/parts/utils/helpers.py | 5 +- .../megatron_vit_classification_models.py | 93 +- .../vision/models/vision_base_model.py | 1 - .../common/megatron/vision_transformer.py | 12 +- .../vision/modules/vit/vit_backbone.py | 5 +- nemo/core/classes/modelPT.py | 51 +- nemo/core/config/hydra_runner.py | 21 + nemo/core/config/pytorch_lightning.py | 15 +- .../core/connectors/save_restore_connector.py | 3 + nemo/core/neural_types/elements.py | 1 + nemo/core/optim/optimizers.py | 2 +- nemo/package_info.py | 4 +- nemo/utils/callbacks/nemo_model_checkpoint.py | 2 +- nemo/utils/exp_manager.py | 37 +- nemo/utils/model_utils.py | 2 +- pyproject.toml | 2 +- requirements/requirements_lightning.txt | 6 +- requirements/requirements_nlp.txt | 1 + .../ngram_lm/kenlm_utils.py | 18 +- .../ngram_lm/ngram_merge.py | 83 +- .../checkpoint_averaging_model_parallel.py | 112 ++ .../confidence_ensembles/build_ensemble.py | 82 +- .../confidence_ensembles/ensemble_config.yaml | 6 +- scripts/dataset_processing/add_noise.py | 24 +- .../tts/preprocess_audio.py | 23 + .../ds_conf/ds_for_fastpitch_align.yaml | 2 +- .../convert_mpt_7b_hf_to_nemo.py | 46 +- .../sft/attribute_annotate.py | 366 +++++ .../nlp_language_modeling/sft/data_clean.py | 97 ++ .../sft/preprocessing.py | 165 ++ .../confidence/benchmark_asr_confidence.py | 173 +- .../zh/24finals/ipa_dict_nv23.05.txt | 426 +++++ .../{ => 24finals}/pinyin_dict_nv_22.10.txt | 14 + .../zh/36finals/ipa_dict_nv23.05.txt | 427 +++++ .../zh/36finals/pinyin_dict_nv23.05.txt | 442 +++++ setup.py | 2 +- .../asr/confidence/test_asr_confidence.py | 144 ++ .../confidence/test_asr_confidence_metrics.py | 115 ++ .../test_asr_confidence_primitives.py | 142 ++ .../asr/test_asr_classification_model.py | 4 +- .../asr/test_asr_data_simulation.py | 40 +- .../test_asr_hybrid_rnnt_ctc_model_char.py | 6 +- .../asr/test_asr_interctc_models.py | 5 +- tests/collections/asr/test_asr_local_attn.py | 6 +- tests/collections/asr/test_asr_metrics.py | 10 + .../asr/test_asr_rnnt_encdec_model.py | 6 +- .../asr/test_confidence_ensembles.py | 170 ++ .../collections/asr/utils/test_audio_utils.py | 11 +- tests/collections/common/test_ema.py | 13 +- .../collections/multimodal/test_clip_model.py | 2 +- .../collections/nlp/test_chat_sft_dataset.py | 317 ++++ tests/collections/nlp/test_flash_attention.py | 54 +- tests/collections/nlp/test_gpt_eval.py | 1 + tests/collections/nlp/test_gpt_model.py | 2 +- tests/collections/nlp/test_nlp_exportables.py | 3 +- .../collections/nlp/test_rampup_batch_size.py | 2 +- .../collections/nlp/test_retrieval_module.py | 20 +- .../nlp/test_retrieval_module_inference.py | 20 +- .../tts/losses/test_audio_codec_loss.py | 44 + .../tts/modules/test_audio_codec_modules.py | 96 ++ tests/collections/vision/test_vit_model.py | 2 +- tests/core/test_config_utils.py | 8 +- tests/core/test_exp_manager.py | 28 +- tests/core/test_optimizers_schedulers.py | 2 +- tests/core_ptl/check_for_ranks.py | 12 +- tests/core_ptl/test_ptl_stateless_timer.py | 13 +- .../customization_dataset_preparation.py | 14 +- tools/nemo_forced_aligner/README.md | 135 +- tools/nemo_forced_aligner/align.py | 21 +- tools/nemo_forced_aligner/utils/data_prep.py | 8 + .../utils/make_ass_files.py | 140 +- tutorials/01_NeMo_Models.ipynb | 6 +- tutorials/VoiceSwapSample.ipynb | 2 +- tutorials/asr/ASR_Confidence_Estimation.ipynb | 1432 +++++++++++++++++ tutorials/asr/ASR_TTS_Tutorial.ipynb | 2 +- tutorials/asr/Confidence_Ensembles.ipynb | 517 ++++++ tutorials/audio_tasks/README.md | 6 + .../Speech_Enhancement_with_NeMo.ipynb | 1316 +++++++++++++++ tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb | 2 +- ...pellMapper_English_ASR_Customization.ipynb | 2 +- tutorials/nlp/lora.ipynb | 4 +- .../tools/NeMo_Forced_Aligner_Tutorial.ipynb | 558 +++++++ .../tts/FastPitch_Adapter_Finetuning.ipynb | 36 +- .../tts/FastPitch_ChineseTTS_Training.ipynb | 2 +- .../tts/FastPitch_Data_Preparation.ipynb | 1135 +++++++++++++ .../FastPitch_MultiSpeaker_Pretraining.ipynb | 2 +- tutorials/tts/Vits_Training.ipynb | 2 +- 410 files changed, 22022 insertions(+), 4802 deletions(-) create mode 100644 docs/source/asr/data/benchmark_code_switching.csv create mode 100644 docs/source/asr/data/benchmark_multilingual.csv create mode 100644 docs/source/tools/nemo_forced_aligner.rst create mode 100644 examples/asr/asr_hybrid_transducer_ctc/helpers/convert_nemo_asr_hybrid_to_ctc.py create mode 100644 examples/asr/conf/speech_translation/fast-conformer_transformer.yaml create mode 100644 examples/asr/speech_translation/speech_to_text_transformer.py create mode 100644 examples/asr/speech_translation/translate_speech.py rename examples/{asr/audio_to_audio => audio_tasks}/audio_to_audio_eval.py (100%) rename examples/{asr/audio_to_audio/conf/multichannel_enhancement.yaml => audio_tasks/conf/beamforming.yaml} (96%) create mode 100644 examples/audio_tasks/conf/masking.yaml rename examples/{asr/audio_to_audio => audio_tasks}/process_audio.py (100%) rename examples/{asr/audio_to_audio => audio_tasks}/speech_enhancement.py (94%) create mode 100644 examples/nlp/language_modeling/conf/megatron_hiddens_base_config.yaml create mode 100644 examples/nlp/language_modeling/conf/tp_overlap/ub_cfg_a100_h12288_tp4_mbs1_seqlen2048.yaml create mode 100644 examples/nlp/language_modeling/conf/tp_overlap/ub_cfg_a100_h12288_tp4_mbs2_seqlen2048.yaml create mode 100644 examples/nlp/language_modeling/conf/tp_overlap/ub_cfg_h100_h12288_tp4_mbs1_seqlen2048.yaml create mode 100644 examples/nlp/language_modeling/conf/tp_overlap/ub_cfg_h100_h12288_tp8_mbs2_seqlen2048.yaml create mode 100644 examples/nlp/language_modeling/megatron_gpt_continue_training.py create mode 100644 examples/tts/audio_codec.py create mode 100644 examples/tts/conf/audio_codec/encodec.yaml create mode 100644 examples/tts/conf/fastpitch/fastpitch_44100.yaml create mode 100644 examples/tts/conf/fastpitch_align_ipa_adapter.yaml create mode 100644 examples/tts/conf/hifigan_dataset/hifigan_22050.yaml create mode 100644 examples/tts/conf/hifigan_dataset/hifigan_44100.yaml create mode 100644 nemo/collections/asr/models/transformer_bpe_models.py create mode 100644 nemo/collections/asr/parts/utils/asr_confidence_benchmarking_utils.py create mode 100644 nemo/collections/multimodal/models/imagen/__init__.py create mode 100644 nemo/collections/multimodal/models/neva/__init__.py create mode 100644 nemo/collections/nlp/modules/common/megatron/transformations/__init__.py create mode 100644 nemo/collections/nlp/modules/common/megatron/transformations/megatron_hidden_loss.py create mode 100644 nemo/collections/nlp/modules/common/megatron/transformations/megatron_hidden_transform.py create mode 100644 nemo/collections/nlp/modules/common/megatron/transformations/megatron_hiddens.py create mode 100644 nemo/collections/nlp/parts/megatron_trainer_builder.py create mode 100644 nemo/collections/tts/losses/audio_codec_loss.py create mode 100644 nemo/collections/tts/models/audio_codec.py create mode 100644 nemo/collections/tts/modules/audio_codec_modules.py create mode 100644 nemo/collections/tts/modules/encodec_modules.py create mode 100644 nemo/collections/tts/modules/vector_quantization.py create mode 100644 nemo/collections/tts/parts/utils/distributed.py create mode 100644 scripts/checkpoint_averaging/checkpoint_averaging_model_parallel.py create mode 100644 scripts/nlp_language_modeling/sft/attribute_annotate.py create mode 100644 scripts/nlp_language_modeling/sft/data_clean.py create mode 100644 scripts/nlp_language_modeling/sft/preprocessing.py create mode 100644 scripts/tts_dataset_files/zh/24finals/ipa_dict_nv23.05.txt rename scripts/tts_dataset_files/zh/{ => 24finals}/pinyin_dict_nv_22.10.txt (85%) create mode 100644 scripts/tts_dataset_files/zh/36finals/ipa_dict_nv23.05.txt create mode 100644 scripts/tts_dataset_files/zh/36finals/pinyin_dict_nv23.05.txt create mode 100644 tests/collections/asr/confidence/test_asr_confidence.py create mode 100644 tests/collections/asr/confidence/test_asr_confidence_metrics.py create mode 100644 tests/collections/asr/confidence/test_asr_confidence_primitives.py create mode 100644 tests/collections/asr/test_confidence_ensembles.py create mode 100644 tests/collections/nlp/test_chat_sft_dataset.py create mode 100644 tests/collections/tts/losses/test_audio_codec_loss.py create mode 100644 tests/collections/tts/modules/test_audio_codec_modules.py create mode 100644 tutorials/asr/ASR_Confidence_Estimation.ipynb create mode 100644 tutorials/asr/Confidence_Ensembles.ipynb create mode 100644 tutorials/audio_tasks/README.md create mode 100644 tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb create mode 100644 tutorials/tools/NeMo_Forced_Aligner_Tutorial.ipynb create mode 100644 tutorials/tts/FastPitch_Data_Preparation.ipynb diff --git a/.github/labeler.yml b/.github/labeler.yml index e0e6691b14c6..77ac8cc68cb7 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -3,12 +3,14 @@ ASR: - examples/asr/**/* - tutorials/asr/**/* - docs/source/asr/**/* +- tests/collections/asr/** NLP: - nemo/collections/nlp/**/* - examples/nlp/**/* - tutorials/nlp/**/* - docs/source/nlp/**/* +- tests/collections/nlp/** Speaker Tasks: - examples/speaker_tasks/**/* @@ -16,12 +18,18 @@ Speaker Tasks: TTS: - nemo/collections/tts/**/* +- nemo/collections/common/tokenizers/text_to_speech/** - examples/tts/**/* - tutorials/tts/**/* - docs/source/tts/**/* +- scripts/dataset_processing/tts/** +- scripts/tts_dataset_files/** +- tests/collections/tts/** +- tests/collections/common/tokenizers/text_to_speech/** core: - nemo/core/**/* +- tests/core/** common: - nemo/collections/common/**/* diff --git a/Dockerfile b/Dockerfile index 2e6b617087bc..e0143367ff2c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -52,6 +52,12 @@ RUN git clone https://github.com/NVIDIA/apex.git && \ git checkout 8b7a1ff183741dd8f9b87e7bafd04cfde99cea28 && \ pip3 install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_layer_norm" --global-option="--distributed_adam" --global-option="--deprecated_fused_adam" ./ +# install megatron core, this can be removed once 0.3 pip package is released +RUN git clone https://github.com/NVIDIA/Megatron-LM.git && \ + cd Megatron-LM && \ + git checkout 0609f27fe8376f17ab65c001d3d8f35cd8175950 && \ + pip install -e . + # uninstall stuff from base container RUN pip3 uninstall -y sacrebleu torchtext @@ -76,6 +82,8 @@ RUN for f in $(ls requirements*.txt); do pip3 install --disable-pip-version-chec RUN pip install flash-attn # pinned triton version for flash-attention https://github.com/HazyResearch/flash-attention/blob/main/flash_attn/flash_attn_triton.py#L3 RUN pip install triton==2.0.0.dev20221202 +# install numba for latest containers +RUN pip install numba>=0.57.1 # install k2, skip if installation fails COPY scripts /tmp/nemo/scripts/ @@ -94,7 +102,7 @@ COPY . . # start building the final container FROM nemo-deps as nemo -ARG NEMO_VERSION=1.20.0 +ARG NEMO_VERSION=1.21.0 # Check that NEMO_VERSION is set. Build will fail without this. Expose NEMO and base container # version information as runtime environment variable for introspection purposes diff --git a/Jenkinsfile b/Jenkinsfile index 1e66f7f6bf8e..727813f3bf32 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,11 +2,11 @@ pipeline { agent { docker { image 'nvcr.io/nvidia/pytorch:23.06-py3' - args '--device=/dev/nvidia0 --gpus all --user 0:128 -v /home/TestData:/home/TestData -v $HOME/.cache:/root/.cache --shm-size=8g --env TRANSFORMERS_OFFLINE=1' + args '--device=/dev/nvidia0 --gpus all --user 0:128 -v /home/TestData:/home/TestData -v $HOME/.cache:/root/.cache --shm-size=8g --env TRANSFORMERS_OFFLINE=1 --env HYDRA_FULL_ERROR=1' } } options { - timeout(time: 2, unit: 'HOURS') + timeout(time: 8, unit: 'HOURS') disableConcurrentBuilds(abortPrevious: true) } @@ -59,10 +59,10 @@ pipeline { stage('Megatron Core installation') { steps { - // commit points to core 23.05 ToT + // commit has api fix for TE sh 'git clone https://github.com/NVIDIA/Megatron-LM.git && \ cd Megatron-LM && \ - git checkout 060415572f4365a2e895f8036c4e37dad0efbdf5 && \ + git checkout 0609f27fe8376f17ab65c001d3d8f35cd8175950 && \ pip install -e .' } } @@ -103,8 +103,8 @@ pipeline { stage('L0: Unit Tests CPU') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } steps { @@ -115,8 +115,8 @@ pipeline { stage('L2: ASR dev run') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -164,19 +164,21 @@ pipeline { } } - stage('L2: Speech Pre-training - Wav2Vec') { - steps { - sh 'python examples/asr/speech_pretraining/speech_pre_training.py \ - --config-path="../conf/ssl/wav2vec/" --config-name="wav2vec_ci" \ - model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \ - model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \ - trainer.devices=[1] \ - trainer.accelerator="gpu" \ - +trainer.fast_dev_run=True \ - exp_manager.exp_dir=examples/asr/speech_pre_training_results' - sh 'rm -rf examples/asr/speech_pre_training_results' - } - } + // TODO: Please Fix Me + // Error locating target 'nemo.collections.asr.modules.wav2vec_modules.ConvFeatureEncoder', see chained exception above. + // stage('L2: Speech Pre-training - Wav2Vec') { + // steps { + // sh 'python examples/asr/speech_pretraining/speech_pre_training.py \ + // --config-path="../conf/ssl/wav2vec/" --config-name="wav2vec_ci" \ + // model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \ + // model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \ + // trainer.devices=[1] \ + // trainer.accelerator="gpu" \ + // +trainer.fast_dev_run=True \ + // exp_manager.exp_dir=examples/asr/speech_pre_training_results' + // sh 'rm -rf examples/asr/speech_pre_training_results' + // } + // } stage('L2: Speech to Text WPE - Conformer') { steps { @@ -201,8 +203,8 @@ pipeline { stage('L2: ASR dev run - part two') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -231,8 +233,8 @@ pipeline { stage('L2: Speech to Text EMA') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } steps { @@ -252,8 +254,8 @@ pipeline { stage('L2: Speaker dev run') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -375,8 +377,8 @@ pipeline { // stage('L2: ASR DALI dev run') { // when { // anyOf { - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -443,8 +445,8 @@ pipeline { // stage('L2: ASR RNNT dev run') { // when { // anyOf { - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -505,8 +507,8 @@ pipeline { // stage('L2: Hybrid ASR RNNT-CTC dev run') { // when { // anyOf { - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -535,8 +537,8 @@ pipeline { stage('L2: ASR Multi-dataloader dev run') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -583,8 +585,8 @@ pipeline { stage('L2: ASR Adapters') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -629,8 +631,8 @@ pipeline { //stage('L2: Megatron T5 Adapter PP=2') { // when { // anyOf { - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -675,8 +677,8 @@ pipeline { //stage('L2: Megatron T5 Adapter TP=2') { // when { // anyOf { - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -719,8 +721,8 @@ pipeline { stage('L2: Megatron T5 IA3 PP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -744,18 +746,19 @@ pipeline { model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ model.global_batch_size=4" - sh "python examples/nlp/language_modeling/tuning/megatron_t5_ia3_eval.py \ - --config-name=megatron_t5_ia3_inference \ - adapter_model_file='examples/ia3_tuning/test_tp1_pp2.nemo' \ - language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ - trainer.devices=2 \ - data.num_workers=1 \ - tensor_model_parallel_size=1 \ - pipeline_model_parallel_size=2 \ - data.global_batch_size=2 \ - data.micro_batch_size=2 \ - data.test_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - pred_file_path='examples/ia3_tuning/test_tp1_pp2/preds.txt'" + // TODO: @eharper temporarily comment while investigating how to fix + // sh "python examples/nlp/language_modeling/tuning/megatron_t5_ia3_eval.py \ + // --config-name=megatron_t5_ia3_inference \ + // adapter_model_file='examples/ia3_tuning/test_tp1_pp2.nemo' \ + // language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ + // trainer.devices=2 \ + // data.num_workers=1 \ + // tensor_model_parallel_size=1 \ + // pipeline_model_parallel_size=2 \ + // data.global_batch_size=2 \ + // data.micro_batch_size=2 \ + // data.test_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // pred_file_path='examples/ia3_tuning/test_tp1_pp2/preds.txt'" sh "rm -rf examples/ia3_tuning/test_tp1_pp2.nemo" sh "rm -rf examples/ia3_tuning/test_tp1_pp2" } @@ -765,8 +768,8 @@ pipeline { stage('L2: Megatron T5 IA3 TP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -809,8 +812,8 @@ pipeline { stage('L2: Megatron GPT Adapter TP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -853,8 +856,8 @@ pipeline { //stage('L2: Megatron GPT Adapter PP=2') { // when { // anyOf { - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -897,8 +900,8 @@ pipeline { stage('L2: Speech Transcription') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -918,8 +921,8 @@ pipeline { stage('L2: Transducer alignment') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -935,8 +938,8 @@ pipeline { stage('L2: Segmentation Tool') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } stages { @@ -991,8 +994,8 @@ pipeline { stage('L2: G2P Models') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1073,8 +1076,8 @@ pipeline { // stage('L2: Multi-GPU Megatron finetuning') { // when { // anyOf { - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -1100,8 +1103,8 @@ pipeline { stage('L2: STS-b') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1160,8 +1163,8 @@ pipeline { stage('L2: Dialogue Classification') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1331,8 +1334,8 @@ pipeline { stage('L2: Dialogue Generation') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1397,8 +1400,8 @@ pipeline { // stage('L2: Dialogue Generation Part 2') { // when { // anyOf { -// branch 'r1.20.0' -// changeRequest target: 'r1.20.0' +// branch 'main' +// changeRequest target: 'main' // } // } // failFast true @@ -1427,8 +1430,8 @@ pipeline { stage('L2: COPY') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1457,8 +1460,8 @@ pipeline { stage('L2: Duplex Text Normalization') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1495,8 +1498,8 @@ pipeline { // stage('L2: MegaBERT Token Classification') { // when { // anyOf { - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -1521,8 +1524,8 @@ pipeline { stage('L2: BERT Text Classification') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1550,8 +1553,8 @@ pipeline { stage('L2: Parallel BERT Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1609,8 +1612,8 @@ pipeline { stage('L2: Parallel BART Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1670,8 +1673,8 @@ pipeline { stage('L2: Parallel GPT2 Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1731,8 +1734,8 @@ pipeline { stage('L2: Intent and Slot Classification Tasks') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -1771,8 +1774,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Text Classification') { // when { // anyOf{ - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -1800,8 +1803,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Autoresume') { // when { // anyOf{ - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -1831,8 +1834,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Evaluation from .nemo') { // when { // anyOf{ - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -1852,8 +1855,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Train from .nemo') { // when { // anyOf{ - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -1875,8 +1878,8 @@ pipeline { stage('L2: Parallel NLP Examples 2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2000,8 +2003,8 @@ pipeline { stage('Punctuation & Capitalization tarred dataset') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2059,8 +2062,8 @@ pipeline { stage('Punctuation & Capitalization, Different ways of passing labels to model') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2167,8 +2170,8 @@ pipeline { stage('Punctuation & Capitalization inference') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2193,8 +2196,8 @@ pipeline { stage('L2: Parallel Pretraining BERT pretraining from Text/Preprocessed') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2234,7 +2237,10 @@ pipeline { trainer.devices=[1] \ trainer.accelerator="gpu" \ trainer.precision=16 \ - +trainer.fast_dev_run=true \ + +trainer.fast_dev_run=false \ + +trainer.max_epochs=1 \ + +trainer.limit_val_batches=0 \ + +trainer.limit_train_batches=1 \ model.train_ds.data_file=/home/TestData/nlp/wiki_book_mini/training \ model.train_ds.batch_size=8 \ model.language_model.lm_checkpoint=/home/TestData/nlp/bert_ckpts/nemo1.0/bert_base_uncased_mlm_final_1074591_nemo1.0.pt \ @@ -2255,8 +2261,8 @@ pipeline { stage('L2: Entity Linking') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2283,8 +2289,8 @@ pipeline { stage('L2: NMT Attention is All You Need Training') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2406,8 +2412,8 @@ pipeline { stage('L2: NMT Attention is All You Need Inference') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2442,8 +2448,8 @@ pipeline { stage('L2: NMT Attention is All You Need Finetuning') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2477,8 +2483,8 @@ pipeline { stage('L2: NMT Tarred Dataset Creation') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2531,8 +2537,8 @@ pipeline { stage('L2: Megatron NMT Training TP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2580,11 +2586,13 @@ pipeline { model.encoder_tokenizer.model=/home/TestData/nlp/nmt/toy_data/spm_64k_all_langs_plus_en.model \ model.decoder_tokenizer.library=sentencepiece \ model.decoder_tokenizer.model=/home/TestData/nlp/nmt/toy_data/spm_64k_all_langs_plus_en.model" + // Change val_check_interval to 1 for resume as the len(dataloder) is 1 due to max_steps being the same as that of training and Lightning 2.0 raises an error + // if val_check_interval > len(dataloder: https://github.com/Lightning-AI/lightning/blob/2.0.6/src/lightning/pytorch/loops/fit_loop.py#L259 at the beginning of fit_loop.run() sh "python examples/nlp/machine_translation/megatron_nmt_training.py \ trainer.devices=2 \ trainer.accelerator=gpu \ trainer.log_every_n_steps=1 \ - trainer.val_check_interval=10 \ + trainer.val_check_interval=1 \ +trainer.limit_val_batches=2 \ trainer.accumulate_grad_batches=1 \ trainer.max_steps=10 \ @@ -2626,12 +2634,11 @@ pipeline { sh "rm -rf examples/nlp/machine_translation/megatron_nmt_results" } } - // stage('L2: NMT Bottleneck Fallback') { // when { // anyOf { - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -2677,8 +2684,8 @@ pipeline { // stage('L2: NMT Bottleneck Architecture') { // when { // anyOf { - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -2760,8 +2767,8 @@ pipeline { // stage('L2: NMT Bottleneck LVM') { // when { // anyOf { - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -2843,8 +2850,8 @@ pipeline { stage('L2: Megatron Bert Pretraining and Resume Training with Pipeline Paralleism') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2913,8 +2920,8 @@ pipeline { stage('L2: Megatron Bert Pretraining and Resume Training') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -2984,8 +2991,8 @@ pipeline { stage('L2: Megatron RETRO Pretraining and Resume Training') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3056,8 +3063,8 @@ pipeline { stage('L2: Megatron RETRO muTransfer Pretraining Performance') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3139,8 +3146,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: BioMegatron Bert NER Task') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3157,8 +3164,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3202,7 +3209,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' trainer.accelerator=gpu \ trainer.log_every_n_steps=1 \ trainer.val_check_interval=2 \ - trainer.limit_val_batches=1 \ + trainer.limit_val_batches=2 \ trainer.accumulate_grad_batches=1 \ trainer.max_steps=6 \ trainer.precision=16 \ @@ -3239,8 +3246,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT with Rope Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3321,13 +3328,13 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' //model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results" sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" - } - } + } + } stage('L2: Megatron GPT with Rope Pretraining using Flash Attention and Resume Training TP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3415,8 +3422,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT with ALiBi Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3500,8 +3507,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT with KERPLE Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3585,8 +3592,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Pretraining and Resume Training PP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3666,11 +3673,12 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" } } + // @athitten Remove /home/TestData/nlp/megatron_sft/trec.jsonl for validation and test file until we have support for multiple dataloaders in lightning 2.0 stage('L2: Megatron GPT Finetuning PP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3695,18 +3703,18 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.data.train_ds.concat_sampling_probabilities=[0.3,0.7] \ model.data.train_ds.num_workers=0 \ model.data.test_ds.micro_batch_size=1 \ - model.data.test_ds.global_batch_size=4 \ - model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl,/home/TestData/nlp/megatron_sft/trec.jsonl] \ - model.data.test_ds.names=[quarel,trec] \ + model.data.test_ds.global_batch_size=1 \ + model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.test_ds.names=[quarel] \ model.data.validation_ds.micro_batch_size=1 \ - model.data.validation_ds.global_batch_size=4 \ + model.data.validation_ds.global_batch_size=1 \ model.data.validation_ds.num_workers=0 \ - model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl,/home/TestData/nlp/megatron_sft/trec.jsonl] \ - model.data.validation_ds.names=[quarel,trec]" + model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.validation_ds.names=[quarel]" sh "python examples/nlp/language_modeling/tuning/megatron_gpt_sft.py \ trainer.devices=2 \ trainer.log_every_n_steps=1 \ - trainer.val_check_interval=2 \ + trainer.val_check_interval=1 \ +trainer.limit_val_batches=2 \ trainer.max_steps=3 \ trainer.precision=16 \ @@ -3724,21 +3732,21 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.data.train_ds.num_workers=0 \ model.data.test_ds.micro_batch_size=1 \ model.data.test_ds.global_batch_size=4 \ - model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl,/home/TestData/nlp/megatron_sft/trec.jsonl] \ - model.data.test_ds.names=[quarel,trec] \ + model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.test_ds.names=[quarel] \ model.data.validation_ds.micro_batch_size=1 \ model.data.validation_ds.global_batch_size=4 \ model.data.validation_ds.num_workers=0 \ - model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl,/home/TestData/nlp/megatron_sft/trec.jsonl] \ - model.data.validation_ds.names=[quarel,trec]" + model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.validation_ds.names=[quarel]" sh "rm -rf examples/nlp/language_modeling/gpt_sft_results" } } stage('L2: Megatron GPT PEFT Lora PP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3759,7 +3767,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.peft.peft_scheme='lora' \ model.answer_only_loss=True \ model.micro_batch_size=1 \ - model.global_batch_size=4 \ + model.global_batch_size=1 \ model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ model.data.train_ds.concat_sampling_probabilities=[1.0] \ model.data.train_ds.num_workers=0 \ @@ -3772,8 +3780,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT PEFT Lora TP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3794,7 +3802,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.peft.peft_scheme='lora' \ model.answer_only_loss=True \ model.micro_batch_size=1 \ - model.global_batch_size=4 \ + model.global_batch_size=1 \ model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ model.data.train_ds.concat_sampling_probabilities=[1.0] \ model.data.train_ds.num_workers=0 \ @@ -3812,6 +3820,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.data.test_ds.global_batch_size=1 \ model.data.test_ds.micro_batch_size=1 \ model.data.test_ds.tokens_to_generate=10 \ + model.data.test_ds.write_predictions_to_file=True \ + model.data.test_ds.output_file_path_prefix='/home/TestData/nlp/lora_tuning_tp2/out' \ inference.greedy=True \ inference.repetition_penalty=1.0 \ inference.outfile_path='/home/TestData/nlp/lora_tuning_tp2/out.jsonl'" @@ -3821,8 +3831,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Eval') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3832,14 +3842,14 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' prompts=['How to fix GPU memory? A:'] \ tensor_model_parallel_size=1 \ inference.tokens_to_generate=32 \ - trainer.precision=16" + trainer.precision=32" } } stage('L2: Megatron GPT Eval PP2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3850,14 +3860,15 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' tensor_model_parallel_size=1 \ pipeline_model_parallel_size=2 \ trainer.devices=2 \ - trainer.num_nodes=1" + trainer.num_nodes=1 \ + trainer.precision=32" } } stage('L2: Megatron GPT SFT Eval (inference seq len > training seq len)') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3871,6 +3882,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.data.test_ds.micro_batch_size=1 \ model.data.test_ds.tokens_to_generate=30 \ model.data.test_ds.max_seq_length=6000 \ + model.data.test_ds.write_predictions_to_file=True \ + model.data.test_ds.output_file_path_prefix='examples/nlp/language_modeling/out' \ inference.greedy=True \ inference.repetition_penalty=1.0 \ inference.outfile_path='examples/nlp/language_modeling/out.jsonl' && \ @@ -3881,8 +3894,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' //stage('L2: Megatron GPT Prompt Tuning TP1 PP1') { // when { // anyOf { - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -3912,12 +3925,11 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' // } // } //} - stage('L2: Megatron GPT Prompt Tuning TP2 PP1') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3955,12 +3967,11 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' } } } - stage('L2: Megatron GPT Prompt Tuning TP1 PP2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -3995,17 +4006,17 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' data_paths=['/home/TestData/nlp/prompt_learning/boolq_CI_test.jsonl']" sh "rm -rf /home/TestData/nlp/prompt_learning/p_tuning_test_pp.nemo" sh "rm -rf /home/TestData/nlp/prompt_learning/p_tuning_test_pp_preds.txt" - } - } - } - } + } + } + } + } // TODO: Add this test back. Test was failing on CI machines due to HW error // stage('L2: Megatron GPT Convert from Megatron-LM checkpoing and Eval') { // when { // anyOf { - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -4031,8 +4042,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron Change Partitions') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4078,8 +4089,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4129,7 +4140,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' trainer.devices=2 \ trainer.accelerator=gpu \ trainer.log_every_n_steps=1 \ - trainer.val_check_interval=10 \ + trainer.val_check_interval=1 \ trainer.limit_val_batches=2 \ trainer.accumulate_grad_batches=1 \ trainer.max_steps=10 \ @@ -4174,8 +4185,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 with ALiBi Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4225,7 +4236,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' trainer.devices=2 \ trainer.accelerator=gpu \ trainer.log_every_n_steps=1 \ - trainer.val_check_interval=10 \ + trainer.val_check_interval=1 \ trainer.limit_val_batches=2 \ trainer.accumulate_grad_batches=1 \ trainer.max_steps=10 \ @@ -4270,8 +4281,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 with KERPLE Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4321,7 +4332,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' trainer.devices=2 \ trainer.accelerator=gpu \ trainer.log_every_n_steps=1 \ - trainer.val_check_interval=10 \ + trainer.val_check_interval=1 \ trainer.limit_val_batches=2 \ trainer.accumulate_grad_batches=1 \ trainer.max_steps=10 \ @@ -4366,8 +4377,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Pretraining and Resume Training PP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4404,7 +4415,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' trainer.devices=2 \ trainer.accelerator=gpu \ trainer.log_every_n_steps=1 \ - trainer.val_check_interval=10 \ + trainer.val_check_interval=1 \ trainer.limit_val_batches=2 \ trainer.accumulate_grad_batches=1 \ trainer.max_steps=10 \ @@ -4436,8 +4447,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 w/ Mixture of Expert Pretraining') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4482,8 +4493,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' //stage('L2: Megatron T5 Prompt Learning TP1 PP1') { // when { // anyOf { - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -4523,8 +4534,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Prompt Learning TP2 PP1') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4568,8 +4579,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' // stage('L2: Megatron T5 Prompt Learning TP1 PP2') { // when { // anyOf { - // branch 'r1.20.0' - // changeRequest target: 'r1.20.0' + // branch 'main' + // changeRequest target: 'main' // } // } // failFast true @@ -4608,12 +4619,11 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' // } // } // } - stage('L2: Megatron UL2 Pretraining and Resume Training TP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4655,7 +4665,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' trainer.devices=2 \ trainer.accelerator=gpu \ trainer.log_every_n_steps=1 \ - trainer.val_check_interval=10 \ + trainer.val_check_interval=1 \ trainer.limit_val_batches=2 \ trainer.accumulate_grad_batches=1 \ trainer.max_steps=10 \ @@ -4692,8 +4702,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Eval') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4709,8 +4719,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron BART Pretraining and Resume Training, TP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4748,7 +4758,6 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' trainer.accelerator=gpu \ trainer.log_every_n_steps=1 \ trainer.val_check_interval=2 \ - trainer.limit_val_batches=1 \ trainer.accumulate_grad_batches=1 \ trainer.max_steps=6 \ trainer.precision=16 \ @@ -4778,8 +4787,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron BART Pretraining and Resume Training, PP=2') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4818,7 +4827,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' trainer.devices=2 \ trainer.accelerator=gpu \ trainer.log_every_n_steps=1 \ - trainer.val_check_interval=10 \ + trainer.val_check_interval=1 \ trainer.limit_val_batches=2 \ trainer.accumulate_grad_batches=1 \ trainer.max_steps=10 \ @@ -4851,8 +4860,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 GLUE/XNLI Finetuning') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4924,8 +4933,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron Mock Data Generation') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true @@ -4934,7 +4943,6 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' steps { sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ trainer.max_steps=10 \ - trainer.limit_val_batches=1 \ trainer.val_check_interval=10 \ exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ model.data.data_impl=mock \ @@ -4947,7 +4955,6 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' steps { sh "python examples/nlp/language_modeling/megatron_t5_pretraining.py \ trainer.max_steps=10 \ - trainer.limit_val_batches=1 \ trainer.val_check_interval=10 \ exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \ model.data.data_impl=mock \ @@ -4961,8 +4968,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: TTS Fast dev runs 1') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } parallel { @@ -4974,7 +4981,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' trainer.devices=[0] \ trainer.accelerator="gpu" \ +trainer.limit_train_batches=1 +trainer.limit_val_batches=1 trainer.max_epochs=1 \ - trainer.strategy=null \ + trainer.strategy=auto \ model.decoder.decoder_rnn_dim=256 \ model.decoder.attention_rnn_dim=1024 \ model.decoder.prenet_dim=128 \ @@ -4996,7 +5003,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' validation_datasets=/home/TestData/an4_dataset/an4_val.json \ trainer.devices="[0]" \ +trainer.limit_train_batches=1 +trainer.limit_val_batches=1 trainer.max_epochs=1 \ - trainer.strategy=null \ + trainer.strategy=auto \ model.train_ds.dataloader_params.batch_size=4 \ model.train_ds.dataloader_params.num_workers=0 \ model.validation_ds.dataloader_params.batch_size=4 \ @@ -5018,7 +5025,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' +trainer.limit_train_batches=1 \ +trainer.limit_val_batches=1 \ trainer.max_epochs=1 \ - trainer.strategy=null \ + trainer.strategy=auto \ model.pitch_mean=212.35873413085938 \ model.pitch_std=68.52806091308594 \ model.train_ds.dataloader_params.batch_size=4 \ @@ -5045,7 +5052,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' +trainer.limit_train_batches=1 \ +trainer.limit_val_batches=1 \ trainer.max_epochs=1 \ - trainer.strategy=null \ + trainer.strategy=auto \ model.pitch_mean=212.35873413085938 \ model.pitch_std=68.52806091308594 \ model.train_ds.dataloader_params.batch_size=4 \ @@ -5070,7 +5077,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' +trainer.limit_train_batches=1 \ +trainer.limit_val_batches=1 \ trainer.max_epochs=1 \ - trainer.strategy=null \ + trainer.strategy=auto \ model.pitch_mean=212.35873413085938 \ model.pitch_std=68.52806091308594 \ model.train_ds.dataloader_params.batch_size=4 \ @@ -5091,7 +5098,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' +trainer.limit_train_batches=1 \ +trainer.limit_val_batches=1 \ +trainer.max_epochs=1 \ - trainer.strategy=null \ + trainer.strategy=auto \ model.train_ds.dataloader_params.batch_size=4 \ model.train_ds.dataloader_params.num_workers=0 \ model.validation_ds.dataloader_params.batch_size=4 \ @@ -5107,8 +5114,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L??: Speech Checkpoints tests') { when { anyOf { - branch 'r1.20.0' - changeRequest target: 'r1.20.0' + branch 'main' + changeRequest target: 'main' } } failFast true diff --git a/PUBLICATIONS.md b/PUBLICATIONS.md index 365ed2773ed3..cd120efc7e7b 100644 --- a/PUBLICATIONS.md +++ b/PUBLICATIONS.md @@ -9,6 +9,7 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I

2023 + * [Confidence-based Ensembles of End-to-End Speech Recognition Models](https://arxiv.org/abs/2306.15824) * [Fast Entropy-Based Methods of Word-Level Confidence Estimation for End-to-End Automatic Speech Recognition](https://ieeexplore.ieee.org/abstract/document/10022960) * [Damage Control During Domain Adaptation for Transducer Based Automatic Speech Recognition](https://ieeexplore.ieee.org/abstract/document/10023219) @@ -23,13 +24,13 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2021 - + * [Citrinet: Closing the Gap between Non-Autoregressive and Autoregressive End-to-End Models for Automatic Speech Recognition](https://arxiv.org/abs/2104.01721) * [SPGISpeech: 5,000 hours of transcribed financial audio for fully formatted end-to-end speech recognition](https://www.isca-speech.org/archive/interspeech_2021/oneill21_interspeech.html) * [CarneliNet: Neural Mixture Model for Automatic Speech Recognition](https://arxiv.org/abs/2107.10708) * [CTC Variations Through New WFST Topologies](https://arxiv.org/abs/2110.03098) * [A Toolbox for Construction and Analysis of Speech Datasets](https://openreview.net/pdf?id=oJ0oHQtAld) - +
@@ -45,11 +46,11 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2019 - + * [Jasper: An End-to-End Convolutional Neural Acoustic Model](https://arxiv.org/abs/1904.03288) * [QuartzNet: Deep Automatic Speech Recognition with 1D Time-Channel Separable Convolutions](https://arxiv.org/abs/1910.10261) - - + +
@@ -60,7 +61,7 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2022 - + * [TitaNet: Neural Model for Speaker Representation with 1D Depth-Wise Separable Convolutions and Global Context](https://ieeexplore.ieee.org/abstract/document/9746806)
@@ -68,8 +69,8 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2020 - - * [SpeakerNet: 1D Depth-wise Separable Convolutional Network for Text-Independent Speaker Recognition and Verification]( https://arxiv.org/pdf/2010.12653.pdf) + + * [SpeakerNet: 1D Depth-wise Separable Convolutional Network for Text-Independent Speaker Recognition and Verification]( https://arxiv.org/pdf/2010.12653.pdf)
@@ -79,7 +80,7 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2022 - + * [AmberNet: A Compact End-to-End Model for Spoken Language Identification](https://arxiv.org/abs/2210.15781) * [Accidental Learners: Spoken Language Identification in Multilingual Self-Supervised Models](https://arxiv.org/abs/2211.05103) @@ -88,17 +89,17 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2021 - + * [MarbleNet: Deep 1D Time-Channel Separable Convolutional Neural Network for Voice Activity Detection](https://ieeexplore.ieee.org/abstract/document/9414470/)
- +
2020 - + * [MatchboxNet - 1D Time-Channel Separable Convolutional Neural Network Architecture for Speech Commands Recognition](http://www.interspeech2020.org/index.php?m=content&c=index&a=show&catid=337&id=993) - +
@@ -108,7 +109,7 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2022 - + * [NVIDIA NeMo Offline Speech Translation Systems for IWSLT 2022](https://aclanthology.org/2022.iwslt-1.18/)
@@ -130,7 +131,7 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2021 - + * [BioMegatron: Larger Biomedical Domain Language Model ](https://aclanthology.org/2020.emnlp-main.379/)
@@ -157,16 +158,16 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2021 - + * [SGD-QA: Fast Schema-Guided Dialogue State Tracking for Unseen Services](https://arxiv.org/abs/2105.08049) - +
2020 - + * [A Fast and Robust BERT-based Dialogue State Tracker for Schema-Guided Dialogue Dataset](https://arxiv.org/abs/2008.12335) - +
-------- @@ -175,19 +176,19 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2022 - + * [Adapter-Based Extension of Multi-Speaker Text-to-Speech Model for New Speakers](https://arxiv.org/abs/2211.00585)
2021 - + * [TalkNet: Fully-Convolutional Non-Autoregressive Speech Synthesis Model](https://www.isca-speech.org/archive/interspeech_2021/beliaev21_interspeech.html) * [TalkNet 2: Non-Autoregressive Depth-Wise Separable Convolutional Model for Speech Synthesis with Explicit Pitch and Duration Prediction](https://arxiv.org/abs/2104.08189) * [Hi-Fi Multi-Speaker English TTS Dataset](https://www.isca-speech.org/archive/pdfs/interspeech_2021/bakhturina21_interspeech.pdf) * [Mixer-TTS: non-autoregressive, fast and compact text-to-speech model conditioned on language model embeddings](https://arxiv.org/abs/2110.03584) - +
@@ -196,7 +197,7 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I # (Inverse) Text Normalization
2022 - + * [Shallow Fusion of Weighted Finite-State Transducer and Language Model for Text Normalization](https://arxiv.org/abs/2203.15917) * [Thutmose Tagger: Single-pass neural model for Inverse Text Normalization](https://arxiv.org/abs/2208.00064) @@ -207,7 +208,7 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I * [NeMo Inverse Text Normalization: From Development to Production](https://www.isca-speech.org/archive/pdfs/interspeech_2021/zhang21ga_interspeech.pdf) * [A Unified Transformer-based Framework for Duplex Text Normalization](https://arxiv.org/pdf/2108.09889.pdf ) - +
-------- \ No newline at end of file diff --git a/README.rst b/README.rst index 6fbe9047d0c4..e23f12a4bbfe 100644 --- a/README.rst +++ b/README.rst @@ -115,13 +115,15 @@ Key Features * `Prompt Learning `_ * `NGC collection of pre-trained NLP models. `_ * `Synthetic Tabular Data Generation `_ -* `Speech synthesis (TTS) `_ - * Spectrogram generation: Tacotron2, GlowTTS, TalkNet, FastPitch, FastSpeech2, Mixer-TTS, Mixer-TTS-X - * Vocoders: WaveGlow, SqueezeWave, UniGlow, MelGAN, HiFiGAN, UnivNet - * End-to-end speech generation: FastPitch_HifiGan_E2E, FastSpeech2_HifiGan_E2E, VITS - * `NGC collection of pre-trained TTS models. `_ +* Text-to-Speech Synthesis (TTS): + * `Documentation `_ + * Mel-Spectrogram generators: FastPitch, SSL FastPitch, Mixer-TTS/Mixer-TTS-X, RAD-TTS, Tacotron2 + * Vocoders: HiFiGAN, UnivNet, WaveGlow + * End-to-End Models: VITS + * `Pre-trained Model Checkpoints in NVIDIA GPU Cloud (NGC) `_ * `Tools `_ * `Text Processing (text normalization and inverse text normalization) `_ + * `NeMo Forced Aligner `_ * `CTC-Segmentation tool `_ * `Speech Data Explorer `_: a dash-based tool for interactive exploration of ASR/TTS datasets * `Speech Data Processor `_ @@ -132,7 +134,7 @@ Built for speed, NeMo can utilize NVIDIA's Tensor Cores and scale out training t Requirements ------------ -1) Python 3.9 or above +1) Python 3.10 or above 2) Pytorch 1.13.1 or above 3) NVIDIA GPU for training @@ -176,7 +178,7 @@ We recommend installing NeMo in a fresh Conda environment. .. code-block:: bash - conda create --name nemo python==3.8.10 + conda create --name nemo python==3.10.12 conda activate nemo Install PyTorch using their `configurator `_. @@ -304,13 +306,13 @@ NeMo Text Processing, specifically (Inverse) Text Normalization, is now a separa Docker containers: ~~~~~~~~~~~~~~~~~~ -We release NeMo containers alongside NeMo releases. For example, NeMo ``r1.19.0`` comes with container ``nemo:23.04``, you may find more details about released containers in `releases page `_. +We release NeMo containers alongside NeMo releases. For example, NeMo ``r1.20.0`` comes with container ``nemo:23.06``, you may find more details about released containers in `releases page `_. To use built container, please run .. code-block:: bash - docker pull nvcr.io/nvidia/nemo:23.04 + docker pull nvcr.io/nvidia/nemo:23.06 To build a nemo container with Dockerfile from a branch, please run diff --git a/docs/source/asr/api.rst b/docs/source/asr/api.rst index 1e2073798d64..1d880018fd15 100644 --- a/docs/source/asr/api.rst +++ b/docs/source/asr/api.rst @@ -39,6 +39,11 @@ Model Classes :show-inheritance: :members: from_asr_config, from_pretrained_models, save_asr_model_to, setup_training_data +.. _confidence-ensembles-api: + +.. autoclass:: nemo.collections.asr.models.confidence_ensembles.ConfidenceEnsembleModel + :show-inheritance: + :members: transcribe Modules ------- diff --git a/docs/source/asr/configs.rst b/docs/source/asr/configs.rst index f9a4ea9970b1..d21b40e34570 100644 --- a/docs/source/asr/configs.rst +++ b/docs/source/asr/configs.rst @@ -64,7 +64,7 @@ An example ASR train and validation configuration should look similar to the fol There are two ways to test/validate on more than one manifest: - Specify a list in the `manifest_filepath` field. Results will be reported for each, the first one being used for overall loss / WER (specify `val_dl_idx` if you wish to change that). In this case, all manifests will share configuration parameters. -- Use the ds_item key and pass a list of config objects to it. This allows you to use differently configured datasets for validation, e.g. +- Use the ds_item key and pass a list of config objects to it. This allows you to use differently configured datasets for validation, e.g. .. code-block:: yaml diff --git a/docs/source/asr/data/benchmark_code_switching.csv b/docs/source/asr/data/benchmark_code_switching.csv new file mode 100644 index 000000000000..1320f19911e6 --- /dev/null +++ b/docs/source/asr/data/benchmark_code_switching.csv @@ -0,0 +1,3 @@ +Model,Model Base Class,Model Card +stt_enes_conformer_ctc_large_codesw,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_enes_conformer_ctc_large_codesw" +stt_enes_conformer_transducer_large_codesw,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_enes_conformer_transducer_large_codesw" \ No newline at end of file diff --git a/docs/source/asr/data/benchmark_fr.csv b/docs/source/asr/data/benchmark_fr.csv index 2f27a0ab2009..0f17318caead 100644 --- a/docs/source/asr/data/benchmark_fr.csv +++ b/docs/source/asr/data/benchmark_fr.csv @@ -5,4 +5,5 @@ stt_fr_no_hyphen_citrinet_1024_gamma_0_25,EncDecCTCModelBPE,"https://ngc.nvidia. stt_fr_contextnet_1024,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_fr_contextnet_1024" stt_fr_conformer_ctc_large,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_fr_conformer_ctc_large" stt_fr_no_hyphen_conformer_ctc_large,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_fr_conformer_ctc_large" -stt_fr_conformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_fr_conformer_transducer_large" \ No newline at end of file +stt_fr_conformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_fr_conformer_transducer_large" +stt_fr_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_fr_fastconformer_hybrid_large_pc" \ No newline at end of file diff --git a/docs/source/asr/data/benchmark_multilingual.csv b/docs/source/asr/data/benchmark_multilingual.csv new file mode 100644 index 000000000000..c56f05def825 --- /dev/null +++ b/docs/source/asr/data/benchmark_multilingual.csv @@ -0,0 +1,5 @@ +Model,Model Base Class,Model Card +stt_enes_conformer_ctc_large,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_enes_conformer_ctc_large" +stt_enes_conformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_enes_conformer_transducer_large" +stt_multilingual_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_multilingual_fastconformer_hybrid_large_pc" +stt_multilingual_fastconformer_hybrid_large_pc_blend_eu,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_multilingual_fastconformer_hybrid_large_pc_blend_eu" \ No newline at end of file diff --git a/docs/source/asr/data/benchmark_ru.csv b/docs/source/asr/data/benchmark_ru.csv index b46d4d9ca65c..66b9b321f5fe 100644 --- a/docs/source/asr/data/benchmark_ru.csv +++ b/docs/source/asr/data/benchmark_ru.csv @@ -1,3 +1,4 @@ Model,Model Base Class,Model Card stt_ru_quartznet15x5,EncDecCTCModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_ru_quartznet15x5" +stt_ru_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_ru_fastconformer_hybrid_large_pc" diff --git a/docs/source/asr/intro.rst b/docs/source/asr/intro.rst index e655da836a76..46a192c546a2 100644 --- a/docs/source/asr/intro.rst +++ b/docs/source/asr/intro.rst @@ -33,7 +33,7 @@ A demo below allows evaluation of NeMo ASR models in multiple langauges from the .. raw:: html + width="100%" class="gradio-asr" allow="microphone *"> \n", + " \n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + " \n", + " \n" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "![image.png]()" + ], + "metadata": { + "id": "UghOVs2xUG1b" + }, + "id": "UghOVs2xUG1b" + }, + { + "cell_type": "markdown", + "source": [ + "Words in the first group seem to have varied origins, with a mix of English, Germanic, and potentially Latin-derived terms.\n", + "The second group, on the other hand, also appears to contain words of varied linguistic origins but might have a slightly more European or Old World feel, especially with words like `gothic` and `heidelberg`.\n", + "\n", + "**Word Length**\n", + "\n", + "The average word length in the first group might be longer with words like `battlefields` and `postscript`.\n", + "The second group also contains long words, but when we consider `bough` or `gothic`, it might have a slightly shorter average length.\n", + "\n", + "**Functional vs. Descriptive**\n", + "\n", + "The first group contains a mix of nouns that are both functional (like `allspice` or `panada`) and more abstract or descriptive (like `dante` or `vapours`).\n", + "The second group also contains nouns, but they seem more descriptive or pertaining to concepts or themes like `gothic` or `nocturnal`." + ], + "metadata": { + "id": "5_rKgNoovt1k" + }, + "id": "5_rKgNoovt1k" + }, + { + "cell_type": "markdown", + "source": [ + "Next, we will look at several interesting examples that were easily discovered using this tool." + ], + "metadata": { + "id": "C0hcCrZI98fF" + }, + "id": "C0hcCrZI98fF" + }, + { + "cell_type": "markdown", + "source": [ + "# Examples with audio" + ], + "metadata": { + "id": "FV8rLFfr9KVv" + }, + "id": "FV8rLFfr9KVv" + }, + { + "cell_type": "markdown", + "source": [ + "(It is worth noting that the examples below are taken from LibriSpeech dev-clean set, while the rest of the tutorial is based on LibriSpeech test-other)" + ], + "metadata": { + "id": "W35kfqsYkVlf" + }, + "id": "W35kfqsYkVlf" + }, + { + "cell_type": "code", + "source": [ + "#This cell is made so that you can quickly listen to those utterances.\n", + "from IPython.display import Audio, display\n", + "!python3 ./NeMo/scripts/dataset_processing/get_librispeech_data.py --data_sets dev_clean --data_root ." + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9Q1aCuBC8QZm", + "outputId": "9bf30ea1-c15f-434d-c5cf-56a1111f9a0f" + }, + "id": "9Q1aCuBC8QZm", + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "100% 97/97 [00:05<00:00, 16.64it/s]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "![image.png]()" + ], + "metadata": { + "id": "BjgoarWOrntG" + }, + "id": "BjgoarWOrntG" + }, + { + "cell_type": "markdown", + "source": [ + "Above is example how Conformer-Small trying to use more common phrases fails with recognizing word `southwark` (Southwark is a district of Central London). We see how Conformer-Small fails on proper nouns and just names in general." + ], + "metadata": { + "id": "QogvdaUzrq2L" + }, + "id": "QogvdaUzrq2L" + }, + { + "cell_type": "code", + "source": [ + "sound_file = \"/content/LibriSpeech/dev-clean-processed/5895-34629-0012.wav\"\n", + "display(Audio(sound_file, autoplay=True))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 76 + }, + "id": "iA-wfLFE-Lke", + "outputId": "e4b35c24-3047-448c-8006-176fe68c80be" + }, + "id": "iA-wfLFE-Lke", + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "In this case, the speaker does not really pause between `over` and `all`, and QuartzNet transcribes it as a single word." + ], + "metadata": { + "id": "jyX2PVgovdjj" + }, + "id": "jyX2PVgovdjj" + }, + { + "cell_type": "markdown", + "source": [ + "![image.png]()" + ], + "metadata": { + "id": "z3jISeCTrRVI" + }, + "id": "z3jISeCTrRVI" + }, + { + "cell_type": "code", + "source": [ + "sound_file = \"/content/LibriSpeech/dev-clean-processed/652-129742-0008.wav\"\n", + "display(Audio(sound_file, autoplay=True))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 76 + }, + "id": "5aYtqrTG8Xpa", + "outputId": "b5ff0a91-0bc2-42e6-a575-611d3170731d" + }, + "id": "5aYtqrTG8Xpa", + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "This is an example of an error in the dataset.\n", + "The audio is challenging (quality is not very good, and the speaker is singing this phrase). But it is obvious that the reference transcript should be `grub pile grub pile`. Likely, the extra space in word `pile` was introduced by replacing a hyphen character with a space in the original dataset:" + ], + "metadata": { + "id": "jz9qolfuvdFj" + }, + "id": "jz9qolfuvdFj" + }, + { + "cell_type": "markdown", + "source": [ + "![image.png]()" + ], + "metadata": { + "id": "FrjjbUv0A3g9" + }, + "id": "FrjjbUv0A3g9" + }, + { + "cell_type": "code", + "source": [ + "sound_file = \"/content/LibriSpeech/dev-clean-processed/6313-76958-0023.wav\"\n", + "display(Audio(sound_file, autoplay=True))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 76 + }, + "id": "sdKQ2hvB7bIa", + "outputId": "b4e7ddcd-f124-47ea-ac93-67f68efdf707" + }, + "id": "sdKQ2hvB7bIa", + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Here is another example. I would not say that the models were wrong. When listening to the audio, you might actually think that the announcer is saying “guessed”.\n", + "\n", + "Therefore, if we see that both models make the same errors - this is a good reason to check the dataset." + ], + "metadata": { + "id": "uXbTGzVCvd8K" + }, + "id": "uXbTGzVCvd8K" + }, + { + "cell_type": "markdown", + "source": [ + "![image.png]()" + ], + "metadata": { + "id": "D4xWxRk8BLtb" + }, + "id": "D4xWxRk8BLtb" + }, + { + "cell_type": "code", + "source": [ + "sound_file = \"/content/LibriSpeech/dev-clean-processed/2428-83705-0008.wav\"\n", + "display(Audio(sound_file, autoplay=True))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 76 + }, + "id": "MsOJGMOn8ERg", + "outputId": "09269135-92b6-415d-cb4f-8d7b86d31f3e" + }, + "id": "MsOJGMOn8ERg", + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + } + ] + } + ], + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "accelerator": "GPU" + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file From 2639f77e8d1424ab754e490117164529741bd4da Mon Sep 17 00:00:00 2001 From: Eric Harper Date: Mon, 11 Sep 2023 20:35:08 -0600 Subject: [PATCH 210/512] Update ptl training ckpt conversion script to work with dist ckpt (#7416) * update ptl convert script Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * don't break legacy Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: eharper Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../megatron_ckpt_to_nemo.py | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_ckpt_to_nemo.py b/examples/nlp/language_modeling/megatron_ckpt_to_nemo.py index e2fd1d4bbcd1..1161614e0a53 100644 --- a/examples/nlp/language_modeling/megatron_ckpt_to_nemo.py +++ b/examples/nlp/language_modeling/megatron_ckpt_to_nemo.py @@ -24,10 +24,12 @@ --pipeline_model_parallel_size """ +import dis import os from argparse import ArgumentParser import torch +from genericpath import isdir from megatron.core import parallel_state from omegaconf import open_dict from pytorch_lightning.plugins.environments import TorchElasticEnvironment @@ -40,7 +42,7 @@ from nemo.collections.nlp.models.language_modeling.megatron_retrieval_model import MegatronRetrievalModel from nemo.collections.nlp.models.language_modeling.megatron_t5_model import MegatronT5Model from nemo.collections.nlp.models.machine_translation.megatron_nmt_model import MegatronNMTModel -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector from nemo.utils import AppState, logging from nemo.utils.distributed import initialize_distributed from nemo.utils.model_utils import inject_model_parallel_rank @@ -100,12 +102,16 @@ def convert(local_rank, rank, world_size, args): app_state = AppState() app_state.data_parallel_rank = 0 num_nodes = world_size // args.gpus_per_node + plugins = [] + strategy = "auto" if args.bcp: - trainer = Trainer( - devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu', plugins=[TorchElasticEnvironment()] - ) - else: - trainer = Trainer(devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu') + plugins.append(TorchElasticEnvironment()) + if args.model_type == 'gpt': + strategy = NLPDDPStrategy() + + trainer = Trainer( + devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu', plugins=plugins, strategy=strategy + ) app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size app_state.tensor_model_parallel_size = args.tensor_model_parallel_size @@ -135,8 +141,13 @@ def convert(local_rank, rank, world_size, args): app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() app_state.tensor_model_parallel_rank = parallel_state.get_tensor_model_parallel_rank() - # inject model parallel rank - checkpoint_path = inject_model_parallel_rank(os.path.join(args.checkpoint_folder, args.checkpoint_name)) + # check for distributed checkpoint + dist_ckpt_dir = os.path.join(args.checkpoint_folder, args.checkpoint_name) + if os.path.isdir(dist_ckpt_dir): + checkpoint_path = dist_ckpt_dir + else: + # legacy checkpoint needs model parallel injection + checkpoint_path = inject_model_parallel_rank(os.path.join(args.checkpoint_folder, args.checkpoint_name)) logging.info( f'rank: {rank}, local_rank: {local_rank}, is loading checkpoint: {checkpoint_path} for tp_rank: {app_state.tensor_model_parallel_rank} and pp_rank: {app_state.pipeline_model_parallel_rank}' From 9b3a91eb3bfa81acb41851d9600b7db7dfe17a30 Mon Sep 17 00:00:00 2001 From: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Date: Mon, 11 Sep 2023 21:08:01 -0700 Subject: [PATCH 211/512] Allow disabling sanity checking when num_sanity_val_steps=0 (#7413) * Allow disabling sanity checking when num_sanity_val_steps=0 Signed-off-by: Abhishree * Update num_sanity_val_steps to be a multiple of num_microbatches Signed-off-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Abhishree Signed-off-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../nlp/models/language_modeling/megatron_base_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index 4f05bbdc483b..1637e70dacc5 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -211,8 +211,8 @@ def _reconfigure_val_batches(self): """ # Override limit_val_batches to be a multiple of num microbatches and so there are limit_val_batches//num_micro_batches num of global batches self.trainer.limit_val_batches *= get_num_microbatches() - # Override num sanity steps equal to num of microbatches and perform one val_step - self.trainer.num_sanity_val_steps = get_num_microbatches() + # Override num sanity steps to be a multiple of num of microbatches + self.trainer.num_sanity_val_steps *= get_num_microbatches() def _enable_nvidia_optimizations(self): "These optimizations are present in NVIDIA NGC PyTorch Containers" From a8076053615d61a3228bf3768bc264699df49d71 Mon Sep 17 00:00:00 2001 From: PeganovAnton Date: Tue, 12 Sep 2023 06:28:39 +0200 Subject: [PATCH 212/512] Add comprehensive error messages (#7261) Signed-off-by: Anton Peganov --- .../nlp/modules/common/text_generation_utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py index d00358807d74..90ddcc5db9ae 100644 --- a/nemo/collections/nlp/modules/common/text_generation_utils.py +++ b/nemo/collections/nlp/modules/common/text_generation_utils.py @@ -190,6 +190,14 @@ def get_computeprob_response(tokenizer, response, inputs): token_len = int(inputs[1][batch_id].item()) new_token_id = inputs[0][batch_id][:token_len].tolist() new_text = tokenizer.ids_to_text(new_token_id) + else: + raise TypeError( + f"Unsupported type of `inputs[0]`: {type(inputs[0])}. Supported types: `str`, `torch.Tensor`." + ) + else: + raise TypeError( + f"Unsupported type of parameter `inputs`: {type(inputs)}. Supported types: `list` and `tuple`" + ) new_token_ids.append(new_token_id) new_tokens.append(response['tokens'][batch_id][:token_len]) new_texts.append(new_text) From 92a71ca3cb4a8b4ebabc834fe51550efc31bdea9 Mon Sep 17 00:00:00 2001 From: Nikolay Karpov Date: Tue, 12 Sep 2023 13:13:34 +0400 Subject: [PATCH 213/512] check NEMO_PATH (#7418) Signed-off-by: Nikolay Karpov --- .../ngram_lm/install_beamsearch_decoders.sh | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh b/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh index 3ba337a6afd3..b09ed351bd15 100755 --- a/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh +++ b/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh @@ -17,20 +17,25 @@ shopt -s expand_aliases NEMO_PATH=/workspace/nemo # Path to NeMo folder: /workspace/nemo if you use NeMo/Dockerfile -if [ "$#" -eq 1 ] -then +if [ "$#" -eq 1 ]; then NEMO_PATH=$1 fi KENLM_MAX_ORDER=10 # Maximum order of KenLM model, also specified in the setup_os2s_decoders.py +if [ -d "$NEMO_PATH" ]; then + echo "The folder '$NEMO_PATH' exists." +else + echo "Error: The folder '$NEMO_PATH' does not exist. Specify it as a first command line positional argument!" + exit 1 +fi cd $NEMO_PATH if [ $(id -u) -eq 0 ]; then - alias aptupdate='apt-get update' - alias b2install='./b2' - else - alias aptupdate='sudo apt-get update' - alias b2install='sudo ./b2' + alias aptupdate='apt-get update' + alias b2install='./b2' +else + alias aptupdate='sudo apt-get update' + alias b2install='sudo ./b2' fi aptupdate && apt-get upgrade -y && apt-get install -y liblzma-dev && rm -rf /var/lib/apt/lists/* # liblzma needed for flashlight decoder From 603c77ebf6e7a2f0b2b2b6d9f5c2e05e5b8b0d61 Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Wed, 13 Sep 2023 09:20:30 -0700 Subject: [PATCH 214/512] layer selection for ia3 (#7417) * layer selection for ia3 Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../nlp/models/language_modeling/megatron_gpt_peft_models.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py index 9d22cd65aac7..0c3b1adc8dbd 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py @@ -443,6 +443,11 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self.name_key_to_cfg[k] = infused_adapter_cfg else: raise ValueError(f"PEFT Key {k} is unknown.") + + self.layer_selection = cfg.peft.ia3_tuning.get("layer_selection", None) + if self.layer_selection is None: + self.layer_selection = list(range(1, cfg.num_layers + 1)) + super().__init__(cfg, trainer) From 3c4bba6d791f63f9c97edd5fe7d10d2ff9c4cd92 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Fri, 15 Sep 2023 08:52:58 +1000 Subject: [PATCH 215/512] Fix missing pip package 'einops' (#7397) Signed-off-by: Robin Dong --- tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb index 5e7f5dab7338..8544631b3af7 100644 --- a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb @@ -672,7 +672,8 @@ "# !git clone --depth 1 --branch v1.8.0 https://github.com/microsoft/onnxruntime.git .\n", "# !./build.sh --skip_tests --config Release --build_shared_lib --parallel --use_cuda --cuda_home /usr/local/cuda --cudnn_home /usr/lib/x86_64-linux-gnu --build_wheel\n", "# !pip install ./build/Linux/Release/dist/onnxruntime*.whl\n", - "# %cd .." + "# %cd ..\n", + "!pip install einops\n" ] }, { From 429932a14cd00404ccd0868131bf7ec522e2bace Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Fri, 15 Sep 2023 16:50:51 +1000 Subject: [PATCH 216/512] Fix failure of pyaudio in Google Colab (#7396) Signed-off-by: Robin Dong --- tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb index 8544631b3af7..21f2bc58a099 100644 --- a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb @@ -24,7 +24,7 @@ "\n", "## Install dependencies\n", "!pip install wget\n", - "!apt-get install sox libsndfile1 ffmpeg portaudio19-dev\n", + "!apt-get install libsndfile1 ffmpeg portaudio19-dev\n", "!pip install text-unidecode\n", "!pip install pyaudio\n", "\n", From e6ba5f0a211c048a0123255f2d118c553eb9f0ca Mon Sep 17 00:00:00 2001 From: Samuele Cornell Date: Mon, 18 Sep 2023 18:06:13 +0100 Subject: [PATCH 217/512] Update README.md: output_path --> output_manifest_filepath (#7442) Signed-off-by: Samuele Cornell --- tools/speech_data_simulator/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/speech_data_simulator/README.md b/tools/speech_data_simulator/README.md index 94b917af88f3..ad589ef9194d 100644 --- a/tools/speech_data_simulator/README.md +++ b/tools/speech_data_simulator/README.md @@ -89,7 +89,7 @@ python scripts/dataset_processing/get_librispeech_data.py \ python /scripts/speaker_tasks/create_alignment_manifest.py \ --input_manifest_filepath \ --base_alignment_path \ - --output_path train-clean-100-align.json \ + --output_manifest_filepath train-clean-100-align.json \ --ctm_output_directory ./ctm_out \ --libri_dataset_split train-clean-100 ``` From 4f12143e7d2cd375fba70ab4e5cc578e004e6307 Mon Sep 17 00:00:00 2001 From: Parth Mannan Date: Tue, 19 Sep 2023 11:25:50 -0700 Subject: [PATCH 218/512] Updating FlashAttention API to match FlashAttentionV2 --- .../stable_diffusion/conf/sd_train.yaml | 2 +- .../modules/imagen/diffusionmodules/blocks.py | 9 +++--- .../modules/stable_diffusion/attention.py | 31 +++++++++---------- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml index 14e00a38fbe1..a8973186dc4d 100644 --- a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml @@ -118,7 +118,7 @@ model: context_dim: 768 use_checkpoint: False legacy: False - use_flash_attention: False + use_flash_attention: True first_stage_config: _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py index b8e44070b570..1d6b8395a58f 100644 --- a/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py @@ -65,8 +65,7 @@ def check_cuda(): try: - from flash_attn.flash_attention import FlashAttention - from flash_attn.flash_attn_interface import flash_attn_unpadded_func, flash_attn_unpadded_kvpacked_func + from flash_attn import flash_attn_varlen_func, flash_attn_varlen_kvpacked_func flash_attn_installed = check_cuda() except ImportError: @@ -614,7 +613,7 @@ def _forward(self, x, context, mask): cu_seqlens_k[1:] = th.cumsum(mask.sum(dim=1), dim=0) cu_seqlens_k += cu_seqlens_q - out = flash_attn_unpadded_kvpacked_func( + out = flash_attn_varlen_kvpacked_func( q, kv_full_unpadded, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, 0.0 ) h = rearrange(out, '(b s) h d -> b (h d) s', b=batch_size, h=self.num_heads) @@ -699,7 +698,7 @@ def _forward(self, x): v = rearrange(v, 'b (h d) s -> (b s) h d', h=self.num_heads) cu_seqlens_q = th.arange(0, (b + 1) * max_seqlen_q, step=max_seqlen_q, dtype=th.int32, device=q.device) cu_seqlens_k = th.arange(0, (b + 1) * max_seqlen_k, step=max_seqlen_k, dtype=th.int32, device=k.device) - h = flash_attn_unpadded_func(q, k, v, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, 0.0) + h = flash_attn_varlen_func(q, k, v, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, 0.0) h = rearrange(h, '(b s) h d -> b (h d) s', b=b, h=self.num_heads) else: h, _ = self.attention(qkv) @@ -793,7 +792,7 @@ def _forward(self, x, context, mask): cu_seqlens_k = th.zeros((batch_size + 1), dtype=th.int32, device=q.device) cu_seqlens_k[1:] = th.cumsum(mask.sum(dim=1), dim=0) - out = flash_attn_unpadded_kvpacked_func( + out = flash_attn_varlen_kvpacked_func( q, kv_unpadded, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, 0.0 ) h = rearrange(out, '(b s) h d -> b (h d) s', b=batch_size, h=self.num_heads) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/attention.py b/nemo/collections/multimodal/modules/stable_diffusion/attention.py index 4c9da75d2257..6393d9f24b93 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/attention.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/attention.py @@ -38,15 +38,15 @@ def check_cuda(): try: - from flash_attn.flash_attention import FlashAttention - from flash_attn.flash_attn_interface import flash_attn_unpadded_kvpacked_func + import torch.nn as nn + from flash_attn.modules.mha import FlashCrossAttention, FlashSelfAttention flash_attn_installed = check_cuda() print("FlashAttention Installed") # Disable TorchDynamo on FlashAttention - flash_attn_unpadded_kvpacked_func = disable(flash_attn_unpadded_kvpacked_func) - FlashAttention.forward = disable(FlashAttention.forward) + FlashSelfAttention.forward = disable(FlashSelfAttention.forward) + FlashCrossAttention.forward = disable(FlashCrossAttention.forward) except ImportError: flash_attn_installed = False @@ -204,8 +204,11 @@ def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0. self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)) self.use_flash_attention = use_flash_attention - if context_dim == query_dim and dim_head <= 128 and (dim_head % 8) == 0 and flash_attn_installed: - self.flash_attn = FlashAttention(self.scale) + if dim_head <= 128 and (dim_head % 8) == 0 and flash_attn_installed: + if context_dim == query_dim: + self.flash_attn = FlashSelfAttention(self.scale) + else: + self.flash_attn = FlashCrossAttention(self.scale) def forward(self, x, context=None, mask=None): h = self.heads @@ -258,7 +261,7 @@ def _attention(self, q, k, v, mask=None): d = hd // h qkv = qkv.view(b, s, t, h, d) - out, _ = self.flash_attn(qkv) + out = self.flash_attn(qkv) out = out.view(b, s, hd) else: # cross-attention @@ -268,14 +271,10 @@ def _attention(self, q, k, v, mask=None): b, s_kv, t, hd = kv.shape d = hd // h - q = q.view(b * s_q, h, d) - kv = kv.view(b * s_kv, t, h, d) - - cu_seqlens_q = torch.arange(0, (b + 1) * s_q, step=s_q, dtype=torch.int32, device=q.device) - cu_seqlens_k = torch.arange(0, (b + 1) * s_kv, step=s_kv, dtype=torch.int32, device=kv.device) - - out = flash_attn_unpadded_kvpacked_func(q, kv, cu_seqlens_q, cu_seqlens_k, s_q, s_kv, 0.0, self.scale) + q = q.view(b, s_q, h, d) + kv = kv.view(b, s_kv, t, h, d) + out = self.flash_attn(q, kv) out = out.view(b, s_q, hd) return out @@ -394,7 +393,7 @@ def forward(self, x, context=None): if not self.use_linear: x = self.proj_in(x) x = x.view(b, c, -1).transpose(1, 2) # b c h w -> b (h w) c - x = x.contiguous() # workaround for dynamo ddp bug + # x = x.contiguous() # workaround for dynamo ddp bug if self.use_linear: x = self.proj_in(x) for i, block in enumerate(self.transformer_blocks): @@ -402,7 +401,7 @@ def forward(self, x, context=None): if self.use_linear: x = self.proj_out(x) x = x.transpose(1, 2).view(b, c, h, w) # b (h w) c -> b c h w - x = x.contiguous() # workaround for dynamo ddp bu + # x = x.contiguous() # workaround for dynamo ddp bu if not self.use_linear: x = self.proj_out(x) return x + x_in From 70a36024857ba6408a9e74c1423f765bdeb36652 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Tue, 19 Sep 2023 14:35:34 -0700 Subject: [PATCH 219/512] Multiple fixes for mm --- .../models/instruct_pix2pix/ldm/ddpm_edit.py | 1 - .../models/multimodal_base_model.py | 708 ------------------ .../vision/models/vision_base_model.py | 635 ---------------- 3 files changed, 1344 deletions(-) delete mode 100644 nemo/collections/multimodal/models/multimodal_base_model.py delete mode 100644 nemo/collections/vision/models/vision_base_model.py diff --git a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py index 600b26eeebcc..7cfd8be460b0 100644 --- a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py +++ b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py @@ -23,7 +23,6 @@ import torch import torch.nn as nn from einops import rearrange, repeat -from pytorch_lightning.utilities.distributed import rank_zero_only from torch.optim.lr_scheduler import LambdaLR from torchvision.utils import make_grid from tqdm import tqdm diff --git a/nemo/collections/multimodal/models/multimodal_base_model.py b/nemo/collections/multimodal/models/multimodal_base_model.py deleted file mode 100644 index 3e10e3a49a16..000000000000 --- a/nemo/collections/multimodal/models/multimodal_base_model.py +++ /dev/null @@ -1,708 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import copy -import hashlib -import json -import os -import re -from typing import Any, Dict, Optional, Union - -import torch -from lightning_fabric.utilities.cloud_io import _load as pl_load -from omegaconf import open_dict -from omegaconf.dictconfig import DictConfig -from pytorch_lightning.core.saving import _load_state as ptl_load_state -from pytorch_lightning.core.saving import load_hparams_from_tags_csv, load_hparams_from_yaml -from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import _FxValidator -from pytorch_lightning.trainer.trainer import Trainer -from pytorch_lightning.utilities import rank_zero_only -from pytorch_lightning.utilities.migration import pl_legacy_patch -from transformers import TRANSFORMERS_CACHE - -from nemo.collections.nlp.modules.common.megatron.attention import HAVE_FLASH_ATTENTION -from nemo.collections.nlp.modules.common.megatron.clip_grads import ( - clip_grad_norm_distributed_optimizer, - clip_grad_norm_fp32, -) -from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo -from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer, get_tokenizer -from nemo.collections.nlp.parts.nlp_overrides import ( - NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE, - GradScaler, - NLPSaveRestoreConnector, -) -from nemo.core.classes import ModelPT -from nemo.core.classes.exportable import Exportable -from nemo.core.optim import MainParamsOptimizerWrapper, prepare_lr_scheduler -from nemo.utils import AppState, logging -from nemo.utils.get_rank import is_global_rank_zero - -try: - from apex.transformer.pipeline_parallel.utils import get_num_microbatches - - HAVE_APEX = True -except (ImportError, ModuleNotFoundError): - HAVE_APEX = False - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True -except (ImportError, ModuleNotFoundError): - HAVE_MEGATRON_CORE = False - - -__all__ = ['MultimodalModel', 'MegatronMultimodalModel'] - -NEMO_MULTIMODAL_TMP = os.path.join(os.path.dirname(str(TRANSFORMERS_CACHE)), "nemo_multimodal_tmp") - -os.makedirs(NEMO_MULTIMODAL_TMP, exist_ok=True) - - -class MultimodalModel(ModelPT, Exportable): - """Base class for Multimodal Models. - """ - - def __init__(self, cfg: DictConfig, trainer: Trainer = None): - - super().__init__(cfg, trainer) - - # handles model parallel save and restore logic - self._save_restore_connector = NLPSaveRestoreConnector() - - # TODO(yuya): check below - @property - def input_module(self): - return self - - @property - def output_module(self): - return self - - @property - def is_model_parallel_initialized(self): - app_state = AppState() - if app_state.model_parallel_group is not None: - return True - else: - return False - - @classmethod - def load_from_checkpoint( - cls, - checkpoint_path: str, - map_location: Any = None, - hparams_file: Optional[str] = None, - strict: bool = True, - **kwargs, - ): - """ - Loads ModelPT from checkpoint, with some maintenance of restoration. - For documentation, please refer to LightningModule.load_from_checkpoin() documentation. - """ - checkpoint = None - try: - cls._set_model_restore_state(is_being_restored=True) - # TODO: replace with proper PTL API - with pl_legacy_patch(): - if map_location is not None: - checkpoint = pl_load(checkpoint_path, map_location=map_location) - else: - checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage) - - if hparams_file is not None: - extension = hparams_file.split(".")[-1] - if extension.lower() == "csv": - hparams = load_hparams_from_tags_csv(hparams_file) - elif extension.lower() in ("yml", "yaml"): - hparams = load_hparams_from_yaml(hparams_file) - else: - raise ValueError(".csv, .yml or .yaml is required for `hparams_file`") - - hparams["on_gpu"] = False - - # overwrite hparams by the given file - checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = hparams - - # for past checkpoint need to add the new key - if cls.CHECKPOINT_HYPER_PARAMS_KEY not in checkpoint: - checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = {} - # override the hparams with values that were passed in - cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].get('cfg', checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY]) - # TODO: can we do this without overriding? - config_kwargs = kwargs.copy() - if 'trainer' in config_kwargs: - config_kwargs.pop('trainer') - cfg.update(config_kwargs) - - # Disable individual unet/vae weights loading otherwise the model will look for these partial ckpts and raise error - if cfg: - if cfg.get('unet_config') and cfg.get('unet_config').get('from_pretrained'): - cfg.unet_config.from_pretrained = None - if cfg.get('first_stage_config') and cfg.get('first_stage_config').get('from_pretrained'): - cfg.first_stage_config.from_pretrained = None - ## Now when we covert ckpt to nemo, let's always get rid of those _orig_mod - if cfg.get('inductor'): - cfg.inductor = False - ## Append some dummy configs that DB didn't support - if not cfg.get('channels_last'): - cfg.channels_last = True - if not cfg.get('capture_cudagraph_iters'): - cfg.capture_cudagraph_iters = -1 - - # compatibility for stable diffusion old checkpoint tweaks - first_key = list(checkpoint['state_dict'].keys())[0] - if first_key == "betas": - # insert "model." into for megatron wrapper - new_state_dict = {} - for key in checkpoint['state_dict'].keys(): - new_key = "model." + key - new_state_dict[new_key] = checkpoint['state_dict'][key] - checkpoint['state_dict'] = new_state_dict - elif ( - first_key == 'model.text_encoder.transformer.text_model.embeddings.position_ids' - or first_key == 'model.text_encoder.model.language_model.embedding.position_embeddings' - ): - # remap state keys from dreambooth when using HF clip - new_state_dict = {} - for key in checkpoint['state_dict'].keys(): - new_key = key.replace('._orig_mod', "") - new_key = new_key.replace('unet', 'model.diffusion_model') - new_key = new_key.replace('vae', 'first_stage_model') - new_key = new_key.replace('text_encoder', 'cond_stage_model') - new_key = new_key.replace('.noise_scheduler', '') - new_state_dict[new_key] = checkpoint['state_dict'][key] - checkpoint['state_dict'] = new_state_dict - - # compatibility for inductor in inference - if not cfg.get('inductor', False): - new_state_dict = {} - for key in checkpoint['state_dict'].keys(): - new_key = key.replace('._orig_mod', '', 1) - new_state_dict[new_key] = checkpoint['state_dict'][key] - checkpoint['state_dict'] = new_state_dict - - if cfg.get('megatron_amp_O2', False): - new_state_dict = {} - for key in checkpoint['state_dict'].keys(): - new_key = key.replace('model.', 'model.module.', 1) - new_state_dict[new_key] = checkpoint['state_dict'][key] - checkpoint['state_dict'] = new_state_dict - - if 'cfg' in kwargs: - model = ptl_load_state(cls, checkpoint, strict=strict, **kwargs) - else: - model = ptl_load_state(cls, checkpoint, strict=strict, cfg=cfg, **kwargs) - # cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].cfg - - checkpoint = model - - finally: - cls._set_model_restore_state(is_being_restored=False) - return checkpoint - - -class MegatronMultimodalModel(MultimodalModel): - """ - Megatron multimodal base class - It does the following things: - 1. Initialize the model parallel for nemo given the model parallel parameters. - 2. Turn on all the nvidia optimizations. - 3. If using distributed optimizer, configure to be compatible with - O2-level optimizations and/or model parallelism. - 4. Perform gradient clipping: `grad_clip_pl_default` triggers the - PyTorch Lightning default implementation, `with_distributed_adam` - triggers the distributed optimizer's implementation, - `megatron_amp_O2` triggers gradient clipping on the main grads, - and otherwise gradient clipping is performed on the model grads. - """ - - def __init__(self, cfg: DictConfig, trainer: Trainer): - - if not HAVE_MEGATRON_CORE: - raise ImportError( - "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." - ) - - if trainer is None: - raise ValueError(f"Trainer cannot be None for Megatron-based models. Please provide a PTL trainer object.") - - if cfg.get('use_flash_attention', False) and not HAVE_FLASH_ATTENTION: - raise ImportError( - "flash_attn was not found. Please see the installation instructions: https://github.com/HazyResearch/flash-attention." - "If you use flash_attn with triton. Please install triton==2.0.0.dev20221202." - ) - - # this prevents base constructor from initializing tokenizer - self.tokenizer = None - super().__init__(cfg, trainer=trainer) - - self._validate_config() - - self.with_distributed_adam = cfg.optim.get('name') == 'distributed_fused_adam' - - # used in NVIDIA NGC PyTorch containers - self._enable_nvidia_optimizations() - - if self._cfg.get('use_cpu_initialization', False) is False: - torch.cuda.set_device(trainer.local_rank) - - # buffer used during train_step for logging average loss over gradient accumulation steps - self._reduced_loss_buffer = [] - - # Overrides used when converting checkpoints - if os.environ.get(NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE, "false").lower() == "true": - app_state = AppState() - init_world_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size - init_global_rank = app_state.global_rank - init_local_rank = app_state.local_rank - else: - init_world_size = trainer.world_size - init_global_rank = trainer.global_rank - init_local_rank = trainer.local_rank - - initialize_model_parallel_for_nemo( - world_size=init_world_size, - global_rank=init_global_rank, - local_rank=init_local_rank, - tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1), - pipeline_model_parallel_size=cfg.get('pipeline_model_parallel_size', 1), - virtual_pipeline_model_parallel_size=cfg.get('virtual_pipeline_model_parallel_size', None), - pipeline_model_parallel_split_rank=cfg.get('pipeline_model_parallel_split_rank', 0), - micro_batch_size=cfg.get('micro_batch_size'), - global_batch_size=cfg.get('global_batch_size'), - rampup_batch_size=cfg.get('rampup_batch_size'), - use_fp8=cfg.get('fp8', False), - init_mpi_proc_group=cfg.get('ub_tp_comm_overlap', False), - seed=self.cfg.get('seed', 1234), - apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30), - ) - - self.grad_clip_pl_default = False # use pytorch default for gradient clipping. Default False - - if hasattr(self._cfg, "tokenizer") or ( - hasattr(self._cfg, "encoder_tokenizer") and hasattr(self._cfg, "decoder_tokenizer") - ): - # build tokenizer (defaults to nemo supported tokenizers) - self._build_tokenizer() - - # manipulate vocabulary (e.g., pad vocabulary for better efficiency) - self._build_vocab() - - # TODO: remove this when PTL 1.7.3 is released - _FxValidator.functions["configure_gradient_clipping"] = { - "allowed_on_step": (False, True), - "allowed_on_epoch": (False, True), - "default_on_step": True, - "default_on_epoch": False, - } - - self.gc_interval = cfg.get('gc_interval', 0) - assert self.gc_interval >= 0, "gc_interval should be an integer value larger than or equal to 0." - # If gc_interval > 0, memory garbage collection is manually controlled. - # The automatic garbage collector sould be disabled before training starts. - if self.gc_interval > 0: - gc.disable() - self.validation_global_step = 1 - - def _enable_nvidia_optimizations(self): - "These optimizations are present in NVIDIA NGC PyTorch Containers" - - # NVIDIA container version check - nvidia_torch_version = os.getenv('NVIDIA_PYTORCH_VERSION', None) - if nvidia_torch_version is not None: - try: - NVIDIA_TORCH_MAJOR = int(nvidia_torch_version.split('.')[0]) - except Exception: - NVIDIA_TORCH_MAJOR = 0 - try: - NVIDIA_TORCH_MINOR = int(nvidia_torch_version.split('.')[1]) - except Exception: - NVIDIA_TORCH_MINOR = 0 - - # Apex Persistent layer norm is supported from Nvidia PyTorch container v21.11 - if NVIDIA_TORCH_MAJOR < 21 or (NVIDIA_TORCH_MAJOR == 21 and NVIDIA_TORCH_MINOR < 11): - self.cfg.persist_layer_norm = False - - if NVIDIA_TORCH_MAJOR >= 21 or (NVIDIA_TORCH_MAJOR == 21 and NVIDIA_TORCH_MINOR >= 11): - # NVFUSER - torch._C._jit_set_profiling_executor(True) - torch._C._jit_set_profiling_mode(True) - torch._C._jit_override_can_fuse_on_cpu(False) - torch._C._jit_override_can_fuse_on_gpu(False) - torch._C._jit_set_texpr_fuser_enabled(False) - torch._C._jit_set_nvfuser_enabled(True) - torch._C._debug_set_autodiff_subgraph_inlining(False) - - else: - # Not a Nvidia container. NVFUSER Dependency check is on users - pass - - def _build_tokenizer(self): - """ - Default tokenizer is based on available nemo tokenizers. - Override this method to use an external tokenizer. - All tokenizers are expected to provide compatible interface. - Override default Encoder-decoder tokenizer to use legacy=True for sentencepiece. - """ - if hasattr(self._cfg.tokenizer, "sentencepiece_legacy"): - legacy = self._cfg.tokenizer.sentencepiece_legacy - else: - legacy = True if self._cfg.tokenizer.library == 'sentencepiece' else False - self.tokenizer = get_nmt_tokenizer( - library=self._cfg.tokenizer.library, - model_name=self._cfg.tokenizer.type, - tokenizer_model=self.register_artifact("tokenizer.model", self._cfg.tokenizer.get('model', None)), - vocab_file=self.register_artifact("tokenizer.vocab_file", self._cfg.tokenizer.get('vocab_file', None)), - merges_file=self.register_artifact("tokenizer.merge_file", self._cfg.tokenizer.get('merge_file', None)), - use_fast=self.cfg.tokenizer.get('use_fast', False), - delimiter=self.cfg.tokenizer.get('delimiter', None), - legacy=legacy, - ) - - if self._cfg.tokenizer.get('additional_special_tokens', None) is not None: - tokens_list = omegaconf.OmegaConf.to_object(self._cfg.tokenizer.additional_special_tokens) - self.tokenizer.add_special_tokens({'additional_special_tokens': tokens_list}) - - def on_train_start(self) -> None: - super().on_train_start() - self.init_global_step = self.trainer.global_step - - def on_validation_start(self) -> None: - super().on_validation_start() - if self.gc_interval > 0: - gc.collect() - - def on_validation_end(self) -> None: - super().on_validation_end() - if self.gc_interval > 0: - gc.collect() - - def _build_vocab(self): - """ - Manipulate vocabulary (e.g., pad vocabulary for increased performance)/ - """ - # TODO: add config to allow to disable it? - self.padded_vocab_size = self._vocab_size_with_padding( - orig_vocab_size=self.tokenizer.vocab_size, - make_vocab_size_divisible_by=self._cfg.get('make_vocab_size_divisible_by', 128), - tensor_model_parallel_size=self._cfg.get('tensor_model_parallel_size', 1), - ) - - def _vocab_size_with_padding(self, orig_vocab_size, make_vocab_size_divisible_by, tensor_model_parallel_size): - """Pad vocab size so it is divisible by model parallel size and - still having GPU friendly size.""" - - after = orig_vocab_size - multiple = make_vocab_size_divisible_by * tensor_model_parallel_size - while (after % multiple) != 0: - after += 1 - logging.info( - f'Padded vocab_size: {after}, original vocab_size: {orig_vocab_size}, dummy tokens: {after - orig_vocab_size}.' - ) - return after - - def get_parameters_with_grad(self): - """ - Get all parameters with grad from optimizer param groups - """ - params = [] - for param_group in self._optimizer_param_groups: - for param in param_group['params']: - if ( - param.grad is not None - ): # (@adithyare) adapter training with pp>1 can result in params with no grads - params.append(param) - return params - - def configure_gradient_clipping(self, *args, **kwargs): - """PTL hook to configure gradients. - We use gradient clipping implementation from megatron-lm. - """ - clip_val = self.trainer.gradient_clip_val - if clip_val is None: - return - - clip_val = float(clip_val) - if clip_val <= 0: - return - - if self.grad_clip_pl_default: - # use the default behavior - return super().configure_gradient_clipping(*args, **kwargs) - - if self.with_distributed_adam: - grad_norm = clip_grad_norm_distributed_optimizer(self._optimizer, clip_val) - else: - if self.megatron_amp_O2: - # grep fp32 master parameters for gradient clipping - parameters = self._optimizer.get_parameters_with_grad() - else: - parameters = self.get_parameters_with_grad() - grad_norm = clip_grad_norm_fp32(parameters=parameters, max_norm=clip_val) - - self.log('grad_norm', grad_norm, rank_zero_only=True, batch_size=1) - - def allreduce_gradients(self): - """Reduce gradients across data parallel ranks. - Modified from megatron-lm: https://github.com/NVIDIA/Megatron-LM/blob/d41696840ed0a7edb7e0499eb82a48ae112d9bb3/megatron/model/distributed.py#L188 - """ - # Bucketize and all-reduce - buckets = {} - for param in self.parameters(): - if param.requires_grad and param.grad is not None: - tp = param.data.type() - if tp not in buckets: - buckets[tp] = [] - buckets[tp].append(param) - # param.main_grad = param.grad - - # For each bucket, all-reduce and copy all-reduced grads. - for tp in buckets: - bucket = buckets[tp] - grads = [param.grad.data for param in bucket] - coalesced = torch._utils._flatten_dense_tensors(grads) - coalesced /= parallel_state.get_data_parallel_world_size() - torch.distributed.all_reduce(coalesced, group=parallel_state.get_data_parallel_group()) - for buf, synced in zip(grads, torch._utils._unflatten_dense_tensors(coalesced, grads)): - buf.copy_(synced) - - def reduce_overlap_gradients(self, params=None): - """Reduce grads if overlapped grad sync is enabled - - Used for pipeline parallelism with the distributed Adam - optimizer. In the first pipeline stage, the grad sync is - overlapped with the final backward pass. In other pipeline - stages, the grad sync is deferred until the bubble overhead. - - """ - if self.with_distributed_adam and self._optimizer.overlap_grad_sync: - if params is None: - params = self._optimizer.parameters() - self._optimizer.try_grad_sync(params) - - def sync_overlap_parameters(self, params=None): - if self.with_distributed_adam: - self._optimizer._try_start_bucket_param_sync(params) - - def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unused: Optional[int] = 0) -> None: - super().on_train_batch_end(outputs, dataloader_iter, batch_idx) - - # TODO: Replace with newer override for scheduler.step() instead of - # search for plugins for fp16 GradScalar - if self.trainer.precision_plugin is not None and isinstance( - self.trainer.precision_plugin, NativeMixedPrecisionPlugin - ): - precision_plugin = self.trainer.precision_plugin - - if ( - hasattr(precision_plugin, 'scaler') - and precision_plugin.scaler is not None - and isinstance(precision_plugin.scaler, GradScaler) - ): - grad_scaler = precision_plugin.scaler - - # If the grad scaler skipped its optimizer step due to infs/nans, - # decrement the step of all schedulers. - if grad_scaler.optimizer_update_skipped is not None and grad_scaler.optimizer_update_skipped is True: - scheduler_cfgs = self.trainer.lr_scheduler_configs - - if not scheduler_cfgs or not self.trainer.lightning_module.automatic_optimization: - return - - for scheduler_cfg in scheduler_cfgs: - # Decrement the counter by 2, then perform a scheduler.step() to perform a no-up - # as well as update the optimizer lr in all param groups - scheduler_cfg.scheduler.last_epoch -= 2 - scheduler_cfg.scheduler.step() - - # Increase the max step count by 1 - - # Reset the optimizer update skipped to `None` - this is to prevent scheduler no-ops during - # accumulated gradient updates. - grad_scaler.optimizer_update_skipped = None - - if self.gc_interval > 0 and (self.trainer.global_step % self.gc_interval == 0): - gc.collect() - - def on_validation_batch_end(self, outputs, batch: Any, batch_idx: int, dataloader_idx: int) -> None: - super().on_validation_batch_end(outputs, batch, batch_idx, dataloader_idx) - - if self.gc_interval > 0: - if self.validation_global_step % self.gc_interval == 0: - gc.collect() - self.validation_global_step += 1 - - def setup_optimization( - self, optim_config: Optional[Union[DictConfig, Dict]] = None, optim_kwargs: Optional[Dict[str, Any]] = None, - ): - optim_kwargs = {} if optim_kwargs is None else optim_kwargs.copy() - if self.with_distributed_adam: - - # Make sure optimizer state is in FP32 - optim_dtype = torch.float32 - optim_kwargs['dtype'] = optim_dtype - - # Match param allgather with model dtype - model_dtype = torch.float32 - if self.megatron_amp_O2 and hasattr(self, 'autocast_dtype'): - model_dtype = self.autocast_dtype - optim_kwargs['param_sync_dtype'] = model_dtype - - # Determine whether to store master params in optimizer - if optim_dtype == model_dtype: - optim_kwargs['store_params'] = False - elif optim_dtype == torch.float32 and model_dtype == torch.bfloat16: - optim_kwargs['store_params'] = False - optim_kwargs['store_param_remainders'] = True - else: - optim_kwargs['store_params'] = True - - return super().setup_optimization(optim_config=optim_config, optim_kwargs=optim_kwargs) - - def configure_optimizers(self): - self.setup_optimization() - - # Wrap the baseline optimizer with the optimizer class with master parameters - if self.megatron_amp_O2 and not self.with_distributed_adam and self._optimizer is not None: - if self.cfg.precision == 'bf16': - fp32_grad_accum = True - contiguous_grad_bucket = True - elif self.cfg.precision == 16: - fp32_grad_accum = False - # TODO: contiguous grad bucket for fp16 is also planned to be supported - contiguous_grad_bucket = False - raise ValueError( - "fp16 training is not yet supported with O2. Please set megatron_amp_O2 to False in the model config." - ) - - # if using tensor parallel only, we automatically use async grad all-reduce - # if using pipeline parallel or sequence parallel or gradient accumulation fusion, then we disable it - # if self.cfg.get('pipeline_model_parallel_size', 1) == 1 and not ( - # self.cfg.get('sequence_parallel', False) or self.cfg.get('gradient_accumulation_fusion', False) - # ): - # async_grad_allreduce = True - # else: - # async_grad_allreduce = False - - async_grad_allreduce = False - - if async_grad_allreduce: - # we need this to be configurable until make_nccl_premul_sum is in public PyTorch. - # currently cannot be imported in PyTorch 1.12.0 - grad_div_ar_fusion = self.cfg.get('grad_div_ar_fusion', False) - else: - grad_div_ar_fusion = False - - self._optimizer = MainParamsOptimizerWrapper( - self._optimizer, - fp32_grad_accum=fp32_grad_accum, - contiguous_grad_bucket=contiguous_grad_bucket, - async_grad_allreduce=async_grad_allreduce, - grad_div_ar_fusion=grad_div_ar_fusion, - grad_allreduce_chunk_size_mb=self.cfg.get('grad_allreduce_chunk_size_mb', 125), - ) - - assert self._trainer.max_steps is not None, "'max_steps' is missing in trainer config." - if hasattr(self._cfg.optim, 'sched'): - sched_config = self._cfg.optim.sched - sched_config['max_steps'] = self._trainer.max_steps - self._scheduler = prepare_lr_scheduler( - optimizer=self._optimizer, scheduler_config=sched_config, train_dataloader=self._train_dl - ) - - # Configure distributed optimizer - if self.with_distributed_adam: - - # Initialize param buckets if explicitly provided - if hasattr(self, 'distributed_adam_buckets'): - for bucket in self.distributed_adam_buckets: - self._optimizer.init_params_bucket(bucket) - del self.distributed_adam_buckets - - # Make sure all params are initialized so main grads are - # available - # Note: Consolidate grads without overlap - overlap_params = [] - no_overlap_params = [] - for p in self.parameters(): - if getattr(p, '_disable_overlap_grad_sync', False): - no_overlap_params.append(p) - else: - overlap_params.append(p) - self._optimizer.init_params(reversed(overlap_params)) - self._optimizer.init_params(reversed(no_overlap_params)) - - # Initialize contiguous parameter buffer - self._optimizer.init_param_buffer() - - if self._scheduler is None: - return self._optimizer - else: - return [self._optimizer], [self._scheduler] - - def compute_consumed_samples(self, steps_since_resume=0): - app_state = AppState() - consumed_samples = ( - self.init_consumed_samples - + steps_since_resume * app_state.data_parallel_size * self.cfg.micro_batch_size * get_num_microbatches() - ) - return int(consumed_samples) - - def _extract_consumed_samples_from_ckpt(self, ckpt_path): - try: - init_consumed_samples = int(float(re.findall(r"consumed_samples\=([0-9]+.[0-9]+)", ckpt_path)[0])) - except (ValueError, TypeError, IndexError): - logging.warning("Cannot parse the checkpoint file to get the consumed samples. assume it is zero.") - init_consumed_samples = 0 - - return init_consumed_samples - - def _validate_config(self): - """ Certain configurations might be incompatible or discouraged. We can check for them here.""" - - if self.cfg.get('sequence_parallel', False) and self.cfg.get('tensor_model_parallel_size', 1) == 1: - logging.info( - "Sequence parallel should only be used with tensor parallel size > 1. Setting sequence parallel to False" - ) - with open_dict(self.cfg): - self.cfg.sequence_parallel = False - - if ( - self.cfg.get('gradient_accumulation_fusion', False) - and self.cfg.get('pipeline_model_parallel_size', 1) == 1 - ): - logging.info("Gradient accumulation fusion can only be used with pipeline parallel size > 1.") - with open_dict(self.cfg): - self.cfg.gradient_accumulation_fusion = False - - if self.cfg.get('gradient_accumulation_fusion', False) and not self.cfg.get('megatron_amp_O2', False): - logging.info("Gradient accumulation fusion can only be used with megatron amp O2 mixed precision.") - with open_dict(self.cfg): - self.cfg.gradient_accumulation_fusion = False - - def is_data_parallel_rank_zero(self): - if is_global_rank_zero(): - return True - else: - try: - data_parallel_rank = parallel_state.get_data_parallel_rank() - except: - data_parallel_rank = None - - if data_parallel_rank is not None and data_parallel_rank == 0: - return True - else: - return False diff --git a/nemo/collections/vision/models/vision_base_model.py b/nemo/collections/vision/models/vision_base_model.py deleted file mode 100644 index a7eb67ff200c..000000000000 --- a/nemo/collections/vision/models/vision_base_model.py +++ /dev/null @@ -1,635 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import re -from typing import Any, Dict, Optional, Union - -import torch -from omegaconf import open_dict -from omegaconf.dictconfig import DictConfig -from pytorch_lightning.core.saving import _load_state as ptl_load_state -from pytorch_lightning.core.saving import load_hparams_from_tags_csv, load_hparams_from_yaml -from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import _FxValidator -from pytorch_lightning.trainer.trainer import Trainer -from pytorch_lightning.utilities.cloud_io import load as pl_load -from pytorch_lightning.utilities.migration import pl_legacy_patch -from transformers import TRANSFORMERS_CACHE - -from nemo.collections.nlp.modules.common.megatron.attention import HAVE_FLASH_ATTENTION -from nemo.collections.nlp.modules.common.megatron.clip_grads import ( - clip_grad_norm_distributed_optimizer, - clip_grad_norm_fp32, -) -from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo -from nemo.collections.nlp.parts.nlp_overrides import ( - NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE, - GradScaler, - NLPSaveRestoreConnector, -) -from nemo.core.classes import ModelPT -from nemo.core.classes.exportable import Exportable -from nemo.core.optim import MainParamsOptimizerWrapper, prepare_lr_scheduler -from nemo.utils import AppState, logging -from nemo.utils.get_rank import is_global_rank_zero - -try: - from apex.transformer.pipeline_parallel.utils import get_num_microbatches - - HAVE_APEX = True -except (ImportError, ModuleNotFoundError): - HAVE_APEX = False - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True -except (ImportError, ModuleNotFoundError): - HAVE_MEGATRON_CORE = False - - -__all__ = ['VisionModel', 'MegatronVisionModel'] - -NEMO_VISION_TMP = os.path.join(os.path.dirname(str(TRANSFORMERS_CACHE)), "nemo_vision_tmp") - -os.makedirs(NEMO_VISION_TMP, exist_ok=True) - - -class VisionModel(ModelPT, Exportable): - """Base class for Vision Models. - """ - - def __init__(self, cfg: DictConfig, trainer: Trainer = None): - - super().__init__(cfg, trainer) - - # handles model parallel save and restore logic - self._save_restore_connector = NLPSaveRestoreConnector() - - # TODO(yuya): check below - @property - def input_module(self): - return self - - @property - def output_module(self): - return self - - @property - def is_model_parallel_initialized(self): - app_state = AppState() - if app_state.model_parallel_group is not None: - return True - else: - return False - - @classmethod - def load_from_checkpoint( - cls, - checkpoint_path: str, - map_location: Any = None, - hparams_file: Optional[str] = None, - strict: bool = True, - **kwargs, - ): - """ - Loads ModelPT from checkpoint, with some maintenance of restoration. - For documentation, please refer to LightningModule.load_from_checkpoin() documentation. - """ - checkpoint = None - try: - cls._set_model_restore_state(is_being_restored=True) - # TODO: replace with proper PTL API - with pl_legacy_patch(): - if map_location is not None: - checkpoint = pl_load(checkpoint_path, map_location=map_location) - else: - checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage) - - if hparams_file is not None: - extension = hparams_file.split(".")[-1] - if extension.lower() == "csv": - hparams = load_hparams_from_tags_csv(hparams_file) - elif extension.lower() in ("yml", "yaml"): - hparams = load_hparams_from_yaml(hparams_file) - else: - raise ValueError(".csv, .yml or .yaml is required for `hparams_file`") - - hparams["on_gpu"] = False - - # overwrite hparams by the given file - checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = hparams - - # for past checkpoint need to add the new key - if cls.CHECKPOINT_HYPER_PARAMS_KEY not in checkpoint: - checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = {} - # override the hparams with values that were passed in - cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].get('cfg', checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY]) - # TODO: can we do this without overriding? - config_kwargs = kwargs.copy() - if 'trainer' in config_kwargs: - config_kwargs.pop('trainer') - cfg.update(config_kwargs) - - if cfg.get('megatron_amp_O2', False): - new_state_dict = {} - for key in checkpoint['state_dict'].keys(): - new_key = key.replace('model.', 'model.module.', 1) - new_state_dict[new_key] = checkpoint['state_dict'][key] - checkpoint['state_dict'] = new_state_dict - - if 'cfg' in kwargs: - model = ptl_load_state(cls, checkpoint, strict=strict, **kwargs) - else: - model = ptl_load_state(cls, checkpoint, strict=strict, cfg=cfg, **kwargs) - # cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].cfg - - checkpoint = model - - finally: - cls._set_model_restore_state(is_being_restored=False) - return checkpoint - - -class MegatronVisionModel(VisionModel): - """ - Megatron vision base class - - - Initialize the model parallel world for nemo. - - Turn on all of the nvidia optimizations. - - If `cfg.tokenizer` is available, it loads the tokenizer and pad the vocab to the - correct size for tensor model parallelism. - - If using distributed optimizer, configure to be compatible - with O2 level optimizations and/or model parallelism. - - Perform gradient clipping: `grad_clip_pl_default` triggers - the PyTorch Lightning default implementation, `with_distributed_adam` triggers - the distributed optimizer's implementation, `megatron_amp_O2` triggers gradient clipping on the main grads, - and otherwise gradient clipping is performed on the model grads. - """ - - def __init__(self, cfg: DictConfig, trainer: Trainer): - - if not HAVE_MEGATRON_CORE: - raise ImportError( - "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." - ) - - if trainer is None: - raise ValueError(f"Trainer cannot be None for Megatron-based models. Please provide a PTL trainer object.") - - if cfg.get('use_flash_attention', False) and not HAVE_FLASH_ATTENTION: - raise ImportError( - "flash_attn was not found. Please see the installation instructions: https://github.com/HazyResearch/flash-attention." - "If you use flash_attn with triton. Please install triton==2.0.0.dev20221202." - ) - - super().__init__(cfg, trainer=trainer) - - self.with_distributed_adam = cfg.optim.get('name') == 'distributed_fused_adam' - - # used in NVIDIA NGC PyTorch containers - self._enable_nvidia_optimizations() - - if self._cfg.get('use_cpu_initialization', False) is False: - torch.cuda.set_device(trainer.local_rank) - - # buffer used during train_step for logging average loss over gradient accumulation steps - self._reduced_loss_buffer = [] - - # Overrides used when converting checkpoints - if os.environ.get(NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE, "false").lower() == "true": - app_state = AppState() - init_world_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size - init_global_rank = app_state.global_rank - init_local_rank = app_state.local_rank - else: - init_world_size = trainer.world_size - init_global_rank = trainer.global_rank - init_local_rank = trainer.local_rank - - initialize_model_parallel_for_nemo( - world_size=init_world_size, - global_rank=init_global_rank, - local_rank=init_local_rank, - tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1), - pipeline_model_parallel_size=cfg.get('pipeline_model_parallel_size', 1), - virtual_pipeline_model_parallel_size=cfg.get('virtual_pipeline_model_parallel_size', None), - pipeline_model_parallel_split_rank=cfg.get('pipeline_model_parallel_split_rank', 0), - micro_batch_size=cfg.get('micro_batch_size'), - global_batch_size=cfg.get('global_batch_size'), - rampup_batch_size=cfg.get('rampup_batch_size'), - use_fp8=cfg.get('fp8', False), - init_mpi_proc_group=cfg.get('ub_tp_comm_overlap', False), - seed=self.cfg.get('seed', 1234), - apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30), - ) - - # This must be called after initialize model parallel since it needs to know the data parallel size - self._validate_and_override_config() - - self.grad_clip_pl_default = False # use pytorch default for gradient clipping. Default False - - # TODO: remove this when PTL 1.7.3 is released - _FxValidator.functions["configure_gradient_clipping"] = { - "allowed_on_step": (False, True), - "allowed_on_epoch": (False, True), - "default_on_step": True, - "default_on_epoch": False, - } - - self.gc_interval = cfg.get('gc_interval', 0) - assert self.gc_interval >= 0, "gc_interval should be an integer value larger than or equal to 0." - # If gc_interval > 0, memory garbage collection is manually controlled. - # The automatic garbage collector sould be disabled before training starts. - if self.gc_interval > 0: - gc.disable() - self.validation_global_step = 1 - - def _enable_nvidia_optimizations(self): - "These optimizations are present in NVIDIA NGC PyTorch Containers" - - # NVIDIA container version check - nvidia_torch_version = os.getenv('NVIDIA_PYTORCH_VERSION', None) - if nvidia_torch_version is not None: - try: - NVIDIA_TORCH_MAJOR = int(nvidia_torch_version.split('.')[0]) - except Exception: - NVIDIA_TORCH_MAJOR = 0 - try: - NVIDIA_TORCH_MINOR = int(nvidia_torch_version.split('.')[1]) - except Exception: - NVIDIA_TORCH_MINOR = 0 - - # Apex Persistent layer norm is supported from Nvidia PyTorch container v21.11 - if NVIDIA_TORCH_MAJOR < 21 or (NVIDIA_TORCH_MAJOR == 21 and NVIDIA_TORCH_MINOR < 11): - self.cfg.persist_layer_norm = False - - if NVIDIA_TORCH_MAJOR >= 21 or (NVIDIA_TORCH_MAJOR == 21 and NVIDIA_TORCH_MINOR >= 11): - # NVFUSER - torch._C._jit_set_profiling_executor(True) - torch._C._jit_set_profiling_mode(True) - torch._C._jit_override_can_fuse_on_cpu(False) - torch._C._jit_override_can_fuse_on_gpu(False) - torch._C._jit_set_texpr_fuser_enabled(False) - torch._C._jit_set_nvfuser_enabled(True) - torch._C._debug_set_autodiff_subgraph_inlining(False) - - else: - # Not a Nvidia container. NVFUSER Dependency check is on users - pass - - def on_train_start(self) -> None: - super().on_train_start() - self.init_global_step = self.trainer.global_step - - def on_validation_start(self) -> None: - super().on_validation_start() - if self.gc_interval > 0: - gc.collect() - - def on_validation_end(self) -> None: - super().on_validation_end() - if self.gc_interval > 0: - gc.collect() - - def get_parameters_with_grad(self): - """ - Get all parameters with grad from optimizer param groups - """ - params = [] - for param_group in self._optimizer_param_groups: - for param in param_group['params']: - if ( - param.grad is not None - ): # (@adithyare) adapter training with pp>1 can result in params with no grads - params.append(param) - return params - - def configure_gradient_clipping(self, *args, **kwargs): - """PTL hook to configure gradients. - We use gradient clipping implementation from megatron-lm. - """ - clip_val = self.trainer.gradient_clip_val - if clip_val is None: - return - - clip_val = float(clip_val) - if clip_val <= 0: - return - - if self.grad_clip_pl_default: - # use the default behavior - return super().configure_gradient_clipping(*args, **kwargs) - - if self.with_distributed_adam: - grad_norm = clip_grad_norm_distributed_optimizer(self._optimizer, clip_val) - else: - if self.megatron_amp_O2: - # grep fp32 master parameters for gradient clipping - parameters = self._optimizer.get_parameters_with_grad() - else: - parameters = self.get_parameters_with_grad() - grad_norm = clip_grad_norm_fp32(parameters=parameters, max_norm=clip_val) - - self.log('grad_norm', grad_norm, rank_zero_only=True, batch_size=1) - - def allreduce_gradients(self): - """Reduce gradients across data parallel ranks. - Modified from megatron-lm: https://github.com/NVIDIA/Megatron-LM/blob/d41696840ed0a7edb7e0499eb82a48ae112d9bb3/megatron/model/distributed.py#L188 - """ - # Bucketize and all-reduce - buckets = {} - for param in self.parameters(): - if param.requires_grad and param.grad is not None: - tp = param.data.type() - if tp not in buckets: - buckets[tp] = [] - buckets[tp].append(param) - # param.main_grad = param.grad - - # For each bucket, all-reduce and copy all-reduced grads. - for tp in buckets: - bucket = buckets[tp] - grads = [param.grad.data for param in bucket] - coalesced = torch._utils._flatten_dense_tensors(grads) - coalesced /= parallel_state.get_data_parallel_world_size() - torch.distributed.all_reduce(coalesced, group=parallel_state.get_data_parallel_group()) - for buf, synced in zip(grads, torch._utils._unflatten_dense_tensors(coalesced, grads)): - buf.copy_(synced) - - def reduce_overlap_gradients(self, params=None): - """Reduce grads if overlapped grad sync is enabled - - Used for pipeline parallelism with the distributed Adam - optimizer. In the first pipeline stage, the grad sync is - overlapped with the final backward pass. In other pipeline - stages, the grad sync is deferred until the bubble overhead. - - """ - if self.with_distributed_adam and self._optimizer.overlap_grad_sync: - if params is None: - params = self._optimizer.parameters() - self._optimizer.try_grad_sync(params) - - def sync_overlap_parameters(self, params=None): - if self.with_distributed_adam: - self._optimizer._try_start_bucket_param_sync(params) - - def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unused: Optional[int] = 0) -> None: - super().on_train_batch_end(outputs, dataloader_iter, batch_idx) - - # TODO: Replace with newer override for scheduler.step() instead of - # search for plugins for fp16 GradScalar - if self.trainer.precision_plugin is not None and isinstance( - self.trainer.precision_plugin, NativeMixedPrecisionPlugin - ): - precision_plugin = self.trainer.precision_plugin - - if ( - hasattr(precision_plugin, 'scaler') - and precision_plugin.scaler is not None - and isinstance(precision_plugin.scaler, GradScaler) - ): - grad_scaler = precision_plugin.scaler - - # If the grad scaler skipped its optimizer step due to infs/nans, - # decrement the step of all schedulers. - if grad_scaler.optimizer_update_skipped is not None and grad_scaler.optimizer_update_skipped is True: - scheduler_cfgs = self.trainer.lr_scheduler_configs - - if not scheduler_cfgs or not self.trainer.lightning_module.automatic_optimization: - return - - for scheduler_cfg in scheduler_cfgs: - # Decrement the counter by 2, then perform a scheduler.step() to perform a no-up - # as well as update the optimizer lr in all param groups - scheduler_cfg.scheduler.last_epoch -= 2 - scheduler_cfg.scheduler.step() - - # Increase the max step count by 1 - - # Reset the optimizer update skipped to `None` - this is to prevent scheduler no-ops during - # accumulated gradient updates. - grad_scaler.optimizer_update_skipped = None - - if self.gc_interval > 0 and (self.trainer.global_step % self.gc_interval == 0): - gc.collect() - - def on_validation_batch_end(self, outputs, batch: Any, batch_idx: int, dataloader_idx: int) -> None: - super().on_validation_batch_end(outputs, batch, batch_idx, dataloader_idx) - - if self.gc_interval > 0: - if self.validation_global_step % self.gc_interval == 0: - gc.collect() - self.validation_global_step += 1 - - def setup_optimization( - self, optim_config: Optional[Union[DictConfig, Dict]] = None, optim_kwargs: Optional[Dict[str, Any]] = None, - ): - optim_kwargs = {} if optim_kwargs is None else optim_kwargs.copy() - if self.with_distributed_adam: - - # Allocate contiguous buffers to avoid extra copies - optim_kwargs['contiguous_grad_buffer'] = True - optim_kwargs['contiguous_param_buffer'] = True - - # Make sure optimizer state is in FP32 - optim_dtype = torch.float32 - optim_kwargs['dtype'] = optim_dtype - - # Make sure embedding grad reductions are in FP32 - for name, param in self.named_parameters(): - if 'word_embedding' in name or 'position_embedding' in name: - param._with_fp32_optimizer = True - - # Match param allgather with model dtype - model_dtype = torch.float32 - if self.megatron_amp_O2 and hasattr(self, 'autocast_dtype'): - model_dtype = self.autocast_dtype - optim_kwargs['param_sync_dtype'] = model_dtype - - # Determine whether to store master params in optimizer - if optim_dtype == model_dtype: - optim_kwargs['store_params'] = False - elif optim_dtype == torch.float32 and model_dtype == torch.bfloat16: - optim_kwargs['store_params'] = False - optim_kwargs['store_param_remainders'] = True - else: - optim_kwargs['store_params'] = True - - return super().setup_optimization(optim_config=optim_config, optim_kwargs=optim_kwargs) - - def configure_optimizers(self): - self.setup_optimization() - - # Wrap the baseline optimizer with the optimizer class with master parameters - if self.megatron_amp_O2 and not self.with_distributed_adam and self._optimizer is not None: - if self.cfg.precision == 'bf16': - fp32_grad_accum = True - contiguous_grad_bucket = True - elif self.cfg.precision == 16: - fp32_grad_accum = False - # TODO: contiguous grad bucket for fp16 is also planned to be supported - contiguous_grad_bucket = False - raise ValueError( - "fp16 training is not yet supported with O2. Please set megatron_amp_O2 to False in the model config." - ) - - # if using tensor parallel only, we automatically use async grad all-reduce - # if using pipeline parallel or sequence parallel or gradient accumulation fusion, then we disable it - # if self.cfg.get('pipeline_model_parallel_size', 1) == 1 and not ( - # self.cfg.get('sequence_parallel', False) or self.cfg.get('gradient_accumulation_fusion', False) - # ): - # async_grad_allreduce = True - # else: - # async_grad_allreduce = False - - async_grad_allreduce = False - - if async_grad_allreduce: - # we need this to be configurable until make_nccl_premul_sum is in public PyTorch. - # currently cannot be imported in PyTorch 1.12.0 - grad_div_ar_fusion = self.cfg.get('grad_div_ar_fusion', False) - else: - grad_div_ar_fusion = False - - self._optimizer = MainParamsOptimizerWrapper( - self._optimizer, - fp32_grad_accum=fp32_grad_accum, - contiguous_grad_bucket=contiguous_grad_bucket, - async_grad_allreduce=async_grad_allreduce, - grad_div_ar_fusion=grad_div_ar_fusion, - grad_allreduce_chunk_size_mb=self.cfg.get('grad_allreduce_chunk_size_mb', 125), - ) - - assert self._trainer.max_steps is not None, "'max_steps' is missing in trainer config." - if hasattr(self._cfg.optim, 'sched'): - sched_config = self._cfg.optim.sched - sched_config['max_steps'] = self._trainer.max_steps - self._scheduler = prepare_lr_scheduler( - optimizer=self._optimizer, scheduler_config=sched_config, train_dataloader=self._train_dl - ) - - # Configure distributed optimizer - if self.with_distributed_adam: - - # Initialize param buckets if explicitly provided - if hasattr(self, 'distributed_adam_buckets'): - for bucket in self.distributed_adam_buckets: - self._optimizer.init_params_bucket(bucket) - del self.distributed_adam_buckets - - # Make sure all params are initialized so main grads are - # available - # Note: Consolidate grads without overlap - overlap_params = [] - no_overlap_params = [] - for p in self.parameters(): - if getattr(p, '_disable_overlap_grad_sync', False): - no_overlap_params.append(p) - else: - overlap_params.append(p) - self._optimizer.init_params(reversed(overlap_params)) - self._optimizer.init_params(reversed(no_overlap_params)) - - # Initialize contiguous parameter buffer - self._optimizer.init_param_buffer() - - if self._scheduler is None: - return self._optimizer - else: - return [self._optimizer], [self._scheduler] - - def compute_consumed_samples(self, steps_since_resume=0): - app_state = AppState() - consumed_samples = ( - self.init_consumed_samples - + steps_since_resume * app_state.data_parallel_size * self.cfg.micro_batch_size * get_num_microbatches() - ) - return int(consumed_samples) - - def _extract_consumed_samples_from_ckpt(self, ckpt_path): - try: - init_consumed_samples = int(float(re.findall(r"consumed_samples\=([0-9]+.[0-9]+)", ckpt_path)[0])) - except (ValueError, TypeError, IndexError): - logging.warning("Cannot parse the checkpoint file to get the consumed samples. assume it is zero.") - init_consumed_samples = 0 - - return init_consumed_samples - - def _validate_and_override_config(self): - """ Certain configurations might be incompatible or discouraged. - We can check for them here and override if necessary. - """ - app_state = AppState() - - if self.cfg.get('sequence_parallel', False) and self.cfg.get('tensor_model_parallel_size', 1) == 1: - logging.info( - "Sequence parallel should only be used with tensor parallel size > 1. Setting sequence parallel to False" - ) - with open_dict(self.cfg): - self.cfg.sequence_parallel = False - - # Gradient accumulation fusion does not work with our baseline implementaiton of - # async grad allreduce. This should be fixed! - # For now we must disable it whenever using the baseline implementaion. - # The distributed adam from apex does work with gradient accumulation fusion. - distributed_fused_adam = self.cfg.optim.get('name', 'fused_adam') == 'distributed_fused_adam' - pipeline_model_parallel_size = self.cfg.get('pipeline_model_parallel_size', 1) - data_parallel_size = app_state.data_parallel_size - - if self.cfg.get('gradient_accumulation_fusion', False): - if data_parallel_size > 1 and pipeline_model_parallel_size == 1 and not distributed_fused_adam: - logging.info( - "When not using pipeline model parallel, gradient accumulation fusion can only be used with distributed_fused_adam." - ) - with open_dict(self.cfg): - self.cfg.gradient_accumulation_fusion = False - - if self.cfg.get('gradient_accumulation_fusion', False) and not self.cfg.get('megatron_amp_O2', False): - logging.info("Gradient accumulation fusion can only be used with megatron amp O2 mixed precision.") - with open_dict(self.cfg): - self.cfg.gradient_accumulation_fusion = False - - if self.cfg.get('use_emha', False): - raise ValueError('use_emha is not yet supported please set to False') - - if self.cfg.get('virtual_pipeline_model_parallel_size', None) is not None: - assert ( - self.cfg.num_layers // self.cfg.pipeline_model_parallel_size - ) % self.cfg.virtual_pipeline_model_parallel_size == 0, ( - 'Make sure the number of model chunks is the same across all pipeline stages.' - ) - - if self.cfg.get('ub_tp_comm_overlap', False): - if not self.cfg.get('transformer_engine', False) or not self.cfg.get('sequence_parallel', False): - logging.info( - "Userbuffer tensor-parallel communication overlap is available with both Transformer Engine and sequence-parallelism." - ) - with open_dict(self.cfg): - self.cfg.ub_tp_comm_overlap = False - - def is_data_parallel_rank_zero(self): - if is_global_rank_zero(): - return True - else: - try: - data_parallel_rank = parallel_state.get_data_parallel_rank() - except: - data_parallel_rank = None - - if data_parallel_rank is not None and data_parallel_rank == 0: - return True - else: - return False From 61569c1eebdb4d74c75460fc349befdae1512bea Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Wed, 20 Sep 2023 13:35:24 -0700 Subject: [PATCH 220/512] Fix CI inductor issue and update to torch compile --- examples/multimodal/generative/dreambooth/dreambooth.py | 1 - examples/multimodal/generative/imagen/imagen_training.py | 2 +- .../multimodal/models/controlnet/controlnet.py | 4 +++- .../multimodal/models/dreambooth/dreambooth.py | 7 ++++--- .../multimodal/models/stable_diffusion/ldm/ddpm.py | 8 +++++++- .../multimodal/modules/imagen/sampler/continuous_ddpm.py | 1 - .../multimodal/modules/stable_diffusion/attention.py | 5 ++++- .../stable_diffusion/diffusionmodules/openaimodel.py | 5 ++++- 8 files changed, 23 insertions(+), 10 deletions(-) diff --git a/examples/multimodal/generative/dreambooth/dreambooth.py b/examples/multimodal/generative/dreambooth/dreambooth.py index 7cd1abb6f0ab..f3476b481f0f 100644 --- a/examples/multimodal/generative/dreambooth/dreambooth.py +++ b/examples/multimodal/generative/dreambooth/dreambooth.py @@ -110,5 +110,4 @@ def main(cfg): if __name__ == '__main__': - torch.multiprocessing.set_start_method('spawn') main() diff --git a/examples/multimodal/generative/imagen/imagen_training.py b/examples/multimodal/generative/imagen/imagen_training.py index 54053e20cc78..66a1f0aedefb 100644 --- a/examples/multimodal/generative/imagen/imagen_training.py +++ b/examples/multimodal/generative/imagen/imagen_training.py @@ -17,7 +17,7 @@ from pytorch_lightning import Trainer from pytorch_lightning.plugins.environments import TorchElasticEnvironment from pytorch_lightning.trainer.connectors.checkpoint_connector import _CheckpointConnector -from torch._dynamo import disable, optimize +from torch._dynamo import disable from torch._inductor import config as inductor_config from nemo.collections.multimodal.models.imagen.imagen import MegatronImagen diff --git a/nemo/collections/multimodal/models/controlnet/controlnet.py b/nemo/collections/multimodal/models/controlnet/controlnet.py index bffbd36c99c8..2fb1c834ac1a 100644 --- a/nemo/collections/multimodal/models/controlnet/controlnet.py +++ b/nemo/collections/multimodal/models/controlnet/controlnet.py @@ -93,7 +93,9 @@ def __init__(self, cfg, model_parallel_config): if cfg.get("inductor", False): # TorchInductor with CUDA graph can lead to OOM inductor_config.triton.cudagraphs = cfg.get("inductor_cudagraphs", False) - self.control_model = optimize("inductor")(self.control_model) + torch._dynamo.config.dynamic_shapes = False + torch._dynamo.config.automatic_dynamic_shapes = False + self.control_model = torch.compile(self.control_model) if self.channels_last: self.control_model = self.control_model.to(memory_format=torch.channels_last) diff --git a/nemo/collections/multimodal/models/dreambooth/dreambooth.py b/nemo/collections/multimodal/models/dreambooth/dreambooth.py index 04d8a4172ed9..6a5bf4c503b5 100644 --- a/nemo/collections/multimodal/models/dreambooth/dreambooth.py +++ b/nemo/collections/multimodal/models/dreambooth/dreambooth.py @@ -22,7 +22,6 @@ from omegaconf import DictConfig, OmegaConf, open_dict from pytorch_lightning import Trainer from pytorch_lightning.utilities import GradClipAlgorithmType -from torch._dynamo import optimize from torch._inductor import config as inductor_config from torch.optim.lr_scheduler import LambdaLR @@ -117,8 +116,10 @@ def instantiate_unet(self, cfg): self.unet.train() if self.inductor: # TorchInductor with CUDA graph can lead to OOM - inductor_config.triton.cudagraphs = cfg.inductor_cudagraphs - self.unet = optimize("inductor")(self.unet) + inductor_config.triton.cudagraphs = self.inductor_cudagraphs + torch._dynamo.config.dynamic_shapes = False + torch._dynamo.config.automatic_dynamic_shapes = False + self.unet = torch.compile(self.unet) def instantiate_vae(self, cfg): model = DreamBooth.from_config_dict(cfg) diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py index 19e0c2aa1f96..24d6ad5d5ef5 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -89,6 +89,10 @@ __conditioning_keys__ = {'concat': 'c_concat', 'crossattn': 'c_crossattn', 'adm': 'y'} +import torch._dynamo + +torch._dynamo.config.suppress_errors = True + def random_dropout(embeddings, drop_rate): r""" @@ -2126,8 +2130,10 @@ def __init__( # Fusing VAE and CLIP doesn't give benefit if inductor: # TorchInductor with CUDA graph can lead to OOM + torch._dynamo.config.dynamic_shapes = False + torch._dynamo.config.automatic_dynamic_shapes = False inductor_config.triton.cudagraphs = inductor_cudagraphs - self.diffusion_model = optimize("inductor")(self.diffusion_model) + self.diffusion_model = torch.compile(self.diffusion_model) # CUDA graph self.capture_cudagraph_iters = capture_cudagraph_iters self.iterations = 0 diff --git a/nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py b/nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py index 27b1732f1e28..2b48f28ce9c9 100644 --- a/nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py +++ b/nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py @@ -15,7 +15,6 @@ from functools import partial, wraps import torch -import torch._dynamo import torch.nn as nn from einops import repeat from torch.special import expm1 diff --git a/nemo/collections/multimodal/modules/stable_diffusion/attention.py b/nemo/collections/multimodal/modules/stable_diffusion/attention.py index 6393d9f24b93..22a8271daf3a 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/attention.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/attention.py @@ -318,7 +318,10 @@ def __init__( self.use_checkpoint = use_checkpoint def forward(self, x, context=None): - return checkpoint(self._forward, (x, context), self.parameters(), self.use_checkpoint) + if self.use_checkpoint: + return checkpoint(self._forward, (x, context), self.parameters(), self.use_checkpoint) + else: + return self._forward(x, context) def _forward(self, x, context=None): x = self.attn1(self.norm1(x), context=context if self.disable_self_attn else None) + x diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py index 2c4136faea8d..515544c22028 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py @@ -255,7 +255,10 @@ def forward(self, x, emb): :param emb: an [N x emb_channels] Tensor of timestep embeddings. :return: an [N x C x ...] Tensor of outputs. """ - return checkpoint(self._forward, (x, emb), self.parameters(), self.use_checkpoint) + if self.use_checkpoint: + return checkpoint(self._forward, (x, emb), self.parameters(), self.use_checkpoint) + else: + return self._forward(x, emb) def _forward(self, x, emb): if self.updown: From 9fe3784e5dfbd0843c4cd867c37f587c71e90248 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Wed, 20 Sep 2023 13:55:05 -0700 Subject: [PATCH 221/512] Remove suppress error --- .../multimodal/models/stable_diffusion/ldm/ddpm.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py index 24d6ad5d5ef5..c15dcb84f3bd 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -89,10 +89,6 @@ __conditioning_keys__ = {'concat': 'c_concat', 'crossattn': 'c_crossattn', 'adm': 'y'} -import torch._dynamo - -torch._dynamo.config.suppress_errors = True - def random_dropout(embeddings, drop_rate): r""" From 880161136e3dd41fac2f95202d6b4162534d68be Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Thu, 21 Sep 2023 12:40:11 -0700 Subject: [PATCH 222/512] Fix when conversion config uses fp16 and it complains about precision plugin --- examples/multimodal/convert_ckpt_to_nemo.py | 16 ++++++++++------ .../multimodal/models/dreambooth/dreambooth.py | 1 + .../models/stable_diffusion/ldm/ddpm.py | 5 +++++ .../language_modeling/megatron_base_model.py | 2 +- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/examples/multimodal/convert_ckpt_to_nemo.py b/examples/multimodal/convert_ckpt_to_nemo.py index e5f1f628c430..d640e0c11ede 100644 --- a/examples/multimodal/convert_ckpt_to_nemo.py +++ b/examples/multimodal/convert_ckpt_to_nemo.py @@ -28,6 +28,7 @@ from argparse import ArgumentParser import torch +from omegaconf.omegaconf import OmegaConf, open_dict from pytorch_lightning.plugins.environments import TorchElasticEnvironment from pytorch_lightning.trainer.trainer import Trainer @@ -39,6 +40,7 @@ from nemo.collections.multimodal.models.kosmos.megatron_kosmos_model import MegatronKosmosModel from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector from nemo.utils import AppState, logging from nemo.utils.distributed import initialize_distributed @@ -101,12 +103,14 @@ def convert(local_rank, rank, world_size, args): app_state = AppState() app_state.data_parallel_rank = 0 num_nodes = world_size // args.gpus_per_node - if args.bcp: - trainer = Trainer( - devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu', plugins=[TorchElasticEnvironment()] - ) - else: - trainer = Trainer(devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu') + + cfg = OmegaConf.load(args.hparams_file) + with open_dict(cfg): + cfg['model'] = cfg['cfg'] + cfg['trainer'] = {'precision': cfg['model']['precision']} + if args.bcp: + cfg['cluster_type'] = 'BCP' + trainer = MegatronTrainerBuilder(cfg).create_trainer() app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size app_state.tensor_model_parallel_size = args.tensor_model_parallel_size diff --git a/nemo/collections/multimodal/models/dreambooth/dreambooth.py b/nemo/collections/multimodal/models/dreambooth/dreambooth.py index 6a5bf4c503b5..c6f2d810c297 100644 --- a/nemo/collections/multimodal/models/dreambooth/dreambooth.py +++ b/nemo/collections/multimodal/models/dreambooth/dreambooth.py @@ -522,6 +522,7 @@ def parameters(self): else: return self.model.parameters() + @classmethod def load_from_checkpoint( cls, checkpoint_path: str, diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py index c15dcb84f3bd..fe91e0766e36 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -21,9 +21,13 @@ import torch import torch.nn as nn from einops import rearrange, repeat +from lightning_fabric.utilities.cloud_io import _load as pl_load from omegaconf import DictConfig, OmegaConf, open_dict from pytorch_lightning import Trainer from pytorch_lightning.accelerators import CPUAccelerator +from pytorch_lightning.core.saving import _load_state as ptl_load_state +from pytorch_lightning.core.saving import load_hparams_from_tags_csv, load_hparams_from_yaml +from pytorch_lightning.utilities.migration import pl_legacy_patch from pytorch_lightning.utilities.rank_zero import rank_zero_only from torch._dynamo import optimize from torch._inductor import config as inductor_config @@ -1995,6 +1999,7 @@ def save_to(self, save_path: str): self._cfg = cfg super().save_to(save_path) + @classmethod def load_from_checkpoint( cls, checkpoint_path: str, diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index 1637e70dacc5..b6009e830064 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -784,7 +784,7 @@ def build_model_parallel_config(self) -> ModelParallelConfig: "async_tensor_model_parallel_allreduce": self.cfg.get('tensor_model_parallel_world_size', 1) > 1 and not self.cfg.get('sequence_parallel', False), "pipeline_dtype": pipeline_dtype, - "grad_scale_func": self.trainer.precision_plugin.scaler.scale + "grad_scale_func": self.trainer.precision_plugin.scaler._scale if self.torch_dtype == torch.float16 else None, "enable_autocast": not megatron_amp_O2 and self.torch_dtype in [torch.bfloat16, torch.float16], From 3c331936707e2bdfd2703c7ddcf1fb458a9f4664 Mon Sep 17 00:00:00 2001 From: Parth Mannan Date: Fri, 29 Sep 2023 14:58:09 -0700 Subject: [PATCH 223/512] Fixing FAv2 API usage --- .../multimodal/modules/stable_diffusion/attention.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/attention.py b/nemo/collections/multimodal/modules/stable_diffusion/attention.py index 22a8271daf3a..07f37ece3d6f 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/attention.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/attention.py @@ -204,11 +204,11 @@ def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0. self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)) self.use_flash_attention = use_flash_attention - if dim_head <= 128 and (dim_head % 8) == 0 and flash_attn_installed: + if dim_head <= 160 and (dim_head % 8) == 0 and flash_attn_installed: if context_dim == query_dim: - self.flash_attn = FlashSelfAttention(self.scale) + self.flash_attn = FlashSelfAttention(softmax_scale=self.scale) else: - self.flash_attn = FlashCrossAttention(self.scale) + self.flash_attn = FlashCrossAttention(softmax_scale=self.scale) def forward(self, x, context=None, mask=None): h = self.heads @@ -229,7 +229,7 @@ def _attention(self, q, k, v, mask=None): not flash_attn_installed or not self.use_flash_attention or q.dtype == torch.float32 - or (self.dim_head > 128 or (self.dim_head % 8) != 0) + or (self.dim_head > 160 or (self.dim_head % 8) != 0) or mask is not None ): # original implementation @@ -396,7 +396,6 @@ def forward(self, x, context=None): if not self.use_linear: x = self.proj_in(x) x = x.view(b, c, -1).transpose(1, 2) # b c h w -> b (h w) c - # x = x.contiguous() # workaround for dynamo ddp bug if self.use_linear: x = self.proj_in(x) for i, block in enumerate(self.transformer_blocks): @@ -404,7 +403,6 @@ def forward(self, x, context=None): if self.use_linear: x = self.proj_out(x) x = x.transpose(1, 2).view(b, c, h, w) # b (h w) c -> b c h w - # x = x.contiguous() # workaround for dynamo ddp bu if not self.use_linear: x = self.proj_out(x) return x + x_in From f24a22442cb5ea64e765b128691192d262e1435d Mon Sep 17 00:00:00 2001 From: Lukasz Pierscieniewski Date: Tue, 3 Oct 2023 10:21:56 -0700 Subject: [PATCH 224/512] Initial release of content filtering model --- .../nsfw/conf/megatron_nsfw_config.yaml | 230 ++++++++++ .../nsfw/conf/megatron_nsfw_export.yaml | 15 + .../nsfw/conf/megatron_nsfw_infer.yaml | 12 + .../nsfw/megatron_nsfw_export.py | 111 +++++ .../nsfw/megatron_nsfw_infer.py | 78 ++++ .../nsfw/megatron_nsfw_pretrain.py | 60 +++ .../multimodal/data/nsfw/__init__.py | 13 + .../multimodal/data/nsfw/nsfw_dataset.py | 53 +++ .../models/content_filter/__init__.py | 13 + .../megatron_nsfw_clip_models.py | 398 ++++++++++++++++++ 10 files changed, 983 insertions(+) create mode 100644 examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_config.yaml create mode 100644 examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_export.yaml create mode 100755 examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_infer.yaml create mode 100644 examples/multimodal/content_filtering/nsfw/megatron_nsfw_export.py create mode 100644 examples/multimodal/content_filtering/nsfw/megatron_nsfw_infer.py create mode 100644 examples/multimodal/content_filtering/nsfw/megatron_nsfw_pretrain.py create mode 100644 nemo/collections/multimodal/data/nsfw/__init__.py create mode 100644 nemo/collections/multimodal/data/nsfw/nsfw_dataset.py create mode 100644 nemo/collections/multimodal/models/content_filter/__init__.py create mode 100644 nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py diff --git a/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_config.yaml b/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_config.yaml new file mode 100644 index 000000000000..11dc65155cf5 --- /dev/null +++ b/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_config.yaml @@ -0,0 +1,230 @@ +name: megatron_clip +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + max_epochs: 10 + max_steps: 375000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + use_distributed_sampler: False + check_val_every_n_epoch: 1 + limit_val_batches: 1.0 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: megatron_nsfw + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 64 # limited by GPU memory + global_batch_size: 64 # will use more micro batches to reach global batch size + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_pretrained: null # used in fine-tuning + # multimodal configs + output_dim: 768 + # As the number of devices used to train increases, so does the space complexity of + # the logit matrix. Using a naïve all-gather scheme, space complexity will be + # `O(n^2)`. Instead, complexity may become effectively linear if the flags + # `--gather-with-grad` and `--local-loss` are used. This alteration results in one-to-one + # numerical results as the naïve method. + local_loss: False # calculate loss w/ local features @ global (instead of realizing full global @ global matrix) + gather_with_grad: True # enable full distributed gradient for feature gather, set this to False may cause convergence issue + + vision: + precision: ${trainer.precision} + patch_dim: 14 + img_h: 224 + img_w: 224 + image_mean: null + image_std: null + num_channels: 3 + drop_patch_rate: 0.0 + drop_path_rate: 0.0 + global_average_pool: false + output_dim: ${model.output_dim} + class_token_length: 1 + preprocess_layernorm: true + encoder_seq_length: 196 + max_position_embeddings: 196 + position_embedding_type: learned_parameters + num_layers: 24 + hidden_size: 1024 + ffn_hidden_size: 4096 + num_attention_heads: 16 + init_method_std: 0.02 + use_scaled_init_method: true + hidden_dropout: 0.0 + attention_dropout: 0.0 + kv_channels: null + apply_query_key_layer_scaling: true + normalization: layernorm + layernorm_epsilon: 1.0e-05 + do_layer_norm_weight_decay: false + pre_process: true + post_process: true + persist_layer_norm: true + activations_checkpoint_granularity: null + activations_checkpoint_method: null + activations_checkpoint_num_layers: null + sequence_parallel: false + native_amp_init_scale: 4294967296 + native_amp_growth_interval: 1000 + hysteresis: 2 + fp32_residual_connection: false + fp16_lm_cross_entropy: false + masked_softmax_fusion: true + bias_dropout_add_fusion: true + use_cpu_initialization: false + onnx_safe: false + gradient_accumulation_fusion: false + openai_gelu: false + bias_activation_fusion: false + megatron_legacy: true + activation: quick-gelu + + text: + precision: ${trainer.precision} + # text configs + output_dim: ${model.output_dim} + + encoder_seq_length: 77 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: learned_parameters + num_layers: 12 + hidden_size: 768 + ffn_hidden_size: 3072 + num_attention_heads: 12 + init_method_std: 0.02 + use_scaled_init_method: true + hidden_dropout: 0.0 + attention_dropout: 0.0 + kv_channels: null + apply_query_key_layer_scaling: true + normalization: layernorm + layernorm_epsilon: 1.0e-05 + do_layer_norm_weight_decay: false + pre_process: true + post_process: true + persist_layer_norm: true + activations_checkpoint_granularity: null + activations_checkpoint_method: null + activations_checkpoint_num_layers: null + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: false + native_amp_init_scale: 4294967296 + native_amp_growth_interval: 1000 + hysteresis: 2 + fp32_residual_connection: false + fp16_lm_cross_entropy: false + masked_softmax_fusion: true + bias_dropout_add_fusion: true + use_cpu_initialization: false + onnx_safe: false + gradient_accumulation_fusion: false + openai_gelu: false + bias_activation_fusion: false + megatron_legacy: true + transformer_engine: false + fp8: false + fp8_e4m3: false + fp8_hybrid: false + fp8_margin: 0 + fp8_interval: 1 + fp8_amax_history_len: 1 + fp8_amax_compute_algo: most_recent + use_emha: false + activation: quick-gelu + + # Megatron O2-style half-precision + megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + sim_hidden_dim: 64 + cls_hidden_dim: 64 + + tokenizer: + library: 'huggingface' + type: 'openai/clip-vit-large-patch14' + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + make_vocab_size_divisible_by: 128 # Pad the vocab size to be divisible by this value for computation efficiency. + + data: + num_workers: 8 + train: + dataset_path: /datasets/coyo/test.pkl + validation: # List of paths to pkl files or tar files + dataset_path: /datasets/coyo/test.pkl + webdataset: + infinite_sampler: False + local_root_path: /datasets/coyo + + imagenet_val: null # Path to imagenet val set for conducting zero shot evaluation. + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: adam + lr: 1e-3 + weight_decay: 0.0 + sched: + name: CosineAnnealing + warmup_steps: 200 + constant_steps: 0 + min_lr: 1e-5 + concepts: ??? + diff --git a/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_export.yaml b/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_export.yaml new file mode 100644 index 000000000000..19f5d8ee6e67 --- /dev/null +++ b/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_export.yaml @@ -0,0 +1,15 @@ +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +infer: + out_path: "megatron-nsfw" + max_batch_size: 64 + max_dim: 224 + +model: + restore_from_path: null # Path to a trained ViT .nemo file + precision: ${trainer.precision} diff --git a/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_infer.yaml b/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_infer.yaml new file mode 100755 index 000000000000..f78eba0bdc96 --- /dev/null +++ b/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_infer.yaml @@ -0,0 +1,12 @@ +image_path: ??? # Path to a image for inference + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +model: + restore_from_path: null # Path to a trained ViT .nemo file + precision: ${trainer.precision} diff --git a/examples/multimodal/content_filtering/nsfw/megatron_nsfw_export.py b/examples/multimodal/content_filtering/nsfw/megatron_nsfw_export.py new file mode 100644 index 000000000000..344e5b492dbe --- /dev/null +++ b/examples/multimodal/content_filtering/nsfw/megatron_nsfw_export.py @@ -0,0 +1,111 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Dict, List, Optional + +import torch +from omegaconf.omegaconf import OmegaConf + +from nemo.collections.multimodal.models.content_filter.megatron_nsfw_clip_models import MegatronContentFilteringModel +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.config import hydra_runner +from nemo.core.neural_types import ChannelType, NeuralType +from nemo.utils import logging +from nemo.utils.trt_utils import build_engine + + +class ContentFilteringWrapper(MegatronContentFilteringModel): + def __init__(self, cfg, trainer): + super(ContentFilteringWrapper, self).__init__(cfg, trainer) + + def forward(self, image: torch.Tensor): + return super().forward(image, mlp_factor=1.0, emb_factor=1.0).sigmoid() + + def input_example(self, max_batch: int = 64, max_dim: int = 224): + device = next(self.parameters()).device + return (torch.randn(max_batch, 3, max_dim, max_dim, device=device),) + + @property + def input_names(self) -> List[str]: + return ["images"] + + @property + def output_names(self) -> List[str]: + return ["nsfw_probs"] + + @property + def input_types(self) -> Optional[Dict[str, NeuralType]]: + return {"images": NeuralType(("B", "C", "H", "W"), ChannelType())} + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + return {"nsfw_probs": NeuralType(("B",), ChannelType())} + + +def set_envvar(): + os.environ["LOCAL_RANK"] = os.environ.get("LOCAL_RANK", "0") + os.environ["RANK"] = os.environ.get("RANK", "0") + os.environ["LOCAL_SIZE"] = os.environ.get("LOCAL_SIZE", "1") + os.environ["WORLD_SIZE"] = os.environ.get("WORLD_SIZE", "1") + + +@hydra_runner(config_path="conf", config_name="megatron_nsfw_export") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + set_envvar() + + output_dir = cfg.infer.out_path + max_batch_size = cfg.infer.max_batch_size + trt_precision = cfg.trainer.precision + cfg.trainer.precision = 32 + + # These configs are required to be off during inference. + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.vision.precision = cfg.trainer.precision + if cfg.trainer.precision != "bf16": + model_cfg.megatron_amp_O2 = False + model_cfg.sequence_parallel = False + model_cfg.activations_checkpoint_granularity = None + model_cfg.activations_checkpoint_method = None + + trainer, model = setup_trainer_and_model_for_inference( + model_provider=ContentFilteringWrapper, cfg=cfg, model_cfg_modifier=model_cfg_modifier + ) + + bs1_example = model.input_example(max_batch=1, max_dim=cfg.infer.max_dim) + bsmax_example = model.input_example(max_batch=max_batch_size, max_dim=cfg.infer.max_dim) + + os.makedirs(f"{output_dir}/onnx", exist_ok=True) + model.export(f"{output_dir}/onnx/nsfw.onnx", dynamic_axes={"images": {0: "B"}}, input_example=bsmax_example) + + input_profile = { + "images": [tuple(bs1_example[0].shape), tuple(bsmax_example[0].shape), tuple(bsmax_example[0].shape),] + } + + build_engine( + f"{output_dir}/onnx/nsfw.onnx", + f"{output_dir}/plan/nsfw.plan", + fp16=(trt_precision == 16), + input_profile=input_profile, + timing_cache=None, + workspace_size=0, + ) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/content_filtering/nsfw/megatron_nsfw_infer.py b/examples/multimodal/content_filtering/nsfw/megatron_nsfw_infer.py new file mode 100644 index 000000000000..d6b4bed6d01a --- /dev/null +++ b/examples/multimodal/content_filtering/nsfw/megatron_nsfw_infer.py @@ -0,0 +1,78 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import torch +from omegaconf.omegaconf import OmegaConf +from PIL import Image + +from nemo.collections.multimodal.data.clip.augmentations.augmentations import image_transform +from nemo.collections.multimodal.models.content_filter.megatron_nsfw_clip_models import MegatronContentFilteringModel +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.get_rank import is_global_rank_zero + + +def _get_autocast_dtype(precision: str): + if precision in ["bf16", "bf16-mixed"]: + return torch.bfloat16 + if precision in [32, "32", "32-true"]: + return torch.float + if precision in [16, "16", "16-mixed"]: + return torch.half + raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') + + +@hydra_runner(config_path="conf", config_name="megatron_nsfw_infer") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + # These configs are required to be off during inference. + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.vision.precision = cfg.trainer.precision + if cfg.trainer.precision != "bf16": + model_cfg.megatron_amp_O2 = False + model_cfg.sequence_parallel = False + model_cfg.activations_checkpoint_granularity = None + model_cfg.activations_checkpoint_method = None + + trainer, model = setup_trainer_and_model_for_inference( + model_provider=MegatronContentFilteringModel, cfg=cfg, model_cfg_modifier=model_cfg_modifier, + ) + image_transform_fn = image_transform( + (model.cfg.vision.img_h, model.cfg.vision.img_w), + is_train=False, + mean=model.cfg.vision.image_mean, + std=model.cfg.vision.image_std, + resize_longest_max=True, + ) + + autocast_dtype = _get_autocast_dtype(trainer.precision) + image = Image.open(cfg.image_path).convert('RGB') + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): + image = image_transform_fn(image).unsqueeze(0).cuda() + probability = model(image).sigmoid() + + if is_global_rank_zero: + print("Given image's NSFW probability: ", probability.cpu().item()) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/content_filtering/nsfw/megatron_nsfw_pretrain.py b/examples/multimodal/content_filtering/nsfw/megatron_nsfw_pretrain.py new file mode 100644 index 000000000000..51ccd596431a --- /dev/null +++ b/examples/multimodal/content_filtering/nsfw/megatron_nsfw_pretrain.py @@ -0,0 +1,60 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from omegaconf.omegaconf import OmegaConf, open_dict + +from nemo.collections.multimodal.models.content_filter.megatron_nsfw_clip_models import MegatronContentFilteringModel +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="conf", config_name="megatron_nsfw_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + assert ( + cfg.trainer.devices * cfg.trainer.num_nodes + ) * cfg.model.micro_batch_size == cfg.model.global_batch_size, ( + "Gradient accumulation is not supported in CLIP yet." + ) + + trainer = MegatronTrainerBuilder(cfg).create_trainer() + exp_manager(trainer, cfg.exp_manager) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + model = MegatronContentFilteringModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + override_config_path=cfg.model, + save_restore_connector=NLPSaveRestoreConnector(), + strict=False, + ) + + trainer.fit(model) + + if "save_path" in cfg.model: + logging.info(f"Saving model to path: {cfg.model.save_path}") + model.save_to(cfg.model.save_path) + + +if __name__ == '__main__': + main() diff --git a/nemo/collections/multimodal/data/nsfw/__init__.py b/nemo/collections/multimodal/data/nsfw/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/nsfw/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/nsfw/nsfw_dataset.py b/nemo/collections/multimodal/data/nsfw/nsfw_dataset.py new file mode 100644 index 000000000000..de2406012fbc --- /dev/null +++ b/nemo/collections/multimodal/data/nsfw/nsfw_dataset.py @@ -0,0 +1,53 @@ +import pathlib +from typing import Callable, List, Optional, Tuple + +import torch +from omegaconf.dictconfig import DictConfig +from PIL import Image + +from nemo.collections.multimodal.data.clip.augmentations.augmentations import image_transform + + +class DirectoryBasedDataset(torch.utils.data.Dataset): + def __init__(self, path: str, transform: Optional[Callable] = None): + super(DirectoryBasedDataset, self).__init__() + + self._transform = transform + self._samples = self._get_files(path, "nsfw", 1) + self._get_files(path, "safe", 0) + + def __getitem__(self, index: int) -> Tuple[torch.Tensor, int]: + if index >= len(self): + raise IndexError(f"Index {index} ot of bound {len(self)}") + + sample_path, category = self._samples[index] + + image = Image.open(sample_path) + + if self._transform is not None: + image = self._transform(image) + + return image, category + + def __len__(self) -> int: + return len(self._samples) + + def _get_files(self, path: str, subdir: str, category: int) -> List[Tuple[str, int]]: + globpath = pathlib.Path(path) / subdir + return [(x, category) for x in globpath.glob("*.*")] + + +def build_dataset(model_cfg: DictConfig, consumed_samples: int, is_train: bool): + img_fn = image_transform( + (model_cfg.vision.img_h, model_cfg.vision.img_w), + is_train=False, + mean=model_cfg.vision.image_mean, + std=model_cfg.vision.image_std, + resize_longest_max=True, + ) + + if is_train: + path = model_cfg.data.train.dataset_path + else: + path = model_cfg.data.validation.dataset_path + + return DirectoryBasedDataset(path, transform=img_fn) diff --git a/nemo/collections/multimodal/models/content_filter/__init__.py b/nemo/collections/multimodal/models/content_filter/__init__.py new file mode 100644 index 000000000000..9ff638194e7a --- /dev/null +++ b/nemo/collections/multimodal/models/content_filter/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py b/nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py new file mode 100644 index 000000000000..d49e2bfafe6b --- /dev/null +++ b/nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py @@ -0,0 +1,398 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import itertools +from typing import List, Optional, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +from apex.transformer.pipeline_parallel.utils import get_num_microbatches +from megatron.core import parallel_state +from megatron.core.pipeline_parallel.schedules import get_forward_backward_func +from omegaconf.dictconfig import DictConfig +from pytorch_lightning.accelerators import CPUAccelerator +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.multimodal.data.clip.clip_dataset import tokenize +from nemo.collections.multimodal.data.nsfw.nsfw_dataset import build_dataset +from nemo.collections.multimodal.models.clip.megatron_clip_models import CLIPTextTransformer, CLIPVisionTransformer +from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel +from nemo.collections.nlp.modules.common.megatron.build_model import build_model +from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule +from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.core.classes.common import PretrainedModelInfo +from nemo.utils import logging + + +class ContentFilteringModel(MegatronModule): + """Clip based content filtering model for NSFW.""" + + def __init__(self, model_cfg: DictConfig, model_parallel_config, padded_vocab_size: int, tokenizer: Optional): + super(ContentFilteringModel, self).__init__() + self.cfg = model_cfg + self.config = model_parallel_config + self.tokenizer = tokenizer + + self.concept_list = self._load_concept_list(model_cfg.concepts) + self.concept_count = len(self.concept_list) + + self.vision_encoder = CLIPVisionTransformer( + model_cfg.vision, model_parallel_config, pre_process=True, post_process=True + ) + + if "text" in model_cfg and model_cfg.text is not None: + self.text_encoder = CLIPTextTransformer( + model_cfg.text, model_parallel_config, padded_vocab_size, pre_process=True, post_process=True + ) + else: + self.text_encoder = None + + self.mlp_similarity_model = nn.Sequential( + nn.Linear(model_cfg.output_dim * 2, model_cfg.sim_hidden_dim), + nn.ReLU(), + nn.Linear(model_cfg.sim_hidden_dim, 1), + ) + + self.nn_classifier = nn.Sequential( + nn.Linear(self.concept_count * 2 + model_cfg.output_dim, model_cfg.cls_hidden_dim), + nn.ReLU(), + nn.Linear(model_cfg.cls_hidden_dim, 1), + ) + + self.register_buffer("concepts", torch.zeros(self.concept_count, model_cfg.output_dim)) + + def initialize_concept_embeddings(self, concepts: torch.Tensor): + if self.text_encoder is None: + return + + self.concepts.copy_(concepts.detach()) + del self.text_encoder + self.text_encoder = None + + def forward(self, image: torch.Tensor, mlp_factor: float = 1.0, emb_factor: float = 1.0) -> torch.Tensor: + """Perform model forward pass for given image and factor. + While inferencing, factors should be equal to default value + """ + + with torch.no_grad(): + embedding = self.vision_encoder(image).detach() + cos_similarity = self.cosine_similarity(embedding, self.concepts) + mlp_similarity = self.mlp_similarity(embedding, self.concepts) + + features = torch.cat([cos_similarity, mlp_similarity * mlp_factor, embedding * emb_factor], dim=-1) + + return self.nn_classifier(features) + + def cosine_similarity(self, prediction: torch.Tensor, target: torch.Tensor) -> torch.Tensor: + """Compute cosine similarity between prediction tensor and target tensor + Args: + prediction: Tensor of shape [X, H] for prediction embedding + target: Tensor of shape [Y, H] for target to compare + Returns: + Similarity matrix of shape [X, Y] and value range [-1, 1] + """ + normalized_prediction = F.normalize(prediction) + normalized_target = F.normalize(target) + + return torch.matmul(normalized_prediction, normalized_target.t()) + + def mlp_similarity(self, prediction: torch.Tensor, target: torch.Tensor) -> torch.Tensor: + """Compute mlp based similarity between prediction tensor and target tensor + Args: + prediction: Tensor of shape [X, H] for prediction embedding + target: Tensor of shape [Y, H] for target to compare + Returns: + Similarity matrix of shape [X, Y] and value range [-1, 1] + """ + + prediction, target = torch.broadcast_tensors(prediction.unsqueeze(1), target.unsqueeze(0)) + + combined = torch.cat([prediction, target], dim=-1) + + return torch.tanh(self.mlp_similarity_model(combined).squeeze(-1)) + + def set_input_tensor(self, input_tensor: torch.Tensor): + pass + + def _load_concept_list(self, config: Union[str, List[str]]) -> List[str]: + if isinstance(config, str): + config = [config] + + result_list = [] + for concept_file in config: + with open(concept_file, "r") as f: + result_list += [x.strip() for x in f.readlines() if x.strip() != ""] + + return result_list + + +def _get_autocast_dtype(precision: str): + if precision in ["bf16", "bf16-mixed"]: + return torch.bfloat16 + if precision in [32, "32", "32-true"]: + return torch.float + if precision in [16, "16", "16-mixed"]: + return torch.half + raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') + + +class MegatronContentFilteringModel(MegatronBaseModel): + def __init__(self, cfg: DictConfig, trainer: Trainer): + super(MegatronContentFilteringModel, self).__init__(cfg, trainer) + + self.model = build_model( + model_provider_func=self.model_provider_func, + wrap_with_ddp=False, + on_cpu=isinstance(self.trainer.accelerator, CPUAccelerator), + virtual_pipeline_model_parallel_size=None, + ) + self.model = self.model[0] + + self.megatron_amp_O2 = cfg.get("megatron_amp_O2", False) + if self.megatron_amp_O2: + if isinstance(self.model, list): + self.model = [ + Float16Module(config=self.model_parallel_config, module=x, precision=cfg.precision) + for x in self.model + ] + else: + self.model = Float16Module( + config=self.model_parallel_config, module=self.model, precision=cfg.precision + ) + + self.autocast_dtype = _get_autocast_dtype(self.trainer.precision) + self.enable_autocast = (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) + + self.init_consumed_samples = 0 + self.mlp_factor = 1.0 + self.emb_factor = 1.0 + + self.validation_metrics = None + + def get_module_list(self): + if isinstance(self.model, Float16Module): + return [self.model.module] + else: + return [self.model] + + def model_provider_func(self, pre_process, post_process): + return ContentFilteringModel(self.cfg, self.model_parallel_config, self.padded_vocab_size, self.tokenizer) + + def forward(self, image: torch.Tensor, mlp_factor: float = 1.0, emb_factor: float = 1.0) -> torch.Tensor: + return self.model(image, mlp_factor, emb_factor) + + def get_forward_output_and_loss_func(self, with_accuracy: bool = False): + def loss_fn(prediction: torch.Tensor, target: torch.Tensor): + loss = F.binary_cross_entropy_with_logits(prediction, target) + out_dict = {"loss": loss} + + if with_accuracy: + accuracy_components = torch.stack( + [ + ((prediction > 0) & (target == 1.0)).sum(), # tp + ((prediction < 0) & (target == 0.0)).sum(), # tn + ((prediction > 0) & (target == 0.0)).sum(), # fp + ((prediction < 0) & (target == 1.0)).sum(), # fn + ] + ) + out_dict["accuracy"] = accuracy_components + + return loss, out_dict + + def forward_step(dataloader_iter, model): + images, labels = next(dataloader_iter) + + if ( + parallel_state.get_pipeline_model_parallel_world_size() == 1 + or parallel_state.is_pipeline_first_stage() + ): + images = images.cuda(non_blocking=True) + labels = labels.cuda(non_blocking=True) + else: + images, labels = None, None + + classification = model(images, mlp_factor=self.mlp_factor, emb_factor=self.emb_factor) + + return classification.squeeze(-1), functools.partial(loss_fn, target=labels.float()) + + return forward_step + + def get_forward_embedding_func(self): + def forward_step(dataloader_iter, model): + concepts = next(dataloader_iter) + concepts = tokenize(concepts, self.tokenizer, self.cfg.text.max_position_embeddings) + return (model.text_encoder(concepts.cuda(non_blocking=True)), lambda x: (0.0, {"concepts": x})) + + return forward_step + + def fwd_bwd_step(self, dataloader_iter, batch_idx: int, forward_only: bool): + fwd_bwd_function = get_forward_backward_func() + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(with_accuracy=forward_only), + data_iterator=dataloader_iter, + model=self.model, + num_microbatches=get_num_microbatches(), + forward_only=forward_only, + seq_length=None, + micro_batch_size=self.cfg.micro_batch_size, + ) + + metrics = None + if losses_reduced_per_micro_batch: + loss_mean = torch.stack([l["loss"] for l in losses_reduced_per_micro_batch]).mean() + if forward_only: + metrics = torch.stack([l["accuracy"] for l in losses_reduced_per_micro_batch]).sum(dim=0) + else: + loss_mean = 0.0 + + return loss_mean, metrics + + def training_step(self, dataloader_iter, batch_idx): + self._optimizer.zero_grad() + + loss_mean, _ = self.fwd_bwd_step(dataloader_iter, batch_idx, forward_only=False) + + if self.megatron_amp_O2: + self._optimizer.allreduce_main_grads() + else: + self.allreduce_gradients() + + torch.distributed.broadcast(loss_mean, get_last_rank()) + if self.cfg.precision == 16: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log("loss_scale", loss_scale, batch_size=1, prog_bar=True) + + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, rank_zero_only=True, batch_size=1, prog_bar=True) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log( + 'consumed_samples', + self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), + prog_bar=True, + rank_zero_only=True, + batch_size=1, + ) + + return loss_mean + + def validation_step(self, dataloader_iter, batch_idx): + loss, metrics = self.fwd_bwd_step(dataloader_iter, batch_idx, forward_only=True) + if self.validation_metrics is None: + self.validation_metrics = metrics + else: + self.validation_metrics += metrics + + self.validation_step_outputs.append(loss) + return loss + + def on_validation_epoch_end(self): + torch.distributed.all_reduce(self.validation_metrics, op=torch.distributed.ReduceOp.SUM) + accuracy = (self.validation_metrics[0] + self.validation_metrics[1]) / self.validation_metrics.sum() + self.validation_metrics = None + + averaged_metrics = 0 + if parallel_state.is_pipeline_last_stage(): + averaged_metrics = torch.stack(self.validation_step_outputs).mean() + torch.distributed.broadcast(averaged_metrics, get_last_rank()) + self.log("val_loss", averaged_metrics, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log("accuracy", accuracy, prog_bar=True, rank_zero_only=True, batch_size=1) + + logging.info(f"Current evaluation accuracy: {accuracy}") + + return averaged_metrics + + def test_step(self, dataloader_iter, batch_idx): + return self.validation_step(dataloader_iter, batch_idx) + + def backward(self, *args, **kwargs): + pass + + def optimizer_zero_grad(self, *args, **kwargs): + pass + + def on_fit_start(self): + if self.model.text_encoder is not None: + fwd_bwd_function = get_forward_backward_func() + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_embedding_func(), + data_iterator=iter([self.model.concept_list]), + model=self.model, + num_microbatches=get_num_microbatches(), + forward_only=True, + seq_length=None, + micro_batch_size=self.model.concept_count, + ) + + concepts = torch.cat([x["concepts"] for x in losses_reduced_per_micro_batch], dim=0) + self.model.initialize_concept_embeddings(concepts) + self._cfg["text"] = None + + def setup(self, stage): + resume_checkpoint_path = self.trainer.ckpt_path + self.init_consumed_samples = ( + self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) if resume_checkpoint_path else 0 + ) + self.setup_training_data(self.cfg) + self.setup_validation_data(self.cfg) + + def setup_training_data(self, cfg: DictConfig) -> None: + logging.info("Setting up training dataset.") + train_ds = build_dataset(cfg, self.compute_consumed_samples(0), is_train=True) + + sampler = torch.utils.data.distributed.DistributedSampler( + train_ds, num_replicas=self.trainer.world_size, rank=self.trainer.global_rank, shuffle=True + ) + + self._train_dl = torch.utils.data.DataLoader( + train_ds, + sampler=sampler, + batch_size=cfg.micro_batch_size, + num_workers=cfg.data.num_workers, + pin_memory=True, + drop_last=cfg.data.train.get("drop_last", True), + persistent_workers=True, + ) + + def setup_validation_data(self, cfg: DictConfig) -> None: + logging.info("Setting up validation dataset.") + val_ds = build_dataset(cfg, self.compute_consumed_samples(0), is_train=False) + + sampler = torch.utils.data.distributed.DistributedSampler( + val_ds, num_replicas=self.trainer.world_size, rank=self.trainer.global_rank, shuffle=True + ) + + self._validation_dl = torch.utils.data.DataLoader( + val_ds, + sampler=sampler, + batch_size=cfg.micro_batch_size, + num_workers=cfg.data.num_workers, + pin_memory=True, + drop_last=cfg.data.validation.get("drop_last", True), + persistent_workers=True, + ) + + def parameters(self): + return itertools.chain(self.model.mlp_similarity_model.parameters(), self.model.nn_classifier.parameters()) + + def on_load_checkpoint(self, checkpoint) -> None: + if "model.concepts" in checkpoint["state_dict"]: + self.model.text_encoder = None + + @classmethod + def list_available_models(cls) -> Optional[PretrainedModelInfo]: + return None From 0696c119abc9cc706f453c8fc87e0477fc49e5db Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Wed, 4 Oct 2023 10:57:53 -0700 Subject: [PATCH 225/512] Added synthetic dataloader for precached and online mode --- .../stable_diffusion/conf/sd2_train.yaml | 3 + .../stable_diffusion/conf/sd_train.yaml | 4 +- .../stable_diffusion_dataset.py | 134 ++++++++++++++---- 3 files changed, 110 insertions(+), 31 deletions(-) diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml index 12e328bc3110..3cfc822f8462 100644 --- a/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml +++ b/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml @@ -15,6 +15,7 @@ trainer: gradient_clip_val: 1.0 benchmark: False enable_model_summary: True + limit_val_batches: 0 exp_manager: exp_dir: null @@ -175,6 +176,8 @@ model: data: num_workers: 16 + synthetic_data: False # dataset_path and local_root_path can be empty when using synthetic data + synthetic_data_length: 10000 train: dataset_path: - /datasets/coyo/test.pkl diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml index a8973186dc4d..938f898cc504 100644 --- a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml @@ -186,9 +186,11 @@ model: data: num_workers: 16 + synthetic_data: False # dataset_path and local_root_path can be empty when using synthetic data + synthetic_data_length: 10000 train: dataset_path: - - /datasets/coyo/test.pkl + - /datasets/coyo/wdinfo.pkl augmentations: resize_smallest_side: 512 center_crop_h_w: 512, 512 diff --git a/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py b/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py index 018735377c4d..963982e14cb6 100644 --- a/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py +++ b/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py @@ -18,6 +18,40 @@ construct_image_augmentations, identical_transform, ) +from nemo.core.classes import Dataset as NeMoDataset +from nemo.utils import logging + + +class SDSyntheticDataset(NeMoDataset): + def __init__( + self, image_H, image_W, fake_len=100000, image_key='images', txt_key='txt', seq_len=80, context_dim=768 + ): + super().__init__() + self.fake_len = fake_len + self.H = image_H + self.W = image_W + self.image_key = image_key + self.txt_key = txt_key + assert image_key.endswith('encoded') == txt_key.endswith( + 'encoded' + ), 'In precached mode, first and second stage key must both end with "encoded"' + self.precached = self.image_key.endswith('encoded') + self.seq_len = seq_len + self.context_dim = context_dim + + def __getitem__(self, index): + item = {} + if self.precached: + item[self.image_key] = torch.randn(8, self.H // 8, self.W // 8) + item[self.txt_key] = torch.randn(self.seq_len, self.context_dim) + else: + item[self.image_key] = torch.randn(self.H, self.W, 3) + item[self.txt_key] = f'This is meaningless fake text No.{index}' + + return item + + def __len__(self): + return self.fake_len def build_train_valid_datasets( @@ -28,17 +62,17 @@ def build_train_valid_datasets( def build_resolution_filter(value=None, method='larger'): assert method == 'larger' or method == 'smaller' if method == 'larger': - print(f'Only Selecting images with resolution >= {value}') + logging.info(f'Only Selecting images with resolution >= {value}') return lambda x: x['jpg'].size[0] >= value and x['jpg'].size[1] >= value - print(f'Only Selecting images with resolution <= {value}') + logging.info(f'Only Selecting images with resolution <= {value}') return lambda x: x['jpg'].size[0] <= value and x['jpg'].size[1] <= value # This function maps data that are tuples to dictionary. def tuple_to_dict(inp): for input in inp: out_dict = dict() - out_dict['images'] = input[0].permute(1, 2, 0) - out_dict['captions'] = input[1] + out_dict[model_cfg.first_stage_key] = input[0].permute(1, 2, 0) + out_dict[model_cfg.cond_stage_key] = input[1] yield out_dict def transform_fn(sample): @@ -48,28 +82,48 @@ def transform_fn(sample): text_transform = identical_transform return img_transform(image), text_transform(text) - filter_cfg = data_cfg.train.get('filterings', None) - filter_fn = build_resolution_filter(**filter_cfg.resolution) if filter_cfg else None - train_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=consumed_samples, - map_fn=transform_fn, - compose_fn=tuple_to_dict, - filter_fn=filter_fn, - is_train=True, - ) + if data_cfg.get('synthetic_data', False): + H, W = data_cfg.train.augmentations.center_crop_h_w.split(',') + train_data = SDSyntheticDataset( + int(H), + int(W), + image_key=model_cfg.first_stage_key, + txt_key=model_cfg.cond_stage_key, + context_dim=model_cfg.unet_config.context_dim, + ) - val_data = None - if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): - val_data = WebDatasetCommon( + else: + filter_cfg = data_cfg.train.get('filterings', None) + filter_fn = build_resolution_filter(**filter_cfg.resolution) if filter_cfg else None + train_data = WebDatasetCommon( dataset_cfg=data_cfg, consumed_samples=consumed_samples, map_fn=transform_fn, compose_fn=tuple_to_dict, filter_fn=filter_fn, - is_train=False, + is_train=True, ) + val_data = None + if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): + if data_cfg.get('synthetic_data', False): + val_data = SDSyntheticDataset( + int(H), + int(W), + image_key=model_cfg.first_stage_key, + txt_key=model_cfg.cond_stage_key, + context_dim=model_cfg.unet_config.context_dim, + ) + else: + val_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + filter_fn=filter_fn, + is_train=False, + ) + return train_data, val_data @@ -89,22 +143,42 @@ def tuple_to_dict(inp): def transform_fn(sample): return sample['pickle'] - train_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=consumed_samples, - map_fn=transform_fn, - compose_fn=tuple_to_dict, - is_train=True, - ) - - val_data = None - if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): - val_data = WebDatasetCommon( + if data_cfg.get('synthetic_data', False): + H, W = data_cfg.train.augmentations.center_crop_h_w.split(',') + train_data = SDSyntheticDataset( + int(H), + int(W), + image_key=model_cfg.first_stage_key, + txt_key=model_cfg.cond_stage_key, + context_dim=model_cfg.unet_config.context_dim, + ) + else: + train_data = WebDatasetCommon( dataset_cfg=data_cfg, consumed_samples=consumed_samples, map_fn=transform_fn, compose_fn=tuple_to_dict, - is_train=False, + is_train=True, ) + val_data = None + if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): + if data_cfg.get('synthetic_data', False): + H, W = data_cfg.train.augmentations.center_crop_h_w.split(',') + train_data = SDSyntheticDataset( + int(H), + int(W), + image_key=model_cfg.first_stage_key, + txt_key=model_cfg.cond_stage_key, + context_dim=model_cfg.unet_config.context_dim, + ) + else: + val_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=False, + ) + return train_data, val_data From 2cda57304c682e8b337f68af25f9f4b8e90b35e1 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Wed, 4 Oct 2023 16:48:34 -0700 Subject: [PATCH 226/512] Mingyuanm/dreambooth opt --- .../dreambooth/conf/dreambooth.yaml | 22 ++-- .../generative/dreambooth/dreambooth.py | 7 +- .../stable_diffusion/conf/sd_train.yaml | 4 +- .../data/dreambooth/dreambooth_dataset.py | 90 ++++++++++++--- .../models/dreambooth/dreambooth.py | 37 ++++-- .../stable_diffusion/ldm/autoencoder.py | 87 ++------------- .../parts/stable_diffusion/pipeline.py | 6 +- nemo/collections/nlp/parts/nlp_overrides.py | 105 ++++++++++++++++++ 8 files changed, 239 insertions(+), 119 deletions(-) diff --git a/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml b/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml index 4ce79b569993..771c6d85d82a 100644 --- a/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml +++ b/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml @@ -1,10 +1,10 @@ name: Dreambooth trainer: - devices: 2 + devices: 1 num_nodes: 1 accelerator: gpu - precision: 16 + precision: bf16-mixed logger: False # logger provided by exp_manager enable_checkpointing: False use_distributed_sampler: False @@ -23,7 +23,7 @@ exp_manager: create_checkpoint_callback: True create_tensorboard_logger: True checkpoint_callback_params: - every_n_train_steps: 100 + every_n_train_steps: 200 every_n_epochs: 0 monitor: reduced_train_loss save_on_train_epoch_end: False @@ -46,10 +46,10 @@ model: # specify micro_batch_size, global_batch_size, and model parallelism # gradient accumulation will be done automatically based on data_parallel_size micro_batch_size: 2 # limited by GPU memory - global_batch_size: 4 # will use more micro batches to reach global batch size + global_batch_size: 2 # will use more micro batches to reach global batch size with_prior_preservation: False - pretrained_ckpt: + use_cached_latents: True prior_loss_weight: 0.5 train_text_encoder: False restore_from_path: /ckpts/nemo-v1-5-188000-ema.nemo #This ckpt is only used to generate regularization images, thus .nemo ckpt is needed @@ -63,7 +63,7 @@ model: log_every_t: 200 timesteps: 1000 first_stage_key: images - cond_stage_key: captions # txt for cifar, caption for pbss + cond_stage_key: captions image_size: 64 channels: 4 cond_stage_trainable: false @@ -94,10 +94,11 @@ model: fused_opt: True inductor: False inductor_cudagraphs: False + channels_last: False unet_config: _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel - from_pretrained: /ckpts/unet-v1-5.bin #load unet weights for finetuning, can use .ckpt ckpts from various sources + from_pretrained: /ckpts/unet.bin #load unet weights for finetuning, can use .ckpt ckpts from various sources from_NeMo: False #Must be specified when from pretrained is not None, False means loading unet from HF ckpt image_size: 32 # unused in_channels: 4 @@ -196,15 +197,16 @@ model: data: name: pbss num_workers: 4 - instance_dir: /dataset/instance_dir + instance_dir: /datasets/instance_dir instance_prompt: a photo of a sks dog - regularization_dir: /dataset/nemo_dogs + regularization_dir: /datasets/nemo_dogs regularization_prompt: a photo of a dog num_reg_images: 200 num_images_per_prompt: 4 - train_batch_size: 2 resolution: 512 center_crop: True + cached_instance_dir: #/datasets/instance_dir_cached + cached_reg_dir: #/datasets/nemo_dogs_cached ##The below infer config is to use inference script generating regularization images infer: diff --git a/examples/multimodal/generative/dreambooth/dreambooth.py b/examples/multimodal/generative/dreambooth/dreambooth.py index f3476b481f0f..777484734eb0 100644 --- a/examples/multimodal/generative/dreambooth/dreambooth.py +++ b/examples/multimodal/generative/dreambooth/dreambooth.py @@ -75,7 +75,9 @@ def model_cfg_modifier(model_cfg): images_to_generate = cfg.model.data.num_reg_images - NUM_REG_IMAGES images_to_generate = images_to_generate // trainer.world_size - logging.info(f"No enough images in regularization folder, generating {images_to_generate} from provided ckpt") + logging.info( + f"No enough images in regularization folder, generating {images_to_generate} from provided ckpt on each device" + ) for i in range(images_to_generate // num_images_per_prompt + 1): output = pipeline(model, cfg, verbose=False, rng=rng) @@ -100,6 +102,9 @@ def main(cfg): torch.backends.cuda.matmul.allow_tf32 = True + prepare_reg_data(cfg) + parallel_state.destroy_model_parallel() + trainer = MegatronTrainerBuilder(cfg).create_trainer() exp_manager(trainer, cfg.exp_manager) diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml index 938f898cc504..ce865e7d739f 100644 --- a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml @@ -89,14 +89,14 @@ model: cond_stage_forward: text_embedding_dropout_rate: 0.1 fused_opt: True - inductor: True + inductor: False inductor_cudagraphs: False capture_cudagraph_iters: -1 # -1 to disable channels_last: True unet_config: _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel - from_pretrained: /ckpts/nemo-v1-2.ckpt + from_pretrained: #/ckpts/nemo-v1-2.ckpt from_NeMo: True #Must be specified when from pretrained is not None, False means loading unet from HF ckpt image_size: 32 # unused in_channels: 4 diff --git a/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py b/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py index 5392d4ed827d..492cc00ec5cc 100644 --- a/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py +++ b/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py @@ -11,13 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os import pickle from pathlib import Path import torch from PIL import Image +from pytorch_lightning.utilities import rank_zero_only from torch.utils.data import Dataset from torchvision import transforms +from tqdm import tqdm class DreamBoothDataset(Dataset): @@ -30,29 +33,37 @@ def __init__( self, instance_data_root, instance_prompt, + with_prior_preservation=False, reg_data_root=None, reg_prompt=None, size=512, - center_crop=False, - repeat=100, + center_crop=True, + repeat=10000, + load_cache_latents=False, + cached_instance_data_root=None, + cached_reg_data_root=None, + vae=None, + text_encoder=None, ): self.size = size self.center_crop = center_crop + assert instance_data_root or cached_instance_data_root, "must provide instance images to start training." self.instance_data_root = Path(instance_data_root) - if not self.instance_data_root.exists(): - raise ValueError("Instance images root doesn't exists.") + self.cached_instance_data_root = cached_instance_data_root + self.cached_reg_data_root = cached_reg_data_root self.instance_images_path = list(Path(instance_data_root).iterdir()) self.num_instance_images = len(self.instance_images_path) self.instance_prompt = instance_prompt self._length = self.num_instance_images * repeat + self.load_cache_latents = load_cache_latents + self.with_prior_preservation = with_prior_preservation if reg_data_root is not None: self.reg_data_root = Path(reg_data_root) self.reg_images_path = list(self.reg_data_root.iterdir()) self.num_reg_images = len(self.reg_images_path) - self._length = max(self.num_reg_images, self.num_instance_images) self.reg_prompt = reg_prompt else: self.reg_data_root = None @@ -66,22 +77,75 @@ def __init__( ] ) + if self.load_cache_latents: + if (self.cached_instance_data_root is None) or ( + self.with_prior_preservation and self.cached_reg_data_root is None + ): + self.cache_latents(vae, text_encoder) + + self.cached_instance_data_root = f'{self.instance_data_root}_cached' + self.cached_reg_data_root = f'{self.reg_data_root}_cached' + self.instance_images_path = list(Path(self.cached_instance_data_root).iterdir()) + self.num_instance_images = len(self.instance_images_path) + self.reg_images_path = list(Path(self.cached_reg_data_root).iterdir()) + self.num_reg_images = len(self.reg_images_path) + + if self.cached_instance_data_root: + self.instance_images_path = list(Path(self.cached_instance_data_root).iterdir()) + self.num_instance_images = len(self.instance_images_path) + if self.with_prior_preservation and self.cached_reg_data_root: + self.reg_images_path = list(Path(self.cached_reg_data_root).iterdir()) + self.num_reg_images = len(self.reg_images_path) + def __len__(self): return self._length + def get_image(self, path): + image = Image.open(path) + if not image.mode == "RGB": + image = image.convert("RGB") + image = self.image_transforms(image) + return image + def __getitem__(self, index): example = {} - instance_image = Image.open(self.instance_images_path[index % self.num_instance_images]) - if not instance_image.mode == "RGB": - instance_image = instance_image.convert("RGB") - example["instance_images"] = self.image_transforms(instance_image) + if self.load_cache_latents: + example["instance_images"] = torch.load(self.instance_images_path[index % self.num_instance_images]) + else: + example["instance_images"] = self.get_image(self.instance_images_path[index % self.num_instance_images]) example["instance_prompt"] = self.instance_prompt if self.reg_data_root: - reg_image = Image.open(self.reg_images_path[index % self.num_reg_images]) - if not reg_image.mode == "RGB": - reg_image = reg_image.convert("RGB") - example["reg_images"] = self.image_transforms(reg_image) + if self.load_cache_latents: + example["reg_images"] = torch.load(self.reg_images_path[index % self.num_reg_images]) + else: + example["reg_images"] = self.get_image(self.reg_images_path[index % self.num_reg_images]) example["reg_prompt"] = self.reg_prompt return example + + @rank_zero_only + def cache_latents(self, vae, text_encoder): + os.makedirs(f'{self.instance_data_root}_cached', exist_ok=True) + self.cached_instance_data_root = f'{self.instance_data_root}_cached' + self.cached_reg_data_root = f'{self.reg_data_root}_cached' + if self.instance_data_root and (self.cached_instance_data_root is None): + + for i in tqdm(range(self.num_instance_images)): + if len(os.listdir(self.cached_instance_data_root)) == self.num_instance_images: + break + x = torch.Tensor(self.get_image(self.instance_images_path[i % self.num_instance_images])) + x = torch.unsqueeze(x, dim=0) + params = vae.encode(x).parameters.squeeze(dim=0) + torch.save(params, f'{self.instance_data_root}_cached/instance_image_cache_{i}.pt') + + if self.with_prior_preservation and self.reg_data_root and (self.cached_reg_data_root is None): + os.makedirs(f'{self.reg_data_root}_cached', exist_ok=True) + + for i in tqdm(range(self.num_reg_images)): + if len(os.listdir(self.cached_reg_data_root)) == self.num_reg_images: + break + x = torch.Tensor(self.get_image(self.reg_images_path[i % self.num_reg_images])) + x = torch.unsqueeze(x, dim=0) + params = vae.encode(x).parameters.squeeze(dim=0) + torch.save(params, f'{self.reg_data_root}_cached/reg_image_cache_{i}.pt') diff --git a/nemo/collections/multimodal/models/dreambooth/dreambooth.py b/nemo/collections/multimodal/models/dreambooth/dreambooth.py index c6f2d810c297..5e4abd8522d7 100644 --- a/nemo/collections/multimodal/models/dreambooth/dreambooth.py +++ b/nemo/collections/multimodal/models/dreambooth/dreambooth.py @@ -31,6 +31,9 @@ make_beta_schedule, noise_like, ) +from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import ( + DiagonalGaussianDistribution, +) from nemo.collections.multimodal.parts.stable_diffusion.utils import default, exists from nemo.collections.multimodal.parts.utils import randn_like from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import MegatronPretrainingRandomSampler @@ -90,7 +93,6 @@ def __init__(self, cfg, model_parallel_config): self.config = model_parallel_config self.with_prior_preservation = self.cfg.with_prior_preservation self.num_reg_images = self.cfg.data.num_reg_images - self.pretrained_ckpt = self.cfg.pretrained_ckpt self.prior_loss_weight = self.cfg.prior_loss_weight self.num_images_per_prompt = self.cfg.data.num_images_per_prompt @@ -111,6 +113,11 @@ def __init__(self, cfg, model_parallel_config): self.model_type = None self.rng = torch.Generator(device=torch.cuda.current_device(),) + self.use_cached_latents = self.cfg.use_cached_latents + + if self.cfg.channels_last: + self.unet = self.unet.to(memory_format=torch.channels_last) + def instantiate_unet(self, cfg): self.unet = DreamBooth.from_config_dict(cfg) self.unet.train() @@ -145,10 +152,14 @@ def get_noise_scheduler(self, cfg): self.noise_scheduler = model.eval() def forward(self, batch): - x, cond = batch - latents = self.vae.encode(x).sample().detach() - latents = latents * self.scale_factor + x, cond = batch + if self.use_cached_latents: + x = DiagonalGaussianDistribution(x) + latents = x.sample().detach() * self.scale_factor + else: + latents = self.vae.encode(x).sample().detach() + latents = latents * self.scale_factor noise = randn_like(latents, generator=self.rng) t = torch.randint(0, self.num_timesteps, (latents.shape[0],), generator=self.rng, device=latents.device).long() @@ -177,7 +188,6 @@ def forward(self, batch): else: loss = torch.nn.functional.mse_loss(target.float(), model_output.float(), reduction="mean") - return loss def parameters(self): @@ -326,7 +336,7 @@ def training_step(self, dataloader_iter, batch_idx): if self.cfg.precision in [16, '16', '16-mixed']: loss_scale = self.trainer.precision_plugin.scaler._scale if loss_scale is not None: - self.log('loss_scale', loss_scale, batch_size=1) + self.log('loss_scale', loss_scale, prog_bar=True, batch_size=1) self.log_dict(loss_dict, prog_bar=False, logger=True, on_step=True, rank_zero_only=True, batch_size=1) self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) @@ -378,7 +388,6 @@ def get_forward_output_and_loss_func(self): def process_batch(batch): # noise_map, condition prompts, images = batch - # DB has more dedicated structure for encoding, so we enable autocasting here as well with torch.cuda.amp.autocast( self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, @@ -461,17 +470,25 @@ def setup_training_data(self, cfg): if cfg.regularization_prompt is None: raise ValueError("Regularization prompts must be provided to train with prior preservation loss") - train_dataset = DreamBoothDataset( + self.train_dataset = DreamBoothDataset( instance_data_root=cfg.instance_dir, instance_prompt=cfg.instance_prompt, + with_prior_preservation=self.cfg.with_prior_preservation, reg_data_root=cfg.regularization_dir if self.cfg.with_prior_preservation else None, reg_prompt=cfg.regularization_prompt if self.cfg.with_prior_preservation else None, size=cfg.resolution, center_crop=cfg.center_crop, + load_cache_latents=self.model.use_cached_latents, + cached_instance_data_root=self.cfg.data.get("cached_instance_dir", None), + cached_reg_data_root=self.cfg.data.get("cached_reg_dir", None) + if self.cfg.with_prior_preservation + else None, + vae=self.model.vae, + text_encoder=self.model.text_encoder, ) batch_sampler = MegatronPretrainingRandomSampler( - total_samples=len(train_dataset), + total_samples=len(self.train_dataset), consumed_samples=self.compute_consumed_samples(0), micro_batch_size=self.cfg.micro_batch_size, global_batch_size=self.cfg.global_batch_size, @@ -481,7 +498,7 @@ def setup_training_data(self, cfg): ) self._train_dl = torch.utils.data.DataLoader( - train_dataset, + self.train_dataset, batch_sampler=batch_sampler, collate_fn=partial(_collate_fn, with_prior_preservation=self.cfg.with_prior_preservation), num_workers=cfg.num_workers, diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py index 825923bb5fd4..6f2dd37424d0 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py @@ -469,88 +469,15 @@ def init_from_ckpt(self, path, ignore_keys=list()): print(f"Restored from {path}") def encode(self, x): - if self.training: - if self.encoder_iterations == self.capture_cudagraph_iters: - logging.info("Capturing CUDA graph for module: %s", self.encoder.__class__.__name__) - self.graphed_encoder = torch.cuda.make_graphed_callables(self.encoder, (x,)) - - if 0 <= self.capture_cudagraph_iters <= self.encoder_iterations: - h = self.graphed_encoder(x) - else: - h = self.encoder(x) - self.encoder_iterations += 1 - - moments = self.quant_conv(h) - posterior = DiagonalGaussianDistribution(moments) - return posterior - else: - # create static input and copy input to static buffer - if self.static_x is None: - self.static_x = torch.randn_like(x) - self.static_x.copy_(x) - - if self.encoder_iterations == self.capture_cudagraph_iters: - # cuda graph capture - logging.info("Capturing CUDA graph for module: %s", self.encoder.__class__.__name__) - with torch.cuda.graph(self.encoder_graph): - h = self.encoder(self.static_x) - self.static_moments = self.quant_conv(h) - - if 0 <= self.capture_cudagraph_iters <= self.encoder_iterations: - # cuda graph replay - self.encoder_graph.replay() - else: - # warmup - self.stream.wait_stream(torch.cuda.current_stream()) - with torch.cuda.stream(self.stream): - h = self.encoder(self.static_x) - self.static_moments = self.quant_conv(h) - torch.cuda.current_stream().wait_stream(self.stream) - self.encoder_iterations += 1 - - posterior = DiagonalGaussianDistribution(self.static_moments) - return posterior + h = self.encoder(x) + moments = self.quant_conv(h) + posterior = DiagonalGaussianDistribution(moments) + return posterior def decode(self, z): - if self.training: - if self.decoder_iterations == self.capture_cudagraph_iters: - logging.info("Capturing CUDA graph for module: %s", self.decoder.__class__.__name__) - self.graphed_decoder = torch.cuda.make_graphed_callables(self.decoder, (z,)) - - h = self.post_quant_conv(z) - if 0 <= self.capture_cudagraph_iters <= self.decoder_iterations: - dec = self.graphed_decoder(h) - else: - dec = self.decoder(h) - self.decoder_iterations += 1 - - return dec - else: - # create static input and copy input to static buffer - if self.static_z is None: - self.static_z = torch.randn_like(z) - self.static_z.copy_(z) - - if self.decoder_iterations == self.capture_cudagraph_iters: - # cuda graph capture - logging.info("Capturing CUDA graph for module: %s", self.decoder.__class__.__name__) - with torch.cuda.graph(self.decoder_graph): - h = self.post_quant_conv(self.static_z) - self.static_dec = self.decoder(h) - - if 0 <= self.capture_cudagraph_iters <= self.decoder_iterations: - # cuda graph replay - self.decoder_graph.replay() - else: - # warmup - self.stream.wait_stream(torch.cuda.current_stream()) - with torch.cuda.stream(self.stream): - h = self.post_quant_conv(self.static_z) - self.static_dec = self.decoder(h) - torch.cuda.current_stream().wait_stream(self.stream) - self.decoder_iterations += 1 - - return self.static_dec + z = self.post_quant_conv(z) + dec = self.decoder(z) + return dec def forward(self, input, sample_posterior=True): posterior = self.encode(input) diff --git a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py index 7ea058a396b2..8fbbe16f6642 100644 --- a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py +++ b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py @@ -104,11 +104,11 @@ def pipeline(model, cfg, verbose=True, rng=None): ) # get autocast_dtype - if cfg.trainer.precision == 'bf16': + if cfg.trainer.precision in ['bf16', 'bf16-mixed']: autocast_dtype = torch.bfloat16 - elif int(cfg.trainer.precision) == 32: + elif int(cfg.trainer.precision) in ['32', '32-tru']: autocast_dtype = torch.float - elif int(cfg.trainer.precision) == 16: + elif int(cfg.trainer.precision) in ['16', '16-mixed']: autocast_dtype = torch.half else: raise ValueError('precision must be in [32, 16, "bf16"]') diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index e9e96d597f06..a4eb161be42c 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -589,6 +589,111 @@ def modify_state_dict(self, conf, state_dict): new_state_dict[new_key] = state_dict[key] state_dict = new_state_dict + loaded_keys = state_dict.keys() + if 'model.model.diffusion_model.input_blocks.1.0.in_layers.2.weight' in loaded_keys: + new_state_dict = {} + # GroupNormOpt fuses activation function to one layer, thus the indexing of weights are shifted for following + for key_ in state_dict.keys(): + if key_ == "model.cond_stage_model.transformer.text_model.embeddings.position_ids": + continue + if key_ in [ + "model.model.diffusion_model.input_blocks.1.0.in_layers.2.weight", + "model.model.diffusion_model.input_blocks.1.0.in_layers.2.bias", + "model.model.diffusion_model.input_blocks.1.0.out_layers.3.weight", + "model.model.diffusion_model.input_blocks.1.0.out_layers.3.bias", + "model.model.diffusion_model.input_blocks.2.0.in_layers.2.weight", + "model.model.diffusion_model.input_blocks.2.0.in_layers.2.bias", + "model.model.diffusion_model.input_blocks.2.0.out_layers.3.weight", + "model.model.diffusion_model.input_blocks.2.0.out_layers.3.bias", + "model.model.diffusion_model.input_blocks.4.0.in_layers.2.weight", + "model.model.diffusion_model.input_blocks.4.0.in_layers.2.bias", + "model.model.diffusion_model.input_blocks.4.0.out_layers.3.weight", + "model.model.diffusion_model.input_blocks.4.0.out_layers.3.bias", + "model.model.diffusion_model.input_blocks.5.0.in_layers.2.weight", + "model.model.diffusion_model.input_blocks.5.0.in_layers.2.bias", + "model.model.diffusion_model.input_blocks.5.0.out_layers.3.weight", + "model.model.diffusion_model.input_blocks.5.0.out_layers.3.bias", + "model.model.diffusion_model.input_blocks.7.0.in_layers.2.weight", + "model.model.diffusion_model.input_blocks.7.0.in_layers.2.bias", + "model.model.diffusion_model.input_blocks.7.0.out_layers.3.weight", + "model.model.diffusion_model.input_blocks.7.0.out_layers.3.bias", + "model.model.diffusion_model.input_blocks.8.0.in_layers.2.weight", + "model.model.diffusion_model.input_blocks.8.0.in_layers.2.bias", + "model.model.diffusion_model.input_blocks.8.0.out_layers.3.weight", + "model.model.diffusion_model.input_blocks.8.0.out_layers.3.bias", + "model.model.diffusion_model.input_blocks.10.0.in_layers.2.weight", + "model.model.diffusion_model.input_blocks.10.0.in_layers.2.bias", + "model.model.diffusion_model.input_blocks.10.0.out_layers.3.weight", + "model.model.diffusion_model.input_blocks.10.0.out_layers.3.bias", + "model.model.diffusion_model.input_blocks.11.0.in_layers.2.weight", + "model.model.diffusion_model.input_blocks.11.0.in_layers.2.bias", + "model.model.diffusion_model.input_blocks.11.0.out_layers.3.weight", + "model.model.diffusion_model.input_blocks.11.0.out_layers.3.bias", + "model.model.diffusion_model.middle_block.0.in_layers.2.weight", + "model.model.diffusion_model.middle_block.0.in_layers.2.bias", + "model.model.diffusion_model.middle_block.0.out_layers.3.weight", + "model.model.diffusion_model.middle_block.0.out_layers.3.bias", + "model.model.diffusion_model.middle_block.2.in_layers.2.weight", + "model.model.diffusion_model.middle_block.2.in_layers.2.bias", + "model.model.diffusion_model.middle_block.2.out_layers.3.weight", + "model.model.diffusion_model.middle_block.2.out_layers.3.bias", + "model.model.diffusion_model.output_blocks.0.0.in_layers.2.weight", + "model.model.diffusion_model.output_blocks.0.0.in_layers.2.bias", + "model.model.diffusion_model.output_blocks.0.0.out_layers.3.weight", + "model.model.diffusion_model.output_blocks.0.0.out_layers.3.bias", + "model.model.diffusion_model.output_blocks.1.0.in_layers.2.weight", + "model.model.diffusion_model.output_blocks.1.0.in_layers.2.bias", + "model.model.diffusion_model.output_blocks.1.0.out_layers.3.weight", + "model.model.diffusion_model.output_blocks.1.0.out_layers.3.bias", + "model.model.diffusion_model.output_blocks.2.0.in_layers.2.weight", + "model.model.diffusion_model.output_blocks.2.0.in_layers.2.bias", + "model.model.diffusion_model.output_blocks.2.0.out_layers.3.weight", + "model.model.diffusion_model.output_blocks.2.0.out_layers.3.bias", + "model.model.diffusion_model.output_blocks.3.0.in_layers.2.weight", + "model.model.diffusion_model.output_blocks.3.0.in_layers.2.bias", + "model.model.diffusion_model.output_blocks.3.0.out_layers.3.weight", + "model.model.diffusion_model.output_blocks.3.0.out_layers.3.bias", + "model.model.diffusion_model.output_blocks.4.0.in_layers.2.weight", + "model.model.diffusion_model.output_blocks.4.0.in_layers.2.bias", + "model.model.diffusion_model.output_blocks.4.0.out_layers.3.weight", + "model.model.diffusion_model.output_blocks.4.0.out_layers.3.bias", + "model.model.diffusion_model.output_blocks.5.0.in_layers.2.weight", + "model.model.diffusion_model.output_blocks.5.0.in_layers.2.bias", + "model.model.diffusion_model.output_blocks.5.0.out_layers.3.weight", + "model.model.diffusion_model.output_blocks.5.0.out_layers.3.bias", + "model.model.diffusion_model.output_blocks.6.0.in_layers.2.weight", + "model.model.diffusion_model.output_blocks.6.0.in_layers.2.bias", + "model.model.diffusion_model.output_blocks.6.0.out_layers.3.weight", + "model.model.diffusion_model.output_blocks.6.0.out_layers.3.bias", + "model.model.diffusion_model.output_blocks.7.0.in_layers.2.weight", + "model.model.diffusion_model.output_blocks.7.0.in_layers.2.bias", + "model.model.diffusion_model.output_blocks.7.0.out_layers.3.weight", + "model.model.diffusion_model.output_blocks.7.0.out_layers.3.bias", + "model.model.diffusion_model.output_blocks.8.0.in_layers.2.weight", + "model.model.diffusion_model.output_blocks.8.0.in_layers.2.bias", + "model.model.diffusion_model.output_blocks.8.0.out_layers.3.weight", + "model.model.diffusion_model.output_blocks.8.0.out_layers.3.bias", + "model.model.diffusion_model.output_blocks.9.0.in_layers.2.weight", + "model.model.diffusion_model.output_blocks.9.0.in_layers.2.bias", + "model.model.diffusion_model.output_blocks.9.0.out_layers.3.weight", + "model.model.diffusion_model.output_blocks.9.0.out_layers.3.bias", + "model.model.diffusion_model.output_blocks.10.0.in_layers.2.weight", + "model.model.diffusion_model.output_blocks.10.0.in_layers.2.bias", + "model.model.diffusion_model.output_blocks.10.0.out_layers.3.weight", + "model.model.diffusion_model.output_blocks.10.0.out_layers.3.bias", + "model.model.diffusion_model.output_blocks.11.0.in_layers.2.weight", + "model.model.diffusion_model.output_blocks.11.0.in_layers.2.bias", + "model.model.diffusion_model.output_blocks.11.0.out_layers.3.weight", + "model.model.diffusion_model.output_blocks.11.0.out_layers.3.bias", + ]: + s = key_.split('.') + idx = int(s[-2]) + new_key_ = ".".join(s[:-2] + [str(int(idx - 1))] + [s[-1]]) + new_state_dict[new_key_] = state_dict[key_] + else: + new_state_dict[key_] = state_dict[key_] + state_dict = new_state_dict + return state_dict def _load_state_dict_from_disk(self, model_weights, map_location=None): From eb252f9c7f7d6a78f0ad607c325a64b7ecc6e256 Mon Sep 17 00:00:00 2001 From: Yu Yao Date: Tue, 3 Oct 2023 10:49:56 -0700 Subject: [PATCH 227/512] Add llama2 support in neva training --- .../clip/conf/megatron_clip_config.yaml | 2 +- .../mllm/kosmos/conf/kosmos_config.yaml | 2 +- .../mllm/neva/conf/llava_config.yaml | 213 +++++++++++ .../mllm/neva/conf/neva_config.yaml | 27 +- .../mllm/neva/conf/neva_finetune.yaml | 8 +- .../mllm/neva/conf/neva_inference.yaml | 9 +- .../multimodal/mllm/neva/conf/neva_peft.yaml | 8 +- .../mllm/neva/convert_hf_llava_to_neva.py | 343 ++++++++++++++++++ .../multimodal/mllm/neva/neva_evaluation.py | 60 ++- examples/multimodal/mllm/neva/neva_export.py | 34 +- .../multimodal/mllm/neva/neva_finetune.py | 4 +- examples/multimodal/mllm/neva/neva_peft.py | 4 +- .../multimodal/data/neva/conversation.py | 14 +- .../multimodal/data/neva/neva_dataset.py | 158 ++++---- .../models/kosmos/megatron_kosmos_model.py | 6 +- .../multimodal/models/neva/neva_model.py | 96 ++++- .../language_modeling/megatron_base_model.py | 2 +- .../common/text_generation_strategy.py | 105 +++--- .../modules/common/text_generation_utils.py | 35 +- .../collections/multimodal/test_clip_model.py | 2 +- tests/collections/vision/test_vit_model.py | 2 +- 21 files changed, 952 insertions(+), 182 deletions(-) create mode 100644 examples/multimodal/mllm/neva/conf/llava_config.yaml create mode 100644 examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py diff --git a/examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml b/examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml index e480e8e28c47..a6b1928ef13f 100644 --- a/examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml +++ b/examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml @@ -58,7 +58,7 @@ model: pipeline_model_parallel_size: 1 # inter-layer model parallelism virtual_pipeline_model_parallel_size: null # interleaved pipeline - restore_from_pretrained: null # used in fine-tuning + restore_from_path: null # used in fine-tuning # multimodal configs output_dim: 512 # As the number of devices used to train increases, so does the space complexity of diff --git a/examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml b/examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml index 920295c19e67..fbe1883276fe 100644 --- a/examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml +++ b/examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml @@ -79,7 +79,7 @@ model: pipeline_model_parallel_size: 1 # inter-layer model parallelism virtual_pipeline_model_parallel_size: null # interleaved pipeline - restore_from_pretrained: null # used in fine-tuning + restore_from_path: null # used in fine-tuning # multimodal configs num_media_latents: 64 # each media is encoded and sampled into `num_media_latents` LM embeddings diff --git a/examples/multimodal/mllm/neva/conf/llava_config.yaml b/examples/multimodal/mllm/neva/conf/llava_config.yaml new file mode 100644 index 000000000000..0b2cf826c606 --- /dev/null +++ b/examples/multimodal/mllm/neva/conf/llava_config.yaml @@ -0,0 +1,213 @@ +name: nemo_neva +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: bf16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 4650 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + val_check_interval: 100 + check_val_every_n_epoch: null + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: nemo_neva + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + precision: ${trainer.precision} + + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + + # Batch size guideline for different types of dataset + micro_batch_size: 16 # limited by GPU memory + global_batch_size: 128 # will use more micro batches to reach global batch size + + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_path: null # used in fine-tuning + + # Multimodal configs + mm_cfg: + llm: + from_pretrained: null # path to nemo checkpoint + freeze: False + model_type: llama_2 # Only support nvgpt or llama_2 + vision_encoder: + from_pretrained: "openai/clip-vit-large-patch14" # path or name + from_hf: True + patch_dim: 14 + hidden_size: 1024 # could be found from model but tricky in code + vision_select_layer: -2 # default to the last layer + class_token_length: 1 + freeze: True + pretrain_mm_mlp_adapter: null # path to pretrained mm adapter + use_im_start_end: False + + + # LLM configs + # use GPTModel from megatron.core + mcore_gpt: False + + # model architecture + encoder_seq_length: 4096 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: rope + num_layers: 24 + hidden_size: 2048 + ffn_hidden_size: 5440 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 16 + init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0.0 # Dropout probability for hidden state transformer. + attention_dropout: 0.0 # Dropout probability for attention + ffn_dropout: 0.0 # Dropout probability in the feed-forward layer. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: 'rmsnorm' # Normalization layer to use. Options are 'layernorm', 'rmsnorm' + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + make_vocab_size_divisible_by: 128 # Pad the vocab size to be divisible by this value for computation efficiency. + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + bias: False # Whether to use bias terms in all weight matrices. + activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] + headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. + transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] + normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. + rotary_percentage: 1.0 # If using position_embedding_type=rope, then the per head dim is multiplied by this. + attention_type: 'multihead' # Attention type. Options ['multihead'] + share_embeddings_and_output_weights: False # Share embedding and output layer weights. + overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + seq_len_interpolation_factor: null # RoPE Interpolation factor for sequence length. This is used to build long-context models with RoPE ex: https://arxiv.org/abs/2306.15595. + num_query_groups: null # Number of query groups for group query attention. If None, normal attention is used. + override_vocab_size: 32000 + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: False # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + + # Megatron O2-style half-precision + megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'sentencepiece' + type: null + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + additional_special_tokens: null # ["", "", "", "", "", ""] + + data: + num_workers: 8 + dataloader_type: cyclic + data_path: + lazy_preprocess: True + is_multimodal: True + sep_image_conv_front: False + image_token_len: 256 + conv_template: llama_2 # check `nemo/collections/multimodal/data/neva/conversation.py` + image_folder: null + image_aspect_ratio: 'square' + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 2e-3 + weight_decay: 0. + betas: + - 0.9 + - 0.95 + sched: + name: CosineAnnealing + warmup_steps: 140 + constant_steps: 0 + min_lr: 2e-5 \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/conf/neva_config.yaml b/examples/multimodal/mllm/neva/conf/neva_config.yaml index c6d6a6f6e377..4535dba9e2ea 100644 --- a/examples/multimodal/mllm/neva/conf/neva_config.yaml +++ b/examples/multimodal/mllm/neva/conf/neva_config.yaml @@ -62,13 +62,14 @@ model: pipeline_model_parallel_size: 1 # inter-layer model parallelism virtual_pipeline_model_parallel_size: null # interleaved pipeline - restore_from_pretrained: null # used in fine-tuning + restore_from_path: null # used in fine-tuning # Multimodal configs mm_cfg: llm: from_pretrained: null # path to nemo checkpoint freeze: True + model_type: llama_2 # `nvgpt` or `llama_2` supported vision_encoder: from_pretrained: "" # path or name from_hf: True @@ -78,7 +79,7 @@ model: class_token_length: 1 freeze: True pretrain_mm_mlp_adapter: null # path to pretrained mm adapter - use_im_start_end: True # only support True now + use_im_start_end: True # LLM configs @@ -86,20 +87,20 @@ model: mcore_gpt: False # model architecture - encoder_seq_length: 2048 + encoder_seq_length: 4096 max_position_embeddings: ${.encoder_seq_length} position_embedding_type: rope - num_layers: 24 - hidden_size: 2048 - ffn_hidden_size: 5440 # Transformer FFN hidden size. Usually 4 * hidden_size. - num_attention_heads: 16 + num_layers: 40 + hidden_size: 5120 + ffn_hidden_size: 13824 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 40 init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') use_scaled_init_method: True # use scaled residuals initialization hidden_dropout: 0. # Dropout probability for hidden state transformer. attention_dropout: 0. kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. - normalization: layernorm1p # Type of normalization layers + normalization: rmsnorm # Type of normalization layers layernorm_epsilon: 1e-5 do_layer_norm_weight_decay: False # True means weight decay on all params pre_process: True # add embedding @@ -117,6 +118,7 @@ model: batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 seq_len_interpolation_factor: null # RoPE Interpolation factor for sequence length. This is used to build long-context models with RoPE ex: https://arxiv.org/abs/2306.15595. num_query_groups: null # Number of query groups for group query attention. If None, normal attention is used. + use_flash_attention: True ## Activation Checkpointing activations_checkpoint_granularity: null # 'selective' or 'full' @@ -156,6 +158,7 @@ model: # Megatron O2-style half-precision megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters + async_grad_allreduce: False grad_allreduce_chunk_size_mb: 125 grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce @@ -166,8 +169,8 @@ model: gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) tokenizer: - library: 'megatron' - type: 'GPT2BPETokenizer' + library: 'sentencepiece' + type: null model: null vocab_file: null merge_file: null @@ -182,8 +185,8 @@ model: lazy_preprocess: True is_multimodal: True sep_image_conv_front: False - conv_template: nvgpt # check `nemo/collections/multimodal/data/neva/conversation.py` - image_token_len: 0 + image_token_len: 256 + conv_template: ${model.mm_cfg.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` image_folder: null image_aspect_ratio: 'square' diff --git a/examples/multimodal/mllm/neva/conf/neva_finetune.yaml b/examples/multimodal/mllm/neva/conf/neva_finetune.yaml index 6c3160709004..bd902b9f5d15 100644 --- a/examples/multimodal/mllm/neva/conf/neva_finetune.yaml +++ b/examples/multimodal/mllm/neva/conf/neva_finetune.yaml @@ -62,13 +62,14 @@ model: pipeline_model_parallel_size: 1 # inter-layer model parallelism virtual_pipeline_model_parallel_size: null # interleaved pipeline - restore_from_pretrained: null # used in fine-tuning + restore_from_path: null # used in fine-tuning # Multimodal configs mm_cfg: llm: from_pretrained: null # path to nemo checkpoint freeze: False + model_type: nvgpt # Only support nvgpt or llama_2 vision_encoder: from_pretrained: "" # path or name from_hf: True @@ -86,7 +87,7 @@ model: mcore_gpt: False # model architecture - encoder_seq_length: 2048 + encoder_seq_length: 4096 max_position_embeddings: ${.encoder_seq_length} position_embedding_type: rope num_layers: 24 @@ -181,7 +182,8 @@ model: lazy_preprocess: True is_multimodal: True sep_image_conv_front: False - image_token_len: 0 + image_token_len: 256 + conv_template: ${model.mm_cfg.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` image_folder: null image_aspect_ratio: 'square' diff --git a/examples/multimodal/mllm/neva/conf/neva_inference.yaml b/examples/multimodal/mllm/neva/conf/neva_inference.yaml index 2b555f3ba479..35ca1e179f98 100644 --- a/examples/multimodal/mllm/neva/conf/neva_inference.yaml +++ b/examples/multimodal/mllm/neva/conf/neva_inference.yaml @@ -1,14 +1,15 @@ inference: - greedy: True # Whether or not to use sampling ; use greedy decoding otherwise + greedy: False # Whether or not to use sampling ; use greedy decoding otherwise top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. temperature: 0.2 # sampling temperature add_BOS: False # add the bos token at the begining of the prompt - tokens_to_generate: 10 # The minimum length of the sequence to be generated. + tokens_to_generate: 256 # The minimum length of the sequence to be generated. all_probs: False # whether return the log prob for all the tokens in vocab repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False + end_strings: ["","",] # generation will stop when one of these tokens is generated images_base_path: /pwd/images trainer: @@ -45,3 +46,7 @@ share: False # whether create a public URL username: test # user name for web client password: test2 # password for web client web_port: 9889 # the port number of the web server + +quantization: + algorithm: awq # int8_sq, fp8, int8, awq + enable: False \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/conf/neva_peft.yaml b/examples/multimodal/mllm/neva/conf/neva_peft.yaml index 87f675f3eee1..0099d1d8c4d4 100644 --- a/examples/multimodal/mllm/neva/conf/neva_peft.yaml +++ b/examples/multimodal/mllm/neva/conf/neva_peft.yaml @@ -62,13 +62,14 @@ model: pipeline_model_parallel_size: 1 # inter-layer model parallelism virtual_pipeline_model_parallel_size: null # interleaved pipeline - restore_from_pretrained: null # used in fine-tuning + restore_from_path: null # used in fine-tuning # Multimodal configs mm_cfg: llm: from_pretrained: null # path to nemo checkpoint freeze: True # Set this to True in adapter learning! + model_type: nvgpt # Only support nvgpt or llama_2 vision_encoder: from_pretrained: "" # path or name from_hf: True @@ -92,7 +93,7 @@ model: mcore_gpt: False # model architecture - encoder_seq_length: 2048 + encoder_seq_length: 4096 max_position_embeddings: ${.encoder_seq_length} position_embedding_type: rope num_layers: 24 @@ -187,7 +188,8 @@ model: lazy_preprocess: True is_multimodal: True sep_image_conv_front: False - image_token_len: 0 + image_token_len: 256 + conv_template: ${model.mm_cfg.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` image_folder: null image_aspect_ratio: 'square' diff --git a/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py b/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py new file mode 100644 index 000000000000..b70faf61a413 --- /dev/null +++ b/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py @@ -0,0 +1,343 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r""" +Conversion script to convert Huggingface LLaMA checkpoints into nemo checkpoint. + Example to run this conversion script: + python convert_hf_llava_to_nevo.py \ + --in-file \ + --out-file \ + --tokenizer-model +""" + +import os +from argparse import ArgumentParser +from collections import OrderedDict + +import torch +from llava import LlavaLlamaForCausalLM +from omegaconf import OmegaConf +from pytorch_lightning.core.saving import _load_state as ptl_load_state +from pytorch_lightning.trainer.trainer import Trainer +from transformers import LlamaTokenizer + +from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + NLPSaveRestoreConnector, + PipelineMixedPrecisionPlugin, +) +from nemo.utils import logging + + +def get_args(): + parser = ArgumentParser() + parser.add_argument( + "--in-file", type=str, default=None, required=True, help="Path to Huggingface LLaMA checkpoints", + ) + parser.add_argument("--out-file", type=str, default=None, required=True, help="Path to output .nemo file.") + parser.add_argument( + "--tokenizer-model", type=str, default=None, required=False, help="Path to sentencepiece tokenizer model." + ) + parser.add_argument("--precision", type=str, default="32", help="Model precision") + args = parser.parse_args() + return args + + +def load_model(cls, checkpoint, strict, **kwargs): + try: + if 'cfg' in kwargs: + model = ptl_load_state(cls, checkpoint, strict=strict, **kwargs) + else: + # model = ptl_load_state( + # cls, checkpoint, strict=strict, cfg=checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY], **kwargs + # ) + model = cls(cfg=checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY], **kwargs) + for name, module in model.named_parameters(): + if name in checkpoint['state_dict']: + module.data = checkpoint['state_dict'][name] + checkpoint['state_dict'].pop(name) + else: + print(f"Unexpected key: {name} not in checkpoint but in model.") + + for name, buffer in model.named_buffers(): + if name in checkpoint['state_dict']: + buffer.data = checkpoint['state_dict'][name] + checkpoint['state_dict'].pop(name) + + if len(checkpoint['state_dict'].keys()) != 0: + raise RuntimeError( + f"Additional keys: {checkpoint['state_dict'].keys()} in checkpoint but not in model." + ) + + # register the artifacts + cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] + if cfg.tokenizer.model is not None: + model.register_artifact("tokenizer.tokenizer_model", cfg.tokenizer.model) + if cfg.tokenizer.vocab_file is not None: + model.register_artifact("tokenizer.vocab_file", cfg.tokenizer.vocab_file) + if cfg.tokenizer.merge_file is not None: + model.register_artifact("tokenizer.merge_file", cfg.tokenizer.merge_file) + finally: + cls._set_model_restore_state(is_being_restored=False) + return model + + +def load_config(args, llama_config): + nemo_config = OmegaConf.load(os.path.join(os.path.dirname(__file__), 'conf/llava_config.yaml')).model + nemo_config.encoder_seq_length = llama_config['max_position_embeddings'] + nemo_config.num_layers = int(llama_config['num_hidden_layers']) + nemo_config.hidden_size = llama_config['hidden_size'] + nemo_config.ffn_hidden_size = llama_config['intermediate_size'] + nemo_config.num_attention_heads = llama_config['num_attention_heads'] + nemo_config.max_position_embeddings = llama_config['max_position_embeddings'] + nemo_config.init_method_std = llama_config['initializer_range'] + nemo_config.layernorm_epsilon = llama_config['rms_norm_eps'] + if 'num_key_value_heads' in llama_config: + nemo_config.num_query_groups = llama_config['num_key_value_heads'] + nemo_config.use_cpu_initialization = True + nemo_config.activation = 'fast-swiglu' + if args.tokenizer_model is None: + nemo_config.tokenizer.model = llama_config['tokenizer_model'] + else: + nemo_config.tokenizer.model = args.tokenizer_model + if llama_config['rope_scaling'] is not None: + if llama_config['rope_scaling']['type'] == 'linear': + nemo_config['seq_len_interpolation_factor'] = llama_config['rope_scaling']['factor'] + else: + raise ValueError("Only linear rope scaling type is supported now") + + base = 128 + while llama_config['vocab_size'] % base != 0: + base //= 2 + nemo_config.make_vocab_size_divisible_by = base + + return nemo_config + + +def convert(args): + logging.info(f"loading checkpoint {args.in_file}") + model = LlavaLlamaForCausalLM.from_pretrained(args.in_file) + tokenizer = LlamaTokenizer.from_pretrained(args.in_file) + hf_config = vars(model.config) + hf_config['tokenizer_model'] = str(tokenizer.vocab_file) + print(f"hf_config: {hf_config}") + print("named parameters:") + for name, param in model.named_parameters(): + print(f"- {name}") + + nemo_config = load_config(args, hf_config) + print(nemo_config) + + if args.precision in ["32", "16"]: + precision = int(float(args.precision)) + elif args.precision in ["bf16", "bf16-mixed"]: + if torch.cuda.is_available() and torch.cuda.is_bf16_supported(): + precision = args.precision + else: + logging.warning("BF16 is not supported on this device. Using FP16 instead.") + precision = args.precision[2:] # prune bf in string + else: + precision = args.precision + + plugins = [] + if precision in [16, '16', 'bf16', '16-mixed', 'bf16-mixed']: + scaler = None + if precision in [16, '16', '16-mixed']: + scaler = GradScaler( + init_scale=nemo_config.get('native_amp_init_scale', 2 ** 32), + growth_interval=nemo_config.get('native_amp_growth_interval', 1000), + hysteresis=nemo_config.get('hysteresis', 2), + ) + # MixedPrecisionPlugin in PTL >= 2.0 requires precision to be 16-mixed or bf16-mixed + plugin_precision = '16-mixed' + else: + plugin_precision = 'bf16-mixed' + + if nemo_config.get('megatron_amp_O2', False): + plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) + + if precision == 32: + dtype = torch.float32 + elif precision in [16, "16", "16-mixed"]: + dtype = torch.float16 + elif precision in ["bf16", "bf16-mixed"]: + dtype = torch.bfloat16 + else: + dtype = torch.float32 # fallback + + nemo_config.precision = precision + print(f"nemo_config: {nemo_config}") + + trainer = Trainer(plugins=plugins, accelerator='cpu', precision=precision, strategy=NLPDDPStrategy()) + + hidden_size = hf_config["hidden_size"] + head_num = hf_config["num_attention_heads"] + head_size = hidden_size // head_num + num_layers = hf_config["num_hidden_layers"] + + mcore_gpt = nemo_config.mcore_gpt + + assert mcore_gpt == nemo_config.get( + 'transformer_engine', False + ), "mcore_gpt transformer_engine must be enabled (or disabled) together." + + param_to_weights = lambda param: param.float() + + checkpoint = OrderedDict() + checkpoint['state_dict'] = OrderedDict() + + # Multimodal projection + if mcore_gpt: + raise NotImplementedError + else: + mm_projection_layer_base_name = ( + f'model.language_model.embedding.word_embeddings.adapter_layer.mm_linear_adapter.linear' + ) + checkpoint['state_dict'][f'{mm_projection_layer_base_name}.weight'] = param_to_weights( + model.state_dict()[f'model.mm_projector.weight'] + ) + checkpoint['state_dict'][f'{mm_projection_layer_base_name}.bias'] = param_to_weights( + model.state_dict()[f'model.mm_projector.bias'] + ) + + embed_weight = model.state_dict()[f'model.embed_tokens.weight'] + if mcore_gpt: + embed_weights_base_name = f'model.embedding.word_embeddings.weight' + else: + embed_weights_base_name = f'model.language_model.embedding.word_embeddings.weight' + checkpoint['state_dict'][embed_weights_base_name] = param_to_weights(embed_weight) + + # in hf, this is defined as register_buffer(..., persistent=False) so it won't be in the state dict + if f'model.layers.0.self_attn.rotary_emb.inv_freq' in model.state_dict(): + rotary_embed_weight = model.state_dict()[f'model.layers.0.self_attn.rotary_emb.inv_freq'] + if mcore_gpt: + rotary_embed_weight_base_name = f'model.rotary_pos_emb.inv_freq' + else: + rotary_embed_weight_base_name = f'model.language_model.rotary_pos_emb.inv_freq' + checkpoint['state_dict'][rotary_embed_weight_base_name] = param_to_weights(rotary_embed_weight) + + if nemo_config.num_query_groups is None or nemo_config.num_query_groups == head_num: + num_query_groups = head_num + else: + num_query_groups = nemo_config.num_query_groups + assert head_num % num_query_groups == 0, 'head_num must be divisible by num_query_groups' + if mcore_gpt: + assert nemo_config.activation.startswith('fast-'), 'mcore only supports fast version of gated linear unit.' + + for l in range(int(num_layers)): + print(f"converting layer {l}") + old_tensor_shape = model.state_dict()[f'model.layers.{l}.self_attn.q_proj.weight'].size() + new_q_tensor_shape = (head_num, head_size) + old_tensor_shape[1:] + new_kv_tensor_shape = (num_query_groups, head_size) + old_tensor_shape[1:] + q = model.state_dict()[f'model.layers.{l}.self_attn.q_proj.weight'].view(*new_q_tensor_shape) + k = model.state_dict()[f'model.layers.{l}.self_attn.k_proj.weight'].view(*new_kv_tensor_shape) + v = model.state_dict()[f'model.layers.{l}.self_attn.v_proj.weight'].view(*new_kv_tensor_shape) + qkv_weights = torch.empty((0, head_size) + old_tensor_shape[1:]) + heads_per_group = head_num // num_query_groups + for i in range(num_query_groups): + qkv_weights = torch.cat((qkv_weights, q[i * heads_per_group : (i + 1) * heads_per_group, :, :])) + qkv_weights = torch.cat((qkv_weights, k[i : i + 1, :, :])) + qkv_weights = torch.cat((qkv_weights, v[i : i + 1, :, :])) + qkv_weights = qkv_weights.reshape([head_size * (head_num + 2 * num_query_groups), hidden_size]) + if mcore_gpt: + qkv_weights_base_name = f'model.decoder.layers.{l}.self_attention.linear_qkv.weight' + else: + qkv_weights_base_name = f'model.language_model.encoder.layers.{l}.self_attention.query_key_value.weight' + checkpoint['state_dict'][qkv_weights_base_name] = param_to_weights(qkv_weights) + + # attention dense + o_weight = model.state_dict()[f'model.layers.{l}.self_attn.o_proj.weight'] + if mcore_gpt: + o_weight_base_name = f'model.decoder.layers.{l}.self_attention.linear_proj.weight' + else: + o_weight_base_name = f'model.language_model.encoder.layers.{l}.self_attention.dense.weight' + checkpoint['state_dict'][o_weight_base_name] = param_to_weights(o_weight) + + # MLP + mlp_down_weight = model.state_dict()[f'model.layers.{l}.mlp.gate_proj.weight'] + mlp_gate_weight = model.state_dict()[f'model.layers.{l}.mlp.up_proj.weight'] + if mcore_gpt: + mlp_down_base_name = f'model.decoder.layers.{l}.mlp.linear_fc1.weight' + else: + mlp_down_base_name = f'model.language_model.encoder.layers.{l}.mlp.dense_h_to_4h.weight' + mlp_down_weight = torch.cat((mlp_down_weight, mlp_gate_weight), axis=0) + checkpoint['state_dict'][mlp_down_base_name] = param_to_weights(mlp_down_weight) + + mlp_up_weight = model.state_dict()[f'model.layers.{l}.mlp.down_proj.weight'] + if mcore_gpt: + mlp_up_base_name = f'model.decoder.layers.{l}.mlp.linear_fc2.weight' + else: + mlp_up_base_name = f'model.language_model.encoder.layers.{l}.mlp.dense_4h_to_h.weight' + checkpoint['state_dict'][mlp_up_base_name] = param_to_weights(mlp_up_weight) + + # LayerNorm + input_ln_weight = model.state_dict()[f'model.layers.{l}.input_layernorm.weight'] + if mcore_gpt: + input_ln_base_name = f'model.decoder.layers.{l}.self_attention.linear_qkv.layer_norm_weight' + else: + input_ln_base_name = f'model.language_model.encoder.layers.{l}.input_layernorm.weight' + checkpoint['state_dict'][input_ln_base_name] = param_to_weights(input_ln_weight) + + post_attn_ln_weight = model.state_dict()[f'model.layers.{l}.post_attention_layernorm.weight'] + if mcore_gpt: + post_attn_ln_base_name = f'model.decoder.layers.{l}.mlp.linear_fc1.layer_norm_weight' + else: + post_attn_ln_base_name = f'model.language_model.encoder.layers.{l}.post_attention_layernorm.weight' + checkpoint['state_dict'][post_attn_ln_base_name] = param_to_weights(post_attn_ln_weight) + + print(f"done layer {l}") + + final_ln_weight = model.state_dict()[f'model.norm.weight'] + if mcore_gpt: + final_ln_base_name = f'model.decoder.final_layernorm.weight' + else: + final_ln_base_name = f'model.language_model.encoder.final_layernorm.weight' + checkpoint['state_dict'][final_ln_base_name] = param_to_weights(final_ln_weight) + + output_layer_weight = model.state_dict()[f'lm_head.weight'] + if mcore_gpt: + output_layer_base_name = f'model.output_layer.weight' + else: + output_layer_base_name = f'model.language_model.output_layer.weight' + checkpoint['state_dict'][output_layer_base_name] = param_to_weights(output_layer_weight) + + checkpoint[MegatronNevaModel.CHECKPOINT_HYPER_PARAMS_KEY] = nemo_config + + del model + + if nemo_config.get('megatron_amp_O2', False): + keys = list(checkpoint['state_dict'].keys()) + for key in keys: + checkpoint['state_dict'][key.replace('model.', 'model.module.', 1)] = checkpoint['state_dict'].pop(key) + + model = load_model(MegatronNevaModel, checkpoint, strict=False, trainer=trainer) + + model._save_restore_connector = NLPSaveRestoreConnector() + + # cast to target precision and disable cpu init + model = model.to(dtype=dtype) + model.cfg.use_cpu_initialization = False + + model.save_to(args.out_file) + logging.info(f'NeMo model saved to: {args.out_file}') + + +if __name__ == '__main__': + args = get_args() + convert(args) diff --git a/examples/multimodal/mllm/neva/neva_evaluation.py b/examples/multimodal/mllm/neva/neva_evaluation.py index b71566aa5915..256d58018c9b 100644 --- a/examples/multimodal/mllm/neva/neva_evaluation.py +++ b/examples/multimodal/mllm/neva/neva_evaluation.py @@ -24,7 +24,6 @@ from pytorch_lightning.trainer.trainer import Trainer from torch.utils.data import DataLoader, Dataset -import nemo.collections.multimodal.data.neva.conversation as conversation_lib from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel from nemo.collections.nlp.modules.common.megatron_web_server import get_demo @@ -45,6 +44,16 @@ HAVE_MEGATRON_CORE = False +try: + import ammo.torch.quantization as atq + + HAVE_AMMO = True + +except (ImportError, ModuleNotFoundError): + + HAVE_AMMO = False + + """ This is the script to run GPT text generation. @@ -243,7 +252,16 @@ def main(cfg) -> None: model.freeze() # Have to turn off activations_checkpoint_method for inference - model.model.module.language_model.encoder.activations_checkpoint_method = None + # Have to turn off activations_checkpoint_method for inference + try: + model.model.language_model.encoder.activations_checkpoint_method = None + except AttributeError: + pass + try: + model.model.module.language_model.encoder.activations_checkpoint_method = None + except AttributeError: + pass + length_params: LengthParam = { "max_length": cfg.inference.tokens_to_generate, "min_length": cfg.inference.min_tokens_to_generate, @@ -258,6 +276,7 @@ def main(cfg) -> None: "add_BOS": cfg.inference.add_BOS, "all_probs": cfg.inference.all_probs, "compute_logprob": cfg.inference.compute_logprob, + "end_strings": cfg.inference.end_strings, } with open(cfg.prompt_file, 'r') as f: @@ -272,9 +291,44 @@ def main(cfg) -> None: input_prompts=final_prompts, length_params=length_params, sampling_params=sampling_params, inference_config=cfg ) + # =================== Start Quantization ==================== + # see https://gitlab-master.nvidia.com/omniml/ammo/-/tree/main/examples/nemo/neva for details + if HAVE_AMMO and cfg.quantization.enable == True: + print(f"Using quantization algorithm: {cfg.quantization.algorithm}") + if cfg.quantization.algorithm == "int8_sq": + atq_config = atq.INT8_SMOOTHQUANT_CFG + elif cfg.quantization.algorithm == "fp8": + atq_config = atq.FP8_DEFAULT_CFG + elif cfg.quantization.algorithm == "awq": + atq_config = atq.INT4_AWQ_CFG + else: + raise ValueError(f"Unsupported quantization algorithm: {cfg.quantization.algorithm}") + + def forward_loop(): + model.generate( + input_prompts=final_prompts, + length_params=length_params, + sampling_params=sampling_params, + inference_config=cfg, + ) + + atq.quantize(model, atq_config, forward_loop) + + responses = model.generate( + input_prompts=final_prompts, + length_params=length_params, + sampling_params=sampling_params, + inference_config=cfg, + ) + # ============== Quantization End ========================= + results = [] for response, prompt in zip(responses, final_prompts): - prompt['response'] = response + prompt['full_text'] = response["clean_text"] + prompt['text'] = response["clean_response"] + prompt['model_id'] = cfg.neva_model_file + prompt['answer_id'] = 0 + prompt['metadata'] = {} results.append(prompt) with open(cfg.output_file, 'w') as f: diff --git a/examples/multimodal/mllm/neva/neva_export.py b/examples/multimodal/mllm/neva/neva_export.py index 017af8deabb3..185be63b6310 100644 --- a/examples/multimodal/mllm/neva/neva_export.py +++ b/examples/multimodal/mllm/neva/neva_export.py @@ -43,7 +43,7 @@ def build_vision_encoder(model_path, clip_path, precision, bs_min, bs_opt, bs_max, out_dir): - torch_precision = torch.bfloat16 if precision == 'bf16' else torch.float16 + torch_precision = torch.bfloat16 if precision in ['bf16', 'bf16-mixed'] else torch.float16 with tempfile.TemporaryDirectory() as temp: LOGGER.info('Extracting model') @@ -62,10 +62,21 @@ def build_vision_encoder(model_path, clip_path, precision, bs_min, bs_opt, bs_ma vision_encoder = CLIPVisionModel.from_pretrained(clip_path, torch_dtype=torch_precision) image_size = vision_encoder.vision_model.config.image_size - new_state_dict = { - 'weight': state_dict['model.vision_connector.weight'], - 'bias': state_dict['model.vision_connector.bias'], - } + if 'model.vision_connector.weight' in state_dict: + new_state_dict = { + 'weight': state_dict['model.vision_connector.weight'], + 'bias': state_dict['model.vision_connector.bias'], + } + else: + new_state_dict = { + 'weight': state_dict[ + 'model.language_model.embedding.word_embeddings.adapter_layer.mm_linear_adapter.linear.weight' + ], + 'bias': state_dict[ + 'model.language_model.embedding.word_embeddings.adapter_layer.mm_linear_adapter.linear.bias' + ], + } + vision_connector.load_state_dict(new_state_dict) vision_connector = vision_connector.to(dtype=torch_precision) @@ -106,11 +117,11 @@ def output_names(self): wrapper = VisionEncoderWrapper(vision_encoder, vision_connector) - os.makedirs(f'./onnx/', exist_ok=True) + os.makedirs(f'/tmp/onnx/', exist_ok=True) dynamic_axes = {'images': {0: 'B'}} LOGGER.info('Exporting ONNX') - wrapper.export(f'./onnx/vision_encoder.onnx', dynamic_axes=dynamic_axes, onnx_opset_version=17) + wrapper.export(f'/tmp/onnx/vision_encoder.onnx', dynamic_axes=dynamic_axes, onnx_opset_version=17) LOGGER.info('Done') bsmin_example = wrapper.input_example(max_batch=bs_min) @@ -133,7 +144,7 @@ def output_names(self): LOGGER.info('Exporting TRT') engine = engine_from_network( network_from_onnx_path('./onnx/vision_encoder.onnx'), - config=CreateConfig(fp16=precision == 16, bf16=precision == 'bf16', profiles=[p],), + config=CreateConfig(fp16=precision in [16, '16', '16-mixed'], bf16=precision in ['bf16', 'bf16-mixed'], profiles=[p],), ) save_engine(engine, path=os.path.join(out_dir, 'vision_encoder.plan')) @@ -147,7 +158,7 @@ def build_trtllm_engines( ): with tempfile.TemporaryDirectory() as temp_dir: gpt_example_path = f'{tekit_path}/examples/gpt' - build_precision = 'bfloat16' if precision == 'bf16' else 'float16' + build_precision = 'bfloat16' if precision in ['bf16', 'bf16-mixed'] else 'float16' LOGGER.info('Converting model weights') convert_command = [ 'python3', @@ -182,8 +193,8 @@ def build_trtllm_engines( f'--max_batch_size={max_batch_size}', f'--use_layernorm_plugin={build_precision}', f'--use_gemm_plugin={build_precision}', + f'--max_prompt_embedding_table_size={max_batch_size*max_input_len}', '--parallel_build', - '--embeddings_override', '--enable_context_fmha', '--remove_input_padding', '--log_level=verbose', @@ -195,7 +206,6 @@ def build_trtllm_engines( print(stdout.decode()) assert build_process.returncode == 0, stderr.decode() LOGGER.info('Done') - os.remove(os.path.join(out_dir, 'model.cache')) @hydra_runner(config_path='conf', config_name='neva_export') @@ -220,7 +230,7 @@ def main(cfg): build_vision_encoder( cfg.model.restore_from_path, cfg.infer.vision.clip, - precision, + 32, # WAR for TRT precision issue cfg.infer.vision.get('min_batch_size', 1), cfg.infer.vision.get('opt_batch_size', 1), cfg.infer.vision.get('max_batch_size', 1), diff --git a/examples/multimodal/mllm/neva/neva_finetune.py b/examples/multimodal/mllm/neva/neva_finetune.py index 83653abe753e..fa32e5e2d24b 100644 --- a/examples/multimodal/mllm/neva/neva_finetune.py +++ b/examples/multimodal/mllm/neva/neva_finetune.py @@ -37,11 +37,11 @@ def main(cfg) -> None: with open_dict(cfg): cfg.model.precision = cfg.trainer.precision - if cfg.model.restore_from_pretrained is None: + if cfg.model.restore_from_path is None: model = MegatronNevaModel(cfg.model, trainer) else: model = MegatronNevaModel.restore_from( - restore_path=cfg.model.restore_from_pretrained, + restore_path=cfg.model.restore_from_path, trainer=trainer, override_config_path=cfg.model, save_restore_connector=NLPSaveRestoreConnector(), diff --git a/examples/multimodal/mllm/neva/neva_peft.py b/examples/multimodal/mllm/neva/neva_peft.py index a86bb69c6809..1738c41c2e48 100644 --- a/examples/multimodal/mllm/neva/neva_peft.py +++ b/examples/multimodal/mllm/neva/neva_peft.py @@ -38,11 +38,11 @@ def main(cfg) -> None: with open_dict(cfg): cfg.model.precision = cfg.trainer.precision - if cfg.model.restore_from_pretrained is None: + if cfg.model.restore_from_path is None: model = MegatronNevaLoRAModel(cfg.model, trainer) else: model = MegatronNevaLoRAModel.restore_from( - restore_path=cfg.model.restore_from_pretrained, + restore_path=cfg.model.restore_from_path, trainer=trainer, override_config_path=cfg.model, save_restore_connector=NLPSaveRestoreConnector(), diff --git a/nemo/collections/multimodal/data/neva/conversation.py b/nemo/collections/multimodal/data/neva/conversation.py index b1cbade03985..4d46bf4decf2 100644 --- a/nemo/collections/multimodal/data/neva/conversation.py +++ b/nemo/collections/multimodal/data/neva/conversation.py @@ -292,7 +292,7 @@ def dict(self): offset=0, sep_style=SeparatorStyle.TWO, sep=" ", - sep2="
", + sep2="", ) conv_llama_2 = Conversation( @@ -304,8 +304,8 @@ def dict(self): messages=(), offset=0, sep_style=SeparatorStyle.LLAMA_2, - sep="", - sep2="", + sep="", + sep2="", ) conv_llava_llama_2 = Conversation( @@ -317,8 +317,8 @@ def dict(self): messages=(), offset=0, sep_style=SeparatorStyle.LLAMA_2, - sep="", - sep2="", + sep="", + sep2="", ) conv_mpt = Conversation( @@ -367,7 +367,7 @@ def dict(self): offset=0, sep_style=SeparatorStyle.TWO, sep=" ", - sep2="
", + sep2="", ) conv_llava_v1_mmtag = Conversation( @@ -379,7 +379,7 @@ def dict(self): offset=0, sep_style=SeparatorStyle.TWO, sep=" ", - sep2="", + sep2="", version="v1_mmtag", ) diff --git a/nemo/collections/multimodal/data/neva/neva_dataset.py b/nemo/collections/multimodal/data/neva/neva_dataset.py index 5b29f5c76a63..dc4a609f8d48 100644 --- a/nemo/collections/multimodal/data/neva/neva_dataset.py +++ b/nemo/collections/multimodal/data/neva/neva_dataset.py @@ -22,10 +22,11 @@ from nemo.collections.multimodal.data.kosmos.kosmos_dataset import tokenize_and_insert_media_tokens from nemo.collections.nlp.modules.common.megatron.utils import get_ltor_masks_and_position_ids +MAX_NUM_IMAGES = 4 IGNORE_INDEX = -1 DEFAULT_PAD_TOKEN = "" -DEFAULT_EOS_TOKEN = "" -DEFAULT_BOS_TOKEN = "" +DEFAULT_BOS_TOKEN = "" +DEFAULT_EOS_TOKEN = "" DEFAULT_UNK_TOKEN = "" DEFAULT_IMAGE_TOKEN = "" DEFAULT_SYSTEM_TOKEN = "" @@ -123,16 +124,18 @@ def preprocess_multimodal(sources: dict, multimodal_cfg: dict, cur_token_len: in + conversation[0]['value'] ) for turn in conversation: - replace_token = DEFAULT_IMAGE_PATCH_TOKEN * image_token_len if multimodal_cfg['use_im_start_end']: - replace_token = DEFAULT_IM_START_TOKEN + replace_token + DEFAULT_IM_END_TOKEN + replace_token = DEFAULT_IMAGE_PATCH_TOKEN * image_token_len + else: + replace_token = DEFAULT_IMAGE_PATCH_TOKEN * (image_token_len - 2) + replace_token = DEFAULT_IM_START_TOKEN + replace_token + DEFAULT_IM_END_TOKEN turn["value"] = turn["value"].replace(DEFAULT_IMAGE_TOKEN, replace_token) return sources def preprocess_llama_2(sources: dict, tokenizer: transformers.PreTrainedTokenizer, cfg,) -> Dict: - conv = conversation_lib.default_conversation.copy() + conv = conversation_lib.conv_llava_llama_2.copy() roles = {"human": conv.roles[0], "gpt": conv.roles[1]} # Apply prompt templates @@ -160,15 +163,19 @@ def preprocess_llama_2(sources: dict, tokenizer: transformers.PreTrainedTokenize add_extra_token=add_extra_token, ) + # llama tricks + tokens[tokens == 32003] = 0 # DEFAULT_IMAGE_PATCH_TOKEN + tokens[tokens == 32006] = 1 # + tokens[tokens == 32007] = 2 # labels = tokens.clone().detach() # Mask labels sep = "[/INST] " for conversation, target in zip(conversations, labels): rounds = conversation.split(conv.sep2) - cur_len = 1 - target[:cur_len] = IGNORE_INDEX + cur_len = 0 for i, rou in enumerate(rounds): + if rou == "": break @@ -177,14 +184,18 @@ def preprocess_llama_2(sources: dict, tokenizer: transformers.PreTrainedTokenize break parts[0] += sep - round_len = len(tokenizer.text_to_ids(rou).tokens) - instruction_len = len(tokenizer.text_to_ids(parts[0]).tokens) - 2 - + round_len = len(tokenizer.text_to_ids(rou + conv.sep2)) + if i > 0: + round_len -= 1 # Remove extra token added by sp tokenizer + instruction_len = len(tokenizer.text_to_ids(parts[0])) - 1 target[cur_len : cur_len + instruction_len] = IGNORE_INDEX cur_len += round_len target[cur_len:] = IGNORE_INDEX + # Check if masking working correctly + # print([x for x in zip(tokens[0].numpy().tolist(), labels[0].numpy().tolist())]) + if add_extra_token: tokens = tokens[:, :-1].contiguous() labels = labels[:, 1:].contiguous() @@ -196,7 +207,7 @@ def preprocess_llama_2(sources: dict, tokenizer: transformers.PreTrainedTokenize def preprocess_v1(sources: dict, tokenizer: transformers.PreTrainedTokenizer, cfg,) -> Dict: - conv = conversation_lib.default_conversation.copy() + conv = conversation_lib.conv_vicuna_v1.copy() roles = {"human": conv.roles[0], "gpt": conv.roles[1]} # Apply prompt templates @@ -243,8 +254,8 @@ def preprocess_v1(sources: dict, tokenizer: transformers.PreTrainedTokenizer, cf break parts[0] += sep - round_len = len(tokenizer.text_to_ids(rou).tokens) - instruction_len = len(tokenizer.text_to_ids(parts[0]).tokens) - 2 + round_len = len(tokenizer.text_to_ids(rou)) + instruction_len = len(tokenizer.text_to_ids(parts[0])) - 2 target[cur_len : cur_len + instruction_len] = IGNORE_INDEX @@ -280,13 +291,20 @@ def preprocess_nvgpt(sources: dict, tokenizer: transformers.PreTrainedTokenizer, if len(source['conversations']) >= 2: conv.roles = (source['conversations'][0]['from'], source['conversations'][1]['from']) + strip_end_for_inference = False for turn in source['conversations']: if 'label' in turn: value = DEFAULT_LABELS_TOKEN + turn['label'] + '\n' + turn['value'] conv.append_message(turn['from'], value) + if not turn["value"]: + strip_end_for_inference = ( + True # in inference, current turn is empty, thus end tokens need to striped. + ) else: conv.append_message(turn['from'], turn['value']) context = conv.get_prompt() + if strip_end_for_inference: + context = context.rstrip("\n") + "\n" conversations.append(context) add_extra_token = cfg.get("add_extra_token") @@ -371,40 +389,51 @@ def __getitem__(self, i) -> Dict[str, torch.Tensor]: sources = [sources] assert len(sources) == 1, "Don't know why it is wrapped to a list" # FIXME if 'image' in sources[0]: - image_file = self.list_data_dict[i]['image'] - image = self.image_loader.open_image(image_file) - if image is None: - logging.warning(f"Image {image_file} could not be found!") - if self.multimodal_cfg['image_aspect_ratio'] == 'keep': - max_hw, min_hw = max(image.size), min(image.size) - aspect_ratio = max_hw / min_hw - max_len, min_len = 448, 224 - shortest_edge = int(min(max_len / aspect_ratio, min_len)) - image = processor.preprocess( - image, return_tensors='pt', do_center_crop=False, size={"shortest_edge": shortest_edge} - )['pixel_values'][0] - elif self.multimodal_cfg['image_aspect_ratio'] == 'pad': - - def expand2square(pil_img, background_color): - width, height = pil_img.size - if width == height: - return pil_img - elif width > height: - result = Image.new(pil_img.mode, (width, width), background_color) - result.paste(pil_img, (0, (width - height) // 2)) - return result - else: - result = Image.new(pil_img.mode, (height, height), background_color) - result.paste(pil_img, ((height - width) // 2, 0)) - return result - - image = expand2square(image, tuple(int(x * 255) for x in processor.image_mean)) - image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0] - else: - image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0] - cur_token_len = (image.shape[1] // 14) * (image.shape[2] // 14) # FIXME: 14 is hardcoded patch size - sources = preprocess_multimodal(copy.deepcopy(sources), self.multimodal_cfg, cur_token_len) + if not isinstance(self.list_data_dict[i]['image'], list): + self.list_data_dict[i]['image'] = [self.list_data_dict[i]['image']] + + images = [] + for image_file in self.list_data_dict[i]['image']: + image = self.image_loader.open_image(image_file) + if image is None: + logging.warning(f"Image {image_file} could not be found!") + if self.multimodal_cfg['image_aspect_ratio'] == 'keep': + max_hw, min_hw = max(image.size), min(image.size) + aspect_ratio = max_hw / min_hw + max_len, min_len = 448, 224 + shortest_edge = int(min(max_len / aspect_ratio, min_len)) + image = processor.preprocess( + image, return_tensors='pt', do_center_crop=False, size={"shortest_edge": shortest_edge} + )['pixel_values'][0] + elif self.multimodal_cfg['image_aspect_ratio'] == 'pad': + + def expand2square(pil_img, background_color): + width, height = pil_img.size + if width == height: + return pil_img + elif width > height: + result = Image.new(pil_img.mode, (width, width), background_color) + result.paste(pil_img, (0, (width - height) // 2)) + return result + else: + result = Image.new(pil_img.mode, (height, height), background_color) + result.paste(pil_img, ((height - width) // 2, 0)) + return result + + image = expand2square(image, tuple(int(x * 255) for x in processor.image_mean)) + image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0] + else: + image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0] + images.append(image) + images_tensors = torch.tensor([]) + if images: + images_tensors = torch.stack(images) + cur_token_len = (images_tensors[0].shape[1] // 14) * ( + images_tensors[0].shape[2] // 14 + ) # FIXME: 14 is hardcoded patch size + sources = preprocess_multimodal(copy.deepcopy(sources), self.multimodal_cfg, cur_token_len) else: + images_tensors = torch.tensor([]) sources = copy.deepcopy(sources) if self.conv_template == "nvgpt": @@ -420,12 +449,14 @@ def expand2square(pil_img, background_color): data_dict = dict(tokens=data_dict["tokens"][0], labels=data_dict["labels"][0]) # image exist in the data - if 'image' in self.list_data_dict[i]: - data_dict['image'] = image - elif self.multimodal_cfg['is_multimodal']: - # image does not exist in the data, but the model is multimodal + if self.multimodal_cfg['is_multimodal']: crop_size = self.processor.crop_size - data_dict['image'] = torch.zeros(3, crop_size['height'], crop_size['width']) + # image does not exist in the data, but the model is multimodal + zero_padding = torch.zeros( + (MAX_NUM_IMAGES - len(images_tensors), 3, crop_size['height'], crop_size['width']), dtype=torch.float + ) + images_tensors = torch.cat((images_tensors, zero_padding), dim=0) + data_dict['image'] = images_tensors return data_dict @@ -447,20 +478,19 @@ def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer, # This currently supports only a single image # search for tag + + record['image'] = [] for turn in record['conversations']: - # TODO (yuya): this is required? - if "image" not in record: - matches = re.finditer('', DEFAULT_IMAGE_TOKEN, turn['value']) + self.list_data_dict.append(record) else: @@ -505,7 +535,7 @@ def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]: if media is None: raise NotImplementedError else: - media = rearrange(media, "b c h w -> b 1 1 c h w") + media = rearrange(media, "b T c h w -> b T 1 c h w") batch = { 'tokens': tokens, diff --git a/nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py b/nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py index 52730c3f9c89..e4aaddd3214a 100644 --- a/nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py +++ b/nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py @@ -588,7 +588,9 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): forward_only=forward_only, tensor_shape=tensor_shape, dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, + grad_scaler=self.trainer.precision_plugin.scaler.scale + if self.cfg.precision in [16, '16', '16-mixed'] + else None, sequence_parallel=self.cfg.get('sequence_parallel', False), enable_autocast=self.enable_autocast, no_sync_func=no_sync_func, @@ -672,7 +674,7 @@ def training_step(self, dataloader_iter, batch_idx): # we can avoid this broadcast by updating the PTL log function to accept specific ranks torch.distributed.broadcast(loss_mean, get_last_rank()) - if self.cfg.precision == 16: + if self.cfg.precision in [16, '16', '16-mixed']: loss_scale = self.trainer.precision_plugin.scaler._scale if loss_scale is not None: self.log('loss_scale', loss_scale, batch_size=1) diff --git a/nemo/collections/multimodal/models/neva/neva_model.py b/nemo/collections/multimodal/models/neva/neva_model.py index b167121321c4..a8baed37338c 100644 --- a/nemo/collections/multimodal/models/neva/neva_model.py +++ b/nemo/collections/multimodal/models/neva/neva_model.py @@ -32,6 +32,8 @@ from transformers import CLIPVisionModel from nemo.collections.multimodal.data.neva.neva_dataset import ( + DEFAULT_BOS_TOKEN, + DEFAULT_EOS_TOKEN, DEFAULT_IM_END_TOKEN, DEFAULT_IM_START_TOKEN, DataCollatorForSupervisedDataset, @@ -83,7 +85,7 @@ from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone from nemo.core import adapter_mixins from nemo.core.classes.common import PretrainedModelInfo -from nemo.utils import AppState, logging +from nemo.utils import AppState, logging, model_utils try: import apex.transformer.pipeline_parallel.utils @@ -116,9 +118,9 @@ class FrozenCLIPVisionTransformer(CLIPVisionTransformer): - def __init__(self, model_cfg, pre_process=True, post_process=True): + def __init__(self, model_cfg, model_parallel_config, pre_process=True, post_process=True): super().__init__( - model_cfg, pre_process=pre_process, post_process=post_process, skip_head=True, + model_cfg, model_parallel_config, pre_process=pre_process, post_process=post_process, skip_head=True, ) self.frozen = False @@ -143,23 +145,44 @@ def freeze(self) -> None: self.frozen = True -class NevaEmbeddingMixin(torch.nn.Module, adapter_mixins.AdapterModuleMixin): - def init_vision(self, vision_encoder, media_start_id, media_end_id, vision_select_layer=-1, class_token_length=1): +class NevaWordEmbeddingMixin(torch.nn.Module, adapter_mixins.AdapterModuleMixin): + def init_vision( + self, + vision_encoder, + media_start_id, + media_end_id, + vision_select_layer=-1, + class_token_length=1, + use_im_start_end=False, + llama_tricks=False, + ): self.vision_encoder = vision_encoder self.from_hf = isinstance(vision_encoder, CLIPVisionModel) self.media_start_id = media_start_id self.media_end_id = media_end_id self.class_token_length = class_token_length + self.use_im_start_end = use_im_start_end self.vision_select_layer = vision_select_layer self.media = None self.set_accepted_adapter_types([MMLinearAdapterConfig._target_]) + self.llama_tricks = llama_tricks def set_media(self, media): self.media = media def forward(self, input_ids, **kwargs): media = self.media # avoid change the signature of embedding forward function - words_embeddings = super().forward(input_ids, **kwargs) + if self.llama_tricks and not self.use_im_start_end: + masked_input_ids = input_ids.detach().clone() + if self.num_embeddings < 32000: + raise ValueError("Not supported tokenizer with llama 2!") + else: + masked_input_ids[masked_input_ids >= 32000] = 0 + words_embeddings = super().forward(masked_input_ids, **kwargs) + + else: + words_embeddings = super().forward(input_ids, **kwargs) + return self.replace_media_embeddings(input_ids, words_embeddings, media) def encode_vision_x(self, vision_x: torch.Tensor): @@ -213,9 +236,15 @@ def replace_media_embeddings(self, input_ids, inputs_embeds, media): padded_media_indices *= sequence_length for idx, input_id in enumerate(input_ids): media_end_positions = torch.where(input_id == self.media_end_id)[0] - # locate the first media token positions - padded_media_indices[idx, : len(media_end_positions)] = media_end_positions - num_patches - assert (input_id[padded_media_indices[idx, : len(media_end_positions)] - 1] == self.media_start_id).all() + if self.use_im_start_end: + # locate the first media token positions + padded_media_indices[idx, : len(media_end_positions)] = media_end_positions - num_patches + assert ( + input_id[padded_media_indices[idx, : len(media_end_positions)] - 1] == self.media_start_id + ).all() + else: + padded_media_indices[idx, : len(media_end_positions)] = media_end_positions - num_patches + 1 + assert (input_id[padded_media_indices[idx, : len(media_end_positions)]] == self.media_start_id).all() # use indices to create a span padded_media_indices = padded_media_indices.unsqueeze(-1) + torch.arange( @@ -270,19 +299,23 @@ def __init__( vision_cfg = MegatronCLIPModel.restore_from( mm_cfg.vision_encoder.from_pretrained, return_config=True ).vision - vision_encoder = FrozenCLIPVisionTransformer(vision_cfg) + vision_encoder = FrozenCLIPVisionTransformer(vision_cfg, self.config) self.load_vision_encoder_weights(vision_encoder, mm_cfg.vision_encoder.from_pretrained) if mm_cfg.vision_encoder.freeze: vision_encoder.freeze() + + model_type = self.mm_cfg.llm.get("model_type", "nvgpt") # Monkey patch embedding if kwargs.get("pre_process", True): - extend_instance(self.language_model.embedding.word_embeddings, NevaEmbeddingMixin) + extend_instance(self.language_model.embedding.word_embeddings, NevaWordEmbeddingMixin) self.language_model.embedding.word_embeddings.init_vision( vision_encoder, media_start_id, media_end_id, vision_select_layer=mm_cfg.vision_encoder.get("vision_select_layer", -2), class_token_length=mm_cfg.vision_encoder.get("class_token_length", 1), + use_im_start_end=mm_cfg.get("use_im_start_end", False), + llama_tricks=(model_type == "llama_2"), ) def forward( @@ -385,6 +418,36 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): if self.megatron_amp_O2: self.adapter_keys = set(key.replace("model.module.", "model.", 1) for key in self.adapter_keys) + def get_all_keys(self,): + # TODO (yuya): p-tuning need additional handle, check peft models. + """ + Returns all the keys in the model + """ + k = [n for n, p in self.named_parameters()] + return set(k) + + def init_peft_modules(self): + """ + Randomly initialize the peft params and add them to the appropriate modules. + """ + assert len(self.peft_name_keys) > 0, "peft_name_keys have not been set no PEFT modules will be added" + assert len(self.name_key_to_cfg) > 0, "name_key_to_cfg has not been set no PEFT modules will be added" + logging.info(f"Before adding PEFT params:\n{self.summarize()}") + for _, module in self.named_modules(): + if isinstance(module, adapter_mixins.AdapterModuleMixin): + for peft_key in self.peft_name_keys: + peft_cfg = self.name_key_to_cfg[peft_key] + if model_utils.import_class_by_path(peft_cfg._target_) in module.get_accepted_adapter_types(): + module.add_adapter( + name=peft_key, + cfg=peft_cfg, # TODO (yuya): override this line in gpt peft models due to a conf merging issue + ) + if self.megatron_amp_O2: + for adapter_name in getattr(module, 'adapter_layer', []): + module.adapter_layer[adapter_name] = module.adapter_layer[adapter_name].to(self.autocast_dtype) + logging.info(f"After adding PEFT params:\n{self.summarize()}") + return True + def model_provider_func(self, pre_process, post_process): """Model depends on pipeline paralellism.""" media_start_id = self.tokenizer.token_to_id(DEFAULT_IM_START_TOKEN) @@ -495,7 +558,7 @@ def loss_func(output_tensor, loss_mask): raise NotImplementedError(f"`validation_drop_last=False` is not implemented in Neva!") else: reduced_loss = average_losses_across_data_parallel_group([loss_for_ub]) - return loss_for_ub, dict(avg=reduced_loss[0]) + return loss_for_ub, dict(avg=reduced_loss[0].unsqueeze(0)) def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_layers=None): batch = next(dataloader_iter) @@ -538,6 +601,7 @@ def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_ batch['tokens'], batch['position_ids'], batch['attention_mask'], + None, # placehpolder for loss mask batch['labels'], batch.get('media'), checkpoint_activations_all_layers=checkpoint_activations_all_layers, @@ -696,9 +760,6 @@ def setup(self, stage=None): if self.cfg.get('transformer_engine', False): self.setup_transformer_engine_tp_groups() - if self.cfg.mm_cfg.llm.freeze: - self.setup_complete = True - def build_train_valid_test_datasets(self): logging.info('Building Neva datasets.') ds_dict = make_supervised_data_module(tokenizer=self.tokenizer, model_cfg=self.cfg,) @@ -792,6 +853,9 @@ def load_state_dict(self, state_dict, strict=False): logging.critical('Unexpected keys were detected during the load. Please double check.') logging.critical(f'Unexpected keys: \n{unexpected_keys}') + def sharded_state_dict(self, prefix: str = ''): + return None + def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any: inference_config = self.get_inference_config() @@ -850,6 +914,6 @@ def dummy(): # Supports only one prompt at a time result = megatron_neva_generate(self.cuda(), input_prompts, length_params, sampling_params, inference_config) end = time.time() - print(f'Time taken {end - start}') + # print(f'Time taken {end - start}') return result diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index b6009e830064..b7e8daaddfa5 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -274,7 +274,7 @@ def _build_tokenizer(self): if self._cfg.tokenizer.get('additional_special_tokens', None) is not None: tokens_list = omegaconf.OmegaConf.to_object(self._cfg.tokenizer.additional_special_tokens) - self.tokenizer.add_special_tokens({'additional_special_tokens': tokens_list}) + self.tokenizer.add_special_tokens(tokens_list) def on_train_start(self) -> None: super().on_train_start() diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py index 5dab2a177a38..eba496994152 100644 --- a/nemo/collections/nlp/modules/common/text_generation_strategy.py +++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py @@ -13,12 +13,14 @@ # limitations under the License. import abc +import copy import os import re import warnings from typing import List, Set, Tuple import torch +from transformers import CLIPImageProcessor from nemo.collections.nlp.modules.common.lm_utils import pad_batch from nemo.collections.nlp.modules.common.megatron.utils import get_ltor_masks_and_position_ids @@ -343,13 +345,12 @@ def prepare_batch_at_step( class NevaModelTextGenerationStrategy(TextGenerationStrategy): def __init__(self, model): super().__init__(model) - self.forward_model = self.model.model.module - self.num_media_latents = 576 # TODO: Need to obtain this from the config ideally + self.forward_model = self.model.model + self.num_media_latents = model.cfg.data.get("image_token_len", 576) self.tokenizer = self.model.tokenizer self.image_paths = [] - self.cfg = model.cfg - self.data_cfg = model.cfg.data - from transformers import CLIPImageProcessor + self.cfg = self.model.cfg + self.data_cfg = self.model.cfg.data if self.cfg.mm_cfg.vision_encoder.from_hf: self.processor = CLIPImageProcessor.from_pretrained( @@ -360,7 +361,19 @@ def __init__(self, model): "openai/clip-vit-large-patch14", torch_dtype=torch.bfloat16 ) - self.model = model + add_extra_token = 0 + self.multimodal_cfg = dict( + is_multimodal=self.data_cfg.is_multimodal, + sep_image_conv_front=self.data_cfg.sep_image_conv_front, + conv_template=self.data_cfg.get("conv_template", "nvgpt"), + image_token_len=self.data_cfg.image_token_len, + image_folder=self.data_cfg.image_folder, + image_aspect_ratio=self.data_cfg.image_aspect_ratio, + use_im_start_end=getattr(self.cfg.mm_cfg, 'use_im_start_end', False), + image_processor=self.processor, + add_extra_token=add_extra_token, + context_length=self.cfg.encoder_seq_length, + ) def clip_max_len(self, maxlen: int) -> int: """ clip the max len based on the LM model max sequence length""" @@ -384,51 +397,53 @@ def init_batch(self, context_tokens: torch.Tensor, context_length: int, compute_ ) def process_prompts(self, prompt): - from nemo.collections.multimodal.data.neva.neva_dataset import DEFAULT_IMAGE_TOKEN, preprocess_nvgpt + from nemo.collections.multimodal.data.neva.neva_dataset import ( + DEFAULT_IMAGE_TOKEN, + preprocess_llama_2, + preprocess_multimodal, + preprocess_nvgpt, + ) list_data_dict = [] + if self.multimodal_cfg["conv_template"] == "nvgpt": + record = { + 'system': 'A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user\'s questions.\n\n', + 'conversations': [ + {'from': 'User', 'value': prompt,}, + { + 'from': 'Assistant', + 'value': '', + 'label': 'quality:6,toxicity:0,humor:0,creativity:0,violence:0,helpfulness:6,not_appropriate:0', + }, + ], + } - record = { - 'system': 'A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user\'s questions.\n\n', - 'conversations': [ - {'from': 'User', 'value': prompt,}, - { - 'from': 'Assistant', - 'value': '', - 'label': 'quality:8,toxicity:0,humor:0,creativity:0,violence:0,helpfulness:8,not_appropriate:0', - }, - ], - } - - for turn in record['conversations']: # - if turn.get('value') is not None: - turn['value'] = re.sub('', f'{DEFAULT_IMAGE_TOKEN}\n', turn['value']) - list_data_dict.append(record) - - add_extra_token = 1 - if getattr(self.model.cfg, 'no_seqlen_plus_one_input_tokens', False): - add_extra_token = 0 - data_cfg = self.model.cfg.data - model_cfg = self.model.cfg + for turn in record['conversations']: # + if turn.get('value') is not None: + turn['value'] = re.sub('', f'{DEFAULT_IMAGE_TOKEN}\n', turn['value']) + list_data_dict.append(record) - multimodal_cfg = dict( - is_multimodal=data_cfg.is_multimodal, - sep_image_conv_front=data_cfg.sep_image_conv_front, - image_token_len=data_cfg.image_token_len, - image_folder=data_cfg.image_folder, - image_aspect_ratio=data_cfg.image_aspect_ratio, - use_im_start_end=getattr(model_cfg.mm_cfg, 'use_im_start_end', False), - image_processor=self.processor, - add_extra_token=add_extra_token, - context_length=model_cfg.encoder_seq_length, - ) + sources = preprocess_multimodal( + copy.deepcopy(list_data_dict), self.multimodal_cfg, self.num_media_latents + ) # HARDCODED FOR NOW + data_dict = preprocess_nvgpt(sources, self.tokenizer, self.multimodal_cfg) - import copy + elif self.multimodal_cfg["conv_template"] == "llama_2": + record = { + 'conversations': [{'from': 'human', 'value': prompt,}, {'from': 'gpt', 'value': '',},], + } - from nemo.collections.multimodal.data.neva.neva_dataset import preprocess_multimodal + for turn in record['conversations']: # + if turn.get('value') is not None: + turn['value'] = re.sub('', f'{DEFAULT_IMAGE_TOKEN}\n', turn['value']) + list_data_dict.append(record) - sources = preprocess_multimodal(copy.deepcopy(list_data_dict), multimodal_cfg, 576) # HARDCODED FOR NOW - data_dict = preprocess_nvgpt(sources, self.tokenizer, multimodal_cfg) + sources = preprocess_multimodal( + copy.deepcopy(list_data_dict), self.multimodal_cfg, self.num_media_latents + ) # HARDCODED FOR NOW + data_dict = preprocess_llama_2(sources, self.tokenizer, self.multimodal_cfg) + else: + raise ValueError(f"Conversation template `{self.conv_template}` is not supported in Neva now.") return data_dict['tokens'].tolist() def tokenize_batch(self, prompt, max_len, add_BOS): @@ -523,7 +538,7 @@ def prepare_batch_at_step( ) len_array = torch.tensor([maxlen] * micro_batch_size, device=torch.cuda.current_device()) batch = [tokens2use, attention_mask_repeat, positions2use, media, setkey_value_array, len_array] - tensor_shape = [tokens2use.shape[1], micro_batch_size, self.model.cfg.encoder_seq_length] + tensor_shape = [tokens2use.shape[1], micro_batch_size, self.model.cfg.hidden_size] return batch, tensor_shape diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py index 90ddcc5db9ae..5ccb51fa7d9f 100644 --- a/nemo/collections/nlp/modules/common/text_generation_utils.py +++ b/nemo/collections/nlp/modules/common/text_generation_utils.py @@ -16,6 +16,7 @@ import os import pickle +import re from collections.abc import Iterable from functools import partial from typing import Callable, Tuple @@ -144,12 +145,13 @@ def megatron_gpt_generate(model, inputs, tokenizer, length_params, sampling_para def megatron_neva_generate(model, prompt_dict_list, length_params, sampling_params, inference_config, **strategy_args): + conv_template = model.cfg.data.get("conv_template", "nvgpt") final_response = [] for idx, prompt_dict in enumerate(prompt_dict_list): img = os.path.join(inference_config.inference.images_base_path, prompt_dict['image']) response = generate( model, - inputs=prompt_dict['prompt'], + inputs=prompt_dict.get("prompt") or prompt_dict.get("text"), tokens_to_generate=length_params['max_length'], all_probs=sampling_params['all_probs'], temperature=sampling_params['temperature'], @@ -158,15 +160,40 @@ def megatron_neva_generate(model, prompt_dict_list, length_params, sampling_para top_p=sampling_params['top_p'], greedy=sampling_params['use_greedy'], repetition_penalty=sampling_params['repetition_penalty'], + end_strings=sampling_params['end_strings'], min_tokens_to_generate=length_params['min_length'], image_list=img, **strategy_args, ) + + # Regular expression pattern to match the sequence + pattern = re.compile(r'( ⁇ )+') + clean_text = re.sub(pattern, '', response['sentences'][0]) + + clean_response = clean_text + for string in sampling_params['end_strings']: + clean_response = clean_response.rstrip(string) + if conv_template == "nvgpt": + labels_str_regexp = re.compile(f"quality:.*\n") + last_match_end_position = None + for match in re.finditer(labels_str_regexp, clean_response): + last_match_end_position = match.end() + if last_match_end_position is not None: + clean_response = clean_response[last_match_end_position:] + elif conv_template == "llama_2": + clean_response = clean_response.rsplit("[/INST] ", 1)[-1] + clean_response.strip() + response["clean_text"] = clean_text + response["clean_response"] = clean_response + final_response.append(response) + if torch.cuda.current_device() == 0: print(f"------------- PROMPT {idx} of {len(prompt_dict_list)} ------------ ") - print(response['sentences'][0].replace('', '')) + print(clean_text) + print() + print(f"CLEAN RESPONSE: {clean_response}") print("---------------------------------------------\n") - final_response.append(response) + return final_response @@ -737,8 +764,8 @@ def sample_sequence_batch( lengths = torch.ones([batch_size]).long().cuda() * maxlen + media_tensor = None if image_list is not None: - # media_tensor = inference_strategy.get_media_tensor(image_list) while context_length < maxlen: diff --git a/tests/collections/multimodal/test_clip_model.py b/tests/collections/multimodal/test_clip_model.py index 2fbc6304f475..474e7328fa78 100644 --- a/tests/collections/multimodal/test_clip_model.py +++ b/tests/collections/multimodal/test_clip_model.py @@ -45,7 +45,7 @@ def model_cfg(): pipeline_model_parallel_size: 1 # inter-layer model parallelism virtual_pipeline_model_parallel_size: null # interleaved pipeline - restore_from_pretrained: null # used in fine-tuning + restore_from_path: null # used in fine-tuning # multimodal configs output_dim: 64 local_loss: False # calculate loss w/ local features @ global (instead of realizing full global @ global matrix) diff --git a/tests/collections/vision/test_vit_model.py b/tests/collections/vision/test_vit_model.py index f4f0c1379760..5f32b813b9df 100644 --- a/tests/collections/vision/test_vit_model.py +++ b/tests/collections/vision/test_vit_model.py @@ -40,7 +40,7 @@ def model_cfg(): pipeline_model_parallel_size: 1 # inter-layer model parallelism virtual_pipeline_model_parallel_size: null # interleaved pipeline - restore_from_pretrained: null # used in fine-tuning + restore_from_path: null # used in fine-tuning # vision configs vision_pretraining_type: "classify" From ee82ff5573dcd79f62988e84d07daf3ff598f32c Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Thu, 5 Oct 2023 12:42:55 -0700 Subject: [PATCH 228/512] Fix sampler length --- .../megatron/data_samplers.py | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py b/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py index fe0d28d3c3c2..266159fd44c9 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py @@ -79,12 +79,13 @@ def __init__( def __len__(self): num_available_samples: int = self.total_samples - self.consumed_samples - if self.drop_last: - return num_available_samples // self.micro_batch_times_data_parallel_size + if self.global_batch_size is not None: + if self.drop_last: + return num_available_samples // self.global_batch_size + else: + return (num_available_samples + self.global_batch_size - 1) // self.global_batch_size else: - return ( - num_available_samples + self.global_batch_size if self.global_batch_size is None else 0 - 1 - ) // self.micro_batch_times_data_parallel_size + return (num_available_samples - 1) // self.micro_batch_times_data_parallel_size + 1 @abc.abstractmethod def __iter__(self): @@ -151,12 +152,16 @@ def __init__( def __len__(self): num_available_samples: int = self.total_samples - if self.drop_last: - return num_available_samples // self.micro_batch_times_data_parallel_size + if self.global_batch_size is not None: + if self.drop_last: + return num_available_samples // self.global_batch_size + else: + return (num_available_samples + self.global_batch_size - 1) // self.global_batch_size else: - return ( - num_available_samples + self.global_batch_size if self.global_batch_size is not None else 0 - 1 - ) // self.micro_batch_times_data_parallel_size + if self.drop_last: + return num_available_samples // self.micro_batch_times_data_parallel_size + else: + return (num_available_samples - 1) // self.micro_batch_times_data_parallel_size def __iter__(self): active_total_samples = self.total_samples - self.last_batch_size @@ -185,4 +190,4 @@ def __iter__(self): # Check the last partial batch and see drop_last is set if len(batch) > 0 and not self.drop_last: - yield batch + yield batch \ No newline at end of file From f39c629e2561673f06985c4bbc4ba9cd1f3e5355 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Thu, 21 Sep 2023 11:39:26 -0700 Subject: [PATCH 229/512] Fix all precision issues in nemo multimodal --- .../foundation/clip/megatron_clip_export.py | 2 +- .../generative/controlnet/controlnet_export.py | 2 +- .../generative/controlnet/controlnet_infer.py | 6 +++--- .../generative/dreambooth/dreambooth_export.py | 2 +- .../instruct_pix2pix/sd_edit_export.py | 2 +- .../generative/stable_diffusion/sd_export.py | 2 +- .../megatron_change_num_partitions.py | 2 +- .../nlp/language_modeling/megatron_gpt_test.py | 4 ++-- .../megatron_t5_prompt_learning.py | 2 +- .../megatron_vit_classification_export.py | 2 +- .../models/clip/megatron_clip_models.py | 2 +- .../multimodal/models/imagen/imagen.py | 2 +- .../models/instruct_pix2pix/ldm/ddpm_edit.py | 2 +- .../models/stable_diffusion/ldm/ddpm.py | 4 ++-- .../parts/stable_diffusion/pipeline.py | 4 ++-- .../modules/common/text_generation_strategy.py | 4 ++-- .../modules/common/text_generation_utils.py | 2 +- .../vision/data/megatron/vit_dataset.py | 7 +++---- .../megatron_vit_classification_models.py | 2 +- .../common/megatron/vision_transformer.py | 6 +++--- .../convert_hf_llama_to_nemo.py | 2 +- .../asr/test_asr_ctcencdec_model.py | 4 ++-- .../collections/multimodal/test_clip_model.py | 18 +++++++++--------- tests/collections/vision/test_vit_model.py | 18 +++++++++--------- 24 files changed, 51 insertions(+), 52 deletions(-) diff --git a/examples/multimodal/foundation/clip/megatron_clip_export.py b/examples/multimodal/foundation/clip/megatron_clip_export.py index 3dbf0fe0fe1d..a15df91922ed 100644 --- a/examples/multimodal/foundation/clip/megatron_clip_export.py +++ b/examples/multimodal/foundation/clip/megatron_clip_export.py @@ -137,7 +137,7 @@ def model_cfg_modifier(model_cfg): build_engine( f"{output_dir}/onnx/clip.onnx", f"{output_dir}/plan/clip.plan", - fp16=(trt_precision == 16), + fp16=(trt_precision in [16, '16', '16-mixed']), input_profile=input_profile, timing_cache=None, workspace_size=0, diff --git a/examples/multimodal/generative/controlnet/controlnet_export.py b/examples/multimodal/generative/controlnet/controlnet_export.py index c05579cac447..57f2f3c90b3e 100644 --- a/examples/multimodal/generative/controlnet/controlnet_export.py +++ b/examples/multimodal/generative/controlnet/controlnet_export.py @@ -39,7 +39,7 @@ def main(cfg): hint_image_size = cfg.infer.get('hint_image_size', 512) downsampling_factor = cfg.infer.get('down_factor', 8) fp16 = 16 == cfg.trainer.get("precision", 32) - if cfg.trainer.get("precision", 32) == "bf16": + if cfg.trainer.get("precision", 32) in ['bf16', 'bf16-mixed']: print("BF16 not supported for export, will use fp32") def model_cfg_modifier(model_cfg): diff --git a/examples/multimodal/generative/controlnet/controlnet_infer.py b/examples/multimodal/generative/controlnet/controlnet_infer.py index 156433b59c14..c050010a73c0 100644 --- a/examples/multimodal/generative/controlnet/controlnet_infer.py +++ b/examples/multimodal/generative/controlnet/controlnet_infer.py @@ -101,11 +101,11 @@ def pipeline(model, cfg, rng=None, verbose=True): control_image_preprocess = cfg.infer.get('control_image_preprocess', None) # get autocast_dtype - if cfg.trainer.precision == 'bf16': + if cfg.trainer.precision in ['bf16', 'bf16-mixed']: autocast_dtype = torch.bfloat16 - elif int(cfg.trainer.precision) == 32: + elif cfg.trainer.precision in [32, '32', '32-true']: autocast_dtype = torch.float - elif int(cfg.trainer.precision) == 16: + elif cfg.trainer.precision in [16, '16', '16-mixed']: autocast_dtype = torch.half else: raise ValueError('precision must be in [32, 16, "bf16"]') diff --git a/examples/multimodal/generative/dreambooth/dreambooth_export.py b/examples/multimodal/generative/dreambooth/dreambooth_export.py index ae1c85edfbb5..5de4c038a9ea 100644 --- a/examples/multimodal/generative/dreambooth/dreambooth_export.py +++ b/examples/multimodal/generative/dreambooth/dreambooth_export.py @@ -36,7 +36,7 @@ def main(cfg): width = cfg.infer.get('width', 512) downsampling_factor = cfg.infer.get('down_factor', 8) fp16 = 16 == cfg.trainer.get("precision", 32) - if cfg.trainer.get("precision", 32) == "bf16": + if cfg.trainer.get("precision", 32) in ['bf16', 'bf16-mixed']: print("BF16 not supported for export, will use fp32") def model_cfg_modifier(model_cfg): diff --git a/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py b/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py index 8c83f213edaf..a97ed6e09a63 100644 --- a/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py +++ b/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py @@ -62,7 +62,7 @@ def main(cfg): logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') fp16 = 16 == cfg.trainer.get("precision", 32) - if cfg.trainer.get("precision", 32) == "bf16": + if cfg.trainer.get("precision", 32) in ['bf16', 'bf16-mixed']: print("BF16 not supported for export, will use fp32") with open_dict(cfg): edit_cfg = cfg.pop("edit") diff --git a/examples/multimodal/generative/stable_diffusion/sd_export.py b/examples/multimodal/generative/stable_diffusion/sd_export.py index ec47f9b13bdd..9a2ff4c7fa64 100644 --- a/examples/multimodal/generative/stable_diffusion/sd_export.py +++ b/examples/multimodal/generative/stable_diffusion/sd_export.py @@ -38,7 +38,7 @@ def main(cfg): width = cfg.infer.get('width', 512) downsampling_factor = cfg.infer.get('down_factor', 8) fp16 = 16 == cfg.trainer.get("precision", 32) - if cfg.trainer.get("precision", 32) == "bf16": + if cfg.trainer.get("precision", 32) in ['bf16', 'bf16-mixed']: print("BF16 not supported for export, will use fp32") def model_cfg_modifier(model_cfg): diff --git a/examples/nlp/language_modeling/megatron_change_num_partitions.py b/examples/nlp/language_modeling/megatron_change_num_partitions.py index a17f9313d36c..9b6aa2b3e5ef 100644 --- a/examples/nlp/language_modeling/megatron_change_num_partitions.py +++ b/examples/nlp/language_modeling/megatron_change_num_partitions.py @@ -858,7 +858,7 @@ def main(): logging.warning("BF16 is not supported on this device. Using FP16 instead.") precision = precision[2:] - if precision == 32: + if precision in [32, '32', '32-true']: dtype = torch.float32 elif precision in [16, "16", "16-mixed"]: dtype = torch.float16 diff --git a/examples/nlp/language_modeling/megatron_gpt_test.py b/examples/nlp/language_modeling/megatron_gpt_test.py index 5fc40039098b..62a1d40dbaed 100644 --- a/examples/nlp/language_modeling/megatron_gpt_test.py +++ b/examples/nlp/language_modeling/megatron_gpt_test.py @@ -34,7 +34,7 @@ def main(cfg) -> None: logging.info(f'\n{OmegaConf.to_yaml(cfg)}') trainer = None - if cfg.trainer.precision == 16: + if cfg.trainer.precision in [16, '16', '16-mixed']: trainer = Trainer( plugins=[ NLPMixedPrecisionPlugin( @@ -45,7 +45,7 @@ def main(cfg) -> None: strategy=NLPDDPStrategy(), **cfg.trainer, ) - elif cfg.trainer.precision == 'bf16': + elif cfg.trainer.precision in ['bf16', 'bf16-mixed']: trainer = Trainer(plugins=[NLPNativeBfloat16PrecisionPlugin(),], strategy=NLPDDPStrategy(), **cfg.trainer,) else: trainer = Trainer(plugins=[NLPPrecisionPlugin()], strategy=NLPDDPStrategy(), **cfg.trainer) diff --git a/examples/nlp/language_modeling/megatron_t5_prompt_learning.py b/examples/nlp/language_modeling/megatron_t5_prompt_learning.py index ba335e39c225..1bef27352fb1 100644 --- a/examples/nlp/language_modeling/megatron_t5_prompt_learning.py +++ b/examples/nlp/language_modeling/megatron_t5_prompt_learning.py @@ -50,7 +50,7 @@ def main(cfg) -> None: plugins = [] strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=False,) - if cfg.trainer.precision == 16 or cfg.trainer.precision == '16-mixed': + if cfg.trainer.precision in [16, '16', '16-mixed'] or cfg.trainer.precision == '16-mixed': scaler = GradScaler( init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), growth_interval=cfg.model.get('native_amp_growth_interval', 1000), diff --git a/examples/vision/vision_transformer/megatron_vit_classification_export.py b/examples/vision/vision_transformer/megatron_vit_classification_export.py index 8a732e0a6558..06f6447bd788 100644 --- a/examples/vision/vision_transformer/megatron_vit_classification_export.py +++ b/examples/vision/vision_transformer/megatron_vit_classification_export.py @@ -108,7 +108,7 @@ def dummy(): build_engine( f"{output_dir}/onnx/vit.onnx", f"{output_dir}/plan/vit.plan", - fp16=(trt_precision == 16), + fp16=(trt_precision in [16, '16', '16-mixed']), input_profile=input_profile, timing_cache=None, workspace_size=0, diff --git a/nemo/collections/multimodal/models/clip/megatron_clip_models.py b/nemo/collections/multimodal/models/clip/megatron_clip_models.py index f5db8a9cd177..e24e95e68af8 100644 --- a/nemo/collections/multimodal/models/clip/megatron_clip_models.py +++ b/nemo/collections/multimodal/models/clip/megatron_clip_models.py @@ -623,7 +623,7 @@ def training_step(self, dataloader_iter, batch_idx): # we can avoid this broadcast by updating the PTL log function to accept specific ranks torch.distributed.broadcast(loss_mean, get_last_rank()) - if self.cfg.precision == 16: + if self.cfg.precision in [16, '16', '16-mixed']: loss_scale = self.trainer.precision_plugin.scaler._scale if loss_scale is not None: self.log('loss_scale', loss_scale, batch_size=1) diff --git a/nemo/collections/multimodal/models/imagen/imagen.py b/nemo/collections/multimodal/models/imagen/imagen.py index 99bfba35fb65..64c1382e2d54 100644 --- a/nemo/collections/multimodal/models/imagen/imagen.py +++ b/nemo/collections/multimodal/models/imagen/imagen.py @@ -418,7 +418,7 @@ def training_step(self, dataloader_iter, batch_idx): # so we all-reduce gradients after the pipeline self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) - if self.cfg.precision == 16: + if self.cfg.precision in [16, '16', '16-mixed']: loss_scale = self.trainer.precision_plugin.scaler._scale if loss_scale is not None: self.log('loss_scale', loss_scale, batch_size=1) diff --git a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py index 7cfd8be460b0..e25b0ecbe041 100644 --- a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py +++ b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py @@ -144,7 +144,7 @@ def get_input( class MegatronLatentDiffusionEdit(MegatronLatentDiffusion): def model_provider_func(self, pre_process=True, post_process=True): """Model depends on pipeline paralellism.""" - model = LatentDiffusionEdit(cfg=self.cfg) + model = LatentDiffusionEdit(cfg=self.cfg, model_parallel_config=self.model_parallel_config) return model def setup(self, stage=None): diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py index fe91e0766e36..c88c7f932339 100644 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -1126,7 +1126,7 @@ def p_losses(self, x_start, cond, t, noise=None): else: raise NotImplementedError() - if (self.precision == 'bf16') or (int(self.precision) == 16): + if (self.precision in ['bf16', 'bf16-mixed']) or (self.precision in [16, '16', '16-mixed']): model_output = model_output.type(torch.float32) loss_simple = self.get_loss(model_output, target, mean=False).mean([1, 2, 3]) loss_dict.update({f'{prefix}/loss_simple': loss_simple.mean()}) @@ -1701,7 +1701,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] loss_tensor = torch.stack(loss_tensors_list) loss_dict[key] = loss_tensor.mean() - loss_mean = loss_dict["train/loss"] + loss_mean = loss_dict["val/loss"] if forward_only else loss_dict["train/loss"] else: raise NotImplementedError("Losses of micro batches sizes must be uniform!") else: diff --git a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py index 8fbbe16f6642..cdfd3c37300e 100644 --- a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py +++ b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py @@ -106,9 +106,9 @@ def pipeline(model, cfg, verbose=True, rng=None): # get autocast_dtype if cfg.trainer.precision in ['bf16', 'bf16-mixed']: autocast_dtype = torch.bfloat16 - elif int(cfg.trainer.precision) in ['32', '32-tru']: + elif cfg.trainer.precision in [32, '32', '32-true']: autocast_dtype = torch.float - elif int(cfg.trainer.precision) in ['16', '16-mixed']: + elif cfg.trainer.precision in [16, '16', '16-mixed']: autocast_dtype = torch.half else: raise ValueError('precision must be in [32, 16, "bf16"]') diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py index eba496994152..7702fe1c374d 100644 --- a/nemo/collections/nlp/modules/common/text_generation_strategy.py +++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py @@ -488,9 +488,9 @@ def expand2square(pil_img, background_color): model_cfg = self.model.cfg - if model_cfg.precision == 16: + if model_cfg.precision in [16, '16', '16-mixed']: media = image.type(torch.float16) - elif model_cfg.precision == 32: + elif model_cfg.precision in [32, '32', '32-true']: media = image.type(torch.float32) else: media = image.type(torch.bfloat16) diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py index 5ccb51fa7d9f..7951ff563290 100644 --- a/nemo/collections/nlp/modules/common/text_generation_utils.py +++ b/nemo/collections/nlp/modules/common/text_generation_utils.py @@ -509,7 +509,7 @@ def synced_generate( precision = model._trainer.precision if precision in [16, "16"]: dtype = torch.float16 - elif precision == "bf16": + elif precision in ['bf16', 'bf16-mixed']: dtype = torch.bfloat16 else: dtype = torch.float32 diff --git a/nemo/collections/vision/data/megatron/vit_dataset.py b/nemo/collections/vision/data/megatron/vit_dataset.py index 29852e70446b..649840872aa5 100644 --- a/nemo/collections/vision/data/megatron/vit_dataset.py +++ b/nemo/collections/vision/data/megatron/vit_dataset.py @@ -26,16 +26,15 @@ def _to_torch_data_type(precision): - if precision == 'bf16': + if precision in ['bf16', 'bf16-mixed']: return torch.bfloat16 - elif int(precision) == 16: + elif precision in [16, '16', '16-mixed']: return torch.float16 - elif int(precision) == 32: + elif precision in [32, '32', '32-true']: return torch.float32 else: raise ValueError(f"Cannot recognize precision {precision}") - class RandomSeedDataset(Dataset): def __init__(self, dataset, seed=1234): self.base_seed = seed diff --git a/nemo/collections/vision/models/megatron_vit_classification_models.py b/nemo/collections/vision/models/megatron_vit_classification_models.py index c9b69218c248..a1e4a52d8f20 100644 --- a/nemo/collections/vision/models/megatron_vit_classification_models.py +++ b/nemo/collections/vision/models/megatron_vit_classification_models.py @@ -436,7 +436,7 @@ def training_step(self, dataloader_iter, batch_idx): # we can avoid this broadcast by updating the PTL log function to accept specific ranks torch.distributed.broadcast(loss_mean, get_last_rank()) - if self.cfg.precision == 16: + if self.cfg.precision in [16, '16', '16-mixed']: loss_scale = self.trainer.precision_plugin.scaler._scale if loss_scale is not None: self.log('loss_scale', loss_scale, batch_size=1) diff --git a/nemo/collections/vision/modules/common/megatron/vision_transformer.py b/nemo/collections/vision/modules/common/megatron/vision_transformer.py index 2a554647488d..792f0bdc4253 100644 --- a/nemo/collections/vision/modules/common/megatron/vision_transformer.py +++ b/nemo/collections/vision/modules/common/megatron/vision_transformer.py @@ -314,11 +314,11 @@ class ParallelVisionTransformerLayer(ParallelVisionTransformerLayer_): def __init__(self, **kwargs): super(ParallelVisionTransformerLayer, self).__init__(**kwargs) precision = kwargs['precision'] - if precision == 'bf16': + if precision in ['bf16', 'bf16-mixed']: self.dtype = torch.bfloat16 - elif int(precision) == 16: + elif precision in [16, '16', '16-mixed']: self.dtype = torch.float16 - elif int(precision) == 32: + elif precision in [32, '32', '32-true']: self.dtype = torch.float32 else: raise ValueError(f"Cannot recognize precision {precision}") diff --git a/scripts/nlp_language_modeling/convert_hf_llama_to_nemo.py b/scripts/nlp_language_modeling/convert_hf_llama_to_nemo.py index c281088f8c5c..ad67a6e5aff1 100644 --- a/scripts/nlp_language_modeling/convert_hf_llama_to_nemo.py +++ b/scripts/nlp_language_modeling/convert_hf_llama_to_nemo.py @@ -168,7 +168,7 @@ def convert(args): else: plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - if precision == 32: + if precision in [32, '32', '32-true']: dtype = torch.float32 elif precision in [16, "16", "16-mixed"]: dtype = torch.float16 diff --git a/tests/collections/asr/test_asr_ctcencdec_model.py b/tests/collections/asr/test_asr_ctcencdec_model.py index 8d90079d0c51..02b2cadc8ab1 100644 --- a/tests/collections/asr/test_asr_ctcencdec_model.py +++ b/tests/collections/asr/test_asr_ctcencdec_model.py @@ -167,11 +167,11 @@ def test_change_conv_asr_se_context_window(self, asr_model): new_config = asr_model.cfg assert old_cfg.encoder.jasper[0].se_context_size == -1 - assert new_config.encoder.jasper[0].se_context_size == 32 + assert new_config.encoder.jasper[0].se_context_size in [32, '32', '32-true'] for name, m in asr_model.encoder.named_modules(): if type(m).__class__.__name__ == 'SqueezeExcite': - assert m.context_window == 32 + assert m.context_window in [32, '32', '32-true'] @pytest.mark.unit def test_change_conv_asr_se_context_window_no_config_update(self, asr_model): diff --git a/tests/collections/multimodal/test_clip_model.py b/tests/collections/multimodal/test_clip_model.py index 474e7328fa78..781757c5869f 100644 --- a/tests/collections/multimodal/test_clip_model.py +++ b/tests/collections/multimodal/test_clip_model.py @@ -377,11 +377,11 @@ def test_forward(self, clip_trainer_and_model, test_data_dir, precision=None): trainer, clip_model = clip_trainer_and_model dtype = None - if clip_model.cfg['precision'] == 32: + if clip_model.cfg['precision'] in [32, '32', '32-true']: dtype = torch.float - elif clip_model.cfg['precision'] == 16: + elif clip_model.cfg['precision'] in [16, '16', '16-mixed']: dtype = torch.float16 - elif clip_model.cfg['precision'] == 'bf16': + elif clip_model.cfg['precision'] in ['bf16', 'bf16-mixed']: dtype = torch.bfloat16 else: raise ValueError(f"precision: {clip_model.cfg['precision']} is not supported.") @@ -420,11 +420,11 @@ def test_forward(self, clip_trainer_and_model, test_data_dir, precision=None): # ) # # dtype = None - # if trainer_cfg['precision'] == 32: + # if trainer_cfg['precision'] in [32, '32', '32-true']: # dtype = torch.float - # elif trainer_cfg['precision'] == 16: + # elif trainer_cfg['precision'] in [16, '16', '16-mixed']: # dtype = torch.float16 - # elif trainer_cfg['precision'] == 'bf16': + # elif trainer_cfg['precision'] in ['bf16', 'bf16-mixed']: # dtype = torch.bfloat16 # else: # raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") @@ -456,11 +456,11 @@ def test_forward(self, clip_trainer_and_model, test_data_dir, precision=None): # @pytest.mark.unit # def test_vit_head(self, model_cfg, trainer_cfg, precision): # dtype = None - # if trainer_cfg['precision'] == 32: + # if trainer_cfg['precision'] in [32, '32', '32-true']: # dtype = torch.float - # elif trainer_cfg['precision'] == 16: + # elif trainer_cfg['precision'] in [16, '16', '16-mixed']: # dtype = torch.float16 - # elif trainer_cfg['precision'] == 'bf16': + # elif trainer_cfg['precision'] in ['bf16', 'bf16-mixed']: # dtype = torch.bfloat16 # else: # raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") diff --git a/tests/collections/vision/test_vit_model.py b/tests/collections/vision/test_vit_model.py index 5f32b813b9df..e50106957679 100644 --- a/tests/collections/vision/test_vit_model.py +++ b/tests/collections/vision/test_vit_model.py @@ -281,11 +281,11 @@ def test_forward(self, vit_classification_trainer_and_model, test_data_dir): trainer, vit_classification_model = vit_classification_trainer_and_model dtype = None - if vit_classification_model.cfg['precision'] == 32: + if vit_classification_model.cfg['precision'] in [32, '32', '32-true']: dtype = torch.float - elif vit_classification_model.cfg['precision'] == 16: + elif vit_classification_model.cfg['precision'] in [16, '16', '16-mixed']: dtype = torch.float16 - elif vit_classification_model.cfg['precision'] == 'bf16': + elif vit_classification_model.cfg['precision'] in ['bf16', 'bf16-mixed']: dtype = torch.bfloat16 else: raise ValueError(f"precision: {vit_classification_model.cfg['precision']} is not supported.") @@ -323,11 +323,11 @@ def test_vit_backbone(self, model_cfg, trainer_cfg, precision): ) dtype = None - if trainer_cfg['precision'] == 32: + if trainer_cfg['precision'] in [32, '32', '32-true']: dtype = torch.float - elif trainer_cfg['precision'] == 16: + elif trainer_cfg['precision'] in [16, '16', '16-mixed']: dtype = torch.float16 - elif trainer_cfg['precision'] == 'bf16': + elif trainer_cfg['precision'] in ['bf16', 'bf16-mixed']: dtype = torch.bfloat16 else: raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") @@ -357,11 +357,11 @@ def test_vit_backbone(self, model_cfg, trainer_cfg, precision): @pytest.mark.unit def test_vit_head(self, model_cfg, trainer_cfg, precision): dtype = None - if trainer_cfg['precision'] == 32: + if trainer_cfg['precision'] in [32, '32', '32-true']: dtype = torch.float - elif trainer_cfg['precision'] == 16: + elif trainer_cfg['precision'] in [16, '16', '16-mixed']: dtype = torch.float16 - elif trainer_cfg['precision'] == 'bf16': + elif trainer_cfg['precision'] in ['bf16', 'bf16-mixed']: dtype = torch.bfloat16 else: raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") From 6a6c286ad787c08849f44a20edd068a5c7c844c4 Mon Sep 17 00:00:00 2001 From: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Date: Mon, 18 Sep 2023 18:44:36 -0400 Subject: [PATCH 230/512] Add rope dynamic linear scaling (#7437) * Add dynamic linear scaling Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix Signed-off-by: Cheng-Ping Hsieh --------- Signed-off-by: Cheng-Ping Hsieh Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Yang Zhang --- .../megatron_gpt_continue_training.py | 4 +++- .../modules/common/megatron/language_model.py | 4 +++- .../rotary_position_embedding.py | 21 ++++++++++++++----- 3 files changed, 22 insertions(+), 7 deletions(-) mode change 100644 => 100755 examples/nlp/language_modeling/megatron_gpt_continue_training.py diff --git a/examples/nlp/language_modeling/megatron_gpt_continue_training.py b/examples/nlp/language_modeling/megatron_gpt_continue_training.py old mode 100644 new mode 100755 index 57abf91fc73d..31d7145e6bbb --- a/examples/nlp/language_modeling/megatron_gpt_continue_training.py +++ b/examples/nlp/language_modeling/megatron_gpt_continue_training.py @@ -61,7 +61,9 @@ def _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False): gpt_cfg.max_position_embeddings = cfg.model.max_position_embeddings gpt_cfg.seq_len_interpolation_factor = cfg.model.seq_len_interpolation_factor gpt_cfg.use_flash_attention = cfg.model.use_flash_attention - + assert ( + gpt_cfg.encoder_seq_length == gpt_cfg.max_position_embeddings * gpt_cfg.seq_len_interpolation_factor + ), 'seq_length should be equal to max_position_embedding * seq_len_interpolation_factor' # This is needed when modifying a hparam file directly to load `.ckpt` files. # This is not needed to modify the cfg in `.nemo` files. if add_cfg_to_tree: diff --git a/nemo/collections/nlp/modules/common/megatron/language_model.py b/nemo/collections/nlp/modules/common/megatron/language_model.py index 07d3b6744a2a..4807908dd1cc 100755 --- a/nemo/collections/nlp/modules/common/megatron/language_model.py +++ b/nemo/collections/nlp/modules/common/megatron/language_model.py @@ -563,7 +563,9 @@ def __init__( if rotary_percentage < 1: rotary_dim = int(rotary_dim * rotary_percentage) self.rotary_pos_emb = RotaryEmbedding( - rotary_dim, seq_len_interpolation_factor=seq_len_interpolation_factor + rotary_dim, + seq_len_interpolation_factor=seq_len_interpolation_factor, + pretrained_max_position_embeddings=max_position_embeddings, ) elif position_embedding_type == 'alibi': diff --git a/nemo/collections/nlp/modules/common/megatron/position_embedding/rotary_position_embedding.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/rotary_position_embedding.py index c97010ecb911..6dba53dd1e7a 100644 --- a/nemo/collections/nlp/modules/common/megatron/position_embedding/rotary_position_embedding.py +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/rotary_position_embedding.py @@ -25,25 +25,36 @@ class RotaryEmbedding(nn.Module): Implements Rotary Position Embedding from https://arxiv.org/abs/2104.09864. """ - def __init__(self, dim: int, seq_len_interpolation_factor: int = None): + def __init__( + self, dim: int, seq_len_interpolation_factor: int = None, pretrained_max_position_embeddings: int = None + ): """ Args: dim (int): rotary embedding dimension seq_len_interpolation_factor (int): if not None, discrete positions will be interpolated by this factor via the trick in https://arxiv.org/abs/2306.15595. + pretrained_max_position_embeddings (int): pre-trained max_position_embeddings before position interpolation. """ super().__init__() self.seq_len_interpolation_factor = seq_len_interpolation_factor inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim)) self.register_buffer('inv_freq', inv_freq) + self.pretrained_max_position_embeddings = pretrained_max_position_embeddings def forward(self, max_seq_len, offset=0): seq = torch.arange(max_seq_len, device=self.inv_freq.device) + offset - if self.seq_len_interpolation_factor is not None: - seq = seq.type_as(self.inv_freq) - seq *= 1 / self.seq_len_interpolation_factor - freqs = einsum('i , j -> i j', seq.type_as(self.inv_freq), self.inv_freq) + seq = seq.type_as(self.inv_freq) + + if self.pretrained_max_position_embeddings is not None and self.seq_len_interpolation_factor is not None: + if max_seq_len > self.pretrained_max_position_embeddings * self.seq_len_interpolation_factor: + # dynamic linear scaling (length > position we have learned) + seq *= 1 / (max_seq_len / self.pretrained_max_position_embeddings) + else: + # fixed linear scaling + seq *= 1 / self.seq_len_interpolation_factor + + freqs = einsum('i , j -> i j', seq, self.inv_freq) # first part even vector components, second part odd vector components, # 2 * dim in dimension size emb = torch.cat((freqs, freqs), dim=-1) From 703d1ef9fb20874f8a8f1134c62a721ef6bc6673 Mon Sep 17 00:00:00 2001 From: Kunal Dhawan Date: Mon, 18 Sep 2023 17:48:37 -0700 Subject: [PATCH 231/512] Fix None dataloader issue in PTL2.0 (#7455) * Fix None dataloader issue in PTL2.0 Signed-off-by: KunalDhawan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updating values of self._validation_dl and self._test_dl as well Signed-off-by: KunalDhawan * updating values of self._validation_dl and self._test_dl as well Signed-off-by: KunalDhawan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: KunalDhawan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- nemo/core/classes/modelPT.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/nemo/core/classes/modelPT.py b/nemo/core/classes/modelPT.py index 0e68d4937282..6f5d0f90fe07 100644 --- a/nemo/core/classes/modelPT.py +++ b/nemo/core/classes/modelPT.py @@ -856,12 +856,18 @@ def train_dataloader(self): return self._train_dl def val_dataloader(self): - if self._validation_dl is not None: - return self._validation_dl + if self._validation_dl is None: + # None dataloader no longer supported in PTL2.0 + self._validation_dl = [] + + return self._validation_dl def test_dataloader(self): - if self._test_dl is not None: - return self._test_dl + if self._test_dl is None: + # None dataloader no longer supported in PTL2.0 + self._test_dl = [] + + return self._test_dl def on_validation_epoch_end(self) -> Optional[Dict[str, Dict[str, torch.Tensor]]]: """ From 88e2285439c483ccf3be8df87171301e57f875b8 Mon Sep 17 00:00:00 2001 From: Aleksandr Laptev Date: Tue, 19 Sep 2023 07:53:05 +0700 Subject: [PATCH 232/512] [ASR] Confidence measure -> method renames (#7434) * measure -> method Signed-off-by: Aleksandr Laptev * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Aleksandr Laptev Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- nemo/collections/asr/metrics/rnnt_wer.py | 28 ++-- nemo/collections/asr/metrics/rnnt_wer_bpe.py | 8 +- nemo/collections/asr/metrics/wer.py | 18 +-- nemo/collections/asr/metrics/wer_bpe.py | 8 +- .../asr/models/confidence_ensemble.py | 12 +- .../parts/submodules/ctc_greedy_decoding.py | 41 ++---- .../parts/submodules/rnnt_greedy_decoding.py | 128 +++++++----------- .../asr_confidence_benchmarking_utils.py | 6 +- .../asr/parts/utils/asr_confidence_utils.py | 105 ++++++-------- .../confidence_ensembles/build_ensemble.py | 4 +- .../confidence_ensembles/ensemble_config.yaml | 2 +- .../confidence/benchmark_asr_confidence.py | 8 +- .../asr/confidence/test_asr_confidence.py | 27 ++-- .../test_asr_hybrid_rnnt_ctc_model_char.py | 6 +- .../asr/test_asr_rnnt_encdec_model.py | 6 +- .../asr/test_confidence_ensembles.py | 6 +- tutorials/asr/ASR_Confidence_Estimation.ipynb | 14 +- 17 files changed, 172 insertions(+), 255 deletions(-) diff --git a/nemo/collections/asr/metrics/rnnt_wer.py b/nemo/collections/asr/metrics/rnnt_wer.py index fc083a2ab3f3..97c9c4575982 100644 --- a/nemo/collections/asr/metrics/rnnt_wer.py +++ b/nemo/collections/asr/metrics/rnnt_wer.py @@ -100,16 +100,16 @@ class AbstractRNNTDecoding(ConfidenceMixin): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_measure_cfg.name is set to `entropy`. + Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -140,7 +140,7 @@ class AbstractRNNTDecoding(ConfidenceMixin): timestep during greedy decoding. Setting to larger values allows longer sentences to be decoded, at the cost of increased execution time. preserve_frame_confidence: Same as above, overrides above value. - confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. + confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. @@ -277,7 +277,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_measure_cfg=self.confidence_measure_cfg, + confidence_method_cfg=self.confidence_method_cfg, ) else: self.decoding = greedy_decode.GreedyTDTInfer( @@ -291,7 +291,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_measure_cfg=self.confidence_measure_cfg, + confidence_method_cfg=self.confidence_method_cfg, ) else: self.decoding = greedy_decode.GreedyMultiblankRNNTInfer( @@ -304,7 +304,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_measure_cfg=self.confidence_measure_cfg, + confidence_method_cfg=self.confidence_method_cfg, ) elif self.cfg.strategy == 'greedy_batch': @@ -320,7 +320,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_measure_cfg=self.confidence_measure_cfg, + confidence_method_cfg=self.confidence_method_cfg, ) else: self.decoding = greedy_decode.GreedyBatchedTDTInfer( @@ -334,7 +334,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_measure_cfg=self.confidence_measure_cfg, + confidence_method_cfg=self.confidence_method_cfg, ) else: @@ -348,7 +348,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_measure_cfg=self.confidence_measure_cfg, + confidence_method_cfg=self.confidence_method_cfg, ) elif self.cfg.strategy == 'beam': @@ -1005,16 +1005,16 @@ class RNNTDecoding(AbstractRNNTDecoding): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_measure_cfg.name is set to `entropy`. + Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -1047,7 +1047,7 @@ class RNNTDecoding(AbstractRNNTDecoding): preserve_frame_confidence: Same as above, overrides above value. - confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. + confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. diff --git a/nemo/collections/asr/metrics/rnnt_wer_bpe.py b/nemo/collections/asr/metrics/rnnt_wer_bpe.py index 40ae00b413b3..e8ea8f399b99 100644 --- a/nemo/collections/asr/metrics/rnnt_wer_bpe.py +++ b/nemo/collections/asr/metrics/rnnt_wer_bpe.py @@ -100,16 +100,16 @@ class RNNTBPEDecoding(AbstractRNNTDecoding): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_measure_cfg.name is set to `entropy`. + Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -142,7 +142,7 @@ class RNNTBPEDecoding(AbstractRNNTDecoding): preserve_frame_confidence: Same as above, overrides above value. - confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. + confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. diff --git a/nemo/collections/asr/metrics/wer.py b/nemo/collections/asr/metrics/wer.py index 7802e3b8a0c9..14fa46b308ab 100644 --- a/nemo/collections/asr/metrics/wer.py +++ b/nemo/collections/asr/metrics/wer.py @@ -258,16 +258,16 @@ class AbstractCTCDecoding(ConfidenceMixin): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_measure_cfg.name is set to `entropy`. + Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -300,7 +300,7 @@ class AbstractCTCDecoding(ConfidenceMixin): preserve_alignments: Same as above, overrides above value. compute_timestamps: Same as above, overrides above value. preserve_frame_confidence: Same as above, overrides above value. - confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. + confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. @@ -389,7 +389,7 @@ def __init__(self, decoding_cfg, blank_id: int): preserve_alignments=self.preserve_alignments, compute_timestamps=self.compute_timestamps, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_measure_cfg=self.confidence_measure_cfg, + confidence_method_cfg=self.confidence_method_cfg, ) elif self.cfg.strategy == 'beam': @@ -1037,16 +1037,16 @@ class CTCDecoding(AbstractCTCDecoding): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_measure_cfg.name is set to `entropy`. + Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -1079,7 +1079,7 @@ class CTCDecoding(AbstractCTCDecoding): preserve_alignments: Same as above, overrides above value. compute_timestamps: Same as above, overrides above value. preserve_frame_confidence: Same as above, overrides above value. - confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. + confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. diff --git a/nemo/collections/asr/metrics/wer_bpe.py b/nemo/collections/asr/metrics/wer_bpe.py index 0a277e57e86a..b95bb62008ae 100644 --- a/nemo/collections/asr/metrics/wer_bpe.py +++ b/nemo/collections/asr/metrics/wer_bpe.py @@ -74,16 +74,16 @@ class CTCBPEDecoding(AbstractCTCDecoding): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_measure_cfg.name is set to `entropy`. + Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -116,7 +116,7 @@ class CTCBPEDecoding(AbstractCTCDecoding): preserve_alignments: Same as above, overrides above value. compute_timestamps: Same as above, overrides above value. preserve_frame_confidence: Same as above, overrides above value. - confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. + confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. diff --git a/nemo/collections/asr/models/confidence_ensemble.py b/nemo/collections/asr/models/confidence_ensemble.py index bf65ff96ef5c..dcbb0a05976c 100644 --- a/nemo/collections/asr/models/confidence_ensemble.py +++ b/nemo/collections/asr/models/confidence_ensemble.py @@ -25,7 +25,7 @@ from nemo.collections.asr.models.hybrid_rnnt_ctc_models import EncDecHybridRNNTCTCModel from nemo.collections.asr.parts.utils.asr_confidence_utils import ( ConfidenceConfig, - ConfidenceMeasureConfig, + ConfidenceMethodConfig, get_confidence_aggregation_bank, get_confidence_measure_bank, ) @@ -61,7 +61,7 @@ def to_confidence_config(self) -> ConfidenceConfig: return ConfidenceConfig( exclude_blank=self.exclude_blank, aggregation=self.aggregation, - measure_cfg=ConfidenceMeasureConfig( + method_cfg=ConfidenceMethodConfig( name=name, entropy_type=entropy_type, alpha=self.alpha, entropy_norm=entropy_norm, ), ) @@ -126,7 +126,7 @@ def compute_confidence(hypothesis: Hypothesis, confidence_cfg: ConfidenceConfig) hypothesis: generated hypothesis as returned from the transcribe method of the ASR model. confidence_cfg: confidence config specifying what kind of - measure/aggregation should be used. + method/aggregation should be used. Returns: float: confidence score. @@ -135,12 +135,12 @@ def compute_confidence(hypothesis: Hypothesis, confidence_cfg: ConfidenceConfig) filtered_logprobs = get_filtered_logprobs(hypothesis, confidence_cfg.exclude_blank) vocab_size = filtered_logprobs.shape[1] aggr_func = get_confidence_aggregation_bank()[confidence_cfg.aggregation] - if confidence_cfg.measure_cfg.name == "max_prob": + if confidence_cfg.method_cfg.name == "max_prob": conf_type = "max_prob" alpha = 1.0 else: - conf_type = f"entropy_{confidence_cfg.measure_cfg.entropy_type}_{confidence_cfg.measure_cfg.entropy_norm}" - alpha = confidence_cfg.measure_cfg.alpha + conf_type = f"entropy_{confidence_cfg.method_cfg.entropy_type}_{confidence_cfg.method_cfg.entropy_norm}" + alpha = confidence_cfg.method_cfg.alpha conf_func = get_confidence_measure_bank()[conf_type] conf_value = aggr_func(conf_func(filtered_logprobs, v=vocab_size, t=alpha)).cpu().item() diff --git a/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py b/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py index 1f29a511fc9c..44ae9f4a134b 100644 --- a/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py @@ -19,7 +19,7 @@ from omegaconf import DictConfig, OmegaConf from nemo.collections.asr.parts.utils import rnnt_utils -from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceMeasureConfig, ConfidenceMeasureMixin +from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceMethodConfig, ConfidenceMethodMixin from nemo.core.classes import Typing, typecheck from nemo.core.neural_types import HypothesisType, LengthsType, LogprobsType, NeuralType from nemo.utils import logging @@ -55,7 +55,7 @@ def _states_to_device(dec_state, device='cpu'): return dec_state -class GreedyCTCInfer(Typing, ConfidenceMeasureMixin): +class GreedyCTCInfer(Typing, ConfidenceMethodMixin): """A greedy CTC decoder. Provides a common abstraction for sample level and batch level greedy decoding. @@ -71,15 +71,15 @@ class GreedyCTCInfer(Typing, ConfidenceMeasureMixin): preserve_frame_confidence: Bool flag which preserves the history of per-frame confidence scores generated during decoding. When set to true, the Hypothesis will contain the non-null value for `frame_confidence` in it. Here, `frame_confidence` is a List of floats. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -130,7 +130,7 @@ def __init__( preserve_alignments: bool = False, compute_timestamps: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__() @@ -140,8 +140,8 @@ def __init__( self.compute_timestamps = compute_timestamps | preserve_frame_confidence self.preserve_frame_confidence = preserve_frame_confidence - # set confidence calculation measure - self._init_confidence_measure(confidence_measure_cfg) + # set confidence calculation method + self._init_confidence_method(confidence_method_cfg) @typecheck() def forward( @@ -253,27 +253,12 @@ class GreedyCTCInferConfig: preserve_alignments: bool = False compute_timestamps: bool = False preserve_frame_confidence: bool = False - confidence_measure_cfg: Optional[ConfidenceMeasureConfig] = ConfidenceMeasureConfig() - confidence_method_cfg: str = "DEPRECATED" + confidence_method_cfg: Optional[ConfidenceMethodConfig] = ConfidenceMethodConfig() def __post_init__(self): # OmegaConf.structured ensures that post_init check is always executed - self.confidence_measure_cfg = OmegaConf.structured( - self.confidence_measure_cfg - if isinstance(self.confidence_measure_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.confidence_measure_cfg) + self.confidence_method_cfg = OmegaConf.structured( + self.confidence_method_cfg + if isinstance(self.confidence_method_cfg, ConfidenceMethodConfig) + else ConfidenceMethodConfig(**self.confidence_method_cfg) ) - if self.confidence_method_cfg != "DEPRECATED": - logging.warning( - "`confidence_method_cfg` is deprecated and will be removed in the future. " - "Please use `confidence_measure_cfg` instead." - ) - - # TODO (alaptev): delete the following two lines sometime in the future - logging.warning("Re-writing `confidence_measure_cfg` with the value of `confidence_method_cfg`.") - # OmegaConf.structured ensures that post_init check is always executed - self.confidence_measure_cfg = OmegaConf.structured( - self.confidence_method_cfg - if isinstance(self.confidence_method_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.confidence_method_cfg) - ) diff --git a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py index dfa3ac27854b..185a3abf1151 100644 --- a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py @@ -35,7 +35,7 @@ from nemo.collections.asr.modules import rnnt_abstract from nemo.collections.asr.parts.utils import rnnt_utils -from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceMeasureConfig, ConfidenceMeasureMixin +from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceMethodConfig, ConfidenceMethodMixin from nemo.collections.common.parts.rnn import label_collate from nemo.core.classes import Typing, typecheck from nemo.core.neural_types import AcousticEncodedRepresentation, ElementType, HypothesisType, LengthsType, NeuralType @@ -69,7 +69,7 @@ def _states_to_device(dec_state, device='cpu'): return dec_state -class _GreedyRNNTInfer(Typing, ConfidenceMeasureMixin): +class _GreedyRNNTInfer(Typing, ConfidenceMethodMixin): """A greedy transducer decoder. Provides a common abstraction for sample level and batch level greedy decoding. @@ -96,15 +96,15 @@ class _GreedyRNNTInfer(Typing, ConfidenceMeasureMixin): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -154,7 +154,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__() self.decoder = decoder_model @@ -166,8 +166,8 @@ def __init__( self.preserve_alignments = preserve_alignments self.preserve_frame_confidence = preserve_frame_confidence - # set confidence calculation measure - self._init_confidence_measure(confidence_measure_cfg) + # set confidence calculation method + self._init_confidence_method(confidence_method_cfg) def __call__(self, *args, **kwargs): return self.forward(*args, **kwargs) @@ -263,15 +263,15 @@ class GreedyRNNTInfer(_GreedyRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -305,7 +305,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -314,7 +314,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_measure_cfg=confidence_measure_cfg, + confidence_method_cfg=confidence_method_cfg, ) @typecheck() @@ -502,15 +502,15 @@ class GreedyBatchedRNNTInfer(_GreedyRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -544,7 +544,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -553,7 +553,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_measure_cfg=confidence_measure_cfg, + confidence_method_cfg=confidence_method_cfg, ) # Depending on availability of `blank_as_pad` support @@ -1478,15 +1478,15 @@ class GreedyMultiblankRNNTInfer(GreedyRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -1521,7 +1521,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -1530,7 +1530,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_measure_cfg=confidence_measure_cfg, + confidence_method_cfg=confidence_method_cfg, ) self.big_blank_durations = big_blank_durations self._SOS = blank_index - len(big_blank_durations) @@ -1682,15 +1682,15 @@ class GreedyBatchedMultiblankRNNTInfer(GreedyBatchedRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -1725,7 +1725,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -1734,7 +1734,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_measure_cfg=confidence_measure_cfg, + confidence_method_cfg=confidence_method_cfg, ) self.big_blank_durations = big_blank_durations @@ -2203,31 +2203,15 @@ class GreedyRNNTInferConfig: max_symbols_per_step: Optional[int] = 10 preserve_alignments: bool = False preserve_frame_confidence: bool = False - confidence_measure_cfg: Optional[ConfidenceMeasureConfig] = ConfidenceMeasureConfig() - confidence_method_cfg: str = "DEPRECATED" + confidence_method_cfg: Optional[ConfidenceMethodConfig] = ConfidenceMethodConfig() def __post_init__(self): # OmegaConf.structured ensures that post_init check is always executed - self.confidence_measure_cfg = OmegaConf.structured( - self.confidence_measure_cfg - if isinstance(self.confidence_measure_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.confidence_measure_cfg) + self.confidence_method_cfg = OmegaConf.structured( + self.confidence_method_cfg + if isinstance(self.confidence_method_cfg, ConfidenceMethodConfig) + else ConfidenceMethodConfig(**self.confidence_method_cfg) ) - if self.confidence_method_cfg != "DEPRECATED": - logging.warning( - "`confidence_method_cfg` is deprecated and will be removed in the future. " - "Please use `confidence_measure_cfg` instead." - ) - - # TODO (alaptev): delete the following two lines sometime in the future - logging.warning("Re-writing `confidence_measure_cfg` with the value of `confidence_method_cfg`.") - # OmegaConf.structured ensures that post_init check is always executed - self.confidence_measure_cfg = OmegaConf.structured( - self.confidence_method_cfg - if isinstance(self.confidence_method_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.confidence_method_cfg) - ) - self.confidence_method_cfg = "DEPRECATED" @dataclass @@ -2235,31 +2219,15 @@ class GreedyBatchedRNNTInferConfig: max_symbols_per_step: Optional[int] = 10 preserve_alignments: bool = False preserve_frame_confidence: bool = False - confidence_measure_cfg: Optional[ConfidenceMeasureConfig] = ConfidenceMeasureConfig() - confidence_method_cfg: str = "DEPRECATED" + confidence_method_cfg: Optional[ConfidenceMethodConfig] = ConfidenceMethodConfig() def __post_init__(self): # OmegaConf.structured ensures that post_init check is always executed - self.confidence_measure_cfg = OmegaConf.structured( - self.confidence_measure_cfg - if isinstance(self.confidence_measure_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.confidence_measure_cfg) + self.confidence_method_cfg = OmegaConf.structured( + self.confidence_method_cfg + if isinstance(self.confidence_method_cfg, ConfidenceMethodConfig) + else ConfidenceMethodConfig(**self.confidence_method_cfg) ) - if self.confidence_method_cfg != "DEPRECATED": - logging.warning( - "`confidence_method_cfg` is deprecated and will be removed in the future. " - "Please use `confidence_measure_cfg` instead." - ) - - # TODO (alaptev): delete the following two lines sometime in the future - logging.warning("Re-writing `confidence_measure_cfg` with the value of `confidence_method_cfg`.") - # OmegaConf.structured ensures that post_init check is always executed - self.confidence_measure_cfg = OmegaConf.structured( - self.confidence_method_cfg - if isinstance(self.confidence_method_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.confidence_method_cfg) - ) - self.confidence_method_cfg = "DEPRECATED" class GreedyTDTInfer(_GreedyRNNTInfer): @@ -2288,15 +2256,15 @@ class GreedyTDTInfer(_GreedyRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -2331,7 +2299,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -2340,7 +2308,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_measure_cfg=confidence_measure_cfg, + confidence_method_cfg=confidence_method_cfg, ) self.durations = durations @@ -2544,15 +2512,15 @@ class GreedyBatchedTDTInfer(_GreedyRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -2587,7 +2555,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -2596,7 +2564,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_measure_cfg=confidence_measure_cfg, + confidence_method_cfg=confidence_method_cfg, ) self.durations = durations diff --git a/nemo/collections/asr/parts/utils/asr_confidence_benchmarking_utils.py b/nemo/collections/asr/parts/utils/asr_confidence_benchmarking_utils.py index 958195a4bb11..0e057e012542 100644 --- a/nemo/collections/asr/parts/utils/asr_confidence_benchmarking_utils.py +++ b/nemo/collections/asr/parts/utils/asr_confidence_benchmarking_utils.py @@ -173,11 +173,11 @@ def apply_confidence_parameters(decoding_cfg, hp): """ new_decoding_cfg = copy.deepcopy(decoding_cfg) confidence_cfg_fields = ("aggregation", "exclude_blank") - confidence_measure_cfg_fields = ("name", "alpha", "entropy_type", "entropy_norm") + confidence_method_cfg_fields = ("name", "alpha", "entropy_type", "entropy_norm") with open_dict(new_decoding_cfg): for p, v in hp.items(): if p in confidence_cfg_fields: new_decoding_cfg.confidence_cfg[p] = v - elif p in confidence_measure_cfg_fields: - new_decoding_cfg.confidence_cfg.measure_cfg[p] = v + elif p in confidence_method_cfg_fields: + new_decoding_cfg.confidence_cfg.method_cfg[p] = v return new_decoding_cfg diff --git a/nemo/collections/asr/parts/utils/asr_confidence_utils.py b/nemo/collections/asr/parts/utils/asr_confidence_utils.py index 29c49529a509..ddfac3744c6a 100644 --- a/nemo/collections/asr/parts/utils/asr_confidence_utils.py +++ b/nemo/collections/asr/parts/utils/asr_confidence_utils.py @@ -25,7 +25,7 @@ from nemo.utils import logging -class ConfidenceMeasureConstants: +class ConfidenceMethodConstants: NAMES = ("max_prob", "entropy") ENTROPY_TYPES = ("gibbs", "tsallis", "renyi") ENTROPY_NORMS = ("lin", "exp") @@ -48,17 +48,17 @@ def print(cls): @dataclass -class ConfidenceMeasureConfig: - """A Config which contains the measure name and settings to compute per-frame confidence scores. +class ConfidenceMethodConfig: + """A Config which contains the method name and settings to compute per-frame confidence scores. Args: - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_measure_cfg.name is set to `entropy`. + Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -92,31 +92,25 @@ class ConfidenceMeasureConfig: def __post_init__(self): if self.temperature != "DEPRECATED": - logging.warning( - "`temperature` is deprecated and will be removed in the future. Please use `alpha` instead." - ) - - # TODO (alaptev): delete the following two lines sometime in the future - logging.warning("Re-writing `alpha` with the value of `temperature`.") # self.temperature has type str self.alpha = float(self.temperature) self.temperature = "DEPRECATED" - if self.name not in ConfidenceMeasureConstants.NAMES: + if self.name not in ConfidenceMethodConstants.NAMES: raise ValueError( f"`name` must be one of the following: " - f"{'`' + '`, `'.join(ConfidenceMeasureConstants.NAMES) + '`'}. Provided: `{self.name}`" + f"{'`' + '`, `'.join(ConfidenceMethodConstants.NAMES) + '`'}. Provided: `{self.name}`" ) - if self.entropy_type not in ConfidenceMeasureConstants.ENTROPY_TYPES: + if self.entropy_type not in ConfidenceMethodConstants.ENTROPY_TYPES: raise ValueError( f"`entropy_type` must be one of the following: " - f"{'`' + '`, `'.join(ConfidenceMeasureConstants.ENTROPY_TYPES) + '`'}. Provided: `{self.entropy_type}`" + f"{'`' + '`, `'.join(ConfidenceMethodConstants.ENTROPY_TYPES) + '`'}. Provided: `{self.entropy_type}`" ) if self.alpha <= 0.0: raise ValueError(f"`alpha` must be > 0. Provided: {self.alpha}") - if self.entropy_norm not in ConfidenceMeasureConstants.ENTROPY_NORMS: + if self.entropy_norm not in ConfidenceMethodConstants.ENTROPY_NORMS: raise ValueError( f"`entropy_norm` must be one of the following: " - f"{'`' + '`, `'.join(ConfidenceMeasureConstants.ENTROPY_NORMS) + '`'}. Provided: `{self.entropy_norm}`" + f"{'`' + '`, `'.join(ConfidenceMethodConstants.ENTROPY_NORMS) + '`'}. Provided: `{self.entropy_norm}`" ) @@ -142,15 +136,15 @@ class ConfidenceConfig: from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -181,34 +175,19 @@ class ConfidenceConfig: preserve_word_confidence: bool = False exclude_blank: bool = True aggregation: str = "min" - measure_cfg: ConfidenceMeasureConfig = ConfidenceMeasureConfig() - method_cfg: str = "DEPRECATED" + method_cfg: ConfidenceMethodConfig = ConfidenceMethodConfig() def __post_init__(self): # OmegaConf.structured ensures that post_init check is always executed - self.measure_cfg = OmegaConf.structured( - self.measure_cfg - if isinstance(self.measure_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.measure_cfg) + self.method_cfg = OmegaConf.structured( + self.method_cfg + if isinstance(self.method_cfg, ConfidenceMethodConfig) + else ConfidenceMethodConfig(**self.method_cfg) ) - if self.method_cfg != "DEPRECATED": - logging.warning( - "`method_cfg` is deprecated and will be removed in the future. Please use `measure_cfg` instead." - ) - - # TODO (alaptev): delete the following two lines sometime in the future - logging.warning("Re-writing `measure_cfg` with the value of `method_cfg`.") - # OmegaConf.structured ensures that post_init check is always executed - self.measure_cfg = OmegaConf.structured( - self.method_cfg - if isinstance(self.method_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.method_cfg) - ) - self.method_cfg = "DEPRECATED" if self.aggregation not in ConfidenceConstants.AGGREGATIONS: raise ValueError( f"`aggregation` has to be one of the following: " - f"{'`' + '`, `'.join(ConfidenceMeasureConstants.AGGREGATIONS) + '`'}. Provided: `{self.aggregation}`" + f"{'`' + '`, `'.join(ConfidenceConstants.AGGREGATIONS) + '`'}. Provided: `{self.aggregation}`" ) @@ -284,7 +263,7 @@ def entropy_gibbs_exp(x, v, t): def get_confidence_aggregation_bank(): """Generate a dictionary with confidence aggregation functions. - Supported confidence measures: + Supported confidence aggregation functions: min: minimum max: maximum mean: arithmetic mean @@ -305,26 +284,26 @@ def get_confidence_aggregation_bank(): return confidence_aggregation_bank -class ConfidenceMeasureMixin(ABC): - """Confidence Measure Mixin class. +class ConfidenceMethodMixin(ABC): + """Confidence Method Mixin class. - It initializes per-frame confidence measure. + It initializes per-frame confidence method. """ - def _init_confidence_measure(self, confidence_measure_cfg: Optional[DictConfig] = None): - """Initialize per-frame confidence measure from config. + def _init_confidence_method(self, confidence_method_cfg: Optional[DictConfig] = None): + """Initialize per-frame confidence method from config. """ # OmegaConf.structured ensures that post_init check is always executed - confidence_measure_cfg = OmegaConf.structured( - ConfidenceMeasureConfig() - if confidence_measure_cfg is None - else ConfidenceMeasureConfig(**confidence_measure_cfg) + confidence_method_cfg = OmegaConf.structured( + ConfidenceMethodConfig() + if confidence_method_cfg is None + else ConfidenceMethodConfig(**confidence_method_cfg) ) - # set confidence calculation measure + # set confidence calculation method # we suppose that self.blank_id == len(vocabulary) self.num_tokens = (self.blank_id if hasattr(self, "blank_id") else self._blank_index) + 1 - self.alpha = confidence_measure_cfg.alpha + self.alpha = confidence_method_cfg.alpha # init confidence measure bank self.confidence_measure_bank = get_confidence_measure_bank() @@ -332,14 +311,14 @@ def _init_confidence_measure(self, confidence_measure_cfg: Optional[DictConfig] measure = None # construct measure_name measure_name = "" - if confidence_measure_cfg.name == "max_prob": + if confidence_method_cfg.name == "max_prob": measure_name = "max_prob" - elif confidence_measure_cfg.name == "entropy": + elif confidence_method_cfg.name == "entropy": measure_name = '_'.join( - [confidence_measure_cfg.name, confidence_measure_cfg.entropy_type, confidence_measure_cfg.entropy_norm] + [confidence_method_cfg.name, confidence_method_cfg.entropy_type, confidence_method_cfg.entropy_norm] ) else: - raise ValueError(f"Unsupported `confidence_measure_cfg.name`: `{confidence_measure_cfg.name}`") + raise ValueError(f"Unsupported `confidence_method_cfg.name`: `{confidence_method_cfg.name}`") if measure_name not in self.confidence_measure_bank: raise ValueError(f"Unsupported measure setup: `{measure_name}`") measure = partial(self.confidence_measure_bank[measure_name], v=self.num_tokens, t=self.alpha) @@ -359,7 +338,7 @@ def _init_confidence(self, confidence_cfg: Optional[DictConfig] = None): confidence_cfg = OmegaConf.structured( ConfidenceConfig() if confidence_cfg is None else ConfidenceConfig(**confidence_cfg) ) - self.confidence_measure_cfg = confidence_cfg.measure_cfg + self.confidence_method_cfg = confidence_cfg.method_cfg # extract the config self.preserve_word_confidence = confidence_cfg.get('preserve_word_confidence', False) @@ -384,11 +363,11 @@ def _init_confidence(self, confidence_cfg: Optional[DictConfig] = None): if self.cfg.strategy in ['greedy', 'greedy_batch']: self.preserve_frame_confidence = self.cfg.greedy.get('preserve_frame_confidence', False) # OmegaConf.structured ensures that post_init check is always executed - confidence_measure_cfg = OmegaConf.structured(self.cfg.greedy).get('confidence_measure_cfg', None) - self.confidence_measure_cfg = ( - OmegaConf.structured(ConfidenceMeasureConfig()) - if confidence_measure_cfg is None - else OmegaConf.structured(ConfidenceMeasureConfig(**confidence_measure_cfg)) + confidence_method_cfg = OmegaConf.structured(self.cfg.greedy).get('confidence_method_cfg', None) + self.confidence_method_cfg = ( + OmegaConf.structured(ConfidenceMethodConfig()) + if confidence_method_cfg is None + else OmegaConf.structured(ConfidenceMethodConfig(**confidence_method_cfg)) ) @abstractmethod diff --git a/scripts/confidence_ensembles/build_ensemble.py b/scripts/confidence_ensembles/build_ensemble.py index bc32a4f99840..99bfa6187b30 100644 --- a/scripts/confidence_ensembles/build_ensemble.py +++ b/scripts/confidence_ensembles/build_ensemble.py @@ -97,7 +97,7 @@ ) from nemo.collections.asr.parts.utils.asr_confidence_utils import ( ConfidenceConfig, - ConfidenceMeasureConfig, + ConfidenceMethodConfig, get_confidence_aggregation_bank, get_confidence_measure_bank, ) @@ -214,7 +214,7 @@ class BuildEnsembleConfig: preserve_frame_confidence=True, exclude_blank=True, aggregation="mean", - measure_cfg=ConfidenceMeasureConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), + method_cfg=ConfidenceMethodConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), ) temperature: float = 1.0 diff --git a/scripts/confidence_ensembles/ensemble_config.yaml b/scripts/confidence_ensembles/ensemble_config.yaml index 590318ee3b28..8184d4d5acb5 100644 --- a/scripts/confidence_ensembles/ensemble_config.yaml +++ b/scripts/confidence_ensembles/ensemble_config.yaml @@ -16,7 +16,7 @@ temperature: 1.0 confidence: exclude_blank: True aggregation: mean - measure_cfg: + method_cfg: name: entropy entropy_type: renyi alpha: 0.25 diff --git a/scripts/speech_recognition/confidence/benchmark_asr_confidence.py b/scripts/speech_recognition/confidence/benchmark_asr_confidence.py index 8922fe09176d..246aa61c2c0e 100644 --- a/scripts/speech_recognition/confidence/benchmark_asr_confidence.py +++ b/scripts/speech_recognition/confidence/benchmark_asr_confidence.py @@ -83,11 +83,11 @@ def get_experiment_params(cfg): """ blank = "no_blank" if cfg.exclude_blank else "blank" aggregation = cfg.aggregation - method_name = cfg.measure_cfg.name - alpha = cfg.measure_cfg.alpha + method_name = cfg.method_cfg.name + alpha = cfg.method_cfg.alpha if method_name == "entropy": - entropy_type = cfg.measure_cfg.entropy_type - entropy_norm = cfg.measure_cfg.entropy_norm + entropy_type = cfg.method_cfg.entropy_type + entropy_norm = cfg.method_cfg.entropy_norm experiment_param_list = [ aggregation, str(cfg.exclude_blank), diff --git a/tests/collections/asr/confidence/test_asr_confidence.py b/tests/collections/asr/confidence/test_asr_confidence.py index 11b127424908..e95a0bd8127b 100644 --- a/tests/collections/asr/confidence/test_asr_confidence.py +++ b/tests/collections/asr/confidence/test_asr_confidence.py @@ -106,32 +106,21 @@ def test_run_confidence_benchmark( @pytest.mark.integration @pytest.mark.with_downloads @pytest.mark.parametrize('model_name', ("ctc", "rnnt")) - @pytest.mark.parametrize('arg', ("method_cfg", "temperature", "all")) - def test_deprecated_config_args(self, model_name, arg, conformer_ctc_bpe_model, conformer_rnnt_bpe_model): - assert ConfidenceConfig().measure_cfg.alpha == 0.33, "default `alpha` is supposed to be 0.33" + def test_deprecated_config_args(self, model_name, conformer_ctc_bpe_model, conformer_rnnt_bpe_model): + assert ConfidenceConfig().method_cfg.alpha == 0.33, "default `alpha` is supposed to be 0.33" model = conformer_ctc_bpe_model if model_name == "ctc" else conformer_rnnt_bpe_model assert isinstance(model, ASRModel) - if arg == "all": - conf = OmegaConf.create({"temperature": 0.5}) - test_args_main = {"method_cfg": conf} - test_args_greedy = {"confidence_method_cfg": conf} - elif arg == "method_cfg": - conf = OmegaConf.create({"alpha": 0.5}) - test_args_main = {"method_cfg": conf} - test_args_greedy = {"confidence_method_cfg": conf} - elif arg == "temperature": - conf = OmegaConf.create({"temperature": 0.5}) - test_args_main = {"measure_cfg": conf} - test_args_greedy = {"confidence_measure_cfg": conf} - else: - raise NotImplementedError(arg) + + conf = OmegaConf.create({"temperature": 0.5}) + test_args_main = {"method_cfg": conf} + test_args_greedy = {"confidence_method_cfg": conf} confidence_cfg = ConfidenceConfig(preserve_word_confidence=True, **test_args_main) model.change_decoding_strategy( RNNTDecodingConfig(fused_batch_size=-1, strategy="greedy", confidence_cfg=confidence_cfg) if model_name == "rnnt" else CTCDecodingConfig(confidence_cfg=confidence_cfg) ) - assert model.cfg.decoding.confidence_cfg.measure_cfg.alpha == 0.5 + assert model.cfg.decoding.confidence_cfg.method_cfg.alpha == 0.5 model.change_decoding_strategy( RNNTDecodingConfig( fused_batch_size=-1, @@ -141,4 +130,4 @@ def test_deprecated_config_args(self, model_name, arg, conformer_ctc_bpe_model, if model_name == "rnnt" else CTCDecodingConfig(greedy=GreedyCTCInferConfig(preserve_frame_confidence=True, **test_args_greedy)) ) - assert model.cfg.decoding.greedy.confidence_measure_cfg.alpha == 0.5 + assert model.cfg.decoding.greedy.confidence_method_cfg.alpha == 0.5 diff --git a/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_char.py b/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_char.py index 8687ed683833..22926b6516ee 100644 --- a/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_char.py +++ b/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_char.py @@ -242,8 +242,7 @@ def test_decoding_change(self, hybrid_asr_model): @pytest.mark.unit def test_GreedyRNNTInferConfig(self): - # confidence_method_cfg is deprecated - IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index', 'confidence_method_cfg'] + IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index'] result = assert_dataclass_signature_match( greedy_decode.GreedyRNNTInfer, greedy_decode.GreedyRNNTInferConfig, ignore_args=IGNORE_ARGS @@ -257,8 +256,7 @@ def test_GreedyRNNTInferConfig(self): @pytest.mark.unit def test_GreedyBatchedRNNTInferConfig(self): - # confidence_method_cfg is deprecated - IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index', 'confidence_method_cfg'] + IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index'] result = assert_dataclass_signature_match( greedy_decode.GreedyBatchedRNNTInfer, greedy_decode.GreedyBatchedRNNTInferConfig, ignore_args=IGNORE_ARGS diff --git a/tests/collections/asr/test_asr_rnnt_encdec_model.py b/tests/collections/asr/test_asr_rnnt_encdec_model.py index 775a146c74c4..68f1e38f797b 100644 --- a/tests/collections/asr/test_asr_rnnt_encdec_model.py +++ b/tests/collections/asr/test_asr_rnnt_encdec_model.py @@ -242,8 +242,7 @@ def test_decoding_change(self, asr_model): @pytest.mark.unit def test_GreedyRNNTInferConfig(self): - # confidence_method_cfg is deprecated - IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index', 'confidence_method_cfg'] + IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index'] result = assert_dataclass_signature_match( greedy_decode.GreedyRNNTInfer, greedy_decode.GreedyRNNTInferConfig, ignore_args=IGNORE_ARGS @@ -257,8 +256,7 @@ def test_GreedyRNNTInferConfig(self): @pytest.mark.unit def test_GreedyBatchedRNNTInferConfig(self): - # confidence_method_cfg is deprecated - IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index', 'confidence_method_cfg'] + IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index'] result = assert_dataclass_signature_match( greedy_decode.GreedyBatchedRNNTInfer, greedy_decode.GreedyBatchedRNNTInferConfig, ignore_args=IGNORE_ARGS diff --git a/tests/collections/asr/test_confidence_ensembles.py b/tests/collections/asr/test_confidence_ensembles.py index b8b027dd3426..e926475009e2 100644 --- a/tests/collections/asr/test_confidence_ensembles.py +++ b/tests/collections/asr/test_confidence_ensembles.py @@ -19,7 +19,7 @@ from nemo.collections.asr.metrics.wer import CTCDecodingConfig from nemo.collections.asr.models import EncDecCTCModel, EncDecHybridRNNTCTCModel, EncDecRNNTModel from nemo.collections.asr.models.confidence_ensemble import ConfidenceEnsembleModel -from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceConfig, ConfidenceMeasureConfig +from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceConfig, ConfidenceMethodConfig def get_model_config(model_class): @@ -117,7 +117,7 @@ def test_model_creation_2models(self, tmp_path, model_class0, model_class1): preserve_frame_confidence=True, exclude_blank=True, aggregation="mean", - measure_cfg=ConfidenceMeasureConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), + method_cfg=ConfidenceMethodConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), ) # just checking that no errors are raised when creating the model @@ -148,7 +148,7 @@ def test_model_creation_5models(self, tmp_path): preserve_frame_confidence=True, exclude_blank=True, aggregation="mean", - measure_cfg=ConfidenceMeasureConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), + method_cfg=ConfidenceMethodConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), ) # just checking that no errors are raised when creating the model diff --git a/tutorials/asr/ASR_Confidence_Estimation.ipynb b/tutorials/asr/ASR_Confidence_Estimation.ipynb index 2a1ad024a889..7a92ed026f07 100644 --- a/tutorials/asr/ASR_Confidence_Estimation.ipynb +++ b/tutorials/asr/ASR_Confidence_Estimation.ipynb @@ -443,8 +443,8 @@ "from nemo.collections.asr.parts.utils.asr_confidence_utils import (\n", " ConfidenceConfig,\n", " ConfidenceConstants,\n", - " ConfidenceMeasureConfig,\n", - " ConfidenceMeasureConstants,\n", + " ConfidenceMethodConfig,\n", + " ConfidenceMethodConstants,\n", ")\n", "from nemo.collections.asr.parts.utils.asr_confidence_benchmarking_utils import (\n", " apply_confidence_parameters,\n", @@ -454,11 +454,11 @@ ")\n", "\n", "\n", - "# List allowed options for ConfidenceMeasureConfig and ConfidenceConfig\n", - "print(f\"Allowed options for ConfidenceMeasureConfig: {ConfidenceMeasureConstants.print()}\\n\")\n", + "# List allowed options for ConfidenceMethodConfig and ConfidenceConfig\n", + "print(f\"Allowed options for ConfidenceMethodConfig: {ConfidenceMethodConstants.print()}\\n\")\n", "print(f\"Allowed options for ConfidenceConfig: {ConfidenceConstants.print()}\\n\")\n", "\n", - "# Initialize ConfidenceConfig and ConfidenceMeasureConfig\n", + "# Initialize ConfidenceConfig and ConfidenceMethodConfig\n", "confidence_cfg = ConfidenceConfig(\n", " preserve_frame_confidence=True, # Internally set to true if preserve_token_confidence == True\n", " # or preserve_word_confidence == True\n", @@ -466,7 +466,7 @@ " preserve_word_confidence=True,\n", " aggregation=\"prod\", # How to aggregate frame scores to token scores and token scores to word scores\n", " exclude_blank=False, # If true, only non-blank emissions contribute to confidence scores\n", - " measure_cfg=ConfidenceMeasureConfig( # Config for per-frame scores calculation (before aggregation)\n", + " method_cfg=ConfidenceMethodConfig( # Config for per-frame scores calculation (before aggregation)\n", " name=\"max_prob\", # Or \"entropy\" (default), which usually works better\n", " entropy_type=\"gibbs\", # Used only for name == \"entropy\". Recommended: \"tsallis\" (default) or \"renyi\"\n", " alpha=0.5, # Low values (<1) increase sensitivity, high values decrease sensitivity\n", @@ -1058,7 +1058,7 @@ " preserve_word_confidence=True,\n", " preserve_token_confidence=True,\n", " aggregation=\"min\",\n", - " measure_cfg=DictConfig({\"entropy_type\": \"tsallis\", \"alpha\": 1.5, \"entropy_norm\": \"lin\"}),\n", + " method_cfg=DictConfig({\"entropy_type\": \"tsallis\", \"alpha\": 1.5, \"entropy_norm\": \"lin\"}),\n", ")\n", "\n", "model.change_decoding_strategy(\n", From 38942eee882a8a58795a719da3250b8a576afc92 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Wed, 20 Sep 2023 01:01:14 +1000 Subject: [PATCH 233/512] Add steps for document of getting dataset 'SF Bilingual Speech' (#7378) * Add steps for document of getting dataset 'SF Bilingual Speech' Signed-off-by: Robin Dong * Update datasets.rst added a link from a tutorial demonstrating detailed data prep steps. Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --------- Signed-off-by: Robin Dong Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --- docs/source/tts/datasets.rst | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/docs/source/tts/datasets.rst b/docs/source/tts/datasets.rst index b5317ce01f64..dabf50b30dae 100644 --- a/docs/source/tts/datasets.rst +++ b/docs/source/tts/datasets.rst @@ -172,18 +172,24 @@ SFSpeech Chinese/English Bilingual Speech ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Dataset URL: https://catalog.ngc.nvidia.com/orgs/nvidia/resources/sf_bilingual_speech_zh_en * Dataset Processing Script: https://github.com/NVIDIA/NeMo/tree/stable/scripts/dataset_processing/tts/sfbilingual/get_data.py -* Command Line Instruction: +* Command Line Instruction: please refer details in Section 1 (NGC Registry CLI installation), Section 2 (Downloading SFSpeech Dataset), and Section 3 (Creatiung Data Manifests) from https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb. Below code block briefly describes the steps. .. code-block:: bash + # [prerequisite] Install and setup 'ngc' cli tool by following document https://docs.ngc.nvidia.com/cli/cmd.html + + $ ngc registry resource download-version "nvidia/sf_bilingual_speech_zh_en:v1" + + $ unzip sf_bilingual_speech_zh_en_vv1/SF_bilingual.zip -d + $ python scripts/dataset_processing/tts/sfbilingual/get_data.py \ - --data-root \ - --val-size 0.1 \ - --test-size 0.2 \ + --data-root /SF_bilingual \ + --val-size 0.005 \ + --test-size 0.01 \ --seed-for-ds-split 100 $ python scripts/dataset_processing/tts/extract_sup_data.py \ --config-path sfbilingual/ds_conf \ --config-name ds_for_fastpitch_align.yaml \ manifest_filepath= \ - sup_data_path= \ No newline at end of file + sup_data_path= From 4be356a63a88660af59dbfc222c2ccc61173a2dc Mon Sep 17 00:00:00 2001 From: Aleksandr Laptev Date: Tue, 19 Sep 2023 22:53:14 +0700 Subject: [PATCH 234/512] RNN-T confidence and alignment bugfix (#7381) * new frame_confidence and alignments lists are now always created after the while loop Signed-off-by: Aleksandr Laptev * tests added Signed-off-by: Aleksandr Laptev --------- Signed-off-by: Aleksandr Laptev --- .../parts/submodules/rnnt_greedy_decoding.py | 278 ++++++++---------- .../asr/test_asr_rnnt_encdec_model.py | 238 ++++++++++++++- 2 files changed, 363 insertions(+), 153 deletions(-) diff --git a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py index 185a3abf1151..95b0bdf5fd13 100644 --- a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py @@ -441,13 +441,6 @@ def _greedy_decode( # If blank token is predicted, exit inner loop, move onto next timestep t if k == self._blank_index: not_blank = False - - if self.preserve_alignments: - # convert Ti-th logits into a torch array - hypothesis.alignments.append([]) # blank buffer for next timestep - - if self.preserve_frame_confidence: - hypothesis.frame_confidence.append([]) # blank buffer for next timestep else: # Append token to label set, update RNN state. hypothesis.y_sequence.append(k) @@ -459,6 +452,13 @@ def _greedy_decode( # Increment token counter. symbols_added += 1 + if self.preserve_alignments: + # convert Ti-th logits into a torch array + hypothesis.alignments.append([]) # blank buffer for next timestep + + if self.preserve_frame_confidence: + hypothesis.frame_confidence.append([]) # blank buffer for next timestep + # Remove trailing empty list of Alignments if self.preserve_alignments: if len(hypothesis.alignments[-1]) == 0: @@ -642,9 +642,6 @@ def _greedy_decode_blank_as_pad( # frame_confidence is a 3-dimensional dangling list representing B x T x U for hyp in hypotheses: hyp.frame_confidence = [[]] - hyp.y_3best = [[]] - hyp.frame_confidence_3best = [[[]]] - hyp.logp = [[]] # Last Label buffer + Last Label without blank buffer # batch level equivalent of the last_label @@ -731,32 +728,6 @@ def _greedy_decode_blank_as_pad( # This is equivalent to if single sample predicted k if all_blanks: not_blank = False - - # If preserving alignments, convert the current Uj alignments into a torch.Tensor - # Then preserve U at current timestep Ti - # Finally, forward the timestep history to Ti+1 for that sample - # All of this should only be done iff the current time index <= sample-level AM length. - # Otherwise ignore and move to next sample / next timestep. - if self.preserve_alignments: - - # convert Ti-th logits into a torch array - for batch_idx in range(batchsize): - - # this checks if current timestep <= sample-level AM length - # If current timestep > sample-level AM length, no alignments will be added - # Therefore the list of Uj alignments is empty here. - if len(hypotheses[batch_idx].alignments[-1]) > 0: - hypotheses[batch_idx].alignments.append([]) # blank buffer for next timestep - - # Do the same if preserving per-frame confidence - if self.preserve_frame_confidence: - - for batch_idx in range(batchsize): - if len(hypotheses[batch_idx].frame_confidence[-1]) > 0: - hypotheses[batch_idx].frame_confidence.append([]) # blank buffer for next timestep - hypotheses[batch_idx].y_3best.append([]) - hypotheses[batch_idx].frame_confidence_3best.append([]) - hypotheses[batch_idx].logp.append([]) else: # Collect batch indices where blanks occurred now/past blank_indices = (blank_mask == 1).nonzero(as_tuple=False) @@ -791,6 +762,29 @@ def _greedy_decode_blank_as_pad( hypotheses[kidx].score += float(v[kidx]) symbols_added += 1 + # If preserving alignments, convert the current Uj alignments into a torch.Tensor + # Then preserve U at current timestep Ti + # Finally, forward the timestep history to Ti+1 for that sample + # All of this should only be done iff the current time index <= sample-level AM length. + # Otherwise ignore and move to next sample / next timestep. + if self.preserve_alignments: + + # convert Ti-th logits into a torch array + for batch_idx in range(batchsize): + + # this checks if current timestep <= sample-level AM length + # If current timestep > sample-level AM length, no alignments will be added + # Therefore the list of Uj alignments is empty here. + if len(hypotheses[batch_idx].alignments[-1]) > 0: + hypotheses[batch_idx].alignments.append([]) # blank buffer for next timestep + + # Do the same if preserving per-frame confidence + if self.preserve_frame_confidence: + + for batch_idx in range(batchsize): + if len(hypotheses[batch_idx].frame_confidence[-1]) > 0: + hypotheses[batch_idx].frame_confidence.append([]) # blank buffer for next timestep + # Remove trailing empty list of alignments at T_{am-len} x Uj if self.preserve_alignments: for batch_idx in range(batchsize): @@ -802,9 +796,6 @@ def _greedy_decode_blank_as_pad( for batch_idx in range(batchsize): if len(hypotheses[batch_idx].frame_confidence[-1]) == 0: del hypotheses[batch_idx].frame_confidence[-1] - del hypotheses[batch_idx].y_3best[-1] - del hypotheses[batch_idx].frame_confidence_3best[-1] - del hypotheses[batch_idx].logp[-1] # Preserve states for batch_idx in range(batchsize): @@ -946,29 +937,6 @@ def _greedy_decode_masked( # This is equivalent to if single sample predicted k if blank_mask.all(): not_blank = False - - # If preserving alignments, convert the current Uj alignments into a torch.Tensor - # Then preserve U at current timestep Ti - # Finally, forward the timestep history to Ti+1 for that sample - # All of this should only be done iff the current time index <= sample-level AM length. - # Otherwise ignore and move to next sample / next timestep. - if self.preserve_alignments: - - # convert Ti-th logits into a torch array - for batch_idx in range(batchsize): - - # this checks if current timestep <= sample-level AM length - # If current timestep > sample-level AM length, no alignments will be added - # Therefore the list of Uj alignments is empty here. - if len(hypotheses[batch_idx].alignments[-1]) > 0: - hypotheses[batch_idx].alignments.append([]) # blank buffer for next timestep - - # Do the same if preserving per-frame confidence - if self.preserve_frame_confidence: - - for batch_idx in range(batchsize): - if len(hypotheses[batch_idx].frame_confidence[-1]) > 0: - hypotheses[batch_idx].frame_confidence.append([]) # blank buffer for next timestep else: # Collect batch indices where blanks occurred now/past blank_indices = (blank_mask == 1).nonzero(as_tuple=False) @@ -1004,6 +972,29 @@ def _greedy_decode_masked( symbols_added += 1 + # If preserving alignments, convert the current Uj alignments into a torch.Tensor + # Then preserve U at current timestep Ti + # Finally, forward the timestep history to Ti+1 for that sample + # All of this should only be done iff the current time index <= sample-level AM length. + # Otherwise ignore and move to next sample / next timestep. + if self.preserve_alignments: + + # convert Ti-th logits into a torch array + for batch_idx in range(batchsize): + + # this checks if current timestep <= sample-level AM length + # If current timestep > sample-level AM length, no alignments will be added + # Therefore the list of Uj alignments is empty here. + if len(hypotheses[batch_idx].alignments[-1]) > 0: + hypotheses[batch_idx].alignments.append([]) # blank buffer for next timestep + + # Do the same if preserving per-frame confidence + if self.preserve_frame_confidence: + + for batch_idx in range(batchsize): + if len(hypotheses[batch_idx].frame_confidence[-1]) > 0: + hypotheses[batch_idx].frame_confidence.append([]) # blank buffer for next timestep + # Remove trailing empty list of alignments at T_{am-len} x Uj if self.preserve_alignments: for batch_idx in range(batchsize): @@ -1624,13 +1615,6 @@ def _greedy_decode( # If any type of blank token is predicted, exit inner loop, move onto next timestep t if k >= self._blank_index - len(self.big_blank_durations): not_blank = False - - if self.preserve_alignments: - # convert Ti-th logits into a torch array - hypothesis.alignments.append([]) # blank buffer for next timestep - - if self.preserve_frame_confidence: - hypothesis.frame_confidence.append([]) # blank buffer for next timestep else: # Append token to label set, update RNN state. hypothesis.y_sequence.append(k) @@ -1642,6 +1626,13 @@ def _greedy_decode( # Increment token counter. symbols_added += 1 + if self.preserve_alignments: + # convert Ti-th logits into a torch array + hypothesis.alignments.append([]) # blank buffer for next timestep + + if self.preserve_frame_confidence: + hypothesis.frame_confidence.append([]) # blank buffer for next timestep + # Remove trailing empty list of Alignments if self.preserve_alignments: if len(hypothesis.alignments[-1]) == 0: @@ -1781,9 +1772,6 @@ def _greedy_decode_blank_as_pad( # frame_confidence is a 3-dimensional dangling list representing B x T x U for hyp in hypotheses: hyp.frame_confidence = [[]] - hyp.y_3best = [[]] - hyp.frame_confidence_3best = [[[]]] - hyp.logp = [[]] # Last Label buffer + Last Label without blank buffer # batch level equivalent of the last_label @@ -1897,40 +1885,6 @@ def _greedy_decode_blank_as_pad( # This is equivalent to if single sample predicted k if blank_mask.all(): not_blank = False - - for i in range(len(big_blank_masks) + 1): - # The task here is find the shortest blank duration of all batches. - # so we start from the shortest blank duration and go up, - # and stop once we found the duration whose corresponding mask isn't all True. - if i == len(big_blank_masks) or not big_blank_masks[i].all(): - big_blank_duration = self.big_blank_durations[i - 1] if i > 0 else 1 - break - - # If preserving alignments, convert the current Uj alignments into a torch.Tensor - # Then preserve U at current timestep Ti - # Finally, forward the timestep history to Ti+1 for that sample - # All of this should only be done iff the current time index <= sample-level AM length. - # Otherwise ignore and move to next sample / next timestep. - if self.preserve_alignments: - - # convert Ti-th logits into a torch array - for batch_idx in range(batchsize): - - # this checks if current timestep <= sample-level AM length - # If current timestep > sample-level AM length, no alignments will be added - # Therefore the list of Uj alignments is empty here. - if len(hypotheses[batch_idx].alignments[-1]) > 0: - hypotheses[batch_idx].alignments.append([]) # blank buffer for next timestep - - # Do the same if preserving per-frame confidence - if self.preserve_frame_confidence: - - for batch_idx in range(batchsize): - if len(hypotheses[batch_idx].frame_confidence[-1]) > 0: - hypotheses[batch_idx].frame_confidence.append([]) # blank buffer for next timestep - hypotheses[batch_idx].y_3best.append([]) - hypotheses[batch_idx].frame_confidence_3best.append([]) - hypotheses[batch_idx].logp.append([]) else: # Collect batch indices where blanks occurred now/past blank_indices = (blank_mask == 1).nonzero(as_tuple=False) @@ -1966,6 +1920,37 @@ def _greedy_decode_blank_as_pad( symbols_added += 1 + for i in range(len(big_blank_masks) + 1): + # The task here is find the shortest blank duration of all batches. + # so we start from the shortest blank duration and go up, + # and stop once we found the duration whose corresponding mask isn't all True. + if i == len(big_blank_masks) or not big_blank_masks[i].all(): + big_blank_duration = self.big_blank_durations[i - 1] if i > 0 else 1 + break + + # If preserving alignments, convert the current Uj alignments into a torch.Tensor + # Then preserve U at current timestep Ti + # Finally, forward the timestep history to Ti+1 for that sample + # All of this should only be done iff the current time index <= sample-level AM length. + # Otherwise ignore and move to next sample / next timestep. + if self.preserve_alignments: + + # convert Ti-th logits into a torch array + for batch_idx in range(batchsize): + + # this checks if current timestep <= sample-level AM length + # If current timestep > sample-level AM length, no alignments will be added + # Therefore the list of Uj alignments is empty here. + if len(hypotheses[batch_idx].alignments[-1]) > 0: + hypotheses[batch_idx].alignments.append([]) # blank buffer for next timestep + + # Do the same if preserving per-frame confidence + if self.preserve_frame_confidence: + + for batch_idx in range(batchsize): + if len(hypotheses[batch_idx].frame_confidence[-1]) > 0: + hypotheses[batch_idx].frame_confidence.append([]) # blank buffer for next timestep + # Remove trailing empty list of alignments at T_{am-len} x Uj if self.preserve_alignments: for batch_idx in range(batchsize): @@ -1977,9 +1962,6 @@ def _greedy_decode_blank_as_pad( for batch_idx in range(batchsize): if len(hypotheses[batch_idx].frame_confidence[-1]) == 0: del hypotheses[batch_idx].frame_confidence[-1] - del hypotheses[batch_idx].y_3best[-1] - del hypotheses[batch_idx].frame_confidence_3best[-1] - del hypotheses[batch_idx].logp[-1] # Preserve states for batch_idx in range(batchsize): @@ -2121,29 +2103,6 @@ def _greedy_decode_masked( # This is equivalent to if single sample predicted k if blank_mask.all(): not_blank = False - - # If preserving alignments, convert the current Uj alignments into a torch.Tensor - # Then preserve U at current timestep Ti - # Finally, forward the timestep history to Ti+1 for that sample - # All of this should only be done iff the current time index <= sample-level AM length. - # Otherwise ignore and move to next sample / next timestep. - if self.preserve_alignments: - - # convert Ti-th logits into a torch array - for batch_idx in range(batchsize): - - # this checks if current timestep <= sample-level AM length - # If current timestep > sample-level AM length, no alignments will be added - # Therefore the list of Uj alignments is empty here. - if len(hypotheses[batch_idx].alignments[-1]) > 0: - hypotheses[batch_idx].alignments.append([]) # blank buffer for next timestep - - # Do the same if preserving per-frame confidence - if self.preserve_frame_confidence: - - for batch_idx in range(batchsize): - if len(hypotheses[batch_idx].frame_confidence[-1]) > 0: - hypotheses[batch_idx].frame_confidence.append([]) # blank buffer for next timestep else: # Collect batch indices where blanks occurred now/past blank_indices = (blank_mask == 1).nonzero(as_tuple=False) @@ -2179,6 +2138,29 @@ def _greedy_decode_masked( symbols_added += 1 + # If preserving alignments, convert the current Uj alignments into a torch.Tensor + # Then preserve U at current timestep Ti + # Finally, forward the timestep history to Ti+1 for that sample + # All of this should only be done iff the current time index <= sample-level AM length. + # Otherwise ignore and move to next sample / next timestep. + if self.preserve_alignments: + + # convert Ti-th logits into a torch array + for batch_idx in range(batchsize): + + # this checks if current timestep <= sample-level AM length + # If current timestep > sample-level AM length, no alignments will be added + # Therefore the list of Uj alignments is empty here. + if len(hypotheses[batch_idx].alignments[-1]) > 0: + hypotheses[batch_idx].alignments.append([]) # blank buffer for next timestep + + # Do the same if preserving per-frame confidence + if self.preserve_frame_confidence: + + for batch_idx in range(batchsize): + if len(hypotheses[batch_idx].frame_confidence[-1]) > 0: + hypotheses[batch_idx].frame_confidence.append([]) # blank buffer for next timestep + # Remove trailing empty list of alignments at T_{am-len} x Uj if self.preserve_alignments: for batch_idx in range(batchsize): @@ -2443,19 +2425,6 @@ def _greedy_decode( # If blank token is predicted, exit inner loop, move onto next timestep t if k == self._blank_index: not_blank = False - - # this rarely happens, but we manually increment the `skip` number - # if blank is emitted and duration=0 is predicted. This prevents possible - # infinite loops. - if skip == 0: - skip = 1 - - if self.preserve_alignments: - # convert Ti-th logits into a torch array - hypothesis.alignments.append([]) # blank buffer for next timestep - - if self.preserve_frame_confidence: - hypothesis.frame_confidence.append([]) # blank buffer for next timestep else: # Append token to label set, update RNN state. hypothesis.y_sequence.append(k) @@ -2469,6 +2438,19 @@ def _greedy_decode( time_idx += skip need_loop = skip == 0 + # this rarely happens, but we manually increment the `skip` number + # if blank is emitted and duration=0 is predicted. This prevents possible + # infinite loops. + if skip == 0: + skip = 1 + + if self.preserve_alignments: + # convert Ti-th logits into a torch array + hypothesis.alignments.append([]) # blank buffer for next timestep + + if self.preserve_frame_confidence: + hypothesis.frame_confidence.append([]) # blank buffer for next timestep + if symbols_added == self.max_symbols: time_idx += 1 @@ -2652,9 +2634,6 @@ def _greedy_decode_blank_as_pad( # frame_confidence is a 3-dimensional dangling list representing B x T x U for hyp in hypotheses: hyp.frame_confidence = [[]] - hyp.y_3best = [[]] - hyp.frame_confidence_3best = [[[]]] - hyp.logp = [[]] # Last Label buffer + Last Label without blank buffer # batch level equivalent of the last_label @@ -2781,9 +2760,6 @@ def _greedy_decode_blank_as_pad( for batch_idx in range(batchsize): if len(hypotheses[batch_idx].frame_confidence[-1]) == 0: del hypotheses[batch_idx].frame_confidence[-1] - del hypotheses[batch_idx].y_3best[-1] - del hypotheses[batch_idx].frame_confidence_3best[-1] - del hypotheses[batch_idx].logp[-1] # Preserve states for batch_idx in range(batchsize): diff --git a/tests/collections/asr/test_asr_rnnt_encdec_model.py b/tests/collections/asr/test_asr_rnnt_encdec_model.py index 68f1e38f797b..12e08006a3e4 100644 --- a/tests/collections/asr/test_asr_rnnt_encdec_model.py +++ b/tests/collections/asr/test_asr_rnnt_encdec_model.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import copy +from typing import Any, Dict, List, Optional, Tuple import pytest import torch @@ -31,6 +32,71 @@ ) or numba_utils.numba_cuda_is_supported(__NUMBA_MINIMUM_VERSION__) +@pytest.fixture() +def max_symbols_setup(): + from nemo.collections.asr.modules.rnnt_abstract import AbstractRNNTDecoder, AbstractRNNTJoint + from nemo.collections.asr.parts.utils.rnnt_utils import Hypothesis + + class DummyRNNTDecoder(AbstractRNNTDecoder): + def predict( + self, + y: Optional[torch.Tensor] = None, + state: Optional[torch.Tensor] = None, + add_sos: bool = False, + batch_size: Optional[int] = None, + ) -> Tuple[torch.Tensor, List[torch.Tensor]]: + if y is not None and state is not None: + return (y + state) / 2, y * state + elif state is not None: + return torch.tensor([0] * self.vocab_size + [1], dtype=torch.float32).repeat(state.size()), state + elif y is not None: + return y, torch.tensor([0] * self.vocab_size + [1], dtype=torch.float32).repeat(y.size()) + return ( + torch.tensor([0] * self.vocab_size + [1], dtype=torch.float32).repeat([1, 1, 1]), + torch.tensor([0] * self.vocab_size + [1], dtype=torch.float32).repeat([1, 1, 1]), + ) + + def initialize_state(self, y: torch.Tensor) -> List[torch.Tensor]: + return [torch.tensor()] + + def score_hypothesis( + self, hypothesis: Hypothesis, cache: Dict[Tuple[int], Any] + ) -> Tuple[torch.Tensor, List[torch.Tensor], torch.Tensor]: + return torch.tensor(), [torch.tensor()], torch.tensor() + + def batch_select_state(self, batch_states: List[torch.Tensor], idx: int) -> List[List[torch.Tensor]]: + if batch_states is not None: + states = batch_states[0][idx] + states = states.long() + return [states] + else: + return None + + def batch_copy_states( + self, + old_states: List[torch.Tensor], + new_states: List[torch.Tensor], + ids: List[int], + value: Optional[float] = None, + ) -> List[torch.Tensor]: + if value is None: + old_states[0][ids, :] = new_states[0][ids, :] + + return old_states + + class DummyRNNTJoint(AbstractRNNTJoint): + def joint(self, f: torch.Tensor, g: torch.Tensor) -> torch.Tensor: + return f.unsqueeze(dim=2) + g.unsqueeze(dim=1) + + setup = {} + setup["decoder"] = DummyRNNTDecoder(vocab_size=2, blank_idx=2, blank_as_pad=True) + setup["decoder_masked"] = DummyRNNTDecoder(vocab_size=2, blank_idx=2, blank_as_pad=False) + setup["joint"] = DummyRNNTJoint() + setup["encoder_output"] = torch.tensor([[[1, 0, 0], [0, 1, 0], [0, 0, 1]]], dtype=torch.float32).transpose(1, 2) + setup["encoded_lengths"] = torch.tensor([3]) + return setup + + @pytest.fixture() def asr_model(): preprocessor = {'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor', 'params': dict({})} @@ -589,11 +655,16 @@ def test_greedy_decoding_preserve_alignment(self, greedy_class): decoder = RNNTDecoder(prednet_cfg, vocab_size) + max_symbols_per_step = 5 for joint_type in [RNNTJoint, HATJoint]: joint_net = joint_type(jointnet_cfg, vocab_size, vocabulary=token_list) greedy = greedy_class( - decoder, joint_net, blank_index=len(token_list) - 1, preserve_alignments=True, max_symbols_per_step=5 + decoder, + joint_net, + blank_index=len(token_list), + preserve_alignments=True, + max_symbols_per_step=max_symbols_per_step, ) # (B, D, T) @@ -604,12 +675,175 @@ def test_greedy_decoding_preserve_alignment(self, greedy_class): hyp = greedy(encoder_output=enc_out, encoded_lengths=enc_len)[0][0] # type: rnnt_utils.Hypothesis assert hyp.alignments is not None + timestep_count = { + u.item(): c.item() for u, c in zip(*torch.unique(torch.tensor(hyp.timestep), return_counts=True)) + } for t in range(len(hyp.alignments)): - for u in range(len(hyp.alignments[t])): + + # check that the number of alignment elements is consistent with hyp.timestep + alignment_len = len(hyp.alignments[t]) + assert alignment_len <= max_symbols_per_step + if t in timestep_count: # non-blank + assert alignment_len == timestep_count[t] + (1 if alignment_len < max_symbols_per_step else 0) + else: # blank + assert alignment_len == 1 + + for u in range(alignment_len): logp, label = hyp.alignments[t][u] assert torch.is_tensor(logp) assert torch.is_tensor(label) + @pytest.mark.skipif( + not NUMBA_RNNT_LOSS_AVAILABLE, reason='RNNTLoss has not been compiled with appropriate numba version.', + ) + @pytest.mark.unit + @pytest.mark.parametrize( + "greedy_class", [greedy_decode.GreedyRNNTInfer, greedy_decode.GreedyBatchedRNNTInfer], + ) + def test_greedy_decoding_preserve_frame_confidence(self, greedy_class): + token_list = [" ", "a", "b", "c"] + vocab_size = len(token_list) + + encoder_output_size = 4 + decoder_output_size = 4 + joint_output_shape = 4 + + prednet_cfg = {'pred_hidden': decoder_output_size, 'pred_rnn_layers': 1} + jointnet_cfg = { + 'encoder_hidden': encoder_output_size, + 'pred_hidden': decoder_output_size, + 'joint_hidden': joint_output_shape, + 'activation': 'relu', + } + + decoder = RNNTDecoder(prednet_cfg, vocab_size) + + max_symbols_per_step = 5 + for joint_type in [RNNTJoint, HATJoint]: + joint_net = joint_type(jointnet_cfg, vocab_size, vocabulary=token_list) + + greedy = greedy_class( + decoder, + joint_net, + blank_index=len(token_list), + preserve_alignments=True, + preserve_frame_confidence=True, + max_symbols_per_step=max_symbols_per_step, + ) + + # (B, D, T) + enc_out = torch.randn(1, encoder_output_size, 30) + enc_len = torch.tensor([30], dtype=torch.int32) + + with torch.no_grad(): + hyp = greedy(encoder_output=enc_out, encoded_lengths=enc_len)[0][0] # type: rnnt_utils.Hypothesis + assert hyp.frame_confidence is not None + + timestep_count = { + u.item(): c.item() for u, c in zip(*torch.unique(torch.tensor(hyp.timestep), return_counts=True)) + } + for t in range(len(hyp.frame_confidence)): + + # check that the number of confidence elements is consistent with hyp.timestep + confidence_len = len(hyp.frame_confidence[t]) + assert confidence_len <= max_symbols_per_step + if t in timestep_count: # non-blank + assert confidence_len == timestep_count[t] + ( + 1 if confidence_len < max_symbols_per_step else 0 + ) + else: # blank + assert confidence_len == 1 + + for u in range(confidence_len): + score = hyp.frame_confidence[t][u] + assert 0 <= score <= 1 + + @pytest.mark.skipif( + not NUMBA_RNNT_LOSS_AVAILABLE, reason='RNNTLoss has not been compiled with appropriate numba version.', + ) + @pytest.mark.unit + @pytest.mark.parametrize( + "greedy_class", [greedy_decode.GreedyRNNTInfer, greedy_decode.GreedyBatchedRNNTInfer], + ) + @pytest.mark.parametrize("max_symbols_per_step", [0, 1, 5]) + def test_greedy_decoding_max_symbols_alignment(self, max_symbols_setup, greedy_class, max_symbols_per_step): + decoders = [max_symbols_setup["decoder"]] + if greedy_class is greedy_decode.GreedyBatchedRNNTInfer: + decoders.append(max_symbols_setup["decoder_masked"]) + joint = max_symbols_setup["joint"] + encoder_output = max_symbols_setup["encoder_output"] + encoded_lengths = max_symbols_setup["encoded_lengths"] + + for decoder in decoders: + greedy = greedy_class( + decoder_model=decoder, + joint_model=joint, + blank_index=decoder.blank_idx, + max_symbols_per_step=max_symbols_per_step, + preserve_alignments=True, + ) + + with torch.no_grad(): + hyp = greedy(encoder_output=encoder_output, encoded_lengths=encoded_lengths)[0][0] + assert hyp.alignments is not None + + timestep_count = { + u.item(): c.item() for u, c in zip(*torch.unique(torch.tensor(hyp.timestep), return_counts=True)) + } + for t in range(len(hyp.alignments)): + + # check that the number of confidence elements is consistent with hyp.timestep + alignment_len = len(hyp.alignments[t]) + assert alignment_len <= max_symbols_per_step + if t in timestep_count: # non-blank + assert alignment_len == timestep_count[t] + (1 if alignment_len < max_symbols_per_step else 0) + else: # blank or max_symbols_per_step == 0 + assert alignment_len <= 1 + + @pytest.mark.skipif( + not NUMBA_RNNT_LOSS_AVAILABLE, reason='RNNTLoss has not been compiled with appropriate numba version.', + ) + @pytest.mark.unit + @pytest.mark.parametrize( + "greedy_class", [greedy_decode.GreedyRNNTInfer, greedy_decode.GreedyBatchedRNNTInfer], + ) + @pytest.mark.parametrize("max_symbols_per_step", [0, 1, 5]) + def test_greedy_decoding_max_symbols_confidence(self, max_symbols_setup, greedy_class, max_symbols_per_step): + decoders = [max_symbols_setup["decoder"]] + if greedy_class is greedy_decode.GreedyBatchedRNNTInfer: + decoders.append(max_symbols_setup["decoder_masked"]) + joint = max_symbols_setup["joint"] + encoder_output = max_symbols_setup["encoder_output"] + encoded_lengths = max_symbols_setup["encoded_lengths"] + + for decoder in decoders: + greedy = greedy_class( + decoder_model=decoder, + joint_model=joint, + blank_index=decoder.blank_idx, + max_symbols_per_step=max_symbols_per_step, + preserve_frame_confidence=True, + ) + + with torch.no_grad(): + hyp = greedy(encoder_output=encoder_output, encoded_lengths=encoded_lengths)[0][0] + assert hyp.frame_confidence is not None + + timestep_count = { + u.item(): c.item() for u, c in zip(*torch.unique(torch.tensor(hyp.timestep), return_counts=True)) + } + for t in range(len(hyp.frame_confidence)): + + # check that the number of confidence elements is consistent with hyp.timestep + confidence_len = len(hyp.frame_confidence[t]) + assert confidence_len <= max_symbols_per_step + if t in timestep_count: # non-blank + assert confidence_len == timestep_count[t] + ( + 1 if confidence_len < max_symbols_per_step else 0 + ) + else: # blank or max_symbols_per_step == 0 + assert confidence_len <= 1 + @pytest.mark.skipif( not NUMBA_RNNT_LOSS_AVAILABLE, reason='RNNTLoss has not been compiled with appropriate numba version.', ) From 85a8bf1eb0d71b352be9a5916808a5213e632555 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 19 Sep 2023 10:44:46 -0600 Subject: [PATCH 235/512] Fix resume from checkpoint in exp_manager (#7424) (#7426) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: Eric Harper --- nemo/utils/exp_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/utils/exp_manager.py b/nemo/utils/exp_manager.py index 587c9898a695..70a2e2ebb94e 100644 --- a/nemo/utils/exp_manager.py +++ b/nemo/utils/exp_manager.py @@ -585,8 +585,8 @@ def check_resume( end_dist_checkpoints = [d for d in dist_checkpoints if d.match("*end")] last_dist_checkpoints = [d for d in dist_checkpoints if d.match("*last")] - end_checkpoints = end_dist_checkpoints if end_dist_checkpoints else list(checkpoint_dir.glob("*end.ckpt")) - last_checkpoints = last_dist_checkpoints if last_dist_checkpoints else list(checkpoint_dir.glob("*last.ckpt")) + end_checkpoints = end_dist_checkpoints if end_dist_checkpoints else list(checkpoint_dir.rglob("*end.ckpt")) + last_checkpoints = last_dist_checkpoints if last_dist_checkpoints else list(checkpoint_dir.rglob("*last.ckpt")) if not checkpoint_dir.exists() or (not len(end_checkpoints) > 0 and not len(last_checkpoints) > 0): if resume_ignore_no_checkpoint: From e6d8fa9acd8d107902303c948a384b93eed0e551 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Wed, 20 Sep 2023 02:49:18 +1000 Subject: [PATCH 236/512] Fix checking of cuda/cpu device for inputs of Decoder (#7444) * Fix checking of cuda/cpu device for inputs of Decoder Signed-off-by: Robin Dong * Update tacotron2.py Signed-off-by: Jason --------- Signed-off-by: Robin Dong Signed-off-by: Jason Co-authored-by: Jason --- nemo/collections/tts/modules/tacotron2.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/nemo/collections/tts/modules/tacotron2.py b/nemo/collections/tts/modules/tacotron2.py index 7a7c63eb5ad4..dc86074abbe2 100644 --- a/nemo/collections/tts/modules/tacotron2.py +++ b/nemo/collections/tts/modules/tacotron2.py @@ -312,11 +312,8 @@ def infer(self, *, memory, memory_lengths): self.initialize_decoder_states(memory, mask=mask) - mel_lengths = torch.zeros([memory.size(0)], dtype=torch.int32) - not_finished = torch.ones([memory.size(0)], dtype=torch.int32) - if torch.cuda.is_available(): - mel_lengths = mel_lengths.cuda() - not_finished = not_finished.cuda() + mel_lengths = torch.zeros([memory.size(0)], dtype=torch.int32).to(memory.device) + not_finished = torch.ones([memory.size(0)], dtype=torch.int32).to(memory.device) mel_outputs, gate_outputs, alignments = [], [], [] stepped = False From 701befeed516d47bd1cf6a061ab93414bb065138 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Wed, 20 Sep 2023 03:19:56 +1000 Subject: [PATCH 237/512] Fix failure of ljspeech's get_data.py (#7430) * Fix failure of ljspeech's get_data.py Signed-off-by: Robin Dong * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Robin Dong Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../tts/ljspeech/get_data.py | 23 +++---------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/scripts/dataset_processing/tts/ljspeech/get_data.py b/scripts/dataset_processing/tts/ljspeech/get_data.py index d8a0b1c2834c..c8aeed5dbfca 100644 --- a/scripts/dataset_processing/tts/ljspeech/get_data.py +++ b/scripts/dataset_processing/tts/ljspeech/get_data.py @@ -27,11 +27,6 @@ def get_args(): parser = argparse.ArgumentParser(description='Download LJSpeech and create manifests with predefined split') parser.add_argument("--data-root", required=True, type=Path) - parser.add_argument( - '--whitelist-path', - type=str, - default="lj_speech.tsv extracted from the readme file in the dataset. You can also download the file from https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv", - ) args = parser.parse_args() return args @@ -57,20 +52,9 @@ def __extract_file(filepath, data_dir): print(f"Error while extracting {filepath}. Already extracted?") -def __process_data(data_root, whitelist_path): - if whitelist_path is None: - wget.download( - "https://raw.githubusercontent.com/NVIDIA/NeMo-text-processing/main/nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv", - out=str(data_root), - ) - whitelist_path = data_root / "lj_speech.tsv" - +def __process_data(data_root): text_normalizer = Normalizer( - lang="en", - input_case="cased", - whitelist=whitelist_path, - overwrite_cache=True, - cache_dir=data_root / "cache_dir", + lang="en", input_case="cased", overwrite_cache=True, cache_dir=data_root / "cache_dir", ) text_normalizer_call_kwargs = {"punct_pre_process": True, "punct_post_process": True} normalizer_call = lambda x: text_normalizer.normalize(x, **text_normalizer_call_kwargs) @@ -117,9 +101,8 @@ def main(): __extract_file(str(tarred_data_path), str(args.data_root)) data_root = args.data_root / "LJSpeech-1.1" - whitelist_path = args.whitelist_path - __process_data(data_root, whitelist_path) + __process_data(data_root) if __name__ == '__main__': From 16bcf5aaa89b67b1acee22bb26fc57ab82495f96 Mon Sep 17 00:00:00 2001 From: Ryan Langman Date: Tue, 19 Sep 2023 10:56:27 -0700 Subject: [PATCH 238/512] [TTS] Fix audio codec type checks (#7373) * [TTS] Fix audio codec type checks Signed-off-by: Ryan * [TTS] Fix audio codec tests Signed-off-by: Ryan --------- Signed-off-by: Ryan --- .../tts/losses/audio_codec_loss.py | 6 +- nemo/collections/tts/models/audio_codec.py | 6 +- .../tts/modules/audio_codec_modules.py | 28 ++++---- .../tts/modules/encodec_modules.py | 64 +++++++++++-------- .../tts/modules/test_audio_codec_modules.py | 6 +- 5 files changed, 61 insertions(+), 49 deletions(-) diff --git a/nemo/collections/tts/losses/audio_codec_loss.py b/nemo/collections/tts/losses/audio_codec_loss.py index bde96fadb4c2..8819282f07bd 100644 --- a/nemo/collections/tts/losses/audio_codec_loss.py +++ b/nemo/collections/tts/losses/audio_codec_loss.py @@ -40,8 +40,8 @@ def __init__(self, loss_fn, loss_scale: float = 1.0): @property def input_types(self): return { - "target": NeuralType(('B', 'D', 'T'), RegressionValuesType()), "predicted": NeuralType(('B', 'D', 'T'), PredictionsType()), + "target": NeuralType(('B', 'D', 'T'), RegressionValuesType()), "target_len": NeuralType(tuple('B'), LengthsType()), } @@ -97,7 +97,7 @@ def input_types(self): @property def output_types(self): return { - "loss": [NeuralType(elements_type=LossType())], + "loss": NeuralType(elements_type=LossType()), } @typecheck() @@ -146,7 +146,7 @@ def input_types(self): @property def output_types(self): return { - "loss": [NeuralType(elements_type=LossType())], + "loss": NeuralType(elements_type=LossType()), } @typecheck() diff --git a/nemo/collections/tts/models/audio_codec.py b/nemo/collections/tts/models/audio_codec.py index 6414fa20e52d..63140b77f2b5 100644 --- a/nemo/collections/tts/models/audio_codec.py +++ b/nemo/collections/tts/models/audio_codec.py @@ -484,9 +484,11 @@ def configure_optimizers(self): sched_config = optim_config.pop("sched", None) OmegaConf.set_struct(optim_config, True) - gen_params = itertools.chain(self.audio_encoder.parameters(), self.audio_decoder.parameters()) - disc_params = self.discriminator.parameters() + vq_params = self.vector_quantizer.parameters() if self.vector_quantizer else [] + gen_params = itertools.chain(self.audio_encoder.parameters(), self.audio_decoder.parameters(), vq_params) optim_g = instantiate(optim_config, params=gen_params) + + disc_params = self.discriminator.parameters() optim_d = instantiate(optim_config, params=disc_params) if sched_config is None: diff --git a/nemo/collections/tts/modules/audio_codec_modules.py b/nemo/collections/tts/modules/audio_codec_modules.py index aaf4fb0a7f21..90c53b1f4337 100644 --- a/nemo/collections/tts/modules/audio_codec_modules.py +++ b/nemo/collections/tts/modules/audio_codec_modules.py @@ -12,15 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Iterable, Optional, Tuple +from typing import Optional, Tuple -import torch import torch.nn as nn -from einops import rearrange from nemo.collections.tts.parts.utils.helpers import mask_sequence_tensor +from nemo.core.classes.common import typecheck from nemo.core.classes.module import NeuralModule -from nemo.core.neural_types.elements import AudioSignal, EncodedRepresentation, LengthsType, VoidType +from nemo.core.neural_types.elements import LengthsType, VoidType from nemo.core.neural_types.neural_type import NeuralType @@ -64,21 +63,22 @@ def __init__( def input_types(self): return { "inputs": NeuralType(('B', 'C', 'T'), VoidType()), - "lengths": NeuralType(tuple('B'), LengthsType()), + "input_len": NeuralType(tuple('B'), LengthsType()), } @property def output_types(self): return { - "out": [NeuralType(('B', 'C', 'T'), VoidType())], + "out": NeuralType(('B', 'C', 'T'), VoidType()), } def remove_weight_norm(self): nn.utils.remove_weight_norm(self.conv) - def forward(self, inputs, lengths): + @typecheck() + def forward(self, inputs, input_len): out = self.conv(inputs) - out = mask_sequence_tensor(out, lengths) + out = mask_sequence_tensor(out, input_len) return out @@ -101,21 +101,22 @@ def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride def input_types(self): return { "inputs": NeuralType(('B', 'C', 'T'), VoidType()), - "lengths": NeuralType(tuple('B'), LengthsType()), + "input_len": NeuralType(tuple('B'), LengthsType()), } @property def output_types(self): return { - "out": [NeuralType(('B', 'C', 'T'), VoidType())], + "out": NeuralType(('B', 'C', 'T'), VoidType()), } def remove_weight_norm(self): nn.utils.remove_weight_norm(self.conv) - def forward(self, inputs, lengths): + @typecheck() + def forward(self, inputs, input_len): out = self.conv(inputs) - out = mask_sequence_tensor(out, lengths) + out = mask_sequence_tensor(out, input_len) return out @@ -151,11 +152,12 @@ def input_types(self): @property def output_types(self): return { - "out": [NeuralType(('B', 'C', 'H', 'T'), VoidType())], + "out": NeuralType(('B', 'C', 'H', 'T'), VoidType()), } def remove_weight_norm(self): nn.utils.remove_weight_norm(self.conv) + @typecheck() def forward(self, inputs): return self.conv(inputs) diff --git a/nemo/collections/tts/modules/encodec_modules.py b/nemo/collections/tts/modules/encodec_modules.py index 031b2001e5ca..b05187ccb74b 100644 --- a/nemo/collections/tts/modules/encodec_modules.py +++ b/nemo/collections/tts/modules/encodec_modules.py @@ -72,13 +72,13 @@ def __init__(self, channels: int): def input_types(self): return { "inputs": NeuralType(('B', 'C', 'T_input'), VoidType()), - "lengths": NeuralType(tuple('B'), LengthsType()), + "input_len": NeuralType(tuple('B'), LengthsType()), } @property def output_types(self): return { - "out": [NeuralType(('B', 'C', 'T_out'), VoidType())], + "out": NeuralType(('B', 'C', 'T_out'), VoidType()), } def remove_weight_norm(self): @@ -86,14 +86,15 @@ def remove_weight_norm(self): self.res_conv1.remove_weight_norm() self.res_conv2.remove_weight_norm() - def forward(self, inputs, lengths): + @typecheck() + def forward(self, inputs, input_len): res = self.activation(inputs) - res = self.res_conv1(res, lengths) + res = self.res_conv1(inputs=res, input_len=input_len) res = self.activation(res) - res = self.res_conv2(res, lengths) + res = self.res_conv2(inputs=res, input_len=input_len) - out = self.pre_conv(inputs, lengths) + res - out = mask_sequence_tensor(out, lengths) + out = self.pre_conv(inputs=inputs, input_len=input_len) + res + out = mask_sequence_tensor(out, input_len) return out @@ -112,20 +113,21 @@ def __init__(self, dim: int, num_layers: int, rnn_type: str = "lstm", use_skip: def input_types(self): return { "inputs": NeuralType(('B', 'C', 'T'), VoidType()), - "lengths": NeuralType(tuple('B'), LengthsType()), + "input_len": NeuralType(tuple('B'), LengthsType()), } @property def output_types(self): return { - "out": [NeuralType(('B', 'C', 'T'), VoidType())], + "out": NeuralType(('B', 'C', 'T'), VoidType()), } - def forward(self, inputs, lengths): + @typecheck() + def forward(self, inputs, input_len): inputs = rearrange(inputs, "B C T -> B T C") packed_inputs = nn.utils.rnn.pack_padded_sequence( - inputs, lengths=lengths.cpu(), batch_first=True, enforce_sorted=False + inputs, lengths=input_len.cpu(), batch_first=True, enforce_sorted=False ) packed_out, _ = self.rnn(packed_inputs) out, _ = nn.utils.rnn.pad_packed_sequence(packed_out, batch_first=True) @@ -183,15 +185,15 @@ def __init__( @property def input_types(self): return { - "audio": NeuralType(('B', 'C', 'T_audio'), AudioSignal()), + "audio": NeuralType(('B', 'T_audio'), AudioSignal()), "audio_len": NeuralType(tuple('B'), LengthsType()), } @property def output_types(self): return { - "encoded": [NeuralType(('B', 'D', 'T_encoded'), EncodedRepresentation())], - "encoded_len": [NeuralType(tuple('B'), LengthsType())], + "encoded": NeuralType(('B', 'D', 'T_encoded'), EncodedRepresentation()), + "encoded_len": NeuralType(tuple('B'), LengthsType()), } def remove_weight_norm(self): @@ -201,26 +203,27 @@ def remove_weight_norm(self): for down_sample_conv in self.down_sample_conv_layers: down_sample_conv.remove_weight_norm() + @typecheck() def forward(self, audio, audio_len): encoded_len = audio_len audio = rearrange(audio, "B T -> B 1 T") # [B, C, T_audio] - out = self.pre_conv(audio, encoded_len) + out = self.pre_conv(inputs=audio, input_len=encoded_len) for res_block, down_sample_conv, down_sample_rate in zip( self.res_blocks, self.down_sample_conv_layers, self.down_sample_rates ): # [B, C, T] - out = res_block(out, encoded_len) + out = res_block(inputs=out, input_len=encoded_len) out = self.activation(out) encoded_len = encoded_len // down_sample_rate # [B, 2 * C, T / down_sample_rate] - out = down_sample_conv(out, encoded_len) + out = down_sample_conv(inputs=out, input_len=encoded_len) - out = self.rnn(out, encoded_len) + out = self.rnn(inputs=out, input_len=encoded_len) out = self.activation(out) # [B, encoded_dim, T_encoded] - encoded = self.post_conv(out, encoded_len) + encoded = self.post_conv(inputs=out, input_len=encoded_len) return encoded, encoded_len @@ -274,7 +277,7 @@ def input_types(self): @property def output_types(self): return { - "audio": NeuralType(('B', 'C', 'T_audio'), AudioSignal()), + "audio": NeuralType(('B', 'T_audio'), AudioSignal()), "audio_len": NeuralType(tuple('B'), LengthsType()), } @@ -285,23 +288,24 @@ def remove_weight_norm(self): for res_block in self.res_blocks: res_block.remove_weight_norm() + @typecheck() def forward(self, inputs, input_len): audio_len = input_len # [B, C, T_encoded] - out = self.pre_conv(inputs, audio_len) - out = self.rnn(out, audio_len) + out = self.pre_conv(inputs=inputs, input_len=audio_len) + out = self.rnn(inputs=out, input_len=audio_len) for res_block, up_sample_conv, up_sample_rate in zip( self.res_blocks, self.up_sample_conv_layers, self.up_sample_rates ): audio_len = audio_len * up_sample_rate out = self.activation(out) # [B, C / 2, T * up_sample_rate] - out = up_sample_conv(out, audio_len) - out = res_block(out, audio_len) + out = up_sample_conv(inputs=out, input_len=audio_len) + out = res_block(inputs=out, input_len=audio_len) out = self.activation(out) # [B, 1, T_audio] - out = self.post_conv(out, audio_len) + out = self.post_conv(inputs=out, input_len=audio_len) audio = self.out_activation(out) audio = rearrange(audio, "B 1 T -> B T") return audio, audio_len @@ -356,6 +360,7 @@ def output_types(self): "fmap": [NeuralType(('B', 'D', 'T_spec', 'C'), VoidType())], } + @typecheck() def forward(self, audio): fmap = [] @@ -363,11 +368,11 @@ def forward(self, audio): out = self.stft(audio) for conv in self.conv_layers: # [batch, filters, T_spec, fft // 2**i] - out = conv(out) + out = conv(inputs=out) out = self.activation(out) fmap.append(out) # [batch, 1, T_spec, fft // 8] - scores = self.conv_post(out) + scores = self.conv_post(inputs=out) fmap.append(scores) scores = rearrange(scores, "B 1 T C -> B C T") @@ -382,7 +387,7 @@ def __init__(self, resolutions): @property def input_types(self): return { - "audio": NeuralType(('B', 'T_audio'), AudioSignal()), + "audio_real": NeuralType(('B', 'T_audio'), AudioSignal()), "audio_gen": NeuralType(('B', 'T_audio'), AudioSignal()), } @@ -395,6 +400,7 @@ def output_types(self): "fmaps_gen": [[NeuralType(('B', 'D', 'T_spec', 'C'), VoidType())]], } + @typecheck() def forward(self, audio_real, audio_gen): scores_real = [] scores_gen = [] @@ -627,6 +633,7 @@ def output_types(self): "indices": NeuralType(('B', 'T'), Index()), } + @typecheck() def forward(self, inputs, input_len): input_flat = rearrange(inputs, "B T D -> (B T) D") self._init_codes(input_flat) @@ -746,6 +753,7 @@ def output_types(self): "commit_loss": NeuralType((), LossType()), } + @typecheck() def forward(self, inputs: Tensor, input_len: Tensor) -> Tuple[Tensor, Tensor, float]: commit_loss = 0.0 residual = rearrange(inputs, "B D T -> B T D") diff --git a/tests/collections/tts/modules/test_audio_codec_modules.py b/tests/collections/tts/modules/test_audio_codec_modules.py index 948b1220f39c..4650a6508edd 100644 --- a/tests/collections/tts/modules/test_audio_codec_modules.py +++ b/tests/collections/tts/modules/test_audio_codec_modules.py @@ -40,7 +40,7 @@ def test_conv1d(self): lengths = torch.tensor([self.len1, self.len2], dtype=torch.int32) conv = Conv1dNorm(in_channels=self.in_channels, out_channels=self.out_channels, kernel_size=self.kernel_size) - out = conv(inputs, lengths) + out = conv(inputs=inputs, input_len=lengths) assert out.shape == (self.batch_size, self.out_channels, self.max_len) assert torch.all(out[0, :, : self.len1] != 0.0) @@ -66,7 +66,7 @@ def test_conv1d_downsample(self): stride=stride, padding=padding, ) - out = conv(inputs, lengths) + out = conv(inputs=inputs, input_len=lengths) assert out.shape == (self.batch_size, self.out_channels, out_len) assert torch.all(out[0, :, :out_len_1] != 0.0) @@ -87,7 +87,7 @@ def test_conv1d_transpose_upsample(self): conv = ConvTranspose1dNorm( in_channels=self.in_channels, out_channels=self.out_channels, kernel_size=self.kernel_size, stride=stride ) - out = conv(inputs, lengths) + out = conv(inputs=inputs, input_len=lengths) assert out.shape == (self.batch_size, self.out_channels, out_len) assert torch.all(out[0, :, :out_len_1] != 0.0) From b9f2cfe7f4f6f23b24ab67320b4c3baa5eb105e4 Mon Sep 17 00:00:00 2001 From: Ryan Langman Date: Wed, 20 Sep 2023 09:03:59 -0700 Subject: [PATCH 239/512] [TTS] Add dataset to path of logged artifacts (#7462) * [TTS] Add dataset to path of logged artifacts Signed-off-by: Ryan * [TTS] Revert axis name back to Audio Frames Signed-off-by: Ryan --------- Signed-off-by: Ryan --- .../tts/data/text_to_speech_dataset.py | 12 +- nemo/collections/tts/data/vocoder_dataset.py | 13 +- nemo/collections/tts/parts/utils/callbacks.py | 185 ++++++++++++------ 3 files changed, 149 insertions(+), 61 deletions(-) diff --git a/nemo/collections/tts/data/text_to_speech_dataset.py b/nemo/collections/tts/data/text_to_speech_dataset.py index 23ddb50346a2..8530356c5cd0 100644 --- a/nemo/collections/tts/data/text_to_speech_dataset.py +++ b/nemo/collections/tts/data/text_to_speech_dataset.py @@ -46,6 +46,7 @@ class DatasetMeta: @dataclass class DatasetSample: + dataset_name: str manifest_entry: Dict[str, Any] audio_dir: Path feature_dir: Path @@ -180,6 +181,7 @@ def _preprocess_manifest( speaker_index = 0 sample = DatasetSample( + dataset_name=dataset_name, manifest_entry=entry, audio_dir=Path(dataset.audio_dir), feature_dir=Path(dataset.feature_dir), @@ -204,7 +206,12 @@ def __getitem__(self, index): audio, _ = librosa.load(audio_filepath_abs, sr=self.sample_rate) tokens = self.text_tokenizer(data.text) - example = {"audio_filepath": audio_filepath_rel, "audio": audio, "tokens": tokens} + example = { + "dataset_name": data.dataset_name, + "audio_filepath": audio_filepath_rel, + "audio": audio, + "tokens": tokens, + } if data.speaker is not None: example["speaker"] = data.speaker @@ -229,6 +236,7 @@ def __getitem__(self, index): return example def collate_fn(self, batch: List[dict]): + dataset_name_list = [] audio_filepath_list = [] audio_list = [] audio_len_list = [] @@ -238,6 +246,7 @@ def collate_fn(self, batch: List[dict]): prior_list = [] for example in batch: + dataset_name_list.append(example["dataset_name"]) audio_filepath_list.append(example["audio_filepath"]) audio_tensor = torch.tensor(example["audio"], dtype=torch.float32) @@ -264,6 +273,7 @@ def collate_fn(self, batch: List[dict]): batch_tokens = stack_tensors(token_list, max_lens=[token_max_len], pad_value=self.text_tokenizer.pad) batch_dict = { + "dataset_names": dataset_name_list, "audio_filepaths": audio_filepath_list, "audio": batch_audio, "audio_lens": batch_audio_len, diff --git a/nemo/collections/tts/data/vocoder_dataset.py b/nemo/collections/tts/data/vocoder_dataset.py index 6bf03068a395..a5a30870dfff 100644 --- a/nemo/collections/tts/data/vocoder_dataset.py +++ b/nemo/collections/tts/data/vocoder_dataset.py @@ -43,6 +43,7 @@ class DatasetMeta: @dataclass class DatasetSample: + dataset_name: str manifest_entry: dict audio_dir: Path @@ -165,7 +166,7 @@ def _preprocess_manifest( samples = [] sample_weights = [] for entry in filtered_entries: - sample = DatasetSample(manifest_entry=entry, audio_dir=Path(dataset.audio_dir),) + sample = DatasetSample(dataset_name=dataset_name, manifest_entry=entry, audio_dir=Path(dataset.audio_dir)) samples.append(sample) sample_weights.append(dataset.sample_weight) @@ -182,7 +183,12 @@ def __getitem__(self, index): audio, audio_len = self._sample_audio(audio_filepath_abs) - example = {"audio_filepath": audio_filepath_rel, "audio": audio, "audio_len": audio_len} + example = { + "dataset_name": data.dataset_name, + "audio_filepath": audio_filepath_rel, + "audio": audio, + "audio_len": audio_len, + } for processor in self.feature_processors: processor.process(example) @@ -190,11 +196,13 @@ def __getitem__(self, index): return example def collate_fn(self, batch: List[dict]): + dataset_name_list = [] audio_filepath_list = [] audio_list = [] audio_len_list = [] for example in batch: + dataset_name_list.append(example["dataset_name"]) audio_filepath_list.append(example["audio_filepath"]) audio_list.append(example["audio"]) audio_len_list.append(example["audio_len"]) @@ -205,6 +213,7 @@ def collate_fn(self, batch: List[dict]): batch_audio = stack_tensors(audio_list, max_lens=[audio_max_len]) batch_dict = { + "dataset_names": dataset_name_list, "audio_filepaths": audio_filepath_list, "audio": batch_audio, "audio_lens": batch_audio_len, diff --git a/nemo/collections/tts/parts/utils/callbacks.py b/nemo/collections/tts/parts/utils/callbacks.py index c1be48bdaa3d..09e5c41112f3 100644 --- a/nemo/collections/tts/parts/utils/callbacks.py +++ b/nemo/collections/tts/parts/utils/callbacks.py @@ -27,6 +27,7 @@ from pytorch_lightning.loggers import TensorBoardLogger from pytorch_lightning.loggers.logger import Logger from pytorch_lightning.loggers.wandb import WandbLogger +from torch import Tensor from nemo.collections.tts.parts.utils.helpers import create_plot from nemo.utils import logging @@ -81,14 +82,14 @@ class AudioArtifact: id: str data: np.ndarray sample_rate: int - filename: str + filepath: Path @dataclass class ImageArtifact: id: str data: np.ndarray - filename: str + filepath: Path x_axis: str y_axis: str @@ -187,7 +188,8 @@ def __init__( def _log_audio(self, audio: AudioArtifact, log_dir: Path, step: int): if log_dir: - filepath = log_dir / audio.filename + filepath = log_dir / audio.filepath + filepath.parent.mkdir(parents=True, exist_ok=True) sf.write(file=filepath, data=audio.data, samplerate=audio.sample_rate) if self.tensorboard_logger: @@ -201,7 +203,8 @@ def _log_audio(self, audio: AudioArtifact, log_dir: Path, step: int): def _log_image(self, image: ImageArtifact, log_dir: Path, step: int): if log_dir: - filepath = log_dir / image.filename + filepath = log_dir / image.filepath + filepath.parent.mkdir(parents=True, exist_ok=True) else: filepath = None @@ -247,7 +250,7 @@ def on_fit_start(self, trainer: Trainer, model: LightningModule): logging.debug('List are empty, no initial artifacts to log.') return - log_dir = self.output_dir / f"initial" if self.output_dir else None + log_dir = self.output_dir / "initial" if self.output_dir else None self._log_artifacts(audio_list=audio_list, image_list=image_list, log_dir=log_dir) @@ -287,28 +290,38 @@ class VocoderArtifactGenerator(ArtifactGenerator): def generate_artifacts( self, model: LightningModule, batch_dict: Dict, initial_log: bool = False ) -> Tuple[List[AudioArtifact], List[ImageArtifact]]: - if initial_log: - # Currently, nothing to log before training starts - return [], [] - - audio_artifacts = [] + dataset_names = batch_dict.get("dataset_names") audio_filepaths = batch_dict.get("audio_filepaths") audio_ids = [create_id(p) for p in audio_filepaths] audio = batch_dict.get("audio") audio_len = batch_dict.get("audio_lens") + audio_artifacts = [] + + if initial_log: + # Log ground truth audio + for i, (dataset_name, audio_id) in enumerate(zip(dataset_names, audio_ids)): + audio_gt_path = Path(f"{dataset_name}/{audio_id}_gt.wav") + audio_gt_i = audio[i, : audio_len[i]].cpu().numpy() + audio_artifact = AudioArtifact( + id=f"audio_gt_{audio_id}", data=audio_gt_i, filepath=audio_gt_path, sample_rate=model.sample_rate, + ) + audio_artifacts.append(audio_artifact) + return audio_artifacts, [] + spec, spec_len = model.audio_to_melspec_precessor(audio, audio_len) with torch.no_grad(): audio_pred = model.forward(spec=spec) audio_pred = rearrange(audio_pred, "B 1 T -> B T") - for i, audio_id in enumerate(audio_ids): - audio_pred_i = audio_pred[i][: audio_len[i]].cpu().numpy() + for i, (dataset_name, audio_id) in enumerate(zip(dataset_names, audio_ids)): + audio_pred_path = Path(f"{dataset_name}/{audio_id}.wav") + audio_pred_i = audio_pred[i, : audio_len[i]].cpu().numpy() audio_artifact = AudioArtifact( - id=f"audio_{audio_id}", data=audio_pred_i, filename=f"{audio_id}.wav", sample_rate=model.sample_rate, + id=f"audio_{audio_id}", data=audio_pred_i, filepath=audio_pred_path, sample_rate=model.sample_rate, ) audio_artifacts.append(audio_artifact) @@ -327,19 +340,26 @@ def __init__(self, log_audio: bool = True, log_encoding: bool = False, log_dequa self.log_encoding = log_encoding # Log dequantized encoded representation of the input audio (decoder input) self.log_dequantized = log_dequantized - # Input audio will be logged only once - self.input_audio_logged = False logging.debug('Initialized %s with', self.__class__.__name__) logging.debug('\tlog_audio: %s', self.log_audio) logging.debug('\tlog_encoding: %s', self.log_encoding) logging.debug('\tlog_dequantized: %s', self.log_dequantized) - def _generate_audio(self, model, audio_ids, audio, audio_len, save_input: bool = False): + def _generate_audio( + self, + model: LightningModule, + dataset_names: List[str], + audio_ids: List[str], + audio: Tensor, + audio_len: Tensor, + save_input: bool = False, + ): """Generate audio artifacts. Args: model: callable model, outputs (audio_pred, audio_pred_len) + dataset_names: list of dataset names for the examples in audio batch audio_ids: list of IDs for the examples in audio batch audio: tensor of input audio signals, shape (B, T) audio_len: tensor of lengths for each example in the batch, shape (B,) @@ -354,35 +374,34 @@ def _generate_audio(self, model, audio_ids, audio, audio_len, save_input: bool = audio_artifacts = [] # Log output audio - for i, audio_id in enumerate(audio_ids): + for i, (dataset_name, audio_id) in enumerate(zip(dataset_names, audio_ids)): + audio_pred_path = Path(f"{dataset_name}/{audio_id}_audio_out.wav") audio_pred_i = audio_pred[i, : audio_pred_len[i]].cpu().numpy() audio_artifact = AudioArtifact( - id=f"audio_out_{audio_id}", - data=audio_pred_i, - filename=f"{audio_id}_audio_out.wav", - sample_rate=model.sample_rate, + id=f"audio_out_{audio_id}", data=audio_pred_i, filepath=audio_pred_path, sample_rate=model.sample_rate, ) audio_artifacts.append(audio_artifact) if save_input: # save input audio - for i, audio_id in enumerate(audio_ids): + for i, (dataset_name, audio_id) in enumerate(zip(dataset_names, audio_ids)): + audio_in_path = Path(f"{dataset_name}/{audio_id}_audio_in.wav") audio_in_i = audio[i, : audio_len[i]].cpu().numpy() audio_artifact = AudioArtifact( - id=f"audio_in_{audio_id}", - data=audio_in_i, - filename=f"{audio_id}_audio_in.wav", - sample_rate=model.sample_rate, + id=f"audio_in_{audio_id}", data=audio_in_i, filepath=audio_in_path, sample_rate=model.sample_rate, ) audio_artifacts.append(audio_artifact) return audio_artifacts - def _generate_images(self, model, audio_ids, audio, audio_len): + def _generate_images( + self, model: LightningModule, dataset_names: List[str], audio_ids: List[str], audio: Tensor, audio_len: Tensor + ): """Generate image artifacts. Args: model: model, needs to support `model.encode_audio`, `model.quantize` and `model.dequantize` + dataset_names: list of dataset names for the examples in audio batch audio_ids: list of IDs for the examples in audio batch audio: tensor of input audio signals, shape (B, T) audio_len: tensor of lengths for each example in the batch, shape (B,) @@ -397,12 +416,13 @@ def _generate_images(self, model, audio_ids, audio, audio_len): encoded, encoded_len = model.encode_audio(audio=audio, audio_len=audio_len) if self.log_encoding: - for i, audio_id in enumerate(audio_ids): + for i, (dataset_name, audio_id) in enumerate(zip(dataset_names, audio_ids)): + encoded_path = Path(f"{dataset_name}/{audio_id}_encoded.png") encoded_i = encoded[i, :, : encoded_len[i]].cpu().numpy() encoded_artifact = ImageArtifact( id=f"encoded_{audio_id}", data=encoded_i, - filename=f"{audio_id}_encoded.png", + filepath=encoded_path, x_axis="Audio Frames", y_axis="Channels", ) @@ -416,12 +436,13 @@ def _generate_images(self, model, audio_ids, audio, audio_len): tokens = model.quantize(encoded=encoded, encoded_len=encoded_len) dequantized = model.dequantize(tokens=tokens, tokens_len=encoded_len) - for i, audio_id in enumerate(audio_ids): + for i, (dataset_name, audio_id) in enumerate(zip(dataset_names, audio_ids)): + dequantized_path = Path(f"{dataset_name}/{audio_id}_dequantized.png") dequantized_i = dequantized[i, :, : encoded_len[i]].cpu().numpy() dequantized_artifact = ImageArtifact( id=f"dequantized_{audio_id}", data=dequantized_i, - filename=f"{audio_id}_dequantized.png", + filepath=dequantized_path, x_axis="Audio Frames", y_axis="Channels", ) @@ -439,6 +460,7 @@ def generate_artifacts( initial_log: save input audio for the initial log """ + dataset_names = batch_dict.get("dataset_names") audio_filepaths = batch_dict.get("audio_filepaths") audio_ids = [create_id(p) for p in audio_filepaths] @@ -446,9 +468,16 @@ def generate_artifacts( audio_len = batch_dict.get("audio_lens") audio_artifacts = self._generate_audio( - model=model, audio_ids=audio_ids, audio=audio, audio_len=audio_len, save_input=initial_log + model=model, + dataset_names=dataset_names, + audio_ids=audio_ids, + audio=audio, + audio_len=audio_len, + save_input=initial_log, + ) + image_artifacts = self._generate_images( + model=model, dataset_names=dataset_names, audio_ids=audio_ids, audio=audio, audio_len=audio_len ) - image_artifacts = self._generate_images(model=model, audio_ids=audio_ids, audio=audio, audio_len=audio_len) return audio_artifacts, image_artifacts @@ -486,7 +515,36 @@ def __init__( type=audio_params.vocoder_type, ) - def _generate_audio(self, mels, mels_len, hop_length): + def _create_ground_truth_artifacts( + self, model: LightningModule, dataset_names: List[str], audio_ids: List[str], batch_dict: Dict + ): + audio = batch_dict.get("audio") + audio_lens = batch_dict.get("audio_lens") + spec, spec_len = model.preprocessor(input_signal=audio, length=audio_lens) + + audio_artifacts = [] + image_artifacts = [] + for i, (dataset_name, audio_id) in enumerate(zip(dataset_names, audio_ids)): + audio_gt_path = Path(f"{dataset_name}/{audio_id}_gt.wav") + audio_gt_i = audio[i, : audio_lens[i]].cpu().numpy() + audio_artifact = AudioArtifact( + id=f"audio_gt_{audio_id}", + data=audio_gt_i, + filepath=audio_gt_path, + sample_rate=self.vocoder.sample_rate, + ) + audio_artifacts.append(audio_artifact) + + spec_gt_path = Path(f"{dataset_name}/{audio_id}_spec_gt.png") + spec_gt_i = spec[i, :, : spec_len[i]].cpu().numpy() + spec_artifact = ImageArtifact( + id=f"spec_{audio_id}", data=spec_gt_i, filepath=spec_gt_path, x_axis="Audio Frames", y_axis="Channels", + ) + image_artifacts.append(spec_artifact) + + return audio_artifacts, image_artifacts + + def _generate_audio(self, mels: Tensor, mels_len: Tensor, hop_length: int): voc_input = mels.to(self.vocoder.device) with torch.no_grad(): audio_pred = self.vocoder.convert_spectrogram_to_audio(spec=voc_input) @@ -495,7 +553,9 @@ def _generate_audio(self, mels, mels_len, hop_length): audio_pred_lens = librosa.core.frames_to_samples(mels_len_array, hop_length=hop_length) return audio_pred, audio_pred_lens - def _generate_predictions(self, model: LightningModule, audio_ids: List[str], batch_dict: Dict): + def _generate_predictions( + self, model: LightningModule, dataset_names: List[str], audio_ids: List[str], batch_dict: Dict + ): audio_artifacts = [] image_artifacts = [] @@ -508,14 +568,11 @@ def _generate_predictions(self, model: LightningModule, audio_ids: List[str], ba mels_pred, mels_pred_len, *_ = model.forward(text=text, input_lens=text_lens, speaker=speaker,) if self.log_spectrogram: - for i, audio_id in enumerate(audio_ids): - spec_i = mels_pred[i][:, : mels_pred_len[i]].cpu().numpy() + for i, (dataset_name, audio_id) in enumerate(zip(dataset_names, audio_ids)): + spec_path = Path(f"{dataset_name}/{audio_id}_spec.png") + spec_i = mels_pred[i, :, : mels_pred_len[i]].cpu().numpy() spec_artifact = ImageArtifact( - id=f"spec_{audio_id}", - data=spec_i, - filename=f"{audio_id}_spec.png", - x_axis="Audio Frames", - y_axis="Channels", + id=f"spec_{audio_id}", data=spec_i, filepath=spec_path, x_axis="Audio Frames", y_axis="Channels", ) image_artifacts.append(spec_artifact) @@ -524,19 +581,22 @@ def _generate_predictions(self, model: LightningModule, audio_ids: List[str], ba audio_pred, audio_pred_lens = self._generate_audio( mels=mels_pred, mels_len=mels_pred_len, hop_length=model.preprocessor.hop_length ) - for i, audio_id in enumerate(audio_ids): - audio_pred_i = audio_pred[i][: audio_pred_lens[i]].cpu().numpy() + for i, (dataset_name, audio_id) in enumerate(zip(dataset_names, audio_ids)): + audio_pred_path = Path(f"{dataset_name}/{audio_id}.wav") + audio_pred_i = audio_pred[i, : audio_pred_lens[i]].cpu().numpy() audio_artifact = AudioArtifact( id=f"audio_{audio_id}", data=audio_pred_i, - filename=f"{audio_id}.wav", + filepath=audio_pred_path, sample_rate=self.vocoder.sample_rate, ) audio_artifacts.append(audio_artifact) return audio_artifacts, image_artifacts - def _generate_gta_predictions(self, model: LightningModule, audio_ids: List[str], batch_dict: Dict): + def _generate_gta_predictions( + self, model: LightningModule, dataset_names: List[str], audio_ids: List[str], batch_dict: Dict + ): audio_artifacts = [] image_artifacts = [] @@ -564,12 +624,13 @@ def _generate_gta_predictions(self, model: LightningModule, audio_ids: List[str] if self.log_alignment: attn = rearrange(attn, "B 1 T_spec T_text -> B T_text T_spec") - for i, audio_id in enumerate(audio_ids): - attn_i = attn[i][: text_lens[i], : mels_pred_len[i]].cpu().numpy() + for i, (dataset_name, audio_id) in enumerate(zip(dataset_names, audio_ids)): + attn_path = Path(f"{dataset_name}/{audio_id}_align.png") + attn_i = attn[i, : text_lens[i], : mels_pred_len[i]].cpu().numpy() alignment_artifact = ImageArtifact( id=f"align_{audio_id}", data=attn_i, - filename=f"{audio_id}_align.png", + filepath=attn_path, x_axis="Audio Frames", y_axis="Text Tokens", ) @@ -580,12 +641,13 @@ def _generate_gta_predictions(self, model: LightningModule, audio_ids: List[str] audio_pred, audio_pred_lens = self._generate_audio( mels=mels_pred, mels_len=mels_pred_len, hop_length=model.preprocessor.hop_length ) - for i, audio_id in enumerate(audio_ids): - audio_pred_i = audio_pred[i][: audio_pred_lens[i]].cpu().numpy() + for i, (dataset_name, audio_id) in enumerate(zip(dataset_names, audio_ids)): + audio_pred_path = Path(f"{dataset_name}/{audio_id}_gta.wav") + audio_pred_i = audio_pred[i, : audio_pred_lens[i]].cpu().numpy() audio_artifact = AudioArtifact( id=f"audio_gta_{audio_id}", data=audio_pred_i, - filename=f"{audio_id}_gta.wav", + filepath=audio_pred_path, sample_rate=self.vocoder.sample_rate, ) audio_artifacts.append(audio_artifact) @@ -596,23 +658,30 @@ def generate_artifacts( self, model: LightningModule, batch_dict: Dict, initial_log: bool = False ) -> Tuple[List[AudioArtifact], List[ImageArtifact]]: + dataset_names = batch_dict.get("dataset_names") + audio_filepaths = batch_dict.get("audio_filepaths") + audio_ids = [create_id(p) for p in audio_filepaths] + if initial_log: - # Currently, nothing to log before training starts - return [], [] + # Log ground truth audio and spectrograms + audio_gt, spec_gt = self._create_ground_truth_artifacts( + model=model, dataset_names=dataset_names, audio_ids=audio_ids, batch_dict=batch_dict + ) + return audio_gt, spec_gt audio_artifacts = [] image_artifacts = [] - audio_filepaths = batch_dict.get("audio_filepaths") - audio_ids = [create_id(p) for p in audio_filepaths] if self.log_audio or self.log_spectrogram: - audio_pred, spec_pred = self._generate_predictions(model=model, batch_dict=batch_dict, audio_ids=audio_ids) + audio_pred, spec_pred = self._generate_predictions( + model=model, dataset_names=dataset_names, audio_ids=audio_ids, batch_dict=batch_dict + ) audio_artifacts += audio_pred image_artifacts += spec_pred if self.log_audio_gta or self.log_alignment: audio_gta_pred, alignments = self._generate_gta_predictions( - model=model, batch_dict=batch_dict, audio_ids=audio_ids + model=model, dataset_names=dataset_names, audio_ids=audio_ids, batch_dict=batch_dict ) audio_artifacts += audio_gta_pred image_artifacts += alignments From 2e0133c2a94f35384a8ab7d8420bed17d8150507 Mon Sep 17 00:00:00 2001 From: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Date: Wed, 20 Sep 2023 15:10:25 -0400 Subject: [PATCH 240/512] Fix sft dataset truncation (#7464) * Add fix Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix Signed-off-by: Cheng-Ping Hsieh --------- Signed-off-by: Cheng-Ping Hsieh Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../nlp/data/language_modeling/megatron/gpt_sft_dataset.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py index 72ba68945391..2c655d5cde6b 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py @@ -272,7 +272,10 @@ def _multiple_truncation(self, template_ids: List[List[int]], template_ids_keys: for i, (ids, key) in enumerate(zip(template_ids, template_ids_keys)): if key in self.truncation_fields: truncation_length = truncation_length_list.pop() - assert len(ids) >= truncation_length, f'{key} is not long enough to truncate.' + if len(ids) < truncation_length: + logging.warning(f'{key} is not long enough to truncate.') + truncation_length = len(ids) + if self.truncation_method == 'left': window_offset = truncation_length elif self.truncation_method == 'right': @@ -328,6 +331,7 @@ def _process_example(self, example): if len(input_ids) > self.max_seq_length: logging.warning(f'Input ids length {len(input_ids)} exceed max sequence length {self.max_seq_length}') input_ids = input_ids[: self.max_seq_length] + answer_ids = input_ids[answer_start_idx:] # store metadata in dataset, in case user may have keys required in the prediction json files metadata = {k: v for k, v in example.items() if k not in self.prompt_template_keys} From d25cac2ef60c7d6c55e019ad8611015e782958f8 Mon Sep 17 00:00:00 2001 From: Maxime Burchi <60737204+burchim@users.noreply.github.com> Date: Wed, 20 Sep 2023 14:14:16 -0700 Subject: [PATCH 241/512] Automatic Lip Reading Recognition (ALR) - ASR/CV (Visual ASR) (#7330) * striding_conv1d_k5 and dw_striding_conv1d_k5 subsampling Signed-off-by: mburchi * transpose conv1d inputs Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: mburchi * Update subsampling.py change striding_conv1d_k5 to striding_conv1d Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> * cv branch Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * video manifest Signed-off-by: mburchi * add collection classes Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add test_step_outputs Signed-off-by: mburchi * correct manifest bug when having only audio or only videos Signed-off-by: mburchi * correct manifest bug when having only audio or only videos Signed-off-by: mburchi * clean references Signed-off-by: mburchi * freeze unfreeze transcribe cv models Signed-off-by: mburchi * correct manifest get_full_path bug Signed-off-by: mburchi * update for PR Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * guard torchvision Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update nemo/collections/cv/data/video_to_text_dataset.py Co-authored-by: Igor Gitman Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> * _video_speech_collate_fn in cv/data/video_to_text.py Signed-off-by: mburchi * add self.out = None to asr subsampling Signed-off-by: mburchi * Update nemo/collections/cv/data/video_to_text_dataset.py Co-authored-by: Igor Gitman Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> * cv -> multimodal/speech_cv branch Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: mburchi Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Igor Gitman --- .../asr/parts/submodules/subsampling.py | 152 ++- .../common/parts/preprocessing/collections.py | 144 +++ .../common/parts/preprocessing/manifest.py | 21 +- .../multimodal/speech_cv/__init__.py | 25 + .../multimodal/speech_cv/data/__init__.py | 13 + .../speech_cv/data/video_to_text.py | 870 +++++++++++++++++ .../speech_cv/data/video_to_text_dataset.py | 287 ++++++ .../multimodal/speech_cv/models/__init__.py | 27 + .../speech_cv/models/visual_ctc_bpe_models.py | 314 ++++++ .../speech_cv/models/visual_ctc_models.py | 692 +++++++++++++ .../visual_hybrid_rnnt_ctc_bpe_models.py | 455 +++++++++ .../models/visual_hybrid_rnnt_ctc_models.py | 644 ++++++++++++ .../models/visual_rnnt_bpe_models.py | 321 ++++++ .../speech_cv/models/visual_rnnt_models.py | 920 ++++++++++++++++++ .../multimodal/speech_cv/modules/__init__.py | 20 + .../linear_projection_video_front_end.py | 143 +++ .../modules/resnet_video_front_end.py | 84 ++ .../speech_cv/modules/video_augment.py | 225 +++++ .../speech_cv/modules/video_preprocessing.py | 138 +++ .../multimodal/speech_cv/parts/__init__.py | 13 + .../speech_cv/parts/preprocessing/features.py | 62 ++ .../speech_cv/parts/submodules/__init__.py | 13 + .../speech_cv/parts/submodules/conv2d.py | 72 ++ .../parts/submodules/global_avg_pool2d.py | 28 + .../speech_cv/parts/submodules/permute.py | 28 + .../speech_cv/parts/submodules/resnet.py | 175 ++++ .../parts/submodules/resnet_block.py | 86 ++ .../submodules/resnet_bottleneck_block.py | 107 ++ nemo/core/neural_types/elements.py | 16 + 29 files changed, 6075 insertions(+), 20 deletions(-) create mode 100644 nemo/collections/multimodal/speech_cv/__init__.py create mode 100644 nemo/collections/multimodal/speech_cv/data/__init__.py create mode 100644 nemo/collections/multimodal/speech_cv/data/video_to_text.py create mode 100644 nemo/collections/multimodal/speech_cv/data/video_to_text_dataset.py create mode 100644 nemo/collections/multimodal/speech_cv/models/__init__.py create mode 100644 nemo/collections/multimodal/speech_cv/models/visual_ctc_bpe_models.py create mode 100644 nemo/collections/multimodal/speech_cv/models/visual_ctc_models.py create mode 100644 nemo/collections/multimodal/speech_cv/models/visual_hybrid_rnnt_ctc_bpe_models.py create mode 100644 nemo/collections/multimodal/speech_cv/models/visual_hybrid_rnnt_ctc_models.py create mode 100644 nemo/collections/multimodal/speech_cv/models/visual_rnnt_bpe_models.py create mode 100644 nemo/collections/multimodal/speech_cv/models/visual_rnnt_models.py create mode 100644 nemo/collections/multimodal/speech_cv/modules/__init__.py create mode 100644 nemo/collections/multimodal/speech_cv/modules/linear_projection_video_front_end.py create mode 100644 nemo/collections/multimodal/speech_cv/modules/resnet_video_front_end.py create mode 100644 nemo/collections/multimodal/speech_cv/modules/video_augment.py create mode 100644 nemo/collections/multimodal/speech_cv/modules/video_preprocessing.py create mode 100644 nemo/collections/multimodal/speech_cv/parts/__init__.py create mode 100644 nemo/collections/multimodal/speech_cv/parts/preprocessing/features.py create mode 100644 nemo/collections/multimodal/speech_cv/parts/submodules/__init__.py create mode 100644 nemo/collections/multimodal/speech_cv/parts/submodules/conv2d.py create mode 100644 nemo/collections/multimodal/speech_cv/parts/submodules/global_avg_pool2d.py create mode 100644 nemo/collections/multimodal/speech_cv/parts/submodules/permute.py create mode 100644 nemo/collections/multimodal/speech_cv/parts/submodules/resnet.py create mode 100644 nemo/collections/multimodal/speech_cv/parts/submodules/resnet_block.py create mode 100644 nemo/collections/multimodal/speech_cv/parts/submodules/resnet_bottleneck_block.py diff --git a/nemo/collections/asr/parts/submodules/subsampling.py b/nemo/collections/asr/parts/submodules/subsampling.py index 9e9d6726f5c0..068cd36022b0 100644 --- a/nemo/collections/asr/parts/submodules/subsampling.py +++ b/nemo/collections/asr/parts/submodules/subsampling.py @@ -18,7 +18,7 @@ import torch.nn as nn from torch.nn import LayerNorm -from nemo.collections.asr.parts.submodules.causal_convs import CausalConv2D +from nemo.collections.asr.parts.submodules.causal_convs import CausalConv1D, CausalConv2D from nemo.utils import logging @@ -251,19 +251,129 @@ def __init__( ) layers.append(activation) in_channels = conv_channels + + elif subsampling == 'striding_conv1d': + + in_channels = feat_in + + self._stride = 2 + self._kernel_size = 5 + self._ceil_mode = False + + if self.is_causal: + self._left_padding = self._kernel_size - 1 + self._right_padding = self._stride - 1 + self._max_cache_len = subsampling_factor + 1 + else: + self._left_padding = (self._kernel_size - 1) // 2 + self._right_padding = (self._kernel_size - 1) // 2 + self._max_cache_len = 0 + + for i in range(self._sampling_num): + if self.is_causal: + layers.append( + CausalConv1D( + in_channels=in_channels, + out_channels=feat_out if self._sampling_num == i + 1 else conv_channels, + kernel_size=self._kernel_size, + stride=self._stride, + padding=None, + ) + ) + else: + layers.append( + torch.nn.Conv1d( + in_channels=in_channels, + out_channels=feat_out if self._sampling_num == i + 1 else conv_channels, + kernel_size=self._kernel_size, + stride=self._stride, + padding=self._left_padding, + ) + ) + layers.append(activation) + in_channels = conv_channels + + elif subsampling == 'dw_striding_conv1d': + + in_channels = feat_in + + self._stride = 2 + self._kernel_size = 5 + self._ceil_mode = False + + self._left_padding = (self._kernel_size - 1) // 2 + self._right_padding = (self._kernel_size - 1) // 2 + + # Layer 1 + layers.extend( + [ + torch.nn.Conv1d( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=self._kernel_size, + stride=self._stride, + padding=self._left_padding, + groups=in_channels, + ), + torch.nn.Conv1d( + in_channels=in_channels, + out_channels=feat_out if self._sampling_num == 1 else conv_channels, + kernel_size=1, + stride=1, + padding=0, + groups=1, + ), + ] + ) + in_channels = conv_channels + layers.append(activation) + + for i in range(self._sampling_num - 1): + layers.extend( + [ + torch.nn.Conv1d( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=self._kernel_size, + stride=self._stride, + padding=self._left_padding, + groups=in_channels, + ), + torch.nn.Conv1d( + in_channels=in_channels, + out_channels=feat_out if self._sampling_num == i + 2 else conv_channels, + kernel_size=1, + stride=1, + padding=0, + groups=1, + ), + ] + ) + layers.append(activation) + in_channels = conv_channels + + else: + raise ValueError(f"Not valid sub-sampling: {subsampling}!") + + if subsampling in ["vggnet", "dw_striding", "striding"]: + + in_length = torch.tensor(feat_in, dtype=torch.float) + out_length = calc_length( + lengths=in_length, + all_paddings=self._left_padding + self._right_padding, + kernel_size=self._kernel_size, + stride=self._stride, + ceil_mode=self._ceil_mode, + repeat_num=self._sampling_num, + ) + self.out = torch.nn.Linear(conv_channels * int(out_length), feat_out) + self.conv2d_subsampling = True + elif subsampling in ["striding_conv1d", "dw_striding_conv1d"]: + self.out = None + self.conv2d_subsampling = False else: raise ValueError(f"Not valid sub-sampling: {subsampling}!") - in_length = torch.tensor(feat_in, dtype=torch.float) - out_length = calc_length( - lengths=in_length, - all_paddings=self._left_padding + self._right_padding, - kernel_size=self._kernel_size, - stride=self._stride, - ceil_mode=self._ceil_mode, - repeat_num=self._sampling_num, - ) - self.out = torch.nn.Linear(conv_channels * int(out_length), feat_out) self.conv = torch.nn.Sequential(*layers) def get_sampling_frames(self): @@ -281,10 +391,16 @@ def forward(self, x, lengths): ceil_mode=self._ceil_mode, repeat_num=self._sampling_num, ) - x = x.unsqueeze(1) + + # Unsqueeze Channel Axis + if self.conv2d_subsampling: + x = x.unsqueeze(1) + # Transpose to Channel First mode + else: + x = x.transpose(1, 2) # split inputs if chunking_factor is set - if self.subsampling_conv_chunking_factor != -1: + if self.subsampling_conv_chunking_factor != -1 and self.conv2d_subsampling: if self.subsampling_conv_chunking_factor == 1: # if subsampling_conv_chunking_factor is 1, we split only if needed # avoiding a bug / feature limiting indexing of tensors to 2**31 @@ -310,8 +426,14 @@ def forward(self, x, lengths): else: x = self.conv(x) - b, c, t, f = x.size() - x = self.out(x.transpose(1, 2).reshape(b, t, -1)) + # Flatten Channel and Frequency Axes + if self.conv2d_subsampling: + b, c, t, f = x.size() + x = self.out(x.transpose(1, 2).reshape(b, t, -1)) + # Transpose to Channel Last mode + else: + x = x.transpose(1, 2) + return x, lengths def reset_parameters(self): diff --git a/nemo/collections/common/parts/preprocessing/collections.py b/nemo/collections/common/parts/preprocessing/collections.py index ed9e53ae6ffe..66def034400f 100644 --- a/nemo/collections/common/parts/preprocessing/collections.py +++ b/nemo/collections/common/parts/preprocessing/collections.py @@ -199,6 +199,114 @@ def __init__( super().__init__(data) +class VideoText(_Collection): + """List of video-transcript text correspondence with preprocessing.""" + + OUTPUT_TYPE = collections.namedtuple( + typename='AudioTextEntity', + field_names='id video_file duration text_tokens offset text_raw speaker orig_sr lang', + ) + + def __init__( + self, + ids: List[int], + video_files: List[str], + durations: List[float], + texts: List[str], + offsets: List[str], + speakers: List[Optional[int]], + orig_sampling_rates: List[Optional[int]], + token_labels: List[Optional[int]], + langs: List[Optional[str]], + parser: parsers.CharParser, + min_duration: Optional[float] = None, + max_duration: Optional[float] = None, + max_number: Optional[int] = None, + do_sort_by_duration: bool = False, + index_by_file_id: bool = False, + ): + """Instantiates video-text manifest with filters and preprocessing. + + Args: + ids: List of examples positions. + video_files: List of video files. + durations: List of float durations. + texts: List of raw text transcripts. + offsets: List of duration offsets or None. + speakers: List of optional speakers ids. + orig_sampling_rates: List of original sampling rates of audio files. + langs: List of language ids, one for eadh sample, or None. + parser: Instance of `CharParser` to convert string to tokens. + min_duration: Minimum duration to keep entry with (default: None). + max_duration: Maximum duration to keep entry with (default: None). + max_number: Maximum number of samples to collect. + do_sort_by_duration: True if sort samples list by duration. Not compatible with index_by_file_id. + index_by_file_id: If True, saves a mapping from filename base (ID) to index in data. + """ + + output_type = self.OUTPUT_TYPE + data, duration_filtered, num_filtered, total_duration = [], 0.0, 0, 0.0 + if index_by_file_id: + self.mapping = {} + + for id_, video_file, duration, offset, text, speaker, orig_sr, token_labels, lang in zip( + ids, video_files, durations, offsets, texts, speakers, orig_sampling_rates, token_labels, langs + ): + # Duration filters. + if min_duration is not None and duration < min_duration: + duration_filtered += duration + num_filtered += 1 + continue + + if max_duration is not None and duration > max_duration: + duration_filtered += duration + num_filtered += 1 + continue + + if token_labels is not None: + text_tokens = token_labels + else: + if text != '': + if hasattr(parser, "is_aggregate") and parser.is_aggregate and isinstance(text, str): + if lang is not None: + text_tokens = parser(text, lang) + else: + raise ValueError("lang required in manifest when using aggregate tokenizers") + else: + text_tokens = parser(text) + else: + text_tokens = [] + + if text_tokens is None: + duration_filtered += duration + num_filtered += 1 + continue + + total_duration += duration + + data.append(output_type(id_, video_file, duration, text_tokens, offset, text, speaker, orig_sr, lang)) + if index_by_file_id: + file_id, _ = os.path.splitext(os.path.basename(video_file)) + if file_id not in self.mapping: + self.mapping[file_id] = [] + self.mapping[file_id].append(len(data) - 1) + + # Max number of entities filter. + if len(data) == max_number: + break + + if do_sort_by_duration: + if index_by_file_id: + logging.warning("Tried to sort dataset by duration, but cannot since index_by_file_id is set.") + else: + data.sort(key=lambda entity: entity.duration) + + logging.info("Dataset loaded with %d files totalling %.2f hours", len(data), total_duration / 3600) + logging.info("%d files were filtered totalling %.2f hours", num_filtered, duration_filtered / 3600) + + super().__init__(data) + + class ASRAudioText(AudioText): """`AudioText` collector from asr structured json files.""" @@ -235,6 +343,42 @@ def __init__(self, manifests_files: Union[str, List[str]], *args, **kwargs): ) +class ASRVideoText(VideoText): + """`VideoText` collector from cv structured json files.""" + + def __init__(self, manifests_files: Union[str, List[str]], *args, **kwargs): + """Parse lists of video files, durations and transcripts texts. + + Args: + manifests_files: Either single string file or list of such - + manifests to yield items from. + *args: Args to pass to `VideoText` constructor. + **kwargs: Kwargs to pass to `VideoText` constructor. + """ + + ids, video_files, durations, texts, offsets, = ( + [], + [], + [], + [], + [], + ) + speakers, orig_srs, token_labels, langs = [], [], [], [] + for item in manifest.item_iter(manifests_files): + ids.append(item['id']) + video_files.append(item['video_file']) + durations.append(item['duration']) + texts.append(item['text']) + offsets.append(item['offset']) + speakers.append(item['speaker']) + orig_srs.append(item['orig_sr']) + token_labels.append(item['token_labels']) + langs.append(item['lang']) + super().__init__( + ids, video_files, durations, texts, offsets, speakers, orig_srs, token_labels, langs, *args, **kwargs + ) + + class SpeechLabel(_Collection): """List of audio-label correspondence with preprocessing.""" diff --git a/nemo/collections/common/parts/preprocessing/manifest.py b/nemo/collections/common/parts/preprocessing/manifest.py index 98194505c589..882895570711 100644 --- a/nemo/collections/common/parts/preprocessing/manifest.py +++ b/nemo/collections/common/parts/preprocessing/manifest.py @@ -91,16 +91,26 @@ def __parse_item(line: str, manifest_file: str) -> Dict[str, Any]: item['audio_file'] = item.pop('audio_filename') elif 'audio_filepath' in item: item['audio_file'] = item.pop('audio_filepath') - elif 'audio_file' not in item: + + # Video File + if 'video_filename' in item: + item['video_file'] = item.pop('video_filename') + elif 'video_filepath' in item: + item['video_file'] = item.pop('video_filepath') + + if 'video_file' not in item and 'audio_file' not in item: raise ValueError( - f"Manifest file {manifest_file} has invalid json line structure: {line} without proper audio file key." + f"Manifest file {manifest_file} has invalid json line structure: {line} without proper audio/video file key." ) - # If the audio path is a relative path and does not exist, + # If the audio/video path is a relative path and does not exist, # try to attach the parent directory of manifest to the audio path. # Revert to the original path if the new path still doesn't exist. # Assume that the audio path is like "wavs/xxxxxx.wav". - item['audio_file'] = get_full_path(audio_file=item['audio_file'], manifest_file=manifest_file) + if 'audio_file' in item: + item['audio_file'] = get_full_path(audio_file=item['audio_file'], manifest_file=manifest_file) + if 'video_file' in item: + item['video_file'] = get_full_path(audio_file=item['video_file'], manifest_file=manifest_file) # Duration. if 'duration' not in item: @@ -144,7 +154,8 @@ def __parse_item(line: str, manifest_file: str) -> Dict[str, Any]: item['feature_file'] = get_full_path(audio_file=item['feature_file'], manifest_file=manifest_file) item = dict( - audio_file=item['audio_file'], + audio_file=item.get('audio_file', None), + video_file=item.get('video_file', None), duration=item['duration'], text=item['text'], rttm_file=item['rttm_file'], diff --git a/nemo/collections/multimodal/speech_cv/__init__.py b/nemo/collections/multimodal/speech_cv/__init__.py new file mode 100644 index 000000000000..e13e4812457b --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo.collections.multimodal.speech_cv import data, models, modules +from nemo.package_info import __version__ + +# Set collection version equal to NeMo version. +__version = __version__ + +# Authorship. +__author__ = "NVIDIA Corporation" + +# Set collection name. +__description__ = "Speech Computer Vision collection" diff --git a/nemo/collections/multimodal/speech_cv/data/__init__.py b/nemo/collections/multimodal/speech_cv/data/__init__.py new file mode 100644 index 000000000000..9e3250071955 --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/data/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/speech_cv/data/video_to_text.py b/nemo/collections/multimodal/speech_cv/data/video_to_text.py new file mode 100644 index 000000000000..d0b903a5895b --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/data/video_to_text.py @@ -0,0 +1,870 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union + +import braceexpand +import torch +import webdataset as wd + +from nemo.collections.asr.data.audio_to_text import cache_datastore_manifests, expand_sharded_filepaths +from nemo.collections.asr.parts.utils.audio_utils import ChannelSelectorType +from nemo.collections.common import tokenizers +from nemo.collections.common.parts.preprocessing import collections, parsers +from nemo.collections.multimodal.speech_cv.parts.preprocessing.features import VideoFeaturizer +from nemo.core.classes import Dataset, IterableDataset +from nemo.core.neural_types import * +from nemo.utils import logging +from nemo.utils.data_utils import datastore_path_to_webdataset_url, is_datastore_path + + +def _video_speech_collate_fn(batch, pad_id): + """collate batch of video sig, video len, tokens, tokens len + Args: + batch (Optional[FloatTensor], Optional[LongTensor], LongTensor, + LongTensor): A tuple of tuples of signal, signal lengths, + encoded tokens, and encoded tokens length. This collate func + assumes the signals are 4d torch tensors (Time, Height, Width, Channels). + """ + packed_batch = list(zip(*batch)) + + if len(packed_batch) == 5: + _, video_lengths, _, tokens_lengths, sample_ids = packed_batch + elif len(packed_batch) == 4: + sample_ids = None + _, video_lengths, _, tokens_lengths = packed_batch + else: + raise ValueError("Expects 4 or 5 tensors in the batch!") + + # Max Video Len + max_video_len = 0 + has_video = video_lengths[0] is not None + if has_video: + max_video_len = max(video_lengths).item() + + # Max Token Len + max_tokens_len = max(tokens_lengths).item() + + video_signal, tokens = [], [] + for b in batch: + + if len(b) == 5: + video_sig, video_sig_len, tokens_i, tokens_i_len, _ = b + else: + video_sig, video_sig_len, tokens_i, tokens_i_len = b + + # Pad and Append Video + if has_video: + video_sig_len = video_sig_len.item() + if video_sig_len < max_video_len: + pad = (0, 0, 0, 0, 0, 0, 0, max_video_len - video_sig_len) + video_sig = torch.nn.functional.pad(video_sig, pad) + video_signal.append(video_sig) + + # Pad and Append Token + tokens_i_len = tokens_i_len.item() + if tokens_i_len < max_tokens_len: + pad = (0, max_tokens_len - tokens_i_len) + tokens_i = torch.nn.functional.pad(tokens_i, pad, value=pad_id) + tokens.append(tokens_i) + + # Stack Video + if has_video: + video_signal = torch.stack(video_signal) + video_lengths = torch.stack(video_lengths) + else: + video_signal, video_lengths = None, None + + # Stack Text + tokens = torch.stack(tokens) + tokens_lengths = torch.stack(tokens_lengths) + + # Return + if sample_ids is None: + return video_signal, video_lengths, tokens, tokens_lengths + else: + sample_ids = torch.tensor(sample_ids, dtype=torch.int32) + return video_signal, video_lengths, tokens, tokens_lengths, sample_ids + + +class _VideoTextDataset(Dataset): + """ + Dataset that loads tensors via a json file containing paths to video files, transcripts, and durations (in seconds). + Each new line is a different sample. Example below: + {"video_filepath": "/path/to/video.mp4", "text_filepath": "/path/to/video.txt", "duration": 23.147} + ... + {"video_filepath": "/path/to/video.mp4", "text": "the transcription", "offset": 301.75, "duration": 0.82, "utt": + "utterance_id", "ctm_utt": "en_4156", "side": "A"} + Args: + manifest_filepath: Path to manifest json as described above. Can be comma-separated paths. + parser: Str for a language specific preprocessor or a callable. + int_values (bool): If true, load samples as 32-bit integers. Defauts to False. + max_duration: If video exceeds this length, do not include in dataset + min_duration: If video is less than this length, do not include in dataset + max_utts: Limit number of utterances + trim: whether or not to trim silence. Defaults to False + bos_id: Id of beginning of sequence symbol to append if not None + eos_id: Id of end of sequence symbol to append if not None + pad_id: Id of pad symbol. Defaults to 0 + return_sample_id (bool): whether to return the sample_id as a part of each sample + channel_selector (int | Iterable[int] | str): select a single channel or a subset of channels from multi-channel audio. If set to `'average'`, it performs averaging across channels. Disabled if set to `None`. Defaults to `None`. Uses zero-based indexing. + """ + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + """Returns definitions of module output ports. + """ + return { + 'video_signal': NeuralType(('B', 'C', 'T', 'H', 'W'), VideoSignal()), + 'video_sig_length': NeuralType(tuple('B'), LengthsType()), + 'transcripts': NeuralType(('B', 'T'), LabelsType()), + 'transcript_length': NeuralType(tuple('B'), LengthsType()), + 'sample_id': NeuralType(tuple('B'), LengthsType(), optional=True), + } + + def __init__( + self, + manifest_filepath: str, + parser: Union[str, Callable], + int_values: bool = False, + max_duration: Optional[int] = None, + min_duration: Optional[int] = None, + max_utts: int = 0, + trim: bool = False, + bos_id: Optional[int] = None, + eos_id: Optional[int] = None, + pad_id: int = 0, + return_sample_id: bool = False, + channel_selector: Optional[ChannelSelectorType] = None, + ): + if type(manifest_filepath) == str: + manifest_filepath = manifest_filepath.split(",") + + # If necessary, cache manifests and audio from object store + cache_datastore_manifests(manifest_filepaths=manifest_filepath, cache_audio=True) + + self.manifest_processor = VSRManifestProcessor( + manifest_filepath=manifest_filepath, + parser=parser, + max_duration=max_duration, + min_duration=min_duration, + max_utts=max_utts, + bos_id=bos_id, + eos_id=eos_id, + pad_id=pad_id, + ) + self.video_featurizer = VideoFeaturizer() + self.trim = trim + self.return_sample_id = return_sample_id + self.channel_selector = channel_selector + + def get_manifest_sample(self, sample_id): + return self.manifest_processor.collection[sample_id] + + def __getitem__(self, index): + + # Select Sample + sample = self.manifest_processor.collection[index] + + # Offset + offset = sample.offset + if offset is None: + offset = 0 + + # Load Video + video_features = self.video_featurizer.process(sample.video_file, offset=offset, duration=sample.duration) + vf, vfl = video_features, torch.tensor(video_features.shape[0]).long() + + # Load Tokens + t, tl = self.manifest_processor.process_text_by_sample(sample=sample) + + if self.return_sample_id: + output = vf, vfl, torch.tensor(t).long(), torch.tensor(tl).long(), index + else: + output = vf, vfl, torch.tensor(t).long(), torch.tensor(tl).long() + + return output + + def __len__(self): + return len(self.manifest_processor.collection) + + def _collate_fn(self, batch): + return _video_speech_collate_fn(batch, pad_id=self.manifest_processor.pad_id) + + +class VSRManifestProcessor: + """ + Class that processes a manifest json file containing paths to video files, transcripts, and durations (in seconds). + Each new line is a different sample. Example below: + {"video_filepath": "/path/to/video.mp4", "text_filepath": "/path/to/video.txt", "duration": 23.147} + ... + {"video_filepath": "/path/to/video.mp4", "text": "the transcription", "offset": 301.75, "duration": 0.82, "utt": + "utterance_id", "ctm_utt": "en_4156", "side": "A"} + Args: + manifest_filepath: Path to manifest json as described above. Can be comma-separated paths. + parser: Str for a language specific preprocessor or a callable. + max_duration: If video exceeds this length, do not include in dataset. + min_duration: If video is less than this length, do not include in dataset. + max_utts: Limit number of utterances. + bos_id: Id of beginning of sequence symbol to append if not None. + eos_id: Id of end of sequence symbol to append if not None. + pad_id: Id of pad symbol. Defaults to 0. + """ + + def __init__( + self, + manifest_filepath: str, + parser: Union[str, Callable], + max_duration: Optional[float] = None, + min_duration: Optional[float] = None, + max_utts: int = 0, + bos_id: Optional[int] = None, + eos_id: Optional[int] = None, + pad_id: int = 0, + index_by_file_id: bool = False, + ): + self.parser = parser + + self.collection = collections.ASRVideoText( + manifests_files=manifest_filepath, + parser=parser, + min_duration=min_duration, + max_duration=max_duration, + max_number=max_utts, + index_by_file_id=index_by_file_id, + ) + + self.eos_id = eos_id + self.bos_id = bos_id + self.pad_id = pad_id + + def process_text_by_id(self, index: int) -> Tuple[List[int], int]: + sample = self.collection[index] + return self.process_text_by_sample(sample) + + def process_text_by_file_id(self, file_id: str) -> Tuple[List[int], int]: + manifest_idx = self.collection.mapping[file_id][0] + sample = self.collection[manifest_idx] + return self.process_text_by_sample(sample) + + def process_text_by_sample(self, sample: collections.ASRAudioText.OUTPUT_TYPE) -> Tuple[List[int], int]: + t, tl = sample.text_tokens, len(sample.text_tokens) + + if self.bos_id is not None: + t = [self.bos_id] + t + tl += 1 + if self.eos_id is not None: + t = t + [self.eos_id] + tl += 1 + + return t, tl + + +class VideoToBPEDataset(_VideoTextDataset): + """ + Dataset that loads tensors via a json file containing paths to video + files, transcripts, and durations (in seconds). Each new line is a + different sample. Example below: + {"video_filepath": "/path/to/video.mp4", "text_filepath": + "/path/to/video.txt", "duration": 23.147} + ... + {"video_filepath": "/path/to/video.mp4", "text": "the + transcription", "offset": 301.75, "duration": 0.82, "utt": + "utterance_id", "ctm_utt": "en_4156", "side": "A"} + + In practice, the dataset and manifest used for character encoding and byte pair encoding + are exactly the same. The only difference lies in how the dataset tokenizes the text in + the manifest. + + Args: + manifest_filepath: Path to manifest json as described above. Can + be comma-separated paths. + tokenizer: A subclass of the Tokenizer wrapper found in the common collection, + nemo.collections.common.tokenizers.TokenizerSpec. ASR Models support a subset of + all available tokenizers. + int_values (bool): If true, load samples as 32-bit integers. Defauts to False. + max_duration: If video exceeds this length, do not include in dataset + min_duration: If video is less than this length, do not include + in dataset + max_utts: Limit number of utterances + trim: Whether to trim silence segments + use_start_end_token: Boolean which dictates whether to add [BOS] and [EOS] + tokens to beginning and ending of speech respectively. + return_sample_id (bool): whether to return the sample_id as a part of each sample + channel_selector (int | Iterable[int] | str): select a single channel or a subset of channels from multi-channel audio. If set to `'average'`, it performs averaging across channels. Disabled if set to `None`. Defaults to `None`. Uses zero-based indexing. + """ + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + """Returns definitions of module output ports. + """ + return { + 'video_signal': NeuralType(('B', 'C', 'T', 'H', 'W'), VideoSignal()), + 'video_sig_length': NeuralType(tuple('B'), LengthsType()), + 'transcripts': NeuralType(('B', 'T'), LabelsType()), + 'transcript_length': NeuralType(tuple('B'), LengthsType()), + 'sample_id': NeuralType(tuple('B'), LengthsType(), optional=True), + } + + def __init__( + self, + manifest_filepath: str, + tokenizer: 'nemo.collections.common.tokenizers.TokenizerSpec', + int_values: bool = False, + max_duration: Optional[int] = None, + min_duration: Optional[int] = None, + max_utts: int = 0, + trim: bool = False, + use_start_end_token: bool = True, + return_sample_id: bool = False, + channel_selector: Optional[ChannelSelectorType] = None, + ): + if use_start_end_token and hasattr(tokenizer, "bos_id") and tokenizer.bos_id > 0: + bos_id = tokenizer.bos_id + else: + bos_id = None + + if use_start_end_token and hasattr(tokenizer, "eos_id") and tokenizer.eos_id > 0: + eos_id = tokenizer.eos_id + else: + eos_id = None + + if hasattr(tokenizer, "pad_id") and tokenizer.pad_id > 0: + pad_id = tokenizer.pad_id + else: + pad_id = 0 + + class TokenizerWrapper: + def __init__(self, tokenizer): + if isinstance(tokenizer, tokenizers.aggregate_tokenizer.AggregateTokenizer): + self.is_aggregate = True + else: + self.is_aggregate = False + self._tokenizer = tokenizer + + def __call__(self, *args): + if isinstance(args[0], List) and self.is_aggregate: + t = [] + for span in args[0]: + t.extend(self._tokenizer.text_to_ids(span['str'], span['lang'])) + return t + + t = self._tokenizer.text_to_ids(*args) + return t + + super().__init__( + manifest_filepath=manifest_filepath, + parser=TokenizerWrapper(tokenizer), + int_values=int_values, + max_duration=max_duration, + min_duration=min_duration, + max_utts=max_utts, + bos_id=bos_id, + eos_id=eos_id, + pad_id=pad_id, + trim=trim, + return_sample_id=return_sample_id, + channel_selector=channel_selector, + ) + + +class VideoToCharDataset(_VideoTextDataset): + """ + Dataset that loads tensors via a json file containing paths to video + files, transcripts, and durations (in seconds). Each new line is a + different sample. Example below: + {"video_filepath": "/path/to/video.mp4", "text_filepath": + "/path/to/video.txt", "duration": 23.147} + ... + {"video_filepath": "/path/to/video.mp4", "text": "the + transcription", "offset": 301.75, "duration": 0.82, "utt": + "utterance_id", "ctm_utt": "en_4156", "side": "A"} + + Args: + manifest_filepath: Path to manifest json as described above. Can + be comma-separated paths. + labels: String containing all the possible characters to map to + int_values (bool): If true, load samples as 32-bit integers. Defauts to False. + max_duration: If video exceeds this length, do not include in dataset + min_duration: If video is less than this length, do not include + in dataset + max_utts: Limit number of utterances + blank_index: blank character index, default = -1 + unk_index: unk_character index, default = -1 + normalize: whether to normalize transcript text (default): True + bos_id: Id of beginning of sequence symbol to append if not None + eos_id: Id of end of sequence symbol to append if not None + return_sample_id (bool): whether to return the sample_id as a part of each sample + channel_selector (int | Iterable[int] | str): select a single channel or a subset of channels from multi-channel audio. If set to `'average'`, it performs averaging across channels. Disabled if set to `None`. Defaults to `None`. Uses zero-based indexing. + """ + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + """Returns definitions of module output ports. + """ + return { + 'video_signal': NeuralType(('B', 'C', 'T', 'H', 'W'), VideoSignal()), + 'video_sig_length': NeuralType(tuple('B'), LengthsType()), + 'transcripts': NeuralType(('B', 'T'), LabelsType()), + 'transcript_length': NeuralType(tuple('B'), LengthsType()), + 'sample_id': NeuralType(tuple('B'), LengthsType(), optional=True), + } + + def __init__( + self, + manifest_filepath: str, + labels: Union[str, List[str]], + int_values: bool = False, + max_duration: Optional[float] = None, + min_duration: Optional[float] = None, + max_utts: int = 0, + blank_index: int = -1, + unk_index: int = -1, + normalize: bool = True, + trim: bool = False, + bos_id: Optional[int] = None, + eos_id: Optional[int] = None, + pad_id: int = 0, + parser: Union[str, Callable] = 'en', + return_sample_id: bool = False, + channel_selector: Optional[ChannelSelectorType] = None, + ): + self.labels = labels + + parser = parsers.make_parser( + labels=labels, name=parser, unk_id=unk_index, blank_id=blank_index, do_normalize=normalize + ) + + super().__init__( + manifest_filepath=manifest_filepath, + parser=parser, + int_values=int_values, + max_duration=max_duration, + min_duration=min_duration, + max_utts=max_utts, + trim=trim, + bos_id=bos_id, + eos_id=eos_id, + pad_id=pad_id, + return_sample_id=return_sample_id, + channel_selector=channel_selector, + ) + + +class _TarredVideoToTextDataset(IterableDataset): + """ + A similar Dataset to the VideoToCharDataset/VideoToBPEDataset, but which loads tarred video files. + + Accepts a single comma-separated JSON manifest file (in the same style as for the VideoToCharDataset/VideoToBPEDataset), + as well as the path(s) to the tarball(s) containing the mp4 files. Each line of the manifest should + contain the information for one video file, including at least the transcript and name of the audio + file within the tarball. + + Valid formats for the audio_tar_filepaths argument include: + (1) a single string that can be brace-expanded, e.g. 'path/to/audio.tar' or 'path/to/audio_{1..100}.tar.gz', or + (2) a list of file paths that will not be brace-expanded, e.g. ['audio_1.tar', 'audio_2.tar', ...]. + + Note: For brace expansion in (1), there may be cases where `{x..y}` syntax cannot be used due to shell interference. + This occurs most commonly inside SLURM scripts. Therefore we provide a few equivalent replacements. + Supported opening braces - { <=> (, [, < and the special tag _OP_. + Supported closing braces - } <=> ), ], > and the special tag _CL_. + For SLURM based tasks, we suggest the use of the special tags for ease of use. + + See the WebDataset documentation for more information about accepted data and input formats. + + If using multiple workers the number of shards should be divisible by world_size to ensure an + even split among workers. If it is not divisible, logging will give a warning but training will proceed. + In addition, if using mutiprocessing, each shard MUST HAVE THE SAME NUMBER OF ENTRIES after filtering + is applied. We currently do not check for this, but your program may hang if the shards are uneven! + + Notice that a few arguments are different from the AudioToCharDataset; for example, shuffle (bool) has been + replaced by shuffle_n (int). + + Additionally, please note that the len() of this DataLayer is assumed to be the length of the manifest + after filtering. An incorrect manifest length may lead to some DataLoader issues down the line. + + Args: + audio_tar_filepaths: Either a list of audio tarball filepaths, or a + string (can be brace-expandable). + manifest_filepath (str): Path to the manifest. + parser (callable): A callable which is used to pre-process the text output. + int_values (bool): If true, load samples as 32-bit integers. Defauts to False. + shuffle_n (int): How many samples to look ahead and load to be shuffled. + See WebDataset documentation for more details. + Defaults to 0. + min_duration (float): Dataset parameter. + All training files which have a duration less than min_duration + are dropped. Note: Duration is read from the manifest JSON. + Defaults to 0.1. + max_duration (float): Dataset parameter. + All training files which have a duration more than max_duration + are dropped. Note: Duration is read from the manifest JSON. + Defaults to None. + blank_index (int): Blank character index, defaults to -1. + unk_index (int): Unknown character index, defaults to -1. + normalize (bool): Dataset parameter. + Whether to use automatic text cleaning. + It is highly recommended to manually clean text for best results. + Defaults to True. + trim (bool): Whether to use trim silence from beginning and end + of audio signal using librosa.effects.trim(). + Defaults to False. + bos_id (id): Dataset parameter. + Beginning of string symbol id used for seq2seq models. + Defaults to None. + eos_id (id): Dataset parameter. + End of string symbol id used for seq2seq models. + Defaults to None. + pad_id (id): Token used to pad when collating samples in batches. + If this is None, pads using 0s. + Defaults to None. + shard_strategy (str): Tarred dataset shard distribution strategy chosen as a str value during ddp. + - `scatter`: The default shard strategy applied by WebDataset, where each node gets + a unique set of shards, which are permanently pre-allocated and never changed at runtime. + - `replicate`: Optional shard strategy, where each node gets all of the set of shards + available in the tarred dataset, which are permanently pre-allocated and never changed at runtime. + The benefit of replication is that it allows each node to sample data points from the entire + dataset independently of other nodes, and reduces dependence on value of `shuffle_n`. + + .. warning:: + Replicated strategy allows every node to sample the entire set of available tarfiles, + and therefore more than one node may sample the same tarfile, and even sample the same + data points! As such, there is no assured guarantee that all samples in the dataset will be + sampled at least once during 1 epoch. Scattered strategy, on the other hand, on specific + occasions (when the number of shards is not divisible with ``world_size``), will not sample + the entire dataset. For these reasons it is not advisable to use tarred datasets as validation + or test datasets. + global_rank (int): Worker rank, used for partitioning shards. Defaults to 0. + world_size (int): Total number of processes, used for partitioning shards. Defaults to 0. + return_sample_id (bool): whether to return the sample_id as a part of each sample + """ + + def __init__( + self, + audio_tar_filepaths: Union[str, List[str]], + manifest_filepath: str, + parser: Callable, + int_values: bool = False, + shuffle_n: int = 0, + min_duration: Optional[float] = None, + max_duration: Optional[float] = None, + trim: bool = False, + bos_id: Optional[int] = None, + eos_id: Optional[int] = None, + pad_id: int = 0, + shard_strategy: str = "scatter", + global_rank: int = 0, + world_size: int = 0, + return_sample_id: bool = False, + ): + # If necessary, cache manifests from object store + cache_datastore_manifests(manifest_filepaths=manifest_filepath) + + self.manifest_processor = VSRManifestProcessor( + manifest_filepath=manifest_filepath, + parser=parser, + max_duration=max_duration, + min_duration=min_duration, + max_utts=0, + bos_id=bos_id, + eos_id=eos_id, + pad_id=pad_id, + index_by_file_id=True, # Must set this so the manifest lines can be indexed by file ID + ) + + self.video_featurizer = VideoFeaturizer() + self.trim = trim + self.eos_id = eos_id + self.bos_id = bos_id + self.pad_id = pad_id + self.return_sample_id = return_sample_id + + audio_tar_filepaths = expand_sharded_filepaths( + audio_tar_filepaths=audio_tar_filepaths, + shard_strategy=shard_strategy, + world_size=world_size, + global_rank=global_rank, + ) + + # Put together WebDataset + self._dataset = wd.WebDataset(urls=audio_tar_filepaths, nodesplitter=None) + + if shuffle_n > 0: + self._dataset = self._dataset.shuffle(shuffle_n) + else: + logging.info("WebDataset will not shuffle files within the tar files.") + + self._dataset = ( + self._dataset.map(wd.autodecode.Decoder([wd.torch_video])) + .rename(video="mp4", key='__key__') + .to_tuple('video', 'key') + .pipe(self._filter) + .pipe(self._loop_offsets) + .map(f=self._build_sample) + ) + + def _filter(self, iterator): + """This function is used to remove samples that have been filtered out by ASRVideoText already. + Otherwise, we would get a KeyError as _build_sample attempts to find the manifest entry for a sample + that was filtered out (e.g. for duration). + Note that if using multi-GPU training, filtering may lead to an imbalance in samples in each shard, + which may make your code hang as one process will finish before the other. + """ + + class TarredAudioFilter: + def __init__(self, collection): + self.iterator = iterator + self.collection = collection + + def __iter__(self): + return self + + def __next__(self): + while True: + try: + video_bytes, audio_filename = next(self.iterator) + except: + print("except") + continue + file_id, _ = os.path.splitext(os.path.basename(audio_filename)) + if file_id in self.collection.mapping: + return video_bytes, audio_filename + + return TarredAudioFilter(self.manifest_processor.collection) + + def _loop_offsets(self, iterator): + """This function is used to iterate through utterances with different offsets for each file. + """ + + class TarredAudioLoopOffsets: + def __init__(self, collection): + self.iterator = iterator + self.collection = collection + self.current_fn = None + self.current_video_bytes = None + self.offset_id = 0 + + def __iter__(self): + return self + + def __next__(self): + if self.current_fn is None: + self.current_video_bytes, self.current_fn = next(self.iterator) + self.offset_id = 0 + else: + offset_list = self.collection.mapping[self.current_fn] + if len(offset_list) == self.offset_id + 1: + self.current_video_bytes, self.current_fn = next(self.iterator) + self.offset_id = 0 + else: + self.offset_id += 1 + + return self.current_video_bytes, self.current_fn, self.offset_id + + return TarredAudioLoopOffsets(self.manifest_processor.collection) + + def _collate_fn(self, batch): + return _video_speech_collate_fn(batch, self.pad_id) + + def _build_sample(self, tup): + """Builds the training sample by combining the data from the WebDataset with the manifest info. + """ + video_tuple, audio_filename, offset_id = tup + + # Grab manifest entry from self.manifest_preprocessor.collection + file_id, _ = os.path.splitext(os.path.basename(audio_filename)) + manifest_idx = self.manifest_processor.collection.mapping[file_id][offset_id] + manifest_entry = self.manifest_processor.collection[manifest_idx] + + offset = manifest_entry.offset + if offset is None: + offset = 0 + + # Load Video + video_features = video_tuple[0] + + # Signal length + vf, vfl = video_features, torch.tensor(video_features.shape[0]).long() + + # Load Tokens + t, tl = manifest_entry.text_tokens, len(manifest_entry.text_tokens) + + self.manifest_processor.process_text_by_sample(sample=manifest_entry) + + if self.bos_id is not None: + t = [self.bos_id] + t + tl += 1 + if self.eos_id is not None: + t = t + [self.eos_id] + tl += 1 + + if self.return_sample_id: + return vf, vfl, torch.tensor(t).long(), torch.tensor(tl).long(), manifest_idx + else: + return vf, vfl, torch.tensor(t).long(), torch.tensor(tl).long() + + def get_manifest_sample(self, sample_id): + return self.manifest_processor.collection[sample_id] + + def __iter__(self): + return self._dataset.__iter__() + + def __len__(self): + return len(self.manifest_processor.collection) + + +class TarredVideoToBPEDataset(_TarredVideoToTextDataset): + """ + A similar Dataset to the VideoToBPEDataset, but which loads tarred audio files. + + Accepts a single comma-separated JSON manifest file (in the same style as for the VideoToBPEDataset), + as well as the path(s) to the tarball(s) containing the wav files. Each line of the manifest should + contain the information for one audio file, including at least the transcript and name of the audio + file within the tarball. + + Valid formats for the audio_tar_filepaths argument include: + (1) a single string that can be brace-expanded, e.g. 'path/to/audio.tar' or 'path/to/audio_{1..100}.tar.gz', or + (2) a list of file paths that will not be brace-expanded, e.g. ['audio_1.tar', 'audio_2.tar', ...]. + + See the WebDataset documentation for more information about accepted data and input formats. + + If using multiple workers the number of shards should be divisible by world_size to ensure an + even split among workers. If it is not divisible, logging will give a warning but training will proceed. + In addition, if using mutiprocessing, each shard MUST HAVE THE SAME NUMBER OF ENTRIES after filtering + is applied. We currently do not check for this, but your program may hang if the shards are uneven! + + Notice that a few arguments are different from the AudioToBPEDataset; for example, shuffle (bool) has been + replaced by shuffle_n (int). + + Additionally, please note that the len() of this DataLayer is assumed to be the length of the manifest + after filtering. An incorrect manifest length may lead to some DataLoader issues down the line. + + Args: + audio_tar_filepaths: Either a list of audio tarball filepaths, or a + string (can be brace-expandable). + manifest_filepath (str): Path to the manifest. + tokenizer (TokenizerSpec): Either a Word Piece Encoding tokenizer (BERT), + or a Sentence Piece Encoding tokenizer (BPE). The CTC blank + symbol is automatically added later for models using ctc. + int_values (bool): If true, load samples as 32-bit integers. Defauts to False. + shuffle_n (int): How many samples to look ahead and load to be shuffled. + See WebDataset documentation for more details. + Defaults to 0. + min_duration (float): Dataset parameter. + All training files which have a duration less than min_duration + are dropped. Note: Duration is read from the manifest JSON. + Defaults to 0.1. + max_duration (float): Dataset parameter. + All training files which have a duration more than max_duration + are dropped. Note: Duration is read from the manifest JSON. + Defaults to None. + trim (bool): Whether to use trim silence from beginning and end + of audio signal using librosa.effects.trim(). + Defaults to False. + use_start_end_token: Boolean which dictates whether to add [BOS] and [EOS] + tokens to beginning and ending of speech respectively. + pad_id (id): Token used to pad when collating samples in batches. + If this is None, pads using 0s. + Defaults to None. + shard_strategy (str): Tarred dataset shard distribution strategy chosen as a str value during ddp. + + - `scatter`: The default shard strategy applied by WebDataset, where each node gets + a unique set of shards, which are permanently pre-allocated and never changed at runtime. + - `replicate`: Optional shard strategy, where each node gets all of the set of shards + available in the tarred dataset, which are permanently pre-allocated and never changed at runtime. + The benefit of replication is that it allows each node to sample data points from the entire + dataset independently of other nodes, and reduces dependence on value of `shuffle_n`. + + .. warning:: + + Replicated strategy allows every node to sample the entire set of available tarfiles, + and therefore more than one node may sample the same tarfile, and even sample the same + data points! As such, there is no assured guarantee that all samples in the dataset will be + sampled at least once during 1 epoch. Scattered strategy, on the other hand, on specific + occasions (when the number of shards is not divisible with ``world_size``), will not sample + the entire dataset. For these reasons it is not advisable to use tarred datasets as validation + or test datasets. + + global_rank (int): Worker rank, used for partitioning shards. Defaults to 0. + world_size (int): Total number of processes, used for partitioning shards. Defaults to 0. + return_sample_id (bool): whether to return the sample_id as a part of each sample + """ + + def __init__( + self, + audio_tar_filepaths: Union[str, List[str]], + manifest_filepath: str, + tokenizer: 'nemo.collections.common.tokenizers.TokenizerSpec', + int_values: bool = False, + shuffle_n: int = 0, + min_duration: Optional[float] = None, + max_duration: Optional[float] = None, + trim: bool = False, + use_start_end_token: bool = True, + shard_strategy: str = "scatter", + global_rank: int = 0, + world_size: int = 0, + return_sample_id: bool = False, + ): + if use_start_end_token and hasattr(tokenizer, "bos_id") and tokenizer.bos_id > 0: + bos_id = tokenizer.bos_id + else: + bos_id = None + + if use_start_end_token and hasattr(tokenizer, "eos_id") and tokenizer.eos_id > 0: + eos_id = tokenizer.eos_id + else: + eos_id = None + + if hasattr(tokenizer, "pad_id") and tokenizer.pad_id > 0: + pad_id = tokenizer.pad_id + else: + pad_id = 0 + + class TokenizerWrapper: + def __init__(self, tokenizer): + if isinstance(tokenizer, tokenizers.aggregate_tokenizer.AggregateTokenizer): + self.is_aggregate = True + else: + self.is_aggregate = False + self._tokenizer = tokenizer + + def __call__(self, *args): + if isinstance(args[0], Iterable) and self.is_aggregate: + t = [] + for span in args[0]: + t.extend(self._tokenizer.text_to_ids(span['str'], span['lang'])) + return t + + t = self._tokenizer.text_to_ids(*args) + return t + + super().__init__( + audio_tar_filepaths=audio_tar_filepaths, + manifest_filepath=manifest_filepath, + parser=TokenizerWrapper(tokenizer), + int_values=int_values, + shuffle_n=shuffle_n, + min_duration=min_duration, + max_duration=max_duration, + trim=trim, + bos_id=bos_id, + eos_id=eos_id, + pad_id=pad_id, + shard_strategy=shard_strategy, + global_rank=global_rank, + world_size=world_size, + return_sample_id=return_sample_id, + ) diff --git a/nemo/collections/multimodal/speech_cv/data/video_to_text_dataset.py b/nemo/collections/multimodal/speech_cv/data/video_to_text_dataset.py new file mode 100644 index 000000000000..cf34cc14974e --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/data/video_to_text_dataset.py @@ -0,0 +1,287 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random +from math import isclose +from typing import Optional + +import torch +from omegaconf import DictConfig +from omegaconf.listconfig import ListConfig +from torch.utils.data import ChainDataset + +from nemo.collections.asr.data.audio_to_text_dataset import convert_to_config_list, get_chain_dataset +from nemo.collections.multimodal.speech_cv.data import video_to_text +from nemo.utils import logging + + +def get_video_to_text_bpe_dataset_from_config( + config, + local_rank: int, + global_rank: int, + world_size: int, + tokenizer, + preprocessor_cfg: Optional[DictConfig] = None, +): + """ + Construct Video-To-Text BPE dataset from a config. + Args: + config: BPE dataset config + local_rank: model local rank + global_rank: model global rand + world_size: world size + tokenizer: BPE tokenizer + preprocessor_cfg: preprocessor config, for DALI BPE dataset + + Returns: + constructed dataset or None if dataset config is invalid or nothing to load + """ + + is_concat = config.get('is_concat', False) + if is_concat: + if 'concat_sampling' in config and config['concat_sampling'] is None: + logging.warning(f"Concat dataset requires `concat_sampling` but it was not provided. Config: {config}") + return None + + if not 'concat_probabilities' in config: + logging.warning( + f"Concat dataset requires `concat_probabilities` list but it was not provided. Config: {config}" + ) + return None + else: + if not isclose(sum(config['concat_probabilities']), 1, abs_tol=1e-6): + logging.warning(f"`concat_probabilities` need to sum to 1. Config: {config}") + return None + + shuffle = config['shuffle'] + + # Instantiate tarred dataset loader or normal dataset loader + if config.get('is_tarred', False): + if ('tarred_audio_filepaths' in config and config['tarred_audio_filepaths'] is None) or ( + 'manifest_filepath' in config and config['manifest_filepath'] is None + ): + logging.warning( + "Could not load dataset as `manifest_filepath` was None or " + f"`tarred_audio_filepaths` is None. Provided config : {config}" + ) + return None + + shuffle_n = config.get('shuffle_n', 4 * config['batch_size']) if shuffle else 0 + if is_concat: + raise NotImplementedError("get_concat_tarred_dataset method not implemented") + else: + dataset = get_tarred_dataset( + config=config, tokenizer=tokenizer, shuffle_n=shuffle_n, global_rank=global_rank, world_size=world_size + ) + else: + if 'manifest_filepath' in config and config['manifest_filepath'] is None: + logging.warning(f"Could not load dataset as `manifest_filepath` was None. Provided config : {config}") + return None + if is_concat: + raise NotImplementedError("get_concat_bpe_dataset method not implemented") + else: + dataset = get_bpe_dataset(config=config, tokenizer=tokenizer) + return dataset + + +def get_video_to_text_char_dataset_from_config( + config, local_rank: int, global_rank: int, world_size: int, preprocessor_cfg: Optional[DictConfig] = None +): + """ + Construct Video-To-Text Char dataset from a config. + Args: + config: dataset config + local_rank: model local rank + global_rank: model global rand + world_size: world size + preprocessor_cfg: preprocessor config, for DALI dataset + + Returns: + constructed dataset or None if dataset config is invalid or nothing to load + """ + + is_concat = config.get('is_concat', False) + if is_concat: + if 'concat_sampling' in config and config['concat_sampling'] is None: + logging.warning(f"Concat dataset requires `concat_sampling` but it was not provided. Config: {config}") + return None + + if not 'concat_probabilities' in config: + logging.warning( + f"Concat dataset requires `concat_probabilities` list but it was not provided. Config: {config}" + ) + return None + else: + if not isclose(sum(config['concat_probabilities']), 1, abs_tol=1e-6): + logging.warning(f"`concat_probabilities` need to sum to 1. Config: {config}") + return None + + shuffle = config['shuffle'] + + # Instantiate tarred dataset loader or normal dataset loader + if config.get('is_tarred', False): + if ('tarred_audio_filepaths' in config and config['tarred_audio_filepaths'] is None) or ( + 'manifest_filepath' in config and config['manifest_filepath'] is None + ): + logging.warning( + "Could not load dataset as `manifest_filepath` was None or " + f"`tarred_audio_filepaths` is None. Provided config : {config}" + ) + return None + + shuffle_n = config.get('shuffle_n', 4 * config['batch_size']) if shuffle else 0 + if is_concat: + raise Exception("get_concat_tarred_dataset method not implemented") + else: + dataset = get_tarred_dataset( + config=config, shuffle_n=shuffle_n, global_rank=global_rank, world_size=world_size, + ) + else: + if 'manifest_filepath' in config and config['manifest_filepath'] is None: + logging.warning(f"Could not load dataset as `manifest_filepath` was None. Provided config : {config}") + return None + if is_concat: + raise Exception("get_concat_char_dataset method not implemented") + else: + dataset = get_char_dataset(config=config) + return dataset + + +def get_bpe_dataset(config: dict, tokenizer: 'TokenizerSpec') -> video_to_text.VideoToBPEDataset: + """ + Instantiates a Byte Pair Encoding / Word Piece Encoding based VideoToBPEDataset. + + Args: + config: Config of the VideoToBPEDataset. + tokenizer: An instance of a TokenizerSpec object. + + Returns: + An instance of VideoToBPEDataset. + """ + dataset = video_to_text.VideoToBPEDataset( + manifest_filepath=config['manifest_filepath'], + tokenizer=tokenizer, + int_values=config.get('int_values', False), + max_duration=config.get('max_duration', None), + min_duration=config.get('min_duration', None), + max_utts=config.get('max_utts', 0), + trim=config.get('trim_silence', False), + use_start_end_token=config.get('use_start_end_token', True), + return_sample_id=config.get('return_sample_id', False), + channel_selector=config.get('channel_selector', None), + ) + return dataset + + +def get_char_dataset(config: dict) -> video_to_text.VideoToCharDataset: + """ + Instantiates a Character Encoding based VideoToCharDataset. + + Args: + config: Config of the VideoToCharDataset. + + Returns: + An instance of VideoToCharDataset. + """ + if 'labels' not in config: + logging.warning(f"dataset does not have explicitly defined labels") + + dataset = video_to_text.VideoToCharDataset( + manifest_filepath=config['manifest_filepath'], + labels=config.get('labels', None), + int_values=config.get('int_values', False), + max_duration=config.get('max_duration', None), + min_duration=config.get('min_duration', None), + max_utts=config.get('max_utts', 0), + blank_index=config.get('blank_index', -1), + unk_index=config.get('unk_index', -1), + normalize=config.get('normalize_transcripts', False), + trim=config.get('trim_silence', False), + parser=config.get('parser', 'en'), + return_sample_id=config.get('return_sample_id', False), + channel_selector=config.get('channel_selector', None), + ) + return dataset + + +def get_tarred_dataset( + config: dict, shuffle_n: int, global_rank: int, world_size: int, tokenizer: Optional['TokenizerSpec'] = None, +) -> video_to_text.TarredVideoToBPEDataset: + """ + Instantiates a Word Piece/BPE Encoding based TarredVideoToBPEDataset or a char based TarredVideoToCharDataset. + + Args: + config: Config of the TarredVideoToBPEDataset or TarredVideoToCharDataset. + shuffle_n: How many samples to look ahead and load to be shuffled. + See WebDataset documentation for more details. + tokenizer: An instance of a TokenizerSpec object if BPE dataset is needed. + global_rank: Global rank of this device. + world_size: Global world size in the training method. + Passsing None would return a char-based dataset. + + Returns: + An instance of TarredVideoToBPEDataset or TarredVideoToCharDataset. + """ + tarred_audio_filepaths = config['tarred_audio_filepaths'] + manifest_filepaths = config['manifest_filepath'] + datasets = [] + tarred_audio_filepaths = convert_to_config_list(tarred_audio_filepaths) + manifest_filepaths = convert_to_config_list(manifest_filepaths) + + bucketing_weights = config.get('bucketing_weights', None) # For upsampling buckets + if bucketing_weights: + for idx, weight in enumerate(bucketing_weights): + if not isinstance(weight, int) or weight <= 0: + raise ValueError(f"bucket weights must be positive integers") + + if len(manifest_filepaths) != len(tarred_audio_filepaths): + raise ValueError( + f"manifest_filepaths (length={len(manifest_filepaths)}) and tarred_audio_filepaths (length={len(tarred_audio_filepaths)}) need to have the same number of buckets." + ) + + if 'labels' not in config: + logging.warning(f"dataset does not have explicitly defined labels") + + if 'max_utts' in config: + raise ValueError('"max_utts" parameter is not supported for tarred datasets') + + for dataset_idx, (tarred_audio_filepath, manifest_filepath) in enumerate( + zip(tarred_audio_filepaths, manifest_filepaths) + ): + if len(tarred_audio_filepath) == 1: + tarred_audio_filepath = tarred_audio_filepath[0] + if tokenizer is None: + raise Exception("video_to_text.TarredVideoToCharDataset class not Implemented") + else: + dataset = video_to_text.TarredVideoToBPEDataset( + audio_tar_filepaths=tarred_audio_filepath, + manifest_filepath=manifest_filepath, + tokenizer=tokenizer, + int_values=config.get('int_values', False), + shuffle_n=shuffle_n, + max_duration=config.get('max_duration', None), + min_duration=config.get('min_duration', None), + trim=config.get('trim_silence', False), + use_start_end_token=config.get('use_start_end_token', True), + shard_strategy=config.get('tarred_shard_strategy', 'scatter'), + global_rank=global_rank, + world_size=world_size, + return_sample_id=config.get('return_sample_id', False), + ) + if bucketing_weights: + [datasets.append(dataset) for _ in range(bucketing_weights[dataset_idx])] + else: + datasets.append(dataset) + + return get_chain_dataset(datasets=datasets, ds_config=config) diff --git a/nemo/collections/multimodal/speech_cv/models/__init__.py b/nemo/collections/multimodal/speech_cv/models/__init__.py new file mode 100644 index 000000000000..c34b4c174ac1 --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/models/__init__.py @@ -0,0 +1,27 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# CTC +from nemo.collections.multimodal.speech_cv.models.visual_ctc_bpe_models import VisualEncDecCTCModelBPE +from nemo.collections.multimodal.speech_cv.models.visual_ctc_models import VisualEncDecCTCModel +from nemo.collections.multimodal.speech_cv.models.visual_hybrid_rnnt_ctc_bpe_models import ( + VisualEncDecHybridRNNTCTCBPEModel, +) + +# Hybrid CTC/RNN-T +from nemo.collections.multimodal.speech_cv.models.visual_hybrid_rnnt_ctc_models import VisualEncDecHybridRNNTCTCModel +from nemo.collections.multimodal.speech_cv.models.visual_rnnt_bpe_models import VisualEncDecRNNTBPEModel + +# RNN-T +from nemo.collections.multimodal.speech_cv.models.visual_rnnt_models import VisualEncDecRNNTModel diff --git a/nemo/collections/multimodal/speech_cv/models/visual_ctc_bpe_models.py b/nemo/collections/multimodal/speech_cv/models/visual_ctc_bpe_models.py new file mode 100644 index 000000000000..529acb4cc86f --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/models/visual_ctc_bpe_models.py @@ -0,0 +1,314 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import os +from typing import Dict, List, Optional, Union + +import torch +from omegaconf import DictConfig, ListConfig, OmegaConf, open_dict + +from nemo.collections.asr.losses.ctc import CTCLoss +from nemo.collections.asr.metrics.wer_bpe import WERBPE, CTCBPEDecoding, CTCBPEDecodingConfig +from nemo.collections.asr.parts.mixins import ASRBPEMixin +from nemo.collections.multimodal.speech_cv.data import video_to_text_dataset +from nemo.collections.multimodal.speech_cv.models.visual_ctc_models import VisualEncDecCTCModel +from nemo.core.classes.common import PretrainedModelInfo +from nemo.utils import logging, model_utils + +__all__ = ['VisualEncDecCTCModelBPE'] + + +class VisualEncDecCTCModelBPE(VisualEncDecCTCModel, ASRBPEMixin): + """Encoder decoder CTC-based models with Byte Pair Encoding.""" + + def __init__(self, cfg: DictConfig, trainer=None): + # Convert to Hydra 1.0 compatible DictConfig + cfg = model_utils.convert_model_config_to_dict_config(cfg) + cfg = model_utils.maybe_update_config_version(cfg) + + if 'tokenizer' not in cfg: + raise ValueError("`cfg` must have `tokenizer` config to create a tokenizer !") + + # Setup the tokenizer + self._setup_tokenizer(cfg.tokenizer) + + # Initialize a dummy vocabulary + vocabulary = self.tokenizer.tokenizer.get_vocab() + + # Set the new vocabulary + with open_dict(cfg): + # sidestepping the potential overlapping tokens issue in aggregate tokenizers + if self.tokenizer_type == "agg": + cfg.decoder.vocabulary = ListConfig(vocabulary) + else: + cfg.decoder.vocabulary = ListConfig(list(vocabulary.keys())) + + # Override number of classes if placeholder provided + num_classes = cfg.decoder["num_classes"] + + if num_classes < 1: + logging.info( + "\nReplacing placeholder number of classes ({}) with actual number of classes - {}".format( + num_classes, len(vocabulary) + ) + ) + cfg.decoder["num_classes"] = len(vocabulary) + + super().__init__(cfg=cfg, trainer=trainer) + + # Setup decoding objects + decoding_cfg = self.cfg.get('decoding', None) + + # In case decoding config not found, use default config + if decoding_cfg is None: + decoding_cfg = OmegaConf.structured(CTCBPEDecodingConfig) + with open_dict(self.cfg): + self.cfg.decoding = decoding_cfg + + self.decoding = CTCBPEDecoding(self.cfg.decoding, tokenizer=self.tokenizer) + + # Setup metric with decoding strategy + self._wer = WERBPE( + decoding=self.decoding, + use_cer=self._cfg.get('use_cer', False), + dist_sync_on_step=True, + log_prediction=self._cfg.get("log_prediction", False), + ) + + def _setup_dataloader_from_config(self, config: Optional[Dict]): + dataset = video_to_text_dataset.get_video_to_text_bpe_dataset_from_config( + config=config, + local_rank=self.local_rank, + global_rank=self.global_rank, + world_size=self.world_size, + tokenizer=self.tokenizer, + preprocessor_cfg=self.cfg.get("preprocessor", None), + ) + + if dataset is None: + return None + + shuffle = config['shuffle'] + if config.get('is_tarred', False): + shuffle = False + + if hasattr(dataset, 'collate_fn'): + collate_fn = dataset.collate_fn + else: + collate_fn = dataset.datasets[0].collate_fn + + return torch.utils.data.DataLoader( + dataset=dataset, + batch_size=config['batch_size'], + collate_fn=collate_fn, + drop_last=config.get('drop_last', False), + shuffle=shuffle, + num_workers=config.get('num_workers', 0), + pin_memory=config.get('pin_memory', False), + prefetch_factor=config.get('prefetch_factor', 2), + ) + + def _setup_transcribe_dataloader(self, config: Dict) -> 'torch.utils.data.DataLoader': + """ + Setup function for a temporary data loader which wraps the provided video file. + + Args: + config: A python dictionary which contains the following keys: + paths2video_files: (a list) of paths to video files. + batch_size: (int) batch size to use during inference. \ + Bigger will result in better throughput performance but would use more memory. + temp_dir: (str) A temporary directory where the video manifest is temporarily + stored. + num_workers: (int) number of workers. Depends of the batch_size and machine. \ + 0 - only the main process will load batches, 1 - one worker (not main process) + + Returns: + A pytorch DataLoader for the given video file(s). + """ + + if 'manifest_filepath' in config: + manifest_filepath = config['manifest_filepath'] + batch_size = config['batch_size'] + else: + manifest_filepath = os.path.join(config['temp_dir'], 'manifest.json') + batch_size = min(config['batch_size'], len(config['paths2video_files'])) + + dl_config = { + 'manifest_filepath': manifest_filepath, + 'batch_size': batch_size, + 'shuffle': False, + 'num_workers': config.get('num_workers', min(batch_size, os.cpu_count() - 1)), + 'pin_memory': True, + 'channel_selector': config.get('channel_selector', None), + 'use_start_end_token': self.cfg.validation_ds.get('use_start_end_token', False), + } + + if config.get("augmentor"): + dl_config['augmentor'] = config.get("augmentor") + + temporary_datalayer = self._setup_dataloader_from_config(config=DictConfig(dl_config)) + return temporary_datalayer + + def change_vocabulary( + self, + new_tokenizer_dir: Union[str, DictConfig], + new_tokenizer_type: str, + decoding_cfg: Optional[DictConfig] = None, + ): + """ + Changes vocabulary of the tokenizer used during CTC decoding process. + Use this method when fine-tuning on from pre-trained model. + This method changes only decoder and leaves encoder and pre-processing modules unchanged. For example, you would + use it if you want to use pretrained encoder when fine-tuning on a data in another language, or when you'd need + model to learn capitalization, punctuation and/or special characters. + + Args: + new_tokenizer_dir: Directory path to tokenizer or a config for a new tokenizer (if the tokenizer type is `agg`) + new_tokenizer_type: Either `agg`, `bpe` or `wpe`. `bpe` is used for SentencePiece tokenizers, + whereas `wpe` is used for `BertTokenizer`. + new_tokenizer_cfg: A config for the new tokenizer. if provided, pre-empts the dir and type + + Returns: None + + """ + if isinstance(new_tokenizer_dir, DictConfig): + if new_tokenizer_type == 'agg': + new_tokenizer_cfg = new_tokenizer_dir + else: + raise ValueError( + f'New tokenizer dir should be a string unless the tokenizer is `agg`, but this tokenizer type is: {new_tokenizer_type}' + ) + else: + new_tokenizer_cfg = None + + if new_tokenizer_cfg is not None: + tokenizer_cfg = new_tokenizer_cfg + else: + if not os.path.isdir(new_tokenizer_dir): + raise NotADirectoryError( + f'New tokenizer dir must be non-empty path to a directory. But I got: {new_tokenizer_dir}' + f"New tokenizer dir must be non-empty path to a directory. But I got: {new_tokenizer_dir}" + ) + + if new_tokenizer_type.lower() not in ('bpe', 'wpe'): + raise ValueError(f'New tokenizer type must be either `bpe` or `wpe`') + + tokenizer_cfg = OmegaConf.create({'dir': new_tokenizer_dir, 'type': new_tokenizer_type}) + + # Setup the tokenizer + self._setup_tokenizer(tokenizer_cfg) + + # Initialize a dummy vocabulary + vocabulary = self.tokenizer.tokenizer.get_vocab() + + # Set the new vocabulary + decoder_config = copy.deepcopy(self.decoder.to_config_dict()) + # sidestepping the potential overlapping tokens issue in aggregate tokenizers + if self.tokenizer_type == "agg": + decoder_config.vocabulary = ListConfig(vocabulary) + else: + decoder_config.vocabulary = ListConfig(list(vocabulary.keys())) + + decoder_num_classes = decoder_config['num_classes'] + + # Override number of classes if placeholder provided + logging.info( + "\nReplacing old number of classes ({}) with new number of classes - {}".format( + decoder_num_classes, len(vocabulary) + ) + ) + + decoder_config['num_classes'] = len(vocabulary) + + del self.decoder + self.decoder = VisualEncDecCTCModelBPE.from_config_dict(decoder_config) + del self.loss + self.loss = CTCLoss( + num_classes=self.decoder.num_classes_with_blank - 1, + zero_infinity=True, + reduction=self._cfg.get("ctc_reduction", "mean_batch"), + ) + + if decoding_cfg is None: + # Assume same decoding config as before + decoding_cfg = self.cfg.decoding + + # Assert the decoding config with all hyper parameters + decoding_cls = OmegaConf.structured(CTCBPEDecodingConfig) + decoding_cls = OmegaConf.create(OmegaConf.to_container(decoding_cls)) + decoding_cfg = OmegaConf.merge(decoding_cls, decoding_cfg) + + self.decoding = CTCBPEDecoding(decoding_cfg=decoding_cfg, tokenizer=self.tokenizer) + + self._wer = WERBPE( + decoding=self.decoding, + use_cer=self._cfg.get('use_cer', False), + log_prediction=self._cfg.get("log_prediction", False), + dist_sync_on_step=True, + ) + + # Update config + with open_dict(self.cfg.decoder): + self._cfg.decoder = decoder_config + + with open_dict(self.cfg.decoding): + self._cfg.decoding = decoding_cfg + + logging.info(f"Changed tokenizer to {self.decoder.vocabulary} vocabulary.") + + def change_decoding_strategy(self, decoding_cfg: DictConfig): + """ + Changes decoding strategy used during CTC decoding process. + + Args: + decoding_cfg: A config for the decoder, which is optional. If the decoding type + needs to be changed (from say Greedy to Beam decoding etc), the config can be passed here. + """ + if decoding_cfg is None: + # Assume same decoding config as before + logging.info("No `decoding_cfg` passed when changing decoding strategy, using internal config") + decoding_cfg = self.cfg.decoding + + # Assert the decoding config with all hyper parameters + decoding_cls = OmegaConf.structured(CTCBPEDecodingConfig) + decoding_cls = OmegaConf.create(OmegaConf.to_container(decoding_cls)) + decoding_cfg = OmegaConf.merge(decoding_cls, decoding_cfg) + + self.decoding = CTCBPEDecoding(decoding_cfg=decoding_cfg, tokenizer=self.tokenizer,) + + self._wer = WERBPE( + decoding=self.decoding, + use_cer=self._wer.use_cer, + log_prediction=self._wer.log_prediction, + dist_sync_on_step=True, + ) + + # Update config + with open_dict(self.cfg.decoding): + self.cfg.decoding = decoding_cfg + + logging.info(f"Changed decoding strategy to \n{OmegaConf.to_yaml(self.cfg.decoding)}") + + @classmethod + def list_available_models(cls) -> List[PretrainedModelInfo]: + """ + This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud. + + Returns: + List of available pre-trained models. + """ + results = [] + + return results diff --git a/nemo/collections/multimodal/speech_cv/models/visual_ctc_models.py b/nemo/collections/multimodal/speech_cv/models/visual_ctc_models.py new file mode 100644 index 000000000000..a2eeba03ee8f --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/models/visual_ctc_models.py @@ -0,0 +1,692 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import json +import os +import tempfile +from math import ceil +from typing import Dict, List, Optional, Union + +import torch +from omegaconf import DictConfig, OmegaConf, open_dict +from pytorch_lightning import Trainer +from tqdm.auto import tqdm + +from nemo.collections.asr.data import audio_to_text_dataset +from nemo.collections.asr.losses.ctc import CTCLoss +from nemo.collections.asr.metrics.wer import WER, CTCDecoding, CTCDecodingConfig +from nemo.collections.asr.models.asr_model import ASRModel, ExportableEncDecModel +from nemo.collections.asr.parts.mixins import ASRModuleMixin, InterCTCMixin +from nemo.collections.asr.parts.utils.audio_utils import ChannelSelectorType +from nemo.collections.multimodal.speech_cv.data import video_to_text_dataset +from nemo.core.classes.common import PretrainedModelInfo, typecheck +from nemo.core.classes.mixins import AccessMixin +from nemo.core.neural_types import LabelsType, LengthsType, LogprobsType, NeuralType, VideoSignal +from nemo.utils import logging + +__all__ = ['VisualEncDecCTCModel'] + + +class VisualEncDecCTCModel(ASRModel, ExportableEncDecModel, ASRModuleMixin, InterCTCMixin): + """Base class for encoder decoder CTC-based models.""" + + def __init__(self, cfg: DictConfig, trainer: Trainer = None): + + # Get global rank and total number of GPU workers for IterableDataset partitioning, if applicable + # Global_rank and local_rank is set by LightningModule in Lightning 1.2.0 + self.world_size = 1 + if trainer is not None: + self.world_size = trainer.world_size + + # Init + super().__init__(cfg=cfg, trainer=trainer) + + # Preprocessor, video transforms + self.video_preprocessor = VisualEncDecCTCModel.from_config_dict(self._cfg.video_preprocessor) + + # Augmentation, video augmentations + self.video_augmentation = VisualEncDecCTCModel.from_config_dict(self._cfg.video_augment) + + # Front-end Network, learned module that transform videos to temporal sequence + self.video_front_end = VisualEncDecCTCModel.from_config_dict(self._cfg.video_front_end) + + # Encoder Network + self.encoder = VisualEncDecCTCModel.from_config_dict(self._cfg.encoder) + + with open_dict(self._cfg): + if "feat_in" not in self._cfg.decoder or ( + not self._cfg.decoder.feat_in and hasattr(self.encoder, '_feat_out') + ): + self._cfg.decoder.feat_in = self.encoder._feat_out + if "feat_in" not in self._cfg.decoder or not self._cfg.decoder.feat_in: + raise ValueError("param feat_in of the decoder's config is not set!") + + if self.cfg.decoder.num_classes < 1 and self.cfg.decoder.vocabulary is not None: + logging.info( + "\nReplacing placeholder number of classes ({}) with actual number of classes - {}".format( + self.cfg.decoder.num_classes, len(self.cfg.decoder.vocabulary) + ) + ) + cfg.decoder["num_classes"] = len(self.cfg.decoder.vocabulary) + + # Decoder + self.decoder = VisualEncDecCTCModel.from_config_dict(self._cfg.decoder) + + # CTC Loss + self.loss = CTCLoss( + num_classes=self.decoder.num_classes_with_blank - 1, + zero_infinity=True, + reduction=self._cfg.get("ctc_reduction", "mean_batch"), + ) + + # Setup decoding objects + decoding_cfg = self.cfg.get('decoding', None) + + # In case decoding config not found, use default config + if decoding_cfg is None: + decoding_cfg = OmegaConf.structured(CTCDecodingConfig) + with open_dict(self.cfg): + self.cfg.decoding = decoding_cfg + + # Decoding + self.decoding = CTCDecoding(self.cfg.decoding, vocabulary=self.decoder.vocabulary) + + # Setup metric with decoding strategy + self._wer = WER( + decoding=self.decoding, + use_cer=self._cfg.get('use_cer', False), + dist_sync_on_step=True, + log_prediction=self._cfg.get("log_prediction", False), + ) + + # Setup optional Optimization flags + self.setup_optimization_flags() + + # setting up interCTC loss (from InterCTCMixin) + self.setup_interctc(decoder_name='decoder', loss_name='loss', wer_name='_wer') + + # Adapter modules setup (from ASRAdapterModelMixin) + self.setup_adapters() + + @torch.no_grad() + def transcribe( + self, + paths2video_files: List[str], + batch_size: int = 4, + logprobs: bool = False, + return_hypotheses: bool = False, + num_workers: int = 0, + channel_selector: Optional[ChannelSelectorType] = None, + augmentor: DictConfig = None, + ) -> List[str]: + """ + If modify this function, please remember update transcribe_partial_audio() in + nemo/collections/asr/parts/utils/trancribe_utils.py + + Uses greedy decoding to transcribe video files. Use this method for debugging and prototyping. + + Args: + paths2video_files: (a list) of paths to video files. + batch_size: (int) batch size to use during inference. + Bigger will result in better throughput performance but would use more memory. + logprobs: (bool) pass True to get log probabilities instead of transcripts. + return_hypotheses: (bool) Either return hypotheses or text + With hypotheses can do some postprocessing like getting timestamp or rescoring + num_workers: (int) number of workers for DataLoader + channel_selector (int | Iterable[int] | str): select a single channel or a subset of channels from multi-channel audio. If set to `'average'`, it performs averaging across channels. Disabled if set to `None`. Defaults to `None`. + augmentor: (DictConfig): Augment audio samples during transcription if augmentor is applied. + Returns: + A list of transcriptions (or raw log probabilities if logprobs is True) in the same order as paths2video_files + """ + if paths2video_files is None or len(paths2video_files) == 0: + return {} + + if return_hypotheses and logprobs: + raise ValueError( + "Either `return_hypotheses` or `logprobs` can be True at any given time." + "Returned hypotheses will contain the logprobs." + ) + + if num_workers is None: + num_workers = min(batch_size, os.cpu_count() - 1) + + # We will store transcriptions here + hypotheses = [] + all_hypotheses = [] + + # Model's mode and device + mode = self.training + device = next(self.parameters()).device + + try: + # Switch model to evaluation mode + self.eval() + # Freeze the visual front-end, encoder and decoder modules + self.video_front_end.freeze() + self.encoder.freeze() + self.decoder.freeze() + logging_level = logging.get_verbosity() + logging.set_verbosity(logging.WARNING) + # Work in tmp directory - will store manifest file there + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, 'manifest.json'), 'w', encoding='utf-8') as fp: + for video_file in paths2video_files: + entry = {'video_filepath': video_file, 'duration': 100000, 'text': ''} + fp.write(json.dumps(entry) + '\n') + + config = { + 'paths2video_files': paths2video_files, + 'batch_size': batch_size, + 'temp_dir': tmpdir, + 'num_workers': num_workers, + 'channel_selector': channel_selector, + } + + if augmentor: + config['augmentor'] = augmentor + + temporary_datalayer = self._setup_transcribe_dataloader(config) + for test_batch in tqdm(temporary_datalayer, desc="Transcribing"): + logits, logits_len, greedy_predictions = self.forward( + input_signal=test_batch[0].to(device), input_signal_length=test_batch[1].to(device) + ) + if logprobs: + # dump log probs per file + for idx in range(logits.shape[0]): + lg = logits[idx][: logits_len[idx]] + hypotheses.append(lg.cpu().numpy()) + else: + current_hypotheses, all_hyp = self.decoding.ctc_decoder_predictions_tensor( + logits, decoder_lengths=logits_len, return_hypotheses=return_hypotheses, + ) + + if return_hypotheses: + # dump log probs per file + for idx in range(logits.shape[0]): + current_hypotheses[idx].y_sequence = logits[idx][: logits_len[idx]] + if current_hypotheses[idx].alignments is None: + current_hypotheses[idx].alignments = current_hypotheses[idx].y_sequence + + if all_hyp is None: + hypotheses += current_hypotheses + else: + hypotheses += all_hyp + + del greedy_predictions + del logits + del test_batch + finally: + # set mode back to its original value + self.train(mode=mode) + if mode is True: + self.video_front_end.unfreeze() + self.encoder.unfreeze() + self.decoder.unfreeze() + logging.set_verbosity(logging_level) + + return hypotheses + + def change_vocabulary(self, new_vocabulary: List[str], decoding_cfg: Optional[DictConfig] = None): + """ + Changes vocabulary used during CTC decoding process. Use this method when fine-tuning on from pre-trained model. + This method changes only decoder and leaves encoder and pre-processing modules unchanged. For example, you would + use it if you want to use pretrained encoder when fine-tuning on a data in another language, or when you'd need + model to learn capitalization, punctuation and/or special characters. + + If new_vocabulary == self.decoder.vocabulary then nothing will be changed. + + Args: + + new_vocabulary: list with new vocabulary. Must contain at least 2 elements. Typically, \ + this is target alphabet. + + Returns: None + + """ + if self.decoder.vocabulary == new_vocabulary: + logging.warning(f"Old {self.decoder.vocabulary} and new {new_vocabulary} match. Not changing anything.") + else: + if new_vocabulary is None or len(new_vocabulary) == 0: + raise ValueError(f'New vocabulary must be non-empty list of chars. But I got: {new_vocabulary}') + decoder_config = self.decoder.to_config_dict() + new_decoder_config = copy.deepcopy(decoder_config) + new_decoder_config['vocabulary'] = new_vocabulary + new_decoder_config['num_classes'] = len(new_vocabulary) + + del self.decoder + self.decoder = VisualEncDecCTCModel.from_config_dict(new_decoder_config) + del self.loss + self.loss = CTCLoss( + num_classes=self.decoder.num_classes_with_blank - 1, + zero_infinity=True, + reduction=self._cfg.get("ctc_reduction", "mean_batch"), + ) + + if decoding_cfg is None: + # Assume same decoding config as before + decoding_cfg = self.cfg.decoding + + # Assert the decoding config with all hyper parameters + decoding_cls = OmegaConf.structured(CTCDecodingConfig) + decoding_cls = OmegaConf.create(OmegaConf.to_container(decoding_cls)) + decoding_cfg = OmegaConf.merge(decoding_cls, decoding_cfg) + + self.decoding = CTCDecoding(decoding_cfg=decoding_cfg, vocabulary=self.decoder.vocabulary) + + self._wer = WER( + decoding=self.decoding, + use_cer=self._cfg.get('use_cer', False), + dist_sync_on_step=True, + log_prediction=self._cfg.get("log_prediction", False), + ) + + # Update config + with open_dict(self.cfg.decoder): + self._cfg.decoder = new_decoder_config + + with open_dict(self.cfg.decoding): + self.cfg.decoding = decoding_cfg + + ds_keys = ['train_ds', 'validation_ds', 'test_ds'] + for key in ds_keys: + if key in self.cfg: + with open_dict(self.cfg[key]): + self.cfg[key]['labels'] = OmegaConf.create(new_vocabulary) + + logging.info(f"Changed decoder to output to {self.decoder.vocabulary} vocabulary.") + + def change_decoding_strategy(self, decoding_cfg: DictConfig): + """ + Changes decoding strategy used during CTC decoding process. + + Args: + decoding_cfg: A config for the decoder, which is optional. If the decoding type + needs to be changed (from say Greedy to Beam decoding etc), the config can be passed here. + """ + if decoding_cfg is None: + # Assume same decoding config as before + logging.info("No `decoding_cfg` passed when changing decoding strategy, using internal config") + decoding_cfg = self.cfg.decoding + + # Assert the decoding config with all hyper parameters + decoding_cls = OmegaConf.structured(CTCDecodingConfig) + decoding_cls = OmegaConf.create(OmegaConf.to_container(decoding_cls)) + decoding_cfg = OmegaConf.merge(decoding_cls, decoding_cfg) + + self.decoding = CTCDecoding(decoding_cfg=decoding_cfg, vocabulary=self.decoder.vocabulary) + + self._wer = WER( + decoding=self.decoding, + use_cer=self._wer.use_cer, + log_prediction=self._wer.log_prediction, + dist_sync_on_step=True, + ) + + # Update config + with open_dict(self.cfg.decoding): + self.cfg.decoding = decoding_cfg + + logging.info(f"Changed decoding strategy to \n{OmegaConf.to_yaml(self.cfg.decoding)}") + + def _setup_dataloader_from_config(self, config: Optional[Dict]): + # Automatically inject args from model config to dataloader config + audio_to_text_dataset.inject_dataloader_value_from_model_config(self.cfg, config, key='sample_rate') + audio_to_text_dataset.inject_dataloader_value_from_model_config(self.cfg, config, key='labels') + dataset = video_to_text_dataset.get_video_to_text_char_dataset_from_config( + config=config, + local_rank=self.local_rank, + global_rank=self.global_rank, + world_size=self.world_size, + preprocessor_cfg=self._cfg.get("preprocessor", None), + ) + + if dataset is None: + return None + + shuffle = config['shuffle'] + if config.get('is_tarred', False): + shuffle = False + + if hasattr(dataset, 'collate_fn'): + collate_fn = dataset.collate_fn + else: + collate_fn = dataset.datasets[0].collate_fn + + return torch.utils.data.DataLoader( + dataset=dataset, + batch_size=config['batch_size'], + collate_fn=collate_fn, + drop_last=config.get('drop_last', False), + shuffle=shuffle, + num_workers=config.get('num_workers', 0), + pin_memory=config.get('pin_memory', False), + ) + + def setup_training_data(self, train_data_config: Optional[Union[DictConfig, Dict]]): + """ + Sets up the training data loader via a Dict-like object. + + Args: + train_data_config: A config that contains the information regarding construction + of an ASR Training dataset. + + Supported Datasets: + - :class:`~nemo.collections.multimodal.speech_cv.data.video_to_text.VideoToCharDataset` + - :class:`~nemo.collections.asr.data.video_to_text.VideoToBPEDataset` + - :class:`~nemo.collections.asr.data.video_to_text.TarredVideoToBPEDataset` + """ + if 'shuffle' not in train_data_config: + train_data_config['shuffle'] = True + + # preserve config + self._update_dataset_config(dataset_name='train', config=train_data_config) + + self._train_dl = self._setup_dataloader_from_config(config=train_data_config) + + # Need to set this because if using an IterableDataset, the length of the dataloader is the total number + # of samples rather than the number of batches, and this messes up the tqdm progress bar. + # So we set the number of steps manually (to the correct number) to fix this. + if 'is_tarred' in train_data_config and train_data_config['is_tarred']: + # We also need to check if limit_train_batches is already set. + # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, + # and don't change it. Otherwise, adjust batches accordingly if it's a float (including 1.0). + if self._trainer is not None and isinstance(self._trainer.limit_train_batches, float): + self._trainer.limit_train_batches = int( + self._trainer.limit_train_batches + * ceil((len(self._train_dl.dataset) / self.world_size) / train_data_config['batch_size']) + ) + elif self._trainer is None: + logging.warning( + "Model Trainer was not set before constructing the dataset, incorrect number of " + "training batches will be used. Please set the trainer and rebuild the dataset." + ) + + def setup_validation_data(self, val_data_config: Optional[Union[DictConfig, Dict]]): + """ + Sets up the validation data loader via a Dict-like object. + + Args: + val_data_config: A config that contains the information regarding construction + of an ASR Training dataset. + + Supported Datasets: + - :class:`~nemo.collections.multimodal.speech_cv.data.video_to_text.VideoToCharDataset` + - :class:`~nemo.collections.asr.data.video_to_text.VideoToBPEDataset` + - :class:`~nemo.collections.asr.data.video_to_text.TarredVideoToBPEDataset` + """ + if 'shuffle' not in val_data_config: + val_data_config['shuffle'] = False + + # preserve config + self._update_dataset_config(dataset_name='validation', config=val_data_config) + + self._validation_dl = self._setup_dataloader_from_config(config=val_data_config) + + def setup_test_data(self, test_data_config: Optional[Union[DictConfig, Dict]]): + """ + Sets up the test data loader via a Dict-like object. + + Args: + test_data_config: A config that contains the information regarding construction + of an ASR Training dataset. + + Supported Datasets: + - :class:`~nemo.collections.multimodal.speech_cv.data.video_to_text.VideoToCharDataset` + - :class:`~nemo.collections.asr.data.video_to_text.VideoToBPEDataset` + - :class:`~nemo.collections.asr.data.video_to_text.TarredVideoToBPEDataset` + """ + if 'shuffle' not in test_data_config: + test_data_config['shuffle'] = False + + # preserve config + self._update_dataset_config(dataset_name='test', config=test_data_config) + + self._test_dl = self._setup_dataloader_from_config(config=test_data_config) + + @property + def input_types(self) -> Optional[Dict[str, NeuralType]]: + return { + "input_video_signal": NeuralType(('B', 'C', 'T', 'H', 'W'), VideoSignal(), optional=True), + "input_video_signal_length": NeuralType(tuple('B'), LengthsType(), optional=True), + "sample_id": NeuralType(tuple('B'), LengthsType(), optional=True), + } + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + return { + "outputs": NeuralType(('B', 'T', 'D'), LogprobsType()), + "encoded_lengths": NeuralType(tuple('B'), LengthsType()), + "greedy_predictions": NeuralType(('B', 'T'), LabelsType()), + } + + @typecheck() + def forward(self, input_video_signal=None, input_video_signal_length=None): + """ + Forward pass of the model. + + Args: + input_video_signal: Tensor that represents a batch of video signals, + of shape [B, T, H, W, C]. T here represents timesteps, H height, W width and C channels + input_video_signal_length: Vector of length B, that contains the individual lengths of the video + sequences. + + Returns: + A tuple of 3 elements - + 1) The log probabilities tensor of shape [B, T, D]. + 2) The lengths of the acoustic sequence after propagation through the encoder, of shape [B]. + 3) The greedy token predictions of the model of shape [B, T] (via argmax) + """ + + # Preprocessing + processed_video_signal, processed_video_signal_length = self.video_preprocessor( + input_signal=input_video_signal, length=input_video_signal_length + ) + + # Augmentation + processed_video_signal = self.video_augmentation( + input_signal=processed_video_signal, length=processed_video_signal_length + ) + + # Front-end Networks + processed_video_signal, processed_video_signal_length = self.video_front_end( + input_signal=processed_video_signal, length=processed_video_signal_length + ) + + # Back-end Networks + encoded, encoded_len = self.encoder(audio_signal=processed_video_signal, length=processed_video_signal_length) + + log_probs = self.decoder(encoder_output=encoded) + greedy_predictions = log_probs.argmax(dim=-1, keepdim=False) + + return ( + log_probs, + encoded_len, + greedy_predictions, + ) + + # PTL-specific methods + def training_step(self, batch, batch_nb): + # Reset access registry + if AccessMixin.is_access_enabled(): + AccessMixin.reset_registry(self) + + if self.is_interctc_enabled(): + AccessMixin.set_access_enabled(access_enabled=True) + + video_signal, video_signal_len, transcript, transcript_len = batch + log_probs, encoded_len, predictions = self.forward( + input_video_signal=video_signal, input_video_signal_length=video_signal_len + ) + + if hasattr(self, '_trainer') and self._trainer is not None: + log_every_n_steps = self._trainer.log_every_n_steps + else: + log_every_n_steps = 1 + + loss_value = self.loss( + log_probs=log_probs, targets=transcript, input_lengths=encoded_len, target_lengths=transcript_len + ) + + # Add auxiliary losses, if registered + loss_value = self.add_auxiliary_losses(loss_value) + # only computing WER when requested in the logs (same as done for final-layer WER below) + loss_value, tensorboard_logs = self.add_interctc_losses( + loss_value, transcript, transcript_len, compute_wer=((batch_nb + 1) % log_every_n_steps == 0) + ) + + # Reset access registry + if AccessMixin.is_access_enabled(): + AccessMixin.reset_registry(self) + + tensorboard_logs.update( + { + 'train_loss': loss_value, + 'learning_rate': self._optimizer.param_groups[0]['lr'], + 'global_step': torch.tensor(self.trainer.global_step, dtype=torch.float32), + } + ) + + if (batch_nb + 1) % log_every_n_steps == 0: + self._wer.update( + predictions=log_probs, + targets=transcript, + target_lengths=transcript_len, + predictions_lengths=encoded_len, + ) + wer, _, _ = self._wer.compute() + self._wer.reset() + tensorboard_logs.update({'training_batch_wer': wer}) + + return {'loss': loss_value, 'log': tensorboard_logs} + + def predict_step(self, batch, batch_idx, dataloader_idx=0): + video_signal, video_signal_len, transcript, transcript_len, sample_id = batch + log_probs, encoded_len, predictions = self.forward( + input_video_signal=video_signal, input_video_signal_length=video_signal_len + ) + + transcribed_texts, _ = self._wer.decoding.ctc_decoder_predictions_tensor( + decoder_outputs=log_probs, decoder_lengths=encoded_len, return_hypotheses=False, + ) + + sample_id = sample_id.cpu().detach().numpy() + return list(zip(sample_id, transcribed_texts)) + + def validation_step(self, batch, batch_idx, dataloader_idx=0): + if self.is_interctc_enabled(): + AccessMixin.set_access_enabled(access_enabled=True) + + video_signal, video_signal_len, transcript, transcript_len = batch + log_probs, encoded_len, predictions = self.forward( + input_video_signal=video_signal, input_video_signal_length=video_signal_len + ) + + loss_value = self.loss( + log_probs=log_probs, targets=transcript, input_lengths=encoded_len, target_lengths=transcript_len + ) + loss_value, metrics = self.add_interctc_losses( + loss_value, transcript, transcript_len, compute_wer=True, log_wer_num_denom=True, log_prefix="val_", + ) + + self._wer.update( + predictions=log_probs, targets=transcript, target_lengths=transcript_len, predictions_lengths=encoded_len + ) + wer, wer_num, wer_denom = self._wer.compute() + self._wer.reset() + metrics.update({'val_loss': loss_value, 'val_wer_num': wer_num, 'val_wer_denom': wer_denom, 'val_wer': wer}) + + self.log('global_step', torch.tensor(self.trainer.global_step, dtype=torch.float32)) + + # Reset access registry + if AccessMixin.is_access_enabled(): + AccessMixin.reset_registry(self) + + return metrics + + def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): + metrics = super().multi_validation_epoch_end(outputs, dataloader_idx) + self.finalize_interctc_metrics(metrics, outputs, prefix="val_") + return metrics + + def multi_test_epoch_end(self, outputs, dataloader_idx: int = 0): + metrics = super().multi_test_epoch_end(outputs, dataloader_idx) + self.finalize_interctc_metrics(metrics, outputs, prefix="test_") + return metrics + + def test_step(self, batch, batch_idx, dataloader_idx=0): + logs = self.validation_step(batch, batch_idx, dataloader_idx=dataloader_idx) + test_logs = {name.replace("val_", "test_"): value for name, value in logs.items()} + if type(self.trainer.test_dataloaders) == list and len(self.trainer.test_dataloaders) > 1: + self.test_step_outputs[dataloader_idx].append(test_logs) + else: + self.test_step_outputs.append(test_logs) + return test_logs + + def test_dataloader(self): + if self._test_dl is not None: + return self._test_dl + + def _setup_transcribe_dataloader(self, config: Dict) -> 'torch.utils.data.DataLoader': + """ + Setup function for a temporary data loader which wraps the provided video file. + + Args: + config: A python dictionary which contains the following keys: + paths2video_files: (a list) of paths to video files. The files should be relatively short fragments. \ + Recommended length per file is between 5 and 25 seconds. + batch_size: (int) batch size to use during inference. \ + Bigger will result in better throughput performance but would use more memory. + temp_dir: (str) A temporary directory where the video manifest is temporarily + stored. + num_workers: (int) number of workers. Depends of the batch_size and machine. \ + 0 - only the main process will load batches, 1 - one worker (not main process) + + Returns: + A pytorch DataLoader for the given video file(s). + """ + if 'manifest_filepath' in config: + manifest_filepath = config['manifest_filepath'] + batch_size = config['batch_size'] + else: + manifest_filepath = os.path.join(config['temp_dir'], 'manifest.json') + batch_size = min(config['batch_size'], len(config['paths2video_files'])) + + dl_config = { + 'manifest_filepath': manifest_filepath, + 'labels': self.decoder.vocabulary, + 'batch_size': batch_size, + 'trim_silence': False, + 'shuffle': False, + 'num_workers': config.get('num_workers', min(batch_size, os.cpu_count() - 1)), + 'pin_memory': True, + 'channel_selector': config.get('channel_selector', None), + } + if config.get("augmentor"): + dl_config['augmentor'] = config.get("augmentor") + + temporary_datalayer = self._setup_dataloader_from_config(config=DictConfig(dl_config)) + return temporary_datalayer + + @classmethod + def list_available_models(cls) -> List[PretrainedModelInfo]: + """ + This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud. + + Returns: + List of available pre-trained models. + """ + results = [] + + return results diff --git a/nemo/collections/multimodal/speech_cv/models/visual_hybrid_rnnt_ctc_bpe_models.py b/nemo/collections/multimodal/speech_cv/models/visual_hybrid_rnnt_ctc_bpe_models.py new file mode 100644 index 000000000000..882d15700593 --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/models/visual_hybrid_rnnt_ctc_bpe_models.py @@ -0,0 +1,455 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import os +from typing import Dict, Optional, Union + +import torch +from omegaconf import DictConfig, ListConfig, OmegaConf, open_dict +from pytorch_lightning import Trainer + +from nemo.collections.asr.losses.ctc import CTCLoss +from nemo.collections.asr.losses.rnnt import RNNTLoss +from nemo.collections.asr.metrics.rnnt_wer_bpe import RNNTBPEWER, RNNTBPEDecoding, RNNTBPEDecodingConfig +from nemo.collections.asr.metrics.wer_bpe import WERBPE, CTCBPEDecoding, CTCBPEDecodingConfig +from nemo.collections.asr.parts.mixins import ASRBPEMixin +from nemo.collections.multimodal.speech_cv.data import video_to_text_dataset +from nemo.collections.multimodal.speech_cv.models.visual_hybrid_rnnt_ctc_models import VisualEncDecHybridRNNTCTCModel +from nemo.core.classes.common import PretrainedModelInfo +from nemo.utils import logging, model_utils + + +class VisualEncDecHybridRNNTCTCBPEModel(VisualEncDecHybridRNNTCTCModel, ASRBPEMixin): + """Base class for encoder decoder RNNT-based models with auxiliary CTC decoder/loss and subword tokenization.""" + + def __init__(self, cfg: DictConfig, trainer: Trainer = None): + # Convert to Hydra 1.0 compatible DictConfig + cfg = model_utils.convert_model_config_to_dict_config(cfg) + cfg = model_utils.maybe_update_config_version(cfg) + + # Tokenizer is necessary for this model + if 'tokenizer' not in cfg: + raise ValueError("`cfg` must have `tokenizer` config to create a tokenizer !") + + if not isinstance(cfg, DictConfig): + cfg = OmegaConf.create(cfg) + + # Setup the tokenizer + self._setup_tokenizer(cfg.tokenizer) + + # Initialize a dummy vocabulary + vocabulary = self.tokenizer.tokenizer.get_vocab() + + # Set the new vocabulary + with open_dict(cfg): + cfg.labels = ListConfig(list(vocabulary)) + + with open_dict(cfg.decoder): + cfg.decoder.vocab_size = len(vocabulary) + + with open_dict(cfg.joint): + cfg.joint.num_classes = len(vocabulary) + cfg.joint.vocabulary = ListConfig(list(vocabulary)) + cfg.joint.jointnet.encoder_hidden = cfg.model_defaults.enc_hidden + cfg.joint.jointnet.pred_hidden = cfg.model_defaults.pred_hidden + + # setup auxiliary CTC decoder + if 'aux_ctc' not in cfg: + raise ValueError( + "The config need to have a section for the CTC decoder named as aux_ctc for Hybrid models." + ) + + with open_dict(cfg): + if self.tokenizer_type == "agg": + cfg.aux_ctc.decoder.vocabulary = ListConfig(vocabulary) + else: + cfg.aux_ctc.decoder.vocabulary = ListConfig(list(vocabulary.keys())) + + if cfg.aux_ctc.decoder["num_classes"] < 1: + logging.info( + "\nReplacing placholder number of classes ({}) with actual number of classes - {}".format( + cfg.aux_ctc.decoder["num_classes"], len(vocabulary) + ) + ) + cfg.aux_ctc.decoder["num_classes"] = len(vocabulary) + + super().__init__(cfg=cfg, trainer=trainer) + + # Setup decoding object + self.decoding = RNNTBPEDecoding( + decoding_cfg=self.cfg.decoding, decoder=self.decoder, joint=self.joint, tokenizer=self.tokenizer, + ) + + # Setup wer object + self.wer = RNNTBPEWER( + decoding=self.decoding, + batch_dim_index=0, + use_cer=self.cfg.get('use_cer', False), + log_prediction=self.cfg.get('log_prediction', True), + dist_sync_on_step=True, + ) + + # Setup fused Joint step if flag is set + if self.joint.fuse_loss_wer: + self.joint.set_loss(self.loss) + self.joint.set_wer(self.wer) + + # Setup CTC decoding + ctc_decoding_cfg = self.cfg.aux_ctc.get('decoding', None) + if ctc_decoding_cfg is None: + ctc_decoding_cfg = OmegaConf.structured(CTCBPEDecodingConfig) + with open_dict(self.cfg.aux_ctc): + self.cfg.aux_ctc.decoding = ctc_decoding_cfg + self.ctc_decoding = CTCBPEDecoding(self.cfg.aux_ctc.decoding, tokenizer=self.tokenizer) + + # Setup CTC WER + self.ctc_wer = WERBPE( + decoding=self.ctc_decoding, + use_cer=self.cfg.aux_ctc.get('use_cer', False), + dist_sync_on_step=True, + log_prediction=self.cfg.get("log_prediction", False), + ) + + # setting the RNNT decoder as the default one + self.use_rnnt_decoder = True + + def _setup_dataloader_from_config(self, config: Optional[Dict]): + dataset = video_to_text_dataset.get_video_to_text_bpe_dataset_from_config( + config=config, + local_rank=self.local_rank, + global_rank=self.global_rank, + world_size=self.world_size, + tokenizer=self.tokenizer, + preprocessor_cfg=self.cfg.get("preprocessor", None), + ) + + if dataset is None: + return None + + shuffle = config['shuffle'] + if config.get('is_tarred', False): + shuffle = False + + if hasattr(dataset, 'collate_fn'): + collate_fn = dataset.collate_fn + else: + collate_fn = dataset.datasets[0].collate_fn + + return torch.utils.data.DataLoader( + dataset=dataset, + batch_size=config['batch_size'], + collate_fn=collate_fn, + drop_last=config.get('drop_last', False), + shuffle=shuffle, + num_workers=config.get('num_workers', 0), + pin_memory=config.get('pin_memory', False), + ) + + def _setup_transcribe_dataloader(self, config: Dict) -> 'torch.utils.data.DataLoader': + """ + Setup function for a temporary data loader which wraps the provided video file. + + Args: + config: A python dictionary which contains the following keys: + paths2video_files: (a list) of paths to video files. The files should be relatively short fragments. \ + Recommended length per file is between 5 and 25 seconds. + batch_size: (int) batch size to use during inference. \ + Bigger will result in better throughput performance but would use more memory. + temp_dir: (str) A temporary directory where the video manifest is temporarily + stored. + num_workers: (int) number of workers. Depends of the batch_size and machine. \ + 0 - only the main process will load batches, 1 - one worker (not main process) + + Returns: + A pytorch DataLoader for the given video file(s). + """ + + if 'manifest_filepath' in config: + manifest_filepath = config['manifest_filepath'] + batch_size = config['batch_size'] + else: + manifest_filepath = os.path.join(config['temp_dir'], 'manifest.json') + batch_size = min(config['batch_size'], len(config['paths2video_files'])) + + dl_config = { + 'manifest_filepath': manifest_filepath, + 'batch_size': batch_size, + 'shuffle': False, + 'num_workers': config.get('num_workers', min(batch_size, os.cpu_count() - 1)), + 'pin_memory': True, + 'channel_selector': config.get('channel_selector', None), + 'use_start_end_token': self.cfg.validation_ds.get('use_start_end_token', False), + } + + if config.get("augmentor"): + dl_config['augmentor'] = config.get("augmentor") + + temporary_datalayer = self._setup_dataloader_from_config(config=DictConfig(dl_config)) + return temporary_datalayer + + def change_vocabulary( + self, + new_tokenizer_dir: Union[str, DictConfig], + new_tokenizer_type: str, + decoding_cfg: Optional[DictConfig] = None, + ctc_decoding_cfg: Optional[DictConfig] = None, + ): + """ + Changes vocabulary used during RNNT decoding process. Use this method when fine-tuning on from pre-trained model. + This method changes only decoder and leaves encoder and pre-processing modules unchanged. For example, you would + use it if you want to use pretrained encoder when fine-tuning on data in another language, or when you'd need + model to learn capitalization, punctuation and/or special characters. + + Args: + new_tokenizer_dir: Directory path to tokenizer or a config for a new tokenizer (if the tokenizer type is `agg`) + new_tokenizer_type: Type of tokenizer. Can be either `agg`, `bpe` or `wpe`. + decoding_cfg: A config for the decoder, which is optional. If the decoding type + needs to be changed (from say Greedy to Beam decoding etc), the config can be passed here. + ctc_decoding_cfg: A config for auxiliary CTC decoding, which is optional and can be used to change the decoding type. + + Returns: None + + """ + if isinstance(new_tokenizer_dir, DictConfig): + if new_tokenizer_type == 'agg': + new_tokenizer_cfg = new_tokenizer_dir + else: + raise ValueError( + f'New tokenizer dir should be a string unless the tokenizer is `agg`, but this tokenizer type is: {new_tokenizer_type}' + ) + else: + new_tokenizer_cfg = None + + if new_tokenizer_cfg is not None: + tokenizer_cfg = new_tokenizer_cfg + else: + if not os.path.isdir(new_tokenizer_dir): + raise NotADirectoryError( + f'New tokenizer dir must be non-empty path to a directory. But I got: {new_tokenizer_dir}' + ) + + if new_tokenizer_type.lower() not in ('bpe', 'wpe'): + raise ValueError(f'New tokenizer type must be either `bpe` or `wpe`') + + tokenizer_cfg = OmegaConf.create({'dir': new_tokenizer_dir, 'type': new_tokenizer_type}) + + # Setup the tokenizer + self._setup_tokenizer(tokenizer_cfg) + + # Initialize a dummy vocabulary + vocabulary = self.tokenizer.tokenizer.get_vocab() + + joint_config = self.joint.to_config_dict() + new_joint_config = copy.deepcopy(joint_config) + if self.tokenizer_type == "agg": + new_joint_config["vocabulary"] = ListConfig(vocabulary) + else: + new_joint_config["vocabulary"] = ListConfig(list(vocabulary.keys())) + + new_joint_config['num_classes'] = len(vocabulary) + del self.joint + self.joint = VisualEncDecHybridRNNTCTCBPEModel.from_config_dict(new_joint_config) + + decoder_config = self.decoder.to_config_dict() + new_decoder_config = copy.deepcopy(decoder_config) + new_decoder_config.vocab_size = len(vocabulary) + del self.decoder + self.decoder = VisualEncDecHybridRNNTCTCBPEModel.from_config_dict(new_decoder_config) + + del self.loss + self.loss = RNNTLoss(num_classes=self.joint.num_classes_with_blank - 1) + + if decoding_cfg is None: + # Assume same decoding config as before + decoding_cfg = self.cfg.decoding + + # Assert the decoding config with all hyper parameters + decoding_cls = OmegaConf.structured(RNNTBPEDecodingConfig) + decoding_cls = OmegaConf.create(OmegaConf.to_container(decoding_cls)) + decoding_cfg = OmegaConf.merge(decoding_cls, decoding_cfg) + + self.decoding = RNNTBPEDecoding( + decoding_cfg=decoding_cfg, decoder=self.decoder, joint=self.joint, tokenizer=self.tokenizer, + ) + + self.wer = RNNTBPEWER( + decoding=self.decoding, + batch_dim_index=self.wer.batch_dim_index, + use_cer=self.wer.use_cer, + log_prediction=self.wer.log_prediction, + dist_sync_on_step=True, + ) + + # Setup fused Joint step + if self.joint.fuse_loss_wer or ( + self.decoding.joint_fused_batch_size is not None and self.decoding.joint_fused_batch_size > 0 + ): + self.joint.set_loss(self.loss) + self.joint.set_wer(self.wer) + + # Update config + with open_dict(self.cfg.joint): + self.cfg.joint = new_joint_config + + with open_dict(self.cfg.decoder): + self.cfg.decoder = new_decoder_config + + with open_dict(self.cfg.decoding): + self.cfg.decoding = decoding_cfg + + logging.info(f"Changed tokenizer of the RNNT decoder to {self.joint.vocabulary} vocabulary.") + + # set up the new tokenizer for the CTC decoder + if hasattr(self, 'ctc_decoder'): + ctc_decoder_config = copy.deepcopy(self.ctc_decoder.to_config_dict()) + # sidestepping the potential overlapping tokens issue in aggregate tokenizers + if self.tokenizer_type == "agg": + ctc_decoder_config.vocabulary = ListConfig(vocabulary) + else: + ctc_decoder_config.vocabulary = ListConfig(list(vocabulary.keys())) + + decoder_num_classes = ctc_decoder_config['num_classes'] + # Override number of classes if placeholder provided + logging.info( + "\nReplacing old number of classes ({}) with new number of classes - {}".format( + decoder_num_classes, len(vocabulary) + ) + ) + ctc_decoder_config['num_classes'] = len(vocabulary) + + del self.ctc_decoder + self.ctc_decoder = VisualEncDecHybridRNNTCTCBPEModel.from_config_dict(ctc_decoder_config) + del self.ctc_loss + self.ctc_loss = CTCLoss( + num_classes=self.ctc_decoder.num_classes_with_blank - 1, + zero_infinity=True, + reduction=self.cfg.aux_ctc.get("ctc_reduction", "mean_batch"), + ) + + if ctc_decoding_cfg is None: + # Assume same decoding config as before + ctc_decoding_cfg = self.cfg.aux_ctc.decoding + + # Assert the decoding config with all hyper parameters + ctc_decoding_cls = OmegaConf.structured(CTCBPEDecodingConfig) + ctc_decoding_cls = OmegaConf.create(OmegaConf.to_container(ctc_decoding_cls)) + ctc_decoding_cfg = OmegaConf.merge(ctc_decoding_cls, ctc_decoding_cfg) + + self.ctc_decoding = CTCBPEDecoding(decoding_cfg=ctc_decoding_cfg, tokenizer=self.tokenizer) + + self.ctc_wer = WERBPE( + decoding=self.ctc_decoding, + use_cer=self.cfg.aux_ctc.get('use_cer', False), + log_prediction=self.cfg.get("log_prediction", False), + dist_sync_on_step=True, + ) + + # Update config + with open_dict(self.cfg.aux_ctc): + self.cfg.aux_ctc.decoder = ctc_decoder_config + + with open_dict(self.cfg.aux_ctc): + self.cfg.aux_ctc.decoding = ctc_decoding_cfg + + logging.info(f"Changed tokenizer of the CTC decoder to {self.ctc_decoder.vocabulary} vocabulary.") + + def change_decoding_strategy(self, decoding_cfg: DictConfig, decoder_type: str = None): + """ + Changes decoding strategy used during RNNT decoding process. + Args: + decoding_cfg: A config for the decoder, which is optional. If the decoding type + needs to be changed (from say Greedy to Beam decoding etc), the config can be passed here. + decoder_type: (str) Can be set to 'rnnt' or 'ctc' to switch between appropriate decoder in a + model having both RNN-T and CTC decoders. Defaults to None, in which case RNN-T decoder is + used. If set to 'ctc', it raises error if 'ctc_decoder' is not an attribute of the model. + """ + if decoder_type is None or decoder_type == 'rnnt': + if decoding_cfg is None: + # Assume same decoding config as before + logging.info("No `decoding_cfg` passed when changing decoding strategy, using internal config") + decoding_cfg = self.cfg.decoding + + # Assert the decoding config with all hyper parameters + decoding_cls = OmegaConf.structured(RNNTBPEDecodingConfig) + decoding_cls = OmegaConf.create(OmegaConf.to_container(decoding_cls)) + decoding_cfg = OmegaConf.merge(decoding_cls, decoding_cfg) + + self.decoding = RNNTBPEDecoding( + decoding_cfg=decoding_cfg, decoder=self.decoder, joint=self.joint, tokenizer=self.tokenizer, + ) + + self.wer = RNNTBPEWER( + decoding=self.decoding, + batch_dim_index=self.wer.batch_dim_index, + use_cer=self.wer.use_cer, + log_prediction=self.wer.log_prediction, + dist_sync_on_step=True, + ) + + # Setup fused Joint step + if self.joint.fuse_loss_wer or ( + self.decoding.joint_fused_batch_size is not None and self.decoding.joint_fused_batch_size > 0 + ): + self.joint.set_loss(self.loss) + self.joint.set_wer(self.wer) + + # Update config + with open_dict(self.cfg.decoding): + self.cfg.decoding = decoding_cfg + + logging.info(f"Changed decoding strategy of the RNNT decoder to \n{OmegaConf.to_yaml(self.cfg.decoding)}") + elif decoder_type == 'ctc': + if not hasattr(self, 'ctc_decoding'): + raise ValueError("The model does not have the ctc_decoding module and does not support ctc decoding.") + if decoding_cfg is None: + # Assume same decoding config as before + logging.info("No `decoding_cfg` passed when changing decoding strategy, using internal config") + decoding_cfg = self.cfg.aux_ctc.decoding + + # Assert the decoding config with all hyper parameters + decoding_cls = OmegaConf.structured(CTCBPEDecodingConfig) + decoding_cls = OmegaConf.create(OmegaConf.to_container(decoding_cls)) + decoding_cfg = OmegaConf.merge(decoding_cls, decoding_cfg) + + self.ctc_decoding = CTCBPEDecoding(decoding_cfg=decoding_cfg, tokenizer=self.tokenizer) + + self.ctc_wer = WERBPE( + decoding=self.ctc_decoding, + use_cer=self.ctc_wer.use_cer, + log_prediction=self.ctc_wer.log_prediction, + dist_sync_on_step=True, + ) + + # Update config + with open_dict(self.cfg.aux_ctc.decoding): + self.cfg.aux_ctc.decoding = decoding_cfg + + self.use_rnnt_decoder = False + logging.info( + f"Changed decoding strategy of the CTC decoder to \n{OmegaConf.to_yaml(self.cfg.aux_ctc.decoding)}" + ) + else: + raise ValueError(f"decoder_type={decoder_type} is not supported. Supported values: [ctc,rnnt]") + + @classmethod + def list_available_models(cls) -> Optional[PretrainedModelInfo]: + """ + This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud. + + Returns: + List of available pre-trained models. + """ + results = [] + return results diff --git a/nemo/collections/multimodal/speech_cv/models/visual_hybrid_rnnt_ctc_models.py b/nemo/collections/multimodal/speech_cv/models/visual_hybrid_rnnt_ctc_models.py new file mode 100644 index 000000000000..7c658f2a18c6 --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/models/visual_hybrid_rnnt_ctc_models.py @@ -0,0 +1,644 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import json +import os +import tempfile +from typing import List, Optional + +import torch +from omegaconf import DictConfig, OmegaConf, open_dict +from pytorch_lightning import Trainer +from tqdm.auto import tqdm + +from nemo.collections.asr.losses.ctc import CTCLoss +from nemo.collections.asr.metrics.wer import WER, CTCDecoding, CTCDecodingConfig +from nemo.collections.asr.parts.mixins import ASRBPEMixin, InterCTCMixin +from nemo.collections.asr.parts.utils.audio_utils import ChannelSelectorType +from nemo.collections.multimodal.speech_cv.models.visual_rnnt_models import VisualEncDecRNNTModel +from nemo.core.classes.common import PretrainedModelInfo +from nemo.core.classes.mixins import AccessMixin +from nemo.utils import logging, model_utils + + +class VisualEncDecHybridRNNTCTCModel(VisualEncDecRNNTModel, ASRBPEMixin, InterCTCMixin): + """Base class for hybrid RNNT/CTC models.""" + + def __init__(self, cfg: DictConfig, trainer: Trainer = None): + cfg = model_utils.convert_model_config_to_dict_config(cfg) + cfg = model_utils.maybe_update_config_version(cfg) + super().__init__(cfg=cfg, trainer=trainer) + + if 'aux_ctc' not in self.cfg: + raise ValueError( + "The config need to have a section for the CTC decoder named as aux_ctc for Hybrid models." + ) + with open_dict(self.cfg.aux_ctc): + if "feat_in" not in self.cfg.aux_ctc.decoder or ( + not self.cfg.aux_ctc.decoder.feat_in and hasattr(self.encoder, '_feat_out') + ): + self.cfg.aux_ctc.decoder.feat_in = self.encoder._feat_out + if "feat_in" not in self.cfg.aux_ctc.decoder or not self.cfg.aux_ctc.decoder.feat_in: + raise ValueError("param feat_in of the decoder's config is not set!") + + if self.cfg.aux_ctc.decoder.num_classes < 1 and self.cfg.aux_ctc.decoder.vocabulary is not None: + logging.info( + "\nReplacing placeholder number of classes ({}) with actual number of classes - {}".format( + self.cfg.aux_ctc.decoder.num_classes, len(self.cfg.aux_ctc.decoder.vocabulary) + ) + ) + self.cfg.aux_ctc.decoder["num_classes"] = len(self.cfg.aux_ctc.decoder.vocabulary) + + self.ctc_decoder = VisualEncDecHybridRNNTCTCModel.from_config_dict(self.cfg.aux_ctc.decoder) + self.ctc_loss_weight = self.cfg.aux_ctc.get("ctc_loss_weight", 0.5) + + self.ctc_loss = CTCLoss( + num_classes=self.ctc_decoder.num_classes_with_blank - 1, + zero_infinity=True, + reduction=self.cfg.aux_ctc.get("ctc_reduction", "mean_batch"), + ) + + ctc_decoding_cfg = self.cfg.aux_ctc.get('decoding', None) + if ctc_decoding_cfg is None: + ctc_decoding_cfg = OmegaConf.structured(CTCDecodingConfig) + with open_dict(self.cfg.aux_ctc): + self.cfg.aux_ctc.decoding = ctc_decoding_cfg + + self.ctc_decoding = CTCDecoding(self.cfg.aux_ctc.decoding, vocabulary=self.ctc_decoder.vocabulary) + self.ctc_wer = WER( + decoding=self.ctc_decoding, + use_cer=self.cfg.aux_ctc.get('use_cer', False), + dist_sync_on_step=True, + log_prediction=self.cfg.get("log_prediction", False), + ) + + # setting the RNNT decoder as the default one + self.use_rnnt_decoder = True + + # setting up interCTC loss (from InterCTCMixin) + self.setup_interctc(decoder_name='decoder', loss_name='loss', wer_name='_wer') + + @torch.no_grad() + def transcribe( + self, + paths2video_files: List[str], + batch_size: int = 4, + return_hypotheses: bool = False, + partial_hypothesis: Optional[List['Hypothesis']] = None, + num_workers: int = 0, + channel_selector: Optional[ChannelSelectorType] = None, + ) -> (List[str], Optional[List['Hypothesis']]): + """ + Uses greedy decoding to transcribe video files. Use this method for debugging and prototyping. + + Args: + + paths2video_files: (a list) of paths to video files. + batch_size: (int) batch size to use during inference. \ + Bigger will result in better throughput performance but would use more memory. + return_hypotheses: (bool) Either return hypotheses or text + With hypotheses can do some postprocessing like getting timestamp or rescoring + num_workers: (int) number of workers for DataLoader + channel_selector (int | Iterable[int] | str): select a single channel or a subset of channels from multi-channel audio. If set to `'average'`, it performs averaging across channels. Disabled if set to `None`. Defaults to `None`. Uses zero-based indexing. + + Returns: + Returns a tuple of 2 items - + * A list of greedy transcript texts / Hypothesis + * An optional list of beam search transcript texts / Hypothesis / NBestHypothesis. + """ + if self.use_rnnt_decoder: + return super().transcribe( + paths2video_files=paths2video_files, + batch_size=batch_size, + return_hypotheses=return_hypotheses, + partial_hypothesis=partial_hypothesis, + num_workers=num_workers, + channel_selector=channel_selector, + ) + + if paths2video_files is None or len(paths2video_files) == 0: + return {} + # We will store transcriptions here + hypotheses = [] + all_hypotheses = [] + # Model's mode and device + mode = self.training + device = next(self.parameters()).device + + if num_workers is None: + num_workers = min(batch_size, os.cpu_count() - 1) + + try: + + # Switch model to evaluation mode + self.eval() + # Freeze the visual front-end, encoder and decoder modules + self.video_front_end.freeze() + self.encoder.freeze() + self.decoder.freeze() + self.joint.freeze() + if hasattr(self, 'ctc_decoder'): + self.ctc_decoder.freeze() + + logging_level = logging.get_verbosity() + logging.set_verbosity(logging.WARNING) + # Work in tmp directory - will store manifest file there + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, 'manifest.json'), 'w', encoding='utf-8') as fp: + for video_file in paths2video_files: + entry = {'video_filepath': video_file, 'duration': 100000, 'text': ''} + fp.write(json.dumps(entry) + '\n') + + config = { + 'paths2video_files': paths2video_files, + 'batch_size': batch_size, + 'temp_dir': tmpdir, + 'num_workers': num_workers, + 'channel_selector': channel_selector, + } + + temporary_datalayer = self._setup_transcribe_dataloader(config) + for test_batch in tqdm(temporary_datalayer, desc="Transcribing"): + encoded, encoded_len = self.forward( + input_signal=test_batch[0].to(device), input_signal_length=test_batch[1].to(device) + ) + + logits = self.ctc_decoder(encoder_output=encoded) + best_hyp, all_hyp = self.ctc_decoding.ctc_decoder_predictions_tensor( + logits, encoded_len, return_hypotheses=return_hypotheses, + ) + if return_hypotheses: + # dump log probs per file + for idx in range(logits.shape[0]): + best_hyp[idx].y_sequence = logits[idx][: encoded_len[idx]] + if best_hyp[idx].alignments is None: + best_hyp[idx].alignments = best_hyp[idx].y_sequence + del logits + + hypotheses += best_hyp + if all_hyp is not None: + all_hypotheses += all_hyp + else: + all_hypotheses += best_hyp + + del encoded + del test_batch + finally: + # set mode back to its original value + self.train(mode=mode) + + logging.set_verbosity(logging_level) + if mode is True: + self.video_front_end.unfreeze() + self.encoder.unfreeze() + self.decoder.unfreeze() + self.joint.unfreeze() + if hasattr(self, 'ctc_decoder'): + self.ctc_decoder.unfreeze() + return hypotheses, all_hypotheses + + def change_vocabulary( + self, + new_vocabulary: List[str], + decoding_cfg: Optional[DictConfig] = None, + ctc_decoding_cfg: Optional[DictConfig] = None, + ): + """ + Changes vocabulary used during RNNT decoding process. Use this method when fine-tuning a pre-trained model. + This method changes only decoder and leaves encoder and pre-processing modules unchanged. For example, you would + use it if you want to use pretrained encoder when fine-tuning on data in another language, or when you'd need + model to learn capitalization, punctuation and/or special characters. + + Args: + new_vocabulary: list with new vocabulary. Must contain at least 2 elements. Typically, \ + this is target alphabet. + decoding_cfg: A config for the decoder, which is optional. If the decoding type + needs to be changed (from say Greedy to Beam decoding etc), the config can be passed here. + ctc_decoding_cfg: A config for CTC decoding, which is optional and can be used to change decoding type. + + Returns: None + + """ + super().change_vocabulary(new_vocabulary=new_vocabulary, decoding_cfg=decoding_cfg) + + # set up the new tokenizer for the CTC decoder + if hasattr(self, 'ctc_decoder'): + if self.ctc_decoder.vocabulary == new_vocabulary: + logging.warning( + f"Old {self.ctc_decoder.vocabulary} and new {new_vocabulary} match. Not changing anything." + ) + else: + if new_vocabulary is None or len(new_vocabulary) == 0: + raise ValueError(f'New vocabulary must be non-empty list of chars. But I got: {new_vocabulary}') + decoder_config = self.ctc_decoder.to_config_dict() + new_decoder_config = copy.deepcopy(decoder_config) + new_decoder_config['vocabulary'] = new_vocabulary + new_decoder_config['num_classes'] = len(new_vocabulary) + + del self.ctc_decoder + self.ctc_decoder = VisualEncDecHybridRNNTCTCModel.from_config_dict(new_decoder_config) + del self.ctc_loss + self.ctc_loss = CTCLoss( + num_classes=self.ctc_decoder.num_classes_with_blank - 1, + zero_infinity=True, + reduction=self.cfg.aux_ctc.get("ctc_reduction", "mean_batch"), + ) + + if ctc_decoding_cfg is None: + # Assume same decoding config as before + logging.info("No `ctc_decoding_cfg` passed when changing decoding strategy, using internal config") + ctc_decoding_cfg = self.cfg.aux_ctc.decoding + + # Assert the decoding config with all hyper parameters + ctc_decoding_cls = OmegaConf.structured(CTCDecodingConfig) + ctc_decoding_cls = OmegaConf.create(OmegaConf.to_container(ctc_decoding_cls)) + ctc_decoding_cfg = OmegaConf.merge(ctc_decoding_cls, ctc_decoding_cfg) + + self.ctc_decoding = CTCDecoding(decoding_cfg=ctc_decoding_cfg, vocabulary=self.ctc_decoder.vocabulary) + + self.ctc_wer = WER( + decoding=self.ctc_decoding, + use_cer=self.ctc_wer.use_cer, + log_prediction=self.ctc_wer.log_prediction, + dist_sync_on_step=True, + ) + + # Update config + with open_dict(self.cfg.aux_ctc): + self.cfg.aux_ctc.decoding = ctc_decoding_cfg + + with open_dict(self.cfg.aux_ctc): + self.cfg.aux_ctc.decoder = new_decoder_config + + ds_keys = ['train_ds', 'validation_ds', 'test_ds'] + for key in ds_keys: + if key in self.cfg: + with open_dict(self.cfg[key]): + self.cfg[key]['labels'] = OmegaConf.create(new_vocabulary) + + logging.info(f"Changed the tokenizer of the CTC decoder to {self.ctc_decoder.vocabulary} vocabulary.") + + def change_decoding_strategy(self, decoding_cfg: DictConfig, decoder_type: str = None): + """ + Changes decoding strategy used during RNNT decoding process. + + Args: + decoding_cfg: A config for the decoder, which is optional. If the decoding type + needs to be changed (from say Greedy to Beam decoding etc), the config can be passed here. + decoder_type: (str) Can be set to 'rnnt' or 'ctc' to switch between appropriate decoder in a + model having RNN-T and CTC decoders. Defaults to None, in which case RNN-T decoder is + used. If set to 'ctc', it raises error if 'ctc_decoder' is not an attribute of the model. + """ + if decoder_type is None or decoder_type == 'rnnt': + self.use_rnnt_decoder = True + return super().change_decoding_strategy(decoding_cfg=decoding_cfg) + + assert decoder_type == 'ctc' and hasattr(self, 'ctc_decoder') + if decoding_cfg is None: + # Assume same decoding config as before + logging.info("No `decoding_cfg` passed when changing decoding strategy, using internal config") + decoding_cfg = self.cfg.aux_ctc.decoding + + # Assert the decoding config with all hyper parameters + decoding_cls = OmegaConf.structured(CTCDecodingConfig) + decoding_cls = OmegaConf.create(OmegaConf.to_container(decoding_cls)) + decoding_cfg = OmegaConf.merge(decoding_cls, decoding_cfg) + + self.ctc_decoding = CTCDecoding(decoding_cfg=decoding_cfg, vocabulary=self.ctc_decoder.vocabulary) + + self.ctc_wer = WER( + decoding=self.ctc_decoding, + use_cer=self.ctc_wer.use_cer, + log_prediction=self.ctc_wer.log_prediction, + dist_sync_on_step=True, + ) + + # Update config + with open_dict(self.cfg.aux_ctc): + self.cfg.aux_ctc.decoding = decoding_cfg + + self.use_rnnt_decoder = False + logging.info(f"Changed decoding strategy to \n{OmegaConf.to_yaml(self.cfg.aux_ctc.decoding)}") + + # PTL-specific methods + def training_step(self, batch, batch_nb): + # Reset access registry + if AccessMixin.is_access_enabled(): + AccessMixin.reset_registry(self) + + if self.is_interctc_enabled(): + AccessMixin.set_access_enabled(access_enabled=True) + + signal, signal_len, transcript, transcript_len = batch + + # forward() only performs encoder forward + encoded, encoded_len = self.forward(input_signal=signal, input_signal_length=signal_len) + del signal + + # During training, loss must be computed, so decoder forward is necessary + decoder, target_length, states = self.decoder(targets=transcript, target_length=transcript_len) + + if hasattr(self, '_trainer') and self._trainer is not None: + log_every_n_steps = self._trainer.log_every_n_steps + sample_id = self._trainer.global_step + else: + log_every_n_steps = 1 + sample_id = batch_nb + + # If fused Joint-Loss-WER is not used + if not self.joint.fuse_loss_wer: + # Compute full joint and loss + joint = self.joint(encoder_outputs=encoded, decoder_outputs=decoder) + loss_value = self.loss( + log_probs=joint, targets=transcript, input_lengths=encoded_len, target_lengths=target_length + ) + + # Add auxiliary losses, if registered + loss_value = self.add_auxiliary_losses(loss_value) + + # Reset access registry + # if AccessMixin.is_access_enabled(): + # AccessMixin.reset_registry(self) + + tensorboard_logs = { + 'train_loss': loss_value, + 'learning_rate': self._optimizer.param_groups[0]['lr'], + 'global_step': torch.tensor(self.trainer.global_step, dtype=torch.float32), + } + + if (sample_id + 1) % log_every_n_steps == 0: + self.wer.update(encoded, encoded_len, transcript, transcript_len) + _, scores, words = self.wer.compute() + self.wer.reset() + tensorboard_logs.update({'training_batch_wer': scores.float() / words}) + + else: + # If fused Joint-Loss-WER is used + if (sample_id + 1) % log_every_n_steps == 0: + compute_wer = True + else: + compute_wer = False + + # Fused joint step + loss_value, wer, _, _ = self.joint( + encoder_outputs=encoded, + decoder_outputs=decoder, + encoder_lengths=encoded_len, + transcripts=transcript, + transcript_lengths=transcript_len, + compute_wer=compute_wer, + ) + + # Add auxiliary losses, if registered + loss_value = self.add_auxiliary_losses(loss_value) + + # Reset access registry + # if AccessMixin.is_access_enabled(): + # AccessMixin.reset_registry(self) + + tensorboard_logs = { + 'train_loss': loss_value, + 'learning_rate': self._optimizer.param_groups[0]['lr'], + 'global_step': torch.tensor(self.trainer.global_step, dtype=torch.float32), + } + + if compute_wer: + tensorboard_logs.update({'training_batch_wer': wer}) + + if self.ctc_loss_weight > 0: + log_probs = self.ctc_decoder(encoder_output=encoded) + ctc_loss = self.ctc_loss( + log_probs=log_probs, targets=transcript, input_lengths=encoded_len, target_lengths=transcript_len + ) + + # Add Interctc Losses + ctc_loss, interctc_tensorboard_logs = self.add_interctc_losses( + ctc_loss, transcript, transcript_len, compute_wer=((batch_nb + 1) % log_every_n_steps == 0) + ) + tensorboard_logs.update(interctc_tensorboard_logs) + + tensorboard_logs['train_rnnt_loss'] = loss_value + tensorboard_logs['train_ctc_loss'] = ctc_loss + loss_value = (1 - self.ctc_loss_weight) * loss_value + self.ctc_loss_weight * ctc_loss + tensorboard_logs['train_loss'] = loss_value + if (sample_id + 1) % log_every_n_steps == 0: + self.ctc_wer.update( + predictions=log_probs, + targets=transcript, + target_lengths=transcript_len, + predictions_lengths=encoded_len, + ) + ctc_wer, _, _ = self.ctc_wer.compute() + self.ctc_wer.reset() + tensorboard_logs.update({'training_batch_wer_ctc': ctc_wer}) + + # Reset access registry + if AccessMixin.is_access_enabled(): + AccessMixin.reset_registry(self) + + # Log items + self.log_dict(tensorboard_logs) + + # Preserve batch acoustic model T and language model U parameters if normalizing + if self._optim_normalize_joint_txu: + self._optim_normalize_txu = [encoded_len.max(), transcript_len.max()] + + return {'loss': loss_value} + + def predict_step(self, batch, batch_idx, dataloader_idx=0): + # TODO: add support for CTC decoding + signal, signal_len, transcript, transcript_len, sample_id = batch + + # forward() only performs encoder forward + encoded, encoded_len = self.forward(input_signal=signal, input_signal_length=signal_len) + del signal + + best_hyp_text, all_hyp_text = self.decoding.rnnt_decoder_predictions_tensor( + encoder_output=encoded, encoded_lengths=encoded_len, return_hypotheses=False + ) + + sample_id = sample_id.cpu().detach().numpy() + return list(zip(sample_id, best_hyp_text)) + + def validation_step(self, batch, batch_idx, dataloader_idx=0): + if self.is_interctc_enabled(): + AccessMixin.set_access_enabled(access_enabled=True) + + signal, signal_len, transcript, transcript_len = batch + + # forward() only performs encoder forward + encoded, encoded_len = self.forward(input_signal=signal, input_signal_length=signal_len) + del signal + + tensorboard_logs = {} + + # If experimental fused Joint-Loss-WER is not used + if not self.joint.fuse_loss_wer: + if self.compute_eval_loss: + decoder, target_length, states = self.decoder(targets=transcript, target_length=transcript_len) + joint = self.joint(encoder_outputs=encoded, decoder_outputs=decoder) + + loss_value = self.loss( + log_probs=joint, targets=transcript, input_lengths=encoded_len, target_lengths=target_length + ) + + tensorboard_logs['val_loss'] = loss_value + + self.wer.update(encoded, encoded_len, transcript, transcript_len) + wer, wer_num, wer_denom = self.wer.compute() + self.wer.reset() + + tensorboard_logs['val_wer_num'] = wer_num + tensorboard_logs['val_wer_denom'] = wer_denom + tensorboard_logs['val_wer'] = wer + + else: + # If experimental fused Joint-Loss-WER is used + compute_wer = True + + if self.compute_eval_loss: + decoded, target_len, states = self.decoder(targets=transcript, target_length=transcript_len) + else: + decoded = None + target_len = transcript_len + + # Fused joint step + loss_value, wer, wer_num, wer_denom = self.joint( + encoder_outputs=encoded, + decoder_outputs=decoded, + encoder_lengths=encoded_len, + transcripts=transcript, + transcript_lengths=target_len, + compute_wer=compute_wer, + ) + + if loss_value is not None: + tensorboard_logs['val_loss'] = loss_value + + tensorboard_logs['val_wer_num'] = wer_num + tensorboard_logs['val_wer_denom'] = wer_denom + tensorboard_logs['val_wer'] = wer + + log_probs = self.ctc_decoder(encoder_output=encoded) + if self.compute_eval_loss: + ctc_loss = self.ctc_loss( + log_probs=log_probs, targets=transcript, input_lengths=encoded_len, target_lengths=transcript_len + ) + + # Add interCTC losses + ctc_loss, interctc_tensorboard_logs = self.add_interctc_losses( + ctc_loss, transcript, transcript_len, compute_wer=True, log_wer_num_denom=True, log_prefix="val_", + ) + tensorboard_logs.update(interctc_tensorboard_logs) + + tensorboard_logs['val_ctc_loss'] = ctc_loss + tensorboard_logs['val_rnnt_loss'] = loss_value + loss_value = (1 - self.ctc_loss_weight) * loss_value + self.ctc_loss_weight * ctc_loss + tensorboard_logs['val_loss'] = loss_value + self.ctc_wer.update( + predictions=log_probs, targets=transcript, target_lengths=transcript_len, predictions_lengths=encoded_len, + ) + ctc_wer, ctc_wer_num, ctc_wer_denom = self.ctc_wer.compute() + self.ctc_wer.reset() + tensorboard_logs['val_wer_num_ctc'] = ctc_wer_num + tensorboard_logs['val_wer_denom_ctc'] = ctc_wer_denom + tensorboard_logs['val_wer_ctc'] = ctc_wer + + self.log('global_step', torch.tensor(self.trainer.global_step, dtype=torch.float32)) + + # Reset access registry + if AccessMixin.is_access_enabled(): + AccessMixin.reset_registry(self) + + return tensorboard_logs + + def test_step(self, batch, batch_idx, dataloader_idx=0): + logs = self.validation_step(batch, batch_idx, dataloader_idx=dataloader_idx) + test_logs = {name.replace("val_", "test_"): value for name, value in logs.items()} + if type(self.trainer.test_dataloaders) == list and len(self.trainer.test_dataloaders) > 1: + self.test_step_outputs[dataloader_idx].append(test_logs) + else: + self.test_step_outputs.append(test_logs) + return test_logs + + """ + def test_step(self, batch, batch_idx, dataloader_idx=0): + logs = self.validation_step(batch, batch_idx, dataloader_idx=dataloader_idx) + test_logs = { + 'test_wer_num': logs['val_wer_num'], + 'test_wer_denom': logs['val_wer_denom'], + # 'test_wer': logs['val_wer'], + } + if 'val_loss' in logs: + test_logs['test_loss'] = logs['val_loss'] + + if self.ctc_loss_weight > 0: + test_logs['test_wer_num_ctc'] = logs['val_wer_num_ctc'] + test_logs['test_wer_denom_ctc'] = logs['val_wer_denom_ctc'] + if 'val_ctc_loss' in logs: + test_logs['test_ctc_loss'] = logs['val_ctc_loss'] + if 'val_rnnt_loss' in logs: + test_logs['test_rnnt_loss'] = logs['val_rnnt_loss'] + + return test_logs + """ + + def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): + if self.compute_eval_loss: + val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() + val_loss_log = {'val_loss': val_loss_mean} + else: + val_loss_log = {} + wer_num = torch.stack([x['val_wer_num'] for x in outputs]).sum() + wer_denom = torch.stack([x['val_wer_denom'] for x in outputs]).sum() + tensorboard_logs = {**val_loss_log, 'val_wer': wer_num.float() / wer_denom} + if self.ctc_loss_weight > 0: + ctc_wer_num = torch.stack([x['val_wer_num_ctc'] for x in outputs]).sum() + ctc_wer_denom = torch.stack([x['val_wer_denom_ctc'] for x in outputs]).sum() + tensorboard_logs['val_wer_ctc'] = ctc_wer_num.float() / ctc_wer_denom + + metrics = {**val_loss_log, 'log': tensorboard_logs} + self.finalize_interctc_metrics(metrics, outputs, prefix="val_") + return metrics + + def multi_test_epoch_end(self, outputs, dataloader_idx: int = 0): + if self.compute_eval_loss: + test_loss_mean = torch.stack([x['test_loss'] for x in outputs]).mean() + test_loss_log = {'test_loss': test_loss_mean} + else: + test_loss_log = {} + wer_num = torch.stack([x['test_wer_num'] for x in outputs]).sum() + wer_denom = torch.stack([x['test_wer_denom'] for x in outputs]).sum() + tensorboard_logs = {**test_loss_log, 'test_wer': wer_num.float() / wer_denom} + + if self.ctc_loss_weight > 0: + ctc_wer_num = torch.stack([x['test_wer_num_ctc'] for x in outputs]).sum() + ctc_wer_denom = torch.stack([x['test_wer_denom_ctc'] for x in outputs]).sum() + tensorboard_logs['test_wer_ctc'] = ctc_wer_num.float() / ctc_wer_denom + + metrics = {**test_loss_log, 'log': tensorboard_logs} + self.finalize_interctc_metrics(metrics, outputs, prefix="test_") + return metrics + + @classmethod + def list_available_models(cls) -> Optional[PretrainedModelInfo]: + """ + This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud. + + Returns: + List of available pre-trained models. + """ + results = [] + return results diff --git a/nemo/collections/multimodal/speech_cv/models/visual_rnnt_bpe_models.py b/nemo/collections/multimodal/speech_cv/models/visual_rnnt_bpe_models.py new file mode 100644 index 000000000000..5c5263b1ce76 --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/models/visual_rnnt_bpe_models.py @@ -0,0 +1,321 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import os +from typing import Dict, List, Optional, Union + +import torch +from omegaconf import DictConfig, ListConfig, OmegaConf, open_dict +from pytorch_lightning import Trainer + +from nemo.collections.asr.losses.rnnt import RNNTLoss +from nemo.collections.asr.metrics.rnnt_wer_bpe import RNNTBPEWER, RNNTBPEDecoding, RNNTBPEDecodingConfig +from nemo.collections.asr.parts.mixins import ASRBPEMixin +from nemo.collections.multimodal.speech_cv.data import video_to_text_dataset +from nemo.collections.multimodal.speech_cv.models.visual_rnnt_models import VisualEncDecRNNTModel +from nemo.core.classes.common import PretrainedModelInfo +from nemo.utils import logging, model_utils + + +class VisualEncDecRNNTBPEModel(VisualEncDecRNNTModel, ASRBPEMixin): + """Base class for encoder decoder RNNT-based models with subword tokenization.""" + + def __init__(self, cfg: DictConfig, trainer: Trainer = None): + # Convert to Hydra 1.0 compatible DictConfig + cfg = model_utils.convert_model_config_to_dict_config(cfg) + cfg = model_utils.maybe_update_config_version(cfg) + + # Tokenizer is necessary for this model + if 'tokenizer' not in cfg: + raise ValueError("`cfg` must have `tokenizer` config to create a tokenizer !") + + if not isinstance(cfg, DictConfig): + cfg = OmegaConf.create(cfg) + + # Setup the tokenizer + self._setup_tokenizer(cfg.tokenizer) + + # Initialize a dummy vocabulary + vocabulary = self.tokenizer.tokenizer.get_vocab() + + # Set the new vocabulary + with open_dict(cfg): + cfg.labels = ListConfig(list(vocabulary)) + + with open_dict(cfg.decoder): + cfg.decoder.vocab_size = len(vocabulary) + + with open_dict(cfg.joint): + cfg.joint.num_classes = len(vocabulary) + cfg.joint.vocabulary = ListConfig(list(vocabulary)) + cfg.joint.jointnet.encoder_hidden = cfg.model_defaults.enc_hidden + cfg.joint.jointnet.pred_hidden = cfg.model_defaults.pred_hidden + + super().__init__(cfg=cfg, trainer=trainer) + + # Setup decoding object + self.decoding = RNNTBPEDecoding( + decoding_cfg=self.cfg.decoding, decoder=self.decoder, joint=self.joint, tokenizer=self.tokenizer, + ) + + # Setup wer object + self.wer = RNNTBPEWER( + decoding=self.decoding, + batch_dim_index=0, + use_cer=self._cfg.get('use_cer', False), + log_prediction=self._cfg.get('log_prediction', True), + dist_sync_on_step=True, + ) + + # Setup fused Joint step if flag is set + if self.joint.fuse_loss_wer: + self.joint.set_loss(self.loss) + self.joint.set_wer(self.wer) + + def change_vocabulary( + self, + new_tokenizer_dir: Union[str, DictConfig], + new_tokenizer_type: str, + decoding_cfg: Optional[DictConfig] = None, + ): + """ + Changes vocabulary used during RNNT decoding process. Use this method when fine-tuning on from pre-trained model. + This method changes only decoder and leaves encoder and pre-processing modules unchanged. For example, you would + use it if you want to use pretrained encoder when fine-tuning on data in another language, or when you'd need + model to learn capitalization, punctuation and/or special characters. + + Args: + new_tokenizer_dir: Directory path to tokenizer or a config for a new tokenizer (if the tokenizer type is `agg`) + new_tokenizer_type: Type of tokenizer. Can be either `agg`, `bpe` or `wpe`. + decoding_cfg: A config for the decoder, which is optional. If the decoding type + needs to be changed (from say Greedy to Beam decoding etc), the config can be passed here. + + Returns: None + + """ + if isinstance(new_tokenizer_dir, DictConfig): + if new_tokenizer_type == 'agg': + new_tokenizer_cfg = new_tokenizer_dir + else: + raise ValueError( + f'New tokenizer dir should be a string unless the tokenizer is `agg`, but this tokenizer type is: {new_tokenizer_type}' + ) + else: + new_tokenizer_cfg = None + + if new_tokenizer_cfg is not None: + tokenizer_cfg = new_tokenizer_cfg + else: + if not os.path.isdir(new_tokenizer_dir): + raise NotADirectoryError( + f'New tokenizer dir must be non-empty path to a directory. But I got: {new_tokenizer_dir}' + ) + + if new_tokenizer_type.lower() not in ('bpe', 'wpe'): + raise ValueError(f'New tokenizer type must be either `bpe` or `wpe`') + + tokenizer_cfg = OmegaConf.create({'dir': new_tokenizer_dir, 'type': new_tokenizer_type}) + + # Setup the tokenizer + self._setup_tokenizer(tokenizer_cfg) + + # Initialize a dummy vocabulary + vocabulary = self.tokenizer.tokenizer.get_vocab() + + joint_config = self.joint.to_config_dict() + new_joint_config = copy.deepcopy(joint_config) + if self.tokenizer_type == "agg": + new_joint_config["vocabulary"] = ListConfig(vocabulary) + else: + new_joint_config["vocabulary"] = ListConfig(list(vocabulary.keys())) + + new_joint_config['num_classes'] = len(vocabulary) + del self.joint + self.joint = VisualEncDecRNNTBPEModel.from_config_dict(new_joint_config) + + decoder_config = self.decoder.to_config_dict() + new_decoder_config = copy.deepcopy(decoder_config) + new_decoder_config.vocab_size = len(vocabulary) + del self.decoder + self.decoder = VisualEncDecRNNTBPEModel.from_config_dict(new_decoder_config) + + del self.loss + self.loss = RNNTLoss(num_classes=self.joint.num_classes_with_blank - 1) + + if decoding_cfg is None: + # Assume same decoding config as before + decoding_cfg = self.cfg.decoding + + # Assert the decoding config with all hyper parameters + decoding_cls = OmegaConf.structured(RNNTBPEDecodingConfig) + decoding_cls = OmegaConf.create(OmegaConf.to_container(decoding_cls)) + decoding_cfg = OmegaConf.merge(decoding_cls, decoding_cfg) + + self.decoding = RNNTBPEDecoding( + decoding_cfg=decoding_cfg, decoder=self.decoder, joint=self.joint, tokenizer=self.tokenizer, + ) + + self.wer = RNNTBPEWER( + decoding=self.decoding, + batch_dim_index=self.wer.batch_dim_index, + use_cer=self.wer.use_cer, + log_prediction=self.wer.log_prediction, + dist_sync_on_step=True, + ) + + # Setup fused Joint step + if self.joint.fuse_loss_wer or ( + self.decoding.joint_fused_batch_size is not None and self.decoding.joint_fused_batch_size > 0 + ): + self.joint.set_loss(self.loss) + self.joint.set_wer(self.wer) + + # Update config + with open_dict(self.cfg.joint): + self.cfg.joint = new_joint_config + + with open_dict(self.cfg.decoder): + self.cfg.decoder = new_decoder_config + + with open_dict(self.cfg.decoding): + self.cfg.decoding = decoding_cfg + + logging.info(f"Changed decoder to output to {self.joint.vocabulary} vocabulary.") + + def change_decoding_strategy(self, decoding_cfg: DictConfig): + """ + Changes decoding strategy used during RNNT decoding process. + + Args: + decoding_cfg: A config for the decoder, which is optional. If the decoding type + needs to be changed (from say Greedy to Beam decoding etc), the config can be passed here. + """ + if decoding_cfg is None: + # Assume same decoding config as before + logging.info("No `decoding_cfg` passed when changing decoding strategy, using internal config") + decoding_cfg = self.cfg.decoding + + # Assert the decoding config with all hyper parameters + decoding_cls = OmegaConf.structured(RNNTBPEDecodingConfig) + decoding_cls = OmegaConf.create(OmegaConf.to_container(decoding_cls)) + decoding_cfg = OmegaConf.merge(decoding_cls, decoding_cfg) + + self.decoding = RNNTBPEDecoding( + decoding_cfg=decoding_cfg, decoder=self.decoder, joint=self.joint, tokenizer=self.tokenizer, + ) + + self.wer = RNNTBPEWER( + decoding=self.decoding, + batch_dim_index=self.wer.batch_dim_index, + use_cer=self.wer.use_cer, + log_prediction=self.wer.log_prediction, + dist_sync_on_step=True, + ) + + # Setup fused Joint step + if self.joint.fuse_loss_wer or ( + self.decoding.joint_fused_batch_size is not None and self.decoding.joint_fused_batch_size > 0 + ): + self.joint.set_loss(self.loss) + self.joint.set_wer(self.wer) + + # Update config + with open_dict(self.cfg.decoding): + self.cfg.decoding = decoding_cfg + + logging.info(f"Changed decoding strategy to \n{OmegaConf.to_yaml(self.cfg.decoding)}") + + def _setup_dataloader_from_config(self, config: Optional[Dict]): + dataset = video_to_text_dataset.get_video_to_text_bpe_dataset_from_config( + config=config, + local_rank=self.local_rank, + global_rank=self.global_rank, + world_size=self.world_size, + tokenizer=self.tokenizer, + preprocessor_cfg=self.cfg.get("preprocessor", None), + ) + + if dataset is None: + return None + + shuffle = config['shuffle'] + if config.get('is_tarred', False): + shuffle = False + + if hasattr(dataset, 'collate_fn'): + collate_fn = dataset.collate_fn + else: + collate_fn = dataset.datasets[0].collate_fn + + return torch.utils.data.DataLoader( + dataset=dataset, + batch_size=config['batch_size'], + collate_fn=collate_fn, + drop_last=config.get('drop_last', False), + shuffle=shuffle, + num_workers=config.get('num_workers', 0), + pin_memory=config.get('pin_memory', False), + ) + + def _setup_transcribe_dataloader(self, config: Dict) -> 'torch.utils.data.DataLoader': + """ + Setup function for a temporary data loader which wraps the provided video file. + + Args: + config: A python dictionary which contains the following keys: + paths2video_files: (a list) of paths to video files. The files should be relatively short fragments. \ + Recommended length per file is between 5 and 25 seconds. + batch_size: (int) batch size to use during inference. \ + Bigger will result in better throughput performance but would use more memory. + temp_dir: (str) A temporary directory where the video manifest is temporarily + stored. + + Returns: + A pytorch DataLoader for the given video file(s). + """ + if 'manifest_filepath' in config: + manifest_filepath = config['manifest_filepath'] + batch_size = config['batch_size'] + else: + manifest_filepath = os.path.join(config['temp_dir'], 'manifest.json') + batch_size = min(config['batch_size'], len(config['paths2video_files'])) + + dl_config = { + 'manifest_filepath': manifest_filepath, + 'batch_size': batch_size, + 'shuffle': False, + 'num_workers': config.get('num_workers', min(batch_size, os.cpu_count() - 1)), + 'pin_memory': True, + 'channel_selector': config.get('channel_selector', None), + 'use_start_end_token': self.cfg.validation_ds.get('use_start_end_token', False), + } + + if config.get("augmentor"): + dl_config['augmentor'] = config.get("augmentor") + + temporary_datalayer = self._setup_dataloader_from_config(config=DictConfig(dl_config)) + return temporary_datalayer + + @classmethod + def list_available_models(cls) -> List[PretrainedModelInfo]: + """ + This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud. + + Returns: + List of available pre-trained models. + """ + results = [] + + return results diff --git a/nemo/collections/multimodal/speech_cv/models/visual_rnnt_models.py b/nemo/collections/multimodal/speech_cv/models/visual_rnnt_models.py new file mode 100644 index 000000000000..0bd3e2fd1563 --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/models/visual_rnnt_models.py @@ -0,0 +1,920 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import json +import os +import tempfile +from math import ceil, isclose +from typing import Dict, List, Optional, Tuple, Union + +import torch +from omegaconf import DictConfig, OmegaConf, open_dict +from pytorch_lightning import Trainer +from tqdm.auto import tqdm + +from nemo.collections.asr.data import audio_to_text_dataset +from nemo.collections.asr.losses.rnnt import RNNTLoss, resolve_rnnt_default_loss_name +from nemo.collections.asr.metrics.rnnt_wer import RNNTWER, RNNTDecoding, RNNTDecodingConfig +from nemo.collections.asr.models.asr_model import ASRModel +from nemo.collections.asr.modules.rnnt import RNNTDecoderJoint +from nemo.collections.asr.parts.mixins import ASRModuleMixin +from nemo.collections.asr.parts.utils.audio_utils import ChannelSelectorType +from nemo.collections.multimodal.speech_cv.data import video_to_text_dataset +from nemo.core.classes import Exportable +from nemo.core.classes.common import PretrainedModelInfo, typecheck +from nemo.core.classes.mixins import AccessMixin +from nemo.core.neural_types import AcousticEncodedRepresentation, LengthsType, NeuralType, VideoSignal +from nemo.utils import logging + + +class VisualEncDecRNNTModel(ASRModel, ASRModuleMixin, Exportable): + """Base class for encoder decoder RNNT-based models.""" + + def __init__(self, cfg: DictConfig, trainer: Trainer = None): + # Get global rank and total number of GPU workers for IterableDataset partitioning, if applicable + # Global_rank and local_rank is set by LightningModule in Lightning 1.2.0 + self.world_size = 1 + if trainer is not None: + self.world_size = trainer.world_size + + super().__init__(cfg=cfg, trainer=trainer) + + # Preprocessors + self.video_preprocessor = VisualEncDecRNNTModel.from_config_dict(self._cfg.video_preprocessor) + + # Augmentations + self.video_augmentation = VisualEncDecRNNTModel.from_config_dict(self._cfg.video_augment) + + # Front-end Networks + self.video_front_end = VisualEncDecRNNTModel.from_config_dict(self._cfg.video_front_end) + + # Back-end Networks + self.encoder = VisualEncDecRNNTModel.from_config_dict(self._cfg.encoder) + + # Update config values required by components dynamically + with open_dict(self.cfg.decoder): + self.cfg.decoder.vocab_size = len(self.cfg.labels) + + with open_dict(self.cfg.joint): + self.cfg.joint.num_classes = len(self.cfg.labels) + self.cfg.joint.vocabulary = self.cfg.labels + self.cfg.joint.jointnet.encoder_hidden = self.cfg.model_defaults.enc_hidden + self.cfg.joint.jointnet.pred_hidden = self.cfg.model_defaults.pred_hidden + + self.decoder = VisualEncDecRNNTModel.from_config_dict(self.cfg.decoder) + self.joint = VisualEncDecRNNTModel.from_config_dict(self.cfg.joint) + + # Setup RNNT Loss + loss_name, loss_kwargs = self.extract_rnnt_loss_cfg(self.cfg.get("loss", None)) + + self.loss = RNNTLoss( + num_classes=self.joint.num_classes_with_blank - 1, + loss_name=loss_name, + loss_kwargs=loss_kwargs, + reduction=self.cfg.get("rnnt_reduction", "mean_batch"), + ) + + # Setup decoding objects + self.decoding = RNNTDecoding( + decoding_cfg=self.cfg.decoding, decoder=self.decoder, joint=self.joint, vocabulary=self.joint.vocabulary, + ) + # Setup WER calculation + self.wer = RNNTWER( + decoding=self.decoding, + batch_dim_index=0, + use_cer=self._cfg.get('use_cer', False), + log_prediction=self._cfg.get('log_prediction', True), + dist_sync_on_step=True, + ) + + # Whether to compute loss during evaluation + if 'compute_eval_loss' in self.cfg: + self.compute_eval_loss = self.cfg.compute_eval_loss + else: + self.compute_eval_loss = True + + # Setup fused Joint step if flag is set + if self.joint.fuse_loss_wer or ( + self.decoding.joint_fused_batch_size is not None and self.decoding.joint_fused_batch_size > 0 + ): + self.joint.set_loss(self.loss) + self.joint.set_wer(self.wer) + + # Setup optimization normalization (if provided in config) + self.setup_optim_normalization() + + # Setup optional Optimization flags + self.setup_optimization_flags() + + # Setup encoder adapters (from ASRAdapterModelMixin) + self.setup_adapters() + + def setup_optim_normalization(self): + """ + Helper method to setup normalization of certain parts of the model prior to the optimization step. + + Supported pre-optimization normalizations are as follows: + + .. code-block:: yaml + + # Variation Noise injection + model: + variational_noise: + std: 0.0 + start_step: 0 + + # Joint - Length normalization + model: + normalize_joint_txu: false + + # Encoder Network - gradient normalization + model: + normalize_encoder_norm: false + + # Decoder / Prediction Network - gradient normalization + model: + normalize_decoder_norm: false + + # Joint - gradient normalization + model: + normalize_joint_norm: false + """ + # setting up the variational noise for the decoder + if hasattr(self.cfg, 'variational_noise'): + self._optim_variational_noise_std = self.cfg['variational_noise'].get('std', 0) + self._optim_variational_noise_start = self.cfg['variational_noise'].get('start_step', 0) + else: + self._optim_variational_noise_std = 0 + self._optim_variational_noise_start = 0 + + # Setup normalized gradients for model joint by T x U scaling factor (joint length normalization) + self._optim_normalize_joint_txu = self.cfg.get('normalize_joint_txu', False) + self._optim_normalize_txu = None + + # Setup normalized encoder norm for model + self._optim_normalize_encoder_norm = self.cfg.get('normalize_encoder_norm', False) + + # Setup normalized decoder norm for model + self._optim_normalize_decoder_norm = self.cfg.get('normalize_decoder_norm', False) + + # Setup normalized joint norm for model + self._optim_normalize_joint_norm = self.cfg.get('normalize_joint_norm', False) + + def extract_rnnt_loss_cfg(self, cfg: Optional[DictConfig]): + """ + Helper method to extract the rnnt loss name, and potentially its kwargs + to be passed. + + Args: + cfg: Should contain `loss_name` as a string which is resolved to a RNNT loss name. + If the default should be used, then `default` can be used. + Optionally, one can pass additional kwargs to the loss function. The subdict + should have a keyname as follows : `{loss_name}_kwargs`. + + Note that whichever loss_name is selected, that corresponding kwargs will be + selected. For the "default" case, the "{resolved_default}_kwargs" will be used. + + Examples: + .. code-block:: yaml + + loss_name: "default" + warprnnt_numba_kwargs: + kwargs2: some_other_val + + Returns: + A tuple, the resolved loss name as well as its kwargs (if found). + """ + if cfg is None: + cfg = DictConfig({}) + + loss_name = cfg.get("loss_name", "default") + + if loss_name == "default": + loss_name = resolve_rnnt_default_loss_name() + + loss_kwargs = cfg.get(f"{loss_name}_kwargs", None) + + logging.info(f"Using RNNT Loss : {loss_name}\n" f"Loss {loss_name}_kwargs: {loss_kwargs}") + + return loss_name, loss_kwargs + + @torch.no_grad() + def transcribe( + self, + paths2video_files: List[str], + batch_size: int = 4, + return_hypotheses: bool = False, + partial_hypothesis: Optional[List['Hypothesis']] = None, + num_workers: int = 0, + channel_selector: Optional[ChannelSelectorType] = None, + augmentor: DictConfig = None, + ) -> Tuple[List[str], Optional[List['Hypothesis']]]: + """ + Uses greedy decoding to transcribe video files. Use this method for debugging and prototyping. + + Args: + + paths2video_files: (a list) of paths to video files. + batch_size: (int) batch size to use during inference. \ + Bigger will result in better throughput performance but would use more memory. + return_hypotheses: (bool) Either return hypotheses or text + With hypotheses can do some postprocessing like getting timestamp or rescoring + num_workers: (int) number of workers for DataLoader + channel_selector (int | Iterable[int] | str): select a single channel or a subset of channels from multi-channel audio. If set to `'average'`, it performs averaging across channels. Disabled if set to `None`. Defaults to `None`. Uses zero-based indexing. + augmentor: (DictConfig): Augment audio samples during transcription if augmentor is applied. + Returns: + Returns a tuple of 2 items - + * A list of greedy transcript texts / Hypothesis + * An optional list of beam search transcript texts / Hypothesis / NBestHypothesis. + """ + if paths2video_files is None or len(paths2video_files) == 0: + return {} + # We will store transcriptions here + hypotheses = [] + all_hypotheses = [] + # Model's mode and device + mode = self.training + device = next(self.parameters()).device + + if num_workers is None: + num_workers = min(batch_size, os.cpu_count() - 1) + + try: + + # Switch model to evaluation mode + self.eval() + # Freeze the visual front-end, encoder and decoder modules + self.video_front_end.freeze() + self.encoder.freeze() + self.decoder.freeze() + self.joint.freeze() + logging_level = logging.get_verbosity() + logging.set_verbosity(logging.WARNING) + # Work in tmp directory - will store manifest file there + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, 'manifest.json'), 'w', encoding='utf-8') as fp: + for video_file in paths2video_files: + entry = {'video_filepath': video_file, 'duration': 100000, 'text': ''} + fp.write(json.dumps(entry) + '\n') + + config = { + 'paths2video_files': paths2video_files, + 'batch_size': batch_size, + 'temp_dir': tmpdir, + 'num_workers': num_workers, + 'channel_selector': channel_selector, + } + + if augmentor: + config['augmentor'] = augmentor + + temporary_datalayer = self._setup_transcribe_dataloader(config) + for test_batch in tqdm(temporary_datalayer, desc="Transcribing"): + encoded, encoded_len = self.forward( + input_signal=test_batch[0].to(device), input_signal_length=test_batch[1].to(device) + ) + best_hyp, all_hyp = self.decoding.rnnt_decoder_predictions_tensor( + encoded, + encoded_len, + return_hypotheses=return_hypotheses, + partial_hypotheses=partial_hypothesis, + ) + + hypotheses += best_hyp + if all_hyp is not None: + all_hypotheses += all_hyp + else: + all_hypotheses += best_hyp + + del encoded + del test_batch + finally: + # set mode back to its original value + self.train(mode=mode) + + logging.set_verbosity(logging_level) + if mode is True: + self.video_front_end.unfreeze() + self.encoder.unfreeze() + self.decoder.unfreeze() + self.joint.unfreeze() + return hypotheses, all_hypotheses + + def change_vocabulary(self, new_vocabulary: List[str], decoding_cfg: Optional[DictConfig] = None): + """ + Changes vocabulary used during RNNT decoding process. Use this method when fine-tuning a pre-trained model. + This method changes only decoder and leaves encoder and pre-processing modules unchanged. For example, you would + use it if you want to use pretrained encoder when fine-tuning on data in another language, or when you'd need + model to learn capitalization, punctuation and/or special characters. + + Args: + new_vocabulary: list with new vocabulary. Must contain at least 2 elements. Typically, \ + this is target alphabet. + decoding_cfg: A config for the decoder, which is optional. If the decoding type + needs to be changed (from say Greedy to Beam decoding etc), the config can be passed here. + + Returns: None + + """ + if self.joint.vocabulary == new_vocabulary: + logging.warning(f"Old {self.joint.vocabulary} and new {new_vocabulary} match. Not changing anything.") + else: + if new_vocabulary is None or len(new_vocabulary) == 0: + raise ValueError(f'New vocabulary must be non-empty list of chars. But I got: {new_vocabulary}') + + joint_config = self.joint.to_config_dict() + new_joint_config = copy.deepcopy(joint_config) + new_joint_config['vocabulary'] = new_vocabulary + new_joint_config['num_classes'] = len(new_vocabulary) + del self.joint + self.joint = VisualEncDecRNNTModel.from_config_dict(new_joint_config) + + decoder_config = self.decoder.to_config_dict() + new_decoder_config = copy.deepcopy(decoder_config) + new_decoder_config.vocab_size = len(new_vocabulary) + del self.decoder + self.decoder = VisualEncDecRNNTModel.from_config_dict(new_decoder_config) + + del self.loss + loss_name, loss_kwargs = self.extract_rnnt_loss_cfg(self.cfg.get('loss', None)) + self.loss = RNNTLoss( + num_classes=self.joint.num_classes_with_blank - 1, loss_name=loss_name, loss_kwargs=loss_kwargs + ) + + if decoding_cfg is None: + # Assume same decoding config as before + decoding_cfg = self.cfg.decoding + + # Assert the decoding config with all hyper parameters + decoding_cls = OmegaConf.structured(RNNTDecodingConfig) + decoding_cls = OmegaConf.create(OmegaConf.to_container(decoding_cls)) + decoding_cfg = OmegaConf.merge(decoding_cls, decoding_cfg) + + self.decoding = RNNTDecoding( + decoding_cfg=decoding_cfg, decoder=self.decoder, joint=self.joint, vocabulary=self.joint.vocabulary, + ) + + self.wer = RNNTWER( + decoding=self.decoding, + batch_dim_index=self.wer.batch_dim_index, + use_cer=self.wer.use_cer, + log_prediction=self.wer.log_prediction, + dist_sync_on_step=True, + ) + + # Setup fused Joint step + if self.joint.fuse_loss_wer or ( + self.decoding.joint_fused_batch_size is not None and self.decoding.joint_fused_batch_size > 0 + ): + self.joint.set_loss(self.loss) + self.joint.set_wer(self.wer) + + # Update config + with open_dict(self.cfg.joint): + self.cfg.joint = new_joint_config + + with open_dict(self.cfg.decoder): + self.cfg.decoder = new_decoder_config + + with open_dict(self.cfg.decoding): + self.cfg.decoding = decoding_cfg + + ds_keys = ['train_ds', 'validation_ds', 'test_ds'] + for key in ds_keys: + if key in self.cfg: + with open_dict(self.cfg[key]): + self.cfg[key]['labels'] = OmegaConf.create(new_vocabulary) + + logging.info(f"Changed decoder to output to {self.joint.vocabulary} vocabulary.") + + def change_decoding_strategy(self, decoding_cfg: DictConfig): + """ + Changes decoding strategy used during RNNT decoding process. + + Args: + decoding_cfg: A config for the decoder, which is optional. If the decoding type + needs to be changed (from say Greedy to Beam decoding etc), the config can be passed here. + """ + if decoding_cfg is None: + # Assume same decoding config as before + logging.info("No `decoding_cfg` passed when changing decoding strategy, using internal config") + decoding_cfg = self.cfg.decoding + + # Assert the decoding config with all hyper parameters + decoding_cls = OmegaConf.structured(RNNTDecodingConfig) + decoding_cls = OmegaConf.create(OmegaConf.to_container(decoding_cls)) + decoding_cfg = OmegaConf.merge(decoding_cls, decoding_cfg) + + self.decoding = RNNTDecoding( + decoding_cfg=decoding_cfg, decoder=self.decoder, joint=self.joint, vocabulary=self.joint.vocabulary, + ) + + self.wer = RNNTWER( + decoding=self.decoding, + batch_dim_index=self.wer.batch_dim_index, + use_cer=self.wer.use_cer, + log_prediction=self.wer.log_prediction, + dist_sync_on_step=True, + ) + + # Setup fused Joint step + if self.joint.fuse_loss_wer or ( + self.decoding.joint_fused_batch_size is not None and self.decoding.joint_fused_batch_size > 0 + ): + self.joint.set_loss(self.loss) + self.joint.set_wer(self.wer) + + # Update config + with open_dict(self.cfg.decoding): + self.cfg.decoding = decoding_cfg + + logging.info(f"Changed decoding strategy to \n{OmegaConf.to_yaml(self.cfg.decoding)}") + + def _setup_dataloader_from_config(self, config: Optional[Dict]): + # Automatically inject args from model config to dataloader config + audio_to_text_dataset.inject_dataloader_value_from_model_config(self.cfg, config, key='sample_rate') + audio_to_text_dataset.inject_dataloader_value_from_model_config(self.cfg, config, key='labels') + dataset = video_to_text_dataset.get_video_to_text_bpe_dataset_from_config( + config=config, + local_rank=self.local_rank, + global_rank=self.global_rank, + world_size=self.world_size, + preprocessor_cfg=self._cfg.get("preprocessor", None), + ) + + if dataset is None: + return None + + shuffle = config['shuffle'] + if config.get('is_tarred', False): + shuffle = False + + if hasattr(dataset, 'collate_fn'): + collate_fn = dataset.collate_fn + else: + collate_fn = dataset.datasets[0].collate_fn + + return torch.utils.data.DataLoader( + dataset=dataset, + batch_size=config['batch_size'], + collate_fn=collate_fn, + drop_last=config.get('drop_last', False), + shuffle=shuffle, + num_workers=config.get('num_workers', 0), + pin_memory=config.get('pin_memory', False), + ) + + def setup_training_data(self, train_data_config: Optional[Union[DictConfig, Dict]]): + """ + Sets up the training data loader via a Dict-like object. + + Args: + train_data_config: A config that contains the information regarding construction + of an ASR Training dataset. + + Supported Datasets: + - :class:`~nemo.collections.multimodal.speech_cv.data.video_to_text.VideoToCharDataset` + - :class:`~nemo.collections.asr.data.video_to_text.VideoToBPEDataset` + - :class:`~nemo.collections.asr.data.video_to_text.TarredVideoToBPEDataset` + """ + if 'shuffle' not in train_data_config: + train_data_config['shuffle'] = True + + # preserve config + self._update_dataset_config(dataset_name='train', config=train_data_config) + + self._train_dl = self._setup_dataloader_from_config(config=train_data_config) + + # Need to set this because if using an IterableDataset, the length of the dataloader is the total number + # of samples rather than the number of batches, and this messes up the tqdm progress bar. + # So we set the number of steps manually (to the correct number) to fix this. + if 'is_tarred' in train_data_config and train_data_config['is_tarred']: + # We also need to check if limit_train_batches is already set. + # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, + # and don't change it. Otherwise, adjust batches accordingly if it's a float (including 1.0). + if self._trainer is not None and isinstance(self._trainer.limit_train_batches, float): + self._trainer.limit_train_batches = int( + self._trainer.limit_train_batches + * ceil((len(self._train_dl.dataset) / self.world_size) / train_data_config['batch_size']) + ) + elif self._trainer is None: + logging.warning( + "Model Trainer was not set before constructing the dataset, incorrect number of " + "training batches will be used. Please set the trainer and rebuild the dataset." + ) + + def setup_validation_data(self, val_data_config: Optional[Union[DictConfig, Dict]]): + """ + Sets up the validation data loader via a Dict-like object. + + Args: + val_data_config: A config that contains the information regarding construction + of an ASR Training dataset. + + Supported Datasets: + - :class:`~nemo.collections.multimodal.speech_cv.data.video_to_text.VideoToCharDataset` + - :class:`~nemo.collections.asr.data.video_to_text.VideoToBPEDataset` + - :class:`~nemo.collections.asr.data.video_to_text.TarredVideoToBPEDataset` + """ + if 'shuffle' not in val_data_config: + val_data_config['shuffle'] = False + + # preserve config + self._update_dataset_config(dataset_name='validation', config=val_data_config) + + self._validation_dl = self._setup_dataloader_from_config(config=val_data_config) + + def setup_test_data(self, test_data_config: Optional[Union[DictConfig, Dict]]): + """ + Sets up the test data loader via a Dict-like object. + + Args: + test_data_config: A config that contains the information regarding construction + of an ASR Training dataset. + + Supported Datasets: + - :class:`~nemo.collections.multimodal.speech_cv.data.video_to_text.VideoToCharDataset` + - :class:`~nemo.collections.asr.data.video_to_text.VideoToBPEDataset` + - :class:`~nemo.collections.asr.data.video_to_text.TarredVideoToBPEDataset` + """ + if 'shuffle' not in test_data_config: + test_data_config['shuffle'] = False + + # preserve config + self._update_dataset_config(dataset_name='test', config=test_data_config) + + self._test_dl = self._setup_dataloader_from_config(config=test_data_config) + + @property + def input_types(self) -> Optional[Dict[str, NeuralType]]: + + return { + "input_signal": NeuralType(('B', 'C', 'T', 'H', 'W'), VideoSignal(), optional=True), + "input_signal_length": NeuralType(tuple('B'), LengthsType(), optional=True), + } + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + return { + "outputs": NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()), + "encoded_lengths": NeuralType(tuple('B'), LengthsType()), + } + + @typecheck() + def forward(self, input_signal=None, input_signal_length=None): + """ + Forward pass of the model. Note that for RNNT Models, the forward pass of the model is a 3 step process, + and this method only performs the first step - forward of the acoustic/visual model. + + Please refer to the `training_step` in order to see the full `forward` step for training - which + performs the forward of the acoustic model, the prediction network and then the joint network. + Finally, it computes the loss and possibly compute the detokenized text via the `decoding` step. + + Please refer to the `validation_step` in order to see the full `forward` step for inference - which + performs the forward of the acoustic model, the prediction network and then the joint network. + Finally, it computes the decoded tokens via the `decoding` step and possibly compute the batch metrics. + + Args: + input_signal: Tensor that represents a batch of video signals, + of shape [B, T, H, W, C]. T here represents timesteps, H height, W width and C channels + input_signal_length: Vector of length B, that contains the individual lengths of the video + sequences. + + Returns: + A tuple of 2 elements - + 1) The log probabilities tensor of shape [B, T, D]. + 2) The lengths of the acoustic sequence after propagation through the encoder, of shape [B]. + """ + + # Preprocessing + processed_video_signal, processed_video_signal_length = self.video_preprocessor( + input_signal=input_signal, length=input_signal_length + ) + + # Augmentation + processed_video_signal = self.video_augmentation( + input_signal=processed_video_signal, length=processed_video_signal_length + ) + + # Front-end Networks + processed_video_signal, processed_video_signal_length = self.video_front_end( + input_signal=processed_video_signal, length=processed_video_signal_length + ) + + # Back-end Networks + encoded, encoded_len = self.encoder(audio_signal=processed_video_signal, length=processed_video_signal_length) + + return encoded, encoded_len + + # PTL-specific methods + def training_step(self, batch, batch_nb): + # Reset access registry + if AccessMixin.is_access_enabled(): + AccessMixin.reset_registry(self) + + signal, signal_len, transcript, transcript_len = batch + + # forward() only performs encoder forward + encoded, encoded_len = self.forward(input_signal=signal, input_signal_length=signal_len) + del signal + + # During training, loss must be computed, so decoder forward is necessary + decoder, target_length, states = self.decoder(targets=transcript, target_length=transcript_len) + + if hasattr(self, '_trainer') and self._trainer is not None: + log_every_n_steps = self._trainer.log_every_n_steps + sample_id = self._trainer.global_step + else: + log_every_n_steps = 1 + sample_id = batch_nb + + # If experimental fused Joint-Loss-WER is not used + if not self.joint.fuse_loss_wer: + # Compute full joint and loss + joint = self.joint(encoder_outputs=encoded, decoder_outputs=decoder) + loss_value = self.loss( + log_probs=joint, targets=transcript, input_lengths=encoded_len, target_lengths=target_length + ) + + # Add auxiliary losses, if registered + loss_value = self.add_auxiliary_losses(loss_value) + + # Reset access registry + if AccessMixin.is_access_enabled(): + AccessMixin.reset_registry(self) + + tensorboard_logs = { + 'train_loss': loss_value, + 'learning_rate': self._optimizer.param_groups[0]['lr'], + 'global_step': torch.tensor(self.trainer.global_step, dtype=torch.float32), + } + + if (sample_id + 1) % log_every_n_steps == 0: + self.wer.update(encoded, encoded_len, transcript, transcript_len) + _, scores, words = self.wer.compute() + self.wer.reset() + tensorboard_logs.update({'training_batch_wer': scores.float() / words}) + + else: + # If experimental fused Joint-Loss-WER is used + if (sample_id + 1) % log_every_n_steps == 0: + compute_wer = True + else: + compute_wer = False + + # Fused joint step + loss_value, wer, _, _ = self.joint( + encoder_outputs=encoded, + decoder_outputs=decoder, + encoder_lengths=encoded_len, + transcripts=transcript, + transcript_lengths=transcript_len, + compute_wer=compute_wer, + ) + + # Add auxiliary losses, if registered + loss_value = self.add_auxiliary_losses(loss_value) + + # Reset access registry + if AccessMixin.is_access_enabled(): + AccessMixin.reset_registry(self) + + tensorboard_logs = { + 'train_loss': loss_value, + 'learning_rate': self._optimizer.param_groups[0]['lr'], + 'global_step': torch.tensor(self.trainer.global_step, dtype=torch.float32), + } + + if compute_wer: + tensorboard_logs.update({'training_batch_wer': wer}) + + # Log items + self.log_dict(tensorboard_logs) + + # Preserve batch acoustic model T and language model U parameters if normalizing + if self._optim_normalize_joint_txu: + self._optim_normalize_txu = [encoded_len.max(), transcript_len.max()] + + return {'loss': loss_value} + + def predict_step(self, batch, batch_idx, dataloader_idx=0): + signal, signal_len, transcript, transcript_len, sample_id = batch + + # forward() only performs encoder forward + encoded, encoded_len = self.forward(input_signal=signal, input_signal_length=signal_len) + del signal + + best_hyp_text, all_hyp_text = self.decoding.rnnt_decoder_predictions_tensor( + encoder_output=encoded, encoded_lengths=encoded_len, return_hypotheses=False + ) + + sample_id = sample_id.cpu().detach().numpy() + return list(zip(sample_id, best_hyp_text)) + + def validation_step(self, batch, batch_idx, dataloader_idx=0): + signal, signal_len, transcript, transcript_len = batch + + # forward() only performs encoder forward + encoded, encoded_len = self.forward(input_signal=signal, input_signal_length=signal_len) + del signal + + tensorboard_logs = {} + + # If experimental fused Joint-Loss-WER is not used + if not self.joint.fuse_loss_wer: + if self.compute_eval_loss: + decoder, target_length, states = self.decoder(targets=transcript, target_length=transcript_len) + joint = self.joint(encoder_outputs=encoded, decoder_outputs=decoder) + + loss_value = self.loss( + log_probs=joint, targets=transcript, input_lengths=encoded_len, target_lengths=target_length + ) + + tensorboard_logs['val_loss'] = loss_value + + self.wer.update(encoded, encoded_len, transcript, transcript_len) + wer, wer_num, wer_denom = self.wer.compute() + self.wer.reset() + + tensorboard_logs['val_wer_num'] = wer_num + tensorboard_logs['val_wer_denom'] = wer_denom + tensorboard_logs['val_wer'] = wer + + else: + # If experimental fused Joint-Loss-WER is used + compute_wer = True + + if self.compute_eval_loss: + decoded, target_len, states = self.decoder(targets=transcript, target_length=transcript_len) + else: + decoded = None + target_len = transcript_len + + # Fused joint step + loss_value, wer, wer_num, wer_denom = self.joint( + encoder_outputs=encoded, + decoder_outputs=decoded, + encoder_lengths=encoded_len, + transcripts=transcript, + transcript_lengths=target_len, + compute_wer=compute_wer, + ) + + if loss_value is not None: + tensorboard_logs['val_loss'] = loss_value + + tensorboard_logs['val_wer_num'] = wer_num + tensorboard_logs['val_wer_denom'] = wer_denom + tensorboard_logs['val_wer'] = wer + + self.log('global_step', torch.tensor(self.trainer.global_step, dtype=torch.float32)) + + return tensorboard_logs + + def test_step(self, batch, batch_idx, dataloader_idx=0): + logs = self.validation_step(batch, batch_idx, dataloader_idx=dataloader_idx) + test_logs = { + 'test_wer_num': logs['val_wer_num'], + 'test_wer_denom': logs['val_wer_denom'], + # 'test_wer': logs['val_wer'], + } + if 'val_loss' in logs: + test_logs['test_loss'] = logs['val_loss'] + return test_logs + + def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): + if self.compute_eval_loss: + val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() + val_loss_log = {'val_loss': val_loss_mean} + else: + val_loss_log = {} + wer_num = torch.stack([x['val_wer_num'] for x in outputs]).sum() + wer_denom = torch.stack([x['val_wer_denom'] for x in outputs]).sum() + tensorboard_logs = {**val_loss_log, 'val_wer': wer_num.float() / wer_denom} + return {**val_loss_log, 'log': tensorboard_logs} + + def multi_test_epoch_end(self, outputs, dataloader_idx: int = 0): + if self.compute_eval_loss: + test_loss_mean = torch.stack([x['test_loss'] for x in outputs]).mean() + test_loss_log = {'test_loss': test_loss_mean} + else: + test_loss_log = {} + wer_num = torch.stack([x['test_wer_num'] for x in outputs]).sum() + wer_denom = torch.stack([x['test_wer_denom'] for x in outputs]).sum() + tensorboard_logs = {**test_loss_log, 'test_wer': wer_num.float() / wer_denom} + return {**test_loss_log, 'log': tensorboard_logs} + + def _setup_transcribe_dataloader(self, config: Dict) -> 'torch.utils.data.DataLoader': + """ + Setup function for a temporary data loader which wraps the provided video file. + + Args: + config: A python dictionary which contains the following keys: + paths2video_files: (a list) of paths to video files. The files should be relatively short fragments. \ + Recommended length per file is between 5 and 25 seconds. + batch_size: (int) batch size to use during inference. \ + Bigger will result in better throughput performance but would use more memory. + temp_dir: (str) A temporary directory where the video manifest is temporarily + stored. + + Returns: + A pytorch DataLoader for the given video file(s). + """ + if 'manifest_filepath' in config: + manifest_filepath = config['manifest_filepath'] + batch_size = config['batch_size'] + else: + manifest_filepath = os.path.join(config['temp_dir'], 'manifest.json') + batch_size = min(config['batch_size'], len(config['paths2video_files'])) + + dl_config = { + 'manifest_filepath': manifest_filepath, + 'labels': self.joint.vocabulary, + 'batch_size': batch_size, + 'trim_silence': False, + 'shuffle': False, + 'num_workers': config.get('num_workers', min(batch_size, os.cpu_count() - 1)), + 'pin_memory': True, + } + + if config.get("augmentor"): + dl_config['augmentor'] = config.get("augmentor") + + temporary_datalayer = self._setup_dataloader_from_config(config=DictConfig(dl_config)) + return temporary_datalayer + + def on_after_backward(self): + super().on_after_backward() + if self._optim_variational_noise_std > 0 and self.global_step >= self._optim_variational_noise_start: + for param_name, param in self.decoder.named_parameters(): + if param.grad is not None: + noise = torch.normal( + mean=0.0, + std=self._optim_variational_noise_std, + size=param.size(), + device=param.device, + dtype=param.dtype, + ) + param.grad.data.add_(noise) + + if self._optim_normalize_joint_txu: + T, U = self._optim_normalize_txu + if T is not None and U is not None: + for param_name, param in self.encoder.named_parameters(): + if param.grad is not None: + param.grad.data.div_(U) + + for param_name, param in self.decoder.named_parameters(): + if param.grad is not None: + param.grad.data.div_(T) + + if self._optim_normalize_encoder_norm: + for param_name, param in self.encoder.named_parameters(): + if param.grad is not None: + norm = param.grad.norm() + param.grad.data.div_(norm) + + if self._optim_normalize_decoder_norm: + for param_name, param in self.decoder.named_parameters(): + if param.grad is not None: + norm = param.grad.norm() + param.grad.data.div_(norm) + + if self._optim_normalize_joint_norm: + for param_name, param in self.joint.named_parameters(): + if param.grad is not None: + norm = param.grad.norm() + param.grad.data.div_(norm) + + # EncDecRNNTModel is exported in 2 parts + def list_export_subnets(self): + return ['encoder', 'decoder_joint'] + + # for export + @property + def decoder_joint(self): + return RNNTDecoderJoint(self.decoder, self.joint) + + @classmethod + def list_available_models(cls) -> List[PretrainedModelInfo]: + """ + This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud. + + Returns: + List of available pre-trained models. + """ + results = [] + + return results diff --git a/nemo/collections/multimodal/speech_cv/modules/__init__.py b/nemo/collections/multimodal/speech_cv/modules/__init__.py new file mode 100644 index 000000000000..bfc09080b274 --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/modules/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo.collections.multimodal.speech_cv.modules.linear_projection_video_front_end import ( + LinearProjectionVideoFrontEnd, +) +from nemo.collections.multimodal.speech_cv.modules.resnet_video_front_end import ResNetVideoFrontEnd +from nemo.collections.multimodal.speech_cv.modules.video_augment import VideoAugmentation +from nemo.collections.multimodal.speech_cv.modules.video_preprocessing import VideoPreprocessor diff --git a/nemo/collections/multimodal/speech_cv/modules/linear_projection_video_front_end.py b/nemo/collections/multimodal/speech_cv/modules/linear_projection_video_front_end.py new file mode 100644 index 000000000000..45e797171f2e --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/modules/linear_projection_video_front_end.py @@ -0,0 +1,143 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict + +import torch +from torch import nn + +from nemo.core.classes.module import NeuralModule +from nemo.core.neural_types import LengthsType, NeuralType, VideoSignal + + +class LinearProjectionVideoFrontEnd(NeuralModule): + + """ + Linear Projection Video Front-End for Lip Reading + + The spatial dimension is flattened and projected to dim_output using a Linear layer. + This is equivalent to having a convolution layer with a kernel size of the size of the image. + Circle crop can be used as pre-processing to crop the image as a circle around lips and ignore corner pixels + + Args: + in_channels: number of inputs video channels, 1 for grayscale and 3 for RGB + in_height: image height + in_width: image width + dim_output: output feature dimension for linear projection + out_channels_first: Whether outputs should have channels_first format (Batch, Dout, Time) or channels_last (Batch, Time, Dout) + circle_crop: crop the image as a circle before the Linear layer, default to False + circle_radius: the circle radius, default to 1 for full circle + + """ + + def __init__( + self, + in_channels, + in_height, + in_width, + dim_output, + out_channels_first=True, + circle_crop=False, + circle_radius=1.0, + ): + super(LinearProjectionVideoFrontEnd, self).__init__() + + self.out_channels_first = out_channels_first + self.in_height = in_height + self.in_width = in_width + self.dim_output = dim_output + self.in_channels = in_channels + self.circle_crop = circle_crop + self.circle_radius = circle_radius + self.circle_indices = self.get_circle_indices() + + if self.dim_output is not None: + if self.circle_crop: + self.linear_proj = nn.Linear(in_channels * len(self.circle_indices), dim_output) + else: + self.linear_proj = nn.Linear(in_channels * in_height * in_width, dim_output) + else: + self.linear_proj = nn.Identity() + + @property + def input_types(self): + """Returns definitions of module input ports.""" + return OrderedDict( + { + "audio_signal": NeuralType(('B', 'D', 'T', 'H', 'W'), VideoSignal()), + "length": NeuralType(tuple('B'), LengthsType()), + } + ) + + @property + def input_types_for_export(self): + """Returns definitions of module input ports.""" + return OrderedDict( + { + "output_signal": NeuralType(('B', 'D', 'T'), NeuralType()), + "length": NeuralType(tuple('B'), LengthsType()), + } + ) + + def get_circle_indices(self): + + """ return image indices inside circle of radius circle_radius """ + + # Create linspace + linspace_height = (torch.linspace(0, 2, steps=self.in_height) - 1).abs() + linspace_width = (torch.linspace(0, 2, steps=self.in_width) - 1).abs() + + # Repeat linspace along height/width + linspace_height = linspace_height.unsqueeze(dim=-1).repeat(1, self.in_width).flatten() + linspace_width = linspace_width.repeat(self.in_height) + + # Compute norm + dist = torch.sqrt(linspace_height.square() + linspace_width.square()) + + # Get circle indices + circle_indices = torch.nonzero(dist <= self.circle_radius).squeeze(dim=-1) + + return circle_indices + + def forward(self, input_signal, length): + + # Permute (B, C, T, H, W) -> (B, T, H, W, C) + input_signal = input_signal.permute(0, 2, 3, 4, 1) + + # Circle Crop + if self.circle_crop: + + # Flatten height, width (B, T, H, W, C) -> (B, T, H*W, C) + input_signal = input_signal.flatten(start_dim=2, end_dim=-2) + + # (B, T, H*W, C) -> (B, T, N circle, C) + input_signal = input_signal[:, :, self.circle_indices] + + # Flatten circle and channels (B, T, N circle, C) -> (B, T, N) + input_signal = input_signal.flatten(start_dim=2, end_dim=-1) + + # Flatten height, width and channels (B, T, H, W, C) -> (B, T, N) + else: + input_signal = input_signal.flatten(start_dim=2, end_dim=-1) + + # Project (B, T, N) -> (B, T, Dout) + input_signal = self.linear_proj(input_signal) + + # Transpose to channels_last format (Batch, Dout, Time) -> (Batch, Time, Dout) + if self.out_channels_first: + output_signal = input_signal.transpose(1, 2) + else: + output_signal = input_signal + + return output_signal, length diff --git a/nemo/collections/multimodal/speech_cv/modules/resnet_video_front_end.py b/nemo/collections/multimodal/speech_cv/modules/resnet_video_front_end.py new file mode 100644 index 000000000000..202c629e2b02 --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/modules/resnet_video_front_end.py @@ -0,0 +1,84 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict + +import torch +from torch import nn + +from nemo.collections.multimodal.speech_cv.parts.submodules.resnet import ResNet +from nemo.core.classes.module import NeuralModule +from nemo.core.neural_types import LengthsType, NeuralType, VideoSignal + + +class ResNetVideoFrontEnd(NeuralModule): + """ + Lip Reading / Visual Speech Recognition (VSR) ResNet Front-End Network + + Paper: + 'Audio-Visual Efficient Conformer for Robust Speech Recognition' by Burchi and Timofte + https://arxiv.org/abs/2301.01456 + + Args: + in_channels: number of inputs video channels, 1 for grayscale and 3 for RGB + model: model size in ["ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"] + dim_output: output feature dimension for linear projection after spacial average pooling + out_channels_first: Whether outputs should have channels_first format (Batch, Dout, Time) or channels_last (Batch, Time, Dout) + """ + + def __init__(self, in_channels=1, model="ResNet18", dim_output=256, out_channels_first=True): + super(ResNetVideoFrontEnd, self).__init__() + + self.front_end = nn.Sequential( + nn.Conv3d( + in_channels=in_channels, out_channels=64, kernel_size=(5, 7, 7), stride=(1, 2, 2), padding=(2, 3, 3) + ), + nn.BatchNorm3d(num_features=64), + nn.ReLU(), + nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1)), + ResNet(include_stem=False, dim_output=dim_output, model=model), + ) + + self.out_channels_first = out_channels_first + + @property + def input_types(self): + """Returns definitions of module input ports.""" + return OrderedDict( + { + "audio_signal": NeuralType(('B', 'D', 'T', 'H', 'W'), VideoSignal()), + "length": NeuralType(tuple('B'), LengthsType()), + } + ) + + @property + def input_types_for_export(self): + """Returns definitions of module input ports.""" + return OrderedDict( + { + "output_signal": NeuralType(('B', 'D', 'T'), NeuralType()), + "length": NeuralType(tuple('B'), LengthsType()), + } + ) + + def forward(self, input_signal, length): + + # Front-End Network (Batch, Din, Time, Height, Width) -> (Batch, Dout, Time) + input_signal = self.front_end(input_signal) + + # Transpose to channels_last format (Batch, Dout, Time) -> (Batch, Time, Dout) + if not self.out_channels_first: + input_signal = input_signal.transpose(1, 2) + + return input_signal, length diff --git a/nemo/collections/multimodal/speech_cv/modules/video_augment.py b/nemo/collections/multimodal/speech_cv/modules/video_augment.py new file mode 100644 index 000000000000..e191cc89d1a0 --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/modules/video_augment.py @@ -0,0 +1,225 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random +from collections import OrderedDict + +import torch +from torch import nn + +from nemo.core.classes.module import NeuralModule +from nemo.core.neural_types import NeuralType, VideoSignal + + +try: + import torchvision + + TORCHVISION_AVAILABLE = True +except (ImportError, ModuleNotFoundError): + TORCHVISION_AVAILABLE = False + + +class VideoAugmentation(NeuralModule): + + """ Video Augmentation for batched video input: input_signal shape (B, C, T, H, W) """ + + def __init__( + self, + random_crop, + crop_size, + horizontal_flip, + time_masking, + num_mask_second=1.0, + spatial_masking=False, + mean_frame=True, + ): + super().__init__() + + # Params + self.random_crop = random_crop + self.crop_size = crop_size + self.horizontal_flip = horizontal_flip + self.time_masking = time_masking + self.spatial_masking = spatial_masking + + self.training_augments = nn.ModuleList() + self.inference_augments = nn.ModuleList() + + # Random Crop + if self.random_crop: + if TORCHVISION_AVAILABLE: + self.training_augments.append(torchvision.transforms.RandomCrop(self.crop_size)) + self.inference_augments.append(torchvision.transforms.CenterCrop(self.crop_size)) + else: + raise Exception("RandomCrop transform requires torchvision") + + # Horizontal Flip + if self.horizontal_flip: + if TORCHVISION_AVAILABLE: + self.training_augments.append(torchvision.transforms.RandomHorizontalFlip()) + else: + raise Exception("RandomHorizontalFlip transform requires torchvision") + + # Time Masking + if self.time_masking: + self.training_augments.append(VideoFrameMasking(num_mask_second=num_mask_second, mean_frame=mean_frame)) + + # Spatial Masking + if self.spatial_masking: + self.training_augments.append(SpatialVideoMasking(mean_frame=mean_frame)) + + @property + def input_types(self): + """Returns definitions of module input ports.""" + return OrderedDict({"input_signal": NeuralType(('B', 'D', 'T', 'H', 'W'), VideoSignal()),}) + + @property + def input_types_for_export(self): + """Returns definitions of module input ports.""" + return OrderedDict({"output_signal": NeuralType(('B', 'D', 'T', 'H', 'W'), VideoSignal()),}) + + @torch.no_grad() + def forward(self, input_signal, length): + + if self.training: + augments = self.training_augments + else: + augments = self.inference_augments + + output_signal = input_signal + + for augment in augments: + if isinstance(augment, VideoFrameMasking) or isinstance(augment, SpatialVideoMasking): + output_signal = augment(output_signal, length) + else: + output_signal = augment(output_signal) + + return output_signal + + +class SpatialVideoMasking(NeuralModule): + + """ Spatial Video Mask + + Will mask videos frames in the spatial dimensions using horizontal and vertical masks + + params: + num_horizontal_masks: number of horizontal masks + num_vertical_masks: number of vertical masks + max_h: maximum width of horizontal mask + max_v: maximum width of vertical mask + mean_frame: mask using video mean instead of zeros + + """ + + def __init__(self, num_horizontal_masks=1, num_vertical_masks=1, max_h=30, max_v=30, mean_frame=True): + super().__init__() + + self.num_horizontal_masks = num_horizontal_masks + self.num_vertical_masks = num_vertical_masks + self.max_h = max_h + self.max_v = max_v + self.mean_frame = mean_frame + self.random = random.Random() + + def forward(self, input_signal, length): + + # (B, C, T, H, W) + shape = input_signal.shape + + # Batch loop + for b in range(shape[0]): + + # Mask Value + mask_value = input_signal[b, :, : length[b]].mean() if self.mean_frame else 0.0 + + # Horizontal Mask loop + for i in range(self.num_horizontal_masks): + + # Start index + x = self.random.randint(0, shape[3] - self.max_h) + + # Mask width + w = self.random.randint(0, self.max_h) + + # Apply mask + input_signal[b, :, :, x : x + w] = mask_value + + # Vertical Mask loop + for i in range(self.num_vertical_masks): + + # Start index + x = self.random.randint(0, shape[4] - self.max_v) + + # Mask width + w = self.random.randint(0, self.max_v) + + # Apply mask + input_signal[b, :, :, :, x : x + w] = mask_value + + return input_signal + + +class VideoFrameMasking(NeuralModule): + + """ Video Frame Mask: + + As explained in: + "Visual Speech Recognition for Multiple Languages in the Wild" + https://arxiv.org/abs/2202.13084 + + S6 Time Masking + We mask n consecutive frames with the mean frame of the video. + The duration tn is chosen from 0 to an upper bound nmax using a uniform distribution. + Since there is a large variance in the video lengths of the LRS2 and LRS3 datasets, we set the number of masks proportional to the sequence length. + Specifically, we use one mask per second, and for each mask, the maximum duration nmax is set to 0.4 seconds. + + """ + + def __init__(self, T_second=0.4, num_mask_second=1.0, fps=25.0, mean_frame=True): + super().__init__() + + self.T = int(T_second * fps) + self.num_mask_second = num_mask_second + self.mean_frame = mean_frame + self.fps = fps + self.random = random.Random() + + def forward(self, input_signal, length): + + # (B, C, T, H, W) + shape = input_signal.shape + + # Batch loop + for b in range(shape[0]): + + # Mask per second + mT = int(length[b] / self.fps * self.num_mask_second) + + # Mask Value + mask_value = input_signal[b, :, : length[b]].mean() if self.mean_frame else 0.0 + + # Mask loop + for i in range(mT): + + # Start left Frame + x_left = self.random.randint(0, length[b] - self.T) + + # Mask width + w = self.random.randint(0, self.T) + + # Apply mask + input_signal[b, :, x_left : x_left + w] = mask_value + + return input_signal diff --git a/nemo/collections/multimodal/speech_cv/modules/video_preprocessing.py b/nemo/collections/multimodal/speech_cv/modules/video_preprocessing.py new file mode 100644 index 000000000000..30accea097db --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/modules/video_preprocessing.py @@ -0,0 +1,138 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from torch import nn + +from nemo.collections.multimodal.speech_cv.parts.submodules.permute import Permute +from nemo.core.classes import NeuralModule, typecheck + +try: + import torchvision + + TORCHVISION_AVAILABLE = True +except (ImportError, ModuleNotFoundError): + TORCHVISION_AVAILABLE = False + + +class VideoPreprocessor(NeuralModule): + + """ Video Pre-processing + + args: + grayscale: convert images to grayscale + normalize: normalize videos + resize: resize videos + resize_size: output image size for resize + norm_mean: normalize mean + norm_std: normalize std + + """ + + def __init__(self, grayscale, normalize, resize, resize_size, norm_mean, norm_std): + super().__init__() + + # Params + self.grayscale = grayscale + self.normalize = normalize + self.resize = resize + self.resize_size = resize_size + self.norm_mean = norm_mean + self.norm_std = norm_std + + self.transforms = nn.ModuleList() + + # Convert float32 [0:255] -> [0:1] + if TORCHVISION_AVAILABLE: + self.transforms.append(torchvision.transforms.ConvertImageDtype(dtype=torch.float32)) + else: + raise Exception("ConvertImageDtype transform requires torchvision") + + # Convert Channels First + self.transforms.append(Permute(dims=(0, 4, 1, 2, 3))) # (B, T, H, W, C) -> (B, C, T, H, W) + + # Resize + if self.resize: + self.transforms.append(ResizeVideo(self.resize_size)) # (B, C, T, H, W) -> (B, C, T, H', W') + + # Grayscale + if self.grayscale: + if TORCHVISION_AVAILABLE: + self.transforms.append( + nn.Sequential( + Permute(dims=(0, 2, 1, 3, 4)), # (B, C, T, H, W) -> (B, T, C, H, W) + torchvision.transforms.Grayscale(), + Permute(dims=(0, 2, 1, 3, 4)), # (B, T, C, H, W) -> (B, C, T, H, W) + ) + ) + else: + raise Exception("Grayscale transform requires torchvision") + + # Normalize + if self.normalize: + self.transforms.append(NormalizeVideo(mean=norm_mean, std=norm_std)) + + @typecheck() + @torch.no_grad() + def forward(self, input_signal, length): + + for transform in self.transforms: + input_signal = transform(input_signal) + + return input_signal, length + + +class NormalizeVideo(NeuralModule): + def __init__(self, mean, std): + super().__init__() + + self.register_buffer( + "mean", torch.tensor(mean, dtype=torch.float32).reshape(len(mean), 1, 1, 1), persistent=False + ) + self.register_buffer( + "std", torch.tensor(std, dtype=torch.float32).reshape(len(std), 1, 1, 1), persistent=False + ) + + def forward(self, x): + + x = (x - self.mean) / self.std + + return x + + +class ResizeVideo(NeuralModule): + def __init__(self, size): + super().__init__() + + self.size = size + if TORCHVISION_AVAILABLE: + self.resize = torchvision.transforms.Resize(size=self.size) + else: + raise Exception("Resize transform requires torchvision") + + def forward(self, x): + + # (B, C, T, H, W) + if x.dim() == 5: + + B, C = x.shape[:2] + x = x.flatten(start_dim=0, end_dim=1) + x = self.resize(x) + x = x.reshape((B, C) + x.shape[1:]) + + # (C, T, H, W) + elif x.dim() == 4: + x = self.resize(x) + + return x diff --git a/nemo/collections/multimodal/speech_cv/parts/__init__.py b/nemo/collections/multimodal/speech_cv/parts/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/parts/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/speech_cv/parts/preprocessing/features.py b/nemo/collections/multimodal/speech_cv/parts/preprocessing/features.py new file mode 100644 index 000000000000..29b5268d6adf --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/parts/preprocessing/features.py @@ -0,0 +1,62 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import tempfile + +try: + import torchvision + + TORCHVISION_AVAILABLE = True +except (ImportError, ModuleNotFoundError): + TORCHVISION_AVAILABLE = False + + +class VideoFeaturizer(object): + def __init__(self): + pass + + def process(self, video_file, offset, duration): + + # Load Video + video = self.from_file(video_file, offset=offset, duration=duration) + + return video + + def from_file(self, video_file, offset, duration): + + if not TORCHVISION_AVAILABLE: + raise Exception("Reading Video requires torchvision") + + # Load from filename + if isinstance(video_file, str): + video, audio, infos = torchvision.io.read_video( + video_file, start_pts=offset, end_pts=offset + duration, pts_unit="sec" + ) + + # Load from bytes + elif isinstance(video_file, bytes): + + # webdataset.torch_video + with tempfile.TemporaryDirectory() as dirname: + fname = os.path.join(dirname, f"file.mp4") + with open(fname, "wb") as stream: + stream.write(video_file) + video, audio, infos = torchvision.io.read_video( + fname, start_pts=offset, end_pts=offset + duration, pts_unit="sec" + ) + else: + raise Exception("Unknown video data format") + + return video diff --git a/nemo/collections/multimodal/speech_cv/parts/submodules/__init__.py b/nemo/collections/multimodal/speech_cv/parts/submodules/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/parts/submodules/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/speech_cv/parts/submodules/conv2d.py b/nemo/collections/multimodal/speech_cv/parts/submodules/conv2d.py new file mode 100644 index 000000000000..25f6e5451b66 --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/parts/submodules/conv2d.py @@ -0,0 +1,72 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Union + +from torch import nn +from torch.nn import init +from torch.nn.common_types import _size_2_t + + +class Conv2d(nn.Conv2d): + + """ + Conv2d layer with ResNet initialization: + + Reference: "Deep Residual Learning for Image Recognition" by He et al. + https://arxiv.org/abs/1512.03385 + + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: _size_2_t, + stride: _size_2_t = 1, + padding: Union[str, _size_2_t] = 0, + dilation: _size_2_t = 1, + groups: int = 1, + bias: bool = True, + padding_mode: str = 'zeros', + device=None, + dtype=None, + weight_init: str = "default", + bias_init: str = "default", + ): + + super(Conv2d, self).__init__( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias, + padding_mode=padding_mode, + device=device, + dtype=dtype, + ) + + # Weight Init + assert weight_init in ["default", "he_normal"] + if weight_init == "he_normal": + init.kaiming_normal_(self.weight) + + # Bias Init + assert bias_init in ["default", "zeros"] + if self.bias is not None: + if bias_init == "zeros": + init.zeros_(self.bias) diff --git a/nemo/collections/multimodal/speech_cv/parts/submodules/global_avg_pool2d.py b/nemo/collections/multimodal/speech_cv/parts/submodules/global_avg_pool2d.py new file mode 100644 index 000000000000..6c248d86564e --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/parts/submodules/global_avg_pool2d.py @@ -0,0 +1,28 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from torch import nn + + +class GlobalAvgPool2d(nn.Module): + def __init__(self, dim=(2, 3), keepdim=False): + super(GlobalAvgPool2d, self).__init__() + self.dim = dim + self.keepdim = keepdim + + def forward(self, x): + + assert x.dim() == 4, "input signal should have 4 dims, has {}".format(x.dim()) + + return x.mean(dim=self.dim, keepdim=self.keepdim) diff --git a/nemo/collections/multimodal/speech_cv/parts/submodules/permute.py b/nemo/collections/multimodal/speech_cv/parts/submodules/permute.py new file mode 100644 index 000000000000..abd4ce34f4a8 --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/parts/submodules/permute.py @@ -0,0 +1,28 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from torch import nn + + +class Permute(nn.Module): + def __init__(self, dims, make_contiguous=False): + super(Permute, self).__init__() + self.dims = dims + self.make_contiguous = make_contiguous + + def forward(self, x): + x = x.permute(self.dims) + if self.make_contiguous: + x = x.contiguous() + return x diff --git a/nemo/collections/multimodal/speech_cv/parts/submodules/resnet.py b/nemo/collections/multimodal/speech_cv/parts/submodules/resnet.py new file mode 100644 index 000000000000..c911db6f3abe --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/parts/submodules/resnet.py @@ -0,0 +1,175 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from torch import nn + +from nemo.collections.multimodal.speech_cv.parts.submodules.conv2d import Conv2d +from nemo.collections.multimodal.speech_cv.parts.submodules.global_avg_pool2d import GlobalAvgPool2d +from nemo.collections.multimodal.speech_cv.parts.submodules.resnet_block import ResNetBlock +from nemo.collections.multimodal.speech_cv.parts.submodules.resnet_bottleneck_block import ResNetBottleneckBlock + + +class ResNet(nn.Module): + + """ ResNet (ResNet18, ResNet34, ResNet50, ResNet101, ResNet152) + Models: 224 x 224 + ResNet18: 11,689,512 Params + ResNet34: 21,797,672 Params + ResNet50: 25,557,032 Params + ResNet101: 44,549,160 Params + Resnet152: 60,192,808 Params + Reference: "Deep Residual Learning for Image Recognition" by He et al. + https://arxiv.org/abs/1512.03385 + """ + + def __init__(self, dim_input=3, dim_output=1000, model="ResNet50", include_stem=True, include_head=True): + super(ResNet, self).__init__() + + assert model in ["ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"] + + if model == "ResNet18": + dim_stem = 64 + dim_blocks = [64, 128, 256, 512] + num_blocks = [2, 2, 2, 2] + bottleneck = False + elif model == "ResNet34": + dim_stem = 64 + dim_blocks = [64, 128, 256, 512] + num_blocks = [3, 4, 6, 3] + bottleneck = False + elif model == "ResNet50": + dim_stem = 64 + dim_blocks = [256, 512, 1024, 2048] + num_blocks = [3, 4, 6, 3] + bottleneck = True + elif model == "ResNet101": + dim_stem = 64 + dim_blocks = [256, 512, 1024, 2048] + num_blocks = [3, 4, 23, 3] + bottleneck = True + elif model == "ResNet152": + dim_stem = 64 + dim_blocks = [256, 512, 1024, 2048] + num_blocks = [3, 8, 36, 3] + bottleneck = True + + self.stem = ( + nn.Sequential( + Conv2d( + in_channels=dim_input, + out_channels=dim_stem, + kernel_size=(7, 7), + stride=(2, 2), + weight_init="he_normal", + bias=False, + ), + nn.BatchNorm2d(num_features=dim_stem), + nn.ReLU(), + nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)), + ) + if include_stem + else nn.Identity() + ) + + # Blocks + self.blocks = nn.ModuleList() + for stage_id in range(4): + + for block_id in range(num_blocks[stage_id]): + + # Projection Block + if block_id == 0: + if stage_id == 0: + stride = (1, 1) + bottleneck_ratio = 1 + in_features = dim_stem + else: + stride = (2, 2) + bottleneck_ratio = 2 + in_features = dim_blocks[stage_id - 1] + # Default Block + else: + stride = (1, 1) + in_features = dim_blocks[stage_id] + bottleneck_ratio = 4 + + if bottleneck: + self.blocks.append( + ResNetBottleneckBlock( + in_features=in_features, + out_features=dim_blocks[stage_id], + bottleneck_ratio=bottleneck_ratio, + kernel_size=(3, 3), + stride=stride, + ) + ) + else: + self.blocks.append( + ResNetBlock( + in_features=in_features, + out_features=dim_blocks[stage_id], + kernel_size=(3, 3), + stride=stride, + ) + ) + + # Head + self.head = ( + nn.Sequential( + GlobalAvgPool2d(), + nn.Linear(in_features=dim_blocks[-1], out_features=dim_output) + if dim_output is not None + else nn.Identity(), + ) + if include_head + else nn.Identity() + ) + + def forward(self, x): + + # Is Video + if x.dim() == 5: + + is_video = True + batch_size = x.shape[0] + video_frames = x.shape[2] + + # (B, Din, T, H, W) -> (B * T, Din, H, W) + x = x.transpose(1, 2).flatten(start_dim=0, end_dim=1) + + else: + is_video = False + + # (B, Din, H, W) -> (B, D0, H//4, W//4) + x = self.stem(x) + + # (B, D0, H//4, W//4) -> (B, D4, H//32, W//32) + for block in self.blocks: + x = block(x) + + # (B, D4, H//32, W//32) -> (B, Dout) + x = self.head(x) + + # Is Video + if is_video: + + # (B * T, Dout) -> (B, Dout, T) + if x.dim() == 2: + x = x.reshape(batch_size, video_frames, -1).transpose(1, 2) + + # (B * T, D4, H//32, W//32) -> (B, D4, T, H//32, W//32) + else: + x = x.reshape(batch_size, video_frames, x.shape[1], x.shape[2], x.shape[3]).transpose(1, 2) + + return x diff --git a/nemo/collections/multimodal/speech_cv/parts/submodules/resnet_block.py b/nemo/collections/multimodal/speech_cv/parts/submodules/resnet_block.py new file mode 100644 index 000000000000..19436311ccd7 --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/parts/submodules/resnet_block.py @@ -0,0 +1,86 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from torch import nn +from torch.nn.modules.utils import _pair + +from nemo.collections.multimodal.speech_cv.parts.submodules.conv2d import Conv2d + + +class ResNetBlock(nn.Module): + + """ ResNet Residual Block used by ResNet18 and ResNet34 networks. + References: "Deep Residual Learning for Image Recognition", He et al. + https://arxiv.org/abs/1512.03385 + """ + + def __init__(self, in_features, out_features, kernel_size, stride, weight_init="he_normal", bias_init="zeros"): + super(ResNetBlock, self).__init__() + + # Convert to pair + kernel_size = _pair(kernel_size) + + # layers + self.layers = nn.Sequential( + Conv2d( + in_channels=in_features, + out_channels=out_features, + kernel_size=kernel_size, + stride=stride, + bias=False, + weight_init=weight_init, + bias_init=bias_init, + padding=((kernel_size[0] - 1) // 2, kernel_size[1] // 2), + ), + nn.BatchNorm2d(out_features), + nn.ReLU(), + Conv2d( + in_channels=out_features, + out_channels=out_features, + kernel_size=kernel_size, + bias=False, + weight_init=weight_init, + bias_init=bias_init, + padding=((kernel_size[0] - 1) // 2, kernel_size[1] // 2), + ), + nn.BatchNorm2d(out_features), + ) + + # Residual Block + if torch.prod(torch.tensor(stride)) > 1 or in_features != out_features: + self.residual = nn.Sequential( + Conv2d( + in_channels=in_features, + out_channels=out_features, + kernel_size=1, + stride=stride, + bias=False, + weight_init=weight_init, + bias_init=bias_init, + ), + nn.BatchNorm2d(out_features), + ) + else: + self.residual = nn.Identity() + + # Joined Post Act + self.joined_post_act = nn.ReLU() + + def forward(self, x): + + # Forward Layers + x = self.joined_post_act(self.layers(x) + self.residual(x)) + + return x diff --git a/nemo/collections/multimodal/speech_cv/parts/submodules/resnet_bottleneck_block.py b/nemo/collections/multimodal/speech_cv/parts/submodules/resnet_bottleneck_block.py new file mode 100644 index 000000000000..50cafa53343c --- /dev/null +++ b/nemo/collections/multimodal/speech_cv/parts/submodules/resnet_bottleneck_block.py @@ -0,0 +1,107 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from torch import nn +from torch.nn.modules.utils import _pair + +from nemo.collections.multimodal.speech_cv.parts.submodules.conv2d import Conv2d + + +class ResNetBottleneckBlock(nn.Module): + + """ ResNet Bottleneck Residual Block used by ResNet50, ResNet101 and ResNet152 networks. + References: "Deep Residual Learning for Image Recognition", He et al. + https://arxiv.org/abs/1512.03385 + """ + + def __init__( + self, + in_features, + out_features, + bottleneck_ratio, + kernel_size, + stride, + weight_init="he_normal", + bias_init="zeros", + ): + super(ResNetBottleneckBlock, self).__init__() + + # Assert + assert in_features % bottleneck_ratio == 0 + + # Convert to pair + kernel_size = _pair(kernel_size) + + # layers + self.layers = nn.Sequential( + Conv2d( + in_channels=in_features, + out_channels=in_features // bottleneck_ratio, + kernel_size=1, + bias=False, + weight_init=weight_init, + bias_init=bias_init, + ), + nn.BatchNorm2d(in_features // bottleneck_ratio), + nn.ReLU(), + Conv2d( + in_channels=in_features // bottleneck_ratio, + out_channels=in_features // bottleneck_ratio, + kernel_size=kernel_size, + stride=stride, + bias=False, + weight_init=weight_init, + bias_init=bias_init, + padding=((kernel_size[0] - 1) // 2, kernel_size[1] // 2), + ), + nn.BatchNorm2d(in_features // bottleneck_ratio), + nn.ReLU(), + Conv2d( + in_channels=in_features // bottleneck_ratio, + out_channels=out_features, + kernel_size=1, + bias=False, + weight_init=weight_init, + bias_init=bias_init, + ), + nn.BatchNorm2d(out_features), + ) + + # Joined Post Act + self.joined_post_act = nn.ReLU() + + # Residual Block + if torch.prod(torch.tensor(stride)) > 1 or in_features != out_features: + self.residual = nn.Sequential( + Conv2d( + in_channels=in_features, + out_channels=out_features, + kernel_size=1, + stride=stride, + bias=False, + weight_init=weight_init, + bias_init=bias_init, + ), + nn.BatchNorm2d(out_features), + ) + else: + self.residual = nn.Identity() + + def forward(self, x): + + # Forward Layers + x = self.joined_post_act(self.layers(x) + self.residual(x)) + + return x diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index f2de48da26d0..98fd4cc6193a 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -25,6 +25,7 @@ 'ChannelType', 'AcousticEncodedRepresentation', 'AudioSignal', + 'VideoSignal', 'SpectrogramType', 'MelSpectrogramType', 'MFCCSpectrogramType', @@ -199,6 +200,21 @@ def type_parameters(self): return self._params +class VideoSignal(ElementType): + """Element type to represent encoded representation returned by the visual encoder model + Args: + fps (int): frames per second. + """ + + def __init__(self, fps: int = None): + self._params = {} + self._params['fps'] = fps + + @property + def type_parameters(self): + return self._params + + class SpectrogramType(ChannelType): """Element type to represent generic spectrogram signal""" From 1dbf71d65747714fa7ee2d9c6359e2ce23f6e5cc Mon Sep 17 00:00:00 2001 From: Jan Lasek Date: Wed, 20 Sep 2023 23:52:00 +0200 Subject: [PATCH 242/512] HF StarCoder to NeMo conversion script (#7421) * Script to convert HF StarCoder checkpoint to NeMo Signed-off-by: Jan Lasek * StarCoder conversion test Signed-off-by: Jan Lasek * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Jan Lasek * Fix test Signed-off-by: Jan Lasek * Catch up with save_to changes Signed-off-by: Jan Lasek * Don't abbreviate args for clarity Signed-off-by: Jan Lasek * Configurable precision: BF16 vs FP32 Signed-off-by: Jan Lasek * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Jan Lasek Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- Jenkinsfile | 25 +- .../convert_starcoder_hf_to_nemo.py | 220 ++++++++++++++++++ 2 files changed, 239 insertions(+), 6 deletions(-) create mode 100644 scripts/nlp_language_modeling/convert_starcoder_hf_to_nemo.py diff --git a/Jenkinsfile b/Jenkinsfile index 04bc96ff1596..2abcdbcc5ddb 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -116,12 +116,25 @@ pipeline { } } failFast true - steps { - sh 'CUDA_VISIBLE_DEVICES=0 python scripts/nlp_language_modeling/convert_hf_llama_to_nemo.py \ - --in-file=/home/TestData/nlp/megatron_llama/llama-ci-hf \ - --out-file=/home/TestData/nlp/megatron_llama/ci.nemo \ - --precision=16' - sh 'rm -f /home/TestData/nlp/megatron_llama/ci.nemo' + parallel { + stage('Llama') { + steps { + sh 'CUDA_VISIBLE_DEVICES=0 python scripts/nlp_language_modeling/convert_hf_llama_to_nemo.py \ + --in-file=/home/TestData/nlp/megatron_llama/llama-ci-hf \ + --out-file=/home/TestData/nlp/megatron_llama/ci.nemo \ + --precision=16' + sh 'rm -f /home/TestData/nlp/megatron_llama/ci.nemo' + } + } + stage('StarCoder') { + steps { + sh 'python scripts/nlp_language_modeling/convert_starcoder_hf_to_nemo.py \ + --config examples/nlp/language_modeling/conf/megatron_gpt_config.yaml \ + --input /home/TestData/nlp/megatron_gpt/starcoder-ci-hf \ + --output /home/TestData/nlp/megatron_gpt/starcoder-ci-hf' + sh 'rm -f /home/TestData/nlp/megatron_gpt/starcoder-ci-hf/megatron_starcoder_tp1_pp1.nemo' + } + } } } diff --git a/scripts/nlp_language_modeling/convert_starcoder_hf_to_nemo.py b/scripts/nlp_language_modeling/convert_starcoder_hf_to_nemo.py new file mode 100644 index 000000000000..e826dae037d3 --- /dev/null +++ b/scripts/nlp_language_modeling/convert_starcoder_hf_to_nemo.py @@ -0,0 +1,220 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +A script to convert the BigCode StarCoder checkpoints from HuggingFace to Megatron GPTModel. +This script is hardcoded specifically for the StarCoder pretrained models only, and is not +generalisable to any other models. + +This script will load and convert the model entirely on CPU for OOM safety, but it is +possible to initialize the model on GPU before the save down. You can do this by adding --cuda +parameter to this script call. + +This script requires that you have downloaded the StarCoder checkpoint from HuggingFace. +This can be done using Git with the following command: +```bash +git clone https://huggingface.co/bigcode/starcoder +``` +Note that downloading this particular checkpoint requires authentication with a HuggingFace token. + +The script will generate a Megatron model with TP=1 and PP=1. If you need different TP/PP +values, then after running this script, please use the following script to set whatever +TP/PP configuration you want: + NeMo/examples/nlp/language_modeling/megatron_change_num_partitions.py + +This script also requires a baseline config file from which to override default parameters. +You can specify the location of this file using the -c argument. Please use the config below +to correctly configure creating GPT-2 model in Megatron: + NeMo/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml + + +Here is an example usage command: +```python +python scripts/nlp_language_modeling/convert_starcoder_hf_to_nemo.py \ + --config /path/to/megatron_gpt_config.yaml \ + --input /path/to/starcoder \ + --output /path/to/save +``` +""" + +import argparse +import os +from typing import Dict + +import pytorch_lightning as pl +import torch +import yaml +from omegaconf import OmegaConf +from transformers import AutoConfig, AutoModelForCausalLM + +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy +from nemo.utils import logging + + +def convert_state_dict(state_dict: Dict[str, torch.Tensor], amp: bool = False): + def get_new_key(old_key): + if old_key == "transformer.wte.weight": + return "embedding.word_embeddings.weight" + if old_key == "transformer.wpe.weight": + return "embedding.position_embeddings.weight" + elif old_key.startswith("transformer.ln_f"): + return old_key.replace("transformer.ln_f", "decoder.final_layernorm") + elif old_key.startswith("lm_head"): + return old_key.replace("lm_head", "output_layer") + else: + p1 = old_key.replace("transformer.h", "decoder.layers") + p2 = p1.replace("ln_1.", "self_attention.linear_qkv.layer_norm_") + p3 = p2.replace("attn.c_proj", "self_attention.linear_proj") + p4 = p3.replace("attn.c_attn", "self_attention.linear_qkv") + p5 = p4.replace("ln_2.", "mlp.linear_fc1.layer_norm_") + p6 = p5.replace("c_fc", "linear_fc1") + p7 = p6.replace("c_proj", "linear_fc2") + return p7 + + new_dict = {} + prefix = "model.module." if amp else "model." + + for old_key, val in state_dict.items(): + new_key = get_new_key(old_key) + new_key = prefix + new_key + new_dict[new_key] = val + + return new_dict + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--config", type=str, required=True, help="Path to the megatron_gpt_config.yaml file") + parser.add_argument( + "--input", type=str, required=True, help="StarCoder from HuggingFace hub or local dir with downloaded model" + ) + parser.add_argument("--output", type=str, default=".", help="Path to dir where to store output .nemo file") + parser.add_argument( + "--precision", type=str, default="bf16", choices=["bf16", "32"], help="Precision for checkpoint weights saved" + ) + parser.add_argument("--cuda", action="store_true", help="Put Nemo model onto GPU prior to saving") + args = parser.parse_args() + + if not os.path.isdir(args.output): + raise FileNotFoundError(f"Output directory '{args.output}' does not exist") + + hf_config = AutoConfig.from_pretrained(args.input) + + with open(args.config, "r", encoding="utf_8") as f: + orig_cfg = yaml.safe_load(f) + + model_dict = orig_cfg["model"] + + if "data" in model_dict: + del model_dict["data"] + + override_model_dict = { + "micro_batch_size": 1, + "global_batch_size": 1, + "tensor_model_parallel_size": 1, + "pipeline_model_parallel_size": 1, + "megatron_amp_O2": False, + "transformer_engine": True, + "use_cpu_initialization": not args.cuda, + "normalization": "layernorm", + "mcore_gpt": True, + "num_query_groups": 1, # MQA + "hidden_size": hf_config.n_embd, + "encoder_seq_length": hf_config.n_positions, + "max_position_embeddings": hf_config.n_positions, + "num_layers": hf_config.n_layer, + "num_attention_heads": hf_config.n_head, + "ffn_hidden_size": hf_config.n_inner, + "layernorm_epsilon": hf_config.layer_norm_epsilon, + "pre_process": True, + "post_process": True, + "apply_query_key_layer_scaling": True, + "bias": True, + "transformer_block_type": "pre_ln", + "fp32_residual_connection": False, + "hidden_dropout": hf_config.summary_first_dropout, + "attention_dropout": hf_config.attn_pdrop, + "ffn_dropout": 0, + "share_embeddings_and_output_weights": False, + "position_embedding_type": "learned_absolute", + "normalize_attention_scores": True, + "precision": args.precision, + } + tokenizer_dict = { + "library": "huggingface", + "type": args.input, + "use_fast": True, + } + trainer_dict = { + "devices": 1, + "num_nodes": 1, + "accelerator": "gpu" if args.cuda else "cpu", + "precision": args.precision, + "logger": False, + "enable_checkpointing": False, + "max_epochs": -1, + "max_steps": 100000, + "log_every_n_steps": 10, + "val_check_interval": 100, + "limit_val_batches": 50, + "limit_test_batches": 500, + "accumulate_grad_batches": 1, + "gradient_clip_val": 1.0, + "benchmark": False, + "enable_model_summary": False, + "strategy": NLPDDPStrategy(), + } + + model_dict.update(override_model_dict) + model_dict["tokenizer"] = tokenizer_dict + + omega_cfg = OmegaConf.create(model_dict) + + trainer = pl.Trainer(**trainer_dict) + + logging.info("Creating Megatron model...") + model = MegatronGPTModel(omega_cfg, trainer) + logging.info(f"Created model:\n{model}") + + logging.info("Loading HuggingFace model...") + model_hf = AutoModelForCausalLM.from_pretrained(args.input) + logging.info(f"Loaded model:\n{model_hf}") + + state_dict_hf = model_hf.state_dict() + convert_dict = convert_state_dict(state_dict_hf, amp=omega_cfg.megatron_amp_O2) + + logging.info("Loading state dict...") + missing_keys, unexpected_keys = model.load_state_dict(convert_dict, strict=False) + + if missing_keys: + # Keys ending with '_extra_state' are related to Transformer Engine internals + missing_keys_non_extra = [key for key in missing_keys if not key.endswith("_extra_state")] + if missing_keys_non_extra: + logging.critical("Missing keys were detected during the load, something has gone wrong. Aborting.") + raise RuntimeError(f"Missing keys: \n{missing_keys_non_extra}") + + if unexpected_keys: + logging.critical("Unexpected keys were detected which should not happen. Aborting.") + raise RuntimeError(f"Unexpected keys: \n{unexpected_keys}") + + logging.info("Saving model...") + # We make sure that the tokenizer can be instantiated later regardless of args.input + model.cfg.tokenizer.update(type="bigcode/starcoder") + dtype = torch.bfloat16 if args.precision == "bf16" else torch.float32 + model = model.to(dtype=dtype) + model.cfg.update(use_cpu_initialization=False) + model.save_to(os.path.join(args.output, "megatron_starcoder_tp1_pp1.nemo")) + logging.info("Done.") From e16856e8b268c0d825b60271a460c5b588558d7c Mon Sep 17 00:00:00 2001 From: Kelvin Liu Date: Fri, 22 Sep 2023 01:57:02 +0800 Subject: [PATCH 243/512] fix bug when loading dist ckpt in peft (#7452) Signed-off-by: Hongbin Liu Co-authored-by: Hongbin Liu --- nemo/collections/nlp/parts/nlp_overrides.py | 65 ++++++++++++++++++--- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index a4eb161be42c..f65dfa4a04ac 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -867,7 +867,8 @@ def _load_state_dict_from_disk(self, model_weights, map_location=None): peft_state_dict = torch.load(model_weights_path, map_location)['state_dict'] else: peft_state_dict = {} - base_model_state_dict.update(peft_state_dict) # add the PEFT state_dict into the base model's state_dict + if base_model_state_dict: + base_model_state_dict.update(peft_state_dict) # add the PEFT state_dict into the base model's state_dict return base_model_state_dict def restore_from( @@ -892,13 +893,61 @@ def restore_from( return loaded_params conf, instance, state_dict = loaded_params - if ( - self.peft_model_nemo_path is None and self.peft_model_ckpt_dir is None - ): # we have this check only for training PEFT from scratch - peft_state_dict = instance.get_peft_state_dict() - state_dict.update(peft_state_dict) - state_dict = self.modify_state_dict(conf, state_dict) - self.load_instance_with_state_dict(instance, state_dict, strict) + # if we're using dist checkpointing then state_dict will be None + if state_dict is None: + # dist checkpointing needs torch.distributed to load the checkpoint + if parallel_state.is_unitialized(): + + def dummy(): + return + + if trainer.strategy.launcher is not None: + trainer.strategy.launcher.launch(dummy, trainer=trainer) + trainer.strategy.setup_environment() + + with tempfile.TemporaryDirectory() as tmpdir: + # Check if self.model_extracted_dir is set, and is a valid path + if self.model_extracted_dir is not None and os.path.isdir(self.model_extracted_dir): + # Log that NeMo will use the provided `model_extracted_dir` + logging.info( + f"Restoration will occur within pre-extracted directory : " f"`{self.model_extracted_dir}`." + ) + + # Override `tmpdir` above with the pre-extracted `model_extracted_dir` + tmpdir = self.model_extracted_dir + + else: + # Extract the nemo file into the temporary directory + self._unpack_nemo_file( + path2file=restore_path, out_folder=tmpdir, extract_config_only=return_config is True + ) + checkpoint = {} + sharded_state_dict = instance.sharded_state_dict() + peft_state_dict = instance.get_peft_state_dict() + for k in peft_state_dict.keys(): + sharded_state_dict.pop(k) + checkpoint['state_dict'] = sharded_state_dict + # remove model weights extension + tmp_model_weights_ckpt = os.path.join(tmpdir, self.model_weights_ckpt) + tmp_model_weights_dir = os.path.splitext(tmp_model_weights_ckpt)[0] + assert os.path.isdir(tmp_model_weights_dir), f'Expected {tmp_model_weights_dir} to be a directory.' + checkpoint = dist_checkpointing.load( + sharded_state_dict=checkpoint, checkpoint_dir=tmp_model_weights_dir + ) + checkpoint['state_dict'].update(peft_state_dict) + instance.on_load_checkpoint(checkpoint) + if hasattr(instance, 'setup_transformer_engine_tp_groups'): + instance.setup_transformer_engine_tp_groups() + + else: + if ( + self.peft_model_nemo_path is None and self.peft_model_ckpt_dir is None + ): # we have this check only for training PEFT from scratch + peft_state_dict = instance.get_peft_state_dict() + state_dict.update(peft_state_dict) + state_dict = self.modify_state_dict(conf, state_dict) + self.load_instance_with_state_dict(instance, state_dict, strict) + logging.info(f'Model {instance.__class__.__name__} was successfully restored from {restore_path}.') return instance From 20dc14232e6f3774a6bf6077c6576d65bb95967b Mon Sep 17 00:00:00 2001 From: Tamerlan Tabolov Date: Thu, 21 Sep 2023 20:11:45 +0200 Subject: [PATCH 244/512] Fix adding positional embeddings in-place in transformer module (#7440) Signed-off-by: Tamerlan Tabolov Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> --- nemo/collections/tts/modules/transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/tts/modules/transformer.py b/nemo/collections/tts/modules/transformer.py index 3dda8c522dcc..728b583919ff 100644 --- a/nemo/collections/tts/modules/transformer.py +++ b/nemo/collections/tts/modules/transformer.py @@ -246,7 +246,7 @@ def forward(self, input, seq_lens, conditioning=None): def _forward(self, inp, mask, conditioning): pos_seq = torch.arange(inp.size(1), device=inp.device).to(inp.dtype) pos_emb = self.pos_emb(pos_seq) * mask - inp += pos_emb + inp = inp + pos_emb inp = self.cond_input(inp, conditioning) out = self.drop(inp) From 19358afe2ccb64b77c1dd29408c00fe1403a70ce Mon Sep 17 00:00:00 2001 From: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Date: Thu, 21 Sep 2023 22:13:25 -0400 Subject: [PATCH 245/512] Fix (#7478) Signed-off-by: Cheng-Ping Hsieh --- .../nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py index 55bce820ca8f..801a58394f06 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py @@ -289,6 +289,7 @@ def collate_fn(self, batch): labels = [x[: self.max_seq_length] for x in labels] loss_mask = [x[: self.max_seq_length] for x in loss_mask] contexts = [x[: self.max_seq_length] for x in contexts] + answers = [x[: self.max_seq_length] for x in answers] # increase max length to nearest multiple of 4 or 8 if self.pad_to_max_length: From 30e4ca6f38dc8631572fecc282afdf364600e006 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 23 Sep 2023 23:10:20 -0700 Subject: [PATCH 246/512] add sleep (#7498) (#7499) * add sleep * add sleep onto config instead * add comment --------- Signed-off-by: Gerald Shen Co-authored-by: Gerald Shen <119401249+gshennvm@users.noreply.github.com> --- nemo/utils/exp_manager.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nemo/utils/exp_manager.py b/nemo/utils/exp_manager.py index 70a2e2ebb94e..ae83db4aa44f 100644 --- a/nemo/utils/exp_manager.py +++ b/nemo/utils/exp_manager.py @@ -170,6 +170,8 @@ class ExpManagerConfig: ema: Optional[EMAParams] = EMAParams() # Wall clock time limit max_time_per_run: Optional[str] = None + # time to sleep non 0 ranks during initialization + seconds_to_sleep: float = 5 class TimingCallback(Callback): @@ -308,6 +310,7 @@ def exp_manager(trainer: 'pytorch_lightning.Trainer', cfg: Optional[Union[DictCo Set this to True if you are using DDP with many GPUs and do not want many log files in your exp dir. - max_time (str): The maximum wall clock time *per run*. This is intended to be used on clusters where you want a checkpoint to be saved after this specified time and be able to resume from that checkpoint. Defaults to None. + - seconds_to_sleep (float): seconds to sleep non rank 0 processes for. Used to give enough time for rank 0 to initialize returns: log_dir (Path): The final logging directory where logging files are saved. Usually the concatenation of @@ -508,6 +511,11 @@ def exp_manager(trainer: 'pytorch_lightning.Trainer', cfg: Optional[Union[DictCo # Add lightning file logging to global_rank zero add_filehandlers_to_pl_logger(log_dir / 'lightning_logs.txt', log_dir / 'nemo_error_log.txt') + elif trainer.num_devices * trainer.num_devices > 1: + # sleep other ranks so rank 0 can finish + # doing the initialization such as moving files + time.sleep(cfg.seconds_to_sleep) + return log_dir From 96ece091995a4761478b67e8d397bbc525c99082 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 25 Sep 2023 10:04:27 -0700 Subject: [PATCH 247/512] Fix exp manager check for sleep (#7503) (#7504) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar --- nemo/utils/exp_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/utils/exp_manager.py b/nemo/utils/exp_manager.py index ae83db4aa44f..dbd6a297e49b 100644 --- a/nemo/utils/exp_manager.py +++ b/nemo/utils/exp_manager.py @@ -511,7 +511,7 @@ def exp_manager(trainer: 'pytorch_lightning.Trainer', cfg: Optional[Union[DictCo # Add lightning file logging to global_rank zero add_filehandlers_to_pl_logger(log_dir / 'lightning_logs.txt', log_dir / 'nemo_error_log.txt') - elif trainer.num_devices * trainer.num_devices > 1: + elif trainer.num_nodes * trainer.num_devices > 1: # sleep other ranks so rank 0 can finish # doing the initialization such as moving files time.sleep(cfg.seconds_to_sleep) From c1d05d5093d7fc3bfe9a6c225c87a0718baf762f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 25 Sep 2023 14:21:17 -0700 Subject: [PATCH 248/512] bugfix: trainer.accelerator=auto from None. (#7492) (#7493) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> --- tutorials/tts/Vits_Training.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/tts/Vits_Training.ipynb b/tutorials/tts/Vits_Training.ipynb index d1d82b753091..4bb87a2c4506 100644 --- a/tutorials/tts/Vits_Training.ipynb +++ b/tutorials/tts/Vits_Training.ipynb @@ -308,7 +308,7 @@ " phoneme_dict_path=tts_dataset_files/ipa_cmudict-0.7b_nv23.01.txt \\\n", " heteronyms_path=tts_dataset_files/heteronyms-052722 \\\n", " trainer.max_epochs=3 \\\n", - " trainer.accelerator=null \\\n", + " trainer.accelerator=auto \\\n", " trainer.check_val_every_n_epoch=1 \\\n", " trainer.devices=1)" ] From 6a8dd4848ac6f8fbd89c9e886435a2bc225e1d28 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Mon, 25 Sep 2023 16:55:05 -0700 Subject: [PATCH 249/512] [doc] fix broken link (#7481) Signed-off-by: Stas Bekman --- docs/source/core/exp_manager.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/core/exp_manager.rst b/docs/source/core/exp_manager.rst index c23d902a26ee..5415ae403f38 100644 --- a/docs/source/core/exp_manager.rst +++ b/docs/source/core/exp_manager.rst @@ -32,7 +32,7 @@ Optionally, launch TensorBoard to view the training results in ``./nemo_experime .. If ``create_checkpoint_callback`` is set to ``True``, then NeMo automatically creates checkpoints during training -using PyTorch Lightning's `ModelCheckpoint `_. +using PyTorch Lightning's `ModelCheckpoint `_. We can configure the ``ModelCheckpoint`` via YAML or CLI. .. code-block:: yaml From 3efd3278641d6030f8266fbc59ff11d45f2d2574 Mon Sep 17 00:00:00 2001 From: Ryan Langman Date: Mon, 25 Sep 2023 21:27:13 -0700 Subject: [PATCH 250/512] [TTS] Read audio as int32 to avoid flac read errors (#7477) * [TTS] Read audio as int32 to avoid flac read errors Signed-off-by: Ryan * [TTS] Add comment about read failures Signed-off-by: Ryan --------- Signed-off-by: Ryan --- .../asr/parts/preprocessing/segment.py | 17 +++++++++++++---- nemo/collections/tts/data/vocoder_dataset.py | 6 ++++-- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py index d586137d5ff2..f614d3d22186 100644 --- a/nemo/collections/asr/parts/preprocessing/segment.py +++ b/nemo/collections/asr/parts/preprocessing/segment.py @@ -375,7 +375,15 @@ def from_file_list( @classmethod def segment_from_file( - cls, audio_file, target_sr=None, n_segments=0, trim=False, orig_sr=None, channel_selector=None, offset=None + cls, + audio_file, + target_sr=None, + n_segments=0, + trim=False, + orig_sr=None, + channel_selector=None, + offset=None, + dtype='float32', ): """Grabs n_segments number of samples from audio_file. If offset is not provided, n_segments are selected randomly. @@ -390,6 +398,7 @@ def segment_from_file( :param orig_sr: the original sample rate :param channel selector: select a subset of channels. If set to `None`, the original signal will be used. :param offset: fixed offset in seconds + :param dtype: data type to load audio as. :return: numpy array of samples """ is_segmented = False @@ -412,15 +421,15 @@ def segment_from_file( f'Provided audio start ({audio_start}) is larger than the maximum possible ({max_audio_start})' ) f.seek(audio_start) - samples = f.read(n_segments_at_original_sr, dtype='float32') + samples = f.read(n_segments_at_original_sr, dtype=dtype) is_segmented = True elif n_segments_at_original_sr > len(f): logging.warning( f"Number of segments ({n_segments_at_original_sr}) is greater than the length ({len(f)}) of the audio file {audio_file}. This may lead to shape mismatch errors." ) - samples = f.read(dtype='float32') + samples = f.read(dtype=dtype) else: - samples = f.read(dtype='float32') + samples = f.read(dtype=dtype) except RuntimeError as e: logging.error(f"Loading {audio_file} via SoundFile raised RuntimeError: `{e}`.") raise e diff --git a/nemo/collections/tts/data/vocoder_dataset.py b/nemo/collections/tts/data/vocoder_dataset.py index a5a30870dfff..97e0648f8b11 100644 --- a/nemo/collections/tts/data/vocoder_dataset.py +++ b/nemo/collections/tts/data/vocoder_dataset.py @@ -122,11 +122,13 @@ def get_sampler(self, batch_size: int) -> Optional[torch.utils.data.Sampler]: return sampler def _segment_audio(self, audio_filepath: Path) -> AudioSegment: - # Retry file read multiple times as file seeking can produce random IO errors. + # File seeking sometimes fails when reading flac files with libsndfile < 1.0.30. + # Read audio as int32 to minimize issues, and retry read on a different segment in case of failure. + # https://github.com/bastibe/python-soundfile/issues/274 for _ in range(self.num_audio_retries): try: audio_segment = AudioSegment.segment_from_file( - audio_filepath, target_sr=self.sample_rate, n_segments=self.n_samples, + audio_filepath, target_sr=self.sample_rate, n_segments=self.n_samples, dtype="int32" ) return audio_segment except Exception: From 2ab2ff7643ccd7cacbb0506efad1cfc629f8ec38 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Tue, 26 Sep 2023 14:36:26 +1000 Subject: [PATCH 251/512] Add dataset 'AISHELL-3' from OpenSLR for training mandarin TTS (#7409) * Add dataset 'AISHELL-3' from OpenSLR for training mandarin TTS * Train 'AISHELL-3' dataset with multi-speakers Signed-off-by: Robin Dong * Update get_data.py update copyright header Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * Update get_data.py added a disclaimer Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add new configuration file for AISHELL3 with multispeaker of fastpitch Signed-off-by: Robin Dong --------- Signed-off-by: Robin Dong Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --- .../fastpitch_align_multispeaker_22050.yaml | 261 ++++++++++++++++++ .../ds_conf/ds_for_fastpitch_align.yaml | 49 ++++ .../tts/aishell3/get_data.py | 156 +++++++++++ 3 files changed, 466 insertions(+) create mode 100644 examples/tts/conf/zh/fastpitch_align_multispeaker_22050.yaml create mode 100755 scripts/dataset_processing/tts/aishell3/ds_conf/ds_for_fastpitch_align.yaml create mode 100755 scripts/dataset_processing/tts/aishell3/get_data.py diff --git a/examples/tts/conf/zh/fastpitch_align_multispeaker_22050.yaml b/examples/tts/conf/zh/fastpitch_align_multispeaker_22050.yaml new file mode 100644 index 000000000000..2464e546598e --- /dev/null +++ b/examples/tts/conf/zh/fastpitch_align_multispeaker_22050.yaml @@ -0,0 +1,261 @@ +# This config contains the default values for training FastPitch model with aligner using 22KHz sampling +# rate. If you want to train model on other dataset, you can change config values according to your dataset. +# Most dataset-specific arguments are in the head of the config file, see below. + +name: FastPitch + +train_dataset: ??? +validation_datasets: ??? +sup_data_path: ??? +sup_data_types: [ "align_prior_matrix", "pitch", "speaker_id"] + +# Default values from librosa.pyin +pitch_fmin: 65.40639132514966 +pitch_fmax: 1986.977294921875 + +# these frame-wise values depend on pitch_fmin and pitch_fmax, you can get values +# by running `scripts/dataset_processing/tts/extract_sup_data.py` +pitch_mean: ??? # e.g. 221.4948272705078 for SFbilingual dataset. +pitch_std: ??? # e.g. 64.6528930664063 for SFbilingual dataset. + +# Default values for dataset with sample_rate=22050 +sample_rate: 22050 +n_mel_channels: 80 +n_window_size: 1024 +n_window_stride: 256 +n_fft: 1024 +lowfreq: 0 +highfreq: null +window: hann + +# There are four candidates of `phoneme_dict_path` provided for Chinese as shown below, +# 1) 24-final Pinyin: "scripts/tts_dataset_files/zh/24finals/pinyin_dict_nv_22.10.txt", +# 2) IPA converted from 24-final Pinyin: "scripts/tts_dataset_files/zh/24finals/ipa_dict_nv23.05.txt", +# 3) 36-final Pinyin: "scripts/tts_dataset_files/zh/36finals/pinyin_dict_nv23.05.txt", +# 4) (default) IPA converted from 36-final Pinyin: "scripts/tts_dataset_files/zh/36finals/ipa_dict_nv23.05.txt" +# Suggest to choose IPA symbol set converted from 36-final Pinyin because better audio quality were observed. +phoneme_dict_path: "scripts/tts_dataset_files/zh/36finals/ipa_dict_nv23.05.txt" + +model: + learn_alignment: true + bin_loss_warmup_epochs: 100 + + n_speakers: 1958 + max_token_duration: 75 + symbols_embedding_dim: 384 + pitch_embedding_kernel_size: 3 + speaker_emb_condition_prosody: true + speaker_emb_condition_aligner: true + + pitch_fmin: ${pitch_fmin} + pitch_fmax: ${pitch_fmax} + + pitch_mean: ${pitch_mean} + pitch_std: ${pitch_std} + + sample_rate: ${sample_rate} + n_mel_channels: ${n_mel_channels} + n_window_size: ${n_window_size} + n_window_stride: ${n_window_stride} + n_fft: ${n_fft} + lowfreq: ${lowfreq} + highfreq: ${highfreq} + window: ${window} + + text_normalizer: + _target_: nemo_text_processing.text_normalization.normalize.Normalizer + lang: zh + input_case: cased + + text_normalizer_call_kwargs: + verbose: false + punct_pre_process: true + punct_post_process: true + + text_tokenizer: + _target_: nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers.ChinesePhonemesTokenizer + punct: true + apostrophe: true + pad_with_space: true + g2p: + _target_: nemo.collections.tts.g2p.models.zh_cn_pinyin.ChineseG2p + phoneme_dict: ${phoneme_dict_path} + word_segmenter: jieba # Only jieba is supported now. + phoneme_prefix: "" + phoneme_case: lower + tone_prefix: "#" + ascii_letter_prefix: "" + ascii_letter_case: upper + + train_ds: + dataset: + _target_: nemo.collections.tts.data.dataset.TTSDataset + manifest_filepath: ${train_dataset} + sample_rate: ${model.sample_rate} + sup_data_path: ${sup_data_path} + sup_data_types: ${sup_data_types} + n_fft: ${model.n_fft} + win_length: ${model.n_window_size} + hop_length: ${model.n_window_stride} + window: ${model.window} + n_mels: ${model.n_mel_channels} + lowfreq: ${model.lowfreq} + highfreq: ${model.highfreq} + max_duration: null # change to null to include longer audios. + min_duration: 0.1 + ignore_file: null + trim: true + trim_top_db: 50 + trim_frame_length: ${model.n_window_size} + trim_hop_length: ${model.n_window_stride} + pitch_fmin: ${model.pitch_fmin} + pitch_fmax: ${model.pitch_fmax} + pitch_norm: true + pitch_mean: ${model.pitch_mean} + pitch_std: ${model.pitch_std} + + dataloader_params: + drop_last: false + shuffle: true + batch_size: 32 + num_workers: 12 + pin_memory: true + + validation_ds: + dataset: + _target_: nemo.collections.tts.data.dataset.TTSDataset + manifest_filepath: ${validation_datasets} + sample_rate: ${model.sample_rate} + sup_data_path: ${sup_data_path} + sup_data_types: ${sup_data_types} + n_fft: ${model.n_fft} + win_length: ${model.n_window_size} + hop_length: ${model.n_window_stride} + window: ${model.window} + n_mels: ${model.n_mel_channels} + lowfreq: ${model.lowfreq} + highfreq: ${model.highfreq} + max_duration: null # change to null to include longer audios. + min_duration: 0.1 + ignore_file: null + trim: true + trim_top_db: 50 + trim_frame_length: ${model.n_window_size} + trim_hop_length: ${model.n_window_stride} + pitch_fmin: ${model.pitch_fmin} + pitch_fmax: ${model.pitch_fmax} + pitch_norm: true + pitch_mean: ${model.pitch_mean} + pitch_std: ${model.pitch_std} + + dataloader_params: + drop_last: false + shuffle: false + batch_size: 32 + num_workers: 2 + pin_memory: true + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + features: ${model.n_mel_channels} + lowfreq: ${model.lowfreq} + highfreq: ${model.highfreq} + n_fft: ${model.n_fft} + n_window_size: ${model.n_window_size} + window_size: false + n_window_stride: ${model.n_window_stride} + window_stride: false + pad_to: 1 + pad_value: 0 + sample_rate: ${model.sample_rate} + window: ${model.window} + normalize: null + preemph: null + dither: 0.0 + frame_splicing: 1 + log: true + log_zero_guard_type: add + log_zero_guard_value: 1e-05 + mag_power: 1.0 + + input_fft: #n_embed and padding_idx are added by the model + _target_: nemo.collections.tts.modules.transformer.FFTransformerEncoder + n_layer: 6 + n_head: 1 + d_model: ${model.symbols_embedding_dim} + d_head: 64 + d_inner: 1536 + kernel_size: 3 + dropout: 0.1 + dropatt: 0.1 + dropemb: 0.0 + d_embed: ${model.symbols_embedding_dim} + + output_fft: + _target_: nemo.collections.tts.modules.transformer.FFTransformerDecoder + n_layer: 6 + n_head: 1 + d_model: ${model.symbols_embedding_dim} + d_head: 64 + d_inner: 1536 + kernel_size: 3 + dropout: 0.1 + dropatt: 0.1 + dropemb: 0.0 + + alignment_module: + _target_: nemo.collections.tts.modules.aligner.AlignmentEncoder + n_text_channels: ${model.symbols_embedding_dim} + + duration_predictor: + _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor + input_size: ${model.symbols_embedding_dim} + kernel_size: 3 + filter_size: 256 + dropout: 0.1 + n_layers: 2 + + pitch_predictor: + _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor + input_size: ${model.symbols_embedding_dim} + kernel_size: 3 + filter_size: 256 + dropout: 0.1 + n_layers: 2 + + optim: + name: adamw + lr: 1e-3 + betas: [0.9, 0.999] + weight_decay: 1e-6 + + sched: + name: NoamAnnealing + warmup_steps: 1000 + last_epoch: -1 + d_model: 1 # Disable scaling based on model dim + +trainer: + num_nodes: 1 + devices: -1 # number of gpus + accelerator: gpu + strategy: ddp + precision: 16 + max_epochs: 5000 + accumulate_grad_batches: 1 + gradient_clip_val: 1000.0 + enable_checkpointing: false # Provided by exp_manager + logger: false # Provided by exp_manager + log_every_n_steps: 100 + check_val_every_n_epoch: 5 + benchmark: false + +exp_manager: + exp_dir: null + name: ${name} + create_tensorboard_logger: true + create_checkpoint_callback: true + checkpoint_callback_params: + monitor: val_loss + resume_if_exists: false + resume_ignore_no_checkpoint: false diff --git a/scripts/dataset_processing/tts/aishell3/ds_conf/ds_for_fastpitch_align.yaml b/scripts/dataset_processing/tts/aishell3/ds_conf/ds_for_fastpitch_align.yaml new file mode 100755 index 000000000000..f8298d7c2680 --- /dev/null +++ b/scripts/dataset_processing/tts/aishell3/ds_conf/ds_for_fastpitch_align.yaml @@ -0,0 +1,49 @@ +name: "ds_for_fastpitch_align" + +manifest_filepath: "train_manifest.json" +sup_data_path: "sup_data" +sup_data_types: [ "align_prior_matrix", "pitch", "speaker_id"] +phoneme_dict_path: "scripts/tts_dataset_files/zh/24finals/pinyin_dict_nv_22.10.txt" + +dataset: + _target_: nemo.collections.tts.data.dataset.TTSDataset + manifest_filepath: ${manifest_filepath} + sample_rate: 22050 + sup_data_path: ${sup_data_path} + sup_data_types: ${sup_data_types} + n_fft: 1024 + win_length: 1024 + hop_length: 256 + window: "hann" + n_mels: 80 + lowfreq: 0 + highfreq: null + max_duration: null + min_duration: 0.1 + ignore_file: null + trim: true + trim_top_db: 50 + trim_frame_length: 1024 + trim_hop_length: 256 + pitch_fmin: 65.40639132514966 + pitch_fmax: 2093.004522404789 + + text_normalizer: + _target_: nemo_text_processing.text_normalization.normalize.Normalizer + lang: zh + input_case: cased + + text_normalizer_call_kwargs: + verbose: false + punct_pre_process: true + punct_post_process: true + + text_tokenizer: + _target_: nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers.ChinesePhonemesTokenizer + punct: true + apostrophe: true + pad_with_space: true + g2p: + _target_: nemo.collections.tts.g2p.models.zh_cn_pinyin.ChineseG2p + phoneme_dict: ${phoneme_dict_path} + word_segmenter: jieba # Only jieba is supported now. diff --git a/scripts/dataset_processing/tts/aishell3/get_data.py b/scripts/dataset_processing/tts/aishell3/get_data.py new file mode 100755 index 000000000000..904ab0314653 --- /dev/null +++ b/scripts/dataset_processing/tts/aishell3/get_data.py @@ -0,0 +1,156 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Disclaimer: +# Each user is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use. + +import argparse +import json +import os +import random +import subprocess +import tarfile +import urllib.request +from pathlib import Path + +import numpy as np +from nemo_text_processing.text_normalization.normalize import Normalizer +from opencc import OpenCC + +URL = "https://www.openslr.org/resources/93/data_aishell3.tgz" + + +def get_args(): + parser = argparse.ArgumentParser( + description='Prepare SF_bilingual dataset and create manifests with predefined split' + ) + + parser.add_argument( + "--data-root", + type=Path, + help="where the dataset will reside", + default="./DataChinese/sf_bilingual_speech_zh_en_vv1/SF_bilingual/", + ) + parser.add_argument( + "--manifests-path", type=Path, help="where the resulting manifests files will reside", default="./" + ) + parser.add_argument("--val-size", default=0.01, type=float, help="eval set split") + parser.add_argument("--test-size", default=0.01, type=float, help="test set split") + parser.add_argument( + "--seed-for-ds-split", + default=100, + type=float, + help="Seed for deterministic split of train/dev/test, NVIDIA's default is 100", + ) + + args = parser.parse_args() + return args + + +def __maybe_download_file(source_url, destination_path): + if not destination_path.exists(): + tmp_file_path = destination_path.with_suffix('.tmp') + urllib.request.urlretrieve(source_url, filename=str(tmp_file_path)) + tmp_file_path.rename(destination_path) + + +def __extract_file(filepath, data_dir): + try: + tar = tarfile.open(filepath) + tar.extractall(data_dir) + tar.close() + except Exception: + print(f"Error while extracting {filepath}. Already extracted?") + + +def __process_transcript(file_path: str): + # Create directory for processed wav files + Path(file_path / "processed").mkdir(parents=True, exist_ok=True) + # Create zh-TW to zh-simplify converter + cc = OpenCC('t2s') + # Create normalizer + text_normalizer = Normalizer( + lang="zh", input_case="cased", overwrite_cache=True, cache_dir=str(file_path / "cache_dir"), + ) + text_normalizer_call_kwargs = {"punct_pre_process": True, "punct_post_process": True} + normalizer_call = lambda x: text_normalizer.normalize(x, **text_normalizer_call_kwargs) + entries = [] + i = 0 + SPEAKER_LEN = 7 + with open(file_path / "train" / "content.txt", encoding="utf-8") as fin: + for line in fin: + content = line.split() + wav_name, text = content[0], "".join(content[1::2]) + "。" + wav_name = wav_name.replace(u'\ufeff', '') + speaker = wav_name[:SPEAKER_LEN] + wav_file = file_path / "train" / "wav" / speaker / wav_name + assert os.path.exists(wav_file), f"{wav_file} not found!" + duration = subprocess.check_output(f"soxi -D {wav_file}", shell=True) + if float(duration) <= 3.0: # filter out wav files shorter than 3 seconds + continue + processed_file = file_path / "processed" / wav_name + # convert wav to mono 22050HZ, 16 bit (as SFSpeech dataset) + subprocess.run(f"sox {wav_file} -r 22050 -c 1 -b 16 {processed_file}", shell=True) + simplified_text = cc.convert(text) + normalized_text = normalizer_call(simplified_text) + entry = { + 'audio_filepath': os.path.abspath(processed_file), + 'duration': float(duration), + 'text': text, + 'normalized_text': normalized_text, + 'speaker': int(speaker[3:]), + } + + i += 1 + entries.append(entry) + return entries + + +def __process_data(dataset_path, val_size, test_size, seed_for_ds_split, manifests_dir): + entries = __process_transcript(dataset_path) + + random.Random(seed_for_ds_split).shuffle(entries) + + train_size = 1.0 - val_size - test_size + train_entries, validate_entries, test_entries = np.split( + entries, [int(len(entries) * train_size), int(len(entries) * (train_size + val_size))] + ) + + assert len(train_entries) > 0, "Not enough data for train, val and test" + + def save(p, data): + with open(p, 'w') as f: + for d in data: + f.write(json.dumps(d) + '\n') + + save(manifests_dir / "train_manifest.json", train_entries) + save(manifests_dir / "val_manifest.json", validate_entries) + save(manifests_dir / "test_manifest.json", test_entries) + + +def main(): + args = get_args() + + tarred_data_path = args.data_root / "data_aishell3.tgz" + + __maybe_download_file(URL, tarred_data_path) + __extract_file(str(tarred_data_path), str(args.data_root)) + + __process_data( + args.data_root, args.val_size, args.test_size, args.seed_for_ds_split, args.manifests_path, + ) + + +if __name__ == "__main__": + main() From a62d4e908009d3bc19e228a61a4accc557e69aa9 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Tue, 26 Sep 2023 07:49:24 -0700 Subject: [PATCH 252/512] dllogger - log on rank 0 only (#7513) Signed-off-by: Stas Bekman --- nemo/utils/loggers/dllogger.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nemo/utils/loggers/dllogger.py b/nemo/utils/loggers/dllogger.py index 0b2fde061ff9..cdeef63b75f7 100644 --- a/nemo/utils/loggers/dllogger.py +++ b/nemo/utils/loggers/dllogger.py @@ -20,6 +20,7 @@ from lightning_utilities.core.apply_func import apply_to_collection from omegaconf import DictConfig, ListConfig, OmegaConf from pytorch_lightning.loggers import Logger +from pytorch_lightning.utilities import rank_zero_only from pytorch_lightning.utilities.parsing import AttributeDict from nemo.utils import logging @@ -81,6 +82,7 @@ def __init__(self, stdout: bool, verbose: bool, json_file: str): ) dllogger.init(backends=backends) + @rank_zero_only def log_hyperparams(self, params, *args, **kwargs): if isinstance(params, Namespace): params = vars(params) @@ -91,6 +93,7 @@ def log_hyperparams(self, params, *args, **kwargs): params = _sanitize_callable_params(_flatten_dict(_convert_params(params))) dllogger.log(step="PARAMETER", data=params) + @rank_zero_only def log_metrics(self, metrics, step=None): if step is None: step = tuple() From 686609519100abb8c13093c81f8be4a3970677f6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 26 Sep 2023 10:24:49 -0700 Subject: [PATCH 253/512] Fix TTS FastPitch tutorial (#7494) (#7516) * Fix --------- Signed-off-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> --- .../tts/FastPitch_Adapter_Finetuning.ipynb | 28 ++++++++++++++----- .../FastPitch_MultiSpeaker_Pretraining.ipynb | 2 +- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb index 0499c12c90ec..5fe61d596f4b 100644 --- a/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb +++ b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb @@ -80,6 +80,16 @@ "!wandb login #PASTE_WANDB_APIKEY_HERE" ] }, + { + "cell_type": "markdown", + "id": "b73283fc", + "metadata": {}, + "source": [ + "## Set finetuning params\n", + "\n", + "This notebook expects a pretrained model to finetune. If you have a pretrained multispeaker checkpoint, set the path in next block to the path of pretrained checkpoint. You can also pretrain a multispeaker adapter checkpoint using the [FastPitch_MultiSpeaker_Pretraining tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb)." + ] + }, { "cell_type": "code", "execution_count": null, @@ -88,8 +98,9 @@ "outputs": [], "source": [ "# .nemo files for your pre-trained FastPitch and HiFiGAN\n", - "pretrained_fastpitch_checkpoint = \"\"\n", - "finetuned_hifigan_on_multispeaker_checkpoint = \"\"" + "pretrained_fastpitch_checkpoint = \"\"\n", + "finetuned_hifigan_on_multispeaker_checkpoint = \"\"\n", + "use_ipa = False #Set to False while using Arpabet." ] }, { @@ -430,12 +441,15 @@ "metadata": {}, "outputs": [], "source": [ - "phoneme_dict_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"ipa_cmudict-0.7b_nv23.01.txt\"))\n", + "phone_dict_name = \"ipa_cmudict-0.7b_nv23.01.txt\" if use_ipa else \"cmudict-0.7b_nv22.10\"\n", + "phoneme_dict_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", phone_dict_name))\n", "heteronyms_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"heteronyms-052722\"))\n", "\n", "# Copy and Paste the PITCH_MEAN and PITCH_STD from previous steps (train_manifest) to override pitch_mean and pitch_std configs below.\n", "PITCH_MEAN=175.48513793945312\n", - "PITCH_STD=42.3786735534668" + "PITCH_STD=42.3786735534668\n", + "\n", + "config_filename = \"fastpitch_align_ipa_adapter.yaml\" if use_ipa else \"fastpitch_align_44100_adapter.yaml\"" ] }, { @@ -468,7 +482,7 @@ "source": [ "# Normally 200 epochs\n", "!cd {code_dir} && python examples/tts/fastpitch_finetune_adapters.py \\\n", - "--config-name=fastpitch_align_ipa_adapter.yaml \\\n", + "--config-name={config_filename} \\\n", "+init_from_nemo_model={pretrained_fastpitch_checkpoint} \\\n", "train_dataset={train_manifest} \\\n", "validation_datasets={valid_manifest} \\\n", @@ -491,7 +505,7 @@ "+exp_manager.wandb_logger_kwargs.name=\"tutorial-FastPitch-finetune-adaptation\" \\\n", "+exp_manager.wandb_logger_kwargs.project=\"NeMo\" \\\n", "+exp_manager.checkpoint_callback_params.save_top_k=-1 \\\n", - "trainer.max_epochs=200 \\\n", + "trainer.max_epochs=20 \\\n", "trainer.check_val_every_n_epoch=10 \\\n", "trainer.log_every_n_steps=1 \\\n", "trainer.devices=1 \\\n", @@ -598,7 +612,7 @@ "model.optim.lr=0.0001 \\\n", "model/train_ds=train_ds_finetune \\\n", "model/validation_ds=val_ds_finetune \\\n", - "+trainer.max_epochs=500 \\\n", + "+trainer.max_epochs=50 \\\n", "trainer.check_val_every_n_epoch=5 \\\n", "trainer.devices=-1 \\\n", "trainer.strategy='ddp' \\\n", diff --git a/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb b/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb index cb5cb651d76e..a031723f549b 100644 --- a/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb +++ b/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb @@ -511,7 +511,7 @@ "+trainer.max_epochs=5 \\\n", "trainer.check_val_every_n_epoch=5 \\\n", "trainer.devices=1 \\\n", - "trainer.strategy='ddp' \\\n", + "trainer.strategy='auto' \\\n", "trainer.precision=16 \\\n", "exp_manager.exp_dir={logs_dir} \\\n", "exp_manager.create_wandb_logger=True \\\n", From 89e97b4e8da14bcd84d6e1a33ee902602858ad95 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 26 Sep 2023 10:25:23 -0700 Subject: [PATCH 254/512] Fix get_dist() tensor dimension (#7506) (#7515) Signed-off-by: Jocelyn Huang Co-authored-by: Jocelyn --- nemo/collections/tts/modules/aligner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/tts/modules/aligner.py b/nemo/collections/tts/modules/aligner.py index 2910602474fd..f044a86a52eb 100644 --- a/nemo/collections/tts/modules/aligner.py +++ b/nemo/collections/tts/modules/aligner.py @@ -98,7 +98,7 @@ def get_dist(self, keys, queries, mask=None): self._apply_mask(dist, mask, float("inf")) - return dist + return dist.squeeze(1) @staticmethod def get_euclidean_dist(queries_enc, keys_enc): From e1419a860633de4d930c1eb9a801ee54e2cefb94 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 26 Sep 2023 10:25:50 -0700 Subject: [PATCH 255/512] bugfix: specify trainer.strategy=auto when devices=1 (#7509) (#7512) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> --- tutorials/tts/Vits_Training.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/tutorials/tts/Vits_Training.ipynb b/tutorials/tts/Vits_Training.ipynb index 4bb87a2c4506..5108567b5c6c 100644 --- a/tutorials/tts/Vits_Training.ipynb +++ b/tutorials/tts/Vits_Training.ipynb @@ -309,6 +309,7 @@ " heteronyms_path=tts_dataset_files/heteronyms-052722 \\\n", " trainer.max_epochs=3 \\\n", " trainer.accelerator=auto \\\n", + " trainer.strategy=auto \\\n", " trainer.check_val_every_n_epoch=1 \\\n", " trainer.devices=1)" ] From e5b7c26134c76d430b82a79e377e9c7bd6f54544 Mon Sep 17 00:00:00 2001 From: Abhinav Khattar Date: Tue, 26 Sep 2023 13:30:31 -0400 Subject: [PATCH 256/512] fix (#7511) Signed-off-by: Abhinav Khattar --- examples/nlp/language_modeling/tuning/megatron_gpt_sft.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py index 6b945c99a750..68f06a0da7da 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py @@ -66,6 +66,9 @@ def _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False): gpt_cfg.attention_dropout = cfg.model.get('attention_dropout', 0.0) gpt_cfg.ffn_dropout = cfg.model.ffn_dropout gpt_cfg.use_flash_attention = cfg.model.get('use_flash_attention', False) + gpt_cfg.tensor_model_parallel_size = cfg.model.get('tensor_model_parallel_size', 1) + gpt_cfg.pipeline_model_parallel_size = cfg.model.get('pipeline_model_parallel_size', 1) + gpt_cfg.pipeline_model_parallel_split_rank = cfg.model.get('pipeline_model_parallel_split_rank', 0) sft_cls = MegatronGPTSFTModel gpt_cfg.target = f"{sft_cls.__module__}.{sft_cls.__name__}" From febcab058a85ddf9df63e76d934a4a3452875ae0 Mon Sep 17 00:00:00 2001 From: Ryan Langman Date: Tue, 26 Sep 2023 17:14:16 -0700 Subject: [PATCH 257/512] [TTS] Fix FastPitch data prep tutorial (#7524) Signed-off-by: Ryan --- scripts/dataset_processing/tts/preprocess_text.py | 4 ++-- tutorials/tts/FastPitch_Data_Preparation.ipynb | 14 +++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/scripts/dataset_processing/tts/preprocess_text.py b/scripts/dataset_processing/tts/preprocess_text.py index 2edc52969140..580a84a02d6f 100644 --- a/scripts/dataset_processing/tts/preprocess_text.py +++ b/scripts/dataset_processing/tts/preprocess_text.py @@ -22,9 +22,9 @@ --input_manifest="/manifest.json" \ --output_manifest="/manifest_processed.json" \ --normalizer_config_path="/examples/tts/conf/text/normalizer_en.yaml" \ - --lower_case=True \ + --lower_case \ --num_workers=4 \ - --batch_size=16 + --joblib_batch_size=16 """ import argparse diff --git a/tutorials/tts/FastPitch_Data_Preparation.ipynb b/tutorials/tts/FastPitch_Data_Preparation.ipynb index 46778759d5cb..5a42018c70b5 100644 --- a/tutorials/tts/FastPitch_Data_Preparation.ipynb +++ b/tutorials/tts/FastPitch_Data_Preparation.ipynb @@ -332,6 +332,8 @@ "lower_case = True\n", "# Whether to overwrite output manifest, if it exists\n", "overwrite_manifest = True\n", + "# Batch size for joblib parallelization. Increasing this value might speed up the script, depending on your CPU.\n", + "joblib_batch_size = 16\n", "\n", "# Python wrapper to invoke the given bash script with the given input args\n", "def run_script(script, args):\n", @@ -351,8 +353,10 @@ " f\"--output_manifest={output_filepath}\",\n", " f\"--num_workers={num_workers}\",\n", " f\"--normalizer_config_path={normalizer_config_filepath}\",\n", - " f\"--lower_case={lower_case}\"\n", + " f\"--joblib_batch_size={joblib_batch_size}\"\n", " ]\n", + " if lower_case:\n", + " args.append(\"--lower_case\")\n", " if overwrite_manifest:\n", " args.append(\"--overwrite\")\n", "\n", @@ -787,7 +791,7 @@ "\n", "We will train HiFi-GAN first so that we can use it to help evaluate the performance of FastPitch as it is being trained.\n", "\n", - "HiFi-GAN training only requires a manifest with with the `audio_filepath` field. All other fields in the manifest are for FastPitch training.\n", + "HiFi-GAN training only requires a manifest with the `audio_filepath` field. All other fields in the manifest are for FastPitch training.\n", "\n", "Here we show how to train these models from scratch. You can also fine-tune them from pretrained checkpoints as mentioned in our [FastPitch fine-tuning tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_Finetuning.ipynb), but pretrained checkpoints compatible with these experimental recipes are not yet available on NGC.\n" ], @@ -914,7 +918,7 @@ { "cell_type": "code", "source": [ - "hifigan_log_epoch_dir = hifigan_log_dir / \"epoch_10\"\n", + "hifigan_log_epoch_dir = hifigan_log_dir / \"epoch_10\" / dataset_name\n", "!ls $hifigan_log_epoch_dir" ], "metadata": { @@ -966,7 +970,7 @@ "1. Training manifest(s) with `audio_filepath` and `text` or `normalized_text` fields.\n", "2. Precomputed features such as *pitch* and *energy* specified in the feature [config file](https://github.com/NVIDIA/NeMo/blob/main/examples/tts/conf/feature/feature_44100.yaml).\n", "3. (Optional) Statistics file for normalizing features.\n", - "4. (Optional) For a multi-speaker model, the manifest needs a `speaker` field amd JSON file mapping speaker IDs to speaker indices.\n", + "4. (Optional) For a multi-speaker model, the manifest needs a `speaker` field and JSON file mapping speaker IDs to speaker indices.\n", "5. (Optional) To train with IPA phonemes, a [phoneme dictionary](https://github.com/NVIDIA/NeMo/blob/main/scripts/tts_dataset_files/ipa_cmudict-0.7b_nv23.01.txt) and optional [heteronyms file](https://github.com/NVIDIA/NeMo/blob/main/scripts/tts_dataset_files/heteronyms-052722)\n", "6. (Optional) HiFi-GAN checkpoint or [NGC model name](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/tts/models/hifigan.py#L413) for generating audio predictions during training.\n", "\n" @@ -1093,7 +1097,7 @@ { "cell_type": "code", "source": [ - "faspitch_log_epoch_dir = fastpitch_log_dir / \"epoch_10\"\n", + "faspitch_log_epoch_dir = fastpitch_log_dir / \"epoch_10\" / dataset_name\n", "!ls $faspitch_log_epoch_dir" ], "metadata": { From 147e7ac77ef6a5faf3e9e22b0fdd15c416b137df Mon Sep 17 00:00:00 2001 From: Giacomo Leone Maria Cavallini <72698188+GiacomoLeoneMaria@users.noreply.github.com> Date: Wed, 27 Sep 2023 08:12:26 +0200 Subject: [PATCH 258/512] add italian tokenization (#7486) * add italian tokenization Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add more ipa lexicon it Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix error deletion Signed-off-by: GiacomoLeoneMaria * add test Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: GiacomoLeoneMaria Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../tokenizers/text_to_speech/ipa_lexicon.py | 50 +++++++++++++++++-- .../text_to_speech/tokenizer_utils.py | 5 ++ .../text_to_speech/tts_tokenizers.py | 32 +++++++++++- .../text_to_speech/test_tts_tokenizers.py | 13 +++++ 4 files changed, 95 insertions(+), 5 deletions(-) diff --git a/nemo/collections/common/tokenizers/text_to_speech/ipa_lexicon.py b/nemo/collections/common/tokenizers/text_to_speech/ipa_lexicon.py index 746c783bd1b6..2e1bb359102b 100644 --- a/nemo/collections/common/tokenizers/text_to_speech/ipa_lexicon.py +++ b/nemo/collections/common/tokenizers/text_to_speech/ipa_lexicon.py @@ -15,7 +15,7 @@ # fmt: off -SUPPORTED_LOCALES = ["en-US", "de-DE", "es-ES"] +SUPPORTED_LOCALES = ["en-US", "de-DE", "es-ES", "it-IT"] DEFAULT_PUNCTUATION = ( ',', '.', '!', '?', '-', @@ -48,6 +48,12 @@ 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'Ä', 'Ö', 'Ü', 'ẞ', ), + "it-IT": ( + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', + 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', + 'U', 'V', 'W', 'X', 'Y', 'Z', 'À', 'È', 'É', 'Ì', + 'Ò', 'Ù' + ), } IPA_CHARACTER_SETS = { @@ -70,7 +76,20 @@ 'w', 'x', 'y', 'z', 'ç', 'ø', 'ŋ', 'œ', 'ɐ', 'ɑ', 'ɒ', 'ɔ', 'ə', 'ɛ', 'ɜ', 'ɡ', 'ɪ', 'ɹ', 'ɾ', 'ʃ', 'ʊ', 'ʌ', 'ʒ', '̃', 'θ' - ) + ), + "it-IT": ( + 'a', 'b', 'd', 'e', 'f', 'h', 'i', 'j', 'k', 'l', + 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'z', 'æ', 'ɐ', 'ɑ', 'ɔ', 'ə', 'ɚ', + 'ɜ', 'ɬ', 'ɹ', 'ʌ', 'ʔ', 'ʲ', '̃', '̩', 'ᵻ', + 'ð', 'ŋ', 'ɛ', 'ɡ', 'ɣ', 'ɪ', 'ɲ', 'ɾ', 'ʃ', + 'ʊ', 'ʎ', 'ʒ', 'ʝ', 'β', 'θ', 'd͡', 't͡', 'ø', 'ɒ', + 'ɕ', 'ɓ', 'ç', 'ɖ', 'ɘ', 'ɝ', 'ɞ', 'ɟ','ʄ','ɡ','ɠ', + 'ɢ','ʛ','ɦ','ɧ','ħ','ɥ','ʜ','ɨ','ɬ','ɫ','ɮ','ʟ', + 'ɱ','ɯ','ɰ','ɳ','ɵ','ɸ','œ','ɶ','ʘ','ɺ','ɻ','ʀ','ʁ', + 'ɽ','ʂ','ʈ','ʧ','ʉ','ʋ','ⱱ','ɤ','ʍ','χ','ʏ','ʑ','ʐ', + 'ʔ','ʡ','ʕ','ʢ','ǀ','ǁ','ǂ','ᵻ' + ), } GRAPHEME_CHARACTER_CASES = ["upper", "lower", "mixed"] @@ -124,7 +143,7 @@ def get_ipa_punctuation_list(locale): punct_set = set(DEFAULT_PUNCTUATION) # TODO @xueyang: verify potential mismatches with locale-specific punctuation sets used # in nemo_text_processing.text_normalization.en.taggers.punctuation.py - if locale in ["de-DE", "es-ES"]: + if locale in ["de-DE", "es-ES", "it-IT"]: # ref: https://en.wikipedia.org/wiki/Guillemet#Uses punct_set.update(['«', '»', '‹', '›']) if locale == "de-DE": @@ -140,6 +159,31 @@ def get_ipa_punctuation_list(locale): '—', # em dash, U+2014, decimal 8212 ] ) + if locale == "it-IT": + # ref: https://en.wikipedia.org/wiki/German_orthography#Punctuation + punct_set.update( + [ + '„', # double low-9 quotation mark, U+201E, decimal 8222 + '“', # left double quotation mark, U+201C, decimal 8220 + '‚', # single low-9 quotation mark, U+201A, decimal 8218 + '‘', # left single quotation mark, U+2018, decimal 8216 + '‒', # figure dash, U+2012, decimal 8210 + '–', # en dash, U+2013, decimal 8211 + '—', # em dash, U+2014, decimal 8212 + 'ʴ', + 'ʰ', + 'ʱ', + 'ʲ', + 'ʷ', + 'ˠ', + 'ˤ', + '˞↓', + '↑', + '→', + '↗', + '↘,', + ] + ) elif locale == "es-ES": # ref: https://en.wikipedia.org/wiki/Spanish_orthography#Punctuation punct_set.update(['¿', '¡']) diff --git a/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py b/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py index 92a3e0fb49e0..ad9ad9f4e898 100644 --- a/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py +++ b/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py @@ -23,6 +23,7 @@ "english_text_preprocessing", "any_locale_text_preprocessing", "spanish_text_preprocessing", + "italian_text_preprocessing", "any_locale_word_tokenize", "english_word_tokenize", "LATIN_CHARS_ALL", @@ -189,5 +190,9 @@ def spanish_text_preprocessing(text: str) -> str: return text.lower() +def italian_text_preprocessing(text: str) -> str: + return text.lower() + + def chinese_text_preprocessing(text: str) -> str: return text diff --git a/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py b/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py index 4193cf00eb85..32f725c9c73f 100644 --- a/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py +++ b/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py @@ -28,6 +28,7 @@ any_locale_text_preprocessing, chinese_text_preprocessing, english_text_preprocessing, + italian_text_preprocessing, spanish_text_preprocessing, ) from nemo.utils import logging @@ -267,6 +268,34 @@ def __init__( ) +class ItalianCharsTokenizer(BaseCharsTokenizer): + PUNCT_LIST = get_ipa_punctuation_list("it-IT") + + def __init__( + self, punct=True, apostrophe=True, add_blank_at=None, pad_with_space=False, non_default_punct_list=None + ): + """Italian grapheme tokenizer. + Args: + punct: Whether to reserve grapheme for basic punctuation or not. + apostrophe: Whether to use apostrophe or not. + add_blank_at: Add blank to labels in the specified order ("last") or after tokens (any non None), + if None then no blank in labels. + pad_with_space: Whether to pad text with spaces at the beginning and at the end or not. + non_default_punct_list: List of punctuation marks which will be used instead default. + """ + + it_alphabet = "abcdefghijklmnopqrstuvwxyzàèéìòù" + super().__init__( + chars=it_alphabet, + punct=punct, + apostrophe=apostrophe, + add_blank_at=add_blank_at, + pad_with_space=pad_with_space, + non_default_punct_list=non_default_punct_list, + text_preprocessing_func=italian_text_preprocessing, + ) + + class GermanPhonemesTokenizer(BaseCharsTokenizer): # fmt: off PUNCT_LIST = ( # Derived from LJSpeech and "/" additionally @@ -694,8 +723,7 @@ def __init__( pad_with_space=False, text_preprocessing_func=chinese_text_preprocessing, ): - """ - Chinese phoneme-based tokenizer. + """Chinese phoneme-based tokenizer. Note: This tokenizer for now covers Chinese phonemes/tones and English letters because our dataset contains both Chinese and English graphemes. Args: diff --git a/tests/collections/common/tokenizers/text_to_speech/test_tts_tokenizers.py b/tests/collections/common/tokenizers/text_to_speech/test_tts_tokenizers.py index e4e16fa31d68..62c571bc16b7 100644 --- a/tests/collections/common/tokenizers/text_to_speech/test_tts_tokenizers.py +++ b/tests/collections/common/tokenizers/text_to_speech/test_tts_tokenizers.py @@ -18,6 +18,7 @@ EnglishCharsTokenizer, GermanCharsTokenizer, IPATokenizer, + ItalianCharsTokenizer, SpanishCharsTokenizer, ) from nemo.collections.tts.g2p.models.i18n_ipa import IpaG2p @@ -89,6 +90,18 @@ def test_german_chars_tokenizer(self): assert chars == expected_output assert len(tokens) == len(input_text) + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_italian_chars_tokenizer(self): + input_text = "Ciao mondo!" + expected_output = "ciao mondo!" + + tokenizer = ItalianCharsTokenizer() + chars, tokens = self._parse_text(tokenizer, input_text) + + assert chars == expected_output + assert len(tokens) == len(input_text) + @pytest.mark.run_only_on('CPU') @pytest.mark.unit def test_spanish_chars_tokenizer(self): From 301a266bff394dcc296e01ba04eb0b8b60b26556 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 26 Sep 2023 23:13:39 -0700 Subject: [PATCH 259/512] Replace None strategy with auto in tutorial notebooks (#7521) (#7527) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> --- tutorials/02_NeMo_Adapters.ipynb | 2 +- tutorials/asr/ASR_TTS_Tutorial.ipynb | 2 +- tutorials/asr/Self_Supervised_Pre_Training.ipynb | 6 +++--- tutorials/asr/Speech_Commands.ipynb | 2 +- tutorials/asr/Voice_Activity_Detection.ipynb | 2 +- .../speech_enhancement/Speech_Enhancement_with_NeMo.ipynb | 6 +++--- tutorials/nlp/Entity_Linking_Medical.ipynb | 2 +- tutorials/nlp/GLUE_Benchmark.ipynb | 4 ++-- tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb | 4 ++-- tutorials/nlp/Punctuation_and_Capitalization.ipynb | 6 +++--- .../nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb | 4 ++-- tutorials/nlp/Relation_Extraction-BioMegatron.ipynb | 4 ++-- tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb | 6 +++--- tutorials/nlp/Token_Classification-BioMegatron.ipynb | 2 +- .../nlp/Token_Classification_Named_Entity_Recognition.ipynb | 4 ++-- tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb | 4 ++-- tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb | 2 +- .../speaker_tasks/Speaker_Identification_Verification.ipynb | 2 +- 18 files changed, 32 insertions(+), 32 deletions(-) diff --git a/tutorials/02_NeMo_Adapters.ipynb b/tutorials/02_NeMo_Adapters.ipynb index 41747c4fa5d9..071ed5f72809 100644 --- a/tutorials/02_NeMo_Adapters.ipynb +++ b/tutorials/02_NeMo_Adapters.ipynb @@ -1985,4 +1985,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/tutorials/asr/ASR_TTS_Tutorial.ipynb b/tutorials/asr/ASR_TTS_Tutorial.ipynb index 007713ee3cc2..9bbcc8e4aa34 100644 --- a/tutorials/asr/ASR_TTS_Tutorial.ipynb +++ b/tutorials/asr/ASR_TTS_Tutorial.ipynb @@ -553,7 +553,7 @@ "config.trainer.max_epochs = NUM_EPOCHS\n", "\n", "config.trainer.devices = 1\n", - "config.trainer.strategy = None # use 1 device, no need for ddp strategy\n", + "config.trainer.strategy = auto # use 1 device, no need for ddp strategy\n", "\n", "OmegaConf.resolve(config)" ] diff --git a/tutorials/asr/Self_Supervised_Pre_Training.ipynb b/tutorials/asr/Self_Supervised_Pre_Training.ipynb index e13ca31e8195..20b0a8742e72 100644 --- a/tutorials/asr/Self_Supervised_Pre_Training.ipynb +++ b/tutorials/asr/Self_Supervised_Pre_Training.ipynb @@ -316,7 +316,7 @@ " cfg.trainer.gpus = 1\n", "else:\n", " cfg.trainer.accelerator = 'cpu'\n", - " cfg.trainer.strategy = None\n", + " cfg.trainer.strategy = auto\n", " cfg.trainer.gpus = 0\n", "\n", "cfg.exp_manager.exp_dir = data_dir + \"/content/exp\"\n", @@ -538,7 +538,7 @@ " cfg.trainer.gpus = 1\n", "else:\n", " cfg.trainer.accelerator = 'cpu'\n", - " cfg.trainer.strategy = None\n", + " cfg.trainer.strategy = auto\n", " cfg.trainer.gpus = 0\n", "\n", "cfg.model.tokenizer.dir = data_dir + \"/tokenizers/an4/tokenizer_spe_unigram_v128/\" # note this is a directory, not a path to a vocabulary file\n", @@ -725,4 +725,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/tutorials/asr/Speech_Commands.ipynb b/tutorials/asr/Speech_Commands.ipynb index 7e04e3e6cd68..0dc4e1e5443f 100644 --- a/tutorials/asr/Speech_Commands.ipynb +++ b/tutorials/asr/Speech_Commands.ipynb @@ -441,7 +441,7 @@ "config.trainer.max_epochs = 5\n", "\n", "# Remove distributed training flags\n", - "config.trainer.strategy = None" + "config.trainer.strategy = auto" ], "execution_count": null, "outputs": [] diff --git a/tutorials/asr/Voice_Activity_Detection.ipynb b/tutorials/asr/Voice_Activity_Detection.ipynb index ed4893d83a84..ea06200efb0f 100644 --- a/tutorials/asr/Voice_Activity_Detection.ipynb +++ b/tutorials/asr/Voice_Activity_Detection.ipynb @@ -462,7 +462,7 @@ "config.trainer.max_epochs = 5\n", "\n", "# Remove distributed training flags\n", - "config.trainer.strategy = None" + "config.trainer.strategy = auto" ] }, { diff --git a/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb b/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb index 41a49688d35e..d8a15cbd5e1c 100644 --- a/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb +++ b/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb @@ -667,7 +667,7 @@ "config.trainer.max_epochs = 10\n", "\n", "# Remove distributed training flags\n", - "config.trainer.strategy = None\n", + "config.trainer.strategy = auto\n", "\n", "# Instantiate the trainer\n", "trainer = pl.Trainer(**config.trainer)" @@ -1144,7 +1144,7 @@ "config_dual_output.trainer.max_epochs = 10\n", "\n", "# Remove distributed training flags\n", - "config_dual_output.trainer.strategy = None\n", + "config_dual_output.trainer.strategy = auto\n", "\n", "# Instantiate the trainer\n", "trainer = pl.Trainer(**config_dual_output.trainer)\n", @@ -1313,4 +1313,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/tutorials/nlp/Entity_Linking_Medical.ipynb b/tutorials/nlp/Entity_Linking_Medical.ipynb index 54debaaac416..466add77d8dc 100644 --- a/tutorials/nlp/Entity_Linking_Medical.ipynb +++ b/tutorials/nlp/Entity_Linking_Medical.ipynb @@ -187,7 +187,7 @@ "cfg.model.validation_ds.data_file = os.path.join(DATA_DIR, \"tiny_example_validation_pairs.tsv\")\n", "\n", "# remove distributed training flags\n", - "cfg.trainer.strategy = None\n", + "cfg.trainer.strategy = auto\n", "cfg.trainer.accelerator = None" ] }, diff --git a/tutorials/nlp/GLUE_Benchmark.ipynb b/tutorials/nlp/GLUE_Benchmark.ipynb index 516cd9b9811e..c7f62e6b635b 100644 --- a/tutorials/nlp/GLUE_Benchmark.ipynb +++ b/tutorials/nlp/GLUE_Benchmark.ipynb @@ -342,7 +342,7 @@ "# config.trainer.amp_level = O1\n", "\n", "# remove distributed training flags\n", - "config.trainer.strategy = None\n", + "config.trainer.strategy = auto\n", "\n", "# setup max number of steps to reduce training time for demonstration purposes of this tutorial\n", "config.trainer.max_steps = 128\n", @@ -563,4 +563,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb index 1c75afc67352..282a90428c01 100644 --- a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb +++ b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb @@ -286,7 +286,7 @@ "# config.trainer.amp_level = O1\n", "\n", "# remove distributed training flags\n", - "config.trainer.strategy = None\n", + "config.trainer.strategy = auto\n", "\n", "# setup a small number of epochs for demonstration purposes of this tutorial\n", "config.trainer.max_epochs = 5\n", @@ -705,7 +705,7 @@ "config.trainer.accelerator = accelerator\n", "\n", "# remove distributed training flags\n", - "config.trainer.strategy = None\n", + "config.trainer.strategy = auto\n", "\n", "trainer = pl.Trainer(**config.trainer)\n", "config.exp_manager.exp_dir = os.path.join(DATA_DIR, \"output/\" + run_name)\n", diff --git a/tutorials/nlp/Punctuation_and_Capitalization.ipynb b/tutorials/nlp/Punctuation_and_Capitalization.ipynb index 1d5c73255f68..02133ae23a03 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization.ipynb @@ -550,7 +550,7 @@ "config.trainer.max_epochs = 1\n", "\n", "# Remove distributed training flags\n", - "config.trainer.strategy = None\n", + "config.trainer.strategy = auto\n", "\n", "trainer = pl.Trainer(**config.trainer)" ] @@ -745,7 +745,7 @@ "config.trainer.accelerator = accelerator\n", "config.trainer.precision = 16 if torch.cuda.is_available() else 32\n", "config.trainer.max_epochs = 1\n", - "config.trainer.strategy = None\n", + "config.trainer.strategy = auto\n", "\n", "# Exp manager\n", "config.exp_manager.explicit_log_dir = 'tarred_experiment'\n", @@ -1043,4 +1043,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb index 2ce757d57ac9..6dacda811bda 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb @@ -645,7 +645,7 @@ "config.trainer.max_epochs = 1\n", "\n", "# Remove distributed training flags\n", - "config.trainer.strategy = None\n", + "config.trainer.strategy = auto\n", "config.exp_manager.use_datetime_version=False\n", "config.exp_manager.explicit_log_dir='Punctuation_And_Capitalization_Lexical_Audio'\n", "\n", @@ -860,7 +860,7 @@ "config.trainer.accelerator = accelerator\n", "config.trainer.precision = 16 if torch.cuda.is_available() else 32\n", "config.trainer.max_epochs = 1\n", - "config.trainer.strategy = None\n", + "config.trainer.strategy = auto\n", "\n", "# Exp manager\n", "config.exp_manager.explicit_log_dir = 'tarred_experiment'\n", diff --git a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb index 3d428235aa03..2205a5f953c7 100644 --- a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb +++ b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb @@ -403,7 +403,7 @@ "config.trainer.precision = 16 if torch.cuda.is_available() else 32\n", "\n", "# remove distributed training flags\n", - "config.trainer.strategy = None\n", + "config.trainer.strategy = auto\n", "\n", "trainer = pl.Trainer(**config.trainer)" ] @@ -652,4 +652,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb index e6df8b71c294..c673a71b98e2 100644 --- a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb +++ b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb @@ -370,7 +370,7 @@ "# config.trainer.amp_level = O1\n", "\n", "# disable distributed training when using Colab to prevent the errors\n", - "config.trainer.strategy = None\n", + "config.trainer.strategy = auto\n", "\n", "# setup max number of steps to reduce training time for demonstration purposes of this tutorial\n", "# Training stops when max_step or max_epochs is reached (earliest)\n", @@ -573,7 +573,7 @@ "# create a copy of the trainer config and update it to be used for final evaluation\n", "eval_trainer_cfg = config.trainer.copy()\n", "eval_trainer_cfg.accelerator = 'gpu' if torch.cuda.is_available() else 'cpu' # it is safer to perform evaluation on single GPU as PT is buggy with the last batch on multi-GPUs\n", - "eval_trainer_cfg.strategy = None # 'ddp' is buggy with test process in the current PT, it looks like it has been fixed in the latest master\n", + "eval_trainer_cfg.strategy = auto # 'ddp' is buggy with test process in the current PT, it looks like it has been fixed in the latest master\n", "eval_trainer = pl.Trainer(**eval_trainer_cfg)\n", "\n", "eval_trainer.test(model=eval_model, verbose=False) # test_dataloaders=eval_dataloader,\n" @@ -832,4 +832,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/tutorials/nlp/Token_Classification-BioMegatron.ipynb b/tutorials/nlp/Token_Classification-BioMegatron.ipynb index 3ac90f354fef..b2da9799b470 100644 --- a/tutorials/nlp/Token_Classification-BioMegatron.ipynb +++ b/tutorials/nlp/Token_Classification-BioMegatron.ipynb @@ -434,7 +434,7 @@ "config.trainer.precision = 16 if torch.cuda.is_available() else 32\n", "\n", "# remove distributed training flags\n", - "config.trainer.strategy = None\n", + "config.trainer.strategy = auto\n", "\n", "trainer = pl.Trainer(**config.trainer)" ] diff --git a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb index 73438b590827..3a289ba4c1f1 100644 --- a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb +++ b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb @@ -533,7 +533,7 @@ "# config.trainer.amp_level = O1\n", "\n", "# remove distributed training flags\n", - "config.trainer.strategy = None\n", + "config.trainer.strategy = auto\n", "\n", "# setup max number of steps to reduce training time for demonstration purposes of this tutorial\n", "config.trainer.max_steps = 32\n", @@ -847,4 +847,4 @@ "metadata": {} } ] -} \ No newline at end of file +} diff --git a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb index b4a9315f9670..f25ad2494694 100644 --- a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb +++ b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb @@ -400,7 +400,7 @@ "# config.trainer.amp_level = O1\n", "\n", "# remove distributed training flags\n", - "config.trainer.strategy = None\n", + "config.trainer.strategy = auto\n", "\n", "# setup max number of steps to reduce training time for demonstration purposes of this tutorial\n", "config.trainer.max_steps = 128\n", @@ -671,4 +671,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb index 7ecbc63af67d..475e382e574f 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb @@ -761,7 +761,7 @@ "source": [ "config.model.diarizer.speaker_embeddings.model_path=\"titanet_large\"\n", "config.trainer.max_epochs = 5\n", - "config.trainer.strategy = None" + "config.trainer.strategy = auto" ] }, { diff --git a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb index 2cd3ded24e8b..2e7490ed7b9b 100644 --- a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb +++ b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb @@ -475,7 +475,7 @@ "config.trainer.max_epochs = 10\n", "\n", "# Remove distributed training flags\n", - "config.trainer.strategy = None\n", + "config.trainer.strategy = auto\n", "\n", "# Remove augmentations\n", "config.model.train_ds.augmentor=None" From 9bee661d9c12d41cc2cd77e7db92db3971f3aee0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 27 Sep 2023 19:56:52 +0800 Subject: [PATCH 260/512] unpin setuptools (#7534) (#7535) Signed-off-by: fayejf <36722593+fayejf@users.noreply.github.com> Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> --- requirements/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 7481e337c999..a9a8c1e98100 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -5,7 +5,7 @@ onnx>=1.7.0 python-dateutil ruamel.yaml scikit-learn -setuptools==65.5.1 +setuptools>=65.5.1 tensorboard text-unidecode torch From b5466436860da39c50b82b4de68ea0b1e0f65738 Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Wed, 27 Sep 2023 09:05:29 -0700 Subject: [PATCH 261/512] remove auto generated examples (#7510) * explicitly remove autogenerated examples for data parallel evaluation Signed-off-by: arendu * mark autogenrated and remove it for test Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../data/language_modeling/megatron/gpt_sft_dataset.py | 5 +++++ .../models/language_modeling/megatron_gpt_sft_model.py | 9 +++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py index 2c655d5cde6b..101201ef7536 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py @@ -171,8 +171,13 @@ def __getitem__(self, idx): # idx may < 0 because we pad_samples_to_global_batch_size, e.g. id = -1 if idx < 0: idx = len(self) + idx + auto_gen_idx = True + else: + auto_gen_idx = False try: example = self.indexed_dataset[idx] + if auto_gen_idx: + example['__AUTOGENERATED__'] = True except Exception as e: logging.error(f"Error while loading example {idx} from dataset {self.file_path}") raise e diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index 5ab36d3c27a8..11dd0034dc24 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -489,7 +489,6 @@ def inference_epoch_end(self, outputs, mode, data_cfg): ) # Remove duplicate examples due to distributed sampler. - inp_label_set = set() deduplicated_outputs = { 'preds': [], 'labels': [], @@ -502,14 +501,16 @@ def inference_epoch_end(self, outputs, mode, data_cfg): for pred, label, input, metadata in zip( batch['preds'], batch['labels'], batch['inputs'], batch['metadata'] ): - key = input + label total_size += 1 - if key not in inp_label_set: - inp_label_set.add(key) + if not metadata.get("__AUTOGENERATED__", False): deduplicated_outputs['preds'].append(pred) deduplicated_outputs['labels'].append(label) deduplicated_outputs['inputs'].append(input) deduplicated_outputs['metadata'].append(metadata) + else: + logging.info( + f"skipping autogenerated example example {input} prediction {pred} label {label}" + ) # Compute metric score metric_name = self.val_metric_name if mode == 'validation' else self.test_metric_name From 08e91e1a0c6a6426d29dc1260483b298c438eed0 Mon Sep 17 00:00:00 2001 From: Olivier Delalleau <507137+odelalleau@users.noreply.github.com> Date: Wed, 27 Sep 2023 15:21:06 -0400 Subject: [PATCH 262/512] Add the `strategy` argument to `MegatronGPTModel.generate()` (#7264) It is passed as an explicit argument rather than through `**strategy_args` so as to ensure someone cannot accidentally pass other arguments that would end up being ignored. It is a keyword-only argument to ensure that if in the future we want to update the signature to `**strategy_args`, we can do it without breaking code. Signed-off-by: Olivier Delalleau <507137+odelalleau@users.noreply.github.com> --- .../nlp/models/language_modeling/megatron_gpt_model.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 9b132e1d3321..0a8a93ebac52 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -41,6 +41,7 @@ get_ltor_masks_and_position_ids, get_params_for_weight_decay_optimization, ) +from nemo.collections.nlp.modules.common.text_generation_strategy import TextGenerationStrategy from nemo.collections.nlp.modules.common.text_generation_utils import ( generate, get_computeprob_response, @@ -1176,6 +1177,8 @@ def generate( inputs: Union[List[str], torch.Tensor, List[dict]], length_params: LengthParam, sampling_params: SamplingParam = None, + *, + strategy: Optional[TextGenerationStrategy] = None, ) -> OutputType: # check whether the DDP is initialized @@ -1201,7 +1204,11 @@ def dummy(): if length_params is None: length_params = get_default_length_params() - return megatron_gpt_generate(self.cuda(), inputs, self.tokenizer, length_params, sampling_params) + strategy_args = {} if strategy is None else {"strategy": strategy} + + return megatron_gpt_generate( + self.cuda(), inputs, self.tokenizer, length_params, sampling_params, **strategy_args + ) def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any: inference_config = self.get_inference_config() From 677960d001932e7c399eb3e6c8c63fc3da59a888 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 27 Sep 2023 14:45:17 -0700 Subject: [PATCH 263/512] Fix PTL2.0 related ASR bugs in r1.21.0: Val metrics logging, None dataloader issue (#7531) (#7533) * fix none dataloader issue ptl2 * ptl2.0 logging fixes for rnnt_models --------- Signed-off-by: KunalDhawan Co-authored-by: Kunal Dhawan Co-authored-by: Nithin Rao --- nemo/collections/asr/models/rnnt_models.py | 24 ++++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/nemo/collections/asr/models/rnnt_models.py b/nemo/collections/asr/models/rnnt_models.py index 0c1da97c5012..8b5798d34356 100644 --- a/nemo/collections/asr/models/rnnt_models.py +++ b/nemo/collections/asr/models/rnnt_models.py @@ -772,7 +772,7 @@ def predict_step(self, batch, batch_idx, dataloader_idx=0): sample_id = sample_id.cpu().detach().numpy() return list(zip(sample_id, best_hyp_text)) - def validation_step(self, batch, batch_idx, dataloader_idx=0): + def validation_pass(self, batch, batch_idx, dataloader_idx=0): signal, signal_len, transcript, transcript_len = batch # forward() only performs encoder forward @@ -835,15 +835,21 @@ def validation_step(self, batch, batch_idx, dataloader_idx=0): return tensorboard_logs + def validation_step(self, batch, batch_idx, dataloader_idx=0): + metrics = self.validation_pass(batch, batch_idx, dataloader_idx) + if type(self.trainer.val_dataloaders) == list and len(self.trainer.val_dataloaders) > 1: + self.validation_step_outputs[dataloader_idx].append(metrics) + else: + self.validation_step_outputs.append(metrics) + return metrics + def test_step(self, batch, batch_idx, dataloader_idx=0): - logs = self.validation_step(batch, batch_idx, dataloader_idx=dataloader_idx) - test_logs = { - 'test_wer_num': logs['val_wer_num'], - 'test_wer_denom': logs['val_wer_denom'], - # 'test_wer': logs['val_wer'], - } - if 'val_loss' in logs: - test_logs['test_loss'] = logs['val_loss'] + logs = self.validation_pass(batch, batch_idx, dataloader_idx=dataloader_idx) + test_logs = {name.replace("val_", "test_"): value for name, value in logs.items()} + if type(self.trainer.test_dataloaders) == list and len(self.trainer.test_dataloaders) > 1: + self.test_step_outputs[dataloader_idx].append(test_logs) + else: + self.test_step_outputs.append(test_logs) return test_logs def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): From 3b435edd686a3d374713ae3c8d805d13ac728679 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 27 Sep 2023 17:37:53 -0700 Subject: [PATCH 264/512] gpus -> devices (#7542) (#7545) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao --- tutorials/asr/Self_Supervised_Pre_Training.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tutorials/asr/Self_Supervised_Pre_Training.ipynb b/tutorials/asr/Self_Supervised_Pre_Training.ipynb index 20b0a8742e72..cecc0b08cd05 100644 --- a/tutorials/asr/Self_Supervised_Pre_Training.ipynb +++ b/tutorials/asr/Self_Supervised_Pre_Training.ipynb @@ -313,11 +313,11 @@ "if torch.cuda.is_available():\n", " cfg.trainer.accelerator = 'gpu'\n", " cfg.trainer.strategy = 'dp'\n", - " cfg.trainer.gpus = 1\n", + " cfg.trainer.devices = 1\n", "else:\n", " cfg.trainer.accelerator = 'cpu'\n", " cfg.trainer.strategy = auto\n", - " cfg.trainer.gpus = 0\n", + " cfg.trainer.devices = 0\n", "\n", "cfg.exp_manager.exp_dir = data_dir + \"/content/exp\"\n", "cfg.exp_manager.name = \"pre_trained\"\n", @@ -535,11 +535,11 @@ "if torch.cuda.is_available():\n", " cfg.trainer.accelerator = 'gpu'\n", " cfg.trainer.strategy = 'dp'\n", - " cfg.trainer.gpus = 1\n", + " cfg.trainer.devices = 1\n", "else:\n", " cfg.trainer.accelerator = 'cpu'\n", " cfg.trainer.strategy = auto\n", - " cfg.trainer.gpus = 0\n", + " cfg.trainer.devices = 0\n", "\n", "cfg.model.tokenizer.dir = data_dir + \"/tokenizers/an4/tokenizer_spe_unigram_v128/\" # note this is a directory, not a path to a vocabulary file\n", "cfg.model.tokenizer.type = \"bpe\"\n", From 2bb56f73a46c65a65b759a1d0e2b87b00f7b47ca Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 27 Sep 2023 18:00:26 -0700 Subject: [PATCH 265/512] Update FFMPEG version to fix issue with torchaudio (#7551) (#7553) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index e4f04359e6eb..843c0c27df45 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,7 +38,7 @@ RUN apt-get update && \ libsndfile1 sox \ libfreetype6 \ swig \ - ffmpeg \ + ffmpeg=ffmpeg_5.1.2-3ubuntu1 \ libavdevice-dev && \ rm -rf /var/lib/apt/lists/* From 82f547f9f5144bf00848908790ffd0e43736aced Mon Sep 17 00:00:00 2001 From: meatybobby Date: Thu, 28 Sep 2023 10:12:55 -0700 Subject: [PATCH 266/512] PEFT GPT & T5 Refactor (#7308) * initial implementation of add_adapters API * correct type hint * Add config in add_adapters for save and load (@author bobchen) * Remove AdapterConfig to avoid import error * Add AdaterConfig back and move adaptermixin to sft model * Add NLPSaveRestoreConnector as default in NLPModel.restore_from * Add restore_from_nemo_with_adapter and test script * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rename t5 file and classes to be consistent with GPT * add t5 sft dataset * add support for single-file format with T5SFTDataset * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Various small changes to make T5 SFT work like GPT SFT * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add adapter evaluation test script * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add MultiAdaterConfig for ia3 and fix builder issue * Make ptuning for T5SFTModel work using mixin * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add IA3_Adapter for AdapterName * Add adapter name for ptuning and attention adapter * Make test script GPT/T5 agnostic * Add layer selection feature * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Integrate adapter name and config * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update gpt peft tuning script to new API * add t5 peft tuning script with new API * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix IA3 layer selection issue * Override state_dict on SFT model instead of mixin * Add load adapter by adapter config * move peft config map away from example script * auto get config from nemo adapter * Move PEFTConfig to new file * fix ckpt save/load for t5 * name change: add_adapters -> add_adapter * variable name change * update t5 script * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix t5 issues * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add weight tying * update gpt tuning script * PEFT-API proposal * Fix according to comments * update tuning scripts * move merge_cfg_with to mixin class since it applies to both gpt and t5 and requires the model class for restore * Add mcore_gpt support for NLPAdapterMixin * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix typo * variable name change to distinguish "peft" and "adapter" * override `load_adapters` to support `add_adapter` name change * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update tuning and eval script for adapter save/load * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Ptuning on first stage only * add lora tutorial for review * Fix layer selection for mcore * add landing page * fix resume training Signed-off-by: jasonwan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add mcore condition in sharded_state_dict to make sft work * Update lora_tutorial.md First edit of this file for PEFT documentation for NeMO Signed-off-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> * rename Adapter to AttentionAdapter to avoid confusion in doc * Change load_adapters to load .nemo * add quick start guide * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add load_adapters with .ckpt * Remove setup_complete changes in load_adapters * update landing page * remove typo * Updated quick_start.md per Chen Cui Signed-off-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> * Add inference config merger and tutorial * Add doc string for NLPAdapterModelMixin and deprecated warning on MegatronGPTPEFTModel * add supported_methods.md and update other documentations * Update supported_methods.md minor updates. Signed-off-by: Adi Renduchintala * Update landing_page.md minor update. Signed-off-by: Adi Renduchintala * Modify doc string for NLPAdapterModelMixin * Add doc string add_adapters in NLPAdapterModelMixin * rename canonical adapters * remove mcore hard dependency * [PATCH] move microbatch calculator to nemo from apex * remove apex dependency in gpt and t5 sft models * remove apex dependency in gpt model * render doc strings * fix * Add missing virtual_tokens on ptuning * fix docstrings * update gpt-style model coverage in docs * update docstring * Remove pdb * add lightning_fabric to make docstring rendering work * Add Ptuning missing key * try docstring rendering * Fix ptuning issue * update gpt t5 peft tuning and eval scripts * typos * update eval config * fix bug relating to apex dependency removal * typo * make predict step behave the same as test step * make lora tutorial work in notebook * cosmetics * update yaml scripts * mcore_gpt attribute optional * typo * update eval scripts and fix T5 eval bugs * add NLPDDPStrategyNotebook and trainer builder logic to use it * update lora notebook to use new trainer builder * fix microbatch calculator bug for inference after training * Convert markdown files to RST and incorporate with doc * typo * revise language * remove extra cell * remove unnecessary inheritance * remove old tests * move layer selection default so logging messages make sense * remove `save_adapters` as adapter weights are saved automatically during training * initialize weights from a checkpoint instead of randomly * multiple fields can form a context (#7147) * list of context fields and flexible prompt template Signed-off-by: arendu * list of fields for context Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh * Fix bug Signed-off-by: Cheng-Ping Hsieh * Add multiple truncation fields and middle truncation Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Compatible to old ckpt Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix tokenize detokenize issue Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove detokenization, add truncation augmentation Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Resolve comments Signed-off-by: Cheng-Ping Hsieh * Remove unused import Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert eos Signed-off-by: Cheng-Ping Hsieh * Add tokenizer space_sensitive attribute Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix error Signed-off-by: Cheng-Ping Hsieh * Fix erorr and use re Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh * Change assert logic Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Follow adi suggestion Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove merge function Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add example and comment Signed-off-by: Cheng-Ping Hsieh * Remove context_key and add comment Signed-off-by: Cheng-Ping Hsieh * Remove random truncation Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix template none Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh --------- Signed-off-by: arendu Signed-off-by: Cheng-Ping Hsieh Signed-off-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> * revert config changes * remove accidental breakpoint * support TP>1 loading * infer adapter type from checkpoint in during eval * breakup add adapter * enable interpolation of train_ds and validation_ds * update metric calc script to conform to single-file eval format * remove extraneous print * update lora notebook for updated merge_inference_cfg * Update nlp_adapter_mixins.py variable name change Signed-off-by: Chen Cui * turn off grad scaler for PP to match old scripts * remove PEFTSaveRestoreConnector since functionality all covered by the new mixin class * remove resume_from_checkpoint check since covered in #7335 * revert changes made in eval config interpolation * more interpolation * typo * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove dup line Signed-off-by: Chen Cui * code style warnings Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix config mistake Signed-off-by: Chen Cui * add copyright header Signed-off-by: Chen Cui * fix code check warnings Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert changes to remove apex dependency (mixed apex+nemo microbatch calculator broke some CI tests) Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add more deprecation notices Signed-off-by: Chen Cui * update deprecation notices Signed-off-by: Chen Cui * update deprecation notices Signed-off-by: Chen Cui * consolidate peft and sft scripts Signed-off-by: Chen Cui * update CI tests Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * notebook branch points to main to prepare for merge Signed-off-by: Chen Cui * fix gpt and t5 validation with any metric other than loss Signed-off-by: Chen Cui * support pre-extracted checkpoints Signed-off-by: Chen Cui --------- Signed-off-by: jasonwan Signed-off-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> Signed-off-by: Adi Renduchintala Signed-off-by: arendu Signed-off-by: Cheng-Ping Hsieh Signed-off-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Chen Cui Co-authored-by: Chen Cui Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Marc Romeyn Co-authored-by: jasonwan Co-authored-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> Co-authored-by: Adi Renduchintala Co-authored-by: Yuanzhe Dong Co-authored-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> --- Jenkinsfile | 380 +---- docs/source/conf.py | 1 + docs/source/nlp/api.rst | 14 +- docs/source/nlp/nemo_megatron/intro.rst | 1 + .../nlp/nemo_megatron/peft/landing_page.rst | 35 + .../nlp/nemo_megatron/peft/quick_start.rst | 90 ++ .../nemo_megatron/peft/supported_methods.rst | 71 + .../megatron_t5_prompt_learning.py | 5 + .../megatron_t5_prompt_learning_eval.py | 5 + .../megatron_t5_seq2seq_eval.py | 8 +- .../megatron_t5_seq2seq_finetune.py | 8 +- .../conf/megatron_gpt_peft_eval_config.yaml | 36 +- .../conf/megatron_gpt_peft_tuning_config.yaml | 22 +- .../conf/megatron_t5_peft_eval_config.yaml | 213 +++ .../conf/megatron_t5_peft_tuning_config.yaml | 220 +++ .../tuning/megatron_gpt_adapter_eval.py | 5 + .../tuning/megatron_gpt_adapter_tuning.py | 5 + .../tuning/megatron_gpt_ia3_eval.py | 5 + .../tuning/megatron_gpt_ia3_tuning.py | 5 + .../tuning/megatron_gpt_peft_eval.py | 178 +-- .../tuning/megatron_gpt_peft_tuning.py | 231 +-- .../tuning/megatron_gpt_sft.py | 5 + .../tuning/megatron_t5_adapter_eval.py | 5 + .../tuning/megatron_t5_adapter_tuning.py | 5 + .../tuning/megatron_t5_ia3_eval.py | 5 + .../tuning/megatron_t5_ia3_tuning.py | 5 + .../tuning/megatron_t5_lora_eval.py | 5 + .../tuning/megatron_t5_lora_tuning.py | 5 + .../tuning/megatron_t5_peft_eval.py | 135 ++ .../tuning/megatron_t5_peft_tuning.py | 65 + .../megatron/t5_sft_dataset.py | 169 +++ .../language_modeling/megatron_glue_model.py | 6 +- .../language_modeling/megatron_gpt_model.py | 3 +- .../megatron_gpt_peft_models.py | 41 +- .../megatron_gpt_sft_model.py | 19 +- .../language_modeling/megatron_t0_model.py | 14 +- .../megatron_t5_adapter_model.py | 8 +- .../megatron_t5_prompt_learning_model.py | 14 +- ...tune_model.py => megatron_t5_sft_model.py} | 301 ++-- nemo/collections/nlp/models/nlp_model.py | 23 +- .../megatron/adapters/parallel_adapters.py | 11 +- .../megatron/token_level_encoder_decoder.py | 20 +- .../nlp/parts/megatron_trainer_builder.py | 30 +- nemo/collections/nlp/parts/mixins/__init__.py | 13 + .../nlp/parts/mixins/nlp_adapter_mixins.py | 484 ++++++ nemo/collections/nlp/parts/nlp_overrides.py | 162 +- nemo/collections/nlp/parts/peft_config.py | 190 +++ nemo/core/classes/mixins/adapter_mixins.py | 21 +- .../metric_calculation/peft_metric_calc.py | 33 +- tutorials/nlp/lora.ipynb | 1301 +++-------------- 50 files changed, 2520 insertions(+), 2116 deletions(-) create mode 100644 docs/source/nlp/nemo_megatron/peft/landing_page.rst create mode 100644 docs/source/nlp/nemo_megatron/peft/quick_start.rst create mode 100644 docs/source/nlp/nemo_megatron/peft/supported_methods.rst create mode 100644 examples/nlp/language_modeling/tuning/conf/megatron_t5_peft_eval_config.yaml create mode 100644 examples/nlp/language_modeling/tuning/conf/megatron_t5_peft_tuning_config.yaml create mode 100644 examples/nlp/language_modeling/tuning/megatron_t5_peft_eval.py create mode 100644 examples/nlp/language_modeling/tuning/megatron_t5_peft_tuning.py create mode 100644 nemo/collections/nlp/data/language_modeling/megatron/t5_sft_dataset.py rename nemo/collections/nlp/models/language_modeling/{megatron_finetune_model.py => megatron_t5_sft_model.py} (74%) create mode 100644 nemo/collections/nlp/parts/mixins/__init__.py create mode 100644 nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py create mode 100644 nemo/collections/nlp/parts/peft_config.py diff --git a/Jenkinsfile b/Jenkinsfile index 2abcdbcc5ddb..92aa65ae660b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -669,188 +669,6 @@ pipeline { } } - // commented out temporarily to save time on github ci - //stage('L2: Megatron T5 Adapter PP=2') { - // when { - // anyOf { - // branch 'main' - // changeRequest target: 'main' - // } - // } - // failFast true - // parallel{ - // stage('T5 Adapter tuning & inference TP=1 PP=2') { - // steps { - // sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py \ - // --config-name=megatron_t5_adapter_tuning_config \ - // name='test_tp1_pp2' \ - // exp_manager.exp_dir='examples/adapter_tuning' \ - // trainer.devices=2 \ - // trainer.max_steps=1 \ - // trainer.val_check_interval=1 \ - // trainer.max_epochs=null \ - // model.data.num_workers=1 \ - // model.tensor_model_parallel_size=1 \ - // model.pipeline_model_parallel_size=2 \ - // model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ - // model.existing_tasks=[] \ - // model.new_tasks=['rte'] \ - // model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - // model.global_batch_size=4" - // sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py \ - // --config-name=megatron_t5_adapter_inference \ - // adapter_model_file='examples/adapter_tuning/test_tp1_pp2.nemo' \ - // language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ - // trainer.devices=2 \ - // data.num_workers=1 \ - // tensor_model_parallel_size=1 \ - // pipeline_model_parallel_size=2 \ - // data.global_batch_size=2 \ - // data.micro_batch_size=2 \ - // data.test_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - // pred_file_path='examples/adapter_tuning/test_tp1_pp2/preds.txt'" - // sh "rm -rf examples/adapter_tuning/test_tp1_pp2.nemo" - // sh "rm -rf examples/adapter_tuning/test_tp1_pp2" - // } - // } - // } - //} - //stage('L2: Megatron T5 Adapter TP=2') { - // when { - // anyOf { - // branch 'main' - // changeRequest target: 'main' - // } - // } - // failFast true - // parallel{ - // stage('T5 Adapter tuning & inference TP=2 PP=1') { - // steps { - // sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py \ - // --config-name=megatron_t5_adapter_tuning_config \ - // name='test_tp2_pp1' \ - // exp_manager.exp_dir='examples/adapter_tuning' \ - // trainer.devices=2 \ - // trainer.max_steps=1 \ - // trainer.val_check_interval=1 \ - // trainer.max_epochs=null \ - // model.data.num_workers=1 \ - // model.tensor_model_parallel_size=2 \ - // model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo' \ - // model.existing_tasks=[] \ - // model.new_tasks=['rte'] \ - // model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - // model.global_batch_size=4" - // sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py \ - // --config-name=megatron_t5_adapter_inference \ - // adapter_model_file='examples/adapter_tuning/test_tp2_pp1.nemo' \ - // language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo' \ - // trainer.devices=2 \ - // tensor_model_parallel_size=2 \ - // data.global_batch_size=2 \ - // data.micro_batch_size=2 \ - // data.num_workers=1 \ - // data.test_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - // pred_file_path='examples/adapter_tuning/test_tp2_pp1/preds.txt'" - // sh "rm -rf examples/adapter_tuning/test_tp2_pp1.nemo" - // sh "rm -rf examples/adapter_tuning/test_tp2_pp1" - // } - // } - // } - //} - stage('L2: Megatron T5 IA3 PP=2') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel{ - stage('T5 IA3 tuning & inference TP=1 PP=2') { - steps { - sh "python examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py \ - --config-name=megatron_t5_ia3_tuning_config \ - name='test_tp1_pp2' \ - exp_manager.exp_dir='examples/ia3_tuning' \ - trainer.devices=2 \ - trainer.max_steps=1 \ - trainer.val_check_interval=1 \ - trainer.max_epochs=null \ - model.data.num_workers=1 \ - model.tensor_model_parallel_size=1 \ - model.pipeline_model_parallel_size=2 \ - model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ - model.existing_tasks=[] \ - model.new_tasks=['rte'] \ - model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.global_batch_size=4" - // TODO: @eharper temporarily comment while investigating how to fix - // sh "python examples/nlp/language_modeling/tuning/megatron_t5_ia3_eval.py \ - // --config-name=megatron_t5_ia3_inference \ - // adapter_model_file='examples/ia3_tuning/test_tp1_pp2.nemo' \ - // language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ - // trainer.devices=2 \ - // data.num_workers=1 \ - // tensor_model_parallel_size=1 \ - // pipeline_model_parallel_size=2 \ - // data.global_batch_size=2 \ - // data.micro_batch_size=2 \ - // data.test_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - // pred_file_path='examples/ia3_tuning/test_tp1_pp2/preds.txt'" - sh "rm -rf examples/ia3_tuning/test_tp1_pp2.nemo" - sh "rm -rf examples/ia3_tuning/test_tp1_pp2" - } - } - } - } - stage('L2: Megatron T5 IA3 TP=2') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel{ - stage('T5 IA3 tuning & inference TP=2 PP=1') { - steps { - sh "python examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py \ - --config-name=megatron_t5_ia3_tuning_config \ - name='test_tp2_pp1' \ - exp_manager.exp_dir='examples/ia3_tuning' \ - trainer.devices=2 \ - trainer.max_steps=1 \ - trainer.val_check_interval=1 \ - trainer.max_epochs=null \ - model.data.num_workers=1 \ - model.tensor_model_parallel_size=2 \ - model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo' \ - model.existing_tasks=[] \ - model.new_tasks=['rte'] \ - model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.global_batch_size=4" - sh "python examples/nlp/language_modeling/tuning/megatron_t5_ia3_eval.py \ - --config-name=megatron_t5_ia3_inference \ - adapter_model_file='examples/ia3_tuning/test_tp2_pp1.nemo' \ - language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo' \ - trainer.devices=2 \ - data.num_workers=1 \ - tensor_model_parallel_size=2 \ - data.global_batch_size=2 \ - data.micro_batch_size=2 \ - data.test_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - pred_file_path='examples/ia3_tuning/test_tp2_pp1/preds.txt'" - sh "rm -rf examples/ia3_tuning/test_tp2_pp1.nemo" - sh "rm -rf examples/ia3_tuning/test_tp2_pp1" - } - } - } - } stage('L2: Speech Transcription') { when { @@ -3742,7 +3560,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' } failFast true steps { - sh "python examples/nlp/language_modeling/tuning/megatron_gpt_sft.py \ + sh "python examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py \ trainer.devices=2 \ trainer.log_every_n_steps=1 \ trainer.val_check_interval=2 \ @@ -3756,6 +3574,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.restore_from_path=/home/TestData/nlp/megatron_gpt/PP2/gpt_pp2_tp1.nemo \ model.optim.name=fused_adam \ model.optim.lr=2e-4 \ + model.peft.peft_scheme=null \ model.data.train_ds.micro_batch_size=1 \ model.data.train_ds.global_batch_size=4 \ model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl,/home/TestData/nlp/megatron_sft/trec.jsonl] \ @@ -3770,7 +3589,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.data.validation_ds.num_workers=0 \ model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ model.data.validation_ds.names=[quarel]" - sh "python examples/nlp/language_modeling/tuning/megatron_gpt_sft.py \ + sh "python examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py \ trainer.devices=2 \ trainer.log_every_n_steps=1 \ trainer.val_check_interval=1 \ @@ -3784,6 +3603,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.restore_from_path=/home/TestData/nlp/megatron_gpt/PP2/gpt_pp2_tp1.nemo \ model.optim.name=fused_adam \ model.optim.lr=2e-4 \ + model.peft.peft_scheme=null \ model.data.train_ds.micro_batch_size=1 \ model.data.train_ds.global_batch_size=4 \ model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl,/home/TestData/nlp/megatron_sft/trec.jsonl] \ @@ -3870,14 +3690,15 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.data.validation_ds.names=[quarel]" sh "python examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py \ model.restore_from_path=/home/TestData/nlp/megatron_gpt/TP2/megatron_gpt_tp2.nemo \ - model.peft.restore_from_path=/home/TestData/nlp/lora_tuning_tp2/megatron_gpt_peft_tuning/checkpoints/megatron_gpt_peft_tuning.nemo \ + model.peft.restore_from_path=/home/TestData/nlp/lora_tuning_tp2/megatron_gpt_peft_lora_tuning/checkpoints/megatron_gpt_peft_lora_tuning.nemo \ model.peft.restore_from_ckpt_name=null \ model.peft.restore_from_hparams_path=null \ + model.tensor_model_parallel_size=2 \ trainer.devices=2 \ model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel_4.jsonl] \ model.data.test_ds.names=['quarel4'] \ - model.data.test_ds.global_batch_size=1 \ - model.data.test_ds.micro_batch_size=1 \ + model.global_batch_size=2 \ + model.micro_batch_size=1 \ model.data.test_ds.tokens_to_generate=10 \ model.data.test_ds.write_predictions_to_file=True \ model.data.test_ds.output_file_path_prefix='/home/TestData/nlp/lora_tuning_tp2/out' \ @@ -4428,136 +4249,6 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' } } - // commented out to save time in github ci, we have tp>1 and pp>1 tests anyway @adithyare - //stage('L2: Megatron T5 Prompt Learning TP1 PP1') { - // when { - // anyOf { - // branch 'main' - // changeRequest target: 'main' - // } - // } - // failFast true - // parallel{ - // stage('T5 Prompt Learning TP=1 PP=1') { - // steps { - // sh "python examples/nlp/language_modeling/megatron_t5_prompt_learning.py \ - // --config-name=megatron_t5_prompt_learning \ - // name='/home/TestData/nlp/prompt_learning/t5_p_tuning_test' \ - // trainer.devices=1 \ - // trainer.max_steps=1 \ - // trainer.val_check_interval=1 \ - // trainer.max_epochs=null \ - // model.data.num_workers=1 \ - // model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m-refactor.nemo' \ - // model.existing_tasks=[] \ - // model.new_tasks=['squad'] \ - // model.data.train_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ - // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ - // model.global_batch_size=4 \ - // model.micro_batch_size=4" - // sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test" - // sh "python examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py \ - // virtual_prompt_model_file='/home/TestData/nlp/prompt_learning/t5_p_tuning_test.nemo' \ - // language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m-refactor.nemo' \ - // data.test_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ - // pred_file_path='/home/TestData/nlp/prompt_learning/t5_p_tuning_test_preds.txt' \ - // data.global_batch_size=4 \ - // data.micro_batch_size=4" - // sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test.nemo" - // sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test_preds.txt" - // } - // } - // } - //} - - stage('L2: Megatron T5 Prompt Learning TP2 PP1') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel{ - stage('T5 Prompt Learning TP=2 PP=1') { - steps { - sh "python examples/nlp/language_modeling/megatron_t5_prompt_learning.py \ - --config-name=megatron_t5_prompt_learning \ - name='/home/TestData/nlp/prompt_learning/t5_p_tuning_test_tp2' \ - trainer.devices=2 \ - trainer.max_steps=1 \ - trainer.val_check_interval=1 \ - trainer.max_epochs=null \ - model.data.num_workers=1 \ - model.tensor_model_parallel_size=2 \ - model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo' \ - model.existing_tasks=[] \ - model.new_tasks=['squad'] \ - model.data.train_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ - model.data.validation_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ - model.global_batch_size=8 \ - model.micro_batch_size=8" - sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test_tp2" - sh "python examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py \ - virtual_prompt_model_file='/home/TestData/nlp/prompt_learning/t5_p_tuning_test_tp2.nemo' \ - language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo' \ - data.test_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ - pred_file_path='/home/TestData/nlp/prompt_learning/t5_p_tuning_test_tp2_preds.txt' \ - tensor_model_parallel_size=2 \ - trainer.devices=2 \ - data.global_batch_size=8 \ - data.micro_batch_size=8" - sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test_tp2.nemo" - sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test_tp2_preds.txt" - } - } - } - } - - // TODO: add when https://github.com/NVIDIA/apex/pull/1596 is merged - // stage('L2: Megatron T5 Prompt Learning TP1 PP2') { - // when { - // anyOf { - // branch 'main' - // changeRequest target: 'main' - // } - // } - // failFast true - // parallel{ - // stage('T5 Prompt Learning TP=1 PP=2') { - // steps { - // sh "python examples/nlp/language_modeling/megatron_t5_prompt_learning.py \ - // --config-name=megatron_t5_prompt_learning \ - // name='/home/TestData/nlp/prompt_learning/t5_p_tuning_test_pp2' \ - // trainer.devices=2 \ - // trainer.max_steps=1 \ - // trainer.val_check_interval=1 \ - // trainer.max_epochs=null \ - // model.data.num_workers=1 \ - // model.pipeline_model_parallel_size=2 \ - // model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ - // model.existing_tasks=[] \ - // model.new_tasks=['squad'] \ - // model.data.train_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ - // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ - // model.global_batch_size=8 \ - // model.micro_batch_size=8" - // sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test_pp2" - // sh "python examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py \ - // virtual_prompt_model_file='/home/TestData/nlp/prompt_learning/t5_p_tuning_test_pp2.nemo' \ - // language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ - // data.test_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ - // pred_file_path='/home/TestData/nlp/prompt_learning/t5_p_tuning_test_pp2_preds.txt' \ - // tensor_model_parallel_size=2 \ - // trainer.devices=2 \ - // data.global_batch_size=8 \ - // data.micro_batch_size=8" - // sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test_pp2.nemo" - // sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test_pp2_preds.txt" - // } - // } - // } - // } stage('L2: Megatron UL2 Pretraining and Resume Training TP=2') { when { anyOf { @@ -4870,6 +4561,61 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' } } } + + stage('L2: Megatron T5 PEFT Lora TP=2') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps { + sh "rm -rf /home/TestData/nlp/t5_lora_tuning_tp2" + sh "python examples/nlp/language_modeling/tuning/megatron_t5_peft_tuning.py \ + trainer.devices=2 \ + trainer.log_every_n_steps=1 \ + trainer.max_epochs=9999 \ + trainer.max_steps=3 \ + trainer.val_check_interval=3 \ + ++trainer.limit_val_batches=2 \ + trainer.precision=16 \ + exp_manager.exp_dir=/home/TestData/nlp/t5_lora_tuning_tp2 \ + model.pipeline_model_parallel_size=1 \ + model.tensor_model_parallel_size=2 \ + model.restore_from_path=/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo \ + model.peft.peft_scheme='lora' \ + model.answer_only_loss=True \ + model.micro_batch_size=1 \ + model.global_batch_size=1 \ + model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.train_ds.concat_sampling_probabilities=[1.0] \ + model.data.train_ds.num_workers=0 \ + model.data.validation_ds.num_workers=0 \ + model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.validation_ds.names=[quarel]" + sh "python examples/nlp/language_modeling/tuning/megatron_t5_peft_eval.py \ + model.restore_from_path=/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo \ + model.peft.restore_from_path=/home/TestData/nlp/t5_lora_tuning_tp2/megatron_t5_peft_lora_tuning/checkpoints/megatron_t5_peft_lora_tuning.nemo \ + model.peft.restore_from_ckpt_name=null \ + model.peft.restore_from_hparams_path=null \ + model.tensor_model_parallel_size=2 \ + trainer.devices=2 \ + model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel_4.jsonl] \ + model.data.test_ds.names=['quarel4'] \ + model.global_batch_size=2 \ + model.micro_batch_size=1 \ + model.data.test_ds.tokens_to_generate=10 \ + model.data.test_ds.write_predictions_to_file=True \ + model.data.test_ds.output_file_path_prefix='/home/TestData/nlp/t5_lora_tuning_tp2/out' \ + inference.greedy=True \ + inference.repetition_penalty=1.0 \ + inference.outfile_path='/home/TestData/nlp/t5_lora_tuning_tp2/out.jsonl'" + sh "rm -rf /home/TestData/nlp/t5_lora_tuning_tp2" + } + } + + stage('L2: Megatron Mock Data Generation') { when { anyOf { diff --git a/docs/source/conf.py b/docs/source/conf.py index 0765f8940ab0..c54defb59ce8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -52,6 +52,7 @@ 'attr', # attrdict in requirements, attr in import 'torchmetrics', # inherited from PTL 'lightning_utilities', # inherited from PTL + 'lightning_fabric', 'apex', 'megatron.core', 'transformer_engine', diff --git a/docs/source/nlp/api.rst b/docs/source/nlp/api.rst index b13dedca300f..33709bd05a19 100755 --- a/docs/source/nlp/api.rst +++ b/docs/source/nlp/api.rst @@ -81,7 +81,6 @@ Modules .. autoclass:: nemo.collections.nlp.modules.common.megatron.module.Float16Module :show-inheritance: - .. autoclass:: nemo.collections.nlp.models.language_modeling.megatron.gpt_model.GPTModel :show-inheritance: :no-members: @@ -140,11 +139,22 @@ Datasets .. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.ul2_dataset.UL2Dataset :show-inheritance: + +Adapter Mixin Class +------------------------- + +.. autoclass:: nemo.collections.nlp.parts.mixins.nlp_adapter_mixins.NLPAdapterModelMixin + :show-inheritance: + :members: add_adapter, load_adapters, merge_cfg_with, merge_inference_cfg + :exclude-members: first_stage_of_pipeline, tie_weights, get_peft_state_dict, state_dict, sharded_state_dict, load_state_dict, on_load_checkpoint + :member-order: bysource + + Exportable Model Classes ------------------------- .. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_model.MegatronGPTExportableModel - :show-inheritance: + :show-inheritance: .. toctree:: :maxdepth: 1 diff --git a/docs/source/nlp/nemo_megatron/intro.rst b/docs/source/nlp/nemo_megatron/intro.rst index 7525c778b974..c1b158d77e3e 100644 --- a/docs/source/nlp/nemo_megatron/intro.rst +++ b/docs/source/nlp/nemo_megatron/intro.rst @@ -25,6 +25,7 @@ team at NVIDIA. NeMo Megatron supports several types of models: prompt_learning retro/retro_model hiddens/hiddens_module + peft/landing_page References diff --git a/docs/source/nlp/nemo_megatron/peft/landing_page.rst b/docs/source/nlp/nemo_megatron/peft/landing_page.rst new file mode 100644 index 000000000000..c90dcdfff1c5 --- /dev/null +++ b/docs/source/nlp/nemo_megatron/peft/landing_page.rst @@ -0,0 +1,35 @@ +Parameter-Efficient Fine-Tuning (PEFT) +====================================== + +PEFT is a popular technique used to efficiently finetune large language +models for use in various downstream tasks. When finetuning with PEFT, +the base model weights are frozen, and a few trainable adapter modules +are injected into the model, resulting in a very small number (<< 1%) of +trainble weights. With carefully chosen adapter modules and injection +points, PEFT achieves comparable performance to full finetuning at a +fraction of the computational and storage costs. + +NeMo supports four PEFT methods which can be used with various +transformer-based models. + +==================== ===== ===== ========= == +\ GPT 3 NvGPT LLaMa 1/2 T5 +==================== ===== ===== ========= == +Adapters (Canonical) ✅ ✅ ✅ ✅ +LoRA ✅ ✅ ✅ ✅ +IA3 ✅ ✅ ✅ ✅ +P-Tuning ✅ ✅ ✅ ✅ +==================== ===== ===== ========= == + +Learn more about PEFT in NeMo with the :ref:`peftquickstart` which provides an overview on how PEFT works +in NeMo. Read about the supported PEFT methods +`here `__. For a practical example, take a look at +the `Step-by-step Guide `__. + +The API guide can be found `here <../../api.html#adapter-mixin-class>`__ + +.. toctree:: + :maxdepth: 1 + + quick_start + supported_methods \ No newline at end of file diff --git a/docs/source/nlp/nemo_megatron/peft/quick_start.rst b/docs/source/nlp/nemo_megatron/peft/quick_start.rst new file mode 100644 index 000000000000..000e242b9508 --- /dev/null +++ b/docs/source/nlp/nemo_megatron/peft/quick_start.rst @@ -0,0 +1,90 @@ +.. _peftquickstart: + + +Quick Start Guide +================= + +The quick start guide provides an overview of a PEFT workflow in NeMo. + +Terminology: PEFT vs Adapter +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This tutorial uses "PEFT" to describe the overall parameter efficient +finetuning method, and "adapter" to describe the additional module +injected to a frozen base model. Each PEFT model can use one or more +types of adapters. + +One of the PEFT methods is sometimes referred to as "adapters", because +it was one of the first proposed usage of adapter modules for NLP. This +PEFT method will be called the "canonical" adapters to distinguish the +two usages. + +How PEFT work in NeMo models +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Each PEFT method has one or more types of adapters that need to be +injected into the base model. In NeMo models, the adapter logic and +adapter weights are already built into the submodules, but they are +disabled by default for ordinary training and fine-tuning. + +When doing PEFT, the adapter logic path can be enabled when +``model.add_adapter(peft_cfg)`` is called. In this function, the model +scans through each adapter applicable to the current PEFT method with +each of its submodules in order to find adapter logic paths that can be +enabled. Then, the base models weights are frozen, while newly added +adapter weights are unfrozen and allowed to be updated during +fine-tuning, hence achieving efficiency in the number of parameters +finetuned. + +PEFT config classes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Each PEFT method is specified by a ``PEFTConfig`` class which stores the +types of adapters applicable to the PEFT method, as well as +hyperparameters required to initialize these adapter modules. These four +PEFT methods are currently supported: + +1. Adapters (canonical): ``CanonicalAdaptersPEFTConfig`` +2. LoRA: ``LoraPEFTConfig`` +3. IA3: ``IA3PEFTConfig`` +4. P-Tuning: ``PtuningPEFTConfig`` + +These config classes make experimenting with different adapters as easy +as changing the config class. + +Moreover, it is possible to use a combination of the PEFT methods in +NeMo since they are orthogonal to each other. This can be easily done by +passing in a list of ``PEFTConfig`` objects to ``add_adapter`` instead +of a single one. For example, a common workflow is to combine P-Tuning +and Adapter, and this can be achieved with +``model.add_adapter([PtuningPEFTConfig(model_cfg), CanonicalAdaptersPEFTConfig(model_cfg)])`` + +Base model classes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +PEFT in NeMo is built with a mix-in class that does not belong to any +model in particular. This means that the same interface is available to +different NeMo models. Currently, NeMo supports PEFT for GPT-style +models such as GPT 3, NvGPT, LLaMa 1/2 (``MegatronGPTSFTModel``), as +well as T5 (``MegatronT5SFTModel``). + +Full finetuning vs PEFT +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +You can switch between full fine-tuning and PEFT by removing calls to +``add_adapter`` and ``load_adapter``. + +The code snippet below illustrates the core API of full fine-tuning and +PEFT. + +.. code:: diff + + trainer = MegatronTrainerBuilder(config).create_trainer() + model_cfg = MegatronGPTSFTModel.merge_cfg_with(config.model.restore_from_path, config) + + model = MegatronGPTSFTModel.restore_from(restore_path, model_cfg, trainer) # restore from pretrained ckpt + + peft_cfg = LoRAPEFTConfig(model_cfg) + + model.add_adapter(peft_cfg) + trainer.fit(model) # saves adapter weights only + + # Restore from base then load adapter API + model = MegatronGPTSFTModel.restore_from(restore_path, trainer, model_cfg) + + model.load_adapters(adapter_save_path, peft_cfg) + model.freeze() + trainer.predict(model) diff --git a/docs/source/nlp/nemo_megatron/peft/supported_methods.rst b/docs/source/nlp/nemo_megatron/peft/supported_methods.rst new file mode 100644 index 000000000000..4479565be6aa --- /dev/null +++ b/docs/source/nlp/nemo_megatron/peft/supported_methods.rst @@ -0,0 +1,71 @@ + + +Supported PEFT methods +---------------------- + +NeMo supports the following PFET tuning methods + +1. **Adapters (Canonical)**: `Parameter-Efficient Transfer Learning for + NLP `__ + + - Adapters (Houlsby setup) is one of the first PEFT methods applied + to NLP. Adapter tuning is more efficient than full fine-tuning + because the base model weights are frozen, while only a small + number of adapter module weights are updated. In this method, two + linear layers with a bottleneck and a non-linear activation are + inserted into each transformer layer via a residual connection. In + each case, the output linear layer is initialized to 0 to ensure + that an untrained adapter does not affect the normal forward pass + of the transformer layer. + +2. **LoRA**: `LoRA: Low-Rank Adaptation of Large Language + Models `__ + + - LoRA makes fine-tuning efficient by representing weight updates + with two low rank decomposition matrices. The original model + weights remain frozen, while the low rank decomposition matrices + are updated to adapt to the new data , so the number of trainable + parameters is kept low. In contrast with adapters, the original + model weights and adapted weights can be combined during + inference, avoiding any architectural change or additional latency + in the model at inference time. + - The matrix decomposition operation can be applied to any linear + layer, but in practice, it is only applied to the K, Q, V + projection matrices (sometimes just applied to the Q,V layers). + Since NeMo's attention implementation fuses KQV into a single + projection, our LoRA implementation learns a single Low-Rank + projection for KQV in a combined fashion. + +3. **IA3**: `Few-Shot Parameter-Efficient Fine-Tuning is Better and + Cheaper than In-Context Learning `__ + + - IA3 makes fine-tuning efficient by rescaling activations with + learned vectors. The rescaling layers are injected in the + attention (for key and value) and feedforward modules in the base + model. Similar to other PEFT methods, only the rescaling vectors + are updated during fine-tuning to adapt to the new data so the + number of updated parameters is low. However, since rescaling + vectors are much smaller than low rank matrices (LoRA) and + bottleneck layers (Adapters), IA3 cuts down the number of + trainable parameters further by an order of magnitude. The + learning rescaling vectors can also be merged with the base + weights, leading to no architectural change and no additional + latency at inference time. + +4. **P-Tuning**: `GPT Understands, + Too `__ + + - P-tuning is an example of the prompt learning family of methods, + in which trainable virtual tokens are inserted into the model + input prompt to induce it to perform a task. Virtual tokens (also + called "continuous" or "soft" tokens) are embeddings that have no + concrete mapping to strings or characters within the model’s + vocabulary. They are simply 1D vectors that match the + dimensionality of real tokens which make up the model's + vocabulary. + - In p-tuning, an intermediate LSTM or MLP model is used to generate + virtual token embeddings. We refer to this intermediate model as + our ``prompt_encoder``. The prompt encoder parameters are randomly + initialized at the start of p-tuning. All base model parameters + are frozen, and only the prompt encoder weights are updated at + each training step. diff --git a/examples/nlp/language_modeling/megatron_t5_prompt_learning.py b/examples/nlp/language_modeling/megatron_t5_prompt_learning.py index 1bef27352fb1..5a4f4038e501 100644 --- a/examples/nlp/language_modeling/megatron_t5_prompt_learning.py +++ b/examples/nlp/language_modeling/megatron_t5_prompt_learning.py @@ -29,6 +29,7 @@ ) from nemo.core.config import hydra_runner from nemo.utils import logging +from nemo.utils.decorators import deprecated from nemo.utils.exp_manager import exp_manager mp.set_start_method("spawn", force=True) @@ -43,6 +44,10 @@ """ +@deprecated( + explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." + "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." +) @hydra_runner(config_path="conf", config_name="megatron_t5_prompt_learning.yaml") def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") diff --git a/examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py b/examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py index 3b932e99ced3..67640138b3ff 100644 --- a/examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py +++ b/examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py @@ -24,6 +24,7 @@ from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy from nemo.core.config import hydra_runner from nemo.utils.app_state import AppState +from nemo.utils.decorators import deprecated try: from megatron.core import parallel_state @@ -37,6 +38,10 @@ raise EnvironmentError("GPU is needed for the inference") +@deprecated( + explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." + "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." +) @hydra_runner(config_path="conf", config_name="megatron_t5_prompt_learning_inference") def main(cfg) -> None: diff --git a/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py b/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py index e2fb58b55367..4c11e10d99c5 100644 --- a/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py +++ b/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py @@ -18,9 +18,9 @@ from pytorch_lightning.plugins.environments import TorchElasticEnvironment from pytorch_lightning.plugins.precision import MixedPrecisionPlugin -from nemo.collections.nlp.models.language_modeling.megatron_finetune_model import MegatronT5FinetuneModel from nemo.collections.nlp.models.language_modeling.megatron_glue_model import MegatronT5GLUEModel from nemo.collections.nlp.models.language_modeling.megatron_t0_model import MegatronT0Model +from nemo.collections.nlp.models.language_modeling.megatron_t5_sft_model import MegatronT5SFTModel from nemo.collections.nlp.parts.nlp_overrides import GradScaler, MegatronHalfPrecisionPlugin, NLPDDPStrategy from nemo.core.config import hydra_runner from nemo.utils import logging @@ -122,13 +122,13 @@ def main(cfg) -> None: model = load_from_checkpoint_dir(MegatronT0Model, cfg, trainer, modify_confg_fn=_modify_config) else: if cfg.model.restore_from_path: - t5_cfg = MegatronT5FinetuneModel.restore_from( + t5_cfg = MegatronT5SFTModel.restore_from( restore_path=cfg.model.restore_from_path, trainer=trainer, return_config=True ) - model = load_from_nemo(MegatronT5FinetuneModel, cfg, trainer, t5_cfg, modify_confg_fn=_modify_config) + model = load_from_nemo(MegatronT5SFTModel, cfg, trainer, t5_cfg, modify_confg_fn=_modify_config) else: validate_checkpoint_loading_args(cfg.model.pretrained_checkpoint) - model = load_from_checkpoint_dir(MegatronT5FinetuneModel, cfg, trainer, modify_confg_fn=_modify_config) + model = load_from_checkpoint_dir(MegatronT5SFTModel, cfg, trainer, modify_confg_fn=_modify_config) model.freeze() trainer.validate(model) diff --git a/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py b/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py index 189f4b46ab6d..3204ba2f6d76 100644 --- a/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py +++ b/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py @@ -21,9 +21,9 @@ from pytorch_lightning.plugins.environments import TorchElasticEnvironment from pytorch_lightning.trainer.connectors.checkpoint_connector import _CheckpointConnector -from nemo.collections.nlp.models.language_modeling.megatron_finetune_model import MegatronT5FinetuneModel from nemo.collections.nlp.models.language_modeling.megatron_glue_model import MegatronT5GLUEModel from nemo.collections.nlp.models.language_modeling.megatron_t0_model import MegatronT0Model +from nemo.collections.nlp.models.language_modeling.megatron_t5_sft_model import MegatronT5SFTModel from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel from nemo.collections.nlp.parts.nlp_overrides import ( CustomProgressBar, @@ -207,13 +207,13 @@ def main(cfg) -> None: model = load_from_checkpoint_dir(MegatronT0Model, cfg, trainer, modify_confg_fn=_modify_config) else: if cfg.model.restore_from_path: - t5_cfg = MegatronT5FinetuneModel.restore_from( + t5_cfg = MegatronT5SFTModel.restore_from( restore_path=cfg.model.restore_from_path, trainer=trainer, return_config=True ) - model = load_from_nemo(MegatronT5FinetuneModel, cfg, trainer, t5_cfg, modify_confg_fn=_modify_config) + model = load_from_nemo(MegatronT5SFTModel, cfg, trainer, t5_cfg, modify_confg_fn=_modify_config) else: validate_checkpoint_loading_args(cfg.model.pretrained_checkpoint) - model = load_from_checkpoint_dir(MegatronT5FinetuneModel, cfg, trainer, modify_confg_fn=_modify_config) + model = load_from_checkpoint_dir(MegatronT5SFTModel, cfg, trainer, modify_confg_fn=_modify_config) trainer.fit(model) trainer.validate(model) diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml index 49bd15b0ea12..37fa8a269824 100755 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml +++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml @@ -26,7 +26,7 @@ exp_manager: resume_ignore_no_checkpoint: True create_checkpoint_callback: True checkpoint_callback_params: - monitor: validation_${model.data.validation_ds.metric.name} + monitor: validation_${model.data.test_ds.metric.name} save_top_k: 1 mode: max save_nemo_on_train_end: True @@ -39,12 +39,12 @@ model: seed: 1234 tensor_model_parallel_size: 1 # intra-layer model parallelism pipeline_model_parallel_size: 1 # inter-layer model parallelism - + global_batch_size: 1 micro_batch_size: 1 restore_from_path: ??? # Path to an existing .nemo model you wish to add new tasks to or run inference with resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. - save_nemo_on_validation_end: True # Saves an inference ready .nemo file every time a checkpoint is saved during training. + save_nemo_on_validation_end: True # Saves an inference ready .nemo file every time a checkpoint is saved during training. sync_batch_comm: False megatron_amp_O2: False @@ -53,8 +53,8 @@ model: # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. sequence_parallel: False - ## Activation Checkpoint - activations_checkpoint_granularity: null # 'selective' or 'full' + ## Activation Checkpoint + activations_checkpoint_granularity: null # 'selective' or 'full' activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' # 'uniform' divides the total number of transformer layers and checkpoints the input activation # of each chunk at the specified granularity @@ -73,17 +73,29 @@ model: restore_from_path: null restore_from_ckpt_name: null restore_from_hparams_path: null - + # Used for adapter peft training adapter_tuning: type: 'parallel_adapter' # this should be either 'parallel_adapter' or 'linear_adapter' adapter_dim: 32 adapter_dropout: 0.0 - norm_position: 'pre' # This can be set to 'pre' or 'post', 'pre' is normally what is used. + norm_position: 'pre' # This can be set to 'pre', 'post' or null, 'pre' is normally what is used. column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal norm_type: 'mixedfusedlayernorm' # IGNORED if layer_adapter is used, options are ['layernorm', 'mixedfusedlayernorm'] - + layer_selection: null # selects in which layers to add adapters, e.g. [1,12] will add adapters to layer 1 (lowest) and 12. null will apply adapters to all layers + weight_tying: False + position_embedding_strategy: null # used only when weight_tying is True + + lora_tuning: + adapter_dim: 32 + adapter_dropout: 0.0 + column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal + row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal + layer_selection: null # selects in which layers to add lora adapters. e.g. [1,12] will add lora to layer 1 (lowest) and 12. null will apply adapters to all layers + weight_tying: False + position_embedding_strategy: null # used only when weight_tying is True + # Used for p-tuning peft training p_tuning: virtual_tokens: 10 # The number of virtual tokens the prompt encoder should add at the start of the sequence @@ -91,18 +103,22 @@ model: embedding_dim: 1024 # the size of the prompt encoder embeddings init_std: 0.023 + ia3_tuning: + layer_selection: null # selects in which layers to add ia3 adapters. e.g. [1,12] will add lora to layer 1 (lowest) and 12. null will apply adapters to all layers + data: test_ds: file_names: ??? # Path to a list of JSONL files corresponding to the source data. Data format is identical to train_ds. names: ??? # Names of the corresponding datasets used to log metrics. - global_batch_size: ??? - micro_batch_size: ??? + global_batch_size: 1 + micro_batch_size: 1 shuffle: False num_workers: 0 pin_memory: True max_seq_length: 2048 min_seq_length: 1 drop_last: False + context_key: 'input' label_key: ${data.train_ds.label_key} add_eos: ${data.train_ds.add_eos} add_sep: ${data.train_ds.add_sep} diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml index b020c1aa49ad..6b6af4b1c81b 100755 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml +++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml @@ -1,4 +1,4 @@ -name: megatron_gpt_peft_tuning +name: megatron_gpt_peft_${model.peft.peft_scheme}_tuning trainer: devices: 1 @@ -10,7 +10,7 @@ trainer: use_distributed_sampler: False max_epochs: 9999 max_steps: 20000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 # frequency with which training steps are logged + log_every_n_steps: 10 # frequency with which training steps are logged val_check_interval: 200 # If is an int n > 1, will run val every n training steps, if a float 0.0 - 1.0 will run val every epoch fraction, e.g. 0.25 will run val every quarter epoch gradient_clip_val: 1.0 @@ -47,12 +47,12 @@ model: seed: 1234 tensor_model_parallel_size: 1 # intra-layer model parallelism pipeline_model_parallel_size: 1 # inter-layer model parallelism - + global_batch_size: 128 micro_batch_size: 4 restore_from_path: ??? # Path to an existing .nemo model you wish to add new tasks to or run inference with resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. - save_nemo_on_validation_end: False # Saves an inference ready .nemo file every time a checkpoint is saved during training. + save_nemo_on_validation_end: False # Saves an inference ready .nemo file every time a checkpoint is saved during training. sync_batch_comm: False megatron_amp_O2: False @@ -61,8 +61,8 @@ model: # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. sequence_parallel: False - ## Activation Checkpoint - activations_checkpoint_granularity: null # 'selective' or 'full' + ## Activation Checkpoint + activations_checkpoint_granularity: null # 'selective' or 'full' activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' # 'uniform' divides the total number of transformer layers and checkpoints the input activation # of each chunk at the specified granularity @@ -79,7 +79,7 @@ model: peft: peft_scheme: "adapter" # can be either adapter,ia3, or ptuning restore_from_path: null - + # Used for adapter peft training adapter_tuning: type: 'parallel_adapter' # this should be either 'parallel_adapter' or 'linear_adapter' @@ -92,7 +92,7 @@ model: layer_selection: null # selects in which layers to add adapters, e.g. [1,12] will add adapters to layer 1 (lowest) and 12. null will apply adapters to all layers weight_tying: False position_embedding_strategy: null # used only when weight_tying is True - + lora_tuning: adapter_dim: 32 adapter_dropout: 0.0 @@ -101,21 +101,21 @@ model: layer_selection: null # selects in which layers to add lora adapters. e.g. [1,12] will add lora to layer 1 (lowest) and 12. null will apply adapters to all layers weight_tying: False position_embedding_strategy: null # used only when weight_tying is True - + # Used for p-tuning peft training p_tuning: virtual_tokens: 10 # The number of virtual tokens the prompt encoder should add at the start of the sequence bottleneck_dim: 1024 # the size of the prompt encoder mlp bottleneck embedding_dim: 1024 # the size of the prompt encoder embeddings init_std: 0.023 - + ia3_tuning: layer_selection: null # selects in which layers to add ia3 adapters. e.g. [1,12] will add lora to layer 1 (lowest) and 12. null will apply adapters to all layers data: train_ds: # Example of how to specify paths to multiple datasets - # file_names: + # file_names: # - /path/to/squad.jsonl # - /path/to/mnli.jsonl # - /path/to/boolq.jsonl diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_peft_eval_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_peft_eval_config.yaml new file mode 100644 index 000000000000..0ef90a2343fa --- /dev/null +++ b/examples/nlp/language_modeling/tuning/conf/megatron_t5_peft_eval_config.yaml @@ -0,0 +1,213 @@ +name: megatron_t5_peft_${model.peft.peft_scheme}_tuning + +trainer: + devices: 1 + accelerator: gpu + num_nodes: 1 + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: 9999 + max_steps: 20000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 # frequency with which training steps are logged + val_check_interval: 200 # If is an int n > 1, will run val every n training steps, if a float 0.0 - 1.0 will run val every epoch fraction, e.g. 0.25 will run val every quarter epoch + gradient_clip_val: 1.0 + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: validation_${model.data.test_ds.metric.name} + save_top_k: 1 + mode: max + save_nemo_on_train_end: True + filename: '${name}--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}-{consumed_samples}' + model_parallel_size: ${model.tensor_model_parallel_size} + always_save_nemo: True + save_best_model: False + +model: + seed: 1234 + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + + global_batch_size: 1 + micro_batch_size: 1 + restore_from_path: ??? # Path to an existing .nemo model you wish to add new tasks to or run inference with + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + save_nemo_on_validation_end: True # Saves an inference ready .nemo file every time a checkpoint is saved during training. + sync_batch_comm: False + megatron_amp_O2: False + + ## Sequence Parallelism + # Makes tensor parallelism more memory efficient for LLMs (20B+) by parallelizing layer norms and dropout sequentially + # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. + sequence_parallel: False + + ## Activation Checkpoint + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + # 'uniform' divides the total number of transformer layers and checkpoints the input activation + # of each chunk at the specified granularity + # 'block' checkpoints the specified number of layers per pipeline stage at the specified granularity + activations_checkpoint_num_layers: null # not used with 'selective' + activations_checkpoint_layers_per_pipeline: null + answer_only_loss: False # not used right now + gradient_as_bucket_view: False + + hidden_dropout: 0.0 + attention_dropout: 0.0 + ffn_dropout: 0.0 + + peft: + peft_scheme: "adapter" # can be either adapter,ia3, or ptuning + restore_from_path: null + restore_from_ckpt_name: null + restore_from_hparams_path: null + + # Used for adapter peft training + adapter_tuning: + type: 'parallel_adapter' # this should be either 'parallel_adapter' or 'linear_adapter' + adapter_dim: 32 + adapter_dropout: 0.0 + norm_position: 'pre' # This can be set to 'pre', 'post' or null, 'pre' is normally what is used. + column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal + row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal + norm_type: 'mixedfusedlayernorm' # IGNORED if layer_adapter is used, options are ['layernorm', 'mixedfusedlayernorm'] + layer_selection: null # selects in which layers to add adapters, e.g. [1,12] will add adapters to layer 1 (lowest) and 12. null will apply adapters to all layers + weight_tying: False + position_embedding_strategy: null # used only when weight_tying is True + + lora_tuning: + adapter_dim: 32 + adapter_dropout: 0.0 + column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal + row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal + layer_selection: null # selects in which layers to add lora adapters. e.g. [1,12] will add lora to layer 1 (lowest) and 12. null will apply adapters to all layers + weight_tying: False + position_embedding_strategy: null # used only when weight_tying is True + + # Used for p-tuning peft training + p_tuning: + virtual_tokens: 10 # The number of virtual tokens the prompt encoder should add at the start of the sequence + bottleneck_dim: 1024 # the size of the prompt encoder mlp bottleneck + embedding_dim: 1024 # the size of the prompt encoder embeddings + init_std: 0.023 + + ia3_tuning: + layer_selection: null # selects in which layers to add ia3 adapters. e.g. [1,12] will add lora to layer 1 (lowest) and 12. null will apply adapters to all layers + + data: + test_ds: + file_names: ??? # Path to a list of JSONL files corresponding to the source data. Data format is identical to train_ds. + names: ??? # Names of the corresponding datasets used to log metrics. + global_batch_size: 1 #${model.global_batch_size} + micro_batch_size: 1 #${model.micro_batch_size} + shuffle: False + num_workers: 0 + pin_memory: True + max_seq_length: 2048 + min_seq_length: 1 + drop_last: False + context_key: 'input' + label_key: ${data.train_ds.label_key} + add_eos: ${data.train_ds.add_eos} + add_sep: ${data.train_ds.add_sep} + add_bos: ${data.train_ds.add_bos} + separate_prompt_and_response_with_newline: ${data.train_ds.separate_prompt_and_response_with_newline} + write_predictions_to_file: False + output_file_path_prefix: null # Prefix of the file to write predictions to. + truncation_field: ${data.train_ds.truncation_field} # Options: keys in prompt_template index_mapping_dir: null # Path to a directory to write index mapping files. + index_mapping_dir: null # Path to a directory to write index mapping files. + prompt_template: ${data.train_ds.prompt_template} + tokens_to_generate: 32 # decide how many tokens we want to generate to evaluate performance with string metrics + + metric: + name: "loss" # Name of the evaluation metric to use. Options: ['exact_string_match', 'loss'] + average: null # Average the metric over the dataset. Options: ['macro', 'micro']. Works only for 'F1', 'accuracy' etc. Refer to torchmetrics for metrics where this is supported. + num_classes: null + +inference: + greedy: True # Whether or not to use sampling ; use greedy decoding otherwise + top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. + top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. + temperature: 1.0 # sampling temperature + all_probs: False # whether return the log prob for all the tokens in vocab + repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. + min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. + compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False + outfile_path: output.txt + compute_attention_mask: True + +# server-related configs +server: False # whether launch the API server +port: 5555 # the port number for the inference server +web_server: False # whether launch the web inference server +share: True # whether create a public URL +username: test # user name for web client +password: test2 # password for web client +web_port: 9889 # the port number of the web server 1058 +chat: False # use the chat interface +chatbot_config: + value: False # whether to inject the value attributes + attributes: + - name: Quality + min: 0 + max: 4 + key: quality + type: int + default: 4 + - name: Toxicity + min: 0 + max: 4 + key: toxcity + type: int + default: 0 + - name: Humor + min: 0 + max: 4 + key: humor + type: int + default: 0 + - name: Creativity + min: 0 + max: 4 + key: creativity + type: int + default: 0 + - name: Violence + min: 0 + max: 4 + key: violence + type: int + default: 0 + - name: Helpfulness + min: 0 + max: 4 + key: helpfulness + type: int + default: 4 + - name: Not_Appropriate + min: 0 + max: 4 + key: not_appropriate + type: int + default: 0 + - name: Language + choices: ['ar', 'bg', 'bn', 'ca', 'cs', 'da', 'de', 'el', 'en', 'eo', 'es', 'eu', 'fa', 'fi', 'fr', 'gl', 'he', 'hu', 'id', 'it', 'ja', 'ko', 'nb', 'nl', 'pl', 'pt', 'ro', 'ru', 'sk', 'sv', 'th', 'tr', 'uk', 'vi', 'zh'] + key: lang + type: list + default: en + + user: User + assistant: Assistant + system: "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n" \ No newline at end of file diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_peft_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_peft_tuning_config.yaml new file mode 100644 index 000000000000..d0ee2c417856 --- /dev/null +++ b/examples/nlp/language_modeling/tuning/conf/megatron_t5_peft_tuning_config.yaml @@ -0,0 +1,220 @@ +name: megatron_t5_peft_${model.peft.peft_scheme}_tuning + +trainer: + devices: 1 + accelerator: gpu + num_nodes: 1 + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: 9999 + max_steps: 20000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 # frequency with which training steps are logged + val_check_interval: 200 # If is an int n > 1, will run val every n training steps, if a float 0.0 - 1.0 will run val every epoch fraction, e.g. 0.25 will run val every quarter epoch + gradient_clip_val: 1.0 + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: validation_${model.data.validation_ds.metric.name} + save_top_k: 1 + mode: min + save_nemo_on_train_end: True + filename: '${name}--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}-{consumed_samples}' + model_parallel_size: ${model.tensor_model_parallel_size} + always_save_nemo: False + save_best_model: True + create_early_stopping_callback: True + early_stopping_callback_params: + monitor: "val_loss" + mode: "min" + min_delta: 0.001 + patience: 10 + verbose: True + strict: False # Should be False to avoid a runtime error where EarlyStopping says monitor is unavailable, which sometimes happens with resumed training. + +model: + seed: 1234 + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + + global_batch_size: 128 + micro_batch_size: 4 + restore_from_path: ??? # Path to an existing .nemo model you wish to add new tasks to or run inference with + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + save_nemo_on_validation_end: False # Saves an inference ready .nemo file every time a checkpoint is saved during training. + sync_batch_comm: False + megatron_amp_O2: False + + ## Sequence Parallelism + # Makes tensor parallelism more memory efficient for LLMs (20B+) by parallelizing layer norms and dropout sequentially + # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. + sequence_parallel: False + + ## Activation Checkpoint + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + # 'uniform' divides the total number of transformer layers and checkpoints the input activation + # of each chunk at the specified granularity + # 'block' checkpoints the specified number of layers per pipeline stage at the specified granularity + activations_checkpoint_num_layers: null # not used with 'selective' + activations_checkpoint_layers_per_pipeline: null + answer_only_loss: True + gradient_as_bucket_view: False + + hidden_dropout: 0.0 + attention_dropout: 0.0 + ffn_dropout: 0.0 + + peft: + peft_scheme: "adapter" # can be either adapter,ia3, or ptuning + restore_from_path: null + + # Used for adapter peft training + adapter_tuning: + type: 'parallel_adapter' # this should be either 'parallel_adapter' or 'linear_adapter' + adapter_dim: 32 + adapter_dropout: 0.0 + norm_position: 'pre' # This can be set to 'pre', 'post' or null, 'pre' is normally what is used. + column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal + row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal + norm_type: 'mixedfusedlayernorm' # IGNORED if layer_adapter is used, options are ['layernorm', 'mixedfusedlayernorm'] + layer_selection: null # selects in which layers to add adapters, e.g. [1,12] will add adapters to layer 1 (lowest) and 12. null will apply adapters to all layers + weight_tying: False + position_embedding_strategy: null # used only when weight_tying is True + + lora_tuning: + adapter_dim: 32 + adapter_dropout: 0.0 + column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal + row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal + layer_selection: null # selects in which layers to add lora adapters. e.g. [1,12] will add lora to layer 1 (lowest) and 12. null will apply adapters to all layers + weight_tying: False + position_embedding_strategy: null # used only when weight_tying is True + + # Used for p-tuning peft training + p_tuning: + virtual_tokens: 10 # The number of virtual tokens the prompt encoder should add at the start of the sequence + bottleneck_dim: 1024 # the size of the prompt encoder mlp bottleneck + embedding_dim: 1024 # the size of the prompt encoder embeddings + init_std: 0.023 + + ia3_tuning: + layer_selection: null # selects in which layers to add ia3 adapters. e.g. [1,12] will add lora to layer 1 (lowest) and 12. null will apply adapters to all layers + + data: + train_ds: + # Example of how to specify paths to multiple datasets + # file_names: + # - /path/to/squad.jsonl + # - /path/to/mnli.jsonl + # - /path/to/boolq.jsonl + # Example of how each dataset is formatted + # {'input': 'John von Neumann\nVon Neumann made fundamental contributions .... Q: What did the math of artificial viscosity do?', 'output': 'smoothed the shock transition without sacrificing basic physics'} + file_names: ??? # Path to a list of JSONL files corresponding to the source data. + global_batch_size: ${model.global_batch_size} + micro_batch_size: ${model.micro_batch_size} + shuffle: True + num_workers: 0 + memmap_workers: 2 + pin_memory: True + max_seq_length: 2048 + min_seq_length: 1 + drop_last: True + # Example of how to specify concat_sampling_probabilities + # concat_sampling_probabilities: + # - 0.5 + # - 0.25 + # - 0.25 + concat_sampling_probabilities: null # When providing a list of datasets, this arg defines the sampling probabilities from each dataset when strategy='random' + context_key: 'input' + label_key: 'output' + add_eos: True + add_sep: False + add_bos: False + separate_prompt_and_response_with_newline: False + truncation_field: "context" # Options: ['context', 'answer'] + index_mapping_dir: null # Path to a directory to write index mapping files. + prompt_template: "{input} {output}" # fstring to use for assistant prompt. Example: "Q: {input}\nA: {output}" + + validation_ds: + file_names: ??? # Path to a list of JSONL files corresponding to the source data. Data format is identical to train_ds. + names: null # Names of the corresponding datasets used to log metrics. + global_batch_size: ${model.global_batch_size} + micro_batch_size: ${model.micro_batch_size} + shuffle: False + num_workers: 0 + memmap_workers: ${model.data.train_ds.memmap_workers} + pin_memory: True + max_seq_length: 2048 + min_seq_length: 1 + drop_last: False + context_key: 'input' + label_key: 'output' + add_eos: ${model.data.train_ds.add_eos} + add_sep: ${model.data.train_ds.add_sep} + add_bos: ${model.data.train_ds.add_bos} + separate_prompt_and_response_with_newline: ${model.data.train_ds.separate_prompt_and_response_with_newline} + write_predictions_to_file: False + output_file_path_prefix: null # Prefix of the file to write predictions to. + truncation_field: "context" # Options: ['context', 'answer'] + index_mapping_dir: null # Path to a directory to write index mapping files. + prompt_template: ${model.data.train_ds.prompt_template} # fstring to use for assistant prompt. Example: "Q: {input}\nA: {output}" + tokens_to_generate: 32 # decide how many tokens we want to generate to evaluate performance with string metrics + metric: + name: "loss" # Name of the evaluation metric to use. Options: ['exact_string_match', 'loss'] + average: null # Average the metric over the dataset. Options: ['macro', 'micro']. Works only for 'F1', 'accuracy' etc. Refer to torchmetrics for metrics where this is supported. + num_classes: null + test_ds: + file_names: null # Path to a list of JSONL files corresponding to the source data. Data format is identical to train_ds. + names: null # Names of the corresponding datasets used to log metrics. + global_batch_size: ${model.global_batch_size} + micro_batch_size: ${model.micro_batch_size} + shuffle: False + num_workers: 0 + memmap_workers: ${model.data.train_ds.memmap_workers} + pin_memory: True + max_seq_length: 2048 + min_seq_length: 1 + drop_last: False + context_key: 'input' + label_key: 'output' + add_eos: ${model.data.train_ds.add_eos} + add_sep: ${model.data.train_ds.add_sep} + add_bos: ${model.data.train_ds.add_bos} + separate_prompt_and_response_with_newline: ${model.data.train_ds.separate_prompt_and_response_with_newline} + write_predictions_to_file: False + output_file_path_prefix: null # Prefix of the file to write predictions to. + truncation_field: "context" # Options: ['context', 'answer'] + index_mapping_dir: null # Path to a directory to write index mapping files. + prompt_template: ${model.data.train_ds.prompt_template} + tokens_to_generate: 32 # decide how many tokens we want to generate to evaluate performance with string metrics + metric: + name: "loss" # Name of the evaluation metric to use. Options: ['exact_string_match', 'loss'] + average: null # Average the metric over the dataset. Options: ['macro', 'micro']. Works only for 'F1', 'accuracy' etc. Refer to torchmetrics for metrics where this is supported. + num_classes: null + + optim: + name: fused_adam + lr: 1e-4 + weight_decay: 0.01 + betas: + - 0.9 + - 0.98 + sched: + name: CosineAnnealing + warmup_steps: 50 + min_lr: 0.0 # min_lr must be 0.0 for prompt learning when pipeline parallel > 1 + constant_steps: 0 # Constant steps should also be 0 when min_lr=0 + monitor: val_loss + reduce_on_plateau: false \ No newline at end of file diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_eval.py b/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_eval.py index ea368992bd5a..d08f4291321a 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_eval.py @@ -27,6 +27,7 @@ from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector from nemo.core.config import hydra_runner from nemo.utils import logging +from nemo.utils.decorators import deprecated mp.set_start_method("spawn", force=True) @@ -47,6 +48,10 @@ raise EnvironmentError("GPU is needed for the inference") +@deprecated( + explanation=f"{__file__} is deprecated. Please use MegatronGPTSFTModel.add_adapter() for PEFT features." + "See updated scripts `megatron_gpt_peft_tuning.py` and `megatron_gpt_peft_eval.py` for examples." +) @hydra_runner(config_path="conf", config_name="megatron_gpt_adapter_inference") def main(cfg) -> None: diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py b/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py index 54039a71f9f6..b09ff2c0c038 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py @@ -29,6 +29,7 @@ ) from nemo.core.config import hydra_runner from nemo.utils import logging +from nemo.utils.decorators import deprecated from nemo.utils.exp_manager import exp_manager mp.set_start_method("spawn", force=True) @@ -56,6 +57,10 @@ """ +@deprecated( + explanation=f"{__file__} is deprecated. Please use MegatronGPTSFTModel.add_adapter() for PEFT features." + "See updated scripts `megatron_gpt_peft_tuning.py` and `megatron_gpt_peft_eval.py` for examples." +) @hydra_runner(config_path="conf", config_name="megatron_gpt_adapter_tuning_config") def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py index 5bf33b37ac72..2a6ca4b48049 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py @@ -26,6 +26,7 @@ from nemo.core.config import hydra_runner from nemo.utils import logging +from nemo.utils.decorators import deprecated mp.set_start_method("spawn", force=True) @@ -46,6 +47,10 @@ raise EnvironmentError("GPU is needed for the inference") +@deprecated( + explanation=f"{__file__} is deprecated. Please use MegatronGPTSFTModel.add_adapter() for PEFT features." + "See updated scripts `megatron_gpt_peft_tuning.py` and `megatron_gpt_peft_eval.py` for examples." +) @hydra_runner(config_path="conf", config_name="megatron_gpt_adapter_inference") def main(cfg) -> None: diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py index 2e51da327ba7..a0ecc6e544e8 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py @@ -29,6 +29,7 @@ ) from nemo.core.config import hydra_runner from nemo.utils import logging +from nemo.utils.decorators import deprecated from nemo.utils.exp_manager import exp_manager mp.set_start_method("spawn", force=True) @@ -56,6 +57,10 @@ """ +@deprecated( + explanation=f"{__file__} is deprecated. Please use MegatronGPTSFTModel.add_adapter() for PEFT features." + "See updated scripts `megatron_gpt_peft_tuning.py` and `megatron_gpt_peft_eval.py` for examples." +) @hydra_runner(config_path="conf", config_name="megatron_gpt_ia3_tuning_config") def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py index df6398e7790a..8098e62684ee 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py @@ -13,29 +13,29 @@ # limitations under the License. -import json -import os +import asyncio +import threading +from functools import partial +import torch import torch.multiprocessing as mp -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from torch.utils.data import DataLoader +from omegaconf.omegaconf import OmegaConf + -from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import MegatronGPTPEFTModel from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTSFTModel -from nemo.collections.nlp.models.nlp_model import NLPModel -from nemo.collections.nlp.parts.nlp_overrides import ( - GradScaler, - MegatronHalfPrecisionPlugin, - NLPDDPStrategy, - NLPSaveRestoreConnector, - PEFTSaveRestoreConnector, - PipelineMixedPrecisionPlugin, -) +from nemo.collections.nlp.modules.common.text_generation_server import MegatronServer +from nemo.collections.nlp.modules.common.text_generation_utils import generate +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronLMPPTrainerBuilder from nemo.core.config import hydra_runner from nemo.utils import logging +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True +except: + pass + mp.set_start_method("spawn", force=True) """ This is the script to run inference with a PEFT model or an SFT Model. @@ -69,115 +69,73 @@ """ +def use_inference_server(cfg, model, trainer): + if not HAVE_MEGATRON_CORE: + raise ValueError('Megatron-core needs to be installed to use this feature!') + + from nemo.collections.nlp.modules.common.megatron_web_server import get_chatbot_demo, get_demo + + trainer.test(model, dataloaders=None) + + if parallel_state.is_pipeline_first_stage() and parallel_state.get_tensor_model_parallel_rank() == 0: + if cfg.web_server: + if cfg.chat: + defaults = { + 'user': cfg.chatbot_config.user, + 'assistant': cfg.chatbot_config.assistant, + 'system': cfg.chatbot_config.system, + } + web_ui = partial( + get_chatbot_demo, + defaults=defaults, + value=cfg.chatbot_config.value, + attributes=cfg.chatbot_config.attributes, + ) + else: + web_ui = get_demo + loop = asyncio.new_event_loop() + thread = threading.Thread( + target=web_ui, daemon=True, args=(cfg.share, cfg.username, cfg.password, cfg.port, cfg.web_port, loop), + ) + thread.start() + server = MegatronServer(model.cuda()) + server.run("0.0.0.0", port=cfg.port) + + while True: + choice = torch.cuda.LongTensor(1) + torch.distributed.broadcast(choice, 0) + if choice[0].item() == 0: + generate(model.cuda()) + + @hydra_runner(config_path="conf", config_name="megatron_gpt_peft_eval_config") def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f"\n{OmegaConf.to_yaml(cfg)}") - assert cfg.model.restore_from_path is not None - megatron_amp_O2 = cfg.model.get("megatron_amp_O2", False) - with_distributed_adam = False - - plugins = [] - strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce - gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, - find_unused_parameters=False, - ) - if cfg.trainer.precision in [16, '16', 'bf16', '16-mixed', 'bf16-mixed']: - scaler = None - if cfg.trainer.precision in [16, '16', '16-mixed']: - scaler = GradScaler( - init_scale=cfg.model.get("native_amp_init_scale", 2 ** 32), - growth_interval=cfg.model.get("native_amp_growth_interval", 1000), - hysteresis=cfg.model.get("hysteresis", 2), - enabled=False - if cfg.model.pipeline_model_parallel_size > 1 - else True, # turn off the grad scale for pipeline parallel LM model - ) - # MixedPrecisionPlugin in PTL >= 2.0 requires precision to be 16-mixed or bf16-mixed - plugin_precision = '16-mixed' - else: - plugin_precision = 'bf16-mixed' - if megatron_amp_O2 and not with_distributed_adam: - plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device="cuda", scaler=scaler)) - else: - plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device="cuda", scaler=scaler)) - - if cfg.get("cluster_type", None) == "BCP": - plugins.append(TorchElasticEnvironment()) - - trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) - if cfg.model.peft.restore_from_path: - if cfg.model.peft.restore_from_path.endswith(".nemo"): - peft_model_cfg = MegatronGPTPEFTModel.restore_from( - restore_path=cfg.model.peft.restore_from_path, trainer=trainer, return_config=True, - ) - elif cfg.model.peft.restore_from_hparams_path: # not a .nemo model we expect a hparams.yaml file - peft_model_cfg = OmegaConf.to_container(OmegaConf.load(cfg.model.peft.restore_from_hparams_path).cfg) - peft_model_cfg = OmegaConf.create(peft_model_cfg) - # extract dict inside cfg key and convert it to DictConfig - # this allows interpolation to work the same way as config from the .restore_from method - else: - raise RuntimeError("This script requires a .nemo peft model or path to hparams.yaml (and a ckpt path).") - else: - peft_model_cfg = MegatronGPTSFTModel.restore_from( - restore_path=cfg.model.restore_from_path, trainer=trainer, return_config=True, - ) - - # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams - with open_dict(peft_model_cfg): - # update the model config of the trained model with params we want to set at inference time. - peft_model_cfg.precision = cfg.trainer.precision - peft_model_cfg.data.test_ds = cfg.model.data.test_ds - peft_model_cfg.activations_checkpoint_granularity = None - peft_model_cfg.activations_checkpoint_method = None - peft_model_cfg.activations_checkpoint_layers_per_pipeline = None - if peft_model_cfg.get("use_flash_attention", False): - peft_model_cfg.use_flash_attention = cfg.model.use_flash_attention - if cfg.model.get("seq_len_interpolation_factor", None) is not None: - peft_model_cfg["seq_len_interpolation_factor"] = cfg.model.seq_len_interpolation_factor - - with open_dict(cfg): - # update the config with the trained model config - # required for hydra interpolation to work inside cfg.inference - cfg.inference.add_BOS = peft_model_cfg.data.test_ds.add_bos - cfg.inference.tokens_to_generate = peft_model_cfg.data.test_ds.tokens_to_generate + trainer = MegatronLMPPTrainerBuilder(cfg).create_trainer() if cfg.model.peft.restore_from_path: - if cfg.model.peft.restore_from_path.endswith(".nemo"): - save_restore_connector = PEFTSaveRestoreConnector( - peft_model_nemo_path=cfg.model.peft.restore_from_path, peft_model_ckpt_path=None, - ) - else: - # attempting to load a ckpt peft model. - if cfg.model.peft.restore_from_ckpt_name: - ckpt_name = cfg.model.peft.restore_from_ckpt_name - else: - ckpt_name = "model_weights.ckpt" - save_restore_connector = PEFTSaveRestoreConnector( - peft_model_nemo_path=None, - peft_model_ckpt_path=cfg.model.peft.restore_from_path, - peft_model_ckpt_name=ckpt_name, - ) + model_cfg = MegatronGPTSFTModel.merge_inference_cfg(cfg.model.peft.restore_from_path, cfg) else: - save_restore_connector = NLPSaveRestoreConnector() + model_cfg = MegatronGPTSFTModel.merge_inference_cfg(cfg.model.restore_from_path, cfg) - if os.path.isdir(cfg.model.restore_from_path): - save_restore_connector.model_extracted_dir = cfg.model.restore_from_path - model = MegatronGPTSFTModel.restore_from( - restore_path=cfg.model.restore_from_path, - trainer=trainer, - override_config_path=peft_model_cfg, - save_restore_connector=save_restore_connector, - ) + model = MegatronGPTSFTModel.restore_from(cfg.model.restore_from_path, model_cfg, trainer=trainer) + + if cfg.model.peft.restore_from_path: + model.load_adapters(cfg.model.peft.restore_from_path) model.freeze() + logging.info(f"Freezing parameters for PEFT eval:\n{model.summarize()}") + if not cfg.model.get('use_flash_attention', False): cfg.inference.compute_attention_mask = True config = OmegaConf.to_container(cfg.inference, resolve=True) model.set_inference_config(config) - trainer.test(model) + if not cfg.server: + trainer.test(model) + else: + use_inference_server(cfg, model, trainer) if __name__ == "__main__": diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py index 5628b02584d6..cffe974a071a 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py @@ -12,46 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. - -import os -import tempfile - import torch.multiprocessing as mp -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from pytorch_lightning.trainer.connectors.checkpoint_connector import _CheckpointConnector -from torch.utils.data import DataLoader, Dataset +from omegaconf.omegaconf import OmegaConf + +from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTSFTModel +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronLMPPTrainerBuilder +from nemo.collections.nlp.parts.peft_config import PEFT_CONFIG_MAP -from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import ( - MegatronGPTAdapterModel, - MegatronGPTAdapterModelWeightTying, - MegatronGPTAdapterPTuningModel, - MegatronGPTIA3Model, - MegatronGPTLoRAModel, - MegatronGPTLoRAModelWeightTying, - MegatronGPTPTuningModel, -) -from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTModel -from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel -from nemo.collections.nlp.parts.nlp_overrides import ( - CustomProgressBar, - GradScaler, - MegatronHalfPrecisionPlugin, - NLPDDPStrategy, - NLPSaveRestoreConnector, - PEFTSaveRestoreConnector, - PipelineMixedPrecisionPlugin, -) from nemo.core.config import hydra_runner -from nemo.utils import AppState, logging +from nemo.utils import logging from nemo.utils.exp_manager import exp_manager -from nemo.utils.model_utils import inject_model_parallel_rank mp.set_start_method("spawn", force=True) """ -This is the script to train an Adapter infused GPT Model for text generation. +This is the script to finetuning a GPT Model with any PEFT method. A base GPT Model is required as a starting point. This script will then insert Adapters into each Transformer layer and will train/update only these adapters during training. The base GPT Model weights will remain frozen. @@ -63,189 +38,41 @@ Usage: Assuming the base model is a 125m GPT Model, with TP=1, PP=1: a. run a training run for a base gpt nemo file: - python megatron_gpt_adapter_tuning.py \ - "model.data.train_ds=[PATH TO TRAINING JSONL FILE]", - "model.data.validation_ds=[PATH TO VALIDATION JSONL FILE]", - model.language_model_path="PATH TO BASE GPT MODEL .nemo FILE" + python megatron_gpt_peft_tuning.py \ + "model.data.train_ds.file_names=[PATH TO TRAINING JSONL FILE]", + "model.data.train_ds.concat_sampling_probabilities=[SAMPLING VAL]", + "model.data.validation_ds.file_names=[PATH TO VALIDATION JSONL FILE]", + "model.data.validation_ds.names=[NAME FOR METRIC LOGGING]", + model.restore_from_path="PATH TO BASE GPT MODEL .nemo FILE" + model.peft.peft_scheme='lora' # lora, ptuning, adapter, ia3, or none for full fineutning name="NAME OF TRAINING RUN" exp_manager.exp_dir="DIR TO SAVE CHECKPOINTS and .nemo FILE", - trainer.max_epochs=2 +Please see lora.ipynb for a step-by-step guide. """ -def _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False): - """ - This function modifies the original gpt pre-training config (gpt_cfg) with attributes from the finetuning config (cfg). - The `add_cfg_to_tree` arg adds `cfg` to the top of the yaml tree which is needed for all `hparams.yaml` files when passed as an arg to `load_from_checkpoint()`. - """ - OmegaConf.set_struct(gpt_cfg, True) - OmegaConf.resolve(cfg) - with open_dict(gpt_cfg): - gpt_cfg.megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) - gpt_cfg.micro_batch_size = cfg.model.data.train_ds.micro_batch_size - gpt_cfg.global_batch_size = cfg.model.data.train_ds.global_batch_size - gpt_cfg.sequence_parallel = cfg.model.get("sequence_parallel", False) - gpt_cfg.activations_checkpoint_granularity = cfg.model.get("activations_checkpoint_granularity", None) - gpt_cfg.activations_checkpoint_num_layers = cfg.model.get("activations_checkpoint_num_layers", None) - gpt_cfg.activations_checkpoint_method = cfg.model.get("activations_checkpoint_method", None) - gpt_cfg.activations_checkpoint_layers_per_pipeline = cfg.model.get( - "activations_checkpoint_layers_per_pipeline", None - ) - gpt_cfg.data = cfg.model.data - gpt_cfg.optim = cfg.model.optim - gpt_cfg.precision = cfg.trainer.precision - gpt_cfg.answer_only_loss = cfg.model.answer_only_loss - gpt_cfg.restore_from_path = cfg.model.restore_from_path - gpt_cfg.resume_from_checkpoint = cfg.model.resume_from_checkpoint - gpt_cfg.save_nemo_on_validation_end = cfg.model.save_nemo_on_validation_end - gpt_cfg.gradient_as_bucket_view = cfg.model.gradient_as_bucket_view - gpt_cfg.hidden_dropout = cfg.model.get('hidden_dropout', 0.0) - gpt_cfg.attention_dropout = cfg.model.get('attention_dropout', 0.0) - gpt_cfg.ffn_dropout = cfg.model.ffn_dropout - gpt_cfg.peft = cfg.model.peft - peft_cls = _get_peft_scheme(cfg.model) - gpt_cfg.target = f"{peft_cls.__module__}.{peft_cls.__name__}" - - # This is needed when modifying a hparam file directly to load `.ckpt` files. - # This is not needed to modify the cfg in `.nemo` files. - if add_cfg_to_tree: - OmegaConf.resolve(gpt_cfg) - gpt_cfg.cfg = gpt_cfg - - return gpt_cfg - - -def _get_peft_scheme(cfg): - if cfg.peft.peft_scheme == "adapter": - if cfg.peft.adapter_tuning.weight_tying: - peft_cls = MegatronGPTAdapterModelWeightTying - else: - peft_cls = MegatronGPTAdapterModel - elif cfg.peft.peft_scheme == "ia3": - peft_cls = MegatronGPTIA3Model - elif cfg.peft.peft_scheme == "ptuning": - peft_cls = MegatronGPTPTuningModel - elif cfg.peft.peft_scheme == "adapter_and_ptuning": - peft_cls = MegatronGPTAdapterPTuningModel - elif cfg.peft.peft_scheme == "lora": - if cfg.peft.lora_tuning.weight_tying: - peft_cls = MegatronGPTLoRAModelWeightTying - else: - peft_cls = MegatronGPTLoRAModel - else: - raise RuntimeError("Invalid Peft scheme") - return peft_cls - - -def load_from_checkpoint_dir(cls, cfg, trainer, modify_confg_fn): - app_state = AppState() - if cfg.model.tensor_model_parallel_size > 1 or cfg.model.pipeline_model_parallel_size > 1: - app_state.model_parallel_size = cfg.model.tensor_model_parallel_size * cfg.model.pipeline_model_parallel_size - app_state.tensor_model_parallel_size = cfg.model.tensor_model_parallel_size - app_state.pipeline_model_parallel_size = cfg.model.pipeline_model_parallel_size - ( - app_state.tensor_model_parallel_rank, - app_state.pipeline_model_parallel_rank, - app_state.model_parallel_size, - app_state.data_parallel_size, - app_state.pipeline_model_parallel_split_rank, - app_state.virtual_pipeline_model_parallel_rank, - ) = fake_initialize_model_parallel( - world_size=app_state.model_parallel_size, - rank=trainer.global_rank, - tensor_model_parallel_size_=cfg.model.tensor_model_parallel_size, - pipeline_model_parallel_size_=cfg.model.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank_=cfg.model.pipeline_model_parallel_split_rank, - ) - checkpoint_path = inject_model_parallel_rank( - os.path.join(cfg.model.pretrained_checkpoint.checkpoint_dir, cfg.model.pretrained_checkpoint.checkpoint_name) - ) - hparams_file = OmegaConf.load(cfg.model.pretrained_checkpoint.hparams_file) - gpt_cfg = modify_confg_fn(hparams_file.cfg, cfg, add_cfg_to_tree=True) - with tempfile.NamedTemporaryFile(suffix='.yaml') as f: - OmegaConf.save(config=gpt_cfg, f=f.name) - model = cls.load_from_checkpoint(checkpoint_path=checkpoint_path, trainer=trainer, hparams_file=f.name,) - return model - - -def validate_checkpoint_loading_args(cfg): - if cfg.checkpoint_dir is None or not os.path.isdir(cfg.checkpoint_dir): - raise ValueError(f'Checkpoint directory {cfg.checkpoint_dir} does not exist or is not a directory.') - if cfg.checkpoint_name is None: - raise ValueError(f'Checkpoint name {cfg.checkpoint_name} is not valid.') - if cfg.hparams_file is None or not os.path.isfile(cfg.hparams_file): - raise ValueError(f'Hparams file {cfg.hparams_file} does not exist or is not a file.') - - @hydra_runner(config_path="conf", config_name="megatron_gpt_peft_tuning_config") def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) - with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' - - plugins = [] - strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce - gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, - find_unused_parameters=False, - ) - if cfg.trainer.precision in [16, '16', 'bf16', '16-mixed', 'bf16-mixed']: - scaler = None - if cfg.trainer.precision in [16, '16', '16-mixed']: - scaler = GradScaler( - init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), - growth_interval=cfg.model.get('native_amp_growth_interval', 1000), - hysteresis=cfg.model.get('hysteresis', 2), - enabled=False - if cfg.model.pipeline_model_parallel_size > 1 - else True, # turn off the grad scale for pipeline parallel LM model - ) - # MixedPrecisionPlugin in PTL >= 2.0 requires precision to be 16-mixed or bf16-mixed - plugin_precision = '16-mixed' - else: - plugin_precision = 'bf16-mixed' - if megatron_amp_O2 and not with_distributed_adam: - plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - else: - plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - - if cfg.get('cluster_type', None) == 'BCP': - plugins.append(TorchElasticEnvironment()) - - trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()]) + trainer = MegatronLMPPTrainerBuilder(cfg).create_trainer() exp_manager(trainer, cfg.exp_manager) - # update resume from checkpoint found by exp_manager - if cfg.model.resume_from_checkpoint is not None: - trainer.ckpt_path = cfg.model.resume_from_checkpoint - logging.info(f'Resuming training from checkpoint: {trainer.ckpt_path}') - if cfg.model.restore_from_path: - base_model_save_restore_connector = NLPSaveRestoreConnector() - if os.path.isdir(cfg.model.restore_from_path): - base_model_save_restore_connector.model_extracted_dir = cfg.model.restore_from_path - base_model_cfg = MegatronGPTModel.restore_from( - restore_path=cfg.model.restore_from_path, - trainer=trainer, - return_config=True, - save_restore_connector=base_model_save_restore_connector, - ) - base_model_cfg = _modify_config(base_model_cfg, cfg, add_cfg_to_tree=False) - save_restore_connector = PEFTSaveRestoreConnector( - peft_model_nemo_path=cfg.model.peft.restore_from_path, peft_model_ckpt_path=trainer.ckpt_path - ) - if os.path.isdir(cfg.model.restore_from_path): - save_restore_connector.model_extracted_dir = cfg.model.restore_from_path - peft_cls = _get_peft_scheme(cfg.model) - model = peft_cls.restore_from( - restore_path=cfg.model.restore_from_path, - trainer=trainer, - override_config_path=base_model_cfg, - save_restore_connector=save_restore_connector, - ) + model_cfg = MegatronGPTSFTModel.merge_cfg_with(cfg.model.restore_from_path, cfg) + model = MegatronGPTSFTModel.restore_from(cfg.model.restore_from_path, model_cfg, trainer=trainer) + peft_cfg_cls = PEFT_CONFIG_MAP[cfg.model.peft.peft_scheme] + + if cfg.model.peft.restore_from_path is not None: + # initialize peft weights from a checkpoint instead of randomly + # This is not the same as resume training because optimizer states are not restored. + logging.info("PEFT Weights will be loaded from", cfg.model.peft.restore_from_path) + model.load_adapters(cfg.model.peft.restore_from_path, peft_cfg_cls(model_cfg)) + elif peft_cfg_cls is not None: + logging.info("Adding adapter weights to the model for PEFT") + model.add_adapter(peft_cfg_cls(model_cfg)) else: - raise RuntimeError("PEFT training needs a trained base model present.") + logging.info(f"Running full finetuning since no peft scheme is given.\n{model.summarize()}") trainer.fit(model) diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py index 68f06a0da7da..07af2b887fb0 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py @@ -32,6 +32,7 @@ ) from nemo.core.config import hydra_runner from nemo.utils import AppState, logging +from nemo.utils.decorators import deprecated from nemo.utils.exp_manager import exp_manager from nemo.utils.model_utils import inject_model_parallel_rank @@ -145,6 +146,10 @@ def validate_checkpoint_loading_args(cfg): raise ValueError(f'Hparams file {cfg.hparams_file} does not exist or is not a file.') +@deprecated( + explanation=f"{__file__} is deprecated. PEFT and SFT scripts are now consolidated" + "See updated scripts `megatron_gpt_peft_tuning.py` and `megatron_gpt_peft_eval.py` for examples." +) @hydra_runner(config_path="conf", config_name="megatron_gpt_sft") def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py b/examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py index bdeec2b5a9c1..5fd07e85ce2d 100644 --- a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py @@ -25,6 +25,7 @@ from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy from nemo.core.config import hydra_runner from nemo.utils.app_state import AppState +from nemo.utils.decorators import deprecated mp.set_start_method("spawn", force=True) @@ -45,6 +46,10 @@ raise EnvironmentError("GPU is needed for the inference") +@deprecated( + explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." + "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." +) @hydra_runner(config_path="conf", config_name="megatron_t5_adapter_inference") def main(cfg) -> None: diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py index 1cebf8a993ff..96a8cba64863 100644 --- a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py @@ -29,6 +29,7 @@ ) from nemo.core.config import hydra_runner from nemo.utils import logging +from nemo.utils.decorators import deprecated from nemo.utils.exp_manager import exp_manager mp.set_start_method("spawn", force=True) @@ -56,6 +57,10 @@ """ +@deprecated( + explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." + "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." +) @hydra_runner(config_path="conf", config_name="megatron_t5_adapter_tuning_config") def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_eval.py b/examples/nlp/language_modeling/tuning/megatron_t5_ia3_eval.py index 8a8ddae166e1..cc9dfef059b8 100644 --- a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_t5_ia3_eval.py @@ -25,6 +25,7 @@ from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy from nemo.core.config import hydra_runner from nemo.utils.app_state import AppState +from nemo.utils.decorators import deprecated mp.set_start_method("spawn", force=True) @@ -45,6 +46,10 @@ raise EnvironmentError("GPU is needed for the inference") +@deprecated( + explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." + "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." +) @hydra_runner(config_path="conf", config_name="megatron_t5_ia3_inference") def main(cfg) -> None: diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py index 8ad64438b914..1edc87a416a4 100644 --- a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py @@ -29,6 +29,7 @@ ) from nemo.core.config import hydra_runner from nemo.utils import logging +from nemo.utils.decorators import deprecated from nemo.utils.exp_manager import exp_manager mp.set_start_method("spawn", force=True) @@ -56,6 +57,10 @@ """ +@deprecated( + explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." + "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." +) @hydra_runner(config_path="conf", config_name="megatron_t5_ia3_tuning_config") def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py b/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py index d9de94843071..38032d06a8c8 100644 --- a/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py @@ -25,6 +25,7 @@ from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy from nemo.core.config import hydra_runner from nemo.utils.app_state import AppState +from nemo.utils.decorators import deprecated mp.set_start_method("spawn", force=True) @@ -45,6 +46,10 @@ raise EnvironmentError("GPU is needed for the inference") +@deprecated( + explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." + "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." +) @hydra_runner(config_path="conf", config_name="megatron_t5_adapter_inference") def main(cfg) -> None: diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py index 57639bc70cb5..7178bf8145ba 100644 --- a/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py @@ -29,6 +29,7 @@ ) from nemo.core.config import hydra_runner from nemo.utils import logging +from nemo.utils.decorators import deprecated from nemo.utils.exp_manager import exp_manager mp.set_start_method("spawn", force=True) @@ -56,6 +57,10 @@ """ +@deprecated( + explanation=f"{__file__} is deprecated. Please use MegatronT5SFTModel.add_adapter() for PEFT features." + "See updated scripts `megatron_t5_peft_tuning.py` and `megatron_t5_peft_eval.py` for examples." +) @hydra_runner(config_path="conf", config_name="megatron_t5_lora_tuning_config") def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_peft_eval.py b/examples/nlp/language_modeling/tuning/megatron_t5_peft_eval.py new file mode 100644 index 000000000000..d9d4d075362b --- /dev/null +++ b/examples/nlp/language_modeling/tuning/megatron_t5_peft_eval.py @@ -0,0 +1,135 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import asyncio +import threading +from functools import partial + +import torch +import torch.multiprocessing as mp +from omegaconf.omegaconf import OmegaConf + + +from nemo.collections.nlp.models.language_modeling.megatron_t5_sft_model import MegatronT5SFTModel +from nemo.collections.nlp.modules.common.text_generation_server import MegatronServer +from nemo.collections.nlp.modules.common.text_generation_utils import generate +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronLMPPTrainerBuilder +from nemo.core.config import hydra_runner +from nemo.utils import logging + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True +except (ImportError, ModuleNotFoundError): + HAVE_MEGATRON_CORE = False + +mp.set_start_method("spawn", force=True) +""" +This is the script to run inference with a PEFT model or an SFT Model. + +If you want to evaluate an SFT .nemo file: + +python examples/nlp/language_modeling/tuning/megatron_t5_peft_eval.py \ + model.restore_from_path= \ + model.peft.restore_from_path=null \ + trainer.devices=1 model.data.test_ds.file_names=\[, ] \ + model.data.test_ds.names=\['name_for_test_file1', 'name_for_test_file2'] \ # this is not the filename just some identifier + model.data.test_ds.global_batch_size=4 \ # or some other value + model.data.test_ds.micro_batch_size=4 \ + model.data.test_ds.tokens_to_generate=30 \ + inference.greedy=True \ + inference.outfile_path=\'' + +If you want to evaluate a PEFT Model, you should provide a base T5 model and a PEFT model .nemo file + +python examples/nlp/language_modeling/tuning/megatron_t5_peft_eval.py \ + model.restore_from_path= \ + model.peft.restore_from_path= \ # this will be created if you use `megatron_t5_peft_tuning.py` + trainer.devices=1 model.data.test_ds.file_names=\[, ] \ + model.data.test_ds.names=\['name_for_test_file1', 'name_for_test_file2'] \ # this is not the filename just some identifier + model.data.test_ds.global_batch_size=4 \ # or some other value + model.data.test_ds.micro_batch_size=4 \ + model.data.test_ds.tokens_to_generate=30 \ + inference.greedy=True \ + inference.outfile_path=\'' + +""" + + +def use_inference_server(cfg, model, trainer): + if not HAVE_MEGATRON_CORE: + raise ValueError('Megatron-core needs to be installed to use this feature!') + + from nemo.collections.nlp.modules.common.megatron_web_server import get_chatbot_demo, get_demo + + trainer.test(model, dataloaders=None) + + if parallel_state.is_pipeline_first_stage() and parallel_state.get_tensor_model_parallel_rank() == 0: + if cfg.web_server: + if cfg.chat: + defaults = { + 'user': cfg.chatbot_config.user, + 'assistant': cfg.chatbot_config.assistant, + 'system': cfg.chatbot_config.system, + } + web_ui = partial( + get_chatbot_demo, + defaults=defaults, + value=cfg.chatbot_config.value, + attributes=cfg.chatbot_config.attributes, + ) + else: + web_ui = get_demo + loop = asyncio.new_event_loop() + thread = threading.Thread( + target=web_ui, daemon=True, args=(cfg.share, cfg.username, cfg.password, cfg.port, cfg.web_port, loop), + ) + thread.start() + server = MegatronServer(model.cuda()) + server.run("0.0.0.0", port=cfg.port) + + while True: + choice = torch.cuda.LongTensor(1) + torch.distributed.broadcast(choice, 0) + if choice[0].item() == 0: + generate(model.cuda()) + + +@hydra_runner(config_path="conf", config_name="megatron_t5_peft_eval_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f"\n{OmegaConf.to_yaml(cfg)}") + trainer = MegatronLMPPTrainerBuilder(cfg).create_trainer() + + model_cfg = MegatronT5SFTModel.merge_inference_cfg(cfg.model.peft.restore_from_path, cfg) + model = MegatronT5SFTModel.restore_from(cfg.model.restore_from_path, model_cfg, trainer=trainer) + + model.load_adapters(cfg.model.peft.restore_from_path) + + model.freeze() + logging.info(f"Freezing parameters for PEFT eval:\n{model.summarize()}") + + if not cfg.model.get('use_flash_attention', False): + cfg.inference.compute_attention_mask = True + + if not cfg.server: + trainer.test(model) + else: + use_inference_server(cfg, model, trainer) + + +if __name__ == "__main__": + main() diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_peft_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_peft_tuning.py new file mode 100644 index 000000000000..04e25956aed2 --- /dev/null +++ b/examples/nlp/language_modeling/tuning/megatron_t5_peft_tuning.py @@ -0,0 +1,65 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch.multiprocessing as mp +from omegaconf.omegaconf import OmegaConf + +from nemo.collections.nlp.models.language_modeling.megatron_t5_sft_model import MegatronT5SFTModel +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronLMPPTrainerBuilder +from nemo.collections.nlp.parts.peft_config import PEFT_CONFIG_MAP +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + +mp.set_start_method("spawn", force=True) + +""" +This is the script to finetuning a T5 Model with any PEFT method. +A base T5 Model is required as a starting point. This script will then insert +Adapters into each Transformer layer and will train/update only these adapters +during training. The base T5 Model weights will remain frozen. + +This script is exactly the same as the peft tuning script for GPT. For more details +please refer to the GPT script and docs. +""" + + +@hydra_runner(config_path="conf", config_name="megatron_t5_peft_tuning_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + trainer = MegatronLMPPTrainerBuilder(cfg).create_trainer() + exp_manager(trainer, cfg.exp_manager) + + model_cfg = MegatronT5SFTModel.merge_cfg_with(cfg.model.restore_from_path, cfg) + model = MegatronT5SFTModel.restore_from(cfg.model.restore_from_path, model_cfg, trainer=trainer) + peft_cfg_cls = PEFT_CONFIG_MAP[cfg.model.peft.peft_scheme] + + if cfg.model.peft.restore_from_path is not None: + # initialize peft weights from a checkpoint instead of randomly + # This is not the same as resume training because optimizer states are not restored. + logging.info("PEFT Weights will be loaded from", cfg.model.peft.restore_from_path) + model.load_adapters(cfg.model.peft.restore_from_path, peft_cfg_cls(model_cfg)) + elif peft_cfg_cls is not None: + logging.info("Adding adapter weights to the model for PEFT") + model.add_adapter(peft_cfg_cls(model_cfg)) + else: + logging.info(f"Running full finetuning since no peft scheme is given.\n{model.summarize()}") + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/nemo/collections/nlp/data/language_modeling/megatron/t5_sft_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/t5_sft_dataset.py new file mode 100644 index 000000000000..7e60fdd11f80 --- /dev/null +++ b/nemo/collections/nlp/data/language_modeling/megatron/t5_sft_dataset.py @@ -0,0 +1,169 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import os +from typing import Optional + +import numpy as np +import torch +from datasets import load_dataset + +from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec +from nemo.collections.nlp.data.language_modeling.text_memmap_dataset import JSONLMemMapDataset +from nemo.core.classes import Dataset +from nemo.utils import logging + +__all__ = ['T5SFTDataset'] + + +class T5SFTDataset(Dataset): + """Sequence to Sequence Dataset in memory. + Similar to SequenceToSequenceDataset but with the same input format as GPTSFTDataset + """ + + def __init__( + self, + file_path: str, + src_tokenizer: TokenizerSpec, + tgt_tokenizer: TokenizerSpec, + max_src_seq_length: int, + max_tgt_seq_length: int, + add_bos_to_input: bool = True, + add_eos_to_input: bool = True, + replace_bos_with_pad: bool = False, + index_mapping_dir: str = None, + memmap_workers: Optional[int] = None, + hf_dataset: bool = False, + ): + """ + index_mapping_dir: Directory to save the index mapping to. If None, will write to the same folder as the dataset. + hf_dataset: Whether to load the json file with the HuggingFace dataset. otherwise, will load the jsonl file with the JSONLMemMapDataset. + """ + super().__init__() + self.file_path = file_path + self.src_tokenizer = src_tokenizer + self.tgt_tokenizer = tgt_tokenizer + self.max_src_seq_length = max_src_seq_length + self.max_tgt_seq_length = max_tgt_seq_length + self.add_bos_to_input = add_bos_to_input + self.add_eos_to_input = add_eos_to_input + self.replace_bos_with_pad = replace_bos_with_pad + assert self.max_src_seq_length > 0 + assert self.max_tgt_seq_length > 0 + + # check file exists + if not os.path.exists(self.file_path): + raise FileNotFoundError(f"Data file {self.file_path} not found") + + if hf_dataset: + self.indexed_dataset = load_dataset( + 'json', data_files=file_path, cache_dir=index_mapping_dir, num_proc=memmap_workers, split='train' + ) + else: + self.indexed_dataset = JSONLMemMapDataset( + dataset_paths=[file_path], + tokenizer=None, + header_lines=0, + index_mapping_dir=index_mapping_dir, + workers=memmap_workers, + ) + + def _process_src(self, src): + src = self.src_tokenizer.text_to_ids(src.strip()) + if self.add_bos_to_input: + src = [self.src_tokenizer.pad_id if self.replace_bos_with_pad else self.src_tokenizer.bos_id] + src + if self.add_eos_to_input: + src = src + [self.src_tokenizer.eos_id] + if len(src) > self.max_src_seq_length: + src = src[-self.max_src_seq_length + 1 :] + return src + + def _process_tgt(self, tgt): + tgt = ( + [self.tgt_tokenizer.pad_id if self.replace_bos_with_pad else self.tgt_tokenizer.bos_id] + + self.tgt_tokenizer.text_to_ids(tgt.strip()) + + [self.tgt_tokenizer.eos_id] + ) + if len(tgt) > self.max_tgt_seq_length: + tgt = tgt[-self.max_tgt_seq_length + 1 :] + return tgt + + def __len__(self): + return len(self.indexed_dataset) + + def __getitem__(self, idx): + example = self.indexed_dataset[idx] + text_enc = self._process_src(example['input']) + tgt = self._process_tgt(example['output']) + text_dec = tgt[:-1] + labels = tgt[1:] + return {'text_enc': text_enc, 'text_dec': text_dec, 'labels': labels} + + def collate_fn(self, batch): + text_enc = [item['text_enc'] for item in batch] + text_dec = [item['text_dec'] for item in batch] + labels = [item['labels'] for item in batch] + + if isinstance(text_enc[0], np.ndarray): + text_enc = [x.tolist() for x in text_enc] + + if isinstance(text_dec[0], np.ndarray): + text_dec = [x.tolist() for x in text_dec] + + if isinstance(labels[0], np.ndarray): + labels = [x.tolist() for x in labels] + + max_dec_input_length = max([len(item) for item in text_dec]) if text_dec else 0 + max_enc_input_length = max([len(item) for item in text_enc]) if text_enc else 0 + max_label_length = max([len(item) for item in labels]) if labels else 0 + + loss_mask = [([1] * (len(item))) + ([0] * (max_label_length - len(item))) for item in labels] + text_enc = [item + [self.src_tokenizer.pad_id] * (max_enc_input_length - len(item)) for item in text_enc] + text_dec = [item + [self.tgt_tokenizer.pad_id] * (max_dec_input_length - len(item)) for item in text_dec] + labels = [item + [self.tgt_tokenizer.pad_id] * (max_label_length - len(item)) for item in labels] + + text_enc = torch.LongTensor(text_enc) + text_dec = torch.LongTensor(text_dec) + labels = torch.LongTensor(labels) + loss_mask = torch.LongTensor(loss_mask) + + enc_mask = (text_enc != self.src_tokenizer.pad_id).long() + dec_mask = (text_dec != self.tgt_tokenizer.pad_id).long() + + return { + 'text_enc': text_enc, + 'text_dec': text_dec, + 'labels': labels, + 'loss_mask': loss_mask, + 'enc_mask': enc_mask, + 'dec_mask': dec_mask, + } + + +def convert_data_file_format(src_file_name, tgt_file_name, output_file_name): + """ + Converts the old two-file format used by SequenceToSequenceDataset to the new JSONL format used by T5SFTDataset + """ + output_lines = [] + with open(src_file_name, encoding='utf8') as f_src, open(tgt_file_name, encoding='utf8') as f_tgt: + for i, (src, tgt) in enumerate(zip(f_src, f_tgt)): + if i % 10000 == 0 and i != 0: + logging.info(f"Read {i} lines from {src_file_name} & {tgt_file_name}") + output_lines.append({'input': src, 'output': tgt}) + + logging.info(f'Dataset Length : {len(output_lines)}') + + with open(output_file_name, "w") as f_json: + for line in output_lines: + f_json.write(json.dumps(line) + '\n') diff --git a/nemo/collections/nlp/models/language_modeling/megatron_glue_model.py b/nemo/collections/nlp/models/language_modeling/megatron_glue_model.py index 5cc0f7ea3a32..c0a4b6351530 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_glue_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_glue_model.py @@ -18,7 +18,7 @@ TextToTextGLUEDataset, TextToTextXNLIDataset, ) -from nemo.collections.nlp.models.language_modeling.megatron_finetune_model import MegatronT5FinetuneModel +from nemo.collections.nlp.models.language_modeling.megatron_t5_sft_model import MegatronT5SFTModel from nemo.utils import logging try: @@ -33,8 +33,8 @@ __all__ = ['MegatronT5GLUEModel'] -class MegatronT5GLUEModel(MegatronT5FinetuneModel): - """GLUE Model that Inherits from MegatronT5FinetuneModel and overrides the dataset building.""" +class MegatronT5GLUEModel(MegatronT5SFTModel): + """GLUE Model that Inherits from MegatronT5SFTModel and overrides the dataset building.""" def __init__(self, cfg: DictConfig, trainer: Trainer): super().__init__(cfg, trainer=trainer) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 0a8a93ebac52..db983e5f5e01 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -196,9 +196,8 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): raise ImportError( "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." ) - if not HAVE_MEGATRON_CORE: - raise ImportError( + logging.warning( "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." ) # this prevents base constructor from initializing tokenizer diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py index 0c3b1adc8dbd..281c451a1e7a 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py @@ -35,6 +35,7 @@ ) from nemo.core.classes.mixins import adapter_mixins from nemo.utils import logging, model_utils +from nemo.utils.decorators import deprecated try: from megatron.core import parallel_state @@ -46,6 +47,10 @@ HAVE_MEGATRON_CORE = False +@deprecated( + explanation="Please use MegatronGPTSFTModel.add_adapter() for PEFT features." + "See the updated `megatron_gpt_peft_tuning.py` for an example." +) class MegatronGPTPEFTModel(MegatronGPTSFTModel): """ base class for all mixin based adapter models @@ -72,7 +77,7 @@ def first_stage_of_pipeline(self): return False def init_peft_modules(self): - """ + """ Randomly initialize the peft params and add them to the appropriate modules. """ assert len(self.peft_name_keys) > 0, "peft_name_keys have not been set no PEFT modules will be added" @@ -123,7 +128,7 @@ def setup(self, stage=None): self.setup_complete = True def get_all_keys(self,): - """ + """ Returns all the keys in the model """ k = [n for n, p in self.named_parameters()] @@ -132,7 +137,7 @@ def get_all_keys(self,): return set(k + b) def get_peft_state_dict(self,): - """ + """ Gets the keys associated with the adapters only. """ state_dict = self.model.state_dict(prefix="model.module." if self.cfg.megatron_amp_O2 else "model.") @@ -214,13 +219,13 @@ def on_load_checkpoint(self, checkpoint) -> None: def setup_optimizer_param_groups(self): """ - ModelPT override. Optimizer will get self._optimizer_param_groups. + ModelPT override. Optimizer will get self._optimizer_param_groups. Makes two optimizer param groups, one for the frozen model params - and one for the prompt-table/prompt-encoder params. The learning + and one for the prompt-table/prompt-encoder params. The learning rate for the frozen model's params will always be zero effectively freezing the model's params but still allowing for the needed gradients - to be passed around in pipeline parallel models. The prompt-encoder - and/or prompt table will use the learning rate set by the user. + to be passed around in pipeline parallel models. The prompt-encoder + and/or prompt table will use the learning rate set by the user. """ self.freeze() # Freeze the entire model opt_params = [] @@ -240,7 +245,7 @@ def __init__( super().__init__(cfg, trainer) def init_peft_modules(self): - """ + """ Randomly initialize the peft params and add them to the appropriate modules. """ assert len(self.peft_name_keys) > 0, "peft_name_keys have not been set no PEFT modules will be added" @@ -303,8 +308,8 @@ class MegatronGPTAdapterModel(MegatronGPTLayerwisePEFTModel): Two adapter's are inserted into each Transformer layer in the base GPT Model. It is assumed that these set of adapters will then be trained for a specific task. - Once trained, the adapter weights will be saved and can be re-loaded - and infused into the same GPT Model for inference. + Once trained, the adapter weights will be saved and can be re-loaded + and infused into the same GPT Model for inference. """ def __init__( @@ -340,7 +345,7 @@ def __init__( class MegatronGPTAdapterModelWeightTying(MegatronGPTLayerwisePEFTModel): """ - TODO + TODO """ def __init__( @@ -420,8 +425,8 @@ class MegatronGPTIA3Model(MegatronGPTLayerwisePEFTModel): Three adapter's are inserted into each Transformer layer in the base GPT Model. Each adapter is basically a vector that simply scales the key, value or ffn hidden representations. It is assumed that these set of adapters will then be trained for a specific task. - Once trained, the adapter weights will be saved and can be re-loaded - and infused into the same GPT Model for inference. + Once trained, the adapter weights will be saved and can be re-loaded + and infused into the same GPT Model for inference. """ def __init__(self, cfg: DictConfig, trainer: Trainer): @@ -453,7 +458,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): class MegatronGPTPTuningModel(MegatronGPTPEFTModel): """ - MegatronGPTPTuningModel is a model that combines a base model (GPTSFTModel) with a p-tuning prefix in the + MegatronGPTPTuningModel is a model that combines a base model (GPTSFTModel) with a p-tuning prefix in the input word embedding representations using a prompt-encoder as descripted in Liu et al. https://arxiv.org/pdf/2103.10385.pdf The mixin framework adds the output of prompt-encoder (i.e. the virtual embeddings) inside @@ -476,7 +481,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self.virtual_tokens = cfg.peft.p_tuning.virtual_tokens def init_peft_modules(self,): - """ + """ Initialize the p-tuning prompt encoder in the mixin. This should only happen in the first stage of the pipeline unlike other PEFT methods like Lora or Adapters because p-tuning only adds params at input to the encoder layer. @@ -488,7 +493,7 @@ def init_peft_modules(self,): return True def state_dict(self, destination=None, prefix=None, keep_vars=False): - """ + """ Reimplement state_dict for ptuning because we also need to check the stage of the pipeline. The check is required to make pp>1 to work. """ @@ -502,7 +507,7 @@ def state_dict(self, destination=None, prefix=None, keep_vars=False): return self.model.state_dict(prefix="model.") def load_state_dict(self, state_dict, strict: bool = True): - """ + """ Reimplement load_state_dict for ptuning because we also need to check the stage of the pipeline. The check is required to make pp>1 to work. """ @@ -638,7 +643,7 @@ def __init__( class MegatronGPTLoRAModelWeightTying(MegatronGPTLayerwisePEFTModel): """ - TODO + TODO """ def __init__( diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index 11dd0034dc24..df84b02cc7b4 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -32,13 +32,9 @@ ) from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel from nemo.collections.nlp.modules.common.megatron.utils import get_iterator_k_split -from nemo.collections.nlp.modules.common.text_generation_utils import ( - LengthParam, - SamplingParam, - generate, - get_computeprob_response, - megatron_gpt_generate, -) +from nemo.collections.nlp.modules.common.text_generation_utils import generate, get_computeprob_response + +from nemo.collections.nlp.parts.mixins.nlp_adapter_mixins import NLPAdapterModelMixin from nemo.collections.nlp.parts.utils_funcs import get_last_rank from nemo.utils import AppState, logging @@ -68,7 +64,7 @@ __all__ = ['MegatronGPTSFTModel'] -class MegatronGPTSFTModel(MegatronGPTModel): +class MegatronGPTSFTModel(NLPAdapterModelMixin, MegatronGPTModel): """ Megatron GPT Supervised Fine-Tuning """ @@ -169,6 +165,7 @@ def setup(self, stage=None): # NOTE: super().__init__ will try and setup train/val/test datasets, but we sidestep this using a if self._train_ds is not None condition # We then set things up for real only once setup() of this class is called. resume_checkpoint_path = self.trainer.ckpt_path + self.setup_complete = True if resume_checkpoint_path: init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) else: @@ -209,6 +206,7 @@ def setup(self, stage=None): if self.cfg.get('transformer_engine', False): self.setup_transformer_engine_tp_groups() + self.setup_complete = True def _build_dataset(self, data_cfg, is_train=True): datasets = [] @@ -564,7 +562,7 @@ def inference_epoch_end(self, outputs, mode, data_cfg): # Logging of the averaged metrics: averaged_loss = sum(averaged_loss) / len(averaged_loss) - averaged_metric = sum(averaged_metric) / len(averaged_metric) if len(averaged_metric) > 1 else None + averaged_metric = sum(averaged_metric) / len(averaged_metric) if len(averaged_metric) >= 1 else None # Handle case where metrics can be nan or inf. This can break checkpoint save/load. if averaged_metric is not None and (torch.isinf(averaged_metric) or torch.isnan(averaged_metric)): @@ -831,6 +829,9 @@ def on_test_epoch_start(self): ) return super().on_test_epoch_start() + def on_predict_epoch_start(self): + return self.on_test_epoch_start() + def on_test_epoch_end(self): _ = self.inference_epoch_end(self.test_step_outputs, 'test', self.cfg.data.test_ds) # Commenting as on_test_epoch_end was a no-op in PTL 1.9 diff --git a/nemo/collections/nlp/models/language_modeling/megatron_t0_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t0_model.py index fb62dc0db8ee..4d4d80b71a98 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_t0_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_t0_model.py @@ -23,7 +23,7 @@ MegatronPretrainingBatchSampler, ) from nemo.collections.nlp.data.language_modeling.t0_dataset import T0Dataset -from nemo.collections.nlp.models.language_modeling.megatron_finetune_model import MegatronT5FinetuneModel +from nemo.collections.nlp.models.language_modeling.megatron_t5_sft_model import MegatronT5SFTModel from nemo.utils import AppState, logging try: @@ -48,8 +48,8 @@ __all__ = ['MegatronT0Model'] -class MegatronT0Model(MegatronT5FinetuneModel): - """T0 (https://arxiv.org/abs/2110.08207) Model that Inherits from MegatronT5FinetuneModel and overrides the dataset building.""" +class MegatronT0Model(MegatronT5SFTModel): + """T0 (https://arxiv.org/abs/2110.08207) Model that Inherits from MegatronT5SFTModel and overrides the dataset building.""" def __init__(self, cfg: DictConfig, trainer: Trainer): super().__init__(cfg, trainer=trainer) @@ -154,7 +154,7 @@ def _build_dataset(self, data_cfg, check_implict_grad_acc=False, is_train=True): return datasets def training_step(self, dataloader_iter, batch_idx): - return super(MegatronT5FinetuneModel, self).training_step(dataloader_iter, batch_idx) + return super().training_step(dataloader_iter, batch_idx) # Override the parent batch reconfiguring logic. def _reconfigure_and_process_inference_batch(self, batch): @@ -236,10 +236,10 @@ def setup_eval_dataloader(self, datasets, data_cfg): # TODO: Temporary overrides of finetune model. This needs to removed in the finetune model. def on_train_start(self) -> None: - super(MegatronT5FinetuneModel, self).on_train_start() + super().on_train_start() def on_validation_start(self) -> None: - super(MegatronT5FinetuneModel, self).on_validation_start() + super().on_validation_start() def on_test_start(self) -> None: - super(MegatronT5FinetuneModel, self).on_test_start() + super().on_test_start() diff --git a/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py index eaf4004e7371..d1332831ef1d 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py @@ -26,11 +26,11 @@ from pytorch_lightning.trainer.trainer import Trainer from nemo.collections.common.parts.adapter_modules import LinearAdapterConfig -from nemo.collections.nlp.models.language_modeling.megatron_finetune_model import MegatronT5FinetuneModel from nemo.collections.nlp.models.language_modeling.megatron_t5_model import MegatronT5Model from nemo.collections.nlp.models.language_modeling.megatron_t5_prompt_learning_model import ( MegatronT5PromptLearningModel, ) +from nemo.collections.nlp.models.language_modeling.megatron_t5_sft_model import MegatronT5SFTModel from nemo.collections.nlp.modules.common import VirtualPromptStyle from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import ( AdapterName, @@ -183,11 +183,11 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> A ) # Special ids to text function to handle stripping and special tokens with sentencepiece tokenizers. - preds_text = MegatronT5FinetuneModel.ids_to_text(predicted_token_ids, self.tokenizer) - input_text = MegatronT5FinetuneModel.ids_to_text(enc_input, self.tokenizer) + preds_text = MegatronT5SFTModel.ids_to_text(predicted_token_ids, self.tokenizer) + input_text = MegatronT5SFTModel.ids_to_text(enc_input, self.tokenizer) if labels is not None: - labels_text = MegatronT5FinetuneModel.ids_to_text(labels, self.tokenizer) + labels_text = MegatronT5SFTModel.ids_to_text(labels, self.tokenizer) else: labels_text = [None] * len(preds_text) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py index bcfa8c5d6a4c..f13be45db836 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py @@ -25,8 +25,8 @@ from nemo.collections.nlp.models.language_modeling.megatron_base_prompt_learning_model import ( MegatronBasePromptLearningModel, ) -from nemo.collections.nlp.models.language_modeling.megatron_finetune_model import MegatronT5FinetuneModel from nemo.collections.nlp.models.language_modeling.megatron_t5_model import MegatronT5Model +from nemo.collections.nlp.models.language_modeling.megatron_t5_sft_model import MegatronT5SFTModel from nemo.collections.nlp.modules.common.megatron.utils import ( average_losses_across_data_parallel_group, get_iterator_k_split, @@ -296,9 +296,9 @@ def get_predictions(self, input_ids, enc_mask, encoder_input, labels): else self.tokenizer.bos_id, ) # Special ids to text function to handle stripping and special tokens with sentencepiece tokenizers. - preds_text = MegatronT5FinetuneModel.ids_to_text(predicted_token_ids, self.tokenizer) - labels_text = MegatronT5FinetuneModel.ids_to_text(labels, self.tokenizer) - input_text = MegatronT5FinetuneModel.ids_to_text(input_ids, self.tokenizer) + preds_text = MegatronT5SFTModel.ids_to_text(predicted_token_ids, self.tokenizer) + labels_text = MegatronT5SFTModel.ids_to_text(labels, self.tokenizer) + input_text = MegatronT5SFTModel.ids_to_text(input_ids, self.tokenizer) return { 'predicted_token_ids': preds_text, 'labels': labels_text, @@ -482,11 +482,11 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> A else self.tokenizer.bos_id, ) # Special ids to text function to handle stripping and special tokens with sentencepiece tokenizers. - preds_text = MegatronT5FinetuneModel.ids_to_text(predicted_token_ids, self.tokenizer) - input_text = MegatronT5FinetuneModel.ids_to_text(input_ids, self.tokenizer) + preds_text = MegatronT5SFTModel.ids_to_text(predicted_token_ids, self.tokenizer) + input_text = MegatronT5SFTModel.ids_to_text(input_ids, self.tokenizer) if labels is not None: - labels_text = MegatronT5FinetuneModel.ids_to_text(labels, self.tokenizer) + labels_text = MegatronT5SFTModel.ids_to_text(labels, self.tokenizer) else: labels_text = [None] * len(preds_text) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t5_sft_model.py similarity index 74% rename from nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py rename to nemo/collections/nlp/models/language_modeling/megatron_t5_sft_model.py index b95017a17302..22483731a534 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_t5_sft_model.py @@ -23,8 +23,11 @@ from nemo.collections.common.metrics import MetricStringToTorchMetric from nemo.collections.common.metrics.classification_accuracy import ExactStringPerCategoryMatchMetric from nemo.collections.nlp.data.common.sequence_to_sequence_dataset import SequenceToSequenceDataset +from nemo.collections.nlp.data.language_modeling.megatron.t5_sft_dataset import T5SFTDataset from nemo.collections.nlp.models.language_modeling.megatron_t5_model import MegatronT5Model, T5Sentinel from nemo.collections.nlp.modules.common.megatron.utils import get_iterator_k_split + +from nemo.collections.nlp.parts.mixins.nlp_adapter_mixins import NLPAdapterModelMixin from nemo.collections.nlp.parts.utils_funcs import get_last_rank from nemo.utils import AppState, logging @@ -50,19 +53,25 @@ HAVE_MEGATRON_CORE = False -__all__ = ['MegatronT5FinetuneModel'] +__all__ = ['MegatronT5SFTModel'] -class MegatronT5FinetuneModel(MegatronT5Model): - """Finetune Model that Inherits from MegatronT5Model instead.""" +class MegatronT5SFTModel(NLPAdapterModelMixin, MegatronT5Model): + """ T5 Finetuning model in the same format as MegatronGPTSFTModel """ def __init__(self, cfg: DictConfig, trainer: Trainer): + if not HAVE_APEX: + raise ImportError( + "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) super().__init__(cfg, trainer=trainer) - self.val_metric, self.val_metric_name = self.setup_metric(self.cfg.data.validation_ds) - self.val_metric = torch.nn.ModuleList(self.val_metric) + self.val_metric = self.test_metric = None + if hasattr(self.cfg.data, "validation_ds"): + self.val_metric, self.val_metric_name = self.setup_metric(self.cfg.data.validation_ds) + self.val_metric = torch.nn.ModuleList(self.val_metric) if self.val_metric is not None else None if hasattr(self.cfg.data, "test_ds"): self.test_metric, self.test_metric_name = self.setup_metric(self.cfg.data.test_ds) - self.test_metric = torch.nn.ModuleList(self.test_metric) + self.test_metric = torch.nn.ModuleList(self.test_metric) if self.test_metric is not None else None def setup_metric(self, data_cfg): # XNLI is a special case. @@ -71,10 +80,12 @@ def setup_metric(self, data_cfg): metric = [ExactStringPerCategoryMatchMetric(self.cfg.eval_languages)] else: if not hasattr(data_cfg, "metric"): - metric = MetricStringToTorchMetric["exact_string_match"] + metric_class = MetricStringToTorchMetric["exact_string_match"] else: if not hasattr(data_cfg.metric, "name"): raise ValueError("Metric name is not provided in the metric config.") + if data_cfg.metric.name == "loss": + return None, "loss" if data_cfg.metric.name not in MetricStringToTorchMetric: raise KeyError( f"{data_cfg.metric.name} is not supported. List of supported metrics: {MetricStringToTorchMetric.keys()}" @@ -104,9 +115,8 @@ def setup_metric(self, data_cfg): raise ValueError( f"Number of class labels {len(data_cfg.metric.get('class_labels', None))} does not match `num_classes` : {data_cfg.metric.num_classes}" ) - - metric_name = data_cfg.metric.name - metric_class = MetricStringToTorchMetric[metric_name] + metric_name = data_cfg.metric.name + metric_class = MetricStringToTorchMetric[metric_name] # GLUE will not have a "src_file_name" attribute and will always have only a single metric. if hasattr(data_cfg, "src_file_name") or hasattr(data_cfg, "file_names"): @@ -143,6 +153,10 @@ def setup_metric(self, data_cfg): def _metrics_require_string2category_map(self): return set(["f1", "accuracy", "average_precision"]) + @property + def model(self): + return self.enc_dec_model + def setup(self, stage=None): # This is just to keep the parent class happy since we override its setup() method. self.init_consumed_samples = 0 @@ -158,6 +172,7 @@ def setup(self, stage=None): self.setup_test_data() if hasattr(self, '_train_ds'): self.setup_training_data() + self.setup_complete = True def on_validation_epoch_start(self): app_state = AppState() @@ -322,30 +337,31 @@ def inference_step(self, dataloader_iter, batch_idx: int, mode: str, dataloader_ ) # Special ids to text function to handle stripping and special tokens with sentencepiece tokenizers. - preds_text = MegatronT5FinetuneModel.ids_to_text(predicted_token_ids, self.tokenizer) - labels_text = MegatronT5FinetuneModel.ids_to_text(batch['labels'], self.tokenizer) - input_text = MegatronT5FinetuneModel.ids_to_text(batch['text_enc'], self.tokenizer) + preds_text = MegatronT5SFTModel.ids_to_text(predicted_token_ids, self.tokenizer) + labels_text = MegatronT5SFTModel.ids_to_text(batch['labels'], self.tokenizer) + input_text = MegatronT5SFTModel.ids_to_text(batch['text_enc'], self.tokenizer) if not batch_has_lang_information: categories = [None] * len(preds_text) else: categories = batch['lang'] - metric = self.val_metric[dataloader_idx] if mode == 'validation' else self.test_metric[dataloader_idx] - assert len(categories) == len(preds_text) == len(labels_text) - for _, (pred, label, category) in enumerate(zip(preds_text, labels_text, categories)): - # To compute metrics like pearson or spearman correlation, we need to cast the predicted string and labels to floats. - pred, label = self.cast_for_metric( - pred=pred, - label=label, - metric_name=self.val_metric_name if mode == 'validation' else self.test_metric_name, - class_labels=data_cfg.metric.get('class_labels', None), - labels_are_strings=data_cfg.metric.get('labels_are_strings', False), - ) - if batch_has_lang_information: - _ = metric(pred, label, category) - else: - _ = metric(pred, label) + if self.val_metric is not None or self.test_metric is not None: + metric = self.val_metric[dataloader_idx] if mode == 'validation' else self.test_metric[dataloader_idx] + assert len(categories) == len(preds_text) == len(labels_text) + for _, (pred, label, category) in enumerate(zip(preds_text, labels_text, categories)): + # To compute metrics like pearson or spearman correlation, we need to cast the predicted string and labels to floats. + pred, label = self.cast_for_metric( + pred=pred, + label=label, + metric_name=self.val_metric_name if mode == 'validation' else self.test_metric_name, + class_labels=data_cfg.metric.get('class_labels', None), + labels_are_strings=data_cfg.metric.get('labels_are_strings', False), + ) + if batch_has_lang_information: + _ = metric(pred, label, category) + else: + _ = metric(pred, label) outputs = { 'preds': preds_text, @@ -435,40 +451,40 @@ def inference_epoch_end(self, outputs, mode, data_cfg): torch.distributed.broadcast(loss, get_last_rank()) self.log('val_loss', loss, prog_bar=True, rank_zero_only=True, batch_size=1) self.log('global_step', self.trainer.global_step, prog_bar=True, rank_zero_only=True, batch_size=1) + averaged_loss.append(loss) # Determine the key used to log the loss based on the user provided name of the dataset or the dataloader index. loss_log_key = self._determine_log_key(data_cfg, dataloader_idx, "loss", mode) - # Determine the key used to log the eval metric based on the user provided name of the dataset or the dataloader index. - metric_log_key = self._determine_log_key(data_cfg, dataloader_idx, metric_name, mode) - self.log(loss_log_key, loss, batch_size=1) - metric_object = ( - self.val_metric[dataloader_idx] if mode == 'validation' else self.test_metric[dataloader_idx] - ) - metric = metric_object.compute() - if metric_name == 'rouge': - metric = metric['rouge1_fmeasure'] - # Handle logging of GLUE/XNLI separately here. XNLI has a separate metric per language. - if isinstance(metric, dict): - # GLUE case: - if len(metric) == 1 and 'acc' in metric: - metric = metric['acc'] - self.log(metric_log_key, metric, batch_size=1) - logging.info(f"{mode} {metric_name}: {metric}") - # XNLI case where the metric dictionary contains the language and the computed metric as values. - else: - for k, v in metric.items(): - if k != 'acc' and 'total' not in k: - self.log(metric_log_key + f'_{k}', v, batch_size=1) - logging.info(f"{mode} {metric_name} lang {k} : {v}") - if metric_name != 'rouge': + if metric_name != 'loss': + # Determine the key used to log the eval metric based on the user provided name of the dataset or the dataloader index. + metric_log_key = self._determine_log_key(data_cfg, dataloader_idx, metric_name, mode) + self.log(loss_log_key, loss, batch_size=1) + metric_object = ( + self.val_metric[dataloader_idx] if mode == 'validation' else self.test_metric[dataloader_idx] + ) + metric = metric_object.compute() + if metric_name == 'rouge': + metric = metric['rouge1_fmeasure'] + # Handle logging of GLUE/XNLI separately here. XNLI has a separate metric per language. + if isinstance(metric, dict): + # GLUE case: + if len(metric) == 1 and 'acc' in metric: metric = metric['acc'] - else: - self.log(metric_log_key, metric, batch_size=1) - logging.info(f"{metric_log_key}: {metric}") - metric_object.reset() - - averaged_loss.append(loss) - averaged_metric.append(metric) + self.log(metric_log_key, metric, batch_size=1) + logging.info(f"{mode} {metric_name}: {metric}") + # XNLI case where the metric dictionary contains the language and the computed metric as values. + else: + for k, v in metric.items(): + if k != 'acc' and 'total' not in k: + self.log(metric_log_key + f'_{k}', v, batch_size=1) + logging.info(f"{mode} {metric_name} lang {k} : {v}") + if metric_name != 'rouge': + metric = metric['acc'] + else: + self.log(metric_log_key, metric, batch_size=1) + logging.info(f"{metric_log_key}: {metric}") + metric_object.reset() + averaged_metric.append(metric) # Write predictions, labels, and inputs to a file for each validation/test dataset. if data_cfg.get("write_predictions_to_file", False): @@ -479,7 +495,7 @@ def inference_epoch_end(self, outputs, mode, data_cfg): f"Cannot write predictions to file when output_file_path_prefix is not set or present in the yaml config file." ) - # Gather the outputs object from all data parallel ranks since we are using the DistributedSampler which splits data across DDP ranks. + # Gather the outputs object from all data parallel ranks since we are using the DistributedSampler which splits data across DDPDDP ranks. gathered_outputs = [None for _ in range(parallel_state.get_data_parallel_world_size())] torch.distributed.all_gather_object( gathered_outputs, @@ -527,10 +543,10 @@ def inference_epoch_end(self, outputs, mode, data_cfg): # Logging of the averaged metrics: averaged_loss = sum(averaged_loss) / len(averaged_loss) - averaged_metric = sum(averaged_metric) / len(averaged_metric) + averaged_metric = sum(averaged_metric) / len(averaged_metric) if len(averaged_metric) >= 1 else None # Handle case where metrics can be nan or inf. This can break checkpoint save/load. - if torch.isinf(averaged_metric) or torch.isnan(averaged_metric): + if averaged_metric is not None and (torch.isinf(averaged_metric) or torch.isnan(averaged_metric)): app_state = AppState() monitor_mode = app_state.checkpoint_callback_params.mode assert monitor_mode in ['min', 'max'] @@ -538,10 +554,12 @@ def inference_epoch_end(self, outputs, mode, data_cfg): if mode == 'validation': self.log("validation_loss", averaged_loss, batch_size=1) - self.log(f"validation_{self.val_metric_name}", averaged_metric, batch_size=1) + if averaged_metric is not None: + self.log(f"validation_{self.val_metric_name}", averaged_metric, batch_size=1) elif mode == 'test': self.log("test_loss", averaged_loss, batch_size=1) - self.log(f"test_{self.test_metric_name}", averaged_metric, batch_size=1) + if averaged_metric is not None: + self.log(f"test_{self.test_metric_name}", averaged_metric, batch_size=1) app_state = AppState() if hasattr(self, "_train_ds"): @@ -635,7 +653,9 @@ def setup_eval_data(self, datasets, data_cfg): for dataset in datasets: eval_dl = self.build_data_loader( dataset, - global_batch_size=self.cfg.data.train_ds.global_batch_size, + global_batch_size=self.cfg.data.test_ds.global_batch_size + if hasattr(self.cfg.data, "test_ds") + else self.cfg.data.validation_ds.global_batch_size, shuffle=data_cfg.shuffle, num_workers=data_cfg.num_workers, pin_memory=data_cfg.pin_memory, @@ -660,32 +680,52 @@ def _build_train_dataset(self, data_cfg): f"Cannot use drop_last=False in your training data with gradient accumulation found grad acc of {data_cfg.global_batch_size // (data_cfg.micro_batch_size * parallel_state.get_data_parallel_world_size())} with global_batch_size {data_cfg.global_batch_size}, micro_batch_size {data_cfg.micro_batch_size}, data parallel size {parallel_state.get_data_parallel_world_size()}" ) datasets = [] - # Determine if we are using a single dataset or a list of datasets. - is_src_list_config = isinstance(data_cfg.src_file_name, ListConfig) - is_tgt_list_config = isinstance(data_cfg.tgt_file_name, ListConfig) - - if (is_src_list_config and not is_tgt_list_config) or (is_tgt_list_config and not is_src_list_config): - raise ValueError("src_list and tgt_list must both be either a ListConfig or a string. ") - if is_src_list_config: - if len(data_cfg.src_file_name) != len(data_cfg.tgt_file_name): - raise ValueError("src_file_name and tgt_file_name must have the same number of elements. ") + if hasattr(data_cfg, "src_file_name") and hasattr(data_cfg, "tgt_file_name"): + # Determine if we are using a single dataset or a list of datasets. + is_src_list_config = isinstance(data_cfg.src_file_name, ListConfig) + is_tgt_list_config = isinstance(data_cfg.tgt_file_name, ListConfig) + + if (is_src_list_config and not is_tgt_list_config) or (is_tgt_list_config and not is_src_list_config): + raise ValueError("src_list and tgt_list must both be either a ListConfig or a string. ") + if is_src_list_config: + if len(data_cfg.src_file_name) != len(data_cfg.tgt_file_name): + raise ValueError("src_file_name and tgt_file_name must have the same number of elements. ") + else: + data_cfg.src_file_name = [data_cfg.src_file_name] + data_cfg.tgt_file_name = [data_cfg.tgt_file_name] + + for src, tgt in zip(data_cfg.src_file_name, data_cfg.tgt_file_name): + dataset = SequenceToSequenceDataset( + src_file_name=src, + tgt_file_name=tgt, + src_tokenizer=self.tokenizer, + tgt_tokenizer=self.tokenizer, + max_src_seq_length=data_cfg.max_src_seq_length, + max_tgt_seq_length=data_cfg.max_tgt_seq_length, + add_bos_to_input=data_cfg.get('add_bos_to_input', True), + add_eos_to_input=data_cfg.get('add_eos_to_input', True), + replace_bos_with_pad=data_cfg.get('replace_bos_with_pad', False), + ) + datasets.append(dataset) + elif hasattr(data_cfg, "file_names"): + for file_path in data_cfg.file_names: + dataset = T5SFTDataset( + file_path=file_path, + src_tokenizer=self.tokenizer, + tgt_tokenizer=self.tokenizer, + max_src_seq_length=data_cfg.max_seq_length, + max_tgt_seq_length=data_cfg.max_seq_length, + add_bos_to_input=data_cfg.get('add_bos', True), + add_eos_to_input=data_cfg.get( + 'add_eos', True + ), # review: need domain knowledge to undertand if these args are ok + index_mapping_dir=data_cfg.get('index_mapping_dir', None), + memmap_workers=data_cfg.get('memmap_workers', None), + hf_dataset=data_cfg.get('hf_dataset', False), + ) + datasets.append(dataset) else: - data_cfg.src_file_name = [data_cfg.src_file_name] - data_cfg.tgt_file_name = [data_cfg.tgt_file_name] - - for src, tgt in zip(data_cfg.src_file_name, data_cfg.tgt_file_name): - dataset = SequenceToSequenceDataset( - src_file_name=src, - tgt_file_name=tgt, - src_tokenizer=self.tokenizer, - tgt_tokenizer=self.tokenizer, - max_src_seq_length=data_cfg.max_src_seq_length, - max_tgt_seq_length=data_cfg.max_tgt_seq_length, - add_bos_to_input=data_cfg.get('add_bos_to_input', True), - add_eos_to_input=data_cfg.get('add_eos_to_input', True), - replace_bos_with_pad=data_cfg.get('replace_bos_with_pad', False), - ) - datasets.append(dataset) + raise ValueError("You must specify either (src_file_name and tgt_file_name) or file_names in data config") if len(datasets) > 1: dataset = ConcatMapDataset( @@ -707,41 +747,58 @@ def _build_eval_dataset(self, data_cfg): f'You are trying to use "implicit gradient accumulation" of {data_cfg.global_batch_size // (data_cfg.micro_batch_size * parallel_state.get_data_parallel_world_size())} in your validation/test datasets. This is not supported. Please set global_batch_size equal to micro_batch_size * data_parallel_world_size.' ) datasets = [] - # Determine if we are using a single dataset or a list of datasets. - is_src_list_config = isinstance(data_cfg.src_file_name, ListConfig) - is_tgt_list_config = isinstance(data_cfg.tgt_file_name, ListConfig) - is_names_list_config = False - if hasattr(data_cfg, "names"): - if isinstance(data_cfg.names, ListConfig): - is_names_list_config = True - - if (is_src_list_config and not is_tgt_list_config) or (is_tgt_list_config and not is_src_list_config): - raise ValueError("src_list and tgt_list must both be either a ListConfig or a string. ") - if is_src_list_config: - if len(data_cfg.src_file_name) != len(data_cfg.tgt_file_name): - raise ValueError("src_file_name and tgt_file_name must have the same number of elements. ") - if is_names_list_config and len(data_cfg.names) != len(data_cfg.src_file_name): - raise ValueError( - "If you are providing names for each src/tgt file, they must have the same number of elements." + if hasattr(data_cfg, "src_file_name") and hasattr(data_cfg, "tgt_file_name"): + # Determine if we are using a single dataset or a list of datasets. + is_src_list_config = isinstance(data_cfg.src_file_name, ListConfig) + is_tgt_list_config = isinstance(data_cfg.tgt_file_name, ListConfig) + is_names_list_config = False + if hasattr(data_cfg, "names"): + if isinstance(data_cfg.names, ListConfig): + is_names_list_config = True + + if (is_src_list_config and not is_tgt_list_config) or (is_tgt_list_config and not is_src_list_config): + raise ValueError("src_list and tgt_list must both be either a ListConfig or a string. ") + if is_src_list_config: + if len(data_cfg.src_file_name) != len(data_cfg.tgt_file_name): + raise ValueError("src_file_name and tgt_file_name must have the same number of elements. ") + if is_names_list_config and len(data_cfg.names) != len(data_cfg.src_file_name): + raise ValueError( + "If you are providing names for each src/tgt file, they must have the same number of elements." + ) + else: + data_cfg.src_file_name = [data_cfg.src_file_name] + data_cfg.tgt_file_name = [data_cfg.tgt_file_name] + + for src, tgt in zip(data_cfg.src_file_name, data_cfg.tgt_file_name): + dataset = SequenceToSequenceDataset( + src_file_name=src, + tgt_file_name=tgt, + src_tokenizer=self.tokenizer, + tgt_tokenizer=self.tokenizer, + max_src_seq_length=data_cfg.max_src_seq_length, + max_tgt_seq_length=data_cfg.max_tgt_seq_length, + add_bos_to_input=data_cfg.get('add_bos_to_input', True), + add_eos_to_input=data_cfg.get('add_eos_to_input', True), + replace_bos_with_pad=data_cfg.get('replace_bos_with_pad', False), ) + datasets.append(dataset) + elif hasattr(data_cfg, "file_names"): + for file_path in data_cfg.file_names: + dataset = T5SFTDataset( + file_path=file_path, + src_tokenizer=self.tokenizer, + tgt_tokenizer=self.tokenizer, + max_src_seq_length=data_cfg.max_seq_length, + max_tgt_seq_length=data_cfg.max_seq_length, + add_bos_to_input=data_cfg.get('add_bos', True), + add_eos_to_input=data_cfg.get('add_eos', True), + index_mapping_dir=data_cfg.get('index_mapping_dir', None), + memmap_workers=data_cfg.get('memmap_workers', None), + hf_dataset=data_cfg.get('hf_dataset', False), + ) + datasets.append(dataset) else: - data_cfg.src_file_name = [data_cfg.src_file_name] - data_cfg.tgt_file_name = [data_cfg.tgt_file_name] - - for src, tgt in zip(data_cfg.src_file_name, data_cfg.tgt_file_name): - dataset = SequenceToSequenceDataset( - src_file_name=src, - tgt_file_name=tgt, - src_tokenizer=self.tokenizer, - tgt_tokenizer=self.tokenizer, - max_src_seq_length=data_cfg.max_src_seq_length, - max_tgt_seq_length=data_cfg.max_tgt_seq_length, - add_bos_to_input=data_cfg.get('add_bos_to_input', True), - add_eos_to_input=data_cfg.get('add_eos_to_input', True), - replace_bos_with_pad=data_cfg.get('replace_bos_with_pad', False), - ) - datasets.append(dataset) - + raise ValueError("You must specify either (src_file_name and tgt_file_name) or file_names in data config") return datasets def build_train_valid_test_datasets(self, stage): diff --git a/nemo/collections/nlp/models/nlp_model.py b/nemo/collections/nlp/models/nlp_model.py index 0f0de87d3887..ac3a8c998ba7 100644 --- a/nemo/collections/nlp/models/nlp_model.py +++ b/nemo/collections/nlp/models/nlp_model.py @@ -16,8 +16,9 @@ import hashlib import json import os -from typing import Any, Mapping, Optional +from typing import Any, Mapping, Optional, Union +import torch from lightning_fabric.utilities.cloud_io import _load as pl_load from omegaconf import DictConfig, OmegaConf from pytorch_lightning import Trainer @@ -39,6 +40,7 @@ from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector from nemo.core.classes import ModelPT from nemo.core.classes.exportable import Exportable +from nemo.core.connectors.save_restore_connector import SaveRestoreConnector from nemo.utils import AppState, logging try: @@ -439,3 +441,22 @@ def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True): del state_dict["bert_model.embeddings.position_ids"] results = super(NLPModel, self).load_state_dict(state_dict, strict=strict) return results + + @classmethod + def restore_from( + cls, + restore_path: str, + override_config_path: Optional[Union[OmegaConf, str]] = None, + map_location: Optional[torch.device] = None, + strict: bool = True, + return_config: bool = False, + save_restore_connector: SaveRestoreConnector = None, + trainer: Optional[Trainer] = None, + ): + if save_restore_connector is None: + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(restore_path): + save_restore_connector.model_extracted_dir = restore_path + return super().restore_from( + restore_path, override_config_path, map_location, strict, return_config, save_restore_connector, trainer + ) diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py index b1b978205406..f4caf0263dda 100644 --- a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py +++ b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py @@ -27,6 +27,7 @@ from nemo.collections.nlp.modules.common.megatron.fused_bias_gelu import fused_bias_gelu from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults, init_method_const, init_method_normal from nemo.core.classes.mixins import adapter_mixin_strategies +from nemo.core.classes.mixins.adapter_mixins import AdapterConfig try: @@ -53,7 +54,7 @@ class AdapterName(str, enum.Enum): """ - Names for adapters used in NLP Adapters and IA3. Note: changing this will break backward compatibility. + Names for adapters used in NLP Adapters and IA3. Note: changing this will break backward compatibility. """ MLP_INFUSED = "mlp_infused_adapter" @@ -96,14 +97,14 @@ def forward(self, x): class MLPInfusedAdapter(InfusedAdapter): """ MLPInfusedAdapter is basically a clone of InfusedAdapter. We do this to make the adapter_mixin agnostic to adapter names - and only check adapter class types. + and only check adapter class types. """ pass @dataclass -class InfusedAdapterConfig: +class InfusedAdapterConfig(AdapterConfig): in_features: int _target_: str = "{0}.{1}".format(InfusedAdapter.__module__, InfusedAdapter.__name__) @@ -233,7 +234,7 @@ def forward(self, x): @dataclass -class ParallelLinearAdapterConfig: +class ParallelLinearAdapterConfig(AdapterConfig): in_features: int out_features: int dim: int @@ -407,7 +408,7 @@ def forward(self, batch_size: int, use_cached_reps: bool = False) -> torch.Tenso @dataclass -class PromptEncoderAdapterConfig: +class PromptEncoderAdapterConfig(AdapterConfig): virtual_tokens: int bottleneck_dim: int embedding_dim: int diff --git a/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py b/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py index a95981e0953b..a48cb8726e8e 100644 --- a/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py +++ b/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py @@ -15,6 +15,10 @@ import torch from omegaconf import DictConfig +from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import ( + AdapterName, + PromptEncoderAdapterConfig, +) from nemo.collections.nlp.modules.common.megatron.hiddens import get_hiddens_module from nemo.collections.nlp.modules.common.megatron.language_model import Embedding from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType @@ -38,6 +42,7 @@ ) from nemo.collections.nlp.modules.common.megatron.vocab_parallel_cross_entropy import vocab_parallel_cross_entropy from nemo.collections.nlp.parts import utils_funcs +from nemo.core.classes.mixins import adapter_mixins try: from apex.transformer.enums import AttnMaskType, ModelType @@ -102,7 +107,7 @@ def forward(self, hidden_states, word_embeddings_weight): # TODO: add soft prompts as an Embedding sub-class -class MegatronTokenLevelEncoderDecoderModule(MegatronModule): +class MegatronTokenLevelEncoderDecoderModule(MegatronModule, adapter_mixins.AdapterModuleMixin): """Token-based (input/output is tokens) encoder-decoder model (e.g. T5 Language model.)""" def __init__( @@ -427,6 +432,7 @@ def __init__( ) self._tokens_head_key = 'tokens_head' + self.set_accepted_adapter_types([PromptEncoderAdapterConfig._target_]) def _validate_kv_channels(self, cfg): kv_channels = cfg.kv_channels @@ -548,6 +554,18 @@ def forward( else: enc_position_ids = None enc_input = self.encoder_embedding(enc_input_ids, enc_position_ids, token_type_ids=token_type_ids) + if self.is_adapter_available(): + _sq, _bs, _hs = enc_input.size() + ptuning_adapter = self.get_adapter_module(AdapterName.PTUNING_ADAPTER) + v = ptuning_adapter.virtual_tokens + if ( + ptuning_adapter and _sq >= v + ): # The sequence should be longer the v to insert virtual embeddings. + virtual_embeddings = ptuning_adapter(_bs) + enc_input = enc_input[ + v:, :, : + ] # the first v tokens are pads so that they can be swapped out with virtual embeddings. + enc_input = torch.concat([virtual_embeddings, enc_input], dim=0) else: enc_input = None else: diff --git a/nemo/collections/nlp/parts/megatron_trainer_builder.py b/nemo/collections/nlp/parts/megatron_trainer_builder.py index a416e3f93951..3c8bbc5db0d3 100644 --- a/nemo/collections/nlp/parts/megatron_trainer_builder.py +++ b/nemo/collections/nlp/parts/megatron_trainer_builder.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys + from omegaconf import DictConfig from pytorch_lightning import Trainer from pytorch_lightning.callbacks import ModelSummary @@ -22,8 +24,10 @@ GradScaler, MegatronHalfPrecisionPlugin, NLPDDPStrategy, + NLPDDPStrategyNotebook, PipelineMixedPrecisionPlugin, ) +from nemo.utils import logging class MegatronTrainerBuilder: @@ -39,6 +43,12 @@ def _training_strategy(self) -> NLPDDPStrategy: """ Returns a ddp strategy passed to Trainer.strategy. """ + # check interactive environment + _IS_INTERACTIVE = hasattr(sys, "ps1") or bool(sys.flags.interactive) + if _IS_INTERACTIVE and self.cfg.trainer.devices == 1: + logging.info("Detected interactive environment, using NLPDDPStrategyNotebook") + return NLPDDPStrategyNotebook(no_ddp_communication_hook=True, find_unused_parameters=False,) + return NLPDDPStrategy( no_ddp_communication_hook=True, gradient_as_bucket_view=self.cfg.model.gradient_as_bucket_view, @@ -60,8 +70,10 @@ def _plugins(self) -> list: Returns: plugins: list of plugins passed to Trainer.plugins including precision plugins. """ - megatron_amp_O2 = self.cfg.model.get('megatron_amp_O2', False) - with_distributed_adam = self.cfg.model.optim.get('name') == 'distributed_fused_adam' + megatron_amp_o2 = self.cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = ( + self.cfg.model.optim.get('name') == 'distributed_fused_adam' if self.cfg.model.get('optim') else False + ) plugins = [] if self.cfg.trainer.precision in [16, '16', 'bf16', '16-mixed', 'bf16-mixed']: @@ -72,7 +84,7 @@ def _plugins(self) -> list: else: plugin_precision = 'bf16-mixed' - if megatron_amp_O2 and not with_distributed_adam: + if megatron_amp_o2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) @@ -110,3 +122,15 @@ def create_trainer(self) -> Trainer: **self.cfg.trainer, callbacks=[ModelSummary(max_depth=3), CustomProgressBar()] ) + + +class MegatronLMPPTrainerBuilder(MegatronTrainerBuilder): + """Builder for scripts where grad scaler is turned off for pipeline parallel LM model. E.g. PEFT tuning scripts""" + + def _grad_scaler(self) -> GradScaler: + return GradScaler( + init_scale=self.cfg.model.get("native_amp_init_scale", 2 ** 32), + growth_interval=self.cfg.model.get("native_amp_growth_interval", 1000), + hysteresis=self.cfg.model.get("hysteresis", 2), + enabled=False if self.cfg.model.pipeline_model_parallel_size > 1 else True, + ) diff --git a/nemo/collections/nlp/parts/mixins/__init__.py b/nemo/collections/nlp/parts/mixins/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/nlp/parts/mixins/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py b/nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py new file mode 100644 index 000000000000..16a3850852d4 --- /dev/null +++ b/nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py @@ -0,0 +1,484 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import tempfile +from typing import List, Optional, Union + +import torch +from omegaconf import DictConfig, OmegaConf, open_dict + +from nemo.utils.model_utils import inject_model_parallel_rank + +try: + from nemo.collections.nlp.modules.common.megatron.adapters.mcore_mixins import swap_mcore_mixin + + HAVE_MEGATRON_CORE = True +except (ImportError, ModuleNotFoundError): + HAVE_MEGATRON_CORE = False + + +from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import PromptEncoderAdapterConfig +from nemo.collections.nlp.parts.peft_config import ( + PEFT_CONFIG_MAP, + CanonicalAdaptersPEFTConfig, + LoraPEFTConfig, + PEFTConfig, + PtuningPEFTConfig, +) +from nemo.core.classes.mixins.adapter_mixins import AdapterModuleMixin +from nemo.core.connectors.save_restore_connector import SaveRestoreConnector +from nemo.utils import logging, model_utils + +try: + from megatron.core import parallel_state +except (ImportError, ModuleNotFoundError): + HAVE_MEGATRON_CORE = False + + +class NLPAdapterModelMixin: + """ NLP Adapter Mixin that can augment any transformer-based model with Adapter module support. + This mixin class should be used only with a top level ModelPT subclass, that includes either a `model` or an `enc_dec_model` submodule. + This mixin class adds several utility methods to add, load and save adapters. + + An Adapter module is any Pytorch nn.Module that possess a few properties : + + - It's input and output dimension are the same, while the hidden dimension need not be the same. + - The final layer of the Adapter module is zero-initialized, so that the residual connection to the adapter yields the original output. + + This mixin class aims to integrate with PEFT, which is one or more adapters modules. + The two features of PEFT, layer selection and weight tying, are also supported in this mixin class. + """ + + def __init__(self, *args, **kwargs): + self.use_peft = False + self.setup_complete = False + self.use_ptuning_only = False + super().__init__(*args, **kwargs) + if hasattr(self, "enc_dec_model"): + self.model_prefix = "enc_dec_model." # for T5 + else: + self.model_prefix = "model.module." if self.cfg.megatron_amp_O2 else "model." + + self.use_mcore_gpt = hasattr(self, 'mcore_gpt') and self.mcore_gpt + if self.use_mcore_gpt: + assert HAVE_MEGATRON_CORE, "You set `mcore_gpt` as True but megatron core is not found." + + def first_stage_of_pipeline(self): + if hasattr(self, "model") and hasattr(self.model, "pre_process"): + return self.model.pre_process + elif hasattr(self, "model") and hasattr(self.model, "module") and hasattr(self.model.module, "pre_process"): + # (guyueh1): this if condition is used to handle amp O2 + # when amp_O2 is on, self.model will be wrapped by the Float16Module class + return self.model.module.pre_process + logging.warning("no attribute named model or no model.pre_process found. Can not detect stage of pipeline...") + return False + + def _get_all_keys(self,): + """ + Returns all the keys in the model + """ + k = [n for n, p in self.named_parameters()] + b = [n for n, p in self.named_buffers() if n.replace("model.module.", "model.", 1) in self.state_dict().keys()] + # we include buffers because ptuning representations are cached in a buffer and saved to state_dict for inference time use. + return set(k + b) + + def _check_and_add_adapter(self, name, module, peft_name, peft_cfg, name_key_to_mcore_mixins=None): + if name_key_to_mcore_mixins is not None: + for mcore_target, mcore_mixin in name_key_to_mcore_mixins[peft_name]: + if name in [ + mcore_target, + f'model.{mcore_target}', + f'model.module.{mcore_target}', + ]: # simple string match for now + swap_mcore_mixin(module, mcore_mixin) + if model_utils.import_class_by_path(peft_cfg._target_) in module.get_accepted_adapter_types(): + module.add_adapter( + name=peft_name, + cfg=peft_cfg, + base_model_cfg=self.cfg, + model_parallel_config=self.model_parallel_config, + ) + elif isinstance(module, AdapterModuleMixin): + if model_utils.import_class_by_path(peft_cfg._target_) in module.get_accepted_adapter_types(): + module.add_adapter( + name=peft_name, + cfg=peft_cfg, + base_model_cfg=self.cfg, + model_parallel_config=self.model_parallel_config, + ) + + def _check_and_add_peft_cfg(self, peft_cfg): + + layer_selection = peft_cfg.layer_selection + + assert not self.use_mcore_gpt or hasattr( + peft_cfg, 'name_key_to_mcore_mixins' + ), f"{peft_cfg.__class__.__name__} is not supported in megatron core mode yet." + name_key_to_mcore_mixins = peft_cfg.name_key_to_mcore_mixins if self.use_mcore_gpt else None + + for adapter_name, adapter_cfg in peft_cfg.get_config_dict().items(): + # self.mcore_gpt means is GPT and not T5 + if hasattr(self, 'mcore_gpt') and not isinstance(adapter_cfg, PromptEncoderAdapterConfig): + if layer_selection is not None: + logging.info( + f"Layer selection {layer_selection} is enabled for the current model (" + f"{self.__class__.__name__} + {adapter_name})" + ) + if self.use_mcore_gpt: + if self.cfg.megatron_amp_O2: + layers = self.model.module.decoder.layers + else: + layers = self.model.decoder.layers + else: + if self.cfg.megatron_amp_O2: + layers = self.model.module.language_model.encoder.layers + else: + layers = self.model.language_model.encoder.layers + if layer_selection is None: + layer_selection = list(range(1, self.cfg.num_layers + 1)) + for layer in layers: + if layer.layer_number in layer_selection: + for name, module in layer.named_modules(): + self._check_and_add_adapter( + name, module, adapter_name, adapter_cfg, name_key_to_mcore_mixins + ) + else: + # Non GPT models, as well as GPT+PTuning do not support layer selection + if layer_selection is not None: + logging.warning( + "Layer selection is specified, but it is not supported for either " + f"{self.__class__.__name__} or {adapter_name})" + ) + for name, module in self.named_modules(): + self._check_and_add_adapter(name, module, adapter_name, adapter_cfg, name_key_to_mcore_mixins) + + def add_adapter(self, peft_cfgs: Union[PEFTConfig, List[PEFTConfig]]): + """ + High level API to add one or more adapter modules to the model, and freeze the base weights + This method supports adding adapter modules from PEFTConfig or list of PEFTConfig. It would add + corresponding adapter modules. Layer selection and weight tying would be applied if it's in PEFTConfig + + Args: + peft_cfgs: One or more PEFTConfig objects that specify the PEFT method configuration + """ + + if not isinstance(peft_cfgs, List): + peft_cfgs = [peft_cfgs] + + self.base_keys = self._get_all_keys() + self.freeze() + logging.info(f"Before adding PEFT params:\n{self.summarize()}") + + self.use_ptuning_only = len(peft_cfgs) == 1 and isinstance(peft_cfgs[0], PtuningPEFTConfig) + + for peft_cfg in peft_cfgs: + if self.use_ptuning_only: + if not self.first_stage_of_pipeline(): + # There are no params to add if we are not in the first state of the pipeline + continue + self.virtual_tokens = peft_cfg.virtual_tokens + + self._check_and_add_peft_cfg(peft_cfg) + + logging.info(f"After adding PEFT params:\n{self.summarize()}") + self.adapter_keys = self._get_all_keys() - self.base_keys + + for cfg in peft_cfgs: + if cfg.weight_tying: + self.tie_weights(cfg) + self.use_peft = True + + def _get_config_and_state_dict_from_nemo(self, filepath, map_location): + cwd = os.getcwd() + + with tempfile.TemporaryDirectory() as tmpdir: + try: + SaveRestoreConnector._unpack_nemo_file(filepath, tmpdir) + + os.chdir(tmpdir) + + config_yaml = "model_config.yaml" + model_weights_ckpt = "model_weights.ckpt" + + conf = OmegaConf.load(config_yaml) + + os.chdir(cwd) + model_weights = os.path.join(tmpdir, model_weights_ckpt) + model_weights = inject_model_parallel_rank(model_weights) + state_dict = torch.load(model_weights, map_location=map_location) + + return conf, state_dict + finally: + os.chdir(cwd) + + def setup_optimizer_param_groups(self): + """ + ModelPT override. Optimizer will get self._optimizer_param_groups. + Makes two optimizer param groups, one for the frozen model params + and one for the prompt-table/prompt-encoder params. The learning + rate for the frozen model's params will always be zero effectively + freezing the model's params but still allowing for the needed gradients + to be passed around in pipeline parallel models. The prompt-encoder + and/or prompt table will use the learning rate set by the user. + """ + if self.use_peft: + self.freeze() # Freeze the entire model + opt_params = [] + for _, module in self.named_modules(): + if isinstance(module, AdapterModuleMixin) and module.is_adapter_available(): + module.set_enabled_adapters(enabled=True) + module.unfreeze_enabled_adapters() # selectively unfreeze the adapter modules. + opt_params += [p for p in module.parameters() if p.requires_grad] + self._optimizer_param_groups = ({"params": opt_params},) + logging.info(f"Optimizer groups set:\n{self.summarize()}") + else: + super().setup_optimizer_param_groups() + + def load_adapters( + self, filepath: str, peft_cfgs: Optional[Union[PEFTConfig, List[PEFTConfig]]] = None, map_location: str = None, + ): + """ + Utility method that restores only the adapter module(s), and not the entire model itself. + This allows the sharing of adapters which are often just a fraction of the size of the full model, + enabling easier deliver. + + .. note:: + + During restoration, assumes that the model does not currently already have one or more adapter modules. + + Args: + filepath: Filepath of the .ckpt or .nemo file. + peft_cfgs: One or more PEFTConfig objects that specify the PEFT method configuration. + If none, will infer from the .nemo checkpoint + map_location: Pytorch flag, where to place the adapter(s) state dict(s). + """ + + # Determine device + if map_location is None: + if torch.cuda.is_available(): + map_location = 'cuda' + else: + map_location = 'cpu' + + if filepath.endswith('.nemo'): + conf, state_dict = self._get_config_and_state_dict_from_nemo(filepath, map_location) + elif filepath.endswith('.ckpt'): + state_dict = torch.load(filepath, map_location)['state_dict'] + else: + raise RuntimeError(f"{filepath} is not nemo file or ckpt file") + if not peft_cfgs: + assert filepath.endswith( + '.nemo' + ), "Inferring peft scheme is only supported for .nemo checkpoints. Please supply the `peft_cfgs` argument." + peft_cfgs = [PEFT_CONFIG_MAP[conf.peft.peft_scheme](conf)] + self.add_adapter(peft_cfgs) + assert set(state_dict.keys()) == self.adapter_keys + super().load_state_dict(state_dict, strict=False) + + def tie_weights(self, peft_cfg): + pos_idx = 0 + + if self.use_mcore_gpt: + if self.cfg.megatron_amp_O2: + layers = self.model.module.decoder.layers + else: + layers = self.model.decoder.layers + else: + if self.cfg.megatron_amp_O2: + layers = self.model.module.language_model.encoder.layers + else: + layers = self.model.language_model.encoder.layers + + if isinstance(peft_cfg, LoraPEFTConfig): + layer0 = layers[0].self_attention + elif isinstance(peft_cfg, CanonicalAdaptersPEFTConfig): + layer0 = layers[0] + else: + raise RuntimeError(f"{peft_cfg} is not supported for tied weights") + + for adapter_name in layer0.adapter_layer: + adapter = layer0.get_adapter_module(adapter_name) + print(adapter_name, pos_idx) + adapter.set_position(pos_idx) + pos_idx += 1 + + for layer in layers[1:]: + if isinstance(peft_cfg, LoraPEFTConfig): + layer = layer.self_attention + for adapter_name in layer.adapter_layer: + print(adapter_name, pos_idx) + adapter_l = layer.get_adapter_module(adapter_name) + adapter_0 = layer0.get_adapter_module(adapter_name) + adapter_l.tie_weights(pos_idx, adapter_0) + pos_idx += 1 + + def get_peft_state_dict(self): + """ + Gets the keys associated with the adapters only. + """ + state_dict = self.model.state_dict(prefix=self.model_prefix) + peft_state_dict = {} + for k in self.adapter_keys: + # state_dict keys needs to be in non-O2 format and will be corrected in PEFTSaveRestoreConnector if O2=True + new_k = k.replace("model.module.", "model.", 1) + peft_state_dict[new_k] = state_dict[k] + return peft_state_dict + + def state_dict(self, destination=None, prefix=None, keep_vars=False): + if self.use_peft and self.setup_complete: + # Once setup is complete we no longer need to track the frozen part of the model. Only there adapter state dict keeps changing so state_dict only track these. + return self.get_peft_state_dict() + else: + # we want all the params with the same keys as calling self.state_dict() + # but we can't call self.state_dict() here as it would be a recursive call. + # so we call self.model.state_dict(prefix="model.") which will return all the keys and params same as calling self.state_dict() + return self.model.state_dict(prefix=self.model_prefix) + + def sharded_state_dict(self, prefix: str = ''): + use_mcore_gpt = hasattr(self, 'mcore_gpt') and self.mcore_gpt + if not use_mcore_gpt or (self.use_peft and self.setup_complete): + return None + else: + return self.model.sharded_state_dict(prefix=self.model_prefix) + + def load_state_dict(self, state_dict, strict: bool = True): + if len(state_dict) == 0: + return # checkpoint is loaded in on_load_checkpoint() + if self.use_peft and self.setup_complete: + # at this stage only adapter params will appear in the state_dict arg + # so we only update those while the rest of the model is frozen. + # setting strict=False will ignore the missing keys (which are not being updated anyway) + # explicitly check if state_dict.keys matches all the expected self.adapter_keys since we don't have the + # safety in strict=True anymore. + assert set(state_dict.keys()) == self.adapter_keys + super().load_state_dict(state_dict, strict=False) + else: + super().load_state_dict(state_dict, strict=True) + + def on_load_checkpoint(self, checkpoint) -> None: + """LightningModule hook: + https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-load-checkpoint + """ + if self.use_peft and self.setup_complete: + if not self.use_ptuning_only or self.first_stage_of_pipeline(): + # same as super().on_load_checkpoint() but strict=False and only check unexpected keys + # mcore uses distributed checkpointing + if hasattr(self, 'mcore_gpt') and self.mcore_gpt: + for index, module in enumerate(self.get_gpt_module_list()): + if parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None: + checkpoint_state_dict = checkpoint['state_dict'][f'model_{index}'] + else: + checkpoint_state_dict = checkpoint['state_dict'] + # checkpoint_state_dict has "model." but module does not so we need to remove it when loading + checkpoint_state_dict = { + key.replace('model.', ''): checkpoint_state_dict.pop(key) + for key in list(checkpoint_state_dict.keys()) + } + missing_keys, unexpected_keys = module.load_state_dict(checkpoint_state_dict, strict=False) + + assert len(unexpected_keys) == 0, 'Unexpected key(s) in state_dict: {}. '.format( + ', '.join('"{}"'.format(k) for k in unexpected_keys) + ) + + # legacy checkpointing for interleaved + else: + if isinstance(self.model, list): + for i in range(len(self.model)): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + self.model[i].module.load_state_dict(checkpoint[f'model{i}'], strict=True) + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + else: + super().on_load_checkpoint(checkpoint) + + @classmethod + def merge_cfg_with(cls, path: str, cfg: DictConfig) -> DictConfig: + """ + Merge a given configuration dictionary `cfg` with the configuration dictionary + obtained from restoring a MegatronGPTSFTModel or MegatronT5SFTModel at the specified `path`. + + Args: + path (str): The path to the SFT model checkpoint to be restored. + cfg (DictConfig): The configuration dictionary to merge. + + Returns: + DictConfig: The merged configuration dictionary. + + Examples: + >>> path = "/path/to/model/checkpoint" + >>> cfg = DictConfig({"model": {"key": "value"}, "trainer": {"precision": 16}}) + >>> merged_cfg = merge_cfg_with(path, cfg) + + Notes: + - The function resolves variables within the `cfg` dictionary using `OmegaConf.resolve`. + - Keys in `cfg.model` will override the corresponding keys in the output dictionary. + - If "train_ds" exists in `cfg.model.data`, it updates `micro_batch_size` and `global_batch_size`. + - If `cfg.trainer` contains a "precision" key, it updates `output.precision`. + + """ + + base_cfg = cls.restore_from(path, return_config=True) + + OmegaConf.resolve(cfg) + with open_dict(base_cfg): + for key, val in cfg.model.items(): + base_cfg[key] = val + if "train_ds" in cfg.model.data: + base_cfg.micro_batch_size = cfg.model.data.train_ds.micro_batch_size + base_cfg.global_batch_size = cfg.model.data.train_ds.global_batch_size + if cfg.get("trainer", None) and cfg.trainer.get("precision"): + base_cfg.precision = cfg.trainer.precision + + return base_cfg + + @classmethod + def merge_inference_cfg(cls, path: str, cfg: DictConfig) -> DictConfig: + """ + Generate a configuration dictionary by a given configuration dictionary `cfg` with + the configuration dictionary obtained from restoring a MegatronGPTSFTModel or MegatronT5SFTModel + at the specified `path` and modify `cfg` for inference + + Args: + path (str): The path to the SFT model checkpoint to be restored. + cfg (DictConfig): The configuration dictionary to modify for inference. + + Returns: + DictConfig: The configuration dictionary for inference. + + Examples: + >>> path = "/path/to/model/checkpoint" + >>> cfg = DictConfig({"model": {"key": "value"}, "trainer": {"precision": 16}}) + >>> merged_cfg = merge_inference_cfg(path, cfg) + + Notes: + - "precision" and "test_ds" from `cfg` will override the corresponding keys in the output dictionary + - "activations_checkpoint" will be ovrrided to None in the output dictionary + - "use_flash_attention" will be True if in one of the configuration dictionarys is True + - "seq_len_interpolation_factor" will be overrided from `cfg` if it's not None from checkpoint + """ + + peft_cfg = cls.restore_from(path, return_config=True) + with open_dict(peft_cfg): + # update the model config of the trained model with params we want to set at inference time. + peft_cfg.precision = cfg.trainer.precision + for key, val in cfg.model.items(): + if key != 'data': + peft_cfg[key] = val + peft_cfg.data.test_ds = cfg.model.data.test_ds + + with open_dict(cfg): + cfg.inference.add_BOS = peft_cfg.data.test_ds.add_bos + cfg.inference.tokens_to_generate = peft_cfg.data.test_ds.tokens_to_generate + + return peft_cfg diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index f65dfa4a04ac..865974aff873 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -432,6 +432,18 @@ def restore_checkpoint_after_setup(self) -> bool: return True +class NLPDDPStrategyNotebook(NLPDDPStrategy): + """ Version of NLPDDPStrategy to be used in a Jupyter Notebook + A large portion of Megatron code has DDP dependency, so it has been necessary to use NLPDDPStrategy even for + single-GPU training (e.g. in a Jupyter notebook) + A PTL 2.0 changes has prevented DDPStrategy to be used in a notebook. + This version of NLPDDPStrategy enables megatron training in a notebook in PTL 2.0. + """ + + def _configure_launcher(self): + self._launcher = None + + class NLPSaveRestoreConnector(SaveRestoreConnector): def __init__(self) -> None: if not HAVE_APEX: @@ -802,156 +814,6 @@ def dummy(): return instance -class PEFTSaveRestoreConnector(NLPSaveRestoreConnector): - """ - PEFT models require the ability to load/save a small subset of the full model (once PEFT params have been infused into the base model.) - The PEFTSaveRestoreConnector is used to allow loading and saving only the PEFT params while not saving the entire model. - - Args: - peft_model_nemo_path: Used to provide the .nemo file corresponding to a PEFT model (which will only contain a small set of params) - peft_model_ckpt_path: Used to provide the path to .ckpt files of a PEFT model. This is required when no .nemo is available (yet) such as during resumed training. - peft_model_ckpt_name: The filename of the ckpt file inside the peft_model_ckpt_path folder - If both are provided the peft_model_ckpt_path takes precedence. - If neither are provided, PEFT params are initialized at random (not loaded from any external source). - """ - - def __init__( - self, - peft_model_nemo_path: Optional[str] = None, - peft_model_ckpt_path: Optional[str] = None, - peft_model_ckpt_name: Optional[str] = "model_weights.ckpt", - ) -> None: - super().__init__() - self.peft_model_ckpt_name = peft_model_ckpt_name - if peft_model_ckpt_path: - # First we will try to load a adapter ckpt path - # this is given priority over loading from nemo path to make resumption of training possible - ckpt_name = os.path.basename(peft_model_ckpt_path) - if not ckpt_name.strip() == '': - # update the weights file name inside the ckpt path rank folders - self.peft_model_ckpt_name = ckpt_name - self.peft_model_ckpt_dir = os.path.dirname(peft_model_ckpt_path) - assert os.path.isdir(self.peft_model_ckpt_dir) - self.peft_model_nemo_path = None - elif peft_model_nemo_path: - # If resumption is not possible we will try to load a adapter nemo path - self.peft_model_nemo_path = peft_model_nemo_path - assert os.path.exists(self.peft_model_nemo_path) - self.peft_model_ckpt_dir = None - else: - # We are not resuming training from a nemo file or a ckpt - # We are training the adapter from randomly initialization - self.peft_model_nemo_path = None - self.peft_model_ckpt_dir = None - - def _load_state_dict_from_disk(self, model_weights, map_location=None): - """ - Infuse the state_dict of the base model with PEFT params from either a peft_model_nemo_path or peft_model_ckpt_path - """ - # first load based model weights - base_model_state_dict = super()._load_state_dict_from_disk(model_weights, map_location) - # Next, We want to load PEFT model's weights - if self.peft_model_nemo_path: - # if the PEFT weights are provided in a .nemo file - # we need to untar the .nemo if its still tarred - with tempfile.TemporaryDirectory() as tmpdir: - self._unpack_nemo_file(self.peft_model_nemo_path, tmpdir) - model_weights_path = self._inject_model_parallel_rank_for_ckpt(tmpdir, self.peft_model_ckpt_name) - peft_state_dict = torch.load(model_weights_path, map_location) - elif self.peft_model_ckpt_dir: - # if the PEFT weights are provided in a ckpt path file - # we don't need to untar - model_weights_path = self._inject_model_parallel_rank_for_ckpt( - self.peft_model_ckpt_dir, self.peft_model_ckpt_name - ) - peft_state_dict = torch.load(model_weights_path, map_location)['state_dict'] - else: - peft_state_dict = {} - if base_model_state_dict: - base_model_state_dict.update(peft_state_dict) # add the PEFT state_dict into the base model's state_dict - return base_model_state_dict - - def restore_from( - self, - calling_cls, - restore_path: str, - override_config_path: Optional[Union[OmegaConf, str]] = None, - map_location: Optional[torch.device] = None, - strict: bool = True, - return_config: bool = False, - trainer: Trainer = None, - ): - """ - Extends the restore_from method of the `NLPSaveRestoreConnector` so that PEFT params are inserted into the state_dict which is required when training a PEFT model from scratch. - """ - # Get path where the command is executed - the artifacts will be "retrieved" there - # (original .nemo behavior) - loaded_params = super().load_config_and_state_dict( - calling_cls, restore_path, override_config_path, map_location, strict, return_config, trainer, - ) - if not isinstance(loaded_params, tuple) or return_config is True: - return loaded_params - conf, instance, state_dict = loaded_params - - # if we're using dist checkpointing then state_dict will be None - if state_dict is None: - # dist checkpointing needs torch.distributed to load the checkpoint - if parallel_state.is_unitialized(): - - def dummy(): - return - - if trainer.strategy.launcher is not None: - trainer.strategy.launcher.launch(dummy, trainer=trainer) - trainer.strategy.setup_environment() - - with tempfile.TemporaryDirectory() as tmpdir: - # Check if self.model_extracted_dir is set, and is a valid path - if self.model_extracted_dir is not None and os.path.isdir(self.model_extracted_dir): - # Log that NeMo will use the provided `model_extracted_dir` - logging.info( - f"Restoration will occur within pre-extracted directory : " f"`{self.model_extracted_dir}`." - ) - - # Override `tmpdir` above with the pre-extracted `model_extracted_dir` - tmpdir = self.model_extracted_dir - - else: - # Extract the nemo file into the temporary directory - self._unpack_nemo_file( - path2file=restore_path, out_folder=tmpdir, extract_config_only=return_config is True - ) - checkpoint = {} - sharded_state_dict = instance.sharded_state_dict() - peft_state_dict = instance.get_peft_state_dict() - for k in peft_state_dict.keys(): - sharded_state_dict.pop(k) - checkpoint['state_dict'] = sharded_state_dict - # remove model weights extension - tmp_model_weights_ckpt = os.path.join(tmpdir, self.model_weights_ckpt) - tmp_model_weights_dir = os.path.splitext(tmp_model_weights_ckpt)[0] - assert os.path.isdir(tmp_model_weights_dir), f'Expected {tmp_model_weights_dir} to be a directory.' - checkpoint = dist_checkpointing.load( - sharded_state_dict=checkpoint, checkpoint_dir=tmp_model_weights_dir - ) - checkpoint['state_dict'].update(peft_state_dict) - instance.on_load_checkpoint(checkpoint) - if hasattr(instance, 'setup_transformer_engine_tp_groups'): - instance.setup_transformer_engine_tp_groups() - - else: - if ( - self.peft_model_nemo_path is None and self.peft_model_ckpt_dir is None - ): # we have this check only for training PEFT from scratch - peft_state_dict = instance.get_peft_state_dict() - state_dict.update(peft_state_dict) - state_dict = self.modify_state_dict(conf, state_dict) - self.load_instance_with_state_dict(instance, state_dict, strict) - - logging.info(f'Model {instance.__class__.__name__} was successfully restored from {restore_path}.') - return instance - - class PipelineMixedPrecisionPlugin(MixedPrecisionPlugin): """ Overrides PTL autocasting to not wrap training/val/test_step. We do this because we have the megatron-core fwd/bwd functions in training_step. diff --git a/nemo/collections/nlp/parts/peft_config.py b/nemo/collections/nlp/parts/peft_config.py new file mode 100644 index 000000000000..dd75747fd73c --- /dev/null +++ b/nemo/collections/nlp/parts/peft_config.py @@ -0,0 +1,190 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict + +from omegaconf import DictConfig + +try: + from nemo.collections.nlp.modules.common.megatron.adapters.mcore_mixins import ( + MCoreGPTEmbeddingMixin, + MCoreSelfAttentionMixin, + MCoreTransformerLayerMixin, + ) +except (ImportError, ModuleNotFoundError): + MCoreGPTEmbeddingMixin = MCoreSelfAttentionMixin = MCoreTransformerLayerMixin = None + +from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import ( + AdapterName, + InfusedAdapterConfig, + LoraKQVAdapterConfig, + LoraKQVAdapterWeightTyingConfig, + MLPInfusedAdapterConfig, + ParallelLinearAdapterConfig, + ParallelLinearAdapterWeightTyingConfig, + PromptEncoderAdapterConfig, +) + + +class PEFTConfig: + # superclass for adapter name and config + def __init__(self, peft_cfg: DictConfig, name_key_to_cfg: Dict): + self.name_key_to_cfg = name_key_to_cfg + + self.layer_selection = peft_cfg.get("layer_selection", None) + self.weight_tying = peft_cfg.get("weight_tying", False) + + def get_config_dict(self): + return self.name_key_to_cfg + + +class LoraPEFTConfig(PEFTConfig): + def __init__(self, cfg): + lora_cfg = cfg.peft.lora_tuning + if cfg.get("kv_channels", None) is None: + assert ( + cfg.hidden_size % cfg.num_attention_heads == 0 + ), 'hidden_size must be divisible by num_attention_heads if kv_channels is None' + kv_channels = cfg.hidden_size // cfg.num_attention_heads + else: + kv_channels = cfg.kv_channels + projection_size = kv_channels * cfg.num_attention_heads + + config_args = { + "in_features": cfg.hidden_size, + "out_features": 3 * projection_size, + "dim": lora_cfg.adapter_dim, + "norm_position": None, + "norm_type": None, + "activation": "identity", + "column_init_method": lora_cfg.get("column_init_method", "normal"), + "row_init_method": lora_cfg.get("row_init_method", "zero"), + "gather_output": False, + "dropout": lora_cfg.adapter_dropout, + } + + if lora_cfg.weight_tying: + position_embedding_strategy = lora_cfg.get("position_embedding_strategy", None) + if position_embedding_strategy is None: + dim_position_embeddings = 0 + elif position_embedding_strategy == "add": + dim_position_embeddings = cfg.hidden_size + elif position_embedding_strategy == "biasadd": + dim_position_embeddings = 3 * projection_size + elif position_embedding_strategy == "concat": + dim_position_embeddings = lora_cfg.adapter_dim + elif position_embedding_strategy == "mlpconcat": + dim_position_embeddings = lora_cfg.adapter_dim + else: + raise RuntimeError( + f"Unknown position embedding strategy {position_embedding_strategy} for tied weights" + ) + config_args.update( + { + "num_position_embeddings": cfg.num_layers, + "dim_position_embeddings": dim_position_embeddings, + "position_embedding_strategy": position_embedding_strategy, + } + ) + adapter_cfg = LoraKQVAdapterWeightTyingConfig(**config_args) + else: + adapter_cfg = LoraKQVAdapterConfig(**config_args) + + name_key_to_cfg = { + AdapterName.LORA_KQV_ADAPTER: adapter_cfg, + } + self.name_key_to_mcore_mixins = {AdapterName.LORA_KQV_ADAPTER: [("self_attention", MCoreSelfAttentionMixin)]} + + super().__init__(lora_cfg, name_key_to_cfg) + + +class IA3PEFTConfig(PEFTConfig): + def __init__(self, cfg): + mlp_infused_adapter_cfg = MLPInfusedAdapterConfig( + in_features=cfg.ffn_hidden_size // cfg.tensor_model_parallel_size + ) + infused_adapter_cfg = InfusedAdapterConfig(in_features=cfg.hidden_size // cfg.tensor_model_parallel_size) + + name_key_to_cfg = { + AdapterName.KEY_INFUSED: infused_adapter_cfg, + AdapterName.VALUE_INFUSED: infused_adapter_cfg, + AdapterName.MLP_INFUSED: mlp_infused_adapter_cfg, + } + + super().__init__(cfg.peft.ia3_tuning, name_key_to_cfg) + + +class PtuningPEFTConfig(PEFTConfig): + def __init__(self, cfg): + adapter_cfg = PromptEncoderAdapterConfig( + cfg.peft.p_tuning.virtual_tokens, + cfg.peft.p_tuning.bottleneck_dim, + cfg.peft.p_tuning.embedding_dim, + cfg.peft.p_tuning.init_std, + cfg.hidden_size, + ) + name_key_to_cfg = {AdapterName.PTUNING_ADAPTER: adapter_cfg} + self.name_key_to_mcore_mixins = {AdapterName.PTUNING_ADAPTER: [('embedding', MCoreGPTEmbeddingMixin)]} + self.virtual_tokens = cfg.peft.p_tuning.virtual_tokens + + super().__init__(cfg.peft.p_tuning, name_key_to_cfg) + + +class CanonicalAdaptersPEFTConfig(PEFTConfig): + def __init__(self, cfg): + adapter_tuning_cfg = cfg.peft.adapter_tuning + + config_args = { + "in_features": cfg.hidden_size, + "out_features": cfg.hidden_size, + "dim": adapter_tuning_cfg.adapter_dim, + "norm_position": adapter_tuning_cfg.get("norm_position", "pre"), + "norm_type": adapter_tuning_cfg.get("norm_type", "mixedfusedlayernorm"), + "column_init_method": adapter_tuning_cfg.get("column_init_method", "xavier"), + "row_init_method": adapter_tuning_cfg.get("row_init_method", "zero"), + "dropout": adapter_tuning_cfg.adapter_dropout, + } + + if adapter_tuning_cfg.weight_tying: + config_args.update( + { + "num_position_embeddings": cfg.num_layers * 2, + "dim_position_embeddings": cfg.hidden_size, + "position_embedding_strategy": adapter_tuning_cfg.get("position_embedding_strategy", None), + } + ) + adapter_cfg = ParallelLinearAdapterWeightTyingConfig(**config_args) + else: + adapter_cfg = ParallelLinearAdapterConfig(**config_args) + + name_key_to_cfg = { + AdapterName.PRE_ATTN_ADAPTER: adapter_cfg, + AdapterName.POST_ATTN_ADAPTER: adapter_cfg, + } + self.name_key_to_mcore_mixins = { + AdapterName.PRE_ATTN_ADAPTER: [("", MCoreTransformerLayerMixin)], + AdapterName.POST_ATTN_ADAPTER: [("", MCoreTransformerLayerMixin)], + } + + super().__init__(adapter_tuning_cfg, name_key_to_cfg) + + +PEFT_CONFIG_MAP = { + "adapter": CanonicalAdaptersPEFTConfig, + "ia3": IA3PEFTConfig, + "ptuning": PtuningPEFTConfig, + "lora": LoraPEFTConfig, + 'none': None, + None: None, +} diff --git a/nemo/core/classes/mixins/adapter_mixins.py b/nemo/core/classes/mixins/adapter_mixins.py index a7f94e90f9b9..2a05f374d464 100644 --- a/nemo/core/classes/mixins/adapter_mixins.py +++ b/nemo/core/classes/mixins/adapter_mixins.py @@ -42,6 +42,11 @@ def __post_init__(self): self.adapter_class_path = f'{self.adapter_class.__module__}.{self.adapter_class.__name__}' +class AdapterConfig: + # superclass for all adapter config dataclasses + pass + + def register_adapter(base_class: type, adapter_class: type): """ Registers a pair (Base class, Adapter class) into the adapter registry, used for de-referencing. @@ -144,8 +149,8 @@ class AdapterModuleMixin(ABC): metadata of the adapter config. .. note:: - - This module is **not** responsible for maintaining its config. Subclasses must ensure config is updated + + This module is **not** responsible for maintaining its config. Subclasses must ensure config is updated or preserved as needed. It is the responsibility of the subclasses to propagate the most up to date config to lower layers. """ @@ -153,7 +158,7 @@ class AdapterModuleMixin(ABC): adapter_global_cfg_key = "global_cfg" adapter_metadata_cfg_key = "adapter_meta_cfg" - def add_adapter(self, name: str, cfg: DictConfig, **kwargs): + def add_adapter(self, name: str, cfg: Union[DictConfig, AdapterConfig], **kwargs): """ Add an Adapter module to this module. @@ -520,7 +525,7 @@ def forward_single_enabled_adapter_( Perform the forward step of a single adapter module on some input data. .. note:: - + Subclasses can override this method to accommodate more complicate adapter forward steps. Args: @@ -608,7 +613,7 @@ def setup_adapters(self): f"Finished setup of adapter : '{full_adapter_name}'. Enabled: {adapter_cfg.get('enabled', True)}." ) - def add_adapter(self, name: str, cfg: DictConfig): + def add_adapter(self, name: str, cfg: Union[DictConfig, AdapterConfig]): """ Add an Adapter module to this model. @@ -758,7 +763,7 @@ def save_adapters(self, filepath: str, name: str = None): Utility method that saves only the adapter module(s), and not the entire model itself. This allows the sharing of adapters which are often just a fraction of the size of the full model, enabling easier deliver. - + .. note:: The saved file is a pytorch compatible pickle file, containing the state dicts of the adapter(s), @@ -840,7 +845,7 @@ def load_adapters(self, filepath: str, name: str = None, map_location: str = Non enabling easier deliver. .. note:: - + During restoration, assumes that the model does not currently already have an adapter with the name (if provided), or any adapter that shares a name with the state dict's modules (if name is not provided). This is to ensure that each adapter name is globally unique @@ -971,7 +976,7 @@ def adapter_module_names(self) -> List[str]: List of valid adapter modules that are supported by the model. .. note:: - + Subclasses should override this property and return a list of str names, of all the modules that they support, which will enable users to determine where to place the adapter modules. diff --git a/scripts/metric_calculation/peft_metric_calc.py b/scripts/metric_calculation/peft_metric_calc.py index 819d1f2a8c4c..ca13f83281c5 100755 --- a/scripts/metric_calculation/peft_metric_calc.py +++ b/scripts/metric_calculation/peft_metric_calc.py @@ -92,46 +92,35 @@ def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): def main(): parser = argparse.ArgumentParser(description='Process some integers.') parser.add_argument( - '--ground-truth', - type=str, - help="ground truth .jsonl file made from /NeMo/scripts/dataset_processing/nlp/squad/prompt_learning_squad_preprocessing.py", - ) - parser.add_argument( - '--preds', + '--pred-file', type=str, help="Text file with test set prompts + model predictions. Prediction file can be made by running NeMo/examples/nlp/language_modeling/megatron_gpt_prompt_learning_eval.py", ) parser.add_argument( - '--split-string', + '--pred-field', type=str, - help="The text at the end of the prompt, write before the predicted answer. This will be used to find the model's predictions in pred files when the pred file containers both the prompt and prediction.", - default=None, - ) # If the pred file only has preditions, just pass none + help="The field in the json file that contains the prediction tokens", + default="pred", + ) parser.add_argument( - '--answer-field', + '--ground-truth-field', type=str, help="The field in the json file that contains the ground truth tokens", - default="answer", + default="original_answers", ) args = parser.parse_args() - ground_truth_file = args.ground_truth - pred_file = args.preds + pred_file = args.pred_file scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True) preds = open(pred_file, encoding="utf-8").readlines() - ground_truth = open(ground_truth_file).readlines() f1 = exact_match = total = r_score = 0 for i in range(len(preds)): - truth = json.loads(ground_truth[i]) - pred_answer = json.loads(preds[i]) - - # Need to separate out preditions from prompt, spliting on the provided "split string" - if args.split_string is not None: - pred_answer = pred_answer["sentence"].split(args.split_string)[-1].strip() + pred_line = json.loads(preds[i]) - true_answers = truth[args.answer_field] + pred_answer = pred_line[args.pred_field] + true_answers = pred_line[args.ground_truth_field] if not isinstance(true_answers, list): true_answers = [true_answers] diff --git a/tutorials/nlp/lora.ipynb b/tutorials/nlp/lora.ipynb index a0ac1e63e836..8603bbb62411 100644 --- a/tutorials/nlp/lora.ipynb +++ b/tutorials/nlp/lora.ipynb @@ -1,38 +1,45 @@ { "cells": [ + { + "cell_type": "markdown", + "source": [ + "Currently, this notebook must be run in a NeMo container.\n", + "An example command to launch the container:\n", + "```bash\n", + "docker run --gpus all -it --rm -v :/NeMo --shm-size=8g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 \n", + "```" + ], + "metadata": { + "collapsed": false + } + }, { "cell_type": "code", - "execution_count": 2, - "id": "b7a434f4", - "metadata": {}, + "execution_count": null, "outputs": [], "source": [ - "BRANCH='main'\n", - "import os\n", - "import wget" - ] + "# Update megatron version to the newest.\n", + "!cd /workspace && python -m pip install -e git+https://github.com/NVIDIA/Megatron-LM#egg=megatron-core" + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", "execution_count": null, - "id": "developmental-gibraltar", - "metadata": {}, "outputs": [], "source": [ - "\"\"\"\n", - "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", - "\n", - "Instructions for setting up Colab are as follows:\n", - "1. Open a new Python 3 notebook.\n", - "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", - "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", - "4. Run this cell to set up dependencies.\n", - "\"\"\"\n", - "# If you're using Google Colab and not running locally, run this cell\n", - "\n", - "# install NeMo\n", - "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" - ] + "%cd /NeMo/tutorials/nlp\n", + "BRANCH='main'\n", + "import os\n", + "import wget\n", + "import sys\n", + "sys.path.insert(0, \"../..\") # find the local nemo first before the installed nemo" + ], + "metadata": { + "collapsed": false + } }, { "attachments": {}, @@ -42,16 +49,15 @@ "source": [ "### Introduction\n", "\n", - "In this notebook we demonstrate how to use NeMo's implementation of LoRA (Low Rank Adaptation) for fine-tuning large language models. Our implementation is based on the [paper](https://openreview.net/pdf?id=nZeVKeeFYf9) by Hu et al.\n", + "This notebook demonstrates how to apply PEFT in NeMo. For brevity, we have chosen LoRA as the PEFT technique and GPT as the language model, but the same recipe can be used for other PEFT techniques and language models, as described in the [Training](#training) section.\n", + "\n", + " The implementation of LoRA is based on the paper, [LoRA: Low-Rank Adaptation of Large Language Models](https://openreview.net/pdf?id=nZeVKeeFYf9) by Hu et al.\n", + "\n", + "This example demonstrates how to:\n", "\n", - "We are going to show you how to:\n", - " \n", " 1. Train a LoRA model on a simple Extractive QA task.\n", " 2. Inspect the trained LoRA model showing the parameters it contains.\n", - " 3. Run inference with the based model with the LoRA parameters.\n", - " 4. Merge the LoRA parameters into the base model and run inference again on the merged model.\n", - "\n", - "In this tutorial we will be focusing on LoRA, but the training and evaluation methods described here will be applicable for other Parameter-efficient Fine tuning (PEFT) methods in NeMo." + " 3. Run inference with the base model with the LoRA parameters." ] }, { @@ -79,7 +85,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "0dbd41fd", "metadata": {}, "outputs": [], @@ -87,7 +93,9 @@ "# You can replace DATA_DIR and NEMO_DIR with your own locations\n", "DATA_DIR = \"data\"\n", "NEMO_DIR = \".\"\n", - "os.makedirs(DATA_DIR, exist_ok=True)" + "os.makedirs(DATA_DIR, exist_ok=True)\n", + "SQUAD_DIR = os.path.join(DATA_DIR, \"SQuAD\")\n", + "os.makedirs(SQUAD_DIR, exist_ok=True)" ] }, { @@ -102,19 +110,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "e72a1dc1", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File ‘prompt_learning_squad_preprocessing.py’ already there; not retrieving.\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# download the preprocessing scripts from github for the purpose of this tutorial\n", "! wget -nc https://raw.githubusercontent.com/NVIDIA/NeMo/{BRANCH}/scripts/dataset_processing/nlp/squad/prompt_learning_squad_preprocessing.py" @@ -131,43 +130,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "fa16d8ac", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-05-30 14:07:23-- https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json\n", - "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.109.153, 185.199.111.153, 185.199.108.153, ...\n", - "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.109.153|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 30288272 (29M) [application/json]\n", - "Saving to: ‘train-v1.1.json’\n", - "\n", - "train-v1.1.json 100%[===================>] 28.88M 84.3MB/s in 0.3s \n", - "\n", - "2023-05-30 14:07:25 (84.3 MB/s) - ‘train-v1.1.json’ saved [30288272/30288272]\n", - "\n", - "--2023-05-30 14:07:26-- https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json\n", - "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.110.153, 185.199.108.153, 185.199.111.153, ...\n", - "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.110.153|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 4854279 (4.6M) [application/json]\n", - "Saving to: ‘dev-v1.1.json’\n", - "\n", - "dev-v1.1.json 100%[===================>] 4.63M --.-KB/s in 0.1s \n", - "\n", - "2023-05-30 14:07:27 (43.8 MB/s) - ‘dev-v1.1.json’ saved [4854279/4854279]\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "SQUAD_DIR = os.path.join(DATA_DIR, \"SQuAD\")\n", - "os.makedirs(SQUAD_DIR, exist_ok=True)\n", - "\n", "# Download the SQuAD dataset\n", "!wget -nc https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json\n", "!wget -nc https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json\n", @@ -177,25 +144,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "64e3e25b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Saving train split to data/SQuAD/squad_train.jsonl\n", - "100%|█████████████████████████████████| 87599/87599 [00:00<00:00, 204336.27it/s]\n", - "Saving val split to data/SQuAD/squad_val.jsonl\n", - "100%|█████████████████████████████████| 10570/10570 [00:00<00:00, 158654.55it/s]\n", - "Saving test split to data/SQuAD/squad_test_ground_truth.jsonl\n", - "100%|█████████████████████████████████| 10570/10570 [00:00<00:00, 183040.92it/s]\n", - "Saving test split to data/SQuAD/squad_test.jsonl\n", - "100%|█████████████████████████████████| 10570/10570 [00:00<00:00, 196367.94it/s]\n" - ] - } - ], + "outputs": [], "source": [ "# Preprocess squad data\n", "!python prompt_learning_squad_preprocessing.py --sft-format --data-dir {SQUAD_DIR}" @@ -203,25 +155,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "b562d1de", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"input\": \"User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\\u201310 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \\\"golden anniversary\\\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \\\"Super Bowl L\\\"), so that the logo could prominently feature the Arabic numerals 50. Question:Which NFL team represented the AFC at Super Bowl 50?\\n\\nAssistant:\", \"output\": \"Denver Broncos\"}\n", - "{\"input\": \"User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\\u201310 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \\\"golden anniversary\\\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \\\"Super Bowl L\\\"), so that the logo could prominently feature the Arabic numerals 50. Question:Which NFL team represented the NFC at Super Bowl 50?\\n\\nAssistant:\", \"output\": \"Carolina Panthers\"}\n", - "{\"input\": \"User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\\u201310 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \\\"golden anniversary\\\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \\\"Super Bowl L\\\"), so that the logo could prominently feature the Arabic numerals 50. Question:Where did Super Bowl 50 take place?\\n\\nAssistant:\", \"output\": \"Santa Clara, California\"}\n", - "{\"input\": \"User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\\u201310 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \\\"golden anniversary\\\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \\\"Super Bowl L\\\"), so that the logo could prominently feature the Arabic numerals 50. Question:Which NFL team won Super Bowl 50?\\n\\nAssistant:\", \"output\": \"Denver Broncos\"}\n", - "{\"input\": \"User: Context:Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \\\"Venite Ad Me Omnes\\\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary. Question:To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?\\n\\nAssistant:\", \"output\": \"Saint Bernadette Soubirous\"}\n", - "{\"input\": \"User: Context:Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \\\"Venite Ad Me Omnes\\\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary. Question:What is in front of the Notre Dame Main Building?\\n\\nAssistant:\", \"output\": \"a copper statue of Christ\"}\n", - "{\"input\": \"User: Context:Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \\\"Venite Ad Me Omnes\\\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary. Question:The Basilica of the Sacred heart at Notre Dame is beside to which structure?\\n\\nAssistant:\", \"output\": \"the Main Building\"}\n", - "{\"input\": \"User: Context:Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \\\"Venite Ad Me Omnes\\\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary. Question:What is the Grotto at Notre Dame?\\n\\nAssistant:\", \"output\": \"a Marian place of prayer and reflection\"}\n" - ] - } - ], + "outputs": [], "source": [ "# What the squad dataset looks like after processing\n", "! head -200 $SQUAD_DIR/squad_train.jsonl > $SQUAD_DIR/squad_short_train.jsonl\n", @@ -237,12 +174,14 @@ "metadata": {}, "source": [ "### Model Config Setup\n", - "Now we will begin setting up the config file needed for PEFT tuning. We use a single config for all supported PEFT methods (LoRA, Adapter and P-Tuning). All PEFT methods use classes defined in [megatron_gpt_peft_models.py](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py). All PEFT Classes inherit from `MegatronGPTSFTModel` which is the class that governs instruction tuning." + "Now we will begin setting up the config file needed for PEFT tuning. We use a single config for all supported PEFT methods (LoRA, Adapter, IA3 and P-Tuning, as well as combinations of these). All PEFT methods use the GPT finetuning class `MegatronGPTSFTModel` as the frozen base network, and use the `add_adapter()` method to add adapter weights for PEFT.\n", + "\n", + "Let's create a config object for LoRA training." ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "5749c387", "metadata": {}, "outputs": [], @@ -266,15 +205,15 @@ "id": "ce966bcf", "metadata": {}, "source": [ - "The `config` contains several attributes required by the `MegatronGPTPEFTModel`. First we will set the training data path and the validation data path in the config.\n", - "The `config` allows us to set a list of `jsonl` files as training files and sample examples from each file with different probabilities. For simplicity we are going to use just one training file and thus the sampling probability is set to `1.0`\n", + "The `config` contains several attributes required by the `MegatronGPTSFTModel`. First we will set the training data path and the validation data path in the config.\n", + "The `config` allows us to set a list of `jsonl` files as training files and sample examples from each file with different probabilities. For simplicity, we are going to use just one training file and thus the sampling probability is set to `1.0`\n", "\n", "We can also monitor validation loss from multiple validation files during training. Again for simplicity we will use just one validation file." ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "6bb1590f", "metadata": {}, "outputs": [], @@ -292,7 +231,7 @@ "metadata": {}, "source": [ "### PEFT Config\n", - "The attribute [config.model.peft](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml#L78) contains settings that control the PEFT training method and its related hyperpameters. We currently support `lora`, `adapters`, `ptuning` and `ia3`. We can instruct the training script to use one of these methods by setting the config.model.peft.peft_scheme attribute.\n", + "The attribute [config.model.peft](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml#L78) contains settings that control the PEFT training method and its related hyperpameters. We currently support `lora`, `adapter`, `ptuning` and `ia3`. We can instruct the training script to use one of these methods by setting the config.model.peft.peft_scheme attribute.\n", "\n", "The other hyperparams associated with lora tuning are present in the [config.model.peft.lora_tuning](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml#L92) attribute." ] @@ -324,12 +263,12 @@ "metadata": {}, "source": [ "### Prompt Formatting\n", - "The `config.model.data.train_ds.prompt_template` attribute allows us to further tweak the format of the input and output if needed. In this example, we have \"encoding\" our format inside the `jsonl` file directly. So we can keep the `prompt_template` in the config simple.(See previous section on Data Preparation). " + "The `config.model.data.train_ds.prompt_template` attribute allows us to further tweak the format of the input and output if needed. In this example, we have already incorporated our format inside the `jsonl` file during preprocessing, so we can keep the `prompt_template` in the config simple. (See previous section on Data Preparation)." ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "1b6aa5c7", "metadata": {}, "outputs": [], @@ -349,29 +288,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "48cdf868", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[NeMo W 2023-05-30 14:08:23 experimental:27] Module is experimental, not ready for production and is not fully supported. Use at your own risk.\n", - "[NeMo W 2023-05-30 14:08:24 experimental:27] Module is experimental, not ready for production and is not fully supported. Use at your own risk.\n" - ] - }, - { - "data": { - "text/plain": [ - "'https://api.ngc.nvidia.com/v2/models/nvidia/nemo/megatron_gpt_345m/versions/1/files/megatron_gpt_345m.nemo'" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Check what GPT .nemo models we have available on NGC\n", "from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel\n", @@ -391,26 +311,28 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "364439a1", "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File ‘./megatron_gpt_345m.nemo’ already there; not retrieving.\n" - ] - } - ], + "outputs": [], "source": [ "# Download the model from NGC\n", - "gpt_file_name = \"megatron_gpt_345m.nemo\"\n", - "!wget -nc --content-disposition {megatron_gpt_345m_nemo_url} -O {NEMO_DIR}/{gpt_file_name}" + "gpt_file_name = \"megatron_gpt_345m.nemo\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "!wget -nc --content-disposition {megatron_gpt_345m_nemo_url} -O {NEMO_DIR}/{gpt_file_name}" + ], + "metadata": { + "collapsed": false + } + }, { "attachments": {}, "cell_type": "markdown", @@ -422,7 +344,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "2778a5fa", "metadata": {}, "outputs": [], @@ -442,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "a278cbdf", "metadata": {}, "outputs": [], @@ -468,151 +390,12 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "12a37ada", "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "seed: 1234\n", - "tensor_model_parallel_size: 1\n", - "pipeline_model_parallel_size: 1\n", - "global_batch_size: 4\n", - "micro_batch_size: 1\n", - "restore_from_path: megatron_gpt_345m.nemo\n", - "resume_from_checkpoint: null\n", - "save_nemo_on_validation_end: false\n", - "sync_batch_comm: false\n", - "megatron_amp_O2: false\n", - "sequence_parallel: false\n", - "activations_checkpoint_granularity: null\n", - "activations_checkpoint_method: null\n", - "activations_checkpoint_num_layers: null\n", - "answer_only_loss: true\n", - "gradient_as_bucket_view: false\n", - "hidden_dropout: 0.0\n", - "attention_dropout: 0.0\n", - "ffn_dropout: 0.0\n", - "peft:\n", - " peft_scheme: adapter\n", - " restore_from_path: null\n", - " adapter_tuning:\n", - " type: parallel_adapter\n", - " adapter_dim: 32\n", - " adapter_dropout: 0.0\n", - " norm_position: pre\n", - " column_init_method: xavier\n", - " row_init_method: zero\n", - " norm_type: mixedfusedlayernorm\n", - " lora_tuning:\n", - " adapter_dim: 32\n", - " adapter_dropout: 0.0\n", - " column_init_method: xavier\n", - " row_init_method: zero\n", - " p_tuning:\n", - " virtual_tokens: 10\n", - " bottleneck_dim: 1024\n", - " embedding_dim: 1024\n", - " init_std: 0.023\n", - "data:\n", - " train_ds:\n", - " file_names:\n", - " - data/SQuAD/squad_short_train.jsonl\n", - " global_batch_size: ${model.global_batch_size}\n", - " micro_batch_size: ${model.micro_batch_size}\n", - " shuffle: true\n", - " num_workers: 0\n", - " pin_memory: true\n", - " max_seq_length: 2048\n", - " min_seq_length: 1\n", - " drop_last: true\n", - " concat_sampling_probabilities:\n", - " - 1.0\n", - " context_key: input\n", - " label_key: output\n", - " add_eos: true\n", - " add_sep: false\n", - " add_bos: false\n", - " separate_prompt_and_response_with_newline: false\n", - " truncation_field: context\n", - " index_mapping_dir: null\n", - " prompt_template: '{input} {output}'\n", - " validation_ds:\n", - " file_names:\n", - " - data/SQuAD/squad_short_val.jsonl\n", - " names:\n", - " - squad_val\n", - " global_batch_size: ${model.global_batch_size}\n", - " micro_batch_size: ${model.micro_batch_size}\n", - " shuffle: false\n", - " num_workers: 0\n", - " pin_memory: true\n", - " max_seq_length: 2048\n", - " min_seq_length: 1\n", - " drop_last: false\n", - " context_key: input\n", - " label_key: output\n", - " add_eos: ${model.data.train_ds.add_eos}\n", - " add_sep: ${model.data.train_ds.add_sep}\n", - " add_bos: ${model.data.train_ds.add_bos}\n", - " separate_prompt_and_response_with_newline: ${model.data.train_ds.separate_prompt_and_response_with_newline}\n", - " write_predictions_to_file: false\n", - " output_file_path_prefix: null\n", - " truncation_field: context\n", - " index_mapping_dir: null\n", - " prompt_template: ${model.data.train_ds.prompt_template}\n", - " metric:\n", - " name: loss\n", - " average: null\n", - " num_classes: null\n", - "test_ds:\n", - " file_names: null\n", - " names: null\n", - " global_batch_size: ${model.global_batch_size}\n", - " micro_batch_size: ${model.micro_batch_size}\n", - " shuffle: false\n", - " num_workers: 4\n", - " pin_memory: true\n", - " max_seq_length: 2048\n", - " min_seq_length: 1\n", - " drop_last: false\n", - " context_key: input\n", - " label_key: output\n", - " add_eos: ${model.data.train_ds.add_eos}\n", - " add_sep: ${model.data.train_ds.add_sep}\n", - " add_bos: ${model.data.train_ds.add_bos}\n", - " separate_prompt_and_response_with_newline: ${model.data.train_ds.separate_prompt_and_response_with_newline}\n", - " write_predictions_to_file: false\n", - " output_file_path_prefix: null\n", - " truncation_field: context\n", - " index_mapping_dir: null\n", - " prompt_template: ${model.data.train_ds.prompt_template}\n", - " metric:\n", - " name: loss\n", - " average: null\n", - " num_classes: null\n", - "optim:\n", - " name: fused_adam\n", - " lr: 0.0001\n", - " weight_decay: 0.01\n", - " betas:\n", - " - 0.9\n", - " - 0.98\n", - " sched:\n", - " name: CosineAnnealing\n", - " warmup_steps: 50\n", - " min_lr: 0.0\n", - " constant_steps: 0\n", - " monitor: val_loss\n", - " reduce_on_plateau: false\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# Final model config\n", "print(OmegaConf.to_yaml(config.model))" @@ -632,49 +415,15 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "90f85b2a", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using 16bit None Automatic Mixed Precision (AMP)\n", - "GPU available: True (cuda), used: True\n", - "TPU available: False, using: 0 TPU cores\n", - "IPU available: False, using: 0 IPUs\n", - "HPU available: False, using: 0 HPUs\n", - "`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Trainer config - \n", - "\n", - "devices: 1\n", - "accelerator: gpu\n", - "num_nodes: 1\n", - "precision: 16\n", - "logger: false\n", - "enable_checkpointing: false\n", - "use_distributed_sampler: false\n", - "max_epochs: 4\n", - "max_steps: 100\n", - "log_every_n_steps: 10\n", - "val_check_interval: 1.0\n", - "gradient_clip_val: 1.0\n", - "\n" - ] - } - ], + "outputs": [], "source": [ + "from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy\n", "import torch\n", "import pytorch_lightning as pl\n", - "from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy\n", - "from pytorch_lightning.plugins.environments import TorchElasticEnvironment\n", + "from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder\n", "\n", "# let's modify some trainer configs\n", "# check if we have GPU available and uses it\n", @@ -688,16 +437,11 @@ "config.trainer.precision = 16 if torch.cuda.is_available() else 32\n", "\n", "# setup cluster environment parameters\"\n", - "# use torch elastic cluster environment so `create_process_externally` is True\n", - "# the launcher is set to None. It will not try to spawn new processes.\n", - "# It won't create the misconfiguration error because of the `interactive session`\n", "os.environ[\"LOCAL_RANK\"] = '0'\n", "os.environ[\"RANK\"] = '0'\n", "os.environ[\"WORLD_SIZE\"] = '1'\n", "\n", - "strategy = NLPDDPStrategy(find_unused_parameters=False, no_ddp_communication_hook=True)\n", - "plugins = [TorchElasticEnvironment()]\n", - "trainer = pl.Trainer(plugins= plugins, strategy=strategy, **config.trainer)\n", + "trainer = MegatronTrainerBuilder(config).create_trainer()\n", "\n", "print(\"Trainer config - \\n\")\n", "print(OmegaConf.to_yaml(config.trainer))" @@ -726,41 +470,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "f2c943ba", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[NeMo E 2023-05-30 14:09:17 exp_manager:646] exp_manager received explicit_log_dir: training_info and at least one of exp_dir: ./peft_lora, or version: None. Please note that exp_dir, name, and version will be ignored.\n", - "[NeMo W 2023-05-30 14:09:17 exp_manager:651] Exp_manager is logging to training_info, but it already exists.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[NeMo I 2023-05-30 14:09:17 exp_manager:374] Experiments will be logged at training_info\n", - "[NeMo I 2023-05-30 14:09:17 exp_manager:797] TensorboardLogger has been set up\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[NeMo W 2023-05-30 14:09:17 exp_manager:893] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 100. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "training_info\n" - ] - } - ], + "outputs": [], "source": [ "from nemo.utils.exp_manager import exp_manager\n", "\n", @@ -780,423 +493,71 @@ "id": "298b3dce", "metadata": {}, "source": [ - "### LoRA Training\n", - "We now set up the process for training a LoRA model. We first require a config that contains details about the base language model upon which we will train our LoRA model. So we first extract the `base_model_cfg`" + "### Training\n", + "We now set up the process for training a LoRA model. We first require a config that contains details about the base language model upon which we will train our LoRA model. So we first extract the `model_cfg` from the checkpoint and update it with any new settings we employ in our current (LoRA) `config`. These are combined in the `merge_cfg_with` function.\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "edb38445", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[NeMo W 2023-05-30 14:09:30 experimental:27] Module is experimental, not ready for production and is not fully supported. Use at your own risk.\n" - ] - } - ], + "outputs": [], "source": [ - "from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTModel\n", - "from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector, PEFTSaveRestoreConnector\n", - "base_model_save_restore_connector = NLPSaveRestoreConnector()\n", - "base_model_cfg = MegatronGPTModel.restore_from(\n", - " restore_path=config.model.restore_from_path,\n", - " trainer=trainer,\n", - " return_config=True,\n", - " save_restore_connector=base_model_save_restore_connector,\n", - " )" + "from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTSFTModel\n", + "\n", + "model_cfg = MegatronGPTSFTModel.merge_cfg_with(config.model.restore_from_path, config)" ] }, { "attachments": {}, "cell_type": "markdown", - "id": "16bace39", + "id": "dfc55a1c", "metadata": {}, "source": [ - "Next, we update the `base_model_cfg` with any new settings we employ in our current (LoRA) `config`." + "Next, we instantiate the GPT model class and add the LoRA adapter\n", + "When we call `add_adapter`, the model prints out the parameter count before and after the operation. We can clearly see the number of trainable parameters increase after adding the adapter.\n", + "To print the parameter count manually, we can call `model.summarize()`." ] }, { "cell_type": "code", - "execution_count": 19, - "id": "fd350dbc", + "execution_count": null, + "id": "a81d8741", "metadata": {}, "outputs": [], "source": [ - "from omegaconf.omegaconf import open_dict\n", - "from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import MegatronGPTLoRAModel\n", - "OmegaConf.set_struct(base_model_cfg, True)\n", - "OmegaConf.resolve(config)\n", - "with open_dict(base_model_cfg):\n", - " base_model_cfg.megatron_amp_O2 = config.model.get('megatron_amp_O2', False)\n", - " base_model_cfg.micro_batch_size = config.model.data.train_ds.micro_batch_size\n", - " base_model_cfg.global_batch_size = config.model.data.train_ds.global_batch_size\n", - " base_model_cfg.sequence_parallel = config.model.get(\"sequence_parallel\", False)\n", - " base_model_cfg.data = config.model.data\n", - " base_model_cfg.optim = config.model.optim\n", - " base_model_cfg.precision = config.trainer.precision\n", - " base_model_cfg.answer_only_loss = config.model.answer_only_loss\n", - " base_model_cfg.restore_from_path = config.model.restore_from_path\n", - " base_model_cfg.resume_from_checkpoint = config.model.resume_from_checkpoint\n", - " base_model_cfg.save_nemo_on_validation_end = config.model.save_nemo_on_validation_end\n", - " base_model_cfg.peft = config.model.peft\n", - " base_model_cfg.target = f\"{MegatronGPTLoRAModel.__module__}.{MegatronGPTLoRAModel.__name__}\"" + "from nemo.collections.nlp.parts.peft_config import LoraPEFTConfig\n", + "\n", + "model = MegatronGPTSFTModel.restore_from(config.model.restore_from_path, model_cfg, trainer=trainer)\n", + "model.add_adapter(LoraPEFTConfig(model_cfg))\n", + "# print(\"Parameter count manually:\\n\", model.summarize())" ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "dfc55a1c", - "metadata": {}, "source": [ - "Next, we instantiate the LoRA model class" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "a81d8741", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[NeMo I 2023-05-30 14:09:39 megatron_init:232] Rank 0 has data parallel group: [0]\n", - "[NeMo I 2023-05-30 14:09:39 megatron_init:235] All data parallel group ranks: [[0]]\n", - "[NeMo I 2023-05-30 14:09:39 megatron_init:236] Ranks 0 has data parallel rank: 0\n", - "[NeMo I 2023-05-30 14:09:39 megatron_init:244] Rank 0 has model parallel group: [0]\n", - "[NeMo I 2023-05-30 14:09:39 megatron_init:245] All model parallel group ranks: [[0]]\n", - "[NeMo I 2023-05-30 14:09:39 megatron_init:255] Rank 0 has tensor model parallel group: [0]\n", - "[NeMo I 2023-05-30 14:09:39 megatron_init:259] All tensor model parallel group ranks: [[0]]\n", - "[NeMo I 2023-05-30 14:09:39 megatron_init:260] Rank 0 has tensor model parallel rank: 0\n", - "[NeMo I 2023-05-30 14:09:39 megatron_init:274] Rank 0 has pipeline model parallel group: [0]\n", - "[NeMo I 2023-05-30 14:09:39 megatron_init:286] Rank 0 has embedding group: [0]\n", - "[NeMo I 2023-05-30 14:09:39 megatron_init:292] All pipeline model parallel group ranks: [[0]]\n", - "[NeMo I 2023-05-30 14:09:39 megatron_init:293] Rank 0 has pipeline model parallel rank 0\n", - "[NeMo I 2023-05-30 14:09:39 megatron_init:294] All embedding group ranks: [[0]]\n", - "[NeMo I 2023-05-30 14:09:39 megatron_init:295] Rank 0 has embedding rank: 0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[NeMo W 2023-05-30 14:09:39 modelPT:244] You tried to register an artifact under config key=tokenizer.vocab_file but an artifact for it has already been registered.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[NeMo I 2023-05-30 14:09:39 tokenizer_utils:204] Getting Megatron tokenizer for pretrained model name: megatron-gpt-345m, custom vocab file: /tmp/tmp1qljai9b/bfcdca5e44814366bdb5dcd651325152_gpt2-vocab.json, and merges file: /tmp/tmp1qljai9b/315a11fd68be49d6abdb34363e8c4997_gpt2-merge.txt\n", - "[NeMo I 2023-05-30 14:09:39 tokenizer_utils:130] Getting HuggingFace AutoTokenizer with pretrained_model_name: gpt2, vocab_file: /tmp/tmp1qljai9b/bfcdca5e44814366bdb5dcd651325152_gpt2-vocab.json, merges_files: /tmp/tmp1qljai9b/315a11fd68be49d6abdb34363e8c4997_gpt2-merge.txt, special_tokens_dict: {}, and use_fast: False\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using sep_token, but it is not set yet.\n", - "Using cls_token, but it is not set yet.\n", - "Using pad_token, but it is not set yet.\n", - "Using mask_token, but it is not set yet.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[NeMo I 2023-05-30 14:09:40 megatron_base_model:238] Padded vocab_size: 50304, original vocab_size: 50257, dummy tokens: 47.\n", - "[NeMo I 2023-05-30 14:09:41 megatron_gpt_peft_models:56] Before adding PEFT params:\n", - " | Name | Type | Params\n", - " -----------------------------------\n", - " 0 | model | GPTModel | 354 M \n", - " -----------------------------------\n", - " 354 M Trainable params\n", - " 0 Non-trainable params\n", - " 354 M Total params\n", - " 1,419.485 Total estimated model params size (MB)\n", - "[NeMo I 2023-05-30 14:09:41 megatron_gpt_peft_models:65] After adding PEFT params:\n", - " | Name | Type | Params\n", - " -----------------------------------\n", - " 0 | model | GPTModel | 358 M \n", - " -----------------------------------\n", - " 358 M Trainable params\n", - " 0 Non-trainable params\n", - " 358 M Total params\n", - " 1,432.068 Total estimated model params size (MB)\n", - "[NeMo I 2023-05-30 14:09:42 nlp_overrides:491] Model MegatronGPTLoRAModel was successfully restored from /home/adithyare/NeMo/tutorials/nlp/megatron_gpt_345m.nemo.\n" - ] - } + "Simply substitute with the `MegatronT5SFTModel` class to use T5 instead of GPT.\n", + "\n", + "To use a different PEFT method, you can use a different config class in place of `LoraPEFTConfig`, such as `CanonicalAdaptersPEFTConfig`, `IA3PEFTConfig`, `PtuningPEFTConfig`. You can also use a combination of the methods by passing in a list:\n", + "`model.add_adapter([LoraPEFTConfig(model_cfg), PtuningPEFTConfig(model_cfg)])`\n", + "\n", + "We're now ready to start training." ], - "source": [ - "from nemo.collections.nlp.parts.nlp_overrides import PEFTSaveRestoreConnector\n", - "peft_save_restore_connector = PEFTSaveRestoreConnector(\n", - " peft_model_nemo_path=None, peft_model_ckpt_path=None\n", - " )\n", - "model = MegatronGPTLoRAModel.restore_from(\n", - " restore_path=config.model.restore_from_path,\n", - " trainer=trainer,\n", - " override_config_path=base_model_cfg,\n", - " save_restore_connector=peft_save_restore_connector,\n", - ")" - ] + "metadata": { + "collapsed": false + } }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "2d99f433", "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[NeMo W 2023-05-30 14:09:46 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/configuration_validator.py:175: UserWarning: The `batch_idx` argument in `MegatronGPTLoRAModel.on_train_batch_start` hook may not match with the actual batch index when using a `dataloader_iter` argument in your `training_step`.\n", - " rank_zero_warn(\n", - " \n", - "[NeMo W 2023-05-30 14:09:46 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/configuration_validator.py:175: UserWarning: The `batch_idx` argument in `MegatronGPTLoRAModel.on_train_batch_end` hook may not match with the actual batch index when using a `dataloader_iter` argument in your `training_step`.\n", - " rank_zero_warn(\n", - " \n", - "[NeMo W 2023-05-30 14:09:46 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/lightning_fabric/plugins/environments/torchelastic.py:36: UserWarning: MASTER_ADDR environment variable is not defined. Set as localhost\n", - " rank_zero_warn(\"MASTER_ADDR environment variable is not defined. Set as localhost\")\n", - " \n", - "[NeMo W 2023-05-30 14:09:46 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/lightning_fabric/plugins/environments/torchelastic.py:44: UserWarning: MASTER_PORT environment variable is not defined. Set as 12910\n", - " rank_zero_warn(\"MASTER_PORT environment variable is not defined. Set as 12910\")\n", - " \n", - "Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1\n", - "----------------------------------------------------------------------------------------------------\n", - "distributed_backend=nccl\n", - "All distributed processes registered. Starting with 1 processes\n", - "----------------------------------------------------------------------------------------------------\n", - "\n", - "You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n", - "[NeMo W 2023-05-30 14:09:46 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:613: UserWarning: Checkpoint directory /home/adithyare/NeMo/tutorials/nlp/training_info/checkpoints exists and is not empty.\n", - " rank_zero_warn(f\"Checkpoint directory {dirpath} exists and is not empty.\")\n", - " \n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[NeMo I 2023-05-30 14:09:46 megatron_gpt_sft_model:634] Building GPT SFT validation datasets.\n", - "[NeMo I 2023-05-30 14:09:46 text_memmap_dataset:104] Building data files\n", - "[NeMo I 2023-05-30 14:09:46 text_memmap_dataset:343] Processing 1 data files using 12 workers\n", - "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:349] Time building 0 / 1 mem-mapped files: 0:00:00.360761\n", - "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:114] Loading data files\n", - "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:205] Loading data/SQuAD/squad_short_val.jsonl\n", - "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:117] Time loading 1 mem-mapped files: 0:00:00.002361\n", - "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:121] Computing global indices\n", - "[NeMo I 2023-05-30 14:09:47 megatron_gpt_sft_model:637] Length of val dataset: 20\n", - "[NeMo I 2023-05-30 14:09:47 megatron_gpt_sft_model:648] Building GPT SFT traing datasets.\n", - "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:104] Building data files\n", - "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:343] Processing 1 data files using 12 workers\n", - "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:349] Time building 0 / 1 mem-mapped files: 0:00:00.299554\n", - "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:114] Loading data files\n", - "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:205] Loading data/SQuAD/squad_short_train.jsonl\n", - "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:117] Time loading 1 mem-mapped files: 0:00:00.001065\n", - "[NeMo I 2023-05-30 14:09:47 text_memmap_dataset:121] Computing global indices\n", - "[NeMo I 2023-05-30 14:09:47 dataset_utils:1341] > loading indexed mapping from data/SQuAD/squad_short_train.jsonl_squad_short_train.jsonl_indexmap_402mns_2046msl_0.00ssp_1234s.npy\n", - "[NeMo I 2023-05-30 14:09:47 dataset_utils:1344] loaded indexed file in 0.001 seconds\n", - "[NeMo I 2023-05-30 14:09:47 dataset_utils:1345] total number of samples: 600\n", - "make: Entering directory '/home/adithyare/NeMo/nemo/collections/nlp/data/language_modeling/megatron'\n", - "make: Nothing to be done for 'default'.\n", - "make: Leaving directory '/home/adithyare/NeMo/nemo/collections/nlp/data/language_modeling/megatron'\n", - "[NeMo I 2023-05-30 14:09:47 blendable_dataset:67] > elapsed time for building blendable dataset indices: 0.09 (sec)\n", - "> building indices for blendable datasets ...\n", - " > sample ratios:\n", - " dataset 0, input: 1, achieved: 1\n", - "[NeMo I 2023-05-30 14:09:47 megatron_gpt_sft_model:650] Length of train dataset: 402\n", - "[NeMo I 2023-05-30 14:09:47 megatron_gpt_sft_model:655] Building dataloader with consumed samples: 0\n", - "[NeMo I 2023-05-30 14:09:47 megatron_gpt_sft_model:655] Building dataloader with consumed samples: 0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[NeMo I 2023-05-30 14:09:47 nlp_overrides:124] Configuring DDP for model parallelism.\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 adapter_mixins:430] Unfrozen adapter : lora_kqv_adapter\n", - "[NeMo I 2023-05-30 14:09:47 megatron_gpt_peft_models:130] Optimizer groups set:\n", - " | Name | Type | Params\n", - " -----------------------------------\n", - " 0 | model | GPTModel | 358 M \n", - " -----------------------------------\n", - " 3.1 M Trainable params\n", - " 354 M Non-trainable params\n", - " 358 M Total params\n", - " 716.034 Total estimated model params size (MB)\n", - "[NeMo I 2023-05-30 14:09:47 modelPT:721] Optimizer config = FusedAdam (\n", - " Parameter Group 0\n", - " betas: [0.9, 0.98]\n", - " bias_correction: True\n", - " eps: 1e-08\n", - " lr: 0.0001\n", - " weight_decay: 0.01\n", - " )\n", - "[NeMo I 2023-05-30 14:09:47 lr_scheduler:910] Scheduler \"\" \n", - " will be used during training (effective maximum steps = 100) - \n", - " Parameters : \n", - " (warmup_steps: 50\n", - " min_lr: 0.0\n", - " constant_steps: 0\n", - " max_steps: 100\n", - " )\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - " | Name | Type | Params\n", - "-----------------------------------\n", - "0 | model | GPTModel | 358 M \n", - "-----------------------------------\n", - "3.1 M Trainable params\n", - "354 M Non-trainable params\n", - "358 M Total params\n", - "716.034 Total estimated model params size (MB)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3cb87a7b9d4b46e4a0fb0f0670351fbd", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Sanity Checking: 0it [00:00, ?it/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[NeMo W 2023-05-30 14:09:48 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:224: PossibleUserWarning: The dataloader, val_dataloader 0, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 24 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n", - " rank_zero_warn(\n", - " \n", - "[NeMo W 2023-05-30 14:09:48 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/loops/dataloader/evaluation_loop.py:401: UserWarning: Found `dataloader_iter` argument in the `validation_step`. Note that the support for this signature is experimental and the behavior is subject to change.\n", - " rank_zero_warn(\n", - " \n", - "[NeMo W 2023-05-30 14:09:48 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/apex/transformer/pipeline_parallel/utils.py:81: UserWarning: This function is only for unittest\n", - " warnings.warn(\"This function is only for unittest\")\n", - " \n", - "[NeMo W 2023-05-30 14:09:49 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:536: PossibleUserWarning: It is recommended to use `self.log('val_loss', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.\n", - " warning_cache.warn(\n", - " \n", - "[NeMo W 2023-05-30 14:09:49 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:536: PossibleUserWarning: It is recommended to use `self.log('validation_loss_squad_val', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.\n", - " warning_cache.warn(\n", - " \n", - "[NeMo W 2023-05-30 14:09:49 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:536: PossibleUserWarning: It is recommended to use `self.log('validation_loss', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.\n", - " warning_cache.warn(\n", - " \n", - "[NeMo W 2023-05-30 14:09:49 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:224: PossibleUserWarning: The dataloader, train_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 24 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n", - " rank_zero_warn(\n", - " \n", - "[NeMo W 2023-05-30 14:09:49 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:344: UserWarning: Found `dataloader_iter` argument in the `training_step`. Note that the support for this signature is experimental and the behavior is subject to change.\n", - " rank_zero_warn(\n", - " \n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c7a473adeca64c828d2a1338dab1e76b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Training: 0it [00:00, ?it/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[NeMo W 2023-05-30 14:09:51 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:232: UserWarning: You called `self.log('global_step', ...)` in your `training_step` but the value needs to be floating point. Converting it to torch.float32.\n", - " warning_cache.warn(\n", - " \n", - "[NeMo W 2023-05-30 14:09:51 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:232: UserWarning: You called `self.log('consumed_samples', ...)` in your `training_step` but the value needs to be floating point. Converting it to torch.float32.\n", - " warning_cache.warn(\n", - " \n", - "[NeMo W 2023-05-30 14:09:51 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/torch/optim/lr_scheduler.py:139: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", - " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", - " \n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a0606700c7ab495eb08ed88c16949569", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Validation: 0it [00:00, ?it/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Epoch 0, global step 100: 'validation_loss' reached 0.30823 (best 0.30823), saving model to '/home/adithyare/NeMo/tutorials/nlp/training_info/checkpoints/lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-v2.ckpt' as top 1\n", - "Metric val_loss improved. New best score: 0.308\n", - "`Trainer.fit` stopped: `max_steps=100` reached.\n", - "Restoring states from the checkpoint path at /home/adithyare/NeMo/tutorials/nlp/training_info/checkpoints/lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-v2.ckpt\n", - "Restored all states from the checkpoint file at /home/adithyare/NeMo/tutorials/nlp/training_info/checkpoints/lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-v2.ckpt\n" - ] - } - ], + "outputs": [], "source": [ - "# Training set to 2 epochs by default in a cell above\n", "trainer.fit(model)" ] }, @@ -1206,31 +567,15 @@ "id": "b8210d6d", "metadata": {}, "source": [ - "Once training is completed you should see a saved '.nemo' file in this folder `{config.exp_manager.explicit_log_dir}/checkpoints`" + "Once training is completed you should see a saved '.nemo' file in this folder `{config.exp_manager.explicit_log_dir}/checkpoints`. This checkpoint will only contain the trained adapter weights, and not the frozen base model weights." ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "e4e19e65", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "total 230M\n", - "-rw-rw-r-- 1 adithyare adithyare 14M May 30 14:10 lora_example_tuning.nemo\n", - "-rw-rw-r-- 1 adithyare adithyare 37M May 27 09:47 'lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0.ckpt'\n", - "-rw-rw-r-- 1 adithyare adithyare 37M May 27 09:47 'lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-last.ckpt'\n", - "-rw-rw-r-- 1 adithyare adithyare 37M May 30 11:12 'lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-last-v1.ckpt'\n", - "-rw-rw-r-- 1 adithyare adithyare 37M May 30 14:10 'lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-last-v2.ckpt'\n", - "-rw-rw-r-- 1 adithyare adithyare 37M May 30 11:12 'lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-v1.ckpt'\n", - "-rw-rw-r-- 1 adithyare adithyare 37M May 30 14:10 'lora_example_tuning--validation_loss=0.308-step=100-consumed_samples=396.0-v2.ckpt'\n", - "training_info\n" - ] - } - ], + "outputs": [], "source": [ "# The trained '.nemo' model is saved in the location below:\n", "! ls -lh {config.exp_manager.explicit_log_dir}/checkpoints\n", @@ -1244,21 +589,62 @@ "metadata": {}, "source": [ "### Inference\n", - "The model object from `trainer.fit(model)` is also capable of doing inference. But for the tutorial we will re-load the saved `.nemo` lora model along with a `.nemo` base language model to simulate a more realistic scenario (where training does not happen right before inference).\n", + "The model object from `trainer.fit(model)` is also capable of doing inference. For the tutorial, however, we will re-load the saved `.nemo` lora model along with a `.nemo` base language model to simulate a more realistic scenario (where training does not happen right before inference).\n", "\n", - "First, we will load and modify a config file that will be used for inference." + "Run the cell below to reimport libraries and classes in case you did not run the training cells above." ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, + "outputs": [], + "source": [ + "# reimport libraries and classes in case one wants to only run cells from the Inference section\n", + "%cd /NeMo/tutorials/nlp\n", + "import wget, os, sys\n", + "sys.path.insert(0, \"../..\") # find the local nemo first before the installed nemo\n", + "from omegaconf import OmegaConf\n", + "from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder\n", + "from nemo.collections.nlp.parts.peft_config import LoraPEFTConfig\n", + "from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTSFTModel\n", + "\n", + "NEMO_DIR = \".\"\n", + "DATA_DIR = \"data\"\n", + "CONFIG_DIR = os.path.join(NEMO_DIR, \"conf\")\n", + "SQUAD_DIR = os.path.join(DATA_DIR, \"SQuAD\")\n" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "First, we will load and modify a config file that will be used for inference.\n" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Download the example config file\n", + "wget.download(f'https://raw.githubusercontent.com/NVIDIA/NeMo/{BRANCH}/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml', CONFIG_DIR)" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, "id": "41ab98a9", "metadata": {}, "outputs": [], "source": [ - "# Download the example config file\n", - "wget.download(f'https://raw.githubusercontent.com/NVIDIA/NeMo/{BRANCH}/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml', CONFIG_DIR)\n", - "\n", "# Load the example config file so we can start editing it\n", "CONFIG_EVAL_PATH = os.path.join(CONFIG_DIR, \"megatron_gpt_peft_eval_config.yaml\")\n", "config_eval = OmegaConf.load(CONFIG_EVAL_PATH)" @@ -1277,7 +663,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "64a4e71a", "metadata": {}, "outputs": [], @@ -1294,28 +680,12 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "d8ace8f9", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using 16bit None Automatic Mixed Precision (AMP)\n", - "GPU available: True (cuda), used: True\n", - "TPU available: False, using: 0 TPU cores\n", - "IPU available: False, using: 0 IPUs\n", - "HPU available: False, using: 0 HPUs\n" - ] - } - ], + "outputs": [], "source": [ - "strategy_eval = NLPDDPStrategy(find_unused_parameters=False, no_ddp_communication_hook=True)\n", - "plugins_eval = [TorchElasticEnvironment()]\n", - "# notice the plugins, strategy and config.trainer args are the same as is training portion of this tutorial\n", - "# we just create a new object with no overlap from the training section of this tutorial\n", - "trainer_eval = pl.Trainer(plugins= plugins_eval, strategy=strategy_eval, **config_eval.trainer) " + "trainer_eval = MegatronTrainerBuilder(config_eval).create_trainer()" ] }, { @@ -1324,63 +694,47 @@ "id": "e745ac5e", "metadata": {}, "source": [ - "The `config_eval` object is the hydra config at \"inference/test time\". This means it should contain information relevant for inference/test time. But we still need to know some properties that were set at training time. For example, was the training done with `BOS` enabled or not? And other model specific attributes.\n", + "The `config_eval` object is the hydra config at \"inference/test time\". This means it should contain information relevant for inference/test time, although some properties that were set at training time are still relevant. For example, whether training was done with `BOS` enabled or not, and other model specific attributes.\n", "\n", - "So we extract the `peft_model_cfg` from the '.nemo' file of the lora model we just trained." + "So we extract the relevant information from the '.nemo' file of the lora model we just trained using the `merge_inference_cfg` function." ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "e04a2201", "metadata": {}, "outputs": [], "source": [ - "from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import MegatronGPTPEFTModel\n", - "peft_model_cfg = MegatronGPTPEFTModel.restore_from(\n", - " restore_path=\"./training_info/checkpoints/lora_example_tuning.nemo\", trainer=trainer_eval, return_config=True,\n", - ")" + "eval_model_cfg = MegatronGPTSFTModel.merge_inference_cfg(config_eval.model.peft.restore_from_path, config_eval)" ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "79a17ac7", - "metadata": {}, "source": [ - "We modify `peft_model_cfg` to include attributes from the `config_eval` that are specific to inference time." - ] + "The cell below is required if you are running the notebook end-to-end, and if you use a different batch size for training and evaluation. In this case, the microbatch calculator needs to be rest. If you are running training only or inference only, feel free to ignore this cell." + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", - "execution_count": 27, - "id": "0e0a17aa", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'file_names': ['data/SQuAD/squad_short_val.jsonl'], 'names': ['test_set'], 'global_batch_size': 1, 'micro_batch_size': 1, 'shuffle': False, 'num_workers': 0, 'pin_memory': True, 'max_seq_length': 2048, 'min_seq_length': 1, 'drop_last': False, 'context_key': '${data.train_ds.context_key}', 'label_key': '${data.train_ds.label_key}', 'add_eos': '${data.train_ds.add_eos}', 'add_sep': '${data.train_ds.add_sep}', 'add_bos': '${data.train_ds.add_bos}', 'separate_prompt_and_response_with_newline': '${data.train_ds.separate_prompt_and_response_with_newline}', 'write_predictions_to_file': False, 'output_file_path_prefix': None, 'truncation_field': '${data.train_ds.truncation_field}', 'index_mapping_dir': None, 'prompt_template': '${data.train_ds.prompt_template}', 'tokens_to_generate': 30, 'metric': {'name': 'loss', 'average': None, 'num_classes': None}}\n" - ] - } - ], + "execution_count": null, + "outputs": [], "source": [ - "with open_dict(peft_model_cfg):\n", - " # update the model config of the trained model with params we want to set at inference time.\n", - " peft_model_cfg.precision = config_eval.trainer.precision\n", - " peft_model_cfg.data.test_ds = config_eval.model.data.test_ds\n", - " peft_model_cfg.activations_checkpoint_granularity = None\n", - " peft_model_cfg.activations_checkpoint_method = None\n", - "\n", - "with open_dict(config_eval):\n", - " # update the config with the trained model config\n", - " # required for hydra interpolation to work inside cfg.inference\n", - " config_eval.inference.add_BOS = peft_model_cfg.data.test_ds.add_bos\n", - " config_eval.inference.tokens_to_generate = peft_model_cfg.data.test_ds.tokens_to_generate\n", - "\n", - "print(peft_model_cfg.data.test_ds)" - ] + "from apex.transformer.pipeline_parallel.utils import _reconfigure_microbatch_calculator\n", + "_reconfigure_microbatch_calculator(\n", + " rank=0,\n", + " rampup_batch_size=None,\n", + " global_batch_size=config_eval.model.global_batch_size,\n", + " micro_batch_size=config_eval.model.micro_batch_size,\n", + " data_parallel_size=1,\n", + ")" + ], + "metadata": { + "collapsed": false + } }, { "attachments": {}, @@ -1388,101 +742,21 @@ "id": "132ae378", "metadata": {}, "source": [ - "Next, we load the base language model as well as the lora model we just trained." + "Then, we load the base language model as well as the lora model we just trained." ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "b19cd0ce", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[NeMo I 2023-05-30 14:11:11 megatron_init:232] Rank 0 has data parallel group: [0]\n", - "[NeMo I 2023-05-30 14:11:11 megatron_init:235] All data parallel group ranks: [[0]]\n", - "[NeMo I 2023-05-30 14:11:11 megatron_init:236] Ranks 0 has data parallel rank: 0\n", - "[NeMo I 2023-05-30 14:11:11 megatron_init:244] Rank 0 has model parallel group: [0]\n", - "[NeMo I 2023-05-30 14:11:11 megatron_init:245] All model parallel group ranks: [[0]]\n", - "[NeMo I 2023-05-30 14:11:11 megatron_init:255] Rank 0 has tensor model parallel group: [0]\n", - "[NeMo I 2023-05-30 14:11:11 megatron_init:259] All tensor model parallel group ranks: [[0]]\n", - "[NeMo I 2023-05-30 14:11:11 megatron_init:260] Rank 0 has tensor model parallel rank: 0\n", - "[NeMo I 2023-05-30 14:11:11 megatron_init:274] Rank 0 has pipeline model parallel group: [0]\n", - "[NeMo I 2023-05-30 14:11:11 megatron_init:286] Rank 0 has embedding group: [0]\n", - "[NeMo I 2023-05-30 14:11:11 megatron_init:292] All pipeline model parallel group ranks: [[0]]\n", - "[NeMo I 2023-05-30 14:11:11 megatron_init:293] Rank 0 has pipeline model parallel rank 0\n", - "[NeMo I 2023-05-30 14:11:11 megatron_init:294] All embedding group ranks: [[0]]\n", - "[NeMo I 2023-05-30 14:11:11 megatron_init:295] Rank 0 has embedding rank: 0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[NeMo W 2023-05-30 14:11:11 modelPT:244] You tried to register an artifact under config key=tokenizer.vocab_file but an artifact for it has already been registered.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[NeMo I 2023-05-30 14:11:11 tokenizer_utils:204] Getting Megatron tokenizer for pretrained model name: megatron-gpt-345m, custom vocab file: /tmp/tmp5lxz3z8d/bfcdca5e44814366bdb5dcd651325152_gpt2-vocab.json, and merges file: /tmp/tmp5lxz3z8d/315a11fd68be49d6abdb34363e8c4997_gpt2-merge.txt\n", - "[NeMo I 2023-05-30 14:11:11 tokenizer_utils:130] Getting HuggingFace AutoTokenizer with pretrained_model_name: gpt2, vocab_file: /tmp/tmp5lxz3z8d/bfcdca5e44814366bdb5dcd651325152_gpt2-vocab.json, merges_files: /tmp/tmp5lxz3z8d/315a11fd68be49d6abdb34363e8c4997_gpt2-merge.txt, special_tokens_dict: {}, and use_fast: False\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using sep_token, but it is not set yet.\n", - "Using cls_token, but it is not set yet.\n", - "Using pad_token, but it is not set yet.\n", - "Using mask_token, but it is not set yet.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[NeMo I 2023-05-30 14:11:12 megatron_base_model:238] Padded vocab_size: 50304, original vocab_size: 50257, dummy tokens: 47.\n", - "[NeMo I 2023-05-30 14:11:12 build_model:143] > number of parameters on (tensor, pipeline) model parallel rank (0, 0): 354871296\n", - "[NeMo I 2023-05-30 14:11:12 megatron_gpt_peft_models:56] Before adding PEFT params:\n", - " | Name | Type | Params\n", - " -----------------------------------\n", - " 0 | model | GPTModel | 354 M \n", - " -----------------------------------\n", - " 354 M Trainable params\n", - " 0 Non-trainable params\n", - " 354 M Total params\n", - " 1,419.485 Total estimated model params size (MB)\n", - "[NeMo I 2023-05-30 14:11:12 megatron_gpt_peft_models:65] After adding PEFT params:\n", - " | Name | Type | Params\n", - " -----------------------------------\n", - " 0 | model | GPTModel | 358 M \n", - " -----------------------------------\n", - " 358 M Trainable params\n", - " 0 Non-trainable params\n", - " 358 M Total params\n", - " 1,432.068 Total estimated model params size (MB)\n", - "[NeMo I 2023-05-30 14:11:13 nlp_overrides:491] Model MegatronGPTLoRAModel was successfully restored from /home/adithyare/NeMo/tutorials/nlp/megatron_gpt_345m.nemo.\n" - ] - } - ], + "outputs": [], "source": [ - "save_restore_connector = PEFTSaveRestoreConnector(\n", - " peft_model_nemo_path=config_eval.model.peft.restore_from_path, peft_model_ckpt_path=None,\n", - ")\n", - "from nemo.collections.nlp.models.nlp_model import NLPModel\n", - "model_eval = MegatronGPTPEFTModel.restore_from(\n", - " restore_path=config_eval.model.restore_from_path,\n", - " trainer=trainer,\n", - " override_config_path=peft_model_cfg,\n", - " save_restore_connector=save_restore_connector,\n", - ")\n", + "model_eval = MegatronGPTSFTModel.restore_from(config_eval.model.restore_from_path, eval_model_cfg, trainer=trainer_eval)\n", + "model_eval.load_adapters(config_eval.model.peft.restore_from_path, LoraPEFTConfig(eval_model_cfg))\n", + "model_eval.freeze()\n", "\n", - "model_eval.freeze()" + "print(\"Parameter count manually:\\n\", model_eval.summarize())" ] }, { @@ -1496,34 +770,20 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "12c390f8", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[NeMo I 2023-05-30 14:11:18 text_memmap_dataset:104] Building data files\n", - "[NeMo I 2023-05-30 14:11:18 text_memmap_dataset:343] Processing 1 data files using 12 workers\n", - "[NeMo I 2023-05-30 14:11:18 text_memmap_dataset:349] Time building 0 / 1 mem-mapped files: 0:00:00.706630\n", - "[NeMo I 2023-05-30 14:11:18 text_memmap_dataset:114] Loading data files\n", - "[NeMo I 2023-05-30 14:11:18 text_memmap_dataset:205] Loading data/SQuAD/squad_short_val.jsonl\n", - "[NeMo I 2023-05-30 14:11:18 text_memmap_dataset:117] Time loading 1 mem-mapped files: 0:00:00.001054\n", - "[NeMo I 2023-05-30 14:11:18 text_memmap_dataset:121] Computing global indices\n" - ] - } - ], + "outputs": [], "source": [ - "_test_ds = model_eval._build_dataset(peft_model_cfg.data.test_ds, is_train=False)\n", + "_test_ds = model_eval._build_dataset(eval_model_cfg.data.test_ds, is_train=False)\n", "from torch.utils.data import DataLoader\n", "request_dl = DataLoader(\n", " dataset=_test_ds[0],\n", - " batch_size=peft_model_cfg.data.test_ds.global_batch_size,\n", + " batch_size=eval_model_cfg.data.test_ds.global_batch_size,\n", " collate_fn=_test_ds[0].collate_fn,\n", ")\n", "config_inference = OmegaConf.to_container(config_eval.inference, resolve=True)\n", - "model_eval.set_inference_config(config_inference)\n" + "model_eval.set_inference_config(config_inference)" ] }, { @@ -1535,161 +795,14 @@ "And finally, we call `trainer.predict` which triggers the inference process. The `response` object contains the outputs of the model." ] }, - { - "cell_type": "markdown", - "id": "733c172c", - "metadata": {}, - "source": [] - }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "5ba6a70c", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n", - "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n", - "[NeMo W 2023-05-30 14:11:30 nemo_logging:349] /home/adithyare/miniconda3/envs/n22/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:224: PossibleUserWarning: The dataloader, predict_dataloader 0, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 24 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n", - " rank_zero_warn(\n", - " \n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ddcc3ce26ed74665a8429953b929a037", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Predicting: 100it [00:00, ?it/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[NeMo W 2023-05-30 14:11:30 nemo_logging:349] /home/adithyare/NeMo/nemo/collections/nlp/modules/common/text_generation_utils.py:306: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /opt/conda/conda-bld/pytorch_1678402379298/work/torch/csrc/utils/tensor_numpy.cpp:206.)\n", - " string_tensor = torch.as_tensor(\n", - " \n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:Which NFL team represented the AFC at Super Bowl 50?\n", - "\n", - "Assistant: Denver Broncos\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:Which NFL team represented the NFC at Super Bowl 50?\n", - "\n", - "Assistant: Denver Broncos\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:Where did Super Bowl 50 take place?\n", - "\n", - "Assistant: Santa Clara, California\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:Which NFL team won Super Bowl 50?\n", - "\n", - "Assistant: Denver Broncos\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What color was used to emphasize the 50th anniversary of the Super Bowl?\n", - "\n", - "Assistant: gold\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What was the theme of Super Bowl 50?\n", - "\n", - "Assistant: \"Gold\"\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What day was the game played on?\n", - "\n", - "Assistant: February 7, 2016\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What is the AFC short for?\n", - "\n", - "Assistant: Super Bowl 50\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What was the theme of Super Bowl 50?\n", - "\n", - "Assistant: \"Gold\"\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What does AFC stand for?\n", - "\n", - "Assistant: Super Bowl L\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What day was the Super Bowl played on?\n", - "\n", - "Assistant: February 7, 2016\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:Who won Super Bowl 50?\n", - "\n", - "Assistant: Denver Broncos\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What venue did Super Bowl 50 take place in?\n", - "\n", - "Assistant: Levi's Stadium\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What city did Super Bowl 50 take place in?\n", - "\n", - "Assistant: San Francisco\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:If Roman numerals were used, what would Super Bowl 50 have been called?\n", - "\n", - "Assistant: Super Bowl L\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:Super Bowl 50 decided the NFL champion for what season?\n", - "\n", - "Assistant: 2015\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What year did the Denver Broncos secure a Super Bowl title for the third time?\n", - "\n", - "Assistant: 2015\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What city did Super Bowl 50 take place in?\n", - "\n", - "Assistant: San Francisco\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What stadium did Super Bowl 50 take place in?\n", - "\n", - "Assistant: Levi's Stadium\n", - "\n", - "\n", - "User: Context:Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50. Question:What was the final score of Super Bowl 50? \n", - "\n", - "Assistant: 24–10\n", - "\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "response = trainer.predict(model_eval, request_dl)\n", + "response = trainer_eval.predict(model_eval, request_dl)\n", "for batch in response:\n", " for s in batch['sentences']:\n", " print(f\"{s}\\n\\n\")" From 168979031db9c2fdddf1eb55cb773c5227aae56b Mon Sep 17 00:00:00 2001 From: Li Tao Date: Fri, 29 Sep 2023 02:01:51 +0800 Subject: [PATCH 267/512] fix a typo (#7496) Signed-off-by: BestJuly --- .../collections/nlp/data/language_modeling/megatron/helpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/helpers.cpp b/nemo/collections/nlp/data/language_modeling/megatron/helpers.cpp index ff262c053dab..4a65b313c816 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/helpers.cpp +++ b/nemo/collections/nlp/data/language_modeling/megatron/helpers.cpp @@ -38,7 +38,7 @@ void build_blending_indices(py::array_t& dataset_index, const int32_t num_datasets, const int64_t size, const bool verbose) { /* Given multiple datasets and a weighting array, build samples - such that it follows those wieghts.*/ + such that it follows those weights.*/ if (verbose) { std::cout << "> building indices for blendable datasets ..." << std::endl; From 3d28306d4609b5b1048dd13b33aa5dcee66f93b3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 28 Sep 2023 16:46:41 -0700 Subject: [PATCH 268/512] [TTS] remove curly braces from ${BRANCH} in jupyer notebook cell. (#7554) (#7560) * remove curly braces. * remove installation of pynini. --------- Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> --- tutorials/tts/FastPitch_ChineseTTS_Training.ipynb | 8 ++------ tutorials/tts/FastPitch_GermanTTS_Training.ipynb | 8 ++------ tutorials/tts/Vits_Training.ipynb | 8 ++------ 3 files changed, 6 insertions(+), 18 deletions(-) diff --git a/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb b/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb index 20a80ccab074..6be81479c750 100644 --- a/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb @@ -61,12 +61,8 @@ "# !pip install wget text-unidecode matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", - "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@${BRANCH}#egg=nemo_toolkit[all]\"\n", - "\n", - "## Install pynini\n", - "# !wget https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/nemo_text_processing/install_pynini.sh\n", - "# !bash install_pynini.sh\n", + "BRANCH = 'r1.21.0'\n", + "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\"\n", "\n", "# !pip install opencc-python-reimplemented\n", "\n", diff --git a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb index 2a05c8dace35..fa29916b43d2 100644 --- a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb @@ -61,12 +61,8 @@ "# !pip install wget text-unidecode matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", - "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@${BRANCH}#egg=nemo_toolkit[all]\"\n", - "\n", - "## Install pynini\n", - "# !wget https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/nemo_text_processing/install_pynini.sh\n", - "# !bash install_pynini.sh\n", + "BRANCH = 'r1.21.0'\n", + "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\"\n", "\n", "\"\"\"\n", "Remember to restart the runtime for the kernel to pick up any upgraded packages (e.g. matplotlib)!\n", diff --git a/tutorials/tts/Vits_Training.ipynb b/tutorials/tts/Vits_Training.ipynb index 5108567b5c6c..dbfc0edbe82c 100644 --- a/tutorials/tts/Vits_Training.ipynb +++ b/tutorials/tts/Vits_Training.ipynb @@ -63,12 +63,8 @@ "# !pip install wget text-unidecode matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", - "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@${BRANCH}#egg=nemo_toolkit[all]\"\n", - "\n", - "## Install pynini\n", - "# !wget https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/nemo_text_processing/install_pynini.sh\n", - "# !bash install_pynini.sh\n", + "BRANCH = 'r1.21.0'\n", + "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\"\n", "\n", "\"\"\"\n", "Remember to restart the runtime for the kernel to pick up any upgraded packages (e.g. matplotlib)!\n", From b38c28a71c6bf7edd17b56d36021aad518ca2bde Mon Sep 17 00:00:00 2001 From: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Date: Fri, 29 Sep 2023 09:39:50 -0700 Subject: [PATCH 269/512] add youtube embed url (#7570) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> --- README.rst | 17 ++++++++++++++++- docs/source/starthere/intro.rst | 11 ++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 6dc491523a99..15a2120d30b5 100644 --- a/README.rst +++ b/README.rst @@ -67,7 +67,22 @@ For scaling NeMo LLM training on Slurm clusters or public clouds, please see the The NM launcher has extensive recipes, scripts, utilities, and documentation for training NeMo LLMs and also has an `Autoconfigurator `_ which can be used to find the optimal model parallel configuration for training on a specific cluster. -Also see our `introductory video `_ for a high level overview of NeMo. +Also see the two introductory videos below for a high level overview of NeMo. + +* Developing State-Of-The-Art Conversational AI Models in Three Lines of Code. +* NVIDIA NeMo: Toolkit for Conversational AI at PyData Yerevan 2022. + +|three_lines| |pydata| + +.. |pydata| image:: https://img.youtube.com/vi/J-P6Sczmas8/maxres3.jpg + :target: https://www.youtube.com/embed/J-P6Sczmas8?mute=0&start=14&autoplay=0 + :width: 600 + :alt: Develop Conversational AI Models in 3 Lines + +.. |three_lines| image:: https://img.youtube.com/vi/wBgpMf_KQVw/maxresdefault.jpg + :target: https://www.youtube.com/embed/wBgpMf_KQVw?mute=0&start=0&autoplay=0 + :width: 600 + :alt: Introduction at PyData@Yerevan 2022 Key Features ------------ diff --git a/docs/source/starthere/intro.rst b/docs/source/starthere/intro.rst index 70426d3fe4a0..9297b7ef53b3 100644 --- a/docs/source/starthere/intro.rst +++ b/docs/source/starthere/intro.rst @@ -19,14 +19,23 @@ Conversational AI architectures are typically large and require a lot of data an for training. NeMo uses `PyTorch Lightning `_ for easy and performant multi-GPU/multi-node mixed-precision training. -`Pre-trained NeMo models. `_ +`Pre-trained NeMo models. `_ +Also see the two introductory videos below for a high level overview of NeMo. + +* Developing State-Of-The-Art Conversational AI Models in Three Lines of Code. .. raw:: html
+* NVIDIA NeMo: Toolkit for Conversational AI at PyData Yerevan 2022. +.. image:: https://img.youtube.com/vi/J-P6Sczmas8/maxres3.jpg + :target: https://www.youtube.com/embed/J-P6Sczmas8?mute=0&start=14&autoplay=0 + :width: 560 + :alt: Develop Conversational AI Models in 3 Lines + For more information and questions, visit the `NVIDIA NeMo Discussion Board `_. Prerequisites From b9033f27da2bc8b36ad111181d325e9e79712b8c Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Sat, 30 Sep 2023 02:57:58 +1000 Subject: [PATCH 270/512] Remap speakers to continuous range of speaker_id for dataset AISHELL3 (#7536) * Remap speakers to continuous range of speaker_id for dataset AISHELL3 * Add new key/value pair to record raw speaker for AISHELL3 dataset Signed-off-by: Robin Dong --------- Signed-off-by: Robin Dong Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../fastpitch_align_multispeaker_22050.yaml | 2 +- .../tts/aishell3/get_data.py | 38 ++++++++++++------- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/examples/tts/conf/zh/fastpitch_align_multispeaker_22050.yaml b/examples/tts/conf/zh/fastpitch_align_multispeaker_22050.yaml index 2464e546598e..55c918c28b72 100644 --- a/examples/tts/conf/zh/fastpitch_align_multispeaker_22050.yaml +++ b/examples/tts/conf/zh/fastpitch_align_multispeaker_22050.yaml @@ -40,7 +40,7 @@ model: learn_alignment: true bin_loss_warmup_epochs: 100 - n_speakers: 1958 + n_speakers: 175 max_token_duration: 75 symbols_embedding_dim: 384 pitch_embedding_kernel_size: 3 diff --git a/scripts/dataset_processing/tts/aishell3/get_data.py b/scripts/dataset_processing/tts/aishell3/get_data.py index 904ab0314653..1b3043bbf0d3 100755 --- a/scripts/dataset_processing/tts/aishell3/get_data.py +++ b/scripts/dataset_processing/tts/aishell3/get_data.py @@ -86,14 +86,17 @@ def __process_transcript(file_path: str): text_normalizer_call_kwargs = {"punct_pre_process": True, "punct_post_process": True} normalizer_call = lambda x: text_normalizer.normalize(x, **text_normalizer_call_kwargs) entries = [] - i = 0 SPEAKER_LEN = 7 + + candidates = [] + speakers = set() with open(file_path / "train" / "content.txt", encoding="utf-8") as fin: for line in fin: content = line.split() wav_name, text = content[0], "".join(content[1::2]) + "。" wav_name = wav_name.replace(u'\ufeff', '') speaker = wav_name[:SPEAKER_LEN] + speakers.add(speaker) wav_file = file_path / "train" / "wav" / speaker / wav_name assert os.path.exists(wav_file), f"{wav_file} not found!" duration = subprocess.check_output(f"soxi -D {wav_file}", shell=True) @@ -102,18 +105,27 @@ def __process_transcript(file_path: str): processed_file = file_path / "processed" / wav_name # convert wav to mono 22050HZ, 16 bit (as SFSpeech dataset) subprocess.run(f"sox {wav_file} -r 22050 -c 1 -b 16 {processed_file}", shell=True) - simplified_text = cc.convert(text) - normalized_text = normalizer_call(simplified_text) - entry = { - 'audio_filepath': os.path.abspath(processed_file), - 'duration': float(duration), - 'text': text, - 'normalized_text': normalized_text, - 'speaker': int(speaker[3:]), - } - - i += 1 - entries.append(entry) + candidates.append((processed_file, duration, text, speaker)) + + # remapping the speakder to speaker_id (start from 1) + remapping = {} + for index, speaker in enumerate(sorted(speakers)): + remapping[speaker] = index + 1 + + for processed_file, duration, text, speaker in candidates: + simplified_text = cc.convert(text) + normalized_text = normalizer_call(simplified_text) + entry = { + 'audio_filepath': os.path.abspath(processed_file), + 'duration': float(duration), + 'text': text, + 'normalized_text': normalized_text, + 'speaker_raw': speaker, + 'speaker': remapping[speaker], + } + + entries.append(entry) + return entries From 62097e54d90875389012d3ebaebc6d039aa9295d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 29 Sep 2023 11:12:43 -0700 Subject: [PATCH 271/512] fix validation_step_outputs initialization for multi-dataloader (#7546) (#7572) * added correct validation_step_outputs initialization for mutli-dataloader * changed kernel for display * Update logic for validation and test step outputs * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert multidataloader changes in multilang ASR notebook --------- Signed-off-by: KunalDhawan Signed-off-by: smajumdar Co-authored-by: Kunal Dhawan Co-authored-by: Somshubra Majumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- nemo/core/classes/modelPT.py | 99 +++++++++++++++++++++++++++---- tutorials/asr/Multilang_ASR.ipynb | 11 +++- 2 files changed, 94 insertions(+), 16 deletions(-) diff --git a/nemo/core/classes/modelPT.py b/nemo/core/classes/modelPT.py index 6f5d0f90fe07..4c7efffcb117 100644 --- a/nemo/core/classes/modelPT.py +++ b/nemo/core/classes/modelPT.py @@ -179,18 +179,11 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): # Create list of lists for val and test outputs to support multiple dataloaders # Initialize an empty list as sometimes self._validation_dl can be None at this stage - self.validation_step_outputs = [] - # Check len(self._validation_dl) > 1 as sometimes single dataloader can be in a list: [] when ds_item in - # config has 1 item passed in a list - if self._validation_dl and type(self._validation_dl) == list and len(self._validation_dl) > 1: - for _ in range(len(self._validation_dl)): - self.validation_step_outputs.append([]) + self._validation_step_outputs = None # Initialize an empty list as sometimes self._test_dl can be None at this stage - self.test_step_outputs = [] - if self._test_dl and type(self._test_dl) == list and len(self._test_dl) > 1: - for _ in range(len(self._test_dl)): - self.test_step_outputs.append([]) + self._test_step_outputs = None + # ModelPT wrappers over subclass implementations self.training_step = model_utils.wrap_training_step(self.training_step) @@ -1573,6 +1566,61 @@ def cfg(self, cfg): if hasattr(self, '_hparams_initial') and 'cfg' in self._hparams_initial: self._hparams_initial['cfg'] = OmegaConf.to_object(self._cfg) + @property + def validation_step_outputs(self): + """ + Cached outputs of validation_step. It can be a list of items (for single data loader) or a list of lists + (for multiple data loaders). + + Returns: + List of outputs of validation_step. + """ + if self._validation_step_outputs is not None: + return self._validation_step_outputs + + # Initialize new output list + self._validation_step_outputs = [] + # Check len(self._validation_dl) > 1 as sometimes single dataloader can be in a list: [] when ds_item in + # config has 1 item passed in a list + if ( + self._validation_dl is not None + and isinstance(self._validation_dl, (list, tuple)) + and len(self._validation_dl) > 1 + ): + for _ in range(len(self._validation_dl)): + self._validation_step_outputs.append([]) + + return self._validation_step_outputs + + @validation_step_outputs.setter + def validation_step_outputs(self, value): + self._validation_step_outputs = value + + @property + def test_step_outputs(self): + """ + Cached outputs of test_step. It can be a list of items (for single data loader) or a list of lists (for multiple data loaders). + + Returns: + List of outputs of test_step. + """ + if self._test_step_outputs is not None: + return self._test_step_outputs + + # Initialize new output list + self._test_step_outputs = [] + # Check len(self._test_dl) > 1 as sometimes single dataloader can be in a list: [] when ds_item in + # config has 1 item passed in a list + if self._test_dl is not None and isinstance(self._test_dl, (list, tuple)) and len(self._test_dl) > 1: + for _ in range(len(self._test_dl)): + self._test_step_outputs.append([]) + + return self._test_step_outputs + + @test_step_outputs.setter + def test_step_outputs(self, value): + self._test_step_outputs = value + @staticmethod def _is_model_being_restored() -> bool: app_state = AppState() @@ -1714,15 +1762,40 @@ def on_train_batch_end(self, outputs, batch: Any, batch_idx: int, unused: int = logging.info("====== End nsys profiling ======") torch.cuda.cudart().cudaProfilerStop() + def _cleanup_on_execution_end(self): + """ + Utility function to clean up the module state at the end of execution. + """ + + # dynamic freezing cleanup + if hasattr(self, '_freeze_cfg'): + delattr(self, '_freeze_cfg') + + # Clear up the val and test output caches + self._validation_step_outputs = None + self._test_step_outputs = None + def on_train_end(self): """ PyTorch Lightning hook: https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-train-end We use it here to cleanup the dynamic freezing config. """ - # dynamic freezing cleanup - if hasattr(self, '_freeze_cfg'): - delattr(self, '_freeze_cfg') + self._cleanup_on_execution_end() + + def on_test_end(self): + """ PyTorch Lightning hook: + https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-test-end + """ + + self._cleanup_on_execution_end() + + def on_predict_end(self): + """ PyTorch Lightning hook: + https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-test-end + """ + + self._cleanup_on_execution_end() # TODO: Remove in PTL 1.7.2 def cuda(self, device=None): diff --git a/tutorials/asr/Multilang_ASR.ipynb b/tutorials/asr/Multilang_ASR.ipynb index 3ce77648a60e..28073a050d5e 100644 --- a/tutorials/asr/Multilang_ASR.ipynb +++ b/tutorials/asr/Multilang_ASR.ipynb @@ -1713,7 +1713,7 @@ }, "outputs": [], "source": [ - "asr_model.setup_multiple_validation_data(val_data_config=validation_ds) " + "asr_model.setup_multiple_validation_data(val_data_config=validation_ds)" ] }, { @@ -2273,7 +2273,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "base", "language": "python", "name": "python3" }, @@ -2287,11 +2287,16 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.8.12" }, "nteract": { "version": "0.28.0" }, + "vscode": { + "interpreter": { + "hash": "1aaa02ce0ce2638a6e16a203f0ce39bc7495f7236d7115882d2d3541e1318e7a" + } + }, "widgets": { "application/vnd.jupyter.widget-state+json": { "013abc9bfddf456abf15dc2b0567d969": { From fe50fa35f5411d2d60cf580898a0822b8ba6da2c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 29 Sep 2023 12:31:21 -0700 Subject: [PATCH 272/512] Append output of val step to self.validation_step_outputs (#7530) (#7532) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> --- .../nlp/models/glue_benchmark/glue_benchmark_model.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/nemo/collections/nlp/models/glue_benchmark/glue_benchmark_model.py b/nemo/collections/nlp/models/glue_benchmark/glue_benchmark_model.py index 7843da422c4e..4a073e2ada1c 100644 --- a/nemo/collections/nlp/models/glue_benchmark/glue_benchmark_model.py +++ b/nemo/collections/nlp/models/glue_benchmark/glue_benchmark_model.py @@ -173,16 +173,18 @@ def validation_step(self, batch, batch_idx, dataloader_idx=0): model_output = torch.argmax(model_output, 1) eval_tensors = {'preds': model_output, 'labels': labels} - return {'val_loss': val_loss, 'eval_tensors': eval_tensors} + output = {'val_loss': val_loss, 'eval_tensors': eval_tensors} + self.validation_step_outputs.append(output) + return output def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): """ Called at the end of validation to aggregate outputs. outputs: list of individual outputs of each validation step. """ - avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean() - preds = torch.cat([x['eval_tensors']['preds'] for x in outputs]) - labels = torch.cat([x['eval_tensors']['labels'] for x in outputs]) + avg_loss = torch.stack([x['val_loss'] for x in self.validation_step_outputs]).mean() + preds = torch.cat([x['eval_tensors']['preds'] for x in self.validation_step_outputs]) + labels = torch.cat([x['eval_tensors']['labels'] for x in self.validation_step_outputs]) all_preds = [] all_labels = [] From bf88a23d203d0a5b9b972b2477d1fe805e195d73 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 29 Sep 2023 14:16:51 -0700 Subject: [PATCH 273/512] [TTS] fixed trainer's accelerator and strategy. (#7569) (#7574) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> --- tutorials/tts/Vits_Training.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tutorials/tts/Vits_Training.ipynb b/tutorials/tts/Vits_Training.ipynb index dbfc0edbe82c..db7161c06c61 100644 --- a/tutorials/tts/Vits_Training.ipynb +++ b/tutorials/tts/Vits_Training.ipynb @@ -304,8 +304,8 @@ " phoneme_dict_path=tts_dataset_files/ipa_cmudict-0.7b_nv23.01.txt \\\n", " heteronyms_path=tts_dataset_files/heteronyms-052722 \\\n", " trainer.max_epochs=3 \\\n", - " trainer.accelerator=auto \\\n", - " trainer.strategy=auto \\\n", + " trainer.accelerator='gpu' \\\n", + " trainer.strategy='ddp_find_unused_parameters_true' \\\n", " trainer.check_val_every_n_epoch=1 \\\n", " trainer.devices=1)" ] From 7987c21577f2a1ef61ed0baf4d5cd69136f61457 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 29 Sep 2023 16:16:36 -0700 Subject: [PATCH 274/512] Append val/test output to instance variable in EncDecSpeakerLabelModel (#7562) (#7573) * Append val/test output to the instance variable in EncDecSpeakerLabelModel * Handle test case in evaluation_step * Replace type with isinstance --------- Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> --- nemo/collections/asr/models/enhancement_models.py | 10 ++++++++++ nemo/collections/asr/models/label_models.py | 14 +++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/nemo/collections/asr/models/enhancement_models.py b/nemo/collections/asr/models/enhancement_models.py index a25bf882a23b..a441c6f7a8b0 100644 --- a/nemo/collections/asr/models/enhancement_models.py +++ b/nemo/collections/asr/models/enhancement_models.py @@ -434,6 +434,16 @@ def evaluation_step(self, batch, batch_idx, dataloader_idx: int = 0, tag: str = # Log global step self.log('global_step', torch.tensor(self.trainer.global_step, dtype=torch.float32), sync_dist=True) + if tag == 'val': + if isinstance(self.trainer.val_dataloaders, (list, tuple)) and len(self.trainer.val_dataloaders) > 1: + self.validation_step_outputs[dataloader_idx].append(output_dict) + else: + self.validation_step_outputs.append(output_dict) + else: + if isinstance(self.trainer.test_dataloaders, (list, tuple)) and len(self.trainer.test_dataloaders) > 1: + self.test_step_outputs[dataloader_idx].append(output_dict) + else: + self.test_step_outputs.append(output_dict) return output_dict @classmethod diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index 1a284aca609d..83e57ece59e3 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -373,13 +373,25 @@ def evaluation_step(self, batch, batch_idx, dataloader_idx: int = 0, tag: str = self._macro_accuracy.update(preds=logits, target=labels) stats = self._macro_accuracy._final_state() - return { + output = { f'{tag}_loss': loss_value, f'{tag}_correct_counts': correct_counts, f'{tag}_total_counts': total_counts, f'{tag}_acc_micro_top_k': acc_top_k, f'{tag}_acc_macro_stats': stats, } + if tag == 'val': + if isinstance(self.trainer.val_dataloaders, (list, tuple)) and len(self.trainer.val_dataloaders) > 1: + self.validation_step_outputs[dataloader_idx].append(output) + else: + self.validation_step_outputs.append(output) + else: + if isinstance(self.trainer.test_dataloaders, (list, tuple)) and len(self.trainer.test_dataloaders) > 1: + self.test_step_outputs[dataloader_idx].append(output) + else: + self.test_step_outputs.append(output) + + return output def multi_evaluation_epoch_end(self, outputs, dataloader_idx: int = 0, tag: str = 'val'): loss_mean = torch.stack([x[f'{tag}_loss'] for x in outputs]).mean() From 50ab483ff947acc00b827cb7b0f730920e48f167 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 29 Sep 2023 17:06:10 -0700 Subject: [PATCH 275/512] Fix CustomProgressBar for resume (#7427) (#7522) * Fix CustomProgress Bar for resume and multiple epochs * Edit num_training_batches * Use max_steps as total for progress bar for resume * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- nemo/collections/nlp/parts/nlp_overrides.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index 865974aff873..f5e3c082cc89 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -1121,6 +1121,12 @@ class CustomProgressBar(TQDMProgressBar): for megatron models """ + def get_current_epoch_step(self, trainer): + """ + Get the value of step within an epoch + """ + return trainer.fit_loop.epoch_loop.automatic_optimization.optim_progress.optimizer.step.current.completed + def init_train_tqdm(self): """ Override bar_format to not have 's/it' @@ -1129,11 +1135,22 @@ def init_train_tqdm(self): self.bar.bar_format = "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}{postfix}]" return self.bar + def on_train_epoch_start(self, trainer, *_): + if trainer.max_steps > 0 and (trainer.ckpt_path is not None): + # while resuming from a ckpt use trainer.max_steps as the total for progress bar as trainer.num_training_batches + # is truncated to max_steps - step being resumed at + num_training_batches = trainer.max_steps + else: + num_training_batches = trainer.num_training_batches + self.train_progress_bar.reset(num_training_batches) + self.train_progress_bar.initial = 0 + self.train_progress_bar.set_description(f"Epoch {trainer.current_epoch}") + def on_train_batch_end(self, trainer, pl_module, *_, **__): """ - Override parent class on_train_batch_end to update progress bar per global_step instead of per microbatch + Override parent class on_train_batch_end to update progress bar per global batch instead of per microbatch """ - n = trainer.global_step + n = self.get_current_epoch_step(trainer) if self._should_update(n, self.train_progress_bar.total): _update_n(self.train_progress_bar, n) self.train_progress_bar.set_postfix(self.get_metrics(trainer, pl_module)) From 2cb9e4ca5734c3ef016d8815db2a27b1a7e9d77c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 29 Sep 2023 19:12:52 -0700 Subject: [PATCH 276/512] fix typos in nfa and speech enhancement tutorials (#7580) (#7583) Signed-off-by: Elena Rastorgueva Co-authored-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> --- .../speech_enhancement/Speech_Enhancement_with_NeMo.ipynb | 8 ++++---- tutorials/tools/NeMo_Forced_Aligner_Tutorial.ipynb | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb b/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb index d8a15cbd5e1c..d7cd6571c16a 100644 --- a/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb +++ b/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb @@ -9,7 +9,7 @@ "source": [ "# Introduction\n", "\n", - "The goal of this tutorial is to demonstrate the basic steps required to setup and train train a simple single-channel speech enhancement model in NeMo.\n", + "The goal of this tutorial is to demonstrate the basic steps required to setup and train a simple single-channel speech enhancement model in NeMo.\n", "\n", "This notebook covers the following steps:\n", "\n", @@ -21,7 +21,7 @@ "Note that this tutorial is only for demonstration purposes.\n", "To achieve best performance for a particular use case, carefully prepared data and more advanced models should be used.\n", "\n", - "*Disclamer:*\n", + "*Disclaimer:*\n", "User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use." ] }, @@ -411,7 +411,7 @@ "\n", "Here, a simple encoder-mask-decoder model will be used to process the noisy input signal and produce an enhanced output signal.\n", "\n", - "In general, an encoder-mask-decoder model can be confugured using `EncMaskDecAudioToAudioModel` class, which is depicted in the following block diagram." + "In general, an encoder-mask-decoder model can be configured using `EncMaskDecAudioToAudioModel` class, which is depicted in the following block diagram." ] }, { @@ -470,7 +470,7 @@ "In this particular configuration, the model structure can be described as follows:\n", "* `AudioToSpectrogram` implements the analysis STFT transform.\n", "* `MaskEstimatorRNN` is a mask estimator using RNNs.\n", - "* `MaskReferenceChannel` is a simple processor whith applies the estimated mask on the reference channel. In this tutorial, the input signal has only a single channel, so the reference channel will be set to `0`.\n", + "* `MaskReferenceChannel` is a simple processor which applies the estimated mask on the reference channel. In this tutorial, the input signal has only a single channel, so the reference channel will be set to `0`.\n", "* `SpectrogramToAudio` implements the synthesis STFT transform." ] }, diff --git a/tutorials/tools/NeMo_Forced_Aligner_Tutorial.ipynb b/tutorials/tools/NeMo_Forced_Aligner_Tutorial.ipynb index a6ab57854bad..3ca3c32c1074 100644 --- a/tutorials/tools/NeMo_Forced_Aligner_Tutorial.ipynb +++ b/tutorials/tools/NeMo_Forced_Aligner_Tutorial.ipynb @@ -347,7 +347,7 @@ "id": "dHU-YmALUvVf" }, "source": [ - "The alignment process should have finished successfuly, let's look at some of the output files." + "The alignment process should have finished successfully, let's look at some of the output files." ] }, { @@ -472,7 +472,7 @@ "\n", "You can see that the token timestamps (in the first video) are very accurate, even despite the poor audio quality of the video.\n", "\n", - "The word timestamps (in the second video) are also very good. The only noticeable mistakes are when the the word has punctuation at the end (or beginning). This is because punctuation that is not separated from a word by a space is considered to be part of that word. If the alignment for the punctuation is in a region of non-speech, then the word alignment will also contain that region of non-speech." + "The word timestamps (in the second video) are also very good. The only noticeable mistakes are when the word has punctuation at the end (or beginning). This is because punctuation that is not separated from a word by a space is considered to be part of that word. If the alignment for the punctuation is in a region of non-speech, then the word alignment will also contain that region of non-speech." ] }, { From 2295e4490e8449bfef725c60a5f8f84e5faac13e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 29 Sep 2023 23:57:01 -0700 Subject: [PATCH 277/512] Add strategy as ddp_find_unused_parameters_true for glue_benchmark.py (#7454) (#7461) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> --- examples/nlp/glue_benchmark/glue_benchmark.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/nlp/glue_benchmark/glue_benchmark.py b/examples/nlp/glue_benchmark/glue_benchmark.py index 87486dbc47b0..3cb5f8e4af3e 100644 --- a/examples/nlp/glue_benchmark/glue_benchmark.py +++ b/examples/nlp/glue_benchmark/glue_benchmark.py @@ -46,6 +46,10 @@ @hydra_runner(config_name="glue_benchmark_config") def main(cfg: DictConfig) -> None: + # PTL 2.0 has find_unused_parameters as False by default, so its required to set it to True + # when there are unused parameters like here + if cfg.trainer.strategy == 'ddp': + cfg.trainer.strategy = "ddp_find_unused_parameters_true" logging.info(f'Config: {OmegaConf.to_yaml(cfg)}') trainer = pl.Trainer(**cfg.trainer) exp_manager_cfg = cfg.get("exp_manager", None) From 1be5988b69e5152731d6459d86d350c7ac62daf6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 30 Sep 2023 13:11:13 -0700 Subject: [PATCH 278/512] update strategy (#7577) (#7578) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao --- tutorials/asr/Self_Supervised_Pre_Training.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tutorials/asr/Self_Supervised_Pre_Training.ipynb b/tutorials/asr/Self_Supervised_Pre_Training.ipynb index cecc0b08cd05..55be87e013f7 100644 --- a/tutorials/asr/Self_Supervised_Pre_Training.ipynb +++ b/tutorials/asr/Self_Supervised_Pre_Training.ipynb @@ -312,7 +312,7 @@ "\n", "if torch.cuda.is_available():\n", " cfg.trainer.accelerator = 'gpu'\n", - " cfg.trainer.strategy = 'dp'\n", + " cfg.trainer.strategy = 'auto'\n", " cfg.trainer.devices = 1\n", "else:\n", " cfg.trainer.accelerator = 'cpu'\n", @@ -534,7 +534,7 @@ "\n", "if torch.cuda.is_available():\n", " cfg.trainer.accelerator = 'gpu'\n", - " cfg.trainer.strategy = 'dp'\n", + " cfg.trainer.strategy = 'auto'\n", " cfg.trainer.devices = 1\n", "else:\n", " cfg.trainer.accelerator = 'cpu'\n", From 8f362147db0588d21b33b3699459d1d10547b847 Mon Sep 17 00:00:00 2001 From: Igor Gitman Date: Mon, 2 Oct 2023 08:00:51 -0700 Subject: [PATCH 279/512] Fix typos (#7581) --- tutorials/asr/ASR_Confidence_Estimation.ipynb | 14 +++++++------- tutorials/asr/Confidence_Ensembles.ipynb | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tutorials/asr/ASR_Confidence_Estimation.ipynb b/tutorials/asr/ASR_Confidence_Estimation.ipynb index 7a92ed026f07..93c40b576115 100644 --- a/tutorials/asr/ASR_Confidence_Estimation.ipynb +++ b/tutorials/asr/ASR_Confidence_Estimation.ipynb @@ -422,7 +422,7 @@ "1. Initialize _ConfidenceConfig_\n", "2. Put the created _ConfidenceConfig_ into the model decoding config.\n", "\n", - "The folloving cell contains an example of _ConfidenceConfig_ initialization and updating the the model's decoding config.\n", + "The following cell contains an example of _ConfidenceConfig_ initialization and updating the model's decoding config.\n", "\n", "For the _ConfidenceConfig_ there are also listed possible values for its parameters.\n", "\n", @@ -627,14 +627,14 @@ "4. Normalized Cross Entropy ($\\mathrm{NCE}$): how close of confidence for correct predictions to $1.0$ and of incorrect predictions to $0.0$. It ranges from $-\\infty$ to $1.0$, with negative scores indicating that the confidence method performs worse than the setting confidence score to $1-\\mathrm{WER}$. This metric is also known as Normalized Mutual Information.\n", "5. Expected Calibration Error ($\\mathrm{ECE}$): a weighted average over the absolute accuracy/confidence difference. It ranges from $0.0$ to $1.0$ with the best value $0.0$.\n", "\n", - "Metrics based on the Youden's curve (see https://en.wikipedia.org/wiki/Youden%27s_J_statistic) can also be condsidered. They are:\n", + "Metrics based on the Youden's curve (see https://en.wikipedia.org/wiki/Youden%27s_J_statistic) can also be considered. They are:\n", "1. Area Under the Youden's curve ($\\mathrm{AUC}_\\mathrm{YC}$): the rate of the effective threshold range (i.e. the adjustability or responsiveness). It ranges from $0.0$ to $1.0$ with the best value $0.5$.\n", "2. Maximum of the Youden's curve $\\mathrm{MAX}_\\mathrm{YC}$: the optimal $\\mathrm{TNR}$ vs. $\\mathrm{FNR}$ tradeoff. It's unnormalized version can be used as a criterion for selecting the optimal $\\tau$. It ranges from $0.0$ to $1.0$ with the best value $1.0$.\n", "3. The standard deviation of the Youden's curve values ($\\mathrm{STD}_\\mathrm{YC}$): indicates that $\\mathrm{TNR}$ and $\\mathrm{FNR}$ increase at different rates (viz. $\\mathrm{TNR}$ grows faster) as the $\\tau$ increases. It ranges from $0.0$ to $0.5$ with the best value around $0.25$.\n", "\n", - "When selecting/tuning a confidence method, it is recommended to maximize $\\mathrm{AUC}_\\mathrm{ROC}$ first as this is the main mectic of confidence estimation quality. Then, for overconfident models, maximizing $\\mathrm{AUC}_\\mathrm{NT}$ should take precedence over $\\mathrm{AUC}_\\mathrm{PR}$. Finally, a trade-off between $\\mathrm{NCE}$/$\\mathrm{ECE}$ and the family of $\\mathrm{YC}$ metrics considered as a compromise between formal correctness and controllability.\n", + "When selecting/tuning a confidence method, it is recommended to maximize $\\mathrm{AUC}_\\mathrm{ROC}$ first as this is the main metric of confidence estimation quality. Then, for overconfident models, maximizing $\\mathrm{AUC}_\\mathrm{NT}$ should take precedence over $\\mathrm{AUC}_\\mathrm{PR}$. Finally, a trade-off between $\\mathrm{NCE}$/$\\mathrm{ECE}$ and the family of $\\mathrm{YC}$ metrics considered as a compromise between formal correctness and controllability.\n", "\n", - "Let's see how well our confidence performs according to the metrcis above." + "Let's see how well our confidence performs according to the metrics above." ] }, { @@ -891,7 +891,7 @@ "id": "dbb82877" }, "source": [ - "## 4.1. Small WER improvenent\n", + "## 4.1. Small WER improvement\n", "\n", "Good confidence scores can slightly reduce WER by removing low confidence words from recognition results.\n", "\n", @@ -1190,7 +1190,7 @@ "id": "f28da61f", "metadata": {}, "source": [ - "The original examples contain speech, music, or noise. The resulring audio recordings are considered to contain no recognizable speech.\n", + "The original examples contain speech, music, or noise. The resulting audio recordings are considered to contain no recognizable speech.\n", "\n", "You can listen to an example of the audios." ] @@ -1397,7 +1397,7 @@ }, "source": [ "# Summary\n", - "This tutorial covered the basics of ASR confidence estimation and two examples of using ASR word confidence: WER reduction and hallusinations removal.\n", + "This tutorial covered the basics of ASR confidence estimation and two examples of using ASR word confidence: WER reduction and hallucinations removal.\n", "\n", "You can follow this tutorial on [ASR Confidence-based Ensembles](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/Confidence_Ensembles.ipynb) to see another important application of ASR confidence estimation." ] diff --git a/tutorials/asr/Confidence_Ensembles.ipynb b/tutorials/asr/Confidence_Ensembles.ipynb index 4516d2b70d6d..eab46d3b06e5 100644 --- a/tutorials/asr/Confidence_Ensembles.ipynb +++ b/tutorials/asr/Confidence_Ensembles.ipynb @@ -48,7 +48,7 @@ "\n", "# clone SDP and install requirements\n", "!git clone https://github.com/NVIDIA/NeMo-speech-data-processor $WORKSPACE_DIR/NeMo-speech-data-processor\n", - "!pip install -r $WORKSPACE_DIR/NeMo-speech-data-processor/requirements.txt\n", + "!pip install -r $WORKSPACE_DIR/NeMo-speech-data-processor/requirements/main.txt\n", "\n", "\"\"\"\n", "Remember to restart the runtime for the kernel to pick up any upgraded packages.\n", @@ -106,13 +106,13 @@ "\n", "A short answer — you can use any ASR models. E.g., you can combine a number of CTC models, or Transducer models, or even mix-and-match. \n", "\n", - "A more detailed answer is that hte performance of the confidence ensemble is upper-bounded by the performance of the best model on each of the input examples. Thus you will benefit if some of your models work really well on part of the input compared to other models. This way you will get more gains compared to each separate model, and it will also make correct model identification easier.\n", + "A more detailed answer is that the performance of the confidence ensemble is upper-bounded by the performance of the best model on each of the input examples. Thus you will benefit if some of your models work really well on part of the input compared to other models. This way you will get more gains compared to each separate model, and it will also make correct model identification easier.\n", "\n", "### How to estimate a model's confidence?\n", "\n", "Good news, we have a whole separate [tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/ASR_Confidence_Estimation.ipynb) on this topic! You can go through it if you want to know all the details about different ways to estimate confidence of NeMo ASR models. There are different confidence measures and aggregation functions and for the absolute best performance, you will need to run a grid-search to pick the best confidence estimation way for your specific models and data.\n", "\n", - "That being said, we found that there exist a set of confidence parameters that work pretty well on a large set of models and datsets. They are default in NeMo and so you might not need to worry about running the search. If you do want to maximize the performance by tuning the confidence parameters, you only need to add [a few extra config lines](#Building-and-evaluating-ensemble-(tuned-parameters)).\n", + "That being said, we found that there exist a set of confidence parameters that work pretty well on a large set of models and datasets. They are default in NeMo and so you might not need to worry about running the search. If you do want to maximize the performance by tuning the confidence parameters, you only need to add [a few extra config lines](#Building-and-evaluating-ensemble-(tuned-parameters)).\n", "\n", "### How to calibrate confidence values?\n", "\n", From f29a917bac3e4fa4d21b4a0fd779ad4159a6373e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 2 Oct 2023 09:35:22 -0700 Subject: [PATCH 280/512] Change hifigan finetune strategy to ddp_find_unused_parameters_true (#7579) (#7584) * Change strategy to auto --------- Signed-off-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> --- tutorials/tts/FastPitch_Adapter_Finetuning.ipynb | 2 +- tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb index 5fe61d596f4b..5220519e01ad 100644 --- a/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb +++ b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb @@ -615,7 +615,7 @@ "+trainer.max_epochs=50 \\\n", "trainer.check_val_every_n_epoch=5 \\\n", "trainer.devices=-1 \\\n", - "trainer.strategy='ddp' \\\n", + "trainer.strategy='ddp_find_unused_parameters_true' \\\n", "trainer.precision=16 \\\n", "exp_manager.exp_dir={logs_dir} \\\n", "exp_manager.create_wandb_logger=True \\\n", diff --git a/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb b/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb index a031723f549b..ad0a49067ca4 100644 --- a/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb +++ b/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb @@ -511,7 +511,7 @@ "+trainer.max_epochs=5 \\\n", "trainer.check_val_every_n_epoch=5 \\\n", "trainer.devices=1 \\\n", - "trainer.strategy='auto' \\\n", + "trainer.strategy='ddp_find_unused_parameters_true' \\\n", "trainer.precision=16 \\\n", "exp_manager.exp_dir={logs_dir} \\\n", "exp_manager.create_wandb_logger=True \\\n", From dc60a4752ac78c43531259b8b4f93a7a6ec690c7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 2 Oct 2023 09:55:07 -0700 Subject: [PATCH 281/512] [BugFix] Add missing quotes for auto strategy in tutorial notebooks (#7541) (#7548) * Add missing quotes for auto strategy * Revert trainer.gpus to trainer.devices in Self_Supervised_Pre_Training.ipynb --------- Signed-off-by: Abhishree Signed-off-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> --- tutorials/asr/ASR_TTS_Tutorial.ipynb | 2 +- tutorials/asr/Self_Supervised_Pre_Training.ipynb | 4 ++-- tutorials/asr/Speech_Commands.ipynb | 2 +- tutorials/asr/Voice_Activity_Detection.ipynb | 2 +- .../speech_enhancement/Speech_Enhancement_with_NeMo.ipynb | 4 ++-- tutorials/nlp/Entity_Linking_Medical.ipynb | 2 +- tutorials/nlp/GLUE_Benchmark.ipynb | 2 +- tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb | 4 ++-- tutorials/nlp/Punctuation_and_Capitalization.ipynb | 4 ++-- .../nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb | 4 ++-- tutorials/nlp/Relation_Extraction-BioMegatron.ipynb | 2 +- tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb | 4 ++-- tutorials/nlp/Token_Classification-BioMegatron.ipynb | 2 +- .../nlp/Token_Classification_Named_Entity_Recognition.ipynb | 2 +- tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb | 2 +- tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb | 2 +- .../speaker_tasks/Speaker_Identification_Verification.ipynb | 2 +- 17 files changed, 23 insertions(+), 23 deletions(-) diff --git a/tutorials/asr/ASR_TTS_Tutorial.ipynb b/tutorials/asr/ASR_TTS_Tutorial.ipynb index 9bbcc8e4aa34..267c84bca9d2 100644 --- a/tutorials/asr/ASR_TTS_Tutorial.ipynb +++ b/tutorials/asr/ASR_TTS_Tutorial.ipynb @@ -553,7 +553,7 @@ "config.trainer.max_epochs = NUM_EPOCHS\n", "\n", "config.trainer.devices = 1\n", - "config.trainer.strategy = auto # use 1 device, no need for ddp strategy\n", + "config.trainer.strategy = 'auto' # use 1 device, no need for ddp strategy\n", "\n", "OmegaConf.resolve(config)" ] diff --git a/tutorials/asr/Self_Supervised_Pre_Training.ipynb b/tutorials/asr/Self_Supervised_Pre_Training.ipynb index 55be87e013f7..6d54655d04dc 100644 --- a/tutorials/asr/Self_Supervised_Pre_Training.ipynb +++ b/tutorials/asr/Self_Supervised_Pre_Training.ipynb @@ -316,7 +316,7 @@ " cfg.trainer.devices = 1\n", "else:\n", " cfg.trainer.accelerator = 'cpu'\n", - " cfg.trainer.strategy = auto\n", + " cfg.trainer.strategy = 'auto'\n", " cfg.trainer.devices = 0\n", "\n", "cfg.exp_manager.exp_dir = data_dir + \"/content/exp\"\n", @@ -538,7 +538,7 @@ " cfg.trainer.devices = 1\n", "else:\n", " cfg.trainer.accelerator = 'cpu'\n", - " cfg.trainer.strategy = auto\n", + " cfg.trainer.strategy = 'auto'\n", " cfg.trainer.devices = 0\n", "\n", "cfg.model.tokenizer.dir = data_dir + \"/tokenizers/an4/tokenizer_spe_unigram_v128/\" # note this is a directory, not a path to a vocabulary file\n", diff --git a/tutorials/asr/Speech_Commands.ipynb b/tutorials/asr/Speech_Commands.ipynb index 0dc4e1e5443f..0e9579555408 100644 --- a/tutorials/asr/Speech_Commands.ipynb +++ b/tutorials/asr/Speech_Commands.ipynb @@ -441,7 +441,7 @@ "config.trainer.max_epochs = 5\n", "\n", "# Remove distributed training flags\n", - "config.trainer.strategy = auto" + "config.trainer.strategy = 'auto'" ], "execution_count": null, "outputs": [] diff --git a/tutorials/asr/Voice_Activity_Detection.ipynb b/tutorials/asr/Voice_Activity_Detection.ipynb index ea06200efb0f..6898b0164461 100644 --- a/tutorials/asr/Voice_Activity_Detection.ipynb +++ b/tutorials/asr/Voice_Activity_Detection.ipynb @@ -462,7 +462,7 @@ "config.trainer.max_epochs = 5\n", "\n", "# Remove distributed training flags\n", - "config.trainer.strategy = auto" + "config.trainer.strategy = 'auto'" ] }, { diff --git a/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb b/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb index d7cd6571c16a..09226c83d654 100644 --- a/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb +++ b/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb @@ -667,7 +667,7 @@ "config.trainer.max_epochs = 10\n", "\n", "# Remove distributed training flags\n", - "config.trainer.strategy = auto\n", + "config.trainer.strategy = 'auto'\n", "\n", "# Instantiate the trainer\n", "trainer = pl.Trainer(**config.trainer)" @@ -1144,7 +1144,7 @@ "config_dual_output.trainer.max_epochs = 10\n", "\n", "# Remove distributed training flags\n", - "config_dual_output.trainer.strategy = auto\n", + "config_dual_output.trainer.strategy = 'auto'\n", "\n", "# Instantiate the trainer\n", "trainer = pl.Trainer(**config_dual_output.trainer)\n", diff --git a/tutorials/nlp/Entity_Linking_Medical.ipynb b/tutorials/nlp/Entity_Linking_Medical.ipynb index 466add77d8dc..89104554a475 100644 --- a/tutorials/nlp/Entity_Linking_Medical.ipynb +++ b/tutorials/nlp/Entity_Linking_Medical.ipynb @@ -187,7 +187,7 @@ "cfg.model.validation_ds.data_file = os.path.join(DATA_DIR, \"tiny_example_validation_pairs.tsv\")\n", "\n", "# remove distributed training flags\n", - "cfg.trainer.strategy = auto\n", + "cfg.trainer.strategy = 'auto'\n", "cfg.trainer.accelerator = None" ] }, diff --git a/tutorials/nlp/GLUE_Benchmark.ipynb b/tutorials/nlp/GLUE_Benchmark.ipynb index c7f62e6b635b..0162e5c39a32 100644 --- a/tutorials/nlp/GLUE_Benchmark.ipynb +++ b/tutorials/nlp/GLUE_Benchmark.ipynb @@ -342,7 +342,7 @@ "# config.trainer.amp_level = O1\n", "\n", "# remove distributed training flags\n", - "config.trainer.strategy = auto\n", + "config.trainer.strategy = 'auto'\n", "\n", "# setup max number of steps to reduce training time for demonstration purposes of this tutorial\n", "config.trainer.max_steps = 128\n", diff --git a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb index 282a90428c01..fc22fa05d162 100644 --- a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb +++ b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb @@ -286,7 +286,7 @@ "# config.trainer.amp_level = O1\n", "\n", "# remove distributed training flags\n", - "config.trainer.strategy = auto\n", + "config.trainer.strategy = 'auto'\n", "\n", "# setup a small number of epochs for demonstration purposes of this tutorial\n", "config.trainer.max_epochs = 5\n", @@ -705,7 +705,7 @@ "config.trainer.accelerator = accelerator\n", "\n", "# remove distributed training flags\n", - "config.trainer.strategy = auto\n", + "config.trainer.strategy = 'auto'\n", "\n", "trainer = pl.Trainer(**config.trainer)\n", "config.exp_manager.exp_dir = os.path.join(DATA_DIR, \"output/\" + run_name)\n", diff --git a/tutorials/nlp/Punctuation_and_Capitalization.ipynb b/tutorials/nlp/Punctuation_and_Capitalization.ipynb index 02133ae23a03..1545b4329d2b 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization.ipynb @@ -550,7 +550,7 @@ "config.trainer.max_epochs = 1\n", "\n", "# Remove distributed training flags\n", - "config.trainer.strategy = auto\n", + "config.trainer.strategy = 'auto'\n", "\n", "trainer = pl.Trainer(**config.trainer)" ] @@ -745,7 +745,7 @@ "config.trainer.accelerator = accelerator\n", "config.trainer.precision = 16 if torch.cuda.is_available() else 32\n", "config.trainer.max_epochs = 1\n", - "config.trainer.strategy = auto\n", + "config.trainer.strategy = 'auto'\n", "\n", "# Exp manager\n", "config.exp_manager.explicit_log_dir = 'tarred_experiment'\n", diff --git a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb index 6dacda811bda..5fbe61139980 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb @@ -645,7 +645,7 @@ "config.trainer.max_epochs = 1\n", "\n", "# Remove distributed training flags\n", - "config.trainer.strategy = auto\n", + "config.trainer.strategy = 'auto'\n", "config.exp_manager.use_datetime_version=False\n", "config.exp_manager.explicit_log_dir='Punctuation_And_Capitalization_Lexical_Audio'\n", "\n", @@ -860,7 +860,7 @@ "config.trainer.accelerator = accelerator\n", "config.trainer.precision = 16 if torch.cuda.is_available() else 32\n", "config.trainer.max_epochs = 1\n", - "config.trainer.strategy = auto\n", + "config.trainer.strategy = 'auto'\n", "\n", "# Exp manager\n", "config.exp_manager.explicit_log_dir = 'tarred_experiment'\n", diff --git a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb index 2205a5f953c7..8920a0738641 100644 --- a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb +++ b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb @@ -403,7 +403,7 @@ "config.trainer.precision = 16 if torch.cuda.is_available() else 32\n", "\n", "# remove distributed training flags\n", - "config.trainer.strategy = auto\n", + "config.trainer.strategy = 'auto'\n", "\n", "trainer = pl.Trainer(**config.trainer)" ] diff --git a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb index c673a71b98e2..58dfd448f396 100644 --- a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb +++ b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb @@ -370,7 +370,7 @@ "# config.trainer.amp_level = O1\n", "\n", "# disable distributed training when using Colab to prevent the errors\n", - "config.trainer.strategy = auto\n", + "config.trainer.strategy = 'auto'\n", "\n", "# setup max number of steps to reduce training time for demonstration purposes of this tutorial\n", "# Training stops when max_step or max_epochs is reached (earliest)\n", @@ -573,7 +573,7 @@ "# create a copy of the trainer config and update it to be used for final evaluation\n", "eval_trainer_cfg = config.trainer.copy()\n", "eval_trainer_cfg.accelerator = 'gpu' if torch.cuda.is_available() else 'cpu' # it is safer to perform evaluation on single GPU as PT is buggy with the last batch on multi-GPUs\n", - "eval_trainer_cfg.strategy = auto # 'ddp' is buggy with test process in the current PT, it looks like it has been fixed in the latest master\n", + "eval_trainer_cfg.strategy = 'auto' # 'ddp' is buggy with test process in the current PT, it looks like it has been fixed in the latest master\n", "eval_trainer = pl.Trainer(**eval_trainer_cfg)\n", "\n", "eval_trainer.test(model=eval_model, verbose=False) # test_dataloaders=eval_dataloader,\n" diff --git a/tutorials/nlp/Token_Classification-BioMegatron.ipynb b/tutorials/nlp/Token_Classification-BioMegatron.ipynb index b2da9799b470..56bf3261bcd3 100644 --- a/tutorials/nlp/Token_Classification-BioMegatron.ipynb +++ b/tutorials/nlp/Token_Classification-BioMegatron.ipynb @@ -434,7 +434,7 @@ "config.trainer.precision = 16 if torch.cuda.is_available() else 32\n", "\n", "# remove distributed training flags\n", - "config.trainer.strategy = auto\n", + "config.trainer.strategy = 'auto'\n", "\n", "trainer = pl.Trainer(**config.trainer)" ] diff --git a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb index 3a289ba4c1f1..2888703b9368 100644 --- a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb +++ b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb @@ -533,7 +533,7 @@ "# config.trainer.amp_level = O1\n", "\n", "# remove distributed training flags\n", - "config.trainer.strategy = auto\n", + "config.trainer.strategy = 'auto'\n", "\n", "# setup max number of steps to reduce training time for demonstration purposes of this tutorial\n", "config.trainer.max_steps = 32\n", diff --git a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb index f25ad2494694..46e7e24f4130 100644 --- a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb +++ b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb @@ -400,7 +400,7 @@ "# config.trainer.amp_level = O1\n", "\n", "# remove distributed training flags\n", - "config.trainer.strategy = auto\n", + "config.trainer.strategy = 'auto'\n", "\n", "# setup max number of steps to reduce training time for demonstration purposes of this tutorial\n", "config.trainer.max_steps = 128\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb index 475e382e574f..f08c7ae88385 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb @@ -761,7 +761,7 @@ "source": [ "config.model.diarizer.speaker_embeddings.model_path=\"titanet_large\"\n", "config.trainer.max_epochs = 5\n", - "config.trainer.strategy = auto" + "config.trainer.strategy = 'auto'" ] }, { diff --git a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb index 2e7490ed7b9b..954a84fa44ac 100644 --- a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb +++ b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb @@ -475,7 +475,7 @@ "config.trainer.max_epochs = 10\n", "\n", "# Remove distributed training flags\n", - "config.trainer.strategy = auto\n", + "config.trainer.strategy = 'auto'\n", "\n", "# Remove augmentations\n", "config.model.train_ds.augmentor=None" From 879047eb5a0be8d26545de1c27c5ef225940d9ff Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 2 Oct 2023 13:06:36 -0700 Subject: [PATCH 282/512] add build os key (#7596) (#7599) * add build os key * add tools * update to stable version --------- Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao --- .readthedocs.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 226be6a7eab0..5ee18e6dee1e 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -20,12 +20,16 @@ # Required field. version: 2 +build: + os: ubuntu-22.04 + tools: + python: "3.10" + # Build documentation in the docs/ directory with Sphinx. sphinx: configuration: docs/source/conf.py # Set the version of Python and requirements required to build your docs python: - version: 3.8 install: - requirements: requirements/requirements_docs.txt From 0b1ea36cf7ad8dc8cbea29115dfb668b5642caa5 Mon Sep 17 00:00:00 2001 From: Jan Lasek Date: Mon, 2 Oct 2023 22:43:54 +0200 Subject: [PATCH 283/512] StarCoder SFT test + bump PyT NGC image to 23.09 (#7540) * Add SFT StarCoder test Signed-off-by: Jan Lasek * Remove _modify_config call as it is covered in load_from_nemo just below Signed-off-by: Jan Lasek * Test with pyt:23.09 container Signed-off-by: Jan Lasek --------- Signed-off-by: Jan Lasek --- Jenkinsfile | 36 ++++++++++++++++++- .../tuning/megatron_gpt_sft.py | 1 - 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 92aa65ae660b..3d262931915b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,7 +1,7 @@ pipeline { agent { docker { - image 'nvcr.io/nvidia/pytorch:23.08-py3' + image 'nvcr.io/nvidia/pytorch:23.09-py3' args '--device=/dev/nvidia0 --gpus all --user 0:128 -v /home/TestData:/home/TestData -v $HOME/.cache:/root/.cache --shm-size=8g --env TRANSFORMERS_OFFLINE=1 --env HYDRA_FULL_ERROR=1' } } @@ -3621,6 +3621,40 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' sh "rm -rf examples/nlp/language_modeling/gpt_sft_results" } } + stage('L2: Megatron GPT Finetuning StarCoder PP=1') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps { + sh "python examples/nlp/language_modeling/tuning/megatron_gpt_sft.py \ + trainer.devices=1 \ + trainer.num_nodes=1 \ + trainer.precision=32 \ + trainer.max_steps=4 \ + trainer.val_check_interval=4 \ + trainer.enable_checkpointing=False \ + +trainer.limit_val_batches=2 \ + +trainer.limit_test_batches=2 \ + exp_manager.checkpoint_callback_params.save_best_model=False \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_sft_results \ + model.optim.name=distributed_fused_adam \ + model.restore_from_path=/home/TestData/nlp/megatron_gpt/starcoder-ci-nemo/megatron_starcoder_tp1_pp1.nemo \ + model.tensor_model_parallel_size=1 \ + model.pipeline_model_parallel_size=1 \ + model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.train_ds.num_workers=0 \ + model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.validation_ds.num_workers=0 \ + model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.test_ds.num_workers=0 \ + model.data.train_ds.concat_sampling_probabilities=[1.0]" + sh "rm -rf examples/nlp/language_modeling/gpt_sft_results" + } + } stage('L2: Megatron GPT PEFT Lora PP=2') { when { anyOf { diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py index 07af2b887fb0..a4888049cb42 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py @@ -202,7 +202,6 @@ def main(cfg) -> None: return_config=True, save_restore_connector=save_restore_connector, ) - gpt_cfg = _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False) model = load_from_nemo(MegatronGPTSFTModel, cfg, trainer, gpt_cfg, modify_confg_fn=_modify_config) else: validate_checkpoint_loading_args(cfg.model.pretrained_checkpoint) From 703d2e8f8350a03b18d6fadeade9d9007f1616a3 Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Mon, 2 Oct 2023 20:20:02 -0700 Subject: [PATCH 284/512] defaults changed (#7600) * defaults changed Signed-off-by: arendu * typo Signed-off-by: arendu * update Signed-off-by: arendu --------- Signed-off-by: arendu --- .../metric_calculation/peft_metric_calc.py | 30 +++++++------------ 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/scripts/metric_calculation/peft_metric_calc.py b/scripts/metric_calculation/peft_metric_calc.py index ca13f83281c5..9fd2452fb17b 100755 --- a/scripts/metric_calculation/peft_metric_calc.py +++ b/scripts/metric_calculation/peft_metric_calc.py @@ -21,26 +21,18 @@ """ -This script can be used to calcualte exact match and F1 scores for many different tasks, not just squad. - -Example command for T5 Preds - - ``` - python squad_metric_calc.py \ - --ground-truth squad_test_gt.jsonl \ - --preds squad_preds_t5.txt - ``` +This script can be used to calcualte exact match and F1 scores for many different tasks. +The file "squad_test_predictions.jsonl" is assumed to be generated by the +`examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py` script Example command for GPT Preds ``` - python squad_metric_calc.py \ - --ground-truth squad_test_gt.jsonl \ - --preds squad_preds_gpt.txt \ - --split-string "answer:" + python peft_metric_calc.py \ + --pred_file squad_test_predictions.jsonl \ + --label_field "original_answers" \ ``` - In this case, the prediction file will be split on "answer: " when looking for the LM's predicted answer. """ @@ -92,21 +84,21 @@ def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): def main(): parser = argparse.ArgumentParser(description='Process some integers.') parser.add_argument( - '--pred-file', + '--pred_file', type=str, help="Text file with test set prompts + model predictions. Prediction file can be made by running NeMo/examples/nlp/language_modeling/megatron_gpt_prompt_learning_eval.py", ) parser.add_argument( - '--pred-field', + '--pred_field', type=str, help="The field in the json file that contains the prediction tokens", default="pred", ) parser.add_argument( - '--ground-truth-field', + '--label_field', type=str, help="The field in the json file that contains the ground truth tokens", - default="original_answers", + default="label", ) args = parser.parse_args() @@ -120,7 +112,7 @@ def main(): pred_line = json.loads(preds[i]) pred_answer = pred_line[args.pred_field] - true_answers = pred_line[args.ground_truth_field] + true_answers = pred_line[args.label_field] if not isinstance(true_answers, list): true_answers = [true_answers] From 8b77683cd23e0633db664ec31471302f7d7f112b Mon Sep 17 00:00:00 2001 From: Giacomo Leone Maria Cavallini <72698188+GiacomoLeoneMaria@users.noreply.github.com> Date: Tue, 3 Oct 2023 05:40:12 +0200 Subject: [PATCH 285/512] add ItalianPhonemesTokenizer (#7587) * add ItalianPhonemesTokenizer Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix Italian phonemes Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add test Signed-off-by: GiacomoLeoneMaria --------- Signed-off-by: GiacomoLeoneMaria Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --- .../tokenizers/text_to_speech/ipa_lexicon.py | 7 +- .../text_to_speech/tts_tokenizers.py | 73 ++++++++++++++++++- .../text_to_speech/test_tts_tokenizers.py | 16 ++++ 3 files changed, 93 insertions(+), 3 deletions(-) diff --git a/nemo/collections/common/tokenizers/text_to_speech/ipa_lexicon.py b/nemo/collections/common/tokenizers/text_to_speech/ipa_lexicon.py index 2e1bb359102b..338b3536519b 100644 --- a/nemo/collections/common/tokenizers/text_to_speech/ipa_lexicon.py +++ b/nemo/collections/common/tokenizers/text_to_speech/ipa_lexicon.py @@ -88,7 +88,7 @@ 'ɢ','ʛ','ɦ','ɧ','ħ','ɥ','ʜ','ɨ','ɬ','ɫ','ɮ','ʟ', 'ɱ','ɯ','ɰ','ɳ','ɵ','ɸ','œ','ɶ','ʘ','ɺ','ɻ','ʀ','ʁ', 'ɽ','ʂ','ʈ','ʧ','ʉ','ʋ','ⱱ','ɤ','ʍ','χ','ʏ','ʑ','ʐ', - 'ʔ','ʡ','ʕ','ʢ','ǀ','ǁ','ǂ','ᵻ' + 'ʔ','ʡ','ʕ','ʢ','ǀ','ǁ','ǂ','ᵻ', 'ʃ','ː', ), } @@ -181,7 +181,10 @@ def get_ipa_punctuation_list(locale): '↑', '→', '↗', - '↘,', + '↘', + '”', + '’', + '-', ] ) elif locale == "es-ES": diff --git a/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py b/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py index 32f725c9c73f..25b9d88a59dc 100644 --- a/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py +++ b/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py @@ -284,7 +284,7 @@ def __init__( non_default_punct_list: List of punctuation marks which will be used instead default. """ - it_alphabet = "abcdefghijklmnopqrstuvwxyzàèéìòù" + it_alphabet = "abcdefghijklmnopqrstuvwxyzàèéìòùó" super().__init__( chars=it_alphabet, punct=punct, @@ -367,6 +367,77 @@ def encode(self, text): return [self._token2id[p] for p in cs] +class ItalianPhonemesTokenizer(BaseCharsTokenizer): + # fmt: off + PUNCT_LIST = ( + ',', '.', '!', '?', '-', + ':', ';', '/', '"', '(', + ')', '[', ']', '{', '}', + '„', '“', '”', '‘', '’', '‒', '—', '«', '»', '‹', '›', '_', + ) + # fmt: on + + def __init__( + self, + punct=True, + apostrophe=True, + add_blank_at=None, + pad_with_space=False, + non_default_punct_list=None, + text_preprocessing_func=italian_text_preprocessing, + ): + """Italian phoneme-based tokenizer. + Args: + punct: Whether to reserve grapheme for basic punctuation or not. + apostrophe: Whether to use apostrophe or not. + add_blank_at: Add blank to labels in the specified order ("last") or after tokens (any non None), + if None then no blank in labels. + pad_with_space: Whether to pad text with spaces at the beginning and at the end or not. + non_default_punct_list: List of punctuation marks which will be used instead default. + text_preprocessing_func: Text preprocessing function for correct execution of the tokenizer. + Currently, it only applies lower() function. + """ + + it_ipa = "abcdefghijklmnopqrstuvwxyzàèéìòùóæɐɑɔəɚɜɬɹʌʔᵻðŋɛɡɣɪɲɾʃʊʎʒʝβθd͡'t͡'øɒɕɓçɖɘɝɞɟʄɡɠɢʛɦɧħɥʜɨɬɫɮʟɱɯɰɳɵɸœɶʘɺɻʀʁɽʂʈʧʉʋⱱɤʍχʏʑʐʔʡʕʢǀǁǂᵻʃ'ː" + super().__init__( + chars=it_ipa, + punct=punct, + apostrophe=apostrophe, + add_blank_at=add_blank_at, + pad_with_space=pad_with_space, + non_default_punct_list=non_default_punct_list, + text_preprocessing_func=text_preprocessing_func, + ) + + def encode(self, text): + """See base class.""" + cs, space, tokens = [], self.tokens[self.space], set(self.tokens) + + text = self.text_preprocessing_func(text) + for c in text: + # Add space if last one isn't one + if c == space and len(cs) > 0 and cs[-1] != space: + cs.append(c) + # Add next char + elif (c.isalnum() or c == "'" or c == "\u0303") and c in tokens: + cs.append(c) + # Add punct + elif (c in self.PUNCT_LIST) and self.punct: + cs.append(c) + # Warn about unknown char + elif c != space: + logging.warning(f"Text: [{text}] contains unknown char: [{c}]. Symbol will be skipped.") + + # Remove trailing spaces + while cs[-1] == space: + cs.pop() + + if self.pad_with_space: + cs = [space] + cs + [space] + + return [self._token2id[p] for p in cs] + + class EnglishPhonemesTokenizer(BaseTokenizer): # fmt: off PUNCT_LIST = ( # Derived from LJSpeech and "/" additionally diff --git a/tests/collections/common/tokenizers/text_to_speech/test_tts_tokenizers.py b/tests/collections/common/tokenizers/text_to_speech/test_tts_tokenizers.py index 62c571bc16b7..bc065e75fa66 100644 --- a/tests/collections/common/tokenizers/text_to_speech/test_tts_tokenizers.py +++ b/tests/collections/common/tokenizers/text_to_speech/test_tts_tokenizers.py @@ -34,6 +34,10 @@ class TestTTSTokenizers: "BUENOS": ["bwˈenos"], "DÍAS": ["dˈias"], } + PHONEME_DICT_IT = { + "CIAO": ["tʃˈao"], + "MONDO": ["mˈondo"], + } @staticmethod def _parse_text(tokenizer, text): @@ -146,6 +150,18 @@ def test_ipa_tokenizer_de_de(self): assert chars == expected_output + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_ipa_tokenizer_it_it(self): + input_text = "Ciao mondo" + expected_output = "tʃˈao mˈondo" + + g2p = IpaG2p(phoneme_dict=self.PHONEME_DICT_IT, locale="it-IT") + tokenizer = IPATokenizer(g2p=g2p, locale="it-IT") + chars, tokens = self._parse_text(tokenizer, input_text) + + assert chars == expected_output + @pytest.mark.run_only_on('CPU') @pytest.mark.unit def test_ipa_tokenizer_en_us(self): From e603caded55f7913b27d5bfa974531833a2877a4 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 2 Oct 2023 20:42:39 -0700 Subject: [PATCH 286/512] best ckpt fix (#7564) (#7588) Signed-off-by: dimapihtar Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> --- nemo/utils/callbacks/nemo_model_checkpoint.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nemo/utils/callbacks/nemo_model_checkpoint.py b/nemo/utils/callbacks/nemo_model_checkpoint.py index d4759ecf5949..5089f50bd527 100644 --- a/nemo/utils/callbacks/nemo_model_checkpoint.py +++ b/nemo/utils/callbacks/nemo_model_checkpoint.py @@ -209,6 +209,8 @@ def on_train_end(self, trainer, pl_module): "were found. Saving latest model instead." ) else: + if os.path.isdir(self.best_model_path.split('.ckpt')[0]): + self.best_model_path = self.best_model_path.split('.ckpt')[0] self.best_model_path = trainer.strategy.broadcast(self.best_model_path) trainer._checkpoint_connector.restore(self.best_model_path) From 4d5184cad1631c13094d344e115594de1b0621e7 Mon Sep 17 00:00:00 2001 From: George <37293288+Jorjeous@users.noreply.github.com> Date: Tue, 3 Oct 2023 18:54:21 +0400 Subject: [PATCH 287/512] Add files via upload (#7598) specifies the branch Signed-off-by: George <37293288+Jorjeous@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --- tutorials/tools/SDE_HowTo_v2.ipynb | 837 +---------------------------- 1 file changed, 5 insertions(+), 832 deletions(-) diff --git a/tutorials/tools/SDE_HowTo_v2.ipynb b/tutorials/tools/SDE_HowTo_v2.ipynb index a6d3f8dd2723..4087219a8dad 100644 --- a/tutorials/tools/SDE_HowTo_v2.ipynb +++ b/tutorials/tools/SDE_HowTo_v2.ipynb @@ -44,840 +44,13 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "c3919489", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "87f4e2f4-a06c-432d-d986-429fbe6714af" + "id": "c3919489" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Cloning into 'NeMo'...\n", - "remote: Enumerating objects: 121443, done.\u001b[K\n", - "remote: Counting objects: 100% (1811/1811), done.\u001b[K\n", - "remote: Compressing objects: 100% (945/945), done.\u001b[K\n", - "remote: Total 121443 (delta 1299), reused 1238 (delta 864), pack-reused 119632\u001b[K\n", - "Receiving objects: 100% (121443/121443), 228.05 MiB | 21.34 MiB/s, done.\n", - "Resolving deltas: 100% (90608/90608), done.\n", - "Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [110 kB]\n", - "Hit:2 http://archive.ubuntu.com/ubuntu jammy InRelease\n", - "Get:3 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [119 kB]\n", - "Get:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]\n", - "Get:5 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [109 kB]\n", - "Hit:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 InRelease\n", - "Hit:7 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease\n", - "Get:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease [18.1 kB]\n", - "Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease\n", - "Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease\n", - "Get:11 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [962 kB]\n", - "Get:12 http://security.ubuntu.com/ubuntu jammy-security/restricted amd64 Packages [1,059 kB]\n", - "Get:13 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [993 kB]\n", - "Get:14 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 Packages [1,254 kB]\n", - "Get:15 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 Packages [1,230 kB]\n", - "Get:16 http://archive.ubuntu.com/ubuntu jammy-updates/restricted amd64 Packages [1,079 kB]\n", - "Get:17 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy/main amd64 Packages [21.8 kB]\n", - "Fetched 6,959 kB in 1s (5,794 kB/s)\n", - "Reading package lists... Done\n", - "Reading package lists... Done\n", - "Building dependency tree... Done\n", - "Reading state information... Done\n", - "libsndfile1 is already the newest version (1.0.31-2build1).\n", - "ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).\n", - "0 upgraded, 0 newly installed, 0 to remove and 16 not upgraded.\n", - "Requirement already satisfied: pip in /usr/local/lib/python3.10/dist-packages (23.1.2)\n", - "Collecting pip\n", - " Downloading pip-23.2.1-py3-none-any.whl (2.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hInstalling collected packages: pip\n", - " Attempting uninstall: pip\n", - " Found existing installation: pip 23.1.2\n", - " Uninstalling pip-23.1.2:\n", - " Successfully uninstalled pip-23.1.2\n", - "Successfully installed pip-23.2.1\n", - "Uninstalling stuff\n", - "\u001b[33mWARNING: Skipping nemo_toolkit as it is not installed.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: Skipping sacrebleu as it is not installed.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: Skipping nemo_asr as it is not installed.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: Skipping nemo_nlp as it is not installed.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: Skipping nemo_tts as it is not installed.\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0mInstalling nemo\n", - "Obtaining file:///content/NeMo\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Checking if build backend supports build_editable ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build editable ... \u001b[?25l\u001b[?25hdone\n", - " Preparing editable metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting huggingface-hub (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for huggingface-hub from https://files.pythonhosted.org/packages/7f/c4/adcbe9a696c135578cabcbdd7331332daad4d49b7c43688bc2d36b3a47d2/huggingface_hub-0.16.4-py3-none-any.whl.metadata\n", - " Downloading huggingface_hub-0.16.4-py3-none-any.whl.metadata (12 kB)\n", - "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (0.56.4)\n", - "Requirement already satisfied: numpy<1.24,>=1.22 in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (1.23.5)\n", - "Collecting onnx>=1.7.0 (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for onnx>=1.7.0 from https://files.pythonhosted.org/packages/47/d4/f2d212558245e252b936247666c3f5981e6dba62ec470ff8be3df3389364/onnx-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading onnx-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (15 kB)\n", - "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (2.8.2)\n", - "Collecting ruamel.yaml (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for ruamel.yaml from https://files.pythonhosted.org/packages/d9/0e/2a05efa11ea33513fbdf4a2e2576fe94fd8fa5ad226dbb9c660886390974/ruamel.yaml-0.17.32-py3-none-any.whl.metadata\n", - " Downloading ruamel.yaml-0.17.32-py3-none-any.whl.metadata (17 kB)\n", - "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (1.2.2)\n", - "Collecting setuptools==65.5.1 (from nemo-toolkit==1.21.0rc0)\n", - " Downloading setuptools-65.5.1-py3-none-any.whl (1.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (2.12.3)\n", - "Requirement already satisfied: text-unidecode in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (1.3)\n", - "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (2.0.1+cu118)\n", - "Requirement already satisfied: tqdm>=4.41.0 in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (4.66.1)\n", - "Collecting wget (from nemo-toolkit==1.21.0rc0)\n", - " Downloading wget-3.2.zip (10 kB)\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (1.14.1)\n", - "Collecting black==19.10b0 (from nemo-toolkit==1.21.0rc0)\n", - " Downloading black-19.10b0-py36-none-any.whl (97 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.5/97.5 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting click==8.0.2 (from nemo-toolkit==1.21.0rc0)\n", - " Downloading click-8.0.2-py3-none-any.whl (97 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.6/97.6 kB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting isort<6.0.0,>5.1.0 (from nemo-toolkit==1.21.0rc0)\n", - " Downloading isort-5.12.0-py3-none-any.whl (91 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m91.2/91.2 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting parameterized (from nemo-toolkit==1.21.0rc0)\n", - " Downloading parameterized-0.9.0-py2.py3-none-any.whl (20 kB)\n", - "Requirement already satisfied: pytest in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (7.4.1)\n", - "Collecting pytest-runner (from nemo-toolkit==1.21.0rc0)\n", - " Downloading pytest_runner-6.0.0-py3-none-any.whl (7.2 kB)\n", - "Requirement already satisfied: sphinx in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (5.0.2)\n", - "Collecting sphinxcontrib-bibtex (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for sphinxcontrib-bibtex from https://files.pythonhosted.org/packages/79/59/fafc5c480506cc356e2a7ea009d7c7d75812475b4385fe851ae55575661c/sphinxcontrib_bibtex-2.6.1-py3-none-any.whl.metadata\n", - " Downloading sphinxcontrib_bibtex-2.6.1-py3-none-any.whl.metadata (6.1 kB)\n", - "Collecting wandb (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for wandb from https://files.pythonhosted.org/packages/fe/10/18b03623c460fd433525d9b4739af58c5e69f5974328dcdd037cfbc855d7/wandb-0.15.10-py3-none-any.whl.metadata\n", - " Downloading wandb-0.15.10-py3-none-any.whl.metadata (9.6 kB)\n", - "Collecting hydra-core<=1.3.2,>1.3 (from nemo-toolkit==1.21.0rc0)\n", - " Downloading hydra_core-1.3.2-py3-none-any.whl (154 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.5/154.5 kB\u001b[0m \u001b[31m16.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting omegaconf<=2.3 (from nemo-toolkit==1.21.0rc0)\n", - " Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting pytorch-lightning<=2.0.7,>=2.0 (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for pytorch-lightning<=2.0.7,>=2.0 from https://files.pythonhosted.org/packages/d5/ef/39994adec1fe1d5f25fd0dd0a82abcd8bd61fc968283790b9da7463f0279/pytorch_lightning-2.0.7-py3-none-any.whl.metadata\n", - " Downloading pytorch_lightning-2.0.7-py3-none-any.whl.metadata (23 kB)\n", - "Collecting torchmetrics>=0.11.0 (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for torchmetrics>=0.11.0 from https://files.pythonhosted.org/packages/e3/86/47091c33ecf05f8826d134fd518485d4c68ca524c053b2fdd4e041c20547/torchmetrics-1.1.1-py3-none-any.whl.metadata\n", - " Downloading torchmetrics-1.1.1-py3-none-any.whl.metadata (21 kB)\n", - "Collecting transformers>=4.0.1 (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for transformers>=4.0.1 from https://files.pythonhosted.org/packages/13/30/54b59e73400df3de506ad8630284e9fd63f4b94f735423d55fc342181037/transformers-4.33.1-py3-none-any.whl.metadata\n", - " Downloading transformers-4.33.1-py3-none-any.whl.metadata (119 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m119.9/119.9 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting webdataset<=0.1.62,>=0.1.48 (from nemo-toolkit==1.21.0rc0)\n", - " Downloading webdataset-0.1.62-py3-none-any.whl (32 kB)\n", - "Requirement already satisfied: inflect in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (7.0.0)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (1.5.3)\n", - "Collecting pydantic<2 (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for pydantic<2 from https://files.pythonhosted.org/packages/bc/e0/0371e9b6c910afe502e5fe18cc94562bfd9399617c7b4f5b6e13c29115b3/pydantic-1.10.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading pydantic-1.10.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (149 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.3/149.3 kB\u001b[0m \u001b[31m17.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting sacremoses>=0.0.43 (from nemo-toolkit==1.21.0rc0)\n", - " Downloading sacremoses-0.0.53.tar.gz (880 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m880.6/880.6 kB\u001b[0m \u001b[31m24.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting sentencepiece<1.0.0 (from nemo-toolkit==1.21.0rc0)\n", - " Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m34.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting youtokentome>=1.0.5 (from nemo-toolkit==1.21.0rc0)\n", - " Downloading youtokentome-1.0.6.tar.gz (86 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.7/86.7 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting braceexpand (from nemo-toolkit==1.21.0rc0)\n", - " Downloading braceexpand-0.1.7-py2.py3-none-any.whl (5.9 kB)\n", - "Requirement already satisfied: editdistance in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (0.6.2)\n", - "Collecting g2p-en (from nemo-toolkit==1.21.0rc0)\n", - " Downloading g2p_en-2.1.0-py3-none-any.whl (3.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m51.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: ipywidgets in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (7.7.1)\n", - "Collecting jiwer (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for jiwer from https://files.pythonhosted.org/packages/0d/4f/ee537ab20144811dd99321735ff92ef2b3a3230b77ed7454bed4c44d21fc/jiwer-3.0.3-py3-none-any.whl.metadata\n", - " Downloading jiwer-3.0.3-py3-none-any.whl.metadata (2.6 kB)\n", - "Collecting kaldi-python-io (from nemo-toolkit==1.21.0rc0)\n", - " Downloading kaldi-python-io-1.2.2.tar.gz (8.8 kB)\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting kaldiio (from nemo-toolkit==1.21.0rc0)\n", - " Downloading kaldiio-2.18.0-py3-none-any.whl (28 kB)\n", - "Requirement already satisfied: librosa>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (0.10.1)\n", - "Collecting marshmallow (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for marshmallow from https://files.pythonhosted.org/packages/ed/3c/cebfdcad015240014ff08b883d1c0c427f2ba45ae8c6572851b6ef136cad/marshmallow-3.20.1-py3-none-any.whl.metadata\n", - " Downloading marshmallow-3.20.1-py3-none-any.whl.metadata (7.8 kB)\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (3.7.1)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (23.1)\n", - "Collecting pyannote.core (from nemo-toolkit==1.21.0rc0)\n", - " Downloading pyannote.core-5.0.0-py3-none-any.whl (58 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.5/58.5 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting pyannote.metrics (from nemo-toolkit==1.21.0rc0)\n", - " Downloading pyannote.metrics-3.2.1-py3-none-any.whl (51 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.4/51.4 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting pydub (from nemo-toolkit==1.21.0rc0)\n", - " Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", - "Requirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (1.10.1)\n", - "Requirement already satisfied: soundfile in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (0.12.1)\n", - "Collecting sox (from nemo-toolkit==1.21.0rc0)\n", - " Downloading sox-1.4.1-py2.py3-none-any.whl (39 kB)\n", - "Collecting texterrors (from nemo-toolkit==1.21.0rc0)\n", - " Downloading texterrors-0.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m50.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting boto3 (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for boto3 from https://files.pythonhosted.org/packages/6e/7f/ffb72ddc9f465183af04623baf9d0ea70e73cb0957407e4c333b9e0263fb/boto3-1.28.43-py3-none-any.whl.metadata\n", - " Downloading boto3-1.28.43-py3-none-any.whl.metadata (6.7 kB)\n", - "Collecting datasets (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for datasets from https://files.pythonhosted.org/packages/09/7e/fd4d6441a541dba61d0acb3c1fd5df53214c2e9033854e837a99dd9e0793/datasets-2.14.5-py3-none-any.whl.metadata\n", - " Downloading datasets-2.14.5-py3-none-any.whl.metadata (19 kB)\n", - "Collecting einops (from nemo-toolkit==1.21.0rc0)\n", - " Downloading einops-0.6.1-py3-none-any.whl (42 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.2/42.2 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting faiss-cpu (from nemo-toolkit==1.21.0rc0)\n", - " Downloading faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.6/17.6 MB\u001b[0m \u001b[31m60.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting fasttext (from nemo-toolkit==1.21.0rc0)\n", - " Downloading fasttext-0.9.2.tar.gz (68 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m68.8/68.8 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting flask-restful (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for flask-restful from https://files.pythonhosted.org/packages/d7/7b/f0b45f0df7d2978e5ae51804bb5939b7897b2ace24306009da0cc34d8d1f/Flask_RESTful-0.3.10-py2.py3-none-any.whl.metadata\n", - " Downloading Flask_RESTful-0.3.10-py2.py3-none-any.whl.metadata (1.0 kB)\n", - "Collecting ftfy (from nemo-toolkit==1.21.0rc0)\n", - " Downloading ftfy-6.1.1-py3-none-any.whl (53 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: gdown in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (4.6.6)\n", - "Requirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (3.9.0)\n", - "Collecting ijson (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for ijson from https://files.pythonhosted.org/packages/6b/78/2cbeb7020a7a319d148c92331951cfc710864990e32ff6c7f4859729fb48/ijson-3.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading ijson-3.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)\n", - "Requirement already satisfied: jieba in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (0.42.1)\n", - "Collecting markdown2 (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for markdown2 from https://files.pythonhosted.org/packages/f1/98/61276a753f078dd2f3171c9a69fd3f451d220e806b2b1cdca41b8e368b0f/markdown2-2.4.10-py2.py3-none-any.whl.metadata\n", - " Downloading markdown2-2.4.10-py2.py3-none-any.whl.metadata (2.0 kB)\n", - "Collecting megatron-core==0.2.0 (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for megatron-core==0.2.0 from https://files.pythonhosted.org/packages/33/f1/d94f2282b91950e31223efc39138748d71907dbf857e5523d1e73619fd62/megatron_core-0.2.0-py3-none-any.whl.metadata\n", - " Downloading megatron_core-0.2.0-py3-none-any.whl.metadata (1.6 kB)\n", - "Requirement already satisfied: nltk>=3.6.5 in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (3.8.1)\n", - "Collecting opencc (from nemo-toolkit==1.21.0rc0)\n", - " Downloading OpenCC-1.1.6-cp310-cp310-manylinux1_x86_64.whl (778 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m778.3/778.3 kB\u001b[0m \u001b[31m70.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting pangu (from nemo-toolkit==1.21.0rc0)\n", - " Downloading pangu-4.0.6.1-py3-none-any.whl (6.4 kB)\n", - "Collecting rapidfuzz (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for rapidfuzz from https://files.pythonhosted.org/packages/35/04/9ca97b17da457ed294519477da2aad0799c9ba8eebf37761a5ca94c35534/rapidfuzz-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading rapidfuzz-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", - "Collecting rouge-score (from nemo-toolkit==1.21.0rc0)\n", - " Downloading rouge_score-0.1.2.tar.gz (17 kB)\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting sacrebleu[ja] (from nemo-toolkit==1.21.0rc0)\n", - " Downloading sacrebleu-2.3.1-py3-none-any.whl (118 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m118.9/118.9 kB\u001b[0m \u001b[31m16.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting sentence-transformers (from nemo-toolkit==1.21.0rc0)\n", - " Downloading sentence-transformers-2.2.2.tar.gz (85 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: tensorstore in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (0.1.41)\n", - "Collecting zarr (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for zarr from https://files.pythonhosted.org/packages/ba/55/0f5ec28561a1698ac5c11edc5724f8c6d48d01baecf740ffd62107d95e7f/zarr-2.16.1-py3-none-any.whl.metadata\n", - " Downloading zarr-2.16.1-py3-none-any.whl.metadata (5.8 kB)\n", - "Collecting attrdict (from nemo-toolkit==1.21.0rc0)\n", - " Downloading attrdict-2.0.1-py2.py3-none-any.whl (9.9 kB)\n", - "Collecting kornia (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for kornia from https://files.pythonhosted.org/packages/55/da/72cb83aa364ebb4d0109965e20c5d33d7063ccab15332c3fd0acfd5609c9/kornia-0.7.0-py2.py3-none-any.whl.metadata\n", - " Downloading kornia-0.7.0-py2.py3-none-any.whl.metadata (12 kB)\n", - "Collecting nemo-text-processing (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for nemo-text-processing from https://files.pythonhosted.org/packages/bd/82/b776d01ba650c3ab42ba5e381e34b35e507a21b16ad1246f51026fa13f0b/nemo_text_processing-0.2.0rc0-py3-none-any.whl.metadata\n", - " Downloading nemo_text_processing-0.2.0rc0-py3-none-any.whl.metadata (7.2 kB)\n", - "Collecting pypinyin (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for pypinyin from https://files.pythonhosted.org/packages/00/fc/3e82bf38739a7b2c4f699245ce6c84ff254723c678c2cdc5d2ecbddf9afb/pypinyin-0.49.0-py2.py3-none-any.whl.metadata\n", - " Downloading pypinyin-0.49.0-py2.py3-none-any.whl.metadata (12 kB)\n", - "Collecting pypinyin-dict (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for pypinyin-dict from https://files.pythonhosted.org/packages/89/31/16c26425685a84191503a226450837e0f4d540c164665d8567f2472861a9/pypinyin_dict-0.6.0-py2.py3-none-any.whl.metadata\n", - " Downloading pypinyin_dict-0.6.0-py2.py3-none-any.whl.metadata (3.6 kB)\n", - "Collecting progress>=1.5 (from nemo-toolkit==1.21.0rc0)\n", - " Downloading progress-1.6.tar.gz (7.8 kB)\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: tabulate>=0.8.7 in /usr/local/lib/python3.10/dist-packages (from nemo-toolkit==1.21.0rc0) (0.9.0)\n", - "Collecting textdistance>=4.1.5 (from nemo-toolkit==1.21.0rc0)\n", - " Downloading textdistance-4.5.0-py3-none-any.whl (31 kB)\n", - "Requirement already satisfied: attrs>=18.1.0 in /usr/local/lib/python3.10/dist-packages (from black==19.10b0->nemo-toolkit==1.21.0rc0) (23.1.0)\n", - "Requirement already satisfied: appdirs in /usr/local/lib/python3.10/dist-packages (from black==19.10b0->nemo-toolkit==1.21.0rc0) (1.4.4)\n", - "Requirement already satisfied: toml>=0.9.4 in /usr/local/lib/python3.10/dist-packages (from black==19.10b0->nemo-toolkit==1.21.0rc0) (0.10.2)\n", - "Collecting typed-ast>=1.4.0 (from black==19.10b0->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for typed-ast>=1.4.0 from https://files.pythonhosted.org/packages/e2/ed/b9b8b794b37b55c9247b1e8d38b0361e8158795c181636d34d6c11b506e7/typed_ast-1.5.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading typed_ast-1.5.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n", - "Requirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from black==19.10b0->nemo-toolkit==1.21.0rc0) (2023.6.3)\n", - "Collecting pathspec<1,>=0.6 (from black==19.10b0->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for pathspec<1,>=0.6 from https://files.pythonhosted.org/packages/b4/2a/9b1be29146139ef459188f5e420a66e835dda921208db600b7037093891f/pathspec-0.11.2-py3-none-any.whl.metadata\n", - " Downloading pathspec-0.11.2-py3-none-any.whl.metadata (19 kB)\n", - "Collecting antlr4-python3-runtime==4.9.* (from hydra-core<=1.3.2,>1.3->nemo-toolkit==1.21.0rc0)\n", - " Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "INFO: pip is looking at multiple versions of jiwer to determine which version is compatible with other requirements. This could take a while.\n", - "Collecting jiwer (from nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for jiwer from https://files.pythonhosted.org/packages/23/a3/92c29a5e422acd87e3b4f2e6dc0ce877070cc9b2f81d30fe84122032338a/jiwer-3.0.2-py3-none-any.whl.metadata\n", - " Downloading jiwer-3.0.2-py3-none-any.whl.metadata (2.6 kB)\n", - " Downloading jiwer-3.0.1-py3-none-any.whl (21 kB)\n", - " Downloading jiwer-3.0.0-py3-none-any.whl (21 kB)\n", - " Downloading jiwer-2.6.0-py3-none-any.whl (20 kB)\n", - " Downloading jiwer-2.5.2-py3-none-any.whl (15 kB)\n", - "Collecting rapidfuzz (from nemo-toolkit==1.21.0rc0)\n", - " Downloading rapidfuzz-2.13.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m96.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.0->nemo-toolkit==1.21.0rc0) (3.0.0)\n", - "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.0->nemo-toolkit==1.21.0rc0) (1.3.2)\n", - "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.0->nemo-toolkit==1.21.0rc0) (4.4.2)\n", - "Requirement already satisfied: pooch>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.0->nemo-toolkit==1.21.0rc0) (1.7.0)\n", - "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.0->nemo-toolkit==1.21.0rc0) (0.3.6)\n", - "Requirement already satisfied: typing-extensions>=4.1.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.0->nemo-toolkit==1.21.0rc0) (4.7.1)\n", - "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.0->nemo-toolkit==1.21.0rc0) (0.3)\n", - "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.0->nemo-toolkit==1.21.0rc0) (1.0.5)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo-toolkit==1.21.0rc0) (1.1.0)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo-toolkit==1.21.0rc0) (0.11.0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo-toolkit==1.21.0rc0) (4.42.1)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo-toolkit==1.21.0rc0) (1.4.5)\n", - "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo-toolkit==1.21.0rc0) (9.4.0)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo-toolkit==1.21.0rc0) (3.1.1)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->nemo-toolkit==1.21.0rc0) (0.39.1)\n", - "Requirement already satisfied: PyYAML>=5.1.0 in /usr/local/lib/python3.10/dist-packages (from omegaconf<=2.3->nemo-toolkit==1.21.0rc0) (6.0.1)\n", - "Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.10/dist-packages (from onnx>=1.7.0->nemo-toolkit==1.21.0rc0) (3.20.3)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil->nemo-toolkit==1.21.0rc0) (1.16.0)\n", - "Requirement already satisfied: fsspec[http]>2021.06.0 in /usr/local/lib/python3.10/dist-packages (from pytorch-lightning<=2.0.7,>=2.0->nemo-toolkit==1.21.0rc0) (2023.6.0)\n", - "Collecting lightning-utilities>=0.7.0 (from pytorch-lightning<=2.0.7,>=2.0->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for lightning-utilities>=0.7.0 from https://files.pythonhosted.org/packages/46/ee/8641eeb6a062f383b7d6875604e1f3f83bd2c93a0b4dbcabd3150b32de6e/lightning_utilities-0.9.0-py3-none-any.whl.metadata\n", - " Downloading lightning_utilities-0.9.0-py3-none-any.whl.metadata (4.6 kB)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->nemo-toolkit==1.21.0rc0) (3.2.0)\n", - "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile->nemo-toolkit==1.21.0rc0) (1.15.1)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->nemo-toolkit==1.21.0rc0) (3.12.3)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->nemo-toolkit==1.21.0rc0) (1.12)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->nemo-toolkit==1.21.0rc0) (3.1)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->nemo-toolkit==1.21.0rc0) (3.1.2)\n", - "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch->nemo-toolkit==1.21.0rc0) (2.0.0)\n", - "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->nemo-toolkit==1.21.0rc0) (3.27.2)\n", - "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->nemo-toolkit==1.21.0rc0) (16.0.6)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers>=4.0.1->nemo-toolkit==1.21.0rc0) (2.31.0)\n", - "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers>=4.0.1->nemo-toolkit==1.21.0rc0)\n", - " Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m81.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting safetensors>=0.3.1 (from transformers>=4.0.1->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for safetensors>=0.3.1 from https://files.pythonhosted.org/packages/6c/f0/c17bbdb1e5f9dab29d44cade445135789f75f8f08ea2728d04493ea8412b/safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.7 kB)\n", - "Collecting botocore<1.32.0,>=1.31.43 (from boto3->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for botocore<1.32.0,>=1.31.43 from https://files.pythonhosted.org/packages/88/37/68fd026cde5d1c802ab34290285c5019e7ba3de3eea8e6c07756cfb827ae/botocore-1.31.43-py3-none-any.whl.metadata\n", - " Downloading botocore-1.31.43-py3-none-any.whl.metadata (6.0 kB)\n", - "Collecting jmespath<2.0.0,>=0.7.1 (from boto3->nemo-toolkit==1.21.0rc0)\n", - " Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n", - "Collecting s3transfer<0.7.0,>=0.6.0 (from boto3->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for s3transfer<0.7.0,>=0.6.0 from https://files.pythonhosted.org/packages/d9/17/a3b666f5ef9543cfd3c661d39d1e193abb9649d0cfbbfee3cf3b51d5af02/s3transfer-0.6.2-py3-none-any.whl.metadata\n", - " Downloading s3transfer-0.6.2-py3-none-any.whl.metadata (1.8 kB)\n", - "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->nemo-toolkit==1.21.0rc0) (9.0.0)\n", - "Collecting dill<0.3.8,>=0.3.0 (from datasets->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for dill<0.3.8,>=0.3.0 from https://files.pythonhosted.org/packages/f5/3a/74a29b11cf2cdfcd6ba89c0cecd70b37cd1ba7b77978ce611eb7a146a832/dill-0.3.7-py3-none-any.whl.metadata\n", - " Downloading dill-0.3.7-py3-none-any.whl.metadata (9.9 kB)\n", - "Collecting xxhash (from datasets->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for xxhash from https://files.pythonhosted.org/packages/13/c3/e942893f4864a424514c81640f114980cfd5aff7e7414d1e0255f4571111/xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", - "Collecting multiprocess (from datasets->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for multiprocess from https://files.pythonhosted.org/packages/35/a8/36d8d7b3e46b377800d8dec47891cdf05842d1a2366909ae4a0c89fbc5e6/multiprocess-0.70.15-py310-none-any.whl.metadata\n", - " Downloading multiprocess-0.70.15-py310-none-any.whl.metadata (7.2 kB)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->nemo-toolkit==1.21.0rc0) (3.8.5)\n", - "Collecting pybind11>=2.2 (from fasttext->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for pybind11>=2.2 from https://files.pythonhosted.org/packages/06/55/9f73c32dda93fa4f539fafa268f9504e83c489f460c380371d94296126cd/pybind11-2.11.1-py3-none-any.whl.metadata\n", - " Using cached pybind11-2.11.1-py3-none-any.whl.metadata (9.5 kB)\n", - "Collecting aniso8601>=0.82 (from flask-restful->nemo-toolkit==1.21.0rc0)\n", - " Downloading aniso8601-9.0.1-py2.py3-none-any.whl (52 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.8/52.8 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: Flask>=0.8 in /usr/local/lib/python3.10/dist-packages (from flask-restful->nemo-toolkit==1.21.0rc0) (2.2.5)\n", - "Requirement already satisfied: pytz in /usr/local/lib/python3.10/dist-packages (from flask-restful->nemo-toolkit==1.21.0rc0) (2023.3.post1)\n", - "Requirement already satisfied: wcwidth>=0.2.5 in /usr/local/lib/python3.10/dist-packages (from ftfy->nemo-toolkit==1.21.0rc0) (0.2.6)\n", - "Collecting distance>=0.1.3 (from g2p-en->nemo-toolkit==1.21.0rc0)\n", - " Downloading Distance-0.1.3.tar.gz (180 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m180.3/180.3 kB\u001b[0m \u001b[31m24.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from gdown->nemo-toolkit==1.21.0rc0) (4.11.2)\n", - "Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo-toolkit==1.21.0rc0) (5.5.6)\n", - "Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo-toolkit==1.21.0rc0) (0.2.0)\n", - "Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo-toolkit==1.21.0rc0) (5.7.1)\n", - "Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo-toolkit==1.21.0rc0) (3.6.5)\n", - "Requirement already satisfied: ipython>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo-toolkit==1.21.0rc0) (7.34.0)\n", - "Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo-toolkit==1.21.0rc0) (3.0.8)\n", - "Collecting cdifflib (from nemo-text-processing->nemo-toolkit==1.21.0rc0)\n", - " Downloading cdifflib-1.2.6.tar.gz (11 kB)\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting pynini==2.1.5 (from nemo-text-processing->nemo-toolkit==1.21.0rc0)\n", - " Downloading pynini-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m161.3/161.3 MB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: Cython>=0.29 in /usr/local/lib/python3.10/dist-packages (from pynini==2.1.5->nemo-text-processing->nemo-toolkit==1.21.0rc0) (0.29.36)\n", - "Requirement already satisfied: sortedcontainers>=2.0.4 in /usr/local/lib/python3.10/dist-packages (from pyannote.core->nemo-toolkit==1.21.0rc0) (2.4.0)\n", - "Collecting pyannote.database>=4.0.1 (from pyannote.metrics->nemo-toolkit==1.21.0rc0)\n", - " Downloading pyannote.database-5.0.1-py3-none-any.whl (48 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.1/48.1 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting docopt>=0.6.2 (from pyannote.metrics->nemo-toolkit==1.21.0rc0)\n", - " Downloading docopt-0.6.2.tar.gz (25 kB)\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: iniconfig in /usr/local/lib/python3.10/dist-packages (from pytest->nemo-toolkit==1.21.0rc0) (2.0.0)\n", - "Requirement already satisfied: pluggy<2.0,>=0.12 in /usr/local/lib/python3.10/dist-packages (from pytest->nemo-toolkit==1.21.0rc0) (1.3.0)\n", - "Requirement already satisfied: exceptiongroup>=1.0.0rc8 in /usr/local/lib/python3.10/dist-packages (from pytest->nemo-toolkit==1.21.0rc0) (1.1.3)\n", - "Requirement already satisfied: tomli>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from pytest->nemo-toolkit==1.21.0rc0) (2.0.1)\n", - "Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from rouge-score->nemo-toolkit==1.21.0rc0) (1.4.0)\n", - "Collecting ruamel.yaml.clib>=0.2.7 (from ruamel.yaml->nemo-toolkit==1.21.0rc0)\n", - " Downloading ruamel.yaml.clib-0.2.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (485 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m485.6/485.6 kB\u001b[0m \u001b[31m49.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting portalocker (from sacrebleu[ja]->nemo-toolkit==1.21.0rc0)\n", - " Downloading portalocker-2.7.0-py2.py3-none-any.whl (15 kB)\n", - "Collecting colorama (from sacrebleu[ja]->nemo-toolkit==1.21.0rc0)\n", - " Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n", - "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from sacrebleu[ja]->nemo-toolkit==1.21.0rc0) (4.9.3)\n", - "Collecting mecab-python3==1.0.5 (from sacrebleu[ja]->nemo-toolkit==1.21.0rc0)\n", - " Downloading mecab_python3-1.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (581 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m581.1/581.1 kB\u001b[0m \u001b[31m56.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting ipadic<2.0,>=1.0 (from sacrebleu[ja]->nemo-toolkit==1.21.0rc0)\n", - " Downloading ipadic-1.0.0.tar.gz (13.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.4/13.4 MB\u001b[0m \u001b[31m96.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from sentence-transformers->nemo-toolkit==1.21.0rc0) (0.15.2+cu118)\n", - "Requirement already satisfied: sphinxcontrib-applehelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo-toolkit==1.21.0rc0) (1.0.7)\n", - "Requirement already satisfied: sphinxcontrib-devhelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo-toolkit==1.21.0rc0) (1.0.5)\n", - "Requirement already satisfied: sphinxcontrib-jsmath in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo-toolkit==1.21.0rc0) (1.0.1)\n", - "Requirement already satisfied: sphinxcontrib-htmlhelp>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo-toolkit==1.21.0rc0) (2.0.4)\n", - "Requirement already satisfied: sphinxcontrib-serializinghtml>=1.1.5 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo-toolkit==1.21.0rc0) (1.1.9)\n", - "Requirement already satisfied: sphinxcontrib-qthelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo-toolkit==1.21.0rc0) (1.0.6)\n", - "Requirement already satisfied: Pygments>=2.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo-toolkit==1.21.0rc0) (2.16.1)\n", - "Requirement already satisfied: docutils<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo-toolkit==1.21.0rc0) (0.18.1)\n", - "Requirement already satisfied: snowballstemmer>=1.1 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo-toolkit==1.21.0rc0) (2.2.0)\n", - "Requirement already satisfied: babel>=1.3 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo-toolkit==1.21.0rc0) (2.12.1)\n", - "Requirement already satisfied: alabaster<0.8,>=0.7 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo-toolkit==1.21.0rc0) (0.7.13)\n", - "Requirement already satisfied: imagesize in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo-toolkit==1.21.0rc0) (1.4.1)\n", - "Collecting docutils<0.19,>=0.14 (from sphinx->nemo-toolkit==1.21.0rc0)\n", - " Downloading docutils-0.17.1-py2.py3-none-any.whl (575 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m575.5/575.5 kB\u001b[0m \u001b[31m42.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting pybtex>=0.24 (from sphinxcontrib-bibtex->nemo-toolkit==1.21.0rc0)\n", - " Downloading pybtex-0.24.0-py2.py3-none-any.whl (561 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m561.4/561.4 kB\u001b[0m \u001b[31m48.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting pybtex-docutils>=1.0.0 (from sphinxcontrib-bibtex->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for pybtex-docutils>=1.0.0 from https://files.pythonhosted.org/packages/11/b1/ce1f4596211efb5410e178a803f08e59b20bedb66837dcf41e21c54f9ec1/pybtex_docutils-1.0.3-py3-none-any.whl.metadata\n", - " Downloading pybtex_docutils-1.0.3-py3-none-any.whl.metadata (4.3 kB)\n", - "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo-toolkit==1.21.0rc0) (1.57.0)\n", - "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo-toolkit==1.21.0rc0) (2.17.3)\n", - "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo-toolkit==1.21.0rc0) (1.0.0)\n", - "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo-toolkit==1.21.0rc0) (3.4.4)\n", - "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo-toolkit==1.21.0rc0) (0.7.1)\n", - "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo-toolkit==1.21.0rc0) (2.3.7)\n", - "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo-toolkit==1.21.0rc0) (0.41.2)\n", - "Collecting plac (from texterrors->nemo-toolkit==1.21.0rc0)\n", - " Downloading plac-1.3.5-py2.py3-none-any.whl (22 kB)\n", - "Collecting loguru (from texterrors->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for loguru from https://files.pythonhosted.org/packages/19/a9/4e91197b121a41c640367641a510fd9a05bb7a3259fc9678ee2976c8fd00/loguru-0.7.1-py3-none-any.whl.metadata\n", - " Downloading loguru-0.7.1-py3-none-any.whl.metadata (22 kB)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from texterrors->nemo-toolkit==1.21.0rc0) (2.3.0)\n", - "Collecting Levenshtein (from texterrors->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for Levenshtein from https://files.pythonhosted.org/packages/e6/02/0a4ed6a9e2b78f6b57f25a87fc194d7d10c2bbe95d985f36390e86285232/Levenshtein-0.21.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading Levenshtein-0.21.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)\n", - "Collecting GitPython!=3.1.29,>=1.0.0 (from wandb->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for GitPython!=3.1.29,>=1.0.0 from https://files.pythonhosted.org/packages/0f/c6/bb9e2276b6fed126aa21e292493b45a3df4cfba7cbfcf2ab8809a6b0e718/GitPython-3.1.35-py3-none-any.whl.metadata\n", - " Downloading GitPython-3.1.35-py3-none-any.whl.metadata (10 kB)\n", - "Requirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb->nemo-toolkit==1.21.0rc0) (5.9.5)\n", - "Collecting sentry-sdk>=1.0.0 (from wandb->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for sentry-sdk>=1.0.0 from https://files.pythonhosted.org/packages/17/22/dbd5f854f373214d48585eeb6844e50a8dd1600f435d9033493f76f66dfa/sentry_sdk-1.30.0-py2.py3-none-any.whl.metadata\n", - " Downloading sentry_sdk-1.30.0-py2.py3-none-any.whl.metadata (9.6 kB)\n", - "Collecting docker-pycreds>=0.4.0 (from wandb->nemo-toolkit==1.21.0rc0)\n", - " Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n", - "Collecting pathtools (from wandb->nemo-toolkit==1.21.0rc0)\n", - " Downloading pathtools-0.1.2.tar.gz (11 kB)\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting setproctitle (from wandb->nemo-toolkit==1.21.0rc0)\n", - " Downloading setproctitle-1.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n", - "Collecting asciitree (from zarr->nemo-toolkit==1.21.0rc0)\n", - " Downloading asciitree-0.3.3.tar.gz (4.0 kB)\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting fasteners (from zarr->nemo-toolkit==1.21.0rc0)\n", - " Downloading fasteners-0.18-py3-none-any.whl (18 kB)\n", - "Collecting numcodecs>=0.10.0 (from zarr->nemo-toolkit==1.21.0rc0)\n", - " Downloading numcodecs-0.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.7/6.7 MB\u001b[0m \u001b[31m116.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting urllib3<1.27,>=1.25.4 (from botocore<1.32.0,>=1.31.43->boto3->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for urllib3<1.27,>=1.25.4 from https://files.pythonhosted.org/packages/c5/05/c214b32d21c0b465506f95c4f28ccbcba15022e000b043b72b3df7728471/urllib3-1.26.16-py2.py3-none-any.whl.metadata\n", - " Downloading urllib3-1.26.16-py2.py3-none-any.whl.metadata (48 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.4/48.4 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile->nemo-toolkit==1.21.0rc0) (2.21)\n", - "Requirement already satisfied: itsdangerous>=2.0 in /usr/local/lib/python3.10/dist-packages (from Flask>=0.8->flask-restful->nemo-toolkit==1.21.0rc0) (2.1.2)\n", - "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo-toolkit==1.21.0rc0) (3.2.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo-toolkit==1.21.0rc0) (6.0.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo-toolkit==1.21.0rc0) (4.0.3)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo-toolkit==1.21.0rc0) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo-toolkit==1.21.0rc0) (1.4.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo-toolkit==1.21.0rc0) (1.3.1)\n", - "Collecting gitdb<5,>=4.0.1 (from GitPython!=3.1.29,>=1.0.0->wandb->nemo-toolkit==1.21.0rc0)\n", - " Downloading gitdb-4.0.10-py3-none-any.whl (62 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo-toolkit==1.21.0rc0) (5.3.1)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo-toolkit==1.21.0rc0) (0.3.0)\n", - "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo-toolkit==1.21.0rc0) (4.9)\n", - "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->nemo-toolkit==1.21.0rc0) (1.3.1)\n", - "Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets->nemo-toolkit==1.21.0rc0) (6.1.12)\n", - "Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets->nemo-toolkit==1.21.0rc0) (6.3.2)\n", - "Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for jedi>=0.16 from https://files.pythonhosted.org/packages/8e/46/7e3ae3aa2dcfcffc5138c6cef5448523218658411c84a2000bf75c8d3ec1/jedi-0.19.0-py2.py3-none-any.whl.metadata\n", - " Downloading jedi-0.19.0-py2.py3-none-any.whl.metadata (22 kB)\n", - "Requirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo-toolkit==1.21.0rc0) (0.7.5)\n", - "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo-toolkit==1.21.0rc0) (3.0.39)\n", - "Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo-toolkit==1.21.0rc0) (0.2.0)\n", - "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo-toolkit==1.21.0rc0) (0.1.6)\n", - "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo-toolkit==1.21.0rc0) (4.8.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->nemo-toolkit==1.21.0rc0) (2.1.3)\n", - "Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from numcodecs>=0.10.0->zarr->nemo-toolkit==1.21.0rc0) (0.4)\n", - "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa>=0.9.0->nemo-toolkit==1.21.0rc0) (3.10.0)\n", - "Requirement already satisfied: typer[all]>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from pyannote.database>=4.0.1->pyannote.metrics->nemo-toolkit==1.21.0rc0) (0.9.0)\n", - "Collecting latexcodec>=1.0.4 (from pybtex>=0.24->sphinxcontrib-bibtex->nemo-toolkit==1.21.0rc0)\n", - " Downloading latexcodec-2.0.1-py2.py3-none-any.whl (18 kB)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.0.1->nemo-toolkit==1.21.0rc0) (3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.0.1->nemo-toolkit==1.21.0rc0) (2023.7.22)\n", - "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->nemo-toolkit==1.21.0rc0) (1.3.0)\n", - "Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (6.5.5)\n", - "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->gdown->nemo-toolkit==1.21.0rc0) (2.5)\n", - "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.0.1->nemo-toolkit==1.21.0rc0) (1.7.1)\n", - "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->GitPython!=3.1.29,>=1.0.0->wandb->nemo-toolkit==1.21.0rc0)\n", - " Downloading smmap-5.0.0-py3-none-any.whl (24 kB)\n", - "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets->nemo-toolkit==1.21.0rc0) (0.8.3)\n", - "Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (23.2.1)\n", - "Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (23.1.0)\n", - "Requirement already satisfied: jupyter-core>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (5.3.1)\n", - "Requirement already satisfied: nbformat in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (5.9.2)\n", - "Requirement already satisfied: nbconvert>=5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (6.5.4)\n", - "Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (1.5.7)\n", - "Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (1.8.2)\n", - "Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (0.17.1)\n", - "Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (0.17.1)\n", - "Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (1.0.0)\n", - "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets->nemo-toolkit==1.21.0rc0) (0.7.0)\n", - "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->nemo-toolkit==1.21.0rc0) (0.5.0)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->nemo-toolkit==1.21.0rc0) (3.2.2)\n", - "Collecting shellingham<2.0.0,>=1.3.0 (from typer[all]>=0.2.1->pyannote.database>=4.0.1->pyannote.metrics->nemo-toolkit==1.21.0rc0)\n", - " Obtaining dependency information for shellingham<2.0.0,>=1.3.0 from https://files.pythonhosted.org/packages/57/70/0265437683625b2e6491736706d3d679d90e2a26f6bff59f4e46e09872b9/shellingham-1.5.3-py2.py3-none-any.whl.metadata\n", - " Downloading shellingham-1.5.3-py2.py3-none-any.whl.metadata (3.4 kB)\n", - "Requirement already satisfied: rich<14.0.0,>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer[all]>=0.2.1->pyannote.database>=4.0.1->pyannote.metrics->nemo-toolkit==1.21.0rc0) (13.5.2)\n", - "Requirement already satisfied: jupyter-server>=1.8 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (1.24.0)\n", - "Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (0.2.3)\n", - "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (6.0.0)\n", - "Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (0.7.1)\n", - "Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (0.2.2)\n", - "Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (0.8.4)\n", - "Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (0.8.0)\n", - "Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (1.5.0)\n", - "Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (1.2.1)\n", - "Requirement already satisfied: fastjsonschema in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (2.18.0)\n", - "Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (4.19.0)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich<14.0.0,>=10.11.0->typer[all]>=0.2.1->pyannote.database>=4.0.1->pyannote.metrics->nemo-toolkit==1.21.0rc0) (3.0.0)\n", - "Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (21.2.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (2023.7.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (0.30.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (0.10.2)\n", - "Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (3.7.1)\n", - "Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (1.6.2)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich<14.0.0,>=10.11.0->typer[all]>=0.2.1->pyannote.database>=4.0.1->pyannote.metrics->nemo-toolkit==1.21.0rc0) (0.1.2)\n", - "Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (0.5.1)\n", - "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo-toolkit==1.21.0rc0) (1.3.0)\n", - "Downloading megatron_core-0.2.0-py3-none-any.whl (46 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading onnx-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m71.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pydantic-1.10.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m81.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pytorch_lightning-2.0.7-py3-none-any.whl (724 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m725.0/725.0 kB\u001b[0m \u001b[31m55.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading torchmetrics-1.1.1-py3-none-any.whl (763 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m763.4/763.4 kB\u001b[0m \u001b[31m51.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading transformers-4.33.1-py3-none-any.whl (7.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m90.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m31.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading boto3-1.28.43-py3-none-any.whl (135 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m135.8/135.8 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading datasets-2.14.5-py3-none-any.whl (519 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.6/519.6 kB\u001b[0m \u001b[31m48.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading Flask_RESTful-0.3.10-py2.py3-none-any.whl (26 kB)\n", - "Downloading ijson-3.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (111 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.8/111.8 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading kornia-0.7.0-py2.py3-none-any.whl (705 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m705.7/705.7 kB\u001b[0m \u001b[31m35.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading markdown2-2.4.10-py2.py3-none-any.whl (39 kB)\n", - "Downloading marshmallow-3.20.1-py3-none-any.whl (49 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading nemo_text_processing-0.2.0rc0-py3-none-any.whl (2.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m50.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pypinyin-0.49.0-py2.py3-none-any.whl (1.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m69.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pypinyin_dict-0.6.0-py2.py3-none-any.whl (9.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.5/9.5 MB\u001b[0m \u001b[31m86.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading ruamel.yaml-0.17.32-py3-none-any.whl (112 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m112.2/112.2 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading sphinxcontrib_bibtex-2.6.1-py3-none-any.whl (40 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading wandb-0.15.10-py3-none-any.whl (2.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m76.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading zarr-2.16.1-py3-none-any.whl (206 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m206.9/206.9 kB\u001b[0m \u001b[31m21.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading botocore-1.31.43-py3-none-any.whl (11.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.2/11.2 MB\u001b[0m \u001b[31m87.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading dill-0.3.7-py3-none-any.whl (115 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m14.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading GitPython-3.1.35-py3-none-any.whl (188 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m188.8/188.8 kB\u001b[0m \u001b[31m23.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading lightning_utilities-0.9.0-py3-none-any.whl (23 kB)\n", - "Downloading pathspec-0.11.2-py3-none-any.whl (29 kB)\n", - "Using cached pybind11-2.11.1-py3-none-any.whl (227 kB)\n", - "Downloading pybtex_docutils-1.0.3-py3-none-any.whl (6.4 kB)\n", - "Downloading s3transfer-0.6.2-py3-none-any.whl (79 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.8/79.8 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m77.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading sentry_sdk-1.30.0-py2.py3-none-any.whl (218 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m218.8/218.8 kB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading typed_ast-1.5.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (824 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m824.7/824.7 kB\u001b[0m \u001b[31m52.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading Levenshtein-0.21.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (172 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m172.5/172.5 kB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading loguru-0.7.1-py3-none-any.whl (61 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.4/61.4 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading jedi-0.19.0-py2.py3-none-any.whl (1.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m66.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading urllib3-1.26.16-py2.py3-none-any.whl (143 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.1/143.1 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading shellingham-1.5.3-py2.py3-none-any.whl (9.7 kB)\n", - "Building wheels for collected packages: antlr4-python3-runtime, progress, sacremoses, youtokentome, fasttext, kaldi-python-io, nemo-toolkit, rouge-score, sentence-transformers, wget, distance, docopt, ipadic, asciitree, cdifflib, pathtools\n", - " Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=ad5d3c0bd8ee550570d532ca332fe7204baee65f2066eefcf5fc70cd2afdb382\n", - " Stored in directory: /root/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88\n", - " Building wheel for progress (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for progress: filename=progress-1.6-py3-none-any.whl size=9610 sha256=79cc5bc84a633414a3856bc3a35542e0489aaffa62e7cdae95e51474a38f064c\n", - " Stored in directory: /root/.cache/pip/wheels/a2/68/5f/c339b20a41659d856c93ccdce6a33095493eb82c3964aac5a1\n", - " Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for sacremoses: filename=sacremoses-0.0.53-py3-none-any.whl size=895241 sha256=1fd4f16d193691e8ff7869591f5d11f5a32e21289fbb9a8422a800bc1bf781c7\n", - " Stored in directory: /root/.cache/pip/wheels/00/24/97/a2ea5324f36bc626e1ea0267f33db6aa80d157ee977e9e42fb\n", - " Building wheel for youtokentome (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for youtokentome: filename=youtokentome-1.0.6-cp310-cp310-linux_x86_64.whl size=1883668 sha256=d3e4925cf4b245a76184b4afe612a4fffd683de42889b5c1a07ca44be98dde9b\n", - " Stored in directory: /root/.cache/pip/wheels/df/85/f8/301d2ba45f43f30bed2fe413efa760bc726b8b660ed9c2900c\n", - " Building wheel for fasttext (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for fasttext: filename=fasttext-0.9.2-cp310-cp310-linux_x86_64.whl size=4199767 sha256=5bcc699963ee85f0f02846c1e278140b3ba3d95a7877d60b48c5b10f8f424bb1\n", - " Stored in directory: /root/.cache/pip/wheels/a5/13/75/f811c84a8ab36eedbaef977a6a58a98990e8e0f1967f98f394\n", - " Building wheel for kaldi-python-io (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for kaldi-python-io: filename=kaldi_python_io-1.2.2-py3-none-any.whl size=8948 sha256=d2e946cb4c2298a69803c0492c1533a3d08d313ced846ba7f1555a11ab164bd6\n", - " Stored in directory: /root/.cache/pip/wheels/b7/23/5f/49d3a826be576faf61d84e8028e1914bb36a5586ee2613b087\n", - " Building editable for nemo-toolkit (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for nemo-toolkit: filename=nemo_toolkit-1.21.0rc0-0.editable-py3-none-any.whl size=9181 sha256=d5c26c7630db00d8c6a483e19f89aa11a4d7a11920dcb052d8bb6a56689eb77d\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-4nra31ak/wheels/41/a7/71/00ccdddfb43c015e8d025853cafb90117dd722fd8ec557581b\n", - " Building wheel for rouge-score (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24932 sha256=7f61e2fe8b2f326d21f3a0a06e5b58e2f1103a5523bac58ff728481bfdf203d8\n", - " Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n", - " Building wheel for sentence-transformers (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for sentence-transformers: filename=sentence_transformers-2.2.2-py3-none-any.whl size=125923 sha256=b4fb59313f79b135e68441f9b4780a35d2658bac193693e6177887cb018824a7\n", - " Stored in directory: /root/.cache/pip/wheels/62/f2/10/1e606fd5f02395388f74e7462910fe851042f97238cbbd902f\n", - " Building wheel for wget (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9655 sha256=dd1cfe282f727f91b5a5db464ae58f43372f62a80d0a083733b36c68edc6ee76\n", - " Stored in directory: /root/.cache/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769\n", - " Building wheel for distance (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for distance: filename=Distance-0.1.3-py3-none-any.whl size=16258 sha256=287628136639656b068a6f2e67ab0f5a80bc4ad466590f19499978136c5e3726\n", - " Stored in directory: /root/.cache/pip/wheels/e8/bb/de/f71bf63559ea9a921059a5405806f7ff6ed612a9231c4a9309\n", - " Building wheel for docopt (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13705 sha256=9af9a132b49b73c475f9838a56ff79e968af41bb509c8b38b81f7c08c3da87dd\n", - " Stored in directory: /root/.cache/pip/wheels/fc/ab/d4/5da2067ac95b36618c629a5f93f809425700506f72c9732fac\n", - " Building wheel for ipadic (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for ipadic: filename=ipadic-1.0.0-py3-none-any.whl size=13556703 sha256=9e40daac3101fb3d52fcf9c986017638569c576c3be36805d1437e722a2d8803\n", - " Stored in directory: /root/.cache/pip/wheels/5b/ea/e3/2f6e0860a327daba3b030853fce4483ed37468bbf1101c59c3\n", - " Building wheel for asciitree (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for asciitree: filename=asciitree-0.3.3-py3-none-any.whl size=5034 sha256=69d1478059cdad8cb54b8a1a29e66bd3229cf3458dcf96aeedb978381d8116f3\n", - " Stored in directory: /root/.cache/pip/wheels/7f/4e/be/1171b40f43b918087657ec57cf3b81fa1a2e027d8755baa184\n", - " Building wheel for cdifflib (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for cdifflib: filename=cdifflib-1.2.6-cp310-cp310-linux_x86_64.whl size=27681 sha256=f4ccba50f62b7265ea20b84bac762b6ed3dacae8d42ea7346d54e07da2a6ad9e\n", - " Stored in directory: /root/.cache/pip/wheels/87/a7/fd/8061e24ed08689045cb6d1ca303768dc463b20a5a338174841\n", - " Building wheel for pathtools (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for pathtools: filename=pathtools-0.1.2-py3-none-any.whl size=8791 sha256=95b669a715a9eefa4cd1222ac5c899a97064dbb765dccbd3a5b35c0208aa1389\n", - " Stored in directory: /root/.cache/pip/wheels/e7/f3/22/152153d6eb222ee7a56ff8617d80ee5207207a8c00a7aab794\n", - "Successfully built antlr4-python3-runtime progress sacremoses youtokentome fasttext kaldi-python-io nemo-toolkit rouge-score sentence-transformers wget distance docopt ipadic asciitree cdifflib pathtools\n", - "Installing collected packages: wget, tokenizers, sentencepiece, safetensors, pydub, progress, plac, pathtools, pangu, opencc, mecab-python3, ipadic, ijson, faiss-cpu, docopt, distance, braceexpand, asciitree, antlr4-python3-runtime, aniso8601, xxhash, webdataset, urllib3, typed-ast, textdistance, sox, smmap, shellingham, setuptools, setproctitle, ruamel.yaml.clib, rapidfuzz, pytest-runner, pypinyin, pynini, pydantic, pybind11, portalocker, pathspec, parameterized, onnx, omegaconf, numcodecs, marshmallow, markdown2, loguru, lightning-utilities, latexcodec, kaldiio, kaldi-python-io, jmespath, jedi, isort, ftfy, fasteners, einops, docutils, docker-pycreds, dill, colorama, click, cdifflib, attrdict, zarr, youtokentome, sentry-sdk, sacremoses, sacrebleu, ruamel.yaml, pypinyin-dict, pybtex, pyannote.core, multiprocess, Levenshtein, jiwer, hydra-core, gitdb, fasttext, botocore, black, texterrors, s3transfer, rouge-score, pybtex-docutils, huggingface-hub, GitPython, g2p-en, flask-restful, wandb, transformers, pyannote.database, datasets, boto3, pyannote.metrics, nemo-text-processing, torchmetrics, sphinxcontrib-bibtex, sentence-transformers, pytorch-lightning, nemo-toolkit, megatron-core, kornia\n", - " Attempting uninstall: urllib3\n", - " Found existing installation: urllib3 2.0.4\n", - " Uninstalling urllib3-2.0.4:\n", - " Successfully uninstalled urllib3-2.0.4\n", - " Attempting uninstall: setuptools\n", - " Found existing installation: setuptools 67.7.2\n", - " Uninstalling setuptools-67.7.2:\n", - " Successfully uninstalled setuptools-67.7.2\n", - " Attempting uninstall: pydantic\n", - " Found existing installation: pydantic 2.3.0\n", - " Uninstalling pydantic-2.3.0:\n", - " Successfully uninstalled pydantic-2.3.0\n", - " Attempting uninstall: docutils\n", - " Found existing installation: docutils 0.18.1\n", - " Uninstalling docutils-0.18.1:\n", - " Successfully uninstalled docutils-0.18.1\n", - " Attempting uninstall: click\n", - " Found existing installation: click 8.1.7\n", - " Uninstalling click-8.1.7:\n", - " Successfully uninstalled click-8.1.7\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "cvxpy 1.3.2 requires setuptools>65.5.1, but you have setuptools 65.5.1 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed GitPython-3.1.35 Levenshtein-0.21.1 aniso8601-9.0.1 antlr4-python3-runtime-4.9.3 asciitree-0.3.3 attrdict-2.0.1 black-19.10b0 boto3-1.28.43 botocore-1.31.43 braceexpand-0.1.7 cdifflib-1.2.6 click-8.0.2 colorama-0.4.6 datasets-2.14.5 dill-0.3.7 distance-0.1.3 docker-pycreds-0.4.0 docopt-0.6.2 docutils-0.17.1 einops-0.6.1 faiss-cpu-1.7.4 fasteners-0.18 fasttext-0.9.2 flask-restful-0.3.10 ftfy-6.1.1 g2p-en-2.1.0 gitdb-4.0.10 huggingface-hub-0.16.4 hydra-core-1.3.2 ijson-3.2.3 ipadic-1.0.0 isort-5.12.0 jedi-0.19.0 jiwer-2.5.2 jmespath-1.0.1 kaldi-python-io-1.2.2 kaldiio-2.18.0 kornia-0.7.0 latexcodec-2.0.1 lightning-utilities-0.9.0 loguru-0.7.1 markdown2-2.4.10 marshmallow-3.20.1 mecab-python3-1.0.5 megatron-core-0.2.0 multiprocess-0.70.15 nemo-text-processing-0.2.0rc0 nemo-toolkit-1.21.0rc0 numcodecs-0.11.0 omegaconf-2.3.0 onnx-1.14.1 opencc-1.1.6 pangu-4.0.6.1 parameterized-0.9.0 pathspec-0.11.2 pathtools-0.1.2 plac-1.3.5 portalocker-2.7.0 progress-1.6 pyannote.core-5.0.0 pyannote.database-5.0.1 pyannote.metrics-3.2.1 pybind11-2.11.1 pybtex-0.24.0 pybtex-docutils-1.0.3 pydantic-1.10.12 pydub-0.25.1 pynini-2.1.5 pypinyin-0.49.0 pypinyin-dict-0.6.0 pytest-runner-6.0.0 pytorch-lightning-2.0.7 rapidfuzz-2.13.7 rouge-score-0.1.2 ruamel.yaml-0.17.32 ruamel.yaml.clib-0.2.7 s3transfer-0.6.2 sacrebleu-2.3.1 sacremoses-0.0.53 safetensors-0.3.3 sentence-transformers-2.2.2 sentencepiece-0.1.99 sentry-sdk-1.30.0 setproctitle-1.3.2 setuptools-65.5.1 shellingham-1.5.3 smmap-5.0.0 sox-1.4.1 sphinxcontrib-bibtex-2.6.1 textdistance-4.5.0 texterrors-0.4.4 tokenizers-0.13.3 torchmetrics-1.1.1 transformers-4.33.1 typed-ast-1.5.5 urllib3-1.26.16 wandb-0.15.10 webdataset-0.1.62 wget-3.2 xxhash-3.3.0 youtokentome-1.0.6 zarr-2.16.1\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0mAll done!\n", - "Reading package lists... Done\n", - "Building dependency tree... Done\n", - "Reading state information... Done\n", - "The following additional packages will be installed:\n", - " libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base\n", - " libsox3 libwavpack1\n", - "Suggested packages:\n", - " libsox-fmt-all\n", - "The following NEW packages will be installed:\n", - " libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base\n", - " libsox3 libwavpack1 sox\n", - "0 upgraded, 7 newly installed, 0 to remove and 16 not upgraded.\n", - "Need to get 617 kB of archives.\n", - "After this operation, 1,764 kB of additional disk space will be used.\n", - "Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrnb0 amd64 0.1.5-1 [94.8 kB]\n", - "Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrwb0 amd64 0.1.5-1 [49.1 kB]\n", - "Get:3 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox3 amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [240 kB]\n", - "Get:4 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-alsa amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [11.2 kB]\n", - "Get:5 http://archive.ubuntu.com/ubuntu jammy/main amd64 libwavpack1 amd64 5.4.0-1build2 [83.7 kB]\n", - "Get:6 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-base amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [33.7 kB]\n", - "Get:7 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 sox amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [104 kB]\n", - "Fetched 617 kB in 0s (5,224 kB/s)\n", - "Selecting previously unselected package libopencore-amrnb0:amd64.\n", - "(Reading database ... 120901 files and directories currently installed.)\n", - "Preparing to unpack .../0-libopencore-amrnb0_0.1.5-1_amd64.deb ...\n", - "Unpacking libopencore-amrnb0:amd64 (0.1.5-1) ...\n", - "Selecting previously unselected package libopencore-amrwb0:amd64.\n", - "Preparing to unpack .../1-libopencore-amrwb0_0.1.5-1_amd64.deb ...\n", - "Unpacking libopencore-amrwb0:amd64 (0.1.5-1) ...\n", - "Selecting previously unselected package libsox3:amd64.\n", - "Preparing to unpack .../2-libsox3_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n", - "Unpacking libsox3:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n", - "Selecting previously unselected package libsox-fmt-alsa:amd64.\n", - "Preparing to unpack .../3-libsox-fmt-alsa_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n", - "Unpacking libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n", - "Selecting previously unselected package libwavpack1:amd64.\n", - "Preparing to unpack .../4-libwavpack1_5.4.0-1build2_amd64.deb ...\n", - "Unpacking libwavpack1:amd64 (5.4.0-1build2) ...\n", - "Selecting previously unselected package libsox-fmt-base:amd64.\n", - "Preparing to unpack .../5-libsox-fmt-base_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n", - "Unpacking libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n", - "Selecting previously unselected package sox.\n", - "Preparing to unpack .../6-sox_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n", - "Unpacking sox (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n", - "Setting up libsox3:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n", - "Setting up libopencore-amrwb0:amd64 (0.1.5-1) ...\n", - "Setting up libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n", - "Setting up libwavpack1:amd64 (5.4.0-1build2) ...\n", - "Setting up libopencore-amrnb0:amd64 (0.1.5-1) ...\n", - "Setting up libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n", - "Setting up sox (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n", - "Processing triggers for man-db (2.10.2-1) ...\n", - "Processing triggers for libc-bin (2.35-0ubuntu3.1) ...\n", - "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_5.so.3 is not a symbolic link\n", - "\n", - "/sbin/ldconfig.real: /usr/local/lib/libtbbbind.so.3 is not a symbolic link\n", - "\n", - "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc_proxy.so.2 is not a symbolic link\n", - "\n", - "/sbin/ldconfig.real: /usr/local/lib/libtbb.so.12 is not a symbolic link\n", - "\n", - "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc.so.2 is not a symbolic link\n", - "\n", - "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_0.so.3 is not a symbolic link\n", - "\n", - "Collecting dash>=2.1.0 (from -r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1))\n", - " Obtaining dependency information for dash>=2.1.0 from https://files.pythonhosted.org/packages/9b/b4/d522c16b41a8da013fd60a67f9618e57c504cd2c80e02a7a861413b93906/dash-2.13.0-py3-none-any.whl.metadata\n", - " Downloading dash-2.13.0-py3-none-any.whl.metadata (11 kB)\n", - "Collecting dash_bootstrap_components>=1.0.3 (from -r ./NeMo/tools/speech_data_explorer/requirements.txt (line 2))\n", - " Obtaining dependency information for dash_bootstrap_components>=1.0.3 from https://files.pythonhosted.org/packages/cd/2a/cf963336e8b6745406d357e2f2b33ff1f236531fcadbe250096931855ec0/dash_bootstrap_components-1.5.0-py3-none-any.whl.metadata\n", - " Downloading dash_bootstrap_components-1.5.0-py3-none-any.whl.metadata (5.2 kB)\n", - "Collecting diff_match_patch (from -r ./NeMo/tools/speech_data_explorer/requirements.txt (line 3))\n", - " Downloading diff_match_patch-20230430-py3-none-any.whl (42 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.8/42.8 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: editdistance in /usr/local/lib/python3.10/dist-packages (from -r ./NeMo/tools/speech_data_explorer/requirements.txt (line 4)) (0.6.2)\n", - "Requirement already satisfied: jiwer in /usr/local/lib/python3.10/dist-packages (from -r ./NeMo/tools/speech_data_explorer/requirements.txt (line 5)) (2.5.2)\n", - "Requirement already satisfied: librosa>=0.9.1 in /usr/local/lib/python3.10/dist-packages (from -r ./NeMo/tools/speech_data_explorer/requirements.txt (line 6)) (0.10.1)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from -r ./NeMo/tools/speech_data_explorer/requirements.txt (line 7)) (1.23.5)\n", - "Requirement already satisfied: plotly in /usr/local/lib/python3.10/dist-packages (from -r ./NeMo/tools/speech_data_explorer/requirements.txt (line 8)) (5.15.0)\n", - "Requirement already satisfied: SoundFile in /usr/local/lib/python3.10/dist-packages (from -r ./NeMo/tools/speech_data_explorer/requirements.txt (line 9)) (0.12.1)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from -r ./NeMo/tools/speech_data_explorer/requirements.txt (line 10)) (4.66.1)\n", - "Requirement already satisfied: Flask<2.3.0,>=1.0.4 in /usr/local/lib/python3.10/dist-packages (from dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1)) (2.2.5)\n", - "Collecting Werkzeug<2.3.0 (from dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1))\n", - " Downloading Werkzeug-2.2.3-py3-none-any.whl (233 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m233.6/233.6 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting dash-html-components==2.0.0 (from dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1))\n", - " Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)\n", - "Collecting dash-core-components==2.0.0 (from dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1))\n", - " Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)\n", - "Collecting dash-table==5.0.0 (from dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1))\n", - " Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)\n", - "Requirement already satisfied: typing-extensions>=4.1.1 in /usr/local/lib/python3.10/dist-packages (from dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1)) (4.7.1)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1)) (2.31.0)\n", - "Collecting retrying (from dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1))\n", - " Downloading retrying-1.3.4-py3-none-any.whl (11 kB)\n", - "Collecting ansi2html (from dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1))\n", - " Downloading ansi2html-1.8.0-py3-none-any.whl (16 kB)\n", - "Requirement already satisfied: nest-asyncio in /usr/local/lib/python3.10/dist-packages (from dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1)) (1.5.7)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1)) (65.5.1)\n", - "Requirement already satisfied: rapidfuzz==2.13.7 in /usr/local/lib/python3.10/dist-packages (from jiwer->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 5)) (2.13.7)\n", - "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.1->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 6)) (3.0.0)\n", - "Requirement already satisfied: scipy>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.1->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 6)) (1.10.1)\n", - "Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.1->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 6)) (1.2.2)\n", - "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.1->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 6)) (1.3.2)\n", - "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.1->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 6)) (4.4.2)\n", - "Requirement already satisfied: numba>=0.51.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.1->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 6)) (0.56.4)\n", - "Requirement already satisfied: pooch>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.1->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 6)) (1.7.0)\n", - "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.1->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 6)) (0.3.6)\n", - "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.1->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 6)) (0.3)\n", - "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.1->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 6)) (1.0.5)\n", - "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from plotly->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 8)) (8.2.3)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from plotly->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 8)) (23.1)\n", - "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from SoundFile->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 9)) (1.15.1)\n", - "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->SoundFile->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 9)) (2.21)\n", - "Requirement already satisfied: Jinja2>=3.0 in /usr/local/lib/python3.10/dist-packages (from Flask<2.3.0,>=1.0.4->dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1)) (3.1.2)\n", - "Requirement already satisfied: itsdangerous>=2.0 in /usr/local/lib/python3.10/dist-packages (from Flask<2.3.0,>=1.0.4->dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1)) (2.1.2)\n", - "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.10/dist-packages (from Flask<2.3.0,>=1.0.4->dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1)) (8.0.2)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.0->librosa>=0.9.1->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 6)) (0.39.1)\n", - "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa>=0.9.1->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 6)) (3.10.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1)) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1)) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1)) (1.26.16)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1)) (2023.7.22)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.20.0->librosa>=0.9.1->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 6)) (3.2.0)\n", - "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from Werkzeug<2.3.0->dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1)) (2.1.3)\n", - "Requirement already satisfied: six>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from retrying->dash>=2.1.0->-r ./NeMo/tools/speech_data_explorer/requirements.txt (line 1)) (1.16.0)\n", - "Downloading dash-2.13.0-py3-none-any.whl (10.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.4/10.4 MB\u001b[0m \u001b[31m46.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading dash_bootstrap_components-1.5.0-py3-none-any.whl (221 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m221.2/221.2 kB\u001b[0m \u001b[31m23.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hInstalling collected packages: dash-table, dash-html-components, dash-core-components, Werkzeug, retrying, diff_match_patch, ansi2html, dash, dash_bootstrap_components\n", - " Attempting uninstall: Werkzeug\n", - " Found existing installation: Werkzeug 2.3.7\n", - " Uninstalling Werkzeug-2.3.7:\n", - " Successfully uninstalled Werkzeug-2.3.7\n", - "Successfully installed Werkzeug-2.2.3 ansi2html-1.8.0 dash-2.13.0 dash-core-components-2.0.0 dash-html-components-2.0.0 dash-table-5.0.0 dash_bootstrap_components-1.5.0 diff_match_patch-20230430 retrying-1.3.4\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m" - ] - } - ], + "outputs": [], "source": [ - "!git clone https://github.com/NVIDIA/NeMo.git\n", + "BRANCH = 'main'\n", + "\n", + "!git clone -b $BRANCH https://github.com/NVIDIA/NeMo\n", "\n", "!apt-get update && apt-get install -y libsndfile1 ffmpeg\n", "!cd NeMo;./reinstall.sh\n", From f10f93b111fccede8ba4c8c8d8d2b59f5fb00155 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 11:15:08 -0700 Subject: [PATCH 288/512] Fix validation in G2PModel and ThutmoseTaggerModel (#7597) (#7606) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> --- .../thutmose_tagger.py | 11 +++++++---- nemo/collections/tts/g2p/models/t5.py | 13 ++++++++++++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/nemo/collections/nlp/models/text_normalization_as_tagging/thutmose_tagger.py b/nemo/collections/nlp/models/text_normalization_as_tagging/thutmose_tagger.py index be82d6a31582..f6e5c155646d 100644 --- a/nemo/collections/nlp/models/text_normalization_as_tagging/thutmose_tagger.py +++ b/nemo/collections/nlp/models/text_normalization_as_tagging/thutmose_tagger.py @@ -236,14 +236,15 @@ def validation_step(self, batch, batch_idx): val_loss_tag = self.loss_fn(logits=tag_logits, labels=tag_labels, loss_mask=labels_mask) val_loss_semiotic = self.loss_fn(logits=semiotic_logits, labels=semiotic_labels, loss_mask=labels_mask) val_loss = val_loss_tag + val_loss_semiotic + self.validation_step_outputs.append(val_loss) return {'val_loss': val_loss} - def on_validation_epoch_end(self, outputs): + def on_validation_epoch_end(self): """ Called at the end of validation to aggregate outputs. :param outputs: list of individual outputs of each validation step. """ - avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean() + avg_loss = torch.stack([x['val_loss'] for x in self.validation_step_outputs]).mean() # calculate metrics and classification report # In our task recall = accuracy, and the recall column - is the per class accuracy @@ -269,6 +270,8 @@ def on_validation_epoch_end(self, outputs): self.tag_multiword_classification_report.reset() self.semiotic_classification_report.reset() + self.validation_step_outputs.clear() # free memory + def test_step(self, batch, batch_idx): """ Lightning calls this inside the test loop with the data from the test dataloader @@ -276,12 +279,12 @@ def test_step(self, batch, batch_idx): """ return self.validation_step(batch, batch_idx) - def on_test_epoch_end(self, outputs): + def on_test_epoch_end(self): """ Called at the end of test to aggregate outputs. :param outputs: list of individual outputs of each test step. """ - return self.on_validation_epoch_end(outputs) + return self.on_validation_epoch_end() # Functions for inference @torch.no_grad() diff --git a/nemo/collections/tts/g2p/models/t5.py b/nemo/collections/tts/g2p/models/t5.py index 16f1f1933fb0..b41fcf1d5945 100644 --- a/nemo/collections/tts/g2p/models/t5.py +++ b/nemo/collections/tts/g2p/models/t5.py @@ -170,7 +170,18 @@ def validation_step(self, batch, batch_idx, dataloader_idx=0, split="val"): ) generated_str, _, _ = self._generate_predictions(input_ids=input_ids, model_max_target_len=self.max_target_len) per = word_error_rate(hypotheses=generated_str, references=labels_str, use_cer=True) - return {f"{split}_loss": val_loss, 'per': per} + output = {f"{split}_loss": val_loss, 'per': per} + if split == 'val': + if isinstance(self.trainer.val_dataloaders, (list, tuple)) and len(self.trainer.val_dataloaders) > 1: + self.validation_step_outputs[dataloader_idx].append(output) + else: + self.validation_step_outputs.append(output) + else: + if isinstance(self.trainer.test_dataloaders, (list, tuple)) and len(self.trainer.test_dataloaders) > 1: + self.test_step_outputs[dataloader_idx].append(output) + else: + self.test_step_outputs.append(output) + return output def test_step(self, batch, batch_idx, dataloader_idx=0): """ From a12835e4d3c6eff0c4c2df218d4ae984f425dea4 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 13:11:09 -0700 Subject: [PATCH 289/512] Broadcast loss only when using pipeline parallelism and within the pipeline parallel domain (#7576) (#7586) * Broadcast loss only when using pipeline parallelism and within the pipeline parallel domain * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Sangkug Lym Co-authored-by: Sangkug Lym Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../language_modeling/megatron_gpt_model.py | 48 ++++++++++++++----- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index db983e5f5e01..b9ed3d790f02 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -13,6 +13,7 @@ # limitations under the License. import itertools +import os import queue import warnings from dataclasses import fields @@ -273,6 +274,8 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self.get_attention_mask_from_fusion = self.cfg.get('get_attention_mask_from_fusion', True) self.initialize_ub = self.cfg.get('ub_tp_comm_overlap', False) + self.log_train_loss = bool(int(os.getenv("NEMO_LOG_TRAIN_LOSS", 1))) + self.loss_broadcast_src_rank = None self.inference_params = None @@ -627,17 +630,29 @@ def training_step(self, dataloader_iter, batch_idx): self.allreduce_first_last_embeddings() ## logging - # we can only log on one rank if it is rank zero so we broadcast from last rank - # we can avoid this broadcast by updating the PTL log function to accept specific ranks - torch.distributed.broadcast(loss_mean, get_last_rank()) + if self.log_train_loss: + # When using pipeline parallelism, loss is calculated only in the last pipeline stage and + # it should be casted to other pipeline stages for logging. + # we can avoid this broadcast by updating the PTL log function to accept specific ranks + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if self.loss_broadcast_src_rank is None: + dp_size = parallel_state.get_data_parallel_world_size() + tp_size = parallel_state.get_tensor_model_parallel_world_size() + pp_size = parallel_state.get_pipeline_model_parallel_world_size() + rank_in_dp_tp_group = torch.distributed.get_rank() % (dp_size * tp_size) + last_pipeline_stage_offset = (tp_size * dp_size) * (pp_size - 1) + self.loss_broadcast_src_rank = last_pipeline_stage_offset + rank_in_dp_tp_group + torch.distributed.broadcast( + loss_mean, self.loss_broadcast_src_rank, group=parallel_state.get_pipeline_model_parallel_group(), + ) + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) - # (@adithyare) we need to check for the _scaler attribute to enable pp>1 for adapter training - if self.torch_dtype == torch.float16 and hasattr(self.trainer.precision_plugin.scaler, "_scale"): - loss_scale = self.trainer.precision_plugin.scaler._scale - if loss_scale is not None: - self.log('loss_scale', loss_scale, batch_size=1) + # (@adithyare) we need to check for the _scaler attribute to enable pp>1 for adapter training + if self.cfg.precision == 16 and hasattr(self.trainer.precision_plugin.scaler, "_scale"): + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log('loss_scale', loss_scale, batch_size=1) - self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) lr = self._optimizer.param_groups[0]['lr'] self.log('lr', lr, rank_zero_only=True, batch_size=1) self.log( @@ -962,8 +977,19 @@ def on_validation_epoch_end(self): else: averaged_loss = torch.tensor(0.0, dtype=torch.float32).cuda() - # we can only log on one rank if it is rank zero so we broadcast from last rank - torch.distributed.broadcast(averaged_loss, get_last_rank()) + # When using pipeline parallelism, loss is calculated only in the last pipeline stage and + # it should be casted to other pipeline stages for logging. + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if self.loss_broadcast_src_rank is None: + dp_size = parallel_state.get_data_parallel_world_size() + tp_size = parallel_state.get_tensor_model_parallel_world_size() + pp_size = parallel_state.get_pipeline_model_parallel_world_size() + rank_in_dp_tp_group = torch.distributed.get_rank() % (dp_size * tp_size) + last_pipeline_stage_offset = (tp_size * dp_size) * (pp_size - 1) + self.loss_broadcast_src_rank = last_pipeline_stage_offset + rank_in_dp_tp_group + torch.distributed.broadcast( + averaged_loss, self.loss_broadcast_src_rank, group=parallel_state.get_pipeline_model_parallel_group(), + ) self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True, batch_size=1) self.validation_step_outputs.clear() # free memory From 5211e5b62614bf62b2acdf8f978a95268f8174db Mon Sep 17 00:00:00 2001 From: Jason Date: Tue, 3 Oct 2023 16:28:25 -0400 Subject: [PATCH 290/512] Safeguard nemo_text_processing installation on ARM (#7485) * safeguard nemo_text_processing installing Signed-off-by: Jason * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update check Signed-off-by: Jason --------- Signed-off-by: Jason Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../duplex_text_normalization_infer.py | 10 +++++- .../nn_wfst/en/electronic/normalize.py | 13 +++++-- .../en/electronic/tokenize_and_classify.py | 35 +++++++++++-------- .../nn_wfst/en/electronic/verbalize.py | 14 +++++--- .../nn_wfst/en/electronic/verbalize_final.py | 18 ++++++---- .../nn_wfst/en/whitelist/normalize.py | 13 +++++-- .../en/whitelist/tokenize_and_classify.py | 35 +++++++++++-------- .../nn_wfst/en/whitelist/verbalize.py | 14 +++++--- .../nn_wfst/en/whitelist/verbalize_final.py | 17 ++++++--- requirements/requirements_tts.txt | 3 +- .../tts/hui_acg/get_data.py | 10 +++++- .../tts/ljspeech/get_data.py | 10 +++++- .../dataset_processing/tts/preprocess_text.py | 10 +++++- .../tts/sfbilingual/get_data.py | 10 +++++- .../tts/thorsten_neutral/get_data.py | 10 +++++- .../asr/test_text_to_text_dataset.py | 10 +++++- tools/ctc_segmentation/requirements.txt | 3 +- tutorials/asr/ASR_TTS_Tutorial.ipynb | 9 ++++- .../tts/FastPitch_MixerTTS_Training.ipynb | 11 ++++-- tutorials/tts/NeMo_TTS_Primer.ipynb | 11 ++++-- 20 files changed, 199 insertions(+), 67 deletions(-) diff --git a/examples/nlp/duplex_text_normalization/duplex_text_normalization_infer.py b/examples/nlp/duplex_text_normalization/duplex_text_normalization_infer.py index 4cf25e12fc89..6bcc69de7db9 100644 --- a/examples/nlp/duplex_text_normalization/duplex_text_normalization_infer.py +++ b/examples/nlp/duplex_text_normalization/duplex_text_normalization_infer.py @@ -50,7 +50,6 @@ from typing import List from helpers import DECODER_MODEL, TAGGER_MODEL, instantiate_model_and_trainer -from nemo_text_processing.text_normalization.data_loader_utils import post_process_punct from nn_wfst.en.electronic.normalize import ElectronicNormalizer from nn_wfst.en.whitelist.normalize import WhitelistNormalizer from omegaconf import DictConfig, OmegaConf @@ -60,6 +59,15 @@ from nemo.core.config import hydra_runner from nemo.utils import logging +try: + from nemo_text_processing.text_normalization.data_loader_utils import post_process_punct +except (ImportError, ModuleNotFoundError): + raise ModuleNotFoundError( + "The package `nemo_text_processing` was not installed in this environment. Please refer to" + " https://github.com/NVIDIA/NeMo-text-processing and install this package before using " + "this script" + ) + @hydra_runner(config_path="conf", config_name="duplex_tn_config") def main(cfg: DictConfig) -> None: diff --git a/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/normalize.py b/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/normalize.py index e0d83b42222d..a1f8caa7d959 100644 --- a/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/normalize.py +++ b/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/normalize.py @@ -12,8 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo_text_processing.text_normalization.normalize import Normalizer -from nemo_text_processing.text_normalization.token_parser import TokenParser +try: + from nemo_text_processing.text_normalization.normalize import Normalizer + from nemo_text_processing.text_normalization.token_parser import TokenParser +except (ImportError, ModuleNotFoundError): + raise ModuleNotFoundError( + "The package `nemo_text_processing` was not installed in this environment. Please refer to" + " https://github.com/NVIDIA/NeMo-text-processing and install this package before using " + "this script" + ) from nemo.collections.common.tokenizers.moses_tokenizers import MosesProcessor @@ -21,7 +28,7 @@ class ElectronicNormalizer(Normalizer): """ Normalizer for ELECTRONIC. - + Args: input_case: accepting either "lower_cased" or "cased" input. lang: language diff --git a/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/tokenize_and_classify.py b/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/tokenize_and_classify.py index 59a9d9784038..9e0c284d84b0 100644 --- a/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/tokenize_and_classify.py +++ b/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/tokenize_and_classify.py @@ -15,18 +15,25 @@ import os -import pynini -from nemo_text_processing.text_normalization.en.graph_utils import ( - NEMO_WHITE_SPACE, - GraphFst, - delete_extra_space, - delete_space, - generator_main, -) -from nemo_text_processing.text_normalization.en.taggers.electronic import ElectronicFst -from nemo_text_processing.text_normalization.en.taggers.punctuation import PunctuationFst -from nemo_text_processing.text_normalization.en.taggers.word import WordFst -from pynini.lib import pynutil +try: + import pynini + from nemo_text_processing.text_normalization.en.graph_utils import ( + NEMO_WHITE_SPACE, + GraphFst, + delete_extra_space, + delete_space, + generator_main, + ) + from nemo_text_processing.text_normalization.en.taggers.electronic import ElectronicFst + from nemo_text_processing.text_normalization.en.taggers.punctuation import PunctuationFst + from nemo_text_processing.text_normalization.en.taggers.word import WordFst + from pynini.lib import pynutil +except (ImportError, ModuleNotFoundError): + raise ModuleNotFoundError( + "The package `nemo_text_processing` was not installed in this environment. Please refer to" + " https://github.com/NVIDIA/NeMo-text-processing and install this package before using " + "this script" + ) from nemo.utils import logging @@ -34,9 +41,9 @@ class ClassifyFst(GraphFst): """ Final class that composes all other classification grammars. This class can process an entire sentence including punctuation. - For deployment, this grammar will be compiled and exported to OpenFst Finate State Archiv (FAR) File. + For deployment, this grammar will be compiled and exported to OpenFst Finate State Archiv (FAR) File. More details to deployment at NeMo/tools/text_processing_deployment. - + Args: input_case: accepting either "lower_cased" or "cased" input. deterministic: if True will provide a single transduction option, diff --git a/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/verbalize.py b/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/verbalize.py index 6366942d34c8..7236be7a1994 100644 --- a/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/verbalize.py +++ b/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/verbalize.py @@ -12,15 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. - -from nemo_text_processing.text_normalization.en.graph_utils import GraphFst -from nemo_text_processing.text_normalization.en.verbalizers.electronic import ElectronicFst +try: + from nemo_text_processing.text_normalization.en.graph_utils import GraphFst + from nemo_text_processing.text_normalization.en.verbalizers.electronic import ElectronicFst +except (ImportError, ModuleNotFoundError): + raise ModuleNotFoundError( + "The package `nemo_text_processing` was not installed in this environment. Please refer to" + " https://github.com/NVIDIA/NeMo-text-processing and install this package before using " + "this script" + ) class VerbalizeFst(GraphFst): """ Composes other verbalizer grammars. - For deployment, this grammar will be compiled and exported to OpenFst Finate State Archiv (FAR) File. + For deployment, this grammar will be compiled and exported to OpenFst Finate State Archiv (FAR) File. More details to deployment at NeMo/tools/text_processing_deployment. Args: diff --git a/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/verbalize_final.py b/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/verbalize_final.py index 4d5d716bd01e..b2cc69ca9e09 100644 --- a/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/verbalize_final.py +++ b/examples/nlp/duplex_text_normalization/nn_wfst/en/electronic/verbalize_final.py @@ -12,12 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. - -import pynini -from nemo_text_processing.text_normalization.en.graph_utils import GraphFst, delete_extra_space, delete_space -from nemo_text_processing.text_normalization.en.verbalizers.word import WordFst -from nn_wfst.en.electronic.verbalize import VerbalizeFst -from pynini.lib import pynutil +try: + import pynini + from nemo_text_processing.text_normalization.en.graph_utils import GraphFst, delete_extra_space, delete_space + from nemo_text_processing.text_normalization.en.verbalizers.word import WordFst + from nn_wfst.en.electronic.verbalize import VerbalizeFst + from pynini.lib import pynutil +except (ImportError, ModuleNotFoundError): + raise ModuleNotFoundError( + "The package `nemo_text_processing` was not installed in this environment. Please refer to" + " https://github.com/NVIDIA/NeMo-text-processing and install this package before using " + "this script" + ) class VerbalizeFinalFst(GraphFst): diff --git a/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/normalize.py b/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/normalize.py index 4109109ec83a..cfb4bef5d1c3 100644 --- a/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/normalize.py +++ b/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/normalize.py @@ -12,8 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo_text_processing.text_normalization.normalize import Normalizer -from nemo_text_processing.text_normalization.token_parser import TokenParser +try: + from nemo_text_processing.text_normalization.normalize import Normalizer + from nemo_text_processing.text_normalization.token_parser import TokenParser +except (ImportError, ModuleNotFoundError): + raise ModuleNotFoundError( + "The package `nemo_text_processing` was not installed in this environment. Please refer to" + " https://github.com/NVIDIA/NeMo-text-processing and install this package before using " + "this script" + ) from nemo.collections.common.tokenizers.moses_tokenizers import MosesProcessor @@ -21,7 +28,7 @@ class WhitelistNormalizer(Normalizer): """ Normalizer for WHITELIST. - + Args: input_case: accepting either "lower_cased" or "cased" input. lang: language diff --git a/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/tokenize_and_classify.py b/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/tokenize_and_classify.py index 712812fa8190..c2d69e765bb4 100644 --- a/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/tokenize_and_classify.py +++ b/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/tokenize_and_classify.py @@ -15,18 +15,25 @@ import os -import pynini -from nemo_text_processing.text_normalization.en.graph_utils import ( - NEMO_WHITE_SPACE, - GraphFst, - delete_extra_space, - delete_space, - generator_main, -) -from nemo_text_processing.text_normalization.en.taggers.punctuation import PunctuationFst -from nemo_text_processing.text_normalization.en.taggers.whitelist import WhiteListFst -from nemo_text_processing.text_normalization.en.taggers.word import WordFst -from pynini.lib import pynutil +try: + import pynini + from nemo_text_processing.text_normalization.en.graph_utils import ( + NEMO_WHITE_SPACE, + GraphFst, + delete_extra_space, + delete_space, + generator_main, + ) + from nemo_text_processing.text_normalization.en.taggers.punctuation import PunctuationFst + from nemo_text_processing.text_normalization.en.taggers.whitelist import WhiteListFst + from nemo_text_processing.text_normalization.en.taggers.word import WordFst + from pynini.lib import pynutil +except (ImportError, ModuleNotFoundError): + raise ModuleNotFoundError( + "The package `nemo_text_processing` was not installed in this environment. Please refer to" + " https://github.com/NVIDIA/NeMo-text-processing and install this package before using " + "this script" + ) from nemo.utils import logging @@ -34,9 +41,9 @@ class ClassifyFst(GraphFst): """ Final class that composes all other classification grammars. This class can process an entire sentence including punctuation. - For deployment, this grammar will be compiled and exported to OpenFst Finate State Archiv (FAR) File. + For deployment, this grammar will be compiled and exported to OpenFst Finate State Archiv (FAR) File. More details to deployment at NeMo/tools/text_processing_deployment. - + Args: input_case: accepting either "lower_cased" or "cased" input. deterministic: if True will provide a single transduction option, diff --git a/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/verbalize.py b/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/verbalize.py index e85f067acf96..c647a142ef8c 100644 --- a/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/verbalize.py +++ b/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/verbalize.py @@ -12,15 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. - -from nemo_text_processing.text_normalization.en.graph_utils import GraphFst -from nemo_text_processing.text_normalization.en.verbalizers.whitelist import WhiteListFst +try: + from nemo_text_processing.text_normalization.en.graph_utils import GraphFst + from nemo_text_processing.text_normalization.en.verbalizers.whitelist import WhiteListFst +except (ImportError, ModuleNotFoundError): + raise ModuleNotFoundError( + "The package `nemo_text_processing` was not installed in this environment. Please refer to" + " https://github.com/NVIDIA/NeMo-text-processing and install this package before using " + "this script" + ) class VerbalizeFst(GraphFst): """ Composes other verbalizer grammars. - For deployment, this grammar will be compiled and exported to OpenFst Finate State Archiv (FAR) File. + For deployment, this grammar will be compiled and exported to OpenFst Finate State Archiv (FAR) File. More details to deployment at NeMo/tools/text_processing_deployment. Args: diff --git a/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/verbalize_final.py b/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/verbalize_final.py index 4d5d716bd01e..550a8a85d797 100644 --- a/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/verbalize_final.py +++ b/examples/nlp/duplex_text_normalization/nn_wfst/en/whitelist/verbalize_final.py @@ -13,11 +13,18 @@ # limitations under the License. -import pynini -from nemo_text_processing.text_normalization.en.graph_utils import GraphFst, delete_extra_space, delete_space -from nemo_text_processing.text_normalization.en.verbalizers.word import WordFst -from nn_wfst.en.electronic.verbalize import VerbalizeFst -from pynini.lib import pynutil +try: + import pynini + from nemo_text_processing.text_normalization.en.graph_utils import GraphFst, delete_extra_space, delete_space + from nemo_text_processing.text_normalization.en.verbalizers.word import WordFst + from nn_wfst.en.electronic.verbalize import VerbalizeFst + from pynini.lib import pynutil +except (ImportError, ModuleNotFoundError): + raise ModuleNotFoundError( + "The package `nemo_text_processing` was not installed in this environment. Please refer to" + " https://github.com/NVIDIA/NeMo-text-processing and install this package before using " + "this script" + ) class VerbalizeFinalFst(GraphFst): diff --git a/requirements/requirements_tts.txt b/requirements/requirements_tts.txt index bb330aaf2e58..9536faec8c78 100644 --- a/requirements/requirements_tts.txt +++ b/requirements/requirements_tts.txt @@ -4,7 +4,8 @@ jieba kornia librosa matplotlib -nemo_text_processing +# pynini does not currently support aarch, disable nemo_text_processing for now +nemo_text_processing; 'arm' not in platform_machine and 'aarch' not in platform_machine nltk pandas pypinyin diff --git a/scripts/dataset_processing/tts/hui_acg/get_data.py b/scripts/dataset_processing/tts/hui_acg/get_data.py index dfde19f33f57..668d532f321a 100644 --- a/scripts/dataset_processing/tts/hui_acg/get_data.py +++ b/scripts/dataset_processing/tts/hui_acg/get_data.py @@ -21,9 +21,17 @@ import pandas as pd from joblib import Parallel, delayed -from nemo_text_processing.text_normalization.normalize import Normalizer from tqdm import tqdm +try: + from nemo_text_processing.text_normalization.normalize import Normalizer +except (ImportError, ModuleNotFoundError): + raise ModuleNotFoundError( + "The package `nemo_text_processing` was not installed in this environment. Please refer to" + " https://github.com/NVIDIA/NeMo-text-processing and install this package before using " + "this script" + ) + from nemo.utils import logging # full corpus. diff --git a/scripts/dataset_processing/tts/ljspeech/get_data.py b/scripts/dataset_processing/tts/ljspeech/get_data.py index c8aeed5dbfca..8007b5a0f05a 100644 --- a/scripts/dataset_processing/tts/ljspeech/get_data.py +++ b/scripts/dataset_processing/tts/ljspeech/get_data.py @@ -20,9 +20,17 @@ import sox import wget -from nemo_text_processing.text_normalization.normalize import Normalizer from tqdm import tqdm +try: + from nemo_text_processing.text_normalization.normalize import Normalizer +except (ImportError, ModuleNotFoundError): + raise ModuleNotFoundError( + "The package `nemo_text_processing` was not installed in this environment. Please refer to" + " https://github.com/NVIDIA/NeMo-text-processing and install this package before using " + "this script" + ) + def get_args(): parser = argparse.ArgumentParser(description='Download LJSpeech and create manifests with predefined split') diff --git a/scripts/dataset_processing/tts/preprocess_text.py b/scripts/dataset_processing/tts/preprocess_text.py index 580a84a02d6f..6afab42a1d6b 100644 --- a/scripts/dataset_processing/tts/preprocess_text.py +++ b/scripts/dataset_processing/tts/preprocess_text.py @@ -32,10 +32,18 @@ from hydra.utils import instantiate from joblib import Parallel, delayed -from nemo_text_processing.text_normalization.normalize import Normalizer from omegaconf import OmegaConf from tqdm import tqdm +try: + from nemo_text_processing.text_normalization.normalize import Normalizer +except (ImportError, ModuleNotFoundError): + raise ModuleNotFoundError( + "The package `nemo_text_processing` was not installed in this environment. Please refer to" + " https://github.com/NVIDIA/NeMo-text-processing and install this package before using " + "this script" + ) + from nemo.collections.asr.parts.utils.manifest_utils import read_manifest, write_manifest diff --git a/scripts/dataset_processing/tts/sfbilingual/get_data.py b/scripts/dataset_processing/tts/sfbilingual/get_data.py index bb38a6d127ba..806f9882a9f4 100755 --- a/scripts/dataset_processing/tts/sfbilingual/get_data.py +++ b/scripts/dataset_processing/tts/sfbilingual/get_data.py @@ -20,9 +20,17 @@ from pathlib import Path import numpy as np -from nemo_text_processing.text_normalization.normalize import Normalizer from opencc import OpenCC +try: + from nemo_text_processing.text_normalization.normalize import Normalizer +except (ImportError, ModuleNotFoundError): + raise ModuleNotFoundError( + "The package `nemo_text_processing` was not installed in this environment. Please refer to" + " https://github.com/NVIDIA/NeMo-text-processing and install this package before using " + "this script" + ) + def get_args(): parser = argparse.ArgumentParser( diff --git a/scripts/dataset_processing/tts/thorsten_neutral/get_data.py b/scripts/dataset_processing/tts/thorsten_neutral/get_data.py index 9422c0cd5498..d49d362064fd 100644 --- a/scripts/dataset_processing/tts/thorsten_neutral/get_data.py +++ b/scripts/dataset_processing/tts/thorsten_neutral/get_data.py @@ -32,9 +32,17 @@ from pathlib import Path from joblib import Parallel, delayed -from nemo_text_processing.text_normalization.normalize import Normalizer from tqdm import tqdm +try: + from nemo_text_processing.text_normalization.normalize import Normalizer +except (ImportError, ModuleNotFoundError): + raise ModuleNotFoundError( + "The package `nemo_text_processing` was not installed in this environment. Please refer to" + " https://github.com/NVIDIA/NeMo-text-processing and install this package before using " + "this script" + ) + from nemo.utils import logging # Thorsten Müller published two neural voice datasets, 21.02 and 22.10. diff --git a/tests/collections/asr/test_text_to_text_dataset.py b/tests/collections/asr/test_text_to_text_dataset.py index bc7a0a9d01dd..92205de41a1b 100644 --- a/tests/collections/asr/test_text_to_text_dataset.py +++ b/tests/collections/asr/test_text_to_text_dataset.py @@ -20,9 +20,17 @@ import pytest from hydra.utils import instantiate -from nemo_text_processing.text_normalization.normalize import Normalizer from omegaconf import OmegaConf +try: + from nemo_text_processing.text_normalization.normalize import Normalizer +except (ImportError, ModuleNotFoundError): + raise ModuleNotFoundError( + "The package `nemo_text_processing` was not installed in this environment. Please refer to" + " https://github.com/NVIDIA/NeMo-text-processing and install this package before using " + "this script" + ) + from nemo.collections.asr.data.text_to_text import TextToTextDataset, TextToTextItem, TextToTextIterableDataset from nemo.collections.common import tokenizers diff --git a/tools/ctc_segmentation/requirements.txt b/tools/ctc_segmentation/requirements.txt index f010b225a66e..bb51e49a0c87 100644 --- a/tools/ctc_segmentation/requirements.txt +++ b/tools/ctc_segmentation/requirements.txt @@ -1,3 +1,4 @@ ctc_segmentation==1.7.1 -nemo_text_processing==0.1.6rc0 +# pynini does not currently support aarch, disable nemo_text_processing for now +nemo_text_processing==0.1.6rc0; 'arm' not in platform_machine and 'aarch' not in platform_machine num2words diff --git a/tutorials/asr/ASR_TTS_Tutorial.ipynb b/tutorials/asr/ASR_TTS_Tutorial.ipynb index 267c84bca9d2..067c007ea3df 100644 --- a/tutorials/asr/ASR_TTS_Tutorial.ipynb +++ b/tutorials/asr/ASR_TTS_Tutorial.ipynb @@ -183,7 +183,14 @@ "from nemo.collections.tts.models import FastPitchModel, SpectrogramEnhancerModel\n", "from nemo.utils.notebook_utils import download_an4\n", "\n", - "from nemo_text_processing.text_normalization.normalize import Normalizer" + "try:\n", + " from nemo_text_processing.text_normalization.normalize import Normalizer\n", + "except ModuleNotFoundError:\n", + " raise ModuleNotFoundError(\n", + " \"The package `nemo_text_processing` was not installed in this environment. Please refer to\"\n", + " \" https://github.com/NVIDIA/NeMo-text-processing and install this package before using \"\n", + " \"this script\"\n", + " )" ] }, { diff --git a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb index 9eac34c499d8..a2e26d17ed9b 100644 --- a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb @@ -198,8 +198,15 @@ "source": [ "from nemo.collections.tts.g2p.models.en_us_arpabet import EnglishG2p\n", "from nemo.collections.tts.data.dataset import TTSDataset\n", - "from nemo_text_processing.text_normalization.normalize import Normalizer\n", - "from nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers import EnglishPhonemesTokenizer, EnglishCharsTokenizer" + "from nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers import EnglishPhonemesTokenizer, EnglishCharsTokenizer\n", + "try:\n", + " from nemo_text_processing.text_normalization.normalize import Normalizer\n", + "except ModuleNotFoundError:\n", + " raise ModuleNotFoundError(\n", + " \"The package `nemo_text_processing` was not installed in this environment. Please refer to\"\n", + " \" https://github.com/NVIDIA/NeMo-text-processing and install this package before using \"\n", + " \"this script\"\n", + " )" ] }, { diff --git a/tutorials/tts/NeMo_TTS_Primer.ipynb b/tutorials/tts/NeMo_TTS_Primer.ipynb index dea4a8936053..f891b7f11594 100644 --- a/tutorials/tts/NeMo_TTS_Primer.ipynb +++ b/tutorials/tts/NeMo_TTS_Primer.ipynb @@ -240,7 +240,14 @@ }, "outputs": [], "source": [ - "from nemo_text_processing.text_normalization.normalize import Normalizer\n", + "try:\n", + " from nemo_text_processing.text_normalization.normalize import Normalizer\n", + "except ModuleNotFoundError:\n", + " raise ModuleNotFoundError(\n", + " \"The package `nemo_text_processing` was not installed in this environment. Please refer to\"\n", + " \" https://github.com/NVIDIA/NeMo-text-processing and install this package before using \"\n", + " \"this script\"\n", + " )\n", "\n", "text_normalizer = Normalizer(input_case=\"cased\", lang=\"en\")" ] @@ -777,7 +784,7 @@ "While raw audio shows amplitude versus time and is useful for easily recording and listening, it is not optimal when it comes to processing.\n", "\n", "For processing, it is usually preferable to represent the audio as a **spectrogram** which shows frequency versus time. Specifically, we:\n", - "\n", + "\n", "1. Group together audio samples into a much smaller set of time buckets, called **audio frames**. An audio frame will usually bucket around 50ms of audio.\n", "2. For each audio frame, use the [Fast Fourier transform](https://en.wikipedia.org/wiki/Fast_Fourier_transform) (**FFT**) to calculate the magnitude (ie. energy, amplitude or \"loudness\") and phase (which we don't use) of each frequency bin. We refer to the magnitudes of the frequency bins as a spectrogram\n", "3. Map the original frequency bins onto the [mel scale](https://en.wikipedia.org/wiki/Mel_scale), using overlapped [triangular filters](https://en.wikipedia.org/wiki/Window_function#Triangular_window) to create mel filterbanks.\n", From 9590c3c80b764d04587632a6ff2fde443c0a239f Mon Sep 17 00:00:00 2001 From: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Date: Tue, 3 Oct 2023 19:35:05 -0700 Subject: [PATCH 291/512] Bound transformers version in requirements (#7620) Signed-off-by: Abhishree --- requirements/requirements_lightning.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements_lightning.txt b/requirements/requirements_lightning.txt index 62468a37e972..adc6aa0e0026 100644 --- a/requirements/requirements_lightning.txt +++ b/requirements/requirements_lightning.txt @@ -2,6 +2,6 @@ hydra-core>1.3,<=1.3.2 omegaconf<=2.3 pytorch-lightning>=2.0,<=2.0.7 torchmetrics>=0.11.0 -transformers>=4.0.1 +transformers>=4.0.1,<=4.33.3 wandb webdataset>=0.1.48,<=0.1.62 From fe5af223367fa888e01df4577a864a9b84fae742 Mon Sep 17 00:00:00 2001 From: Chen Cui Date: Wed, 4 Oct 2023 12:29:12 -0400 Subject: [PATCH 292/512] fix llama2 70b lora tuning bug (#7622) * fix llama2 70b lora tuning bug Signed-off-by: Chen Cui * Update peft_config.py brackets Signed-off-by: Adi Renduchintala --------- Signed-off-by: Chen Cui Signed-off-by: Adi Renduchintala Co-authored-by: Adi Renduchintala --- nemo/collections/nlp/parts/peft_config.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nemo/collections/nlp/parts/peft_config.py b/nemo/collections/nlp/parts/peft_config.py index dd75747fd73c..524a7fb62368 100644 --- a/nemo/collections/nlp/parts/peft_config.py +++ b/nemo/collections/nlp/parts/peft_config.py @@ -60,10 +60,14 @@ def __init__(self, cfg): else: kv_channels = cfg.kv_channels projection_size = kv_channels * cfg.num_attention_heads + num_query_groups = cfg.get("num_query_groups", None) + if num_query_groups is None: + num_query_groups = cfg.num_attention_heads + qkv_projection_size = projection_size + (2 * kv_channels * num_query_groups) config_args = { "in_features": cfg.hidden_size, - "out_features": 3 * projection_size, + "out_features": qkv_projection_size, "dim": lora_cfg.adapter_dim, "norm_position": None, "norm_type": None, From 381d84eec2ca8c4aeacd58dd9cdb908496112253 Mon Sep 17 00:00:00 2001 From: Mehadi Hasan Menon Date: Wed, 4 Oct 2023 22:32:12 +0600 Subject: [PATCH 293/512] Fix import error no module name model_utils (#7629) Signed-off-by: Mehadi Hasan Menon --- .../speech_recognition/confidence/benchmark_asr_confidence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/speech_recognition/confidence/benchmark_asr_confidence.py b/scripts/speech_recognition/confidence/benchmark_asr_confidence.py index 246aa61c2c0e..f4558fa85256 100644 --- a/scripts/speech_recognition/confidence/benchmark_asr_confidence.py +++ b/scripts/speech_recognition/confidence/benchmark_asr_confidence.py @@ -32,7 +32,7 @@ ) from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceConfig from nemo.core.config import hydra_runner -from nemo.utils import logging +from nemo.utils import logging, model_utils """ Get confidence metrics and curve plots for a given model, dataset, and confidence parameters. From 19f32c5f9e345f164d9e5c8941bfa5673276dad8 Mon Sep 17 00:00:00 2001 From: Nithin Rao Date: Wed, 4 Oct 2023 15:19:03 -0700 Subject: [PATCH 294/512] add fc large ls models (#7641) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Koluguri --- docs/source/asr/data/benchmark_en.csv | 2 ++ nemo/collections/asr/models/ctc_bpe_models.py | 7 +++++++ nemo/collections/asr/models/rnnt_bpe_models.py | 7 +++++++ 3 files changed, 16 insertions(+) diff --git a/docs/source/asr/data/benchmark_en.csv b/docs/source/asr/data/benchmark_en.csv index b41c675f423c..1669ecdeefb5 100644 --- a/docs/source/asr/data/benchmark_en.csv +++ b/docs/source/asr/data/benchmark_en.csv @@ -26,6 +26,8 @@ stt_en_conformer_transducer_medium,EncDecRNNTBPEModel,"https://ngc.nvidia.com/ca stt_en_conformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_large" stt_en_conformer_transducer_xlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_xlarge" stt_en_conformer_transducer_xxlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_xxlarge" +stt_en_fastconformer_ctc_large_ls,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_large_ls" +stt_en_fastconformer_transducer_large_ls,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_large_ls" stt_en_fastconformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_large" stt_en_fastconformer_ctc_large,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_large" stt_en_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_pc" diff --git a/nemo/collections/asr/models/ctc_bpe_models.py b/nemo/collections/asr/models/ctc_bpe_models.py index aa26f27c29ab..7b17f7918e20 100644 --- a/nemo/collections/asr/models/ctc_bpe_models.py +++ b/nemo/collections/asr/models/ctc_bpe_models.py @@ -606,6 +606,13 @@ def list_available_models(cls) -> List[PretrainedModelInfo]: ) results.append(model) + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_ctc_large_ls", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_large_ls", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_ctc_large_ls/versions/1.0.0/files/stt_en_fastconformer_ctc_large_ls.nemo", + ) + results.append(model) + model = PretrainedModelInfo( pretrained_model_name="stt_en_fastconformer_ctc_xlarge", description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_xlarge", diff --git a/nemo/collections/asr/models/rnnt_bpe_models.py b/nemo/collections/asr/models/rnnt_bpe_models.py index c72d18a8023b..2b8ed315903c 100644 --- a/nemo/collections/asr/models/rnnt_bpe_models.py +++ b/nemo/collections/asr/models/rnnt_bpe_models.py @@ -253,6 +253,13 @@ def list_available_models(cls) -> List[PretrainedModelInfo]: ) results.append(model) + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_transducer_large_ls", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_large_ls", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_transducer_large_ls/versions/1.0.0/files/stt_en_fastconformer_transducer_large_ls.nemo", + ) + results.append(model) + model = PretrainedModelInfo( pretrained_model_name="stt_en_fastconformer_transducer_xlarge", description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_xlarge", From 329bd3c517f1960dc5b464099ca685d5244f1aec Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 4 Oct 2023 22:40:58 -0700 Subject: [PATCH 295/512] bugfix: trainer.gpus, trainer.strategy, trainer.accelerator (#7621) (#7642) * [TTS] bugfix for Tacotron2 tutorial due to PTL 2.0 * trainer.gpus -> trainer.devices * fixed related tutorial bugs --------- Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --- docs/source/asr/speaker_diarization/datasets.rst | 2 +- examples/asr/experimental/k2/align_speech_parallel.py | 2 +- examples/asr/experimental/k2/speech_to_text_bpe.py | 2 +- .../asr/speech_translation/speech_to_text_transformer.py | 2 +- .../multi_label_intent_slot_classification.py | 2 +- tutorials/tts/FastPitch_MixerTTS_Training.ipynb | 5 +++-- tutorials/tts/Tacotron2_Training.ipynb | 4 ++-- tutorials/tts/Vits_Training.ipynb | 2 +- 8 files changed, 11 insertions(+), 10 deletions(-) diff --git a/docs/source/asr/speaker_diarization/datasets.rst b/docs/source/asr/speaker_diarization/datasets.rst index ff73dad8601a..9f1a43a58f11 100644 --- a/docs/source/asr/speaker_diarization/datasets.rst +++ b/docs/source/asr/speaker_diarization/datasets.rst @@ -107,7 +107,7 @@ Prepare the msdd training dataset for both train and validation. After the train .. code-block:: bash python ./multiscale_diar_decoder.py --config-path='../conf/neural_diarizer' --config-name='msdd_5scl_15_05_50Povl_256x3x32x2.yaml' \ - trainer.gpus=1 \ + trainer.devices=1 \ trainer.max_epochs=20 \ model.base.diarizer.speaker_embeddings.model_path="titanet_large" \ model.train_ds.manifest_filepath="" \ diff --git a/examples/asr/experimental/k2/align_speech_parallel.py b/examples/asr/experimental/k2/align_speech_parallel.py index dcccee48ee27..8ddf036f3e38 100644 --- a/examples/asr/experimental/k2/align_speech_parallel.py +++ b/examples/asr/experimental/k2/align_speech_parallel.py @@ -46,7 +46,7 @@ python align_speech_parallel.py \ trainer.precision=16 \ - trainer.gpus=2 \ + trainer.devices=2 \ ... You may control the dataloader's config by setting the predict_ds: diff --git a/examples/asr/experimental/k2/speech_to_text_bpe.py b/examples/asr/experimental/k2/speech_to_text_bpe.py index 5eefdfaf1fe3..ee3924c7b8ac 100644 --- a/examples/asr/experimental/k2/speech_to_text_bpe.py +++ b/examples/asr/experimental/k2/speech_to_text_bpe.py @@ -50,7 +50,7 @@ model.validation_ds.manifest_filepath= \ model.tokenizer.dir= \ model.tokenizer.type= \ - trainer.gpus=-1 \ + trainer.devices=-1 \ trainer.accelerator="ddp" \ trainer.max_epochs=100 \ model.optim.name="adamw" \ diff --git a/examples/asr/speech_translation/speech_to_text_transformer.py b/examples/asr/speech_translation/speech_to_text_transformer.py index 0c0882859b88..dce19df87a72 100644 --- a/examples/asr/speech_translation/speech_to_text_transformer.py +++ b/examples/asr/speech_translation/speech_to_text_transformer.py @@ -24,7 +24,7 @@ model.tokenizer.dir= \ model.tokenizer.model_path= \ model.tokenizer.type= \ - trainer.gpus=-1 \ + trainer.devices=-1 \ trainer.accelerator="ddp" \ trainer.max_epochs=100 \ model.optim.name="adamw" \ diff --git a/examples/nlp/intent_slot_classification/multi_label_intent_slot_classification.py b/examples/nlp/intent_slot_classification/multi_label_intent_slot_classification.py index bed58ecc43dc..2441885e2ed2 100644 --- a/examples/nlp/intent_slot_classification/multi_label_intent_slot_classification.py +++ b/examples/nlp/intent_slot_classification/multi_label_intent_slot_classification.py @@ -19,7 +19,7 @@ model.data_dir=/home/user/multiatis \ model.validation_ds.prefix=dev \ model.test_ds.prefix=dev \ - trainer.gpus=[0] \ + trainer.devices=[0] \ +trainer.fast_dev_run=true \ exp_manager.exp_dir=checkpoints diff --git a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb index a2e26d17ed9b..70e989bb98af 100644 --- a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb @@ -515,7 +515,7 @@ " model.train_ds.dataloader_params.batch_size=24 \\\n", " model.validation_ds.dataloader_params.batch_size=24 \\\n", " exp_manager.exp_dir=./fastpitch_log_dir \\\n", - " model.n_speakers=1 trainer.devices=1 trainer.strategy=null \\\n", + " model.n_speakers=1 trainer.devices=1 trainer.strategy=\"ddp_find_unused_parameters_true\" \\\n", ")" ] }, @@ -565,7 +565,8 @@ "model.train_ds.dataloader_params.num_workers=0 \\\n", "model.validation_ds.dataloader_params.num_workers=0 \\\n", "trainer.max_epochs=3 \\\n", - "trainer.strategy=null \\\n", + "trainer.accelerator=\"gpu\" \\\n", + "trainer.strategy=\"ddp_find_unused_parameters_true\" \\\n", "trainer.check_val_every_n_epoch=1" ] }, diff --git a/tutorials/tts/Tacotron2_Training.ipynb b/tutorials/tts/Tacotron2_Training.ipynb index e9615b734ae4..8473dca449ea 100644 --- a/tutorials/tts/Tacotron2_Training.ipynb +++ b/tutorials/tts/Tacotron2_Training.ipynb @@ -295,9 +295,9 @@ " train_dataset=tests/data/asr/an4_train.json \\\n", " validation_datasets=tests/data/asr/an4_val.json \\\n", " trainer.max_epochs=3 \\\n", - " trainer.accelerator=null \\\n", + " trainer.accelerator='gpu' \\\n", " trainer.check_val_every_n_epoch=1 \\\n", - " +trainer.gpus=1)" + " trainer.devices=1)" ] }, { diff --git a/tutorials/tts/Vits_Training.ipynb b/tutorials/tts/Vits_Training.ipynb index db7161c06c61..a8a7ccc76ae2 100644 --- a/tutorials/tts/Vits_Training.ipynb +++ b/tutorials/tts/Vits_Training.ipynb @@ -251,7 +251,7 @@ " num_nodes: 1\n", " devices: 2\n", " accelerator: gpu\n", - " strategy: ddp\n", + " strategy: ddp_find_unused_parameters_true\n", " precision: 32\n", " max_epochs: -1\n", " accumulate_grad_batches: 1\n", From e109c6e0104a838e68094f4055229ecaed1c22d2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 4 Oct 2023 22:44:48 -0700 Subject: [PATCH 296/512] fix ssl models ptl monitor val through logging (#7608) (#7614) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Co-authored-by: Eric Harper Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --- nemo/collections/asr/models/ssl_models.py | 16 ++++++++++++---- tutorials/asr/Self_Supervised_Pre_Training.ipynb | 6 +++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/nemo/collections/asr/models/ssl_models.py b/nemo/collections/asr/models/ssl_models.py index 8de713ca948d..6dca3815119f 100644 --- a/nemo/collections/asr/models/ssl_models.py +++ b/nemo/collections/asr/models/ssl_models.py @@ -527,7 +527,7 @@ def training_step(self, batch, batch_nb): return {'loss': loss_value, 'log': tensorboard_logs} - def validation_step(self, batch, batch_idx, dataloader_idx=0): + def validation_pass(self, batch, batch_idx, dataloader_idx=0): # Set flag to register tensors self._in_validation_step = True @@ -554,9 +554,17 @@ def validation_step(self, batch, batch_idx, dataloader_idx=0): self.reset_registry() del self._in_validation_step - return { - 'val_loss': loss_value, - } + metrics = {'val_loss': loss_value} + + return metrics + + def validation_step(self, batch, batch_idx, dataloader_idx=0): + metrics = self.validation_pass(batch, batch_idx, dataloader_idx) + if type(self.trainer.val_dataloaders) == list and len(self.trainer.val_dataloaders) > 1: + self.validation_step_outputs[dataloader_idx].append(metrics) + else: + self.validation_step_outputs.append(metrics) + return metrics def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() diff --git a/tutorials/asr/Self_Supervised_Pre_Training.ipynb b/tutorials/asr/Self_Supervised_Pre_Training.ipynb index 6d54655d04dc..454840ded683 100644 --- a/tutorials/asr/Self_Supervised_Pre_Training.ipynb +++ b/tutorials/asr/Self_Supervised_Pre_Training.ipynb @@ -215,7 +215,7 @@ " file_id[file_id.find('-')+1 : file_id.rfind('-')],\n", " file_id + '.wav')\n", "\n", - " duration = librosa.core.get_duration(filename=audio_path)\n", + " duration = librosa.core.get_duration(path=audio_path)\n", "\n", " # Write the metadata to the manifest\n", " metadata = {\n", @@ -331,7 +331,7 @@ "\n", "cfg.model.optim.sched.name = \"CosineAnnealing\"\n", "cfg.model.optim.sched.warmup_steps = 1000\n", - "cfg.model.optim.sched.max_steps = 5000\n", + "cfg.model.optim.sched.max_steps = 2000\n", "#in practice you will usually want a much larger amount of pre-training steps\n", "cfg.model.optim.sched.min_lr = 0\n", "cfg.model.optim.lr = 0.015\n", @@ -554,7 +554,7 @@ "\n", "cfg.model.optim.sched.name = \"CosineAnnealing\"\n", "cfg.model.optim.sched.warmup_steps = 500\n", - "cfg.model.optim.sched.max_steps = 2000\n", + "cfg.model.optim.sched.max_steps = 1000\n", "cfg.model.optim.sched.min_lr = 0\n", "cfg.model.optim.lr = 0.015 #if encoder is frozen, lr can be much higher\n", "cfg.model.optim.weight_decay = 0\n", From b36555b2eb80f4385bbefa6f86110fa091432ca1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 4 Oct 2023 22:45:19 -0700 Subject: [PATCH 297/512] Fix metrics for SE tutorial (#7604) (#7612) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ante Jukić Co-authored-by: anteju <108555623+anteju@users.noreply.github.com> --- .../asr/models/audio_to_audio_model.py | 34 +++++++++++-------- .../Speech_Enhancement_with_NeMo.ipynb | 9 ++--- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/nemo/collections/asr/models/audio_to_audio_model.py b/nemo/collections/asr/models/audio_to_audio_model.py index b48cd0c14e62..21860cf8ab56 100644 --- a/nemo/collections/asr/models/audio_to_audio_model.py +++ b/nemo/collections/asr/models/audio_to_audio_model.py @@ -17,7 +17,7 @@ import hydra import torch -from omegaconf import DictConfig +from omegaconf import DictConfig, OmegaConf from pytorch_lightning import Trainer from nemo.collections.asr.metrics.audio import AudioMetricWrapper @@ -67,12 +67,15 @@ def _setup_metrics(self, tag: str = 'val'): logging.debug('Found %d metrics for tag %s, not necesary to initialize again', num_dataloaders, tag) return - if 'metrics' not in self._cfg or tag not in self._cfg['metrics']: + if self.cfg.get('metrics') is None: # Metrics are not available in the configuration, nothing to do - logging.debug('No metrics configured for %s in model.metrics.%s', tag, tag) + logging.debug('No metrics configured in model.metrics') return - metrics_cfg = self._cfg['metrics'][tag] + if (metrics_cfg := self.cfg['metrics'].get(tag)) is None: + # Metrics configuration is not available in the configuration, nothing to do + logging.debug('No metrics configured for %s in model.metrics', tag) + return if 'loss' in metrics_cfg: raise ValueError( @@ -86,16 +89,19 @@ def _setup_metrics(self, tag: str = 'val'): # Setup metrics for each dataloader self.metrics[tag] = torch.nn.ModuleList() for dataloader_idx in range(num_dataloaders): - metrics_dataloader_idx = torch.nn.ModuleDict( - { - name: AudioMetricWrapper( - metric=hydra.utils.instantiate(cfg), - channel=cfg.get('channel'), - metric_using_batch_averaging=cfg.get('metric_using_batch_averaging'), - ) - for name, cfg in metrics_cfg.items() - } - ) + metrics_dataloader_idx = {} + for name, cfg in metrics_cfg.items(): + logging.debug('Initialize %s for dataloader_idx %s', name, dataloader_idx) + cfg_dict = OmegaConf.to_container(cfg) + cfg_channel = cfg_dict.pop('channel', None) + cfg_batch_averaging = cfg_dict.pop('metric_using_batch_averaging', None) + metrics_dataloader_idx[name] = AudioMetricWrapper( + metric=hydra.utils.instantiate(cfg_dict), + channel=cfg_channel, + metric_using_batch_averaging=cfg_batch_averaging, + ) + + metrics_dataloader_idx = torch.nn.ModuleDict(metrics_dataloader_idx) self.metrics[tag].append(metrics_dataloader_idx.to(self.device)) logging.info( diff --git a/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb b/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb index 09226c83d654..a3706ab9c0ec 100644 --- a/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb +++ b/tutorials/audio_tasks/speech_enhancement/Speech_Enhancement_with_NeMo.ipynb @@ -102,11 +102,6 @@ "from nemo.collections.asr.parts.utils.manifest_utils import read_manifest, write_manifest\n", "\n", "\n", - "# Used to download data processing scripts\n", - "USER = 'anteju' # TODO: change to 'NVIDIA'\n", - "BRANCH = 'dev/se-tutorial' # TODO: change to 'main'\n", - "\n", - "\n", "# Utility functions for displaying signals and metrics\n", "def show_signal(signal: np.ndarray, sample_rate: int = 16000, tag: str = 'Signal'):\n", " \"\"\"Show the time-domain signal and its spectrogram.\n", @@ -607,7 +602,7 @@ " '_target_': 'torchmetrics.audio.SignalDistortionRatio',\n", " }\n", "})\n", - "config.model.metrics.validation = metrics\n", + "config.model.metrics.val = metrics\n", "config.model.metrics.test = metrics\n", "\n", "print(\"Metrics config:\")\n", @@ -1112,7 +1107,7 @@ " 'channel': 1,\n", " },\n", "})\n", - "config_dual_output.model.metrics.validation = metrics\n", + "config_dual_output.model.metrics.val = metrics\n", "config_dual_output.model.metrics.test = metrics" ] }, From a0053a65db23a42d532ea55c753fe07c5eb4c04b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 5 Oct 2023 09:21:49 -0700 Subject: [PATCH 298/512] Add ddp_find_unused_parameters=True and change accelerator to auto (#7623) (#7644) * Add ddp_find_unused_parameters=True and change acclerator to auto * Add ddp_find_unused_parameters True for normalization_as_tagging_train.py --------- Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> --- .../intent_slot_classification/intent_slot_classification.py | 4 ++++ examples/nlp/question_answering/question_answering.py | 5 ++++- .../normalization_as_tagging_train.py | 4 ++++ .../nlp/token_classification/token_classification_train.py | 2 +- .../models/text_normalization_as_tagging/thutmose_tagger.py | 2 +- tutorials/nlp/Entity_Linking_Medical.ipynb | 2 +- 6 files changed, 15 insertions(+), 4 deletions(-) diff --git a/examples/nlp/intent_slot_classification/intent_slot_classification.py b/examples/nlp/intent_slot_classification/intent_slot_classification.py index 39b4b7c5dde6..a112ea7785f5 100644 --- a/examples/nlp/intent_slot_classification/intent_slot_classification.py +++ b/examples/nlp/intent_slot_classification/intent_slot_classification.py @@ -23,6 +23,10 @@ @hydra_runner(config_path="conf", config_name="intent_slot_classification_config") def main(cfg: DictConfig) -> None: + # PTL 2.0 has find_unused_parameters as False by default, so its required to set it to True + # when there are unused parameters like here + if cfg.trainer.strategy == 'ddp': + cfg.trainer.strategy = "ddp_find_unused_parameters_true" logging.info(f'Config Params:\n {OmegaConf.to_yaml(cfg)}') trainer = pl.Trainer(**cfg.trainer) exp_manager(trainer, cfg.get("exp_manager", None)) diff --git a/examples/nlp/question_answering/question_answering.py b/examples/nlp/question_answering/question_answering.py index 2bcaf8f9eca2..fcde03582e5c 100644 --- a/examples/nlp/question_answering/question_answering.py +++ b/examples/nlp/question_answering/question_answering.py @@ -28,7 +28,10 @@ @hydra_runner(config_path="conf", config_name="qa_conf") def main(cfg: DictConfig) -> None: pl.seed_everything(42) - + # PTL 2.0 has find_unused_parameters as False by default, so its required to set it to True + # when there are unused parameters like here + if cfg.trainer.strategy == 'ddp': + cfg.trainer.strategy = "ddp_find_unused_parameters_true" logging.info(f'Config: {OmegaConf.to_yaml(cfg)}') trainer = pl.Trainer(**cfg.trainer) exp_dir = exp_manager(trainer, cfg.get("exp_manager", None)) diff --git a/examples/nlp/text_normalization_as_tagging/normalization_as_tagging_train.py b/examples/nlp/text_normalization_as_tagging/normalization_as_tagging_train.py index e87ff7748185..36fe97d2341b 100644 --- a/examples/nlp/text_normalization_as_tagging/normalization_as_tagging_train.py +++ b/examples/nlp/text_normalization_as_tagging/normalization_as_tagging_train.py @@ -62,6 +62,10 @@ @hydra_runner(config_path="conf", config_name="thutmose_tagger_itn_config") def main(cfg: DictConfig) -> None: + # PTL 2.0 has find_unused_parameters as False by default, so its required to set it to True + # when there are unused parameters like here + if cfg.trainer.strategy == 'ddp': + cfg.trainer.strategy = "ddp_find_unused_parameters_true" logging.info(f'Config Params: {OmegaConf.to_yaml(cfg)}') # Train the model diff --git a/examples/nlp/token_classification/token_classification_train.py b/examples/nlp/token_classification/token_classification_train.py index 9b18d10b24e6..51983a1af98b 100644 --- a/examples/nlp/token_classification/token_classification_train.py +++ b/examples/nlp/token_classification/token_classification_train.py @@ -103,7 +103,7 @@ @hydra_runner(config_path="conf", config_name="token_classification_config") def main(cfg: DictConfig) -> None: try: - strategy = NLPDDPStrategy() + strategy = NLPDDPStrategy(find_unused_parameters=True) except (ImportError, ModuleNotFoundError): strategy = None diff --git a/nemo/collections/nlp/models/text_normalization_as_tagging/thutmose_tagger.py b/nemo/collections/nlp/models/text_normalization_as_tagging/thutmose_tagger.py index f6e5c155646d..4c11dc157b2b 100644 --- a/nemo/collections/nlp/models/text_normalization_as_tagging/thutmose_tagger.py +++ b/nemo/collections/nlp/models/text_normalization_as_tagging/thutmose_tagger.py @@ -236,7 +236,7 @@ def validation_step(self, batch, batch_idx): val_loss_tag = self.loss_fn(logits=tag_logits, labels=tag_labels, loss_mask=labels_mask) val_loss_semiotic = self.loss_fn(logits=semiotic_logits, labels=semiotic_labels, loss_mask=labels_mask) val_loss = val_loss_tag + val_loss_semiotic - self.validation_step_outputs.append(val_loss) + self.validation_step_outputs.append({'val_loss': val_loss}) return {'val_loss': val_loss} def on_validation_epoch_end(self): diff --git a/tutorials/nlp/Entity_Linking_Medical.ipynb b/tutorials/nlp/Entity_Linking_Medical.ipynb index 89104554a475..f2644af1f764 100644 --- a/tutorials/nlp/Entity_Linking_Medical.ipynb +++ b/tutorials/nlp/Entity_Linking_Medical.ipynb @@ -188,7 +188,7 @@ "\n", "# remove distributed training flags\n", "cfg.trainer.strategy = 'auto'\n", - "cfg.trainer.accelerator = None" + "cfg.trainer.accelerator = 'auto'" ] }, { From 358f5c6177cbf2389b7cb006d5142f712335e5e2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 5 Oct 2023 10:18:32 -0700 Subject: [PATCH 299/512] Fix py3.11 dataclasses issue (#7616) * Fix py3.11 dataclasses issue (#7582) * Update ASR configs to support Python 3.11 Signed-off-by: smajumdar * Update TTS configs to support Python 3.11 Signed-off-by: smajumdar * Guard MeCab and Ipadic Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix remaining ASR dataclasses Signed-off-by: smajumdar * Fix remaining ASR dataclasses Signed-off-by: smajumdar * Fix scripts Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: smajumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Update name to ConfidenceMethodConfig Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Broadcast loss only when using pipeline parallelism and within the pipeline parallel domain (#7576) (#7586) * Broadcast loss only when using pipeline parallelism and within the pipeline parallel domain * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Sangkug Lym Co-authored-by: Sangkug Lym Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Safeguard nemo_text_processing installation on ARM (#7485) * safeguard nemo_text_processing installing Signed-off-by: Jason * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update check Signed-off-by: Jason --------- Signed-off-by: Jason Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Fix changes to confidence measure Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: smajumdar Signed-off-by: Sangkug Lym Signed-off-by: Jason Co-authored-by: Somshubra Majumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Sangkug Lym Co-authored-by: Jason --- .../experimental/k2/align_speech_parallel.py | 10 +- nemo/collections/asr/metrics/rnnt_wer.py | 8 +- nemo/collections/asr/metrics/wer.py | 12 +- .../asr/models/configs/aligner_config.py | 8 +- .../asr/models/configs/asr_models_config.py | 30 +-- .../configs/classification_models_config.py | 40 ++-- .../asr/models/configs/diarizer_config.py | 28 +-- .../configs/k2_sequence_models_config.py | 8 +- .../asr/models/configs/matchboxnet_config.py | 36 ++-- .../asr/models/configs/quartznet_config.py | 30 ++- .../asr/modules/audio_preprocessing.py | 6 + nemo/collections/asr/parts/k2/classes.py | 4 +- .../multi_head_attention_adapter_module.py | 14 +- .../asr/parts/submodules/ctc_beam_decoding.py | 6 +- .../parts/submodules/ctc_greedy_decoding.py | 4 +- .../parts/submodules/rnnt_greedy_decoding.py | 6 +- .../asr/parts/utils/asr_confidence_utils.py | 4 +- .../common/parts/adapter_modules.py | 6 +- .../common/tokenizers/en_ja_tokenizers.py | 15 +- .../machine_translation/mt_enc_dec_config.py | 172 ++++++++++-------- .../punctuation_capitalization_config.py | 32 ++-- .../common/megatron/megatron_encoders.py | 8 +- nemo/collections/tts/models/fastpitch.py | 6 +- nemo/collections/tts/models/tacotron2.py | 4 +- nemo/core/config/modelPT.py | 10 +- nemo/utils/exp_manager.py | 18 +- requirements/requirements_nlp.txt | 2 +- .../ngram_lm/eval_beamsearch_ngram.py | 6 +- .../eval_beamsearch_ngram_transducer.py | 2 +- .../confidence_ensembles/build_ensemble.py | 26 ++- .../confidence/benchmark_asr_confidence.py | 6 +- .../convert_to_tarred_audio_dataset.py | 2 +- .../asr/test_text_to_text_dataset.py | 4 +- tools/nemo_forced_aligner/align.py | 4 +- 34 files changed, 342 insertions(+), 235 deletions(-) diff --git a/examples/asr/experimental/k2/align_speech_parallel.py b/examples/asr/experimental/k2/align_speech_parallel.py index 8ddf036f3e38..bd03420e94c1 100644 --- a/examples/asr/experimental/k2/align_speech_parallel.py +++ b/examples/asr/experimental/k2/align_speech_parallel.py @@ -74,7 +74,7 @@ import os -from dataclasses import dataclass, is_dataclass +from dataclasses import dataclass, field, is_dataclass from typing import Optional import pytorch_lightning as ptl @@ -94,12 +94,14 @@ @dataclass class ParallelAlignmentConfig: model: Optional[str] = None # name - predict_ds: ASRDatasetConfig = ASRDatasetConfig(return_sample_id=True, num_workers=4) - aligner_args: K2AlignerWrapperModelConfig = K2AlignerWrapperModelConfig() + predict_ds: ASRDatasetConfig = field( + default_factory=lambda: ASRDatasetConfig(return_sample_id=True, num_workers=4) + ) + aligner_args: K2AlignerWrapperModelConfig = field(default_factory=lambda: K2AlignerWrapperModelConfig()) output_path: str = MISSING model_stride: int = 8 - trainer: TrainerConfig = TrainerConfig(gpus=-1, accelerator="ddp") + trainer: TrainerConfig = field(default_factory=lambda: TrainerConfig(gpus=-1, accelerator="ddp")) # there arguments will be ignored return_predictions: bool = False diff --git a/nemo/collections/asr/metrics/rnnt_wer.py b/nemo/collections/asr/metrics/rnnt_wer.py index 97c9c4575982..5518c8f0a25c 100644 --- a/nemo/collections/asr/metrics/rnnt_wer.py +++ b/nemo/collections/asr/metrics/rnnt_wer.py @@ -15,7 +15,7 @@ import copy import re from abc import abstractmethod -from dataclasses import dataclass, is_dataclass +from dataclasses import dataclass, field, is_dataclass from typing import Callable, Dict, List, Optional, Tuple, Union import editdistance @@ -1299,7 +1299,7 @@ class RNNTDecodingConfig: preserve_alignments: Optional[bool] = None # confidence config - confidence_cfg: ConfidenceConfig = ConfidenceConfig() + confidence_cfg: ConfidenceConfig = field(default_factory=lambda: ConfidenceConfig()) # RNNT Joint fused batch size fused_batch_size: Optional[int] = None @@ -1317,10 +1317,10 @@ class RNNTDecodingConfig: rnnt_timestamp_type: str = "all" # can be char, word or all for both # greedy decoding config - greedy: greedy_decode.GreedyRNNTInferConfig = greedy_decode.GreedyRNNTInferConfig() + greedy: greedy_decode.GreedyRNNTInferConfig = field(default_factory=lambda: greedy_decode.GreedyRNNTInferConfig()) # beam decoding config - beam: beam_decode.BeamRNNTInferConfig = beam_decode.BeamRNNTInferConfig(beam_size=4) + beam: beam_decode.BeamRNNTInferConfig = field(default_factory=lambda: beam_decode.BeamRNNTInferConfig(beam_size=4)) # can be used to change temperature for decoding temperature: float = 1.0 diff --git a/nemo/collections/asr/metrics/wer.py b/nemo/collections/asr/metrics/wer.py index 14fa46b308ab..46984ff86435 100644 --- a/nemo/collections/asr/metrics/wer.py +++ b/nemo/collections/asr/metrics/wer.py @@ -14,7 +14,7 @@ import re from abc import abstractmethod -from dataclasses import dataclass, is_dataclass +from dataclasses import dataclass, field, is_dataclass from typing import Callable, Dict, List, Optional, Tuple, Union import editdistance @@ -1297,13 +1297,17 @@ class CTCDecodingConfig: batch_dim_index: int = 0 # greedy decoding config - greedy: ctc_greedy_decoding.GreedyCTCInferConfig = ctc_greedy_decoding.GreedyCTCInferConfig() + greedy: ctc_greedy_decoding.GreedyCTCInferConfig = field( + default_factory=lambda: ctc_greedy_decoding.GreedyCTCInferConfig() + ) # beam decoding config - beam: ctc_beam_decoding.BeamCTCInferConfig = ctc_beam_decoding.BeamCTCInferConfig(beam_size=4) + beam: ctc_beam_decoding.BeamCTCInferConfig = field( + default_factory=lambda: ctc_beam_decoding.BeamCTCInferConfig(beam_size=4) + ) # confidence config - confidence_cfg: ConfidenceConfig = ConfidenceConfig() + confidence_cfg: ConfidenceConfig = field(default_factory=lambda: ConfidenceConfig()) # can be used to change temperature for decoding temperature: float = 1.0 diff --git a/nemo/collections/asr/models/configs/aligner_config.py b/nemo/collections/asr/models/configs/aligner_config.py index 06b41b5c115b..cf2cdd176719 100644 --- a/nemo/collections/asr/models/configs/aligner_config.py +++ b/nemo/collections/asr/models/configs/aligner_config.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass +from dataclasses import dataclass, field from nemo.collections.asr.parts.k2.classes import GraphModuleConfig @@ -35,10 +35,10 @@ class AlignerWrapperModelConfig: word_output: bool = True cpu_decoding: bool = False decode_batch_size: int = 0 - ctc_cfg: AlignerCTCConfig = AlignerCTCConfig() - rnnt_cfg: AlignerRNNTConfig = AlignerRNNTConfig() + ctc_cfg: AlignerCTCConfig = field(default_factory=lambda: AlignerCTCConfig()) + rnnt_cfg: AlignerRNNTConfig = field(default_factory=lambda: AlignerRNNTConfig()) @dataclass class K2AlignerWrapperModelConfig(AlignerWrapperModelConfig): - decoder_module_cfg: GraphModuleConfig = GraphModuleConfig() + decoder_module_cfg: GraphModuleConfig = field(default_factory=lambda: GraphModuleConfig()) diff --git a/nemo/collections/asr/models/configs/asr_models_config.py b/nemo/collections/asr/models/configs/asr_models_config.py index 609d42216659..ce480cac8428 100644 --- a/nemo/collections/asr/models/configs/asr_models_config.py +++ b/nemo/collections/asr/models/configs/asr_models_config.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Dict, List, Optional from omegaconf import MISSING @@ -74,24 +74,32 @@ class EncDecCTCConfig(model_cfg.ModelConfig): labels: List[str] = MISSING # Dataset configs - train_ds: ASRDatasetConfig = ASRDatasetConfig(manifest_filepath=None, shuffle=True) - validation_ds: ASRDatasetConfig = ASRDatasetConfig(manifest_filepath=None, shuffle=False) - test_ds: ASRDatasetConfig = ASRDatasetConfig(manifest_filepath=None, shuffle=False) + train_ds: ASRDatasetConfig = field(default_factory=lambda: ASRDatasetConfig(manifest_filepath=None, shuffle=True)) + validation_ds: ASRDatasetConfig = field( + default_factory=lambda: ASRDatasetConfig(manifest_filepath=None, shuffle=False) + ) + test_ds: ASRDatasetConfig = field(default_factory=lambda: ASRDatasetConfig(manifest_filepath=None, shuffle=False)) # Optimizer / Scheduler config - optim: Optional[model_cfg.OptimConfig] = model_cfg.OptimConfig(sched=model_cfg.SchedConfig()) + optim: Optional[model_cfg.OptimConfig] = field( + default_factory=lambda: model_cfg.OptimConfig(sched=model_cfg.SchedConfig()) + ) # Model component configs - preprocessor: AudioToMelSpectrogramPreprocessorConfig = AudioToMelSpectrogramPreprocessorConfig() - spec_augment: Optional[SpectrogramAugmentationConfig] = SpectrogramAugmentationConfig() - encoder: ConvASREncoderConfig = ConvASREncoderConfig() - decoder: ConvASRDecoderConfig = ConvASRDecoderConfig() - decoding: CTCDecodingConfig = CTCDecodingConfig() + preprocessor: AudioToMelSpectrogramPreprocessorConfig = field( + default_factory=lambda: AudioToMelSpectrogramPreprocessorConfig() + ) + spec_augment: Optional[SpectrogramAugmentationConfig] = field( + default_factory=lambda: SpectrogramAugmentationConfig() + ) + encoder: ConvASREncoderConfig = field(default_factory=lambda: ConvASREncoderConfig()) + decoder: ConvASRDecoderConfig = field(default_factory=lambda: ConvASRDecoderConfig()) + decoding: CTCDecodingConfig = field(default_factory=lambda: CTCDecodingConfig()) @dataclass class EncDecCTCModelConfig(model_cfg.NemoConfig): - model: EncDecCTCConfig = EncDecCTCConfig() + model: EncDecCTCConfig = field(default_factory=lambda: EncDecCTCConfig()) @dataclass diff --git a/nemo/collections/asr/models/configs/classification_models_config.py b/nemo/collections/asr/models/configs/classification_models_config.py index 0df911e9e69a..33408f591c8e 100644 --- a/nemo/collections/asr/models/configs/classification_models_config.py +++ b/nemo/collections/asr/models/configs/classification_models_config.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Dict, List, Optional from omegaconf import MISSING @@ -72,30 +72,40 @@ class EncDecClassificationConfig(model_cfg.ModelConfig): timesteps: int = MISSING # Dataset configs - train_ds: EncDecClassificationDatasetConfig = EncDecClassificationDatasetConfig( - manifest_filepath=None, shuffle=True, trim_silence=False + train_ds: EncDecClassificationDatasetConfig = field( + default_factory=lambda: EncDecClassificationDatasetConfig( + manifest_filepath=None, shuffle=True, trim_silence=False + ) ) - validation_ds: EncDecClassificationDatasetConfig = EncDecClassificationDatasetConfig( - manifest_filepath=None, shuffle=False + validation_ds: EncDecClassificationDatasetConfig = field( + default_factory=lambda: EncDecClassificationDatasetConfig(manifest_filepath=None, shuffle=False) ) - test_ds: EncDecClassificationDatasetConfig = EncDecClassificationDatasetConfig( - manifest_filepath=None, shuffle=False + test_ds: EncDecClassificationDatasetConfig = field( + default_factory=lambda: EncDecClassificationDatasetConfig(manifest_filepath=None, shuffle=False) ) # Optimizer / Scheduler config - optim: Optional[model_cfg.OptimConfig] = model_cfg.OptimConfig(sched=model_cfg.SchedConfig()) + optim: Optional[model_cfg.OptimConfig] = field( + default_factory=lambda: model_cfg.OptimConfig(sched=model_cfg.SchedConfig()) + ) # Model component configs - preprocessor: AudioToMFCCPreprocessorConfig = AudioToMFCCPreprocessorConfig() - spec_augment: Optional[SpectrogramAugmentationConfig] = SpectrogramAugmentationConfig() - crop_or_pad_augment: Optional[CropOrPadSpectrogramAugmentationConfig] = CropOrPadSpectrogramAugmentationConfig( - audio_length=timesteps + preprocessor: AudioToMFCCPreprocessorConfig = field(default_factory=lambda: AudioToMFCCPreprocessorConfig()) + spec_augment: Optional[SpectrogramAugmentationConfig] = field( + default_factory=lambda: SpectrogramAugmentationConfig() + ) + crop_or_pad_augment: Optional[CropOrPadSpectrogramAugmentationConfig] = field( + default_factory=lambda: CropOrPadSpectrogramAugmentationConfig(audio_length=-1) ) - encoder: ConvASREncoderConfig = ConvASREncoderConfig() - decoder: ConvASRDecoderClassificationConfig = ConvASRDecoderClassificationConfig() + encoder: ConvASREncoderConfig = field(default_factory=lambda: ConvASREncoderConfig()) + decoder: ConvASRDecoderClassificationConfig = field(default_factory=lambda: ConvASRDecoderClassificationConfig()) + + def __post_init__(self): + if self.crop_or_pad_augment is not None: + self.crop_or_pad_augment.audio_length = self.timesteps @dataclass class EncDecClassificationModelConfig(model_cfg.NemoConfig): - model: EncDecClassificationConfig = EncDecClassificationConfig() + model: EncDecClassificationConfig = field(default_factory=lambda: EncDecClassificationConfig()) diff --git a/nemo/collections/asr/models/configs/diarizer_config.py b/nemo/collections/asr/models/configs/diarizer_config.py index c09bb1dfb8f4..0745a6f2a451 100644 --- a/nemo/collections/asr/models/configs/diarizer_config.py +++ b/nemo/collections/asr/models/configs/diarizer_config.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import asdict, dataclass +from dataclasses import asdict, dataclass, field from typing import Any, Dict, Optional, Tuple, Union @@ -78,9 +78,9 @@ class ASRDiarizerParams(DiarizerComponentConfig): @dataclass class ASRDiarizerConfig(DiarizerComponentConfig): model_path: Optional[str] = "stt_en_conformer_ctc_large" - parameters: ASRDiarizerParams = ASRDiarizerParams() - ctc_decoder_parameters: ASRDiarizerCTCDecoderParams = ASRDiarizerCTCDecoderParams() - realigning_lm_parameters: ASRRealigningLMParams = ASRRealigningLMParams() + parameters: ASRDiarizerParams = field(default_factory=lambda: ASRDiarizerParams()) + ctc_decoder_parameters: ASRDiarizerCTCDecoderParams = field(default_factory=lambda: ASRDiarizerCTCDecoderParams()) + realigning_lm_parameters: ASRRealigningLMParams = field(default_factory=lambda: ASRRealigningLMParams()) @dataclass @@ -102,7 +102,7 @@ class VADParams(DiarizerComponentConfig): class VADConfig(DiarizerComponentConfig): model_path: str = "vad_multilingual_marblenet" # .nemo local model path or pretrained VAD model name external_vad_manifest: Optional[str] = None - parameters: VADParams = VADParams() + parameters: VADParams = field(default_factory=lambda: VADParams()) @dataclass @@ -121,7 +121,7 @@ class SpeakerEmbeddingsParams(DiarizerComponentConfig): class SpeakerEmbeddingsConfig(DiarizerComponentConfig): # .nemo local model path or pretrained model name (titanet_large, ecapa_tdnn or speakerverification_speakernet) model_path: Optional[str] = None - parameters: SpeakerEmbeddingsParams = SpeakerEmbeddingsParams() + parameters: SpeakerEmbeddingsParams = field(default_factory=lambda: SpeakerEmbeddingsParams()) @dataclass @@ -142,7 +142,7 @@ class ClusteringParams(DiarizerComponentConfig): @dataclass class ClusteringConfig(DiarizerComponentConfig): - parameters: ClusteringParams = ClusteringParams() + parameters: ClusteringParams = field(default_factory=lambda: ClusteringParams()) @dataclass @@ -166,7 +166,7 @@ class MSDDParams(DiarizerComponentConfig): @dataclass class MSDDConfig(DiarizerComponentConfig): model_path: Optional[str] = "diar_msdd_telephonic" - parameters: MSDDParams = MSDDParams() + parameters: MSDDParams = field(default_factory=lambda: MSDDParams()) @dataclass @@ -176,16 +176,16 @@ class DiarizerConfig(DiarizerComponentConfig): oracle_vad: bool = False # If True, uses RTTM files provided in the manifest file to get VAD timestamps collar: float = 0.25 # Collar value for scoring ignore_overlap: bool = True # Consider or ignore overlap segments while scoring - vad: VADConfig = VADConfig() - speaker_embeddings: SpeakerEmbeddingsConfig = SpeakerEmbeddingsConfig() - clustering: ClusteringConfig = ClusteringConfig() - msdd_model: MSDDConfig = MSDDConfig() - asr: ASRDiarizerConfig = ASRDiarizerConfig() + vad: VADConfig = field(default_factory=lambda: VADConfig()) + speaker_embeddings: SpeakerEmbeddingsConfig = field(default_factory=lambda: SpeakerEmbeddingsConfig()) + clustering: ClusteringConfig = field(default_factory=lambda: ClusteringConfig()) + msdd_model: MSDDConfig = field(default_factory=lambda: MSDDConfig()) + asr: ASRDiarizerConfig = field(default_factory=lambda: ASRDiarizerConfig()) @dataclass class NeuralDiarizerInferenceConfig(DiarizerComponentConfig): - diarizer: DiarizerConfig = DiarizerConfig() + diarizer: DiarizerConfig = field(default_factory=lambda: DiarizerConfig()) device: str = "cpu" verbose: bool = False batch_size: int = 64 diff --git a/nemo/collections/asr/models/configs/k2_sequence_models_config.py b/nemo/collections/asr/models/configs/k2_sequence_models_config.py index 5a112f626f46..53ed3e1377fe 100644 --- a/nemo/collections/asr/models/configs/k2_sequence_models_config.py +++ b/nemo/collections/asr/models/configs/k2_sequence_models_config.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass +from dataclasses import dataclass, field from nemo.collections.asr.models.configs.asr_models_config import EncDecCTCConfig from nemo.collections.asr.parts.k2.classes import GraphModuleConfig as BackendConfig @@ -26,14 +26,14 @@ class GraphModuleConfig: split_batch_size: int = 0 dec_type: str = "topo" transcribe_training: bool = True - backend_cfg: BackendConfig = BackendConfig() + backend_cfg: BackendConfig = field(default_factory=lambda: BackendConfig()) @dataclass class EncDecK2SeqConfig(EncDecCTCConfig): - graph_module_cfg: GraphModuleConfig = GraphModuleConfig() + graph_module_cfg: GraphModuleConfig = field(default_factory=lambda: GraphModuleConfig()) @dataclass class EncDecK2SeqModelConfig(NemoConfig): - model: EncDecK2SeqConfig = EncDecK2SeqConfig() + model: EncDecK2SeqConfig = field(default_factory=lambda: EncDecK2SeqConfig()) diff --git a/nemo/collections/asr/models/configs/matchboxnet_config.py b/nemo/collections/asr/models/configs/matchboxnet_config.py index 55a8b9fedec1..52ec4c35d9e8 100644 --- a/nemo/collections/asr/models/configs/matchboxnet_config.py +++ b/nemo/collections/asr/models/configs/matchboxnet_config.py @@ -107,30 +107,38 @@ class MatchboxNetModelConfig(clf_cfg.EncDecClassificationConfig): labels: List[str] = MISSING # Dataset configs - train_ds: clf_cfg.EncDecClassificationDatasetConfig = clf_cfg.EncDecClassificationDatasetConfig( - manifest_filepath=None, shuffle=True, trim_silence=False + train_ds: clf_cfg.EncDecClassificationDatasetConfig = field( + default_factory=lambda: clf_cfg.EncDecClassificationDatasetConfig( + manifest_filepath=None, shuffle=True, trim_silence=False + ) ) - validation_ds: clf_cfg.EncDecClassificationDatasetConfig = clf_cfg.EncDecClassificationDatasetConfig( - manifest_filepath=None, shuffle=False + validation_ds: clf_cfg.EncDecClassificationDatasetConfig = field( + default_factory=lambda: clf_cfg.EncDecClassificationDatasetConfig(manifest_filepath=None, shuffle=False) ) - test_ds: clf_cfg.EncDecClassificationDatasetConfig = clf_cfg.EncDecClassificationDatasetConfig( - manifest_filepath=None, shuffle=False + test_ds: clf_cfg.EncDecClassificationDatasetConfig = field( + default_factory=lambda: clf_cfg.EncDecClassificationDatasetConfig(manifest_filepath=None, shuffle=False) ) # Optimizer / Scheduler config - optim: Optional[model_cfg.OptimConfig] = model_cfg.OptimConfig(sched=model_cfg.SchedConfig()) + optim: Optional[model_cfg.OptimConfig] = field( + default_factory=lambda: model_cfg.OptimConfig(sched=model_cfg.SchedConfig()) + ) # Model general component configs - preprocessor: AudioToMFCCPreprocessorConfig = AudioToMFCCPreprocessorConfig(window_size=0.025) - spec_augment: Optional[SpectrogramAugmentationConfig] = SpectrogramAugmentationConfig( - freq_masks=2, time_masks=2, freq_width=15, time_width=25, rect_masks=5, rect_time=25, rect_freq=15 + preprocessor: AudioToMFCCPreprocessorConfig = field( + default_factory=lambda: AudioToMFCCPreprocessorConfig(window_size=0.025) + ) + spec_augment: Optional[SpectrogramAugmentationConfig] = field( + default_factory=lambda: SpectrogramAugmentationConfig( + freq_masks=2, time_masks=2, freq_width=15, time_width=25, rect_masks=5, rect_time=25, rect_freq=15 + ) ) - crop_or_pad_augment: Optional[CropOrPadSpectrogramAugmentationConfig] = CropOrPadSpectrogramAugmentationConfig( - audio_length=128 + crop_or_pad_augment: Optional[CropOrPadSpectrogramAugmentationConfig] = field( + default_factory=lambda: CropOrPadSpectrogramAugmentationConfig(audio_length=128) ) - encoder: ConvASREncoderConfig = ConvASREncoderConfig(activation="relu") - decoder: ConvASRDecoderClassificationConfig = ConvASRDecoderClassificationConfig() + encoder: ConvASREncoderConfig = field(default_factory=lambda: ConvASREncoderConfig(activation="relu")) + decoder: ConvASRDecoderClassificationConfig = field(default_factory=lambda: ConvASRDecoderClassificationConfig()) @dataclass diff --git a/nemo/collections/asr/models/configs/quartznet_config.py b/nemo/collections/asr/models/configs/quartznet_config.py index a1231002af41..93412b0053bf 100644 --- a/nemo/collections/asr/models/configs/quartznet_config.py +++ b/nemo/collections/asr/models/configs/quartznet_config.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Callable, List, Optional from omegaconf import MISSING @@ -174,20 +174,30 @@ class JasperModelConfig(ctc_cfg.EncDecCTCConfig): labels: List[str] = MISSING # Dataset configs - train_ds: ctc_cfg.ASRDatasetConfig = ctc_cfg.ASRDatasetConfig( - manifest_filepath=None, shuffle=True, trim_silence=True + train_ds: ctc_cfg.ASRDatasetConfig = field( + default_factory=lambda: ctc_cfg.ASRDatasetConfig(manifest_filepath=None, shuffle=True, trim_silence=True) + ) + validation_ds: ctc_cfg.ASRDatasetConfig = field( + default_factory=lambda: ctc_cfg.ASRDatasetConfig(manifest_filepath=None, shuffle=False) + ) + test_ds: ctc_cfg.ASRDatasetConfig = field( + default_factory=lambda: ctc_cfg.ASRDatasetConfig(manifest_filepath=None, shuffle=False) ) - validation_ds: ctc_cfg.ASRDatasetConfig = ctc_cfg.ASRDatasetConfig(manifest_filepath=None, shuffle=False) - test_ds: ctc_cfg.ASRDatasetConfig = ctc_cfg.ASRDatasetConfig(manifest_filepath=None, shuffle=False) # Optimizer / Scheduler config - optim: Optional[model_cfg.OptimConfig] = model_cfg.OptimConfig(sched=model_cfg.SchedConfig()) + optim: Optional[model_cfg.OptimConfig] = field( + default_factory=lambda: model_cfg.OptimConfig(sched=model_cfg.SchedConfig()) + ) # Model general component configs - preprocessor: AudioToMelSpectrogramPreprocessorConfig = AudioToMelSpectrogramPreprocessorConfig() - spec_augment: Optional[SpectrogramAugmentationConfig] = SpectrogramAugmentationConfig() - encoder: ConvASREncoderConfig = ConvASREncoderConfig(activation="relu") - decoder: ConvASRDecoderConfig = ConvASRDecoderConfig() + preprocessor: AudioToMelSpectrogramPreprocessorConfig = field( + default_factory=lambda: AudioToMelSpectrogramPreprocessorConfig() + ) + spec_augment: Optional[SpectrogramAugmentationConfig] = field( + default_factory=lambda: SpectrogramAugmentationConfig() + ) + encoder: ConvASREncoderConfig = field(default_factory=lambda: ConvASREncoderConfig(activation="relu")) + decoder: ConvASRDecoderConfig = field(default_factory=lambda: ConvASRDecoderConfig()) @dataclass diff --git a/nemo/collections/asr/modules/audio_preprocessing.py b/nemo/collections/asr/modules/audio_preprocessing.py index 91c0c10b9604..471488bd9647 100644 --- a/nemo/collections/asr/modules/audio_preprocessing.py +++ b/nemo/collections/asr/modules/audio_preprocessing.py @@ -634,6 +634,12 @@ def __init__(self, audio_length): super(CropOrPadSpectrogramAugmentation, self).__init__() self.audio_length = audio_length + if self.audio_length < 0: + raise ValueError( + 'audio_length must be non-negative. If using a dataclass with OmegaConf, ' + 'please call OmegaConf.to_object(cfg) to call appropriate __post_init__ methods.' + ) + @typecheck() @torch.no_grad() def forward(self, input_signal, length): diff --git a/nemo/collections/asr/parts/k2/classes.py b/nemo/collections/asr/parts/k2/classes.py index bb749e15d4c6..d4c498f32a2d 100644 --- a/nemo/collections/asr/parts/k2/classes.py +++ b/nemo/collections/asr/parts/k2/classes.py @@ -13,7 +13,7 @@ # limitations under the License. from abc import ABC -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Optional, Tuple import torch @@ -43,7 +43,7 @@ class GraphModuleConfig: topo_with_self_loops: bool = True token_lm: Optional[Any] = None intersect_pruned: bool = False - intersect_conf: GraphIntersectDenseConfig = GraphIntersectDenseConfig() + intersect_conf: GraphIntersectDenseConfig = field(default_factory=lambda: GraphIntersectDenseConfig()) boost_coeff: float = 0.0 predictor_window_size: int = 0 predictor_step_size: int = 1 diff --git a/nemo/collections/asr/parts/submodules/adapters/multi_head_attention_adapter_module.py b/nemo/collections/asr/parts/submodules/adapters/multi_head_attention_adapter_module.py index 563d4219baa7..333b630ff7f6 100644 --- a/nemo/collections/asr/parts/submodules/adapters/multi_head_attention_adapter_module.py +++ b/nemo/collections/asr/parts/submodules/adapters/multi_head_attention_adapter_module.py @@ -13,7 +13,7 @@ # limitations under the License. import math -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Optional import torch @@ -183,7 +183,7 @@ class MultiHeadAttentionAdapterConfig: n_feat: int dropout_rate: float = 0.0 proj_dim: Optional[int] = None - adapter_strategy: Optional[Any] = MHAResidualAddAdapterStrategyConfig() + adapter_strategy: Optional[Any] = field(default_factory=lambda: MHAResidualAddAdapterStrategyConfig()) _target_: str = "{0}.{1}".format(MultiHeadAttentionAdapter.__module__, MultiHeadAttentionAdapter.__name__) @@ -287,7 +287,7 @@ class RelPositionMultiHeadAttentionAdapterConfig: n_feat: int dropout_rate: float = 0.0 proj_dim: Optional[int] = None - adapter_strategy: Optional[Any] = MHAResidualAddAdapterStrategyConfig() + adapter_strategy: Optional[Any] = field(default_factory=lambda: MHAResidualAddAdapterStrategyConfig()) _target_: str = "{0}.{1}".format( RelPositionMultiHeadAttentionAdapter.__module__, RelPositionMultiHeadAttentionAdapter.__name__ ) @@ -336,7 +336,9 @@ class PositionalEncodingAdapterConfig: d_model: int max_len: int = 5000 xscale: float = 1.0 - adapter_strategy: Optional[Any] = adapter_mixin_strategies.ResidualAddAdapterStrategyConfig() + adapter_strategy: Optional[Any] = field( + default_factory=lambda: adapter_mixin_strategies.ResidualAddAdapterStrategyConfig() + ) _target_: str = "{0}.{1}".format(PositionalEncodingAdapter.__module__, PositionalEncodingAdapter.__name__) @@ -378,5 +380,7 @@ class RelPositionalEncodingAdapterConfig: d_model: int max_len: int = 5000 xscale: float = 1.0 - adapter_strategy: Optional[Any] = adapter_mixin_strategies.ResidualAddAdapterStrategyConfig() + adapter_strategy: Optional[Any] = field( + default_factory=lambda: adapter_mixin_strategies.ResidualAddAdapterStrategyConfig() + ) _target_: str = "{0}.{1}".format(RelPositionalEncodingAdapter.__module__, RelPositionalEncodingAdapter.__name__) diff --git a/nemo/collections/asr/parts/submodules/ctc_beam_decoding.py b/nemo/collections/asr/parts/submodules/ctc_beam_decoding.py index 377e43cd5f91..5ed504fd9c45 100644 --- a/nemo/collections/asr/parts/submodules/ctc_beam_decoding.py +++ b/nemo/collections/asr/parts/submodules/ctc_beam_decoding.py @@ -14,7 +14,7 @@ import math import os -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import List, Optional, Tuple, Union import torch @@ -602,5 +602,5 @@ class BeamCTCInferConfig: beam_beta: float = 0.0 kenlm_path: Optional[str] = None - flashlight_cfg: Optional[FlashlightConfig] = FlashlightConfig() - pyctcdecode_cfg: Optional[PyCTCDecodeConfig] = PyCTCDecodeConfig() + flashlight_cfg: Optional[FlashlightConfig] = field(default_factory=lambda: FlashlightConfig()) + pyctcdecode_cfg: Optional[PyCTCDecodeConfig] = field(default_factory=lambda: PyCTCDecodeConfig()) diff --git a/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py b/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py index 44ae9f4a134b..686ef79cabad 100644 --- a/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import List, Optional import torch @@ -253,7 +253,7 @@ class GreedyCTCInferConfig: preserve_alignments: bool = False compute_timestamps: bool = False preserve_frame_confidence: bool = False - confidence_method_cfg: Optional[ConfidenceMethodConfig] = ConfidenceMethodConfig() + confidence_method_cfg: Optional[ConfidenceMethodConfig] = field(default_factory=lambda: ConfidenceMethodConfig()) def __post_init__(self): # OmegaConf.structured ensures that post_init check is always executed diff --git a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py index 95b0bdf5fd13..e37d282ed0de 100644 --- a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py @@ -26,7 +26,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import List, Optional, Tuple, Union import numpy as np @@ -2185,7 +2185,7 @@ class GreedyRNNTInferConfig: max_symbols_per_step: Optional[int] = 10 preserve_alignments: bool = False preserve_frame_confidence: bool = False - confidence_method_cfg: Optional[ConfidenceMethodConfig] = ConfidenceMethodConfig() + confidence_method_cfg: Optional[ConfidenceMethodConfig] = field(default_factory=lambda: ConfidenceMethodConfig()) def __post_init__(self): # OmegaConf.structured ensures that post_init check is always executed @@ -2201,7 +2201,7 @@ class GreedyBatchedRNNTInferConfig: max_symbols_per_step: Optional[int] = 10 preserve_alignments: bool = False preserve_frame_confidence: bool = False - confidence_method_cfg: Optional[ConfidenceMethodConfig] = ConfidenceMethodConfig() + confidence_method_cfg: Optional[ConfidenceMethodConfig] = field(default_factory=lambda: ConfidenceMethodConfig()) def __post_init__(self): # OmegaConf.structured ensures that post_init check is always executed diff --git a/nemo/collections/asr/parts/utils/asr_confidence_utils.py b/nemo/collections/asr/parts/utils/asr_confidence_utils.py index ddfac3744c6a..c406f5451e84 100644 --- a/nemo/collections/asr/parts/utils/asr_confidence_utils.py +++ b/nemo/collections/asr/parts/utils/asr_confidence_utils.py @@ -14,7 +14,7 @@ import math from abc import ABC, abstractmethod -from dataclasses import dataclass +from dataclasses import dataclass, field from functools import partial from typing import List, Optional @@ -175,7 +175,7 @@ class ConfidenceConfig: preserve_word_confidence: bool = False exclude_blank: bool = True aggregation: str = "min" - method_cfg: ConfidenceMethodConfig = ConfidenceMethodConfig() + method_cfg: ConfidenceMethodConfig = field(default_factory=lambda: ConfidenceMethodConfig()) def __post_init__(self): # OmegaConf.structured ensures that post_init check is always executed diff --git a/nemo/collections/common/parts/adapter_modules.py b/nemo/collections/common/parts/adapter_modules.py index 46c635e73975..2084147f9cbc 100644 --- a/nemo/collections/common/parts/adapter_modules.py +++ b/nemo/collections/common/parts/adapter_modules.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass, is_dataclass +from dataclasses import dataclass, field, is_dataclass from typing import Any, Optional from hydra.utils import instantiate @@ -160,5 +160,7 @@ class LinearAdapterConfig: activation: str = 'swish' norm_position: str = 'pre' dropout: float = 0.0 - adapter_strategy: Optional[Any] = adapter_mixin_strategies.ResidualAddAdapterStrategyConfig() + adapter_strategy: Optional[Any] = field( + default_factory=lambda: adapter_mixin_strategies.ResidualAddAdapterStrategyConfig() + ) _target_: str = "{0}.{1}".format(LinearAdapter.__module__, LinearAdapter.__name__) diff --git a/nemo/collections/common/tokenizers/en_ja_tokenizers.py b/nemo/collections/common/tokenizers/en_ja_tokenizers.py index 63ae11f5da33..cf58130834e9 100644 --- a/nemo/collections/common/tokenizers/en_ja_tokenizers.py +++ b/nemo/collections/common/tokenizers/en_ja_tokenizers.py @@ -14,11 +14,19 @@ import re from typing import List -import ipadic -import MeCab from pangu import spacing from sacremoses import MosesDetokenizer, MosesPunctNormalizer, MosesTokenizer +try: + import ipadic + import MeCab + + HAVE_MECAB = True + HAVE_IPADIC = True +except (ImportError, ModuleNotFoundError): + HAVE_MECAB = False + HAVE_IPADIC = False + class EnJaProcessor: """ @@ -67,6 +75,9 @@ class JaMecabProcessor: """ def __init__(self): + if not HAVE_MECAB or not HAVE_IPADIC: + raise ImportError("Please ensure that you have installed `MeCab` and `ipadic` to use JaMecabProcessor") + self.mecab_tokenizer = MeCab.Tagger(ipadic.MECAB_ARGS + " -Owakati") def detokenize(self, text: List[str]) -> str: diff --git a/nemo/collections/nlp/models/machine_translation/mt_enc_dec_config.py b/nemo/collections/nlp/models/machine_translation/mt_enc_dec_config.py index ea1e86eee88c..1a69c3a33fc0 100644 --- a/nemo/collections/nlp/models/machine_translation/mt_enc_dec_config.py +++ b/nemo/collections/nlp/models/machine_translation/mt_enc_dec_config.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Optional, Tuple from omegaconf.omegaconf import MISSING @@ -46,7 +46,7 @@ class MTOptimConfig(OptimConfig): lr: float = 1e-3 betas: Tuple[float, float] = (0.9, 0.98) weight_decay: float = 0.0 - sched: Optional[MTSchedConfig] = MTSchedConfig() + sched: Optional[MTSchedConfig] = field(default_factory=lambda: MTSchedConfig()) @dataclass @@ -74,70 +74,80 @@ class MTEncDecModelConfig(EncDecNLPModelConfig): decoder_tokenizer: Any = MISSING decoder: Any = MISSING - head: TokenClassifierConfig = TokenClassifierConfig(log_softmax=True) + head: TokenClassifierConfig = field(default_factory=lambda: TokenClassifierConfig(log_softmax=True)) # dataset configurations - train_ds: Optional[TranslationDataConfig] = TranslationDataConfig( - src_file_name=MISSING, - tgt_file_name=MISSING, - tokens_in_batch=512, - clean=True, - shuffle=True, - cache_ids=False, - use_cache=False, + train_ds: Optional[TranslationDataConfig] = field( + default_factory=lambda: TranslationDataConfig( + src_file_name=MISSING, + tgt_file_name=MISSING, + tokens_in_batch=512, + clean=True, + shuffle=True, + cache_ids=False, + use_cache=False, + ) ) - validation_ds: Optional[TranslationDataConfig] = TranslationDataConfig( - src_file_name=MISSING, - tgt_file_name=MISSING, - tokens_in_batch=512, - clean=False, - shuffle=False, - cache_ids=False, - use_cache=False, + validation_ds: Optional[TranslationDataConfig] = field( + default_factory=lambda: TranslationDataConfig( + src_file_name=MISSING, + tgt_file_name=MISSING, + tokens_in_batch=512, + clean=False, + shuffle=False, + cache_ids=False, + use_cache=False, + ) ) - test_ds: Optional[TranslationDataConfig] = TranslationDataConfig( - src_file_name=MISSING, - tgt_file_name=MISSING, - tokens_in_batch=512, - clean=False, - shuffle=False, - cache_ids=False, - use_cache=False, + test_ds: Optional[TranslationDataConfig] = field( + default_factory=lambda: TranslationDataConfig( + src_file_name=MISSING, + tgt_file_name=MISSING, + tokens_in_batch=512, + clean=False, + shuffle=False, + cache_ids=False, + use_cache=False, + ) ) - optim: Optional[OptimConfig] = MTOptimConfig() + optim: Optional[OptimConfig] = field(default_factory=lambda: MTOptimConfig()) @dataclass class AAYNBaseConfig(MTEncDecModelConfig): # Attention is All You Need Base Configuration - encoder_tokenizer: TokenizerConfig = TokenizerConfig(library='yttm') - decoder_tokenizer: TokenizerConfig = TokenizerConfig(library='yttm') - - encoder: NeMoTransformerEncoderConfig = NeMoTransformerEncoderConfig( - library='nemo', - model_name=None, - pretrained=False, - hidden_size=512, - inner_size=2048, - num_layers=6, - num_attention_heads=8, - ffn_dropout=0.1, - attn_score_dropout=0.1, - attn_layer_dropout=0.1, + encoder_tokenizer: TokenizerConfig = field(default_factory=lambda: TokenizerConfig(library='yttm')) + decoder_tokenizer: TokenizerConfig = field(default_factory=lambda: TokenizerConfig(library='yttm')) + + encoder: NeMoTransformerEncoderConfig = field( + default_factory=lambda: NeMoTransformerEncoderConfig( + library='nemo', + model_name=None, + pretrained=False, + hidden_size=512, + inner_size=2048, + num_layers=6, + num_attention_heads=8, + ffn_dropout=0.1, + attn_score_dropout=0.1, + attn_layer_dropout=0.1, + ) ) - decoder: NeMoTransformerConfig = NeMoTransformerConfig( - library='nemo', - model_name=None, - pretrained=False, - hidden_size=512, - inner_size=2048, - num_layers=6, - num_attention_heads=8, - ffn_dropout=0.1, - attn_score_dropout=0.1, - attn_layer_dropout=0.1, + decoder: NeMoTransformerConfig = field( + default_factory=lambda: NeMoTransformerConfig( + library='nemo', + model_name=None, + pretrained=False, + hidden_size=512, + inner_size=2048, + num_layers=6, + num_attention_heads=8, + ffn_dropout=0.1, + attn_score_dropout=0.1, + attn_layer_dropout=0.1, + ) ) @@ -150,32 +160,36 @@ class MTBottleneckModelConfig(AAYNBaseConfig): recon_per_token: bool = True log_timing: bool = True - encoder: NeMoTransformerBottleneckEncoderConfig = NeMoTransformerBottleneckEncoderConfig( - library='nemo', - model_name=None, - pretrained=False, - hidden_size=512, - inner_size=2048, - num_layers=6, - num_attention_heads=8, - ffn_dropout=0.1, - attn_score_dropout=0.1, - attn_layer_dropout=0.1, - arch='seq2seq', - hidden_steps=32, - hidden_blocks=1, - hidden_init_method='params', + encoder: NeMoTransformerBottleneckEncoderConfig = field( + default_factory=lambda: NeMoTransformerBottleneckEncoderConfig( + library='nemo', + model_name=None, + pretrained=False, + hidden_size=512, + inner_size=2048, + num_layers=6, + num_attention_heads=8, + ffn_dropout=0.1, + attn_score_dropout=0.1, + attn_layer_dropout=0.1, + arch='seq2seq', + hidden_steps=32, + hidden_blocks=1, + hidden_init_method='params', + ) ) - decoder: NeMoTransformerBottleneckDecoderConfig = NeMoTransformerBottleneckDecoderConfig( - library='nemo', - model_name=None, - pretrained=False, - inner_size=2048, - num_layers=6, - num_attention_heads=8, - ffn_dropout=0.1, - attn_score_dropout=0.1, - attn_layer_dropout=0.1, - arch='seq2seq', + decoder: NeMoTransformerBottleneckDecoderConfig = field( + default_factory=lambda: NeMoTransformerBottleneckDecoderConfig( + library='nemo', + model_name=None, + pretrained=False, + inner_size=2048, + num_layers=6, + num_attention_heads=8, + ffn_dropout=0.1, + attn_score_dropout=0.1, + attn_layer_dropout=0.1, + arch='seq2seq', + ) ) diff --git a/nemo/collections/nlp/models/token_classification/punctuation_capitalization_config.py b/nemo/collections/nlp/models/token_classification/punctuation_capitalization_config.py index 6ca811fe273c..86bf12b92315 100644 --- a/nemo/collections/nlp/models/token_classification/punctuation_capitalization_config.py +++ b/nemo/collections/nlp/models/token_classification/punctuation_capitalization_config.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Dict, Optional from omegaconf.omegaconf import MISSING, DictConfig, OmegaConf, open_dict @@ -215,13 +215,15 @@ class PunctuationCapitalizationModelConfig: This config is a part of :class:`~PunctuationCapitalizationConfig`. """ - class_labels: ClassLabelsConfig = ClassLabelsConfig() + class_labels: ClassLabelsConfig = field(default_factory=lambda: ClassLabelsConfig()) """A mandatory parameter containing a dictionary with names of label id files used in .nemo checkpoints. These file names can also be used for passing label vocabularies to the model. If you wish to use ``class_labels`` for passing vocabularies, please provide path to vocabulary files in ``model.common_dataset_parameters.label_vocab_dir`` parameter.""" - common_dataset_parameters: Optional[CommonDatasetParametersConfig] = CommonDatasetParametersConfig() + common_dataset_parameters: Optional[CommonDatasetParametersConfig] = field( + default_factory=lambda: CommonDatasetParametersConfig() + ) """Label ids and loss mask information information.""" train_ds: Optional[PunctuationCapitalizationTrainDataConfig] = None @@ -233,16 +235,16 @@ class PunctuationCapitalizationModelConfig: test_ds: Optional[PunctuationCapitalizationEvalDataConfig] = None """A configuration for creating test datasets and data loaders.""" - punct_head: HeadConfig = HeadConfig() + punct_head: HeadConfig = field(default_factory=lambda: HeadConfig()) """A configuration for creating punctuation MLP head that is applied to a language model outputs.""" - capit_head: HeadConfig = HeadConfig() + capit_head: HeadConfig = field(default_factory=lambda: HeadConfig()) """A configuration for creating capitalization MLP head that is applied to a language model outputs.""" - tokenizer: Any = TokenizerConfig() + tokenizer: Any = field(default_factory=lambda: TokenizerConfig()) """A configuration for source text tokenizer.""" - language_model: LanguageModelConfig = LanguageModelConfig() + language_model: LanguageModelConfig = field(default_factory=lambda: LanguageModelConfig()) """A configuration of a BERT-like language model which serves as a model body.""" optim: Optional[Any] = None @@ -311,22 +313,30 @@ class PunctuationCapitalizationConfig(NemoConfig): do_testing: bool = False """Whether ot perform testing of the model.""" - model: PunctuationCapitalizationModelConfig = PunctuationCapitalizationModelConfig() + model: PunctuationCapitalizationModelConfig = field(default_factory=lambda: PunctuationCapitalizationModelConfig()) """A configuration for the :class:`~nemo.collections.nlp.models.token_classification.punctuation_capitalization_model.PunctuationCapitalizationModel` model.""" - trainer: Optional[TrainerConfig] = TrainerConfig() + trainer: Optional[TrainerConfig] = field(default_factory=lambda: TrainerConfig()) """Contains ``Trainer`` Lightning class constructor parameters.""" - exp_manager: Optional[ExpManagerConfig] = ExpManagerConfig(name=name, files_to_copy=[]) + exp_manager: Optional[ExpManagerConfig] = field( + default_factory=lambda: ExpManagerConfig(name=None, files_to_copy=[]) + ) """A configuration with various NeMo training options such as output directories, resuming from checkpoint, tensorboard and W&B logging, and so on. For possible options see :ref:`exp-manager-label`.""" + def __post_init__(self): + if self.exp_manager is not None: + self.exp_manager.name = self.name + @dataclass class PunctuationCapitalizationLexicalAudioConfig(PunctuationCapitalizationConfig): - model: PunctuationCapitalizationLexicalAudioModelConfig = PunctuationCapitalizationLexicalAudioModelConfig() + model: PunctuationCapitalizationLexicalAudioModelConfig = field( + default_factory=lambda: PunctuationCapitalizationLexicalAudioModelConfig() + ) def is_legacy_model_config(model_cfg: DictConfig) -> bool: diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py b/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py index 4bd99f7120f0..e2c7f22235d7 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py @@ -13,7 +13,6 @@ # limitations under the License. """Transformer based language model.""" -from MeCab import Model from nemo.collections.nlp.modules.common.megatron.megatron_perceiver_encoders import MegatronPerceiverEncoderModule from nemo.collections.nlp.modules.common.megatron.megatron_transformer_encoder import MegatronTransformerEncoderModule from nemo.collections.nlp.modules.common.megatron.retrieval_transformer import ( @@ -25,6 +24,13 @@ scaled_init_method_normal, ) +try: + from MeCab import Model + + HAVE_MECAB = True +except (ImportError, ModuleNotFoundError): + HAVE_MECAB = False + try: from apex.transformer.enums import AttnMaskType, ModelType diff --git a/nemo/collections/tts/models/fastpitch.py b/nemo/collections/tts/models/fastpitch.py index a0e5497c6e92..bfb00e00c4ba 100644 --- a/nemo/collections/tts/models/fastpitch.py +++ b/nemo/collections/tts/models/fastpitch.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import contextlib -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import List, Optional @@ -70,12 +70,12 @@ class TextTokenizer: apostrophe: bool = True pad_with_space: bool = True add_blank_at: bool = True - g2p: G2PConfig = G2PConfig() + g2p: G2PConfig = field(default_factory=lambda: G2PConfig()) @dataclass class TextTokenizerConfig: - text_tokenizer: TextTokenizer = TextTokenizer() + text_tokenizer: TextTokenizer = field(default_factory=lambda: TextTokenizer()) class FastPitchModel(SpectrogramGenerator, Exportable, FastPitchAdapterModelMixin): diff --git a/nemo/collections/tts/models/tacotron2.py b/nemo/collections/tts/models/tacotron2.py index 846d8afec06e..3fcdee9832ef 100644 --- a/nemo/collections/tts/models/tacotron2.py +++ b/nemo/collections/tts/models/tacotron2.py @@ -13,7 +13,7 @@ # limitations under the License. import contextlib -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Dict, List, Optional import torch @@ -53,7 +53,7 @@ class Preprocessor: @dataclass class Tacotron2Config: - preprocessor: Preprocessor = Preprocessor() + preprocessor: Preprocessor = field(default_factory=lambda: Preprocessor()) encoder: Dict[Any, Any] = MISSING decoder: Dict[Any, Any] = MISSING postnet: Dict[Any, Any] = MISSING diff --git a/nemo/core/config/modelPT.py b/nemo/core/config/modelPT.py index 70e9b675e360..713c83379431 100644 --- a/nemo/core/config/modelPT.py +++ b/nemo/core/config/modelPT.py @@ -58,11 +58,13 @@ class HydraConfig: class NemoConfig: name: str = MISSING model: ModelConfig = MISSING - trainer: config.TrainerConfig = config.TrainerConfig( - strategy="ddp", enable_checkpointing=False, logger=False, log_every_n_steps=1, accelerator='gpu' + trainer: config.TrainerConfig = field( + default_factory=lambda: config.TrainerConfig( + strategy="ddp", enable_checkpointing=False, logger=False, log_every_n_steps=1, accelerator='gpu' + ) ) - exp_manager: Optional[Any] = exp_manager.ExpManagerConfig() - hydra: HydraConfig = HydraConfig() + exp_manager: Optional[Any] = field(default_factory=lambda: exp_manager.ExpManagerConfig()) + hydra: HydraConfig = field(default_factory=lambda: HydraConfig()) class ModelConfigBuilder: diff --git a/nemo/utils/exp_manager.py b/nemo/utils/exp_manager.py index dbd6a297e49b..3ee7cf28bf5a 100644 --- a/nemo/utils/exp_manager.py +++ b/nemo/utils/exp_manager.py @@ -18,7 +18,7 @@ import sys import time import warnings -from dataclasses import dataclass +from dataclasses import dataclass, field from datetime import timedelta from pathlib import Path from shutil import copy, move @@ -146,28 +146,30 @@ class ExpManagerConfig: create_wandb_logger: Optional[bool] = False wandb_logger_kwargs: Optional[Dict[Any, Any]] = None create_mlflow_logger: Optional[bool] = False - mlflow_logger_kwargs: Optional[MLFlowParams] = MLFlowParams() + mlflow_logger_kwargs: Optional[MLFlowParams] = field(default_factory=lambda: MLFlowParams()) create_dllogger_logger: Optional[bool] = False - dllogger_logger_kwargs: Optional[DLLoggerParams] = DLLoggerParams() + dllogger_logger_kwargs: Optional[DLLoggerParams] = field(default_factory=lambda: DLLoggerParams()) create_clearml_logger: Optional[bool] = False - clearml_logger_kwargs: Optional[ClearMLParams] = ClearMLParams() + clearml_logger_kwargs: Optional[ClearMLParams] = field(default_factory=lambda: ClearMLParams()) # Checkpointing parameters create_checkpoint_callback: Optional[bool] = True - checkpoint_callback_params: Optional[CallbackParams] = CallbackParams() + checkpoint_callback_params: Optional[CallbackParams] = field(default_factory=lambda: CallbackParams()) create_early_stopping_callback: Optional[bool] = False - early_stopping_callback_params: Optional[EarlyStoppingParams] = EarlyStoppingParams() + early_stopping_callback_params: Optional[EarlyStoppingParams] = field( + default_factory=lambda: EarlyStoppingParams() + ) create_preemption_callback: Optional[bool] = True # Additional exp_manager arguments files_to_copy: Optional[List[str]] = None # logs timing of train/val/test steps log_step_timing: Optional[bool] = True - step_timing_kwargs: Optional[StepTimingParams] = StepTimingParams() + step_timing_kwargs: Optional[StepTimingParams] = field(default_factory=lambda: StepTimingParams()) # Configures creation of log files for different ranks log_local_rank_0_only: Optional[bool] = False log_global_rank_0_only: Optional[bool] = False # disable initial validation when resuming from a checkpoint saved during validation disable_validation_on_resume: Optional[bool] = True - ema: Optional[EMAParams] = EMAParams() + ema: Optional[EMAParams] = field(default_factory=lambda: EMAParams()) # Wall clock time limit max_time_per_run: Optional[str] = None # time to sleep non 0 ranks during initialization diff --git a/requirements/requirements_nlp.txt b/requirements/requirements_nlp.txt index da44a726bd29..2a2941c27f5d 100644 --- a/requirements/requirements_nlp.txt +++ b/requirements/requirements_nlp.txt @@ -17,7 +17,7 @@ opencc pangu rapidfuzz rouge_score -sacrebleu[ja] +sacrebleu # manually install sacrebleu[ja] for Japanese support; MeCab is unsupported in Python 3.11+ sentence_transformers tensorstore zarr diff --git a/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram.py b/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram.py index 1846a986bf6e..b1cd385f4198 100644 --- a/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram.py +++ b/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram.py @@ -112,14 +112,14 @@ class EvalBeamSearchNGramConfig: beam_beta: List[float] = field(default_factory=lambda: [0.0]) # The beta parameter or list of the betas for the beam search decoding decoding_strategy: str = "beam" - decoding: ctc_beam_decoding.BeamCTCInferConfig = ctc_beam_decoding.BeamCTCInferConfig(beam_size=128) + decoding: ctc_beam_decoding.BeamCTCInferConfig = field(default_factory=lambda: ctc_beam_decoding.BeamCTCInferConfig(beam_size=128)) - text_processing: Optional[TextProcessingConfig] = TextProcessingConfig( + text_processing: Optional[TextProcessingConfig] = field(default_factory=lambda: TextProcessingConfig( punctuation_marks = ".,?", separate_punctuation = False, do_lowercase = False, rm_punctuation = False, - ) + )) # fmt: on diff --git a/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram_transducer.py b/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram_transducer.py index bbc33d214636..8548b839024f 100644 --- a/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram_transducer.py +++ b/scripts/asr_language_modeling/ngram_lm/eval_beamsearch_ngram_transducer.py @@ -115,7 +115,7 @@ class EvalBeamSearchNGramConfig: hat_subtract_ilm: bool = False hat_ilm_weight: List[float] = field(default_factory=lambda: [0.0]) - decoding: rnnt_beam_decoding.BeamRNNTInferConfig = rnnt_beam_decoding.BeamRNNTInferConfig(beam_size=128) + decoding: rnnt_beam_decoding.BeamRNNTInferConfig = field(default_factory=lambda: rnnt_beam_decoding.BeamRNNTInferConfig(beam_size=128)) # fmt: on diff --git a/scripts/confidence_ensembles/build_ensemble.py b/scripts/confidence_ensembles/build_ensemble.py index 99bfa6187b30..e40997c4aca2 100644 --- a/scripts/confidence_ensembles/build_ensemble.py +++ b/scripts/confidence_ensembles/build_ensemble.py @@ -75,7 +75,7 @@ import sys import tempfile from copy import deepcopy -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import Dict, List, Optional, Tuple @@ -209,19 +209,23 @@ class BuildEnsembleConfig: random_seed: int = 0 # for reproducibility # default confidence, can override - confidence: ConfidenceConfig = ConfidenceConfig( - # we keep frame confidences and apply aggregation manually to get full-utterance confidence - preserve_frame_confidence=True, - exclude_blank=True, - aggregation="mean", - method_cfg=ConfidenceMethodConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), + confidence: ConfidenceConfig = field( + default_factory=lambda: ConfidenceConfig( + # we keep frame confidences and apply aggregation manually to get full-utterance confidence + preserve_frame_confidence=True, + exclude_blank=True, + aggregation="mean", + measure_cfg=ConfidenceMethodConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), + ) ) temperature: float = 1.0 # this is optional, but can be used to change any aspect of the transcription # config, such as batch size or amp usage. Note that model, data and confidence # will be overriden by this script - transcription: transcribe_speech.TranscriptionConfig = transcribe_speech.TranscriptionConfig() + transcription: transcribe_speech.TranscriptionConfig = field( + default_factory=lambda: transcribe_speech.TranscriptionConfig() + ) # set to True to tune the confidence. # requires dev manifests to be specified for each model @@ -229,12 +233,14 @@ class BuildEnsembleConfig: # used to specify what to tune over. By default runs tuning over some # reasonalbe grid, so that it does not take forever. # Can be changed as needed - tune_confidence_config: TuneConfidenceConfig = TuneConfidenceConfig() + tune_confidence_config: TuneConfidenceConfig = field(default_factory=lambda: TuneConfidenceConfig()) # very fast to tune and can be important in case of imbalanced datasets # will automatically set to False if dev data is not available tune_logistic_regression: bool = True - tune_logistic_regression_config: TuneLogisticRegressionConfig = TuneLogisticRegressionConfig() + tune_logistic_regression_config: TuneLogisticRegressionConfig = field( + default_factory=lambda: TuneLogisticRegressionConfig() + ) def __post_init__(self): """Checking that if any dev data is provided, all are provided. diff --git a/scripts/speech_recognition/confidence/benchmark_asr_confidence.py b/scripts/speech_recognition/confidence/benchmark_asr_confidence.py index f4558fa85256..0c8ab9535fae 100644 --- a/scripts/speech_recognition/confidence/benchmark_asr_confidence.py +++ b/scripts/speech_recognition/confidence/benchmark_asr_confidence.py @@ -14,7 +14,7 @@ import json import os -from dataclasses import dataclass, is_dataclass +from dataclasses import dataclass, field, is_dataclass from pathlib import Path from typing import Optional @@ -124,7 +124,9 @@ class ConfidenceBenchmarkingConfig: # Confidence configs target_level: str = "auto" # Choices: "word", "token", "auto" (for both word- and token-level confidence) - confidence_cfg: ConfidenceConfig = ConfidenceConfig(preserve_word_confidence=True, preserve_token_confidence=True) + confidence_cfg: ConfidenceConfig = field( + default_factory=lambda: ConfidenceConfig(preserve_word_confidence=True, preserve_token_confidence=True) + ) grid_params: Optional[str] = None # a dictionary with lists of parameters to iteratively benchmark on diff --git a/scripts/speech_recognition/convert_to_tarred_audio_dataset.py b/scripts/speech_recognition/convert_to_tarred_audio_dataset.py index 64c086997ef0..3a739fe3a57d 100644 --- a/scripts/speech_recognition/convert_to_tarred_audio_dataset.py +++ b/scripts/speech_recognition/convert_to_tarred_audio_dataset.py @@ -202,7 +202,7 @@ class ASRTarredDatasetMetadata: num_samples_per_shard: Optional[int] = None is_concatenated_manifest: bool = False - dataset_config: Optional[ASRTarredDatasetConfig] = ASRTarredDatasetConfig() + dataset_config: Optional[ASRTarredDatasetConfig] = field(default_factory=lambda: ASRTarredDatasetConfig()) history: Optional[List[Any]] = field(default_factory=lambda: []) def __post_init__(self): diff --git a/tests/collections/asr/test_text_to_text_dataset.py b/tests/collections/asr/test_text_to_text_dataset.py index 92205de41a1b..bf68b51f1e6e 100644 --- a/tests/collections/asr/test_text_to_text_dataset.py +++ b/tests/collections/asr/test_text_to_text_dataset.py @@ -15,7 +15,7 @@ import json import multiprocessing import os -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path import pytest @@ -118,7 +118,7 @@ class TextTokenizerCfg: apostrophe: bool = True pad_with_space: bool = True add_blank_at: bool = True - g2p: G2PConfig = G2PConfig() + g2p: G2PConfig = field(default_factory=lambda: G2PConfig()) config = OmegaConf.create(OmegaConf.to_yaml(TextTokenizerCfg())) return instantiate(config) diff --git a/tools/nemo_forced_aligner/align.py b/tools/nemo_forced_aligner/align.py index 77ab3111fd91..d298e8072d58 100644 --- a/tools/nemo_forced_aligner/align.py +++ b/tools/nemo_forced_aligner/align.py @@ -149,8 +149,8 @@ class AlignmentConfig: # Output file configs save_output_file_formats: List[str] = field(default_factory=lambda: ["ctm", "ass"]) - ctm_file_config: CTMFileConfig = CTMFileConfig() - ass_file_config: ASSFileConfig = ASSFileConfig() + ctm_file_config: CTMFileConfig = field(default_factory=lambda: CTMFileConfig()) + ass_file_config: ASSFileConfig = field(default_factory=lambda: ASSFileConfig()) @hydra_runner(config_name="AlignmentConfig", schema=AlignmentConfig) From 6aff5e97283d0c1287dc47bf70c5fa84469c4c34 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Mon, 9 Oct 2023 15:52:51 -0700 Subject: [PATCH 300/512] [Stable Diffusion/ControlNet] Enable O2 training for SD and Fix ControlNet CI failure --- .../stable_diffusion/conf/sd_train.yaml | 9 +- .../generative/stable_diffusion/sd_train.py | 26 ++- examples/multimodal/mllm/neva/neva_export.py | 4 +- nemo/collections/common/callbacks/ema.py | 4 +- .../models/controlnet/controlnet.py | 2 + .../diffusionmodules/openaimodel.py | 40 ++-- .../megatron/data_samplers.py | 2 +- .../language_modeling/megatron_base_model.py | 10 +- .../vision/data/megatron/vit_dataset.py | 1 + nemo/core/optim/fused_adam.py | 190 ++++++++++++++++++ nemo/core/optim/optimizers.py | 8 + 11 files changed, 262 insertions(+), 34 deletions(-) create mode 100755 nemo/core/optim/fused_adam.py diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml index ce865e7d739f..78a8dedccbc2 100644 --- a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml @@ -119,6 +119,7 @@ model: use_checkpoint: False legacy: False use_flash_attention: True + enable_amp_o2_fp16: True first_stage_config: _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL @@ -163,10 +164,11 @@ model: resume_from_checkpoint: null # manually set the checkpoint file to load from apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + ddp_overlap: True # True for using PyTorch DDP overlap. optim: - name: fused_adam - lr: 1e-4 + name: megatron_fused_adam + lr: null weight_decay: 0. betas: - 0.9 @@ -175,6 +177,9 @@ model: name: WarmupHoldPolicy warmup_steps: 10000 hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant + capturable: True + master_weights: True + max_norm: ${trainer.gradient_clip_val} # Nsys profiling options nsys_profile: diff --git a/examples/multimodal/generative/stable_diffusion/sd_train.py b/examples/multimodal/generative/stable_diffusion/sd_train.py index 0e3537a891c6..b741af3d76e6 100644 --- a/examples/multimodal/generative/stable_diffusion/sd_train.py +++ b/examples/multimodal/generative/stable_diffusion/sd_train.py @@ -21,11 +21,35 @@ from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy from nemo.core.config import hydra_runner from nemo.utils import logging from nemo.utils.exp_manager import exp_manager +class MegatronStableDiffusionTrainerBuilder(MegatronTrainerBuilder): + """Builder for SD model Trainer with overrides.""" + + def _training_strategy(self) -> NLPDDPStrategy: + """ + Returns a ddp strategy passed to Trainer.strategy. + """ + ddp_overlap = self.cfg.model.get('ddp_overlap', True) + if ddp_overlap: + return NLPDDPStrategy( + no_ddp_communication_hook=False, + gradient_as_bucket_view=self.cfg.model.gradient_as_bucket_view, + find_unused_parameters=True, + bucket_cap_mb=256, + ) + else: + return NLPDDPStrategy( + no_ddp_communication_hook=True, + gradient_as_bucket_view=self.cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + + @hydra_runner(config_path='conf', config_name='sd_train') def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") @@ -50,7 +74,7 @@ def amp_autocast_init(self, *args, **kwargs): torch.autocast.__orig_init__ = torch.autocast.__init__ torch.autocast.__init__ = amp_autocast_init - trainer = MegatronTrainerBuilder(cfg).create_trainer() + trainer = MegatronStableDiffusionTrainerBuilder(cfg).create_trainer() exp_manager(trainer, cfg.exp_manager) diff --git a/examples/multimodal/mllm/neva/neva_export.py b/examples/multimodal/mllm/neva/neva_export.py index 185be63b6310..0b108effff88 100644 --- a/examples/multimodal/mllm/neva/neva_export.py +++ b/examples/multimodal/mllm/neva/neva_export.py @@ -144,7 +144,9 @@ def output_names(self): LOGGER.info('Exporting TRT') engine = engine_from_network( network_from_onnx_path('./onnx/vision_encoder.onnx'), - config=CreateConfig(fp16=precision in [16, '16', '16-mixed'], bf16=precision in ['bf16', 'bf16-mixed'], profiles=[p],), + config=CreateConfig( + fp16=precision in [16, '16', '16-mixed'], bf16=precision in ['bf16', 'bf16-mixed'], profiles=[p], + ), ) save_engine(engine, path=os.path.join(out_dir, 'vision_encoder.plan')) diff --git a/nemo/collections/common/callbacks/ema.py b/nemo/collections/common/callbacks/ema.py index ec53b61e17c0..55243c542866 100644 --- a/nemo/collections/common/callbacks/ema.py +++ b/nemo/collections/common/callbacks/ema.py @@ -227,7 +227,7 @@ def __init__( def all_parameters(self) -> Iterable[torch.Tensor]: return (param for group in self.param_groups for param in group['params']) - def step(self, closure=None, **kwargs): + def step(self, closure=None, grad_scaler=None, **kwargs): self.join() if self.first_iteration: @@ -244,7 +244,7 @@ def step(self, closure=None, **kwargs): ) self.rebuild_ema_params = False - loss = self.optimizer.step(closure) + loss = self.optimizer.step(closure=closure, grad_scaler=grad_scaler) if self._should_update_at_step(): self.update() diff --git a/nemo/collections/multimodal/models/controlnet/controlnet.py b/nemo/collections/multimodal/models/controlnet/controlnet.py index 2fb1c834ac1a..0b0c7b291d69 100644 --- a/nemo/collections/multimodal/models/controlnet/controlnet.py +++ b/nemo/collections/multimodal/models/controlnet/controlnet.py @@ -161,6 +161,8 @@ def log_images( log = dict() batch = next(batch) + batch['images'] = batch['images'].to(torch.cuda.current_device()) + batch['hint'] = batch['hint'].to(torch.cuda.current_device()) N = batch['images'].shape[0] z, c = self.get_input(batch, self.first_stage_key, bs=N) c_cat, c = c["c_concat"][:N], c["c_crossattn"][:N] diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py index 515544c22028..e46f0c84b38c 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py @@ -34,13 +34,16 @@ ) -# dummy replace -def convert_module_to_f16(x): - pass +def convert_module_to_dtype(module, dtype): + # Convert module parameters to dtype + if isinstance(module, (torch.nn.Conv1d, torch.nn.Conv2d, torch.nn.Linear)): + module.weight.data = module.weight.data.to(dtype) + if module.bias is not None: + module.bias.data = module.bias.data.to(dtype) -def convert_module_to_f32(x): - pass +def convert_module_to_fp16(module): + convert_module_to_dtype(module, torch.float16) ## go @@ -473,6 +476,7 @@ def __init__( from_NeMo=False, # It must be specified when from pretrained is not None. It indicates loading unet from NeMo trained ckpt or HF use_flash_attention: bool = False, + enable_amp_o2_fp16: bool = False, ): super().__init__() if use_spatial_transformer: @@ -732,6 +736,9 @@ def __init__( 'Following keys are missing during loading unet weights, which may lead to compromised image quality for a resumed training. Please check the checkpoint you provided.' ) + if enable_amp_o2_fp16: + self.convert_to_fp16() + def _input_blocks_mapping(self, input_dict): res_dict = {} for key_, value_ in input_dict.items(): @@ -950,17 +957,7 @@ def convert_to_fp16(self): """ Convert the torso of the model to float16. """ - self.input_blocks.apply(convert_module_to_f16) - self.middle_block.apply(convert_module_to_f16) - self.output_blocks.apply(convert_module_to_f16) - - def convert_to_fp32(self): - """ - Convert the torso of the model to float32. - """ - self.input_blocks.apply(convert_module_to_f32) - self.middle_block.apply(convert_module_to_f32) - self.output_blocks.apply(convert_module_to_f32) + self.apply(convert_module_to_fp16) def forward(self, x, timesteps=None, context=None, y=None, **kwargs): """ @@ -1164,15 +1161,8 @@ def convert_to_fp16(self): """ Convert the torso of the model to float16. """ - self.input_blocks.apply(convert_module_to_f16) - self.middle_block.apply(convert_module_to_f16) - - def convert_to_fp32(self): - """ - Convert the torso of the model to float32. - """ - self.input_blocks.apply(convert_module_to_f32) - self.middle_block.apply(convert_module_to_f32) + self.input_blocks.apply(convert_module_to_fp16) + self.middle_block.apply(convert_module_to_fp16) def forward(self, x, timesteps): """ diff --git a/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py b/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py index 266159fd44c9..3ca4767e3480 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py @@ -190,4 +190,4 @@ def __iter__(self): # Check the last partial batch and see drop_last is set if len(batch) > 0 and not self.drop_last: - yield batch \ No newline at end of file + yield batch diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index b7e8daaddfa5..8435ce017254 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -78,8 +78,9 @@ class MegatronBaseModel(NLPModel): with O2 level optimizations and/or model parallelism. - Perform gradient clipping: `grad_clip_pl_default` triggers the PyTorch Lightning default implementation, `with_distributed_adam` triggers - the distributed optimizer's implementation, `megatron_amp_O2` triggers gradient clipping on the main grads, - and otherwise gradient clipping is performed on the model grads. + the distributed optimizer's implementation, `with_megatron_fused_adam` triggers + the optimizer's implementation, `megatron_amp_O2` triggers gradient clipping + on the main grads, and otherwise gradient clipping is performed on the model grads. """ @@ -122,6 +123,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True): self.model_parallel_config: ModelParallelConfig = self.build_model_parallel_config() self.with_distributed_adam = cfg.optim.get('name') == 'distributed_fused_adam' + self.with_megatron_fused_adam = cfg.optim.get('name') == 'megatron_fused_adam' # used in NVIDIA NGC PyTorch containers self._enable_nvidia_optimizations() @@ -339,6 +341,10 @@ def configure_gradient_clipping(self, *args, **kwargs): if clip_val <= 0: return + if self.with_megatron_fused_adam: + # Gradient clipping is done in optimizer step + return + if self.grad_clip_pl_default: # use the default behavior return super().configure_gradient_clipping(*args, **kwargs) diff --git a/nemo/collections/vision/data/megatron/vit_dataset.py b/nemo/collections/vision/data/megatron/vit_dataset.py index 649840872aa5..5ba711dd0b28 100644 --- a/nemo/collections/vision/data/megatron/vit_dataset.py +++ b/nemo/collections/vision/data/megatron/vit_dataset.py @@ -35,6 +35,7 @@ def _to_torch_data_type(precision): else: raise ValueError(f"Cannot recognize precision {precision}") + class RandomSeedDataset(Dataset): def __init__(self, dataset, seed=1234): self.base_seed = seed diff --git a/nemo/core/optim/fused_adam.py b/nemo/core/optim/fused_adam.py new file mode 100755 index 000000000000..9a7186250ad9 --- /dev/null +++ b/nemo/core/optim/fused_adam.py @@ -0,0 +1,190 @@ +import amp_C +import torch +from apex.multi_tensor_apply import multi_tensor_applier +from apex.optimizers import FusedAdam + + +class MegatronFusedAdam(FusedAdam): + """Wrapper class that supports NeMo-Megatron optimizations + + Performs gradient clipping, unscaling, and optimizer step. + """ + + def __init__(self, *args, max_norm=0, norm_type=2, **kwargs): + super().__init__(*args, **kwargs) + + assert norm_type == 2, "Currently only norm_type=2 is supported for MegatronFusedAdam" + + # Gradient clipping parameters + self.max_norm = float(max_norm) + self.norm_type = float(norm_type) + + def step(self, closure=None, grad_scaler=None): + from megatron.core import parallel_state + from megatron.core.tensor_parallel.layers import param_is_not_tensor_parallel_duplicate + + from nemo.collections.nlp.modules.common.megatron.module import param_is_not_shared + + # Code path below assumes capturable=True and master_weights=True + if not (self.capturable and self.master_weights): + return super().step(closure=closure, grad_scaler=grad_scaler) + + loss = None + if closure is not None: + loss = closure() + + for group, group_master in zip(self.param_groups, self.param_groups_master): + if len(group['params']) == 0: + continue + device = group['params'][0].device + bias_correction = 1 if group['bias_correction'] else 0 + beta1, beta2 = group['betas'] + + # Assume same step per parameter group for simplicity + if 'step' in group: + group['step'] += 1 if not self.capturable else (self._dummy_overflow_buf != 1).to(torch.int) + else: + group['step'] = 1 if not self.capturable else torch.tensor([1], dtype=torch.int, device=device) + + # Check for overflow in gradients + found_inf = ( + grad_scaler._check_inf_per_device(self)[device] + if grad_scaler is not None + else torch.zeros((1,), device=device) + ) + self._dummy_overflow_buf.copy_(found_inf) + + # Get gradient scaling/unscaling factors + scale, inv_scale = None, None + if grad_scaler: + scale = grad_scaler._get_scale_async() + inv_scale = scale.double().reciprocal().float() + else: + scale = torch.ones((1,), device=device) + inv_scale = torch.ones((1,), device=device) + combined_scale = inv_scale + + # Gradient clipping + if self.max_norm > 0: + # Unscale gradients and find L2 norm + fp32_grads_for_norm = [] + fp16_grads_for_norm = [] + for p in group['params']: + if p.grad is None: + continue + assert p.dtype in [torch.float32, torch.float16], 'Only FP32/FP16 model parameters are supported' + + is_not_shared = param_is_not_shared(p) + is_not_tp_duplicate = param_is_not_tensor_parallel_duplicate(p) + if is_not_shared and is_not_tp_duplicate: + if p.dtype == torch.float32: + fp32_grads_for_norm.append(p.grad.detach()) + else: + fp16_grads_for_norm.append(p.grad.detach()) + + if fp32_grads_for_norm: + fp32_grad_norm, _ = multi_tensor_applier( + amp_C.multi_tensor_unscale_l2norm, + self._dummy_overflow_buf, + [fp32_grads_for_norm], + inv_scale, + False, + ) + else: + fp32_grad_norm = torch.tensor([0.0], dtype=torch.float32, device=device) + + if fp16_grads_for_norm: + fp16_grad_norm, _ = multi_tensor_applier( + amp_C.multi_tensor_unscale_l2norm, + self._dummy_overflow_buf, + [fp16_grads_for_norm], + inv_scale, + False, + ) + else: + fp16_grad_norm = torch.tensor([0.0], dtype=torch.float32, device=device) + + # Prep L2 norm for allreduce + total_norm = (fp32_grad_norm ** self.norm_type + fp16_grad_norm ** self.norm_type).squeeze() + + # Allreduce L2 norm across model-parallel GPUs + torch.distributed.all_reduce( + total_norm, op=torch.distributed.ReduceOp.SUM, group=parallel_state.get_model_parallel_group() + ) + total_norm = total_norm ** (1.0 / self.norm_type) + + # Combine unscaling factor with clip coefficient + clip_coeff = self.max_norm / (total_norm + 1.0e-6) + clip_coeff_clamped = torch.clamp(clip_coeff, max=1.0) + combined_scale = clip_coeff_clamped * combined_scale # Potential issue with associativity? + + # Create lists for multi-tensor apply + g_16, p_16, m_16, v_16 = [], [], [], [] + g_32, p_32, m_32, v_32 = [], [], [], [] + p_16_master = [] + p_32_master = [] + + for p, p_master in zip(group['params'], group_master['params']): + if p.grad is None: + continue + if p.grad.data.is_sparse: + raise RuntimeError( + 'MegatronFusedAdam does not support sparse gradients, please consider SparseAdam instead' + ) + + state = self.state[p] + # State initialization + if len(state) == 0: + # Exponential moving average of gradient values + state['exp_avg'] = torch.zeros_like(p.data).float() + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = torch.zeros_like(p.data).float() + + if p.dtype == torch.float16: + p_16_master.append(p_master.data) + g_16.append(p.grad.data) + p_16.append(p.data) + m_16.append(state['exp_avg']) + v_16.append(state['exp_avg_sq']) + elif p.dtype == torch.float32: + p_32_master.append(p_master.data) + g_32.append(p.grad.data) + p_32.append(p.data) + m_32.append(state['exp_avg']) + v_32.append(state['exp_avg_sq']) + else: + raise RuntimeError('MegatronFusedAdam only supports fp16 and fp32.') + + if len(g_16) > 0: + multi_tensor_applier( + self.multi_tensor_adam_capturable_master, + self._dummy_overflow_buf, + [g_16, p_16, m_16, v_16, p_16_master], + group['lr'], + beta1, + beta2, + group['eps'], + group['step'], + self.adam_w_mode, + bias_correction, + group['weight_decay'], + combined_scale, + ) + + if len(g_32) > 0: + multi_tensor_applier( + self.multi_tensor_adam_capturable_master, + self._dummy_overflow_buf, + [g_32, p_32, m_32, v_32, p_32_master], + group['lr'], + beta1, + beta2, + group['eps'], + group['step'], + self.adam_w_mode, + bias_correction, + group['weight_decay'], + combined_scale, + ) + + return loss diff --git a/nemo/core/optim/optimizers.py b/nemo/core/optim/optimizers.py index f591de92d3bd..0aeda813dea5 100644 --- a/nemo/core/optim/optimizers.py +++ b/nemo/core/optim/optimizers.py @@ -64,6 +64,14 @@ except (ImportError, ModuleNotFoundError): HAVE_APEX_DISTRIBUTED_ADAM = False + try: + # Try importing wrapper for Apex FusedAdam optimizer + from nemo.core.optim.fused_adam import MegatronFusedAdam + + AVAILABLE_OPTIMIZERS['megatron_fused_adam'] = MegatronFusedAdam + except (ImportError, ModuleNotFoundError): + logging.warning("Could not import MegatronFusedAdam optimizer") + __all__ = ['get_optimizer', 'register_optimizer', 'parse_optimizer_args'] From 32e4fba81f75f4c7a5378d893aef184d3265dcb5 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Mon, 9 Oct 2023 20:27:35 -0700 Subject: [PATCH 301/512] Mingyuanm/dreambooth fix --- .../dreambooth/conf/dreambooth.yaml | 2 +- .../generative/dreambooth/dreambooth.py | 3 ++- .../data/dreambooth/dreambooth_dataset.py | 27 +++++++++---------- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml b/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml index 771c6d85d82a..37e9b284e219 100644 --- a/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml +++ b/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml @@ -201,7 +201,7 @@ model: instance_prompt: a photo of a sks dog regularization_dir: /datasets/nemo_dogs regularization_prompt: a photo of a dog - num_reg_images: 200 + num_reg_images: 10 num_images_per_prompt: 4 resolution: 512 center_crop: True diff --git a/examples/multimodal/generative/dreambooth/dreambooth.py b/examples/multimodal/generative/dreambooth/dreambooth.py index 777484734eb0..2b6212f0bba9 100644 --- a/examples/multimodal/generative/dreambooth/dreambooth.py +++ b/examples/multimodal/generative/dreambooth/dreambooth.py @@ -102,7 +102,8 @@ def main(cfg): torch.backends.cuda.matmul.allow_tf32 = True - prepare_reg_data(cfg) + if cfg.model.with_prior_preservation: + prepare_reg_data(cfg) parallel_state.destroy_model_parallel() trainer = MegatronTrainerBuilder(cfg).create_trainer() diff --git a/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py b/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py index 492cc00ec5cc..e25dcfbecf6b 100644 --- a/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py +++ b/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py @@ -87,8 +87,10 @@ def __init__( self.cached_reg_data_root = f'{self.reg_data_root}_cached' self.instance_images_path = list(Path(self.cached_instance_data_root).iterdir()) self.num_instance_images = len(self.instance_images_path) - self.reg_images_path = list(Path(self.cached_reg_data_root).iterdir()) - self.num_reg_images = len(self.reg_images_path) + + if self.with_prior_preservation: + self.reg_images_path = list(Path(self.cached_reg_data_root).iterdir()) + self.num_reg_images = len(self.reg_images_path) if self.cached_instance_data_root: self.instance_images_path = list(Path(self.cached_instance_data_root).iterdir()) @@ -129,23 +131,18 @@ def cache_latents(self, vae, text_encoder): os.makedirs(f'{self.instance_data_root}_cached', exist_ok=True) self.cached_instance_data_root = f'{self.instance_data_root}_cached' self.cached_reg_data_root = f'{self.reg_data_root}_cached' - if self.instance_data_root and (self.cached_instance_data_root is None): - + if self.instance_data_root and (len(os.listdir(self.cached_instance_data_root)) < self.num_instance_images): for i in tqdm(range(self.num_instance_images)): - if len(os.listdir(self.cached_instance_data_root)) == self.num_instance_images: - break x = torch.Tensor(self.get_image(self.instance_images_path[i % self.num_instance_images])) x = torch.unsqueeze(x, dim=0) params = vae.encode(x).parameters.squeeze(dim=0) torch.save(params, f'{self.instance_data_root}_cached/instance_image_cache_{i}.pt') - if self.with_prior_preservation and self.reg_data_root and (self.cached_reg_data_root is None): + if self.with_prior_preservation: os.makedirs(f'{self.reg_data_root}_cached', exist_ok=True) - - for i in tqdm(range(self.num_reg_images)): - if len(os.listdir(self.cached_reg_data_root)) == self.num_reg_images: - break - x = torch.Tensor(self.get_image(self.reg_images_path[i % self.num_reg_images])) - x = torch.unsqueeze(x, dim=0) - params = vae.encode(x).parameters.squeeze(dim=0) - torch.save(params, f'{self.reg_data_root}_cached/reg_image_cache_{i}.pt') + if self.reg_data_root and (len(os.listdir(self.cached_reg_data_root)) < self.num_reg_images): + for i in tqdm(range(self.num_reg_images)): + x = torch.Tensor(self.get_image(self.reg_images_path[i % self.num_reg_images])) + x = torch.unsqueeze(x, dim=0) + params = vae.encode(x).parameters.squeeze(dim=0) + torch.save(params, f'{self.reg_data_root}_cached/reg_image_cache_{i}.pt') From 173c468ae5fab5c7109f31f566b3cf3c5de5dd0c Mon Sep 17 00:00:00 2001 From: Ao Tang Date: Tue, 10 Oct 2023 09:22:09 -0700 Subject: [PATCH 302/512] Fix NeMo CI Infer Issue --- examples/multimodal/generative/imagen/imagen_infer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/multimodal/generative/imagen/imagen_infer.py b/examples/multimodal/generative/imagen/imagen_infer.py index 5d2e58c395f9..97402b3ee500 100644 --- a/examples/multimodal/generative/imagen/imagen_infer.py +++ b/examples/multimodal/generative/imagen/imagen_infer.py @@ -23,7 +23,7 @@ def main(inference_config): if inference_config.get('infer'): # invoking from launcher - trainer = Trainer(inference_config.trainer) + trainer = Trainer(**inference_config.trainer) inference_config = inference_config.infer else: trainer = Trainer() From 3e038fdc305661f600a0d3828daed8dfcd8c69b7 Mon Sep 17 00:00:00 2001 From: Ahmad Kiswani Date: Wed, 11 Oct 2023 23:53:31 +0300 Subject: [PATCH 303/512] DreamFusion --- .dockerignore | 2 + .gitignore | 1 + .../generative/nerf/benchmark_callback.py | 82 +++ .../generative/nerf/config/config.yaml | 52 ++ .../nerf/config/model/background/random.yaml | 3 + .../nerf/config/model/background/static.yaml | 2 + .../nerf/config/model/background/tcnn.yaml | 19 + .../config/model/background/torchngp.yaml | 11 + .../nerf/config/model/data/data.yaml | 41 ++ .../nerf/config/model/dreamfusion-dmtet.yaml | 40 ++ .../nerf/config/model/dreamfusion.yaml | 40 ++ .../config/model/guidance/sd_huggingface.yaml | 4 + .../nerf/config/model/guidance/sd_nemo.yaml | 4 + .../nerf/config/model/guidance/sd_trt.yaml | 5 + .../nerf/config/model/loss/dmtet.yaml | 8 + .../nerf/config/model/loss/dreamfusion.yaml | 8 + .../config/model/material/basic_shading.yaml | 1 + .../nerf/config/model/nerf/tcnn.yaml | 32 + .../nerf/config/model/nerf/torchngp.yaml | 26 + .../nerf/config/model/optim/adan.yaml | 6 + .../nerf/config/model/renderer/nerfacc.yaml | 8 + .../config/model/renderer/nvdiffrast.yaml | 6 + .../model/renderer/torchngp_raymarching.yaml | 7 + examples/multimodal/generative/nerf/data.py | 73 +++ examples/multimodal/generative/nerf/main.py | 71 +++ .../multimodal/data/nerf/__init__.py | 0 .../multimodal/data/nerf/cameras.py | 178 ++++++ .../multimodal/data/nerf/circle_poses.py | 214 +++++++ .../multimodal/data/nerf/random_poses.py | 436 ++++++++++++++ .../collections/multimodal/data/nerf/utils.py | 204 +++++++ .../multimodal/models/nerf/base.py | 24 + .../multimodal/models/nerf/dreamfusion.py | 313 ++++++++++ .../multimodal/models/nerf/txt2nerf_base.py | 81 +++ .../multimodal/modules/nerf/__init__.py | 0 .../nerf/background/nerf_background_base.py | 22 + .../nerf/background/random_background.py | 19 + .../nerf/background/static_background.py | 14 + .../nerf/background/tcnn_background.py | 32 + .../nerf/background/torchngp_background.py | 31 + .../modules/nerf/geometry/__init__.py | 0 .../multimodal/modules/nerf/geometry/dmtet.py | 150 +++++ .../modules/nerf/geometry/layers.py | 129 ++++ .../modules/nerf/geometry/nerf_base.py | 360 ++++++++++++ .../modules/nerf/geometry/tcnn_nerf.py | 108 ++++ .../modules/nerf/geometry/torchngp_nerf.py | 114 ++++ .../modules/nerf/guidance/__init__.py | 0 .../stablediffusion_huggingface_pipeline.py | 142 +++++ .../guidance/stablediffusion_nemo_pipeline.py | 129 ++++ .../guidance/stablediffusion_trt_pipeline.py | 221 +++++++ .../nerf/guidance/txt2img_guidance_base.py | 6 + .../nerf/loss/laplacian_smooth_loss.py | 38 ++ .../nerf/loss/normal_consistency_loss.py | 56 ++ .../modules/nerf/materials/__init__.py | 0 .../modules/nerf/materials/basic_shading.py | 66 +++ .../modules/nerf/materials/materials_base.py | 29 + .../modules/nerf/renderers/__init__.py | 0 .../modules/nerf/renderers/base_renderer.py | 18 + .../nerf/renderers/base_sdf_renderer.py | 20 + .../nerf/renderers/base_volume_renderer.py | 7 + .../nerf/renderers/nerfacc_volume_renderer.py | 364 ++++++++++++ .../nerf/renderers/nvdiffrast_renderer.py | 222 +++++++ .../renderers/torchngp_volume_renderer.py | 275 +++++++++ .../multimodal/modules/nerf/utils/__init__.py | 0 .../modules/nerf/utils/activation.py | 20 + .../modules/nerf/utils/torch_ngp/__init__.py | 0 .../modules/nerf/utils/torch_ngp/encoding.py | 137 +++++ .../nerf/utils/torch_ngp/freqencoder.py | 73 +++ .../nerf/utils/torch_ngp/gridencoder.py | 287 +++++++++ .../nerf/utils/torch_ngp/raymarching.py | 551 ++++++++++++++++++ .../modules/nerf/utils/torch_ngp/shencoder.py | 82 +++ .../modules/nerf/utils/trt_engine.py | 173 ++++++ nemo/core/optim/adan.py | 453 ++++++++++++++ nemo/core/optim/optimizers.py | 2 + 73 files changed, 6352 insertions(+) create mode 100644 examples/multimodal/generative/nerf/benchmark_callback.py create mode 100644 examples/multimodal/generative/nerf/config/config.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/background/random.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/background/static.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/background/tcnn.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/background/torchngp.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/data/data.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/dreamfusion-dmtet.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/dreamfusion.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/guidance/sd_huggingface.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/guidance/sd_nemo.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/guidance/sd_trt.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/loss/dmtet.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/loss/dreamfusion.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/material/basic_shading.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/nerf/tcnn.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/nerf/torchngp.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/optim/adan.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/renderer/nerfacc.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/renderer/nvdiffrast.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/renderer/torchngp_raymarching.yaml create mode 100644 examples/multimodal/generative/nerf/data.py create mode 100644 examples/multimodal/generative/nerf/main.py create mode 100644 nemo/collections/multimodal/data/nerf/__init__.py create mode 100644 nemo/collections/multimodal/data/nerf/cameras.py create mode 100644 nemo/collections/multimodal/data/nerf/circle_poses.py create mode 100644 nemo/collections/multimodal/data/nerf/random_poses.py create mode 100644 nemo/collections/multimodal/data/nerf/utils.py create mode 100644 nemo/collections/multimodal/models/nerf/base.py create mode 100644 nemo/collections/multimodal/models/nerf/dreamfusion.py create mode 100644 nemo/collections/multimodal/models/nerf/txt2nerf_base.py create mode 100644 nemo/collections/multimodal/modules/nerf/__init__.py create mode 100644 nemo/collections/multimodal/modules/nerf/background/nerf_background_base.py create mode 100644 nemo/collections/multimodal/modules/nerf/background/random_background.py create mode 100644 nemo/collections/multimodal/modules/nerf/background/static_background.py create mode 100644 nemo/collections/multimodal/modules/nerf/background/tcnn_background.py create mode 100644 nemo/collections/multimodal/modules/nerf/background/torchngp_background.py create mode 100644 nemo/collections/multimodal/modules/nerf/geometry/__init__.py create mode 100644 nemo/collections/multimodal/modules/nerf/geometry/dmtet.py create mode 100644 nemo/collections/multimodal/modules/nerf/geometry/layers.py create mode 100644 nemo/collections/multimodal/modules/nerf/geometry/nerf_base.py create mode 100644 nemo/collections/multimodal/modules/nerf/geometry/tcnn_nerf.py create mode 100644 nemo/collections/multimodal/modules/nerf/geometry/torchngp_nerf.py create mode 100644 nemo/collections/multimodal/modules/nerf/guidance/__init__.py create mode 100644 nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_huggingface_pipeline.py create mode 100644 nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_nemo_pipeline.py create mode 100644 nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_trt_pipeline.py create mode 100644 nemo/collections/multimodal/modules/nerf/guidance/txt2img_guidance_base.py create mode 100644 nemo/collections/multimodal/modules/nerf/loss/laplacian_smooth_loss.py create mode 100644 nemo/collections/multimodal/modules/nerf/loss/normal_consistency_loss.py create mode 100644 nemo/collections/multimodal/modules/nerf/materials/__init__.py create mode 100644 nemo/collections/multimodal/modules/nerf/materials/basic_shading.py create mode 100644 nemo/collections/multimodal/modules/nerf/materials/materials_base.py create mode 100644 nemo/collections/multimodal/modules/nerf/renderers/__init__.py create mode 100644 nemo/collections/multimodal/modules/nerf/renderers/base_renderer.py create mode 100644 nemo/collections/multimodal/modules/nerf/renderers/base_sdf_renderer.py create mode 100644 nemo/collections/multimodal/modules/nerf/renderers/base_volume_renderer.py create mode 100644 nemo/collections/multimodal/modules/nerf/renderers/nerfacc_volume_renderer.py create mode 100644 nemo/collections/multimodal/modules/nerf/renderers/nvdiffrast_renderer.py create mode 100644 nemo/collections/multimodal/modules/nerf/renderers/torchngp_volume_renderer.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/__init__.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/activation.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/__init__.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/encoding.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/freqencoder.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/gridencoder.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/raymarching.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/shencoder.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/trt_engine.py create mode 100644 nemo/core/optim/adan.py diff --git a/.dockerignore b/.dockerignore index 14f5114d01be..00626456bb1e 100644 --- a/.dockerignore +++ b/.dockerignore @@ -17,3 +17,5 @@ coverage.xml .git **/*.nemo **/*.ckpt + +nogit/ \ No newline at end of file diff --git a/.gitignore b/.gitignore index 1ff2a92cac64..e566a6685a32 100644 --- a/.gitignore +++ b/.gitignore @@ -179,3 +179,4 @@ examples/neural_graphs/*.yml .hydra/ nemo_experiments/ +nogit/ \ No newline at end of file diff --git a/examples/multimodal/generative/nerf/benchmark_callback.py b/examples/multimodal/generative/nerf/benchmark_callback.py new file mode 100644 index 000000000000..7d216a6679c4 --- /dev/null +++ b/examples/multimodal/generative/nerf/benchmark_callback.py @@ -0,0 +1,82 @@ +import time +from typing import Optional + +from pytorch_lightning import Callback, LightningModule, Trainer + +from nemo.utils import logging + + +class BenchmarkCallback(Callback): + def __init__( + self, + start_benchmark_at_step: int = 0, + stop_benchmark_at_step: Optional[int] = None, + log_every_n_steps: int = 10, + ): + super().__init__() + self.start_benchmark_at_step = start_benchmark_at_step + self.stop_benchmark_at_step = stop_benchmark_at_step + self.log_every_n_steps = log_every_n_steps + self.train_times = [] + self.val_times = [] + self.train_steps_times = [] + self.val_steps_times = [] + + def should_benchmark(self, trainer: Trainer): + if self.stop_benchmark_at_step is None: + return trainer.global_step >= self.start_benchmark_at_step + return self.start_benchmark_at_step <= trainer.global_step <= self.stop_benchmark_at_step + + def on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule): + self.epoch_start_time = time.time() + + def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule): + if self.should_benchmark(trainer): + epoch_time = time.time() - self.epoch_start_time + self.train_times.append(epoch_time) + logging.info(f'Training-Epoch-{trainer.current_epoch}-Time: {epoch_time} [sec]') + + def on_train_batch_start(self, trainer: Trainer, pl_module: LightningModule, batch, batch_idx: int): + self.step_start_time = time.time() + + def on_train_batch_end(self, trainer: Trainer, pl_module: LightningModule, outputs, batch, batch_idx: int): + if self.should_benchmark(trainer): + step_time = time.time() - self.step_start_time + self.train_steps_times.append(step_time) + if trainer.global_step % self.log_every_n_steps == 0: + logging.info(f'Training-Step-{trainer.global_step}-Time: {step_time} [sec]') + + def on_validation_epoch_start(self, trainer: Trainer, pl_module: LightningModule): + self.val_start_time = time.time() + + def on_validation_epoch_end(self, trainer: Trainer, pl_module: LightningModule): + if self.should_benchmark(trainer): + val_time = time.time() - self.val_start_time + self.val_times.append(val_time) + logging.info(f'Validation-Epoch-{trainer.current_epoch}-Time: {val_time} [sec]') + + def on_validation_batch_start( + self, trainer: Trainer, pl_module: LightningModule, batch, batch_idx: int, dataloader_idx: int + ): + self.val_step_start_time = time.time() + + def on_validation_batch_end( + self, trainer: Trainer, pl_module: LightningModule, outputs, batch, batch_idx: int, dataloader_idx: int + ): + if self.should_benchmark(trainer): + val_step_time = time.time() - self.val_step_start_time + self.val_steps_times.append(val_step_time) + if trainer.global_step % self.log_every_n_steps == 0: + logging.info(f'Validation-Step-{trainer.global_step}-Time: {val_step_time} [sec]') + + def on_fit_end(self, trainer: Trainer, pl_module: LightningModule): + if self.should_benchmark(trainer): + avg_train_time = sum(self.train_times) / len(self.train_times) + avg_val_time = sum(self.val_times) / len(self.val_times) + avg_train_step_time = sum(self.train_steps_times) / len(self.train_steps_times) + avg_val_step_time = sum(self.val_steps_times) / len(self.val_steps_times) + + logging.info(f'Average-Training-Epoch-Time: {avg_train_time} [sec]') + logging.info(f'Average-Validation-Epoch-Time: {avg_val_time} [sec]') + logging.info(f'Average-Training-Step-Time: {avg_train_step_time} [sec]') + logging.info(f'Average-Validation-Step-Time: {avg_val_step_time} [sec]') diff --git a/examples/multimodal/generative/nerf/config/config.yaml b/examples/multimodal/generative/nerf/config/config.yaml new file mode 100644 index 000000000000..1adcbae72c26 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/config.yaml @@ -0,0 +1,52 @@ +defaults: + - model: dreamfusion + - _self_ + +name: DreamFusion +seed: 2023 +mode: fit # fit, validate, test, export-mesh + +# export-mesh options +mesh_fname: /results/mesh.obj # mesh file name when mode=export-mesh +mesh_resolution: 128 # Mesh resolution when mode=export-mesh + +# benchmark options +enable_benchmark: False +benchmark_callback: + _target_: benchmark_callback.BenchmarkCallback + log_every_n_steps: 1 + +trainer: + devices: 1 + num_nodes: 1 + precision: 16 + max_steps: 10000 # example configs: dreamfuions=10000, dmtet=5000 + accelerator: gpu + enable_checkpointing: False + logger: False + log_every_n_steps: 1 + val_check_interval: 100 + accumulate_grad_batches: 1 + benchmark: False + enable_model_summary: True + +exp_manager: + name: ${name} + exp_dir: /results + create_tensorboard_logger: False + create_wandb_logger: False + wandb_logger_kwargs: + project: dreamfusion + group: nemo-df + name: ${name} + resume: True + create_checkpoint_callback: True + checkpoint_callback_params: + every_n_epochs: 0 + every_n_train_steps: 1000 # TODO(ahmadki): being ignored ? + monitor: loss + filename: '${name}-{step}' + save_top_k: -1 + always_save_nemo: False + resume_if_exists: True + resume_ignore_no_checkpoint: True diff --git a/examples/multimodal/generative/nerf/config/model/background/random.yaml b/examples/multimodal/generative/nerf/config/model/background/random.yaml new file mode 100644 index 000000000000..9cfb09fc6eca --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/background/random.yaml @@ -0,0 +1,3 @@ +_target_: nemo.collections.multimodal.modules.nerf.background.random_background.RandomBackground +base_background: [1, 1, 1] +random_ratio: 0.5 diff --git a/examples/multimodal/generative/nerf/config/model/background/static.yaml b/examples/multimodal/generative/nerf/config/model/background/static.yaml new file mode 100644 index 000000000000..eb82f9944991 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/background/static.yaml @@ -0,0 +1,2 @@ +_target_: nemo.collections.multimodal.modules.nerf.background.static_background.StaticBackground +background: [0, 0, 1] # rgb diff --git a/examples/multimodal/generative/nerf/config/model/background/tcnn.yaml b/examples/multimodal/generative/nerf/config/model/background/tcnn.yaml new file mode 100644 index 000000000000..8daf7bcd8349 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/background/tcnn.yaml @@ -0,0 +1,19 @@ +_target_: nemo.collections.multimodal.modules.nerf.background.tcnn_background.TCNNBackground +bound: 1 +encoder_num_input_dims: 3 # 3 directions +encoder_cfg: + otype: "HashGrid" + n_levels: 16 + n_features_per_level: 2 + log2_hashmap_size: 19 + base_resolution: 16 + interpolation: "Smoothstep" + per_level_scale: # default is np.exp2(np.log2(2048 * bound / 16) / (16 - 1)) + +background_net_num_output_dims: 3 # rgb +background_net_cfg: + otype: "FullyFusedMLP" + activation: "ReLU" + output_activation: "None" + n_neurons: 32 + n_hidden_layers: 2 diff --git a/examples/multimodal/generative/nerf/config/model/background/torchngp.yaml b/examples/multimodal/generative/nerf/config/model/background/torchngp.yaml new file mode 100644 index 000000000000..b77778099e79 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/background/torchngp.yaml @@ -0,0 +1,11 @@ +_target_: nemo.collections.multimodal.modules.nerf.background.torchngp_background.TorchNGPBackground + +encoder_type: "frequency" +encoder_input_dims: 3 +encoder_multi_res: 6 + +num_output_dims: 3 +net_cfg: + num_hidden_dims: 32 + num_layers: 2 + bias: True diff --git a/examples/multimodal/generative/nerf/config/model/data/data.yaml b/examples/multimodal/generative/nerf/config/model/data/data.yaml new file mode 100644 index 000000000000..0b5f88b9f1fb --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/data/data.yaml @@ -0,0 +1,41 @@ +_target_: data.AggregatorDataModule + +train_batch_size: 1 +train_shuffle: false +train_dataset: + _target_: nemo.collections.multimodal.data.nerf.random_poses.RandomPosesDataset + internal_batch_size: 100 + width: 64 + height: 64 + radius_range: [3.0, 3.5] + theta_range: [45, 105] + phi_range: [-180, 180] + fovx_range: [10, 30] + fovy_range: [10, 30] + jitter: False + jitter_center: 0.2 + jitter_target: 0.2 + jitter_up: 0.02 + uniform_sphere_rate: 0 + angle_overhead: 30 + angle_front: 60 + +val_batch_size: 1 +val_shuffle: false +val_dataset: + _target_: nemo.collections.multimodal.data.nerf.circle_poses.CirclePosesDataset + size: 5 + width: 800 + height: 800 + angle_overhead: 30 + angle_front: 60 + +test_batch_size: 1 +test_shuffle: false +test_dataset: + _target_: nemo.collections.multimodal.data.nerf.circle_poses.CirclePosesDataset + size: 100 + width: 800 + height: 800 + angle_overhead: 30 + angle_front: 60 diff --git a/examples/multimodal/generative/nerf/config/model/dreamfusion-dmtet.yaml b/examples/multimodal/generative/nerf/config/model/dreamfusion-dmtet.yaml new file mode 100644 index 000000000000..bfadd4f426b3 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/dreamfusion-dmtet.yaml @@ -0,0 +1,40 @@ +_target_: nemo.collections.multimodal.models.nerf.dreamfusion.DreamFusion # TODO(ahmadki): dreamfusion-dmetet should have it's own class +defaults: + - nerf: torchngp + - background: torchngp + - material: basic_shading + - renderer: nvdiffrast + - guidance: sd_huggingface + - optim: adan + - loss: dmtet + - data: data + - _self_ + +### model options +resume_from_checkpoint: +prompt: 'a hamburger' +negative_prompt: '' +front_prompt: ', front view' +side_prompt: ', side view' +back_prompt: ', back view' +update_extra_interval: 16 +guidance_scale: 100 +export_video: False + +iters: ${trainer.max_steps} +# TODO(ahmadki): move to database +latent_iter_ratio: 0.0 +albedo_iter_ratio: 0 +min_ambient_ratio: 0.1 +textureless_ratio: 0.2 + +data: + train_dataset: + width: 512 + height: 512 + val_dataset: + width: 800 + height: 800 + test_dataset: + width: 800 + height: 800 diff --git a/examples/multimodal/generative/nerf/config/model/dreamfusion.yaml b/examples/multimodal/generative/nerf/config/model/dreamfusion.yaml new file mode 100644 index 000000000000..a67393341b53 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/dreamfusion.yaml @@ -0,0 +1,40 @@ +_target_: nemo.collections.multimodal.models.nerf.dreamfusion.DreamFusion +defaults: + - nerf: torchngp + - background: static + - material: basic_shading + - renderer: torchngp_raymarching + - guidance: sd_huggingface + - optim: adan + - loss: dreamfusion + - data: data + - _self_ + +### model options +resume_from_checkpoint: +prompt: 'a hamburger' +negative_prompt: '' +front_prompt: ', front view' +side_prompt: ', side view' +back_prompt: ', back view' +update_extra_interval: 16 +guidance_scale: 100 +export_video: False + +iters: ${trainer.max_steps} +# TODO(ahmadki): move to database +latent_iter_ratio: 0.2 +albedo_iter_ratio: 0.0 +min_ambient_ratio: 0.1 +textureless_ratio: 0.2 + +data: + train_dataset: + width: 64 + height: 64 + val_dataset: + width: 800 + height: 800 + test_dataset: + width: 800 + height: 800 diff --git a/examples/multimodal/generative/nerf/config/model/guidance/sd_huggingface.yaml b/examples/multimodal/generative/nerf/config/model/guidance/sd_huggingface.yaml new file mode 100644 index 000000000000..a8b7adca3c55 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/guidance/sd_huggingface.yaml @@ -0,0 +1,4 @@ +_target_: nemo.collections.multimodal.modules.nerf.guidance.stablediffusion_huggingface_pipeline.StableDiffusion +precision: ${trainer.precision} +model_key: stabilityai/stable-diffusion-2-1-base +t_range: [0.02, 0.98] diff --git a/examples/multimodal/generative/nerf/config/model/guidance/sd_nemo.yaml b/examples/multimodal/generative/nerf/config/model/guidance/sd_nemo.yaml new file mode 100644 index 000000000000..fd4517ec1f7c --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/guidance/sd_nemo.yaml @@ -0,0 +1,4 @@ +_target_: nemo.collections.multimodal.modules.nerf.guidance.stablediffusion_nemo_pipeline.StableDiffusion +checkpoint: /sd_checkpoints/nemo-1.5/sd-1.5.nemo +sampler_type: 'DDIM' +t_range: [0.02, 0.98] diff --git a/examples/multimodal/generative/nerf/config/model/guidance/sd_trt.yaml b/examples/multimodal/generative/nerf/config/model/guidance/sd_trt.yaml new file mode 100644 index 000000000000..45c1e2ac8fb5 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/guidance/sd_trt.yaml @@ -0,0 +1,5 @@ +_target_: nemo.collections.multimodal.modules.nerf.guidance.stablediffusion_trt_pipeline.StableDiffusion +checkpoint: /sd_checkpoints/nemo-1.5/sd-1.5.nemo +plan_dir: /sd_checkpoints/nemo-1.5/plan +sampler_type=: DDIM" +t_range: [0.02, 0.98] diff --git a/examples/multimodal/generative/nerf/config/model/loss/dmtet.yaml b/examples/multimodal/generative/nerf/config/model/loss/dmtet.yaml new file mode 100644 index 000000000000..188c1034fc27 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/loss/dmtet.yaml @@ -0,0 +1,8 @@ +lambda_sds: 1.0 +lambda_opacity: 0.0 +lambda_entropy: 0.0 +lambda_orientation: 0.0 +lambda_2d_normal_smooth: 0.0 +lambda_3d_normal_smooth: 0.0 +lambda_mesh_normal: 0.5 +lambda_mesh_laplacian: 0.5 diff --git a/examples/multimodal/generative/nerf/config/model/loss/dreamfusion.yaml b/examples/multimodal/generative/nerf/config/model/loss/dreamfusion.yaml new file mode 100644 index 000000000000..8cfd4b47eb51 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/loss/dreamfusion.yaml @@ -0,0 +1,8 @@ +lambda_sds: 1.0 +lambda_opacity: 0.0 +lambda_entropy: 1e-3 +lambda_orientation: 1e-2 +lambda_2d_normal_smooth: 0.0 +lambda_3d_normal_smooth: 0.0 +lambda_mesh_normal: 0.0 +lambda_mesh_laplacian: 0.0 diff --git a/examples/multimodal/generative/nerf/config/model/material/basic_shading.yaml b/examples/multimodal/generative/nerf/config/model/material/basic_shading.yaml new file mode 100644 index 000000000000..802defad1637 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/material/basic_shading.yaml @@ -0,0 +1 @@ +_target_: nemo.collections.multimodal.modules.nerf.materials.basic_shading.BasicShading diff --git a/examples/multimodal/generative/nerf/config/model/nerf/tcnn.yaml b/examples/multimodal/generative/nerf/config/model/nerf/tcnn.yaml new file mode 100644 index 000000000000..0bf5ed6c5e2f --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/nerf/tcnn.yaml @@ -0,0 +1,32 @@ +_target_: nemo.collections.multimodal.modules.nerf.geometry.tcnn_nerf.TCNNNerf +num_input_dims: 3 # 3D space +bound: 1 +density_activation: softplus # softplus, exp +blob_radius: 0.5 +blob_density: 10 +normal_type: central_finite_difference + +encoder_cfg: + otype: "HashGrid" + n_levels: 16 + n_features_per_level: 2 + log2_hashmap_size: 19 + base_resolution: 16 + interpolation: "Smoothstep" + per_level_scale: # default is np.exp2(np.log2(2048 * bound / 16) / (16 - 1)) + +sigma_net_num_output_dims: 1 # density +sigma_net_cfg: + otype: "FullyFusedMLP" + activation: "ReLU" + output_activation: "None" + n_neurons: 64 + n_hidden_layers: 3 + +features_net_num_output_dims: 3 # rgb +features_net_cfg: + otype: "FullyFusedMLP" + activation: "ReLU" + output_activation: "None" + n_neurons: 64 + n_hidden_layers: 3 diff --git a/examples/multimodal/generative/nerf/config/model/nerf/torchngp.yaml b/examples/multimodal/generative/nerf/config/model/nerf/torchngp.yaml new file mode 100644 index 000000000000..48877dcfa871 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/nerf/torchngp.yaml @@ -0,0 +1,26 @@ +_target_: nemo.collections.multimodal.modules.nerf.geometry.torchngp_nerf.TorchNGPNerf +num_input_dims: 3 # 3D space +bound: 1 +density_activation: exp # softplus, exp +blob_radius: 0.2 +blob_density: 5 +normal_type: central_finite_difference + +encoder_cfg: + encoder_type: 'hashgrid' + encoder_max_level: + log2_hashmap_size: 19 + desired_resolution: 2048 + interpolation: smoothstep + +sigma_net_num_output_dims: 1 # density +sigma_net_cfg: + num_hidden_dims: 64 + num_layers: 3 + bias: True + +features_net_num_output_dims: 3 # rgb +features_net_cfg: + num_hidden_dims: 64 + num_layers: 3 + bias: True diff --git a/examples/multimodal/generative/nerf/config/model/optim/adan.yaml b/examples/multimodal/generative/nerf/config/model/optim/adan.yaml new file mode 100644 index 000000000000..885c13fcca8a --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/optim/adan.yaml @@ -0,0 +1,6 @@ +name: adan +lr: 5e-3 +eps: 1e-8 +weight_decay: 2e-5 +max_grad_norm: 5.0 +foreach: False diff --git a/examples/multimodal/generative/nerf/config/model/renderer/nerfacc.yaml b/examples/multimodal/generative/nerf/config/model/renderer/nerfacc.yaml new file mode 100644 index 000000000000..73f48a7a0ca9 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/renderer/nerfacc.yaml @@ -0,0 +1,8 @@ +_target_: nemo.collections.multimodal.modules.nerf.renderers.nerfacc_volume_renderer.NerfaccVolumeBaseRenderer +grid_resolution: 128 +grid_levels: 3 +bound: ${model.nerf.bound} +render_step_size: 1.e-3 +near_plane: 0.2 +cone_angle: 0.004 +alpha_thre: 1.e-2 diff --git a/examples/multimodal/generative/nerf/config/model/renderer/nvdiffrast.yaml b/examples/multimodal/generative/nerf/config/model/renderer/nvdiffrast.yaml new file mode 100644 index 000000000000..fefc217f4aec --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/renderer/nvdiffrast.yaml @@ -0,0 +1,6 @@ +_target_: nemo.collections.multimodal.modules.nerf.renderers.nvdiffrast_renderer.NVDiffRastRenderer +bound: ${model.nerf.bound} +grid_resolution: 128 +density_thresh: 10.0 +update_interval: 16 +quartet_file: "/results/tets/128_tets.npz" diff --git a/examples/multimodal/generative/nerf/config/model/renderer/torchngp_raymarching.yaml b/examples/multimodal/generative/nerf/config/model/renderer/torchngp_raymarching.yaml new file mode 100644 index 000000000000..5075a5fbc85c --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/renderer/torchngp_raymarching.yaml @@ -0,0 +1,7 @@ +_target_: nemo.collections.multimodal.modules.nerf.renderers.torchngp_volume_renderer.TorchNGPVolumeRenderer +bound: ${model.nerf.bound} +update_interval: 16 +grid_resolution: 128 +density_thresh: 10 +max_steps: 1024 +dt_gamma: 0 diff --git a/examples/multimodal/generative/nerf/data.py b/examples/multimodal/generative/nerf/data.py new file mode 100644 index 000000000000..1126a816a97e --- /dev/null +++ b/examples/multimodal/generative/nerf/data.py @@ -0,0 +1,73 @@ +import pytorch_lightning as pl +from hydra.utils import instantiate +from omegaconf.omegaconf import DictConfig +from torch.utils.data import DataLoader + + +# TODO(ahmadki): multi-GPU needs more work, we currently don't shard data +# across GPUs, which is OK for trainnig, but needs fixing for validation and testing. +class AggregatorDataModule(pl.LightningDataModule): + def __init__( + self, + train_dataset: DictConfig = None, + train_batch_size: int = 1, + train_shuffle: bool = False, + val_dataset: DictConfig = None, + val_batch_size: int = 1, + val_shuffle: bool = False, + test_dataset: DictConfig = None, + test_batch_size: int = 1, + test_shuffle: bool = False, + ): + super().__init__() + + self.train_dataset = train_dataset + self.train_batch_size = train_batch_size + self.train_shuffle = train_shuffle + self.val_dataset = val_dataset + self.val_batch_size = val_batch_size + self.val_shuffle = val_shuffle + self.test_dataset = test_dataset + self.test_batch_size = test_batch_size + self.test_shuffle = test_shuffle + + # TODO(ahmadki): lazy init + # def setup(self, stage=None) -> None: + # if stage in [None, "fit"]: + # self.train_dataset = instantiate(self.train_dataset) + # if stage in [None, "fit", "validate"]: + # self.val_dataset = instantiate(self.val_dataset) + # if stage in [None, "test", "predict"]: + # self.test_dataset = instantiate(self.test_dataset) + + def train_dataloader(self) -> DataLoader: + loader = DataLoader( + self.train_dataset, + batch_size=self.train_batch_size, + collate_fn=self.train_dataset.collate_fn, + pin_memory=True, + num_workers=4, + ) + return loader + + def val_dataloader(self) -> DataLoader: + loader = DataLoader( + self.val_dataset, + batch_size=self.val_batch_size, + collate_fn=self.val_dataset.collate_fn, + shuffle=self.val_shuffle, + pin_memory=True, + num_workers=0, + ) + return loader + + def test_dataloader(self) -> DataLoader: + loader = DataLoader( + self.test_dataset, + batch_size=self.test_batch_size, + collate_fn=self.test_dataset.collate_fn, + shuffle=self.test_shuffle, + pin_memory=True, + num_workers=0, + ) + return loader diff --git a/examples/multimodal/generative/nerf/main.py b/examples/multimodal/generative/nerf/main.py new file mode 100644 index 000000000000..35b80052ef7f --- /dev/null +++ b/examples/multimodal/generative/nerf/main.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from hydra.utils import get_class, instantiate +from omegaconf.omegaconf import DictConfig, OmegaConf +from pytorch_lightning import Trainer, seed_everything + +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path='config', config_name='config') +def main(cfg: DictConfig) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + seed_everything(cfg.seed, workers=True) + + mode = cfg.mode + logging.info(f"{mode=}") + + model = None + model_cls = get_class(cfg.model._target_) + if cfg.model.resume_from_checkpoint is None: + model = model_cls(cfg=cfg.model) + else: + logging.info(f"Loading model from checkpoint: {cfg.model.resume_from_checkpoint}") + model = model_cls.load_from_checkpoint(cfg.model.resume_from_checkpoint, strict=False, cfg=cfg.model) + + if mode == "export-mesh": + mesh = model.mesh(resolution=cfg.mesh_resolution) + mesh.export(cfg.mesh_fname) + return + + # Prepare callbacks + callbacks = [] + if cfg.enable_benchmark: + callbacks.append(instantiate(cfg.benchmark_callback)) + + # Setup trainer + trainer = Trainer(callbacks=callbacks, **cfg.trainer) + exp_manager(trainer, cfg.exp_manager) + + # Setup datamodule + dm = instantiate(cfg.model.data) + + if mode == "fit": + trainer.fit(model, datamodule=dm) + elif mode == "validate": + trainer.validate(model, datamodule=dm) + elif mode == "test": + trainer.test(model, datamodule=dm) + else: + raise ValueError(f"Invalid mode: {mode}") + + +if __name__ == '__main__': + main() diff --git a/nemo/collections/multimodal/data/nerf/__init__.py b/nemo/collections/multimodal/data/nerf/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/data/nerf/cameras.py b/nemo/collections/multimodal/data/nerf/cameras.py new file mode 100644 index 000000000000..c1496b7eeaa3 --- /dev/null +++ b/nemo/collections/multimodal/data/nerf/cameras.py @@ -0,0 +1,178 @@ +from abc import ABC, abstractmethod +from typing import List + +import numpy as np +import torch + + +class Camera(ABC): + """ + Abstract base class for Camera models. + """ + + def __init__(self, width: int, height: int, device: torch.device = 'cuda') -> None: + """ + Initializes the Camera instance with given dimensions and device. + + Parameters: + width: int - Width of the camera frame. + height: int - Height of the camera frame. + device: torch.device - The device where tensor computations will be performed. + """ + self.width = width + self.height = height + self.device = device + + @abstractmethod + def compute_intrinsics(self) -> None: + """ + Abstract method to compute camera intrinsics. + """ + pass + + @abstractmethod + def compute_projection_matrix(self) -> None: + """ + Abstract method to compute the projection matrix. + """ + pass + + +class OrthographicCamera(Camera): + """ + Class for Orthographic Camera models. + """ + + def compute_projection_matrix(self) -> torch.Tensor: + """ + Computes the projection matrix for an Orthographic camera. + + Returns: + torch.Tensor: The projection matrix. + """ + projection = torch.tensor( + [[2 / self.width, 0, 0, 0], [0, -2 / self.height, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], + dtype=torch.float32, + device=self.device, + ).unsqueeze(0) + return projection + + +class PinholeCamera(Camera): + """ + Class for Pinhole Camera models. + """ + + def __init__(self, width: int, height: int, near: float, far: float, device: torch.device = 'cuda') -> None: + """ + Initializes the Pinhole Camera instance with given parameters. + + Parameters: + width: int - Width of the camera frame. + height: int - Height of the camera frame. + near: float - Near clipping plane. + far: float - Far clipping plane. + device: torch.device - The device where tensor computations will be performed. + """ + super().__init__(width, height, device) + self.near = near + self.far = far + + def compute_intrinsics(self, fovx: float, fovy: float) -> np.ndarray: + """ + Computes the intrinsic matrix for the camera based on field of views. + + Parameters: + fovx: float - Field of view in X direction. + fovy: float - Field of view in Y direction. + + Returns: + np.ndarray: The intrinsic matrix. + """ + focal_x = self.width / (2 * np.tan(np.deg2rad(fovx) / 2)) + focal_y = self.height / (2 * np.tan(np.deg2rad(fovy) / 2)) + cx, cy = self.width / 2, self.height / 2 + return np.array([focal_x, focal_y, cx, cy]) + + def compute_projection_matrix(self, focal_x: float, focal_y: float) -> torch.Tensor: + """ + Computes the projection matrix for the camera. + + Parameters: + focal_x: float - Focal length in X direction. + focal_y: float - Focal length in Y direction. + + Returns: + torch.Tensor: The projection matrix. + """ + projection = torch.tensor( + [ + [2 * focal_x / self.width, 0, 0, 0], + [0, -2 * focal_y / self.height, 0, 0], + [ + 0, + 0, + -(self.far + self.near) / (self.far - self.near), + -(2 * self.far * self.near) / (self.far - self.near), + ], + [0, 0, -1, 0], + ], + dtype=torch.float32, + device=self.device, + ).unsqueeze(0) + return projection + + +class CubeCamera(Camera): + """ + Class for Cube Camera models, which is essentially six pinhole cameras. + """ + + def __init__( + self, width: int, height: int, near: float = 0.01, far: float = 1000, device: torch.device = 'cuda' + ) -> None: + """ + Initializes the Cube Camera instance with given parameters. + + Parameters: + width: int - Width of each camera face. + height: int - Height of each camera face. + near: float - Near clipping plane. + far: float - Far clipping plane. + device: torch.device - The device where tensor computations will be performed. + """ + self.width = width + self.height = height + self.near = near + self.far = far + self.device = device + + def compute_intrinsics(self) -> List[np.ndarray]: + """ + Computes the intrinsic matrices for the six faces of the cube using a Pinhole camera model. + + Returns: + List[np.ndarray]: List of 6 intrinsic matrices, one for each face. + """ + # Similar to Pinhole but repeated six times for six faces of the cube + return [ + PinholeCamera( + width=self.width, height=self.height, near=self.near, far=self.far, device=self.device + ).compute_intrinsics(90, 90) + for _ in range(6) + ] + + def compute_projection_matrix(self) -> List[torch.Tensor]: + """ + Computes the projection matrices for the six faces of the cube using a Pinhole camera model. + + Returns: + List[torch.Tensor]: List of 6 projection matrices, one for each face. + """ + # Similar to Pinhole but repeated six times for six faces of the cube + return [ + PinholeCamera( + width=self.width, height=self.height, near=self.near, far=self.far, device=self.device + ).compute_projection_matrix(1, 1) + for _ in range(6) + ] diff --git a/nemo/collections/multimodal/data/nerf/circle_poses.py b/nemo/collections/multimodal/data/nerf/circle_poses.py new file mode 100644 index 000000000000..9c833ad94bab --- /dev/null +++ b/nemo/collections/multimodal/data/nerf/circle_poses.py @@ -0,0 +1,214 @@ +from typing import Dict, Union + +import numpy as np +import torch +from torch.utils.data import Dataset + +from nemo.collections.multimodal.data.nerf.cameras import PinholeCamera +from nemo.collections.multimodal.data.nerf.utils import ( + compute_look_at_vectors, + construct_poses, + get_rays, + get_view_direction, +) + + +def circle_poses( + radius: torch.Tensor = torch.tensor([3.2]), + theta: torch.Tensor = torch.tensor([60]), + phi: torch.Tensor = torch.tensor([0]), + angle_overhead: float = 30, + angle_front: float = 60, + return_dirs: bool = False, + device: torch.device = "cuda", +) -> torch.Tensor: + """ + Generate camera poses based on a circular arrangement. + + Parameters: + radius: torch.Tensor - Radii for the camera positions. + theta: torch.Tensor - Theta angles for the camera positions. + phi: torch.Tensor - Phi angles for the camera positions. + angle_overhead: float - Angle range of the overhead view. + angle_front: float - Angle range of the front view. + return_dirs: bool - Whether to return the view directions. + device: str - The device to allocate the tensor on (e.g., 'cuda' or 'cpu'). + + Returns: + Tuple: Contains the following: + - poses (torch.Tensor): Generated poses, shape [size, 4, 4]. + - dirs (torch.Tensor, optional): View directions, if requested. + """ + # Convert degrees to radians for theta and phi + theta = theta / 180 * np.pi + phi = phi / 180 * np.pi + angle_overhead = angle_overhead / 180 * np.pi + angle_front = angle_front / 180 * np.pi + + # Calculate camera centers in Cartesian coordinates + centers = torch.stack( + [ + radius * torch.sin(theta) * torch.sin(phi), + radius * torch.cos(theta), + radius * torch.sin(theta) * torch.cos(phi), + ], + dim=-1, + ) # [B, 3] + + # Compute camera look-at matrix + forward_vector, up_vector, right_vector = compute_look_at_vectors(centers=centers, device=device) + + # Construct the 4x4 pose matrices + poses = construct_poses( + centers=centers, right_vector=right_vector, up_vector=up_vector, forward_vector=forward_vector, device=device + ) + + dirs = get_view_direction(theta, phi, angle_overhead, angle_front) if return_dirs else None + + return poses, dirs + + +class CirclePosesDataset(Dataset): + """ + A dataset class to generate circle poses. + """ + + def __init__( + self, + size: int = 100, + height: int = 256, + width: int = 256, + default_fovx: float = 20.0, + default_fovy: float = 20.0, + default_radius: float = 3.2, + default_polar: float = 90.0, + default_azimuth: float = 0.0, + angle_overhead: float = 30.0, + angle_front: float = 60.0, + near: float = 0.01, + far: float = 1000.0, + device: torch.device = 'cpu', + ) -> None: + """ + Initializes a new CirclePosesDataset instance. + + Parameters: + size (int): Number of samples in the dataset. + height (int): Height of the image. + width (int): Width of the image. + default_fovx (float): Default field of view in x-direction. + default_fovy (float): Default field of view in y-direction. + default_radius (float): Default radius of the circle. + default_polar (float): Default polar angle. + default_azimuth (float): Default azimuth angle. + angle_overhead (float): Overhead angle. + angle_front (float): Frontal angle. + near (float): Near clipping distance. + far (float): Far clipping distance. + device (torch.device): Device to generate data on. + """ + super().__init__() + self.size = size + self.height = height + self.width = width + + self.default_fovx = default_fovx + self.default_fovy = default_fovy + self.default_radius = default_radius + self.default_polar = default_polar + self.default_azimuth = default_azimuth + + self.angle_overhead = angle_overhead + self.angle_front = angle_front + self.near = near + self.far = far + + self.device = device + + # TODO(ahmadki): make camera type a parameter + self.camera = PinholeCamera( + width=self.width, height=self.height, near=self.near, far=self.far, device=self.device + ) + + def __len__(self) -> int: + """Returns the number of samples in the dataset.""" + return self.size + + def __getitem__(self, idx: int) -> Dict[str, Union[int, torch.Tensor]]: + """Get an item from the dataset. + + Args: + idx (int): Index of the item to retrieve. + + Returns: + dict: Data dictionary containing the following: + - height (int): Height of the image. + - width (int): Width of the image. + - rays_o (torch.Tensor): Ray origins, shape [height, width, 3]. + - rays_d (torch.Tensor): Ray directions, shape [height, width, 3]. + - dir (torch.Tensor): View direction, shape [3]. + - mvp (torch.Tensor): Model-view-projection matrix, shape [4, 4]. + - azimuth (torch.Tensor): Azimuth angle, shape [1]. + """ + # Initialize circle pose parameters + thetas = torch.FloatTensor([self.default_polar]).to(self.device) + phis = torch.FloatTensor([(idx / self.size) * 360]).to(self.device) + radius = torch.FloatTensor([self.default_radius]).to(self.device) + + # Generate circle poses and directions + poses, dirs = circle_poses( + radius=radius, + theta=thetas, + phi=phis, + angle_overhead=self.angle_overhead, + angle_front=self.angle_front, + return_dirs=True, + device=self.device, + ) + + # Compute camera intrinsics + intrinsics = self.camera.compute_intrinsics(fovx=self.default_fovx, fovy=self.default_fovy) + + # Compute projection matrix + projection = self.camera.compute_projection_matrix(focal_x=intrinsics[0], focal_y=intrinsics[1]) + mvp = projection @ torch.inverse(poses) # [1, 4, 4] + + # Sample rays + rays_o, rays_d = get_rays( + poses=poses, intrinsics=intrinsics, height=self.height, width=self.width, device=poses.device + ) + + # Compute azimuth delta + delta_azimuth = phis - self.default_azimuth + delta_azimuth[delta_azimuth > 180] -= 360 # range in [-180, 180] + + data = { + 'height': self.height, + 'width': self.width, + 'rays_o': rays_o, + 'rays_d': rays_d, + 'dir': dirs, + 'mvp': mvp, + 'azimuth': delta_azimuth, + } + + return data + + def collate_fn(self, batch: list) -> Dict[str, Union[int, torch.Tensor]]: + """Collate function to combine multiple data points into batches. + + Args: + batch (list): List of data dictionaries. + + Returns: + dict: Collated data. + """ + return { + 'height': self.height, + 'width': self.width, + 'rays_o': torch.cat([item['rays_o'] for item in batch], dim=0), + 'rays_d': torch.cat([item['rays_d'] for item in batch], dim=0), + 'mvp': torch.cat([item['mvp'] for item in batch], dim=0), + 'dir': torch.cat([item['dir'] for item in batch], dim=0), + 'azimuth': torch.cat([item['azimuth'] for item in batch], dim=0), + } diff --git a/nemo/collections/multimodal/data/nerf/random_poses.py b/nemo/collections/multimodal/data/nerf/random_poses.py new file mode 100644 index 000000000000..5dd7121b7063 --- /dev/null +++ b/nemo/collections/multimodal/data/nerf/random_poses.py @@ -0,0 +1,436 @@ +import random +from typing import Any, Dict, Iterator, List, Optional, Tuple + +import numpy as np +import torch +import torch.nn.functional as F +from torch.utils.data import IterableDataset + +from nemo.collections.multimodal.data.nerf.cameras import PinholeCamera +from nemo.collections.multimodal.data.nerf.utils import ( + compute_look_at_vectors, + construct_poses, + get_rays, + get_view_direction, +) + + +def linear_normalization(x: float, lower_bound: float, upper_bound: float) -> float: + """ + Linearly normalize a value between lower_bound and upper_bound to a value between 0 and 1. + + Parameters: + x: The value to normalize. + lower_bound: The lower bound of the range of x. + upper_bound: The upper bound of the range of x. + + Returns: + The normalized value between 0 and 1. + """ + return min(1, max(0, (x - lower_bound) / (upper_bound - lower_bound))) + + +def rand_poses( + size: int, + radius_range: List[float] = [1, 1.5], + theta_range: List[float] = [0, 120], + phi_range: List[float] = [0, 360], + angle_overhead: float = 30, + angle_front: float = 60, + uniform_sphere_rate: float = 0.5, + jitter: bool = False, + jitter_center: float = 0.2, + jitter_target: float = 0.2, + jitter_up: float = 0.02, + return_dirs: bool = False, + device: torch.device = "cuda", +) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: + """ + Generate random poses from an orbit camera. + + Args: + size (int): Number of poses to generate. + radius_range (List[float]): Min and max radii for camera [min, max]. + theta_range (List[float]): Elevation angle range in degrees [min, max]. + phi_range (List[float]): Azimuth angle range in degrees [min, max]. + angle_overhead (float): Overhead angle in degrees. + angle_front (float): Front angle in degrees. + uniform_sphere_rate (float): The probability of sampling from a uniform sphere. + jitter (bool): Whether to add noise to the poses. + jitter_center (float): Noise range for the camera center. + jitter_target (float): Noise range for the camera target. + jitter_up (float): Noise range for the camera up vector. + return_dirs (bool): Whether to return the view directions. + device (torch.device): The device on which to allocate tensors. + + Returns: + Tuple: Contains the following: + - poses (torch.Tensor): Generated poses, shape [size, 4, 4]. + - thetas (torch.Tensor): Elevation angles in degrees, shape [size]. + - phis (torch.Tensor): Azimuth angles in degrees, shape [size]. + - radius (torch.Tensor): Radii of the camera orbits, shape [size]. + - dirs (torch.Tensor, optional): View directions, if requested. + """ + + # Convert angles from degrees to radians + theta_range = np.radians(theta_range) + phi_range = np.radians(phi_range) + angle_overhead = np.radians(angle_overhead) + angle_front = np.radians(angle_front) + + # Generate radius for each pose + radius = torch.rand(size, device=device) * (radius_range[1] - radius_range[0]) + radius_range[0] + + # Generate camera center positions + if random.random() < uniform_sphere_rate: + centers, thetas, phis = sample_uniform_sphere(size=size, radius=radius, device=device) + else: + centers, thetas, phis = sample_orbit( + size=size, radius=radius, theta_range=theta_range, phi_range=phi_range, device=device + ) + + # Initialize targets to 0 (assuming 0 is a point in 3D space that cameras are looking at) + targets = torch.zeros_like(centers) + + # Apply jitter + if jitter: + centers += torch.rand_like(centers) * jitter_center - jitter_center / 2.0 + targets = torch.randn_like(centers) * jitter_target + + # Compute camera look-at matrix + forward_vector, up_vector, right_vector = compute_look_at_vectors( + centers=centers - targets, jitter_up=jitter_up if jitter else 0, device=device + ) + + # Construct the 4x4 pose matrices + poses = construct_poses( + centers=centers, right_vector=right_vector, up_vector=up_vector, forward_vector=forward_vector, device=device + ) + + # Optionally compute view directions + dirs = get_view_direction(thetas, phis, angle_overhead, angle_front) if return_dirs else None + + # Convert back to degrees for thetas and phis + thetas, phis = torch.rad2deg(thetas), torch.rad2deg(phis) + + return poses, thetas, phis, radius, dirs + + +def sample_uniform_sphere( + size: int, radius: torch.Tensor, device: torch.device +) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Sample points uniformly on a sphere. + + Args: + size (int): Number of points to sample. + device (torch.device): Device to allocate tensors on. + radius (torch.Tensor): Radii for the points. + + Returns: + Tuple: Contains the following: + - centers (torch.Tensor): The Cartesian coordinates of the sampled points. + - thetas (torch.Tensor): Elevation angles in radians. + - phis (torch.Tensor): Azimuth angles in radians. + """ + # Generate unit vectors + unit_centers = F.normalize( + torch.stack( + [ + torch.randn(size, device=device), + torch.abs(torch.randn(size, device=device)), + torch.randn(size, device=device), + ], + dim=-1, + ), + p=2, + dim=1, + ) + # Generate radii and scale unit vectors + centers = unit_centers * radius.unsqueeze(-1) + # Calculate spherical coordinates + thetas = torch.acos(unit_centers[:, 1]) + phis = torch.atan2(unit_centers[:, 0], unit_centers[:, 2]) + phis[phis < 0] += 2 * np.pi + + return centers, thetas, phis + + +def sample_orbit( + size: int, radius: torch.Tensor, theta_range: np.ndarray, phi_range: np.ndarray, device: torch.device = "cuda" +) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Sample points on a spherical orbit. + + Args: + size (int): Number of points to sample. + radius (torch.Tensor): Radii for the points. + theta_range (np.ndarray): Elevation angle range in radians [min, max]. + phi_range (np.ndarray): Azimuth angle range in radians [min, max]. + device (torch.device): Device to allocate tensors on. + + Returns: + Tuple: Contains the following: + - centers (torch.Tensor): The Cartesian coordinates of the sampled points. + - thetas (torch.Tensor): Elevation angles in radians. + - phis (torch.Tensor): Azimuth angles in radians. + """ + thetas = torch.rand(size, device=device) * (theta_range[1] - theta_range[0]) + theta_range[0] + phis = torch.rand(size, device=device) * (phi_range[1] - phi_range[0]) + phi_range[0] + phis[phis < 0] += 2 * np.pi + + x = radius * torch.sin(thetas) * torch.sin(phis) + y = radius * torch.cos(thetas) + z = radius * torch.sin(thetas) * torch.cos(phis) + + centers = torch.stack([x, y, z], dim=-1) + + return centers, thetas, phis + + +class RandomPosesDataset(IterableDataset): + """ + A dataset class to generate random poses. + """ + + def __init__( + self, + internal_batch_size: int = 100, + height: int = 256, + width: int = 256, + radius_range: Tuple[float, float] = [3.0, 3.5], + theta_range: Tuple[float, float] = [45.0, 105.0], + phi_range: Tuple[float, float] = [-180.0, 180.0], + fovx_range: Tuple[float, float] = [10.0, 30.0], + default_fovx: float = 20.0, + fovy_range: Tuple[float, float] = [10.0, 30.0], + default_fovy: float = 20.0, + default_radius: float = 3.2, + default_polar: float = 90.0, + default_azimuth: float = 0.0, + jitter: bool = False, + jitter_center: float = 0.2, + jitter_target: float = 0.2, + jitter_up: float = 0.02, + angle_overhead: float = 30.0, + angle_front: float = 60.0, + uniform_sphere_rate: float = 0.0, + near: float = 0.01, + far: float = 1000.0, + device: torch.device = 'cpu', + ) -> None: + """ + Initializes a new RandomPosesDataset instance. + + Parameters: + internal_batch_size (int): Number of samples to pre-generate internally. + height (int): Height of the image. + width (int): Width of the image. + radius_range (Tuple[float, float]): Range of generated radii. + theta_range (Tuple[float, float]): Range of generated theta angles. + phi_range (Tuple[float, float]): Range of generated phi angles. + fovx_range (Tuple[float, float]): Range of generated field of view in x-direction. + default_fovx (float): Default field of view in x-direction. + fovy_range (Tuple[float, float]): Range of generated field of view angles in y-direction. + default_fovy (float): Default field of view in y-direction. + default_radius (float): Default radius of the circle. + default_polar (float): Default polar angle. + default_azimuth (float): Default azimuth angle. + jitter (bool): Whether to jitter the poses. + jitter_center (float): Jittering center range. + jitter_target (float): Jittering target range. + jitter_up (float): Jittering up range. + angle_overhead (float): Overhead angle. + angle_front (float): Frontal angle. + uniform_sphere_rate (float): Rate of sampling uniformly on a sphere. + near (float): Near clipping distance. + far (float): Far clipping distance. + device (torch.device): Device to generate data on. + """ + + super().__init__() + self.height = height + self.width = width + self.internal_batch_size = internal_batch_size + + # TODO(ahmadki): expose for models other than dreamfusion + self.progressive_view = False + self.progressive_view_start_step = 0 + self.progressive_view_end_step = 500 + + self.default_fovx = default_fovx + self.default_fovy = default_fovy + self.default_radius = default_radius + self.default_polar = default_polar + self.default_azimuth = default_azimuth + self.same_fov_random = True + + self.radius_range = radius_range + self.theta_range = theta_range + self.phi_range = phi_range + self.fovx_range = fovx_range + self.fovy_range = fovy_range + + self.current_radius_range = radius_range + self.current_theta_range = theta_range + self.current_phi_range = phi_range + self.current_fovx_range = fovx_range + self.current_fovy_range = fovy_range + + self.angle_overhead = angle_overhead + self.angle_front = angle_front + self.uniform_sphere_rate = uniform_sphere_rate + self.jitter = jitter + self.jitter_center = jitter_center + self.jitter_target = jitter_target + self.jitter_up = jitter_up + + self.near = near + self.far = far + + self.device = device + + # TODO(ahmadki): make camera type a parameter + self.camera = PinholeCamera( + width=self.width, height=self.height, near=self.near, far=self.far, device=self.device + ) + + def update_step(self, epoch: int, global_step: int) -> None: + """ + Update the dataset at the beginning of each epoch. + + Parameters: + epoch (int): Current epoch. + global_step (int): Current global step. + + """ + if self.progressive_view: + self.progressive_view_update_step(global_step=global_step) + + def progressive_view_update_step(self, global_step: int) -> None: + """ + progressively relaxing view range + + Parameters: + global_step (int): Current global step. + """ + # TODO(ahmadki): support non-linear progressive_views + r = linear_normalization( + x=global_step, lower_bound=self.progressive_view_start_step, upper_bound=self.progressive_view_end_step + ) + self.current_phi_range = [ + (1 - r) * self.default_azimuth + r * self.phi_range[0], + (1 - r) * self.default_azimuth + r * self.phi_range[1], + ] + self.current_theta_range = [ + (1 - r) * self.default_polar + r * self.theta_range[0], + (1 - r) * self.default_polar + r * self.theta_range[1], + ] + self.current_radius_range = [ + (1 - r) * self.default_radius + r * self.radius_range[0], + (1 - r) * self.default_radius + r * self.radius_range[1], + ] + self.current_fovy_range = [ + (1 - r) * self.default_fovy + r * self.fovy_range[0], + (1 - r) * self.default_fovy + r * self.fovy_range[1], + ] + + def __iter__(self) -> Iterator[Dict[str, torch.Tensor]]: + """ + Returns an iterator over the dataset. + + Returns: + Iterator: An iterator over the dataset. + + """ + while True: + # Generate samples + rays_o, rays_d, dirs, mvp, delta_azimuth = self.generate_samples() + for i in range(self.internal_batch_size): + # Yield one sample at a time from the internal batch + yield { + 'height': self.height, + 'width': self.width, + 'rays_o': rays_o[i].unsqueeze(0), + 'rays_d': rays_d[i].unsqueeze(0), + 'dir': dirs[i].unsqueeze(0), + 'mvp': mvp[i].unsqueeze(0), + 'azimuth': delta_azimuth[i].unsqueeze(0), + } + + def generate_samples(self): + """ + Generate a batch of random poses. + + Returns: + Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor], Dict[str, torch.Tensor], Dict[str, torch.Tensor], Dict[str, torch.Tensor]]: + A tuple containing: + - rays (Dict[str, torch.Tensor]): A dictionary containing the origin and direction of the rays. + - dirs (torch.Tensor): A tensor containing the directions of the rays. + - mvp (torch.Tensor): A tensor containing the model-view-projection matrix. + - azimuth (torch.Tensor): A A tensor containing the azimuth angle. + """ + # Generate random poses and directions + poses, dirs, thetas, phis, radius = rand_poses( + size=self.internal_batch_size, + radius_range=self.current_radius_range, + theta_range=self.current_theta_range, + phi_range=self.current_phi_range, + angle_overhead=self.angle_overhead, + angle_front=self.angle_front, + uniform_sphere_rate=self.uniform_sphere_rate, + jitter=self.jitter, + jitter_center=self.jitter_center, + jitter_target=self.jitter_target, + jitter_up=self.jitter_up, + return_dirs=True, + device=self.device, + ) + + # random focal + if self.same_fov_random: + fovx_random = random.random() + fovy_random = fovx_random + else: + fovx_random = random.random() + fovy_random = random.random() + fovx = fovx_random * (self.current_fovx_range[1] - self.current_fovx_range[0]) + self.current_fovx_range[0] + fovy = fovy_random * (self.current_fovy_range[1] - self.current_fovy_range[0]) + self.current_fovy_range[0] + + # Compute camera intrinsics + intrinsics = self.camera.compute_intrinsics(fovx=fovx, fovy=fovy) + + # Compute projection matrix + projection = self.camera.compute_projection_matrix(focal_x=intrinsics[0], focal_y=intrinsics[1]) + mvp = projection @ torch.inverse(poses) # [internal batch size, 4, 4] + + # Sample rays + rays_o, rays_d = get_rays( + poses=poses, intrinsics=intrinsics, height=self.height, width=self.width, device=poses.device + ) + + # Compute azimuth delta + delta_azimuth = phis - self.default_azimuth + delta_azimuth[delta_azimuth > 180] -= 360 # range in [-180, 180] + + return rays_o, rays_d, dirs, mvp, delta_azimuth + + def collate_fn(self, batch: List[Dict[str, Any]]) -> Dict[str, Any]: + """ + Collate function to bundle multiple samples into a single batch. + + Args: + batch (List[Dict]): List of samples to collate. + + Returns: + Dict: A dictionary containing the collated batch. + """ + return { + 'height': self.height, + 'width': self.width, + 'rays_o': torch.cat([item['rays_o'] for item in batch], dim=0), + 'rays_d': torch.cat([item['rays_d'] for item in batch], dim=0), + 'mvp': torch.cat([item['mvp'] for item in batch], dim=0), + 'dir': torch.cat([item['dir'] for item in batch], dim=0), + 'azimuth': torch.cat([item['azimuth'] for item in batch], dim=0), + } diff --git a/nemo/collections/multimodal/data/nerf/utils.py b/nemo/collections/multimodal/data/nerf/utils.py new file mode 100644 index 000000000000..117d0de701fb --- /dev/null +++ b/nemo/collections/multimodal/data/nerf/utils.py @@ -0,0 +1,204 @@ +from typing import Dict, Optional + +import numpy as np +import torch +import torch.nn.functional as F + + +def get_view_direction(thetas: torch.Tensor, phis: torch.Tensor, overhead: float, front: float) -> torch.Tensor: + """ + Get the view direction based on given theta and phi values. + + Parameters: + - thetas (torch.Tensor): Array of theta values with shape [B,] + - phis (torch.Tensor): Array of phi values with shape [B,] + - overhead (float): Threshold for determining top and bottom views. + - front (float): Threshold for determining front, back and side views. + + Returns: + - torch.Tensor: Array of view directions. Values can be: + 0: front + 1: side (camera left) + 2: back + 3: side (camera right) + 4: top + 5: bottom + + Notes: + - Phi and theta values are assumed to be in radians. + """ + + num_samples = thetas.shape[0] + res = torch.zeros(num_samples, dtype=torch.long) + + # Normalize phis values to [0, 2*pi] + phis = phis % (2 * np.pi) + + # Determine direction based on phis + res[(phis < front / 2) | (phis >= 2 * np.pi - front / 2)] = 0 + res[(phis >= front / 2) & (phis < np.pi - front / 2)] = 1 + res[(phis >= np.pi - front / 2) & (phis < np.pi + front / 2)] = 2 + res[(phis >= np.pi + front / 2) & (phis < 2 * np.pi - front / 2)] = 3 + + # Override directions based on thetas for top and bottom views + res[thetas <= overhead] = 4 + res[thetas >= (np.pi - overhead)] = 5 + + return res + + +def compute_look_at_vectors(centers: torch.Tensor, jitter_up: Optional[float] = None, device: torch.device = "cuda"): + """ + Compute the look-at vectors for camera poses. + + Parameters: + centers: The centers of the cameras. + jitter_up: The noise range for the up vector of the camera. + device: Device to allocate the output tensor. + + Returns: + Tuple: Contains the following: + - forward_vector: The forward vectors of the cameras, shape [B, 3]. + - up_vector: The up vectors of the cameras, shape [B, 3]. + - right_vector: The right vectors of the cameras, shape [B, 3]. + """ + forward_vector = F.normalize(centers) + up_vector = torch.FloatTensor([0, 1, 0]).to(device).unsqueeze(0).repeat(len(centers), 1) + right_vector = F.normalize(torch.cross(forward_vector, up_vector, dim=-1)) + up_noise = torch.randn_like(up_vector) * jitter_up if jitter_up is not None else 0 + up_vector = F.normalize(torch.cross(right_vector, forward_vector, dim=-1)) + up_vector = F.normalize(torch.cross(right_vector, forward_vector, dim=-1) + up_noise) + + return forward_vector, up_vector, right_vector + + +def construct_poses( + centers: torch.Tensor, + right_vector: torch.Tensor, + up_vector: torch.Tensor, + forward_vector: torch.Tensor, + device: torch.device, +) -> torch.Tensor: + """ + Construct the 4x4 pose matrices. + + Args: + size (int): Number of pose matrices to construct. + centers (torch.Tensor): The Cartesian coordinates of the camera centers. + right_vector (torch.Tensor): The right vectors of the cameras. + up_vector (torch.Tensor): The up vectors of the cameras. + forward_vector (torch.Tensor): The forward vectors of the cameras. + device (torch.device): Device to allocate tensors on. + + Returns: + torch.Tensor: The pose matrices, shape [size, 4, 4]. + """ + poses = torch.eye(4, dtype=torch.float32, device=device).unsqueeze(0).repeat(len(centers), 1, 1) + poses[:, :3, :3] = torch.stack([right_vector, up_vector, forward_vector], dim=-1) + poses[:, :3, 3] = centers + + return poses + + +@torch.cuda.amp.autocast(enabled=False) +def get_rays( + poses: torch.Tensor, + intrinsics: torch.Tensor, + height: int, + width: int, + num_samples: Optional[int] = None, + error_map: Optional[torch.Tensor] = None, + device: torch.device = "cuda", +) -> Dict[str, torch.Tensor]: + """ + Generates rays from camera poses and intrinsics. + + Args: + poses (torch.Tensor): Camera poses, shape [B, 4, 4] (cam2world). + intrinsics (torch.Tensor): Intrinsic camera parameters [fx, fy, cx, cy]. + height (int): Height of the image. + width (int): Width of the image. + num_samples: Number of rays to sample, default is None for all rays. + error_map: Optional tensor to use for non-uniform sampling of rays. + device (torch.device): Device on which to generate the rays. + + Returns: + Dict[str, torch.Tensor]: A dictionary containing the following keys: + - 'rays_o': Origin of the rays, shape [B, N, 3] + - 'rays_d': Directions of the rays, shape [B, N, 3] + - 'inds': Indices of the rays, shape [B, N] (if N > 0) + - 'inds_coarse': Coarse indices of the rays, shape [B, N] (if error_map is not None) + """ + + batch_size = poses.shape[0] + fx, fy, cx, cy = intrinsics + + i, j = torch.meshgrid( + torch.linspace(0, width - 1, width, device=device), + torch.linspace(0, height - 1, height, device=device), + indexing='ij', + ) + i = i.t().reshape([1, height * width]).expand([batch_size, height * width]) + 0.5 + j = j.t().reshape([1, height * width]).expand([batch_size, height * width]) + 0.5 + + results = {} + + if num_samples is not None: + num_samples = min(num_samples, height * width) + + if error_map is None: + sampled_indices = torch.randint(0, height * width, size=[num_samples], device=device) + sampled_indices = sampled_indices.expand([batch_size, num_samples]) + else: + sampled_indices, sampled_indices_coarse = non_uniform_sampling( + error_map=error_map, num_samples=num_samples, height=height, width=width, device=device + ) + results['sampled_indices_coarse'] = sampled_indices_coarse + + i = torch.gather(i, -1, sampled_indices) + j = torch.gather(j, -1, sampled_indices) + results['sampled_indices'] = sampled_indices + else: + sampled_indices = torch.arange(height * width, device=device).expand([batch_size, height * width]) + + zs = torch.full_like(i, -1.0) + xs = -(i - cx) / fx * zs + ys = (j - cy) / fy * zs + directions = torch.stack((xs, ys, zs), dim=-1) + + rays_d = directions @ poses[:, :3, :3].transpose(-1, -2) + rays_o = poses[..., :3, 3].unsqueeze(-2).expand_as(rays_d) + + rays_o = rays_o.view(-1, height, width, 3) + rays_d = rays_d.view(-1, height, width, 3) + + return rays_o, rays_d + + +def non_uniform_sampling( + error_map: torch.Tensor, batch_size: int, num_samples: int, height: int, width: int, device: torch.device = "cuda" +) -> torch.Tensor: + """ + Perform non-uniform sampling based on the provided error_map. + + Parameters: + error_map: The error map for non-uniform sampling. + batch_size (int): Batch size of the generated samples. + num_samples (int): Number of samples to pick. + height (int): Height of the image. + width (int): Width of the image. + device: Device on which tensors are stored. + + Returns: + A tensor containing the sampled indices. + """ + + sampled_indices_coarse = torch.multinomial(error_map.to(device), num_samples, replacement=False) + inds_x, inds_y = sampled_indices_coarse // 128, sampled_indices_coarse % 128 + sx, sy = height / 128, width / 128 + + inds_x = (inds_x * sx + torch.rand(batch_size, num_samples, device=device) * sx).long().clamp(max=height - 1) + inds_y = (inds_y * sy + torch.rand(batch_size, num_samples, device=device) * sy).long().clamp(max=width - 1) + sampled_indices = inds_x * width + inds_y + + return sampled_indices, sampled_indices_coarse diff --git a/nemo/collections/multimodal/models/nerf/base.py b/nemo/collections/multimodal/models/nerf/base.py new file mode 100644 index 000000000000..d1908080e90c --- /dev/null +++ b/nemo/collections/multimodal/models/nerf/base.py @@ -0,0 +1,24 @@ +import pytorch_lightning as pl + +from nemo.core.classes.common import Serialization +from nemo.core.classes.modelPT import ModelPT + + +class NerfModelBase(ModelPT, Serialization): + def __init__(self, cfg): + super().__init__(cfg=cfg) + self.save_hyperparameters() + self._cfg = cfg + + @staticmethod + def is_module_updatable(module): + return hasattr(module, 'update_step') and callable(module.update_step) + + def list_available_models(self): + pass + + def setup_training_data(self): + pass + + def setup_validation_data(self): + pass diff --git a/nemo/collections/multimodal/models/nerf/dreamfusion.py b/nemo/collections/multimodal/models/nerf/dreamfusion.py new file mode 100644 index 000000000000..6b7784b002ff --- /dev/null +++ b/nemo/collections/multimodal/models/nerf/dreamfusion.py @@ -0,0 +1,313 @@ +import logging +import os +import random + +import cv2 +import imageio +import numpy as np +import torch + +from nemo.collections.multimodal.models.nerf.txt2nerf_base import Txt2NerfBase +from nemo.collections.multimodal.modules.nerf.loss.laplacian_smooth_loss import LaplacianSmoothLoss +from nemo.collections.multimodal.modules.nerf.loss.normal_consistency_loss import NormalConsistencyLoss +from nemo.collections.multimodal.modules.nerf.materials.materials_base import ShadingEnum +from nemo.core import optim + + +# TODO(ahmadki): split dmtet from dreamfusion +class DreamFusion(Txt2NerfBase): + def __init__(self, cfg): + super(DreamFusion, self).__init__(cfg) + + self.guidance_scale = cfg.guidance_scale + + self.iters = cfg.iters + self.latent_iter_ratio = cfg.latent_iter_ratio + self.albedo_iter_ratio = cfg.albedo_iter_ratio + self.min_ambient_ratio = cfg.min_ambient_ratio + self.textureless_ratio = cfg.textureless_ratio + + # Lambdas + self.lambda_sds = cfg.loss.lambda_sds + self.lambda_opacity = cfg.loss.lambda_opacity + self.lambda_entropy = cfg.loss.lambda_entropy + self.lambda_orientation = cfg.loss.lambda_orientation + self.lambda_2d_normal_smooth = cfg.loss.lambda_2d_normal_smooth + self.lambda_3d_normal_smooth = cfg.loss.lambda_3d_normal_smooth + self.lambda_mesh_normal = cfg.loss.lambda_mesh_normal + self.lambda_mesh_laplacian = cfg.loss.lambda_mesh_laplacian + + if self.lambda_mesh_normal > 0: + self.normal_consistency_loss_fn = NormalConsistencyLoss() + if self.lambda_mesh_laplacian > 0: + self.laplacian_smooth_loss_fn = LaplacianSmoothLoss() + + # Video + self.test_images = [] + self.test_depths = [] + + def training_step(self, batch, batch_idx): + # experiment iterations ratio + # i.e. what proportion of this experiment have we completed (in terms of iterations) so far? + exp_iter_ratio = self.global_step / self.iters + + # TODO(ahmadki): move to database + if exp_iter_ratio < self.latent_iter_ratio: + ambient_ratio = 1.0 + shading_type = ShadingEnum.NORMAL + as_latent = True + else: + if exp_iter_ratio <= self.albedo_iter_ratio: + ambient_ratio = 1.0 + shading_type = None + else: + # random shading + ambient_ratio = self.min_ambient_ratio + (1.0 - self.min_ambient_ratio) * random.random() + rand = random.random() + if rand >= (1.0 - self.textureless_ratio): + shading_type = ShadingEnum.TEXTURELESS + else: + shading_type = ShadingEnum.LAMBERTIAN + + as_latent = False + + return_normal_image = bool(self.lambda_2d_normal_smooth) + return_normal_perturb = bool(self.lambda_3d_normal_smooth) + return_vertices = bool(self.lambda_mesh_laplacian) + return_faces = bool(self.lambda_mesh_normal) or bool(self.lambda_mesh_laplacian) + return_faces_normals = bool(self.lambda_mesh_normal) + outputs = self( + rays_o=batch['rays_o'], # [B, H, W, 3] + rays_d=batch['rays_d'], # [B, H, W, 3] + mvp=batch['mvp'], # [B, 4, 4] + perturb=True, + ambient_ratio=ambient_ratio, + shading_type=shading_type, + binarize=False, + return_normal_image=return_normal_image, + return_normal_perturb=return_normal_perturb, + return_vertices=return_vertices, + return_faces=return_faces, + return_faces_normals=return_faces_normals, + ) + + if as_latent: + pred_rgb = ( + torch.cat([outputs['image'], outputs['opacity']], dim=-1).permute(0, 3, 1, 2).contiguous() + ) # [B, 4, H, W] + else: + pred_rgb = outputs['image'].permute(0, 3, 1, 2).contiguous() # [B, 3, H, W] + + # TODO(ahmadki): move into guidance + azimuth = batch['azimuth'] + text_z = [self.text_z['uncond']] * azimuth.shape[0] + for b in range(azimuth.shape[0]): + if azimuth[b] >= -90 and azimuth[b] < 90: + if azimuth[b] >= 0: + r = 1 - azimuth[b] / 90 + else: + r = 1 + azimuth[b] / 90 + start_z = self.text_z['front'] + end_z = self.text_z['side'] + else: + if azimuth[b] >= 0: + r = 1 - (azimuth[b] - 90) / 90 + else: + r = 1 + (azimuth[b] + 90) / 90 + start_z = self.text_z['side'] + end_z = self.text_z['back'] + pos_z = r * start_z + (1 - r) * end_z + text_z.append(pos_z) + text_z = torch.cat(text_z, dim=0) + + loss_dict = {} + + # SDS loss + guidance_loss = self.guidance.train_step( + text_z, pred_rgb, as_latent=as_latent, guidance_scale=self.guidance_scale + ) + loss_dict['lambda_sds'] = guidance_loss * self.lambda_sds + + # opacity loss + if self.lambda_opacity > 0 and 'opacity' in outputs: + loss_opacity = (outputs['opacity'] ** 2).mean() + loss_dict['loss_opacity'] = self.lambda_opacity * loss_opacity + + # entropy loss + if self.lambda_entropy > 0 and 'weights' in outputs: + alphas = outputs['weights'].clamp(1e-5, 1 - 1e-5) + loss_entropy = (-alphas * torch.log2(alphas) - (1 - alphas) * torch.log2(1 - alphas)).mean() + lambda_entropy = self.lambda_entropy * min(1, 2 * self.global_step / self.iters) + loss_dict['loss_entropy'] = lambda_entropy * loss_entropy + + if self.lambda_2d_normal_smooth > 0 and 'normal_image' in outputs: + pred_normal = outputs['normal_image'] + loss_smooth = (pred_normal[:, 1:, :, :] - pred_normal[:, :-1, :, :]).square().mean() + ( + pred_normal[:, :, 1:, :] - pred_normal[:, :, :-1, :] + ).square().mean() + loss_dict['loss_smooth'] = self.lambda_2d_normal_smooth * loss_smooth + + # orientation loss + if self.lambda_orientation > 0 and all(key in outputs for key in ['weights', 'normals', 'dirs']): + loss_orientation = ( + outputs['weights'].detach() * (outputs['normals'] * outputs['dirs']).sum(-1).clamp(min=0) ** 2 + ) + loss_orientation = loss_orientation.mean() + loss_dict['loss_orientation'] = self.lambda_orientation * loss_orientation + + if self.lambda_3d_normal_smooth > 0 and all(key in outputs for key in ['normals', 'normal_perturb']): + loss_normal_perturb = (outputs['normal_perturb'] - outputs['normals']).abs().mean() + loss_dict['loss_normal_smooth'] = self.lambda_3d_normal_smooth * loss_normal_perturb + + if self.lambda_mesh_normal > 0 and all(key in outputs for key in ['face_normals', 'faces']): + normal_consistency_loss = self.normal_consistency_loss_fn( + face_normals=outputs['face_normals'], t_pos_idx=outputs['faces'] + ) + loss_dict['normal_consistency_loss'] = self.lambda_mesh_normal * normal_consistency_loss + + if self.lambda_mesh_laplacian > 0 and all(key in outputs for key in ['verts', 'faces']): + laplacian_loss = self.laplacian_smooth_loss_fn(verts=outputs['verts'], faces=outputs['faces']) + loss_dict['laplacian_loss'] = self.lambda_mesh_laplacian * laplacian_loss + + loss = sum(loss_dict.values()) + + self.log_dict(loss_dict, prog_bar=False, rank_zero_only=True) + self.log('loss', loss, prog_bar=True, rank_zero_only=True) + + # TODO(ahmadki): LearningRateMonitor + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, prog_bar=True, rank_zero_only=True) + + self.log('global_step', self.global_step + 1, prog_bar=True, rank_zero_only=True) + + return loss + + def validation_step(self, batch, batch_idx): + # save image + images, depths = self._shared_predict(batch) + + save_path = os.path.join(self.trainer.log_dir, 'validation') + os.makedirs(save_path, exist_ok=True) + for i, (image, depth) in enumerate(zip(images, depths)): + # Save image + cv2.imwrite( + os.path.join( + save_path, + f'{self.current_epoch:04d}_{self.global_step:04d}_{self.global_rank:04d}_{batch_idx:04d}_{i:04d}_rgb.png', + ), + cv2.cvtColor(image, cv2.COLOR_RGB2BGR), + ) + # Save depth + cv2.imwrite( + os.path.join( + save_path, + f'{self.current_epoch:04d}_{self.global_step:04d}_{self.global_rank:04d}_{batch_idx:04d}_{i:04d}_depth.png', + ), + depth, + ) + + def test_step(self, batch, batch_idx): + # save image + images, depths = self._shared_predict(batch) + self.test_images.append(images) + self.test_depths.append(depths) + + def on_test_epoch_end(self): + save_path = os.path.join(self.trainer.log_dir, 'test') + os.makedirs(save_path, exist_ok=True) + + images = np.concatenate(self.test_images, axis=0) + imageio.mimwrite( + os.path.join(os.path.join(save_path, f'{self.current_epoch:04d}_{self.global_step:04d}_rgb.mp4')), + images, + fps=25, + quality=8, + macro_block_size=1, + ) + + depths = np.concatenate(self.test_depths, axis=0) + imageio.mimwrite( + os.path.join(os.path.join(save_path, f'{self.current_epoch:04d}_{self.global_step:04d}_depth.mp4')), + depths, + fps=25, + quality=8, + macro_block_size=1, + ) + + self.test_images.clear() + self.test_depths.clear() + + def predict_step(self, batch, batch_idx): + return self._shared_predict(self, batch) + + def forward( + self, + rays_o, + rays_d, + mvp, + perturb, + ambient_ratio, + shading_type, + binarize, + return_normal_image, + return_normal_perturb, + return_vertices, + return_faces, + return_faces_normals, + ): + outputs = self.renderer( + rays_o=rays_o, + rays_d=rays_d, + mvp=mvp, + perturb=perturb, + ambient_ratio=ambient_ratio, + shading_type=shading_type, + binarize=binarize, + return_normal_image=return_normal_image, + return_normal_perturb=return_normal_perturb, + return_vertices=return_vertices, + return_faces=return_faces, + return_faces_normals=return_faces_normals, + ) + return outputs + + def _shared_predict(self, data): + outputs = self( + rays_o=data['rays_o'], # [B, H, W, 3] + rays_d=data['rays_d'], # [B, H, W, 3] + mvp=data['mvp'], + perturb=False, + ambient_ratio=data['ambient_ratio'] if 'ambient_ratio' in data else 1.0, # TODO(ahmadki): move to dataset + shading_type=data['shading_type'] if 'shading_type' in data else None, # TODO(ahmadki): move to dataset + binarize=False, + return_normal_image=False, + return_normal_perturb=False, + return_vertices=False, + return_faces=False, + return_faces_normals=False, + ) + + images_np = outputs['image'].detach().cpu().numpy() + images_np = (images_np * 255).astype(np.uint8) + + depths_np = outputs['depth'].detach().cpu().numpy() + depths_np = (depths_np - depths_np.min()) / (np.ptp(depths_np) + 1e-6) + depths_np = (depths_np * 255).astype(np.uint8) + + return images_np, depths_np + + # TODO(ahmadki): rework + def setup_optimization(self): + cfg = self._cfg.optim + optimizer_args = dict(cfg) + optimizer_args.pop('name', None) + + optimizer = optim.get_optimizer(cfg.name) + + optimizer = optimizer(params=self.parameters(), **optimizer_args) + + self._optimizer = optimizer + + def configure_optimizers(self): + self.setup_optimization() + return self._optimizer diff --git a/nemo/collections/multimodal/models/nerf/txt2nerf_base.py b/nemo/collections/multimodal/models/nerf/txt2nerf_base.py new file mode 100644 index 000000000000..19a393aa4774 --- /dev/null +++ b/nemo/collections/multimodal/models/nerf/txt2nerf_base.py @@ -0,0 +1,81 @@ +import logging + +from nemo.collections.multimodal.models.nerf.base import NerfModelBase + + +class Txt2NerfBase(NerfModelBase): + def __init__(self, cfg): + super().__init__(cfg) + self.prompt = cfg.prompt + self.negative_prompt = cfg.negative_prompt + self.front_prompt = cfg.front_prompt + self.side_prompt = cfg.side_prompt + self.back_prompt = cfg.back_prompt + + self.nerf_cfg = cfg.nerf + self.renderer_cfg = cfg.renderer + self.guidance_cfg = cfg.guidance + + nerf = self.from_config_dict(cfg.nerf) + material = self.from_config_dict(cfg.material) + background = self.from_config_dict(cfg.background) + self.renderer = self.build_renderer(cfg.renderer, nerf, material, background) + self.guidance = None + + def build_renderer(self, cfg, nerf, material, background): + renderer = self.from_config_dict(cfg) + renderer.nerf = nerf + renderer.material = material + renderer.background = background + return renderer + + def build_guidance(self, cfg): + self.guidance = self.from_config_dict(cfg) + self.guidance.eval() + for p in self.guidance.parameters(): + p.requires_grad = False + + def prepare_embeddings(self): + # TODO(ahmadki): add top view ? + self.text_z = { + "default": self.guidance.get_text_embeds([self.prompt]), + "uncond": self.guidance.get_text_embeds([self.negative_prompt]), + "front": self.guidance.get_text_embeds([f"{self.prompt}{self.front_prompt}"]), + "side": self.guidance.get_text_embeds([f"{self.prompt}{self.side_prompt}"]), + "back": self.guidance.get_text_embeds([f"{self.prompt}{self.back_prompt}"]), + } + + def on_fit_start(self) -> None: + self.build_guidance(self.guidance_cfg) + self.prepare_embeddings() + + def on_train_batch_start(self, batch, batch_idx, unused=0): + if self.is_module_updatable(self.guidance): + self.guidance.update_step(epoch=self.current_epoch, global_step=self.global_step) + + if self.is_module_updatable(self.renderer.nerf): + self.renderer.nerf.update_step(epoch=self.current_epoch, global_step=self.global_step) + + if self.is_module_updatable(self.renderer.material): + self.renderer.material.update_step(epoch=self.current_epoch, global_step=self.global_step) + + if self.is_module_updatable(self.renderer.background): + self.renderer.background.update_step(epoch=self.current_epoch, global_step=self.global_step) + + if self.is_module_updatable(self.renderer): + self.renderer.update_step(epoch=self.current_epoch, global_step=self.global_step) + + dataset = self.trainer.train_dataloader.dataset + if self.is_module_updatable(dataset): + dataset.update_step(epoch=self.current_epoch, global_step=self.global_step) + + def mesh(self, resolution, batch_size=128, density_thresh=None): + return self.nerf.mesh(resolution=resolution, batch_size=batch_size, density_thresh=density_thresh) + + def on_save_checkpoint(self, checkpoint): + # remove guidance from checkpoint. + # We can still laod the model without guidance checkpoints because the module is not initalized + # at __init__ time. + keys_to_remove = [key for key in checkpoint['state_dict'].keys() if key.startswith('guidance.')] + for key in keys_to_remove: + del checkpoint['state_dict'][key] diff --git a/nemo/collections/multimodal/modules/nerf/__init__.py b/nemo/collections/multimodal/modules/nerf/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/modules/nerf/background/nerf_background_base.py b/nemo/collections/multimodal/modules/nerf/background/nerf_background_base.py new file mode 100644 index 000000000000..3d03d14694be --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/background/nerf_background_base.py @@ -0,0 +1,22 @@ +import torch +import torch.nn as nn + +# TODO(ahmadki): abstract class +class NeRFBackgroundBase(nn.Module): + def __init__(self): + super().__init__() + + def encode(self, rays_d: torch.Tensor) -> torch.Tensor: + """ + positions = [B*N, 3] + """ + raise NotImplementedError + + def forward_net(self, rays_d_encoding: torch.Tensor) -> torch.Tensor: + raise NotImplementedError + + def forward(self, rays_d: torch.Tensor) -> torch.Tensor: + rays_d_encoding = self.encode(rays_d) + features = self.forward_net(rays_d_encoding) + features = torch.sigmoid(features) + return features diff --git a/nemo/collections/multimodal/modules/nerf/background/random_background.py b/nemo/collections/multimodal/modules/nerf/background/random_background.py new file mode 100644 index 000000000000..5f7f77d99596 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/background/random_background.py @@ -0,0 +1,19 @@ +import random +from typing import Tuple + +import torch +import torch.nn as nn + + +class RandomBackground(nn.Module): + def __init__(self, base_background: Tuple, random_ratio: float) -> None: + super().__init__() + self.random_ratio = random_ratio + self.num_output_dims = len(base_background) + self.register_buffer("base_background", torch.tensor(base_background)) + + def forward(self, rays_d: torch.Tensor) -> torch.Tensor: + if random.random() < self.random_ratio: + return torch.rand(rays_d.shape[0], self.num_output_dims).to(rays_d) + else: + return self.base_background.to(rays_d).expand(rays_d.shape[0], -1) diff --git a/nemo/collections/multimodal/modules/nerf/background/static_background.py b/nemo/collections/multimodal/modules/nerf/background/static_background.py new file mode 100644 index 000000000000..955884161626 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/background/static_background.py @@ -0,0 +1,14 @@ +from typing import Tuple + +import torch +import torch.nn as nn + + +class StaticBackground(nn.Module): + def __init__(self, background: Tuple) -> None: + super().__init__() + self.register_buffer("background", torch.tensor(background)) + + def forward(self, rays_d: torch.Tensor) -> torch.Tensor: + background = self.background.to(rays_d) + return background.expand(rays_d.shape[0], -1) diff --git a/nemo/collections/multimodal/modules/nerf/background/tcnn_background.py b/nemo/collections/multimodal/modules/nerf/background/tcnn_background.py new file mode 100644 index 000000000000..3b45a60717a5 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/background/tcnn_background.py @@ -0,0 +1,32 @@ +from typing import Dict + +import numpy as np +import tinycudann as tcnn +import torch + +from nemo.collections.multimodal.modules.nerf.background.nerf_background_base import NeRFBackgroundBase + + +class TCNNBackground(NeRFBackgroundBase): + def __init__( + self, + bound: int, + encoder_num_input_dims: int, + encoder_cfg: Dict, + background_net_num_output_dims: int, + background_net_cfg: Dict, + ): + super().__init__() + self.bound = bound + if encoder_cfg.get('per_level_scale') is None: + encoder_cfg['per_level_scale'] = np.exp2(np.log2(2048 * self.bound / 16) / (16 - 1)) + self.encoder = tcnn.Encoding(n_input_dims=encoder_num_input_dims, encoding_config=dict(encoder_cfg)) + self.background_net = tcnn.Network( + self.encoder.n_output_dims, background_net_num_output_dims, network_config=dict(background_net_cfg) + ) + + def encode(self, rays_d: torch.Tensor) -> torch.Tensor: + return self.encoder(rays_d) + + def forward_net(self, rays_d_encoding: torch.Tensor) -> torch.Tensor: + return self.background_net(rays_d_encoding) diff --git a/nemo/collections/multimodal/modules/nerf/background/torchngp_background.py b/nemo/collections/multimodal/modules/nerf/background/torchngp_background.py new file mode 100644 index 000000000000..e792858cacce --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/background/torchngp_background.py @@ -0,0 +1,31 @@ +from typing import Dict + +import torch + +from nemo.collections.multimodal.modules.nerf.background.nerf_background_base import NeRFBackgroundBase +from nemo.collections.multimodal.modules.nerf.geometry.layers import MLP +from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.encoding import get_encoder + + +class TorchNGPBackground(NeRFBackgroundBase): + def __init__( + self, encoder_type: str, encoder_input_dims: int, encoder_multi_res: int, num_output_dims: int, net_cfg: Dict + ): + super().__init__() + + self.encoder, self.encoder_output_dims = get_encoder( + encoder_type, input_dim=encoder_input_dims, multires=encoder_multi_res + ) + self.background_net = MLP( + num_input_dims=self.encoder_output_dims, + num_output_dims=num_output_dims, + num_hidden_dims=net_cfg.num_hidden_dims, + num_layers=net_cfg.num_layers, + bias=net_cfg.bias, + ) + + def encode(self, rays_d: torch.Tensor) -> torch.Tensor: + return self.encoder(rays_d) + + def forward_net(self, rays_d_encoding: torch.Tensor) -> torch.Tensor: + return self.background_net(rays_d_encoding) diff --git a/nemo/collections/multimodal/modules/nerf/geometry/__init__.py b/nemo/collections/multimodal/modules/nerf/geometry/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/modules/nerf/geometry/dmtet.py b/nemo/collections/multimodal/modules/nerf/geometry/dmtet.py new file mode 100644 index 000000000000..c04ac342cec0 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/geometry/dmtet.py @@ -0,0 +1,150 @@ +import torch + + +class DeepMarchingTetrahedra: + """ + Class for Deep Marching Tetrahedra (DMTet). + + Attributes: + device (torch.device): Device to place the tensors. + triangle_table (Tensor): Lookup table for the triangles. + num_triangles_table (Tensor): Table for the number of triangles. + base_tet_edges (Tensor): The base edges for the tetrahedrons. + """ + + def __init__(self, device: torch.device) -> None: + """Initialize DMTet instance with the given device. + + Args: + device (torch.device): The device to place the tensors on. + """ + self.device = device + self.triangle_table = self._create_triangle_table() + self.num_triangles_table = self._create_num_triangles_table() + self.base_tet_edges = self._create_base_tet_edges() + + def _create_triangle_table(self) -> torch.Tensor: + """Create the lookup table for triangles. + + Returns: + Tensor: The triangle lookup table. + """ + return torch.tensor( + [ + [-1, -1, -1, -1, -1, -1], + [1, 0, 2, -1, -1, -1], + [4, 0, 3, -1, -1, -1], + [1, 4, 2, 1, 3, 4], + [3, 1, 5, -1, -1, -1], + [2, 3, 0, 2, 5, 3], + [1, 4, 0, 1, 5, 4], + [4, 2, 5, -1, -1, -1], + [4, 5, 2, -1, -1, -1], + [4, 1, 0, 4, 5, 1], + [3, 2, 0, 3, 5, 2], + [1, 3, 5, -1, -1, -1], + [4, 1, 2, 4, 3, 1], + [3, 0, 4, -1, -1, -1], + [2, 0, 1, -1, -1, -1], + [-1, -1, -1, -1, -1, -1], + ], + dtype=torch.long, + device=self.device, + ) + + def _create_num_triangles_table(self) -> torch.Tensor: + """Create the table for number of triangles. + + Returns: + Tensor: The number of triangles table. + """ + return torch.tensor([0, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 0], dtype=torch.long, device=self.device) + + def _create_base_tet_edges(self) -> torch.Tensor: + """Create the base edges for the tetrahedrons. + + Returns: + Tensor: The base edges for tetrahedrons. + """ + return torch.tensor([0, 1, 0, 2, 0, 3, 1, 2, 1, 3, 2, 3], dtype=torch.long, device=self.device) + + def _sort_edges(self, edges_ex2: torch.Tensor) -> torch.Tensor: + """Sort the given edges. + + Args: + edges_ex2 (Tensor): The edges to be sorted. + + Returns: + Tensor: The sorted edges. + """ + with torch.no_grad(): + order = (edges_ex2[:, 0] > edges_ex2[:, 1]).long() + order = order.unsqueeze(dim=1) + a = torch.gather(input=edges_ex2, index=order, dim=1) + b = torch.gather(input=edges_ex2, index=1 - order, dim=1) + return torch.stack([a, b], -1) + + # TODO(ahmadki): rename to forward ? return mesh ? + def __call__(self, positions: torch.Tensor, sdf_n: torch.Tensor, tet_fx4: torch.Tensor) -> tuple: + """ + Process the provided data to generate vertices and faces. + + Args: + positions (Tensor): Position tensor with shape [N, 3]. + sdf_n (Tensor): SDF tensor with shape [N]. + tet_fx4 (Tensor): Tetrahedron faces tensor with shape [F, 4]. + + Returns: + tuple: Vertices and faces tensors. + """ + with torch.no_grad(): + occ_n = sdf_n > 0 + occ_fx4 = occ_n[tet_fx4.reshape(-1)].reshape(-1, 4) + occ_sum = torch.sum(occ_fx4, -1) + valid_tets = (occ_sum > 0) & (occ_sum < 4) + occ_sum = occ_sum[valid_tets] + + # find all vertices + all_edges = tet_fx4[valid_tets][:, self.base_tet_edges].reshape(-1, 2) + all_edges = self._sort_edges(all_edges) + unique_edges, idx_map = torch.unique(all_edges, dim=0, return_inverse=True) + + unique_edges = unique_edges.long() + mask_edges = occ_n[unique_edges.reshape(-1)].reshape(-1, 2).sum(-1) == 1 + mapping = torch.ones((unique_edges.shape[0]), dtype=torch.long, device=self.device) * -1 + mapping[mask_edges] = torch.arange(mask_edges.sum(), dtype=torch.long, device=self.device) + idx_map = mapping[idx_map] # map edges to verts + + interp_v = unique_edges[mask_edges] + + edges_to_interp = positions[interp_v.reshape(-1)].reshape(-1, 2, 3) + edges_to_interp_sdf = sdf_n[interp_v.reshape(-1)].reshape(-1, 2, 1) + edges_to_interp_sdf[:, -1] *= -1 + + denominator = edges_to_interp_sdf.sum(1, keepdim=True) + edges_to_interp_sdf = torch.flip(edges_to_interp_sdf, [1]) / denominator + verts = (edges_to_interp * edges_to_interp_sdf).sum(1) + + idx_map = idx_map.reshape(-1, 6) + v_id = torch.pow(2, torch.arange(4, dtype=torch.long, device=self.device)) + tetindex = (occ_fx4[valid_tets] * v_id.unsqueeze(0)).sum(-1) + num_triangles = self.num_triangles_table[tetindex] + + # Generate triangle indices + faces = torch.cat( + ( + torch.gather( + input=idx_map[num_triangles == 1], + dim=1, + index=self.triangle_table[tetindex[num_triangles == 1]][:, :3], + ).reshape(-1, 3), + torch.gather( + input=idx_map[num_triangles == 2], + dim=1, + index=self.triangle_table[tetindex[num_triangles == 2]][:, :6], + ).reshape(-1, 3), + ), + dim=0, + ) + + return verts, faces diff --git a/nemo/collections/multimodal/modules/nerf/geometry/layers.py b/nemo/collections/multimodal/modules/nerf/geometry/layers.py new file mode 100644 index 000000000000..c80696bd170c --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/geometry/layers.py @@ -0,0 +1,129 @@ +from typing import Callable, List, Type, Union + +import torch +import torch.nn as nn + +BlockBuilder = Union[Callable[[int, int, bool], nn.Module], Type[nn.Module], None] + + +class MLP(nn.Module): + """ + A Multi-Layer Perceptron (MLP) module. + + Args: + num_input_dims (int): Number of input dimensions. + num_output_dims (int): Number of output dimensions. + num_hidden_dims (int): Number of hidden dimensions. + num_layers (int): Number of layers in the MLP. + bias (bool): If True, enables the bias in Linear layers. Default is True. + block (BlockBuilder): A callable or class for constructing a block. Default is None. + """ + + def __init__( + self, + num_input_dims: int, + num_output_dims: int, + num_hidden_dims: int, + num_layers: int, + bias: bool = True, + block: BlockBuilder = None, + ): + super().__init__() + + # Initialize the network as an empty list + network = [] + + # Add input layer + network.append(nn.Linear(num_input_dims, num_hidden_dims, bias=bias)) + network.append(nn.ReLU(inplace=True)) + + # Add hidden layers + for _ in range(1, num_layers - 1): + network.extend(self.build_layer(num_hidden_dims, num_hidden_dims, bias, block)) + + # Add output layer + network.append(nn.Linear(num_hidden_dims, num_output_dims, bias=bias)) + + # Wrap layers in ModuleList for proper registration + self.net = nn.ModuleList(network) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Forward pass through the MLP. + + Args: + x (torch.Tensor): Input tensor. + + Returns: + torch.Tensor: Output tensor. + """ + for module in self.net: + x = module(x) + return x + + @staticmethod + def build_layer( + num_input_dims: int, num_output_dims: int, bias: bool = True, block_builder: BlockBuilder = None + ) -> List[nn.Module]: + """ + Build a single layer for the MLP. + + Args: + num_input_dims (int): Number of input dimensions. + num_output_dims (int): Number of output dimensions. + bias (bool): If True, enables the bias in Linear layers. Default is True. + block_builder (BlockBuilder): A callable or class for constructing a block. Default is None. + + Returns: + List[nn.Module]: A list containing the layer's modules. + """ + if block_builder is None: + return [nn.Linear(num_input_dims, num_output_dims, bias=bias), nn.ReLU(inplace=True)] + else: + return [block_builder(num_input_dims, num_output_dims, bias=bias)] + + +class ResBlock(nn.Module): + """ + A residual block module. + + Args: + num_input_dims (int): Number of input dimensions. + num_output_dims (int): Number of output dimensions. + bias (bool): If True, enables the bias in Linear layers. Default is True. + """ + + def __init__(self, num_input_dims: int, num_output_dims: int, bias: bool = True): + super().__init__() + + self.dense = nn.Linear(num_input_dims, num_output_dims, bias=bias) + self.norm = nn.LayerNorm(num_output_dims) + self.activation = nn.SiLU(inplace=True) + + if num_input_dims != num_output_dims: + self.skip = nn.Linear(num_input_dims, num_output_dims, bias=False) + else: + self.skip = None + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Forward pass through the residual block. + + Args: + x (torch.Tensor): Input tensor. + + Returns: + torch.Tensor: Output tensor. + """ + identity = x + + out = self.dense(x) + out = self.norm(out) + + if self.skip is not None: + identity = self.skip(identity) + + out += identity + out = self.activation(out) + + return out diff --git a/nemo/collections/multimodal/modules/nerf/geometry/nerf_base.py b/nemo/collections/multimodal/modules/nerf/geometry/nerf_base.py new file mode 100644 index 000000000000..0b5eb6b6f260 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/geometry/nerf_base.py @@ -0,0 +1,360 @@ +from enum import Enum +from typing import Callable, Optional, Tuple, Union + +import mcubes +import numpy as np +import pymeshlab +import torch +import torch.nn as nn +import torch.nn.functional as F +import trimesh + +from nemo.collections.multimodal.modules.nerf.utils.activation import trunc_exp + + +class DensityActivationEnum(str, Enum): + EXP = "exp" + SOFTPLUS = "softplus" + + +class NormalTypeEnum(str, Enum): + AUTOGRAD = "autograd" + FORWARD_FINITE_DIFFERENCE = "forward_finite_difference" + BACKWARD_FINITE_DIFFERENCE = "backward_finite_difference" + CENTRAL_FINITE_DIFFERENCE = "central_finite_difference" + + +# TODO(ahmadki): make abstract +class NeRFBase(nn.Module): + """ + A base class for Neural Radiance Fields (NeRF) models. + + Args: + num_input_dims (int): Number of input dimensions. + bound (torch.Tensor): The bounding box tensor. + density_activation (DensityActivationEnum): Activation function for density. + blob_radius (float): Radius for the blob. + blob_density (float): Density for the blob. + normal_type (Optional[NormalTypeEnum]): Method to compute normals. + """ + + def __init__( + self, + num_input_dims: int, + bound: torch.Tensor, + density_activation: DensityActivationEnum, + blob_radius: float, + blob_density: float, + normal_type: Optional[NormalTypeEnum] = NormalTypeEnum.CENTRAL_FINITE_DIFFERENCE, + ) -> None: + super().__init__() + self.num_input_dims = num_input_dims + self.bound = bound + self.density_activation = density_activation + self.blob_radius = blob_radius + self.blob_density = blob_density + self.normal_type = normal_type + + def encode(self, positions: torch.Tensor) -> torch.Tensor: + """Encode 3D positions. To be implemented by subclasses.""" + raise NotImplementedError + + def sigma_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: + """Calculate sigma (density). To be implemented by subclasses.""" + raise NotImplementedError + + def features_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: + """Calculate features. To be implemented by subclasses.""" + raise NotImplementedError + + def forward( + self, positions: torch.Tensor, return_normal: bool = True + ) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: + """ + Forward pass for the NeRF model. + + Args: + positions (torch.Tensor): The positions. + return_normal (bool): Flag to indicate whether to return normals or not. + + Returns: + Tuple containing density, features, and possibly normals. + """ + + if return_normal: + if self.normal_type == NormalTypeEnum.AUTOGRAD: + with torch.enable_grad(): + positions.requires_grad_(True) + sigma, features = self.forward_density_features(positions) + normal = -torch.autograd.grad(torch.sum(sigma), positions, create_graph=True)[0] # [N, D] + elif self.normal_type in [ + NormalTypeEnum.CENTRAL_FINITE_DIFFERENCE, + NormalTypeEnum.FORWARD_FINITE_DIFFERENCE, + NormalTypeEnum.BACKWARD_FINITE_DIFFERENCE, + ]: + sigma, features = self.forward_density_features(positions) + normal = self.normal_finite_differences(positions) + else: + raise NotImplementedError("Invalid normal type.") + + normal = F.normalize(normal) + normal = torch.nan_to_num(normal) + else: + sigma, features = self.forward_density_features(positions) + normal = None + + return sigma, features, normal + + def forward_density_features(self, positions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Calculate both density and features based on the input positions. + + This function takes into account edge cases like empty input tensors and calculates + the density and features accordingly. See GitHub issues for details: + - https://github.com/KAIR-BAIR/nerfacc/issues/207#issuecomment-1653621720 + - https://github.com/ashawkey/torch-ngp/issues/176 + + Args: + positions (torch.Tensor): Input positions tensor with shape [B*N, D]. + + Returns: + Tuple[torch.Tensor, torch.Tensor]: Tuple containing density and features tensors. + """ + + # Handle empty positions + if positions.shape[0] == 0: + sigma = torch.zeros(0, device=positions.device) + features = torch.zeros(0, self.num_input_dims, device=positions.device) + return sigma, features + + # Encode positions + positions_encoding = self.encode(positions) + + # Compute density + density = self.forward_density(positions, positions_encoding) + + # Compute features + features = self.forward_features(positions, positions_encoding) + + return density, features + + def forward_density( + self, positions: torch.Tensor, positions_encoding: Optional[torch.Tensor] = None + ) -> torch.Tensor: + """ + Calculate the density based on the input positions and their encoding. + + Args: + positions (torch.Tensor): Input positions tensor with shape [B*N, D]. + positions_encoding (Optional[torch.Tensor]): Optional encoded positions. + Will be computed from `positions` if not provided. + + Returns: + torch.Tensor: Density tensor. + """ + + # Handle empty positions + if positions.shape[0] == 0: + sigma = torch.zeros(0, device=positions.device) + return sigma + + # Compute encoded positions if not provided + if positions_encoding is None: + positions_encoding = self.encode(positions) + + # Compute sigma using the neural network + sigma = self.sigma_net(positions_encoding) + + # Compute density using activation function + if self.density_activation == DensityActivationEnum.EXP: + density = trunc_exp(sigma + self.density_blob(positions)) + elif self.density_activation == DensityActivationEnum.SOFTPLUS: + density = F.softplus(sigma + self.density_blob(positions)) + else: + raise NotImplementedError("Invalid density activation.") + + return density + + def forward_features( + self, positions: torch.Tensor, positions_encoding: Optional[torch.Tensor] = None + ) -> torch.Tensor: + """ + Compute the features based on the input positions and their encoding. + + Args: + positions (torch.Tensor): Input positions tensor with shape [B*N, D]. + positions_encoding (Optional[torch.Tensor]): Optional encoded positions. + Will be computed from `positions` if not provided. + + Returns: + torch.Tensor: Features tensor with shape [B*N, num_features_dims]. + """ + + # Handle empty positions + if positions.shape[0] == 0: + features = torch.zeros(0, self.num_features_dims, device=positions.device) + return features + + # Compute encoded positions if not provided + if positions_encoding is None: + positions_encoding = self.encode(positions) + + # Compute features using the neural network + features = self.features_net(positions_encoding) + + # Apply the sigmoid activation function to the features + features = torch.sigmoid(features) + + return features + + @torch.no_grad() + def density_blob(self, positions: torch.Tensor) -> torch.Tensor: + """ + Compute the density blob for the given positions. + + This method computes a density blob for each position in the tensor. It is + used to add a density value based on the distance of each position from the origin. + + Args: + positions (torch.Tensor): Input positions tensor with shape [B*N, D]. + + Returns: + torch.Tensor: Density blob tensor with shape [B*N, 1]. + """ + + # Compute the squared distance for each position + d = (positions ** 2).sum(-1) + + # Compute the density blob based on the activation function + if self.density_activation == DensityActivationEnum.EXP: + g = self.blob_density * torch.exp(-d / (2 * self.blob_radius ** 2)) + elif self.density_activation == DensityActivationEnum.SOFTPLUS: + g = self.blob_density * (1 - torch.sqrt(d) / self.blob_radius) + else: + raise NotImplementedError("Invalid density activation.") + + return g + + def normal_finite_differences(self, positions: torch.Tensor, eps: float = 1e-2) -> torch.Tensor: + """ + Calculate normals using finite differences. + + Args: + positions (torch.Tensor): Input positions tensor with shape [B*N, D]. + eps (float): A small value for finite difference calculation. Default is 1e-2. + + Returns: + torch.Tensor: Calculated normals tensor [B*N, D] + """ + # Create perturbation tensor + perturb = torch.eye(self.num_input_dims).to(positions.device).float() * eps # Shape (D, D) + + # Expand dims for batched operation + positions_expanded = positions[:, None, :] # (B*N, 1, D) + perturb_expanded = perturb[None, :, :] # (1, D, D) + + # Compute perturbed points + if self.normal_type == NormalTypeEnum.FORWARD_FINITE_DIFFERENCE: + positions_perturbed = positions_expanded + perturb_expanded # (B*N, D, D) + elif self.normal_type == NormalTypeEnum.BACKWARD_FINITE_DIFFERENCE: + positions_perturbed = positions_expanded - perturb_expanded # (B*N, D, D) + elif self.normal_type == NormalTypeEnum.CENTRAL_FINITE_DIFFERENCE: + positions_perturbed_pos = positions_expanded + perturb_expanded # (B*N, D, D) + positions_perturbed_neg = positions_expanded - perturb_expanded # (B*N, D, D) + positions_perturbed = torch.cat([positions_perturbed_pos, positions_perturbed_neg], dim=1) # (B*N, 2*D, D) + + # Reshape perturbed points for batched function call + positions_perturbed_reshaped = positions_perturbed.view(-1, self.num_input_dims) # (B*N * {D or 2*D}, D) + + # Evaluate function at perturbed points + perturbed_sigma = self.forward_density(positions_perturbed_reshaped) # (B*N * {D or 2*D}, 1) + + # Reshape function values + if self.normal_type == NormalTypeEnum.CENTRAL_FINITE_DIFFERENCE: + perturbed_sigma = perturbed_sigma.view(-1, 2 * self.num_input_dims) # (B*N, 2*D) + sigma_pos, sigma_neg = torch.chunk(perturbed_sigma, 2, dim=1) # (B*N, D) each + normal = 0.5 * (sigma_pos - sigma_neg) / eps # (B*N, D) + else: + perturbed_sigma = perturbed_sigma.view(-1, self.num_input_dims) # (B*N, D) + sigma = self.forward_density(positions) # (B*N,) # TODO(ahmadki): use the value from forward ? + if self.normal_type == NormalTypeEnum.FORWARD_FINITE_DIFFERENCE: + normal = (perturbed_sigma - sigma[:, None]) / eps # (B*N, D) + else: # self.normal_type == BACKWARD_FINITE_DIFFERENCE + normal = (sigma[:, None] - perturbed_sigma) / eps # (B*N, D) + + return -normal + + # TODO(ahmadki): needs ar ework: + # 1. texture/vertices are off-axis, needs a fix. + # 2. device='cuda' is hardcoded + # 3. DMTet needs to go through a different code path ? create a base volume nerf, and a base dmtet nerf class ? + @torch.no_grad() + def mesh( + self, resolution: Optional[int] = 128, batch_size: int = 128, density_thresh: Optional[float] = None + ) -> pymeshlab.Mesh: + """ + Generate a mesh from the nerf. + + Args: + resolution (Optional[int]): Resolution of the mesh grid. Default is 128. + batch_size (int): Batch size for the mesh generation. Default is 128. + density_thresh (Optional[float]): Density threshold for the mesh generation. Default is None, will be calculated from mean density. + + Returns: + pymeshlab.Mesh: Mesh object. + """ + # Generate a grid of 3D points + x = np.linspace(-self.bound, self.bound, resolution) + y = np.linspace(-self.bound, self.bound, resolution) + z = np.linspace(-self.bound, self.bound, resolution) + xx, yy, zz = np.meshgrid(x, y, z) + + grid = np.stack((xx, yy, zz), axis=-1) # Shape (resolution, resolution, resolution, 3) + torch_grid = torch.tensor(grid, dtype=torch.float32).reshape(-1, 3).to(device="cuda") + + def batch_process(fn, input, batch_size): + num_points = input.shape[0] + batches = [input[i : i + batch_size] for i in range(0, num_points, batch_size)] + results = [fn(batch) for batch in batches] + results = [result.detach().cpu().numpy() for result in results] + return np.concatenate(results, axis=0) + + density = batch_process(fn=self.forward_density, input=torch_grid, batch_size=batch_size) + density = density.reshape(resolution, resolution, resolution) + + # If not provided set density_thresh based on mean density + if density_thresh is None: + density_thresh = density[density > 1e-3].mean().item() + + # Apply Marching Cubes + vertices, triangles = mcubes.marching_cubes(density, density_thresh) + + # Create a new Mesh + ms = pymeshlab.MeshSet() + + # Create Mesh using vertices and faces + m = pymeshlab.Mesh(vertices.copy(), triangles.copy()) + + # Add mesh to the MeshSet + ms.add_mesh(m, "generated_mesh") + + # Filters + ms.meshing_remove_unreferenced_vertices() + ms.meshing_remove_duplicate_faces() + ms.meshing_remove_null_faces() + ms.meshing_repair_non_manifold_edges(method=0) + ms.meshing_repair_non_manifold_vertices(vertdispratio=0) + + m = ms.current_mesh() + vertices = m.vertex_matrix() + faces = m.face_matrix() + + scaled_vertice = ( + -self.bound + (vertices / resolution) * 2 * self.bound + ) # scale vertices back to [-self.bound, self.bound] + scaled_vertices_torch = torch.tensor(scaled_vertice, dtype=torch.float32).to(device="cuda") + color = batch_process(fn=self.forward_features, input=scaled_vertices_torch, batch_size=batch_size) + + # Create the final mesh from cleaned vertices and faces and with color + mesh = trimesh.Trimesh(vertices=vertices, faces=faces, vertex_colors=color) + return mesh diff --git a/nemo/collections/multimodal/modules/nerf/geometry/tcnn_nerf.py b/nemo/collections/multimodal/modules/nerf/geometry/tcnn_nerf.py new file mode 100644 index 000000000000..2922df999d15 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/geometry/tcnn_nerf.py @@ -0,0 +1,108 @@ +from typing import Dict, Optional + +import numpy as np +import tinycudann as tcnn +import torch + +from nemo.collections.multimodal.modules.nerf.geometry.nerf_base import DensityActivationEnum, NeRFBase, NormalTypeEnum + + +# Don't fuse sigma_net with features_net: +# 1. performance benefit is questionable, especially that we sometimes require only density or features +# 2. we sacrifice generality +class TCNNNerf(NeRFBase): + """ + NeRF model with TCNN encoding and MLPs for sigma and features. + + Args: + num_input_dims (int): Number of input dimensions. + bound (torch.Tensor): The bounding box tensor. + density_activation (DensityActivationEnum): Activation function for density. + blob_radius (float): Radius for the blob. + blob_density (float): Density for the blob. + normal_type (Optional[NormalTypeEnum]): Method to compute normals. + encoder_cfg (Dict): Configuration for the TCNN encoder. + sigma_net_num_output_dims (int): Number of output dimensions for the sigma network. + sigma_net_cfg (Dict): Configuration for the sigma network. + features_net_num_output_dims (int): Number of output dimensions for the features network. + features_net_cfg (Optional[Dict]): Configuration for the features network. + """ + + def __init__( + self, + num_input_dims: int, + bound: torch.Tensor, + density_activation: DensityActivationEnum, + blob_radius: float, + blob_density: float, + normal_type: Optional[NormalTypeEnum], + encoder_cfg: Dict, + sigma_net_num_output_dims: int, + sigma_net_cfg: Dict, + features_net_num_output_dims: int, + features_net_cfg: Optional[Dict], + ) -> None: + super().__init__( + num_input_dims=num_input_dims, + bound=bound, + density_activation=density_activation, + blob_radius=blob_radius, + blob_density=blob_density, + normal_type=normal_type, + ) + + # Set per_level_scale if not set + if encoder_cfg.get('per_level_scale') is None: + encoder_cfg['per_level_scale'] = np.exp2(np.log2(2048 * self.bound / 16) / (16 - 1)) + # Build the TCNN encoder + self.encoder = tcnn.Encoding(n_input_dims=num_input_dims, encoding_config=dict(encoder_cfg)) + + # Build the sigma network + assert sigma_net_num_output_dims == 1, "sigma_net_num_output_dims!=1 is not supported" + self.sigma_tcnn = tcnn.Network( + self.encoder.n_output_dims, sigma_net_num_output_dims, network_config=dict(sigma_net_cfg) + ) + + # Build the features network + self.features_tcnn = None + if features_net_cfg is not None: + self.features_tcnn = tcnn.Network( + self.encoder.n_output_dims, features_net_num_output_dims, network_config=dict(features_net_cfg) + ) + + def encode(self, positions: torch.Tensor) -> torch.Tensor: + """ + Encode the positions using the TCNN encoder. + + Args: + positions (torch.Tensor): The positions tensor. + + Returns: + torch.Tensor: The encoded positions tensor. + """ + # TODO(ahmadki): is it safe to do with FP16 ? + return self.encoder((positions + self.bound) / (2 * self.bound)) + + def sigma_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: + """ + Compute the sigma using the TCNN network. + + Args: + positions_encoding (torch.Tensor): The encoded positions tensor. + + Returns: + torch.Tensor: The sigma tensor. + """ + return self.sigma_tcnn(positions_encoding).squeeze() + + def features_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: + """ + Compute the features using the TCNN network. + + Args: + positions_encoding (torch.Tensor): The encoded positions tensor. + + Returns: + torch.Tensor: The features tensor. + """ + return self.features_tcnn(positions_encoding) diff --git a/nemo/collections/multimodal/modules/nerf/geometry/torchngp_nerf.py b/nemo/collections/multimodal/modules/nerf/geometry/torchngp_nerf.py new file mode 100644 index 000000000000..b831b94ef84b --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/geometry/torchngp_nerf.py @@ -0,0 +1,114 @@ +from typing import Dict, Optional + +import torch + +from nemo.collections.multimodal.modules.nerf.geometry.layers import MLP +from nemo.collections.multimodal.modules.nerf.geometry.nerf_base import DensityActivationEnum, NeRFBase, NormalTypeEnum +from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.encoding import get_encoder + + +# Don't fuse sigma_net with features_net: +# 1. performance benefit is questionable, especially that we sometimes require only density or features +# 2. we sacrifice generality +class TorchNGPNerf(NeRFBase): + """ + NeRF model with Torch-NGP encoding and MLPs for sigma and features. + + Args: + num_input_dims (int): Number of input dimensions. + bound (torch.Tensor): The bounding box tensor. + density_activation (DensityActivationEnum): Activation function for density. + blob_radius (float): Radius for the blob. + blob_density (float): Density for the blob. + normal_type (Optional[NormalTypeEnum]): Method to compute normals. + encoder_type (str): Type of the encoder. + encoder_max_level (int): Maximum level of the encoder. + sigma_net_num_output_dims (int): Number of output dimensions for the sigma network. + sigma_net_cfg (Dict): Configuration for the sigma network. + features_net_num_output_dims (int): Number of output dimensions for the features network. + features_net_cfg (Optional[Dict]): Configuration for the features network. + """ + + def __init__( + self, + num_input_dims: int, + bound: torch.Tensor, + density_activation: DensityActivationEnum, + blob_radius: float, + blob_density: float, + normal_type: Optional[NormalTypeEnum], + encoder_cfg: Dict, + sigma_net_num_output_dims: int, + sigma_net_cfg: Dict, + features_net_num_output_dims: int, + features_net_cfg: Optional[Dict], + ): + super().__init__( + num_input_dims=num_input_dims, + bound=bound, + density_activation=density_activation, + blob_radius=blob_radius, + blob_density=blob_density, + normal_type=normal_type, + ) + + # Build the Torch-NGP encoder + self.encoder_max_level = encoder_cfg.get('encoder_max_level', None) + self.encoder, self.encoder_output_dims = get_encoder(input_dim=num_input_dims, **encoder_cfg) + + # Build the sigma network + assert sigma_net_num_output_dims == 1, "sigma_net_num_output_dims must be equal to 1" + self.sigma_mlp = MLP( + num_input_dims=self.encoder_output_dims, + num_output_dims=sigma_net_num_output_dims, + num_hidden_dims=sigma_net_cfg.num_hidden_dims, + num_layers=sigma_net_cfg.num_layers, + bias=sigma_net_cfg.bias, + ) + + # Build the features network + self.features_mlp = None + if features_net_cfg is not None: + self.features_mlp = MLP( + num_input_dims=self.encoder_output_dims, + num_output_dims=features_net_num_output_dims, + num_hidden_dims=features_net_cfg.num_hidden_dims, + num_layers=features_net_cfg.num_layers, + bias=features_net_cfg.bias, + ) + + def encode(self, positions: torch.Tensor) -> torch.Tensor: + """ + Encode the positions. + + Args: + positions (torch.Tensor): The positions tensor. + + Returns: + torch.Tensor: The encoded positions tensor. + """ + return self.encoder(positions, bound=self.bound, max_level=self.encoder_max_level) + + def sigma_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: + """ + Compute the sigma using the sigma network. + + Args: + positions_encoding (torch.Tensor): The encoded positions tensor. + + Returns: + torch.Tensor: The sigma tensor. + """ + return self.sigma_mlp(positions_encoding).squeeze() + + def features_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: + """ + Compute the features using the features network. + + Args: + positions_encoding (torch.Tensor): The encoded positions tensor. + + Returns: + torch.Tensor: The features tensor. + """ + return self.features_mlp(positions_encoding) diff --git a/nemo/collections/multimodal/modules/nerf/guidance/__init__.py b/nemo/collections/multimodal/modules/nerf/guidance/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_huggingface_pipeline.py b/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_huggingface_pipeline.py new file mode 100644 index 000000000000..008a7b3d3627 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_huggingface_pipeline.py @@ -0,0 +1,142 @@ +from typing import List, Union + +import torch +import torch.nn.functional as F +from diffusers import DDIMScheduler, StableDiffusionPipeline + +from nemo.collections.multimodal.modules.nerf.guidance.txt2img_guidance_base import Txt2ImgGuidanceBase + + +class StableDiffusion(Txt2ImgGuidanceBase): + def __init__( + self, + model_key: str = "stabilityai/stable-diffusion-2-1-base", + t_range: List[float] = [0.02, 0.98], + precision: str = "16", + device: torch.device = torch.device('cuda'), + ): + """ + Initialize StableDiffusion with model_key, t_range, precision and device. + + Parameters: + model_key (str): Pre-trained model key. + t_range (List[float]): Range for timesteps. + precision (str): Model precision ("16", "bf16" or other for float32). + device (torch.device): Device for torch tensor. + """ + super().__init__() + + self.device = device + self.model_key = model_key + self.precision_t = self._get_precision_type(precision) + + # Create model + pipe = StableDiffusionPipeline.from_pretrained(model_key, torch_dtype=self.precision_t).to(self.device) + if self.precision_t in [torch.float16, torch.bfloat16]: + pipe.unet.to(memory_format=torch.channels_last) + + self.vae = pipe.vae + self.tokenizer = pipe.tokenizer + self.text_encoder = pipe.text_encoder + self.unet = pipe.unet + self.scheduler = DDIMScheduler.from_pretrained(model_key, subfolder="scheduler", torch_dtype=self.precision_t) + + del pipe + + self.num_train_timesteps = self.scheduler.config.num_train_timesteps + self.min_step = int(self.num_train_timesteps * t_range[0]) + self.max_step = int(self.num_train_timesteps * t_range[1]) + self.alphas = self.scheduler.alphas_cumprod.to(self.device) + + def _get_precision_type(self, precision: str) -> torch.dtype: + """ + Map string precision representation to torch dtype. + + Parameters: + precision (str): String representation of precision. + + Returns: + torch.dtype: Corresponding torch dtype. + """ + precision_map = {"16": torch.float16, "bf16": torch.bfloat16} + return precision_map.get(precision, torch.float32) + + @torch.no_grad() + def get_text_embeds(self, prompt: str) -> torch.Tensor: + """ + Get text embeddings from the given prompt. + + Parameters: + prompt (str): Input text. + + Returns: + torch.Tensor: Text embeddings tensor [B, 77, 1024]. + """ + inputs = self.tokenizer( + prompt, padding='max_length', max_length=self.tokenizer.model_max_length, return_tensors='pt' + ) + embeddings = self.text_encoder(inputs.input_ids.to(self.device))[0] + return embeddings + + # @torch.compile() # TODO(ahmadki) + def train_step( + self, + text_embeddings: torch.Tensor, + pred_rgb: torch.Tensor, + guidance_scale: float = 100.0, + as_latent: bool = False, + ) -> float: + """ + Train step function for StableDiffusion. + + Parameters: + text_embeddings (torch.Tensor): Embeddings tensor [B, 512]. + pred_rgb (torch.Tensor): Predicted RGB tensor [B, 3, 512, 512]. + guidance_scale (float): Guidance scaling factor. + as_latent (bool): If True, considers pred_rgb as latent. + + Returns: + float: Loss value. + """ + if as_latent: + latents = F.interpolate(pred_rgb, (64, 64), mode='bilinear', align_corners=False) * 2 - 1 + else: + pred_rgb_512 = F.interpolate(pred_rgb, (512, 512), mode='bilinear', align_corners=False) + latents = self.encode_imgs(pred_rgb_512) + + t = torch.randint(self.min_step, self.max_step + 1, (latents.shape[0],), dtype=torch.long, device=self.device) + + with torch.no_grad(): + # add noise + noise = torch.randn_like(latents) + latents_noisy = self.scheduler.add_noise(latents, noise, t) + # pred noise + latent_model_input = torch.cat([latents_noisy] * 2) + td = torch.cat([t] * 2) + noise_pred = self.unet(latent_model_input, td, encoder_hidden_states=text_embeddings).sample + + noise_pred_uncond, noise_pred_pos = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_pos - noise_pred_uncond) + + w = 1 - self.alphas[t] + grad = w[:, None, None, None] * (noise_pred - noise) + grad = torch.nan_to_num(grad) + + targets = (latents - grad).detach() + loss = 0.5 * F.mse_loss(latents.float(), targets, reduction='sum') / latents.shape[0] + return loss + + def encode_imgs(self, imgs: torch.Tensor) -> torch.Tensor: + """ + Encode images into latent representations. + + Parameters: + imgs (torch.Tensor): Image tensor [B, 3, H, W]. + + Returns: + torch.Tensor: Encoded latent tensor. + """ + imgs = 2 * imgs - 1 + posterior = self.vae.encode(imgs).latent_dist + latents = posterior.sample() * self.vae.config.scaling_factor + return latents diff --git a/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_nemo_pipeline.py b/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_nemo_pipeline.py new file mode 100644 index 000000000000..a605391b9c92 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_nemo_pipeline.py @@ -0,0 +1,129 @@ +import os +import tempfile + +import torch +import torch.nn as nn +import torch.nn.functional as F +from omegaconf import OmegaConf + +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion +from nemo.collections.multimodal.modules.nerf.guidance.txt2img_guidance_base import Txt2ImgGuidanceBase +from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import ( + DiagonalGaussianDistribution, +) +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector + + +class StableDiffusion(Txt2ImgGuidanceBase): + def __init__( + self, checkpoint, sampler_type="DDIM", t_range=[0.02, 0.98], precision="16", device=torch.device('cuda') + ): + super().__init__() + + self.device = device + self.checkpoint = checkpoint + self.sampler_type = sampler_type + + cfg, state_dict = self.load_config_and_state_from_nemo(checkpoint) + + cfg.precision = precision + cfg.ckpt_path = None + cfg.unet_config.from_pretrained = None + cfg.first_stage_config.from_pretrained = None + + self.model = LatentDiffusion(cfg).to(device) + + sd_state_dict = {} + # Remove Megatron wrapper and inductor + for key, value in state_dict.items(): + key = key[6:] + sd_state_dict[key] = value + self.model.load_state_dict(sd_state_dict) + self.first_stage_model = self.model.first_stage_model + self.text_encoder = self.model.cond_stage_model.encode + + self.num_train_timesteps = self.model.num_timesteps + self.min_step = int(self.num_train_timesteps * t_range[0]) + self.max_step = int(self.num_train_timesteps * t_range[1]) + self.alphas = self.model.alphas_cumprod.to(self.device) + + @torch.no_grad() + def get_text_embeds(self, prompt): + return self.text_encoder(prompt) + + @torch.autocast(device_type="cuda") + def train_step(self, text_embeddings, pred_rgb, guidance_scale=100, as_latent=False): + + if as_latent: + latents = F.interpolate(pred_rgb, (64, 64), mode='bilinear', align_corners=False) * 2 - 1 + else: + # interp to 512x512 to be fed into vae. + pred_rgb_512 = F.interpolate(pred_rgb, (512, 512), mode='bilinear', align_corners=False) + # encode image into latents with vae, requires grad! + latents = self.encode_imgs(pred_rgb_512) + + # timestep ~ U(0.02, 0.98) to avoid very high/low noise level + t = torch.randint(self.min_step, self.max_step + 1, (latents.shape[0],), dtype=torch.long, device=self.device) + + with torch.no_grad(): + noise = torch.randn_like(latents) + latents_noisy = self.model.q_sample(x_start=latents, t=t, noise=noise) + latent_model_input = torch.cat([latents_noisy] * 2) + td = torch.cat([t] * 2) + noise_pred = self.model.apply_model(latent_model_input, td, text_embeddings) + + noise_pred_uncond, noise_pred_pos = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_pos - noise_pred_uncond) + + # w(t), sigma_t^2 + w = 1 - self.alphas[t] + grad = w[:, None, None, None] * (noise_pred - noise) + grad = torch.nan_to_num(grad) + + targets = (latents - grad).detach() + loss = 0.5 * F.mse_loss(latents.float(), targets, reduction='sum') / latents.shape[0] + return loss + + def image_encoder(self, x): + h = self.first_stage_model.encoder(x) + moments = self.first_stage_model.quant_conv(h) + posterior = DiagonalGaussianDistribution(moments) + return posterior + + def encode_imgs(self, imgs): + # imgs: [B, 3, H, W] + + imgs = 2 * imgs - 1 + + posterior = self.image_encoder(imgs) + latents = ( + posterior.sample() * self.image_encoder.config.scaling_factor + ) # self.vae.config.scaling_factor==0.18215 + + return latents + + def load_config_and_state_from_nemo(self, nemo_path): + if torch.cuda.is_available(): + map_location = torch.device('cuda') + else: + map_location = torch.device('cpu') + save_restore_connector = NLPSaveRestoreConnector() + cwd = os.getcwd() + + with tempfile.TemporaryDirectory() as tmpdir: + try: + save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) + + # Change current working directory to + os.chdir(tmpdir) + config_yaml = os.path.join(tmpdir, save_restore_connector.model_config_yaml) + cfg = OmegaConf.load(config_yaml) + + model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) + state_dict = save_restore_connector._load_state_dict_from_disk( + model_weights, map_location=map_location + ) + finally: + os.chdir(cwd) + + return cfg, state_dict diff --git a/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_trt_pipeline.py b/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_trt_pipeline.py new file mode 100644 index 000000000000..bf8c5e971002 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_trt_pipeline.py @@ -0,0 +1,221 @@ +import logging +import os +import tempfile + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from omegaconf import OmegaConf +from polygraphy import cuda +from transformers import CLIPTokenizer + +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion +from nemo.collections.multimodal.modules.nerf.guidance.txt2img_guidance_base import Txt2ImgGuidanceBase +from nemo.collections.multimodal.modules.nerf.utils.trt_engine import Engine, device_view +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + extract_into_tensor, + make_beta_schedule, +) +from nemo.collections.multimodal.parts.stable_diffusion.utils import default +from nemo.collections.multimodal.parts.utils import randn_like +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector + + +class LatentDiffusionWrapper(Txt2ImgGuidanceBase): + def __init__(self, plan_dir, checkpoint): + super().__init__() + with open(os.path.join(plan_dir, "conf.yaml"), "rb") as fp: + config = OmegaConf.load(fp.name) + max_batch_size = config.batch_size + + self.tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14") + self.max_length = config.clip.max_length + self.rng = torch.Generator(device=torch.cuda.current_device(),) + + self.set_beta_schedule() + + stream = cuda.Stream() + + self.image_encoder = self.load_vae_from_checkpoint(checkpoint) + + self.text_encoder = Engine(os.path.join(plan_dir, "clip.plan")) + shape_dict = {'tokens': config.clip.tokens, 'logits': config.clip.logits} + self.text_encoder.set_engine(stream, shape_dict) + + self.unet = Engine(os.path.join(plan_dir, "unet.plan")) + shape_dict = { + 'x': config.unet.x, + 't': (max_batch_size * 2,), + 'context': config.unet.context, + 'logits': config.unet.logits, + } + self.unet.set_engine(stream, shape_dict) + + def set_beta_schedule(self): + betas = make_beta_schedule("linear", 1000, linear_start=0.00085, linear_end=0.0120, cosine_s=0.008) + alphas = 1.0 - betas + alphas_cumprod = np.cumprod(alphas, axis=0) + betas = torch.tensor(betas) + alphas = torch.tensor(alphas) + alphas_cumprod = torch.tensor(alphas_cumprod) + to_torch = lambda x: x.clone().detach().to(torch.float32).to(torch.cuda.current_device()) + self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) + self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod.cpu()))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1.0 - alphas_cumprod.cpu()))) + (timesteps,) = betas.shape + self.num_timesteps = int(timesteps) + + def q_sample(self, x_start, t, noise=None): + noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) + return ( + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise + ) + + def encode_imgs(self, imgs): + imgs = 2 * imgs - 1 + posterior = self.image_encoder(imgs) + latents = posterior.sample() * 0.18215 + return latents + + def clip_encode(self, text): + batch_encoding = self.tokenizer( + text, + truncation=True, + max_length=self.max_length, + return_length=True, + return_overflowing_tokens=False, + padding="max_length", + return_tensors="pt", + ) + tokens = batch_encoding["input_ids"].to("cuda", non_blocking=True) + z = self.text_encoder.infer({"tokens": device_view(tokens.type(torch.int32))})['logits'].clone() + seq_len = (z.shape[1] + 8 - 1) // 8 * 8 + z = torch.nn.functional.pad(z, (0, 0, 0, seq_len - z.shape[1]), value=0.0) + return z + + def apply_model(self, x, t, cond, return_ids=False): + self.conditioning_key = "crossattn" + if isinstance(cond, dict): + # hybrid case, cond is exptected to be a dict + pass + else: + if not isinstance(cond, list): + cond = [cond] + # key = 'c_concat' if self.model.conditioning_key == 'concat' else 'c_crossattn' + key = 'c_crossattn' + cond = {key: cond} + # UNET TRT + cc = torch.cat(cond['c_crossattn'], 1) # needs to be changed I think + out = self.unet.infer( + { + "x": device_view(x.contiguous()), + "t": device_view(t.type(torch.int32).contiguous()), + "context": device_view(cc.contiguous()), + } + )['logits'].clone() + if isinstance(out, tuple) and not return_ids: + return out[0] + else: + return out + + def load_vae_from_checkpoint(self, checkpoint): + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + cfg, state_dict = self.load_config_and_state_from_nemo(checkpoint) + + if cfg.get('unet_config') and cfg.get('unet_config').get('from_pretrained'): + cfg.unet_config.from_pretrained = None + if cfg.get('first_stage_config') and cfg.get('first_stage_config').get('from_pretrained'): + cfg.first_stage_config.from_pretrained = None + + model = LatentDiffusion(cfg).to(device) + + sd_state_dict = {} + for key, value in state_dict.items(): + key = key[6:] + sd_state_dict[key] = value + model.load_state_dict(sd_state_dict) + + return model.first_stage_model.encode + + def load_config_and_state_from_nemo(self, nemo_path): + if torch.cuda.is_available(): + map_location = torch.device('cuda') + else: + map_location = torch.device('cpu') + save_restore_connector = NLPSaveRestoreConnector() + cwd = os.getcwd() + + with tempfile.TemporaryDirectory() as tmpdir: + try: + save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) + + # Change current working directory to + os.chdir(tmpdir) + config_yaml = os.path.join(tmpdir, save_restore_connector.model_config_yaml) + cfg = OmegaConf.load(config_yaml) + + model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) + state_dict = save_restore_connector._load_state_dict_from_disk( + model_weights, map_location=map_location + ) + finally: + os.chdir(cwd) + + return cfg, state_dict + + +class StableDiffusion(nn.Module): + def __init__(self, plan_dir, checkpoint, sampler_type="DDIM", t_range=[0.02, 0.98], device=torch.device('cuda')): + super().__init__() + logging.info(f'loading stable diffusion...') + + self.device = device + self.sampler_type = sampler_type + self.model = LatentDiffusionWrapper(plan_dir, checkpoint) + + self.text_encoder = self.model.clip_encode + + self.num_train_timesteps = self.model.num_timesteps + self.min_step = int(self.num_train_timesteps * t_range[0]) + self.max_step = int(self.num_train_timesteps * t_range[1]) + self.alphas = self.model.alphas_cumprod.to(self.device) # for convenience + + logging.info(f'loaded stable diffusion!') + + @torch.no_grad() + def get_text_embeds(self, prompt): + return self.text_encoder(prompt) + + def train_step(self, text_embeddings, pred_rgb, guidance_scale=100, as_latent=False): + + if as_latent: + latents = F.interpolate(pred_rgb, (64, 64), mode='bilinear', align_corners=False) * 2 - 1 + else: + # interp to 512x512 to be fed into vae. + pred_rgb_512 = F.interpolate(pred_rgb, (512, 512), mode='bilinear', align_corners=False) + # encode image into latents with vae, requires grad! + latents = self.model.encode_imgs(pred_rgb_512) + + # timestep ~ U(0.02, 0.98) to avoid very high/low noise level + t = torch.randint(self.min_step, self.max_step + 1, (latents.shape[0],), dtype=torch.long, device=self.device) + + with torch.no_grad(): + noise = torch.randn_like(latents) + latents_noisy = self.model.q_sample(x_start=latents, t=t, noise=noise) + latent_model_input = torch.cat([latents_noisy] * 2) + td = torch.cat([t] * 2) + noise_pred = self.model.apply_model(latent_model_input, td, text_embeddings) + + noise_pred_uncond, noise_pred_pos = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_pos - noise_pred_uncond) + + # w(t), sigma_t^2 + w = 1 - self.alphas[t] + grad = w[:, None, None, None] * (noise_pred - noise) + grad = torch.nan_to_num(grad) + + targets = (latents - grad).detach() + loss = 0.5 * F.mse_loss(latents.float(), targets, reduction='sum') / latents.shape[0] + return loss diff --git a/nemo/collections/multimodal/modules/nerf/guidance/txt2img_guidance_base.py b/nemo/collections/multimodal/modules/nerf/guidance/txt2img_guidance_base.py new file mode 100644 index 000000000000..8e03ffb41d71 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/guidance/txt2img_guidance_base.py @@ -0,0 +1,6 @@ +import torch.nn as nn + + +class Txt2ImgGuidanceBase(nn.Module): + def __init__(self): + super().__init__() diff --git a/nemo/collections/multimodal/modules/nerf/loss/laplacian_smooth_loss.py b/nemo/collections/multimodal/modules/nerf/loss/laplacian_smooth_loss.py new file mode 100644 index 000000000000..2240f0aee8ce --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/loss/laplacian_smooth_loss.py @@ -0,0 +1,38 @@ +import torch +import torch.nn as nn + + +class LaplacianSmoothLoss(nn.Module): + def __init__(self): + super(LaplacianSmoothLoss, self).__init__() + + @torch.cuda.amp.autocast(enabled=False) + def forward(self, verts, faces): + with torch.no_grad(): + L = self.laplacian_uniform(verts, faces.long()) + loss = L.mm(verts) + loss = loss.norm(dim=1) + loss = loss.mean() + return loss + + # TODO(ahmadki): should be moved to a separate mesh class + def laplacian_uniform(self, verts, faces): + V = verts.shape[0] + F = faces.shape[0] + + # Neighbor indices + ii = faces[:, [1, 2, 0]].flatten() + jj = faces[:, [2, 0, 1]].flatten() + adj = torch.stack([torch.cat([ii, jj]), torch.cat([jj, ii])], dim=0).unique(dim=1) + adj_values = torch.ones(adj.shape[1], device=verts.device, dtype=torch.float) + + # Diagonal indices + diag_idx = adj[0] + + # Build the sparse matrix + idx = torch.cat((adj, torch.stack((diag_idx, diag_idx), dim=0)), dim=1) + values = torch.cat((-adj_values, adj_values)) + + # The coalesce operation sums the duplicate indices, resulting in the + # correct diagonal + return torch.sparse_coo_tensor(idx, values, (V, V)).coalesce() diff --git a/nemo/collections/multimodal/modules/nerf/loss/normal_consistency_loss.py b/nemo/collections/multimodal/modules/nerf/loss/normal_consistency_loss.py new file mode 100644 index 000000000000..4459c7003fd4 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/loss/normal_consistency_loss.py @@ -0,0 +1,56 @@ +import torch +import torch.nn as nn + + +class NormalConsistencyLoss(nn.Module): + def __init__(self): + super(NormalConsistencyLoss, self).__init__() + + # TODO(ahmadki): is this safe to do in FP16 ? + def forward(self, face_normals, t_pos_idx): + tris_per_edge = self.compute_edge_to_face_mapping(t_pos_idx) + + # Fetch normals for both faces sharind an edge + n0 = face_normals[tris_per_edge[:, 0], :] + n1 = face_normals[tris_per_edge[:, 1], :] + + # Compute error metric based on normal difference + term = torch.clamp(torch.sum(n0 * n1, -1, keepdim=True), min=-1.0, max=1.0) + term = 1.0 - term + + return torch.mean(torch.abs(term)) + + # TODO(ahmadki): should belog to mesh class + def compute_edge_to_face_mapping(self, attr_idx): + with torch.no_grad(): + # Get unique edges + # Create all edges, packed by triangle + all_edges = torch.cat( + ( + torch.stack((attr_idx[:, 0], attr_idx[:, 1]), dim=-1), + torch.stack((attr_idx[:, 1], attr_idx[:, 2]), dim=-1), + torch.stack((attr_idx[:, 2], attr_idx[:, 0]), dim=-1), + ), + dim=-1, + ).view(-1, 2) + + # Swap edge order so min index is always first + order = (all_edges[:, 0] > all_edges[:, 1]).long().unsqueeze(dim=1) + sorted_edges = torch.cat( + (torch.gather(all_edges, 1, order), torch.gather(all_edges, 1, 1 - order)), dim=-1 + ) + + # Elliminate duplicates and return inverse mapping + unique_edges, idx_map = torch.unique(sorted_edges, dim=0, return_inverse=True) + + tris = torch.arange(attr_idx.shape[0]).repeat_interleave(3).cuda() + + tris_per_edge = torch.zeros((unique_edges.shape[0], 2), dtype=torch.int64).cuda() + + # Compute edge to face table + mask0 = order[:, 0] == 0 + mask1 = order[:, 0] == 1 + tris_per_edge[idx_map[mask0], 0] = tris[mask0] + tris_per_edge[idx_map[mask1], 1] = tris[mask1] + + return tris_per_edge diff --git a/nemo/collections/multimodal/modules/nerf/materials/__init__.py b/nemo/collections/multimodal/modules/nerf/materials/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/modules/nerf/materials/basic_shading.py b/nemo/collections/multimodal/modules/nerf/materials/basic_shading.py new file mode 100644 index 000000000000..434f58552a05 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/materials/basic_shading.py @@ -0,0 +1,66 @@ +from typing import Optional + +import torch + +from nemo.collections.multimodal.modules.nerf.materials.materials_base import MaterialsBase, ShadingEnum + + +class BasicShading(MaterialsBase): + """ + Material model for handling various shading types. + """ + + def __init__(self): + super(BasicShading, self).__init__() + self.specular = torch.nn.Parameter(torch.rand(3)) + self.shininess = torch.nn.Parameter(torch.rand(1)) + + def forward( + self, + albedo: torch.Tensor, + normals: torch.Tensor, + light_d: torch.Tensor, + ambient_ratio: float, + shading_type: Optional[ShadingEnum] = None, + ) -> torch.Tensor: + """ + Apply material and shading to the input RGB tensor. + + Args: + albedo (Tensor): Base albedo values. + normals (Tensor): Normal vectors at each ray intersection. + light_d (Tensor): Light direction. + ambient_ratio (float): Ratio for ambient lighting. + shading_type (ShadingEnum): The type of shading to apply + + Returns: + Tensor: The output RGB tensor after applying material and shading. + """ + if shading_type is None: + return albedo + elif shading_type == ShadingEnum.TEXTURELESS: + return torch.ones_like(albedo) * ambient_ratio + elif shading_type == ShadingEnum.NORMAL: + return (normals + 1) / 2 # Map normals from [-1, 1] to [0, 1] + elif shading_type in [ShadingEnum.LAMBERTIAN, ShadingEnum.PHONG]: + # Ambient light + ambient_light = ambient_ratio * albedo + # Dot product between light direction and normals + dot_product = torch.sum(normals * light_d, dim=1, keepdim=True) + # Lambertian term + diffuse_term = albedo * torch.clamp(dot_product, min=0) + + if shading_type == ShadingEnum.LAMBERTIAN: + return ambient_light + diffuse_term + elif shading_type == ShadingEnum.PHONG: + # Phong specular term + specular_term = ( + self.specular + * (self.shininess + 2) + * torch.pow(torch.clamp(dot_product, min=0), self.shininess) + / (2 * 3.14159) + ) + + return ambient_light + diffuse_term + specular_term + else: + raise ValueError(f"Unknown shading_type: {shading_type}") diff --git a/nemo/collections/multimodal/modules/nerf/materials/materials_base.py b/nemo/collections/multimodal/modules/nerf/materials/materials_base.py new file mode 100644 index 000000000000..393a5ffcc4fb --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/materials/materials_base.py @@ -0,0 +1,29 @@ +from enum import Enum +from typing import Literal, Optional + +from torch import nn + + +class ShadingEnum(str, Enum): + TEXTURELESS = "textureless" + NORMAL = "normal" + LAMBERTIAN = "lambertian" + PHONG = "phong" + + # TODO(ahmadki): + # Oren–Nayar + # Minnaert + # Cook–Torrance + # Ward anisotropic + # Hanrahan–Krueger + # Cel shading + # Gooch shading + + +class MaterialsBase(nn.Module): + """ + Base class for materials. + """ + + def __init__(self): + super(MaterialsBase, self).__init__() diff --git a/nemo/collections/multimodal/modules/nerf/renderers/__init__.py b/nemo/collections/multimodal/modules/nerf/renderers/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/modules/nerf/renderers/base_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/base_renderer.py new file mode 100644 index 000000000000..36b78218a695 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/renderers/base_renderer.py @@ -0,0 +1,18 @@ +import torch +import torch.nn as nn + +# TODO(ahmadki): make abstract +class BaseRenderer(nn.Module): + def __init__(self, bound, update_interval): + super().__init__() + self.bound = bound + aabb = torch.FloatTensor([-bound, -bound, -bound, bound, bound, bound]) + self.register_buffer('aabb', aabb) + self.update_interval = update_interval + + @torch.no_grad() + def update_step(self, epoch: int, global_step: int, decay: float = 0.95, **kwargs): + raise NotImplementedError + + def forward(self, rays_o, rays_d, return_normal_image=False, return_normal_perturb=False, **kwargs): + raise NotImplementedError diff --git a/nemo/collections/multimodal/modules/nerf/renderers/base_sdf_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/base_sdf_renderer.py new file mode 100644 index 000000000000..511908e826be --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/renderers/base_sdf_renderer.py @@ -0,0 +1,20 @@ +import torch + +from nemo.collections.multimodal.modules.renderer.base_renderer import RendererBase + + +class BaseSDFRenderer(RendererBase): + def __init__(self, bound): + super().__init__(bound) + + # TODO(ahmadki): needs a rework + @torch.no_grad() + def get_vertices_and_triangles(self, resolution=None, S=128): + deform = torch.tanh(self.deform) / self.grid_size + + vertices, triangles = self.dmtet(self.verts + deform, self.sdf, self.indices) + + vertices = vertices.detach().cpu().numpy() + triangles = triangles.detach().cpu().numpy() + + return vertices, triangles diff --git a/nemo/collections/multimodal/modules/nerf/renderers/base_volume_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/base_volume_renderer.py new file mode 100644 index 000000000000..a49f37dd0741 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/renderers/base_volume_renderer.py @@ -0,0 +1,7 @@ +from nemo.collections.multimodal.modules.nerf.geometry.nerf_base import DensityActivationEnum +from nemo.collections.multimodal.modules.renderer.base_renderer import RendererBase + + +class BaseVolumeRenderer(RendererBase): + def __init__(self, bound, update_interval): + super().__init__(bound, update_interval) diff --git a/nemo/collections/multimodal/modules/nerf/renderers/nerfacc_volume_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/nerfacc_volume_renderer.py new file mode 100644 index 000000000000..103c7b82ad23 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/renderers/nerfacc_volume_renderer.py @@ -0,0 +1,364 @@ +import collections +import math +from typing import Optional + +import torch +from nerfacc.estimators.occ_grid import OccGridEstimator +from nerfacc.grid import ray_aabb_intersect, traverse_grids +from nerfacc.volrend import accumulate_along_rays_, render_weight_from_density, rendering + +from nemo.collections.multimodal.modules.renderer.base_renderer import BaseRenderer + +Rays = collections.namedtuple("Rays", ("origins", "viewdirs")) + + +def namedtuple_map(fn, tup): + """Apply `fn` to each element of `tup` and cast to `tup`'s namedtuple.""" + return type(tup)(*(None if x is None else fn(x) for x in tup)) + + +def render_image_with_occgrid( + # scene + nerf: torch.nn.Module, + estimator: OccGridEstimator, + rays: Rays, + # rendering options + near_plane: float = 0.0, + far_plane: float = 1e10, + render_step_size: float = 1e-3, + render_bkgd: Optional[torch.Tensor] = None, + cone_angle: float = 0.0, + alpha_thre: float = 0.0, + # test options + test_chunk_size: int = 8192, +): + """Render the pixels of an image.""" + rays_shape = rays.origins.shape + if len(rays_shape) == 3: + height, width, _ = rays_shape + num_rays = height * width + rays = namedtuple_map(lambda r: r.reshape([num_rays] + list(r.shape[2:])), rays) + else: + num_rays, _ = rays_shape + + # TODO(ahmadki): optimize, cache result between sigma_fn and rgb_sigma_fn + def sigma_fn(t_starts, t_ends, ray_indices): + t_origins = chunk_rays.origins[ray_indices] + t_dirs = chunk_rays.viewdirs[ray_indices] + positions = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0 + sigmas = nerf.density(positions)['sigma'] + return sigmas + + def rgb_sigma_fn(t_starts, t_ends, ray_indices): + t_origins = chunk_rays.origins[ray_indices] + t_dirs = chunk_rays.viewdirs[ray_indices] + positions = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0 + sigmas, rgbs, normal = nerf( + positions=positions, view_dirs=None, light_dirs=t_dirs + ) # TODO(ahmadki): t_dirs is incorrect + return rgbs, sigmas + + results = [] + chunk = torch.iinfo(torch.int32).max if nerf.training else test_chunk_size + + for i in range(0, num_rays, chunk): + chunk_rays = namedtuple_map(lambda r: r[i : i + chunk], rays) + ray_indices, t_starts, t_ends = estimator.sampling( + chunk_rays.origins, + chunk_rays.viewdirs, + sigma_fn=sigma_fn, + near_plane=near_plane, + far_plane=far_plane, + render_step_size=render_step_size, + stratified=nerf.training, + cone_angle=cone_angle, + alpha_thre=alpha_thre, + ) + rgb, opacity, depth, extras = rendering( + t_starts, + t_ends, + ray_indices, + n_rays=chunk_rays.origins.shape[0], + rgb_sigma_fn=rgb_sigma_fn, + render_bkgd=render_bkgd, + ) + + weight = extras["weights"] + alpha = extras["alphas"] + + chunk_results = [rgb, opacity, depth, weight, alpha, len(t_starts)] + results.append(chunk_results) + + colors, opacities, depths, weights, alphas, n_rendering_samples = [ + torch.cat(r, dim=0) if isinstance(r[0], torch.Tensor) else r for r in zip(*results) + ] + + return ( + colors.view((*rays_shape[:-1], -1)), + opacities.view((*rays_shape[:-1], -1)), + depths.view((*rays_shape[:-1], -1)), + weights, + alphas, + sum(n_rendering_samples), + ) + + +@torch.no_grad() +def render_image_with_occgrid_test( + max_samples: int, + # scene + nerf: torch.nn.Module, + estimator: OccGridEstimator, + rays: Rays, + # rendering options + near_plane: float = 0.0, + far_plane: float = 1e10, + render_step_size: float = 1e-3, + render_bkgd: Optional[torch.Tensor] = None, + cone_angle: float = 0.0, + alpha_thre: float = 0.0, + early_stop_eps: float = 1e-4, +): + """Render the pixels of an image.""" + rays_shape = rays.origins.shape + if len(rays_shape) == 3: + height, width, _ = rays_shape + num_rays = height * width + rays = namedtuple_map(lambda r: r.reshape([num_rays] + list(r.shape[2:])), rays) + else: + num_rays, _ = rays_shape + + def rgb_sigma_fn(t_starts, t_ends, ray_indices): + t_origins = rays.origins[ray_indices] + t_dirs = rays.viewdirs[ray_indices] + positions = t_origins + t_dirs * (t_starts[:, None] + t_ends[:, None]) / 2.0 + sigmas, rgbs, normal = nerf( + positions=positions, view_dirs=None, light_dirs=t_dirs + ) # TODO(ahmadki): t_dirs is incorrect ? + return rgbs, sigmas + + device = rays.origins.device + opacity = torch.zeros(num_rays, 1, device=device) + depth = torch.zeros(num_rays, 1, device=device) + rgb = torch.zeros(num_rays, 3, device=device) + + ray_mask = torch.ones(num_rays, device=device).bool() + + # 1 for synthetic scenes, 4 for real scenes + min_samples = 1 if cone_angle == 0 else 4 + + iter_samples = total_samples = 0 + + rays_o = rays.origins + rays_d = rays.viewdirs + + near_planes = torch.full_like(rays_o[..., 0], fill_value=near_plane) + far_planes = torch.full_like(rays_o[..., 0], fill_value=far_plane) + + t_mins, t_maxs, hits = ray_aabb_intersect(rays_o, rays_d, estimator.aabbs) + + n_grids = estimator.binaries.size(0) + + if n_grids > 1: + t_sorted, t_indices = torch.sort(torch.cat([t_mins, t_maxs], -1), -1) + else: + t_sorted = torch.cat([t_mins, t_maxs], -1) + t_indices = torch.arange(0, n_grids * 2, device=t_mins.device, dtype=torch.int64).expand(num_rays, n_grids * 2) + + opc_thre = 1 - early_stop_eps + + while iter_samples < max_samples: + + n_alive = ray_mask.sum().item() + if n_alive == 0: + break + + # the number of samples to add on each ray + n_samples = max(min(num_rays // n_alive, 64), min_samples) + iter_samples += n_samples + + # ray marching + (intervals, samples, termination_planes) = traverse_grids( + # rays + rays_o, # [n_rays, 3] + rays_d, # [n_rays, 3] + # grids + estimator.binaries, # [m, resx, resy, resz] + estimator.aabbs, # [m, 6] + # options + near_planes, # [n_rays] + far_planes, # [n_rays] + render_step_size, + cone_angle, + n_samples, + True, + ray_mask, + # pre-compute intersections + t_sorted, # [n_rays, m*2] + t_indices, # [n_rays, m*2] + hits, # [n_rays, m] + ) + t_starts = intervals.vals[intervals.is_left] + t_ends = intervals.vals[intervals.is_right] + ray_indices = samples.ray_indices[samples.is_valid] + packed_info = samples.packed_info + + # get rgb and sigma from radiance field + rgbs, sigmas = rgb_sigma_fn(t_starts, t_ends, ray_indices) + # volume rendering using native cuda scan + weights, _, alphas = render_weight_from_density( + t_starts, + t_ends, + sigmas, + ray_indices=ray_indices, + n_rays=num_rays, + prefix_trans=1 - opacity[ray_indices].squeeze(-1), + ) + if alpha_thre > 0: + vis_mask = alphas >= alpha_thre + ray_indices, rgbs, weights, t_starts, t_ends = ( + ray_indices[vis_mask], + rgbs[vis_mask], + weights[vis_mask], + t_starts[vis_mask], + t_ends[vis_mask], + ) + + accumulate_along_rays_( + weights, values=rgbs, ray_indices=ray_indices, outputs=rgb, + ) + accumulate_along_rays_( + weights, values=None, ray_indices=ray_indices, outputs=opacity, + ) + accumulate_along_rays_( + weights, values=(t_starts + t_ends)[..., None] / 2.0, ray_indices=ray_indices, outputs=depth, + ) + # update near_planes using termination planes + near_planes = termination_planes + # update rays status + ray_mask = torch.logical_and( + # early stopping + opacity.view(-1) <= opc_thre, + # remove rays that have reached the far plane + packed_info[:, 1] == n_samples, + ) + total_samples += ray_indices.shape[0] + + if render_bkgd is not None: + rgb = rgb + render_bkgd * (1.0 - opacity) + + depth = depth / opacity.clamp_min(torch.finfo(rgbs.dtype).eps) + + return ( + rgb.view((*rays_shape[:-1], -1)), + opacity.view((*rays_shape[:-1], -1)), + depth.view((*rays_shape[:-1], -1)), + weights, + alphas, + total_samples, + ) + + +class NerfaccVolumeBaseRenderer(BaseRenderer): + def __init__( + self, + bound, + grid_resolution, + grid_levels, + render_step_size=1e-3, + near_plane=0.2, + cone_angle=0.004, + alpha_thre=1e-2, + ): + + super().__init__(bound) + + self.grid_resolution = grid_resolution + self.grid_levels = grid_levels + self.render_step_size = render_step_size + self.near_plane = near_plane + self.cone_angle = cone_angle + self.alpha_thre = alpha_thre + self.nerf = None + + self.estimator = OccGridEstimator(roi_aabb=self.aabb, resolution=self.grid_resolution, levels=self.grid_levels) + + @torch.no_grad() # TODO(ahmadki) + def update_step( + self, + epoch: int, + global_step: int, + update_interval: int = 16, + decay: float = 0.95, + occ_thre: float = 0.01, + warmup_steps: int = 256, + **kwargs + ): + def occ_eval_fn(x): + density = self.nerf.forward_density(x) + return density * self.render_step_size + + self.estimator.update_every_n_steps( + step=global_step, + occ_eval_fn=occ_eval_fn, + occ_thre=occ_thre, + ema_decay=decay, + warmup_steps=warmup_steps, + n=update_interval, + ) + + def forward(self, rays_o, rays_d, mvp, h, w, staged=False, max_ray_batch=4096, step=None, **kwargs): + return self._render(rays_o=rays_o, rays_d=rays_d, step=step, **kwargs) + + def _render( + self, + rays_o, + rays_d, + light_d=None, + ambient_ratio=1.0, + shading='albedo', + bg_color=None, + perturb=False, + T_thresh=1e-4, + binarize=False, + step=None, + **kwargs + ): + rays_o = rays_o.contiguous().view(-1, 3) + rays_d = rays_d.contiguous().view(-1, 3) + + N = rays_o.shape[0] # N = B * N, in fact + + rays = Rays(origins=rays_o, viewdirs=rays_d) + + if self.training: + rgb, acc, depth, weights, alphas, n_rendering_samples = render_image_with_occgrid( + nerf=self.nerf, + estimator=self.estimator, + rays=rays, + near_plane=self.near_plane, + render_step_size=self.render_step_size, + render_bkgd=bg_color, + cone_angle=self.cone_angle, + alpha_thre=self.alpha_thre, + ) + else: + rgb, acc, depth, weights, alphas, n_rendering_samples = render_image_with_occgrid_test( + max_samples=1024, + nerf=self.nerf, + estimator=self.estimator, + rays=rays, + near_plane=self.near_plane, + render_step_size=self.render_step_size, + render_bkgd=bg_color, + cone_angle=self.cone_angle, + alpha_thre=self.alpha_thre, + ) + + results = {} + results['weights'] = weights + results['image'] = rgb.view(1, -1, 3) + results['depth'] = depth.view(1, -1) + results['weights_sum'] = acc.view(1, -1) + + return results diff --git a/nemo/collections/multimodal/modules/nerf/renderers/nvdiffrast_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/nvdiffrast_renderer.py new file mode 100644 index 000000000000..9b23e1db890c --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/renderers/nvdiffrast_renderer.py @@ -0,0 +1,222 @@ +import math + +import numpy as np +import nvdiffrast.torch as dr +import torch +import torch.nn.functional as F + +from nemo.collections.multimodal.modules.nerf.geometry.dmtet import DeepMarchingTetrahedra +from nemo.collections.multimodal.modules.nerf.geometry.nerf_base import DensityActivationEnum +from nemo.collections.multimodal.modules.nerf.renderers.base_renderer import BaseRenderer + + +# TODO: self.density_thresh, self.mean_density need a rework, they can be infered at run time +# and shouldn't be loaded from the checkpoint +class NVDiffRastRenderer(BaseRenderer): + def __init__(self, bound, update_interval, grid_resolution, density_thresh, quartet_file): + + super().__init__(bound, update_interval) + + self.grid_resolution = grid_resolution + self.density_thresh = density_thresh + self.quartet_file = quartet_file + + self.cascade = 1 + math.ceil(math.log2(bound)) + density_grid = torch.zeros([self.cascade, self.grid_resolution ** 3]) # [CAS, H * H * H] + density_bitfield = torch.zeros( + self.cascade * self.grid_resolution ** 3 // 8, dtype=torch.uint8 + ) # [CAS * H * H * H // 8] + self.register_buffer('density_grid', density_grid) + self.register_buffer('density_bitfield', density_bitfield) + self.mean_density = 0 + self.iter_density = 0 + + # load dmtet vertices + # TODO(ahmadki): hard coded devices + tets = np.load(quartet_file) + self.verts = -torch.tensor(tets['vertices'], dtype=torch.float32, device='cuda') * 2 # covers [-1, 1] + self.indices = torch.tensor(tets['indices'], dtype=torch.long, device='cuda') + self.tet_scale = torch.tensor([1, 1, 1], dtype=torch.float32, device='cuda') + self.dmtet = DeepMarchingTetrahedra(device='cuda') + + # vert sdf and deform + sdf = torch.nn.Parameter(torch.zeros_like(self.verts[..., 0]), requires_grad=True) + self.register_parameter('sdf', sdf) + deform = torch.nn.Parameter(torch.zeros_like(self.verts), requires_grad=True) + self.register_parameter('deform', deform) + + edges = torch.tensor( + [0, 1, 0, 2, 0, 3, 1, 2, 1, 3, 2, 3], dtype=torch.long, device="cuda" + ) # six edges for each tetrahedron. + all_edges = self.indices[:, edges].reshape(-1, 2) # [M * 6, 2] + all_edges_sorted = torch.sort(all_edges, dim=1)[0] + self.all_edges = torch.unique(all_edges_sorted, dim=0) + + self.initialized = False # TODO(ahmadki): not a good approach + + self.glctx = dr.RasterizeCudaContext() + + # TODO(ahmadki): not a good approach + self.nerf = None + self.material = None + self.background = None + + # TODO(ahmkadi): doesn't look good to me !! + @torch.no_grad() + def update_step(self, epoch: int, global_step: int, decay: float = 0.95, S: int = 128, **kwargs): + pass + + @torch.no_grad() + def init_tet(self): + # TODO(ahmadki): a better approach would be to have a global nerf representation (mesh) that + # we can init the tets from. this would work with checkpoints. + + # TODO(ahmadki): a placeholder, but it works for now + self.mean_density = 300 + density_thresh = min(self.mean_density, self.density_thresh) + + if self.nerf.density_activation == DensityActivationEnum.SOFTPLUS: + density_thresh = density_thresh * 25 + + # Get initial sigma + sigma = self.nerf.forward_density(positions=self.verts) + mask = sigma > density_thresh + valid_verts = self.verts[mask] + self.tet_scale = valid_verts.abs().amax(dim=0) + 1e-1 + + # Scale vertices + self.verts = self.verts * self.tet_scale + + # get sigma using the scaled vertices + sigma = self.nerf.forward_density(positions=self.verts) + self.sdf.data += (sigma - density_thresh).clamp(-1, 1) + + def forward( + self, + rays_o, + rays_d, + mvp, + light_d=None, + ambient_ratio=1.0, + shading_type=None, + return_normal_image=False, + return_vertices=False, + return_faces=False, + return_faces_normals=False, + **kwargs + ): + if not self.initialized: + self.init_tet() + self.initialized = True + return self._render( + rays_o=rays_o, + rays_d=rays_d, + mvp=mvp, + light_d=light_d, + ambient_ratio=ambient_ratio, + shading_type=shading_type, + return_normal_image=return_normal_image, + return_vertices=return_vertices, + return_faces=return_faces, + return_faces_normals=return_faces_normals, + **kwargs + ) + + def _render( + self, + rays_o, + rays_d, + mvp, + light_d=None, + ambient_ratio=1.0, + shading_type=None, + return_normal_image=False, + return_vertices=False, + return_faces=False, + return_faces_normals=False, + **kwargs + ): + # mvp: [B, 4, 4] + B, H, W, _ = rays_o.shape + + # TODO(ahmadki): move to dataset + # random sample light_d if not provided + if light_d is None: + # gaussian noise around the ray origin, so the light always face the view dir (avoid dark face) + light_d = rays_o + torch.randn(3, device=rays_o.device) + light_d = F.normalize(light_d) + + results = {} + + # get mesh + deform = torch.tanh(self.deform) / self.grid_resolution + + verts, faces = self.dmtet(self.verts + deform, self.sdf, self.indices) + + # get normals + i0, i1, i2 = faces[:, 0], faces[:, 1], faces[:, 2] + v0, v1, v2 = verts[i0, :], verts[i1, :], verts[i2, :] + + faces = faces.int() + + face_normals = torch.cross(v1 - v0, v2 - v0) + face_normals = F.normalize(face_normals) + + vn = torch.zeros_like(verts) + vn.scatter_add_(0, i0[:, None].repeat(1, 3), face_normals) + vn.scatter_add_(0, i1[:, None].repeat(1, 3), face_normals) + vn.scatter_add_(0, i2[:, None].repeat(1, 3), face_normals) + + vn = torch.where( + torch.sum(vn * vn, -1, keepdim=True) > 1e-20, + vn, + torch.tensor([0.0, 0.0, 1.0], dtype=torch.float32, device=vn.device), + ) + + # rasterization + verts_clip = torch.bmm( + F.pad(verts, pad=(0, 1), mode='constant', value=1.0).unsqueeze(0).repeat(mvp.shape[0], 1, 1), + mvp.permute(0, 2, 1), + ).float() # [B, N, 4] + rast, _ = dr.rasterize(self.glctx, verts_clip, faces, (H, W)) + + alpha = (rast[..., 3:] > 0).float() + xyzs, _ = dr.interpolate(verts.unsqueeze(0), rast, faces) # [B, H, W, 3] + normal, _ = dr.interpolate(vn.unsqueeze(0).contiguous(), rast, faces) + normal = F.normalize(normal) + + xyzs = xyzs.view(-1, 3) + mask = (rast[..., 3:] > 0).view(-1).detach() + + # do the lighting here since we have normal from mesh now. + albedo = torch.zeros_like(xyzs, dtype=torch.float32) + if mask.any(): + masked_albedo = self.nerf.forward_features(positions=xyzs[mask]) + albedo[mask] = masked_albedo.float() + albedo = albedo.view(B, H, W, 3) + fg_color = self.material( + albedo=albedo, normals=normal, light_d=light_d, ambient_ratio=ambient_ratio, shading_type=shading_type + ) + + fg_color = dr.antialias(fg_color, rast, verts_clip, faces).clamp(0, 1) # [B, H, W, 3] + alpha = dr.antialias(alpha, rast, verts_clip, faces).clamp(0, 1) # [B, H, W, 1] + + # mix background color + bg_color = self.background(rays_d=rays_d) # [N, 3] + + depth = rast[:, :, :, [2]] # [B, H, W] + color = fg_color + (1 - alpha) * bg_color + + results['depth'] = depth + results['image'] = color + if return_normal_image: + results['normal_image'] = dr.antialias((normal + 1) / 2, rast, verts_clip, faces).clamp( + 0, 1 + ) # [B, H, W, 3] + if return_vertices: + results['vertices'] = verts + if return_faces: + results['faces'] = faces + if return_faces_normals: + results['face_normals'] = face_normals + return results diff --git a/nemo/collections/multimodal/modules/nerf/renderers/torchngp_volume_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/torchngp_volume_renderer.py new file mode 100644 index 000000000000..46096857a773 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/renderers/torchngp_volume_renderer.py @@ -0,0 +1,275 @@ +import math + +import torch +import torch.nn.functional as F + +import nemo.collections.multimodal.modules.nerf.utils.torch_ngp.raymarching as raymarching +from nemo.collections.multimodal.modules.nerf.materials.materials_base import ShadingEnum +from nemo.collections.multimodal.modules.nerf.renderers.base_renderer import BaseRenderer + + +class TorchNGPVolumeRenderer(BaseRenderer): + def __init__(self, bound, update_interval, grid_resolution, density_thresh, max_steps, dt_gamma): + + super().__init__(bound, update_interval) + + self.cascade = 1 + math.ceil(math.log2(bound)) + self.grid_resolution = grid_resolution + self.density_thresh = density_thresh + self.dt_gamma = dt_gamma + self.max_steps = max_steps + + # density grid + # TODO(ahmadki): needs rework + density_grid = torch.zeros([self.cascade, self.grid_resolution ** 3]) # [CAS, H * H * H] + density_bitfield = torch.zeros( + self.cascade * self.grid_resolution ** 3 // 8, dtype=torch.uint8 + ) # [CAS * H * H * H // 8] + self.register_buffer('density_grid', density_grid) + self.register_buffer('density_bitfield', density_bitfield) + self.mean_density = 0 + self.iter_density = 0 + + # TODO(ahmadki): needs rework + self.nerf = None + self.material = None + self.background = None + + @torch.no_grad() + def update_step(self, epoch: int, global_step: int, decay: float = 0.95, S: int = 128, **kwargs): + if global_step % self.update_interval != 0: + return + + ### update density grid + tmp_grid = -torch.ones_like(self.density_grid) + + X = torch.arange(self.grid_resolution, dtype=torch.int32, device=self.aabb.device).split(S) + Y = torch.arange(self.grid_resolution, dtype=torch.int32, device=self.aabb.device).split(S) + Z = torch.arange(self.grid_resolution, dtype=torch.int32, device=self.aabb.device).split(S) + + for xs in X: + for ys in Y: + for zs in Z: + + # construct points + xx, yy, zz = torch.meshgrid(xs, ys, zs, indexing='ij') + coords = torch.cat( + [xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1 + ) # [N, 3], in [0, 128) + indices = raymarching.morton3D(coords).long() # [N] + xyzs = 2 * coords.float() / (self.grid_resolution - 1) - 1 # [N, 3] in [-1, 1] + + # cascading + for cas in range(self.cascade): + bound = min(2 ** cas, self.bound) + half_grid_resolution = bound / self.grid_resolution + # scale to current cascade's resolution + cas_xyzs = xyzs * (bound - half_grid_resolution) + # add noise in [-hgs, hgs] + cas_xyzs += (torch.rand_like(cas_xyzs) * 2 - 1) * half_grid_resolution + # query density + density = self.nerf.forward_density(cas_xyzs).reshape(-1).detach() + # assign + tmp_grid[cas, indices] = density + # ema update + valid_mask = self.density_grid >= 0 + self.density_grid[valid_mask] = torch.maximum(self.density_grid[valid_mask] * decay, tmp_grid[valid_mask]) + self.mean_density = torch.mean(self.density_grid[valid_mask]).item() + self.iter_density += 1 + + # convert to bitfield + density_thresh = min(self.mean_density, self.density_thresh) + self.density_bitfield = raymarching.packbits(self.density_grid, density_thresh, self.density_bitfield) + + def forward( + self, + rays_o, + rays_d, + light_d=None, + ambient_ratio=1.0, + shading_type=None, + return_normal_image=False, + return_normal_perturb=False, + **kwargs + ): + return self._render( + rays_o=rays_o, + rays_d=rays_d, + light_d=light_d, + ambient_ratio=ambient_ratio, + shading_type=shading_type, + return_normal_image=return_normal_image, + return_normal_perturb=return_normal_perturb, + **kwargs + ) + + # TODO(ahmadki): return_normal_image is always False ? + def _render( + self, + rays_o, + rays_d, + light_d=None, + ambient_ratio=1.0, + shading_type=None, + return_normal_image=False, + return_normal_perturb=False, + perturb=False, + T_thresh=1e-4, + binarize=False, + **kwargs + ): + # rays_o, rays_d: [B, H, W, 3] + B, H, W, _ = rays_o.shape + + # group all rays into a single batch + rays_o = rays_o.contiguous().view(-1, 3) + rays_d = rays_d.contiguous().view(-1, 3) + num_rays = rays_o.shape[0] # num_rays = B * H * W + + # pre-calculate near far + nears, fars = raymarching.near_far_from_aabb(rays_o, rays_d, self.aabb) + + # random sample light_d if not provided + # TODO(ahmadki): move to dataset + if light_d is None: + # gaussian noise around the ray origin, so the light always face the view dir (avoid dark face) + light_d = rays_o + torch.randn(3, device=rays_o.device) + light_d = F.normalize(light_d) + + normal_image = None + normals_perturb = None + weights = None + + if self.training: + positions, dirs, ts, rays = raymarching.march_rays_train( + rays_o, + rays_d, + self.bound, + self.density_bitfield, + self.cascade, + self.grid_resolution, + nears, + fars, + perturb, + self.dt_gamma, + self.max_steps, + ) + dirs = F.normalize(dirs) + + if light_d.shape[0] > 1: + flatten_rays = raymarching.flatten_rays(rays, positions.shape[0]).long() + light_d = light_d[flatten_rays] + + return_normal = (shading_type is not None) or return_normal_image + sigmas, albedo, normals = self.nerf(positions=positions, return_normal=return_normal) + + fg_color = self.material( + albedo=albedo, normals=normals, light_d=light_d, ambient_ratio=ambient_ratio, shading_type=shading_type + ) + + weights, opacity, depth, image = raymarching.composite_rays_train( + sigmas, fg_color, ts, rays, T_thresh, binarize + ) + + if return_normal_image and normals is not None: + _, _, _, normal_image = raymarching.composite_rays_train( + sigmas.detach(), (normals + 1) / 2, ts, rays, T_thresh, binarize + ) + + if return_normal_perturb: + perturb_positions = positions + torch.randn_like(positions) * 1e-2 + normals_perturb = self.normal(positions=perturb_positions) + + else: + # allocate tensors + image = torch.zeros(num_rays, 3, device=rays_o.device) + depth = torch.zeros(num_rays, device=rays_o.device) + opacity = torch.zeros(num_rays, device=rays_o.device) + + n_alive = num_rays + rays_alive = torch.arange(n_alive, dtype=torch.int32, device=rays_o.device) + rays_t = nears.clone() + + step = 0 + + while step < self.max_steps: # hard coded max step + # count alive rays + n_alive = rays_alive.shape[0] + + # exit loop + if n_alive <= 0: + break + + # decide compact_steps + n_step = max(min(num_rays // n_alive, 8), 1) + + positions, dirs, ts = raymarching.march_rays( + n_alive, + n_step, + rays_alive, + rays_t, + rays_o, + rays_d, + self.bound, + self.density_bitfield, + self.cascade, + self.grid_resolution, + nears, + fars, + perturb if step == 0 else False, + self.dt_gamma, + self.max_steps, + ) + dirs = F.normalize(dirs) + + return_normal = shading_type not in [None, ShadingEnum.TEXTURELESS] + sigmas, albedo, normals = self.nerf(positions=positions, return_normal=return_normal) + + fg_color = self.material( + albedo=albedo, + normals=normals, + light_d=light_d, + ambient_ratio=ambient_ratio, + shading_type=shading_type, + ) + raymarching.composite_rays( + n_alive, + n_step, + rays_alive, + rays_t, + sigmas, + fg_color, + ts, + opacity, + depth, + image, + T_thresh, + binarize, + ) + + # TODO(ahmadki): add optoin to return normal_image, like in training + + rays_alive = rays_alive[rays_alive >= 0] + + step += n_step + + # mix background color + bg_color = self.background(rays_d) # [N, 3] + image = image + (1 - opacity).unsqueeze(-1) * bg_color + + results = { + "image": image.view(B, H, W, 3), + "depth": depth.view(B, H, W, 1), + "opacity": opacity.view(B, H, W, 1), + "dirs": dirs, + } + if normals is not None: + results["normals"] = normals + if weights is not None: + results["weights"] = weights + if normal_image is not None: + results["normal_image"] = normal_image.view(B, H, W, 3) + if normals_perturb is not None: + results["normal_perturb"] = normals_perturb + + return results diff --git a/nemo/collections/multimodal/modules/nerf/utils/__init__.py b/nemo/collections/multimodal/modules/nerf/utils/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/modules/nerf/utils/activation.py b/nemo/collections/multimodal/modules/nerf/utils/activation.py new file mode 100644 index 000000000000..fa7f3c60829a --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/utils/activation.py @@ -0,0 +1,20 @@ +import torch +from torch.autograd import Function +from torch.cuda.amp import custom_bwd, custom_fwd + + +class _trunc_exp(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float) + def forward(ctx, x): + ctx.save_for_backward(x) + return torch.exp(x) + + @staticmethod + @custom_bwd + def backward(ctx, g): + x = ctx.saved_tensors[0] + return g * torch.exp(x.clamp(max=15)) + + +trunc_exp = _trunc_exp.apply diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/__init__.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/encoding.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/encoding.py new file mode 100644 index 000000000000..3d2c1e8c74e6 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/encoding.py @@ -0,0 +1,137 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class FreqEncoder_torch(nn.Module): + def __init__( + self, + input_dim, + max_freq_log2, + N_freqs, + log_sampling=True, + include_input=True, + periodic_fns=(torch.sin, torch.cos), + ): + + super().__init__() + + self.input_dim = input_dim + self.include_input = include_input + self.periodic_fns = periodic_fns + self.N_freqs = N_freqs + + self.output_dim = 0 + if self.include_input: + self.output_dim += self.input_dim + + self.output_dim += self.input_dim * N_freqs * len(self.periodic_fns) + + if log_sampling: + self.freq_bands = 2 ** torch.linspace(0, max_freq_log2, N_freqs) + else: + self.freq_bands = torch.linspace(2 ** 0, 2 ** max_freq_log2, N_freqs) + + self.freq_bands = self.freq_bands.numpy().tolist() + + def forward(self, input, max_level=None, **kwargs): + + if max_level is None: + max_level = self.N_freqs + else: + max_level = int(max_level * self.N_freqs) + + out = [] + if self.include_input: + out.append(input) + + for i in range(max_level): + freq = self.freq_bands[i] + for p_fn in self.periodic_fns: + out.append(p_fn(input * freq)) + + # append 0 + if self.N_freqs - max_level > 0: + out.append( + torch.zeros( + input.shape[0], + (self.N_freqs - max_level) * 2 * input.shape[1], + device=input.device, + dtype=input.dtype, + ) + ) + + out = torch.cat(out, dim=-1) + + return out + + +def get_encoder( + encoder_type, + input_dim=3, + multires=6, + degree=4, + num_levels=16, + level_dim=2, + base_resolution=16, + log2_hashmap_size=19, + desired_resolution=2048, + align_corners=False, + interpolation='linear', + **kwargs +): + + if encoder_type is None: + return lambda x, **kwargs: x, input_dim + + elif encoder_type == 'frequency_torch': + encoder = FreqEncoder_torch( + input_dim=input_dim, max_freq_log2=multires - 1, N_freqs=multires, log_sampling=True + ) + + elif encoder_type == 'frequency': # CUDA implementation, faster than torch. + from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.freqencoder import FreqEncoder + + encoder = FreqEncoder(input_dim=input_dim, degree=multires) + + elif encoder_type == 'sphere_harmonics': + from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.shencoder import SHEncoder + + encoder = SHEncoder(input_dim=input_dim, degree=degree) + + elif encoder_type == 'hashgrid': + from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.gridencoder import GridEncoder + + encoder = GridEncoder( + input_dim=input_dim, + num_levels=num_levels, + level_dim=level_dim, + base_resolution=base_resolution, + log2_hashmap_size=log2_hashmap_size, + desired_resolution=desired_resolution, + gridtype='hash', + align_corners=align_corners, + interpolation=interpolation, + ) + + elif encoder_type == 'tiledgrid': + from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.gridencoder import GridEncoder + + encoder = GridEncoder( + input_dim=input_dim, + num_levels=num_levels, + level_dim=level_dim, + base_resolution=base_resolution, + log2_hashmap_size=log2_hashmap_size, + desired_resolution=desired_resolution, + gridtype='tiled', + align_corners=align_corners, + interpolation=interpolation, + ) + + else: + raise NotImplementedError( + 'Unknown encoder type, choose from [None, frequency, sphere_harmonics, hashgrid, tiledgrid]' + ) + + return encoder, encoder.output_dim diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/freqencoder.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/freqencoder.py new file mode 100644 index 000000000000..1c217f9c8b7d --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/freqencoder.py @@ -0,0 +1,73 @@ +import _freqencoder as _backend +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.cuda.amp import custom_bwd, custom_fwd + + +class _freq_encoder(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) # force float32 for better precision + def forward(ctx, inputs, degree, output_dim): + # inputs: [B, input_dim], float + # RETURN: [B, F], float + + if not inputs.is_cuda: + inputs = inputs.cuda() + inputs = inputs.contiguous() + + B, input_dim = inputs.shape # batch size, coord dim + + outputs = torch.empty(B, output_dim, dtype=inputs.dtype, device=inputs.device) + + _backend.freq_encode_forward(inputs, B, input_dim, degree, output_dim, outputs) + + ctx.save_for_backward(inputs, outputs) + ctx.dims = [B, input_dim, degree, output_dim] + + return outputs + + @staticmethod + # @once_differentiable + @custom_bwd + def backward(ctx, grad): + # grad: [B, C * C] + + grad = grad.contiguous() + inputs, outputs = ctx.saved_tensors + B, input_dim, degree, output_dim = ctx.dims + + grad_inputs = torch.zeros_like(inputs) + _backend.freq_encode_backward(grad, outputs, B, input_dim, degree, output_dim, grad_inputs) + + return grad_inputs, None, None + + +freq_encode = _freq_encoder.apply + + +class FreqEncoder(nn.Module): + def __init__(self, input_dim=3, degree=4): + super().__init__() + + self.input_dim = input_dim + self.degree = degree + self.output_dim = input_dim + input_dim * 2 * degree + + def __repr__(self): + return f"FreqEncoder: input_dim={self.input_dim} degree={self.degree} output_dim={self.output_dim}" + + def forward(self, inputs, **kwargs): + # inputs: [..., input_dim] + # return: [..., ] + + prefix_shape = list(inputs.shape[:-1]) + inputs = inputs.reshape(-1, self.input_dim) + + outputs = freq_encode(inputs, self.degree, self.output_dim) + + outputs = outputs.reshape(prefix_shape + [self.output_dim]) + + return outputs diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/gridencoder.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/gridencoder.py new file mode 100644 index 000000000000..1a7487aee6f3 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/gridencoder.py @@ -0,0 +1,287 @@ +import math + +import _gridencoder as _backend +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.cuda.amp import custom_bwd, custom_fwd + +_gridtype_to_id = { + 'hash': 0, + 'tiled': 1, +} + +_interp_to_id = { + 'linear': 0, + 'smoothstep': 1, +} + + +class _grid_encode(Function): + @staticmethod + @custom_fwd + def forward( + ctx, + inputs, + embeddings, + offsets, + per_level_scale, + base_resolution, + calc_grad_inputs=False, + gridtype=0, + align_corners=False, + interpolation=0, + max_level=None, + ): + # inputs: [B, D], float in [0, 1] + # embeddings: [sO, C], float + # offsets: [L + 1], int + # RETURN: [B, F], float + + inputs = inputs.contiguous() + + B, D = inputs.shape # batch size, coord dim + L = offsets.shape[0] - 1 # level + C = embeddings.shape[1] # embedding dim for each level + S = np.log2(per_level_scale) # resolution multiplier at each level, apply log2 for later CUDA exp2f + H = base_resolution # base resolution + + max_level = L if max_level is None else max(min(int(math.ceil(max_level * L)), L), 1) + + # manually handle autocast (only use half precision embeddings, inputs must be float for enough precision) + # if C % 2 != 0, force float, since half for atomicAdd is very slow. + if torch.is_autocast_enabled() and C % 2 == 0: + embeddings = embeddings.to(torch.half) + + # L first, optimize cache for cuda kernel, but needs an extra permute later + outputs = torch.empty(L, B, C, device=inputs.device, dtype=embeddings.dtype) + + # zero init if we only calculate partial levels + if max_level < L: + outputs.zero_() + + if calc_grad_inputs: + dy_dx = torch.empty(B, L * D * C, device=inputs.device, dtype=embeddings.dtype) + if max_level < L: + dy_dx.zero_() + else: + dy_dx = None + + _backend.grid_encode_forward( + inputs, + embeddings, + offsets, + outputs, + B, + D, + C, + L, + max_level, + S, + H, + dy_dx, + gridtype, + align_corners, + interpolation, + ) + + # permute back to [B, L * C] + outputs = outputs.permute(1, 0, 2).reshape(B, L * C) + + ctx.save_for_backward(inputs, embeddings, offsets, dy_dx) + ctx.dims = [B, D, C, L, S, H, gridtype, interpolation, max_level] + ctx.align_corners = align_corners + + return outputs + + @staticmethod + # @once_differentiable + @custom_bwd + def backward(ctx, grad): + + inputs, embeddings, offsets, dy_dx = ctx.saved_tensors + B, D, C, L, S, H, gridtype, interpolation, max_level = ctx.dims + align_corners = ctx.align_corners + + # grad: [B, L * C] --> [L, B, C] + grad = grad.view(B, L, C).permute(1, 0, 2).contiguous() + + grad_embeddings = torch.zeros_like(embeddings) + + if dy_dx is not None: + grad_inputs = torch.zeros_like(inputs, dtype=embeddings.dtype) + else: + grad_inputs = None + + _backend.grid_encode_backward( + grad, + inputs, + embeddings, + offsets, + grad_embeddings, + B, + D, + C, + L, + max_level, + S, + H, + dy_dx, + grad_inputs, + gridtype, + align_corners, + interpolation, + ) + + if dy_dx is not None: + grad_inputs = grad_inputs.to(inputs.dtype) + + return grad_inputs, grad_embeddings, None, None, None, None, None, None, None, None + + +grid_encode = _grid_encode.apply + + +class GridEncoder(nn.Module): + def __init__( + self, + input_dim=3, + num_levels=16, + level_dim=2, + per_level_scale=2, + base_resolution=16, + log2_hashmap_size=19, + desired_resolution=None, + gridtype='hash', + align_corners=False, + interpolation='linear', + ): + super().__init__() + + # the finest resolution desired at the last level, if provided, overridee per_level_scale + if desired_resolution is not None: + per_level_scale = np.exp2(np.log2(desired_resolution / base_resolution) / (num_levels - 1)) + + self.input_dim = input_dim # coord dims, 2 or 3 + self.num_levels = num_levels # num levels, each level multiply resolution by 2 + self.level_dim = level_dim # encode channels per level + self.per_level_scale = per_level_scale # multiply resolution by this scale at each level. + self.log2_hashmap_size = log2_hashmap_size + self.base_resolution = base_resolution + self.output_dim = num_levels * level_dim + self.gridtype = gridtype + self.gridtype_id = _gridtype_to_id[gridtype] # "tiled" or "hash" + self.interpolation = interpolation + self.interp_id = _interp_to_id[interpolation] # "linear" or "smoothstep" + self.align_corners = align_corners + + # allocate parameters + offsets = [] + offset = 0 + self.max_params = 2 ** log2_hashmap_size + for i in range(num_levels): + resolution = int(np.ceil(base_resolution * per_level_scale ** i)) + params_in_level = min(self.max_params, (resolution) ** input_dim) # limit max number + params_in_level = int(np.ceil(params_in_level / 8) * 8) # make divisible + offsets.append(offset) + offset += params_in_level + offsets.append(offset) + offsets = torch.from_numpy(np.array(offsets, dtype=np.int32)) + self.register_buffer('offsets', offsets) + + self.n_params = offsets[-1] * level_dim + + # parameters + self.embeddings = nn.Parameter(torch.empty(offset, level_dim)) + + self.reset_parameters() + + def reset_parameters(self): + std = 1e-4 + self.embeddings.data.uniform_(-std, std) + + def __repr__(self): + return f"GridEncoder: input_dim={self.input_dim} num_levels={self.num_levels} level_dim={self.level_dim} resolution={self.base_resolution} -> {int(round(self.base_resolution * self.per_level_scale ** (self.num_levels - 1)))} per_level_scale={self.per_level_scale:.4f} params={tuple(self.embeddings.shape)} gridtype={self.gridtype} align_corners={self.align_corners} interpolation={self.interpolation}" + + def forward(self, inputs, bound=1, max_level=None): + # inputs: [..., input_dim], normalized real world positions in [-bound, bound] + # max_level: only calculate first max_level levels (None will use all levels) + # return: [..., num_levels * level_dim] + + inputs = (inputs + bound) / (2 * bound) # map to [0, 1] + + # print('inputs', inputs.shape, inputs.dtype, inputs.min().item(), inputs.max().item()) + + prefix_shape = list(inputs.shape[:-1]) + inputs = inputs.view(-1, self.input_dim) + + outputs = grid_encode( + inputs, + self.embeddings, + self.offsets, + self.per_level_scale, + self.base_resolution, + inputs.requires_grad, + self.gridtype_id, + self.align_corners, + self.interp_id, + max_level, + ) + outputs = outputs.view(prefix_shape + [self.output_dim]) + + # print('outputs', outputs.shape, outputs.dtype, outputs.min().item(), outputs.max().item()) + + return outputs + + # always run in float precision! + @torch.cuda.amp.autocast(enabled=False) + def grad_total_variation(self, weight=1e-7, inputs=None, bound=1, B=1000000): + # inputs: [..., input_dim], float in [-b, b], location to calculate TV loss. + + D = self.input_dim + C = self.embeddings.shape[1] # embedding dim for each level + L = self.offsets.shape[0] - 1 # level + S = np.log2(self.per_level_scale) # resolution multiplier at each level, apply log2 for later CUDA exp2f + H = self.base_resolution # base resolution + + if inputs is None: + # randomized in [0, 1] + inputs = torch.rand(B, self.input_dim, device=self.embeddings.device) + else: + inputs = (inputs + bound) / (2 * bound) # map to [0, 1] + inputs = inputs.view(-1, self.input_dim) + B = inputs.shape[0] + + if self.embeddings.grad is None: + raise ValueError('grad is None, should be called after loss.backward() and before optimizer.step()!') + + _backend.grad_total_variation( + inputs, + self.embeddings, + self.embeddings.grad, + self.offsets, + weight, + B, + D, + C, + L, + S, + H, + self.gridtype_id, + self.align_corners, + ) + + @torch.cuda.amp.autocast(enabled=False) + def grad_weight_decay(self, weight=0.1): + # level-wise meaned weight decay (ref: zip-nerf) + + B = self.embeddings.shape[0] # size of embedding + C = self.embeddings.shape[1] # embedding dim for each level + L = self.offsets.shape[0] - 1 # level + + if self.embeddings.grad is None: + raise ValueError('grad is None, should be called after loss.backward() and before optimizer.step()!') + + _backend.grad_weight_decay(self.embeddings, self.embeddings.grad, self.offsets, weight, B, C, L) diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/raymarching.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/raymarching.py new file mode 100644 index 000000000000..c0f15641fa5a --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/raymarching.py @@ -0,0 +1,551 @@ +import time + +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.cuda.amp import custom_bwd, custom_fwd + +# lazy building: +# `import raymarching` will not immediately build the extension, only if you actually call any functions. + +BACKEND = None + + +def get_backend(): + global BACKEND + + if BACKEND is None: + try: + import _raymarching as _backend + except ImportError: + from .backend import _backend + + BACKEND = _backend + + return BACKEND + + +# ---------------------------------------- +# utils +# ---------------------------------------- + + +class _near_far_from_aabb(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, rays_o, rays_d, aabb, min_near=0.2): + ''' near_far_from_aabb, CUDA implementation + Calculate rays' intersection time (near and far) with aabb + Args: + rays_o: float, [N, 3] + rays_d: float, [N, 3] + aabb: float, [6], (xmin, ymin, zmin, xmax, ymax, zmax) + min_near: float, scalar + Returns: + nears: float, [N] + fars: float, [N] + ''' + if not rays_o.is_cuda: + rays_o = rays_o.cuda() + if not rays_d.is_cuda: + rays_d = rays_d.cuda() + + rays_o = rays_o.contiguous().view(-1, 3) + rays_d = rays_d.contiguous().view(-1, 3) + + N = rays_o.shape[0] # num rays + + nears = torch.empty(N, dtype=rays_o.dtype, device=rays_o.device) + fars = torch.empty(N, dtype=rays_o.dtype, device=rays_o.device) + + get_backend().near_far_from_aabb(rays_o, rays_d, aabb, N, min_near, nears, fars) + + return nears, fars + + +near_far_from_aabb = _near_far_from_aabb.apply + + +class _sph_from_ray(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, rays_o, rays_d, radius): + ''' sph_from_ray, CUDA implementation + get spherical coordinate on the background sphere from rays. + Assume rays_o are inside the Sphere(radius). + Args: + rays_o: [N, 3] + rays_d: [N, 3] + radius: scalar, float + Return: + coords: [N, 2], in [-1, 1], theta and phi on a sphere. (further-surface) + ''' + if not rays_o.is_cuda: + rays_o = rays_o.cuda() + if not rays_d.is_cuda: + rays_d = rays_d.cuda() + + rays_o = rays_o.contiguous().view(-1, 3) + rays_d = rays_d.contiguous().view(-1, 3) + + N = rays_o.shape[0] # num rays + + coords = torch.empty(N, 2, dtype=rays_o.dtype, device=rays_o.device) + + get_backend().sph_from_ray(rays_o, rays_d, radius, N, coords) + + return coords + + +sph_from_ray = _sph_from_ray.apply + + +class _morton3D(Function): + @staticmethod + def forward(ctx, coords): + ''' morton3D, CUDA implementation + Args: + coords: [N, 3], int32, in [0, 128) (for some reason there is no uint32 tensor in torch...) + TODO: check if the coord range is valid! (current 128 is safe) + Returns: + indices: [N], int32, in [0, 128^3) + + ''' + if not coords.is_cuda: + coords = coords.cuda() + + N = coords.shape[0] + + indices = torch.empty(N, dtype=torch.int32, device=coords.device) + + get_backend().morton3D(coords.int(), N, indices) + + return indices + + +morton3D = _morton3D.apply + + +class _morton3D_invert(Function): + @staticmethod + def forward(ctx, indices): + ''' morton3D_invert, CUDA implementation + Args: + indices: [N], int32, in [0, 128^3) + Returns: + coords: [N, 3], int32, in [0, 128) + + ''' + if not indices.is_cuda: + indices = indices.cuda() + + N = indices.shape[0] + + coords = torch.empty(N, 3, dtype=torch.int32, device=indices.device) + + get_backend().morton3D_invert(indices.int(), N, coords) + + return coords + + +morton3D_invert = _morton3D_invert.apply + + +class _packbits(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, grid, thresh, bitfield=None): + ''' packbits, CUDA implementation + Pack up the density grid into a bit field to accelerate ray marching. + Args: + grid: float, [C, H * H * H], assume H % 2 == 0 + thresh: float, threshold + Returns: + bitfield: uint8, [C, H * H * H / 8] + ''' + if not grid.is_cuda: + grid = grid.cuda() + grid = grid.contiguous() + + C = grid.shape[0] + H3 = grid.shape[1] + N = C * H3 // 8 + + if bitfield is None: + bitfield = torch.empty(N, dtype=torch.uint8, device=grid.device) + + get_backend().packbits(grid, N, thresh, bitfield) + + return bitfield + + +packbits = _packbits.apply + + +class _flatten_rays(Function): + @staticmethod + def forward(ctx, rays, M): + ''' flatten rays + Args: + rays: [N, 2], all rays' (point_offset, point_count), + M: scalar, int, count of points (we cannot get this info from rays unfortunately...) + Returns: + res: [M], flattened ray index. + ''' + if not rays.is_cuda: + rays = rays.cuda() + rays = rays.contiguous() + + N = rays.shape[0] + + res = torch.zeros(M, dtype=torch.int, device=rays.device) + + get_backend().flatten_rays(rays, N, M, res) + + return res + + +flatten_rays = _flatten_rays.apply + +# ---------------------------------------- +# train functions +# ---------------------------------------- + + +class _march_rays_train(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward( + ctx, + rays_o, + rays_d, + bound, + density_bitfield, + C, + H, + nears, + fars, + perturb=False, + dt_gamma=0, + max_steps=1024, + contract=False, + ): + ''' march rays to generate points (forward only) + Args: + rays_o/d: float, [N, 3] + bound: float, scalar + density_bitfield: uint8: [CHHH // 8] + C: int + H: int + nears/fars: float, [N] + step_counter: int32, (2), used to count the actual number of generated points. + mean_count: int32, estimated mean steps to accelerate training. (but will randomly drop rays if the actual point count exceeded this threshold.) + perturb: bool + align: int, pad output so its size is dividable by align, set to -1 to disable. + force_all_rays: bool, ignore step_counter and mean_count, always calculate all rays. Useful if rendering the whole image, instead of some rays. + dt_gamma: float, called cone_angle in instant-ngp, exponentially accelerate ray marching if > 0. (very significant effect, but generally lead to worse performance) + max_steps: int, max number of sampled points along each ray, also affect min_stepsize. + Returns: + xyzs: float, [M, 3], all generated points' coords. (all rays concated, need to use `rays` to extract points belonging to each ray) + dirs: float, [M, 3], all generated points' view dirs. + ts: float, [M, 2], all generated points' ts. + rays: int32, [N, 2], all rays' (point_offset, point_count), e.g., xyzs[rays[i, 0]:(rays[i, 0] + rays[i, 1])] --> points belonging to rays[i, 0] + ''' + + if not rays_o.is_cuda: + rays_o = rays_o.cuda() + if not rays_d.is_cuda: + rays_d = rays_d.cuda() + if not density_bitfield.is_cuda: + density_bitfield = density_bitfield.cuda() + + rays_o = rays_o.float().contiguous().view(-1, 3) + rays_d = rays_d.float().contiguous().view(-1, 3) + density_bitfield = density_bitfield.contiguous() + + N = rays_o.shape[0] # num rays + + step_counter = torch.zeros(1, dtype=torch.int32, device=rays_o.device) # point counter, ray counter + + if perturb: + noises = torch.rand(N, dtype=rays_o.dtype, device=rays_o.device) + else: + noises = torch.zeros(N, dtype=rays_o.dtype, device=rays_o.device) + + # first pass: write rays, get total number of points M to render + rays = torch.empty(N, 2, dtype=torch.int32, device=rays_o.device) # id, offset, num_steps + get_backend().march_rays_train( + rays_o, + rays_d, + density_bitfield, + bound, + contract, + dt_gamma, + max_steps, + N, + C, + H, + nears, + fars, + None, + None, + None, + rays, + step_counter, + noises, + ) + + # allocate based on M + M = step_counter.item() + # print(M, N) + # print(rays[:, 0].max()) + + xyzs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) + dirs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) + ts = torch.zeros(M, 2, dtype=rays_o.dtype, device=rays_o.device) + + # second pass: write outputs + get_backend().march_rays_train( + rays_o, + rays_d, + density_bitfield, + bound, + contract, + dt_gamma, + max_steps, + N, + C, + H, + nears, + fars, + xyzs, + dirs, + ts, + rays, + step_counter, + noises, + ) + + return xyzs, dirs, ts, rays + + +march_rays_train = _march_rays_train.apply + + +class _composite_rays_train(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, sigmas, rgbs, ts, rays, T_thresh=1e-4, binarize=False): + ''' composite rays' rgbs, according to the ray marching formula. + Args: + rgbs: float, [M, 3] + sigmas: float, [M,] + ts: float, [M, 2] + rays: int32, [N, 3] + Returns: + weights: float, [M] + weights_sum: float, [N,], the alpha channel + depth: float, [N, ], the Depth + image: float, [N, 3], the RGB channel (after multiplying alpha!) + ''' + + sigmas = sigmas.float().contiguous() + rgbs = rgbs.float().contiguous() + + M = sigmas.shape[0] + N = rays.shape[0] + + weights = torch.zeros(M, dtype=sigmas.dtype, device=sigmas.device) # may leave unmodified, so init with 0 + weights_sum = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + + depth = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + image = torch.empty(N, 3, dtype=sigmas.dtype, device=sigmas.device) + + get_backend().composite_rays_train_forward( + sigmas, rgbs, ts, rays, M, N, T_thresh, binarize, weights, weights_sum, depth, image + ) + + ctx.save_for_backward(sigmas, rgbs, ts, rays, weights_sum, depth, image) + ctx.dims = [M, N, T_thresh, binarize] + + return weights, weights_sum, depth, image + + @staticmethod + @custom_bwd + def backward(ctx, grad_weights, grad_weights_sum, grad_depth, grad_image): + + grad_weights = grad_weights.contiguous() + grad_weights_sum = grad_weights_sum.contiguous() + grad_depth = grad_depth.contiguous() + grad_image = grad_image.contiguous() + + sigmas, rgbs, ts, rays, weights_sum, depth, image = ctx.saved_tensors + M, N, T_thresh, binarize = ctx.dims + + grad_sigmas = torch.zeros_like(sigmas) + grad_rgbs = torch.zeros_like(rgbs) + + get_backend().composite_rays_train_backward( + grad_weights, + grad_weights_sum, + grad_depth, + grad_image, + sigmas, + rgbs, + ts, + rays, + weights_sum, + depth, + image, + M, + N, + T_thresh, + binarize, + grad_sigmas, + grad_rgbs, + ) + + return grad_sigmas, grad_rgbs, None, None, None, None + + +composite_rays_train = _composite_rays_train.apply + +# ---------------------------------------- +# infer functions +# ---------------------------------------- + + +class _march_rays(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward( + ctx, + n_alive, + n_step, + rays_alive, + rays_t, + rays_o, + rays_d, + bound, + density_bitfield, + C, + H, + near, + far, + perturb=False, + dt_gamma=0, + max_steps=1024, + contract=False, + ): + ''' march rays to generate points (forward only, for inference) + Args: + n_alive: int, number of alive rays + n_step: int, how many steps we march + rays_alive: int, [N], the alive rays' IDs in N (N >= n_alive, but we only use first n_alive) + rays_t: float, [N], the alive rays' time, we only use the first n_alive. + rays_o/d: float, [N, 3] + bound: float, scalar + density_bitfield: uint8: [CHHH // 8] + C: int + H: int + nears/fars: float, [N] + align: int, pad output so its size is dividable by align, set to -1 to disable. + perturb: bool/int, int > 0 is used as the random seed. + dt_gamma: float, called cone_angle in instant-ngp, exponentially accelerate ray marching if > 0. (very significant effect, but generally lead to worse performance) + max_steps: int, max number of sampled points along each ray, also affect min_stepsize. + Returns: + xyzs: float, [n_alive * n_step, 3], all generated points' coords + dirs: float, [n_alive * n_step, 3], all generated points' view dirs. + ts: float, [n_alive * n_step, 2], all generated points' ts + ''' + + if not rays_o.is_cuda: + rays_o = rays_o.cuda() + if not rays_d.is_cuda: + rays_d = rays_d.cuda() + + rays_o = rays_o.float().contiguous().view(-1, 3) + rays_d = rays_d.float().contiguous().view(-1, 3) + + M = n_alive * n_step + + xyzs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) + dirs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) + ts = torch.zeros(M, 2, dtype=rays_o.dtype, device=rays_o.device) # 2 vals, one for rgb, one for depth + + if perturb: + # torch.manual_seed(perturb) # test_gui uses spp index as seed + noises = torch.rand(n_alive, dtype=rays_o.dtype, device=rays_o.device) + else: + noises = torch.zeros(n_alive, dtype=rays_o.dtype, device=rays_o.device) + + get_backend().march_rays( + n_alive, + n_step, + rays_alive, + rays_t, + rays_o, + rays_d, + bound, + contract, + dt_gamma, + max_steps, + C, + H, + density_bitfield, + near, + far, + xyzs, + dirs, + ts, + noises, + ) + + return xyzs, dirs, ts + + +march_rays = _march_rays.apply + + +class _composite_rays(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) # need to cast sigmas & rgbs to float + def forward( + ctx, + n_alive, + n_step, + rays_alive, + rays_t, + sigmas, + rgbs, + ts, + weights_sum, + depth, + image, + T_thresh=1e-2, + binarize=False, + ): + ''' composite rays' rgbs, according to the ray marching formula. (for inference) + Args: + n_alive: int, number of alive rays + n_step: int, how many steps we march + rays_alive: int, [n_alive], the alive rays' IDs in N (N >= n_alive) + rays_t: float, [N], the alive rays' time + sigmas: float, [n_alive * n_step,] + rgbs: float, [n_alive * n_step, 3] + ts: float, [n_alive * n_step, 2] + In-place Outputs: + weights_sum: float, [N,], the alpha channel + depth: float, [N,], the depth value + image: float, [N, 3], the RGB channel (after multiplying alpha!) + ''' + sigmas = sigmas.float().contiguous() + rgbs = rgbs.float().contiguous() + get_backend().composite_rays( + n_alive, n_step, T_thresh, binarize, rays_alive, rays_t, sigmas, rgbs, ts, weights_sum, depth, image + ) + return tuple() + + +composite_rays = _composite_rays.apply diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/shencoder.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/shencoder.py new file mode 100644 index 000000000000..a97332089e52 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/shencoder.py @@ -0,0 +1,82 @@ +import _shencoder as _backend +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.cuda.amp import custom_bwd, custom_fwd + + +class _sh_encoder(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) # force float32 for better precision + def forward(ctx, inputs, degree, calc_grad_inputs=False): + # inputs: [B, input_dim], float in [-1, 1] + # RETURN: [B, F], float + + inputs = inputs.contiguous() + B, input_dim = inputs.shape # batch size, coord dim + output_dim = degree ** 2 + + outputs = torch.empty(B, output_dim, dtype=inputs.dtype, device=inputs.device) + + if calc_grad_inputs: + dy_dx = torch.empty(B, input_dim * output_dim, dtype=inputs.dtype, device=inputs.device) + else: + dy_dx = None + + _backend.sh_encode_forward(inputs, outputs, B, input_dim, degree, dy_dx) + + ctx.save_for_backward(inputs, dy_dx) + ctx.dims = [B, input_dim, degree] + + return outputs + + @staticmethod + # @once_differentiable + @custom_bwd + def backward(ctx, grad): + # grad: [B, C * C] + + inputs, dy_dx = ctx.saved_tensors + + if dy_dx is not None: + grad = grad.contiguous() + B, input_dim, degree = ctx.dims + grad_inputs = torch.zeros_like(inputs) + _backend.sh_encode_backward(grad, inputs, B, input_dim, degree, dy_dx, grad_inputs) + return grad_inputs, None, None + else: + return None, None, None + + +sh_encode = _sh_encoder.apply + + +class SHEncoder(nn.Module): + def __init__(self, input_dim=3, degree=4): + super().__init__() + + self.input_dim = input_dim # coord dims, must be 3 + self.degree = degree # 0 ~ 4 + self.output_dim = degree ** 2 + + assert self.input_dim == 3, "SH encoder only support input dim == 3" + assert self.degree > 0 and self.degree <= 8, "SH encoder only supports degree in [1, 8]" + + def __repr__(self): + return f"SHEncoder: input_dim={self.input_dim} degree={self.degree}" + + def forward(self, inputs, size=1): + # inputs: [..., input_dim], normalized real world positions in [-size, size] + # return: [..., degree^2] + + inputs = inputs / size # [-1, 1] + + prefix_shape = list(inputs.shape[:-1]) + inputs = inputs.reshape(-1, self.input_dim) + + outputs = sh_encode(inputs, self.degree, inputs.requires_grad) + outputs = outputs.reshape(prefix_shape + [self.output_dim]) + + return outputs diff --git a/nemo/collections/multimodal/modules/nerf/utils/trt_engine.py b/nemo/collections/multimodal/modules/nerf/utils/trt_engine.py new file mode 100644 index 000000000000..ebf0a43da596 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/utils/trt_engine.py @@ -0,0 +1,173 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +import os +from collections import OrderedDict +from copy import copy + +import numpy as np +import tensorrt as trt +import torch +from einops import repeat +from polygraphy import cuda +from polygraphy.backend.common import bytes_from_path +from polygraphy.backend.trt import engine_from_bytes +from polygraphy.backend.trt import util as trt_util + +TRT_LOGGER = trt.Logger(trt.Logger.ERROR) + +# Map of numpy dtype -> torch dtype +numpy_to_torch_dtype_dict = { + np.uint8: torch.uint8, + np.int8: torch.int8, + np.int16: torch.int16, + np.int32: torch.int32, + np.int64: torch.int64, + np.float16: torch.float16, + np.float32: torch.float32, + np.float64: torch.float64, + np.complex64: torch.complex64, + np.complex128: torch.complex128, +} +if np.version.full_version >= "1.24.0": + numpy_to_torch_dtype_dict[np.bool_] = torch.bool +else: + numpy_to_torch_dtype_dict[np.bool] = torch.bool + +# Map of torch dtype -> numpy dtype +torch_to_numpy_dtype_dict = {value: key for (key, value) in numpy_to_torch_dtype_dict.items()} + + +def device_view(t): + return cuda.DeviceView(ptr=t.data_ptr(), shape=t.shape, dtype=torch_to_numpy_dtype_dict[t.dtype]) + + +class Engine: + def __init__( + self, engine_path, + ): + self.engine_path = engine_path + self.engine = None + self.context = None + self.buffers = OrderedDict() + self.tensors = OrderedDict() + + def __del__(self): + [buf.free() for buf in self.buffers.values() if isinstance(buf, cuda.DeviceArray)] + del self.engine + del self.context + del self.buffers + del self.tensors + + def set_engine(self, stream, shape_dict): + self.load() + self.activate() + self.stream = stream + self.allocate_buffers(shape_dict, device='cuda') + + def load(self): + print(f"Loading TensorRT engine: {self.engine_path}") + self.engine = engine_from_bytes(bytes_from_path(self.engine_path)) + + def activate(self): + self.context = self.engine.create_execution_context() + + def allocate_buffers(self, shape_dict=None, device="cuda"): + for idx in range(trt_util.get_bindings_per_profile(self.engine)): + binding = self.engine[idx] + if shape_dict and binding in shape_dict: + shape = shape_dict[binding] + else: + shape = self.engine.get_binding_shape(binding) + dtype = trt.nptype(self.engine.get_binding_dtype(binding)) + if self.engine.binding_is_input(binding): + self.context.set_binding_shape(idx, shape) + tensor = torch.empty(tuple(shape), dtype=numpy_to_torch_dtype_dict[dtype]).to(device=device) + self.tensors[binding] = tensor + self.buffers[binding] = cuda.DeviceView(ptr=tensor.data_ptr(), shape=shape, dtype=dtype) + + def infer(self, feed_dict): + stream = self.stream + start_binding, end_binding = trt_util.get_active_profile_bindings(self.context) + # shallow copy of ordered dict + device_buffers = copy(self.buffers) + for name, buf in feed_dict.items(): + assert isinstance(buf, cuda.DeviceView) + device_buffers[name] = buf + bindings = [0] * start_binding + [buf.ptr for buf in device_buffers.values()] + noerror = self.context.execute_async_v2(bindings=bindings, stream_handle=stream.ptr) + if not noerror: + raise ValueError(f"ERROR: inference failed.") + + return self.tensors + + +def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + if schedule == "linear": + betas = torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2 + + elif schedule == "cosine": + timesteps = torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s + alphas = timesteps / (1 + cosine_s) * np.pi / 2 + alphas = torch.cos(alphas).pow(2) + alphas = alphas / alphas[0] + betas = 1 - alphas[1:] / alphas[:-1] + betas = np.clip(betas, a_min=0, a_max=0.999) + + elif schedule == "sqrt_linear": + betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) + elif schedule == "sqrt": + betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5 + else: + raise ValueError(f"schedule '{schedule}' unknown.") + return betas.numpy() + + +def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps, verbose=True): + if ddim_discr_method == 'uniform': + c = num_ddpm_timesteps // num_ddim_timesteps + ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c))) + elif ddim_discr_method == 'quad': + ddim_timesteps = ((np.linspace(0, np.sqrt(num_ddpm_timesteps * 0.8), num_ddim_timesteps)) ** 2).astype(int) + else: + raise NotImplementedError(f'There is no ddim discretization method called "{ddim_discr_method}"') + + # assert ddim_timesteps.shape[0] == num_ddim_timesteps + # add one to get the final alpha values right (the ones from first scale to data during sampling) + steps_out = ddim_timesteps + 1 + if verbose: + print(f'Selected timesteps for ddim sampler: {steps_out}') + return steps_out + + +def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbose=True): + # select alphas for computing the variance schedule + alphas = alphacums[ddim_timesteps] + alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_timesteps[:-1]].tolist()) + + # according the the formula provided in https://arxiv.org/abs/2010.02502 + sigmas = eta * np.sqrt((1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev)) + if verbose: + print(f'Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}') + print( + f'For the chosen value of eta, which is {eta}, ' + f'this results in the following sigma_t schedule for ddim sampler {sigmas}' + ) + return sigmas, alphas, alphas_prev + + +def noise_like(shape, device, repeat=False): + repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1))) + noise = lambda: torch.randn(shape, device=device) + return repeat_noise() if repeat else noise() diff --git a/nemo/core/optim/adan.py b/nemo/core/optim/adan.py new file mode 100644 index 000000000000..cd3dab0325d7 --- /dev/null +++ b/nemo/core/optim/adan.py @@ -0,0 +1,453 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright 2022 Garena Online Private Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import List + +import torch +from torch import Tensor +from torch.optim.optimizer import Optimizer + + +class MultiTensorApply(object): + available = False + warned = False + + def __init__(self, chunk_size): + try: + MultiTensorApply.available = True + self.chunk_size = chunk_size + except ImportError as err: + MultiTensorApply.available = False + MultiTensorApply.import_err = err + + def __call__(self, op, noop_flag_buffer, tensor_lists, *args): + return op(self.chunk_size, noop_flag_buffer, tensor_lists, *args) + + +class Adan(Optimizer): + """ + Implements a pytorch variant of Adan + Adan was proposed in + Adan: Adaptive Nesterov Momentum Algorithm for + Faster Optimizing Deep Models[J].arXiv preprint arXiv:2208.06677, 2022. + https://arxiv.org/abs/2208.06677 + Arguments: + params (iterable): iterable of parameters to optimize or + dicts defining parameter groups. + lr (float, optional): learning rate. (default: 1e-3) + betas (Tuple[float, float, flot], optional): coefficients used for + first- and second-order moments. (default: (0.98, 0.92, 0.99)) + eps (float, optional): term added to the denominator to improve + numerical stability. (default: 1e-8) + weight_decay (float, optional): decoupled weight decay + (L2 penalty) (default: 0) + max_grad_norm (float, optional): value used to clip + global grad norm (default: 0.0 no clip) + no_prox (bool): how to perform the decoupled weight decay + (default: False) + foreach (bool): if True would use torch._foreach implementation. + It's faster but uses slightly more memory. (default: True) + fused (bool, optional): whether fused implementation is used. + (default: False) + """ + + def __init__( + self, + params, + lr=1e-3, + betas=(0.98, 0.92, 0.99), + eps=1e-8, + weight_decay=0.0, + max_grad_norm=0.0, + no_prox=False, + foreach: bool = True, + fused: bool = False, + ): + if not 0.0 <= max_grad_norm: + raise ValueError('Invalid Max grad norm: {}'.format(max_grad_norm)) + if not 0.0 <= lr: + raise ValueError('Invalid learning rate: {}'.format(lr)) + if not 0.0 <= eps: + raise ValueError('Invalid epsilon value: {}'.format(eps)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError('Invalid beta parameter at index 0: {}'.format(betas[0])) + if not 0.0 <= betas[1] < 1.0: + raise ValueError('Invalid beta parameter at index 1: {}'.format(betas[1])) + if not 0.0 <= betas[2] < 1.0: + raise ValueError('Invalid beta parameter at index 2: {}'.format(betas[2])) + defaults = dict( + lr=lr, + betas=betas, + eps=eps, + weight_decay=weight_decay, + max_grad_norm=max_grad_norm, + no_prox=no_prox, + foreach=foreach, + fused=fused, + ) + super().__init__(params, defaults) + + def __setstate__(self, state): + super(Adan, self).__setstate__(state) + for group in self.param_groups: + group.setdefault('no_prox', False) + + @torch.no_grad() + def restart_opt(self): + for group in self.param_groups: + group['step'] = 0 + for p in group['params']: + if p.requires_grad: + state = self.state[p] + # State initialization + + # Exponential moving average of gradient values + state['exp_avg'] = torch.zeros_like(p) + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = torch.zeros_like(p) + # Exponential moving average of gradient difference + state['exp_avg_diff'] = torch.zeros_like(p) + + @torch.no_grad() + def step(self, closure=None): + """Performs a single optimization step.""" + + loss = None + if closure is not None: + with torch.enable_grad(): + loss = closure() + + if self.defaults['max_grad_norm'] > 0: + device = self.param_groups[0]['params'][0].device + global_grad_norm = torch.zeros(1, device=device) + + max_grad_norm = torch.tensor(self.defaults['max_grad_norm'], device=device) + for group in self.param_groups: + + for p in group['params']: + if p.grad is not None: + grad = p.grad + global_grad_norm.add_(grad.pow(2).sum()) + + global_grad_norm = torch.sqrt(global_grad_norm) + + clip_global_grad_norm = torch.clamp(max_grad_norm / (global_grad_norm + group['eps']), max=1.0).item() + else: + clip_global_grad_norm = 1.0 + + for group in self.param_groups: + params_with_grad = [] + grads = [] + exp_avgs = [] + exp_avg_sqs = [] + exp_avg_diffs = [] + neg_pre_grads = [] + + beta1, beta2, beta3 = group['betas'] + # assume same step across group now to simplify things + # per parameter step can be easily support + # by making it tensor, or pass list into kernel + if 'step' in group: + group['step'] += 1 + else: + group['step'] = 1 + + bias_correction1 = 1.0 - beta1 ** group['step'] + bias_correction2 = 1.0 - beta2 ** group['step'] + bias_correction3 = 1.0 - beta3 ** group['step'] + + for p in group['params']: + if p.grad is None: + continue + params_with_grad.append(p) + grads.append(p.grad) + + state = self.state[p] + if len(state) == 0: + state['exp_avg'] = torch.zeros_like(p) + state['exp_avg_sq'] = torch.zeros_like(p) + state['exp_avg_diff'] = torch.zeros_like(p) + + if 'neg_pre_grad' not in state or group['step'] == 1: + state['neg_pre_grad'] = p.grad.clone().mul_(-clip_global_grad_norm) + + exp_avgs.append(state['exp_avg']) + exp_avg_sqs.append(state['exp_avg_sq']) + exp_avg_diffs.append(state['exp_avg_diff']) + neg_pre_grads.append(state['neg_pre_grad']) + + kwargs = dict( + params=params_with_grad, + grads=grads, + exp_avgs=exp_avgs, + exp_avg_sqs=exp_avg_sqs, + exp_avg_diffs=exp_avg_diffs, + neg_pre_grads=neg_pre_grads, + beta1=beta1, + beta2=beta2, + beta3=beta3, + bias_correction1=bias_correction1, + bias_correction2=bias_correction2, + bias_correction3_sqrt=math.sqrt(bias_correction3), + lr=group['lr'], + weight_decay=group['weight_decay'], + eps=group['eps'], + no_prox=group['no_prox'], + clip_global_grad_norm=clip_global_grad_norm, + ) + + if group['foreach']: + if group['fused']: + if torch.cuda.is_available(): + _fused_adan_multi_tensor(**kwargs) + else: + raise ValueError('Fused Adan does not support CPU') + else: + _multi_tensor_adan(**kwargs) + elif group['fused']: + if torch.cuda.is_available(): + _fused_adan_single_tensor(**kwargs) + else: + raise ValueError('Fused Adan does not support CPU') + else: + _single_tensor_adan(**kwargs) + + return loss + + +def _single_tensor_adan( + params: List[Tensor], + grads: List[Tensor], + exp_avgs: List[Tensor], + exp_avg_sqs: List[Tensor], + exp_avg_diffs: List[Tensor], + neg_pre_grads: List[Tensor], + *, + beta1: float, + beta2: float, + beta3: float, + bias_correction1: float, + bias_correction2: float, + bias_correction3_sqrt: float, + lr: float, + weight_decay: float, + eps: float, + no_prox: bool, + clip_global_grad_norm: Tensor, +): + for i, param in enumerate(params): + grad = grads[i] + exp_avg = exp_avgs[i] + exp_avg_sq = exp_avg_sqs[i] + exp_avg_diff = exp_avg_diffs[i] + neg_grad_or_diff = neg_pre_grads[i] + + grad.mul_(clip_global_grad_norm) + + # for memory saving, we use `neg_grad_or_diff` + # to get some temp variable in a inplace way + neg_grad_or_diff.add_(grad) + + exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1) # m_t + exp_avg_diff.mul_(beta2).add_(neg_grad_or_diff, alpha=1 - beta2) # diff_t + + neg_grad_or_diff.mul_(beta2).add_(grad) + exp_avg_sq.mul_(beta3).addcmul_(neg_grad_or_diff, neg_grad_or_diff, value=1 - beta3) # n_t + + denom = ((exp_avg_sq).sqrt() / bias_correction3_sqrt).add_(eps) + step_size_diff = lr * beta2 / bias_correction2 + step_size = lr / bias_correction1 + + if no_prox: + param.mul_(1 - lr * weight_decay) + param.addcdiv_(exp_avg, denom, value=-step_size) + param.addcdiv_(exp_avg_diff, denom, value=-step_size_diff) + else: + param.addcdiv_(exp_avg, denom, value=-step_size) + param.addcdiv_(exp_avg_diff, denom, value=-step_size_diff) + param.div_(1 + lr * weight_decay) + + neg_grad_or_diff.zero_().add_(grad, alpha=-1.0) + + +def _multi_tensor_adan( + params: List[Tensor], + grads: List[Tensor], + exp_avgs: List[Tensor], + exp_avg_sqs: List[Tensor], + exp_avg_diffs: List[Tensor], + neg_pre_grads: List[Tensor], + *, + beta1: float, + beta2: float, + beta3: float, + bias_correction1: float, + bias_correction2: float, + bias_correction3_sqrt: float, + lr: float, + weight_decay: float, + eps: float, + no_prox: bool, + clip_global_grad_norm: Tensor, +): + if len(params) == 0: + return + + torch._foreach_mul_(grads, clip_global_grad_norm) + + # for memory saving, we use `neg_pre_grads` + # to get some temp variable in a inplace way + torch._foreach_add_(neg_pre_grads, grads) + + torch._foreach_mul_(exp_avgs, beta1) + torch._foreach_add_(exp_avgs, grads, alpha=1 - beta1) # m_t + + torch._foreach_mul_(exp_avg_diffs, beta2) + torch._foreach_add_(exp_avg_diffs, neg_pre_grads, alpha=1 - beta2) # diff_t + + torch._foreach_mul_(neg_pre_grads, beta2) + torch._foreach_add_(neg_pre_grads, grads) + torch._foreach_mul_(exp_avg_sqs, beta3) + torch._foreach_addcmul_(exp_avg_sqs, neg_pre_grads, neg_pre_grads, value=1 - beta3) # n_t + + denom = torch._foreach_sqrt(exp_avg_sqs) + torch._foreach_div_(denom, bias_correction3_sqrt) + torch._foreach_add_(denom, eps) + + step_size_diff = lr * beta2 / bias_correction2 + step_size = lr / bias_correction1 + + if no_prox: + torch._foreach_mul_(params, 1 - lr * weight_decay) + torch._foreach_addcdiv_(params, exp_avgs, denom, value=-step_size) + torch._foreach_addcdiv_(params, exp_avg_diffs, denom, value=-step_size_diff) + else: + torch._foreach_addcdiv_(params, exp_avgs, denom, value=-step_size) + torch._foreach_addcdiv_(params, exp_avg_diffs, denom, value=-step_size_diff) + torch._foreach_div_(params, 1 + lr * weight_decay) + torch._foreach_zero_(neg_pre_grads) + torch._foreach_add_(neg_pre_grads, grads, alpha=-1.0) + + +def _fused_adan_multi_tensor( + params: List[Tensor], + grads: List[Tensor], + exp_avgs: List[Tensor], + exp_avg_sqs: List[Tensor], + exp_avg_diffs: List[Tensor], + neg_pre_grads: List[Tensor], + *, + beta1: float, + beta2: float, + beta3: float, + bias_correction1: float, + bias_correction2: float, + bias_correction3_sqrt: float, + lr: float, + weight_decay: float, + eps: float, + no_prox: bool, + clip_global_grad_norm: Tensor, +): + import fused_adan + + multi_tensor_applier = MultiTensorApply(2048 * 32) + _dummy_overflow_buf = torch.cuda.IntTensor([0]) + multi_tensor_applier( + fused_adan.adan_multi_tensor, + _dummy_overflow_buf, + [params, grads, exp_avgs, exp_avg_sqs, exp_avg_diffs, neg_pre_grads], + beta1, + beta2, + beta3, + bias_correction1, + bias_correction2, + bias_correction3_sqrt, + lr, + weight_decay, + eps, + no_prox, + clip_global_grad_norm, + ) + torch._foreach_zero_(neg_pre_grads) + torch._foreach_add_(neg_pre_grads, grads, alpha=-1.0) + + +def _fused_adan_single_tensor( + params: List[Tensor], + grads: List[Tensor], + exp_avgs: List[Tensor], + exp_avg_sqs: List[Tensor], + exp_avg_diffs: List[Tensor], + neg_pre_grads: List[Tensor], + *, + beta1: float, + beta2: float, + beta3: float, + bias_correction1: float, + bias_correction2: float, + bias_correction3_sqrt: float, + lr: float, + weight_decay: float, + eps: float, + no_prox: bool, + clip_global_grad_norm: Tensor, +): + for i, param in enumerate(params): + p_data_fp32 = param.data.float() + out_p = param.data + grad = grads[i] + exp_avg = exp_avgs[i] + exp_avg_sq = exp_avg_sqs[i] + exp_avg_diff = exp_avg_diffs[i] + neg_grad = neg_pre_grads[i] + with torch.cuda.device(param.device): + import fused_adan + + fused_adan.adan_single_tensor( + p_data_fp32, + out_p, + grad, + exp_avg, + exp_avg_sq, + exp_avg_diff, + neg_grad, + beta1, + beta2, + beta3, + bias_correction1, + bias_correction2, + bias_correction3_sqrt, + lr, + weight_decay, + eps, + no_prox, + clip_global_grad_norm, + ) + neg_grad.zero_().add_(grad, alpha=-1.0) diff --git a/nemo/core/optim/optimizers.py b/nemo/core/optim/optimizers.py index 0aeda813dea5..a19676517fbd 100644 --- a/nemo/core/optim/optimizers.py +++ b/nemo/core/optim/optimizers.py @@ -25,6 +25,7 @@ from nemo.core.config import OptimizerParams, get_optimizer_config, register_optimizer_params from nemo.core.optim.adafactor import Adafactor +from nemo.core.optim.adan import Adan from nemo.core.optim.novograd import Novograd from nemo.utils import logging from nemo.utils.model_utils import maybe_update_config_version @@ -40,6 +41,7 @@ 'rprop': rprop.Rprop, 'novograd': Novograd, 'adafactor': Adafactor, + 'adan': Adan, } try: From 981fca59cdd2d754350be1d059d9b39cb98c1b6a Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Wed, 11 Oct 2023 18:36:48 -0700 Subject: [PATCH 304/512] Move neva export changes --- .../mllm/neva/conf/neva_export.yaml | 5 +- examples/multimodal/mllm/neva/neva_export.py | 87 +--- nemo/deploy/__init__.py | 18 + nemo/deploy/deploy_base.py | 93 ++++ nemo/deploy/deploy_pytriton.py | 149 ++++++ nemo/deploy/query.py | 79 +++ nemo/deploy/triton_deployable.py | 31 ++ nemo/deploy/utils.py | 79 +++ nemo/export/__init__.py | 15 + nemo/export/tensorrt_llm.py | 273 ++++++++++ nemo/export/trt_llm/__init__.py | 33 ++ nemo/export/trt_llm/decoder/__init__.py | 56 ++ nemo/export/trt_llm/decoder/decoder.py | 202 ++++++++ nemo/export/trt_llm/decoder/gpt.py | 99 ++++ nemo/export/trt_llm/decoder/gptj.py | 94 ++++ nemo/export/trt_llm/decoder/llama.py | 101 ++++ nemo/export/trt_llm/huggingface_utils.py | 138 +++++ nemo/export/trt_llm/model_config.py | 415 +++++++++++++++ nemo/export/trt_llm/model_config_trt.py | 65 +++ nemo/export/trt_llm/model_config_utils.py | 238 +++++++++ nemo/export/trt_llm/nemo/convert.py | 343 ++++++++++++ nemo/export/trt_llm/nemo/nemo.py | 269 ++++++++++ nemo/export/trt_llm/nemo/nemo_ckpt_convert.py | 282 ++++++++++ nemo/export/trt_llm/nemo_utils.py | 184 +++++++ nemo/export/trt_llm/quantization_utils.py | 119 +++++ nemo/export/trt_llm/tensor_utils.py | 62 +++ nemo/export/trt_llm/tensorrt_llm_build.py | 296 +++++++++++ nemo/export/trt_llm/tensorrt_llm_model.py | 487 ++++++++++++++++++ nemo/export/trt_llm/tensorrt_llm_run.py | 317 ++++++++++++ nemo/export/trt_llm/tensorrt_llm_utils.py | 73 +++ nemo/export/utils.py | 155 ++++++ 31 files changed, 4791 insertions(+), 66 deletions(-) create mode 100644 nemo/deploy/__init__.py create mode 100644 nemo/deploy/deploy_base.py create mode 100644 nemo/deploy/deploy_pytriton.py create mode 100644 nemo/deploy/query.py create mode 100644 nemo/deploy/triton_deployable.py create mode 100644 nemo/deploy/utils.py create mode 100644 nemo/export/__init__.py create mode 100644 nemo/export/tensorrt_llm.py create mode 100644 nemo/export/trt_llm/__init__.py create mode 100644 nemo/export/trt_llm/decoder/__init__.py create mode 100644 nemo/export/trt_llm/decoder/decoder.py create mode 100644 nemo/export/trt_llm/decoder/gpt.py create mode 100644 nemo/export/trt_llm/decoder/gptj.py create mode 100644 nemo/export/trt_llm/decoder/llama.py create mode 100644 nemo/export/trt_llm/huggingface_utils.py create mode 100644 nemo/export/trt_llm/model_config.py create mode 100644 nemo/export/trt_llm/model_config_trt.py create mode 100644 nemo/export/trt_llm/model_config_utils.py create mode 100644 nemo/export/trt_llm/nemo/convert.py create mode 100644 nemo/export/trt_llm/nemo/nemo.py create mode 100644 nemo/export/trt_llm/nemo/nemo_ckpt_convert.py create mode 100644 nemo/export/trt_llm/nemo_utils.py create mode 100644 nemo/export/trt_llm/quantization_utils.py create mode 100644 nemo/export/trt_llm/tensor_utils.py create mode 100644 nemo/export/trt_llm/tensorrt_llm_build.py create mode 100644 nemo/export/trt_llm/tensorrt_llm_model.py create mode 100644 nemo/export/trt_llm/tensorrt_llm_run.py create mode 100644 nemo/export/trt_llm/tensorrt_llm_utils.py create mode 100644 nemo/export/utils.py diff --git a/examples/multimodal/mllm/neva/conf/neva_export.yaml b/examples/multimodal/mllm/neva/conf/neva_export.yaml index 9867b8bf7bfc..fa3d70478662 100644 --- a/examples/multimodal/mllm/neva/conf/neva_export.yaml +++ b/examples/multimodal/mllm/neva/conf/neva_export.yaml @@ -1,13 +1,12 @@ name: nemo_neva infer: - out_dir: ./out/ + out_dir: ./neva vision: min_batch_size: 1 opt_batch_size: 1 max_batch_size: 1 clip: openai/clip-vit-large-patch14 llm: - tekit_path: /tekit tensor_parallelism: 1 max_input_len: 2048 max_output_len: 1024 @@ -15,4 +14,4 @@ infer: model: precision: bf16 - restore_from_path: /path/to/pretrained.nemo \ No newline at end of file + restore_from_path: /path/to/pretrained.nemo diff --git a/examples/multimodal/mllm/neva/neva_export.py b/examples/multimodal/mllm/neva/neva_export.py index 0b108effff88..b1d3031bfd4b 100644 --- a/examples/multimodal/mllm/neva/neva_export.py +++ b/examples/multimodal/mllm/neva/neva_export.py @@ -38,6 +38,7 @@ from nemo.core.classes.exportable import Exportable from nemo.core.connectors.save_restore_connector import SaveRestoreConnector from nemo.core.neural_types import ChannelType, LogitsType, NeuralType +from nemo.export import TensorRTLLM LOGGER = logging.getLogger(__name__) @@ -58,7 +59,7 @@ def build_vision_encoder(model_path, clip_path, precision, bs_min, bs_opt, bs_ma state_dict = connector._load_state_dict_from_disk(path) LOGGER.info('Done') - vision_connector = torch.nn.Linear(config.vision.hidden_size, config.llm.hidden_size, bias=True,) + vision_connector = torch.nn.Linear(config.mm_cfg.vision_encoder.hidden_size, config.hidden_size, bias=True,) vision_encoder = CLIPVisionModel.from_pretrained(clip_path, torch_dtype=torch_precision) image_size = vision_encoder.vision_model.config.image_size @@ -143,10 +144,12 @@ def output_names(self): LOGGER.info('Exporting TRT') engine = engine_from_network( - network_from_onnx_path('./onnx/vision_encoder.onnx'), + network_from_onnx_path('/tmp/onnx/vision_encoder.onnx'), config=CreateConfig( - fp16=precision in [16, '16', '16-mixed'], bf16=precision in ['bf16', 'bf16-mixed'], profiles=[p], - ), + tf32=precision in [32, '32', '32-true'], + fp16=precision in [16, '16', '16-mixed'], + bf16=precision in ['bf16', 'bf16-mixed'], + profiles=[p],), ) save_engine(engine, path=os.path.join(out_dir, 'vision_encoder.plan')) @@ -155,74 +158,32 @@ def output_names(self): LOGGER.info('Done') -def build_trtllm_engines( - tekit_path, in_file, out_dir, tensor_parallelism, precision, max_input_len, max_output_len, max_batch_size -): - with tempfile.TemporaryDirectory() as temp_dir: - gpt_example_path = f'{tekit_path}/examples/gpt' - build_precision = 'bfloat16' if precision in ['bf16', 'bf16-mixed'] else 'float16' - LOGGER.info('Converting model weights') - convert_command = [ - 'python3', - 'nemo_ckpt_convert.py', - f'--out-dir={temp_dir}', - f'--in-file={in_file}', - f'--tensor-parallelism={tensor_parallelism}', - f'--storage-type={build_precision}', - '--verbose', - ] - convert_process = subprocess.Popen( - convert_command, cwd=gpt_example_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) - stdout, stderr = convert_process.communicate() - print(stdout.decode()) - assert convert_process.returncode == 0, stderr.decode() - LOGGER.info('Done') - - shutil.copy(os.path.join(temp_dir, f'{tensor_parallelism}-gpu/tokenizer.model'), out_dir) - - LOGGER.info('Building TRT-LLM engines') - build_command = [ - 'python3', - 'build.py', - f'--model_dir={temp_dir}/{tensor_parallelism}-gpu', - f'--dtype={build_precision}', - f'--output_dir={os.path.abspath(out_dir)}', - f'--use_gpt_attention_plugin={build_precision}', - f'--world_size={tensor_parallelism}', - f'--max_input_len={max_input_len}', - f'--max_output_len={max_output_len}', - f'--max_batch_size={max_batch_size}', - f'--use_layernorm_plugin={build_precision}', - f'--use_gemm_plugin={build_precision}', - f'--max_prompt_embedding_table_size={max_batch_size*max_input_len}', - '--parallel_build', - '--enable_context_fmha', - '--remove_input_padding', - '--log_level=verbose', - ] - build_process = subprocess.Popen( - build_command, cwd=gpt_example_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) - stdout, stderr = build_process.communicate() - print(stdout.decode()) - assert build_process.returncode == 0, stderr.decode() - LOGGER.info('Done') +def build_trtllm_engines(in_file, out_dir, tensor_parallelism, max_input_len, max_output_len, max_batch_size): + trt_llm_exporter = TensorRTLLM(model_dir=out_dir) + trt_llm_exporter.export( + nemo_checkpoint_path=in_file, + model_type="llama", + n_gpus=tensor_parallelism, + max_input_token=max_input_len, + max_output_token=max_output_len, + max_batch_size=max_batch_size, + max_prompt_embedding_table_size=max_batch_size * max_input_len, + ) + LOGGER.info('Done') @hydra_runner(config_path='conf', config_name='neva_export') def main(cfg): precision = cfg.model.get('precision', 16) assert precision != 32, 'FP32 export not supported' + plan_dir = os.path.join(cfg.infer.out_dir, 'plan') - os.makedirs(cfg.infer.out_dir, exist_ok=True) + os.makedirs(plan_dir, exist_ok=True) LOGGER.info('Building TRT-LLM engines') build_trtllm_engines( - cfg.infer.llm.tekit_path, cfg.model.restore_from_path, - cfg.infer.out_dir, + plan_dir, cfg.infer.llm.get('tensor_parallelism', 1), - precision, cfg.infer.llm.get('max_input_len', 2048), cfg.infer.llm.get('max_output_len', 2048), cfg.infer.llm.get('max_batch_size', 1), @@ -232,11 +193,11 @@ def main(cfg): build_vision_encoder( cfg.model.restore_from_path, cfg.infer.vision.clip, - 32, # WAR for TRT precision issue + 32, cfg.infer.vision.get('min_batch_size', 1), cfg.infer.vision.get('opt_batch_size', 1), cfg.infer.vision.get('max_batch_size', 1), - cfg.infer.out_dir, + plan_dir, ) diff --git a/nemo/deploy/__init__.py b/nemo/deploy/__init__.py new file mode 100644 index 000000000000..7157a7ef29f3 --- /dev/null +++ b/nemo/deploy/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .deploy_base import DeployBase +from .deploy_pytriton import DeployPyTriton +from .query import NemoQuery +from .triton_deployable import ITritonDeployable diff --git a/nemo/deploy/deploy_base.py b/nemo/deploy/deploy_base.py new file mode 100644 index 000000000000..334e2e23ef17 --- /dev/null +++ b/nemo/deploy/deploy_base.py @@ -0,0 +1,93 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib +from abc import ABC, abstractmethod + +from pytorch_lightning import Trainer + +from nemo.core.classes.modelPT import ModelPT +from nemo.deploy.triton_deployable import ITritonDeployable + + +class DeployBase(ABC): + def __init__( + self, + triton_model_name: str, + triton_model_version: int = 1, + checkpoint_path: str = None, + model=None, + max_batch_size: int = 128, + port: int = 8000, + http_address="0.0.0.0", + ): + self.checkpoint_path = checkpoint_path + self.triton_model_name = triton_model_name + self.triton_model_version = triton_model_version + self.max_batch_size = max_batch_size + self.model = model + self.port = port + self.http_address = http_address + self.triton = None + + if checkpoint_path is None and model is None: + raise Exception("Either checkpoint_path or model should be provided.") + + @abstractmethod + def deploy(self): + pass + + @abstractmethod + def serve(self): + pass + + @abstractmethod + def run(self): + pass + + @abstractmethod + def stop(self): + pass + + def _init_nemo_model(self): + if self.checkpoint_path is not None: + model_config = ModelPT.restore_from(self.checkpoint_path, return_config=True) + module_path, class_name = DeployBase.get_module_and_class(model_config.target) + cls = getattr(importlib.import_module(module_path), class_name) + self.model = cls.restore_from(restore_path=self.checkpoint_path, trainer=Trainer()) + self.model.freeze() + + # has to turn off activations_checkpoint_method for inference + try: + self.model.model.language_model.encoder.activations_checkpoint_method = None + except AttributeError: + pass + + if self.model is None: + raise Exception("There is no model to deploy.") + + self._is_model_deployable() + + def _is_model_deployable(self): + if not issubclass(type(self.model), ITritonDeployable): + raise Exception( + "This model is not deployable to Triton." "nemo.deploy.ITritonDeployable class should be inherited" + ) + else: + return True + + @staticmethod + def get_module_and_class(target: str): + l = target.rindex(".") + return target[0:l], target[l + 1 : len(target)] diff --git a/nemo/deploy/deploy_pytriton.py b/nemo/deploy/deploy_pytriton.py new file mode 100644 index 000000000000..ece63bff03cc --- /dev/null +++ b/nemo/deploy/deploy_pytriton.py @@ -0,0 +1,149 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from pytriton.model_config import ModelConfig, Tensor +from pytriton.triton import Triton, TritonConfig +from .deploy_base import DeployBase + + +class DeployPyTriton(DeployBase): + + """ + Deploys any models to Triton Inference Server that implements ITritonDeployable interface in nemo.deploy. + + Example: + from nemo.deploy import DeployPyTriton, NemoQuery + from nemo.export import TensorRTLLM + + trt_llm_exporter = TensorRTLLM(model_dir="/path/for/model/files") + trt_llm_exporter.export( + nemo_checkpoint_path="/path/for/nemo/checkpoint", + model_type="llama", + n_gpus=1, + ) + + nm = DeployPyTriton(model=trt_llm_exporter, triton_model_name="model_name", port=8000) + nm.deploy() + nm.run() + nq = NemoQuery(url="localhost", model_name="model_name") + + prompts = ["hello, testing GPT inference", "another GPT inference test?"] + output = nq.query_llm(prompts=prompts, max_output_len=100) + print("prompts: ", prompts) + print("") + print("output: ", output) + print("") + + prompts = ["Give me some info about Paris", "Do you think Londan is a good city to visit?", "What do you think about Rome?"] + output = nq.query_llm(prompts=prompts, max_output_len=250) + print("prompts: ", prompts) + print("") + print("output: ", output) + print("") + + """ + + def __init__( + self, + triton_model_name: str, + triton_model_version: int = 1, + checkpoint_path: str = None, + model=None, + max_batch_size: int = 128, + port: int = 8000, + http_address="0.0.0.0", + ): + + """ + A nemo checkpoint or model is expected for serving on Triton Inference Server. + + Args: + triton_model_name (str): Name for the service + triton_model_version(int): Version for the service + checkpoint_path (str): path of the nemo file + model (ITritonDeployable): A model that implements the ITritonDeployable from nemo.deploy import ITritonDeployable + max_batch_size (int): max batch size + port (int) : port for the Triton server + http_address (str): http address for Triton server to bind. + """ + + super().__init__( + triton_model_name=triton_model_name, + triton_model_version=triton_model_version, + checkpoint_path=checkpoint_path, + model=model, + max_batch_size=max_batch_size, + port=port, + http_address=http_address, + ) + + def deploy(self): + + """ + Deploys any models to Triton Inference Server. + """ + + self._init_nemo_model() + + try: + triton_config = TritonConfig(http_address=self.http_address, http_port=self.port) + self.triton = Triton(config=triton_config) + self.triton.bind( + model_name=self.triton_model_name, + model_version=self.triton_model_version, + infer_func=self.model.triton_infer_fn, + inputs=self.model.get_triton_input, + outputs=self.model.get_triton_output, + config=ModelConfig(max_batch_size=self.max_batch_size), + ) + except Exception as e: + self.triton = None + print(e) + + def serve(self): + + """ + Starts serving the model and waits for the requests + """ + + if self.triton is None: + raise Exception("deploy should be called first.") + + try: + self.triton.serve() + except Exception as e: + self.triton = None + print(e) + + def run(self): + + """ + Starts serving the model asynchronously. + """ + + if self.triton is None: + raise Exception("deploy should be called first.") + + self.triton.run() + + def stop(self): + """ + Stops serving the model. + """ + + if self.triton is None: + raise Exception("deploy should be called first.") + + self.triton.stop() diff --git a/nemo/deploy/query.py b/nemo/deploy/query.py new file mode 100644 index 000000000000..e3656aeb7a8a --- /dev/null +++ b/nemo/deploy/query.py @@ -0,0 +1,79 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import typing + +import numpy as np +from pytriton.client import ModelClient + +from .utils import str_list2numpy + + +class NemoQuery: + """ + Sends a query to Triton for LLM inference + + Example: + from nemo.deploy import NemoQuery + + nq = NemoQuery(url="localhost", model_name="GPT-2B") + + prompts = ["hello, testing GPT inference", "another GPT inference test?"] + output = nq.query_llm( + prompts=prompts, + max_output_len=100, + top_k=1, + top_p=0.0, + temperature=0.0, + ) + print("prompts: ", prompts) + """ + + def __init__(self, url, model_name): + self.url = url + self.model_name = model_name + + def query_llm( + self, prompts, max_output_token=512, top_k=1, top_p=0.0, temperature=1.0, init_timeout=600.0, + ): + """ + Exports nemo checkpoints to TensorRT-LLM. + + Args: + prompts (List(str)): list of sentences. + max_output_token (int): max generated tokens. + top_k (int): limits us to a certain number (K) of the top tokens to consider. + top_p (float): limits us to the top tokens within a certain probability mass (p). + temperature (float): A parameter of the softmax function, which is the last layer in the network. + init_timeout (flat): timeout for the connection. + """ + + prompts = str_list2numpy(prompts) + max_output_token = np.full(prompts.shape, max_output_token, dtype=np.int_) + top_k = np.full(prompts.shape, top_k, dtype=np.int_) + top_p = np.full(prompts.shape, top_p, dtype=np.single) + temperature = np.full(prompts.shape, temperature, dtype=np.single) + + with ModelClient(self.url, self.model_name, init_timeout_s=init_timeout) as client: + result_dict = client.infer_batch( + prompts=prompts, max_output_token=max_output_token, top_k=top_k, top_p=top_p, temperature=temperature, + ) + output_type = client.model_config.outputs[0].dtype + + if output_type == np.bytes_: + sentences = np.char.decode(result_dict["outputs"].astype("bytes"), "utf-8") + return sentences + else: + return result_dict["outputs"] diff --git a/nemo/deploy/triton_deployable.py b/nemo/deploy/triton_deployable.py new file mode 100644 index 000000000000..084cc828882e --- /dev/null +++ b/nemo/deploy/triton_deployable.py @@ -0,0 +1,31 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from abc import ABC, abstractmethod +import numpy as np + + +class ITritonDeployable(ABC): + @abstractmethod + def get_triton_input(self): + pass + + @abstractmethod + def get_triton_output(self): + pass + + @abstractmethod + def triton_infer_fn(self, **inputs: np.ndarray): + pass diff --git a/nemo/deploy/utils.py b/nemo/deploy/utils.py new file mode 100644 index 000000000000..b5b01db3654f --- /dev/null +++ b/nemo/deploy/utils.py @@ -0,0 +1,79 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import numpy as np +import torch +from pytriton.model_config import Tensor + + +def typedict2tensor( + typedict_class, + overwrite_kwargs: typing.Optional[typing.Dict[str, typing.Any]] = None, + defaults: typing.Optional[typing.Dict[str, typing.Any]] = None, +): + def _map_type(type_): + if type_ is int: + return np.int32 + elif type_ is float: + return np.float32 + elif type_ is bool: + return np.bool_ + elif type_ is str: + return bytes + else: + raise PyTritonBadParameterError(f"Unknown type {type_}") + + def _get_tensor_params(type_): + count = 0 + while typing.get_origin(type_) is list: + type_ = typing.get_args(type_)[0] + count += 1 + count -= 1 # we don't want to count the last dimension + shape = (-1,) * count if count > 1 else (1,) + return {"shape": shape, "dtype": _map_type(type_)} + + overwrite_kwargs = overwrite_kwargs or {} + return tuple( + Tensor(name=name, **_get_tensor_params(type_), **overwrite_kwargs) + for name, type_ in typing.get_type_hints(typedict_class).items() + ) + + +def str_list2numpy(str_list: typing.List[str]) -> np.ndarray: + str_ndarray = np.array(str_list)[..., np.newaxis] + return np.char.encode(str_ndarray, "utf-8") + + +def str_ndarray2list(str_ndarray: np.ndarray) -> typing.List[str]: + str_ndarray = str_ndarray.astype("bytes") + str_ndarray = np.char.decode(str_ndarray, encoding="utf-8") + str_ndarray = str_ndarray.squeeze(axis=-1) + return str_ndarray.tolist() + + +def cast_output(data, required_dtype): + if isinstance(data, torch.Tensor): + data = data.cpu().numpy() + elif not isinstance(data, np.ndarray): + data = np.array(data) + + data_is_str = required_dtype in (object, np.object_, bytes, np.bytes_) + if data_is_str: + data = np.char.encode(data, "utf-8") + + if data.ndim < 2: + data = data[..., np.newaxis] + return data.astype(required_dtype) diff --git a/nemo/export/__init__.py b/nemo/export/__init__.py new file mode 100644 index 000000000000..bc2a8d887357 --- /dev/null +++ b/nemo/export/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .tensorrt_llm import TensorRTLLM diff --git a/nemo/export/tensorrt_llm.py b/nemo/export/tensorrt_llm.py new file mode 100644 index 000000000000..8de9e79cbaff --- /dev/null +++ b/nemo/export/tensorrt_llm.py @@ -0,0 +1,273 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os +import shutil +from pathlib import Path + +import numpy as np +import tensorrt_llm +import torch +from pytriton.decorators import batch +from pytriton.model_config import Tensor + +from nemo.deploy import ITritonDeployable +from nemo.deploy.utils import cast_output, str_ndarray2list + +from .trt_llm.model_config_trt import model_config_to_tensorrt_llm +from .trt_llm.nemo_utils import get_tokenzier, nemo_to_model_config +from .trt_llm.quantization_utils import naive_quantization +from .trt_llm.tensorrt_llm_run import generate, load +from .utils import get_prompt_embedding_table, is_nemo_file, torch_to_numpy + + +class TensorRTLLM(ITritonDeployable): + + """ + Exports nemo checkpoints to TensorRT-LLM and run fast inference. + + Example: + from nemo.export import TensorRTLLM + + trt_llm_exporter = TensorRTLLM(model_dir="/path/for/model/files") + trt_llm_exporter.export( + nemo_checkpoint_path="/path/for/nemo/checkpoint", + model_type="llama", + n_gpus=1, + ) + + output = trt_llm_exporter.forward(["Hi, how are you?", "I am good, thanks, how about you?"]) + print("output: ", output) + + """ + + def __init__(self, model_dir: str): + """ + Args: + model_dir (str): path for storing the TensorRT-LLM model files. + """ + + self.model_dir = model_dir + self.model = None + self.tokenizer = None + self.prompt_table = None + self.task_vocab_size = None + self.n_gpus = None + self.config = None + self._load() + + def _load(self): + self.model = None + self.tokenizer = None + self.prompt_table = None + self.task_vocab_size = None + self.n_gpus = None + self.config = None + + if Path(self.model_dir).exists(): + folders = os.listdir(self.model_dir) + if len(folders) > 0: + try: + self._load_config_file() + self.tokenizer = get_tokenzier(Path(os.path.join(self.model_dir))) + self.model = load(tokenizer=self.tokenizer, engine_dir=self.model_dir) + self._load_prompt_table() + except: + raise Exception( + "Files in the TensorRT-LLM folder is corrupted and model needs to be exported again." + ) + + def _load_prompt_table(self): + path = Path(os.path.join(self.model_dir, "__prompt_embeddings__.npy")) + if path.exists(): + self.prompt_table = torch.from_numpy(np.load(path)) + self.task_vocab_size = 1 + + dtype = self.config['builder_config']['precision'] + self.prompt_table = self.prompt_table.cuda().to(dtype=tensorrt_llm._utils.str_dtype_to_torch(dtype)) + + if self.prompt_table.shape[1] != self.config["builder_config"]["hidden_size"]: + raise Exception( + "Hidden dimension of the model is {0} and does not match with the dimension of the prompt table.".format( + self.config["builder_config"]["hidden_size"] + ) + ) + else: + self.prompt_table = None + self.task_vocab_size = None + + def _load_config_file(self): + engine_dir = Path(self.model_dir) + config_path = engine_dir / 'config.json' + if config_path.exists(): + with open(config_path, 'r') as f: + self.config = json.load(f) + else: + raise FileNotFoundError("file: {0} could not be found.".format(config_path)) + + def export( + self, + nemo_checkpoint_path: str, + model_type: str, + prompt_embeddings_table=None, + delete_existing_files: bool = True, + n_gpus: int = 1, + max_input_token: int = 512, + max_output_token: int = 512, + max_batch_size: int = 32, + quantization: bool = None, + parallel_build: bool = False, + max_prompt_embedding_table_size: int = 0, + ): + """ + Exports nemo checkpoints to TensorRT-LLM. + + Args: + nemo_checkpoint_path (str): path for the nemo checkpoint. + model_type (str): type of the model. Currently supports "llama" and "gptnext". + prompt_embeddings_table (str): prompt embeddings table. + delete_existing_files (bool): if Truen, deletes all the files in model_dir. + n_gpus (int): number of GPUs to use for inference. + max_input_token (int): max input length. + max_output_token (int): max output length. + max_batch_size (int): max batch size. + quantization (bool): if True, applies naive quantization. + parallel_build (bool): build in parallel or not. + """ + + if prompt_embeddings_table is not None: + if not isinstance(prompt_embeddings_table, np.ndarray): + raise TypeError("Only numpy array is allowed for the prompt embeddings table.") + + if len(prompt_embeddings_table.shape) != 2: + raise Exception("A two dimensional prompt embeddings table for a sinlge task is only supported.") + + if Path(self.model_dir).exists(): + if delete_existing_files and len(os.listdir(self.model_dir)) > 0: + for files in os.listdir(self.model_dir): + path = os.path.join(self.model_dir, files) + try: + shutil.rmtree(path) + except OSError: + os.remove(path) + + if len(os.listdir(self.model_dir)) > 0: + raise Exception("Couldn't delete all files.") + elif len(os.listdir(self.model_dir)) > 0: + raise Exception("There are files in this folder. Try setting delete_existing_files=True.") + else: + Path(self.model_dir).mkdir(parents=True, exist_ok=True) + + self.model = None + + nemo_export_dir = os.path.join(self.model_dir, "/tmp_nemo/") + model_configs, self.tokenizer = nemo_to_model_config( + in_file=nemo_checkpoint_path, decoder_type=model_type, gpus=n_gpus, nemo_export_dir=nemo_export_dir + ) + + if max_prompt_embedding_table_size == 0 and prompt_embeddings_table is not None: + max_prompt_embedding_table_size = len(prompt_embeddings_table) + + model_config_to_tensorrt_llm( + model_configs, + self.model_dir, + n_gpus, + max_input_len=max_input_token, + max_output_len=max_output_token, + max_batch_size=max_batch_size, + max_prompt_embedding_table_size=max_prompt_embedding_table_size, + ) + + if prompt_embeddings_table is not None: + np.save(os.path.join(self.model_dir, "__prompt_embeddings__.npy"), prompt_embeddings_table) + + shutil.copy(os.path.join(nemo_export_dir, "tokenizer.model"), self.model_dir) + shutil.rmtree(nemo_export_dir) + self._load() + + def forward( + self, input_texts, max_output_token=512, top_k: int = 1, top_p: float = 0.0, temperature: float = 1.0, + ): + """ + Exports nemo checkpoints to TensorRT-LLM. + + Args: + input_texts (List(str)): list of sentences. + max_output_token (int): max generated tokens. + top_k (int): limits us to a certain number (K) of the top tokens to consider. + top_p (float): limits us to the top tokens within a certain probability mass (p). + temperature (float): A parameter of the softmax function, which is the last layer in the network. + """ + if self.model is None: + raise Exception( + "A nemo checkpoint should be exported and " "TensorRT LLM should be loaded first to run inference." + ) + else: + return generate( + input_texts=input_texts, + max_output_len=max_output_token, + host_context=self.model, + top_k=top_k, + top_p=top_p, + temperature=temperature, + prompt_table=self.prompt_table, + task_vocab_size=self.task_vocab_size, + ) + + def get_hidden_size(self): + if self.config is None: + return None + else: + return self.config["builder_config"]["hidden_size"] + + @property + def get_triton_input(self): + inputs = ( + Tensor(name="prompts", shape=(1,), dtype=bytes), + Tensor(name="max_output_token", shape=(1,), dtype=np.int_), + Tensor(name="top_k", shape=(1,), dtype=np.int_), + Tensor(name="top_p", shape=(1,), dtype=np.single), + Tensor(name="temperature", shape=(1,), dtype=np.single), + ) + return inputs + + @property + def get_triton_output(self): + outputs = (Tensor(name="outputs", shape=(1,), dtype=bytes),) + return outputs + + @batch + def triton_infer_fn(self, **inputs: np.ndarray): + try: + input_texts = str_ndarray2list(inputs.pop("prompts")) + max_output_token = inputs.pop("max_output_token") + top_k = inputs.pop("top_k") + top_p = inputs.pop("top_p") + temperature = inputs.pop("temperature") + + output_texts = self.forward( + input_texts=input_texts, + max_output_token=max_output_token[0][0], + top_k=top_k[0][0], + top_p=top_p[0][0], + temperature=temperature[0][0], + ) + + output = cast_output(output_texts, np.bytes_) + return {"outputs": output} + except Exception as error: + err_msg = "An error occurred: {0}".format(str(error)) + output = cast_output([err_msg], np.bytes_) + return {"outputs": output} diff --git a/nemo/export/trt_llm/__init__.py b/nemo/export/trt_llm/__init__.py new file mode 100644 index 000000000000..68233d55e62e --- /dev/null +++ b/nemo/export/trt_llm/__init__.py @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""LLM deployment package with tensorrt_llm.""" + +from mpi4py import MPI + +# Pre load MPI libs to avoid tensorrt_llm importing failures. +print(f"Loaded mpi lib {MPI.__file__} successfully") + +# Pre import tensorrt_llm +try: + import tensorrt_llm +except Exception as e: + print( + "tensorrt_llm package is not installed. Please build or install tensorrt_llm package" + " properly before calling the llm deployment API." + ) + raise (e) + +from .huggingface_utils import * # noqa +from .model_config_trt import * # noqa +from .model_config_utils import * # noqa +from .nemo_utils import * # noqa +from .quantization_utils import * # noqa +from .tensorrt_llm_run import * # noqa diff --git a/nemo/export/trt_llm/decoder/__init__.py b/nemo/export/trt_llm/decoder/__init__.py new file mode 100644 index 000000000000..2128a4b8fd64 --- /dev/null +++ b/nemo/export/trt_llm/decoder/__init__.py @@ -0,0 +1,56 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""LLM Decoder implementation for tensorrt_llm conversion.""" +from typing import Dict, Type + +import tensorrt as trt + +from ..model_config import DECODER_GPT2, DECODER_GPTJ, DECODER_GPTNEXT, DECODER_LLAMA, QUANTIZATION_NONE +from .decoder import DecoderLayerBuilder, DecoderLayerConfigBuilder +from .gpt import GPTDecoderLayerBuilder, GPTDecoderLayerConfigBuilder +from .gptj import GPTJDecoderLayerBuilder, GPTJDecoderLayerConfigBuilder +from .llama import LLAMADecoderLayerBuilder, LLAMADecoderLayerConfigBuilder + +DECODER_CONFIG_REGISTRY: Dict[str, Type[DecoderLayerConfigBuilder]] = { + DECODER_GPT2: GPTDecoderLayerConfigBuilder, + DECODER_GPTJ: GPTJDecoderLayerConfigBuilder, + DECODER_LLAMA: LLAMADecoderLayerConfigBuilder, +} + + +def build_decoder_layer_config(layer, decoder: str, dtype=trt.float16, rank=0, tensor_parallel=1): + """Builds the decoder layer config with the input torch module.""" + assert decoder in DECODER_CONFIG_REGISTRY, f"{decoder} not supported" + return DECODER_CONFIG_REGISTRY[decoder](decoder, dtype, rank, tensor_parallel).build_layer(layer) + + +DECODER_REGISTRY: Dict[str, Type[DecoderLayerBuilder]] = { + DECODER_GPT2: GPTDecoderLayerBuilder, + DECODER_GPTJ: GPTJDecoderLayerBuilder, + DECODER_LLAMA: LLAMADecoderLayerBuilder, + DECODER_GPTNEXT: GPTDecoderLayerBuilder, +} + + +def build_decoder_layer( + layer, + layer_id: int, + num_layers: int, + dtype=trt.float16, + quantization=QUANTIZATION_NONE, + rank=0, + tensor_parallel=1, +): + """Builds the tensorrt llm decoder layer module with the layer config as the input.""" + assert layer.decoder_type in DECODER_REGISTRY, f"{layer.decoder_type} not supported" + builder = DECODER_REGISTRY[layer.decoder_type] + decoder_builder = builder(layer, layer_id, num_layers, dtype, quantization, rank, tensor_parallel) + return decoder_builder.decoder diff --git a/nemo/export/trt_llm/decoder/decoder.py b/nemo/export/trt_llm/decoder/decoder.py new file mode 100644 index 000000000000..f4eeab0be5f6 --- /dev/null +++ b/nemo/export/trt_llm/decoder/decoder.py @@ -0,0 +1,202 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""The parent decoder class implementation for model_config and tensorrt_llm conversion.""" + +from abc import ABC, abstractmethod +from typing import Optional + +import tensorrt as trt +from transformers.activations import ACT2FN + +from ..model_config import QUANTIZATION_NONE, AttentionConfig, DecoderLayerConfig, LayernormConfig, MLPConfig +from ..quantization_utils import quantize_linear +from ..tensor_utils import get_tensor_parallel_group + + +def _get_hidden_act(act_func): + """Returns the name of the hidden activation functon based on ACT2FN.""" + if isinstance(act_func, str): + return act_func + + for name, func in ACT2FN.items(): + if isinstance(func, tuple): + if isinstance(act_func, func[0]): + return name + elif isinstance(act_func, func): + return name + assert False, f"Cannot find name for {act_func}" + + +class DecoderLayerConfigBuilder(ABC): + """A config builder that translate the LLM decoder layer to the DecoderLayerConfig.""" + + @abstractmethod + def hidden_act_fn(self, layer): + """Returns the hidden act fn in the MLP layer, e.g. SiLUActivation or NewGELUActivation.""" + pass + + @abstractmethod + def infer_num_attention_heads(self, layer): + """Returns the num of attention heads of the layer.""" + pass + + @abstractmethod + def infer_max_position_embeddings(self, layer): + """Returns the max positional embeddings of the layer.""" + pass + + @abstractmethod + def build_input_layernorm(self, layer) -> LayernormConfig: + """Returns the built input layernorm layer.""" + pass + + @abstractmethod + def build_attention(self, layer) -> AttentionConfig: + """Returns the built attention layer.""" + pass + + @abstractmethod + def build_mlp(self, layer) -> MLPConfig: + """Returns the built mlp layer.""" + pass + + @abstractmethod + def build_post_layernorm(self, layer) -> Optional[LayernormConfig]: + """Returns the built post layernorm.""" + pass + + def __init__( + self, decoder_type: str, dtype: trt.DataType = trt.float16, rank: int = 0, tensor_parallel: int = 1, + ): + """Initializes the DecoderLayerConfigBuilder.""" + self.decoder_type = decoder_type + self.dtype = dtype + self.rank = rank + self.tensor_parallel = tensor_parallel + + def build_layer(self, layer) -> DecoderLayerConfig: + """Builds the decoder layer and returns the DecoderLayer.""" + decoder = DecoderLayerConfig() + + decoder.decoder_type = self.decoder_type + decoder.num_attention_heads = self.infer_num_attention_heads(layer) + decoder.num_kv_heads = self.infer_num_kv_heads(layer) + decoder.max_position_embeddings = self.infer_max_position_embeddings(layer) + + decoder.input_layernorm = self.build_input_layernorm(layer) + decoder.attention = self.build_attention(layer) + decoder.post_layernorm = self.build_post_layernorm(layer) + decoder.mlp = self.build_mlp(layer) + decoder.mlp.hidden_act = _get_hidden_act(self.hidden_act_fn(layer)).split("_")[0] + + return decoder + + def infer_num_kv_heads(self, layer): + """Returns the num of key value heads of the layer.""" + return self.infer_num_attention_heads(layer) + + +class DecoderLayerBuilder(ABC): + """An abstracted transformer decoder layer with tensorrt_llm implementation taking DecoderLayerConfig as the input. + + Individual decoder layers are supposed to extend this class and implement the customized + abstracted method. + """ + + @abstractmethod + def build_decoder(self, layer): + """Returns the built decoder layer.""" + pass + + def __init__( + self, + layer: DecoderLayerConfig, + layer_id: int, + num_layers: int, + dtype: trt.DataType = trt.float16, + quantization: str = QUANTIZATION_NONE, + rank: int = 0, + tensor_parallel: int = 1, + ): + """Initializes the DecoderLayer.""" + super().__init__() + assert isinstance(dtype, trt.DataType) + self.layer_id = layer_id + self.num_layers = num_layers + self.dtype = dtype + self.quantization = quantization + self.rank = rank + self.tensor_parallel = tensor_parallel + self.tp_group = get_tensor_parallel_group(tensor_parallel) + + self.hidden_size = layer.hidden_size + self.num_attention_heads = layer.num_attention_heads + self.num_kv_heads = layer.num_kv_heads if layer.num_kv_heads > 0 else layer.num_attention_heads + + assert ( + self.num_attention_heads % self.num_kv_heads + ) == 0, "MQA/GQA requires the number of heads to be divisible by the number of K/V heads." + assert (self.num_kv_heads % self.tensor_parallel) == 0 or (self.tensor_parallel % self.num_kv_heads) == 0, ( + "MQA/GQA requires either the number of K/V heads to be divisible by the number of GPUs" + " OR the number of GPUs to be divisible by the number of K/V heads." + ) + + self.max_position_embeddings = layer.max_position_embeddings + self.hidden_act = layer.mlp.hidden_act + + self.decoder = self.build_decoder(layer) + self.assign_weights(layer) + self.quantize(layer) + + def assign_weights(self, layer: DecoderLayerConfig): + """Assign the weights to the attention tensorrt_llm layer.""" + self.decoder.input_layernorm.weight.value = layer.input_layernorm.weight + if layer.input_layernorm.bias is not None: + self.decoder.input_layernorm.bias.value = layer.input_layernorm.bias + + self.decoder.attention.qkv.weight.value = layer.attention.qkv.weight + if layer.attention.qkv.bias is not None: + self.decoder.attention.qkv.bias.value = layer.attention.qkv.bias + + self.decoder.attention.dense.weight.value = layer.attention.dense.weight + if self.decoder.attention.dense.bias is not None: + self.decoder.attention.dense.bias.value = layer.attention.dense.bias + + if layer.post_layernorm is not None: + self.decoder.post_layernorm.weight.value = layer.post_layernorm.weight + if layer.post_layernorm.bias is not None: + self.decoder.post_layernorm.bias.value = layer.post_layernorm.bias + + self.decoder.mlp.fc.weight.value = layer.mlp.fc.weight + self.decoder.mlp.proj.weight.value = layer.mlp.proj.weight + bias = layer.mlp.fc.bias is not None + if bias: + self.decoder.mlp.fc.bias.value = layer.mlp.fc.bias + self.decoder.mlp.proj.bias.value = layer.mlp.proj.bias + + if layer.mlp.gate: + self.decoder.mlp.gate.weight.value = layer.mlp.gate.weight + if bias: + self.decoder.mlp.gate.bias.value = layer.mlp.gate.bias + + def quantize(self, layer: DecoderLayerConfig): + """Quantizes the decoder layer based on the layer config.""" + self.decoder.attention.qkv = quantize_linear( + self.decoder.attention.qkv, self.quantization, layer.attention.qkv + ) + self.decoder.attention.dense = quantize_linear( + self.decoder.attention.dense, self.quantization, layer.attention.dense + ) + self.decoder.mlp.fc = quantize_linear(self.decoder.mlp.fc, self.quantization, layer.mlp.fc) + self.decoder.mlp.proj = quantize_linear(self.decoder.mlp.proj, self.quantization, layer.mlp.proj) + + if hasattr(self.decoder.mlp, "gate"): + self.decoder.mlp.gate = quantize_linear(self.decoder.mlp.gate, self.quantization, layer.mlp.gate) diff --git a/nemo/export/trt_llm/decoder/gpt.py b/nemo/export/trt_llm/decoder/gpt.py new file mode 100644 index 000000000000..531e9deebcca --- /dev/null +++ b/nemo/export/trt_llm/decoder/gpt.py @@ -0,0 +1,99 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""The GPT2 decoder implementation.""" + +from typing import Optional + +from tensorrt_llm.layers import AttentionMaskType, PositionEmbeddingType +from tensorrt_llm.models.gpt.model import GPTDecoderLayer +from typing_extensions import override + +from ..model_config import LINEAR_COLUMN, LINEAR_ROW, AttentionConfig, LayernormConfig, LinearConfig, MLPConfig +from .decoder import DecoderLayerBuilder, DecoderLayerConfigBuilder + + +class GPTDecoderLayerConfigBuilder(DecoderLayerConfigBuilder): + """The GPT2 implementation of the DecoderLayerConfigBuilder.""" + + @override + def hidden_act_fn(self, layer): + return layer.mlp.act + + @override + def infer_num_attention_heads(self, layer): + return layer.attn.num_heads + + @override + def infer_max_position_embeddings(self, layer): + return layer.attn.bias.shape[2] + + @override + def build_input_layernorm(self, layer) -> LayernormConfig: + return LayernormConfig.from_nn_module(layer.ln_1, dtype=self.dtype) + + @override + def build_attention(self, layer) -> AttentionConfig: + config = AttentionConfig() + config.qkv = LinearConfig.from_qkv_nn_modules( + [layer.attn.c_attn], rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, + ) + + config.dense = LinearConfig.from_nn_module( + layer.attn.c_proj, LINEAR_ROW, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, + ) + + return config + + @override + def build_mlp(self, layer) -> MLPConfig: + config = MLPConfig() + config.fc = LinearConfig.from_nn_module( + layer.mlp.c_fc, LINEAR_COLUMN, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, + ) + config.proj = LinearConfig.from_nn_module( + layer.mlp.c_proj, LINEAR_ROW, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, + ) + + return config + + @override + def build_post_layernorm(self, layer) -> Optional[LayernormConfig]: + return LayernormConfig.from_nn_module(layer.ln_2, dtype=self.dtype) + + +class GPTDecoderLayerBuilder(DecoderLayerBuilder): + """The GPT implementation of the DecoderLayer.""" + + @override + def build_decoder(self, layer): + rotary_pct = layer.rotary_pct + position_embedding_type = ( + PositionEmbeddingType.learned_absolute if rotary_pct == 0.0 else PositionEmbeddingType.rope_gpt_neox + ) + + bias_qkv = layer.attention.qkv.bias is not None + + return GPTDecoderLayer( + hidden_size=self.hidden_size, + num_attention_heads=self.num_attention_heads, + max_position_embeddings=self.max_position_embeddings, + num_layers=self.num_layers, + dtype=self.dtype, + apply_query_key_layer_scaling=False, + attention_mask_type=AttentionMaskType.causal, + hidden_act=self.hidden_act, + position_embedding_type=position_embedding_type, + rotary_embedding_percentage=rotary_pct, + inter_size=layer.ffn_hidden_size_local * self.tensor_parallel, + bias=bias_qkv, + tp_group=self.tp_group, + tp_size=self.tensor_parallel, + ) diff --git a/nemo/export/trt_llm/decoder/gptj.py b/nemo/export/trt_llm/decoder/gptj.py new file mode 100644 index 000000000000..5edb679523e8 --- /dev/null +++ b/nemo/export/trt_llm/decoder/gptj.py @@ -0,0 +1,94 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""The GPTJ decoder implementation.""" + +from typing import Optional + +from tensorrt_llm.models.gptj.model import GPTJDecoderLayer +from typing_extensions import override + +from ..model_config import LINEAR_COLUMN, LINEAR_ROW, AttentionConfig, LayernormConfig, LinearConfig, MLPConfig +from .decoder import DecoderLayerBuilder, DecoderLayerConfigBuilder + + +class GPTJDecoderLayerConfigBuilder(DecoderLayerConfigBuilder): + """The GPTJ implementation of the DecoderLayerConfigBuilder.""" + + @override + def hidden_act_fn(self, layer): + """Returns the hidden act fn in the MLP layer, e.g. SiLUActivation or NewGELUActivation.""" + return layer.mlp.act + + @override + def infer_num_attention_heads(self, layer): + return layer.attn.num_attention_heads + + @override + def infer_max_position_embeddings(self, layer): + return layer.attn.bias.shape[2] + + @override + def build_input_layernorm(self, layer) -> LayernormConfig: + return LayernormConfig.from_nn_module(layer.ln_1, dtype=self.dtype) + + @override + def build_attention(self, layer) -> AttentionConfig: + config = AttentionConfig() + config.qkv = LinearConfig.from_qkv_nn_modules( + [layer.attn.q_proj, layer.attn.k_proj, layer.attn.v_proj], + rank=self.rank, + tensor_parallel=self.tensor_parallel, + dtype=self.dtype, + ) + + config.dense = LinearConfig.from_nn_module( + layer.attn.out_proj, LINEAR_ROW, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, + ) + + config.rotary_dim = layer.attn.rotary_dim + + return config + + @override + def build_mlp(self, layer) -> MLPConfig: + config = MLPConfig() + config.fc = LinearConfig.from_nn_module( + layer.mlp.fc_in, LINEAR_COLUMN, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, + ) + config.proj = LinearConfig.from_nn_module( + layer.mlp.fc_out, LINEAR_ROW, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, + ) + + return config + + @override + def build_post_layernorm(self, layer) -> Optional[LayernormConfig]: + # GPTJ do not have post layer_norm + return None + + +class GPTJDecoderLayerBuilder(DecoderLayerBuilder): + """The GPTJ implementation of the DecoderLayer.""" + + @override + def build_decoder(self, layer): + assert self.tensor_parallel == 1 and self.rank == 0, "Only single GPU is supported for GPTJ" + + return GPTJDecoderLayer( + hidden_size=self.hidden_size, + num_attention_heads=self.num_attention_heads, + max_position_embeddings=self.max_position_embeddings, + rotary_dim=layer.attention.rotary_dim, + dtype=self.dtype, + hidden_act=self.hidden_act, + tp_group=self.tp_group, + tp_size=self.tensor_parallel, + ) diff --git a/nemo/export/trt_llm/decoder/llama.py b/nemo/export/trt_llm/decoder/llama.py new file mode 100644 index 000000000000..01d48ba20c8e --- /dev/null +++ b/nemo/export/trt_llm/decoder/llama.py @@ -0,0 +1,101 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""The LLAMA/LLAMA2 decoder implementation.""" + +from typing import Optional + +from tensorrt_llm.functional import non_gated_version +from tensorrt_llm.layers import AttentionMaskType, PositionEmbeddingType +from tensorrt_llm.models.llama.model import LLaMADecoderLayer +from typing_extensions import override + +from ..model_config import LINEAR_COLUMN, LINEAR_ROW, AttentionConfig, LayernormConfig, LinearConfig, MLPConfig +from .decoder import DecoderLayerBuilder, DecoderLayerConfigBuilder + + +class LLAMADecoderLayerConfigBuilder(DecoderLayerConfigBuilder): + """The LLAMA implementation of the DecoderLayerConfigBuilder.""" + + @override + def hidden_act_fn(self, layer): + return layer.mlp.act_fn + + @override + def infer_num_attention_heads(self, layer): + return layer.self_attn.num_heads + + @override + def infer_num_kv_heads(self, layer): + return layer.self_attn.num_key_value_heads + + @override + def infer_max_position_embeddings(self, layer): + return layer.self_attn.max_position_embeddings + + @override + def build_input_layernorm(self, layer) -> LayernormConfig: + return LayernormConfig.from_nn_module(layer.input_layernorm, dtype=self.dtype) + + @override + def build_attention(self, layer) -> AttentionConfig: + config = AttentionConfig() + config.qkv = LinearConfig.from_qkv_nn_modules( + [layer.self_attn.q_proj, layer.self_attn.k_proj, layer.self_attn.v_proj], + rank=self.rank, + tensor_parallel=self.tensor_parallel, + dtype=self.dtype, + ) + + config.dense = LinearConfig.from_nn_module( + layer.self_attn.o_proj, LINEAR_ROW, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, + ) + + return config + + @override + def build_mlp(self, layer) -> MLPConfig: + config = MLPConfig() + config.fc = LinearConfig.from_nn_module( + layer.mlp.gate_proj, LINEAR_COLUMN, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, + ) + config.proj = LinearConfig.from_nn_module( + layer.mlp.down_proj, LINEAR_ROW, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, + ) + config.gate = LinearConfig.from_nn_module( + layer.mlp.up_proj, LINEAR_COLUMN, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, + ) + + return config + + @override + def build_post_layernorm(self, layer) -> Optional[LayernormConfig]: + return LayernormConfig.from_nn_module(layer.post_attention_layernorm, dtype=self.dtype) + + +class LLAMADecoderLayerBuilder(DecoderLayerBuilder): + """The LLAMA implementation of the DecoderLayer.""" + + @override + def build_decoder(self, layer): + return LLaMADecoderLayer( + layer_id=self.layer_id, + hidden_size=self.hidden_size, + num_attention_heads=self.num_attention_heads, + num_kv_heads=self.num_kv_heads, + max_position_embeddings=self.max_position_embeddings, + dtype=self.dtype, + attention_mask_type=AttentionMaskType.causal, + hidden_act=non_gated_version(self.hidden_act), + position_embedding_type=PositionEmbeddingType.rope_gpt_neox, + mlp_hidden_size=layer.ffn_hidden_size_local * self.tensor_parallel, + tp_group=self.tp_group, + tp_size=self.tensor_parallel, + ) diff --git a/nemo/export/trt_llm/huggingface_utils.py b/nemo/export/trt_llm/huggingface_utils.py new file mode 100644 index 000000000000..6263183bbefb --- /dev/null +++ b/nemo/export/trt_llm/huggingface_utils.py @@ -0,0 +1,138 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""The APIs to convert a huggingface model to model_config format.""" + +import copy +from typing import List, Tuple + +import numpy as np +import torch.nn as nn +from tensorrt_llm import str_dtype_to_trt +from transformers.models.llama.modeling_llama import LlamaRMSNorm + +from .decoder import build_decoder_layer_config +from .model_config import LINEAR_COLUMN, EmbeddingConfig, LayernormConfig, LinearConfig, ModelConfig +from .tensor_utils import split, torch_to_numpy_with_dtype + + +def _arch_to_decoder_type(arch: str): + arch_to_type = { + "GPT2LMHeadModel": "gpt2", + "GPTJForCausalLM": "gptj", + "LlamaForCausalLM": "llama", + } + return arch_to_type.get(arch, "") + + +def _check_model_compatibility(model: nn.Module) -> Tuple[bool, bool]: + """Returns whether the model is supported with the torch_to_tensorrt_llm API. + + And if positional embedding layer exists. + + We assumes the model to be assembled with one or two embedding layers, + a ModuleList of transformer decoders, + and a final layernorm. + Otherwise it will not be supported. + """ + num_embeddings = 0 + num_module_list = 0 + num_layer_norm = 0 + for module in model.children(): + if type(module) == nn.Embedding: + num_embeddings += 1 + elif type(module) == nn.ModuleList: + num_module_list += 1 + elif type(module) in [nn.LayerNorm, LlamaRMSNorm]: + num_layer_norm += 1 + + return ( + 1 <= num_embeddings and num_embeddings <= 2 and num_module_list == 1 and num_layer_norm == 1, + num_embeddings > 1, + ) + + +def _get_transformer_model(model: nn.Module) -> nn.Module: + """Returns the root module of the transformer model.""" + if hasattr(model, "transformer"): + # This is a LMHead model + return model.transformer + elif hasattr(model, "model"): + # LLAMA + return model.model + return model + + +def torch_to_model_config(model: nn.Module, gpus: int = 1,) -> List[ModelConfig]: + """The API to convert a torch or huggingface model to the ModelConfig format. + + The model has to be an LLM that we support for a successful conversion. + (See examples/deploy/llm/README.md.) + gpus: the number of inference gpus for multi gpu inferencing. + + Returns: + The list of converted ModelConfig, one for each gpu. + """ + transformer = _get_transformer_model(model) + + compatible, has_positional_embedding = _check_model_compatibility(transformer) + assert compatible, f"model {transformer} not supported" + + assert ( + model.config.architectures and len(model.config.architectures) >= 1 + ), f"Huggingface model config {model.config} does not have architectures" + + model_config_template = ModelConfig() + model_config_template.dtype = "float16" + dtype = str_dtype_to_trt(model_config_template.dtype) + + model_config_template.tensor_parallel = gpus + + for name, module in transformer.named_children(): + if type(module) == nn.Embedding: + if name != "wpe": + model_config_template.vocab_embedding = EmbeddingConfig.from_nn_module(module, dtype=dtype) + else: + assert has_positional_embedding + model_config_template.positional_embedding = EmbeddingConfig.from_nn_module(module, dtype=dtype) + if type(module) in [nn.LayerNorm, LlamaRMSNorm]: + model_config_template.final_layernorm = LayernormConfig.from_nn_module(module, dtype=dtype) + + model_configs = [] + for i in range(gpus): + model_configs.append(copy.deepcopy(model_config_template)) + model_configs[i].rank = i + + decoder_type = _arch_to_decoder_type(model.config.architectures[0]) + for name, module in transformer.named_children(): + if type(module) == nn.ModuleList: + for layer in module: + for i in range(gpus): + model_configs[i].layers.append( + build_decoder_layer_config(layer, decoder_type, rank=i, tensor_parallel=gpus, dtype=dtype) + ) + + if hasattr(model, "lm_head"): + lm_head_weight = torch_to_numpy_with_dtype(model.lm_head.weight, dtype=dtype) + else: + # We use wte weights if not provided. + lm_head_weight = model_configs[0].vocab_embedding.weight + + if model_configs[0].vocab_size_padded != model_configs[0].vocab_size: + pad_width = model_configs[0].vocab_size_padded - model_configs[0].vocab_size + lm_head_weight = np.pad(lm_head_weight, ((0, pad_width), (0, 0)), "constant", constant_values=0) + + for i in range(gpus): + model_configs[i].lm_head = LinearConfig(linear_type=LINEAR_COLUMN) + model_configs[i].lm_head.weight = np.ascontiguousarray( + split(lm_head_weight, model_configs[i].tensor_parallel, model_configs[i].rank) + ) + + return model_configs diff --git a/nemo/export/trt_llm/model_config.py b/nemo/export/trt_llm/model_config.py new file mode 100644 index 000000000000..b9515dd162a7 --- /dev/null +++ b/nemo/export/trt_llm/model_config.py @@ -0,0 +1,415 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""This module defines the model_config format. + +This format can be converted from huggingface, nemo or ammo quantized model. +And we will build tensorrt_llm engine from the context saved with this format. +""" + +import dataclasses +from dataclasses import dataclass, field +from typing import Dict, List, get_args, get_origin + +import numpy as np +import tensorrt as trt +import torch.nn as nn +from tensorrt_llm._utils import pad_vocab_size +from tensorrt_llm.functional import is_gated_activation +from transformers import LlamaConfig, PretrainedConfig +from transformers.models.llama.modeling_llama import LlamaRMSNorm + +from .tensor_utils import get_tensor_from_dict, split, torch_to_numpy_with_dtype + +DECODER_GPT2 = "gpt2" +DECODER_GPTJ = "gptj" +DECODER_LLAMA = "llama" +DECODER_GPTNEXT = "gptnext" + +QUANTIZATION_NONE = "" +QUANTIZATION_FP8 = "fp8" +QUANTIZATION_INT8_SQ = "int8_sq" + +LINEAR_COLUMN = "column" +LINEAR_ROW = "row" + +LAYERNORM_DEFAULT = "" +LAYERNORM_RMS = "rms" + +LAYER_DEFAULT = "" +LAYER_QKV = "qkv" + + +@dataclass +class EmbeddingConfig: + """The embedding layer config.""" + + weight: np.array = None + # Whether the embedding weights are local + is_local: bool = False + + @staticmethod + def from_nn_module(module: nn.Module, dtype=trt.float16): + """Converts an nn.Module to an EmbeddingConfig.""" + return EmbeddingConfig(weight=torch_to_numpy_with_dtype(module.weight, dtype)) + + @property + def local_vocab_size(self): + """Infers the vocab_size from the embedding layer weights shape.""" + return self.weight.shape[0] + + @property + def hidden_size(self): + """Infers the hidden_size from the embedding layer weights shape.""" + return self.weight.shape[1] + + +@dataclass +class LayernormConfig: + """The layernorm layer config.""" + + weight: np.array = None + bias: np.array = None + layernorm_type: str = LAYERNORM_DEFAULT + + @staticmethod + def from_nn_module(module: nn.Module, dtype=trt.float16): + """Converts an nn.Module to an LayernormConfig.""" + layernorm_type = LAYERNORM_RMS if type(module) == LlamaRMSNorm else LAYERNORM_DEFAULT + + config = LayernormConfig(weight=torch_to_numpy_with_dtype(module.weight, dtype), layernorm_type=layernorm_type) + if layernorm_type == LAYERNORM_DEFAULT: + config.bias = torch_to_numpy_with_dtype(module.bias, dtype) + + return config + + +@dataclass +class LinearConfig: + """The linear layer config.""" + + linear_type: str = "" + weight: np.array = None + bias: np.array = None + activation_scaling_factor: np.array = None + weights_scaling_factor: np.array = None + prequant_scaling_factor: np.array = None + layer_type: str = LAYER_DEFAULT + + @staticmethod + def from_nn_module(module: nn.Module, linear_type: str, rank=0, tensor_parallel=1, dtype=trt.float16): + """Converts an nn.Module to an LinearConfig.""" + weight = torch_to_numpy_with_dtype(module.weight, dtype) + if "Conv1D" in type(module).__name__: + weight = weight.transpose() + else: + assert type(module) == nn.Linear + + config = LinearConfig() + config.linear_type = linear_type + config.weight = np.ascontiguousarray( + split(weight, tensor_parallel, rank, dim=0 if linear_type == LINEAR_COLUMN else 1) + ) + + if hasattr(module, "bias") and module.bias is not None: + if linear_type == LINEAR_COLUMN: + config.bias = np.ascontiguousarray( + split(torch_to_numpy_with_dtype(module.bias, dtype), tensor_parallel, rank,) + ) + else: + config.bias = torch_to_numpy_with_dtype(module.bias, dtype) + + return config + + @staticmethod + def from_qkv_nn_modules(qkv_modules: List[nn.Module], rank=0, tensor_parallel=1, dtype=trt.float16): + """Converts the qkv modules to an LinearConfig.""" + config = LinearConfig() + config.linear_type = LINEAR_COLUMN + config.layer_type = LAYER_QKV + if len(qkv_modules) == 1: + # QKV layers combined as a single module, e.g. GPT2 + qkv_module = qkv_modules[0] + assert "Conv1D" in type(qkv_module).__name__ + + qkv_shape = qkv_module.weight.shape + # Decode the concat QKV weights and split them to different GPU rank. + config.weight = np.ascontiguousarray( + split( + torch_to_numpy_with_dtype(qkv_module.weight, dtype=dtype).reshape( + qkv_shape[0], 3, qkv_shape[-1] // 3 + ), + tensor_parallel, + rank, + dim=-1, + ) + .reshape(qkv_shape[0], -1) + .transpose() + ) + config.bias = np.ascontiguousarray( + split( + torch_to_numpy_with_dtype(qkv_module.bias, dtype=dtype).reshape(3, qkv_shape[-1] // 3), + tensor_parallel, + rank, + dim=-1, + ).reshape(-1) + ) + + elif len(qkv_modules) == 3: + # Separate QKV layers + for m in qkv_modules: + assert type(m) == nn.Linear + assert not (hasattr(m, "bias") and m.bias is not None) + + q_weight = split(torch_to_numpy_with_dtype(qkv_modules[0].weight), tensor_parallel, rank) + k_weight = split(torch_to_numpy_with_dtype(qkv_modules[1].weight), tensor_parallel, rank) + v_weight = split(torch_to_numpy_with_dtype(qkv_modules[2].weight), tensor_parallel, rank) + split_v = np.concatenate((q_weight, k_weight, v_weight)) + config.weight = np.ascontiguousarray(split_v) + + else: + assert False, f"QKV modules format {qkv_modules} not supported" + + return config + + +@dataclass +class AttentionConfig: + """The attention layer config.""" + + qkv: LinearConfig = None + dense: LinearConfig = None + + rotary_dim: int = -np.inf + + @staticmethod + def from_nemo( + weights_dict: Dict[str, np.ndarray], layer_id: int, rank: int = 0, + ): + """Converts the nemo weights and config to `AttentionConfig`.""" + attention = AttentionConfig() + attention.qkv = LinearConfig(linear_type=LINEAR_COLUMN, layer_type=LAYER_QKV) + attention.qkv.weight = get_tensor_from_dict( + weights_dict, f"layers.{layer_id}.attention.query_key_value.weight.{rank}" + ) + attention.qkv.bias = get_tensor_from_dict( + weights_dict, f"layers.{layer_id}.attention.query_key_value.bias.{rank}", + ) + + attention.dense = LinearConfig(linear_type=LINEAR_ROW) + attention.dense.weight = get_tensor_from_dict(weights_dict, f"layers.{layer_id}.attention.dense.weight.{rank}") + attention.dense.bias = get_tensor_from_dict(weights_dict, f"layers.{layer_id}.attention.dense.bias",) + return attention + + +@dataclass +class MLPConfig: + """The MLP layer config.""" + + fc: LinearConfig = None + gate: LinearConfig = None + proj: LinearConfig = None + hidden_act: str = "" + + @staticmethod + def from_nemo( + weights_dict: Dict[str, np.ndarray], + llm_config: PretrainedConfig, + layer_id: int, + rank: int = 0, + is_mcore: bool = False, + ): + """Converts the nemo weights and config to `MLPConfig`.""" + mlp = MLPConfig(hidden_act=llm_config.activation_function) + mlp.fc = LinearConfig(linear_type=LINEAR_COLUMN) + mlp.fc.weight = get_tensor_from_dict(weights_dict, f"layers.{layer_id}.mlp.dense_h_to_4h.weight.{rank}") + mlp.fc.bias = get_tensor_from_dict(weights_dict, f"layers.{layer_id}.mlp.dense_h_to_4h.bias.{rank}",) + + gated = is_gated_activation(mlp.hidden_act) + is_fast_glu = mlp.hidden_act in ['fast-geglu', 'fast-swiglu', 'fast-reglu'] + if gated: + mlp.gate = LinearConfig(linear_type=LINEAR_COLUMN) + layer_name = ( + f"layers.{layer_id}.mlp.dense_h_to_4h_2.weight.{rank}" + if isinstance(llm_config, LlamaConfig) and not is_mcore and not is_fast_glu + else f"layers.{layer_id}.mlp.dense_h_to_4h.gate.weight.{rank}" + ) + mlp.gate.weight = get_tensor_from_dict(weights_dict, layer_name,) + mlp.gate.bias = get_tensor_from_dict( + weights_dict, f"layers.{layer_id}.mlp.dense_h_to_4h.gate.bias.{rank}", + ) + + mlp.proj = LinearConfig(linear_type=LINEAR_ROW) + mlp.proj.weight = get_tensor_from_dict(weights_dict, f"layers.{layer_id}.mlp.dense_4h_to_h.weight.{rank}") + mlp.proj.bias = get_tensor_from_dict(weights_dict, f"layers.{layer_id}.mlp.dense_4h_to_h.bias") + return mlp + + +@dataclass +class DecoderLayerConfig: + """The decoder layer config.""" + + decoder_type: str = "" + input_layernorm: LayernormConfig = None + attention: AttentionConfig = None + post_layernorm: LayernormConfig = None + mlp: MLPConfig = None + + num_attention_heads: int = 0 + + num_kv_heads: int = 0 + max_position_embeddings: int = 0 + rotary_pct: float = 0 + + @property + def hidden_size(self): + """Returns the hidden size of the transformer model.""" + return self.mlp.fc.weight.shape[1] + + @property + def ffn_hidden_size_local(self): + """Returns the ffn hidden size of the transformer model.""" + return self.mlp.fc.weight.shape[0] + + @staticmethod + def from_nemo( + weights_dict: Dict[str, np.ndarray], + llm_config: PretrainedConfig, + decoder_type: str, + layer_id: int, + rank: int = 0, + is_mcore: bool = False, + ): + """Converts the nemo weights and config to `DecoderLayerConfig`.""" + layer_config = DecoderLayerConfig( + decoder_type=decoder_type, + num_attention_heads=llm_config.n_head, + max_position_embeddings=llm_config.n_positions, + rotary_pct=llm_config.rotary_pct if hasattr(llm_config, "rotary_pct") else 0, + num_kv_heads=(llm_config.num_kv_heads if hasattr(llm_config, "num_kv_heads") else 0), + ) + layer_config.input_layernorm = LayernormConfig() + layer_config.input_layernorm.layernorm_type = ( + LAYERNORM_RMS if isinstance(llm_config, LlamaConfig) else LAYERNORM_DEFAULT + ) + layer_config.input_layernorm.weight = get_tensor_from_dict( + weights_dict, f"layers.{layer_id}.input_layernorm.weight", + ) + layer_config.input_layernorm.bias = get_tensor_from_dict( + weights_dict, f"layers.{layer_id}.input_layernorm.bias", + ) + layer_config.post_layernorm = LayernormConfig() + layer_config.post_layernorm.layernorm_type = ( + LAYERNORM_RMS if isinstance(llm_config, LlamaConfig) else LAYERNORM_DEFAULT + ) + + layer_config.post_layernorm.weight = get_tensor_from_dict( + weights_dict, f"layers.{layer_id}.post_attention_layernorm.weight", + ) + layer_config.post_layernorm.bias = get_tensor_from_dict( + weights_dict, f"layers.{layer_id}.post_attention_layernorm.bias", + ) + + layer_config.attention = AttentionConfig.from_nemo(weights_dict, layer_id, rank,) + layer_config.mlp = MLPConfig.from_nemo(weights_dict, llm_config, layer_id, rank, is_mcore) + + return layer_config + + +def _from_dict(class_type, data): + """Helper function to load the data as a class_type. class_type must be a dataclass.""" + if data is None: + return None + + if dataclasses.is_dataclass(class_type): + fieldtypes = {f.name: f.type for f in dataclasses.fields(class_type)} + return class_type(**{f: _from_dict(fieldtypes[f], data[f]) for f in data}) + elif get_origin(class_type) == list and dataclasses.is_dataclass(get_args(class_type)[0]): + list_value = [] + for child in data: + child_class_type = get_args(class_type)[0] + list_value.append(_from_dict(child_class_type, child)) + return list_value + else: + return data + + +@dataclass +class ModelConfig: + """The full LLM model config that includes the full information needed for tensorrt_llm engine building. + + This class includes all the fields that tensorrt_llm supports, but not all of the fields are required. + """ + + # Global metadata + quantization: str = QUANTIZATION_NONE + dtype: str = "float16" + + # Parallel metadata + rank: int = 0 + tensor_parallel: int = 1 + + # Model structure and weights + vocab_embedding: EmbeddingConfig = None + positional_embedding: EmbeddingConfig = None + layers: List[DecoderLayerConfig] = field(default_factory=list) + final_layernorm: LayernormConfig = None + lm_head: LinearConfig = None + + # Ptuning metadata + use_prompt_tuning: bool = False + + def to_dict(self) -> dict: + """Converts the instance to a python dict.""" + return dataclasses.asdict(self) + + @staticmethod + def from_dict(d: dict): + """Load a dict to a `ModelConfig` instance.""" + return _from_dict(ModelConfig, d) + + @property + def vocab_size(self): + """Returns the vocab_size of the model.""" + return ( + self.vocab_embedding.local_vocab_size * self.tensor_parallel + if self.vocab_embedding.is_local + else self.vocab_embedding.local_vocab_size + ) + + @property + def vocab_size_padded(self): + """Returns the padded vocab_size of the model rounds to the tensor_parallel.""" + return pad_vocab_size(self.vocab_size, self.tensor_parallel) + + @property + def hidden_size(self): + """Returns the hidden_size of the model.""" + return self.vocab_embedding.hidden_size + + @property + def max_position_embeddings(self): + """Returns the max_position_embedding of the model.""" + return self.layers[0].max_position_embeddings + + @property + def num_attention_heads(self): + """Returns the num_attention_heads of the model.""" + return self.layers[0].num_attention_heads + + @property + def num_kv_heads(self): + """Returns the num_key_value_heads of the model.""" + return self.layers[0].num_kv_heads if self.layers[0].num_kv_heads > 0 else self.num_attention_heads + + @property + def hidden_act(self): + """Returns the hidden_act of the model.""" + return self.layers[0].mlp.hidden_act diff --git a/nemo/export/trt_llm/model_config_trt.py b/nemo/export/trt_llm/model_config_trt.py new file mode 100644 index 000000000000..45049370c2c4 --- /dev/null +++ b/nemo/export/trt_llm/model_config_trt.py @@ -0,0 +1,65 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""The API convert the model_config format to tensorrt_llm.""" + +import os +import shutil +from pathlib import Path +from typing import List, Union + +import psutil + +from .model_config import ModelConfig +from .tensorrt_llm_model import LMHeadModelBuilder + + +def model_config_to_tensorrt_llm( + model_configs: List[ModelConfig], + engine_dir: Union[str, Path], + gpus: int = 1, + max_input_len: int = 200, + max_output_len: int = 200, + max_batch_size: int = 1, + max_beam_width: int = 1, + max_prompt_embedding_table_size: int = 100, +): + """The API to convert a torch or huggingface model represented as ModelConfig to tensorrt_llm. + + Args: + model_configs: The list of ModelConfig converted, 1 for each GPU. + engine_dir: The target output directory to save the built tensorrt_llm engines. + gpus: the number of inference gpus for multi gpu inferencing. + max_input_len: The max input sequence length. + max_output_len: The max output sequence length. + max_batch_size: The max batch size. + max_beam_width: The max beam search width. + """ + engine_dir = Path(engine_dir) + if os.path.exists(engine_dir): + shutil.rmtree(engine_dir) + + print("Before engine building, CPU RAM Used (GB):" f" {psutil.Process().memory_info().rss / 1024 / 1024 / 1024}") + for rank in range(gpus): + model_configs[rank].use_prompt_tuning = max_prompt_embedding_table_size > 0 + builder = LMHeadModelBuilder(model_configs[rank]) + builder.build( + output_dir=engine_dir, + max_input_len=max_input_len, + max_output_len=max_output_len, + max_batch_size=max_batch_size, + max_beam_width=max_beam_width, + parallel_build=False, + max_prompt_embedding_table_size=max_prompt_embedding_table_size, + ) + print( + f"After Engine building rank {rank}, CPU RAM Used (GB):" + f" {psutil.Process().memory_info().rss / 1024 / 1024 / 1024}" + ) diff --git a/nemo/export/trt_llm/model_config_utils.py b/nemo/export/trt_llm/model_config_utils.py new file mode 100644 index 000000000000..9bb9dd12510b --- /dev/null +++ b/nemo/export/trt_llm/model_config_utils.py @@ -0,0 +1,238 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""Utils to load and process model_config.""" + +import copy +import json +from dataclasses import fields, is_dataclass +from pathlib import Path +from typing import List, Union + +import numpy as np + +from .model_config import LAYER_QKV, LINEAR_COLUMN, EmbeddingConfig, LinearConfig, ModelConfig + + +def _restore_model_config(model_config, weights): + """Recursively restores the model_config from json and loads np.ndarray weights from weights.""" + if isinstance(model_config, dict): + for k, v in model_config.items(): + if isinstance(v, str) and v.startswith("_np:"): + model_config[k] = weights[v] + else: + _restore_model_config(v, weights) + if isinstance(model_config, list): + for i, v in enumerate(model_config): + if isinstance(v, str) and v.startswith("_np:"): + model_config[i] = weights[v] + else: + _restore_model_config(v, weights) + + +def load_model_configs(model_config_json: Union[str, Path], inference_tensor_parallel: int = 1) -> List[ModelConfig]: + """Loads the model_config saved from ammo export. + + Args: + model_config_json: The exported json file from ammo describing the optimized model. + Inside the same directory, each gpu rank will have its own npz file. + The json file represents the general ModelConfig structure while the detailed + weights for each rank are stored in the npz file. + + Returns: + The list of `ModelConfig` loaded and constructed. + """ + model_config_json = Path(model_config_json) + assert model_config_json.exists() + + with open(model_config_json, "r") as f: + model_config_template = json.load(f) + + tensor_parallel = model_config_template["tensor_parallel"] + assert tensor_parallel > 0, f"Invalid tensor_parallel {tensor_parallel}" + + model_config_dir = model_config_json.parents[0] + + model_configs = [] + for i in range(tensor_parallel): + decoder_type = model_config_template["layers"][0]["decoder_type"] + weights_file = f"{decoder_type}_tp{tensor_parallel}_rank{i}.npz" + weights = dict(np.load(model_config_dir / weights_file)) + model_config = copy.deepcopy(model_config_template) + model_config["rank"] = i + _restore_model_config(model_config, weights) + model_configs.append(ModelConfig.from_dict(model_config)) + + model_configs = _postprocess_model_configs(model_configs, inference_tensor_parallel=inference_tensor_parallel) + + return model_configs + + +def _same_array(arrays: List[np.ndarray]): + return all(np.array_equal(arrays[0], array) for array in arrays[1:]) + + +def _merge_model_configs_to_first(configs): + """This method merges the tensor fields for linear config so the config can be used with fewer GPUs. + + The implementation is recursive. + """ + merged_config = configs[0] + + if isinstance(merged_config, EmbeddingConfig): + if merged_config.is_local: + merged_config.weight = np.ascontiguousarray(np.concatenate([config.weight for config in configs], axis=0)) + + elif isinstance(merged_config, LinearConfig): + # The scaling factors merge rule is summarized as below: + + # S: all ranks should have the same scaling factor. + # M: Pick elementwise max among the ranks. Merged shape same as single rank. + # C: Concat the scaling factors on dim 0. Merged shape == tensor_parallel * original shape. + # RC: Reshape and concat. This is for QKV handling only. Merged shape == tensor_parallel * original shape. + # NA: Not valid / present + + # ws: weight scaling factor + # as: activation scaling factor + # ps: prequant scaling factor + + # C: Colum Linear + # R: Row Linear + # Q: QKV layer + + # F: FP8 + # I: INT8 SQ + + # Merge Rules: + # ws as ps + # FQ M M NA + # FC M M NA + # FR M M NA + # IQ RC M S + # IC C M S + # IR M M C + + # Handling constants + for field_name in ["activation_scaling_factor", "weights_scaling_factor"]: + merged_field_value = getattr(merged_config, field_name) + if merged_field_value is not None and merged_field_value.size == 1: + # Scaling factor is a scalar. + setattr( + merged_config, field_name, np.maximum.reduce([getattr(config, field_name) for config in configs]), + ) + + if merged_config.layer_type == LAYER_QKV: + assert merged_config.linear_type == LINEAR_COLUMN + out_dim = merged_config.weight.shape[0] + new_out_dim = out_dim * len(configs) + in_dim = merged_config.weight.shape[1] + # For QKV weights, the QKV dim should be the out most dim. + merged_config.weight = np.ascontiguousarray( + np.concatenate( + [config.weight.reshape(3, out_dim * in_dim // 3) for config in configs], axis=1 + ).reshape(new_out_dim, in_dim) + ) + for field_name in ["bias", "weights_scaling_factor"]: + merged_field_value = getattr(merged_config, field_name) + if merged_field_value is not None: + if merged_field_value.shape[0] == out_dim: + field_values = [getattr(config, field_name) for config in configs] + setattr( + merged_config, + field_name, + np.ascontiguousarray( + np.concatenate( + [field_value.reshape(3, out_dim // 3) for field_value in field_values], axis=1, + ).reshape(new_out_dim) + ), + ) + + # No op for prequant_scaling_factor + assert _same_array( + [config.prequant_scaling_factor for config in configs] + ), f"Failed to merge config {merged_config} with others" + + else: + # For normal linear layers, we merge column linear on the dim 0 and row on the dim 1 + merge_axis = 0 if merged_config.linear_type == LINEAR_COLUMN else 1 + merged_config.weight = np.ascontiguousarray( + np.concatenate([config.weight for config in configs], axis=merge_axis) + ) + + # Only cat the bias for column linear. + if merged_config.linear_type == LINEAR_COLUMN and merged_config.bias is not None: + merged_config.bias = np.ascontiguousarray(np.concatenate([config.bias for config in configs], axis=0)) + + if merged_config.linear_type == LINEAR_COLUMN: + if merged_config.weights_scaling_factor is not None and merged_config.weights_scaling_factor.size != 1: + # INT8 sq case + merged_config.weights_scaling_factor = np.ascontiguousarray( + np.concatenate([config.weights_scaling_factor for config in configs], axis=0) + ) + if merged_config.prequant_scaling_factor is not None: + assert _same_array( + [config.prequant_scaling_factor for config in configs] + ), f"Failed to merge config {merged_config} with others" + else: + if merged_config.weights_scaling_factor is not None: + merged_config.weights_scaling_factor = np.maximum.reduce( + [config.weights_scaling_factor for config in configs] + ) + if merged_config.prequant_scaling_factor is not None: + merged_config.prequant_scaling_factor = np.ascontiguousarray( + np.concatenate([config.prequant_scaling_factor for config in configs], axis=0) + ) + + elif is_dataclass(merged_config): + for field in fields(merged_config): + _merge_model_configs_to_first([getattr(config, field.name) for config in configs]) + elif isinstance(merged_config, list): + for i in range(len(merged_config)): + _merge_model_configs_to_first([config[i] for config in configs]) + + +def _merge_embedding(model_configs: List[ModelConfig]): + """Merges and replicates the embedding weights to all configs.""" + for embedding_name in ["vocab_embedding", "positional_embedding"]: + embedding_0 = getattr(model_configs[0], embedding_name) + if embedding_0 and embedding_0.is_local: + weights = [getattr(config, embedding_name).weight for config in model_configs] + merged_weight = np.ascontiguousarray(np.concatenate(weights, axis=0)) + for config in model_configs: + getattr(config, embedding_name).weight = merged_weight + getattr(config, embedding_name).is_local = False + + +def _postprocess_model_configs( + model_configs: List[ModelConfig], inference_tensor_parallel: int = 1 +) -> List[ModelConfig]: + """Postprocesses the model configs with trained tensor parallel to target inference tensor parallel.""" + if inference_tensor_parallel < len(model_configs): + # Merge the model_configs to target inferencen tensor parallel. + assert ( + len(model_configs) % inference_tensor_parallel == 0 + ), f"Cannot merge {len(model_configs)} configs to {inference_tensor_parallel}" + + num_configs_per_group = len(model_configs) // inference_tensor_parallel + merged_model_configs = [] + for i in range(inference_tensor_parallel): + model_config_slice = model_configs[i * num_configs_per_group : (i + 1) * num_configs_per_group] + _merge_model_configs_to_first(model_config_slice) + model_config_slice[0].rank = i + model_config_slice[0].tensor_parallel = inference_tensor_parallel + merged_model_configs.append(model_config_slice[0]) + else: + merged_model_configs = model_configs + + # So far we do not support parallel embedding layers yet. + # We will merge the local embedding weights and replicate it to all ranks for now. + _merge_embedding(merged_model_configs) + + return merged_model_configs diff --git a/nemo/export/trt_llm/nemo/convert.py b/nemo/export/trt_llm/nemo/convert.py new file mode 100644 index 000000000000..6f4f02013d1d --- /dev/null +++ b/nemo/export/trt_llm/nemo/convert.py @@ -0,0 +1,343 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""Utilities for exporting a model to our custom format.""" + +import numpy as np +import torch +from tensorrt_llm._utils import torch_to_numpy + +# AMMO modification +# A global dicts to store exported weights. +# This is set to be a global variable to avoid extra code modification from tensorrt_llm. +weights_dict = {} + + +def cpu_map_location(storage, loc): + return storage.cpu() + + +def gpu_map_location(storage, loc): + if loc.startswith("cuda"): + training_gpu_idx = int(loc.split(":")[1]) + inference_gpu_idx = training_gpu_idx % torch.cuda.device_count() + return storage.cuda(inference_gpu_idx) + elif loc.startswith("cpu"): + return storage.cpu() + else: + raise ValueError(f"Not handled {loc}") + + +def save_val(val, dir, key, tp_num=None): + suffix = "bin" if tp_num is None else f"{tp_num}.bin" + # AMMO modification, save to in-memory dict instead of dir. + # Transpose linear layer weights to the correct shape. + if len(val.shape) >= 2: + val = np.ascontiguousarray(np.transpose(val.reshape(val.shape[0], -1), [1, 0])) + global weights_dict + weights_dict[f"model.{key}.{suffix}"] = val + + +def save_split(split_vals, dir, key, i, split_factor): + for j, val in enumerate(split_vals): + save_val(val, dir, key, i * split_factor + j) + + +def generate_int8(weights, act_range, is_qkv=False, multi_query_mode=False): + """This function has two purposes: + - compute quantized weights, scaled either per-tensor or per-column + - compute scaling factors. + + Depending on the GEMM API (CUTLASS/CUBLAS) the required scaling factors differ. + CUTLASS uses two sets of scaling factors. One for the activation X, one for the weight W. + CUBLAS only has one (we can't do per-row scaling). So we must provide pre-multiplied scaling factor. + + Here is the list of what we need (T means per-tensor, C per-column): + - scale_x_orig_quant puts fp activation into the quantized range (i.e. [-128, 127], for int8). + Used before the GEMM. (T) + - scale_y_quant_orig puts quantized activation into the fp range. Used if the GEMM outputs int8. (T) + - scale_w_quant_orig puts weights from quant range to fp range (used with CUTLASS) (T, C) + - scale_y_accum_quant puts the GEMM result (XW) from accumulation range (int32) + to quant range (int8) (used for CUBLAS) (T, C) + + Note that we don't do anything special about row-parallel GEMM. + Theoretically, we could have per-GPU scaling factors too, + but then the model would change depending on the number of GPUs used. + + For QKV projection, the behavior is special. Even if we have a single matrix to perform QKV projection, + we consider it + as three different matrices: Q, K, and V. So per-tensor actually means one scaling factor for each Q, K and V. + """ + # compute weight scaling factors for fp->int8 and int8->fp + if is_qkv and not multi_query_mode: + scale_w_orig_quant_t = 127.0 / act_range["w"].reshape(3, -1).max(dim=-1, keepdims=True)[0].cpu().numpy() + scale_w_orig_quant_c = 127.0 / act_range["w"].reshape(3, -1).cpu().numpy() + elif is_qkv and multi_query_mode: + raise ValueError("Multi-query w/ int8 quant has not been supported yet") + else: + scale_w_orig_quant_t = 127.0 / act_range["w"].max().cpu().numpy() + scale_w_orig_quant_c = 127.0 / act_range["w"].cpu().numpy() + scale_w_quant_orig_t = 1.0 / scale_w_orig_quant_t + scale_w_quant_orig_c = 1.0 / scale_w_orig_quant_c + + # compute the rest of needed scaling factors + scale_x_orig_quant_t = np.array(127.0 / act_range["x"].max().item()) + scale_y_orig_quant_t = np.array(127.0 / act_range["y"].max().item()) + scale_y_quant_orig_t = np.array(act_range["y"].max().item() / 127.0) + scale_y_accum_quant_t = scale_y_orig_quant_t / (scale_x_orig_quant_t * scale_w_orig_quant_t) + scale_y_accum_quant_c = scale_y_orig_quant_t / (scale_x_orig_quant_t * scale_w_orig_quant_c) + if is_qkv: + scale_y_accum_quant_t = np.broadcast_to(scale_y_accum_quant_t, scale_w_orig_quant_c.shape) + scale_w_quant_orig_t = np.broadcast_to(scale_w_quant_orig_t, scale_w_orig_quant_c.shape) + + def to_i8(x): + return x.round().clip(-127, 127).astype(np.int8) + + return { + "weight.int8": to_i8(weights * scale_w_orig_quant_t), + "weight.int8.col": to_i8(weights * scale_w_orig_quant_c), + "scale_x_orig_quant": scale_x_orig_quant_t.astype(np.float32), + "scale_w_quant_orig": scale_w_quant_orig_t.astype(np.float32), + "scale_w_quant_orig.col": scale_w_quant_orig_c.astype(np.float32), + "scale_y_accum_quant": scale_y_accum_quant_t.astype(np.float32), + "scale_y_accum_quant.col": scale_y_accum_quant_c.astype(np.float32), + "scale_y_quant_orig": scale_y_quant_orig_t.astype(np.float32), + } + + +def write_int8(vals, dir, base_key, split_dim, tp_rank, split_factor, kv_cache_only=False): + if not kv_cache_only: + save_split( + np.split(vals["weight.int8"], split_factor, axis=split_dim), + dir, + f"{base_key}.weight.int8", + tp_rank, + split_factor, + ) + save_split( + np.split(vals["weight.int8.col"], split_factor, axis=split_dim), + dir, + f"{base_key}.weight.int8.col", + tp_rank, + split_factor, + ) + + saved_keys_once = ["scale_y_quant_orig"] + if not kv_cache_only: + saved_keys_once += ["scale_x_orig_quant", "scale_w_quant_orig", "scale_y_accum_quant"] + # per-column scaling factors are loaded per-gpu for ColumnParallel GEMMs (QKV, FC1) + if not kv_cache_only: + if split_dim == -1: + save_split( + np.split(vals["scale_w_quant_orig.col"], split_factor, axis=split_dim), + dir, + f"{base_key}.scale_w_quant_orig.col", + tp_rank, + split_factor, + ) + save_split( + np.split(vals["scale_y_accum_quant.col"], split_factor, axis=split_dim), + dir, + f"{base_key}.scale_y_accum_quant.col", + tp_rank, + split_factor, + ) + else: + saved_keys_once += ["scale_w_quant_orig.col", "scale_y_accum_quant.col"] + + if tp_rank == 0: + for save_key in saved_keys_once: + save_val(vals[save_key], dir, f"{base_key}.{save_key}") + + +# Note: in multi_query_mode, only query heads are split between multiple GPUs, while key/value head +# are not split as there is only one head per key/value. +@torch.no_grad() +def split_and_save_weight(tp_rank, saved_dir, split_factor, key, vals, storage_type, act_range, config): + use_attention_nemo_shape = config.get("use_attention_nemo_shape", False) + split_gated_activation = config.get("split_gated_activation", False) + num_attention_heads = config.get("num_attention_heads", 0) + tp_size = config.get("tp_size", 1) + int8_outputs = config.get("int8_outputs", None) + multi_query_mode = config.get("multi_query_mode", False) + local_dim = config.get("local_dim", None) + num_kv_heads = config.get("num_kv_heads", num_attention_heads) + + save_int8 = int8_outputs == "all" or int8_outputs == "kv_cache_only" + + if not isinstance(vals, list): + vals = [vals] + + if config.get("transpose_weights", False) and vals[0].ndim == 2: + vals = [val.T for val in vals] + if "layernorm.weight" in key and config.get("apply_layernorm_1p", False): + vals = [val + 1.0 for val in vals] + vals = [torch_to_numpy(val.cpu().to(storage_type)) for val in vals] + + if ( + "input_layernorm.weight" in key + or "input_layernorm.bias" in key + or "attention.dense.bias" in key + or "post_attention_layernorm.weight" in key + or "post_attention_layernorm.bias" in key + or "post_self_attn_layernorm.weight" in key + or "mlp.dense_4h_to_h.bias" in key + or "final_layernorm.weight" in key + or "final_layernorm.bias" in key + ): + # shared weights, only need to convert the weights of rank 0 + if "post_self_attn_layernorm.weight" in key: + key = key.replace("post_self_attn_layernorm.weight", "post_attention_layernorm.weight") + if tp_rank == 0: + save_val(vals[0], saved_dir, key) + + elif ( + "attention.dense.weight" in key + or "mlp.dense_4h_to_h.weight" in key + or "attention.linear_proj.weight" in key + or "mlp.linear_fc2.weight" in key + ): + cat_dim = 0 + val = np.concatenate(vals, axis=cat_dim) + split_vals = np.split(val, split_factor, axis=cat_dim) + if "attention.linear_proj.weight" in key: + key = key.replace("attention.linear_proj.weight", "attention.dense.weight") + elif "mlp.linear_fc2.weight" in key: + key = key.replace("mlp.linear_fc2.weight", "mlp.dense_4h_to_h.weight") + save_split(split_vals, saved_dir, key, tp_rank, split_factor) + if act_range is not None and int8_outputs == "all": + base_key = key.replace(".weight", "") + vals_i8 = generate_int8(val, act_range, multi_query_mode=multi_query_mode) + write_int8(vals_i8, saved_dir, base_key, cat_dim, tp_rank, split_factor) + + elif "mlp.dense_h_to_4h.weight" in key or "mlp.dense_h_to_4h.bias" in key or "mlp.linear_fc1.weight" in key: + if split_gated_activation: + splits = [np.split(val, 2, axis=-1) for val in vals] + vals, gates = list(zip(*splits)) + cat_dim = -1 + val = np.concatenate(vals, axis=cat_dim) + split_vals = np.split(val, split_factor, axis=cat_dim) + if "mlp.linear_fc1.weight" in key: + key = key.replace("mlp.linear_fc1.weight", "mlp.dense_h_to_4h.weight") + save_split(split_vals, saved_dir, key, tp_rank, split_factor) + if act_range is not None and int8_outputs == "all": + base_key = key.replace(".weight", "") + vals_i8 = generate_int8(val, act_range, multi_query_mode=multi_query_mode) + write_int8(vals_i8, saved_dir, base_key, cat_dim, tp_rank, split_factor) + + if split_gated_activation: + assert not save_int8 + prefix, dot, suffix = key.rpartition(".") + key = prefix + ".gate" + dot + suffix + + gate = np.concatenate(gates, axis=cat_dim) + split_vals = np.split(gate, split_factor, axis=cat_dim) + save_split(split_vals, saved_dir, key, tp_rank, split_factor) + + # Ammo modification + elif "mlp.dense_h_to_4h_2.weight" in key or "mlp.dense_h_to_4h_2.bias" in key: + cat_dim = -1 + val = np.concatenate(vals, axis=cat_dim) + split_vals = np.split(val, split_factor, axis=cat_dim) + save_split(split_vals, saved_dir, key, tp_rank, split_factor) + if act_range is not None and int8_outputs == "all": + base_key = key.replace(".weight", "") + vals_i8 = generate_int8(val, act_range, multi_query_mode=multi_query_mode) + write_int8(vals_i8, saved_dir, base_key, cat_dim, tp_rank, split_factor) + + elif "attention.query_key_value.bias" in key: + assert ( + num_attention_heads == num_kv_heads or multi_query_mode + ), "QKV bias is not supported for group query attention" + if local_dim is None: + local_dim = vals[0].shape[-1] // 3 + + if multi_query_mode: + val = vals[0] + # out_feature = local_dim + 2 * head_size; assumes local_dim equals to hidden_dim + b_q, b_kv = np.split(val, [local_dim], axis=-1) + b_q_split = np.split(b_q, split_factor, axis=-1) + split_vals = [np.concatenate((i, b_kv), axis=-1) for i in b_q_split] + else: + if use_attention_nemo_shape: + head_num = num_attention_heads // tp_size + size_per_head = local_dim // num_attention_heads + nemo_shape = (head_num, 3, size_per_head) + vals = [val.reshape(nemo_shape) for val in vals] + vals = [val.transpose(1, 0, 2) for val in vals] + + vals = [val.reshape(3, local_dim) for val in vals] + val = np.concatenate(vals, axis=-1) + split_vals = np.split(val, split_factor, axis=-1) + save_split(split_vals, saved_dir, key, tp_rank, split_factor) + + elif "attention.query_key_value.weight" in key or "attention.linear_qkv.weight" in key: + assert use_attention_nemo_shape, "Only support NEMO shape for QKV weights" + hidden_dim = vals[0].shape[0] + size_per_head = hidden_dim // num_attention_heads + q_num = num_attention_heads // num_kv_heads + merge_factor = len(vals) + + # When the merge factor exceeds 1, the 'vals' list will have multiple entries. + # Depending on the format, 'vals' can look like either [QQQQ..KV, QQQQ..KV, ...](for GQA) or [QKV, QKV, ...](for MHA). + # We transform 'vals' into a unified structure: [QQQQQ...KK..VV..]. + # If the split factor surpasses 1, this array undergoes a split along its last dimension, which is 'size_per_head'. + + # Reshape each elements of the vals array to shape: + # (hidden_dim, q_num + 2, num_kv_heads // tp_size, size_per_head) + vals = [ + val.reshape(hidden_dim, num_kv_heads // tp_size, q_num + 2, size_per_head).transpose(0, 2, 1, 3) + for val in vals + ] + + # Combine all the Qs, Ks and Vs together for each val + q_splits, k_splits, v_splits = zip(*[np.split(val, [q_num, q_num + 1], axis=1) for val in vals]) + + # Concatenate Q, K, and V separately + q_splits_concat = np.concatenate(q_splits, axis=1) + k_splits_concat = np.concatenate(k_splits, axis=1) + v_splits_concat = np.concatenate(v_splits, axis=1) + + # Concatenate Q, K, and V together and reshape + qkv_split_concat = np.concatenate([q_splits_concat, k_splits_concat, v_splits_concat], axis=1) + qkv_split_concat = qkv_split_concat.reshape( + hidden_dim, q_num + 2, (num_kv_heads // tp_size) * size_per_head * merge_factor + ) + + # Final split + split_vals = np.split(qkv_split_concat, split_factor, axis=-1) + if "attention.linear_qkv.weight" in key: + key = key.replace("attention.linear_qkv.weight", "attention.query_key_value.weight") + save_split(split_vals, saved_dir, key, tp_rank, split_factor) + if save_int8: + base_key = key.replace(".weight", "") + vals_i8 = generate_int8(val, act_range, is_qkv=True, multi_query_mode=multi_query_mode) + write_int8( + vals_i8, + saved_dir, + base_key, + cat_dim, + tp_rank, + split_factor, + kv_cache_only=int8_outputs == "kv_cache_only", + ) + elif ( + "attention.query.weight" in key + or "attention.query.bias" in key + or "attention.key_value.weight" in key + or "attention.key_value.bias" in key + ): + pass + else: + print(f"[WARNING] {key} not handled by converter") + + # Ammo modification + global weights_dict + return weights_dict diff --git a/nemo/export/trt_llm/nemo/nemo.py b/nemo/export/trt_llm/nemo/nemo.py new file mode 100644 index 000000000000..9a7276646488 --- /dev/null +++ b/nemo/export/trt_llm/nemo/nemo.py @@ -0,0 +1,269 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""Referrence impl in tensorrt_llm: examples/gpt/utils/nemo.py.""" +import functools +import logging +import os +import pathlib +import tarfile +import typing + +import torch +import yaml +from transformers import GPT2Config, LlamaConfig + +from .convert import cpu_map_location, gpu_map_location + +LOGGER = logging.getLogger(__name__) + + +def nemo_to_llm_config(nemo_model_config, vocab_size, eos_id, bos_id, decoder_type): + convertion_dict = { + "activation_function": "activation", + "layer_norm_epsilon": "layernorm_epsilon", + "n_embd": "hidden_size", + "n_head": "num_attention_heads", + "n_layer": "num_layers", + "n_positions": "max_position_embeddings", + "rotary_pct": "rotary_percentage", + "bias": "bias", + "intermediate_size": "ffn_hidden_size", + "num_kv_heads": "num_query_groups", + } + + kwargs = {key: nemo_model_config[value] for key, value in convertion_dict.items() if value in nemo_model_config} + kwargs["vocab_size"] = vocab_size + kwargs["eos_token_id"] = eos_id + kwargs["bos_token_id"] = bos_id + + llm_config = LlamaConfig if decoder_type == "llama" else GPT2Config + + return llm_config(**kwargs) + + +def add_special_tokens_to_tokenizer(tokenizer): + # Need to add cls, sep, mask tokens to the tokenizer if they don't exist. + # If cls, sep and mask are not attributes of the tokenizer, add it. + if not hasattr(tokenizer, "cls_token"): + tokenizer.add_special_tokens({"cls_token": ""}) + if not hasattr(tokenizer.tokenizer, "sep_id"): + tokenizer.add_special_tokens({"sep_token": ""}) + if not hasattr(tokenizer.tokenizer, "mask_id"): + tokenizer.add_special_tokens({"mask_token": ""}) + + # bos, eos, pad and unk may be present in the provided spm .model file, if they are, use it. + if not hasattr(tokenizer, "pad_token"): + if hasattr(tokenizer.tokenizer, "pad_id") and tokenizer.tokenizer.pad_id() > 0: + tokenizer.pad_token = tokenizer.tokenizer.id_to_piece(tokenizer.tokenizer.pad_id()) + else: + tokenizer.add_special_tokens({"pad_token": ""}) + else: + tokenizer.add_special_tokens({"pad_token": ""}) + + if not hasattr(tokenizer, "bos_token"): + if hasattr(tokenizer.tokenizer, "bos_id") and tokenizer.tokenizer.bos_id() > 0: + tokenizer.bos_token = tokenizer.tokenizer.id_to_piece(tokenizer.tokenizer.bos_id()) + else: + tokenizer.add_special_tokens({"bos_token": ""}) + else: + tokenizer.add_special_tokens({"bos_token": ""}) + + if not hasattr(tokenizer, "eos_token"): + if hasattr(tokenizer.tokenizer, "eos_id") and tokenizer.tokenizer.eos_id() > 0: + tokenizer.eos_token = tokenizer.tokenizer.id_to_piece(tokenizer.tokenizer.eos_id()) + else: + tokenizer.add_special_tokens({"eos_token": ""}) + else: + tokenizer.add_special_tokens({"eos_token": ""}) + + +# TODO: remove tar.extractall usage before releasing with KitMaker +def unpack_nemo_ckpt( + nemo_archive_path: typing.Union[str, pathlib.Path], out_dir_path: typing.Union[str, pathlib.Path], +): + nemo_archive_path = pathlib.Path(nemo_archive_path) + if not nemo_archive_path.exists(): + raise FileNotFoundError(f"{nemo_archive_path} does not exist") + + for tar_mode in ["r:", "r:gz"]: + try: + with tarfile.open(nemo_archive_path, mode=tar_mode) as tar_file: + + def is_within_directory(directory, target): + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_members(tar_file): + members = [] + for member in tar_file.getmembers(): + member_path = os.path.join(out_dir_path, member.name) + if not is_within_directory(out_dir_path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + members.append(member) + return members + + tar_file.extractall( + out_dir_path, members=safe_members(tar_file), numeric_owner=False + ) # nosec - tar path has been validated. + + return out_dir_path + except tarfile.ReadError: + pass + + raise RuntimeError(f"Could not unpack {nemo_archive_path}") + + +def extract_layers_with_prefix(model_, prefix): + length_to_trim = len(prefix) + model_state = model_.get("state_dict", model_) + return {key[length_to_trim:]: model_state[key] for key in model_state.keys() if prefix in key} + + +class UnpackedNemoCheckpointDir: + def __init__( + self, checkpoints_dir: typing.Union[str, pathlib.Path], load_checkpoints_to_cpu: bool = False, + ): + self._checkpoints_dir = pathlib.Path(checkpoints_dir) + self._load_checkpoints_to_cpu = load_checkpoints_to_cpu + + @property + @functools.lru_cache + def model_config(self): + model_config = None + + model_config_filename = "model_config.yaml" + model_configs_paths = list(self._checkpoints_dir.rglob(model_config_filename)) + if model_configs_paths: + if len(model_configs_paths) > 1: + raise RuntimeError( + f"There are more than single {model_config_filename} in" + f" {self._checkpoints_dir}:" + f" {', '.join(map(lambda p: p.as_posix(), model_configs_paths))}" + ) + model_config_path = model_configs_paths[0] + LOGGER.debug("Loading model config from %s", model_config_path) + with model_config_path.open("r") as model_config_file: + model_config = yaml.load(model_config_file, Loader=yaml.SafeLoader) + else: + LOGGER.debug("Searching model config in checkpoints") + # try to obtain from checkpoint + checkpoint_name = self.checkpoint_name + checkpoints_paths = sorted(self._checkpoints_dir.rglob(checkpoint_name)) + if checkpoints_paths: + # assume that parallel ranks 0 checkpoint should have model config embedded + checkpoint_path = checkpoints_paths[0] + + map_location_fn = cpu_map_location if self._load_checkpoints_to_cpu else gpu_map_location + + model_00 = torch.load(checkpoint_path, map_location=map_location_fn) + if "hyper_parameters" in model_00 and "cfg" in model_00["hyper_parameters"]: + model_config = model_00["hyper_parameters"]["cfg"] + LOGGER.debug("Loaded model config from checkpoint %s", checkpoint_path) + else: + LOGGER.debug("Could not find model config in checkpoint %s", checkpoint_path) + + del model_00 + + if model_config is None: + LOGGER.warning("Could not find checkpoint with NeMo model config in %s", self._checkpoints_dir) + + LOGGER.debug("Loaded model config %s", model_config) + + return model_config + + @property + def checkpoints_dir(self): + return self._checkpoints_dir + + def get_checkpoints_paths(self, tensor_model_parallel_size=1, pipeline_model_parallel_size=1): + """Injects tensor/pipeline model parallel ranks into the filepath. + Does nothing if not using model parallelism. + """ + checkpoint_path_without_rank = self.checkpoints_dir / self.checkpoint_name + + def _inject_parallel_ranks(tp_rank, pp_rank): + if tensor_model_parallel_size > 1 or pipeline_model_parallel_size > 1: + if pipeline_model_parallel_size is None or pipeline_model_parallel_size == 1: + checkpoint_path = ( + checkpoint_path_without_rank.parent + / f"mp_rank_{tp_rank:02d}" + / checkpoint_path_without_rank.name + ) + else: + checkpoint_path = ( + checkpoint_path_without_rank.parent + / f"tp_rank_{tp_rank:02d}_pp_rank_{pp_rank:03d}" + / checkpoint_path_without_rank.name + ) + return checkpoint_path + else: + return checkpoint_path_without_rank + + return [ + [ + _inject_parallel_ranks(tp_rank=tp_rank, pp_rank=pp_rank) + for pp_rank in range(pipeline_model_parallel_size) + ] + for tp_rank in range(tensor_model_parallel_size) + ] + + @property + @functools.lru_cache + def checkpoint_name(self): + patterns = [ + "model_weights.ckpt", # older megatron checkpoints + "*last.ckpt", # newer format of checkpoints + ] + for pattern in patterns: + model_files = sorted(list(self._checkpoints_dir.rglob(pattern))) + if model_files: + return model_files[0].name + + raise ValueError(f"Could not find checkpoint files in {self._checkpoints_dir}") + + @functools.lru_cache + def get_tokenizer_file_path(self, tokenizer_key, file_key, default_filename_pattern): + model_config = self.model_config + file_property = None + if tokenizer_key in model_config and file_key in model_config[tokenizer_key]: + file_property = model_config[tokenizer_key][file_key] + elif file_key in model_config: + file_property = model_config[file_key] + + LOGGER.debug("model_config[%s][%s]=%s", tokenizer_key, file_key, file_property) + + if file_property and file_property.startswith("nemo:"): + filename = file_property.split("nemo:")[1] + filename_pattern = f"*{filename}" + elif file_property and file_property.startswith("/artifacts/"): + filename = pathlib.Path(file_property).name + filename_pattern = f"*{filename}" + elif file_property is None or file_property == "None": + filename_pattern = None + else: + filename_pattern = default_filename_pattern + LOGGER.warning( + f"Tokenizer file from config: {tokenizer_key}.{file_key}={file_property} " + f"looks like unsupported path. Pattern {filename_pattern} will be used." + ) + + file_path = None + if filename_pattern is not None: + files_paths = list(self._checkpoints_dir.glob(filename_pattern)) + if files_paths: + assert len(files_paths) == 1 + file_path = files_paths[0] + + return file_path diff --git a/nemo/export/trt_llm/nemo/nemo_ckpt_convert.py b/nemo/export/trt_llm/nemo/nemo_ckpt_convert.py new file mode 100644 index 000000000000..6433cef64792 --- /dev/null +++ b/nemo/export/trt_llm/nemo/nemo_ckpt_convert.py @@ -0,0 +1,282 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""Referrence impl in tensorrt_llm: examples/gpt/nemo_ckpt_convert.py.""" + +import configparser +import logging +import multiprocessing +import os +import shutil +import typing +from collections import defaultdict +from pathlib import Path + +import numpy as np +import torch +from tensorrt_llm._utils import str_dtype_to_torch, torch_to_numpy +from tqdm import tqdm +from transformers import GPT2Tokenizer, LlamaConfig, T5Tokenizer + +from .convert import cpu_map_location, gpu_map_location, split_and_save_weight +from .nemo import UnpackedNemoCheckpointDir, extract_layers_with_prefix, nemo_to_llm_config + +LOGGER = logging.getLogger(__name__) + +base_layer_names = { + "position_embedding": "model.language_model.embedding.position_embeddings.weight", + "word_embedding": "model.language_model.embedding.word_embeddings.weight", + "output_layer": "model.language_model.output_layer.weight", +} + +mcore_layer_names = { + "position_embedding": "model.embedding.position_embeddings.weight", + "word_embedding": "model.embedding.word_embeddings.weight", + "output_layer": "model.output_layer.weight", +} + + +def get_layer_name(layer_type: str, is_mcore: bool): + layer_dict = mcore_layer_names if is_mcore else base_layer_names + if layer_type in layer_dict: + return layer_dict[layer_type] + else: + raise ValueError(f"Unknown layer type {layer_type}") + + +def rename_key(old_key: str, pp_rank: int, num_layers: int, pp_size: int): + new_key = old_key + + if "layers." in old_key: + split_key = old_key.split(".") + split_key[1] = str(int(split_key[1]) + pp_rank * num_layers // pp_size) + new_key = ".".join(split_key) + + if "self_attention" in new_key: + new_key = new_key.replace("self_attention", "attention") + return new_key + + +@torch.no_grad() +def convert_checkpoint(unpacked_checkpoints_dir: UnpackedNemoCheckpointDir, args): + nemo_model_config = unpacked_checkpoints_dir.model_config + + checkpoints_paths = unpacked_checkpoints_dir.get_checkpoints_paths( + nemo_model_config.get("tensor_model_parallel_size", 1), + nemo_model_config.get("pipeline_model_parallel_size", 1), + ) + + # if checkpoints files could be found - start preparing output dir + out_dir = create_out_dir(args) + + map_location_fn = gpu_map_location if args.load_checkpoints_on_gpu else cpu_map_location + storage_type = str_dtype_to_torch(args.storage_type) + is_mcore = nemo_model_config.get("mcore_gpt", False) + + # load position_embedding from rank 0 + model_00 = torch.load(checkpoints_paths[0][0], map_location=map_location_fn) + model_00 = model_00.get("state_dict", model_00) + + has_position_embedding = get_layer_name("position_embedding", is_mcore) in model_00 + has_lm_head = get_layer_name("output_layer", is_mcore) in model_00 + + num_layers = nemo_model_config["num_layers"] + training_tp_size = nemo_model_config.get("tensor_model_parallel_size", 1) + training_pp_size = nemo_model_config.get("pipeline_model_parallel_size", 1) + inference_tp_size = args.tensor_parallelism + num_kv_heads = nemo_model_config.get("num_query_groups", 0) + multi_query_mode = nemo_model_config.get("multi_query_mode", False) + num_attention_heads = nemo_model_config["num_attention_heads"] + is_fast_glu = nemo_model_config.get("activation", "gelu") in ['fast-geglu', 'fast-swiglu', 'fast-reglu'] + if num_kv_heads is None: + num_kv_heads = 0 + nemo_model_config["num_query_groups"] = 0 + + if num_kv_heads == 0: + if multi_query_mode: + num_kv_heads = 1 + else: + num_kv_heads = num_attention_heads + + export_config = { + "apply_layernorm_1p": nemo_model_config.get("normalization", "") == "layernorm1p", + "tp_size": training_tp_size, + "split_gated_activation": "swiglu" in nemo_model_config.get("activation", "gelu") + and (args.decoder_type == "gptnext" or is_mcore or is_fast_glu), + "num_attention_heads": num_attention_heads, + "num_kv_heads": num_kv_heads, + "use_attention_nemo_shape": True, + "transpose_weights": True, + } + + # merge_factor: how many TP training nodes are merged into an inference TP node + # split_factor: in how many parts a TP training node is split + gcd = np.gcd(training_tp_size, inference_tp_size) + merge_factor = training_tp_size // gcd + split_factor = inference_tp_size // gcd + + model_level_weights = defaultdict(list) + + def handle_model_level_weights(model, tp_idx: int, pp_idx: int): + if tp_idx == 0 and pp_idx == 0: + if has_position_embedding: + val = model[get_layer_name("position_embedding", is_mcore)] + # not weight, do not need to transpose + val = torch_to_numpy(val.to(storage_type).cpu()) + # AMMO modification + # val.tofile(out_dir / "model.wpe.bin") + model_level_weights["model.wpe.bin"].append(val) + if pp_idx == 0: + val = model.get("state_dict", model)[get_layer_name("word_embedding", is_mcore)] + val = torch_to_numpy(val.to(storage_type).cpu()) + model_level_weights["model.wte.bin"].append(val) + if has_lm_head and pp_idx == training_pp_size - 1: + val = model.get("state_dict", model)[get_layer_name("output_layer", is_mcore)] + val = torch_to_numpy(val.to(storage_type).cpu()) + model_level_weights["model.lm_head.weight.bin"].append(val) + + # AMMO modification + weights_dict = {} + for tp_rank in range(training_tp_size // merge_factor): + for pp_rank in range(training_pp_size): + models = [] + for k in range(merge_factor): + rank_weights = checkpoints_paths[tp_rank * merge_factor + k][pp_rank] + model = torch.load(rank_weights, map_location=map_location_fn) + handle_model_level_weights(model, tp_rank * merge_factor + k, pp_rank) + prefix = "model.decoder." if is_mcore else "model.language_model.encoder." + layers = extract_layers_with_prefix(model, prefix) + models.append(layers) + + starmap_args = [] + for key in models[0].keys(): + # Skipping the extra state as it is not a part of the model state dict + if "_extra_state" not in key: + starmap_args.append( + ( + tp_rank, + out_dir, + split_factor, + rename_key(key, pp_rank, num_layers, training_pp_size), + [model[key] for model in models], + storage_type, + None, + export_config, + ) + ) + starmap_args = tqdm(starmap_args, desc="saving weights") + + if args.processes > 1: + with multiprocessing.Pool(args.processes) as pool: + # AMMO modification + weights_dicts = pool.starmap(split_and_save_weight, starmap_args) + weights_dict_local = {k: v for d in weights_dicts for k, v in d.items()} + else: + # simpler for debug situations + for starmap_arg in starmap_args: + # AMMO modification + weights_dict_local = split_and_save_weight(*starmap_arg) + # AMMO modification + weights_dict.update(weights_dict_local) + + for key, values in model_level_weights.items(): + model_level_weights[key] = np.concatenate(values, axis=0) + # AMMO modification + weights_dict[key] = model_level_weights[key] + vocab_size = model_level_weights["model.wte.bin"].shape[0] + + tokenizer_config = update_tokenizer_paths(nemo_model_config["tokenizer"], unpacked_checkpoints_dir) + copy_tokenizer_files(tokenizer_config, out_dir) + # AMMO modification. + tokenizer_config["model"] = os.path.join(out_dir, "tokenizer.model") + tokenizer = build_tokenizer(tokenizer_config) + llm_config = nemo_to_llm_config( + nemo_model_config, vocab_size, tokenizer.eos_token_id, tokenizer.bos_token_id, args.decoder_type, + ) + + llm_config.is_mcore = is_mcore + + config = configparser.ConfigParser() + model_name = "llama" if isinstance(llm_config, LlamaConfig) else "gpt" + config[model_name] = {k: str(v) for k, v in vars(llm_config).items()} + config[model_name]["storage_dtype"] = args.storage_type + config_path = out_dir / "config.ini" + with config_path.open("w") as config_file: + config.write(config_file) + + # AMMO modification. + return weights_dict, llm_config, tokenizer + + +def create_out_dir(args): + # AMMO modification. + out_dir = Path(args.out_dir) + if not out_dir.exists(): + out_dir.mkdir(parents=True) + return out_dir + + +def update_tokenizer_paths(tokenizer_config: typing.Dict, unpacked_checkpoints_dir): + def _update_config_entry(key, file_pattern): + old_path = tokenizer_config[key] + if old_path is None: + return + old_path = Path(old_path) + new_path = unpacked_checkpoints_dir.get_tokenizer_file_path("tokenizer", key, file_pattern) + if new_path: + LOGGER.debug(f"Update tokenizer {key} {old_path} -> {new_path}") + tokenizer_config[key] = new_path.as_posix() + elif not old_path.exists(): + LOGGER.warning(f"Tokenizer {key}'s path {old_path} does not exists: set it to None") + tokenizer_config[key] = None + + _update_config_entry("model", "*.model") + _update_config_entry("vocab_file", "*vocab*") + _update_config_entry("merge_file", "*merge*.txt") + + return tokenizer_config + + +def copy_tokenizer_files(config, out_dir): + basenames = { + "model": "tokenizer", + "vocab_file": "vocab", + "merge_file": "merges", + } + + for key in basenames.keys(): + if config[key] is None: + continue + path = Path(config[key]) + if not path.exists(): + LOGGER.debug(f"Tokenizer {key}: {path} file not found") + continue + + dst_path = out_dir / f"{basenames[key]}{path.suffix}" + LOGGER.debug(f"Copy tokenizer {key}: {path}->{dst_path}") + shutil.copy(path.as_posix(), dst_path.as_posix()) + + +def build_tokenizer(tokenizer_config: typing.Dict): + if tokenizer_config["library"] == "sentencepiece": + # AMMO modification. + # Turn off legacy model by default: See https://github.com/huggingface/transformers/pull/24622 + tokenizer = T5Tokenizer(tokenizer_config["model"], extra_ids=0, legacy=False) + elif "GPT2" in tokenizer_config["type"]: + tokenizer = GPT2Tokenizer(tokenizer_config["vocab_file"], tokenizer_config["merge_file"]) + else: + raise ValueError(f'Tokenizer type {tokenizer_config["library"]} not handled') + + if tokenizer.bos_token_id is None: + tokenizer.add_special_tokens({"bos_token": ""}) + if tokenizer.eos_token_id is None: + tokenizer.add_special_tokens({"eos_token": ""}) + + return tokenizer diff --git a/nemo/export/trt_llm/nemo_utils.py b/nemo/export/trt_llm/nemo_utils.py new file mode 100644 index 000000000000..478cc24a2214 --- /dev/null +++ b/nemo/export/trt_llm/nemo_utils.py @@ -0,0 +1,184 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""The APIs to convert a nemo model checkpoint to tensorrt_llm.""" + +import argparse +import ast +import configparser +import copy +import datetime +import logging +import os +import shutil +import sys +import tempfile +from pathlib import Path +from typing import Dict, List, Tuple + +import numpy as np +from tensorrt_llm import str_dtype_to_trt +from transformers import GPT2Config, LlamaConfig, PretrainedConfig, PreTrainedTokenizer + +from .model_config import ( + LAYERNORM_DEFAULT, + LAYERNORM_RMS, + LINEAR_COLUMN, + DecoderLayerConfig, + EmbeddingConfig, + LayernormConfig, + LinearConfig, + ModelConfig, +) +from .nemo.nemo import UnpackedNemoCheckpointDir, unpack_nemo_ckpt +from .nemo.nemo_ckpt_convert import build_tokenizer, convert_checkpoint +from .tensor_utils import get_tensor_from_dict, split + +LOGGER = logging.getLogger(__name__) + + +def _nemo_decode( + in_file: str, + out_dir: str, + tensor_parallelism: int = 1, + processes: int = 1, + storage_type: str = "bfloat16", + load_checkpoints_on_gpu: bool = False, + decoder_type: str = "gptnext", +) -> Tuple[Dict[str, np.ndarray], PretrainedConfig, PreTrainedTokenizer]: + """Decodes the NEMO file and returns the weights dict, llm config and tokenizer.""" + args = argparse.Namespace() + args.in_file = in_file + args.out_dir = out_dir + args.tensor_parallelism = tensor_parallelism + args.processes = processes + args.storage_type = storage_type + args.load_checkpoints_on_gpu = load_checkpoints_on_gpu + args.verbose = False + args.decoder_type = decoder_type + + input_path = Path(args.in_file) + if not input_path.exists(): + LOGGER.error("%s does not exists", input_path) + sys.exit(1) + + with tempfile.TemporaryDirectory() as temp_dir: + temp_dir = Path(temp_dir) + + # unpack if needed + if input_path.is_dir(): + nemo_dir = input_path + else: + start_time = datetime.datetime.now() + checkpoint_dir_path = temp_dir / "unpacked" + nemo_dir = unpack_nemo_ckpt(args.in_file, checkpoint_dir_path) + LOGGER.info("Spent %s (h:m:s) to unpack NeMo archive", datetime.datetime.now() - start_time) + + unpacked_checkpoint_dir = UnpackedNemoCheckpointDir( + nemo_dir, load_checkpoints_to_cpu=not args.load_checkpoints_on_gpu + ) + + start_time = datetime.datetime.now() + weights_dict, llm_config, tokenizer = convert_checkpoint(unpacked_checkpoint_dir, args) + LOGGER.info("Spent %s (h:m:s) to convert the model", datetime.datetime.now() - start_time) + + return weights_dict, llm_config, tokenizer + + +def get_model_config(weights_dir: Path) -> GPT2Config: + """Reads the GPT2Config from the decoded NEMO weights dir.""" + config = configparser.ConfigParser() + config_path = weights_dir / "config.ini" + assert os.path.isfile(config_path), f"{config_path} not present" + config.read(config_path) + config_dict = dict(config.items("gpt")) + # Parse the config to dict. + for k, v in config_dict.items(): + try: + config_dict[k] = ast.literal_eval(v) + except Exception: + pass + return GPT2Config(**config_dict) + + +def get_tokenzier(tokenizer_dir_or_path: Path) -> PreTrainedTokenizer: + """Loads the tokenizer from the decoded NEMO weights dir.""" + model_path = tokenizer_dir_or_path / "tokenizer.model" if tokenizer_dir_or_path.is_dir() else tokenizer_dir_or_path + tokenizer_config = {"library": "sentencepiece", "model": str(model_path)} + return build_tokenizer(tokenizer_config) + + +def nemo_to_model_config( + in_file: str, decoder_type: str, nemo_export_dir: str, gpus: int = 1 +) -> Tuple[List[ModelConfig], PreTrainedTokenizer]: + """Converts the NEMO file and construct the `ModelConfig` before tensorrt_llm deployment.""" + dtype_str = "bfloat16" + + if os.path.exists(nemo_export_dir): + shutil.rmtree(nemo_export_dir) + + weights_dict, llm_model_config, tokenizer = _nemo_decode( + in_file=in_file, + out_dir=nemo_export_dir, + tensor_parallelism=gpus, + processes=1, + storage_type=dtype_str, + load_checkpoints_on_gpu=False, + decoder_type=decoder_type, + ) + + model_config_template = ModelConfig() + model_config_template.dtype = dtype_str + + model_config_template.tensor_parallel = gpus + + str_dtype_to_trt(dtype_str) + + model_configs = [] + for i in range(gpus): + model_configs.append(copy.deepcopy(model_config_template)) + model_configs[i].rank = i + + model_configs[i].vocab_embedding = EmbeddingConfig(weight=get_tensor_from_dict(weights_dict, "wte")) + + model_configs[i].final_layernorm = LayernormConfig( + weight=get_tensor_from_dict(weights_dict, "final_layernorm.weight"), + bias=get_tensor_from_dict(weights_dict, "final_layernorm.bias"), + ) + model_configs[i].final_layernorm.layernorm_type = ( + LAYERNORM_RMS if isinstance(llm_model_config, LlamaConfig) else LAYERNORM_DEFAULT + ) + + for i in range(llm_model_config.n_layer): + for j in range(gpus): + model_configs[j].layers.append( + DecoderLayerConfig.from_nemo( + weights_dict=weights_dict, + llm_config=llm_model_config, + decoder_type=decoder_type, + layer_id=i, + rank=j, + is_mcore=llm_model_config.is_mcore, + ) + ) + + lm_head_weight = get_tensor_from_dict(weights_dict, "lm_head.weight") + + if model_configs[0].vocab_size_padded != model_configs[0].vocab_size: + pad_width = model_configs[0].vocab_size_padded - model_configs[0].vocab_size + lm_head_weight = np.pad(lm_head_weight, ((0, pad_width), (0, 0)), "constant", constant_values=0) + + for i in range(gpus): + model_configs[i].lm_head = LinearConfig(linear_type=LINEAR_COLUMN) + model_configs[i].lm_head.weight = np.ascontiguousarray( + split(lm_head_weight, model_configs[i].tensor_parallel, model_configs[i].rank) + ) + + return model_configs, tokenizer diff --git a/nemo/export/trt_llm/quantization_utils.py b/nemo/export/trt_llm/quantization_utils.py new file mode 100644 index 000000000000..fd363b11dcf5 --- /dev/null +++ b/nemo/export/trt_llm/quantization_utils.py @@ -0,0 +1,119 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""The utils to convert a tensorrt_llm network to a quantized network.""" + +import numpy as np +from tensorrt_llm.layers import Linear, RowLinear +from tensorrt_llm.quantization.layers import FP8Linear, FP8RowLinear, Int8SmoothQuantLinear, Int8SmoothQuantRowLinear + +from .model_config import QUANTIZATION_FP8, QUANTIZATION_INT8_SQ, QUANTIZATION_NONE, LinearConfig, ModelConfig + + +def quantize_linear(tensorrt_llm_layer, quantization: str, layer_config: LinearConfig): + """Returns the quantized tensorrt_llm linear layer.""" + if quantization == QUANTIZATION_NONE: + return tensorrt_llm_layer + + if quantization == QUANTIZATION_FP8: + # FP8 is not sensitive to scaling factors. So we just quantize all layers possible. + default_scaling_factor = np.array([1], dtype=np.float32) + if layer_config.activation_scaling_factor is None: + layer_config.activation_scaling_factor = default_scaling_factor + if layer_config.weights_scaling_factor is None: + layer_config.weights_scaling_factor = default_scaling_factor + + if layer_config.activation_scaling_factor is None or layer_config.weights_scaling_factor is None: + print(f"No valid scaling factors in {tensorrt_llm_layer._get_name()}, skipping quantization" " on this layer") + return tensorrt_llm_layer + else: + assert np.all(layer_config.activation_scaling_factor > 0) + assert np.all(layer_config.weights_scaling_factor > 0) + + bias = tensorrt_llm_layer.bias is not None + + linear_layer_type = type(tensorrt_llm_layer) + if linear_layer_type == Linear: + if quantization == QUANTIZATION_FP8: + linear = FP8Linear + elif quantization == QUANTIZATION_INT8_SQ: + linear = Int8SmoothQuantLinear + else: + assert False, f"{quantization} is not supported." + quantized_linear_layer = linear( + in_features=tensorrt_llm_layer.in_features, + out_features=tensorrt_llm_layer.out_features * tensorrt_llm_layer.tp_size, + bias=bias, + dtype=tensorrt_llm_layer.dtype, + tp_group=tensorrt_llm_layer.tp_group, + tp_size=tensorrt_llm_layer.tp_size, + gather_output=tensorrt_llm_layer.gather_output, + ) + elif linear_layer_type == RowLinear: + if quantization == QUANTIZATION_FP8: + row_linear = FP8RowLinear + elif quantization == QUANTIZATION_INT8_SQ: + row_linear = Int8SmoothQuantRowLinear + else: + assert False, f"{quantization} is not supported." + quantized_linear_layer = row_linear( + in_features=tensorrt_llm_layer.in_features * tensorrt_llm_layer.tp_size, + out_features=tensorrt_llm_layer.out_features, + bias=bias, + dtype=tensorrt_llm_layer.dtype, + tp_group=tensorrt_llm_layer.tp_group, + tp_size=tensorrt_llm_layer.tp_size, + ) + else: + assert False, f"{linear_layer_type} is not supported." + + quantized_linear_layer.weight = tensorrt_llm_layer.weight + quantized_linear_layer.bias = tensorrt_llm_layer.bias + + quantized_linear_layer.activation_scaling_factor.value = layer_config.activation_scaling_factor + quantized_linear_layer.weights_scaling_factor.value = layer_config.weights_scaling_factor + + if hasattr(quantized_linear_layer, "prequant_scaling_factor"): + quantized_linear_layer.prequant_scaling_factor.value = layer_config.prequant_scaling_factor + + return quantized_linear_layer + + +def naive_quantization(config: ModelConfig, quantization: str): + """Generates a constant scaling factor (1) with target quantization. + + This is for debugging and performance measurement only. + """ + config.quantization = quantization + # Here the scaling factor is not inversed. + # In nvidia systems: + # pytorch_quantization uses inv scale + # onnx & trt uses non-inv scale + # cask uses inv scale + default_scaling_factor = np.array([1], dtype=np.float32) + + if quantization == QUANTIZATION_FP8: + for layer in config.layers: + linear_layers = [ + layer.attention.qkv, + layer.attention.dense, + layer.mlp.fc, + layer.mlp.proj, + layer.mlp.gate, + ] + for linear_layer in linear_layers: + if linear_layer: + linear_layer.activation_scaling_factor = default_scaling_factor + linear_layer.weights_scaling_factor = default_scaling_factor + config.lm_head.activation_scaling_factor = default_scaling_factor + config.lm_head.weights_scaling_factor = default_scaling_factor + + else: + assert False, f"{quantization} not supported" diff --git a/nemo/export/trt_llm/tensor_utils.py b/nemo/export/trt_llm/tensor_utils.py new file mode 100644 index 000000000000..108568f35e2b --- /dev/null +++ b/nemo/export/trt_llm/tensor_utils.py @@ -0,0 +1,62 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""Utils for tensor conversions between tensorrt, torch and numpy.""" + +from typing import Dict + +import numpy as np +import tensorrt as trt +import tensorrt_llm +import torch + + +def torch_to_numpy_with_dtype(tensor, dtype=trt.float16): + """Converts a torch tensor to numpy array with the dtype.""" + if dtype == trt.float16: + torch_dtype = torch.float16 + elif dtype == trt.float32: + torch_dtype = torch.float32 + elif dtype == trt.bfloat16: + torch_dtype = torch.bfloat16 + else: + assert False, f"{dtype} not supported" + return tensorrt_llm._utils.torch_to_numpy(tensor.detach().to(torch_dtype)) + + +def trt_dtype_to_str(dtype: trt.DataType): + """Converts a trt dtype to string.""" + str_map = { + trt.float16: "float16", + trt.bfloat16: "bfloat16", + trt.float32: "float32", + } + + return str_map[dtype] + + +def split(v, tp_size, idx, dim=0): + """Splits the np tensor v on dim and return the idx's slice.""" + if tp_size == 1: + return v + if len(v.shape) == 1: + return np.ascontiguousarray(np.split(v, tp_size)[idx]) + else: + return np.ascontiguousarray(np.split(v, tp_size, axis=dim)[idx]) + + +def get_tensor_parallel_group(tensor_parallel: int): + """Returns the tensor_parallel_group config based on tensor_parallel.""" + return None if tensor_parallel == 1 else list(range(tensor_parallel)) + + +def get_tensor_from_dict(weights_dict: Dict[str, np.ndarray], name: str) -> np.array: + """Loads tensor from the weights_dict.""" + return weights_dict.get(f"model.{name}.bin", None) diff --git a/nemo/export/trt_llm/tensorrt_llm_build.py b/nemo/export/trt_llm/tensorrt_llm_build.py new file mode 100644 index 000000000000..45ecc08ead80 --- /dev/null +++ b/nemo/export/trt_llm/tensorrt_llm_build.py @@ -0,0 +1,296 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""This module builds the tensorrt_llm engine. + +Referrence impl in tensorrt_llm: examples/gpt/build.py. +""" + +import argparse +import os +import time +from pathlib import Path + +import tensorrt_llm +import torch +from tensorrt_llm import str_dtype_to_trt +from tensorrt_llm.builder import Builder +from tensorrt_llm.logger import logger +from tensorrt_llm.network import net_guard +from tensorrt_llm.plugin.plugin import ContextFMHAType +from tensorrt_llm.quantization import QuantMode + +MODEL_NAME = "ammo" + + +def get_engine_name(model, dtype, tp_size, rank): + """Returns the engine file name based on the provided info.""" + return "{}_{}_tp{}_rank{}.engine".format(model, dtype, tp_size, rank) + + +def serialize_engine(engine, path): + """Serializes the engine to path.""" + logger.info(f"Serializing engine to {path}...") + tik = time.time() + with open(path, "wb") as f: + f.write(bytearray(engine)) + tok = time.time() + t = time.strftime("%H:%M:%S", time.gmtime(tok - tik)) + logger.info(f"Engine serialized. Total time: {t}") + + +def build_rank_engine( + tensorrt_llm_gpt, builder: Builder, builder_config: tensorrt_llm.builder.BuilderConfig, engine_name, rank, args, +): + """@brief: Build the engine on the given rank. + + @param rank: The rank to build the engine. + @param args: The cmd line arguments. + @return: The built engine. + """ + str_dtype_to_trt(args.dtype) + + # TODO: Enable use_embedding_sharing when this feature is needed. + # Share_embedding_table can be set True only when: + # 1) the weight for lm_head() does not exist while other weights exist + # 2) For multiple-processes, use_parallel_embedding=True and embedding_sharding_dim == 0. + # Besides, for TensorRT 9.0, we can observe the engine size reduction when the lookup and gemm plugin are enabled. + # share_embedding_table = False + # if args.use_embedding_sharing: + # if args.world_size > 1: + # if args.model_dir is not None and args.embedding_sharding_dim == 0 and args.use_parallel_embedding: + # share_embedding_table = check_embedding_share(args.model_dir) + # else: + # if args.model_dir is not None: + # share_embedding_table = check_embedding_share(args.model_dir) + + # if not share_embedding_table: + # logger.warning(f'Cannot share the embedding lookup table.') + + # if share_embedding_table: + # logger.info( + # 'Engine will share embedding and language modeling weights.') + + # Module -> Network + ootb = os.getenv("OOTB", False) + + network = builder.create_network() + network.trt_network.name = engine_name + + # We have to use the attention plugin for most of the models. + if args.use_gpt_attention_plugin: + network.plugin_config.set_gpt_attention_plugin(dtype=args.use_gpt_attention_plugin) + + if not ootb: + if args.use_gemm_plugin: + network.plugin_config.set_gemm_plugin(dtype=args.use_gemm_plugin) + if args.use_rmsnorm_plugin: + network.plugin_config.set_rmsnorm_plugin(dtype=args.use_rmsnorm_plugin) + if args.use_layernorm_plugin: + network.plugin_config.set_layernorm_plugin(dtype=args.use_layernorm_plugin) + assert not (args.enable_context_fmha and args.enable_context_fmha_fp32_acc) + if args.enable_context_fmha: + network.plugin_config.set_context_fmha(ContextFMHAType.enabled) + if args.enable_context_fmha_fp32_acc: + network.plugin_config.set_context_fmha(ContextFMHAType.enabled_with_fp32_acc) + if args.remove_input_padding: + network.plugin_config.enable_remove_input_padding() + if args.paged_kv_cache: + network.plugin_config.enable_paged_kv_cache() + if args.use_ib_gpt_attention_plugin: + network.plugin_config.set_inflight_batching_gpt_attention_plugin(dtype=args.use_ib_gpt_attention_plugin) + + if args.use_inflight_batching: + network.plugin_config.enable_in_flight_batching() + + if args.use_lookup_plugin: + # Use the plugin for the embedding parallelism and sharing + network.plugin_config.set_lookup_plugin(dtype=args.dtype) + else: + print("Build engine in OOTB mode, disable all plugins except nccl.") + + if args.world_size > 1: + network.plugin_config.set_nccl_plugin(args.dtype) + + with net_guard(network): + # Prepare + network.set_named_parameters(tensorrt_llm_gpt.named_parameters()) + + # Forward + inputs = tensorrt_llm_gpt.prepare_inputs( + args.max_batch_size, + args.max_input_len, + args.max_output_len, + True, + args.max_beam_width, + paged_kv_cache=args.paged_kv_cache, + tokens_per_block=args.tokens_per_block, + prompt_embedding_table_size=args.max_prompt_embedding_table_size, + ) + tensorrt_llm_gpt(*inputs) + + engine = None + + # Network -> Engine + engine = builder.build_engine(network, builder_config) + if rank == 0: + config_path = args.output_dir / "config.json" + builder.save_config(builder_config, config_path) + return engine + + +def _build_impl(rank, tensorrt_llm_model, args): + torch.cuda.set_device(rank % args.gpus_per_node) + tensorrt_llm.logger.set_level(args.log_level) + args.output_dir.mkdir(parents=True, exist_ok=True) + timing_cache_file = args.timing_cache if args.timing_cache else args.output_dir / "model.cache" + timing_cache = timing_cache_file + + builder = Builder() + apply_query_key_layer_scaling = False + cur_rank = rank + + builder_config = builder.create_builder_config( + name=MODEL_NAME, + precision=args.dtype, + timing_cache=timing_cache, + tensor_parallel=args.world_size, # TP only + parallel_build=args.parallel_build, + num_layers=tensorrt_llm_model._num_layers, + num_heads=tensorrt_llm_model._num_heads, + num_kv_heads=tensorrt_llm_model._num_kv_heads, + hidden_size=tensorrt_llm_model._hidden_size, + vocab_size=tensorrt_llm_model._vocab_size, + hidden_act=tensorrt_llm_model.hidden_act, + max_position_embeddings=tensorrt_llm_model.max_position_embeddings, + apply_query_key_layer_scaling=apply_query_key_layer_scaling, + max_batch_size=args.max_batch_size, + max_input_len=args.max_input_len, + max_output_len=args.max_output_len, + int8="int8" in args.quantization, + opt_level=args.builder_opt, + paged_kv_cache=args.paged_kv_cache, + tokens_per_block=args.tokens_per_block, + use_prompt_tuning=args.max_prompt_embedding_table_size > 0, + use_parallel_embedding=args.use_parallel_embedding, + fp8="fp8" in args.quantization, + ) + + engine_name = get_engine_name(MODEL_NAME, args.dtype, args.world_size, cur_rank) + engine = build_rank_engine(tensorrt_llm_model, builder, builder_config, engine_name, cur_rank, args) + assert engine is not None, f"Failed to build engine for rank {cur_rank}" + + if cur_rank == 0: + # Use in-memory timing cache for multiple builder passes. + if not args.parallel_build: + timing_cache = builder_config.trt_builder_config.get_timing_cache() + + serialize_engine(engine, args.output_dir / engine_name) + + if rank == 0: + ok = builder.save_timing_cache(builder_config, timing_cache_file) + assert ok, "Failed to save timing cache." + + +def build( + tensorrt_llm_model, + output_dir: Path, + rank=0, + world_size=1, + dtype="float16", + timing_cache="", + log_level="info", + max_batch_size=1, + max_input_len=200, + max_output_len=200, + max_beam_width=1, + max_prompt_embedding_table_size=0, + parallel_build=False, + gpus_per_node=1, + quantization=None, +): + """Builds the tensorrt_llm_model to engine.""" + args = argparse.Namespace() + args.world_size = world_size + args.dtype = dtype + args.timing_cache = timing_cache + args.log_level = log_level + args.max_batch_size = max_batch_size + args.max_input_len = max_input_len + args.max_output_len = max_output_len + args.max_beam_width = max_beam_width + args.use_gpt_attention_plugin = dtype + args.use_gemm_plugin = dtype + # Only enable rmsnorm_plugin for INT8 and FP16 as FP8 performance has a regression. + # TODO: Understand why rmsnorm_plugin is not performing well in FP8 + args.use_rmsnorm_plugin = dtype if "fp8" not in quantization else False + args.use_layernorm_plugin = False + args.parallel_build = parallel_build + args.enable_context_fmha = True + args.enable_context_fmha_fp32_acc = False + args.gpus_per_node = gpus_per_node + args.builder_opt = None + args.output_dir = Path(output_dir) + args.remove_input_padding = True + args.use_smooth_quant = False + args.use_weight_only = False + args.weight_only_precision = "int8" + args.per_channel = False + args.per_token = False + args.int8_kv_cache = False + args.random_seed = None + args.paged_kv_cache = False + args.max_prompt_embedding_table_size = max_prompt_embedding_table_size + args.use_inflight_batching = False + args.use_ib_gpt_attention_plugin = False + args.use_parallel_embedding = False + args.use_lookup_plugin = False + args.tokens_per_block = 64 + args.quantization = quantization + + assert not ( + args.use_smooth_quant and args.use_weight_only + ), "You cannot enable both SmoothQuant and INT8 weight-only together." + + assert not ( + args.use_smooth_quant and args.use_weight_only + ), "You cannot enable both SmoothQuant and INT8 weight-only together." + + if args.use_ib_gpt_attention_plugin: + logger.warning( + "use_ib_gpt_attention_plugin is deprecated. Use combination of" + " --use_gpt_attention_plugin=dtype --use_inflight_batching instead." + ) + + if args.use_inflight_batching: + assert args.use_gpt_attention_plugin, "You have to use GPT attention plugin for in-flight batching mode" + assert args.paged_kv_cache, "You have to use paged kv cache for in-flight batching mode" + assert args.remove_input_padding, "You have to remove input padding for in-flight batching" + + if args.use_smooth_quant: + args.quant_mode = QuantMode.use_smooth_quant(args.per_token, args.per_channel) + elif args.use_weight_only: + args.quant_mode = QuantMode.use_weight_only(args.weight_only_precision == "int4") + else: + args.quant_mode = QuantMode(0) + + if args.int8_kv_cache: + args.quant_mode = args.quant_mode.set_int8_kv_cache() + + if args.random_seed is not None: + torch.manual_seed(args.random_seed) + + logger.set_level(args.log_level) + tik = time.time() + _build_impl(rank, tensorrt_llm_model, args) + + tok = time.time() + t = time.strftime("%H:%M:%S", time.gmtime(tok - tik)) + logger.info(f"Total time of building all {args.world_size} engines: {t}") diff --git a/nemo/export/trt_llm/tensorrt_llm_model.py b/nemo/export/trt_llm/tensorrt_llm_model.py new file mode 100644 index 000000000000..ecffd5573e2f --- /dev/null +++ b/nemo/export/trt_llm/tensorrt_llm_model.py @@ -0,0 +1,487 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""This module defines a tensorrt_llm based model for all LLMs we support inside AMMO. + +Referrence impl in tensorrt_llm: tensorrt_llm/models/gpt/model.py. +""" +import inspect +from collections import OrderedDict +from pathlib import Path + +import numpy as np +import tensorrt as trt +import torch +from tensorrt_llm import default_net, str_dtype_to_trt +from tensorrt_llm.functional import RaggedTensor, Tensor, expand_mask, gather_last_token_logits, shape +from tensorrt_llm.layers import ColumnLinear, InflightBatchingParam +from tensorrt_llm.models.generation_mixin import GenerationMixin +from tensorrt_llm.module import Module, ModuleList + +from .decoder import build_decoder_layer +from .model_config import ModelConfig +from .quantization_utils import quantize_linear +from .tensor_utils import get_tensor_parallel_group, trt_dtype_to_str +from .tensorrt_llm_build import build +from .tensorrt_llm_utils import build_embedding_from_config, build_layernorm_from_config, print_tensorrt_llm + + +class ModelBuilder(Module): + """A generic tensorrt_llm transformer model builder. + + We try to make this module builder as flexibile as possible to cover all transformer conversion usecases. + """ + + def __init__(self, model_config: ModelConfig): + """Initializes the ModelBuilder from a model_config.""" + super().__init__() + self.quantization = model_config.quantization + self.rank = model_config.rank + self.max_position_embeddings = model_config.max_position_embeddings + self.hidden_act = model_config.hidden_act + + self._dtype = str_dtype_to_trt(model_config.dtype) + self._kv_dtype = self._dtype + self._tensor_parallel = model_config.tensor_parallel + self._vocab_size = model_config.vocab_size + self._hidden_size = model_config.hidden_size + self._num_layers = len(model_config.layers) + self._num_heads = model_config.num_attention_heads + self._num_kv_heads = model_config.num_kv_heads + self._use_prompt_tuning = model_config.use_prompt_tuning + + # TODO: support use_parallel_embedding. + self.vocab_embedding = build_embedding_from_config( + model_config.vocab_embedding, self._dtype, use_prompt_tuning=self._use_prompt_tuning + ) + self.positional_embedding = build_embedding_from_config( + model_config.positional_embedding, self._dtype, use_prompt_tuning=False + ) + self.layers = ModuleList( + [ + build_decoder_layer( + layer, + layer_id, + self._num_layers, + dtype=self._dtype, + quantization=model_config.quantization, + rank=self.rank, + tensor_parallel=self._tensor_parallel, + ) + for layer_id, layer in enumerate(model_config.layers) + ] + ) + + self.ln_f = build_layernorm_from_config(model_config.final_layernorm, self._dtype) + + def forward( + self, + input_ids, + position_ids, + past_key_value=None, + sequence_length=None, + host_past_key_value_lengths=None, + use_cache=False, + attention_mask=None, + cache_indirection=None, + kv_cache_block_pointers=None, + prompt_embedding_table=None, + prompt_tasks=None, + prompt_vocab_size=None, + inflight_batching_args=None, + context_lengths=None, + host_context_lengths=None, + host_request_types=None, + max_context_length=None, + ): + """Forward function for the full model.""" + ptuning_args = [] + if self._use_prompt_tuning: + ptuning_args = [prompt_embedding_table, prompt_tasks, prompt_vocab_size] + x = self.vocab_embedding(input_ids.data, *ptuning_args) + if hasattr(self, "positional_embedding") and self.positional_embedding: + assert position_ids + x = x + self.positional_embedding(position_ids) + + hidden_states = x + + if past_key_value is None: + past_key_value = tuple([None] * len(self.layers)) + + if use_cache: + presents = [] + + if attention_mask is not None: + attention_mask = expand_mask(attention_mask, shape(input_ids.data, -1)) + hidden_states = RaggedTensor.from_row_lengths(hidden_states, input_ids.row_lengths, input_ids.max_row_length) + + def _forward_has_argument(layer, argument_name): + return argument_name in inspect.signature(layer.forward).parameters + + for idx, (layer, past, pointers) in enumerate(zip(self.layers, past_key_value, kv_cache_block_pointers)): + # In TRT LLM, not all model decoders are with the same forward arg signature. + # So we check arg compatibility and optionally add them if supported. + # In case the decoder forward signature changes, this if branch list below will need to be updated. + additional_inputs = {} + if _forward_has_argument(layer, "inflight_batching_args"): + additional_inputs["inflight_batching_args"] = inflight_batching_args + if _forward_has_argument(layer, "past_key_value_pointers"): + additional_inputs["past_key_value_pointers"] = ( + (None if inflight_batching_args is None else inflight_batching_args.past_key_value_pointers[idx]), + ) + if _forward_has_argument(layer, "pointers_to_kv_cache_block_pointers"): + additional_inputs["pointers_to_kv_cache_block_pointers"] = ( + ( + None + if ( + inflight_batching_args is None + or inflight_batching_args.pointers_to_kv_cache_block_pointers is None + ) + else inflight_batching_args.pointers_to_kv_cache_block_pointers[idx] + ), + ) + + hidden_states = layer( + hidden_states, + past_key_value=past, + sequence_length=sequence_length, + host_past_key_value_lengths=host_past_key_value_lengths, + use_cache=use_cache, + attention_mask=attention_mask, + cache_indirection=cache_indirection, + kv_cache_block_pointers=pointers, + context_lengths=context_lengths, + host_context_lengths=host_context_lengths, + host_request_types=host_request_types, + max_context_length=max_context_length, + **additional_inputs, + ) + + if use_cache: + presents.append(hidden_states[1]) + hidden_states = hidden_states[0] + + hidden_states = self.ln_f(hidden_states.data) + + if use_cache: + return (hidden_states, tuple(presents)) + return hidden_states + + +class LMHeadModelBuilder(ModelBuilder, GenerationMixin): + """The implementation of the model builder with an LMHead.""" + + def __init__(self, model_config: ModelConfig): + """Initializes the LMHeadModelBuilder from a model_config.""" + super().__init__(model_config) + + # TODO: Add support for share_embedding_table + share_embedding_table = False + share_weight = None + if share_embedding_table: + share_weight = self.embedding.vocab_embedding.weight + self.lm_head = ColumnLinear( + self._hidden_size, + model_config.vocab_size_padded, + bias=False, + dtype=self._dtype, + tp_group=get_tensor_parallel_group(self._tensor_parallel), + tp_size=self._tensor_parallel, + gather_output=True, + share_weight=share_weight, + ) + self.lm_head.weight.value = model_config.lm_head.weight + if model_config.quantization: + self.lm_head = quantize_linear(self.lm_head, model_config.quantization, model_config.lm_head) + + def forward( + self, + input_ids, + position_ids, + past_key_value=None, + sequence_length=None, + host_past_key_value_lengths=None, + use_cache=False, + last_token_ids=None, + attention_mask=None, + cache_indirection=None, + kv_cache_block_pointers=None, + prompt_embedding_table=None, + prompt_tasks=None, + prompt_vocab_size=None, + inflight_batching_args=None, + context_lengths=None, + host_context_lengths=None, + host_request_types=None, + max_context_length=None, + ): + """Forward function for the full LMHead model.""" + assert last_token_ids is not None, "Expecting last token ids to be not None" + hidden_states = super().forward( + input_ids, + position_ids, + past_key_value, + sequence_length, + host_past_key_value_lengths, + use_cache, + attention_mask, + cache_indirection, + kv_cache_block_pointers, + prompt_embedding_table, + prompt_tasks, + prompt_vocab_size, + inflight_batching_args, + context_lengths, + host_context_lengths, + host_request_types, + max_context_length, + ) + + if use_cache: + hidden_states, presents = hidden_states + + hidden_states = gather_last_token_logits( + hidden_states, last_token_ids, default_net().plugin_config.remove_input_padding + ) + + # [batch_size, hidden_size] -> [batch_size, vocab_size] + lm_logits = self.lm_head(hidden_states) + lm_logits.mark_output("logits", str_dtype_to_trt("float16")) + # out_inter.mark_output('inter', str_dtype_to_trt('float32')) + + if use_cache: + for i, present in enumerate(presents): + present.mark_output(f"present_key_value_{i}", self._kv_dtype) + return (lm_logits, presents) + + return lm_logits + + def prepare_inputs( + self, + max_batch_size, + max_input_len, + max_new_tokens, + use_cache, + max_beam_width: int = 1, + paged_kv_cache: bool = False, + tokens_per_block: int = 64, + prompt_embedding_table_size: int = 128, + ): + """@brief: Prepare inputs Tensors for the model. + + The given sizes are used to determine the + ranges of the dimensions of when using TRT dynamic shapes. + + @return: a list contains values which can be fed into the self.forward() + """ + # Prepare inputs + head_size = self._hidden_size // self._num_heads + num_heads_kv = (self._num_kv_heads + self._tensor_parallel - 1) // self._tensor_parallel + remove_input_padding = default_net().plugin_config.remove_input_padding + use_gpt_attention_plugin = default_net().plugin_config.gpt_attention_plugin + use_ib_gpt_attention_plugin = default_net().plugin_config.inflight_batching_gpt_attention_plugin + + model_inputs = self.prepare_basic_inputs( + max_batch_size, + max_beam_width, + max_input_len, + max_new_tokens, + num_heads_kv, + head_size, + self._num_layers, + self._kv_dtype, + remove_input_padding=remove_input_padding, + use_gpt_attention_plugin=use_gpt_attention_plugin, + use_ib_gpt_attention_plugin=use_ib_gpt_attention_plugin, + paged_kv_cache=paged_kv_cache, + tokens_per_block=tokens_per_block, + ) + + bb_range = [1, (max_batch_size * max_beam_width + 1) // 2, max_batch_size * max_beam_width] + p_embedding_range = [1, prompt_embedding_table_size // 2, prompt_embedding_table_size] + num_tokens_range = [ + 1, + max_batch_size * max_beam_width, + max(max_input_len * max_batch_size, max_beam_width * max_batch_size), + ] + bs_range = [1, (max_batch_size + 1) // 2, max_batch_size] + + prompt_embedding_table = None + tasks = None + prompt_vocab_size = None + if self._use_prompt_tuning: + prompt_embedding_table = Tensor( + name="prompt_embedding_table", + dtype=self._dtype, + shape=[-1, self._hidden_size], + dim_range=OrderedDict( + [("prompt_embedding_table_size", [p_embedding_range]), ("hidden_size", [self._hidden_size]),] + ), + ) + if remove_input_padding: + tasks = Tensor( + name="tasks", + dtype=trt.int32, + shape=[1, -1], + dim_range=OrderedDict([("batch_size_fake", [1]), ("input_len_task", [num_tokens_range]),]), + ) + else: + tasks = Tensor( + name="tasks", + dtype=trt.int32, + shape=[-1, 1], + dim_range=OrderedDict([("batch_size_beam_width", [bb_range]), ("broadcast_dim", [1]),]), + ) + prompt_vocab_size = Tensor( + name="prompt_vocab_size", dtype=trt.int32, shape=[1], dim_range=OrderedDict([("size", [1])]), + ) + + inflight_batching_args = None + if use_ib_gpt_attention_plugin: + past_key_value_pointers = [] + pointers_to_kv_cache_block_pointers = [] + for i in range(self._num_layers): + kv = Tensor( + name=f"past_key_value_pointers_{i}", + dtype=trt.int32, + # 2 INT32s for representing a single INT64 pointer + shape=[-1, 2], + dim_range=OrderedDict(batch_size_kv=[bs_range], pointer_width=[2]), + ) + past_key_value_pointers.append(kv) + + if paged_kv_cache: + # [nbReq, 2] + pkv = Tensor( + name=f"pointers_to_kv_cache_block_pointers_{i}", + dtype=trt.int32, + # 2 INT32s for representing a single INT64 pointer + shape=[-1, 2], + dim_range=OrderedDict(batch_size_cp=[bs_range], pointer_width=[2]), + ) + pointers_to_kv_cache_block_pointers.append(pkv) + + inflight_batching_args = InflightBatchingParam( + # [nbReq] + host_context_lengths=Tensor( + name="host_context_lengths", + dtype=trt.int32, + shape=[-1], + dim_range=OrderedDict(batch_size_hscl=[bs_range]), + ), + # [nbSeq] + context_lengths=Tensor( + name="context_lengths", + dtype=trt.int32, + shape=[-1], + dim_range=OrderedDict(batch_size_context_lengths=[bs_range]), + ), + # [nbReq] + host_beam_widths=Tensor( + name="beam_widths", dtype=trt.int32, shape=[-1], dim_range=OrderedDict(batch_size_bw=[bs_range]), + ), + # [nbReq, 2] + cache_indir_pointers=Tensor( + name="cache_indir_pointers", + dtype=trt.int32, + # 2 INT32s for representing a single INT64 pointer + shape=[-1, 2], + dim_range=OrderedDict(batch_size_cp=[bs_range], pointer_width=[2]), + ), + # [nbReq] + host_req_cache_max_seq_lengths=Tensor( + name="req_cache_max_seq_lengths", + dtype=trt.int32, + shape=[-1], + dim_range=OrderedDict(batch_size_rcmsl=[bs_range]), + ), + max_input_length=max_input_len, + max_beam_width=max_beam_width, + use_int8_kv_cache=self.quant_mode.has_int8_kv_cache(), + past_key_value_pointers=past_key_value_pointers, + pointers_to_kv_cache_block_pointers=( + None if not paged_kv_cache else pointers_to_kv_cache_block_pointers + ), + ) + + return ( + model_inputs["input_ids"], + model_inputs["position_ids"], + model_inputs["past_key_value"], + model_inputs["sequence_length"], + model_inputs["host_past_key_value_lengths"], + True, + model_inputs["last_token_ids"], + model_inputs["attention_mask"], + model_inputs["cache_indirection"], + model_inputs["kv_cache_block_pointers_list"], + prompt_embedding_table, + tasks, + prompt_vocab_size, + inflight_batching_args, + model_inputs["context_lengths"], + model_inputs["host_context_lengths"], + model_inputs["host_request_types"], + max_input_len, + ) + + def build( + self, + output_dir: Path, + timing_cache: str = "", + log_level: str = "info", + max_batch_size: int = 1, + max_input_len: int = 200, + max_output_len: int = 200, + max_beam_width: int = 1, + parallel_build: bool = False, + max_prompt_embedding_table_size: int = 0, + ): + """Builds the model and generate the tensorrt_llm engine. + + Args: + timing_cache: the name of the tensorrt timing cache file inside the output_dir. + log_level: the logging level. + max_batch_size: the max batch size of the deployed model engine. + max_input_len: the max length of the input tokens. + max_output_len: the max length of the output tokens. + max_beam_width: the max beam search width. + output_dir: the output directory where we save the generated tensorrt_llm engine file. + """ + # Uncomment the following to print the network for debugging purpose. + # self.print() + + if self.rank < torch.cuda.device_count(): + print(f"warning: Rank {self.rank} larger than GPUs available") + if self._tensor_parallel < torch.cuda.device_count(): + print(f"warning: Not enough GPUs locally, requesting {self._tensor_parallel}") + + build( + tensorrt_llm_model=self, + output_dir=output_dir, + rank=self.rank, + world_size=self._tensor_parallel, + dtype=trt_dtype_to_str(self._dtype), + timing_cache=timing_cache, + log_level=log_level, + max_batch_size=max_batch_size, + max_input_len=max_input_len, + max_output_len=max_output_len, + max_beam_width=max_beam_width, + max_prompt_embedding_table_size=max_prompt_embedding_table_size, + parallel_build=parallel_build, + gpus_per_node=torch.cuda.device_count(), + quantization=self.quantization, + ) + + def print(self): + """Debugging print of the tensorrt_llm network.""" + np.set_printoptions(threshold=36) + print_tensorrt_llm(f"rank.{self.rank}", self) diff --git a/nemo/export/trt_llm/tensorrt_llm_run.py b/nemo/export/trt_llm/tensorrt_llm_run.py new file mode 100644 index 000000000000..c2a8626fdf13 --- /dev/null +++ b/nemo/export/trt_llm/tensorrt_llm_run.py @@ -0,0 +1,317 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""Defines the tensorrt_llm inference API that can support both single and multiple GPU LLM inferences. + +Referrence impl in tensorrt_llm: examples/llama/summarize.py. +""" + +import json +import os +from dataclasses import dataclass +from pathlib import Path +from typing import List, Optional + +import tensorrt_llm +import torch +from mpi4py.futures import MPIPoolExecutor +from tensorrt_llm.runtime import ModelConfig, SamplingConfig +from transformers import PreTrainedTokenizer + +from .tensorrt_llm_build import get_engine_name, MODEL_NAME # isort:skip + + +@dataclass +class TensorrtLLMHostContext: + """The host side context for TRT LLM inference.""" + + executor: MPIPoolExecutor = None + tensor_parallel: int = 1 + tokenizer: PreTrainedTokenizer = None + max_batch_size: int = 0 + max_input_len: int = 0 + + +@dataclass +class TensorrtLLMWorkerContext: + """The MPI worker side context for TRT LLM inference.""" + + decoder: tensorrt_llm.runtime.GenerationSession = None + sampling_config: SamplingConfig = None + max_batch_size: int = 0 + max_input_len: int = 0 + + +# This is a global context that will be initialized during the model loading process as MPI worker. +tensorrt_llm_worker_context = TensorrtLLMWorkerContext() + + +def _read_config(config_path: Path): + with open(config_path, "r") as f: + config = json.load(f) + use_gpt_attention_plugin = config["plugin_config"]["gpt_attention_plugin"] + ib_gpt_attention_plugin = config["plugin_config"]["inflight_batching_gpt_attention_plugin"] + remove_input_padding = config["plugin_config"]["remove_input_padding"] + world_size = config["builder_config"]["tensor_parallel"] + assert ( + world_size == tensorrt_llm.mpi_world_size() + ), f"Engine world size ({world_size}) != Runtime world size ({tensorrt_llm.mpi_world_size()})" + + assert world_size <= torch.cuda.device_count(), f"Not enough GPUs, requesting {world_size}" + + num_heads = config["builder_config"]["num_heads"] + num_kv_heads = config["builder_config"].get("num_kv_heads", num_heads) + hidden_size = config["builder_config"]["hidden_size"] // world_size + vocab_size = config["builder_config"]["vocab_size"] + num_layers = config["builder_config"]["num_layers"] + paged_kv_cache = config["plugin_config"]["paged_kv_cache"] + tokens_per_block = config["builder_config"]["tokens_per_block"] + use_prompt_tuning = config["builder_config"]["use_prompt_tuning"] + + num_heads = num_heads // world_size + num_kv_heads = (num_kv_heads + world_size - 1) // world_size + + model_config = ModelConfig( + num_heads=num_heads, + num_kv_heads=num_kv_heads, + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + gpt_attention_plugin=use_gpt_attention_plugin, + ib_gpt_attention_plugin=ib_gpt_attention_plugin, + remove_input_padding=remove_input_padding, + paged_kv_cache=paged_kv_cache, + tokens_per_block=tokens_per_block, + use_prompt_tuning=use_prompt_tuning, + ) + + dtype = config["builder_config"]["precision"] + max_input_len = config["builder_config"]["max_input_len"] + max_batch_size = config["builder_config"]["max_batch_size"] + + return model_config, world_size, dtype, max_input_len, max_batch_size + + +def _load(tokenizer: PreTrainedTokenizer, engine_dir, num_beams=1): + """The impl of `load` API for on a single GPU worker.""" + try: + tensorrt_llm.logger.set_level("info") + + engine_dir = Path(engine_dir) + config_path = engine_dir / "config.json" + model_config, world_size, dtype, max_input_len, max_batch_size = _read_config(config_path) + + runtime_rank = tensorrt_llm.mpi_rank() + + assert runtime_rank < torch.cuda.device_count(), f"Rank {runtime_rank} out of bound" + + runtime_mapping = tensorrt_llm.Mapping(world_size, runtime_rank) + torch.cuda.set_device(runtime_rank % runtime_mapping.gpus_per_node) + + engine_name = get_engine_name(MODEL_NAME, dtype, world_size, runtime_rank) + serialize_path = os.path.join(engine_dir, engine_name) + + with open(serialize_path, "rb") as f: + engine_buffer = f.read() + decoder = tensorrt_llm.runtime.GenerationSession( + model_config, engine_buffer, runtime_mapping, debug_mode=False + ) + + sampling_config = SamplingConfig( + end_id=tokenizer.eos_token_id, pad_id=tokenizer.eos_token_id, num_beams=num_beams + ) + + # Initialize the global context so it can be used during `run` API. + global tensorrt_llm_worker_context + tensorrt_llm_worker_context.decoder = decoder + tensorrt_llm_worker_context.sampling_config = sampling_config + tensorrt_llm_worker_context.max_batch_size = max_batch_size + tensorrt_llm_worker_context.max_input_len = max_input_len + + except Exception as e: + print(e) + raise e + + +def _forward( + input_tensors: List[torch.IntTensor], + max_output_len: int, + top_k: int = 1, + top_p: float = 0.0, + temperature: float = 1.0, + prompt_table=None, + task_vocab_size=None, +) -> Optional[torch.IntTensor]: + """The impl of `forward` API for on a single GPU worker with tensor as IO. + + Returns: + the output tokens tensor with shape [batch_size, num_beams, output_len]. + """ + try: + # Loading the global context initialized from the `load` API. + global tensorrt_llm_worker_context + decoder = tensorrt_llm_worker_context.decoder + sampling_config = tensorrt_llm_worker_context.sampling_config + max_batch_size = tensorrt_llm_worker_context.max_batch_size + max_input_len = tensorrt_llm_worker_context.max_input_len + + batch_size = len(input_tensors) + assert batch_size <= max_batch_size, f"batch size {batch_size} exceedng max batch size {max_batch_size}" + input_lengths = [t.shape[0] for t in input_tensors] + max_length = max(input_lengths) + assert max_length <= max_input_len, f"input length {max_length} exceedng max input length {max_input_len}" + pad_id = sampling_config.pad_id + + if decoder.remove_input_padding: + line_encoded = [torch.tensor(t, dtype=torch.int32).cuda() for t in input_tensors] + else: + line_encoded = torch.nested.to_padded_tensor( + torch.nested.nested_tensor(input_tensors, dtype=torch.int32), pad_id + ).cuda() + input_lengths = torch.tensor(input_lengths, dtype=torch.int32).cuda() + + if prompt_table is None: + ptuning_args = [] + else: + if task_vocab_size is None: + raise Exception("task_vocab_size cannot be None") + + task_vocab_size = torch.tensor([task_vocab_size], dtype=torch.int32, device="cuda") + + if isinstance(line_encoded, list): + le_size = len(line_encoded) + else: + le_size = line_encoded.size(0) + tasks = torch.zeros(le_size, 1).cuda() + + ptuning_args = [prompt_table, tasks, task_vocab_size] + + with torch.no_grad(): + sampling_config.top_k = top_k + sampling_config.top_p = top_p + sampling_config.temperature = temperature + + decoder.setup(batch_size, max_context_length=max_length, max_new_tokens=max_output_len) + + if decoder.remove_input_padding: + output_ids = decoder.decode_batch(line_encoded, sampling_config) + else: + output_ids = decoder.decode(line_encoded, input_lengths, sampling_config, *ptuning_args,) + + torch.cuda.synchronize() + + runtime_rank = tensorrt_llm.mpi_rank() + if runtime_rank == 0: + return output_ids + else: + return None + + except Exception as e: + print(e) + raise e + + +def load(tokenizer: PreTrainedTokenizer, engine_dir: str, num_beams: int = 1) -> TensorrtLLMHostContext: + """Loaded the compiled LLM model and run it. + + It also supports running the TRT LLM model on multi-GPU. + """ + config_path = os.path.join(engine_dir, "config.json") + with open(config_path, "r") as f: + config = json.load(f) + tensor_parallel = config["builder_config"]["tensor_parallel"] + if tensor_parallel == 1: + _load(tokenizer, engine_dir, num_beams) + executor = None + else: + executor = MPIPoolExecutor(max_workers=tensor_parallel) + futures = [] + for _ in range(tensor_parallel): + future = executor.submit(_load, tokenizer, engine_dir, num_beams) + futures.append(future) + for future in futures: + future.result() + + max_batch_size = config["builder_config"]["max_batch_size"] + max_input_len = config["builder_config"]["max_input_len"] + + return TensorrtLLMHostContext( + executor=executor, + tensor_parallel=tensor_parallel, + tokenizer=tokenizer, + max_batch_size=max_batch_size, + max_input_len=max_input_len, + ) + + +def forward( + input_tensors: List[torch.IntTensor], + max_output_len: int, + host_context: TensorrtLLMHostContext, + top_k: int = 1, + top_p: float = 0.0, + temperature: float = 1.0, + prompt_table=None, + task_vocab_size=None, +) -> Optional[torch.IntTensor]: + """Run the loaded model with the host_context provided from the `load` API.""" + batch_size = len(input_tensors) + max_batch_size = host_context.max_batch_size + assert batch_size <= max_batch_size, f"batch size {batch_size} exceedng max batch size {max_batch_size}" + max_length = max([t.shape[0] for t in input_tensors]) + max_input_len = host_context.max_input_len + assert max_length <= max_input_len, f"input length {max_length} exceedng max input length {max_input_len}" + + tensor_parallel = host_context.tensor_parallel + if tensor_parallel == 1: + return _forward(input_tensors, max_output_len, top_k, top_p, temperature, prompt_table, task_vocab_size) + else: + executor = host_context.executor + futures = [] + for _ in range(tensor_parallel): + future = executor.submit( + _forward, input_tensors, max_output_len, top_k, top_p, temperature, prompt_table, task_vocab_size + ) + futures.append(future) + for future in futures: + result = future.result() + if result is not None: + return result + + raise RuntimeError("Internal error") + + +def generate( + input_texts: List[torch.IntTensor], + max_output_len: int, + host_context: TensorrtLLMHostContext, + top_k: int = 1, + top_p: float = 0.0, + temperature: float = 1.0, + prompt_table=None, + task_vocab_size=None, +) -> Optional[List[List[str]]]: + """Generate the output sequence from the input sequence. + + Returns a 2D string list with shape [batch_size, num_beams]. + """ + tokenizer = host_context.tokenizer + input_tensors = [torch.IntTensor(tokenizer.encode(t, add_special_tokens=False)) for t in input_texts] + output_tensor = forward( + input_tensors, max_output_len, host_context, top_k, top_p, temperature, prompt_table, task_vocab_size + ) + assert output_tensor is not None + + input_lengths = [t.shape[0] for t in input_tensors] + output_lines_list = [ + tokenizer.batch_decode(output_tensor[b, :, input_lengths[b] :], skip_special_tokens=True) + for b in range(output_tensor.shape[0]) + ] + return output_lines_list diff --git a/nemo/export/trt_llm/tensorrt_llm_utils.py b/nemo/export/trt_llm/tensorrt_llm_utils.py new file mode 100644 index 000000000000..3827e38a8db0 --- /dev/null +++ b/nemo/export/trt_llm/tensorrt_llm_utils.py @@ -0,0 +1,73 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +"""Utils to convert model_config layers to tensorrt_llm modules.""" + +import tensorrt as trt +from tensorrt_llm.layers import Embedding, LayerNorm, PromptTuningEmbedding, RmsNorm +from tensorrt_llm.module import Module + +from .model_config import LAYERNORM_DEFAULT, LAYERNORM_RMS, EmbeddingConfig, LayernormConfig +from .tensor_utils import get_tensor_parallel_group + + +def build_embedding_from_config( + config: EmbeddingConfig, dtype: trt.DataType, tensor_parallel: int = 1, use_prompt_tuning: bool = False +): + """Returns the tensorrt_llm embedding layer from the embedding config.""" + # If the config is empty, return an empty impl. + if config is None: + return None + EmbeddingCls = PromptTuningEmbedding if use_prompt_tuning else Embedding + trt_embedding = EmbeddingCls( + config.weight.shape[0], + config.weight.shape[1], + dtype=dtype, + tp_size=tensor_parallel, + tp_group=get_tensor_parallel_group(tensor_parallel), + ) + trt_embedding.weight.value = config.weight + return trt_embedding + + +def build_layernorm_from_config(config: LayernormConfig, dtype: trt.DataType): + """Returns the tensorrt_llm layernorm layer from the torch layernorm.""" + # If the config is empty, return an empty impl. + if config is None: + return None + + if config.layernorm_type == LAYERNORM_DEFAULT: + trt_layernorm = LayerNorm(normalized_shape=config.weight.shape[0], dtype=dtype) + trt_layernorm.weight.value = config.weight + trt_layernorm.bias.value = config.bias + elif config.layernorm_type == LAYERNORM_RMS: + trt_layernorm = RmsNorm(normalized_shape=config.weight.shape[0], dtype=dtype) + trt_layernorm.weight.value = config.weight + else: + raise NotImplementedError(f"{config.layernorm_type} not supported") + return trt_layernorm + + +def print_tensorrt_llm(name: str, tensorrt_llm_module: Module): + """Prints the tensorrt llm structure including weights and related data for debugging purpose.""" + for tensor_name in [ + "weight", + "bias", + "activation_scaling_factor", + "weights_scaling_factor", + "prequant_scaling_factor", + ]: + if hasattr(tensorrt_llm_module, tensor_name): + tensor = getattr(tensorrt_llm_module, tensor_name) + if tensor is not None: + print(f"{name}.{tensor_name}:{tensor._value.dtype}:{tensor._value.shape}:\n{tensor._value}") + + for k, v in tensorrt_llm_module.named_children(): + print_tensorrt_llm(f"{name}.{k}({v._get_name()})", v) diff --git a/nemo/export/utils.py b/nemo/export/utils.py new file mode 100644 index 000000000000..8a9691afe1d7 --- /dev/null +++ b/nemo/export/utils.py @@ -0,0 +1,155 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import datetime +import logging +import os +import pathlib +import tarfile +import tempfile +import typing +from pathlib import Path + +import numpy as np +import torch +import yaml + +log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s" +logging.basicConfig(format=log_format) +LOGGER = logging.getLogger(__name__) + +# numpy doesn't know bfloat16, define abstract binary type instead +np_bfloat16 = np.dtype('V2', metadata={"dtype": "bfloat16"}) + + +def unpack_nemo_ckpt( + nemo_archive_path: typing.Union[str, pathlib.Path], out_dir_path: typing.Union[str, pathlib.Path], +): + nemo_archive_path = pathlib.Path(nemo_archive_path) + if not nemo_archive_path.exists(): + raise FileNotFoundError(f"{nemo_archive_path} does not exist") + + for tar_mode in ["r:", "r:gz"]: + try: + with tarfile.open(nemo_archive_path, mode=tar_mode) as tar_file: + + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + safe_extract(tar_file, path=out_dir_path) + return out_dir_path + except tarfile.ReadError: + pass + + raise RuntimeError(f"Could not unpack {nemo_archive_path}") + + +def prompt_convert(prompt_config, prompt_weights): + if "task_templates" in prompt_config: + prompt_templates = prompt_config["task_templates"] + actual_task_id = 0 + vtokens_embeddings = [] + vtokens_len = [] + for task_name_id, prompt_task in enumerate(prompt_templates): + prompt_task_name = prompt_task["taskname"] + LOGGER.info(f"Task {actual_task_id}: {prompt_task['taskname']}") + prompt_task_weights = prompt_weights["prompt_table"].get( + f"prompt_table.{prompt_task_name}.prompt_embeddings.weight" + ) + if prompt_task_weights is None: + continue + vtokens_embeddings.append(prompt_task_weights) + vtokens_len.append(prompt_task_weights.shape[0]) + actual_task_id += 1 + + max_vtoken_len = max(vtokens_len) + embedding_dim = vtokens_embeddings[0].shape[1] + + # pad tasks to longest task embedding table + for i, vtoken_emb_table in enumerate(vtokens_embeddings): + padded_table = torch.zeros((max_vtoken_len, embedding_dim)) + padded_table[: vtoken_emb_table.shape[0], :] = vtoken_emb_table + vtokens_embeddings[i] = padded_table + + vtokens_embeddings = torch.stack(vtokens_embeddings) + else: + vtokens_embeddings = prompt_weights["prompt_embeddings_weights"] + + return vtokens_embeddings + + +def torch_to_numpy(x): + if x.dtype != torch.bfloat16: + return x.numpy() + return x.view(torch.int16).numpy().view(np_bfloat16) + + +def cpu_map_location(storage, loc): + return storage.cpu() + + +def is_nemo_file(path): + flag = False + + if path is not None: + if len(path) > 5: + pc = Path(path) + if pc.exists(): + if pc.is_file(): + if path[-5 : len(path)] == ".nemo": + flag = True + + return flag + + +def get_prompt_embedding_table(prompt_checkpoint_path): + + with tempfile.TemporaryDirectory() as prompt_out_dir: + prompt_out_dir = Path(prompt_out_dir) + unpack_nemo_ckpt(prompt_checkpoint_path, prompt_out_dir) + + model_weights_ckpt = "model_weights.ckpt" + with open(prompt_out_dir / "model_config.yaml") as f: + prompt_config = yaml.full_load(f) + LOGGER.debug(prompt_config) + + weight_path = prompt_out_dir / model_weights_ckpt + if not weight_path.exists(): + weight_path = prompt_out_dir / "mp_rank_00" / model_weights_ckpt + + prompt_weights = torch.load(weight_path, map_location=cpu_map_location,) + + return prompt_convert(prompt_config, prompt_weights) + + +def torch_to_numpy(x): + if x.dtype != torch.bfloat16: + return x.numpy() + return x.view(torch.int16).numpy().view(np_bfloat16) From ed895d067c95c52ec3b03ab9d01e77f7f701bdb6 Mon Sep 17 00:00:00 2001 From: Ao Tang Date: Fri, 13 Oct 2023 09:57:12 -0700 Subject: [PATCH 305/512] Add Imagen Synthetic Dataloader --- .../generative/imagen/conf/base64-500m.yaml | 2 + examples/multimodal/mllm/neva/neva_export.py | 3 +- .../multimodal/data/imagen/imagen_dataset.py | 45 +++++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/examples/multimodal/generative/imagen/conf/base64-500m.yaml b/examples/multimodal/generative/imagen/conf/base64-500m.yaml index b3ca47ec1c0b..4541110caf98 100644 --- a/examples/multimodal/generative/imagen/conf/base64-500m.yaml +++ b/examples/multimodal/generative/imagen/conf/base64-500m.yaml @@ -112,6 +112,8 @@ model: data: num_workers: 16 + synthetic_data: False + synthetic_data_length: 800000 train: dataset_path: - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 diff --git a/examples/multimodal/mllm/neva/neva_export.py b/examples/multimodal/mllm/neva/neva_export.py index b1d3031bfd4b..b5da058e096a 100644 --- a/examples/multimodal/mllm/neva/neva_export.py +++ b/examples/multimodal/mllm/neva/neva_export.py @@ -149,7 +149,8 @@ def output_names(self): tf32=precision in [32, '32', '32-true'], fp16=precision in [16, '16', '16-mixed'], bf16=precision in ['bf16', 'bf16-mixed'], - profiles=[p],), + profiles=[p], + ), ) save_engine(engine, path=os.path.join(out_dir, 'vision_encoder.plan')) diff --git a/nemo/collections/multimodal/data/imagen/imagen_dataset.py b/nemo/collections/multimodal/data/imagen/imagen_dataset.py index 2a0365bfc311..c3db3b3a4612 100644 --- a/nemo/collections/multimodal/data/imagen/imagen_dataset.py +++ b/nemo/collections/multimodal/data/imagen/imagen_dataset.py @@ -11,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import torch + from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon from nemo.collections.multimodal.data.imagen.augmentations.augmentations import ( PickleTransform, @@ -21,9 +23,42 @@ construct_image_augmentations, identical_transform, ) +from nemo.core.classes import Dataset as NeMoDataset from nemo.utils import logging +class ImagenSyntheticDataset(NeMoDataset): + def __init__( + self, res, conditioning_cfg, fake_len=100000, no_embedding=False, + ): + super().__init__() + self.fake_len = fake_len + self.res = res + self.no_embedding = no_embedding + if not no_embedding: + self.out_key = conditioning_cfg.out_key if conditioning_cfg.out_key else conditioning_cfg.precached_key + self.token_length = conditioning_cfg.token_length + self.embed_dim = conditioning_cfg.embed_dim + + def __getitem__(self, index): + item = {} + if isinstance(self.res, list): + for resolution in self.res: + image_key = f'images_{resolution}' + item[image_key] = torch.randn(3, resolution, resolution) + else: + item['images'] = torch.randn(3, self.res, self.res) + + item['raw_text'] = f'fake text {index}' + if not self.no_embedding: + item[f'{self.out_key}_embeddings'] = torch.randn(self.token_length, self.embed_dim) + item[f'{self.out_key}_mask'] = torch.ones(self.token_length, dtype=torch.long) + return item + + def __len__(self): + return self.fake_len + + def _build_functions_with_pickles(data_cfg, condition_cfg): def tuple_to_dict(inp): for input in inp: @@ -72,6 +107,16 @@ def build_train_valid_datasets( ): data_cfg = model_cfg.data condition_cfg = model_cfg.conditioning + + if data_cfg.get('synthetic_data', False): + logging.info(f'Creating Synthetic Datasaet.') + train_data = ImagenSyntheticDataset( + res=data_cfg.train.get('target_resolutions', 64), + conditioning_cfg=condition_cfg, + fake_len=data_cfg.get('synthetic_data_length', 10000), + no_embedding=condition_cfg.get("online_encoding", False), + ) + return train_data, None # This function maps data that are tuples to dictionary. if condition_cfg.get("online_encoding", False): tuple_to_dict, transform_fn = _build_functions_no_pickles(data_cfg) From fd7c1d3c5d706b2b678d69489d65abb78cc8b609 Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Fri, 13 Oct 2023 10:29:57 -0700 Subject: [PATCH 306/512] Add VITWrapper and export stuff to wrapper --- .../megatron_vit_classification_export.py | 44 +++++++++++++++++++ .../megatron_vit_classification_models.py | 29 ------------ 2 files changed, 44 insertions(+), 29 deletions(-) diff --git a/examples/vision/vision_transformer/megatron_vit_classification_export.py b/examples/vision/vision_transformer/megatron_vit_classification_export.py index 06f6447bd788..6156cc1d89cb 100644 --- a/examples/vision/vision_transformer/megatron_vit_classification_export.py +++ b/examples/vision/vision_transformer/megatron_vit_classification_export.py @@ -14,6 +14,7 @@ import glob import os +from typing import Dict, List, Optional import torch from omegaconf.omegaconf import OmegaConf, open_dict @@ -26,12 +27,53 @@ from nemo.collections.vision.data.imagenet_classnames import imagenet_classnames from nemo.collections.vision.data.megatron.vit_dataset import ClassificationTransform from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel +from nemo.core.classes.exportable import Exportable from nemo.core.config import hydra_runner +from nemo.core.neural_types import ChannelType, NeuralType from nemo.utils import logging from nemo.utils.get_rank import is_global_rank_zero from nemo.utils.trt_utils import build_engine +class VITWrapper(torch.nn.Module, Exportable): + def __init__(self, model): + super(VITWrapper, self).__init__() + self.model = model + + def forward(self, tokens): + output_tensor = self.model(tokens) + return output_tensor + + # For onnx export + def input_example(self, max_batch=8, max_dim=384): + """ + Generates input examples for tracing etc. + Returns: + A tuple of input examples. + """ + sample = next(self.parameters()) + tokens = torch.randn(max_batch, 3, max_dim, max_dim, device=sample.device) + return (tokens,) + + @property + def input_types(self) -> Optional[Dict[str, NeuralType]]: + return { + "tokens": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), + } + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + return {"logits": NeuralType(('B', 'D'), ChannelType())} + + @property + def input_names(self) -> List[str]: + return ['tokens'] + + @property + def output_names(self) -> List[str]: + return ['logits'] + + @hydra_runner(config_path="conf", config_name="megatron_vit_classification_export") def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") @@ -99,6 +141,8 @@ def dummy(): os.makedirs(f"{output_dir}/onnx/", exist_ok=True) os.makedirs(f"{output_dir}/plan/", exist_ok=True) + model = VITWrapper(model) + model.export(f"{output_dir}/onnx/vit.onnx", dynamic_axes={'tokens': {0: 'B'}}) input_profile = {} diff --git a/nemo/collections/vision/models/megatron_vit_classification_models.py b/nemo/collections/vision/models/megatron_vit_classification_models.py index a1e4a52d8f20..854e1d6b5a83 100644 --- a/nemo/collections/vision/models/megatron_vit_classification_models.py +++ b/nemo/collections/vision/models/megatron_vit_classification_models.py @@ -799,32 +799,3 @@ def parameters(self): return itertools.chain.from_iterable(module.parameters() for module in self.model) else: return self.model.parameters() - - # For onnx export - def input_example(self, max_batch=8, max_dim=384): - """ - Generates input examples for tracing etc. - Returns: - A tuple of input examples. - """ - sample = next(self.parameters()) - tokens = torch.randn(max_batch, 3, max_dim, max_dim, device=sample.device) - return (tokens,) - - @property - def input_types(self) -> Optional[Dict[str, NeuralType]]: - return { - "tokens": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), - } - - @property - def output_types(self) -> Optional[Dict[str, NeuralType]]: - return {"logits": NeuralType(('B', 'D'), ChannelType())} - - @property - def input_names(self) -> List[str]: - return ['tokens'] - - @property - def output_names(self) -> List[str]: - return ['logits'] From dd67c953112bca4ddd35bfb5d178ec6e7ae3f769 Mon Sep 17 00:00:00 2001 From: Yu Yao Date: Fri, 13 Oct 2023 14:11:33 -0700 Subject: [PATCH 307/512] Update neva with megatron-core support --- .../mllm/neva/conf/neva_config.yaml | 4 +- .../multimodal/models/neva/neva_model.py | 404 ++++++++++++------ .../language_modeling/megatron_gpt_model.py | 33 +- .../megatron/adapters/parallel_adapters.py | 2 +- .../parts/mixins/multimodal_adapter_mixins.py | 110 +++++ 5 files changed, 410 insertions(+), 143 deletions(-) create mode 100644 nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py diff --git a/examples/multimodal/mllm/neva/conf/neva_config.yaml b/examples/multimodal/mllm/neva/conf/neva_config.yaml index 4535dba9e2ea..c2f44de2c1b9 100644 --- a/examples/multimodal/mllm/neva/conf/neva_config.yaml +++ b/examples/multimodal/mllm/neva/conf/neva_config.yaml @@ -79,12 +79,12 @@ model: class_token_length: 1 freeze: True pretrain_mm_mlp_adapter: null # path to pretrained mm adapter - use_im_start_end: True + use_im_start_end: False # LLM configs # use GPTModel from megatron.core - mcore_gpt: False + mcore_gpt: True # model architecture encoder_seq_length: 4096 diff --git a/nemo/collections/multimodal/models/neva/neva_model.py b/nemo/collections/multimodal/models/neva/neva_model.py index a8baed37338c..b5be817b05bd 100644 --- a/nemo/collections/multimodal/models/neva/neva_model.py +++ b/nemo/collections/multimodal/models/neva/neva_model.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import itertools import math import os import random import re import tempfile +from itertools import chain from functools import partial from typing import Any, List, Optional, Union @@ -82,6 +82,7 @@ ) from nemo.collections.nlp.parts.nlp_overrides import GradScaler, NLPSaveRestoreConnector from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.collections.nlp.parts.mixins.multimodal_adapter_mixins import MultimodalAdapterModelMixin from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone from nemo.core import adapter_mixins from nemo.core.classes.common import PretrainedModelInfo @@ -99,8 +100,9 @@ HAVE_APEX = False try: - from megatron.core import parallel_state + from megatron.core import dist_checkpointing, parallel_state from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + from megatron.core.models.gpt import GPTModel as MCoreGPTModel HAVE_MEGATRON_CORE = True @@ -266,6 +268,158 @@ def replace_media_embeddings(self, input_ids, inputs_embeds, media): return updated_input_embeds +class MCoreNevaModel(MCoreGPTModel): + def __init__( + self, mm_cfg, media_start_id, media_end_id, **kwargs, + ): + super(MCoreNevaModel, self).__init__(**kwargs,) + + self.mm_cfg = mm_cfg + self.media_start_id = media_start_id + self.media_end_id = media_end_id + self.dist_ckpt = False + + if mm_cfg.llm.from_pretrained is not None: + logging.info(f"Loading LLM weights from checkpoint {mm_cfg.llm.from_pretrained}") + self.load_llm_weights(mm_cfg.llm.from_pretrained) + + if mm_cfg.llm.freeze: + for param in chain( + self.embedding.parameters(), + self.decoder.parameters(), + self.output_layer.parameters(), + ): + param.requires_grad = False + self.embedding = self.embedding.eval() + self.decoder = self.decoder.eval() + self.output_layer = self.output_layer.eval() + + # Initialize vision encoder and freeze it + if mm_cfg.vision_encoder.from_hf: + vision_encoder = CLIPVisionModel.from_pretrained( + mm_cfg.vision_encoder.from_pretrained, torch_dtype=torch.bfloat16, + ).cuda() + vision_encoder = vision_encoder.to(torch.bfloat16) + if mm_cfg.vision_encoder.freeze: + for param in vision_encoder.parameters(): + param.requires_grad = False + vision_encoder = vision_encoder.eval() + else: + vision_cfg = MegatronCLIPModel.restore_from( + mm_cfg.vision_encoder.from_pretrained, return_config=True + ).vision + vision_encoder = FrozenCLIPVisionTransformer(vision_cfg, self.config) + self.load_vision_encoder_weights(vision_encoder, mm_cfg.vision_encoder.from_pretrained) + if mm_cfg.vision_encoder.freeze: + vision_encoder.freeze() + + model_type = self.mm_cfg.llm.get("model_type", "nvgpt") + # Monkey patch embedding + if kwargs.get("pre_process", True): + extend_instance(self.embedding.word_embeddings, NevaWordEmbeddingMixin) + self.embedding.word_embeddings.init_vision( + vision_encoder, + media_start_id, + media_end_id, + vision_select_layer=mm_cfg.vision_encoder.get("vision_select_layer", -2), + class_token_length=mm_cfg.vision_encoder.get("class_token_length", 1), + use_im_start_end=mm_cfg.get("use_im_start_end", False), + llama_tricks=(model_type == "llama_2"), + ) + + def forward( + self, *args, **kwargs, + ): + media = kwargs.pop('media', None) + self.embedding.word_embeddings.set_media(media) + return super().forward(*args, **kwargs) + + def _load_model_weights(self, nemo_path): + """ + Shared method to load model weights from a given nemo_path. + """ + if torch.cuda.is_available(): + map_location = torch.device('cuda') + else: + map_location = torch.device('cpu') + + save_restore_connector = NLPSaveRestoreConnector() + cwd = os.getcwd() + app_state = AppState() + + with tempfile.TemporaryDirectory() as tmpdir: + try: + if os.path.isfile(nemo_path): + save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) + else: + tmpdir = nemo_path + os.chdir(tmpdir) + if app_state.model_parallel_size is not None and app_state.model_parallel_size > 1: + model_weights = save_restore_connector._inject_model_parallel_rank_for_ckpt( + tmpdir, save_restore_connector.model_weights_ckpt + ) + else: + model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) + + state_dict = save_restore_connector._load_state_dict_from_disk( + model_weights, map_location=map_location + ) + + # distributed checkpointing + if state_dict is None: + self.dist_ckpt = True + sharded_state_dict = self.sharded_state_dict(prefix="model.") + checkpoint = dict(state_dict=sharded_state_dict) + tmp_model_weights_ckpt = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) + tmp_model_weights_dir = os.path.splitext(tmp_model_weights_ckpt)[0] + assert os.path.isdir(tmp_model_weights_dir), f'Expected {tmp_model_weights_dir} to be a directory.' + checkpoint = dist_checkpointing.load( + sharded_state_dict=checkpoint, checkpoint_dir=tmp_model_weights_dir, + ) + state_dict = checkpoint["state_dict"] + + finally: + os.chdir(cwd) + + return state_dict + + def load_vision_encoder_weights(self, vision_encoder, nemo_path): + state_dict = self._load_model_weights(nemo_path) + + new_state_dict = {} + for k, v in state_dict.items(): + if k.startswith("model.vision_encoder."): + new_k = k.replace("model.vision_encoder.", "") + new_state_dict[new_k] = v + + missing, unexpected = vision_encoder.load_state_dict(new_state_dict, strict=False) + print(f"Restored from {nemo_path} with {len(missing)} missing and {len(unexpected)} unexpected keys") + if len(missing) > 0: + print(f"Missing Keys: {missing}") + if len(unexpected) > 0: + print(f"Unexpected Keys: {unexpected}") + + def load_llm_weights(self, nemo_path): + state_dict = self._load_model_weights(nemo_path) + + new_state_dict = {} + if self.dist_ckpt: + for k, v in state_dict.items(): + new_k = k + if k.startswith("model."): + new_k = k.replace("model.", "", 1) + new_state_dict[new_k] = v + self.load_state_dict(new_state_dict, strict=True) + else: + for k, v in state_dict.items(): + if k.startswith("model.language_model."): + new_k = k.replace("model.language_model.", "", 1) + module_key, param_key = new_k.split(".", 1) + if module_key not in new_state_dict: + new_state_dict[module_key] = {} + new_state_dict[module_key][param_key] = v + self.language_model.load_state_dict(new_state_dict, strict=True) + print(f"Restored LLM weights from {nemo_path}.") class NevaModel(GPTModel): def __init__( @@ -321,9 +475,9 @@ def __init__( def forward( self, *args, **kwargs, ): - media = args[-1] + media = kwargs.pop('media', None) self.language_model.embedding.word_embeddings.set_media(media) - return super().forward(*args[:-1], **kwargs) + return super().forward(*args, **kwargs) def _load_model_weights(self, nemo_path): """ @@ -392,131 +546,124 @@ def load_llm_weights(self, language_model, nemo_path): print(f"Restored LLM weights from {nemo_path}.") -class MegatronNevaModel(MegatronGPTPEFTModel): +class MegatronNevaModel(MultimodalAdapterModelMixin, MegatronGPTModel): """ Megatron Neva pretraining """ def __init__(self, cfg: DictConfig, trainer: Trainer): - if getattr(self, "peft_name_keys", None) is None: - self.peft_name_keys = [] - self.name_key_to_cfg = {} + # MegatronGPTModel.__init__(self, cfg, trainer) + super().__init__(cfg, trainer) + self.init_neva_adapter() - self.peft_name_keys += [AdapterName.MM_LINEAR_ADAPTER] + def init_neva_adapter(self): + self.base_keys = self._get_all_keys() + adapter_name = AdapterName.MM_LINEAR_ADAPTER adapter_cfg = MMLinearAdapterConfig( - in_features=cfg.mm_cfg.vision_encoder.hidden_size, out_features=cfg.hidden_size, bias=True, - ) - self.name_key_to_cfg.update( - {AdapterName.MM_LINEAR_ADAPTER: adapter_cfg,} + in_features=self.cfg.mm_cfg.vision_encoder.hidden_size, + out_features=self.cfg.hidden_size, bias=True, ) - MegatronGPTModel.__init__(self, cfg, trainer) - - self.setup_complete = False - self.base_keys = self.get_all_keys() - self.init_peft_modules() - self.adapter_keys = self.get_all_keys() - self.base_keys - if self.megatron_amp_O2: - self.adapter_keys = set(key.replace("model.module.", "model.", 1) for key in self.adapter_keys) - - def get_all_keys(self,): - # TODO (yuya): p-tuning need additional handle, check peft models. - """ - Returns all the keys in the model - """ - k = [n for n, p in self.named_parameters()] - return set(k) + for name, module in self.named_modules(): + self._check_and_add_adapter( + name, module, adapter_name, adapter_cfg, + autocast_dtype=self.autocast_dtype, + ) + self.adapter_keys = self._get_all_keys() - self.base_keys - def init_peft_modules(self): - """ - Randomly initialize the peft params and add them to the appropriate modules. - """ - assert len(self.peft_name_keys) > 0, "peft_name_keys have not been set no PEFT modules will be added" - assert len(self.name_key_to_cfg) > 0, "name_key_to_cfg has not been set no PEFT modules will be added" - logging.info(f"Before adding PEFT params:\n{self.summarize()}") - for _, module in self.named_modules(): - if isinstance(module, adapter_mixins.AdapterModuleMixin): - for peft_key in self.peft_name_keys: - peft_cfg = self.name_key_to_cfg[peft_key] - if model_utils.import_class_by_path(peft_cfg._target_) in module.get_accepted_adapter_types(): - module.add_adapter( - name=peft_key, - cfg=peft_cfg, # TODO (yuya): override this line in gpt peft models due to a conf merging issue - ) - if self.megatron_amp_O2: - for adapter_name in getattr(module, 'adapter_layer', []): - module.adapter_layer[adapter_name] = module.adapter_layer[adapter_name].to(self.autocast_dtype) - logging.info(f"After adding PEFT params:\n{self.summarize()}") - return True def model_provider_func(self, pre_process, post_process): """Model depends on pipeline paralellism.""" media_start_id = self.tokenizer.token_to_id(DEFAULT_IM_START_TOKEN) media_end_id = self.tokenizer.token_to_id(DEFAULT_IM_END_TOKEN) - model = NevaModel( - mm_cfg=self.cfg.mm_cfg, - media_start_id=media_start_id, - media_end_id=media_end_id, - config=self.model_parallel_config, - vocab_size=self.cfg.get('override_vocab_size', self.padded_vocab_size), - hidden_size=self.cfg.hidden_size, - max_position_embeddings=self.cfg.max_position_embeddings, - num_layers=self.cfg.num_layers, - num_attention_heads=self.cfg.num_attention_heads, - apply_query_key_layer_scaling=self.cfg.get('apply_query_key_layer_scaling', True), - kv_channels=self.cfg.get('kv_channels', None), - ffn_hidden_size=self.cfg.ffn_hidden_size, - num_tokentypes=0, - parallel_output=True, - pre_process=pre_process, - post_process=post_process, - init_method_std=self.cfg.get('init_method_std', 0.02), - use_scaled_init_method=self.cfg.get('use_scaled_init_method', True), - fp16_lm_cross_entropy=self.cfg.get('fp16_lm_cross_entropy', False), - megatron_amp_O2=self.cfg.get('megatron_amp_O2', False), - hidden_dropout=self.cfg.get('hidden_dropout', 0.1), - attention_dropout=self.cfg.get('attention_dropout', 0.1), - ffn_dropout=self.cfg.get('ffn_dropout', 0.0), - precision=self.cfg.get('precision', 16), - fp32_residual_connection=self.cfg.get('fp32_residual_connection', False), - activations_checkpoint_granularity=self.cfg.get('activations_checkpoint_granularity', None), - activations_checkpoint_method=self.cfg.get('activations_checkpoint_method', None), - activations_checkpoint_num_layers=self.cfg.get('activations_checkpoint_num_layers', 1), - activations_checkpoint_layers_per_pipeline=self.cfg.get( - 'activations_checkpoint_layers_per_pipeline', None - ), - normalization=self.cfg.get('normalization', 'layernorm'), - layernorm_epsilon=self.cfg.get('layernorm_epsilon', 1e-5), - onnx_safe=self.cfg.get('onnx_safe', False), - bias=self.cfg.get('bias', True), - bias_activation_fusion=self.cfg.get('bias_activation_fusion', True), - bias_dropout_add_fusion=self.cfg.get('bias_dropout_add_fusion', True), - activation=self.cfg.get('activation', 'gelu'), - headscale=self.cfg.get('headscale', False), - transformer_block_type=self.cfg.get('transformer_block_type', 'pre_ln'), - openai_gelu=self.cfg.get('openai_gelu', False), - normalize_attention_scores=self.cfg.get('normalize_attention_scores', True), - position_embedding_type=self.cfg.get('position_embedding_type', 'learned_absolute'), - rotary_percentage=self.cfg.get('rotary_percentage', 1.0), - share_embeddings_and_output_weights=self.cfg.get('share_embeddings_and_output_weights', True), - attention_type=self.cfg.get('attention_type', 'multihead'), - masked_softmax_fusion=self.cfg.get('masked_softmax_fusion', True), - persist_layer_norm=self.cfg.get('persist_layer_norm', False), - transformer_engine=self.cfg.get('transformer_engine', False), - fp8=self.cfg.get('fp8', False), - fp8_e4m3=self.cfg.get('fp8_e4m3', False), - fp8_hybrid=self.cfg.get('fp8_hybrid', False), - fp8_margin=self.cfg.get('fp8_margin', 0), - fp8_interval=self.cfg.get('fp8_interval', 1), - fp8_amax_history_len=self.cfg.get('fp8_amax_history_len', 1), - fp8_amax_compute_algo=self.cfg.get('fp8_amax_compute_algo', 'most_recent'), - reduce_amax=self.cfg.get('reduce_amax', True), - use_emha=self.cfg.get('use_emha', False), - ub_tp_comm_overlap=self.cfg.get('ub_tp_comm_overlap', False), - use_flash_attention=self.cfg.get('use_flash_attention', False), - megatron_legacy=self.cfg.get('megatron_legacy', False), - seq_len_interpolation_factor=self.cfg.get('seq_len_interpolation_factor', None), - ) + if self.mcore_gpt: + if parallel_state.is_unitialized(): + def dummy(): + return + if self.trainer.strategy.launcher is not None: + self.trainer.strategy.launcher.launch(dummy, trainer=self.trainer) + self.trainer.strategy.setup_environment() + + model = MCoreNevaModel( + mm_cfg=self.cfg.mm_cfg, + media_start_id=media_start_id, + media_end_id=media_end_id, + config=self.transformer_config, + vocab_size=self.cfg.get('override_vocab_size', self.padded_vocab_size), + max_sequence_length=self.cfg.get('encoder_seq_length', 512), + pre_process=pre_process, + post_process=post_process, + parallel_output=True, + share_embeddings_and_output_weights=self.cfg.get('share_embeddings_and_output_weights', True), + position_embedding_type=self.cfg.get('position_embedding_type', 'learned_absolute'), + rotary_percent=self.cfg.get('rotary_percentage', 1.0), + seq_len_interpolation_factor=self.cfg.get('seq_len_interpolation_factor', None), + ) + else: + model = NevaModel( + mm_cfg=self.cfg.mm_cfg, + media_start_id=media_start_id, + media_end_id=media_end_id, + config=self.model_parallel_config, + vocab_size=self.cfg.get('override_vocab_size', self.padded_vocab_size), + hidden_size=self.cfg.hidden_size, + max_position_embeddings=self.cfg.max_position_embeddings, + num_layers=self.cfg.num_layers, + num_attention_heads=self.cfg.num_attention_heads, + apply_query_key_layer_scaling=self.cfg.get('apply_query_key_layer_scaling', True), + kv_channels=self.cfg.get('kv_channels', None), + ffn_hidden_size=self.cfg.ffn_hidden_size, + num_tokentypes=0, + parallel_output=True, + pre_process=pre_process, + post_process=post_process, + init_method_std=self.cfg.get('init_method_std', 0.02), + use_scaled_init_method=self.cfg.get('use_scaled_init_method', True), + fp16_lm_cross_entropy=self.cfg.get('fp16_lm_cross_entropy', False), + megatron_amp_O2=self.cfg.get('megatron_amp_O2', False), + hidden_dropout=self.cfg.get('hidden_dropout', 0.1), + attention_dropout=self.cfg.get('attention_dropout', 0.1), + ffn_dropout=self.cfg.get('ffn_dropout', 0.0), + precision=self.cfg.get('precision', 16), + fp32_residual_connection=self.cfg.get('fp32_residual_connection', False), + activations_checkpoint_granularity=self.cfg.get('activations_checkpoint_granularity', None), + activations_checkpoint_method=self.cfg.get('activations_checkpoint_method', None), + activations_checkpoint_num_layers=self.cfg.get('activations_checkpoint_num_layers', 1), + activations_checkpoint_layers_per_pipeline=self.cfg.get( + 'activations_checkpoint_layers_per_pipeline', None + ), + normalization=self.cfg.get('normalization', 'layernorm'), + layernorm_epsilon=self.cfg.get('layernorm_epsilon', 1e-5), + onnx_safe=self.cfg.get('onnx_safe', False), + bias=self.cfg.get('bias', True), + bias_activation_fusion=self.cfg.get('bias_activation_fusion', True), + bias_dropout_add_fusion=self.cfg.get('bias_dropout_add_fusion', True), + activation=self.cfg.get('activation', 'gelu'), + headscale=self.cfg.get('headscale', False), + transformer_block_type=self.cfg.get('transformer_block_type', 'pre_ln'), + openai_gelu=self.cfg.get('openai_gelu', False), + normalize_attention_scores=self.cfg.get('normalize_attention_scores', True), + position_embedding_type=self.cfg.get('position_embedding_type', 'learned_absolute'), + rotary_percentage=self.cfg.get('rotary_percentage', 1.0), + share_embeddings_and_output_weights=self.cfg.get('share_embeddings_and_output_weights', True), + attention_type=self.cfg.get('attention_type', 'multihead'), + masked_softmax_fusion=self.cfg.get('masked_softmax_fusion', True), + persist_layer_norm=self.cfg.get('persist_layer_norm', False), + transformer_engine=self.cfg.get('transformer_engine', False), + fp8=self.cfg.get('fp8', False), + fp8_e4m3=self.cfg.get('fp8_e4m3', False), + fp8_hybrid=self.cfg.get('fp8_hybrid', False), + fp8_margin=self.cfg.get('fp8_margin', 0), + fp8_interval=self.cfg.get('fp8_interval', 1), + fp8_amax_history_len=self.cfg.get('fp8_amax_history_len', 1), + fp8_amax_compute_algo=self.cfg.get('fp8_amax_compute_algo', 'most_recent'), + reduce_amax=self.cfg.get('reduce_amax', True), + use_emha=self.cfg.get('use_emha', False), + ub_tp_comm_overlap=self.cfg.get('ub_tp_comm_overlap', False), + use_flash_attention=self.cfg.get('use_flash_attention', False), + megatron_legacy=self.cfg.get('megatron_legacy', False), + seq_len_interpolation_factor=self.cfg.get('seq_len_interpolation_factor', None), + ) logging.info( f"Neva model initialized with {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable parameters" @@ -597,15 +744,19 @@ def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_ # Intermediate pipeline stage doesn't need any inputs batch = {k: None for k in ['tokens', 'position_ids', 'attention_mask', 'labels', 'media']} - output_tensor = model( - batch['tokens'], - batch['position_ids'], - batch['attention_mask'], - None, # placehpolder for loss mask - batch['labels'], - batch.get('media'), - checkpoint_activations_all_layers=checkpoint_activations_all_layers, - ) + forward_args = { + 'input_ids': batch['tokens'], + 'position_ids': batch['position_ids'], + 'attention_mask': batch['attention_mask'], + 'labels': batch['labels'], + 'media': batch.get('media', None), + } + if not self.mcore_gpt: + if self.use_loss_mask: + forward_args['loss_mask'] = batch['loss_mask'] + forward_args['checkpoint_activations_all_layers'] = checkpoint_activations_all_layers + + output_tensor = model(**forward_args) return output_tensor, partial(loss_func, loss_mask=batch['loss_mask']) @@ -853,8 +1004,15 @@ def load_state_dict(self, state_dict, strict=False): logging.critical('Unexpected keys were detected during the load. Please double check.') logging.critical(f'Unexpected keys: \n{unexpected_keys}') + def on_load_checkpoint(self, checkpoint) -> None: + if self.mcore_gpt: + state_dict = checkpoint["state_dict"] + self.load_state_dict(state_dict) + def sharded_state_dict(self, prefix: str = ''): return None + # sharded_state_dict = MegatronGPTModel.sharded_state_dict(self, prefix) + # return sharded_state_dict def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any: inference_config = self.get_inference_config() diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index b9ed3d790f02..44cb100304c6 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -15,16 +15,15 @@ import itertools import os import queue +import torch import warnings from dataclasses import fields from functools import partial -from typing import Any, Dict, Iterator, List, Optional, Union - -import torch from omegaconf import OmegaConf from omegaconf.dictconfig import DictConfig from pytorch_lightning.accelerators import CPUAccelerator from pytorch_lightning.trainer.trainer import Trainer +from typing import Any, Dict, Iterator, List, Optional, Union from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( MegatronPretrainingRandomSampler, @@ -122,7 +121,7 @@ def __init__(self, model): def forward(self, tokens, position_ids, attention_mask): if self.fp8_enabled and HAVE_TE: with transformer_engine.pytorch.onnx_export(self.fp8_enabled), transformer_engine.pytorch.fp8_autocast( - enabled=self.fp8_enabled, fp8_recipe=self.fp8_recipe + enabled=self.fp8_enabled, fp8_recipe=self.fp8_recipe ), torch.no_grad(), torch.inference_mode(), torch.autocast( 'cuda', dtype=self.dtype ), warnings.catch_warnings(): @@ -137,7 +136,7 @@ def forward(self, tokens, position_ids, attention_mask): ) else: with torch.no_grad(), torch.inference_mode(), torch.autocast( - 'cuda', dtype=self.dtype + 'cuda', dtype=self.dtype ), warnings.catch_warnings(): warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning, module=r'.*') assert tokens.shape == position_ids.shape @@ -489,7 +488,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): grad_sync_func = None param_sync_func = None if not forward_only and self.with_distributed_adam: - no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2, ) grad_sync_func = self.reduce_overlap_gradients param_sync_func = self.sync_overlap_parameters @@ -624,7 +623,7 @@ def training_step(self, dataloader_iter, batch_idx): self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) if self.cfg.get('pipeline_model_parallel_size', 1) > 1 and self.cfg.get( - 'share_embeddings_and_output_weights', True + 'share_embeddings_and_output_weights', True ): # when using pipeline parallelism the first and last stage must keep embeddings in sync self.allreduce_first_last_embeddings() @@ -734,8 +733,8 @@ def allreduce_first_last_embeddings(self): # This should only run for models that support pipelined model parallelism # (BERT and GPT-2). if parallel_state.get_pipeline_model_parallel_world_size() > 1 and ( - parallel_state.is_pipeline_first_stage(ignore_virtual=True) - or parallel_state.is_pipeline_last_stage(ignore_virtual=True) + parallel_state.is_pipeline_first_stage(ignore_virtual=True) + or parallel_state.is_pipeline_last_stage(ignore_virtual=True) ): module_list = self.get_gpt_module_list() if parallel_state.is_pipeline_first_stage(ignore_virtual=True): @@ -1056,7 +1055,7 @@ def build_train_valid_test_datasets(self): return self._train_ds, self._validation_ds, self._test_ds def build_pretraining_data_loader( - self, dataset, consumed_samples, dataset_type=None, drop_last=True, pad_samples_to_global_batch_size=False + self, dataset, consumed_samples, dataset_type=None, drop_last=True, pad_samples_to_global_batch_size=False ): """Buld dataloader given an input dataset.""" @@ -1126,7 +1125,7 @@ def setup(self, stage=None): if self.rampup_batch_size: optimizer = self.cfg.optim.get('name', None) assert ( - optimizer == 'fused_adam' + optimizer == 'fused_adam' ), f'{optimizer} optimizer is not supported yet with rampup batch size. Please, use fused_adam optimizer instead.' num_microbatch_calculator = apex.transformer.pipeline_parallel.utils._GLOBAL_NUM_MICROBATCHES_CALCULATOR @@ -1198,12 +1197,12 @@ def setup_test_data(self, cfg): self._test_dl = self.build_pretraining_data_loader(self._test_ds, consumed_samples) def generate( - self, - inputs: Union[List[str], torch.Tensor, List[dict]], - length_params: LengthParam, - sampling_params: SamplingParam = None, - *, - strategy: Optional[TextGenerationStrategy] = None, + self, + inputs: Union[List[str], torch.Tensor, List[dict]], + length_params: LengthParam, + sampling_params: SamplingParam = None, + *, + strategy: Optional[TextGenerationStrategy] = None, ) -> OutputType: # check whether the DDP is initialized diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py index f4caf0263dda..4f1556ef572b 100644 --- a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py +++ b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py @@ -572,7 +572,7 @@ class LoraKQVAdapterWeightTyingConfig(ParallelLinearAdapterWeightTyingConfig): class MMLinearAdapter(nn.Module, AdapterModuleUtil): - def __init__(self, in_features: int, out_features: int, bias: bool) -> None: + def __init__(self, in_features: int, out_features: int, bias: bool, **kwargs) -> None: super().__init__() self.linear = torch.nn.Linear(in_features, out_features, bias,) diff --git a/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py b/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py new file mode 100644 index 000000000000..7e2112e9df90 --- /dev/null +++ b/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py @@ -0,0 +1,110 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import tempfile +from typing import List, Optional, Union + +import torch +from omegaconf import DictConfig, OmegaConf, open_dict + +from nemo.utils.model_utils import inject_model_parallel_rank +from nemo.collections.nlp.parts.mixins.nlp_adapter_mixins import NLPAdapterModelMixin +from nemo.core.classes.mixins.adapter_mixins import AdapterModuleMixin +from nemo.core.connectors.save_restore_connector import SaveRestoreConnector +from nemo.utils import logging, model_utils +from nemo.collections.nlp.parts.peft_config import ( + PEFT_CONFIG_MAP, + CanonicalAdaptersPEFTConfig, + LoraPEFTConfig, + PEFTConfig, + PtuningPEFTConfig, +) + +try: + from megatron.core import parallel_state + from nemo.collections.nlp.modules.common.megatron.adapters.mcore_mixins import swap_mcore_mixin + +except (ImportError, ModuleNotFoundError): + HAVE_MEGATRON_CORE = False + + +class MultimodalAdapterModelMixin(NLPAdapterModelMixin): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def _get_all_keys(self,): + # TODO (yuya): p-tuning need additional handle, check peft models. + """ + Returns all the keys in the model + """ + k = [n for n, p in self.named_parameters()] + return set(k) + + def add_adapter(self, peft_cfgs: Union[PEFTConfig, List[PEFTConfig]]): + if not isinstance(peft_cfgs, List): + peft_cfgs = [peft_cfgs] + + self.base_keys = self._get_all_keys() + self.freeze() + logging.info(f"Before adding PEFT params:\n{self.summarize()}") + + self.use_ptuning_only = len(peft_cfgs) == 1 and isinstance(peft_cfgs[0], PtuningPEFTConfig) + + for peft_cfg in peft_cfgs: + if self.use_ptuning_only: + if not self.first_stage_of_pipeline(): + # There are no params to add if we are not in the first state of the pipeline + continue + self.virtual_tokens = peft_cfg.virtual_tokens + + self._check_and_add_peft_cfg(peft_cfg) + + logging.info(f"After adding PEFT params:\n{self.summarize()}") + self.adapter_keys = self._get_all_keys() - self.base_keys + + for cfg in peft_cfgs: + if cfg.weight_tying: + self.tie_weights(cfg) + self.use_peft = True + + def _check_and_add_adapter(self, name, module, peft_name, peft_cfg, name_key_to_mcore_mixins=None, autocast_dtype=None): + if name_key_to_mcore_mixins is not None: + for mcore_target, mcore_mixin in name_key_to_mcore_mixins[peft_name]: + if name in [ + mcore_target, + f'model.{mcore_target}', + f'model.module.{mcore_target}', + ]: # simple string match for now + swap_mcore_mixin(module, mcore_mixin) + if model_utils.import_class_by_path(peft_cfg._target_) in module.get_accepted_adapter_types(): + module.add_adapter( + name=peft_name, + cfg=peft_cfg, + base_model_cfg=self.cfg, + model_parallel_config=self.model_parallel_config, + ) + if autocast_dtype is not None: + module.adapter_layer[peft_name] = module.adapter_layer[peft_name].to(autocast_dtype) + elif isinstance(module, AdapterModuleMixin): + if model_utils.import_class_by_path(peft_cfg._target_) in module.get_accepted_adapter_types(): + module.add_adapter( + name=peft_name, + cfg=peft_cfg, + base_model_cfg=self.cfg, + model_parallel_config=self.model_parallel_config, + ) + if autocast_dtype is not None: + module.adapter_layer[peft_name] = module.adapter_layer[peft_name].to(autocast_dtype) \ No newline at end of file From 1e4c2b25a4adc2b5ca31e3af4675f6a7d68f17b1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 5 Oct 2023 20:19:26 -0700 Subject: [PATCH 308/512] Fix issues with Dockerfile (#7650) (#7652) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 843c0c27df45..7d1539ad2db3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,7 +38,7 @@ RUN apt-get update && \ libsndfile1 sox \ libfreetype6 \ swig \ - ffmpeg=ffmpeg_5.1.2-3ubuntu1 \ + ffmpeg \ libavdevice-dev && \ rm -rf /var/lib/apt/lists/* @@ -111,7 +111,7 @@ RUN /usr/bin/test -n "$NEMO_VERSION" && \ /bin/echo "export BASE_IMAGE=${BASE_IMAGE}" >> /root/.bashrc # Install NeMo -RUN --mount=from=nemo-src,target=/tmp/nemo cd /tmp/nemo && pip install ".[all]" +RUN --mount=from=nemo-src,target=/tmp/nemo,rw cd /tmp/nemo && pip install ".[all]" # Check install RUN python -c "import nemo.collections.nlp as nemo_nlp" && \ From 798f6fc33c5f9ce4f38bfacd082f424023943721 Mon Sep 17 00:00:00 2001 From: Aleksandr Laptev Date: Fri, 6 Oct 2023 13:03:07 +0700 Subject: [PATCH 309/512] [ASR] RNN-T greedy decoding max_frames fix for alignment and confidence (#7635) * decoding and test fix Signed-off-by: Aleksandr Laptev * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Aleksandr Laptev Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../parts/submodules/rnnt_greedy_decoding.py | 31 +++++++++++-------- .../asr/test_asr_rnnt_encdec_model.py | 24 +++++++++----- 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py index e37d282ed0de..a0aea07f7bc8 100644 --- a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py @@ -649,6 +649,7 @@ def _greedy_decode_blank_as_pad( # Mask buffers blank_mask = torch.full([batchsize], fill_value=0, dtype=torch.bool, device=device) + blank_mask_prev = None # Get max sequence length max_out_len = out_len.max() @@ -666,6 +667,8 @@ def _greedy_decode_blank_as_pad( # Batch: [B, T, D], but Bi may have seq len < max(seq_lens_in_batch) # Forcibly mask with "blank" tokens, for all sample where current time step T > seq_len blank_mask = time_idx >= out_len + blank_mask_prev = blank_mask.clone() + # Start inner loop while not_blank and (self.max_symbols is None or symbols_added < self.max_symbols): # Batch prediction and joint network steps @@ -694,7 +697,6 @@ def _greedy_decode_blank_as_pad( # This is accumulating blanks over all time steps T and all target steps min(max_symbols, U) k_is_blank = k == self._blank_index blank_mask.bitwise_or_(k_is_blank) - all_blanks = torch.all(blank_mask) del k_is_blank @@ -705,10 +707,9 @@ def _greedy_decode_blank_as_pad( logp_vals = logp.to('cpu') logp_ids = logp_vals.max(1)[1] for batch_idx, is_blank in enumerate(blank_mask): - # we only want to update non-blanks, unless we are at the last step in the loop where - # all elements produced blanks, otherwise there will be duplicate predictions - # saved in alignments - if time_idx < out_len[batch_idx] and (all_blanks or not is_blank): + # we only want to update non-blanks and first-time blanks, + # otherwise alignments will contain duplicate predictions + if time_idx < out_len[batch_idx] and (not blank_mask_prev[batch_idx] or not is_blank): hypotheses[batch_idx].alignments[-1].append( (logp_vals[batch_idx], logp_ids[batch_idx]) ) @@ -720,13 +721,15 @@ def _greedy_decode_blank_as_pad( # Insert probabilities into last timestep per sample confidence = self._get_confidence(logp) for batch_idx, is_blank in enumerate(blank_mask): - if time_idx < out_len[batch_idx] and (all_blanks or not is_blank): + if time_idx < out_len[batch_idx] and (not blank_mask_prev[batch_idx] or not is_blank): hypotheses[batch_idx].frame_confidence[-1].append(confidence[batch_idx]) del logp + blank_mask_prev.bitwise_or_(blank_mask) + # If all samples predict / have predicted prior blanks, exit loop early # This is equivalent to if single sample predicted k - if all_blanks: + if blank_mask.all(): not_blank = False else: # Collect batch indices where blanks occurred now/past @@ -847,6 +850,7 @@ def _greedy_decode_masked( # Mask buffers blank_mask = torch.full([batchsize], fill_value=0, dtype=torch.bool, device=device) + blank_mask_prev = None # Get max sequence length max_out_len = out_len.max() @@ -866,6 +870,7 @@ def _greedy_decode_masked( # Batch: [B, T, D], but Bi may have seq len < max(seq_lens_in_batch) # Forcibly mask with "blank" tokens, for all sample where current time step T > seq_len blank_mask = time_idx >= out_len + blank_mask_prev = blank_mask.clone() # Start inner loop while not_blank and (self.max_symbols is None or symbols_added < self.max_symbols): @@ -904,7 +909,6 @@ def _greedy_decode_masked( # This is accumulating blanks over all time steps T and all target steps min(max_symbols, U) k_is_blank = k == self._blank_index blank_mask.bitwise_or_(k_is_blank) - all_blanks = torch.all(blank_mask) # If preserving alignments, check if sequence length of sample has been reached # before adding alignment @@ -913,10 +917,9 @@ def _greedy_decode_masked( logp_vals = logp.to('cpu') logp_ids = logp_vals.max(1)[1] for batch_idx, is_blank in enumerate(blank_mask): - # we only want to update non-blanks, unless we are at the last step in the loop where - # all elements produced blanks, otherwise there will be duplicate predictions - # saved in alignments - if time_idx < out_len[batch_idx] and (all_blanks or not is_blank): + # we only want to update non-blanks and first-time blanks, + # otherwise alignments will contain duplicate predictions + if time_idx < out_len[batch_idx] and (not blank_mask_prev[batch_idx] or not is_blank): hypotheses[batch_idx].alignments[-1].append( (logp_vals[batch_idx], logp_ids[batch_idx]) ) @@ -929,10 +932,12 @@ def _greedy_decode_masked( # Insert probabilities into last timestep per sample confidence = self._get_confidence(logp) for batch_idx, is_blank in enumerate(blank_mask): - if time_idx < out_len[batch_idx] and (all_blanks or not is_blank): + if time_idx < out_len[batch_idx] and (not blank_mask_prev[batch_idx] or not is_blank): hypotheses[batch_idx].frame_confidence[-1].append(confidence[batch_idx]) del logp + blank_mask_prev.bitwise_or_(blank_mask) + # If all samples predict / have predicted prior blanks, exit loop early # This is equivalent to if single sample predicted k if blank_mask.all(): diff --git a/tests/collections/asr/test_asr_rnnt_encdec_model.py b/tests/collections/asr/test_asr_rnnt_encdec_model.py index 12e08006a3e4..b466d09c460d 100644 --- a/tests/collections/asr/test_asr_rnnt_encdec_model.py +++ b/tests/collections/asr/test_asr_rnnt_encdec_model.py @@ -45,6 +45,10 @@ def predict( add_sos: bool = False, batch_size: Optional[int] = None, ) -> Tuple[torch.Tensor, List[torch.Tensor]]: + if batch_size is None: + batch_size = 1 + if y is not None: + y = y + torch.tensor([0] * self.vocab_size + [1], dtype=torch.float32).repeat(y.size()) if y is not None and state is not None: return (y + state) / 2, y * state elif state is not None: @@ -52,8 +56,8 @@ def predict( elif y is not None: return y, torch.tensor([0] * self.vocab_size + [1], dtype=torch.float32).repeat(y.size()) return ( - torch.tensor([0] * self.vocab_size + [1], dtype=torch.float32).repeat([1, 1, 1]), - torch.tensor([0] * self.vocab_size + [1], dtype=torch.float32).repeat([1, 1, 1]), + torch.tensor([0] * self.vocab_size + [1], dtype=torch.float32).repeat([1, batch_size, 1]), + torch.tensor([0] * self.vocab_size + [1], dtype=torch.float32).repeat([1, batch_size, 1]), ) def initialize_state(self, y: torch.Tensor) -> List[torch.Tensor]: @@ -66,8 +70,11 @@ def score_hypothesis( def batch_select_state(self, batch_states: List[torch.Tensor], idx: int) -> List[List[torch.Tensor]]: if batch_states is not None: - states = batch_states[0][idx] - states = states.long() + try: + states = batch_states[0][idx] + states = states.long() + except Exception as e: + raise Exception(batch_states, idx) return [states] else: return None @@ -92,8 +99,12 @@ def joint(self, f: torch.Tensor, g: torch.Tensor) -> torch.Tensor: setup["decoder"] = DummyRNNTDecoder(vocab_size=2, blank_idx=2, blank_as_pad=True) setup["decoder_masked"] = DummyRNNTDecoder(vocab_size=2, blank_idx=2, blank_as_pad=False) setup["joint"] = DummyRNNTJoint() - setup["encoder_output"] = torch.tensor([[[1, 0, 0], [0, 1, 0], [0, 0, 1]]], dtype=torch.float32).transpose(1, 2) - setup["encoded_lengths"] = torch.tensor([3]) + # expected timesteps for max_symbols_per_step=5 are [[0, 0, 0, 0, 0, 1, 1], [1, 1, 1, 1, 1]], + # so we have both looped and regular iteration on the second frame + setup["encoder_output"] = torch.tensor( + [[[1, 0, 0], [0, 1, 0], [0, 0, 1]], [[0, 0, 1], [2, 0, 0], [0, 0, 0]]], dtype=torch.float32 + ).transpose(1, 2) + setup["encoded_lengths"] = torch.tensor([3, 2]) return setup @@ -726,7 +737,6 @@ def test_greedy_decoding_preserve_frame_confidence(self, greedy_class): decoder, joint_net, blank_index=len(token_list), - preserve_alignments=True, preserve_frame_confidence=True, max_symbols_per_step=max_symbols_per_step, ) From d9861d16a5b1cd87c99c1a67b9736cfaa337b9b3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 5 Oct 2023 23:25:16 -0700 Subject: [PATCH 310/512] [ASR] Fix type error in jasper (#7636) (#7653) Signed-off-by: Ryan Co-authored-by: Ryan Langman --- nemo/collections/asr/parts/submodules/jasper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/asr/parts/submodules/jasper.py b/nemo/collections/asr/parts/submodules/jasper.py index 6496586463e4..900d0bd55a10 100644 --- a/nemo/collections/asr/parts/submodules/jasper.py +++ b/nemo/collections/asr/parts/submodules/jasper.py @@ -375,7 +375,7 @@ def update_masked_length(self, max_len, seq_range=None, device=None): self.lens = self.lens.to(device) else: self.lens = seq_range - self.max_len = max_len + self.max_len = torch.tensor(max_len) def mask_input(self, x, lens): max_len = x.size(2) From 3e38b7997df8d9fd433a893772ea323b4f24d35d Mon Sep 17 00:00:00 2001 From: Ryan Langman Date: Fri, 6 Oct 2023 10:07:15 -0700 Subject: [PATCH 311/512] [TTS] Add STFT and SI-SDR loss to audio codec recipe (#7468) * [TTS] Add STFT and SI-SDR loss to audio codec recipe Signed-off-by: Ryan * [TTS] Fix STFT resolution Signed-off-by: Ryan * [TTS] Fix training metric logging Signed-off-by: Ryan * [TTS] Add docstring to mel and stft losses Signed-off-by: Ryan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Ryan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../conf/audio_codec/audio_codec_24000.yaml | 170 ++++++++++++++++ .../{encodec.yaml => encodec_24000.yaml} | 12 +- .../tts/losses/audio_codec_loss.py | 186 +++++++++++++++++- nemo/collections/tts/models/audio_codec.py | 141 ++++++++----- .../tts/modules/encodec_modules.py | 35 ++-- nemo/collections/tts/parts/utils/helpers.py | 2 +- .../tts/losses/test_audio_codec_loss.py | 48 ++++- .../tts/modules/test_audio_codec_modules.py | 7 +- 8 files changed, 508 insertions(+), 93 deletions(-) create mode 100644 examples/tts/conf/audio_codec/audio_codec_24000.yaml rename examples/tts/conf/audio_codec/{encodec.yaml => encodec_24000.yaml} (94%) diff --git a/examples/tts/conf/audio_codec/audio_codec_24000.yaml b/examples/tts/conf/audio_codec/audio_codec_24000.yaml new file mode 100644 index 000000000000..14e6fe236545 --- /dev/null +++ b/examples/tts/conf/audio_codec/audio_codec_24000.yaml @@ -0,0 +1,170 @@ +# This config contains the default values for training 24khz audio codec model +# If you want to train model on other dataset, you can change config values according to your dataset. +# Most dataset-specific arguments are in the head of the config file, see below. + +name: EnCodec + +max_epochs: ??? +# Adjust batch size based on GPU memory +batch_size: 16 +# When doing weighted sampling with multiple manifests, this defines how many training steps are in an epoch. +# If null, then weighted sampling is disabled. +weighted_sampling_steps_per_epoch: null + +# Dataset metadata for each manifest +# https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/tts/data/vocoder_dataset.py#L39-L41 +train_ds_meta: ??? +val_ds_meta: ??? + +log_ds_meta: ??? +log_dir: ??? + +# Modify these values based on your sample rate +sample_rate: 24000 +train_n_samples: 24000 +down_sample_rates: [2, 4, 5, 8] +up_sample_rates: [8, 5, 4, 2] +# The number of samples per encoded audio frame. Should be the product of the down_sample_rates. +# For example 2 * 4 * 5 * 8 = 320. +samples_per_frame: 320 + +model: + + max_epochs: ${max_epochs} + steps_per_epoch: ${weighted_sampling_steps_per_epoch} + + sample_rate: ${sample_rate} + samples_per_frame: ${samples_per_frame} + + mel_loss_l1_scale: 15.0 + mel_loss_l2_scale: 0.0 + stft_loss_scale: 15.0 + time_domain_loss_scale: 0.0 + + # Probability of updating the discriminator during each training step + # For example, update the discriminator 2/3 times (2 updates for every 3 batches) + disc_updates_per_period: 2 + disc_update_period: 3 + + # All resolutions for reconstruction loss, ordered [num_fft, hop_length, window_length] + loss_resolutions: [ + [32, 8, 32], [64, 16, 64], [128, 32, 128], [256, 64, 256], [512, 128, 512], [1024, 256, 1024], [2048, 512, 2048] + ] + mel_loss_dims: [5, 10, 20, 40, 80, 160, 320] + mel_loss_log_guard: 1.0 + stft_loss_log_guard: 1.0 + + train_ds: + dataset: + _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset + weighted_sampling_steps_per_epoch: ${weighted_sampling_steps_per_epoch} + sample_rate: ${sample_rate} + n_samples: ${train_n_samples} + min_duration: 1.01 + max_duration: null + dataset_meta: ${train_ds_meta} + + dataloader_params: + batch_size: ${batch_size} + drop_last: true + num_workers: 4 + + validation_ds: + dataset: + _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset + sample_rate: ${sample_rate} + n_samples: null + min_duration: null + max_duration: null + trunc_duration: 10.0 # Only use the first 10 seconds of audio for computing validation loss + dataset_meta: ${val_ds_meta} + + dataloader_params: + batch_size: 8 + num_workers: 2 + + # Configures how audio samples are generated and saved during training. + # Remove this section to disable logging. + log_config: + log_dir: ${log_dir} + log_epochs: [10, 50] + epoch_frequency: 100 + log_tensorboard: false + log_wandb: false + + generators: + - _target_: nemo.collections.tts.parts.utils.callbacks.AudioCodecArtifactGenerator + log_audio: true + log_encoding: true + log_dequantized: true + + dataset: + _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset + sample_rate: ${sample_rate} + n_samples: null + min_duration: null + max_duration: null + trunc_duration: 15.0 # Only log the first 15 seconds of generated audio. + dataset_meta: ${log_ds_meta} + + dataloader_params: + batch_size: 4 + num_workers: 2 + + audio_encoder: + _target_: nemo.collections.tts.modules.encodec_modules.HifiGanEncoder + down_sample_rates: ${down_sample_rates} + + audio_decoder: + _target_: nemo.collections.tts.modules.encodec_modules.SEANetDecoder + up_sample_rates: ${up_sample_rates} + + vector_quantizer: + _target_: nemo.collections.tts.modules.encodec_modules.ResidualVectorQuantizer + num_codebooks: 8 + + discriminator: + _target_: nemo.collections.tts.modules.encodec_modules.MultiResolutionDiscriminatorSTFT + resolutions: [[128, 32, 128], [256, 64, 256], [512, 128, 512], [1024, 256, 1024], [2048, 512, 2048]] + + # The original EnCodec uses hinged loss, but squared-GAN loss is more stable + # and reduces the need to tune the loss weights or use a gradient balancer. + generator_loss: + _target_: nemo.collections.tts.losses.audio_codec_loss.GeneratorSquaredLoss + + discriminator_loss: + _target_: nemo.collections.tts.losses.audio_codec_loss.DiscriminatorSquaredLoss + + optim: + _target_: torch.optim.Adam + lr: 3e-4 + betas: [0.5, 0.9] + + sched: + name: ExponentialLR + gamma: 0.998 + +trainer: + num_nodes: 1 + devices: 1 + accelerator: gpu + strategy: ddp_find_unused_parameters_true + precision: 32 # Vector quantization only works with 32-bit precision. + max_epochs: ${max_epochs} + accumulate_grad_batches: 1 + enable_checkpointing: False # Provided by exp_manager + logger: false # Provided by exp_manager + log_every_n_steps: 100 + check_val_every_n_epoch: 10 + benchmark: false + +exp_manager: + exp_dir: null + name: ${name} + create_tensorboard_logger: true + create_checkpoint_callback: true + create_wandb_logger: false + checkpoint_callback_params: + monitor: val_loss + resume_if_exists: false + resume_ignore_no_checkpoint: false diff --git a/examples/tts/conf/audio_codec/encodec.yaml b/examples/tts/conf/audio_codec/encodec_24000.yaml similarity index 94% rename from examples/tts/conf/audio_codec/encodec.yaml rename to examples/tts/conf/audio_codec/encodec_24000.yaml index a0f7a50c92dd..8caaea76294b 100644 --- a/examples/tts/conf/audio_codec/encodec.yaml +++ b/examples/tts/conf/audio_codec/encodec_24000.yaml @@ -36,17 +36,23 @@ model: sample_rate: ${sample_rate} samples_per_frame: ${samples_per_frame} - mel_loss_scale: 5.0 + mel_loss_l1_scale: 1.0 + mel_loss_l2_scale: 1.0 + stft_loss_scale: 0.0 time_domain_loss_scale: 0.1 + # Probability of updating the discriminator during each training step # For example, update the discriminator 2/3 times (2 updates for every 3 batches) disc_updates_per_period: 2 disc_update_period: 3 - # All resolutions for mel reconstruction loss, ordered [num_fft, hop_length, window_length] - mel_loss_resolutions: [ + # All resolutions for reconstruction loss, ordered [num_fft, hop_length, window_length] + loss_resolutions: [ [32, 8, 32], [64, 16, 64], [128, 32, 128], [256, 64, 256], [512, 128, 512], [1024, 256, 1024], [2048, 512, 2048] ] + mel_loss_dims: [64, 64, 64, 64, 64, 64, 64] + mel_loss_log_guard: 1E-5 + stft_loss_log_guard: 1.0 train_ds: dataset: diff --git a/nemo/collections/tts/losses/audio_codec_loss.py b/nemo/collections/tts/losses/audio_codec_loss.py index 8819282f07bd..5a36d0378371 100644 --- a/nemo/collections/tts/losses/audio_codec_loss.py +++ b/nemo/collections/tts/losses/audio_codec_loss.py @@ -19,6 +19,7 @@ from einops import rearrange from nemo.collections.asr.parts.preprocessing.features import FilterbankFeatures +from nemo.collections.tts.parts.utils.helpers import get_mask_from_lengths, mask_sequence_tensor from nemo.core.classes import Loss, typecheck from nemo.core.neural_types import ( AudioSignal, @@ -109,14 +110,25 @@ def forward(self, audio_real, audio_gen, audio_len): class MultiResolutionMelLoss(Loss): - def __init__(self, sample_rate: int, mel_dim: int, resolutions: List[List], l1_scale: float = 1.0): + """ + Multi-resolution log mel spectrogram loss. + + Args: + sample_rate: Sample rate of audio. + resolutions: List of resolutions, each being 3 integers ordered [num_fft, hop_length, window_length] + mel_dims: Dimension of mel spectrogram to compute for each resolution. Should be same length as 'resolutions'. + log_guard: Value to add to mel spectrogram to avoid taking log of 0. + """ + + def __init__(self, sample_rate: int, resolutions: List[List], mel_dims: List[int], log_guard: float = 1.0): super(MultiResolutionMelLoss, self).__init__() + assert len(resolutions) == len(mel_dims) - self.l1_loss_fn = MaskedMAELoss(loss_scale=l1_scale) + self.l1_loss_fn = MaskedMAELoss() self.l2_loss_fn = MaskedMSELoss() self.mel_features = torch.nn.ModuleList() - for n_fft, hop_len, win_len in resolutions: + for mel_dim, (n_fft, hop_len, win_len) in zip(mel_dims, resolutions): mel_feature = FilterbankFeatures( sample_rate=sample_rate, nfilt=mel_dim, @@ -126,7 +138,7 @@ def __init__(self, sample_rate: int, mel_dim: int, resolutions: List[List], l1_s pad_to=1, mag_power=1.0, log_zero_guard_type="add", - log_zero_guard_value=1.0, + log_zero_guard_value=log_guard, mel_norm=None, normalize=None, preemph=None, @@ -146,20 +158,176 @@ def input_types(self): @property def output_types(self): return { - "loss": NeuralType(elements_type=LossType()), + "l1_loss": NeuralType(elements_type=LossType()), + "l2_loss": NeuralType(elements_type=LossType()), } @typecheck() def forward(self, audio_real, audio_gen, audio_len): - loss = 0.0 + l1_loss = 0.0 + l2_loss = 0.0 for mel_feature in self.mel_features: mel_real, mel_real_len = mel_feature(x=audio_real, seq_len=audio_len) mel_gen, _ = mel_feature(x=audio_gen, seq_len=audio_len) - loss += self.l1_loss_fn(predicted=mel_gen, target=mel_real, target_len=mel_real_len) - loss += self.l2_loss_fn(predicted=mel_gen, target=mel_real, target_len=mel_real_len) + l1_loss += self.l1_loss_fn(predicted=mel_gen, target=mel_real, target_len=mel_real_len) + l2_loss += self.l2_loss_fn(predicted=mel_gen, target=mel_real, target_len=mel_real_len) + + l1_loss /= len(self.mel_features) + l2_loss /= len(self.mel_features) + + return l1_loss, l2_loss + + +class STFTLoss(Loss): + """ + Log magnitude STFT loss. + + Args: + resolution: Resolution of spectrogram, a list of 3 numbers ordered [num_fft, hop_length, window_length] + log_guard: Value to add to magnitude spectrogram to avoid taking log of 0. + sqrt_guard: Value to add to when computing absolute value of STFT to avoid NaN loss. + """ + + def __init__(self, resolution: List[int], log_guard: float = 1.0, sqrt_guard: float = 1e-5): + super(STFTLoss, self).__init__() + self.loss_fn = MaskedMAELoss() + self.n_fft, self.hop_length, self.win_length = resolution + self.register_buffer("window", torch.hann_window(self.win_length, periodic=False)) + self.log_guard = log_guard + self.sqrt_guard = sqrt_guard + + def _compute_spectrogram(self, audio, spec_len): + # [B, n_fft, T_spec] + spec = torch.stft( + audio, + n_fft=self.n_fft, + hop_length=self.hop_length, + win_length=self.win_length, + window=self.window, + return_complex=True, + ) + # [B, n_fft, T_spec, 2] + spec = torch.view_as_real(spec) + # [B, n_fft, T_spec] + spec_mag = torch.sqrt(spec.pow(2).sum(-1) + self.sqrt_guard) + spec_log = torch.log(spec_mag + self.log_guard) + spec_log = mask_sequence_tensor(spec_log, spec_len) + return spec_log + + @property + def input_types(self): + return { + "audio_real": NeuralType(('B', 'T'), AudioSignal()), + "audio_gen": NeuralType(('B', 'T'), AudioSignal()), + "audio_len": NeuralType(tuple('B'), LengthsType()), + } - loss /= len(self.mel_features) + @property + def output_types(self): + return {"loss": NeuralType(elements_type=LossType())} + + @typecheck() + def forward(self, audio_real, audio_gen, audio_len): + spec_len = (audio_len // self.hop_length) + 1 + spec_real = self._compute_spectrogram(audio=audio_real, spec_len=spec_len) + spec_gen = self._compute_spectrogram(audio=audio_gen, spec_len=spec_len) + loss = self.loss_fn(predicted=spec_gen, target=spec_real, target_len=spec_len) + return loss + + +class MultiResolutionSTFTLoss(Loss): + """ + Multi-resolution log magnitude STFT loss. + + Args: + resolutions: List of resolutions, each being 3 integers ordered [num_fft, hop_length, window_length] + log_guard: Value to add to magnitude spectrogram to avoid taking log of 0. + sqrt_guard: Value to add to when computing absolute value of STFT to avoid NaN loss. + """ + + def __init__(self, resolutions: List[List], log_guard: float = 1.0, sqrt_guard: float = 1e-5): + super(MultiResolutionSTFTLoss, self).__init__() + self.loss_fns = torch.nn.ModuleList( + [STFTLoss(resolution=resolution, log_guard=log_guard, sqrt_guard=sqrt_guard) for resolution in resolutions] + ) + + @property + def input_types(self): + return { + "audio_real": NeuralType(('B', 'T'), AudioSignal()), + "audio_gen": NeuralType(('B', 'T'), AudioSignal()), + "audio_len": NeuralType(tuple('B'), LengthsType()), + } + @property + def output_types(self): + return {"loss": NeuralType(elements_type=LossType())} + + @typecheck() + def forward(self, audio_real, audio_gen, audio_len): + loss = 0.0 + for loss_fn in self.loss_fns: + loss += loss_fn(audio_real=audio_real, audio_gen=audio_gen, audio_len=audio_len) + loss /= len(self.loss_fns) + return loss + + +class SISDRLoss(Loss): + """ + SI-SDR loss based off of torchmetrics.functional.audio.sdr.scale_invariant_signal_distortion_ratio + with added support for masking. + """ + + def __init__(self, epsilon: float = 1e-8): + super(SISDRLoss, self).__init__() + self.epsilon = epsilon + + @property + def input_types(self): + return { + "audio_real": NeuralType(('B', 'T'), AudioSignal()), + "audio_gen": NeuralType(('B', 'T'), AudioSignal()), + "audio_len": NeuralType(tuple('B'), LengthsType()), + } + + @property + def output_types(self): + return {"loss": NeuralType(elements_type=LossType())} + + @typecheck() + def forward(self, audio_real, audio_gen, audio_len): + mask = get_mask_from_lengths(x=audio_real, lengths=audio_len) + audio_len = rearrange(audio_len, 'B -> B 1') + + # Shift audio to have zero-mean + # [B, 1] + target_mean = torch.sum(audio_real, dim=-1, keepdim=True) / audio_len + pred_mean = torch.sum(audio_gen, dim=-1, keepdim=True) / audio_len + + # [B, T] + target = audio_real - target_mean + target = target * mask + pred = audio_gen - pred_mean + pred = pred * mask + + # [B, 1] + ref_pred = torch.sum(pred * target, dim=-1, keepdim=True) + ref_target = torch.sum(target ** 2, dim=-1, keepdim=True) + alpha = (ref_pred + self.epsilon) / (ref_target + self.epsilon) + + # [B, T] + target_scaled = alpha * target + distortion = target_scaled - pred + + # [B] + target_scaled_power = torch.sum(target_scaled ** 2, dim=-1) + distortion_power = torch.sum(distortion ** 2, dim=-1) + + ratio = (target_scaled_power + self.epsilon) / (distortion_power + self.epsilon) + si_sdr = 10 * torch.log10(ratio) + + # [1] + loss = -torch.mean(si_sdr) return loss diff --git a/nemo/collections/tts/models/audio_codec.py b/nemo/collections/tts/models/audio_codec.py index 63140b77f2b5..9b6675db5979 100644 --- a/nemo/collections/tts/models/audio_codec.py +++ b/nemo/collections/tts/models/audio_codec.py @@ -25,7 +25,9 @@ from nemo.collections.tts.losses.audio_codec_loss import ( MultiResolutionMelLoss, + MultiResolutionSTFTLoss, RelativeFeatureMatchingLoss, + SISDRLoss, TimeDomainLoss, ) from nemo.collections.tts.modules.common import GaussianDropout @@ -85,26 +87,43 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): # Discriminator setup self.discriminator = instantiate(cfg.discriminator) - # Loss setup - mel_loss_dim = cfg.get("mel_loss_dim", 64) - mel_loss_resolutions = cfg.mel_loss_resolutions - self.time_domain_loss_scale = cfg.get("time_domain_loss_scale", 1.0) - self.mel_loss_scale = cfg.get("mel_loss_scale", 1.0) - mel_loss_l1_scale = cfg.get("mel_loss_l1_scale", 1.0) - self.gen_loss_scale = cfg.get("gen_loss_scale", 1.0) - self.feature_loss_scale = cfg.get("feature_loss_scale", 1.0) - - self.time_domain_loss_fn = TimeDomainLoss() + # Mel loss setup + loss_resolutions = cfg.loss_resolutions + mel_loss_dims = cfg.get("mel_loss_dims") + mel_loss_log_guard = cfg.get("mel_loss_log_guard", 1.0) + self.mel_loss_l1_scale = cfg.get("mel_loss_l1_scale", 1.0) + self.mel_loss_l2_scale = cfg.get("mel_loss_l2_scale", 1.0) self.mel_loss_fn = MultiResolutionMelLoss( sample_rate=self.sample_rate, - mel_dim=mel_loss_dim, - resolutions=mel_loss_resolutions, - l1_scale=mel_loss_l1_scale, + mel_dims=mel_loss_dims, + resolutions=loss_resolutions, + log_guard=mel_loss_log_guard, ) + + # STFT loss setup + stft_loss_log_guard = cfg.get("stft_loss_log_guard", 1.0) + self.stft_loss_scale = cfg.get("stft_loss_scale", 0.0) + self.stft_loss_fn = MultiResolutionSTFTLoss(resolutions=loss_resolutions, log_guard=stft_loss_log_guard,) + + # Time domain loss setup + self.time_domain_loss_scale = cfg.get("time_domain_loss_scale", 1.0) + self.si_sdr_loss_scale = cfg.get("si_sdr_loss_scale", 0.0) + self.time_domain_loss_fn = TimeDomainLoss() + self.si_sdr_loss_fn = SISDRLoss() + + # Discriminator loss setup + self.gen_loss_scale = cfg.get("gen_loss_scale", 1.0) + self.feature_loss_scale = cfg.get("feature_loss_scale", 1.0) self.gen_loss_fn = instantiate(cfg.generator_loss) self.disc_loss_fn = instantiate(cfg.discriminator_loss) self.feature_loss_fn = RelativeFeatureMatchingLoss() + # Codebook loss setup + if self.vector_quantizer: + self.commit_loss_scale = cfg.get("commit_loss_scale", 1.0) + else: + self.commit_loss_scale = 0.0 + # Log setup self.log_config = cfg.get("log_config", None) @@ -336,10 +355,10 @@ def _process_batch(self, batch): if self.vector_quantizer: encoded, _, commit_loss = self.vector_quantizer(inputs=encoded, input_len=encoded_len) else: - commit_loss = None + commit_loss = 0.0 # [B, T] - audio_gen, audio_gen_len = self.audio_decoder(inputs=encoded, input_len=encoded_len) + audio_gen, _ = self.audio_decoder(inputs=encoded, input_len=encoded_len) return audio, audio_len, audio_gen, commit_loss @@ -361,37 +380,65 @@ def training_step(self, batch, batch_idx): audio, audio_len, audio_gen, commit_loss = self._process_batch(batch) + metrics = { + "global_step": self.global_step, + "lr": optim_gen.param_groups[0]['lr'], + } + if self.should_update_disc(batch_idx): # Train discriminator disc_scores_real, disc_scores_gen, _, _ = self.discriminator( audio_real=audio, audio_gen=audio_gen.detach() ) loss_disc = self.disc_loss_fn(disc_scores_real=disc_scores_real, disc_scores_gen=disc_scores_gen) - train_disc_loss = loss_disc + metrics["d_loss"] = loss_disc optim_disc.zero_grad() - self.manual_backward(train_disc_loss) + self.manual_backward(loss_disc) optim_disc.step() - else: - loss_disc = None - loss_time_domain = self.time_domain_loss_fn(audio_real=audio, audio_gen=audio_gen, audio_len=audio_len) - train_loss_time_domain = self.time_domain_loss_scale * loss_time_domain + generator_losses = [] + + loss_mel_l1, loss_mel_l2 = self.mel_loss_fn(audio_real=audio, audio_gen=audio_gen, audio_len=audio_len) + if self.mel_loss_l1_scale: + metrics["g_loss_mel_l1"] = loss_mel_l1 + generator_losses.append(self.mel_loss_l1_scale * loss_mel_l1) + if self.mel_loss_l2_scale: + metrics["g_loss_mel_l2"] = loss_mel_l2 + generator_losses.append(self.mel_loss_l2_scale * loss_mel_l2) + + if self.stft_loss_scale: + loss_stft = self.stft_loss_fn(audio_real=audio, audio_gen=audio_gen, audio_len=audio_len) + metrics["g_loss_stft"] = loss_stft + generator_losses.append(self.stft_loss_scale * loss_stft) - loss_mel = self.mel_loss_fn(audio_real=audio, audio_gen=audio_gen, audio_len=audio_len) - train_loss_mel = self.mel_loss_scale * loss_mel + if self.time_domain_loss_scale: + loss_time_domain = self.time_domain_loss_fn(audio_real=audio, audio_gen=audio_gen, audio_len=audio_len) + metrics["g_loss_time_domain"] = loss_time_domain + generator_losses.append(self.time_domain_loss_scale * loss_time_domain) + + if self.si_sdr_loss_scale: + loss_si_sdr = self.si_sdr_loss_fn(audio_real=audio, audio_gen=audio_gen, audio_len=audio_len) + metrics["g_loss_si_sdr"] = loss_si_sdr + generator_losses.append(self.si_sdr_loss_scale * loss_si_sdr) _, disc_scores_gen, fmaps_real, fmaps_gen = self.discriminator(audio_real=audio, audio_gen=audio_gen) - loss_gen = self.gen_loss_fn(disc_scores_gen=disc_scores_gen) - train_loss_gen = self.gen_loss_scale * loss_gen + if self.gen_loss_scale: + loss_gen = self.gen_loss_fn(disc_scores_gen=disc_scores_gen) + metrics["g_loss_gen"] = loss_gen + generator_losses.append(self.gen_loss_scale * loss_gen) + + if self.feature_loss_scale: + loss_feature = self.feature_loss_fn(fmaps_real=fmaps_real, fmaps_gen=fmaps_gen) + metrics["g_loss_feature"] = loss_feature + generator_losses.append(self.feature_loss_scale * loss_feature) - loss_feature = self.feature_loss_fn(fmaps_real=fmaps_real, fmaps_gen=fmaps_gen) - train_loss_feature = self.feature_loss_scale * loss_feature + if self.commit_loss_scale: + metrics["g_loss_commit"] = commit_loss + generator_losses.append(self.commit_loss_scale * commit_loss) - loss_gen_all = train_loss_time_domain + train_loss_mel + train_loss_gen + train_loss_feature - if commit_loss is not None: - loss_gen_all += commit_loss + loss_gen_all = sum(generator_losses) optim_gen.zero_grad() self.manual_backward(loss_gen_all) @@ -399,36 +446,30 @@ def training_step(self, batch, batch_idx): self.update_lr() - metrics = { - "g_loss_time_domain": loss_time_domain, - "g_loss_mel": loss_mel, - "g_loss_gen": loss_gen, - "g_loss_feature": loss_feature, - "g_loss": loss_gen_all, - "global_step": self.global_step, - "lr": optim_gen.param_groups[0]['lr'], - } - - if loss_disc is not None: - metrics["d_loss"] = loss_disc - - if commit_loss is not None: - metrics["g_loss_commit"] = commit_loss - self.log_dict(metrics, on_step=True, sync_dist=True) - self.log("t_loss", train_loss_mel, prog_bar=True, logger=False, sync_dist=True) + self.log("t_loss", loss_mel_l1, prog_bar=True, logger=False, sync_dist=True) def on_train_epoch_end(self): self.update_lr("epoch") def validation_step(self, batch, batch_idx): audio, audio_len, audio_gen, _ = self._process_batch(batch) + + loss_mel_l1, loss_mel_l2 = self.mel_loss_fn(audio_real=audio, audio_gen=audio_gen, audio_len=audio_len) + loss_stft = self.stft_loss_fn(audio_real=audio, audio_gen=audio_gen, audio_len=audio_len) loss_time_domain = self.time_domain_loss_fn(audio_real=audio, audio_gen=audio_gen, audio_len=audio_len) - loss_mel = self.mel_loss_fn(audio_real=audio, audio_gen=audio_gen, audio_len=audio_len) + loss_si_sdr = self.si_sdr_loss_fn(audio_real=audio, audio_gen=audio_gen, audio_len=audio_len) + + # Use only main reconstruction losses for val_loss + val_loss = loss_mel_l1 + loss_stft + loss_time_domain + metrics = { - "val_loss": loss_time_domain + loss_mel, + "val_loss": val_loss, + "val_loss_mel_l1": loss_mel_l1, + "val_loss_mel_l2": loss_mel_l2, + "val_loss_stft": loss_stft, "val_loss_time_domain": loss_time_domain, - "val_loss_mel": loss_mel, + "val_loss_si_sdr": loss_si_sdr, } self.log_dict(metrics, on_epoch=True, sync_dist=True) diff --git a/nemo/collections/tts/modules/encodec_modules.py b/nemo/collections/tts/modules/encodec_modules.py index b05187ccb74b..8c424351ce35 100644 --- a/nemo/collections/tts/modules/encodec_modules.py +++ b/nemo/collections/tts/modules/encodec_modules.py @@ -198,6 +198,7 @@ def output_types(self): def remove_weight_norm(self): self.pre_conv.remove_weight_norm() + self.post_conv.remove_weight_norm() for res_block in self.res_blocks: res_block.remove_weight_norm() for down_sample_conv in self.down_sample_conv_layers: @@ -544,8 +545,8 @@ def __init__( codebook_size: int, codebook_dim: int, decay: float = 0.99, - threshold_ema_dead_code: Optional[int] = 2, - kmeans_iters: Optional[int] = None, + threshold_ema_dead_code: Optional[float] = 2.0, + kmeans_iters: Optional[int] = 50, ): super().__init__() self.decay = decay @@ -686,7 +687,6 @@ class ResidualVectorQuantizer(NeuralModule): Args: num_codebooks: Number of codebooks to use. - commit_loss_scale: Loss scale for codebook commit loss. codebook_size: Number of codes to use for each codebook. codebook_dim: Dimension of each code. decay: Decay for exponential moving average over the codebooks. @@ -700,20 +700,15 @@ class ResidualVectorQuantizer(NeuralModule): def __init__( self, num_codebooks: int, - commit_loss_scale: float = 1.0, codebook_size: int = 1024, codebook_dim: int = 128, decay: float = 0.99, - threshold_ema_dead_code: Optional[int] = 2, + threshold_ema_dead_code: Optional[float] = 2.0, kmeans_iters: Optional[int] = 50, ): super().__init__() self.codebook_dim = codebook_dim - - if commit_loss_scale: - self.commit_loss_fn = MaskedMSELoss(loss_scale=commit_loss_scale) - else: - self.commit_loss_fn = None + self.commit_loss_fn = MaskedMSELoss() self.codebooks = nn.ModuleList( [ @@ -728,16 +723,6 @@ def __init__( ] ) - def _commit_loss(self, input, target, input_len): - if not self.commit_loss_fn: - return 0.0 - - return self.commit_loss_fn( - predicted=rearrange(input, "B T D -> B D T"), - target=rearrange(target, "B T D -> B D T"), - target_len=input_len, - ) - @property def input_types(self): return { @@ -764,13 +749,17 @@ def forward(self, inputs: Tensor, input_len: Tensor) -> Tuple[Tensor, Tensor, fl dequantized_i, indices_i = codebook(inputs=residual, input_len=input_len) if self.training: - dequantized_i = residual + (dequantized_i - residual).detach() dequantized_i_const = dequantized_i.detach() - commit_loss_i = self._commit_loss(input=residual, target=dequantized_i_const, input_len=input_len) + + commit_loss_i = self.commit_loss_fn( + predicted=rearrange(residual, "B T D -> B D T"), + target=rearrange(dequantized_i_const, "B T D -> B D T"), + target_len=input_len, + ) commit_loss = commit_loss + commit_loss_i residual = residual - dequantized_i_const - + dequantized_i = residual + (dequantized_i - residual).detach() else: residual = residual - dequantized_i diff --git a/nemo/collections/tts/parts/utils/helpers.py b/nemo/collections/tts/parts/utils/helpers.py index 72048882fe78..08d31390107b 100644 --- a/nemo/collections/tts/parts/utils/helpers.py +++ b/nemo/collections/tts/parts/utils/helpers.py @@ -141,7 +141,7 @@ def get_mask_from_lengths(lengths: Optional[torch.Tensor] = None, x: Optional[to lengths: Optional[torch.tensor] (torch.tensor): 1D tensor with lengths x: Optional[torch.tensor] = tensor to be used on, last dimension is for mask Returns: - mask (torch.tensor): num_sequences x max_length x 1 binary tensor + mask (torch.tensor): num_sequences x max_length binary tensor """ if lengths is None: assert x is not None diff --git a/tests/collections/tts/losses/test_audio_codec_loss.py b/tests/collections/tts/losses/test_audio_codec_loss.py index 0fe7991e92cb..60ea8d293655 100644 --- a/tests/collections/tts/losses/test_audio_codec_loss.py +++ b/tests/collections/tts/losses/test_audio_codec_loss.py @@ -14,8 +14,10 @@ import pytest import torch +from torchmetrics import ScaleInvariantSignalDistortionRatio -from nemo.collections.tts.losses.audio_codec_loss import MaskedMAELoss, MaskedMSELoss +from nemo.collections.tts.losses.audio_codec_loss import MaskedMAELoss, MaskedMSELoss, SISDRLoss +from nemo.collections.tts.parts.utils.helpers import mask_sequence_tensor class TestAudioCodecLoss: @@ -42,3 +44,47 @@ def test_masked_loss_l2(self): loss = loss_fn(predicted=predicted, target=target, target_len=target_len) assert loss == (4 / 3) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_si_sdr_loss(self): + loss_fn = SISDRLoss() + sdr_fn = ScaleInvariantSignalDistortionRatio(zero_mean=True) + + num_samples = 1000 + torch.manual_seed(100) + target = torch.rand([1, num_samples]) + predicted = torch.rand([1, num_samples]) + target_len = torch.tensor([num_samples, num_samples]) + + torch_si_sdr = sdr_fn(preds=predicted, target=target) + loss = loss_fn(audio_real=target, audio_gen=predicted, audio_len=target_len) + si_sdr = -loss + + torch.testing.assert_close(actual=si_sdr, expected=torch_si_sdr) + + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_si_sdr_loss_batch(self): + loss_fn = SISDRLoss() + si_sdr_fn = ScaleInvariantSignalDistortionRatio(zero_mean=True) + + batch_size = 3 + num_samples = 1000 + torch.manual_seed(100) + target = torch.rand([batch_size, num_samples]) + predicted = torch.rand([batch_size, num_samples]) + + target_len = torch.tensor([500, 250, 900]) + target = mask_sequence_tensor(target, lengths=target_len) + predicted = mask_sequence_tensor(predicted, lengths=target_len) + + torch_si_sdr = 0.0 + for i in range(batch_size): + torch_si_sdr += si_sdr_fn(preds=predicted[i, : target_len[i]], target=target[i, : target_len[i]]) + torch_si_sdr /= batch_size + + loss = loss_fn(audio_real=target, audio_gen=predicted, audio_len=target_len) + si_sdr = -loss + + torch.testing.assert_close(actual=si_sdr, expected=torch_si_sdr) diff --git a/tests/collections/tts/modules/test_audio_codec_modules.py b/tests/collections/tts/modules/test_audio_codec_modules.py index 4650a6508edd..b48b415547fe 100644 --- a/tests/collections/tts/modules/test_audio_codec_modules.py +++ b/tests/collections/tts/modules/test_audio_codec_modules.py @@ -15,12 +15,7 @@ import pytest import torch -from nemo.collections.tts.modules.audio_codec_modules import ( - Conv1dNorm, - ConvTranspose1dNorm, - get_down_sample_padding, - get_up_sample_padding, -) +from nemo.collections.tts.modules.audio_codec_modules import Conv1dNorm, ConvTranspose1dNorm, get_down_sample_padding class TestAudioCodecModules: From 2209a30d086364e8318a59e1adc16d7135b51bf1 Mon Sep 17 00:00:00 2001 From: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Date: Sat, 7 Oct 2023 05:51:11 +0400 Subject: [PATCH 312/512] Create per.py (#7538) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Move model precision copy (#7336) * move cfg precision set to megatron base model Signed-off-by: Maanu Grover * remove copy from other models Signed-off-by: Maanu Grover * modify attribute not arg Signed-off-by: Maanu Grover * fix gpt model test for ptl 2.0 Signed-off-by: Maanu Grover * rename function and add docstring Signed-off-by: Maanu Grover * replace precision to dtype conditionals with func call Signed-off-by: Maanu Grover * unnecessary function and cfg reset Signed-off-by: Maanu Grover * set default value Signed-off-by: Maanu Grover * fix precision lookup in a few more places Signed-off-by: Maanu Grover * rename mapping function Signed-off-by: Maanu Grover * ununsed import Signed-off-by: Maanu Grover * save torch datatype to model Signed-off-by: Maanu Grover * set weights precision wrt amp o2 Signed-off-by: Maanu Grover * Revert "set weights precision wrt amp o2" This reverts commit 313a4bfe5eb69d771a6d2433898c0685836aef5c. Signed-off-by: Maanu Grover * revert half precision at inference attempt Signed-off-by: Maanu Grover * move autocast dtype to base model Signed-off-by: Maanu Grover * move params dtype to base model, enable fp16 O2 inf Signed-off-by: Maanu Grover * unused imports Signed-off-by: Maanu Grover --------- Signed-off-by: Maanu Grover Signed-off-by: Sasha Meister * Fix PEFT checkpoint loading (#7388) * Fix PEFT checkpoint loading Signed-off-by: Jason Wang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Jason Wang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Use distributed optimizer support for multiple dtypes (#7359) * Update distopt wrapper with multiple dtype support Remove manual handling of separate FP32 optimizer. Signed-off-by: Tim Moon * Use distopt support for contiguous buffers with multiple dtypes Signed-off-by: Tim Moon * Fix typo Signed-off-by: Tim Moon * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Separate distopt buckets for first GPT layer and non-overlapped params Signed-off-by: Tim Moon * Add distopt logic for int dtypes Signed-off-by: Tim Moon * Update Apex commit Signed-off-by: Tim Moon * Remove unused variables Signed-off-by: Tim Moon * Update Apex commit in README and Jenkensfile Signed-off-by: Tim Moon * Debug Dockerfile and Jenkinsfile Signed-off-by: Tim Moon --------- Signed-off-by: Tim Moon Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Harper Signed-off-by: Sasha Meister * minor fix for llama ckpt conversion script (#7387) * minor fix for llama ckpt conversion script Signed-off-by: Jason Wang * Update Jenkinsfile Signed-off-by: Jason Wang * remove fast_swiglu configuration Signed-off-by: Jason Wang --------- Signed-off-by: Jason Wang Co-authored-by: Eric Harper Signed-off-by: Sasha Meister * Fix wrong calling of librosa.get_duration() in notebook (#7376) Signed-off-by: Robin Dong Co-authored-by: Somshubra Majumdar Signed-off-by: Sasha Meister * [PATCH] PEFT import mcore (#7393) * [PATCH] PEFT import mcore Signed-off-by: Jason Wang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Jason Wang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Create per.py Script for calculation Punctuation Error Rate and related rates (correct rate, deletions rate, etc.) Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * [TTS] Added a callback for logging initial data (#7384) Signed-off-by: Ante Jukić Signed-off-by: Sasha Meister * Update Core Commit (#7402) * Update Core Commit Signed-off-by: Abhinav Khattar * update commit Signed-off-by: Abhinav Khattar --------- Signed-off-by: Abhinav Khattar Signed-off-by: Sasha Meister * Use cfg attribute in bert (#7394) * use cfg attribute instead of arg Signed-off-by: Maanu Grover * use torch_dtype in place of cfg.precision Signed-off-by: Maanu Grover * move precision copy before super constructor Signed-off-by: Maanu Grover * use trainer arg Signed-off-by: Maanu Grover --------- Signed-off-by: Maanu Grover Signed-off-by: Sasha Meister * Add support for bias conversion in Swiglu models (#7386) * Add support for bias conversion in Swiglu models Signed-off-by: smajumdar * Add support for auto extracting tokenizer model Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add support for auto extracting tokenizer model Signed-off-by: smajumdar * Fix issue with missing tokenizer Signed-off-by: smajumdar * Refactor Signed-off-by: smajumdar * Refactor Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: smajumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Update save_to and restore_from for dist checkpointing (#7343) * add dist ckpt to save to, in progress Signed-off-by: eharper * move dist ckpt Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * clean up Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update restore from, need to figure out how to initialize distributed Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * launch distrib if needed when restoring dist ckpt Signed-off-by: eharper * when using mcore we can change tp pp on the fly Signed-off-by: eharper * add load_from_checkpoint support for dist ckpt Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update llama convert script to save dist .nemo Signed-off-by: eharper * fix load dist ckpt Signed-off-by: jasonwan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * setup TE TP groups if needed Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * setup te tp groups if needed Signed-off-by: eharper * remove import Signed-off-by: eharper --------- Signed-off-by: eharper Signed-off-by: jasonwan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: jasonwan Signed-off-by: Sasha Meister * fix forward for with mcore=false (#7403) Signed-off-by: Jimmy Zhang Co-authored-by: Jimmy Zhang Signed-off-by: Sasha Meister * Fix logging to remove 's/it' from progress bar in Megatron models and add train_step_timing (#7374) * Add CustomProgressBar class to exp_manager and trainer callbacks Signed-off-by: Abhishree * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix the progress bar to reflect total microbatch cnt Signed-off-by: Abhishree * Modify CustomProgressBar class 1) Modify CustomProgressBar class to update progress bar per global_step instead of per microbatch 2) Add the callback to other megatron training/finetuning files that are not using MegatronTrainerBuilder Signed-off-by: Abhishree * Add CustomProgressBar callback to tuning files Signed-off-by: Abhishree * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Abhishree Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Set Activation Checkpointing Defaults (#7404) * Set Activation Checkpointing Defaults Signed-off-by: Abhinav Khattar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * check for None Signed-off-by: Abhinav Khattar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Abhinav Khattar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * make loss mask default to false (#7407) Signed-off-by: eharper Signed-off-by: Sasha Meister * Add dummy userbuffer config files (#7408) Signed-off-by: Sangkug Lym Signed-off-by: Sasha Meister * add missing ubconf files (#7412) Signed-off-by: Abhinav Khattar Signed-off-by: Sasha Meister * New tutorial on Speech Data Explorer (#7405) * Added Google Colab based tutorial on Speech Data Explorer Signed-off-by: George Zelenfroynd Signed-off-by: Sasha Meister * Update ptl training ckpt conversion script to work with dist ckpt (#7416) * update ptl convert script Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * don't break legacy Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: eharper Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Allow disabling sanity checking when num_sanity_val_steps=0 (#7413) * Allow disabling sanity checking when num_sanity_val_steps=0 Signed-off-by: Abhishree * Update num_sanity_val_steps to be a multiple of num_microbatches Signed-off-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Abhishree Signed-off-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Add comprehensive error messages (#7261) Signed-off-by: Anton Peganov Signed-off-by: Sasha Meister * check NEMO_PATH (#7418) Signed-off-by: Nikolay Karpov Signed-off-by: Sasha Meister * layer selection for ia3 (#7417) * layer selection for ia3 Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix missing pip package 'einops' (#7397) Signed-off-by: Robin Dong Signed-off-by: Sasha Meister * Fix failure of pyaudio in Google Colab (#7396) Signed-off-by: Robin Dong Signed-off-by: Sasha Meister * Update README.md: output_path --> output_manifest_filepath (#7442) Signed-off-by: Samuele Cornell Signed-off-by: Sasha Meister * Add rope dynamic linear scaling (#7437) * Add dynamic linear scaling Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix Signed-off-by: Cheng-Ping Hsieh --------- Signed-off-by: Cheng-Ping Hsieh Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Yang Zhang Signed-off-by: Sasha Meister * Fix None dataloader issue in PTL2.0 (#7455) * Fix None dataloader issue in PTL2.0 Signed-off-by: KunalDhawan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updating values of self._validation_dl and self._test_dl as well Signed-off-by: KunalDhawan * updating values of self._validation_dl and self._test_dl as well Signed-off-by: KunalDhawan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: KunalDhawan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * [ASR] Confidence measure -> method renames (#7434) * measure -> method Signed-off-by: Aleksandr Laptev * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Aleksandr Laptev Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Add steps for document of getting dataset 'SF Bilingual Speech' (#7378) * Add steps for document of getting dataset 'SF Bilingual Speech' Signed-off-by: Robin Dong * Update datasets.rst added a link from a tutorial demonstrating detailed data prep steps. Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --------- Signed-off-by: Robin Dong Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Sasha Meister * RNN-T confidence and alignment bugfix (#7381) * new frame_confidence and alignments lists are now always created after the while loop Signed-off-by: Aleksandr Laptev * tests added Signed-off-by: Aleksandr Laptev --------- Signed-off-by: Aleksandr Laptev Signed-off-by: Sasha Meister * Fix resume from checkpoint in exp_manager (#7424) (#7426) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: Eric Harper Signed-off-by: Sasha Meister * Fix checking of cuda/cpu device for inputs of Decoder (#7444) * Fix checking of cuda/cpu device for inputs of Decoder Signed-off-by: Robin Dong * Update tacotron2.py Signed-off-by: Jason --------- Signed-off-by: Robin Dong Signed-off-by: Jason Co-authored-by: Jason Signed-off-by: Sasha Meister * Fix failure of ljspeech's get_data.py (#7430) * Fix failure of ljspeech's get_data.py Signed-off-by: Robin Dong * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Robin Dong Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * [TTS] Fix audio codec type checks (#7373) * [TTS] Fix audio codec type checks Signed-off-by: Ryan * [TTS] Fix audio codec tests Signed-off-by: Ryan --------- Signed-off-by: Ryan Signed-off-by: Sasha Meister * [TTS] Add dataset to path of logged artifacts (#7462) * [TTS] Add dataset to path of logged artifacts Signed-off-by: Ryan * [TTS] Revert axis name back to Audio Frames Signed-off-by: Ryan --------- Signed-off-by: Ryan Signed-off-by: Sasha Meister * Fix sft dataset truncation (#7464) * Add fix Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix Signed-off-by: Cheng-Ping Hsieh --------- Signed-off-by: Cheng-Ping Hsieh Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Automatic Lip Reading Recognition (ALR) - ASR/CV (Visual ASR) (#7330) * striding_conv1d_k5 and dw_striding_conv1d_k5 subsampling Signed-off-by: mburchi * transpose conv1d inputs Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: mburchi * Update subsampling.py change striding_conv1d_k5 to striding_conv1d Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> * cv branch Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * video manifest Signed-off-by: mburchi * add collection classes Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add test_step_outputs Signed-off-by: mburchi * correct manifest bug when having only audio or only videos Signed-off-by: mburchi * correct manifest bug when having only audio or only videos Signed-off-by: mburchi * clean references Signed-off-by: mburchi * freeze unfreeze transcribe cv models Signed-off-by: mburchi * correct manifest get_full_path bug Signed-off-by: mburchi * update for PR Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * guard torchvision Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update nemo/collections/cv/data/video_to_text_dataset.py Co-authored-by: Igor Gitman Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> * _video_speech_collate_fn in cv/data/video_to_text.py Signed-off-by: mburchi * add self.out = None to asr subsampling Signed-off-by: mburchi * Update nemo/collections/cv/data/video_to_text_dataset.py Co-authored-by: Igor Gitman Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> * cv -> multimodal/speech_cv branch Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: mburchi Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Igor Gitman Signed-off-by: Sasha Meister * HF StarCoder to NeMo conversion script (#7421) * Script to convert HF StarCoder checkpoint to NeMo Signed-off-by: Jan Lasek * StarCoder conversion test Signed-off-by: Jan Lasek * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Jan Lasek * Fix test Signed-off-by: Jan Lasek * Catch up with save_to changes Signed-off-by: Jan Lasek * Don't abbreviate args for clarity Signed-off-by: Jan Lasek * Configurable precision: BF16 vs FP32 Signed-off-by: Jan Lasek * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Jan Lasek Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * fix bug when loading dist ckpt in peft (#7452) Signed-off-by: Hongbin Liu Co-authored-by: Hongbin Liu Signed-off-by: Sasha Meister * Fix adding positional embeddings in-place in transformer module (#7440) Signed-off-by: Tamerlan Tabolov Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix (#7478) Signed-off-by: Cheng-Ping Hsieh Signed-off-by: Sasha Meister * add sleep (#7498) (#7499) * add sleep * add sleep onto config instead * add comment --------- Signed-off-by: Gerald Shen Co-authored-by: Gerald Shen <119401249+gshennvm@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix exp manager check for sleep (#7503) (#7504) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Signed-off-by: Sasha Meister * bugfix: trainer.accelerator=auto from None. (#7492) (#7493) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Sasha Meister * [doc] fix broken link (#7481) Signed-off-by: Stas Bekman Signed-off-by: Sasha Meister * [TTS] Read audio as int32 to avoid flac read errors (#7477) * [TTS] Read audio as int32 to avoid flac read errors Signed-off-by: Ryan * [TTS] Add comment about read failures Signed-off-by: Ryan --------- Signed-off-by: Ryan Signed-off-by: Sasha Meister * Add dataset 'AISHELL-3' from OpenSLR for training mandarin TTS (#7409) * Add dataset 'AISHELL-3' from OpenSLR for training mandarin TTS * Train 'AISHELL-3' dataset with multi-speakers Signed-off-by: Robin Dong * Update get_data.py update copyright header Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * Update get_data.py added a disclaimer Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add new configuration file for AISHELL3 with multispeaker of fastpitch Signed-off-by: Robin Dong --------- Signed-off-by: Robin Dong Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Sasha Meister * dllogger - log on rank 0 only (#7513) Signed-off-by: Stas Bekman Signed-off-by: Sasha Meister * Fix TTS FastPitch tutorial (#7494) (#7516) * Fix --------- Signed-off-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix get_dist() tensor dimension (#7506) (#7515) Signed-off-by: Jocelyn Huang Co-authored-by: Jocelyn Signed-off-by: Sasha Meister * bugfix: specify trainer.strategy=auto when devices=1 (#7509) (#7512) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Sasha Meister * fix (#7511) Signed-off-by: Abhinav Khattar Signed-off-by: Sasha Meister * [TTS] Fix FastPitch data prep tutorial (#7524) Signed-off-by: Ryan Signed-off-by: Sasha Meister * add italian tokenization (#7486) * add italian tokenization Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add more ipa lexicon it Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix error deletion Signed-off-by: GiacomoLeoneMaria * add test Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: GiacomoLeoneMaria Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Replace None strategy with auto in tutorial notebooks (#7521) (#7527) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Sasha Meister * unpin setuptools (#7534) (#7535) Signed-off-by: fayejf <36722593+fayejf@users.noreply.github.com> Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> Signed-off-by: Sasha Meister * Update per.py - if __name__ == "__main__" removed (now metric can be imported); - removed excessive classes (like "Sample" and "Statistics"); - transition from pandas df to dict of dicts; - removed unnecessary "return"; - notation fixing; - reduced calculation time Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Create punctuation_rates.py Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Format fixing Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * added nemo.logging, header, docstrings, how to use Signed-off-by: Sasha Meister * Added asserions to rate_punctuation.py Signed-off-by: Sasha Meister * fix typo Signed-off-by: Sasha Meister * added function for import and call, docstrings Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * remove auto generated examples (#7510) * explicitly remove autogenerated examples for data parallel evaluation Signed-off-by: arendu * mark autogenrated and remove it for test Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Add the `strategy` argument to `MegatronGPTModel.generate()` (#7264) It is passed as an explicit argument rather than through `**strategy_args` so as to ensure someone cannot accidentally pass other arguments that would end up being ignored. It is a keyword-only argument to ensure that if in the future we want to update the signature to `**strategy_args`, we can do it without breaking code. Signed-off-by: Olivier Delalleau <507137+odelalleau@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix PTL2.0 related ASR bugs in r1.21.0: Val metrics logging, None dataloader issue (#7531) (#7533) * fix none dataloader issue ptl2 * ptl2.0 logging fixes for rnnt_models --------- Signed-off-by: KunalDhawan Co-authored-by: Kunal Dhawan Co-authored-by: Nithin Rao Signed-off-by: Sasha Meister * gpus -> devices (#7542) (#7545) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Signed-off-by: Sasha Meister * Update FFMPEG version to fix issue with torchaudio (#7551) (#7553) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Signed-off-by: Sasha Meister * PEFT GPT & T5 Refactor (#7308) * initial implementation of add_adapters API * correct type hint * Add config in add_adapters for save and load (@author bobchen) * Remove AdapterConfig to avoid import error * Add AdaterConfig back and move adaptermixin to sft model * Add NLPSaveRestoreConnector as default in NLPModel.restore_from * Add restore_from_nemo_with_adapter and test script * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rename t5 file and classes to be consistent with GPT * add t5 sft dataset * add support for single-file format with T5SFTDataset * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Various small changes to make T5 SFT work like GPT SFT * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add adapter evaluation test script * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add MultiAdaterConfig for ia3 and fix builder issue * Make ptuning for T5SFTModel work using mixin * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add IA3_Adapter for AdapterName * Add adapter name for ptuning and attention adapter * Make test script GPT/T5 agnostic * Add layer selection feature * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Integrate adapter name and config * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update gpt peft tuning script to new API * add t5 peft tuning script with new API * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix IA3 layer selection issue * Override state_dict on SFT model instead of mixin * Add load adapter by adapter config * move peft config map away from example script * auto get config from nemo adapter * Move PEFTConfig to new file * fix ckpt save/load for t5 * name change: add_adapters -> add_adapter * variable name change * update t5 script * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix t5 issues * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add weight tying * update gpt tuning script * PEFT-API proposal * Fix according to comments * update tuning scripts * move merge_cfg_with to mixin class since it applies to both gpt and t5 and requires the model class for restore * Add mcore_gpt support for NLPAdapterMixin * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix typo * variable name change to distinguish "peft" and "adapter" * override `load_adapters` to support `add_adapter` name change * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update tuning and eval script for adapter save/load * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Ptuning on first stage only * add lora tutorial for review * Fix layer selection for mcore * add landing page * fix resume training Signed-off-by: jasonwan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add mcore condition in sharded_state_dict to make sft work * Update lora_tutorial.md First edit of this file for PEFT documentation for NeMO Signed-off-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> * rename Adapter to AttentionAdapter to avoid confusion in doc * Change load_adapters to load .nemo * add quick start guide * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add load_adapters with .ckpt * Remove setup_complete changes in load_adapters * update landing page * remove typo * Updated quick_start.md per Chen Cui Signed-off-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> * Add inference config merger and tutorial * Add doc string for NLPAdapterModelMixin and deprecated warning on MegatronGPTPEFTModel * add supported_methods.md and update other documentations * Update supported_methods.md minor updates. Signed-off-by: Adi Renduchintala * Update landing_page.md minor update. Signed-off-by: Adi Renduchintala * Modify doc string for NLPAdapterModelMixin * Add doc string add_adapters in NLPAdapterModelMixin * rename canonical adapters * remove mcore hard dependency * [PATCH] move microbatch calculator to nemo from apex * remove apex dependency in gpt and t5 sft models * remove apex dependency in gpt model * render doc strings * fix * Add missing virtual_tokens on ptuning * fix docstrings * update gpt-style model coverage in docs * update docstring * Remove pdb * add lightning_fabric to make docstring rendering work * Add Ptuning missing key * try docstring rendering * Fix ptuning issue * update gpt t5 peft tuning and eval scripts * typos * update eval config * fix bug relating to apex dependency removal * typo * make predict step behave the same as test step * make lora tutorial work in notebook * cosmetics * update yaml scripts * mcore_gpt attribute optional * typo * update eval scripts and fix T5 eval bugs * add NLPDDPStrategyNotebook and trainer builder logic to use it * update lora notebook to use new trainer builder * fix microbatch calculator bug for inference after training * Convert markdown files to RST and incorporate with doc * typo * revise language * remove extra cell * remove unnecessary inheritance * remove old tests * move layer selection default so logging messages make sense * remove `save_adapters` as adapter weights are saved automatically during training * initialize weights from a checkpoint instead of randomly * multiple fields can form a context (#7147) * list of context fields and flexible prompt template Signed-off-by: arendu * list of fields for context Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh * Fix bug Signed-off-by: Cheng-Ping Hsieh * Add multiple truncation fields and middle truncation Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Compatible to old ckpt Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix tokenize detokenize issue Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove detokenization, add truncation augmentation Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Resolve comments Signed-off-by: Cheng-Ping Hsieh * Remove unused import Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert eos Signed-off-by: Cheng-Ping Hsieh * Add tokenizer space_sensitive attribute Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix error Signed-off-by: Cheng-Ping Hsieh * Fix erorr and use re Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh * Change assert logic Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Follow adi suggestion Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove merge function Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add example and comment Signed-off-by: Cheng-Ping Hsieh * Remove context_key and add comment Signed-off-by: Cheng-Ping Hsieh * Remove random truncation Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix template none Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh --------- Signed-off-by: arendu Signed-off-by: Cheng-Ping Hsieh Signed-off-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> * revert config changes * remove accidental breakpoint * support TP>1 loading * infer adapter type from checkpoint in during eval * breakup add adapter * enable interpolation of train_ds and validation_ds * update metric calc script to conform to single-file eval format * remove extraneous print * update lora notebook for updated merge_inference_cfg * Update nlp_adapter_mixins.py variable name change Signed-off-by: Chen Cui * turn off grad scaler for PP to match old scripts * remove PEFTSaveRestoreConnector since functionality all covered by the new mixin class * remove resume_from_checkpoint check since covered in #7335 * revert changes made in eval config interpolation * more interpolation * typo * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove dup line Signed-off-by: Chen Cui * code style warnings Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix config mistake Signed-off-by: Chen Cui * add copyright header Signed-off-by: Chen Cui * fix code check warnings Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert changes to remove apex dependency (mixed apex+nemo microbatch calculator broke some CI tests) Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add more deprecation notices Signed-off-by: Chen Cui * update deprecation notices Signed-off-by: Chen Cui * update deprecation notices Signed-off-by: Chen Cui * consolidate peft and sft scripts Signed-off-by: Chen Cui * update CI tests Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * notebook branch points to main to prepare for merge Signed-off-by: Chen Cui * fix gpt and t5 validation with any metric other than loss Signed-off-by: Chen Cui * support pre-extracted checkpoints Signed-off-by: Chen Cui --------- Signed-off-by: jasonwan Signed-off-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> Signed-off-by: Adi Renduchintala Signed-off-by: arendu Signed-off-by: Cheng-Ping Hsieh Signed-off-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Chen Cui Co-authored-by: Chen Cui Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Marc Romeyn Co-authored-by: jasonwan Co-authored-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> Co-authored-by: Adi Renduchintala Co-authored-by: Yuanzhe Dong Co-authored-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Sasha Meister * fix a typo (#7496) Signed-off-by: BestJuly Signed-off-by: Sasha Meister * [TTS] remove curly braces from ${BRANCH} in jupyer notebook cell. (#7554) (#7560) * remove curly braces. * remove installation of pynini. --------- Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Sasha Meister * add youtube embed url (#7570) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Sasha Meister * Remap speakers to continuous range of speaker_id for dataset AISHELL3 (#7536) * Remap speakers to continuous range of speaker_id for dataset AISHELL3 * Add new key/value pair to record raw speaker for AISHELL3 dataset Signed-off-by: Robin Dong --------- Signed-off-by: Robin Dong Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * fix validation_step_outputs initialization for multi-dataloader (#7546) (#7572) * added correct validation_step_outputs initialization for mutli-dataloader * changed kernel for display * Update logic for validation and test step outputs * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert multidataloader changes in multilang ASR notebook --------- Signed-off-by: KunalDhawan Signed-off-by: smajumdar Co-authored-by: Kunal Dhawan Co-authored-by: Somshubra Majumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Append output of val step to self.validation_step_outputs (#7530) (#7532) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Sasha Meister * [TTS] fixed trainer's accelerator and strategy. (#7569) (#7574) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Sasha Meister * Append val/test output to instance variable in EncDecSpeakerLabelModel (#7562) (#7573) * Append val/test output to the instance variable in EncDecSpeakerLabelModel * Handle test case in evaluation_step * Replace type with isinstance --------- Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix CustomProgressBar for resume (#7427) (#7522) * Fix CustomProgress Bar for resume and multiple epochs * Edit num_training_batches * Use max_steps as total for progress bar for resume * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * fix typos in nfa and speech enhancement tutorials (#7580) (#7583) Signed-off-by: Elena Rastorgueva Co-authored-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Signed-off-by: Sasha Meister * Add strategy as ddp_find_unused_parameters_true for glue_benchmark.py (#7454) (#7461) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Sasha Meister * update strategy (#7577) (#7578) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Signed-off-by: Sasha Meister * Fix typos (#7581) Signed-off-by: Sasha Meister * Change hifigan finetune strategy to ddp_find_unused_parameters_true (#7579) (#7584) * Change strategy to auto --------- Signed-off-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Sasha Meister * [BugFix] Add missing quotes for auto strategy in tutorial notebooks (#7541) (#7548) * Add missing quotes for auto strategy * Revert trainer.gpus to trainer.devices in Self_Supervised_Pre_Training.ipynb --------- Signed-off-by: Abhishree Signed-off-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Sasha Meister * added per tests Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * [PATCH] PEFT import mcore (#7393) * [PATCH] PEFT import mcore Signed-off-by: Jason Wang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Jason Wang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * add build os key (#7596) (#7599) * add build os key * add tools * update to stable version --------- Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Signed-off-by: Sasha Meister * StarCoder SFT test + bump PyT NGC image to 23.09 (#7540) * Add SFT StarCoder test Signed-off-by: Jan Lasek * Remove _modify_config call as it is covered in load_from_nemo just below Signed-off-by: Jan Lasek * Test with pyt:23.09 container Signed-off-by: Jan Lasek --------- Signed-off-by: Jan Lasek Signed-off-by: Sasha Meister * defaults changed (#7600) * defaults changed Signed-off-by: arendu * typo Signed-off-by: arendu * update Signed-off-by: arendu --------- Signed-off-by: arendu Signed-off-by: Sasha Meister * add ItalianPhonemesTokenizer (#7587) * add ItalianPhonemesTokenizer Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix Italian phonemes Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add test Signed-off-by: GiacomoLeoneMaria --------- Signed-off-by: GiacomoLeoneMaria Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Sasha Meister * best ckpt fix (#7564) (#7588) Signed-off-by: dimapihtar Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Signed-off-by: Sasha Meister * rate_punctuation.py Fixed output manifest saving Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Fix tests Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * Add files via upload (#7598) specifies the branch Signed-off-by: George <37293288+Jorjeous@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix validation in G2PModel and ThutmoseTaggerModel (#7597) (#7606) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Sasha Meister * Broadcast loss only when using pipeline parallelism and within the pipeline parallel domain (#7576) (#7586) * Broadcast loss only when using pipeline parallelism and within the pipeline parallel domain * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Sangkug Lym Co-authored-by: Sangkug Lym Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Safeguard nemo_text_processing installation on ARM (#7485) * safeguard nemo_text_processing installing Signed-off-by: Jason * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update check Signed-off-by: Jason --------- Signed-off-by: Jason Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Function name fixing Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Moving PER to speech_to_text_eval.py Added: - "use_per": PER metric computing; - "scores_per_sample": metrics computation sample by sample for wer/cer/punctuation rates; - "output_with_scores_filename": saving manifest with metrics Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * Update test_metrics.py Updated "punctuation_error_rate" function name Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Added use_per description Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * guard extra dependencies Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Write metrics to "output_filename" if "scores_per_sample=True" Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * scores_per_sample description Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix import guards Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Stats printing when HAVE_TABLUATE_AND_PANDAS=False Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Bound transformers version in requirements (#7620) Signed-off-by: Abhishree Signed-off-by: Sasha Meister * fix llama2 70b lora tuning bug (#7622) * fix llama2 70b lora tuning bug Signed-off-by: Chen Cui * Update peft_config.py brackets Signed-off-by: Adi Renduchintala --------- Signed-off-by: Chen Cui Signed-off-by: Adi Renduchintala Co-authored-by: Adi Renduchintala Signed-off-by: Sasha Meister * Fix import error no module name model_utils (#7629) Signed-off-by: Mehadi Hasan Menon Signed-off-by: Sasha Meister * Delete examples/asr/rate_punctuation.py Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * Added use_per description Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * metric and variables name fixing Signed-off-by: Sasha Meister * Add else samples = None Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * add fc large ls models (#7641) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Koluguri Signed-off-by: Sasha Meister * bugfix: trainer.gpus, trainer.strategy, trainer.accelerator (#7621) (#7642) * [TTS] bugfix for Tacotron2 tutorial due to PTL 2.0 * trainer.gpus -> trainer.devices * fixed related tutorial bugs --------- Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Sasha Meister * fix ssl models ptl monitor val through logging (#7608) (#7614) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Co-authored-by: Eric Harper Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix metrics for SE tutorial (#7604) (#7612) Signed-off-by: Ante Jukić Co-authored-by: anteju <108555623+anteju@users.noreply.github.com> Signed-off-by: Sasha Meister * Add ddp_find_unused_parameters=True and change accelerator to auto (#7623) (#7644) * Add ddp_find_unused_parameters=True and change acclerator to auto * Add ddp_find_unused_parameters True for normalization_as_tagging_train.py --------- Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix py3.11 dataclasses issue (#7616) * Fix py3.11 dataclasses issue (#7582) * Update ASR configs to support Python 3.11 Signed-off-by: smajumdar * Update TTS configs to support Python 3.11 Signed-off-by: smajumdar * Guard MeCab and Ipadic Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix remaining ASR dataclasses Signed-off-by: smajumdar * Fix remaining ASR dataclasses Signed-off-by: smajumdar * Fix scripts Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: smajumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Update name to ConfidenceMethodConfig Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Broadcast loss only when using pipeline parallelism and within the pipeline parallel domain (#7576) (#7586) * Broadcast loss only when using pipeline parallelism and within the pipeline parallel domain * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Sangkug Lym Co-authored-by: Sangkug Lym Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Safeguard nemo_text_processing installation on ARM (#7485) * safeguard nemo_text_processing installing Signed-off-by: Jason * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update check Signed-off-by: Jason --------- Signed-off-by: Jason Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Fix changes to confidence measure Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: smajumdar Signed-off-by: Sangkug Lym Signed-off-by: Jason Co-authored-by: Somshubra Majumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Sangkug Lym Co-authored-by: Jason Signed-off-by: Sasha Meister * moved per sample metrics computing to transcribe_utils Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Moved punctuation rates printing to punct_er Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Added reset for DatasetPunctuationErrorRate class Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Added compute_metrics_per_sample description Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update megatron_gpt_peft_models.py Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update speech_to_text_eval.py Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> * Copyright year fixing Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> * "& AFFILIATES" added Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> --------- Signed-off-by: Maanu Grover Signed-off-by: Sasha Meister Signed-off-by: Jason Wang Signed-off-by: Tim Moon Signed-off-by: Robin Dong Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Ante Jukić Signed-off-by: Abhinav Khattar Signed-off-by: smajumdar Signed-off-by: eharper Signed-off-by: jasonwan Signed-off-by: Jimmy Zhang Signed-off-by: Abhishree Signed-off-by: Sangkug Lym Signed-off-by: George Zelenfroynd Signed-off-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Anton Peganov Signed-off-by: Nikolay Karpov Signed-off-by: arendu Signed-off-by: Samuele Cornell Signed-off-by: Cheng-Ping Hsieh Signed-off-by: KunalDhawan Signed-off-by: Aleksandr Laptev Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Jason Signed-off-by: Ryan Signed-off-by: mburchi Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> Signed-off-by: Jan Lasek Signed-off-by: Hongbin Liu Signed-off-by: Tamerlan Tabolov Signed-off-by: Gerald Shen Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Stas Bekman Signed-off-by: Jocelyn Huang Signed-off-by: GiacomoLeoneMaria Signed-off-by: fayejf <36722593+fayejf@users.noreply.github.com> Signed-off-by: Olivier Delalleau <507137+odelalleau@users.noreply.github.com> Signed-off-by: Nithin Rao Koluguri Signed-off-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> Signed-off-by: Adi Renduchintala Signed-off-by: arendu Signed-off-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Chen Cui Signed-off-by: BestJuly Signed-off-by: Elena Rastorgueva Signed-off-by: dimapihtar Signed-off-by: George <37293288+Jorjeous@users.noreply.github.com> Signed-off-by: Mehadi Hasan Menon Co-authored-by: Maanu Grover <109391026+maanug-nv@users.noreply.github.com> Co-authored-by: Jason Wang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Tim Moon <4406448+timmoon10@users.noreply.github.com> Co-authored-by: Eric Harper Co-authored-by: Robin Dong Co-authored-by: Somshubra Majumdar Co-authored-by: anteju <108555623+anteju@users.noreply.github.com> Co-authored-by: Abhinav Khattar Co-authored-by: JimmyZhang12 <67203904+JimmyZhang12@users.noreply.github.com> Co-authored-by: Jimmy Zhang Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: Sangkug Lym Co-authored-by: George <37293288+Jorjeous@users.noreply.github.com> Co-authored-by: PeganovAnton Co-authored-by: Nikolay Karpov Co-authored-by: Adi Renduchintala Co-authored-by: Samuele Cornell Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Co-authored-by: Yang Zhang Co-authored-by: Kunal Dhawan Co-authored-by: Aleksandr Laptev Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Jason Co-authored-by: Ryan Langman Co-authored-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> Co-authored-by: Igor Gitman Co-authored-by: Jan Lasek Co-authored-by: Kelvin Liu Co-authored-by: Hongbin Liu Co-authored-by: Tamerlan Tabolov Co-authored-by: Gerald Shen <119401249+gshennvm@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Stas Bekman Co-authored-by: Jocelyn Co-authored-by: Giacomo Leone Maria Cavallini <72698188+GiacomoLeoneMaria@users.noreply.github.com> Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> Co-authored-by: Olivier Delalleau <507137+odelalleau@users.noreply.github.com> Co-authored-by: Nithin Rao Co-authored-by: meatybobby Co-authored-by: Chen Cui Co-authored-by: Marc Romeyn Co-authored-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> Co-authored-by: Yuanzhe Dong Co-authored-by: Cheng-Ping Hsieh Co-authored-by: Li Tao Co-authored-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Co-authored-by: Igor Gitman Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Co-authored-by: Mehadi Hasan Menon Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> --- examples/asr/speech_to_text_eval.py | 44 +- .../asr/parts/utils/transcribe_utils.py | 92 ++++ nemo/collections/common/metrics/punct_er.py | 473 ++++++++++++++++++ tests/collections/common/test_metrics.py | 73 +++ 4 files changed, 680 insertions(+), 2 deletions(-) create mode 100644 nemo/collections/common/metrics/punct_er.py diff --git a/examples/asr/speech_to_text_eval.py b/examples/asr/speech_to_text_eval.py index 452aa8202660..9e24f0172208 100644 --- a/examples/asr/speech_to_text_eval.py +++ b/examples/asr/speech_to_text_eval.py @@ -25,12 +25,18 @@ for full list of arguments >> dataset_manifest: Required - path to dataset JSON manifest file (in NeMo format) - output_filename: Optional - output filename where the transcriptions will be written. + output_filename: Optional - output filename where the transcriptions will be written. (if scores_per_sample=True, + metrics per sample will be written there too) use_cer: Bool, whether to compute CER or WER + use_punct_er: Bool, compute dataset Punctuation Error Rate (set the punctuation marks for metrics computation with + "text_processing.punctuation_marks") + tolerance: Float, minimum WER/CER required to pass some arbitrary tolerance. only_score_manifest: Bool, when set will skip audio transcription and just calculate WER of provided manifest. + scores_per_sample: Bool, compute metrics for each sample separately (if only_score_manifest=True, scores per sample + will be added to the manifest at the dataset_manifest path) # Usage @@ -66,7 +72,12 @@ from omegaconf import MISSING, OmegaConf, open_dict from nemo.collections.asr.metrics.wer import word_error_rate -from nemo.collections.asr.parts.utils.transcribe_utils import PunctuationCapitalization, TextProcessingConfig +from nemo.collections.asr.parts.utils.transcribe_utils import ( + PunctuationCapitalization, + TextProcessingConfig, + compute_metrics_per_sample, +) +from nemo.collections.common.metrics.punct_er import DatasetPunctuationErrorRate from nemo.core.config import hydra_runner from nemo.utils import logging @@ -82,9 +93,11 @@ class EvaluationConfig(transcribe_speech.TranscriptionConfig): att_context_size: Optional[list] = None use_cer: bool = False + use_punct_er: bool = False tolerance: Optional[float] = None only_score_manifest: bool = False + scores_per_sample: bool = False text_processing: Optional[TextProcessingConfig] = TextProcessingConfig( punctuation_marks=".,?", separate_punctuation=False, do_lowercase=False, rm_punctuation=False, @@ -154,6 +167,29 @@ def main(cfg: EvaluationConfig): f"contain value for `pred_text`." ) + if cfg.use_punct_er: + dper_obj = DatasetPunctuationErrorRate( + hypotheses=predicted_text, + references=ground_truth_text, + punctuation_marks=list(cfg.text_processing.punctuation_marks), + ) + dper_obj.compute() + + if cfg.scores_per_sample: + metrics_to_compute = ["wer", "cer"] + + if cfg.use_punct_er: + metrics_to_compute.append("punct_er") + + samples_with_metrics = compute_metrics_per_sample( + manifest_path=cfg.dataset_manifest, + reference_field="text", + hypothesis_field="pred_text", + metrics=metrics_to_compute, + punctuation_marks=cfg.text_processing.punctuation_marks, + output_manifest_path=cfg.output_filename, + ) + # Compute the WER cer = word_error_rate(hypotheses=predicted_text, references=ground_truth_text, use_cer=True) wer = word_error_rate(hypotheses=predicted_text, references=ground_truth_text, use_cer=False) @@ -173,6 +209,10 @@ def main(cfg: EvaluationConfig): logging.info(f'Dataset WER/CER ' + str(round(100 * wer, 2)) + "%/" + str(round(100 * cer, 2)) + "%") + if cfg.use_punct_er: + dper_obj.print() + dper_obj.reset() + # Inject the metric name and score into the config, and return the entire config with open_dict(cfg): cfg.metric_name = metric_name diff --git a/nemo/collections/asr/parts/utils/transcribe_utils.py b/nemo/collections/asr/parts/utils/transcribe_utils.py index 640590fcc1b0..38f25673282b 100644 --- a/nemo/collections/asr/parts/utils/transcribe_utils.py +++ b/nemo/collections/asr/parts/utils/transcribe_utils.py @@ -23,9 +23,11 @@ from tqdm.auto import tqdm import nemo.collections.asr as nemo_asr +from nemo.collections.asr.metrics.wer import word_error_rate from nemo.collections.asr.models import ASRModel, EncDecHybridRNNTCTCModel from nemo.collections.asr.parts.utils import rnnt_utils from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchASR +from nemo.collections.common.metrics.punct_er import OccurancePunctuationErrorRate from nemo.collections.common.parts.preprocessing.manifest import get_full_path from nemo.utils import logging, model_utils @@ -463,6 +465,96 @@ def transcribe_partial_audio( return hypotheses +def compute_metrics_per_sample( + manifest_path: str, + reference_field: str = "text", + hypothesis_field: str = "pred_text", + metrics: list[str] = ["wer"], + punctuation_marks: list[str] = [".", ",", "?"], + output_manifest_path: str = None, +) -> dict: + + ''' + Computes metrics per sample for given manifest + + Args: + manifest_path: str, Required - path to dataset JSON manifest file (in NeMo format) + reference_field: str, Optional - name of field in .json manifest with the reference text ("text" by default). + hypothesis_field: str, Optional - name of field in .json manifest with the hypothesis text ("pred_text" by default). + metrics: list[str], Optional - list of metrics to be computed (currently supported "wer", "cer", "punct_er") + punctuation_marks: list[str], Optional - list of punctuation marks for computing punctuation error rate ([".", ",", "?"] by default). + output_manifest_path: str, Optional - path where .json manifest with calculated metrics will be saved. + + Returns: + samples: dict - Dict of samples with calculated metrics + ''' + + supported_metrics = ["wer", "cer", "punct_er"] + + if len(metrics) == 0: + raise AssertionError( + f"'metrics' list is empty. \ + Select the metrics from the supported: {supported_metrics}." + ) + + for metric in metrics: + if metric not in supported_metrics: + raise AssertionError( + f"'{metric}' metric is not supported. \ + Currently supported metrics are {supported_metrics}." + ) + + if "punct_er" in metrics: + if len(punctuation_marks) == 0: + raise AssertionError("punctuation_marks list can't be empty when 'punct_er' metric is enabled.") + else: + oper_obj = OccurancePunctuationErrorRate(punctuation_marks=punctuation_marks) + + use_wer = "wer" in metrics + use_cer = "cer" in metrics + use_punct_er = "punct_er" in metrics + + with open(manifest_path, 'r') as manifest: + lines = manifest.readlines() + samples = [json.loads(line) for line in lines] + samples_with_metrics = [] + + logging.info(f"Computing {', '.join(metrics)} per sample") + + for sample in tqdm(samples): + reference = sample[reference_field] + hypothesis = sample[hypothesis_field] + + if use_wer: + sample_wer = word_error_rate(hypotheses=[hypothesis], references=[reference], use_cer=False) + sample["wer"] = round(100 * sample_wer, 2) + + if use_cer: + sample_cer = word_error_rate(hypotheses=[hypothesis], references=[reference], use_cer=True) + sample["cer"] = round(100 * sample_cer, 2) + + if use_punct_er: + operation_amounts, substitution_amounts, punctuation_rates = oper_obj.compute( + reference=reference, hypothesis=hypothesis + ) + sample["punct_correct_rate"] = round(100 * punctuation_rates.correct_rate, 2) + sample["punct_deletions_rate"] = round(100 * punctuation_rates.deletions_rate, 2) + sample["punct_insertions_rate"] = round(100 * punctuation_rates.insertions_rate, 2) + sample["punct_substitutions_rate"] = round(100 * punctuation_rates.substitutions_rate, 2) + sample["punct_error_rate"] = round(100 * punctuation_rates.punct_er, 2) + + samples_with_metrics.append(sample) + + if output_manifest_path is not None: + with open(output_manifest_path, 'w') as output: + for sample in samples_with_metrics: + line = json.dumps(sample) + output.writelines(f'{line}\n') + logging.info(f'Output manifest saved: {output_manifest_path}') + + return samples_with_metrics + + class PunctuationCapitalization: def __init__(self, punctuation_marks: str): """ diff --git a/nemo/collections/common/metrics/punct_er.py b/nemo/collections/common/metrics/punct_er.py new file mode 100644 index 000000000000..933c1581f016 --- /dev/null +++ b/nemo/collections/common/metrics/punct_er.py @@ -0,0 +1,473 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +from collections import namedtuple +from tqdm import tqdm + +from nemo.utils import logging + +try: + import pandas as pd + from tabulate import tabulate + + HAVE_TABLUATE_AND_PANDAS = True +except (ImportError, ModuleNotFoundError): + HAVE_TABLUATE_AND_PANDAS = False + + +def punctuation_error_rate( + references: list[str], hypotheses: list[str], punctuation_marks: list[str], punctuation_mask: str = "[PUNCT]", +) -> None: + + """ + Computes Punctuation Error Rate + + Args: + references (list[str]) - list of references + hypotheses (list[str]) - list of hypotheses + punctuation_marks (list[str]) - list of punctuation marks for computing metrics + punctuation_mask (str, by default "[PUNCT]") - mask token that will be applied to + given punctuation marks while edit distance calculation + + Return: + punct_er (float) - Punctuation Error Rate + """ + + dper_obj = DatasetPunctuationErrorRate( + references=references, + hypotheses=hypotheses, + punctuation_marks=punctuation_marks, + punctuation_mask=punctuation_mask, + ) + + dper_obj.compute() + + return dper_obj.punct_er + + +class OccurancePunctuationErrorRate: + """ + Class for computation puncutation-related absolute amounts of operations and thier rates + between reference and hypothesis strings: + - Absolute amounts of correct predictions, deletions, insertions + and substitutions for each given punctuation mark + - Rates of correct predictions, deletions, insertions + and substitutions for each given punctuation mark + - Overall rates of correct predictions, deletions, insertions + and substiturions between reference and hypothesis string + - Punctuation Error Rate + + Args to init: + punctuation_marks (list[str]) - list of punctuation marks for computing metrics + punctuation_mask (str, by default "[PUNCT]") - mask token that will be applied to + given punctuation marks while edit distance calculation + + How to use: + 1. Create object of OccurancePunctuationErrorRate class. + Example: + punctuation_marks = [".", ",", "!", "?"] + oper_obj = OccurancePunctuationErrorRate(punctuation_marks) + + 2. To compute punctuation metrics, pass reference and hypothesis string to the "compute" method + of created object. + Example: + reference_str = "Hi, dear! Nice to see you. What's" + hypothesis_str = "Hi dear! Nice to see you! What's?" + oper_obj.compute(reference_str, hypothesis_str) + + Output (listed in order of output): + 1. Dict of absolute operations amounts for each given punctuation mark: + Example: + {'.': {'Correct': 0, 'Deletions': 0, 'Insertions': 0, 'Substitutions': 1}, + ',': {'Correct': 0, 'Deletions': 1, 'Insertions': 0, 'Substitutions': 0}, + '!': {'Correct': 1, 'Deletions': 0, 'Insertions': 0, 'Substitutions': 0}, + '?': {'Correct': 0, 'Deletions': 0, 'Insertions': 1, 'Substitutions': 0}} + + 2. Dict of substitutions absolute amounts between given punctuation marks: + Example: + {'.': {'.': 0, ',': 0, '!': 1, '?': 0}, + ',': {'.': 0, ',': 0, '!': 0, '?': 0}, + '!': {'.': 0, ',': 0, '!': 0, '?': 0}, + '?': {'.': 0, ',': 0, '!': 0, '?': 0}} + + 3. namedtuple "PunctuationRates" of punctuation operation rates (in range from 0 to 1): + 3.1. correct_rate - overall correct rate + Example: correct_rate=0.25 + 3.2. deletions_rate - overall deletions rate + Example: deletions_rate=0.25 + 3.3. insertions_rate - overall insertions rate + Example: insertions_rate=0.25 + 3.4. substitutions_rate - overall substitutions_rate + Example: substitutions_rate=0.25 + 3.5. punct_er - Punctuation Error Rate + Example: punct_er=0.75 + 3.6. operation_rates - dict of operations rates for each given punctuation mark + Example: + operation_rates={ + '.': {'Correct': 0.0, 'Deletions': 0.0, 'Insertions': 0.0, 'Substitutions': 1.0}, + ',': {'Correct': 0.0, 'Deletions': 1.0, 'Insertions': 0.0, 'Substitutions': 0.0}, + '!': {'Correct': 1.0, 'Deletions': 0.0, 'Insertions': 0.0, 'Substitutions': 0.0}, + '?': {'Correct': 0.0, 'Deletions': 0.0, 'Insertions': 1.0, 'Substitutions': 0.0} + } + + 3.7. substitution_rates - dict of substitution rates for each given punctuation mark + Example: + substitution_rates={ + '.': {'.': 0.0, ',': 0.0, '!': 1.0, '?': 0.0}, + ',': {'.': 0.0, ',': 0.0, '!': 0.0, '?': 0.0}, + '!': {'.': 0.0, ',': 0.0, '!': 0.0, '?': 0.0}, + '?': {'.': 0.0, ',': 0.0, '!': 0.0, '?': 0.0} + } + """ + + def __init__(self, punctuation_marks: list[str], punctuation_mask: str = "[PUNCT]") -> None: + + assert len(punctuation_marks) != 0, f"List of punctuation marks is empty" + + self.punctuation_marks = punctuation_marks + self.punctuation_mask = punctuation_mask + + self.operations = ["Correct", "Deletions", "Insertions", "Substitutions"] + + def compute_rates(self, operation_amounts: dict, substitution_amounts: dict): + operation_rates = {pm: {operation: 0 for operation in self.operations} for pm in self.punctuation_marks} + substitution_rates = {pm: {pm: 0 for pm in self.punctuation_marks} for pm in self.punctuation_marks} + + for pm in self.punctuation_marks: + operations_amount_by_pm = sum(operation_amounts[pm].values()) + + if operations_amount_by_pm == 0: + continue + + operation_rates[pm] = { + operation: (operation_amounts[pm][operation] / operations_amount_by_pm) + for operation in self.operations + } + + substitution_rates[pm] = { + _pm: (substitution_amounts[pm][_pm] / operations_amount_by_pm) + for _pm in substitution_amounts[pm].keys() + } + + _operation_amounts = { + operation: {pm: operation_amounts[operation] for pm, operation_amounts in operation_amounts.items()} + for operation in self.operations + } + + overall_amounts_by_operation = { + operation: sum(_operation_amounts[operation].values()) for operation in _operation_amounts + } + overall_operations_amount = sum(overall_amounts_by_operation.values()) + + punctuation_rates = namedtuple( + 'PunctuationRates', + [ + 'correct_rate', + 'deletions_rate', + 'insertions_rate', + 'substitutions_rate', + 'punct_er', + 'operation_rates', + 'substitution_rates', + ], + ) + + if overall_operations_amount == 0: + rates = punctuation_rates(0, 0, 0, 0, 0, operation_rates, substitution_rates) + else: + correct_rate = overall_amounts_by_operation["Correct"] / overall_operations_amount + deletions_rate = overall_amounts_by_operation["Deletions"] / overall_operations_amount + insertions_rate = overall_amounts_by_operation["Insertions"] / overall_operations_amount + substitutions_rate = overall_amounts_by_operation["Substitutions"] / overall_operations_amount + punct_er = deletions_rate + insertions_rate + substitutions_rate + + rates = punctuation_rates( + correct_rate, + deletions_rate, + insertions_rate, + substitutions_rate, + punct_er, + operation_rates, + substitution_rates, + ) + + return rates + + def compute_operation_amounts(self, reference: str, hypothesis: str): + operation_amounts = {pm: {operation: 0 for operation in self.operations} for pm in self.punctuation_marks} + substitution_amounts = {pm: {pm: 0 for pm in self.punctuation_marks} for pm in self.punctuation_marks} + + def tokenize(text: str, punctuation_marks: list[str]): + punctuation_marks = "\\" + "\\".join(self.punctuation_marks) + tokens = re.findall(rf"[\w']+|[{punctuation_marks}]", text) + return tokens + + def mask_punct_tokens(tokens: list[str], punctuation_marks: list[str], punctuation_mask: str): + masked = [punctuation_mask if token in punctuation_marks else token for token in tokens] + return masked + + r_tokens = tokenize(reference, self.punctuation_marks) + h_tokens = tokenize(hypothesis, self.punctuation_marks) + + r_masked = mask_punct_tokens(r_tokens, self.punctuation_marks, self.punctuation_mask) + h_masked = mask_punct_tokens(h_tokens, self.punctuation_marks, self.punctuation_mask) + + r_punct_amount = r_masked.count(self.punctuation_mask) + h_punct_amount = h_masked.count(self.punctuation_mask) + + if r_punct_amount + h_punct_amount == 0: + return operation_amounts, substitution_amounts + + r_len = len(r_masked) + h_len = len(h_masked) + + costs = [[0 for inner in range(h_len + 1)] for outer in range(r_len + 1)] + backtrace = [[0 for inner in range(h_len + 1)] for outer in range(r_len + 1)] + + COR = 'C' + DEL, DEL_PENALTY = 'D', 1 + INS, INS_PENALTY = 'I', 1 + SUB, SUB_PENALTY = 'S', 1 + + for i in range(1, r_len + 1): + costs[i][0] = DEL_PENALTY * i + backtrace[i][0] = DEL + + for j in range(1, h_len + 1): + costs[0][j] = INS_PENALTY * j + backtrace[0][j] = INS + + for j in range(1, h_len + 1): + costs[0][j] = INS_PENALTY * j + backtrace[0][j] = INS + + for i in range(1, r_len + 1): + for j in range(1, h_len + 1): + if r_masked[i - 1] == h_masked[j - 1]: + costs[i][j] = costs[i - 1][j - 1] + backtrace[i][j] = COR + else: + substitution_cost = costs[i - 1][j - 1] + SUB_PENALTY + insertion_cost = costs[i][j - 1] + INS_PENALTY + deletion_cost = costs[i - 1][j] + DEL_PENALTY + + costs[i][j] = min(substitution_cost, insertion_cost, deletion_cost) + if costs[i][j] == substitution_cost: + backtrace[i][j] = SUB + elif costs[i][j] == insertion_cost: + backtrace[i][j] = INS + else: + backtrace[i][j] = DEL + + i = r_len + j = h_len + + while i > 0 or j > 0: + if backtrace[i][j] == COR: + if r_masked[i - 1] == self.punctuation_mask or h_masked[j - 1] == self.punctuation_mask: + r_token = r_tokens[i - 1] + h_token = h_tokens[j - 1] + + if r_token == h_token: + operation_amounts[r_token]['Correct'] += 1 + else: + operation_amounts[r_token]['Substitutions'] += 1 + substitution_amounts[r_token][h_token] += 1 + i -= 1 + j -= 1 + + elif backtrace[i][j] == SUB: + i -= 1 + j -= 1 + + elif backtrace[i][j] == INS: + j -= 1 + + elif backtrace[i][j] == DEL: + i -= 1 + + for pm in self.punctuation_marks: + num_of_correct = operation_amounts[pm]['Correct'] + + num_substitutions_of_pm = operation_amounts[pm]['Substitutions'] + num_substitutions_to_pm = sum([substitution_amounts[_pm][pm] for _pm in self.punctuation_marks]) + + num_of_deletions = r_tokens.count(pm) - (num_of_correct + num_substitutions_of_pm) + operation_amounts[pm]['Deletions'] = num_of_deletions + + num_of_insertions = h_tokens.count(pm) - (num_of_correct + num_substitutions_to_pm) + operation_amounts[pm]['Insertions'] = num_of_insertions + + return operation_amounts, substitution_amounts + + def compute(self, reference: str, hypothesis: str): + operation_amounts, substitution_amounts = self.compute_operation_amounts(reference, hypothesis) + punctuation_rates = self.compute_rates(operation_amounts, substitution_amounts) + return operation_amounts, substitution_amounts, punctuation_rates + + +class DatasetPunctuationErrorRate: + """ + Class for computation the total puncutation-related absolute amounts of operations and their rates + in pairs of reference and hypothesis strins: + - Absolute amounts of correct predictions, deletions, insertions + and substitutions for each given punctuation mark + - Rates of correct predictions, deletions, insertions + and substitutions for each given punctuation mark + - Total rates of correct predictions, deletions, insertions + and substiturions in pairs of reference and hypothesis strings + - Punctuation Error Rate + + Args to init: + references (list[str]) - list of references + hypotheses (list[str]) - list of hypotheses + punctuation_marks (list[str]) - list of punctuation marks for computing metrics + punctuation_mask (str, by default "[PUNCT]") - mask token that will be applied to + given punctuation marks while edit distance calculation + + How to use: + 1. Create object of DatasetPunctuationErrorRate class. + Example: + references = ["Hi, dear! Nice to see you. What's"] + hypotheses = ["Hi dear! Nice to see you! What's?"] + punctuation_marks = [".", ",", "!", "?"] + + dper_obj = DatasetPunctuationErrorRate(references, hypotheses, punctuation_marks) + + 2. To compute punctuation metrics, call the class method "compute()". + Example: + dper_obj.compute() + + Result: + The following atributes of class object will be updated with calculated metrics values. + The values are available with calling the atributes: + + dper_obj.operation_rates - dict, rates of correctness and errors for each punctuation mark + from `preset dper_obj.punctuation_marks` list. + + dper_obj.substitution_rates - dict, substitution rates between puncutation marks from + `preset dper_obj.punctuation_marks` list. + + dper_obj.correct_rate - float, total rate of correctness between provided pairs of + references and hypotheses. + + dper_obj.deletions_rate - float, total rate of deletions between provided pairs of + references and hypotheses. + + dper_obj.insertions_rate - float, total rate of insertions between provided pairs of + references and hypotheses. + + dper_obj.substitutions_rate - float, total rate of substitutions between provided pairs of + references and hypotheses. + + dper_obj.punct_er - float, total Punctuation Error Rate between provided pairs of + references and hypotheses. + """ + + def __init__( + self, + references: list[str], + hypotheses: list[str], + punctuation_marks: list[str], + punctuation_mask: str = "[PUNCT]", + ) -> None: + + self.references = references + self.hypotheses = hypotheses + self.punctuation_marks = punctuation_marks + self.punctuation_mask = punctuation_mask + + self.oper_obj = OccurancePunctuationErrorRate( + punctuation_marks=self.punctuation_marks, punctuation_mask=self.punctuation_mask + ) + + self.operation_amounts = [] + self.substitution_amounts = [] + self.rates = [] + + self.operation_rates = None + self.substitution_rates = None + self.correct_rate = None + self.deletions_rate = None + self.insertions_rate = None + self.substitutions_rate = None + self.punct_er = None + + def compute(self): + def sum_amounts(amounts_dicts: list[dict]): + amounts = {key: {_key: 0 for _key in amounts_dicts[0][key]} for key in amounts_dicts[0].keys()} + + for amounts_dict in amounts_dicts: + for outer_key, inner_dict in amounts_dict.items(): + for inner_key, value in inner_dict.items(): + amounts[outer_key][inner_key] += value + return amounts + + logging.info("Computing Punctuation Error Rate") + + for reference, hypothesis in tqdm(zip(self.references, self.hypotheses), total=len(self.references)): + operation_amounts, substitution_amounts, punctuation_rates = self.oper_obj.compute(reference, hypothesis) + self.operation_amounts.append(operation_amounts) + self.substitution_amounts.append(substitution_amounts) + self.rates.append(punctuation_rates) + + overall_operation_amounts = sum_amounts(self.operation_amounts) + overall_substitution_amounts = sum_amounts(self.substitution_amounts) + overall_rates = self.oper_obj.compute_rates( + operation_amounts=overall_operation_amounts, substitution_amounts=overall_substitution_amounts + ) + + self.operation_rates = overall_rates.operation_rates + self.substitution_rates = overall_rates.substitution_rates + self.correct_rate = overall_rates.correct_rate + self.deletions_rate = overall_rates.deletions_rate + self.insertions_rate = overall_rates.insertions_rate + self.substitutions_rate = overall_rates.substitutions_rate + self.punct_er = overall_rates.punct_er + + def reset(self): + self.operation_amounts = [] + self.substitution_amounts = [] + self.rates = [] + + self.operation_rates = None + self.substitution_rates = None + self.correct_rate = None + self.deletions_rate = None + self.insertions_rate = None + self.substitutions_rate = None + self.punct_er = None + + def print(self): + logging.info(f'Dataset PER ' + str(round(100 * self.punct_er, 2)) + '%') + + if HAVE_TABLUATE_AND_PANDAS: + rates_by_pm_df = pd.DataFrame(self.operation_rates) * 100 + substitution_rates_by_pm_df = pd.DataFrame(self.substitution_rates) * 100 + + logging.info( + "Rates of punctuation correctness and errors (%):\n" + + tabulate(rates_by_pm_df, headers='keys', tablefmt='psql') + ) + logging.info( + "Substitution rates between punctuation marks (%):\n" + + tabulate(substitution_rates_by_pm_df, headers='keys', tablefmt='psql') + ) + else: + logging.warning("Some of the modules (pandas or tabulate) can't be imported") + logging.info(f"Rates of punctuation correctness and errors (in range [0, 1]):\n{self.operation_rates}\n") + logging.info( + f"Substitution rates between punctuation marks (in range [0, 1]):\n{self.substitution_rates}\n" + ) diff --git a/tests/collections/common/test_metrics.py b/tests/collections/common/test_metrics.py index e4bfde635a06..f9005232a017 100644 --- a/tests/collections/common/test_metrics.py +++ b/tests/collections/common/test_metrics.py @@ -16,6 +16,11 @@ import torch from nemo.collections.common.metrics.classification_accuracy import TopKClassificationAccuracy +from nemo.collections.common.metrics.punct_er import ( + DatasetPunctuationErrorRate, + OccurancePunctuationErrorRate, + punctuation_error_rate, +) from .loss_inputs import ALL_NUM_MEASUREMENTS_ARE_ZERO, NO_ZERO_NUM_MEASUREMENTS, SOME_NUM_MEASUREMENTS_ARE_ZERO from .perplexity_inputs import NO_PROBS_NO_LOGITS, ONLY_LOGITS1, ONLY_LOGITS100, ONLY_PROBS, PROBS_AND_LOGITS @@ -149,3 +154,71 @@ def test_loss(self, ddp, dist_sync_on_step, loss_sum_or_avg, num_measurements, t dist_sync_on_step=dist_sync_on_step, take_avg_loss=take_avg_loss, ) + + +class TestPunctuationErrorRate: + reference = "Hi, dear! Nice to see you. What's" + hypothesis = "Hi dear! Nice to see you! What's?" + punctuation_marks = [".", ",", "!", "?"] + + operation_amounts = { + '.': {'Correct': 0, 'Deletions': 0, 'Insertions': 0, 'Substitutions': 1}, + ',': {'Correct': 0, 'Deletions': 1, 'Insertions': 0, 'Substitutions': 0}, + '!': {'Correct': 1, 'Deletions': 0, 'Insertions': 0, 'Substitutions': 0}, + '?': {'Correct': 0, 'Deletions': 0, 'Insertions': 1, 'Substitutions': 0}, + } + substitution_amounts = { + '.': {'.': 0, ',': 0, '!': 1, '?': 0}, + ',': {'.': 0, ',': 0, '!': 0, '?': 0}, + '!': {'.': 0, ',': 0, '!': 0, '?': 0}, + '?': {'.': 0, ',': 0, '!': 0, '?': 0}, + } + correct_rate = 0.25 + deletions_rate = 0.25 + insertions_rate = 0.25 + substitutions_rate = 0.25 + punct_er = 0.75 + operation_rates = { + '.': {'Correct': 0.0, 'Deletions': 0.0, 'Insertions': 0.0, 'Substitutions': 1.0}, + ',': {'Correct': 0.0, 'Deletions': 1.0, 'Insertions': 0.0, 'Substitutions': 0.0}, + '!': {'Correct': 1.0, 'Deletions': 0.0, 'Insertions': 0.0, 'Substitutions': 0.0}, + '?': {'Correct': 0.0, 'Deletions': 0.0, 'Insertions': 1.0, 'Substitutions': 0.0}, + } + substitution_rates = { + '.': {'.': 0.0, ',': 0.0, '!': 1.0, '?': 0.0}, + ',': {'.': 0.0, ',': 0.0, '!': 0.0, '?': 0.0}, + '!': {'.': 0.0, ',': 0.0, '!': 0.0, '?': 0.0}, + '?': {'.': 0.0, ',': 0.0, '!': 0.0, '?': 0.0}, + } + + @pytest.mark.unit + def test_punctuation_error_rate(self): + assert punctuation_error_rate([self.reference], [self.hypothesis], self.punctuation_marks) == self.punct_er + + @pytest.mark.unit + def test_OccurancePunctuationErrorRate(self): + oper_obj = OccurancePunctuationErrorRate(self.punctuation_marks) + operation_amounts, substitution_amounts, punctuation_rates = oper_obj.compute(self.reference, self.hypothesis) + + assert operation_amounts == self.operation_amounts + assert substitution_amounts == self.substitution_amounts + assert punctuation_rates.correct_rate == self.correct_rate + assert punctuation_rates.deletions_rate == self.deletions_rate + assert punctuation_rates.insertions_rate == self.insertions_rate + assert punctuation_rates.substitutions_rate == self.substitutions_rate + assert punctuation_rates.punct_er == self.punct_er + assert punctuation_rates.operation_rates == self.operation_rates + assert punctuation_rates.substitution_rates == self.substitution_rates + + @pytest.mark.unit + def test_DatasetPunctuationErrorRate(self): + dper_obj = DatasetPunctuationErrorRate([self.reference], [self.hypothesis], self.punctuation_marks) + dper_obj.compute() + + assert dper_obj.correct_rate == self.correct_rate + assert dper_obj.deletions_rate == self.deletions_rate + assert dper_obj.insertions_rate == self.insertions_rate + assert dper_obj.substitutions_rate == self.substitutions_rate + assert dper_obj.punct_er == self.punct_er + assert dper_obj.operation_rates == self.operation_rates + assert dper_obj.substitution_rates == self.substitution_rates From 70c0a3779789eb82e6f5b93314f5ddeabd2998fb Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 10 Oct 2023 12:53:33 +0300 Subject: [PATCH 313/512] conversion issue fix (#7648) (#7668) Signed-off-by: dimapihtar Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> --- .../nlp/models/language_modeling/megatron_gpt_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 44cb100304c6..c84a1d04e046 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1332,7 +1332,7 @@ def on_load_checkpoint(self, checkpoint) -> None: # mcore uses distributed checkpointing if self.mcore_gpt: - if 'state_dict' in checkpoint: + if 'state_dict' in checkpoint and checkpoint['state_dict']: for index, module in enumerate(self.get_gpt_module_list()): if parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None: checkpoint_state_dict = checkpoint['state_dict'][f'model_{index}'] From b7bcf085a51f335ffd041f5a83bc7aefaa8f7119 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 10 Oct 2023 12:54:49 +0300 Subject: [PATCH 314/512] layernorm1p fix (#7523) (#7567) * layernorm1p fix * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add layernorm1p to if statement * config changes * gpt config changes * remove layernorm_zero_centered_gamma from gpt config * change line --------- Signed-off-by: dimapihtar Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../nlp/models/language_modeling/megatron_gpt_model.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index c84a1d04e046..045023717533 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1514,10 +1514,14 @@ def build_transformer_config(self) -> TransformerConfig: activation_func = activation_to_func(activation) normalization = self.cfg.get('normalization', 'layernorm') + layernorm_zero_centered_gamma = self.cfg.get('normalization', 'layernorm') == 'layernorm1p' if normalization == 'layernorm': normalization = 'LayerNorm' elif normalization == 'rmsnorm': normalization = 'RMSNorm' + elif normalization == 'layernorm1p': + normalization = 'LayerNorm' + layernorm_zero_centered_gamma = True else: logging.warning( f"The normalization type: {normalization} might not be supported in megatron core." @@ -1561,7 +1565,7 @@ def build_transformer_config(self) -> TransformerConfig: # any configs that are not in the nemo model config will be added here config_mapping = { 'apply_residual_connection_post_layernorm': False, # we don't use this in NeMo - 'layernorm_zero_centered_gamma': False, # not currently used in NeMo + 'layernorm_zero_centered_gamma': layernorm_zero_centered_gamma, 'add_bias_linear': add_bias_linear, 'gated_linear_unit': gated_linear_unit, 'activation_func': activation_func, From b3da442f0a1e1fe3b8c94d22fe65091ba17a76ce Mon Sep 17 00:00:00 2001 From: Yi Dong <43824965+yidong72@users.noreply.github.com> Date: Tue, 10 Oct 2023 08:22:55 -0400 Subject: [PATCH 315/512] generalized chat sft prompt (#7655) * fix dataset issues Signed-off-by: Yi Dong * working version Signed-off-by: Yi Dong * all passed Signed-off-by: Yi Dong * refactor tests Signed-off-by: Yi Dong * all pass Signed-off-by: Yi Dong * working version Signed-off-by: Yi Dong * use end name signal for labels Signed-off-by: Yi Dong * all fixed Signed-off-by: Yi Dong * update doc Signed-off-by: Yi Dong * style fix Signed-off-by: Yi Dong * remove unused imports Signed-off-by: Yi Dong * make sure nccl not timing out Signed-off-by: Yi Dong * style fix Signed-off-by: Yi Dong * generate example template Signed-off-by: Yi Dong * generic end of name token Signed-off-by: Yi Dong * style fix Signed-off-by: Yi Dong * add the chat prompt format into the config Signed-off-by: Yi Dong * make sure sft working Signed-off-by: Yi Dong * address reviewer comment Signed-off-by: Yi Dong * fix non Signed-off-by: Yi Dong * try openAI prompt Signed-off-by: Yi Dong * remove unused imports Signed-off-by: Yi Dong * remove human labels from the data Signed-off-by: Yi Dong * use hf dataset to clean Signed-off-by: Yi Dong * reviewer comments Signed-off-by: Yi Dong --------- Signed-off-by: Yi Dong --- .../language_modeling/megatron_gpt_eval.py | 7 +- .../tuning/conf/megatron_gpt_sft.yaml | 6 + .../tuning/megatron_gpt_sft.py | 12 +- .../megatron/gpt_sft_chat_dataset.py | 257 +++++++---- .../megatron/gpt_sft_dataset.py | 18 +- .../megatron_gpt_sft_model.py | 5 +- .../nlp_language_modeling/sft/data_clean.py | 2 +- .../sft/preprocessing.py | 11 +- .../collections/nlp/test_chat_sft_dataset.py | 408 ++++++++++++------ 9 files changed, 488 insertions(+), 238 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_gpt_eval.py b/examples/nlp/language_modeling/megatron_gpt_eval.py index 04125c6f750e..c9eb013b64e9 100644 --- a/examples/nlp/language_modeling/megatron_gpt_eval.py +++ b/examples/nlp/language_modeling/megatron_gpt_eval.py @@ -13,6 +13,7 @@ # limitations under the License. import asyncio +import datetime import os import threading from functools import partial @@ -167,7 +168,11 @@ def remove_padded_prompts(response, nb_paddings): def main(cfg) -> None: # trainer required for restoring model parallel models - trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer, callbacks=[CustomProgressBar()]) + trainer = Trainer( + strategy=NLPDDPStrategy(timeout=datetime.timedelta(seconds=18000)), + **cfg.trainer, + callbacks=[CustomProgressBar()], + ) if cfg.gpt_model_file is not None: if ( diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml index b0b8eedb5633..27e73996225f 100644 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml +++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml @@ -71,6 +71,12 @@ model: data: chat: False # whether use chatbot data or not + chat_prompt_tokens: # special tokens for the chat prompts, a dictionary of {token_type: token}. note that some tokenizer may combine the characters at the junction between {end_of_turn}{turn_start}. e.g. '', the '><' sometimes is merged to be a single token. This is not supported, try to avoid + system_turn_start: '' + turn_start: '' + label_start: '' + end_of_turn: "\x0A" # \0x0A is '\n' + end_of_name: "\x0A" # \0x0A is '\n' train_ds: # Example of how to specify paths to multiple datasets # file_names: diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py index a4888049cb42..79dd20fcf84a 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py @@ -15,11 +15,12 @@ import os import tempfile +import torch.multiprocessing as mp from omegaconf.omegaconf import OmegaConf, open_dict from pytorch_lightning import Trainer from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from pytorch_lightning.trainer.connectors.checkpoint_connector import _CheckpointConnector +from nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_chat_dataset import get_prompt_template_example from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTSFTModel from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel from nemo.collections.nlp.parts.nlp_overrides import ( @@ -36,6 +37,8 @@ from nemo.utils.exp_manager import exp_manager from nemo.utils.model_utils import inject_model_parallel_rank +mp.set_start_method("spawn", force=True) + def _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False): """ @@ -71,6 +74,13 @@ def _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False): gpt_cfg.pipeline_model_parallel_size = cfg.model.get('pipeline_model_parallel_size', 1) gpt_cfg.pipeline_model_parallel_split_rank = cfg.model.get('pipeline_model_parallel_split_rank', 0) + if cfg.model.data.get('chat', False): + # chat model, overwrite the prompt template + prompt_template = get_prompt_template_example(cfg.model.data.chat_prompt_tokens) + gpt_cfg.data.train_ds.prompt_template = prompt_template + gpt_cfg.data.validation_ds.prompt_template = prompt_template + gpt_cfg.data.test_ds.prompt_template = prompt_template + sft_cls = MegatronGPTSFTModel gpt_cfg.target = f"{sft_cls.__module__}.{sft_cls.__name__}" diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py index 801a58394f06..96cc57a300b8 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py @@ -16,19 +16,19 @@ import torch -from nemo.collections.common.tokenizers.sentencepiece_tokenizer import SentencePieceTokenizer from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec from nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset import GPTSFTDataset from nemo.utils import logging -__all__ = ['GPTSFTChatDataset'] +__all__ = ['GPTSFTChatDataset', 'get_prompt_template_example'] -IGNORE_INDEX = -100 -END_SIGNAL = "\n" -END_NAME_SIGNAL = "\n" -SYSTEM_TOKEN = "System\n" -TURN_TOKEN = "" +PREFIX_STR = ( + "\x00" # the prefix string used in the tokenizer to deal with the added empty token for some of the tokenizers +) + +IGNORE_INDEX = -100 +SYSTEM_TOKEN = "System" TYPE_INSTRUCTION = { 'TEXT_TO_VALUE': "", @@ -36,6 +36,56 @@ } +def _get_header_conversation_type_mask_role(source, special_tokens): + END_SIGNAL = special_tokens['end_of_turn'] + END_NAME_SIGNAL = special_tokens['end_of_name'] + + data_type = None + if 'type' in source: + data_type = source['type'] + if data_type is not None: + assert data_type in TYPE_INSTRUCTION, f"source type {data_type} not supported" + # add end signal and concatenate together + conversation = source['system'] + if data_type is not None: + if TYPE_INSTRUCTION[data_type] != '': + conversation = conversation + '\n' + TYPE_INSTRUCTION[data_type] + mask_role = source.get('mask', 'User') + header = f"{special_tokens['system_turn_start']}{SYSTEM_TOKEN}{END_NAME_SIGNAL}{conversation}{END_SIGNAL}" + conversation = _add_speaker_and_signal(header, source['conversations'], mask_role, data_type, special_tokens) + return header, conversation, data_type, mask_role + + +def get_prompt_template_example(special_tokens): + source = { + 'system': '{system message}', + 'conversations': [ + {'from': 'User', 'value': '{turn 1 user message}', 'label': None}, + {'from': 'Assistant', 'value': '{turn 1 assistant message}', 'label': '{turn 1 assistant label}'}, + {'from': 'User', 'value': '{turn 2 user message}', 'label': None}, + {'from': 'Assistant', 'value': '{turn 2 assistant message}', 'label': '{turn 2 assistant label}'}, + ], + "mask": "User", + "type": "VALUE_TO_TEXT", + } + _, conversation, _, _ = _get_header_conversation_type_mask_role(source, special_tokens) + return conversation + + +def identify_start_index_of_subsequence(subsequence, sequence): + """ find the location of the small tensor in the large tensor. + e.g. small = [1,3], large = [2,3,1,3], returns 2 + small = [3,2], large = [2,3,1,3], returns -1 + Args: + small (tensor): small tensor + large (tensor): large tensor + """ + for i in range(sequence.size(0) - subsequence.size(0) + 1): + if torch.equal(sequence[i : i + subsequence.size(0)], subsequence): + return i + return -1 + + def _mask_targets( target, tokenized_lens, @@ -45,8 +95,10 @@ def _mask_targets( tokenizer, mask_role, gtype, - extra_id_2_token_id, - new_line_token_id, + name_end_token_ids, + special_tokens, + label_start_ids, + num_turn_start_tokens, ): """ This function masks the tokens so the loss is computed only on the non-masked role's responses. For 'TEXT_TO_VALUE' type, the loss is computed on the value attributes. @@ -60,68 +112,88 @@ def _mask_targets( tokenizer (TokenizerSpec): tokenizer object mask_role (str): the speaker id to be masked from loss computation gtype (str): either 'TEXT_TO_VALUE' or 'VALUE_TO_TEXT' - extra_id_2_token_id (int): token id - new_line_token_id (int): new line token id - + name_end_token_ids (int): end of name token ids + special_tokens (dict): special tokens used for the chat prompt. It has the keys: system_turn_start, turn_start, label_start, end_of_turn + label_start_ids (list): list of label start token ids, + num_turn_start_tokens (int): number of tokens of the turn_start str """ + TURN_TOKEN = special_tokens['turn_start'] + END_NAME_SIGNAL = special_tokens['end_of_name'] + label_start_ids = torch.tensor(label_start_ids) + name_end_token_ids = torch.tensor(name_end_token_ids) + cur_idx = header_len tgt_len = target.shape[0] for i, (tokenized_len, speaker, s_id) in enumerate(zip(tokenized_lens, speakers, s_ids)): # note, sentence piece will add extra empty token in front. has to compute the diff - id1 = tokenizer.text_to_ids("") - id2 = tokenizer.text_to_ids("" + TURN_TOKEN + speaker + END_NAME_SIGNAL) - skip_name_len = len(id2) - len(id1) - if extra_id_2_token_id is None: - raise ValueError("extra_id_2 is not in the vocabulary") - if (s_id == extra_id_2_token_id).any().item(): + id1 = tokenizer.text_to_ids(PREFIX_STR) + id2 = tokenizer.text_to_ids(PREFIX_STR + TURN_TOKEN + speaker + END_NAME_SIGNAL) + skip_name_len = len(id2) - len( + id1 + ) # s_ids[:skip_name_len] is the name part of the prompt 'TURN_TOKEN + speaker + END_NAME_SIGNAL' + # get the position of the label start string in this turn + location = identify_start_index_of_subsequence(label_start_ids, s_id) + + if location >= 0: + # if it contains the label start tokens if gtype == 'VALUE_TO_TEXT': - # if contains the token - assert skip_name_len == torch.where((s_id == extra_id_2_token_id))[0].item() - # find new line token id 14 - more_skip_len = torch.where((s_id[skip_name_len:] == new_line_token_id))[0][0].item() + 1 + # handles the case that condition on labels to generate respone + # the next token after the name part of the prompt is the beginning of the label start tokens + assert skip_name_len == location + # find the first new line token after the label part, which indicates the end of the whole label string + # newline_loc = torch.where((s_id[skip_name_len:] == name_end_token_ids))[0] + newline_loc = identify_start_index_of_subsequence(name_end_token_ids, s_id[skip_name_len:]) + if newline_loc < 0: + # cannot find new line token, which means the the whole turn is just a partial label string. Mask the whole turn + target[cur_idx : cur_idx + tokenized_len] = IGNORE_INDEX + continue + # skip the label part and the new line token + more_skip_len = newline_loc + len(name_end_token_ids) + # skip the name part and the label part skip_name_len += more_skip_len elif gtype == 'TEXT_TO_VALUE': - skip_name_len = torch.where((s_id == extra_id_2_token_id))[0].item() + 1 + # handles the case that condition on response to generate label + # skip the name part, response and the label start tokens part, the remainder is the label string without label start, e.g. 'quality:9,toxicity:8...' + skip_name_len = location + len(label_start_ids) if cur_idx >= tgt_len: break elif cur_idx + tokenized_len < tgt_len: - # Check whether the mask is applied to the correct position, the first token is turn token: - # s_id[2:] skips the artifact empty token and the turn token - # target[cur_idx + 1:cur_idx + tokenized_len] skip the turn token + # Check whether the mask is applied to the correct position, the first token is turn start tokens if not torch.equal(target[cur_idx + 1 : cur_idx + tokenized_len], s_id[1:]): logging.warning("a sentence mismatches the corresponding piece " "in the conversation") if i == 0 and (gtype == 'VALUE_TO_TEXT' or gtype is None): - # mask the first turn completely to provide at least one turn as context + # mask the first turn completely to provide at least one turn as context for the rest target[cur_idx : cur_idx + tokenized_len] = IGNORE_INDEX elif speaker == mask_role and i == 1 and gtype == 'TEXT_TO_VALUE': - # leave the first human tag unmasked - target[cur_idx + 1 : cur_idx + tokenized_len] = IGNORE_INDEX + # leave the first turn start tag unmasked, servers severs as the end of turn signal + target[cur_idx + num_turn_start_tokens : cur_idx + tokenized_len] = IGNORE_INDEX elif speaker == mask_role and (i > 1): - # leave the first human tag unmasked - target[cur_idx + 1 : cur_idx + tokenized_len] = IGNORE_INDEX + # leave the first turn start tag unmasked, which severs as the end of turn signal + target[cur_idx + num_turn_start_tokens : cur_idx + tokenized_len] = IGNORE_INDEX elif speaker == mask_role and (i <= 1): # mask out everything in the second turn target[cur_idx : cur_idx + tokenized_len] = IGNORE_INDEX else: - # mask up to the name end, need to remove one as skip name has an extra artifact empty token + # mask up to name part, label part for VALUE_TO_TEXT, or name part, response and label start tokens for TEXT_TO_VALUE, or just the name part if gtype is None target[cur_idx : cur_idx + skip_name_len] = IGNORE_INDEX cur_idx += tokenized_len -def cannonical_form_formater(cannoical_form): - return f'{cannoical_form}\n' - - -def response_value_formater(label): +def response_value_formater(label, label_start, end_signal): if isinstance(label, str): - return '' + label + '\n' + return label_start + label + end_signal elif label is None: return '' else: raise ValueError(f'Unknown label type {type(label)}, only str type is supported') -def _add_speaker_and_signal(header, source, mask_role, gtype): +def _add_speaker_and_signal(header, source, mask_role, gtype, special_tokens): + TURN_TOKEN = special_tokens['turn_start'] + END_SIGNAL = special_tokens['end_of_turn'] + LABEL_START = special_tokens['label_start'] + END_NAME_SIGNAL = special_tokens['end_of_name'] + """Add speaker and start/end signal on each round.""" BEGIN_SIGNAL = "" conversation = header @@ -138,7 +210,11 @@ def _add_speaker_and_signal(header, source, mask_role, gtype): + role_token + sentence_from + END_NAME_SIGNAL - + (response_value_formater(sentence['label']) if 'label' in sentence else '') + + ( + response_value_formater(sentence['label'], LABEL_START, END_NAME_SIGNAL) + if 'label' in sentence + else '' + ) + sentence["value"] + END_SIGNAL ) @@ -150,7 +226,11 @@ def _add_speaker_and_signal(header, source, mask_role, gtype): + END_NAME_SIGNAL + sentence["value"] + END_SIGNAL - + (response_value_formater(sentence['label']) if 'label' in sentence else '') + + ( + response_value_formater(sentence['label'], LABEL_START, END_NAME_SIGNAL) + if 'label' in sentence + else '' + ) ) else: raise ValueError( @@ -163,7 +243,14 @@ def _add_speaker_and_signal(header, source, mask_role, gtype): return conversation -def preprocess(source: dict, tokenizer: TokenizerSpec, extra_id_2_token_id: int, new_line_token_id: int): +def preprocess( + source: dict, + tokenizer: TokenizerSpec, + name_end_token_ids: int, + label_start_ids: list, + special_tokens: dict, + num_turn_start_tokens: int, +): """ Given a conversation list. This transform: 1. Add signal '### ' at the beginning each sentence, with end signal '\n'; @@ -171,36 +258,23 @@ def preprocess(source: dict, tokenizer: TokenizerSpec, extra_id_2_token_id: int, 3. Tokenize the concatenated conversation; 4. Make a deepcopy as the target. Mask human words with IGNORE_INDEX. """ - data_type = None - if 'type' in source: - data_type = source['type'] - assert data_type in TYPE_INSTRUCTION, f"source type {data_type} not supported" - # add end signal and concatenate together - conversation = source['system'] - if data_type is not None: - if TYPE_INSTRUCTION[data_type] != '': - conversation = conversation + '\n' + TYPE_INSTRUCTION[data_type] - mask_role = source.get('mask', 'User') - header = f"{SYSTEM_TOKEN}{conversation}" - conversation = _add_speaker_and_signal(header, source['conversations'], mask_role, data_type) + header, conversation, data_type, mask_role = _get_header_conversation_type_mask_role(source, special_tokens) # tokenize conversations input_ids = tokenizer.text_to_ids(conversation) target = copy.deepcopy(input_ids) - header_len = len(tokenizer.text_to_ids(header)) + header_tokens = tokenizer.text_to_ids(header) + header_len = len(header_tokens) ids = [] tokenized_lens = [] + assert torch.equal(torch.tensor(target[:header_len]), torch.tensor(header_tokens)) for s in source['conversations']: - if isinstance(tokenizer, SentencePieceTokenizer): - tokenized_sentence = tokenizer.text_to_ids(s["value"]) - ids.append(torch.tensor(tokenized_sentence)[1:]) - # remove one token as it adds an empty token in front - tokenized_lens.append(len(tokenized_sentence) - 1) - else: - tokenized_sentence = tokenizer.text_to_ids(s["value"]) - ids.append(torch.tensor(tokenized_sentence)) - # remove one token as it adds an empty token in front - tokenized_lens.append(len(tokenized_sentence)) + # hack to remove the extra empty token in front + id1 = tokenizer.text_to_ids(PREFIX_STR + s["value"]) + id2 = tokenizer.text_to_ids(PREFIX_STR) + tokenized_sentence = id1[len(id2) :] + ids.append(torch.tensor(tokenized_sentence)) + tokenized_lens.append(len(tokenized_sentence)) speakers = [sentence["from"] for sentence in source['conversations']] assert mask_role in speakers, "mask role not in the conversation" target = torch.LongTensor(target) @@ -216,8 +290,10 @@ def preprocess(source: dict, tokenizer: TokenizerSpec, extra_id_2_token_id: int, tokenizer, mask_role, data_type, - extra_id_2_token_id, - new_line_token_id, + name_end_token_ids, + special_tokens, + label_start_ids, + num_turn_start_tokens, ) mask = (target != IGNORE_INDEX).bool() assert mask.sum().item() != 0, "mask is empty" @@ -228,14 +304,6 @@ def preprocess(source: dict, tokenizer: TokenizerSpec, extra_id_2_token_id: int, return dict(input_ids=input_ids, mask=mask, context_ids=context_ids, answer_ids=answer_ids) -def _check_token_in_vocab(tokenizer, token): - ids = tokenizer.text_to_ids(token) - if isinstance(tokenizer, SentencePieceTokenizer): - return len(ids) == 2 - else: - return len(ids) == 1 - - class GPTSFTChatDataset(GPTSFTDataset): def _maybe_validate_prompt_template(self): pass @@ -243,22 +311,20 @@ def _maybe_validate_prompt_template(self): def _build_samples_mapping(self): super()._build_samples_mapping() assert hasattr(self.tokenizer, "vocab"), "tokenizer should have vocab property, not supported" - assert _check_token_in_vocab( - self.tokenizer, '' - ), " not in the tokenizer vocab. not supported" - assert _check_token_in_vocab( - self.tokenizer, '' - ), " not in the tokenizer vocab. not supported" - # calcuilate id value - if _check_token_in_vocab(self.tokenizer, ''): - ids_1 = self.tokenizer.text_to_ids('') - ids_2 = self.tokenizer.text_to_ids('') - self.extra_id_2_token_id = ids_1[len(ids_2) :][0] - else: - self.extra_id_2_token_id = None - ids_1 = self.tokenizer.text_to_ids('\n') - ids_2 = self.tokenizer.text_to_ids('') - self.new_line_token_id = ids_1[len(ids_2) :][0] + LABEL_START = self.special_tokens['label_start'] + END_NAME_SIGNAL = self.special_tokens['end_of_name'] + + id1 = self.tokenizer.text_to_ids(PREFIX_STR) + id2 = self.tokenizer.text_to_ids(PREFIX_STR + LABEL_START) + self.label_start_tokens = id2[len(id1) :] + + id1 = self.tokenizer.text_to_ids(PREFIX_STR + END_NAME_SIGNAL) + id2 = self.tokenizer.text_to_ids(PREFIX_STR) + self.name_end_token_ids = id1[len(id2) :] + + id1 = self.tokenizer.text_to_ids(PREFIX_STR + self.special_tokens['turn_start']) + id2 = self.tokenizer.text_to_ids(PREFIX_STR) + self.num_turn_start_tokens = len(id1) - len(id2) def _process_example(self, example): """ @@ -266,7 +332,14 @@ def _process_example(self, example): Truncation is carried out when needed, but it is performed only on the prompt side. BOS, EOS, and SEP, are added if specified. """ - result = preprocess(example, self.tokenizer, self.extra_id_2_token_id, self.new_line_token_id) + result = preprocess( + example, + self.tokenizer, + self.name_end_token_ids, + self.label_start_tokens, + self.special_tokens, + self.num_turn_start_tokens, + ) # store metadata in dataset, in case user may have keys required in the prediction json files metadata = {k: v for k, v in example.items() if k not in ['conversations']} diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py index 101201ef7536..9c6e50f5e43f 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py @@ -13,10 +13,14 @@ # limitations under the License. import re -from typing import List, Optional +from typing import List, Mapping, Optional +import datasets import numpy as np import torch + +# hack to avoid the "not enough disk space" error in some slurm cluster +datasets.builder.has_sufficient_disk_space = lambda needed_bytes, directory='.': True from datasets import load_dataset from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec @@ -52,6 +56,7 @@ def __init__( memmap_workers: Optional[int] = None, hf_dataset: bool = False, truncation_method: str = 'right', + special_tokens: Optional[Mapping[str, str]] = None, # special tokens, a dictory of {token_type: token} ): """ file_path: Path to a JSONL GPT supervised fine-tuning dataset. Data is formatted as multiple JSON lines with each line formatted as follows. {'input': 'John von Neumann\nVon Neumann made fundamental contributions .... Q: What did the math of artificial viscosity do?', 'output': 'smoothed the shock transition without sacrificing basic physics'} @@ -73,6 +78,7 @@ def __init__( prompt_template: Prompt template to inject via an fstring. Formatted like Q: {context_key}\n\nA: {label_key} hf_dataset: Whether to load the json file with the HuggingFace dataset. otherwise, will load the jsonl file with the JSONLMemMapDataset. truncation_method: Truncation from which position. Options: ['left', 'right'] + special_tokens: special tokens for the chat prompts, a dictionary of {token_type: token}. Default: {'system_turn_start': '', 'turn_start': '', 'label_start': '', 'end_of_turn': '\n', "end_of_name": "\n"} """ self.tokenizer = tokenizer self.file_path = file_path @@ -93,6 +99,16 @@ def __init__( self.virtual_tokens = virtual_tokens self.tokens_to_generate = tokens_to_generate self.truncation_method = truncation_method + if special_tokens is None: + self.special_tokens = { + "system_turn_start": "", + "turn_start": "", + "label_start": "", + "end_of_turn": "\n", + "end_of_name": "\n", + } + else: + self.special_tokens = special_tokens if hf_dataset: self.indexed_dataset = load_dataset( diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index df84b02cc7b4..e0d62d74cf60 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -33,7 +33,6 @@ from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel from nemo.collections.nlp.modules.common.megatron.utils import get_iterator_k_split from nemo.collections.nlp.modules.common.text_generation_utils import generate, get_computeprob_response - from nemo.collections.nlp.parts.mixins.nlp_adapter_mixins import NLPAdapterModelMixin from nemo.collections.nlp.parts.utils_funcs import get_last_rank from nemo.utils import AppState, logging @@ -296,9 +295,11 @@ def _build_dataset(self, data_cfg, is_train=True): truncation_method=data_cfg.get( 'truncation_method', 'right' ), # used to choose truncation method. Options: ['random', 'left', 'right'] + special_tokens=self.cfg.data.get( + 'chat_prompt_tokens', None + ), # special tokens for the chat prompts, a dictionary of {token_type: token}. Default: {'system_turn_start': '', 'turn_start': '', 'label_start': '', 'end_of_turn': '\n', "end_of_name": "\n"} ) datasets.append(dataset) - if is_train: dataset = BlendableDataset( datasets=datasets, weights=data_cfg.concat_sampling_probabilities, size=num_train_samples_after_blend diff --git a/scripts/nlp_language_modeling/sft/data_clean.py b/scripts/nlp_language_modeling/sft/data_clean.py index 8c67aa2e3bcd..362f7edeeb3b 100644 --- a/scripts/nlp_language_modeling/sft/data_clean.py +++ b/scripts/nlp_language_modeling/sft/data_clean.py @@ -41,7 +41,7 @@ def data_clean( ) if library == 'huggingface': tokenizer.add_special_tokens({'additional_special_tokens': ['', '', '']}) - d = GPTSFTChatDataset(dataset_file, tokenizer, seq_len, 1) + d = GPTSFTChatDataset(dataset_file, tokenizer, seq_len, 1, hf_dataset=True) total_records = len(d) removed_ids = set() for i in range(total_records): diff --git a/scripts/nlp_language_modeling/sft/preprocessing.py b/scripts/nlp_language_modeling/sft/preprocessing.py index 7a08e055543d..175187a279bc 100644 --- a/scripts/nlp_language_modeling/sft/preprocessing.py +++ b/scripts/nlp_language_modeling/sft/preprocessing.py @@ -80,11 +80,12 @@ def parse_conversations(tree_obj): raise ValueError(f'unknown role {prompt_obj["role"]}') turn = {'value': prompt_obj['text'], 'from': role} if 'labels' in prompt_obj: - turn['human_labels'] = prompt_obj['labels'] - for key in turn['human_labels']: - value_set = label_values.get(key, set()) - value_set.add(turn['human_labels'][key]['value']) - label_values[key] = value_set + # remove human labels + # turn['human_labels'] = prompt_obj['labels'] + # for key in turn['human_labels']: + # value_set = label_values.get(key, set()) + # value_set.add(turn['human_labels'][key]['value']) + # label_values[key] = value_set turn['label'] = encode_labels(prompt_obj['labels']) if 'lang' in prompt_obj: turn['lang'] = prompt_obj['lang'].split('-')[0] diff --git a/tests/collections/nlp/test_chat_sft_dataset.py b/tests/collections/nlp/test_chat_sft_dataset.py index 36d00e3108d7..f7bcecaa3c28 100644 --- a/tests/collections/nlp/test_chat_sft_dataset.py +++ b/tests/collections/nlp/test_chat_sft_dataset.py @@ -16,13 +16,18 @@ import json import os import random +from functools import partial import pytest -from nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_chat_dataset import GPTSFTChatDataset +from nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_chat_dataset import ( + GPTSFTChatDataset, + get_prompt_template_example, +) from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer TOKENIZER_FILE_43B = '/home/TestData/nlp/megatron_sft/tokenizer.model' +TOKENIZER_FILE_Llama2 = '/home/TestData/nlp/megatron_sft/llama2_tokenizer.model' MERGE_FILE = '/home/TestData/nlp/megatron_sft/merges.txt' VOCAB_FILE = '/home/TestData/nlp/megatron_sft/vocab.json' @@ -54,7 +59,7 @@ def create_data_points(mask_user, turn_num, records, temp_file, t2v, label=True) with open(temp_file, 'w', encoding='utf-8') as f: for r in range(records): record = {} - record['system'] = 'a chat\n\n' + record['system'] = 'a chat' record['type'] = 'TEXT_TO_VALUE' if t2v else 'VALUE_TO_TEXT' record['mask'] = 'User' if mask_user else 'Assistant' turns = [] @@ -74,244 +79,377 @@ def create_data_points(mask_user, turn_num, records, temp_file, t2v, label=True) class TestGPTSFTChatDataset: @classmethod def setup_class(cls): - pass + cls.special_tokens = { + "system_turn_start": "", + "turn_start": "", + "label_start": "", + "end_of_turn": "\n", + "end_of_name": "\n", + } + cls.suffix = cls.special_tokens['end_of_turn'] + cls.special_tokens['turn_start'] + cls.label_suffix = cls.special_tokens['end_of_name'] + cls.special_tokens['turn_start'] - @pytest.mark.unit - def test_43B_tokenizer_mask_user(self): + def _mask_user_test(self, tokenizer, ids_to_text): random.seed(5) temp_file = '/tmp/test_file.jsonl' turn_num = 5 records = 5 try: data_points = create_data_points(True, turn_num, records, temp_file, t2v=False) - tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_43B) - d = GPTSFTChatDataset(temp_file, tokenizer, 4096, 1, index_mapping_dir='/tmp/', hf_dataset=True) + d = GPTSFTChatDataset( + temp_file, + tokenizer, + 4096, + 1, + index_mapping_dir='/tmp/', + hf_dataset=True, + special_tokens=self.special_tokens, + ) for i in range(len(d)): result = d[i] input_ids = result['input_ids'] mask = result['mask'] - text = tokenizer.ids_to_text(input_ids[mask].tolist()) + text = ids_to_text(input_ids[mask].tolist()) expected_text = '' for j in range(1, turn_num, 2): - expected_text += data_points[i]['conversations'][j]['value'] + '\n' + '' + expected_text += data_points[i]['conversations'][j]['value'] + self.suffix assert text == expected_text finally: os.remove(temp_file) - @pytest.mark.unit - def test_43B_tokenizer_mask_assistant(self): + def _mask_assistant_test(self, tokenizer, ids_to_text): random.seed(3) temp_file = '/tmp/test_file.jsonl' turn_num = 5 records = 5 try: data_points = create_data_points(False, turn_num, records, temp_file, t2v=False) - tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_43B) - d = GPTSFTChatDataset(temp_file, tokenizer, 4096, 1, index_mapping_dir='/tmp/', hf_dataset=True) + d = GPTSFTChatDataset( + temp_file, + tokenizer, + 4096, + 1, + index_mapping_dir='/tmp/', + hf_dataset=True, + special_tokens=self.special_tokens, + ) for i in range(len(d)): result = d[i] input_ids = result['input_ids'] mask = result['mask'] - text = tokenizer.ids_to_text(input_ids[mask].tolist()) + text = ids_to_text(input_ids[mask].tolist()) expected_text = '' for j in range(2, turn_num, 2): - expected_text += data_points[i]['conversations'][j]['value'] + '\n' + '' + expected_text += data_points[i]['conversations'][j]['value'] + self.suffix assert text == expected_text finally: os.remove(temp_file) - @pytest.mark.unit - def test_43B_tokenizer_mask_user_t2v(self): + def _mask_user_t2v_test(self, tokenizer, ids_to_text): random.seed(5) temp_file = '/tmp/test_file.jsonl' turn_num = 5 records = 5 try: data_points = create_data_points(True, turn_num, records, temp_file, t2v=True) - tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_43B) - d = GPTSFTChatDataset(temp_file, tokenizer, 4096, 1, index_mapping_dir='/tmp/', hf_dataset=True) + d = GPTSFTChatDataset( + temp_file, + tokenizer, + 4096, + 1, + index_mapping_dir='/tmp/', + hf_dataset=True, + special_tokens=self.special_tokens, + ) for i in range(len(d)): result = d[i] input_ids = result['input_ids'] mask = result['mask'] - text = tokenizer.ids_to_text(input_ids[mask].tolist()) + text = ids_to_text(input_ids[mask].tolist()) expected_text = '' for j in range(1, turn_num, 2): - expected_text += data_points[i]['conversations'][j]['label'] + '\n' + '' + expected_text += data_points[i]['conversations'][j]['label'] + self.label_suffix assert text == expected_text finally: os.remove(temp_file) - @pytest.mark.unit - def test_43B_tokenizer_mask_assistant_t2v(self): + def _mask_assistant_t2v_test(self, tokenizer, ids_to_text): random.seed(5) temp_file = '/tmp/test_file.jsonl' turn_num = 5 records = 5 try: data_points = create_data_points(False, turn_num, records, temp_file, t2v=True) - tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_43B) - d = GPTSFTChatDataset(temp_file, tokenizer, 4096, 1, index_mapping_dir='/tmp/', hf_dataset=True) + d = GPTSFTChatDataset( + temp_file, + tokenizer, + 4096, + 1, + index_mapping_dir='/tmp/', + hf_dataset=True, + special_tokens=self.special_tokens, + ) for i in range(len(d)): result = d[i] input_ids = result['input_ids'] mask = result['mask'] - text = tokenizer.ids_to_text(input_ids[mask].tolist()) + text = ids_to_text(input_ids[mask].tolist()) expected_text = '' for j in range(0, turn_num, 2): - expected_text += data_points[i]['conversations'][j]['label'] + '\n' + '' + expected_text += data_points[i]['conversations'][j]['label'] + self.label_suffix assert text == expected_text finally: os.remove(temp_file) - @pytest.mark.unit - def test_mpt_tokenizer_mask_user(self): + def _mask_user_nolabel_test(self, tokenizer, ids_to_text): random.seed(5) temp_file = '/tmp/test_file.jsonl' turn_num = 5 records = 5 try: - data_points = create_data_points(True, turn_num, records, temp_file, t2v=False) - tokenizer = get_nmt_tokenizer( - library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True - ) - tokenizer.add_special_tokens( - {'additional_special_tokens': ['', '', '']} + data_points = create_data_points(True, turn_num, records, temp_file, t2v=False, label=False) + d = GPTSFTChatDataset( + temp_file, + tokenizer, + 4096, + 1, + index_mapping_dir='/tmp/', + hf_dataset=True, + special_tokens=self.special_tokens, ) - d = GPTSFTChatDataset(temp_file, tokenizer, 4096, 1, index_mapping_dir='/tmp/', hf_dataset=True) for i in range(len(d)): result = d[i] input_ids = result['input_ids'] mask = result['mask'] - text = ids_to_text(tokenizer, input_ids[mask].tolist()) + text = ids_to_text(input_ids[mask].tolist()) expected_text = '' for j in range(1, turn_num, 2): - expected_text += data_points[i]['conversations'][j]['value'] + '\n' + '' + expected_text += data_points[i]['conversations'][j]['value'] + self.suffix assert text == expected_text finally: os.remove(temp_file) - @pytest.mark.unit - def test_mpt_tokenizer_mask_assistant(self): + def _mask_assistant_nolabel_test(self, tokenizer, ids_to_text): random.seed(3) temp_file = '/tmp/test_file.jsonl' turn_num = 5 records = 5 try: - data_points = create_data_points(False, turn_num, records, temp_file, t2v=False) - tokenizer = get_nmt_tokenizer( - library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True - ) - tokenizer.add_special_tokens( - {'additional_special_tokens': ['', '', '']} + data_points = create_data_points(False, turn_num, records, temp_file, t2v=False, label=False) + d = GPTSFTChatDataset( + temp_file, + tokenizer, + 4096, + 1, + index_mapping_dir='/tmp/', + hf_dataset=True, + special_tokens=self.special_tokens, ) - d = GPTSFTChatDataset(temp_file, tokenizer, 4096, 1, index_mapping_dir='/tmp/', hf_dataset=True) for i in range(len(d)): result = d[i] input_ids = result['input_ids'] mask = result['mask'] - text = ids_to_text(tokenizer, input_ids[mask].tolist()) + text = ids_to_text(input_ids[mask].tolist()) expected_text = '' for j in range(2, turn_num, 2): - expected_text += data_points[i]['conversations'][j]['value'] + '\n' + '' + expected_text += data_points[i]['conversations'][j]['value'] + self.suffix assert text == expected_text finally: os.remove(temp_file) - @pytest.mark.unit - def test_mpt_tokenizer_mask_user_t2v(self): + def _test_example_prompt(self, tokenizer): random.seed(5) - temp_file = '/tmp/test_file.jsonl' - turn_num = 5 - records = 5 - try: - data_points = create_data_points(True, turn_num, records, temp_file, t2v=True) - tokenizer = get_nmt_tokenizer( - library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True + conv = get_prompt_template_example(self.special_tokens) + expected = ( + self.special_tokens['system_turn_start'] + + 'System' + + self.special_tokens['end_of_name'] + + '{system message}' + + self.special_tokens['end_of_turn'] + ) + for turn in range(2): + expected += ( + self.special_tokens['turn_start'] + + 'User' + + self.special_tokens['end_of_name'] + + f'{{turn {turn + 1} user message}}' + + self.special_tokens['end_of_turn'] ) - tokenizer.add_special_tokens( - {'additional_special_tokens': ['', '', '']} + expected += self.special_tokens['turn_start'] + 'Assistant' + self.special_tokens['end_of_name'] + expected += ( + self.special_tokens['label_start'] + + f'{{turn {turn + 1} assistant label}}' + + self.special_tokens['end_of_name'] ) - d = GPTSFTChatDataset(temp_file, tokenizer, 4096, 1, index_mapping_dir='/tmp/', hf_dataset=True) - for i in range(len(d)): - result = d[i] - input_ids = result['input_ids'] - mask = result['mask'] - text = ids_to_text(tokenizer, input_ids[mask].tolist()) - expected_text = '' - for j in range(1, turn_num, 2): - expected_text += data_points[i]['conversations'][j]['label'] + '\n' + '' - assert text == expected_text - finally: - os.remove(temp_file) + expected += f'{{turn {turn + 1} assistant message}}' + self.special_tokens['end_of_turn'] + expected += self.special_tokens['turn_start'] + assert conv == expected @pytest.mark.unit - def test_mpt_tokenizer_mask_assistant_t2v(self): - random.seed(5) - temp_file = '/tmp/test_file.jsonl' - turn_num = 5 - records = 5 - try: - data_points = create_data_points(False, turn_num, records, temp_file, t2v=True) - tokenizer = get_nmt_tokenizer( - library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True - ) - tokenizer.add_special_tokens( - {'additional_special_tokens': ['', '', '']} - ) - d = GPTSFTChatDataset(temp_file, tokenizer, 4096, 1, index_mapping_dir='/tmp/', hf_dataset=True) - for i in range(len(d)): - result = d[i] - input_ids = result['input_ids'] - mask = result['mask'] - text = ids_to_text(tokenizer, input_ids[mask].tolist()) - expected_text = '' - for j in range(0, turn_num, 2): - expected_text += data_points[i]['conversations'][j]['label'] + '\n' + '' - assert text == expected_text - finally: - os.remove(temp_file) + def test_43B_example_prompt(self): + tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_43B) + self._test_example_prompt(tokenizer) + + @pytest.mark.unit + def test_43B_tokenizer_mask_user(self): + tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_43B) + self._mask_user_test(tokenizer, tokenizer.ids_to_text) + + @pytest.mark.unit + def test_43B_tokenizer_mask_assistant(self): + tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_43B) + self._mask_assistant_test(tokenizer, tokenizer.ids_to_text) + + @pytest.mark.unit + def test_43B_tokenizer_mask_user_t2v(self): + tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_43B) + self._mask_user_t2v_test(tokenizer, tokenizer.ids_to_text) + + @pytest.mark.unit + def test_43B_tokenizer_mask_assistant_t2v(self): + tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_43B) + self._mask_assistant_t2v_test(tokenizer, tokenizer.ids_to_text) @pytest.mark.unit def test_43B_tokenizer_mask_user_nolabel(self): - random.seed(5) - temp_file = '/tmp/test_file.jsonl' - turn_num = 5 - records = 5 - try: - data_points = create_data_points(True, turn_num, records, temp_file, t2v=False, label=False) - tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_43B) - d = GPTSFTChatDataset(temp_file, tokenizer, 4096, 1, index_mapping_dir='/tmp/', hf_dataset=True) - for i in range(len(d)): - result = d[i] - input_ids = result['input_ids'] - mask = result['mask'] - text = tokenizer.ids_to_text(input_ids[mask].tolist()) - expected_text = '' - for j in range(1, turn_num, 2): - expected_text += data_points[i]['conversations'][j]['value'] + '\n' + '' - assert text == expected_text - finally: - os.remove(temp_file) + tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_43B) + self._mask_user_nolabel_test(tokenizer, tokenizer.ids_to_text) @pytest.mark.unit def test_43B_tokenizer_mask_assistant_nolabel(self): - random.seed(3) - temp_file = '/tmp/test_file.jsonl' - turn_num = 5 - records = 5 - try: - data_points = create_data_points(False, turn_num, records, temp_file, t2v=False, label=False) - tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_43B) - d = GPTSFTChatDataset(temp_file, tokenizer, 4096, 1, index_mapping_dir='/tmp/', hf_dataset=True) - for i in range(len(d)): - result = d[i] - input_ids = result['input_ids'] - mask = result['mask'] - text = tokenizer.ids_to_text(input_ids[mask].tolist()) - expected_text = '' - for j in range(2, turn_num, 2): - expected_text += data_points[i]['conversations'][j]['value'] + '\n' + '' - assert text == expected_text - finally: - os.remove(temp_file) + tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_43B) + self._mask_assistant_nolabel_test(tokenizer, tokenizer.ids_to_text) + + @pytest.mark.unit + def test_mpt_tokenizer_mask_user(self): + tokenizer = get_nmt_tokenizer( + library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True + ) + tokenizer.add_special_tokens({'additional_special_tokens': ['', '', '']}) + self._mask_user_test(tokenizer, partial(ids_to_text, tokenizer)) + + @pytest.mark.unit + def test_mpt_tokenizer_mask_assistant(self): + tokenizer = get_nmt_tokenizer( + library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True + ) + tokenizer.add_special_tokens({'additional_special_tokens': ['', '', '']}) + self._mask_assistant_test(tokenizer, partial(ids_to_text, tokenizer)) + + @pytest.mark.unit + def test_mpt_tokenizer_mask_user_t2v(self): + tokenizer = get_nmt_tokenizer( + library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True + ) + tokenizer.add_special_tokens({'additional_special_tokens': ['', '', '']}) + self._mask_user_t2v_test(tokenizer, partial(ids_to_text, tokenizer)) + + @pytest.mark.unit + def test_mpt_tokenizer_mask_assistant_t2v(self): + tokenizer = get_nmt_tokenizer( + library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True + ) + tokenizer.add_special_tokens({'additional_special_tokens': ['', '', '']}) + self._mask_assistant_t2v_test(tokenizer, partial(ids_to_text, tokenizer)) + + @pytest.mark.unit + def test_mpt_tokenizer_mask_user_nolabel(self): + tokenizer = get_nmt_tokenizer( + library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True + ) + tokenizer.add_special_tokens({'additional_special_tokens': ['', '', '']}) + self._mask_user_nolabel_test(tokenizer, partial(ids_to_text, tokenizer)) + + @pytest.mark.unit + def test_mpt_tokenizer_mask_assistant_nolabel(self): + tokenizer = get_nmt_tokenizer( + library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True + ) + tokenizer.add_special_tokens({'additional_special_tokens': ['', '', '']}) + self._mask_assistant_nolabel_test(tokenizer, partial(ids_to_text, tokenizer)) + + @pytest.mark.unit + def test_llama2_tokenizer_mask_user(self): + tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_Llama2) + self._mask_user_test(tokenizer, tokenizer.ids_to_text) + + @pytest.mark.unit + def test_llama2_tokenizer_mask_assistant(self): + tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_Llama2) + self._mask_assistant_test(tokenizer, tokenizer.ids_to_text) + + @pytest.mark.unit + def test_llama2_tokenizer_mask_user_t2v(self): + tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_Llama2) + self._mask_user_t2v_test(tokenizer, tokenizer.ids_to_text) + + @pytest.mark.unit + def test_llama2_tokenizer_mask_assistant_t2v(self): + tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_Llama2) + self._mask_assistant_t2v_test(tokenizer, tokenizer.ids_to_text) + + @pytest.mark.unit + def test_llama2_tokenizer_mask_user_nolabel(self): + tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_Llama2) + self._mask_user_nolabel_test(tokenizer, tokenizer.ids_to_text) + + @pytest.mark.unit + def test_llama2_tokenizer_mask_assistant_nolabel(self): + tokenizer = get_nmt_tokenizer(library='sentencepiece', tokenizer_model=TOKENIZER_FILE_Llama2) + self._mask_assistant_nolabel_test(tokenizer, tokenizer.ids_to_text) + + @pytest.mark.unit + def test_normal_mpt_tokenizer_mask_user(self): + tokenizer = get_nmt_tokenizer( + library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True + ) + self._mask_user_test(tokenizer, tokenizer.ids_to_text) + + @pytest.mark.unit + def test_normal_mpt_tokenizer_mask_assistant(self): + tokenizer = get_nmt_tokenizer( + library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True + ) + self._mask_assistant_test(tokenizer, tokenizer.ids_to_text) + + @pytest.mark.unit + def test_normal_mpt_tokenizer_mask_user_t2v(self): + tokenizer = get_nmt_tokenizer( + library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True + ) + self._mask_user_t2v_test(tokenizer, tokenizer.ids_to_text) + + @pytest.mark.unit + def test_normal_mpt_tokenizer_mask_assistant_t2v(self): + tokenizer = get_nmt_tokenizer( + library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True + ) + self._mask_assistant_t2v_test(tokenizer, tokenizer.ids_to_text) + + @pytest.mark.unit + def test_normal_mpt_tokenizer_mask_user_nolabel(self): + tokenizer = get_nmt_tokenizer( + library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True + ) + self._mask_user_nolabel_test(tokenizer, tokenizer.ids_to_text) + + @pytest.mark.unit + def test_normal_mpt_tokenizer_mask_assistant_nolabel(self): + tokenizer = get_nmt_tokenizer( + library='huggingface', model_name='gpt2', merges_file=MERGE_FILE, vocab_file=VOCAB_FILE, use_fast=True + ) + self._mask_assistant_nolabel_test(tokenizer, tokenizer.ids_to_text) + + +class TestDifferentGPTSFTChatDataset(TestGPTSFTChatDataset): + @classmethod + def setup_class(cls): + cls.special_tokens = { + "system_turn_start": "<|im_start|>", + "turn_start": "<|im_start|>", + "label_start": "<|label|>", + "end_of_turn": "<|im_end|>\n", + "end_of_name": "\n", + } + cls.suffix = cls.special_tokens['end_of_turn'] + cls.special_tokens['turn_start'] + cls.label_suffix = cls.special_tokens['end_of_name'] + cls.special_tokens['turn_start'] From 188f0a1626701fd02cadcbe287eff825bb1e5b1c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 10 Oct 2023 09:25:22 -0700 Subject: [PATCH 316/512] Fix vad & speech command tutorial - onnx (#7671) (#7672) * fix vad onnx * fix mbn onnx --------- Signed-off-by: fayejf Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> --- tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb | 4 ++-- tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb index d11640844e60..f52e4777f5d3 100644 --- a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb @@ -559,7 +559,7 @@ "CHANNELS = 1\n", "RATE = 16000\n", "audio, sample_rate = librosa.load(wave_file, sr=RATE)\n", - "dur = librosa.get_duration(audio)\n", + "dur = librosa.get_duration(y=audio, sr=sample_rate)\n", "print(dur)" ] }, @@ -798,7 +798,7 @@ "source": [ "import onnxruntime\n", "vad_model.export('vad.onnx')\n", - "ort_session = onnxruntime.InferenceSession('vad.onnx')\n", + "ort_session = onnxruntime.InferenceSession('vad.onnx', providers=['CPUExecutionProvider'])\n", "\n", "def to_numpy(tensor):\n", " return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()\n", diff --git a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb index 21f2bc58a099..9ca2232142ad 100644 --- a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb @@ -695,7 +695,7 @@ "source": [ "import onnxruntime\n", "mbn_model.export('mbn.onnx')\n", - "ort_session = onnxruntime.InferenceSession('mbn.onnx')\n", + "ort_session = onnxruntime.InferenceSession('mbn.onnx', providers=['CPUExecutionProvider'])\n", "\n", "def to_numpy(tensor):\n", " return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()\n", From 33d04b235acb78e6d095faf4eebe7a8dace5afe7 Mon Sep 17 00:00:00 2001 From: Igor Gitman Date: Tue, 10 Oct 2023 17:18:44 -0700 Subject: [PATCH 317/512] Fix in the confidence ensemble test (#7682) * Fix in the confidence ensemble test Signed-off-by: Igor Gitman * Correct parameter names Signed-off-by: Igor Gitman --------- Signed-off-by: Igor Gitman --- scripts/confidence_ensembles/build_ensemble.py | 2 +- scripts/confidence_ensembles/test_confidence_ensembles.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/confidence_ensembles/build_ensemble.py b/scripts/confidence_ensembles/build_ensemble.py index e40997c4aca2..4c05e2e4ff3f 100644 --- a/scripts/confidence_ensembles/build_ensemble.py +++ b/scripts/confidence_ensembles/build_ensemble.py @@ -215,7 +215,7 @@ class BuildEnsembleConfig: preserve_frame_confidence=True, exclude_blank=True, aggregation="mean", - measure_cfg=ConfidenceMethodConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), + method_cfg=ConfidenceMethodConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), ) ) temperature: float = 1.0 diff --git a/scripts/confidence_ensembles/test_confidence_ensembles.py b/scripts/confidence_ensembles/test_confidence_ensembles.py index fa537529ab6b..42a5a1946e8c 100644 --- a/scripts/confidence_ensembles/test_confidence_ensembles.py +++ b/scripts/confidence_ensembles/test_confidence_ensembles.py @@ -33,7 +33,10 @@ [ "ensemble.0.model=stt_es_conformer_ctc_large ensemble.1.model=stt_it_conformer_ctc_large", "ensemble.0.model=stt_es_conformer_transducer_large ensemble.1.model=stt_it_conformer_transducer_large", - "ensemble.0.model=stt_es_fastconformer_hybrid_large_pc ensemble.1.model=stt_it_fastconformer_hybrid_large_pc", + ( + "ensemble.0.model=stt_es_fastconformer_hybrid_large_pc ensemble.1.model=stt_it_fastconformer_hybrid_large_pc " + "confidence.method_cfg.alpha=0.33 confidence.method_cfg.entropy_norm=exp " + ), ( "ensemble.0.model=stt_es_fastconformer_hybrid_large_pc " "ensemble.1.model=stt_it_fastconformer_hybrid_large_pc " From 40f8256f51fdf2d295b09586c0d55885aeef0742 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 11 Oct 2023 11:27:41 -0600 Subject: [PATCH 318/512] PEFT eval fix (#7626) (#7638) * fix issue where peft weights are not loaded for distributed checkpoints * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Chen Cui Co-authored-by: Chen Cui Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- nemo/collections/nlp/parts/nlp_overrides.py | 174 ++++++++++++++++++++ 1 file changed, 174 insertions(+) diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index f5e3c082cc89..7b8953a53461 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -814,6 +814,180 @@ def dummy(): return instance +class PEFTSaveRestoreConnector(NLPSaveRestoreConnector): + """ + PEFT models require the ability to load/save a small subset of the full model (once PEFT params have been infused into the base model.) + The PEFTSaveRestoreConnector is used to allow loading and saving only the PEFT params while not saving the entire model. + + Args: + peft_model_nemo_path: Used to provide the .nemo file corresponding to a PEFT model (which will only contain a small set of params) + peft_model_ckpt_path: Used to provide the path to .ckpt files of a PEFT model. This is required when no .nemo is available (yet) such as during resumed training. + peft_model_ckpt_name: The filename of the ckpt file inside the peft_model_ckpt_path folder + If both are provided the peft_model_ckpt_path takes precedence. + If neither are provided, PEFT params are initialized at random (not loaded from any external source). + """ + + def __init__( + self, + peft_model_nemo_path: Optional[str] = None, + peft_model_ckpt_path: Optional[str] = None, + peft_model_ckpt_name: Optional[str] = "model_weights.ckpt", + ) -> None: + super().__init__() + self.peft_model_ckpt_name = peft_model_ckpt_name + if peft_model_ckpt_path: + # First we will try to load a adapter ckpt path + # this is given priority over loading from nemo path to make resumption of training possible + ckpt_name = os.path.basename(peft_model_ckpt_path) + if not ckpt_name.strip() == '': + # update the weights file name inside the ckpt path rank folders + self.peft_model_ckpt_name = ckpt_name + self.peft_model_ckpt_dir = os.path.dirname(peft_model_ckpt_path) + assert os.path.isdir(self.peft_model_ckpt_dir) + self.peft_model_nemo_path = None + elif peft_model_nemo_path: + # If resumption is not possible we will try to load a adapter nemo path + self.peft_model_nemo_path = peft_model_nemo_path + assert os.path.exists(self.peft_model_nemo_path) + self.peft_model_ckpt_dir = None + else: + # We are not resuming training from a nemo file or a ckpt + # We are training the adapter from randomly initialization + self.peft_model_nemo_path = None + self.peft_model_ckpt_dir = None + + def _load_state_dict_from_disk(self, model_weights, map_location=None): + """ + Infuse the state_dict of the base model with PEFT params from either a peft_model_nemo_path or peft_model_ckpt_path + """ + # first load based model weights + base_model_state_dict = super()._load_state_dict_from_disk(model_weights, map_location) + + # if distributed checkpointing, load peft weights in restore_from + if base_model_state_dict: + # Next, We want to load PEFT model's weights + if self.peft_model_nemo_path: + # if the PEFT weights are provided in a .nemo file + # we need to untar the .nemo if its still tarred + with tempfile.TemporaryDirectory() as tmpdir: + self._unpack_nemo_file(self.peft_model_nemo_path, tmpdir) + model_weights_path = self._inject_model_parallel_rank_for_ckpt(tmpdir, self.peft_model_ckpt_name) + peft_state_dict = torch.load(model_weights_path, map_location) + elif self.peft_model_ckpt_dir: + # if the PEFT weights are provided in a ckpt path file + # we don't need to untar + model_weights_path = self._inject_model_parallel_rank_for_ckpt( + self.peft_model_ckpt_dir, self.peft_model_ckpt_name + ) + peft_state_dict = torch.load(model_weights_path, map_location)['state_dict'] + else: + peft_state_dict = {} + base_model_state_dict.update(peft_state_dict) # add the PEFT state_dict into the base model's state_dict + return base_model_state_dict + + def restore_from( + self, + calling_cls, + restore_path: str, + override_config_path: Optional[Union[OmegaConf, str]] = None, + map_location: Optional[torch.device] = None, + strict: bool = True, + return_config: bool = False, + trainer: Trainer = None, + ): + """ + Extends the restore_from method of the `NLPSaveRestoreConnector` so that PEFT params are inserted into the state_dict which is required when training a PEFT model from scratch. + """ + # Get path where the command is executed - the artifacts will be "retrieved" there + # (original .nemo behavior) + loaded_params = super().load_config_and_state_dict( + calling_cls, restore_path, override_config_path, map_location, strict, return_config, trainer, + ) + if not isinstance(loaded_params, tuple) or return_config is True: + return loaded_params + conf, instance, state_dict = loaded_params + + # if we're using dist checkpointing then state_dict will be None + if state_dict is None: + # dist checkpointing needs torch.distributed to load the checkpoint + if parallel_state.is_unitialized(): + + def dummy(): + return + + if trainer.strategy.launcher is not None: + trainer.strategy.launcher.launch(dummy, trainer=trainer) + trainer.strategy.setup_environment() + + with tempfile.TemporaryDirectory() as tmpdir: + # Check if self.model_extracted_dir is set, and is a valid path + if self.model_extracted_dir is not None and os.path.isdir(self.model_extracted_dir): + # Log that NeMo will use the provided `model_extracted_dir` + logging.info( + f"Restoration will occur within pre-extracted directory : " f"`{self.model_extracted_dir}`." + ) + + # Override `tmpdir` above with the pre-extracted `model_extracted_dir` + tmpdir = self.model_extracted_dir + + else: + # Extract the nemo file into the temporary directory + self._unpack_nemo_file( + path2file=restore_path, out_folder=tmpdir, extract_config_only=return_config is True + ) + checkpoint = {} + sharded_state_dict = instance.sharded_state_dict() + + # if distributed checkpointing, load peft weights here instead of in _load_state_dict_from_disk + if self.peft_model_nemo_path: + # if the PEFT weights are provided in a .nemo file + # we need to untar the .nemo if its still tarred + with tempfile.TemporaryDirectory() as tmpdir2: + self._unpack_nemo_file(self.peft_model_nemo_path, tmpdir2) + model_weights_path = self._inject_model_parallel_rank_for_ckpt( + tmpdir2, self.peft_model_ckpt_name + ) + peft_state_dict = torch.load(model_weights_path, map_location) + elif self.peft_model_ckpt_dir: + # if the PEFT weights are provided in a ckpt path file + # we don't need to untar + model_weights_path = self._inject_model_parallel_rank_for_ckpt( + self.peft_model_ckpt_dir, self.peft_model_ckpt_name + ) + peft_state_dict = torch.load(model_weights_path, map_location)['state_dict'] + else: + peft_state_dict = instance.get_peft_state_dict() + + if conf.peft.peft_scheme != "ptuning": + for k in peft_state_dict.keys(): + sharded_state_dict.pop(k) + + checkpoint['state_dict'] = sharded_state_dict + # remove model weights extension + tmp_model_weights_ckpt = os.path.join(tmpdir, self.model_weights_ckpt) + tmp_model_weights_dir = os.path.splitext(tmp_model_weights_ckpt)[0] + assert os.path.isdir(tmp_model_weights_dir), f'Expected {tmp_model_weights_dir} to be a directory.' + checkpoint = dist_checkpointing.load( + sharded_state_dict=checkpoint, checkpoint_dir=tmp_model_weights_dir + ) + checkpoint['state_dict'].update(peft_state_dict) + instance.on_load_checkpoint(checkpoint) + if hasattr(instance, 'setup_transformer_engine_tp_groups'): + instance.setup_transformer_engine_tp_groups() + + else: + if ( + self.peft_model_nemo_path is None and self.peft_model_ckpt_dir is None + ): # we have this check only for training PEFT from scratch + peft_state_dict = instance.get_peft_state_dict() + state_dict.update(peft_state_dict) + state_dict = self.modify_state_dict(conf, state_dict) + self.load_instance_with_state_dict(instance, state_dict, strict) + + logging.info(f'Model {instance.__class__.__name__} was successfully restored from {restore_path}.') + return instance + + class PipelineMixedPrecisionPlugin(MixedPrecisionPlugin): """ Overrides PTL autocasting to not wrap training/val/test_step. We do this because we have the megatron-core fwd/bwd functions in training_step. From 79c3703b180eab2f10d11406072cf14812844536 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 11 Oct 2023 11:28:15 -0600 Subject: [PATCH 319/512] propagate mp config (#7637) (#7639) Signed-off-by: eharper Co-authored-by: Eric Harper --- .../language_modeling/megatron_gpt_continue_training.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_gpt_continue_training.py b/examples/nlp/language_modeling/megatron_gpt_continue_training.py index 31d7145e6bbb..8f0661a51406 100755 --- a/examples/nlp/language_modeling/megatron_gpt_continue_training.py +++ b/examples/nlp/language_modeling/megatron_gpt_continue_training.py @@ -61,9 +61,10 @@ def _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False): gpt_cfg.max_position_embeddings = cfg.model.max_position_embeddings gpt_cfg.seq_len_interpolation_factor = cfg.model.seq_len_interpolation_factor gpt_cfg.use_flash_attention = cfg.model.use_flash_attention - assert ( - gpt_cfg.encoder_seq_length == gpt_cfg.max_position_embeddings * gpt_cfg.seq_len_interpolation_factor - ), 'seq_length should be equal to max_position_embedding * seq_len_interpolation_factor' + gpt_cfg.tensor_model_parallel_size = cfg.model.get('tensor_model_parallel_size', 1) + gpt_cfg.pipeline_model_parallel_size = cfg.model.get('pipeline_model_parallel_size', 1) + gpt_cfg.pipeline_model_parallel_split_rank = cfg.model.get('pipeline_model_parallel_split_rank', 0) + # This is needed when modifying a hparam file directly to load `.ckpt` files. # This is not needed to modify the cfg in `.nemo` files. if add_cfg_to_tree: From aba4a006a332c887a5448fdffaa4b34408559817 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 11 Oct 2023 11:31:18 -0600 Subject: [PATCH 320/512] Add find_unused_parameters_true for text_classiftn and punctuation_capitalization (#7649) (#7657) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> --- .../nlp/text_classification/text_classification_with_bert.py | 2 +- ...punctuation_capitalization_lexical_audio_train_evaluate.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/nlp/text_classification/text_classification_with_bert.py b/examples/nlp/text_classification/text_classification_with_bert.py index db3d263cb1ab..2deb3b34c444 100644 --- a/examples/nlp/text_classification/text_classification_with_bert.py +++ b/examples/nlp/text_classification/text_classification_with_bert.py @@ -109,7 +109,7 @@ def main(cfg: DictConfig) -> None: logging.info(f'\nConfig Params:\n{OmegaConf.to_yaml(cfg)}') try: - strategy = NLPDDPStrategy() + strategy = NLPDDPStrategy(find_unused_parameters=True) except (ImportError, ModuleNotFoundError): strategy = None diff --git a/examples/nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py b/examples/nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py index 85f46bf12bac..149a9a4515e2 100644 --- a/examples/nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py +++ b/examples/nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py @@ -103,6 +103,10 @@ @hydra_runner(config_path="conf", config_name="punctuation_capitalization_lexical_audio_config") def main(cfg: DictConfig) -> None: + # PTL 2.0 has find_unused_parameters as False by default, so its required to set it to True + # when there are unused parameters like here + if cfg.trainer.strategy == 'ddp': + cfg.trainer.strategy = "ddp_find_unused_parameters_true" torch.manual_seed(42) cfg = OmegaConf.merge(OmegaConf.structured(PunctuationCapitalizationLexicalAudioConfig()), cfg) trainer = pl.Trainer(**cfg.trainer) From 503301b2244573dd56e3fe2acfa5223d9b0454fa Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 11 Oct 2023 11:32:03 -0600 Subject: [PATCH 321/512] Hotfix (#7501) (#7568) Signed-off-by: Jan Baczek Co-authored-by: jbaczek <45043825+jbaczek@users.noreply.github.com> --- examples/nlp/language_modeling/conf/megatron_gpt_config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml index 98711d9a41cd..fdad93d14adf 100755 --- a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml @@ -1,4 +1,5 @@ defaults: + - _self_ - optional tp_overlap@model.ub_tp_comm_overlap_cfg: name: megatron_gpt From 98e6ffe9d3d2b949c373b1169a42e5a2e0df4d0c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 11 Oct 2023 11:32:58 -0600 Subject: [PATCH 322/512] Avoid duplicated checkpoint save (#7555) (#7566) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mikołaj Błaż Co-authored-by: mikolajblaz --- nemo/collections/nlp/parts/nlp_overrides.py | 35 +++++++++++++-------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index 7b8953a53461..60b9bbacfbdb 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -291,6 +291,10 @@ def save_checkpoint( checkpoint_dir = ckpt_to_dir(filepath) fs = get_filesystem(checkpoint_dir) + if fs.isdir(checkpoint_dir) and dist_checkpointing.check_is_distributed_checkpoint(checkpoint_dir): + logging.info(f'Distributed checkpoint at path {checkpoint_dir} already exists, skipping saving') + return + if is_global_rank_zero(): fs.makedirs(checkpoint_dir, exist_ok=True) @@ -477,19 +481,24 @@ def save_to(self, model, save_path: str): # model weights is a directory dist_ckpt_dir = ckpt_to_dir(os.path.join(dir_name, self.model_weights_ckpt)) fs = get_filesystem(dist_ckpt_dir) - if is_global_rank_zero(): - fs.makedirs(dist_ckpt_dir, exist_ok=True) - sharded_state_dict = model.sharded_state_dict() - # dist checkpoint needs torch.distributed to save the checkpoint - if parallel_state.is_unitialized(): - - def dummy(): - return - - if model.trainer.strategy.launcher is not None: - model.trainer.strategy.launcher.launch(dummy, trainer=model.trainer) - model.trainer.strategy.setup_environment() - dist_checkpointing.save(sharded_state_dict=sharded_state_dict, checkpoint_dir=dist_ckpt_dir) + + if fs.isdir(dist_ckpt_dir) and dist_checkpointing.check_is_distributed_checkpoint(dist_ckpt_dir): + logging.info(f'Distributed checkpoint at path {dist_ckpt_dir} already exists, skipping saving') + else: + if is_global_rank_zero(): + fs.makedirs(dist_ckpt_dir, exist_ok=True) + + sharded_state_dict = model.sharded_state_dict() + # dist checkpoint needs torch.distributed to save the checkpoint + if parallel_state.is_unitialized(): + + def dummy(): + return + + if model.trainer.strategy.launcher is not None: + model.trainer.strategy.launcher.launch(dummy, trainer=model.trainer) + model.trainer.strategy.setup_environment() + dist_checkpointing.save(sharded_state_dict=sharded_state_dict, checkpoint_dir=dist_ckpt_dir) else: From b6fecc5fc540e01989e193db616f4b8bb0fd3bc6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 11 Oct 2023 11:34:25 -0600 Subject: [PATCH 323/512] Cache FP8 weight and transpose only at the first micro-batch in each validation and test routine (#7470) (#7483) * Cache weight and transpose only in the first batch in all training, val, and test runs * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Sangkug Lym Co-authored-by: Sangkug Lym Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../modules/common/megatron/transformer.py | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/nemo/collections/nlp/modules/common/megatron/transformer.py b/nemo/collections/nlp/modules/common/megatron/transformer.py index 4b018a40e94b..003945e057d6 100644 --- a/nemo/collections/nlp/modules/common/megatron/transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/transformer.py @@ -1033,7 +1033,10 @@ def __init__( reduce_amax=reduce_amax, ) - self.is_first_microbatch = True + self.is_first_train_microbatch = ( + True # Is the current micro-batch the first micro-batch in a global-batch in training + ) + self.is_prev_microbatch_training = True # Is the previous micro-batch in training mode self.microbatch_count = 0 # transformer engine forward needs to know if it is working on the first microbatch self.checkpoint_core_attention = ( activations_checkpoint_granularity == 'selective' @@ -1247,6 +1250,12 @@ def custom_forward(*inputs): attention_mask = inputs[1] encoder_output = inputs[2] enc_dec_attn_mask = inputs[3] + # Cache FP8 weight and transpose at (1) the first micro-batch in each global-batch + # in training, (2) the first micro-batch in each validation and test routine. + # The caching happens in TransformerEngine when passing `is_first_microbatch=True`. + is_first_microbatch = (self.is_first_train_microbatch and self.training) or ( + self.is_prev_microbatch_training and not self.training + ) for index in range(start, end): layer = self._get_layer(index) hidden_states = layer( @@ -1255,7 +1264,7 @@ def custom_forward(*inputs): encoder_output=encoder_output, enc_dec_attn_mask=enc_dec_attn_mask, inference_params=None, - is_first_microbatch=self.is_first_microbatch, + is_first_microbatch=is_first_microbatch, checkpoint_core_attention=False, ) @@ -1539,6 +1548,12 @@ def forward( else: checkpoint_core_attention = False + # Cache FP8 weight and transpose at (1) the first micro-batch in each global-batch + # in training, (2) the first micro-batch in each validation and test routine. + # The caching happens in TransformerEngine when passing `is_first_microbatch=True`. + is_first_microbatch = (self.is_first_train_microbatch and self.training) or ( + self.is_prev_microbatch_training and not self.training + ) if self.transformer_engine: hidden_states = layer( hidden_states, @@ -1546,7 +1561,7 @@ def forward( encoder_output=encoder_output, enc_dec_attn_mask=enc_dec_attn_mask, inference_params=self.inference_params, - is_first_microbatch=self.is_first_microbatch, + is_first_microbatch=is_first_microbatch, checkpoint_core_attention=checkpoint_core_attention, ) else: @@ -1580,9 +1595,10 @@ def forward( self.microbatch_count += 1 if self.microbatch_count % num_micro_batches == 0: self.microbatch_count = 0 - self.is_first_microbatch = True + self.is_first_train_microbatch = True else: - self.is_first_microbatch = False + self.is_first_train_microbatch = False + self.is_prev_microbatch_training = self.training output = hidden_states From 292d232c9df07ef28c26f5fab95d770e13de1d1f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 11 Oct 2023 11:35:08 -0600 Subject: [PATCH 324/512] Add an option to disable manual GC in validation (#7467) (#7476) Signed-off-by: Sangkug Lym Co-authored-by: Sangkug Lym --- .../nlp/models/language_modeling/megatron_base_model.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index 8435ce017254..bb74106eb8b2 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -200,6 +200,8 @@ def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True): } self.gc_interval = cfg.get('gc_interval', 0) + # Do manual garbage collection during validation routine when gc_interval > 0 + self.gc_in_validation = bool(int(os.getenv("NEMO_MANUAL_GC_IN_VALIDATION", 1))) assert self.gc_interval >= 0, "gc_interval should be an integer value larger than or equal to 0." # If gc_interval > 0, memory garbage collection is manually controlled. # The automatic garbage collector sould be disabled before training starts. @@ -284,12 +286,12 @@ def on_train_start(self) -> None: def on_validation_start(self) -> None: super().on_validation_start() - if self.gc_interval > 0: + if self.gc_interval > 0 and self.gc_in_validation: gc.collect() def on_validation_end(self) -> None: super().on_validation_end() - if self.gc_interval > 0: + if self.gc_interval > 0 and self.gc_in_validation: gc.collect() def _build_vocab(self): @@ -447,7 +449,7 @@ def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unus def on_validation_batch_end(self, outputs, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> None: super().on_validation_batch_end(outputs, batch, batch_idx, dataloader_idx) - if self.gc_interval > 0: + if self.gc_interval > 0 and self.gc_in_validation: if self.validation_global_step % self.gc_interval == 0: gc.collect() self.validation_global_step += 1 From 9c48ce1d28080182fd1c39ab22baa7213fe8d2ba Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 11 Oct 2023 11:55:47 -0700 Subject: [PATCH 325/512] Remove PUBLICATIONS.md, point to github.io NeMo page instead (#7694) (#7695) * update publications section to point to blog website page * add hyphen * use double backquotes for code formatting --------- Signed-off-by: Elena Rastorgueva Signed-off-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Co-authored-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> --- PUBLICATIONS.md | 214 ------------------------------------------------ README.rst | 5 +- 2 files changed, 4 insertions(+), 215 deletions(-) delete mode 100644 PUBLICATIONS.md diff --git a/PUBLICATIONS.md b/PUBLICATIONS.md deleted file mode 100644 index cd120efc7e7b..000000000000 --- a/PUBLICATIONS.md +++ /dev/null @@ -1,214 +0,0 @@ -# Publications - -Here, we list a collection of research articles that utilize the NeMo Toolkit. If you would like to include your paper in this collection, please submit a PR updating this document. - -------- - -# Automatic Speech Recognition (ASR) - -
- 2023 - - * [Confidence-based Ensembles of End-to-End Speech Recognition Models](https://arxiv.org/abs/2306.15824) - * [Fast Entropy-Based Methods of Word-Level Confidence Estimation for End-to-End Automatic Speech Recognition](https://ieeexplore.ieee.org/abstract/document/10022960) - * [Damage Control During Domain Adaptation for Transducer Based Automatic Speech Recognition](https://ieeexplore.ieee.org/abstract/document/10023219) - -
- -
- 2022 - - * [Multi-blank Transducers for Speech Recognition](https://arxiv.org/abs/2211.03541) - -
- -
- 2021 - - * [Citrinet: Closing the Gap between Non-Autoregressive and Autoregressive End-to-End Models for Automatic Speech Recognition](https://arxiv.org/abs/2104.01721) - * [SPGISpeech: 5,000 hours of transcribed financial audio for fully formatted end-to-end speech recognition](https://www.isca-speech.org/archive/interspeech_2021/oneill21_interspeech.html) - * [CarneliNet: Neural Mixture Model for Automatic Speech Recognition](https://arxiv.org/abs/2107.10708) - * [CTC Variations Through New WFST Topologies](https://arxiv.org/abs/2110.03098) - * [A Toolbox for Construction and Analysis of Speech Datasets](https://openreview.net/pdf?id=oJ0oHQtAld) - -
- - -
- 2020 - - * [Cross-Language Transfer Learning, Continuous Learning, and Domain Adaptation for End-to-End Automatic Speech Recognition](https://ieeexplore.ieee.org/document/9428334) - * [Correction of Automatic Speech Recognition with Transformer Sequence-To-Sequence Model](https://ieeexplore.ieee.org/abstract/document/9053051) - * [Improving Noise Robustness of an End-to-End Neural Model for Automatic Speech Recognition](https://arxiv.org/abs/2010.12715) - -
- - -
- 2019 - - * [Jasper: An End-to-End Convolutional Neural Acoustic Model](https://arxiv.org/abs/1904.03288) - * [QuartzNet: Deep Automatic Speech Recognition with 1D Time-Channel Separable Convolutions](https://arxiv.org/abs/1910.10261) - - -
- - --------- - - -## Speaker Recognition (SpkR) - -
- 2022 - - * [TitaNet: Neural Model for Speaker Representation with 1D Depth-Wise Separable Convolutions and Global Context](https://ieeexplore.ieee.org/abstract/document/9746806) - -
- - -
- 2020 - - * [SpeakerNet: 1D Depth-wise Separable Convolutional Network for Text-Independent Speaker Recognition and Verification]( https://arxiv.org/pdf/2010.12653.pdf) - -
- --------- - -## Speech Classification - -
- 2022 - - * [AmberNet: A Compact End-to-End Model for Spoken Language Identification](https://arxiv.org/abs/2210.15781) - * [Accidental Learners: Spoken Language Identification in Multilingual Self-Supervised Models](https://arxiv.org/abs/2211.05103) - - -
- -
- 2021 - - * [MarbleNet: Deep 1D Time-Channel Separable Convolutional Neural Network for Voice Activity Detection](https://ieeexplore.ieee.org/abstract/document/9414470/) - -
- - -
- 2020 - - * [MatchboxNet - 1D Time-Channel Separable Convolutional Neural Network Architecture for Speech Commands Recognition](http://www.interspeech2020.org/index.php?m=content&c=index&a=show&catid=337&id=993) - -
- - --------- - -## Speech Translation - -
- 2022 - - * [NVIDIA NeMo Offline Speech Translation Systems for IWSLT 2022](https://aclanthology.org/2022.iwslt-1.18/) - -
- - --------- - -# Natural Language Processing (NLP) - -## Language Modeling - -
- 2022 - - * [Evaluating Parameter Efficient Learning for Generation](https://arxiv.org/abs/2210.13673) - * [Text Mining Drug/Chemical-Protein Interactions using an Ensemble of BERT and T5 Based Models](https://arxiv.org/abs/2111.15617) - -
- -
- 2021 - - * [BioMegatron: Larger Biomedical Domain Language Model ](https://aclanthology.org/2020.emnlp-main.379/) - -
- -## Neural Machine Translation - -
- 2022 - - * [Finding the Right Recipe for Low Resource Domain Adaptation in Neural Machine Translation](https://arxiv.org/abs/2206.01137) - -
- -
- 2021 - - * [NVIDIA NeMo Neural Machine Translatio Systems for English-German and English-Russian News and Biomedical Tasks at WMT21](https://arxiv.org/pdf/2111.08634.pdf) - -
- --------- - -## Dialogue State Tracking - -
- 2021 - - * [SGD-QA: Fast Schema-Guided Dialogue State Tracking for Unseen Services](https://arxiv.org/abs/2105.08049) - -
- -
- 2020 - - * [A Fast and Robust BERT-based Dialogue State Tracker for Schema-Guided Dialogue Dataset](https://arxiv.org/abs/2008.12335) - -
--------- - - -# Text To Speech (TTS) - -
- 2022 - - * [Adapter-Based Extension of Multi-Speaker Text-to-Speech Model for New Speakers](https://arxiv.org/abs/2211.00585) - -
- -
- 2021 - - * [TalkNet: Fully-Convolutional Non-Autoregressive Speech Synthesis Model](https://www.isca-speech.org/archive/interspeech_2021/beliaev21_interspeech.html) - * [TalkNet 2: Non-Autoregressive Depth-Wise Separable Convolutional Model for Speech Synthesis with Explicit Pitch and Duration Prediction](https://arxiv.org/abs/2104.08189) - * [Hi-Fi Multi-Speaker English TTS Dataset](https://www.isca-speech.org/archive/pdfs/interspeech_2021/bakhturina21_interspeech.pdf) - * [Mixer-TTS: non-autoregressive, fast and compact text-to-speech model conditioned on language model embeddings](https://arxiv.org/abs/2110.03584) - -
- - --------- - -# (Inverse) Text Normalization -
- 2022 - - * [Shallow Fusion of Weighted Finite-State Transducer and Language Model for Text Normalization](https://arxiv.org/abs/2203.15917) - * [Thutmose Tagger: Single-pass neural model for Inverse Text Normalization](https://arxiv.org/abs/2208.00064) - -
- -
- 2021 - - * [NeMo Inverse Text Normalization: From Development to Production](https://www.isca-speech.org/archive/pdfs/interspeech_2021/zhang21ga_interspeech.pdf) - * [A Unified Transformer-based Framework for Duplex Text Normalization](https://arxiv.org/pdf/2108.09889.pdf ) - -
- --------- \ No newline at end of file diff --git a/README.rst b/README.rst index 15a2120d30b5..acc30a68e610 100644 --- a/README.rst +++ b/README.rst @@ -360,7 +360,10 @@ We welcome community contributions! Please refer to the `CONTRIBUTING.md `_. We welcome the addition of your own articles to this list ! +We provide an ever-growing list of `publications `_ that utilize the NeMo framework. + +If you would like to add your own article to the list, you are welcome to do so via a pull request to this repository's ``gh-pages-src`` branch. +Please refer to the instructions in the `README of that branch `_. License ------- From 762b5ca280aaf68cba3c77d5aec460f8bacf625c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 11 Oct 2023 14:58:16 -0700 Subject: [PATCH 326/512] Fix multi rank finetune for ASR (#7684) (#7699) * Fix multi rank finetune for ASR * Actually add time * Actually add time --------- Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar --- examples/asr/speech_to_text_finetune.py | 29 ++++++++++++++++++++----- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/examples/asr/speech_to_text_finetune.py b/examples/asr/speech_to_text_finetune.py index a5ba95b41221..667fe998161d 100644 --- a/examples/asr/speech_to_text_finetune.py +++ b/examples/asr/speech_to_text_finetune.py @@ -49,25 +49,25 @@ For documentation on fine-tuning this model, please visit: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/asr/configs.html#fine-tuning-configurations """ - +import time import pytorch_lightning as pl from omegaconf import OmegaConf -from pytorch_lightning.utilities import rank_zero_only from nemo.collections.asr.models import ASRModel from nemo.core.config import hydra_runner from nemo.utils import logging, model_utils from nemo.utils.exp_manager import exp_manager +from nemo.utils.get_rank import is_global_rank_zero -@rank_zero_only -def get_base_model(cfg): +def get_base_model(trainer, cfg): """ Returns the base model to be fine-tuned. Currently supports two types of initializations: 1) `init_from_nemo_model`, and 2) `init_from_pretrained_model`. Args: + trainer: PyTorch Lightning Trainer cfg: config Returns: asr_model: ASRModel instance @@ -84,7 +84,24 @@ def get_base_model(cfg): elif nemo_model_path is not None: asr_model = ASRModel.restore_from(restore_path=nemo_model_path) elif pretrained_name is not None: - asr_model = ASRModel.from_pretrained(model_name=pretrained_name) + # Due to potential first time download of the model on the cluster, we need to make sure that only one + # rank downloads the model and the others wait for the download to finish. + num_ranks = trainer.num_devices * trainer.num_devices + + if num_ranks > 1 and is_global_rank_zero(): + asr_model = ASRModel.from_pretrained(model_name=pretrained_name) + else: + # Sleep on all ranks for at least 60 seconds + wait_time = int(cfg.get('exp_manager', {}).get('seconds_to_sleep', 60)) + if wait_time < 60: + wait_time = 60 + + logging.info(f"Sleeping for at least {wait_time} seconds to wait for model download to finish.") + + time.sleep(wait_time) + + # restore model from cached model dir + asr_model = ASRModel.from_pretrained(model_name=pretrained_name) return asr_model @@ -180,7 +197,7 @@ def main(cfg): "Currently for simplicity of single script for all model types, we only support `init_from_nemo_model` and `init_from_pretrained_model`" ) - asr_model = get_base_model(cfg) + asr_model = get_base_model(trainer, cfg) # Check vocabulary type and update if needed asr_model = check_vocabulary(asr_model, cfg) From 7755c173fac79b6494b686b9781d7ebac9f22054 Mon Sep 17 00:00:00 2001 From: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Date: Wed, 11 Oct 2023 16:23:41 -0700 Subject: [PATCH 327/512] Update docs: readme, getting started, ASR intro (#7679) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [TTS] Add dataset to path of logged artifacts (#7462) * [TTS] Add dataset to path of logged artifacts Signed-off-by: Ryan * [TTS] Revert axis name back to Audio Frames Signed-off-by: Ryan --------- Signed-off-by: Ryan Signed-off-by: Elena Rastorgueva * move install info to INSTALLATION.md Signed-off-by: Elena Rastorgueva * tidy up links Signed-off-by: Elena Rastorgueva * Fix sft dataset truncation (#7464) * Add fix Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix Signed-off-by: Cheng-Ping Hsieh --------- Signed-off-by: Cheng-Ping Hsieh Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Automatic Lip Reading Recognition (ALR) - ASR/CV (Visual ASR) (#7330) * striding_conv1d_k5 and dw_striding_conv1d_k5 subsampling Signed-off-by: mburchi * transpose conv1d inputs Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: mburchi * Update subsampling.py change striding_conv1d_k5 to striding_conv1d Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> * cv branch Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * video manifest Signed-off-by: mburchi * add collection classes Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add test_step_outputs Signed-off-by: mburchi * correct manifest bug when having only audio or only videos Signed-off-by: mburchi * correct manifest bug when having only audio or only videos Signed-off-by: mburchi * clean references Signed-off-by: mburchi * freeze unfreeze transcribe cv models Signed-off-by: mburchi * correct manifest get_full_path bug Signed-off-by: mburchi * update for PR Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * guard torchvision Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update nemo/collections/cv/data/video_to_text_dataset.py Co-authored-by: Igor Gitman Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> * _video_speech_collate_fn in cv/data/video_to_text.py Signed-off-by: mburchi * add self.out = None to asr subsampling Signed-off-by: mburchi * Update nemo/collections/cv/data/video_to_text_dataset.py Co-authored-by: Igor Gitman Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> * cv -> multimodal/speech_cv branch Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: mburchi Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Igor Gitman Signed-off-by: Elena Rastorgueva * HF StarCoder to NeMo conversion script (#7421) * Script to convert HF StarCoder checkpoint to NeMo Signed-off-by: Jan Lasek * StarCoder conversion test Signed-off-by: Jan Lasek * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Jan Lasek * Fix test Signed-off-by: Jan Lasek * Catch up with save_to changes Signed-off-by: Jan Lasek * Don't abbreviate args for clarity Signed-off-by: Jan Lasek * Configurable precision: BF16 vs FP32 Signed-off-by: Jan Lasek * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Jan Lasek Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * fix bug when loading dist ckpt in peft (#7452) Signed-off-by: Hongbin Liu Co-authored-by: Hongbin Liu Signed-off-by: Elena Rastorgueva * Fix adding positional embeddings in-place in transformer module (#7440) Signed-off-by: Tamerlan Tabolov Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Fix (#7478) Signed-off-by: Cheng-Ping Hsieh Signed-off-by: Elena Rastorgueva * add sleep (#7498) (#7499) * add sleep * add sleep onto config instead * add comment --------- Signed-off-by: Gerald Shen Co-authored-by: Gerald Shen <119401249+gshennvm@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Fix exp manager check for sleep (#7503) (#7504) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Signed-off-by: Elena Rastorgueva * bugfix: trainer.accelerator=auto from None. (#7492) (#7493) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Elena Rastorgueva * [doc] fix broken link (#7481) Signed-off-by: Stas Bekman Signed-off-by: Elena Rastorgueva * [TTS] Read audio as int32 to avoid flac read errors (#7477) * [TTS] Read audio as int32 to avoid flac read errors Signed-off-by: Ryan * [TTS] Add comment about read failures Signed-off-by: Ryan --------- Signed-off-by: Ryan Signed-off-by: Elena Rastorgueva * Add dataset 'AISHELL-3' from OpenSLR for training mandarin TTS (#7409) * Add dataset 'AISHELL-3' from OpenSLR for training mandarin TTS * Train 'AISHELL-3' dataset with multi-speakers Signed-off-by: Robin Dong * Update get_data.py update copyright header Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * Update get_data.py added a disclaimer Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add new configuration file for AISHELL3 with multispeaker of fastpitch Signed-off-by: Robin Dong --------- Signed-off-by: Robin Dong Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * dllogger - log on rank 0 only (#7513) Signed-off-by: Stas Bekman Signed-off-by: Elena Rastorgueva * Fix TTS FastPitch tutorial (#7494) (#7516) * Fix --------- Signed-off-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Fix get_dist() tensor dimension (#7506) (#7515) Signed-off-by: Jocelyn Huang Co-authored-by: Jocelyn Signed-off-by: Elena Rastorgueva * bugfix: specify trainer.strategy=auto when devices=1 (#7509) (#7512) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Elena Rastorgueva * fix (#7511) Signed-off-by: Abhinav Khattar Signed-off-by: Elena Rastorgueva * [TTS] Fix FastPitch data prep tutorial (#7524) Signed-off-by: Ryan Signed-off-by: Elena Rastorgueva * add italian tokenization (#7486) * add italian tokenization Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add more ipa lexicon it Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix error deletion Signed-off-by: GiacomoLeoneMaria * add test Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: GiacomoLeoneMaria Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Replace None strategy with auto in tutorial notebooks (#7521) (#7527) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * unpin setuptools (#7534) (#7535) Signed-off-by: fayejf <36722593+fayejf@users.noreply.github.com> Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * remove auto generated examples (#7510) * explicitly remove autogenerated examples for data parallel evaluation Signed-off-by: arendu * mark autogenrated and remove it for test Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Add the `strategy` argument to `MegatronGPTModel.generate()` (#7264) It is passed as an explicit argument rather than through `**strategy_args` so as to ensure someone cannot accidentally pass other arguments that would end up being ignored. It is a keyword-only argument to ensure that if in the future we want to update the signature to `**strategy_args`, we can do it without breaking code. Signed-off-by: Olivier Delalleau <507137+odelalleau@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Fix PTL2.0 related ASR bugs in r1.21.0: Val metrics logging, None dataloader issue (#7531) (#7533) * fix none dataloader issue ptl2 * ptl2.0 logging fixes for rnnt_models --------- Signed-off-by: KunalDhawan Co-authored-by: Kunal Dhawan Co-authored-by: Nithin Rao Signed-off-by: Elena Rastorgueva * gpus -> devices (#7542) (#7545) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Signed-off-by: Elena Rastorgueva * Update FFMPEG version to fix issue with torchaudio (#7551) (#7553) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Signed-off-by: Elena Rastorgueva * PEFT GPT & T5 Refactor (#7308) * initial implementation of add_adapters API * correct type hint * Add config in add_adapters for save and load (@author bobchen) * Remove AdapterConfig to avoid import error * Add AdaterConfig back and move adaptermixin to sft model * Add NLPSaveRestoreConnector as default in NLPModel.restore_from * Add restore_from_nemo_with_adapter and test script * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rename t5 file and classes to be consistent with GPT * add t5 sft dataset * add support for single-file format with T5SFTDataset * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Various small changes to make T5 SFT work like GPT SFT * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add adapter evaluation test script * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add MultiAdaterConfig for ia3 and fix builder issue * Make ptuning for T5SFTModel work using mixin * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add IA3_Adapter for AdapterName * Add adapter name for ptuning and attention adapter * Make test script GPT/T5 agnostic * Add layer selection feature * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Integrate adapter name and config * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update gpt peft tuning script to new API * add t5 peft tuning script with new API * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix IA3 layer selection issue * Override state_dict on SFT model instead of mixin * Add load adapter by adapter config * move peft config map away from example script * auto get config from nemo adapter * Move PEFTConfig to new file * fix ckpt save/load for t5 * name change: add_adapters -> add_adapter * variable name change * update t5 script * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix t5 issues * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add weight tying * update gpt tuning script * PEFT-API proposal * Fix according to comments * update tuning scripts * move merge_cfg_with to mixin class since it applies to both gpt and t5 and requires the model class for restore * Add mcore_gpt support for NLPAdapterMixin * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix typo * variable name change to distinguish "peft" and "adapter" * override `load_adapters` to support `add_adapter` name change * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update tuning and eval script for adapter save/load * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Ptuning on first stage only * add lora tutorial for review * Fix layer selection for mcore * add landing page * fix resume training Signed-off-by: jasonwan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add mcore condition in sharded_state_dict to make sft work * Update lora_tutorial.md First edit of this file for PEFT documentation for NeMO Signed-off-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> * rename Adapter to AttentionAdapter to avoid confusion in doc * Change load_adapters to load .nemo * add quick start guide * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add load_adapters with .ckpt * Remove setup_complete changes in load_adapters * update landing page * remove typo * Updated quick_start.md per Chen Cui Signed-off-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> * Add inference config merger and tutorial * Add doc string for NLPAdapterModelMixin and deprecated warning on MegatronGPTPEFTModel * add supported_methods.md and update other documentations * Update supported_methods.md minor updates. Signed-off-by: Adi Renduchintala * Update landing_page.md minor update. Signed-off-by: Adi Renduchintala * Modify doc string for NLPAdapterModelMixin * Add doc string add_adapters in NLPAdapterModelMixin * rename canonical adapters * remove mcore hard dependency * [PATCH] move microbatch calculator to nemo from apex * remove apex dependency in gpt and t5 sft models * remove apex dependency in gpt model * render doc strings * fix * Add missing virtual_tokens on ptuning * fix docstrings * update gpt-style model coverage in docs * update docstring * Remove pdb * add lightning_fabric to make docstring rendering work * Add Ptuning missing key * try docstring rendering * Fix ptuning issue * update gpt t5 peft tuning and eval scripts * typos * update eval config * fix bug relating to apex dependency removal * typo * make predict step behave the same as test step * make lora tutorial work in notebook * cosmetics * update yaml scripts * mcore_gpt attribute optional * typo * update eval scripts and fix T5 eval bugs * add NLPDDPStrategyNotebook and trainer builder logic to use it * update lora notebook to use new trainer builder * fix microbatch calculator bug for inference after training * Convert markdown files to RST and incorporate with doc * typo * revise language * remove extra cell * remove unnecessary inheritance * remove old tests * move layer selection default so logging messages make sense * remove `save_adapters` as adapter weights are saved automatically during training * initialize weights from a checkpoint instead of randomly * multiple fields can form a context (#7147) * list of context fields and flexible prompt template Signed-off-by: arendu * list of fields for context Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh * Fix bug Signed-off-by: Cheng-Ping Hsieh * Add multiple truncation fields and middle truncation Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Compatible to old ckpt Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix tokenize detokenize issue Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove detokenization, add truncation augmentation Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Resolve comments Signed-off-by: Cheng-Ping Hsieh * Remove unused import Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert eos Signed-off-by: Cheng-Ping Hsieh * Add tokenizer space_sensitive attribute Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix error Signed-off-by: Cheng-Ping Hsieh * Fix erorr and use re Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh * Change assert logic Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Follow adi suggestion Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove merge function Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add example and comment Signed-off-by: Cheng-Ping Hsieh * Remove context_key and add comment Signed-off-by: Cheng-Ping Hsieh * Remove random truncation Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix template none Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh --------- Signed-off-by: arendu Signed-off-by: Cheng-Ping Hsieh Signed-off-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> * revert config changes * remove accidental breakpoint * support TP>1 loading * infer adapter type from checkpoint in during eval * breakup add adapter * enable interpolation of train_ds and validation_ds * update metric calc script to conform to single-file eval format * remove extraneous print * update lora notebook for updated merge_inference_cfg * Update nlp_adapter_mixins.py variable name change Signed-off-by: Chen Cui * turn off grad scaler for PP to match old scripts * remove PEFTSaveRestoreConnector since functionality all covered by the new mixin class * remove resume_from_checkpoint check since covered in #7335 * revert changes made in eval config interpolation * more interpolation * typo * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove dup line Signed-off-by: Chen Cui * code style warnings Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix config mistake Signed-off-by: Chen Cui * add copyright header Signed-off-by: Chen Cui * fix code check warnings Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert changes to remove apex dependency (mixed apex+nemo microbatch calculator broke some CI tests) Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add more deprecation notices Signed-off-by: Chen Cui * update deprecation notices Signed-off-by: Chen Cui * update deprecation notices Signed-off-by: Chen Cui * consolidate peft and sft scripts Signed-off-by: Chen Cui * update CI tests Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * notebook branch points to main to prepare for merge Signed-off-by: Chen Cui * fix gpt and t5 validation with any metric other than loss Signed-off-by: Chen Cui * support pre-extracted checkpoints Signed-off-by: Chen Cui --------- Signed-off-by: jasonwan Signed-off-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> Signed-off-by: Adi Renduchintala Signed-off-by: arendu Signed-off-by: Cheng-Ping Hsieh Signed-off-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Chen Cui Co-authored-by: Chen Cui Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Marc Romeyn Co-authored-by: jasonwan Co-authored-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> Co-authored-by: Adi Renduchintala Co-authored-by: Yuanzhe Dong Co-authored-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * fix a typo (#7496) Signed-off-by: BestJuly Signed-off-by: Elena Rastorgueva * [TTS] remove curly braces from ${BRANCH} in jupyer notebook cell. (#7554) (#7560) * remove curly braces. * remove installation of pynini. --------- Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Elena Rastorgueva * add youtube embed url (#7570) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Elena Rastorgueva * Remap speakers to continuous range of speaker_id for dataset AISHELL3 (#7536) * Remap speakers to continuous range of speaker_id for dataset AISHELL3 * Add new key/value pair to record raw speaker for AISHELL3 dataset Signed-off-by: Robin Dong --------- Signed-off-by: Robin Dong Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * fix validation_step_outputs initialization for multi-dataloader (#7546) (#7572) * added correct validation_step_outputs initialization for mutli-dataloader * changed kernel for display * Update logic for validation and test step outputs * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert multidataloader changes in multilang ASR notebook --------- Signed-off-by: KunalDhawan Signed-off-by: smajumdar Co-authored-by: Kunal Dhawan Co-authored-by: Somshubra Majumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Append output of val step to self.validation_step_outputs (#7530) (#7532) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * [TTS] fixed trainer's accelerator and strategy. (#7569) (#7574) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Elena Rastorgueva * Append val/test output to instance variable in EncDecSpeakerLabelModel (#7562) (#7573) * Append val/test output to the instance variable in EncDecSpeakerLabelModel * Handle test case in evaluation_step * Replace type with isinstance --------- Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Fix CustomProgressBar for resume (#7427) (#7522) * Fix CustomProgress Bar for resume and multiple epochs * Edit num_training_batches * Use max_steps as total for progress bar for resume * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * fix typos in nfa and speech enhancement tutorials (#7580) (#7583) Signed-off-by: Elena Rastorgueva Co-authored-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Add strategy as ddp_find_unused_parameters_true for glue_benchmark.py (#7454) (#7461) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * update strategy (#7577) (#7578) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Signed-off-by: Elena Rastorgueva * Fix typos (#7581) Signed-off-by: Elena Rastorgueva * Change hifigan finetune strategy to ddp_find_unused_parameters_true (#7579) (#7584) * Change strategy to auto --------- Signed-off-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * [BugFix] Add missing quotes for auto strategy in tutorial notebooks (#7541) (#7548) * Add missing quotes for auto strategy * Revert trainer.gpus to trainer.devices in Self_Supervised_Pre_Training.ipynb --------- Signed-off-by: Abhishree Signed-off-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * add build os key (#7596) (#7599) * add build os key * add tools * update to stable version --------- Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Signed-off-by: Elena Rastorgueva * StarCoder SFT test + bump PyT NGC image to 23.09 (#7540) * Add SFT StarCoder test Signed-off-by: Jan Lasek * Remove _modify_config call as it is covered in load_from_nemo just below Signed-off-by: Jan Lasek * Test with pyt:23.09 container Signed-off-by: Jan Lasek --------- Signed-off-by: Jan Lasek Signed-off-by: Elena Rastorgueva * defaults changed (#7600) * defaults changed Signed-off-by: arendu * typo Signed-off-by: arendu * update Signed-off-by: arendu --------- Signed-off-by: arendu Signed-off-by: Elena Rastorgueva * add ItalianPhonemesTokenizer (#7587) * add ItalianPhonemesTokenizer Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix Italian phonemes Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add test Signed-off-by: GiacomoLeoneMaria --------- Signed-off-by: GiacomoLeoneMaria Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * best ckpt fix (#7564) (#7588) Signed-off-by: dimapihtar Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Add files via upload (#7598) specifies the branch Signed-off-by: George <37293288+Jorjeous@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Fix validation in G2PModel and ThutmoseTaggerModel (#7597) (#7606) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Broadcast loss only when using pipeline parallelism and within the pipeline parallel domain (#7576) (#7586) * Broadcast loss only when using pipeline parallelism and within the pipeline parallel domain * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Sangkug Lym Co-authored-by: Sangkug Lym Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Safeguard nemo_text_processing installation on ARM (#7485) * safeguard nemo_text_processing installing Signed-off-by: Jason * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update check Signed-off-by: Jason --------- Signed-off-by: Jason Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Bound transformers version in requirements (#7620) Signed-off-by: Abhishree Signed-off-by: Elena Rastorgueva * fix llama2 70b lora tuning bug (#7622) * fix llama2 70b lora tuning bug Signed-off-by: Chen Cui * Update peft_config.py brackets Signed-off-by: Adi Renduchintala --------- Signed-off-by: Chen Cui Signed-off-by: Adi Renduchintala Co-authored-by: Adi Renduchintala Signed-off-by: Elena Rastorgueva * Fix import error no module name model_utils (#7629) Signed-off-by: Mehadi Hasan Menon Signed-off-by: Elena Rastorgueva * add fc large ls models (#7641) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Koluguri Signed-off-by: Elena Rastorgueva * bugfix: trainer.gpus, trainer.strategy, trainer.accelerator (#7621) (#7642) * [TTS] bugfix for Tacotron2 tutorial due to PTL 2.0 * trainer.gpus -> trainer.devices * fixed related tutorial bugs --------- Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * fix ssl models ptl monitor val through logging (#7608) (#7614) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Co-authored-by: Eric Harper Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Fix metrics for SE tutorial (#7604) (#7612) Signed-off-by: Ante Jukić Co-authored-by: anteju <108555623+anteju@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Add ddp_find_unused_parameters=True and change accelerator to auto (#7623) (#7644) * Add ddp_find_unused_parameters=True and change acclerator to auto * Add ddp_find_unused_parameters True for normalization_as_tagging_train.py --------- Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Fix py3.11 dataclasses issue (#7616) * Fix py3.11 dataclasses issue (#7582) * Update ASR configs to support Python 3.11 Signed-off-by: smajumdar * Update TTS configs to support Python 3.11 Signed-off-by: smajumdar * Guard MeCab and Ipadic Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix remaining ASR dataclasses Signed-off-by: smajumdar * Fix remaining ASR dataclasses Signed-off-by: smajumdar * Fix scripts Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: smajumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Update name to ConfidenceMethodConfig Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Broadcast loss only when using pipeline parallelism and within the pipeline parallel domain (#7576) (#7586) * Broadcast loss only when using pipeline parallelism and within the pipeline parallel domain * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Sangkug Lym Co-authored-by: Sangkug Lym Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Safeguard nemo_text_processing installation on ARM (#7485) * safeguard nemo_text_processing installing Signed-off-by: Jason * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update check Signed-off-by: Jason --------- Signed-off-by: Jason Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Fix changes to confidence measure Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: smajumdar Signed-off-by: Sangkug Lym Signed-off-by: Jason Co-authored-by: Somshubra Majumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Sangkug Lym Co-authored-by: Jason Signed-off-by: Elena Rastorgueva * Fix issues with Dockerfile (#7650) (#7652) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Signed-off-by: Elena Rastorgueva * [ASR] RNN-T greedy decoding max_frames fix for alignment and confidence (#7635) * decoding and test fix Signed-off-by: Aleksandr Laptev * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Aleksandr Laptev Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * [ASR] Fix type error in jasper (#7636) (#7653) Signed-off-by: Ryan Co-authored-by: Ryan Langman Signed-off-by: Elena Rastorgueva * [TTS] Add STFT and SI-SDR loss to audio codec recipe (#7468) * [TTS] Add STFT and SI-SDR loss to audio codec recipe Signed-off-by: Ryan * [TTS] Fix STFT resolution Signed-off-by: Ryan * [TTS] Fix training metric logging Signed-off-by: Ryan * [TTS] Add docstring to mel and stft losses Signed-off-by: Ryan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Ryan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * add outline of asr quickstart info to asr/intro.rst Signed-off-by: Elena Rastorgueva * add CLI, LM and real-time transcription sections Signed-off-by: Elena Rastorgueva * Create per.py (#7538) * Move model precision copy (#7336) * move cfg precision set to megatron base model Signed-off-by: Maanu Grover * remove copy from other models Signed-off-by: Maanu Grover * modify attribute not arg Signed-off-by: Maanu Grover * fix gpt model test for ptl 2.0 Signed-off-by: Maanu Grover * rename function and add docstring Signed-off-by: Maanu Grover * replace precision to dtype conditionals with func call Signed-off-by: Maanu Grover * unnecessary function and cfg reset Signed-off-by: Maanu Grover * set default value Signed-off-by: Maanu Grover * fix precision lookup in a few more places Signed-off-by: Maanu Grover * rename mapping function Signed-off-by: Maanu Grover * ununsed import Signed-off-by: Maanu Grover * save torch datatype to model Signed-off-by: Maanu Grover * set weights precision wrt amp o2 Signed-off-by: Maanu Grover * Revert "set weights precision wrt amp o2" This reverts commit 313a4bfe5eb69d771a6d2433898c0685836aef5c. Signed-off-by: Maanu Grover * revert half precision at inference attempt Signed-off-by: Maanu Grover * move autocast dtype to base model Signed-off-by: Maanu Grover * move params dtype to base model, enable fp16 O2 inf Signed-off-by: Maanu Grover * unused imports Signed-off-by: Maanu Grover --------- Signed-off-by: Maanu Grover Signed-off-by: Sasha Meister * Fix PEFT checkpoint loading (#7388) * Fix PEFT checkpoint loading Signed-off-by: Jason Wang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Jason Wang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Use distributed optimizer support for multiple dtypes (#7359) * Update distopt wrapper with multiple dtype support Remove manual handling of separate FP32 optimizer. Signed-off-by: Tim Moon * Use distopt support for contiguous buffers with multiple dtypes Signed-off-by: Tim Moon * Fix typo Signed-off-by: Tim Moon * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Separate distopt buckets for first GPT layer and non-overlapped params Signed-off-by: Tim Moon * Add distopt logic for int dtypes Signed-off-by: Tim Moon * Update Apex commit Signed-off-by: Tim Moon * Remove unused variables Signed-off-by: Tim Moon * Update Apex commit in README and Jenkensfile Signed-off-by: Tim Moon * Debug Dockerfile and Jenkinsfile Signed-off-by: Tim Moon --------- Signed-off-by: Tim Moon Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Harper Signed-off-by: Sasha Meister * minor fix for llama ckpt conversion script (#7387) * minor fix for llama ckpt conversion script Signed-off-by: Jason Wang * Update Jenkinsfile Signed-off-by: Jason Wang * remove fast_swiglu configuration Signed-off-by: Jason Wang --------- Signed-off-by: Jason Wang Co-authored-by: Eric Harper Signed-off-by: Sasha Meister * Fix wrong calling of librosa.get_duration() in notebook (#7376) Signed-off-by: Robin Dong Co-authored-by: Somshubra Majumdar Signed-off-by: Sasha Meister * [PATCH] PEFT import mcore (#7393) * [PATCH] PEFT import mcore Signed-off-by: Jason Wang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Jason Wang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Create per.py Script for calculation Punctuation Error Rate and related rates (correct rate, deletions rate, etc.) Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * [TTS] Added a callback for logging initial data (#7384) Signed-off-by: Ante Jukić Signed-off-by: Sasha Meister * Update Core Commit (#7402) * Update Core Commit Signed-off-by: Abhinav Khattar * update commit Signed-off-by: Abhinav Khattar --------- Signed-off-by: Abhinav Khattar Signed-off-by: Sasha Meister * Use cfg attribute in bert (#7394) * use cfg attribute instead of arg Signed-off-by: Maanu Grover * use torch_dtype in place of cfg.precision Signed-off-by: Maanu Grover * move precision copy before super constructor Signed-off-by: Maanu Grover * use trainer arg Signed-off-by: Maanu Grover --------- Signed-off-by: Maanu Grover Signed-off-by: Sasha Meister * Add support for bias conversion in Swiglu models (#7386) * Add support for bias conversion in Swiglu models Signed-off-by: smajumdar * Add support for auto extracting tokenizer model Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add support for auto extracting tokenizer model Signed-off-by: smajumdar * Fix issue with missing tokenizer Signed-off-by: smajumdar * Refactor Signed-off-by: smajumdar * Refactor Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: smajumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Update save_to and restore_from for dist checkpointing (#7343) * add dist ckpt to save to, in progress Signed-off-by: eharper * move dist ckpt Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * clean up Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update restore from, need to figure out how to initialize distributed Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * launch distrib if needed when restoring dist ckpt Signed-off-by: eharper * when using mcore we can change tp pp on the fly Signed-off-by: eharper * add load_from_checkpoint support for dist ckpt Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update llama convert script to save dist .nemo Signed-off-by: eharper * fix load dist ckpt Signed-off-by: jasonwan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * setup TE TP groups if needed Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * setup te tp groups if needed Signed-off-by: eharper * remove import Signed-off-by: eharper --------- Signed-off-by: eharper Signed-off-by: jasonwan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: jasonwan Signed-off-by: Sasha Meister * fix forward for with mcore=false (#7403) Signed-off-by: Jimmy Zhang Co-authored-by: Jimmy Zhang Signed-off-by: Sasha Meister * Fix logging to remove 's/it' from progress bar in Megatron models and add train_step_timing (#7374) * Add CustomProgressBar class to exp_manager and trainer callbacks Signed-off-by: Abhishree * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix the progress bar to reflect total microbatch cnt Signed-off-by: Abhishree * Modify CustomProgressBar class 1) Modify CustomProgressBar class to update progress bar per global_step instead of per microbatch 2) Add the callback to other megatron training/finetuning files that are not using MegatronTrainerBuilder Signed-off-by: Abhishree * Add CustomProgressBar callback to tuning files Signed-off-by: Abhishree * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Abhishree Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Set Activation Checkpointing Defaults (#7404) * Set Activation Checkpointing Defaults Signed-off-by: Abhinav Khattar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * check for None Signed-off-by: Abhinav Khattar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Abhinav Khattar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * make loss mask default to false (#7407) Signed-off-by: eharper Signed-off-by: Sasha Meister * Add dummy userbuffer config files (#7408) Signed-off-by: Sangkug Lym Signed-off-by: Sasha Meister * add missing ubconf files (#7412) Signed-off-by: Abhinav Khattar Signed-off-by: Sasha Meister * New tutorial on Speech Data Explorer (#7405) * Added Google Colab based tutorial on Speech Data Explorer Signed-off-by: George Zelenfroynd Signed-off-by: Sasha Meister * Update ptl training ckpt conversion script to work with dist ckpt (#7416) * update ptl convert script Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * don't break legacy Signed-off-by: eharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: eharper Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Allow disabling sanity checking when num_sanity_val_steps=0 (#7413) * Allow disabling sanity checking when num_sanity_val_steps=0 Signed-off-by: Abhishree * Update num_sanity_val_steps to be a multiple of num_microbatches Signed-off-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Abhishree Signed-off-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Add comprehensive error messages (#7261) Signed-off-by: Anton Peganov Signed-off-by: Sasha Meister * check NEMO_PATH (#7418) Signed-off-by: Nikolay Karpov Signed-off-by: Sasha Meister * layer selection for ia3 (#7417) * layer selection for ia3 Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix missing pip package 'einops' (#7397) Signed-off-by: Robin Dong Signed-off-by: Sasha Meister * Fix failure of pyaudio in Google Colab (#7396) Signed-off-by: Robin Dong Signed-off-by: Sasha Meister * Update README.md: output_path --> output_manifest_filepath (#7442) Signed-off-by: Samuele Cornell Signed-off-by: Sasha Meister * Add rope dynamic linear scaling (#7437) * Add dynamic linear scaling Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix Signed-off-by: Cheng-Ping Hsieh --------- Signed-off-by: Cheng-Ping Hsieh Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Yang Zhang Signed-off-by: Sasha Meister * Fix None dataloader issue in PTL2.0 (#7455) * Fix None dataloader issue in PTL2.0 Signed-off-by: KunalDhawan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updating values of self._validation_dl and self._test_dl as well Signed-off-by: KunalDhawan * updating values of self._validation_dl and self._test_dl as well Signed-off-by: KunalDhawan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: KunalDhawan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * [ASR] Confidence measure -> method renames (#7434) * measure -> method Signed-off-by: Aleksandr Laptev * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Aleksandr Laptev Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Add steps for document of getting dataset 'SF Bilingual Speech' (#7378) * Add steps for document of getting dataset 'SF Bilingual Speech' Signed-off-by: Robin Dong * Update datasets.rst added a link from a tutorial demonstrating detailed data prep steps. Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --------- Signed-off-by: Robin Dong Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Sasha Meister * RNN-T confidence and alignment bugfix (#7381) * new frame_confidence and alignments lists are now always created after the while loop Signed-off-by: Aleksandr Laptev * tests added Signed-off-by: Aleksandr Laptev --------- Signed-off-by: Aleksandr Laptev Signed-off-by: Sasha Meister * Fix resume from checkpoint in exp_manager (#7424) (#7426) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: Eric Harper Signed-off-by: Sasha Meister * Fix checking of cuda/cpu device for inputs of Decoder (#7444) * Fix checking of cuda/cpu device for inputs of Decoder Signed-off-by: Robin Dong * Update tacotron2.py Signed-off-by: Jason --------- Signed-off-by: Robin Dong Signed-off-by: Jason Co-authored-by: Jason Signed-off-by: Sasha Meister * Fix failure of ljspeech's get_data.py (#7430) * Fix failure of ljspeech's get_data.py Signed-off-by: Robin Dong * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Robin Dong Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * [TTS] Fix audio codec type checks (#7373) * [TTS] Fix audio codec type checks Signed-off-by: Ryan * [TTS] Fix audio codec tests Signed-off-by: Ryan --------- Signed-off-by: Ryan Signed-off-by: Sasha Meister * [TTS] Add dataset to path of logged artifacts (#7462) * [TTS] Add dataset to path of logged artifacts Signed-off-by: Ryan * [TTS] Revert axis name back to Audio Frames Signed-off-by: Ryan --------- Signed-off-by: Ryan Signed-off-by: Sasha Meister * Fix sft dataset truncation (#7464) * Add fix Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix Signed-off-by: Cheng-Ping Hsieh --------- Signed-off-by: Cheng-Ping Hsieh Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Automatic Lip Reading Recognition (ALR) - ASR/CV (Visual ASR) (#7330) * striding_conv1d_k5 and dw_striding_conv1d_k5 subsampling Signed-off-by: mburchi * transpose conv1d inputs Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: mburchi * Update subsampling.py change striding_conv1d_k5 to striding_conv1d Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> * cv branch Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * video manifest Signed-off-by: mburchi * add collection classes Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add test_step_outputs Signed-off-by: mburchi * correct manifest bug when having only audio or only videos Signed-off-by: mburchi * correct manifest bug when having only audio or only videos Signed-off-by: mburchi * clean references Signed-off-by: mburchi * freeze unfreeze transcribe cv models Signed-off-by: mburchi * correct manifest get_full_path bug Signed-off-by: mburchi * update for PR Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * guard torchvision Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update nemo/collections/cv/data/video_to_text_dataset.py Co-authored-by: Igor Gitman Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> * _video_speech_collate_fn in cv/data/video_to_text.py Signed-off-by: mburchi * add self.out = None to asr subsampling Signed-off-by: mburchi * Update nemo/collections/cv/data/video_to_text_dataset.py Co-authored-by: Igor Gitman Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> * cv -> multimodal/speech_cv branch Signed-off-by: mburchi * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: mburchi Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Igor Gitman Signed-off-by: Sasha Meister * HF StarCoder to NeMo conversion script (#7421) * Script to convert HF StarCoder checkpoint to NeMo Signed-off-by: Jan Lasek * StarCoder conversion test Signed-off-by: Jan Lasek * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Jan Lasek * Fix test Signed-off-by: Jan Lasek * Catch up with save_to changes Signed-off-by: Jan Lasek * Don't abbreviate args for clarity Signed-off-by: Jan Lasek * Configurable precision: BF16 vs FP32 Signed-off-by: Jan Lasek * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Jan Lasek Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * fix bug when loading dist ckpt in peft (#7452) Signed-off-by: Hongbin Liu Co-authored-by: Hongbin Liu Signed-off-by: Sasha Meister * Fix adding positional embeddings in-place in transformer module (#7440) Signed-off-by: Tamerlan Tabolov Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix (#7478) Signed-off-by: Cheng-Ping Hsieh Signed-off-by: Sasha Meister * add sleep (#7498) (#7499) * add sleep * add sleep onto config instead * add comment --------- Signed-off-by: Gerald Shen Co-authored-by: Gerald Shen <119401249+gshennvm@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix exp manager check for sleep (#7503) (#7504) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Signed-off-by: Sasha Meister * bugfix: trainer.accelerator=auto from None. (#7492) (#7493) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Sasha Meister * [doc] fix broken link (#7481) Signed-off-by: Stas Bekman Signed-off-by: Sasha Meister * [TTS] Read audio as int32 to avoid flac read errors (#7477) * [TTS] Read audio as int32 to avoid flac read errors Signed-off-by: Ryan * [TTS] Add comment about read failures Signed-off-by: Ryan --------- Signed-off-by: Ryan Signed-off-by: Sasha Meister * Add dataset 'AISHELL-3' from OpenSLR for training mandarin TTS (#7409) * Add dataset 'AISHELL-3' from OpenSLR for training mandarin TTS * Train 'AISHELL-3' dataset with multi-speakers Signed-off-by: Robin Dong * Update get_data.py update copyright header Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * Update get_data.py added a disclaimer Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add new configuration file for AISHELL3 with multispeaker of fastpitch Signed-off-by: Robin Dong --------- Signed-off-by: Robin Dong Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Sasha Meister * dllogger - log on rank 0 only (#7513) Signed-off-by: Stas Bekman Signed-off-by: Sasha Meister * Fix TTS FastPitch tutorial (#7494) (#7516) * Fix --------- Signed-off-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix get_dist() tensor dimension (#7506) (#7515) Signed-off-by: Jocelyn Huang Co-authored-by: Jocelyn Signed-off-by: Sasha Meister * bugfix: specify trainer.strategy=auto when devices=1 (#7509) (#7512) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Sasha Meister * fix (#7511) Signed-off-by: Abhinav Khattar Signed-off-by: Sasha Meister * [TTS] Fix FastPitch data prep tutorial (#7524) Signed-off-by: Ryan Signed-off-by: Sasha Meister * add italian tokenization (#7486) * add italian tokenization Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add more ipa lexicon it Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix error deletion Signed-off-by: GiacomoLeoneMaria * add test Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: GiacomoLeoneMaria Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Replace None strategy with auto in tutorial notebooks (#7521) (#7527) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Sasha Meister * unpin setuptools (#7534) (#7535) Signed-off-by: fayejf <36722593+fayejf@users.noreply.github.com> Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> Signed-off-by: Sasha Meister * Update per.py - if __name__ == "__main__" removed (now metric can be imported); - removed excessive classes (like "Sample" and "Statistics"); - transition from pandas df to dict of dicts; - removed unnecessary "return"; - notation fixing; - reduced calculation time Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Create punctuation_rates.py Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Format fixing Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * added nemo.logging, header, docstrings, how to use Signed-off-by: Sasha Meister * Added asserions to rate_punctuation.py Signed-off-by: Sasha Meister * fix typo Signed-off-by: Sasha Meister * added function for import and call, docstrings Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * remove auto generated examples (#7510) * explicitly remove autogenerated examples for data parallel evaluation Signed-off-by: arendu * mark autogenrated and remove it for test Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Add the `strategy` argument to `MegatronGPTModel.generate()` (#7264) It is passed as an explicit argument rather than through `**strategy_args` so as to ensure someone cannot accidentally pass other arguments that would end up being ignored. It is a keyword-only argument to ensure that if in the future we want to update the signature to `**strategy_args`, we can do it without breaking code. Signed-off-by: Olivier Delalleau <507137+odelalleau@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix PTL2.0 related ASR bugs in r1.21.0: Val metrics logging, None dataloader issue (#7531) (#7533) * fix none dataloader issue ptl2 * ptl2.0 logging fixes for rnnt_models --------- Signed-off-by: KunalDhawan Co-authored-by: Kunal Dhawan Co-authored-by: Nithin Rao Signed-off-by: Sasha Meister * gpus -> devices (#7542) (#7545) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Signed-off-by: Sasha Meister * Update FFMPEG version to fix issue with torchaudio (#7551) (#7553) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Signed-off-by: Sasha Meister * PEFT GPT & T5 Refactor (#7308) * initial implementation of add_adapters API * correct type hint * Add config in add_adapters for save and load (@author bobchen) * Remove AdapterConfig to avoid import error * Add AdaterConfig back and move adaptermixin to sft model * Add NLPSaveRestoreConnector as default in NLPModel.restore_from * Add restore_from_nemo_with_adapter and test script * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rename t5 file and classes to be consistent with GPT * add t5 sft dataset * add support for single-file format with T5SFTDataset * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Various small changes to make T5 SFT work like GPT SFT * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add adapter evaluation test script * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add MultiAdaterConfig for ia3 and fix builder issue * Make ptuning for T5SFTModel work using mixin * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add IA3_Adapter for AdapterName * Add adapter name for ptuning and attention adapter * Make test script GPT/T5 agnostic * Add layer selection feature * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Integrate adapter name and config * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update gpt peft tuning script to new API * add t5 peft tuning script with new API * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix IA3 layer selection issue * Override state_dict on SFT model instead of mixin * Add load adapter by adapter config * move peft config map away from example script * auto get config from nemo adapter * Move PEFTConfig to new file * fix ckpt save/load for t5 * name change: add_adapters -> add_adapter * variable name change * update t5 script * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix t5 issues * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add weight tying * update gpt tuning script * PEFT-API proposal * Fix according to comments * update tuning scripts * move merge_cfg_with to mixin class since it applies to both gpt and t5 and requires the model class for restore * Add mcore_gpt support for NLPAdapterMixin * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix typo * variable name change to distinguish "peft" and "adapter" * override `load_adapters` to support `add_adapter` name change * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update tuning and eval script for adapter save/load * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Ptuning on first stage only * add lora tutorial for review * Fix layer selection for mcore * add landing page * fix resume training Signed-off-by: jasonwan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add mcore condition in sharded_state_dict to make sft work * Update lora_tutorial.md First edit of this file for PEFT documentation for NeMO Signed-off-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> * rename Adapter to AttentionAdapter to avoid confusion in doc * Change load_adapters to load .nemo * add quick start guide * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add load_adapters with .ckpt * Remove setup_complete changes in load_adapters * update landing page * remove typo * Updated quick_start.md per Chen Cui Signed-off-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> * Add inference config merger and tutorial * Add doc string for NLPAdapterModelMixin and deprecated warning on MegatronGPTPEFTModel * add supported_methods.md and update other documentations * Update supported_methods.md minor updates. Signed-off-by: Adi Renduchintala * Update landing_page.md minor update. Signed-off-by: Adi Renduchintala * Modify doc string for NLPAdapterModelMixin * Add doc string add_adapters in NLPAdapterModelMixin * rename canonical adapters * remove mcore hard dependency * [PATCH] move microbatch calculator to nemo from apex * remove apex dependency in gpt and t5 sft models * remove apex dependency in gpt model * render doc strings * fix * Add missing virtual_tokens on ptuning * fix docstrings * update gpt-style model coverage in docs * update docstring * Remove pdb * add lightning_fabric to make docstring rendering work * Add Ptuning missing key * try docstring rendering * Fix ptuning issue * update gpt t5 peft tuning and eval scripts * typos * update eval config * fix bug relating to apex dependency removal * typo * make predict step behave the same as test step * make lora tutorial work in notebook * cosmetics * update yaml scripts * mcore_gpt attribute optional * typo * update eval scripts and fix T5 eval bugs * add NLPDDPStrategyNotebook and trainer builder logic to use it * update lora notebook to use new trainer builder * fix microbatch calculator bug for inference after training * Convert markdown files to RST and incorporate with doc * typo * revise language * remove extra cell * remove unnecessary inheritance * remove old tests * move layer selection default so logging messages make sense * remove `save_adapters` as adapter weights are saved automatically during training * initialize weights from a checkpoint instead of randomly * multiple fields can form a context (#7147) * list of context fields and flexible prompt template Signed-off-by: arendu * list of fields for context Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh * Fix bug Signed-off-by: Cheng-Ping Hsieh * Add multiple truncation fields and middle truncation Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Compatible to old ckpt Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix tokenize detokenize issue Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove detokenization, add truncation augmentation Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Resolve comments Signed-off-by: Cheng-Ping Hsieh * Remove unused import Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert eos Signed-off-by: Cheng-Ping Hsieh * Add tokenizer space_sensitive attribute Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix error Signed-off-by: Cheng-Ping Hsieh * Fix erorr and use re Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh * Change assert logic Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Follow adi suggestion Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove merge function Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add example and comment Signed-off-by: Cheng-Ping Hsieh * Remove context_key and add comment Signed-off-by: Cheng-Ping Hsieh * Remove random truncation Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix template none Signed-off-by: Cheng-Ping Hsieh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: Cheng-Ping Hsieh --------- Signed-off-by: arendu Signed-off-by: Cheng-Ping Hsieh Signed-off-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> * revert config changes * remove accidental breakpoint * support TP>1 loading * infer adapter type from checkpoint in during eval * breakup add adapter * enable interpolation of train_ds and validation_ds * update metric calc script to conform to single-file eval format * remove extraneous print * update lora notebook for updated merge_inference_cfg * Update nlp_adapter_mixins.py variable name change Signed-off-by: Chen Cui * turn off grad scaler for PP to match old scripts * remove PEFTSaveRestoreConnector since functionality all covered by the new mixin class * remove resume_from_checkpoint check since covered in #7335 * revert changes made in eval config interpolation * more interpolation * typo * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove dup line Signed-off-by: Chen Cui * code style warnings Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix config mistake Signed-off-by: Chen Cui * add copyright header Signed-off-by: Chen Cui * fix code check warnings Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert changes to remove apex dependency (mixed apex+nemo microbatch calculator broke some CI tests) Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add more deprecation notices Signed-off-by: Chen Cui * update deprecation notices Signed-off-by: Chen Cui * update deprecation notices Signed-off-by: Chen Cui * consolidate peft and sft scripts Signed-off-by: Chen Cui * update CI tests Signed-off-by: Chen Cui * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * notebook branch points to main to prepare for merge Signed-off-by: Chen Cui * fix gpt and t5 validation with any metric other than loss Signed-off-by: Chen Cui * support pre-extracted checkpoints Signed-off-by: Chen Cui --------- Signed-off-by: jasonwan Signed-off-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> Signed-off-by: Adi Renduchintala Signed-off-by: arendu Signed-off-by: Cheng-Ping Hsieh Signed-off-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Chen Cui Co-authored-by: Chen Cui Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Marc Romeyn Co-authored-by: jasonwan Co-authored-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> Co-authored-by: Adi Renduchintala Co-authored-by: Yuanzhe Dong Co-authored-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Sasha Meister * fix a typo (#7496) Signed-off-by: BestJuly Signed-off-by: Sasha Meister * [TTS] remove curly braces from ${BRANCH} in jupyer notebook cell. (#7554) (#7560) * remove curly braces. * remove installation of pynini. --------- Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Sasha Meister * add youtube embed url (#7570) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Sasha Meister * Remap speakers to continuous range of speaker_id for dataset AISHELL3 (#7536) * Remap speakers to continuous range of speaker_id for dataset AISHELL3 * Add new key/value pair to record raw speaker for AISHELL3 dataset Signed-off-by: Robin Dong --------- Signed-off-by: Robin Dong Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * fix validation_step_outputs initialization for multi-dataloader (#7546) (#7572) * added correct validation_step_outputs initialization for mutli-dataloader * changed kernel for display * Update logic for validation and test step outputs * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert multidataloader changes in multilang ASR notebook --------- Signed-off-by: KunalDhawan Signed-off-by: smajumdar Co-authored-by: Kunal Dhawan Co-authored-by: Somshubra Majumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Append output of val step to self.validation_step_outputs (#7530) (#7532) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Sasha Meister * [TTS] fixed trainer's accelerator and strategy. (#7569) (#7574) Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Sasha Meister * Append val/test output to instance variable in EncDecSpeakerLabelModel (#7562) (#7573) * Append val/test output to the instance variable in EncDecSpeakerLabelModel * Handle test case in evaluation_step * Replace type with isinstance --------- Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix CustomProgressBar for resume (#7427) (#7522) * Fix CustomProgress Bar for resume and multiple epochs * Edit num_training_batches * Use max_steps as total for progress bar for resume * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * fix typos in nfa and speech enhancement tutorials (#7580) (#7583) Signed-off-by: Elena Rastorgueva Co-authored-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Signed-off-by: Sasha Meister * Add strategy as ddp_find_unused_parameters_true for glue_benchmark.py (#7454) (#7461) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Sasha Meister * update strategy (#7577) (#7578) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Signed-off-by: Sasha Meister * Fix typos (#7581) Signed-off-by: Sasha Meister * Change hifigan finetune strategy to ddp_find_unused_parameters_true (#7579) (#7584) * Change strategy to auto --------- Signed-off-by: Cheng-Ping Hsieh Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Sasha Meister * [BugFix] Add missing quotes for auto strategy in tutorial notebooks (#7541) (#7548) * Add missing quotes for auto strategy * Revert trainer.gpus to trainer.devices in Self_Supervised_Pre_Training.ipynb --------- Signed-off-by: Abhishree Signed-off-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Sasha Meister * added per tests Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * [PATCH] PEFT import mcore (#7393) * [PATCH] PEFT import mcore Signed-off-by: Jason Wang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Jason Wang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * add build os key (#7596) (#7599) * add build os key * add tools * update to stable version --------- Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Signed-off-by: Sasha Meister * StarCoder SFT test + bump PyT NGC image to 23.09 (#7540) * Add SFT StarCoder test Signed-off-by: Jan Lasek * Remove _modify_config call as it is covered in load_from_nemo just below Signed-off-by: Jan Lasek * Test with pyt:23.09 container Signed-off-by: Jan Lasek --------- Signed-off-by: Jan Lasek Signed-off-by: Sasha Meister * defaults changed (#7600) * defaults changed Signed-off-by: arendu * typo Signed-off-by: arendu * update Signed-off-by: arendu --------- Signed-off-by: arendu Signed-off-by: Sasha Meister * add ItalianPhonemesTokenizer (#7587) * add ItalianPhonemesTokenizer Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix Italian phonemes Signed-off-by: GiacomoLeoneMaria * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add test Signed-off-by: GiacomoLeoneMaria --------- Signed-off-by: GiacomoLeoneMaria Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Sasha Meister * best ckpt fix (#7564) (#7588) Signed-off-by: dimapihtar Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Signed-off-by: Sasha Meister * rate_punctuation.py Fixed output manifest saving Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Fix tests Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * Add files via upload (#7598) specifies the branch Signed-off-by: George <37293288+Jorjeous@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix validation in G2PModel and ThutmoseTaggerModel (#7597) (#7606) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Sasha Meister * Broadcast loss only when using pipeline parallelism and within the pipeline parallel domain (#7576) (#7586) * Broadcast loss only when using pipeline parallelism and within the pipeline parallel domain * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Sangkug Lym Co-authored-by: Sangkug Lym Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Safeguard nemo_text_processing installation on ARM (#7485) * safeguard nemo_text_processing installing Signed-off-by: Jason * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update check Signed-off-by: Jason --------- Signed-off-by: Jason Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Sasha Meister * Function name fixing Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Moving PER to speech_to_text_eval.py Added: - "use_per": PER metric computing; - "scores_per_sample": metrics computation sample by sample for wer/cer/punctuation rates; - "output_with_scores_filename": saving manifest with metrics Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * Update test_metrics.py Updated "punctuation_error_rate" function name Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Added use_per description Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * guard extra dependencies Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Write metrics to "output_filename" if "scores_per_sample=True" Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * scores_per_sample description Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix import guards Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Stats printing when HAVE_TABLUATE_AND_PANDAS=False Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * Bound transformers version in requirements (#7620) Signed-off-by: Abhishree Signed-off-by: Sasha Meister * fix llama2 70b lora tuning bug (#7622) * fix llama2 70b lora tuning bug Signed-off-by: Chen Cui * Update peft_config.py brackets Signed-off-by: Adi Renduchintala --------- Signed-off-by: Chen Cui Signed-off-by: Adi Renduchintala Co-authored-by: Adi Renduchintala Signed-off-by: Sasha Meister * Fix import error no module name model_utils (#7629) Signed-off-by: Mehadi Hasan Menon Signed-off-by: Sasha Meister * Delete examples/asr/rate_punctuation.py Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * Added use_per description Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * metric and variables name fixing Signed-off-by: Sasha Meister * Add else samples = None Signed-off-by: Sasha Meister * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sasha Meister * add fc large ls models (#7641) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Koluguri Signed-off-by: Sasha Meister * bugfix: trainer.gpus, trainer.strategy, trainer.accelerator (#7621) (#7642) * [TTS] bugfix for Tacotron2 tutorial due to PTL 2.0 * trainer.gpus -> trainer.devices * fixed related tutorial bugs --------- Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Sasha Meister * fix ssl models ptl monitor val through logging (#7608) (#7614) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Co-authored-by: Eric Harper Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix metrics for SE tutorial (#7604) (#7612) Signed-off-by: Ante Jukić Co-authored-by: anteju <108555623+anteju@users.noreply.github.com> Signed-off-by: Sasha Meister * Add ddp_find_unused_parameters=True and change accelerator to auto (#7623) (#7644) * Add ddp_find_unused_parameters=True and change acclerator to auto * Add ddp_find_unused_parameters True for normalization_as_tagging_train.py --------- Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Sasha Meister * Fix py3.11 dataclasses issue (#7616) * Fix py3.11 dataclasses issue (#7582) * Update ASR configs to support Python 3.11 Signe… * add more info and links to asr intro.rst Signed-off-by: Elena Rastorgueva * conversion issue fix (#7648) (#7668) Signed-off-by: dimapihtar Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * layernorm1p fix (#7523) (#7567) * layernorm1p fix * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add layernorm1p to if statement * config changes * gpt config changes * remove layernorm_zero_centered_gamma from gpt config * change line --------- Signed-off-by: dimapihtar Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * generalized chat sft prompt (#7655) * fix dataset issues Signed-off-by: Yi Dong * working version Signed-off-by: Yi Dong * all passed Signed-off-by: Yi Dong * refactor tests Signed-off-by: Yi Dong * all pass Signed-off-by: Yi Dong * working version Signed-off-by: Yi Dong * use end name signal for labels Signed-off-by: Yi Dong * all fixed Signed-off-by: Yi Dong * update doc Signed-off-by: Yi Dong * style fix Signed-off-by: Yi Dong * remove unused imports Signed-off-by: Yi Dong * make sure nccl not timing out Signed-off-by: Yi Dong * style fix Signed-off-by: Yi Dong * generate example template Signed-off-by: Yi Dong * generic end of name token Signed-off-by: Yi Dong * style fix Signed-off-by: Yi Dong * add the chat prompt format into the config Signed-off-by: Yi Dong * make sure sft working Signed-off-by: Yi Dong * address reviewer comment Signed-off-by: Yi Dong * fix non Signed-off-by: Yi Dong * try openAI prompt Signed-off-by: Yi Dong * remove unused imports Signed-off-by: Yi Dong * remove human labels from the data Signed-off-by: Yi Dong * use hf dataset to clean Signed-off-by: Yi Dong * reviewer comments Signed-off-by: Yi Dong --------- Signed-off-by: Yi Dong Signed-off-by: Elena Rastorgueva * Fix vad & speech command tutorial - onnx (#7671) (#7672) * fix vad onnx * fix mbn onnx --------- Signed-off-by: fayejf Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * put installation info back in README Signed-off-by: Elena Rastorgueva * remove installation info from docs, point to info in readme Signed-off-by: Elena Rastorgueva * tidy up readme a bit Signed-off-by: Elena Rastorgueva * tidy up starthere intro, update code Signed-off-by: Elena Rastorgueva * correct timestamps code snippet, other updates Signed-off-by: Elena Rastorgueva * remove old link placeholder Signed-off-by: Elena Rastorgueva * Fix in the confidence ensemble test (#7682) * Fix in the confidence ensemble test Signed-off-by: Igor Gitman * Correct parameter names Signed-off-by: Igor Gitman --------- Signed-off-by: Igor Gitman Signed-off-by: Elena Rastorgueva * remove example command to install pytorch Signed-off-by: Elena Rastorgueva * put back links for pretrained models and discussion board Signed-off-by: Elena Rastorgueva * change publications link to point to blog page Signed-off-by: Elena Rastorgueva * fix link formatting Signed-off-by: Elena Rastorgueva * PEFT eval fix (#7626) (#7638) * fix issue where peft weights are not loaded for distributed checkpoints * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Chen Cui Co-authored-by: Chen Cui Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * fix publications link formatting Signed-off-by: Elena Rastorgueva * propagate mp config (#7637) (#7639) Signed-off-by: eharper Co-authored-by: Eric Harper Signed-off-by: Elena Rastorgueva * Add find_unused_parameters_true for text_classiftn and punctuation_capitalization (#7649) (#7657) Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Hotfix (#7501) (#7568) Signed-off-by: Jan Baczek Co-authored-by: jbaczek <45043825+jbaczek@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Avoid duplicated checkpoint save (#7555) (#7566) Signed-off-by: Mikołaj Błaż Co-authored-by: mikolajblaz Signed-off-by: Elena Rastorgueva * Cache FP8 weight and transpose only at the first micro-batch in each validation and test routine (#7470) (#7483) * Cache weight and transpose only in the first batch in all training, val, and test runs * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Sangkug Lym Co-authored-by: Sangkug Lym Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Elena Rastorgueva * Add an option to disable manual GC in validation (#7467) (#7476) Signed-off-by: Sangkug Lym Co-authored-by: Sangkug Lym Signed-off-by: Elena Rastorgueva * more natural phrasing Signed-off-by: Elena Rastorgueva * update gh-pages-src link to point to readme Signed-off-by: Elena Rastorgueva * remove PUBLICATIONS.md and update README Signed-off-by: Elena Rastorgueva * Update README.rst Remove old video, move PyData video to tutorials section Signed-off-by: Elena Rastorgueva * specify optional mac dependencies, add space in comment Signed-off-by: Elena Rastorgueva * update copyright year Signed-off-by: Elena Rastorgueva --------- Signed-off-by: Ryan Signed-off-by: Elena Rastorgueva Signed-off-by: Cheng-Ping Hsieh Signed-off-by: Hongbin Liu Signed-off-by: Tamerlan Tabolov Signed-off-by: smajumdar Signed-off-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Signed-off-by: Stas Bekman Signed-off-by: Robin Dong Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Jocelyn Huang Signed-off-by: Abhinav Khattar Signed-off-by: Abhishree Signed-off-by: fayejf <36722593+fayejf@users.noreply.github.com> Signed-off-by: Olivier Delalleau <507137+odelalleau@users.noreply.github.com> Signed-off-by: Nithin Rao Koluguri Signed-off-by: jasonwan Signed-off-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> Signed-off-by: Adi Renduchintala Signed-off-by: arendu Signed-off-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Signed-off-by: Chen Cui Signed-off-by: BestJuly Signed-off-by: Jan Lasek Signed-off-by: arendu Signed-off-by: GiacomoLeoneMaria Signed-off-by: dimapihtar Signed-off-by: George <37293288+Jorjeous@users.noreply.github.com> Signed-off-by: Jason Signed-off-by: Mehadi Hasan Menon Signed-off-by: Ante Jukić Signed-off-by: Maanu Grover Signed-off-by: Sasha Meister Signed-off-by: Jason Wang Signed-off-by: Tim Moon Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: eharper Signed-off-by: Jimmy Zhang Signed-off-by: Sangkug Lym Signed-off-by: George Zelenfroynd Signed-off-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Signed-off-by: Anton Peganov Signed-off-by: Nikolay Karpov Signed-off-by: Samuele Cornell Signed-off-by: KunalDhawan Signed-off-by: Aleksandr Laptev Signed-off-by: mburchi Signed-off-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> Signed-off-by: Gerald Shen Signed-off-by: Yi Dong Signed-off-by: Igor Gitman Signed-off-by: Jan Baczek Signed-off-by: Mikołaj Błaż Signed-off-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Co-authored-by: Ryan Langman Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Maxime Burchi <60737204+burchim@users.noreply.github.com> Co-authored-by: Jan Lasek Co-authored-by: Kelvin Liu Co-authored-by: Hongbin Liu Co-authored-by: Tamerlan Tabolov Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Somshubra Majumdar Co-authored-by: Xuesong Yang <16880-xueyang@users.noreply.gitlab-master.nvidia.com> Co-authored-by: Stas Bekman Co-authored-by: Robin Dong Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Jocelyn Co-authored-by: Abhinav Khattar Co-authored-by: Giacomo Leone Maria Cavallini <72698188+GiacomoLeoneMaria@users.noreply.github.com> Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> Co-authored-by: Adi Renduchintala Co-authored-by: Olivier Delalleau <507137+odelalleau@users.noreply.github.com> Co-authored-by: Nithin Rao Co-authored-by: meatybobby Co-authored-by: Chen Cui Co-authored-by: Marc Romeyn Co-authored-by: jasonwan Co-authored-by: hkelly33 <58792115+hkelly33@users.noreply.github.com> Co-authored-by: Yuanzhe Dong Co-authored-by: Cheng-Ping Hsieh Co-authored-by: Li Tao Co-authored-by: Igor Gitman Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Co-authored-by: George <37293288+Jorjeous@users.noreply.github.com> Co-authored-by: Jason Co-authored-by: Mehadi Hasan Menon Co-authored-by: Eric Harper Co-authored-by: anteju <108555623+anteju@users.noreply.github.com> Co-authored-by: Aleksandr Laptev Co-authored-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Co-authored-by: Maanu Grover <109391026+maanug-nv@users.noreply.github.com> Co-authored-by: Tim Moon <4406448+timmoon10@users.noreply.github.com> Co-authored-by: JimmyZhang12 <67203904+JimmyZhang12@users.noreply.github.com> Co-authored-by: Jimmy Zhang Co-authored-by: Sangkug Lym Co-authored-by: PeganovAnton Co-authored-by: Nikolay Karpov Co-authored-by: Samuele Cornell Co-authored-by: Yang Zhang Co-authored-by: Kunal Dhawan Co-authored-by: Igor Gitman Co-authored-by: Gerald Shen <119401249+gshennvm@users.noreply.github.com> Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> Co-authored-by: Yi Dong <43824965+yidong72@users.noreply.github.com> Co-authored-by: jbaczek <45043825+jbaczek@users.noreply.github.com> Co-authored-by: mikolajblaz --- README.rst | 71 +++++--- docs/source/asr/asr_language_modeling.rst | 1 + docs/source/asr/datasets.rst | 2 + docs/source/asr/intro.rst | 189 ++++++++++++++++++--- docs/source/asr/resources.rst | 17 -- docs/source/asr/results.rst | 3 + docs/source/conf.py | 2 +- docs/source/starthere/intro.rst | 193 ++++++++-------------- 8 files changed, 283 insertions(+), 195 deletions(-) delete mode 100644 docs/source/asr/resources.rst diff --git a/README.rst b/README.rst index acc30a68e610..d05a9b63c642 100644 --- a/README.rst +++ b/README.rst @@ -58,7 +58,7 @@ State of the Art pretrained NeMo models are freely available on `HuggingFace Hub These models can be used to transcribe audio, synthesize speech, or translate text in just a few lines of code. We have extensive `tutorials `_ that -can all be run on `Google Colab `_. +can be run on `Google Colab `_. For advanced users that want to train NeMo models from scratch or finetune existing NeMo models we have a full suite of `example scripts `_ that support multi-GPU/multi-node training. @@ -67,30 +67,14 @@ For scaling NeMo LLM training on Slurm clusters or public clouds, please see the The NM launcher has extensive recipes, scripts, utilities, and documentation for training NeMo LLMs and also has an `Autoconfigurator `_ which can be used to find the optimal model parallel configuration for training on a specific cluster. -Also see the two introductory videos below for a high level overview of NeMo. - -* Developing State-Of-The-Art Conversational AI Models in Three Lines of Code. -* NVIDIA NeMo: Toolkit for Conversational AI at PyData Yerevan 2022. - -|three_lines| |pydata| - -.. |pydata| image:: https://img.youtube.com/vi/J-P6Sczmas8/maxres3.jpg - :target: https://www.youtube.com/embed/J-P6Sczmas8?mute=0&start=14&autoplay=0 - :width: 600 - :alt: Develop Conversational AI Models in 3 Lines - -.. |three_lines| image:: https://img.youtube.com/vi/wBgpMf_KQVw/maxresdefault.jpg - :target: https://www.youtube.com/embed/wBgpMf_KQVw?mute=0&start=0&autoplay=0 - :width: 600 - :alt: Introduction at PyData@Yerevan 2022 - Key Features ------------ * Speech processing * `HuggingFace Space for Audio Transcription (File, Microphone and YouTube) `_ + * `Pretrained models `_ available in 14+ languages * `Automatic Speech Recognition (ASR) `_ - * Supported ASR models: ``_ + * Supported ASR `models `_: * Jasper, QuartzNet, CitriNet, ContextNet * Conformer-CTC, Conformer-Transducer, FastConformer-CTC, FastConformer-Transducer * Squeezeformer-CTC and Squeezeformer-Transducer @@ -101,7 +85,7 @@ Key Features * Hybrid Transducer/CTC * NeMo Original `Multi-blank Transducers `_ and `Token-and-Duration Transducers (TDT) `_ * Streaming/Buffered ASR (CTC/Transducer) - `Chunked Inference Examples `_ - * Cache-aware Streaming Conformer with multiple lookaheads - ``_ + * `Cache-aware Streaming Conformer `_ with multiple lookaheads. * Beam Search decoding * `Language Modelling for ASR (CTC and RNNT) `_: N-gram LM in fusion with Beam Search decoding, Neural Rescoring with Transformer * `Support of long audios for Conformer with memory efficient local attention `_ @@ -113,8 +97,6 @@ Key Features * Clustering Diarizer: TitaNet, ECAPA_TDNN, SpeakerNet * Neural Diarizer: MSDD (Multi-scale Diarization Decoder) * `Speech Intent Detection and Slot Filling `_: Conformer-Transformer - * `Pretrained models on different languages. `_: English, Spanish, German, Russian, Chinese, French, Italian, Polish, ... - * `NGC collection of pre-trained speech processing models. `_ * Natural Language Processing * `NeMo Megatron pre-training of Large Language Models `_ * `Neural Machine Translation (NMT) `_ @@ -151,7 +133,7 @@ Requirements 1) Python 3.10 or above 2) Pytorch 1.13.1 or above -3) NVIDIA GPU for training +3) NVIDIA GPU, if you intend to do model training Documentation ------------- @@ -178,6 +160,15 @@ Tutorials --------- A great way to start with NeMo is by checking `one of our tutorials `_. +You can also get a high-level overview of NeMo by watching the talk *NVIDIA NeMo: Toolkit for Conversational AI*, presented at PyData Yerevan 2022: + +|pydata| + +.. |pydata| image:: https://img.youtube.com/vi/J-P6Sczmas8/maxres3.jpg + :target: https://www.youtube.com/embed/J-P6Sczmas8?mute=0&start=14&autoplay=0 + :width: 600 + :alt: NeMo presentation at PyData@Yerevan 2022 + Getting help with NeMo ---------------------- FAQ can be found on NeMo's `Discussions board `_. You are welcome to ask questions or start discussions there. @@ -185,7 +176,6 @@ FAQ can be found on NeMo's `Discussions board `_ CONTRIBUTING.md for the process. +We welcome community contributions! Please refer to `CONTRIBUTING.md `_ for the process. Publications ------------ @@ -367,4 +384,4 @@ Please refer to the instructions in the `README of that branch `_. +NeMo is released under an `Apache 2.0 license `_. diff --git a/docs/source/asr/asr_language_modeling.rst b/docs/source/asr/asr_language_modeling.rst index a0d46ca795b1..bb823cb252c0 100644 --- a/docs/source/asr/asr_language_modeling.rst +++ b/docs/source/asr/asr_language_modeling.rst @@ -39,6 +39,7 @@ penalty term to consider the sequence length in the scores. Larger alpha means m importance on the acoustic model. Negative values for beta will give penalty to longer sequences and make the decoder to prefer shorter predictions, while positive values would result in longer candidates. +.. _train-ngram-lm: Train N-gram LM =============== diff --git a/docs/source/asr/datasets.rst b/docs/source/asr/datasets.rst index 05278ecb2437..61c34014c809 100644 --- a/docs/source/asr/datasets.rst +++ b/docs/source/asr/datasets.rst @@ -162,6 +162,8 @@ these files using ``--dest_folder``. In order to generate files in the supported After the script finishes, the ``train.json``, ``dev.json``, ``test.json``, and ``vocab.txt`` files can be found in the ``dest_folder`` directory. +.. _section-with-manifest-format-explanation: + Preparing Custom ASR Data ------------------------- diff --git a/docs/source/asr/intro.rst b/docs/source/asr/intro.rst index 46a192c546a2..7066c2989393 100644 --- a/docs/source/asr/intro.rst +++ b/docs/source/asr/intro.rst @@ -1,34 +1,156 @@ Automatic Speech Recognition (ASR) ================================== -ASR, or Automatic Speech Recognition, refers to the problem of getting a program to automatically transcribe spoken language -(speech-to-text). Our goal is usually to have a model that minimizes the Word Error Rate (WER) metric when transcribing speech input. -In other words, given some audio file (e.g. a WAV file) containing speech, how do we transform this into the corresponding text with -as few errors as possible? +Automatic Speech Recognition (ASR), also known as Speech To Text (STT), refers to the problem of automatically transcribing spoken language. +You can use NeMo to transcribe speech using open-sourced pretrained models in :ref:`14+ languages `, or :doc:`train your own<./examples/kinyarwanda_asr>` ASR models. -Traditional speech recognition takes a generative approach, modeling the full pipeline of how speech sounds are produced in order to -evaluate a speech sample. We would start from a language model that encapsulates the most likely orderings of words that are generated -(e.g. an n-gram model), to a pronunciation model for each word in that ordering (e.g. a pronunciation table), to an acoustic model that -translates those pronunciations to audio waveforms (e.g. a Gaussian Mixture Model). -Then, if we receive some spoken input, our goal would be to find the most likely sequence of text that would result in the given audio -according to our generative pipeline of models. Overall, with traditional speech recognition, we try to model ``Pr(audio|transcript)*Pr(transcript)``, -and take the argmax of this over possible transcripts. -Over time, neural nets advanced to the point where each component of the traditional speech recognition model could be replaced by a -neural model that had better performance and that had a greater potential for generalization. For example, we could replace an n-gram -model with a neural language model, and replace a pronunciation table with a neural pronunciation model, and so on. However, each of -these neural models need to be trained individually on different tasks, and errors in any model in the pipeline could throw off the -whole prediction. +Transcribe speech with 3 lines of code +---------------------------------------- +After :ref:`installing NeMo`, you can transcribe an audio file as follows: -Thus, we can see the appeal of end-to-end ASR architectures: discriminative models that simply take an audio input and give a textual -output, and in which all components of the architecture are trained together towards the same goal. The model's encoder would be -akin to an acoustic model for extracting speech features, which can then be directly piped to a decoder which outputs text. If desired, -we could integrate a language model that would improve our predictions, as well. +.. code-block:: python -And the entire end-to-end ASR model can be trained at once--a much easier pipeline to handle! + import nemo.collections.asr as nemo_asr + asr_model = nemo_asr.models.ASRModel.from_pretrained("stt_en_fastconformer_transducer_large") + transcript = asr_model.transcribe(["path/to/audio_file.wav"]) -A demo below allows evaluation of NeMo ASR models in multiple langauges from the browser: +Obtain word timestamps +^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can also obtain timestamps for each word in the transcription as follows: + +.. code-block:: python + + # import nemo_asr and instantiate asr_model as above + import nemo.collections.asr as nemo_asr + asr_model = nemo_asr.models.ASRModel.from_pretrained("stt_en_fastconformer_transducer_large") + + # update decoding config to preserve alignments and compute timestamps + from omegaconf import OmegaConf, open_dict + decoding_cfg = asr_model.cfg.decoding + with open_dict(decoding_cfg): + decoding_cfg.preserve_alignments = True + decoding_cfg.compute_timestamps = True + asr_model.change_decoding_strategy(decoding_cfg) + + # specify flag `return_hypotheses=True`` + hypotheses = asr_model.transcribe(["path/to/audio_file.wav"], return_hypotheses=True) + + # if hypotheses form a tuple (from RNNT), extract just "best" hypotheses + if type(hypotheses) == tuple and len(hypotheses) == 2: + hypotheses = hypotheses[0] + + timestamp_dict = hypotheses[0].timestep # extract timesteps from hypothesis of first (and only) audio file + print("Hypothesis contains following timestep information :", list(timestamp_dict.keys())) + + # For a FastConformer model, you can display the word timestamps as follows: + # 80ms is duration of a timestep at output of the Conformer + time_stride = 8 * asr_model.cfg.preprocessor.window_stride + + word_timestamps = timestamp_dict['word'] + + for stamp in word_timestamps: + start = stamp['start_offset'] * time_stride + end = stamp['end_offset'] * time_stride + word = stamp['char'] if 'char' in stamp else stamp['word'] + + print(f"Time : {start:0.2f} - {end:0.2f} - {word}") + +Transcribe speech via command line +---------------------------------- +You can also transcribe speech via the command line using the following `script `_, for example: + +.. code-block:: bash + + python /blob/main/examples/asr/transcribe_speech.py \ + pretrained_name="stt_en_fastconformer_transducer_large" \ + audio_dir= # path to dir containing audio files to transcribe + +The script will save all transcriptions in a JSONL file where each line corresponds to an audio file in ````. +This file will correspond to a format that NeMo commonly uses for saving model predictions, and also for storing +input data for training and evaluation. You can learn more about the format that NeMo uses for these files +(which we refer to as "manifest files") :ref:`here`. + +You can also specify the files to be transcribed inside a manifest file, and pass that in using the argument +``dataset_manifest=`` instead of ``audio_dir``. + + +Incorporate a language model (LM) to improve ASR transcriptions +--------------------------------------------------------------- + +You can often get a boost to transcription accuracy by using a Language Model to help choose words that are more likely +to be spoken in a sentence. + +You can get a good improvement in transcription accuracy even using a simple N-gram LM. + +After :ref:`training ` an N-gram LM, you can use it for transcribing audio as follows: + +1. Install the OpenSeq2Seq beam search decoding and KenLM libraries using `this script `_. +2. Perform transcription using `this script `_: + +.. code-block:: bash + + python eval_beamsearch_ngram.py nemo_model_file= \ + input_manifest= \ + beam_width=[] \ + beam_alpha=[] \ + beam_beta=[] \ + preds_output_folder= \ + probs_cache_file=null \ + decoding_mode=beamsearch_ngram \ + decoding_strategy="" + +See more information about LM decoding :doc:`here <./asr_language_modeling>`. + +Use real-time transcription +--------------------------- + +It is possible to use NeMo to transcribe speech in real-time. You can find an example of how to do +this in the following `notebook tutorial `_. + + +Try different ASR models +------------------------ + +NeMo offers a variety of open-sourced pretrained ASR models that vary by model architecture: + +* **encoder architecture** (FastConformer, Conformer, Citrinet, etc.), +* **decoder architecture** (Transducer, CTC & hybrid of the two), +* **size** of the model (small, medium, large, etc.). + +The pretrained models also vary by: + +* **language** (English, Spanish, etc., including some **multilingual** and **code-switching** models), +* whether the output text contains **punctuation & capitalization** or not. + +The NeMo ASR checkpoints can be found on `HuggingFace `_, or on `NGC `_. All models released by the NeMo team can be found on NGC, and some of those are also available on HuggingFace. + +All NeMo ASR checkpoints open-sourced by the NeMo team follow the following naming convention: +``stt_{language}_{encoder name}_{decoder name}_{model size}{_optional descriptor}``. + +You can load the checkpoints automatically using the ``ASRModel.from_pretrained()`` class method, for example: + +.. code-block:: python + + import nemo.collections.asr as nemo_asr + # model will be fetched from NGC + asr_model = nemo_asr.models.ASRModel.from_pretrained("stt_en_fastconformer_transducer_large") + # if model name is prepended with "nvidia/", the model will be fetched from huggingface + asr_model = nemo_asr.models.ASRModel.from_pretrained("nvidia/stt_en_fastconformer_transducer_large") + # you can also load open-sourced NeMo models released by other HF users using: + # asr_model = nemo_asr.models.ASRModel.from_pretrained("/") + +See further documentation about :doc:`loading checkpoints <./results>`, a full :ref:`list ` of models and their :doc:`benchmark scores <./score>`. + +There is also more information about the ASR model architectures available in NeMo :doc:`here <./models>`. + + +Try out NeMo ASR transcription in your browser +---------------------------------------------- +You can try out transcription with NeMo ASR models without leaving your browser, by using the HuggingFace Space embedded below. .. raw:: html @@ -40,10 +162,27 @@ A demo below allows evaluation of NeMo ASR models in multiple langauges from the -The full documentation tree is as follows: +ASR tutorial notebooks +---------------------- +Hands-on speech recognition tutorial notebooks can be found under `the ASR tutorials folder `_. +If you are a beginner to NeMo, consider trying out the `ASR with NeMo `_ tutorial. +This and most other tutorials can be run on Google Colab by specifying the link to the notebooks' GitHub pages on Colab. + +ASR model configuration +----------------------- +Documentation regarding the configuration files specific to the ``nemo_asr`` models can be found in the :doc:`Configuration Files <./configs>` section. + +Preparing ASR datasets +---------------------- +NeMo includes preprocessing scripts for several common ASR datasets. The :doc:`Datasets <./datasets>` section contains instructions on +running those scripts. It also includes guidance for creating your own NeMo-compatible dataset, if you have your own data. + +Further information +------------------- +For more information, see additional sections in the ASR docs on the left-hand-side menu or in the list below: .. toctree:: - :maxdepth: 8 + :maxdepth: 1 models datasets @@ -54,5 +193,3 @@ The full documentation tree is as follows: api resources examples/kinyarwanda_asr.rst - -.. include:: resources.rst diff --git a/docs/source/asr/resources.rst b/docs/source/asr/resources.rst deleted file mode 100644 index e192f5fbe83d..000000000000 --- a/docs/source/asr/resources.rst +++ /dev/null @@ -1,17 +0,0 @@ -Resources and Documentation ---------------------------- - -Hands-on speech recognition tutorial notebooks can be found under `the ASR tutorials folder `_. -If you are a beginner to NeMo, consider trying out the `ASR with NeMo `_ tutorial. -This and most other tutorials can be run on Google Colab by specifying the link to the notebooks' GitHub pages on Colab. - -If you are looking for information about a particular ASR model, or would like to find out more about the model -architectures available in the `nemo_asr` collection, refer to the :doc:`Models <./models>` section. - -NeMo includes preprocessing scripts for several common ASR datasets. The :doc:`Datasets <./datasets>` section contains instructions on -running those scripts. It also includes guidance for creating your own NeMo-compatible dataset, if you have your own data. - -Information about how to load model checkpoints (either local files or pretrained ones from NGC), as well as a list of the checkpoints -available on NGC are located on the :doc:`Checkpoints <./results>` section. - -Documentation regarding the configuration files specific to the ``nemo_asr`` models can be found on the :doc:`Configuration Files <./configs>` section. diff --git a/docs/source/asr/results.rst b/docs/source/asr/results.rst index 466393e9a55a..3dca110d89e4 100644 --- a/docs/source/asr/results.rst +++ b/docs/source/asr/results.rst @@ -157,6 +157,9 @@ Language Models for ASR | + +.. _asr-checkpoint-list-by-language: + Speech Recognition (Languages) ------------------------------ diff --git a/docs/source/conf.py b/docs/source/conf.py index c54defb59ce8..952e25332ca4 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -149,7 +149,7 @@ # General information about the project. project = "NVIDIA NeMo" -copyright = "© 2021-2022 NVIDIA Corporation & Affiliates. All rights reserved." +copyright = "© 2021-2023 NVIDIA Corporation & Affiliates. All rights reserved." author = "NVIDIA CORPORATION" # The version info for the project you're documenting, acts as replacement for diff --git a/docs/source/starthere/intro.rst b/docs/source/starthere/intro.rst index 9297b7ef53b3..e6a59b0832ab 100644 --- a/docs/source/starthere/intro.rst +++ b/docs/source/starthere/intro.rst @@ -10,7 +10,7 @@ Introduction `NVIDIA NeMo `_, part of the NVIDIA AI platform, is a toolkit for building new state-of-the-art conversational AI models. NeMo has separate collections for Automatic Speech Recognition (ASR), -Natural Language Processing (NLP), and Text-to-Speech (TTS) synthesis models. Each collection consists of +Natural Language Processing (NLP), and Text-to-Speech (TTS) models. Each collection consists of prebuilt modules that include everything needed to train on your data. Every module can easily be customized, extended, and composed to create new conversational AI model architectures. @@ -19,181 +19,126 @@ Conversational AI architectures are typically large and require a lot of data an for training. NeMo uses `PyTorch Lightning `_ for easy and performant multi-GPU/multi-node mixed-precision training. -`Pre-trained NeMo models. `_ - -Also see the two introductory videos below for a high level overview of NeMo. - -* Developing State-Of-The-Art Conversational AI Models in Three Lines of Code. -.. raw:: html - -
- -
- -* NVIDIA NeMo: Toolkit for Conversational AI at PyData Yerevan 2022. -.. image:: https://img.youtube.com/vi/J-P6Sczmas8/maxres3.jpg - :target: https://www.youtube.com/embed/J-P6Sczmas8?mute=0&start=14&autoplay=0 - :width: 560 - :alt: Develop Conversational AI Models in 3 Lines - -For more information and questions, visit the `NVIDIA NeMo Discussion Board `_. +`Pre-trained NeMo models `_ are available +in 14+ languages. Prerequisites ------------- Before you begin using NeMo, it's assumed you meet the following prerequisites. -#. You have Python version 3.9, 3.10. +#. You have Python version 3.10 or above. #. You have Pytorch version 1.13.1 or 2.0+. -#. You have access to an NVIDIA GPU for training. +#. You have access to an NVIDIA GPU, if you intend to do model training. .. _quick_start_guide: Quick Start Guide ----------------- -This NeMo Quick Start Guide is a starting point for users who want to try out NeMo; specifically, this guide enables users to quickly get started with the NeMo fundamentals by walking you through an example audio translator and voice swap. +You can try out NeMo's ASR, NLP and TTS functionality with the example below, which is based on the `Audio Translation `_ tutorial. -If you're new to NeMo, the best way to get started is to take a look at the following tutorials: - -* `Text Classification (Sentiment Analysis) `__ - demonstrates the Text Classification model using the NeMo NLP collection. -* `NeMo Primer `__ - introduces NeMo, PyTorch Lightning, and OmegaConf, and shows how to use, modify, save, and restore NeMo models. -* `NeMo Models `__ - explains the fundamental concepts of the NeMo model. -* `NeMo voice swap demo `__ - demonstrates how to swap a voice in the audio fragment with a computer generated one using NeMo. - -Below we is the code snippet of Audio Translator application. +Once you have :ref:`installed NeMo `, then you can run the code below: .. code-block:: python - # Import NeMo and it's ASR, NLP and TTS collections - import nemo - # Import Speech Recognition collection + # Import NeMo's ASR, NLP and TTS collections import nemo.collections.asr as nemo_asr - # Import Natural Language Processing colleciton import nemo.collections.nlp as nemo_nlp - # Import Speech Synthesis collection import nemo.collections.tts as nemo_tts - # Next, we instantiate all the necessary models directly from NVIDIA NGC - # Speech Recognition model - QuartzNet trained on Russian part of MCV 6.0 - quartznet = nemo_asr.models.EncDecCTCModel.from_pretrained(model_name="stt_ru_quartznet15x5").cuda() - # Neural Machine Translation model - nmt_model = nemo_nlp.models.MTEncDecModel.from_pretrained(model_name='nmt_ru_en_transformer6x6').cuda() - # Spectrogram generator which takes text as an input and produces spectrogram - spectrogram_generator = nemo_tts.models.FastPitchModel.from_pretrained(model_name="tts_en_fastpitch").cuda() - # Vocoder model which takes spectrogram and produces actual audio - vocoder = nemo_tts.models.HifiGanModel.from_pretrained(model_name="tts_en_hifigan").cuda() - # Transcribe an audio file - # IMPORTANT: The audio must be mono with 16Khz sampling rate - # Get example from: https://nemo-public.s3.us-east-2.amazonaws.com/mcv-samples-ru/common_voice_ru_19034087.wav - russian_text = quartznet.transcribe(['Path_to_audio_file']) - print(russian_text) - # You should see russian text here. Let's translate it to English - english_text = nmt_model.translate(russian_text) - print(english_text) - # After this you should see English translation - # Let's convert it into audio - # A helper function which combines FastPitch and HiFiGAN to go directly from - # text to audio - def text_to_audio(text): - parsed = spectrogram_generator.parse(text) - spectrogram = spectrogram_generator.generate_spectrogram(tokens=parsed) - audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram) - return audio.to('cpu').numpy() - audio = text_to_audio(english_text[0]) + # Download an audio file that we will transcribe, translate, and convert the written translation to speech + import wget + wget.download("https://nemo-public.s3.us-east-2.amazonaws.com/zh-samples/common_voice_zh-CN_21347786.mp3") + # Instantiate a Mandarin speech recognition model and transcribe an audio file. + asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name="stt_zh_citrinet_1024_gamma_0_25") + mandarin_text = asr_model.transcribe(['common_voice_zh-CN_21347786.mp3']) + print(mandarin_text) -Installation ------------- + # Instantiate Neural Machine Translation model and translate the text + nmt_model = nemo_nlp.models.MTEncDecModel.from_pretrained(model_name="nmt_zh_en_transformer24x6") + english_text = nmt_model.translate(mandarin_text) + print(english_text) -Pip -~~~ -Use this installation mode if you want the latest released version. + # Instantiate a spectrogram generator (which converts text -> spectrogram) + # and vocoder model (which converts spectrogram -> audio waveform) + spectrogram_generator = nemo_tts.models.FastPitchModel.from_pretrained(model_name="tts_en_fastpitch") + vocoder = nemo_tts.models.HifiGanModel.from_pretrained(model_name="tts_en_hifigan") -.. code-block:: bash + # Parse the text input, generate the spectrogram, and convert it to audio + parsed_text = spectrogram_generator.parse(english_text[0]) + spectrogram = spectrogram_generator.generate_spectrogram(tokens=parsed_text) + audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram) - apt-get update && apt-get install -y libsndfile1 ffmpeg - pip install Cython - pip install nemo_toolkit[all] + # Save the audio to a file + import soundfile as sf + sf.write("output_audio.wav", audio.to('cpu').detach().numpy()[0], 22050) -Pip from source -~~~~~~~~~~~~~~~ -Use this installation mode if you want the version from a particular GitHub branch (for example, ``main``). +You can learn more by about specific tasks you are interested in by checking out the NeMo :doc:`tutorials <./tutorials>`, or documentation (e.g. read :doc:`here <../asr/intro>` to learn more about ASR). -.. code-block:: bash +You can also learn more about NeMo in the `NeMo Primer `_ tutorial, which introduces NeMo, PyTorch Lightning, and OmegaConf, and shows how to use, modify, save, and restore NeMo models. Additionally, the `NeMo Models `__ tutorial explains the fundamentals of how NeMo models are created. These concepts are also explained in detail in the :doc:`NeMo Core <../core/core>` documentation. - apt-get update && apt-get install -y libsndfile1 ffmpeg - pip install Cython - python -m pip install git+https://github.com/NVIDIA/NeMo.git@{BRANCH}#egg=nemo_toolkit[all] - # For v1.0.2, replace {BRANCH} with v1.0.2 like so: - # python -m pip install git+https://github.com/NVIDIA/NeMo.git@v1.0.2#egg=nemo_toolkit[all] -From source -~~~~~~~~~~~ -Use this installation mode if you are contributing to NeMo. +Introductory videos +------------------- -.. code-block:: bash - - apt-get update && apt-get install -y libsndfile1 ffmpeg - git clone https://github.com/NVIDIA/NeMo - cd NeMo - ./reinstall.sh - -Docker containers -~~~~~~~~~~~~~~~~~ -To build a nemo container with Dockerfile from a branch, please run - -.. code-block:: bash +See the two introductory videos below for a high level overview of NeMo. - DOCKER_BUILDKIT=1 docker build -f Dockerfile -t nemo:latest. +**Developing State-Of-The-Art Conversational AI Models in Three Lines of Code** +.. raw:: html -If you chose to work with the ``main`` branch, we recommend using `NVIDIA's PyTorch container version 21.05-py3 `_, then install from GitHub. +
+ +
-.. code-block:: bash +**NVIDIA NeMo: Toolkit for Conversational AI at PyData Yerevan 2022** - docker run --gpus all -it --rm -v :/NeMo --shm-size=8g \ - -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit \ - stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:21.05-py3 +.. raw:: html -.. _mac-installation: +
+ +
-Mac computers with Apple silicon -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To install NeMo on Mac with Apple M-Series GPU: +.. _installation: -- install `Homebrew `_ package manager +Installation +------------ -- create a new Conda environment +The simplest way to install NeMo is via pip, see info below. -- install PyTorch 2.0 or higher +.. note:: Full NeMo installation instructions (with more ways to install NeMo, and how to handle optional dependencies) can be found in the `GitHub README `_. -- run the following code: +Conda +~~~~~ -.. code-block:: shell +We recommend installing NeMo in a fresh Conda environment. - # install mecab using Homebrew, required for sacrebleu for NLP collection - brew install mecab +.. code-block:: bash - # install pynini using Conda, required for text normalization - conda install -c conda-forge pynini + conda create --name nemo python==3.10.12 + conda activate nemo - # install Cython manually - pip install cython +Install PyTorch using their `configurator `_. - # clone the repo and install in development mode - git clone https://github.com/NVIDIA/NeMo - cd NeMo - ./reinstall.sh +Pip +~~~ +Use this installation mode if you want the latest released version. +.. code-block:: bash + apt-get update && apt-get install -y libsndfile1 ffmpeg + pip install Cython + pip install nemo_toolkit['all'] -`FAQ `_ ---------------------------------------------------- -Have a look at our `discussions board `_ and feel free to post a question or start a discussion. +Depending on the shell used, you may need to use ``"nemo_toolkit[all]"`` instead in the above command. +Discussion board +---------------- +For more information and questions, visit the `NVIDIA NeMo Discussion Board `_. Contributing ------------ @@ -203,4 +148,4 @@ We welcome community contributions! Refer to the `CONTRIBUTING.md `_. \ No newline at end of file +NeMo is released under an `Apache 2.0 license `_. \ No newline at end of file From 5f35a8c084dc34eaa01a6c4de2fd3b7369fbe3f2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 12 Oct 2023 08:52:18 -0700 Subject: [PATCH 328/512] fix onnx (#7703) (#7704) Signed-off-by: fayejf Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> --- tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb index 58dfd448f396..412860045cb0 100644 --- a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb +++ b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb @@ -774,7 +774,7 @@ "model.eval()\n", "infer_datalayer = create_infer_dataloader(model, queries)\n", "\n", - "ort_session = onnxruntime.InferenceSession(config.export_to)\n", + "ort_session = onnxruntime.InferenceSession(config.export_to, providers=['CPUExecutionProvider'])\n", "\n", "for batch in infer_datalayer:\n", " input_ids, input_type_ids, input_mask, subtokens_mask = batch\n", From 29910cde068df62644f173355b8daee16adf8ad4 Mon Sep 17 00:00:00 2001 From: Abhinav Khattar Date: Thu, 12 Oct 2023 17:53:44 -0400 Subject: [PATCH 329/512] move core install to /workspace (#7706) Signed-off-by: Abhinav Khattar --- Dockerfile | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7d1539ad2db3..06f96a091a22 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,6 +43,11 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* WORKDIR /workspace/ +# install megatron core, this can be removed once 0.3 pip package is released +RUN git clone https://github.com/NVIDIA/Megatron-LM.git && \ + cd Megatron-LM && \ + git checkout ab0336a5c8eab77aa74ae604ba1e73decbf6d560 && \ + pip install -e . WORKDIR /tmp/ @@ -52,12 +57,6 @@ RUN git clone https://github.com/NVIDIA/apex.git && \ git checkout 52e18c894223800cb611682dce27d88050edf1de && \ pip3 install -v --no-build-isolation --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_layer_norm" --global-option="--distributed_adam" --global-option="--deprecated_fused_adam" ./ -# install megatron core, this can be removed once 0.3 pip package is released -RUN git clone https://github.com/NVIDIA/Megatron-LM.git && \ - cd Megatron-LM && \ - git checkout ab0336a5c8eab77aa74ae604ba1e73decbf6d560 && \ - pip install -e . - # uninstall stuff from base container RUN pip3 uninstall -y sacrebleu torchtext From aa3a9773262d9f91bb102516b43ee3e651a96885 Mon Sep 17 00:00:00 2001 From: anteju <108555623+anteju@users.noreply.github.com> Date: Thu, 12 Oct 2023 15:48:53 -0700 Subject: [PATCH 330/512] Fix typo in audio codec config, encoder target (#7697) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ante Jukić --- examples/tts/conf/audio_codec/audio_codec_24000.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/tts/conf/audio_codec/audio_codec_24000.yaml b/examples/tts/conf/audio_codec/audio_codec_24000.yaml index 14e6fe236545..7d90392767ea 100644 --- a/examples/tts/conf/audio_codec/audio_codec_24000.yaml +++ b/examples/tts/conf/audio_codec/audio_codec_24000.yaml @@ -112,7 +112,7 @@ model: num_workers: 2 audio_encoder: - _target_: nemo.collections.tts.modules.encodec_modules.HifiGanEncoder + _target_: nemo.collections.tts.modules.encodec_modules.SEANetEncoder down_sample_rates: ${down_sample_rates} audio_decoder: From eab0f54d4a9106f12eeb237a6ba2acae9eef3571 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 12 Oct 2023 20:33:43 -0700 Subject: [PATCH 331/512] Replace strategy='dp'/None with 'auto' (#7681) (#7696) * Add strategy=auto for None and dp * Change strategy from None to auto --------- Signed-off-by: Abhishree Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> --- examples/nlp/dialogue/dialogue.py | 2 +- .../nlp/text_classification/text_classification_with_bert.py | 2 +- .../nlp/token_classification/token_classification_train.py | 2 +- tutorials/asr/ASR_with_Subword_Tokenization.ipynb | 4 ++-- tutorials/asr/Speech_Commands.ipynb | 2 +- tutorials/asr/Voice_Activity_Detection.ipynb | 2 +- tutorials/nlp/Question_Answering.ipynb | 2 +- .../speaker_tasks/Speaker_Identification_Verification.ipynb | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/nlp/dialogue/dialogue.py b/examples/nlp/dialogue/dialogue.py index 0da5ae5717da..de91b60d1ed3 100644 --- a/examples/nlp/dialogue/dialogue.py +++ b/examples/nlp/dialogue/dialogue.py @@ -68,7 +68,7 @@ def main(cfg: DictConfig) -> None: try: strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=True,) except (ImportError, ModuleNotFoundError): - strategy = None + strategy = 'auto' trainer = pl.Trainer(**cfg.trainer, strategy=strategy) diff --git a/examples/nlp/text_classification/text_classification_with_bert.py b/examples/nlp/text_classification/text_classification_with_bert.py index 2deb3b34c444..01e8fae9bba5 100644 --- a/examples/nlp/text_classification/text_classification_with_bert.py +++ b/examples/nlp/text_classification/text_classification_with_bert.py @@ -111,7 +111,7 @@ def main(cfg: DictConfig) -> None: try: strategy = NLPDDPStrategy(find_unused_parameters=True) except (ImportError, ModuleNotFoundError): - strategy = None + strategy = 'auto' trainer = pl.Trainer(strategy=strategy, **cfg.trainer) exp_manager(trainer, cfg.get("exp_manager", None)) diff --git a/examples/nlp/token_classification/token_classification_train.py b/examples/nlp/token_classification/token_classification_train.py index 51983a1af98b..56c1487cf9c5 100644 --- a/examples/nlp/token_classification/token_classification_train.py +++ b/examples/nlp/token_classification/token_classification_train.py @@ -105,7 +105,7 @@ def main(cfg: DictConfig) -> None: try: strategy = NLPDDPStrategy(find_unused_parameters=True) except (ImportError, ModuleNotFoundError): - strategy = None + strategy = 'auto' trainer = pl.Trainer(strategy=strategy, **cfg.trainer) exp_manager(trainer, cfg.get("exp_manager", None)) diff --git a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb index fdb83d9a0dac..92dcc2305e89 100644 --- a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb +++ b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb @@ -1374,7 +1374,7 @@ "trainer = pl.Trainer(amp_level='O1', precision=16)\r\n", "\r\n", "# Trainer with a distributed backend:\r\n", - "trainer = pl.Trainer(devices=2, num_nodes=2, accelerator='gpu', strategy='dp')\r\n", + "trainer = pl.Trainer(devices=2, num_nodes=2, accelerator='gpu', strategy='auto')\r\n", "\r\n", "# Of course, you can combine these flags as well.\r\n", "```\r\n", @@ -1443,4 +1443,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/tutorials/asr/Speech_Commands.ipynb b/tutorials/asr/Speech_Commands.ipynb index 0e9579555408..cc6c4956b8a5 100644 --- a/tutorials/asr/Speech_Commands.ipynb +++ b/tutorials/asr/Speech_Commands.ipynb @@ -646,7 +646,7 @@ "\n", "```python\n", "# Trainer with a distributed backend:\n", - "trainer = Trainer(devices=2, num_nodes=2, accelerator='gpu', strategy='dp')\n", + "trainer = Trainer(devices=2, num_nodes=2, accelerator='gpu', strategy='auto')\n", "\n", "# Mixed precision:\n", "trainer = Trainer(amp_level='O1', precision=16)\n", diff --git a/tutorials/asr/Voice_Activity_Detection.ipynb b/tutorials/asr/Voice_Activity_Detection.ipynb index 6898b0164461..0e5a59312506 100644 --- a/tutorials/asr/Voice_Activity_Detection.ipynb +++ b/tutorials/asr/Voice_Activity_Detection.ipynb @@ -678,7 +678,7 @@ "\n", "```python\n", "# Trainer with a distributed backend:\n", - "trainer = Trainer(devices=2, num_nodes=2, accelerator='gpu', strategy='dp')\n", + "trainer = Trainer(devices=2, num_nodes=2, accelerator='gpu', strategy='auto')\n", "\n", "# Mixed precision:\n", "trainer = Trainer(amp_level='O1', precision=16)\n", diff --git a/tutorials/nlp/Question_Answering.ipynb b/tutorials/nlp/Question_Answering.ipynb index 211c78d45342..af95c7707828 100644 --- a/tutorials/nlp/Question_Answering.ipynb +++ b/tutorials/nlp/Question_Answering.ipynb @@ -357,7 +357,7 @@ "config.trainer.precision = 16\n", "config.trainer.devices = [0] # 0 for CPU, or list of the GPUs to use [0] this tutorial does not support multiple GPUs. If needed please use NeMo/examples/nlp/question_answering/question_answering.py\n", "config.trainer.accelerator = \"gpu\"\n", - "config.trainer.strategy=\"dp\"" + "config.trainer.strategy=\"auto\"" ] }, { diff --git a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb index 954a84fa44ac..f92864ae3306 100644 --- a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb +++ b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb @@ -637,7 +637,7 @@ "We can dramatically improve the time taken to train this model by using Multi GPU training along with Mixed Precision.\n", "\n", "### Trainer with a distributed backend:\n", - "
trainer = Trainer(devices=2, num_nodes=2, accelerator='gpu', strategy='dp')\n",
+    "
trainer = Trainer(devices=2, num_nodes=2, accelerator='gpu', strategy='auto')\n",
     "
\n", "\n", "### Mixed precision:\n", From 233e62b733ac9b216cc9e0f85f72493033475e42 Mon Sep 17 00:00:00 2001 From: anteju <108555623+anteju@users.noreply.github.com> Date: Thu, 12 Oct 2023 21:40:57 -0700 Subject: [PATCH 332/512] [ASR] Multichannel mask estimator with flex number of channels (#7317) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Adding a mask estimator which can process an arbitrary number of channels Signed-off-by: Ante Jukić * Bypass failing tests + mark as pleasefixme Signed-off-by: Ante Jukić --------- Signed-off-by: Ante Jukić --- .../conf/beamforming_flex_channels.yaml | 146 ++++ examples/audio_tasks/process_audio.py | 9 + nemo/collections/asr/data/audio_to_audio.py | 2 +- nemo/collections/asr/losses/audio_losses.py | 8 +- .../asr/models/audio_to_audio_model.py | 19 +- .../asr/models/enhancement_models.py | 53 +- nemo/collections/asr/modules/__init__.py | 7 +- nemo/collections/asr/modules/audio_modules.py | 630 ++++++++++++-- .../asr/modules/audio_preprocessing.py | 4 +- .../parts/submodules/multichannel_modules.py | 780 ++++++++++++++++++ .../asr/parts/utils/audio_utils.py | 6 +- requirements/requirements_asr.txt | 2 +- .../test_asr_part_submodules_multichannel.py | 157 ++++ .../asr/test_asr_rnnt_encdec_model.py | 2 + tests/collections/asr/test_audio_modules.py | 67 ++ .../collections/asr/test_conformer_encoder.py | 1 + 16 files changed, 1779 insertions(+), 114 deletions(-) create mode 100644 examples/audio_tasks/conf/beamforming_flex_channels.yaml create mode 100644 nemo/collections/asr/parts/submodules/multichannel_modules.py create mode 100644 tests/collections/asr/test_asr_part_submodules_multichannel.py diff --git a/examples/audio_tasks/conf/beamforming_flex_channels.yaml b/examples/audio_tasks/conf/beamforming_flex_channels.yaml new file mode 100644 index 000000000000..29fc87acf93d --- /dev/null +++ b/examples/audio_tasks/conf/beamforming_flex_channels.yaml @@ -0,0 +1,146 @@ +# This configuration contains the exemplary values for training a multichannel speech enhancement model with a mask-based beamformer. +# +name: beamforming_flex_channels + +model: + sample_rate: 16000 + skip_nan_grad: false + num_outputs: 1 + + train_ds: + manifest_filepath: ??? + input_key: audio_filepath # key of the input signal path in the manifest + input_channel_selector: null # load all channels from the input file + target_key: target_anechoic_filepath # key of the target signal path in the manifest + target_channel_selector: 0 # load only the first channel from the target file + audio_duration: 4.0 # in seconds, audio segment duration for training + random_offset: true # if the file is longer than audio_duration, use random offset to select a subsegment + min_duration: ${model.train_ds.audio_duration} + batch_size: 16 # batch size may be increased based on the available memory + shuffle: true + num_workers: 16 + pin_memory: true + + validation_ds: + manifest_filepath: ??? + input_key: audio_filepath # key of the input signal path in the manifest + input_channel_selector: null # load all channels from the input file + target_key: target_anechoic_filepath # key of the target signal path in the manifest + target_channel_selector: 0 # load only the first channel from the target file + batch_size: 8 + shuffle: false + num_workers: 8 + pin_memory: true + + channel_augment: + _target_: nemo.collections.asr.parts.submodules.multichannel_modules.ChannelAugment + num_channels_min: 2 # minimal number of channels selected for each batch + num_channels_max: null # max number of channels is determined by the batch size + permute_channels: true + + encoder: + _target_: nemo.collections.asr.modules.audio_preprocessing.AudioToSpectrogram + fft_length: 512 # Length of the window and FFT for calculating spectrogram + hop_length: 256 # Hop length for calculating spectrogram + + decoder: + _target_: nemo.collections.asr.modules.audio_preprocessing.SpectrogramToAudio + fft_length: ${model.encoder.fft_length} + hop_length: ${model.encoder.hop_length} + + mask_estimator: + _target_: nemo.collections.asr.modules.audio_modules.MaskEstimatorFlexChannels + num_outputs: ${model.num_outputs} # number of output masks + num_subbands: 257 # number of subbands for the input spectrogram + num_blocks: 5 # number of blocks in the model + channel_reduction_position: 3 # 0-indexed, apply channel reduction before this block + channel_reduction_type: average # channel-wise reduction + channel_block_type: transform_average_concatenate # channel block + temporal_block_type: conformer_encoder # temporal block + temporal_block_num_layers: 5 # number of layers for the temporal block + temporal_block_num_heads: 4 # number of heads for the temporal block + temporal_block_dimension: 128 # the hidden size of the temporal block + mag_reduction: null # channel-wise reduction of magnitude + mag_normalization: mean_var # normalization using mean and variance + use_ipd: true # use inter-channel phase difference + ipd_normalization: mean # mean normalization + + mask_processor: + # Mask-based multi-channel processor + _target_: nemo.collections.asr.modules.audio_modules.MaskBasedBeamformer + filter_type: pmwf # parametric multichannel wiener filter + filter_beta: 0.0 # mvdr + filter_rank: one + ref_channel: max_snr # select reference channel by maximizing estimated SNR + ref_hard: 1 # a one-hot reference. If false, a soft estimate across channels is used. + ref_hard_use_grad: false # use straight-through gradient when using hard reference + ref_subband_weighting: false # use subband weighting for reference estimation + num_subbands: ${model.mask_estimator.num_subbands} + + loss: + _target_: nemo.collections.asr.losses.SDRLoss + convolution_invariant: true # convolution-invariant loss + sdr_max: 30 # soft threshold for SDR + + metrics: + val: + sdr_0: + _target_: torchmetrics.audio.SignalDistortionRatio + channel: 0 # evaluate only on channel 0, if there are multiple outputs + + optim: + name: adamw + lr: 1e-4 + # optimizer arguments + betas: [0.9, 0.98] + weight_decay: 1e-3 + + # scheduler setup + sched: + name: CosineAnnealing + # scheduler config override + warmup_steps: 10000 + warmup_ratio: null + min_lr: 1e-6 + +trainer: + devices: -1 # number of GPUs, -1 would use all available GPUs + num_nodes: 1 + max_epochs: -1 + max_steps: -1 # computed at runtime if not set + val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations + accelerator: auto + strategy: ddp + accumulate_grad_batches: 1 + gradient_clip_val: null + precision: 32 # Should be set to 16 for O1 and O2 to enable the AMP. + log_every_n_steps: 25 # Interval of logging. + enable_progress_bar: true + num_sanity_val_steps: 0 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + check_val_every_n_epoch: 1 # number of evaluations on validation every n epochs + sync_batchnorm: true + enable_checkpointing: False # Provided by exp_manager + logger: false # Provided by exp_manager + +exp_manager: + exp_dir: null + name: ${name} + create_tensorboard_logger: true + create_checkpoint_callback: true + checkpoint_callback_params: + # in case of multiple validation sets, first one is used + monitor: "val_loss" + mode: "min" + save_top_k: 5 + always_save_nemo: true # saves the checkpoints as nemo files instead of PTL checkpoints + + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc.pyth + # you need to set these two to true to continue the training + resume_if_exists: false + resume_ignore_no_checkpoint: false + + # You may use this section to create a W&B logger + create_wandb_logger: false + wandb_logger_kwargs: + name: null + project: null diff --git a/examples/audio_tasks/process_audio.py b/examples/audio_tasks/process_audio.py index 20650d8a8c3c..e73831fe7a5f 100644 --- a/examples/audio_tasks/process_audio.py +++ b/examples/audio_tasks/process_audio.py @@ -37,6 +37,7 @@ pretrained_name: name of a pretrained AudioToAudioModel model (from NGC registry) audio_dir: path to directory with audio files dataset_manifest: path to dataset JSON manifest file (in NeMo format) + max_utts: maximum number of utterances to process input_channel_selector: list of channels to take from audio files, defaults to `None` and takes all available channels input_key: key for audio filepath in the manifest file, defaults to `audio_filepath` @@ -80,6 +81,7 @@ class ProcessConfig: pretrained_name: Optional[str] = None # Name of a pretrained model audio_dir: Optional[str] = None # Path to a directory which contains audio files dataset_manifest: Optional[str] = None # Path to dataset's JSON manifest + max_utts: Optional[int] = None # max number of utterances to process # Audio configs input_channel_selector: Optional[List] = None # Union types not supported Optional[Union[List, int]] @@ -171,6 +173,10 @@ def main(cfg: ProcessConfig) -> ProcessConfig: audio_file = manifest_dir / audio_file filepaths.append(str(audio_file.absolute())) + if cfg.max_utts is not None: + # Limit the number of utterances to process + filepaths = filepaths[: cfg.max_utts] + logging.info(f"\nProcessing {len(filepaths)} files...\n") # setup AMP (optional) @@ -225,6 +231,9 @@ def autocast(): item = json.loads(line) item['processed_audio_filepath'] = paths2processed_files[idx] f.write(json.dumps(item) + "\n") + + if cfg.max_utts is not None and idx >= cfg.max_utts - 1: + break else: for idx, processed_file in enumerate(paths2processed_files): item = {'processed_audio_filepath': processed_file} diff --git a/nemo/collections/asr/data/audio_to_audio.py b/nemo/collections/asr/data/audio_to_audio.py index 9f9eda7c865a..a3c6dd0cc1b3 100644 --- a/nemo/collections/asr/data/audio_to_audio.py +++ b/nemo/collections/asr/data/audio_to_audio.py @@ -636,7 +636,7 @@ def get_duration(audio_files: List[str]) -> List[float]: Returns: List of durations in seconds. """ - duration = [librosa.get_duration(filename=f) for f in flatten(audio_files)] + duration = [librosa.get_duration(path=f) for f in flatten(audio_files)] return duration def load_embedding(self, example: collections.Audio.OUTPUT_TYPE) -> Dict[str, torch.Tensor]: diff --git a/nemo/collections/asr/losses/audio_losses.py b/nemo/collections/asr/losses/audio_losses.py index 34c73a23d7b8..62ce4a9f7edd 100644 --- a/nemo/collections/asr/losses/audio_losses.py +++ b/nemo/collections/asr/losses/audio_losses.py @@ -121,8 +121,8 @@ def convolution_invariant_target( input_length: Optional[torch.Tensor] = None, mask: Optional[torch.Tensor] = None, filter_length: int = 512, - diag_reg: float = 1e-8, - eps: float = 1e-10, + diag_reg: float = 1e-6, + eps: float = 1e-8, ) -> torch.Tensor: """Calculate optimal convolution-invariant target for a given estimate. Assumes time dimension is the last dimension in the array. @@ -222,7 +222,7 @@ def calculate_sdr_batch( convolution_filter_length: Optional[int] = 512, remove_mean: bool = True, sdr_max: Optional[float] = None, - eps: float = 1e-10, + eps: float = 1e-8, ) -> torch.Tensor: """Calculate signal-to-distortion ratio per channel. @@ -310,7 +310,7 @@ def __init__( convolution_filter_length: Optional[int] = 512, remove_mean: bool = True, sdr_max: Optional[float] = None, - eps: float = 1e-10, + eps: float = 1e-8, ): super().__init__() diff --git a/nemo/collections/asr/models/audio_to_audio_model.py b/nemo/collections/asr/models/audio_to_audio_model.py index 21860cf8ab56..49364843e8b8 100644 --- a/nemo/collections/asr/models/audio_to_audio_model.py +++ b/nemo/collections/asr/models/audio_to_audio_model.py @@ -121,15 +121,24 @@ def on_test_start(self): return super().on_test_start() def validation_step(self, batch, batch_idx, dataloader_idx: int = 0): - return self.evaluation_step(batch, batch_idx, dataloader_idx, 'val') + output_dict = self.evaluation_step(batch, batch_idx, dataloader_idx, 'val') + if isinstance(self.trainer.val_dataloaders, (list, tuple)) and len(self.trainer.val_dataloaders) > 1: + self.validation_step_outputs[dataloader_idx].append(output_dict) + else: + self.validation_step_outputs.append(output_dict) + return output_dict def test_step(self, batch, batch_idx, dataloader_idx=0): - return self.evaluation_step(batch, batch_idx, dataloader_idx, 'test') + output_dict = self.evaluation_step(batch, batch_idx, dataloader_idx, 'test') + if isinstance(self.trainer.test_dataloaders, (list, tuple)) and len(self.trainer.test_dataloaders) > 1: + self.test_step_outputs[dataloader_idx].append(output_dict) + else: + self.test_step_outputs.append(output_dict) + return output_dict def multi_evaluation_epoch_end(self, outputs, dataloader_idx: int = 0, tag: str = 'val'): # Handle loss loss_mean = torch.stack([x[f'{tag}_loss'] for x in outputs]).mean() - output_dict = {f'{tag}_loss': loss_mean} tensorboard_logs = {f'{tag}_loss': loss_mean} # Handle metrics for this tag and dataloader_idx @@ -141,9 +150,7 @@ def multi_evaluation_epoch_end(self, outputs, dataloader_idx: int = 0, tag: str # Store for logs tensorboard_logs[f'{tag}_{name}'] = value - output_dict['log'] = tensorboard_logs - - return output_dict + return {f'{tag}_loss': loss_mean, 'log': tensorboard_logs} def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): return self.multi_evaluation_epoch_end(outputs, dataloader_idx, 'val') diff --git a/nemo/collections/asr/models/enhancement_models.py b/nemo/collections/asr/models/enhancement_models.py index a441c6f7a8b0..7cc5c3d8459f 100644 --- a/nemo/collections/asr/models/enhancement_models.py +++ b/nemo/collections/asr/models/enhancement_models.py @@ -61,14 +61,24 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): self.decoder = EncMaskDecAudioToAudioModel.from_config_dict(self._cfg.decoder) if 'mixture_consistency' in self._cfg: + logging.debug('Using mixture consistency') self.mixture_consistency = EncMaskDecAudioToAudioModel.from_config_dict(self._cfg.mixture_consistency) else: + logging.debug('Mixture consistency not used') self.mixture_consistency = None # Future enhancement: # If subclasses need to modify the config before calling super() # Check ASRBPE* classes do with their mixin + # Setup augmentation + if hasattr(self.cfg, 'channel_augment') and self.cfg.channel_augment is not None: + logging.debug('Using channel augmentation') + self.channel_augmentation = EncMaskDecAudioToAudioModel.from_config_dict(self.cfg.channel_augment) + else: + logging.debug('Channel augmentation not used') + self.channel_augmentation = None + # Setup optional Optimization flags self.setup_optimization_flags() @@ -125,7 +135,7 @@ def process( temporary_manifest_filepath = os.path.join(tmpdir, 'manifest.json') with open(temporary_manifest_filepath, 'w', encoding='utf-8') as fp: for audio_file in paths2audio_files: - entry = {'input_filepath': audio_file, 'duration': librosa.get_duration(filename=audio_file)} + entry = {'input_filepath': audio_file, 'duration': librosa.get_duration(path=audio_file)} fp.write(json.dumps(entry) + '\n') config = { @@ -397,17 +407,23 @@ def training_step(self, batch, batch_idx): if target_signal.ndim == 2: target_signal = target_signal.unsqueeze(1) + # Apply channel augmentation + if self.training and self.channel_augmentation is not None: + input_signal = self.channel_augmentation(input=input_signal) + + # Process input processed_signal, _ = self.forward(input_signal=input_signal, input_length=input_length) - loss_value = self.loss(estimate=processed_signal, target=target_signal, input_length=input_length) + # Calculate the loss + loss = self.loss(estimate=processed_signal, target=target_signal, input_length=input_length) - tensorboard_logs = { - 'train_loss': loss_value, - 'learning_rate': self._optimizer.param_groups[0]['lr'], - 'global_step': torch.tensor(self.trainer.global_step, dtype=torch.float32), - } + # Logs + self.log('train_loss', loss) + self.log('learning_rate', self._optimizer.param_groups[0]['lr']) + self.log('global_step', torch.tensor(self.trainer.global_step, dtype=torch.float32)) - return {'loss': loss_value, 'log': tensorboard_logs} + # Return loss + return loss def evaluation_step(self, batch, batch_idx, dataloader_idx: int = 0, tag: str = 'val'): input_signal, input_length, target_signal, target_length = batch @@ -419,11 +435,11 @@ def evaluation_step(self, batch, batch_idx, dataloader_idx: int = 0, tag: str = if target_signal.ndim == 2: target_signal = target_signal.unsqueeze(1) + # Process input processed_signal, _ = self.forward(input_signal=input_signal, input_length=input_length) - # Prepare output - loss_value = self.loss(estimate=processed_signal, target=target_signal, input_length=input_length) - output_dict = {f'{tag}_loss': loss_value} + # Calculate the loss + loss = self.loss(estimate=processed_signal, target=target_signal, input_length=input_length) # Update metrics if hasattr(self, 'metrics') and tag in self.metrics: @@ -432,19 +448,10 @@ def evaluation_step(self, batch, batch_idx, dataloader_idx: int = 0, tag: str = metric.update(preds=processed_signal, target=target_signal, input_length=input_length) # Log global step - self.log('global_step', torch.tensor(self.trainer.global_step, dtype=torch.float32), sync_dist=True) + self.log('global_step', torch.tensor(self.trainer.global_step, dtype=torch.float32)) - if tag == 'val': - if isinstance(self.trainer.val_dataloaders, (list, tuple)) and len(self.trainer.val_dataloaders) > 1: - self.validation_step_outputs[dataloader_idx].append(output_dict) - else: - self.validation_step_outputs.append(output_dict) - else: - if isinstance(self.trainer.test_dataloaders, (list, tuple)) and len(self.trainer.test_dataloaders) > 1: - self.test_step_outputs[dataloader_idx].append(output_dict) - else: - self.test_step_outputs.append(output_dict) - return output_dict + # Return loss + return {f'{tag}_loss': loss} @classmethod def list_available_models(cls) -> Optional[PretrainedModelInfo]: diff --git a/nemo/collections/asr/modules/__init__.py b/nemo/collections/asr/modules/__init__.py index ecd430b56e6c..0265d9e30687 100644 --- a/nemo/collections/asr/modules/__init__.py +++ b/nemo/collections/asr/modules/__init__.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.collections.asr.modules.audio_modules import MaskBasedBeamformer, MaskEstimatorRNN, MaskReferenceChannel +from nemo.collections.asr.modules.audio_modules import ( + MaskBasedBeamformer, + MaskEstimatorFlexChannels, + MaskEstimatorRNN, + MaskReferenceChannel, +) from nemo.collections.asr.modules.audio_preprocessing import ( AudioToMelSpectrogramPreprocessor, AudioToMFCCPreprocessor, diff --git a/nemo/collections/asr/modules/audio_modules.py b/nemo/collections/asr/modules/audio_modules.py index e2218d2118cf..3abbd287051a 100644 --- a/nemo/collections/asr/modules/audio_modules.py +++ b/nemo/collections/asr/modules/audio_modules.py @@ -12,35 +12,36 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, Optional, Tuple +from typing import Dict, List, Optional, Tuple import numpy as np import torch +from nemo.collections.asr.losses.audio_losses import temporal_mean +from nemo.collections.asr.modules.conformer_encoder import ConformerEncoder from nemo.collections.asr.parts.preprocessing.features import make_seq_mask_like +from nemo.collections.asr.parts.submodules.multichannel_modules import ( + ChannelAttentionPool, + ChannelAveragePool, + ParametricMultichannelWienerFilter, + TransformAttendConcatenate, + TransformAverageConcatenate, +) from nemo.collections.asr.parts.utils.audio_utils import db2mag, wrap_to_pi from nemo.core.classes import NeuralModule, typecheck from nemo.core.neural_types import FloatType, LengthsType, NeuralType, SpectrogramType from nemo.utils import logging from nemo.utils.decorators import experimental -try: - import torchaudio - - HAVE_TORCHAUDIO = True -except ModuleNotFoundError: - HAVE_TORCHAUDIO = False - - __all__ = [ 'MaskEstimatorRNN', + 'MaskEstimatorFlexChannels', 'MaskReferenceChannel', 'MaskBasedBeamformer', 'MaskBasedDereverbWPE', ] -@experimental class SpectrogramToMultichannelFeatures(NeuralModule): """Convert a complex-valued multi-channel spectrogram to multichannel features. @@ -50,32 +51,36 @@ class SpectrogramToMultichannelFeatures(NeuralModule): num_input_channels: Optional, provides the number of channels of the input signal. Used to infer the number of output channels. - magnitude_reduction: Reduction across channels. Default `None`, will calculate - magnitude of each channel. + mag_reduction: Reduction across channels. Default `None`, will calculate + magnitude of each channel. + mag_power: Optional, apply power on the magnitude. use_ipd: Use inter-channel phase difference (IPD). mag_normalization: Normalization for magnitude features ipd_normalization: Normalization for IPD features + eps: Small regularization constant. """ def __init__( self, num_subbands: int, num_input_channels: Optional[int] = None, - mag_reduction: Optional[str] = 'rms', + mag_reduction: Optional[str] = None, + mag_power: Optional[float] = None, use_ipd: bool = False, mag_normalization: Optional[str] = None, ipd_normalization: Optional[str] = None, + eps: float = 1e-8, ): super().__init__() self.mag_reduction = mag_reduction + self.mag_power = mag_power self.use_ipd = use_ipd - # TODO: normalization - if mag_normalization is not None: + if mag_normalization not in [None, 'mean', 'mean_var']: raise NotImplementedError(f'Unknown magnitude normalization {mag_normalization}') self.mag_normalization = mag_normalization - if ipd_normalization is not None: + if ipd_normalization not in [None, 'mean', 'mean_var']: raise NotImplementedError(f'Unknown ipd normalization {ipd_normalization}') self.ipd_normalization = ipd_normalization @@ -86,6 +91,19 @@ def __init__( self._num_features = num_subbands self._num_channels = num_input_channels if self.mag_reduction is None else 1 + self.eps = eps + + logging.debug('Initialized %s with', self.__class__.__name__) + logging.debug('\tnum_subbands: %d', num_subbands) + logging.debug('\tmag_reduction: %s', self.mag_reduction) + logging.debug('\tmag_power: %s', self.mag_power) + logging.debug('\tuse_ipd: %s', self.use_ipd) + logging.debug('\tmag_normalization: %s', self.mag_normalization) + logging.debug('\tipd_normalization: %s', self.ipd_normalization) + logging.debug('\teps: %f', self.eps) + logging.debug('\t_num_features: %s', self._num_features) + logging.debug('\t_num_channels: %s', self._num_channels) + @property def input_types(self) -> Dict[str, NeuralType]: """Returns definitions of module output ports. @@ -122,6 +140,102 @@ def num_channels(self) -> int: 'must be provided when constructing the object.' ) + @staticmethod + def get_mean_time_channel(input: torch.Tensor, input_length: Optional[torch.Tensor] = None) -> torch.Tensor: + """Calculate mean across time and channel dimensions. + + Args: + input: tensor with shape (B, C, F, T) + input_length: tensor with shape (B,) + + Returns: + Mean of `input` calculated across time and channel dimension + with shape (B, 1, F, 1) + """ + assert input.ndim == 4, f'Expected input to have 4 dimensions, got {input.ndim}' + + if input_length is None: + mean = torch.mean(input, dim=(-1, -3), keepdim=True) + else: + # temporal mean + mean = temporal_mean(input, input_length, keepdim=True) + # channel mean + mean = torch.mean(mean, dim=-3, keepdim=True) + + return mean + + @classmethod + def get_mean_std_time_channel( + cls, input: torch.Tensor, input_length: Optional[torch.Tensor] = None, eps: float = 1e-10 + ) -> torch.Tensor: + """Calculate mean and standard deviation across time and channel dimensions. + + Args: + input: tensor with shape (B, C, F, T) + input_length: tensor with shape (B,) + + Returns: + Mean and standard deviation of the `input` calculated across time and + channel dimension, each with shape (B, 1, F, 1). + """ + assert input.ndim == 4, f'Expected input to have 4 dimensions, got {input.ndim}' + + if input_length is None: + std, mean = torch.std_mean(input, dim=(-1, -3), unbiased=False, keepdim=True) + else: + mean = cls.get_mean_time_channel(input, input_length) + std = (input - mean).pow(2) + # temporal mean + std = temporal_mean(std, input_length, keepdim=True) + # channel mean + std = torch.mean(std, dim=-3, keepdim=True) + # final value + std = torch.sqrt(std.clamp(eps)) + + return mean, std + + @typecheck( + input_types={ + 'input': NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()), + 'input_length': NeuralType(tuple('B'), LengthsType()), + }, + output_types={'output': NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()),}, + ) + def normalize_mean(self, input: torch.Tensor, input_length: torch.Tensor) -> torch.Tensor: + """Mean normalization for the input tensor. + + Args: + input: input tensor + input_length: valid length for each example + + Returns: + Mean normalized input. + """ + mean = self.get_mean_time_channel(input=input, input_length=input_length) + output = input - mean + return output + + @typecheck( + input_types={ + 'input': NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()), + 'input_length': NeuralType(tuple('B'), LengthsType()), + }, + output_types={'output': NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()),}, + ) + def normalize_mean_var(self, input: torch.Tensor, input_length: torch.Tensor) -> torch.Tensor: + """Mean and variance normalization for the input tensor. + + Args: + input: input tensor + input_length: valid length for each example + + Returns: + Mean and variance normalized input. + """ + mean, std = self.get_mean_std_time_channel(input=input, input_length=input_length, eps=self.eps) + output = (input - mean) / std + return output + @typecheck() def forward(self, input: torch.Tensor, input_length: torch.Tensor) -> torch.Tensor: """Convert input batch of C-channel spectrograms into @@ -148,20 +262,30 @@ def forward(self, input: torch.Tensor, input_length: torch.Tensor) -> torch.Tens else: raise ValueError(f'Unexpected magnitude reduction {self.mag_reduction}') - if self.mag_normalization is not None: - mag = self.mag_normalization(mag) + if self.mag_power is not None: + mag = torch.pow(mag, self.mag_power) + + if self.mag_normalization == 'mean': + # normalize mean across channels and time steps + mag = self.normalize_mean(input=mag, input_length=input_length) + elif self.mag_normalization == 'mean_var': + mag = self.normalize_mean_var(input=mag, input_length=input_length) features = mag if self.use_ipd: - # Calculate IPD relative to average spec - spec_mean = torch.mean(input, axis=1, keepdim=True) + # Calculate IPD relative to the average spec + spec_mean = torch.mean(input, axis=1, keepdim=True) # channel average ipd = torch.angle(input) - torch.angle(spec_mean) # Modulo to [-pi, pi] ipd = wrap_to_pi(ipd) - if self.ipd_normalization is not None: - ipd = self.ipd_normalization(ipd) + if self.ipd_normalization == 'mean': + # normalize mean across channels and time steps + # mean across time + ipd = self.normalize_mean(input=ipd, input_length=input_length) + elif self.ipd_normalization == 'mean_var': + ipd = self.normalize_mean_var(input=ipd, input_length=input_length) # Concatenate to existing features features = torch.cat([features.expand(ipd.shape), ipd], axis=2) @@ -342,6 +466,258 @@ def forward(self, input: torch.Tensor, input_length: torch.Tensor) -> Tuple[torc return masks, output_length +class MaskEstimatorFlexChannels(NeuralModule): + """Estimate `num_outputs` masks from the input spectrogram + using stacked channel-wise and temporal layers. + + This model is using interlaved channel blocks and temporal blocks, and + it can process arbitrary number of input channels. + Default channel block is the transform-average-concatenate layer. + Default temporal block is the Conformer encoder. + Reduction from multichannel signal to single-channel signal is performed + after `channel_reduction_position` blocks. Only temporal blocks are used afterwards. + After the sequence of blocks, the output mask is computed using an additional + output temporal layer and a nonlinearity. + + References: + - Yoshioka et al, VarArray: Array-Geometry-Agnostic Continuous Speech Separation, 2022 + - Jukić et al, Flexible multichannel speech enhancement for noise-robust frontend, 2023 + + Args: + num_outputs: Number of output masks. + num_subbands: Number of subbands on the input spectrogram. + num_blocks: Number of blocks in the model. + channel_reduction_position: After this block, the signal will be reduced across channels. + channel_reduction_type: Reduction across channels: 'average' or 'attention' + channel_block_type: Block for channel processing: 'transform_average_concatenate' or 'transform_attend_concatenate' + temporal_block_type: Block for temporal processing: 'conformer_encoder' + temporal_block_num_layers: Number of layers for the temporal block + temporal_block_num_heads: Number of heads for the temporal block + temporal_block_dimension: The hidden size of the model + temporal_block_self_attention_model: Self attention model for the temporal block + temporal_block_att_context_size: Attention context size for the temporal block + mag_reduction: Channel-wise reduction for magnitude features + mag_power: Power to apply on magnitude features + use_ipd: Use inter-channel phase difference (IPD) features + mag_normalization: Normalize using mean ('mean') or mean and variance ('mean_var') + ipd_normalization: Normalize using mean ('mean') or mean and variance ('mean_var') + """ + + def __init__( + self, + num_outputs: int, + num_subbands: int, + num_blocks: int, + channel_reduction_position: int = -1, # if 0, apply before block 0, if -1 apply at the end + channel_reduction_type: str = 'attention', + channel_block_type: str = 'transform_attend_concatenate', + temporal_block_type: str = 'conformer_encoder', + temporal_block_num_layers: int = 5, + temporal_block_num_heads: int = 4, + temporal_block_dimension: int = 128, + temporal_block_self_attention_model: str = 'rel_pos', + temporal_block_att_context_size: Optional[List[int]] = None, + num_input_channels: Optional[int] = None, + mag_reduction: str = 'abs_mean', + mag_power: Optional[float] = None, + use_ipd: bool = True, + mag_normalization: Optional[str] = None, + ipd_normalization: Optional[str] = None, + ): + super().__init__() + + self.features = SpectrogramToMultichannelFeatures( + num_subbands=num_subbands, + num_input_channels=num_input_channels, + mag_reduction=mag_reduction, + mag_power=mag_power, + use_ipd=use_ipd, + mag_normalization=mag_normalization, + ipd_normalization=ipd_normalization, + ) + self.num_blocks = num_blocks + logging.debug('Total number of blocks: %d', self.num_blocks) + + # Channel reduction + if channel_reduction_position == -1: + # Apply reduction after the last layer + channel_reduction_position = num_blocks + + if channel_reduction_position > num_blocks: + raise ValueError( + f'Channel reduction position {channel_reduction_position} exceeds the number of blocks {num_blocks}' + ) + self.channel_reduction_position = channel_reduction_position + logging.debug('Channel reduction will be applied before block %d', self.channel_reduction_position) + + # Prepare processing blocks + self.channel_blocks = torch.nn.ModuleList() + self.temporal_blocks = torch.nn.ModuleList() + + for n in range(num_blocks): + logging.debug('Prepare block %d', n) + + # Setup channel block + if n < channel_reduction_position: + # Number of input features is either the number of input channels or the number of temporal block features + channel_in_features = self.features.num_features if n == 0 else temporal_block_dimension + logging.debug( + 'Setup channel block %s with %d input features and %d output features', + channel_block_type, + channel_in_features, + temporal_block_dimension, + ) + + # Instantiante the channel block + if channel_block_type == 'transform_average_concatenate': + channel_block = TransformAverageConcatenate( + in_features=channel_in_features, out_features=temporal_block_dimension + ) + elif channel_block_type == 'transform_attend_concatenate': + channel_block = TransformAttendConcatenate( + in_features=channel_in_features, out_features=temporal_block_dimension + ) + else: + raise ValueError(f'Unknown channel layer type: {channel_block_type}') + self.channel_blocks.append(channel_block) + + # Setup temporal block + temporal_in_features = ( + self.features.num_features if n == self.channel_reduction_position == 0 else temporal_block_dimension + ) + logging.debug('Setup temporal block %s', temporal_block_type) + if temporal_block_type == 'conformer_encoder': + temporal_block = ConformerEncoder( + feat_in=temporal_in_features, + n_layers=temporal_block_num_layers, + d_model=temporal_block_dimension, + subsampling_factor=1, + self_attention_model=temporal_block_self_attention_model, + att_context_size=temporal_block_att_context_size, + n_heads=temporal_block_num_heads, + ) + else: + raise ValueError(f'Unknown temporal block {temporal_block}.') + + self.temporal_blocks.append(temporal_block) + + logging.debug('Setup channel reduction %s', channel_reduction_type) + if channel_reduction_type == 'average': + # Mean across channel dimension + self.channel_reduction = ChannelAveragePool() + elif channel_reduction_type == 'attention': + # Number of input features is either the number of input channels or the number of temporal block features + channel_reduction_in_features = ( + self.features.num_features if self.channel_reduction_position == 0 else temporal_block_dimension + ) + # Attention across channel dimension + self.channel_reduction = ChannelAttentionPool(in_features=channel_reduction_in_features) + else: + raise ValueError(f'Unknown channel reduction type: {channel_reduction_type}') + + logging.debug('Setup %d output layers', num_outputs) + self.output_layers = torch.nn.ModuleList( + [ + ConformerEncoder( + feat_in=temporal_block_dimension, + n_layers=1, + d_model=temporal_block_dimension, + feat_out=num_subbands, + subsampling_factor=1, + self_attention_model=temporal_block_self_attention_model, + att_context_size=temporal_block_att_context_size, + n_heads=temporal_block_num_heads, + ) + for _ in range(num_outputs) + ] + ) + + # Output nonlinearity + self.output_nonlinearity = torch.nn.Sigmoid() + + @property + def input_types(self) -> Dict[str, NeuralType]: + """Returns definitions of module output ports. + """ + return { + "input": NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()), + "input_length": NeuralType(('B',), LengthsType()), + } + + @property + def output_types(self) -> Dict[str, NeuralType]: + """Returns definitions of module output ports. + """ + return { + "output": NeuralType(('B', 'C', 'D', 'T'), FloatType()), + "output_length": NeuralType(('B',), LengthsType()), + } + + @typecheck() + def forward(self, input: torch.Tensor, input_length: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """Estimate `num_outputs` masks from the input spectrogram. + """ + # get input features from a complex-valued spectrogram, (B, C, F, T) + output, output_length = self.features(input=input, input_length=input_length) + + # batch and num channels + B, M = input.size(0), input.size(1) + + # process all blocks + for n in range(self.num_blocks): + if n < self.channel_reduction_position: + # apply multichannel block + output = self.channel_blocks[n](input=output) + # change to a single-stream format + F, T = output.size(-2), output.size(-1) + # (B, M, F, T) -> (B * M, F, T) + output = output.reshape(-1, F, T) + if M > 1: + # adjust the lengths accordingly + output_length = output_length.repeat_interleave(M) + + elif n == self.channel_reduction_position: + # apply channel reduction + # (B, M, F, T) -> (B, F, T) + output = self.channel_reduction(input=output) + + # apply temporal model on each channel independently + with typecheck.disable_checks(): + # output is AcousticEncodedRepresentation, conformer encoder requires SpectrogramType + output, output_length = self.temporal_blocks[n](audio_signal=output, length=output_length) + + # if channel reduction has not been applied yet, go back to multichannel layout + if n < self.channel_reduction_position: + # back to multi-channel format with possibly a different number of features + T = output.size(-1) + # (B * M, F, T) -> (B, M, F, T) + output = output.reshape(B, M, -1, T) + if M > 1: + # convert lengths from single-stream format to original multichannel + output_length = output_length[0:-1:M] + + if self.channel_reduction_position == self.num_blocks: + # apply channel reduction after the last layer + # (B, M, F, T) -> (B, F, T) + output = self.channel_reduction(input=output) + + # final mask for each output + masks = [] + for output_layer in self.output_layers: + # calculate mask + with typecheck.disable_checks(): + # output is AcousticEncodedRepresentation, conformer encoder requires SpectrogramType + mask, mask_length = output_layer(audio_signal=output, length=output_length) + mask = self.output_nonlinearity(mask) + # append to all masks + masks.append(mask) + + # stack masks along channel dimensions + masks = torch.stack(masks, dim=1) + + return masks, mask_length + + class MaskReferenceChannel(NeuralModule): """A simple mask processor which applies mask on ref_channel of the input signal. @@ -359,6 +735,11 @@ def __init__(self, ref_channel: int = 0, mask_min_db: float = -200, mask_max_db: self.mask_min = db2mag(mask_min_db) self.mask_max = db2mag(mask_max_db) + logging.debug('Initialized %s with', self.__class__.__name__) + logging.debug('\tref_channel: %d', self.ref_channel) + logging.debug('\tmask_min: %f', self.mask_min) + logging.debug('\tmask_max: %f', self.mask_max) + @property def input_types(self) -> Dict[str, NeuralType]: """Returns definitions of module output ports. @@ -380,7 +761,7 @@ def output_types(self) -> Dict[str, NeuralType]: @typecheck() def forward( - self, input: torch.Tensor, input_length: torch.Tensor, mask: torch.Tensor + self, input: torch.Tensor, input_length: torch.Tensor, mask: torch.Tensor, ) -> Tuple[torch.Tensor, torch.Tensor]: """Apply mask on `ref_channel` of the input signal. This can be used to generate multi-channel output. @@ -407,36 +788,86 @@ class MaskBasedBeamformer(NeuralModule): Args: filter_type: string denoting the type of the filter. Defaults to `mvdr` - ref_channel: reference channel for processing + filter_beta: Parameter of the parameteric multichannel Wiener filter + filter_rank: Parameter of the parametric multichannel Wiener filter + filter_postfilter: Optional, postprocessing of the filter + ref_channel: Optional, reference channel. If None, it will be estimated automatically + ref_hard: If true, hard (one-hot) reference. If false, a soft reference + ref_hard_use_grad: If true, use straight-through gradient when using the hard reference + ref_subband_weighting: If true, use subband weighting when estimating reference channel + num_subbands: Optional, used to determine the parameter size for reference estimation mask_min_db: Threshold mask to a minimal value before applying it, defaults to -200dB mask_max_db: Threshold mask to a maximal value before applying it, defaults to 0dB + diag_reg: Optional, diagonal regularization for the multichannel filter + eps: Small regularization constant to avoid division by zero """ def __init__( self, filter_type: str = 'mvdr_souden', - ref_channel: int = 0, + filter_beta: float = 0.0, + filter_rank: str = 'one', + filter_postfilter: Optional[str] = None, + ref_channel: Optional[int] = 0, + ref_hard: bool = True, + ref_hard_use_grad: bool = False, + ref_subband_weighting: bool = False, + num_subbands: Optional[int] = None, mask_min_db: float = -200, mask_max_db: float = 0, + postmask_min_db: float = 0, + postmask_max_db: float = 0, + diag_reg: Optional[float] = 1e-6, + eps: float = 1e-8, ): - if not HAVE_TORCHAUDIO: - logging.error('Could not import torchaudio. Some features might not work.') - - raise ModuleNotFoundError( - "torchaudio is not installed but is necessary to instantiate a {self.__class__.__name__}" - ) - super().__init__() - self.ref_channel = ref_channel - self.filter_type = filter_type - if self.filter_type == 'mvdr_souden': - self.psd = torchaudio.transforms.PSD() - self.filter = torchaudio.transforms.SoudenMVDR() - else: + if filter_type not in ['pmwf', 'mvdr_souden']: raise ValueError(f'Unknown filter type {filter_type}') + + self.filter_type = filter_type + if self.filter_type == 'mvdr_souden' and filter_beta != 0: + logging.warning( + 'Using filter type %s: beta will be automatically set to zero (current beta %f) and rank to one (current rank %s).', + self.filter_type, + filter_beta, + filter_rank, + ) + filter_beta = 0.0 + filter_rank = 'one' + # Prepare filter + self.filter = ParametricMultichannelWienerFilter( + beta=filter_beta, + rank=filter_rank, + postfilter=filter_postfilter, + ref_channel=ref_channel, + ref_hard=ref_hard, + ref_hard_use_grad=ref_hard_use_grad, + ref_subband_weighting=ref_subband_weighting, + num_subbands=num_subbands, + diag_reg=diag_reg, + eps=eps, + ) # Mask thresholding + if mask_min_db >= mask_max_db: + raise ValueError( + f'Lower bound for the mask {mask_min_db}dB must be smaller than the upper bound {mask_max_db}dB' + ) self.mask_min = db2mag(mask_min_db) self.mask_max = db2mag(mask_max_db) + # Postmask thresholding + if postmask_min_db > postmask_max_db: + raise ValueError( + f'Lower bound for the postmask {postmask_min_db}dB must be smaller or equal to the upper bound {postmask_max_db}dB' + ) + self.postmask_min = db2mag(postmask_min_db) + self.postmask_max = db2mag(postmask_max_db) + + logging.debug('Initialized %s', self.__class__.__name__) + logging.debug('\tfilter_type: %s', self.filter_type) + logging.debug('\tmask_min: %e', self.mask_min) + logging.debug('\tmask_max: %e', self.mask_max) + logging.debug('\tpostmask_min: %e', self.postmask_min) + logging.debug('\tpostmask_max: %e', self.postmask_max) @property def input_types(self) -> Dict[str, NeuralType]: @@ -444,8 +875,9 @@ def input_types(self) -> Dict[str, NeuralType]: """ return { "input": NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()), - "input_length": NeuralType(('B',), LengthsType()), "mask": NeuralType(('B', 'C', 'D', 'T'), FloatType()), + "mask_undesired": NeuralType(('B', 'C', 'D', 'T'), FloatType(), optional=True), + "input_length": NeuralType(('B',), LengthsType(), optional=True), } @property @@ -454,45 +886,79 @@ def output_types(self) -> Dict[str, NeuralType]: """ return { "output": NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()), - "output_length": NeuralType(('B',), LengthsType()), + "output_length": NeuralType(('B',), LengthsType(), optional=True), } @typecheck() - def forward(self, input: torch.Tensor, input_length: torch.Tensor, mask: torch.Tensor) -> torch.Tensor: + def forward( + self, + input: torch.Tensor, + mask: torch.Tensor, + mask_undesired: Optional[torch.Tensor] = None, + input_length: Optional[torch.Tensor] = None, + ) -> torch.Tensor: """Apply a mask-based beamformer to the input spectrogram. This can be used to generate multi-channel output. - If `mask` has `M` channels, the output will have `M` channels as well. + If `mask` has multiple channels, a multichannel filter is created for each mask, + and the output is concatenation of individual outputs along the channel dimension. + The total number of outputs is `num_masks * M`, where `M` is the number of channels + at the filter output. Args: input: Input signal complex-valued spectrogram, shape (B, C, F, N) + mask: Mask for M output signals, shape (B, num_masks, F, N) input_length: Length of valid entries along the time dimension, shape (B,) - mask: Mask for M output signals, shape (B, M, F, N) Returns: - M-channel output signal complex-valued spectrogram, shape (B, M, F, N) + Multichannel output signal complex-valued spectrogram, shape (B, num_masks * M, F, N) """ - # Apply threshold on the mask - mask = torch.clamp(mask, min=self.mask_min, max=self.mask_max) # Length mask - length_mask: torch.Tensor = make_seq_mask_like( - lengths=input_length, like=mask[:, 0, ...], time_dim=-1, valid_ones=False - ) - # Use each mask to generate an output at ref_channel - output = [] - for m in range(mask.size(1)): - # Prepare mask for the desired and the undesired signal - mask_desired = mask[:, m, ...].masked_fill(length_mask, 0.0) - mask_undesired = (1 - mask_desired).masked_fill(length_mask, 0.0) - # Calculate PSDs - psd_desired = self.psd(input, mask_desired) - psd_undesired = self.psd(input, mask_undesired) + if input_length is not None: + length_mask: torch.Tensor = make_seq_mask_like( + lengths=input_length, like=mask[:, 0, ...], time_dim=-1, valid_ones=False + ) + + # Use each mask to generate an output + output, num_masks = [], mask.size(1) + for m in range(num_masks): + # Desired signal mask + mask_d = mask[:, m, ...] + # Undesired signal mask + if mask_undesired is not None: + mask_u = mask_undesired[:, m, ...] + elif num_masks == 1: + # If a single mask is estimated, use the complement + mask_u = 1 - mask_d + else: + # Use sum of all other sources + mask_u = torch.sum(mask, dim=1) - mask_d + + # Threshold masks + mask_d = torch.clamp(mask_d, min=self.mask_min, max=self.mask_max) + mask_u = torch.clamp(mask_u, min=self.mask_min, max=self.mask_max) + + if input_length is not None: + mask_d = mask_d.masked_fill(length_mask, 0.0) + mask_u = mask_u.masked_fill(length_mask, 0.0) + # Apply filter - output_m = self.filter(input, psd_desired, psd_undesired, reference_channel=self.ref_channel) - output_m = output_m.masked_fill(length_mask, 0.0) - # Save the current output (B, F, N) + output_m = self.filter(input=input, mask_s=mask_d, mask_n=mask_u) + + # Optional: apply a postmask with min and max thresholds + if self.postmask_min < self.postmask_max: + postmask_m = torch.clamp(mask[:, m, ...], min=self.postmask_min, max=self.postmask_max) + output_m = output_m * postmask_m.unsqueeze(1) + + # Save the current output (B, M, F, T) output.append(output_m) - output = torch.stack(output, axis=1) + # Combine outputs along the channel dimension + # Each output is (B, M, F, T) + output = torch.concatenate(output, axis=1) + + # Apply masking + if input_length is not None: + output = output.masked_fill(length_mask[:, None, ...], 0.0) return output, input_length @@ -516,15 +982,19 @@ class estimates a multiple-input multiple-output prediction filter - Jukić et al, Group sparsity for MIMO speech dereverberation, 2015 """ - def __init__( - self, filter_length: int, prediction_delay: int, diag_reg: Optional[float] = 1e-8, eps: float = 1e-10 - ): + def __init__(self, filter_length: int, prediction_delay: int, diag_reg: Optional[float] = 1e-6, eps: float = 1e-8): super().__init__() self.filter_length = filter_length self.prediction_delay = prediction_delay self.diag_reg = diag_reg self.eps = eps + logging.debug('Initialized %s', self.__class__.__name__) + logging.debug('\tfilter_length: %d', self.filter_length) + logging.debug('\tprediction_delay: %d', self.prediction_delay) + logging.debug('\tdiag_reg: %g', self.diag_reg) + logging.debug('\teps: %g', self.eps) + @property def input_types(self) -> Dict[str, NeuralType]: """Returns definitions of module output ports. @@ -561,7 +1031,7 @@ def forward( shape as the input signal (B, C, F, N), and the output length is the same as the input length. """ - # Temporal weighting: average power over channels, shape (B, F, N) + # Temporal weighting: average power over channels, output shape (B, F, N) weight = torch.mean(power, dim=1) # Use inverse power as the weight weight = 1 / (weight + self.eps) @@ -799,6 +1269,7 @@ class MaskBasedDereverbWPE(NeuralModule): mask_max_db: Threshold mask to a minimal value before applying it, defaults to 0dB diag_reg: Diagonal regularization for WPE eps: Small regularization constant + dtype: Data type for internal computations References: - Kinoshita et al, Neural network-based spectrum estimation for online WPE dereverberation, 2017 @@ -812,8 +1283,9 @@ def __init__( num_iterations: int = 1, mask_min_db: float = -200, mask_max_db: float = 0, - diag_reg: Optional[float] = 1e-8, - eps: float = 1e-10, + diag_reg: Optional[float] = 1e-6, + eps: float = 1e-8, + dtype: torch.dtype = torch.cdouble, ): super().__init__() # Filter setup @@ -824,6 +1296,16 @@ def __init__( # Mask thresholding self.mask_min = db2mag(mask_min_db) self.mask_max = db2mag(mask_max_db) + # Internal calculations + if dtype not in [torch.cfloat, torch.cdouble]: + raise ValueError(f'Unsupported dtype {dtype}, expecting torch.cfloat or torch.cdouble') + self.dtype = dtype + + logging.debug('Initialized %s', self.__class__.__name__) + logging.debug('\tnum_iterations: %s', self.num_iterations) + logging.debug('\tmask_min: %g', self.mask_min) + logging.debug('\tmask_max: %g', self.mask_max) + logging.debug('\tdtype: %s', self.dtype) @property def input_types(self) -> Dict[str, NeuralType]: @@ -851,19 +1333,21 @@ def forward( """Given an input signal `input`, apply the WPE dereverberation algoritm. Args: - input: C-channel complex-valued spectrogram, shape (B, C, F, N) + input: C-channel complex-valued spectrogram, shape (B, C, F, T) input_length: Optional length for each signal in the batch, shape (B,) - mask: Optional mask, shape (B, 1, F, N) or (B, C, F, N) + mask: Optional mask, shape (B, 1, F, N) or (B, C, F, T) Returns: Processed tensor with the same number of channels as the input, - shape (B, C, F, N). + shape (B, C, F, T). """ io_dtype = input.dtype with torch.cuda.amp.autocast(enabled=False): + output = input.to(dtype=self.dtype) - output = input.cdouble() + if not output.is_complex(): + raise RuntimeError(f'Expecting complex input, got {output.dtype}') for i in range(self.num_iterations): magnitude = torch.abs(output) @@ -891,7 +1375,7 @@ class MixtureConsistencyProjection(NeuralModule): eps: Small positive value for regularization Reference: - Wisdom et al., Differentiable consistency constraints for improved deep speech enhancement, 2018 + Wisdom et al, Differentiable consistency constraints for improved deep speech enhancement, 2018 """ def __init__(self, weighting: Optional[str] = None, eps: float = 1e-8): diff --git a/nemo/collections/asr/modules/audio_preprocessing.py b/nemo/collections/asr/modules/audio_preprocessing.py index 471488bd9647..cc5312403255 100644 --- a/nemo/collections/asr/modules/audio_preprocessing.py +++ b/nemo/collections/asr/modules/audio_preprocessing.py @@ -716,7 +716,7 @@ def __init__(self, fft_length: int, hop_length: int, power: Optional[float] = No logging.error('Could not import torchaudio. Some features might not work.') raise ModuleNotFoundError( - "torchaudio is not installed but is necessary to instantiate a {self.__class__.__name__}" + f"torchaudio is not installed but is necessary to instantiate a {self.__class__.__name__}" ) super().__init__() @@ -819,7 +819,7 @@ def __init__(self, fft_length: int, hop_length: int): logging.error('Could not import torchaudio. Some features might not work.') raise ModuleNotFoundError( - "torchaudio is not installed but is necessary to instantiate a {self.__class__.__name__}" + f"torchaudio is not installed but is necessary to instantiate a {self.__class__.__name__}" ) super().__init__() diff --git a/nemo/collections/asr/parts/submodules/multichannel_modules.py b/nemo/collections/asr/parts/submodules/multichannel_modules.py new file mode 100644 index 000000000000..04ab9985d641 --- /dev/null +++ b/nemo/collections/asr/parts/submodules/multichannel_modules.py @@ -0,0 +1,780 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random +from typing import Callable, Optional + +import torch + +from nemo.collections.asr.parts.submodules.multi_head_attention import MultiHeadAttention +from nemo.core.classes import NeuralModule, typecheck +from nemo.core.neural_types import AudioSignal, FloatType, NeuralType, SpectrogramType +from nemo.utils import logging + +try: + import torchaudio + + HAVE_TORCHAUDIO = True +except ModuleNotFoundError: + HAVE_TORCHAUDIO = False + + +class ChannelAugment(NeuralModule): + """Randomly permute and selects a subset of channels. + + Args: + permute_channels (bool): Apply a random permutation of channels. + num_channels_min (int): Minimum number of channels to select. + num_channels_max (int): Max number of channels to select. + rng: Optional, random generator. + seed: Optional, seed for the generator. + """ + + def __init__( + self, + permute_channels: bool = True, + num_channels_min: int = 1, + num_channels_max: Optional[int] = None, + rng: Optional[Callable] = None, + seed: Optional[int] = None, + ): + super().__init__() + + self._rng = random.Random(seed) if rng is None else rng + self.permute_channels = permute_channels + self.num_channels_min = num_channels_min + self.num_channels_max = num_channels_max + + if num_channels_max is not None and num_channels_min > num_channels_max: + raise ValueError( + f'Min number of channels {num_channels_min} cannot be greater than max number of channels {num_channels_max}' + ) + + logging.debug('Initialized %s with', self.__class__.__name__) + logging.debug('\tpermute_channels: %s', self.permute_channels) + logging.debug('\tnum_channels_min: %s', self.num_channels_min) + logging.debug('\tnum_channels_max: %s', self.num_channels_max) + + @property + def input_types(self): + """Returns definitions of module input types + """ + return { + 'input': NeuralType(('B', 'C', 'T'), AudioSignal()), + } + + @property + def output_types(self): + """Returns definitions of module output types + """ + return { + 'output': NeuralType(('B', 'C', 'T'), AudioSignal()), + } + + @typecheck() + @torch.no_grad() + def forward(self, input: torch.Tensor) -> torch.Tensor: + # Expecting (B, C, T) + assert input.ndim == 3, f'Expecting input with shape (B, C, T)' + num_channels_in = input.size(1) + + if num_channels_in < self.num_channels_min: + raise RuntimeError( + f'Number of input channels ({num_channels_in}) is smaller than the min number of output channels ({self.num_channels_min})' + ) + + num_channels_max = num_channels_in if self.num_channels_max is None else self.num_channels_max + num_channels_out = self._rng.randint(self.num_channels_min, num_channels_max) + + channels = list(range(num_channels_in)) + + if self.permute_channels: + self._rng.shuffle(channels) + + channels = channels[:num_channels_out] + + return input[:, channels, :] + + +class TransformAverageConcatenate(NeuralModule): + """Apply transform-average-concatenate across channels. + We're using a version from [2]. + + Args: + in_features: Number of input features + out_features: Number of output features + + References: + [1] Luo et al, End-to-end Microphone Permutation and Number Invariant Multi-channel Speech Separation, 2019 + [2] Yoshioka et al, VarArray: Array-Geometry-Agnostic Continuous Speech Separation, 2022 + """ + + def __init__(self, in_features: int, out_features: Optional[int] = None): + super().__init__() + + if out_features is None: + out_features = in_features + + # Parametrize with the total number of features (needs to be divisible by two due to stacking) + if out_features % 2 != 0: + raise ValueError(f'Number of output features should be divisible by two, currently set to {out_features}') + + self.transform_channel = torch.nn.Sequential( + torch.nn.Linear(in_features, out_features // 2, bias=False), torch.nn.ReLU() + ) + self.transform_average = torch.nn.Sequential( + torch.nn.Linear(in_features, out_features // 2, bias=False), torch.nn.ReLU() + ) + + logging.debug('Initialized %s with', self.__class__.__name__) + logging.debug('\tin_features: %d', in_features) + logging.debug('\tout_features: %d', out_features) + + @property + def input_types(self): + """Returns definitions of module input types + """ + return { + 'input': NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()), + } + + @property + def output_types(self): + """Returns definitions of module output types + """ + return { + 'output': NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()), + } + + @typecheck() + def forward(self, input: torch.Tensor) -> torch.Tensor: + """ + Args: + input: shape (B, M, in_features, T) + + Returns: + Output tensor with shape shape (B, M, out_features, T) + """ + B, M, F, T = input.shape + + # (B, M, F, T) -> (B, T, M, F) + input = input.permute(0, 3, 1, 2) + + # transform and average across channels + average = self.transform_average(input) + average = torch.mean(average, dim=-2, keepdim=True) + # view with the number of channels expanded to M + average = average.expand(-1, -1, M, -1) + + # transform each channel + transform = self.transform_channel(input) + + # concatenate along feature dimension + output = torch.cat([transform, average], dim=-1) + + # Return to the original layout + # (B, T, M, F) -> (B, M, F, T) + output = output.permute(0, 2, 3, 1) + + return output + + +class TransformAttendConcatenate(NeuralModule): + """Apply transform-attend-concatenate across channels. + The output is a concatenation of transformed channel and MHA + over channels. + + Args: + in_features: Number of input features + out_features: Number of output features + n_head: Number of heads for the MHA module + dropout_rate: Dropout rate for the MHA module + + References: + - Jukić et al, Flexible multichannel speech enhancement for noise-robust frontend, 2023 + """ + + def __init__(self, in_features: int, out_features: Optional[int] = None, n_head: int = 4, dropout_rate: float = 0): + super().__init__() + + if out_features is None: + out_features = in_features + + # Parametrize with the total number of features (needs to be divisible by two due to stacking) + if out_features % 2 != 0: + raise ValueError(f'Number of output features should be divisible by two, currently set to {out_features}') + + self.transform_channel = torch.nn.Sequential( + torch.nn.Linear(in_features, out_features // 2, bias=False), torch.nn.ReLU() + ) + self.transform_attend = torch.nn.Sequential( + torch.nn.Linear(in_features, out_features // 2, bias=False), torch.nn.ReLU() + ) + self.attention = MultiHeadAttention(n_head=n_head, n_feat=out_features // 2, dropout_rate=dropout_rate) + + logging.debug('Initialized %s with', self.__class__.__name__) + logging.debug('\tin_features: %d', in_features) + logging.debug('\tout_features: %d', out_features) + logging.debug('\tn_head: %d', n_head) + logging.debug('\tdropout_rate: %f', dropout_rate) + + @property + def input_types(self): + """Returns definitions of module input types + """ + return { + 'input': NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()), + } + + @property + def output_types(self): + """Returns definitions of module output types + """ + return { + 'output': NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()), + } + + @typecheck() + def forward(self, input: torch.Tensor) -> torch.Tensor: + """ + Args: + input: shape (B, M, in_features, T) + + Returns: + Output tensor with shape shape (B, M, out_features, T) + """ + B, M, F, T = input.shape + + # (B, M, F, T) -> (B, T, M, F) + input = input.permute(0, 3, 1, 2) + input = input.reshape(B * T, M, F) + + # transform each channel + transform = self.transform_channel(input) + + # attend + attend = self.transform_attend(input) + # attention across channels + attend = self.attention(query=attend, key=attend, value=attend, mask=None) + + # concatenate along feature dimension + output = torch.cat([transform, attend], dim=-1) + + # return to the original layout + output = output.view(B, T, M, -1) + + # (B, T, M, num_features) -> (B, M, num_features, T) + output = output.permute(0, 2, 3, 1) + + return output + + +class ChannelAveragePool(NeuralModule): + """Apply average pooling across channels. + """ + + def __init__(self): + super().__init__() + logging.debug('Initialized %s', self.__class__.__name__) + + @property + def input_types(self): + """Returns definitions of module input types + """ + return { + 'input': NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()), + } + + @property + def output_types(self): + """Returns definitions of module output types + """ + return { + 'output': NeuralType(('B', 'D', 'T'), SpectrogramType()), + } + + @typecheck() + def forward(self, input: torch.Tensor) -> torch.Tensor: + """ + Args: + input: shape (B, M, F, T) + + Returns: + Output tensor with shape shape (B, F, T) + """ + return torch.mean(input, dim=-3) + + +class ChannelAttentionPool(NeuralModule): + """Use attention pooling to aggregate information across channels. + First apply MHA across channels and then apply averaging. + + Args: + in_features: Number of input features + out_features: Number of output features + n_head: Number of heads for the MHA module + dropout_rate: Dropout rate for the MHA module + + References: + - Wang et al, Neural speech separation using sparially distributed microphones, 2020 + - Jukić et al, Flexible multichannel speech enhancement for noise-robust frontend, 2023 + """ + + def __init__(self, in_features: int, n_head: int = 1, dropout_rate: float = 0): + super().__init__() + self.in_features = in_features + self.attention = MultiHeadAttention(n_head=n_head, n_feat=in_features, dropout_rate=dropout_rate) + + logging.debug('Initialized %s with', self.__class__.__name__) + logging.debug('\tin_features: %d', in_features) + logging.debug('\tnum_heads: %d', n_head) + logging.debug('\tdropout_rate: %d', dropout_rate) + + @property + def input_types(self): + """Returns definitions of module input types + """ + return { + 'input': NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()), + } + + @property + def output_types(self): + """Returns definitions of module output types + """ + return { + 'output': NeuralType(('B', 'D', 'T'), SpectrogramType()), + } + + @typecheck() + def forward(self, input: torch.Tensor) -> torch.Tensor: + """ + Args: + input: shape (B, M, F, T) + + Returns: + Output tensor with shape shape (B, F, T) + """ + B, M, F, T = input.shape + + # (B, M, F, T) -> (B, T, M, F) + input = input.permute(0, 3, 1, 2) + input = input.reshape(B * T, M, F) + + # attention across channels + output = self.attention(query=input, key=input, value=input, mask=None) + + # return to the original layout + output = output.view(B, T, M, -1) + + # (B, T, M, num_features) -> (B, M, out_features, T) + output = output.permute(0, 2, 3, 1) + + # average across channels + output = torch.mean(output, axis=-3) + + return output + + +class ParametricMultichannelWienerFilter(NeuralModule): + """Parametric multichannel Wiener filter, with an adjustable + tradeoff between noise reduction and speech distortion. + It supports automatic reference channel selection based + on the estimated output SNR. + + Args: + beta: Parameter of the parameteric filter, tradeoff between noise reduction + and speech distortion (0: MVDR, 1: MWF). + rank: Rank assumption for the speech covariance matrix. + postfilter: Optional postfilter. If None, no postfilter is applied. + ref_channel: Optional, reference channel. If None, it will be estimated automatically. + ref_hard: If true, estimate a hard (one-hot) reference. If false, a soft reference. + ref_hard_use_grad: If true, use straight-through gradient when using the hard reference + ref_subband_weighting: If true, use subband weighting when estimating reference channel + num_subbands: Optional, used to determine the parameter size for reference estimation + diag_reg: Optional, diagonal regularization for the multichannel filter + eps: Small regularization constant to avoid division by zero + + References: + - Souden et al, On Optimal Frequency-Domain Multichannel Linear Filtering for Noise Reduction, 2010 + """ + + def __init__( + self, + beta: float = 1.0, + rank: str = 'one', + postfilter: Optional[str] = None, + ref_channel: Optional[int] = None, + ref_hard: bool = True, + ref_hard_use_grad: bool = True, + ref_subband_weighting: bool = False, + num_subbands: Optional[int] = None, + diag_reg: Optional[float] = 1e-6, + eps: float = 1e-8, + ): + if not HAVE_TORCHAUDIO: + logging.error('Could not import torchaudio. Some features might not work.') + + raise ModuleNotFoundError( + f"torchaudio is not installed but is necessary to instantiate a {self.__class__.__name__}" + ) + + super().__init__() + + # Parametric filter + # 0=MVDR, 1=MWF + self.beta = beta + + # Rank + # Assumed rank for the signal covariance matrix (psd_s) + self.rank = rank + + if self.rank == 'full' and self.beta == 0: + raise ValueError(f'Rank {self.rank} is not compatible with beta {self.beta}.') + + # Postfilter, applied on the output of the multichannel filter + if postfilter not in [None, 'ban']: + raise ValueError(f'Postfilter {postfilter} is not supported.') + self.postfilter = postfilter + + # Regularization + if diag_reg is not None and diag_reg < 0: + raise ValueError(f'Diagonal regularization {diag_reg} must be positive.') + self.diag_reg = diag_reg + + if eps <= 0: + raise ValueError(f'Epsilon {eps} must be positive.') + self.eps = eps + + # PSD estimator + self.psd = torchaudio.transforms.PSD() + + # Reference channel + self.ref_channel = ref_channel + if self.ref_channel == 'max_snr': + self.ref_estimator = ReferenceChannelEstimatorSNR( + hard=ref_hard, + hard_use_grad=ref_hard_use_grad, + subband_weighting=ref_subband_weighting, + num_subbands=num_subbands, + eps=eps, + ) + else: + self.ref_estimator = None + # Flag to determine if the filter is MISO or MIMO + self.is_mimo = self.ref_channel is None + + logging.debug('Initialized %s', self.__class__.__name__) + logging.debug('\tbeta: %f', self.beta) + logging.debug('\trank: %s', self.rank) + logging.debug('\tpostfilter: %s', self.postfilter) + logging.debug('\tdiag_reg: %g', self.diag_reg) + logging.debug('\teps: %g', self.eps) + logging.debug('\tref_channel: %s', self.ref_channel) + logging.debug('\tis_mimo: %s', self.is_mimo) + + @staticmethod + def trace(x: torch.Tensor, keepdim: bool = False) -> torch.Tensor: + """Calculate trace of matrix slices over the last + two dimensions in the input tensor. + + Args: + x: tensor, shape (..., C, C) + + Returns: + Trace for each (C, C) matrix. shape (...) + """ + trace = torch.diagonal(x, dim1=-2, dim2=-1).sum(-1) + if keepdim: + trace = trace.unsqueeze(-1).unsqueeze(-1) + return trace + + def apply_diag_reg(self, psd: torch.Tensor) -> torch.Tensor: + """Apply diagonal regularization on psd. + + Args: + psd: tensor, shape (..., C, C) + + Returns: + Tensor, same shape as input. + """ + # Regularization: diag_reg * trace(psd) + eps + diag_reg = self.diag_reg * self.trace(psd).real + self.eps + + # Apply regularization + psd = psd + torch.diag_embed(diag_reg.unsqueeze(-1) * torch.ones(psd.shape[-1], device=psd.device)) + + return psd + + def apply_filter(self, input: torch.Tensor, filter: torch.Tensor) -> torch.Tensor: + """Apply the MIMO filter on the input. + + Args: + input: batch with C input channels, shape (B, C, F, T) + filter: batch of C-input, M-output filters, shape (B, F, C, M) + + Returns: + M-channel filter output, shape (B, M, F, T) + """ + if not filter.is_complex(): + raise TypeError(f'Expecting complex-valued filter, found {filter.dtype}') + + if not input.is_complex(): + raise TypeError(f'Expecting complex-valued input, found {input.dtype}') + + if filter.ndim != 4 or filter.size(-2) != input.size(-3) or filter.size(-3) != input.size(-2): + raise ValueError(f'Filter shape {filter.shape}, not compatible with input shape {input.shape}') + + output = torch.einsum('bfcm,bcft->bmft', filter.conj(), input) + + return output + + def apply_ban(self, input: torch.Tensor, filter: torch.Tensor, psd_n: torch.Tensor) -> torch.Tensor: + """Apply blind analytic normalization postfilter. Note that this normalization has been + derived for the GEV beamformer in [1]. More specifically, the BAN postfilter aims to scale GEV + to satisfy the distortionless constraint and the final analytical expression is derived using + an assumption on the norm of the transfer function. + However, this may still be useful in some instances. + + Args: + input: batch with M output channels (B, M, F, T) + filter: batch of C-input, M-output filters, shape (B, F, C, M) + psd_n: batch of noise PSDs, shape (B, F, C, C) + + Returns: + Filtere input, shape (B, M, F, T) + + References: + - Warsitz and Haeb-Umbach, Blind Acoustic Beamforming Based on Generalized Eigenvalue Decomposition, 2007 + """ + # number of input channel, used to normalize the numerator + num_inputs = filter.size(-2) + numerator = torch.einsum('bfcm,bfci,bfij,bfjm->bmf', filter.conj(), psd_n, psd_n, filter) + numerator = torch.sqrt(numerator.abs() / num_inputs) + + denominator = torch.einsum('bfcm,bfci,bfim->bmf', filter.conj(), psd_n, filter) + denominator = denominator.abs() + + # Scalar filter per output channel, frequency and batch + # shape (B, M, F) + ban = numerator / (denominator + self.eps) + + input = ban[..., None] * input + + return input + + @property + def input_types(self): + """Returns definitions of module input types + """ + return { + 'input': NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()), + 'mask_s': NeuralType(('B', 'D', 'T'), FloatType()), + 'mask_n': NeuralType(('B', 'D', 'T'), FloatType()), + } + + @property + def output_types(self): + """Returns definitions of module output types + """ + return { + 'output': NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()), + } + + @typecheck() + def forward(self, input: torch.Tensor, mask_s: torch.Tensor, mask_n: torch.Tensor) -> torch.Tensor: + """Return processed signal. + The output has either one channel (M=1) if a ref_channel is selected, + or the same number of channels as the input (M=C) if ref_channel is None. + + Args: + input: Input signal, complex tensor with shape (B, C, F, T) + mask_s: Mask for the desired signal, shape (B, F, T) + mask_n: Mask for the undesired noise, shape (B, F, T) + + Returns: + Processed signal, shape (B, M, F, T) + """ + iodtype = input.dtype + + with torch.cuda.amp.autocast(enabled=False): + # Convert to double + input = input.cdouble() + mask_s = mask_s.double() + mask_n = mask_n.double() + + # Calculate signal statistics + psd_s = self.psd(input, mask_s) + psd_n = self.psd(input, mask_n) + + if self.rank == 'one': + # Calculate filter W using (18) in [1] + # Diagonal regularization + if self.diag_reg: + psd_n = self.apply_diag_reg(psd_n) + + # MIMO filter + # (B, F, C, C) + W = torch.linalg.solve(psd_n, psd_s) + lam = self.trace(W, keepdim=True).real + W = W / (self.beta + lam + self.eps) + elif self.rank == 'full': + # Calculate filter W using (15) in [1] + psd_sn = psd_s + self.beta * psd_n + + if self.diag_reg: + psd_sn = self.apply_diag_reg(psd_sn) + + # MIMO filter + # (B, F, C, C) + W = torch.linalg.solve(psd_sn, psd_s) + else: + raise RuntimeError(f'Unexpected rank {self.rank}') + + if torch.jit.isinstance(self.ref_channel, int): + # Fixed ref channel + # (B, F, C, 1) + W = W[..., self.ref_channel].unsqueeze(-1) + elif self.ref_estimator is not None: + # Estimate ref channel tensor (one-hot or soft across C) + # (B, C) + ref_channel_tensor = self.ref_estimator(W=W, psd_s=psd_s, psd_n=psd_n).to(W.dtype) + # Weighting across channels + # (B, F, C, 1) + W = torch.sum(W * ref_channel_tensor[:, None, None, :], dim=-1, keepdim=True) + + output = self.apply_filter(input=input, filter=W) + + # Optional: postfilter + if self.postfilter == 'ban': + output = self.apply_ban(input=output, filter=W, psd_n=psd_n) + + return output.to(iodtype) + + +class ReferenceChannelEstimatorSNR(NeuralModule): + """Estimate a reference channel by selecting the reference + that maximizes the output SNR. It returns one-hot encoded + vector or a soft reference. + + A straight-through estimator is used for gradient when using + hard reference. + + Args: + hard: If true, use hard estimate of ref channel. + If false, use a soft estimate across channels. + hard_use_grad: Use straight-through estimator for + the gradient. + subband_weighting: If true, use subband weighting when + adding across subband SNRs. If false, use average + across subbands. + + References: + Boeddeker et al, Front-End Processing for the CHiME-5 Dinner Party Scenario, 2018 + """ + + def __init__( + self, + hard: bool = True, + hard_use_grad: bool = True, + subband_weighting: bool = False, + num_subbands: Optional[int] = None, + eps: float = 1e-8, + ): + super().__init__() + + self.hard = hard + self.hard_use_grad = hard_use_grad + self.subband_weighting = subband_weighting + self.eps = eps + + if subband_weighting and num_subbands is None: + raise ValueError(f'Number of subbands must be provided when using subband_weighting={subband_weighting}.') + # Subband weighting + self.weight_s = torch.nn.Parameter(torch.ones(num_subbands)) if subband_weighting else None + self.weight_n = torch.nn.Parameter(torch.ones(num_subbands)) if subband_weighting else None + + logging.debug('Initialized %s', self.__class__.__name__) + logging.debug('\thard: %d', self.hard) + logging.debug('\thard_use_grad: %d', self.hard_use_grad) + logging.debug('\tsubband_weighting: %d', self.subband_weighting) + logging.debug('\tnum_subbands: %s', num_subbands) + logging.debug('\teps: %e', self.eps) + + @property + def input_types(self): + """Returns definitions of module input types + """ + return { + 'W': NeuralType(('B', 'D', 'C', 'C'), SpectrogramType()), + 'psd_s': NeuralType(('B', 'D', 'C', 'C'), SpectrogramType()), + 'psd_n': NeuralType(('B', 'D', 'C', 'C'), SpectrogramType()), + } + + @property + def output_types(self): + """Returns definitions of module output types + """ + return { + 'output': NeuralType(('B', 'C'), FloatType()), + } + + @typecheck() + def forward(self, W: torch.Tensor, psd_s: torch.Tensor, psd_n: torch.Tensor) -> torch.Tensor: + """ + Args: + W: Multichannel input multichannel output filter, shape (B, F, C, M), where + C is the number of input channels and M is the number of output channels + psd_s: Covariance for the signal, shape (B, F, C, C) + psd_n: Covariance for the noise, shape (B, F, C, C) + + Returns: + One-hot or soft reference channel, shape (B, M) + """ + if self.subband_weighting: + # (B, F, M) + pow_s = torch.einsum('...jm,...jk,...km->...m', W.conj(), psd_s, W).abs() + pow_n = torch.einsum('...jm,...jk,...km->...m', W.conj(), psd_n, W).abs() + + # Subband-weighting + # (B, F, M) -> (B, M) + pow_s = torch.sum(pow_s * self.weight_s.softmax(dim=0).unsqueeze(1), dim=-2) + pow_n = torch.sum(pow_n * self.weight_n.softmax(dim=0).unsqueeze(1), dim=-2) + else: + # Sum across f as well + # (B, F, C, M), (B, F, C, C), (B, F, C, M) -> (B, M) + pow_s = torch.einsum('...fjm,...fjk,...fkm->...m', W.conj(), psd_s, W).abs() + pow_n = torch.einsum('...fjm,...fjk,...fkm->...m', W.conj(), psd_n, W).abs() + + # Estimated SNR per channel (B, C) + snr = pow_s / (pow_n + self.eps) + snr = 10 * torch.log10(snr + self.eps) + + # Soft reference + ref_soft = snr.softmax(dim=-1) + + if self.hard: + _, idx = ref_soft.max(dim=-1, keepdim=True) + ref_hard = torch.zeros_like(snr).scatter(-1, idx, 1.0) + if self.hard_use_grad: + # Straight-through for gradient + # Propagate ref_soft gradient, as if thresholding is identity + ref = ref_hard - ref_soft.detach() + ref_soft + else: + # No gradient + ref = ref_hard + else: + ref = ref_soft + + return ref diff --git a/nemo/collections/asr/parts/utils/audio_utils.py b/nemo/collections/asr/parts/utils/audio_utils.py index 80dfc74950a5..8188dbed003b 100644 --- a/nemo/collections/asr/parts/utils/audio_utils.py +++ b/nemo/collections/asr/parts/utils/audio_utils.py @@ -412,7 +412,7 @@ def calculate_sdr_numpy( convolution_filter_length: Optional[int] = None, remove_mean: bool = True, sdr_max: Optional[float] = None, - eps: float = 1e-10, + eps: float = 1e-8, ) -> float: """Calculate signal-to-distortion ratio. @@ -519,7 +519,7 @@ def convmtx_mc_numpy(x: np.ndarray, filter_length: int, delay: int = 0, n_steps: return np.hstack(mc_mtx) -def scale_invariant_target_numpy(estimate: np.ndarray, target: np.ndarray, eps: float = 1e-10) -> np.ndarray: +def scale_invariant_target_numpy(estimate: np.ndarray, target: np.ndarray, eps: float = 1e-8) -> np.ndarray: """Calculate convolution-invariant target for a given estimated signal. Calculate scaled target obtained by solving @@ -543,7 +543,7 @@ def scale_invariant_target_numpy(estimate: np.ndarray, target: np.ndarray, eps: def convolution_invariant_target_numpy( - estimate: np.ndarray, target: np.ndarray, filter_length, diag_reg: float = 1e-8, eps: float = 1e-10 + estimate: np.ndarray, target: np.ndarray, filter_length, diag_reg: float = 1e-6, eps: float = 1e-8 ) -> np.ndarray: """Calculate convolution-invariant target for a given estimated signal. diff --git a/requirements/requirements_asr.txt b/requirements/requirements_asr.txt index 011862ad723b..8df86fa4679a 100644 --- a/requirements/requirements_asr.txt +++ b/requirements/requirements_asr.txt @@ -5,7 +5,7 @@ ipywidgets jiwer kaldi-python-io kaldiio -librosa>=0.9.0 +librosa>=0.10.0 marshmallow matplotlib packaging diff --git a/tests/collections/asr/test_asr_part_submodules_multichannel.py b/tests/collections/asr/test_asr_part_submodules_multichannel.py new file mode 100644 index 000000000000..f53d14027731 --- /dev/null +++ b/tests/collections/asr/test_asr_part_submodules_multichannel.py @@ -0,0 +1,157 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import torch + +from nemo.collections.asr.parts.submodules.multichannel_modules import ( + ChannelAttentionPool, + ChannelAugment, + ChannelAveragePool, + TransformAttendConcatenate, + TransformAverageConcatenate, +) + + +class TestChannelAugment: + @pytest.mark.unit + @pytest.mark.parametrize('num_channels', [1, 2, 6]) + def test_channel_selection(self, num_channels): + """Test getting a fixed number of channels without randomization. + The first few channels will always be selected. + """ + num_examples = 100 + batch_size = 4 + num_samples = 100 + + uut = ChannelAugment(permute_channels=False, num_channels_min=1, num_channels_max=num_channels) + + for n in range(num_examples): + input = torch.rand(batch_size, num_channels, num_samples) + output = uut(input=input) + + num_channels_out = output.size(-2) + + assert torch.allclose( + output, input[:, :num_channels_out, :] + ), f'Failed for num_channels_out {num_channels_out}, example {n}' + + +class TestTAC: + @pytest.mark.unit + @pytest.mark.parametrize('num_channels', [1, 2, 6]) + def test_average(self, num_channels): + """Test transform-average-concatenate. + """ + num_examples = 10 + batch_size = 4 + in_features = 128 + out_features = 96 + num_frames = 20 + + uut = TransformAverageConcatenate(in_features=in_features, out_features=out_features) + + for n in range(num_examples): + input = torch.rand(batch_size, num_channels, in_features, num_frames) + output = uut(input=input) + + # Dimensions must match + assert output.shape == ( + batch_size, + num_channels, + out_features, + num_frames, + ), f'Example {n}: output shape {output.shape} not matching the expected ({batch_size}, {num_channels}, {out_features}, {num_frames})' + + # Second half of features must be the same for all channels (concatenated average) + if num_channels > 1: + # reference + avg_ref = output[:, 0, out_features // 2 :, :] + for m in range(1, num_channels): + assert torch.allclose( + output[:, m, out_features // 2 :, :], avg_ref + ), f'Example {n}: average not matching' + + @pytest.mark.unit + @pytest.mark.parametrize('num_channels', [1, 2, 6]) + def test_attend(self, num_channels): + """Test transform-attend-concatenate. + Second half of features is different across channels, since we're using attention, so + we check only for shape. + """ + num_examples = 10 + batch_size = 4 + in_features = 128 + out_features = 96 + num_frames = 20 + + uut = TransformAttendConcatenate(in_features=in_features, out_features=out_features) + + for n in range(num_examples): + input = torch.rand(batch_size, num_channels, in_features, num_frames) + output = uut(input=input) + + # Dimensions must match + assert output.shape == ( + batch_size, + num_channels, + out_features, + num_frames, + ), f'Example {n}: output shape {output.shape} not matching the expected ({batch_size}, {num_channels}, {out_features}, {num_frames})' + + +class TestChannelPool: + @pytest.mark.unit + @pytest.mark.parametrize('num_channels', [1, 2, 6]) + def test_average(self, num_channels): + """Test average channel pooling. + """ + num_examples = 10 + batch_size = 4 + in_features = 128 + num_frames = 20 + + uut = ChannelAveragePool() + + for n in range(num_examples): + input = torch.rand(batch_size, num_channels, in_features, num_frames) + output = uut(input=input) + + # Dimensions must match + assert torch.allclose( + output, torch.mean(input, dim=1) + ), f'Example {n}: output not matching the expected average' + + @pytest.mark.unit + @pytest.mark.parametrize('num_channels', [2, 6]) + def test_attention(self, num_channels): + """Test attention for channel pooling. + """ + num_examples = 10 + batch_size = 4 + in_features = 128 + num_frames = 20 + + uut = ChannelAttentionPool(in_features=in_features) + + for n in range(num_examples): + input = torch.rand(batch_size, num_channels, in_features, num_frames) + output = uut(input=input) + + # Dimensions must match + assert output.shape == ( + batch_size, + in_features, + num_frames, + ), f'Example {n}: output shape {output.shape} not matching the expected ({batch_size}, {in_features}, {num_frames})' diff --git a/tests/collections/asr/test_asr_rnnt_encdec_model.py b/tests/collections/asr/test_asr_rnnt_encdec_model.py index b466d09c460d..8622ab9b53ce 100644 --- a/tests/collections/asr/test_asr_rnnt_encdec_model.py +++ b/tests/collections/asr/test_asr_rnnt_encdec_model.py @@ -641,6 +641,7 @@ def test_greedy_multi_decoding_stateless_decoder(self, greedy_class): partial_hyp = partial_hyp[0] _ = greedy(encoder_output=enc_out, encoded_lengths=enc_len, partial_hypotheses=partial_hyp) + @pytest.mark.pleasefixme @pytest.mark.skipif( not NUMBA_RNNT_LOSS_AVAILABLE, reason='RNNTLoss has not been compiled with appropriate numba version.', ) @@ -704,6 +705,7 @@ def test_greedy_decoding_preserve_alignment(self, greedy_class): assert torch.is_tensor(logp) assert torch.is_tensor(label) + @pytest.mark.pleasefixme @pytest.mark.skipif( not NUMBA_RNNT_LOSS_AVAILABLE, reason='RNNTLoss has not been compiled with appropriate numba version.', ) diff --git a/tests/collections/asr/test_audio_modules.py b/tests/collections/asr/test_audio_modules.py index 3ddc1e22a577..ee1d61f59ed3 100644 --- a/tests/collections/asr/test_audio_modules.py +++ b/tests/collections/asr/test_audio_modules.py @@ -21,12 +21,14 @@ from nemo.collections.asr.modules.audio_modules import ( MaskBasedDereverbWPE, + MaskEstimatorFlexChannels, MaskReferenceChannel, SpectrogramToMultichannelFeatures, WPEFilter, ) from nemo.collections.asr.modules.audio_preprocessing import AudioToSpectrogram from nemo.collections.asr.parts.utils.audio_utils import convmtx_mc_numpy +from nemo.utils import logging try: importlib.import_module('torchaudio') @@ -347,3 +349,68 @@ def test_mask_based_dereverb_init(self, num_channels: int, filter_length: int, d assert y.shape == x.shape, 'Output shape not matching, example {n}' assert torch.equal(y_length, x_length), 'Length not matching, example {n}' + + +class TestMaskEstimator: + @pytest.mark.unit + @pytest.mark.skipif(not HAVE_TORCHAUDIO, reason="Modules in this test require torchaudio") + @pytest.mark.parametrize('channel_reduction_position', [0, 1, -1]) + @pytest.mark.parametrize('channel_reduction_type', ['average', 'attention']) + @pytest.mark.parametrize('channel_block_type', ['transform_average_concatenate', 'transform_attend_concatenate']) + def test_flex_channels( + self, channel_reduction_position: int, channel_reduction_type: str, channel_block_type: str + ): + """Test initialization of the mask estimator and make sure it can process input tensor. + """ + # Model parameters + num_subbands_tests = [32, 65] + num_outputs_tests = [1, 2] + num_blocks_tests = [1, 5] + + # Input configuration + num_channels_tests = [1, 4] + batch_size = 4 + num_frames = 50 + + for num_subbands in num_subbands_tests: + for num_outputs in num_outputs_tests: + for num_blocks in num_blocks_tests: + logging.debug( + 'Instantiate with num_subbands=%d, num_outputs=%d, num_blocks=%d', + num_subbands, + num_outputs, + num_blocks, + ) + + # Instantiate + uut = MaskEstimatorFlexChannels( + num_outputs=num_outputs, + num_subbands=num_subbands, + num_blocks=num_blocks, + channel_reduction_position=channel_reduction_position, + channel_reduction_type=channel_reduction_type, + channel_block_type=channel_block_type, + ) + + # Process different channel configurations + for num_channels in num_channels_tests: + logging.debug('Process num_channels=%d', num_channels) + input_size = (batch_size, num_channels, num_subbands, num_frames) + + # multi-channel input + spec = torch.randn(input_size, dtype=torch.cfloat) + spec_length = torch.randint(1, num_frames, (batch_size,)) + + # UUT + mask, mask_length = uut(input=spec, input_length=spec_length) + + # Check output dimensions match + expected_mask_shape = (batch_size, num_outputs, num_subbands, num_frames) + assert ( + mask.shape == expected_mask_shape + ), f'Output shape mismatch: expected {expected_mask_shape}, got {mask.shape}' + + # Check output lengths match + assert torch.all( + mask_length == spec_length + ), f'Output length mismatch: expected {spec_length}, got {mask_length}' diff --git a/tests/collections/asr/test_conformer_encoder.py b/tests/collections/asr/test_conformer_encoder.py index aa972ae6fd47..a7b914120bb8 100644 --- a/tests/collections/asr/test_conformer_encoder.py +++ b/tests/collections/asr/test_conformer_encoder.py @@ -77,6 +77,7 @@ def test_stochastic_depth_model_creation(self): feat_in=10, n_layers=n_layers, d_model=4, feat_out=8, stochastic_depth_start_layer=start_layer, ) + @pytest.mark.pleasefixme def test_stochastic_depth_forward(self): """Testing that forward works and we get randomness during training, but not during eval.""" random_input = torch.rand((1, 2, 2)) From 3cd9fbd94bd797e89270fb779f6a33930132cd92 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 13 Oct 2023 11:09:38 -0700 Subject: [PATCH 333/512] fix ptl_bugs in slu_models.py (#7689) (#7712) * fix ptl_bugs in slu_models.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * change strategy to ddp_find_unused_parameters_true in slu example yaml --------- Signed-off-by: Seonghun Noh Signed-off-by: Seonghun Co-authored-by: Seonghun Noh Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> --- .../conformer_transformer_large_bpe.yaml | 2 +- nemo/collections/asr/models/slu_models.py | 30 ++++++++++++------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/examples/slu/speech_intent_slot/configs/conformer_transformer_large_bpe.yaml b/examples/slu/speech_intent_slot/configs/conformer_transformer_large_bpe.yaml index 10d59f2b5c0d..5d309f3cd193 100644 --- a/examples/slu/speech_intent_slot/configs/conformer_transformer_large_bpe.yaml +++ b/examples/slu/speech_intent_slot/configs/conformer_transformer_large_bpe.yaml @@ -174,7 +174,7 @@ trainer: max_steps: -1 # computed at runtime if not set val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations accelerator: auto - strategy: ddp + strategy: ddp_find_unused_parameters_true accumulate_grad_batches: 1 gradient_clip_val: 0.0 precision: 32 # Should be set to 16 for O1 and O2 to enable the AMP. diff --git a/nemo/collections/asr/models/slu_models.py b/nemo/collections/asr/models/slu_models.py index 6df907334662..59323cfbfffe 100644 --- a/nemo/collections/asr/models/slu_models.py +++ b/nemo/collections/asr/models/slu_models.py @@ -285,7 +285,7 @@ def predict( predictions = self.sequence_generator.decode_semantics_from_tokens(pred_tokens) return predictions - def validation_step(self, batch, batch_idx, dataloader_idx=0): + def validation_pass(self, batch, batch_idx, dataloader_idx=0): if len(batch) == 4: signal, signal_len, semantics, semantics_len = batch else: @@ -327,19 +327,29 @@ def validation_step(self, batch, batch_idx, dataloader_idx=0): 'val_wer': wer, } + def validation_step(self, batch, batch_idx, dataloader_idx=0): + metrics = self.validation_pass(batch, batch_idx, dataloader_idx) + if type(self.trainer.val_dataloaders) == list and len(self.trainer.val_dataloaders) > 1: + self.validation_step_outputs[dataloader_idx].append(metrics) + else: + self.validation_step_outputs.append(metrics) + return metrics + def test_step(self, batch, batch_idx, dataloader_idx=0): - logs = self.validation_step(batch, batch_idx, dataloader_idx=dataloader_idx) - test_logs = { - 'test_loss': logs['val_loss'], - 'test_wer_num': logs['val_wer_num'], - 'test_wer_denom': logs['val_wer_denom'], - 'test_wer': logs['val_wer'], - } + logs = self.validation_pass(batch, batch_idx, dataloader_idx=dataloader_idx) + test_logs = {name.replace("val_", "test_"): value for name, value in logs.items()} + if type(self.trainer.test_dataloaders) == list and len(self.trainer.test_dataloaders) > 1: + self.test_step_outputs[dataloader_idx].append(test_logs) + else: + self.test_step_outputs.append(test_logs) return test_logs def test_dataloader(self): - if self._test_dl is not None: - return self._test_dl + if self._test_dl is None: + # None dataloader no longer supported in PTL2.0 + self._test_dl = [] + + return self._test_dl def _setup_dataloader_from_config(self, config: Optional[Dict]): if 'augmentor' in config: From ddf546d178c1a400e56c20e4da74a1fe22592b8d Mon Sep 17 00:00:00 2001 From: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Date: Fri, 13 Oct 2023 16:10:16 -0700 Subject: [PATCH 334/512] fix code block typo (#7717) Signed-off-by: Elena Rastorgueva --- docs/source/asr/intro.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/asr/intro.rst b/docs/source/asr/intro.rst index 7066c2989393..2ac27c4312dc 100644 --- a/docs/source/asr/intro.rst +++ b/docs/source/asr/intro.rst @@ -31,9 +31,9 @@ You can also obtain timestamps for each word in the transcription as follows: from omegaconf import OmegaConf, open_dict decoding_cfg = asr_model.cfg.decoding with open_dict(decoding_cfg): - decoding_cfg.preserve_alignments = True - decoding_cfg.compute_timestamps = True - asr_model.change_decoding_strategy(decoding_cfg) + decoding_cfg.preserve_alignments = True + decoding_cfg.compute_timestamps = True + asr_model.change_decoding_strategy(decoding_cfg) # specify flag `return_hypotheses=True`` hypotheses = asr_model.transcribe(["path/to/audio_file.wav"], return_hypotheses=True) From ff7154d4cd7ffdf4eb98c343ba54c13b99545df1 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Mon, 16 Oct 2023 10:02:44 -0700 Subject: [PATCH 335/512] Update key mapping logic --- nemo/collections/nlp/parts/nlp_overrides.py | 110 ++++---------------- 1 file changed, 20 insertions(+), 90 deletions(-) diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index 60b9bbacfbdb..6fa645093fc4 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -14,6 +14,7 @@ import itertools import os +import re import shutil import tempfile from collections import OrderedDict, defaultdict @@ -614,99 +615,28 @@ def modify_state_dict(self, conf, state_dict): if 'model.model.diffusion_model.input_blocks.1.0.in_layers.2.weight' in loaded_keys: new_state_dict = {} # GroupNormOpt fuses activation function to one layer, thus the indexing of weights are shifted for following + def should_process(key): + base_str = "model.model.diffusion_model." + blocks = ["input_blocks", "middle_block", "output_blocks"] + for block in blocks: + for layer_type in ["in_layers", "out_layers"]: + for index in [2, 3]: # The layers index. + for param in ["weight", "bias"]: + if block == 'middle_block': + for num in [0, 2]: + template = f"{base_str}{block}.{num}.{layer_type}.{index}.{param}" + if key == template: + return True + else: + for num in range(12): # 12 blocks, adjust as needed. + template = f"{base_str}{block}.{num}.0.{layer_type}.{index}.{param}" + if key == template: + return True + return False for key_ in state_dict.keys(): if key_ == "model.cond_stage_model.transformer.text_model.embeddings.position_ids": continue - if key_ in [ - "model.model.diffusion_model.input_blocks.1.0.in_layers.2.weight", - "model.model.diffusion_model.input_blocks.1.0.in_layers.2.bias", - "model.model.diffusion_model.input_blocks.1.0.out_layers.3.weight", - "model.model.diffusion_model.input_blocks.1.0.out_layers.3.bias", - "model.model.diffusion_model.input_blocks.2.0.in_layers.2.weight", - "model.model.diffusion_model.input_blocks.2.0.in_layers.2.bias", - "model.model.diffusion_model.input_blocks.2.0.out_layers.3.weight", - "model.model.diffusion_model.input_blocks.2.0.out_layers.3.bias", - "model.model.diffusion_model.input_blocks.4.0.in_layers.2.weight", - "model.model.diffusion_model.input_blocks.4.0.in_layers.2.bias", - "model.model.diffusion_model.input_blocks.4.0.out_layers.3.weight", - "model.model.diffusion_model.input_blocks.4.0.out_layers.3.bias", - "model.model.diffusion_model.input_blocks.5.0.in_layers.2.weight", - "model.model.diffusion_model.input_blocks.5.0.in_layers.2.bias", - "model.model.diffusion_model.input_blocks.5.0.out_layers.3.weight", - "model.model.diffusion_model.input_blocks.5.0.out_layers.3.bias", - "model.model.diffusion_model.input_blocks.7.0.in_layers.2.weight", - "model.model.diffusion_model.input_blocks.7.0.in_layers.2.bias", - "model.model.diffusion_model.input_blocks.7.0.out_layers.3.weight", - "model.model.diffusion_model.input_blocks.7.0.out_layers.3.bias", - "model.model.diffusion_model.input_blocks.8.0.in_layers.2.weight", - "model.model.diffusion_model.input_blocks.8.0.in_layers.2.bias", - "model.model.diffusion_model.input_blocks.8.0.out_layers.3.weight", - "model.model.diffusion_model.input_blocks.8.0.out_layers.3.bias", - "model.model.diffusion_model.input_blocks.10.0.in_layers.2.weight", - "model.model.diffusion_model.input_blocks.10.0.in_layers.2.bias", - "model.model.diffusion_model.input_blocks.10.0.out_layers.3.weight", - "model.model.diffusion_model.input_blocks.10.0.out_layers.3.bias", - "model.model.diffusion_model.input_blocks.11.0.in_layers.2.weight", - "model.model.diffusion_model.input_blocks.11.0.in_layers.2.bias", - "model.model.diffusion_model.input_blocks.11.0.out_layers.3.weight", - "model.model.diffusion_model.input_blocks.11.0.out_layers.3.bias", - "model.model.diffusion_model.middle_block.0.in_layers.2.weight", - "model.model.diffusion_model.middle_block.0.in_layers.2.bias", - "model.model.diffusion_model.middle_block.0.out_layers.3.weight", - "model.model.diffusion_model.middle_block.0.out_layers.3.bias", - "model.model.diffusion_model.middle_block.2.in_layers.2.weight", - "model.model.diffusion_model.middle_block.2.in_layers.2.bias", - "model.model.diffusion_model.middle_block.2.out_layers.3.weight", - "model.model.diffusion_model.middle_block.2.out_layers.3.bias", - "model.model.diffusion_model.output_blocks.0.0.in_layers.2.weight", - "model.model.diffusion_model.output_blocks.0.0.in_layers.2.bias", - "model.model.diffusion_model.output_blocks.0.0.out_layers.3.weight", - "model.model.diffusion_model.output_blocks.0.0.out_layers.3.bias", - "model.model.diffusion_model.output_blocks.1.0.in_layers.2.weight", - "model.model.diffusion_model.output_blocks.1.0.in_layers.2.bias", - "model.model.diffusion_model.output_blocks.1.0.out_layers.3.weight", - "model.model.diffusion_model.output_blocks.1.0.out_layers.3.bias", - "model.model.diffusion_model.output_blocks.2.0.in_layers.2.weight", - "model.model.diffusion_model.output_blocks.2.0.in_layers.2.bias", - "model.model.diffusion_model.output_blocks.2.0.out_layers.3.weight", - "model.model.diffusion_model.output_blocks.2.0.out_layers.3.bias", - "model.model.diffusion_model.output_blocks.3.0.in_layers.2.weight", - "model.model.diffusion_model.output_blocks.3.0.in_layers.2.bias", - "model.model.diffusion_model.output_blocks.3.0.out_layers.3.weight", - "model.model.diffusion_model.output_blocks.3.0.out_layers.3.bias", - "model.model.diffusion_model.output_blocks.4.0.in_layers.2.weight", - "model.model.diffusion_model.output_blocks.4.0.in_layers.2.bias", - "model.model.diffusion_model.output_blocks.4.0.out_layers.3.weight", - "model.model.diffusion_model.output_blocks.4.0.out_layers.3.bias", - "model.model.diffusion_model.output_blocks.5.0.in_layers.2.weight", - "model.model.diffusion_model.output_blocks.5.0.in_layers.2.bias", - "model.model.diffusion_model.output_blocks.5.0.out_layers.3.weight", - "model.model.diffusion_model.output_blocks.5.0.out_layers.3.bias", - "model.model.diffusion_model.output_blocks.6.0.in_layers.2.weight", - "model.model.diffusion_model.output_blocks.6.0.in_layers.2.bias", - "model.model.diffusion_model.output_blocks.6.0.out_layers.3.weight", - "model.model.diffusion_model.output_blocks.6.0.out_layers.3.bias", - "model.model.diffusion_model.output_blocks.7.0.in_layers.2.weight", - "model.model.diffusion_model.output_blocks.7.0.in_layers.2.bias", - "model.model.diffusion_model.output_blocks.7.0.out_layers.3.weight", - "model.model.diffusion_model.output_blocks.7.0.out_layers.3.bias", - "model.model.diffusion_model.output_blocks.8.0.in_layers.2.weight", - "model.model.diffusion_model.output_blocks.8.0.in_layers.2.bias", - "model.model.diffusion_model.output_blocks.8.0.out_layers.3.weight", - "model.model.diffusion_model.output_blocks.8.0.out_layers.3.bias", - "model.model.diffusion_model.output_blocks.9.0.in_layers.2.weight", - "model.model.diffusion_model.output_blocks.9.0.in_layers.2.bias", - "model.model.diffusion_model.output_blocks.9.0.out_layers.3.weight", - "model.model.diffusion_model.output_blocks.9.0.out_layers.3.bias", - "model.model.diffusion_model.output_blocks.10.0.in_layers.2.weight", - "model.model.diffusion_model.output_blocks.10.0.in_layers.2.bias", - "model.model.diffusion_model.output_blocks.10.0.out_layers.3.weight", - "model.model.diffusion_model.output_blocks.10.0.out_layers.3.bias", - "model.model.diffusion_model.output_blocks.11.0.in_layers.2.weight", - "model.model.diffusion_model.output_blocks.11.0.in_layers.2.bias", - "model.model.diffusion_model.output_blocks.11.0.out_layers.3.weight", - "model.model.diffusion_model.output_blocks.11.0.out_layers.3.bias", - ]: + if should_process(key_): s = key_.split('.') idx = int(s[-2]) new_key_ = ".".join(s[:-2] + [str(int(idx - 1))] + [s[-1]]) From 0087ee3bdaed57c28fa3d7b035d4e095a13ecd66 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Mon, 16 Oct 2023 11:15:49 -0700 Subject: [PATCH 336/512] Few merge fixes --- .../megatron_bart_pretraining.py | 4 ++-- .../megatron_gpt_continue_training.py | 4 ++-- .../megatron_gpt_prompt_learning.py | 4 ++-- .../megatron_retro_cal_shape.py | 6 ++--- .../megatron_retro_fine_tune.py | 6 ++--- .../megatron_retro_mutransfer_pretrain.py | 6 ++--- .../megatron_retro_pretraining.py | 6 ++--- .../megatron_t5_lm_adaptation_finetune.py | 4 ++-- .../megatron_t5_seq2seq_eval.py | 4 ++-- .../megatron_t5_seq2seq_finetune.py | 4 ++-- .../tuning/megatron_gpt_adapter_tuning.py | 4 ++-- .../tuning/megatron_gpt_ia3_tuning.py | 4 ++-- .../tuning/megatron_gpt_sft.py | 4 ++-- .../tuning/megatron_t5_adapter_tuning.py | 4 ++-- .../tuning/megatron_t5_ia3_tuning.py | 4 ++-- .../tuning/megatron_t5_lora_tuning.py | 4 ++-- .../megatron_nmt_training.py | 4 ++-- .../language_modeling/megatron_base_model.py | 2 +- .../language_modeling/megatron_bert_model.py | 22 +++++++++---------- .../megatron_lm_encoder_decoder_model.py | 22 +++++++++---------- .../megatron_retrieval_model.py | 8 +++---- .../modules/common/megatron/transformer.py | 8 ++++--- .../nlp/parts/megatron_trainer_builder.py | 4 ++-- 23 files changed, 72 insertions(+), 70 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_bart_pretraining.py b/examples/nlp/language_modeling/megatron_bart_pretraining.py index 72c7a755c0ec..c2ba020a4a21 100644 --- a/examples/nlp/language_modeling/megatron_bart_pretraining.py +++ b/examples/nlp/language_modeling/megatron_bart_pretraining.py @@ -37,7 +37,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( no_ddp_communication_hook=True, @@ -56,7 +56,7 @@ def main(cfg) -> None: plugin_precision = '16-mixed' else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_gpt_continue_training.py b/examples/nlp/language_modeling/megatron_gpt_continue_training.py index 7b8a9897e1c0..8f0661a51406 100755 --- a/examples/nlp/language_modeling/megatron_gpt_continue_training.py +++ b/examples/nlp/language_modeling/megatron_gpt_continue_training.py @@ -133,7 +133,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name', 'fused_adam') == 'distributed_fused_adam' plugins = [] strategy = NLPDDPStrategy( @@ -152,7 +152,7 @@ def main(cfg) -> None: plugin_precision = '16-mixed' else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py b/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py index 89077f099aeb..4f3aa31176ab 100644 --- a/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py +++ b/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py @@ -48,7 +48,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=False,) @@ -67,7 +67,7 @@ def main(cfg) -> None: plugin_precision = '16-mixed' else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_retro_cal_shape.py b/examples/nlp/language_modeling/megatron_retro_cal_shape.py index 008ce1445929..754f06018ba5 100644 --- a/examples/nlp/language_modeling/megatron_retro_cal_shape.py +++ b/examples/nlp/language_modeling/megatron_retro_cal_shape.py @@ -34,10 +34,10 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( - no_ddp_communication_hook=True if megatron_amp_o2 else False, + no_ddp_communication_hook=True if megatron_amp_O2 else False, gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, find_unused_parameters=False, ) @@ -53,7 +53,7 @@ def main(cfg) -> None: plugin_precision = '16-mixed' else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(MixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_retro_fine_tune.py b/examples/nlp/language_modeling/megatron_retro_fine_tune.py index 031191c22b0a..aa7de6fda582 100644 --- a/examples/nlp/language_modeling/megatron_retro_fine_tune.py +++ b/examples/nlp/language_modeling/megatron_retro_fine_tune.py @@ -74,10 +74,10 @@ def main(cfg) -> None: # import torch.multiprocessing as mp # mp.set_start_method("spawn", force=True) ##################################################### - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( - no_ddp_communication_hook=True if megatron_amp_o2 else False, + no_ddp_communication_hook=True if megatron_amp_O2 else False, gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, find_unused_parameters=False, timeout=datetime.timedelta(seconds=18000), @@ -95,7 +95,7 @@ def main(cfg) -> None: plugin_precision = '16-mixed' else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(MixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_retro_mutransfer_pretrain.py b/examples/nlp/language_modeling/megatron_retro_mutransfer_pretrain.py index 79b25fd2c5e8..81a71650dc42 100644 --- a/examples/nlp/language_modeling/megatron_retro_mutransfer_pretrain.py +++ b/examples/nlp/language_modeling/megatron_retro_mutransfer_pretrain.py @@ -40,10 +40,10 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( - no_ddp_communication_hook=True if megatron_amp_o2 else False, + no_ddp_communication_hook=True if megatron_amp_O2 else False, gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, find_unused_parameters=False, ) @@ -59,7 +59,7 @@ def main(cfg) -> None: plugin_precision = '16-mixed' else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(MixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_retro_pretraining.py b/examples/nlp/language_modeling/megatron_retro_pretraining.py index e90fdddf1169..c1393863da57 100644 --- a/examples/nlp/language_modeling/megatron_retro_pretraining.py +++ b/examples/nlp/language_modeling/megatron_retro_pretraining.py @@ -39,10 +39,10 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( - no_ddp_communication_hook=True if megatron_amp_o2 else False, + no_ddp_communication_hook=True if megatron_amp_O2 else False, gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, find_unused_parameters=False, ) @@ -58,7 +58,7 @@ def main(cfg) -> None: plugin_precision = '16-mixed' else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(MixedPrecisionPlugin(plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_t5_lm_adaptation_finetune.py b/examples/nlp/language_modeling/megatron_t5_lm_adaptation_finetune.py index 0a10d434127d..e2af0b89ac48 100644 --- a/examples/nlp/language_modeling/megatron_t5_lm_adaptation_finetune.py +++ b/examples/nlp/language_modeling/megatron_t5_lm_adaptation_finetune.py @@ -38,7 +38,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce @@ -57,7 +57,7 @@ def main(cfg) -> None: plugin_precision = '16-mixed' else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py b/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py index 6a5ab9fbf481..4c11e10d99c5 100644 --- a/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py +++ b/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py @@ -71,7 +71,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( no_ddp_communication_hook=True, @@ -90,7 +90,7 @@ def main(cfg) -> None: plugin_precision = '16-mixed' else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(MixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py b/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py index 59ca94082f26..3204ba2f6d76 100644 --- a/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py +++ b/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py @@ -151,7 +151,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( no_ddp_communication_hook=True, @@ -170,7 +170,7 @@ def main(cfg) -> None: plugin_precision = '16-mixed' else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py b/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py index 15a7ed800ce4..b09ff2c0c038 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py @@ -66,7 +66,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' plugins = [] @@ -88,7 +88,7 @@ def main(cfg) -> None: else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py index b55739b08794..a0ecc6e544e8 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py @@ -66,7 +66,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' plugins = [] @@ -87,7 +87,7 @@ def main(cfg) -> None: plugin_precision = '16-mixed' else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py index 4c4c001014db..79dd20fcf84a 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py @@ -165,7 +165,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name', 'fused_adam') == 'distributed_fused_adam' plugins = [] strategy = NLPDDPStrategy( @@ -185,7 +185,7 @@ def main(cfg) -> None: plugin_precision = '16-mixed' else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py index ebc6a0327b78..96a8cba64863 100644 --- a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py @@ -66,7 +66,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' plugins = [] @@ -87,7 +87,7 @@ def main(cfg) -> None: plugin_precision = '16-mixed' else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py index fc508611c9b1..1edc87a416a4 100644 --- a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py @@ -66,7 +66,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' plugins = [] @@ -88,7 +88,7 @@ def main(cfg) -> None: else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py index 73cfae76b7a5..7178bf8145ba 100644 --- a/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py +++ b/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py @@ -66,7 +66,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' plugins = [] @@ -87,7 +87,7 @@ def main(cfg) -> None: plugin_precision = '16-mixed' else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) diff --git a/examples/nlp/machine_translation/megatron_nmt_training.py b/examples/nlp/machine_translation/megatron_nmt_training.py index 5d4768d0f146..38b993479b3c 100644 --- a/examples/nlp/machine_translation/megatron_nmt_training.py +++ b/examples/nlp/machine_translation/megatron_nmt_training.py @@ -42,7 +42,7 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) plugins = [] strategy = NLPDDPStrategy( no_ddp_communication_hook=True, @@ -62,7 +62,7 @@ def main(cfg) -> None: else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2: + if megatron_amp_O2: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index 3dc1435a1325..6579e837b1a6 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -78,7 +78,7 @@ class MegatronBaseModel(NLPModel): with O2 level optimizations and/or model parallelism. - Perform gradient clipping: `grad_clip_pl_default` triggers the PyTorch Lightning default implementation, `with_distributed_adam` triggers - the distributed optimizer's implementation, `megatron_amp_o2` triggers gradient clipping on the main grads, + the distributed optimizer's implementation, `megatron_amp_O2` triggers gradient clipping on the main grads, and otherwise gradient clipping is performed on the model grads. """ diff --git a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py index 083b6391bf0e..1cc774f1bd61 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py @@ -79,10 +79,10 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): raise ImportError( "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." ) - self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False) + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) self.cfg = cfg - if not self.megatron_amp_o2 and self.cfg.get('virtual_pipeline_model_parallel_size', None): + if not self.megatron_amp_O2 and self.cfg.get('virtual_pipeline_model_parallel_size', None): raise ValueError('Virtual pipeline model parallel is only supported when using megatron_amp_O2') super().__init__(cfg, trainer=trainer, no_lm_init=False) @@ -90,7 +90,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self._validate_trainer() self.enable_autocast = ( - True if (not self.megatron_amp_o2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + True if (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False ) # used in NVIDIA NGC PyTorch containers @@ -109,7 +109,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): if self.cfg.get('virtual_pipeline_model_parallel_size', None) is None: self.model = self.model[0] - if self.megatron_amp_o2: + if self.megatron_amp_O2: if not self.with_distributed_adam: # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type @@ -343,7 +343,7 @@ def training_step(self, dataloader_iter, batch_idx): # note: not necessary, but reduces performance degradation # from multiple simultaneous NCCL calls self._optimizer._finish_bucket_grad_sync() - elif self.megatron_amp_o2: + elif self.megatron_amp_O2: if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) self._optimizer.allreduce_main_grads() @@ -401,7 +401,7 @@ def allreduce_first_last_embeddings(self): module = self.model if module.share_token_embeddings: word_embeddings_weight = module.word_embeddings_weight() - if self.megatron_amp_o2: + if self.megatron_amp_O2: # O2 recipe stores a "main" copy of weights and grads grad = word_embeddings_weight.main_grad else: @@ -648,7 +648,7 @@ def _append_sequence_parallel_module_grads(self, module, grads): for param in module.parameters(): sequence_parallel_param = getattr(param, 'sequence_parallel', False) if sequence_parallel_param: - if self.megatron_amp_o2: + if self.megatron_amp_O2: grad = param.main_grad else: grad = param.grad @@ -857,7 +857,7 @@ def configure_optimizers(self): module = self.model if module.share_token_embeddings: param = module.word_embeddings_weight() - param._disable_greedy_grad_copy = not self.megatron_amp_o2 + param._disable_greedy_grad_copy = not self.megatron_amp_O2 param._disable_overlap_grad_sync = True if parallel_state.is_pipeline_last_stage(ignore_virtual=True): if isinstance(self.model, list): @@ -866,20 +866,20 @@ def configure_optimizers(self): module = self.model if module.share_token_embeddings: param = module.word_embeddings_weight() - param._disable_greedy_grad_copy = not self.megatron_amp_o2 + param._disable_greedy_grad_copy = not self.megatron_amp_O2 param._disable_overlap_grad_sync = True # Disable overlapped grad sync for layer norm grads when # sequence parallelism is enabled for param in self.parameters(): if getattr(param, 'sequence_parallel', False): - param._disable_greedy_grad_copy = not self.megatron_amp_o2 + param._disable_greedy_grad_copy = not self.megatron_amp_O2 param._disable_overlap_grad_sync = True # sequence parallelism is enabled for param in self.parameters(): if getattr(param, 'sequence_parallel', False): - param._disable_greedy_grad_copy = not self.megatron_amp_o2 + param._disable_greedy_grad_copy = not self.megatron_amp_O2 param._disable_overlap_grad_sync = True # Initialize parameter buckets for overlapped grad and param syncs diff --git a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py index 39c1ed2b3270..f43f4709a8d6 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py @@ -115,9 +115,9 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): # We don't need to call it explicitly? Since it is a pytorch lightning hook function # self.setup_optimizer_param_groups() - self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False) + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) - if self.megatron_amp_o2: + if self.megatron_amp_O2: if not self.with_distributed_adam: # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type @@ -129,7 +129,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): ) self.enable_autocast = ( - True if (not self.megatron_amp_o2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + True if (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False ) self.enc_dec_model.model_type = ModelType.encoder_and_decoder @@ -185,7 +185,7 @@ def configure_optimizers(self): # Disable async grad reductions for params that are # synchronized for pipeline parallelism for param in model_parallel_params: - param._disable_greedy_grad_copy = not self.megatron_amp_o2 + param._disable_greedy_grad_copy = not self.megatron_amp_O2 param._disable_overlap_grad_sync = True return super().configure_optimizers() @@ -373,7 +373,7 @@ def training_step(self, dataloader_iter, batch_idx): # note: not necessary, but reduces performance degradation # from multiple simultaneous NCCL calls self._optimizer._finish_bucket_grad_sync() - elif self.megatron_amp_o2: + elif self.megatron_amp_O2: # when using pipeline parallelism grads must be reduced after the pipeline (not asynchronously) if self.cfg.get('pipeline_model_parallel_size', 1) > 1: # main grads are stored in the MainParamsOptimizer wrapper @@ -475,7 +475,7 @@ def allreduce_word_and_position_embeddings(self): 'share_decoder_tokens_head_embeddings', True ): word_embeddings_weight = self.enc_dec_model.word_embeddings_weight() - if self.megatron_amp_o2: + if self.megatron_amp_O2: # O2 recipe stores a "main" copy of weights and grads grad = word_embeddings_weight.main_grad else: @@ -496,7 +496,7 @@ def allreduce_word_and_position_embeddings(self): ): if self.cfg.get('share_token_embeddings', True): position_embeddings_weight = self.enc_dec_model.position_embeddings_weight() - if self.megatron_amp_o2: + if self.megatron_amp_O2: grad = position_embeddings_weight.main_grad else: grad = position_embeddings_weight.grad @@ -515,7 +515,7 @@ def allreduce_word_and_position_embeddings(self): and parallel_state.get_pipeline_model_parallel_split_rank() > 1 ): position_embeddings_weight = self.enc_dec_model.encoder_relative_position_embeddings_weight() - if self.megatron_amp_o2: + if self.megatron_amp_O2: grad = position_embeddings_weight.main_grad else: grad = position_embeddings_weight.grad @@ -529,7 +529,7 @@ def allreduce_word_and_position_embeddings(self): and parallel_state.is_rank_in_decoder_relative_position_embedding_group() ): position_embeddings_weight = self.enc_dec_model.decoder_relative_position_embeddings_weight() - if self.megatron_amp_o2: + if self.megatron_amp_O2: grad = position_embeddings_weight.main_grad else: grad = position_embeddings_weight.grad @@ -542,7 +542,7 @@ def allreduce_word_and_position_embeddings(self): position_embeddings_weight = ( self.enc_dec_model.decoder_cross_attention_relative_position_embeddings_weight() ) - if self.megatron_amp_o2: + if self.megatron_amp_O2: grad = position_embeddings_weight.main_grad else: grad = position_embeddings_weight.grad @@ -630,7 +630,7 @@ def _kwargs_to_arg_idx(self): Computed on first call, and then cached. """ # build mapping of kwargs to arg index at first run - module = self.enc_dec_model.forward if not self.megatron_amp_o2 else self.enc_dec_model.module.forward + module = self.enc_dec_model.forward if not self.megatron_amp_O2 else self.enc_dec_model.module.forward args_name = inspect.getfullargspec(module)[0][1:] kwargs_to_arg_idx = {k: v for k, v in zip(args_name, range(len(args_name)))} diff --git a/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py b/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py index e418151308b0..d10c9f27f6cb 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py @@ -82,9 +82,9 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): # TODO does not support PP yet self.model = self.model_provider_func(pre_process=True, post_process=True, add_encoder=True, add_decoder=True) - self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False) + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) - if self.megatron_amp_o2: + if self.megatron_amp_O2: if not self.with_distributed_adam: # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type @@ -99,7 +99,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self.model.model_type = ModelType.encoder_and_decoder self.enable_autocast = ( - True if (not self.megatron_amp_o2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + True if (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False ) if hasattr(self.cfg, "shape_file"): @@ -270,7 +270,7 @@ def training_step(self, batch, batch_idx): if self.with_distributed_adam: # gradients are reduced internally in distributed optimizer pass - elif self.megatron_amp_o2: + elif self.megatron_amp_O2: # while async grad allreduce is enabled, bprop will keep moving forward without waiting for # the finish of async grad AR works. Hence, to guarantee the correctness of grads reduction, # we cannot start weight update until all async grad AR works are done. diff --git a/nemo/collections/nlp/modules/common/megatron/transformer.py b/nemo/collections/nlp/modules/common/megatron/transformer.py index 003945e057d6..30b7f66e22b4 100644 --- a/nemo/collections/nlp/modules/common/megatron/transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/transformer.py @@ -1579,9 +1579,6 @@ def forward( cross_attention_relative_position_bias=cross_attention_relative_position_bias, checkpoint_core_attention=checkpoint_core_attention, ) - # Update current sequence length outside of the loops - if self.transformer_engine: - self.inference_current_sequence_len += hidden_states.size(0) if self.return_select_layer < 0: assert ( @@ -1590,6 +1587,11 @@ def forward( if index == self.num_layers + self.return_select_layer: return hidden_states + # Update current sequence length outside of the loops + if self.transformer_engine: + self.inference_current_sequence_len += hidden_states.size(0) + + # Skip counter update for eval and activation checkpointing if torch.is_grad_enabled() and self.training: self.microbatch_count += 1 diff --git a/nemo/collections/nlp/parts/megatron_trainer_builder.py b/nemo/collections/nlp/parts/megatron_trainer_builder.py index 3c8bbc5db0d3..b2554a35cdbd 100644 --- a/nemo/collections/nlp/parts/megatron_trainer_builder.py +++ b/nemo/collections/nlp/parts/megatron_trainer_builder.py @@ -70,7 +70,7 @@ def _plugins(self) -> list: Returns: plugins: list of plugins passed to Trainer.plugins including precision plugins. """ - megatron_amp_o2 = self.cfg.model.get('megatron_amp_O2', False) + megatron_amp_O2 = self.cfg.model.get('megatron_amp_O2', False) with_distributed_adam = ( self.cfg.model.optim.get('name') == 'distributed_fused_adam' if self.cfg.model.get('optim') else False ) @@ -84,7 +84,7 @@ def _plugins(self) -> list: else: plugin_precision = 'bf16-mixed' - if megatron_amp_o2 and not with_distributed_adam: + if megatron_amp_O2 and not with_distributed_adam: plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) else: plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) From 8bdbd476c9f7bccfe33aa59b5844b94bec6159e7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Oct 2023 18:21:27 +0000 Subject: [PATCH 337/512] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../multimodal/models/neva/neva_model.py | 21 +++++++++---------- .../language_modeling/megatron_gpt_model.py | 19 ++++++++--------- .../modules/common/megatron/transformer.py | 1 - .../parts/mixins/multimodal_adapter_mixins.py | 16 +++++++------- nemo/collections/nlp/parts/nlp_overrides.py | 1 + tools/asr_evaluator/asr_evaluator.py | 4 +++- 6 files changed, 32 insertions(+), 30 deletions(-) diff --git a/nemo/collections/multimodal/models/neva/neva_model.py b/nemo/collections/multimodal/models/neva/neva_model.py index b5be817b05bd..d6b8e2336375 100644 --- a/nemo/collections/multimodal/models/neva/neva_model.py +++ b/nemo/collections/multimodal/models/neva/neva_model.py @@ -17,8 +17,8 @@ import random import re import tempfile -from itertools import chain from functools import partial +from itertools import chain from typing import Any, List, Optional, Union import numpy as np @@ -80,9 +80,9 @@ SamplingParam, TextGeneration, ) +from nemo.collections.nlp.parts.mixins.multimodal_adapter_mixins import MultimodalAdapterModelMixin from nemo.collections.nlp.parts.nlp_overrides import GradScaler, NLPSaveRestoreConnector from nemo.collections.nlp.parts.utils_funcs import get_last_rank -from nemo.collections.nlp.parts.mixins.multimodal_adapter_mixins import MultimodalAdapterModelMixin from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone from nemo.core import adapter_mixins from nemo.core.classes.common import PretrainedModelInfo @@ -101,8 +101,8 @@ try: from megatron.core import dist_checkpointing, parallel_state - from megatron.core.pipeline_parallel.schedules import get_forward_backward_func from megatron.core.models.gpt import GPTModel as MCoreGPTModel + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func HAVE_MEGATRON_CORE = True @@ -268,6 +268,7 @@ def replace_media_embeddings(self, input_ids, inputs_embeds, media): return updated_input_embeds + class MCoreNevaModel(MCoreGPTModel): def __init__( self, mm_cfg, media_start_id, media_end_id, **kwargs, @@ -285,9 +286,7 @@ def __init__( if mm_cfg.llm.freeze: for param in chain( - self.embedding.parameters(), - self.decoder.parameters(), - self.output_layer.parameters(), + self.embedding.parameters(), self.decoder.parameters(), self.output_layer.parameters(), ): param.requires_grad = False self.embedding = self.embedding.eval() @@ -421,6 +420,7 @@ def load_llm_weights(self, nemo_path): self.language_model.load_state_dict(new_state_dict, strict=True) print(f"Restored LLM weights from {nemo_path}.") + class NevaModel(GPTModel): def __init__( self, mm_cfg, media_start_id, media_end_id, **kwargs, @@ -560,17 +560,14 @@ def init_neva_adapter(self): self.base_keys = self._get_all_keys() adapter_name = AdapterName.MM_LINEAR_ADAPTER adapter_cfg = MMLinearAdapterConfig( - in_features=self.cfg.mm_cfg.vision_encoder.hidden_size, - out_features=self.cfg.hidden_size, bias=True, + in_features=self.cfg.mm_cfg.vision_encoder.hidden_size, out_features=self.cfg.hidden_size, bias=True, ) for name, module in self.named_modules(): self._check_and_add_adapter( - name, module, adapter_name, adapter_cfg, - autocast_dtype=self.autocast_dtype, + name, module, adapter_name, adapter_cfg, autocast_dtype=self.autocast_dtype, ) self.adapter_keys = self._get_all_keys() - self.base_keys - def model_provider_func(self, pre_process, post_process): """Model depends on pipeline paralellism.""" media_start_id = self.tokenizer.token_to_id(DEFAULT_IM_START_TOKEN) @@ -578,8 +575,10 @@ def model_provider_func(self, pre_process, post_process): if self.mcore_gpt: if parallel_state.is_unitialized(): + def dummy(): return + if self.trainer.strategy.launcher is not None: self.trainer.strategy.launcher.launch(dummy, trainer=self.trainer) self.trainer.strategy.setup_environment() diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 8a09be4af598..a8f46d96f8db 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -25,7 +25,6 @@ from omegaconf.dictconfig import DictConfig from pytorch_lightning.accelerators import CPUAccelerator from pytorch_lightning.trainer.trainer import Trainer -from typing import Any, Dict, Iterator, List, Optional, Union from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( MegatronPretrainingRandomSampler, @@ -490,7 +489,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): grad_sync_func = None param_sync_func = None if not forward_only and self.with_distributed_adam: - no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2, ) + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) grad_sync_func = self.reduce_overlap_gradients param_sync_func = self.sync_overlap_parameters @@ -625,7 +624,7 @@ def training_step(self, dataloader_iter, batch_idx): self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) if self.cfg.get('pipeline_model_parallel_size', 1) > 1 and self.cfg.get( - 'share_embeddings_and_output_weights', True + 'share_embeddings_and_output_weights', True ): # when using pipeline parallelism the first and last stage must keep embeddings in sync self.allreduce_first_last_embeddings() @@ -735,8 +734,8 @@ def allreduce_first_last_embeddings(self): # This should only run for models that support pipelined model parallelism # (BERT and GPT-2). if parallel_state.get_pipeline_model_parallel_world_size() > 1 and ( - parallel_state.is_pipeline_first_stage(ignore_virtual=True) - or parallel_state.is_pipeline_last_stage(ignore_virtual=True) + parallel_state.is_pipeline_first_stage(ignore_virtual=True) + or parallel_state.is_pipeline_last_stage(ignore_virtual=True) ): module_list = self.get_gpt_module_list() if parallel_state.is_pipeline_first_stage(ignore_virtual=True): @@ -1057,7 +1056,7 @@ def build_train_valid_test_datasets(self): return self._train_ds, self._validation_ds, self._test_ds def build_pretraining_data_loader( - self, dataset, consumed_samples, dataset_type=None, drop_last=True, pad_samples_to_global_batch_size=False + self, dataset, consumed_samples, dataset_type=None, drop_last=True, pad_samples_to_global_batch_size=False ): """Buld dataloader given an input dataset.""" @@ -1199,10 +1198,10 @@ def setup_test_data(self, cfg): self._test_dl = self.build_pretraining_data_loader(self._test_ds, consumed_samples) def generate( - self, - inputs: Union[List[str], torch.Tensor, List[dict]], - length_params: LengthParam, - sampling_params: SamplingParam = None, + self, + inputs: Union[List[str], torch.Tensor, List[dict]], + length_params: LengthParam, + sampling_params: SamplingParam = None, *, strategy: Optional[TextGenerationStrategy] = None, ) -> OutputType: diff --git a/nemo/collections/nlp/modules/common/megatron/transformer.py b/nemo/collections/nlp/modules/common/megatron/transformer.py index 30b7f66e22b4..bbb4b3ba31f0 100644 --- a/nemo/collections/nlp/modules/common/megatron/transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/transformer.py @@ -1591,7 +1591,6 @@ def forward( if self.transformer_engine: self.inference_current_sequence_len += hidden_states.size(0) - # Skip counter update for eval and activation checkpointing if torch.is_grad_enabled() and self.training: self.microbatch_count += 1 diff --git a/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py b/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py index 7e2112e9df90..682a76aa939a 100644 --- a/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py +++ b/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py @@ -19,11 +19,7 @@ import torch from omegaconf import DictConfig, OmegaConf, open_dict -from nemo.utils.model_utils import inject_model_parallel_rank from nemo.collections.nlp.parts.mixins.nlp_adapter_mixins import NLPAdapterModelMixin -from nemo.core.classes.mixins.adapter_mixins import AdapterModuleMixin -from nemo.core.connectors.save_restore_connector import SaveRestoreConnector -from nemo.utils import logging, model_utils from nemo.collections.nlp.parts.peft_config import ( PEFT_CONFIG_MAP, CanonicalAdaptersPEFTConfig, @@ -31,9 +27,14 @@ PEFTConfig, PtuningPEFTConfig, ) +from nemo.core.classes.mixins.adapter_mixins import AdapterModuleMixin +from nemo.core.connectors.save_restore_connector import SaveRestoreConnector +from nemo.utils import logging, model_utils +from nemo.utils.model_utils import inject_model_parallel_rank try: from megatron.core import parallel_state + from nemo.collections.nlp.modules.common.megatron.adapters.mcore_mixins import swap_mcore_mixin except (ImportError, ModuleNotFoundError): @@ -41,7 +42,6 @@ class MultimodalAdapterModelMixin(NLPAdapterModelMixin): - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -80,7 +80,9 @@ def add_adapter(self, peft_cfgs: Union[PEFTConfig, List[PEFTConfig]]): self.tie_weights(cfg) self.use_peft = True - def _check_and_add_adapter(self, name, module, peft_name, peft_cfg, name_key_to_mcore_mixins=None, autocast_dtype=None): + def _check_and_add_adapter( + self, name, module, peft_name, peft_cfg, name_key_to_mcore_mixins=None, autocast_dtype=None + ): if name_key_to_mcore_mixins is not None: for mcore_target, mcore_mixin in name_key_to_mcore_mixins[peft_name]: if name in [ @@ -107,4 +109,4 @@ def _check_and_add_adapter(self, name, module, peft_name, peft_cfg, name_key_to_ model_parallel_config=self.model_parallel_config, ) if autocast_dtype is not None: - module.adapter_layer[peft_name] = module.adapter_layer[peft_name].to(autocast_dtype) \ No newline at end of file + module.adapter_layer[peft_name] = module.adapter_layer[peft_name].to(autocast_dtype) diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index 6fa645093fc4..f61fb2cd654d 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -633,6 +633,7 @@ def should_process(key): if key == template: return True return False + for key_ in state_dict.keys(): if key_ == "model.cond_stage_model.transformer.text_model.embeddings.position_ids": continue diff --git a/tools/asr_evaluator/asr_evaluator.py b/tools/asr_evaluator/asr_evaluator.py index 9540d3429138..82b61290e66f 100644 --- a/tools/asr_evaluator/asr_evaluator.py +++ b/tools/asr_evaluator/asr_evaluator.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. import json + import git from omegaconf import OmegaConf, open_dict -from utils import cal_target_metadata_wer, run_asr_inference + from nemo.collections.asr.parts.utils.eval_utils import cal_write_wer from nemo.core.config import hydra_runner from nemo.utils import logging +from utils import cal_target_metadata_wer, run_asr_inference """ This script serves as evaluator of ASR models From 7be8108a4223c80378b0c7714945caddaf77c951 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Mon, 16 Oct 2023 11:29:46 -0700 Subject: [PATCH 338/512] Fix diff for non-mm models --- .dockerignore | 2 - .gitignore | 1 - .gitlab-ci.yml | 13 - LICENSE | 527 +----------------- examples/tts/conf/fastpitch/fastpitch.yaml | 256 --------- examples/tts/conf/hifigan/hifigan_data.yaml | 133 ----- .../tts/conf/hifigan/sample/sample_22050.yaml | 3 - .../tts/conf/hifigan/sample/sample_44100.yaml | 3 - examples/tts/g2p/g2p_inference.py | 2 +- examples/tts/g2p/g2p_train_and_evaluate.py | 2 +- .../asr/test_asr_interctc_models.py | 2 - .../scripts/run_ctc_segmentation.py | 2 +- tutorials/00_NeMo_Primer.ipynb | 2 +- tutorials/01_NeMo_Models.ipynb | 2 +- tutorials/02_NeMo_Adapters.ipynb | 2 +- tutorials/AudioTranslationSample.ipynb | 2 +- ...blish_NeMo_Model_On_Hugging_Face_Hub.ipynb | 2 +- .../asr/ASR_CTC_Language_Finetuning.ipynb | 2 +- tutorials/asr/ASR_for_telephony_speech.ipynb | 2 +- tutorials/asr/ASR_with_NeMo.ipynb | 4 +- .../asr/ASR_with_Subword_Tokenization.ipynb | 2 +- tutorials/asr/ASR_with_Transducers.ipynb | 2 +- .../asr/Buffered_Transducer_Inference.ipynb | 2 +- ..._Transducer_Inference_with_LCS_Merge.ipynb | 2 +- tutorials/asr/Intro_to_Transducers.ipynb | 2 +- tutorials/asr/Multilang_ASR.ipynb | 2 +- tutorials/asr/Offline_ASR.ipynb | 2 +- .../Offline_ASR_with_VAD_for_CTC_models.ipynb | 2 +- .../asr/Online_ASR_Microphone_Demo.ipynb | 2 +- tutorials/asr/Online_Noise_Augmentation.ipynb | 2 +- .../Online_Offline_Microphone_VAD_Demo.ipynb | 2 +- .../Online_Offline_Speech_Commands_Demo.ipynb | 2 +- .../asr/Self_Supervised_Pre_Training.ipynb | 2 +- tutorials/asr/Speech_Commands.ipynb | 2 +- tutorials/asr/Streaming_ASR.ipynb | 2 +- tutorials/asr/Voice_Activity_Detection.ipynb | 2 +- .../asr/asr_adapters/ASR_with_Adapters.ipynb | 2 +- ...netuning_at_Scale_with_AWS_SageMaker.ipynb | 2 +- .../cloud/aws/SageMaker_ASR_Training.ipynb | 2 +- ...Language_Models_for_Downstream_Tasks.ipynb | 2 +- tutorials/nlp/02_NLP_Tokenizers.ipynb | 4 +- ...a_Preprocessing_and_Cleaning_for_NMT.ipynb | 2 +- tutorials/nlp/Dialogue.ipynb | 2 +- tutorials/nlp/Entity_Linking_Medical.ipynb | 2 +- tutorials/nlp/GLUE_Benchmark.ipynb | 2 +- ...Joint_Intent_and_Slot_Classification.ipynb | 2 +- tutorials/nlp/MegatronBert_export.ipynb | 2 +- ...on_Synthetic_Tabular_Data_Generation.ipynb | 2 +- .../nlp/Multitask_Prompt_and_PTuning.ipynb | 2 +- .../nlp/Punctuation_and_Capitalization.ipynb | 2 +- ...ion_and_Capitalization_Lexical_Audio.ipynb | 2 +- tutorials/nlp/Question_Answering.ipynb | 2 +- .../nlp/Relation_Extraction-BioMegatron.ipynb | 2 +- ...xt_Classification_Sentiment_Analysis.ipynb | 2 +- .../Token_Classification-BioMegatron.ipynb | 2 +- ...ssification_Named_Entity_Recognition.ipynb | 4 +- .../nlp/Zero_Shot_Intent_Recognition.ipynb | 2 +- .../ASR_with_SpeakerDiarization.ipynb | 2 +- .../Speaker_Diarization_Inference.ipynb | 2 +- .../Speaker_Diarization_Training.ipynb | 4 +- .../Speaker_Identification_Verification.ipynb | 2 +- .../tools/CTC_Segmentation_Tutorial.ipynb | 2 +- tutorials/tools/Multispeaker_Simulator.ipynb | 2 +- .../tts/Aligner_Inference_Examples.ipynb | 2 +- .../Evaluation_MelCepstralDistortion.ipynb | 2 +- tutorials/tts/FastPitch_Finetuning.ipynb | 2 +- .../tts/FastPitch_MixerTTS_Training.ipynb | 2 +- .../tts/FastPitch_Speaker_Interpolation.ipynb | 2 +- .../tts/Inference_DurationPitchControl.ipynb | 2 +- tutorials/tts/Inference_ModelSelect.ipynb | 2 +- tutorials/tts/NeMo_TTS_Primer.ipynb | 2 +- .../tts/Pronunciation_customization.ipynb | 2 +- tutorials/tts/Tacotron2_Training.ipynb | 2 +- 73 files changed, 70 insertions(+), 1006 deletions(-) delete mode 100644 .gitlab-ci.yml delete mode 100644 examples/tts/conf/fastpitch/fastpitch.yaml delete mode 100644 examples/tts/conf/hifigan/hifigan_data.yaml delete mode 100644 examples/tts/conf/hifigan/sample/sample_22050.yaml delete mode 100644 examples/tts/conf/hifigan/sample/sample_44100.yaml diff --git a/.dockerignore b/.dockerignore index 00626456bb1e..14f5114d01be 100644 --- a/.dockerignore +++ b/.dockerignore @@ -17,5 +17,3 @@ coverage.xml .git **/*.nemo **/*.ckpt - -nogit/ \ No newline at end of file diff --git a/.gitignore b/.gitignore index e566a6685a32..1ff2a92cac64 100644 --- a/.gitignore +++ b/.gitignore @@ -179,4 +179,3 @@ examples/neural_graphs/*.yml .hydra/ nemo_experiments/ -nogit/ \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index d5fcccbd2d1c..000000000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,13 +0,0 @@ -stages: - - pre - -pre-commit-checks: - stage: pre - script: - - pre-commit run --all-files --verbose --show-diff-on-failure - image: gitlab-master.nvidia.com:5005/dl/ai-services/python-clients/codeformat:latest - tags: - - os/linux - - type/docker - only: - - merge_requests diff --git a/LICENSE b/LICENSE index 551b265159a3..f49a4e16e68b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,307 +1,3 @@ -The following applies to all files unless otherwise noted: - -NVIDIA NEMO FRAMEWORK MULTIMODAL PRE-RELEASE EVALUATION LICENSE - -IMPORTANT NOTICE – PLEASE READ AND AGREE BEFORE USING THE CONTAINER. This license -agreement (“Agreement”) is a legal agreement between you, whether an individual or -entity ("you”) and NVIDIA Corporation ("NVIDIA") and governs your use of an early -access version of the NVIDIA NeMo framework multimodal container and all its contents -(“CONTAINER”). This Agreement can be accepted only by an adult of legal age of -majority in the country in which the CONTAINER is used. If you don’t have the required -age or authority to accept this Agreement, or if you don’t accept all the terms and -conditions of this Agreement, do not download, install or use the CONTAINER. You -agree to use the CONTAINER only for purposes that are permitted by this Agreement and -any applicable law or regulation in the relevant jurisdictions. - -1. License. -1.1 Subject to the terms of this Agreement, NVIDIA grants you a non-exclusive, -revocable, non-transferable, non-sublicensable (except as expressly granted in this -Agreement), license to: (a) install and use copies of the CONTAINER, and (b) modify and -create derivative works of sample or example source code delivered by NVIDIA as part -of the CONTAINER (if applicable), all the foregoing only for your internal use to -evaluate or develop and test services and applications with the CONTAINER, without use -in production. - -2. Limitations. -Your license to use the CONTAINER and derivative works of the CONTAINER is restricted -as follows: - -2.1 The CONTAINER may run on any computing system with or without NVIDIA GPUs, except -for the NVIDIA proprietary software (such as CUDA and TensorRT software) in the -CONTAINER which is licensed only to run on systems with NVIDIA GPUs. The NVIDIA -proprietary software in the CONTAINER may be present on systems without NVIDIA GPUs, -as long as it is not running on such systems. For components governed by open source -software licenses, see the information in the “Components Under Other Licenses” section -below. - -2.2 The CONTAINER and derivative works may not be used in any commercial-ready products -or services, or separately to even for development, test or evaluation of other NVIDIA -non-NVIDIA products or services. - -2.3 You may not reverse engineer, decompile, or disassemble the CONTAINER components -provided in binary form, nor attempt in any other manner to obtain source code of such -CONTAINER components. - -2.4 You may not change or remove copyright or other proprietary notices in the -CONTAINER. - -2.5 Except as expressly granted in this Agreement, you may not copy, sell, rent, -sublicense, transfer, distribute, modify or create derivative works of the CONTAINER, -or make its functionality available to others. - -2.6 Data generated with use of the CONTAINER may not be used for deployment purposes. - -2.7 You may not bypass, disable, or circumvent any technical limitation, encryption, -security, digital rights management or authentication mechanism in the CONTAINER. - -2.8 You may not replace any NVIDIA software components that are governed by this -Agreement with other software that implements NVIDIA APIs. - -2.9 You may not use the CONTAINER for the purpose of developing competing products or -technologies or assisting a third party in such activities. - -2.10 You may not use the CONTAINER in any manner that would cause it to become subject -to an open source software license; subject to the terms in the “Components Under Other -Licenses” section below. - -2.11 Unless you have an agreement with NVIDIA for this purpose, you may not use the -CONTAINER provided under this Agreement and derivative works in a system or -application where the use of or application where the use of or failure of such system -or application developed with CONTAINER provided by NVIDIA could result in injury, -death or catastrophic damage. NVIDIA will not be liable to you or any third party, in -whole or in part, for any claims or damages arising from these uses. You are solely -responsible for ensuring that systems and applications developed with the CONTAINER -as a whole include sufficient safety and redundancy features and comply with all -applicable legal and regulatory standards and requirements. - -3. Your Privacy: Collection and Use of Information. -Please review the NVIDIA Privacy Policy, located at -https://www.nvidia.com/en-us/about-nvidia/privacy-policy, which explains NVIDIA’s -policy for collecting and using data, as well as visit the NVIDIA Privacy Center, -located at https://www.nvidia.com/en-us/privacy-center, to manage your consent and -privacy preferences. NVIDIA may require certain personal information such as name, -email address and entitlement information including survey responses to deliver or -provide the CONTAINER to you. - -4. Authorized Users. -You may allow employees and contractors of your entity or of your subsidiary(ies) to -access and use the CONTAINER from your secure network to perform the work authorized -by this Agreement on your behalf. If you are an academic institution, you may allow -users enrolled or employed by the academic institution to access and use the CONTAINER -as authorized by this Agreement from your secure network. You are responsible for the -compliance with the terms of this Agreement by your authorized users. Any act or -omission that if committed by you would constitute a breach of this Agreement will be -deemed to constitute a breach of this Agreement if committed by your authorized users. - -5. Confidentiality. -You agree that you will not use, nor authorize others to use, NVIDIA Confidential -Information, except as necessary for the performance of this Agreement, and that you -will not disclose NVIDIA Confidential Information to any third party, except to -permitted users under this Agreement that have a need to know such Confidential -Information for the purpose of this Agreement, provided that each such recipient is -subject to a written agreement that includes confidentiality obligations consistent -with these terms. You agree to use all reasonable efforts to maintain the -confidentiality of NVIDIA Confidential Information in your possession or control, but -in no event less than the efforts that you ordinarily use with respect to your own -Confidential Information of similar nature and importance. “Confidential Information” -means the output, and any results of benchmarking or other competitive analysis or -regression or performance data relating to the CONTAINER. - -6. Pre-Release Versions. -CONTAINER versions or specific features identified as alpha, beta, preview, early -access or otherwise as pre-release may not be fully functional, may contain errors or -design flaws, and may have reduced or different security, privacy, availability, and -reliability standards relative to commercial versions of NVIDIA offerings. You may use -a pre-release CONTAINER at your own risk, understanding that such versions are not -intended for use in production or business-critical systems. NVIDIA may choose not to -make available a commercial version of any pre-release CONTAINER. NVIDIA may also -choose to abandon development and terminate the availability of a pre-release CONTAINER -at any time without liability. - -7. Updates. -NVIDIA may, at its option, make available patches, workarounds or other updates to the -CONTAINER. Unless the updates are provided with their separate governing terms, they -are deemed part of the CONTAINER licensed to you as provided in this Agreement. - -8. Components Under Other Licenses. -The CONTAINER may include or be distributed with components provided with separate -legal notices or terms that accompany the components, such as open source software -terms and other license terms ("Other Licenses”). The components are subject to the -applicable Other Licenses, including any proprietary notices, disclaimers, -requirements and extended use rights; except that this Agreement will prevail regarding -the use of third-party open source software, unless a third-party open source software -license requires its license terms to prevail. Open source software license means any -software, data or documentation subject to any license identified as an open source -license by the Open Source Initiative (http://opensource.org), Free Software Foundation (http://www.fsf.org) or -other similar open source organization or listed by the Software Package Data Exchange -(SPDX) Workgroup under the Linux Foundation (http://www.spdx.org). - -You acknowledge and agree that it is your sole responsibility to obtain any additional -third-party licenses required to make, have made, use, have used, sell, import, and -offer for sale your products or services that include or incorporate components under -Other Licenses, including, without limitation, audio and/or video encoders and decoders -and implementations of technical standards. NVIDIA does not grant to you under this -Agreement any necessary patent or other rights, including standard essential patent -rights, with respect to components under Other Licenses. - -9. Termination. -This Agreement will automatically terminate without notice from NVIDIA if you fail to -comply with any of the terms in this Agreement or if you commence or participate in any -legal proceeding against NVIDIA with respect to the CONTAINER. Additionally, either -party may terminate this Agreement at any time with prior written notice to the other -party. Upon any termination, you must stop using and destroy all copies of the CONTAINER -and derivative works. Upon written request, you will certify in writing that you have -complied with your commitments under this section. All provisions will survive -termination, except for the licenses granted to you. - -10. Ownership. -The CONTAINER, including all intellectual property rights, is and will remain the sole -and exclusive property of NVIDIA or its licensors. Except as expressly granted in this -Agreement, (i) NVIDIA reserves all rights, interests, and remedies in connection with -the CONTAINER, and (ii) no other license or right is granted to you by implication, -estoppel or otherwise. You agree to cooperate with NVIDIA and provide reasonably -requested information to verify your compliance with this Agreement. - -11. Feedback. -You may, but you are not obligated to, provide suggestions, requests, fixes, -modifications, enhancements, or other feedback regarding the CONTAINER -(collectively, “Feedback”). Feedback, even if designated as confidential by you, -will not create any confidentiality obligation for NVIDIA or its affiliates. If -you provide Feedback, you hereby grant NVIDIA, its affiliates and its designees -a non-exclusive, perpetual, irrevocable, sublicensable, worldwide, royalty-free, -fully paid-up and transferable license, under your intellectual property rights, -to publicly perform, publicly display, reproduce, use, make, have made, sell, -offer for sale, distribute (through multiple tiers of distribution), import, -create derivative works of and otherwise commercialize and exploit the Feedback -at NVIDIA’s discretion. You will not give Feedback (i) that you have reason to -believe is subject to any restriction that impairs the exercise of the grant -stated in this section, such as third-party intellectual property rights; or -(ii) subject to license terms which seek to require any product incorporating or -developed using such Feedback, or other intellectual property of NVIDIA or its -affiliates, to be licensed to or otherwise shared with any third party. - -12. Disclaimer of Warranties. -THE CONTAINER IS PROVIDED BY NVIDIA AS-IS AND WITH ALL FAULTS. TO THE FULLEST -EXTENT PERMITTED BY APPLICABLE LAW, NVIDIA DISCLAIMS ALL WARRANTIES AND -REPRESENTATIONS OF ANY KIND, WHETHER EXPRESS, IMPLIED OR STATUTORY, RELATING TO -OR ARISING UNDER THIS AGREEMENT, INCLUDING, WITHOUT LIMITATION, THE WARRANTIES -OF TITLE, NONINFRINGEMENT, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, -USAGE OF TRADE AND COURSE OF DEALING. WITHOUT LIMITING THE FOREGOING, NVIDIA -DOES NOT WARRANT THAT THE CONTAINER WILL MEET YOUR REQUIREMENTS; THAT ANY -DEFECTS OR ERRORS WILL BE CORRECTED; THAT ANY CERTAIN CONTENT WILL BE AVAILABLE; -OR THAT THE CONTAINER IS FREE OF VIRUSES OR OTHER HARMFUL COMPONENTS. NO -INFORMATION OR ADVICE GIVEN BY NVIDIA WILL IN ANY WAY INCREASE THE SCOPE OF ANY -WARRANTY EXPRESSLY PROVIDED IN THIS AGREEMENT. NVIDIA does not warrant or assume -responsibility for the accuracy or completeness of any third-party information, -text, graphics, links contained in THE CONTAINER. - -13. Limitations of Liability. -TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT WILL NVIDIA BE -LIABLE FOR ANY (I) INDIRECT, PUNITIVE, SPECIAL, INCIDENTAL OR CONSEQUENTIAL -DAMAGES, OR (II) DAMAGES FOR THE (A) COST OF PROCURING SUBSTITUTE GOODS OR (B) -LOSS OF PROFITS, REVENUES, USE, DATA OR GOODWILL ARISING OUT OF OR RELATED TO -THIS AGREEMENT, WHETHER BASED ON BREACH OF CONTRACT, TORT (INCLUDING NEGLIGENCE), -STRICT LIABILITY, OR OTHERWISE, AND EVEN IF NVIDIA HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES AND EVEN IF A PARTY'S REMEDIES FAIL THEIR ESSENTIAL -PURPOSE. - -ADDITIONALLY, TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, NVIDIA’S TOTAL -CUMULATIVE AGGREGATE LIABILITY FOR ANY AND ALL LIABILITIES, OBLIGATIONS OR -CLAIMS ARISING OUT OF OR RELATED TO THIS AGREEMENT WILL NOT EXCEED FIVE U.S. -DOLLARS (US$5). - -14. Governing Law and Jurisdiction. -This Agreement will be governed in all respects by the laws of the United States -and the laws of the State of Delaware, without regard to conflict of laws -principles or the United Nations Convention on Contracts for the International Sale -of Goods. The state and federal courts residing in Santa Clara County, -California will have exclusive jurisdiction over any dispute or claim arising -out of or related to this Agreement, and the parties irrevocably consent to -personal jurisdiction and venue in those courts; except that either party may -apply for injunctive remedies or an equivalent type of urgent legal relief in -any jurisdiction. - -15. No Assignment. -NVIDIA may assign, delegate or transfer its rights or obligations under this -Agreement by any means or operation of law. You may not, without NVIDIA’s prior -written consent, assign, delegate or transfer any of your rights or obligations -under this Agreement by any means or operation of law, and any attempt to do so -is null and void. - -16. Waiver. -No failure or delay by a party to enforce any Agreement term or obligation will -operate as a waiver by that party, nor prevent the enforcement of such term or -obligation later. - -17. Export. -You agree to comply with all applicable export, import, trade and economic -sanctions laws and regulations, including U.S. Export Administration Regulations -and Office of Foreign Assets Control regulations. These laws include -restrictions on destinations, end-users and end-use. - -18. Government Use. - The CONTAINER, including related documentation (“Protected Items”) is a -“Commercial product” as this term is defined at 48 C.F.R. 2.101, consisting of -“commercial computer software” and “commercial computer software documentation” -as such terms are used in, respectively, 48 C.F.R. 12.212 and 48 C.F.R. -227.7202 & 252.227-7014(a)(1). Before any Protected Items are supplied to the -U.S. Government, you will (i) inform the U.S. Government in writing that the -Protected Items are and must be treated as commercial computer software and -commercial computer software documentation developed at private expense; (ii) -inform the U.S. Government that the Protected Items are provided subject to the -terms of this Agreement; and (iii) mark the Protected Items as commercial -computer software and commercial computer software documentation developed at -private expense. In no event will you permit the U.S. Government to acquire -rights in Protected Items beyond those specified in 48 C.F.R. 52.227-19(b)(1)- -(2) or 252.227-7013(c) except as expressly approved by NVIDIA in writing. - -19. Notices. -Unless otherwise specifically stated in this Agreement, all notices, requests, -consents and other communications, which are required or permitted under this -Agreement, will be in writing to the address below and will be effective (i) -upon receipt if by personal delivery, (ii) upon receipt if by certified or -registered mail (return receipt requested), or (iii) one (1) day after it is -sent if by next day delivery by a major commercial delivery service. Please -direct your legal notices or other correspondence to NVIDIA Corporation, 2788 -San Tomas Expressway, Santa Clara, California 95051, United States of America, -Attention: Legal Department. - -20. Force Majeure. -Neither party will be liable during any period where an event or circumstance -prevents or delays that party from performing its obligations under this -Agreement and that event or circumstance: (i) is not within the reasonable -control of that party and is not the result of that party’s negligence, and (ii) -cannot be overcome or avoided by that party using reasonably diligent efforts. - -21. Entire Agreement. -Regarding the subject matter of this Agreement, the parties agree that (i) this -Agreement constitutes the entire and exclusive agreement between the parties and -supersedes all prior and contemporaneous communications and (ii) any additional -or different terms or conditions, whether contained in purchase orders, order -acknowledgments, invoices or otherwise, will not be binding on the receiving party -and are null and void. a court of competent jurisdiction rules that a provision of -this Agreement is unenforceable, that provision will be deemed modified to the -extent necessary to make it enforceable and the remainder of this Agreement will -continue in full force and effect. Any amendment to this Agreement must be in writing -and signed by authorized representatives of both parties. - -22. Licensing. -If the terms in this license are not suitable for your organization, or for any -questions regarding this license, please contact NVIDIA at -nvidia-compute-license-questions@nvidia.com. - -(v. April 10, 2023) - --- - -This repository also contains code from Colossal-AI, and K-diffusion. Files from these -organizations have notices at the top of each file. Below are licenses used in those -files, as indicated. - - -------------- LICENSE FOR from Colossal-AI for Stable Diffusion -------------- - -Copyright 2021- HPC-AI Technology Inc. All rights reserved. Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ @@ -490,7 +186,7 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved. same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2021- HPC-AI Technology Inc. + Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -502,223 +198,4 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved. distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and - limitations under the License. - - ## Some of colossal-ai's code is derived from others projects, which is subject to the following copyright notice: - - Copyright 2021 The Alpa team. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - https://github.com/alpa-projects/alpa/blob/979a45a3e6187df941ef4a4c4c6eea664527d68d/LICENSE - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ------------------------------------------------- - - Copyright 2018-2020 Philippe Tillet - Copyright 2020-2022 OpenAI - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files - (the "Software"), to deal in the Software without restriction, - including without limitation the rights to use, copy, modify, merge, - publish, distribute, sublicense, and/or sell copies of the Software, - and to permit persons to whom the Software is furnished to do so, - subject to the following conditions: - - ---------------- LICENSE FOR Microsoft Deepspeed ---------------- - - MIT License - - Copyright (c) Microsoft Corporation. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE - - ---------------- LICENSE FOR NVIDIA Megatron-LM ---------------- - - Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of NVIDIA CORPORATION nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ---------------- LICENSE FOR NVIDIA Apex ---------------- - - All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - - 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ---------------- LICENSE FOR Facebook Fairscale ---------------- - - Copyright (c) Facebook, Inc. and its affiliates - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America - and IDIAP Research Institute nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - - ---------------- LICENSE FOR Flash Attention ---------------- - - BSD 3-Clause License - - Copyright (c) 2022, the respective contributors, as shown by the AUTHORS file. - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - * Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ---------------- LICENSE FOR Facebook xFormers ---------------- - - From xFormers: - - Copyright (c) Facebook, Inc. and its affiliates - - - === - - BSD 3-Clause License - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America - and IDIAP Research Institute nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - - -------------- LICENSE FOR from K-diffusion for Diffusion Inference -------------- - -Copyright (c) 2022 Katherine Crowson - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file + limitations under the License. \ No newline at end of file diff --git a/examples/tts/conf/fastpitch/fastpitch.yaml b/examples/tts/conf/fastpitch/fastpitch.yaml deleted file mode 100644 index 1d552d058d76..000000000000 --- a/examples/tts/conf/fastpitch/fastpitch.yaml +++ /dev/null @@ -1,256 +0,0 @@ -# This config contains the default values for training an English FastPitch model. -# If you want to train a model on other dataset, you can change config values according to your dataset. -# Most dataset-specific arguments are in the head of the config file, see below. - -name: FastPitch - -defaults: - - feature: ??? - -max_epochs: ??? -batch_size: 32 -weighted_sampling_steps_per_epoch: null - -n_speakers: ??? -speaker_path: null -feature_stats_path: null - -train_ds_meta: ??? -val_ds_meta: ??? -log_ds_meta: ??? - -phoneme_dict_path: ??? -heteronyms_path: ??? - -log_dir: ??? -vocoder_type: ??? -vocoder_name: null -vocoder_checkpoint_path: null - -model: - learn_alignment: true - bin_loss_warmup_epochs: 100 - - n_speakers: ${n_speakers} - n_mel_channels: ${feature.mel_feature.mel_dim} - min_token_duration: 1 - max_token_duration: 75 - symbols_embedding_dim: 384 - pitch_embedding_kernel_size: 3 - energy_embedding_kernel_size: 3 - speaker_emb_condition_prosody: true - speaker_emb_condition_aligner: true - use_log_energy: false - dur_loss_scale: 0.1 - pitch_loss_scale: 0.1 - energy_loss_scale: 0.1 - aligner_loss_scale: 0.1 - - preprocessor: - _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor - features: ${feature.mel_feature.mel_dim} - lowfreq: ${feature.mel_feature.lowfreq} - highfreq: ${feature.mel_feature.highfreq} - n_fft: ${feature.win_length} - n_window_size: ${feature.win_length} - window_size: false - n_window_stride: ${feature.hop_length} - window_stride: false - pad_to: 1 - pad_value: 0 - sample_rate: ${feature.sample_rate} - window: hann - normalize: null - preemph: null - dither: 0.0 - frame_splicing: 1 - log: true - log_zero_guard_type: add - log_zero_guard_value: 1.0 - mag_power: 1.0 - mel_norm: null - - text_tokenizer: - _target_: nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers.IPATokenizer - punct: true - apostrophe: true - pad_with_space: true - g2p: - _target_: nemo.collections.tts.g2p.models.i18n_ipa.IpaG2p - phoneme_dict: ${phoneme_dict_path} - heteronyms: ${heteronyms_path} - phoneme_probability: 0.8 - ignore_ambiguous_words: false - use_chars: true - use_stresses: true - - pitch_processor: - _target_: nemo.collections.tts.parts.preprocessing.feature_processors.MeanVarianceSpeakerNormalization - field: pitch - stats_path: ${feature_stats_path} - - energy_processor: - _target_: nemo.collections.tts.parts.preprocessing.feature_processors.MeanVarianceSpeakerNormalization - field: energy - stats_path: ${feature_stats_path} - - train_ds: - dataset: - _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset - dataset_meta: ${train_ds_meta} - weighted_sampling_steps_per_epoch: ${weighted_sampling_steps_per_epoch} - sample_rate: ${feature.sample_rate} - speaker_path: ${speaker_path} - align_prior_hop_length: ${feature.hop_length} - featurizers: ${feature.featurizers} - feature_processors: - pitch: ${model.pitch_processor} - energy: ${model.energy_processor} - min_duration: 0.1 - max_duration: 10.0 - - dataloader_params: - batch_size: ${batch_size} - num_workers: 4 - - validation_ds: - dataset: - _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset - dataset_meta: ${val_ds_meta} - sample_rate: ${feature.sample_rate} - speaker_path: ${speaker_path} - align_prior_hop_length: ${feature.hop_length} - featurizers: ${feature.featurizers} - feature_processors: - pitch: ${model.pitch_processor} - energy: ${model.energy_processor} - - dataloader_params: - batch_size: ${batch_size} - num_workers: 2 - - log_config: - log_dir: ${log_dir} - log_epochs: [10, 50] - epoch_frequency: 100 - log_tensorboard: false - log_wandb: false - - generators: - - _target_: nemo.collections.tts.parts.utils.callbacks.FastPitchArtifactGenerator - log_spectrogram: true - log_alignment: true - audio_params: - _target_: nemo.collections.tts.parts.utils.callbacks.LogAudioParams - log_audio_gta: true - vocoder_type: ${vocoder_type} - vocoder_name: ${vocoder_name} - vocoder_checkpoint_path: ${vocoder_checkpoint_path} - - dataset: - _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset - text_tokenizer: ${model.text_tokenizer} - sample_rate: ${feature.sample_rate} - speaker_path: ${speaker_path} - align_prior_hop_length: ${feature.hop_length} - featurizers: ${feature.featurizers} - - feature_processors: - pitch: ${model.pitch_processor} - energy: ${model.energy_processor} - - dataset_meta: ${log_ds_meta} - - dataloader_params: - batch_size: 8 - num_workers: 2 - - input_fft: - _target_: nemo.collections.tts.modules.transformer.FFTransformerEncoder - n_layer: 6 - n_head: 2 - d_model: ${model.symbols_embedding_dim} - d_head: 64 - d_inner: 1536 - kernel_size: 3 - dropout: 0.1 - dropatt: 0.1 - dropemb: 0.0 - d_embed: ${model.symbols_embedding_dim} - - output_fft: - _target_: nemo.collections.tts.modules.transformer.FFTransformerDecoder - n_layer: 6 - n_head: 1 - d_model: ${model.symbols_embedding_dim} - d_head: 64 - d_inner: 1536 - kernel_size: 3 - dropout: 0.1 - dropatt: 0.1 - dropemb: 0.0 - - alignment_module: - _target_: nemo.collections.tts.modules.aligner.AlignmentEncoder - n_text_channels: ${model.symbols_embedding_dim} - - duration_predictor: - _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor - input_size: ${model.symbols_embedding_dim} - kernel_size: 3 - filter_size: 256 - dropout: 0.1 - n_layers: 2 - - pitch_predictor: - _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor - input_size: ${model.symbols_embedding_dim} - kernel_size: 3 - filter_size: 256 - dropout: 0.1 - n_layers: 2 - - energy_predictor: - _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor - input_size: ${model.symbols_embedding_dim} - kernel_size: 3 - filter_size: 256 - dropout: 0.1 - n_layers: 2 - - optim: - name: adamw - lr: 1e-3 - betas: [0.9, 0.999] - weight_decay: 1e-6 - - sched: - name: NoamAnnealing - warmup_steps: 1000 - last_epoch: -1 - d_model: 1 # Disable scaling based on model dim - -trainer: - num_nodes: 1 - devices: 1 - accelerator: gpu - strategy: ddp - precision: 16 - max_epochs: ${max_epochs} - accumulate_grad_batches: 1 - gradient_clip_val: 10.0 - enable_checkpointing: false # Provided by exp_manager - logger: false # Provided by exp_manager - log_every_n_steps: 100 - check_val_every_n_epoch: 10 - benchmark: false - -exp_manager: - exp_dir: null - name: ${name} - create_tensorboard_logger: true - create_checkpoint_callback: true - checkpoint_callback_params: - monitor: val_loss - resume_if_exists: false - resume_ignore_no_checkpoint: false diff --git a/examples/tts/conf/hifigan/hifigan_data.yaml b/examples/tts/conf/hifigan/hifigan_data.yaml deleted file mode 100644 index fde2f169aa8d..000000000000 --- a/examples/tts/conf/hifigan/hifigan_data.yaml +++ /dev/null @@ -1,133 +0,0 @@ -# This config contains the default values for training a HiFi-GAN model. -# If you want to train model on other dataset, you can change config values according to your dataset. -# Most dataset-specific arguments are in the head of the config file, see below. - -name: "HifiGan" - -defaults: - - feature: ??? - - sample: ??? - - model/generator: ??? - -max_epochs: ??? -batch_size: 16 -weighted_sampling_steps_per_epoch: null - -train_ds_meta: ??? -val_ds_meta: ??? -log_ds_meta: ??? - -log_dir: ??? - -model: - - max_epochs: ${max_epochs} - steps_per_epoch: ${weighted_sampling_steps_per_epoch} - l1_loss_factor: 60 - - preprocessor: - _target_: nemo.collections.asr.parts.preprocessing.features.FilterbankFeatures - nfilt: ${feature.mel_feature.mel_dim} - lowfreq: ${feature.mel_feature.lowfreq} - highfreq: ${feature.mel_feature.highfreq} - n_fft: ${feature.win_length} - n_window_size: ${feature.win_length} - n_window_stride: ${feature.hop_length} - pad_to: 0 - pad_value: 0 - exact_pad: true - sample_rate: ${feature.sample_rate} - window: hann - normalize: null - preemph: null - dither: 0.0 - frame_splicing: 1 - log: true - log_zero_guard_type: add - log_zero_guard_value: 1.0 - mag_power: 1.0 - mel_norm: null - use_grads: false - - train_ds: - dataset: - _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset - weighted_sampling_steps_per_epoch: ${weighted_sampling_steps_per_epoch} - sample_rate: ${feature.sample_rate} - n_samples: ${sample.train_n_samples} - min_duration: 0.4 - max_duration: null - dataset_meta: ${train_ds_meta} - - dataloader_params: - batch_size: ${batch_size} - num_workers: 4 - - validation_ds: - dataset: - _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset - sample_rate: ${feature.sample_rate} - n_samples: ${sample.val_n_samples} - min_duration: 3.0 - max_duration: null - dataset_meta: ${val_ds_meta} - - dataloader_params: - batch_size: ${batch_size} - num_workers: 2 - - log_config: - log_dir: ${log_dir} - log_epochs: [10, 50] - epoch_frequency: 100 - log_tensorboard: false - log_wandb: false - - generators: - - _target_: nemo.collections.tts.parts.utils.callbacks.VocoderArtifactGenerator - - dataset: - _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset - sample_rate: ${feature.sample_rate} - n_samples: null - min_duration: null - max_duration: null - dataset_meta: ${log_ds_meta} - - dataloader_params: - batch_size: 4 - num_workers: 2 - - optim: - _target_: torch.optim.AdamW - lr: 2e-4 - betas: [0.8, 0.99] - weight_decay: 1e-6 - sched: - name: ExponentialLR - gamma: 0.999 - -trainer: - num_nodes: 1 - devices: 1 - accelerator: gpu - strategy: ddp - precision: 16 - max_epochs: ${max_epochs} - accumulate_grad_batches: 1 - enable_checkpointing: False # Provided by exp_manager - logger: false # Provided by exp_manager - log_every_n_steps: 100 - check_val_every_n_epoch: 10 - benchmark: false - -exp_manager: - exp_dir: null - name: ${name} - create_tensorboard_logger: true - create_checkpoint_callback: true - create_wandb_logger: false - checkpoint_callback_params: - monitor: val_loss - resume_if_exists: false - resume_ignore_no_checkpoint: false diff --git a/examples/tts/conf/hifigan/sample/sample_22050.yaml b/examples/tts/conf/hifigan/sample/sample_22050.yaml deleted file mode 100644 index 18bc206e2566..000000000000 --- a/examples/tts/conf/hifigan/sample/sample_22050.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Audio dataset sampling config for 22.05khz sampling rate -train_n_samples: 8192 -val_n_samples: 66048 diff --git a/examples/tts/conf/hifigan/sample/sample_44100.yaml b/examples/tts/conf/hifigan/sample/sample_44100.yaml deleted file mode 100644 index d8315623bbbe..000000000000 --- a/examples/tts/conf/hifigan/sample/sample_44100.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Audio dataset sampling config for 44.1khz sampling rate -train_n_samples: 16384 -val_n_samples: 131072 diff --git a/examples/tts/g2p/g2p_inference.py b/examples/tts/g2p/g2p_inference.py index 3bb241659e48..e7bffa888653 100644 --- a/examples/tts/g2p/g2p_inference.py +++ b/examples/tts/g2p/g2p_inference.py @@ -19,11 +19,11 @@ import pytorch_lightning as pl import torch from omegaconf import OmegaConf +from utils import get_metrics from nemo.collections.tts.models.base import G2PModel from nemo.core.config import hydra_runner from nemo.utils import logging -from utils import get_metrics """ python g2p_inference.py \ diff --git a/examples/tts/g2p/g2p_train_and_evaluate.py b/examples/tts/g2p/g2p_train_and_evaluate.py index 0f67aa11e09b..ff7b2b0675ea 100644 --- a/examples/tts/g2p/g2p_train_and_evaluate.py +++ b/examples/tts/g2p/g2p_train_and_evaluate.py @@ -16,13 +16,13 @@ import pytorch_lightning as pl import torch +from utils import get_model from nemo.collections.common.callbacks import LogEpochTimeCallback from nemo.collections.tts.models.base import G2PModel from nemo.core.config import hydra_runner from nemo.utils import logging, model_utils from nemo.utils.exp_manager import exp_manager -from utils import get_model """ This script supports training of G2PModels diff --git a/tests/collections/asr/test_asr_interctc_models.py b/tests/collections/asr/test_asr_interctc_models.py index 253f7170b989..39a189ef4aa8 100644 --- a/tests/collections/asr/test_asr_interctc_models.py +++ b/tests/collections/asr/test_asr_interctc_models.py @@ -66,7 +66,6 @@ def squeezeformer_encoder_config() -> Dict: class TestInterCTCLoss: - @pytest.mark.pleasefixme @pytest.mark.unit @pytest.mark.parametrize( "model_class", [EncDecCTCModel, EncDecHybridRNNTCTCModel], @@ -87,7 +86,6 @@ class TestInterCTCLoss: ([], [0.3]), ], ) - @pytest.mark.pleasefixme def test_forward(self, model_class, encoder_config, apply_at_layers, loss_weights): preprocessor_config = {'_target_': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor'} vocabulary = [ diff --git a/tools/ctc_segmentation/scripts/run_ctc_segmentation.py b/tools/ctc_segmentation/scripts/run_ctc_segmentation.py index 884b6186e249..90e67ab844c7 100644 --- a/tools/ctc_segmentation/scripts/run_ctc_segmentation.py +++ b/tools/ctc_segmentation/scripts/run_ctc_segmentation.py @@ -24,9 +24,9 @@ import torch from joblib import Parallel, delayed from tqdm import tqdm +from utils import get_segments import nemo.collections.asr as nemo_asr -from utils import get_segments parser = argparse.ArgumentParser(description="CTC Segmentation") parser.add_argument("--output_dir", default="output", type=str, help="Path to output directory") diff --git a/tutorials/00_NeMo_Primer.ipynb b/tutorials/00_NeMo_Primer.ipynb index 1e484c14f607..50aa60260b35 100644 --- a/tutorials/00_NeMo_Primer.ipynb +++ b/tutorials/00_NeMo_Primer.ipynb @@ -42,7 +42,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/01_NeMo_Models.ipynb b/tutorials/01_NeMo_Models.ipynb index 7334188ac5d0..4255a6656b8a 100644 --- a/tutorials/01_NeMo_Models.ipynb +++ b/tutorials/01_NeMo_Models.ipynb @@ -37,7 +37,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/02_NeMo_Adapters.ipynb b/tutorials/02_NeMo_Adapters.ipynb index 071ed5f72809..289426f3bc2b 100644 --- a/tutorials/02_NeMo_Adapters.ipynb +++ b/tutorials/02_NeMo_Adapters.ipynb @@ -25,7 +25,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/AudioTranslationSample.ipynb b/tutorials/AudioTranslationSample.ipynb index 02fa0325121c..0c34baacc953 100644 --- a/tutorials/AudioTranslationSample.ipynb +++ b/tutorials/AudioTranslationSample.ipynb @@ -38,7 +38,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n" ] }, diff --git a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb index 13b61626e746..ae4f43867c8d 100644 --- a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb +++ b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb @@ -41,7 +41,7 @@ "!pip install text-unidecode\n", "\n", "### Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb index eaeb93bb92ff..b9c0db866f9c 100644 --- a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb +++ b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb @@ -40,7 +40,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/ASR_for_telephony_speech.ipynb b/tutorials/asr/ASR_for_telephony_speech.ipynb index ed1fd1157f1d..6133fdc9a8b9 100644 --- a/tutorials/asr/ASR_for_telephony_speech.ipynb +++ b/tutorials/asr/ASR_for_telephony_speech.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/ASR_with_NeMo.ipynb b/tutorials/asr/ASR_with_NeMo.ipynb index bab9299f57d4..74cd0f739e84 100644 --- a/tutorials/asr/ASR_with_NeMo.ipynb +++ b/tutorials/asr/ASR_with_NeMo.ipynb @@ -54,7 +54,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -588,7 +588,7 @@ "\n", "if not os.path.exists(config_path):\n", " # Grab the config we'll use in this example\n", - " BRANCH = 'r1.20.0'\n", + " BRANCH = 'main'\n", " !mkdir configs\n", " !wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/config.yaml\n", "\n", diff --git a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb index 92dcc2305e89..001336c97094 100644 --- a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb +++ b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb @@ -41,7 +41,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/ASR_with_Transducers.ipynb b/tutorials/asr/ASR_with_Transducers.ipynb index 8a302a8c7130..e1eb494f777e 100644 --- a/tutorials/asr/ASR_with_Transducers.ipynb +++ b/tutorials/asr/ASR_with_Transducers.ipynb @@ -29,7 +29,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference.ipynb b/tutorials/asr/Buffered_Transducer_Inference.ipynb index a6519e73f6ec..c23398dca46a 100644 --- a/tutorials/asr/Buffered_Transducer_Inference.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb index 29ded2c98fa4..2f179eaa9a5a 100644 --- a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb @@ -46,7 +46,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Intro_to_Transducers.ipynb b/tutorials/asr/Intro_to_Transducers.ipynb index 4420085f319f..d3928bed987f 100644 --- a/tutorials/asr/Intro_to_Transducers.ipynb +++ b/tutorials/asr/Intro_to_Transducers.ipynb @@ -44,7 +44,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ], "execution_count": null, diff --git a/tutorials/asr/Multilang_ASR.ipynb b/tutorials/asr/Multilang_ASR.ipynb index 28073a050d5e..7a11cb7dc6a6 100644 --- a/tutorials/asr/Multilang_ASR.ipynb +++ b/tutorials/asr/Multilang_ASR.ipynb @@ -104,7 +104,7 @@ "\n", "## Install NeMo\n", "## We are using the main branch but you might want to adjust that too\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb index 751eedba1519..8544d230878c 100644 --- a/tutorials/asr/Offline_ASR.ipynb +++ b/tutorials/asr/Offline_ASR.ipynb @@ -52,7 +52,7 @@ "id": "I9eIxAyKHREB" }, "source": [ - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "try:\n", " # Import NeMo Speech Recognition collection\n", " import nemo.collections.asr as nemo_asr\n", diff --git a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb index 07149d752e5f..8a8335ac1542 100644 --- a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb +++ b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb @@ -25,7 +25,7 @@ "!pip install wget\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb index 1ffe81dff02f..31d2c0dec943 100644 --- a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb +++ b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb @@ -27,7 +27,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Online_Noise_Augmentation.ipynb b/tutorials/asr/Online_Noise_Augmentation.ipynb index a4c192f7f7e5..8883cce55a80 100644 --- a/tutorials/asr/Online_Noise_Augmentation.ipynb +++ b/tutorials/asr/Online_Noise_Augmentation.ipynb @@ -32,7 +32,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb index f52e4777f5d3..490a4b6c8de7 100644 --- a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb @@ -27,7 +27,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb index 9ca2232142ad..858f162b1834 100644 --- a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb @@ -29,7 +29,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Self_Supervised_Pre_Training.ipynb b/tutorials/asr/Self_Supervised_Pre_Training.ipynb index 454840ded683..b055f14f5885 100644 --- a/tutorials/asr/Self_Supervised_Pre_Training.ipynb +++ b/tutorials/asr/Self_Supervised_Pre_Training.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Speech_Commands.ipynb b/tutorials/asr/Speech_Commands.ipynb index cc6c4956b8a5..58b719a867fa 100644 --- a/tutorials/asr/Speech_Commands.ipynb +++ b/tutorials/asr/Speech_Commands.ipynb @@ -61,7 +61,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Streaming_ASR.ipynb b/tutorials/asr/Streaming_ASR.ipynb index 19c998120d9d..a4701dc025d8 100644 --- a/tutorials/asr/Streaming_ASR.ipynb +++ b/tutorials/asr/Streaming_ASR.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Voice_Activity_Detection.ipynb b/tutorials/asr/Voice_Activity_Detection.ipynb index 0e5a59312506..123a03efc28e 100644 --- a/tutorials/asr/Voice_Activity_Detection.ipynb +++ b/tutorials/asr/Voice_Activity_Detection.ipynb @@ -30,7 +30,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb index 957ecdb49985..c9c547a8383e 100644 --- a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb +++ b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb @@ -50,7 +50,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb b/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb index bda6338dd0bf..c4406a4f04ee 100644 --- a/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb +++ b/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb @@ -70,7 +70,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb b/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb index 88ddb5189cc4..8cf540b27114 100644 --- a/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb +++ b/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb @@ -55,7 +55,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb index 75428c686e7e..faa93de12514 100644 --- a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb +++ b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb @@ -26,7 +26,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/02_NLP_Tokenizers.ipynb b/tutorials/nlp/02_NLP_Tokenizers.ipynb index ce6334f9712d..c63d2a8b1689 100644 --- a/tutorials/nlp/02_NLP_Tokenizers.ipynb +++ b/tutorials/nlp/02_NLP_Tokenizers.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'" + "BRANCH = 'main'" ] }, { @@ -35,7 +35,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb index a1aa3cdae55b..323bfa1c49b8 100644 --- a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb +++ b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb @@ -300,7 +300,7 @@ "\n", "## Install NeMo\n", "\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "!pip uninstall -y sacrebleu\n", diff --git a/tutorials/nlp/Dialogue.ipynb b/tutorials/nlp/Dialogue.ipynb index e5ec1941b032..ddd3bdd4f929 100644 --- a/tutorials/nlp/Dialogue.ipynb +++ b/tutorials/nlp/Dialogue.ipynb @@ -27,7 +27,7 @@ "outputs": [], "source": [ "import os \n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!apt-get update && apt-get install -y libsndfile1 ffmpeg\n", "!git clone https://github.com/NVIDIA/NeMo --branch $BRANCH\n", "os.chdir('NeMo')\n", diff --git a/tutorials/nlp/Entity_Linking_Medical.ipynb b/tutorials/nlp/Entity_Linking_Medical.ipynb index f2644af1f764..dfdf594e6804 100644 --- a/tutorials/nlp/Entity_Linking_Medical.ipynb +++ b/tutorials/nlp/Entity_Linking_Medical.ipynb @@ -17,7 +17,7 @@ "\"\"\"\n", "\n", "## Install NeMo if using google collab or if its not installed locally\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/nlp/GLUE_Benchmark.ipynb b/tutorials/nlp/GLUE_Benchmark.ipynb index 0162e5c39a32..b77b3439b444 100644 --- a/tutorials/nlp/GLUE_Benchmark.ipynb +++ b/tutorials/nlp/GLUE_Benchmark.ipynb @@ -44,7 +44,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.20.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'main'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb index fc22fa05d162..675fdfd5351c 100644 --- a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb +++ b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/MegatronBert_export.ipynb b/tutorials/nlp/MegatronBert_export.ipynb index 4e88195c9635..f925d2bc59b0 100644 --- a/tutorials/nlp/MegatronBert_export.ipynb +++ b/tutorials/nlp/MegatronBert_export.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='r1.20.0'" + "BRANCH='main'" ] }, { diff --git a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb index cc158388feb5..bfd3c7094198 100644 --- a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb +++ b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb @@ -65,7 +65,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "DATA_PATH='.'\n", "TRANSACTIONS=DATA_PATH+'/card_transaction.v1.csv'\n", "#CHECKPOINTS='/chk_points'\n", diff --git a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb index 87bfc5c9b30d..004014ebdeeb 100644 --- a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb +++ b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='r1.20.0'" + "BRANCH='main'" ] }, { diff --git a/tutorials/nlp/Punctuation_and_Capitalization.ipynb b/tutorials/nlp/Punctuation_and_Capitalization.ipynb index 1545b4329d2b..f88c33fada34 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'" + "BRANCH = 'main'" ] }, { diff --git a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb index 5fbe61139980..2afbb19c0e66 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'" + "BRANCH = 'main'" ] }, { diff --git a/tutorials/nlp/Question_Answering.ipynb b/tutorials/nlp/Question_Answering.ipynb index af95c7707828..a211c8320d51 100644 --- a/tutorials/nlp/Question_Answering.ipynb +++ b/tutorials/nlp/Question_Answering.ipynb @@ -74,7 +74,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'" + "BRANCH = 'main'" ] }, { diff --git a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb index 8920a0738641..d6b1e98b428e 100644 --- a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb +++ b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'" + "BRANCH = 'main'" ] }, { diff --git a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb index 412860045cb0..fdcff979ea46 100644 --- a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb +++ b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb @@ -20,7 +20,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n", "\n" ] diff --git a/tutorials/nlp/Token_Classification-BioMegatron.ipynb b/tutorials/nlp/Token_Classification-BioMegatron.ipynb index 56bf3261bcd3..afbc8394aa84 100644 --- a/tutorials/nlp/Token_Classification-BioMegatron.ipynb +++ b/tutorials/nlp/Token_Classification-BioMegatron.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='r1.20.0'" + "BRANCH='main'" ] }, { diff --git a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb index 2888703b9368..3ab98f6c19fd 100644 --- a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb +++ b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb @@ -30,7 +30,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'" + "BRANCH = 'main'" ] }, { @@ -53,7 +53,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.20.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'main'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb index 46e7e24f4130..7f1baf536d87 100644 --- a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb +++ b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb index 90dacd5f6b8d..ea943b35e0d0 100644 --- a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb +++ b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb @@ -30,7 +30,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb index 6caaa35e0765..1fd0f1b140d5 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb @@ -23,7 +23,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb index f08c7ae88385..7db905b6d225 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "\n", "! git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", @@ -232,7 +232,7 @@ "source": [ "import os\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "\n", "# download scripts if not already there \n", "if not os.path.exists('NeMo/scripts'):\n", diff --git a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb index f92864ae3306..27a01b894eae 100644 --- a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb +++ b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb @@ -27,7 +27,7 @@ "!pip install text-unidecode\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "# Install TorchAudio\n", diff --git a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb index 770d9e7894c9..98f0cce4e9ec 100644 --- a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb +++ b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb @@ -35,7 +35,7 @@ "id": "d4KCUoxSpdoZ" }, "source": [ - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "\n", "\"\"\"\n", "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", diff --git a/tutorials/tools/Multispeaker_Simulator.ipynb b/tutorials/tools/Multispeaker_Simulator.ipynb index 2841b1df62d1..c2a9caf1ea72 100644 --- a/tutorials/tools/Multispeaker_Simulator.ipynb +++ b/tutorials/tools/Multispeaker_Simulator.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "\n", "! git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", diff --git a/tutorials/tts/Aligner_Inference_Examples.ipynb b/tutorials/tts/Aligner_Inference_Examples.ipynb index 3653b9e3ed8d..611e1e3b6e66 100644 --- a/tutorials/tts/Aligner_Inference_Examples.ipynb +++ b/tutorials/tts/Aligner_Inference_Examples.ipynb @@ -39,7 +39,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb b/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb index 647242294197..699f1b131408 100644 --- a/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb +++ b/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb @@ -57,7 +57,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !pip install librosa numpy matplotlib" ] diff --git a/tutorials/tts/FastPitch_Finetuning.ipynb b/tutorials/tts/FastPitch_Finetuning.ipynb index 101a71aaf089..e0c34b3c0de5 100755 --- a/tutorials/tts/FastPitch_Finetuning.ipynb +++ b/tutorials/tts/FastPitch_Finetuning.ipynb @@ -57,7 +57,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode \n", diff --git a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb index 70e989bb98af..747ecfa43127 100644 --- a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb @@ -50,7 +50,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode scipy==1.7.3\n", diff --git a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb index 7ab6d1d58ac8..eda5bba0aa1e 100644 --- a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb +++ b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb @@ -94,7 +94,7 @@ "source": [ "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/tts/Inference_DurationPitchControl.ipynb b/tutorials/tts/Inference_DurationPitchControl.ipynb index b7acbd364507..73c12bc79900 100644 --- a/tutorials/tts/Inference_DurationPitchControl.ipynb +++ b/tutorials/tts/Inference_DurationPitchControl.ipynb @@ -46,7 +46,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/Inference_ModelSelect.ipynb b/tutorials/tts/Inference_ModelSelect.ipynb index 4c39e591f203..195b773fb5ee 100644 --- a/tutorials/tts/Inference_ModelSelect.ipynb +++ b/tutorials/tts/Inference_ModelSelect.ipynb @@ -46,7 +46,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/NeMo_TTS_Primer.ipynb b/tutorials/tts/NeMo_TTS_Primer.ipynb index f891b7f11594..fe2e34659554 100644 --- a/tutorials/tts/NeMo_TTS_Primer.ipynb +++ b/tutorials/tts/NeMo_TTS_Primer.ipynb @@ -25,7 +25,7 @@ "source": [ "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/tts/Pronunciation_customization.ipynb b/tutorials/tts/Pronunciation_customization.ipynb index be99231ec9eb..6fe269e76904 100644 --- a/tutorials/tts/Pronunciation_customization.ipynb +++ b/tutorials/tts/Pronunciation_customization.ipynb @@ -26,7 +26,7 @@ "4. Run this cell to set up dependencies.\n", "\"\"\"\n", "\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode \n", diff --git a/tutorials/tts/Tacotron2_Training.ipynb b/tutorials/tts/Tacotron2_Training.ipynb index 8473dca449ea..79546bb79db9 100644 --- a/tutorials/tts/Tacotron2_Training.ipynb +++ b/tutorials/tts/Tacotron2_Training.ipynb @@ -54,7 +54,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", From aab3c40ff1eb6ce1eafdf7b6922a398a352a48b0 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Mon, 16 Oct 2023 11:29:46 -0700 Subject: [PATCH 339/512] Fix diff for non-mm models --- .dockerignore | 2 - .gitignore | 1 - .gitlab-ci.yml | 13 - LICENSE | 527 +----------------- examples/tts/conf/fastpitch/fastpitch.yaml | 256 --------- examples/tts/conf/hifigan/hifigan_data.yaml | 133 ----- .../tts/conf/hifigan/sample/sample_22050.yaml | 3 - .../tts/conf/hifigan/sample/sample_44100.yaml | 3 - examples/tts/g2p/g2p_inference.py | 2 +- examples/tts/g2p/g2p_train_and_evaluate.py | 2 +- .../asr/test_asr_interctc_models.py | 2 - .../scripts/run_ctc_segmentation.py | 2 +- tutorials/00_NeMo_Primer.ipynb | 2 +- tutorials/01_NeMo_Models.ipynb | 2 +- tutorials/02_NeMo_Adapters.ipynb | 2 +- tutorials/AudioTranslationSample.ipynb | 2 +- ...blish_NeMo_Model_On_Hugging_Face_Hub.ipynb | 2 +- .../asr/ASR_CTC_Language_Finetuning.ipynb | 2 +- tutorials/asr/ASR_for_telephony_speech.ipynb | 2 +- tutorials/asr/ASR_with_NeMo.ipynb | 4 +- .../asr/ASR_with_Subword_Tokenization.ipynb | 2 +- tutorials/asr/ASR_with_Transducers.ipynb | 2 +- .../asr/Buffered_Transducer_Inference.ipynb | 2 +- ..._Transducer_Inference_with_LCS_Merge.ipynb | 2 +- tutorials/asr/Intro_to_Transducers.ipynb | 2 +- tutorials/asr/Multilang_ASR.ipynb | 2 +- tutorials/asr/Offline_ASR.ipynb | 2 +- .../Offline_ASR_with_VAD_for_CTC_models.ipynb | 2 +- .../asr/Online_ASR_Microphone_Demo.ipynb | 2 +- tutorials/asr/Online_Noise_Augmentation.ipynb | 2 +- .../Online_Offline_Microphone_VAD_Demo.ipynb | 2 +- .../Online_Offline_Speech_Commands_Demo.ipynb | 2 +- .../asr/Self_Supervised_Pre_Training.ipynb | 2 +- tutorials/asr/Speech_Commands.ipynb | 2 +- tutorials/asr/Streaming_ASR.ipynb | 2 +- tutorials/asr/Voice_Activity_Detection.ipynb | 2 +- .../asr/asr_adapters/ASR_with_Adapters.ipynb | 2 +- ...netuning_at_Scale_with_AWS_SageMaker.ipynb | 2 +- .../cloud/aws/SageMaker_ASR_Training.ipynb | 2 +- ...Language_Models_for_Downstream_Tasks.ipynb | 2 +- tutorials/nlp/02_NLP_Tokenizers.ipynb | 4 +- ...a_Preprocessing_and_Cleaning_for_NMT.ipynb | 2 +- tutorials/nlp/Dialogue.ipynb | 2 +- tutorials/nlp/Entity_Linking_Medical.ipynb | 2 +- tutorials/nlp/GLUE_Benchmark.ipynb | 2 +- ...Joint_Intent_and_Slot_Classification.ipynb | 2 +- tutorials/nlp/MegatronBert_export.ipynb | 2 +- ...on_Synthetic_Tabular_Data_Generation.ipynb | 2 +- .../nlp/Multitask_Prompt_and_PTuning.ipynb | 2 +- .../nlp/Punctuation_and_Capitalization.ipynb | 2 +- ...ion_and_Capitalization_Lexical_Audio.ipynb | 2 +- tutorials/nlp/Question_Answering.ipynb | 2 +- .../nlp/Relation_Extraction-BioMegatron.ipynb | 2 +- ...xt_Classification_Sentiment_Analysis.ipynb | 2 +- .../Token_Classification-BioMegatron.ipynb | 2 +- ...ssification_Named_Entity_Recognition.ipynb | 4 +- .../nlp/Zero_Shot_Intent_Recognition.ipynb | 2 +- .../ASR_with_SpeakerDiarization.ipynb | 2 +- .../Speaker_Diarization_Inference.ipynb | 2 +- .../Speaker_Diarization_Training.ipynb | 4 +- .../Speaker_Identification_Verification.ipynb | 2 +- .../tools/CTC_Segmentation_Tutorial.ipynb | 2 +- tutorials/tools/Multispeaker_Simulator.ipynb | 2 +- .../tts/Aligner_Inference_Examples.ipynb | 2 +- .../Evaluation_MelCepstralDistortion.ipynb | 2 +- tutorials/tts/FastPitch_Finetuning.ipynb | 2 +- .../tts/FastPitch_MixerTTS_Training.ipynb | 2 +- .../tts/FastPitch_Speaker_Interpolation.ipynb | 2 +- .../tts/Inference_DurationPitchControl.ipynb | 2 +- tutorials/tts/Inference_ModelSelect.ipynb | 2 +- tutorials/tts/NeMo_TTS_Primer.ipynb | 2 +- .../tts/Pronunciation_customization.ipynb | 2 +- tutorials/tts/Tacotron2_Training.ipynb | 2 +- 73 files changed, 70 insertions(+), 1006 deletions(-) delete mode 100644 .gitlab-ci.yml delete mode 100644 examples/tts/conf/fastpitch/fastpitch.yaml delete mode 100644 examples/tts/conf/hifigan/hifigan_data.yaml delete mode 100644 examples/tts/conf/hifigan/sample/sample_22050.yaml delete mode 100644 examples/tts/conf/hifigan/sample/sample_44100.yaml diff --git a/.dockerignore b/.dockerignore index 00626456bb1e..14f5114d01be 100644 --- a/.dockerignore +++ b/.dockerignore @@ -17,5 +17,3 @@ coverage.xml .git **/*.nemo **/*.ckpt - -nogit/ \ No newline at end of file diff --git a/.gitignore b/.gitignore index e566a6685a32..1ff2a92cac64 100644 --- a/.gitignore +++ b/.gitignore @@ -179,4 +179,3 @@ examples/neural_graphs/*.yml .hydra/ nemo_experiments/ -nogit/ \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index d5fcccbd2d1c..000000000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,13 +0,0 @@ -stages: - - pre - -pre-commit-checks: - stage: pre - script: - - pre-commit run --all-files --verbose --show-diff-on-failure - image: gitlab-master.nvidia.com:5005/dl/ai-services/python-clients/codeformat:latest - tags: - - os/linux - - type/docker - only: - - merge_requests diff --git a/LICENSE b/LICENSE index 551b265159a3..f49a4e16e68b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,307 +1,3 @@ -The following applies to all files unless otherwise noted: - -NVIDIA NEMO FRAMEWORK MULTIMODAL PRE-RELEASE EVALUATION LICENSE - -IMPORTANT NOTICE – PLEASE READ AND AGREE BEFORE USING THE CONTAINER. This license -agreement (“Agreement”) is a legal agreement between you, whether an individual or -entity ("you”) and NVIDIA Corporation ("NVIDIA") and governs your use of an early -access version of the NVIDIA NeMo framework multimodal container and all its contents -(“CONTAINER”). This Agreement can be accepted only by an adult of legal age of -majority in the country in which the CONTAINER is used. If you don’t have the required -age or authority to accept this Agreement, or if you don’t accept all the terms and -conditions of this Agreement, do not download, install or use the CONTAINER. You -agree to use the CONTAINER only for purposes that are permitted by this Agreement and -any applicable law or regulation in the relevant jurisdictions. - -1. License. -1.1 Subject to the terms of this Agreement, NVIDIA grants you a non-exclusive, -revocable, non-transferable, non-sublicensable (except as expressly granted in this -Agreement), license to: (a) install and use copies of the CONTAINER, and (b) modify and -create derivative works of sample or example source code delivered by NVIDIA as part -of the CONTAINER (if applicable), all the foregoing only for your internal use to -evaluate or develop and test services and applications with the CONTAINER, without use -in production. - -2. Limitations. -Your license to use the CONTAINER and derivative works of the CONTAINER is restricted -as follows: - -2.1 The CONTAINER may run on any computing system with or without NVIDIA GPUs, except -for the NVIDIA proprietary software (such as CUDA and TensorRT software) in the -CONTAINER which is licensed only to run on systems with NVIDIA GPUs. The NVIDIA -proprietary software in the CONTAINER may be present on systems without NVIDIA GPUs, -as long as it is not running on such systems. For components governed by open source -software licenses, see the information in the “Components Under Other Licenses” section -below. - -2.2 The CONTAINER and derivative works may not be used in any commercial-ready products -or services, or separately to even for development, test or evaluation of other NVIDIA -non-NVIDIA products or services. - -2.3 You may not reverse engineer, decompile, or disassemble the CONTAINER components -provided in binary form, nor attempt in any other manner to obtain source code of such -CONTAINER components. - -2.4 You may not change or remove copyright or other proprietary notices in the -CONTAINER. - -2.5 Except as expressly granted in this Agreement, you may not copy, sell, rent, -sublicense, transfer, distribute, modify or create derivative works of the CONTAINER, -or make its functionality available to others. - -2.6 Data generated with use of the CONTAINER may not be used for deployment purposes. - -2.7 You may not bypass, disable, or circumvent any technical limitation, encryption, -security, digital rights management or authentication mechanism in the CONTAINER. - -2.8 You may not replace any NVIDIA software components that are governed by this -Agreement with other software that implements NVIDIA APIs. - -2.9 You may not use the CONTAINER for the purpose of developing competing products or -technologies or assisting a third party in such activities. - -2.10 You may not use the CONTAINER in any manner that would cause it to become subject -to an open source software license; subject to the terms in the “Components Under Other -Licenses” section below. - -2.11 Unless you have an agreement with NVIDIA for this purpose, you may not use the -CONTAINER provided under this Agreement and derivative works in a system or -application where the use of or application where the use of or failure of such system -or application developed with CONTAINER provided by NVIDIA could result in injury, -death or catastrophic damage. NVIDIA will not be liable to you or any third party, in -whole or in part, for any claims or damages arising from these uses. You are solely -responsible for ensuring that systems and applications developed with the CONTAINER -as a whole include sufficient safety and redundancy features and comply with all -applicable legal and regulatory standards and requirements. - -3. Your Privacy: Collection and Use of Information. -Please review the NVIDIA Privacy Policy, located at -https://www.nvidia.com/en-us/about-nvidia/privacy-policy, which explains NVIDIA’s -policy for collecting and using data, as well as visit the NVIDIA Privacy Center, -located at https://www.nvidia.com/en-us/privacy-center, to manage your consent and -privacy preferences. NVIDIA may require certain personal information such as name, -email address and entitlement information including survey responses to deliver or -provide the CONTAINER to you. - -4. Authorized Users. -You may allow employees and contractors of your entity or of your subsidiary(ies) to -access and use the CONTAINER from your secure network to perform the work authorized -by this Agreement on your behalf. If you are an academic institution, you may allow -users enrolled or employed by the academic institution to access and use the CONTAINER -as authorized by this Agreement from your secure network. You are responsible for the -compliance with the terms of this Agreement by your authorized users. Any act or -omission that if committed by you would constitute a breach of this Agreement will be -deemed to constitute a breach of this Agreement if committed by your authorized users. - -5. Confidentiality. -You agree that you will not use, nor authorize others to use, NVIDIA Confidential -Information, except as necessary for the performance of this Agreement, and that you -will not disclose NVIDIA Confidential Information to any third party, except to -permitted users under this Agreement that have a need to know such Confidential -Information for the purpose of this Agreement, provided that each such recipient is -subject to a written agreement that includes confidentiality obligations consistent -with these terms. You agree to use all reasonable efforts to maintain the -confidentiality of NVIDIA Confidential Information in your possession or control, but -in no event less than the efforts that you ordinarily use with respect to your own -Confidential Information of similar nature and importance. “Confidential Information” -means the output, and any results of benchmarking or other competitive analysis or -regression or performance data relating to the CONTAINER. - -6. Pre-Release Versions. -CONTAINER versions or specific features identified as alpha, beta, preview, early -access or otherwise as pre-release may not be fully functional, may contain errors or -design flaws, and may have reduced or different security, privacy, availability, and -reliability standards relative to commercial versions of NVIDIA offerings. You may use -a pre-release CONTAINER at your own risk, understanding that such versions are not -intended for use in production or business-critical systems. NVIDIA may choose not to -make available a commercial version of any pre-release CONTAINER. NVIDIA may also -choose to abandon development and terminate the availability of a pre-release CONTAINER -at any time without liability. - -7. Updates. -NVIDIA may, at its option, make available patches, workarounds or other updates to the -CONTAINER. Unless the updates are provided with their separate governing terms, they -are deemed part of the CONTAINER licensed to you as provided in this Agreement. - -8. Components Under Other Licenses. -The CONTAINER may include or be distributed with components provided with separate -legal notices or terms that accompany the components, such as open source software -terms and other license terms ("Other Licenses”). The components are subject to the -applicable Other Licenses, including any proprietary notices, disclaimers, -requirements and extended use rights; except that this Agreement will prevail regarding -the use of third-party open source software, unless a third-party open source software -license requires its license terms to prevail. Open source software license means any -software, data or documentation subject to any license identified as an open source -license by the Open Source Initiative (http://opensource.org), Free Software Foundation (http://www.fsf.org) or -other similar open source organization or listed by the Software Package Data Exchange -(SPDX) Workgroup under the Linux Foundation (http://www.spdx.org). - -You acknowledge and agree that it is your sole responsibility to obtain any additional -third-party licenses required to make, have made, use, have used, sell, import, and -offer for sale your products or services that include or incorporate components under -Other Licenses, including, without limitation, audio and/or video encoders and decoders -and implementations of technical standards. NVIDIA does not grant to you under this -Agreement any necessary patent or other rights, including standard essential patent -rights, with respect to components under Other Licenses. - -9. Termination. -This Agreement will automatically terminate without notice from NVIDIA if you fail to -comply with any of the terms in this Agreement or if you commence or participate in any -legal proceeding against NVIDIA with respect to the CONTAINER. Additionally, either -party may terminate this Agreement at any time with prior written notice to the other -party. Upon any termination, you must stop using and destroy all copies of the CONTAINER -and derivative works. Upon written request, you will certify in writing that you have -complied with your commitments under this section. All provisions will survive -termination, except for the licenses granted to you. - -10. Ownership. -The CONTAINER, including all intellectual property rights, is and will remain the sole -and exclusive property of NVIDIA or its licensors. Except as expressly granted in this -Agreement, (i) NVIDIA reserves all rights, interests, and remedies in connection with -the CONTAINER, and (ii) no other license or right is granted to you by implication, -estoppel or otherwise. You agree to cooperate with NVIDIA and provide reasonably -requested information to verify your compliance with this Agreement. - -11. Feedback. -You may, but you are not obligated to, provide suggestions, requests, fixes, -modifications, enhancements, or other feedback regarding the CONTAINER -(collectively, “Feedback”). Feedback, even if designated as confidential by you, -will not create any confidentiality obligation for NVIDIA or its affiliates. If -you provide Feedback, you hereby grant NVIDIA, its affiliates and its designees -a non-exclusive, perpetual, irrevocable, sublicensable, worldwide, royalty-free, -fully paid-up and transferable license, under your intellectual property rights, -to publicly perform, publicly display, reproduce, use, make, have made, sell, -offer for sale, distribute (through multiple tiers of distribution), import, -create derivative works of and otherwise commercialize and exploit the Feedback -at NVIDIA’s discretion. You will not give Feedback (i) that you have reason to -believe is subject to any restriction that impairs the exercise of the grant -stated in this section, such as third-party intellectual property rights; or -(ii) subject to license terms which seek to require any product incorporating or -developed using such Feedback, or other intellectual property of NVIDIA or its -affiliates, to be licensed to or otherwise shared with any third party. - -12. Disclaimer of Warranties. -THE CONTAINER IS PROVIDED BY NVIDIA AS-IS AND WITH ALL FAULTS. TO THE FULLEST -EXTENT PERMITTED BY APPLICABLE LAW, NVIDIA DISCLAIMS ALL WARRANTIES AND -REPRESENTATIONS OF ANY KIND, WHETHER EXPRESS, IMPLIED OR STATUTORY, RELATING TO -OR ARISING UNDER THIS AGREEMENT, INCLUDING, WITHOUT LIMITATION, THE WARRANTIES -OF TITLE, NONINFRINGEMENT, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, -USAGE OF TRADE AND COURSE OF DEALING. WITHOUT LIMITING THE FOREGOING, NVIDIA -DOES NOT WARRANT THAT THE CONTAINER WILL MEET YOUR REQUIREMENTS; THAT ANY -DEFECTS OR ERRORS WILL BE CORRECTED; THAT ANY CERTAIN CONTENT WILL BE AVAILABLE; -OR THAT THE CONTAINER IS FREE OF VIRUSES OR OTHER HARMFUL COMPONENTS. NO -INFORMATION OR ADVICE GIVEN BY NVIDIA WILL IN ANY WAY INCREASE THE SCOPE OF ANY -WARRANTY EXPRESSLY PROVIDED IN THIS AGREEMENT. NVIDIA does not warrant or assume -responsibility for the accuracy or completeness of any third-party information, -text, graphics, links contained in THE CONTAINER. - -13. Limitations of Liability. -TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT WILL NVIDIA BE -LIABLE FOR ANY (I) INDIRECT, PUNITIVE, SPECIAL, INCIDENTAL OR CONSEQUENTIAL -DAMAGES, OR (II) DAMAGES FOR THE (A) COST OF PROCURING SUBSTITUTE GOODS OR (B) -LOSS OF PROFITS, REVENUES, USE, DATA OR GOODWILL ARISING OUT OF OR RELATED TO -THIS AGREEMENT, WHETHER BASED ON BREACH OF CONTRACT, TORT (INCLUDING NEGLIGENCE), -STRICT LIABILITY, OR OTHERWISE, AND EVEN IF NVIDIA HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES AND EVEN IF A PARTY'S REMEDIES FAIL THEIR ESSENTIAL -PURPOSE. - -ADDITIONALLY, TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, NVIDIA’S TOTAL -CUMULATIVE AGGREGATE LIABILITY FOR ANY AND ALL LIABILITIES, OBLIGATIONS OR -CLAIMS ARISING OUT OF OR RELATED TO THIS AGREEMENT WILL NOT EXCEED FIVE U.S. -DOLLARS (US$5). - -14. Governing Law and Jurisdiction. -This Agreement will be governed in all respects by the laws of the United States -and the laws of the State of Delaware, without regard to conflict of laws -principles or the United Nations Convention on Contracts for the International Sale -of Goods. The state and federal courts residing in Santa Clara County, -California will have exclusive jurisdiction over any dispute or claim arising -out of or related to this Agreement, and the parties irrevocably consent to -personal jurisdiction and venue in those courts; except that either party may -apply for injunctive remedies or an equivalent type of urgent legal relief in -any jurisdiction. - -15. No Assignment. -NVIDIA may assign, delegate or transfer its rights or obligations under this -Agreement by any means or operation of law. You may not, without NVIDIA’s prior -written consent, assign, delegate or transfer any of your rights or obligations -under this Agreement by any means or operation of law, and any attempt to do so -is null and void. - -16. Waiver. -No failure or delay by a party to enforce any Agreement term or obligation will -operate as a waiver by that party, nor prevent the enforcement of such term or -obligation later. - -17. Export. -You agree to comply with all applicable export, import, trade and economic -sanctions laws and regulations, including U.S. Export Administration Regulations -and Office of Foreign Assets Control regulations. These laws include -restrictions on destinations, end-users and end-use. - -18. Government Use. - The CONTAINER, including related documentation (“Protected Items”) is a -“Commercial product” as this term is defined at 48 C.F.R. 2.101, consisting of -“commercial computer software” and “commercial computer software documentation” -as such terms are used in, respectively, 48 C.F.R. 12.212 and 48 C.F.R. -227.7202 & 252.227-7014(a)(1). Before any Protected Items are supplied to the -U.S. Government, you will (i) inform the U.S. Government in writing that the -Protected Items are and must be treated as commercial computer software and -commercial computer software documentation developed at private expense; (ii) -inform the U.S. Government that the Protected Items are provided subject to the -terms of this Agreement; and (iii) mark the Protected Items as commercial -computer software and commercial computer software documentation developed at -private expense. In no event will you permit the U.S. Government to acquire -rights in Protected Items beyond those specified in 48 C.F.R. 52.227-19(b)(1)- -(2) or 252.227-7013(c) except as expressly approved by NVIDIA in writing. - -19. Notices. -Unless otherwise specifically stated in this Agreement, all notices, requests, -consents and other communications, which are required or permitted under this -Agreement, will be in writing to the address below and will be effective (i) -upon receipt if by personal delivery, (ii) upon receipt if by certified or -registered mail (return receipt requested), or (iii) one (1) day after it is -sent if by next day delivery by a major commercial delivery service. Please -direct your legal notices or other correspondence to NVIDIA Corporation, 2788 -San Tomas Expressway, Santa Clara, California 95051, United States of America, -Attention: Legal Department. - -20. Force Majeure. -Neither party will be liable during any period where an event or circumstance -prevents or delays that party from performing its obligations under this -Agreement and that event or circumstance: (i) is not within the reasonable -control of that party and is not the result of that party’s negligence, and (ii) -cannot be overcome or avoided by that party using reasonably diligent efforts. - -21. Entire Agreement. -Regarding the subject matter of this Agreement, the parties agree that (i) this -Agreement constitutes the entire and exclusive agreement between the parties and -supersedes all prior and contemporaneous communications and (ii) any additional -or different terms or conditions, whether contained in purchase orders, order -acknowledgments, invoices or otherwise, will not be binding on the receiving party -and are null and void. a court of competent jurisdiction rules that a provision of -this Agreement is unenforceable, that provision will be deemed modified to the -extent necessary to make it enforceable and the remainder of this Agreement will -continue in full force and effect. Any amendment to this Agreement must be in writing -and signed by authorized representatives of both parties. - -22. Licensing. -If the terms in this license are not suitable for your organization, or for any -questions regarding this license, please contact NVIDIA at -nvidia-compute-license-questions@nvidia.com. - -(v. April 10, 2023) - --- - -This repository also contains code from Colossal-AI, and K-diffusion. Files from these -organizations have notices at the top of each file. Below are licenses used in those -files, as indicated. - - -------------- LICENSE FOR from Colossal-AI for Stable Diffusion -------------- - -Copyright 2021- HPC-AI Technology Inc. All rights reserved. Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ @@ -490,7 +186,7 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved. same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2021- HPC-AI Technology Inc. + Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -502,223 +198,4 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved. distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and - limitations under the License. - - ## Some of colossal-ai's code is derived from others projects, which is subject to the following copyright notice: - - Copyright 2021 The Alpa team. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - https://github.com/alpa-projects/alpa/blob/979a45a3e6187df941ef4a4c4c6eea664527d68d/LICENSE - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ------------------------------------------------- - - Copyright 2018-2020 Philippe Tillet - Copyright 2020-2022 OpenAI - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files - (the "Software"), to deal in the Software without restriction, - including without limitation the rights to use, copy, modify, merge, - publish, distribute, sublicense, and/or sell copies of the Software, - and to permit persons to whom the Software is furnished to do so, - subject to the following conditions: - - ---------------- LICENSE FOR Microsoft Deepspeed ---------------- - - MIT License - - Copyright (c) Microsoft Corporation. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE - - ---------------- LICENSE FOR NVIDIA Megatron-LM ---------------- - - Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of NVIDIA CORPORATION nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ---------------- LICENSE FOR NVIDIA Apex ---------------- - - All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - - 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ---------------- LICENSE FOR Facebook Fairscale ---------------- - - Copyright (c) Facebook, Inc. and its affiliates - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America - and IDIAP Research Institute nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - - ---------------- LICENSE FOR Flash Attention ---------------- - - BSD 3-Clause License - - Copyright (c) 2022, the respective contributors, as shown by the AUTHORS file. - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - * Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ---------------- LICENSE FOR Facebook xFormers ---------------- - - From xFormers: - - Copyright (c) Facebook, Inc. and its affiliates - - - === - - BSD 3-Clause License - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America - and IDIAP Research Institute nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - - -------------- LICENSE FOR from K-diffusion for Diffusion Inference -------------- - -Copyright (c) 2022 Katherine Crowson - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file + limitations under the License. \ No newline at end of file diff --git a/examples/tts/conf/fastpitch/fastpitch.yaml b/examples/tts/conf/fastpitch/fastpitch.yaml deleted file mode 100644 index 1d552d058d76..000000000000 --- a/examples/tts/conf/fastpitch/fastpitch.yaml +++ /dev/null @@ -1,256 +0,0 @@ -# This config contains the default values for training an English FastPitch model. -# If you want to train a model on other dataset, you can change config values according to your dataset. -# Most dataset-specific arguments are in the head of the config file, see below. - -name: FastPitch - -defaults: - - feature: ??? - -max_epochs: ??? -batch_size: 32 -weighted_sampling_steps_per_epoch: null - -n_speakers: ??? -speaker_path: null -feature_stats_path: null - -train_ds_meta: ??? -val_ds_meta: ??? -log_ds_meta: ??? - -phoneme_dict_path: ??? -heteronyms_path: ??? - -log_dir: ??? -vocoder_type: ??? -vocoder_name: null -vocoder_checkpoint_path: null - -model: - learn_alignment: true - bin_loss_warmup_epochs: 100 - - n_speakers: ${n_speakers} - n_mel_channels: ${feature.mel_feature.mel_dim} - min_token_duration: 1 - max_token_duration: 75 - symbols_embedding_dim: 384 - pitch_embedding_kernel_size: 3 - energy_embedding_kernel_size: 3 - speaker_emb_condition_prosody: true - speaker_emb_condition_aligner: true - use_log_energy: false - dur_loss_scale: 0.1 - pitch_loss_scale: 0.1 - energy_loss_scale: 0.1 - aligner_loss_scale: 0.1 - - preprocessor: - _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor - features: ${feature.mel_feature.mel_dim} - lowfreq: ${feature.mel_feature.lowfreq} - highfreq: ${feature.mel_feature.highfreq} - n_fft: ${feature.win_length} - n_window_size: ${feature.win_length} - window_size: false - n_window_stride: ${feature.hop_length} - window_stride: false - pad_to: 1 - pad_value: 0 - sample_rate: ${feature.sample_rate} - window: hann - normalize: null - preemph: null - dither: 0.0 - frame_splicing: 1 - log: true - log_zero_guard_type: add - log_zero_guard_value: 1.0 - mag_power: 1.0 - mel_norm: null - - text_tokenizer: - _target_: nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers.IPATokenizer - punct: true - apostrophe: true - pad_with_space: true - g2p: - _target_: nemo.collections.tts.g2p.models.i18n_ipa.IpaG2p - phoneme_dict: ${phoneme_dict_path} - heteronyms: ${heteronyms_path} - phoneme_probability: 0.8 - ignore_ambiguous_words: false - use_chars: true - use_stresses: true - - pitch_processor: - _target_: nemo.collections.tts.parts.preprocessing.feature_processors.MeanVarianceSpeakerNormalization - field: pitch - stats_path: ${feature_stats_path} - - energy_processor: - _target_: nemo.collections.tts.parts.preprocessing.feature_processors.MeanVarianceSpeakerNormalization - field: energy - stats_path: ${feature_stats_path} - - train_ds: - dataset: - _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset - dataset_meta: ${train_ds_meta} - weighted_sampling_steps_per_epoch: ${weighted_sampling_steps_per_epoch} - sample_rate: ${feature.sample_rate} - speaker_path: ${speaker_path} - align_prior_hop_length: ${feature.hop_length} - featurizers: ${feature.featurizers} - feature_processors: - pitch: ${model.pitch_processor} - energy: ${model.energy_processor} - min_duration: 0.1 - max_duration: 10.0 - - dataloader_params: - batch_size: ${batch_size} - num_workers: 4 - - validation_ds: - dataset: - _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset - dataset_meta: ${val_ds_meta} - sample_rate: ${feature.sample_rate} - speaker_path: ${speaker_path} - align_prior_hop_length: ${feature.hop_length} - featurizers: ${feature.featurizers} - feature_processors: - pitch: ${model.pitch_processor} - energy: ${model.energy_processor} - - dataloader_params: - batch_size: ${batch_size} - num_workers: 2 - - log_config: - log_dir: ${log_dir} - log_epochs: [10, 50] - epoch_frequency: 100 - log_tensorboard: false - log_wandb: false - - generators: - - _target_: nemo.collections.tts.parts.utils.callbacks.FastPitchArtifactGenerator - log_spectrogram: true - log_alignment: true - audio_params: - _target_: nemo.collections.tts.parts.utils.callbacks.LogAudioParams - log_audio_gta: true - vocoder_type: ${vocoder_type} - vocoder_name: ${vocoder_name} - vocoder_checkpoint_path: ${vocoder_checkpoint_path} - - dataset: - _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset - text_tokenizer: ${model.text_tokenizer} - sample_rate: ${feature.sample_rate} - speaker_path: ${speaker_path} - align_prior_hop_length: ${feature.hop_length} - featurizers: ${feature.featurizers} - - feature_processors: - pitch: ${model.pitch_processor} - energy: ${model.energy_processor} - - dataset_meta: ${log_ds_meta} - - dataloader_params: - batch_size: 8 - num_workers: 2 - - input_fft: - _target_: nemo.collections.tts.modules.transformer.FFTransformerEncoder - n_layer: 6 - n_head: 2 - d_model: ${model.symbols_embedding_dim} - d_head: 64 - d_inner: 1536 - kernel_size: 3 - dropout: 0.1 - dropatt: 0.1 - dropemb: 0.0 - d_embed: ${model.symbols_embedding_dim} - - output_fft: - _target_: nemo.collections.tts.modules.transformer.FFTransformerDecoder - n_layer: 6 - n_head: 1 - d_model: ${model.symbols_embedding_dim} - d_head: 64 - d_inner: 1536 - kernel_size: 3 - dropout: 0.1 - dropatt: 0.1 - dropemb: 0.0 - - alignment_module: - _target_: nemo.collections.tts.modules.aligner.AlignmentEncoder - n_text_channels: ${model.symbols_embedding_dim} - - duration_predictor: - _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor - input_size: ${model.symbols_embedding_dim} - kernel_size: 3 - filter_size: 256 - dropout: 0.1 - n_layers: 2 - - pitch_predictor: - _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor - input_size: ${model.symbols_embedding_dim} - kernel_size: 3 - filter_size: 256 - dropout: 0.1 - n_layers: 2 - - energy_predictor: - _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor - input_size: ${model.symbols_embedding_dim} - kernel_size: 3 - filter_size: 256 - dropout: 0.1 - n_layers: 2 - - optim: - name: adamw - lr: 1e-3 - betas: [0.9, 0.999] - weight_decay: 1e-6 - - sched: - name: NoamAnnealing - warmup_steps: 1000 - last_epoch: -1 - d_model: 1 # Disable scaling based on model dim - -trainer: - num_nodes: 1 - devices: 1 - accelerator: gpu - strategy: ddp - precision: 16 - max_epochs: ${max_epochs} - accumulate_grad_batches: 1 - gradient_clip_val: 10.0 - enable_checkpointing: false # Provided by exp_manager - logger: false # Provided by exp_manager - log_every_n_steps: 100 - check_val_every_n_epoch: 10 - benchmark: false - -exp_manager: - exp_dir: null - name: ${name} - create_tensorboard_logger: true - create_checkpoint_callback: true - checkpoint_callback_params: - monitor: val_loss - resume_if_exists: false - resume_ignore_no_checkpoint: false diff --git a/examples/tts/conf/hifigan/hifigan_data.yaml b/examples/tts/conf/hifigan/hifigan_data.yaml deleted file mode 100644 index fde2f169aa8d..000000000000 --- a/examples/tts/conf/hifigan/hifigan_data.yaml +++ /dev/null @@ -1,133 +0,0 @@ -# This config contains the default values for training a HiFi-GAN model. -# If you want to train model on other dataset, you can change config values according to your dataset. -# Most dataset-specific arguments are in the head of the config file, see below. - -name: "HifiGan" - -defaults: - - feature: ??? - - sample: ??? - - model/generator: ??? - -max_epochs: ??? -batch_size: 16 -weighted_sampling_steps_per_epoch: null - -train_ds_meta: ??? -val_ds_meta: ??? -log_ds_meta: ??? - -log_dir: ??? - -model: - - max_epochs: ${max_epochs} - steps_per_epoch: ${weighted_sampling_steps_per_epoch} - l1_loss_factor: 60 - - preprocessor: - _target_: nemo.collections.asr.parts.preprocessing.features.FilterbankFeatures - nfilt: ${feature.mel_feature.mel_dim} - lowfreq: ${feature.mel_feature.lowfreq} - highfreq: ${feature.mel_feature.highfreq} - n_fft: ${feature.win_length} - n_window_size: ${feature.win_length} - n_window_stride: ${feature.hop_length} - pad_to: 0 - pad_value: 0 - exact_pad: true - sample_rate: ${feature.sample_rate} - window: hann - normalize: null - preemph: null - dither: 0.0 - frame_splicing: 1 - log: true - log_zero_guard_type: add - log_zero_guard_value: 1.0 - mag_power: 1.0 - mel_norm: null - use_grads: false - - train_ds: - dataset: - _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset - weighted_sampling_steps_per_epoch: ${weighted_sampling_steps_per_epoch} - sample_rate: ${feature.sample_rate} - n_samples: ${sample.train_n_samples} - min_duration: 0.4 - max_duration: null - dataset_meta: ${train_ds_meta} - - dataloader_params: - batch_size: ${batch_size} - num_workers: 4 - - validation_ds: - dataset: - _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset - sample_rate: ${feature.sample_rate} - n_samples: ${sample.val_n_samples} - min_duration: 3.0 - max_duration: null - dataset_meta: ${val_ds_meta} - - dataloader_params: - batch_size: ${batch_size} - num_workers: 2 - - log_config: - log_dir: ${log_dir} - log_epochs: [10, 50] - epoch_frequency: 100 - log_tensorboard: false - log_wandb: false - - generators: - - _target_: nemo.collections.tts.parts.utils.callbacks.VocoderArtifactGenerator - - dataset: - _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset - sample_rate: ${feature.sample_rate} - n_samples: null - min_duration: null - max_duration: null - dataset_meta: ${log_ds_meta} - - dataloader_params: - batch_size: 4 - num_workers: 2 - - optim: - _target_: torch.optim.AdamW - lr: 2e-4 - betas: [0.8, 0.99] - weight_decay: 1e-6 - sched: - name: ExponentialLR - gamma: 0.999 - -trainer: - num_nodes: 1 - devices: 1 - accelerator: gpu - strategy: ddp - precision: 16 - max_epochs: ${max_epochs} - accumulate_grad_batches: 1 - enable_checkpointing: False # Provided by exp_manager - logger: false # Provided by exp_manager - log_every_n_steps: 100 - check_val_every_n_epoch: 10 - benchmark: false - -exp_manager: - exp_dir: null - name: ${name} - create_tensorboard_logger: true - create_checkpoint_callback: true - create_wandb_logger: false - checkpoint_callback_params: - monitor: val_loss - resume_if_exists: false - resume_ignore_no_checkpoint: false diff --git a/examples/tts/conf/hifigan/sample/sample_22050.yaml b/examples/tts/conf/hifigan/sample/sample_22050.yaml deleted file mode 100644 index 18bc206e2566..000000000000 --- a/examples/tts/conf/hifigan/sample/sample_22050.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Audio dataset sampling config for 22.05khz sampling rate -train_n_samples: 8192 -val_n_samples: 66048 diff --git a/examples/tts/conf/hifigan/sample/sample_44100.yaml b/examples/tts/conf/hifigan/sample/sample_44100.yaml deleted file mode 100644 index d8315623bbbe..000000000000 --- a/examples/tts/conf/hifigan/sample/sample_44100.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Audio dataset sampling config for 44.1khz sampling rate -train_n_samples: 16384 -val_n_samples: 131072 diff --git a/examples/tts/g2p/g2p_inference.py b/examples/tts/g2p/g2p_inference.py index 3bb241659e48..e7bffa888653 100644 --- a/examples/tts/g2p/g2p_inference.py +++ b/examples/tts/g2p/g2p_inference.py @@ -19,11 +19,11 @@ import pytorch_lightning as pl import torch from omegaconf import OmegaConf +from utils import get_metrics from nemo.collections.tts.models.base import G2PModel from nemo.core.config import hydra_runner from nemo.utils import logging -from utils import get_metrics """ python g2p_inference.py \ diff --git a/examples/tts/g2p/g2p_train_and_evaluate.py b/examples/tts/g2p/g2p_train_and_evaluate.py index 0f67aa11e09b..ff7b2b0675ea 100644 --- a/examples/tts/g2p/g2p_train_and_evaluate.py +++ b/examples/tts/g2p/g2p_train_and_evaluate.py @@ -16,13 +16,13 @@ import pytorch_lightning as pl import torch +from utils import get_model from nemo.collections.common.callbacks import LogEpochTimeCallback from nemo.collections.tts.models.base import G2PModel from nemo.core.config import hydra_runner from nemo.utils import logging, model_utils from nemo.utils.exp_manager import exp_manager -from utils import get_model """ This script supports training of G2PModels diff --git a/tests/collections/asr/test_asr_interctc_models.py b/tests/collections/asr/test_asr_interctc_models.py index 253f7170b989..39a189ef4aa8 100644 --- a/tests/collections/asr/test_asr_interctc_models.py +++ b/tests/collections/asr/test_asr_interctc_models.py @@ -66,7 +66,6 @@ def squeezeformer_encoder_config() -> Dict: class TestInterCTCLoss: - @pytest.mark.pleasefixme @pytest.mark.unit @pytest.mark.parametrize( "model_class", [EncDecCTCModel, EncDecHybridRNNTCTCModel], @@ -87,7 +86,6 @@ class TestInterCTCLoss: ([], [0.3]), ], ) - @pytest.mark.pleasefixme def test_forward(self, model_class, encoder_config, apply_at_layers, loss_weights): preprocessor_config = {'_target_': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor'} vocabulary = [ diff --git a/tools/ctc_segmentation/scripts/run_ctc_segmentation.py b/tools/ctc_segmentation/scripts/run_ctc_segmentation.py index 884b6186e249..90e67ab844c7 100644 --- a/tools/ctc_segmentation/scripts/run_ctc_segmentation.py +++ b/tools/ctc_segmentation/scripts/run_ctc_segmentation.py @@ -24,9 +24,9 @@ import torch from joblib import Parallel, delayed from tqdm import tqdm +from utils import get_segments import nemo.collections.asr as nemo_asr -from utils import get_segments parser = argparse.ArgumentParser(description="CTC Segmentation") parser.add_argument("--output_dir", default="output", type=str, help="Path to output directory") diff --git a/tutorials/00_NeMo_Primer.ipynb b/tutorials/00_NeMo_Primer.ipynb index 1e484c14f607..50aa60260b35 100644 --- a/tutorials/00_NeMo_Primer.ipynb +++ b/tutorials/00_NeMo_Primer.ipynb @@ -42,7 +42,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/01_NeMo_Models.ipynb b/tutorials/01_NeMo_Models.ipynb index 7334188ac5d0..4255a6656b8a 100644 --- a/tutorials/01_NeMo_Models.ipynb +++ b/tutorials/01_NeMo_Models.ipynb @@ -37,7 +37,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/02_NeMo_Adapters.ipynb b/tutorials/02_NeMo_Adapters.ipynb index 071ed5f72809..289426f3bc2b 100644 --- a/tutorials/02_NeMo_Adapters.ipynb +++ b/tutorials/02_NeMo_Adapters.ipynb @@ -25,7 +25,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/AudioTranslationSample.ipynb b/tutorials/AudioTranslationSample.ipynb index 02fa0325121c..0c34baacc953 100644 --- a/tutorials/AudioTranslationSample.ipynb +++ b/tutorials/AudioTranslationSample.ipynb @@ -38,7 +38,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n" ] }, diff --git a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb index 13b61626e746..ae4f43867c8d 100644 --- a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb +++ b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb @@ -41,7 +41,7 @@ "!pip install text-unidecode\n", "\n", "### Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb index eaeb93bb92ff..b9c0db866f9c 100644 --- a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb +++ b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb @@ -40,7 +40,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/ASR_for_telephony_speech.ipynb b/tutorials/asr/ASR_for_telephony_speech.ipynb index ed1fd1157f1d..6133fdc9a8b9 100644 --- a/tutorials/asr/ASR_for_telephony_speech.ipynb +++ b/tutorials/asr/ASR_for_telephony_speech.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/ASR_with_NeMo.ipynb b/tutorials/asr/ASR_with_NeMo.ipynb index bab9299f57d4..74cd0f739e84 100644 --- a/tutorials/asr/ASR_with_NeMo.ipynb +++ b/tutorials/asr/ASR_with_NeMo.ipynb @@ -54,7 +54,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -588,7 +588,7 @@ "\n", "if not os.path.exists(config_path):\n", " # Grab the config we'll use in this example\n", - " BRANCH = 'r1.20.0'\n", + " BRANCH = 'main'\n", " !mkdir configs\n", " !wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/config.yaml\n", "\n", diff --git a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb index 92dcc2305e89..001336c97094 100644 --- a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb +++ b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb @@ -41,7 +41,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/ASR_with_Transducers.ipynb b/tutorials/asr/ASR_with_Transducers.ipynb index 8a302a8c7130..e1eb494f777e 100644 --- a/tutorials/asr/ASR_with_Transducers.ipynb +++ b/tutorials/asr/ASR_with_Transducers.ipynb @@ -29,7 +29,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference.ipynb b/tutorials/asr/Buffered_Transducer_Inference.ipynb index a6519e73f6ec..c23398dca46a 100644 --- a/tutorials/asr/Buffered_Transducer_Inference.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb index 29ded2c98fa4..2f179eaa9a5a 100644 --- a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb @@ -46,7 +46,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Intro_to_Transducers.ipynb b/tutorials/asr/Intro_to_Transducers.ipynb index 4420085f319f..d3928bed987f 100644 --- a/tutorials/asr/Intro_to_Transducers.ipynb +++ b/tutorials/asr/Intro_to_Transducers.ipynb @@ -44,7 +44,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ], "execution_count": null, diff --git a/tutorials/asr/Multilang_ASR.ipynb b/tutorials/asr/Multilang_ASR.ipynb index 28073a050d5e..7a11cb7dc6a6 100644 --- a/tutorials/asr/Multilang_ASR.ipynb +++ b/tutorials/asr/Multilang_ASR.ipynb @@ -104,7 +104,7 @@ "\n", "## Install NeMo\n", "## We are using the main branch but you might want to adjust that too\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb index 751eedba1519..8544d230878c 100644 --- a/tutorials/asr/Offline_ASR.ipynb +++ b/tutorials/asr/Offline_ASR.ipynb @@ -52,7 +52,7 @@ "id": "I9eIxAyKHREB" }, "source": [ - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "try:\n", " # Import NeMo Speech Recognition collection\n", " import nemo.collections.asr as nemo_asr\n", diff --git a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb index 07149d752e5f..8a8335ac1542 100644 --- a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb +++ b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb @@ -25,7 +25,7 @@ "!pip install wget\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb index 1ffe81dff02f..31d2c0dec943 100644 --- a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb +++ b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb @@ -27,7 +27,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Online_Noise_Augmentation.ipynb b/tutorials/asr/Online_Noise_Augmentation.ipynb index a4c192f7f7e5..8883cce55a80 100644 --- a/tutorials/asr/Online_Noise_Augmentation.ipynb +++ b/tutorials/asr/Online_Noise_Augmentation.ipynb @@ -32,7 +32,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb index f52e4777f5d3..490a4b6c8de7 100644 --- a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb @@ -27,7 +27,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb index 9ca2232142ad..858f162b1834 100644 --- a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb @@ -29,7 +29,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Self_Supervised_Pre_Training.ipynb b/tutorials/asr/Self_Supervised_Pre_Training.ipynb index 454840ded683..b055f14f5885 100644 --- a/tutorials/asr/Self_Supervised_Pre_Training.ipynb +++ b/tutorials/asr/Self_Supervised_Pre_Training.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Speech_Commands.ipynb b/tutorials/asr/Speech_Commands.ipynb index cc6c4956b8a5..58b719a867fa 100644 --- a/tutorials/asr/Speech_Commands.ipynb +++ b/tutorials/asr/Speech_Commands.ipynb @@ -61,7 +61,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Streaming_ASR.ipynb b/tutorials/asr/Streaming_ASR.ipynb index 19c998120d9d..a4701dc025d8 100644 --- a/tutorials/asr/Streaming_ASR.ipynb +++ b/tutorials/asr/Streaming_ASR.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Voice_Activity_Detection.ipynb b/tutorials/asr/Voice_Activity_Detection.ipynb index 0e5a59312506..123a03efc28e 100644 --- a/tutorials/asr/Voice_Activity_Detection.ipynb +++ b/tutorials/asr/Voice_Activity_Detection.ipynb @@ -30,7 +30,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb index 957ecdb49985..c9c547a8383e 100644 --- a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb +++ b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb @@ -50,7 +50,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb b/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb index bda6338dd0bf..c4406a4f04ee 100644 --- a/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb +++ b/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb @@ -70,7 +70,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb b/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb index 88ddb5189cc4..8cf540b27114 100644 --- a/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb +++ b/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb @@ -55,7 +55,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb index 75428c686e7e..faa93de12514 100644 --- a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb +++ b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb @@ -26,7 +26,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/02_NLP_Tokenizers.ipynb b/tutorials/nlp/02_NLP_Tokenizers.ipynb index ce6334f9712d..c63d2a8b1689 100644 --- a/tutorials/nlp/02_NLP_Tokenizers.ipynb +++ b/tutorials/nlp/02_NLP_Tokenizers.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'" + "BRANCH = 'main'" ] }, { @@ -35,7 +35,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb index a1aa3cdae55b..323bfa1c49b8 100644 --- a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb +++ b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb @@ -300,7 +300,7 @@ "\n", "## Install NeMo\n", "\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "!pip uninstall -y sacrebleu\n", diff --git a/tutorials/nlp/Dialogue.ipynb b/tutorials/nlp/Dialogue.ipynb index e5ec1941b032..ddd3bdd4f929 100644 --- a/tutorials/nlp/Dialogue.ipynb +++ b/tutorials/nlp/Dialogue.ipynb @@ -27,7 +27,7 @@ "outputs": [], "source": [ "import os \n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!apt-get update && apt-get install -y libsndfile1 ffmpeg\n", "!git clone https://github.com/NVIDIA/NeMo --branch $BRANCH\n", "os.chdir('NeMo')\n", diff --git a/tutorials/nlp/Entity_Linking_Medical.ipynb b/tutorials/nlp/Entity_Linking_Medical.ipynb index f2644af1f764..dfdf594e6804 100644 --- a/tutorials/nlp/Entity_Linking_Medical.ipynb +++ b/tutorials/nlp/Entity_Linking_Medical.ipynb @@ -17,7 +17,7 @@ "\"\"\"\n", "\n", "## Install NeMo if using google collab or if its not installed locally\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/nlp/GLUE_Benchmark.ipynb b/tutorials/nlp/GLUE_Benchmark.ipynb index 0162e5c39a32..b77b3439b444 100644 --- a/tutorials/nlp/GLUE_Benchmark.ipynb +++ b/tutorials/nlp/GLUE_Benchmark.ipynb @@ -44,7 +44,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.20.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'main'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb index fc22fa05d162..675fdfd5351c 100644 --- a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb +++ b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/MegatronBert_export.ipynb b/tutorials/nlp/MegatronBert_export.ipynb index 4e88195c9635..f925d2bc59b0 100644 --- a/tutorials/nlp/MegatronBert_export.ipynb +++ b/tutorials/nlp/MegatronBert_export.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='r1.20.0'" + "BRANCH='main'" ] }, { diff --git a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb index cc158388feb5..bfd3c7094198 100644 --- a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb +++ b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb @@ -65,7 +65,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "DATA_PATH='.'\n", "TRANSACTIONS=DATA_PATH+'/card_transaction.v1.csv'\n", "#CHECKPOINTS='/chk_points'\n", diff --git a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb index 87bfc5c9b30d..004014ebdeeb 100644 --- a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb +++ b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='r1.20.0'" + "BRANCH='main'" ] }, { diff --git a/tutorials/nlp/Punctuation_and_Capitalization.ipynb b/tutorials/nlp/Punctuation_and_Capitalization.ipynb index 1545b4329d2b..f88c33fada34 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'" + "BRANCH = 'main'" ] }, { diff --git a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb index 5fbe61139980..2afbb19c0e66 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'" + "BRANCH = 'main'" ] }, { diff --git a/tutorials/nlp/Question_Answering.ipynb b/tutorials/nlp/Question_Answering.ipynb index af95c7707828..a211c8320d51 100644 --- a/tutorials/nlp/Question_Answering.ipynb +++ b/tutorials/nlp/Question_Answering.ipynb @@ -74,7 +74,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'" + "BRANCH = 'main'" ] }, { diff --git a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb index 8920a0738641..d6b1e98b428e 100644 --- a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb +++ b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'" + "BRANCH = 'main'" ] }, { diff --git a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb index 412860045cb0..fdcff979ea46 100644 --- a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb +++ b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb @@ -20,7 +20,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n", "\n" ] diff --git a/tutorials/nlp/Token_Classification-BioMegatron.ipynb b/tutorials/nlp/Token_Classification-BioMegatron.ipynb index 56bf3261bcd3..afbc8394aa84 100644 --- a/tutorials/nlp/Token_Classification-BioMegatron.ipynb +++ b/tutorials/nlp/Token_Classification-BioMegatron.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='r1.20.0'" + "BRANCH='main'" ] }, { diff --git a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb index 2888703b9368..3ab98f6c19fd 100644 --- a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb +++ b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb @@ -30,7 +30,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'r1.20.0'" + "BRANCH = 'main'" ] }, { @@ -53,7 +53,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.20.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'main'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb index 46e7e24f4130..7f1baf536d87 100644 --- a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb +++ b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb index 90dacd5f6b8d..ea943b35e0d0 100644 --- a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb +++ b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb @@ -30,7 +30,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb index 6caaa35e0765..1fd0f1b140d5 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb @@ -23,7 +23,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb index f08c7ae88385..7db905b6d225 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "\n", "! git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", @@ -232,7 +232,7 @@ "source": [ "import os\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "\n", "# download scripts if not already there \n", "if not os.path.exists('NeMo/scripts'):\n", diff --git a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb index f92864ae3306..27a01b894eae 100644 --- a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb +++ b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb @@ -27,7 +27,7 @@ "!pip install text-unidecode\n", "\n", "## Install NeMo\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "# Install TorchAudio\n", diff --git a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb index 770d9e7894c9..98f0cce4e9ec 100644 --- a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb +++ b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb @@ -35,7 +35,7 @@ "id": "d4KCUoxSpdoZ" }, "source": [ - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "\n", "\"\"\"\n", "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", diff --git a/tutorials/tools/Multispeaker_Simulator.ipynb b/tutorials/tools/Multispeaker_Simulator.ipynb index 2841b1df62d1..c2a9caf1ea72 100644 --- a/tutorials/tools/Multispeaker_Simulator.ipynb +++ b/tutorials/tools/Multispeaker_Simulator.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "\n", "! git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", diff --git a/tutorials/tts/Aligner_Inference_Examples.ipynb b/tutorials/tts/Aligner_Inference_Examples.ipynb index 3653b9e3ed8d..611e1e3b6e66 100644 --- a/tutorials/tts/Aligner_Inference_Examples.ipynb +++ b/tutorials/tts/Aligner_Inference_Examples.ipynb @@ -39,7 +39,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb b/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb index 647242294197..699f1b131408 100644 --- a/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb +++ b/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb @@ -57,7 +57,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !pip install librosa numpy matplotlib" ] diff --git a/tutorials/tts/FastPitch_Finetuning.ipynb b/tutorials/tts/FastPitch_Finetuning.ipynb index 101a71aaf089..e0c34b3c0de5 100755 --- a/tutorials/tts/FastPitch_Finetuning.ipynb +++ b/tutorials/tts/FastPitch_Finetuning.ipynb @@ -57,7 +57,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode \n", diff --git a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb index 70e989bb98af..747ecfa43127 100644 --- a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb @@ -50,7 +50,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode scipy==1.7.3\n", diff --git a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb index 7ab6d1d58ac8..eda5bba0aa1e 100644 --- a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb +++ b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb @@ -94,7 +94,7 @@ "source": [ "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/tts/Inference_DurationPitchControl.ipynb b/tutorials/tts/Inference_DurationPitchControl.ipynb index b7acbd364507..73c12bc79900 100644 --- a/tutorials/tts/Inference_DurationPitchControl.ipynb +++ b/tutorials/tts/Inference_DurationPitchControl.ipynb @@ -46,7 +46,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/Inference_ModelSelect.ipynb b/tutorials/tts/Inference_ModelSelect.ipynb index 4c39e591f203..195b773fb5ee 100644 --- a/tutorials/tts/Inference_ModelSelect.ipynb +++ b/tutorials/tts/Inference_ModelSelect.ipynb @@ -46,7 +46,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/NeMo_TTS_Primer.ipynb b/tutorials/tts/NeMo_TTS_Primer.ipynb index f891b7f11594..fe2e34659554 100644 --- a/tutorials/tts/NeMo_TTS_Primer.ipynb +++ b/tutorials/tts/NeMo_TTS_Primer.ipynb @@ -25,7 +25,7 @@ "source": [ "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/tts/Pronunciation_customization.ipynb b/tutorials/tts/Pronunciation_customization.ipynb index be99231ec9eb..6fe269e76904 100644 --- a/tutorials/tts/Pronunciation_customization.ipynb +++ b/tutorials/tts/Pronunciation_customization.ipynb @@ -26,7 +26,7 @@ "4. Run this cell to set up dependencies.\n", "\"\"\"\n", "\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode \n", diff --git a/tutorials/tts/Tacotron2_Training.ipynb b/tutorials/tts/Tacotron2_Training.ipynb index 8473dca449ea..79546bb79db9 100644 --- a/tutorials/tts/Tacotron2_Training.ipynb +++ b/tutorials/tts/Tacotron2_Training.ipynb @@ -54,7 +54,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", - "BRANCH = 'r1.20.0'\n", + "BRANCH = 'main'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", From 38dc2905a4f7ba76d450ac46df026a1e1e83d176 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Oct 2023 18:37:19 +0000 Subject: [PATCH 340/512] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- examples/tts/g2p/g2p_inference.py | 2 +- examples/tts/g2p/g2p_train_and_evaluate.py | 2 +- tools/ctc_segmentation/scripts/run_ctc_segmentation.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/tts/g2p/g2p_inference.py b/examples/tts/g2p/g2p_inference.py index e7bffa888653..3bb241659e48 100644 --- a/examples/tts/g2p/g2p_inference.py +++ b/examples/tts/g2p/g2p_inference.py @@ -19,11 +19,11 @@ import pytorch_lightning as pl import torch from omegaconf import OmegaConf -from utils import get_metrics from nemo.collections.tts.models.base import G2PModel from nemo.core.config import hydra_runner from nemo.utils import logging +from utils import get_metrics """ python g2p_inference.py \ diff --git a/examples/tts/g2p/g2p_train_and_evaluate.py b/examples/tts/g2p/g2p_train_and_evaluate.py index ff7b2b0675ea..0f67aa11e09b 100644 --- a/examples/tts/g2p/g2p_train_and_evaluate.py +++ b/examples/tts/g2p/g2p_train_and_evaluate.py @@ -16,13 +16,13 @@ import pytorch_lightning as pl import torch -from utils import get_model from nemo.collections.common.callbacks import LogEpochTimeCallback from nemo.collections.tts.models.base import G2PModel from nemo.core.config import hydra_runner from nemo.utils import logging, model_utils from nemo.utils.exp_manager import exp_manager +from utils import get_model """ This script supports training of G2PModels diff --git a/tools/ctc_segmentation/scripts/run_ctc_segmentation.py b/tools/ctc_segmentation/scripts/run_ctc_segmentation.py index 90e67ab844c7..884b6186e249 100644 --- a/tools/ctc_segmentation/scripts/run_ctc_segmentation.py +++ b/tools/ctc_segmentation/scripts/run_ctc_segmentation.py @@ -24,9 +24,9 @@ import torch from joblib import Parallel, delayed from tqdm import tqdm -from utils import get_segments import nemo.collections.asr as nemo_asr +from utils import get_segments parser = argparse.ArgumentParser(description="CTC Segmentation") parser.add_argument("--output_dir", default="output", type=str, help="Path to output directory") From 563cadb7aa60dae1783604a5bdd1d0922f1fa80e Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Mon, 16 Oct 2023 12:41:33 -0700 Subject: [PATCH 341/512] Remove deployment and export scripts --- .../nsfw/conf/megatron_nsfw_export.yaml | 15 - .../nsfw/megatron_nsfw_export.py | 111 ---- .../clip/conf/megatron_clip_export.yaml | 16 - .../foundation/clip/megatron_clip_export.py | 148 ------ .../controlnet/conf/controlnet_export.yaml | 24 - .../controlnet/controlnet_export.py | 329 ------------ .../dreambooth/conf/dreambooth_export.yaml | 23 - .../dreambooth/dreambooth_export.py | 238 --------- .../generative/imagen/conf/export.yaml | 28 - .../generative/imagen/imagen_export.py | 218 -------- .../instruct_pix2pix/conf/sd_export.yaml | 19 - .../instruct_pix2pix/sd_edit_export.py | 325 ------------ .../stable_diffusion/conf/sd_export.yaml | 23 - .../generative/stable_diffusion/sd_export.py | 241 --------- .../mllm/neva/conf/neva_export.yaml | 17 - examples/multimodal/mllm/neva/neva_export.py | 206 -------- .../megatron_vit_classification_export.yaml | 16 - .../megatron_vit_classification_export.py | 163 ------ nemo/deploy/__init__.py | 18 - nemo/deploy/deploy_base.py | 93 ---- nemo/deploy/deploy_pytriton.py | 149 ------ nemo/deploy/query.py | 79 --- nemo/deploy/triton_deployable.py | 31 -- nemo/deploy/utils.py | 79 --- nemo/export/__init__.py | 15 - nemo/export/tensorrt_llm.py | 273 ---------- nemo/export/trt_llm/__init__.py | 33 -- nemo/export/trt_llm/decoder/__init__.py | 56 -- nemo/export/trt_llm/decoder/decoder.py | 202 -------- nemo/export/trt_llm/decoder/gpt.py | 99 ---- nemo/export/trt_llm/decoder/gptj.py | 94 ---- nemo/export/trt_llm/decoder/llama.py | 101 ---- nemo/export/trt_llm/huggingface_utils.py | 138 ----- nemo/export/trt_llm/model_config.py | 415 --------------- nemo/export/trt_llm/model_config_trt.py | 65 --- nemo/export/trt_llm/model_config_utils.py | 238 --------- nemo/export/trt_llm/nemo/convert.py | 343 ------------ nemo/export/trt_llm/nemo/nemo.py | 269 ---------- nemo/export/trt_llm/nemo/nemo_ckpt_convert.py | 282 ---------- nemo/export/trt_llm/nemo_utils.py | 184 ------- nemo/export/trt_llm/quantization_utils.py | 119 ----- nemo/export/trt_llm/tensor_utils.py | 62 --- nemo/export/trt_llm/tensorrt_llm_build.py | 296 ----------- nemo/export/trt_llm/tensorrt_llm_model.py | 487 ------------------ nemo/export/trt_llm/tensorrt_llm_run.py | 317 ------------ nemo/export/trt_llm/tensorrt_llm_utils.py | 73 --- nemo/export/utils.py | 155 ------ 47 files changed, 6925 deletions(-) delete mode 100644 examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_export.yaml delete mode 100644 examples/multimodal/content_filtering/nsfw/megatron_nsfw_export.py delete mode 100644 examples/multimodal/foundation/clip/conf/megatron_clip_export.yaml delete mode 100644 examples/multimodal/foundation/clip/megatron_clip_export.py delete mode 100644 examples/multimodal/generative/controlnet/conf/controlnet_export.yaml delete mode 100644 examples/multimodal/generative/controlnet/controlnet_export.py delete mode 100644 examples/multimodal/generative/dreambooth/conf/dreambooth_export.yaml delete mode 100644 examples/multimodal/generative/dreambooth/dreambooth_export.py delete mode 100644 examples/multimodal/generative/imagen/conf/export.yaml delete mode 100644 examples/multimodal/generative/imagen/imagen_export.py delete mode 100644 examples/multimodal/generative/instruct_pix2pix/conf/sd_export.yaml delete mode 100644 examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py delete mode 100644 examples/multimodal/generative/stable_diffusion/conf/sd_export.yaml delete mode 100644 examples/multimodal/generative/stable_diffusion/sd_export.py delete mode 100644 examples/multimodal/mllm/neva/conf/neva_export.yaml delete mode 100644 examples/multimodal/mllm/neva/neva_export.py delete mode 100644 examples/vision/vision_transformer/conf/megatron_vit_classification_export.yaml delete mode 100644 examples/vision/vision_transformer/megatron_vit_classification_export.py delete mode 100644 nemo/deploy/__init__.py delete mode 100644 nemo/deploy/deploy_base.py delete mode 100644 nemo/deploy/deploy_pytriton.py delete mode 100644 nemo/deploy/query.py delete mode 100644 nemo/deploy/triton_deployable.py delete mode 100644 nemo/deploy/utils.py delete mode 100644 nemo/export/__init__.py delete mode 100644 nemo/export/tensorrt_llm.py delete mode 100644 nemo/export/trt_llm/__init__.py delete mode 100644 nemo/export/trt_llm/decoder/__init__.py delete mode 100644 nemo/export/trt_llm/decoder/decoder.py delete mode 100644 nemo/export/trt_llm/decoder/gpt.py delete mode 100644 nemo/export/trt_llm/decoder/gptj.py delete mode 100644 nemo/export/trt_llm/decoder/llama.py delete mode 100644 nemo/export/trt_llm/huggingface_utils.py delete mode 100644 nemo/export/trt_llm/model_config.py delete mode 100644 nemo/export/trt_llm/model_config_trt.py delete mode 100644 nemo/export/trt_llm/model_config_utils.py delete mode 100644 nemo/export/trt_llm/nemo/convert.py delete mode 100644 nemo/export/trt_llm/nemo/nemo.py delete mode 100644 nemo/export/trt_llm/nemo/nemo_ckpt_convert.py delete mode 100644 nemo/export/trt_llm/nemo_utils.py delete mode 100644 nemo/export/trt_llm/quantization_utils.py delete mode 100644 nemo/export/trt_llm/tensor_utils.py delete mode 100644 nemo/export/trt_llm/tensorrt_llm_build.py delete mode 100644 nemo/export/trt_llm/tensorrt_llm_model.py delete mode 100644 nemo/export/trt_llm/tensorrt_llm_run.py delete mode 100644 nemo/export/trt_llm/tensorrt_llm_utils.py delete mode 100644 nemo/export/utils.py diff --git a/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_export.yaml b/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_export.yaml deleted file mode 100644 index 19f5d8ee6e67..000000000000 --- a/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_export.yaml +++ /dev/null @@ -1,15 +0,0 @@ -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -infer: - out_path: "megatron-nsfw" - max_batch_size: 64 - max_dim: 224 - -model: - restore_from_path: null # Path to a trained ViT .nemo file - precision: ${trainer.precision} diff --git a/examples/multimodal/content_filtering/nsfw/megatron_nsfw_export.py b/examples/multimodal/content_filtering/nsfw/megatron_nsfw_export.py deleted file mode 100644 index 344e5b492dbe..000000000000 --- a/examples/multimodal/content_filtering/nsfw/megatron_nsfw_export.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from typing import Dict, List, Optional - -import torch -from omegaconf.omegaconf import OmegaConf - -from nemo.collections.multimodal.models.content_filter.megatron_nsfw_clip_models import MegatronContentFilteringModel -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.core.config import hydra_runner -from nemo.core.neural_types import ChannelType, NeuralType -from nemo.utils import logging -from nemo.utils.trt_utils import build_engine - - -class ContentFilteringWrapper(MegatronContentFilteringModel): - def __init__(self, cfg, trainer): - super(ContentFilteringWrapper, self).__init__(cfg, trainer) - - def forward(self, image: torch.Tensor): - return super().forward(image, mlp_factor=1.0, emb_factor=1.0).sigmoid() - - def input_example(self, max_batch: int = 64, max_dim: int = 224): - device = next(self.parameters()).device - return (torch.randn(max_batch, 3, max_dim, max_dim, device=device),) - - @property - def input_names(self) -> List[str]: - return ["images"] - - @property - def output_names(self) -> List[str]: - return ["nsfw_probs"] - - @property - def input_types(self) -> Optional[Dict[str, NeuralType]]: - return {"images": NeuralType(("B", "C", "H", "W"), ChannelType())} - - @property - def output_types(self) -> Optional[Dict[str, NeuralType]]: - return {"nsfw_probs": NeuralType(("B",), ChannelType())} - - -def set_envvar(): - os.environ["LOCAL_RANK"] = os.environ.get("LOCAL_RANK", "0") - os.environ["RANK"] = os.environ.get("RANK", "0") - os.environ["LOCAL_SIZE"] = os.environ.get("LOCAL_SIZE", "1") - os.environ["WORLD_SIZE"] = os.environ.get("WORLD_SIZE", "1") - - -@hydra_runner(config_path="conf", config_name="megatron_nsfw_export") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - set_envvar() - - output_dir = cfg.infer.out_path - max_batch_size = cfg.infer.max_batch_size - trt_precision = cfg.trainer.precision - cfg.trainer.precision = 32 - - # These configs are required to be off during inference. - def model_cfg_modifier(model_cfg): - model_cfg.precision = cfg.trainer.precision - model_cfg.vision.precision = cfg.trainer.precision - if cfg.trainer.precision != "bf16": - model_cfg.megatron_amp_O2 = False - model_cfg.sequence_parallel = False - model_cfg.activations_checkpoint_granularity = None - model_cfg.activations_checkpoint_method = None - - trainer, model = setup_trainer_and_model_for_inference( - model_provider=ContentFilteringWrapper, cfg=cfg, model_cfg_modifier=model_cfg_modifier - ) - - bs1_example = model.input_example(max_batch=1, max_dim=cfg.infer.max_dim) - bsmax_example = model.input_example(max_batch=max_batch_size, max_dim=cfg.infer.max_dim) - - os.makedirs(f"{output_dir}/onnx", exist_ok=True) - model.export(f"{output_dir}/onnx/nsfw.onnx", dynamic_axes={"images": {0: "B"}}, input_example=bsmax_example) - - input_profile = { - "images": [tuple(bs1_example[0].shape), tuple(bsmax_example[0].shape), tuple(bsmax_example[0].shape),] - } - - build_engine( - f"{output_dir}/onnx/nsfw.onnx", - f"{output_dir}/plan/nsfw.plan", - fp16=(trt_precision == 16), - input_profile=input_profile, - timing_cache=None, - workspace_size=0, - ) - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/foundation/clip/conf/megatron_clip_export.yaml b/examples/multimodal/foundation/clip/conf/megatron_clip_export.yaml deleted file mode 100644 index 86abd360b01e..000000000000 --- a/examples/multimodal/foundation/clip/conf/megatron_clip_export.yaml +++ /dev/null @@ -1,16 +0,0 @@ -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -infer: - out_path: 'megatron-clip' - max_batch_size: 64 - max_dim: 224 - max_text: 64 - -model: - restore_from_path: null # Path to a trained ViT .nemo file - precision: ${trainer.precision} diff --git a/examples/multimodal/foundation/clip/megatron_clip_export.py b/examples/multimodal/foundation/clip/megatron_clip_export.py deleted file mode 100644 index a15df91922ed..000000000000 --- a/examples/multimodal/foundation/clip/megatron_clip_export.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from typing import Dict, List, Optional - -import torch -from omegaconf.omegaconf import OmegaConf, open_dict -from PIL import Image - -from nemo.collections.multimodal.data.clip.clip_dataset import get_preprocess_fns -from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.core.classes.exportable import Exportable -from nemo.core.config import hydra_runner -from nemo.core.neural_types import ChannelType, NeuralType -from nemo.utils import logging -from nemo.utils.trt_utils import build_engine - - -class CLIPWrapper(torch.nn.Module, Exportable): - def __init__(self, vision_encoder, text_encoder, text_transform): - super(CLIPWrapper, self).__init__() - self.vision_encoder = vision_encoder - self.text_encoder = text_encoder - self.text_transform = text_transform - - def forward(self, image, texts): - image_features = self.vision_encoder(image) - text_features = self.text_encoder(texts) - image_features /= image_features.norm(dim=-1, keepdim=True) - text_features /= text_features.norm(dim=-1, keepdim=True) - - text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1) - - return text_probs - - # For onnx export - def input_example(self, max_batch=8, max_dim=224, max_text=64): - """ - Generates input examples for tracing etc. - Returns: - A tuple of input examples. - """ - sample = next(self.parameters()) - images = torch.randn(max_batch, 3, max_dim, max_dim, device=sample.device) - texts = self.text_transform(["a girl"] * max_text).to(sample.device) - return (images, texts) - - @property - def input_types(self) -> Optional[Dict[str, NeuralType]]: - return { - "images": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), - "texts": NeuralType(('H', 'D'), ChannelType()), - } - - @property - def output_types(self) -> Optional[Dict[str, NeuralType]]: - return {"text_probs": NeuralType(('B', 'H'), ChannelType())} - - @property - def input_names(self) -> List[str]: - return ['images', 'texts'] - - @property - def output_names(self) -> List[str]: - return ['text_probs'] - - -@hydra_runner(config_path="conf", config_name="megatron_clip_export") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - output_dir = cfg.infer.out_path - max_batch_size = cfg.infer.max_batch_size - max_dim = cfg.infer.max_dim - max_text = cfg.infer.max_text - trt_precision = cfg.trainer.precision - cfg.trainer.precision = 32 - - # These configs are required to be off during inference. - def model_cfg_modifier(model_cfg): - model_cfg.precision = cfg.trainer.precision - model_cfg.vision.precision = cfg.trainer.precision - model_cfg.text.precision = cfg.trainer.precision - if cfg.trainer.precision != "bf16": - model_cfg.megatron_amp_O2 = False - model_cfg.sequence_parallel = False - model_cfg.activations_checkpoint_granularity = None - model_cfg.activations_checkpoint_method = None - - trainer, model = setup_trainer_and_model_for_inference( - model_provider=MegatronCLIPModel, cfg=cfg, model_cfg_modifier=model_cfg_modifier, - ) - - if model.cfg.get("megatron_amp_O2", False): - vision_encoder = model.model.module.vision_encoder - text_encoder = model.model.module.text_encoder - else: - vision_encoder = model.model.vision_encoder - text_encoder = model.model.text_encoder - - val_image_transform, text_transform = get_preprocess_fns(model.cfg, model.tokenizer, is_train=False,) - - os.makedirs(f"{output_dir}/onnx/", exist_ok=True) - os.makedirs(f"{output_dir}/plan/", exist_ok=True) - - clip_model = CLIPWrapper(vision_encoder, text_encoder, text_transform) - dynamic_axes = {'images': {0: 'B'}, 'texts_input': {0, 'H'}} - clip_model.export(f"{output_dir}/onnx/clip.onnx", dynamic_axes=None) - - input_profile = {} - bs1_example = clip_model.input_example(max_batch=1, max_dim=max_dim, max_text=1) - bsmax_example = clip_model.input_example(max_batch=max_batch_size, max_dim=max_dim, max_text=max_text) - input_profile['images'] = [ - tuple(bs1_example[0].shape), - tuple(bsmax_example[0].shape), - tuple(bsmax_example[0].shape), - ] - input_profile['texts'] = [ - tuple(bs1_example[1].shape), - tuple(bsmax_example[1].shape), - tuple(bsmax_example[1].shape), - ] - build_engine( - f"{output_dir}/onnx/clip.onnx", - f"{output_dir}/plan/clip.plan", - fp16=(trt_precision in [16, '16', '16-mixed']), - input_profile=input_profile, - timing_cache=None, - workspace_size=0, - ) - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/generative/controlnet/conf/controlnet_export.yaml b/examples/multimodal/generative/controlnet/conf/controlnet_export.yaml deleted file mode 100644 index 35e7ce9c48e4..000000000000 --- a/examples/multimodal/generative/controlnet/conf/controlnet_export.yaml +++ /dev/null @@ -1,24 +0,0 @@ -name: controlnet-export - -infer: - unconditional_guidance_scale: 3 - num_images_per_prompt: 1 - hint_image_size: 512 - height: 512 - width: 512 - down_factor: 8 - inference_steps: 50 - sampler_type: 'DDIM' - eta: 0 - out_path: 'controlnet' - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: 16 - logger: False # logger provided by exp_manager - -model: - restore_from_path: /ckpts/controlnet/nemo_controlnet.nemo - precision: ${trainer.precision} diff --git a/examples/multimodal/generative/controlnet/controlnet_export.py b/examples/multimodal/generative/controlnet/controlnet_export.py deleted file mode 100644 index 57f2f3c90b3e..000000000000 --- a/examples/multimodal/generative/controlnet/controlnet_export.py +++ /dev/null @@ -1,329 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import gc -import os -from typing import Dict, List, Optional - -import torch -import torch.nn as nn -from omegaconf.omegaconf import OmegaConf - -from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet -from nemo.collections.multimodal.modules.stable_diffusion.encoders.modules import FrozenCLIPEmbedder -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.core.classes.exportable import Exportable -from nemo.core.config import hydra_runner -from nemo.core.neural_types import ChannelType, NeuralType -from nemo.utils.trt_utils import build_engine - - -@hydra_runner(config_path='conf', config_name='controlnet_export') -def main(cfg): - # setup default values for inference configs - - batch_size = cfg.infer.get('num_images_per_prompt', 1) - height = cfg.infer.get('height', 512) - width = cfg.infer.get('width', 512) - hint_image_size = cfg.infer.get('hint_image_size', 512) - downsampling_factor = cfg.infer.get('down_factor', 8) - fp16 = 16 == cfg.trainer.get("precision", 32) - if cfg.trainer.get("precision", 32) in ['bf16', 'bf16-mixed']: - print("BF16 not supported for export, will use fp32") - - def model_cfg_modifier(model_cfg): - model_cfg.precision = cfg.trainer.precision - model_cfg.ckpt_path = None - model_cfg.inductor = False - model_cfg.unet_config.from_pretrained = None - model_cfg.first_stage_config.from_pretrained = None - model_cfg.control_stage_config.from_pretrained_unet = None - model_cfg.channels_last = True - model_cfg.capture_cudagraph_iters = -1 - - trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronControlNet, cfg=cfg, model_cfg_modifier=model_cfg_modifier - ) - model = megatron_diffusion_model.model - model.cuda().eval() - - in_channels = model.model.diffusion_model.in_channels - shape_of_internal = [in_channels, height // downsampling_factor, width // downsampling_factor] - fake_text = [""] - out = model.cond_stage_model(fake_text) - - output_dir = cfg.infer.out_path - os.makedirs(f"{output_dir}/onnx/controlnet/", exist_ok=True) - os.makedirs(f"{output_dir}/onnx/unet/", exist_ok=True) - os.makedirs(f"{output_dir}/onnx/clip/", exist_ok=True) - os.makedirs(f"{output_dir}/onnx/vae/", exist_ok=True) - os.makedirs(f"{output_dir}/plan/", exist_ok=True) - deployment_conf = OmegaConf.create( - { - 'controlnet': OmegaConf.create({}), - 'clip': OmegaConf.create({}), - 'unet': OmegaConf.create({}), - 'vae': OmegaConf.create({}), - 'sampler': OmegaConf.create({}), - 'batch_size': batch_size, - 'downsampling_factor': downsampling_factor, - 'in_channels': in_channels, - 'height': height, - 'width': width, - 'hint_image_size': hint_image_size, - } - ) - deployment_conf.sampler.eta = cfg.infer.get('eta', 0) - deployment_conf.sampler.inference_steps = cfg.infer.get('inference_steps', 50) - deployment_conf.sampler.sampler_type = cfg.infer.get('sampler_type', "ddim") - - ### Controlnet Export - x = torch.randn(1, *shape_of_internal, device="cuda") - t = torch.randint(high=10, size=(1,), device="cuda") - cc = torch.randn(1, out.shape[1], out.shape[2], device="cuda") - hint = torch.randn(1, 3, hint_image_size, hint_image_size, device="cuda") # b c h w - - controlnet_inputs = (x, hint, t, cc) - control_outs = model.control_model(*controlnet_inputs) - control_names = [f"control_{i}" for i in range(len(control_outs))] - - input_names = ["x", "hint", "t", "context"] - output_names = control_names - - print('Running Controlnet onnx export') - torch.onnx.export( - model.control_model, - controlnet_inputs, - f"{output_dir}/onnx/controlnet/controlnet.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"x": {0: 'B'}, "hint": {0: 'B'}, "t": {0: 'B'}, "context": {0: 'B'}}, - opset_version=17, - ) - - input_profile_controlnet = {} - input_profile_controlnet["x"] = [(batch_size, *(x.shape[1:]))] * 3 - input_profile_controlnet["hint"] = [(batch_size, *(hint.shape[1:]))] * 3 - input_profile_controlnet["t"] = [(batch_size, *(t.shape[1:]))] * 3 - input_profile_controlnet["context"] = [(batch_size, *(cc.shape[1:]))] * 3 - - deployment_conf.controlnet.x = input_profile_controlnet["x"][0] - deployment_conf.controlnet.hint = input_profile_controlnet["hint"][0] - deployment_conf.controlnet.t = input_profile_controlnet["t"][0] - deployment_conf.controlnet.context = input_profile_controlnet["context"][0] - deployment_conf.controlnet.control = OmegaConf.create({}) - - for control_name, control_out in zip(control_names, control_outs): - deployment_conf.controlnet.control.update({control_name: (batch_size, *(control_out.shape[1:]))}) - - ### UNet Export - input_names = ["x", "t", "context"] + control_names - output_names = ["logits"] - - class UNETControlWrapper(nn.Module): - def __init__(self, model): - super().__init__() - self.model = model - - def forward(self, x, t, cc, *control): - if any(part_control is None for part_control in control): - control = None - else: - control = list(control) - - return self.model(x=x, timesteps=t, context=cc, control=control) - - print('Running UNET onnx export') - torch.onnx.export( - UNETControlWrapper(model.model.diffusion_model), - (x, t, cc, *control_outs), - f"{output_dir}/onnx/unet/unet.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={ - **{"x": {0: 'B'}, "t": {0: 'B'}, "context": {0: 'B'}}, - **{control_name: {0: 'B'} for control_name in control_names}, - }, - opset_version=17, - ) - - input_profile_unet = {} - input_profile_unet["x"] = [(batch_size, *(x.shape[1:]))] * 3 - input_profile_unet["t"] = [(batch_size, *(t.shape[1:]))] * 3 - input_profile_unet["context"] = [(batch_size, *(cc.shape[1:]))] * 3 - - deployment_conf.unet.x = input_profile_unet["x"][0] - deployment_conf.unet.t = input_profile_unet["t"][0] - deployment_conf.unet.context = input_profile_unet["context"][0] - deployment_conf.unet.logits = input_profile_unet["x"][0] - deployment_conf.unet.control = OmegaConf.create({}) - - for control_name, control_out in zip(control_names, control_outs): - input_profile_unet[control_name] = [(batch_size, *(control_out.shape[1:]))] * 3 - deployment_conf.unet.control.update({control_name: input_profile_unet[control_name][0]}) - - ### VAE Export - class VAEWrapper(nn.Module): - def __init__(self, model): - super().__init__() - self.model = model - - def forward(self, z): - z = self.model.post_quant_conv(z) - return self.model.decoder(z) - - input_names = ["z"] - output_names = ["logits"] - z = torch.randn(1, *shape_of_internal, device="cuda") - - print('Running VAE onnx export') - torch.onnx.export( - VAEWrapper(model.first_stage_model), - (z,), - f"{output_dir}/onnx/vae/vae.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"z": {0: 'B'}, "logits": {0: 'B'}}, - opset_version=17, - ) - - input_profile_vae = {} - input_profile_vae["z"] = [(batch_size, *(z.shape[1:]))] * 3 - deployment_conf.vae.z = input_profile_vae["z"][0] - - ### CLIP Export - class CLIPWrapper(nn.Module): - def __init__(self, model): - super().__init__() - self.model = model - - def forward(self, input_ids): - outputs = self.model(input_ids=input_ids) - return outputs.last_hidden_state - - class OpenCLIPWrapper(nn.Module, Exportable): - def __init__(self, model): - super().__init__() - self.model = model - - def forward(self, input_ids): - outputs = self.model.encode_with_transformer(input_ids) - return outputs - - def input_example(self, max_text=64): - sample = next(self.parameters()) - tokens = torch.randint(high=10, size=(1, self.model.max_length)).to(sample.device) - return (tokens,) - - @property - def input_types(self) -> Optional[Dict[str, NeuralType]]: - return { - "tokens": NeuralType(('H', 'D'), ChannelType()), - } - - @property - def output_types(self) -> Optional[Dict[str, NeuralType]]: - return {"logits": NeuralType(('B', 'H'), ChannelType())} - - @property - def input_names(self) -> List[str]: - return ['tokens'] - - @property - def output_names(self) -> List[str]: - return ['logits'] - - print('Running CLIP onnx export') - openai_clip = isinstance(model.cond_stage_model, FrozenCLIPEmbedder) - tokens = torch.randint(high=10, size=(1, model.cond_stage_model.max_length), device="cuda") - - if openai_clip: - input_names = ["tokens"] - output_names = ["logits"] - torch.onnx.export( - CLIPWrapper(model.cond_stage_model.transformer), - (tokens,), - f"{output_dir}/onnx/clip/clip.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"tokens": {0: 'B'}, "logits": {0: 'B'}}, - opset_version=17, - do_constant_folding=True, - export_params=True, - ) - else: - clip_model = OpenCLIPWrapper(model.cond_stage_model) - clip_model.export("stable-diffusion/onnx/clip/clip.onnx") - - input_profile_clip = {} - input_profile_clip["tokens"] = [(batch_size, *(tokens.shape[1:]))] * 3 - deployment_conf.clip.tokens = input_profile_clip["tokens"][0] - deployment_conf.clip.logits = (batch_size, model.cond_stage_model.max_length, out.shape[2]) - deployment_conf.clip.unconditional_guidance_scale = cfg.infer.get("unconditional_guidance_scale", 7.5) - deployment_conf.clip.max_length = model.cond_stage_model.max_length - deployment_conf.clip.openai_clip = openai_clip - with open(f"{output_dir}/plan/conf.yaml", "wb") as f: - OmegaConf.save(config=deployment_conf, f=f.name) - - del model, trainer, megatron_diffusion_model, x, t, cc, z, tokens, out, hint, control_outs - torch.cuda.empty_cache() - gc.collect() - - print('Running Controlnet TRT conversion') - build_engine( - f"{output_dir}/onnx/controlnet/controlnet.onnx", - f"{output_dir}/plan/controlnet.plan", - fp16=fp16, - input_profile=input_profile_controlnet, - timing_cache=None, - workspace_size=0, - ) - - print('Running UNET TRT conversion') - build_engine( - f"{output_dir}/onnx/unet/unet.onnx", - f"{output_dir}/plan/unet.plan", - fp16=fp16, - input_profile=input_profile_unet, - timing_cache=None, - workspace_size=0, - ) - - print('Running VAE TRT conversion') - build_engine( - f"{output_dir}/onnx/vae/vae.onnx", - f"{output_dir}/plan/vae.plan", - fp16=fp16, - input_profile=input_profile_vae, - timing_cache=None, - workspace_size=0, - ) - - print('Running CLIP TRT conversion') - build_engine( - f"{output_dir}/onnx/clip/clip.onnx", - f"{output_dir}/plan/clip.plan", - fp16=fp16, - input_profile=input_profile_clip, - timing_cache=None, - workspace_size=0, - ) - - -if __name__ == "__main__": - main() diff --git a/examples/multimodal/generative/dreambooth/conf/dreambooth_export.yaml b/examples/multimodal/generative/dreambooth/conf/dreambooth_export.yaml deleted file mode 100644 index ca9f2e224171..000000000000 --- a/examples/multimodal/generative/dreambooth/conf/dreambooth_export.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: stable-diffusion-export - -infer: - unconditional_guidance_scale: 7.5 - num_images_per_prompt: 4 - height: 512 - width: 512 - down_factor: 8 - inference_steps: 100 - sampler_type: 'DDIM' - eta: 0 - out_path: 'dreambooth' - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: 16 - logger: False # logger provided by exp_manager - -model: - restore_from_path: null - precision: ${trainer.precision} \ No newline at end of file diff --git a/examples/multimodal/generative/dreambooth/dreambooth_export.py b/examples/multimodal/generative/dreambooth/dreambooth_export.py deleted file mode 100644 index 5de4c038a9ea..000000000000 --- a/examples/multimodal/generative/dreambooth/dreambooth_export.py +++ /dev/null @@ -1,238 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import gc -import os -import time -from typing import Dict, List, Optional - -import torch -import torch.nn as nn -from omegaconf.omegaconf import OmegaConf, open_dict - -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion, MegatronLatentDiffusion -from nemo.collections.multimodal.modules.stable_diffusion.encoders.modules import FrozenCLIPEmbedder -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.core.classes.exportable import Exportable -from nemo.core.config import hydra_runner -from nemo.core.neural_types import ChannelType, NeuralType -from nemo.utils.trt_utils import build_engine - - -@hydra_runner(config_path='conf', config_name='dreambooth_export') -def main(cfg): - batch_size = cfg.infer.get('num_images_per_prompt', 1) - height = cfg.infer.get('height', 512) - width = cfg.infer.get('width', 512) - downsampling_factor = cfg.infer.get('down_factor', 8) - fp16 = 16 == cfg.trainer.get("precision", 32) - if cfg.trainer.get("precision", 32) in ['bf16', 'bf16-mixed']: - print("BF16 not supported for export, will use fp32") - - def model_cfg_modifier(model_cfg): - model_cfg.precision = cfg.trainer.precision - model_cfg.ckpt_path = None - model_cfg.inductor = False - model_cfg.unet_config.use_flash_attention = False - model_cfg.unet_config.from_pretrained = None - model_cfg.first_stage_config.from_pretrained = None - model_cfg.target = 'nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion' - - trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier - ) - model = megatron_diffusion_model.model - model.cuda().eval() - - in_channels = model.model.diffusion_model.in_channels - shape_of_internal = [in_channels, height // downsampling_factor, width // downsampling_factor] - fake_text = [""] - out = model.cond_stage_model(fake_text) - output_dir = cfg.infer.out_path - os.makedirs(f"{output_dir}/onnx/unet/", exist_ok=True) - os.makedirs(f"{output_dir}/onnx/clip/", exist_ok=True) - os.makedirs(f"{output_dir}/onnx/vae/", exist_ok=True) - os.makedirs(f"{output_dir}/plan/", exist_ok=True) - deployment_conf = OmegaConf.create( - { - 'clip': OmegaConf.create({}), - 'unet': OmegaConf.create({}), - 'vae': OmegaConf.create({}), - 'sampler': OmegaConf.create({}), - 'batch_size': batch_size, - 'downsampling_factor': downsampling_factor, - 'in_channels': in_channels, - 'height': height, - 'width': width, - } - ) - deployment_conf.sampler.eta = cfg.infer.get('eta', 0) - deployment_conf.sampler.inference_steps = cfg.infer.get('inference_steps', 50) - deployment_conf.sampler.sampler_type = cfg.infer.get('sampler_type', "ddim") - - ### UNet Export - x, t = torch.randn(2, *shape_of_internal, device="cuda"), torch.randint(high=10, size=(2,), device="cuda") - cc = torch.randn(2, out.shape[1], out.shape[2], device="cuda") - input_names = ["x", "t", "context"] - output_names = ["logits"] - torch.onnx.export( - model.model.diffusion_model, - (x, t, cc), - f"{output_dir}/onnx/unet/unet.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"x": {0: 'B'}, "t": {0: 'B'}, "context": {0: 'B'}}, - opset_version=17, - ) - input_profile_unet = {} - input_profile_unet["x"] = [(2 * batch_size, *(x.shape[1:]))] * 3 - input_profile_unet["t"] = [(2 * batch_size, *(t.shape[1:]))] * 3 - input_profile_unet["context"] = [(2 * batch_size, *(cc.shape[1:]))] * 3 - deployment_conf.unet.x = input_profile_unet["x"][0] - deployment_conf.unet.t = input_profile_unet["t"][0] - deployment_conf.unet.context = input_profile_unet["context"][0] - deployment_conf.unet.logits = input_profile_unet["x"][0] - - ### VAE Export - class VAEWrapper(nn.Module): - def __init__(self, model): - super().__init__() - self.model = model - - def forward(self, z): - h = self.model.post_quant_conv(z) - dec = self.model.decoder(h) - return dec - - input_names = ["z"] - output_names = ["logits"] - z = torch.randn(1, *shape_of_internal, device="cuda") - torch.onnx.export( - VAEWrapper(model.first_stage_model), - (z,), - f"{output_dir}/onnx/vae/vae.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"z": {0: 'B'}, "logits": {0: 'B'}}, - opset_version=17, - ) - input_profile_vae = {} - input_profile_vae["z"] = [(batch_size, *(z.shape[1:]))] * 3 - deployment_conf.vae.z = input_profile_vae["z"][0] - - ### CLIP Export - class CLIPWrapper(nn.Module): - def __init__(self, model): - super().__init__() - self.model = model - - def forward(self, input_ids): - outputs = self.model(input_ids=input_ids) - return outputs.last_hidden_state - - class OpenCLIPWrapper(nn.Module, Exportable): - def __init__(self, model): - super().__init__() - self.model = model - - def forward(self, input_ids): - outputs = self.model.encode_with_transformer(input_ids) - return outputs - - def input_example(self, max_text=64): - sample = next(self.parameters()) - tokens = torch.randint(high=10, size=(1, self.model.max_length)).to(sample.device) - return (tokens,) - - @property - def input_types(self) -> Optional[Dict[str, NeuralType]]: - return { - "tokens": NeuralType(('H', 'D'), ChannelType()), - } - - @property - def output_types(self) -> Optional[Dict[str, NeuralType]]: - return {"logits": NeuralType(('B', 'H'), ChannelType())} - - @property - def input_names(self) -> List[str]: - return ['tokens'] - - @property - def output_names(self) -> List[str]: - return ['logits'] - - openai_clip = isinstance(model.cond_stage_model, FrozenCLIPEmbedder) - tokens = torch.randint(high=10, size=(1, model.cond_stage_model.max_length), device="cuda") - - if openai_clip: - input_names = ["tokens"] - output_names = ["logits"] - torch.onnx.export( - CLIPWrapper(model.cond_stage_model.transformer), - (tokens,), - f"{output_dir}/onnx/clip/clip.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"tokens": {0: 'B'}, "logits": {0: 'B'}}, - opset_version=17, - do_constant_folding=True, - export_params=True, - ) - else: - clip_model = OpenCLIPWrapper(model.cond_stage_model) - clip_model.export(f"{output_dir}/onnx/clip/clip.onnx") - - input_profile_clip = {} - input_profile_clip["tokens"] = [(batch_size, *(tokens.shape[1:]))] * 3 - deployment_conf.clip.tokens = input_profile_clip["tokens"][0] - deployment_conf.clip.logits = (batch_size, model.cond_stage_model.max_length, out.shape[2]) - deployment_conf.clip.unconditional_guidance_scale = cfg.infer.get("unconditional_guidance_scale", 7.5) - deployment_conf.clip.max_length = model.cond_stage_model.max_length - with open(f"{output_dir}/plan/conf.yaml", "wb") as f: - OmegaConf.save(config=deployment_conf, f=f.name) - del model, trainer, megatron_diffusion_model, x, t, cc, z, tokens, out - torch.cuda.empty_cache() - gc.collect() - - build_engine( - f"{output_dir}/onnx/unet/unet.onnx", - f"{output_dir}/plan/unet.plan", - fp16=fp16, - input_profile=input_profile_unet, - timing_cache=None, - workspace_size=0, - ) - build_engine( - f"{output_dir}/onnx/vae/vae.onnx", - f"{output_dir}/plan/vae.plan", - fp16=fp16, - input_profile=input_profile_vae, - timing_cache=None, - workspace_size=0, - ) - build_engine( - f"{output_dir}/onnx/clip/clip.onnx", - f"{output_dir}/plan/clip.plan", - fp16=fp16, - input_profile=input_profile_clip, - timing_cache=None, - workspace_size=0, - ) - - -if __name__ == "__main__": - main() diff --git a/examples/multimodal/generative/imagen/conf/export.yaml b/examples/multimodal/generative/imagen/conf/export.yaml deleted file mode 100644 index fa77478390a7..000000000000 --- a/examples/multimodal/generative/imagen/conf/export.yaml +++ /dev/null @@ -1,28 +0,0 @@ -num_images_per_promt: 2 # The number of images generated for each promt text -model_name: null # Avaliable model_name defined in pretrained_models.yaml -run_ema_model: True # Whether load the reg/ema model when using pretrained models -customized_model: # Mutually exclusive with model_name - # base_ckpt: /aot/exp/ckpts/imagen-megatron/edm-fused-200k.ckpt # Either .ckpt or .nemo is accepatable - base_ckpt: /lm/data/nemo/imagen/base.nemo - base_cfg: # Must provided if loading .ckpt checkpoint - sr256_ckpt: /lm/data/nemo/imagen/sr256.nemo - sr256_cfg: - sr1024_ckpt: /lm/data/nemo/imagen/sr1024.nemo - sr1024_cfg: -target_resolution: 256 # in [64, 256, 1024] -inference_precision: 16 # [16, 32, AMP] -thresholding_method: dynamic - -output_path: 'output/export' # Save location -record_time: True # Whether to record inference time meta -encoder_path: '/nemo/data/encoders' # Set to null if you wish to download encoders on the fly -samplings: - - # Base64 - step: 30 - cfg: 7.5 - - # SR256 - step: 20 - cfg: 8 - - # SR1024 - step: 20 - cfg: 7.5 diff --git a/examples/multimodal/generative/imagen/imagen_export.py b/examples/multimodal/generative/imagen/imagen_export.py deleted file mode 100644 index efee38da42cc..000000000000 --- a/examples/multimodal/generative/imagen/imagen_export.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import gc -import os - -import torch -from omegaconf import OmegaConf -from pytorch_lightning import Trainer - -from nemo.collections.multimodal.modules.imagen.diffusionmodules import attention_alt -from nemo.core.config import hydra_runner -from nemo.utils.trt_utils import build_engine - - -@hydra_runner(config_path='conf', config_name='export') -def main(inference_config): - if inference_config.get('infer'): - # invoking from launcher - trainer = Trainer(inference_config.trainer) - inference_config = inference_config.infer - else: - trainer = Trainer() - - # Set up variable to use alternative attention - attention_alt.USE_ALT = True - from nemo.collections.multimodal.models.imagen.imagen_pipeline import ImagenPipeline, ImagenPipelineConfig - - inference_config: ImagenPipelineConfig = OmegaConf.merge(ImagenPipelineConfig(), inference_config) - fp16 = 16 == int(inference_config.get("inference_precision", 32)) - # Set model to FP32 for ONNX export - inference_config.inference_precision = 32 - - pipeline = ImagenPipeline.from_pretrained(cfg=inference_config, trainer=trainer) - batch_size = inference_config.get('num_images_per_promt', 1) - thresholding_method = inference_config.get('thresholding_method', 'dynamic') - fake_text = [""] - out_embed, out_mask = pipeline.get_text_encodings(fake_text, repeat=batch_size) - output_dir = inference_config.output_path - deployment_conf = OmegaConf.create( - { - 't5': OmegaConf.create({}), - 'models': OmegaConf.create([]), - 'batch_size': batch_size, - 'thresholding_method': thresholding_method, - } - ) - - ### T5 Export - class T5Wrapper(torch.nn.Module): - def __init__(self, t5_encoder): - super(T5Wrapper, self).__init__() - self.t5_encoder = t5_encoder - - def forward(self, input_ids, attn_mask): - t5_encoder = self.t5_encoder - - with torch.no_grad(): - output = t5_encoder.model(input_ids=input_ids, attention_mask=attn_mask) - encoded_text = output.last_hidden_state - - encoded_text = encoded_text[:, 0 : t5_encoder.max_seq_len] - attn_mask = attn_mask[:, 0 : t5_encoder.max_seq_len] - - return encoded_text, attn_mask - - t5_wrapper = T5Wrapper(pipeline.text_encoder) - # Exporting T5Encoder in CPU - t5_wrapper.to('cpu') - - input_names = ['input_ids', 'attn_mask'] - output_names = ['encoded_text', 'text_mask'] - input_ids = torch.randint(high=10, size=(1, pipeline.text_encoder.model_seq_len), dtype=torch.int) - attn_mask = torch.zeros(1, pipeline.text_encoder.model_seq_len, dtype=torch.int) - - os.makedirs(f"{output_dir}/onnx/t5/", exist_ok=True) - torch.onnx.export( - t5_wrapper, - (input_ids, attn_mask), - f"{output_dir}/onnx/t5/t5.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"input_ids": {0: 'B'}, "attn_mask": {0: 'B'},}, - opset_version=17, - ) - - input_profile_t5 = {} - input_profile_t5["input_ids"] = [input_ids.shape] * 3 - input_profile_t5["attn_mask"] = [attn_mask.shape] * 3 - deployment_conf.t5.model_seq_len = pipeline.text_encoder.model_seq_len - del pipeline.text_encoder, input_ids, attn_mask - - ### UNet Export - os.makedirs(f"{output_dir}/onnx/unet/", exist_ok=True) - - low_res_size = None - cfgs = [each.cfg for each in inference_config.samplings] - cfgs = cfgs[: len(pipeline.models)] - steps = [each.step for each in inference_config.samplings] - steps = steps[: len(pipeline.models)] - input_profile_unets = [] - - for i, model in enumerate(pipeline.models): - unet_model = model.unet - - ### UNet Export - x = torch.randn(batch_size, 3, unet_model.image_size, unet_model.image_size, device="cuda") - time = torch.randn(batch_size, device='cuda') - text_embed = torch.randn(batch_size, out_embed.shape[1], out_embed.shape[2], device='cuda') - text_mask = torch.zeros((batch_size, out_mask.shape[1]), dtype=torch.int, device='cuda') - input_names = ["x", "time", "text_embed", "text_mask"] - output_names = ["logits"] - dynamic_axes = { - "x": {0: 'B'}, - "time": {0: 'B'}, - "text_embed": {0: 'B'}, - "text_mask": {0: 'B'}, - } - inputs = [x, time, text_embed, text_mask] - - if low_res_size is not None: - input_names.append("x_low_res") - dynamic_axes['x_low_res'] = {0: 'batch'} - x_low_res = torch.randn(batch_size, 3, low_res_size, low_res_size, device="cuda") - inputs.append(x_low_res) - - if model.noise_cond_aug: - input_names.append("time_low_res") - dynamic_axes['time_low_res'] = {0: 'batch'} - time_low_res = torch.ones(batch_size, device="cuda") - inputs.append(time_low_res) - - torch.onnx.export( - unet_model, - tuple(inputs), - f"{output_dir}/onnx/unet/unet{i}.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - opset_version=17, - ) - - input_profile_unet = {} - input_profile_unet["x"] = [(batch_size, *(x.shape[1:]))] * 3 - input_profile_unet["time"] = [(batch_size,)] * 3 - input_profile_unet["text_embed"] = [(batch_size, *(text_embed.shape[1:]))] * 3 - input_profile_unet["text_mask"] = [(batch_size, *(text_mask.shape[1:]))] * 3 - - config = OmegaConf.create({}) - config.preconditioning_type = model.preconditioning_type - config.preconditioning = model.cfg.preconditioning - config.noise_cond_aug = model.noise_cond_aug - config.cond_scale = cfgs[i] - config.step = steps[i] - config.x = input_profile_unet["x"][0] - - if i == 0: - config.text_embed = input_profile_unet["text_embed"][0] - config.text_mask = input_profile_unet["text_mask"][0] - - if low_res_size is not None: - input_profile_unet["x_low_res"] = [(batch_size, *(x_low_res.shape[1:]))] * 3 - - if model.noise_cond_aug: - input_profile_unet["time_low_res"] = [(batch_size,)] * 3 - - for key in input_profile_unet: - # set up min and max batch to 1 and 2 * batch_size - input_profile_unet[key][0] = (1, *input_profile_unet[key][0][1:]) - input_profile_unet[key][2] = (2 * batch_size, *input_profile_unet[key][2][1:]) - - deployment_conf.models.append(config) - input_profile_unets.append(input_profile_unet) - - low_res_size = unet_model.image_size - - os.makedirs(f"{output_dir}/plan", exist_ok=True) - with open(f"{output_dir}/plan/conf.yaml", "wb") as f: - OmegaConf.save(config=deployment_conf, f=f.name) - - del pipeline, x, time, text_embed, text_mask - torch.cuda.empty_cache() - gc.collect() - - build_engine( - f"{output_dir}/onnx/t5/t5.onnx", - f"{output_dir}/plan/t5.plan", - fp16=False, - input_profile=input_profile_t5, - timing_cache=None, - workspace_size=0, - ) - - for i, input_profile in enumerate(input_profile_unets): - build_engine( - f"{output_dir}/onnx/unet/unet{i}.onnx", - f"{output_dir}/plan/unet{i}.plan", - fp16=fp16, - input_profile=input_profile, - timing_cache=None, - workspace_size=0, - ) - - -if __name__ == "__main__": - main() diff --git a/examples/multimodal/generative/instruct_pix2pix/conf/sd_export.yaml b/examples/multimodal/generative/instruct_pix2pix/conf/sd_export.yaml deleted file mode 100644 index 2af156df6c60..000000000000 --- a/examples/multimodal/generative/instruct_pix2pix/conf/sd_export.yaml +++ /dev/null @@ -1,19 +0,0 @@ -edit: - resolution: 256 - steps: 100 - out_path: "instruct_pix2pix" - cfg_text: 7.5 - cfg_image: 1.2 - num_images_per_prompt: 8 - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -model: - restore_from_path: null # Path to a trained instruct pix2pix .nemo file - precision: ${trainer.precision} - diff --git a/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py b/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py deleted file mode 100644 index a97ed6e09a63..000000000000 --- a/examples/multimodal/generative/instruct_pix2pix/sd_edit_export.py +++ /dev/null @@ -1,325 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import gc -import math -import os -import random -import sys -from argparse import ArgumentParser -from typing import Dict, List, Optional - -import einops -import numpy as np -import torch -import torch.nn as nn -from einops import rearrange, repeat -from omegaconf import OmegaConf, open_dict -from PIL import Image, ImageOps - -from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit -from nemo.collections.multimodal.models.stable_diffusion.samplers.k_diffusion import DiscreteEpsDDPMDenoiser -from nemo.collections.multimodal.modules.stable_diffusion.encoders.modules import FrozenCLIPEmbedder -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector -from nemo.core.classes.exportable import Exportable -from nemo.core.config import hydra_runner -from nemo.core.neural_types import ChannelType, NeuralType -from nemo.utils import logging -from nemo.utils.trt_utils import build_engine - - -class CFGDenoiser(nn.Module): - def __init__(self, model): - super().__init__() - self.inner_model = model - - def forward(self, z, sigma, cond, uncond, text_cfg_scale, image_cfg_scale): - cfg_z = einops.repeat(z, "b ... -> (n b) ...", n=3) - cfg_sigma = einops.repeat(sigma, "b ... -> (n b) ...", n=3) - cfg_cond = { - "c_crossattn": [torch.cat([cond["c_crossattn"][0], uncond["c_crossattn"][0], uncond["c_crossattn"][0]])], - "c_concat": [torch.cat([cond["c_concat"][0], cond["c_concat"][0], uncond["c_concat"][0]])], - } - print(cfg_z.shape, cfg_sigma.shape) - out_cond, out_img_cond, out_uncond = self.inner_model(cfg_z, cfg_sigma, cond=cfg_cond).chunk(3) - out = out_uncond + text_cfg_scale * (out_cond - out_img_cond) + image_cfg_scale * (out_img_cond - out_uncond) - return out - - -@hydra_runner(config_path='conf', config_name='sd_export') -def main(cfg): - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - fp16 = 16 == cfg.trainer.get("precision", 32) - if cfg.trainer.get("precision", 32) in ['bf16', 'bf16-mixed']: - print("BF16 not supported for export, will use fp32") - with open_dict(cfg): - edit_cfg = cfg.pop("edit") - - def model_cfg_modifier(model_cfg): - model_cfg.precision = cfg.trainer.precision - model_cfg.ckpt_path = None - model_cfg.inductor = False - - trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusionEdit, cfg=cfg, model_cfg_modifier=model_cfg_modifier, - ) - - # inference use the latent diffusion part of megatron wrapper - model = megatron_diffusion_model.model - model.eval() - model_wrap = DiscreteEpsDDPMDenoiser(model) - model_wrap_cfg = CFGDenoiser(model_wrap) - null_token = model.get_learned_conditioning([""]) - - # input_image = Image.open(edit_cfg.input).convert("RGB") - # width, height = input_image.size - # factor = edit_cfg.resolution / max(width, height) - # factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height) - # width = int((width * factor) // 64) * 64 - # height = int((height * factor) // 64) * 64 - # input_image = ImageOps.fit(input_image, (width, height), method=Image.Resampling.LANCZOS) - input_image = np.random.rand(edit_cfg.resolution, edit_cfg.resolution, 3) * 255 - input_image = Image.fromarray(input_image.astype('uint8')).convert('RGB') - batch_size = edit_cfg.get("num_images_per_prompt", 1) - height = edit_cfg.resolution - width = edit_cfg.resolution - - output_dir = edit_cfg.out_path - - os.makedirs(f"{output_dir}/onnx/unet/", exist_ok=True) - os.makedirs(f"{output_dir}/onnx/clip/", exist_ok=True) - os.makedirs(f"{output_dir}/onnx/vae/", exist_ok=True) - os.makedirs(f"{output_dir}/plan/", exist_ok=True) - deployment_conf = OmegaConf.create( - { - 'clip': OmegaConf.create({}), - 'unet': OmegaConf.create({}), - 'vaee': OmegaConf.create({}), - 'vaed': OmegaConf.create({}), - 'sampler': OmegaConf.create({}), - 'batch_size': batch_size, - 'height': height, - 'width': width, - 'resolution': edit_cfg.resolution, - 'steps': edit_cfg.steps, - 'text_cfg_scale': edit_cfg.cfg_text, - 'image_cfg_scale': edit_cfg.cfg_image, - } - ) - - fake_text = [""] - out_cond = model.cond_stage_model(fake_text) - - ### VAE Encode Export - class VAEEncodeWrapper(nn.Module): - def __init__(self, model): - super().__init__() - self.model = model - - def forward(self, x): - h = self.model.encoder(x) - moments = self.model.quant_conv(h) - return moments - - input_names = ["x"] - output_names = ["logits"] - x = torch.randn(1, 3, width, height, device="cuda") - # z = torch.randn(1, *shape_of_internal, device="cuda") - torch.onnx.export( - VAEEncodeWrapper(model.first_stage_model), - (x,), - f"{output_dir}/onnx/vae/vae_encode.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"x": {0: 'B'}, "logits": {0: 'B'}}, - opset_version=17, - ) - input_profile_vaee = {} - input_profile_vaee["x"] = [(1, *(x.shape[1:]))] * 3 - with torch.no_grad(): - out_vaee = VAEEncodeWrapper(model.first_stage_model)(x) - deployment_conf.vaee.x = input_profile_vaee["x"][0] - deployment_conf.vaee.logits = tuple(out_vaee.shape) - - x = torch.randn(3, *(out_vaee.shape[1:]), device="cuda") - t = torch.randn(3, device="cuda") - cc = torch.randn(3, out_cond.shape[1], out_cond.shape[2], device="cuda") - # x, t = torch.randn(2, *shape_of_internal, device="cuda"), torch.randint(high=10, size=(2,), device="cuda") - # cc = torch.randn(2, out.shape[1], out.shape[2], device="cuda") - input_names = ["x", "t", "context"] - output_names = ["logits"] - torch.onnx.export( - model.model.diffusion_model, - (x, t, cc), - f"{output_dir}/onnx/unet/unet.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"x": {0: 'B'}, "t": {0: 'B'}, "context": {0: 'B'}}, - opset_version=17, - ) - - input_profile_unet = {} - input_profile_unet["x"] = [(3 * batch_size, *(x.shape[1:]))] * 3 - input_profile_unet["t"] = [(3 * batch_size, *(t.shape[1:]))] * 3 - input_profile_unet["context"] = [(3 * batch_size, *(cc.shape[1:]))] * 3 - with torch.no_grad(): - out_unet = model.model.diffusion_model(x, t, context=cc) - deployment_conf.unet.x = input_profile_unet["x"][0] - deployment_conf.unet.t = input_profile_unet["t"][0] - deployment_conf.unet.context = input_profile_unet["context"][0] - deployment_conf.unet.logits = (3 * batch_size, *(out_unet.shape[1:])) - - ### VAE Decode Export - class VAEDecodeWrapper(nn.Module): - def __init__(self, model): - super().__init__() - self.model = model - - def forward(self, z): - h = self.model.post_quant_conv(z) - dec = self.model.decoder(h) - return dec - - input_names = ["z"] - output_names = ["logits"] - z = torch.randn(1, *(out_unet.shape[1:]), device="cuda") - # z = torch.randn(1, *shape_of_internal, device="cuda") - torch.onnx.export( - VAEDecodeWrapper(model.first_stage_model), - (z,), - f"{output_dir}/onnx/vae/vae_decode.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"z": {0: 'B'}, "logits": {0: 'B'}}, - opset_version=17, - ) - input_profile_vaed = {} - input_profile_vaed["z"] = [(batch_size, *(z.shape[1:]))] * 3 - deployment_conf.vaed.z = input_profile_vaed["z"][0] - deployment_conf.vaed.logits = (batch_size, 3, height, width) - - ### CLIP Export - class CLIPWrapper(nn.Module): - def __init__(self, model): - super().__init__() - self.model = model - - def forward(self, input_ids): - outputs = self.model(input_ids=input_ids) - return outputs.last_hidden_state - - class OpenCLIPWrapper(nn.Module, Exportable): - def __init__(self, model): - super().__init__() - self.model = model - - def forward(self, input_ids): - outputs = self.model.encode_with_transformer(input_ids) - return outputs - - def input_example(self, max_text=64): - sample = next(self.parameters()) - tokens = torch.randint(high=10, size=(1, self.model.max_length)).to(sample.device) - return (tokens,) - - @property - def input_types(self) -> Optional[Dict[str, NeuralType]]: - return { - "tokens": NeuralType(('H', 'D'), ChannelType()), - } - - @property - def output_types(self) -> Optional[Dict[str, NeuralType]]: - return {"logits": NeuralType(('B', 'H'), ChannelType())} - - @property - def input_names(self) -> List[str]: - return ['tokens'] - - @property - def output_names(self) -> List[str]: - return ['logits'] - - openai_clip = isinstance(model.cond_stage_model, FrozenCLIPEmbedder) - tokens = torch.randint(high=10, size=(1, model.cond_stage_model.max_length), device="cuda") - - if openai_clip: - input_names = ["tokens"] - output_names = ["logits"] - torch.onnx.export( - CLIPWrapper(model.cond_stage_model.transformer), - (tokens,), - f"{output_dir}/onnx/clip/clip.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"tokens": {0: 'B'}, "logits": {0: 'B'}}, - opset_version=17, - do_constant_folding=True, - export_params=True, - ) - else: - clip_model = OpenCLIPWrapper(model.cond_stage_model) - clip_model.export(f"{output_dir}/onnx/clip/clip.onnx") - - input_profile_clip = {} - input_profile_clip["tokens"] = [(1, *(tokens.shape[1:]))] * 3 - deployment_conf.clip.tokens = input_profile_clip["tokens"][0] - deployment_conf.clip.logits = (1, model.cond_stage_model.max_length, out_cond.shape[2]) - deployment_conf.clip.max_length = model.cond_stage_model.max_length - with open(f"{output_dir}/plan/conf.yaml", "wb") as f: - OmegaConf.save(config=deployment_conf, f=f.name) - del model, trainer, megatron_diffusion_model, x, t, cc, z, tokens, out_cond, out_vaee, out_unet - torch.cuda.empty_cache() - gc.collect() - build_engine( - f"{output_dir}/onnx/unet/unet.onnx", - f"{output_dir}/plan/unet.plan", - fp16=fp16, - input_profile=input_profile_unet, - timing_cache=None, - workspace_size=0, - ) - build_engine( - f"{output_dir}/onnx/vae/vae_decode.onnx", - f"{output_dir}/plan/vae_decode.plan", - fp16=fp16, - input_profile=input_profile_vaed, - timing_cache=None, - workspace_size=0, - ) - build_engine( - f"{output_dir}/onnx/vae/vae_encode.onnx", - f"{output_dir}/plan/vae_encode.plan", - fp16=fp16, - input_profile=input_profile_vaee, - timing_cache=None, - workspace_size=0, - ) - build_engine( - f"{output_dir}/onnx/clip/clip.onnx", - f"{output_dir}/plan/clip.plan", - fp16=fp16, - input_profile=input_profile_clip, - timing_cache=None, - workspace_size=0, - ) - - -if __name__ == "__main__": - main() diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_export.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_export.yaml deleted file mode 100644 index 546c937955a1..000000000000 --- a/examples/multimodal/generative/stable_diffusion/conf/sd_export.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: stable-diffusion-export - -infer: - unconditional_guidance_scale: 7.5 - num_images_per_prompt: 1 - height: 512 - width: 512 - down_factor: 8 - inference_steps: 50 - sampler_type: 'PLMS' - eta: 0 - out_path: 'stable-diffusion' - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: 16 - logger: False # logger provided by exp_manager - -model: - restore_from_path: null - precision: ${trainer.precision} \ No newline at end of file diff --git a/examples/multimodal/generative/stable_diffusion/sd_export.py b/examples/multimodal/generative/stable_diffusion/sd_export.py deleted file mode 100644 index 9a2ff4c7fa64..000000000000 --- a/examples/multimodal/generative/stable_diffusion/sd_export.py +++ /dev/null @@ -1,241 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import gc -import os -import time -from typing import Dict, List, Optional - -import torch -import torch.nn as nn -from omegaconf.omegaconf import OmegaConf, open_dict - -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion, MegatronLatentDiffusion -from nemo.collections.multimodal.modules.stable_diffusion.encoders.modules import FrozenCLIPEmbedder -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.core.classes.exportable import Exportable -from nemo.core.config import hydra_runner -from nemo.core.neural_types import ChannelType, NeuralType -from nemo.utils.trt_utils import build_engine - - -@hydra_runner(config_path='conf', config_name='sd_export') -def main(cfg): - # setup default values for inference configs - - batch_size = cfg.infer.get('num_images_per_prompt', 1) - height = cfg.infer.get('height', 512) - width = cfg.infer.get('width', 512) - downsampling_factor = cfg.infer.get('down_factor', 8) - fp16 = 16 == cfg.trainer.get("precision", 32) - if cfg.trainer.get("precision", 32) in ['bf16', 'bf16-mixed']: - print("BF16 not supported for export, will use fp32") - - def model_cfg_modifier(model_cfg): - model_cfg.precision = cfg.trainer.precision - model_cfg.ckpt_path = None - model_cfg.inductor = False - model_cfg.unet_config.use_flash_attention = False - model_cfg.unet_config.from_pretrained = None - model_cfg.first_stage_config.from_pretrained = None - - trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier - ) - model = megatron_diffusion_model.model - model.cuda().eval() - - in_channels = model.model.diffusion_model.in_channels - shape_of_internal = [in_channels, height // downsampling_factor, width // downsampling_factor] - fake_text = [""] - out = model.cond_stage_model(fake_text) - - output_dir = cfg.infer.out_path - os.makedirs(f"{output_dir}/onnx/unet/", exist_ok=True) - os.makedirs(f"{output_dir}/onnx/clip/", exist_ok=True) - os.makedirs(f"{output_dir}/onnx/vae/", exist_ok=True) - os.makedirs(f"{output_dir}/plan/", exist_ok=True) - deployment_conf = OmegaConf.create( - { - 'clip': OmegaConf.create({}), - 'unet': OmegaConf.create({}), - 'vae': OmegaConf.create({}), - 'sampler': OmegaConf.create({}), - 'batch_size': batch_size, - 'downsampling_factor': downsampling_factor, - 'in_channels': in_channels, - 'height': height, - 'width': width, - } - ) - deployment_conf.sampler.eta = cfg.infer.get('eta', 0) - deployment_conf.sampler.inference_steps = cfg.infer.get('inference_steps', 50) - deployment_conf.sampler.sampler_type = cfg.infer.get('sampler_type', "ddim") - - ### UNet Export - x, t = torch.randn(2, *shape_of_internal, device="cuda"), torch.randint(high=10, size=(2,), device="cuda") - cc = torch.randn(2, out.shape[1], out.shape[2], device="cuda") - input_names = ["x", "t", "context"] - output_names = ["logits"] - torch.onnx.export( - model.model.diffusion_model, - (x, t, cc), - f"{output_dir}/onnx/unet/unet.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"x": {0: 'B'}, "t": {0: 'B'}, "context": {0: 'B'}}, - opset_version=17, - ) - input_profile_unet = {} - input_profile_unet["x"] = [(2 * batch_size, *(x.shape[1:]))] * 3 - input_profile_unet["t"] = [(2 * batch_size, *(t.shape[1:]))] * 3 - input_profile_unet["context"] = [(2 * batch_size, *(cc.shape[1:]))] * 3 - deployment_conf.unet.x = input_profile_unet["x"][0] - deployment_conf.unet.t = input_profile_unet["t"][0] - deployment_conf.unet.context = input_profile_unet["context"][0] - deployment_conf.unet.logits = input_profile_unet["x"][0] - - ### VAE Export - class VAEWrapper(nn.Module): - def __init__(self, model): - super().__init__() - self.model = model - - def forward(self, z): - h = self.model.post_quant_conv(z) - dec = self.model.decoder(h) - return dec - - input_names = ["z"] - output_names = ["logits"] - z = torch.randn(1, *shape_of_internal, device="cuda") - torch.onnx.export( - VAEWrapper(model.first_stage_model), - (z,), - f"{output_dir}/onnx/vae/vae.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"z": {0: 'B'}, "logits": {0: 'B'}}, - opset_version=17, - ) - input_profile_vae = {} - input_profile_vae["z"] = [(batch_size, *(z.shape[1:]))] * 3 - deployment_conf.vae.z = input_profile_vae["z"][0] - - ### CLIP Export - class CLIPWrapper(nn.Module): - def __init__(self, model): - super().__init__() - self.model = model - - def forward(self, input_ids): - outputs = self.model(input_ids=input_ids) - return outputs.last_hidden_state - - class OpenCLIPWrapper(nn.Module, Exportable): - def __init__(self, model): - super().__init__() - self.model = model - - def forward(self, input_ids): - outputs = self.model.encode_with_transformer(input_ids) - return outputs - - def input_example(self, max_text=64): - sample = next(self.parameters()) - tokens = torch.randint(high=10, size=(1, self.model.max_length)).to(sample.device) - return (tokens,) - - @property - def input_types(self) -> Optional[Dict[str, NeuralType]]: - return { - "tokens": NeuralType(('H', 'D'), ChannelType()), - } - - @property - def output_types(self) -> Optional[Dict[str, NeuralType]]: - return {"logits": NeuralType(('B', 'H'), ChannelType())} - - @property - def input_names(self) -> List[str]: - return ['tokens'] - - @property - def output_names(self) -> List[str]: - return ['logits'] - - openai_clip = isinstance(model.cond_stage_model, FrozenCLIPEmbedder) - tokens = torch.randint(high=10, size=(1, model.cond_stage_model.max_length), device="cuda") - - if openai_clip: - input_names = ["tokens"] - output_names = ["logits"] - torch.onnx.export( - CLIPWrapper(model.cond_stage_model.transformer), - (tokens,), - f"{output_dir}/onnx/clip/clip.onnx", - verbose=False, - input_names=input_names, - output_names=output_names, - dynamic_axes={"tokens": {0: 'B'}, "logits": {0: 'B'}}, - opset_version=17, - do_constant_folding=True, - export_params=True, - ) - else: - clip_model = OpenCLIPWrapper(model.cond_stage_model) - clip_model.export(f"{output_dir}/onnx/clip/clip.onnx") - - input_profile_clip = {} - input_profile_clip["tokens"] = [(batch_size, *(tokens.shape[1:]))] * 3 - deployment_conf.clip.tokens = input_profile_clip["tokens"][0] - deployment_conf.clip.logits = (batch_size, model.cond_stage_model.max_length, out.shape[2]) - deployment_conf.clip.unconditional_guidance_scale = cfg.infer.get("unconditional_guidance_scale", 7.5) - deployment_conf.clip.max_length = model.cond_stage_model.max_length - deployment_conf.clip.openai_clip = openai_clip - with open(f"{output_dir}/plan/conf.yaml", "wb") as f: - OmegaConf.save(config=deployment_conf, f=f.name) - del model, trainer, megatron_diffusion_model, x, t, cc, z, tokens, out - torch.cuda.empty_cache() - gc.collect() - - build_engine( - f"{output_dir}/onnx/unet/unet.onnx", - f"{output_dir}/plan/unet.plan", - fp16=fp16, - input_profile=input_profile_unet, - timing_cache=None, - workspace_size=0, - ) - build_engine( - f"{output_dir}/onnx/vae/vae.onnx", - f"{output_dir}/plan/vae.plan", - fp16=fp16, - input_profile=input_profile_vae, - timing_cache=None, - workspace_size=0, - ) - build_engine( - f"{output_dir}/onnx/clip/clip.onnx", - f"{output_dir}/plan/clip.plan", - fp16=fp16, - input_profile=input_profile_clip, - timing_cache=None, - workspace_size=0, - ) - - -if __name__ == "__main__": - main() diff --git a/examples/multimodal/mllm/neva/conf/neva_export.yaml b/examples/multimodal/mllm/neva/conf/neva_export.yaml deleted file mode 100644 index fa3d70478662..000000000000 --- a/examples/multimodal/mllm/neva/conf/neva_export.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: nemo_neva -infer: - out_dir: ./neva - vision: - min_batch_size: 1 - opt_batch_size: 1 - max_batch_size: 1 - clip: openai/clip-vit-large-patch14 - llm: - tensor_parallelism: 1 - max_input_len: 2048 - max_output_len: 1024 - max_batch_size: 1 - -model: - precision: bf16 - restore_from_path: /path/to/pretrained.nemo diff --git a/examples/multimodal/mllm/neva/neva_export.py b/examples/multimodal/mllm/neva/neva_export.py deleted file mode 100644 index b5da058e096a..000000000000 --- a/examples/multimodal/mllm/neva/neva_export.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import torch -from omegaconf.omegaconf import OmegaConf -from polygraphy.backend.trt import CreateConfig, Profile, engine_from_network, network_from_onnx_path, save_engine -from polygraphy.logger import G_LOGGER - -from nemo.core.classes.exportable import Exportable -from nemo.core.config import hydra_runner -from nemo.core.neural_types import ChannelType, LogitsType, NeuralType - -G_LOGGER.module_severity = G_LOGGER.EXTRA_VERBOSE -import logging -import os -import shutil -import subprocess -import tempfile -from pathlib import Path - -import torch -from omegaconf import OmegaConf -from transformers import CLIPImageProcessor, CLIPVisionModel - -from nemo.core.classes.exportable import Exportable -from nemo.core.connectors.save_restore_connector import SaveRestoreConnector -from nemo.core.neural_types import ChannelType, LogitsType, NeuralType -from nemo.export import TensorRTLLM - -LOGGER = logging.getLogger(__name__) - - -def build_vision_encoder(model_path, clip_path, precision, bs_min, bs_opt, bs_max, out_dir): - torch_precision = torch.bfloat16 if precision in ['bf16', 'bf16-mixed'] else torch.float16 - - with tempfile.TemporaryDirectory() as temp: - LOGGER.info('Extracting model') - connector = SaveRestoreConnector() - connector._unpack_nemo_file(path2file=model_path, out_folder=temp) - config_yaml = os.path.join(temp, connector.model_config_yaml) - config = OmegaConf.load(config_yaml) - if config.tensor_model_parallel_size > 1: - path = os.path.join(temp, 'mp_rank_00', connector.model_weights_ckpt) - else: - path = os.path.join(temp, connector.model_weights_ckpt) - state_dict = connector._load_state_dict_from_disk(path) - LOGGER.info('Done') - - vision_connector = torch.nn.Linear(config.mm_cfg.vision_encoder.hidden_size, config.hidden_size, bias=True,) - vision_encoder = CLIPVisionModel.from_pretrained(clip_path, torch_dtype=torch_precision) - image_size = vision_encoder.vision_model.config.image_size - - if 'model.vision_connector.weight' in state_dict: - new_state_dict = { - 'weight': state_dict['model.vision_connector.weight'], - 'bias': state_dict['model.vision_connector.bias'], - } - else: - new_state_dict = { - 'weight': state_dict[ - 'model.language_model.embedding.word_embeddings.adapter_layer.mm_linear_adapter.linear.weight' - ], - 'bias': state_dict[ - 'model.language_model.embedding.word_embeddings.adapter_layer.mm_linear_adapter.linear.bias' - ], - } - - vision_connector.load_state_dict(new_state_dict) - vision_connector = vision_connector.to(dtype=torch_precision) - - class VisionEncoderWrapper(torch.nn.Module, Exportable): - def __init__(self, encoder, connector): - super().__init__() - self.encoder = encoder - self.connector = connector - - def forward(self, images): - vision_x = self.encoder(images, output_hidden_states=True) - vision_x = vision_x.hidden_states[-2] - vision_x = vision_x[:, 1:] - vision_x = self.connector(vision_x) - return vision_x - - # For onnx export - def input_example(self, max_batch=8): - sample = next(self.parameters()) - images = torch.randn(max_batch, 3, image_size, image_size, device=sample.device, dtype=sample.dtype) - return (images,) - - @property - def input_types(self): - return {'images': NeuralType(('B', 'C', 'H', 'W'), ChannelType())} - - @property - def output_types(self): - return {'features': NeuralType(('B', 'S', 'D'), LogitsType())} - - @property - def input_names(self): - return ['images'] - - @property - def output_names(self): - return ['features'] - - wrapper = VisionEncoderWrapper(vision_encoder, vision_connector) - - os.makedirs(f'/tmp/onnx/', exist_ok=True) - dynamic_axes = {'images': {0: 'B'}} - - LOGGER.info('Exporting ONNX') - wrapper.export(f'/tmp/onnx/vision_encoder.onnx', dynamic_axes=dynamic_axes, onnx_opset_version=17) - LOGGER.info('Done') - - bsmin_example = wrapper.input_example(max_batch=bs_min) - bsopt_example = wrapper.input_example(max_batch=bs_opt) - bsmax_example = wrapper.input_example(max_batch=bs_max) - - input_profile = {} - input_profile['images'] = [ - tuple(bsmin_example[0].shape), - tuple(bsopt_example[0].shape), - tuple(bsmax_example[0].shape), - ] - - p = Profile() - if input_profile: - for name, dims in input_profile.items(): - assert len(dims) == 3 - p.add(name, min=dims[0], opt=dims[1], max=dims[2]) - - LOGGER.info('Exporting TRT') - engine = engine_from_network( - network_from_onnx_path('/tmp/onnx/vision_encoder.onnx'), - config=CreateConfig( - tf32=precision in [32, '32', '32-true'], - fp16=precision in [16, '16', '16-mixed'], - bf16=precision in ['bf16', 'bf16-mixed'], - profiles=[p], - ), - ) - save_engine(engine, path=os.path.join(out_dir, 'vision_encoder.plan')) - - processor = CLIPImageProcessor.from_pretrained(clip_path) - processor.save_pretrained(out_dir) - LOGGER.info('Done') - - -def build_trtllm_engines(in_file, out_dir, tensor_parallelism, max_input_len, max_output_len, max_batch_size): - trt_llm_exporter = TensorRTLLM(model_dir=out_dir) - trt_llm_exporter.export( - nemo_checkpoint_path=in_file, - model_type="llama", - n_gpus=tensor_parallelism, - max_input_token=max_input_len, - max_output_token=max_output_len, - max_batch_size=max_batch_size, - max_prompt_embedding_table_size=max_batch_size * max_input_len, - ) - LOGGER.info('Done') - - -@hydra_runner(config_path='conf', config_name='neva_export') -def main(cfg): - precision = cfg.model.get('precision', 16) - assert precision != 32, 'FP32 export not supported' - plan_dir = os.path.join(cfg.infer.out_dir, 'plan') - - os.makedirs(plan_dir, exist_ok=True) - LOGGER.info('Building TRT-LLM engines') - build_trtllm_engines( - cfg.model.restore_from_path, - plan_dir, - cfg.infer.llm.get('tensor_parallelism', 1), - cfg.infer.llm.get('max_input_len', 2048), - cfg.infer.llm.get('max_output_len', 2048), - cfg.infer.llm.get('max_batch_size', 1), - ) - - LOGGER.info('Building vision TRT engine') - build_vision_encoder( - cfg.model.restore_from_path, - cfg.infer.vision.clip, - 32, - cfg.infer.vision.get('min_batch_size', 1), - cfg.infer.vision.get('opt_batch_size', 1), - cfg.infer.vision.get('max_batch_size', 1), - plan_dir, - ) - - -if __name__ == '__main__': - main() diff --git a/examples/vision/vision_transformer/conf/megatron_vit_classification_export.yaml b/examples/vision/vision_transformer/conf/megatron_vit_classification_export.yaml deleted file mode 100644 index cbcca277b3f5..000000000000 --- a/examples/vision/vision_transformer/conf/megatron_vit_classification_export.yaml +++ /dev/null @@ -1,16 +0,0 @@ -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - - -infer: - out_path: 'megatron-vit' - max_batch_size: 64 - max_dim: 384 - -model: - restore_from_path: null # Path to a trained ViT .nemo file - precision: ${trainer.precision} diff --git a/examples/vision/vision_transformer/megatron_vit_classification_export.py b/examples/vision/vision_transformer/megatron_vit_classification_export.py deleted file mode 100644 index 6156cc1d89cb..000000000000 --- a/examples/vision/vision_transformer/megatron_vit_classification_export.py +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import glob -import os -from typing import Dict, List, Optional - -import torch -from omegaconf.omegaconf import OmegaConf, open_dict -from PIL import Image -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from torch.utils.data import DataLoader, Dataset - -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector -from nemo.collections.vision.data.imagenet_classnames import imagenet_classnames -from nemo.collections.vision.data.megatron.vit_dataset import ClassificationTransform -from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel -from nemo.core.classes.exportable import Exportable -from nemo.core.config import hydra_runner -from nemo.core.neural_types import ChannelType, NeuralType -from nemo.utils import logging -from nemo.utils.get_rank import is_global_rank_zero -from nemo.utils.trt_utils import build_engine - - -class VITWrapper(torch.nn.Module, Exportable): - def __init__(self, model): - super(VITWrapper, self).__init__() - self.model = model - - def forward(self, tokens): - output_tensor = self.model(tokens) - return output_tensor - - # For onnx export - def input_example(self, max_batch=8, max_dim=384): - """ - Generates input examples for tracing etc. - Returns: - A tuple of input examples. - """ - sample = next(self.parameters()) - tokens = torch.randn(max_batch, 3, max_dim, max_dim, device=sample.device) - return (tokens,) - - @property - def input_types(self) -> Optional[Dict[str, NeuralType]]: - return { - "tokens": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), - } - - @property - def output_types(self) -> Optional[Dict[str, NeuralType]]: - return {"logits": NeuralType(('B', 'D'), ChannelType())} - - @property - def input_names(self) -> List[str]: - return ['tokens'] - - @property - def output_names(self) -> List[str]: - return ['logits'] - - -@hydra_runner(config_path="conf", config_name="megatron_vit_classification_export") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - output_dir = cfg.infer.out_path - max_batch_size = cfg.infer.max_batch_size - max_dim = cfg.infer.max_dim - plugins = [] - strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, find_unused_parameters=False, # we don't use DDP for async grad allreduce - ) - print(type(cfg.trainer.precision)) - if cfg.get('cluster_type', None) == 'BCP': - plugins.append(TorchElasticEnvironment()) - trt_precision = cfg.trainer.precision - cfg.trainer.precision = 32 - # trainer required for restoring model parallel models - trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) - - save_restore_connector = NLPSaveRestoreConnector() - if os.path.isdir(cfg.model.restore_from_path): - save_restore_connector.model_extracted_dir = cfg.model.restore_from_path - - model_cfg = MegatronVitClassificationModel.restore_from( - restore_path=cfg.model.restore_from_path, - trainer=trainer, - save_restore_connector=save_restore_connector, - return_config=True, - ) - - assert ( - cfg.trainer.devices * cfg.trainer.num_nodes - == model_cfg.tensor_model_parallel_size * model_cfg.pipeline_model_parallel_size - ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" - - # These configs are required to be off during inference. - with open_dict(model_cfg): - model_cfg.precision = int(trainer.precision) if trainer.precision.isdigit() else trainer.precision - print(type(model_cfg.precision)) - if trainer.precision != "bf16": - model_cfg.megatron_amp_O2 = False - model_cfg.sequence_parallel = False - model_cfg.activations_checkpoint_granularity = None - model_cfg.activations_checkpoint_method = None - - model = MegatronVitClassificationModel.restore_from( - restore_path=cfg.model.restore_from_path, - trainer=trainer, - override_config_path=model_cfg, - save_restore_connector=save_restore_connector, - strict=True, - ) - - model.eval() - - # initialize apex DDP strategy - def dummy(): - return - - if trainer.strategy.launcher is not None: - trainer.strategy.launcher.launch(dummy, trainer=trainer) - trainer.strategy.setup_environment() - - os.makedirs(f"{output_dir}/onnx/", exist_ok=True) - os.makedirs(f"{output_dir}/plan/", exist_ok=True) - - model = VITWrapper(model) - - model.export(f"{output_dir}/onnx/vit.onnx", dynamic_axes={'tokens': {0: 'B'}}) - - input_profile = {} - bs1_example = model.input_example(max_batch=1, max_dim=max_dim)[0] - bsmax_example = model.input_example(max_batch=max_batch_size, max_dim=max_dim)[0] - input_profile['tokens'] = [tuple(bs1_example.shape), tuple(bsmax_example.shape), tuple(bsmax_example.shape)] - build_engine( - f"{output_dir}/onnx/vit.onnx", - f"{output_dir}/plan/vit.plan", - fp16=(trt_precision in [16, '16', '16-mixed']), - input_profile=input_profile, - timing_cache=None, - workspace_size=0, - ) - - -if __name__ == '__main__': - main() diff --git a/nemo/deploy/__init__.py b/nemo/deploy/__init__.py deleted file mode 100644 index 7157a7ef29f3..000000000000 --- a/nemo/deploy/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .deploy_base import DeployBase -from .deploy_pytriton import DeployPyTriton -from .query import NemoQuery -from .triton_deployable import ITritonDeployable diff --git a/nemo/deploy/deploy_base.py b/nemo/deploy/deploy_base.py deleted file mode 100644 index 334e2e23ef17..000000000000 --- a/nemo/deploy/deploy_base.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import importlib -from abc import ABC, abstractmethod - -from pytorch_lightning import Trainer - -from nemo.core.classes.modelPT import ModelPT -from nemo.deploy.triton_deployable import ITritonDeployable - - -class DeployBase(ABC): - def __init__( - self, - triton_model_name: str, - triton_model_version: int = 1, - checkpoint_path: str = None, - model=None, - max_batch_size: int = 128, - port: int = 8000, - http_address="0.0.0.0", - ): - self.checkpoint_path = checkpoint_path - self.triton_model_name = triton_model_name - self.triton_model_version = triton_model_version - self.max_batch_size = max_batch_size - self.model = model - self.port = port - self.http_address = http_address - self.triton = None - - if checkpoint_path is None and model is None: - raise Exception("Either checkpoint_path or model should be provided.") - - @abstractmethod - def deploy(self): - pass - - @abstractmethod - def serve(self): - pass - - @abstractmethod - def run(self): - pass - - @abstractmethod - def stop(self): - pass - - def _init_nemo_model(self): - if self.checkpoint_path is not None: - model_config = ModelPT.restore_from(self.checkpoint_path, return_config=True) - module_path, class_name = DeployBase.get_module_and_class(model_config.target) - cls = getattr(importlib.import_module(module_path), class_name) - self.model = cls.restore_from(restore_path=self.checkpoint_path, trainer=Trainer()) - self.model.freeze() - - # has to turn off activations_checkpoint_method for inference - try: - self.model.model.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass - - if self.model is None: - raise Exception("There is no model to deploy.") - - self._is_model_deployable() - - def _is_model_deployable(self): - if not issubclass(type(self.model), ITritonDeployable): - raise Exception( - "This model is not deployable to Triton." "nemo.deploy.ITritonDeployable class should be inherited" - ) - else: - return True - - @staticmethod - def get_module_and_class(target: str): - l = target.rindex(".") - return target[0:l], target[l + 1 : len(target)] diff --git a/nemo/deploy/deploy_pytriton.py b/nemo/deploy/deploy_pytriton.py deleted file mode 100644 index ece63bff03cc..000000000000 --- a/nemo/deploy/deploy_pytriton.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from pytriton.model_config import ModelConfig, Tensor -from pytriton.triton import Triton, TritonConfig -from .deploy_base import DeployBase - - -class DeployPyTriton(DeployBase): - - """ - Deploys any models to Triton Inference Server that implements ITritonDeployable interface in nemo.deploy. - - Example: - from nemo.deploy import DeployPyTriton, NemoQuery - from nemo.export import TensorRTLLM - - trt_llm_exporter = TensorRTLLM(model_dir="/path/for/model/files") - trt_llm_exporter.export( - nemo_checkpoint_path="/path/for/nemo/checkpoint", - model_type="llama", - n_gpus=1, - ) - - nm = DeployPyTriton(model=trt_llm_exporter, triton_model_name="model_name", port=8000) - nm.deploy() - nm.run() - nq = NemoQuery(url="localhost", model_name="model_name") - - prompts = ["hello, testing GPT inference", "another GPT inference test?"] - output = nq.query_llm(prompts=prompts, max_output_len=100) - print("prompts: ", prompts) - print("") - print("output: ", output) - print("") - - prompts = ["Give me some info about Paris", "Do you think Londan is a good city to visit?", "What do you think about Rome?"] - output = nq.query_llm(prompts=prompts, max_output_len=250) - print("prompts: ", prompts) - print("") - print("output: ", output) - print("") - - """ - - def __init__( - self, - triton_model_name: str, - triton_model_version: int = 1, - checkpoint_path: str = None, - model=None, - max_batch_size: int = 128, - port: int = 8000, - http_address="0.0.0.0", - ): - - """ - A nemo checkpoint or model is expected for serving on Triton Inference Server. - - Args: - triton_model_name (str): Name for the service - triton_model_version(int): Version for the service - checkpoint_path (str): path of the nemo file - model (ITritonDeployable): A model that implements the ITritonDeployable from nemo.deploy import ITritonDeployable - max_batch_size (int): max batch size - port (int) : port for the Triton server - http_address (str): http address for Triton server to bind. - """ - - super().__init__( - triton_model_name=triton_model_name, - triton_model_version=triton_model_version, - checkpoint_path=checkpoint_path, - model=model, - max_batch_size=max_batch_size, - port=port, - http_address=http_address, - ) - - def deploy(self): - - """ - Deploys any models to Triton Inference Server. - """ - - self._init_nemo_model() - - try: - triton_config = TritonConfig(http_address=self.http_address, http_port=self.port) - self.triton = Triton(config=triton_config) - self.triton.bind( - model_name=self.triton_model_name, - model_version=self.triton_model_version, - infer_func=self.model.triton_infer_fn, - inputs=self.model.get_triton_input, - outputs=self.model.get_triton_output, - config=ModelConfig(max_batch_size=self.max_batch_size), - ) - except Exception as e: - self.triton = None - print(e) - - def serve(self): - - """ - Starts serving the model and waits for the requests - """ - - if self.triton is None: - raise Exception("deploy should be called first.") - - try: - self.triton.serve() - except Exception as e: - self.triton = None - print(e) - - def run(self): - - """ - Starts serving the model asynchronously. - """ - - if self.triton is None: - raise Exception("deploy should be called first.") - - self.triton.run() - - def stop(self): - """ - Stops serving the model. - """ - - if self.triton is None: - raise Exception("deploy should be called first.") - - self.triton.stop() diff --git a/nemo/deploy/query.py b/nemo/deploy/query.py deleted file mode 100644 index e3656aeb7a8a..000000000000 --- a/nemo/deploy/query.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import typing - -import numpy as np -from pytriton.client import ModelClient - -from .utils import str_list2numpy - - -class NemoQuery: - """ - Sends a query to Triton for LLM inference - - Example: - from nemo.deploy import NemoQuery - - nq = NemoQuery(url="localhost", model_name="GPT-2B") - - prompts = ["hello, testing GPT inference", "another GPT inference test?"] - output = nq.query_llm( - prompts=prompts, - max_output_len=100, - top_k=1, - top_p=0.0, - temperature=0.0, - ) - print("prompts: ", prompts) - """ - - def __init__(self, url, model_name): - self.url = url - self.model_name = model_name - - def query_llm( - self, prompts, max_output_token=512, top_k=1, top_p=0.0, temperature=1.0, init_timeout=600.0, - ): - """ - Exports nemo checkpoints to TensorRT-LLM. - - Args: - prompts (List(str)): list of sentences. - max_output_token (int): max generated tokens. - top_k (int): limits us to a certain number (K) of the top tokens to consider. - top_p (float): limits us to the top tokens within a certain probability mass (p). - temperature (float): A parameter of the softmax function, which is the last layer in the network. - init_timeout (flat): timeout for the connection. - """ - - prompts = str_list2numpy(prompts) - max_output_token = np.full(prompts.shape, max_output_token, dtype=np.int_) - top_k = np.full(prompts.shape, top_k, dtype=np.int_) - top_p = np.full(prompts.shape, top_p, dtype=np.single) - temperature = np.full(prompts.shape, temperature, dtype=np.single) - - with ModelClient(self.url, self.model_name, init_timeout_s=init_timeout) as client: - result_dict = client.infer_batch( - prompts=prompts, max_output_token=max_output_token, top_k=top_k, top_p=top_p, temperature=temperature, - ) - output_type = client.model_config.outputs[0].dtype - - if output_type == np.bytes_: - sentences = np.char.decode(result_dict["outputs"].astype("bytes"), "utf-8") - return sentences - else: - return result_dict["outputs"] diff --git a/nemo/deploy/triton_deployable.py b/nemo/deploy/triton_deployable.py deleted file mode 100644 index 084cc828882e..000000000000 --- a/nemo/deploy/triton_deployable.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from abc import ABC, abstractmethod -import numpy as np - - -class ITritonDeployable(ABC): - @abstractmethod - def get_triton_input(self): - pass - - @abstractmethod - def get_triton_output(self): - pass - - @abstractmethod - def triton_infer_fn(self, **inputs: np.ndarray): - pass diff --git a/nemo/deploy/utils.py b/nemo/deploy/utils.py deleted file mode 100644 index b5b01db3654f..000000000000 --- a/nemo/deploy/utils.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import typing - -import numpy as np -import torch -from pytriton.model_config import Tensor - - -def typedict2tensor( - typedict_class, - overwrite_kwargs: typing.Optional[typing.Dict[str, typing.Any]] = None, - defaults: typing.Optional[typing.Dict[str, typing.Any]] = None, -): - def _map_type(type_): - if type_ is int: - return np.int32 - elif type_ is float: - return np.float32 - elif type_ is bool: - return np.bool_ - elif type_ is str: - return bytes - else: - raise PyTritonBadParameterError(f"Unknown type {type_}") - - def _get_tensor_params(type_): - count = 0 - while typing.get_origin(type_) is list: - type_ = typing.get_args(type_)[0] - count += 1 - count -= 1 # we don't want to count the last dimension - shape = (-1,) * count if count > 1 else (1,) - return {"shape": shape, "dtype": _map_type(type_)} - - overwrite_kwargs = overwrite_kwargs or {} - return tuple( - Tensor(name=name, **_get_tensor_params(type_), **overwrite_kwargs) - for name, type_ in typing.get_type_hints(typedict_class).items() - ) - - -def str_list2numpy(str_list: typing.List[str]) -> np.ndarray: - str_ndarray = np.array(str_list)[..., np.newaxis] - return np.char.encode(str_ndarray, "utf-8") - - -def str_ndarray2list(str_ndarray: np.ndarray) -> typing.List[str]: - str_ndarray = str_ndarray.astype("bytes") - str_ndarray = np.char.decode(str_ndarray, encoding="utf-8") - str_ndarray = str_ndarray.squeeze(axis=-1) - return str_ndarray.tolist() - - -def cast_output(data, required_dtype): - if isinstance(data, torch.Tensor): - data = data.cpu().numpy() - elif not isinstance(data, np.ndarray): - data = np.array(data) - - data_is_str = required_dtype in (object, np.object_, bytes, np.bytes_) - if data_is_str: - data = np.char.encode(data, "utf-8") - - if data.ndim < 2: - data = data[..., np.newaxis] - return data.astype(required_dtype) diff --git a/nemo/export/__init__.py b/nemo/export/__init__.py deleted file mode 100644 index bc2a8d887357..000000000000 --- a/nemo/export/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .tensorrt_llm import TensorRTLLM diff --git a/nemo/export/tensorrt_llm.py b/nemo/export/tensorrt_llm.py deleted file mode 100644 index 8de9e79cbaff..000000000000 --- a/nemo/export/tensorrt_llm.py +++ /dev/null @@ -1,273 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import os -import shutil -from pathlib import Path - -import numpy as np -import tensorrt_llm -import torch -from pytriton.decorators import batch -from pytriton.model_config import Tensor - -from nemo.deploy import ITritonDeployable -from nemo.deploy.utils import cast_output, str_ndarray2list - -from .trt_llm.model_config_trt import model_config_to_tensorrt_llm -from .trt_llm.nemo_utils import get_tokenzier, nemo_to_model_config -from .trt_llm.quantization_utils import naive_quantization -from .trt_llm.tensorrt_llm_run import generate, load -from .utils import get_prompt_embedding_table, is_nemo_file, torch_to_numpy - - -class TensorRTLLM(ITritonDeployable): - - """ - Exports nemo checkpoints to TensorRT-LLM and run fast inference. - - Example: - from nemo.export import TensorRTLLM - - trt_llm_exporter = TensorRTLLM(model_dir="/path/for/model/files") - trt_llm_exporter.export( - nemo_checkpoint_path="/path/for/nemo/checkpoint", - model_type="llama", - n_gpus=1, - ) - - output = trt_llm_exporter.forward(["Hi, how are you?", "I am good, thanks, how about you?"]) - print("output: ", output) - - """ - - def __init__(self, model_dir: str): - """ - Args: - model_dir (str): path for storing the TensorRT-LLM model files. - """ - - self.model_dir = model_dir - self.model = None - self.tokenizer = None - self.prompt_table = None - self.task_vocab_size = None - self.n_gpus = None - self.config = None - self._load() - - def _load(self): - self.model = None - self.tokenizer = None - self.prompt_table = None - self.task_vocab_size = None - self.n_gpus = None - self.config = None - - if Path(self.model_dir).exists(): - folders = os.listdir(self.model_dir) - if len(folders) > 0: - try: - self._load_config_file() - self.tokenizer = get_tokenzier(Path(os.path.join(self.model_dir))) - self.model = load(tokenizer=self.tokenizer, engine_dir=self.model_dir) - self._load_prompt_table() - except: - raise Exception( - "Files in the TensorRT-LLM folder is corrupted and model needs to be exported again." - ) - - def _load_prompt_table(self): - path = Path(os.path.join(self.model_dir, "__prompt_embeddings__.npy")) - if path.exists(): - self.prompt_table = torch.from_numpy(np.load(path)) - self.task_vocab_size = 1 - - dtype = self.config['builder_config']['precision'] - self.prompt_table = self.prompt_table.cuda().to(dtype=tensorrt_llm._utils.str_dtype_to_torch(dtype)) - - if self.prompt_table.shape[1] != self.config["builder_config"]["hidden_size"]: - raise Exception( - "Hidden dimension of the model is {0} and does not match with the dimension of the prompt table.".format( - self.config["builder_config"]["hidden_size"] - ) - ) - else: - self.prompt_table = None - self.task_vocab_size = None - - def _load_config_file(self): - engine_dir = Path(self.model_dir) - config_path = engine_dir / 'config.json' - if config_path.exists(): - with open(config_path, 'r') as f: - self.config = json.load(f) - else: - raise FileNotFoundError("file: {0} could not be found.".format(config_path)) - - def export( - self, - nemo_checkpoint_path: str, - model_type: str, - prompt_embeddings_table=None, - delete_existing_files: bool = True, - n_gpus: int = 1, - max_input_token: int = 512, - max_output_token: int = 512, - max_batch_size: int = 32, - quantization: bool = None, - parallel_build: bool = False, - max_prompt_embedding_table_size: int = 0, - ): - """ - Exports nemo checkpoints to TensorRT-LLM. - - Args: - nemo_checkpoint_path (str): path for the nemo checkpoint. - model_type (str): type of the model. Currently supports "llama" and "gptnext". - prompt_embeddings_table (str): prompt embeddings table. - delete_existing_files (bool): if Truen, deletes all the files in model_dir. - n_gpus (int): number of GPUs to use for inference. - max_input_token (int): max input length. - max_output_token (int): max output length. - max_batch_size (int): max batch size. - quantization (bool): if True, applies naive quantization. - parallel_build (bool): build in parallel or not. - """ - - if prompt_embeddings_table is not None: - if not isinstance(prompt_embeddings_table, np.ndarray): - raise TypeError("Only numpy array is allowed for the prompt embeddings table.") - - if len(prompt_embeddings_table.shape) != 2: - raise Exception("A two dimensional prompt embeddings table for a sinlge task is only supported.") - - if Path(self.model_dir).exists(): - if delete_existing_files and len(os.listdir(self.model_dir)) > 0: - for files in os.listdir(self.model_dir): - path = os.path.join(self.model_dir, files) - try: - shutil.rmtree(path) - except OSError: - os.remove(path) - - if len(os.listdir(self.model_dir)) > 0: - raise Exception("Couldn't delete all files.") - elif len(os.listdir(self.model_dir)) > 0: - raise Exception("There are files in this folder. Try setting delete_existing_files=True.") - else: - Path(self.model_dir).mkdir(parents=True, exist_ok=True) - - self.model = None - - nemo_export_dir = os.path.join(self.model_dir, "/tmp_nemo/") - model_configs, self.tokenizer = nemo_to_model_config( - in_file=nemo_checkpoint_path, decoder_type=model_type, gpus=n_gpus, nemo_export_dir=nemo_export_dir - ) - - if max_prompt_embedding_table_size == 0 and prompt_embeddings_table is not None: - max_prompt_embedding_table_size = len(prompt_embeddings_table) - - model_config_to_tensorrt_llm( - model_configs, - self.model_dir, - n_gpus, - max_input_len=max_input_token, - max_output_len=max_output_token, - max_batch_size=max_batch_size, - max_prompt_embedding_table_size=max_prompt_embedding_table_size, - ) - - if prompt_embeddings_table is not None: - np.save(os.path.join(self.model_dir, "__prompt_embeddings__.npy"), prompt_embeddings_table) - - shutil.copy(os.path.join(nemo_export_dir, "tokenizer.model"), self.model_dir) - shutil.rmtree(nemo_export_dir) - self._load() - - def forward( - self, input_texts, max_output_token=512, top_k: int = 1, top_p: float = 0.0, temperature: float = 1.0, - ): - """ - Exports nemo checkpoints to TensorRT-LLM. - - Args: - input_texts (List(str)): list of sentences. - max_output_token (int): max generated tokens. - top_k (int): limits us to a certain number (K) of the top tokens to consider. - top_p (float): limits us to the top tokens within a certain probability mass (p). - temperature (float): A parameter of the softmax function, which is the last layer in the network. - """ - if self.model is None: - raise Exception( - "A nemo checkpoint should be exported and " "TensorRT LLM should be loaded first to run inference." - ) - else: - return generate( - input_texts=input_texts, - max_output_len=max_output_token, - host_context=self.model, - top_k=top_k, - top_p=top_p, - temperature=temperature, - prompt_table=self.prompt_table, - task_vocab_size=self.task_vocab_size, - ) - - def get_hidden_size(self): - if self.config is None: - return None - else: - return self.config["builder_config"]["hidden_size"] - - @property - def get_triton_input(self): - inputs = ( - Tensor(name="prompts", shape=(1,), dtype=bytes), - Tensor(name="max_output_token", shape=(1,), dtype=np.int_), - Tensor(name="top_k", shape=(1,), dtype=np.int_), - Tensor(name="top_p", shape=(1,), dtype=np.single), - Tensor(name="temperature", shape=(1,), dtype=np.single), - ) - return inputs - - @property - def get_triton_output(self): - outputs = (Tensor(name="outputs", shape=(1,), dtype=bytes),) - return outputs - - @batch - def triton_infer_fn(self, **inputs: np.ndarray): - try: - input_texts = str_ndarray2list(inputs.pop("prompts")) - max_output_token = inputs.pop("max_output_token") - top_k = inputs.pop("top_k") - top_p = inputs.pop("top_p") - temperature = inputs.pop("temperature") - - output_texts = self.forward( - input_texts=input_texts, - max_output_token=max_output_token[0][0], - top_k=top_k[0][0], - top_p=top_p[0][0], - temperature=temperature[0][0], - ) - - output = cast_output(output_texts, np.bytes_) - return {"outputs": output} - except Exception as error: - err_msg = "An error occurred: {0}".format(str(error)) - output = cast_output([err_msg], np.bytes_) - return {"outputs": output} diff --git a/nemo/export/trt_llm/__init__.py b/nemo/export/trt_llm/__init__.py deleted file mode 100644 index 68233d55e62e..000000000000 --- a/nemo/export/trt_llm/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""LLM deployment package with tensorrt_llm.""" - -from mpi4py import MPI - -# Pre load MPI libs to avoid tensorrt_llm importing failures. -print(f"Loaded mpi lib {MPI.__file__} successfully") - -# Pre import tensorrt_llm -try: - import tensorrt_llm -except Exception as e: - print( - "tensorrt_llm package is not installed. Please build or install tensorrt_llm package" - " properly before calling the llm deployment API." - ) - raise (e) - -from .huggingface_utils import * # noqa -from .model_config_trt import * # noqa -from .model_config_utils import * # noqa -from .nemo_utils import * # noqa -from .quantization_utils import * # noqa -from .tensorrt_llm_run import * # noqa diff --git a/nemo/export/trt_llm/decoder/__init__.py b/nemo/export/trt_llm/decoder/__init__.py deleted file mode 100644 index 2128a4b8fd64..000000000000 --- a/nemo/export/trt_llm/decoder/__init__.py +++ /dev/null @@ -1,56 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""LLM Decoder implementation for tensorrt_llm conversion.""" -from typing import Dict, Type - -import tensorrt as trt - -from ..model_config import DECODER_GPT2, DECODER_GPTJ, DECODER_GPTNEXT, DECODER_LLAMA, QUANTIZATION_NONE -from .decoder import DecoderLayerBuilder, DecoderLayerConfigBuilder -from .gpt import GPTDecoderLayerBuilder, GPTDecoderLayerConfigBuilder -from .gptj import GPTJDecoderLayerBuilder, GPTJDecoderLayerConfigBuilder -from .llama import LLAMADecoderLayerBuilder, LLAMADecoderLayerConfigBuilder - -DECODER_CONFIG_REGISTRY: Dict[str, Type[DecoderLayerConfigBuilder]] = { - DECODER_GPT2: GPTDecoderLayerConfigBuilder, - DECODER_GPTJ: GPTJDecoderLayerConfigBuilder, - DECODER_LLAMA: LLAMADecoderLayerConfigBuilder, -} - - -def build_decoder_layer_config(layer, decoder: str, dtype=trt.float16, rank=0, tensor_parallel=1): - """Builds the decoder layer config with the input torch module.""" - assert decoder in DECODER_CONFIG_REGISTRY, f"{decoder} not supported" - return DECODER_CONFIG_REGISTRY[decoder](decoder, dtype, rank, tensor_parallel).build_layer(layer) - - -DECODER_REGISTRY: Dict[str, Type[DecoderLayerBuilder]] = { - DECODER_GPT2: GPTDecoderLayerBuilder, - DECODER_GPTJ: GPTJDecoderLayerBuilder, - DECODER_LLAMA: LLAMADecoderLayerBuilder, - DECODER_GPTNEXT: GPTDecoderLayerBuilder, -} - - -def build_decoder_layer( - layer, - layer_id: int, - num_layers: int, - dtype=trt.float16, - quantization=QUANTIZATION_NONE, - rank=0, - tensor_parallel=1, -): - """Builds the tensorrt llm decoder layer module with the layer config as the input.""" - assert layer.decoder_type in DECODER_REGISTRY, f"{layer.decoder_type} not supported" - builder = DECODER_REGISTRY[layer.decoder_type] - decoder_builder = builder(layer, layer_id, num_layers, dtype, quantization, rank, tensor_parallel) - return decoder_builder.decoder diff --git a/nemo/export/trt_llm/decoder/decoder.py b/nemo/export/trt_llm/decoder/decoder.py deleted file mode 100644 index f4eeab0be5f6..000000000000 --- a/nemo/export/trt_llm/decoder/decoder.py +++ /dev/null @@ -1,202 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""The parent decoder class implementation for model_config and tensorrt_llm conversion.""" - -from abc import ABC, abstractmethod -from typing import Optional - -import tensorrt as trt -from transformers.activations import ACT2FN - -from ..model_config import QUANTIZATION_NONE, AttentionConfig, DecoderLayerConfig, LayernormConfig, MLPConfig -from ..quantization_utils import quantize_linear -from ..tensor_utils import get_tensor_parallel_group - - -def _get_hidden_act(act_func): - """Returns the name of the hidden activation functon based on ACT2FN.""" - if isinstance(act_func, str): - return act_func - - for name, func in ACT2FN.items(): - if isinstance(func, tuple): - if isinstance(act_func, func[0]): - return name - elif isinstance(act_func, func): - return name - assert False, f"Cannot find name for {act_func}" - - -class DecoderLayerConfigBuilder(ABC): - """A config builder that translate the LLM decoder layer to the DecoderLayerConfig.""" - - @abstractmethod - def hidden_act_fn(self, layer): - """Returns the hidden act fn in the MLP layer, e.g. SiLUActivation or NewGELUActivation.""" - pass - - @abstractmethod - def infer_num_attention_heads(self, layer): - """Returns the num of attention heads of the layer.""" - pass - - @abstractmethod - def infer_max_position_embeddings(self, layer): - """Returns the max positional embeddings of the layer.""" - pass - - @abstractmethod - def build_input_layernorm(self, layer) -> LayernormConfig: - """Returns the built input layernorm layer.""" - pass - - @abstractmethod - def build_attention(self, layer) -> AttentionConfig: - """Returns the built attention layer.""" - pass - - @abstractmethod - def build_mlp(self, layer) -> MLPConfig: - """Returns the built mlp layer.""" - pass - - @abstractmethod - def build_post_layernorm(self, layer) -> Optional[LayernormConfig]: - """Returns the built post layernorm.""" - pass - - def __init__( - self, decoder_type: str, dtype: trt.DataType = trt.float16, rank: int = 0, tensor_parallel: int = 1, - ): - """Initializes the DecoderLayerConfigBuilder.""" - self.decoder_type = decoder_type - self.dtype = dtype - self.rank = rank - self.tensor_parallel = tensor_parallel - - def build_layer(self, layer) -> DecoderLayerConfig: - """Builds the decoder layer and returns the DecoderLayer.""" - decoder = DecoderLayerConfig() - - decoder.decoder_type = self.decoder_type - decoder.num_attention_heads = self.infer_num_attention_heads(layer) - decoder.num_kv_heads = self.infer_num_kv_heads(layer) - decoder.max_position_embeddings = self.infer_max_position_embeddings(layer) - - decoder.input_layernorm = self.build_input_layernorm(layer) - decoder.attention = self.build_attention(layer) - decoder.post_layernorm = self.build_post_layernorm(layer) - decoder.mlp = self.build_mlp(layer) - decoder.mlp.hidden_act = _get_hidden_act(self.hidden_act_fn(layer)).split("_")[0] - - return decoder - - def infer_num_kv_heads(self, layer): - """Returns the num of key value heads of the layer.""" - return self.infer_num_attention_heads(layer) - - -class DecoderLayerBuilder(ABC): - """An abstracted transformer decoder layer with tensorrt_llm implementation taking DecoderLayerConfig as the input. - - Individual decoder layers are supposed to extend this class and implement the customized - abstracted method. - """ - - @abstractmethod - def build_decoder(self, layer): - """Returns the built decoder layer.""" - pass - - def __init__( - self, - layer: DecoderLayerConfig, - layer_id: int, - num_layers: int, - dtype: trt.DataType = trt.float16, - quantization: str = QUANTIZATION_NONE, - rank: int = 0, - tensor_parallel: int = 1, - ): - """Initializes the DecoderLayer.""" - super().__init__() - assert isinstance(dtype, trt.DataType) - self.layer_id = layer_id - self.num_layers = num_layers - self.dtype = dtype - self.quantization = quantization - self.rank = rank - self.tensor_parallel = tensor_parallel - self.tp_group = get_tensor_parallel_group(tensor_parallel) - - self.hidden_size = layer.hidden_size - self.num_attention_heads = layer.num_attention_heads - self.num_kv_heads = layer.num_kv_heads if layer.num_kv_heads > 0 else layer.num_attention_heads - - assert ( - self.num_attention_heads % self.num_kv_heads - ) == 0, "MQA/GQA requires the number of heads to be divisible by the number of K/V heads." - assert (self.num_kv_heads % self.tensor_parallel) == 0 or (self.tensor_parallel % self.num_kv_heads) == 0, ( - "MQA/GQA requires either the number of K/V heads to be divisible by the number of GPUs" - " OR the number of GPUs to be divisible by the number of K/V heads." - ) - - self.max_position_embeddings = layer.max_position_embeddings - self.hidden_act = layer.mlp.hidden_act - - self.decoder = self.build_decoder(layer) - self.assign_weights(layer) - self.quantize(layer) - - def assign_weights(self, layer: DecoderLayerConfig): - """Assign the weights to the attention tensorrt_llm layer.""" - self.decoder.input_layernorm.weight.value = layer.input_layernorm.weight - if layer.input_layernorm.bias is not None: - self.decoder.input_layernorm.bias.value = layer.input_layernorm.bias - - self.decoder.attention.qkv.weight.value = layer.attention.qkv.weight - if layer.attention.qkv.bias is not None: - self.decoder.attention.qkv.bias.value = layer.attention.qkv.bias - - self.decoder.attention.dense.weight.value = layer.attention.dense.weight - if self.decoder.attention.dense.bias is not None: - self.decoder.attention.dense.bias.value = layer.attention.dense.bias - - if layer.post_layernorm is not None: - self.decoder.post_layernorm.weight.value = layer.post_layernorm.weight - if layer.post_layernorm.bias is not None: - self.decoder.post_layernorm.bias.value = layer.post_layernorm.bias - - self.decoder.mlp.fc.weight.value = layer.mlp.fc.weight - self.decoder.mlp.proj.weight.value = layer.mlp.proj.weight - bias = layer.mlp.fc.bias is not None - if bias: - self.decoder.mlp.fc.bias.value = layer.mlp.fc.bias - self.decoder.mlp.proj.bias.value = layer.mlp.proj.bias - - if layer.mlp.gate: - self.decoder.mlp.gate.weight.value = layer.mlp.gate.weight - if bias: - self.decoder.mlp.gate.bias.value = layer.mlp.gate.bias - - def quantize(self, layer: DecoderLayerConfig): - """Quantizes the decoder layer based on the layer config.""" - self.decoder.attention.qkv = quantize_linear( - self.decoder.attention.qkv, self.quantization, layer.attention.qkv - ) - self.decoder.attention.dense = quantize_linear( - self.decoder.attention.dense, self.quantization, layer.attention.dense - ) - self.decoder.mlp.fc = quantize_linear(self.decoder.mlp.fc, self.quantization, layer.mlp.fc) - self.decoder.mlp.proj = quantize_linear(self.decoder.mlp.proj, self.quantization, layer.mlp.proj) - - if hasattr(self.decoder.mlp, "gate"): - self.decoder.mlp.gate = quantize_linear(self.decoder.mlp.gate, self.quantization, layer.mlp.gate) diff --git a/nemo/export/trt_llm/decoder/gpt.py b/nemo/export/trt_llm/decoder/gpt.py deleted file mode 100644 index 531e9deebcca..000000000000 --- a/nemo/export/trt_llm/decoder/gpt.py +++ /dev/null @@ -1,99 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""The GPT2 decoder implementation.""" - -from typing import Optional - -from tensorrt_llm.layers import AttentionMaskType, PositionEmbeddingType -from tensorrt_llm.models.gpt.model import GPTDecoderLayer -from typing_extensions import override - -from ..model_config import LINEAR_COLUMN, LINEAR_ROW, AttentionConfig, LayernormConfig, LinearConfig, MLPConfig -from .decoder import DecoderLayerBuilder, DecoderLayerConfigBuilder - - -class GPTDecoderLayerConfigBuilder(DecoderLayerConfigBuilder): - """The GPT2 implementation of the DecoderLayerConfigBuilder.""" - - @override - def hidden_act_fn(self, layer): - return layer.mlp.act - - @override - def infer_num_attention_heads(self, layer): - return layer.attn.num_heads - - @override - def infer_max_position_embeddings(self, layer): - return layer.attn.bias.shape[2] - - @override - def build_input_layernorm(self, layer) -> LayernormConfig: - return LayernormConfig.from_nn_module(layer.ln_1, dtype=self.dtype) - - @override - def build_attention(self, layer) -> AttentionConfig: - config = AttentionConfig() - config.qkv = LinearConfig.from_qkv_nn_modules( - [layer.attn.c_attn], rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, - ) - - config.dense = LinearConfig.from_nn_module( - layer.attn.c_proj, LINEAR_ROW, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, - ) - - return config - - @override - def build_mlp(self, layer) -> MLPConfig: - config = MLPConfig() - config.fc = LinearConfig.from_nn_module( - layer.mlp.c_fc, LINEAR_COLUMN, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, - ) - config.proj = LinearConfig.from_nn_module( - layer.mlp.c_proj, LINEAR_ROW, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, - ) - - return config - - @override - def build_post_layernorm(self, layer) -> Optional[LayernormConfig]: - return LayernormConfig.from_nn_module(layer.ln_2, dtype=self.dtype) - - -class GPTDecoderLayerBuilder(DecoderLayerBuilder): - """The GPT implementation of the DecoderLayer.""" - - @override - def build_decoder(self, layer): - rotary_pct = layer.rotary_pct - position_embedding_type = ( - PositionEmbeddingType.learned_absolute if rotary_pct == 0.0 else PositionEmbeddingType.rope_gpt_neox - ) - - bias_qkv = layer.attention.qkv.bias is not None - - return GPTDecoderLayer( - hidden_size=self.hidden_size, - num_attention_heads=self.num_attention_heads, - max_position_embeddings=self.max_position_embeddings, - num_layers=self.num_layers, - dtype=self.dtype, - apply_query_key_layer_scaling=False, - attention_mask_type=AttentionMaskType.causal, - hidden_act=self.hidden_act, - position_embedding_type=position_embedding_type, - rotary_embedding_percentage=rotary_pct, - inter_size=layer.ffn_hidden_size_local * self.tensor_parallel, - bias=bias_qkv, - tp_group=self.tp_group, - tp_size=self.tensor_parallel, - ) diff --git a/nemo/export/trt_llm/decoder/gptj.py b/nemo/export/trt_llm/decoder/gptj.py deleted file mode 100644 index 5edb679523e8..000000000000 --- a/nemo/export/trt_llm/decoder/gptj.py +++ /dev/null @@ -1,94 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""The GPTJ decoder implementation.""" - -from typing import Optional - -from tensorrt_llm.models.gptj.model import GPTJDecoderLayer -from typing_extensions import override - -from ..model_config import LINEAR_COLUMN, LINEAR_ROW, AttentionConfig, LayernormConfig, LinearConfig, MLPConfig -from .decoder import DecoderLayerBuilder, DecoderLayerConfigBuilder - - -class GPTJDecoderLayerConfigBuilder(DecoderLayerConfigBuilder): - """The GPTJ implementation of the DecoderLayerConfigBuilder.""" - - @override - def hidden_act_fn(self, layer): - """Returns the hidden act fn in the MLP layer, e.g. SiLUActivation or NewGELUActivation.""" - return layer.mlp.act - - @override - def infer_num_attention_heads(self, layer): - return layer.attn.num_attention_heads - - @override - def infer_max_position_embeddings(self, layer): - return layer.attn.bias.shape[2] - - @override - def build_input_layernorm(self, layer) -> LayernormConfig: - return LayernormConfig.from_nn_module(layer.ln_1, dtype=self.dtype) - - @override - def build_attention(self, layer) -> AttentionConfig: - config = AttentionConfig() - config.qkv = LinearConfig.from_qkv_nn_modules( - [layer.attn.q_proj, layer.attn.k_proj, layer.attn.v_proj], - rank=self.rank, - tensor_parallel=self.tensor_parallel, - dtype=self.dtype, - ) - - config.dense = LinearConfig.from_nn_module( - layer.attn.out_proj, LINEAR_ROW, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, - ) - - config.rotary_dim = layer.attn.rotary_dim - - return config - - @override - def build_mlp(self, layer) -> MLPConfig: - config = MLPConfig() - config.fc = LinearConfig.from_nn_module( - layer.mlp.fc_in, LINEAR_COLUMN, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, - ) - config.proj = LinearConfig.from_nn_module( - layer.mlp.fc_out, LINEAR_ROW, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, - ) - - return config - - @override - def build_post_layernorm(self, layer) -> Optional[LayernormConfig]: - # GPTJ do not have post layer_norm - return None - - -class GPTJDecoderLayerBuilder(DecoderLayerBuilder): - """The GPTJ implementation of the DecoderLayer.""" - - @override - def build_decoder(self, layer): - assert self.tensor_parallel == 1 and self.rank == 0, "Only single GPU is supported for GPTJ" - - return GPTJDecoderLayer( - hidden_size=self.hidden_size, - num_attention_heads=self.num_attention_heads, - max_position_embeddings=self.max_position_embeddings, - rotary_dim=layer.attention.rotary_dim, - dtype=self.dtype, - hidden_act=self.hidden_act, - tp_group=self.tp_group, - tp_size=self.tensor_parallel, - ) diff --git a/nemo/export/trt_llm/decoder/llama.py b/nemo/export/trt_llm/decoder/llama.py deleted file mode 100644 index 01d48ba20c8e..000000000000 --- a/nemo/export/trt_llm/decoder/llama.py +++ /dev/null @@ -1,101 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""The LLAMA/LLAMA2 decoder implementation.""" - -from typing import Optional - -from tensorrt_llm.functional import non_gated_version -from tensorrt_llm.layers import AttentionMaskType, PositionEmbeddingType -from tensorrt_llm.models.llama.model import LLaMADecoderLayer -from typing_extensions import override - -from ..model_config import LINEAR_COLUMN, LINEAR_ROW, AttentionConfig, LayernormConfig, LinearConfig, MLPConfig -from .decoder import DecoderLayerBuilder, DecoderLayerConfigBuilder - - -class LLAMADecoderLayerConfigBuilder(DecoderLayerConfigBuilder): - """The LLAMA implementation of the DecoderLayerConfigBuilder.""" - - @override - def hidden_act_fn(self, layer): - return layer.mlp.act_fn - - @override - def infer_num_attention_heads(self, layer): - return layer.self_attn.num_heads - - @override - def infer_num_kv_heads(self, layer): - return layer.self_attn.num_key_value_heads - - @override - def infer_max_position_embeddings(self, layer): - return layer.self_attn.max_position_embeddings - - @override - def build_input_layernorm(self, layer) -> LayernormConfig: - return LayernormConfig.from_nn_module(layer.input_layernorm, dtype=self.dtype) - - @override - def build_attention(self, layer) -> AttentionConfig: - config = AttentionConfig() - config.qkv = LinearConfig.from_qkv_nn_modules( - [layer.self_attn.q_proj, layer.self_attn.k_proj, layer.self_attn.v_proj], - rank=self.rank, - tensor_parallel=self.tensor_parallel, - dtype=self.dtype, - ) - - config.dense = LinearConfig.from_nn_module( - layer.self_attn.o_proj, LINEAR_ROW, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, - ) - - return config - - @override - def build_mlp(self, layer) -> MLPConfig: - config = MLPConfig() - config.fc = LinearConfig.from_nn_module( - layer.mlp.gate_proj, LINEAR_COLUMN, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, - ) - config.proj = LinearConfig.from_nn_module( - layer.mlp.down_proj, LINEAR_ROW, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, - ) - config.gate = LinearConfig.from_nn_module( - layer.mlp.up_proj, LINEAR_COLUMN, rank=self.rank, tensor_parallel=self.tensor_parallel, dtype=self.dtype, - ) - - return config - - @override - def build_post_layernorm(self, layer) -> Optional[LayernormConfig]: - return LayernormConfig.from_nn_module(layer.post_attention_layernorm, dtype=self.dtype) - - -class LLAMADecoderLayerBuilder(DecoderLayerBuilder): - """The LLAMA implementation of the DecoderLayer.""" - - @override - def build_decoder(self, layer): - return LLaMADecoderLayer( - layer_id=self.layer_id, - hidden_size=self.hidden_size, - num_attention_heads=self.num_attention_heads, - num_kv_heads=self.num_kv_heads, - max_position_embeddings=self.max_position_embeddings, - dtype=self.dtype, - attention_mask_type=AttentionMaskType.causal, - hidden_act=non_gated_version(self.hidden_act), - position_embedding_type=PositionEmbeddingType.rope_gpt_neox, - mlp_hidden_size=layer.ffn_hidden_size_local * self.tensor_parallel, - tp_group=self.tp_group, - tp_size=self.tensor_parallel, - ) diff --git a/nemo/export/trt_llm/huggingface_utils.py b/nemo/export/trt_llm/huggingface_utils.py deleted file mode 100644 index 6263183bbefb..000000000000 --- a/nemo/export/trt_llm/huggingface_utils.py +++ /dev/null @@ -1,138 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""The APIs to convert a huggingface model to model_config format.""" - -import copy -from typing import List, Tuple - -import numpy as np -import torch.nn as nn -from tensorrt_llm import str_dtype_to_trt -from transformers.models.llama.modeling_llama import LlamaRMSNorm - -from .decoder import build_decoder_layer_config -from .model_config import LINEAR_COLUMN, EmbeddingConfig, LayernormConfig, LinearConfig, ModelConfig -from .tensor_utils import split, torch_to_numpy_with_dtype - - -def _arch_to_decoder_type(arch: str): - arch_to_type = { - "GPT2LMHeadModel": "gpt2", - "GPTJForCausalLM": "gptj", - "LlamaForCausalLM": "llama", - } - return arch_to_type.get(arch, "") - - -def _check_model_compatibility(model: nn.Module) -> Tuple[bool, bool]: - """Returns whether the model is supported with the torch_to_tensorrt_llm API. - - And if positional embedding layer exists. - - We assumes the model to be assembled with one or two embedding layers, - a ModuleList of transformer decoders, - and a final layernorm. - Otherwise it will not be supported. - """ - num_embeddings = 0 - num_module_list = 0 - num_layer_norm = 0 - for module in model.children(): - if type(module) == nn.Embedding: - num_embeddings += 1 - elif type(module) == nn.ModuleList: - num_module_list += 1 - elif type(module) in [nn.LayerNorm, LlamaRMSNorm]: - num_layer_norm += 1 - - return ( - 1 <= num_embeddings and num_embeddings <= 2 and num_module_list == 1 and num_layer_norm == 1, - num_embeddings > 1, - ) - - -def _get_transformer_model(model: nn.Module) -> nn.Module: - """Returns the root module of the transformer model.""" - if hasattr(model, "transformer"): - # This is a LMHead model - return model.transformer - elif hasattr(model, "model"): - # LLAMA - return model.model - return model - - -def torch_to_model_config(model: nn.Module, gpus: int = 1,) -> List[ModelConfig]: - """The API to convert a torch or huggingface model to the ModelConfig format. - - The model has to be an LLM that we support for a successful conversion. - (See examples/deploy/llm/README.md.) - gpus: the number of inference gpus for multi gpu inferencing. - - Returns: - The list of converted ModelConfig, one for each gpu. - """ - transformer = _get_transformer_model(model) - - compatible, has_positional_embedding = _check_model_compatibility(transformer) - assert compatible, f"model {transformer} not supported" - - assert ( - model.config.architectures and len(model.config.architectures) >= 1 - ), f"Huggingface model config {model.config} does not have architectures" - - model_config_template = ModelConfig() - model_config_template.dtype = "float16" - dtype = str_dtype_to_trt(model_config_template.dtype) - - model_config_template.tensor_parallel = gpus - - for name, module in transformer.named_children(): - if type(module) == nn.Embedding: - if name != "wpe": - model_config_template.vocab_embedding = EmbeddingConfig.from_nn_module(module, dtype=dtype) - else: - assert has_positional_embedding - model_config_template.positional_embedding = EmbeddingConfig.from_nn_module(module, dtype=dtype) - if type(module) in [nn.LayerNorm, LlamaRMSNorm]: - model_config_template.final_layernorm = LayernormConfig.from_nn_module(module, dtype=dtype) - - model_configs = [] - for i in range(gpus): - model_configs.append(copy.deepcopy(model_config_template)) - model_configs[i].rank = i - - decoder_type = _arch_to_decoder_type(model.config.architectures[0]) - for name, module in transformer.named_children(): - if type(module) == nn.ModuleList: - for layer in module: - for i in range(gpus): - model_configs[i].layers.append( - build_decoder_layer_config(layer, decoder_type, rank=i, tensor_parallel=gpus, dtype=dtype) - ) - - if hasattr(model, "lm_head"): - lm_head_weight = torch_to_numpy_with_dtype(model.lm_head.weight, dtype=dtype) - else: - # We use wte weights if not provided. - lm_head_weight = model_configs[0].vocab_embedding.weight - - if model_configs[0].vocab_size_padded != model_configs[0].vocab_size: - pad_width = model_configs[0].vocab_size_padded - model_configs[0].vocab_size - lm_head_weight = np.pad(lm_head_weight, ((0, pad_width), (0, 0)), "constant", constant_values=0) - - for i in range(gpus): - model_configs[i].lm_head = LinearConfig(linear_type=LINEAR_COLUMN) - model_configs[i].lm_head.weight = np.ascontiguousarray( - split(lm_head_weight, model_configs[i].tensor_parallel, model_configs[i].rank) - ) - - return model_configs diff --git a/nemo/export/trt_llm/model_config.py b/nemo/export/trt_llm/model_config.py deleted file mode 100644 index b9515dd162a7..000000000000 --- a/nemo/export/trt_llm/model_config.py +++ /dev/null @@ -1,415 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""This module defines the model_config format. - -This format can be converted from huggingface, nemo or ammo quantized model. -And we will build tensorrt_llm engine from the context saved with this format. -""" - -import dataclasses -from dataclasses import dataclass, field -from typing import Dict, List, get_args, get_origin - -import numpy as np -import tensorrt as trt -import torch.nn as nn -from tensorrt_llm._utils import pad_vocab_size -from tensorrt_llm.functional import is_gated_activation -from transformers import LlamaConfig, PretrainedConfig -from transformers.models.llama.modeling_llama import LlamaRMSNorm - -from .tensor_utils import get_tensor_from_dict, split, torch_to_numpy_with_dtype - -DECODER_GPT2 = "gpt2" -DECODER_GPTJ = "gptj" -DECODER_LLAMA = "llama" -DECODER_GPTNEXT = "gptnext" - -QUANTIZATION_NONE = "" -QUANTIZATION_FP8 = "fp8" -QUANTIZATION_INT8_SQ = "int8_sq" - -LINEAR_COLUMN = "column" -LINEAR_ROW = "row" - -LAYERNORM_DEFAULT = "" -LAYERNORM_RMS = "rms" - -LAYER_DEFAULT = "" -LAYER_QKV = "qkv" - - -@dataclass -class EmbeddingConfig: - """The embedding layer config.""" - - weight: np.array = None - # Whether the embedding weights are local - is_local: bool = False - - @staticmethod - def from_nn_module(module: nn.Module, dtype=trt.float16): - """Converts an nn.Module to an EmbeddingConfig.""" - return EmbeddingConfig(weight=torch_to_numpy_with_dtype(module.weight, dtype)) - - @property - def local_vocab_size(self): - """Infers the vocab_size from the embedding layer weights shape.""" - return self.weight.shape[0] - - @property - def hidden_size(self): - """Infers the hidden_size from the embedding layer weights shape.""" - return self.weight.shape[1] - - -@dataclass -class LayernormConfig: - """The layernorm layer config.""" - - weight: np.array = None - bias: np.array = None - layernorm_type: str = LAYERNORM_DEFAULT - - @staticmethod - def from_nn_module(module: nn.Module, dtype=trt.float16): - """Converts an nn.Module to an LayernormConfig.""" - layernorm_type = LAYERNORM_RMS if type(module) == LlamaRMSNorm else LAYERNORM_DEFAULT - - config = LayernormConfig(weight=torch_to_numpy_with_dtype(module.weight, dtype), layernorm_type=layernorm_type) - if layernorm_type == LAYERNORM_DEFAULT: - config.bias = torch_to_numpy_with_dtype(module.bias, dtype) - - return config - - -@dataclass -class LinearConfig: - """The linear layer config.""" - - linear_type: str = "" - weight: np.array = None - bias: np.array = None - activation_scaling_factor: np.array = None - weights_scaling_factor: np.array = None - prequant_scaling_factor: np.array = None - layer_type: str = LAYER_DEFAULT - - @staticmethod - def from_nn_module(module: nn.Module, linear_type: str, rank=0, tensor_parallel=1, dtype=trt.float16): - """Converts an nn.Module to an LinearConfig.""" - weight = torch_to_numpy_with_dtype(module.weight, dtype) - if "Conv1D" in type(module).__name__: - weight = weight.transpose() - else: - assert type(module) == nn.Linear - - config = LinearConfig() - config.linear_type = linear_type - config.weight = np.ascontiguousarray( - split(weight, tensor_parallel, rank, dim=0 if linear_type == LINEAR_COLUMN else 1) - ) - - if hasattr(module, "bias") and module.bias is not None: - if linear_type == LINEAR_COLUMN: - config.bias = np.ascontiguousarray( - split(torch_to_numpy_with_dtype(module.bias, dtype), tensor_parallel, rank,) - ) - else: - config.bias = torch_to_numpy_with_dtype(module.bias, dtype) - - return config - - @staticmethod - def from_qkv_nn_modules(qkv_modules: List[nn.Module], rank=0, tensor_parallel=1, dtype=trt.float16): - """Converts the qkv modules to an LinearConfig.""" - config = LinearConfig() - config.linear_type = LINEAR_COLUMN - config.layer_type = LAYER_QKV - if len(qkv_modules) == 1: - # QKV layers combined as a single module, e.g. GPT2 - qkv_module = qkv_modules[0] - assert "Conv1D" in type(qkv_module).__name__ - - qkv_shape = qkv_module.weight.shape - # Decode the concat QKV weights and split them to different GPU rank. - config.weight = np.ascontiguousarray( - split( - torch_to_numpy_with_dtype(qkv_module.weight, dtype=dtype).reshape( - qkv_shape[0], 3, qkv_shape[-1] // 3 - ), - tensor_parallel, - rank, - dim=-1, - ) - .reshape(qkv_shape[0], -1) - .transpose() - ) - config.bias = np.ascontiguousarray( - split( - torch_to_numpy_with_dtype(qkv_module.bias, dtype=dtype).reshape(3, qkv_shape[-1] // 3), - tensor_parallel, - rank, - dim=-1, - ).reshape(-1) - ) - - elif len(qkv_modules) == 3: - # Separate QKV layers - for m in qkv_modules: - assert type(m) == nn.Linear - assert not (hasattr(m, "bias") and m.bias is not None) - - q_weight = split(torch_to_numpy_with_dtype(qkv_modules[0].weight), tensor_parallel, rank) - k_weight = split(torch_to_numpy_with_dtype(qkv_modules[1].weight), tensor_parallel, rank) - v_weight = split(torch_to_numpy_with_dtype(qkv_modules[2].weight), tensor_parallel, rank) - split_v = np.concatenate((q_weight, k_weight, v_weight)) - config.weight = np.ascontiguousarray(split_v) - - else: - assert False, f"QKV modules format {qkv_modules} not supported" - - return config - - -@dataclass -class AttentionConfig: - """The attention layer config.""" - - qkv: LinearConfig = None - dense: LinearConfig = None - - rotary_dim: int = -np.inf - - @staticmethod - def from_nemo( - weights_dict: Dict[str, np.ndarray], layer_id: int, rank: int = 0, - ): - """Converts the nemo weights and config to `AttentionConfig`.""" - attention = AttentionConfig() - attention.qkv = LinearConfig(linear_type=LINEAR_COLUMN, layer_type=LAYER_QKV) - attention.qkv.weight = get_tensor_from_dict( - weights_dict, f"layers.{layer_id}.attention.query_key_value.weight.{rank}" - ) - attention.qkv.bias = get_tensor_from_dict( - weights_dict, f"layers.{layer_id}.attention.query_key_value.bias.{rank}", - ) - - attention.dense = LinearConfig(linear_type=LINEAR_ROW) - attention.dense.weight = get_tensor_from_dict(weights_dict, f"layers.{layer_id}.attention.dense.weight.{rank}") - attention.dense.bias = get_tensor_from_dict(weights_dict, f"layers.{layer_id}.attention.dense.bias",) - return attention - - -@dataclass -class MLPConfig: - """The MLP layer config.""" - - fc: LinearConfig = None - gate: LinearConfig = None - proj: LinearConfig = None - hidden_act: str = "" - - @staticmethod - def from_nemo( - weights_dict: Dict[str, np.ndarray], - llm_config: PretrainedConfig, - layer_id: int, - rank: int = 0, - is_mcore: bool = False, - ): - """Converts the nemo weights and config to `MLPConfig`.""" - mlp = MLPConfig(hidden_act=llm_config.activation_function) - mlp.fc = LinearConfig(linear_type=LINEAR_COLUMN) - mlp.fc.weight = get_tensor_from_dict(weights_dict, f"layers.{layer_id}.mlp.dense_h_to_4h.weight.{rank}") - mlp.fc.bias = get_tensor_from_dict(weights_dict, f"layers.{layer_id}.mlp.dense_h_to_4h.bias.{rank}",) - - gated = is_gated_activation(mlp.hidden_act) - is_fast_glu = mlp.hidden_act in ['fast-geglu', 'fast-swiglu', 'fast-reglu'] - if gated: - mlp.gate = LinearConfig(linear_type=LINEAR_COLUMN) - layer_name = ( - f"layers.{layer_id}.mlp.dense_h_to_4h_2.weight.{rank}" - if isinstance(llm_config, LlamaConfig) and not is_mcore and not is_fast_glu - else f"layers.{layer_id}.mlp.dense_h_to_4h.gate.weight.{rank}" - ) - mlp.gate.weight = get_tensor_from_dict(weights_dict, layer_name,) - mlp.gate.bias = get_tensor_from_dict( - weights_dict, f"layers.{layer_id}.mlp.dense_h_to_4h.gate.bias.{rank}", - ) - - mlp.proj = LinearConfig(linear_type=LINEAR_ROW) - mlp.proj.weight = get_tensor_from_dict(weights_dict, f"layers.{layer_id}.mlp.dense_4h_to_h.weight.{rank}") - mlp.proj.bias = get_tensor_from_dict(weights_dict, f"layers.{layer_id}.mlp.dense_4h_to_h.bias") - return mlp - - -@dataclass -class DecoderLayerConfig: - """The decoder layer config.""" - - decoder_type: str = "" - input_layernorm: LayernormConfig = None - attention: AttentionConfig = None - post_layernorm: LayernormConfig = None - mlp: MLPConfig = None - - num_attention_heads: int = 0 - - num_kv_heads: int = 0 - max_position_embeddings: int = 0 - rotary_pct: float = 0 - - @property - def hidden_size(self): - """Returns the hidden size of the transformer model.""" - return self.mlp.fc.weight.shape[1] - - @property - def ffn_hidden_size_local(self): - """Returns the ffn hidden size of the transformer model.""" - return self.mlp.fc.weight.shape[0] - - @staticmethod - def from_nemo( - weights_dict: Dict[str, np.ndarray], - llm_config: PretrainedConfig, - decoder_type: str, - layer_id: int, - rank: int = 0, - is_mcore: bool = False, - ): - """Converts the nemo weights and config to `DecoderLayerConfig`.""" - layer_config = DecoderLayerConfig( - decoder_type=decoder_type, - num_attention_heads=llm_config.n_head, - max_position_embeddings=llm_config.n_positions, - rotary_pct=llm_config.rotary_pct if hasattr(llm_config, "rotary_pct") else 0, - num_kv_heads=(llm_config.num_kv_heads if hasattr(llm_config, "num_kv_heads") else 0), - ) - layer_config.input_layernorm = LayernormConfig() - layer_config.input_layernorm.layernorm_type = ( - LAYERNORM_RMS if isinstance(llm_config, LlamaConfig) else LAYERNORM_DEFAULT - ) - layer_config.input_layernorm.weight = get_tensor_from_dict( - weights_dict, f"layers.{layer_id}.input_layernorm.weight", - ) - layer_config.input_layernorm.bias = get_tensor_from_dict( - weights_dict, f"layers.{layer_id}.input_layernorm.bias", - ) - layer_config.post_layernorm = LayernormConfig() - layer_config.post_layernorm.layernorm_type = ( - LAYERNORM_RMS if isinstance(llm_config, LlamaConfig) else LAYERNORM_DEFAULT - ) - - layer_config.post_layernorm.weight = get_tensor_from_dict( - weights_dict, f"layers.{layer_id}.post_attention_layernorm.weight", - ) - layer_config.post_layernorm.bias = get_tensor_from_dict( - weights_dict, f"layers.{layer_id}.post_attention_layernorm.bias", - ) - - layer_config.attention = AttentionConfig.from_nemo(weights_dict, layer_id, rank,) - layer_config.mlp = MLPConfig.from_nemo(weights_dict, llm_config, layer_id, rank, is_mcore) - - return layer_config - - -def _from_dict(class_type, data): - """Helper function to load the data as a class_type. class_type must be a dataclass.""" - if data is None: - return None - - if dataclasses.is_dataclass(class_type): - fieldtypes = {f.name: f.type for f in dataclasses.fields(class_type)} - return class_type(**{f: _from_dict(fieldtypes[f], data[f]) for f in data}) - elif get_origin(class_type) == list and dataclasses.is_dataclass(get_args(class_type)[0]): - list_value = [] - for child in data: - child_class_type = get_args(class_type)[0] - list_value.append(_from_dict(child_class_type, child)) - return list_value - else: - return data - - -@dataclass -class ModelConfig: - """The full LLM model config that includes the full information needed for tensorrt_llm engine building. - - This class includes all the fields that tensorrt_llm supports, but not all of the fields are required. - """ - - # Global metadata - quantization: str = QUANTIZATION_NONE - dtype: str = "float16" - - # Parallel metadata - rank: int = 0 - tensor_parallel: int = 1 - - # Model structure and weights - vocab_embedding: EmbeddingConfig = None - positional_embedding: EmbeddingConfig = None - layers: List[DecoderLayerConfig] = field(default_factory=list) - final_layernorm: LayernormConfig = None - lm_head: LinearConfig = None - - # Ptuning metadata - use_prompt_tuning: bool = False - - def to_dict(self) -> dict: - """Converts the instance to a python dict.""" - return dataclasses.asdict(self) - - @staticmethod - def from_dict(d: dict): - """Load a dict to a `ModelConfig` instance.""" - return _from_dict(ModelConfig, d) - - @property - def vocab_size(self): - """Returns the vocab_size of the model.""" - return ( - self.vocab_embedding.local_vocab_size * self.tensor_parallel - if self.vocab_embedding.is_local - else self.vocab_embedding.local_vocab_size - ) - - @property - def vocab_size_padded(self): - """Returns the padded vocab_size of the model rounds to the tensor_parallel.""" - return pad_vocab_size(self.vocab_size, self.tensor_parallel) - - @property - def hidden_size(self): - """Returns the hidden_size of the model.""" - return self.vocab_embedding.hidden_size - - @property - def max_position_embeddings(self): - """Returns the max_position_embedding of the model.""" - return self.layers[0].max_position_embeddings - - @property - def num_attention_heads(self): - """Returns the num_attention_heads of the model.""" - return self.layers[0].num_attention_heads - - @property - def num_kv_heads(self): - """Returns the num_key_value_heads of the model.""" - return self.layers[0].num_kv_heads if self.layers[0].num_kv_heads > 0 else self.num_attention_heads - - @property - def hidden_act(self): - """Returns the hidden_act of the model.""" - return self.layers[0].mlp.hidden_act diff --git a/nemo/export/trt_llm/model_config_trt.py b/nemo/export/trt_llm/model_config_trt.py deleted file mode 100644 index 45049370c2c4..000000000000 --- a/nemo/export/trt_llm/model_config_trt.py +++ /dev/null @@ -1,65 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""The API convert the model_config format to tensorrt_llm.""" - -import os -import shutil -from pathlib import Path -from typing import List, Union - -import psutil - -from .model_config import ModelConfig -from .tensorrt_llm_model import LMHeadModelBuilder - - -def model_config_to_tensorrt_llm( - model_configs: List[ModelConfig], - engine_dir: Union[str, Path], - gpus: int = 1, - max_input_len: int = 200, - max_output_len: int = 200, - max_batch_size: int = 1, - max_beam_width: int = 1, - max_prompt_embedding_table_size: int = 100, -): - """The API to convert a torch or huggingface model represented as ModelConfig to tensorrt_llm. - - Args: - model_configs: The list of ModelConfig converted, 1 for each GPU. - engine_dir: The target output directory to save the built tensorrt_llm engines. - gpus: the number of inference gpus for multi gpu inferencing. - max_input_len: The max input sequence length. - max_output_len: The max output sequence length. - max_batch_size: The max batch size. - max_beam_width: The max beam search width. - """ - engine_dir = Path(engine_dir) - if os.path.exists(engine_dir): - shutil.rmtree(engine_dir) - - print("Before engine building, CPU RAM Used (GB):" f" {psutil.Process().memory_info().rss / 1024 / 1024 / 1024}") - for rank in range(gpus): - model_configs[rank].use_prompt_tuning = max_prompt_embedding_table_size > 0 - builder = LMHeadModelBuilder(model_configs[rank]) - builder.build( - output_dir=engine_dir, - max_input_len=max_input_len, - max_output_len=max_output_len, - max_batch_size=max_batch_size, - max_beam_width=max_beam_width, - parallel_build=False, - max_prompt_embedding_table_size=max_prompt_embedding_table_size, - ) - print( - f"After Engine building rank {rank}, CPU RAM Used (GB):" - f" {psutil.Process().memory_info().rss / 1024 / 1024 / 1024}" - ) diff --git a/nemo/export/trt_llm/model_config_utils.py b/nemo/export/trt_llm/model_config_utils.py deleted file mode 100644 index 9bb9dd12510b..000000000000 --- a/nemo/export/trt_llm/model_config_utils.py +++ /dev/null @@ -1,238 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""Utils to load and process model_config.""" - -import copy -import json -from dataclasses import fields, is_dataclass -from pathlib import Path -from typing import List, Union - -import numpy as np - -from .model_config import LAYER_QKV, LINEAR_COLUMN, EmbeddingConfig, LinearConfig, ModelConfig - - -def _restore_model_config(model_config, weights): - """Recursively restores the model_config from json and loads np.ndarray weights from weights.""" - if isinstance(model_config, dict): - for k, v in model_config.items(): - if isinstance(v, str) and v.startswith("_np:"): - model_config[k] = weights[v] - else: - _restore_model_config(v, weights) - if isinstance(model_config, list): - for i, v in enumerate(model_config): - if isinstance(v, str) and v.startswith("_np:"): - model_config[i] = weights[v] - else: - _restore_model_config(v, weights) - - -def load_model_configs(model_config_json: Union[str, Path], inference_tensor_parallel: int = 1) -> List[ModelConfig]: - """Loads the model_config saved from ammo export. - - Args: - model_config_json: The exported json file from ammo describing the optimized model. - Inside the same directory, each gpu rank will have its own npz file. - The json file represents the general ModelConfig structure while the detailed - weights for each rank are stored in the npz file. - - Returns: - The list of `ModelConfig` loaded and constructed. - """ - model_config_json = Path(model_config_json) - assert model_config_json.exists() - - with open(model_config_json, "r") as f: - model_config_template = json.load(f) - - tensor_parallel = model_config_template["tensor_parallel"] - assert tensor_parallel > 0, f"Invalid tensor_parallel {tensor_parallel}" - - model_config_dir = model_config_json.parents[0] - - model_configs = [] - for i in range(tensor_parallel): - decoder_type = model_config_template["layers"][0]["decoder_type"] - weights_file = f"{decoder_type}_tp{tensor_parallel}_rank{i}.npz" - weights = dict(np.load(model_config_dir / weights_file)) - model_config = copy.deepcopy(model_config_template) - model_config["rank"] = i - _restore_model_config(model_config, weights) - model_configs.append(ModelConfig.from_dict(model_config)) - - model_configs = _postprocess_model_configs(model_configs, inference_tensor_parallel=inference_tensor_parallel) - - return model_configs - - -def _same_array(arrays: List[np.ndarray]): - return all(np.array_equal(arrays[0], array) for array in arrays[1:]) - - -def _merge_model_configs_to_first(configs): - """This method merges the tensor fields for linear config so the config can be used with fewer GPUs. - - The implementation is recursive. - """ - merged_config = configs[0] - - if isinstance(merged_config, EmbeddingConfig): - if merged_config.is_local: - merged_config.weight = np.ascontiguousarray(np.concatenate([config.weight for config in configs], axis=0)) - - elif isinstance(merged_config, LinearConfig): - # The scaling factors merge rule is summarized as below: - - # S: all ranks should have the same scaling factor. - # M: Pick elementwise max among the ranks. Merged shape same as single rank. - # C: Concat the scaling factors on dim 0. Merged shape == tensor_parallel * original shape. - # RC: Reshape and concat. This is for QKV handling only. Merged shape == tensor_parallel * original shape. - # NA: Not valid / present - - # ws: weight scaling factor - # as: activation scaling factor - # ps: prequant scaling factor - - # C: Colum Linear - # R: Row Linear - # Q: QKV layer - - # F: FP8 - # I: INT8 SQ - - # Merge Rules: - # ws as ps - # FQ M M NA - # FC M M NA - # FR M M NA - # IQ RC M S - # IC C M S - # IR M M C - - # Handling constants - for field_name in ["activation_scaling_factor", "weights_scaling_factor"]: - merged_field_value = getattr(merged_config, field_name) - if merged_field_value is not None and merged_field_value.size == 1: - # Scaling factor is a scalar. - setattr( - merged_config, field_name, np.maximum.reduce([getattr(config, field_name) for config in configs]), - ) - - if merged_config.layer_type == LAYER_QKV: - assert merged_config.linear_type == LINEAR_COLUMN - out_dim = merged_config.weight.shape[0] - new_out_dim = out_dim * len(configs) - in_dim = merged_config.weight.shape[1] - # For QKV weights, the QKV dim should be the out most dim. - merged_config.weight = np.ascontiguousarray( - np.concatenate( - [config.weight.reshape(3, out_dim * in_dim // 3) for config in configs], axis=1 - ).reshape(new_out_dim, in_dim) - ) - for field_name in ["bias", "weights_scaling_factor"]: - merged_field_value = getattr(merged_config, field_name) - if merged_field_value is not None: - if merged_field_value.shape[0] == out_dim: - field_values = [getattr(config, field_name) for config in configs] - setattr( - merged_config, - field_name, - np.ascontiguousarray( - np.concatenate( - [field_value.reshape(3, out_dim // 3) for field_value in field_values], axis=1, - ).reshape(new_out_dim) - ), - ) - - # No op for prequant_scaling_factor - assert _same_array( - [config.prequant_scaling_factor for config in configs] - ), f"Failed to merge config {merged_config} with others" - - else: - # For normal linear layers, we merge column linear on the dim 0 and row on the dim 1 - merge_axis = 0 if merged_config.linear_type == LINEAR_COLUMN else 1 - merged_config.weight = np.ascontiguousarray( - np.concatenate([config.weight for config in configs], axis=merge_axis) - ) - - # Only cat the bias for column linear. - if merged_config.linear_type == LINEAR_COLUMN and merged_config.bias is not None: - merged_config.bias = np.ascontiguousarray(np.concatenate([config.bias for config in configs], axis=0)) - - if merged_config.linear_type == LINEAR_COLUMN: - if merged_config.weights_scaling_factor is not None and merged_config.weights_scaling_factor.size != 1: - # INT8 sq case - merged_config.weights_scaling_factor = np.ascontiguousarray( - np.concatenate([config.weights_scaling_factor for config in configs], axis=0) - ) - if merged_config.prequant_scaling_factor is not None: - assert _same_array( - [config.prequant_scaling_factor for config in configs] - ), f"Failed to merge config {merged_config} with others" - else: - if merged_config.weights_scaling_factor is not None: - merged_config.weights_scaling_factor = np.maximum.reduce( - [config.weights_scaling_factor for config in configs] - ) - if merged_config.prequant_scaling_factor is not None: - merged_config.prequant_scaling_factor = np.ascontiguousarray( - np.concatenate([config.prequant_scaling_factor for config in configs], axis=0) - ) - - elif is_dataclass(merged_config): - for field in fields(merged_config): - _merge_model_configs_to_first([getattr(config, field.name) for config in configs]) - elif isinstance(merged_config, list): - for i in range(len(merged_config)): - _merge_model_configs_to_first([config[i] for config in configs]) - - -def _merge_embedding(model_configs: List[ModelConfig]): - """Merges and replicates the embedding weights to all configs.""" - for embedding_name in ["vocab_embedding", "positional_embedding"]: - embedding_0 = getattr(model_configs[0], embedding_name) - if embedding_0 and embedding_0.is_local: - weights = [getattr(config, embedding_name).weight for config in model_configs] - merged_weight = np.ascontiguousarray(np.concatenate(weights, axis=0)) - for config in model_configs: - getattr(config, embedding_name).weight = merged_weight - getattr(config, embedding_name).is_local = False - - -def _postprocess_model_configs( - model_configs: List[ModelConfig], inference_tensor_parallel: int = 1 -) -> List[ModelConfig]: - """Postprocesses the model configs with trained tensor parallel to target inference tensor parallel.""" - if inference_tensor_parallel < len(model_configs): - # Merge the model_configs to target inferencen tensor parallel. - assert ( - len(model_configs) % inference_tensor_parallel == 0 - ), f"Cannot merge {len(model_configs)} configs to {inference_tensor_parallel}" - - num_configs_per_group = len(model_configs) // inference_tensor_parallel - merged_model_configs = [] - for i in range(inference_tensor_parallel): - model_config_slice = model_configs[i * num_configs_per_group : (i + 1) * num_configs_per_group] - _merge_model_configs_to_first(model_config_slice) - model_config_slice[0].rank = i - model_config_slice[0].tensor_parallel = inference_tensor_parallel - merged_model_configs.append(model_config_slice[0]) - else: - merged_model_configs = model_configs - - # So far we do not support parallel embedding layers yet. - # We will merge the local embedding weights and replicate it to all ranks for now. - _merge_embedding(merged_model_configs) - - return merged_model_configs diff --git a/nemo/export/trt_llm/nemo/convert.py b/nemo/export/trt_llm/nemo/convert.py deleted file mode 100644 index 6f4f02013d1d..000000000000 --- a/nemo/export/trt_llm/nemo/convert.py +++ /dev/null @@ -1,343 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""Utilities for exporting a model to our custom format.""" - -import numpy as np -import torch -from tensorrt_llm._utils import torch_to_numpy - -# AMMO modification -# A global dicts to store exported weights. -# This is set to be a global variable to avoid extra code modification from tensorrt_llm. -weights_dict = {} - - -def cpu_map_location(storage, loc): - return storage.cpu() - - -def gpu_map_location(storage, loc): - if loc.startswith("cuda"): - training_gpu_idx = int(loc.split(":")[1]) - inference_gpu_idx = training_gpu_idx % torch.cuda.device_count() - return storage.cuda(inference_gpu_idx) - elif loc.startswith("cpu"): - return storage.cpu() - else: - raise ValueError(f"Not handled {loc}") - - -def save_val(val, dir, key, tp_num=None): - suffix = "bin" if tp_num is None else f"{tp_num}.bin" - # AMMO modification, save to in-memory dict instead of dir. - # Transpose linear layer weights to the correct shape. - if len(val.shape) >= 2: - val = np.ascontiguousarray(np.transpose(val.reshape(val.shape[0], -1), [1, 0])) - global weights_dict - weights_dict[f"model.{key}.{suffix}"] = val - - -def save_split(split_vals, dir, key, i, split_factor): - for j, val in enumerate(split_vals): - save_val(val, dir, key, i * split_factor + j) - - -def generate_int8(weights, act_range, is_qkv=False, multi_query_mode=False): - """This function has two purposes: - - compute quantized weights, scaled either per-tensor or per-column - - compute scaling factors. - - Depending on the GEMM API (CUTLASS/CUBLAS) the required scaling factors differ. - CUTLASS uses two sets of scaling factors. One for the activation X, one for the weight W. - CUBLAS only has one (we can't do per-row scaling). So we must provide pre-multiplied scaling factor. - - Here is the list of what we need (T means per-tensor, C per-column): - - scale_x_orig_quant puts fp activation into the quantized range (i.e. [-128, 127], for int8). - Used before the GEMM. (T) - - scale_y_quant_orig puts quantized activation into the fp range. Used if the GEMM outputs int8. (T) - - scale_w_quant_orig puts weights from quant range to fp range (used with CUTLASS) (T, C) - - scale_y_accum_quant puts the GEMM result (XW) from accumulation range (int32) - to quant range (int8) (used for CUBLAS) (T, C) - - Note that we don't do anything special about row-parallel GEMM. - Theoretically, we could have per-GPU scaling factors too, - but then the model would change depending on the number of GPUs used. - - For QKV projection, the behavior is special. Even if we have a single matrix to perform QKV projection, - we consider it - as three different matrices: Q, K, and V. So per-tensor actually means one scaling factor for each Q, K and V. - """ - # compute weight scaling factors for fp->int8 and int8->fp - if is_qkv and not multi_query_mode: - scale_w_orig_quant_t = 127.0 / act_range["w"].reshape(3, -1).max(dim=-1, keepdims=True)[0].cpu().numpy() - scale_w_orig_quant_c = 127.0 / act_range["w"].reshape(3, -1).cpu().numpy() - elif is_qkv and multi_query_mode: - raise ValueError("Multi-query w/ int8 quant has not been supported yet") - else: - scale_w_orig_quant_t = 127.0 / act_range["w"].max().cpu().numpy() - scale_w_orig_quant_c = 127.0 / act_range["w"].cpu().numpy() - scale_w_quant_orig_t = 1.0 / scale_w_orig_quant_t - scale_w_quant_orig_c = 1.0 / scale_w_orig_quant_c - - # compute the rest of needed scaling factors - scale_x_orig_quant_t = np.array(127.0 / act_range["x"].max().item()) - scale_y_orig_quant_t = np.array(127.0 / act_range["y"].max().item()) - scale_y_quant_orig_t = np.array(act_range["y"].max().item() / 127.0) - scale_y_accum_quant_t = scale_y_orig_quant_t / (scale_x_orig_quant_t * scale_w_orig_quant_t) - scale_y_accum_quant_c = scale_y_orig_quant_t / (scale_x_orig_quant_t * scale_w_orig_quant_c) - if is_qkv: - scale_y_accum_quant_t = np.broadcast_to(scale_y_accum_quant_t, scale_w_orig_quant_c.shape) - scale_w_quant_orig_t = np.broadcast_to(scale_w_quant_orig_t, scale_w_orig_quant_c.shape) - - def to_i8(x): - return x.round().clip(-127, 127).astype(np.int8) - - return { - "weight.int8": to_i8(weights * scale_w_orig_quant_t), - "weight.int8.col": to_i8(weights * scale_w_orig_quant_c), - "scale_x_orig_quant": scale_x_orig_quant_t.astype(np.float32), - "scale_w_quant_orig": scale_w_quant_orig_t.astype(np.float32), - "scale_w_quant_orig.col": scale_w_quant_orig_c.astype(np.float32), - "scale_y_accum_quant": scale_y_accum_quant_t.astype(np.float32), - "scale_y_accum_quant.col": scale_y_accum_quant_c.astype(np.float32), - "scale_y_quant_orig": scale_y_quant_orig_t.astype(np.float32), - } - - -def write_int8(vals, dir, base_key, split_dim, tp_rank, split_factor, kv_cache_only=False): - if not kv_cache_only: - save_split( - np.split(vals["weight.int8"], split_factor, axis=split_dim), - dir, - f"{base_key}.weight.int8", - tp_rank, - split_factor, - ) - save_split( - np.split(vals["weight.int8.col"], split_factor, axis=split_dim), - dir, - f"{base_key}.weight.int8.col", - tp_rank, - split_factor, - ) - - saved_keys_once = ["scale_y_quant_orig"] - if not kv_cache_only: - saved_keys_once += ["scale_x_orig_quant", "scale_w_quant_orig", "scale_y_accum_quant"] - # per-column scaling factors are loaded per-gpu for ColumnParallel GEMMs (QKV, FC1) - if not kv_cache_only: - if split_dim == -1: - save_split( - np.split(vals["scale_w_quant_orig.col"], split_factor, axis=split_dim), - dir, - f"{base_key}.scale_w_quant_orig.col", - tp_rank, - split_factor, - ) - save_split( - np.split(vals["scale_y_accum_quant.col"], split_factor, axis=split_dim), - dir, - f"{base_key}.scale_y_accum_quant.col", - tp_rank, - split_factor, - ) - else: - saved_keys_once += ["scale_w_quant_orig.col", "scale_y_accum_quant.col"] - - if tp_rank == 0: - for save_key in saved_keys_once: - save_val(vals[save_key], dir, f"{base_key}.{save_key}") - - -# Note: in multi_query_mode, only query heads are split between multiple GPUs, while key/value head -# are not split as there is only one head per key/value. -@torch.no_grad() -def split_and_save_weight(tp_rank, saved_dir, split_factor, key, vals, storage_type, act_range, config): - use_attention_nemo_shape = config.get("use_attention_nemo_shape", False) - split_gated_activation = config.get("split_gated_activation", False) - num_attention_heads = config.get("num_attention_heads", 0) - tp_size = config.get("tp_size", 1) - int8_outputs = config.get("int8_outputs", None) - multi_query_mode = config.get("multi_query_mode", False) - local_dim = config.get("local_dim", None) - num_kv_heads = config.get("num_kv_heads", num_attention_heads) - - save_int8 = int8_outputs == "all" or int8_outputs == "kv_cache_only" - - if not isinstance(vals, list): - vals = [vals] - - if config.get("transpose_weights", False) and vals[0].ndim == 2: - vals = [val.T for val in vals] - if "layernorm.weight" in key and config.get("apply_layernorm_1p", False): - vals = [val + 1.0 for val in vals] - vals = [torch_to_numpy(val.cpu().to(storage_type)) for val in vals] - - if ( - "input_layernorm.weight" in key - or "input_layernorm.bias" in key - or "attention.dense.bias" in key - or "post_attention_layernorm.weight" in key - or "post_attention_layernorm.bias" in key - or "post_self_attn_layernorm.weight" in key - or "mlp.dense_4h_to_h.bias" in key - or "final_layernorm.weight" in key - or "final_layernorm.bias" in key - ): - # shared weights, only need to convert the weights of rank 0 - if "post_self_attn_layernorm.weight" in key: - key = key.replace("post_self_attn_layernorm.weight", "post_attention_layernorm.weight") - if tp_rank == 0: - save_val(vals[0], saved_dir, key) - - elif ( - "attention.dense.weight" in key - or "mlp.dense_4h_to_h.weight" in key - or "attention.linear_proj.weight" in key - or "mlp.linear_fc2.weight" in key - ): - cat_dim = 0 - val = np.concatenate(vals, axis=cat_dim) - split_vals = np.split(val, split_factor, axis=cat_dim) - if "attention.linear_proj.weight" in key: - key = key.replace("attention.linear_proj.weight", "attention.dense.weight") - elif "mlp.linear_fc2.weight" in key: - key = key.replace("mlp.linear_fc2.weight", "mlp.dense_4h_to_h.weight") - save_split(split_vals, saved_dir, key, tp_rank, split_factor) - if act_range is not None and int8_outputs == "all": - base_key = key.replace(".weight", "") - vals_i8 = generate_int8(val, act_range, multi_query_mode=multi_query_mode) - write_int8(vals_i8, saved_dir, base_key, cat_dim, tp_rank, split_factor) - - elif "mlp.dense_h_to_4h.weight" in key or "mlp.dense_h_to_4h.bias" in key or "mlp.linear_fc1.weight" in key: - if split_gated_activation: - splits = [np.split(val, 2, axis=-1) for val in vals] - vals, gates = list(zip(*splits)) - cat_dim = -1 - val = np.concatenate(vals, axis=cat_dim) - split_vals = np.split(val, split_factor, axis=cat_dim) - if "mlp.linear_fc1.weight" in key: - key = key.replace("mlp.linear_fc1.weight", "mlp.dense_h_to_4h.weight") - save_split(split_vals, saved_dir, key, tp_rank, split_factor) - if act_range is not None and int8_outputs == "all": - base_key = key.replace(".weight", "") - vals_i8 = generate_int8(val, act_range, multi_query_mode=multi_query_mode) - write_int8(vals_i8, saved_dir, base_key, cat_dim, tp_rank, split_factor) - - if split_gated_activation: - assert not save_int8 - prefix, dot, suffix = key.rpartition(".") - key = prefix + ".gate" + dot + suffix - - gate = np.concatenate(gates, axis=cat_dim) - split_vals = np.split(gate, split_factor, axis=cat_dim) - save_split(split_vals, saved_dir, key, tp_rank, split_factor) - - # Ammo modification - elif "mlp.dense_h_to_4h_2.weight" in key or "mlp.dense_h_to_4h_2.bias" in key: - cat_dim = -1 - val = np.concatenate(vals, axis=cat_dim) - split_vals = np.split(val, split_factor, axis=cat_dim) - save_split(split_vals, saved_dir, key, tp_rank, split_factor) - if act_range is not None and int8_outputs == "all": - base_key = key.replace(".weight", "") - vals_i8 = generate_int8(val, act_range, multi_query_mode=multi_query_mode) - write_int8(vals_i8, saved_dir, base_key, cat_dim, tp_rank, split_factor) - - elif "attention.query_key_value.bias" in key: - assert ( - num_attention_heads == num_kv_heads or multi_query_mode - ), "QKV bias is not supported for group query attention" - if local_dim is None: - local_dim = vals[0].shape[-1] // 3 - - if multi_query_mode: - val = vals[0] - # out_feature = local_dim + 2 * head_size; assumes local_dim equals to hidden_dim - b_q, b_kv = np.split(val, [local_dim], axis=-1) - b_q_split = np.split(b_q, split_factor, axis=-1) - split_vals = [np.concatenate((i, b_kv), axis=-1) for i in b_q_split] - else: - if use_attention_nemo_shape: - head_num = num_attention_heads // tp_size - size_per_head = local_dim // num_attention_heads - nemo_shape = (head_num, 3, size_per_head) - vals = [val.reshape(nemo_shape) for val in vals] - vals = [val.transpose(1, 0, 2) for val in vals] - - vals = [val.reshape(3, local_dim) for val in vals] - val = np.concatenate(vals, axis=-1) - split_vals = np.split(val, split_factor, axis=-1) - save_split(split_vals, saved_dir, key, tp_rank, split_factor) - - elif "attention.query_key_value.weight" in key or "attention.linear_qkv.weight" in key: - assert use_attention_nemo_shape, "Only support NEMO shape for QKV weights" - hidden_dim = vals[0].shape[0] - size_per_head = hidden_dim // num_attention_heads - q_num = num_attention_heads // num_kv_heads - merge_factor = len(vals) - - # When the merge factor exceeds 1, the 'vals' list will have multiple entries. - # Depending on the format, 'vals' can look like either [QQQQ..KV, QQQQ..KV, ...](for GQA) or [QKV, QKV, ...](for MHA). - # We transform 'vals' into a unified structure: [QQQQQ...KK..VV..]. - # If the split factor surpasses 1, this array undergoes a split along its last dimension, which is 'size_per_head'. - - # Reshape each elements of the vals array to shape: - # (hidden_dim, q_num + 2, num_kv_heads // tp_size, size_per_head) - vals = [ - val.reshape(hidden_dim, num_kv_heads // tp_size, q_num + 2, size_per_head).transpose(0, 2, 1, 3) - for val in vals - ] - - # Combine all the Qs, Ks and Vs together for each val - q_splits, k_splits, v_splits = zip(*[np.split(val, [q_num, q_num + 1], axis=1) for val in vals]) - - # Concatenate Q, K, and V separately - q_splits_concat = np.concatenate(q_splits, axis=1) - k_splits_concat = np.concatenate(k_splits, axis=1) - v_splits_concat = np.concatenate(v_splits, axis=1) - - # Concatenate Q, K, and V together and reshape - qkv_split_concat = np.concatenate([q_splits_concat, k_splits_concat, v_splits_concat], axis=1) - qkv_split_concat = qkv_split_concat.reshape( - hidden_dim, q_num + 2, (num_kv_heads // tp_size) * size_per_head * merge_factor - ) - - # Final split - split_vals = np.split(qkv_split_concat, split_factor, axis=-1) - if "attention.linear_qkv.weight" in key: - key = key.replace("attention.linear_qkv.weight", "attention.query_key_value.weight") - save_split(split_vals, saved_dir, key, tp_rank, split_factor) - if save_int8: - base_key = key.replace(".weight", "") - vals_i8 = generate_int8(val, act_range, is_qkv=True, multi_query_mode=multi_query_mode) - write_int8( - vals_i8, - saved_dir, - base_key, - cat_dim, - tp_rank, - split_factor, - kv_cache_only=int8_outputs == "kv_cache_only", - ) - elif ( - "attention.query.weight" in key - or "attention.query.bias" in key - or "attention.key_value.weight" in key - or "attention.key_value.bias" in key - ): - pass - else: - print(f"[WARNING] {key} not handled by converter") - - # Ammo modification - global weights_dict - return weights_dict diff --git a/nemo/export/trt_llm/nemo/nemo.py b/nemo/export/trt_llm/nemo/nemo.py deleted file mode 100644 index 9a7276646488..000000000000 --- a/nemo/export/trt_llm/nemo/nemo.py +++ /dev/null @@ -1,269 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""Referrence impl in tensorrt_llm: examples/gpt/utils/nemo.py.""" -import functools -import logging -import os -import pathlib -import tarfile -import typing - -import torch -import yaml -from transformers import GPT2Config, LlamaConfig - -from .convert import cpu_map_location, gpu_map_location - -LOGGER = logging.getLogger(__name__) - - -def nemo_to_llm_config(nemo_model_config, vocab_size, eos_id, bos_id, decoder_type): - convertion_dict = { - "activation_function": "activation", - "layer_norm_epsilon": "layernorm_epsilon", - "n_embd": "hidden_size", - "n_head": "num_attention_heads", - "n_layer": "num_layers", - "n_positions": "max_position_embeddings", - "rotary_pct": "rotary_percentage", - "bias": "bias", - "intermediate_size": "ffn_hidden_size", - "num_kv_heads": "num_query_groups", - } - - kwargs = {key: nemo_model_config[value] for key, value in convertion_dict.items() if value in nemo_model_config} - kwargs["vocab_size"] = vocab_size - kwargs["eos_token_id"] = eos_id - kwargs["bos_token_id"] = bos_id - - llm_config = LlamaConfig if decoder_type == "llama" else GPT2Config - - return llm_config(**kwargs) - - -def add_special_tokens_to_tokenizer(tokenizer): - # Need to add cls, sep, mask tokens to the tokenizer if they don't exist. - # If cls, sep and mask are not attributes of the tokenizer, add it. - if not hasattr(tokenizer, "cls_token"): - tokenizer.add_special_tokens({"cls_token": ""}) - if not hasattr(tokenizer.tokenizer, "sep_id"): - tokenizer.add_special_tokens({"sep_token": ""}) - if not hasattr(tokenizer.tokenizer, "mask_id"): - tokenizer.add_special_tokens({"mask_token": ""}) - - # bos, eos, pad and unk may be present in the provided spm .model file, if they are, use it. - if not hasattr(tokenizer, "pad_token"): - if hasattr(tokenizer.tokenizer, "pad_id") and tokenizer.tokenizer.pad_id() > 0: - tokenizer.pad_token = tokenizer.tokenizer.id_to_piece(tokenizer.tokenizer.pad_id()) - else: - tokenizer.add_special_tokens({"pad_token": ""}) - else: - tokenizer.add_special_tokens({"pad_token": ""}) - - if not hasattr(tokenizer, "bos_token"): - if hasattr(tokenizer.tokenizer, "bos_id") and tokenizer.tokenizer.bos_id() > 0: - tokenizer.bos_token = tokenizer.tokenizer.id_to_piece(tokenizer.tokenizer.bos_id()) - else: - tokenizer.add_special_tokens({"bos_token": ""}) - else: - tokenizer.add_special_tokens({"bos_token": ""}) - - if not hasattr(tokenizer, "eos_token"): - if hasattr(tokenizer.tokenizer, "eos_id") and tokenizer.tokenizer.eos_id() > 0: - tokenizer.eos_token = tokenizer.tokenizer.id_to_piece(tokenizer.tokenizer.eos_id()) - else: - tokenizer.add_special_tokens({"eos_token": ""}) - else: - tokenizer.add_special_tokens({"eos_token": ""}) - - -# TODO: remove tar.extractall usage before releasing with KitMaker -def unpack_nemo_ckpt( - nemo_archive_path: typing.Union[str, pathlib.Path], out_dir_path: typing.Union[str, pathlib.Path], -): - nemo_archive_path = pathlib.Path(nemo_archive_path) - if not nemo_archive_path.exists(): - raise FileNotFoundError(f"{nemo_archive_path} does not exist") - - for tar_mode in ["r:", "r:gz"]: - try: - with tarfile.open(nemo_archive_path, mode=tar_mode) as tar_file: - - def is_within_directory(directory, target): - abs_directory = os.path.abspath(directory) - abs_target = os.path.abspath(target) - - prefix = os.path.commonprefix([abs_directory, abs_target]) - - return prefix == abs_directory - - def safe_members(tar_file): - members = [] - for member in tar_file.getmembers(): - member_path = os.path.join(out_dir_path, member.name) - if not is_within_directory(out_dir_path, member_path): - raise Exception("Attempted Path Traversal in Tar File") - members.append(member) - return members - - tar_file.extractall( - out_dir_path, members=safe_members(tar_file), numeric_owner=False - ) # nosec - tar path has been validated. - - return out_dir_path - except tarfile.ReadError: - pass - - raise RuntimeError(f"Could not unpack {nemo_archive_path}") - - -def extract_layers_with_prefix(model_, prefix): - length_to_trim = len(prefix) - model_state = model_.get("state_dict", model_) - return {key[length_to_trim:]: model_state[key] for key in model_state.keys() if prefix in key} - - -class UnpackedNemoCheckpointDir: - def __init__( - self, checkpoints_dir: typing.Union[str, pathlib.Path], load_checkpoints_to_cpu: bool = False, - ): - self._checkpoints_dir = pathlib.Path(checkpoints_dir) - self._load_checkpoints_to_cpu = load_checkpoints_to_cpu - - @property - @functools.lru_cache - def model_config(self): - model_config = None - - model_config_filename = "model_config.yaml" - model_configs_paths = list(self._checkpoints_dir.rglob(model_config_filename)) - if model_configs_paths: - if len(model_configs_paths) > 1: - raise RuntimeError( - f"There are more than single {model_config_filename} in" - f" {self._checkpoints_dir}:" - f" {', '.join(map(lambda p: p.as_posix(), model_configs_paths))}" - ) - model_config_path = model_configs_paths[0] - LOGGER.debug("Loading model config from %s", model_config_path) - with model_config_path.open("r") as model_config_file: - model_config = yaml.load(model_config_file, Loader=yaml.SafeLoader) - else: - LOGGER.debug("Searching model config in checkpoints") - # try to obtain from checkpoint - checkpoint_name = self.checkpoint_name - checkpoints_paths = sorted(self._checkpoints_dir.rglob(checkpoint_name)) - if checkpoints_paths: - # assume that parallel ranks 0 checkpoint should have model config embedded - checkpoint_path = checkpoints_paths[0] - - map_location_fn = cpu_map_location if self._load_checkpoints_to_cpu else gpu_map_location - - model_00 = torch.load(checkpoint_path, map_location=map_location_fn) - if "hyper_parameters" in model_00 and "cfg" in model_00["hyper_parameters"]: - model_config = model_00["hyper_parameters"]["cfg"] - LOGGER.debug("Loaded model config from checkpoint %s", checkpoint_path) - else: - LOGGER.debug("Could not find model config in checkpoint %s", checkpoint_path) - - del model_00 - - if model_config is None: - LOGGER.warning("Could not find checkpoint with NeMo model config in %s", self._checkpoints_dir) - - LOGGER.debug("Loaded model config %s", model_config) - - return model_config - - @property - def checkpoints_dir(self): - return self._checkpoints_dir - - def get_checkpoints_paths(self, tensor_model_parallel_size=1, pipeline_model_parallel_size=1): - """Injects tensor/pipeline model parallel ranks into the filepath. - Does nothing if not using model parallelism. - """ - checkpoint_path_without_rank = self.checkpoints_dir / self.checkpoint_name - - def _inject_parallel_ranks(tp_rank, pp_rank): - if tensor_model_parallel_size > 1 or pipeline_model_parallel_size > 1: - if pipeline_model_parallel_size is None or pipeline_model_parallel_size == 1: - checkpoint_path = ( - checkpoint_path_without_rank.parent - / f"mp_rank_{tp_rank:02d}" - / checkpoint_path_without_rank.name - ) - else: - checkpoint_path = ( - checkpoint_path_without_rank.parent - / f"tp_rank_{tp_rank:02d}_pp_rank_{pp_rank:03d}" - / checkpoint_path_without_rank.name - ) - return checkpoint_path - else: - return checkpoint_path_without_rank - - return [ - [ - _inject_parallel_ranks(tp_rank=tp_rank, pp_rank=pp_rank) - for pp_rank in range(pipeline_model_parallel_size) - ] - for tp_rank in range(tensor_model_parallel_size) - ] - - @property - @functools.lru_cache - def checkpoint_name(self): - patterns = [ - "model_weights.ckpt", # older megatron checkpoints - "*last.ckpt", # newer format of checkpoints - ] - for pattern in patterns: - model_files = sorted(list(self._checkpoints_dir.rglob(pattern))) - if model_files: - return model_files[0].name - - raise ValueError(f"Could not find checkpoint files in {self._checkpoints_dir}") - - @functools.lru_cache - def get_tokenizer_file_path(self, tokenizer_key, file_key, default_filename_pattern): - model_config = self.model_config - file_property = None - if tokenizer_key in model_config and file_key in model_config[tokenizer_key]: - file_property = model_config[tokenizer_key][file_key] - elif file_key in model_config: - file_property = model_config[file_key] - - LOGGER.debug("model_config[%s][%s]=%s", tokenizer_key, file_key, file_property) - - if file_property and file_property.startswith("nemo:"): - filename = file_property.split("nemo:")[1] - filename_pattern = f"*{filename}" - elif file_property and file_property.startswith("/artifacts/"): - filename = pathlib.Path(file_property).name - filename_pattern = f"*{filename}" - elif file_property is None or file_property == "None": - filename_pattern = None - else: - filename_pattern = default_filename_pattern - LOGGER.warning( - f"Tokenizer file from config: {tokenizer_key}.{file_key}={file_property} " - f"looks like unsupported path. Pattern {filename_pattern} will be used." - ) - - file_path = None - if filename_pattern is not None: - files_paths = list(self._checkpoints_dir.glob(filename_pattern)) - if files_paths: - assert len(files_paths) == 1 - file_path = files_paths[0] - - return file_path diff --git a/nemo/export/trt_llm/nemo/nemo_ckpt_convert.py b/nemo/export/trt_llm/nemo/nemo_ckpt_convert.py deleted file mode 100644 index 6433cef64792..000000000000 --- a/nemo/export/trt_llm/nemo/nemo_ckpt_convert.py +++ /dev/null @@ -1,282 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""Referrence impl in tensorrt_llm: examples/gpt/nemo_ckpt_convert.py.""" - -import configparser -import logging -import multiprocessing -import os -import shutil -import typing -from collections import defaultdict -from pathlib import Path - -import numpy as np -import torch -from tensorrt_llm._utils import str_dtype_to_torch, torch_to_numpy -from tqdm import tqdm -from transformers import GPT2Tokenizer, LlamaConfig, T5Tokenizer - -from .convert import cpu_map_location, gpu_map_location, split_and_save_weight -from .nemo import UnpackedNemoCheckpointDir, extract_layers_with_prefix, nemo_to_llm_config - -LOGGER = logging.getLogger(__name__) - -base_layer_names = { - "position_embedding": "model.language_model.embedding.position_embeddings.weight", - "word_embedding": "model.language_model.embedding.word_embeddings.weight", - "output_layer": "model.language_model.output_layer.weight", -} - -mcore_layer_names = { - "position_embedding": "model.embedding.position_embeddings.weight", - "word_embedding": "model.embedding.word_embeddings.weight", - "output_layer": "model.output_layer.weight", -} - - -def get_layer_name(layer_type: str, is_mcore: bool): - layer_dict = mcore_layer_names if is_mcore else base_layer_names - if layer_type in layer_dict: - return layer_dict[layer_type] - else: - raise ValueError(f"Unknown layer type {layer_type}") - - -def rename_key(old_key: str, pp_rank: int, num_layers: int, pp_size: int): - new_key = old_key - - if "layers." in old_key: - split_key = old_key.split(".") - split_key[1] = str(int(split_key[1]) + pp_rank * num_layers // pp_size) - new_key = ".".join(split_key) - - if "self_attention" in new_key: - new_key = new_key.replace("self_attention", "attention") - return new_key - - -@torch.no_grad() -def convert_checkpoint(unpacked_checkpoints_dir: UnpackedNemoCheckpointDir, args): - nemo_model_config = unpacked_checkpoints_dir.model_config - - checkpoints_paths = unpacked_checkpoints_dir.get_checkpoints_paths( - nemo_model_config.get("tensor_model_parallel_size", 1), - nemo_model_config.get("pipeline_model_parallel_size", 1), - ) - - # if checkpoints files could be found - start preparing output dir - out_dir = create_out_dir(args) - - map_location_fn = gpu_map_location if args.load_checkpoints_on_gpu else cpu_map_location - storage_type = str_dtype_to_torch(args.storage_type) - is_mcore = nemo_model_config.get("mcore_gpt", False) - - # load position_embedding from rank 0 - model_00 = torch.load(checkpoints_paths[0][0], map_location=map_location_fn) - model_00 = model_00.get("state_dict", model_00) - - has_position_embedding = get_layer_name("position_embedding", is_mcore) in model_00 - has_lm_head = get_layer_name("output_layer", is_mcore) in model_00 - - num_layers = nemo_model_config["num_layers"] - training_tp_size = nemo_model_config.get("tensor_model_parallel_size", 1) - training_pp_size = nemo_model_config.get("pipeline_model_parallel_size", 1) - inference_tp_size = args.tensor_parallelism - num_kv_heads = nemo_model_config.get("num_query_groups", 0) - multi_query_mode = nemo_model_config.get("multi_query_mode", False) - num_attention_heads = nemo_model_config["num_attention_heads"] - is_fast_glu = nemo_model_config.get("activation", "gelu") in ['fast-geglu', 'fast-swiglu', 'fast-reglu'] - if num_kv_heads is None: - num_kv_heads = 0 - nemo_model_config["num_query_groups"] = 0 - - if num_kv_heads == 0: - if multi_query_mode: - num_kv_heads = 1 - else: - num_kv_heads = num_attention_heads - - export_config = { - "apply_layernorm_1p": nemo_model_config.get("normalization", "") == "layernorm1p", - "tp_size": training_tp_size, - "split_gated_activation": "swiglu" in nemo_model_config.get("activation", "gelu") - and (args.decoder_type == "gptnext" or is_mcore or is_fast_glu), - "num_attention_heads": num_attention_heads, - "num_kv_heads": num_kv_heads, - "use_attention_nemo_shape": True, - "transpose_weights": True, - } - - # merge_factor: how many TP training nodes are merged into an inference TP node - # split_factor: in how many parts a TP training node is split - gcd = np.gcd(training_tp_size, inference_tp_size) - merge_factor = training_tp_size // gcd - split_factor = inference_tp_size // gcd - - model_level_weights = defaultdict(list) - - def handle_model_level_weights(model, tp_idx: int, pp_idx: int): - if tp_idx == 0 and pp_idx == 0: - if has_position_embedding: - val = model[get_layer_name("position_embedding", is_mcore)] - # not weight, do not need to transpose - val = torch_to_numpy(val.to(storage_type).cpu()) - # AMMO modification - # val.tofile(out_dir / "model.wpe.bin") - model_level_weights["model.wpe.bin"].append(val) - if pp_idx == 0: - val = model.get("state_dict", model)[get_layer_name("word_embedding", is_mcore)] - val = torch_to_numpy(val.to(storage_type).cpu()) - model_level_weights["model.wte.bin"].append(val) - if has_lm_head and pp_idx == training_pp_size - 1: - val = model.get("state_dict", model)[get_layer_name("output_layer", is_mcore)] - val = torch_to_numpy(val.to(storage_type).cpu()) - model_level_weights["model.lm_head.weight.bin"].append(val) - - # AMMO modification - weights_dict = {} - for tp_rank in range(training_tp_size // merge_factor): - for pp_rank in range(training_pp_size): - models = [] - for k in range(merge_factor): - rank_weights = checkpoints_paths[tp_rank * merge_factor + k][pp_rank] - model = torch.load(rank_weights, map_location=map_location_fn) - handle_model_level_weights(model, tp_rank * merge_factor + k, pp_rank) - prefix = "model.decoder." if is_mcore else "model.language_model.encoder." - layers = extract_layers_with_prefix(model, prefix) - models.append(layers) - - starmap_args = [] - for key in models[0].keys(): - # Skipping the extra state as it is not a part of the model state dict - if "_extra_state" not in key: - starmap_args.append( - ( - tp_rank, - out_dir, - split_factor, - rename_key(key, pp_rank, num_layers, training_pp_size), - [model[key] for model in models], - storage_type, - None, - export_config, - ) - ) - starmap_args = tqdm(starmap_args, desc="saving weights") - - if args.processes > 1: - with multiprocessing.Pool(args.processes) as pool: - # AMMO modification - weights_dicts = pool.starmap(split_and_save_weight, starmap_args) - weights_dict_local = {k: v for d in weights_dicts for k, v in d.items()} - else: - # simpler for debug situations - for starmap_arg in starmap_args: - # AMMO modification - weights_dict_local = split_and_save_weight(*starmap_arg) - # AMMO modification - weights_dict.update(weights_dict_local) - - for key, values in model_level_weights.items(): - model_level_weights[key] = np.concatenate(values, axis=0) - # AMMO modification - weights_dict[key] = model_level_weights[key] - vocab_size = model_level_weights["model.wte.bin"].shape[0] - - tokenizer_config = update_tokenizer_paths(nemo_model_config["tokenizer"], unpacked_checkpoints_dir) - copy_tokenizer_files(tokenizer_config, out_dir) - # AMMO modification. - tokenizer_config["model"] = os.path.join(out_dir, "tokenizer.model") - tokenizer = build_tokenizer(tokenizer_config) - llm_config = nemo_to_llm_config( - nemo_model_config, vocab_size, tokenizer.eos_token_id, tokenizer.bos_token_id, args.decoder_type, - ) - - llm_config.is_mcore = is_mcore - - config = configparser.ConfigParser() - model_name = "llama" if isinstance(llm_config, LlamaConfig) else "gpt" - config[model_name] = {k: str(v) for k, v in vars(llm_config).items()} - config[model_name]["storage_dtype"] = args.storage_type - config_path = out_dir / "config.ini" - with config_path.open("w") as config_file: - config.write(config_file) - - # AMMO modification. - return weights_dict, llm_config, tokenizer - - -def create_out_dir(args): - # AMMO modification. - out_dir = Path(args.out_dir) - if not out_dir.exists(): - out_dir.mkdir(parents=True) - return out_dir - - -def update_tokenizer_paths(tokenizer_config: typing.Dict, unpacked_checkpoints_dir): - def _update_config_entry(key, file_pattern): - old_path = tokenizer_config[key] - if old_path is None: - return - old_path = Path(old_path) - new_path = unpacked_checkpoints_dir.get_tokenizer_file_path("tokenizer", key, file_pattern) - if new_path: - LOGGER.debug(f"Update tokenizer {key} {old_path} -> {new_path}") - tokenizer_config[key] = new_path.as_posix() - elif not old_path.exists(): - LOGGER.warning(f"Tokenizer {key}'s path {old_path} does not exists: set it to None") - tokenizer_config[key] = None - - _update_config_entry("model", "*.model") - _update_config_entry("vocab_file", "*vocab*") - _update_config_entry("merge_file", "*merge*.txt") - - return tokenizer_config - - -def copy_tokenizer_files(config, out_dir): - basenames = { - "model": "tokenizer", - "vocab_file": "vocab", - "merge_file": "merges", - } - - for key in basenames.keys(): - if config[key] is None: - continue - path = Path(config[key]) - if not path.exists(): - LOGGER.debug(f"Tokenizer {key}: {path} file not found") - continue - - dst_path = out_dir / f"{basenames[key]}{path.suffix}" - LOGGER.debug(f"Copy tokenizer {key}: {path}->{dst_path}") - shutil.copy(path.as_posix(), dst_path.as_posix()) - - -def build_tokenizer(tokenizer_config: typing.Dict): - if tokenizer_config["library"] == "sentencepiece": - # AMMO modification. - # Turn off legacy model by default: See https://github.com/huggingface/transformers/pull/24622 - tokenizer = T5Tokenizer(tokenizer_config["model"], extra_ids=0, legacy=False) - elif "GPT2" in tokenizer_config["type"]: - tokenizer = GPT2Tokenizer(tokenizer_config["vocab_file"], tokenizer_config["merge_file"]) - else: - raise ValueError(f'Tokenizer type {tokenizer_config["library"]} not handled') - - if tokenizer.bos_token_id is None: - tokenizer.add_special_tokens({"bos_token": ""}) - if tokenizer.eos_token_id is None: - tokenizer.add_special_tokens({"eos_token": ""}) - - return tokenizer diff --git a/nemo/export/trt_llm/nemo_utils.py b/nemo/export/trt_llm/nemo_utils.py deleted file mode 100644 index 478cc24a2214..000000000000 --- a/nemo/export/trt_llm/nemo_utils.py +++ /dev/null @@ -1,184 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""The APIs to convert a nemo model checkpoint to tensorrt_llm.""" - -import argparse -import ast -import configparser -import copy -import datetime -import logging -import os -import shutil -import sys -import tempfile -from pathlib import Path -from typing import Dict, List, Tuple - -import numpy as np -from tensorrt_llm import str_dtype_to_trt -from transformers import GPT2Config, LlamaConfig, PretrainedConfig, PreTrainedTokenizer - -from .model_config import ( - LAYERNORM_DEFAULT, - LAYERNORM_RMS, - LINEAR_COLUMN, - DecoderLayerConfig, - EmbeddingConfig, - LayernormConfig, - LinearConfig, - ModelConfig, -) -from .nemo.nemo import UnpackedNemoCheckpointDir, unpack_nemo_ckpt -from .nemo.nemo_ckpt_convert import build_tokenizer, convert_checkpoint -from .tensor_utils import get_tensor_from_dict, split - -LOGGER = logging.getLogger(__name__) - - -def _nemo_decode( - in_file: str, - out_dir: str, - tensor_parallelism: int = 1, - processes: int = 1, - storage_type: str = "bfloat16", - load_checkpoints_on_gpu: bool = False, - decoder_type: str = "gptnext", -) -> Tuple[Dict[str, np.ndarray], PretrainedConfig, PreTrainedTokenizer]: - """Decodes the NEMO file and returns the weights dict, llm config and tokenizer.""" - args = argparse.Namespace() - args.in_file = in_file - args.out_dir = out_dir - args.tensor_parallelism = tensor_parallelism - args.processes = processes - args.storage_type = storage_type - args.load_checkpoints_on_gpu = load_checkpoints_on_gpu - args.verbose = False - args.decoder_type = decoder_type - - input_path = Path(args.in_file) - if not input_path.exists(): - LOGGER.error("%s does not exists", input_path) - sys.exit(1) - - with tempfile.TemporaryDirectory() as temp_dir: - temp_dir = Path(temp_dir) - - # unpack if needed - if input_path.is_dir(): - nemo_dir = input_path - else: - start_time = datetime.datetime.now() - checkpoint_dir_path = temp_dir / "unpacked" - nemo_dir = unpack_nemo_ckpt(args.in_file, checkpoint_dir_path) - LOGGER.info("Spent %s (h:m:s) to unpack NeMo archive", datetime.datetime.now() - start_time) - - unpacked_checkpoint_dir = UnpackedNemoCheckpointDir( - nemo_dir, load_checkpoints_to_cpu=not args.load_checkpoints_on_gpu - ) - - start_time = datetime.datetime.now() - weights_dict, llm_config, tokenizer = convert_checkpoint(unpacked_checkpoint_dir, args) - LOGGER.info("Spent %s (h:m:s) to convert the model", datetime.datetime.now() - start_time) - - return weights_dict, llm_config, tokenizer - - -def get_model_config(weights_dir: Path) -> GPT2Config: - """Reads the GPT2Config from the decoded NEMO weights dir.""" - config = configparser.ConfigParser() - config_path = weights_dir / "config.ini" - assert os.path.isfile(config_path), f"{config_path} not present" - config.read(config_path) - config_dict = dict(config.items("gpt")) - # Parse the config to dict. - for k, v in config_dict.items(): - try: - config_dict[k] = ast.literal_eval(v) - except Exception: - pass - return GPT2Config(**config_dict) - - -def get_tokenzier(tokenizer_dir_or_path: Path) -> PreTrainedTokenizer: - """Loads the tokenizer from the decoded NEMO weights dir.""" - model_path = tokenizer_dir_or_path / "tokenizer.model" if tokenizer_dir_or_path.is_dir() else tokenizer_dir_or_path - tokenizer_config = {"library": "sentencepiece", "model": str(model_path)} - return build_tokenizer(tokenizer_config) - - -def nemo_to_model_config( - in_file: str, decoder_type: str, nemo_export_dir: str, gpus: int = 1 -) -> Tuple[List[ModelConfig], PreTrainedTokenizer]: - """Converts the NEMO file and construct the `ModelConfig` before tensorrt_llm deployment.""" - dtype_str = "bfloat16" - - if os.path.exists(nemo_export_dir): - shutil.rmtree(nemo_export_dir) - - weights_dict, llm_model_config, tokenizer = _nemo_decode( - in_file=in_file, - out_dir=nemo_export_dir, - tensor_parallelism=gpus, - processes=1, - storage_type=dtype_str, - load_checkpoints_on_gpu=False, - decoder_type=decoder_type, - ) - - model_config_template = ModelConfig() - model_config_template.dtype = dtype_str - - model_config_template.tensor_parallel = gpus - - str_dtype_to_trt(dtype_str) - - model_configs = [] - for i in range(gpus): - model_configs.append(copy.deepcopy(model_config_template)) - model_configs[i].rank = i - - model_configs[i].vocab_embedding = EmbeddingConfig(weight=get_tensor_from_dict(weights_dict, "wte")) - - model_configs[i].final_layernorm = LayernormConfig( - weight=get_tensor_from_dict(weights_dict, "final_layernorm.weight"), - bias=get_tensor_from_dict(weights_dict, "final_layernorm.bias"), - ) - model_configs[i].final_layernorm.layernorm_type = ( - LAYERNORM_RMS if isinstance(llm_model_config, LlamaConfig) else LAYERNORM_DEFAULT - ) - - for i in range(llm_model_config.n_layer): - for j in range(gpus): - model_configs[j].layers.append( - DecoderLayerConfig.from_nemo( - weights_dict=weights_dict, - llm_config=llm_model_config, - decoder_type=decoder_type, - layer_id=i, - rank=j, - is_mcore=llm_model_config.is_mcore, - ) - ) - - lm_head_weight = get_tensor_from_dict(weights_dict, "lm_head.weight") - - if model_configs[0].vocab_size_padded != model_configs[0].vocab_size: - pad_width = model_configs[0].vocab_size_padded - model_configs[0].vocab_size - lm_head_weight = np.pad(lm_head_weight, ((0, pad_width), (0, 0)), "constant", constant_values=0) - - for i in range(gpus): - model_configs[i].lm_head = LinearConfig(linear_type=LINEAR_COLUMN) - model_configs[i].lm_head.weight = np.ascontiguousarray( - split(lm_head_weight, model_configs[i].tensor_parallel, model_configs[i].rank) - ) - - return model_configs, tokenizer diff --git a/nemo/export/trt_llm/quantization_utils.py b/nemo/export/trt_llm/quantization_utils.py deleted file mode 100644 index fd363b11dcf5..000000000000 --- a/nemo/export/trt_llm/quantization_utils.py +++ /dev/null @@ -1,119 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""The utils to convert a tensorrt_llm network to a quantized network.""" - -import numpy as np -from tensorrt_llm.layers import Linear, RowLinear -from tensorrt_llm.quantization.layers import FP8Linear, FP8RowLinear, Int8SmoothQuantLinear, Int8SmoothQuantRowLinear - -from .model_config import QUANTIZATION_FP8, QUANTIZATION_INT8_SQ, QUANTIZATION_NONE, LinearConfig, ModelConfig - - -def quantize_linear(tensorrt_llm_layer, quantization: str, layer_config: LinearConfig): - """Returns the quantized tensorrt_llm linear layer.""" - if quantization == QUANTIZATION_NONE: - return tensorrt_llm_layer - - if quantization == QUANTIZATION_FP8: - # FP8 is not sensitive to scaling factors. So we just quantize all layers possible. - default_scaling_factor = np.array([1], dtype=np.float32) - if layer_config.activation_scaling_factor is None: - layer_config.activation_scaling_factor = default_scaling_factor - if layer_config.weights_scaling_factor is None: - layer_config.weights_scaling_factor = default_scaling_factor - - if layer_config.activation_scaling_factor is None or layer_config.weights_scaling_factor is None: - print(f"No valid scaling factors in {tensorrt_llm_layer._get_name()}, skipping quantization" " on this layer") - return tensorrt_llm_layer - else: - assert np.all(layer_config.activation_scaling_factor > 0) - assert np.all(layer_config.weights_scaling_factor > 0) - - bias = tensorrt_llm_layer.bias is not None - - linear_layer_type = type(tensorrt_llm_layer) - if linear_layer_type == Linear: - if quantization == QUANTIZATION_FP8: - linear = FP8Linear - elif quantization == QUANTIZATION_INT8_SQ: - linear = Int8SmoothQuantLinear - else: - assert False, f"{quantization} is not supported." - quantized_linear_layer = linear( - in_features=tensorrt_llm_layer.in_features, - out_features=tensorrt_llm_layer.out_features * tensorrt_llm_layer.tp_size, - bias=bias, - dtype=tensorrt_llm_layer.dtype, - tp_group=tensorrt_llm_layer.tp_group, - tp_size=tensorrt_llm_layer.tp_size, - gather_output=tensorrt_llm_layer.gather_output, - ) - elif linear_layer_type == RowLinear: - if quantization == QUANTIZATION_FP8: - row_linear = FP8RowLinear - elif quantization == QUANTIZATION_INT8_SQ: - row_linear = Int8SmoothQuantRowLinear - else: - assert False, f"{quantization} is not supported." - quantized_linear_layer = row_linear( - in_features=tensorrt_llm_layer.in_features * tensorrt_llm_layer.tp_size, - out_features=tensorrt_llm_layer.out_features, - bias=bias, - dtype=tensorrt_llm_layer.dtype, - tp_group=tensorrt_llm_layer.tp_group, - tp_size=tensorrt_llm_layer.tp_size, - ) - else: - assert False, f"{linear_layer_type} is not supported." - - quantized_linear_layer.weight = tensorrt_llm_layer.weight - quantized_linear_layer.bias = tensorrt_llm_layer.bias - - quantized_linear_layer.activation_scaling_factor.value = layer_config.activation_scaling_factor - quantized_linear_layer.weights_scaling_factor.value = layer_config.weights_scaling_factor - - if hasattr(quantized_linear_layer, "prequant_scaling_factor"): - quantized_linear_layer.prequant_scaling_factor.value = layer_config.prequant_scaling_factor - - return quantized_linear_layer - - -def naive_quantization(config: ModelConfig, quantization: str): - """Generates a constant scaling factor (1) with target quantization. - - This is for debugging and performance measurement only. - """ - config.quantization = quantization - # Here the scaling factor is not inversed. - # In nvidia systems: - # pytorch_quantization uses inv scale - # onnx & trt uses non-inv scale - # cask uses inv scale - default_scaling_factor = np.array([1], dtype=np.float32) - - if quantization == QUANTIZATION_FP8: - for layer in config.layers: - linear_layers = [ - layer.attention.qkv, - layer.attention.dense, - layer.mlp.fc, - layer.mlp.proj, - layer.mlp.gate, - ] - for linear_layer in linear_layers: - if linear_layer: - linear_layer.activation_scaling_factor = default_scaling_factor - linear_layer.weights_scaling_factor = default_scaling_factor - config.lm_head.activation_scaling_factor = default_scaling_factor - config.lm_head.weights_scaling_factor = default_scaling_factor - - else: - assert False, f"{quantization} not supported" diff --git a/nemo/export/trt_llm/tensor_utils.py b/nemo/export/trt_llm/tensor_utils.py deleted file mode 100644 index 108568f35e2b..000000000000 --- a/nemo/export/trt_llm/tensor_utils.py +++ /dev/null @@ -1,62 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""Utils for tensor conversions between tensorrt, torch and numpy.""" - -from typing import Dict - -import numpy as np -import tensorrt as trt -import tensorrt_llm -import torch - - -def torch_to_numpy_with_dtype(tensor, dtype=trt.float16): - """Converts a torch tensor to numpy array with the dtype.""" - if dtype == trt.float16: - torch_dtype = torch.float16 - elif dtype == trt.float32: - torch_dtype = torch.float32 - elif dtype == trt.bfloat16: - torch_dtype = torch.bfloat16 - else: - assert False, f"{dtype} not supported" - return tensorrt_llm._utils.torch_to_numpy(tensor.detach().to(torch_dtype)) - - -def trt_dtype_to_str(dtype: trt.DataType): - """Converts a trt dtype to string.""" - str_map = { - trt.float16: "float16", - trt.bfloat16: "bfloat16", - trt.float32: "float32", - } - - return str_map[dtype] - - -def split(v, tp_size, idx, dim=0): - """Splits the np tensor v on dim and return the idx's slice.""" - if tp_size == 1: - return v - if len(v.shape) == 1: - return np.ascontiguousarray(np.split(v, tp_size)[idx]) - else: - return np.ascontiguousarray(np.split(v, tp_size, axis=dim)[idx]) - - -def get_tensor_parallel_group(tensor_parallel: int): - """Returns the tensor_parallel_group config based on tensor_parallel.""" - return None if tensor_parallel == 1 else list(range(tensor_parallel)) - - -def get_tensor_from_dict(weights_dict: Dict[str, np.ndarray], name: str) -> np.array: - """Loads tensor from the weights_dict.""" - return weights_dict.get(f"model.{name}.bin", None) diff --git a/nemo/export/trt_llm/tensorrt_llm_build.py b/nemo/export/trt_llm/tensorrt_llm_build.py deleted file mode 100644 index 45ecc08ead80..000000000000 --- a/nemo/export/trt_llm/tensorrt_llm_build.py +++ /dev/null @@ -1,296 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""This module builds the tensorrt_llm engine. - -Referrence impl in tensorrt_llm: examples/gpt/build.py. -""" - -import argparse -import os -import time -from pathlib import Path - -import tensorrt_llm -import torch -from tensorrt_llm import str_dtype_to_trt -from tensorrt_llm.builder import Builder -from tensorrt_llm.logger import logger -from tensorrt_llm.network import net_guard -from tensorrt_llm.plugin.plugin import ContextFMHAType -from tensorrt_llm.quantization import QuantMode - -MODEL_NAME = "ammo" - - -def get_engine_name(model, dtype, tp_size, rank): - """Returns the engine file name based on the provided info.""" - return "{}_{}_tp{}_rank{}.engine".format(model, dtype, tp_size, rank) - - -def serialize_engine(engine, path): - """Serializes the engine to path.""" - logger.info(f"Serializing engine to {path}...") - tik = time.time() - with open(path, "wb") as f: - f.write(bytearray(engine)) - tok = time.time() - t = time.strftime("%H:%M:%S", time.gmtime(tok - tik)) - logger.info(f"Engine serialized. Total time: {t}") - - -def build_rank_engine( - tensorrt_llm_gpt, builder: Builder, builder_config: tensorrt_llm.builder.BuilderConfig, engine_name, rank, args, -): - """@brief: Build the engine on the given rank. - - @param rank: The rank to build the engine. - @param args: The cmd line arguments. - @return: The built engine. - """ - str_dtype_to_trt(args.dtype) - - # TODO: Enable use_embedding_sharing when this feature is needed. - # Share_embedding_table can be set True only when: - # 1) the weight for lm_head() does not exist while other weights exist - # 2) For multiple-processes, use_parallel_embedding=True and embedding_sharding_dim == 0. - # Besides, for TensorRT 9.0, we can observe the engine size reduction when the lookup and gemm plugin are enabled. - # share_embedding_table = False - # if args.use_embedding_sharing: - # if args.world_size > 1: - # if args.model_dir is not None and args.embedding_sharding_dim == 0 and args.use_parallel_embedding: - # share_embedding_table = check_embedding_share(args.model_dir) - # else: - # if args.model_dir is not None: - # share_embedding_table = check_embedding_share(args.model_dir) - - # if not share_embedding_table: - # logger.warning(f'Cannot share the embedding lookup table.') - - # if share_embedding_table: - # logger.info( - # 'Engine will share embedding and language modeling weights.') - - # Module -> Network - ootb = os.getenv("OOTB", False) - - network = builder.create_network() - network.trt_network.name = engine_name - - # We have to use the attention plugin for most of the models. - if args.use_gpt_attention_plugin: - network.plugin_config.set_gpt_attention_plugin(dtype=args.use_gpt_attention_plugin) - - if not ootb: - if args.use_gemm_plugin: - network.plugin_config.set_gemm_plugin(dtype=args.use_gemm_plugin) - if args.use_rmsnorm_plugin: - network.plugin_config.set_rmsnorm_plugin(dtype=args.use_rmsnorm_plugin) - if args.use_layernorm_plugin: - network.plugin_config.set_layernorm_plugin(dtype=args.use_layernorm_plugin) - assert not (args.enable_context_fmha and args.enable_context_fmha_fp32_acc) - if args.enable_context_fmha: - network.plugin_config.set_context_fmha(ContextFMHAType.enabled) - if args.enable_context_fmha_fp32_acc: - network.plugin_config.set_context_fmha(ContextFMHAType.enabled_with_fp32_acc) - if args.remove_input_padding: - network.plugin_config.enable_remove_input_padding() - if args.paged_kv_cache: - network.plugin_config.enable_paged_kv_cache() - if args.use_ib_gpt_attention_plugin: - network.plugin_config.set_inflight_batching_gpt_attention_plugin(dtype=args.use_ib_gpt_attention_plugin) - - if args.use_inflight_batching: - network.plugin_config.enable_in_flight_batching() - - if args.use_lookup_plugin: - # Use the plugin for the embedding parallelism and sharing - network.plugin_config.set_lookup_plugin(dtype=args.dtype) - else: - print("Build engine in OOTB mode, disable all plugins except nccl.") - - if args.world_size > 1: - network.plugin_config.set_nccl_plugin(args.dtype) - - with net_guard(network): - # Prepare - network.set_named_parameters(tensorrt_llm_gpt.named_parameters()) - - # Forward - inputs = tensorrt_llm_gpt.prepare_inputs( - args.max_batch_size, - args.max_input_len, - args.max_output_len, - True, - args.max_beam_width, - paged_kv_cache=args.paged_kv_cache, - tokens_per_block=args.tokens_per_block, - prompt_embedding_table_size=args.max_prompt_embedding_table_size, - ) - tensorrt_llm_gpt(*inputs) - - engine = None - - # Network -> Engine - engine = builder.build_engine(network, builder_config) - if rank == 0: - config_path = args.output_dir / "config.json" - builder.save_config(builder_config, config_path) - return engine - - -def _build_impl(rank, tensorrt_llm_model, args): - torch.cuda.set_device(rank % args.gpus_per_node) - tensorrt_llm.logger.set_level(args.log_level) - args.output_dir.mkdir(parents=True, exist_ok=True) - timing_cache_file = args.timing_cache if args.timing_cache else args.output_dir / "model.cache" - timing_cache = timing_cache_file - - builder = Builder() - apply_query_key_layer_scaling = False - cur_rank = rank - - builder_config = builder.create_builder_config( - name=MODEL_NAME, - precision=args.dtype, - timing_cache=timing_cache, - tensor_parallel=args.world_size, # TP only - parallel_build=args.parallel_build, - num_layers=tensorrt_llm_model._num_layers, - num_heads=tensorrt_llm_model._num_heads, - num_kv_heads=tensorrt_llm_model._num_kv_heads, - hidden_size=tensorrt_llm_model._hidden_size, - vocab_size=tensorrt_llm_model._vocab_size, - hidden_act=tensorrt_llm_model.hidden_act, - max_position_embeddings=tensorrt_llm_model.max_position_embeddings, - apply_query_key_layer_scaling=apply_query_key_layer_scaling, - max_batch_size=args.max_batch_size, - max_input_len=args.max_input_len, - max_output_len=args.max_output_len, - int8="int8" in args.quantization, - opt_level=args.builder_opt, - paged_kv_cache=args.paged_kv_cache, - tokens_per_block=args.tokens_per_block, - use_prompt_tuning=args.max_prompt_embedding_table_size > 0, - use_parallel_embedding=args.use_parallel_embedding, - fp8="fp8" in args.quantization, - ) - - engine_name = get_engine_name(MODEL_NAME, args.dtype, args.world_size, cur_rank) - engine = build_rank_engine(tensorrt_llm_model, builder, builder_config, engine_name, cur_rank, args) - assert engine is not None, f"Failed to build engine for rank {cur_rank}" - - if cur_rank == 0: - # Use in-memory timing cache for multiple builder passes. - if not args.parallel_build: - timing_cache = builder_config.trt_builder_config.get_timing_cache() - - serialize_engine(engine, args.output_dir / engine_name) - - if rank == 0: - ok = builder.save_timing_cache(builder_config, timing_cache_file) - assert ok, "Failed to save timing cache." - - -def build( - tensorrt_llm_model, - output_dir: Path, - rank=0, - world_size=1, - dtype="float16", - timing_cache="", - log_level="info", - max_batch_size=1, - max_input_len=200, - max_output_len=200, - max_beam_width=1, - max_prompt_embedding_table_size=0, - parallel_build=False, - gpus_per_node=1, - quantization=None, -): - """Builds the tensorrt_llm_model to engine.""" - args = argparse.Namespace() - args.world_size = world_size - args.dtype = dtype - args.timing_cache = timing_cache - args.log_level = log_level - args.max_batch_size = max_batch_size - args.max_input_len = max_input_len - args.max_output_len = max_output_len - args.max_beam_width = max_beam_width - args.use_gpt_attention_plugin = dtype - args.use_gemm_plugin = dtype - # Only enable rmsnorm_plugin for INT8 and FP16 as FP8 performance has a regression. - # TODO: Understand why rmsnorm_plugin is not performing well in FP8 - args.use_rmsnorm_plugin = dtype if "fp8" not in quantization else False - args.use_layernorm_plugin = False - args.parallel_build = parallel_build - args.enable_context_fmha = True - args.enable_context_fmha_fp32_acc = False - args.gpus_per_node = gpus_per_node - args.builder_opt = None - args.output_dir = Path(output_dir) - args.remove_input_padding = True - args.use_smooth_quant = False - args.use_weight_only = False - args.weight_only_precision = "int8" - args.per_channel = False - args.per_token = False - args.int8_kv_cache = False - args.random_seed = None - args.paged_kv_cache = False - args.max_prompt_embedding_table_size = max_prompt_embedding_table_size - args.use_inflight_batching = False - args.use_ib_gpt_attention_plugin = False - args.use_parallel_embedding = False - args.use_lookup_plugin = False - args.tokens_per_block = 64 - args.quantization = quantization - - assert not ( - args.use_smooth_quant and args.use_weight_only - ), "You cannot enable both SmoothQuant and INT8 weight-only together." - - assert not ( - args.use_smooth_quant and args.use_weight_only - ), "You cannot enable both SmoothQuant and INT8 weight-only together." - - if args.use_ib_gpt_attention_plugin: - logger.warning( - "use_ib_gpt_attention_plugin is deprecated. Use combination of" - " --use_gpt_attention_plugin=dtype --use_inflight_batching instead." - ) - - if args.use_inflight_batching: - assert args.use_gpt_attention_plugin, "You have to use GPT attention plugin for in-flight batching mode" - assert args.paged_kv_cache, "You have to use paged kv cache for in-flight batching mode" - assert args.remove_input_padding, "You have to remove input padding for in-flight batching" - - if args.use_smooth_quant: - args.quant_mode = QuantMode.use_smooth_quant(args.per_token, args.per_channel) - elif args.use_weight_only: - args.quant_mode = QuantMode.use_weight_only(args.weight_only_precision == "int4") - else: - args.quant_mode = QuantMode(0) - - if args.int8_kv_cache: - args.quant_mode = args.quant_mode.set_int8_kv_cache() - - if args.random_seed is not None: - torch.manual_seed(args.random_seed) - - logger.set_level(args.log_level) - tik = time.time() - _build_impl(rank, tensorrt_llm_model, args) - - tok = time.time() - t = time.strftime("%H:%M:%S", time.gmtime(tok - tik)) - logger.info(f"Total time of building all {args.world_size} engines: {t}") diff --git a/nemo/export/trt_llm/tensorrt_llm_model.py b/nemo/export/trt_llm/tensorrt_llm_model.py deleted file mode 100644 index ecffd5573e2f..000000000000 --- a/nemo/export/trt_llm/tensorrt_llm_model.py +++ /dev/null @@ -1,487 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""This module defines a tensorrt_llm based model for all LLMs we support inside AMMO. - -Referrence impl in tensorrt_llm: tensorrt_llm/models/gpt/model.py. -""" -import inspect -from collections import OrderedDict -from pathlib import Path - -import numpy as np -import tensorrt as trt -import torch -from tensorrt_llm import default_net, str_dtype_to_trt -from tensorrt_llm.functional import RaggedTensor, Tensor, expand_mask, gather_last_token_logits, shape -from tensorrt_llm.layers import ColumnLinear, InflightBatchingParam -from tensorrt_llm.models.generation_mixin import GenerationMixin -from tensorrt_llm.module import Module, ModuleList - -from .decoder import build_decoder_layer -from .model_config import ModelConfig -from .quantization_utils import quantize_linear -from .tensor_utils import get_tensor_parallel_group, trt_dtype_to_str -from .tensorrt_llm_build import build -from .tensorrt_llm_utils import build_embedding_from_config, build_layernorm_from_config, print_tensorrt_llm - - -class ModelBuilder(Module): - """A generic tensorrt_llm transformer model builder. - - We try to make this module builder as flexibile as possible to cover all transformer conversion usecases. - """ - - def __init__(self, model_config: ModelConfig): - """Initializes the ModelBuilder from a model_config.""" - super().__init__() - self.quantization = model_config.quantization - self.rank = model_config.rank - self.max_position_embeddings = model_config.max_position_embeddings - self.hidden_act = model_config.hidden_act - - self._dtype = str_dtype_to_trt(model_config.dtype) - self._kv_dtype = self._dtype - self._tensor_parallel = model_config.tensor_parallel - self._vocab_size = model_config.vocab_size - self._hidden_size = model_config.hidden_size - self._num_layers = len(model_config.layers) - self._num_heads = model_config.num_attention_heads - self._num_kv_heads = model_config.num_kv_heads - self._use_prompt_tuning = model_config.use_prompt_tuning - - # TODO: support use_parallel_embedding. - self.vocab_embedding = build_embedding_from_config( - model_config.vocab_embedding, self._dtype, use_prompt_tuning=self._use_prompt_tuning - ) - self.positional_embedding = build_embedding_from_config( - model_config.positional_embedding, self._dtype, use_prompt_tuning=False - ) - self.layers = ModuleList( - [ - build_decoder_layer( - layer, - layer_id, - self._num_layers, - dtype=self._dtype, - quantization=model_config.quantization, - rank=self.rank, - tensor_parallel=self._tensor_parallel, - ) - for layer_id, layer in enumerate(model_config.layers) - ] - ) - - self.ln_f = build_layernorm_from_config(model_config.final_layernorm, self._dtype) - - def forward( - self, - input_ids, - position_ids, - past_key_value=None, - sequence_length=None, - host_past_key_value_lengths=None, - use_cache=False, - attention_mask=None, - cache_indirection=None, - kv_cache_block_pointers=None, - prompt_embedding_table=None, - prompt_tasks=None, - prompt_vocab_size=None, - inflight_batching_args=None, - context_lengths=None, - host_context_lengths=None, - host_request_types=None, - max_context_length=None, - ): - """Forward function for the full model.""" - ptuning_args = [] - if self._use_prompt_tuning: - ptuning_args = [prompt_embedding_table, prompt_tasks, prompt_vocab_size] - x = self.vocab_embedding(input_ids.data, *ptuning_args) - if hasattr(self, "positional_embedding") and self.positional_embedding: - assert position_ids - x = x + self.positional_embedding(position_ids) - - hidden_states = x - - if past_key_value is None: - past_key_value = tuple([None] * len(self.layers)) - - if use_cache: - presents = [] - - if attention_mask is not None: - attention_mask = expand_mask(attention_mask, shape(input_ids.data, -1)) - hidden_states = RaggedTensor.from_row_lengths(hidden_states, input_ids.row_lengths, input_ids.max_row_length) - - def _forward_has_argument(layer, argument_name): - return argument_name in inspect.signature(layer.forward).parameters - - for idx, (layer, past, pointers) in enumerate(zip(self.layers, past_key_value, kv_cache_block_pointers)): - # In TRT LLM, not all model decoders are with the same forward arg signature. - # So we check arg compatibility and optionally add them if supported. - # In case the decoder forward signature changes, this if branch list below will need to be updated. - additional_inputs = {} - if _forward_has_argument(layer, "inflight_batching_args"): - additional_inputs["inflight_batching_args"] = inflight_batching_args - if _forward_has_argument(layer, "past_key_value_pointers"): - additional_inputs["past_key_value_pointers"] = ( - (None if inflight_batching_args is None else inflight_batching_args.past_key_value_pointers[idx]), - ) - if _forward_has_argument(layer, "pointers_to_kv_cache_block_pointers"): - additional_inputs["pointers_to_kv_cache_block_pointers"] = ( - ( - None - if ( - inflight_batching_args is None - or inflight_batching_args.pointers_to_kv_cache_block_pointers is None - ) - else inflight_batching_args.pointers_to_kv_cache_block_pointers[idx] - ), - ) - - hidden_states = layer( - hidden_states, - past_key_value=past, - sequence_length=sequence_length, - host_past_key_value_lengths=host_past_key_value_lengths, - use_cache=use_cache, - attention_mask=attention_mask, - cache_indirection=cache_indirection, - kv_cache_block_pointers=pointers, - context_lengths=context_lengths, - host_context_lengths=host_context_lengths, - host_request_types=host_request_types, - max_context_length=max_context_length, - **additional_inputs, - ) - - if use_cache: - presents.append(hidden_states[1]) - hidden_states = hidden_states[0] - - hidden_states = self.ln_f(hidden_states.data) - - if use_cache: - return (hidden_states, tuple(presents)) - return hidden_states - - -class LMHeadModelBuilder(ModelBuilder, GenerationMixin): - """The implementation of the model builder with an LMHead.""" - - def __init__(self, model_config: ModelConfig): - """Initializes the LMHeadModelBuilder from a model_config.""" - super().__init__(model_config) - - # TODO: Add support for share_embedding_table - share_embedding_table = False - share_weight = None - if share_embedding_table: - share_weight = self.embedding.vocab_embedding.weight - self.lm_head = ColumnLinear( - self._hidden_size, - model_config.vocab_size_padded, - bias=False, - dtype=self._dtype, - tp_group=get_tensor_parallel_group(self._tensor_parallel), - tp_size=self._tensor_parallel, - gather_output=True, - share_weight=share_weight, - ) - self.lm_head.weight.value = model_config.lm_head.weight - if model_config.quantization: - self.lm_head = quantize_linear(self.lm_head, model_config.quantization, model_config.lm_head) - - def forward( - self, - input_ids, - position_ids, - past_key_value=None, - sequence_length=None, - host_past_key_value_lengths=None, - use_cache=False, - last_token_ids=None, - attention_mask=None, - cache_indirection=None, - kv_cache_block_pointers=None, - prompt_embedding_table=None, - prompt_tasks=None, - prompt_vocab_size=None, - inflight_batching_args=None, - context_lengths=None, - host_context_lengths=None, - host_request_types=None, - max_context_length=None, - ): - """Forward function for the full LMHead model.""" - assert last_token_ids is not None, "Expecting last token ids to be not None" - hidden_states = super().forward( - input_ids, - position_ids, - past_key_value, - sequence_length, - host_past_key_value_lengths, - use_cache, - attention_mask, - cache_indirection, - kv_cache_block_pointers, - prompt_embedding_table, - prompt_tasks, - prompt_vocab_size, - inflight_batching_args, - context_lengths, - host_context_lengths, - host_request_types, - max_context_length, - ) - - if use_cache: - hidden_states, presents = hidden_states - - hidden_states = gather_last_token_logits( - hidden_states, last_token_ids, default_net().plugin_config.remove_input_padding - ) - - # [batch_size, hidden_size] -> [batch_size, vocab_size] - lm_logits = self.lm_head(hidden_states) - lm_logits.mark_output("logits", str_dtype_to_trt("float16")) - # out_inter.mark_output('inter', str_dtype_to_trt('float32')) - - if use_cache: - for i, present in enumerate(presents): - present.mark_output(f"present_key_value_{i}", self._kv_dtype) - return (lm_logits, presents) - - return lm_logits - - def prepare_inputs( - self, - max_batch_size, - max_input_len, - max_new_tokens, - use_cache, - max_beam_width: int = 1, - paged_kv_cache: bool = False, - tokens_per_block: int = 64, - prompt_embedding_table_size: int = 128, - ): - """@brief: Prepare inputs Tensors for the model. - - The given sizes are used to determine the - ranges of the dimensions of when using TRT dynamic shapes. - - @return: a list contains values which can be fed into the self.forward() - """ - # Prepare inputs - head_size = self._hidden_size // self._num_heads - num_heads_kv = (self._num_kv_heads + self._tensor_parallel - 1) // self._tensor_parallel - remove_input_padding = default_net().plugin_config.remove_input_padding - use_gpt_attention_plugin = default_net().plugin_config.gpt_attention_plugin - use_ib_gpt_attention_plugin = default_net().plugin_config.inflight_batching_gpt_attention_plugin - - model_inputs = self.prepare_basic_inputs( - max_batch_size, - max_beam_width, - max_input_len, - max_new_tokens, - num_heads_kv, - head_size, - self._num_layers, - self._kv_dtype, - remove_input_padding=remove_input_padding, - use_gpt_attention_plugin=use_gpt_attention_plugin, - use_ib_gpt_attention_plugin=use_ib_gpt_attention_plugin, - paged_kv_cache=paged_kv_cache, - tokens_per_block=tokens_per_block, - ) - - bb_range = [1, (max_batch_size * max_beam_width + 1) // 2, max_batch_size * max_beam_width] - p_embedding_range = [1, prompt_embedding_table_size // 2, prompt_embedding_table_size] - num_tokens_range = [ - 1, - max_batch_size * max_beam_width, - max(max_input_len * max_batch_size, max_beam_width * max_batch_size), - ] - bs_range = [1, (max_batch_size + 1) // 2, max_batch_size] - - prompt_embedding_table = None - tasks = None - prompt_vocab_size = None - if self._use_prompt_tuning: - prompt_embedding_table = Tensor( - name="prompt_embedding_table", - dtype=self._dtype, - shape=[-1, self._hidden_size], - dim_range=OrderedDict( - [("prompt_embedding_table_size", [p_embedding_range]), ("hidden_size", [self._hidden_size]),] - ), - ) - if remove_input_padding: - tasks = Tensor( - name="tasks", - dtype=trt.int32, - shape=[1, -1], - dim_range=OrderedDict([("batch_size_fake", [1]), ("input_len_task", [num_tokens_range]),]), - ) - else: - tasks = Tensor( - name="tasks", - dtype=trt.int32, - shape=[-1, 1], - dim_range=OrderedDict([("batch_size_beam_width", [bb_range]), ("broadcast_dim", [1]),]), - ) - prompt_vocab_size = Tensor( - name="prompt_vocab_size", dtype=trt.int32, shape=[1], dim_range=OrderedDict([("size", [1])]), - ) - - inflight_batching_args = None - if use_ib_gpt_attention_plugin: - past_key_value_pointers = [] - pointers_to_kv_cache_block_pointers = [] - for i in range(self._num_layers): - kv = Tensor( - name=f"past_key_value_pointers_{i}", - dtype=trt.int32, - # 2 INT32s for representing a single INT64 pointer - shape=[-1, 2], - dim_range=OrderedDict(batch_size_kv=[bs_range], pointer_width=[2]), - ) - past_key_value_pointers.append(kv) - - if paged_kv_cache: - # [nbReq, 2] - pkv = Tensor( - name=f"pointers_to_kv_cache_block_pointers_{i}", - dtype=trt.int32, - # 2 INT32s for representing a single INT64 pointer - shape=[-1, 2], - dim_range=OrderedDict(batch_size_cp=[bs_range], pointer_width=[2]), - ) - pointers_to_kv_cache_block_pointers.append(pkv) - - inflight_batching_args = InflightBatchingParam( - # [nbReq] - host_context_lengths=Tensor( - name="host_context_lengths", - dtype=trt.int32, - shape=[-1], - dim_range=OrderedDict(batch_size_hscl=[bs_range]), - ), - # [nbSeq] - context_lengths=Tensor( - name="context_lengths", - dtype=trt.int32, - shape=[-1], - dim_range=OrderedDict(batch_size_context_lengths=[bs_range]), - ), - # [nbReq] - host_beam_widths=Tensor( - name="beam_widths", dtype=trt.int32, shape=[-1], dim_range=OrderedDict(batch_size_bw=[bs_range]), - ), - # [nbReq, 2] - cache_indir_pointers=Tensor( - name="cache_indir_pointers", - dtype=trt.int32, - # 2 INT32s for representing a single INT64 pointer - shape=[-1, 2], - dim_range=OrderedDict(batch_size_cp=[bs_range], pointer_width=[2]), - ), - # [nbReq] - host_req_cache_max_seq_lengths=Tensor( - name="req_cache_max_seq_lengths", - dtype=trt.int32, - shape=[-1], - dim_range=OrderedDict(batch_size_rcmsl=[bs_range]), - ), - max_input_length=max_input_len, - max_beam_width=max_beam_width, - use_int8_kv_cache=self.quant_mode.has_int8_kv_cache(), - past_key_value_pointers=past_key_value_pointers, - pointers_to_kv_cache_block_pointers=( - None if not paged_kv_cache else pointers_to_kv_cache_block_pointers - ), - ) - - return ( - model_inputs["input_ids"], - model_inputs["position_ids"], - model_inputs["past_key_value"], - model_inputs["sequence_length"], - model_inputs["host_past_key_value_lengths"], - True, - model_inputs["last_token_ids"], - model_inputs["attention_mask"], - model_inputs["cache_indirection"], - model_inputs["kv_cache_block_pointers_list"], - prompt_embedding_table, - tasks, - prompt_vocab_size, - inflight_batching_args, - model_inputs["context_lengths"], - model_inputs["host_context_lengths"], - model_inputs["host_request_types"], - max_input_len, - ) - - def build( - self, - output_dir: Path, - timing_cache: str = "", - log_level: str = "info", - max_batch_size: int = 1, - max_input_len: int = 200, - max_output_len: int = 200, - max_beam_width: int = 1, - parallel_build: bool = False, - max_prompt_embedding_table_size: int = 0, - ): - """Builds the model and generate the tensorrt_llm engine. - - Args: - timing_cache: the name of the tensorrt timing cache file inside the output_dir. - log_level: the logging level. - max_batch_size: the max batch size of the deployed model engine. - max_input_len: the max length of the input tokens. - max_output_len: the max length of the output tokens. - max_beam_width: the max beam search width. - output_dir: the output directory where we save the generated tensorrt_llm engine file. - """ - # Uncomment the following to print the network for debugging purpose. - # self.print() - - if self.rank < torch.cuda.device_count(): - print(f"warning: Rank {self.rank} larger than GPUs available") - if self._tensor_parallel < torch.cuda.device_count(): - print(f"warning: Not enough GPUs locally, requesting {self._tensor_parallel}") - - build( - tensorrt_llm_model=self, - output_dir=output_dir, - rank=self.rank, - world_size=self._tensor_parallel, - dtype=trt_dtype_to_str(self._dtype), - timing_cache=timing_cache, - log_level=log_level, - max_batch_size=max_batch_size, - max_input_len=max_input_len, - max_output_len=max_output_len, - max_beam_width=max_beam_width, - max_prompt_embedding_table_size=max_prompt_embedding_table_size, - parallel_build=parallel_build, - gpus_per_node=torch.cuda.device_count(), - quantization=self.quantization, - ) - - def print(self): - """Debugging print of the tensorrt_llm network.""" - np.set_printoptions(threshold=36) - print_tensorrt_llm(f"rank.{self.rank}", self) diff --git a/nemo/export/trt_llm/tensorrt_llm_run.py b/nemo/export/trt_llm/tensorrt_llm_run.py deleted file mode 100644 index c2a8626fdf13..000000000000 --- a/nemo/export/trt_llm/tensorrt_llm_run.py +++ /dev/null @@ -1,317 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""Defines the tensorrt_llm inference API that can support both single and multiple GPU LLM inferences. - -Referrence impl in tensorrt_llm: examples/llama/summarize.py. -""" - -import json -import os -from dataclasses import dataclass -from pathlib import Path -from typing import List, Optional - -import tensorrt_llm -import torch -from mpi4py.futures import MPIPoolExecutor -from tensorrt_llm.runtime import ModelConfig, SamplingConfig -from transformers import PreTrainedTokenizer - -from .tensorrt_llm_build import get_engine_name, MODEL_NAME # isort:skip - - -@dataclass -class TensorrtLLMHostContext: - """The host side context for TRT LLM inference.""" - - executor: MPIPoolExecutor = None - tensor_parallel: int = 1 - tokenizer: PreTrainedTokenizer = None - max_batch_size: int = 0 - max_input_len: int = 0 - - -@dataclass -class TensorrtLLMWorkerContext: - """The MPI worker side context for TRT LLM inference.""" - - decoder: tensorrt_llm.runtime.GenerationSession = None - sampling_config: SamplingConfig = None - max_batch_size: int = 0 - max_input_len: int = 0 - - -# This is a global context that will be initialized during the model loading process as MPI worker. -tensorrt_llm_worker_context = TensorrtLLMWorkerContext() - - -def _read_config(config_path: Path): - with open(config_path, "r") as f: - config = json.load(f) - use_gpt_attention_plugin = config["plugin_config"]["gpt_attention_plugin"] - ib_gpt_attention_plugin = config["plugin_config"]["inflight_batching_gpt_attention_plugin"] - remove_input_padding = config["plugin_config"]["remove_input_padding"] - world_size = config["builder_config"]["tensor_parallel"] - assert ( - world_size == tensorrt_llm.mpi_world_size() - ), f"Engine world size ({world_size}) != Runtime world size ({tensorrt_llm.mpi_world_size()})" - - assert world_size <= torch.cuda.device_count(), f"Not enough GPUs, requesting {world_size}" - - num_heads = config["builder_config"]["num_heads"] - num_kv_heads = config["builder_config"].get("num_kv_heads", num_heads) - hidden_size = config["builder_config"]["hidden_size"] // world_size - vocab_size = config["builder_config"]["vocab_size"] - num_layers = config["builder_config"]["num_layers"] - paged_kv_cache = config["plugin_config"]["paged_kv_cache"] - tokens_per_block = config["builder_config"]["tokens_per_block"] - use_prompt_tuning = config["builder_config"]["use_prompt_tuning"] - - num_heads = num_heads // world_size - num_kv_heads = (num_kv_heads + world_size - 1) // world_size - - model_config = ModelConfig( - num_heads=num_heads, - num_kv_heads=num_kv_heads, - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - gpt_attention_plugin=use_gpt_attention_plugin, - ib_gpt_attention_plugin=ib_gpt_attention_plugin, - remove_input_padding=remove_input_padding, - paged_kv_cache=paged_kv_cache, - tokens_per_block=tokens_per_block, - use_prompt_tuning=use_prompt_tuning, - ) - - dtype = config["builder_config"]["precision"] - max_input_len = config["builder_config"]["max_input_len"] - max_batch_size = config["builder_config"]["max_batch_size"] - - return model_config, world_size, dtype, max_input_len, max_batch_size - - -def _load(tokenizer: PreTrainedTokenizer, engine_dir, num_beams=1): - """The impl of `load` API for on a single GPU worker.""" - try: - tensorrt_llm.logger.set_level("info") - - engine_dir = Path(engine_dir) - config_path = engine_dir / "config.json" - model_config, world_size, dtype, max_input_len, max_batch_size = _read_config(config_path) - - runtime_rank = tensorrt_llm.mpi_rank() - - assert runtime_rank < torch.cuda.device_count(), f"Rank {runtime_rank} out of bound" - - runtime_mapping = tensorrt_llm.Mapping(world_size, runtime_rank) - torch.cuda.set_device(runtime_rank % runtime_mapping.gpus_per_node) - - engine_name = get_engine_name(MODEL_NAME, dtype, world_size, runtime_rank) - serialize_path = os.path.join(engine_dir, engine_name) - - with open(serialize_path, "rb") as f: - engine_buffer = f.read() - decoder = tensorrt_llm.runtime.GenerationSession( - model_config, engine_buffer, runtime_mapping, debug_mode=False - ) - - sampling_config = SamplingConfig( - end_id=tokenizer.eos_token_id, pad_id=tokenizer.eos_token_id, num_beams=num_beams - ) - - # Initialize the global context so it can be used during `run` API. - global tensorrt_llm_worker_context - tensorrt_llm_worker_context.decoder = decoder - tensorrt_llm_worker_context.sampling_config = sampling_config - tensorrt_llm_worker_context.max_batch_size = max_batch_size - tensorrt_llm_worker_context.max_input_len = max_input_len - - except Exception as e: - print(e) - raise e - - -def _forward( - input_tensors: List[torch.IntTensor], - max_output_len: int, - top_k: int = 1, - top_p: float = 0.0, - temperature: float = 1.0, - prompt_table=None, - task_vocab_size=None, -) -> Optional[torch.IntTensor]: - """The impl of `forward` API for on a single GPU worker with tensor as IO. - - Returns: - the output tokens tensor with shape [batch_size, num_beams, output_len]. - """ - try: - # Loading the global context initialized from the `load` API. - global tensorrt_llm_worker_context - decoder = tensorrt_llm_worker_context.decoder - sampling_config = tensorrt_llm_worker_context.sampling_config - max_batch_size = tensorrt_llm_worker_context.max_batch_size - max_input_len = tensorrt_llm_worker_context.max_input_len - - batch_size = len(input_tensors) - assert batch_size <= max_batch_size, f"batch size {batch_size} exceedng max batch size {max_batch_size}" - input_lengths = [t.shape[0] for t in input_tensors] - max_length = max(input_lengths) - assert max_length <= max_input_len, f"input length {max_length} exceedng max input length {max_input_len}" - pad_id = sampling_config.pad_id - - if decoder.remove_input_padding: - line_encoded = [torch.tensor(t, dtype=torch.int32).cuda() for t in input_tensors] - else: - line_encoded = torch.nested.to_padded_tensor( - torch.nested.nested_tensor(input_tensors, dtype=torch.int32), pad_id - ).cuda() - input_lengths = torch.tensor(input_lengths, dtype=torch.int32).cuda() - - if prompt_table is None: - ptuning_args = [] - else: - if task_vocab_size is None: - raise Exception("task_vocab_size cannot be None") - - task_vocab_size = torch.tensor([task_vocab_size], dtype=torch.int32, device="cuda") - - if isinstance(line_encoded, list): - le_size = len(line_encoded) - else: - le_size = line_encoded.size(0) - tasks = torch.zeros(le_size, 1).cuda() - - ptuning_args = [prompt_table, tasks, task_vocab_size] - - with torch.no_grad(): - sampling_config.top_k = top_k - sampling_config.top_p = top_p - sampling_config.temperature = temperature - - decoder.setup(batch_size, max_context_length=max_length, max_new_tokens=max_output_len) - - if decoder.remove_input_padding: - output_ids = decoder.decode_batch(line_encoded, sampling_config) - else: - output_ids = decoder.decode(line_encoded, input_lengths, sampling_config, *ptuning_args,) - - torch.cuda.synchronize() - - runtime_rank = tensorrt_llm.mpi_rank() - if runtime_rank == 0: - return output_ids - else: - return None - - except Exception as e: - print(e) - raise e - - -def load(tokenizer: PreTrainedTokenizer, engine_dir: str, num_beams: int = 1) -> TensorrtLLMHostContext: - """Loaded the compiled LLM model and run it. - - It also supports running the TRT LLM model on multi-GPU. - """ - config_path = os.path.join(engine_dir, "config.json") - with open(config_path, "r") as f: - config = json.load(f) - tensor_parallel = config["builder_config"]["tensor_parallel"] - if tensor_parallel == 1: - _load(tokenizer, engine_dir, num_beams) - executor = None - else: - executor = MPIPoolExecutor(max_workers=tensor_parallel) - futures = [] - for _ in range(tensor_parallel): - future = executor.submit(_load, tokenizer, engine_dir, num_beams) - futures.append(future) - for future in futures: - future.result() - - max_batch_size = config["builder_config"]["max_batch_size"] - max_input_len = config["builder_config"]["max_input_len"] - - return TensorrtLLMHostContext( - executor=executor, - tensor_parallel=tensor_parallel, - tokenizer=tokenizer, - max_batch_size=max_batch_size, - max_input_len=max_input_len, - ) - - -def forward( - input_tensors: List[torch.IntTensor], - max_output_len: int, - host_context: TensorrtLLMHostContext, - top_k: int = 1, - top_p: float = 0.0, - temperature: float = 1.0, - prompt_table=None, - task_vocab_size=None, -) -> Optional[torch.IntTensor]: - """Run the loaded model with the host_context provided from the `load` API.""" - batch_size = len(input_tensors) - max_batch_size = host_context.max_batch_size - assert batch_size <= max_batch_size, f"batch size {batch_size} exceedng max batch size {max_batch_size}" - max_length = max([t.shape[0] for t in input_tensors]) - max_input_len = host_context.max_input_len - assert max_length <= max_input_len, f"input length {max_length} exceedng max input length {max_input_len}" - - tensor_parallel = host_context.tensor_parallel - if tensor_parallel == 1: - return _forward(input_tensors, max_output_len, top_k, top_p, temperature, prompt_table, task_vocab_size) - else: - executor = host_context.executor - futures = [] - for _ in range(tensor_parallel): - future = executor.submit( - _forward, input_tensors, max_output_len, top_k, top_p, temperature, prompt_table, task_vocab_size - ) - futures.append(future) - for future in futures: - result = future.result() - if result is not None: - return result - - raise RuntimeError("Internal error") - - -def generate( - input_texts: List[torch.IntTensor], - max_output_len: int, - host_context: TensorrtLLMHostContext, - top_k: int = 1, - top_p: float = 0.0, - temperature: float = 1.0, - prompt_table=None, - task_vocab_size=None, -) -> Optional[List[List[str]]]: - """Generate the output sequence from the input sequence. - - Returns a 2D string list with shape [batch_size, num_beams]. - """ - tokenizer = host_context.tokenizer - input_tensors = [torch.IntTensor(tokenizer.encode(t, add_special_tokens=False)) for t in input_texts] - output_tensor = forward( - input_tensors, max_output_len, host_context, top_k, top_p, temperature, prompt_table, task_vocab_size - ) - assert output_tensor is not None - - input_lengths = [t.shape[0] for t in input_tensors] - output_lines_list = [ - tokenizer.batch_decode(output_tensor[b, :, input_lengths[b] :], skip_special_tokens=True) - for b in range(output_tensor.shape[0]) - ] - return output_lines_list diff --git a/nemo/export/trt_llm/tensorrt_llm_utils.py b/nemo/export/trt_llm/tensorrt_llm_utils.py deleted file mode 100644 index 3827e38a8db0..000000000000 --- a/nemo/export/trt_llm/tensorrt_llm_utils.py +++ /dev/null @@ -1,73 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NvidiaProprietary -# -# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual -# property and proprietary rights in and to this material, related -# documentation and any modifications thereto. Any use, reproduction, -# disclosure or distribution of this material and related documentation -# without an express license agreement from NVIDIA CORPORATION or -# its affiliates is strictly prohibited. - -"""Utils to convert model_config layers to tensorrt_llm modules.""" - -import tensorrt as trt -from tensorrt_llm.layers import Embedding, LayerNorm, PromptTuningEmbedding, RmsNorm -from tensorrt_llm.module import Module - -from .model_config import LAYERNORM_DEFAULT, LAYERNORM_RMS, EmbeddingConfig, LayernormConfig -from .tensor_utils import get_tensor_parallel_group - - -def build_embedding_from_config( - config: EmbeddingConfig, dtype: trt.DataType, tensor_parallel: int = 1, use_prompt_tuning: bool = False -): - """Returns the tensorrt_llm embedding layer from the embedding config.""" - # If the config is empty, return an empty impl. - if config is None: - return None - EmbeddingCls = PromptTuningEmbedding if use_prompt_tuning else Embedding - trt_embedding = EmbeddingCls( - config.weight.shape[0], - config.weight.shape[1], - dtype=dtype, - tp_size=tensor_parallel, - tp_group=get_tensor_parallel_group(tensor_parallel), - ) - trt_embedding.weight.value = config.weight - return trt_embedding - - -def build_layernorm_from_config(config: LayernormConfig, dtype: trt.DataType): - """Returns the tensorrt_llm layernorm layer from the torch layernorm.""" - # If the config is empty, return an empty impl. - if config is None: - return None - - if config.layernorm_type == LAYERNORM_DEFAULT: - trt_layernorm = LayerNorm(normalized_shape=config.weight.shape[0], dtype=dtype) - trt_layernorm.weight.value = config.weight - trt_layernorm.bias.value = config.bias - elif config.layernorm_type == LAYERNORM_RMS: - trt_layernorm = RmsNorm(normalized_shape=config.weight.shape[0], dtype=dtype) - trt_layernorm.weight.value = config.weight - else: - raise NotImplementedError(f"{config.layernorm_type} not supported") - return trt_layernorm - - -def print_tensorrt_llm(name: str, tensorrt_llm_module: Module): - """Prints the tensorrt llm structure including weights and related data for debugging purpose.""" - for tensor_name in [ - "weight", - "bias", - "activation_scaling_factor", - "weights_scaling_factor", - "prequant_scaling_factor", - ]: - if hasattr(tensorrt_llm_module, tensor_name): - tensor = getattr(tensorrt_llm_module, tensor_name) - if tensor is not None: - print(f"{name}.{tensor_name}:{tensor._value.dtype}:{tensor._value.shape}:\n{tensor._value}") - - for k, v in tensorrt_llm_module.named_children(): - print_tensorrt_llm(f"{name}.{k}({v._get_name()})", v) diff --git a/nemo/export/utils.py b/nemo/export/utils.py deleted file mode 100644 index 8a9691afe1d7..000000000000 --- a/nemo/export/utils.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import datetime -import logging -import os -import pathlib -import tarfile -import tempfile -import typing -from pathlib import Path - -import numpy as np -import torch -import yaml - -log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s" -logging.basicConfig(format=log_format) -LOGGER = logging.getLogger(__name__) - -# numpy doesn't know bfloat16, define abstract binary type instead -np_bfloat16 = np.dtype('V2', metadata={"dtype": "bfloat16"}) - - -def unpack_nemo_ckpt( - nemo_archive_path: typing.Union[str, pathlib.Path], out_dir_path: typing.Union[str, pathlib.Path], -): - nemo_archive_path = pathlib.Path(nemo_archive_path) - if not nemo_archive_path.exists(): - raise FileNotFoundError(f"{nemo_archive_path} does not exist") - - for tar_mode in ["r:", "r:gz"]: - try: - with tarfile.open(nemo_archive_path, mode=tar_mode) as tar_file: - - def is_within_directory(directory, target): - - abs_directory = os.path.abspath(directory) - abs_target = os.path.abspath(target) - - prefix = os.path.commonprefix([abs_directory, abs_target]) - - return prefix == abs_directory - - def safe_extract(tar, path=".", members=None, *, numeric_owner=False): - - for member in tar.getmembers(): - member_path = os.path.join(path, member.name) - if not is_within_directory(path, member_path): - raise Exception("Attempted Path Traversal in Tar File") - - tar.extractall(path, members, numeric_owner=numeric_owner) - - safe_extract(tar_file, path=out_dir_path) - return out_dir_path - except tarfile.ReadError: - pass - - raise RuntimeError(f"Could not unpack {nemo_archive_path}") - - -def prompt_convert(prompt_config, prompt_weights): - if "task_templates" in prompt_config: - prompt_templates = prompt_config["task_templates"] - actual_task_id = 0 - vtokens_embeddings = [] - vtokens_len = [] - for task_name_id, prompt_task in enumerate(prompt_templates): - prompt_task_name = prompt_task["taskname"] - LOGGER.info(f"Task {actual_task_id}: {prompt_task['taskname']}") - prompt_task_weights = prompt_weights["prompt_table"].get( - f"prompt_table.{prompt_task_name}.prompt_embeddings.weight" - ) - if prompt_task_weights is None: - continue - vtokens_embeddings.append(prompt_task_weights) - vtokens_len.append(prompt_task_weights.shape[0]) - actual_task_id += 1 - - max_vtoken_len = max(vtokens_len) - embedding_dim = vtokens_embeddings[0].shape[1] - - # pad tasks to longest task embedding table - for i, vtoken_emb_table in enumerate(vtokens_embeddings): - padded_table = torch.zeros((max_vtoken_len, embedding_dim)) - padded_table[: vtoken_emb_table.shape[0], :] = vtoken_emb_table - vtokens_embeddings[i] = padded_table - - vtokens_embeddings = torch.stack(vtokens_embeddings) - else: - vtokens_embeddings = prompt_weights["prompt_embeddings_weights"] - - return vtokens_embeddings - - -def torch_to_numpy(x): - if x.dtype != torch.bfloat16: - return x.numpy() - return x.view(torch.int16).numpy().view(np_bfloat16) - - -def cpu_map_location(storage, loc): - return storage.cpu() - - -def is_nemo_file(path): - flag = False - - if path is not None: - if len(path) > 5: - pc = Path(path) - if pc.exists(): - if pc.is_file(): - if path[-5 : len(path)] == ".nemo": - flag = True - - return flag - - -def get_prompt_embedding_table(prompt_checkpoint_path): - - with tempfile.TemporaryDirectory() as prompt_out_dir: - prompt_out_dir = Path(prompt_out_dir) - unpack_nemo_ckpt(prompt_checkpoint_path, prompt_out_dir) - - model_weights_ckpt = "model_weights.ckpt" - with open(prompt_out_dir / "model_config.yaml") as f: - prompt_config = yaml.full_load(f) - LOGGER.debug(prompt_config) - - weight_path = prompt_out_dir / model_weights_ckpt - if not weight_path.exists(): - weight_path = prompt_out_dir / "mp_rank_00" / model_weights_ckpt - - prompt_weights = torch.load(weight_path, map_location=cpu_map_location,) - - return prompt_convert(prompt_config, prompt_weights) - - -def torch_to_numpy(x): - if x.dtype != torch.bfloat16: - return x.numpy() - return x.view(torch.int16).numpy().view(np_bfloat16) From 9a566be7a5d0b79215ad7aead02beca8cba09589 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Mon, 16 Oct 2023 14:30:08 -0700 Subject: [PATCH 342/512] Improve the unet ckpt loading logic. --- .../diffusionmodules/openaimodel.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py index e46f0c84b38c..9c52198f1566 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py @@ -723,18 +723,16 @@ def __init__( ) if from_pretrained is not None: - if from_NeMo: - state_dict = torch.load(from_pretrained, map_location='cpu') - missing_key, _, _, _ = self._load_pretrained_model(state_dict['state_dict'], from_NeMo=True) - else: - state_dict = torch.load(from_pretrained, map_location='cpu') - if 'state_dict' in state_dict.keys(): - state_dict = state_dict['state_dict'] - missing_key, _, _, _ = self._load_pretrained_model(state_dict) + state_dict = torch.load(from_pretrained, map_location='cpu') + if 'state_dict' in state_dict.keys(): + state_dict = state_dict['state_dict'] + missing_key, unexpected_keys, _, _ = self._load_pretrained_model(state_dict, from_NeMo=from_NeMo) if len(missing_key) > 0: print( 'Following keys are missing during loading unet weights, which may lead to compromised image quality for a resumed training. Please check the checkpoint you provided.' ) + print(f"Missing keys: {missing_key}") + print(f"Unexpected keys: {unexpected_keys}") if enable_amp_o2_fp16: self.convert_to_fp16() From 7a0ae363e89e5da2c09db304fea9aa8614a644e9 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Mon, 16 Oct 2023 14:30:08 -0700 Subject: [PATCH 343/512] Improve the unet ckpt loading logic. --- .../diffusionmodules/openaimodel.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py index e46f0c84b38c..9c52198f1566 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py @@ -723,18 +723,16 @@ def __init__( ) if from_pretrained is not None: - if from_NeMo: - state_dict = torch.load(from_pretrained, map_location='cpu') - missing_key, _, _, _ = self._load_pretrained_model(state_dict['state_dict'], from_NeMo=True) - else: - state_dict = torch.load(from_pretrained, map_location='cpu') - if 'state_dict' in state_dict.keys(): - state_dict = state_dict['state_dict'] - missing_key, _, _, _ = self._load_pretrained_model(state_dict) + state_dict = torch.load(from_pretrained, map_location='cpu') + if 'state_dict' in state_dict.keys(): + state_dict = state_dict['state_dict'] + missing_key, unexpected_keys, _, _ = self._load_pretrained_model(state_dict, from_NeMo=from_NeMo) if len(missing_key) > 0: print( 'Following keys are missing during loading unet weights, which may lead to compromised image quality for a resumed training. Please check the checkpoint you provided.' ) + print(f"Missing keys: {missing_key}") + print(f"Unexpected keys: {unexpected_keys}") if enable_amp_o2_fp16: self.convert_to_fp16() From 576c65217bcdc8f6efcb40e223b55e9cde162a0a Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Mon, 16 Oct 2023 20:15:28 -0700 Subject: [PATCH 344/512] Add checkpoint_averaging script --- .../distributed_checkpoint_averaging.py | 160 ++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 scripts/checkpoint_averaging/distributed_checkpoint_averaging.py diff --git a/scripts/checkpoint_averaging/distributed_checkpoint_averaging.py b/scripts/checkpoint_averaging/distributed_checkpoint_averaging.py new file mode 100644 index 000000000000..6939cc9b36b5 --- /dev/null +++ b/scripts/checkpoint_averaging/distributed_checkpoint_averaging.py @@ -0,0 +1,160 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright 2017 Johns Hopkins University (Shinji Watanabe) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Example: python scripts/checkpoint_averaging/distributed_checkpoint_averaging.py \ + --name_prefix= \ + --checkpoint_dir= + --steps + +will generate a new directory in each of the distributed checkpoint subfolders named -averaged +""" + +import argparse +import logging +import os +import shutil + +import zarr + +logging.basicConfig(level=logging.INFO) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + '--name_prefix', help='Name of the final checkpoint. Will append -averaged automatically.', + ) + parser.add_argument( + '--checkpoint_dir', help='Folder containing all the distributed checkpoints.', + ) + # list of checkpoint steps to average + parser.add_argument( + '--steps', + nargs='+', + type=int, + help='List of checkpoint steps to average. If not specified, will average all.', + ) + + args = parser.parse_args() + + if args.steps is not None: + logging.info(f"Will average only steps {args.steps}") + + # repeating for all ranks + + checkpoint_paths = [] + for ckpt_dir in os.listdir(args.checkpoint_dir): + logging.info("Processing %s", ckpt_dir) + if ckpt_dir.endswith('0-last'): + continue + if args.steps is None: + checkpoint_paths.append(ckpt_dir) + else: + for step in args.steps: + key = f"-step={step}-" + if key in ckpt_dir: + checkpoint_paths.append(ckpt_dir) + + n = len(checkpoint_paths) + # initialize dict, will be used to store the weights that need to be averaged + avg_weights = {} + + logging.info(f"Averaging {n} checkpoints ... {'at steps:' + str(args.steps) if args.steps is not None else ''}") + + # item that needs to be copied to the new checkpoint folder + copy_items = [] + for ix, path in enumerate(checkpoint_paths): + full_path = os.path.join(args.checkpoint_dir, path) + + for item in os.listdir(full_path): + + # if item is not a directory, skip it + if not os.path.isdir(os.path.join(full_path, item)): + if ix == 0: + copy_items.append(os.path.join(full_path, item)) + continue + + # transformer engine states, leave them out + if item.endswith('._extra_state'): + if ix == 0: + copy_items.append(os.path.join(full_path, item)) + continue + + # optimizer states, no point of averaing them + if item.startswith('optimizer.'): + if ix == 0: + copy_items.append(os.path.join(full_path, item)) + continue + + if item not in avg_weights: + logging.info(f"Initialized average weights dict with: {item}") + avg_weights[item] = zarr.open(os.path.join(full_path, item), mode='r') + else: + logging.info(f"Updated average weights dict with weight: {item}") + array_z = zarr.open(os.path.join(full_path, item), mode='r') + sum_array = avg_weights[item][:] + array_z[:] + avg_weights[item] = zarr.array(sum_array, chunks=array_z.chunks, dtype=array_z.dtype) + + for k in avg_weights: + logging.info(f"Average weights dict key : {k}, dtype : {avg_weights[k].dtype}, shape : {avg_weights[k].shape}") + if str(avg_weights[k].dtype).startswith("int"): + raise ValueError("Int type not supported") + else: + array_z = avg_weights[k][:] + array_z = array_z / n + avg_weights[k] = zarr.array(array_z, chunks=avg_weights[k].chunks, dtype=avg_weights[k].dtype) + + # Save model + if args.steps is None: + ckpt_name = os.path.join(args.checkpoint_dir, args.name_prefix + '-averaged') + else: + steps_combined = '_'.join([str(x) for x in args.steps]) + ckpt_name = os.path.join(args.checkpoint_dir, args.name_prefix + '-' + steps_combined + '-averaged') + + # save avg_weights + for k in avg_weights: + logging.info(f"Saving {k} to {ckpt_name}") + zarr.save(os.path.join(ckpt_name, k), avg_weights[k]) + + # copy other files + for item in copy_items: + is_file = os.path.isfile(item) + logging.info(f"Copying {'directory' if is_file else 'file'} {item} to {ckpt_name}") + if os.path.isfile(item): + # copy single file + shutil.copy(item, ckpt_name) + else: + # copy directory + shutil.copytree(item, os.path.join(ckpt_name, os.path.basename(item)), dirs_exist_ok=True) + + logging.info(f"Averaged distributed checkpoint saved as : {ckpt_name}") + + +if __name__ == '__main__': + main() From d6900f9bc1922d086e2e388dcec6e3bd2b0f59dc Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Mon, 16 Oct 2023 20:15:48 -0700 Subject: [PATCH 345/512] Hide multimodal code changes --- .../nsfw/conf/megatron_nsfw_config.yaml | 230 -- .../nsfw/conf/megatron_nsfw_infer.yaml | 12 - .../nsfw/megatron_nsfw_infer.py | 78 - .../nsfw/megatron_nsfw_pretrain.py | 60 - examples/multimodal/convert_ckpt_to_nemo.py | 197 -- .../clip/conf/megatron_clip_config.yaml | 250 -- .../conf/megatron_clip_imagenet_zeroshot.yaml | 17 - .../clip/conf/megatron_clip_infer.yaml | 13 - .../clip/convert_external_clip_to_nemo.py | 284 --- .../clip/megatron_clip_imagenet_zeroshot.py | 142 -- .../foundation/clip/megatron_clip_infer.py | 89 - .../foundation/clip/megatron_clip_pretrain.py | 49 - .../controlnet/conf/controlnet_infer.yaml | 36 - .../controlnet/conf/controlnet_v1-5.yaml | 220 -- .../generative/controlnet/controlnet_infer.py | 247 -- .../generative/controlnet/controlnet_train.py | 54 - .../generative/convert_hf_ckpt_to_nemo.py | 226 -- .../dreambooth/conf/dreambooth.yaml | 224 -- .../dreambooth/conf/dreambooth_infer.yaml | 32 - .../generative/dreambooth/dreambooth.py | 119 - .../generative/dreambooth/dreambooth_infer.py | 44 - .../multimodal/generative/imagen/README.md | 104 - .../generative/imagen/conf/base64-2b.yaml | 142 -- .../imagen/conf/base64-500m-edm.yaml | 136 -- .../generative/imagen/conf/base64-500m.yaml | 144 -- .../conf/base64-500m_online_encoding.yaml | 137 -- .../generative/imagen/conf/fid_inference.yaml | 26 - .../imagen/conf/imagen_fid_images.yaml | 57 - .../imagen/conf/inference_pipeline.yaml | 42 - .../generative/imagen/conf/sr1024-600m.yaml | 145 -- .../imagen/conf/sr256-400m-edm.yaml | 222 -- .../generative/imagen/conf/sr256-400m.yaml | 150 -- .../imagen/conf/sr256-450m-edm.yaml | 222 -- .../imagen/conf/sr256-600m-edm-noise.yaml | 142 -- .../imagen/conf/sr256-600m-edm.yaml | 219 -- .../generative/imagen/conf/sr256-600m.yaml | 146 -- .../generative/imagen/generate_fid_images.py | 104 - .../imagen/imagen_generate_images.py | 62 - .../generative/imagen/imagen_infer.py | 45 - .../generative/imagen/imagen_training.py | 71 - .../instruct_pix2pix/conf/sd_edit.yaml | 23 - .../instruct_pix2pix/conf/sd_finetune.yaml | 168 -- .../instruct_pix2pix/sd_edit_cli.py | 174 -- .../instruct_pix2pix/sd_finetune.py | 45 - .../generative/nerf/benchmark_callback.py | 82 - .../generative/nerf/config/config.yaml | 52 - .../nerf/config/model/background/random.yaml | 3 - .../nerf/config/model/background/static.yaml | 2 - .../nerf/config/model/background/tcnn.yaml | 19 - .../config/model/background/torchngp.yaml | 11 - .../nerf/config/model/data/data.yaml | 41 - .../nerf/config/model/dreamfusion-dmtet.yaml | 40 - .../nerf/config/model/dreamfusion.yaml | 40 - .../config/model/guidance/sd_huggingface.yaml | 4 - .../nerf/config/model/guidance/sd_nemo.yaml | 4 - .../nerf/config/model/guidance/sd_trt.yaml | 5 - .../nerf/config/model/loss/dmtet.yaml | 8 - .../nerf/config/model/loss/dreamfusion.yaml | 8 - .../config/model/material/basic_shading.yaml | 1 - .../nerf/config/model/nerf/tcnn.yaml | 32 - .../nerf/config/model/nerf/torchngp.yaml | 26 - .../nerf/config/model/optim/adan.yaml | 6 - .../nerf/config/model/renderer/nerfacc.yaml | 8 - .../config/model/renderer/nvdiffrast.yaml | 6 - .../model/renderer/torchngp_raymarching.yaml | 7 - examples/multimodal/generative/nerf/data.py | 73 - examples/multimodal/generative/nerf/main.py | 71 - .../stable_diffusion/conf/sd2_train.yaml | 192 -- .../stable_diffusion/conf/sd_fid_images.yaml | 45 - .../stable_diffusion/conf/sd_infer.yaml | 31 - .../stable_diffusion/conf/sd_train.yaml | 207 -- .../stable_diffusion/generate_fid_images.py | 83 - .../generative/stable_diffusion/sd_infer.py | 44 - .../generative/stable_diffusion/sd_train.py | 87 - .../mllm/kosmos/conf/kosmos_config.yaml | 317 --- .../multimodal/mllm/kosmos/kosmos_pretrain.py | 51 - .../mllm/neva/conf/llava_config.yaml | 213 -- .../mllm/neva/conf/neva_config.yaml | 212 -- .../mllm/neva/conf/neva_finetune.yaml | 209 -- .../mllm/neva/conf/neva_inference.yaml | 52 - .../multimodal/mllm/neva/conf/neva_peft.yaml | 215 -- .../mllm/neva/convert_hf_llava_to_neva.py | 343 --- .../multimodal/mllm/neva/neva_evaluation.py | 353 --- .../multimodal/mllm/neva/neva_finetune.py | 55 - examples/multimodal/mllm/neva/neva_peft.py | 56 - .../multimodal/mllm/neva/neva_pretrain.py | 47 - examples/vision/convert_ckpt_to_nemo.py | 160 -- .../megatron_vit_classification_config.yaml | 163 -- .../megatron_vit_classification_evaluate.yaml | 15 - .../megatron_vit_classification_infer.yaml | 12 - .../megatron_vit_classification_evaluate.py | 124 - .../megatron_vit_classification_finetune.py | 52 - .../megatron_vit_classification_infer.py | 145 -- .../megatron_vit_classification_pretrain.py | 44 - nemo/collections/multimodal/data/__init__.py | 13 - .../multimodal/data/clip/__init__.py | 13 - .../data/clip/augmentations/__init__.py | 13 - .../data/clip/augmentations/augmentations.py | 108 - .../multimodal/data/clip/clip_dataset.py | 194 -- .../data/clip/imagenet_zeroshot_data.py | 1100 --------- .../multimodal/data/common/__init__.py | 13 - .../multimodal/data/common/data_samplers.py | 133 - .../multimodal/data/common/webdataset.py | 286 --- .../multimodal/data/common/webdataset_s3.py | 237 -- .../data/controlnet/controlnet_dataset.py | 100 - .../data/dreambooth/dreambooth_dataset.py | 148 -- .../multimodal/data/imagen/__init__.py | 13 - .../data/imagen/augmentations/__init__.py | 13 - .../imagen/augmentations/augmentations.py | 76 - .../data/imagen/augmentations/corruption.py | 33 - .../multimodal/data/imagen/imagen_dataset.py | 156 -- .../data/instruct_pix2pix/__init__.py | 13 - .../data/instruct_pix2pix/edit_dataset.py | 132 - .../multimodal/data/kosmos/__init__.py | 0 .../multimodal/data/kosmos/kosmos_dataset.py | 366 --- .../multimodal/data/nerf/__init__.py | 0 .../multimodal/data/nerf/cameras.py | 178 -- .../multimodal/data/nerf/circle_poses.py | 214 -- .../multimodal/data/nerf/random_poses.py | 436 ---- .../collections/multimodal/data/nerf/utils.py | 204 -- .../multimodal/data/neva/__init__.py | 0 .../multimodal/data/neva/conversation.py | 406 --- .../multimodal/data/neva/neva_dataset.py | 584 ----- .../multimodal/data/nsfw/__init__.py | 13 - .../multimodal/data/nsfw/nsfw_dataset.py | 53 - .../data/stable_diffusion/__init__.py | 13 - .../stable_diffusion/augmentation/__init__.py | 13 - .../augmentation/augmentations.py | 71 - .../stable_diffusion_dataset.py | 184 -- .../collections/multimodal/losses/__init__.py | 13 - .../multimodal/losses/clip_loss.py | 112 - .../collections/multimodal/models/__init__.py | 13 - .../multimodal/models/clip/__init__.py | 13 - .../models/clip/megatron_clip_models.py | 1017 -------- .../models/content_filter/__init__.py | 13 - .../megatron_nsfw_clip_models.py | 398 --- .../multimodal/models/controlnet/__init__.py | 13 - .../models/controlnet/controlnet.py | 1002 -------- .../models/controlnet/uniformer/LICENSE | 203 -- .../models/controlnet/uniformer/__init__.py | 33 - .../configs/_base_/datasets/ade20k.py | 58 - .../configs/_base_/datasets/chase_db1.py | 64 - .../configs/_base_/datasets/cityscapes.py | 50 - .../_base_/datasets/cityscapes_769x769.py | 32 - .../configs/_base_/datasets/drive.py | 64 - .../uniformer/configs/_base_/datasets/hrf.py | 64 - .../configs/_base_/datasets/pascal_context.py | 64 - .../_base_/datasets/pascal_context_59.py | 64 - .../configs/_base_/datasets/pascal_voc12.py | 61 - .../_base_/datasets/pascal_voc12_aug.py | 8 - .../configs/_base_/datasets/stare.py | 64 - .../configs/_base_/default_runtime.py | 15 - .../configs/_base_/models/ann_r50-d8.py | 48 - .../configs/_base_/models/apcnet_r50-d8.py | 46 - .../configs/_base_/models/ccnet_r50-d8.py | 46 - .../uniformer/configs/_base_/models/cgnet.py | 54 - .../configs/_base_/models/danet_r50-d8.py | 46 - .../configs/_base_/models/deeplabv3_r50-d8.py | 46 - .../_base_/models/deeplabv3_unet_s5-d16.py | 52 - .../_base_/models/deeplabv3plus_r50-d8.py | 48 - .../configs/_base_/models/dmnet_r50-d8.py | 46 - .../configs/_base_/models/dnl_r50-d8.py | 48 - .../configs/_base_/models/emanet_r50-d8.py | 49 - .../configs/_base_/models/encnet_r50-d8.py | 49 - .../configs/_base_/models/fast_scnn.py | 59 - .../configs/_base_/models/fcn_hr18.py | 37 - .../configs/_base_/models/fcn_r50-d8.py | 47 - .../configs/_base_/models/fcn_unet_s5-d16.py | 53 - .../configs/_base_/models/fpn_r50.py | 34 - .../configs/_base_/models/fpn_uniformer.py | 32 - .../configs/_base_/models/gcnet_r50-d8.py | 48 - .../configs/_base_/models/lraspp_m-v3-d8.py | 22 - .../configs/_base_/models/nonlocal_r50-d8.py | 48 - .../configs/_base_/models/ocrnet_hr18.py | 53 - .../configs/_base_/models/ocrnet_r50-d8.py | 49 - .../configs/_base_/models/pointrend_r50.py | 49 - .../configs/_base_/models/psanet_r50-d8.py | 51 - .../configs/_base_/models/pspnet_r50-d8.py | 46 - .../_base_/models/pspnet_unet_s5-d16.py | 52 - .../configs/_base_/models/upernet_r50.py | 46 - .../_base_/models/upernet_uniformer.py | 45 - .../configs/_base_/schedules/schedule_160k.py | 9 - .../configs/_base_/schedules/schedule_20k.py | 9 - .../configs/_base_/schedules/schedule_40k.py | 9 - .../configs/_base_/schedules/schedule_80k.py | 9 - .../exp/upernet_global_small/config.py | 48 - .../uniformer/exp/upernet_global_small/run.sh | 10 - .../exp/upernet_global_small/test.sh | 10 - .../exp/upernet_global_small/test_config_g.py | 48 - .../upernet_global_small/test_config_h32.py | 49 - .../upernet_global_small/test_config_w32.py | 49 - .../controlnet/uniformer/mmcv/__init__.py | 15 - .../uniformer/mmcv/arraymisc/__init__.py | 4 - .../uniformer/mmcv/arraymisc/quantization.py | 49 - .../controlnet/uniformer/mmcv/cnn/__init__.py | 131 - .../controlnet/uniformer/mmcv/cnn/alexnet.py | 62 - .../uniformer/mmcv/cnn/bricks/__init__.py | 61 - .../uniformer/mmcv/cnn/bricks/activation.py | 93 - .../mmcv/cnn/bricks/context_block.py | 123 - .../uniformer/mmcv/cnn/bricks/conv.py | 44 - .../cnn/bricks/conv2d_adaptive_padding.py | 46 - .../uniformer/mmcv/cnn/bricks/conv_module.py | 206 -- .../uniformer/mmcv/cnn/bricks/conv_ws.py | 121 - .../bricks/depthwise_separable_conv_module.py | 95 - .../uniformer/mmcv/cnn/bricks/drop.py | 64 - .../mmcv/cnn/bricks/generalized_attention.py | 346 --- .../uniformer/mmcv/cnn/bricks/hsigmoid.py | 34 - .../uniformer/mmcv/cnn/bricks/hswish.py | 29 - .../uniformer/mmcv/cnn/bricks/non_local.py | 272 --- .../uniformer/mmcv/cnn/bricks/norm.py | 148 -- .../uniformer/mmcv/cnn/bricks/padding.py | 36 - .../uniformer/mmcv/cnn/bricks/plugin.py | 87 - .../uniformer/mmcv/cnn/bricks/registry.py | 16 - .../uniformer/mmcv/cnn/bricks/scale.py | 21 - .../uniformer/mmcv/cnn/bricks/swish.py | 25 - .../uniformer/mmcv/cnn/bricks/transformer.py | 607 ----- .../uniformer/mmcv/cnn/bricks/upsample.py | 83 - .../uniformer/mmcv/cnn/bricks/wrappers.py | 173 -- .../controlnet/uniformer/mmcv/cnn/builder.py | 28 - .../controlnet/uniformer/mmcv/cnn/resnet.py | 271 -- .../uniformer/mmcv/cnn/utils/__init__.py | 48 - .../uniformer/mmcv/cnn/utils/flops_counter.py | 580 ----- .../uniformer/mmcv/cnn/utils/fuse_conv_bn.py | 56 - .../uniformer/mmcv/cnn/utils/sync_bn.py | 59 - .../uniformer/mmcv/cnn/utils/weight_init.py | 644 ----- .../controlnet/uniformer/mmcv/cnn/vgg.py | 159 -- .../uniformer/mmcv/engine/__init__.py | 4 - .../controlnet/uniformer/mmcv/engine/test.py | 195 -- .../uniformer/mmcv/fileio/__init__.py | 19 - .../uniformer/mmcv/fileio/file_client.py | 1127 --------- .../mmcv/fileio/handlers/__init__.py | 7 - .../uniformer/mmcv/fileio/handlers/base.py | 30 - .../mmcv/fileio/handlers/json_handler.py | 35 - .../mmcv/fileio/handlers/pickle_handler.py | 26 - .../mmcv/fileio/handlers/yaml_handler.py | 24 - .../controlnet/uniformer/mmcv/fileio/io.py | 148 -- .../controlnet/uniformer/mmcv/fileio/parse.py | 89 - .../uniformer/mmcv/image/__init__.py | 106 - .../uniformer/mmcv/image/colorspace.py | 304 --- .../uniformer/mmcv/image/geometric.py | 671 ----- .../controlnet/uniformer/mmcv/image/io.py | 256 -- .../controlnet/uniformer/mmcv/image/misc.py | 43 - .../uniformer/mmcv/image/photometric.py | 422 ---- .../uniformer/mmcv/model_zoo/deprecated.json | 6 - .../uniformer/mmcv/model_zoo/mmcls.json | 31 - .../uniformer/mmcv/model_zoo/open_mmlab.json | 50 - .../controlnet/uniformer/mmcv/ops/__init__.py | 134 - .../uniformer/mmcv/ops/assign_score_withk.py | 117 - .../uniformer/mmcv/ops/ball_query.py | 49 - .../controlnet/uniformer/mmcv/ops/bbox.py | 71 - .../uniformer/mmcv/ops/border_align.py | 98 - .../uniformer/mmcv/ops/box_iou_rotated.py | 44 - .../controlnet/uniformer/mmcv/ops/carafe.py | 281 --- .../uniformer/mmcv/ops/cc_attention.py | 81 - .../uniformer/mmcv/ops/contour_expand.py | 45 - .../uniformer/mmcv/ops/corner_pool.py | 162 -- .../uniformer/mmcv/ops/correlation.py | 197 -- .../uniformer/mmcv/ops/deform_conv.py | 406 --- .../uniformer/mmcv/ops/deform_roi_pool.py | 165 -- .../uniformer/mmcv/ops/deprecated_wrappers.py | 43 - .../uniformer/mmcv/ops/focal_loss.py | 183 -- .../mmcv/ops/furthest_point_sample.py | 74 - .../mmcv/ops/fused_bias_leakyrelu.py | 249 -- .../uniformer/mmcv/ops/gather_points.py | 47 - .../uniformer/mmcv/ops/group_points.py | 206 -- .../controlnet/uniformer/mmcv/ops/info.py | 36 - .../controlnet/uniformer/mmcv/ops/iou3d.py | 82 - .../controlnet/uniformer/mmcv/ops/knn.py | 73 - .../uniformer/mmcv/ops/masked_conv.py | 82 - .../uniformer/mmcv/ops/merge_cells.py | 140 -- .../mmcv/ops/modulated_deform_conv.py | 280 --- .../mmcv/ops/multi_scale_deform_attn.py | 348 --- .../controlnet/uniformer/mmcv/ops/nms.py | 383 --- .../uniformer/mmcv/ops/pixel_group.py | 71 - .../uniformer/mmcv/ops/point_sample.py | 317 --- .../uniformer/mmcv/ops/points_in_boxes.py | 111 - .../uniformer/mmcv/ops/points_sampler.py | 169 -- .../controlnet/uniformer/mmcv/ops/psa_mask.py | 85 - .../uniformer/mmcv/ops/roi_align.py | 204 -- .../uniformer/mmcv/ops/roi_align_rotated.py | 161 -- .../controlnet/uniformer/mmcv/ops/roi_pool.py | 79 - .../uniformer/mmcv/ops/roiaware_pool3d.py | 100 - .../uniformer/mmcv/ops/roipoint_pool3d.py | 75 - .../controlnet/uniformer/mmcv/ops/saconv.py | 128 - .../uniformer/mmcv/ops/scatter_points.py | 132 - .../controlnet/uniformer/mmcv/ops/sync_bn.py | 288 --- .../uniformer/mmcv/ops/three_interpolate.py | 62 - .../controlnet/uniformer/mmcv/ops/three_nn.py | 50 - .../uniformer/mmcv/ops/tin_shift.py | 67 - .../uniformer/mmcv/ops/upfirdn2d.py | 307 --- .../controlnet/uniformer/mmcv/ops/voxelize.py | 115 - .../uniformer/mmcv/parallel/__init__.py | 19 - .../uniformer/mmcv/parallel/_functions.py | 76 - .../uniformer/mmcv/parallel/collate.py | 69 - .../uniformer/mmcv/parallel/data_container.py | 83 - .../uniformer/mmcv/parallel/data_parallel.py | 93 - .../uniformer/mmcv/parallel/distributed.py | 106 - .../mmcv/parallel/distributed_deprecated.py | 58 - .../uniformer/mmcv/parallel/registry.py | 8 - .../uniformer/mmcv/parallel/scatter_gather.py | 59 - .../uniformer/mmcv/parallel/utils.py | 20 - .../uniformer/mmcv/runner/__init__.py | 118 - .../uniformer/mmcv/runner/base_module.py | 194 -- .../uniformer/mmcv/runner/base_runner.py | 515 ---- .../uniformer/mmcv/runner/builder.py | 21 - .../uniformer/mmcv/runner/checkpoint.py | 670 ----- .../mmcv/runner/default_constructor.py | 43 - .../uniformer/mmcv/runner/dist_utils.py | 157 -- .../mmcv/runner/epoch_based_runner.py | 172 -- .../uniformer/mmcv/runner/fp16_utils.py | 388 --- .../uniformer/mmcv/runner/hooks/__init__.py | 58 - .../uniformer/mmcv/runner/hooks/checkpoint.py | 156 -- .../uniformer/mmcv/runner/hooks/closure.py | 10 - .../uniformer/mmcv/runner/hooks/ema.py | 84 - .../uniformer/mmcv/runner/hooks/evaluation.py | 493 ---- .../uniformer/mmcv/runner/hooks/hook.py | 100 - .../uniformer/mmcv/runner/hooks/iter_timer.py | 17 - .../mmcv/runner/hooks/logger/__init__.py | 20 - .../mmcv/runner/hooks/logger/base.py | 157 -- .../mmcv/runner/hooks/logger/dvclive.py | 51 - .../mmcv/runner/hooks/logger/mlflow.py | 70 - .../mmcv/runner/hooks/logger/neptune.py | 75 - .../mmcv/runner/hooks/logger/pavi.py | 113 - .../mmcv/runner/hooks/logger/tensorboard.py | 49 - .../mmcv/runner/hooks/logger/text.py | 235 -- .../mmcv/runner/hooks/logger/wandb.py | 54 - .../uniformer/mmcv/runner/hooks/lr_updater.py | 615 ----- .../uniformer/mmcv/runner/hooks/memory.py | 24 - .../mmcv/runner/hooks/momentum_updater.py | 421 ---- .../uniformer/mmcv/runner/hooks/optimizer.py | 461 ---- .../uniformer/mmcv/runner/hooks/profiler.py | 174 -- .../mmcv/runner/hooks/sampler_seed.py | 20 - .../mmcv/runner/hooks/sync_buffer.py | 22 - .../mmcv/runner/iter_based_runner.py | 256 -- .../uniformer/mmcv/runner/log_buffer.py | 40 - .../mmcv/runner/optimizer/__init__.py | 11 - .../mmcv/runner/optimizer/builder.py | 40 - .../runner/optimizer/default_constructor.py | 246 -- .../uniformer/mmcv/runner/priority.py | 60 - .../controlnet/uniformer/mmcv/runner/utils.py | 92 - .../uniformer/mmcv/utils/__init__.py | 185 -- .../controlnet/uniformer/mmcv/utils/config.py | 658 ----- .../controlnet/uniformer/mmcv/utils/env.py | 100 - .../uniformer/mmcv/utils/ext_loader.py | 72 - .../uniformer/mmcv/utils/logging.py | 109 - .../controlnet/uniformer/mmcv/utils/misc.py | 371 --- .../uniformer/mmcv/utils/parrots_jit.py | 33 - .../uniformer/mmcv/utils/parrots_wrapper.py | 106 - .../controlnet/uniformer/mmcv/utils/path.py | 99 - .../uniformer/mmcv/utils/progressbar.py | 204 -- .../uniformer/mmcv/utils/registry.py | 303 --- .../uniformer/mmcv/utils/testing.py | 138 -- .../controlnet/uniformer/mmcv/utils/timer.py | 117 - .../controlnet/uniformer/mmcv/utils/trace.py | 24 - .../uniformer/mmcv/utils/version_utils.py | 88 - .../controlnet/uniformer/mmcv/version.py | 36 - .../uniformer/mmcv/video/__init__.py | 29 - .../controlnet/uniformer/mmcv/video/io.py | 310 --- .../uniformer/mmcv/video/optflow.py | 248 -- .../uniformer/mmcv/video/processing.py | 128 - .../uniformer/mmcv/visualization/__init__.py | 15 - .../uniformer/mmcv/visualization/color.py | 52 - .../uniformer/mmcv/visualization/image.py | 144 -- .../uniformer/mmcv/visualization/optflow.py | 109 - .../uniformer/mmcv_custom/__init__.py | 5 - .../uniformer/mmcv_custom/checkpoint.py | 477 ---- .../uniformer/mmseg/apis/__init__.py | 14 - .../uniformer/mmseg/apis/inference.py | 127 - .../controlnet/uniformer/mmseg/apis/test.py | 217 -- .../controlnet/uniformer/mmseg/apis/train.py | 109 - .../uniformer/mmseg/core/__init__.py | 3 - .../mmseg/core/evaluation/__init__.py | 14 - .../mmseg/core/evaluation/class_names.py | 458 ---- .../mmseg/core/evaluation/eval_hooks.py | 108 - .../mmseg/core/evaluation/metrics.py | 297 --- .../uniformer/mmseg/core/seg/__init__.py | 4 - .../uniformer/mmseg/core/seg/builder.py | 8 - .../mmseg/core/seg/sampler/__init__.py | 4 - .../core/seg/sampler/base_pixel_sampler.py | 12 - .../core/seg/sampler/ohem_pixel_sampler.py | 76 - .../uniformer/mmseg/core/utils/__init__.py | 3 - .../uniformer/mmseg/core/utils/misc.py | 17 - .../uniformer/mmseg/datasets/__init__.py | 30 - .../uniformer/mmseg/datasets/ade.py | 322 --- .../uniformer/mmseg/datasets/builder.py | 172 -- .../uniformer/mmseg/datasets/chase_db1.py | 25 - .../uniformer/mmseg/datasets/cityscapes.py | 241 -- .../uniformer/mmseg/datasets/custom.py | 392 --- .../mmseg/datasets/dataset_wrappers.py | 50 - .../uniformer/mmseg/datasets/drive.py | 25 - .../uniformer/mmseg/datasets/hrf.py | 23 - .../mmseg/datasets/pascal_context.py | 294 --- .../mmseg/datasets/pipelines/__init__.py | 43 - .../mmseg/datasets/pipelines/compose.py | 51 - .../mmseg/datasets/pipelines/formating.py | 294 --- .../mmseg/datasets/pipelines/loading.py | 145 -- .../mmseg/datasets/pipelines/test_time_aug.py | 118 - .../mmseg/datasets/pipelines/transforms.py | 839 ------- .../uniformer/mmseg/datasets/stare.py | 25 - .../uniformer/mmseg/datasets/voc.py | 65 - .../uniformer/mmseg/models/__init__.py | 17 - .../mmseg/models/backbones/__init__.py | 27 - .../uniformer/mmseg/models/backbones/cgnet.py | 362 --- .../mmseg/models/backbones/fast_scnn.py | 355 --- .../uniformer/mmseg/models/backbones/hrnet.py | 536 ---- .../mmseg/models/backbones/mobilenet_v2.py | 183 -- .../mmseg/models/backbones/mobilenet_v3.py | 267 -- .../mmseg/models/backbones/resnest.py | 299 --- .../mmseg/models/backbones/resnet.py | 650 ----- .../mmseg/models/backbones/resnext.py | 124 - .../uniformer/mmseg/models/backbones/unet.py | 451 ---- .../mmseg/models/backbones/uniformer.py | 554 ----- .../uniformer/mmseg/models/backbones/vit.py | 443 ---- .../uniformer/mmseg/models/builder.py | 43 - .../mmseg/models/decode_heads/__init__.py | 45 - .../mmseg/models/decode_heads/ann_head.py | 259 -- .../mmseg/models/decode_heads/apc_head.py | 141 -- .../mmseg/models/decode_heads/aspp_head.py | 106 - .../decode_heads/cascade_decode_head.py | 56 - .../mmseg/models/decode_heads/cc_head.py | 41 - .../mmseg/models/decode_heads/da_head.py | 174 -- .../mmseg/models/decode_heads/decode_head.py | 226 -- .../mmseg/models/decode_heads/dm_head.py | 137 -- .../mmseg/models/decode_heads/dnl_head.py | 126 - .../mmseg/models/decode_heads/ema_head.py | 155 -- .../mmseg/models/decode_heads/enc_head.py | 174 -- .../mmseg/models/decode_heads/fcn_head.py | 81 - .../mmseg/models/decode_heads/fpn_head.py | 61 - .../mmseg/models/decode_heads/gc_head.py | 41 - .../mmseg/models/decode_heads/lraspp_head.py | 77 - .../mmseg/models/decode_heads/nl_head.py | 46 - .../mmseg/models/decode_heads/ocr_head.py | 124 - .../mmseg/models/decode_heads/point_head.py | 312 --- .../mmseg/models/decode_heads/psa_head.py | 186 -- .../mmseg/models/decode_heads/psp_head.py | 101 - .../models/decode_heads/sep_aspp_head.py | 82 - .../mmseg/models/decode_heads/sep_fcn_head.py | 54 - .../mmseg/models/decode_heads/uper_head.py | 118 - .../uniformer/mmseg/models/losses/__init__.py | 19 - .../uniformer/mmseg/models/losses/accuracy.py | 77 - .../mmseg/models/losses/cross_entropy_loss.py | 155 -- .../mmseg/models/losses/dice_loss.py | 98 - .../mmseg/models/losses/lovasz_loss.py | 304 --- .../uniformer/mmseg/models/losses/utils.py | 117 - .../uniformer/mmseg/models/necks/__init__.py | 4 - .../uniformer/mmseg/models/necks/fpn.py | 210 -- .../mmseg/models/necks/multilevel_neck.py | 53 - .../mmseg/models/segmentors/__init__.py | 5 - .../uniformer/mmseg/models/segmentors/base.py | 256 -- .../segmentors/cascade_encoder_decoder.py | 95 - .../models/segmentors/encoder_decoder.py | 275 --- .../uniformer/mmseg/models/utils/__init__.py | 20 - .../uniformer/mmseg/models/utils/drop.py | 29 - .../mmseg/models/utils/inverted_residual.py | 218 -- .../mmseg/models/utils/make_divisible.py | 27 - .../uniformer/mmseg/models/utils/res_layer.py | 96 - .../uniformer/mmseg/models/utils/se_layer.py | 61 - .../models/utils/self_attention_block.py | 162 -- .../mmseg/models/utils/up_conv_block.py | 106 - .../mmseg/models/utils/weight_init.py | 63 - .../uniformer/mmseg/ops/__init__.py | 4 - .../uniformer/mmseg/ops/encoding.py | 65 - .../uniformer/mmseg/ops/wrappers.py | 43 - .../uniformer/mmseg/utils/__init__.py | 4 - .../uniformer/mmseg/utils/collect_env.py | 16 - .../uniformer/mmseg/utils/logger.py | 27 - .../multimodal/models/controlnet/util.py | 98 - .../multimodal/models/dreambooth/__init__.py | 13 - .../models/dreambooth/dreambooth.py | 654 ----- .../multimodal/models/dreambooth/util.py | 169 -- .../multimodal/models/imagen/__init__.py | 13 - .../multimodal/models/imagen/imagen.py | 602 ----- .../models/imagen/imagen_pipeline.py | 355 --- .../multimodal/models/imagen/precond.py | 174 -- .../models/instruct_pix2pix/__init__.py | 13 - .../models/instruct_pix2pix/ldm/__init__.py | 13 - .../models/instruct_pix2pix/ldm/ddpm_edit.py | 268 -- .../multimodal/models/kosmos/__init__.py | 0 .../models/kosmos/megatron_kosmos_model.py | 1154 --------- .../models/kosmos/perceiver_resampler.py | 131 - .../multimodal/models/nerf/base.py | 24 - .../multimodal/models/nerf/dreamfusion.py | 313 --- .../multimodal/models/nerf/txt2nerf_base.py | 81 - .../multimodal/models/neva/__init__.py | 13 - .../multimodal/models/neva/neva_model.py | 1076 -------- .../models/neva/neva_peft_models.py | 60 - .../models/stable_diffusion/__init__.py | 13 - .../stable_diffusion/diffusion_model.py | 81 - .../models/stable_diffusion/ldm/__init__.py | 13 - .../stable_diffusion/ldm/autoencoder.py | 608 ----- .../models/stable_diffusion/ldm/ddpm.py | 2170 ----------------- .../models/stable_diffusion/ldm_config.py | 144 -- .../stable_diffusion/samplers/__init__.py | 16 - .../stable_diffusion/samplers/base_sampler.py | 339 --- .../models/stable_diffusion/samplers/ddim.py | 119 - .../stable_diffusion/samplers/dpmsolver.py | 493 ---- .../stable_diffusion/samplers/k_diffusion.py | 838 ------- .../stable_diffusion/samplers/para_ddim.py | 231 -- .../models/stable_diffusion/samplers/plms.py | 105 - .../stable_diffusion/samplers/sampler_dpm.py | 77 - .../multimodal/modules/__init__.py | 13 - .../imagen/diffusionmodules/attention.py | 317 --- .../imagen/diffusionmodules/attention_alt.py | 321 --- .../modules/imagen/diffusionmodules/blocks.py | 906 ------- .../modules/imagen/diffusionmodules/embs.py | 69 - .../modules/imagen/diffusionmodules/layers.py | 251 -- .../modules/imagen/diffusionmodules/nets.py | 642 ----- .../modules/imagen/encoder/t5encoder.json | 51 - .../modules/imagen/encoder/t5encoder.py | 59 - .../modules/imagen/sampler/batch_ops.py | 57 - .../modules/imagen/sampler/continuous_ddpm.py | 168 -- .../modules/imagen/sampler/sampler.py | 250 -- .../multimodal/modules/nerf/__init__.py | 0 .../nerf/background/nerf_background_base.py | 22 - .../nerf/background/random_background.py | 19 - .../nerf/background/static_background.py | 14 - .../nerf/background/tcnn_background.py | 32 - .../nerf/background/torchngp_background.py | 31 - .../modules/nerf/geometry/__init__.py | 0 .../multimodal/modules/nerf/geometry/dmtet.py | 150 -- .../modules/nerf/geometry/layers.py | 129 - .../modules/nerf/geometry/nerf_base.py | 360 --- .../modules/nerf/geometry/tcnn_nerf.py | 108 - .../modules/nerf/geometry/torchngp_nerf.py | 114 - .../modules/nerf/guidance/__init__.py | 0 .../stablediffusion_huggingface_pipeline.py | 142 -- .../guidance/stablediffusion_nemo_pipeline.py | 129 - .../guidance/stablediffusion_trt_pipeline.py | 221 -- .../nerf/guidance/txt2img_guidance_base.py | 6 - .../nerf/loss/laplacian_smooth_loss.py | 38 - .../nerf/loss/normal_consistency_loss.py | 56 - .../modules/nerf/materials/__init__.py | 0 .../modules/nerf/materials/basic_shading.py | 66 - .../modules/nerf/materials/materials_base.py | 29 - .../modules/nerf/renderers/__init__.py | 0 .../modules/nerf/renderers/base_renderer.py | 18 - .../nerf/renderers/base_sdf_renderer.py | 20 - .../nerf/renderers/base_volume_renderer.py | 7 - .../nerf/renderers/nerfacc_volume_renderer.py | 364 --- .../nerf/renderers/nvdiffrast_renderer.py | 222 -- .../renderers/torchngp_volume_renderer.py | 275 --- .../multimodal/modules/nerf/utils/__init__.py | 0 .../modules/nerf/utils/activation.py | 20 - .../modules/nerf/utils/torch_ngp/__init__.py | 0 .../modules/nerf/utils/torch_ngp/encoding.py | 137 -- .../nerf/utils/torch_ngp/freqencoder.py | 73 - .../nerf/utils/torch_ngp/gridencoder.py | 287 --- .../nerf/utils/torch_ngp/raymarching.py | 551 ----- .../modules/nerf/utils/torch_ngp/shencoder.py | 82 - .../modules/nerf/utils/trt_engine.py | 173 -- .../modules/stable_diffusion/__init__.py | 13 - .../modules/stable_diffusion/attention.py | 408 ---- .../diffusionmodules/__init__.py | 13 - .../diffusionmodules/model.py | 878 ------- .../diffusionmodules/openaimodel.py | 1191 --------- .../stable_diffusion/diffusionmodules/util.py | 309 --- .../distributions/__init__.py | 13 - .../distributions/distributions.py | 98 - .../stable_diffusion/encoders/__init__.py | 13 - .../stable_diffusion/encoders/modules.py | 471 ---- .../encoders/x_transformer.py | 629 ----- nemo/collections/multimodal/parts/__init__.py | 13 - .../multimodal/parts/imagen/__init__.py | 13 - .../multimodal/parts/imagen/utils.py | 29 - .../parts/stable_diffusion/__init__.py | 13 - .../parts/stable_diffusion/lr_scheduler.py | 112 - .../parts/stable_diffusion/pipeline.py | 202 -- .../parts/stable_diffusion/utils.py | 213 -- nemo/collections/multimodal/parts/utils.py | 267 -- nemo/collections/vision/__init__.py | 38 - nemo/collections/vision/data/__init__.py | 13 - .../vision/data/imagenet_classnames.py | 1016 -------- .../vision/data/megatron/__init__.py | 13 - .../vision/data/megatron/autoaugment.py | 270 -- .../vision/data/megatron/data_samplers.py | 89 - .../vision/data/megatron/image_folder.py | 286 --- .../vision/data/megatron/vit_dataset.py | 284 --- nemo/collections/vision/losses/__init__.py | 13 - nemo/collections/vision/metrics/__init__.py | 13 - nemo/collections/vision/models/__init__.py | 13 - .../megatron_vit_classification_models.py | 801 ------ nemo/collections/vision/modules/__init__.py | 13 - .../vision/modules/common/__init__.py | 13 - .../modules/common/megatron/__init__.py | 13 - .../common/megatron/vision_transformer.py | 492 ---- .../vision/modules/vit/__init__.py | 13 - .../vision/modules/vit/vit_backbone.py | 361 --- nemo/collections/vision/parts/__init__.py | 13 - requirements/requirements_vision.txt | 8 - scripts/fid-eval-text2img/TFinception_V3.py | 231 -- .../fid-eval-text2img/compute_clip_score.py | 120 - scripts/fid-eval-text2img/compute_fid.py | 409 ---- scripts/fid-eval-text2img/eval_fid.py | 100 - scripts/fid-eval-text2img/fid_dataset.py | 128 - scripts/fid-eval-text2img/plot.py | 40 - .../collections/multimodal/test_clip_model.py | 482 ---- tests/collections/vision/test_vit_model.py | 379 --- utils/flash-attention.patch | 87 - utils/triton.patch | 53 - 599 files changed, 88910 deletions(-) delete mode 100644 examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_config.yaml delete mode 100755 examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_infer.yaml delete mode 100644 examples/multimodal/content_filtering/nsfw/megatron_nsfw_infer.py delete mode 100644 examples/multimodal/content_filtering/nsfw/megatron_nsfw_pretrain.py delete mode 100644 examples/multimodal/convert_ckpt_to_nemo.py delete mode 100644 examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml delete mode 100755 examples/multimodal/foundation/clip/conf/megatron_clip_imagenet_zeroshot.yaml delete mode 100755 examples/multimodal/foundation/clip/conf/megatron_clip_infer.yaml delete mode 100644 examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py delete mode 100644 examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py delete mode 100644 examples/multimodal/foundation/clip/megatron_clip_infer.py delete mode 100644 examples/multimodal/foundation/clip/megatron_clip_pretrain.py delete mode 100644 examples/multimodal/generative/controlnet/conf/controlnet_infer.yaml delete mode 100644 examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml delete mode 100644 examples/multimodal/generative/controlnet/controlnet_infer.py delete mode 100644 examples/multimodal/generative/controlnet/controlnet_train.py delete mode 100644 examples/multimodal/generative/convert_hf_ckpt_to_nemo.py delete mode 100644 examples/multimodal/generative/dreambooth/conf/dreambooth.yaml delete mode 100644 examples/multimodal/generative/dreambooth/conf/dreambooth_infer.yaml delete mode 100644 examples/multimodal/generative/dreambooth/dreambooth.py delete mode 100644 examples/multimodal/generative/dreambooth/dreambooth_infer.py delete mode 100644 examples/multimodal/generative/imagen/README.md delete mode 100644 examples/multimodal/generative/imagen/conf/base64-2b.yaml delete mode 100644 examples/multimodal/generative/imagen/conf/base64-500m-edm.yaml delete mode 100644 examples/multimodal/generative/imagen/conf/base64-500m.yaml delete mode 100644 examples/multimodal/generative/imagen/conf/base64-500m_online_encoding.yaml delete mode 100644 examples/multimodal/generative/imagen/conf/fid_inference.yaml delete mode 100644 examples/multimodal/generative/imagen/conf/imagen_fid_images.yaml delete mode 100644 examples/multimodal/generative/imagen/conf/inference_pipeline.yaml delete mode 100644 examples/multimodal/generative/imagen/conf/sr1024-600m.yaml delete mode 100644 examples/multimodal/generative/imagen/conf/sr256-400m-edm.yaml delete mode 100644 examples/multimodal/generative/imagen/conf/sr256-400m.yaml delete mode 100644 examples/multimodal/generative/imagen/conf/sr256-450m-edm.yaml delete mode 100644 examples/multimodal/generative/imagen/conf/sr256-600m-edm-noise.yaml delete mode 100644 examples/multimodal/generative/imagen/conf/sr256-600m-edm.yaml delete mode 100644 examples/multimodal/generative/imagen/conf/sr256-600m.yaml delete mode 100644 examples/multimodal/generative/imagen/generate_fid_images.py delete mode 100644 examples/multimodal/generative/imagen/imagen_generate_images.py delete mode 100644 examples/multimodal/generative/imagen/imagen_infer.py delete mode 100644 examples/multimodal/generative/imagen/imagen_training.py delete mode 100644 examples/multimodal/generative/instruct_pix2pix/conf/sd_edit.yaml delete mode 100644 examples/multimodal/generative/instruct_pix2pix/conf/sd_finetune.yaml delete mode 100644 examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py delete mode 100644 examples/multimodal/generative/instruct_pix2pix/sd_finetune.py delete mode 100644 examples/multimodal/generative/nerf/benchmark_callback.py delete mode 100644 examples/multimodal/generative/nerf/config/config.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/background/random.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/background/static.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/background/tcnn.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/background/torchngp.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/data/data.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/dreamfusion-dmtet.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/dreamfusion.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/guidance/sd_huggingface.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/guidance/sd_nemo.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/guidance/sd_trt.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/loss/dmtet.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/loss/dreamfusion.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/material/basic_shading.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/nerf/tcnn.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/nerf/torchngp.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/optim/adan.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/renderer/nerfacc.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/renderer/nvdiffrast.yaml delete mode 100644 examples/multimodal/generative/nerf/config/model/renderer/torchngp_raymarching.yaml delete mode 100644 examples/multimodal/generative/nerf/data.py delete mode 100644 examples/multimodal/generative/nerf/main.py delete mode 100644 examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml delete mode 100644 examples/multimodal/generative/stable_diffusion/conf/sd_fid_images.yaml delete mode 100644 examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml delete mode 100644 examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml delete mode 100644 examples/multimodal/generative/stable_diffusion/generate_fid_images.py delete mode 100644 examples/multimodal/generative/stable_diffusion/sd_infer.py delete mode 100644 examples/multimodal/generative/stable_diffusion/sd_train.py delete mode 100644 examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml delete mode 100644 examples/multimodal/mllm/kosmos/kosmos_pretrain.py delete mode 100644 examples/multimodal/mllm/neva/conf/llava_config.yaml delete mode 100644 examples/multimodal/mllm/neva/conf/neva_config.yaml delete mode 100644 examples/multimodal/mllm/neva/conf/neva_finetune.yaml delete mode 100644 examples/multimodal/mllm/neva/conf/neva_inference.yaml delete mode 100644 examples/multimodal/mllm/neva/conf/neva_peft.yaml delete mode 100644 examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py delete mode 100644 examples/multimodal/mllm/neva/neva_evaluation.py delete mode 100644 examples/multimodal/mllm/neva/neva_finetune.py delete mode 100644 examples/multimodal/mllm/neva/neva_peft.py delete mode 100644 examples/multimodal/mllm/neva/neva_pretrain.py delete mode 100644 examples/vision/convert_ckpt_to_nemo.py delete mode 100755 examples/vision/vision_transformer/conf/megatron_vit_classification_config.yaml delete mode 100755 examples/vision/vision_transformer/conf/megatron_vit_classification_evaluate.yaml delete mode 100755 examples/vision/vision_transformer/conf/megatron_vit_classification_infer.yaml delete mode 100644 examples/vision/vision_transformer/megatron_vit_classification_evaluate.py delete mode 100644 examples/vision/vision_transformer/megatron_vit_classification_finetune.py delete mode 100644 examples/vision/vision_transformer/megatron_vit_classification_infer.py delete mode 100644 examples/vision/vision_transformer/megatron_vit_classification_pretrain.py delete mode 100644 nemo/collections/multimodal/data/__init__.py delete mode 100644 nemo/collections/multimodal/data/clip/__init__.py delete mode 100644 nemo/collections/multimodal/data/clip/augmentations/__init__.py delete mode 100644 nemo/collections/multimodal/data/clip/augmentations/augmentations.py delete mode 100644 nemo/collections/multimodal/data/clip/clip_dataset.py delete mode 100644 nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py delete mode 100644 nemo/collections/multimodal/data/common/__init__.py delete mode 100644 nemo/collections/multimodal/data/common/data_samplers.py delete mode 100644 nemo/collections/multimodal/data/common/webdataset.py delete mode 100644 nemo/collections/multimodal/data/common/webdataset_s3.py delete mode 100644 nemo/collections/multimodal/data/controlnet/controlnet_dataset.py delete mode 100644 nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py delete mode 100644 nemo/collections/multimodal/data/imagen/__init__.py delete mode 100644 nemo/collections/multimodal/data/imagen/augmentations/__init__.py delete mode 100644 nemo/collections/multimodal/data/imagen/augmentations/augmentations.py delete mode 100644 nemo/collections/multimodal/data/imagen/augmentations/corruption.py delete mode 100644 nemo/collections/multimodal/data/imagen/imagen_dataset.py delete mode 100644 nemo/collections/multimodal/data/instruct_pix2pix/__init__.py delete mode 100644 nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py delete mode 100644 nemo/collections/multimodal/data/kosmos/__init__.py delete mode 100644 nemo/collections/multimodal/data/kosmos/kosmos_dataset.py delete mode 100644 nemo/collections/multimodal/data/nerf/__init__.py delete mode 100644 nemo/collections/multimodal/data/nerf/cameras.py delete mode 100644 nemo/collections/multimodal/data/nerf/circle_poses.py delete mode 100644 nemo/collections/multimodal/data/nerf/random_poses.py delete mode 100644 nemo/collections/multimodal/data/nerf/utils.py delete mode 100644 nemo/collections/multimodal/data/neva/__init__.py delete mode 100644 nemo/collections/multimodal/data/neva/conversation.py delete mode 100644 nemo/collections/multimodal/data/neva/neva_dataset.py delete mode 100644 nemo/collections/multimodal/data/nsfw/__init__.py delete mode 100644 nemo/collections/multimodal/data/nsfw/nsfw_dataset.py delete mode 100644 nemo/collections/multimodal/data/stable_diffusion/__init__.py delete mode 100644 nemo/collections/multimodal/data/stable_diffusion/augmentation/__init__.py delete mode 100644 nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py delete mode 100644 nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py delete mode 100644 nemo/collections/multimodal/losses/__init__.py delete mode 100644 nemo/collections/multimodal/losses/clip_loss.py delete mode 100644 nemo/collections/multimodal/models/__init__.py delete mode 100644 nemo/collections/multimodal/models/clip/__init__.py delete mode 100644 nemo/collections/multimodal/models/clip/megatron_clip_models.py delete mode 100644 nemo/collections/multimodal/models/content_filter/__init__.py delete mode 100644 nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py delete mode 100644 nemo/collections/multimodal/models/controlnet/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/controlnet.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/LICENSE delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_160k.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_20k.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_40k.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_80k.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/run.sh delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test.sh delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hsigmoid.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hswish.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/padding.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/registry.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/scale.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/swish.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/base.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/deprecated.json delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/mmcls.json delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/open_mmlab.json delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/registry.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/scatter_gather.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/utils.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sampler_seed.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sync_buffer.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/priority.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/builder.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/misc.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/dataset_wrappers.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/compose.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/make_divisible.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/__init__.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py delete mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/logger.py delete mode 100644 nemo/collections/multimodal/models/controlnet/util.py delete mode 100644 nemo/collections/multimodal/models/dreambooth/__init__.py delete mode 100644 nemo/collections/multimodal/models/dreambooth/dreambooth.py delete mode 100644 nemo/collections/multimodal/models/dreambooth/util.py delete mode 100644 nemo/collections/multimodal/models/imagen/__init__.py delete mode 100644 nemo/collections/multimodal/models/imagen/imagen.py delete mode 100644 nemo/collections/multimodal/models/imagen/imagen_pipeline.py delete mode 100644 nemo/collections/multimodal/models/imagen/precond.py delete mode 100644 nemo/collections/multimodal/models/instruct_pix2pix/__init__.py delete mode 100644 nemo/collections/multimodal/models/instruct_pix2pix/ldm/__init__.py delete mode 100644 nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py delete mode 100644 nemo/collections/multimodal/models/kosmos/__init__.py delete mode 100644 nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py delete mode 100644 nemo/collections/multimodal/models/kosmos/perceiver_resampler.py delete mode 100644 nemo/collections/multimodal/models/nerf/base.py delete mode 100644 nemo/collections/multimodal/models/nerf/dreamfusion.py delete mode 100644 nemo/collections/multimodal/models/nerf/txt2nerf_base.py delete mode 100644 nemo/collections/multimodal/models/neva/__init__.py delete mode 100644 nemo/collections/multimodal/models/neva/neva_model.py delete mode 100644 nemo/collections/multimodal/models/neva/neva_peft_models.py delete mode 100644 nemo/collections/multimodal/models/stable_diffusion/__init__.py delete mode 100644 nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py delete mode 100644 nemo/collections/multimodal/models/stable_diffusion/ldm/__init__.py delete mode 100644 nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py delete mode 100644 nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py delete mode 100644 nemo/collections/multimodal/models/stable_diffusion/ldm_config.py delete mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py delete mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py delete mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py delete mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py delete mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py delete mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/para_ddim.py delete mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py delete mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py delete mode 100644 nemo/collections/multimodal/modules/__init__.py delete mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/attention.py delete mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/attention_alt.py delete mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py delete mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/embs.py delete mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py delete mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py delete mode 100644 nemo/collections/multimodal/modules/imagen/encoder/t5encoder.json delete mode 100644 nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py delete mode 100644 nemo/collections/multimodal/modules/imagen/sampler/batch_ops.py delete mode 100644 nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py delete mode 100644 nemo/collections/multimodal/modules/imagen/sampler/sampler.py delete mode 100644 nemo/collections/multimodal/modules/nerf/__init__.py delete mode 100644 nemo/collections/multimodal/modules/nerf/background/nerf_background_base.py delete mode 100644 nemo/collections/multimodal/modules/nerf/background/random_background.py delete mode 100644 nemo/collections/multimodal/modules/nerf/background/static_background.py delete mode 100644 nemo/collections/multimodal/modules/nerf/background/tcnn_background.py delete mode 100644 nemo/collections/multimodal/modules/nerf/background/torchngp_background.py delete mode 100644 nemo/collections/multimodal/modules/nerf/geometry/__init__.py delete mode 100644 nemo/collections/multimodal/modules/nerf/geometry/dmtet.py delete mode 100644 nemo/collections/multimodal/modules/nerf/geometry/layers.py delete mode 100644 nemo/collections/multimodal/modules/nerf/geometry/nerf_base.py delete mode 100644 nemo/collections/multimodal/modules/nerf/geometry/tcnn_nerf.py delete mode 100644 nemo/collections/multimodal/modules/nerf/geometry/torchngp_nerf.py delete mode 100644 nemo/collections/multimodal/modules/nerf/guidance/__init__.py delete mode 100644 nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_huggingface_pipeline.py delete mode 100644 nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_nemo_pipeline.py delete mode 100644 nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_trt_pipeline.py delete mode 100644 nemo/collections/multimodal/modules/nerf/guidance/txt2img_guidance_base.py delete mode 100644 nemo/collections/multimodal/modules/nerf/loss/laplacian_smooth_loss.py delete mode 100644 nemo/collections/multimodal/modules/nerf/loss/normal_consistency_loss.py delete mode 100644 nemo/collections/multimodal/modules/nerf/materials/__init__.py delete mode 100644 nemo/collections/multimodal/modules/nerf/materials/basic_shading.py delete mode 100644 nemo/collections/multimodal/modules/nerf/materials/materials_base.py delete mode 100644 nemo/collections/multimodal/modules/nerf/renderers/__init__.py delete mode 100644 nemo/collections/multimodal/modules/nerf/renderers/base_renderer.py delete mode 100644 nemo/collections/multimodal/modules/nerf/renderers/base_sdf_renderer.py delete mode 100644 nemo/collections/multimodal/modules/nerf/renderers/base_volume_renderer.py delete mode 100644 nemo/collections/multimodal/modules/nerf/renderers/nerfacc_volume_renderer.py delete mode 100644 nemo/collections/multimodal/modules/nerf/renderers/nvdiffrast_renderer.py delete mode 100644 nemo/collections/multimodal/modules/nerf/renderers/torchngp_volume_renderer.py delete mode 100644 nemo/collections/multimodal/modules/nerf/utils/__init__.py delete mode 100644 nemo/collections/multimodal/modules/nerf/utils/activation.py delete mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/__init__.py delete mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/encoding.py delete mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/freqencoder.py delete mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/gridencoder.py delete mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/raymarching.py delete mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/shencoder.py delete mode 100644 nemo/collections/multimodal/modules/nerf/utils/trt_engine.py delete mode 100644 nemo/collections/multimodal/modules/stable_diffusion/__init__.py delete mode 100644 nemo/collections/multimodal/modules/stable_diffusion/attention.py delete mode 100644 nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/__init__.py delete mode 100644 nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py delete mode 100644 nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py delete mode 100644 nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py delete mode 100644 nemo/collections/multimodal/modules/stable_diffusion/distributions/__init__.py delete mode 100644 nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py delete mode 100644 nemo/collections/multimodal/modules/stable_diffusion/encoders/__init__.py delete mode 100644 nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py delete mode 100644 nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py delete mode 100644 nemo/collections/multimodal/parts/__init__.py delete mode 100644 nemo/collections/multimodal/parts/imagen/__init__.py delete mode 100644 nemo/collections/multimodal/parts/imagen/utils.py delete mode 100644 nemo/collections/multimodal/parts/stable_diffusion/__init__.py delete mode 100644 nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py delete mode 100644 nemo/collections/multimodal/parts/stable_diffusion/pipeline.py delete mode 100644 nemo/collections/multimodal/parts/stable_diffusion/utils.py delete mode 100644 nemo/collections/multimodal/parts/utils.py delete mode 100644 nemo/collections/vision/__init__.py delete mode 100644 nemo/collections/vision/data/__init__.py delete mode 100644 nemo/collections/vision/data/imagenet_classnames.py delete mode 100644 nemo/collections/vision/data/megatron/__init__.py delete mode 100644 nemo/collections/vision/data/megatron/autoaugment.py delete mode 100644 nemo/collections/vision/data/megatron/data_samplers.py delete mode 100644 nemo/collections/vision/data/megatron/image_folder.py delete mode 100644 nemo/collections/vision/data/megatron/vit_dataset.py delete mode 100644 nemo/collections/vision/losses/__init__.py delete mode 100644 nemo/collections/vision/metrics/__init__.py delete mode 100644 nemo/collections/vision/models/__init__.py delete mode 100644 nemo/collections/vision/models/megatron_vit_classification_models.py delete mode 100644 nemo/collections/vision/modules/__init__.py delete mode 100644 nemo/collections/vision/modules/common/__init__.py delete mode 100644 nemo/collections/vision/modules/common/megatron/__init__.py delete mode 100644 nemo/collections/vision/modules/common/megatron/vision_transformer.py delete mode 100644 nemo/collections/vision/modules/vit/__init__.py delete mode 100644 nemo/collections/vision/modules/vit/vit_backbone.py delete mode 100644 nemo/collections/vision/parts/__init__.py delete mode 100644 requirements/requirements_vision.txt delete mode 100644 scripts/fid-eval-text2img/TFinception_V3.py delete mode 100644 scripts/fid-eval-text2img/compute_clip_score.py delete mode 100644 scripts/fid-eval-text2img/compute_fid.py delete mode 100644 scripts/fid-eval-text2img/eval_fid.py delete mode 100644 scripts/fid-eval-text2img/fid_dataset.py delete mode 100644 scripts/fid-eval-text2img/plot.py delete mode 100644 tests/collections/multimodal/test_clip_model.py delete mode 100644 tests/collections/vision/test_vit_model.py delete mode 100644 utils/flash-attention.patch delete mode 100644 utils/triton.patch diff --git a/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_config.yaml b/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_config.yaml deleted file mode 100644 index 11dc65155cf5..000000000000 --- a/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_config.yaml +++ /dev/null @@ -1,230 +0,0 @@ -name: megatron_clip -restore_from_path: null # used when starting from a .nemo file - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: 16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - max_epochs: 10 - max_steps: 375000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - use_distributed_sampler: False - check_val_every_n_epoch: 1 - limit_val_batches: 1.0 - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: megatron_nsfw - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 10 - mode: min - always_save_nemo: False # saves nemo file during validation, not implemented for model parallel - save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits - filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' - model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} - ema: - enable: False - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - -model: - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 64 # limited by GPU memory - global_batch_size: 64 # will use more micro batches to reach global batch size - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - virtual_pipeline_model_parallel_size: null # interleaved pipeline - - restore_from_pretrained: null # used in fine-tuning - # multimodal configs - output_dim: 768 - # As the number of devices used to train increases, so does the space complexity of - # the logit matrix. Using a naïve all-gather scheme, space complexity will be - # `O(n^2)`. Instead, complexity may become effectively linear if the flags - # `--gather-with-grad` and `--local-loss` are used. This alteration results in one-to-one - # numerical results as the naïve method. - local_loss: False # calculate loss w/ local features @ global (instead of realizing full global @ global matrix) - gather_with_grad: True # enable full distributed gradient for feature gather, set this to False may cause convergence issue - - vision: - precision: ${trainer.precision} - patch_dim: 14 - img_h: 224 - img_w: 224 - image_mean: null - image_std: null - num_channels: 3 - drop_patch_rate: 0.0 - drop_path_rate: 0.0 - global_average_pool: false - output_dim: ${model.output_dim} - class_token_length: 1 - preprocess_layernorm: true - encoder_seq_length: 196 - max_position_embeddings: 196 - position_embedding_type: learned_parameters - num_layers: 24 - hidden_size: 1024 - ffn_hidden_size: 4096 - num_attention_heads: 16 - init_method_std: 0.02 - use_scaled_init_method: true - hidden_dropout: 0.0 - attention_dropout: 0.0 - kv_channels: null - apply_query_key_layer_scaling: true - normalization: layernorm - layernorm_epsilon: 1.0e-05 - do_layer_norm_weight_decay: false - pre_process: true - post_process: true - persist_layer_norm: true - activations_checkpoint_granularity: null - activations_checkpoint_method: null - activations_checkpoint_num_layers: null - sequence_parallel: false - native_amp_init_scale: 4294967296 - native_amp_growth_interval: 1000 - hysteresis: 2 - fp32_residual_connection: false - fp16_lm_cross_entropy: false - masked_softmax_fusion: true - bias_dropout_add_fusion: true - use_cpu_initialization: false - onnx_safe: false - gradient_accumulation_fusion: false - openai_gelu: false - bias_activation_fusion: false - megatron_legacy: true - activation: quick-gelu - - text: - precision: ${trainer.precision} - # text configs - output_dim: ${model.output_dim} - - encoder_seq_length: 77 - max_position_embeddings: ${.encoder_seq_length} - position_embedding_type: learned_parameters - num_layers: 12 - hidden_size: 768 - ffn_hidden_size: 3072 - num_attention_heads: 12 - init_method_std: 0.02 - use_scaled_init_method: true - hidden_dropout: 0.0 - attention_dropout: 0.0 - kv_channels: null - apply_query_key_layer_scaling: true - normalization: layernorm - layernorm_epsilon: 1.0e-05 - do_layer_norm_weight_decay: false - pre_process: true - post_process: true - persist_layer_norm: true - activations_checkpoint_granularity: null - activations_checkpoint_method: null - activations_checkpoint_num_layers: null - num_micro_batches_with_partial_activation_checkpoints: null - activations_checkpoint_layers_per_pipeline: null - sequence_parallel: false - native_amp_init_scale: 4294967296 - native_amp_growth_interval: 1000 - hysteresis: 2 - fp32_residual_connection: false - fp16_lm_cross_entropy: false - masked_softmax_fusion: true - bias_dropout_add_fusion: true - use_cpu_initialization: false - onnx_safe: false - gradient_accumulation_fusion: false - openai_gelu: false - bias_activation_fusion: false - megatron_legacy: true - transformer_engine: false - fp8: false - fp8_e4m3: false - fp8_hybrid: false - fp8_margin: 0 - fp8_interval: 1 - fp8_amax_history_len: 1 - fp8_amax_compute_algo: most_recent - use_emha: false - activation: quick-gelu - - # Megatron O2-style half-precision - megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters - grad_allreduce_chunk_size_mb: 125 - grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - sim_hidden_dim: 64 - cls_hidden_dim: 64 - - tokenizer: - library: 'huggingface' - type: 'openai/clip-vit-large-patch14' - model: null - vocab_file: null - merge_file: null - delimiter: null # only used for tabular tokenizer - sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. - make_vocab_size_divisible_by: 128 # Pad the vocab size to be divisible by this value for computation efficiency. - - data: - num_workers: 8 - train: - dataset_path: /datasets/coyo/test.pkl - validation: # List of paths to pkl files or tar files - dataset_path: /datasets/coyo/test.pkl - webdataset: - infinite_sampler: False - local_root_path: /datasets/coyo - - imagenet_val: null # Path to imagenet val set for conducting zero shot evaluation. - - # Nsys profiling options - nsys_profile: - enabled: False - start_step: 10 # Global batch to start profiling - end_step: 10 # Global batch to end profiling - ranks: [ 0 ] # Global rank IDs to profile - gen_shape: False # Generate model and kernel details including input shapes - - optim: - name: adam - lr: 1e-3 - weight_decay: 0.0 - sched: - name: CosineAnnealing - warmup_steps: 200 - constant_steps: 0 - min_lr: 1e-5 - concepts: ??? - diff --git a/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_infer.yaml b/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_infer.yaml deleted file mode 100755 index f78eba0bdc96..000000000000 --- a/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_infer.yaml +++ /dev/null @@ -1,12 +0,0 @@ -image_path: ??? # Path to a image for inference - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -model: - restore_from_path: null # Path to a trained ViT .nemo file - precision: ${trainer.precision} diff --git a/examples/multimodal/content_filtering/nsfw/megatron_nsfw_infer.py b/examples/multimodal/content_filtering/nsfw/megatron_nsfw_infer.py deleted file mode 100644 index d6b4bed6d01a..000000000000 --- a/examples/multimodal/content_filtering/nsfw/megatron_nsfw_infer.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import torch -from omegaconf.omegaconf import OmegaConf -from PIL import Image - -from nemo.collections.multimodal.data.clip.augmentations.augmentations import image_transform -from nemo.collections.multimodal.models.content_filter.megatron_nsfw_clip_models import MegatronContentFilteringModel -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.get_rank import is_global_rank_zero - - -def _get_autocast_dtype(precision: str): - if precision in ["bf16", "bf16-mixed"]: - return torch.bfloat16 - if precision in [32, "32", "32-true"]: - return torch.float - if precision in [16, "16", "16-mixed"]: - return torch.half - raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') - - -@hydra_runner(config_path="conf", config_name="megatron_nsfw_infer") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - # These configs are required to be off during inference. - def model_cfg_modifier(model_cfg): - model_cfg.precision = cfg.trainer.precision - model_cfg.vision.precision = cfg.trainer.precision - if cfg.trainer.precision != "bf16": - model_cfg.megatron_amp_O2 = False - model_cfg.sequence_parallel = False - model_cfg.activations_checkpoint_granularity = None - model_cfg.activations_checkpoint_method = None - - trainer, model = setup_trainer_and_model_for_inference( - model_provider=MegatronContentFilteringModel, cfg=cfg, model_cfg_modifier=model_cfg_modifier, - ) - image_transform_fn = image_transform( - (model.cfg.vision.img_h, model.cfg.vision.img_w), - is_train=False, - mean=model.cfg.vision.image_mean, - std=model.cfg.vision.image_std, - resize_longest_max=True, - ) - - autocast_dtype = _get_autocast_dtype(trainer.precision) - image = Image.open(cfg.image_path).convert('RGB') - with torch.no_grad(), torch.cuda.amp.autocast( - enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, - ): - image = image_transform_fn(image).unsqueeze(0).cuda() - probability = model(image).sigmoid() - - if is_global_rank_zero: - print("Given image's NSFW probability: ", probability.cpu().item()) - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/content_filtering/nsfw/megatron_nsfw_pretrain.py b/examples/multimodal/content_filtering/nsfw/megatron_nsfw_pretrain.py deleted file mode 100644 index 51ccd596431a..000000000000 --- a/examples/multimodal/content_filtering/nsfw/megatron_nsfw_pretrain.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from omegaconf.omegaconf import OmegaConf, open_dict - -from nemo.collections.multimodal.models.content_filter.megatron_nsfw_clip_models import MegatronContentFilteringModel -from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.exp_manager import exp_manager - - -@hydra_runner(config_path="conf", config_name="megatron_nsfw_config") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - assert ( - cfg.trainer.devices * cfg.trainer.num_nodes - ) * cfg.model.micro_batch_size == cfg.model.global_batch_size, ( - "Gradient accumulation is not supported in CLIP yet." - ) - - trainer = MegatronTrainerBuilder(cfg).create_trainer() - exp_manager(trainer, cfg.exp_manager) - - # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams - with open_dict(cfg): - cfg.model.precision = cfg.trainer.precision - - model = MegatronContentFilteringModel.restore_from( - restore_path=cfg.model.restore_from_path, - trainer=trainer, - override_config_path=cfg.model, - save_restore_connector=NLPSaveRestoreConnector(), - strict=False, - ) - - trainer.fit(model) - - if "save_path" in cfg.model: - logging.info(f"Saving model to path: {cfg.model.save_path}") - model.save_to(cfg.model.save_path) - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/convert_ckpt_to_nemo.py b/examples/multimodal/convert_ckpt_to_nemo.py deleted file mode 100644 index d640e0c11ede..000000000000 --- a/examples/multimodal/convert_ckpt_to_nemo.py +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r""" -Conversion script to convert PTL checkpoints into nemo checkpoint. - Example to run this conversion script: - python -m torch.distributed.launch --nproc_per_node= * \ - convert_ckpt_to_nemo.py \ - --checkpoint_folder \ - --checkpoint_name \ - --nemo_file_path \ - --tensor_model_parallel_size \ - --pipeline_model_parallel_size -""" - -import os -from argparse import ArgumentParser - -import torch -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from pytorch_lightning.trainer.trainer import Trainer - -from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel -from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet -from nemo.collections.multimodal.models.dreambooth.dreambooth import MegatronDreamBooth -from nemo.collections.multimodal.models.imagen.imagen import MegatronImagen -from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit -from nemo.collections.multimodal.models.kosmos.megatron_kosmos_model import MegatronKosmosModel -from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion -from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector -from nemo.utils import AppState, logging -from nemo.utils.distributed import initialize_distributed -from nemo.utils.model_utils import inject_model_parallel_rank - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - - -def get_args(): - parser = ArgumentParser() - parser.add_argument( - "--checkpoint_folder", - type=str, - default=None, - required=True, - help="Path to PTL checkpoints saved during training. Ex: /raid/nemo_experiments/megatron_gpt/checkpoints", - ) - parser.add_argument( - "--checkpoint_name", - type=str, - default=None, - required=True, - help="Name of checkpoint to be used. Ex: megatron_gpt--val_loss=6.34-step=649-last.ckpt", - ) - - parser.add_argument( - "--hparams_file", - type=str, - default=None, - required=False, - help="Path config for restoring. It's created during training and may need to be modified during restore if restore environment is different than training. Ex: /raid/nemo_experiments/megatron_gpt/hparams.yaml", - ) - parser.add_argument("--nemo_file_path", type=str, default=None, required=True, help="Path to output .nemo file.") - parser.add_argument("--gpus_per_node", type=int, required=False, default=1) - parser.add_argument("--tensor_model_parallel_size", type=int, required=False, default=1) - parser.add_argument("--pipeline_model_parallel_size", type=int, required=False, default=1) - parser.add_argument( - "--pipeline_model_parallel_split_rank", - type=int, - required=False, - default=None, - help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", - ) - parser.add_argument("--model_type", type=str, required=False, default="megatron_clip") - parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) - parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") - - args = parser.parse_args() - return args - - -def convert(local_rank, rank, world_size, args): - app_state = AppState() - app_state.data_parallel_rank = 0 - num_nodes = world_size // args.gpus_per_node - - cfg = OmegaConf.load(args.hparams_file) - with open_dict(cfg): - cfg['model'] = cfg['cfg'] - cfg['trainer'] = {'precision': cfg['model']['precision']} - if args.bcp: - cfg['cluster_type'] = 'BCP' - trainer = MegatronTrainerBuilder(cfg).create_trainer() - - app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size - app_state.tensor_model_parallel_size = args.tensor_model_parallel_size - - # no use atm, use to split ranks in encoder/decoder models. - if args.pipeline_model_parallel_size > 1 and args.model_type in []: - if args.pipeline_model_parallel_split_rank is not None: - app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_split_rank - else: - if args.pipeline_model_parallel_size % 2 != 0: - raise ValueError( - f"Pipeline model parallel size {args.pipeline_model_parallel_size} must be even if split rank is not specified." - ) - else: - # If split rank is not set, then we set it to be pipeline_model_parallel_size // 2 - this is because in most cases we have the same number of enc/dec layers. - app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_size // 2 - else: - app_state.pipeline_model_parallel_split_rank = None - - app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size - - parallel_state.initialize_model_parallel( - tensor_model_parallel_size=app_state.tensor_model_parallel_size, - pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, - ) - - app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() - app_state.tensor_model_parallel_rank = parallel_state.get_tensor_model_parallel_rank() - - # inject model parallel rank - checkpoint_path = inject_model_parallel_rank(os.path.join(args.checkpoint_folder, args.checkpoint_name)) - - logging.info( - f'rank: {rank}, local_rank: {local_rank}, is loading checkpoint: {checkpoint_path} for tp_rank: {app_state.tensor_model_parallel_rank} and pp_rank: {app_state.pipeline_model_parallel_rank}' - ) - - if args.model_type == 'megatron_clip': - model = MegatronCLIPModel.load_from_checkpoint( - checkpoint_path, hparams_file=args.hparams_file, trainer=trainer - ) - elif args.model_type == 'stable_diffusion': - model = MegatronLatentDiffusion.load_from_checkpoint( - checkpoint_path, hparams_file=args.hparams_file, trainer=trainer - ) - elif args.model_type == 'instruct_pix2pix': - model = MegatronLatentDiffusionEdit.load_from_checkpoint( - checkpoint_path, hparams_file=args.hparams_file, trainer=trainer - ) - elif args.model_type == 'dreambooth': - model = MegatronLatentDiffusion.load_from_checkpoint( - checkpoint_path, hparams_file=args.hparams_file, trainer=trainer - ) - elif args.model_type == 'imagen': - model = MegatronImagen.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, trainer=trainer) - elif args.model_type == 'controlnet': - model = MegatronControlNet.load_from_checkpoint( - checkpoint_path, hparams_file=args.hparams_file, trainer=trainer - ) - elif args.model_type == 'kosmos': - model = MegatronKosmosModel.load_from_checkpoint( - checkpoint_path, hparams_file=args.hparams_file, trainer=trainer - ) - elif args.model_type == 'neva': - model = MegatronNevaModel.load_from_checkpoint( - checkpoint_path, hparams_file=args.hparams_file, trainer=trainer - ) - else: - raise ValueError(f"Unrecognized model_type {args.model_type}.") - - model._save_restore_connector = NLPSaveRestoreConnector() - - if torch.distributed.is_initialized(): - torch.distributed.barrier() - - model.save_to(args.nemo_file_path) - - logging.info(f'NeMo model saved to: {args.nemo_file_path}') - - -if __name__ == '__main__': - args = get_args() - local_rank, rank, world_size = initialize_distributed(args) - convert(local_rank, rank, world_size, args) diff --git a/examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml b/examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml deleted file mode 100644 index a6b1928ef13f..000000000000 --- a/examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml +++ /dev/null @@ -1,250 +0,0 @@ -name: megatron_clip -restore_from_path: null # used when starting from a .nemo file - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: 16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 # PTL default. In practice, max_steps will be reached first. - max_steps: 375000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - val_check_interval: 100 - check_val_every_n_epoch: null - limit_val_batches: 50 - limit_test_batches: 500 - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: megatron_clip - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 10 - mode: min - always_save_nemo: False # saves nemo file during validation, not implemented for model parallel - save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits - filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' - model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} - ema: - enable: False - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - -model: - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 32 # limited by GPU memory - global_batch_size: 32 # will use more micro batches to reach global batch size - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - virtual_pipeline_model_parallel_size: null # interleaved pipeline - - restore_from_path: null # used in fine-tuning - # multimodal configs - output_dim: 512 - # As the number of devices used to train increases, so does the space complexity of - # the logit matrix. Using a naïve all-gather scheme, space complexity will be - # `O(n^2)`. Instead, complexity may become effectively linear if the flags - # `--gather-with-grad` and `--local-loss` are used. This alteration results in one-to-one - # numerical results as the naïve method. - local_loss: False # calculate loss w/ local features @ global (instead of realizing full global @ global matrix) - gather_with_grad: True # enable full distributed gradient for feature gather, set this to False may cause convergence issue - - vision: - precision: ${trainer.precision} - # vision configs - patch_dim: 16 - img_h: 224 - img_w: 224 - image_mean: null - image_std: null - num_channels: 3 - drop_patch_rate: 0.0 - drop_path_rate: 0.0 - global_average_pool: False - output_dim: ${model.output_dim} - class_token_length: 8 - preprocess_layernorm: True # apply layer norm to embedded tokens - - # model architecture - encoder_seq_length: 196 - max_position_embeddings: ${.encoder_seq_length} - position_embedding_type: learned_absolute - num_layers: 12 - hidden_size: 768 - ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. - num_attention_heads: 12 - init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') - use_scaled_init_method: True # use scaled residuals initialization - hidden_dropout: 0. # Dropout probability for hidden state transformer. - attention_dropout: 0. - kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. - normalization: layernorm # Type of normalization layers - layernorm_epsilon: 1e-5 - do_layer_norm_weight_decay: False # True means weight decay on all params - pre_process: True # add embedding - post_process: True # add pooler - persist_layer_norm: True # Use of persistent fused layer norm kernel. - - ## Activation Checkpointing - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - activations_checkpoint_num_layers: null # not used with 'selective' - sequence_parallel: False - - # precision - native_amp_init_scale: 4294967296 # 2 ** 32 - native_amp_growth_interval: 1000 - hysteresis: 2 # Gradient scale hysteresis - fp32_residual_connection: False # Move residual connections to fp32 - fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 - - # model fusions - masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. - bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. - - use_cpu_initialization: False # Init weights on the CPU (slow for large models) - onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. - gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. - openai_gelu: False - bias_activation_fusion: False - megatron_legacy: False - - - text: - precision: ${trainer.precision} - # text configs - output_dim: ${model.output_dim} - - # model architecture - encoder_seq_length: 77 - max_position_embeddings: ${.encoder_seq_length} - position_embedding_type: learned_absolute - num_layers: 12 - hidden_size: 512 - ffn_hidden_size: 2048 # Transformer FFN hidden size. Usually 4 * hidden_size. - num_attention_heads: 8 - init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') - use_scaled_init_method: True # use scaled residuals initialization - hidden_dropout: 0. # Dropout probability for hidden state transformer. - attention_dropout: 0. - kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. - normalization: layernorm # Type of normalization layers - layernorm_epsilon: 1e-5 - do_layer_norm_weight_decay: False # True means weight decay on all params - pre_process: True # add embedding - post_process: True # add pooler - persist_layer_norm: True # Use of persistent fused layer norm kernel. - - ## Activation Checkpointing - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - activations_checkpoint_num_layers: null # not used with 'selective' - num_micro_batches_with_partial_activation_checkpoints: null - activations_checkpoint_layers_per_pipeline: null - sequence_parallel: False - - # precision - native_amp_init_scale: 4294967296 # 2 ** 32 - native_amp_growth_interval: 1000 - hysteresis: 2 # Gradient scale hysteresis - fp32_residual_connection: False # Move residual connections to fp32 - fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 - - # model fusions - masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. - bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. - - use_cpu_initialization: False # Init weights on the CPU (slow for large models) - onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. - gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. - openai_gelu: False - bias_activation_fusion: False - megatron_legacy: False - - transformer_engine: False - fp8: False # enables fp8 in TransformerLayer forward - fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 - fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID - fp8_margin: 0 # scaling margin - fp8_interval: 1 # scaling update interval - fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor - fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history - use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. - - # Megatron O2-style half-precision - megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters - grad_allreduce_chunk_size_mb: 125 - grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - tokenizer: - library: 'huggingface' - type: 'openai/clip-vit-large-patch14' - model: null - vocab_file: null - merge_file: null - delimiter: null # only used for tabular tokenizer - sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. - make_vocab_size_divisible_by: 128 # Pad the vocab size to be divisible by this value for computation efficiency. - - data: - num_workers: 8 - train: - dataset_path: # List of paths to pkl files or tar files - - /datasets/coyo/test.pkl - validation: # List of paths to pkl files or tar files - dataset_path: - - /datasets/coyo/test.pkl - webdataset: - infinite_sampler: False - local_root_path: /datasets/coyo - - imagenet_val: null # Path to imagenet val set for conducting zero shot evaluation. - - # Nsys profiling options - nsys_profile: - enabled: False - start_step: 10 # Global batch to start profiling - end_step: 10 # Global batch to end profiling - ranks: [ 0 ] # Global rank IDs to profile - gen_shape: False # Generate model and kernel details including input shapes - - optim: - name: fused_adam - lr: 1e-3 - weight_decay: 0.2 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 2000 - constant_steps: 0 - min_lr: 1e-5 \ No newline at end of file diff --git a/examples/multimodal/foundation/clip/conf/megatron_clip_imagenet_zeroshot.yaml b/examples/multimodal/foundation/clip/conf/megatron_clip_imagenet_zeroshot.yaml deleted file mode 100755 index 79bdac888887..000000000000 --- a/examples/multimodal/foundation/clip/conf/megatron_clip_imagenet_zeroshot.yaml +++ /dev/null @@ -1,17 +0,0 @@ -trainer: - devices: 8 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: bf16 # 16, 32, or bf16 - -model: - restore_from_path: null # Path to a trained ViT .nemo file - precision: ${trainer.precision} - micro_batch_size: 1000 - global_batch_size: 8000 - - data: - num_workers: 2 - imagenet_val: ??? # path to imagenet val folder - diff --git a/examples/multimodal/foundation/clip/conf/megatron_clip_infer.yaml b/examples/multimodal/foundation/clip/conf/megatron_clip_infer.yaml deleted file mode 100755 index 215cd17841ae..000000000000 --- a/examples/multimodal/foundation/clip/conf/megatron_clip_infer.yaml +++ /dev/null @@ -1,13 +0,0 @@ -image_path: ??? # Path to a image for inference -texts: ??? # List of texts to compute similarity - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -model: - restore_from_path: null # Path to a trained ViT .nemo file - precision: ${trainer.precision} diff --git a/examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py b/examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py deleted file mode 100644 index 67151d95e971..000000000000 --- a/examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py +++ /dev/null @@ -1,284 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Usage example: - python /opt/NeMo/examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py - --arch=ViT-H-14 - --version=laion2b_s32b_b79k - --hparams_file=path/to/saved.yaml - --nemo_file_path=open_clip.nemo - -If converting from OpenCLIP, specify the architecture (`arch`) and version (`version`) from the OpenCLIP model list (https://github.com/mlfoundations/open_clip#usage). - -If converting from Hugging Face, set the version to `huggingface` and the architecture (`arch`) to the Hugging Face model name (e.g., `yuvalkirstain/PickScore_v1`). - -Additionally, provide a NeMo hparams file with the correct model architecture arguments. Refer to examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml. -""" - -import os -from argparse import ArgumentParser - -import einops -import open_clip -import torch -from omegaconf import OmegaConf -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from pytorch_lightning.trainer.trainer import Trainer -from transformers import CLIPModel - -from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector -from nemo.utils import AppState, logging -from nemo.utils.distributed import initialize_distributed -from nemo.utils.model_utils import inject_model_parallel_rank - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - - -def get_args(): - parser = ArgumentParser() - parser.add_argument("--arch", type=str, default="ViT-H-14") - parser.add_argument("--version", type=str, default="laion2b_s32b_b79k") - - parser.add_argument( - "--hparams_file", - type=str, - default=None, - required=False, - help="Path config for restoring. It's created during training and may need to be modified during restore if restore environment is different than training. Ex: /raid/nemo_experiments/megatron_gpt/hparams.yaml", - ) - parser.add_argument("--nemo_file_path", type=str, default=None, required=True, help="Path to output .nemo file.") - parser.add_argument("--gpus_per_node", type=int, required=False, default=1) - parser.add_argument("--tensor_model_parallel_size", type=int, required=False, default=1) - parser.add_argument("--pipeline_model_parallel_size", type=int, required=False, default=1) - parser.add_argument( - "--pipeline_model_parallel_split_rank", - type=int, - required=False, - default=None, - help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", - ) - parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) - parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") - - args = parser.parse_args() - return args - - -def mapping_openclip_state_dict(open_model): - open_state_dict = open_model.state_dict() - key_mapping = { - "positional_embedding": "text_encoder.language_model.embedding.position_embeddings", - "token_embedding.weight": "text_encoder.language_model.embedding.word_embeddings.weight", - "ln_final.weight": "text_encoder.language_model.encoder.final_layernorm.weight", - "ln_final.bias": "text_encoder.language_model.encoder.final_layernorm.bias", - "text_projection": "text_encoder.head.weight", - } - layer_mapping = { - ".ln_1.weight": ".input_layernorm.weight", - ".ln_1.bias": ".input_layernorm.bias", - ".attn.in_proj_weight": ".self_attention.query_key_value.weight", - ".attn.in_proj_bias": ".self_attention.query_key_value.bias", - ".attn.out_proj.weight": ".self_attention.dense.weight", - ".attn.out_proj.bias": ".self_attention.dense.bias", - ".ln_2.weight": ".post_attention_layernorm.weight", - ".ln_2.bias": ".post_attention_layernorm.bias", - ".mlp.c_fc.weight": ".mlp.dense_h_to_4h.weight", - ".mlp.c_fc.bias": ".mlp.dense_h_to_4h.bias", - ".mlp.c_proj.weight": ".mlp.dense_4h_to_h.weight", - ".mlp.c_proj.bias": ".mlp.dense_4h_to_h.bias", - ".ln_pre.weight": ".preprocess_layernorm.weight", - ".ln_pre.bias": ".preprocess_layernorm.bias", - ".ln_post.weight": ".transformer.final_layernorm.weight", - ".ln_post.bias": ".transformer.final_layernorm.bias", - ".positional_embedding": ".position_embeddings", - ".backbone.proj": ".head.weight", - ".class_embedding": ".cls_token", - ".backbone.conv1.weight": ".backbone.linear_encoder.weight", - } - - nemo_state_dict = {} - for key in open_state_dict.keys(): - if key.startswith("transformer.resblocks."): - key_ = key.replace("transformer.resblocks.", "text_encoder.language_model.encoder.layers.") - elif key.startswith("visual.transformer.resblocks."): - key_ = key.replace("visual.transformer.resblocks.", "vision_encoder.backbone.transformer.layers.") - elif key.startswith('visual.'): - key_ = key.replace("visual.", "vision_encoder.backbone.") - else: - key_ = key - for pat in key_mapping: - if key_ == pat: - key_ = key_.replace(pat, key_mapping[pat]) - for pat in layer_mapping: - if key_.endswith(pat): - key_ = key_[: -len(pat)] + layer_mapping[pat] - break - nemo_state_dict[key_] = open_state_dict[key] - - nemo_state_dict["text_encoder.head.weight"] = nemo_state_dict["text_encoder.head.weight"].T - nemo_state_dict["vision_encoder.head.weight"] = nemo_state_dict["vision_encoder.head.weight"].T - nemo_state_dict["vision_encoder.backbone.cls_token"] = nemo_state_dict[ - "vision_encoder.backbone.cls_token" - ].reshape(1, 1, -1) - w = nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] - nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] = einops.rearrange(w, "b c p1 p2 -> b (p1 p2 c)",) - nemo_state_dict["vision_encoder.backbone.linear_encoder.bias"] = torch.zeros(w.shape[0]) - - return nemo_state_dict - - -def mapping_hf_state_dict(hf_model): - hf_state_dict = hf_model.state_dict() - key_mapping = { - "text_projection.weight": "text_encoder.head.weight", - "visual_projection.weight": "vision_encoder.head.weight", - } - - layer_mapping = { - ".layer_norm1.weight": ".input_layernorm.weight", - ".layer_norm1.bias": ".input_layernorm.bias", - ".self_attn.out_proj.weight": ".self_attention.dense.weight", - ".self_attn.out_proj.bias": ".self_attention.dense.bias", - ".layer_norm2.weight": ".post_attention_layernorm.weight", - ".layer_norm2.bias": ".post_attention_layernorm.bias", - ".mlp.fc1.weight": ".mlp.dense_h_to_4h.weight", - ".mlp.fc1.bias": ".mlp.dense_h_to_4h.bias", - ".mlp.fc2.weight": ".mlp.dense_4h_to_h.weight", - ".mlp.fc2.bias": ".mlp.dense_4h_to_h.bias", - ".pre_layrnorm.weight": ".preprocess_layernorm.weight", - ".pre_layrnorm.bias": ".preprocess_layernorm.bias", - ".post_layernorm.weight": ".transformer.final_layernorm.weight", - ".post_layernorm.bias": ".transformer.final_layernorm.bias", - ".backbone.embeddings.position_embedding.weight": ".backbone.position_embeddings", - ".language_model.embeddings.position_embedding.weight": ".language_model.embedding.position_embeddings", - ".embeddings.class_embedding": ".cls_token", - ".backbone.embeddings.patch_embedding.weight": ".backbone.linear_encoder.weight", - ".final_layer_norm.weight": ".encoder.final_layernorm.weight", - ".final_layer_norm.bias": ".encoder.final_layernorm.bias", - ".embeddings.token_embedding.weight": ".embedding.word_embeddings.weight", - } - - nemo_state_dict = {} - for key in hf_state_dict.keys(): - if key.startswith("text_model.encoder.layers"): - key_ = key.replace("text_model.encoder.layers", "text_encoder.language_model.encoder.layers") - elif key.startswith("vision_model.encoder.layers"): - key_ = key.replace("vision_model.encoder.layers", "vision_encoder.backbone.transformer.layers") - elif key.startswith('vision_model.'): - key_ = key.replace("vision_model.", "vision_encoder.backbone.") - elif key.startswith('text_model.'): - key_ = key.replace('text_model.', 'text_encoder.language_model.') - else: - key_ = key - for pat in key_mapping: - if key_ == pat: - key_ = key_.replace(pat, key_mapping[pat]) - for pat in layer_mapping: - if key_.endswith(pat): - key_ = key_[: -len(pat)] + layer_mapping[pat] - break - if 'q_proj' in key_: - key_k = key.replace('q_proj', 'k_proj') - key_v = key.replace('q_proj', 'v_proj') - key_new = key_.replace('self_attn.q_proj', 'self_attention.query_key_value') - value_new = torch.concat((hf_state_dict[key], hf_state_dict[key_k], hf_state_dict[key_v]), dim=0) - nemo_state_dict[key_new] = value_new - elif not ('k_proj' in key_ or 'v_proj' in key_ or 'position_ids' in key_): - nemo_state_dict[key_] = hf_state_dict[key] - - nemo_state_dict["vision_encoder.backbone.cls_token"] = nemo_state_dict[ - "vision_encoder.backbone.cls_token" - ].reshape(1, 1, -1) - w = nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] - nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] = einops.rearrange(w, "b c p1 p2 -> b (p1 p2 c)",) - nemo_state_dict["vision_encoder.backbone.linear_encoder.bias"] = torch.zeros(w.shape[0]) - - return nemo_state_dict - - -def convert(local_rank, rank, world_size, args): - app_state = AppState() - app_state.data_parallel_rank = 0 - num_nodes = world_size // args.gpus_per_node - if args.bcp: - trainer = Trainer( - devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu', plugins=[TorchElasticEnvironment()] - ) - else: - trainer = Trainer(devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu') - - app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size - app_state.tensor_model_parallel_size = args.tensor_model_parallel_size - - # no use atm, use to split ranks in encoder/decoder models. - if args.pipeline_model_parallel_size > 1 and args.model_type in []: - if args.pipeline_model_parallel_split_rank is not None: - app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_split_rank - else: - if args.pipeline_model_parallel_size % 2 != 0: - raise ValueError( - f"Pipeline model parallel size {args.pipeline_model_parallel_size} must be even if split rank is not specified." - ) - else: - # If split rank is not set, then we set it to be pipeline_model_parallel_size // 2 - this is because in most cases we have the same number of enc/dec layers. - app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_size // 2 - else: - app_state.pipeline_model_parallel_split_rank = None - - app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size - - parallel_state.initialize_model_parallel( - tensor_model_parallel_size=app_state.tensor_model_parallel_size, - pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, - ) - - app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() - app_state.tensor_model_parallel_rank = parallel_state.get_tensor_model_parallel_rank() - - cfg = OmegaConf.load(args.hparams_file) - model = MegatronCLIPModel(cfg.model, trainer) - - if args.version == "huggingface": - hf_model = CLIPModel.from_pretrained(args.arch) - state_dict = mapping_hf_state_dict(hf_model) - else: - open_model, _, _ = open_clip.create_model_and_transforms(args.arch, pretrained=args.version) - state_dict = mapping_openclip_state_dict(open_model) - - model.model.load_state_dict(state_dict) - - model._save_restore_connector = NLPSaveRestoreConnector() - - if torch.distributed.is_initialized(): - torch.distributed.barrier() - - model.save_to(args.nemo_file_path) - - logging.info(f'NeMo model saved to: {args.nemo_file_path}') - - -if __name__ == '__main__': - args = get_args() - local_rank, rank, world_size = initialize_distributed(args) - convert(local_rank, rank, world_size, args) diff --git a/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py b/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py deleted file mode 100644 index 2c536ca7b5bb..000000000000 --- a/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import torch -import torch.nn.functional as F -from omegaconf.omegaconf import OmegaConf, open_dict -from PIL import Image -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from tqdm import tqdm - -from nemo.collections.multimodal.data.clip.clip_dataset import ( - ImagenetClassnameDataset, - build_imagenet_validation_dataloader, - get_preprocess_fns, - tokenize, -) -from nemo.collections.multimodal.data.clip.imagenet_zeroshot_data import imagenet_classnames, openai_imagenet_template -from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.collections.nlp.modules.common.megatron.utils import average_losses_across_data_parallel_group -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.get_rank import is_global_rank_zero - -try: - from megatron.core import parallel_state, tensor_parallel - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - - -def accuracy(output, target, topk=(1,)): - pred = output.topk(max(topk), 1, True, True)[1].t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - return [float(correct[:k].reshape(-1).float().sum(0, keepdim=True).cpu().numpy()) for k in topk] - - -@hydra_runner(config_path="conf", config_name="megatron_clip_imagenet_zeroshot") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - # These configs are required to be off during inference. - def model_cfg_modifier(model_cfg): - model_cfg.precision = cfg.trainer.precision - model_cfg.vision.precision = cfg.trainer.precision - model_cfg.text.precision = cfg.trainer.precision - if cfg.trainer.precision != "bf16": - model_cfg.megatron_amp_O2 = False - model_cfg.sequence_parallel = False - model_cfg.activations_checkpoint_granularity = None - model_cfg.activations_checkpoint_method = None - - trainer, model = setup_trainer_and_model_for_inference( - model_provider=MegatronCLIPModel, cfg=cfg, model_cfg_modifier=model_cfg_modifier, - ) - - if model.cfg.get("megatron_amp_O2", False): - vision_encoder = model.model.module.vision_encoder - text_encoder = model.model.module.text_encoder - else: - vision_encoder = model.model.vision_encoder - text_encoder = model.model.text_encoder - - # get autocast_dtype - if trainer.precision in ['bf16', 'bf16-mixed']: - autocast_dtype = torch.bfloat16 - elif trainer.precision in [32, '32', '32-true']: - autocast_dtype = torch.float - elif trainer.precision in [16, '16', '16-mixed']: - autocast_dtype = torch.half - else: - raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') - - with open_dict(cfg): - cfg.model["vision"] = model.cfg.vision - cfg.model["text"] = model.cfg.text - - imagenet_val = build_imagenet_validation_dataloader(cfg.model, model.tokenizer) - with torch.no_grad(), torch.cuda.amp.autocast( - enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, - ): - # build imagenet classification classifier - classifier = [] - for texts in imagenet_val["texts"]: - texts = texts.cuda(non_blocking=True) - class_embeddings = text_encoder(texts) - class_embedding = F.normalize(class_embeddings, dim=-1).mean(dim=0) - class_embedding /= class_embedding.norm() - classifier.append(class_embedding) - classifier = torch.stack(classifier, dim=1) - - top1, top5, n = 0.0, 0.0, 0.0 - for images, target in tqdm(imagenet_val["images"], desc="Imagenet Zero-shot Evaluation", leave=False): - if images is None or target is None: - continue - - images = images.cuda(non_blocking=True) - target = target.cuda(non_blocking=True) - # predict - image_features = vision_encoder(images) - image_features = F.normalize(image_features, dim=-1) - logits = 100.0 * image_features @ classifier - - # measure accuracy - acc1, acc5 = accuracy(logits, target, topk=(1, 5)) - top1 += acc1 - top5 += acc5 - n += images.size(0) - - logging.info('Finished zero-shot imagenet.') - top1 = top1 / n - top5 = top5 / n - - imagenet_metric = torch.zeros(2).cuda() - imagenet_metric[0], imagenet_metric[1] = top1, top5 - imagenet_metric = average_losses_across_data_parallel_group(imagenet_metric) - - if is_global_rank_zero: - logging.info(f"Zero-shot CLIP accuracy Top-1: {imagenet_metric[0]:.4f}; Top-5: {imagenet_metric[1]:.4f}") - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/foundation/clip/megatron_clip_infer.py b/examples/multimodal/foundation/clip/megatron_clip_infer.py deleted file mode 100644 index 06f37081b9be..000000000000 --- a/examples/multimodal/foundation/clip/megatron_clip_infer.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import torch -from omegaconf.omegaconf import OmegaConf, open_dict -from PIL import Image -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment - -from nemo.collections.multimodal.data.clip.clip_dataset import get_preprocess_fns -from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.get_rank import is_global_rank_zero - - -@hydra_runner(config_path="conf", config_name="megatron_clip_infer") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - # These configs are required to be off during inference. - def model_cfg_modifier(model_cfg): - model_cfg.precision = cfg.trainer.precision - model_cfg.vision.precision = cfg.trainer.precision - model_cfg.text.precision = cfg.trainer.precision - if cfg.trainer.precision != "bf16": - model_cfg.megatron_amp_O2 = False - model_cfg.sequence_parallel = False - model_cfg.activations_checkpoint_granularity = None - model_cfg.activations_checkpoint_method = None - - trainer, model = setup_trainer_and_model_for_inference( - model_provider=MegatronCLIPModel, cfg=cfg, model_cfg_modifier=model_cfg_modifier, - ) - - if model.cfg.get("megatron_amp_O2", False): - vision_encoder = model.model.module.vision_encoder - text_encoder = model.model.module.text_encoder - else: - vision_encoder = model.model.vision_encoder - text_encoder = model.model.text_encoder - - val_image_transform, text_transform = get_preprocess_fns(model.cfg, model.tokenizer, is_train=False,) - - # get autocast_dtype - if trainer.precision in ['bf16', 'bf16-mixed']: - autocast_dtype = torch.bfloat16 - elif trainer.precision in [32, '32', '32-true']: - autocast_dtype = torch.float - elif trainer.precision in [16, '16', '16-mixed']: - autocast_dtype = torch.half - else: - raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') - - image = Image.open(cfg.image_path).convert('RGB') - with torch.no_grad(), torch.cuda.amp.autocast( - enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, - ): - image = val_image_transform(image).unsqueeze(0).cuda() - texts = text_transform(cfg.texts).cuda() - image_features = vision_encoder(image) - text_features = text_encoder(texts) - image_features /= image_features.norm(dim=-1, keepdim=True) - text_features /= text_features.norm(dim=-1, keepdim=True) - - text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1) - - if is_global_rank_zero: - print(f"Given image's CLIP text probability: ", list(zip(cfg.texts, text_probs[0].cpu().numpy()))) - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/foundation/clip/megatron_clip_pretrain.py b/examples/multimodal/foundation/clip/megatron_clip_pretrain.py deleted file mode 100644 index d0dcc07ffe3e..000000000000 --- a/examples/multimodal/foundation/clip/megatron_clip_pretrain.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from omegaconf.omegaconf import OmegaConf, open_dict - -from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel -from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.exp_manager import exp_manager - - -@hydra_runner(config_path="conf", config_name="megatron_clip_config") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - assert ( - cfg.trainer.devices * cfg.trainer.num_nodes - ) * cfg.model.micro_batch_size == cfg.model.global_batch_size, ( - "Gradient accumulation is not supported in CLIP yet." - ) - - trainer = MegatronTrainerBuilder(cfg).create_trainer() - exp_manager(trainer, cfg.exp_manager) - - # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams - with open_dict(cfg): - cfg.model.precision = cfg.trainer.precision - - model = MegatronCLIPModel(cfg.model, trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/generative/controlnet/conf/controlnet_infer.yaml b/examples/multimodal/generative/controlnet/conf/controlnet_infer.yaml deleted file mode 100644 index 0012e272aac4..000000000000 --- a/examples/multimodal/generative/controlnet/conf/controlnet_infer.yaml +++ /dev/null @@ -1,36 +0,0 @@ -name: stable-diffusion-train - -infer: - unconditional_guidance_scale: 3 - num_images_per_prompt: 4 - hint_image_size: 512 - height: 512 - width: 512 - down_factor: 8 - inference_steps: 50 - sampler_type: 'DDIM' - eta: 0 - output_type: 'pil' - save_to_file: True - out_path: 'controlnet' - seed: 355 - prompts: - - high quality picture of a house in oil painting style - control: - - /datasets/coco-stuff/house.png #images/val2017/000000001584.jpg - # Depending on the input control, if the input control is already the conditioning image, null should be passed here - # If a reconstruction target is used as control, then preprocessing function that turns it into a conditioning image needs to be specified - control_image_preprocess: seg2img - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: 16 - logger: False # logger provided by exp_manager - -model: - restore_from_path: /ckpts/controlnet/30k.nemo - precision: ${trainer.precision} - strength: 2.0 - guess_mode: False \ No newline at end of file diff --git a/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml b/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml deleted file mode 100644 index beb4fd80ee84..000000000000 --- a/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml +++ /dev/null @@ -1,220 +0,0 @@ -trainer: - devices: 2 - num_nodes: 1 - accelerator: gpu - precision: 16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: True - max_epochs: 3 # PTL default. In practice, max_steps will be reached first. - max_steps: -1 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - limit_val_batches: 0 - - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: controlnet - create_wandb_logger: False - wandb_logger_kwargs: - project: stable-diffusion - group: controlnet - name: controlnet-v1.5 - resume: True - create_checkpoint_callback: True - create_tensorboard_logger: True - checkpoint_callback_params: - save_top_k: -1 - every_n_train_steps: 5000 - every_n_epochs: 0 - monitor: reduced_train_loss - filename: 'controlnet--{reduced_train_loss:.2f}-{step}-{consumed_samples}' - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: False - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - - - - -model: - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 4 # limited by GPU memory - global_batch_size: 8 - - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: images - cond_stage_key: captions - control_key: hint - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - scale_by_std: False - ckpt_path: - ignore_keys: [ ] - parameterization: eps - clip_denoised: True - load_only_unet: False - cosine_s: 8e-3 - given_betas: - original_elbo_weight: 0 - v_posterior: 0 - l_simple_weight: 1 - use_positional_encodings: False - learn_logvar: False - logvar_init: 0 - beta_schedule: linear - loss_type: l2 - learning_rate: 1.0e-04 - concat_mode: True - cond_stage_forward: - text_embedding_dropout_rate: 0.0 - fused_opt: True - inductor: False - inductor_cudagraphs: False - capture_cudagraph_iters: -1 # -1 to disable - channels_last: True - only_mid_control: False - sd_locked: True - - control_stage_config: - _target_: nemo.collections.multimodal.models.controlnet.controlnet.ControlNet - params: - from_pretrained_unet: /ckpts/v1-5-pruned.ckpt - from_NeMo: True - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - use_linear_in_transformer: False - transformer_depth: 1 - context_dim: 768 - use_checkpoint: False - legacy: False - use_flash_attention: False - - unet_config: - _target_: nemo.collections.multimodal.models.controlnet.controlnet.ControlledUnetModel - from_pretrained: /ckpts/v1-5-pruned.ckpt - from_NeMo: True - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: - - 4 - - 2 - - 1 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 4 - - 4 - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: False - legacy: False - use_flash_attention: False - - first_stage_config: - _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL - from_pretrained: /ckpts/vae.bin - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder - version: openai/clip-vit-large-patch14 - device: cuda - max_length: 77 - - data: - num_workers: 16 - train: - dataset_path: - #- /datasets/tarfiles/fill50k.pkl - - /datasets/coco-stuff/coco-stuff-tarfiles/wdinfo-coco-stuff.pkl - augmentations: - resize_smallest_side: 512 - center_crop_h_w: 512, 512 - horizontal_flip: False - filterings: - - webdataset: - infinite_sampler: False - local_root_path: /datasets/coco-stuff/coco-stuff-tarfiles - - optim: - name: fused_adam - lr: 2e-5 - weight_decay: 0. - betas: - - 0.9 - - 0.999 - sched: - name: WarmupHoldPolicy - warmup_steps: 0 - hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant - - # Nsys profiling options - nsys_profile: - enabled: False - start_step: 10 # Global batch to start profiling - end_step: 10 # Global batch to end profiling - ranks: [ 0 ] # Global rank IDs to profile - gen_shape: False # Generate model and kernel details including input shapes - - image_logger: - batch_frequency: 1000 - max_images: 4 - - #miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) diff --git a/examples/multimodal/generative/controlnet/controlnet_infer.py b/examples/multimodal/generative/controlnet/controlnet_infer.py deleted file mode 100644 index c050010a73c0..000000000000 --- a/examples/multimodal/generative/controlnet/controlnet_infer.py +++ /dev/null @@ -1,247 +0,0 @@ -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import time - -import cv2 -import einops -import torch -from PIL import Image - -from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet -from nemo.collections.multimodal.models.controlnet.util import get_preprocessing_function -from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler -from nemo.collections.multimodal.models.stable_diffusion.samplers.plms import PLMSSampler -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.core.config import hydra_runner - - -def get_control_input(image_path, batch_size, hint_image_size, control_image_preprocess=None): - image = cv2.imread(image_path) - if control_image_preprocess: - # More applications can be supported here - process = get_preprocessing_function(control_image_preprocess) - image = process(image) - image = cv2.resize(image, (hint_image_size, hint_image_size)) - control = torch.from_numpy(image).float() / 255.0 - control = torch.stack([control for _ in range(batch_size)], dim=0) - control = einops.rearrange(control, 'b h w c -> b c h w') - return control - - -def encode_prompt(cond_stage_model, prompt, unconditional_guidance_scale, batch_size): - c = cond_stage_model.encode(batch_size * [prompt]) - if unconditional_guidance_scale != 1.0: - uc = cond_stage_model.encode(batch_size * [""]) - else: - uc = None - return c, uc - - -def initialize_sampler(model, sampler_type): - if sampler_type == 'DDIM': - sampler = DDIMSampler(model) - elif sampler_type == 'PLMS': - sampler = PLMSSampler(model) - else: - raise ValueError(f'Sampler {sampler_type} is not supported for {cls.__name__}') - return sampler - - -def decode_images(model, samples): - images = model.decode_first_stage(samples) - - images = torch.clamp((images + 1.0) / 2.0, min=0.0, max=1.0) - - return images - - -def torch_to_numpy(images): - numpy_images = [x.float().cpu().permute(0, 2, 3, 1).numpy() for x in images] - return numpy_images - - -def numpy_to_pil(images): - """ - Convert a numpy image or a batch of images to a PIL image. - """ - if images.ndim == 3: - images = images[None, ...] - images = (images * 255).round().astype("uint8") - pil_images = [Image.fromarray(image) for image in images] - - return pil_images - - -def pipeline(model, cfg, rng=None, verbose=True): - # setup default values for inference configs - unconditional_guidance_scale = cfg.infer.get("unconditional_guidance_scale", 7.5) - batch_size = cfg.infer.get('num_images_per_prompt', 1) - prompts = cfg.infer.get('prompts', []) - control = cfg.infer.get('control', []) - height = cfg.infer.get('height', 512) - width = cfg.infer.get('width', 512) - downsampling_factor = cfg.infer.get('down_factor', 8) - sampler_type = cfg.infer.get('sampler_type', 'DDIM') - inference_steps = cfg.infer.get('inference_steps', 50) - output_type = cfg.infer.get('output_type', 'pil') - save_to_file = cfg.infer.get('save_to_file', True) - out_path = cfg.infer.get('out_path', '') - eta = cfg.infer.get('eta', 0) - guess_mode = cfg.model.get('guess_mode', False) - hint_image_size = cfg.infer.get('hint_image_size', 512) - control_image_preprocess = cfg.infer.get('control_image_preprocess', None) - - # get autocast_dtype - if cfg.trainer.precision in ['bf16', 'bf16-mixed']: - autocast_dtype = torch.bfloat16 - elif cfg.trainer.precision in [32, '32', '32-true']: - autocast_dtype = torch.float - elif cfg.trainer.precision in [16, '16', '16-mixed']: - autocast_dtype = torch.half - else: - raise ValueError('precision must be in [32, 16, "bf16"]') - - with torch.no_grad(), torch.cuda.amp.autocast( - enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, - ): - - in_channels = model.model.diffusion_model.in_channels - - sampler = initialize_sampler(model, sampler_type.upper()) - - output = [] - throughput = [] - - if isinstance(prompts, str): - prompts = [prompts] - - assert len(prompts) == len(control) - - for control, prompt in zip(control, prompts): - tic = time.perf_counter() - tic_total = tic - txt_cond, txt_u_cond = encode_prompt( - model.cond_stage_model, prompt, unconditional_guidance_scale, batch_size - ) - - control = get_control_input(control, batch_size, hint_image_size, control_image_preprocess).to( - torch.cuda.current_device(), dtype=autocast_dtype - ) - - cond = {"c_concat": control, "c_crossattn": txt_cond} - u_cond = {"c_concat": None if guess_mode else control, "c_crossattn": txt_u_cond} - - toc = time.perf_counter() - conditioning_time = toc - tic - - latent_shape = [batch_size, height // downsampling_factor, width // downsampling_factor] - latents = torch.randn( - [batch_size, in_channels, height // downsampling_factor, width // downsampling_factor], generator=rng - ).to(torch.cuda.current_device()) - - tic = time.perf_counter() - samples, intermediates = sampler.sample( - S=inference_steps, - conditioning=cond, - batch_size=batch_size, - shape=latent_shape, - verbose=False, - unconditional_guidance_scale=unconditional_guidance_scale, - unconditional_conditioning=u_cond, - eta=eta, - x_T=latents, - ) - toc = time.perf_counter() - sampling_time = toc - tic - - tic = time.perf_counter() - images = decode_images(model, samples) - toc = time.perf_counter() - decode_time = toc - tic - - toc_total = time.perf_counter() - total_time = toc_total - tic_total - output.append(images) - - throughput.append( - { - 'text-conditioning-time': conditioning_time, - 'sampling-time': sampling_time, - 'decode-time': decode_time, - 'total-time': total_time, - 'sampling-steps': inference_steps, - } - ) - - # Convert output type and save to disk - if output_type == 'torch': - output = torch.cat(output, dim=0) - else: - output = torch_to_numpy(output) - if output_type == 'pil': - output = [numpy_to_pil(x) for x in output] - - if save_to_file: - os.makedirs(out_path, exist_ok=True) - # Saving control map - control_image = control[0].float().cpu().permute(1, 2, 0).numpy() - control_image = Image.fromarray((control_image * 255).round().astype("uint8")) - control_image.save(os.path.join(out_path, f'{prompt[:50]}_control.png')) - if output_type == 'pil': - for text_prompt, pils in zip(prompts, output): - for idx, image in enumerate(pils): - image.save(os.path.join(out_path, f'{text_prompt[:50]}_{idx}.png')) - else: - with open(os.path.join(out_path, 'output.pkl'), 'wb') as f: - pickle.dump(output, f) - else: - return output - - ave_metrics = {} - for key in throughput[0].keys(): - ave_metrics[f'avg-{key}'] = sum([dicts[key] for dicts in throughput]) / len(throughput) - if verbose: - print(ave_metrics) - - -@hydra_runner(config_path='conf', config_name='controlnet_infer') -def main(cfg): - def model_cfg_modifier(model_cfg): - model_cfg.precision = cfg.trainer.precision - model_cfg.ckpt_path = None - model_cfg.inductor = False - model_cfg.unet_config.from_pretrained = None - model_cfg.first_stage_config.from_pretrained = None - model_cfg.control_stage_config.from_pretrained_unet = None - model_cfg.channels_last = True - model_cfg.capture_cudagraph_iters = -1 - - torch.backends.cuda.matmul.allow_tf32 = True - trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronControlNet, cfg=cfg, model_cfg_modifier=model_cfg_modifier - ) - model = megatron_diffusion_model.model - model.cuda().eval() - - guess_mode = cfg.model.guess_mode - model.contol_scales = ( - [cfg.model.strength * (0.825 ** float(12 - i)) for i in range(13)] - if guess_mode - else ([cfg.model.strength] * 13) - ) - - rng = torch.Generator().manual_seed(cfg.infer.seed) - pipeline(model, cfg, rng=rng) - - -if __name__ == "__main__": - main() diff --git a/examples/multimodal/generative/controlnet/controlnet_train.py b/examples/multimodal/generative/controlnet/controlnet_train.py deleted file mode 100644 index 8a7a46b79480..000000000000 --- a/examples/multimodal/generative/controlnet/controlnet_train.py +++ /dev/null @@ -1,54 +0,0 @@ -from datetime import timedelta - -import pytorch_lightning as pl -import torch -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from pytorch_lightning.strategies.ddp import DDPStrategy - -from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon -from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import ( - construct_image_augmentations, - identical_transform, -) -from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet -from nemo.collections.multimodal.models.controlnet.util import ImageLogger -from nemo.collections.multimodal.parts.stable_diffusion.utils import instantiate_from_config -from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder -from nemo.collections.nlp.parts.nlp_overrides import ( - GradScaler, - MegatronHalfPrecisionPlugin, - NLPDDPStrategy, - PipelineMixedPrecisionPlugin, -) -from nemo.core.config import hydra_runner -from nemo.utils.exp_manager import StatelessTimer, exp_manager - - -class MegatronControlNetTrainerBuilder(MegatronTrainerBuilder): - """Builder for T5 model Trainer with overrides.""" - - def create_trainer(self, callbacks=[]) -> Trainer: - strategy = self._training_strategy() - plugins = self._plugins() - return Trainer(plugins=plugins, strategy=strategy, **self.cfg.trainer, callbacks=callbacks) - - -@hydra_runner(config_path='conf', config_name='controlnet_v1-5.yaml') -def main(cfg): - callbacks = [] - - if cfg.model.get('image_logger', None): - callbacks.append(ImageLogger(**cfg.model.image_logger)) - - trainer = MegatronControlNetTrainerBuilder(cfg).create_trainer(callbacks=callbacks) - - exp_manager(trainer, cfg.get("exp_manager", None)) - - model = MegatronControlNet(cfg.model, trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/generative/convert_hf_ckpt_to_nemo.py b/examples/multimodal/generative/convert_hf_ckpt_to_nemo.py deleted file mode 100644 index cd7e97ded64f..000000000000 --- a/examples/multimodal/generative/convert_hf_ckpt_to_nemo.py +++ /dev/null @@ -1,226 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Usage example: - python /opt/NeMo/examples/multimodal/generative/stable_diffusion/convert_hf_ckpt_to_nemo.py - --ckpt_path=path/to/hf.ckpt - --hparams_file=path/to/saved.yaml - --nemo_file_path=hf2sd.nemo - -Additionally, provide a NeMo hparams file with the correct model architecture arguments. Refer to examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml. -""" - -import os -import tempfile -from argparse import ArgumentParser - -import torch -from lightning_fabric.utilities.cloud_io import _load as pl_load -from omegaconf import OmegaConf -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from pytorch_lightning.trainer.trainer import Trainer - -from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector -from nemo.utils import AppState, logging -from nemo.utils.distributed import initialize_distributed - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - - -def get_args(): - parser = ArgumentParser() - parser.add_argument("--ckpt_path", type=str, default=None, required=True, help="Path to checkpoint.") - - parser.add_argument( - "--hparams_file", - type=str, - default=None, - required=False, - help="Path config for restoring. It's created during training and may need to be modified during restore if restore environment is different than training. Ex: /raid/nemo_experiments/megatron_gpt/hparams.yaml", - ) - parser.add_argument("--nemo_file_path", type=str, default=None, required=True, help="Path to output .nemo file.") - parser.add_argument("--gpus_per_node", type=int, required=False, default=1) - parser.add_argument("--tensor_model_parallel_size", type=int, required=False, default=1) - parser.add_argument("--pipeline_model_parallel_size", type=int, required=False, default=1) - parser.add_argument( - "--pipeline_model_parallel_split_rank", - type=int, - required=False, - default=None, - help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", - ) - parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) - parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") - parser.add_argument("--model_type", type=str, required=False, default="stable_diffusion") - parser.add_argument("--nemo_clip_path", type=str, required=False, help="Path to clip ckpt file in .nemo format") - - args = parser.parse_args() - return args - - -def load_config_and_state_from_nemo(nemo_path): - if torch.cuda.is_available(): - map_location = torch.device('cuda') - else: - map_location = torch.device('cpu') - save_restore_connector = NLPSaveRestoreConnector() - cwd = os.getcwd() - - with tempfile.TemporaryDirectory() as tmpdir: - try: - save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) - - # Change current working directory to - os.chdir(tmpdir) - config_yaml = os.path.join(tmpdir, save_restore_connector.model_config_yaml) - cfg = OmegaConf.load(config_yaml) - - model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) - state_dict = save_restore_connector._load_state_dict_from_disk(model_weights, map_location=map_location) - finally: - os.chdir(cwd) - - return cfg, state_dict - - -def mapping_hf_state_dict(hf_state_dict, model, clip_dict=None): - nemo_state = model.state_dict() - new_state_dict = {} - for k, v in hf_state_dict.items(): - k = 'model.' + k - # This is not necessary when you turn off model.inductor in config file - # if 'diffusion_model' in k: - # k = k.replace('diffusion_model', 'diffusion_model._orig_mod') - if 'in_layers' in k or 'out_layers' in k: - s = k.split('.') - idx = int(s[-2]) - if idx != 0: - k = ".".join(s[:-2] + [str(int(idx - 1))] + [s[-1]]) - if k in nemo_state: - new_state_dict[k] = v - if clip_dict: - for k, v in clip_dict.items(): - k = k.replace("model.text_encoder", "model.cond_stage_model.model") - if k in nemo_state: - new_state_dict[k] = v - for k in [ - 'betas', - 'alphas_cumprod', - 'alphas_cumprod_prev', - 'sqrt_alphas_cumprod', - 'sqrt_one_minus_alphas_cumprod', - 'log_one_minus_alphas_cumprod', - 'sqrt_recip_alphas_cumprod', - 'sqrt_recipm1_alphas_cumprod', - 'posterior_variance', - 'posterior_log_variance_clipped', - 'posterior_mean_coef1', - 'posterior_mean_coef2', - ]: - new_state_dict['model.' + k] = nemo_state['model.' + k] - - return new_state_dict - - -def convert(local_rank, rank, world_size, args): - app_state = AppState() - app_state.data_parallel_rank = 0 - num_nodes = world_size // args.gpus_per_node - if args.bcp: - trainer = Trainer( - devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu', plugins=[TorchElasticEnvironment()] - ) - else: - trainer = Trainer(devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu') - - app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size - app_state.tensor_model_parallel_size = args.tensor_model_parallel_size - - # no use atm, use to split ranks in encoder/decoder models. - if args.pipeline_model_parallel_size > 1 and args.model_type in []: - if args.pipeline_model_parallel_split_rank is not None: - app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_split_rank - else: - if args.pipeline_model_parallel_size % 2 != 0: - raise ValueError( - f"Pipeline model parallel size {args.pipeline_model_parallel_size} must be even if split rank is not specified." - ) - else: - # If split rank is not set, then we set it to be pipeline_model_parallel_size // 2 - this is because in most cases we have the same number of enc/dec layers. - app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_size // 2 - else: - app_state.pipeline_model_parallel_split_rank = None - - app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size - - parallel_state.initialize_model_parallel( - tensor_model_parallel_size=app_state.tensor_model_parallel_size, - pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, - ) - - app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() - app_state.tensor_model_parallel_rank = parallel_state.get_tensor_model_parallel_rank() - - if args.ckpt_path.endswith('safetensors'): - from safetensors.torch import load_file as load_safetensors - - checkpoint = load_safetensors(args.ckpt_path) - else: - checkpoint = pl_load(args.ckpt_path, map_location='cpu') - if 'state_dict' in checkpoint.keys(): - checkpoint = checkpoint['state_dict'] - cfg = OmegaConf.load(args.hparams_file) - cfg.model.inductor = False - if args.model_type == 'stable_diffusion': - model = MegatronLatentDiffusion(cfg.model, trainer) - elif args.model_type == 'controlnet': - model = MegatronControlNet(cfg.model, trainer) - - if 'nemo' in model.cfg.cond_stage_config._target_: - assert ( - args.nemo_clip_path is not None - ), "To align with current hparams file, you need to provide .nemo checkpoint of clip model for stable diffusion. If you want to convert HF clip checkpoint to .nemo checkpoint first, please refer to /opt/NeMo/examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py" - _, clip_dict = load_config_and_state_from_nemo(args.nemo_clip_path) - else: - clip_dict = None - - state_dict = mapping_hf_state_dict(checkpoint, model, clip_dict=clip_dict) - - model._save_restore_connector = NLPSaveRestoreConnector() - - model.load_state_dict(state_dict) - - if torch.distributed.is_initialized(): - torch.distributed.barrier() - - model.save_to(args.nemo_file_path) - - logging.info(f'NeMo model saved to: {args.nemo_file_path}') - - -if __name__ == '__main__': - args = get_args() - local_rank, rank, world_size = initialize_distributed(args) - convert(local_rank, rank, world_size, args) diff --git a/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml b/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml deleted file mode 100644 index 37e9b284e219..000000000000 --- a/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml +++ /dev/null @@ -1,224 +0,0 @@ -name: Dreambooth - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: bf16-mixed - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 # PTL default. In practice, max_steps will be reached first. - max_steps: 400 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - limit_val_batches: 0 - -exp_manager: - exp_dir: null - name: ${name} - create_checkpoint_callback: True - create_tensorboard_logger: True - checkpoint_callback_params: - every_n_train_steps: 200 - every_n_epochs: 0 - monitor: reduced_train_loss - save_on_train_epoch_end: False - filename: '${name}-{step}' - save_top_k: -1 - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: False - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - - - -model: - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 2 # limited by GPU memory - global_batch_size: 2 # will use more micro batches to reach global batch size - - with_prior_preservation: False - use_cached_latents: True - prior_loss_weight: 0.5 - train_text_encoder: False - restore_from_path: /ckpts/nemo-v1-5-188000-ema.nemo #This ckpt is only used to generate regularization images, thus .nemo ckpt is needed - - - - - linear_start: 0.00085 - linear_end: 0.012 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: images - cond_stage_key: captions - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn # check - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - scale_by_std: False - ckpt_path: - ignore_keys: [ ] - parameterization: eps - clip_denoised: True - load_only_unet: False - cosine_s: 8e-3 - given_betas: - original_elbo_weight: 0 - v_posterior: 0 - l_simple_weight: 1 - use_positional_encodings: False - learn_logvar: False - logvar_init: 0 - beta_schedule: linear - loss_type: l2 - - concat_mode: True - cond_stage_forward: - text_embedding_dropout_rate: 0.1 - fused_opt: True - inductor: False - inductor_cudagraphs: False - channels_last: False - - unet_config: - _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel - from_pretrained: /ckpts/unet.bin #load unet weights for finetuning, can use .ckpt ckpts from various sources - from_NeMo: False #Must be specified when from pretrained is not None, False means loading unet from HF ckpt - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: - - 4 - - 2 - - 1 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 4 - - 4 - num_heads: 8 - use_spatial_transformer: true - transformer_depth: 1 - context_dim: 768 - use_checkpoint: False - legacy: False - use_flash_attention: False - - first_stage_config: - _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL - from_pretrained: /ckpts/vae.bin - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 #Never used - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [ ] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder - restore_from_path: /ckpts/openai.nemo - device: cuda - freeze: True - layer: "last" - # For compatibility of history version that uses HF clip model - # _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder - # version: openai/clip-vit-large-patch14 - # device: cuda - # max_length: 77 - - noise_scheduler: - _target_: nemo.collections.multimodal.models.dreambooth.util.sd_noise_scheduler - parameterization: eps - v_posterior: 0 - given_betas: - beta_schedule: linear - timesteps: 1000 - linear_start: 0.00085 - linear_end: 0.012 - cosine_s: 8e-3 - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - optim: - name: fused_adam - lr: 1e-6 - weight_decay: 0. - betas: - - 0.9 - - 0.999 - sched: - name: WarmupHoldPolicy - warmup_steps: 1 - hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant - - # Nsys profiling options - nsys_profile: - enabled: False - start_step: 10 # Global batch to start profiling - end_step: 10 # Global batch to end profiling - ranks: [ 0 ] # Global rank IDs to profile - gen_shape: False # Generate model and kernel details including input shapes - - data: - name: pbss - num_workers: 4 - instance_dir: /datasets/instance_dir - instance_prompt: a photo of a sks dog - regularization_dir: /datasets/nemo_dogs - regularization_prompt: a photo of a dog - num_reg_images: 10 - num_images_per_prompt: 4 - resolution: 512 - center_crop: True - cached_instance_dir: #/datasets/instance_dir_cached - cached_reg_dir: #/datasets/nemo_dogs_cached - -##The below infer config is to use inference script generating regularization images -infer: - unconditional_guidance_scale: 7.5 - num_images_per_prompt: ${model.data.num_images_per_prompt} - height: 512 - width: 512 - down_factor: 8 - inference_steps: 50 - sampler_type: 'PLMS' - eta: 0 - output_type: 'pil' - save_to_file: False - out_path: ${model.data.regularization_dir} - prompts: ${model.data.regularization_prompt} \ No newline at end of file diff --git a/examples/multimodal/generative/dreambooth/conf/dreambooth_infer.yaml b/examples/multimodal/generative/dreambooth/conf/dreambooth_infer.yaml deleted file mode 100644 index fc8d35443767..000000000000 --- a/examples/multimodal/generative/dreambooth/conf/dreambooth_infer.yaml +++ /dev/null @@ -1,32 +0,0 @@ -name: stable-diffusion-train - -infer: - unconditional_guidance_scale: 7.5 - num_images_per_prompt: 4 - height: 512 - width: 512 - down_factor: 8 - inference_steps: 100 - sampler_type: 'DDIM' - eta: 0 - output_type: 'pil' - save_to_file: True - out_path: 'dreambooth' - seed: 123 - prompts: - - 'a photo of a sks dog' - - 'a photo of a sks dog in the Acropolis' - - 'a photo of a sks dog in front of eiffel tower' - - 'a photo of sks dog sleeping' - - 'a photo of a sks dog riding a bike' - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: 16 - logger: False # logger provided by exp_manager - -model: - restore_from_path: null - precision: ${trainer.precision} \ No newline at end of file diff --git a/examples/multimodal/generative/dreambooth/dreambooth.py b/examples/multimodal/generative/dreambooth/dreambooth.py deleted file mode 100644 index 2b6212f0bba9..000000000000 --- a/examples/multimodal/generative/dreambooth/dreambooth.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import pytorch_lightning as pl -import torch - -from omegaconf import DictConfig, OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from pytorch_lightning.trainer.connectors.checkpoint_connector import _CheckpointConnector - -from nemo.collections.multimodal.models.dreambooth.dreambooth import MegatronDreamBooth -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion -from nemo.collections.multimodal.parts.stable_diffusion.pipeline import pipeline -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder - -from nemo.collections.nlp.parts.nlp_overrides import ( - GradScaler, - MegatronHalfPrecisionPlugin, - NLPDDPStrategy, - PipelineMixedPrecisionPlugin, -) -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.exp_manager import exp_manager - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - - -def prepare_reg_data(cfg): - reg_dir = cfg.model.data.regularization_dir - num_reg_images = cfg.model.data.num_reg_images - num_images_per_prompt = cfg.model.data.num_images_per_prompt - reg_prompt = cfg.model.data.regularization_prompt - os.makedirs(reg_dir, exist_ok=True) - NUM_REG_IMAGES = len(os.listdir(reg_dir)) - if NUM_REG_IMAGES < num_reg_images: - - def model_cfg_modifier(model_cfg): - model_cfg.precision = cfg.trainer.precision - model_cfg.ckpt_path = None - model_cfg.inductor = False - model_cfg.unet_config.use_flash_attention = False - model_cfg.micro_batch_size = cfg.model.micro_batch_size - model_cfg.global_batch_size = cfg.model.global_batch_size - model_cfg.unet_config.from_pretrained = None - model_cfg.first_stage_config.from_pretrained = None - model_cfg.target = 'nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion' - - trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier - ) - model = megatron_diffusion_model.model - rng = torch.Generator() - rng.manual_seed(trainer.global_rank * 100 + cfg.model.seed) - images_to_generate = cfg.model.data.num_reg_images - NUM_REG_IMAGES - images_to_generate = images_to_generate // trainer.world_size - - logging.info( - f"No enough images in regularization folder, generating {images_to_generate} from provided ckpt on each device" - ) - - for i in range(images_to_generate // num_images_per_prompt + 1): - output = pipeline(model, cfg, verbose=False, rng=rng) - for text_prompt, pils in zip(reg_prompt, output): - for idx, image in enumerate(pils): - image.save( - os.path.join( - cfg.infer.out_path, - f'{reg_prompt}_{trainer.global_rank}_{NUM_REG_IMAGES + i * num_images_per_prompt + idx}.png', - ) - ) - del model - del trainer - if torch.cuda.is_available(): - torch.cuda.empty_cache() - - -@hydra_runner(config_path='conf', config_name='dreambooth.yaml') -def main(cfg): - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - torch.backends.cuda.matmul.allow_tf32 = True - - if cfg.model.with_prior_preservation: - prepare_reg_data(cfg) - parallel_state.destroy_model_parallel() - - trainer = MegatronTrainerBuilder(cfg).create_trainer() - - exp_manager(trainer, cfg.exp_manager) - - model = MegatronDreamBooth(cfg.model, trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/generative/dreambooth/dreambooth_infer.py b/examples/multimodal/generative/dreambooth/dreambooth_infer.py deleted file mode 100644 index e652fa68ddcd..000000000000 --- a/examples/multimodal/generative/dreambooth/dreambooth_infer.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch - -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion -from nemo.collections.multimodal.parts.stable_diffusion.pipeline import pipeline -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.core.config import hydra_runner - - -@hydra_runner(config_path='conf', config_name='dreambooth_infer') -def main(cfg): - def model_cfg_modifier(model_cfg): - model_cfg.precision = cfg.trainer.precision - model_cfg.ckpt_path = None - model_cfg.inductor = False - model_cfg.unet_config.use_flash_attention = False - model_cfg.unet_config.from_pretrained = None - model_cfg.first_stage_config.from_pretrained = None - model_cfg.target = 'nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion' - - trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier - ) - model = megatron_diffusion_model.model - model.cuda().eval() - - rng = torch.Generator().manual_seed(cfg.infer.seed) - pipeline(model, cfg, rng=rng) - - -if __name__ == "__main__": - main() diff --git a/examples/multimodal/generative/imagen/README.md b/examples/multimodal/generative/imagen/README.md deleted file mode 100644 index ba33b649cb35..000000000000 --- a/examples/multimodal/generative/imagen/README.md +++ /dev/null @@ -1,104 +0,0 @@ -# Imagen -## A. Overview - -Imagen is a multi-stage text-to-image diffusion model with an unprecedented degree of photorealism and a deep level of language understanding. Given a text prompt, Imagen first generates an image at a 64x64 resolution and then upsamples the generated image to 256x256 and 1024x1024 resolutions, all using diffusion models. - -**Table of Contents:** -- [Imagen](#imagen) - - [A. Overview](#a-overview) - - [B. Imagen Pipeline](#b-imagen-pipeline) - - [C. Files in this folder](#c-files-in-this-folder) - - [D. Imagen Training](#d-imagen-training) - - [D.1 Training Dataset](#d1-training-dataset) - - [D.2 Training configs](#d2-training-configs) - - [E. Imagen Inference](#e-imagen-inference) - - [E.1 Inference Settings](#e1-inference-settings) - - [E.2 Running the sample inference code](#e2-running-the-sample-inference-code) - - [E.3 Inference GPU Memory Usage](#e3-inference-gpu-memory-usage) - - [E.3.1 FP16 Inference](#e31-fp16-inference) - - [E.3.2 FP32 Inference](#e32-fp32-inference) - - [E.3.3 AMP Inference (Autocast Enabled)](#e33-amp-inference-autocast-enabled) - - [F. UNet Architecture](#f-unet-architecture) - - [F.1 U-Net (used for base model)](#f1-u-net-used-for-base-model) - - [F.2 Efficient U-Net (used for SR models)](#f2-efficient-u-net-used-for-sr-models) - -## B. Imagen Pipeline - -Imagen comprises a frozen text encoder (e.g. T5-XXL) to map input text into a sequence of embeddings, and a 64x64 image diffusion model, followed by two super-resolution diffusion models for generating 256x256 and 1024x1024 images. All diffusion models are conditioned on the text embedding sequence and use classifier-free guidance. - -## C. Files in this folder - -- [imagen_training.py](imagen_training.py): Script for running inference -- [imagen_generate_images.py](imagen_generate_images.py): Script for generating images for FID-CLIP analysis -- [imagen_infer.py](imagen_infer.py): Script for running inference - -## D. Imagen Training - -All three diffusion models (64x64, 256x256, 1024x1024) can be trained independently. - -### D.1 Training Dataset - -### D.2 Training configs -| configs | Description | -|---|---| -| base64-2b.yaml | 2b-parameter base 64x64 model as described in Imagen paper | -| base64-500m.yaml | 500m-parameter base 64x64 model with decreased number of embedding channels| -|sr256-400m.yaml| 400m-parameter sr 256x256 model as described in Imagen paper | -|sr1024-400m.yaml| 400m-parameter sr 1024x1024 model as described in Imagen paper | - -## E. Imagen Inference - -### E.1 Inference Settings - -[inference_pipeline.yaml](conf/inference_pipeline.yaml) specifies every config for running the sample inference code. Specifically: -- num_images_per_promt: The number of images you want to generate for each text prompt -- model_name: Different pre-defined configs (not used for now) -- run_ema_model: Either run reg/ema model for pretrained models -- customized_model: Instead of loading pre-defined models, load specified checkpoint. .ckpt checkpoint (generated during in-the-middle of training) and .nemo checkpoint (generated once training completed) are both acceptable -- target_resolution: should be one of [64, 256, 1024] -- inference_precision: Running inference in one of [16, 32, AMP] mode -- dynamic_thresholding: Whether to use dynamic thresholding when generating images -- texts: List of text prompts that are used to generate images -- output_path: The path to save generate images -- encoder_path: If not set (null), it will download text encoder first time running the inference code (and will be saved to HF_HOME), you can also load it offline by setting it to the prepared folder -- samplers: List of sampler settings that are used for each model. `step` (the number of iterations to denoise the image, ideally the larger the better, but also consume more time) and `cfg` for classifier free guidance value. You can tweak these values for better visual quality. - -### E.2 Running the sample inference code -``` -(inside NeMo root folder) -python examples/multimodal/generative/imagen/imagen_infer.py -``` - -### E.3 Inference GPU Memory Usage - -#### E.3.1 FP16 Inference -| Output\Batch size | 1 | 8 | -|-------------------|-------|-------| -| 64x64 | 11.7G | 11.9G | -| 256x256 | 12.5G | 13.0G | -| 1024x1024 | 14.1G | 21.6G | - -#### E.3.2 FP32 Inference -| Output\Batch size | 1 | 8 | -|-------------------|-------|-------| -| 64x64 | 21.7G | 22.6G | -| 256x256 | 23.4G | 24.5G | -| 1024x1024 | 26.6G | 40.6G | - -#### E.3.3 AMP Inference (Autocast Enabled) -| Output\Batch size | 1 | 8 | -|-------------------|-------|-------| -| 64x64 | 22.4G | 23.4G | -| 256x256 | 24.0G | 25.1G | -| 1024x1024 | 26.4G | 33.7G | - -## F. UNet Architecture - -We have prepared two types of UNet for Imagen according to the paper. Base model (64x64) and SR models (256x256, 1024x1024) are using different UNet models. - -### F.1 U-Net (used for base model) - - - -### F.2 Efficient U-Net (used for SR models) - diff --git a/examples/multimodal/generative/imagen/conf/base64-2b.yaml b/examples/multimodal/generative/imagen/conf/base64-2b.yaml deleted file mode 100644 index 4c02c97c9e4e..000000000000 --- a/examples/multimodal/generative/imagen/conf/base64-2b.yaml +++ /dev/null @@ -1,142 +0,0 @@ -name: imagen-nemo # The name of your model -allow_tf32: True - -trainer: - devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] - num_nodes: 1 - max_epochs: -1 - max_steps: 2500000 # precedence over max_epochs - logger: False # Provided by exp_manager - precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. - accelerator: gpu - log_every_n_steps: 5 # Interval of logging. - resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. - num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it - enable_checkpointing: False # Provided by exp_manager - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - - -exp_manager: - exp_dir: /train/imagen-base64 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger - name: imagen-base64-nf512 - project: imagen - group: nemo-imagen - resume: True - create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger - create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback - checkpoint_callback_params: - monitor: reduced_train_loss - save_top_k: 5 - every_n_epochs: 0 # Save checkpoint frequency. - every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. - filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: True - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - - -model: - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 32 # limited by GPU memory - global_batch_size: 32 # will use more micro batches to reach global batch size - inductor: True - inductor_cudagraphs: False - unet_type: base - channels_last: True - - unet: - embed_dim: 512 - image_size: 64 - channels: 3 - num_res_blocks: 3 - channel_mult: [ 1, 2, 3, 4 ] - num_attn_heads: 4 - per_head_channels: 64 - cond_dim: 2048 - attention_type: fused - feature_pooling_type: attention - learned_sinu_pos_emb_dim: 0 - attention_resolutions: [ 8, 16, 32 ] - dropout: False - use_null_token: False - init_conv_kernel_size: 3 - gradient_checkpointing: False - scale_shift_norm: True - stable_attention: False - flash_attention: True - resblock_updown: False - resample_with_conv: True - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - ddp_overlap: True # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce - - preconditioning_type: EDM - preconditioning: - loss_type: l2 - sigma_data: 0.5 - p_mean: -1.2 - p_std: 1.2 - # If want to switch to continuous DDPM training, - # use the following config: - # preconditioning_type: DDPM - # preconditioning: - # loss_type: l2 - # pred_objective: noise - # noise_schedule: cosine - # timesteps: 1000 - - conditioning: - embed_dim: 1024 - token_length: 128 - drop_rate: 0.1 - precached_key: embeddings_t5_xxl - out_key: t5_text - - data: - num_workers: 16 - train: - dataset_path: - - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 - - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 - augmentations: - resize_smallest_side: 64 - center_crop_h_w: 64, 64 - horizontal_flip: False - filterings: null - - webdataset: - use_webdataset: True - object_store: False - infinite_sampler: False - local_root_path: /datasets - verbose: False - - optim: - # We need weight decay for large-scale odel - name: fused_adam - lr: 0.0001 - eps: 1e-8 - betas: [ 0.9, 0.999 ] - weight_decay: 0.01 - sched: - name: WarmupPolicy - warmup_steps: 10000 - warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/base64-500m-edm.yaml b/examples/multimodal/generative/imagen/conf/base64-500m-edm.yaml deleted file mode 100644 index 11224e3b84d2..000000000000 --- a/examples/multimodal/generative/imagen/conf/base64-500m-edm.yaml +++ /dev/null @@ -1,136 +0,0 @@ -name: imagen-nemo # The name of your model -allow_tf32: True - -trainer: - devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] - num_nodes: 1 - max_epochs: -1 - max_steps: 2500000 # precedence over max_epochs - logger: False # Provided by exp_manager - precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. - accelerator: gpu - log_every_n_steps: 5 # Interval of logging. - resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. - num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it - enable_checkpointing: False # Provided by exp_manager - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - - -exp_manager: - exp_dir: /train/imagen-base64 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger - name: imagen-base64-nf256 - project: imagen - group: nemo-imagen - resume: True - create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger - create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback - checkpoint_callback_params: - monitor: reduced_train_loss - save_top_k: 5 - every_n_epochs: 0 # Save checkpoint frequency. - every_n_train_steps: 100 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. - filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: True - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - - -model: - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 48 # limited by GPU memory - global_batch_size: 48 # will use more micro batches to reach global batch size - inductor: False - inductor_cudagraphs: False - unet_type: base - - unet: - embed_dim: 256 - image_size: 64 - channels: 3 - num_res_blocks: 3 - channel_mult: [ 1, 2, 3, 4 ] - num_attn_heads: 4 - per_head_channels: 64 - cond_dim: 512 - attention_type: fused - feature_pooling_type: attention - learned_sinu_pos_emb_dim: 0 - attention_resolutions: [ 8, 16, 32 ] - dropout: False - use_null_token: False - init_conv_kernel_size: 3 - gradient_checkpointing: False - scale_shift_norm: True - stable_attention: False - flash_attention: False - resblock_updown: False - resample_with_conv: True - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - preconditioning_type: EDM - preconditioning: - loss_type: l2 - sigma_data: 0.5 - p_mean: -1.2 - p_std: 1.2 - - conditioning: - embed_dim: 1024 - token_length: 128 - drop_rate: 0.1 - precached_key: embeddings_t5_xxl - out_key: t5_text - - data: - num_workers: 16 - train: - dataset_path: - - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 - - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 - augmentations: - resize_smallest_side: 64 - center_crop_h_w: 64, 64 - horizontal_flip: False - filterings: null - - webdataset: - use_webdataset: True - object_store: False - infinite_sampler: False - local_root_path: /datasets - verbose: False - pbss_checkpoint_saving: - enable: False - pbss_credentials_file: pbss_credentials_joc.secret - save_frequency: 1000 - - optim: - # We need weight decay for large-scale odel - name: fused_adam - lr: 0.0001 - eps: 1e-8 - betas: [ 0.9, 0.999 ] - weight_decay: 0.01 - sched: - name: WarmupPolicy - warmup_steps: 10000 - warmup_ratio: null diff --git a/examples/multimodal/generative/imagen/conf/base64-500m.yaml b/examples/multimodal/generative/imagen/conf/base64-500m.yaml deleted file mode 100644 index 4541110caf98..000000000000 --- a/examples/multimodal/generative/imagen/conf/base64-500m.yaml +++ /dev/null @@ -1,144 +0,0 @@ -name: imagen-nemo # The name of your model -allow_tf32: True - -trainer: - devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] - num_nodes: 1 - max_epochs: -1 - max_steps: 2500000 # precedence over max_epochs - logger: False # Provided by exp_manager - precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. - accelerator: gpu - limit_val_batches: 0 - log_every_n_steps: 5 # Interval of logging. - num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it - enable_checkpointing: False # Provided by exp_manager - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - - -exp_manager: - exp_dir: /train/imagen-base64 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger - name: imagen-base64-nf256 - project: imagen - group: nemo-imagen - resume: True - create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger - create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback - checkpoint_callback_params: - monitor: reduced_train_loss - save_top_k: 5 - every_n_epochs: 0 # Save checkpoint frequency. - every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. - filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: True - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - - -model: - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 128 # limited by GPU memory - global_batch_size: 128 # will use more micro batches to reach global batch size - inductor: True - inductor_cudagraphs: False - unet_type: base - channels_last: True - - unet: - embed_dim: 256 - image_size: 64 - channels: 3 - num_res_blocks: 3 - channel_mult: [ 1, 2, 3, 4 ] - num_attn_heads: 4 - per_head_channels: 64 - cond_dim: 512 - attention_type: fused - feature_pooling_type: attention - learned_sinu_pos_emb_dim: 0 - attention_resolutions: [ 8, 16, 32 ] - dropout: False - use_null_token: False - init_conv_kernel_size: 3 - gradient_checkpointing: False - scale_shift_norm: True - stable_attention: False - flash_attention: True - resblock_updown: False - resample_with_conv: True - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - ddp_overlap: False # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce - - preconditioning_type: EDM - preconditioning: - loss_type: l2 - sigma_data: 0.5 - p_mean: -1.2 - p_std: 1.2 - # If want to switch to continuous DDPM training, - # use the following config: - # preconditioning_type: DDPM - # preconditioning: - # loss_type: l2 - # pred_objective: noise - # noise_schedule: cosine - # timesteps: 1000 - - conditioning: - embed_dim: 1024 - token_length: 128 - drop_rate: 0.1 - precached_key: embeddings_t5_xxl - out_key: t5_text - - data: - num_workers: 16 - synthetic_data: False - synthetic_data_length: 800000 - train: - dataset_path: - - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 - - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 - augmentations: - resize_smallest_side: 64 - center_crop_h_w: 64, 64 - horizontal_flip: False - filterings: null - - webdataset: - use_webdataset: True - object_store: False - infinite_sampler: False - local_root_path: /datasets - verbose: False - - optim: - # We need weight decay for large-scale odel - name: fused_adam - lr: 0.0001 - eps: 1e-8 - betas: [ 0.9, 0.999 ] - weight_decay: 0.01 - sched: - name: WarmupPolicy - warmup_steps: 10000 - warmup_ratio: null diff --git a/examples/multimodal/generative/imagen/conf/base64-500m_online_encoding.yaml b/examples/multimodal/generative/imagen/conf/base64-500m_online_encoding.yaml deleted file mode 100644 index efbab7bc1ca8..000000000000 --- a/examples/multimodal/generative/imagen/conf/base64-500m_online_encoding.yaml +++ /dev/null @@ -1,137 +0,0 @@ -name: imagen-nemo # The name of your model -allow_tf32: True - -trainer: - devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] - num_nodes: 1 - max_epochs: -1 - max_steps: 2500000 # precedence over max_epochs - logger: False # Provided by exp_manager - precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. - accelerator: gpu - log_every_n_steps: 5 # Interval of logging. - resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. - num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it - enable_checkpointing: False # Provided by exp_manager - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - - -exp_manager: - exp_dir: /train/imagen-base64 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger - name: imagen-base64-nf256 - project: imagen - group: nemo-imagen - resume: True - create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger - create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback - checkpoint_callback_params: - monitor: reduced_train_loss - save_top_k: 5 - every_n_epochs: 0 # Save checkpoint frequency. - every_n_train_steps: 100 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. - filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: True - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - - -model: - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 48 # limited by GPU memory - global_batch_size: 48 # will use more micro batches to reach global batch size - - unet_type: base - unet: - embed_dim: 256 - image_size: 64 - channels: 3 - num_res_blocks: 3 - channel_mult: [ 1, 2, 3, 4 ] - num_attn_heads: 4 - per_head_channels: 64 - cond_dim: 512 - attention_type: fused - feature_pooling_type: attention - learned_sinu_pos_emb_dim: 0 - attention_resolutions: [ 8, 16, 32 ] - dropout: False - use_null_token: False - init_conv_kernel_size: 3 - gradient_checkpointing: False - scale_shift_norm: True - stable_attention: True - flash_attention: False - resblock_updown: False - resample_with_conv: True - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - preconditioning_type: DDPM - preconditioning: - loss_type: l2 - pred_objective: noise - noise_schedule: cosine - timesteps: 1000 - - conditioning: - online_encoding: True # defaults to False (use precached encodings) if not specified - # Online encoding increases training time by about 3-4x, and is only for users who want to do a quick dev run of - # Imagen, and/or those who do not have the disk space to store precached embeddings. - # Optionally specify encoder_path if online_encoding; else, specify precached_key and out_key - encoder_path: # folder path to t5xxl-encoder.bin, or leave empty to download (and cache) t5-11b weights - embed_dim: 1024 - token_length: 128 - drop_rate: 0.1 - - data: - num_workers: 16 - train: - dataset_path: - - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 - - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 - augmentations: - resize_smallest_side: 64 - center_crop_h_w: 64, 64 - horizontal_flip: False - filterings: null - - webdataset: - use_webdataset: True - object_store: False - infinite_sampler: False - local_root_path: /datasets - verbose: False - pbss_checkpoint_saving: - enable: False - pbss_credentials_file: pbss_credentials_joc.secret - save_frequency: 1000 - - optim: - # We need weight decay for large-scale odel - name: fused_adam - lr: 0.0001 - eps: 1e-8 - betas: [ 0.9, 0.999 ] - weight_decay: 0.01 - sched: - name: WarmupPolicy - warmup_steps: 10000 - warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/fid_inference.yaml b/examples/multimodal/generative/imagen/conf/fid_inference.yaml deleted file mode 100644 index 413da2b8eeac..000000000000 --- a/examples/multimodal/generative/imagen/conf/fid_inference.yaml +++ /dev/null @@ -1,26 +0,0 @@ -num_images_per_promt: 8 # The number of images generated for each promt text -model_name: null # Avaliable model_name defined in pretrained_models.yaml -run_ema_model: True # Whether load the reg/ema model when using pretrained models -customized_model: # Mutually exclusive with model_name - base_ckpt: /aot/exp/nemo-megatron-stacked-ddpm-16n/imagen-nemo/checkpoints/imagen-nemo--reduced_train_loss=0.03-step=100000-consumed_samples=512000000.0.ckpt # Either .ckpt or .nemo is accepatable - base_cfg: examples/multimodal/generative/imagen/conf/base64-500m.yaml # Must provided if loading .ckpt checkpoint - sr256_ckpt: null - sr256_cfg: examples/multimodal/generative/imagen/conf/sr256-400m.yaml - sr1024_ckpt: null - sr1024_cfg: null -target_resolution: 64 # in [64, 256, 1024] -inference_precision: '32' # [16, 32, AMP] -thresholding_method: 'dynamic' -output_path: 'output/imagen-megatron-pipeline-fid' # Save location -record_time: True # Whether to record inference time meta -encoder_path: '/ckpts/encoders' # Set to null if you wish to download encoders on the fly -samplings: - - - step: 250 - cfg: 7.5 - - - step: 20 - cfg: 7.5 - - - diff --git a/examples/multimodal/generative/imagen/conf/imagen_fid_images.yaml b/examples/multimodal/generative/imagen/conf/imagen_fid_images.yaml deleted file mode 100644 index 5a5867cfae50..000000000000 --- a/examples/multimodal/generative/imagen/conf/imagen_fid_images.yaml +++ /dev/null @@ -1,57 +0,0 @@ -name: imagen_fid_images - -fid: - classifier_free_guidance: - - 1 - - 1.5 - - 2 - - 3 - - 4 - - 5 - - 6 - - 7 - nnodes_per_cfg: 1 - ntasks_per_node: 8 - local_task_id: null - num_images_to_eval: 30000 - coco_captions_path: /aot/datasets/coco2014/coco2014_val_sampled_30k/captions - coco_images_path: /aot/datasets/coco2014/coco2014_val/images_256 - save_path: output/fid-launcher-test - ncaptions_per_batch: 4 - save_all_res: False - save_text: False - -infer: - num_images_per_promt: 1 # The number of images generated for each promt text - model_name: null # Avaliable model_name defined in pretrained_models.yaml - run_ema_model: True # Whether load the reg/ema model when using pretrained models - customized_model: # Mutually exclusive with model_name - base_ckpt: /aot/exp/ckpts/imagen-megatron/edm-fused-1150k-ema.nemo # Either .ckpt or .nemo is accepatable - base_cfg: null # Must provided if loading .ckpt checkpoint - sr256_ckpt: /aot/exp/ckpts/imagen-megatron/sr-noise-aug-280k.nemo - sr256_cfg: null - sr1024_ckpt: null - sr1024_cfg: null - target_resolution: 256 # in [64, 256, 1024] - inference_precision: '32' # [16, 32, AMP] - thresholding_method: 'dynamic' - record_time: True # Whether to record inference time meta - encoder_path: '/ckpts/encoders' # Set to null if you wish to download encoders on the fly - samplings: - - - step: 30 - - - step: 20 - -models: - - - restore_from_path: /aot/exp/ckpts/imagen-megatron/edm-fused-1150k-ema.nemo - - - restore_from_path: /aot/exp/ckpts/imagen-megatron/sr-noise-aug-280k.nemo - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: 32 - logger: False # logger provided by exp_manager diff --git a/examples/multimodal/generative/imagen/conf/inference_pipeline.yaml b/examples/multimodal/generative/imagen/conf/inference_pipeline.yaml deleted file mode 100644 index 1b4bbd9e5a17..000000000000 --- a/examples/multimodal/generative/imagen/conf/inference_pipeline.yaml +++ /dev/null @@ -1,42 +0,0 @@ -num_images_per_promt: 4 # The number of images generated for each promt text -model_name: null # Avaliable model_name defined in pretrained_models.yaml -run_ema_model: True # Whether load the reg/ema model when using pretrained models -customized_model: # Mutually exclusive with model_name - base_ckpt: null # Either .ckpt or .nemo is accepatable - base_cfg: examples/multimodal/generative/imagen/conf/base64-500m.yaml # Must provided if loading .ckpt checkpoint - sr256_ckpt: null - sr256_cfg: examples/multimodal/generative/imagen/conf/sr256-400m.yaml - sr1024_ckpt: null - sr1024_cfg: examples/multimodal/generative/imagen/conf/sr1024-400m.yaml -target_resolution: 64 # in [64, 256, 1024] -inference_precision: 32 # [16, 32, AMP] -thresholding_method: dynamic -texts: - - 'a photograph of an astronaut riding a horse' - - 'a highly detailed digital painting of a portal in a mystic forest with many beautiful trees. A person is standing in front of the portal' - - A photo of a Shiba Inu dog with a backpack riding a bike. It is wearing sunglasses and a beach hat. - - A cute corgi lives in a house made out of sushi. - - A high contrast portrait of a very happy fuzzy panda dressed as a chef in a high end kitchen making dough. There is a painting of flowers on the wall behind him. - - A brain riding a rocketship heading towards the moon. - - One cat and two dogs sitting on the grass. - - A wine glass on top of a dog. - - A blue coloured pizza. - - A transparent sculpture of a duck made out of glass. There is a painting on the wall behind it. - - A raccoon wearing cowboy hat and black leather jacket is behind the backyard window. Rain droplets on the window. - -output_path: 'output/imagen_output' # Save location -record_time: True # Whether to record inference time meta -encoder_path: '/ckpts/encoders' # Set to null if you wish to download encoders on the fly -samplings: - - # Base64 - step: 30 - cfg: 7.5 - - # SR256 - step: 20 - cfg: 8 - - # SR1024 - step: 20 - cfg: 7.5 - - - diff --git a/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml b/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml deleted file mode 100644 index 3652267193b1..000000000000 --- a/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml +++ /dev/null @@ -1,145 +0,0 @@ -name: imagen-nemo # The name of your model -allow_tf32: True - -trainer: - devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] - num_nodes: 1 - max_epochs: -1 - max_steps: 2500000 # precedence over max_epochs - logger: False # Provided by exp_manager - precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. - accelerator: gpu - log_every_n_steps: 5 # Interval of logging. - resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. - num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it - enable_checkpointing: False # Provided by exp_manager - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - - -exp_manager: - exp_dir: /train/imagen-1024 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger - name: imagen-sr1024-nf128 - project: imagen - group: nemo-imagen - resume: True - create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger - create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback - checkpoint_callback_params: - monitor: reduced_train_loss - save_top_k: 5 - every_n_epochs: 0 # Save checkpoint frequency. - every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. - filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: True - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False -model: - - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 64 # limited by GPU memory - global_batch_size: 64 # will use more micro batches to reach global batch size - inductor: True - inductor_cudagraphs: False - unet_type: sr - channels_last: True - - unet: - embed_dim: 128 - image_size: 1024 - channels: 3 - channel_mult: [ 1, 2, 4, 8, 8 ] - num_attn_heads: 8 - per_head_channels: 64 - attention_type: cross - atnn_enabled_at: [ 0, 0, 0, 1, 1 ] - feature_pooling_type: attention - stride: 2 - num_resblocks: [ 2, 4, 8, 8, 8 ] - learned_sinu_pos_emb_dim: 0 - use_null_token: False - init_conv_kernel_size: 3 - gradient_checkpointing: False - scale_shift_norm: True - stable_attention: True - flash_attention: False - skip_connection_scaling: True - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - ddp_overlap: True # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce - - noise_cond_aug: True - preconditioning_type: EDM - preconditioning: - loss_type: l2 - sigma_data: 0.5 - p_mean: -1.2 - p_std: 1.2 - # If want to switch to continuous DDPM training, - # use the following config: - # preconditioning_type: DDPM - # preconditioning: - # loss_type: l2 - # pred_objective: noise - # noise_schedule: cosine - # timesteps: 1000 - - conditioning: - embed_dim: 1024 - token_length: 128 - drop_rate: 0.1 - precached_key: embeddings_t5_xxl - out_key: t5_text - - data: - num_workers: 16 - train: - dataset_path: - - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 - - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 - augmentations: - resize_smallest_side: 1024 - center_crop_h_w: 256, 256 - horizontal_flip: False - filterings: - resolution: - method: larger - value: 1024 - estimated_portion: 0.2 # Estimated % of examples left after filtering. This is use to estimate # epoch - target_resolutions: [64, 256] - - webdataset: - use_webdataset: True - object_store: False - infinite_sampler: True - local_root_path: /datasets - verbose: False - - optim: - # We need weight decay for large-scale odel - name: fused_adam - lr: 0.0001 - eps: 1e-8 - betas: [ 0.9, 0.999 ] - weight_decay: 0.01 - sched: - name: WarmupPolicy - warmup_steps: 10000 - warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-400m-edm.yaml b/examples/multimodal/generative/imagen/conf/sr256-400m-edm.yaml deleted file mode 100644 index 22ab0672e577..000000000000 --- a/examples/multimodal/generative/imagen/conf/sr256-400m-edm.yaml +++ /dev/null @@ -1,222 +0,0 @@ -name: imagen-nemo # The name of your model -allow_tf32: True - -trainer: - devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] - num_nodes: 1 - max_epochs: -1 - max_steps: 2500000 # precedence over max_epochs - logger: False # Provided by exp_manager - precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. - accelerator: gpu - log_every_n_steps: 5 # Interval of logging. - resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. - num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it - enable_checkpointing: False # Provided by exp_manager - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - - -exp_manager: - exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger - name: imagen-sr256-nf128 - project: imagen - group: nemo-imagen - resume: True - create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger - create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback - checkpoint_callback_params: - monitor: reduced_train_loss - save_top_k: 5 - every_n_epochs: 0 # Save checkpoint frequency. - every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. - filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: True - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - - -model: - - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 16 # limited by GPU memory - global_batch_size: 16 # will use more micro batches to reach global batch size - inductor: False - inductor_cudagraphs: False - - unet_type: sr-unet - unet: - embed_dim: 128 - image_size: 256 - channels: 3 - num_res_blocks: [2, 2, 3, 4, 3] - channel_mult: [ 1, 2, 4, 6, 6 ] - num_attn_heads: 4 - per_head_channels: 64 - cond_dim: 512 - attention_type: fused - feature_pooling_type: attention - learned_sinu_pos_emb_dim: 0 - attention_resolutions: [32, 16] - dropout: False - use_null_token: False - init_conv_kernel_size: 3 - gradient_checkpointing: False - scale_shift_norm: True - stable_attention: False - flash_attention: True - resblock_updown: False - resample_with_conv: True - low_res_cond: True - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - preconditioning_type: EDM - preconditioning: - loss_type: l2 - sigma_data: 0.5 - p_mean: -1.2 - p_std: 1.2 - - conditioning: - embed_dim: 1024 - token_length: 128 - drop_rate: 0.1 - precached_key: embeddings_t5_xxl - out_key: t5_text - - data: - num_workers: 16 - train: - dataset_path: - - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 - - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 - # - datasets/improved-aesthetic/wdinfo-selene.pkl - augmentations: - resize_smallest_side: 256 - center_crop_h_w: 256, 256 - horizontal_flip: False - filterings: - resolution: - method: larger - value: 256 - estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch - corruption_aug: - target_resolution: [ 64, 256 ] - kernel_radius_dict: # used for blurring & resizing, otherwise, not necessary. - 8: 1 - 16: 2 - 32: 3 - 64: 6 - 128: 11 - 256: 22 - 512: 44 - 1024: 88 - 2048: 176 - 4096: 352 - - blur: - add_random_blur: True - blur_prob1: 0.2 - blur_prob2: 0.2 - - blur_sigma_dict: - 8: 0.25 - 16: 0.5 - 32: 0.75 - 64: 1.5 - 128: 3 - 256: 6 - 512: 12 - 1024: 24 - 2048: 48 - 4096: 96 - - resize: - add_random_resize: True - - resize_prob1: - up: 0.2 - down: 0.2 - keep: 0.6 - resize_prob2: - up: 0.2 - down: 0.2 - keep: 0.6 - - resize_range1: - - 0.8 - - 1.2 - resize_range2: - - 0.8 - - 1.2 - - noise: - add_random_noise: True - gaussian_noise_prob1: 1.0 # 0.5 - gaussian_noise_prob2: 1.0 # 0.5 - gray_noise_prob1: 0.0 # 0.4 - gray_noise_prob2: 0.0 # 0.4 - - gaussian_sigma_range1: - - 0 - - 3 - gaussian_sigma_range2: - - 0 - - 2.5 - - poisson_scale_range1: - - 0.005 - - 3 - poisson_scale_range2: - - 0.005 - - 2.5 - - jpeg: - add_random_compression: False - jpeg_range1: - - 75 - - 95 - jpeg_range2: - - 75 - - 95 - - webdataset: - use_webdataset: True - object_store: False - infinite_sampler: True - local_root_path: /datasets - verbose: False - pbss_checkpoint_saving: - enable: False - pbss_credentials_file: pbss_credentials_joc.secret - save_frequency: 1000 - - optim: - # We need weight decay for large-scale odel - name: fused_adam - lr: 0.0001 - eps: 1e-8 - betas: [ 0.9, 0.999 ] - weight_decay: 0.01 - sched: - name: WarmupPolicy - warmup_steps: 10000 - warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-400m.yaml b/examples/multimodal/generative/imagen/conf/sr256-400m.yaml deleted file mode 100644 index 984bddda2c55..000000000000 --- a/examples/multimodal/generative/imagen/conf/sr256-400m.yaml +++ /dev/null @@ -1,150 +0,0 @@ -name: imagen-nemo # The name of your model -allow_tf32: True - -trainer: - devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] - num_nodes: 1 - max_epochs: -1 - max_steps: 2500000 # precedence over max_epochs - logger: False # Provided by exp_manager - precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. - accelerator: gpu - log_every_n_steps: 5 # Interval of logging. - resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. - num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it - enable_checkpointing: False # Provided by exp_manager - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - - -exp_manager: - exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger - name: imagen-sr256-nf128 - project: imagen - group: nemo-imagen - resume: True - create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger - create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback - checkpoint_callback_params: - monitor: reduced_train_loss - save_top_k: 5 - every_n_epochs: 0 # Save checkpoint frequency. - every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. - filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: True - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - - -model: - - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 16 # limited by GPU memory - global_batch_size: 16 # will use more micro batches to reach global batch size - inductor: True - inductor_cudagraphs: False - channels_last: True - - unet_type: sr-unet - unet: - embed_dim: 128 - image_size: 256 - channels: 3 - num_res_blocks: [2, 2, 3, 4, 3] - channel_mult: [ 1, 2, 4, 6, 6 ] - num_attn_heads: 4 - per_head_channels: 64 - cond_dim: 512 - attention_type: fused - feature_pooling_type: attention - learned_sinu_pos_emb_dim: 0 - attention_resolutions: [32, 16] - dropout: False - use_null_token: False - init_conv_kernel_size: 3 - gradient_checkpointing: False - scale_shift_norm: True - stable_attention: False - flash_attention: True - resblock_updown: False - resample_with_conv: True - low_res_cond: True - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - ddp_overlap: True # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce - - noise_cond_aug: True - preconditioning_type: EDM - preconditioning: - loss_type: l2 - sigma_data: 0.5 - p_mean: -1.2 - p_std: 1.2 - # If want to switch to continuous DDPM training, - # use the following config: - # preconditioning_type: DDPM - # preconditioning: - # loss_type: l2 - # pred_objective: noise - # noise_schedule: cosine - # timesteps: 1000 - - conditioning: - embed_dim: 1024 - token_length: 128 - drop_rate: 0.1 - precached_key: embeddings_t5_xxl - out_key: t5_text - - data: - num_workers: 16 - train: - dataset_path: - - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 - - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 - augmentations: - resize_smallest_side: 256 - center_crop_h_w: 256, 256 - horizontal_flip: False - filterings: - resolution: - method: larger - value: 256 - estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch - target_resolutions: [ 64, 256 ] - - webdataset: - use_webdataset: True - object_store: False - infinite_sampler: True - local_root_path: /datasets - verbose: False - - optim: - # We need weight decay for large-scale odel - name: fused_adam - lr: 0.0001 - eps: 1e-8 - betas: [ 0.9, 0.999 ] - weight_decay: 0.01 - sched: - name: WarmupPolicy - warmup_steps: 10000 - warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-450m-edm.yaml b/examples/multimodal/generative/imagen/conf/sr256-450m-edm.yaml deleted file mode 100644 index cbee92a40a58..000000000000 --- a/examples/multimodal/generative/imagen/conf/sr256-450m-edm.yaml +++ /dev/null @@ -1,222 +0,0 @@ -name: imagen-nemo # The name of your model -allow_tf32: True - -trainer: - devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] - num_nodes: 1 - max_epochs: -1 - max_steps: 2500000 # precedence over max_epochs - logger: False # Provided by exp_manager - precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. - accelerator: gpu - log_every_n_steps: 5 # Interval of logging. - resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. - num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it - enable_checkpointing: False # Provided by exp_manager - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - - -exp_manager: - exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger - name: imagen-sr256-nf128 - project: imagen - group: nemo-imagen - resume: True - create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger - create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback - checkpoint_callback_params: - monitor: reduced_train_loss - save_top_k: 5 - every_n_epochs: 0 # Save checkpoint frequency. - every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. - filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: True - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - - -model: - - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 16 # limited by GPU memory - global_batch_size: 16 # will use more micro batches to reach global batch size - inductor: False - inductor_cudagraphs: False - - unet_type: sr-unet - unet: - embed_dim: 128 - image_size: 256 - channels: 3 - num_res_blocks: [2, 2, 3, 4, 3] - channel_mult: [ 1, 2, 4, 6, 6 ] - num_attn_heads: 4 - per_head_channels: 64 - cond_dim: 512 - attention_type: stacked - feature_pooling_type: attention - learned_sinu_pos_emb_dim: 0 - attention_resolutions: [32, 16] - dropout: False - use_null_token: False - init_conv_kernel_size: 3 - gradient_checkpointing: False - scale_shift_norm: True - stable_attention: False - flash_attention: True - resblock_updown: False - resample_with_conv: True - low_res_cond: True - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - preconditioning_type: EDM - preconditioning: - loss_type: l2 - sigma_data: 0.5 - p_mean: -1.2 - p_std: 1.2 - - conditioning: - embed_dim: 1024 - token_length: 128 - drop_rate: 0.1 - precached_key: embeddings_t5_xxl - out_key: t5_text - - data: - num_workers: 16 - train: - dataset_path: - - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 - - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 - # - datasets/improved-aesthetic/wdinfo-selene.pkl - augmentations: - resize_smallest_side: 256 - center_crop_h_w: 256, 256 - horizontal_flip: False - filterings: - resolution: - method: larger - value: 256 - estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch - corruption_aug: - target_resolution: [ 64, 256 ] - kernel_radius_dict: # used for blurring & resizing, otherwise, not necessary. - 8: 1 - 16: 2 - 32: 3 - 64: 6 - 128: 11 - 256: 22 - 512: 44 - 1024: 88 - 2048: 176 - 4096: 352 - - blur: - add_random_blur: True - blur_prob1: 0.2 - blur_prob2: 0.2 - - blur_sigma_dict: - 8: 0.25 - 16: 0.5 - 32: 0.75 - 64: 1.5 - 128: 3 - 256: 6 - 512: 12 - 1024: 24 - 2048: 48 - 4096: 96 - - resize: - add_random_resize: True - - resize_prob1: - up: 0.2 - down: 0.2 - keep: 0.6 - resize_prob2: - up: 0.2 - down: 0.2 - keep: 0.6 - - resize_range1: - - 0.8 - - 1.2 - resize_range2: - - 0.8 - - 1.2 - - noise: - add_random_noise: True - gaussian_noise_prob1: 1.0 # 0.5 - gaussian_noise_prob2: 1.0 # 0.5 - gray_noise_prob1: 0.0 # 0.4 - gray_noise_prob2: 0.0 # 0.4 - - gaussian_sigma_range1: - - 0 - - 3 - gaussian_sigma_range2: - - 0 - - 2.5 - - poisson_scale_range1: - - 0.005 - - 3 - poisson_scale_range2: - - 0.005 - - 2.5 - - jpeg: - add_random_compression: False - jpeg_range1: - - 75 - - 95 - jpeg_range2: - - 75 - - 95 - - webdataset: - use_webdataset: True - object_store: False - infinite_sampler: True - local_root_path: /datasets - verbose: False - pbss_checkpoint_saving: - enable: False - pbss_credentials_file: pbss_credentials_joc.secret - save_frequency: 1000 - - optim: - # We need weight decay for large-scale odel - name: fused_adam - lr: 0.0001 - eps: 1e-8 - betas: [ 0.9, 0.999 ] - weight_decay: 0.01 - sched: - name: WarmupPolicy - warmup_steps: 10000 - warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-600m-edm-noise.yaml b/examples/multimodal/generative/imagen/conf/sr256-600m-edm-noise.yaml deleted file mode 100644 index 3e5318186961..000000000000 --- a/examples/multimodal/generative/imagen/conf/sr256-600m-edm-noise.yaml +++ /dev/null @@ -1,142 +0,0 @@ -name: imagen-nemo # The name of your model -allow_tf32: True - -trainer: - devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] - num_nodes: 1 - max_epochs: -1 - max_steps: 2500000 # precedence over max_epochs - logger: False # Provided by exp_manager - precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. - accelerator: gpu - log_every_n_steps: 5 # Interval of logging. - resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. - num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it - enable_checkpointing: False # Provided by exp_manager - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - - -exp_manager: - exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger - name: imagen-sr256-nf128 - project: imagen - group: nemo-imagen - resume: True - create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger - create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback - checkpoint_callback_params: - monitor: reduced_train_loss - save_top_k: 5 - every_n_epochs: 0 # Save checkpoint frequency. - every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. - filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: True - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - - -model: - - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 32 # limited by GPU memory - global_batch_size: 32 # will use more micro batches to reach global batch size - inductor: False - inductor_cudagraphs: False - - unet_type: sr - unet: - embed_dim: 128 - image_size: 256 - channels: 3 - channel_mult: [ 1, 2, 4, 8, 8 ] - num_attn_heads: 8 - per_head_channels: 64 - attention_type: stacked - atnn_enabled_at: [ 0, 0, 0, 1, 1 ] - feature_pooling_type: attention - stride: 2 - num_resblocks: [ 2, 4, 8, 8, 8 ] - learned_sinu_pos_emb_dim: 0 - use_null_token: False - init_conv_kernel_size: 3 - gradient_checkpointing: False - scale_shift_norm: True - stable_attention: False - flash_attention: False - skip_connection_scaling: True - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - noise_cond_aug: True - preconditioning_type: EDM - preconditioning: - loss_type: l2 - sigma_data: 0.5 - p_mean: -1.2 - p_std: 1.2 - - conditioning: - embed_dim: 1024 - token_length: 128 - drop_rate: 0.1 - precached_key: embeddings_t5_xxl - out_key: t5_text - - data: - num_workers: 16 - train: - dataset_path: - - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 - - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 - augmentations: - resize_smallest_side: 256 - center_crop_h_w: 256, 256 - horizontal_flip: False - filterings: - resolution: - method: larger - value: 256 - estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch - corruption_aug: - target_resolution: [ 64, 256 ] - - webdataset: - use_webdataset: True - object_store: False - infinite_sampler: True - local_root_path: /datasets - verbose: False - pbss_checkpoint_saving: - enable: False - pbss_credentials_file: pbss_credentials_joc.secret - save_frequency: 1000 - - optim: - # We need weight decay for large-scale odel - name: fused_adam - lr: 0.0001 - eps: 1e-8 - betas: [ 0.9, 0.999 ] - weight_decay: 0.01 - sched: - name: WarmupPolicy - warmup_steps: 10000 - warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-600m-edm.yaml b/examples/multimodal/generative/imagen/conf/sr256-600m-edm.yaml deleted file mode 100644 index 67f05c52ff6e..000000000000 --- a/examples/multimodal/generative/imagen/conf/sr256-600m-edm.yaml +++ /dev/null @@ -1,219 +0,0 @@ -name: imagen-nemo # The name of your model -allow_tf32: True - -trainer: - devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] - num_nodes: 1 - max_epochs: -1 - max_steps: 2500000 # precedence over max_epochs - logger: False # Provided by exp_manager - precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. - accelerator: gpu - log_every_n_steps: 5 # Interval of logging. - resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. - num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it - enable_checkpointing: False # Provided by exp_manager - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - - -exp_manager: - exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger - name: imagen-sr256-nf128 - project: imagen - group: nemo-imagen - resume: True - create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger - create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback - checkpoint_callback_params: - monitor: reduced_train_loss - save_top_k: 5 - every_n_epochs: 0 # Save checkpoint frequency. - every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. - filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: True - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - - -model: - - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 32 # limited by GPU memory - global_batch_size: 32 # will use more micro batches to reach global batch size - inductor: False - inductor_cudagraphs: False - - unet_type: sr - unet: - embed_dim: 128 - image_size: 256 - channels: 3 - channel_mult: [ 1, 2, 4, 8, 8 ] - num_attn_heads: 8 - per_head_channels: 64 - attention_type: stacked - atnn_enabled_at: [ 0, 0, 0, 1, 1 ] - feature_pooling_type: attention - stride: 2 - num_resblocks: [ 2, 4, 8, 8, 8 ] - learned_sinu_pos_emb_dim: 0 - use_null_token: False - init_conv_kernel_size: 3 - gradient_checkpointing: False - scale_shift_norm: True - stable_attention: False - flash_attention: False - skip_connection_scaling: True - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - preconditioning_type: EDM - preconditioning: - loss_type: l2 - sigma_data: 0.5 - p_mean: -1.2 - p_std: 1.2 - - conditioning: - embed_dim: 1024 - token_length: 128 - drop_rate: 0.1 - precached_key: embeddings_t5_xxl - out_key: t5_text - - data: - num_workers: 16 - train: - dataset_path: - - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 - - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 - # - datasets/improved-aesthetic/wdinfo-selene.pkl - augmentations: - resize_smallest_side: 256 - center_crop_h_w: 256, 256 - horizontal_flip: False - filterings: - resolution: - method: larger - value: 256 - estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch - corruption_aug: - target_resolution: [ 64, 256 ] - kernel_radius_dict: # used for blurring & resizing, otherwise, not necessary. - 8: 1 - 16: 2 - 32: 3 - 64: 6 - 128: 11 - 256: 22 - 512: 44 - 1024: 88 - 2048: 176 - 4096: 352 - - blur: - add_random_blur: True - blur_prob1: 0.2 - blur_prob2: 0.2 - - blur_sigma_dict: - 8: 0.25 - 16: 0.5 - 32: 0.75 - 64: 1.5 - 128: 3 - 256: 6 - 512: 12 - 1024: 24 - 2048: 48 - 4096: 96 - - resize: - add_random_resize: True - - resize_prob1: - up: 0.2 - down: 0.2 - keep: 0.6 - resize_prob2: - up: 0.2 - down: 0.2 - keep: 0.6 - - resize_range1: - - 0.8 - - 1.2 - resize_range2: - - 0.8 - - 1.2 - - noise: - add_random_noise: True - gaussian_noise_prob1: 1.0 # 0.5 - gaussian_noise_prob2: 1.0 # 0.5 - gray_noise_prob1: 0.0 # 0.4 - gray_noise_prob2: 0.0 # 0.4 - - gaussian_sigma_range1: - - 0 - - 3 - gaussian_sigma_range2: - - 0 - - 2.5 - - poisson_scale_range1: - - 0.005 - - 3 - poisson_scale_range2: - - 0.005 - - 2.5 - - jpeg: - add_random_compression: False - jpeg_range1: - - 75 - - 95 - jpeg_range2: - - 75 - - 95 - - webdataset: - use_webdataset: True - object_store: False - infinite_sampler: True - local_root_path: /datasets - verbose: False - pbss_checkpoint_saving: - enable: False - pbss_credentials_file: pbss_credentials_joc.secret - save_frequency: 1000 - - optim: - # We need weight decay for large-scale odel - name: fused_adam - lr: 0.0001 - eps: 1e-8 - betas: [ 0.9, 0.999 ] - weight_decay: 0.01 - sched: - name: WarmupPolicy - warmup_steps: 10000 - warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-600m.yaml b/examples/multimodal/generative/imagen/conf/sr256-600m.yaml deleted file mode 100644 index 115e9dd3099c..000000000000 --- a/examples/multimodal/generative/imagen/conf/sr256-600m.yaml +++ /dev/null @@ -1,146 +0,0 @@ -name: imagen-nemo # The name of your model -allow_tf32: True - -trainer: - devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] - num_nodes: 1 - max_epochs: -1 - max_steps: 2500000 # precedence over max_epochs - logger: False # Provided by exp_manager - precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. - accelerator: gpu - log_every_n_steps: 5 # Interval of logging. - resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. - num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it - enable_checkpointing: False # Provided by exp_manager - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - - -exp_manager: - exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger - name: imagen-sr256-nf128 - project: imagen - group: nemo-imagen - resume: True - create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger - create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback - checkpoint_callback_params: - monitor: reduced_train_loss - save_top_k: 5 - every_n_epochs: 0 # Save checkpoint frequency. - every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. - filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: True - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - - -model: - - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 64 # limited by GPU memory - global_batch_size: 64 # will use more micro batches to reach global batch size - inductor: True - inductor_cudagraphs: False - channels_last: True - - unet_type: sr - unet: - embed_dim: 128 - image_size: 256 - channels: 3 - channel_mult: [ 1, 2, 4, 8, 8 ] - num_attn_heads: 8 - per_head_channels: 64 - attention_type: fused - atnn_enabled_at: [ 0, 0, 0, 1, 1 ] - feature_pooling_type: attention - stride: 2 - num_resblocks: [ 2, 4, 8, 8, 8 ] - learned_sinu_pos_emb_dim: 0 - use_null_token: False - init_conv_kernel_size: 3 - gradient_checkpointing: False - scale_shift_norm: True - stable_attention: False - flash_attention: True - skip_connection_scaling: True - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - ddp_overlap: True # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce - - noise_cond_aug: True - preconditioning_type: EDM - preconditioning: - loss_type: l2 - sigma_data: 0.5 - p_mean: -1.2 - p_std: 1.2 - # If want to switch to continuous DDPM training, - # use the following config: - # preconditioning_type: DDPM - # preconditioning: - # loss_type: l2 - # pred_objective: noise - # noise_schedule: cosine - # timesteps: 1000 - - conditioning: - embed_dim: 1024 - token_length: 128 - drop_rate: 0.1 - precached_key: embeddings_t5_xxl - out_key: t5_text - - data: - num_workers: 16 - train: - dataset_path: - - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 - - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 - augmentations: - resize_smallest_side: 256 - center_crop_h_w: 256, 256 - horizontal_flip: False - filterings: - resolution: - method: larger - value: 256 - estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch - target_resolutions: [64, 256] - webdataset: - use_webdataset: True - object_store: False - infinite_sampler: True - local_root_path: /datasets - verbose: False - - optim: - # We need weight decay for large-scale odel - name: fused_adam - lr: 0.0001 - eps: 1e-8 - betas: [ 0.9, 0.999 ] - weight_decay: 0.01 - sched: - name: WarmupPolicy - warmup_steps: 10000 - warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/generate_fid_images.py b/examples/multimodal/generative/imagen/generate_fid_images.py deleted file mode 100644 index 55ca92ace8bd..000000000000 --- a/examples/multimodal/generative/imagen/generate_fid_images.py +++ /dev/null @@ -1,104 +0,0 @@ -import os -import time - -import torch -from omegaconf.omegaconf import open_dict -from pytorch_lightning import Trainer - -from nemo.collections.multimodal.models.imagen.imagen_pipeline import ImagenPipeline -from nemo.core.config import hydra_runner - - -@hydra_runner(config_path='conf', config_name='imagen_fid_images') -def main(cfg): - # Read configuration parameters - nnodes_per_cfg = cfg.fid.nnodes_per_cfg - ntasks_per_node = cfg.fid.ntasks_per_node - local_task_id = cfg.fid.local_task_id - num_images_to_eval = cfg.fid.num_images_to_eval - path = cfg.fid.coco_captions_path - save_text = cfg.fid.save_text - - node_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0)) - node_id_per_cfg = node_id % nnodes_per_cfg - - current_node_cfg = cfg.fid.classifier_free_guidance[node_id // nnodes_per_cfg] - save_path = os.path.join(cfg.fid.save_path, str(current_node_cfg)) - - # Read and store captions - captions = [] - caption_files = sorted(os.listdir(path)) - assert len(caption_files) >= num_images_to_eval - for file in caption_files[:num_images_to_eval]: - with open(os.path.join(path, file), 'r') as f: - captions += f.readlines() - print(f"The total number of captions to generate is: {len(captions)}") - - # Calculate partition sizes and select the partition for the current node - partition_size_per_node = num_images_to_eval // nnodes_per_cfg - start_idx = node_id_per_cfg * partition_size_per_node - end_idx = (node_id_per_cfg + 1) * partition_size_per_node if node_id_per_cfg != nnodes_per_cfg - 1 else None - captions = captions[start_idx:end_idx] - print(f"Current node {node_id} will generate images from {start_idx} to {end_idx}") - - local_task_id = int(local_task_id) if local_task_id is not None else int(os.environ.get("SLURM_LOCALID", 0)) - partition_size_per_task = int(len(captions) // ntasks_per_node) - - # Select the partition for the current task - start_idx = local_task_id * partition_size_per_task - end_idx = (local_task_id + 1) * partition_size_per_task if local_task_id != ntasks_per_node - 1 else None - input = captions[start_idx:end_idx] - chunk_size = len(input) - - print(f"Current worker {node_id}:{local_task_id} will generate {len(input)} images") - os.makedirs(save_path, exist_ok=True) - - trainer = Trainer() - pipeline = ImagenPipeline.from_pretrained(cfg=cfg.infer, trainer=trainer, megatron_loading=True, megatron_cfg=cfg) - - # Generate images using the model and save them - batch_idx = 0 - batch_size = cfg.fid.ncaptions_per_batch - while True: - if batch_idx * batch_size >= len(input): - break - batch_captions = input[batch_idx * batch_size : (batch_idx + 1) * batch_size] - # Different seed for every image - seeds = [local_task_id * chunk_size + batch_idx * batch_size + idx for idx in range(len(batch_captions))] - with torch.no_grad(): - images, all_res_images, *_ = pipeline( - prompts=batch_captions, seed=seeds, single_batch_mode=True, classifier_free_guidance=current_node_cfg, - ) - - if cfg.fid.save_all_res: - all_res = [f'_RES{model.image_size}' for model in pipeline.models] - outpaths = [] - # for the highest resolution we save as its original name so that - # we can automate the CLIP & FID calculation process from Megatron-Launcher - all_res[-1] = '' - for res in all_res: - outpath = f"{save_path}{res}" - os.makedirs(outpath, exist_ok=True) - outpaths.append(outpath) - for outpath, one_res in zip(outpaths, all_res_images): - for idx, (caption, image) in enumerate(zip(batch_captions, one_res[0])): - image_idx = local_task_id * chunk_size + batch_idx * batch_size + idx - image.save(os.path.join(outpath, f'image{image_idx:06d}.png')) - if save_text: - with open(os.path.join(outpath, f'image{image_idx:06d}.txt'), 'w') as f: - f.writelines(caption) - else: - for idx, (caption, image) in enumerate(zip(batch_captions, images[0])): - image_idx = local_task_id * chunk_size + batch_idx * batch_size + idx - image.save(os.path.join(save_path, f'image{image_idx:06d}.png')) - if save_text: - with open(os.path.join(save_path, f'image{image_idx:06d}.txt'), 'w') as f: - f.writelines(caption) - print( - f'Save {len(images[0])} images to {save_path} with name from image{(local_task_id*chunk_size+batch_idx*batch_size):06d}.png to image{image_idx:06d}.png' - ) - batch_idx += 1 - - -if __name__ == "__main__": - main() diff --git a/examples/multimodal/generative/imagen/imagen_generate_images.py b/examples/multimodal/generative/imagen/imagen_generate_images.py deleted file mode 100644 index b7e4c857decc..000000000000 --- a/examples/multimodal/generative/imagen/imagen_generate_images.py +++ /dev/null @@ -1,62 +0,0 @@ -import os -import pickle - -import torch -from omegaconf import OmegaConf -from pytorch_lightning import Trainer - -from nemo.collections.multimodal.models.imagen.imagen_pipeline import ImagenPipeline, ImagenPipelineConfig -from nemo.core.config import hydra_runner - - -@hydra_runner(config_path='conf', config_name='fid_inference.yaml') -def main(inference_config): - inference_config: ImagenPipelineConfig = OmegaConf.merge(ImagenPipelineConfig(), inference_config) - captions = pickle.load(open('coco_captions.pkl', 'rb')) - ntasks = 8 - if os.environ.get('CUDA_VISIBLE_DEVICES'): - # Multi-GPU - task_id = int(os.environ.get("CUDA_VISIBLE_DEVICES", 0)) - else: - # Single GPU - task_id = 0 - chuncksize = int(len(captions) // ntasks) - if task_id != ntasks - 1: - input = captions[task_id * chuncksize : (task_id + 1) * chuncksize] - else: - input = captions[task_id * chuncksize :] - captions = input - - trainer = Trainer() - pipeline = ImagenPipeline.from_pretrained(cfg=inference_config, trainer=trainer) - batch_size = 16 - batch_idx = 0 - - possible_res = [64, 256] # [64, 256] - outpaths = [] - for res in possible_res: - outpath = f'{inference_config.output_path}_RES{res}' - os.makedirs(outpath, exist_ok=True) - outpaths.append(outpath) - while True: - if batch_idx * batch_size >= len(captions): - break - batch_captions = captions[batch_idx * batch_size : (batch_idx + 1) * batch_size] - - # Different seed for every image - seeds = [task_id * chuncksize + batch_idx * batch_size + idx for idx in range(len(batch_captions))] - seed = batch_idx + chuncksize - - with torch.no_grad(): - images, all_res_images, throughput = pipeline(prompts=batch_captions, seed=seeds, single_batch_mode=True,) - - for outpath, one_res in zip(outpaths, all_res_images): - for idx, (caption, image) in enumerate(zip(batch_captions, one_res[0])): - image.save(os.path.join(outpath, f'image_{task_id*chuncksize+batch_idx*batch_size+idx}.png')) - with open(os.path.join(outpath, f'image_{task_id*chuncksize+batch_idx*batch_size+idx}.txt'), 'w') as f: - f.writelines(caption) - batch_idx += 1 - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/generative/imagen/imagen_infer.py b/examples/multimodal/generative/imagen/imagen_infer.py deleted file mode 100644 index 97402b3ee500..000000000000 --- a/examples/multimodal/generative/imagen/imagen_infer.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -from omegaconf import OmegaConf -from pytorch_lightning import Trainer - -from nemo.collections.multimodal.models.imagen.imagen_pipeline import ImagenPipeline, ImagenPipelineConfig -from nemo.core.config import hydra_runner - - -@hydra_runner(config_path='conf', config_name='inference_pipeline.yaml') -def main(inference_config): - if inference_config.get('infer'): - # invoking from launcher - trainer = Trainer(**inference_config.trainer) - inference_config = inference_config.infer - else: - trainer = Trainer() - inference_config: ImagenPipelineConfig = OmegaConf.merge(ImagenPipelineConfig(), inference_config) - pipeline = ImagenPipeline.from_pretrained(cfg=inference_config, trainer=trainer) - - # Texts are passed in the config files - images, all_res, throughput = pipeline() - - # Save images - outpath = inference_config.output_path - os.makedirs(outpath, exist_ok=True) - for text, pils in zip(inference_config.texts, images): - for idx, image in enumerate(pils): - image.save(os.path.join(outpath, f'{text}_{idx}.png')) - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/generative/imagen/imagen_training.py b/examples/multimodal/generative/imagen/imagen_training.py deleted file mode 100644 index 66a1f0aedefb..000000000000 --- a/examples/multimodal/generative/imagen/imagen_training.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import pytorch_lightning as pl -import torch -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from pytorch_lightning.trainer.connectors.checkpoint_connector import _CheckpointConnector -from torch._dynamo import disable -from torch._inductor import config as inductor_config - -from nemo.collections.multimodal.models.imagen.imagen import MegatronImagen -from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder -from nemo.collections.nlp.parts.nlp_overrides import ( - GradScaler, - MegatronHalfPrecisionPlugin, - NLPDDPStrategy, - PipelineMixedPrecisionPlugin, -) -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.exp_manager import exp_manager - - -@hydra_runner(config_path='conf', config_name='base64-500m') -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - trainer = MegatronTrainerBuilder(cfg).create_trainer() - exp_manager(trainer, cfg.exp_manager) - - # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams - with open_dict(cfg): - cfg.model.precision = cfg.trainer.precision - - model = MegatronImagen(cfg.model, trainer) - - if cfg.model.get("inductor", False): - # Temporary hack to get rid of TorchDynamo issue with DDP - # TODO: remove these if https://github.com/pytorch/pytorch/issues/94574 fixed - torch.arange = disable(torch.arange) - torch.ones = disable(torch.ones) - torch.zeros = disable(torch.zeros) - - # TODO: remove this if latest TorchDynamo fixed `t.uniform_(0, 1)` failure - torch.Tensor.uniform_ = disable(torch.Tensor.uniform_) - - # Disable TorchDynamo for unsupported function - pl.core.LightningModule.log = disable(pl.core.LightningModule.log) - - # TorchInductor with CUDA graph can lead to OOM - inductor_config.triton.cudagraphs = cfg.model.inductor_cudagraphs - model.model.model.unet = torch.compile(model.model.model.unet) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/generative/instruct_pix2pix/conf/sd_edit.yaml b/examples/multimodal/generative/instruct_pix2pix/conf/sd_edit.yaml deleted file mode 100644 index 75eed9d9b6bf..000000000000 --- a/examples/multimodal/generative/instruct_pix2pix/conf/sd_edit.yaml +++ /dev/null @@ -1,23 +0,0 @@ -edit: - resolution: 256 - steps: 100 - input: path/to/input/picture - outpath: path/to/output/folder - prompt: "" - cfg_text: 7.5 - cfg_image: 1.2 - num_images_per_prompt: 8 - combine_images: [ 2, 4 ] # [row, column] - seed: 1234 - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -model: - restore_from_path: null # Path to a trained instruct pix2pix .nemo file - precision: ${trainer.precision} - diff --git a/examples/multimodal/generative/instruct_pix2pix/conf/sd_finetune.yaml b/examples/multimodal/generative/instruct_pix2pix/conf/sd_finetune.yaml deleted file mode 100644 index 34ef1f436cd6..000000000000 --- a/examples/multimodal/generative/instruct_pix2pix/conf/sd_finetune.yaml +++ /dev/null @@ -1,168 +0,0 @@ -name: instruct-pix2pix-train - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: 16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 # PTL default. In practice, max_steps will be reached first. - max_steps: 10000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 1 - val_check_interval: 100 - limit_val_batches: 50 - limit_test_batches: 500 - accumulate_grad_batches: 1 - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: instruct-pix2pix - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - create_checkpoint_callback: True - create_tensorboard_logger: True - checkpoint_callback_params: - save_top_k: 4 - mode: min - monitor: val/loss - filename: 'instruct-pix2pix--{val/loss:.4f}-{step}-{consumed_samples}' - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: False - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - -model: - ckpt_path: null # load checkpoint weights from previous stages for fine-tuning - precision: ${trainer.precision} - micro_batch_size: 32 - global_batch_size: 32 # `= micro_batch_size * total_devices` fake global batch size for sampler - - linear_start: 0.00085 - linear_end: 0.012 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: edited - cond_stage_key: edit # txt for cifar, caption for pbss - image_size: 32 - channels: 4 - cond_stage_trainable: false - conditioning_key: hybrid - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - scale_by_std: False - - ignore_keys: [ ] - parameterization: eps - clip_denoised: True - load_only_unet: False - cosine_s: 8e-3 - given_betas: - original_elbo_weight: 0 - v_posterior: 0 - l_simple_weight: 1 - use_positional_encodings: False - learn_logvar: False - logvar_init: 0 - beta_schedule: linear - loss_type: l2 - concat_mode: True - cond_stage_forward: - text_embedding_dropout_rate: 0 - fused_opt: True - inductor: False - inductor_cudagraphs: False - - unet_config: - _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel - from_pretrained: - image_size: 32 # unused - in_channels: 8 - out_channels: 4 - model_channels: 320 - attention_resolutions: - - 4 - - 2 - - 1 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 4 - - 4 - num_heads: 8 - use_spatial_transformer: true - transformer_depth: 1 - context_dim: 768 - use_checkpoint: False - legacy: False - use_flash_attention: False - - first_stage_config: - _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL - from_pretrained: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [ ] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder - version: openai/clip-vit-large-patch14 - device: cuda - max_length: 77 - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - optim: - name: fused_adam - lr: 1e-4 - weight_decay: 0. - betas: - - 0.9 - - 0.999 - sched: - name: WarmupHoldPolicy - warmup_steps: 100 - hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant - - data: - # Path to instruct-pix2pix dataset must be specified by the user. - # https://github.com/timothybrooks/instruct-pix2pix#generated-dataset - data_path: ??? - num_workers: 2 - dataloader_type: cyclic # cyclic - validation_drop_last: True # Set to false if the last partial validation samples is to be consumed diff --git a/examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py b/examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py deleted file mode 100644 index 83658fd1194a..000000000000 --- a/examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import annotations - -import math -import os -import random -import sys -from argparse import ArgumentParser - -import einops -import numpy as np -import torch -import torch.nn as nn -from einops import rearrange, repeat -from omegaconf import OmegaConf, open_dict -from PIL import Image, ImageOps -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from torch import autocast - -from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit -from nemo.collections.multimodal.models.stable_diffusion.samplers.k_diffusion import ( - DiscreteEpsDDPMDenoiser, - sample_euler_ancestral, -) -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector -from nemo.core.config import hydra_runner -from nemo.utils import logging - - -class CFGDenoiser(nn.Module): - def __init__(self, model): - super().__init__() - self.inner_model = model - - def forward(self, z, sigma, cond, uncond, text_cfg_scale, image_cfg_scale): - cfg_z = einops.repeat(z, "b ... -> (n b) ...", n=3) - cfg_sigma = einops.repeat(sigma, "b ... -> (n b) ...", n=3) - cfg_cond = { - "c_crossattn": [torch.cat([cond["c_crossattn"][0], uncond["c_crossattn"][0], uncond["c_crossattn"][0]])], - "c_concat": [torch.cat([cond["c_concat"][0], cond["c_concat"][0], uncond["c_concat"][0]])], - } - out_cond, out_img_cond, out_uncond = self.inner_model(cfg_z, cfg_sigma, cond=cfg_cond).chunk(3) - out = out_uncond + text_cfg_scale * (out_cond - out_img_cond) + image_cfg_scale * (out_img_cond - out_uncond) - return out - - -@hydra_runner(config_path='conf', config_name='sd_edit') -def main(cfg): - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - with open_dict(cfg): - edit_cfg = cfg.pop("edit") - - def model_cfg_modifier(model_cfg): - model_cfg.precision = cfg.trainer.precision - model_cfg.ckpt_path = None - model_cfg.inductor = False - - trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusionEdit, cfg=cfg, model_cfg_modifier=model_cfg_modifier, - ) - - # inference use the latent diffusion part of megatron wrapper - model = megatron_diffusion_model.model - model_wrap = DiscreteEpsDDPMDenoiser(model) - model_wrap_cfg = CFGDenoiser(model_wrap) - null_token = model.get_learned_conditioning([""]) - - seed = random.randint(0, 100000) if edit_cfg.seed is None else edit_cfg.seed - input_image = Image.open(edit_cfg.input).convert("RGB") - width, height = input_image.size - factor = edit_cfg.resolution / max(width, height) - factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height) - width = int((width * factor) // 64) * 64 - height = int((height * factor) // 64) * 64 - input_image = ImageOps.fit(input_image, (width, height), method=Image.Resampling.LANCZOS) - - if edit_cfg.prompt == "": - input_image.save(edit_cfg.output) - return - - # get autocast_dtype - if trainer.precision in ['bf16', 'bf16-mixed']: - autocast_dtype = torch.bfloat16 - elif trainer.precision in [32, '32', '32-true']: - autocast_dtype = torch.float - elif trainer.precision in [16, '16', '16-mixed']: - autocast_dtype = torch.half - else: - raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') - - num_images_per_prompt = edit_cfg.num_images_per_prompt - with torch.no_grad(), torch.cuda.amp.autocast( - enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, - ): - cond = {} - cond["c_crossattn"] = [ - repeat(model.get_learned_conditioning([edit_cfg.prompt]), "1 ... -> n ...", n=num_images_per_prompt) - ] - input_image = 2 * torch.tensor(np.array(input_image)).float() / 255 - 1 - input_image = rearrange(input_image, "h w c -> 1 c h w").cuda(non_blocking=True) - cond["c_concat"] = [ - repeat(model.encode_first_stage(input_image).mode(), "1 ... -> n ...", n=num_images_per_prompt) - ] - - uncond = {} - uncond["c_crossattn"] = [repeat(null_token, "1 ... -> n ...", n=num_images_per_prompt)] - uncond["c_concat"] = [torch.zeros_like(cond["c_concat"][0])] - - sigmas = model_wrap.get_sigmas(edit_cfg.steps) - - extra_args = { - "cond": cond, - "uncond": uncond, - "text_cfg_scale": edit_cfg.cfg_text, - "image_cfg_scale": edit_cfg.cfg_image, - } - torch.manual_seed(seed) - z = torch.randn_like(cond["c_concat"][0]) - z = z * sigmas[0] - z = sample_euler_ancestral(model_wrap_cfg, z, sigmas, extra_args=extra_args) - x = model.decode_first_stage(z) - x = torch.clamp((x + 1.0) / 2.0, min=0.0, max=1.0) - x = 255.0 * rearrange(x, "n c h w -> n h w c") - - os.makedirs(edit_cfg.outpath, exist_ok=True) - if edit_cfg.get("combine_images") is None: - for idx, image in enumerate(x): - edited_image = Image.fromarray(image.type(torch.uint8).cpu().numpy()) - save_path = os.path.join( - edit_cfg.outpath, - f'{edit_cfg.prompt.replace(" ", "_")}_{edit_cfg.cfg_text}_{edit_cfg.cfg_image}_{seed}_{idx}.jpg', - ) - edited_image.save(save_path) - logging.info(f"Edited image saved to: {save_path}") - else: - row, column = edit_cfg.combine_images - width, height = x.size(2), x.size(1) - total_width, total_height = width * column, height * row - edited_image = Image.new('RGB', (total_width, total_height)) - x_offset = 0 - y_offset = 0 - for idx, image in enumerate(x): - image = Image.fromarray(image.type(torch.uint8).cpu().numpy()) - edited_image.paste(image, (x_offset, y_offset)) - x_offset += image.size[0] - if (idx + 1) % column == 0: - x_offset = 0 - y_offset += height - save_path = os.path.join( - edit_cfg.outpath, - f'{edit_cfg.prompt.replace(" ", "_")}_{edit_cfg.cfg_text}_{edit_cfg.cfg_image}_{seed}_combine.jpg', - ) - edited_image.save(save_path) - logging.info(f"Edited image saved to: {save_path}") - - -if __name__ == "__main__": - main() diff --git a/examples/multimodal/generative/instruct_pix2pix/sd_finetune.py b/examples/multimodal/generative/instruct_pix2pix/sd_finetune.py deleted file mode 100644 index 430fc5af0ec9..000000000000 --- a/examples/multimodal/generative/instruct_pix2pix/sd_finetune.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from omegaconf.omegaconf import OmegaConf, open_dict - -from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit -from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector - -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.exp_manager import exp_manager - - -@hydra_runner(config_path="conf", config_name="sd_finetune") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - trainer = MegatronTrainerBuilder(cfg).create_trainer() - exp_manager(trainer, cfg.exp_manager) - - # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams - with open_dict(cfg): - cfg.model.precision = cfg.trainer.precision - - model = MegatronLatentDiffusionEdit(cfg.model, trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/generative/nerf/benchmark_callback.py b/examples/multimodal/generative/nerf/benchmark_callback.py deleted file mode 100644 index 7d216a6679c4..000000000000 --- a/examples/multimodal/generative/nerf/benchmark_callback.py +++ /dev/null @@ -1,82 +0,0 @@ -import time -from typing import Optional - -from pytorch_lightning import Callback, LightningModule, Trainer - -from nemo.utils import logging - - -class BenchmarkCallback(Callback): - def __init__( - self, - start_benchmark_at_step: int = 0, - stop_benchmark_at_step: Optional[int] = None, - log_every_n_steps: int = 10, - ): - super().__init__() - self.start_benchmark_at_step = start_benchmark_at_step - self.stop_benchmark_at_step = stop_benchmark_at_step - self.log_every_n_steps = log_every_n_steps - self.train_times = [] - self.val_times = [] - self.train_steps_times = [] - self.val_steps_times = [] - - def should_benchmark(self, trainer: Trainer): - if self.stop_benchmark_at_step is None: - return trainer.global_step >= self.start_benchmark_at_step - return self.start_benchmark_at_step <= trainer.global_step <= self.stop_benchmark_at_step - - def on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule): - self.epoch_start_time = time.time() - - def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule): - if self.should_benchmark(trainer): - epoch_time = time.time() - self.epoch_start_time - self.train_times.append(epoch_time) - logging.info(f'Training-Epoch-{trainer.current_epoch}-Time: {epoch_time} [sec]') - - def on_train_batch_start(self, trainer: Trainer, pl_module: LightningModule, batch, batch_idx: int): - self.step_start_time = time.time() - - def on_train_batch_end(self, trainer: Trainer, pl_module: LightningModule, outputs, batch, batch_idx: int): - if self.should_benchmark(trainer): - step_time = time.time() - self.step_start_time - self.train_steps_times.append(step_time) - if trainer.global_step % self.log_every_n_steps == 0: - logging.info(f'Training-Step-{trainer.global_step}-Time: {step_time} [sec]') - - def on_validation_epoch_start(self, trainer: Trainer, pl_module: LightningModule): - self.val_start_time = time.time() - - def on_validation_epoch_end(self, trainer: Trainer, pl_module: LightningModule): - if self.should_benchmark(trainer): - val_time = time.time() - self.val_start_time - self.val_times.append(val_time) - logging.info(f'Validation-Epoch-{trainer.current_epoch}-Time: {val_time} [sec]') - - def on_validation_batch_start( - self, trainer: Trainer, pl_module: LightningModule, batch, batch_idx: int, dataloader_idx: int - ): - self.val_step_start_time = time.time() - - def on_validation_batch_end( - self, trainer: Trainer, pl_module: LightningModule, outputs, batch, batch_idx: int, dataloader_idx: int - ): - if self.should_benchmark(trainer): - val_step_time = time.time() - self.val_step_start_time - self.val_steps_times.append(val_step_time) - if trainer.global_step % self.log_every_n_steps == 0: - logging.info(f'Validation-Step-{trainer.global_step}-Time: {val_step_time} [sec]') - - def on_fit_end(self, trainer: Trainer, pl_module: LightningModule): - if self.should_benchmark(trainer): - avg_train_time = sum(self.train_times) / len(self.train_times) - avg_val_time = sum(self.val_times) / len(self.val_times) - avg_train_step_time = sum(self.train_steps_times) / len(self.train_steps_times) - avg_val_step_time = sum(self.val_steps_times) / len(self.val_steps_times) - - logging.info(f'Average-Training-Epoch-Time: {avg_train_time} [sec]') - logging.info(f'Average-Validation-Epoch-Time: {avg_val_time} [sec]') - logging.info(f'Average-Training-Step-Time: {avg_train_step_time} [sec]') - logging.info(f'Average-Validation-Step-Time: {avg_val_step_time} [sec]') diff --git a/examples/multimodal/generative/nerf/config/config.yaml b/examples/multimodal/generative/nerf/config/config.yaml deleted file mode 100644 index 1adcbae72c26..000000000000 --- a/examples/multimodal/generative/nerf/config/config.yaml +++ /dev/null @@ -1,52 +0,0 @@ -defaults: - - model: dreamfusion - - _self_ - -name: DreamFusion -seed: 2023 -mode: fit # fit, validate, test, export-mesh - -# export-mesh options -mesh_fname: /results/mesh.obj # mesh file name when mode=export-mesh -mesh_resolution: 128 # Mesh resolution when mode=export-mesh - -# benchmark options -enable_benchmark: False -benchmark_callback: - _target_: benchmark_callback.BenchmarkCallback - log_every_n_steps: 1 - -trainer: - devices: 1 - num_nodes: 1 - precision: 16 - max_steps: 10000 # example configs: dreamfuions=10000, dmtet=5000 - accelerator: gpu - enable_checkpointing: False - logger: False - log_every_n_steps: 1 - val_check_interval: 100 - accumulate_grad_batches: 1 - benchmark: False - enable_model_summary: True - -exp_manager: - name: ${name} - exp_dir: /results - create_tensorboard_logger: False - create_wandb_logger: False - wandb_logger_kwargs: - project: dreamfusion - group: nemo-df - name: ${name} - resume: True - create_checkpoint_callback: True - checkpoint_callback_params: - every_n_epochs: 0 - every_n_train_steps: 1000 # TODO(ahmadki): being ignored ? - monitor: loss - filename: '${name}-{step}' - save_top_k: -1 - always_save_nemo: False - resume_if_exists: True - resume_ignore_no_checkpoint: True diff --git a/examples/multimodal/generative/nerf/config/model/background/random.yaml b/examples/multimodal/generative/nerf/config/model/background/random.yaml deleted file mode 100644 index 9cfb09fc6eca..000000000000 --- a/examples/multimodal/generative/nerf/config/model/background/random.yaml +++ /dev/null @@ -1,3 +0,0 @@ -_target_: nemo.collections.multimodal.modules.nerf.background.random_background.RandomBackground -base_background: [1, 1, 1] -random_ratio: 0.5 diff --git a/examples/multimodal/generative/nerf/config/model/background/static.yaml b/examples/multimodal/generative/nerf/config/model/background/static.yaml deleted file mode 100644 index eb82f9944991..000000000000 --- a/examples/multimodal/generative/nerf/config/model/background/static.yaml +++ /dev/null @@ -1,2 +0,0 @@ -_target_: nemo.collections.multimodal.modules.nerf.background.static_background.StaticBackground -background: [0, 0, 1] # rgb diff --git a/examples/multimodal/generative/nerf/config/model/background/tcnn.yaml b/examples/multimodal/generative/nerf/config/model/background/tcnn.yaml deleted file mode 100644 index 8daf7bcd8349..000000000000 --- a/examples/multimodal/generative/nerf/config/model/background/tcnn.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_target_: nemo.collections.multimodal.modules.nerf.background.tcnn_background.TCNNBackground -bound: 1 -encoder_num_input_dims: 3 # 3 directions -encoder_cfg: - otype: "HashGrid" - n_levels: 16 - n_features_per_level: 2 - log2_hashmap_size: 19 - base_resolution: 16 - interpolation: "Smoothstep" - per_level_scale: # default is np.exp2(np.log2(2048 * bound / 16) / (16 - 1)) - -background_net_num_output_dims: 3 # rgb -background_net_cfg: - otype: "FullyFusedMLP" - activation: "ReLU" - output_activation: "None" - n_neurons: 32 - n_hidden_layers: 2 diff --git a/examples/multimodal/generative/nerf/config/model/background/torchngp.yaml b/examples/multimodal/generative/nerf/config/model/background/torchngp.yaml deleted file mode 100644 index b77778099e79..000000000000 --- a/examples/multimodal/generative/nerf/config/model/background/torchngp.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_target_: nemo.collections.multimodal.modules.nerf.background.torchngp_background.TorchNGPBackground - -encoder_type: "frequency" -encoder_input_dims: 3 -encoder_multi_res: 6 - -num_output_dims: 3 -net_cfg: - num_hidden_dims: 32 - num_layers: 2 - bias: True diff --git a/examples/multimodal/generative/nerf/config/model/data/data.yaml b/examples/multimodal/generative/nerf/config/model/data/data.yaml deleted file mode 100644 index 0b5f88b9f1fb..000000000000 --- a/examples/multimodal/generative/nerf/config/model/data/data.yaml +++ /dev/null @@ -1,41 +0,0 @@ -_target_: data.AggregatorDataModule - -train_batch_size: 1 -train_shuffle: false -train_dataset: - _target_: nemo.collections.multimodal.data.nerf.random_poses.RandomPosesDataset - internal_batch_size: 100 - width: 64 - height: 64 - radius_range: [3.0, 3.5] - theta_range: [45, 105] - phi_range: [-180, 180] - fovx_range: [10, 30] - fovy_range: [10, 30] - jitter: False - jitter_center: 0.2 - jitter_target: 0.2 - jitter_up: 0.02 - uniform_sphere_rate: 0 - angle_overhead: 30 - angle_front: 60 - -val_batch_size: 1 -val_shuffle: false -val_dataset: - _target_: nemo.collections.multimodal.data.nerf.circle_poses.CirclePosesDataset - size: 5 - width: 800 - height: 800 - angle_overhead: 30 - angle_front: 60 - -test_batch_size: 1 -test_shuffle: false -test_dataset: - _target_: nemo.collections.multimodal.data.nerf.circle_poses.CirclePosesDataset - size: 100 - width: 800 - height: 800 - angle_overhead: 30 - angle_front: 60 diff --git a/examples/multimodal/generative/nerf/config/model/dreamfusion-dmtet.yaml b/examples/multimodal/generative/nerf/config/model/dreamfusion-dmtet.yaml deleted file mode 100644 index bfadd4f426b3..000000000000 --- a/examples/multimodal/generative/nerf/config/model/dreamfusion-dmtet.yaml +++ /dev/null @@ -1,40 +0,0 @@ -_target_: nemo.collections.multimodal.models.nerf.dreamfusion.DreamFusion # TODO(ahmadki): dreamfusion-dmetet should have it's own class -defaults: - - nerf: torchngp - - background: torchngp - - material: basic_shading - - renderer: nvdiffrast - - guidance: sd_huggingface - - optim: adan - - loss: dmtet - - data: data - - _self_ - -### model options -resume_from_checkpoint: -prompt: 'a hamburger' -negative_prompt: '' -front_prompt: ', front view' -side_prompt: ', side view' -back_prompt: ', back view' -update_extra_interval: 16 -guidance_scale: 100 -export_video: False - -iters: ${trainer.max_steps} -# TODO(ahmadki): move to database -latent_iter_ratio: 0.0 -albedo_iter_ratio: 0 -min_ambient_ratio: 0.1 -textureless_ratio: 0.2 - -data: - train_dataset: - width: 512 - height: 512 - val_dataset: - width: 800 - height: 800 - test_dataset: - width: 800 - height: 800 diff --git a/examples/multimodal/generative/nerf/config/model/dreamfusion.yaml b/examples/multimodal/generative/nerf/config/model/dreamfusion.yaml deleted file mode 100644 index a67393341b53..000000000000 --- a/examples/multimodal/generative/nerf/config/model/dreamfusion.yaml +++ /dev/null @@ -1,40 +0,0 @@ -_target_: nemo.collections.multimodal.models.nerf.dreamfusion.DreamFusion -defaults: - - nerf: torchngp - - background: static - - material: basic_shading - - renderer: torchngp_raymarching - - guidance: sd_huggingface - - optim: adan - - loss: dreamfusion - - data: data - - _self_ - -### model options -resume_from_checkpoint: -prompt: 'a hamburger' -negative_prompt: '' -front_prompt: ', front view' -side_prompt: ', side view' -back_prompt: ', back view' -update_extra_interval: 16 -guidance_scale: 100 -export_video: False - -iters: ${trainer.max_steps} -# TODO(ahmadki): move to database -latent_iter_ratio: 0.2 -albedo_iter_ratio: 0.0 -min_ambient_ratio: 0.1 -textureless_ratio: 0.2 - -data: - train_dataset: - width: 64 - height: 64 - val_dataset: - width: 800 - height: 800 - test_dataset: - width: 800 - height: 800 diff --git a/examples/multimodal/generative/nerf/config/model/guidance/sd_huggingface.yaml b/examples/multimodal/generative/nerf/config/model/guidance/sd_huggingface.yaml deleted file mode 100644 index a8b7adca3c55..000000000000 --- a/examples/multimodal/generative/nerf/config/model/guidance/sd_huggingface.yaml +++ /dev/null @@ -1,4 +0,0 @@ -_target_: nemo.collections.multimodal.modules.nerf.guidance.stablediffusion_huggingface_pipeline.StableDiffusion -precision: ${trainer.precision} -model_key: stabilityai/stable-diffusion-2-1-base -t_range: [0.02, 0.98] diff --git a/examples/multimodal/generative/nerf/config/model/guidance/sd_nemo.yaml b/examples/multimodal/generative/nerf/config/model/guidance/sd_nemo.yaml deleted file mode 100644 index fd4517ec1f7c..000000000000 --- a/examples/multimodal/generative/nerf/config/model/guidance/sd_nemo.yaml +++ /dev/null @@ -1,4 +0,0 @@ -_target_: nemo.collections.multimodal.modules.nerf.guidance.stablediffusion_nemo_pipeline.StableDiffusion -checkpoint: /sd_checkpoints/nemo-1.5/sd-1.5.nemo -sampler_type: 'DDIM' -t_range: [0.02, 0.98] diff --git a/examples/multimodal/generative/nerf/config/model/guidance/sd_trt.yaml b/examples/multimodal/generative/nerf/config/model/guidance/sd_trt.yaml deleted file mode 100644 index 45c1e2ac8fb5..000000000000 --- a/examples/multimodal/generative/nerf/config/model/guidance/sd_trt.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_target_: nemo.collections.multimodal.modules.nerf.guidance.stablediffusion_trt_pipeline.StableDiffusion -checkpoint: /sd_checkpoints/nemo-1.5/sd-1.5.nemo -plan_dir: /sd_checkpoints/nemo-1.5/plan -sampler_type=: DDIM" -t_range: [0.02, 0.98] diff --git a/examples/multimodal/generative/nerf/config/model/loss/dmtet.yaml b/examples/multimodal/generative/nerf/config/model/loss/dmtet.yaml deleted file mode 100644 index 188c1034fc27..000000000000 --- a/examples/multimodal/generative/nerf/config/model/loss/dmtet.yaml +++ /dev/null @@ -1,8 +0,0 @@ -lambda_sds: 1.0 -lambda_opacity: 0.0 -lambda_entropy: 0.0 -lambda_orientation: 0.0 -lambda_2d_normal_smooth: 0.0 -lambda_3d_normal_smooth: 0.0 -lambda_mesh_normal: 0.5 -lambda_mesh_laplacian: 0.5 diff --git a/examples/multimodal/generative/nerf/config/model/loss/dreamfusion.yaml b/examples/multimodal/generative/nerf/config/model/loss/dreamfusion.yaml deleted file mode 100644 index 8cfd4b47eb51..000000000000 --- a/examples/multimodal/generative/nerf/config/model/loss/dreamfusion.yaml +++ /dev/null @@ -1,8 +0,0 @@ -lambda_sds: 1.0 -lambda_opacity: 0.0 -lambda_entropy: 1e-3 -lambda_orientation: 1e-2 -lambda_2d_normal_smooth: 0.0 -lambda_3d_normal_smooth: 0.0 -lambda_mesh_normal: 0.0 -lambda_mesh_laplacian: 0.0 diff --git a/examples/multimodal/generative/nerf/config/model/material/basic_shading.yaml b/examples/multimodal/generative/nerf/config/model/material/basic_shading.yaml deleted file mode 100644 index 802defad1637..000000000000 --- a/examples/multimodal/generative/nerf/config/model/material/basic_shading.yaml +++ /dev/null @@ -1 +0,0 @@ -_target_: nemo.collections.multimodal.modules.nerf.materials.basic_shading.BasicShading diff --git a/examples/multimodal/generative/nerf/config/model/nerf/tcnn.yaml b/examples/multimodal/generative/nerf/config/model/nerf/tcnn.yaml deleted file mode 100644 index 0bf5ed6c5e2f..000000000000 --- a/examples/multimodal/generative/nerf/config/model/nerf/tcnn.yaml +++ /dev/null @@ -1,32 +0,0 @@ -_target_: nemo.collections.multimodal.modules.nerf.geometry.tcnn_nerf.TCNNNerf -num_input_dims: 3 # 3D space -bound: 1 -density_activation: softplus # softplus, exp -blob_radius: 0.5 -blob_density: 10 -normal_type: central_finite_difference - -encoder_cfg: - otype: "HashGrid" - n_levels: 16 - n_features_per_level: 2 - log2_hashmap_size: 19 - base_resolution: 16 - interpolation: "Smoothstep" - per_level_scale: # default is np.exp2(np.log2(2048 * bound / 16) / (16 - 1)) - -sigma_net_num_output_dims: 1 # density -sigma_net_cfg: - otype: "FullyFusedMLP" - activation: "ReLU" - output_activation: "None" - n_neurons: 64 - n_hidden_layers: 3 - -features_net_num_output_dims: 3 # rgb -features_net_cfg: - otype: "FullyFusedMLP" - activation: "ReLU" - output_activation: "None" - n_neurons: 64 - n_hidden_layers: 3 diff --git a/examples/multimodal/generative/nerf/config/model/nerf/torchngp.yaml b/examples/multimodal/generative/nerf/config/model/nerf/torchngp.yaml deleted file mode 100644 index 48877dcfa871..000000000000 --- a/examples/multimodal/generative/nerf/config/model/nerf/torchngp.yaml +++ /dev/null @@ -1,26 +0,0 @@ -_target_: nemo.collections.multimodal.modules.nerf.geometry.torchngp_nerf.TorchNGPNerf -num_input_dims: 3 # 3D space -bound: 1 -density_activation: exp # softplus, exp -blob_radius: 0.2 -blob_density: 5 -normal_type: central_finite_difference - -encoder_cfg: - encoder_type: 'hashgrid' - encoder_max_level: - log2_hashmap_size: 19 - desired_resolution: 2048 - interpolation: smoothstep - -sigma_net_num_output_dims: 1 # density -sigma_net_cfg: - num_hidden_dims: 64 - num_layers: 3 - bias: True - -features_net_num_output_dims: 3 # rgb -features_net_cfg: - num_hidden_dims: 64 - num_layers: 3 - bias: True diff --git a/examples/multimodal/generative/nerf/config/model/optim/adan.yaml b/examples/multimodal/generative/nerf/config/model/optim/adan.yaml deleted file mode 100644 index 885c13fcca8a..000000000000 --- a/examples/multimodal/generative/nerf/config/model/optim/adan.yaml +++ /dev/null @@ -1,6 +0,0 @@ -name: adan -lr: 5e-3 -eps: 1e-8 -weight_decay: 2e-5 -max_grad_norm: 5.0 -foreach: False diff --git a/examples/multimodal/generative/nerf/config/model/renderer/nerfacc.yaml b/examples/multimodal/generative/nerf/config/model/renderer/nerfacc.yaml deleted file mode 100644 index 73f48a7a0ca9..000000000000 --- a/examples/multimodal/generative/nerf/config/model/renderer/nerfacc.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_target_: nemo.collections.multimodal.modules.nerf.renderers.nerfacc_volume_renderer.NerfaccVolumeBaseRenderer -grid_resolution: 128 -grid_levels: 3 -bound: ${model.nerf.bound} -render_step_size: 1.e-3 -near_plane: 0.2 -cone_angle: 0.004 -alpha_thre: 1.e-2 diff --git a/examples/multimodal/generative/nerf/config/model/renderer/nvdiffrast.yaml b/examples/multimodal/generative/nerf/config/model/renderer/nvdiffrast.yaml deleted file mode 100644 index fefc217f4aec..000000000000 --- a/examples/multimodal/generative/nerf/config/model/renderer/nvdiffrast.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_target_: nemo.collections.multimodal.modules.nerf.renderers.nvdiffrast_renderer.NVDiffRastRenderer -bound: ${model.nerf.bound} -grid_resolution: 128 -density_thresh: 10.0 -update_interval: 16 -quartet_file: "/results/tets/128_tets.npz" diff --git a/examples/multimodal/generative/nerf/config/model/renderer/torchngp_raymarching.yaml b/examples/multimodal/generative/nerf/config/model/renderer/torchngp_raymarching.yaml deleted file mode 100644 index 5075a5fbc85c..000000000000 --- a/examples/multimodal/generative/nerf/config/model/renderer/torchngp_raymarching.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_target_: nemo.collections.multimodal.modules.nerf.renderers.torchngp_volume_renderer.TorchNGPVolumeRenderer -bound: ${model.nerf.bound} -update_interval: 16 -grid_resolution: 128 -density_thresh: 10 -max_steps: 1024 -dt_gamma: 0 diff --git a/examples/multimodal/generative/nerf/data.py b/examples/multimodal/generative/nerf/data.py deleted file mode 100644 index 1126a816a97e..000000000000 --- a/examples/multimodal/generative/nerf/data.py +++ /dev/null @@ -1,73 +0,0 @@ -import pytorch_lightning as pl -from hydra.utils import instantiate -from omegaconf.omegaconf import DictConfig -from torch.utils.data import DataLoader - - -# TODO(ahmadki): multi-GPU needs more work, we currently don't shard data -# across GPUs, which is OK for trainnig, but needs fixing for validation and testing. -class AggregatorDataModule(pl.LightningDataModule): - def __init__( - self, - train_dataset: DictConfig = None, - train_batch_size: int = 1, - train_shuffle: bool = False, - val_dataset: DictConfig = None, - val_batch_size: int = 1, - val_shuffle: bool = False, - test_dataset: DictConfig = None, - test_batch_size: int = 1, - test_shuffle: bool = False, - ): - super().__init__() - - self.train_dataset = train_dataset - self.train_batch_size = train_batch_size - self.train_shuffle = train_shuffle - self.val_dataset = val_dataset - self.val_batch_size = val_batch_size - self.val_shuffle = val_shuffle - self.test_dataset = test_dataset - self.test_batch_size = test_batch_size - self.test_shuffle = test_shuffle - - # TODO(ahmadki): lazy init - # def setup(self, stage=None) -> None: - # if stage in [None, "fit"]: - # self.train_dataset = instantiate(self.train_dataset) - # if stage in [None, "fit", "validate"]: - # self.val_dataset = instantiate(self.val_dataset) - # if stage in [None, "test", "predict"]: - # self.test_dataset = instantiate(self.test_dataset) - - def train_dataloader(self) -> DataLoader: - loader = DataLoader( - self.train_dataset, - batch_size=self.train_batch_size, - collate_fn=self.train_dataset.collate_fn, - pin_memory=True, - num_workers=4, - ) - return loader - - def val_dataloader(self) -> DataLoader: - loader = DataLoader( - self.val_dataset, - batch_size=self.val_batch_size, - collate_fn=self.val_dataset.collate_fn, - shuffle=self.val_shuffle, - pin_memory=True, - num_workers=0, - ) - return loader - - def test_dataloader(self) -> DataLoader: - loader = DataLoader( - self.test_dataset, - batch_size=self.test_batch_size, - collate_fn=self.test_dataset.collate_fn, - shuffle=self.test_shuffle, - pin_memory=True, - num_workers=0, - ) - return loader diff --git a/examples/multimodal/generative/nerf/main.py b/examples/multimodal/generative/nerf/main.py deleted file mode 100644 index 35b80052ef7f..000000000000 --- a/examples/multimodal/generative/nerf/main.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from hydra.utils import get_class, instantiate -from omegaconf.omegaconf import DictConfig, OmegaConf -from pytorch_lightning import Trainer, seed_everything - -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.exp_manager import exp_manager - - -@hydra_runner(config_path='config', config_name='config') -def main(cfg: DictConfig) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - seed_everything(cfg.seed, workers=True) - - mode = cfg.mode - logging.info(f"{mode=}") - - model = None - model_cls = get_class(cfg.model._target_) - if cfg.model.resume_from_checkpoint is None: - model = model_cls(cfg=cfg.model) - else: - logging.info(f"Loading model from checkpoint: {cfg.model.resume_from_checkpoint}") - model = model_cls.load_from_checkpoint(cfg.model.resume_from_checkpoint, strict=False, cfg=cfg.model) - - if mode == "export-mesh": - mesh = model.mesh(resolution=cfg.mesh_resolution) - mesh.export(cfg.mesh_fname) - return - - # Prepare callbacks - callbacks = [] - if cfg.enable_benchmark: - callbacks.append(instantiate(cfg.benchmark_callback)) - - # Setup trainer - trainer = Trainer(callbacks=callbacks, **cfg.trainer) - exp_manager(trainer, cfg.exp_manager) - - # Setup datamodule - dm = instantiate(cfg.model.data) - - if mode == "fit": - trainer.fit(model, datamodule=dm) - elif mode == "validate": - trainer.validate(model, datamodule=dm) - elif mode == "test": - trainer.test(model, datamodule=dm) - else: - raise ValueError(f"Invalid mode: {mode}") - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml deleted file mode 100644 index 3cfc822f8462..000000000000 --- a/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml +++ /dev/null @@ -1,192 +0,0 @@ -name: stable-diffusion2-train - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: 16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 # PTL default. In practice, max_steps will be reached first. - max_steps: 140000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - limit_val_batches: 0 - -exp_manager: - exp_dir: null - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: - project: stable-diffusion - group: nemo-sd - name: ${name} - resume: True - create_checkpoint_callback: True - create_tensorboard_logger: True - checkpoint_callback_params: - every_n_train_steps: 1000 - every_n_epochs: 0 - monitor: reduced_train_loss - filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: True - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - - -model: - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 4 # limited by GPU memory - global_batch_size: 16 # will use more micro batches to reach global batch size - - linear_start: 0.00085 - linear_end: 0.012 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: images - cond_stage_key: captions # txt for cifar, caption for pbss - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn # check - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - scale_by_std: False - ckpt_path: - ignore_keys: [] - parameterization: eps - clip_denoised: True - load_only_unet: False - cosine_s: 8e-3 - given_betas: - original_elbo_weight: 0 - v_posterior: 0 - l_simple_weight: 1 - use_positional_encodings: False - learn_logvar: False - logvar_init: 0 - beta_schedule: linear - loss_type: l2 - - concat_mode: True - cond_stage_forward: - text_embedding_dropout_rate: 0.1 - fused_opt: True - inductor: True - inductor_cudagraphs: False - capture_cudagraph_iters: -1 # -1 to disable - channels_last: True - - unet_config: - _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel - from_pretrained: - from_NeMo: #Must be specified when from pretrained is not None, False means loading unet from HF ckpt - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: - - 4 - - 2 - - 1 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 4 - - 4 - num_head_channels: 64 - use_spatial_transformer: true - use_linear_in_transformer: true - transformer_depth: 1 - context_dim: 1024 - use_checkpoint: False - legacy: False - use_flash_attention: False - - first_stage_config: - _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL - from_pretrained: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 #Never used - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder - restore_from_path: /path/to/clip.nemo - device: cuda - freeze: True - layer: "penultimate" - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - optim: - name: fused_adam - lr: 1e-4 - weight_decay: 0. - betas: - - 0.9 - - 0.999 - sched: - name: WarmupHoldPolicy - warmup_steps: 10000 - hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant - - # Nsys profiling options - nsys_profile: - enabled: False - start_step: 10 # Global batch to start profiling - end_step: 10 # Global batch to end profiling - ranks: [ 0 ] # Global rank IDs to profile - gen_shape: False # Generate model and kernel details including input shapes - - data: - num_workers: 16 - synthetic_data: False # dataset_path and local_root_path can be empty when using synthetic data - synthetic_data_length: 10000 - train: - dataset_path: - - /datasets/coyo/test.pkl - augmentations: - resize_smallest_side: 512 - center_crop_h_w: 512, 512 - horizontal_flip: False - filterings: - - webdataset: - infinite_sampler: False - local_root_path: /datasets/coyo diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_fid_images.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_fid_images.yaml deleted file mode 100644 index e526bc52d673..000000000000 --- a/examples/multimodal/generative/stable_diffusion/conf/sd_fid_images.yaml +++ /dev/null @@ -1,45 +0,0 @@ -name: stable-diffusion-train - -fid: - classifier_free_guidance: - - 1.5 - - 2 - - 3 - - 4 - - 5 - - 6 - - 7 - - 8 - nnodes_per_cfg: 1 - ntasks_per_node: 8 - local_task_id: null - num_images_to_eval: 30000 - coco_captions_path: /coco2014/coco2014_val_sampled_30k/captions - coco_images_path: /coco2014/coco2014_val/images_256 - save_path: output - -infer: - unconditional_guidance_scale: null - num_images_per_prompt: 1 - height: 512 - width: 512 - down_factor: 8 - inference_steps: 50 - sampler_type: 'PLMS' - eta: 0 - output_type: 'pil' - save_to_file: False # We need to rename and maintain the order of images for clip score calculation, so we will save it outside the inference pipeline - out_path: ${fid.save_path} - seed: 123 - prompts: - -trainer: - devices: ${fid.ntasks_per_node} - num_nodes: 1 - accelerator: gpu - precision: 32 - logger: False # logger provided by exp_manager - -model: - restore_from_path: null - precision: ${trainer.precision} \ No newline at end of file diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml deleted file mode 100644 index dbe384dd2566..000000000000 --- a/examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml +++ /dev/null @@ -1,31 +0,0 @@ -name: stable-diffusion-train - -infer: - unconditional_guidance_scale: 7.5 - num_images_per_prompt: 4 - height: 512 - width: 512 - down_factor: 8 - inference_steps: 25 - sampler_type: 'DPM' - eta: 0 - output_type: 'pil' - save_to_file: True - out_path: 'stable-diffusion' - seed: 123 - prompts: - - 'A photo of a Shiba Inu dog with a backpack riding a bike. It is wearing sunglasses and a beach hat.' - - 'A cute corgi lives in a house made out of sushi.' - - 'A high contrast portrait of a very happy fuzzy panda dressed as a chef in a high end kitchen making dough. There is a painting of flowers on the wall behind him.' - - 'A brain riding a rocketship heading towards the moon.' - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: 16 - logger: False # logger provided by exp_manager - -model: - restore_from_path: null - precision: ${trainer.precision} \ No newline at end of file diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml deleted file mode 100644 index 78a8dedccbc2..000000000000 --- a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml +++ /dev/null @@ -1,207 +0,0 @@ -name: stable-diffusion-train - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: 16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: 2 # PTL default. In practice, max_steps will be reached first. - max_steps: -1 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: True - limit_val_batches: 0 - - -exp_manager: - exp_dir: null - name: ${name} - create_wandb_logger: False - wandb_logger_kwargs: - project: stable-diffusion - group: nemo-sd - name: ${name} - resume: True - create_checkpoint_callback: True - create_tensorboard_logger: True - checkpoint_callback_params: - every_n_train_steps: 1000 - every_n_epochs: 0 - monitor: reduced_train_loss - filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - ema: - enable: True - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - - -model: - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 1 # limited by GPU memory - global_batch_size: 1 # will use more micro batches to reach global batch size - native_amp_init_scale: 65536.0 # Init scale for grad scaler used at fp16 - - - linear_start: 0.00085 - linear_end: 0.012 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: images - cond_stage_key: captions # txt for cifar, caption for pbss - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn # check - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - scale_by_std: False - ckpt_path: - ignore_keys: [] - parameterization: eps - clip_denoised: True - load_only_unet: False - cosine_s: 8e-3 - given_betas: - original_elbo_weight: 0 - v_posterior: 0 - l_simple_weight: 1 - use_positional_encodings: False - learn_logvar: False - logvar_init: 0 - beta_schedule: linear - loss_type: l2 - - concat_mode: True - cond_stage_forward: - text_embedding_dropout_rate: 0.1 - fused_opt: True - inductor: False - inductor_cudagraphs: False - capture_cudagraph_iters: -1 # -1 to disable - channels_last: True - - unet_config: - _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel - from_pretrained: #/ckpts/nemo-v1-2.ckpt - from_NeMo: True #Must be specified when from pretrained is not None, False means loading unet from HF ckpt - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: - - 4 - - 2 - - 1 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 4 - - 4 - num_heads: 8 - use_spatial_transformer: true - transformer_depth: 1 - context_dim: 768 - use_checkpoint: False - legacy: False - use_flash_attention: True - enable_amp_o2_fp16: True - - first_stage_config: - _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL - from_pretrained: /ckpts/vae.bin - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 #Never used - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - capture_cudagraph_iters: ${model.capture_cudagraph_iters} - - cond_stage_config: - _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder - restore_from_path: /ckpts/openai.nemo - device: cuda - freeze: True - layer: "last" - # For compatibility of history version that uses HF clip model - # _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder - # version: openai/clip-vit-large-patch14 - # device: cuda - # max_length: 77 - # capture_cudagraph_iters: {$model.capture_cudagraph_iters} - - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - ddp_overlap: True # True for using PyTorch DDP overlap. - - optim: - name: megatron_fused_adam - lr: null - weight_decay: 0. - betas: - - 0.9 - - 0.999 - sched: - name: WarmupHoldPolicy - warmup_steps: 10000 - hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant - capturable: True - master_weights: True - max_norm: ${trainer.gradient_clip_val} - - # Nsys profiling options - nsys_profile: - enabled: False - start_step: 10 # Global batch to start profiling - end_step: 10 # Global batch to end profiling - ranks: [ 0 ] # Global rank IDs to profile - gen_shape: False # Generate model and kernel details including input shapes - - data: - num_workers: 16 - synthetic_data: False # dataset_path and local_root_path can be empty when using synthetic data - synthetic_data_length: 10000 - train: - dataset_path: - - /datasets/coyo/wdinfo.pkl - augmentations: - resize_smallest_side: 512 - center_crop_h_w: 512, 512 - horizontal_flip: False - filterings: - - webdataset: - infinite_sampler: False - local_root_path: /datasets/coyo diff --git a/examples/multimodal/generative/stable_diffusion/generate_fid_images.py b/examples/multimodal/generative/stable_diffusion/generate_fid_images.py deleted file mode 100644 index c1a37cd953a6..000000000000 --- a/examples/multimodal/generative/stable_diffusion/generate_fid_images.py +++ /dev/null @@ -1,83 +0,0 @@ -import os -import time -import torch -from omegaconf.omegaconf import open_dict - -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion -from nemo.collections.multimodal.parts.stable_diffusion.pipeline import pipeline -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.core.config import hydra_runner - - -@hydra_runner(config_path='conf', config_name='sd_fid_images') -def main(cfg): - # Read configuration parameters - nnodes_per_cfg = cfg.fid.nnodes_per_cfg - ntasks_per_node = cfg.fid.ntasks_per_node - local_task_id = cfg.fid.local_task_id - num_images_to_eval = cfg.fid.num_images_to_eval - path = cfg.fid.coco_captions_path - - node_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0)) - node_id_per_cfg = node_id % nnodes_per_cfg - - current_node_cfg = cfg.fid.classifier_free_guidance[node_id // nnodes_per_cfg] - with open_dict(cfg): - cfg.infer.unconditional_guidance_scale = current_node_cfg - save_path = os.path.join(cfg.fid.save_path, str(current_node_cfg)) - - # Read and store captions - captions = [] - caption_files = sorted(os.listdir(path)) - assert len(caption_files) >= num_images_to_eval - for file in caption_files[:num_images_to_eval]: - with open(os.path.join(path, file), 'r') as f: - captions += f.readlines() - - # Calculate partition sizes and select the partition for the current node - partition_size_per_node = num_images_to_eval // nnodes_per_cfg - start_idx = node_id_per_cfg * partition_size_per_node - end_idx = (node_id_per_cfg + 1) * partition_size_per_node if node_id_per_cfg != nnodes_per_cfg - 1 else None - captions = captions[start_idx:end_idx] - - local_task_id = int(local_task_id) if local_task_id is not None else int(os.environ.get("SLURM_LOCALID", 0)) - partition_size_per_task = int(len(captions) // ntasks_per_node) - - # Select the partition for the current task - start_idx = local_task_id * partition_size_per_task - end_idx = (local_task_id + 1) * partition_size_per_task if local_task_id != ntasks_per_node - 1 else None - input = captions[start_idx:end_idx] - - print(f"Current worker {node_id}:{local_task_id} will generate {len(input)} images") - - os.makedirs(save_path, exist_ok=True) - - # Modify the model configuration - def model_cfg_modifier(model_cfg): - model_cfg.precision = cfg.trainer.precision - model_cfg.ckpt_path = None - model_cfg.inductor = False - model_cfg.unet_config.use_flash_attention = False - model_cfg.unet_config.from_pretrained = None - model_cfg.first_stage_config.from_pretrained = None - model_cfg.global_batch_size = model_cfg.micro_batch_size * ntasks_per_node - - # Set up the trainer and model for inference - trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier - ) - model = megatron_diffusion_model.model - model.cuda().eval() - - # Generate images using the model and save them - for i, prompt in enumerate(input): - cfg.infer.prompts = [prompt] - rng = torch.Generator().manual_seed(cfg.infer.seed + local_task_id * 10 + node_id_per_cfg * 100 + i * 1000) - output = pipeline(model, cfg, rng=rng) - for image in output[0]: - image_num = i + partition_size_per_node * node_id_per_cfg + partition_size_per_task * local_task_id - image.save(os.path.join(save_path, f'image{image_num:06d}.png')) - - -if __name__ == "__main__": - main() diff --git a/examples/multimodal/generative/stable_diffusion/sd_infer.py b/examples/multimodal/generative/stable_diffusion/sd_infer.py deleted file mode 100644 index 0fe9a0064e47..000000000000 --- a/examples/multimodal/generative/stable_diffusion/sd_infer.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch - -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion -from nemo.collections.multimodal.parts.stable_diffusion.pipeline import pipeline -from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference -from nemo.core.config import hydra_runner - - -@hydra_runner(config_path='conf', config_name='sd_infer') -def main(cfg): - def model_cfg_modifier(model_cfg): - model_cfg.precision = cfg.trainer.precision - model_cfg.ckpt_path = None - model_cfg.inductor = False - model_cfg.unet_config.use_flash_attention = False - model_cfg.unet_config.from_pretrained = None - model_cfg.first_stage_config.from_pretrained = None - - torch.backends.cuda.matmul.allow_tf32 = True - trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( - model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier - ) - model = megatron_diffusion_model.model - model.cuda().eval() - - rng = torch.Generator().manual_seed(cfg.infer.seed) - pipeline(model, cfg, rng=rng) - - -if __name__ == "__main__": - main() diff --git a/examples/multimodal/generative/stable_diffusion/sd_train.py b/examples/multimodal/generative/stable_diffusion/sd_train.py deleted file mode 100644 index b741af3d76e6..000000000000 --- a/examples/multimodal/generative/stable_diffusion/sd_train.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from datetime import timedelta - -import torch -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning import Trainer - -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion -from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.exp_manager import exp_manager - - -class MegatronStableDiffusionTrainerBuilder(MegatronTrainerBuilder): - """Builder for SD model Trainer with overrides.""" - - def _training_strategy(self) -> NLPDDPStrategy: - """ - Returns a ddp strategy passed to Trainer.strategy. - """ - ddp_overlap = self.cfg.model.get('ddp_overlap', True) - if ddp_overlap: - return NLPDDPStrategy( - no_ddp_communication_hook=False, - gradient_as_bucket_view=self.cfg.model.gradient_as_bucket_view, - find_unused_parameters=True, - bucket_cap_mb=256, - ) - else: - return NLPDDPStrategy( - no_ddp_communication_hook=True, - gradient_as_bucket_view=self.cfg.model.gradient_as_bucket_view, - find_unused_parameters=False, - ) - - -@hydra_runner(config_path='conf', config_name='sd_train') -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - torch.backends.cuda.matmul.allow_tf32 = True - - if cfg.model.capture_cudagraph_iters >= 0: - # Required by CUDA graph with DDP - os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "0" - - # Hack to avoid CUDA graph issue with AMP, PyTorch Lightning doesn't support - # changing autocast arguments for now. - # https://github.com/pytorch/pytorch/blob/v1.13.1/torch/cuda/graphs.py#L234 - def amp_autocast_init(self, *args, **kwargs): - if "cache_enabled" not in kwargs: - kwargs["cache_enabled"] = False - return self.__orig_init__(*args, **kwargs) - - torch.cuda.amp.autocast.__orig_init__ = torch.cuda.amp.autocast.__init__ - torch.cuda.amp.autocast.__init__ = amp_autocast_init - torch.autocast.__orig_init__ = torch.autocast.__init__ - torch.autocast.__init__ = amp_autocast_init - - trainer = MegatronStableDiffusionTrainerBuilder(cfg).create_trainer() - - exp_manager(trainer, cfg.exp_manager) - - model = MegatronLatentDiffusion(cfg.model, trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml b/examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml deleted file mode 100644 index fbe1883276fe..000000000000 --- a/examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml +++ /dev/null @@ -1,317 +0,0 @@ -name: nemo_kosmos -restore_from_path: null # used when starting from a .nemo file - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: bf16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 # PTL default. In practice, max_steps will be reached first. - max_steps: 375000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - val_check_interval: 100 - check_val_every_n_epoch: null - limit_val_batches: 50 - limit_test_batches: 500 - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: nemo_kosmos - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 10 - mode: min - always_save_nemo: False # saves nemo file during validation, not implemented for model parallel - save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits - filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' - model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} - ema: - enable: False - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - -model: - precision: ${trainer.precision} - - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - - # Batch size guideline for different types of dataset - micro_batch_size: 1 # limited by GPU memory - global_batch_size: 256 # will use more micro batches to reach global batch size - - media_start_token: "" - media_end_token: "" - - enabled_data_types: ["text", "image_caption"] - per_type_micro_batch_size: - text: 1 - image_caption: 32 - image_interleaved: 1 - per_type_sequence_length: - text: 2048 # placeholder - image_caption: 128 - image_interleaved: 2048 - per_type_loss_weights: - text: 1 - image_caption: 1 - image_interleaved: 1 - - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - virtual_pipeline_model_parallel_size: null # interleaved pipeline - - restore_from_path: null # used in fine-tuning - # multimodal configs - num_media_latents: 64 # each media is encoded and sampled into `num_media_latents` LM embeddings - - llm: - precision: ${trainer.precision} - - # model architecture - encoder_seq_length: 2048 - max_position_embeddings: ${.encoder_seq_length} - position_embedding_type: learned_absolute - num_layers: 12 - hidden_size: 2048 - ffn_hidden_size: 5440 # Transformer FFN hidden size. Usually 4 * hidden_size. - num_attention_heads: 16 - init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') - use_scaled_init_method: True # use scaled residuals initialization - hidden_dropout: 0. # Dropout probability for hidden state transformer. - attention_dropout: 0. - kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. - normalization: layernorm1p # Type of normalization layers - layernorm_epsilon: 1e-5 - do_layer_norm_weight_decay: False # True means weight decay on all params - pre_process: True # add embedding - post_process: True # add pooler - persist_layer_norm: True # Use of persistent fused layer norm kernel. - bias: True # Whether to use bias terms in all weight matrices. - activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] - headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. - transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] - normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. - rotary_percentage: 0.5 # If using position_embedding_type=rope, then the per head dim is multiplied by this. - attention_type: 'multihead' # Attention type. Options ['multihead'] - share_embeddings_and_output_weights: False # Share embedding and output layer weights. - overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 - batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 - - ## Activation Checkpointing - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - activations_checkpoint_num_layers: null # not used with 'selective' - num_micro_batches_with_partial_activation_checkpoints: null - activations_checkpoint_layers_per_pipeline: null - sequence_parallel: False - - # precision - native_amp_init_scale: 4294967296 # 2 ** 32 - native_amp_growth_interval: 1000 - hysteresis: 2 # Gradient scale hysteresis - fp32_residual_connection: False # Move residual connections to fp32 - fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 - - # model fusions - masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. - bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. - - use_cpu_initialization: False # Init weights on the CPU (slow for large models) - onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. - gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. - openai_gelu: False - bias_activation_fusion: False - megatron_legacy: False - - transformer_engine: False - fp8: False # enables fp8 in TransformerLayer forward - fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 - fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID - fp8_margin: 0 # scaling margin - fp8_interval: 1 # scaling update interval - fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor - fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history - use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. - - vision: - from_pretrained: /path/to/clip_model.nemo - precision: ${trainer.precision} - # vision configs - patch_dim: 14 - img_h: 224 - img_w: 224 - image_mean: null - image_std: null - num_channels: 3 - drop_patch_rate: 0.0 - drop_path_rate: 0.0 - global_average_pool: False - class_token_length: 1 - preprocess_layernorm: True # apply layer norm to embedded tokens - - # model architecture - encoder_seq_length: 196 - max_position_embeddings: ${.encoder_seq_length} - position_embedding_type: learned_parameters - num_layers: 24 - hidden_size: 1024 - ffn_hidden_size: 4096 # Transformer FFN hidden size. Usually 4 * hidden_size. - num_attention_heads: 16 - init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') - use_scaled_init_method: True # use scaled residuals initialization - hidden_dropout: 0. # Dropout probability for hidden state transformer. - attention_dropout: 0. - kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. - normalization: layernorm # Type of normalization layers - layernorm_epsilon: 1e-5 - do_layer_norm_weight_decay: False # True means weight decay on all params - pre_process: True # add embedding - post_process: True # add final layer norm - persist_layer_norm: True # Use of persistent fused layer norm kernel. - - ## Activation Checkpointing - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - activations_checkpoint_num_layers: null # not used with 'selective' - sequence_parallel: False - - # precision - native_amp_init_scale: 4294967296 # 2 ** 32 - native_amp_growth_interval: 1000 - hysteresis: 2 # Gradient scale hysteresis - fp32_residual_connection: False # Move residual connections to fp32 - fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 - - # model fusions - masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. - bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. - - use_cpu_initialization: False # Init weights on the CPU (slow for large models) - onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. - gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. - openai_gelu: False - bias_activation_fusion: False - megatron_legacy: False - - # Megatron O2-style half-precision - megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters - grad_allreduce_chunk_size_mb: 125 - grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - tokenizer: - library: 'megatron' - type: 'GPT2BPETokenizer' - model: null - vocab_file: null - merge_file: null - delimiter: null # only used for tabular tokenizer - sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. - - data: - # TEXT DATA - # ================================================================================== - # Path to data must be specified by the user. - # Supports List, String and Dictionary - # List : can override from the CLI: "model.data.data_prefix=[.5,/raid/data/pile/my-gpt3_00_text_document,.5,/raid/data/pile/my-gpt3_01_text_document]", - # Or see example below: - # data_prefix: - # - .5 - # - /raid/data/pile/my-gpt3_00_text_document - # - .5 - # - /raid/data/pile/my-gpt3_01_text_document - # Dictionary: can override from CLI "model.data.data_prefix"={"train":[1.0, /path/to/data], "validation":/path/to/data, "test":/path/to/test} - # Or see example below: - # "model.data.data_prefix: {train:[1.0,/path/to/data], validation:[/path/to/data], test:[/path/to/test]}" - num_workers: 2 - data_prefix: ??? - index_mapping_dir: null # path to save index mapping .npy files, by default will save in the same location as data_prefix - data_impl: mmap - splits_string: 950,50,0 - seq_length: ${model.llm.encoder_seq_length} - skip_warmup: True - dataloader_type: single # cyclic - reset_position_ids: False # Reset position ids after end-of-document token - reset_attention_mask: False # Reset attention mask after end-of-document token - eod_mask_loss: False # Mask loss for the end of document tokens - validation_drop_last: True # Set to false if the last partial validation samples is to be consumed - no_seqlen_plus_one_input_tokens: False # Set to True to disable fetching (sequence length + 1) input tokens, instead get (sequence length) input tokens and mask the last token - pad_samples_to_global_batch_size: False # Set to True if you want to pad the last partial batch with -1's to equal global batch size - shuffle_documents: True # Set to False to disable documents shuffling. Sample index will still be shuffled - - # ================================================================================= - # MEDIA DATA - image_caption: - num_workers: 8 - train: - dataset_path: # List of paths to pkl files or tar files - - /datasets/coyo/test.pkl - validation: # List of paths to pkl files or tar files - dataset_path: - - /datasets/coyo/test.pkl - webdataset: - infinite_sampler: False - local_root_path: /datasets/coyo - - image_interleaved: - num_workers: 8 - train: - dataset_path: # List of paths to pkl files or tar files - - /datasets/coyo/test.pkl - validation: # List of paths to pkl files or tar files - dataset_path: - - /datasets/coyo/test.pkl - webdataset: - chunk_size: 190 - infinite_sampler: False - local_root_path: /datasets/coyo -# boto3: -# credentials_file: /lustre/fsw/joc/yuya/kosmos/s3_cred -# bucket: webdataset - - # Nsys profiling options - nsys_profile: - enabled: False - start_step: 10 # Global batch to start profiling - end_step: 10 # Global batch to end profiling - ranks: [ 0 ] # Global rank IDs to profile - gen_shape: False # Generate model and kernel details including input shapes - - optim: - name: fused_adam - lr: 2e-4 - weight_decay: 0.1 - betas: - - 0.9 - - 0.95 - sched: - name: CosineAnnealing - warmup_steps: 2000 - constant_steps: 0 - min_lr: 2e-5 \ No newline at end of file diff --git a/examples/multimodal/mllm/kosmos/kosmos_pretrain.py b/examples/multimodal/mllm/kosmos/kosmos_pretrain.py deleted file mode 100644 index 8ac7679ade74..000000000000 --- a/examples/multimodal/mllm/kosmos/kosmos_pretrain.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from pytorch_lightning.trainer.connectors.checkpoint_connector import _CheckpointConnector - -from nemo.collections.multimodal.models.kosmos.megatron_kosmos_model import MegatronKosmosModel -from nemo.collections.nlp.parts.nlp_overrides import ( - GradScaler, - MegatronHalfPrecisionPlugin, - NLPDDPStrategy, - PipelineMixedPrecisionPlugin, -) -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.exp_manager import exp_manager - - -@hydra_runner(config_path="conf", config_name="kosmos_config") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - trainer = MegatronTrainerBuilder(cfg).create_trainer() - exp_manager(trainer, cfg.exp_manager) - - # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams - with open_dict(cfg): - cfg.model.precision = cfg.trainer.precision - - model = MegatronKosmosModel(cfg.model, trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/mllm/neva/conf/llava_config.yaml b/examples/multimodal/mllm/neva/conf/llava_config.yaml deleted file mode 100644 index 0b2cf826c606..000000000000 --- a/examples/multimodal/mllm/neva/conf/llava_config.yaml +++ /dev/null @@ -1,213 +0,0 @@ -name: nemo_neva -restore_from_path: null # used when starting from a .nemo file - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: bf16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 # PTL default. In practice, max_steps will be reached first. - max_steps: 4650 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - val_check_interval: 100 - check_val_every_n_epoch: null - limit_val_batches: 50 - limit_test_batches: 500 - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: nemo_neva - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 10 - mode: min - always_save_nemo: False # saves nemo file during validation, not implemented for model parallel - save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits - filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' - model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} - ema: - enable: False - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - -model: - precision: ${trainer.precision} - - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - - # Batch size guideline for different types of dataset - micro_batch_size: 16 # limited by GPU memory - global_batch_size: 128 # will use more micro batches to reach global batch size - - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - virtual_pipeline_model_parallel_size: null # interleaved pipeline - - restore_from_path: null # used in fine-tuning - - # Multimodal configs - mm_cfg: - llm: - from_pretrained: null # path to nemo checkpoint - freeze: False - model_type: llama_2 # Only support nvgpt or llama_2 - vision_encoder: - from_pretrained: "openai/clip-vit-large-patch14" # path or name - from_hf: True - patch_dim: 14 - hidden_size: 1024 # could be found from model but tricky in code - vision_select_layer: -2 # default to the last layer - class_token_length: 1 - freeze: True - pretrain_mm_mlp_adapter: null # path to pretrained mm adapter - use_im_start_end: False - - - # LLM configs - # use GPTModel from megatron.core - mcore_gpt: False - - # model architecture - encoder_seq_length: 4096 - max_position_embeddings: ${.encoder_seq_length} - position_embedding_type: rope - num_layers: 24 - hidden_size: 2048 - ffn_hidden_size: 5440 # Transformer FFN hidden size. Usually 4 * hidden_size. - num_attention_heads: 16 - init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') - use_scaled_init_method: True # use scaled residuals initialization - hidden_dropout: 0.0 # Dropout probability for hidden state transformer. - attention_dropout: 0.0 # Dropout probability for attention - ffn_dropout: 0.0 # Dropout probability in the feed-forward layer. - kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. - normalization: 'rmsnorm' # Normalization layer to use. Options are 'layernorm', 'rmsnorm' - layernorm_epsilon: 1e-5 - do_layer_norm_weight_decay: False # True means weight decay on all params - make_vocab_size_divisible_by: 128 # Pad the vocab size to be divisible by this value for computation efficiency. - pre_process: True # add embedding - post_process: True # add pooler - persist_layer_norm: True # Use of persistent fused layer norm kernel. - bias: False # Whether to use bias terms in all weight matrices. - activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] - headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. - transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] - normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. - rotary_percentage: 1.0 # If using position_embedding_type=rope, then the per head dim is multiplied by this. - attention_type: 'multihead' # Attention type. Options ['multihead'] - share_embeddings_and_output_weights: False # Share embedding and output layer weights. - overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 - batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 - seq_len_interpolation_factor: null # RoPE Interpolation factor for sequence length. This is used to build long-context models with RoPE ex: https://arxiv.org/abs/2306.15595. - num_query_groups: null # Number of query groups for group query attention. If None, normal attention is used. - override_vocab_size: 32000 - - ## Activation Checkpointing - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - activations_checkpoint_num_layers: null # not used with 'selective' - num_micro_batches_with_partial_activation_checkpoints: null - activations_checkpoint_layers_per_pipeline: null - sequence_parallel: False - - # precision - native_amp_init_scale: 4294967296 # 2 ** 32 - native_amp_growth_interval: 1000 - hysteresis: 2 # Gradient scale hysteresis - fp32_residual_connection: False # Move residual connections to fp32 - fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 - - # model fusions - masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. - bias_dropout_add_fusion: False # Use a kernel that fuses the bias addition, dropout and residual connection addition. - - use_cpu_initialization: False # Init weights on the CPU (slow for large models) - onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. - gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. - openai_gelu: False - bias_activation_fusion: False - megatron_legacy: False - - transformer_engine: False - fp8: False # enables fp8 in TransformerLayer forward - fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 - fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID - fp8_margin: 0 # scaling margin - fp8_interval: 1 # scaling update interval - fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor - fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history - use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. - - # Megatron O2-style half-precision - megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters - grad_allreduce_chunk_size_mb: 125 - grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - tokenizer: - library: 'sentencepiece' - type: null - model: null - vocab_file: null - merge_file: null - delimiter: null # only used for tabular tokenizer - sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. - additional_special_tokens: null # ["", "", "", "", "", ""] - - data: - num_workers: 8 - dataloader_type: cyclic - data_path: - lazy_preprocess: True - is_multimodal: True - sep_image_conv_front: False - image_token_len: 256 - conv_template: llama_2 # check `nemo/collections/multimodal/data/neva/conversation.py` - image_folder: null - image_aspect_ratio: 'square' - - # Nsys profiling options - nsys_profile: - enabled: False - start_step: 10 # Global batch to start profiling - end_step: 10 # Global batch to end profiling - ranks: [ 0 ] # Global rank IDs to profile - gen_shape: False # Generate model and kernel details including input shapes - - optim: - name: fused_adam - lr: 2e-3 - weight_decay: 0. - betas: - - 0.9 - - 0.95 - sched: - name: CosineAnnealing - warmup_steps: 140 - constant_steps: 0 - min_lr: 2e-5 \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/conf/neva_config.yaml b/examples/multimodal/mllm/neva/conf/neva_config.yaml deleted file mode 100644 index c2f44de2c1b9..000000000000 --- a/examples/multimodal/mllm/neva/conf/neva_config.yaml +++ /dev/null @@ -1,212 +0,0 @@ -name: nemo_neva -restore_from_path: null # used when starting from a .nemo file - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: bf16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 # PTL default. In practice, max_steps will be reached first. - max_steps: 4650 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - val_check_interval: 100 - check_val_every_n_epoch: null - limit_val_batches: 50 - limit_test_batches: 500 - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: nemo_neva - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 10 - mode: min - always_save_nemo: False # saves nemo file during validation, not implemented for model parallel - save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits - filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' - model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} - ema: - enable: False - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - -model: - precision: ${trainer.precision} - - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - - # Batch size guideline for different types of dataset - micro_batch_size: 16 # limited by GPU memory - global_batch_size: 128 # will use more micro batches to reach global batch size - - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - virtual_pipeline_model_parallel_size: null # interleaved pipeline - - restore_from_path: null # used in fine-tuning - - # Multimodal configs - mm_cfg: - llm: - from_pretrained: null # path to nemo checkpoint - freeze: True - model_type: llama_2 # `nvgpt` or `llama_2` supported - vision_encoder: - from_pretrained: "" # path or name - from_hf: True - patch_dim: 14 - hidden_size: 1024 # could be found from model but tricky in code - vision_select_layer: -2 # default to the last layer - class_token_length: 1 - freeze: True - pretrain_mm_mlp_adapter: null # path to pretrained mm adapter - use_im_start_end: False - - - # LLM configs - # use GPTModel from megatron.core - mcore_gpt: True - - # model architecture - encoder_seq_length: 4096 - max_position_embeddings: ${.encoder_seq_length} - position_embedding_type: rope - num_layers: 40 - hidden_size: 5120 - ffn_hidden_size: 13824 # Transformer FFN hidden size. Usually 4 * hidden_size. - num_attention_heads: 40 - init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') - use_scaled_init_method: True # use scaled residuals initialization - hidden_dropout: 0. # Dropout probability for hidden state transformer. - attention_dropout: 0. - kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. - normalization: rmsnorm # Type of normalization layers - layernorm_epsilon: 1e-5 - do_layer_norm_weight_decay: False # True means weight decay on all params - pre_process: True # add embedding - post_process: True # add pooler - persist_layer_norm: True # Use of persistent fused layer norm kernel. - bias: False # Whether to use bias terms in all weight matrices. - activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] - headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. - transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] - normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. - rotary_percentage: 0.5 # If using position_embedding_type=rope, then the per head dim is multiplied by this. - attention_type: 'multihead' # Attention type. Options ['multihead'] - share_embeddings_and_output_weights: False # Share embedding and output layer weights. - overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 - batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 - seq_len_interpolation_factor: null # RoPE Interpolation factor for sequence length. This is used to build long-context models with RoPE ex: https://arxiv.org/abs/2306.15595. - num_query_groups: null # Number of query groups for group query attention. If None, normal attention is used. - use_flash_attention: True - - ## Activation Checkpointing - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - activations_checkpoint_num_layers: null # not used with 'selective' - num_micro_batches_with_partial_activation_checkpoints: null - activations_checkpoint_layers_per_pipeline: null - sequence_parallel: False - - # precision - native_amp_init_scale: 4294967296 # 2 ** 32 - native_amp_growth_interval: 1000 - hysteresis: 2 # Gradient scale hysteresis - fp32_residual_connection: False # Move residual connections to fp32 - fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 - - # model fusions - masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. - bias_dropout_add_fusion: False # Use a kernel that fuses the bias addition, dropout and residual connection addition. - - use_cpu_initialization: False # Init weights on the CPU (slow for large models) - onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. - gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. - openai_gelu: False - bias_activation_fusion: False - megatron_legacy: False - - transformer_engine: False - fp8: False # enables fp8 in TransformerLayer forward - fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 - fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID - fp8_margin: 0 # scaling margin - fp8_interval: 1 # scaling update interval - fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor - fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history - use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. - - # Megatron O2-style half-precision - megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters - async_grad_allreduce: False - grad_allreduce_chunk_size_mb: 125 - grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - tokenizer: - library: 'sentencepiece' - type: null - model: null - vocab_file: null - merge_file: null - delimiter: null # only used for tabular tokenizer - sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. - additional_special_tokens: null # ["", "", "", "", "", ""] - - data: - num_workers: 8 - dataloader_type: cyclic - data_path: - lazy_preprocess: True - is_multimodal: True - sep_image_conv_front: False - image_token_len: 256 - conv_template: ${model.mm_cfg.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` - image_folder: null - image_aspect_ratio: 'square' - - # Nsys profiling options - nsys_profile: - enabled: False - start_step: 10 # Global batch to start profiling - end_step: 10 # Global batch to end profiling - ranks: [ 0 ] # Global rank IDs to profile - gen_shape: False # Generate model and kernel details including input shapes - - optim: - name: fused_adam - lr: 2e-3 - weight_decay: 0. - betas: - - 0.9 - - 0.95 - sched: - name: CosineAnnealing - warmup_steps: 140 - constant_steps: 0 - min_lr: 2e-5 \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/conf/neva_finetune.yaml b/examples/multimodal/mllm/neva/conf/neva_finetune.yaml deleted file mode 100644 index bd902b9f5d15..000000000000 --- a/examples/multimodal/mllm/neva/conf/neva_finetune.yaml +++ /dev/null @@ -1,209 +0,0 @@ -name: nemo_neva -restore_from_path: null # used when starting from a .nemo file - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: bf16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 # PTL default. In practice, max_steps will be reached first. - max_steps: 4900 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - val_check_interval: 100 - check_val_every_n_epoch: null - limit_val_batches: 50 - limit_test_batches: 500 - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: nemo_neva - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 10 - mode: min - always_save_nemo: False # saves nemo file during validation, not implemented for model parallel - save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits - filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' - model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} - ema: - enable: False - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - -model: - precision: ${trainer.precision} - - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - - # Batch size guideline for different types of dataset - micro_batch_size: 4 # limited by GPU memory - global_batch_size: 32 # will use more micro batches to reach global batch size - - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - virtual_pipeline_model_parallel_size: null # interleaved pipeline - - restore_from_path: null # used in fine-tuning - - # Multimodal configs - mm_cfg: - llm: - from_pretrained: null # path to nemo checkpoint - freeze: False - model_type: nvgpt # Only support nvgpt or llama_2 - vision_encoder: - from_pretrained: "" # path or name - from_hf: True - patch_dim: 14 - hidden_size: 1024 # could be found from model but tricky in code - vision_select_layer: -2 # default to the last layer - class_token_length: 1 - freeze: True - pretrain_mm_mlp_adapter: null # path to pretrained mm adapter - use_im_start_end: True # only support True now - - - # LLM configs - # use GPTModel from megatron.core - mcore_gpt: False - - # model architecture - encoder_seq_length: 4096 - max_position_embeddings: ${.encoder_seq_length} - position_embedding_type: rope - num_layers: 24 - hidden_size: 2048 - ffn_hidden_size: 5440 # Transformer FFN hidden size. Usually 4 * hidden_size. - num_attention_heads: 16 - init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') - use_scaled_init_method: True # use scaled residuals initialization - hidden_dropout: 0. # Dropout probability for hidden state transformer. - attention_dropout: 0. - kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. - normalization: layernorm1p # Type of normalization layers - layernorm_epsilon: 1e-5 - do_layer_norm_weight_decay: False # True means weight decay on all params - pre_process: True # add embedding - post_process: True # add pooler - persist_layer_norm: True # Use of persistent fused layer norm kernel. - bias: False # Whether to use bias terms in all weight matrices. - activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] - headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. - transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] - normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. - rotary_percentage: 0.5 # If using position_embedding_type=rope, then the per head dim is multiplied by this. - attention_type: 'multihead' # Attention type. Options ['multihead'] - share_embeddings_and_output_weights: False # Share embedding and output layer weights. - overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 - batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 - seq_len_interpolation_factor: null # RoPE Interpolation factor for sequence length. This is used to build long-context models with RoPE ex: https://arxiv.org/abs/2306.15595. - num_query_groups: null # Number of query groups for group query attention. If None, normal attention is used. - - ## Activation Checkpointing - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - activations_checkpoint_num_layers: null # not used with 'selective' - num_micro_batches_with_partial_activation_checkpoints: null - activations_checkpoint_layers_per_pipeline: null - sequence_parallel: False - - # precision - native_amp_init_scale: 4294967296 # 2 ** 32 - native_amp_growth_interval: 1000 - hysteresis: 2 # Gradient scale hysteresis - fp32_residual_connection: False # Move residual connections to fp32 - fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 - - # model fusions - masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. - bias_dropout_add_fusion: False # Use a kernel that fuses the bias addition, dropout and residual connection addition. - - use_cpu_initialization: False # Init weights on the CPU (slow for large models) - onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. - gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. - openai_gelu: False - bias_activation_fusion: False - megatron_legacy: False - - transformer_engine: False - fp8: False # enables fp8 in TransformerLayer forward - fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 - fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID - fp8_margin: 0 # scaling margin - fp8_interval: 1 # scaling update interval - fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor - fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history - use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. - - # Megatron O2-style half-precision - megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters - grad_allreduce_chunk_size_mb: 125 - grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - tokenizer: - library: 'megatron' - type: 'GPT2BPETokenizer' - model: null - vocab_file: null - merge_file: null - delimiter: null # only used for tabular tokenizer - sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. - - data: - num_workers: 8 - dataloader_type: cyclic - data_path: - lazy_preprocess: True - is_multimodal: True - sep_image_conv_front: False - image_token_len: 256 - conv_template: ${model.mm_cfg.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` - image_folder: null - image_aspect_ratio: 'square' - - # Nsys profiling options - nsys_profile: - enabled: False - start_step: 10 # Global batch to start profiling - end_step: 10 # Global batch to end profiling - ranks: [ 0 ] # Global rank IDs to profile - gen_shape: False # Generate model and kernel details including input shapes - - optim: - name: fused_adam - lr: 2e-5 - weight_decay: 0. - betas: - - 0.9 - - 0.95 - sched: - name: CosineAnnealing - warmup_steps: 200 - constant_steps: 0 - min_lr: 2e-7 \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/conf/neva_inference.yaml b/examples/multimodal/mllm/neva/conf/neva_inference.yaml deleted file mode 100644 index 35ca1e179f98..000000000000 --- a/examples/multimodal/mllm/neva/conf/neva_inference.yaml +++ /dev/null @@ -1,52 +0,0 @@ -inference: - greedy: False # Whether or not to use sampling ; use greedy decoding otherwise - top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. - top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: 0.2 # sampling temperature - add_BOS: False # add the bos token at the begining of the prompt - tokens_to_generate: 256 # The minimum length of the sequence to be generated. - all_probs: False # whether return the log prob for all the tokens in vocab - repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. - min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. - compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - end_strings: ["","",] # generation will stop when one of these tokens is generated - images_base_path: /pwd/images - -trainer: - devices: 8 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: bf16 # 16, 32, or bf16 - -cluster_type: BCP -tensor_model_parallel_size: 8 -pipeline_model_parallel_size: 1 -pipeline_model_parallel_split_rank: 0 # used for encoder and decoder model (0 for others) -neva_model_file: /pwd/nemo_experiments/nemo_llava.nemo #neva_22b_tp8_finetuned_v1.nemo neva_8b_tp4_finetuned_v1.nemo -checkpoint_dir: null #/pwd/nemo_multimodal/nemo_experiments/nemo_llava_finetune/checkpoints # checkpoint file dir. This is used to load the PTL checkpoint generated during the Kosmos training -checkpoint_name: null #megatron_clip--val_loss=0.41-step=13499-consumed_samples=431904.0.ckpt # PTL checkpoint file name, only used for PTL checkpoint loading -hparams_file: null #/pwd/nemo_multimodal/nemo_experiments/nemo_llava_finetune/version_0/hparams.yaml # model configuration file, only used for PTL checkpoint loading -quality: 9 -toxicity: 0 -humor: 6 -creativity: 6 -violence: 0 -helpfulness: 6 -not_appropriate: 0 - -# MORE THAN ONE INFERENCE IS NOT RUNNING PROPERLY NEED TO CHECK WHY SECOND IS OUTPUTING JUNK N -prompt_file: /pwd/nemo_experiments/input_prompts.jsonl -output_file: /pwd/nemo_experiments/results.jsonl - -server: False # whether launch the API server -port: 5555 # the port number for the inference server -web_server: False # whether launch the web inference server -share: False # whether create a public URL -username: test # user name for web client -password: test2 # password for web client -web_port: 9889 # the port number of the web server - -quantization: - algorithm: awq # int8_sq, fp8, int8, awq - enable: False \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/conf/neva_peft.yaml b/examples/multimodal/mllm/neva/conf/neva_peft.yaml deleted file mode 100644 index 0099d1d8c4d4..000000000000 --- a/examples/multimodal/mllm/neva/conf/neva_peft.yaml +++ /dev/null @@ -1,215 +0,0 @@ -name: nemo_neva -restore_from_path: null # used when starting from a .nemo file - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: bf16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 # PTL default. In practice, max_steps will be reached first. - max_steps: 4900 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - val_check_interval: 100 - check_val_every_n_epoch: null - limit_val_batches: 50 - limit_test_batches: 500 - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: nemo_neva - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - resume_from_checkpoint: ${model.resume_from_checkpoint} - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 10 - mode: min - always_save_nemo: False # saves nemo file during validation, not implemented for model parallel - save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits - filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' - model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} - ema: - enable: False - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - -model: - precision: ${trainer.precision} - - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - - # Batch size guideline for different types of dataset - micro_batch_size: 4 # limited by GPU memory - global_batch_size: 32 # will use more micro batches to reach global batch size - - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - virtual_pipeline_model_parallel_size: null # interleaved pipeline - - restore_from_path: null # used in fine-tuning - - # Multimodal configs - mm_cfg: - llm: - from_pretrained: null # path to nemo checkpoint - freeze: True # Set this to True in adapter learning! - model_type: nvgpt # Only support nvgpt or llama_2 - vision_encoder: - from_pretrained: "" # path or name - from_hf: True - patch_dim: 14 - hidden_size: 1024 # could be found from model but tricky in code - vision_select_layer: -2 # default to the last layer - class_token_length: 1 - freeze: True - pretrain_mm_mlp_adapter: null # path to pretrained mm adapter - use_im_start_end: True # only support True now - - peft: - lora_tuning: - adapter_dim: 32 - adapter_dropout: 0.0 - column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal - row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal - - # LLM configs - # use GPTModel from megatron.core - mcore_gpt: False - - # model architecture - encoder_seq_length: 4096 - max_position_embeddings: ${.encoder_seq_length} - position_embedding_type: rope - num_layers: 24 - hidden_size: 2048 - ffn_hidden_size: 5440 # Transformer FFN hidden size. Usually 4 * hidden_size. - num_attention_heads: 16 - init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') - use_scaled_init_method: True # use scaled residuals initialization - hidden_dropout: 0. # Dropout probability for hidden state transformer. - attention_dropout: 0. - kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. - normalization: layernorm1p # Type of normalization layers - layernorm_epsilon: 1e-5 - do_layer_norm_weight_decay: False # True means weight decay on all params - pre_process: True # add embedding - post_process: True # add pooler - persist_layer_norm: True # Use of persistent fused layer norm kernel. - bias: False # Whether to use bias terms in all weight matrices. - activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] - headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. - transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] - normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. - rotary_percentage: 0.5 # If using position_embedding_type=rope, then the per head dim is multiplied by this. - attention_type: 'multihead' # Attention type. Options ['multihead'] - share_embeddings_and_output_weights: False # Share embedding and output layer weights. - overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 - batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 - seq_len_interpolation_factor: null # RoPE Interpolation factor for sequence length. This is used to build long-context models with RoPE ex: https://arxiv.org/abs/2306.15595. - num_query_groups: null # Number of query groups for group query attention. If None, normal attention is used. - - ## Activation Checkpointing - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - activations_checkpoint_num_layers: null # not used with 'selective' - num_micro_batches_with_partial_activation_checkpoints: null - activations_checkpoint_layers_per_pipeline: null - sequence_parallel: False - - # precision - native_amp_init_scale: 4294967296 # 2 ** 32 - native_amp_growth_interval: 1000 - hysteresis: 2 # Gradient scale hysteresis - fp32_residual_connection: False # Move residual connections to fp32 - fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 - - # model fusions - masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. - bias_dropout_add_fusion: False # Use a kernel that fuses the bias addition, dropout and residual connection addition. - - use_cpu_initialization: False # Init weights on the CPU (slow for large models) - onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. - gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. - openai_gelu: False - bias_activation_fusion: False - megatron_legacy: False - - transformer_engine: False - fp8: False # enables fp8 in TransformerLayer forward - fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 - fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID - fp8_margin: 0 # scaling margin - fp8_interval: 1 # scaling update interval - fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor - fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history - use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. - - # Megatron O2-style half-precision - megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters - grad_allreduce_chunk_size_mb: 125 - grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - tokenizer: - library: 'megatron' - type: 'GPT2BPETokenizer' - model: null - vocab_file: null - merge_file: null - delimiter: null # only used for tabular tokenizer - sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. - - data: - num_workers: 8 - dataloader_type: cyclic - data_path: - lazy_preprocess: True - is_multimodal: True - sep_image_conv_front: False - image_token_len: 256 - conv_template: ${model.mm_cfg.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` - image_folder: null - image_aspect_ratio: 'square' - - # Nsys profiling options - nsys_profile: - enabled: False - start_step: 10 # Global batch to start profiling - end_step: 10 # Global batch to end profiling - ranks: [ 0 ] # Global rank IDs to profile - gen_shape: False # Generate model and kernel details including input shapes - - optim: - name: fused_adam - lr: 2e-5 - weight_decay: 0. - betas: - - 0.9 - - 0.95 - sched: - name: CosineAnnealing - warmup_steps: 200 - constant_steps: 0 - min_lr: 2e-7 \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py b/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py deleted file mode 100644 index b70faf61a413..000000000000 --- a/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py +++ /dev/null @@ -1,343 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r""" -Conversion script to convert Huggingface LLaMA checkpoints into nemo checkpoint. - Example to run this conversion script: - python convert_hf_llava_to_nevo.py \ - --in-file \ - --out-file \ - --tokenizer-model -""" - -import os -from argparse import ArgumentParser -from collections import OrderedDict - -import torch -from llava import LlavaLlamaForCausalLM -from omegaconf import OmegaConf -from pytorch_lightning.core.saving import _load_state as ptl_load_state -from pytorch_lightning.trainer.trainer import Trainer -from transformers import LlamaTokenizer - -from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel -from nemo.collections.nlp.parts.nlp_overrides import ( - GradScaler, - MegatronHalfPrecisionPlugin, - NLPDDPStrategy, - NLPSaveRestoreConnector, - PipelineMixedPrecisionPlugin, -) -from nemo.utils import logging - - -def get_args(): - parser = ArgumentParser() - parser.add_argument( - "--in-file", type=str, default=None, required=True, help="Path to Huggingface LLaMA checkpoints", - ) - parser.add_argument("--out-file", type=str, default=None, required=True, help="Path to output .nemo file.") - parser.add_argument( - "--tokenizer-model", type=str, default=None, required=False, help="Path to sentencepiece tokenizer model." - ) - parser.add_argument("--precision", type=str, default="32", help="Model precision") - args = parser.parse_args() - return args - - -def load_model(cls, checkpoint, strict, **kwargs): - try: - if 'cfg' in kwargs: - model = ptl_load_state(cls, checkpoint, strict=strict, **kwargs) - else: - # model = ptl_load_state( - # cls, checkpoint, strict=strict, cfg=checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY], **kwargs - # ) - model = cls(cfg=checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY], **kwargs) - for name, module in model.named_parameters(): - if name in checkpoint['state_dict']: - module.data = checkpoint['state_dict'][name] - checkpoint['state_dict'].pop(name) - else: - print(f"Unexpected key: {name} not in checkpoint but in model.") - - for name, buffer in model.named_buffers(): - if name in checkpoint['state_dict']: - buffer.data = checkpoint['state_dict'][name] - checkpoint['state_dict'].pop(name) - - if len(checkpoint['state_dict'].keys()) != 0: - raise RuntimeError( - f"Additional keys: {checkpoint['state_dict'].keys()} in checkpoint but not in model." - ) - - # register the artifacts - cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] - if cfg.tokenizer.model is not None: - model.register_artifact("tokenizer.tokenizer_model", cfg.tokenizer.model) - if cfg.tokenizer.vocab_file is not None: - model.register_artifact("tokenizer.vocab_file", cfg.tokenizer.vocab_file) - if cfg.tokenizer.merge_file is not None: - model.register_artifact("tokenizer.merge_file", cfg.tokenizer.merge_file) - finally: - cls._set_model_restore_state(is_being_restored=False) - return model - - -def load_config(args, llama_config): - nemo_config = OmegaConf.load(os.path.join(os.path.dirname(__file__), 'conf/llava_config.yaml')).model - nemo_config.encoder_seq_length = llama_config['max_position_embeddings'] - nemo_config.num_layers = int(llama_config['num_hidden_layers']) - nemo_config.hidden_size = llama_config['hidden_size'] - nemo_config.ffn_hidden_size = llama_config['intermediate_size'] - nemo_config.num_attention_heads = llama_config['num_attention_heads'] - nemo_config.max_position_embeddings = llama_config['max_position_embeddings'] - nemo_config.init_method_std = llama_config['initializer_range'] - nemo_config.layernorm_epsilon = llama_config['rms_norm_eps'] - if 'num_key_value_heads' in llama_config: - nemo_config.num_query_groups = llama_config['num_key_value_heads'] - nemo_config.use_cpu_initialization = True - nemo_config.activation = 'fast-swiglu' - if args.tokenizer_model is None: - nemo_config.tokenizer.model = llama_config['tokenizer_model'] - else: - nemo_config.tokenizer.model = args.tokenizer_model - if llama_config['rope_scaling'] is not None: - if llama_config['rope_scaling']['type'] == 'linear': - nemo_config['seq_len_interpolation_factor'] = llama_config['rope_scaling']['factor'] - else: - raise ValueError("Only linear rope scaling type is supported now") - - base = 128 - while llama_config['vocab_size'] % base != 0: - base //= 2 - nemo_config.make_vocab_size_divisible_by = base - - return nemo_config - - -def convert(args): - logging.info(f"loading checkpoint {args.in_file}") - model = LlavaLlamaForCausalLM.from_pretrained(args.in_file) - tokenizer = LlamaTokenizer.from_pretrained(args.in_file) - hf_config = vars(model.config) - hf_config['tokenizer_model'] = str(tokenizer.vocab_file) - print(f"hf_config: {hf_config}") - print("named parameters:") - for name, param in model.named_parameters(): - print(f"- {name}") - - nemo_config = load_config(args, hf_config) - print(nemo_config) - - if args.precision in ["32", "16"]: - precision = int(float(args.precision)) - elif args.precision in ["bf16", "bf16-mixed"]: - if torch.cuda.is_available() and torch.cuda.is_bf16_supported(): - precision = args.precision - else: - logging.warning("BF16 is not supported on this device. Using FP16 instead.") - precision = args.precision[2:] # prune bf in string - else: - precision = args.precision - - plugins = [] - if precision in [16, '16', 'bf16', '16-mixed', 'bf16-mixed']: - scaler = None - if precision in [16, '16', '16-mixed']: - scaler = GradScaler( - init_scale=nemo_config.get('native_amp_init_scale', 2 ** 32), - growth_interval=nemo_config.get('native_amp_growth_interval', 1000), - hysteresis=nemo_config.get('hysteresis', 2), - ) - # MixedPrecisionPlugin in PTL >= 2.0 requires precision to be 16-mixed or bf16-mixed - plugin_precision = '16-mixed' - else: - plugin_precision = 'bf16-mixed' - - if nemo_config.get('megatron_amp_O2', False): - plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - else: - plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) - - if precision == 32: - dtype = torch.float32 - elif precision in [16, "16", "16-mixed"]: - dtype = torch.float16 - elif precision in ["bf16", "bf16-mixed"]: - dtype = torch.bfloat16 - else: - dtype = torch.float32 # fallback - - nemo_config.precision = precision - print(f"nemo_config: {nemo_config}") - - trainer = Trainer(plugins=plugins, accelerator='cpu', precision=precision, strategy=NLPDDPStrategy()) - - hidden_size = hf_config["hidden_size"] - head_num = hf_config["num_attention_heads"] - head_size = hidden_size // head_num - num_layers = hf_config["num_hidden_layers"] - - mcore_gpt = nemo_config.mcore_gpt - - assert mcore_gpt == nemo_config.get( - 'transformer_engine', False - ), "mcore_gpt transformer_engine must be enabled (or disabled) together." - - param_to_weights = lambda param: param.float() - - checkpoint = OrderedDict() - checkpoint['state_dict'] = OrderedDict() - - # Multimodal projection - if mcore_gpt: - raise NotImplementedError - else: - mm_projection_layer_base_name = ( - f'model.language_model.embedding.word_embeddings.adapter_layer.mm_linear_adapter.linear' - ) - checkpoint['state_dict'][f'{mm_projection_layer_base_name}.weight'] = param_to_weights( - model.state_dict()[f'model.mm_projector.weight'] - ) - checkpoint['state_dict'][f'{mm_projection_layer_base_name}.bias'] = param_to_weights( - model.state_dict()[f'model.mm_projector.bias'] - ) - - embed_weight = model.state_dict()[f'model.embed_tokens.weight'] - if mcore_gpt: - embed_weights_base_name = f'model.embedding.word_embeddings.weight' - else: - embed_weights_base_name = f'model.language_model.embedding.word_embeddings.weight' - checkpoint['state_dict'][embed_weights_base_name] = param_to_weights(embed_weight) - - # in hf, this is defined as register_buffer(..., persistent=False) so it won't be in the state dict - if f'model.layers.0.self_attn.rotary_emb.inv_freq' in model.state_dict(): - rotary_embed_weight = model.state_dict()[f'model.layers.0.self_attn.rotary_emb.inv_freq'] - if mcore_gpt: - rotary_embed_weight_base_name = f'model.rotary_pos_emb.inv_freq' - else: - rotary_embed_weight_base_name = f'model.language_model.rotary_pos_emb.inv_freq' - checkpoint['state_dict'][rotary_embed_weight_base_name] = param_to_weights(rotary_embed_weight) - - if nemo_config.num_query_groups is None or nemo_config.num_query_groups == head_num: - num_query_groups = head_num - else: - num_query_groups = nemo_config.num_query_groups - assert head_num % num_query_groups == 0, 'head_num must be divisible by num_query_groups' - if mcore_gpt: - assert nemo_config.activation.startswith('fast-'), 'mcore only supports fast version of gated linear unit.' - - for l in range(int(num_layers)): - print(f"converting layer {l}") - old_tensor_shape = model.state_dict()[f'model.layers.{l}.self_attn.q_proj.weight'].size() - new_q_tensor_shape = (head_num, head_size) + old_tensor_shape[1:] - new_kv_tensor_shape = (num_query_groups, head_size) + old_tensor_shape[1:] - q = model.state_dict()[f'model.layers.{l}.self_attn.q_proj.weight'].view(*new_q_tensor_shape) - k = model.state_dict()[f'model.layers.{l}.self_attn.k_proj.weight'].view(*new_kv_tensor_shape) - v = model.state_dict()[f'model.layers.{l}.self_attn.v_proj.weight'].view(*new_kv_tensor_shape) - qkv_weights = torch.empty((0, head_size) + old_tensor_shape[1:]) - heads_per_group = head_num // num_query_groups - for i in range(num_query_groups): - qkv_weights = torch.cat((qkv_weights, q[i * heads_per_group : (i + 1) * heads_per_group, :, :])) - qkv_weights = torch.cat((qkv_weights, k[i : i + 1, :, :])) - qkv_weights = torch.cat((qkv_weights, v[i : i + 1, :, :])) - qkv_weights = qkv_weights.reshape([head_size * (head_num + 2 * num_query_groups), hidden_size]) - if mcore_gpt: - qkv_weights_base_name = f'model.decoder.layers.{l}.self_attention.linear_qkv.weight' - else: - qkv_weights_base_name = f'model.language_model.encoder.layers.{l}.self_attention.query_key_value.weight' - checkpoint['state_dict'][qkv_weights_base_name] = param_to_weights(qkv_weights) - - # attention dense - o_weight = model.state_dict()[f'model.layers.{l}.self_attn.o_proj.weight'] - if mcore_gpt: - o_weight_base_name = f'model.decoder.layers.{l}.self_attention.linear_proj.weight' - else: - o_weight_base_name = f'model.language_model.encoder.layers.{l}.self_attention.dense.weight' - checkpoint['state_dict'][o_weight_base_name] = param_to_weights(o_weight) - - # MLP - mlp_down_weight = model.state_dict()[f'model.layers.{l}.mlp.gate_proj.weight'] - mlp_gate_weight = model.state_dict()[f'model.layers.{l}.mlp.up_proj.weight'] - if mcore_gpt: - mlp_down_base_name = f'model.decoder.layers.{l}.mlp.linear_fc1.weight' - else: - mlp_down_base_name = f'model.language_model.encoder.layers.{l}.mlp.dense_h_to_4h.weight' - mlp_down_weight = torch.cat((mlp_down_weight, mlp_gate_weight), axis=0) - checkpoint['state_dict'][mlp_down_base_name] = param_to_weights(mlp_down_weight) - - mlp_up_weight = model.state_dict()[f'model.layers.{l}.mlp.down_proj.weight'] - if mcore_gpt: - mlp_up_base_name = f'model.decoder.layers.{l}.mlp.linear_fc2.weight' - else: - mlp_up_base_name = f'model.language_model.encoder.layers.{l}.mlp.dense_4h_to_h.weight' - checkpoint['state_dict'][mlp_up_base_name] = param_to_weights(mlp_up_weight) - - # LayerNorm - input_ln_weight = model.state_dict()[f'model.layers.{l}.input_layernorm.weight'] - if mcore_gpt: - input_ln_base_name = f'model.decoder.layers.{l}.self_attention.linear_qkv.layer_norm_weight' - else: - input_ln_base_name = f'model.language_model.encoder.layers.{l}.input_layernorm.weight' - checkpoint['state_dict'][input_ln_base_name] = param_to_weights(input_ln_weight) - - post_attn_ln_weight = model.state_dict()[f'model.layers.{l}.post_attention_layernorm.weight'] - if mcore_gpt: - post_attn_ln_base_name = f'model.decoder.layers.{l}.mlp.linear_fc1.layer_norm_weight' - else: - post_attn_ln_base_name = f'model.language_model.encoder.layers.{l}.post_attention_layernorm.weight' - checkpoint['state_dict'][post_attn_ln_base_name] = param_to_weights(post_attn_ln_weight) - - print(f"done layer {l}") - - final_ln_weight = model.state_dict()[f'model.norm.weight'] - if mcore_gpt: - final_ln_base_name = f'model.decoder.final_layernorm.weight' - else: - final_ln_base_name = f'model.language_model.encoder.final_layernorm.weight' - checkpoint['state_dict'][final_ln_base_name] = param_to_weights(final_ln_weight) - - output_layer_weight = model.state_dict()[f'lm_head.weight'] - if mcore_gpt: - output_layer_base_name = f'model.output_layer.weight' - else: - output_layer_base_name = f'model.language_model.output_layer.weight' - checkpoint['state_dict'][output_layer_base_name] = param_to_weights(output_layer_weight) - - checkpoint[MegatronNevaModel.CHECKPOINT_HYPER_PARAMS_KEY] = nemo_config - - del model - - if nemo_config.get('megatron_amp_O2', False): - keys = list(checkpoint['state_dict'].keys()) - for key in keys: - checkpoint['state_dict'][key.replace('model.', 'model.module.', 1)] = checkpoint['state_dict'].pop(key) - - model = load_model(MegatronNevaModel, checkpoint, strict=False, trainer=trainer) - - model._save_restore_connector = NLPSaveRestoreConnector() - - # cast to target precision and disable cpu init - model = model.to(dtype=dtype) - model.cfg.use_cpu_initialization = False - - model.save_to(args.out_file) - logging.info(f'NeMo model saved to: {args.out_file}') - - -if __name__ == '__main__': - args = get_args() - convert(args) diff --git a/examples/multimodal/mllm/neva/neva_evaluation.py b/examples/multimodal/mllm/neva/neva_evaluation.py deleted file mode 100644 index 256d58018c9b..000000000000 --- a/examples/multimodal/mllm/neva/neva_evaluation.py +++ /dev/null @@ -1,353 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import asyncio -import json -import os -import re -import threading - -import torch -from omegaconf import OmegaConf, open_dict -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from pytorch_lightning.trainer.trainer import Trainer -from torch.utils.data import DataLoader, Dataset - -from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel -from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel -from nemo.collections.nlp.modules.common.megatron_web_server import get_demo -from nemo.collections.nlp.modules.common.text_generation_server import MegatronServer -from nemo.collections.nlp.modules.common.text_generation_utils import generate -from nemo.collections.nlp.modules.common.transformer.text_generation import LengthParam, SamplingParam -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector -from nemo.core.config import hydra_runner -from nemo.utils.app_state import AppState -from nemo.utils.model_utils import inject_model_parallel_rank - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - -try: - import ammo.torch.quantization as atq - - HAVE_AMMO = True - -except (ImportError, ModuleNotFoundError): - - HAVE_AMMO = False - - -""" -This is the script to run GPT text generation. - -Usage: - Assume the model has TP=1, PP=1 in the following use cases. - a. run greedy inference from a nemo file: - python neva_evaluation.py \ - neva_model_file=PATH_TO_MODEL \ - inference.greedy=True \ - inference.add_BOS=True \ - trainer.devices=1 \ - trainer.num_nodes=1 \ - tensor_model_parallel_size=-1 \ - pipeline_model_parallel_size=-1 \ - prompts=[prompt1,prompt2] - - b. run greedy inference from a PTL checkpoint file: - python neva_evaluation.py \ - checkpoint_dir=PATH_TO_CHECKPOINT_FILE \ - checkpoint_name=CHECKPOINT_FILE_NAME \ - hparams_file=HPARAMS_FILE \ - inference.greedy=True \ - inference.add_BOS=True \ - trainer.devices=1 \ - trainer.num_nodes=1 \ - tensor_model_parallel_size=-1 \ - pipeline_model_parallel_size=-1 \ - prompts=[prompt1,prompt2] - - c. run top_p inference from a nemo file: - python neva_evaluation.py \ - neva_model_file=PATH_TO_MODEL \ - inference.greedy=False \ - inference.top_k=0 \ - inference.top_p=0.9 \ - inference.repetition_penalty=1.2 \ - inference.add_BOS=True \ - trainer.devices=1 \ - trainer.num_nodes=1 \ - tensor_model_parallel_size=-1 \ - pipeline_model_parallel_size=-1 \ - prompts=[prompt1,prompt2] - - d. If you don't need to generate tokens and need model to compute logprobs: - python neva_evaluation.py \ - neva_model_file=PATH_TO_MODEL \ - inference.compute_logprob=True \ - trainer.devices=1 \ - trainer.num_nodes=1 \ - tensor_model_parallel_size=-1 \ - pipeline_model_parallel_size=-1 \ - prompts=[text to get logprob] - - e. Launch the inference server - python neva_evaluation.py \ - neva_model_file=PATH_TO_MODEL \ - trainer.devices=1 \ - trainer.num_nodes=1 \ - tensor_model_parallel_size=-1 \ - pipeline_model_parallel_size=-1 \ - server=True - - To send a request to the server, here is one example code: - ```python - import json - import requests - - batch_size = 8 - port_num = 5555 - headers = {"Content-Type": "application/json"} - - - def request_data(data): - resp = requests.put('http://localhost:{}/generate'.format(port_num), - data=json.dumps(data), - headers=headers) - sentences = resp.json()['sentences'] - return sentences - - - data = { - "sentences": [""] * batch_size, - "images" : [] * batch_size, - "tokens_to_generate": 300, - "temperature": 1.0, - "add_BOS": True, - "top_k": 0, - "top_p": 0.9, - "greedy": False, - "all_probs": False, - "repetition_penalty": 1.2, - "min_tokens_to_generate": 2, - } - - sentences = request_data(data) - ``` -""" - -if not torch.cuda.is_available(): - raise EnvironmentError("GPU is needed for the inference") - - -class RequestDataSet(Dataset): - def __init__(self, sentences): - super().__init__() - self.sentences = sentences - - def __len__(self,): - return len(self.sentences) - - def __getitem__(self, idx): - return self.sentences[idx] - - -@hydra_runner(config_path="conf", config_name="neva_inference") -def main(cfg) -> None: - - plugins = [] - if cfg.get('cluster_type', None) == 'BCP': - plugins.append(TorchElasticEnvironment()) - # trainer required for restoring model parallel models - trainer = Trainer(plugins=plugins, strategy=NLPDDPStrategy(), **cfg.trainer) - - if ( - cfg.tensor_model_parallel_size < 0 - or cfg.pipeline_model_parallel_size < 0 - or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 - ): - model_config = MegatronNevaModel.restore_from( - restore_path=cfg.neva_model_file, trainer=trainer, return_config=True, - ) - - with open_dict(cfg): - cfg.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) - cfg.pipeline_model_parallel_size = model_config.get('pipeline_model_parallel_size', 1) - cfg.pipeline_model_parallel_split_rank = model_config.get('pipeline_model_parallel_split_rank', 0) - - assert ( - cfg.trainer.devices * cfg.trainer.num_nodes - == cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size - ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" - - if cfg.neva_model_file: - save_restore_connector = NLPSaveRestoreConnector() - if os.path.isdir(cfg.neva_model_file): - save_restore_connector.model_extracted_dir = cfg.neva_model_file - - pretrained_cfg = MegatronNevaModel.restore_from( - restore_path=cfg.neva_model_file, - trainer=trainer, - return_config=True, - save_restore_connector=save_restore_connector, - ) - OmegaConf.set_struct(pretrained_cfg, True) - with open_dict(pretrained_cfg): - pretrained_cfg.sequence_parallel = False - pretrained_cfg.activations_checkpoint_granularity = None - pretrained_cfg.activations_checkpoint_method = None - pretrained_cfg.precision = trainer.precision - pretrained_cfg.mm_cfg.llm.from_pretrained = None - # pretrained_cfg.mm_cfg.vision_encoder.from_pretrained = None - - model = MegatronNevaModel.restore_from( - restore_path=cfg.neva_model_file, - trainer=trainer, - override_config_path=pretrained_cfg, - save_restore_connector=save_restore_connector, - ) - - elif cfg.checkpoint_dir: - app_state = AppState() - if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: - app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size - app_state.tensor_model_parallel_size = cfg.tensor_model_parallel_size - app_state.pipeline_model_parallel_size = cfg.pipeline_model_parallel_size - ( - app_state.tensor_model_parallel_rank, - app_state.pipeline_model_parallel_rank, - app_state.model_parallel_size, - app_state.data_parallel_size, - app_state.pipeline_model_parallel_split_rank, - app_state.virtual_pipeline_model_parallel_rank, - ) = fake_initialize_model_parallel( - world_size=app_state.model_parallel_size, - rank=trainer.global_rank, - tensor_model_parallel_size_=cfg.tensor_model_parallel_size, - pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank, - ) - checkpoint_path = inject_model_parallel_rank(os.path.join(cfg.checkpoint_dir, cfg.checkpoint_name)) - # TODO: This wont work properly (We need to set model.llm.from_pretrained model.vision.from_pretrained to nul) - model = MegatronNevaModel.load_from_checkpoint(checkpoint_path, hparams_file=cfg.hparams_file, trainer=trainer) - else: - raise ValueError("need at least a nemo file or checkpoint dir") - - model.freeze() - - # Have to turn off activations_checkpoint_method for inference - # Have to turn off activations_checkpoint_method for inference - try: - model.model.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass - try: - model.model.module.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass - - length_params: LengthParam = { - "max_length": cfg.inference.tokens_to_generate, - "min_length": cfg.inference.min_tokens_to_generate, - } - - sampling_params: SamplingParam = { - "use_greedy": cfg.inference.greedy, - "temperature": cfg.inference.temperature, - "top_k": cfg.inference.top_k, - "top_p": cfg.inference.top_p, - "repetition_penalty": cfg.inference.repetition_penalty, - "add_BOS": cfg.inference.add_BOS, - "all_probs": cfg.inference.all_probs, - "compute_logprob": cfg.inference.compute_logprob, - "end_strings": cfg.inference.end_strings, - } - - with open(cfg.prompt_file, 'r') as f: - lines = f.readlines() - - final_prompts = [] - for line in lines: - prompt_dict = json.loads(line) - final_prompts.append(prompt_dict) - - responses = model.generate( - input_prompts=final_prompts, length_params=length_params, sampling_params=sampling_params, inference_config=cfg - ) - - # =================== Start Quantization ==================== - # see https://gitlab-master.nvidia.com/omniml/ammo/-/tree/main/examples/nemo/neva for details - if HAVE_AMMO and cfg.quantization.enable == True: - print(f"Using quantization algorithm: {cfg.quantization.algorithm}") - if cfg.quantization.algorithm == "int8_sq": - atq_config = atq.INT8_SMOOTHQUANT_CFG - elif cfg.quantization.algorithm == "fp8": - atq_config = atq.FP8_DEFAULT_CFG - elif cfg.quantization.algorithm == "awq": - atq_config = atq.INT4_AWQ_CFG - else: - raise ValueError(f"Unsupported quantization algorithm: {cfg.quantization.algorithm}") - - def forward_loop(): - model.generate( - input_prompts=final_prompts, - length_params=length_params, - sampling_params=sampling_params, - inference_config=cfg, - ) - - atq.quantize(model, atq_config, forward_loop) - - responses = model.generate( - input_prompts=final_prompts, - length_params=length_params, - sampling_params=sampling_params, - inference_config=cfg, - ) - # ============== Quantization End ========================= - - results = [] - for response, prompt in zip(responses, final_prompts): - prompt['full_text'] = response["clean_text"] - prompt['text'] = response["clean_response"] - prompt['model_id'] = cfg.neva_model_file - prompt['answer_id'] = 0 - prompt['metadata'] = {} - results.append(prompt) - - with open(cfg.output_file, 'w') as f: - for result in results: - f.write(json.dumps(result) + '\n') - - """ - # Second method of running text generation, call trainer.predict - ds = RequestDataSet(final_prompts) - request_dl = DataLoader(dataset=ds, batch_size=1) - config = OmegaConf.to_container(cfg.inference) - model.set_inference_config(config) - response = trainer.predict(model, request_dl) - - print("***************************") - print(response) - print("***************************") - """ - - -if __name__ == '__main__': - main() # noqa pylint: disable=no-value-for-parameter diff --git a/examples/multimodal/mllm/neva/neva_finetune.py b/examples/multimodal/mllm/neva/neva_finetune.py deleted file mode 100644 index fa32e5e2d24b..000000000000 --- a/examples/multimodal/mllm/neva/neva_finetune.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch.multiprocessing as mp -from omegaconf.omegaconf import OmegaConf, open_dict - -from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel -from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.exp_manager import exp_manager - -mp.set_start_method("spawn", force=True) - - -@hydra_runner(config_path="conf", config_name="neva_finetune") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - trainer = MegatronTrainerBuilder(cfg).create_trainer() - exp_manager(trainer, cfg.exp_manager) - - # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams - with open_dict(cfg): - cfg.model.precision = cfg.trainer.precision - - if cfg.model.restore_from_path is None: - model = MegatronNevaModel(cfg.model, trainer) - else: - model = MegatronNevaModel.restore_from( - restore_path=cfg.model.restore_from_path, - trainer=trainer, - override_config_path=cfg.model, - save_restore_connector=NLPSaveRestoreConnector(), - strict=False, - ) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/mllm/neva/neva_peft.py b/examples/multimodal/mllm/neva/neva_peft.py deleted file mode 100644 index 1738c41c2e48..000000000000 --- a/examples/multimodal/mllm/neva/neva_peft.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch.multiprocessing as mp -from omegaconf.omegaconf import OmegaConf, open_dict - -from nemo.collections.multimodal.models.neva.neva_peft_models import MegatronNevaLoRAModel -from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector - -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.exp_manager import exp_manager - -mp.set_start_method("spawn", force=True) - - -@hydra_runner(config_path="conf", config_name="neva_peft") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - trainer = MegatronTrainerBuilder(cfg).create_trainer() - exp_manager(trainer, cfg.exp_manager) - - # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams - with open_dict(cfg): - cfg.model.precision = cfg.trainer.precision - - if cfg.model.restore_from_path is None: - model = MegatronNevaLoRAModel(cfg.model, trainer) - else: - model = MegatronNevaLoRAModel.restore_from( - restore_path=cfg.model.restore_from_path, - trainer=trainer, - override_config_path=cfg.model, - save_restore_connector=NLPSaveRestoreConnector(), - strict=False, - ) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/multimodal/mllm/neva/neva_pretrain.py b/examples/multimodal/mllm/neva/neva_pretrain.py deleted file mode 100644 index b7d23532c1b0..000000000000 --- a/examples/multimodal/mllm/neva/neva_pretrain.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch.multiprocessing as mp -from omegaconf.omegaconf import OmegaConf, open_dict - -from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel -from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder - -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.exp_manager import exp_manager - -mp.set_start_method("spawn", force=True) - - -@hydra_runner(config_path="conf", config_name="neva_config") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - trainer = MegatronTrainerBuilder(cfg).create_trainer() - exp_manager(trainer, cfg.exp_manager) - - # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams - with open_dict(cfg): - cfg.model.precision = cfg.trainer.precision - - model = MegatronNevaModel(cfg.model, trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/vision/convert_ckpt_to_nemo.py b/examples/vision/convert_ckpt_to_nemo.py deleted file mode 100644 index 27782f34d0bb..000000000000 --- a/examples/vision/convert_ckpt_to_nemo.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r""" -Conversion script to convert PTL checkpoints into nemo checkpoint. - Example to run this conversion script: - python -m torch.distributed.launch --nproc_per_node= * \ - convert_ckpt_to_nemo.py \ - --checkpoint_folder \ - --checkpoint_name \ - --nemo_file_path \ - --tensor_model_parallel_size \ - --pipeline_model_parallel_size -""" - -import os -from argparse import ArgumentParser - -import torch -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from pytorch_lightning.trainer.trainer import Trainer - -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector -from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel -from nemo.utils import AppState, logging -from nemo.utils.distributed import initialize_distributed -from nemo.utils.model_utils import inject_model_parallel_rank - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - - -def get_args(): - parser = ArgumentParser() - parser.add_argument( - "--checkpoint_folder", - type=str, - default=None, - required=True, - help="Path to PTL checkpoints saved during training. Ex: /raid/nemo_experiments/megatron_gpt/checkpoints", - ) - parser.add_argument( - "--checkpoint_name", - type=str, - default=None, - required=True, - help="Name of checkpoint to be used. Ex: megatron_gpt--val_loss=6.34-step=649-last.ckpt", - ) - - parser.add_argument( - "--hparams_file", - type=str, - default=None, - required=False, - help="Path config for restoring. It's created during training and may need to be modified during restore if restore environment is different than training. Ex: /raid/nemo_experiments/megatron_gpt/hparams.yaml", - ) - parser.add_argument("--nemo_file_path", type=str, default=None, required=True, help="Path to output .nemo file.") - parser.add_argument("--gpus_per_node", type=int, required=True, default=None) - parser.add_argument("--tensor_model_parallel_size", type=int, required=True, default=None) - parser.add_argument("--pipeline_model_parallel_size", type=int, required=True, default=None) - parser.add_argument( - "--pipeline_model_parallel_split_rank", - type=int, - required=False, - default=None, - help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", - ) - parser.add_argument("--model_type", type=str, required=True, default="vit_classification") - parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) - parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") - - args = parser.parse_args() - return args - - -def convert(local_rank, rank, world_size, args): - app_state = AppState() - app_state.data_parallel_rank = 0 - num_nodes = world_size // args.gpus_per_node - if args.bcp: - trainer = Trainer( - devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu', plugins=[TorchElasticEnvironment()] - ) - else: - trainer = Trainer(devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu') - - app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size - app_state.tensor_model_parallel_size = args.tensor_model_parallel_size - - # no use atm, use to split ranks in encoder/decoder models. - if args.pipeline_model_parallel_size > 1 and args.model_type in []: - if args.pipeline_model_parallel_split_rank is not None: - app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_split_rank - else: - if args.pipeline_model_parallel_size % 2 != 0: - raise ValueError( - f"Pipeline model parallel size {args.pipeline_model_parallel_size} must be even if split rank is not specified." - ) - else: - # If split rank is not set, then we set it to be pipeline_model_parallel_size // 2 - this is because in most cases we have the same number of enc/dec layers. - app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_size // 2 - else: - app_state.pipeline_model_parallel_split_rank = None - - app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size - - parallel_state.initialize_model_parallel( - tensor_model_parallel_size=app_state.tensor_model_parallel_size, - pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, - pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, - ) - - app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() - app_state.tensor_model_parallel_rank = parallel_state.get_tensor_model_parallel_rank() - - # inject model parallel rank - checkpoint_path = inject_model_parallel_rank(os.path.join(args.checkpoint_folder, args.checkpoint_name)) - - logging.info( - f'rank: {rank}, local_rank: {local_rank}, is loading checkpoint: {checkpoint_path} for tp_rank: {app_state.tensor_model_parallel_rank} and pp_rank: {app_state.pipeline_model_parallel_rank}' - ) - - if args.model_type == 'vit_classification': - model = MegatronVitClassificationModel.load_from_checkpoint( - checkpoint_path, hparams_file=args.hparams_file, trainer=trainer - ) - else: - raise ValueError(f"Unrecognized model_type {args.model_type}.") - - model._save_restore_connector = NLPSaveRestoreConnector() - - if torch.distributed.is_initialized(): - torch.distributed.barrier() - - model.save_to(args.nemo_file_path) - - logging.info(f'NeMo model saved to: {args.nemo_file_path}') - - -if __name__ == '__main__': - args = get_args() - local_rank, rank, world_size = initialize_distributed(args) - convert(local_rank, rank, world_size, args) diff --git a/examples/vision/vision_transformer/conf/megatron_vit_classification_config.yaml b/examples/vision/vision_transformer/conf/megatron_vit_classification_config.yaml deleted file mode 100755 index 264b49a0ba37..000000000000 --- a/examples/vision/vision_transformer/conf/megatron_vit_classification_config.yaml +++ /dev/null @@ -1,163 +0,0 @@ -# shared by ViT classification pretraining and fine-tuning - -name: megatron_vit_classify -restore_from_path: null # used when starting from a .nemo file - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: 16 - logger: False # logger provided by exp_manager - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 # PTL default. In practice, max_steps will be reached first. - max_steps: 95000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches - log_every_n_steps: 10 - val_check_interval: 100 - limit_val_batches: 50 - limit_test_batches: 500 - accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually - -exp_manager: - explicit_log_dir: null - exp_dir: null - name: megatron_vit_classification - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: True - resume_ignore_no_checkpoint: True - create_checkpoint_callback: True - checkpoint_callback_params: - monitor: val_loss - save_top_k: 10 - mode: min - always_save_nemo: False # saves nemo file during validation, not implemented for model parallel - save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits - filename: 'megatron_vit_classification--{val_loss:.2f}-{step}-{consumed_samples}' - model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} - - -model: - precision: ${trainer.precision} - # specify micro_batch_size, global_batch_size, and model parallelism - # gradient accumulation will be done automatically based on data_parallel_size - micro_batch_size: 256 # limited by GPU memory - global_batch_size: 4096 # will use more micro batches to reach global batch size - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - virtual_pipeline_model_parallel_size: null # interleaved pipeline - - restore_from_path: null # used in fine-tuning - - # vision configs - vision_pretraining_type: "classify" - num_classes: 1000 - patch_dim: 16 - img_h: 224 - img_w: 224 - classes_fraction: 1.0 - data_per_class_fraction: 1.0 - num_channels: 3 - drop_path_rate: 0.0 - - # model architecture - encoder_seq_length: 196 - max_position_embeddings: ${.encoder_seq_length} - num_layers: 12 - hidden_size: 768 - ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. - num_attention_heads: 12 - init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') - use_scaled_init_method: True # use scaled residuals initialization - hidden_dropout: 0.1 # Dropout probability for hidden state transformer. - attention_dropout: 0. - kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. - normalization: layernorm # Type of normalization layers - layernorm_epsilon: 1e-5 - do_layer_norm_weight_decay: False # True means weight decay on all params - pre_process: True # add embedding - post_process: True # add pooler - persist_layer_norm: True # Use of persistent fused layer norm kernel. - - # precision - native_amp_init_scale: 4294967296 # 2 ** 32 - native_amp_growth_interval: 1000 - hysteresis: 2 # Gradient scale hysteresis - fp32_residual_connection: False # Move residual connections to fp32 - fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 - - # Megatron O2-style half-precision - megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters - grad_allreduce_chunk_size_mb: 125 - grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce - masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. - bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - use_cpu_initialization: False # Init weights on the CPU (slow for large models) - onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. - openai_gelu: False - bias_activation_fusion: False - megatron_legacy: False - - ## Activation Checkpointing - # NeMo Megatron supports 'selective' activation checkpointing where only the memory intensive part of attention is checkpointed. - # These memory intensive activations are also less compute intensive which makes activation checkpointing more efficient for LLMs (20B+). - # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. - # 'full' will checkpoint the entire transformer layer. - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - # 'uniform' divides the total number of transformer layers and checkpoints the input activation - # of each chunk at the specified granularity - # 'block' checkpoints the specified number of layers per pipeline stage at the specified granularity - activations_checkpoint_num_layers: null # not used with 'selective' - # when using 'uniform' this creates groups of transformer layers to checkpoint. Usually set to 1. Increase to save more memory. - # when using 'block' this this will checkpoint the first activations_checkpoint_num_layers per pipeline stage. - - ## Sequence Parallelism - # Makes tensor parallelism more memory efficient for LLMs (20B+) by parallelizing layer norms and dropout sequentially - # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. - sequence_parallel: False - - data: - # Path to image dataset must be specified by the user. - # Supports List - # List: can override from the CLI: "model.data.data_prefix=[/path/to/train, /path/to/val]", - data_path: ??? - num_workers: 2 - dataloader_type: cyclic # cyclic - validation_drop_last: True # Set to false if the last partial validation samples is to be consumed - data_sharding: False - - # Nsys profiling options - nsys_profile: - enabled: False - start_step: 10 # Global batch to start profiling - end_step: 10 # Global batch to end profiling - ranks: [ 0 ] # Global rank IDs to profile - gen_shape: False # Generate model and kernel details including input shapes - - optim: - name: fused_adam - lr: 5e-4 - weight_decay: 0.1 - betas: - - 0.9 - - 0.999 - sched: - name: CosineAnnealing - warmup_steps: 10000 - constant_steps: 0 - min_lr: 1e-5 \ No newline at end of file diff --git a/examples/vision/vision_transformer/conf/megatron_vit_classification_evaluate.yaml b/examples/vision/vision_transformer/conf/megatron_vit_classification_evaluate.yaml deleted file mode 100755 index 4b9a71bedc7d..000000000000 --- a/examples/vision/vision_transformer/conf/megatron_vit_classification_evaluate.yaml +++ /dev/null @@ -1,15 +0,0 @@ -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -model: - restore_from_path: null # Path to a trained ViT .nemo file - precision: ${trainer.precision} - micro_batch_size: 512 # we only supports DP=1 eval at the moment, GBS=MBS - - data: - num_workers: 2 - imagenet_val: ??? # path to imagenet val folder \ No newline at end of file diff --git a/examples/vision/vision_transformer/conf/megatron_vit_classification_infer.yaml b/examples/vision/vision_transformer/conf/megatron_vit_classification_infer.yaml deleted file mode 100755 index 553abb5bc23b..000000000000 --- a/examples/vision/vision_transformer/conf/megatron_vit_classification_infer.yaml +++ /dev/null @@ -1,12 +0,0 @@ -data_path: ??? # Path to a image folder for inference - -trainer: - devices: 1 - num_nodes: 1 - accelerator: gpu - logger: False # logger provided by exp_manager - precision: 16 # 16, 32, or bf16 - -model: - restore_from_path: null # Path to a trained ViT .nemo file - precision: ${trainer.precision} diff --git a/examples/vision/vision_transformer/megatron_vit_classification_evaluate.py b/examples/vision/vision_transformer/megatron_vit_classification_evaluate.py deleted file mode 100644 index d1e596ad1bce..000000000000 --- a/examples/vision/vision_transformer/megatron_vit_classification_evaluate.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import glob -import os - -import torch -from omegaconf.omegaconf import OmegaConf, open_dict -from PIL import Image -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from torch.utils.data import DataLoader, Dataset -from tqdm import tqdm - -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector -from nemo.collections.vision.data.imagenet_classnames import imagenet_classnames -from nemo.collections.vision.data.megatron.image_folder import ImageFolder -from nemo.collections.vision.data.megatron.vit_dataset import ClassificationTransform -from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.get_rank import is_global_rank_zero - - -@hydra_runner(config_path="conf", config_name="megatron_vit_classification_evaluate") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - plugins = [] - strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, find_unused_parameters=False, # we don't use DDP for async grad allreduce - ) - if cfg.get('cluster_type', None) == 'BCP': - plugins.append(TorchElasticEnvironment()) - - # trainer required for restoring model parallel models - trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) - - save_restore_connector = NLPSaveRestoreConnector() - if os.path.isdir(cfg.model.restore_from_path): - save_restore_connector.model_extracted_dir = cfg.model.restore_from_path - - model_cfg = MegatronVitClassificationModel.restore_from( - restore_path=cfg.model.restore_from_path, - trainer=trainer, - save_restore_connector=save_restore_connector, - return_config=True, - ) - - assert ( - cfg.trainer.devices * cfg.trainer.num_nodes - == model_cfg.tensor_model_parallel_size * model_cfg.pipeline_model_parallel_size - ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" - - # These configs are required to be off during inference. - with open_dict(model_cfg): - model_cfg.precision = trainer.precision - if trainer.precision != "bf16": - model_cfg.megatron_amp_O2 = False - model_cfg.sequence_parallel = False - model_cfg.activations_checkpoint_granularity = None - model_cfg.activations_checkpoint_method = None - - model = MegatronVitClassificationModel.restore_from( - restore_path=cfg.model.restore_from_path, - trainer=trainer, - override_config_path=model_cfg, - save_restore_connector=save_restore_connector, - strict=True, - ) - - model.eval() - - val_transform = ClassificationTransform(model.cfg, (model.cfg.img_h, model.cfg.img_w), train=False) - val_data = ImageFolder(root=cfg.model.data.imagenet_val, transform=val_transform,) - - # initialize apex DDP strategy - def dummy(): - return - - if trainer.strategy.launcher is not None: - trainer.strategy.launcher.launch(dummy, trainer=trainer) - trainer.strategy.setup_environment() - - test_loader = DataLoader(val_data, batch_size=cfg.model.micro_batch_size, num_workers=cfg.model.data.num_workers,) - - # get autocast_dtype - if trainer.precision in ['bf16', 'bf16-mixed']: - autocast_dtype = torch.bfloat16 - elif trainer.precision in [32, '32', '32-true']: - autocast_dtype = torch.float - elif trainer.precision in [16, '16', '16-mixed']: - autocast_dtype = torch.half - else: - raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') - - with torch.no_grad(), torch.cuda.amp.autocast( - enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, - ): - total = correct = 0.0 - for tokens, labels in tqdm(test_loader): - logits = model(tokens.cuda()) - class_indices = torch.argmax(logits, -1) - correct += (class_indices == labels.cuda()).float().sum() - total += len(labels) - - if is_global_rank_zero: - print(f"ViT Imagenet 1K Evaluation Accuracy: {correct / total:.4f}") - - -if __name__ == '__main__': - main() diff --git a/examples/vision/vision_transformer/megatron_vit_classification_finetune.py b/examples/vision/vision_transformer/megatron_vit_classification_finetune.py deleted file mode 100644 index cc81cda2c477..000000000000 --- a/examples/vision/vision_transformer/megatron_vit_classification_finetune.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from omegaconf.omegaconf import OmegaConf, open_dict - -from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector -from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel - -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.exp_manager import exp_manager - - -@hydra_runner(config_path="conf", config_name="megatron_vit_classification_config") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - trainer = MegatronTrainerBuilder(cfg).create_trainer() - exp_manager(trainer, cfg.exp_manager) - - # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams - with open_dict(cfg): - cfg.model.finetune = True - cfg.model.precision = cfg.trainer.precision - - model = MegatronVitClassificationModel.restore_from( - restore_path=cfg.model.restore_from_path, - trainer=trainer, - override_config_path=cfg.model, - save_restore_connector=NLPSaveRestoreConnector(), - strict=False, - ) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/examples/vision/vision_transformer/megatron_vit_classification_infer.py b/examples/vision/vision_transformer/megatron_vit_classification_infer.py deleted file mode 100644 index c48090d31475..000000000000 --- a/examples/vision/vision_transformer/megatron_vit_classification_infer.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import glob -import os - -import torch -from omegaconf.omegaconf import OmegaConf, open_dict -from PIL import Image -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment -from torch.utils.data import DataLoader, Dataset - -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector -from nemo.collections.vision.data.imagenet_classnames import imagenet_classnames -from nemo.collections.vision.data.megatron.vit_dataset import ClassificationTransform -from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.get_rank import is_global_rank_zero - -_IMG_EXTENSIONS = "jpg jpeg png ppm pgm pbm pnm".split() - - -class ImageFolderDataset(Dataset): - def __init__(self, folder_path, transform=None): - self.folder_path = folder_path - self.transform = transform - # Use glob to find all image files in folder_path - image_paths = [] - for ext in _IMG_EXTENSIONS + [x.upper() for x in _IMG_EXTENSIONS]: - search_pattern = os.path.join(folder_path, f"*.{ext}") - image_paths += glob.glob(search_pattern) - self.image_paths = image_paths - - def __len__(self): - return len(self.image_paths) - - def __getitem__(self, idx): - image_path = self.image_paths[idx] - image = Image.open(image_path).convert('RGB') - if self.transform is not None: - image = self.transform(image) - return image - - -@hydra_runner(config_path="conf", config_name="megatron_vit_classification_infer") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - plugins = [] - strategy = NLPDDPStrategy( - no_ddp_communication_hook=True, find_unused_parameters=False, # we don't use DDP for async grad allreduce - ) - if cfg.get('cluster_type', None) == 'BCP': - plugins.append(TorchElasticEnvironment()) - - # trainer required for restoring model parallel models - trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) - - save_restore_connector = NLPSaveRestoreConnector() - if os.path.isdir(cfg.model.restore_from_path): - save_restore_connector.model_extracted_dir = cfg.model.restore_from_path - - model_cfg = MegatronVitClassificationModel.restore_from( - restore_path=cfg.model.restore_from_path, - trainer=trainer, - save_restore_connector=save_restore_connector, - return_config=True, - ) - - assert ( - cfg.trainer.devices * cfg.trainer.num_nodes - == model_cfg.tensor_model_parallel_size * model_cfg.pipeline_model_parallel_size - ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" - - # These configs are required to be off during inference. - with open_dict(model_cfg): - model_cfg.precision = trainer.precision - if trainer.precision != "bf16": - model_cfg.megatron_amp_O2 = False - model_cfg.sequence_parallel = False - model_cfg.activations_checkpoint_granularity = None - model_cfg.activations_checkpoint_method = None - - model = MegatronVitClassificationModel.restore_from( - restore_path=cfg.model.restore_from_path, - trainer=trainer, - override_config_path=model_cfg, - save_restore_connector=save_restore_connector, - strict=True, - ) - - model.eval() - - test_transform = ClassificationTransform(cfg.model, (model_cfg.img_h, model_cfg.img_w), train=False) - test_data = ImageFolderDataset(folder_path=cfg.data_path, transform=test_transform,) - test_loader = DataLoader(test_data, batch_size=8) - - # initialize apex DDP strategy - def dummy(): - return - - if trainer.strategy.launcher is not None: - trainer.strategy.launcher.launch(dummy, trainer=trainer) - trainer.strategy.setup_environment() - - # get autocast_dtype - if trainer.precision in ['bf16', 'bf16-mixed']: - autocast_dtype = torch.bfloat16 - elif trainer.precision in [32, '32', '32-true']: - autocast_dtype = torch.float - elif trainer.precision in [16, '16', '16-mixed']: - autocast_dtype = torch.half - else: - raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') - - with torch.no_grad(), torch.cuda.amp.autocast( - enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, - ): - class_names = [] - for tokens in test_loader: - logits = model(tokens.cuda()) - class_indices = torch.argmax(logits, -1) - class_names += [imagenet_classnames[x] for x in class_indices] - - if is_global_rank_zero: - filenames = [os.path.basename(f) for f in test_data.image_paths] - print(f"Predicted classes: ", list(zip(filenames, class_names))) - - -if __name__ == '__main__': - main() diff --git a/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py b/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py deleted file mode 100644 index d39df23e6ba1..000000000000 --- a/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from omegaconf.omegaconf import OmegaConf, open_dict - -from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder -from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel - -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.exp_manager import exp_manager - - -@hydra_runner(config_path="conf", config_name="megatron_vit_classification_config") -def main(cfg) -> None: - logging.info("\n\n************** Experiment configuration ***********") - logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - - trainer = MegatronTrainerBuilder(cfg).create_trainer() - exp_manager(trainer, cfg.exp_manager) - - # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams - with open_dict(cfg): - cfg.model.precision = cfg.trainer.precision - - model = MegatronVitClassificationModel(cfg.model, trainer) - - trainer.fit(model) - - -if __name__ == '__main__': - main() diff --git a/nemo/collections/multimodal/data/__init__.py b/nemo/collections/multimodal/data/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/data/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/data/clip/__init__.py b/nemo/collections/multimodal/data/clip/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/data/clip/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/data/clip/augmentations/__init__.py b/nemo/collections/multimodal/data/clip/augmentations/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/data/clip/augmentations/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/data/clip/augmentations/augmentations.py b/nemo/collections/multimodal/data/clip/augmentations/augmentations.py deleted file mode 100644 index 2cf3dad64464..000000000000 --- a/nemo/collections/multimodal/data/clip/augmentations/augmentations.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is adapted from public repo -https://github.com/mlfoundations/open_clip/blob/28c994406e39a5babc749c76871d92f33e9c558d/src/open_clip/transform.py -by @yaoyu-33 -""" -from typing import Optional, Sequence, Tuple - -import torch -import torch.nn as nn -import torchvision.transforms.functional as F -from torchvision.transforms import ( - CenterCrop, - Compose, - InterpolationMode, - Normalize, - RandomResizedCrop, - Resize, - ToTensor, -) - -OPENAI_DATASET_MEAN = (0.48145466, 0.4578275, 0.40821073) -OPENAI_DATASET_STD = (0.26862954, 0.26130258, 0.27577711) - - -class ResizeMaxSize(nn.Module): - def __init__(self, max_size, interpolation=InterpolationMode.BICUBIC, fn='max', fill=0): - super().__init__() - if not isinstance(max_size, int): - raise TypeError(f"Size should be int. Got {type(max_size)}") - self.max_size = max_size - self.interpolation = interpolation - self.fn = min if fn == 'min' else min - self.fill = fill - - def forward(self, img): - if isinstance(img, torch.Tensor): - height, width = img.shape[:2] - else: - width, height = img.size - scale = self.max_size / float(max(height, width)) - if scale != 1.0: - new_size = tuple(round(dim * scale) for dim in (height, width)) - img = F.resize(img, new_size, self.interpolation) - pad_h = self.max_size - new_size[0] - pad_w = self.max_size - new_size[1] - img = F.pad(img, padding=[pad_w // 2, pad_h // 2, pad_w - pad_w // 2, pad_h - pad_h // 2], fill=self.fill) - return img - - -def _convert_to_rgb(image): - return image.convert('RGB') - - -def image_transform( - image_size: int, - is_train: bool, - mean: Optional[Tuple[float, ...]] = None, - std: Optional[Tuple[float, ...]] = None, - resize_longest_max: bool = False, - fill_color: int = 0, -): - mean = mean or OPENAI_DATASET_MEAN - if not isinstance(mean, (list, tuple)): - mean = (mean,) * 3 - - std = std or OPENAI_DATASET_STD - if not isinstance(std, (list, tuple)): - std = (std,) * 3 - - if isinstance(image_size, (list, tuple)) and image_size[0] == image_size[1]: - # for square size, pass size as int so that Resize() uses aspect preserving shortest edge - image_size = image_size[0] - - normalize = Normalize(mean=mean, std=std) - if is_train: - return Compose( - [ - RandomResizedCrop(image_size, scale=(0.9, 1.0), interpolation=InterpolationMode.BICUBIC), - _convert_to_rgb, - ToTensor(), - normalize, - ] - ) - else: - if resize_longest_max: - transforms = [ResizeMaxSize(image_size, fill=fill_color)] - else: - transforms = [ - Resize(image_size, interpolation=InterpolationMode.BICUBIC), - CenterCrop(image_size), - ] - transforms.extend( - [_convert_to_rgb, ToTensor(), normalize,] - ) - return Compose(transforms) diff --git a/nemo/collections/multimodal/data/clip/clip_dataset.py b/nemo/collections/multimodal/data/clip/clip_dataset.py deleted file mode 100644 index f63a86dc9174..000000000000 --- a/nemo/collections/multimodal/data/clip/clip_dataset.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from functools import partial -from typing import Any, Dict, List, Optional, Union - -import torch -from torch.utils.data import Dataset, default_collate - -from nemo.collections.multimodal.data.clip.augmentations.augmentations import image_transform -from nemo.collections.multimodal.data.clip.imagenet_zeroshot_data import imagenet_classnames, openai_imagenet_template -from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon -from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( - MegatronPretrainingRandomSampler, - MegatronPretrainingSampler, -) -from nemo.collections.vision.data.megatron.image_folder import ImageFolder -from nemo.collections.vision.data.megatron.vit_dataset import RandomSeedDataset - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - - -def tokenize(texts: Union[str, List[str]], tokenizer: Any, context_length: int = 77) -> torch.LongTensor: - """ - Returns the tokenized representation of given input string(s) - - Parameters - ---------- - texts : Union[str, List[str]] - An input string or a list of input strings to tokenize - tokenizer: - Tokenizer loaded in NeMo NeMo - context_length : int - The context length to use; all CLIP models use 77 as the context length - - Returns - ------- - A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length] - """ - texts_is_str = False - if isinstance(texts, str): - texts = [texts] - texts_is_str = True - - bos_id = tokenizer.bos_id - eos_id = tokenizer.eos_id - all_tokens = [[bos_id] + tokenizer.text_to_ids(text) + [eos_id] for text in texts] - result = torch.zeros(len(all_tokens), context_length, dtype=torch.long) - - for i, tokens in enumerate(all_tokens): - if len(tokens) > context_length: - tokens = tokens[:context_length] # Truncate - tokens[-1] = eos_id - result[i, : len(tokens)] = torch.tensor(tokens) - - if texts_is_str: - result = result[0] - return result - - -def get_preprocess_fns(model_cfg, tokenizer=None, is_train=True): - # Define transforms - img_size = (model_cfg.vision.get("img_h"), model_cfg.vision.get("img_w")) - img_mean = model_cfg.vision.get("img_mean") - img_std = model_cfg.vision.get("img_std") - img_transform = image_transform(img_size, is_train=is_train, mean=img_mean, std=img_std,) - text_transform = lambda x: x - if tokenizer is not None: - text_transform = partial( - tokenize, tokenizer=tokenizer, context_length=model_cfg.text.get("max_position_embeddings"), - ) - return img_transform, text_transform - - -def build_train_valid_datasets( - model_cfg, consumed_samples, tokenizer=None, -): - data_cfg = model_cfg.data - - # This function maps data that are tuples to dictionary. - def tuple_to_dict(inp): - for input in inp: - out_dict = dict() - out_dict['images'] = input[0] - out_dict['captions'] = input[1] - yield out_dict - - def transform_fn(sample, img_transform, text_transform): - image, text = sample["jpg"], sample["txt"] - return img_transform(image), text_transform(text) - - train_img_transform, text_transform = get_preprocess_fns(model_cfg, tokenizer, is_train=True) - train_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=consumed_samples, - map_fn=partial(transform_fn, img_transform=train_img_transform, text_transform=text_transform), - compose_fn=tuple_to_dict, - is_train=True, - ) - - val_data = None - if data_cfg.get("validation") is not None and data_cfg.validation.get("dataset_path"): - val_img_transform, text_transform = get_preprocess_fns(model_cfg, tokenizer, is_train=False) - val_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=0, - map_fn=partial(transform_fn, img_transform=val_img_transform, text_transform=text_transform), - compose_fn=tuple_to_dict, - is_train=False, - ) - - return train_data, val_data - - -# For zero-shot imagenet validation -def build_imagenet_validation_dataloader(model_cfg, tokenizer=None): - val_image_transform, text_transform = get_preprocess_fns(model_cfg, tokenizer, is_train=False) - data_cfg = model_cfg.data - - imagenet_val = {} - - imagenet_path = data_cfg.get("imagenet_val") - if imagenet_path is None: - return None - - image_dataset = ImageFolder(root=imagenet_path, transform=val_image_transform,) - - image_batch_sampler = MegatronPretrainingSampler( - total_samples=len(image_dataset), - consumed_samples=0, - micro_batch_size=model_cfg.micro_batch_size, - global_batch_size=model_cfg.global_batch_size, - data_parallel_rank=parallel_state.get_data_parallel_rank(), - data_parallel_size=parallel_state.get_data_parallel_world_size(), - drop_last=False, - ) - - def custom_collate(batch): - if len(batch) == 0: - return None, None - else: - return default_collate(batch) - - imagenet_val["images"] = torch.utils.data.DataLoader( - image_dataset, - batch_sampler=image_batch_sampler, - num_workers=min(data_cfg.num_workers, 2), - collate_fn=custom_collate, - pin_memory=True, - persistent_workers=True, - ) - - text_dataset = ImagenetClassnameDataset(imagenet_classnames, openai_imagenet_template, text_transform) - imagenet_val["texts"] = torch.utils.data.DataLoader( - text_dataset, - batch_size=text_dataset.num_templates, - num_workers=0, - pin_memory=True, - persistent_workers=False, - drop_last=False, - ) - return imagenet_val - - -class ImagenetClassnameDataset(Dataset): - def __init__(self, classnames, templates, text_transform): - self.num_templates = len(templates) - self.samples = [] - for classname in classnames: - texts = [template(classname) for template in templates] - self.samples.extend(text_transform(texts)) - - def __getitem__(self, index): - return self.samples[index] - - def __len__(self): - return len(self.samples) diff --git a/nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py b/nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py deleted file mode 100644 index c7387d37eba7..000000000000 --- a/nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py +++ /dev/null @@ -1,1100 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -imagenet_classnames = [ - "tench", - "goldfish", - "great white shark", - "tiger shark", - "hammerhead shark", - "electric ray", - "stingray", - "rooster", - "hen", - "ostrich", - "brambling", - "goldfinch", - "house finch", - "junco", - "indigo bunting", - "American robin", - "bulbul", - "jay", - "magpie", - "chickadee", - "American dipper", - "kite (bird of prey)", - "bald eagle", - "vulture", - "great grey owl", - "fire salamander", - "smooth newt", - "newt", - "spotted salamander", - "axolotl", - "American bullfrog", - "tree frog", - "tailed frog", - "loggerhead sea turtle", - "leatherback sea turtle", - "mud turtle", - "terrapin", - "box turtle", - "banded gecko", - "green iguana", - "Carolina anole", - "desert grassland whiptail lizard", - "agama", - "frilled-necked lizard", - "alligator lizard", - "Gila monster", - "European green lizard", - "chameleon", - "Komodo dragon", - "Nile crocodile", - "American alligator", - "triceratops", - "worm snake", - "ring-necked snake", - "eastern hog-nosed snake", - "smooth green snake", - "kingsnake", - "garter snake", - "water snake", - "vine snake", - "night snake", - "boa constrictor", - "African rock python", - "Indian cobra", - "green mamba", - "sea snake", - "Saharan horned viper", - "eastern diamondback rattlesnake", - "sidewinder rattlesnake", - "trilobite", - "harvestman", - "scorpion", - "yellow garden spider", - "barn spider", - "European garden spider", - "southern black widow", - "tarantula", - "wolf spider", - "tick", - "centipede", - "black grouse", - "ptarmigan", - "ruffed grouse", - "prairie grouse", - "peafowl", - "quail", - "partridge", - "african grey parrot", - "macaw", - "sulphur-crested cockatoo", - "lorikeet", - "coucal", - "bee eater", - "hornbill", - "hummingbird", - "jacamar", - "toucan", - "duck", - "red-breasted merganser", - "goose", - "black swan", - "tusker", - "echidna", - "platypus", - "wallaby", - "koala", - "wombat", - "jellyfish", - "sea anemone", - "brain coral", - "flatworm", - "nematode", - "conch", - "snail", - "slug", - "sea slug", - "chiton", - "chambered nautilus", - "Dungeness crab", - "rock crab", - "fiddler crab", - "red king crab", - "American lobster", - "spiny lobster", - "crayfish", - "hermit crab", - "isopod", - "white stork", - "black stork", - "spoonbill", - "flamingo", - "little blue heron", - "great egret", - "bittern bird", - "crane bird", - "limpkin", - "common gallinule", - "American coot", - "bustard", - "ruddy turnstone", - "dunlin", - "common redshank", - "dowitcher", - "oystercatcher", - "pelican", - "king penguin", - "albatross", - "grey whale", - "killer whale", - "dugong", - "sea lion", - "Chihuahua", - "Japanese Chin", - "Maltese", - "Pekingese", - "Shih Tzu", - "King Charles Spaniel", - "Papillon", - "toy terrier", - "Rhodesian Ridgeback", - "Afghan Hound", - "Basset Hound", - "Beagle", - "Bloodhound", - "Bluetick Coonhound", - "Black and Tan Coonhound", - "Treeing Walker Coonhound", - "English foxhound", - "Redbone Coonhound", - "borzoi", - "Irish Wolfhound", - "Italian Greyhound", - "Whippet", - "Ibizan Hound", - "Norwegian Elkhound", - "Otterhound", - "Saluki", - "Scottish Deerhound", - "Weimaraner", - "Staffordshire Bull Terrier", - "American Staffordshire Terrier", - "Bedlington Terrier", - "Border Terrier", - "Kerry Blue Terrier", - "Irish Terrier", - "Norfolk Terrier", - "Norwich Terrier", - "Yorkshire Terrier", - "Wire Fox Terrier", - "Lakeland Terrier", - "Sealyham Terrier", - "Airedale Terrier", - "Cairn Terrier", - "Australian Terrier", - "Dandie Dinmont Terrier", - "Boston Terrier", - "Miniature Schnauzer", - "Giant Schnauzer", - "Standard Schnauzer", - "Scottish Terrier", - "Tibetan Terrier", - "Australian Silky Terrier", - "Soft-coated Wheaten Terrier", - "West Highland White Terrier", - "Lhasa Apso", - "Flat-Coated Retriever", - "Curly-coated Retriever", - "Golden Retriever", - "Labrador Retriever", - "Chesapeake Bay Retriever", - "German Shorthaired Pointer", - "Vizsla", - "English Setter", - "Irish Setter", - "Gordon Setter", - "Brittany dog", - "Clumber Spaniel", - "English Springer Spaniel", - "Welsh Springer Spaniel", - "Cocker Spaniel", - "Sussex Spaniel", - "Irish Water Spaniel", - "Kuvasz", - "Schipperke", - "Groenendael dog", - "Malinois", - "Briard", - "Australian Kelpie", - "Komondor", - "Old English Sheepdog", - "Shetland Sheepdog", - "collie", - "Border Collie", - "Bouvier des Flandres dog", - "Rottweiler", - "German Shepherd Dog", - "Dobermann", - "Miniature Pinscher", - "Greater Swiss Mountain Dog", - "Bernese Mountain Dog", - "Appenzeller Sennenhund", - "Entlebucher Sennenhund", - "Boxer", - "Bullmastiff", - "Tibetan Mastiff", - "French Bulldog", - "Great Dane", - "St. Bernard", - "husky", - "Alaskan Malamute", - "Siberian Husky", - "Dalmatian", - "Affenpinscher", - "Basenji", - "pug", - "Leonberger", - "Newfoundland dog", - "Great Pyrenees dog", - "Samoyed", - "Pomeranian", - "Chow Chow", - "Keeshond", - "brussels griffon", - "Pembroke Welsh Corgi", - "Cardigan Welsh Corgi", - "Toy Poodle", - "Miniature Poodle", - "Standard Poodle", - "Mexican hairless dog (xoloitzcuintli)", - "grey wolf", - "Alaskan tundra wolf", - "red wolf or maned wolf", - "coyote", - "dingo", - "dhole", - "African wild dog", - "hyena", - "red fox", - "kit fox", - "Arctic fox", - "grey fox", - "tabby cat", - "tiger cat", - "Persian cat", - "Siamese cat", - "Egyptian Mau", - "cougar", - "lynx", - "leopard", - "snow leopard", - "jaguar", - "lion", - "tiger", - "cheetah", - "brown bear", - "American black bear", - "polar bear", - "sloth bear", - "mongoose", - "meerkat", - "tiger beetle", - "ladybug", - "ground beetle", - "longhorn beetle", - "leaf beetle", - "dung beetle", - "rhinoceros beetle", - "weevil", - "fly", - "bee", - "ant", - "grasshopper", - "cricket insect", - "stick insect", - "cockroach", - "praying mantis", - "cicada", - "leafhopper", - "lacewing", - "dragonfly", - "damselfly", - "red admiral butterfly", - "ringlet butterfly", - "monarch butterfly", - "small white butterfly", - "sulphur butterfly", - "gossamer-winged butterfly", - "starfish", - "sea urchin", - "sea cucumber", - "cottontail rabbit", - "hare", - "Angora rabbit", - "hamster", - "porcupine", - "fox squirrel", - "marmot", - "beaver", - "guinea pig", - "common sorrel horse", - "zebra", - "pig", - "wild boar", - "warthog", - "hippopotamus", - "ox", - "water buffalo", - "bison", - "ram (adult male sheep)", - "bighorn sheep", - "Alpine ibex", - "hartebeest", - "impala (antelope)", - "gazelle", - "arabian camel", - "llama", - "weasel", - "mink", - "European polecat", - "black-footed ferret", - "otter", - "skunk", - "badger", - "armadillo", - "three-toed sloth", - "orangutan", - "gorilla", - "chimpanzee", - "gibbon", - "siamang", - "guenon", - "patas monkey", - "baboon", - "macaque", - "langur", - "black-and-white colobus", - "proboscis monkey", - "marmoset", - "white-headed capuchin", - "howler monkey", - "titi monkey", - "Geoffroy's spider monkey", - "common squirrel monkey", - "ring-tailed lemur", - "indri", - "Asian elephant", - "African bush elephant", - "red panda", - "giant panda", - "snoek fish", - "eel", - "silver salmon", - "rock beauty fish", - "clownfish", - "sturgeon", - "gar fish", - "lionfish", - "pufferfish", - "abacus", - "abaya", - "academic gown", - "accordion", - "acoustic guitar", - "aircraft carrier", - "airliner", - "airship", - "altar", - "ambulance", - "amphibious vehicle", - "analog clock", - "apiary", - "apron", - "trash can", - "assault rifle", - "backpack", - "bakery", - "balance beam", - "balloon", - "ballpoint pen", - "Band-Aid", - "banjo", - "baluster / handrail", - "barbell", - "barber chair", - "barbershop", - "barn", - "barometer", - "barrel", - "wheelbarrow", - "baseball", - "basketball", - "bassinet", - "bassoon", - "swimming cap", - "bath towel", - "bathtub", - "station wagon", - "lighthouse", - "beaker", - "military hat (bearskin or shako)", - "beer bottle", - "beer glass", - "bell tower", - "baby bib", - "tandem bicycle", - "bikini", - "ring binder", - "binoculars", - "birdhouse", - "boathouse", - "bobsleigh", - "bolo tie", - "poke bonnet", - "bookcase", - "bookstore", - "bottle cap", - "hunting bow", - "bow tie", - "brass memorial plaque", - "bra", - "breakwater", - "breastplate", - "broom", - "bucket", - "buckle", - "bulletproof vest", - "high-speed train", - "butcher shop", - "taxicab", - "cauldron", - "candle", - "cannon", - "canoe", - "can opener", - "cardigan", - "car mirror", - "carousel", - "tool kit", - "cardboard box / carton", - "car wheel", - "automated teller machine", - "cassette", - "cassette player", - "castle", - "catamaran", - "CD player", - "cello", - "mobile phone", - "chain", - "chain-link fence", - "chain mail", - "chainsaw", - "storage chest", - "chiffonier", - "bell or wind chime", - "china cabinet", - "Christmas stocking", - "church", - "movie theater", - "cleaver", - "cliff dwelling", - "cloak", - "clogs", - "cocktail shaker", - "coffee mug", - "coffeemaker", - "spiral or coil", - "combination lock", - "computer keyboard", - "candy store", - "container ship", - "convertible", - "corkscrew", - "cornet", - "cowboy boot", - "cowboy hat", - "cradle", - "construction crane", - "crash helmet", - "crate", - "infant bed", - "Crock Pot", - "croquet ball", - "crutch", - "cuirass", - "dam", - "desk", - "desktop computer", - "rotary dial telephone", - "diaper", - "digital clock", - "digital watch", - "dining table", - "dishcloth", - "dishwasher", - "disc brake", - "dock", - "dog sled", - "dome", - "doormat", - "drilling rig", - "drum", - "drumstick", - "dumbbell", - "Dutch oven", - "electric fan", - "electric guitar", - "electric locomotive", - "entertainment center", - "envelope", - "espresso machine", - "face powder", - "feather boa", - "filing cabinet", - "fireboat", - "fire truck", - "fire screen", - "flagpole", - "flute", - "folding chair", - "football helmet", - "forklift", - "fountain", - "fountain pen", - "four-poster bed", - "freight car", - "French horn", - "frying pan", - "fur coat", - "garbage truck", - "gas mask or respirator", - "gas pump", - "goblet", - "go-kart", - "golf ball", - "golf cart", - "gondola", - "gong", - "gown", - "grand piano", - "greenhouse", - "radiator grille", - "grocery store", - "guillotine", - "hair clip", - "hair spray", - "half-track", - "hammer", - "hamper", - "hair dryer", - "hand-held computer", - "handkerchief", - "hard disk drive", - "harmonica", - "harp", - "combine harvester", - "hatchet", - "holster", - "home theater", - "honeycomb", - "hook", - "hoop skirt", - "gymnastic horizontal bar", - "horse-drawn vehicle", - "hourglass", - "iPod", - "clothes iron", - "carved pumpkin", - "jeans", - "jeep", - "T-shirt", - "jigsaw puzzle", - "rickshaw", - "joystick", - "kimono", - "knee pad", - "knot", - "lab coat", - "ladle", - "lampshade", - "laptop computer", - "lawn mower", - "lens cap", - "letter opener", - "library", - "lifeboat", - "lighter", - "limousine", - "ocean liner", - "lipstick", - "slip-on shoe", - "lotion", - "music speaker", - "loupe magnifying glass", - "sawmill", - "magnetic compass", - "messenger bag", - "mailbox", - "tights", - "one-piece bathing suit", - "manhole cover", - "maraca", - "marimba", - "mask", - "matchstick", - "maypole", - "maze", - "measuring cup", - "medicine cabinet", - "megalith", - "microphone", - "microwave oven", - "military uniform", - "milk can", - "minibus", - "miniskirt", - "minivan", - "missile", - "mitten", - "mixing bowl", - "mobile home", - "ford model t", - "modem", - "monastery", - "monitor", - "moped", - "mortar and pestle", - "graduation cap", - "mosque", - "mosquito net", - "vespa", - "mountain bike", - "tent", - "computer mouse", - "mousetrap", - "moving van", - "muzzle", - "metal nail", - "neck brace", - "necklace", - "baby pacifier", - "notebook computer", - "obelisk", - "oboe", - "ocarina", - "odometer", - "oil filter", - "pipe organ", - "oscilloscope", - "overskirt", - "bullock cart", - "oxygen mask", - "product packet / packaging", - "paddle", - "paddle wheel", - "padlock", - "paintbrush", - "pajamas", - "palace", - "pan flute", - "paper towel", - "parachute", - "parallel bars", - "park bench", - "parking meter", - "railroad car", - "patio", - "payphone", - "pedestal", - "pencil case", - "pencil sharpener", - "perfume", - "Petri dish", - "photocopier", - "plectrum", - "Pickelhaube", - "picket fence", - "pickup truck", - "pier", - "piggy bank", - "pill bottle", - "pillow", - "ping-pong ball", - "pinwheel", - "pirate ship", - "drink pitcher", - "block plane", - "planetarium", - "plastic bag", - "plate rack", - "farm plow", - "plunger", - "Polaroid camera", - "pole", - "police van", - "poncho", - "pool table", - "soda bottle", - "plant pot", - "potter's wheel", - "power drill", - "prayer rug", - "printer", - "prison", - "missile", - "projector", - "hockey puck", - "punching bag", - "purse", - "quill", - "quilt", - "race car", - "racket", - "radiator", - "radio", - "radio telescope", - "rain barrel", - "recreational vehicle", - "fishing casting reel", - "reflex camera", - "refrigerator", - "remote control", - "restaurant", - "revolver", - "rifle", - "rocking chair", - "rotisserie", - "eraser", - "rugby ball", - "ruler measuring stick", - "sneaker", - "safe", - "safety pin", - "salt shaker", - "sandal", - "sarong", - "saxophone", - "scabbard", - "weighing scale", - "school bus", - "schooner", - "scoreboard", - "CRT monitor", - "screw", - "screwdriver", - "seat belt", - "sewing machine", - "shield", - "shoe store", - "shoji screen / room divider", - "shopping basket", - "shopping cart", - "shovel", - "shower cap", - "shower curtain", - "ski", - "balaclava ski mask", - "sleeping bag", - "slide rule", - "sliding door", - "slot machine", - "snorkel", - "snowmobile", - "snowplow", - "soap dispenser", - "soccer ball", - "sock", - "solar thermal collector", - "sombrero", - "soup bowl", - "keyboard space bar", - "space heater", - "space shuttle", - "spatula", - "motorboat", - "spider web", - "spindle", - "sports car", - "spotlight", - "stage", - "steam locomotive", - "through arch bridge", - "steel drum", - "stethoscope", - "scarf", - "stone wall", - "stopwatch", - "stove", - "strainer", - "tram", - "stretcher", - "couch", - "stupa", - "submarine", - "suit", - "sundial", - "sunglasses", - "sunglasses", - "sunscreen", - "suspension bridge", - "mop", - "sweatshirt", - "swim trunks / shorts", - "swing", - "electrical switch", - "syringe", - "table lamp", - "tank", - "tape player", - "teapot", - "teddy bear", - "television", - "tennis ball", - "thatched roof", - "front curtain", - "thimble", - "threshing machine", - "throne", - "tile roof", - "toaster", - "tobacco shop", - "toilet seat", - "torch", - "totem pole", - "tow truck", - "toy store", - "tractor", - "semi-trailer truck", - "tray", - "trench coat", - "tricycle", - "trimaran", - "tripod", - "triumphal arch", - "trolleybus", - "trombone", - "hot tub", - "turnstile", - "typewriter keyboard", - "umbrella", - "unicycle", - "upright piano", - "vacuum cleaner", - "vase", - "vaulted or arched ceiling", - "velvet fabric", - "vending machine", - "vestment", - "viaduct", - "violin", - "volleyball", - "waffle iron", - "wall clock", - "wallet", - "wardrobe", - "military aircraft", - "sink", - "washing machine", - "water bottle", - "water jug", - "water tower", - "whiskey jug", - "whistle", - "hair wig", - "window screen", - "window shade", - "Windsor tie", - "wine bottle", - "airplane wing", - "wok", - "wooden spoon", - "wool", - "split-rail fence", - "shipwreck", - "sailboat", - "yurt", - "website", - "comic book", - "crossword", - "traffic or street sign", - "traffic light", - "dust jacket", - "menu", - "plate", - "guacamole", - "consomme", - "hot pot", - "trifle", - "ice cream", - "popsicle", - "baguette", - "bagel", - "pretzel", - "cheeseburger", - "hot dog", - "mashed potatoes", - "cabbage", - "broccoli", - "cauliflower", - "zucchini", - "spaghetti squash", - "acorn squash", - "butternut squash", - "cucumber", - "artichoke", - "bell pepper", - "cardoon", - "mushroom", - "Granny Smith apple", - "strawberry", - "orange", - "lemon", - "fig", - "pineapple", - "banana", - "jackfruit", - "cherimoya (custard apple)", - "pomegranate", - "hay", - "carbonara", - "chocolate syrup", - "dough", - "meatloaf", - "pizza", - "pot pie", - "burrito", - "red wine", - "espresso", - "tea cup", - "eggnog", - "mountain", - "bubble", - "cliff", - "coral reef", - "geyser", - "lakeshore", - "promontory", - "sandbar", - "beach", - "valley", - "volcano", - "baseball player", - "bridegroom", - "scuba diver", - "rapeseed", - "daisy", - "yellow lady's slipper", - "corn", - "acorn", - "rose hip", - "horse chestnut seed", - "coral fungus", - "agaric", - "gyromitra", - "stinkhorn mushroom", - "earth star fungus", - "hen of the woods mushroom", - "bolete", - "corn cob", - "toilet paper", -] - -openai_imagenet_template = [ - lambda c: f'a bad photo of a {c}.', - lambda c: f'a photo of many {c}.', - lambda c: f'a sculpture of a {c}.', - lambda c: f'a photo of the hard to see {c}.', - lambda c: f'a low resolution photo of the {c}.', - lambda c: f'a rendering of a {c}.', - lambda c: f'graffiti of a {c}.', - lambda c: f'a bad photo of the {c}.', - lambda c: f'a cropped photo of the {c}.', - lambda c: f'a tattoo of a {c}.', - lambda c: f'the embroidered {c}.', - lambda c: f'a photo of a hard to see {c}.', - lambda c: f'a bright photo of a {c}.', - lambda c: f'a photo of a clean {c}.', - lambda c: f'a photo of a dirty {c}.', - lambda c: f'a dark photo of the {c}.', - lambda c: f'a drawing of a {c}.', - lambda c: f'a photo of my {c}.', - lambda c: f'the plastic {c}.', - lambda c: f'a photo of the cool {c}.', - lambda c: f'a close-up photo of a {c}.', - lambda c: f'a black and white photo of the {c}.', - lambda c: f'a painting of the {c}.', - lambda c: f'a painting of a {c}.', - lambda c: f'a pixelated photo of the {c}.', - lambda c: f'a sculpture of the {c}.', - lambda c: f'a bright photo of the {c}.', - lambda c: f'a cropped photo of a {c}.', - lambda c: f'a plastic {c}.', - lambda c: f'a photo of the dirty {c}.', - lambda c: f'a jpeg corrupted photo of a {c}.', - lambda c: f'a blurry photo of the {c}.', - lambda c: f'a photo of the {c}.', - lambda c: f'a good photo of the {c}.', - lambda c: f'a rendering of the {c}.', - lambda c: f'a {c} in a video game.', - lambda c: f'a photo of one {c}.', - lambda c: f'a doodle of a {c}.', - lambda c: f'a close-up photo of the {c}.', - lambda c: f'a photo of a {c}.', - lambda c: f'the origami {c}.', - lambda c: f'the {c} in a video game.', - lambda c: f'a sketch of a {c}.', - lambda c: f'a doodle of the {c}.', - lambda c: f'a origami {c}.', - lambda c: f'a low resolution photo of a {c}.', - lambda c: f'the toy {c}.', - lambda c: f'a rendition of the {c}.', - lambda c: f'a photo of the clean {c}.', - lambda c: f'a photo of a large {c}.', - lambda c: f'a rendition of a {c}.', - lambda c: f'a photo of a nice {c}.', - lambda c: f'a photo of a weird {c}.', - lambda c: f'a blurry photo of a {c}.', - lambda c: f'a cartoon {c}.', - lambda c: f'art of a {c}.', - lambda c: f'a sketch of the {c}.', - lambda c: f'a embroidered {c}.', - lambda c: f'a pixelated photo of a {c}.', - lambda c: f'itap of the {c}.', - lambda c: f'a jpeg corrupted photo of the {c}.', - lambda c: f'a good photo of a {c}.', - lambda c: f'a plushie {c}.', - lambda c: f'a photo of the nice {c}.', - lambda c: f'a photo of the small {c}.', - lambda c: f'a photo of the weird {c}.', - lambda c: f'the cartoon {c}.', - lambda c: f'art of the {c}.', - lambda c: f'a drawing of the {c}.', - lambda c: f'a photo of the large {c}.', - lambda c: f'a black and white photo of a {c}.', - lambda c: f'the plushie {c}.', - lambda c: f'a dark photo of a {c}.', - lambda c: f'itap of a {c}.', - lambda c: f'graffiti of the {c}.', - lambda c: f'a toy {c}.', - lambda c: f'itap of my {c}.', - lambda c: f'a photo of a cool {c}.', - lambda c: f'a photo of a small {c}.', - lambda c: f'a tattoo of the {c}.', -] diff --git a/nemo/collections/multimodal/data/common/__init__.py b/nemo/collections/multimodal/data/common/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/data/common/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/data/common/data_samplers.py b/nemo/collections/multimodal/data/common/data_samplers.py deleted file mode 100644 index 1cfd3d046a76..000000000000 --- a/nemo/collections/multimodal/data/common/data_samplers.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from multiprocessing import Value - -import torch -from webdataset.pytorch import IterableDataset -from nemo.utils import logging - - -class SharedEpoch: - def __init__(self, epoch: int = 0): - self.shared_epoch = Value('i', epoch) - - def set_value(self, epoch): - self.shared_epoch.value = epoch - - def get_value(self): - return self.shared_epoch.value - - -class WDSUrlsRandomSampler(IterableDataset): - def __init__( - self, - urls, - total_urls: int, - chunk_size: int, - consumed_samples: int, - data_parallel_rank: int, - data_parallel_size: int, - num_workers: int, - drop_last: bool, - data_sharding: bool, - ): - r"""Sampler for WebDataset Urls with data parallelism. - Args: - urls : The urls of the tar files from which to sample. - total_urls (int): Total number of urls in the dataset. - chunk_size (int): Number of objects per tar file. - consumed_samples (int): Number of samples consumed so far by the training process. - **Note samples here is not urls.** - data_parallel_rank (int): Rank of the current data parallel process. - data_parallel_size (int): Number of data parallel processes. - drop_last (bool): If True, drop the remaining urls if the number is smaller than `data_parallel_size`. - If False, pad the urls until its size is divisible by `data_parallel_size`. - data_sharding (bool): If True, use data sharding before data shuffling, i.e. only shuffle within the data parallel group. - """ - super().__init__() - self.urls = urls - self.total_urls = total_urls - self.chunk_size = chunk_size - - if consumed_samples % data_parallel_size == 0: - logging.warning("Multimodal data resuming will be approximate!") - self.consumed_urls = ( - consumed_samples // (data_parallel_size * num_workers) // chunk_size * (data_parallel_size * num_workers) - ) - self.consumed_samples = self.consumed_urls * chunk_size - - self.data_parallel_rank = data_parallel_rank - self.data_parallel_size = data_parallel_size - self.drop_last = drop_last - self.data_sharding = data_sharding - self.epoch = SharedEpoch() - - self.remaining_urls = self.total_urls % self.data_parallel_size - - def __len__(self): - if self.drop_last: - return self.total_urls // self.data_parallel_size - else: - return (self.total_urls + self.data_parallel_size - 1) // self.data_parallel_size - - def __iter__(self): - worker_id, num_workers = 0, 1 - worker_info = torch.utils.data.get_worker_info() - if worker_info is not None: - worker_id, num_workers = worker_info.id, worker_info.num_workers - - self.consumed_urls = ( - self.consumed_samples - // (self.data_parallel_size * num_workers) - // self.chunk_size - * (self.data_parallel_size * num_workers) - ) - - if self.drop_last or self.remaining_urls == 0: - active_total_urls = self.total_urls - self.remaining_urls - else: - active_total_urls = self.total_urls + self.data_parallel_size - self.remaining_urls - - self.epoch.set_value(self.consumed_urls // active_total_urls) - current_epoch_urls = self.consumed_urls % active_total_urls - - # data sharding and random sampling - if self.data_sharding: - bucket_size = active_total_urls // self.data_parallel_size - bucket_offset = current_epoch_urls // self.data_parallel_size - start_idx = self.data_parallel_rank * bucket_size - - g = torch.Generator() - g.manual_seed(self.epoch.get_value()) - random_idx = torch.randperm(bucket_size, generator=g).tolist() - idx_range = [start_idx + x for x in random_idx[bucket_offset:]] - else: - full_bucket_size = active_total_urls - full_bucket_offset = current_epoch_urls - g = torch.Generator() - g.manual_seed(self.epoch.get_value()) - idx_range_total = torch.randperm(full_bucket_size, generator=g).tolist() - idx_range_active = idx_range_total[full_bucket_offset:] - idx_range = idx_range_active[self.data_parallel_rank :: self.data_parallel_size] - - # Use additional permutation to replace out-of-range indices when drop_last is False - additional_random_idx = torch.randperm(self.total_urls, generator=g).tolist() - for n, idx in enumerate(idx_range): - self.consumed_samples += self.data_parallel_size * self.chunk_size - if worker_info is not None and n % num_workers != worker_id: - continue - if idx < self.total_urls: - yield dict(url=self.urls[idx]) - else: - yield dict(url=self.urls[additional_random_idx[idx - self.total_urls]]) diff --git a/nemo/collections/multimodal/data/common/webdataset.py b/nemo/collections/multimodal/data/common/webdataset.py deleted file mode 100644 index 785f834b34f9..000000000000 --- a/nemo/collections/multimodal/data/common/webdataset.py +++ /dev/null @@ -1,286 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import io -import itertools -import json -import os -import pickle -import random -import re -from typing import Callable, List, Union - -import boto3 -import torch.distributed as dist -import webdataset as wds -from botocore.config import Config -from PIL import Image -from webdataset import WebDataset, warn_and_continue -from webdataset.filters import _shuffle -from webdataset.utils import pytorch_worker_info - -from nemo.collections.multimodal.data.common.data_samplers import SharedEpoch, WDSUrlsRandomSampler -from nemo.collections.multimodal.data.common.webdataset_s3 import WebDataset as WebDatasetS3 -from nemo.core.classes import IterableDataset as NeMoIterableDataset -from nemo.utils import logging - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - -Image.MAX_IMAGE_PIXELS = 933120000 -_IMG_EXTENSIONS = "jpg jpeg png ppm pgm pbm pnm".split() -from webdataset import warn_and_continue - - -class detshuffle2(wds.PipelineStage): - def __init__( - self, bufsize=1000, initial=100, seed=0, epoch=-1, - ): - self.bufsize = bufsize - self.initial = initial - self.seed = seed - self.epoch = epoch - - def run(self, src): - if isinstance(self.epoch, SharedEpoch): - epoch = self.epoch.get_value() - else: - # NOTE: this is epoch tracking is problematic in a multiprocess (dataloader workers or train) - # situation as different workers may wrap at different times (or not at all). - self.epoch += 1 - epoch = self.epoch - rng = random.Random() - # This seed to be deterministic AND the same across all nodes/workers in each epoch - if parallel_state.is_unitialized(): - seed = self.seed + epoch - else: - seed = self.seed + epoch + (100 * parallel_state.get_data_parallel_rank()) - rng.seed(seed) - return _shuffle(src, self.bufsize, self.initial, rng) - - -def pil_loader(key, data): - r""" - Function to load an image. - If the image is corrupt, it returns a black image. - Args: - key: Image key. - data: Image data stream. - """ - extension = re.sub(r".*[.]", "", key) - if extension.lower() not in _IMG_EXTENSIONS: - return None - - with io.BytesIO(data) as stream: - img = Image.open(stream) - img.load() - img = img.convert("RGB") - - return img - - -def get_world_size(): - r"""Get world size. How many GPUs are available in this job.""" - world_size = 1 - if dist.is_available(): - if dist.is_initialized(): - world_size = dist.get_world_size() - return world_size - - -class WebDatasetCommon(NeMoIterableDataset): - def __init__( - self, - dataset_cfg, - map_fn: Callable, - compose_fn: Union[Callable, List[Callable]], - consumed_samples: int, - filter_fn: Callable = None, - gen_cfg=None, - decode_fn: Callable = None, - is_train=True, - ): - - super().__init__() - self.dataset_cfg = dataset_cfg - self.num_workers = dataset_cfg.num_workers - self.world_size = get_world_size() - self.webdata_cfg = dataset_cfg.webdataset - self.infinite_sampler = self.webdata_cfg.get("infinite_sampler", False) - self.gen_cfg = gen_cfg - self.consumed_samples = consumed_samples - - self.local_root_path = self.webdata_cfg.local_root_path - if is_train: - dataset_path = dataset_cfg.train.dataset_path - self.augmentations = dataset_cfg.train.get("augmentations", None) - self.filterings = dataset_cfg.train.get("filterings", None) - else: - dataset_path = dataset_cfg.validation.dataset_path - self.augmentations = dataset_cfg.validation.get("augmentations", None) - self.filterings = dataset_cfg.validation.get("filterings", None) - - if "boto3" in dataset_cfg: - logging.info(f'Init boto3 using credentials file at {dataset_cfg.boto3.credentials_file}') - self.use_boto3 = True - assert dataset_cfg.boto3.credentials_file is not None - with open(dataset_cfg.boto3.credentials_file) as fin: - self.credentials = json.load(fin) - config = Config(connect_timeout=30, signature_version="s3", retries={"max_attempts": 999999}) - self.s3 = boto3.client('s3', **self.credentials, config=config) - self.bucket = dataset_cfg.boto3.bucket - self.local_root_path = "" - else: - logging.info(f'Read Webdataset locally. Data stores at {self.local_root_path}') - self.use_boto3 = False - self.s3 = None - self.bucket = None - - # wdinfo in a dict containing webdata information - self.wdinfo = dict() - if dataset_path[0].endswith(".pkl"): - for dset_info_path in dataset_path: - with open(dset_info_path, 'rb') as fp: - dset_info = pickle.load(fp) - if 'tar_files' not in self.wdinfo: - self.wdinfo['tar_files'] = dset_info['tar_files'] - self.wdinfo['total_key_count'] = dset_info['total_key_count'] - self.wdinfo['chunk_size'] = dset_info['chunk_size'] - else: - self.wdinfo['tar_files'].extend(dset_info['tar_files']) - self.wdinfo['total_key_count'] += dset_info['total_key_count'] - train_info = self.wdinfo - else: - train_info = self.wdinfo - train_info['tar_files'] = map(wds.shardlists.expand_urls, dataset_path) - train_info['tar_files'] = list(itertools.chain.from_iterable(train_info['tar_files'])) - train_info['chunk_size'] = self.webdata_cfg.get("chunk_size", 1000) - train_info['total_key_count'] = train_info['chunk_size'] * len(train_info['tar_files']) - - self.data_parallel_size = parallel_state.get_data_parallel_world_size() - chunk_size = train_info['chunk_size'] - - num_workers = dataset_cfg.get("num_workers") or 1 - self.consumed_urls = ( - consumed_samples - // (self.data_parallel_size * num_workers) - // chunk_size - * (self.data_parallel_size * num_workers) - ) - self.consumed_samples = self.consumed_urls * chunk_size - self.skip_ahead = consumed_samples - self.consumed_samples - - decode_fn = pil_loader if decode_fn is None else decode_fn - shards_train_list = train_info["tar_files"] - num_shards = len(shards_train_list) - assert num_shards > 0, "Did not find any training data." - - # Shuffle buffer: - shuffle_buffer_size = train_info["chunk_size"] - - if self.filterings is not None: - # TODO : Not a good way of estimating filtering (We expect user to give estimated portion) - # We should estimate in someway. This is anyway used only in progress bar - logging.info(f'Estimated {self.filterings.estimated_portion} will be remaining after filtering') - train_info["total_key_count"] = int(train_info["total_key_count"] * self.filterings.estimated_portion) - - # WDS Dataset Pipeline - # DetShuffle -> Decode -> Filter -> Map -> Compose - train_dataset, epoch = self._get_webdataset_and_epoch() - train_dataset = train_dataset.compose(detshuffle2(bufsize=shuffle_buffer_size, epoch=epoch)) - train_dataset = train_dataset.decode(decode_fn, handler=warn_and_continue) - - if self.filterings is not None: - if self.filterings.resolution is not None: - train_dataset = train_dataset.select(filter_fn) - - train_dataset = train_dataset.map(map_fn, handler=warn_and_continue) - if not isinstance(compose_fn, list): - compose_fn = [compose_fn] - for fn in compose_fn: - train_dataset = train_dataset.compose(fn) - train_dataset.total_images = train_info["total_key_count"] - - if train_info["total_key_count"] != train_info["chunk_size"] * len(train_info["tar_files"]): - logging.warning("Total image count is not equal to chunk_size * number of tar files.") - - if self.infinite_sampler: - rank, world_size, worker_id, num_workers = pytorch_worker_info() - nbatches = train_dataset.total_images // world_size // self.num_workers - logging.info(f'Setting nbatches={nbatches} for infinite sampler. world_size={world_size}') - train_dataset = train_dataset.with_epoch(nbatches=nbatches) - - logging.info("Total number of training shards: %d", num_shards) - logging.info("Total training key count: %d", train_dataset.total_images) - - self._dataset = train_dataset - - def _get_webdataset_and_epoch(self): - train_info = self.wdinfo - chunk_size = train_info["chunk_size"] - shards_train_list = train_info["tar_files"] - shards_train_list = [os.path.join(self.local_root_path, x) for x in shards_train_list] - epoch = 0 - - if not self.infinite_sampler: - logging.info(f'Initiating Webdataset Random Sampler..') - assert ( - self.filterings is None - ), 'Webdataset Random Sampler should not be used with filters. Switch to infinite sampler' - shards_train_list = WDSUrlsRandomSampler( - urls=shards_train_list, - total_urls=len(shards_train_list), - chunk_size=chunk_size, - consumed_samples=self.consumed_samples, - data_parallel_rank=parallel_state.get_data_parallel_rank(), - data_parallel_size=parallel_state.get_data_parallel_world_size(), - num_workers=self.dataset_cfg.get("num_workers") or 1, - drop_last=True, - data_sharding=self.dataset_cfg.train.get("data_sharding", True), - ) - epoch = shards_train_list.epoch - - if self.use_boto3: - train_dataset = WebDatasetS3( - shards_train_list, - handler=warn_and_continue, - resampled=self.infinite_sampler or False, - load_from_object_store=self.use_boto3, - s3_client=self.s3, - s3_bucket_name=self.bucket, - ) - else: - train_dataset = WebDataset( - shards_train_list, handler=warn_and_continue, resampled=self.infinite_sampler or False, - ) - - return train_dataset, epoch - - def __iter__(self): - ds_iter = self._dataset.__iter__() - while self.skip_ahead > 0 and not self.infinite_sampler: - try: - _ = next(ds_iter) - self.skip_ahead -= self.data_parallel_size * self.num_workers - except StopIteration: - self.skip_ahead = 0 - return ds_iter - - def __len__(self): - return self._dataset.total_images diff --git a/nemo/collections/multimodal/data/common/webdataset_s3.py b/nemo/collections/multimodal/data/common/webdataset_s3.py deleted file mode 100644 index d1cc7c9278e3..000000000000 --- a/nemo/collections/multimodal/data/common/webdataset_s3.py +++ /dev/null @@ -1,237 +0,0 @@ -import io -import os -import sys -from urllib.parse import urlparse - -import webdataset.gopen as gopen_webdata -import yaml -from webdataset import cache, filters, shardlists -from webdataset.compat import FluidInterface -from webdataset.handlers import reraise_exception -from webdataset.pipeline import DataPipeline -from webdataset.pytorch import IterableDataset -from webdataset.tariterators import group_by_keys, tar_file_expander - -# Number of attempts to read aws objects. -_NUM_OBJECT_STORE_READ_ATTEMPTS = 10 - - -def gopen(url, mode="rb", bufsize=8192, **kw): - r"""Open the URL. - This uses the `gopen_schemes` dispatch table to dispatch based - on scheme. - Support for the following schemes is built-in: pipe, file, - http, https, sftp, ftps, scp. - When no scheme is given the url is treated as a file. - You can use the OPEN_VERBOSE argument to get info about - files being opened. - - This implementation is based on webdataset's gopen, - with the modification of supporting reading from s3 object_store: - https://webdataset.github.io/webdataset/api/webdataset/gopen.html#gopen - Args: - url (list[str]): the source URL - mode (str): the mode ("rb", "r") - bufsize (int): the buffer size - """ - global fallback_gopen - verbose = int(os.environ.get("GOPEN_VERBOSE", 0)) - if verbose: - print("GOPEN", url, gopen_webdata.info, file=sys.stderr) - - assert mode in ["rb", "wb"], mode - if url == "-": - if mode == "rb": - return sys.stdin.buffer - elif mode == "wb": - return sys.stdout.buffer - else: - raise ValueError(f"unknown mode {mode}") - - # If we specify 'object_store' in keyword arguments, - # then we would load from AWS. - # In this case, you also need to specify s3_client and s3_bucket_name - # in arguments. - if 'object_store' in kw and kw['object_store']: - # Load from object store - attempt = 0 - - while attempt < _NUM_OBJECT_STORE_READ_ATTEMPTS: - try: - s3_response_object = kw['s3_client'].get_object(Bucket=kw['s3_bucket_name'], Key=url) - object_content = s3_response_object['Body'].read() - - # This is a check to verify is the object is fully read. - full_read = s3_response_object['ContentLength'] == len(object_content) - if full_read: - return io.BytesIO(object_content) - else: - attempt += 1 - except Exception as e: # noqa - # If there is an exception (usually connectivity error or protocol error), read again - attempt += 1 - print(e) - print('Retrying tar file download, attempt {}'.format(attempt)) - continue - raise ConnectionError('Unable to read {} from PBSS. {} attempts tried.'.format(url, attempt)) - - # Append root path to the url if dataset is stored on local disk system - elif 'local_root_path' in kw and kw['local_root_path'] is not None: - url = os.path.join(kw['local_root_path'], url) - - # For all other gopen schemes, use the native webdataset gopen functions. - pr = urlparse(url) - if pr.scheme == "": - bufsize = int(os.environ.get("GOPEN_BUFFER", -1)) - return open(url, mode, buffering=bufsize) - if pr.scheme == "file": - bufsize = int(os.environ.get("GOPEN_BUFFER", -1)) - return open(pr.path, mode, buffering=bufsize) - handler = gopen_webdata.gopen_schemes["__default__"] - handler = gopen_webdata.gopen_schemes.get(pr.scheme, handler) - return handler(url, mode, bufsize, **kw) - - -def url_opener(data, handler=reraise_exception, **kw): - r"""Given a stream of url names (packaged in `dict(url=url)`), yield opened streams. - - Args: - data: Iterator of dictionaires containing url paths. - handler: Exception handler. - """ - for sample in data: - assert isinstance(sample, dict), sample - assert "url" in sample - url = sample["url"] - try: - stream = gopen(url, **kw) - sample.update(stream=stream) - yield sample - except Exception as exn: - exn.args = exn.args + (url,) - if handler(exn): - continue - else: - break - - -# Define a new tarfile_samples -def tarfile_samples( - src, - handler=reraise_exception, - load_from_object_store=False, - s3_client=None, - s3_bucket_name=None, - local_root_path=None, -): - r""" - Given an iterator of filenames, this function opens the URL streams - and groups data by keys. - - Args: - src: Iterator of data dictionaires containing URL names. - handler: Exception handler. - load_from_object_store (bool): A boolean flag to specify whether to load from - object store. - s3_client: If loading from object store, specify S3 client. - s3_bucket_name: If loading from object store, specify S3 bucket name. - local_root_path: If loading from local (or mounted) disk system, - specify the root path of the dataset. - """ - streams = url_opener( - src, - handler=handler, - object_store=load_from_object_store, - s3_client=s3_client, - s3_bucket_name=s3_bucket_name, - local_root_path=local_root_path, - ) - files = tar_file_expander(streams, handler=handler) - samples = group_by_keys(files, handler=handler) - return samples - - -tarfile_to_samples = filters.pipelinefilter(tarfile_samples) - - -class WebDataset(DataPipeline, FluidInterface): - r"""Webdataset class modified to support loading from object store.""" - - def __init__( - self, - urls, - handler=reraise_exception, - resampled=False, - shardshuffle=None, - cache_size=-1, - cache_dir=None, - detshuffle=False, - nodesplitter=shardlists.single_node_only, - verbose=False, - load_from_object_store=False, - s3_client=None, - s3_bucket_name=None, - local_root_path=None, - ): - r""" - Args: - urls: An iterator containing a list of url names. - handler: Exception handler. - resampled: If true, sample shards from shard list with replacement. - shardshuffle: If true, shuffles the entire shard list. - cache_size: Size of cache. - cache_dir: Path to store cache. - detshuffle: Whether to use deterministic shuffling when shardshuffle is True. - nodesplitter: Function for splitting urls among nodes. - verbose: If True, prints logs. - load_from_object_store (bool): A boolean flag to specify whether to load from - object store. - s3_client: If loading from object store, specify S3 client. - s3_bucket_name: If loading from object store, specify S3 bucket name. - local_root_path: If loading from local (or mounted) disk system, - specify the root path of the dataset. - """ - super().__init__() - if isinstance(urls, IterableDataset): - assert not resampled - self.append(urls) - elif isinstance(urls, str) and (urls.endswith(".yaml") or urls.endswith(".yml")): - with (open(urls)) as stream: - spec = yaml.safe_load(stream) - assert "datasets" in spec - self.append(shardlists.MultiShardSample(spec)) - elif isinstance(urls, dict): - assert "datasets" in urls - self.append(shardlists.MultiShardSample(urls)) - elif resampled: - self.append(shardlists.ResampledShards(urls)) - else: - self.append(shardlists.SimpleShardList(urls)) - self.append(nodesplitter) - self.append(shardlists.split_by_worker) - if shardshuffle is True: - shardshuffle = 100 - if shardshuffle is not None: - if detshuffle: - self.append(filters.detshuffle(shardshuffle)) - else: - self.append(filters.shuffle(shardshuffle)) - if cache_dir is None or cache_size == 0: - self.append( - tarfile_to_samples( - handler=handler, - load_from_object_store=load_from_object_store, - s3_client=s3_client, - s3_bucket_name=s3_bucket_name, - local_root_path=local_root_path, - ) - ) - else: - - # We dont use cache. - assert cache_size == -1 or cache_size > 0 - self.append( - cache.cached_tarfile_to_samples( - handler=handler, verbose=verbose, cache_size=cache_size, cache_dir=cache_dir, - ) - ) diff --git a/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py b/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py deleted file mode 100644 index 301be555dad1..000000000000 --- a/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch - -from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon -from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import ( - construct_image_augmentations, - identical_transform, -) - - -def build_train_valid_datasets( - model_cfg, consumed_samples, -): - data_cfg = model_cfg.data - - # This function maps data that are tuples to dictionary. - def tuple_to_dict(inp): - for input in inp: - out_dict = dict() - out_dict['images'] = input[0].permute(1, 2, 0) - out_dict['captions'] = input[1] - out_dict['hint'] = input[2].permute(1, 2, 0) - yield out_dict - - def transform_fn(sample): - - image, text, hint = sample["jpg"], sample["txt"], sample["png"] - # TODO : If no agumentations just return the image ? - img_transform = construct_image_augmentations(data_cfg.train.get("augmentations", None)) - text_transform = identical_transform - return img_transform(image), text_transform(text), img_transform(hint) - - train_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=consumed_samples, - map_fn=transform_fn, - compose_fn=tuple_to_dict, - is_train=True, - ) - - val_data = None - if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): - val_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=consumed_samples, - map_fn=transform_fn, - compose_fn=tuple_to_dict, - is_train=False, - ) - - return train_data, val_data - - -def build_train_valid_precached_datasets( - model_cfg, consumed_samples, -): - data_cfg = model_cfg.data - - # This function maps data that are tuples to dictionary. - def tuple_to_dict(inp): - for input in inp: - out_dict = dict() - out_dict[model_cfg.first_stage_key] = torch.tensor(input['autoencoderkl_image']) - out_dict[model_cfg.cond_stage_key] = torch.tensor(input['clip-vit-large-patch14_text']) - yield out_dict - - def transform_fn(sample): - return sample['pickle'] - - train_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=consumed_samples, - map_fn=transform_fn, - compose_fn=tuple_to_dict, - is_train=True, - ) - - val_data = None - if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): - val_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=consumed_samples, - map_fn=transform_fn, - compose_fn=tuple_to_dict, - is_train=False, - ) - - return train_data, val_data diff --git a/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py b/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py deleted file mode 100644 index e25dcfbecf6b..000000000000 --- a/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import pickle -from pathlib import Path - -import torch -from PIL import Image -from pytorch_lightning.utilities import rank_zero_only -from torch.utils.data import Dataset -from torchvision import transforms -from tqdm import tqdm - - -class DreamBoothDataset(Dataset): - """ - A dataset to prepare the instance and class images with the prompts for fine-tuning the model. - It pre-processes the images and the tokenizes prompts. - """ - - def __init__( - self, - instance_data_root, - instance_prompt, - with_prior_preservation=False, - reg_data_root=None, - reg_prompt=None, - size=512, - center_crop=True, - repeat=10000, - load_cache_latents=False, - cached_instance_data_root=None, - cached_reg_data_root=None, - vae=None, - text_encoder=None, - ): - self.size = size - self.center_crop = center_crop - - assert instance_data_root or cached_instance_data_root, "must provide instance images to start training." - self.instance_data_root = Path(instance_data_root) - self.cached_instance_data_root = cached_instance_data_root - self.cached_reg_data_root = cached_reg_data_root - - self.instance_images_path = list(Path(instance_data_root).iterdir()) - self.num_instance_images = len(self.instance_images_path) - self.instance_prompt = instance_prompt - self._length = self.num_instance_images * repeat - self.load_cache_latents = load_cache_latents - self.with_prior_preservation = with_prior_preservation - - if reg_data_root is not None: - self.reg_data_root = Path(reg_data_root) - self.reg_images_path = list(self.reg_data_root.iterdir()) - self.num_reg_images = len(self.reg_images_path) - self.reg_prompt = reg_prompt - else: - self.reg_data_root = None - - self.image_transforms = transforms.Compose( - [ - transforms.Resize(size, interpolation=transforms.InterpolationMode.BILINEAR), - transforms.CenterCrop(size) if center_crop else transforms.RandomCrop(size), - transforms.ToTensor(), - transforms.Normalize([0.5], [0.5]), - ] - ) - - if self.load_cache_latents: - if (self.cached_instance_data_root is None) or ( - self.with_prior_preservation and self.cached_reg_data_root is None - ): - self.cache_latents(vae, text_encoder) - - self.cached_instance_data_root = f'{self.instance_data_root}_cached' - self.cached_reg_data_root = f'{self.reg_data_root}_cached' - self.instance_images_path = list(Path(self.cached_instance_data_root).iterdir()) - self.num_instance_images = len(self.instance_images_path) - - if self.with_prior_preservation: - self.reg_images_path = list(Path(self.cached_reg_data_root).iterdir()) - self.num_reg_images = len(self.reg_images_path) - - if self.cached_instance_data_root: - self.instance_images_path = list(Path(self.cached_instance_data_root).iterdir()) - self.num_instance_images = len(self.instance_images_path) - if self.with_prior_preservation and self.cached_reg_data_root: - self.reg_images_path = list(Path(self.cached_reg_data_root).iterdir()) - self.num_reg_images = len(self.reg_images_path) - - def __len__(self): - return self._length - - def get_image(self, path): - image = Image.open(path) - if not image.mode == "RGB": - image = image.convert("RGB") - image = self.image_transforms(image) - return image - - def __getitem__(self, index): - example = {} - if self.load_cache_latents: - example["instance_images"] = torch.load(self.instance_images_path[index % self.num_instance_images]) - else: - example["instance_images"] = self.get_image(self.instance_images_path[index % self.num_instance_images]) - example["instance_prompt"] = self.instance_prompt - - if self.reg_data_root: - if self.load_cache_latents: - example["reg_images"] = torch.load(self.reg_images_path[index % self.num_reg_images]) - else: - example["reg_images"] = self.get_image(self.reg_images_path[index % self.num_reg_images]) - example["reg_prompt"] = self.reg_prompt - - return example - - @rank_zero_only - def cache_latents(self, vae, text_encoder): - os.makedirs(f'{self.instance_data_root}_cached', exist_ok=True) - self.cached_instance_data_root = f'{self.instance_data_root}_cached' - self.cached_reg_data_root = f'{self.reg_data_root}_cached' - if self.instance_data_root and (len(os.listdir(self.cached_instance_data_root)) < self.num_instance_images): - for i in tqdm(range(self.num_instance_images)): - x = torch.Tensor(self.get_image(self.instance_images_path[i % self.num_instance_images])) - x = torch.unsqueeze(x, dim=0) - params = vae.encode(x).parameters.squeeze(dim=0) - torch.save(params, f'{self.instance_data_root}_cached/instance_image_cache_{i}.pt') - - if self.with_prior_preservation: - os.makedirs(f'{self.reg_data_root}_cached', exist_ok=True) - if self.reg_data_root and (len(os.listdir(self.cached_reg_data_root)) < self.num_reg_images): - for i in tqdm(range(self.num_reg_images)): - x = torch.Tensor(self.get_image(self.reg_images_path[i % self.num_reg_images])) - x = torch.unsqueeze(x, dim=0) - params = vae.encode(x).parameters.squeeze(dim=0) - torch.save(params, f'{self.reg_data_root}_cached/reg_image_cache_{i}.pt') diff --git a/nemo/collections/multimodal/data/imagen/__init__.py b/nemo/collections/multimodal/data/imagen/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/data/imagen/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/data/imagen/augmentations/__init__.py b/nemo/collections/multimodal/data/imagen/augmentations/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/data/imagen/augmentations/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/data/imagen/augmentations/augmentations.py b/nemo/collections/multimodal/data/imagen/augmentations/augmentations.py deleted file mode 100644 index 23f481bc8720..000000000000 --- a/nemo/collections/multimodal/data/imagen/augmentations/augmentations.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from typing import List, Optional - -import torch - -from nemo.utils import logging - - -def build_resolution_filter(value=None, method='larger', image_idx=0): - """ - Filter image based on its resolution. - value: filter threshold - method: Either larger or smaller - image_idx: idx of the image in the tuple input - """ - assert method == 'larger' or method == 'smaller' - if method == 'larger': - logging.info(f'Only Selecting images with resolution >= {value}') - return lambda x: x[image_idx].size[0] >= value and x[image_idx].size[1] >= value - - logging.info(f'Only Selecting images with resolution <= {value}') - return lambda x: x[image_idx].size[0] <= value and x[image_idx].size[1] <= value - - -class PickleTransform: - """ - Convert encodings stored in the pickle file to encoding and mask. - Transform the pad and resize the embedding to match the generator config. - """ - - def __init__(self, encoding_lengths: List[int], encoding_keys: List[str], out_keys: Optional[List[str]] = None): - assert len(encoding_keys) == len(encoding_lengths) - self.encoding_lengths = encoding_lengths - self.encoding_keys = encoding_keys - self.out_keys = out_keys if out_keys is not None else encoding_keys - - def _pad_and_resize(self, arr, ntokens): - # Function for padding and resizing a numpy array - - arr = torch.tensor(arr) - embed_dim = arr.shape[1] - - arr_padded = torch.zeros(ntokens, embed_dim, device=arr.device, dtype=torch.float32) - - # If the input text is larger than num_text_tokens, clip it. - if arr.shape[0] > ntokens: - arr = arr[0:ntokens] - - mask = torch.LongTensor(ntokens).zero_() - if len(arr.shape) > 1: - mask[0 : arr.shape[0]] = 1 - - if len(arr.shape) > 1: - arr_padded[0 : arr.shape[0]] = arr - - return arr_padded, mask - - def __call__(self, data): - out_dict = dict() - for token_length, encoding_key, out_key in zip(self.encoding_lengths, self.encoding_keys, self.out_keys): - embed, mask = self._pad_and_resize(data[encoding_key]['encodings'], token_length) - out_dict[f'{out_key}_embeddings'] = embed - out_dict[f'{out_key}_mask'] = mask - return out_dict diff --git a/nemo/collections/multimodal/data/imagen/augmentations/corruption.py b/nemo/collections/multimodal/data/imagen/augmentations/corruption.py deleted file mode 100644 index 6c17066fd285..000000000000 --- a/nemo/collections/multimodal/data/imagen/augmentations/corruption.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torchvision.transforms.functional as torchvision_F - - -class ImagePyramidNoCorruptions: - r""" - Only downsample image without any additional corruption. - """ - - def __init__(self, target_resolutions): - self.resolutions = target_resolutions - - def obtain_image_pyramid(self, image): - # Downsampling - data_dict = dict() - for res in self.resolutions: - image_downsampled = torchvision_F.resize( - image, res, interpolation=torchvision_F.InterpolationMode.BICUBIC, antialias=True - ) - data_dict[f'images_{res}'] = image_downsampled - return data_dict diff --git a/nemo/collections/multimodal/data/imagen/imagen_dataset.py b/nemo/collections/multimodal/data/imagen/imagen_dataset.py deleted file mode 100644 index c3db3b3a4612..000000000000 --- a/nemo/collections/multimodal/data/imagen/imagen_dataset.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch - -from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon -from nemo.collections.multimodal.data.imagen.augmentations.augmentations import ( - PickleTransform, - build_resolution_filter, -) -from nemo.collections.multimodal.data.imagen.augmentations.corruption import ImagePyramidNoCorruptions -from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import ( - construct_image_augmentations, - identical_transform, -) -from nemo.core.classes import Dataset as NeMoDataset -from nemo.utils import logging - - -class ImagenSyntheticDataset(NeMoDataset): - def __init__( - self, res, conditioning_cfg, fake_len=100000, no_embedding=False, - ): - super().__init__() - self.fake_len = fake_len - self.res = res - self.no_embedding = no_embedding - if not no_embedding: - self.out_key = conditioning_cfg.out_key if conditioning_cfg.out_key else conditioning_cfg.precached_key - self.token_length = conditioning_cfg.token_length - self.embed_dim = conditioning_cfg.embed_dim - - def __getitem__(self, index): - item = {} - if isinstance(self.res, list): - for resolution in self.res: - image_key = f'images_{resolution}' - item[image_key] = torch.randn(3, resolution, resolution) - else: - item['images'] = torch.randn(3, self.res, self.res) - - item['raw_text'] = f'fake text {index}' - if not self.no_embedding: - item[f'{self.out_key}_embeddings'] = torch.randn(self.token_length, self.embed_dim) - item[f'{self.out_key}_mask'] = torch.ones(self.token_length, dtype=torch.long) - return item - - def __len__(self): - return self.fake_len - - -def _build_functions_with_pickles(data_cfg, condition_cfg): - def tuple_to_dict(inp): - for input in inp: - out_dict = dict() - out_dict['images'] = input[0] - - # Output from pickle transform is already a dictionary - out_dict.update(input[1]) - - out_dict['raw_text'] = input[2] - yield out_dict - - def transform_fn(sample): - image, encodings, text = sample['jpg'], sample['pickle'], sample['txt'] - img_transform = construct_image_augmentations(data_cfg.train.get('augmentations'), normalize=True) - pickle_transform = PickleTransform( - encoding_keys=[condition_cfg.precached_key], - encoding_lengths=[condition_cfg.token_length], - out_keys=[condition_cfg.out_key], - ) - text_transform = identical_transform - return img_transform(image), pickle_transform(encodings), text_transform(text) - - return tuple_to_dict, transform_fn - - -def _build_functions_no_pickles(data_cfg): - def tuple_to_dict(inp): - for input in inp: - out_dict = dict() - out_dict['images'] = input[0] - out_dict['raw_text'] = input[1] - yield out_dict - - def transform_fn(sample): - image, text = sample['jpg'], sample['txt'] - img_transform = construct_image_augmentations(data_cfg.train.get('augmentations'), normalize=True) - text_transform = identical_transform - return img_transform(image), text_transform(text) - - return tuple_to_dict, transform_fn - - -def build_train_valid_datasets( - model_cfg, consumed_samples, -): - data_cfg = model_cfg.data - condition_cfg = model_cfg.conditioning - - if data_cfg.get('synthetic_data', False): - logging.info(f'Creating Synthetic Datasaet.') - train_data = ImagenSyntheticDataset( - res=data_cfg.train.get('target_resolutions', 64), - conditioning_cfg=condition_cfg, - fake_len=data_cfg.get('synthetic_data_length', 10000), - no_embedding=condition_cfg.get("online_encoding", False), - ) - return train_data, None - # This function maps data that are tuples to dictionary. - if condition_cfg.get("online_encoding", False): - tuple_to_dict, transform_fn = _build_functions_no_pickles(data_cfg) - else: - tuple_to_dict, transform_fn = _build_functions_with_pickles(data_cfg, condition_cfg) - - filter_cfg = data_cfg.train.get('filterings', None) - - # For adding corruptions and obtaining image pyramid - if model_cfg.unet_type.startswith('sr'): - assert data_cfg.train.get('target_resolutions'), 'SR model requires multiple resolution for training' - logging.info(f'Resizing input images into the follow resolutions: {data_cfg.train.target_resolutions}') - corruption_gen = ImagePyramidNoCorruptions(target_resolutions=data_cfg.train.target_resolutions) - else: - corruption_gen = None - - # This function is used for obtaining image pyramid - # in SR models for Imagen, we need to use low-res image as conditioning. - def obtain_image_pyramid(inp): - for data_dict in inp: - data_pyramid = corruption_gen.obtain_image_pyramid(data_dict['images']) - data_dict.update(data_pyramid) - yield data_dict - - compose_fn = [tuple_to_dict] - if corruption_gen: - compose_fn.append(obtain_image_pyramid) - - train_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=consumed_samples, - map_fn=transform_fn, - compose_fn=compose_fn, - filter_fn=build_resolution_filter(**filter_cfg.resolution, image_idx='jpg') if filter_cfg else None, - is_train=True, - ) - return train_data, None diff --git a/nemo/collections/multimodal/data/instruct_pix2pix/__init__.py b/nemo/collections/multimodal/data/instruct_pix2pix/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/data/instruct_pix2pix/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py b/nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py deleted file mode 100644 index 37f4518528a9..000000000000 --- a/nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import annotations - -import json -import math -from pathlib import Path -from typing import Any - -import numpy as np -import torch -import torchvision -from einops import rearrange -from PIL import Image -from torch.utils.data import Dataset - -from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import construct_image_augmentations - - -class EditDataset(Dataset): - def __init__( - self, - path: str, - split: str = "train", - splits: tuple[float, float, float] = (0.95, 0.04, 0.01), - min_resize_res: int = 256, - max_resize_res: int = 256, - crop_res: int = 256, - flip_prob: float = 0.0, - ): - assert split in ("train", "val", "test") - assert sum(splits) == 1 - self.path = path - self.min_resize_res = min_resize_res - self.max_resize_res = max_resize_res - self.crop_res = crop_res - self.flip_prob = flip_prob - - with open(Path(self.path, "seeds.json")) as f: - self.seeds = json.load(f) - - split_0, split_1 = { - "train": (0.0, splits[0]), - "val": (splits[0], splits[0] + splits[1]), - "test": (splits[0] + splits[1], 1.0), - }[split] - - idx_0 = math.floor(split_0 * len(self.seeds)) - idx_1 = math.floor(split_1 * len(self.seeds)) - self.seeds = self.seeds[idx_0:idx_1] - - def __len__(self) -> int: - return len(self.seeds) - - def __getitem__(self, i: int) -> dict[str, Any]: - name, seeds = self.seeds[i] - propt_dir = Path(self.path, name) - seed = seeds[torch.randint(0, len(seeds), ()).item()] - with open(propt_dir.joinpath("prompt.json")) as fp: - prompt = json.load(fp)["edit"] - - image_0 = Image.open(propt_dir.joinpath(f"{seed}_0.jpg")) - image_1 = Image.open(propt_dir.joinpath(f"{seed}_1.jpg")) - - resize_res = torch.randint(self.min_resize_res, self.max_resize_res + 1, ()).item() - image_0 = image_0.resize((resize_res, resize_res), Image.Resampling.LANCZOS) - image_1 = image_1.resize((resize_res, resize_res), Image.Resampling.LANCZOS) - - image_0 = rearrange(2 * torch.tensor(np.array(image_0)).float() / 255 - 1, "h w c -> c h w") - image_1 = rearrange(2 * torch.tensor(np.array(image_1)).float() / 255 - 1, "h w c -> c h w") - - crop = torchvision.transforms.RandomCrop(self.crop_res) - flip = torchvision.transforms.RandomHorizontalFlip(float(self.flip_prob)) - image_0, image_1 = flip(crop(torch.cat((image_0, image_1)))).chunk(2) - - return dict(edited=image_1, edit=dict(c_concat=image_0, c_crossattn=prompt)) - - -class EditDatasetEval(Dataset): - def __init__( - self, path: str, split: str = "train", splits: tuple[float, float, float] = (0.9, 0.05, 0.05), res: int = 256, - ): - assert split in ("train", "val", "test") - assert sum(splits) == 1 - self.path = path - self.res = res - - with open(Path(self.path, "seeds.json")) as f: - self.seeds = json.load(f) - - split_0, split_1 = { - "train": (0.0, splits[0]), - "val": (splits[0], splits[0] + splits[1]), - "test": (splits[0] + splits[1], 1.0), - }[split] - - idx_0 = math.floor(split_0 * len(self.seeds)) - idx_1 = math.floor(split_1 * len(self.seeds)) - self.seeds = self.seeds[idx_0:idx_1] - - def __len__(self) -> int: - return len(self.seeds) - - def __getitem__(self, i: int) -> dict[str, Any]: - name, seeds = self.seeds[i] - propt_dir = Path(self.path, name) - seed = seeds[torch.randint(0, len(seeds), ()).item()] - with open(propt_dir.joinpath("prompt.json")) as fp: - prompt = json.load(fp) - edit = prompt["edit"] - input_prompt = prompt["input"] - output_prompt = prompt["output"] - - image_0 = Image.open(propt_dir.joinpath(f"{seed}_0.jpg")) - - reize_res = torch.randint(self.res, self.res + 1, ()).item() - image_0 = image_0.resize((reize_res, reize_res), Image.Resampling.LANCZOS) - - image_0 = rearrange(2 * torch.tensor(np.array(image_0)).float() / 255 - 1, "h w c -> c h w") - - return dict(image_0=image_0, input_prompt=input_prompt, edit=edit, output_prompt=output_prompt) diff --git a/nemo/collections/multimodal/data/kosmos/__init__.py b/nemo/collections/multimodal/data/kosmos/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/nemo/collections/multimodal/data/kosmos/kosmos_dataset.py b/nemo/collections/multimodal/data/kosmos/kosmos_dataset.py deleted file mode 100644 index 78b736ca36b8..000000000000 --- a/nemo/collections/multimodal/data/kosmos/kosmos_dataset.py +++ /dev/null @@ -1,366 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import io -import json -import re -from functools import partial -from typing import Any, Dict, List, Optional, Union - -import torch -from einops import rearrange -from PIL import Image -from torch.utils.data import Dataset, default_collate - -from nemo.collections.multimodal.data.clip.augmentations.augmentations import image_transform -from nemo.collections.multimodal.data.clip.imagenet_zeroshot_data import imagenet_classnames, openai_imagenet_template -from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon -from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( - MegatronPretrainingRandomSampler, - MegatronPretrainingSampler, -) -from nemo.collections.nlp.data.language_modeling.megatron.gpt_dataset import _create_ltor_masks_and_position_ids -from nemo.collections.vision.data.megatron.image_folder import ImageFolder -from nemo.collections.vision.data.megatron.vit_dataset import RandomSeedDataset - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - -MIN_KB = 10 -MAX_NUM_IMAGES = 6 -Image.MAX_IMAGE_PIXELS = 933120000 -_IMG_EXTENSIONS = "jpg jpeg png ppm pgm pbm pnm".split() -_DATASET_TYPES = ["image_caption", "image_interleaved"] - - -def pil_loader(key, data): - r""" - Function to load an image. - If the image is corrupt, it returns a black image. - Args: - key: Image key. - data: Image data stream. - """ - extension = re.sub(r".*[.]", "", key) - if extension.lower() not in _IMG_EXTENSIONS: - return None - if len(data) // 1000 <= MIN_KB: - return None - - with io.BytesIO(data) as stream: - img = Image.open(stream) - img.load() - img = img.convert("RGB") - - return img - - -def tokenize_and_insert_media_tokens( - texts: Union[str, List[str]], - tokenizer: Any, - context_length: int, - num_media_tokens: int, - add_extra_token: int, - media_start_id: str, - media_end_id: str, -) -> torch.LongTensor: - """ - Returns the tokenized representation of given input string(s) with media tokens inserted. - - Parameters - ---------- - texts : Union[str, List[str]] - An input string or a list of input strings to tokenize. - tokenizer : Any - A tokenizer to be used for tokenization. - context_length : int - The context length to be used for the output tensor. - num_media_tokens : int - The number of media latents to insert between media tokens. - - Returns - ------- - torch.LongTensor - A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length]. - """ - assert add_extra_token == 0 or add_extra_token == 1, "`add_extra_token` should be either 0 or 1." - - texts_is_str = False - if isinstance(texts, str): - texts = [texts] - texts_is_str = True - - # bos token is never used - # bos_id = tokenizer.bos_id - eos_id = tokenizer.eos_id - - all_tokens = [] - for text in texts: - tokens = tokenizer.text_to_ids(text) - media_positions = [i for i, x in enumerate(tokens) if x == media_start_id] - for media_pos in media_positions[::-1]: - tokens[media_pos : media_pos + 1] = [media_start_id] + [-1] * num_media_tokens + [media_end_id] - tokens = tokens + [eos_id] - all_tokens.append(tokens) - - # truncate and padding - result = torch.zeros(len(all_tokens), context_length + add_extra_token, dtype=torch.long) - - for i, tokens in enumerate(all_tokens): - if len(tokens) > context_length + add_extra_token: - tokens = tokens[: context_length + add_extra_token] # Truncate - result[i, : len(tokens)] = torch.tensor(tokens) - - if texts_is_str: - result = result[0] - return result - - -def get_preprocess_fns( - model_cfg, data_type, tokenizer=None, is_train=True, add_extra_token=1, media_start_id=None, media_end_id=None, -): - assert ( - media_start_id is not None and media_end_id is not None - ), "`media_start_id` and `media_end_id` should be provided." - - # Define transforms - img_size = (model_cfg.vision.get("img_h"), model_cfg.vision.get("img_w")) - img_mean = model_cfg.vision.get("img_mean") - img_std = model_cfg.vision.get("img_std") - img_transform = image_transform(img_size, is_train=is_train, mean=img_mean, std=img_std,) - - text_transform = lambda x: x - if tokenizer is not None: - text_transform = partial( - tokenize_and_insert_media_tokens, - tokenizer=tokenizer, - context_length=model_cfg.per_type_sequence_length[data_type], - num_media_tokens=model_cfg.num_media_latents, - add_extra_token=add_extra_token, - media_start_id=media_start_id, - media_end_id=media_end_id, - ) - else: - raise ValueError("tokenizer should not be None here!") - - return img_transform, text_transform - - -def transform_fn_for_image_caption(sample, img_transform, text_transform, media_start_token): - image, text = sample["jpg"], sample["txt"] - caption_template = lambda x: f"{media_start_token}{x.strip()}" - text = caption_template(text) - return img_transform(image), text_transform(text) - - -def transform_fn_for_image_interleaved(sample, img_transform, text_transform, media_start_token, sim_threshold=0.3): - info = sample["json"] - sentences = info["text_list"] - - images, sentence_ixs = [], [] - for sample_image in info["image_info"]: - image = sample[sample_image["image_name"]] - # filter to images >= 10KB - if isinstance(image, bytes): - continue - if sample_image["matched_sim"] < sim_threshold: - continue - - images.append(image) - sentence_ixs.append(sample_image["matched_text_index"]) - - if len(images) == 0: - raise ValueError("No images in sample") - - keep_ixs = min(len(images), MAX_NUM_IMAGES) - images = images[:keep_ixs] - sentence_ixs = sentence_ixs[:keep_ixs] - - def interleaved_template(sentences, sentence_ixs): - for ix in sentence_ixs: - sentences[ix] = f"{media_start_token}{sentences[ix]}" - text = " ".join(sentences) - return text - - text = interleaved_template(sentences, sentence_ixs) - images_tensors = torch.stack([img_transform(image) for image in images]) - image_size = images_tensors.shape[1:] - if len(images_tensors) < MAX_NUM_IMAGES: - zero_padding = torch.zeros((MAX_NUM_IMAGES - len(images_tensors), *image_size), dtype=torch.float) - images_tensors = torch.cat((images_tensors, zero_padding), dim=0) - - return images_tensors, text_transform(text) - - -def compose_batch(inp, model_cfg, tokenizer, add_extra_token, media_start_id, media_end_id, newline_id): - pad_id = tokenizer.pad_id - for input in inp: - media = input[0] - - # vision_x should be of shape (b, T_img, F, C, H, W) - if len(media.shape) == 3: # image_caption - media = rearrange(media, "c h w -> 1 1 c h w") - elif len(media.shape) == 4: # image_interleaved - media = rearrange(media, "T c h w -> T 1 c h w") - else: - raise ValueError(f"Media shape length is not expected: {media.shape}.") - - tokens = input[1] - if add_extra_token: - tokens = input[1][:-1].contiguous() - labels = input[1][1:].contiguous().clone().detach() - else: - labels = torch.roll(tokens, shifts=-1, dims=0) - labels[-1] = -1 - - labels[labels == media_start_id] = newline_id - labels[labels == media_end_id] = -1 - labels[labels == pad_id] = -1 - - attention_mask, loss_mask, position_ids = _create_ltor_masks_and_position_ids( - tokens=tokens, - eod_token=tokenizer.eos_id, - eod_mask_loss=model_cfg.data.get("eod_mask_loss", False), - reset_attention_mask=False, - reset_position_ids=False, - ) - - loss_mask[labels == -1] = 0.0 - tokens[tokens == -1] = 0 - labels[labels == -1] = 0 - - yield { - 'tokens': tokens, - 'labels': labels, - 'attention_mask': attention_mask, - 'loss_mask': loss_mask, - 'position_ids': position_ids, - 'media': media, - } - - -def build_train_valid_datasets( - model_cfg, consumed_samples, tokenizer=None, data_type='image_caption', -): - assert data_type in _DATASET_TYPES, f"`data_type={data_type}` is not available: {_DATASET_TYPES}." - - media_start_token = model_cfg.media_start_token - media_end_token = model_cfg.media_end_token - assert ( - media_start_token in tokenizer.vocab and media_end_token in tokenizer.vocab - ), f"Cannot find media tokens in tokenizer vocab: {media_start_token} {media_end_token}" - media_start_id = tokenizer.token_to_id(media_start_token) - media_end_id = tokenizer.token_to_id(media_end_token) - newline_id = tokenizer.text_to_ids("\n")[-1] - - data_cfg = model_cfg.data.get(data_type) - - no_seqlen_plus_one_input_tokens = model_cfg.data.get('no_seqlen_plus_one_input_tokens', False) - add_extra_token = 0 if no_seqlen_plus_one_input_tokens else 1 - - compose_fn = compose_batch - if data_type == 'image_caption': - transform_fn = transform_fn_for_image_caption - elif data_type == 'image_interleaved': - transform_fn = transform_fn_for_image_interleaved - - train_img_transform, text_transform = get_preprocess_fns( - model_cfg, - data_type=data_type, - tokenizer=tokenizer, - is_train=True, - add_extra_token=add_extra_token, - media_start_id=media_start_id, - media_end_id=media_end_id, - ) - train_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=consumed_samples, - decode_fn=pil_loader if data_type == 'interleaved' else None, - map_fn=partial( - transform_fn, - img_transform=train_img_transform, - text_transform=text_transform, - media_start_token=media_start_token, - ), - compose_fn=partial( - compose_fn, - model_cfg=model_cfg, - tokenizer=tokenizer, - add_extra_token=add_extra_token, - media_start_id=media_start_id, - media_end_id=media_end_id, - newline_id=newline_id, - ), - is_train=True, - ) - - val_data = None - if data_cfg.get("validation") is not None and data_cfg.validation.get("dataset_path"): - val_img_transform, text_transform = get_preprocess_fns( - model_cfg, - data_type=data_type, - tokenizer=tokenizer, - is_train=False, - add_extra_token=add_extra_token, - media_start_id=media_start_id, - media_end_id=media_end_id, - ) - val_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=0, - decode_fn=pil_loader if data_type == 'interleaved' else None, - map_fn=partial( - transform_fn, - img_transform=train_img_transform, - text_transform=text_transform, - media_start_token=media_start_token, - ), - compose_fn=partial( - compose_fn, - model_cfg=model_cfg, - tokenizer=tokenizer, - add_extra_token=add_extra_token, - media_start_id=media_start_id, - media_end_id=media_end_id, - newline_id=newline_id, - ), - is_train=False, - ) - - return train_data, val_data - - -class MergedKosmosDataLoader: - def __init__(self, dataloaders): - self.dataloaders = dataloaders - self.dataloader_iters = {type: iter(dataloader) for type, dataloader in dataloaders.items()} - self.lengths = {type: len(dataloader) for type, dataloader in dataloaders.items()} - self.min_length = min(self.lengths.values()) - - def __iter__(self): - while True: - try: - batch = {type: next(iter) for type, iter in self.dataloader_iters.items()} - except StopIteration: - return - yield batch - - def __len__(self): - return self.min_length diff --git a/nemo/collections/multimodal/data/nerf/__init__.py b/nemo/collections/multimodal/data/nerf/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/nemo/collections/multimodal/data/nerf/cameras.py b/nemo/collections/multimodal/data/nerf/cameras.py deleted file mode 100644 index c1496b7eeaa3..000000000000 --- a/nemo/collections/multimodal/data/nerf/cameras.py +++ /dev/null @@ -1,178 +0,0 @@ -from abc import ABC, abstractmethod -from typing import List - -import numpy as np -import torch - - -class Camera(ABC): - """ - Abstract base class for Camera models. - """ - - def __init__(self, width: int, height: int, device: torch.device = 'cuda') -> None: - """ - Initializes the Camera instance with given dimensions and device. - - Parameters: - width: int - Width of the camera frame. - height: int - Height of the camera frame. - device: torch.device - The device where tensor computations will be performed. - """ - self.width = width - self.height = height - self.device = device - - @abstractmethod - def compute_intrinsics(self) -> None: - """ - Abstract method to compute camera intrinsics. - """ - pass - - @abstractmethod - def compute_projection_matrix(self) -> None: - """ - Abstract method to compute the projection matrix. - """ - pass - - -class OrthographicCamera(Camera): - """ - Class for Orthographic Camera models. - """ - - def compute_projection_matrix(self) -> torch.Tensor: - """ - Computes the projection matrix for an Orthographic camera. - - Returns: - torch.Tensor: The projection matrix. - """ - projection = torch.tensor( - [[2 / self.width, 0, 0, 0], [0, -2 / self.height, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], - dtype=torch.float32, - device=self.device, - ).unsqueeze(0) - return projection - - -class PinholeCamera(Camera): - """ - Class for Pinhole Camera models. - """ - - def __init__(self, width: int, height: int, near: float, far: float, device: torch.device = 'cuda') -> None: - """ - Initializes the Pinhole Camera instance with given parameters. - - Parameters: - width: int - Width of the camera frame. - height: int - Height of the camera frame. - near: float - Near clipping plane. - far: float - Far clipping plane. - device: torch.device - The device where tensor computations will be performed. - """ - super().__init__(width, height, device) - self.near = near - self.far = far - - def compute_intrinsics(self, fovx: float, fovy: float) -> np.ndarray: - """ - Computes the intrinsic matrix for the camera based on field of views. - - Parameters: - fovx: float - Field of view in X direction. - fovy: float - Field of view in Y direction. - - Returns: - np.ndarray: The intrinsic matrix. - """ - focal_x = self.width / (2 * np.tan(np.deg2rad(fovx) / 2)) - focal_y = self.height / (2 * np.tan(np.deg2rad(fovy) / 2)) - cx, cy = self.width / 2, self.height / 2 - return np.array([focal_x, focal_y, cx, cy]) - - def compute_projection_matrix(self, focal_x: float, focal_y: float) -> torch.Tensor: - """ - Computes the projection matrix for the camera. - - Parameters: - focal_x: float - Focal length in X direction. - focal_y: float - Focal length in Y direction. - - Returns: - torch.Tensor: The projection matrix. - """ - projection = torch.tensor( - [ - [2 * focal_x / self.width, 0, 0, 0], - [0, -2 * focal_y / self.height, 0, 0], - [ - 0, - 0, - -(self.far + self.near) / (self.far - self.near), - -(2 * self.far * self.near) / (self.far - self.near), - ], - [0, 0, -1, 0], - ], - dtype=torch.float32, - device=self.device, - ).unsqueeze(0) - return projection - - -class CubeCamera(Camera): - """ - Class for Cube Camera models, which is essentially six pinhole cameras. - """ - - def __init__( - self, width: int, height: int, near: float = 0.01, far: float = 1000, device: torch.device = 'cuda' - ) -> None: - """ - Initializes the Cube Camera instance with given parameters. - - Parameters: - width: int - Width of each camera face. - height: int - Height of each camera face. - near: float - Near clipping plane. - far: float - Far clipping plane. - device: torch.device - The device where tensor computations will be performed. - """ - self.width = width - self.height = height - self.near = near - self.far = far - self.device = device - - def compute_intrinsics(self) -> List[np.ndarray]: - """ - Computes the intrinsic matrices for the six faces of the cube using a Pinhole camera model. - - Returns: - List[np.ndarray]: List of 6 intrinsic matrices, one for each face. - """ - # Similar to Pinhole but repeated six times for six faces of the cube - return [ - PinholeCamera( - width=self.width, height=self.height, near=self.near, far=self.far, device=self.device - ).compute_intrinsics(90, 90) - for _ in range(6) - ] - - def compute_projection_matrix(self) -> List[torch.Tensor]: - """ - Computes the projection matrices for the six faces of the cube using a Pinhole camera model. - - Returns: - List[torch.Tensor]: List of 6 projection matrices, one for each face. - """ - # Similar to Pinhole but repeated six times for six faces of the cube - return [ - PinholeCamera( - width=self.width, height=self.height, near=self.near, far=self.far, device=self.device - ).compute_projection_matrix(1, 1) - for _ in range(6) - ] diff --git a/nemo/collections/multimodal/data/nerf/circle_poses.py b/nemo/collections/multimodal/data/nerf/circle_poses.py deleted file mode 100644 index 9c833ad94bab..000000000000 --- a/nemo/collections/multimodal/data/nerf/circle_poses.py +++ /dev/null @@ -1,214 +0,0 @@ -from typing import Dict, Union - -import numpy as np -import torch -from torch.utils.data import Dataset - -from nemo.collections.multimodal.data.nerf.cameras import PinholeCamera -from nemo.collections.multimodal.data.nerf.utils import ( - compute_look_at_vectors, - construct_poses, - get_rays, - get_view_direction, -) - - -def circle_poses( - radius: torch.Tensor = torch.tensor([3.2]), - theta: torch.Tensor = torch.tensor([60]), - phi: torch.Tensor = torch.tensor([0]), - angle_overhead: float = 30, - angle_front: float = 60, - return_dirs: bool = False, - device: torch.device = "cuda", -) -> torch.Tensor: - """ - Generate camera poses based on a circular arrangement. - - Parameters: - radius: torch.Tensor - Radii for the camera positions. - theta: torch.Tensor - Theta angles for the camera positions. - phi: torch.Tensor - Phi angles for the camera positions. - angle_overhead: float - Angle range of the overhead view. - angle_front: float - Angle range of the front view. - return_dirs: bool - Whether to return the view directions. - device: str - The device to allocate the tensor on (e.g., 'cuda' or 'cpu'). - - Returns: - Tuple: Contains the following: - - poses (torch.Tensor): Generated poses, shape [size, 4, 4]. - - dirs (torch.Tensor, optional): View directions, if requested. - """ - # Convert degrees to radians for theta and phi - theta = theta / 180 * np.pi - phi = phi / 180 * np.pi - angle_overhead = angle_overhead / 180 * np.pi - angle_front = angle_front / 180 * np.pi - - # Calculate camera centers in Cartesian coordinates - centers = torch.stack( - [ - radius * torch.sin(theta) * torch.sin(phi), - radius * torch.cos(theta), - radius * torch.sin(theta) * torch.cos(phi), - ], - dim=-1, - ) # [B, 3] - - # Compute camera look-at matrix - forward_vector, up_vector, right_vector = compute_look_at_vectors(centers=centers, device=device) - - # Construct the 4x4 pose matrices - poses = construct_poses( - centers=centers, right_vector=right_vector, up_vector=up_vector, forward_vector=forward_vector, device=device - ) - - dirs = get_view_direction(theta, phi, angle_overhead, angle_front) if return_dirs else None - - return poses, dirs - - -class CirclePosesDataset(Dataset): - """ - A dataset class to generate circle poses. - """ - - def __init__( - self, - size: int = 100, - height: int = 256, - width: int = 256, - default_fovx: float = 20.0, - default_fovy: float = 20.0, - default_radius: float = 3.2, - default_polar: float = 90.0, - default_azimuth: float = 0.0, - angle_overhead: float = 30.0, - angle_front: float = 60.0, - near: float = 0.01, - far: float = 1000.0, - device: torch.device = 'cpu', - ) -> None: - """ - Initializes a new CirclePosesDataset instance. - - Parameters: - size (int): Number of samples in the dataset. - height (int): Height of the image. - width (int): Width of the image. - default_fovx (float): Default field of view in x-direction. - default_fovy (float): Default field of view in y-direction. - default_radius (float): Default radius of the circle. - default_polar (float): Default polar angle. - default_azimuth (float): Default azimuth angle. - angle_overhead (float): Overhead angle. - angle_front (float): Frontal angle. - near (float): Near clipping distance. - far (float): Far clipping distance. - device (torch.device): Device to generate data on. - """ - super().__init__() - self.size = size - self.height = height - self.width = width - - self.default_fovx = default_fovx - self.default_fovy = default_fovy - self.default_radius = default_radius - self.default_polar = default_polar - self.default_azimuth = default_azimuth - - self.angle_overhead = angle_overhead - self.angle_front = angle_front - self.near = near - self.far = far - - self.device = device - - # TODO(ahmadki): make camera type a parameter - self.camera = PinholeCamera( - width=self.width, height=self.height, near=self.near, far=self.far, device=self.device - ) - - def __len__(self) -> int: - """Returns the number of samples in the dataset.""" - return self.size - - def __getitem__(self, idx: int) -> Dict[str, Union[int, torch.Tensor]]: - """Get an item from the dataset. - - Args: - idx (int): Index of the item to retrieve. - - Returns: - dict: Data dictionary containing the following: - - height (int): Height of the image. - - width (int): Width of the image. - - rays_o (torch.Tensor): Ray origins, shape [height, width, 3]. - - rays_d (torch.Tensor): Ray directions, shape [height, width, 3]. - - dir (torch.Tensor): View direction, shape [3]. - - mvp (torch.Tensor): Model-view-projection matrix, shape [4, 4]. - - azimuth (torch.Tensor): Azimuth angle, shape [1]. - """ - # Initialize circle pose parameters - thetas = torch.FloatTensor([self.default_polar]).to(self.device) - phis = torch.FloatTensor([(idx / self.size) * 360]).to(self.device) - radius = torch.FloatTensor([self.default_radius]).to(self.device) - - # Generate circle poses and directions - poses, dirs = circle_poses( - radius=radius, - theta=thetas, - phi=phis, - angle_overhead=self.angle_overhead, - angle_front=self.angle_front, - return_dirs=True, - device=self.device, - ) - - # Compute camera intrinsics - intrinsics = self.camera.compute_intrinsics(fovx=self.default_fovx, fovy=self.default_fovy) - - # Compute projection matrix - projection = self.camera.compute_projection_matrix(focal_x=intrinsics[0], focal_y=intrinsics[1]) - mvp = projection @ torch.inverse(poses) # [1, 4, 4] - - # Sample rays - rays_o, rays_d = get_rays( - poses=poses, intrinsics=intrinsics, height=self.height, width=self.width, device=poses.device - ) - - # Compute azimuth delta - delta_azimuth = phis - self.default_azimuth - delta_azimuth[delta_azimuth > 180] -= 360 # range in [-180, 180] - - data = { - 'height': self.height, - 'width': self.width, - 'rays_o': rays_o, - 'rays_d': rays_d, - 'dir': dirs, - 'mvp': mvp, - 'azimuth': delta_azimuth, - } - - return data - - def collate_fn(self, batch: list) -> Dict[str, Union[int, torch.Tensor]]: - """Collate function to combine multiple data points into batches. - - Args: - batch (list): List of data dictionaries. - - Returns: - dict: Collated data. - """ - return { - 'height': self.height, - 'width': self.width, - 'rays_o': torch.cat([item['rays_o'] for item in batch], dim=0), - 'rays_d': torch.cat([item['rays_d'] for item in batch], dim=0), - 'mvp': torch.cat([item['mvp'] for item in batch], dim=0), - 'dir': torch.cat([item['dir'] for item in batch], dim=0), - 'azimuth': torch.cat([item['azimuth'] for item in batch], dim=0), - } diff --git a/nemo/collections/multimodal/data/nerf/random_poses.py b/nemo/collections/multimodal/data/nerf/random_poses.py deleted file mode 100644 index 5dd7121b7063..000000000000 --- a/nemo/collections/multimodal/data/nerf/random_poses.py +++ /dev/null @@ -1,436 +0,0 @@ -import random -from typing import Any, Dict, Iterator, List, Optional, Tuple - -import numpy as np -import torch -import torch.nn.functional as F -from torch.utils.data import IterableDataset - -from nemo.collections.multimodal.data.nerf.cameras import PinholeCamera -from nemo.collections.multimodal.data.nerf.utils import ( - compute_look_at_vectors, - construct_poses, - get_rays, - get_view_direction, -) - - -def linear_normalization(x: float, lower_bound: float, upper_bound: float) -> float: - """ - Linearly normalize a value between lower_bound and upper_bound to a value between 0 and 1. - - Parameters: - x: The value to normalize. - lower_bound: The lower bound of the range of x. - upper_bound: The upper bound of the range of x. - - Returns: - The normalized value between 0 and 1. - """ - return min(1, max(0, (x - lower_bound) / (upper_bound - lower_bound))) - - -def rand_poses( - size: int, - radius_range: List[float] = [1, 1.5], - theta_range: List[float] = [0, 120], - phi_range: List[float] = [0, 360], - angle_overhead: float = 30, - angle_front: float = 60, - uniform_sphere_rate: float = 0.5, - jitter: bool = False, - jitter_center: float = 0.2, - jitter_target: float = 0.2, - jitter_up: float = 0.02, - return_dirs: bool = False, - device: torch.device = "cuda", -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: - """ - Generate random poses from an orbit camera. - - Args: - size (int): Number of poses to generate. - radius_range (List[float]): Min and max radii for camera [min, max]. - theta_range (List[float]): Elevation angle range in degrees [min, max]. - phi_range (List[float]): Azimuth angle range in degrees [min, max]. - angle_overhead (float): Overhead angle in degrees. - angle_front (float): Front angle in degrees. - uniform_sphere_rate (float): The probability of sampling from a uniform sphere. - jitter (bool): Whether to add noise to the poses. - jitter_center (float): Noise range for the camera center. - jitter_target (float): Noise range for the camera target. - jitter_up (float): Noise range for the camera up vector. - return_dirs (bool): Whether to return the view directions. - device (torch.device): The device on which to allocate tensors. - - Returns: - Tuple: Contains the following: - - poses (torch.Tensor): Generated poses, shape [size, 4, 4]. - - thetas (torch.Tensor): Elevation angles in degrees, shape [size]. - - phis (torch.Tensor): Azimuth angles in degrees, shape [size]. - - radius (torch.Tensor): Radii of the camera orbits, shape [size]. - - dirs (torch.Tensor, optional): View directions, if requested. - """ - - # Convert angles from degrees to radians - theta_range = np.radians(theta_range) - phi_range = np.radians(phi_range) - angle_overhead = np.radians(angle_overhead) - angle_front = np.radians(angle_front) - - # Generate radius for each pose - radius = torch.rand(size, device=device) * (radius_range[1] - radius_range[0]) + radius_range[0] - - # Generate camera center positions - if random.random() < uniform_sphere_rate: - centers, thetas, phis = sample_uniform_sphere(size=size, radius=radius, device=device) - else: - centers, thetas, phis = sample_orbit( - size=size, radius=radius, theta_range=theta_range, phi_range=phi_range, device=device - ) - - # Initialize targets to 0 (assuming 0 is a point in 3D space that cameras are looking at) - targets = torch.zeros_like(centers) - - # Apply jitter - if jitter: - centers += torch.rand_like(centers) * jitter_center - jitter_center / 2.0 - targets = torch.randn_like(centers) * jitter_target - - # Compute camera look-at matrix - forward_vector, up_vector, right_vector = compute_look_at_vectors( - centers=centers - targets, jitter_up=jitter_up if jitter else 0, device=device - ) - - # Construct the 4x4 pose matrices - poses = construct_poses( - centers=centers, right_vector=right_vector, up_vector=up_vector, forward_vector=forward_vector, device=device - ) - - # Optionally compute view directions - dirs = get_view_direction(thetas, phis, angle_overhead, angle_front) if return_dirs else None - - # Convert back to degrees for thetas and phis - thetas, phis = torch.rad2deg(thetas), torch.rad2deg(phis) - - return poses, thetas, phis, radius, dirs - - -def sample_uniform_sphere( - size: int, radius: torch.Tensor, device: torch.device -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Sample points uniformly on a sphere. - - Args: - size (int): Number of points to sample. - device (torch.device): Device to allocate tensors on. - radius (torch.Tensor): Radii for the points. - - Returns: - Tuple: Contains the following: - - centers (torch.Tensor): The Cartesian coordinates of the sampled points. - - thetas (torch.Tensor): Elevation angles in radians. - - phis (torch.Tensor): Azimuth angles in radians. - """ - # Generate unit vectors - unit_centers = F.normalize( - torch.stack( - [ - torch.randn(size, device=device), - torch.abs(torch.randn(size, device=device)), - torch.randn(size, device=device), - ], - dim=-1, - ), - p=2, - dim=1, - ) - # Generate radii and scale unit vectors - centers = unit_centers * radius.unsqueeze(-1) - # Calculate spherical coordinates - thetas = torch.acos(unit_centers[:, 1]) - phis = torch.atan2(unit_centers[:, 0], unit_centers[:, 2]) - phis[phis < 0] += 2 * np.pi - - return centers, thetas, phis - - -def sample_orbit( - size: int, radius: torch.Tensor, theta_range: np.ndarray, phi_range: np.ndarray, device: torch.device = "cuda" -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Sample points on a spherical orbit. - - Args: - size (int): Number of points to sample. - radius (torch.Tensor): Radii for the points. - theta_range (np.ndarray): Elevation angle range in radians [min, max]. - phi_range (np.ndarray): Azimuth angle range in radians [min, max]. - device (torch.device): Device to allocate tensors on. - - Returns: - Tuple: Contains the following: - - centers (torch.Tensor): The Cartesian coordinates of the sampled points. - - thetas (torch.Tensor): Elevation angles in radians. - - phis (torch.Tensor): Azimuth angles in radians. - """ - thetas = torch.rand(size, device=device) * (theta_range[1] - theta_range[0]) + theta_range[0] - phis = torch.rand(size, device=device) * (phi_range[1] - phi_range[0]) + phi_range[0] - phis[phis < 0] += 2 * np.pi - - x = radius * torch.sin(thetas) * torch.sin(phis) - y = radius * torch.cos(thetas) - z = radius * torch.sin(thetas) * torch.cos(phis) - - centers = torch.stack([x, y, z], dim=-1) - - return centers, thetas, phis - - -class RandomPosesDataset(IterableDataset): - """ - A dataset class to generate random poses. - """ - - def __init__( - self, - internal_batch_size: int = 100, - height: int = 256, - width: int = 256, - radius_range: Tuple[float, float] = [3.0, 3.5], - theta_range: Tuple[float, float] = [45.0, 105.0], - phi_range: Tuple[float, float] = [-180.0, 180.0], - fovx_range: Tuple[float, float] = [10.0, 30.0], - default_fovx: float = 20.0, - fovy_range: Tuple[float, float] = [10.0, 30.0], - default_fovy: float = 20.0, - default_radius: float = 3.2, - default_polar: float = 90.0, - default_azimuth: float = 0.0, - jitter: bool = False, - jitter_center: float = 0.2, - jitter_target: float = 0.2, - jitter_up: float = 0.02, - angle_overhead: float = 30.0, - angle_front: float = 60.0, - uniform_sphere_rate: float = 0.0, - near: float = 0.01, - far: float = 1000.0, - device: torch.device = 'cpu', - ) -> None: - """ - Initializes a new RandomPosesDataset instance. - - Parameters: - internal_batch_size (int): Number of samples to pre-generate internally. - height (int): Height of the image. - width (int): Width of the image. - radius_range (Tuple[float, float]): Range of generated radii. - theta_range (Tuple[float, float]): Range of generated theta angles. - phi_range (Tuple[float, float]): Range of generated phi angles. - fovx_range (Tuple[float, float]): Range of generated field of view in x-direction. - default_fovx (float): Default field of view in x-direction. - fovy_range (Tuple[float, float]): Range of generated field of view angles in y-direction. - default_fovy (float): Default field of view in y-direction. - default_radius (float): Default radius of the circle. - default_polar (float): Default polar angle. - default_azimuth (float): Default azimuth angle. - jitter (bool): Whether to jitter the poses. - jitter_center (float): Jittering center range. - jitter_target (float): Jittering target range. - jitter_up (float): Jittering up range. - angle_overhead (float): Overhead angle. - angle_front (float): Frontal angle. - uniform_sphere_rate (float): Rate of sampling uniformly on a sphere. - near (float): Near clipping distance. - far (float): Far clipping distance. - device (torch.device): Device to generate data on. - """ - - super().__init__() - self.height = height - self.width = width - self.internal_batch_size = internal_batch_size - - # TODO(ahmadki): expose for models other than dreamfusion - self.progressive_view = False - self.progressive_view_start_step = 0 - self.progressive_view_end_step = 500 - - self.default_fovx = default_fovx - self.default_fovy = default_fovy - self.default_radius = default_radius - self.default_polar = default_polar - self.default_azimuth = default_azimuth - self.same_fov_random = True - - self.radius_range = radius_range - self.theta_range = theta_range - self.phi_range = phi_range - self.fovx_range = fovx_range - self.fovy_range = fovy_range - - self.current_radius_range = radius_range - self.current_theta_range = theta_range - self.current_phi_range = phi_range - self.current_fovx_range = fovx_range - self.current_fovy_range = fovy_range - - self.angle_overhead = angle_overhead - self.angle_front = angle_front - self.uniform_sphere_rate = uniform_sphere_rate - self.jitter = jitter - self.jitter_center = jitter_center - self.jitter_target = jitter_target - self.jitter_up = jitter_up - - self.near = near - self.far = far - - self.device = device - - # TODO(ahmadki): make camera type a parameter - self.camera = PinholeCamera( - width=self.width, height=self.height, near=self.near, far=self.far, device=self.device - ) - - def update_step(self, epoch: int, global_step: int) -> None: - """ - Update the dataset at the beginning of each epoch. - - Parameters: - epoch (int): Current epoch. - global_step (int): Current global step. - - """ - if self.progressive_view: - self.progressive_view_update_step(global_step=global_step) - - def progressive_view_update_step(self, global_step: int) -> None: - """ - progressively relaxing view range - - Parameters: - global_step (int): Current global step. - """ - # TODO(ahmadki): support non-linear progressive_views - r = linear_normalization( - x=global_step, lower_bound=self.progressive_view_start_step, upper_bound=self.progressive_view_end_step - ) - self.current_phi_range = [ - (1 - r) * self.default_azimuth + r * self.phi_range[0], - (1 - r) * self.default_azimuth + r * self.phi_range[1], - ] - self.current_theta_range = [ - (1 - r) * self.default_polar + r * self.theta_range[0], - (1 - r) * self.default_polar + r * self.theta_range[1], - ] - self.current_radius_range = [ - (1 - r) * self.default_radius + r * self.radius_range[0], - (1 - r) * self.default_radius + r * self.radius_range[1], - ] - self.current_fovy_range = [ - (1 - r) * self.default_fovy + r * self.fovy_range[0], - (1 - r) * self.default_fovy + r * self.fovy_range[1], - ] - - def __iter__(self) -> Iterator[Dict[str, torch.Tensor]]: - """ - Returns an iterator over the dataset. - - Returns: - Iterator: An iterator over the dataset. - - """ - while True: - # Generate samples - rays_o, rays_d, dirs, mvp, delta_azimuth = self.generate_samples() - for i in range(self.internal_batch_size): - # Yield one sample at a time from the internal batch - yield { - 'height': self.height, - 'width': self.width, - 'rays_o': rays_o[i].unsqueeze(0), - 'rays_d': rays_d[i].unsqueeze(0), - 'dir': dirs[i].unsqueeze(0), - 'mvp': mvp[i].unsqueeze(0), - 'azimuth': delta_azimuth[i].unsqueeze(0), - } - - def generate_samples(self): - """ - Generate a batch of random poses. - - Returns: - Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor], Dict[str, torch.Tensor], Dict[str, torch.Tensor], Dict[str, torch.Tensor]]: - A tuple containing: - - rays (Dict[str, torch.Tensor]): A dictionary containing the origin and direction of the rays. - - dirs (torch.Tensor): A tensor containing the directions of the rays. - - mvp (torch.Tensor): A tensor containing the model-view-projection matrix. - - azimuth (torch.Tensor): A A tensor containing the azimuth angle. - """ - # Generate random poses and directions - poses, dirs, thetas, phis, radius = rand_poses( - size=self.internal_batch_size, - radius_range=self.current_radius_range, - theta_range=self.current_theta_range, - phi_range=self.current_phi_range, - angle_overhead=self.angle_overhead, - angle_front=self.angle_front, - uniform_sphere_rate=self.uniform_sphere_rate, - jitter=self.jitter, - jitter_center=self.jitter_center, - jitter_target=self.jitter_target, - jitter_up=self.jitter_up, - return_dirs=True, - device=self.device, - ) - - # random focal - if self.same_fov_random: - fovx_random = random.random() - fovy_random = fovx_random - else: - fovx_random = random.random() - fovy_random = random.random() - fovx = fovx_random * (self.current_fovx_range[1] - self.current_fovx_range[0]) + self.current_fovx_range[0] - fovy = fovy_random * (self.current_fovy_range[1] - self.current_fovy_range[0]) + self.current_fovy_range[0] - - # Compute camera intrinsics - intrinsics = self.camera.compute_intrinsics(fovx=fovx, fovy=fovy) - - # Compute projection matrix - projection = self.camera.compute_projection_matrix(focal_x=intrinsics[0], focal_y=intrinsics[1]) - mvp = projection @ torch.inverse(poses) # [internal batch size, 4, 4] - - # Sample rays - rays_o, rays_d = get_rays( - poses=poses, intrinsics=intrinsics, height=self.height, width=self.width, device=poses.device - ) - - # Compute azimuth delta - delta_azimuth = phis - self.default_azimuth - delta_azimuth[delta_azimuth > 180] -= 360 # range in [-180, 180] - - return rays_o, rays_d, dirs, mvp, delta_azimuth - - def collate_fn(self, batch: List[Dict[str, Any]]) -> Dict[str, Any]: - """ - Collate function to bundle multiple samples into a single batch. - - Args: - batch (List[Dict]): List of samples to collate. - - Returns: - Dict: A dictionary containing the collated batch. - """ - return { - 'height': self.height, - 'width': self.width, - 'rays_o': torch.cat([item['rays_o'] for item in batch], dim=0), - 'rays_d': torch.cat([item['rays_d'] for item in batch], dim=0), - 'mvp': torch.cat([item['mvp'] for item in batch], dim=0), - 'dir': torch.cat([item['dir'] for item in batch], dim=0), - 'azimuth': torch.cat([item['azimuth'] for item in batch], dim=0), - } diff --git a/nemo/collections/multimodal/data/nerf/utils.py b/nemo/collections/multimodal/data/nerf/utils.py deleted file mode 100644 index 117d0de701fb..000000000000 --- a/nemo/collections/multimodal/data/nerf/utils.py +++ /dev/null @@ -1,204 +0,0 @@ -from typing import Dict, Optional - -import numpy as np -import torch -import torch.nn.functional as F - - -def get_view_direction(thetas: torch.Tensor, phis: torch.Tensor, overhead: float, front: float) -> torch.Tensor: - """ - Get the view direction based on given theta and phi values. - - Parameters: - - thetas (torch.Tensor): Array of theta values with shape [B,] - - phis (torch.Tensor): Array of phi values with shape [B,] - - overhead (float): Threshold for determining top and bottom views. - - front (float): Threshold for determining front, back and side views. - - Returns: - - torch.Tensor: Array of view directions. Values can be: - 0: front - 1: side (camera left) - 2: back - 3: side (camera right) - 4: top - 5: bottom - - Notes: - - Phi and theta values are assumed to be in radians. - """ - - num_samples = thetas.shape[0] - res = torch.zeros(num_samples, dtype=torch.long) - - # Normalize phis values to [0, 2*pi] - phis = phis % (2 * np.pi) - - # Determine direction based on phis - res[(phis < front / 2) | (phis >= 2 * np.pi - front / 2)] = 0 - res[(phis >= front / 2) & (phis < np.pi - front / 2)] = 1 - res[(phis >= np.pi - front / 2) & (phis < np.pi + front / 2)] = 2 - res[(phis >= np.pi + front / 2) & (phis < 2 * np.pi - front / 2)] = 3 - - # Override directions based on thetas for top and bottom views - res[thetas <= overhead] = 4 - res[thetas >= (np.pi - overhead)] = 5 - - return res - - -def compute_look_at_vectors(centers: torch.Tensor, jitter_up: Optional[float] = None, device: torch.device = "cuda"): - """ - Compute the look-at vectors for camera poses. - - Parameters: - centers: The centers of the cameras. - jitter_up: The noise range for the up vector of the camera. - device: Device to allocate the output tensor. - - Returns: - Tuple: Contains the following: - - forward_vector: The forward vectors of the cameras, shape [B, 3]. - - up_vector: The up vectors of the cameras, shape [B, 3]. - - right_vector: The right vectors of the cameras, shape [B, 3]. - """ - forward_vector = F.normalize(centers) - up_vector = torch.FloatTensor([0, 1, 0]).to(device).unsqueeze(0).repeat(len(centers), 1) - right_vector = F.normalize(torch.cross(forward_vector, up_vector, dim=-1)) - up_noise = torch.randn_like(up_vector) * jitter_up if jitter_up is not None else 0 - up_vector = F.normalize(torch.cross(right_vector, forward_vector, dim=-1)) - up_vector = F.normalize(torch.cross(right_vector, forward_vector, dim=-1) + up_noise) - - return forward_vector, up_vector, right_vector - - -def construct_poses( - centers: torch.Tensor, - right_vector: torch.Tensor, - up_vector: torch.Tensor, - forward_vector: torch.Tensor, - device: torch.device, -) -> torch.Tensor: - """ - Construct the 4x4 pose matrices. - - Args: - size (int): Number of pose matrices to construct. - centers (torch.Tensor): The Cartesian coordinates of the camera centers. - right_vector (torch.Tensor): The right vectors of the cameras. - up_vector (torch.Tensor): The up vectors of the cameras. - forward_vector (torch.Tensor): The forward vectors of the cameras. - device (torch.device): Device to allocate tensors on. - - Returns: - torch.Tensor: The pose matrices, shape [size, 4, 4]. - """ - poses = torch.eye(4, dtype=torch.float32, device=device).unsqueeze(0).repeat(len(centers), 1, 1) - poses[:, :3, :3] = torch.stack([right_vector, up_vector, forward_vector], dim=-1) - poses[:, :3, 3] = centers - - return poses - - -@torch.cuda.amp.autocast(enabled=False) -def get_rays( - poses: torch.Tensor, - intrinsics: torch.Tensor, - height: int, - width: int, - num_samples: Optional[int] = None, - error_map: Optional[torch.Tensor] = None, - device: torch.device = "cuda", -) -> Dict[str, torch.Tensor]: - """ - Generates rays from camera poses and intrinsics. - - Args: - poses (torch.Tensor): Camera poses, shape [B, 4, 4] (cam2world). - intrinsics (torch.Tensor): Intrinsic camera parameters [fx, fy, cx, cy]. - height (int): Height of the image. - width (int): Width of the image. - num_samples: Number of rays to sample, default is None for all rays. - error_map: Optional tensor to use for non-uniform sampling of rays. - device (torch.device): Device on which to generate the rays. - - Returns: - Dict[str, torch.Tensor]: A dictionary containing the following keys: - - 'rays_o': Origin of the rays, shape [B, N, 3] - - 'rays_d': Directions of the rays, shape [B, N, 3] - - 'inds': Indices of the rays, shape [B, N] (if N > 0) - - 'inds_coarse': Coarse indices of the rays, shape [B, N] (if error_map is not None) - """ - - batch_size = poses.shape[0] - fx, fy, cx, cy = intrinsics - - i, j = torch.meshgrid( - torch.linspace(0, width - 1, width, device=device), - torch.linspace(0, height - 1, height, device=device), - indexing='ij', - ) - i = i.t().reshape([1, height * width]).expand([batch_size, height * width]) + 0.5 - j = j.t().reshape([1, height * width]).expand([batch_size, height * width]) + 0.5 - - results = {} - - if num_samples is not None: - num_samples = min(num_samples, height * width) - - if error_map is None: - sampled_indices = torch.randint(0, height * width, size=[num_samples], device=device) - sampled_indices = sampled_indices.expand([batch_size, num_samples]) - else: - sampled_indices, sampled_indices_coarse = non_uniform_sampling( - error_map=error_map, num_samples=num_samples, height=height, width=width, device=device - ) - results['sampled_indices_coarse'] = sampled_indices_coarse - - i = torch.gather(i, -1, sampled_indices) - j = torch.gather(j, -1, sampled_indices) - results['sampled_indices'] = sampled_indices - else: - sampled_indices = torch.arange(height * width, device=device).expand([batch_size, height * width]) - - zs = torch.full_like(i, -1.0) - xs = -(i - cx) / fx * zs - ys = (j - cy) / fy * zs - directions = torch.stack((xs, ys, zs), dim=-1) - - rays_d = directions @ poses[:, :3, :3].transpose(-1, -2) - rays_o = poses[..., :3, 3].unsqueeze(-2).expand_as(rays_d) - - rays_o = rays_o.view(-1, height, width, 3) - rays_d = rays_d.view(-1, height, width, 3) - - return rays_o, rays_d - - -def non_uniform_sampling( - error_map: torch.Tensor, batch_size: int, num_samples: int, height: int, width: int, device: torch.device = "cuda" -) -> torch.Tensor: - """ - Perform non-uniform sampling based on the provided error_map. - - Parameters: - error_map: The error map for non-uniform sampling. - batch_size (int): Batch size of the generated samples. - num_samples (int): Number of samples to pick. - height (int): Height of the image. - width (int): Width of the image. - device: Device on which tensors are stored. - - Returns: - A tensor containing the sampled indices. - """ - - sampled_indices_coarse = torch.multinomial(error_map.to(device), num_samples, replacement=False) - inds_x, inds_y = sampled_indices_coarse // 128, sampled_indices_coarse % 128 - sx, sy = height / 128, width / 128 - - inds_x = (inds_x * sx + torch.rand(batch_size, num_samples, device=device) * sx).long().clamp(max=height - 1) - inds_y = (inds_y * sy + torch.rand(batch_size, num_samples, device=device) * sy).long().clamp(max=width - 1) - sampled_indices = inds_x * width + inds_y - - return sampled_indices, sampled_indices_coarse diff --git a/nemo/collections/multimodal/data/neva/__init__.py b/nemo/collections/multimodal/data/neva/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/nemo/collections/multimodal/data/neva/conversation.py b/nemo/collections/multimodal/data/neva/conversation.py deleted file mode 100644 index 4d46bf4decf2..000000000000 --- a/nemo/collections/multimodal/data/neva/conversation.py +++ /dev/null @@ -1,406 +0,0 @@ -import dataclasses -import re -from enum import Enum, auto -from typing import List, Tuple - - -class SeparatorStyle(Enum): - """Different separator style.""" - - SINGLE = auto() - TWO = auto() - MPT = auto() - PLAIN = auto() - LLAMA_2 = auto() - NVGPT = auto() - - -@dataclasses.dataclass -class Conversation: - """A class that keeps all conversation history.""" - - system: str - roles: List[str] - messages: List[List[str]] - offset: int - sep_style: SeparatorStyle = SeparatorStyle.SINGLE - sep: str = "###" - sep2: str = None - version: str = "Unknown" - skip_next: bool = False - - def get_prompt(self): - messages = self.messages - if len(messages) > 0 and type(messages[0][1]) is tuple: - messages = self.messages.copy() - init_role, init_msg = messages[0].copy() - init_msg = init_msg[0].replace("", "").strip() - if 'mmtag' in self.version: - messages[0] = (init_role, init_msg) - messages.insert(0, (self.roles[0], "")) - messages.insert(1, (self.roles[1], "Received.")) - else: - messages[0] = (init_role, "\n" + init_msg) - - if self.sep_style == SeparatorStyle.SINGLE: - ret = self.system + self.sep - for role, message in messages: - if message: - if type(message) is tuple: - message, _, _ = message - ret += role + ": " + message + self.sep - else: - ret += role + ":" - elif self.sep_style == SeparatorStyle.TWO: - seps = [self.sep, self.sep2] - ret = self.system + seps[0] - for i, (role, message) in enumerate(messages): - if message: - if type(message) is tuple: - message, _, _ = message - ret += role + ": " + message + seps[i % 2] - else: - ret += role + ":" - elif self.sep_style == SeparatorStyle.MPT: - ret = self.system + self.sep - for role, message in messages: - if message: - if type(message) is tuple: - message, _, _ = message - ret += role + message + self.sep - else: - ret += role - elif self.sep_style == SeparatorStyle.LLAMA_2: - wrap_sys = lambda msg: f"<>\n{msg}\n<>\n\n" - wrap_inst = lambda msg: f"[INST] {msg} [/INST]" - ret = "" - - for i, (role, message) in enumerate(messages): - if i == 0: - assert message, "first message should not be none" - assert role == self.roles[0], "first message should come from user" - if message: - if type(message) is tuple: - message, _, _ = message - if i == 0: - message = wrap_sys(self.system) + message - if i % 2 == 0: - message = wrap_inst(message) - ret += self.sep + message - else: - ret += " " + message + " " + self.sep2 - else: - ret += "" - ret = ret.lstrip(self.sep) - elif self.sep_style == SeparatorStyle.PLAIN: - seps = [self.sep, self.sep2] - ret = self.system - for i, (role, message) in enumerate(messages): - if message: - if type(message) is tuple: - message, _, _ = message - ret += message + seps[i % 2] - else: - ret += "" - elif self.sep_style == SeparatorStyle.NVGPT: - ret = self.sep2 + self.system + self.sep - for role, message in messages: - if message: - if type(message) is tuple: - message, _, _ = message - ret += role + '\n' + message + '\n' + self.sep - else: - ret += role + '\n' - else: - raise ValueError(f"Invalid style: {self.sep_style}") - - return ret - - def append_message(self, role, message): - self.messages.append([role, message]) - - def get_images(self, return_pil=False): - images = [] - for i, (role, msg) in enumerate(self.messages[self.offset :]): - if i % 2 == 0: - if type(msg) is tuple: - import base64 - from io import BytesIO - - from PIL import Image - - msg, image, image_process_mode = msg - if image_process_mode == "Pad": - - def expand2square(pil_img, background_color=(122, 116, 104)): - width, height = pil_img.size - if width == height: - return pil_img - elif width > height: - result = Image.new(pil_img.mode, (width, width), background_color) - result.paste(pil_img, (0, (width - height) // 2)) - return result - else: - result = Image.new(pil_img.mode, (height, height), background_color) - result.paste(pil_img, ((height - width) // 2, 0)) - return result - - image = expand2square(image) - elif image_process_mode == "Crop": - pass - elif image_process_mode == "Resize": - image = image.resize((336, 336)) - else: - raise ValueError(f"Invalid image_process_mode: {image_process_mode}") - max_hw, min_hw = max(image.size), min(image.size) - aspect_ratio = max_hw / min_hw - max_len, min_len = 800, 400 - shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw)) - longest_edge = int(shortest_edge * aspect_ratio) - W, H = image.size - if H > W: - H, W = longest_edge, shortest_edge - else: - H, W = shortest_edge, longest_edge - image = image.resize((W, H)) - if return_pil: - images.append(image) - else: - buffered = BytesIO() - image.save(buffered, format="JPEG") - img_b64_str = base64.b64encode(buffered.getvalue()).decode() - images.append(img_b64_str) - return images - - def to_gradio_chatbot(self): - ret = [] - for i, (role, msg) in enumerate(self.messages[self.offset :]): - if i % 2 == 0: - if type(msg) is tuple: - import base64 - from io import BytesIO - - msg, image, image_process_mode = msg - max_hw, min_hw = max(image.size), min(image.size) - aspect_ratio = max_hw / min_hw - max_len, min_len = 800, 400 - shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw)) - longest_edge = int(shortest_edge * aspect_ratio) - W, H = image.size - if H > W: - H, W = longest_edge, shortest_edge - else: - H, W = shortest_edge, longest_edge - image = image.resize((W, H)) - # image = image.resize((224, 224)) - buffered = BytesIO() - image.save(buffered, format="JPEG") - img_b64_str = base64.b64encode(buffered.getvalue()).decode() - img_str = f'user upload image' - msg = msg.replace('', img_str) - ret.append([msg, None]) - else: - ret[-1][-1] = msg - return ret - - def copy(self): - return Conversation( - system=self.system, - roles=self.roles, - messages=[[x, y] for x, y in self.messages], - offset=self.offset, - sep_style=self.sep_style, - sep=self.sep, - sep2=self.sep2, - version=self.version, - ) - - def dict(self): - if len(self.get_images()) > 0: - return { - "system": self.system, - "roles": self.roles, - "messages": [[x, y[0] if type(y) is tuple else y] for x, y in self.messages], - "offset": self.offset, - "sep": self.sep, - "sep2": self.sep2, - } - return { - "system": self.system, - "roles": self.roles, - "messages": self.messages, - "offset": self.offset, - "sep": self.sep, - "sep2": self.sep2, - } - - -# . . -# NVGPT -# . . - -conv_nvgpt = Conversation( - system="""A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n\n""", - roles=("User", "Assistant"), - version="nvgpt", - messages=(), - offset=0, - sep_style=SeparatorStyle.NVGPT, - sep="", - sep2="System\n", -) - -conv_vicuna_v0 = Conversation( - system="A chat between a curious human and an artificial intelligence assistant. " - "The assistant gives helpful, detailed, and polite answers to the human's questions.", - roles=("Human", "Assistant"), - messages=( - ("Human", "What are the key differences between renewable and non-renewable energy sources?"), - ( - "Assistant", - "Renewable energy sources are those that can be replenished naturally in a relatively " - "short amount of time, such as solar, wind, hydro, geothermal, and biomass. " - "Non-renewable energy sources, on the other hand, are finite and will eventually be " - "depleted, such as coal, oil, and natural gas. Here are some key differences between " - "renewable and non-renewable energy sources:\n" - "1. Availability: Renewable energy sources are virtually inexhaustible, while non-renewable " - "energy sources are finite and will eventually run out.\n" - "2. Environmental impact: Renewable energy sources have a much lower environmental impact " - "than non-renewable sources, which can lead to air and water pollution, greenhouse gas emissions, " - "and other negative effects.\n" - "3. Cost: Renewable energy sources can be more expensive to initially set up, but they typically " - "have lower operational costs than non-renewable sources.\n" - "4. Reliability: Renewable energy sources are often more reliable and can be used in more remote " - "locations than non-renewable sources.\n" - "5. Flexibility: Renewable energy sources are often more flexible and can be adapted to different " - "situations and needs, while non-renewable sources are more rigid and inflexible.\n" - "6. Sustainability: Renewable energy sources are more sustainable over the long term, while " - "non-renewable sources are not, and their depletion can lead to economic and social instability.\n", - ), - ), - offset=2, - sep_style=SeparatorStyle.SINGLE, - sep="###", -) - -conv_vicuna_v1 = Conversation( - system="A chat between a curious user and an artificial intelligence assistant. " - "The assistant gives helpful, detailed, and polite answers to the user's questions.", - roles=("USER", "ASSISTANT"), - version="v1", - messages=(), - offset=0, - sep_style=SeparatorStyle.TWO, - sep=" ", - sep2="", -) - -conv_llama_2 = Conversation( - system="""You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. - -If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.""", - roles=("USER", "ASSISTANT"), - version="llama_v2", - messages=(), - offset=0, - sep_style=SeparatorStyle.LLAMA_2, - sep="", - sep2="", -) - -conv_llava_llama_2 = Conversation( - system="You are a helpful language and vision assistant. " - "You are able to understand the visual content that the user provides, " - "and assist the user with a variety of tasks using natural language.", - roles=("USER", "ASSISTANT"), - version="llama_v2", - messages=(), - offset=0, - sep_style=SeparatorStyle.LLAMA_2, - sep="", - sep2="", -) - -conv_mpt = Conversation( - system="""<|im_start|>system -A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.""", - roles=("<|im_start|>user\n", "<|im_start|>assistant\n"), - version="mpt", - messages=(), - offset=0, - sep_style=SeparatorStyle.MPT, - sep="<|im_end|>", -) - -conv_llava_plain = Conversation( - system="", roles=("", ""), messages=(), offset=0, sep_style=SeparatorStyle.PLAIN, sep="\n", -) - -conv_llava_v0 = Conversation( - system="A chat between a curious human and an artificial intelligence assistant. " - "The assistant gives helpful, detailed, and polite answers to the human's questions.", - roles=("Human", "Assistant"), - messages=(("Human", "Hi!"), ("Assistant", "Hi there! How can I help you today?")), - offset=2, - sep_style=SeparatorStyle.SINGLE, - sep="###", -) - -conv_llava_v0_mmtag = Conversation( - system="A chat between a curious user and an artificial intelligence assistant. " - "The assistant is able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language." - "The visual content will be provided with the following format: visual content.", - roles=("Human", "Assistant"), - messages=(), - offset=0, - sep_style=SeparatorStyle.SINGLE, - sep="###", - version="v0_mmtag", -) - -conv_llava_v1 = Conversation( - system="A chat between a curious human and an artificial intelligence assistant. " - "The assistant gives helpful, detailed, and polite answers to the human's questions.", - roles=("USER", "ASSISTANT"), - version="v1", - messages=(), - offset=0, - sep_style=SeparatorStyle.TWO, - sep=" ", - sep2="", -) - -conv_llava_v1_mmtag = Conversation( - system="A chat between a curious user and an artificial intelligence assistant. " - "The assistant is able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language." - "The visual content will be provided with the following format: visual content.", - roles=("USER", "ASSISTANT"), - messages=(), - offset=0, - sep_style=SeparatorStyle.TWO, - sep=" ", - sep2="", - version="v1_mmtag", -) - -default_conversation = conv_vicuna_v0 -conv_templates = { - "default": conv_vicuna_v0, - "v0": conv_vicuna_v0, - "v1": conv_vicuna_v1, - "vicuna_v1": conv_vicuna_v1, - "llama_2": conv_llama_2, - "plain": conv_llava_plain, - "v0_plain": conv_llava_plain, - "llava_v0": conv_llava_v0, - "v0_mmtag": conv_llava_v0_mmtag, - "llava_v1": conv_llava_v1, - "v1_mmtag": conv_llava_v1_mmtag, - "llava_llama_2": conv_llava_llama_2, - "mpt": conv_mpt, - "nvgpt": conv_nvgpt, -} - - -if __name__ == "__main__": - print(default_conversation.get_prompt()) diff --git a/nemo/collections/multimodal/data/neva/neva_dataset.py b/nemo/collections/multimodal/data/neva/neva_dataset.py deleted file mode 100644 index dc4a609f8d48..000000000000 --- a/nemo/collections/multimodal/data/neva/neva_dataset.py +++ /dev/null @@ -1,584 +0,0 @@ -import copy -import json -import logging -import os -import pathlib -import re -import tarfile -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Sequence, Union - -import torch -import torch.nn as nn -import torch.nn.functional as F -import transformers -from einops import rearrange -from omegaconf import DictConfig -from PIL import Image -from torch.utils.data import Dataset, default_collate -from transformers import CLIPImageProcessor - -import nemo.collections.multimodal.data.neva.conversation as conversation_lib -from nemo.collections.multimodal.data.kosmos.kosmos_dataset import tokenize_and_insert_media_tokens -from nemo.collections.nlp.modules.common.megatron.utils import get_ltor_masks_and_position_ids - -MAX_NUM_IMAGES = 4 -IGNORE_INDEX = -1 -DEFAULT_PAD_TOKEN = "" -DEFAULT_BOS_TOKEN = "" -DEFAULT_EOS_TOKEN = "" -DEFAULT_UNK_TOKEN = "" -DEFAULT_IMAGE_TOKEN = "" -DEFAULT_SYSTEM_TOKEN = "" -DEFAULT_SEPARATOR_TOKEN = "" -DEFAULT_LABELS_TOKEN = "" -DEFAULT_IMAGE_PATCH_TOKEN = "" -DEFAULT_IM_START_TOKEN = "" -DEFAULT_IM_END_TOKEN = "" - - -class TarOrFolderImageLoader: - def __init__(self, image_folder): - self.image_folder = image_folder - self.tar_index = {} - if self.image_folder.endswith('.tar'): - self.build_index() - - def build_index(self): - with tarfile.open(self.image_folder, 'r') as tar: - for member in tar.getmembers(): - self.tar_index[member.name] = member - - def open_image(self, file_name): - if self.image_folder.endswith('.tar'): - with tarfile.open(self.image_folder, 'r') as tar: - member = self.tar_index.get(file_name) - if member: - f = tar.extractfile(member) - return Image.open(f).convert('RGB') - else: - return Image.open(os.path.join(self.image_folder, file_name)).convert('RGB') - return None - - -def tokenize( - texts: Union[str, List[str]], tokenizer: Any, context_length: int, add_extra_token: int, -) -> torch.LongTensor: - """ - Returns the tokenized representation of given input string(s). If the list of tokens exceeds the context - length plus the number of extra tokens, it gets truncated. If it's smaller, it gets padded with zeros. - - Parameters - ---------- - texts : Union[str, List[str]] - An input string or a list of input strings to tokenize. - tokenizer : Any - A tokenizer to be used for tokenization. - context_length : int - The context length to be used for the output tensor. - add_extra_token : int - Number of extra tokens to add, should be either 0 or 1. - - Returns - ------- - torch.LongTensor - A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length + add_extra_token]. - """ - assert add_extra_token == 0 or add_extra_token == 1, "`add_extra_token` should be either 0 or 1." - - texts_is_str = False - if isinstance(texts, str): - texts = [texts] - texts_is_str = True - tokens = tokenizer.text_to_ids(texts) - max_len = max([len(token) for token in tokens]) - context_length = min(max_len - add_extra_token, context_length) - # truncate and padding - result = torch.zeros(len(tokens), context_length + add_extra_token, dtype=torch.long) - - for i, token in enumerate(tokens): - if len(token) > context_length + add_extra_token: - token = token[: context_length + add_extra_token] # Truncate - result[i, : len(token)] = torch.tensor(token) - if texts_is_str: - result = result[0] - return result - - -def preprocess_multimodal(sources: dict, multimodal_cfg: dict, cur_token_len: int,) -> Dict: - is_multimodal = multimodal_cfg['is_multimodal'] - image_token_len = cur_token_len - if not is_multimodal: - return sources - - for source in sources: - conversation = source['conversations'] - if multimodal_cfg['sep_image_conv_front']: - assert DEFAULT_IMAGE_TOKEN in conversation[0]['value'] - conversation[0]['value'] = conversation[0]['value'].replace(DEFAULT_IMAGE_TOKEN, '').strip() - conversation[0]['value'] = ( - DEFAULT_IMAGE_TOKEN - + conversation_lib.default_conversation.sep - + conversation_lib.default_conversation.roles[0] - + ": " - + conversation[0]['value'] - ) - for turn in conversation: - if multimodal_cfg['use_im_start_end']: - replace_token = DEFAULT_IMAGE_PATCH_TOKEN * image_token_len - else: - replace_token = DEFAULT_IMAGE_PATCH_TOKEN * (image_token_len - 2) - replace_token = DEFAULT_IM_START_TOKEN + replace_token + DEFAULT_IM_END_TOKEN - turn["value"] = turn["value"].replace(DEFAULT_IMAGE_TOKEN, replace_token) - - return sources - - -def preprocess_llama_2(sources: dict, tokenizer: transformers.PreTrainedTokenizer, cfg,) -> Dict: - conv = conversation_lib.conv_llava_llama_2.copy() - roles = {"human": conv.roles[0], "gpt": conv.roles[1]} - - # Apply prompt templates - conversations = [] - for i, source in enumerate(sources): - source = source['conversations'] - if roles[source[0]["from"]] != conv.roles[0]: - # Skip the first one if it is not from human - source = source[1:] - - conv.messages = [] - for j, sentence in enumerate(source): - role = roles[sentence["from"]] - assert role == conv.roles[j % 2], f"{i}" - conv.append_message(role, sentence["value"]) - conversations.append(conv.get_prompt()) - - add_extra_token = cfg.get("add_extra_token") - - # Tokenize conversations - tokens = tokenize( - texts=conversations, - tokenizer=tokenizer, - context_length=cfg.get("context_length"), - add_extra_token=add_extra_token, - ) - - # llama tricks - tokens[tokens == 32003] = 0 # DEFAULT_IMAGE_PATCH_TOKEN - tokens[tokens == 32006] = 1 # - tokens[tokens == 32007] = 2 # - labels = tokens.clone().detach() - - # Mask labels - sep = "[/INST] " - for conversation, target in zip(conversations, labels): - rounds = conversation.split(conv.sep2) - cur_len = 0 - for i, rou in enumerate(rounds): - - if rou == "": - break - - parts = rou.split(sep) - if len(parts) != 2: - break - parts[0] += sep - - round_len = len(tokenizer.text_to_ids(rou + conv.sep2)) - if i > 0: - round_len -= 1 # Remove extra token added by sp tokenizer - instruction_len = len(tokenizer.text_to_ids(parts[0])) - 1 - target[cur_len : cur_len + instruction_len] = IGNORE_INDEX - - cur_len += round_len - target[cur_len:] = IGNORE_INDEX - - # Check if masking working correctly - # print([x for x in zip(tokens[0].numpy().tolist(), labels[0].numpy().tolist())]) - - if add_extra_token: - tokens = tokens[:, :-1].contiguous() - labels = labels[:, 1:].contiguous() - else: - labels = torch.roll(labels, shifts=-1, dims=-1) - labels[:, -1] = IGNORE_INDEX - - return dict(tokens=tokens, labels=labels,) - - -def preprocess_v1(sources: dict, tokenizer: transformers.PreTrainedTokenizer, cfg,) -> Dict: - conv = conversation_lib.conv_vicuna_v1.copy() - roles = {"human": conv.roles[0], "gpt": conv.roles[1]} - - # Apply prompt templates - conversations = [] - for i, source in enumerate(sources): - source = source['conversations'] - if roles[source[0]["from"]] != conv.roles[0]: - # Skip the first one if it is not from human - source = source[1:] - - conv.messages = [] - for j, sentence in enumerate(source): - role = roles[sentence["from"]] - assert role == conv.roles[j % 2], f"{i}" - conv.append_message(role, sentence["value"]) - conversations.append(conv.get_prompt()) - - # Tokenize conversations - - add_extra_token = cfg.get("add_extra_token") - # Tokenize conversations - tokens = tokenize( - texts=conversations, - tokenizer=tokenizer, - context_length=cfg.get("context_length"), - add_extra_token=add_extra_token, - ) - - labels = tokens.clone().detach() - - # Mask labels - sep = conv.sep + conv.roles[1] + ": " - for conversation, target in zip(conversations, labels): - - rounds = conversation.split(conv.sep2) - cur_len = 1 - target[:cur_len] = IGNORE_INDEX - for i, rou in enumerate(rounds): - if rou == "": - break - - parts = rou.split(sep) - if len(parts) != 2: - break - parts[0] += sep - - round_len = len(tokenizer.text_to_ids(rou)) - instruction_len = len(tokenizer.text_to_ids(parts[0])) - 2 - - target[cur_len : cur_len + instruction_len] = IGNORE_INDEX - - cur_len += round_len - target[cur_len:] = IGNORE_INDEX - - if add_extra_token: - tokens = tokens[:, :-1].contiguous() - labels = labels[:, 1:].contiguous() - else: - labels = torch.roll(labels, shifts=-1, dims=-1) - labels[:, -1] = IGNORE_INDEX - - return dict(tokens=tokens, labels=labels,) - - -def preprocess_nvgpt(sources: dict, tokenizer: transformers.PreTrainedTokenizer, cfg,) -> Dict: - """ - Given a record this transform: - 1. Add signal '<>' at the beginning each sentence, with end signal '\n'; - 2. Concatenate conversations together; - 3. Tokenize the concatenated conversation; - 4. Make a deepcopy as the target. Mask human words with IGNORE_INDEX. - """ - - conv = conversation_lib.conv_nvgpt.copy() - - # Apply prompt templates - conversations = [] - for source in sources: - conv.messages = [] - conv.system = source.get('system', conv.system) - if len(source['conversations']) >= 2: - conv.roles = (source['conversations'][0]['from'], source['conversations'][1]['from']) - - strip_end_for_inference = False - for turn in source['conversations']: - if 'label' in turn: - value = DEFAULT_LABELS_TOKEN + turn['label'] + '\n' + turn['value'] - conv.append_message(turn['from'], value) - if not turn["value"]: - strip_end_for_inference = ( - True # in inference, current turn is empty, thus end tokens need to striped. - ) - else: - conv.append_message(turn['from'], turn['value']) - context = conv.get_prompt() - if strip_end_for_inference: - context = context.rstrip("\n") + "\n" - conversations.append(context) - - add_extra_token = cfg.get("add_extra_token") - # Tokenize conversations - tokens = tokenize( - texts=conversations, - tokenizer=tokenizer, - context_length=cfg.get("context_length"), - add_extra_token=add_extra_token, - ) - - labels = tokens.clone().detach() - - # Mask targets - sep = conv.sep + conv.roles[1] + "\n" - labels_str_regexp = re.compile(f"{DEFAULT_LABELS_TOKEN}quality:.*\n") - for conversation, target in zip(conversations, labels): - rounds = conversation.split(conv.sep) - re_rounds = [conv.sep.join(rounds[:3])] # system + user + gpt - - for conv_idx in range(3, len(rounds), 2): - re_rounds.append(conv.sep.join(rounds[conv_idx : conv_idx + 2])) # user + gpt - - cur_len = 0 - for i, rou in enumerate(re_rounds): - if rou == "": - break - parts = rou.split(sep) - if len(parts) != 2: - break - - # Match the pattern - match = labels_str_regexp.search(parts[1]) - labels_str = match.group() if match else "" - - instruction_len = len(tokenizer.text_to_ids(parts[0] + sep + labels_str)) - round_len = len(tokenizer.text_to_ids(rou + conv.sep)) - target[cur_len : cur_len + instruction_len] = IGNORE_INDEX - - cur_len += round_len - target[cur_len:] = IGNORE_INDEX - - if add_extra_token: - tokens = tokens[:, :-1].contiguous() - labels = labels[:, 1:].contiguous() - else: - labels = torch.roll(labels, shifts=-1, dims=-1) - labels[:, -1] = IGNORE_INDEX - - return dict(tokens=tokens, labels=labels,) - - -class LazySupervisedDataset(Dataset): - """Dataset for supervised fine-tuning.""" - - def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer, multimodal_cfg: dict): - super(LazySupervisedDataset, self).__init__() - logging.warning("Loading data...") - if data_path is not None: - logging.warning("Loading data...") - list_data_dict = json.load(open(data_path, "r")) - else: - list_data_dict = [] - - logging.warning("Formatting inputs...Skip in lazy mode") - self.tokenizer = tokenizer - self.list_data_dict = list_data_dict - self.multimodal_cfg = multimodal_cfg - self.conv_template = multimodal_cfg["conv_template"] - self.image_folder = multimodal_cfg['image_folder'] - self.processor = multimodal_cfg["image_processor"] - - self.image_loader = TarOrFolderImageLoader(self.image_folder) - - def __len__(self): - return len(self.list_data_dict) - - def __getitem__(self, i) -> Dict[str, torch.Tensor]: - sources = self.list_data_dict[i] - processor = self.processor - if isinstance(i, int): - sources = [sources] - assert len(sources) == 1, "Don't know why it is wrapped to a list" # FIXME - if 'image' in sources[0]: - if not isinstance(self.list_data_dict[i]['image'], list): - self.list_data_dict[i]['image'] = [self.list_data_dict[i]['image']] - - images = [] - for image_file in self.list_data_dict[i]['image']: - image = self.image_loader.open_image(image_file) - if image is None: - logging.warning(f"Image {image_file} could not be found!") - if self.multimodal_cfg['image_aspect_ratio'] == 'keep': - max_hw, min_hw = max(image.size), min(image.size) - aspect_ratio = max_hw / min_hw - max_len, min_len = 448, 224 - shortest_edge = int(min(max_len / aspect_ratio, min_len)) - image = processor.preprocess( - image, return_tensors='pt', do_center_crop=False, size={"shortest_edge": shortest_edge} - )['pixel_values'][0] - elif self.multimodal_cfg['image_aspect_ratio'] == 'pad': - - def expand2square(pil_img, background_color): - width, height = pil_img.size - if width == height: - return pil_img - elif width > height: - result = Image.new(pil_img.mode, (width, width), background_color) - result.paste(pil_img, (0, (width - height) // 2)) - return result - else: - result = Image.new(pil_img.mode, (height, height), background_color) - result.paste(pil_img, ((height - width) // 2, 0)) - return result - - image = expand2square(image, tuple(int(x * 255) for x in processor.image_mean)) - image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0] - else: - image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0] - images.append(image) - images_tensors = torch.tensor([]) - if images: - images_tensors = torch.stack(images) - cur_token_len = (images_tensors[0].shape[1] // 14) * ( - images_tensors[0].shape[2] // 14 - ) # FIXME: 14 is hardcoded patch size - sources = preprocess_multimodal(copy.deepcopy(sources), self.multimodal_cfg, cur_token_len) - else: - images_tensors = torch.tensor([]) - sources = copy.deepcopy(sources) - - if self.conv_template == "nvgpt": - data_dict = preprocess_nvgpt(sources, self.tokenizer, self.multimodal_cfg,) - elif self.conv_template == "v1": - data_dict = preprocess_v1(sources, self.tokenizer, self.multimodal_cfg,) - elif self.conv_template == "llama_2": - data_dict = preprocess_llama_2(sources, self.tokenizer, self.multimodal_cfg,) - else: - raise ValueError(f"Conversation template `{self.conv_template}` is not supported in Neva now.") - - if isinstance(i, int): - data_dict = dict(tokens=data_dict["tokens"][0], labels=data_dict["labels"][0]) - - # image exist in the data - if self.multimodal_cfg['is_multimodal']: - crop_size = self.processor.crop_size - # image does not exist in the data, but the model is multimodal - zero_padding = torch.zeros( - (MAX_NUM_IMAGES - len(images_tensors), 3, crop_size['height'], crop_size['width']), dtype=torch.float - ) - images_tensors = torch.cat((images_tensors, zero_padding), dim=0) - data_dict['image'] = images_tensors - return data_dict - - -class NevaDataset(LazySupervisedDataset): - """Dataset for supervised fine-tuning.""" - - def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer, multimodal_cfg: dict): - - if data_path.endswith(".json"): - super(NevaDataset, self).__init__(data_path, tokenizer, multimodal_cfg) - - elif data_path.endswith(".jsonl"): - super(NevaDataset, self).__init__(None, tokenizer, multimodal_cfg) - logging.warning("Loading image inputs from SteerLM Dataset") - image_folder = multimodal_cfg['image_folder'] - for line in open(data_path, "r"): - record = json.loads(line) - - # This currently supports only a single image - # search for tag - - record['image'] = [] - for turn in record['conversations']: - matches = re.finditer('', DEFAULT_IMAGE_TOKEN, turn['value']) - - self.list_data_dict.append(record) - - else: - raise ValueError(f"Formatting of {data_path} is not supported in Neva.") - - -@dataclass -class DataCollatorForSupervisedDataset(object): - """Collate examples for supervised fine-tuning.""" - - model_cfg: DictConfig - tokenizer: transformers.PreTrainedTokenizer - - def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]: - max_len = max(instance['tokens'].shape[0] for instance in instances) - max_len = (max_len - 1) // 4 * 4 + 4 - for instance in instances: - pad_len = max_len - instance['tokens'].shape[0] - instance['tokens'] = F.pad(instance['tokens'], (0, pad_len), 'constant', 0) - instance['labels'] = F.pad(instance['labels'], (0, pad_len), 'constant', -1) - - batch = default_collate(instances) - tokenizer = self.tokenizer - model_cfg = self.model_cfg - - tokens = batch['tokens'] - labels = batch['labels'] - media = batch.get('image') - - attention_mask, loss_mask, position_ids = get_ltor_masks_and_position_ids( - data=tokens, - eod_token=tokenizer.eos_id, - eod_mask_loss=model_cfg.data.get("eod_mask_loss", False), - reset_attention_mask=False, - reset_position_ids=False, - ) - - loss_mask[labels == -1] = 0.0 - tokens[tokens == -1] = 0 - labels[labels == -1] = 0 - - if media is None: - raise NotImplementedError - else: - media = rearrange(media, "b T c h w -> b T 1 c h w") - - batch = { - 'tokens': tokens, - 'labels': labels, - 'attention_mask': attention_mask, - 'loss_mask': loss_mask, - 'position_ids': position_ids, - 'media': media, - } - return batch - - -def make_supervised_data_module(tokenizer, model_cfg) -> Dict: - """Make dataset and collator for supervised fine-tuning.""" - data_cfg = model_cfg.data - mm_cfg = model_cfg.mm_cfg - add_extra_token = 1 - if getattr(model_cfg, 'no_seqlen_plus_one_input_tokens', False): - add_extra_token = 0 - if mm_cfg.vision_encoder.from_hf: - image_processor = CLIPImageProcessor.from_pretrained( - mm_cfg.vision_encoder.from_pretrained, torch_dtype=torch.bfloat16 - ) - else: - # TODO(yuya): Fix this hard-code for our own CLIP - image_processor = CLIPImageProcessor.from_pretrained( - "openai/clip-vit-large-patch14", torch_dtype=torch.bfloat16 - ) - train_dataset = NevaDataset( - tokenizer=tokenizer, - data_path=data_cfg.data_path, - multimodal_cfg=dict( - is_multimodal=data_cfg.is_multimodal, - sep_image_conv_front=data_cfg.sep_image_conv_front, - conv_template=data_cfg.get("conv_template", "nvgpt"), - image_token_len=data_cfg.image_token_len, - image_folder=data_cfg.image_folder, - image_aspect_ratio=data_cfg.image_aspect_ratio, - use_im_start_end=getattr(model_cfg.mm_cfg, 'use_im_start_end', False), - image_processor=image_processor, - add_extra_token=add_extra_token, - context_length=model_cfg.encoder_seq_length, - ), - ) - # data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer) - return dict(train_dataset=train_dataset, eval_dataset=train_dataset) diff --git a/nemo/collections/multimodal/data/nsfw/__init__.py b/nemo/collections/multimodal/data/nsfw/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/data/nsfw/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/data/nsfw/nsfw_dataset.py b/nemo/collections/multimodal/data/nsfw/nsfw_dataset.py deleted file mode 100644 index de2406012fbc..000000000000 --- a/nemo/collections/multimodal/data/nsfw/nsfw_dataset.py +++ /dev/null @@ -1,53 +0,0 @@ -import pathlib -from typing import Callable, List, Optional, Tuple - -import torch -from omegaconf.dictconfig import DictConfig -from PIL import Image - -from nemo.collections.multimodal.data.clip.augmentations.augmentations import image_transform - - -class DirectoryBasedDataset(torch.utils.data.Dataset): - def __init__(self, path: str, transform: Optional[Callable] = None): - super(DirectoryBasedDataset, self).__init__() - - self._transform = transform - self._samples = self._get_files(path, "nsfw", 1) + self._get_files(path, "safe", 0) - - def __getitem__(self, index: int) -> Tuple[torch.Tensor, int]: - if index >= len(self): - raise IndexError(f"Index {index} ot of bound {len(self)}") - - sample_path, category = self._samples[index] - - image = Image.open(sample_path) - - if self._transform is not None: - image = self._transform(image) - - return image, category - - def __len__(self) -> int: - return len(self._samples) - - def _get_files(self, path: str, subdir: str, category: int) -> List[Tuple[str, int]]: - globpath = pathlib.Path(path) / subdir - return [(x, category) for x in globpath.glob("*.*")] - - -def build_dataset(model_cfg: DictConfig, consumed_samples: int, is_train: bool): - img_fn = image_transform( - (model_cfg.vision.img_h, model_cfg.vision.img_w), - is_train=False, - mean=model_cfg.vision.image_mean, - std=model_cfg.vision.image_std, - resize_longest_max=True, - ) - - if is_train: - path = model_cfg.data.train.dataset_path - else: - path = model_cfg.data.validation.dataset_path - - return DirectoryBasedDataset(path, transform=img_fn) diff --git a/nemo/collections/multimodal/data/stable_diffusion/__init__.py b/nemo/collections/multimodal/data/stable_diffusion/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/data/stable_diffusion/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/data/stable_diffusion/augmentation/__init__.py b/nemo/collections/multimodal/data/stable_diffusion/augmentation/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/data/stable_diffusion/augmentation/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py b/nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py deleted file mode 100644 index eba00f96c0c2..000000000000 --- a/nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import torch -import torchvision.transforms as transforms - - -def construct_clip_augmentations(n_px=224): - def _convert_image_to_rgb(image): - return image.convert("RGB") - - return transforms.Compose( - [ - transforms.Resize(n_px, interpolation=transforms.InterpolationMode.BICUBIC), - transforms.CenterCrop(n_px), - _convert_image_to_rgb, - transforms.ToTensor(), - transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), - ] - ) - - -def construct_image_augmentations(augmentation_dict, normalize=True): - train_img_transform = [] - for aug in augmentation_dict: - if aug == 'resize_smallest_side': - img_size = int(augmentation_dict[aug]) - train_img_transform.append( - transforms.Resize(img_size, interpolation=transforms.InterpolationMode.BICUBIC, antialias=True) - ) - - elif aug == 'center_crop_h_w': - img_w, img_h = augmentation_dict[aug].split(',') - img_w = int(img_w) - img_h = int(img_h) - train_img_transform.append(transforms.CenterCrop((img_w, img_h))) - - elif aug == 'random_crop_h_w': - img_w, img_h = augmentation_dict[aug].split(',') - img_w = int(img_w) - img_h = int(img_h) - train_img_transform.append(transforms.RandomCrop((img_w, img_h))) - - elif aug == 'horizontal_flip': - enabled = augmentation_dict[aug] - if enabled: - train_img_transform.append(transforms.RandomHorizontalFlip(p=0.5)) - else: - raise ValueError('Augmentation not supported') - - # Always need to convert data to tensor - train_img_transform.append(transforms.ToTensor()) - if normalize: - train_img_transform.append(transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))) - train_img_transform = transforms.Compose(train_img_transform) - return train_img_transform - - -def identical_transform(x): - return x diff --git a/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py b/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py deleted file mode 100644 index 963982e14cb6..000000000000 --- a/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py +++ /dev/null @@ -1,184 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch - -from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon -from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import ( - construct_image_augmentations, - identical_transform, -) -from nemo.core.classes import Dataset as NeMoDataset -from nemo.utils import logging - - -class SDSyntheticDataset(NeMoDataset): - def __init__( - self, image_H, image_W, fake_len=100000, image_key='images', txt_key='txt', seq_len=80, context_dim=768 - ): - super().__init__() - self.fake_len = fake_len - self.H = image_H - self.W = image_W - self.image_key = image_key - self.txt_key = txt_key - assert image_key.endswith('encoded') == txt_key.endswith( - 'encoded' - ), 'In precached mode, first and second stage key must both end with "encoded"' - self.precached = self.image_key.endswith('encoded') - self.seq_len = seq_len - self.context_dim = context_dim - - def __getitem__(self, index): - item = {} - if self.precached: - item[self.image_key] = torch.randn(8, self.H // 8, self.W // 8) - item[self.txt_key] = torch.randn(self.seq_len, self.context_dim) - else: - item[self.image_key] = torch.randn(self.H, self.W, 3) - item[self.txt_key] = f'This is meaningless fake text No.{index}' - - return item - - def __len__(self): - return self.fake_len - - -def build_train_valid_datasets( - model_cfg, consumed_samples, -): - data_cfg = model_cfg.data - - def build_resolution_filter(value=None, method='larger'): - assert method == 'larger' or method == 'smaller' - if method == 'larger': - logging.info(f'Only Selecting images with resolution >= {value}') - return lambda x: x['jpg'].size[0] >= value and x['jpg'].size[1] >= value - logging.info(f'Only Selecting images with resolution <= {value}') - return lambda x: x['jpg'].size[0] <= value and x['jpg'].size[1] <= value - - # This function maps data that are tuples to dictionary. - def tuple_to_dict(inp): - for input in inp: - out_dict = dict() - out_dict[model_cfg.first_stage_key] = input[0].permute(1, 2, 0) - out_dict[model_cfg.cond_stage_key] = input[1] - yield out_dict - - def transform_fn(sample): - image, text = sample["jpg"], sample["txt"] - # TODO : If no agumentations just return the image ? - img_transform = construct_image_augmentations(data_cfg.train.get("augmentations", None)) - text_transform = identical_transform - return img_transform(image), text_transform(text) - - if data_cfg.get('synthetic_data', False): - H, W = data_cfg.train.augmentations.center_crop_h_w.split(',') - train_data = SDSyntheticDataset( - int(H), - int(W), - image_key=model_cfg.first_stage_key, - txt_key=model_cfg.cond_stage_key, - context_dim=model_cfg.unet_config.context_dim, - ) - - else: - filter_cfg = data_cfg.train.get('filterings', None) - filter_fn = build_resolution_filter(**filter_cfg.resolution) if filter_cfg else None - train_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=consumed_samples, - map_fn=transform_fn, - compose_fn=tuple_to_dict, - filter_fn=filter_fn, - is_train=True, - ) - - val_data = None - if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): - if data_cfg.get('synthetic_data', False): - val_data = SDSyntheticDataset( - int(H), - int(W), - image_key=model_cfg.first_stage_key, - txt_key=model_cfg.cond_stage_key, - context_dim=model_cfg.unet_config.context_dim, - ) - else: - val_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=consumed_samples, - map_fn=transform_fn, - compose_fn=tuple_to_dict, - filter_fn=filter_fn, - is_train=False, - ) - - return train_data, val_data - - -def build_train_valid_precached_datasets( - model_cfg, consumed_samples, -): - data_cfg = model_cfg.data - - # This function maps data that are tuples to dictionary. - def tuple_to_dict(inp): - for input in inp: - out_dict = dict() - out_dict[model_cfg.first_stage_key] = torch.tensor(input['autoencoderkl_image']) - out_dict[model_cfg.cond_stage_key] = torch.tensor(input['clip-vit-large-patch14_text']) - yield out_dict - - def transform_fn(sample): - return sample['pickle'] - - if data_cfg.get('synthetic_data', False): - H, W = data_cfg.train.augmentations.center_crop_h_w.split(',') - train_data = SDSyntheticDataset( - int(H), - int(W), - image_key=model_cfg.first_stage_key, - txt_key=model_cfg.cond_stage_key, - context_dim=model_cfg.unet_config.context_dim, - ) - else: - train_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=consumed_samples, - map_fn=transform_fn, - compose_fn=tuple_to_dict, - is_train=True, - ) - - val_data = None - if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): - if data_cfg.get('synthetic_data', False): - H, W = data_cfg.train.augmentations.center_crop_h_w.split(',') - train_data = SDSyntheticDataset( - int(H), - int(W), - image_key=model_cfg.first_stage_key, - txt_key=model_cfg.cond_stage_key, - context_dim=model_cfg.unet_config.context_dim, - ) - else: - val_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=consumed_samples, - map_fn=transform_fn, - compose_fn=tuple_to_dict, - is_train=False, - ) - - return train_data, val_data diff --git a/nemo/collections/multimodal/losses/__init__.py b/nemo/collections/multimodal/losses/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/losses/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/losses/clip_loss.py b/nemo/collections/multimodal/losses/clip_loss.py deleted file mode 100644 index 5eb84b020aed..000000000000 --- a/nemo/collections/multimodal/losses/clip_loss.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.distributed.nn -import torch.nn as nn -from torch import distributed as dist -from torch.nn import functional as F - -from nemo.collections.nlp.modules.common.megatron.utils import average_losses_across_data_parallel_group - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - - -def gather_features( - image_features, text_features, local_loss=False, gather_with_grad=False, -): - data_parallel_world_size = parallel_state.get_data_parallel_world_size() - data_parallel_rank = parallel_state.get_data_parallel_rank() - data_parallel_group = parallel_state.get_data_parallel_group() - - if gather_with_grad: - # TODO (yuya): this is not working in current version of pytorch - # https://github.com/mlfoundations/open_clip/blob/main/src/open_clip/loss.py#L48 - all_image_features = torch.cat(torch.distributed.nn.all_gather(image_features), dim=0) - all_text_features = torch.cat(torch.distributed.nn.all_gather(text_features), dim=0) - - else: - gathered_image_features = [torch.zeros_like(image_features) for _ in range(data_parallel_world_size)] - gathered_text_features = [torch.zeros_like(text_features) for _ in range(data_parallel_world_size)] - dist.all_gather(gathered_image_features, image_features, group=data_parallel_group) - dist.all_gather(gathered_text_features, text_features, group=data_parallel_group) - # TODO (yuya): check what's this - if not local_loss: - # ensure grads for local rank when all_* features don't have a gradient - # https://amsword.medium.com/gradient-backpropagation-with-torch-distributed-all-gather-9f3941a381f8 - gathered_image_features[data_parallel_rank] = image_features - gathered_text_features[data_parallel_rank] = text_features - all_image_features = torch.cat(gathered_image_features, dim=0) - all_text_features = torch.cat(gathered_text_features, dim=0) - - return all_image_features, all_text_features - - -class ClipLoss(nn.Module): - def __init__( - self, local_loss=False, gather_with_grad=False, cache_labels=False, - ): - super().__init__() - self.local_loss = local_loss - self.gather_with_grad = gather_with_grad - self.cache_labels = cache_labels - - # cache state - self.prev_num_logits = 0 - self.labels = {} - - self.world_size = parallel_state.get_data_parallel_world_size() - self.rank = parallel_state.get_data_parallel_rank() - - def forward(self, output_tensor): - image_features, text_features, logit_scale = output_tensor - device = image_features.device - if self.world_size > 1: - all_image_features, all_text_features = gather_features( - image_features, text_features, self.local_loss, self.gather_with_grad - ) - - if self.local_loss: - logits_per_image = logit_scale * image_features @ all_text_features.T - logits_per_text = logit_scale * text_features @ all_image_features.T - else: - logits_per_image = logit_scale * all_image_features @ all_text_features.T - logits_per_text = logits_per_image.T - else: - logits_per_image = logit_scale * image_features @ text_features.T - logits_per_text = logit_scale * text_features @ image_features.T - - # calculated ground-truth and cache if enabled - num_logits = logits_per_image.shape[0] - if self.prev_num_logits != num_logits or device not in self.labels: - labels = torch.arange(num_logits, device=device, dtype=torch.long) - if self.world_size > 1 and self.local_loss: - labels = labels + num_logits * self.rank - if self.cache_labels: - self.labels[device] = labels - self.prev_num_logits = num_logits - else: - labels = self.labels[device] - - total_loss = (F.cross_entropy(logits_per_image, labels) + F.cross_entropy(logits_per_text, labels)) / 2 - - # TODO (yuya): this is not necessary; not necessary if global! - reduced_loss = average_losses_across_data_parallel_group([total_loss]) - return total_loss, {"loss": reduced_loss} diff --git a/nemo/collections/multimodal/models/__init__.py b/nemo/collections/multimodal/models/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/models/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/models/clip/__init__.py b/nemo/collections/multimodal/models/clip/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/models/clip/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/models/clip/megatron_clip_models.py b/nemo/collections/multimodal/models/clip/megatron_clip_models.py deleted file mode 100644 index e24e95e68af8..000000000000 --- a/nemo/collections/multimodal/models/clip/megatron_clip_models.py +++ /dev/null @@ -1,1017 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import itertools -from functools import partial -from typing import Any, List, Optional, Union - -import numpy as np -import torch -import torch.nn.functional as F -from omegaconf.dictconfig import DictConfig -from pytorch_lightning.accelerators import CPUAccelerator -from pytorch_lightning.trainer.trainer import Trainer -from tqdm import tqdm - -from nemo.collections.multimodal.data.clip.clip_dataset import ( - build_imagenet_validation_dataloader, - build_train_valid_datasets, - tokenize, -) -from nemo.collections.multimodal.losses.clip_loss import ClipLoss -from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel -from nemo.collections.nlp.modules.common.megatron.build_model import build_model -from nemo.collections.nlp.modules.common.megatron.language_model import get_language_model -from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule -from nemo.collections.nlp.modules.common.megatron.utils import ( - ApexGuardDefaults, - average_losses_across_data_parallel_group, - get_all_params_for_weight_decay_optimization, - get_linear_layer, - get_params_for_weight_decay_optimization, - init_method_normal, - parallel_lm_logits, - scaled_init_method_normal, -) -from nemo.collections.nlp.parts.utils_funcs import get_last_rank, is_last_rank -from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone, VitMlpHead -from nemo.core.classes.common import PretrainedModelInfo -from nemo.utils import logging - -try: - from apex.transformer.enums import AttnMaskType - from apex.transformer.pipeline_parallel.utils import get_num_microbatches - - HAVE_APEX = True -except (ImportError, ModuleNotFoundError): - HAVE_APEX = False - -try: - from megatron.core import parallel_state - from megatron.core.pipeline_parallel.schedules import get_forward_backward_func - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - - -class CLIPVisionTransformer(MegatronModule): - """Vision Transformer Model.""" - - def __init__(self, model_cfg, model_parallel_config, pre_process=True, post_process=True, skip_head=False): - super(CLIPVisionTransformer, self).__init__() - - scaled_init_method = ( - scaled_init_method_normal(model_cfg.init_method_std, model_cfg.num_layers) - if model_cfg.use_scaled_init_method - else init_method_normal(model_cfg.init_method_std) - ) - - self.config = model_parallel_config - self.hidden_size = model_cfg.hidden_size - self.global_average_pool = model_cfg.global_average_pool - self.pre_process = pre_process - self.post_process = post_process - self.skip_head = skip_head - - if model_cfg.get("class_token_length") is None or model_cfg.get("class_token_length") <= 0: - class_token = False - else: - class_token = True - self.backbone = VitBackbone( - model_cfg, - model_parallel_config, - init_method=init_method_normal(model_cfg.init_method_std), - scaled_init_method=scaled_init_method, - pre_process=self.pre_process, - post_process=self.post_process, - class_token=class_token, - single_token_output=False, - ) - - if self.post_process and not skip_head: - self.output_dim = model_cfg.output_dim - self.head = torch.nn.Linear(self.hidden_size, self.output_dim, bias=False,) - - def set_input_tensor(self, input_tensor): - """See megatron.model.transformer.set_input_tensor()""" - self.backbone.set_input_tensor(input_tensor) - - def forward(self, input): - hidden_states = self.backbone(input) - - if self.post_process and not self.skip_head: - if self.global_average_pool: - hidden_states = hidden_states.mean(dim=1) - else: - hidden_states = hidden_states[:, 0] - hidden_states = self.head(hidden_states) - # print("vision_head", hidden_states.shape) - return hidden_states - - -class CLIPTextTransformer(MegatronModule): - """Text Transformer Model.""" - - def __init__(self, model_cfg, model_parallel_config, padded_vocab_size, pre_process=True, post_process=True): - super(CLIPTextTransformer, self).__init__() - - self.config = model_parallel_config - self.pre_process = pre_process - self.post_process = post_process - self.fp16_lm_cross_entropy = model_cfg.fp16_lm_cross_entropy - self.sequence_parallel = model_cfg.sequence_parallel - self.gradient_accumulation_fusion = model_cfg.gradient_accumulation_fusion - - scaled_init_method = ( - scaled_init_method_normal(model_cfg.init_method_std, model_cfg.num_layers) - if model_cfg.use_scaled_init_method - else init_method_normal(model_cfg.init_method_std) - ) - self.language_model, self._language_model_key = get_language_model( - config=model_parallel_config, - vocab_size=padded_vocab_size, - hidden_size=model_cfg.hidden_size, - hidden_dropout=model_cfg.hidden_dropout, - attention_dropout=model_cfg.attention_dropout, - num_tokentypes=0, - max_position_embeddings=model_cfg.max_position_embeddings, - num_layers=model_cfg.num_layers, - num_attention_heads=model_cfg.num_attention_heads, - apply_query_key_layer_scaling=model_cfg.apply_query_key_layer_scaling, - kv_channels=model_cfg.kv_channels, - ffn_hidden_size=model_cfg.ffn_hidden_size, - add_pooler=False, - encoder_attn_mask_type=AttnMaskType.causal, - position_embedding_type=model_cfg.get("position_embedding_type", "learned_absolute"), - init_method=init_method_normal(model_cfg.init_method_std), - scaled_init_method=scaled_init_method, - pre_process=self.pre_process, - post_process=self.post_process, - init_method_std=model_cfg.init_method_std, - precision=model_cfg.precision, - fp32_residual_connection=model_cfg.fp32_residual_connection, - activations_checkpoint_granularity=model_cfg.activations_checkpoint_granularity, - activations_checkpoint_method=model_cfg.activations_checkpoint_method, - activations_checkpoint_num_layers=model_cfg.activations_checkpoint_num_layers, - activations_checkpoint_layers_per_pipeline=model_cfg.activations_checkpoint_layers_per_pipeline, - normalization=model_cfg.normalization, - layernorm_epsilon=model_cfg.layernorm_epsilon, - bias_activation_fusion=model_cfg.bias_activation_fusion, - bias_dropout_add_fusion=model_cfg.bias_dropout_add_fusion, - masked_softmax_fusion=model_cfg.masked_softmax_fusion, - persist_layer_norm=model_cfg.persist_layer_norm, - openai_gelu=model_cfg.openai_gelu, - onnx_safe=model_cfg.onnx_safe, - megatron_legacy=model_cfg.megatron_legacy, - transformer_engine=model_cfg.transformer_engine, - fp8=model_cfg.fp8, - fp8_e4m3=model_cfg.fp8_e4m3, - fp8_hybrid=model_cfg.fp8_hybrid, - fp8_margin=model_cfg.fp8_margin, - fp8_interval=model_cfg.fp8_interval, - fp8_amax_history_len=model_cfg.fp8_amax_history_len, - fp8_amax_compute_algo=model_cfg.fp8_amax_compute_algo, - reduce_amax=model_cfg.get('reduce_amax', True), - use_emha=model_cfg.use_emha, - activation=model_cfg.get('activation', 'gelu'), - use_flash_attention=model_cfg.get('flash_attention', False), - ) - - self.initialize_word_embeddings( - init_method=init_method_normal(model_cfg.init_method_std), - vocab_size=padded_vocab_size, - hidden_size=model_cfg.hidden_size, - ) - - # TODO (yuya): check this position id - self.position_ids = None - if self.pre_process: - self.position_ids = torch.arange(model_cfg.max_position_embeddings).expand(1, -1).cuda() - - if self.post_process: - self.output_dim = model_cfg.output_dim - self.head = torch.nn.Linear(model_cfg.hidden_size, self.output_dim, bias=False,) - - self.attn_mask = self.build_attention_mask(model_cfg.max_position_embeddings) - - def set_input_tensor(self, input_tensor): - """See megatron.model.transformer.set_input_tensor()""" - self.language_model.set_input_tensor(input_tensor) - - def build_attention_mask(self, max_position_embeddings): - # lazily create causal attention mask, with full attention between the tokens - mask = torch.empty(max_position_embeddings, max_position_embeddings, dtype=bool, device='cuda') - mask.fill_(True) - mask.triu_(1) # zero out the lower diagonal - mask = mask.reshape(1, 1, max_position_embeddings, max_position_embeddings) - return mask - - def forward( - self, input_ids, - ): - # input_ids: [b, s] - # position_ids: [b, s] - # attention_mask: [1, 1, s, s] - - hidden_states = self.language_model( - input_ids, - self.position_ids, - self.attn_mask, - token_type_ids=None, - layer_past=None, - get_key_value=False, - encoder_input=None, - set_inference_key_value_memory=False, - inference_max_sequence_len=None, - checkpoint_activations_all_layers=None, - ) - - if self.post_process: - # shape = [seq, bsz, hidden] - # take features from the eot embedding (eot_token is the highest number in each sequence) - hidden_states = hidden_states[input_ids.argmax(dim=-1), torch.arange(hidden_states.shape[1])] - return self.head(hidden_states) - - return hidden_states - - -class CLIPModel(MegatronModule): - """CLIP Model""" - - def __init__(self, model_cfg, model_parallel_config, padded_vocab_size, pre_process=True, post_process=True): - super(CLIPModel, self).__init__() - - self.config = model_parallel_config - self.pre_process = pre_process - self.post_process = post_process - self.vision_encoder = CLIPVisionTransformer( - model_cfg.vision, model_parallel_config, pre_process=self.pre_process, post_process=self.post_process, - ) - self.text_encoder = CLIPTextTransformer( - model_cfg.text, - model_parallel_config, - padded_vocab_size, - pre_process=self.pre_process, - post_process=self.post_process, - ) - - self.logit_scale = torch.nn.Parameter(torch.ones([]) * np.log(1 / 0.07)) - - def set_input_tensor(self, input_tensor): - """See megatron.model.transformer.set_input_tensor()""" - # TODO (yuya): fix this - pass - - def forward(self, images, captions): - image_features = self.vision_encoder(images) - text_features = self.text_encoder(captions) - - if self.post_process: - return F.normalize(image_features, dim=-1), F.normalize(text_features, dim=-1), self.logit_scale.exp() - - return image_features, text_features - - -class MegatronCLIPModel(MegatronBaseModel): - """Megatron CLIP Model.""" - - def __init__(self, cfg: DictConfig, trainer: Trainer): - if not HAVE_APEX: - raise ImportError( - "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." - ) - if not HAVE_MEGATRON_CORE: - raise ImportError( - "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." - ) - - # this prevents base constructor from initializing tokenizer - self.tokenizer = None - self.imagenet_val = None - super().__init__(cfg, trainer=trainer) - - self._validate_trainer() - - self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) - - if not self.megatron_amp_O2 and self.cfg.get('virtual_pipeline_model_parallel_size', None): - raise ValueError('Virtual pipeline model parallel is only supported when using megatron_amp_O2') - - # build_model returns a list of modules which are used for interleaved pipeline parallelism - if isinstance(self.trainer.accelerator, CPUAccelerator): - self.model = build_model( - model_provider_func=self.model_provider_func, - wrap_with_ddp=False, - on_cpu=True, - virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), - ) - else: - self.model = build_model( - model_provider_func=self.model_provider_func, - wrap_with_ddp=False, - virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), - ) - - # if we're not using interleaved, then self.model is a module. - if self.cfg.get('virtual_pipeline_model_parallel_size', None) is None: - self.model = self.model[0] - - if self.megatron_amp_O2: - - if not self.with_distributed_adam: - # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type - if isinstance(self.model, list): - for module in self.model: - module.cuda(torch.cuda.current_device()) - else: - self.model.cuda(torch.cuda.current_device()) - - # Model wrapper to convert both model and inputs to half precision - # TODO (yuya): check this; FP16 Module might not work; when self.model is a list? - if isinstance(self.model, list): - converted_model = [] - for module in self.model: - converted_model.append( - Float16Module(config=self.model_parallel_config, module=module, precision=cfg.precision) - ) - self.model = converted_model - else: - self.model = Float16Module( - config=self.model_parallel_config, module=self.model, precision=cfg.precision - ) - - if self.trainer.precision in ['bf16', 'bf16-mixed']: - self.autocast_dtype = torch.bfloat16 - elif self.trainer.precision in [32, '32', '32-true']: - self.autocast_dtype = torch.float - elif self.trainer.precision in [16, '16', '16-mixed']: - self.autocast_dtype = torch.half - else: - raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') - - self.enable_autocast = ( - True if (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False - ) - - self.transformer_engine = cfg.get('transformer_engine', False) - - # Convert the global-batch-based profile index to micro-batch index - if hasattr(self, '_nsys_profile_enabled'): - mp_size = cfg.get('tensor_model_parallel_size', 1) * cfg.get('pipeline_model_parallel_size', 1) - data_parallel_world_size = trainer.world_size // mp_size - grad_accum_steps = cfg.get('global_batch_size') // (cfg.get('micro_batch_size') * data_parallel_world_size) - self._nsys_profile_start_step *= grad_accum_steps - self._nsys_profile_end_step *= grad_accum_steps - self.get_attention_mask_from_fusion = self.cfg.get('get_attention_mask_from_fusion', True) - self.initialize_ub = self.cfg.get('ub_tp_comm_overlap', False) - - def get_module_list(self): - if isinstance(self.model, list): - return [model.module if isinstance(model, Float16Module) else model for model in self.model] - elif isinstance(self.model, Float16Module): - return [self.model.module] - else: - return [self.model] - - def model_provider_func(self, pre_process, post_process): - """Model depends on pipeline paralellism.""" - model = CLIPModel( - model_cfg=self.cfg, - model_parallel_config=self.model_parallel_config, - padded_vocab_size=self.padded_vocab_size, - pre_process=pre_process, - post_process=post_process, - ) - return model - - def setup_optimizer_param_groups(self): - """ModelPT override. Optimizer will get self._optimizer_param_groups""" - if self.cfg.get('do_layer_norm_weight_decay', False): - if isinstance(self.model, list): - self._optimizer_param_groups = get_all_params_for_weight_decay_optimization(self.model) - else: - self._optimizer_param_groups = get_all_params_for_weight_decay_optimization([self.model]) - - else: - self._optimizer_param_groups = get_params_for_weight_decay_optimization(self.model) - - def configure_optimizers(self): - - if self.with_distributed_adam: - - # Disable overlapped grad sync for embedding grad when - # pipeline parallelism is enabled - if parallel_state.get_pipeline_model_parallel_world_size() > 1: - if parallel_state.is_pipeline_first_stage(ignore_virtual=True): - if isinstance(self.model, list): - module = self.model[0] # only the first virtual rank has the embeddings - else: - module = self.model - # TODO (yuya): text transformer's embedding needs to be taken care of when PP>1 - # if module.share_token_embeddings: - # param = module.word_embeddings_weight() - # param._disable_greedy_grad_copy = not self.megatron_amp_O2 - # param._disable_overlap_grad_sync = True - if parallel_state.is_pipeline_last_stage(ignore_virtual=True): - if isinstance(self.model, list): - module = self.model[-1] # only the last virtual rank has the embeddings - else: - module = self.model - # if module.share_token_embeddings: - # param = module.word_embeddings_weight() - # param._disable_greedy_grad_copy = not self.megatron_amp_O2 - # param._disable_overlap_grad_sync = True - - # Disable overlapped grad sync for layer norm grads when - # sequence parallelism is enabled - for param in self.parameters(): - if getattr(param, 'sequence_parallel', False): - param._disable_greedy_grad_copy = not self.megatron_amp_O2 - param._disable_overlap_grad_sync = True - - # Initialize parameter buckets for overlapped grad and param syncs - # Note: Params with disabled overlapping are put in the - # last param bucket - buckets = [] - if self.cfg.get('virtual_pipeline_model_parallel_size', None) is not None: - # Initialize a bucket for each virtual pipeline stage - for module in self.model: - if isinstance(module, Float16Module): - module = module.module - stage_bucket = [] - for layer in itertools.chain( - module.vision_encoder.backbone.transformer.layers, - module.text_encoder.language_model.encoder.layers, - ): - stage_bucket.extend( - p for p in layer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False) - ) - buckets.append(stage_bucket) - else: - # Initialize a bucket for each Transformer layer - modules = self.model if isinstance(self.model, list) else [self.model] - for module in modules: - if isinstance(module, Float16Module): - module = module.module - for layer in itertools.chain( - module.vision_encoder.backbone.transformer.layers, - module.text_encoder.language_model.encoder.layers, - ): - buckets.append( - [p for p in layer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False)] - ) - buckets.reverse() - used_params = set() - for bucket in buckets: - used_params.update(bucket) - buckets[-1].extend(p for p in self.parameters() if p not in used_params) - self.distributed_adam_buckets = buckets - - return super().configure_optimizers() - - def forward(self, image, text): - output_tensor = self.model(image, text) - return output_tensor - - def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): - - # handle asynchronous grad reduction - no_sync_func = None - grad_sync_func = None - param_sync_func = None - if not forward_only and self.with_distributed_adam: - no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) - grad_sync_func = self.reduce_overlap_gradients - param_sync_func = self.sync_overlap_parameters - - # pipeline schedules will get these from self.model.config - for module in self.get_module_list(): - module.config.no_sync_func = no_sync_func - module.config.grad_sync_func = grad_sync_func - module.config.param_sync_func = param_sync_func - - # run forward and backwards passes for an entire global batch - # we do this inside training_step to support pipeline parallelism - fwd_bwd_function = get_forward_backward_func() - - # TODO @akhattar: add num_micro_batches_with_partial_activation_checkpoints when ready - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(), - data_iterator=dataloader_iter, - model=self.model, - num_microbatches=get_num_microbatches(), - forward_only=forward_only, - seq_length=None, - micro_batch_size=self.cfg.micro_batch_size, - ) - - # only the last stages of the pipeline return losses - if losses_reduced_per_micro_batch: - if (not forward_only) or self.cfg.data.get('validation_drop_last', True): - # average loss across micro batches - loss_tensors_list = [loss_reduced['loss'] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.stack(loss_tensors_list) - loss_mean = loss_tensor.mean() - else: - # Get the total loss since micro batches sizes are not uniform - raise NotImplementedError("Losses of micro batches sizes must be uniform!") - else: - # we're not on the last pipeline stage so no losses - if forward_only: - loss_mean = [] - else: - loss_mean = torch.tensor(0.0).cuda() - - return loss_mean - - def initialize_ub_func(self): - ub_cfgs = self.cfg.get('ub_tp_comm_overlap_cfg', None) - if ub_cfgs is None: - warnings.warn( - "Couldn't find TP config. Please check the path correctness. Initializing TP comm overlap with the default config." - ) - - input_shape = [ - self.cfg.get('encoder_seq_length') * self.cfg.get('micro_batch_size'), - self.cfg.get('hidden_size'), - ] - - te_module.base.initialize_ub( - shape=input_shape, - tp_size=self.cfg.get('tensor_model_parallel_size'), - use_fp8=self.cfg.get('fp8'), - ub_cfgs=ub_cfgs, - ) - self.initialize_ub = False - - def training_step(self, dataloader_iter, batch_idx): - """ - Our dataloaders produce a micro-batch and then we fetch - a number of microbatches depending on the global batch size and model parallel size - from the dataloader to produce a list of microbatches. - Batch should be a list of microbatches and those microbatches should on CPU. - Microbatches are then moved to GPU during the pipeline. - The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. - """ - # Initialize userbuffer communicators. - if self.initialize_ub: - self.initialize_ub_func() - - # we zero grads here because we also call backward in the megatron-core fwd/bwd functions - self._optimizer.zero_grad() - - if self.with_distributed_adam: - # hack to enable overlapping param sync and forward compute - # note: the distributed optimizer monkey-patches each - # parameter's __getattribute__ function so that it can - # launch parameter all-gathers the first time the - # parameter is accessed after the optimizer step. However, - # PyTorch directly passes embedding parameters into a C++, - # bypassing this process. A quick-and-dirty hack is to - # manually interact with the parameter. - modules = self.model if isinstance(self.model, list) else [self.model] - for module in modules: - if isinstance(module, Float16Module): - module = module.module - module = module.text_encoder.language_model - if hasattr(module, 'embedding'): - for param in module.embedding.parameters(): - param.data_ptr() - - loss_mean = self.fwd_bwd_step(dataloader_iter, batch_idx, False) - - # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced - if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): - self.allreduce_sequence_parallel_gradients() - - if self.with_distributed_adam: - # synchronize asynchronous grad reductions - # note: not necessary, but reduces performance degradation - # from multiple simultaneous NCCL calls - self._optimizer._finish_bucket_grad_sync() - elif self.megatron_amp_O2: - # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) - # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): - # # main grads are stored in the MainParamsOptimizer wrapper - self._optimizer.allreduce_main_grads() - else: - # async grad allreduce is not currently implemented for O1/autocasting mixed precision training - # so we all-reduce gradients after the pipeline - self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) - - # TODO (yuya): check if this is needed in text transformer when PP>1 - # if self.cfg.get('pipeline_model_parallel_size', 1) > 1: - # # when using pipeline parallelism the first and last stage must keep embeddings in sync - # self.allreduce_first_last_embeddings() - - ## logging - # we can only log on one rank if it is rank zero so we broadcast from last rank - # we can avoid this broadcast by updating the PTL log function to accept specific ranks - torch.distributed.broadcast(loss_mean, get_last_rank()) - - if self.cfg.precision in [16, '16', '16-mixed']: - loss_scale = self.trainer.precision_plugin.scaler._scale - if loss_scale is not None: - self.log('loss_scale', loss_scale, batch_size=1) - - self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) - lr = self._optimizer.param_groups[0]['lr'] - self.log('lr', lr, rank_zero_only=True, batch_size=1) - self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log( - 'consumed_samples', - self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), - prog_bar=True, - rank_zero_only=True, - batch_size=1, - ) - - return loss_mean - - def backward(self, *args, **kwargs): - """ LightningModule hook to do backward. - We want this to do nothing since we run backward in the fwd/bwd functions from apex. - No need to call it here. - """ - pass - - def optimizer_zero_grad(self, *args, **kwargs): - """ LightningModule hook to zero grad. - We want this to do nothing as we are zeroing grads during the training_step. - """ - pass - - def _append_sequence_parallel_module_grads(self, module, grads): - """ Helper method for allreduce_sequence_parallel_gradients""" - - for param in module.parameters(): - sequence_parallel_param = getattr(param, 'sequence_parallel', False) - if sequence_parallel_param and param.requires_grad: - if self.megatron_amp_O2: - grad = param.main_grad - else: - grad = param.grad - grads.append(grad.data) - - def allreduce_sequence_parallel_gradients(self): - """ All-reduce layernorm parameters across model parallel nodes when sequence parallelism is used. - Modified from megatron-lm: - https://gitlab-master.nvidia.com/ADLR/megatron-lm/-/blob/3f91f09bb2ab32f9904b47f46f19d2fc3f518ed8/megatron/training.py#L425 - """ - - grads = [] - if isinstance(self.model, list): - for module in self.model: - self._append_sequence_parallel_module_grads(module, grads) - else: - self._append_sequence_parallel_module_grads(self.model, grads) - - coalesced = torch._utils._flatten_dense_tensors(grads) - torch.distributed.all_reduce(coalesced, group=parallel_state.get_tensor_model_parallel_group()) - for buf, synced in zip(grads, torch._utils._unflatten_dense_tensors(coalesced, grads)): - buf.copy_(synced) - - def get_forward_output_and_loss_func(self): - loss_func = ClipLoss(local_loss=self.cfg.local_loss, gather_with_grad=self.cfg.gather_with_grad,) - - def fwd_output_and_loss_func(dataloader_iter, model): - batch = next(dataloader_iter) - if parallel_state.get_pipeline_model_parallel_world_size() == 1: - images = batch["images"].cuda(non_blocking=True) - captions = batch["captions"].cuda(non_blocking=True) - else: - # GPT3 uses only causal mask, which doesn't need attention mask - if parallel_state.is_pipeline_first_stage(): - # Fist pipeline stage needs only the tokens and position_ids - images = batch["images"].cuda(non_blocking=True) - captions = batch["captions"].cuda(non_blocking=True) - else: - # Intermediate / Last pipeline stage doesn't need any inputs - images, captions = None, None - - output_tensor = model(images, captions) - return output_tensor, loss_func - - return fwd_output_and_loss_func - - def get_forward_output_only_func(self): - def fwd_output_only_func(batch, model): - raise NotImplementedError - - return fwd_output_only_func - - def zero_shot_classifier(self): - if self.cfg.get("megatron_amp_O2", False): - text_encoder = self.model.module.text_encoder - else: - text_encoder = self.model.text_encoder - - with torch.no_grad(): - zeroshot_weights = [] - for texts in self.imagenet_val["texts"]: - texts = texts.cuda(non_blocking=True) - # TODO (yuya): distributed not working - with torch.cuda.amp.autocast( - enabled=self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, - ): - class_embeddings = text_encoder(texts) - class_embedding = F.normalize(class_embeddings, dim=-1).mean(dim=0) - class_embedding /= class_embedding.norm() - zeroshot_weights.append(class_embedding) - zeroshot_weights = torch.stack(zeroshot_weights, dim=1) - return zeroshot_weights - - def zero_shot_eval(self): - def accuracy(output, target, topk=(1,)): - pred = output.topk(max(topk), 1, True, True)[1].t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - return [float(correct[:k].reshape(-1).float().sum(0, keepdim=True).cpu().numpy()) for k in topk] - - logging.info('Starting zero-shot imagenet.') - - logging.info('Building zero-shot classifier') - classifier = self.zero_shot_classifier() - - logging.info('Using classifier') - - if self.cfg.get("megatron_amp_O2", False): - vision_encoder = self.model.module.vision_encoder - else: - vision_encoder = self.model.vision_encoder - with torch.no_grad(): - top1, top5, n = 0.0, 0.0, 0.0 - for images, target in tqdm(self.imagenet_val["images"], desc="Imagenet Zero-shot Evaluation", leave=False): - if images is None or target is None: - continue - - images = images.cuda(non_blocking=True).to(self.autocast_dtype) - target = target.cuda(non_blocking=True) - # predict - with torch.cuda.amp.autocast( - enabled=self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, - ): - image_features = vision_encoder(images) - image_features = F.normalize(image_features, dim=-1) - logits = 100.0 * image_features @ classifier - - # measure accuracy - acc1, acc5 = accuracy(logits, target, topk=(1, 5)) - top1 += acc1 - top5 += acc5 - n += images.size(0) - - logging.info('Finished zero-shot imagenet.') - top1 = top1 / n - top5 = top5 / n - return top1, top5 - - def validation_step(self, dataloader_iter, batch_idx): - """ - Our dataloaders produce a micro-batch and then we fetch - a number of microbatches depending on the global batch size and model parallel size - from the dataloader to produce a list of microbatches. - The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. """ - # Initialize userbuffer communicators. - if self.initialize_ub: - self.initialize_ub_func() - - loss = self.fwd_bwd_step(dataloader_iter, batch_idx, True) - self.validation_step_outputs.append(loss) - - return loss - - def on_validation_epoch_end(self): - # TODO (yuya): need fix later, check with Sean - if not self.validation_step_outputs: - return - - # Run zero shot imagenet evaluation - if self.imagenet_val is not None: - imagenet_metric = torch.zeros(2).cuda() - imagenet_metric[0], imagenet_metric[1] = self.zero_shot_eval() - imagenet_metric = average_losses_across_data_parallel_group(imagenet_metric) - self.log('imagenet_top1', imagenet_metric[0], prog_bar=True, rank_zero_only=True, batch_size=1) - self.log('imagenet_top5', imagenet_metric[1], prog_bar=True, rank_zero_only=True, batch_size=1) - - if parallel_state.is_pipeline_last_stage(): - averaged_metrics = torch.tensor( - [torch.stack(self.validation_step_outputs).mean()], dtype=torch.float32, device='cuda' - ) - else: - averaged_metrics = torch.tensor([0.0], dtype=torch.float32, device='cuda') - - # we can only log on one rank if it is rank zero so we broadcast from last rank - torch.distributed.broadcast(averaged_metrics, get_last_rank()) - averaged_loss = averaged_metrics - - self.log('global_step', self.trainer.global_step, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True, batch_size=1) - self.validation_step_outputs.clear() # free memory - - return averaged_loss - - def test_step(self, batch, batch_idx): - return self.validation_step(batch, batch_idx) - - def test_epoch_end(self, outputs): - averaged_loss = average_losses_across_data_parallel_group(outputs) - logging.info(f'test_loss: {averaged_loss[0]}') - - def build_train_valid_test_datasets(self): - logging.info('Building datasets for CLIP...') - if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): - raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") - - self._train_ds, self._validation_ds = build_train_valid_datasets( - model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0), tokenizer=self.tokenizer, - ) - self._test_ds = None - - if self._train_ds is not None: - logging.info(f'Length of train dataset: {len(self._train_ds)}') - if self._validation_ds is not None: - logging.info(f'Length of val dataset: {len(self._validation_ds)}') - if self._test_ds is not None: - logging.info(f'Length of test dataset: {len(self._test_ds)}') - logging.info(f'Finished building datasets for CLIP.') - - return self._train_ds, self._validation_ds, self._test_ds - - def setup(self, stage=None): - """ PTL hook that is executed after DDP spawns. - We setup datasets here as megatron datasets require DDP to instantiate. - See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. - Args: - stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. - """ - - # log number of parameters - if isinstance(self.model, list): - num_parameters_on_device = sum( - [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] - ) - # if parallel_state.get_pipeline_model_parallel_world_size() > 1 and parallel_state.is_pipeline_last_stage( - # ignore_virtual=True - # ): - # # substract the embedding weights on the last virtual stage - # num_word_embedding_parameters = sum([p.nelement() for p in self.model[-1].word_embeddings_weight()]) - # num_parameters_on_device -= num_word_embedding_parameters - else: - num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) - - # if parallel_state.get_pipeline_model_parallel_world_size() > 1 and parallel_state.is_pipeline_last_stage( - # ignore_virtual=True - # ): - # # substract the embedding weights on the last stage - # num_word_embedding_parameters = sum([p.nelement() for p in self.model.word_embeddings_weight()]) - # - # num_parameters_on_device -= num_word_embedding_parameters - - # to be summed across data parallel group - total_num_parameters = torch.tensor(num_parameters_on_device).cuda() - - torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) - - logging.info( - f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' - f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' - f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' - f'Total number of model parameters: {total_num_parameters:.2e}.' - ) - - resume_checkpoint_path = self.trainer.ckpt_path - if resume_checkpoint_path: - init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) - else: - init_consumed_samples = 0 - self.init_consumed_samples = init_consumed_samples - self.init_global_step = self.trainer.global_step - - # allowing restored models to optionally setup datasets - self.build_train_valid_test_datasets() - - # Batch size need to be provided for webdatset - self._num_micro_batches = get_num_microbatches() - self._micro_batch_size = self.cfg.micro_batch_size - - self.setup_training_data(self.cfg.data) - self.setup_validation_data(self.cfg.data) - self.setup_test_data(self.cfg.data) - - if self.cfg.data.get("imagenet_val") is not None: - self.imagenet_val = build_imagenet_validation_dataloader(self.cfg, self.tokenizer) - - # when using pipeline model parallel the final stage need to initialize word embeddings - if parallel_state.get_pipeline_model_parallel_world_size() > 1: - if isinstance(self.model, list): - for i, module in enumerate(self.model): - parallel_state.set_virtual_pipeline_model_parallel_rank(i) - # module.sync_initial_word_embeddings() - parallel_state.set_virtual_pipeline_model_parallel_rank(0) - else: - # self.model.sync_initial_word_embeddings() - pass - - def setup_training_data(self, cfg): - if hasattr(self, '_train_ds') and self._train_ds is not None: - consumed_samples = self.compute_consumed_samples(0) - logging.info( - f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' - ) - self._train_dl = torch.utils.data.DataLoader( - self._train_ds, - batch_size=self._micro_batch_size, - num_workers=cfg.num_workers, - pin_memory=True, - drop_last=cfg.train.get("drop_last", True), - persistent_workers=True, - ) - - def setup_validation_data(self, cfg): - if hasattr(self, '_validation_ds') and self._validation_ds is not None: - consumed_samples = 0 - logging.info( - f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' - ) - self._validation_dl = torch.utils.data.DataLoader( - self._validation_ds, - batch_size=self._micro_batch_size, - num_workers=cfg.num_workers, - pin_memory=True, - drop_last=cfg.train.get("drop_last", True), - persistent_workers=True, - ) - - def setup_test_data(self, cfg): - if hasattr(self, '_test_ds') and self._test_ds is not None: - consumed_samples = 0 - logging.info( - f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' - ) - self._test_dl = torch.utils.data.DataLoader( - self._test_ds, batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, - ) - - def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any: - raise NotImplementedError - - def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: - """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device - When using pipeline parallelism, we need the global batch to remain on the CPU, - since the memory overhead will be too high when using a large number of microbatches. - Microbatches are transferred from CPU to GPU inside the pipeline. - """ - return batch - - def _validate_trainer(self): - """ Certain trainer configurations can break training. - Here we try to catch them and raise an error. - """ - if self.trainer.accumulate_grad_batches > 1: - raise ValueError( - f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' - ) - - @classmethod - def list_available_models(cls) -> Optional[PretrainedModelInfo]: - return None - - def on_save_checkpoint(self, checkpoint) -> None: - """LightningModule hook: - https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-save-checkpoint - """ - if isinstance(self.model, list): - for i in range(len(self.model)): - parallel_state.set_virtual_pipeline_model_parallel_rank(i) - checkpoint[f'model{i}'] = self.model[i].module.state_dict_for_save_checkpoint() - parallel_state.set_virtual_pipeline_model_parallel_rank(0) - - def on_load_checkpoint(self, checkpoint) -> None: - """LightningModule hook: - https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-load-checkpoint - """ - if isinstance(self.model, list): - for i in range(len(self.model)): - parallel_state.set_virtual_pipeline_model_parallel_rank(i) - self.model[i].module.load_state_dict(checkpoint[f'model{i}'], strict=True) - parallel_state.set_virtual_pipeline_model_parallel_rank(0) - - def parameters(self): - if isinstance(self.model, list): - return itertools.chain.from_iterable(module.parameters() for module in self.model) - else: - return self.model.parameters() diff --git a/nemo/collections/multimodal/models/content_filter/__init__.py b/nemo/collections/multimodal/models/content_filter/__init__.py deleted file mode 100644 index 9ff638194e7a..000000000000 --- a/nemo/collections/multimodal/models/content_filter/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py b/nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py deleted file mode 100644 index d49e2bfafe6b..000000000000 --- a/nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py +++ /dev/null @@ -1,398 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import functools -import itertools -from typing import List, Optional, Union - -import torch -import torch.nn as nn -import torch.nn.functional as F -from apex.transformer.pipeline_parallel.utils import get_num_microbatches -from megatron.core import parallel_state -from megatron.core.pipeline_parallel.schedules import get_forward_backward_func -from omegaconf.dictconfig import DictConfig -from pytorch_lightning.accelerators import CPUAccelerator -from pytorch_lightning.trainer.trainer import Trainer - -from nemo.collections.multimodal.data.clip.clip_dataset import tokenize -from nemo.collections.multimodal.data.nsfw.nsfw_dataset import build_dataset -from nemo.collections.multimodal.models.clip.megatron_clip_models import CLIPTextTransformer, CLIPVisionTransformer -from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel -from nemo.collections.nlp.modules.common.megatron.build_model import build_model -from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule -from nemo.collections.nlp.parts.utils_funcs import get_last_rank -from nemo.core.classes.common import PretrainedModelInfo -from nemo.utils import logging - - -class ContentFilteringModel(MegatronModule): - """Clip based content filtering model for NSFW.""" - - def __init__(self, model_cfg: DictConfig, model_parallel_config, padded_vocab_size: int, tokenizer: Optional): - super(ContentFilteringModel, self).__init__() - self.cfg = model_cfg - self.config = model_parallel_config - self.tokenizer = tokenizer - - self.concept_list = self._load_concept_list(model_cfg.concepts) - self.concept_count = len(self.concept_list) - - self.vision_encoder = CLIPVisionTransformer( - model_cfg.vision, model_parallel_config, pre_process=True, post_process=True - ) - - if "text" in model_cfg and model_cfg.text is not None: - self.text_encoder = CLIPTextTransformer( - model_cfg.text, model_parallel_config, padded_vocab_size, pre_process=True, post_process=True - ) - else: - self.text_encoder = None - - self.mlp_similarity_model = nn.Sequential( - nn.Linear(model_cfg.output_dim * 2, model_cfg.sim_hidden_dim), - nn.ReLU(), - nn.Linear(model_cfg.sim_hidden_dim, 1), - ) - - self.nn_classifier = nn.Sequential( - nn.Linear(self.concept_count * 2 + model_cfg.output_dim, model_cfg.cls_hidden_dim), - nn.ReLU(), - nn.Linear(model_cfg.cls_hidden_dim, 1), - ) - - self.register_buffer("concepts", torch.zeros(self.concept_count, model_cfg.output_dim)) - - def initialize_concept_embeddings(self, concepts: torch.Tensor): - if self.text_encoder is None: - return - - self.concepts.copy_(concepts.detach()) - del self.text_encoder - self.text_encoder = None - - def forward(self, image: torch.Tensor, mlp_factor: float = 1.0, emb_factor: float = 1.0) -> torch.Tensor: - """Perform model forward pass for given image and factor. - While inferencing, factors should be equal to default value - """ - - with torch.no_grad(): - embedding = self.vision_encoder(image).detach() - cos_similarity = self.cosine_similarity(embedding, self.concepts) - mlp_similarity = self.mlp_similarity(embedding, self.concepts) - - features = torch.cat([cos_similarity, mlp_similarity * mlp_factor, embedding * emb_factor], dim=-1) - - return self.nn_classifier(features) - - def cosine_similarity(self, prediction: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute cosine similarity between prediction tensor and target tensor - Args: - prediction: Tensor of shape [X, H] for prediction embedding - target: Tensor of shape [Y, H] for target to compare - Returns: - Similarity matrix of shape [X, Y] and value range [-1, 1] - """ - normalized_prediction = F.normalize(prediction) - normalized_target = F.normalize(target) - - return torch.matmul(normalized_prediction, normalized_target.t()) - - def mlp_similarity(self, prediction: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Compute mlp based similarity between prediction tensor and target tensor - Args: - prediction: Tensor of shape [X, H] for prediction embedding - target: Tensor of shape [Y, H] for target to compare - Returns: - Similarity matrix of shape [X, Y] and value range [-1, 1] - """ - - prediction, target = torch.broadcast_tensors(prediction.unsqueeze(1), target.unsqueeze(0)) - - combined = torch.cat([prediction, target], dim=-1) - - return torch.tanh(self.mlp_similarity_model(combined).squeeze(-1)) - - def set_input_tensor(self, input_tensor: torch.Tensor): - pass - - def _load_concept_list(self, config: Union[str, List[str]]) -> List[str]: - if isinstance(config, str): - config = [config] - - result_list = [] - for concept_file in config: - with open(concept_file, "r") as f: - result_list += [x.strip() for x in f.readlines() if x.strip() != ""] - - return result_list - - -def _get_autocast_dtype(precision: str): - if precision in ["bf16", "bf16-mixed"]: - return torch.bfloat16 - if precision in [32, "32", "32-true"]: - return torch.float - if precision in [16, "16", "16-mixed"]: - return torch.half - raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') - - -class MegatronContentFilteringModel(MegatronBaseModel): - def __init__(self, cfg: DictConfig, trainer: Trainer): - super(MegatronContentFilteringModel, self).__init__(cfg, trainer) - - self.model = build_model( - model_provider_func=self.model_provider_func, - wrap_with_ddp=False, - on_cpu=isinstance(self.trainer.accelerator, CPUAccelerator), - virtual_pipeline_model_parallel_size=None, - ) - self.model = self.model[0] - - self.megatron_amp_O2 = cfg.get("megatron_amp_O2", False) - if self.megatron_amp_O2: - if isinstance(self.model, list): - self.model = [ - Float16Module(config=self.model_parallel_config, module=x, precision=cfg.precision) - for x in self.model - ] - else: - self.model = Float16Module( - config=self.model_parallel_config, module=self.model, precision=cfg.precision - ) - - self.autocast_dtype = _get_autocast_dtype(self.trainer.precision) - self.enable_autocast = (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) - - self.init_consumed_samples = 0 - self.mlp_factor = 1.0 - self.emb_factor = 1.0 - - self.validation_metrics = None - - def get_module_list(self): - if isinstance(self.model, Float16Module): - return [self.model.module] - else: - return [self.model] - - def model_provider_func(self, pre_process, post_process): - return ContentFilteringModel(self.cfg, self.model_parallel_config, self.padded_vocab_size, self.tokenizer) - - def forward(self, image: torch.Tensor, mlp_factor: float = 1.0, emb_factor: float = 1.0) -> torch.Tensor: - return self.model(image, mlp_factor, emb_factor) - - def get_forward_output_and_loss_func(self, with_accuracy: bool = False): - def loss_fn(prediction: torch.Tensor, target: torch.Tensor): - loss = F.binary_cross_entropy_with_logits(prediction, target) - out_dict = {"loss": loss} - - if with_accuracy: - accuracy_components = torch.stack( - [ - ((prediction > 0) & (target == 1.0)).sum(), # tp - ((prediction < 0) & (target == 0.0)).sum(), # tn - ((prediction > 0) & (target == 0.0)).sum(), # fp - ((prediction < 0) & (target == 1.0)).sum(), # fn - ] - ) - out_dict["accuracy"] = accuracy_components - - return loss, out_dict - - def forward_step(dataloader_iter, model): - images, labels = next(dataloader_iter) - - if ( - parallel_state.get_pipeline_model_parallel_world_size() == 1 - or parallel_state.is_pipeline_first_stage() - ): - images = images.cuda(non_blocking=True) - labels = labels.cuda(non_blocking=True) - else: - images, labels = None, None - - classification = model(images, mlp_factor=self.mlp_factor, emb_factor=self.emb_factor) - - return classification.squeeze(-1), functools.partial(loss_fn, target=labels.float()) - - return forward_step - - def get_forward_embedding_func(self): - def forward_step(dataloader_iter, model): - concepts = next(dataloader_iter) - concepts = tokenize(concepts, self.tokenizer, self.cfg.text.max_position_embeddings) - return (model.text_encoder(concepts.cuda(non_blocking=True)), lambda x: (0.0, {"concepts": x})) - - return forward_step - - def fwd_bwd_step(self, dataloader_iter, batch_idx: int, forward_only: bool): - fwd_bwd_function = get_forward_backward_func() - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(with_accuracy=forward_only), - data_iterator=dataloader_iter, - model=self.model, - num_microbatches=get_num_microbatches(), - forward_only=forward_only, - seq_length=None, - micro_batch_size=self.cfg.micro_batch_size, - ) - - metrics = None - if losses_reduced_per_micro_batch: - loss_mean = torch.stack([l["loss"] for l in losses_reduced_per_micro_batch]).mean() - if forward_only: - metrics = torch.stack([l["accuracy"] for l in losses_reduced_per_micro_batch]).sum(dim=0) - else: - loss_mean = 0.0 - - return loss_mean, metrics - - def training_step(self, dataloader_iter, batch_idx): - self._optimizer.zero_grad() - - loss_mean, _ = self.fwd_bwd_step(dataloader_iter, batch_idx, forward_only=False) - - if self.megatron_amp_O2: - self._optimizer.allreduce_main_grads() - else: - self.allreduce_gradients() - - torch.distributed.broadcast(loss_mean, get_last_rank()) - if self.cfg.precision == 16: - loss_scale = self.trainer.precision_plugin.scaler._scale - if loss_scale is not None: - self.log("loss_scale", loss_scale, batch_size=1, prog_bar=True) - - self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) - lr = self._optimizer.param_groups[0]['lr'] - self.log('lr', lr, rank_zero_only=True, batch_size=1, prog_bar=True) - self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log( - 'consumed_samples', - self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), - prog_bar=True, - rank_zero_only=True, - batch_size=1, - ) - - return loss_mean - - def validation_step(self, dataloader_iter, batch_idx): - loss, metrics = self.fwd_bwd_step(dataloader_iter, batch_idx, forward_only=True) - if self.validation_metrics is None: - self.validation_metrics = metrics - else: - self.validation_metrics += metrics - - self.validation_step_outputs.append(loss) - return loss - - def on_validation_epoch_end(self): - torch.distributed.all_reduce(self.validation_metrics, op=torch.distributed.ReduceOp.SUM) - accuracy = (self.validation_metrics[0] + self.validation_metrics[1]) / self.validation_metrics.sum() - self.validation_metrics = None - - averaged_metrics = 0 - if parallel_state.is_pipeline_last_stage(): - averaged_metrics = torch.stack(self.validation_step_outputs).mean() - torch.distributed.broadcast(averaged_metrics, get_last_rank()) - self.log("val_loss", averaged_metrics, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log("accuracy", accuracy, prog_bar=True, rank_zero_only=True, batch_size=1) - - logging.info(f"Current evaluation accuracy: {accuracy}") - - return averaged_metrics - - def test_step(self, dataloader_iter, batch_idx): - return self.validation_step(dataloader_iter, batch_idx) - - def backward(self, *args, **kwargs): - pass - - def optimizer_zero_grad(self, *args, **kwargs): - pass - - def on_fit_start(self): - if self.model.text_encoder is not None: - fwd_bwd_function = get_forward_backward_func() - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_embedding_func(), - data_iterator=iter([self.model.concept_list]), - model=self.model, - num_microbatches=get_num_microbatches(), - forward_only=True, - seq_length=None, - micro_batch_size=self.model.concept_count, - ) - - concepts = torch.cat([x["concepts"] for x in losses_reduced_per_micro_batch], dim=0) - self.model.initialize_concept_embeddings(concepts) - self._cfg["text"] = None - - def setup(self, stage): - resume_checkpoint_path = self.trainer.ckpt_path - self.init_consumed_samples = ( - self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) if resume_checkpoint_path else 0 - ) - self.setup_training_data(self.cfg) - self.setup_validation_data(self.cfg) - - def setup_training_data(self, cfg: DictConfig) -> None: - logging.info("Setting up training dataset.") - train_ds = build_dataset(cfg, self.compute_consumed_samples(0), is_train=True) - - sampler = torch.utils.data.distributed.DistributedSampler( - train_ds, num_replicas=self.trainer.world_size, rank=self.trainer.global_rank, shuffle=True - ) - - self._train_dl = torch.utils.data.DataLoader( - train_ds, - sampler=sampler, - batch_size=cfg.micro_batch_size, - num_workers=cfg.data.num_workers, - pin_memory=True, - drop_last=cfg.data.train.get("drop_last", True), - persistent_workers=True, - ) - - def setup_validation_data(self, cfg: DictConfig) -> None: - logging.info("Setting up validation dataset.") - val_ds = build_dataset(cfg, self.compute_consumed_samples(0), is_train=False) - - sampler = torch.utils.data.distributed.DistributedSampler( - val_ds, num_replicas=self.trainer.world_size, rank=self.trainer.global_rank, shuffle=True - ) - - self._validation_dl = torch.utils.data.DataLoader( - val_ds, - sampler=sampler, - batch_size=cfg.micro_batch_size, - num_workers=cfg.data.num_workers, - pin_memory=True, - drop_last=cfg.data.validation.get("drop_last", True), - persistent_workers=True, - ) - - def parameters(self): - return itertools.chain(self.model.mlp_similarity_model.parameters(), self.model.nn_classifier.parameters()) - - def on_load_checkpoint(self, checkpoint) -> None: - if "model.concepts" in checkpoint["state_dict"]: - self.model.text_encoder = None - - @classmethod - def list_available_models(cls) -> Optional[PretrainedModelInfo]: - return None diff --git a/nemo/collections/multimodal/models/controlnet/__init__.py b/nemo/collections/multimodal/models/controlnet/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/models/controlnet/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/models/controlnet/controlnet.py b/nemo/collections/multimodal/models/controlnet/controlnet.py deleted file mode 100644 index 0b0c7b291d69..000000000000 --- a/nemo/collections/multimodal/models/controlnet/controlnet.py +++ /dev/null @@ -1,1002 +0,0 @@ -from typing import Any, Dict, Optional, Union - -import einops -import torch -import torch.nn as nn -from einops import rearrange, repeat -from omegaconf import DictConfig, OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.utilities.rank_zero import rank_zero_only -from torch._dynamo import optimize -from torch._inductor import config as inductor_config -from torchvision.utils import make_grid - -from nemo.collections.multimodal.data.controlnet.controlnet_dataset import build_train_valid_datasets -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion -from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler -from nemo.collections.multimodal.modules.stable_diffusion.attention import SpatialTransformer -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel import ( - AttentionBlock, - Downsample, - ResBlock, - TimestepEmbedSequential, - UNetModel, -) -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( - conv_nd, - linear, - timestep_embedding, - zero_module, -) -from nemo.collections.multimodal.parts.stable_diffusion.utils import exists, log_txt_as_img -from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel -from nemo.collections.nlp.modules.common.megatron.module import Float16Module -from nemo.collections.nlp.parts.utils_funcs import get_last_rank -from nemo.utils import logging - -try: - from apex import amp - from apex.transformer.enums import AttnMaskType - from apex.transformer.pipeline_parallel.utils import get_num_microbatches - - HAVE_APEX = True -except (ImportError, ModuleNotFoundError): - HAVE_APEX = False - -try: - from megatron.core import parallel_state - from megatron.core.pipeline_parallel.schedules import get_forward_backward_func - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - - -class ControlledUnetModel(UNetModel): - def forward(self, x, timesteps=None, context=None, control=None, only_mid_control=False, **kwargs): - hs = [] - with torch.no_grad(): - t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False) - emb = self.time_embed(t_emb) - h = x.type(emb.dtype) - for module in self.input_blocks: - h = module(h, emb, context) - hs.append(h) - h = self.middle_block(h, emb, context) - - if control is not None: - h += control.pop() - - for i, module in enumerate(self.output_blocks): - if only_mid_control or control is None: - h = torch.cat([h, hs.pop()], dim=1) - else: - h = torch.cat([h, hs.pop() + control.pop()], dim=1) - h = module(h, emb, context) - - h = h.type(x.dtype) - return self.out(h) - - -class ControlLDM(LatentDiffusion): - def __init__(self, cfg, model_parallel_config): - super().__init__(cfg=cfg, model_parallel_config=model_parallel_config) - self.control_model = ControlLDM.from_config_dict(cfg.control_stage_config) - self.control_key = cfg.control_key - self.only_mid_control = cfg.only_mid_control - self.control_scales = [1.0] * 13 - self.sd_locked = cfg.sd_locked - self.channels_last = cfg.channels_last - - if cfg.get("inductor", False): - # TorchInductor with CUDA graph can lead to OOM - inductor_config.triton.cudagraphs = cfg.get("inductor_cudagraphs", False) - torch._dynamo.config.dynamic_shapes = False - torch._dynamo.config.automatic_dynamic_shapes = False - self.control_model = torch.compile(self.control_model) - - if self.channels_last: - self.control_model = self.control_model.to(memory_format=torch.channels_last) - - @torch.no_grad() - def get_input(self, batch, k, bs=None, *args, **kwargs): - x, c = super().get_input(batch, self.first_stage_key, *args, **kwargs) - control = batch[self.control_key] - if bs is not None: - control = control[:bs] - control = control.to(torch.cuda.current_device()) - if self.channels_last: - control = control.permute(0, 3, 1, 2).to(non_blocking=True) - else: - control = einops.rearrange(control, 'b h w c -> b c h w') - control = control.to(memory_format=torch.contiguous_format).float() - return x, dict(c_crossattn=c, c_concat=control) - - def apply_model(self, x_noisy, t, cond, *args, **kwargs): - assert isinstance(cond, dict) - diffusion_model = self.model.diffusion_model - - # cond_txt = torch.cat(cond['c_crossattn'], 1) ## Has removed this first dim in the get_input function, same for below hint input - cond_txt = cond['c_crossattn'] - - if cond['c_concat'] is None: - eps = diffusion_model( - x=x_noisy, timesteps=t, context=cond_txt, control=None, only_mid_control=self.only_mid_control - ) - else: - control = self.control_model(x=x_noisy, hint=cond['c_concat'], timesteps=t, context=cond_txt) - control = [c * scale for c, scale in zip(control, self.control_scales)] - eps = diffusion_model( - x=x_noisy, timesteps=t, context=cond_txt, control=control, only_mid_control=self.only_mid_control - ) - return eps - - @torch.no_grad() - def get_unconditional_conditioning(self, N): - return self.get_learned_conditioning([""] * N) - - @torch.no_grad() - def log_images( - self, - batch, - N=4, - n_row=2, - sample=False, - ddim_steps=50, - ddim_eta=0.0, - return_keys=None, - quantize_denoised=True, - inpaint=True, - plot_denoise_rows=False, - plot_progressive_rows=True, - plot_diffusion_rows=False, - unconditional_guidance_scale=9.0, - unconditional_guidance_label=None, - use_ema_scope=True, - **kwargs, - ): - use_ddim = ddim_steps is not None - - log = dict() - batch = next(batch) - batch['images'] = batch['images'].to(torch.cuda.current_device()) - batch['hint'] = batch['hint'].to(torch.cuda.current_device()) - N = batch['images'].shape[0] - z, c = self.get_input(batch, self.first_stage_key, bs=N) - c_cat, c = c["c_concat"][:N], c["c_crossattn"][:N] - N = min(z.shape[0], N) - n_row = min(z.shape[0], n_row) - log["reconstruction"] = self.decode_first_stage(z) - log["control"] = c_cat * 2.0 - 1.0 - log["conditioning"] = log_txt_as_img((512, 512), batch[self.cond_stage_key], size=16) - - if plot_diffusion_rows: - # get diffusion row - diffusion_row = list() - z_start = z[:n_row] - for t in range(self.num_timesteps): - if t % self.log_every_t == 0 or t == self.num_timesteps - 1: - t = repeat(torch.tensor([t]), '1 -> b', b=n_row) - t = t.to(self.device).long() - noise = torch.randn_like(z_start) - z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise) - diffusion_row.append(self.decode_first_stage(z_noisy)) - - diffusion_row = torch.stack(diffusion_row) # n_log_step, n_row, C, H, W - diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w') - diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w') - diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0]) - log["diffusion_row"] = diffusion_grid - - if sample: - # get denoise row - samples, z_denoise_row = self.sample_log( - cond={"c_concat": c_cat, "c_crossattn": c}, - batch_size=N, - ddim=use_ddim, - ddim_steps=ddim_steps, - eta=ddim_eta, - ) - x_samples = self.decode_first_stage(samples) - log["samples"] = x_samples - if plot_denoise_rows: - denoise_grid = self._get_denoise_row_from_list(z_denoise_row) - log["denoise_row"] = denoise_grid - - if unconditional_guidance_scale > 1.0: - uc_cross = self.get_unconditional_conditioning(N) - uc_cat = c_cat # torch.zeros_like(c_cat) - uc_full = {"c_concat": uc_cat, "c_crossattn": uc_cross} - samples_cfg, _ = self.sample_log( - cond={"c_concat": c_cat, "c_crossattn": c}, - batch_size=N, - ddim=use_ddim, - ddim_steps=ddim_steps, - eta=ddim_eta, - unconditional_guidance_scale=unconditional_guidance_scale, - unconditional_conditioning=uc_full, - ) - x_samples_cfg = self.decode_first_stage(samples_cfg) - log[f"samples_cfg_scale_{unconditional_guidance_scale:.2f}"] = x_samples_cfg - - return log - - @torch.no_grad() - def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): - ddim_sampler = DDIMSampler(self) - c, h, w = cond["c_concat"][0].shape - shape = (self.channels, h // 8, w // 8) - samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, shape, cond, verbose=False, **kwargs) - return samples, intermediates - - def parameters(self): - params = list(self.control_model.parameters()) - if not self.sd_locked: - params += list(self.model.diffusion_model.output_blocks.parameters()) - params += list(self.model.diffusion_model.out.parameters()) - return params - - def low_vram_shift(self, is_diffusing): - if is_diffusing: - self.model = self.model.cuda() - self.control_model = self.control_model.cuda() - self.first_stage_model = self.first_stage_model.cpu() - self.cond_stage_model = self.cond_stage_model.cpu() - else: - self.model = self.model.cpu() - self.control_model = self.control_model.cpu() - self.first_stage_model = self.first_stage_model.cuda() - self.cond_stage_model = self.cond_stage_model.cuda() - - -class ControlNet(nn.Module): - def __init__( - self, - image_size, - in_channels, - model_channels, - hint_channels, - num_res_blocks, - attention_resolutions, - dropout=0, - channel_mult=(1, 2, 4, 8), - conv_resample=True, - dims=2, - use_checkpoint=False, - use_fp16=False, - num_heads=-1, - num_head_channels=-1, - num_heads_upsample=-1, - use_scale_shift_norm=False, - resblock_updown=False, - use_new_attention_order=False, - use_spatial_transformer=False, # custom transformer support - transformer_depth=1, # custom transformer support - context_dim=None, # custom transformer support - n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model - legacy=True, - disable_self_attentions=None, ###TODO MMY these are new - num_attention_blocks=None, - disable_middle_self_attn=False, - use_linear_in_transformer=False, - use_flash_attention=False, - from_pretrained_unet=None, - from_NeMo=True, - ): - super().__init__() - if use_spatial_transformer: - assert ( - context_dim is not None - ), 'Fool!! You forgot to include the dimension of your cross-attention conditioning...' - - if context_dim is not None: - assert ( - use_spatial_transformer - ), 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...' - from omegaconf.listconfig import ListConfig - - if type(context_dim) == ListConfig: - context_dim = list(context_dim) - - if num_heads_upsample == -1: - num_heads_upsample = num_heads - - if num_heads == -1: - assert num_head_channels != -1, 'Either num_heads or num_head_channels has to be set' - - if num_head_channels == -1: - assert num_heads != -1, 'Either num_heads or num_head_channels has to be set' - - self.dims = dims - self.image_size = image_size - self.in_channels = in_channels - self.model_channels = model_channels - if isinstance(num_res_blocks, int): - self.num_res_blocks = len(channel_mult) * [num_res_blocks] - else: - if len(num_res_blocks) != len(channel_mult): - raise ValueError( - "provide num_res_blocks either as an int (globally constant) or " - "as a list/tuple (per-level) with the same length as channel_mult" - ) - self.num_res_blocks = num_res_blocks - if disable_self_attentions is not None: - # should be a list of booleans, indicating whether to disable self-attention in TransformerBlocks or not - assert len(disable_self_attentions) == len(channel_mult) - if num_attention_blocks is not None: - assert len(num_attention_blocks) == len(self.num_res_blocks) - assert all( - map(lambda i: self.num_res_blocks[i] >= num_attention_blocks[i], range(len(num_attention_blocks))) - ) - print( - f"Constructor of UNetModel received num_attention_blocks={num_attention_blocks}. " - f"This option has LESS priority than attention_resolutions {attention_resolutions}, " - f"i.e., in cases where num_attention_blocks[i] > 0 but 2**i not in attention_resolutions, " - f"attention will still not be set." - ) - - self.attention_resolutions = attention_resolutions - self.dropout = dropout - self.channel_mult = channel_mult - self.conv_resample = conv_resample - self.use_checkpoint = use_checkpoint - self.dtype = torch.float16 if use_fp16 else torch.float32 - self.num_heads = num_heads - self.num_head_channels = num_head_channels - self.num_heads_upsample = num_heads_upsample - self.predict_codebook_ids = n_embed is not None - - time_embed_dim = model_channels * 4 - self.time_embed = nn.Sequential( - linear(model_channels, time_embed_dim), nn.SiLU(), linear(time_embed_dim, time_embed_dim), - ) - - self.input_blocks = nn.ModuleList( - [TimestepEmbedSequential(conv_nd(dims, in_channels, model_channels, 3, padding=1))] - ) - self.zero_convs = nn.ModuleList([self.make_zero_conv(model_channels)]) - - self.input_hint_block = TimestepEmbedSequential( - conv_nd(dims, hint_channels, 16, 3, padding=1), - nn.SiLU(), - conv_nd(dims, 16, 16, 3, padding=1), - nn.SiLU(), - conv_nd(dims, 16, 32, 3, padding=1, stride=2), - nn.SiLU(), - conv_nd(dims, 32, 32, 3, padding=1), - nn.SiLU(), - conv_nd(dims, 32, 96, 3, padding=1, stride=2), - nn.SiLU(), - conv_nd(dims, 96, 96, 3, padding=1), - nn.SiLU(), - conv_nd(dims, 96, 256, 3, padding=1, stride=2), - nn.SiLU(), - zero_module(conv_nd(dims, 256, model_channels, 3, padding=1)), - ) - - self._feature_size = model_channels - input_block_chans = [model_channels] - ch = model_channels - ds = 1 - for level, mult in enumerate(channel_mult): - for nr in range(self.num_res_blocks[level]): - layers = [ - ResBlock( - ch, - time_embed_dim, - dropout, - out_channels=mult * model_channels, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - ) - ] - ch = mult * model_channels - if ds in attention_resolutions: - if num_head_channels == -1: - dim_head = ch // num_heads - else: - num_heads = ch // num_head_channels - dim_head = num_head_channels - if legacy: - # num_heads = 1 - dim_head = ch // num_heads if use_spatial_transformer else num_head_channels - if exists(disable_self_attentions): - disabled_sa = disable_self_attentions[level] - else: - disabled_sa = False - - if not exists(num_attention_blocks) or nr < num_attention_blocks[level]: - layers.append( - AttentionBlock( - ch, - use_checkpoint=use_checkpoint, - num_heads=num_heads, - num_head_channels=dim_head, - use_new_attention_order=use_new_attention_order, - ) - if not use_spatial_transformer - else SpatialTransformer( - ch, - num_heads, - dim_head, - depth=transformer_depth, - context_dim=context_dim, - disable_self_attn=disabled_sa, - use_linear=use_linear_in_transformer, - use_checkpoint=use_checkpoint, - use_flash_attention=use_flash_attention, - ) - ) - self.input_blocks.append(TimestepEmbedSequential(*layers)) - self.zero_convs.append(self.make_zero_conv(ch)) - self._feature_size += ch - input_block_chans.append(ch) - if level != len(channel_mult) - 1: - out_ch = ch - self.input_blocks.append( - TimestepEmbedSequential( - ResBlock( - ch, - time_embed_dim, - dropout, - out_channels=out_ch, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - down=True, - ) - if resblock_updown - else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch) - ) - ) - ch = out_ch - input_block_chans.append(ch) - self.zero_convs.append(self.make_zero_conv(ch)) - ds *= 2 - self._feature_size += ch - - if num_head_channels == -1: - dim_head = ch // num_heads - else: - num_heads = ch // num_head_channels - dim_head = num_head_channels - if legacy: - # num_heads = 1 - dim_head = ch // num_heads if use_spatial_transformer else num_head_channels - self.middle_block = TimestepEmbedSequential( - ResBlock( - ch, - time_embed_dim, - dropout, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - ), - AttentionBlock( - ch, - use_checkpoint=use_checkpoint, - num_heads=num_heads, - num_head_channels=dim_head, - use_new_attention_order=use_new_attention_order, - ) - if not use_spatial_transformer - else SpatialTransformer( # always uses a self-attn - ch, - num_heads, - dim_head, - depth=transformer_depth, - context_dim=context_dim, - disable_self_attn=disable_middle_self_attn, - use_linear=use_linear_in_transformer, - use_checkpoint=use_checkpoint, - use_flash_attention=use_flash_attention, - ), - ResBlock( - ch, - time_embed_dim, - dropout, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - ), - ) - self.middle_block_out = self.make_zero_conv(ch) - self._feature_size += ch - - if from_pretrained_unet is not None: - self.load_from_unet(from_pretrained_unet=from_pretrained_unet, from_NeMo=from_NeMo) - - def load_from_unet(self, from_pretrained_unet, from_NeMo=True): - if not from_NeMo: - print('loading from other source of unet is experimental! Carefully check if keys are loaded correctly.') - else: - print("Loading unet blocks from sd") - - state_dict = torch.load(from_pretrained_unet, map_location='cpu') - state_dict = state_dict['state_dict'] - model_state_dict = self.state_dict() - - re_state_dict = {} - for key_, value_ in state_dict.items(): - if key_.startswith('model.model.diffusion_model'): - re_state_dict[key_.replace('model.model.diffusion_model.', '')] = value_ - if key_.startswith('model.diffusion_model'): - re_state_dict[key_.replace('model.diffusion_model.', '')] = value_ - if key_.startswith('model.model._orig_mod.diffusion_model'): - re_state_dict[key_.replace('model.model._orig_mod.diffusion_model.', '')] = value_ - if key_.startswith('model._orig_mod.diffusion_model'): - re_state_dict[key_.replace('model._orig_mod.diffusion_model.', '')] = value_ - - expected_keys = list(model_state_dict.keys()) - loaded_keys = list(re_state_dict.keys()) - missing_keys = list(set(expected_keys) - set(loaded_keys)) - unexpected_keys = list(set(loaded_keys) - set(expected_keys)) - - if ( - 'input_blocks.1.0.in_layers.2.weight' in loaded_keys - and 'input_blocks.1.0.in_layers.1.weight' in expected_keys - ): - # GroupNormOpt fuses activation function to one layer, thus the indexing of weights are shifted for following - for key_ in missing_keys: - if key_.startswith('input_blocks') or key_.startswith('middle_block.'): - s = key_.split('.') - idx = int(s[-2]) - new_key_ = ".".join(s[:-2] + [str(int(idx + 1))] + [s[-1]]) - re_state_dict[key_] = re_state_dict[new_key_] - - loaded_keys = list(re_state_dict.keys()) - missing_keys = list(set(expected_keys) - set(loaded_keys)) - unexpected_keys = list(set(loaded_keys) - set(expected_keys)) - - self.load_state_dict(re_state_dict, strict=False) - - if len(missing_keys) > 42: - print( - 'warning: only input hint blocks and zero conv layers are randomly initialized. This message indicates some unet blocks are not loaded correctly.' - ) - print(f'There is {len(missing_keys)} total missing keys') - print("Missing:", missing_keys) - print("Unexpected:", unexpected_keys) - else: - print("sd blocks loaded successfully") - - # Check if unet blocks are loaded - # for key, value in self.state_dict().items(): - # if key in missing_keys: - # continue - # if torch.allclose(value, re_state_dict[key], atol = 1e-5): - # pass - # else: - # print(f"{key} not matching after loading") - - def make_zero_conv(self, channels): - return TimestepEmbedSequential(zero_module(conv_nd(self.dims, channels, channels, 1, padding=0))) - - def forward(self, x, hint, timesteps, context, **kwargs): - t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False) - emb = self.time_embed(t_emb) - - guided_hint = self.input_hint_block(hint, emb, context) - outs = [] - - h = x.type(self.dtype) - for module, zero_conv in zip(self.input_blocks, self.zero_convs): - if guided_hint is not None: - h = module(h, emb, context) - h += guided_hint - guided_hint = None - else: - h = module(h, emb, context) - outs.append(zero_conv(h, emb, context)) - - h = self.middle_block(h, emb, context) - outs.append(self.middle_block_out(h, emb, context)) - - return outs - - -class MegatronControlNet(MegatronBaseModel): - def __init__(self, cfg: DictConfig, trainer: Trainer): - if not HAVE_APEX: - raise ImportError( - "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." - ) - - if not HAVE_MEGATRON_CORE: - raise ImportError( - "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." - ) - - # this prevents base constructor from initializing tokenizer - self.tokenizer = None - super().__init__(cfg, trainer=trainer) - - self._validate_trainer() - - # megatron_amp_O2 is not yet supported in diffusion models - self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) - - self.model = self.model_provider_func() - - self.conditioning_keys = [] - - if self.trainer.precision in ['bf16', 'bf16-mixed']: - self.autocast_dtype = torch.bfloat16 - elif self.trainer.precision in [32, '32', '32-true']: - self.autocast_dtype = torch.float - elif self.trainer.precision in [16, '16', '16-mixed']: - self.autocast_dtype = torch.half - else: - raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') - - def get_module_list(self): - if isinstance(self.model, list): - return [model.module if isinstance(model, Float16Module) else model for model in self.model] - elif isinstance(self.model, Float16Module): - return [self.model.module] - else: - return [self.model] - - def model_provider_func(self, pre_process=True, post_process=True): - """Model depends on pipeline paralellism.""" - model = ControlLDM(cfg=self.cfg, model_parallel_config=self.model_parallel_config) - return model - - def forward(self, x, c, *args, **kwargs): - output_tensor = self.model(x, c, *args, **kwargs) - return output_tensor - - @rank_zero_only - @torch.no_grad() - def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): - if self.cfg.scale_by_std and self.current_epoch == 0 and self.global_step == 0 and batch_idx == 0: - assert self.cfg.scale_factor == 1.0, 'rather not use custom rescaling and std-rescaling simultaneously' - batch[self.cfg.first_stage_key] = batch[self.cfg.first_stage_key].cuda(non_blocking=True) - self.model.on_train_batch_start(batch, batch_idx) - - def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): - tensor_shape = None # Placeholder - - # handle asynchronous grad reduction - no_sync_func = None - if not forward_only and self.with_distributed_adam: - no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) - - # pipeline schedules will get these from self.model.config - for module in self.get_module_list(): - module.config.no_sync_func = no_sync_func - - # run forward and backwards passes for an entire global batch - # we do this inside training_step to support pipeline parallelism - fwd_bwd_function = get_forward_backward_func() - - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(), - data_iterator=dataloader_iter, - model=self.model, - num_microbatches=get_num_microbatches(), - forward_only=forward_only, - seq_length=None, - micro_batch_size=self.cfg.micro_batch_size, - ) - - # losses_reduced_per_micro_batch is a list of dictionaries - # [{"loss": 0.1}, {"loss": 0.2}, ...] which are from gradient accumulation steps - # only the last stages of the pipeline return losses - loss_dict = {} - if losses_reduced_per_micro_batch: - if (not forward_only) or self.cfg.data.get('validation_drop_last', True): - # average loss across micro batches - for key in losses_reduced_per_micro_batch[0]: - loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.stack(loss_tensors_list) - loss_dict[key] = loss_tensor.mean() - loss_mean = loss_dict["train/loss"] - else: - raise NotImplementedError("Losses of micro batches sizes must be uniform!") - else: - if forward_only: - loss_mean = [] - else: - loss_mean = torch.tensor(0.0, device=torch.cuda.current_device()) - - return loss_mean, loss_dict - - def training_step(self, dataloader_iter, batch_idx): - """ - Our dataloaders produce a micro-batch and then we fetch - a number of microbatches depending on the global batch size and model parallel size - from the dataloader to produce a list of microbatches. - Batch should be a list of microbatches and those microbatches should on CPU. - Microbatches are then moved to GPU during the pipeline. - The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. - """ - # we zero grads here because we also call backward in the apex fwd/bwd functions - self._optimizer.zero_grad() - - loss_mean, loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, False) - - if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): - self.allreduce_sequence_parallel_gradients() - - if self.with_distributed_adam: - # gradients are reduced internally in distributed optimizer - pass - elif self.megatron_amp_O2: - # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) - # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): - # # main grads are stored in the MainParamsOptimizer wrapper - # self._optimizer.allreduce_main_grads() - self._optimizer.allreduce_main_grads() - else: - # async grad allreduce is not currently implemented for O1/autocasting mixed precision training - # so we all-reduce gradients after the pipeline - self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) - - if self.cfg.precision == [16, '16', '16-mixed']: - loss_scale = self.trainer.precision_plugin.scaler._scale - if loss_scale is not None: - self.log('loss_scale', loss_scale, batch_size=1) - - self.log_dict(loss_dict, prog_bar=False, logger=True, on_step=True, rank_zero_only=True, batch_size=1) - self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) - lr = self._optimizer.param_groups[0]['lr'] - self.log('lr', lr, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log( - 'consumed_samples', - self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), - prog_bar=True, - rank_zero_only=True, - batch_size=1, - ) - return loss_mean - - def backward(self, *args, **kwargs): - """ LightningModule hook to do backward. - We want this to do nothing since we run backward in the fwd/bwd functions from apex. - No need to call it here. - """ - pass - - def optimizer_zero_grad(self, *args, **kwargs): - """ LightningModule hook to zero grad. - We want this to do nothing as we are zeroing grads during the training_step. - """ - pass - - def _append_sequence_parallel_module_grads(self, module, grads): - """ Helper method for allreduce_sequence_parallel_gradients""" - - for param in module.parameters(): - sequence_parallel_param = getattr(param, 'sequence_parallel', False) - if sequence_parallel_param and param.requires_grad: - if self.megatron_amp_O2: - grad = param.main_grad - else: - grad = param.grad - grads.append(grad.data) - - def get_forward_output_and_loss_func(self): - def process_batch(batch): - """ Prepares the global batch for apex fwd/bwd functions. - Global batch is a list of micro batches. - """ - # noise_map, condition - batch[self.cfg.first_stage_key] = batch[self.cfg.first_stage_key].cuda(non_blocking=True) - if isinstance(batch[self.cfg.cond_stage_key], torch.Tensor): - # in the case of precached text embeddings, cond_stage is also a tensor - batch[self.cfg.cond_stage_key] = batch[self.cfg.cond_stage_key].cuda(non_blocking=True) - - # SD has more dedicated structure for encoding, so we enable autocasting here as well - with torch.cuda.amp.autocast( - self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, - ): - x, c = self.model.get_input(batch, self.cfg.first_stage_key) - - if not isinstance(c, dict): - return [x, c] - - if len(self.conditioning_keys) == 0: - self.conditioning_keys = list(c.keys()) - c_list = [c[key] for key in self.conditioning_keys] - return [x, *c_list] - - def fwd_output_and_loss_func(dataloader_iter, model): - batch = next(dataloader_iter) - batch = process_batch(batch) - batch = [x.cuda(non_blocking=True) for x in batch] - if len(self.conditioning_keys) == 0: - x, c = batch - else: - x = batch[0] - c = {} - for idx, key in enumerate(self.conditioning_keys): - c[key] = batch[1 + idx] - loss, loss_dict = model(x, c) - - def dummy(output_tensor): - return loss, loss_dict - - # output_tensor, and a function to convert output_tensor to loss + loss_dict - return loss, dummy - - return fwd_output_and_loss_func - - def get_forward_output_only_func(self): - def fwd_output_only_func(batch, model): - raise NotImplementedError - - return fwd_output_only_func - - @torch.no_grad() - def validation_step(self, batch, batch_idx): - tensor_shape = None # Placeholder - fwd_bwd_function = get_forward_backward_func() - - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(), - data_iterator=dataloader_iter, - model=[self.model], - num_microbatches=get_num_microbatches(), - forward_only=True, - tensor_shape=None, # required by pipeline parallelism - dtype=self.autocast_dtype, - sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=True, - ) - # only the last stages of the pipeline return losses - val_loss_dict = {} - if losses_reduced_per_micro_batch: - # average loss across micro batches - for key in losses_reduced_per_micro_batch[0]: - loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.stack(loss_tensors_list) - val_loss_dict[key] = loss_tensor.mean() - - self.log_dict(val_loss_dict, prog_bar=False, logger=True, on_step=False, on_epoch=True) - - def setup(self, stage=None): - """ PTL hook that is executed after DDP spawns. - We setup datasets here as megatron datasets require DDP to instantiate. - See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. - Args: - stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. - """ - self.model.rng.manual_seed(self.cfg.seed + 100 * parallel_state.get_data_parallel_rank()) - - # log number of parameters - if isinstance(self.model, list): - num_parameters_on_device = sum( - [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] - ) - else: - num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) - - # to be summed across data parallel group - total_num_parameters = torch.tensor(num_parameters_on_device).cuda(non_blocking=True) - - torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) - - logging.info( - f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' - f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' - f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' - f'Total number of model parameters: {total_num_parameters:.2e}.' - ) - - resume_checkpoint_path = self.trainer.ckpt_path - if resume_checkpoint_path: - init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) - else: - init_consumed_samples = 0 - self.init_consumed_samples = init_consumed_samples - self.init_global_step = self.trainer.global_step - - # allowing restored models to optionally setup datasets - self.build_train_valid_test_datasets() - - # Batch size need to be provided for webdatset - self._num_micro_batches = get_num_microbatches() - self._micro_batch_size = self.cfg.micro_batch_size - - self.setup_training_data(self.cfg.data) - self.setup_validation_data(self.cfg.data) - self.setup_test_data(self.cfg.data) - - def build_train_valid_test_datasets(self): - logging.info('Building datasets for Stable Diffusion...') - if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): - raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") - - if self.cfg.first_stage_key.endswith("encoded"): - self._train_ds, self._validation_ds = build_train_valid_precached_datasets( - model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0), - ) - else: - self._train_ds, self._validation_ds = build_train_valid_datasets( - model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0) - ) - self._test_ds = None - - if self._train_ds is not None: - logging.info(f'Length of train dataset: {len(self._train_ds)}') - if self._validation_ds is not None: - logging.info(f'Length of val dataset: {len(self._validation_ds)}') - if self._test_ds is not None: - logging.info(f'Length of test dataset: {len(self._test_ds)}') - logging.info(f'Finished building datasets for LatentDiffusion.') - return self._train_ds, self._validation_ds, self._test_ds - - def setup_training_data(self, cfg): - if hasattr(self, '_train_ds') and self._train_ds is not None: - consumed_samples = self.compute_consumed_samples(0) - logging.info( - f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' - ) - self._train_dl = torch.utils.data.DataLoader( - self._train_ds, - batch_size=self._micro_batch_size, - num_workers=cfg.num_workers, - pin_memory=True, - drop_last=True, - persistent_workers=True, - ) - - def setup_validation_data(self, cfg): - if hasattr(self, '_validation_ds') and self._validation_ds is not None: - consumed_samples = 0 - logging.info( - f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' - ) - self._validation_dl = torch.utils.data.DataLoader( - self._validation_ds, - batch_size=self._micro_batch_size, - num_workers=cfg.num_workers, - pin_memory=True, - drop_last=False, - persistent_workers=True, - ) - - def setup_test_data(self, cfg): - if hasattr(self, '_test_ds') and self._test_ds is not None: - consumed_samples = 0 - logging.info( - f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' - ) - self._test_dl = torch.utils.data.DataLoader( - self._test_ds, batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, - ) - - def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: - """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device - When using pipeline parallelism, we need the global batch to remain on the CPU, - since the memory overhead will be too high when using a large number of microbatches. - Microbatches are transferred from CPU to GPU inside the pipeline. - """ - return batch - - def _validate_trainer(self): - """ Certain trainer configurations can break training. - Here we try to catch them and raise an error. - """ - if self.trainer.accumulate_grad_batches > 1: - raise ValueError( - f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' - ) - - @classmethod - def list_available_models(cls): - return None - - def log_images(self, *args, **kwargs): - return self.model.log_images(*args, **kwargs) - - def parameters(self): - if isinstance(self.model, list): - return itertools.chain.from_iterable(module.parameters() for module in self.model) - else: - return self.model.parameters() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/LICENSE b/nemo/collections/multimodal/models/controlnet/uniformer/LICENSE deleted file mode 100644 index c38dc639e6e2..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/LICENSE +++ /dev/null @@ -1,203 +0,0 @@ -Copyright 2022 SenseTime X-Lab. All rights reserved. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2022 SenseTime X-Lab. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/__init__.py deleted file mode 100644 index a03ce9a4511d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# Uniformer -# From https://github.com/Sense-X/UniFormer -# # Apache-2.0 license - -import os - -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import ( - inference_segmentor, - init_segmentor, - show_result_pyplot, -) -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core.evaluation import get_palette - -checkpoint_file = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/upernet_global_small.pth" - - -class UniformerDetector: - def __init__(self): - annotator_ckpts_path = '/opt/NeMo/nemo/collections/multimodal/models/controlnet/uniformer' - modelpath = os.path.join(annotator_ckpts_path, "upernet_global_small.pth") - if not os.path.exists(modelpath): - from basicsr.utils.download_util import load_file_from_url - - load_file_from_url(checkpoint_file, model_dir=annotator_ckpts_path) - config_file = os.path.join( - os.path.dirname(annotator_ckpts_path), "uniformer", "exp", "upernet_global_small", "config.py" - ) - self.model = init_segmentor(config_file, modelpath).cuda() - - def __call__(self, img): - result = inference_segmentor(self.model, img) - res_img = show_result_pyplot(self.model, img, result, get_palette('ade'), opacity=1) - return res_img diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py deleted file mode 100644 index 868ea7214c35..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py +++ /dev/null @@ -1,58 +0,0 @@ -# dataset settings -dataset_type = 'ADE20KDataset' -data_root = 'data/ade/ADEChallengeData2016' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -crop_size = (512, 512) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', reduce_zero_label=True), - dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(2048, 512), - # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ], - ), -] -data = dict( - samples_per_gpu=4, - workers_per_gpu=4, - train=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/training', - ann_dir='annotations/training', - pipeline=train_pipeline, - ), - val=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/validation', - ann_dir='annotations/validation', - pipeline=test_pipeline, - ), - test=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/validation', - ann_dir='annotations/validation', - pipeline=test_pipeline, - ), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py deleted file mode 100644 index 4a234cc4de85..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py +++ /dev/null @@ -1,64 +0,0 @@ -# dataset settings -dataset_type = 'ChaseDB1Dataset' -data_root = 'data/CHASE_DB1' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -img_scale = (960, 999) -crop_size = (128, 128) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations'), - dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=img_scale, - # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ], - ), -] - -data = dict( - samples_per_gpu=4, - workers_per_gpu=4, - train=dict( - type='RepeatDataset', - times=40000, - dataset=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/training', - ann_dir='annotations/training', - pipeline=train_pipeline, - ), - ), - val=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/validation', - ann_dir='annotations/validation', - pipeline=test_pipeline, - ), - test=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/validation', - ann_dir='annotations/validation', - pipeline=test_pipeline, - ), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py deleted file mode 100644 index e44904a99a8d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py +++ /dev/null @@ -1,50 +0,0 @@ -# dataset settings -dataset_type = 'CityscapesDataset' -data_root = 'data/cityscapes/' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -crop_size = (512, 1024) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations'), - dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(2048, 1024), - # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ], - ), -] -data = dict( - samples_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - data_root=data_root, - img_dir='leftImg8bit/train', - ann_dir='gtFine/train', - pipeline=train_pipeline, - ), - val=dict( - type=dataset_type, data_root=data_root, img_dir='leftImg8bit/val', ann_dir='gtFine/val', pipeline=test_pipeline - ), - test=dict( - type=dataset_type, data_root=data_root, img_dir='leftImg8bit/val', ann_dir='gtFine/val', pipeline=test_pipeline - ), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py deleted file mode 100644 index f4a0def57ae7..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py +++ /dev/null @@ -1,32 +0,0 @@ -_base_ = './cityscapes.py' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -crop_size = (769, 769) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations'), - dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(2049, 1025), - # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ], - ), -] -data = dict(train=dict(pipeline=train_pipeline), val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py deleted file mode 100644 index 51849ec17534..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py +++ /dev/null @@ -1,64 +0,0 @@ -# dataset settings -dataset_type = 'DRIVEDataset' -data_root = 'data/DRIVE' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -img_scale = (584, 565) -crop_size = (64, 64) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations'), - dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=img_scale, - # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ], - ), -] - -data = dict( - samples_per_gpu=4, - workers_per_gpu=4, - train=dict( - type='RepeatDataset', - times=40000, - dataset=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/training', - ann_dir='annotations/training', - pipeline=train_pipeline, - ), - ), - val=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/validation', - ann_dir='annotations/validation', - pipeline=test_pipeline, - ), - test=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/validation', - ann_dir='annotations/validation', - pipeline=test_pipeline, - ), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py deleted file mode 100644 index ef920a7e9491..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py +++ /dev/null @@ -1,64 +0,0 @@ -# dataset settings -dataset_type = 'HRFDataset' -data_root = 'data/HRF' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -img_scale = (2336, 3504) -crop_size = (256, 256) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations'), - dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=img_scale, - # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ], - ), -] - -data = dict( - samples_per_gpu=4, - workers_per_gpu=4, - train=dict( - type='RepeatDataset', - times=40000, - dataset=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/training', - ann_dir='annotations/training', - pipeline=train_pipeline, - ), - ), - val=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/validation', - ann_dir='annotations/validation', - pipeline=test_pipeline, - ), - test=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/validation', - ann_dir='annotations/validation', - pipeline=test_pipeline, - ), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py deleted file mode 100644 index 9b7a0d335b16..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py +++ /dev/null @@ -1,64 +0,0 @@ -# dataset settings -dataset_type = 'PascalContextDataset' -data_root = 'data/VOCdevkit/VOC2010/' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) - -img_scale = (520, 520) -crop_size = (480, 480) - -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations'), - dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=img_scale, - # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ], - ), -] -data = dict( - samples_per_gpu=4, - workers_per_gpu=4, - train=dict( - type=dataset_type, - data_root=data_root, - img_dir='JPEGImages', - ann_dir='SegmentationClassContext', - split='ImageSets/SegmentationContext/train.txt', - pipeline=train_pipeline, - ), - val=dict( - type=dataset_type, - data_root=data_root, - img_dir='JPEGImages', - ann_dir='SegmentationClassContext', - split='ImageSets/SegmentationContext/val.txt', - pipeline=test_pipeline, - ), - test=dict( - type=dataset_type, - data_root=data_root, - img_dir='JPEGImages', - ann_dir='SegmentationClassContext', - split='ImageSets/SegmentationContext/val.txt', - pipeline=test_pipeline, - ), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py deleted file mode 100644 index 8e757090c2a2..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py +++ /dev/null @@ -1,64 +0,0 @@ -# dataset settings -dataset_type = 'PascalContextDataset59' -data_root = 'data/VOCdevkit/VOC2010/' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) - -img_scale = (520, 520) -crop_size = (480, 480) - -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', reduce_zero_label=True), - dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=img_scale, - # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ], - ), -] -data = dict( - samples_per_gpu=4, - workers_per_gpu=4, - train=dict( - type=dataset_type, - data_root=data_root, - img_dir='JPEGImages', - ann_dir='SegmentationClassContext', - split='ImageSets/SegmentationContext/train.txt', - pipeline=train_pipeline, - ), - val=dict( - type=dataset_type, - data_root=data_root, - img_dir='JPEGImages', - ann_dir='SegmentationClassContext', - split='ImageSets/SegmentationContext/val.txt', - pipeline=test_pipeline, - ), - test=dict( - type=dataset_type, - data_root=data_root, - img_dir='JPEGImages', - ann_dir='SegmentationClassContext', - split='ImageSets/SegmentationContext/val.txt', - pipeline=test_pipeline, - ), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py deleted file mode 100644 index 55d49f3b0156..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py +++ /dev/null @@ -1,61 +0,0 @@ -# dataset settings -dataset_type = 'PascalVOCDataset' -data_root = 'data/VOCdevkit/VOC2012' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -crop_size = (512, 512) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations'), - dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(2048, 512), - # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ], - ), -] -data = dict( - samples_per_gpu=4, - workers_per_gpu=4, - train=dict( - type=dataset_type, - data_root=data_root, - img_dir='JPEGImages', - ann_dir='SegmentationClass', - split='ImageSets/Segmentation/train.txt', - pipeline=train_pipeline, - ), - val=dict( - type=dataset_type, - data_root=data_root, - img_dir='JPEGImages', - ann_dir='SegmentationClass', - split='ImageSets/Segmentation/val.txt', - pipeline=test_pipeline, - ), - test=dict( - type=dataset_type, - data_root=data_root, - img_dir='JPEGImages', - ann_dir='SegmentationClass', - split='ImageSets/Segmentation/val.txt', - pipeline=test_pipeline, - ), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py deleted file mode 100644 index 5dfc7c2e640a..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py +++ /dev/null @@ -1,8 +0,0 @@ -_base_ = './pascal_voc12.py' -# dataset settings -data = dict( - train=dict( - ann_dir=['SegmentationClass', 'SegmentationClassAug'], - split=['ImageSets/Segmentation/train.txt', 'ImageSets/Segmentation/aug.txt'], - ) -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py deleted file mode 100644 index c2e6bbc32e0b..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py +++ /dev/null @@ -1,64 +0,0 @@ -# dataset settings -dataset_type = 'STAREDataset' -data_root = 'data/STARE' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -img_scale = (605, 700) -crop_size = (128, 128) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations'), - dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=img_scale, - # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ], - ), -] - -data = dict( - samples_per_gpu=4, - workers_per_gpu=4, - train=dict( - type='RepeatDataset', - times=40000, - dataset=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/training', - ann_dir='annotations/training', - pipeline=train_pipeline, - ), - ), - val=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/validation', - ann_dir='annotations/validation', - pipeline=test_pipeline, - ), - test=dict( - type=dataset_type, - data_root=data_root, - img_dir='images/validation', - ann_dir='annotations/validation', - pipeline=test_pipeline, - ), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py deleted file mode 100644 index 42ed60a779ae..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py +++ /dev/null @@ -1,15 +0,0 @@ -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook', by_epoch=False), - # dict(type='TensorboardLoggerHook') - ], -) -# yapf:enable -dist_params = dict(backend='nccl') -log_level = 'INFO' -load_from = None -resume_from = None -workflow = [('train', 1)] -cudnn_benchmark = True diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py deleted file mode 100644 index 74d4d7851a59..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py +++ /dev/null @@ -1,48 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='ANNHead', - in_channels=[1024, 2048], - in_index=[2, 3], - channels=512, - project_channels=256, - query_scales=(1,), - key_pool_scales=(1, 3, 6, 8), - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py deleted file mode 100644 index 96ece2073821..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py +++ /dev/null @@ -1,46 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='APCHead', - in_channels=2048, - in_index=3, - channels=512, - pool_scales=(1, 2, 3, 6), - dropout_ratio=0.1, - num_classes=19, - norm_cfg=dict(type='SyncBN', requires_grad=True), - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py deleted file mode 100644 index b949aa80e45e..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py +++ /dev/null @@ -1,46 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='CCHead', - in_channels=2048, - in_index=3, - channels=512, - recurrence=2, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py deleted file mode 100644 index 19f45463bbb9..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py +++ /dev/null @@ -1,54 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) -model = dict( - type='EncoderDecoder', - backbone=dict( - type='CGNet', - norm_cfg=norm_cfg, - in_channels=3, - num_channels=(32, 64, 128), - num_blocks=(3, 21), - dilations=(2, 4), - reductions=(8, 16), - ), - decode_head=dict( - type='FCNHead', - in_channels=256, - in_index=2, - channels=256, - num_convs=0, - concat_input=False, - dropout_ratio=0, - num_classes=19, - norm_cfg=norm_cfg, - loss_decode=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0, - class_weight=[ - 2.5959933, - 6.7415504, - 3.5354059, - 9.8663225, - 9.690899, - 9.369352, - 10.289121, - 9.953208, - 4.3097677, - 9.490387, - 7.674431, - 9.396905, - 10.347791, - 6.3927646, - 10.226669, - 10.241062, - 10.280587, - 10.396974, - 10.055647, - ], - ), - ), - # model training and testing settings - train_cfg=dict(sampler=None), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py deleted file mode 100644 index 758161a914a5..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py +++ /dev/null @@ -1,46 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='DAHead', - in_channels=2048, - in_index=3, - channels=512, - pam_channels=64, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py deleted file mode 100644 index 501b207c0de2..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py +++ /dev/null @@ -1,46 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='ASPPHead', - in_channels=2048, - in_index=3, - channels=512, - dilations=(1, 12, 24, 36), - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py deleted file mode 100644 index 4f1a8536caf9..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py +++ /dev/null @@ -1,52 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained=None, - backbone=dict( - type='UNet', - in_channels=3, - base_channels=64, - num_stages=5, - strides=(1, 1, 1, 1, 1), - enc_num_convs=(2, 2, 2, 2, 2), - dec_num_convs=(2, 2, 2, 2), - downsamples=(True, True, True, True), - enc_dilations=(1, 1, 1, 1, 1), - dec_dilations=(1, 1, 1, 1), - with_cp=False, - conv_cfg=None, - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU'), - upsample_cfg=dict(type='InterpConv'), - norm_eval=False, - ), - decode_head=dict( - type='ASPPHead', - in_channels=64, - in_index=4, - channels=16, - dilations=(1, 12, 24, 36), - dropout_ratio=0.1, - num_classes=2, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=128, - in_index=3, - channels=64, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=2, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='slide', crop_size=256, stride=170), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py deleted file mode 100644 index fbf847d8941d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py +++ /dev/null @@ -1,48 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='DepthwiseSeparableASPPHead', - in_channels=2048, - in_index=3, - channels=512, - dilations=(1, 12, 24, 36), - c1_in_channels=256, - c1_channels=48, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py deleted file mode 100644 index 42ab79c4ce82..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py +++ /dev/null @@ -1,46 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='DMHead', - in_channels=2048, - in_index=3, - channels=512, - filter_sizes=(1, 3, 5, 7), - dropout_ratio=0.1, - num_classes=19, - norm_cfg=dict(type='SyncBN', requires_grad=True), - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py deleted file mode 100644 index 5e6656c49b78..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py +++ /dev/null @@ -1,48 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='DNLHead', - in_channels=2048, - in_index=3, - channels=512, - dropout_ratio=0.1, - reduction=2, - use_scale=True, - mode='embedded_gaussian', - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py deleted file mode 100644 index ff8a84c1c491..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py +++ /dev/null @@ -1,49 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='EMAHead', - in_channels=2048, - in_index=3, - channels=256, - ema_channels=512, - num_bases=64, - num_stages=3, - momentum=0.1, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py deleted file mode 100644 index c61fb7d77e35..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py +++ /dev/null @@ -1,49 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='EncHead', - in_channels=[512, 1024, 2048], - in_index=(1, 2, 3), - channels=512, - num_codes=32, - use_se_loss=True, - add_lateral=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_se_decode=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py deleted file mode 100644 index d6a4fb7205dc..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py +++ /dev/null @@ -1,59 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01) -model = dict( - type='EncoderDecoder', - backbone=dict( - type='FastSCNN', - downsample_dw_channels=(32, 48), - global_in_channels=64, - global_block_channels=(64, 96, 128), - global_block_strides=(2, 2, 1), - global_out_channels=128, - higher_in_channels=64, - lower_in_channels=128, - fusion_out_channels=128, - out_indices=(0, 1, 2), - norm_cfg=norm_cfg, - align_corners=False, - ), - decode_head=dict( - type='DepthwiseSeparableFCNHead', - in_channels=128, - channels=128, - concat_input=False, - num_classes=19, - in_index=-1, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4), - ), - auxiliary_head=[ - dict( - type='FCNHead', - in_channels=128, - channels=32, - num_convs=1, - num_classes=19, - in_index=-2, - norm_cfg=norm_cfg, - concat_input=False, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4), - ), - dict( - type='FCNHead', - in_channels=64, - channels=32, - num_convs=1, - num_classes=19, - in_index=-3, - norm_cfg=norm_cfg, - concat_input=False, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4), - ), - ], - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py deleted file mode 100644 index 0c20335075a9..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py +++ /dev/null @@ -1,37 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://msra/hrnetv2_w18', - backbone=dict( - type='HRNet', - norm_cfg=norm_cfg, - norm_eval=False, - extra=dict( - stage1=dict(num_modules=1, num_branches=1, block='BOTTLENECK', num_blocks=(4,), num_channels=(64,)), - stage2=dict(num_modules=1, num_branches=2, block='BASIC', num_blocks=(4, 4), num_channels=(18, 36)), - stage3=dict(num_modules=4, num_branches=3, block='BASIC', num_blocks=(4, 4, 4), num_channels=(18, 36, 72)), - stage4=dict( - num_modules=3, num_branches=4, block='BASIC', num_blocks=(4, 4, 4, 4), num_channels=(18, 36, 72, 144) - ), - ), - ), - decode_head=dict( - type='FCNHead', - in_channels=[18, 36, 72, 144], - in_index=(0, 1, 2, 3), - channels=sum([18, 36, 72, 144]), - input_transform='resize_concat', - kernel_size=1, - num_convs=1, - concat_input=False, - dropout_ratio=-1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py deleted file mode 100644 index 43364899324f..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py +++ /dev/null @@ -1,47 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='FCNHead', - in_channels=2048, - in_index=3, - channels=512, - num_convs=2, - concat_input=True, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py deleted file mode 100644 index ebfd9879787a..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py +++ /dev/null @@ -1,53 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained=None, - backbone=dict( - type='UNet', - in_channels=3, - base_channels=64, - num_stages=5, - strides=(1, 1, 1, 1, 1), - enc_num_convs=(2, 2, 2, 2, 2), - dec_num_convs=(2, 2, 2, 2), - downsamples=(True, True, True, True), - enc_dilations=(1, 1, 1, 1, 1), - dec_dilations=(1, 1, 1, 1), - with_cp=False, - conv_cfg=None, - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU'), - upsample_cfg=dict(type='InterpConv'), - norm_eval=False, - ), - decode_head=dict( - type='FCNHead', - in_channels=64, - in_index=4, - channels=64, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=2, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=128, - in_index=3, - channels=64, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=2, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='slide', crop_size=256, stride=170), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py deleted file mode 100644 index a51398d3a5b0..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py +++ /dev/null @@ -1,34 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 1, 1), - strides=(1, 2, 2, 2), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - neck=dict(type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=4), - decode_head=dict( - type='FPNHead', - in_channels=[256, 256, 256, 256], - in_index=[0, 1, 2, 3], - feature_strides=[4, 8, 16, 32], - channels=128, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py deleted file mode 100644 index f81960a35c2d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py +++ /dev/null @@ -1,32 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - backbone=dict( - type='UniFormer', - embed_dim=[64, 128, 320, 512], - layers=[3, 4, 8, 3], - head_dim=64, - mlp_ratio=4.0, - qkv_bias=True, - drop_rate=0.0, - attn_drop_rate=0.0, - drop_path_rate=0.1, - ), - neck=dict(type='FPN', in_channels=[64, 128, 320, 512], out_channels=256, num_outs=4), - decode_head=dict( - type='FPNHead', - in_channels=[256, 256, 256, 256], - in_index=[0, 1, 2, 3], - feature_strides=[4, 8, 16, 32], - channels=128, - dropout_ratio=0.1, - num_classes=150, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py deleted file mode 100644 index c1d4477e0250..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py +++ /dev/null @@ -1,48 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='GCHead', - in_channels=2048, - in_index=3, - channels=512, - ratio=1 / 4.0, - pooling_type='att', - fusion_types=('channel_add',), - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py deleted file mode 100644 index 2b2fa51f8d01..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py +++ /dev/null @@ -1,22 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) -model = dict( - type='EncoderDecoder', - backbone=dict(type='MobileNetV3', arch='large', out_indices=(1, 3, 16), norm_cfg=norm_cfg), - decode_head=dict( - type='LRASPPHead', - in_channels=(16, 24, 960), - in_index=(0, 1, 2), - channels=128, - input_transform='multiple_select', - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU'), - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py deleted file mode 100644 index 7477ac076da2..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py +++ /dev/null @@ -1,48 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='NLHead', - in_channels=2048, - in_index=3, - channels=512, - dropout_ratio=0.1, - reduction=2, - use_scale=True, - mode='embedded_gaussian', - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py deleted file mode 100644 index 282f7d239eb5..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py +++ /dev/null @@ -1,53 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='CascadeEncoderDecoder', - num_stages=2, - pretrained='open-mmlab://msra/hrnetv2_w18', - backbone=dict( - type='HRNet', - norm_cfg=norm_cfg, - norm_eval=False, - extra=dict( - stage1=dict(num_modules=1, num_branches=1, block='BOTTLENECK', num_blocks=(4,), num_channels=(64,)), - stage2=dict(num_modules=1, num_branches=2, block='BASIC', num_blocks=(4, 4), num_channels=(18, 36)), - stage3=dict(num_modules=4, num_branches=3, block='BASIC', num_blocks=(4, 4, 4), num_channels=(18, 36, 72)), - stage4=dict( - num_modules=3, num_branches=4, block='BASIC', num_blocks=(4, 4, 4, 4), num_channels=(18, 36, 72, 144) - ), - ), - ), - decode_head=[ - dict( - type='FCNHead', - in_channels=[18, 36, 72, 144], - channels=sum([18, 36, 72, 144]), - in_index=(0, 1, 2, 3), - input_transform='resize_concat', - kernel_size=1, - num_convs=1, - concat_input=False, - dropout_ratio=-1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - dict( - type='OCRHead', - in_channels=[18, 36, 72, 144], - in_index=(0, 1, 2, 3), - input_transform='resize_concat', - channels=512, - ocr_channels=256, - dropout_ratio=-1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - ], - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py deleted file mode 100644 index a5dcc09b6750..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py +++ /dev/null @@ -1,49 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='CascadeEncoderDecoder', - num_stages=2, - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=[ - dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - dict( - type='OCRHead', - in_channels=2048, - in_index=3, - channels=512, - ocr_channels=256, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - ], - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py deleted file mode 100644 index 88ec38a37a5c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py +++ /dev/null @@ -1,49 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='CascadeEncoderDecoder', - num_stages=2, - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 1, 1), - strides=(1, 2, 2, 2), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - neck=dict(type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=4), - decode_head=[ - dict( - type='FPNHead', - in_channels=[256, 256, 256, 256], - in_index=[0, 1, 2, 3], - feature_strides=[4, 8, 16, 32], - channels=128, - dropout_ratio=-1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - dict( - type='PointHead', - in_channels=[256], - in_index=[0], - channels=256, - num_fcs=3, - coarse_pred_each_layer=True, - dropout_ratio=-1, - num_classes=19, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - ], - # model training and testing settings - train_cfg=dict(num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75), - test_cfg=dict(mode='whole', subdivision_steps=2, subdivision_num_points=8196, scale_factor=2), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py deleted file mode 100644 index 07aba72c3f7d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py +++ /dev/null @@ -1,51 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='PSAHead', - in_channels=2048, - in_index=3, - channels=512, - mask_size=(97, 97), - psa_type='bi-direction', - compact=False, - shrink_factor=2, - normalization_factor=1.0, - psa_softmax=True, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py deleted file mode 100644 index e6c2a5534fc0..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py +++ /dev/null @@ -1,46 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='PSPHead', - in_channels=2048, - in_index=3, - channels=512, - pool_scales=(1, 2, 3, 6), - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py deleted file mode 100644 index 7010b76bc4e0..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py +++ /dev/null @@ -1,52 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained=None, - backbone=dict( - type='UNet', - in_channels=3, - base_channels=64, - num_stages=5, - strides=(1, 1, 1, 1, 1), - enc_num_convs=(2, 2, 2, 2, 2), - dec_num_convs=(2, 2, 2, 2), - downsamples=(True, True, True, True), - enc_dilations=(1, 1, 1, 1, 1), - dec_dilations=(1, 1, 1, 1), - with_cp=False, - conv_cfg=None, - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU'), - upsample_cfg=dict(type='InterpConv'), - norm_eval=False, - ), - decode_head=dict( - type='PSPHead', - in_channels=64, - in_index=4, - channels=16, - pool_scales=(1, 2, 3, 6), - dropout_ratio=0.1, - num_classes=2, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=128, - in_index=3, - channels=64, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=2, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='slide', crop_size=256, stride=170), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py deleted file mode 100644 index bef6484ab3ae..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py +++ /dev/null @@ -1,46 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='open-mmlab://resnet50_v1c', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 1, 1), - strides=(1, 2, 2, 2), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True, - ), - decode_head=dict( - type='UPerHead', - in_channels=[256, 512, 1024, 2048], - in_index=[0, 1, 2, 3], - pool_scales=(1, 2, 3, 6), - channels=512, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py deleted file mode 100644 index df70f56cf9a0..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py +++ /dev/null @@ -1,45 +0,0 @@ -# model settings -norm_cfg = dict(type='BN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained=None, - backbone=dict( - type='UniFormer', - embed_dim=[64, 128, 320, 512], - layers=[3, 4, 8, 3], - head_dim=64, - mlp_ratio=4.0, - qkv_bias=True, - drop_rate=0.0, - attn_drop_rate=0.0, - drop_path_rate=0.1, - ), - decode_head=dict( - type='UPerHead', - in_channels=[64, 128, 320, 512], - in_index=[0, 1, 2, 3], - pool_scales=(1, 2, 3, 6), - channels=512, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ), - auxiliary_head=dict( - type='FCNHead', - in_channels=320, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), - ), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), -) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_160k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_160k.py deleted file mode 100644 index 52603890b10f..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_160k.py +++ /dev/null @@ -1,9 +0,0 @@ -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) -optimizer_config = dict() -# learning policy -lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) -# runtime settings -runner = dict(type='IterBasedRunner', max_iters=160000) -checkpoint_config = dict(by_epoch=False, interval=16000) -evaluation = dict(interval=16000, metric='mIoU') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_20k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_20k.py deleted file mode 100644 index bf780a1b6f65..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_20k.py +++ /dev/null @@ -1,9 +0,0 @@ -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) -optimizer_config = dict() -# learning policy -lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) -# runtime settings -runner = dict(type='IterBasedRunner', max_iters=20000) -checkpoint_config = dict(by_epoch=False, interval=2000) -evaluation = dict(interval=2000, metric='mIoU') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_40k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_40k.py deleted file mode 100644 index cdbf841abcb2..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_40k.py +++ /dev/null @@ -1,9 +0,0 @@ -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) -optimizer_config = dict() -# learning policy -lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) -# runtime settings -runner = dict(type='IterBasedRunner', max_iters=40000) -checkpoint_config = dict(by_epoch=False, interval=4000) -evaluation = dict(interval=4000, metric='mIoU') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_80k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_80k.py deleted file mode 100644 index c190cee6bdc7..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_80k.py +++ /dev/null @@ -1,9 +0,0 @@ -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) -optimizer_config = dict() -# learning policy -lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) -# runtime settings -runner = dict(type='IterBasedRunner', max_iters=80000) -checkpoint_config = dict(by_epoch=False, interval=8000) -evaluation = dict(interval=8000, metric='mIoU') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py deleted file mode 100644 index 3d17fe03c602..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py +++ /dev/null @@ -1,48 +0,0 @@ -_base_ = [ - '../../configs/_base_/models/upernet_uniformer.py', - '../../configs/_base_/datasets/ade20k.py', - '../../configs/_base_/default_runtime.py', - '../../configs/_base_/schedules/schedule_160k.py', -] -model = dict( - backbone=dict( - type='UniFormer', - embed_dim=[64, 128, 320, 512], - layers=[3, 4, 8, 3], - head_dim=64, - drop_path_rate=0.25, - windows=False, - hybrid=False, - ), - decode_head=dict(in_channels=[64, 128, 320, 512], num_classes=150), - auxiliary_head=dict(in_channels=320, num_classes=150), -) - -# AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.0), - 'relative_position_bias_table': dict(decay_mult=0.0), - 'norm': dict(decay_mult=0.0), - } - ), -) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False, -) - -data = dict(samples_per_gpu=2) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/run.sh b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/run.sh deleted file mode 100644 index 9fb22edfa7a3..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/run.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash - -work_path=$(dirname $0) -PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \ -python -m torch.distributed.launch --nproc_per_node=8 \ - tools/train.py ${work_path}/config.py \ - --launcher pytorch \ - --options model.backbone.pretrained_path='your_model_path/uniformer_small_in1k.pth' \ - --work-dir ${work_path}/ckpt \ - 2>&1 | tee -a ${work_path}/log.txt diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test.sh b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test.sh deleted file mode 100644 index d9a85e7a0d3b..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash - -work_path=$(dirname $0) -PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \ -python -m torch.distributed.launch --nproc_per_node=8 \ - tools/test.py ${work_path}/test_config_h32.py \ - ${work_path}/ckpt/latest.pth \ - --launcher pytorch \ - --eval mIoU \ - 2>&1 | tee -a ${work_path}/log.txt diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py deleted file mode 100644 index 3d17fe03c602..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py +++ /dev/null @@ -1,48 +0,0 @@ -_base_ = [ - '../../configs/_base_/models/upernet_uniformer.py', - '../../configs/_base_/datasets/ade20k.py', - '../../configs/_base_/default_runtime.py', - '../../configs/_base_/schedules/schedule_160k.py', -] -model = dict( - backbone=dict( - type='UniFormer', - embed_dim=[64, 128, 320, 512], - layers=[3, 4, 8, 3], - head_dim=64, - drop_path_rate=0.25, - windows=False, - hybrid=False, - ), - decode_head=dict(in_channels=[64, 128, 320, 512], num_classes=150), - auxiliary_head=dict(in_channels=320, num_classes=150), -) - -# AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.0), - 'relative_position_bias_table': dict(decay_mult=0.0), - 'norm': dict(decay_mult=0.0), - } - ), -) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False, -) - -data = dict(samples_per_gpu=2) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py deleted file mode 100644 index 4a5923cb210c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py +++ /dev/null @@ -1,49 +0,0 @@ -_base_ = [ - '../../configs/_base_/models/upernet_uniformer.py', - '../../configs/_base_/datasets/ade20k.py', - '../../configs/_base_/default_runtime.py', - '../../configs/_base_/schedules/schedule_160k.py', -] -model = dict( - backbone=dict( - type='UniFormer', - embed_dim=[64, 128, 320, 512], - layers=[3, 4, 8, 3], - head_dim=64, - drop_path_rate=0.25, - windows=False, - hybrid=True, - window_size=32, - ), - decode_head=dict(in_channels=[64, 128, 320, 512], num_classes=150), - auxiliary_head=dict(in_channels=320, num_classes=150), -) - -# AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.0), - 'relative_position_bias_table': dict(decay_mult=0.0), - 'norm': dict(decay_mult=0.0), - } - ), -) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False, -) - -data = dict(samples_per_gpu=2) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py deleted file mode 100644 index 4fde8ab1ebe4..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py +++ /dev/null @@ -1,49 +0,0 @@ -_base_ = [ - '../../configs/_base_/models/upernet_uniformer.py', - '../../configs/_base_/datasets/ade20k.py', - '../../configs/_base_/default_runtime.py', - '../../configs/_base_/schedules/schedule_160k.py', -] -model = dict( - backbone=dict( - type='UniFormer', - embed_dim=[64, 128, 320, 512], - layers=[3, 4, 8, 3], - head_dim=64, - drop_path_rate=0.25, - windows=True, - hybrid=False, - window_size=32, - ), - decode_head=dict(in_channels=[64, 128, 320, 512], num_classes=150), - auxiliary_head=dict(in_channels=320, num_classes=150), -) - -# AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.0), - 'relative_position_bias_table': dict(decay_mult=0.0), - 'norm': dict(decay_mult=0.0), - } - ), -) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False, -) - -data = dict(samples_per_gpu=2) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/__init__.py deleted file mode 100644 index 210a29891383..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -# flake8: noqa -from .arraymisc import * -from .fileio import * -from .image import * -from .utils import * -from .version import * -from .video import * -from .visualization import * - -# The following modules are not imported to this level, so mmcv may be used -# without PyTorch. -# - runner -# - parallel -# - op diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/__init__.py deleted file mode 100644 index 4b4700d6139a..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .quantization import dequantize, quantize - -__all__ = ['quantize', 'dequantize'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py deleted file mode 100644 index 87ba022c1ced..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import numpy as np - - -def quantize(arr, min_val, max_val, levels, dtype=np.int64): - """Quantize an array of (-inf, inf) to [0, levels-1]. - - Args: - arr (ndarray): Input array. - min_val (scalar): Minimum value to be clipped. - max_val (scalar): Maximum value to be clipped. - levels (int): Quantization levels. - dtype (np.type): The type of the quantized array. - - Returns: - tuple: Quantized array. - """ - if not (isinstance(levels, int) and levels > 1): - raise ValueError(f'levels must be a positive integer, but got {levels}') - if min_val >= max_val: - raise ValueError(f'min_val ({min_val}) must be smaller than max_val ({max_val})') - - arr = np.clip(arr, min_val, max_val) - min_val - quantized_arr = np.minimum(np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1) - - return quantized_arr - - -def dequantize(arr, min_val, max_val, levels, dtype=np.float64): - """Dequantize an array. - - Args: - arr (ndarray): Input array. - min_val (scalar): Minimum value to be clipped. - max_val (scalar): Maximum value to be clipped. - levels (int): Quantization levels. - dtype (np.type): The type of the dequantized array. - - Returns: - tuple: Dequantized array. - """ - if not (isinstance(levels, int) and levels > 1): - raise ValueError(f'levels must be a positive integer, but got {levels}') - if min_val >= max_val: - raise ValueError(f'min_val ({min_val}) must be smaller than max_val ({max_val})') - - dequantized_arr = (arr + 0.5).astype(dtype) * (max_val - min_val) / levels + min_val - - return dequantized_arr diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py deleted file mode 100644 index f87bac5fafca..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .alexnet import AlexNet - -# yapf: disable -from .bricks import ( - ACTIVATION_LAYERS, - CONV_LAYERS, - NORM_LAYERS, - PADDING_LAYERS, - PLUGIN_LAYERS, - UPSAMPLE_LAYERS, - ContextBlock, - Conv2d, - Conv3d, - ConvAWS2d, - ConvModule, - ConvTranspose2d, - ConvTranspose3d, - ConvWS2d, - DepthwiseSeparableConvModule, - GeneralizedAttention, - HSigmoid, - HSwish, - Linear, - MaxPool2d, - MaxPool3d, - NonLocal1d, - NonLocal2d, - NonLocal3d, - Scale, - Swish, - build_activation_layer, - build_conv_layer, - build_norm_layer, - build_padding_layer, - build_plugin_layer, - build_upsample_layer, - conv_ws_2d, - is_norm, -) -from .builder import MODELS, build_model_from_cfg - -# yapf: enable -from .resnet import ResNet, make_res_layer -from .utils import ( - INITIALIZERS, - Caffe2XavierInit, - ConstantInit, - KaimingInit, - NormalInit, - PretrainedInit, - TruncNormalInit, - UniformInit, - XavierInit, - bias_init_with_prob, - caffe2_xavier_init, - constant_init, - fuse_conv_bn, - get_model_complexity_info, - initialize, - kaiming_init, - normal_init, - trunc_normal_init, - uniform_init, - xavier_init, -) -from .vgg import VGG, make_vgg_layer - -__all__ = [ - 'AlexNet', - 'VGG', - 'make_vgg_layer', - 'ResNet', - 'make_res_layer', - 'constant_init', - 'xavier_init', - 'normal_init', - 'trunc_normal_init', - 'uniform_init', - 'kaiming_init', - 'caffe2_xavier_init', - 'bias_init_with_prob', - 'ConvModule', - 'build_activation_layer', - 'build_conv_layer', - 'build_norm_layer', - 'build_padding_layer', - 'build_upsample_layer', - 'build_plugin_layer', - 'is_norm', - 'NonLocal1d', - 'NonLocal2d', - 'NonLocal3d', - 'ContextBlock', - 'HSigmoid', - 'Swish', - 'HSwish', - 'GeneralizedAttention', - 'ACTIVATION_LAYERS', - 'CONV_LAYERS', - 'NORM_LAYERS', - 'PADDING_LAYERS', - 'UPSAMPLE_LAYERS', - 'PLUGIN_LAYERS', - 'Scale', - 'get_model_complexity_info', - 'conv_ws_2d', - 'ConvAWS2d', - 'ConvWS2d', - 'fuse_conv_bn', - 'DepthwiseSeparableConvModule', - 'Linear', - 'Conv2d', - 'ConvTranspose2d', - 'MaxPool2d', - 'ConvTranspose3d', - 'MaxPool3d', - 'Conv3d', - 'initialize', - 'INITIALIZERS', - 'ConstantInit', - 'XavierInit', - 'NormalInit', - 'TruncNormalInit', - 'UniformInit', - 'KaimingInit', - 'PretrainedInit', - 'Caffe2XavierInit', - 'MODELS', - 'build_model_from_cfg', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py deleted file mode 100644 index e52d852bceaa..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import logging - -import torch.nn as nn - - -class AlexNet(nn.Module): - """AlexNet backbone. - - Args: - num_classes (int): number of classes for classification. - """ - - def __init__(self, num_classes=-1): - super(AlexNet, self).__init__() - self.num_classes = num_classes - self.features = nn.Sequential( - nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2), - nn.Conv2d(64, 192, kernel_size=5, padding=2), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2), - nn.Conv2d(192, 384, kernel_size=3, padding=1), - nn.ReLU(inplace=True), - nn.Conv2d(384, 256, kernel_size=3, padding=1), - nn.ReLU(inplace=True), - nn.Conv2d(256, 256, kernel_size=3, padding=1), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2), - ) - if self.num_classes > 0: - self.classifier = nn.Sequential( - nn.Dropout(), - nn.Linear(256 * 6 * 6, 4096), - nn.ReLU(inplace=True), - nn.Dropout(), - nn.Linear(4096, 4096), - nn.ReLU(inplace=True), - nn.Linear(4096, num_classes), - ) - - def init_weights(self, pretrained=None): - if isinstance(pretrained, str): - logger = logging.getLogger() - from ..runner import load_checkpoint - - load_checkpoint(self, pretrained, strict=False, logger=logger) - elif pretrained is None: - # use default initializer - pass - else: - raise TypeError('pretrained must be a str or None') - - def forward(self, x): - - x = self.features(x) - if self.num_classes > 0: - x = x.view(x.size(0), 256 * 6 * 6) - x = self.classifier(x) - - return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py deleted file mode 100644 index 4405eb058c4c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .activation import build_activation_layer -from .context_block import ContextBlock -from .conv import build_conv_layer -from .conv2d_adaptive_padding import Conv2dAdaptivePadding -from .conv_module import ConvModule -from .conv_ws import ConvAWS2d, ConvWS2d, conv_ws_2d -from .depthwise_separable_conv_module import DepthwiseSeparableConvModule -from .drop import Dropout, DropPath -from .generalized_attention import GeneralizedAttention -from .hsigmoid import HSigmoid -from .hswish import HSwish -from .non_local import NonLocal1d, NonLocal2d, NonLocal3d -from .norm import build_norm_layer, is_norm -from .padding import build_padding_layer -from .plugin import build_plugin_layer -from .registry import ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS -from .scale import Scale -from .swish import Swish -from .upsample import build_upsample_layer -from .wrappers import Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d, Linear, MaxPool2d, MaxPool3d - -__all__ = [ - 'ConvModule', - 'build_activation_layer', - 'build_conv_layer', - 'build_norm_layer', - 'build_padding_layer', - 'build_upsample_layer', - 'build_plugin_layer', - 'is_norm', - 'HSigmoid', - 'HSwish', - 'NonLocal1d', - 'NonLocal2d', - 'NonLocal3d', - 'ContextBlock', - 'GeneralizedAttention', - 'ACTIVATION_LAYERS', - 'CONV_LAYERS', - 'NORM_LAYERS', - 'PADDING_LAYERS', - 'UPSAMPLE_LAYERS', - 'PLUGIN_LAYERS', - 'Scale', - 'ConvAWS2d', - 'ConvWS2d', - 'conv_ws_2d', - 'DepthwiseSeparableConvModule', - 'Swish', - 'Linear', - 'Conv2dAdaptivePadding', - 'Conv2d', - 'ConvTranspose2d', - 'MaxPool2d', - 'ConvTranspose3d', - 'MaxPool3d', - 'Conv3d', - 'Dropout', - 'DropPath', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py deleted file mode 100644 index 74134627bd60..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.nn as nn -import torch.nn.functional as F - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import ( - TORCH_VERSION, - build_from_cfg, - digit_version, -) - -from .registry import ACTIVATION_LAYERS - -for module in [nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.RReLU, nn.ReLU6, nn.ELU, nn.Sigmoid, nn.Tanh]: - ACTIVATION_LAYERS.register_module(module=module) - - -@ACTIVATION_LAYERS.register_module(name='Clip') -@ACTIVATION_LAYERS.register_module() -class Clamp(nn.Module): - """Clamp activation layer. - - This activation function is to clamp the feature map value within - :math:`[min, max]`. More details can be found in ``torch.clamp()``. - - Args: - min (Number | optional): Lower-bound of the range to be clamped to. - Default to -1. - max (Number | optional): Upper-bound of the range to be clamped to. - Default to 1. - """ - - def __init__(self, min=-1.0, max=1.0): - super(Clamp, self).__init__() - self.min = min - self.max = max - - def forward(self, x): - """Forward function. - - Args: - x (torch.Tensor): The input tensor. - - Returns: - torch.Tensor: Clamped tensor. - """ - return torch.clamp(x, min=self.min, max=self.max) - - -class GELU(nn.Module): - r"""Applies the Gaussian Error Linear Units function: - - .. math:: - \text{GELU}(x) = x * \Phi(x) - where :math:`\Phi(x)` is the Cumulative Distribution Function for - Gaussian Distribution. - - Shape: - - Input: :math:`(N, *)` where `*` means, any number of additional - dimensions - - Output: :math:`(N, *)`, same shape as the input - - .. image:: scripts/activation_images/GELU.png - - Examples:: - - >>> m = nn.GELU() - >>> input = torch.randn(2) - >>> output = m(input) - """ - - def forward(self, input): - return F.gelu(input) - - -if TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.4'): - ACTIVATION_LAYERS.register_module(module=GELU) -else: - ACTIVATION_LAYERS.register_module(module=nn.GELU) - - -def build_activation_layer(cfg): - """Build activation layer. - - Args: - cfg (dict): The activation layer config, which should contain: - - type (str): Layer type. - - layer args: Args needed to instantiate an activation layer. - - Returns: - nn.Module: Created activation layer. - """ - return build_from_cfg(cfg, ACTIVATION_LAYERS) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py deleted file mode 100644 index fd2a4b80ea18..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -from torch import nn - -from ..utils import constant_init, kaiming_init -from .registry import PLUGIN_LAYERS - - -def last_zero_init(m): - if isinstance(m, nn.Sequential): - constant_init(m[-1], val=0) - else: - constant_init(m, val=0) - - -@PLUGIN_LAYERS.register_module() -class ContextBlock(nn.Module): - """ContextBlock module in GCNet. - - See 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' - (https://arxiv.org/abs/1904.11492) for details. - - Args: - in_channels (int): Channels of the input feature map. - ratio (float): Ratio of channels of transform bottleneck - pooling_type (str): Pooling method for context modeling. - Options are 'att' and 'avg', stand for attention pooling and - average pooling respectively. Default: 'att'. - fusion_types (Sequence[str]): Fusion method for feature fusion, - Options are 'channels_add', 'channel_mul', stand for channelwise - addition and multiplication respectively. Default: ('channel_add',) - """ - - _abbr_ = 'context_block' - - def __init__(self, in_channels, ratio, pooling_type='att', fusion_types=('channel_add',)): - super(ContextBlock, self).__init__() - assert pooling_type in ['avg', 'att'] - assert isinstance(fusion_types, (list, tuple)) - valid_fusion_types = ['channel_add', 'channel_mul'] - assert all([f in valid_fusion_types for f in fusion_types]) - assert len(fusion_types) > 0, 'at least one fusion should be used' - self.in_channels = in_channels - self.ratio = ratio - self.planes = int(in_channels * ratio) - self.pooling_type = pooling_type - self.fusion_types = fusion_types - if pooling_type == 'att': - self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=1) - self.softmax = nn.Softmax(dim=2) - else: - self.avg_pool = nn.AdaptiveAvgPool2d(1) - if 'channel_add' in fusion_types: - self.channel_add_conv = nn.Sequential( - nn.Conv2d(self.in_channels, self.planes, kernel_size=1), - nn.LayerNorm([self.planes, 1, 1]), - nn.ReLU(inplace=True), # yapf: disable - nn.Conv2d(self.planes, self.in_channels, kernel_size=1), - ) - else: - self.channel_add_conv = None - if 'channel_mul' in fusion_types: - self.channel_mul_conv = nn.Sequential( - nn.Conv2d(self.in_channels, self.planes, kernel_size=1), - nn.LayerNorm([self.planes, 1, 1]), - nn.ReLU(inplace=True), # yapf: disable - nn.Conv2d(self.planes, self.in_channels, kernel_size=1), - ) - else: - self.channel_mul_conv = None - self.reset_parameters() - - def reset_parameters(self): - if self.pooling_type == 'att': - kaiming_init(self.conv_mask, mode='fan_in') - self.conv_mask.inited = True - - if self.channel_add_conv is not None: - last_zero_init(self.channel_add_conv) - if self.channel_mul_conv is not None: - last_zero_init(self.channel_mul_conv) - - def spatial_pool(self, x): - batch, channel, height, width = x.size() - if self.pooling_type == 'att': - input_x = x - # [N, C, H * W] - input_x = input_x.view(batch, channel, height * width) - # [N, 1, C, H * W] - input_x = input_x.unsqueeze(1) - # [N, 1, H, W] - context_mask = self.conv_mask(x) - # [N, 1, H * W] - context_mask = context_mask.view(batch, 1, height * width) - # [N, 1, H * W] - context_mask = self.softmax(context_mask) - # [N, 1, H * W, 1] - context_mask = context_mask.unsqueeze(-1) - # [N, 1, C, 1] - context = torch.matmul(input_x, context_mask) - # [N, C, 1, 1] - context = context.view(batch, channel, 1, 1) - else: - # [N, C, 1, 1] - context = self.avg_pool(x) - - return context - - def forward(self, x): - # [N, C, 1, 1] - context = self.spatial_pool(x) - - out = x - if self.channel_mul_conv is not None: - # [N, C, 1, 1] - channel_mul_term = torch.sigmoid(self.channel_mul_conv(context)) - out = out * channel_mul_term - if self.channel_add_conv is not None: - # [N, C, 1, 1] - channel_add_term = self.channel_add_conv(context) - out = out + channel_add_term - - return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv.py deleted file mode 100644 index cf54491997a4..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from torch import nn - -from .registry import CONV_LAYERS - -CONV_LAYERS.register_module('Conv1d', module=nn.Conv1d) -CONV_LAYERS.register_module('Conv2d', module=nn.Conv2d) -CONV_LAYERS.register_module('Conv3d', module=nn.Conv3d) -CONV_LAYERS.register_module('Conv', module=nn.Conv2d) - - -def build_conv_layer(cfg, *args, **kwargs): - """Build convolution layer. - - Args: - cfg (None or dict): The conv layer config, which should contain: - - type (str): Layer type. - - layer args: Args needed to instantiate an conv layer. - args (argument list): Arguments passed to the `__init__` - method of the corresponding conv layer. - kwargs (keyword arguments): Keyword arguments passed to the `__init__` - method of the corresponding conv layer. - - Returns: - nn.Module: Created conv layer. - """ - if cfg is None: - cfg_ = dict(type='Conv2d') - else: - if not isinstance(cfg, dict): - raise TypeError('cfg must be a dict') - if 'type' not in cfg: - raise KeyError('the cfg dict must contain the key "type"') - cfg_ = cfg.copy() - - layer_type = cfg_.pop('type') - if layer_type not in CONV_LAYERS: - raise KeyError(f'Unrecognized norm type {layer_type}') - else: - conv_layer = CONV_LAYERS.get(layer_type) - - layer = conv_layer(*args, **kwargs, **cfg_) - - return layer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py deleted file mode 100644 index 39f9c01dd794..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import math - -from torch import nn -from torch.nn import functional as F - -from .registry import CONV_LAYERS - - -@CONV_LAYERS.register_module() -class Conv2dAdaptivePadding(nn.Conv2d): - """Implementation of 2D convolution in tensorflow with `padding` as "same", - which applies padding to input (if needed) so that input image gets fully - covered by filter and stride you specified. For stride 1, this will ensure - that output image size is same as input. For stride of 2, output dimensions - will be half, for example. - - Args: - in_channels (int): Number of channels in the input image - out_channels (int): Number of channels produced by the convolution - kernel_size (int or tuple): Size of the convolving kernel - stride (int or tuple, optional): Stride of the convolution. Default: 1 - padding (int or tuple, optional): Zero-padding added to both sides of - the input. Default: 0 - dilation (int or tuple, optional): Spacing between kernel elements. - Default: 1 - groups (int, optional): Number of blocked connections from input - channels to output channels. Default: 1 - bias (bool, optional): If ``True``, adds a learnable bias to the - output. Default: ``True`` - """ - - def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True): - super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) - - def forward(self, x): - img_h, img_w = x.size()[-2:] - kernel_h, kernel_w = self.weight.size()[-2:] - stride_h, stride_w = self.stride - output_h = math.ceil(img_h / stride_h) - output_w = math.ceil(img_w / stride_w) - pad_h = max((output_h - 1) * self.stride[0] + (kernel_h - 1) * self.dilation[0] + 1 - img_h, 0) - pad_w = max((output_w - 1) * self.stride[1] + (kernel_w - 1) * self.dilation[1] + 1 - img_w, 0) - if pad_h > 0 or pad_w > 0: - x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) - return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py deleted file mode 100644 index 3e9f76b3f9e5..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import warnings - -import torch.nn as nn - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import _BatchNorm, _InstanceNorm -from ..utils import constant_init, kaiming_init -from .activation import build_activation_layer -from .conv import build_conv_layer -from .norm import build_norm_layer -from .padding import build_padding_layer -from .registry import PLUGIN_LAYERS - - -@PLUGIN_LAYERS.register_module() -class ConvModule(nn.Module): - """A conv block that bundles conv/norm/activation layers. - - This block simplifies the usage of convolution layers, which are commonly - used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU). - It is based upon three build methods: `build_conv_layer()`, - `build_norm_layer()` and `build_activation_layer()`. - - Besides, we add some additional features in this module. - 1. Automatically set `bias` of the conv layer. - 2. Spectral norm is supported. - 3. More padding modes are supported. Before PyTorch 1.5, nn.Conv2d only - supports zero and circular padding, and we add "reflect" padding mode. - - Args: - in_channels (int): Number of channels in the input feature map. - Same as that in ``nn._ConvNd``. - out_channels (int): Number of channels produced by the convolution. - Same as that in ``nn._ConvNd``. - kernel_size (int | tuple[int]): Size of the convolving kernel. - Same as that in ``nn._ConvNd``. - stride (int | tuple[int]): Stride of the convolution. - Same as that in ``nn._ConvNd``. - padding (int | tuple[int]): Zero-padding added to both sides of - the input. Same as that in ``nn._ConvNd``. - dilation (int | tuple[int]): Spacing between kernel elements. - Same as that in ``nn._ConvNd``. - groups (int): Number of blocked connections from input channels to - output channels. Same as that in ``nn._ConvNd``. - bias (bool | str): If specified as `auto`, it will be decided by the - norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise - False. Default: "auto". - conv_cfg (dict): Config dict for convolution layer. Default: None, - which means using conv2d. - norm_cfg (dict): Config dict for normalization layer. Default: None. - act_cfg (dict): Config dict for activation layer. - Default: dict(type='ReLU'). - inplace (bool): Whether to use inplace mode for activation. - Default: True. - with_spectral_norm (bool): Whether use spectral norm in conv module. - Default: False. - padding_mode (str): If the `padding_mode` has not been supported by - current `Conv2d` in PyTorch, we will use our own padding layer - instead. Currently, we support ['zeros', 'circular'] with official - implementation and ['reflect'] with our own implementation. - Default: 'zeros'. - order (tuple[str]): The order of conv/norm/activation layers. It is a - sequence of "conv", "norm" and "act". Common examples are - ("conv", "norm", "act") and ("act", "conv", "norm"). - Default: ('conv', 'norm', 'act'). - """ - - _abbr_ = 'conv_block' - - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - bias='auto', - conv_cfg=None, - norm_cfg=None, - act_cfg=dict(type='ReLU'), - inplace=True, - with_spectral_norm=False, - padding_mode='zeros', - order=('conv', 'norm', 'act'), - ): - super(ConvModule, self).__init__() - assert conv_cfg is None or isinstance(conv_cfg, dict) - assert norm_cfg is None or isinstance(norm_cfg, dict) - assert act_cfg is None or isinstance(act_cfg, dict) - official_padding_mode = ['zeros', 'circular'] - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.act_cfg = act_cfg - self.inplace = inplace - self.with_spectral_norm = with_spectral_norm - self.with_explicit_padding = padding_mode not in official_padding_mode - self.order = order - assert isinstance(self.order, tuple) and len(self.order) == 3 - assert set(order) == set(['conv', 'norm', 'act']) - - self.with_norm = norm_cfg is not None - self.with_activation = act_cfg is not None - # if the conv layer is before a norm layer, bias is unnecessary. - if bias == 'auto': - bias = not self.with_norm - self.with_bias = bias - - if self.with_explicit_padding: - pad_cfg = dict(type=padding_mode) - self.padding_layer = build_padding_layer(pad_cfg, padding) - - # reset padding to 0 for conv module - conv_padding = 0 if self.with_explicit_padding else padding - # build convolution layer - self.conv = build_conv_layer( - conv_cfg, - in_channels, - out_channels, - kernel_size, - stride=stride, - padding=conv_padding, - dilation=dilation, - groups=groups, - bias=bias, - ) - # export the attributes of self.conv to a higher level for convenience - self.in_channels = self.conv.in_channels - self.out_channels = self.conv.out_channels - self.kernel_size = self.conv.kernel_size - self.stride = self.conv.stride - self.padding = padding - self.dilation = self.conv.dilation - self.transposed = self.conv.transposed - self.output_padding = self.conv.output_padding - self.groups = self.conv.groups - - if self.with_spectral_norm: - self.conv = nn.utils.spectral_norm(self.conv) - - # build normalization layers - if self.with_norm: - # norm layer is after conv layer - if order.index('norm') > order.index('conv'): - norm_channels = out_channels - else: - norm_channels = in_channels - self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels) - self.add_module(self.norm_name, norm) - if self.with_bias: - if isinstance(norm, (_BatchNorm, _InstanceNorm)): - warnings.warn('Unnecessary conv bias before batch/instance norm') - else: - self.norm_name = None - - # build activation layer - if self.with_activation: - act_cfg_ = act_cfg.copy() - # nn.Tanh has no 'inplace' argument - if act_cfg_['type'] not in ['Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish']: - act_cfg_.setdefault('inplace', inplace) - self.activate = build_activation_layer(act_cfg_) - - # Use msra init by default - self.init_weights() - - @property - def norm(self): - if self.norm_name: - return getattr(self, self.norm_name) - else: - return None - - def init_weights(self): - # 1. It is mainly for customized conv layers with their own - # initialization manners by calling their own ``init_weights()``, - # and we do not want ConvModule to override the initialization. - # 2. For customized conv layers without their own initialization - # manners (that is, they don't have their own ``init_weights()``) - # and PyTorch's conv layers, they will be initialized by - # this method with default ``kaiming_init``. - # Note: For PyTorch's conv layers, they will be overwritten by our - # initialization implementation using default ``kaiming_init``. - if not hasattr(self.conv, 'init_weights'): - if self.with_activation and self.act_cfg['type'] == 'LeakyReLU': - nonlinearity = 'leaky_relu' - a = self.act_cfg.get('negative_slope', 0.01) - else: - nonlinearity = 'relu' - a = 0 - kaiming_init(self.conv, a=a, nonlinearity=nonlinearity) - if self.with_norm: - constant_init(self.norm, 1, bias=0) - - def forward(self, x, activate=True, norm=True): - for layer in self.order: - if layer == 'conv': - if self.with_explicit_padding: - x = self.padding_layer(x) - x = self.conv(x) - elif layer == 'norm' and norm and self.with_norm: - x = self.norm(x) - elif layer == 'act' and activate and self.with_activation: - x = self.activate(x) - return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py deleted file mode 100644 index ecd8ed0db777..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.nn as nn -import torch.nn.functional as F - -from .registry import CONV_LAYERS - - -def conv_ws_2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, eps=1e-5): - c_in = weight.size(0) - weight_flat = weight.view(c_in, -1) - mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) - std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) - weight = (weight - mean) / (std + eps) - return F.conv2d(input, weight, bias, stride, padding, dilation, groups) - - -@CONV_LAYERS.register_module('ConvWS') -class ConvWS2d(nn.Conv2d): - def __init__( - self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, eps=1e-5 - ): - super(ConvWS2d, self).__init__( - in_channels, - out_channels, - kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups, - bias=bias, - ) - self.eps = eps - - def forward(self, x): - return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups, self.eps) - - -@CONV_LAYERS.register_module(name='ConvAWS') -class ConvAWS2d(nn.Conv2d): - """AWS (Adaptive Weight Standardization) - - This is a variant of Weight Standardization - (https://arxiv.org/pdf/1903.10520.pdf) - It is used in DetectoRS to avoid NaN - (https://arxiv.org/pdf/2006.02334.pdf) - - Args: - in_channels (int): Number of channels in the input image - out_channels (int): Number of channels produced by the convolution - kernel_size (int or tuple): Size of the conv kernel - stride (int or tuple, optional): Stride of the convolution. Default: 1 - padding (int or tuple, optional): Zero-padding added to both sides of - the input. Default: 0 - dilation (int or tuple, optional): Spacing between kernel elements. - Default: 1 - groups (int, optional): Number of blocked connections from input - channels to output channels. Default: 1 - bias (bool, optional): If set True, adds a learnable bias to the - output. Default: True - """ - - def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True): - super().__init__( - in_channels, - out_channels, - kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups, - bias=bias, - ) - self.register_buffer('weight_gamma', torch.ones(self.out_channels, 1, 1, 1)) - self.register_buffer('weight_beta', torch.zeros(self.out_channels, 1, 1, 1)) - - def _get_weight(self, weight): - weight_flat = weight.view(weight.size(0), -1) - mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1) - std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) - weight = (weight - mean) / std - weight = self.weight_gamma * weight + self.weight_beta - return weight - - def forward(self, x): - weight = self._get_weight(self.weight) - return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) - - def _load_from_state_dict( - self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs - ): - """Override default load function. - - AWS overrides the function _load_from_state_dict to recover - weight_gamma and weight_beta if they are missing. If weight_gamma and - weight_beta are found in the checkpoint, this function will return - after super()._load_from_state_dict. Otherwise, it will compute the - mean and std of the pretrained weights and store them in weight_beta - and weight_gamma. - """ - - self.weight_gamma.data.fill_(-1) - local_missing_keys = [] - super()._load_from_state_dict( - state_dict, prefix, local_metadata, strict, local_missing_keys, unexpected_keys, error_msgs - ) - if self.weight_gamma.data.mean() > 0: - for k in local_missing_keys: - missing_keys.append(k) - return - weight = self.weight.data - weight_flat = weight.view(weight.size(0), -1) - mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1) - std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) - self.weight_beta.data.copy_(mean) - self.weight_gamma.data.copy_(std) - missing_gamma_beta = [k for k in local_missing_keys if k.endswith('weight_gamma') or k.endswith('weight_beta')] - for k in missing_gamma_beta: - local_missing_keys.remove(k) - for k in local_missing_keys: - missing_keys.append(k) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py deleted file mode 100644 index 6e4b622aed59..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch.nn as nn - -from .conv_module import ConvModule - - -class DepthwiseSeparableConvModule(nn.Module): - """Depthwise separable convolution module. - - See https://arxiv.org/pdf/1704.04861.pdf for details. - - This module can replace a ConvModule with the conv block replaced by two - conv block: depthwise conv block and pointwise conv block. The depthwise - conv block contains depthwise-conv/norm/activation layers. The pointwise - conv block contains pointwise-conv/norm/activation layers. It should be - noted that there will be norm/activation layer in the depthwise conv block - if `norm_cfg` and `act_cfg` are specified. - - Args: - in_channels (int): Number of channels in the input feature map. - Same as that in ``nn._ConvNd``. - out_channels (int): Number of channels produced by the convolution. - Same as that in ``nn._ConvNd``. - kernel_size (int | tuple[int]): Size of the convolving kernel. - Same as that in ``nn._ConvNd``. - stride (int | tuple[int]): Stride of the convolution. - Same as that in ``nn._ConvNd``. Default: 1. - padding (int | tuple[int]): Zero-padding added to both sides of - the input. Same as that in ``nn._ConvNd``. Default: 0. - dilation (int | tuple[int]): Spacing between kernel elements. - Same as that in ``nn._ConvNd``. Default: 1. - norm_cfg (dict): Default norm config for both depthwise ConvModule and - pointwise ConvModule. Default: None. - act_cfg (dict): Default activation config for both depthwise ConvModule - and pointwise ConvModule. Default: dict(type='ReLU'). - dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is - 'default', it will be the same as `norm_cfg`. Default: 'default'. - dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is - 'default', it will be the same as `act_cfg`. Default: 'default'. - pw_norm_cfg (dict): Norm config of pointwise ConvModule. If it is - 'default', it will be the same as `norm_cfg`. Default: 'default'. - pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is - 'default', it will be the same as `act_cfg`. Default: 'default'. - kwargs (optional): Other shared arguments for depthwise and pointwise - ConvModule. See ConvModule for ref. - """ - - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - norm_cfg=None, - act_cfg=dict(type='ReLU'), - dw_norm_cfg='default', - dw_act_cfg='default', - pw_norm_cfg='default', - pw_act_cfg='default', - **kwargs - ): - super(DepthwiseSeparableConvModule, self).__init__() - assert 'groups' not in kwargs, 'groups should not be specified' - - # if norm/activation config of depthwise/pointwise ConvModule is not - # specified, use default config. - dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg - dw_act_cfg = dw_act_cfg if dw_act_cfg != 'default' else act_cfg - pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg - pw_act_cfg = pw_act_cfg if pw_act_cfg != 'default' else act_cfg - - # depthwise convolution - self.depthwise_conv = ConvModule( - in_channels, - in_channels, - kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=in_channels, - norm_cfg=dw_norm_cfg, - act_cfg=dw_act_cfg, - **kwargs - ) - - self.pointwise_conv = ConvModule( - in_channels, out_channels, 1, norm_cfg=pw_norm_cfg, act_cfg=pw_act_cfg, **kwargs - ) - - def forward(self, x): - x = self.depthwise_conv(x) - x = self.pointwise_conv(x) - return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py deleted file mode 100644 index b7f1af30b38d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.nn as nn - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import build_from_cfg -from .registry import DROPOUT_LAYERS - - -def drop_path(x, drop_prob=0.0, training=False): - """Drop paths (Stochastic Depth) per sample (when applied in main path of - residual blocks). - - We follow the implementation - https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py # noqa: E501 - """ - if drop_prob == 0.0 or not training: - return x - keep_prob = 1 - drop_prob - # handle tensors with different dimensions, not just 4D tensors. - shape = (x.shape[0],) + (1,) * (x.ndim - 1) - random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) - output = x.div(keep_prob) * random_tensor.floor() - return output - - -@DROPOUT_LAYERS.register_module() -class DropPath(nn.Module): - """Drop paths (Stochastic Depth) per sample (when applied in main path of - residual blocks). - - We follow the implementation - https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py # noqa: E501 - - Args: - drop_prob (float): Probability of the path to be zeroed. Default: 0.1 - """ - - def __init__(self, drop_prob=0.1): - super(DropPath, self).__init__() - self.drop_prob = drop_prob - - def forward(self, x): - return drop_path(x, self.drop_prob, self.training) - - -@DROPOUT_LAYERS.register_module() -class Dropout(nn.Dropout): - """A wrapper for ``torch.nn.Dropout``, We rename the ``p`` of - ``torch.nn.Dropout`` to ``drop_prob`` so as to be consistent with - ``DropPath`` - - Args: - drop_prob (float): Probability of the elements to be - zeroed. Default: 0.5. - inplace (bool): Do the operation inplace or not. Default: False. - """ - - def __init__(self, drop_prob=0.5, inplace=False): - super().__init__(p=drop_prob, inplace=inplace) - - -def build_dropout(cfg, default_args=None): - """Builder for drop out layers.""" - return build_from_cfg(cfg, DROPOUT_LAYERS, default_args) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py deleted file mode 100644 index 3886a902c75c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py +++ /dev/null @@ -1,346 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import math - -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F - -from ..utils import kaiming_init -from .registry import PLUGIN_LAYERS - - -@PLUGIN_LAYERS.register_module() -class GeneralizedAttention(nn.Module): - """GeneralizedAttention module. - - See 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks' - (https://arxiv.org/abs/1711.07971) for details. - - Args: - in_channels (int): Channels of the input feature map. - spatial_range (int): The spatial range. -1 indicates no spatial range - constraint. Default: -1. - num_heads (int): The head number of empirical_attention module. - Default: 9. - position_embedding_dim (int): The position embedding dimension. - Default: -1. - position_magnitude (int): A multiplier acting on coord difference. - Default: 1. - kv_stride (int): The feature stride acting on key/value feature map. - Default: 2. - q_stride (int): The feature stride acting on query feature map. - Default: 1. - attention_type (str): A binary indicator string for indicating which - items in generalized empirical_attention module are used. - Default: '1111'. - - - '1000' indicates 'query and key content' (appr - appr) item, - - '0100' indicates 'query content and relative position' - (appr - position) item, - - '0010' indicates 'key content only' (bias - appr) item, - - '0001' indicates 'relative position only' (bias - position) item. - """ - - _abbr_ = 'gen_attention_block' - - def __init__( - self, - in_channels, - spatial_range=-1, - num_heads=9, - position_embedding_dim=-1, - position_magnitude=1, - kv_stride=2, - q_stride=1, - attention_type='1111', - ): - - super(GeneralizedAttention, self).__init__() - - # hard range means local range for non-local operation - self.position_embedding_dim = position_embedding_dim if position_embedding_dim > 0 else in_channels - - self.position_magnitude = position_magnitude - self.num_heads = num_heads - self.in_channels = in_channels - self.spatial_range = spatial_range - self.kv_stride = kv_stride - self.q_stride = q_stride - self.attention_type = [bool(int(_)) for _ in attention_type] - self.qk_embed_dim = in_channels // num_heads - out_c = self.qk_embed_dim * num_heads - - if self.attention_type[0] or self.attention_type[1]: - self.query_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_c, kernel_size=1, bias=False) - self.query_conv.kaiming_init = True - - if self.attention_type[0] or self.attention_type[2]: - self.key_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_c, kernel_size=1, bias=False) - self.key_conv.kaiming_init = True - - self.v_dim = in_channels // num_heads - self.value_conv = nn.Conv2d( - in_channels=in_channels, out_channels=self.v_dim * num_heads, kernel_size=1, bias=False - ) - self.value_conv.kaiming_init = True - - if self.attention_type[1] or self.attention_type[3]: - self.appr_geom_fc_x = nn.Linear(self.position_embedding_dim // 2, out_c, bias=False) - self.appr_geom_fc_x.kaiming_init = True - - self.appr_geom_fc_y = nn.Linear(self.position_embedding_dim // 2, out_c, bias=False) - self.appr_geom_fc_y.kaiming_init = True - - if self.attention_type[2]: - stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2) - appr_bias_value = -2 * stdv * torch.rand(out_c) + stdv - self.appr_bias = nn.Parameter(appr_bias_value) - - if self.attention_type[3]: - stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2) - geom_bias_value = -2 * stdv * torch.rand(out_c) + stdv - self.geom_bias = nn.Parameter(geom_bias_value) - - self.proj_conv = nn.Conv2d( - in_channels=self.v_dim * num_heads, out_channels=in_channels, kernel_size=1, bias=True - ) - self.proj_conv.kaiming_init = True - self.gamma = nn.Parameter(torch.zeros(1)) - - if self.spatial_range >= 0: - # only works when non local is after 3*3 conv - if in_channels == 256: - max_len = 84 - elif in_channels == 512: - max_len = 42 - - max_len_kv = int((max_len - 1.0) / self.kv_stride + 1) - local_constraint_map = np.ones((max_len, max_len, max_len_kv, max_len_kv), dtype=np.int) - for iy in range(max_len): - for ix in range(max_len): - local_constraint_map[ - iy, - ix, - max((iy - self.spatial_range) // self.kv_stride, 0) : min( - (iy + self.spatial_range + 1) // self.kv_stride + 1, max_len - ), - max((ix - self.spatial_range) // self.kv_stride, 0) : min( - (ix + self.spatial_range + 1) // self.kv_stride + 1, max_len - ), - ] = 0 - - self.local_constraint_map = nn.Parameter( - torch.from_numpy(local_constraint_map).byte(), requires_grad=False - ) - - if self.q_stride > 1: - self.q_downsample = nn.AvgPool2d(kernel_size=1, stride=self.q_stride) - else: - self.q_downsample = None - - if self.kv_stride > 1: - self.kv_downsample = nn.AvgPool2d(kernel_size=1, stride=self.kv_stride) - else: - self.kv_downsample = None - - self.init_weights() - - def get_position_embedding(self, h, w, h_kv, w_kv, q_stride, kv_stride, device, dtype, feat_dim, wave_length=1000): - # the default type of Tensor is float32, leading to type mismatch - # in fp16 mode. Cast it to support fp16 mode. - h_idxs = torch.linspace(0, h - 1, h).to(device=device, dtype=dtype) - h_idxs = h_idxs.view((h, 1)) * q_stride - - w_idxs = torch.linspace(0, w - 1, w).to(device=device, dtype=dtype) - w_idxs = w_idxs.view((w, 1)) * q_stride - - h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).to(device=device, dtype=dtype) - h_kv_idxs = h_kv_idxs.view((h_kv, 1)) * kv_stride - - w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).to(device=device, dtype=dtype) - w_kv_idxs = w_kv_idxs.view((w_kv, 1)) * kv_stride - - # (h, h_kv, 1) - h_diff = h_idxs.unsqueeze(1) - h_kv_idxs.unsqueeze(0) - h_diff *= self.position_magnitude - - # (w, w_kv, 1) - w_diff = w_idxs.unsqueeze(1) - w_kv_idxs.unsqueeze(0) - w_diff *= self.position_magnitude - - feat_range = torch.arange(0, feat_dim / 4).to(device=device, dtype=dtype) - - dim_mat = torch.Tensor([wave_length]).to(device=device, dtype=dtype) - dim_mat = dim_mat ** ((4.0 / feat_dim) * feat_range) - dim_mat = dim_mat.view((1, 1, -1)) - - embedding_x = torch.cat(((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2) - - embedding_y = torch.cat(((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2) - - return embedding_x, embedding_y - - def forward(self, x_input): - num_heads = self.num_heads - - # use empirical_attention - if self.q_downsample is not None: - x_q = self.q_downsample(x_input) - else: - x_q = x_input - n, _, h, w = x_q.shape - - if self.kv_downsample is not None: - x_kv = self.kv_downsample(x_input) - else: - x_kv = x_input - _, _, h_kv, w_kv = x_kv.shape - - if self.attention_type[0] or self.attention_type[1]: - proj_query = self.query_conv(x_q).view((n, num_heads, self.qk_embed_dim, h * w)) - proj_query = proj_query.permute(0, 1, 3, 2) - - if self.attention_type[0] or self.attention_type[2]: - proj_key = self.key_conv(x_kv).view((n, num_heads, self.qk_embed_dim, h_kv * w_kv)) - - if self.attention_type[1] or self.attention_type[3]: - position_embed_x, position_embed_y = self.get_position_embedding( - h, - w, - h_kv, - w_kv, - self.q_stride, - self.kv_stride, - x_input.device, - x_input.dtype, - self.position_embedding_dim, - ) - # (n, num_heads, w, w_kv, dim) - position_feat_x = ( - self.appr_geom_fc_x(position_embed_x) - .view(1, w, w_kv, num_heads, self.qk_embed_dim) - .permute(0, 3, 1, 2, 4) - .repeat(n, 1, 1, 1, 1) - ) - - # (n, num_heads, h, h_kv, dim) - position_feat_y = ( - self.appr_geom_fc_y(position_embed_y) - .view(1, h, h_kv, num_heads, self.qk_embed_dim) - .permute(0, 3, 1, 2, 4) - .repeat(n, 1, 1, 1, 1) - ) - - position_feat_x /= math.sqrt(2) - position_feat_y /= math.sqrt(2) - - # accelerate for saliency only - if (np.sum(self.attention_type) == 1) and self.attention_type[2]: - appr_bias = self.appr_bias.view(1, num_heads, 1, self.qk_embed_dim).repeat(n, 1, 1, 1) - - energy = torch.matmul(appr_bias, proj_key).view(n, num_heads, 1, h_kv * w_kv) - - h = 1 - w = 1 - else: - # (n, num_heads, h*w, h_kv*w_kv), query before key, 540mb for - if not self.attention_type[0]: - energy = torch.zeros(n, num_heads, h, w, h_kv, w_kv, dtype=x_input.dtype, device=x_input.device) - - # attention_type[0]: appr - appr - # attention_type[1]: appr - position - # attention_type[2]: bias - appr - # attention_type[3]: bias - position - if self.attention_type[0] or self.attention_type[2]: - if self.attention_type[0] and self.attention_type[2]: - appr_bias = self.appr_bias.view(1, num_heads, 1, self.qk_embed_dim) - energy = torch.matmul(proj_query + appr_bias, proj_key).view(n, num_heads, h, w, h_kv, w_kv) - - elif self.attention_type[0]: - energy = torch.matmul(proj_query, proj_key).view(n, num_heads, h, w, h_kv, w_kv) - - elif self.attention_type[2]: - appr_bias = self.appr_bias.view(1, num_heads, 1, self.qk_embed_dim).repeat(n, 1, 1, 1) - - energy += torch.matmul(appr_bias, proj_key).view(n, num_heads, 1, 1, h_kv, w_kv) - - if self.attention_type[1] or self.attention_type[3]: - if self.attention_type[1] and self.attention_type[3]: - geom_bias = self.geom_bias.view(1, num_heads, 1, self.qk_embed_dim) - - proj_query_reshape = (proj_query + geom_bias).view(n, num_heads, h, w, self.qk_embed_dim) - - energy_x = torch.matmul( - proj_query_reshape.permute(0, 1, 3, 2, 4), position_feat_x.permute(0, 1, 2, 4, 3) - ) - energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4) - - energy_y = torch.matmul(proj_query_reshape, position_feat_y.permute(0, 1, 2, 4, 3)) - energy_y = energy_y.unsqueeze(5) - - energy += energy_x + energy_y - - elif self.attention_type[1]: - proj_query_reshape = proj_query.view(n, num_heads, h, w, self.qk_embed_dim) - proj_query_reshape = proj_query_reshape.permute(0, 1, 3, 2, 4) - position_feat_x_reshape = position_feat_x.permute(0, 1, 2, 4, 3) - position_feat_y_reshape = position_feat_y.permute(0, 1, 2, 4, 3) - - energy_x = torch.matmul(proj_query_reshape, position_feat_x_reshape) - energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4) - - energy_y = torch.matmul(proj_query_reshape, position_feat_y_reshape) - energy_y = energy_y.unsqueeze(5) - - energy += energy_x + energy_y - - elif self.attention_type[3]: - geom_bias = self.geom_bias.view(1, num_heads, self.qk_embed_dim, 1).repeat(n, 1, 1, 1) - - position_feat_x_reshape = position_feat_x.view(n, num_heads, w * w_kv, self.qk_embed_dim) - - position_feat_y_reshape = position_feat_y.view(n, num_heads, h * h_kv, self.qk_embed_dim) - - energy_x = torch.matmul(position_feat_x_reshape, geom_bias) - energy_x = energy_x.view(n, num_heads, 1, w, 1, w_kv) - - energy_y = torch.matmul(position_feat_y_reshape, geom_bias) - energy_y = energy_y.view(n, num_heads, h, 1, h_kv, 1) - - energy += energy_x + energy_y - - energy = energy.view(n, num_heads, h * w, h_kv * w_kv) - - if self.spatial_range >= 0: - cur_local_constraint_map = ( - self.local_constraint_map[:h, :w, :h_kv, :w_kv].contiguous().view(1, 1, h * w, h_kv * w_kv) - ) - - energy = energy.masked_fill_(cur_local_constraint_map, float('-inf')) - - attention = F.softmax(energy, 3) - - proj_value = self.value_conv(x_kv) - proj_value_reshape = proj_value.view((n, num_heads, self.v_dim, h_kv * w_kv)).permute(0, 1, 3, 2) - - out = ( - torch.matmul(attention, proj_value_reshape) - .permute(0, 1, 3, 2) - .contiguous() - .view(n, self.v_dim * self.num_heads, h, w) - ) - - out = self.proj_conv(out) - - # output is downsampled, upsample back to input size - if self.q_downsample is not None: - out = F.interpolate(out, size=x_input.shape[2:], mode='bilinear', align_corners=False) - - out = self.gamma * out + x_input - return out - - def init_weights(self): - for m in self.modules(): - if hasattr(m, 'kaiming_init') and m.kaiming_init: - kaiming_init(m, mode='fan_in', nonlinearity='leaky_relu', bias=0, distribution='uniform', a=1) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hsigmoid.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hsigmoid.py deleted file mode 100644 index 30b1a3d6580c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hsigmoid.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch.nn as nn - -from .registry import ACTIVATION_LAYERS - - -@ACTIVATION_LAYERS.register_module() -class HSigmoid(nn.Module): - """Hard Sigmoid Module. Apply the hard sigmoid function: - Hsigmoid(x) = min(max((x + bias) / divisor, min_value), max_value) - Default: Hsigmoid(x) = min(max((x + 1) / 2, 0), 1) - - Args: - bias (float): Bias of the input feature map. Default: 1.0. - divisor (float): Divisor of the input feature map. Default: 2.0. - min_value (float): Lower bound value. Default: 0.0. - max_value (float): Upper bound value. Default: 1.0. - - Returns: - Tensor: The output tensor. - """ - - def __init__(self, bias=1.0, divisor=2.0, min_value=0.0, max_value=1.0): - super(HSigmoid, self).__init__() - self.bias = bias - self.divisor = divisor - assert self.divisor != 0 - self.min_value = min_value - self.max_value = max_value - - def forward(self, x): - x = (x + self.bias) / self.divisor - - return x.clamp_(self.min_value, self.max_value) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hswish.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hswish.py deleted file mode 100644 index 7e0c090ff037..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hswish.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch.nn as nn - -from .registry import ACTIVATION_LAYERS - - -@ACTIVATION_LAYERS.register_module() -class HSwish(nn.Module): - """Hard Swish Module. - - This module applies the hard swish function: - - .. math:: - Hswish(x) = x * ReLU6(x + 3) / 6 - - Args: - inplace (bool): can optionally do the operation in-place. - Default: False. - - Returns: - Tensor: The output tensor. - """ - - def __init__(self, inplace=False): - super(HSwish, self).__init__() - self.act = nn.ReLU6(inplace) - - def forward(self, x): - return x * self.act(x + 3) / 6 diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py deleted file mode 100644 index 34a3602e2a84..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py +++ /dev/null @@ -1,272 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from abc import ABCMeta - -import torch -import torch.nn as nn - -from ..utils import constant_init, normal_init -from .conv_module import ConvModule -from .registry import PLUGIN_LAYERS - - -class _NonLocalNd(nn.Module, metaclass=ABCMeta): - """Basic Non-local module. - - This module is proposed in - "Non-local Neural Networks" - Paper reference: https://arxiv.org/abs/1711.07971 - Code reference: https://github.com/AlexHex7/Non-local_pytorch - - Args: - in_channels (int): Channels of the input feature map. - reduction (int): Channel reduction ratio. Default: 2. - use_scale (bool): Whether to scale pairwise_weight by - `1/sqrt(inter_channels)` when the mode is `embedded_gaussian`. - Default: True. - conv_cfg (None | dict): The config dict for convolution layers. - If not specified, it will use `nn.Conv2d` for convolution layers. - Default: None. - norm_cfg (None | dict): The config dict for normalization layers. - Default: None. (This parameter is only applicable to conv_out.) - mode (str): Options are `gaussian`, `concatenation`, - `embedded_gaussian` and `dot_product`. Default: embedded_gaussian. - """ - - def __init__( - self, - in_channels, - reduction=2, - use_scale=True, - conv_cfg=None, - norm_cfg=None, - mode='embedded_gaussian', - **kwargs, - ): - super(_NonLocalNd, self).__init__() - self.in_channels = in_channels - self.reduction = reduction - self.use_scale = use_scale - self.inter_channels = max(in_channels // reduction, 1) - self.mode = mode - - if mode not in ['gaussian', 'embedded_gaussian', 'dot_product', 'concatenation']: - raise ValueError( - "Mode should be in 'gaussian', 'concatenation', " - f"'embedded_gaussian' or 'dot_product', but got " - f'{mode} instead.' - ) - - # g, theta, phi are defaulted as `nn.ConvNd`. - # Here we use ConvModule for potential usage. - self.g = ConvModule(self.in_channels, self.inter_channels, kernel_size=1, conv_cfg=conv_cfg, act_cfg=None) - self.conv_out = ConvModule( - self.inter_channels, self.in_channels, kernel_size=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None - ) - - if self.mode != 'gaussian': - self.theta = ConvModule( - self.in_channels, self.inter_channels, kernel_size=1, conv_cfg=conv_cfg, act_cfg=None - ) - self.phi = ConvModule( - self.in_channels, self.inter_channels, kernel_size=1, conv_cfg=conv_cfg, act_cfg=None - ) - - if self.mode == 'concatenation': - self.concat_project = ConvModule( - self.inter_channels * 2, 1, kernel_size=1, stride=1, padding=0, bias=False, act_cfg=dict(type='ReLU') - ) - - self.init_weights(**kwargs) - - def init_weights(self, std=0.01, zeros_init=True): - if self.mode != 'gaussian': - for m in [self.g, self.theta, self.phi]: - normal_init(m.conv, std=std) - else: - normal_init(self.g.conv, std=std) - if zeros_init: - if self.conv_out.norm_cfg is None: - constant_init(self.conv_out.conv, 0) - else: - constant_init(self.conv_out.norm, 0) - else: - if self.conv_out.norm_cfg is None: - normal_init(self.conv_out.conv, std=std) - else: - normal_init(self.conv_out.norm, std=std) - - def gaussian(self, theta_x, phi_x): - # NonLocal1d pairwise_weight: [N, H, H] - # NonLocal2d pairwise_weight: [N, HxW, HxW] - # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] - pairwise_weight = torch.matmul(theta_x, phi_x) - pairwise_weight = pairwise_weight.softmax(dim=-1) - return pairwise_weight - - def embedded_gaussian(self, theta_x, phi_x): - # NonLocal1d pairwise_weight: [N, H, H] - # NonLocal2d pairwise_weight: [N, HxW, HxW] - # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] - pairwise_weight = torch.matmul(theta_x, phi_x) - if self.use_scale: - # theta_x.shape[-1] is `self.inter_channels` - pairwise_weight /= theta_x.shape[-1] ** 0.5 - pairwise_weight = pairwise_weight.softmax(dim=-1) - return pairwise_weight - - def dot_product(self, theta_x, phi_x): - # NonLocal1d pairwise_weight: [N, H, H] - # NonLocal2d pairwise_weight: [N, HxW, HxW] - # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] - pairwise_weight = torch.matmul(theta_x, phi_x) - pairwise_weight /= pairwise_weight.shape[-1] - return pairwise_weight - - def concatenation(self, theta_x, phi_x): - # NonLocal1d pairwise_weight: [N, H, H] - # NonLocal2d pairwise_weight: [N, HxW, HxW] - # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] - h = theta_x.size(2) - w = phi_x.size(3) - theta_x = theta_x.repeat(1, 1, 1, w) - phi_x = phi_x.repeat(1, 1, h, 1) - - concat_feature = torch.cat([theta_x, phi_x], dim=1) - pairwise_weight = self.concat_project(concat_feature) - n, _, h, w = pairwise_weight.size() - pairwise_weight = pairwise_weight.view(n, h, w) - pairwise_weight /= pairwise_weight.shape[-1] - - return pairwise_weight - - def forward(self, x): - # Assume `reduction = 1`, then `inter_channels = C` - # or `inter_channels = C` when `mode="gaussian"` - - # NonLocal1d x: [N, C, H] - # NonLocal2d x: [N, C, H, W] - # NonLocal3d x: [N, C, T, H, W] - n = x.size(0) - - # NonLocal1d g_x: [N, H, C] - # NonLocal2d g_x: [N, HxW, C] - # NonLocal3d g_x: [N, TxHxW, C] - g_x = self.g(x).view(n, self.inter_channels, -1) - g_x = g_x.permute(0, 2, 1) - - # NonLocal1d theta_x: [N, H, C], phi_x: [N, C, H] - # NonLocal2d theta_x: [N, HxW, C], phi_x: [N, C, HxW] - # NonLocal3d theta_x: [N, TxHxW, C], phi_x: [N, C, TxHxW] - if self.mode == 'gaussian': - theta_x = x.view(n, self.in_channels, -1) - theta_x = theta_x.permute(0, 2, 1) - if self.sub_sample: - phi_x = self.phi(x).view(n, self.in_channels, -1) - else: - phi_x = x.view(n, self.in_channels, -1) - elif self.mode == 'concatenation': - theta_x = self.theta(x).view(n, self.inter_channels, -1, 1) - phi_x = self.phi(x).view(n, self.inter_channels, 1, -1) - else: - theta_x = self.theta(x).view(n, self.inter_channels, -1) - theta_x = theta_x.permute(0, 2, 1) - phi_x = self.phi(x).view(n, self.inter_channels, -1) - - pairwise_func = getattr(self, self.mode) - # NonLocal1d pairwise_weight: [N, H, H] - # NonLocal2d pairwise_weight: [N, HxW, HxW] - # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] - pairwise_weight = pairwise_func(theta_x, phi_x) - - # NonLocal1d y: [N, H, C] - # NonLocal2d y: [N, HxW, C] - # NonLocal3d y: [N, TxHxW, C] - y = torch.matmul(pairwise_weight, g_x) - # NonLocal1d y: [N, C, H] - # NonLocal2d y: [N, C, H, W] - # NonLocal3d y: [N, C, T, H, W] - y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, *x.size()[2:]) - - output = x + self.conv_out(y) - - return output - - -class NonLocal1d(_NonLocalNd): - """1D Non-local module. - - Args: - in_channels (int): Same as `NonLocalND`. - sub_sample (bool): Whether to apply max pooling after pairwise - function (Note that the `sub_sample` is applied on spatial only). - Default: False. - conv_cfg (None | dict): Same as `NonLocalND`. - Default: dict(type='Conv1d'). - """ - - def __init__(self, in_channels, sub_sample=False, conv_cfg=dict(type='Conv1d'), **kwargs): - super(NonLocal1d, self).__init__(in_channels, conv_cfg=conv_cfg, **kwargs) - - self.sub_sample = sub_sample - - if sub_sample: - max_pool_layer = nn.MaxPool1d(kernel_size=2) - self.g = nn.Sequential(self.g, max_pool_layer) - if self.mode != 'gaussian': - self.phi = nn.Sequential(self.phi, max_pool_layer) - else: - self.phi = max_pool_layer - - -@PLUGIN_LAYERS.register_module() -class NonLocal2d(_NonLocalNd): - """2D Non-local module. - - Args: - in_channels (int): Same as `NonLocalND`. - sub_sample (bool): Whether to apply max pooling after pairwise - function (Note that the `sub_sample` is applied on spatial only). - Default: False. - conv_cfg (None | dict): Same as `NonLocalND`. - Default: dict(type='Conv2d'). - """ - - _abbr_ = 'nonlocal_block' - - def __init__(self, in_channels, sub_sample=False, conv_cfg=dict(type='Conv2d'), **kwargs): - super(NonLocal2d, self).__init__(in_channels, conv_cfg=conv_cfg, **kwargs) - - self.sub_sample = sub_sample - - if sub_sample: - max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2)) - self.g = nn.Sequential(self.g, max_pool_layer) - if self.mode != 'gaussian': - self.phi = nn.Sequential(self.phi, max_pool_layer) - else: - self.phi = max_pool_layer - - -class NonLocal3d(_NonLocalNd): - """3D Non-local module. - - Args: - in_channels (int): Same as `NonLocalND`. - sub_sample (bool): Whether to apply max pooling after pairwise - function (Note that the `sub_sample` is applied on spatial only). - Default: False. - conv_cfg (None | dict): Same as `NonLocalND`. - Default: dict(type='Conv3d'). - """ - - def __init__(self, in_channels, sub_sample=False, conv_cfg=dict(type='Conv3d'), **kwargs): - super(NonLocal3d, self).__init__(in_channels, conv_cfg=conv_cfg, **kwargs) - self.sub_sample = sub_sample - - if sub_sample: - max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2)) - self.g = nn.Sequential(self.g, max_pool_layer) - if self.mode != 'gaussian': - self.phi = nn.Sequential(self.phi, max_pool_layer) - else: - self.phi = max_pool_layer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py deleted file mode 100644 index e3f5eaa8af18..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import inspect - -import torch.nn as nn - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_tuple_of -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import ( - SyncBatchNorm, - _BatchNorm, - _InstanceNorm, -) - -from .registry import NORM_LAYERS - -NORM_LAYERS.register_module('BN', module=nn.BatchNorm2d) -NORM_LAYERS.register_module('BN1d', module=nn.BatchNorm1d) -NORM_LAYERS.register_module('BN2d', module=nn.BatchNorm2d) -NORM_LAYERS.register_module('BN3d', module=nn.BatchNorm3d) -NORM_LAYERS.register_module('SyncBN', module=SyncBatchNorm) -NORM_LAYERS.register_module('GN', module=nn.GroupNorm) -NORM_LAYERS.register_module('LN', module=nn.LayerNorm) -NORM_LAYERS.register_module('IN', module=nn.InstanceNorm2d) -NORM_LAYERS.register_module('IN1d', module=nn.InstanceNorm1d) -NORM_LAYERS.register_module('IN2d', module=nn.InstanceNorm2d) -NORM_LAYERS.register_module('IN3d', module=nn.InstanceNorm3d) - - -def infer_abbr(class_type): - """Infer abbreviation from the class name. - - When we build a norm layer with `build_norm_layer()`, we want to preserve - the norm type in variable names, e.g, self.bn1, self.gn. This method will - infer the abbreviation to map class types to abbreviations. - - Rule 1: If the class has the property "_abbr_", return the property. - Rule 2: If the parent class is _BatchNorm, GroupNorm, LayerNorm or - InstanceNorm, the abbreviation of this layer will be "bn", "gn", "ln" and - "in" respectively. - Rule 3: If the class name contains "batch", "group", "layer" or "instance", - the abbreviation of this layer will be "bn", "gn", "ln" and "in" - respectively. - Rule 4: Otherwise, the abbreviation falls back to "norm". - - Args: - class_type (type): The norm layer type. - - Returns: - str: The inferred abbreviation. - """ - if not inspect.isclass(class_type): - raise TypeError(f'class_type must be a type, but got {type(class_type)}') - if hasattr(class_type, '_abbr_'): - return class_type._abbr_ - if issubclass(class_type, _InstanceNorm): # IN is a subclass of BN - return 'in' - elif issubclass(class_type, _BatchNorm): - return 'bn' - elif issubclass(class_type, nn.GroupNorm): - return 'gn' - elif issubclass(class_type, nn.LayerNorm): - return 'ln' - else: - class_name = class_type.__name__.lower() - if 'batch' in class_name: - return 'bn' - elif 'group' in class_name: - return 'gn' - elif 'layer' in class_name: - return 'ln' - elif 'instance' in class_name: - return 'in' - else: - return 'norm_layer' - - -def build_norm_layer(cfg, num_features, postfix=''): - """Build normalization layer. - - Args: - cfg (dict): The norm layer config, which should contain: - - - type (str): Layer type. - - layer args: Args needed to instantiate a norm layer. - - requires_grad (bool, optional): Whether stop gradient updates. - num_features (int): Number of input channels. - postfix (int | str): The postfix to be appended into norm abbreviation - to create named layer. - - Returns: - (str, nn.Module): The first element is the layer name consisting of - abbreviation and postfix, e.g., bn1, gn. The second element is the - created norm layer. - """ - if not isinstance(cfg, dict): - raise TypeError('cfg must be a dict') - if 'type' not in cfg: - raise KeyError('the cfg dict must contain the key "type"') - cfg_ = cfg.copy() - - layer_type = cfg_.pop('type') - if layer_type not in NORM_LAYERS: - raise KeyError(f'Unrecognized norm type {layer_type}') - - norm_layer = NORM_LAYERS.get(layer_type) - abbr = infer_abbr(norm_layer) - - assert isinstance(postfix, (int, str)) - name = abbr + str(postfix) - - requires_grad = cfg_.pop('requires_grad', True) - cfg_.setdefault('eps', 1e-5) - if layer_type != 'GN': - layer = norm_layer(num_features, **cfg_) - if layer_type == 'SyncBN' and hasattr(layer, '_specify_ddp_gpu_num'): - layer._specify_ddp_gpu_num(1) - else: - assert 'num_groups' in cfg_ - layer = norm_layer(num_channels=num_features, **cfg_) - - for param in layer.parameters(): - param.requires_grad = requires_grad - - return name, layer - - -def is_norm(layer, exclude=None): - """Check if a layer is a normalization layer. - - Args: - layer (nn.Module): The layer to be checked. - exclude (type | tuple[type]): Types to be excluded. - - Returns: - bool: Whether the layer is a norm layer. - """ - if exclude is not None: - if not isinstance(exclude, tuple): - exclude = (exclude,) - if not is_tuple_of(exclude, type): - raise TypeError( - f'"exclude" must be either None or type or a tuple of types, ' f'but got {type(exclude)}: {exclude}' - ) - - if exclude and isinstance(layer, exclude): - return False - - all_norm_bases = (_BatchNorm, _InstanceNorm, nn.GroupNorm, nn.LayerNorm) - return isinstance(layer, all_norm_bases) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/padding.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/padding.py deleted file mode 100644 index e4ac6b28a178..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/padding.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch.nn as nn - -from .registry import PADDING_LAYERS - -PADDING_LAYERS.register_module('zero', module=nn.ZeroPad2d) -PADDING_LAYERS.register_module('reflect', module=nn.ReflectionPad2d) -PADDING_LAYERS.register_module('replicate', module=nn.ReplicationPad2d) - - -def build_padding_layer(cfg, *args, **kwargs): - """Build padding layer. - - Args: - cfg (None or dict): The padding layer config, which should contain: - - type (str): Layer type. - - layer args: Args needed to instantiate a padding layer. - - Returns: - nn.Module: Created padding layer. - """ - if not isinstance(cfg, dict): - raise TypeError('cfg must be a dict') - if 'type' not in cfg: - raise KeyError('the cfg dict must contain the key "type"') - - cfg_ = cfg.copy() - padding_type = cfg_.pop('type') - if padding_type not in PADDING_LAYERS: - raise KeyError(f'Unrecognized padding type {padding_type}.') - else: - padding_layer = PADDING_LAYERS.get(padding_type) - - layer = padding_layer(*args, **kwargs, **cfg_) - - return layer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py deleted file mode 100644 index d1e6d6fb326a..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py +++ /dev/null @@ -1,87 +0,0 @@ -import inspect -import platform - -from .registry import PLUGIN_LAYERS - -if platform.system() == 'Windows': - import regex as re -else: - import re - - -def infer_abbr(class_type): - """Infer abbreviation from the class name. - - This method will infer the abbreviation to map class types to - abbreviations. - - Rule 1: If the class has the property "abbr", return the property. - Rule 2: Otherwise, the abbreviation falls back to snake case of class - name, e.g. the abbreviation of ``FancyBlock`` will be ``fancy_block``. - - Args: - class_type (type): The norm layer type. - - Returns: - str: The inferred abbreviation. - """ - - def camel2snack(word): - """Convert camel case word into snack case. - - Modified from `inflection lib - `_. - - Example:: - - >>> camel2snack("FancyBlock") - 'fancy_block' - """ - - word = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', word) - word = re.sub(r'([a-z\d])([A-Z])', r'\1_\2', word) - word = word.replace('-', '_') - return word.lower() - - if not inspect.isclass(class_type): - raise TypeError(f'class_type must be a type, but got {type(class_type)}') - if hasattr(class_type, '_abbr_'): - return class_type._abbr_ - else: - return camel2snack(class_type.__name__) - - -def build_plugin_layer(cfg, postfix='', **kwargs): - """Build plugin layer. - - Args: - cfg (None or dict): cfg should contain: - type (str): identify plugin layer type. - layer args: args needed to instantiate a plugin layer. - postfix (int, str): appended into norm abbreviation to - create named layer. Default: ''. - - Returns: - tuple[str, nn.Module]: - name (str): abbreviation + postfix - layer (nn.Module): created plugin layer - """ - if not isinstance(cfg, dict): - raise TypeError('cfg must be a dict') - if 'type' not in cfg: - raise KeyError('the cfg dict must contain the key "type"') - cfg_ = cfg.copy() - - layer_type = cfg_.pop('type') - if layer_type not in PLUGIN_LAYERS: - raise KeyError(f'Unrecognized plugin type {layer_type}') - - plugin_layer = PLUGIN_LAYERS.get(layer_type) - abbr = infer_abbr(plugin_layer) - - assert isinstance(postfix, (int, str)) - name = abbr + str(postfix) - - layer = plugin_layer(**kwargs, **cfg_) - - return name, layer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/registry.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/registry.py deleted file mode 100644 index 584e3b0870fc..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/registry.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry - -CONV_LAYERS = Registry('conv layer') -NORM_LAYERS = Registry('norm layer') -ACTIVATION_LAYERS = Registry('activation layer') -PADDING_LAYERS = Registry('padding layer') -UPSAMPLE_LAYERS = Registry('upsample layer') -PLUGIN_LAYERS = Registry('plugin layer') - -DROPOUT_LAYERS = Registry('drop out layers') -POSITIONAL_ENCODING = Registry('position encoding') -ATTENTION = Registry('attention') -FEEDFORWARD_NETWORK = Registry('feed-forward Network') -TRANSFORMER_LAYER = Registry('transformerLayer') -TRANSFORMER_LAYER_SEQUENCE = Registry('transformer-layers sequence') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/scale.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/scale.py deleted file mode 100644 index c905fffcc8bf..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/scale.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.nn as nn - - -class Scale(nn.Module): - """A learnable scale parameter. - - This layer scales the input by a learnable factor. It multiplies a - learnable scale parameter of shape (1,) with input of any shape. - - Args: - scale (float): Initial value of scale factor. Default: 1.0 - """ - - def __init__(self, scale=1.0): - super(Scale, self).__init__() - self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) - - def forward(self, x): - return x * self.scale diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/swish.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/swish.py deleted file mode 100644 index e2ca8ed7b749..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/swish.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.nn as nn - -from .registry import ACTIVATION_LAYERS - - -@ACTIVATION_LAYERS.register_module() -class Swish(nn.Module): - """Swish Module. - - This module applies the swish function: - - .. math:: - Swish(x) = x * Sigmoid(x) - - Returns: - Tensor: The output tensor. - """ - - def __init__(self): - super(Swish, self).__init__() - - def forward(self, x): - return x * torch.sigmoid(x) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py deleted file mode 100644 index 7661266316c8..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py +++ /dev/null @@ -1,607 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import copy -import warnings - -import torch -import torch.nn as nn - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import ConfigDict, deprecated_api_warning -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( - Linear, - build_activation_layer, - build_norm_layer, -) -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner.base_module import ( - BaseModule, - ModuleList, - Sequential, -) -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import build_from_cfg - -from .drop import build_dropout -from .registry import ( - ATTENTION, - FEEDFORWARD_NETWORK, - POSITIONAL_ENCODING, - TRANSFORMER_LAYER, - TRANSFORMER_LAYER_SEQUENCE, -) - -# Avoid BC-breaking of importing MultiScaleDeformableAttention from this file -try: - from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops.multi_scale_deform_attn import ( # noqa F401 - MultiScaleDeformableAttention, - ) - - warnings.warn( - ImportWarning( - '``MultiScaleDeformableAttention`` has been moved to ' - '``mmcv.ops.multi_scale_deform_attn``, please change original path ' # noqa E501 - '``from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention`` ' # noqa E501 - 'to ``from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention`` ' # noqa E501 - ) - ) - -except ImportError: - warnings.warn( - 'Fail to import ``MultiScaleDeformableAttention`` from ' - '``mmcv.ops.multi_scale_deform_attn``, ' - 'You should install ``mmcv-full`` if you need this module. ' - ) - - -def build_positional_encoding(cfg, default_args=None): - """Builder for Position Encoding.""" - return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args) - - -def build_attention(cfg, default_args=None): - """Builder for attention.""" - return build_from_cfg(cfg, ATTENTION, default_args) - - -def build_feedforward_network(cfg, default_args=None): - """Builder for feed-forward network (FFN).""" - return build_from_cfg(cfg, FEEDFORWARD_NETWORK, default_args) - - -def build_transformer_layer(cfg, default_args=None): - """Builder for transformer layer.""" - return build_from_cfg(cfg, TRANSFORMER_LAYER, default_args) - - -def build_transformer_layer_sequence(cfg, default_args=None): - """Builder for transformer encoder and transformer decoder.""" - return build_from_cfg(cfg, TRANSFORMER_LAYER_SEQUENCE, default_args) - - -@ATTENTION.register_module() -class MultiheadAttention(BaseModule): - """A wrapper for ``torch.nn.MultiheadAttention``. - - This module implements MultiheadAttention with identity connection, - and positional encoding is also passed as input. - - Args: - embed_dims (int): The embedding dimension. - num_heads (int): Parallel attention heads. - attn_drop (float): A Dropout layer on attn_output_weights. - Default: 0.0. - proj_drop (float): A Dropout layer after `nn.MultiheadAttention`. - Default: 0.0. - dropout_layer (obj:`ConfigDict`): The dropout_layer used - when adding the shortcut. - init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. - Default: None. - batch_first (bool): When it is True, Key, Query and Value are shape of - (batch, n, embed_dim), otherwise (n, batch, embed_dim). - Default to False. - """ - - def __init__( - self, - embed_dims, - num_heads, - attn_drop=0.0, - proj_drop=0.0, - dropout_layer=dict(type='Dropout', drop_prob=0.0), - init_cfg=None, - batch_first=False, - **kwargs, - ): - super(MultiheadAttention, self).__init__(init_cfg) - if 'dropout' in kwargs: - warnings.warn( - 'The arguments `dropout` in MultiheadAttention ' - 'has been deprecated, now you can separately ' - 'set `attn_drop`(float), proj_drop(float), ' - 'and `dropout_layer`(dict) ' - ) - attn_drop = kwargs['dropout'] - dropout_layer['drop_prob'] = kwargs.pop('dropout') - - self.embed_dims = embed_dims - self.num_heads = num_heads - self.batch_first = batch_first - - self.attn = nn.MultiheadAttention(embed_dims, num_heads, attn_drop, **kwargs) - - self.proj_drop = nn.Dropout(proj_drop) - self.dropout_layer = build_dropout(dropout_layer) if dropout_layer else nn.Identity() - - @deprecated_api_warning({'residual': 'identity'}, cls_name='MultiheadAttention') - def forward( - self, - query, - key=None, - value=None, - identity=None, - query_pos=None, - key_pos=None, - attn_mask=None, - key_padding_mask=None, - **kwargs, - ): - """Forward function for `MultiheadAttention`. - - **kwargs allow passing a more general data flow when combining - with other operations in `transformerlayer`. - - Args: - query (Tensor): The input query with shape [num_queries, bs, - embed_dims] if self.batch_first is False, else - [bs, num_queries embed_dims]. - key (Tensor): The key tensor with shape [num_keys, bs, - embed_dims] if self.batch_first is False, else - [bs, num_keys, embed_dims] . - If None, the ``query`` will be used. Defaults to None. - value (Tensor): The value tensor with same shape as `key`. - Same in `nn.MultiheadAttention.forward`. Defaults to None. - If None, the `key` will be used. - identity (Tensor): This tensor, with the same shape as x, - will be used for the identity link. - If None, `x` will be used. Defaults to None. - query_pos (Tensor): The positional encoding for query, with - the same shape as `x`. If not None, it will - be added to `x` before forward function. Defaults to None. - key_pos (Tensor): The positional encoding for `key`, with the - same shape as `key`. Defaults to None. If not None, it will - be added to `key` before forward function. If None, and - `query_pos` has the same shape as `key`, then `query_pos` - will be used for `key_pos`. Defaults to None. - attn_mask (Tensor): ByteTensor mask with shape [num_queries, - num_keys]. Same in `nn.MultiheadAttention.forward`. - Defaults to None. - key_padding_mask (Tensor): ByteTensor with shape [bs, num_keys]. - Defaults to None. - - Returns: - Tensor: forwarded results with shape - [num_queries, bs, embed_dims] - if self.batch_first is False, else - [bs, num_queries embed_dims]. - """ - - if key is None: - key = query - if value is None: - value = key - if identity is None: - identity = query - if key_pos is None: - if query_pos is not None: - # use query_pos if key_pos is not available - if query_pos.shape == key.shape: - key_pos = query_pos - else: - warnings.warn(f'position encoding of key is' f'missing in {self.__class__.__name__}.') - if query_pos is not None: - query = query + query_pos - if key_pos is not None: - key = key + key_pos - - # Because the dataflow('key', 'query', 'value') of - # ``torch.nn.MultiheadAttention`` is (num_query, batch, - # embed_dims), We should adjust the shape of dataflow from - # batch_first (batch, num_query, embed_dims) to num_query_first - # (num_query ,batch, embed_dims), and recover ``attn_output`` - # from num_query_first to batch_first. - if self.batch_first: - query = query.transpose(0, 1) - key = key.transpose(0, 1) - value = value.transpose(0, 1) - - out = self.attn(query=query, key=key, value=value, attn_mask=attn_mask, key_padding_mask=key_padding_mask)[0] - - if self.batch_first: - out = out.transpose(0, 1) - - return identity + self.dropout_layer(self.proj_drop(out)) - - -@FEEDFORWARD_NETWORK.register_module() -class FFN(BaseModule): - """Implements feed-forward networks (FFNs) with identity connection. - - Args: - embed_dims (int): The feature dimension. Same as - `MultiheadAttention`. Defaults: 256. - feedforward_channels (int): The hidden dimension of FFNs. - Defaults: 1024. - num_fcs (int, optional): The number of fully-connected layers in - FFNs. Default: 2. - act_cfg (dict, optional): The activation config for FFNs. - Default: dict(type='ReLU') - ffn_drop (float, optional): Probability of an element to be - zeroed in FFN. Default 0.0. - add_identity (bool, optional): Whether to add the - identity connection. Default: `True`. - dropout_layer (obj:`ConfigDict`): The dropout_layer used - when adding the shortcut. - init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. - Default: None. - """ - - @deprecated_api_warning({'dropout': 'ffn_drop', 'add_residual': 'add_identity'}, cls_name='FFN') - def __init__( - self, - embed_dims=256, - feedforward_channels=1024, - num_fcs=2, - act_cfg=dict(type='ReLU', inplace=True), - ffn_drop=0.0, - dropout_layer=None, - add_identity=True, - init_cfg=None, - **kwargs, - ): - super(FFN, self).__init__(init_cfg) - assert num_fcs >= 2, 'num_fcs should be no less ' f'than 2. got {num_fcs}.' - self.embed_dims = embed_dims - self.feedforward_channels = feedforward_channels - self.num_fcs = num_fcs - self.act_cfg = act_cfg - self.activate = build_activation_layer(act_cfg) - - layers = [] - in_channels = embed_dims - for _ in range(num_fcs - 1): - layers.append(Sequential(Linear(in_channels, feedforward_channels), self.activate, nn.Dropout(ffn_drop))) - in_channels = feedforward_channels - layers.append(Linear(feedforward_channels, embed_dims)) - layers.append(nn.Dropout(ffn_drop)) - self.layers = Sequential(*layers) - self.dropout_layer = build_dropout(dropout_layer) if dropout_layer else torch.nn.Identity() - self.add_identity = add_identity - - @deprecated_api_warning({'residual': 'identity'}, cls_name='FFN') - def forward(self, x, identity=None): - """Forward function for `FFN`. - - The function would add x to the output tensor if residue is None. - """ - out = self.layers(x) - if not self.add_identity: - return self.dropout_layer(out) - if identity is None: - identity = x - return identity + self.dropout_layer(out) - - -@TRANSFORMER_LAYER.register_module() -class BaseTransformerLayer(BaseModule): - """Base `TransformerLayer` for vision transformer. - - It can be built from `mmcv.ConfigDict` and support more flexible - customization, for example, using any number of `FFN or LN ` and - use different kinds of `attention` by specifying a list of `ConfigDict` - named `attn_cfgs`. It is worth mentioning that it supports `prenorm` - when you specifying `norm` as the first element of `operation_order`. - More details about the `prenorm`: `On Layer Normalization in the - Transformer Architecture `_ . - - Args: - attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )): - Configs for `self_attention` or `cross_attention` modules, - The order of the configs in the list should be consistent with - corresponding attentions in operation_order. - If it is a dict, all of the attention modules in operation_order - will be built with this config. Default: None. - ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )): - Configs for FFN, The order of the configs in the list should be - consistent with corresponding ffn in operation_order. - If it is a dict, all of the attention modules in operation_order - will be built with this config. - operation_order (tuple[str]): The execution order of operation - in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm'). - Support `prenorm` when you specifying first element as `norm`. - Default:None. - norm_cfg (dict): Config dict for normalization layer. - Default: dict(type='LN'). - init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. - Default: None. - batch_first (bool): Key, Query and Value are shape - of (batch, n, embed_dim) - or (n, batch, embed_dim). Default to False. - """ - - def __init__( - self, - attn_cfgs=None, - ffn_cfgs=dict( - type='FFN', - embed_dims=256, - feedforward_channels=1024, - num_fcs=2, - ffn_drop=0.0, - act_cfg=dict(type='ReLU', inplace=True), - ), - operation_order=None, - norm_cfg=dict(type='LN'), - init_cfg=None, - batch_first=False, - **kwargs, - ): - - deprecated_args = dict( - feedforward_channels='feedforward_channels', ffn_dropout='ffn_drop', ffn_num_fcs='num_fcs' - ) - for ori_name, new_name in deprecated_args.items(): - if ori_name in kwargs: - warnings.warn( - f'The arguments `{ori_name}` in BaseTransformerLayer ' - f'has been deprecated, now you should set `{new_name}` ' - f'and other FFN related arguments ' - f'to a dict named `ffn_cfgs`. ' - ) - ffn_cfgs[new_name] = kwargs[ori_name] - - super(BaseTransformerLayer, self).__init__(init_cfg) - - self.batch_first = batch_first - - assert set(operation_order) & set(['self_attn', 'norm', 'ffn', 'cross_attn']) == set(operation_order), ( - f'The operation_order of' - f' {self.__class__.__name__} should ' - f'contains all four operation type ' - f"{['self_attn', 'norm', 'ffn', 'cross_attn']}" - ) - - num_attn = operation_order.count('self_attn') + operation_order.count('cross_attn') - if isinstance(attn_cfgs, dict): - attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)] - else: - assert num_attn == len(attn_cfgs), ( - f'The length ' - f'of attn_cfg {num_attn} is ' - f'not consistent with the number of attention' - f'in operation_order {operation_order}.' - ) - - self.num_attn = num_attn - self.operation_order = operation_order - self.norm_cfg = norm_cfg - self.pre_norm = operation_order[0] == 'norm' - self.attentions = ModuleList() - - index = 0 - for operation_name in operation_order: - if operation_name in ['self_attn', 'cross_attn']: - if 'batch_first' in attn_cfgs[index]: - assert self.batch_first == attn_cfgs[index]['batch_first'] - else: - attn_cfgs[index]['batch_first'] = self.batch_first - attention = build_attention(attn_cfgs[index]) - # Some custom attentions used as `self_attn` - # or `cross_attn` can have different behavior. - attention.operation_name = operation_name - self.attentions.append(attention) - index += 1 - - self.embed_dims = self.attentions[0].embed_dims - - self.ffns = ModuleList() - num_ffns = operation_order.count('ffn') - if isinstance(ffn_cfgs, dict): - ffn_cfgs = ConfigDict(ffn_cfgs) - if isinstance(ffn_cfgs, dict): - ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)] - assert len(ffn_cfgs) == num_ffns - for ffn_index in range(num_ffns): - if 'embed_dims' not in ffn_cfgs[ffn_index]: - ffn_cfgs['embed_dims'] = self.embed_dims - else: - assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims - self.ffns.append(build_feedforward_network(ffn_cfgs[ffn_index], dict(type='FFN'))) - - self.norms = ModuleList() - num_norms = operation_order.count('norm') - for _ in range(num_norms): - self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1]) - - def forward( - self, - query, - key=None, - value=None, - query_pos=None, - key_pos=None, - attn_masks=None, - query_key_padding_mask=None, - key_padding_mask=None, - **kwargs, - ): - """Forward function for `TransformerDecoderLayer`. - - **kwargs contains some specific arguments of attentions. - - Args: - query (Tensor): The input query with shape - [num_queries, bs, embed_dims] if - self.batch_first is False, else - [bs, num_queries embed_dims]. - key (Tensor): The key tensor with shape [num_keys, bs, - embed_dims] if self.batch_first is False, else - [bs, num_keys, embed_dims] . - value (Tensor): The value tensor with same shape as `key`. - query_pos (Tensor): The positional encoding for `query`. - Default: None. - key_pos (Tensor): The positional encoding for `key`. - Default: None. - attn_masks (List[Tensor] | None): 2D Tensor used in - calculation of corresponding attention. The length of - it should equal to the number of `attention` in - `operation_order`. Default: None. - query_key_padding_mask (Tensor): ByteTensor for `query`, with - shape [bs, num_queries]. Only used in `self_attn` layer. - Defaults to None. - key_padding_mask (Tensor): ByteTensor for `query`, with - shape [bs, num_keys]. Default: None. - - Returns: - Tensor: forwarded results with shape [num_queries, bs, embed_dims]. - """ - - norm_index = 0 - attn_index = 0 - ffn_index = 0 - identity = query - if attn_masks is None: - attn_masks = [None for _ in range(self.num_attn)] - elif isinstance(attn_masks, torch.Tensor): - attn_masks = [copy.deepcopy(attn_masks) for _ in range(self.num_attn)] - warnings.warn(f'Use same attn_mask in all attentions in ' f'{self.__class__.__name__} ') - else: - assert len(attn_masks) == self.num_attn, ( - f'The length of ' - f'attn_masks {len(attn_masks)} must be equal ' - f'to the number of attention in ' - f'operation_order {self.num_attn}' - ) - - for layer in self.operation_order: - if layer == 'self_attn': - temp_key = temp_value = query - query = self.attentions[attn_index]( - query, - temp_key, - temp_value, - identity if self.pre_norm else None, - query_pos=query_pos, - key_pos=query_pos, - attn_mask=attn_masks[attn_index], - key_padding_mask=query_key_padding_mask, - **kwargs, - ) - attn_index += 1 - identity = query - - elif layer == 'norm': - query = self.norms[norm_index](query) - norm_index += 1 - - elif layer == 'cross_attn': - query = self.attentions[attn_index]( - query, - key, - value, - identity if self.pre_norm else None, - query_pos=query_pos, - key_pos=key_pos, - attn_mask=attn_masks[attn_index], - key_padding_mask=key_padding_mask, - **kwargs, - ) - attn_index += 1 - identity = query - - elif layer == 'ffn': - query = self.ffns[ffn_index](query, identity if self.pre_norm else None) - ffn_index += 1 - - return query - - -@TRANSFORMER_LAYER_SEQUENCE.register_module() -class TransformerLayerSequence(BaseModule): - """Base class for TransformerEncoder and TransformerDecoder in vision - transformer. - - As base-class of Encoder and Decoder in vision transformer. - Support customization such as specifying different kind - of `transformer_layer` in `transformer_coder`. - - Args: - transformerlayer (list[obj:`mmcv.ConfigDict`] | - obj:`mmcv.ConfigDict`): Config of transformerlayer - in TransformerCoder. If it is obj:`mmcv.ConfigDict`, - it would be repeated `num_layer` times to a - list[`mmcv.ConfigDict`]. Default: None. - num_layers (int): The number of `TransformerLayer`. Default: None. - init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. - Default: None. - """ - - def __init__(self, transformerlayers=None, num_layers=None, init_cfg=None): - super(TransformerLayerSequence, self).__init__(init_cfg) - if isinstance(transformerlayers, dict): - transformerlayers = [copy.deepcopy(transformerlayers) for _ in range(num_layers)] - else: - assert isinstance(transformerlayers, list) and len(transformerlayers) == num_layers - self.num_layers = num_layers - self.layers = ModuleList() - for i in range(num_layers): - self.layers.append(build_transformer_layer(transformerlayers[i])) - self.embed_dims = self.layers[0].embed_dims - self.pre_norm = self.layers[0].pre_norm - - def forward( - self, - query, - key, - value, - query_pos=None, - key_pos=None, - attn_masks=None, - query_key_padding_mask=None, - key_padding_mask=None, - **kwargs, - ): - """Forward function for `TransformerCoder`. - - Args: - query (Tensor): Input query with shape - `(num_queries, bs, embed_dims)`. - key (Tensor): The key tensor with shape - `(num_keys, bs, embed_dims)`. - value (Tensor): The value tensor with shape - `(num_keys, bs, embed_dims)`. - query_pos (Tensor): The positional encoding for `query`. - Default: None. - key_pos (Tensor): The positional encoding for `key`. - Default: None. - attn_masks (List[Tensor], optional): Each element is 2D Tensor - which is used in calculation of corresponding attention in - operation_order. Default: None. - query_key_padding_mask (Tensor): ByteTensor for `query`, with - shape [bs, num_queries]. Only used in self-attention - Default: None. - key_padding_mask (Tensor): ByteTensor for `query`, with - shape [bs, num_keys]. Default: None. - - Returns: - Tensor: results with shape [num_queries, bs, embed_dims]. - """ - for layer in self.layers: - query = layer( - query, - key, - value, - query_pos=query_pos, - key_pos=key_pos, - attn_masks=attn_masks, - query_key_padding_mask=query_key_padding_mask, - key_padding_mask=key_padding_mask, - **kwargs, - ) - return query diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py deleted file mode 100644 index f4d0f1fa8291..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch.nn as nn -import torch.nn.functional as F - -from ..utils import xavier_init -from .registry import UPSAMPLE_LAYERS - -UPSAMPLE_LAYERS.register_module('nearest', module=nn.Upsample) -UPSAMPLE_LAYERS.register_module('bilinear', module=nn.Upsample) - - -@UPSAMPLE_LAYERS.register_module(name='pixel_shuffle') -class PixelShufflePack(nn.Module): - """Pixel Shuffle upsample layer. - - This module packs `F.pixel_shuffle()` and a nn.Conv2d module together to - achieve a simple upsampling with pixel shuffle. - - Args: - in_channels (int): Number of input channels. - out_channels (int): Number of output channels. - scale_factor (int): Upsample ratio. - upsample_kernel (int): Kernel size of the conv layer to expand the - channels. - """ - - def __init__(self, in_channels, out_channels, scale_factor, upsample_kernel): - super(PixelShufflePack, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.scale_factor = scale_factor - self.upsample_kernel = upsample_kernel - self.upsample_conv = nn.Conv2d( - self.in_channels, - self.out_channels * scale_factor * scale_factor, - self.upsample_kernel, - padding=(self.upsample_kernel - 1) // 2, - ) - self.init_weights() - - def init_weights(self): - xavier_init(self.upsample_conv, distribution='uniform') - - def forward(self, x): - x = self.upsample_conv(x) - x = F.pixel_shuffle(x, self.scale_factor) - return x - - -def build_upsample_layer(cfg, *args, **kwargs): - """Build upsample layer. - - Args: - cfg (dict): The upsample layer config, which should contain: - - - type (str): Layer type. - - scale_factor (int): Upsample ratio, which is not applicable to - deconv. - - layer args: Args needed to instantiate a upsample layer. - args (argument list): Arguments passed to the ``__init__`` - method of the corresponding conv layer. - kwargs (keyword arguments): Keyword arguments passed to the - ``__init__`` method of the corresponding conv layer. - - Returns: - nn.Module: Created upsample layer. - """ - if not isinstance(cfg, dict): - raise TypeError(f'cfg must be a dict, but got {type(cfg)}') - if 'type' not in cfg: - raise KeyError(f'the cfg dict must contain the key "type", but got {cfg}') - cfg_ = cfg.copy() - - layer_type = cfg_.pop('type') - if layer_type not in UPSAMPLE_LAYERS: - raise KeyError(f'Unrecognized upsample type {layer_type}') - else: - upsample = UPSAMPLE_LAYERS.get(layer_type) - - if upsample is nn.Upsample: - cfg_['mode'] = layer_type - layer = upsample(*args, **kwargs, **cfg_) - return layer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py deleted file mode 100644 index 9028d69d4480..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -r"""Modified from https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/wrappers.py # noqa: E501 - -Wrap some nn modules to support empty tensor input. Currently, these wrappers -are mainly used in mask heads like fcn_mask_head and maskiou_heads since mask -heads are trained on only positive RoIs. -""" -import math - -import torch -import torch.nn as nn -from torch.nn.modules.utils import _pair, _triple - -from .registry import CONV_LAYERS, UPSAMPLE_LAYERS - -if torch.__version__ == 'parrots': - TORCH_VERSION = torch.__version__ -else: - # torch.__version__ could be 1.3.1+cu92, we only need the first two - # for comparison - TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2]) - - -def obsolete_torch_version(torch_version, version_threshold): - return torch_version == 'parrots' or torch_version <= version_threshold - - -class NewEmptyTensorOp(torch.autograd.Function): - @staticmethod - def forward(ctx, x, new_shape): - ctx.shape = x.shape - return x.new_empty(new_shape) - - @staticmethod - def backward(ctx, grad): - shape = ctx.shape - return NewEmptyTensorOp.apply(grad, shape), None - - -@CONV_LAYERS.register_module('Conv', force=True) -class Conv2d(nn.Conv2d): - def forward(self, x): - if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): - out_shape = [x.shape[0], self.out_channels] - for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size, self.padding, self.stride, self.dilation): - o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1 - out_shape.append(o) - empty = NewEmptyTensorOp.apply(x, out_shape) - if self.training: - # produce dummy gradient to avoid DDP warning. - dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 - return empty + dummy - else: - return empty - - return super().forward(x) - - -@CONV_LAYERS.register_module('Conv3d', force=True) -class Conv3d(nn.Conv3d): - def forward(self, x): - if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): - out_shape = [x.shape[0], self.out_channels] - for i, k, p, s, d in zip(x.shape[-3:], self.kernel_size, self.padding, self.stride, self.dilation): - o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1 - out_shape.append(o) - empty = NewEmptyTensorOp.apply(x, out_shape) - if self.training: - # produce dummy gradient to avoid DDP warning. - dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 - return empty + dummy - else: - return empty - - return super().forward(x) - - -@CONV_LAYERS.register_module() -@CONV_LAYERS.register_module('deconv') -@UPSAMPLE_LAYERS.register_module('deconv', force=True) -class ConvTranspose2d(nn.ConvTranspose2d): - def forward(self, x): - if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): - out_shape = [x.shape[0], self.out_channels] - for i, k, p, s, d, op in zip( - x.shape[-2:], self.kernel_size, self.padding, self.stride, self.dilation, self.output_padding - ): - out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op) - empty = NewEmptyTensorOp.apply(x, out_shape) - if self.training: - # produce dummy gradient to avoid DDP warning. - dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 - return empty + dummy - else: - return empty - - return super().forward(x) - - -@CONV_LAYERS.register_module() -@CONV_LAYERS.register_module('deconv3d') -@UPSAMPLE_LAYERS.register_module('deconv3d', force=True) -class ConvTranspose3d(nn.ConvTranspose3d): - def forward(self, x): - if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): - out_shape = [x.shape[0], self.out_channels] - for i, k, p, s, d, op in zip( - x.shape[-3:], self.kernel_size, self.padding, self.stride, self.dilation, self.output_padding - ): - out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op) - empty = NewEmptyTensorOp.apply(x, out_shape) - if self.training: - # produce dummy gradient to avoid DDP warning. - dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 - return empty + dummy - else: - return empty - - return super().forward(x) - - -class MaxPool2d(nn.MaxPool2d): - def forward(self, x): - # PyTorch 1.9 does not support empty tensor inference yet - if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): - out_shape = list(x.shape[:2]) - for i, k, p, s, d in zip( - x.shape[-2:], _pair(self.kernel_size), _pair(self.padding), _pair(self.stride), _pair(self.dilation) - ): - o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 - o = math.ceil(o) if self.ceil_mode else math.floor(o) - out_shape.append(o) - empty = NewEmptyTensorOp.apply(x, out_shape) - return empty - - return super().forward(x) - - -class MaxPool3d(nn.MaxPool3d): - def forward(self, x): - # PyTorch 1.9 does not support empty tensor inference yet - if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): - out_shape = list(x.shape[:2]) - for i, k, p, s, d in zip( - x.shape[-3:], - _triple(self.kernel_size), - _triple(self.padding), - _triple(self.stride), - _triple(self.dilation), - ): - o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 - o = math.ceil(o) if self.ceil_mode else math.floor(o) - out_shape.append(o) - empty = NewEmptyTensorOp.apply(x, out_shape) - return empty - - return super().forward(x) - - -class Linear(torch.nn.Linear): - def forward(self, x): - # empty tensor forward of Linear layer is supported in Pytorch 1.6 - if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 5)): - out_shape = [x.shape[0], self.out_features] - empty = NewEmptyTensorOp.apply(x, out_shape) - if self.training: - # produce dummy gradient to avoid DDP warning. - dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 - return empty + dummy - else: - return empty - - return super().forward(x) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py deleted file mode 100644 index 64e378712149..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from ..runner import Sequential -from ..utils import Registry, build_from_cfg - - -def build_model_from_cfg(cfg, registry, default_args=None): - """Build a PyTorch model from config dict(s). Different from - ``build_from_cfg``, if cfg is a list, a ``nn.Sequential`` will be built. - - Args: - cfg (dict, list[dict]): The config of modules, is is either a config - dict or a list of config dicts. If cfg is a list, a - the built modules will be wrapped with ``nn.Sequential``. - registry (:obj:`Registry`): A registry the module belongs to. - default_args (dict, optional): Default arguments to build the module. - Defaults to None. - - Returns: - nn.Module: A built nn module. - """ - if isinstance(cfg, list): - modules = [build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg] - return Sequential(*modules) - else: - return build_from_cfg(cfg, registry, default_args) - - -MODELS = Registry('model', build_func=build_model_from_cfg) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py deleted file mode 100644 index a432cd00d4c5..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py +++ /dev/null @@ -1,271 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import logging - -import torch.nn as nn -import torch.utils.checkpoint as cp - -from .utils import constant_init, kaiming_init - - -def conv3x3(in_planes, out_planes, stride=1, dilation=1): - """3x3 convolution with padding.""" - return nn.Conv2d( - in_planes, out_planes, kernel_size=3, stride=stride, padding=dilation, dilation=dilation, bias=False - ) - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, style='pytorch', with_cp=False): - super(BasicBlock, self).__init__() - assert style in ['pytorch', 'caffe'] - self.conv1 = conv3x3(inplanes, planes, stride, dilation) - self.bn1 = nn.BatchNorm2d(planes) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(planes, planes) - self.bn2 = nn.BatchNorm2d(planes) - self.downsample = downsample - self.stride = stride - self.dilation = dilation - assert not with_cp - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, style='pytorch', with_cp=False): - """Bottleneck block. - - If style is "pytorch", the stride-two layer is the 3x3 conv layer, if - it is "caffe", the stride-two layer is the first 1x1 conv layer. - """ - super(Bottleneck, self).__init__() - assert style in ['pytorch', 'caffe'] - if style == 'pytorch': - conv1_stride = 1 - conv2_stride = stride - else: - conv1_stride = stride - conv2_stride = 1 - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False) - self.conv2 = nn.Conv2d( - planes, planes, kernel_size=3, stride=conv2_stride, padding=dilation, dilation=dilation, bias=False - ) - - self.bn1 = nn.BatchNorm2d(planes) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(planes * self.expansion) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - self.dilation = dilation - self.with_cp = with_cp - - def forward(self, x): - def _inner_forward(x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - - return out - - if self.with_cp and x.requires_grad: - out = cp.checkpoint(_inner_forward, x) - else: - out = _inner_forward(x) - - out = self.relu(out) - - return out - - -def make_res_layer(block, inplanes, planes, blocks, stride=1, dilation=1, style='pytorch', with_cp=False): - downsample = None - if stride != 1 or inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(planes * block.expansion), - ) - - layers = [] - layers.append(block(inplanes, planes, stride, dilation, downsample, style=style, with_cp=with_cp)) - inplanes = planes * block.expansion - for _ in range(1, blocks): - layers.append(block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp)) - - return nn.Sequential(*layers) - - -class ResNet(nn.Module): - """ResNet backbone. - - Args: - depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. - num_stages (int): Resnet stages, normally 4. - strides (Sequence[int]): Strides of the first block of each stage. - dilations (Sequence[int]): Dilation of each stage. - out_indices (Sequence[int]): Output from which stages. - style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two - layer is the 3x3 conv layer, otherwise the stride-two layer is - the first 1x1 conv layer. - frozen_stages (int): Stages to be frozen (all param fixed). -1 means - not freezing any parameters. - bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze - running stats (mean and var). - bn_frozen (bool): Whether to freeze weight and bias of BN layers. - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. - """ - - arch_settings = { - 18: (BasicBlock, (2, 2, 2, 2)), - 34: (BasicBlock, (3, 4, 6, 3)), - 50: (Bottleneck, (3, 4, 6, 3)), - 101: (Bottleneck, (3, 4, 23, 3)), - 152: (Bottleneck, (3, 8, 36, 3)), - } - - def __init__( - self, - depth, - num_stages=4, - strides=(1, 2, 2, 2), - dilations=(1, 1, 1, 1), - out_indices=(0, 1, 2, 3), - style='pytorch', - frozen_stages=-1, - bn_eval=True, - bn_frozen=False, - with_cp=False, - ): - super(ResNet, self).__init__() - if depth not in self.arch_settings: - raise KeyError(f'invalid depth {depth} for resnet') - assert num_stages >= 1 and num_stages <= 4 - block, stage_blocks = self.arch_settings[depth] - stage_blocks = stage_blocks[:num_stages] - assert len(strides) == len(dilations) == num_stages - assert max(out_indices) < num_stages - - self.out_indices = out_indices - self.style = style - self.frozen_stages = frozen_stages - self.bn_eval = bn_eval - self.bn_frozen = bn_frozen - self.with_cp = with_cp - - self.inplanes = 64 - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.relu = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.res_layers = [] - for i, num_blocks in enumerate(stage_blocks): - stride = strides[i] - dilation = dilations[i] - planes = 64 * 2 ** i - res_layer = make_res_layer( - block, - self.inplanes, - planes, - num_blocks, - stride=stride, - dilation=dilation, - style=self.style, - with_cp=with_cp, - ) - self.inplanes = planes * block.expansion - layer_name = f'layer{i + 1}' - self.add_module(layer_name, res_layer) - self.res_layers.append(layer_name) - - self.feat_dim = block.expansion * 64 * 2 ** (len(stage_blocks) - 1) - - def init_weights(self, pretrained=None): - if isinstance(pretrained, str): - logger = logging.getLogger() - from ..runner import load_checkpoint - - load_checkpoint(self, pretrained, strict=False, logger=logger) - elif pretrained is None: - for m in self.modules(): - if isinstance(m, nn.Conv2d): - kaiming_init(m) - elif isinstance(m, nn.BatchNorm2d): - constant_init(m, 1) - else: - raise TypeError('pretrained must be a str or None') - - def forward(self, x): - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - outs = [] - for i, layer_name in enumerate(self.res_layers): - res_layer = getattr(self, layer_name) - x = res_layer(x) - if i in self.out_indices: - outs.append(x) - if len(outs) == 1: - return outs[0] - else: - return tuple(outs) - - def train(self, mode=True): - super(ResNet, self).train(mode) - if self.bn_eval: - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d): - m.eval() - if self.bn_frozen: - for params in m.parameters(): - params.requires_grad = False - if mode and self.frozen_stages >= 0: - for param in self.conv1.parameters(): - param.requires_grad = False - for param in self.bn1.parameters(): - param.requires_grad = False - self.bn1.eval() - self.bn1.weight.requires_grad = False - self.bn1.bias.requires_grad = False - for i in range(1, self.frozen_stages + 1): - mod = getattr(self, f'layer{i}') - mod.eval() - for param in mod.parameters(): - param.requires_grad = False diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py deleted file mode 100644 index 2f1607650cb1..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .flops_counter import get_model_complexity_info -from .fuse_conv_bn import fuse_conv_bn -from .sync_bn import revert_sync_batchnorm -from .weight_init import ( - INITIALIZERS, - Caffe2XavierInit, - ConstantInit, - KaimingInit, - NormalInit, - PretrainedInit, - TruncNormalInit, - UniformInit, - XavierInit, - bias_init_with_prob, - caffe2_xavier_init, - constant_init, - initialize, - kaiming_init, - normal_init, - trunc_normal_init, - uniform_init, - xavier_init, -) - -__all__ = [ - 'get_model_complexity_info', - 'bias_init_with_prob', - 'caffe2_xavier_init', - 'constant_init', - 'kaiming_init', - 'normal_init', - 'trunc_normal_init', - 'uniform_init', - 'xavier_init', - 'fuse_conv_bn', - 'initialize', - 'INITIALIZERS', - 'ConstantInit', - 'XavierInit', - 'NormalInit', - 'TruncNormalInit', - 'UniformInit', - 'KaimingInit', - 'PretrainedInit', - 'Caffe2XavierInit', - 'revert_sync_batchnorm', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py deleted file mode 100644 index afbba392fa97..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py +++ /dev/null @@ -1,580 +0,0 @@ -# Modified from flops-counter.pytorch by Vladislav Sovrasov -# original repo: https://github.com/sovrasov/flops-counter.pytorch - -# MIT License - -# Copyright (c) 2018 Vladislav Sovrasov - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import sys -from functools import partial - -import numpy as np -import torch -import torch.nn as nn - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - - -def get_model_complexity_info( - model, input_shape, print_per_layer_stat=True, as_strings=True, input_constructor=None, flush=False, ost=sys.stdout -): - """Get complexity information of a model. - - This method can calculate FLOPs and parameter counts of a model with - corresponding input shape. It can also print complexity information for - each layer in a model. - - Supported layers are listed as below: - - Convolutions: ``nn.Conv1d``, ``nn.Conv2d``, ``nn.Conv3d``. - - Activations: ``nn.ReLU``, ``nn.PReLU``, ``nn.ELU``, ``nn.LeakyReLU``, - ``nn.ReLU6``. - - Poolings: ``nn.MaxPool1d``, ``nn.MaxPool2d``, ``nn.MaxPool3d``, - ``nn.AvgPool1d``, ``nn.AvgPool2d``, ``nn.AvgPool3d``, - ``nn.AdaptiveMaxPool1d``, ``nn.AdaptiveMaxPool2d``, - ``nn.AdaptiveMaxPool3d``, ``nn.AdaptiveAvgPool1d``, - ``nn.AdaptiveAvgPool2d``, ``nn.AdaptiveAvgPool3d``. - - BatchNorms: ``nn.BatchNorm1d``, ``nn.BatchNorm2d``, - ``nn.BatchNorm3d``, ``nn.GroupNorm``, ``nn.InstanceNorm1d``, - ``InstanceNorm2d``, ``InstanceNorm3d``, ``nn.LayerNorm``. - - Linear: ``nn.Linear``. - - Deconvolution: ``nn.ConvTranspose2d``. - - Upsample: ``nn.Upsample``. - - Args: - model (nn.Module): The model for complexity calculation. - input_shape (tuple): Input shape used for calculation. - print_per_layer_stat (bool): Whether to print complexity information - for each layer in a model. Default: True. - as_strings (bool): Output FLOPs and params counts in a string form. - Default: True. - input_constructor (None | callable): If specified, it takes a callable - method that generates input. otherwise, it will generate a random - tensor with input shape to calculate FLOPs. Default: None. - flush (bool): same as that in :func:`print`. Default: False. - ost (stream): same as ``file`` param in :func:`print`. - Default: sys.stdout. - - Returns: - tuple[float | str]: If ``as_strings`` is set to True, it will return - FLOPs and parameter counts in a string format. otherwise, it will - return those in a float number format. - """ - assert type(input_shape) is tuple - assert len(input_shape) >= 1 - assert isinstance(model, nn.Module) - flops_model = add_flops_counting_methods(model) - flops_model.eval() - flops_model.start_flops_count() - if input_constructor: - input = input_constructor(input_shape) - _ = flops_model(**input) - else: - try: - batch = torch.ones(()).new_empty( - (1, *input_shape), - dtype=next(flops_model.parameters()).dtype, - device=next(flops_model.parameters()).device, - ) - except StopIteration: - # Avoid StopIteration for models which have no parameters, - # like `nn.Relu()`, `nn.AvgPool2d`, etc. - batch = torch.ones(()).new_empty((1, *input_shape)) - - _ = flops_model(batch) - - flops_count, params_count = flops_model.compute_average_flops_cost() - if print_per_layer_stat: - print_model_with_flops(flops_model, flops_count, params_count, ost=ost, flush=flush) - flops_model.stop_flops_count() - - if as_strings: - return flops_to_string(flops_count), params_to_string(params_count) - - return flops_count, params_count - - -def flops_to_string(flops, units='GFLOPs', precision=2): - """Convert FLOPs number into a string. - - Note that Here we take a multiply-add counts as one FLOP. - - Args: - flops (float): FLOPs number to be converted. - units (str | None): Converted FLOPs units. Options are None, 'GFLOPs', - 'MFLOPs', 'KFLOPs', 'FLOPs'. If set to None, it will automatically - choose the most suitable unit for FLOPs. Default: 'GFLOPs'. - precision (int): Digit number after the decimal point. Default: 2. - - Returns: - str: The converted FLOPs number with units. - - Examples: - >>> flops_to_string(1e9) - '1.0 GFLOPs' - >>> flops_to_string(2e5, 'MFLOPs') - '0.2 MFLOPs' - >>> flops_to_string(3e-9, None) - '3e-09 FLOPs' - """ - if units is None: - if flops // 10 ** 9 > 0: - return str(round(flops / 10.0 ** 9, precision)) + ' GFLOPs' - elif flops // 10 ** 6 > 0: - return str(round(flops / 10.0 ** 6, precision)) + ' MFLOPs' - elif flops // 10 ** 3 > 0: - return str(round(flops / 10.0 ** 3, precision)) + ' KFLOPs' - else: - return str(flops) + ' FLOPs' - else: - if units == 'GFLOPs': - return str(round(flops / 10.0 ** 9, precision)) + ' ' + units - elif units == 'MFLOPs': - return str(round(flops / 10.0 ** 6, precision)) + ' ' + units - elif units == 'KFLOPs': - return str(round(flops / 10.0 ** 3, precision)) + ' ' + units - else: - return str(flops) + ' FLOPs' - - -def params_to_string(num_params, units=None, precision=2): - """Convert parameter number into a string. - - Args: - num_params (float): Parameter number to be converted. - units (str | None): Converted FLOPs units. Options are None, 'M', - 'K' and ''. If set to None, it will automatically choose the most - suitable unit for Parameter number. Default: None. - precision (int): Digit number after the decimal point. Default: 2. - - Returns: - str: The converted parameter number with units. - - Examples: - >>> params_to_string(1e9) - '1000.0 M' - >>> params_to_string(2e5) - '200.0 k' - >>> params_to_string(3e-9) - '3e-09' - """ - if units is None: - if num_params // 10 ** 6 > 0: - return str(round(num_params / 10 ** 6, precision)) + ' M' - elif num_params // 10 ** 3: - return str(round(num_params / 10 ** 3, precision)) + ' k' - else: - return str(num_params) - else: - if units == 'M': - return str(round(num_params / 10.0 ** 6, precision)) + ' ' + units - elif units == 'K': - return str(round(num_params / 10.0 ** 3, precision)) + ' ' + units - else: - return str(num_params) - - -def print_model_with_flops(model, total_flops, total_params, units='GFLOPs', precision=3, ost=sys.stdout, flush=False): - """Print a model with FLOPs for each layer. - - Args: - model (nn.Module): The model to be printed. - total_flops (float): Total FLOPs of the model. - total_params (float): Total parameter counts of the model. - units (str | None): Converted FLOPs units. Default: 'GFLOPs'. - precision (int): Digit number after the decimal point. Default: 3. - ost (stream): same as `file` param in :func:`print`. - Default: sys.stdout. - flush (bool): same as that in :func:`print`. Default: False. - - Example: - >>> class ExampleModel(nn.Module): - - >>> def __init__(self): - >>> super().__init__() - >>> self.conv1 = nn.Conv2d(3, 8, 3) - >>> self.conv2 = nn.Conv2d(8, 256, 3) - >>> self.conv3 = nn.Conv2d(256, 8, 3) - >>> self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) - >>> self.flatten = nn.Flatten() - >>> self.fc = nn.Linear(8, 1) - - >>> def forward(self, x): - >>> x = self.conv1(x) - >>> x = self.conv2(x) - >>> x = self.conv3(x) - >>> x = self.avg_pool(x) - >>> x = self.flatten(x) - >>> x = self.fc(x) - >>> return x - - >>> model = ExampleModel() - >>> x = (3, 16, 16) - to print the complexity information state for each layer, you can use - >>> get_model_complexity_info(model, x) - or directly use - >>> print_model_with_flops(model, 4579784.0, 37361) - ExampleModel( - 0.037 M, 100.000% Params, 0.005 GFLOPs, 100.000% FLOPs, - (conv1): Conv2d(0.0 M, 0.600% Params, 0.0 GFLOPs, 0.959% FLOPs, 3, 8, kernel_size=(3, 3), stride=(1, 1)) # noqa: E501 - (conv2): Conv2d(0.019 M, 50.020% Params, 0.003 GFLOPs, 58.760% FLOPs, 8, 256, kernel_size=(3, 3), stride=(1, 1)) - (conv3): Conv2d(0.018 M, 49.356% Params, 0.002 GFLOPs, 40.264% FLOPs, 256, 8, kernel_size=(3, 3), stride=(1, 1)) - (avg_pool): AdaptiveAvgPool2d(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.017% FLOPs, output_size=(1, 1)) - (flatten): Flatten(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.000% FLOPs, ) - (fc): Linear(0.0 M, 0.024% Params, 0.0 GFLOPs, 0.000% FLOPs, in_features=8, out_features=1, bias=True) - ) - """ - - def accumulate_params(self): - if is_supported_instance(self): - return self.__params__ - else: - sum = 0 - for m in self.children(): - sum += m.accumulate_params() - return sum - - def accumulate_flops(self): - if is_supported_instance(self): - return self.__flops__ / model.__batch_counter__ - else: - sum = 0 - for m in self.children(): - sum += m.accumulate_flops() - return sum - - def flops_repr(self): - accumulated_num_params = self.accumulate_params() - accumulated_flops_cost = self.accumulate_flops() - return ', '.join( - [ - params_to_string(accumulated_num_params, units='M', precision=precision), - '{:.3%} Params'.format(accumulated_num_params / total_params), - flops_to_string(accumulated_flops_cost, units=units, precision=precision), - '{:.3%} FLOPs'.format(accumulated_flops_cost / total_flops), - self.original_extra_repr(), - ] - ) - - def add_extra_repr(m): - m.accumulate_flops = accumulate_flops.__get__(m) - m.accumulate_params = accumulate_params.__get__(m) - flops_extra_repr = flops_repr.__get__(m) - if m.extra_repr != flops_extra_repr: - m.original_extra_repr = m.extra_repr - m.extra_repr = flops_extra_repr - assert m.extra_repr != m.original_extra_repr - - def del_extra_repr(m): - if hasattr(m, 'original_extra_repr'): - m.extra_repr = m.original_extra_repr - del m.original_extra_repr - if hasattr(m, 'accumulate_flops'): - del m.accumulate_flops - - model.apply(add_extra_repr) - print(model, file=ost, flush=flush) - model.apply(del_extra_repr) - - -def get_model_parameters_number(model): - """Calculate parameter number of a model. - - Args: - model (nn.module): The model for parameter number calculation. - - Returns: - float: Parameter number of the model. - """ - num_params = sum(p.numel() for p in model.parameters() if p.requires_grad) - return num_params - - -def add_flops_counting_methods(net_main_module): - # adding additional methods to the existing module object, - # this is done this way so that each function has access to self object - net_main_module.start_flops_count = start_flops_count.__get__(net_main_module) - net_main_module.stop_flops_count = stop_flops_count.__get__(net_main_module) - net_main_module.reset_flops_count = reset_flops_count.__get__(net_main_module) - net_main_module.compute_average_flops_cost = compute_average_flops_cost.__get__(net_main_module) # noqa: E501 - - net_main_module.reset_flops_count() - - return net_main_module - - -def compute_average_flops_cost(self): - """Compute average FLOPs cost. - - A method to compute average FLOPs cost, which will be available after - `add_flops_counting_methods()` is called on a desired net object. - - Returns: - float: Current mean flops consumption per image. - """ - batches_count = self.__batch_counter__ - flops_sum = 0 - for module in self.modules(): - if is_supported_instance(module): - flops_sum += module.__flops__ - params_sum = get_model_parameters_number(self) - return flops_sum / batches_count, params_sum - - -def start_flops_count(self): - """Activate the computation of mean flops consumption per image. - - A method to activate the computation of mean flops consumption per image. - which will be available after ``add_flops_counting_methods()`` is called on - a desired net object. It should be called before running the network. - """ - add_batch_counter_hook_function(self) - - def add_flops_counter_hook_function(module): - if is_supported_instance(module): - if hasattr(module, '__flops_handle__'): - return - - else: - handle = module.register_forward_hook(get_modules_mapping()[type(module)]) - - module.__flops_handle__ = handle - - self.apply(partial(add_flops_counter_hook_function)) - - -def stop_flops_count(self): - """Stop computing the mean flops consumption per image. - - A method to stop computing the mean flops consumption per image, which will - be available after ``add_flops_counting_methods()`` is called on a desired - net object. It can be called to pause the computation whenever. - """ - remove_batch_counter_hook_function(self) - self.apply(remove_flops_counter_hook_function) - - -def reset_flops_count(self): - """Reset statistics computed so far. - - A method to Reset computed statistics, which will be available after - `add_flops_counting_methods()` is called on a desired net object. - """ - add_batch_counter_variables_or_reset(self) - self.apply(add_flops_counter_variable_or_reset) - - -# ---- Internal functions -def empty_flops_counter_hook(module, input, output): - module.__flops__ += 0 - - -def upsample_flops_counter_hook(module, input, output): - output_size = output[0] - batch_size = output_size.shape[0] - output_elements_count = batch_size - for val in output_size.shape[1:]: - output_elements_count *= val - module.__flops__ += int(output_elements_count) - - -def relu_flops_counter_hook(module, input, output): - active_elements_count = output.numel() - module.__flops__ += int(active_elements_count) - - -def linear_flops_counter_hook(module, input, output): - input = input[0] - output_last_dim = output.shape[-1] # pytorch checks dimensions, so here we don't care much - module.__flops__ += int(np.prod(input.shape) * output_last_dim) - - -def pool_flops_counter_hook(module, input, output): - input = input[0] - module.__flops__ += int(np.prod(input.shape)) - - -def norm_flops_counter_hook(module, input, output): - input = input[0] - - batch_flops = np.prod(input.shape) - if getattr(module, 'affine', False) or getattr(module, 'elementwise_affine', False): - batch_flops *= 2 - module.__flops__ += int(batch_flops) - - -def deconv_flops_counter_hook(conv_module, input, output): - # Can have multiple inputs, getting the first one - input = input[0] - - batch_size = input.shape[0] - input_height, input_width = input.shape[2:] - - kernel_height, kernel_width = conv_module.kernel_size - in_channels = conv_module.in_channels - out_channels = conv_module.out_channels - groups = conv_module.groups - - filters_per_channel = out_channels // groups - conv_per_position_flops = kernel_height * kernel_width * in_channels * filters_per_channel - - active_elements_count = batch_size * input_height * input_width - overall_conv_flops = conv_per_position_flops * active_elements_count - bias_flops = 0 - if conv_module.bias is not None: - output_height, output_width = output.shape[2:] - bias_flops = out_channels * batch_size * output_height * output_height - overall_flops = overall_conv_flops + bias_flops - - conv_module.__flops__ += int(overall_flops) - - -def conv_flops_counter_hook(conv_module, input, output): - # Can have multiple inputs, getting the first one - input = input[0] - - batch_size = input.shape[0] - output_dims = list(output.shape[2:]) - - kernel_dims = list(conv_module.kernel_size) - in_channels = conv_module.in_channels - out_channels = conv_module.out_channels - groups = conv_module.groups - - filters_per_channel = out_channels // groups - conv_per_position_flops = int(np.prod(kernel_dims)) * in_channels * filters_per_channel - - active_elements_count = batch_size * int(np.prod(output_dims)) - - overall_conv_flops = conv_per_position_flops * active_elements_count - - bias_flops = 0 - - if conv_module.bias is not None: - - bias_flops = out_channels * active_elements_count - - overall_flops = overall_conv_flops + bias_flops - - conv_module.__flops__ += int(overall_flops) - - -def batch_counter_hook(module, input, output): - batch_size = 1 - if len(input) > 0: - # Can have multiple inputs, getting the first one - input = input[0] - batch_size = len(input) - else: - pass - print('Warning! No positional inputs found for a module, ' 'assuming batch size is 1.') - module.__batch_counter__ += batch_size - - -def add_batch_counter_variables_or_reset(module): - - module.__batch_counter__ = 0 - - -def add_batch_counter_hook_function(module): - if hasattr(module, '__batch_counter_handle__'): - return - - handle = module.register_forward_hook(batch_counter_hook) - module.__batch_counter_handle__ = handle - - -def remove_batch_counter_hook_function(module): - if hasattr(module, '__batch_counter_handle__'): - module.__batch_counter_handle__.remove() - del module.__batch_counter_handle__ - - -def add_flops_counter_variable_or_reset(module): - if is_supported_instance(module): - if hasattr(module, '__flops__') or hasattr(module, '__params__'): - print( - 'Warning: variables __flops__ or __params__ are already ' - 'defined for the module' + type(module).__name__ + ' ptflops can affect your code!' - ) - module.__flops__ = 0 - module.__params__ = get_model_parameters_number(module) - - -def is_supported_instance(module): - if type(module) in get_modules_mapping(): - return True - return False - - -def remove_flops_counter_hook_function(module): - if is_supported_instance(module): - if hasattr(module, '__flops_handle__'): - module.__flops_handle__.remove() - del module.__flops_handle__ - - -def get_modules_mapping(): - return { - # convolutions - nn.Conv1d: conv_flops_counter_hook, - nn.Conv2d: conv_flops_counter_hook, - mmcv.cnn.bricks.Conv2d: conv_flops_counter_hook, - nn.Conv3d: conv_flops_counter_hook, - mmcv.cnn.bricks.Conv3d: conv_flops_counter_hook, - # activations - nn.ReLU: relu_flops_counter_hook, - nn.PReLU: relu_flops_counter_hook, - nn.ELU: relu_flops_counter_hook, - nn.LeakyReLU: relu_flops_counter_hook, - nn.ReLU6: relu_flops_counter_hook, - # poolings - nn.MaxPool1d: pool_flops_counter_hook, - nn.AvgPool1d: pool_flops_counter_hook, - nn.AvgPool2d: pool_flops_counter_hook, - nn.MaxPool2d: pool_flops_counter_hook, - mmcv.cnn.bricks.MaxPool2d: pool_flops_counter_hook, - nn.MaxPool3d: pool_flops_counter_hook, - mmcv.cnn.bricks.MaxPool3d: pool_flops_counter_hook, - nn.AvgPool3d: pool_flops_counter_hook, - nn.AdaptiveMaxPool1d: pool_flops_counter_hook, - nn.AdaptiveAvgPool1d: pool_flops_counter_hook, - nn.AdaptiveMaxPool2d: pool_flops_counter_hook, - nn.AdaptiveAvgPool2d: pool_flops_counter_hook, - nn.AdaptiveMaxPool3d: pool_flops_counter_hook, - nn.AdaptiveAvgPool3d: pool_flops_counter_hook, - # normalizations - nn.BatchNorm1d: norm_flops_counter_hook, - nn.BatchNorm2d: norm_flops_counter_hook, - nn.BatchNorm3d: norm_flops_counter_hook, - nn.GroupNorm: norm_flops_counter_hook, - nn.InstanceNorm1d: norm_flops_counter_hook, - nn.InstanceNorm2d: norm_flops_counter_hook, - nn.InstanceNorm3d: norm_flops_counter_hook, - nn.LayerNorm: norm_flops_counter_hook, - # FC - nn.Linear: linear_flops_counter_hook, - mmcv.cnn.bricks.Linear: linear_flops_counter_hook, - # Upscale - nn.Upsample: upsample_flops_counter_hook, - # Deconvolution - nn.ConvTranspose2d: deconv_flops_counter_hook, - mmcv.cnn.bricks.ConvTranspose2d: deconv_flops_counter_hook, - } diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py deleted file mode 100644 index 33dd13e18826..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.nn as nn - - -def _fuse_conv_bn(conv, bn): - """Fuse conv and bn into one module. - - Args: - conv (nn.Module): Conv to be fused. - bn (nn.Module): BN to be fused. - - Returns: - nn.Module: Fused module. - """ - conv_w = conv.weight - conv_b = conv.bias if conv.bias is not None else torch.zeros_like(bn.running_mean) - - factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) - conv.weight = nn.Parameter(conv_w * factor.reshape([conv.out_channels, 1, 1, 1])) - conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) - return conv - - -def fuse_conv_bn(module): - """Recursively fuse conv and bn in a module. - - During inference, the functionary of batch norm layers is turned off - but only the mean and var alone channels are used, which exposes the - chance to fuse it with the preceding conv layers to save computations and - simplify network structures. - - Args: - module (nn.Module): Module to be fused. - - Returns: - nn.Module: Fused module. - """ - last_conv = None - last_conv_name = None - - for name, child in module.named_children(): - if isinstance(child, (nn.modules.batchnorm._BatchNorm, nn.SyncBatchNorm)): - if last_conv is None: # only fuse BN that is after Conv - continue - fused_conv = _fuse_conv_bn(last_conv, child) - module._modules[last_conv_name] = fused_conv - # To reduce changes, set BN as Identity instead of deleting it. - module._modules[name] = nn.Identity() - last_conv = None - elif isinstance(child, nn.Conv2d): - last_conv = child - last_conv_name = name - else: - fuse_conv_bn(child) - return module diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py deleted file mode 100644 index d88b7e476317..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py +++ /dev/null @@ -1,59 +0,0 @@ -import torch - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - - -class _BatchNormXd(torch.nn.modules.batchnorm._BatchNorm): - """A general BatchNorm layer without input dimension check. - - Reproduced from @kapily's work: - (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547) - The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc - is `_check_input_dim` that is designed for tensor sanity checks. - The check has been bypassed in this class for the convenience of converting - SyncBatchNorm. - """ - - def _check_input_dim(self, input): - return - - -def revert_sync_batchnorm(module): - """Helper function to convert all `SyncBatchNorm` (SyncBN) and - `mmcv.ops.sync_bn.SyncBatchNorm`(MMSyncBN) layers in the model to - `BatchNormXd` layers. - - Adapted from @kapily's work: - (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547) - - Args: - module (nn.Module): The module containing `SyncBatchNorm` layers. - - Returns: - module_output: The converted module with `BatchNormXd` layers. - """ - module_output = module - module_checklist = [torch.nn.modules.batchnorm.SyncBatchNorm] - if hasattr(mmcv, 'ops'): - module_checklist.append(mmcv.ops.SyncBatchNorm) - if isinstance(module, tuple(module_checklist)): - module_output = _BatchNormXd( - module.num_features, module.eps, module.momentum, module.affine, module.track_running_stats - ) - if module.affine: - # no_grad() may not be needed here but - # just to be consistent with `convert_sync_batchnorm()` - with torch.no_grad(): - module_output.weight = module.weight - module_output.bias = module.bias - module_output.running_mean = module.running_mean - module_output.running_var = module.running_var - module_output.num_batches_tracked = module.num_batches_tracked - module_output.training = module.training - # qconfig exists in quantized models - if hasattr(module, 'qconfig'): - module_output.qconfig = module.qconfig - for name, child in module.named_children(): - module_output.add_module(name, revert_sync_batchnorm(child)) - del module - return module_output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py deleted file mode 100644 index aa5047e743cb..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py +++ /dev/null @@ -1,644 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import copy -import math -import warnings - -import numpy as np -import torch -import torch.nn as nn -from torch import Tensor - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import ( - Registry, - build_from_cfg, - get_logger, - print_log, -) - -INITIALIZERS = Registry('initializer') - - -def update_init_info(module, init_info): - """Update the `_params_init_info` in the module if the value of parameters - are changed. - - Args: - module (obj:`nn.Module`): The module of PyTorch with a user-defined - attribute `_params_init_info` which records the initialization - information. - init_info (str): The string that describes the initialization. - """ - assert hasattr(module, '_params_init_info'), f'Can not find `_params_init_info` in {module}' - for name, param in module.named_parameters(): - - assert param in module._params_init_info, ( - f'Find a new :obj:`Parameter` ' - f'named `{name}` during executing the ' - f'`init_weights` of ' - f'`{module.__class__.__name__}`. ' - f'Please do not add or ' - f'replace parameters during executing ' - f'the `init_weights`. ' - ) - - # The parameter has been changed during executing the - # `init_weights` of module - mean_value = param.data.mean() - if module._params_init_info[param]['tmp_mean_value'] != mean_value: - module._params_init_info[param]['init_info'] = init_info - module._params_init_info[param]['tmp_mean_value'] = mean_value - - -def constant_init(module, val, bias=0): - if hasattr(module, 'weight') and module.weight is not None: - nn.init.constant_(module.weight, val) - if hasattr(module, 'bias') and module.bias is not None: - nn.init.constant_(module.bias, bias) - - -def xavier_init(module, gain=1, bias=0, distribution='normal'): - assert distribution in ['uniform', 'normal'] - if hasattr(module, 'weight') and module.weight is not None: - if distribution == 'uniform': - nn.init.xavier_uniform_(module.weight, gain=gain) - else: - nn.init.xavier_normal_(module.weight, gain=gain) - if hasattr(module, 'bias') and module.bias is not None: - nn.init.constant_(module.bias, bias) - - -def normal_init(module, mean=0, std=1, bias=0): - if hasattr(module, 'weight') and module.weight is not None: - nn.init.normal_(module.weight, mean, std) - if hasattr(module, 'bias') and module.bias is not None: - nn.init.constant_(module.bias, bias) - - -def trunc_normal_init( - module: nn.Module, mean: float = 0, std: float = 1, a: float = -2, b: float = 2, bias: float = 0 -) -> None: - if hasattr(module, 'weight') and module.weight is not None: - trunc_normal_(module.weight, mean, std, a, b) # type: ignore - if hasattr(module, 'bias') and module.bias is not None: - nn.init.constant_(module.bias, bias) # type: ignore - - -def uniform_init(module, a=0, b=1, bias=0): - if hasattr(module, 'weight') and module.weight is not None: - nn.init.uniform_(module.weight, a, b) - if hasattr(module, 'bias') and module.bias is not None: - nn.init.constant_(module.bias, bias) - - -def kaiming_init(module, a=0, mode='fan_out', nonlinearity='relu', bias=0, distribution='normal'): - assert distribution in ['uniform', 'normal'] - if hasattr(module, 'weight') and module.weight is not None: - if distribution == 'uniform': - nn.init.kaiming_uniform_(module.weight, a=a, mode=mode, nonlinearity=nonlinearity) - else: - nn.init.kaiming_normal_(module.weight, a=a, mode=mode, nonlinearity=nonlinearity) - if hasattr(module, 'bias') and module.bias is not None: - nn.init.constant_(module.bias, bias) - - -def caffe2_xavier_init(module, bias=0): - # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch - # Acknowledgment to FAIR's internal code - kaiming_init(module, a=1, mode='fan_in', nonlinearity='leaky_relu', bias=bias, distribution='uniform') - - -def bias_init_with_prob(prior_prob): - """initialize conv/fc bias value according to a given probability value.""" - bias_init = float(-np.log((1 - prior_prob) / prior_prob)) - return bias_init - - -def _get_bases_name(m): - return [b.__name__ for b in m.__class__.__bases__] - - -class BaseInit(object): - def __init__(self, *, bias=0, bias_prob=None, layer=None): - self.wholemodule = False - if not isinstance(bias, (int, float)): - raise TypeError(f'bias must be a number, but got a {type(bias)}') - - if bias_prob is not None: - if not isinstance(bias_prob, float): - raise TypeError( - f'bias_prob type must be float, \ - but got {type(bias_prob)}' - ) - - if layer is not None: - if not isinstance(layer, (str, list)): - raise TypeError( - f'layer must be a str or a list of str, \ - but got a {type(layer)}' - ) - else: - layer = [] - - if bias_prob is not None: - self.bias = bias_init_with_prob(bias_prob) - else: - self.bias = bias - self.layer = [layer] if isinstance(layer, str) else layer - - def _get_init_info(self): - info = f'{self.__class__.__name__}, bias={self.bias}' - return info - - -@INITIALIZERS.register_module(name='Constant') -class ConstantInit(BaseInit): - """Initialize module parameters with constant values. - - Args: - val (int | float): the value to fill the weights in the module with - bias (int | float): the value to fill the bias. Defaults to 0. - bias_prob (float, optional): the probability for bias initialization. - Defaults to None. - layer (str | list[str], optional): the layer will be initialized. - Defaults to None. - """ - - def __init__(self, val, **kwargs): - super().__init__(**kwargs) - self.val = val - - def __call__(self, module): - def init(m): - if self.wholemodule: - constant_init(m, self.val, self.bias) - else: - layername = m.__class__.__name__ - basesname = _get_bases_name(m) - if len(set(self.layer) & set([layername] + basesname)): - constant_init(m, self.val, self.bias) - - module.apply(init) - if hasattr(module, '_params_init_info'): - update_init_info(module, init_info=self._get_init_info()) - - def _get_init_info(self): - info = f'{self.__class__.__name__}: val={self.val}, bias={self.bias}' - return info - - -@INITIALIZERS.register_module(name='Xavier') -class XavierInit(BaseInit): - r"""Initialize module parameters with values according to the method - described in `Understanding the difficulty of training deep feedforward - neural networks - Glorot, X. & Bengio, Y. (2010). - `_ - - Args: - gain (int | float): an optional scaling factor. Defaults to 1. - bias (int | float): the value to fill the bias. Defaults to 0. - bias_prob (float, optional): the probability for bias initialization. - Defaults to None. - distribution (str): distribution either be ``'normal'`` - or ``'uniform'``. Defaults to ``'normal'``. - layer (str | list[str], optional): the layer will be initialized. - Defaults to None. - """ - - def __init__(self, gain=1, distribution='normal', **kwargs): - super().__init__(**kwargs) - self.gain = gain - self.distribution = distribution - - def __call__(self, module): - def init(m): - if self.wholemodule: - xavier_init(m, self.gain, self.bias, self.distribution) - else: - layername = m.__class__.__name__ - basesname = _get_bases_name(m) - if len(set(self.layer) & set([layername] + basesname)): - xavier_init(m, self.gain, self.bias, self.distribution) - - module.apply(init) - if hasattr(module, '_params_init_info'): - update_init_info(module, init_info=self._get_init_info()) - - def _get_init_info(self): - info = f'{self.__class__.__name__}: gain={self.gain}, ' f'distribution={self.distribution}, bias={self.bias}' - return info - - -@INITIALIZERS.register_module(name='Normal') -class NormalInit(BaseInit): - r"""Initialize module parameters with the values drawn from the normal - distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`. - - Args: - mean (int | float):the mean of the normal distribution. Defaults to 0. - std (int | float): the standard deviation of the normal distribution. - Defaults to 1. - bias (int | float): the value to fill the bias. Defaults to 0. - bias_prob (float, optional): the probability for bias initialization. - Defaults to None. - layer (str | list[str], optional): the layer will be initialized. - Defaults to None. - - """ - - def __init__(self, mean=0, std=1, **kwargs): - super().__init__(**kwargs) - self.mean = mean - self.std = std - - def __call__(self, module): - def init(m): - if self.wholemodule: - normal_init(m, self.mean, self.std, self.bias) - else: - layername = m.__class__.__name__ - basesname = _get_bases_name(m) - if len(set(self.layer) & set([layername] + basesname)): - normal_init(m, self.mean, self.std, self.bias) - - module.apply(init) - if hasattr(module, '_params_init_info'): - update_init_info(module, init_info=self._get_init_info()) - - def _get_init_info(self): - info = f'{self.__class__.__name__}: mean={self.mean},' f' std={self.std}, bias={self.bias}' - return info - - -@INITIALIZERS.register_module(name='TruncNormal') -class TruncNormalInit(BaseInit): - r"""Initialize module parameters with the values drawn from the normal - distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` with values - outside :math:`[a, b]`. - - Args: - mean (float): the mean of the normal distribution. Defaults to 0. - std (float): the standard deviation of the normal distribution. - Defaults to 1. - a (float): The minimum cutoff value. - b ( float): The maximum cutoff value. - bias (float): the value to fill the bias. Defaults to 0. - bias_prob (float, optional): the probability for bias initialization. - Defaults to None. - layer (str | list[str], optional): the layer will be initialized. - Defaults to None. - - """ - - def __init__(self, mean: float = 0, std: float = 1, a: float = -2, b: float = 2, **kwargs) -> None: - super().__init__(**kwargs) - self.mean = mean - self.std = std - self.a = a - self.b = b - - def __call__(self, module: nn.Module) -> None: - def init(m): - if self.wholemodule: - trunc_normal_init(m, self.mean, self.std, self.a, self.b, self.bias) - else: - layername = m.__class__.__name__ - basesname = _get_bases_name(m) - if len(set(self.layer) & set([layername] + basesname)): - trunc_normal_init(m, self.mean, self.std, self.a, self.b, self.bias) - - module.apply(init) - if hasattr(module, '_params_init_info'): - update_init_info(module, init_info=self._get_init_info()) - - def _get_init_info(self): - info = ( - f'{self.__class__.__name__}: a={self.a}, b={self.b},' - f' mean={self.mean}, std={self.std}, bias={self.bias}' - ) - return info - - -@INITIALIZERS.register_module(name='Uniform') -class UniformInit(BaseInit): - r"""Initialize module parameters with values drawn from the uniform - distribution :math:`\mathcal{U}(a, b)`. - - Args: - a (int | float): the lower bound of the uniform distribution. - Defaults to 0. - b (int | float): the upper bound of the uniform distribution. - Defaults to 1. - bias (int | float): the value to fill the bias. Defaults to 0. - bias_prob (float, optional): the probability for bias initialization. - Defaults to None. - layer (str | list[str], optional): the layer will be initialized. - Defaults to None. - """ - - def __init__(self, a=0, b=1, **kwargs): - super().__init__(**kwargs) - self.a = a - self.b = b - - def __call__(self, module): - def init(m): - if self.wholemodule: - uniform_init(m, self.a, self.b, self.bias) - else: - layername = m.__class__.__name__ - basesname = _get_bases_name(m) - if len(set(self.layer) & set([layername] + basesname)): - uniform_init(m, self.a, self.b, self.bias) - - module.apply(init) - if hasattr(module, '_params_init_info'): - update_init_info(module, init_info=self._get_init_info()) - - def _get_init_info(self): - info = f'{self.__class__.__name__}: a={self.a},' f' b={self.b}, bias={self.bias}' - return info - - -@INITIALIZERS.register_module(name='Kaiming') -class KaimingInit(BaseInit): - r"""Initialize module parameters with the values according to the method - described in `Delving deep into rectifiers: Surpassing human-level - performance on ImageNet classification - He, K. et al. (2015). - `_ - - Args: - a (int | float): the negative slope of the rectifier used after this - layer (only used with ``'leaky_relu'``). Defaults to 0. - mode (str): either ``'fan_in'`` or ``'fan_out'``. Choosing - ``'fan_in'`` preserves the magnitude of the variance of the weights - in the forward pass. Choosing ``'fan_out'`` preserves the - magnitudes in the backwards pass. Defaults to ``'fan_out'``. - nonlinearity (str): the non-linear function (`nn.functional` name), - recommended to use only with ``'relu'`` or ``'leaky_relu'`` . - Defaults to 'relu'. - bias (int | float): the value to fill the bias. Defaults to 0. - bias_prob (float, optional): the probability for bias initialization. - Defaults to None. - distribution (str): distribution either be ``'normal'`` or - ``'uniform'``. Defaults to ``'normal'``. - layer (str | list[str], optional): the layer will be initialized. - Defaults to None. - """ - - def __init__(self, a=0, mode='fan_out', nonlinearity='relu', distribution='normal', **kwargs): - super().__init__(**kwargs) - self.a = a - self.mode = mode - self.nonlinearity = nonlinearity - self.distribution = distribution - - def __call__(self, module): - def init(m): - if self.wholemodule: - kaiming_init(m, self.a, self.mode, self.nonlinearity, self.bias, self.distribution) - else: - layername = m.__class__.__name__ - basesname = _get_bases_name(m) - if len(set(self.layer) & set([layername] + basesname)): - kaiming_init(m, self.a, self.mode, self.nonlinearity, self.bias, self.distribution) - - module.apply(init) - if hasattr(module, '_params_init_info'): - update_init_info(module, init_info=self._get_init_info()) - - def _get_init_info(self): - info = ( - f'{self.__class__.__name__}: a={self.a}, mode={self.mode}, ' - f'nonlinearity={self.nonlinearity}, ' - f'distribution ={self.distribution}, bias={self.bias}' - ) - return info - - -@INITIALIZERS.register_module(name='Caffe2Xavier') -class Caffe2XavierInit(KaimingInit): - # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch - # Acknowledgment to FAIR's internal code - def __init__(self, **kwargs): - super().__init__(a=1, mode='fan_in', nonlinearity='leaky_relu', distribution='uniform', **kwargs) - - def __call__(self, module): - super().__call__(module) - - -@INITIALIZERS.register_module(name='Pretrained') -class PretrainedInit(object): - """Initialize module by loading a pretrained model. - - Args: - checkpoint (str): the checkpoint file of the pretrained model should - be load. - prefix (str, optional): the prefix of a sub-module in the pretrained - model. it is for loading a part of the pretrained model to - initialize. For example, if we would like to only load the - backbone of a detector model, we can set ``prefix='backbone.'``. - Defaults to None. - map_location (str): map tensors into proper locations. - """ - - def __init__(self, checkpoint, prefix=None, map_location=None): - self.checkpoint = checkpoint - self.prefix = prefix - self.map_location = map_location - - def __call__(self, module): - from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import ( - _load_checkpoint_with_prefix, - load_checkpoint, - load_state_dict, - ) - - logger = get_logger('mmcv') - if self.prefix is None: - print_log(f'load model from: {self.checkpoint}', logger=logger) - load_checkpoint(module, self.checkpoint, map_location=self.map_location, strict=False, logger=logger) - else: - print_log(f'load {self.prefix} in model from: {self.checkpoint}', logger=logger) - state_dict = _load_checkpoint_with_prefix(self.prefix, self.checkpoint, map_location=self.map_location) - load_state_dict(module, state_dict, strict=False, logger=logger) - - if hasattr(module, '_params_init_info'): - update_init_info(module, init_info=self._get_init_info()) - - def _get_init_info(self): - info = f'{self.__class__.__name__}: load from {self.checkpoint}' - return info - - -def _initialize(module, cfg, wholemodule=False): - func = build_from_cfg(cfg, INITIALIZERS) - # wholemodule flag is for override mode, there is no layer key in override - # and initializer will give init values for the whole module with the name - # in override. - func.wholemodule = wholemodule - func(module) - - -def _initialize_override(module, override, cfg): - if not isinstance(override, (dict, list)): - raise TypeError( - f'override must be a dict or a list of dict, \ - but got {type(override)}' - ) - - override = [override] if isinstance(override, dict) else override - - for override_ in override: - - cp_override = copy.deepcopy(override_) - name = cp_override.pop('name', None) - if name is None: - raise ValueError('`override` must contain the key "name",' f'but got {cp_override}') - # if override only has name key, it means use args in init_cfg - if not cp_override: - cp_override.update(cfg) - # if override has name key and other args except type key, it will - # raise error - elif 'type' not in cp_override.keys(): - raise ValueError(f'`override` need "type" key, but got {cp_override}') - - if hasattr(module, name): - _initialize(getattr(module, name), cp_override, wholemodule=True) - else: - raise RuntimeError(f'module did not have attribute {name}, ' f'but init_cfg is {cp_override}.') - - -def initialize(module, init_cfg): - """Initialize a module. - - Args: - module (``torch.nn.Module``): the module will be initialized. - init_cfg (dict | list[dict]): initialization configuration dict to - define initializer. OpenMMLab has implemented 6 initializers - including ``Constant``, ``Xavier``, ``Normal``, ``Uniform``, - ``Kaiming``, and ``Pretrained``. - Example: - >>> module = nn.Linear(2, 3, bias=True) - >>> init_cfg = dict(type='Constant', layer='Linear', val =1 , bias =2) - >>> initialize(module, init_cfg) - - >>> module = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1,2)) - >>> # define key ``'layer'`` for initializing layer with different - >>> # configuration - >>> init_cfg = [dict(type='Constant', layer='Conv1d', val=1), - dict(type='Constant', layer='Linear', val=2)] - >>> initialize(module, init_cfg) - - >>> # define key``'override'`` to initialize some specific part in - >>> # module - >>> class FooNet(nn.Module): - >>> def __init__(self): - >>> super().__init__() - >>> self.feat = nn.Conv2d(3, 16, 3) - >>> self.reg = nn.Conv2d(16, 10, 3) - >>> self.cls = nn.Conv2d(16, 5, 3) - >>> model = FooNet() - >>> init_cfg = dict(type='Constant', val=1, bias=2, layer='Conv2d', - >>> override=dict(type='Constant', name='reg', val=3, bias=4)) - >>> initialize(model, init_cfg) - - >>> model = ResNet(depth=50) - >>> # Initialize weights with the pretrained model. - >>> init_cfg = dict(type='Pretrained', - checkpoint='torchvision://resnet50') - >>> initialize(model, init_cfg) - - >>> # Initialize weights of a sub-module with the specific part of - >>> # a pretrained model by using "prefix". - >>> url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\ - >>> 'retinanet_r50_fpn_1x_coco/'\ - >>> 'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth' - >>> init_cfg = dict(type='Pretrained', - checkpoint=url, prefix='backbone.') - """ - if not isinstance(init_cfg, (dict, list)): - raise TypeError( - f'init_cfg must be a dict or a list of dict, \ - but got {type(init_cfg)}' - ) - - if isinstance(init_cfg, dict): - init_cfg = [init_cfg] - - for cfg in init_cfg: - # should deeply copy the original config because cfg may be used by - # other modules, e.g., one init_cfg shared by multiple bottleneck - # blocks, the expected cfg will be changed after pop and will change - # the initialization behavior of other modules - cp_cfg = copy.deepcopy(cfg) - override = cp_cfg.pop('override', None) - _initialize(module, cp_cfg) - - if override is not None: - cp_cfg.pop('layer', None) - _initialize_override(module, override, cp_cfg) - else: - # All attributes in module have same initialization. - pass - - -def _no_grad_trunc_normal_(tensor: Tensor, mean: float, std: float, a: float, b: float) -> Tensor: - # Method based on - # https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf - # Modified from - # https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py - def norm_cdf(x): - # Computes standard normal cumulative distribution function - return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 - - if (mean < a - 2 * std) or (mean > b + 2 * std): - warnings.warn( - 'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. ' - 'The distribution of values may be incorrect.', - stacklevel=2, - ) - - with torch.no_grad(): - # Values are generated by using a truncated uniform distribution and - # then using the inverse CDF for the normal distribution. - # Get upper and lower cdf values - lower = norm_cdf((a - mean) / std) - upper = norm_cdf((b - mean) / std) - - # Uniformly fill tensor with values from [lower, upper], then translate - # to [2lower-1, 2upper-1]. - tensor.uniform_(2 * lower - 1, 2 * upper - 1) - - # Use inverse cdf transform for normal distribution to get truncated - # standard normal - tensor.erfinv_() - - # Transform to proper mean, std - tensor.mul_(std * math.sqrt(2.0)) - tensor.add_(mean) - - # Clamp to ensure it's in the proper range - tensor.clamp_(min=a, max=b) - return tensor - - -def trunc_normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0, a: float = -2.0, b: float = 2.0) -> Tensor: - r"""Fills the input Tensor with values drawn from a truncated - normal distribution. The values are effectively drawn from the - normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` - with values outside :math:`[a, b]` redrawn until they are within - the bounds. The method used for generating the random values works - best when :math:`a \leq \text{mean} \leq b`. - - Modified from - https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py - - Args: - tensor (``torch.Tensor``): an n-dimensional `torch.Tensor`. - mean (float): the mean of the normal distribution. - std (float): the standard deviation of the normal distribution. - a (float): the minimum cutoff value. - b (float): the maximum cutoff value. - """ - return _no_grad_trunc_normal_(tensor, mean, std, a, b) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py deleted file mode 100644 index c430ff61db6e..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import logging - -import torch.nn as nn - -from .utils import constant_init, kaiming_init, normal_init - - -def conv3x3(in_planes, out_planes, dilation=1): - """3x3 convolution with padding.""" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, padding=dilation, dilation=dilation) - - -def make_vgg_layer(inplanes, planes, num_blocks, dilation=1, with_bn=False, ceil_mode=False): - layers = [] - for _ in range(num_blocks): - layers.append(conv3x3(inplanes, planes, dilation)) - if with_bn: - layers.append(nn.BatchNorm2d(planes)) - layers.append(nn.ReLU(inplace=True)) - inplanes = planes - layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode)) - - return layers - - -class VGG(nn.Module): - """VGG backbone. - - Args: - depth (int): Depth of vgg, from {11, 13, 16, 19}. - with_bn (bool): Use BatchNorm or not. - num_classes (int): number of classes for classification. - num_stages (int): VGG stages, normally 5. - dilations (Sequence[int]): Dilation of each stage. - out_indices (Sequence[int]): Output from which stages. - frozen_stages (int): Stages to be frozen (all param fixed). -1 means - not freezing any parameters. - bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze - running stats (mean and var). - bn_frozen (bool): Whether to freeze weight and bias of BN layers. - """ - - arch_settings = {11: (1, 1, 2, 2, 2), 13: (2, 2, 2, 2, 2), 16: (2, 2, 3, 3, 3), 19: (2, 2, 4, 4, 4)} - - def __init__( - self, - depth, - with_bn=False, - num_classes=-1, - num_stages=5, - dilations=(1, 1, 1, 1, 1), - out_indices=(0, 1, 2, 3, 4), - frozen_stages=-1, - bn_eval=True, - bn_frozen=False, - ceil_mode=False, - with_last_pool=True, - ): - super(VGG, self).__init__() - if depth not in self.arch_settings: - raise KeyError(f'invalid depth {depth} for vgg') - assert num_stages >= 1 and num_stages <= 5 - stage_blocks = self.arch_settings[depth] - self.stage_blocks = stage_blocks[:num_stages] - assert len(dilations) == num_stages - assert max(out_indices) <= num_stages - - self.num_classes = num_classes - self.out_indices = out_indices - self.frozen_stages = frozen_stages - self.bn_eval = bn_eval - self.bn_frozen = bn_frozen - - self.inplanes = 3 - start_idx = 0 - vgg_layers = [] - self.range_sub_modules = [] - for i, num_blocks in enumerate(self.stage_blocks): - num_modules = num_blocks * (2 + with_bn) + 1 - end_idx = start_idx + num_modules - dilation = dilations[i] - planes = 64 * 2 ** i if i < 4 else 512 - vgg_layer = make_vgg_layer( - self.inplanes, planes, num_blocks, dilation=dilation, with_bn=with_bn, ceil_mode=ceil_mode - ) - vgg_layers.extend(vgg_layer) - self.inplanes = planes - self.range_sub_modules.append([start_idx, end_idx]) - start_idx = end_idx - if not with_last_pool: - vgg_layers.pop(-1) - self.range_sub_modules[-1][1] -= 1 - self.module_name = 'features' - self.add_module(self.module_name, nn.Sequential(*vgg_layers)) - - if self.num_classes > 0: - self.classifier = nn.Sequential( - nn.Linear(512 * 7 * 7, 4096), - nn.ReLU(True), - nn.Dropout(), - nn.Linear(4096, 4096), - nn.ReLU(True), - nn.Dropout(), - nn.Linear(4096, num_classes), - ) - - def init_weights(self, pretrained=None): - if isinstance(pretrained, str): - logger = logging.getLogger() - from ..runner import load_checkpoint - - load_checkpoint(self, pretrained, strict=False, logger=logger) - elif pretrained is None: - for m in self.modules(): - if isinstance(m, nn.Conv2d): - kaiming_init(m) - elif isinstance(m, nn.BatchNorm2d): - constant_init(m, 1) - elif isinstance(m, nn.Linear): - normal_init(m, std=0.01) - else: - raise TypeError('pretrained must be a str or None') - - def forward(self, x): - outs = [] - vgg_layers = getattr(self, self.module_name) - for i in range(len(self.stage_blocks)): - for j in range(*self.range_sub_modules[i]): - vgg_layer = vgg_layers[j] - x = vgg_layer(x) - if i in self.out_indices: - outs.append(x) - if self.num_classes > 0: - x = x.view(x.size(0), -1) - x = self.classifier(x) - outs.append(x) - if len(outs) == 1: - return outs[0] - else: - return tuple(outs) - - def train(self, mode=True): - super(VGG, self).train(mode) - if self.bn_eval: - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d): - m.eval() - if self.bn_frozen: - for params in m.parameters(): - params.requires_grad = False - vgg_layers = getattr(self, self.module_name) - if mode and self.frozen_stages >= 0: - for i in range(self.frozen_stages): - for j in range(*self.range_sub_modules[i]): - mod = vgg_layers[j] - mod.eval() - for param in mod.parameters(): - param.requires_grad = False diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py deleted file mode 100644 index 91307c41c0a5..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .test import collect_results_cpu, collect_results_gpu, multi_gpu_test, single_gpu_test - -__all__ = ['collect_results_cpu', 'collect_results_gpu', 'multi_gpu_test', 'single_gpu_test'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py deleted file mode 100644 index 93f07f70ea4a..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py +++ /dev/null @@ -1,195 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os.path as osp -import pickle -import shutil -import tempfile -import time - -import torch -import torch.distributed as dist - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import get_dist_info - - -def single_gpu_test(model, data_loader): - """Test model with a single gpu. - - This method tests model with a single gpu and displays test progress bar. - - Args: - model (nn.Module): Model to be tested. - data_loader (nn.Dataloader): Pytorch data loader. - - Returns: - list: The prediction results. - """ - model.eval() - results = [] - dataset = data_loader.dataset - prog_bar = mmcv.ProgressBar(len(dataset)) - for data in data_loader: - with torch.no_grad(): - result = model(return_loss=False, **data) - results.extend(result) - - # Assume result has the same length of batch_size - # refer to https://github.com/open-mmlab/mmcv/issues/985 - batch_size = len(result) - for _ in range(batch_size): - prog_bar.update() - return results - - -def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False): - """Test model with multiple gpus. - - This method tests model with multiple gpus and collects the results - under two different modes: gpu and cpu modes. By setting - ``gpu_collect=True``, it encodes results to gpu tensors and use gpu - communication for results collection. On cpu mode it saves the results on - different gpus to ``tmpdir`` and collects them by the rank 0 worker. - - Args: - model (nn.Module): Model to be tested. - data_loader (nn.Dataloader): Pytorch data loader. - tmpdir (str): Path of directory to save the temporary results from - different gpus under cpu mode. - gpu_collect (bool): Option to use either gpu or cpu to collect results. - - Returns: - list: The prediction results. - """ - model.eval() - results = [] - dataset = data_loader.dataset - rank, world_size = get_dist_info() - if rank == 0: - prog_bar = mmcv.ProgressBar(len(dataset)) - time.sleep(2) # This line can prevent deadlock problem in some cases. - for i, data in enumerate(data_loader): - with torch.no_grad(): - result = model(return_loss=False, **data) - results.extend(result) - - if rank == 0: - batch_size = len(result) - batch_size_all = batch_size * world_size - if batch_size_all + prog_bar.completed > len(dataset): - batch_size_all = len(dataset) - prog_bar.completed - for _ in range(batch_size_all): - prog_bar.update() - - # collect results from all ranks - if gpu_collect: - results = collect_results_gpu(results, len(dataset)) - else: - results = collect_results_cpu(results, len(dataset), tmpdir) - return results - - -def collect_results_cpu(result_part, size, tmpdir=None): - """Collect results under cpu mode. - - On cpu mode, this function will save the results on different gpus to - ``tmpdir`` and collect them by the rank 0 worker. - - Args: - result_part (list): Result list containing result parts - to be collected. - size (int): Size of the results, commonly equal to length of - the results. - tmpdir (str | None): temporal directory for collected results to - store. If set to None, it will create a random temporal directory - for it. - - Returns: - list: The collected results. - """ - rank, world_size = get_dist_info() - # create a tmp dir if it is not specified - if tmpdir is None: - MAX_LEN = 512 - # 32 is whitespace - dir_tensor = torch.full((MAX_LEN,), 32, dtype=torch.uint8, device='cuda') - if rank == 0: - mmcv.mkdir_or_exist('.dist_test') - tmpdir = tempfile.mkdtemp(dir='.dist_test') - tmpdir = torch.tensor(bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') - dir_tensor[: len(tmpdir)] = tmpdir - dist.broadcast(dir_tensor, 0) - tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() - else: - mmcv.mkdir_or_exist(tmpdir) - # dump the part result to the dir - mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl')) - dist.barrier() - # collect all parts - if rank != 0: - return None - else: - # load results of all parts from tmp dir - part_list = [] - for i in range(world_size): - part_file = osp.join(tmpdir, f'part_{i}.pkl') - part_result = mmcv.load(part_file) - # When data is severely insufficient, an empty part_result - # on a certain gpu could makes the overall outputs empty. - if part_result: - part_list.append(part_result) - # sort the results - ordered_results = [] - for res in zip(*part_list): - ordered_results.extend(list(res)) - # the dataloader may pad some samples - ordered_results = ordered_results[:size] - # remove tmp dir - shutil.rmtree(tmpdir) - return ordered_results - - -def collect_results_gpu(result_part, size): - """Collect results under gpu mode. - - On gpu mode, this function will encode results to gpu tensors and use gpu - communication for results collection. - - Args: - result_part (list): Result list containing result parts - to be collected. - size (int): Size of the results, commonly equal to length of - the results. - - Returns: - list: The collected results. - """ - rank, world_size = get_dist_info() - # dump result part to tensor with pickle - part_tensor = torch.tensor(bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') - # gather all result part tensor shape - shape_tensor = torch.tensor(part_tensor.shape, device='cuda') - shape_list = [shape_tensor.clone() for _ in range(world_size)] - dist.all_gather(shape_list, shape_tensor) - # padding result part tensor to max length - shape_max = torch.tensor(shape_list).max() - part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') - part_send[: shape_tensor[0]] = part_tensor - part_recv_list = [part_tensor.new_zeros(shape_max) for _ in range(world_size)] - # gather all result part - dist.all_gather(part_recv_list, part_send) - - if rank == 0: - part_list = [] - for recv, shape in zip(part_recv_list, shape_list): - part_result = pickle.loads(recv[: shape[0]].cpu().numpy().tobytes()) - # When data is severely insufficient, an empty part_result - # on a certain gpu could makes the overall outputs empty. - if part_result: - part_list.append(part_result) - # sort the results - ordered_results = [] - for res in zip(*part_list): - ordered_results.extend(list(res)) - # the dataloader may pad some samples - ordered_results = ordered_results[:size] - return ordered_results diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py deleted file mode 100644 index 48c6ac0c6999..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .file_client import BaseStorageBackend, FileClient -from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler -from .io import dump, load, register_handler -from .parse import dict_from_file, list_from_file - -__all__ = [ - 'BaseStorageBackend', - 'FileClient', - 'load', - 'dump', - 'register_handler', - 'BaseFileHandler', - 'JsonHandler', - 'PickleHandler', - 'YamlHandler', - 'list_from_file', - 'dict_from_file', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py deleted file mode 100644 index fe088721ca25..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py +++ /dev/null @@ -1,1127 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import inspect -import os -import os.path as osp -import re -import tempfile -import warnings -from abc import ABCMeta, abstractmethod -from contextlib import contextmanager -from pathlib import Path -from typing import Iterable, Iterator, Optional, Tuple, Union -from urllib.request import urlopen - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.misc import has_method -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.path import is_filepath - - -class BaseStorageBackend(metaclass=ABCMeta): - """Abstract class of storage backends. - - All backends need to implement two apis: ``get()`` and ``get_text()``. - ``get()`` reads the file as a byte stream and ``get_text()`` reads the file - as texts. - """ - - # a flag to indicate whether the backend can create a symlink for a file - _allow_symlink = False - - @property - def name(self): - return self.__class__.__name__ - - @property - def allow_symlink(self): - return self._allow_symlink - - @abstractmethod - def get(self, filepath): - pass - - @abstractmethod - def get_text(self, filepath): - pass - - -class CephBackend(BaseStorageBackend): - """Ceph storage backend (for internal use). - - Args: - path_mapping (dict|None): path mapping dict from local path to Petrel - path. When ``path_mapping={'src': 'dst'}``, ``src`` in ``filepath`` - will be replaced by ``dst``. Default: None. - - .. warning:: - :class:`mmcv.fileio.file_client.CephBackend` will be deprecated, - please use :class:`mmcv.fileio.file_client.PetrelBackend` instead. - """ - - def __init__(self, path_mapping=None): - try: - import ceph - except ImportError: - raise ImportError('Please install ceph to enable CephBackend.') - - warnings.warn('CephBackend will be deprecated, please use PetrelBackend instead') - self._client = ceph.S3Client() - assert isinstance(path_mapping, dict) or path_mapping is None - self.path_mapping = path_mapping - - def get(self, filepath): - filepath = str(filepath) - if self.path_mapping is not None: - for k, v in self.path_mapping.items(): - filepath = filepath.replace(k, v) - value = self._client.Get(filepath) - value_buf = memoryview(value) - return value_buf - - def get_text(self, filepath, encoding=None): - raise NotImplementedError - - -class PetrelBackend(BaseStorageBackend): - """Petrel storage backend (for internal use). - - PetrelBackend supports reading and writing data to multiple clusters. - If the file path contains the cluster name, PetrelBackend will read data - from specified cluster or write data to it. Otherwise, PetrelBackend will - access the default cluster. - - Args: - path_mapping (dict, optional): Path mapping dict from local path to - Petrel path. When ``path_mapping={'src': 'dst'}``, ``src`` in - ``filepath`` will be replaced by ``dst``. Default: None. - enable_mc (bool, optional): Whether to enable memcached support. - Default: True. - - Examples: - >>> filepath1 = 's3://path/of/file' - >>> filepath2 = 'cluster-name:s3://path/of/file' - >>> client = PetrelBackend() - >>> client.get(filepath1) # get data from default cluster - >>> client.get(filepath2) # get data from 'cluster-name' cluster - """ - - def __init__(self, path_mapping: Optional[dict] = None, enable_mc: bool = True): - try: - from petrel_client import client - except ImportError: - raise ImportError('Please install petrel_client to enable ' 'PetrelBackend.') - - self._client = client.Client(enable_mc=enable_mc) - assert isinstance(path_mapping, dict) or path_mapping is None - self.path_mapping = path_mapping - - def _map_path(self, filepath: Union[str, Path]) -> str: - """Map ``filepath`` to a string path whose prefix will be replaced by - :attr:`self.path_mapping`. - - Args: - filepath (str): Path to be mapped. - """ - filepath = str(filepath) - if self.path_mapping is not None: - for k, v in self.path_mapping.items(): - filepath = filepath.replace(k, v) - return filepath - - def _format_path(self, filepath: str) -> str: - """Convert a ``filepath`` to standard format of petrel oss. - - If the ``filepath`` is concatenated by ``os.path.join``, in a Windows - environment, the ``filepath`` will be the format of - 's3://bucket_name\\image.jpg'. By invoking :meth:`_format_path`, the - above ``filepath`` will be converted to 's3://bucket_name/image.jpg'. - - Args: - filepath (str): Path to be formatted. - """ - return re.sub(r'\\+', '/', filepath) - - def get(self, filepath: Union[str, Path]) -> memoryview: - """Read data from a given ``filepath`` with 'rb' mode. - - Args: - filepath (str or Path): Path to read data. - - Returns: - memoryview: A memory view of expected bytes object to avoid - copying. The memoryview object can be converted to bytes by - ``value_buf.tobytes()``. - """ - filepath = self._map_path(filepath) - filepath = self._format_path(filepath) - value = self._client.Get(filepath) - value_buf = memoryview(value) - return value_buf - - def get_text(self, filepath: Union[str, Path], encoding: str = 'utf-8') -> str: - """Read data from a given ``filepath`` with 'r' mode. - - Args: - filepath (str or Path): Path to read data. - encoding (str): The encoding format used to open the ``filepath``. - Default: 'utf-8'. - - Returns: - str: Expected text reading from ``filepath``. - """ - return str(self.get(filepath), encoding=encoding) - - def put(self, obj: bytes, filepath: Union[str, Path]) -> None: - """Save data to a given ``filepath``. - - Args: - obj (bytes): Data to be saved. - filepath (str or Path): Path to write data. - """ - filepath = self._map_path(filepath) - filepath = self._format_path(filepath) - self._client.put(filepath, obj) - - def put_text(self, obj: str, filepath: Union[str, Path], encoding: str = 'utf-8') -> None: - """Save data to a given ``filepath``. - - Args: - obj (str): Data to be written. - filepath (str or Path): Path to write data. - encoding (str): The encoding format used to encode the ``obj``. - Default: 'utf-8'. - """ - self.put(bytes(obj, encoding=encoding), filepath) - - def remove(self, filepath: Union[str, Path]) -> None: - """Remove a file. - - Args: - filepath (str or Path): Path to be removed. - """ - if not has_method(self._client, 'delete'): - raise NotImplementedError( - ( - 'Current version of Petrel Python SDK has not supported ' - 'the `delete` method, please use a higher version or dev' - ' branch instead.' - ) - ) - - filepath = self._map_path(filepath) - filepath = self._format_path(filepath) - self._client.delete(filepath) - - def exists(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path exists. - - Args: - filepath (str or Path): Path to be checked whether exists. - - Returns: - bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. - """ - if not (has_method(self._client, 'contains') and has_method(self._client, 'isdir')): - raise NotImplementedError( - ( - 'Current version of Petrel Python SDK has not supported ' - 'the `contains` and `isdir` methods, please use a higher' - 'version or dev branch instead.' - ) - ) - - filepath = self._map_path(filepath) - filepath = self._format_path(filepath) - return self._client.contains(filepath) or self._client.isdir(filepath) - - def isdir(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path is a directory. - - Args: - filepath (str or Path): Path to be checked whether it is a - directory. - - Returns: - bool: Return ``True`` if ``filepath`` points to a directory, - ``False`` otherwise. - """ - if not has_method(self._client, 'isdir'): - raise NotImplementedError( - ( - 'Current version of Petrel Python SDK has not supported ' - 'the `isdir` method, please use a higher version or dev' - ' branch instead.' - ) - ) - - filepath = self._map_path(filepath) - filepath = self._format_path(filepath) - return self._client.isdir(filepath) - - def isfile(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path is a file. - - Args: - filepath (str or Path): Path to be checked whether it is a file. - - Returns: - bool: Return ``True`` if ``filepath`` points to a file, ``False`` - otherwise. - """ - if not has_method(self._client, 'contains'): - raise NotImplementedError( - ( - 'Current version of Petrel Python SDK has not supported ' - 'the `contains` method, please use a higher version or ' - 'dev branch instead.' - ) - ) - - filepath = self._map_path(filepath) - filepath = self._format_path(filepath) - return self._client.contains(filepath) - - def join_path(self, filepath: Union[str, Path], *filepaths: Union[str, Path]) -> str: - """Concatenate all file paths. - - Args: - filepath (str or Path): Path to be concatenated. - - Returns: - str: The result after concatenation. - """ - filepath = self._format_path(self._map_path(filepath)) - if filepath.endswith('/'): - filepath = filepath[:-1] - formatted_paths = [filepath] - for path in filepaths: - formatted_paths.append(self._format_path(self._map_path(path))) - return '/'.join(formatted_paths) - - @contextmanager - def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]: - """Download a file from ``filepath`` and return a temporary path. - - ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It - can be called with ``with`` statement, and when exists from the - ``with`` statement, the temporary path will be released. - - Args: - filepath (str | Path): Download a file from ``filepath``. - - Examples: - >>> client = PetrelBackend() - >>> # After existing from the ``with`` clause, - >>> # the path will be removed - >>> with client.get_local_path('s3://path/of/your/file') as path: - ... # do something here - - Yields: - Iterable[str]: Only yield one temporary path. - """ - filepath = self._map_path(filepath) - filepath = self._format_path(filepath) - assert self.isfile(filepath) - try: - f = tempfile.NamedTemporaryFile(delete=False) - f.write(self.get(filepath)) - f.close() - yield f.name - finally: - os.remove(f.name) - - def list_dir_or_file( - self, - dir_path: Union[str, Path], - list_dir: bool = True, - list_file: bool = True, - suffix: Optional[Union[str, Tuple[str]]] = None, - recursive: bool = False, - ) -> Iterator[str]: - """Scan a directory to find the interested directories or files in - arbitrary order. - - Note: - Petrel has no concept of directories but it simulates the directory - hierarchy in the filesystem through public prefixes. In addition, - if the returned path ends with '/', it means the path is a public - prefix which is a logical directory. - - Note: - :meth:`list_dir_or_file` returns the path relative to ``dir_path``. - In addition, the returned path of directory will not contains the - suffix '/' which is consistent with other backends. - - Args: - dir_path (str | Path): Path of the directory. - list_dir (bool): List the directories. Default: True. - list_file (bool): List the path of files. Default: True. - suffix (str or tuple[str], optional): File suffix - that we are interested in. Default: None. - recursive (bool): If set to True, recursively scan the - directory. Default: False. - - Yields: - Iterable[str]: A relative path to ``dir_path``. - """ - if not has_method(self._client, 'list'): - raise NotImplementedError( - ( - 'Current version of Petrel Python SDK has not supported ' - 'the `list` method, please use a higher version or dev' - ' branch instead.' - ) - ) - - dir_path = self._map_path(dir_path) - dir_path = self._format_path(dir_path) - if list_dir and suffix is not None: - raise TypeError('`list_dir` should be False when `suffix` is not None') - - if (suffix is not None) and not isinstance(suffix, (str, tuple)): - raise TypeError('`suffix` must be a string or tuple of strings') - - # Petrel's simulated directory hierarchy assumes that directory paths - # should end with `/` - if not dir_path.endswith('/'): - dir_path += '/' - - root = dir_path - - def _list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive): - for path in self._client.list(dir_path): - # the `self.isdir` is not used here to determine whether path - # is a directory, because `self.isdir` relies on - # `self._client.list` - if path.endswith('/'): # a directory path - next_dir_path = self.join_path(dir_path, path) - if list_dir: - # get the relative path and exclude the last - # character '/' - rel_dir = next_dir_path[len(root) : -1] - yield rel_dir - if recursive: - yield from _list_dir_or_file(next_dir_path, list_dir, list_file, suffix, recursive) - else: # a file path - absolute_path = self.join_path(dir_path, path) - rel_path = absolute_path[len(root) :] - if (suffix is None or rel_path.endswith(suffix)) and list_file: - yield rel_path - - return _list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive) - - -class MemcachedBackend(BaseStorageBackend): - """Memcached storage backend. - - Attributes: - server_list_cfg (str): Config file for memcached server list. - client_cfg (str): Config file for memcached client. - sys_path (str | None): Additional path to be appended to `sys.path`. - Default: None. - """ - - def __init__(self, server_list_cfg, client_cfg, sys_path=None): - if sys_path is not None: - import sys - - sys.path.append(sys_path) - try: - import mc - except ImportError: - raise ImportError('Please install memcached to enable MemcachedBackend.') - - self.server_list_cfg = server_list_cfg - self.client_cfg = client_cfg - self._client = mc.MemcachedClient.GetInstance(self.server_list_cfg, self.client_cfg) - # mc.pyvector servers as a point which points to a memory cache - self._mc_buffer = mc.pyvector() - - def get(self, filepath): - filepath = str(filepath) - import mc - - self._client.Get(filepath, self._mc_buffer) - value_buf = mc.ConvertBuffer(self._mc_buffer) - return value_buf - - def get_text(self, filepath, encoding=None): - raise NotImplementedError - - -class LmdbBackend(BaseStorageBackend): - """Lmdb storage backend. - - Args: - db_path (str): Lmdb database path. - readonly (bool, optional): Lmdb environment parameter. If True, - disallow any write operations. Default: True. - lock (bool, optional): Lmdb environment parameter. If False, when - concurrent access occurs, do not lock the database. Default: False. - readahead (bool, optional): Lmdb environment parameter. If False, - disable the OS filesystem readahead mechanism, which may improve - random read performance when a database is larger than RAM. - Default: False. - - Attributes: - db_path (str): Lmdb database path. - """ - - def __init__(self, db_path, readonly=True, lock=False, readahead=False, **kwargs): - try: - import lmdb - except ImportError: - raise ImportError('Please install lmdb to enable LmdbBackend.') - - self.db_path = str(db_path) - self._client = lmdb.open(self.db_path, readonly=readonly, lock=lock, readahead=readahead, **kwargs) - - def get(self, filepath): - """Get values according to the filepath. - - Args: - filepath (str | obj:`Path`): Here, filepath is the lmdb key. - """ - filepath = str(filepath) - with self._client.begin(write=False) as txn: - value_buf = txn.get(filepath.encode('ascii')) - return value_buf - - def get_text(self, filepath, encoding=None): - raise NotImplementedError - - -class HardDiskBackend(BaseStorageBackend): - """Raw hard disks storage backend.""" - - _allow_symlink = True - - def get(self, filepath: Union[str, Path]) -> bytes: - """Read data from a given ``filepath`` with 'rb' mode. - - Args: - filepath (str or Path): Path to read data. - - Returns: - bytes: Expected bytes object. - """ - with open(filepath, 'rb') as f: - value_buf = f.read() - return value_buf - - def get_text(self, filepath: Union[str, Path], encoding: str = 'utf-8') -> str: - """Read data from a given ``filepath`` with 'r' mode. - - Args: - filepath (str or Path): Path to read data. - encoding (str): The encoding format used to open the ``filepath``. - Default: 'utf-8'. - - Returns: - str: Expected text reading from ``filepath``. - """ - with open(filepath, 'r', encoding=encoding) as f: - value_buf = f.read() - return value_buf - - def put(self, obj: bytes, filepath: Union[str, Path]) -> None: - """Write data to a given ``filepath`` with 'wb' mode. - - Note: - ``put`` will create a directory if the directory of ``filepath`` - does not exist. - - Args: - obj (bytes): Data to be written. - filepath (str or Path): Path to write data. - """ - mmcv.mkdir_or_exist(osp.dirname(filepath)) - with open(filepath, 'wb') as f: - f.write(obj) - - def put_text(self, obj: str, filepath: Union[str, Path], encoding: str = 'utf-8') -> None: - """Write data to a given ``filepath`` with 'w' mode. - - Note: - ``put_text`` will create a directory if the directory of - ``filepath`` does not exist. - - Args: - obj (str): Data to be written. - filepath (str or Path): Path to write data. - encoding (str): The encoding format used to open the ``filepath``. - Default: 'utf-8'. - """ - mmcv.mkdir_or_exist(osp.dirname(filepath)) - with open(filepath, 'w', encoding=encoding) as f: - f.write(obj) - - def remove(self, filepath: Union[str, Path]) -> None: - """Remove a file. - - Args: - filepath (str or Path): Path to be removed. - """ - os.remove(filepath) - - def exists(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path exists. - - Args: - filepath (str or Path): Path to be checked whether exists. - - Returns: - bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. - """ - return osp.exists(filepath) - - def isdir(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path is a directory. - - Args: - filepath (str or Path): Path to be checked whether it is a - directory. - - Returns: - bool: Return ``True`` if ``filepath`` points to a directory, - ``False`` otherwise. - """ - return osp.isdir(filepath) - - def isfile(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path is a file. - - Args: - filepath (str or Path): Path to be checked whether it is a file. - - Returns: - bool: Return ``True`` if ``filepath`` points to a file, ``False`` - otherwise. - """ - return osp.isfile(filepath) - - def join_path(self, filepath: Union[str, Path], *filepaths: Union[str, Path]) -> str: - """Concatenate all file paths. - - Join one or more filepath components intelligently. The return value - is the concatenation of filepath and any members of *filepaths. - - Args: - filepath (str or Path): Path to be concatenated. - - Returns: - str: The result of concatenation. - """ - return osp.join(filepath, *filepaths) - - @contextmanager - def get_local_path(self, filepath: Union[str, Path]) -> Iterable[Union[str, Path]]: - """Only for unified API and do nothing.""" - yield filepath - - def list_dir_or_file( - self, - dir_path: Union[str, Path], - list_dir: bool = True, - list_file: bool = True, - suffix: Optional[Union[str, Tuple[str]]] = None, - recursive: bool = False, - ) -> Iterator[str]: - """Scan a directory to find the interested directories or files in - arbitrary order. - - Note: - :meth:`list_dir_or_file` returns the path relative to ``dir_path``. - - Args: - dir_path (str | Path): Path of the directory. - list_dir (bool): List the directories. Default: True. - list_file (bool): List the path of files. Default: True. - suffix (str or tuple[str], optional): File suffix - that we are interested in. Default: None. - recursive (bool): If set to True, recursively scan the - directory. Default: False. - - Yields: - Iterable[str]: A relative path to ``dir_path``. - """ - if list_dir and suffix is not None: - raise TypeError('`suffix` should be None when `list_dir` is True') - - if (suffix is not None) and not isinstance(suffix, (str, tuple)): - raise TypeError('`suffix` must be a string or tuple of strings') - - root = dir_path - - def _list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive): - for entry in os.scandir(dir_path): - if not entry.name.startswith('.') and entry.is_file(): - rel_path = osp.relpath(entry.path, root) - if (suffix is None or rel_path.endswith(suffix)) and list_file: - yield rel_path - elif osp.isdir(entry.path): - if list_dir: - rel_dir = osp.relpath(entry.path, root) - yield rel_dir - if recursive: - yield from _list_dir_or_file(entry.path, list_dir, list_file, suffix, recursive) - - return _list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive) - - -class HTTPBackend(BaseStorageBackend): - """HTTP and HTTPS storage bachend.""" - - def get(self, filepath): - value_buf = urlopen(filepath).read() - return value_buf - - def get_text(self, filepath, encoding='utf-8'): - value_buf = urlopen(filepath).read() - return value_buf.decode(encoding) - - @contextmanager - def get_local_path(self, filepath: str) -> Iterable[str]: - """Download a file from ``filepath``. - - ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It - can be called with ``with`` statement, and when exists from the - ``with`` statement, the temporary path will be released. - - Args: - filepath (str): Download a file from ``filepath``. - - Examples: - >>> client = HTTPBackend() - >>> # After existing from the ``with`` clause, - >>> # the path will be removed - >>> with client.get_local_path('http://path/of/your/file') as path: - ... # do something here - """ - try: - f = tempfile.NamedTemporaryFile(delete=False) - f.write(self.get(filepath)) - f.close() - yield f.name - finally: - os.remove(f.name) - - -class FileClient: - """A general file client to access files in different backends. - - The client loads a file or text in a specified backend from its path - and returns it as a binary or text file. There are two ways to choose a - backend, the name of backend and the prefix of path. Although both of them - can be used to choose a storage backend, ``backend`` has a higher priority - that is if they are all set, the storage backend will be chosen by the - backend argument. If they are all `None`, the disk backend will be chosen. - Note that It can also register other backend accessor with a given name, - prefixes, and backend class. In addition, We use the singleton pattern to - avoid repeated object creation. If the arguments are the same, the same - object will be returned. - - Args: - backend (str, optional): The storage backend type. Options are "disk", - "ceph", "memcached", "lmdb", "http" and "petrel". Default: None. - prefix (str, optional): The prefix of the registered storage backend. - Options are "s3", "http", "https". Default: None. - - Examples: - >>> # only set backend - >>> file_client = FileClient(backend='petrel') - >>> # only set prefix - >>> file_client = FileClient(prefix='s3') - >>> # set both backend and prefix but use backend to choose client - >>> file_client = FileClient(backend='petrel', prefix='s3') - >>> # if the arguments are the same, the same object is returned - >>> file_client1 = FileClient(backend='petrel') - >>> file_client1 is file_client - True - - Attributes: - client (:obj:`BaseStorageBackend`): The backend object. - """ - - _backends = { - 'disk': HardDiskBackend, - 'ceph': CephBackend, - 'memcached': MemcachedBackend, - 'lmdb': LmdbBackend, - 'petrel': PetrelBackend, - 'http': HTTPBackend, - } - # This collection is used to record the overridden backends, and when a - # backend appears in the collection, the singleton pattern is disabled for - # that backend, because if the singleton pattern is used, then the object - # returned will be the backend before overwriting - _overridden_backends = set() - _prefix_to_backends = { - 's3': PetrelBackend, - 'http': HTTPBackend, - 'https': HTTPBackend, - } - _overridden_prefixes = set() - - _instances = {} - - def __new__(cls, backend=None, prefix=None, **kwargs): - if backend is None and prefix is None: - backend = 'disk' - if backend is not None and backend not in cls._backends: - raise ValueError( - f'Backend {backend} is not supported. Currently supported ones' f' are {list(cls._backends.keys())}' - ) - if prefix is not None and prefix not in cls._prefix_to_backends: - raise ValueError( - f'prefix {prefix} is not supported. Currently supported ones ' - f'are {list(cls._prefix_to_backends.keys())}' - ) - - # concatenate the arguments to a unique key for determining whether - # objects with the same arguments were created - arg_key = f'{backend}:{prefix}' - for key, value in kwargs.items(): - arg_key += f':{key}:{value}' - - # if a backend was overridden, it will create a new object - if ( - arg_key in cls._instances - and backend not in cls._overridden_backends - and prefix not in cls._overridden_prefixes - ): - _instance = cls._instances[arg_key] - else: - # create a new object and put it to _instance - _instance = super().__new__(cls) - if backend is not None: - _instance.client = cls._backends[backend](**kwargs) - else: - _instance.client = cls._prefix_to_backends[prefix](**kwargs) - - cls._instances[arg_key] = _instance - - return _instance - - @property - def name(self): - return self.client.name - - @property - def allow_symlink(self): - return self.client.allow_symlink - - @staticmethod - def parse_uri_prefix(uri: Union[str, Path]) -> Optional[str]: - """Parse the prefix of a uri. - - Args: - uri (str | Path): Uri to be parsed that contains the file prefix. - - Examples: - >>> FileClient.parse_uri_prefix('s3://path/of/your/file') - 's3' - - Returns: - str | None: Return the prefix of uri if the uri contains '://' - else ``None``. - """ - assert is_filepath(uri) - uri = str(uri) - if '://' not in uri: - return None - else: - prefix, _ = uri.split('://') - # In the case of PetrelBackend, the prefix may contains the cluster - # name like clusterName:s3 - if ':' in prefix: - _, prefix = prefix.split(':') - return prefix - - @classmethod - def infer_client( - cls, file_client_args: Optional[dict] = None, uri: Optional[Union[str, Path]] = None - ) -> 'FileClient': - """Infer a suitable file client based on the URI and arguments. - - Args: - file_client_args (dict, optional): Arguments to instantiate a - FileClient. Default: None. - uri (str | Path, optional): Uri to be parsed that contains the file - prefix. Default: None. - - Examples: - >>> uri = 's3://path/of/your/file' - >>> file_client = FileClient.infer_client(uri=uri) - >>> file_client_args = {'backend': 'petrel'} - >>> file_client = FileClient.infer_client(file_client_args) - - Returns: - FileClient: Instantiated FileClient object. - """ - assert file_client_args is not None or uri is not None - if file_client_args is None: - file_prefix = cls.parse_uri_prefix(uri) # type: ignore - return cls(prefix=file_prefix) - else: - return cls(**file_client_args) - - @classmethod - def _register_backend(cls, name, backend, force=False, prefixes=None): - if not isinstance(name, str): - raise TypeError('the backend name should be a string, ' f'but got {type(name)}') - if not inspect.isclass(backend): - raise TypeError(f'backend should be a class but got {type(backend)}') - if not issubclass(backend, BaseStorageBackend): - raise TypeError(f'backend {backend} is not a subclass of BaseStorageBackend') - if not force and name in cls._backends: - raise KeyError( - f'{name} is already registered as a storage backend, ' 'add "force=True" if you want to override it' - ) - - if name in cls._backends and force: - cls._overridden_backends.add(name) - cls._backends[name] = backend - - if prefixes is not None: - if isinstance(prefixes, str): - prefixes = [prefixes] - else: - assert isinstance(prefixes, (list, tuple)) - for prefix in prefixes: - if prefix not in cls._prefix_to_backends: - cls._prefix_to_backends[prefix] = backend - elif (prefix in cls._prefix_to_backends) and force: - cls._overridden_prefixes.add(prefix) - cls._prefix_to_backends[prefix] = backend - else: - raise KeyError( - f'{prefix} is already registered as a storage backend,' - ' add "force=True" if you want to override it' - ) - - @classmethod - def register_backend(cls, name, backend=None, force=False, prefixes=None): - """Register a backend to FileClient. - - This method can be used as a normal class method or a decorator. - - .. code-block:: python - - class NewBackend(BaseStorageBackend): - - def get(self, filepath): - return filepath - - def get_text(self, filepath): - return filepath - - FileClient.register_backend('new', NewBackend) - - or - - .. code-block:: python - - @FileClient.register_backend('new') - class NewBackend(BaseStorageBackend): - - def get(self, filepath): - return filepath - - def get_text(self, filepath): - return filepath - - Args: - name (str): The name of the registered backend. - backend (class, optional): The backend class to be registered, - which must be a subclass of :class:`BaseStorageBackend`. - When this method is used as a decorator, backend is None. - Defaults to None. - force (bool, optional): Whether to override the backend if the name - has already been registered. Defaults to False. - prefixes (str or list[str] or tuple[str], optional): The prefixes - of the registered storage backend. Default: None. - `New in version 1.3.15.` - """ - if backend is not None: - cls._register_backend(name, backend, force=force, prefixes=prefixes) - return - - def _register(backend_cls): - cls._register_backend(name, backend_cls, force=force, prefixes=prefixes) - return backend_cls - - return _register - - def get(self, filepath: Union[str, Path]) -> Union[bytes, memoryview]: - """Read data from a given ``filepath`` with 'rb' mode. - - Note: - There are two types of return values for ``get``, one is ``bytes`` - and the other is ``memoryview``. The advantage of using memoryview - is that you can avoid copying, and if you want to convert it to - ``bytes``, you can use ``.tobytes()``. - - Args: - filepath (str or Path): Path to read data. - - Returns: - bytes | memoryview: Expected bytes object or a memory view of the - bytes object. - """ - return self.client.get(filepath) - - def get_text(self, filepath: Union[str, Path], encoding='utf-8') -> str: - """Read data from a given ``filepath`` with 'r' mode. - - Args: - filepath (str or Path): Path to read data. - encoding (str): The encoding format used to open the ``filepath``. - Default: 'utf-8'. - - Returns: - str: Expected text reading from ``filepath``. - """ - return self.client.get_text(filepath, encoding) - - def put(self, obj: bytes, filepath: Union[str, Path]) -> None: - """Write data to a given ``filepath`` with 'wb' mode. - - Note: - ``put`` should create a directory if the directory of ``filepath`` - does not exist. - - Args: - obj (bytes): Data to be written. - filepath (str or Path): Path to write data. - """ - self.client.put(obj, filepath) - - def put_text(self, obj: str, filepath: Union[str, Path]) -> None: - """Write data to a given ``filepath`` with 'w' mode. - - Note: - ``put_text`` should create a directory if the directory of - ``filepath`` does not exist. - - Args: - obj (str): Data to be written. - filepath (str or Path): Path to write data. - encoding (str, optional): The encoding format used to open the - `filepath`. Default: 'utf-8'. - """ - self.client.put_text(obj, filepath) - - def remove(self, filepath: Union[str, Path]) -> None: - """Remove a file. - - Args: - filepath (str, Path): Path to be removed. - """ - self.client.remove(filepath) - - def exists(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path exists. - - Args: - filepath (str or Path): Path to be checked whether exists. - - Returns: - bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. - """ - return self.client.exists(filepath) - - def isdir(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path is a directory. - - Args: - filepath (str or Path): Path to be checked whether it is a - directory. - - Returns: - bool: Return ``True`` if ``filepath`` points to a directory, - ``False`` otherwise. - """ - return self.client.isdir(filepath) - - def isfile(self, filepath: Union[str, Path]) -> bool: - """Check whether a file path is a file. - - Args: - filepath (str or Path): Path to be checked whether it is a file. - - Returns: - bool: Return ``True`` if ``filepath`` points to a file, ``False`` - otherwise. - """ - return self.client.isfile(filepath) - - def join_path(self, filepath: Union[str, Path], *filepaths: Union[str, Path]) -> str: - """Concatenate all file paths. - - Join one or more filepath components intelligently. The return value - is the concatenation of filepath and any members of *filepaths. - - Args: - filepath (str or Path): Path to be concatenated. - - Returns: - str: The result of concatenation. - """ - return self.client.join_path(filepath, *filepaths) - - @contextmanager - def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]: - """Download data from ``filepath`` and write the data to local path. - - ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It - can be called with ``with`` statement, and when exists from the - ``with`` statement, the temporary path will be released. - - Note: - If the ``filepath`` is a local path, just return itself. - - .. warning:: - ``get_local_path`` is an experimental interface that may change in - the future. - - Args: - filepath (str or Path): Path to be read data. - - Examples: - >>> file_client = FileClient(prefix='s3') - >>> with file_client.get_local_path('s3://bucket/abc.jpg') as path: - ... # do something here - - Yields: - Iterable[str]: Only yield one path. - """ - with self.client.get_local_path(str(filepath)) as local_path: - yield local_path - - def list_dir_or_file( - self, - dir_path: Union[str, Path], - list_dir: bool = True, - list_file: bool = True, - suffix: Optional[Union[str, Tuple[str]]] = None, - recursive: bool = False, - ) -> Iterator[str]: - """Scan a directory to find the interested directories or files in - arbitrary order. - - Note: - :meth:`list_dir_or_file` returns the path relative to ``dir_path``. - - Args: - dir_path (str | Path): Path of the directory. - list_dir (bool): List the directories. Default: True. - list_file (bool): List the path of files. Default: True. - suffix (str or tuple[str], optional): File suffix - that we are interested in. Default: None. - recursive (bool): If set to True, recursively scan the - directory. Default: False. - - Yields: - Iterable[str]: A relative path to ``dir_path``. - """ - yield from self.client.list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/__init__.py deleted file mode 100644 index aa24d9197283..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .base import BaseFileHandler -from .json_handler import JsonHandler -from .pickle_handler import PickleHandler -from .yaml_handler import YamlHandler - -__all__ = ['BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/base.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/base.py deleted file mode 100644 index 288878bc5728..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/base.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from abc import ABCMeta, abstractmethod - - -class BaseFileHandler(metaclass=ABCMeta): - # `str_like` is a flag to indicate whether the type of file object is - # str-like object or bytes-like object. Pickle only processes bytes-like - # objects but json only processes str-like object. If it is str-like - # object, `StringIO` will be used to process the buffer. - str_like = True - - @abstractmethod - def load_from_fileobj(self, file, **kwargs): - pass - - @abstractmethod - def dump_to_fileobj(self, obj, file, **kwargs): - pass - - @abstractmethod - def dump_to_str(self, obj, **kwargs): - pass - - def load_from_path(self, filepath, mode='r', **kwargs): - with open(filepath, mode) as f: - return self.load_from_fileobj(f, **kwargs) - - def dump_to_path(self, obj, filepath, mode='w', **kwargs): - with open(filepath, mode) as f: - self.dump_to_fileobj(obj, f, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py deleted file mode 100644 index c95a8b72d202..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import json - -import numpy as np - -from .base import BaseFileHandler - - -def set_default(obj): - """Set default json values for non-serializable values. - - It helps convert ``set``, ``range`` and ``np.ndarray`` data types to list. - It also converts ``np.generic`` (including ``np.int32``, ``np.float32``, - etc.) into plain numbers of plain python built-in types. - """ - if isinstance(obj, (set, range)): - return list(obj) - elif isinstance(obj, np.ndarray): - return obj.tolist() - elif isinstance(obj, np.generic): - return obj.item() - raise TypeError(f'{type(obj)} is unsupported for json dump') - - -class JsonHandler(BaseFileHandler): - def load_from_fileobj(self, file): - return json.load(file) - - def dump_to_fileobj(self, obj, file, **kwargs): - kwargs.setdefault('default', set_default) - json.dump(obj, file, **kwargs) - - def dump_to_str(self, obj, **kwargs): - kwargs.setdefault('default', set_default) - return json.dumps(obj, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py deleted file mode 100644 index fffd741130ff..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import pickle - -from .base import BaseFileHandler - - -class PickleHandler(BaseFileHandler): - - str_like = False - - def load_from_fileobj(self, file, **kwargs): - return pickle.load(file, **kwargs) - - def load_from_path(self, filepath, **kwargs): - return super(PickleHandler, self).load_from_path(filepath, mode='rb', **kwargs) - - def dump_to_str(self, obj, **kwargs): - kwargs.setdefault('protocol', 2) - return pickle.dumps(obj, **kwargs) - - def dump_to_fileobj(self, obj, file, **kwargs): - kwargs.setdefault('protocol', 2) - pickle.dump(obj, file, **kwargs) - - def dump_to_path(self, obj, filepath, **kwargs): - super(PickleHandler, self).dump_to_path(obj, filepath, mode='wb', **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py deleted file mode 100644 index cf89a1efa70c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import yaml - -try: - from yaml import CDumper as Dumper - from yaml import CLoader as Loader -except ImportError: - from yaml import Loader, Dumper - -from .base import BaseFileHandler # isort:skip - - -class YamlHandler(BaseFileHandler): - def load_from_fileobj(self, file, **kwargs): - kwargs.setdefault('Loader', Loader) - return yaml.load(file, **kwargs) - - def dump_to_fileobj(self, obj, file, **kwargs): - kwargs.setdefault('Dumper', Dumper) - yaml.dump(obj, file, **kwargs) - - def dump_to_str(self, obj, **kwargs): - kwargs.setdefault('Dumper', Dumper) - return yaml.dump(obj, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py deleted file mode 100644 index bcbdc2eb3803..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from io import BytesIO, StringIO -from pathlib import Path - -from ..utils import is_list_of, is_str -from .file_client import FileClient -from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler - -file_handlers = { - 'json': JsonHandler(), - 'yaml': YamlHandler(), - 'yml': YamlHandler(), - 'pickle': PickleHandler(), - 'pkl': PickleHandler(), -} - - -def load(file, file_format=None, file_client_args=None, **kwargs): - """Load data from json/yaml/pickle files. - - This method provides a unified api for loading data from serialized files. - - Note: - In v1.3.16 and later, ``load`` supports loading data from serialized - files those can be storaged in different backends. - - Args: - file (str or :obj:`Path` or file-like object): Filename or a file-like - object. - file_format (str, optional): If not specified, the file format will be - inferred from the file extension, otherwise use the specified one. - Currently supported formats include "json", "yaml/yml" and - "pickle/pkl". - file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. - Default: None. - - Examples: - >>> load('/path/of/your/file') # file is storaged in disk - >>> load('https://path/of/your/file') # file is storaged in Internet - >>> load('s3://path/of/your/file') # file is storaged in petrel - - Returns: - The content from the file. - """ - if isinstance(file, Path): - file = str(file) - if file_format is None and is_str(file): - file_format = file.split('.')[-1] - if file_format not in file_handlers: - raise TypeError(f'Unsupported format: {file_format}') - - handler = file_handlers[file_format] - if is_str(file): - file_client = FileClient.infer_client(file_client_args, file) - if handler.str_like: - with StringIO(file_client.get_text(file)) as f: - obj = handler.load_from_fileobj(f, **kwargs) - else: - with BytesIO(file_client.get(file)) as f: - obj = handler.load_from_fileobj(f, **kwargs) - elif hasattr(file, 'read'): - obj = handler.load_from_fileobj(file, **kwargs) - else: - raise TypeError('"file" must be a filepath str or a file-object') - return obj - - -def dump(obj, file=None, file_format=None, file_client_args=None, **kwargs): - """Dump data to json/yaml/pickle strings or files. - - This method provides a unified api for dumping data as strings or to files, - and also supports custom arguments for each file format. - - Note: - In v1.3.16 and later, ``dump`` supports dumping data as strings or to - files which is saved to different backends. - - Args: - obj (any): The python object to be dumped. - file (str or :obj:`Path` or file-like object, optional): If not - specified, then the object is dumped to a str, otherwise to a file - specified by the filename or file-like object. - file_format (str, optional): Same as :func:`load`. - file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. - Default: None. - - Examples: - >>> dump('hello world', '/path/of/your/file') # disk - >>> dump('hello world', 's3://path/of/your/file') # ceph or petrel - - Returns: - bool: True for success, False otherwise. - """ - if isinstance(file, Path): - file = str(file) - if file_format is None: - if is_str(file): - file_format = file.split('.')[-1] - elif file is None: - raise ValueError('file_format must be specified since file is None') - if file_format not in file_handlers: - raise TypeError(f'Unsupported format: {file_format}') - - handler = file_handlers[file_format] - if file is None: - return handler.dump_to_str(obj, **kwargs) - elif is_str(file): - file_client = FileClient.infer_client(file_client_args, file) - if handler.str_like: - with StringIO() as f: - handler.dump_to_fileobj(obj, f, **kwargs) - file_client.put_text(f.getvalue(), file) - else: - with BytesIO() as f: - handler.dump_to_fileobj(obj, f, **kwargs) - file_client.put(f.getvalue(), file) - elif hasattr(file, 'write'): - handler.dump_to_fileobj(obj, file, **kwargs) - else: - raise TypeError('"file" must be a filename str or a file-object') - - -def _register_handler(handler, file_formats): - """Register a handler for some file extensions. - - Args: - handler (:obj:`BaseFileHandler`): Handler to be registered. - file_formats (str or list[str]): File formats to be handled by this - handler. - """ - if not isinstance(handler, BaseFileHandler): - raise TypeError(f'handler must be a child of BaseFileHandler, not {type(handler)}') - if isinstance(file_formats, str): - file_formats = [file_formats] - if not is_list_of(file_formats, str): - raise TypeError('file_formats must be a str or a list of str') - for ext in file_formats: - file_handlers[ext] = handler - - -def register_handler(file_formats, **kwargs): - def wrap(cls): - _register_handler(cls(**kwargs), file_formats) - return cls - - return wrap diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py deleted file mode 100644 index 19c618d9a034..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. - -from io import StringIO - -from .file_client import FileClient - - -def list_from_file(filename, prefix='', offset=0, max_num=0, encoding='utf-8', file_client_args=None): - """Load a text file and parse the content as a list of strings. - - Note: - In v1.3.16 and later, ``list_from_file`` supports loading a text file - which can be storaged in different backends and parsing the content as - a list for strings. - - Args: - filename (str): Filename. - prefix (str): The prefix to be inserted to the beginning of each item. - offset (int): The offset of lines. - max_num (int): The maximum number of lines to be read, - zeros and negatives mean no limitation. - encoding (str): Encoding used to open the file. Default utf-8. - file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. - Default: None. - - Examples: - >>> list_from_file('/path/of/your/file') # disk - ['hello', 'world'] - >>> list_from_file('s3://path/of/your/file') # ceph or petrel - ['hello', 'world'] - - Returns: - list[str]: A list of strings. - """ - cnt = 0 - item_list = [] - file_client = FileClient.infer_client(file_client_args, filename) - with StringIO(file_client.get_text(filename, encoding)) as f: - for _ in range(offset): - f.readline() - for line in f: - if 0 < max_num <= cnt: - break - item_list.append(prefix + line.rstrip('\n\r')) - cnt += 1 - return item_list - - -def dict_from_file(filename, key_type=str, encoding='utf-8', file_client_args=None): - """Load a text file and parse the content as a dict. - - Each line of the text file will be two or more columns split by - whitespaces or tabs. The first column will be parsed as dict keys, and - the following columns will be parsed as dict values. - - Note: - In v1.3.16 and later, ``dict_from_file`` supports loading a text file - which can be storaged in different backends and parsing the content as - a dict. - - Args: - filename(str): Filename. - key_type(type): Type of the dict keys. str is user by default and - type conversion will be performed if specified. - encoding (str): Encoding used to open the file. Default utf-8. - file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. - Default: None. - - Examples: - >>> dict_from_file('/path/of/your/file') # disk - {'key1': 'value1', 'key2': 'value2'} - >>> dict_from_file('s3://path/of/your/file') # ceph or petrel - {'key1': 'value1', 'key2': 'value2'} - - Returns: - dict: The parsed contents. - """ - mapping = {} - file_client = FileClient.infer_client(file_client_args, filename) - with StringIO(file_client.get_text(filename, encoding)) as f: - for line in f: - items = line.rstrip('\n').split() - assert len(items) >= 2 - key = key_type(items[0]) - val = items[1:] if len(items) > 2 else items[1] - mapping[key] = val - return mapping diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py deleted file mode 100644 index bf63e993892c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .colorspace import ( - bgr2gray, - bgr2hls, - bgr2hsv, - bgr2rgb, - bgr2ycbcr, - gray2bgr, - gray2rgb, - hls2bgr, - hsv2bgr, - imconvert, - rgb2bgr, - rgb2gray, - rgb2ycbcr, - ycbcr2bgr, - ycbcr2rgb, -) -from .geometric import ( - cutout, - imcrop, - imflip, - imflip_, - impad, - impad_to_multiple, - imrescale, - imresize, - imresize_like, - imresize_to_multiple, - imrotate, - imshear, - imtranslate, - rescale_size, -) -from .io import imfrombytes, imread, imwrite, supported_backends, use_backend -from .misc import tensor2imgs -from .photometric import ( - adjust_brightness, - adjust_color, - adjust_contrast, - adjust_lighting, - adjust_sharpness, - auto_contrast, - clahe, - imdenormalize, - imequalize, - iminvert, - imnormalize, - imnormalize_, - lut_transform, - posterize, - solarize, -) - -__all__ = [ - 'bgr2gray', - 'bgr2hls', - 'bgr2hsv', - 'bgr2rgb', - 'gray2bgr', - 'gray2rgb', - 'hls2bgr', - 'hsv2bgr', - 'imconvert', - 'rgb2bgr', - 'rgb2gray', - 'imrescale', - 'imresize', - 'imresize_like', - 'imresize_to_multiple', - 'rescale_size', - 'imcrop', - 'imflip', - 'imflip_', - 'impad', - 'impad_to_multiple', - 'imrotate', - 'imfrombytes', - 'imread', - 'imwrite', - 'supported_backends', - 'use_backend', - 'imdenormalize', - 'imnormalize', - 'imnormalize_', - 'iminvert', - 'posterize', - 'solarize', - 'rgb2ycbcr', - 'bgr2ycbcr', - 'ycbcr2rgb', - 'ycbcr2bgr', - 'tensor2imgs', - 'imshear', - 'imtranslate', - 'adjust_color', - 'imequalize', - 'adjust_brightness', - 'adjust_contrast', - 'lut_transform', - 'clahe', - 'adjust_sharpness', - 'auto_contrast', - 'cutout', - 'adjust_lighting', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py deleted file mode 100644 index e167caac49f3..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py +++ /dev/null @@ -1,304 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import cv2 -import numpy as np - - -def imconvert(img, src, dst): - """Convert an image from the src colorspace to dst colorspace. - - Args: - img (ndarray): The input image. - src (str): The source colorspace, e.g., 'rgb', 'hsv'. - dst (str): The destination colorspace, e.g., 'rgb', 'hsv'. - - Returns: - ndarray: The converted image. - """ - code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}') - out_img = cv2.cvtColor(img, code) - return out_img - - -def bgr2gray(img, keepdim=False): - """Convert a BGR image to grayscale image. - - Args: - img (ndarray): The input image. - keepdim (bool): If False (by default), then return the grayscale image - with 2 dims, otherwise 3 dims. - - Returns: - ndarray: The converted grayscale image. - """ - out_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - if keepdim: - out_img = out_img[..., None] - return out_img - - -def rgb2gray(img, keepdim=False): - """Convert a RGB image to grayscale image. - - Args: - img (ndarray): The input image. - keepdim (bool): If False (by default), then return the grayscale image - with 2 dims, otherwise 3 dims. - - Returns: - ndarray: The converted grayscale image. - """ - out_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) - if keepdim: - out_img = out_img[..., None] - return out_img - - -def gray2bgr(img): - """Convert a grayscale image to BGR image. - - Args: - img (ndarray): The input image. - - Returns: - ndarray: The converted BGR image. - """ - img = img[..., None] if img.ndim == 2 else img - out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - return out_img - - -def gray2rgb(img): - """Convert a grayscale image to RGB image. - - Args: - img (ndarray): The input image. - - Returns: - ndarray: The converted RGB image. - """ - img = img[..., None] if img.ndim == 2 else img - out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) - return out_img - - -def _convert_input_type_range(img): - """Convert the type and range of the input image. - - It converts the input image to np.float32 type and range of [0, 1]. - It is mainly used for pre-processing the input image in colorspace - conversion functions such as rgb2ycbcr and ycbcr2rgb. - - Args: - img (ndarray): The input image. It accepts: - 1. np.uint8 type with range [0, 255]; - 2. np.float32 type with range [0, 1]. - - Returns: - (ndarray): The converted image with type of np.float32 and range of - [0, 1]. - """ - img_type = img.dtype - img = img.astype(np.float32) - if img_type == np.float32: - pass - elif img_type == np.uint8: - img /= 255.0 - else: - raise TypeError('The img type should be np.float32 or np.uint8, ' f'but got {img_type}') - return img - - -def _convert_output_type_range(img, dst_type): - """Convert the type and range of the image according to dst_type. - - It converts the image to desired type and range. If `dst_type` is np.uint8, - images will be converted to np.uint8 type with range [0, 255]. If - `dst_type` is np.float32, it converts the image to np.float32 type with - range [0, 1]. - It is mainly used for post-processing images in colorspace conversion - functions such as rgb2ycbcr and ycbcr2rgb. - - Args: - img (ndarray): The image to be converted with np.float32 type and - range [0, 255]. - dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it - converts the image to np.uint8 type with range [0, 255]. If - dst_type is np.float32, it converts the image to np.float32 type - with range [0, 1]. - - Returns: - (ndarray): The converted image with desired type and range. - """ - if dst_type not in (np.uint8, np.float32): - raise TypeError('The dst_type should be np.float32 or np.uint8, ' f'but got {dst_type}') - if dst_type == np.uint8: - img = img.round() - else: - img /= 255.0 - return img.astype(dst_type) - - -def rgb2ycbcr(img, y_only=False): - """Convert a RGB image to YCbCr image. - - This function produces the same results as Matlab's `rgb2ycbcr` function. - It implements the ITU-R BT.601 conversion for standard-definition - television. See more details in - https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. - - It differs from a similar function in cv2.cvtColor: `RGB <-> YCrCb`. - In OpenCV, it implements a JPEG conversion. See more details in - https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. - - Args: - img (ndarray): The input image. It accepts: - 1. np.uint8 type with range [0, 255]; - 2. np.float32 type with range [0, 1]. - y_only (bool): Whether to only return Y channel. Default: False. - - Returns: - ndarray: The converted YCbCr image. The output image has the same type - and range as input image. - """ - img_type = img.dtype - img = _convert_input_type_range(img) - if y_only: - out_img = np.dot(img, [65.481, 128.553, 24.966]) + 16.0 - else: - out_img = np.matmul(img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786], [24.966, 112.0, -18.214]]) + [ - 16, - 128, - 128, - ] - out_img = _convert_output_type_range(out_img, img_type) - return out_img - - -def bgr2ycbcr(img, y_only=False): - """Convert a BGR image to YCbCr image. - - The bgr version of rgb2ycbcr. - It implements the ITU-R BT.601 conversion for standard-definition - television. See more details in - https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. - - It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`. - In OpenCV, it implements a JPEG conversion. See more details in - https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. - - Args: - img (ndarray): The input image. It accepts: - 1. np.uint8 type with range [0, 255]; - 2. np.float32 type with range [0, 1]. - y_only (bool): Whether to only return Y channel. Default: False. - - Returns: - ndarray: The converted YCbCr image. The output image has the same type - and range as input image. - """ - img_type = img.dtype - img = _convert_input_type_range(img) - if y_only: - out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0 - else: - out_img = np.matmul(img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786], [65.481, -37.797, 112.0]]) + [ - 16, - 128, - 128, - ] - out_img = _convert_output_type_range(out_img, img_type) - return out_img - - -def ycbcr2rgb(img): - """Convert a YCbCr image to RGB image. - - This function produces the same results as Matlab's ycbcr2rgb function. - It implements the ITU-R BT.601 conversion for standard-definition - television. See more details in - https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. - - It differs from a similar function in cv2.cvtColor: `YCrCb <-> RGB`. - In OpenCV, it implements a JPEG conversion. See more details in - https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. - - Args: - img (ndarray): The input image. It accepts: - 1. np.uint8 type with range [0, 255]; - 2. np.float32 type with range [0, 1]. - - Returns: - ndarray: The converted RGB image. The output image has the same type - and range as input image. - """ - img_type = img.dtype - img = _convert_input_type_range(img) * 255 - out_img = np.matmul( - img, [[0.00456621, 0.00456621, 0.00456621], [0, -0.00153632, 0.00791071], [0.00625893, -0.00318811, 0]] - ) * 255.0 + [-222.921, 135.576, -276.836] - out_img = _convert_output_type_range(out_img, img_type) - return out_img - - -def ycbcr2bgr(img): - """Convert a YCbCr image to BGR image. - - The bgr version of ycbcr2rgb. - It implements the ITU-R BT.601 conversion for standard-definition - television. See more details in - https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. - - It differs from a similar function in cv2.cvtColor: `YCrCb <-> BGR`. - In OpenCV, it implements a JPEG conversion. See more details in - https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. - - Args: - img (ndarray): The input image. It accepts: - 1. np.uint8 type with range [0, 255]; - 2. np.float32 type with range [0, 1]. - - Returns: - ndarray: The converted BGR image. The output image has the same type - and range as input image. - """ - img_type = img.dtype - img = _convert_input_type_range(img) * 255 - out_img = np.matmul( - img, [[0.00456621, 0.00456621, 0.00456621], [0.00791071, -0.00153632, 0], [0, -0.00318811, 0.00625893]] - ) * 255.0 + [-276.836, 135.576, -222.921] - out_img = _convert_output_type_range(out_img, img_type) - return out_img - - -def convert_color_factory(src, dst): - - code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}') - - def convert_color(img): - out_img = cv2.cvtColor(img, code) - return out_img - - convert_color.__doc__ = f"""Convert a {src.upper()} image to {dst.upper()} - image. - - Args: - img (ndarray or str): The input image. - - Returns: - ndarray: The converted {dst.upper()} image. - """ - - return convert_color - - -bgr2rgb = convert_color_factory('bgr', 'rgb') - -rgb2bgr = convert_color_factory('rgb', 'bgr') - -bgr2hsv = convert_color_factory('bgr', 'hsv') - -hsv2bgr = convert_color_factory('hsv', 'bgr') - -bgr2hls = convert_color_factory('bgr', 'hls') - -hls2bgr = convert_color_factory('hls', 'bgr') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py deleted file mode 100644 index 2c96fae34feb..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py +++ /dev/null @@ -1,671 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import numbers - -import cv2 -import numpy as np - -from ..utils import to_2tuple -from .io import imread_backend - -try: - from PIL import Image -except ImportError: - Image = None - - -def _scale_size(size, scale): - """Rescale a size by a ratio. - - Args: - size (tuple[int]): (w, h). - scale (float | tuple(float)): Scaling factor. - - Returns: - tuple[int]: scaled size. - """ - if isinstance(scale, (float, int)): - scale = (scale, scale) - w, h = size - return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5) - - -cv2_interp_codes = { - 'nearest': cv2.INTER_NEAREST, - 'bilinear': cv2.INTER_LINEAR, - 'bicubic': cv2.INTER_CUBIC, - 'area': cv2.INTER_AREA, - 'lanczos': cv2.INTER_LANCZOS4, -} - -if Image is not None: - pillow_interp_codes = { - 'nearest': Image.NEAREST, - 'bilinear': Image.BILINEAR, - 'bicubic': Image.BICUBIC, - 'box': Image.BOX, - 'lanczos': Image.LANCZOS, - 'hamming': Image.HAMMING, - } - - -def imresize(img, size, return_scale=False, interpolation='bilinear', out=None, backend=None): - """Resize image to a given size. - - Args: - img (ndarray): The input image. - size (tuple[int]): Target size (w, h). - return_scale (bool): Whether to return `w_scale` and `h_scale`. - interpolation (str): Interpolation method, accepted values are - "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' - backend, "nearest", "bilinear" for 'pillow' backend. - out (ndarray): The output destination. - backend (str | None): The image resize backend type. Options are `cv2`, - `pillow`, `None`. If backend is None, the global imread_backend - specified by ``mmcv.use_backend()`` will be used. Default: None. - - Returns: - tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or - `resized_img`. - """ - h, w = img.shape[:2] - if backend is None: - backend = imread_backend - if backend not in ['cv2', 'pillow']: - raise ValueError(f'backend: {backend} is not supported for resize.' f"Supported backends are 'cv2', 'pillow'") - - if backend == 'pillow': - assert img.dtype == np.uint8, 'Pillow backend only support uint8 type' - pil_image = Image.fromarray(img) - pil_image = pil_image.resize(size, pillow_interp_codes[interpolation]) - resized_img = np.array(pil_image) - else: - resized_img = cv2.resize(img, size, dst=out, interpolation=cv2_interp_codes[interpolation]) - if not return_scale: - return resized_img - else: - w_scale = size[0] / w - h_scale = size[1] / h - return resized_img, w_scale, h_scale - - -def imresize_to_multiple( - img, - divisor, - size=None, - scale_factor=None, - keep_ratio=False, - return_scale=False, - interpolation='bilinear', - out=None, - backend=None, -): - """Resize image according to a given size or scale factor and then rounds - up the the resized or rescaled image size to the nearest value that can be - divided by the divisor. - - Args: - img (ndarray): The input image. - divisor (int | tuple): Resized image size will be a multiple of - divisor. If divisor is a tuple, divisor should be - (w_divisor, h_divisor). - size (None | int | tuple[int]): Target size (w, h). Default: None. - scale_factor (None | float | tuple[float]): Multiplier for spatial - size. Should match input size if it is a tuple and the 2D style is - (w_scale_factor, h_scale_factor). Default: None. - keep_ratio (bool): Whether to keep the aspect ratio when resizing the - image. Default: False. - return_scale (bool): Whether to return `w_scale` and `h_scale`. - interpolation (str): Interpolation method, accepted values are - "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' - backend, "nearest", "bilinear" for 'pillow' backend. - out (ndarray): The output destination. - backend (str | None): The image resize backend type. Options are `cv2`, - `pillow`, `None`. If backend is None, the global imread_backend - specified by ``mmcv.use_backend()`` will be used. Default: None. - - Returns: - tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or - `resized_img`. - """ - h, w = img.shape[:2] - if size is not None and scale_factor is not None: - raise ValueError('only one of size or scale_factor should be defined') - elif size is None and scale_factor is None: - raise ValueError('one of size or scale_factor should be defined') - elif size is not None: - size = to_2tuple(size) - if keep_ratio: - size = rescale_size((w, h), size, return_scale=False) - else: - size = _scale_size((w, h), scale_factor) - - divisor = to_2tuple(divisor) - size = tuple([int(np.ceil(s / d)) * d for s, d in zip(size, divisor)]) - resized_img, w_scale, h_scale = imresize( - img, size, return_scale=True, interpolation=interpolation, out=out, backend=backend - ) - if return_scale: - return resized_img, w_scale, h_scale - else: - return resized_img - - -def imresize_like(img, dst_img, return_scale=False, interpolation='bilinear', backend=None): - """Resize image to the same size of a given image. - - Args: - img (ndarray): The input image. - dst_img (ndarray): The target image. - return_scale (bool): Whether to return `w_scale` and `h_scale`. - interpolation (str): Same as :func:`resize`. - backend (str | None): Same as :func:`resize`. - - Returns: - tuple or ndarray: (`resized_img`, `w_scale`, `h_scale`) or - `resized_img`. - """ - h, w = dst_img.shape[:2] - return imresize(img, (w, h), return_scale, interpolation, backend=backend) - - -def rescale_size(old_size, scale, return_scale=False): - """Calculate the new size to be rescaled to. - - Args: - old_size (tuple[int]): The old size (w, h) of image. - scale (float | tuple[int]): The scaling factor or maximum size. - If it is a float number, then the image will be rescaled by this - factor, else if it is a tuple of 2 integers, then the image will - be rescaled as large as possible within the scale. - return_scale (bool): Whether to return the scaling factor besides the - rescaled image size. - - Returns: - tuple[int]: The new rescaled image size. - """ - w, h = old_size - if isinstance(scale, (float, int)): - if scale <= 0: - raise ValueError(f'Invalid scale {scale}, must be positive.') - scale_factor = scale - elif isinstance(scale, tuple): - max_long_edge = max(scale) - max_short_edge = min(scale) - scale_factor = min(max_long_edge / max(h, w), max_short_edge / min(h, w)) - else: - raise TypeError(f'Scale must be a number or tuple of int, but got {type(scale)}') - - new_size = _scale_size((w, h), scale_factor) - - if return_scale: - return new_size, scale_factor - else: - return new_size - - -def imrescale(img, scale, return_scale=False, interpolation='bilinear', backend=None): - """Resize image while keeping the aspect ratio. - - Args: - img (ndarray): The input image. - scale (float | tuple[int]): The scaling factor or maximum size. - If it is a float number, then the image will be rescaled by this - factor, else if it is a tuple of 2 integers, then the image will - be rescaled as large as possible within the scale. - return_scale (bool): Whether to return the scaling factor besides the - rescaled image. - interpolation (str): Same as :func:`resize`. - backend (str | None): Same as :func:`resize`. - - Returns: - ndarray: The rescaled image. - """ - h, w = img.shape[:2] - new_size, scale_factor = rescale_size((w, h), scale, return_scale=True) - rescaled_img = imresize(img, new_size, interpolation=interpolation, backend=backend) - if return_scale: - return rescaled_img, scale_factor - else: - return rescaled_img - - -def imflip(img, direction='horizontal'): - """Flip an image horizontally or vertically. - - Args: - img (ndarray): Image to be flipped. - direction (str): The flip direction, either "horizontal" or - "vertical" or "diagonal". - - Returns: - ndarray: The flipped image. - """ - assert direction in ['horizontal', 'vertical', 'diagonal'] - if direction == 'horizontal': - return np.flip(img, axis=1) - elif direction == 'vertical': - return np.flip(img, axis=0) - else: - return np.flip(img, axis=(0, 1)) - - -def imflip_(img, direction='horizontal'): - """Inplace flip an image horizontally or vertically. - - Args: - img (ndarray): Image to be flipped. - direction (str): The flip direction, either "horizontal" or - "vertical" or "diagonal". - - Returns: - ndarray: The flipped image (inplace). - """ - assert direction in ['horizontal', 'vertical', 'diagonal'] - if direction == 'horizontal': - return cv2.flip(img, 1, img) - elif direction == 'vertical': - return cv2.flip(img, 0, img) - else: - return cv2.flip(img, -1, img) - - -def imrotate(img, angle, center=None, scale=1.0, border_value=0, interpolation='bilinear', auto_bound=False): - """Rotate an image. - - Args: - img (ndarray): Image to be rotated. - angle (float): Rotation angle in degrees, positive values mean - clockwise rotation. - center (tuple[float], optional): Center point (w, h) of the rotation in - the source image. If not specified, the center of the image will be - used. - scale (float): Isotropic scale factor. - border_value (int): Border value. - interpolation (str): Same as :func:`resize`. - auto_bound (bool): Whether to adjust the image size to cover the whole - rotated image. - - Returns: - ndarray: The rotated image. - """ - if center is not None and auto_bound: - raise ValueError('`auto_bound` conflicts with `center`') - h, w = img.shape[:2] - if center is None: - center = ((w - 1) * 0.5, (h - 1) * 0.5) - assert isinstance(center, tuple) - - matrix = cv2.getRotationMatrix2D(center, -angle, scale) - if auto_bound: - cos = np.abs(matrix[0, 0]) - sin = np.abs(matrix[0, 1]) - new_w = h * sin + w * cos - new_h = h * cos + w * sin - matrix[0, 2] += (new_w - w) * 0.5 - matrix[1, 2] += (new_h - h) * 0.5 - w = int(np.round(new_w)) - h = int(np.round(new_h)) - rotated = cv2.warpAffine(img, matrix, (w, h), flags=cv2_interp_codes[interpolation], borderValue=border_value) - return rotated - - -def bbox_clip(bboxes, img_shape): - """Clip bboxes to fit the image shape. - - Args: - bboxes (ndarray): Shape (..., 4*k) - img_shape (tuple[int]): (height, width) of the image. - - Returns: - ndarray: Clipped bboxes. - """ - assert bboxes.shape[-1] % 4 == 0 - cmin = np.empty(bboxes.shape[-1], dtype=bboxes.dtype) - cmin[0::2] = img_shape[1] - 1 - cmin[1::2] = img_shape[0] - 1 - clipped_bboxes = np.maximum(np.minimum(bboxes, cmin), 0) - return clipped_bboxes - - -def bbox_scaling(bboxes, scale, clip_shape=None): - """Scaling bboxes w.r.t the box center. - - Args: - bboxes (ndarray): Shape(..., 4). - scale (float): Scaling factor. - clip_shape (tuple[int], optional): If specified, bboxes that exceed the - boundary will be clipped according to the given shape (h, w). - - Returns: - ndarray: Scaled bboxes. - """ - if float(scale) == 1.0: - scaled_bboxes = bboxes.copy() - else: - w = bboxes[..., 2] - bboxes[..., 0] + 1 - h = bboxes[..., 3] - bboxes[..., 1] + 1 - dw = (w * (scale - 1)) * 0.5 - dh = (h * (scale - 1)) * 0.5 - scaled_bboxes = bboxes + np.stack((-dw, -dh, dw, dh), axis=-1) - if clip_shape is not None: - return bbox_clip(scaled_bboxes, clip_shape) - else: - return scaled_bboxes - - -def imcrop(img, bboxes, scale=1.0, pad_fill=None): - """Crop image patches. - - 3 steps: scale the bboxes -> clip bboxes -> crop and pad. - - Args: - img (ndarray): Image to be cropped. - bboxes (ndarray): Shape (k, 4) or (4, ), location of cropped bboxes. - scale (float, optional): Scale ratio of bboxes, the default value - 1.0 means no padding. - pad_fill (Number | list[Number]): Value to be filled for padding. - Default: None, which means no padding. - - Returns: - list[ndarray] | ndarray: The cropped image patches. - """ - chn = 1 if img.ndim == 2 else img.shape[2] - if pad_fill is not None: - if isinstance(pad_fill, (int, float)): - pad_fill = [pad_fill for _ in range(chn)] - assert len(pad_fill) == chn - - _bboxes = bboxes[None, ...] if bboxes.ndim == 1 else bboxes - scaled_bboxes = bbox_scaling(_bboxes, scale).astype(np.int32) - clipped_bbox = bbox_clip(scaled_bboxes, img.shape) - - patches = [] - for i in range(clipped_bbox.shape[0]): - x1, y1, x2, y2 = tuple(clipped_bbox[i, :]) - if pad_fill is None: - patch = img[y1 : y2 + 1, x1 : x2 + 1, ...] - else: - _x1, _y1, _x2, _y2 = tuple(scaled_bboxes[i, :]) - if chn == 1: - patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1) - else: - patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1, chn) - patch = np.array(pad_fill, dtype=img.dtype) * np.ones(patch_shape, dtype=img.dtype) - x_start = 0 if _x1 >= 0 else -_x1 - y_start = 0 if _y1 >= 0 else -_y1 - w = x2 - x1 + 1 - h = y2 - y1 + 1 - patch[y_start : y_start + h, x_start : x_start + w, ...] = img[y1 : y1 + h, x1 : x1 + w, ...] - patches.append(patch) - - if bboxes.ndim == 1: - return patches[0] - else: - return patches - - -def impad(img, *, shape=None, padding=None, pad_val=0, padding_mode='constant'): - """Pad the given image to a certain shape or pad on all sides with - specified padding mode and padding value. - - Args: - img (ndarray): Image to be padded. - shape (tuple[int]): Expected padding shape (h, w). Default: None. - padding (int or tuple[int]): Padding on each border. If a single int is - provided this is used to pad all borders. If tuple of length 2 is - provided this is the padding on left/right and top/bottom - respectively. If a tuple of length 4 is provided this is the - padding for the left, top, right and bottom borders respectively. - Default: None. Note that `shape` and `padding` can not be both - set. - pad_val (Number | Sequence[Number]): Values to be filled in padding - areas when padding_mode is 'constant'. Default: 0. - padding_mode (str): Type of padding. Should be: constant, edge, - reflect or symmetric. Default: constant. - - - constant: pads with a constant value, this value is specified - with pad_val. - - edge: pads with the last value at the edge of the image. - - reflect: pads with reflection of image without repeating the - last value on the edge. For example, padding [1, 2, 3, 4] - with 2 elements on both sides in reflect mode will result - in [3, 2, 1, 2, 3, 4, 3, 2]. - - symmetric: pads with reflection of image repeating the last - value on the edge. For example, padding [1, 2, 3, 4] with - 2 elements on both sides in symmetric mode will result in - [2, 1, 1, 2, 3, 4, 4, 3] - - Returns: - ndarray: The padded image. - """ - - assert (shape is not None) ^ (padding is not None) - if shape is not None: - padding = (0, 0, shape[1] - img.shape[1], shape[0] - img.shape[0]) - - # check pad_val - if isinstance(pad_val, tuple): - assert len(pad_val) == img.shape[-1] - elif not isinstance(pad_val, numbers.Number): - raise TypeError('pad_val must be a int or a tuple. ' f'But received {type(pad_val)}') - - # check padding - if isinstance(padding, tuple) and len(padding) in [2, 4]: - if len(padding) == 2: - padding = (padding[0], padding[1], padding[0], padding[1]) - elif isinstance(padding, numbers.Number): - padding = (padding, padding, padding, padding) - else: - raise ValueError('Padding must be a int or a 2, or 4 element tuple.' f'But received {padding}') - - # check padding mode - assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'] - - border_type = { - 'constant': cv2.BORDER_CONSTANT, - 'edge': cv2.BORDER_REPLICATE, - 'reflect': cv2.BORDER_REFLECT_101, - 'symmetric': cv2.BORDER_REFLECT, - } - img = cv2.copyMakeBorder( - img, padding[1], padding[3], padding[0], padding[2], border_type[padding_mode], value=pad_val - ) - - return img - - -def impad_to_multiple(img, divisor, pad_val=0): - """Pad an image to ensure each edge to be multiple to some number. - - Args: - img (ndarray): Image to be padded. - divisor (int): Padded image edges will be multiple to divisor. - pad_val (Number | Sequence[Number]): Same as :func:`impad`. - - Returns: - ndarray: The padded image. - """ - pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor - pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor - return impad(img, shape=(pad_h, pad_w), pad_val=pad_val) - - -def cutout(img, shape, pad_val=0): - """Randomly cut out a rectangle from the original img. - - Args: - img (ndarray): Image to be cutout. - shape (int | tuple[int]): Expected cutout shape (h, w). If given as a - int, the value will be used for both h and w. - pad_val (int | float | tuple[int | float]): Values to be filled in the - cut area. Defaults to 0. - - Returns: - ndarray: The cutout image. - """ - - channels = 1 if img.ndim == 2 else img.shape[2] - if isinstance(shape, int): - cut_h, cut_w = shape, shape - else: - assert isinstance(shape, tuple) and len(shape) == 2, ( - f'shape must be a int or a tuple with length 2, but got type ' f'{type(shape)} instead.' - ) - cut_h, cut_w = shape - if isinstance(pad_val, (int, float)): - pad_val = tuple([pad_val] * channels) - elif isinstance(pad_val, tuple): - assert len(pad_val) == channels, ( - 'Expected the num of elements in tuple equals the channels' - 'of input image. Found {} vs {}'.format(len(pad_val), channels) - ) - else: - raise TypeError(f'Invalid type {type(pad_val)} for `pad_val`') - - img_h, img_w = img.shape[:2] - y0 = np.random.uniform(img_h) - x0 = np.random.uniform(img_w) - - y1 = int(max(0, y0 - cut_h / 2.0)) - x1 = int(max(0, x0 - cut_w / 2.0)) - y2 = min(img_h, y1 + cut_h) - x2 = min(img_w, x1 + cut_w) - - if img.ndim == 2: - patch_shape = (y2 - y1, x2 - x1) - else: - patch_shape = (y2 - y1, x2 - x1, channels) - - img_cutout = img.copy() - patch = np.array(pad_val, dtype=img.dtype) * np.ones(patch_shape, dtype=img.dtype) - img_cutout[y1:y2, x1:x2, ...] = patch - - return img_cutout - - -def _get_shear_matrix(magnitude, direction='horizontal'): - """Generate the shear matrix for transformation. - - Args: - magnitude (int | float): The magnitude used for shear. - direction (str): The flip direction, either "horizontal" - or "vertical". - - Returns: - ndarray: The shear matrix with dtype float32. - """ - if direction == 'horizontal': - shear_matrix = np.float32([[1, magnitude, 0], [0, 1, 0]]) - elif direction == 'vertical': - shear_matrix = np.float32([[1, 0, 0], [magnitude, 1, 0]]) - return shear_matrix - - -def imshear(img, magnitude, direction='horizontal', border_value=0, interpolation='bilinear'): - """Shear an image. - - Args: - img (ndarray): Image to be sheared with format (h, w) - or (h, w, c). - magnitude (int | float): The magnitude used for shear. - direction (str): The flip direction, either "horizontal" - or "vertical". - border_value (int | tuple[int]): Value used in case of a - constant border. - interpolation (str): Same as :func:`resize`. - - Returns: - ndarray: The sheared image. - """ - assert direction in ['horizontal', 'vertical'], f'Invalid direction: {direction}' - height, width = img.shape[:2] - if img.ndim == 2: - channels = 1 - elif img.ndim == 3: - channels = img.shape[-1] - if isinstance(border_value, int): - border_value = tuple([border_value] * channels) - elif isinstance(border_value, tuple): - assert len(border_value) == channels, ( - 'Expected the num of elements in tuple equals the channels' - 'of input image. Found {} vs {}'.format(len(border_value), channels) - ) - else: - raise ValueError(f'Invalid type {type(border_value)} for `border_value`') - shear_matrix = _get_shear_matrix(magnitude, direction) - sheared = cv2.warpAffine( - img, - shear_matrix, - (width, height), - # Note case when the number elements in `border_value` - # greater than 3 (e.g. shearing masks whose channels large - # than 3) will raise TypeError in `cv2.warpAffine`. - # Here simply slice the first 3 values in `border_value`. - borderValue=border_value[:3], - flags=cv2_interp_codes[interpolation], - ) - return sheared - - -def _get_translate_matrix(offset, direction='horizontal'): - """Generate the translate matrix. - - Args: - offset (int | float): The offset used for translate. - direction (str): The translate direction, either - "horizontal" or "vertical". - - Returns: - ndarray: The translate matrix with dtype float32. - """ - if direction == 'horizontal': - translate_matrix = np.float32([[1, 0, offset], [0, 1, 0]]) - elif direction == 'vertical': - translate_matrix = np.float32([[1, 0, 0], [0, 1, offset]]) - return translate_matrix - - -def imtranslate(img, offset, direction='horizontal', border_value=0, interpolation='bilinear'): - """Translate an image. - - Args: - img (ndarray): Image to be translated with format - (h, w) or (h, w, c). - offset (int | float): The offset used for translate. - direction (str): The translate direction, either "horizontal" - or "vertical". - border_value (int | tuple[int]): Value used in case of a - constant border. - interpolation (str): Same as :func:`resize`. - - Returns: - ndarray: The translated image. - """ - assert direction in ['horizontal', 'vertical'], f'Invalid direction: {direction}' - height, width = img.shape[:2] - if img.ndim == 2: - channels = 1 - elif img.ndim == 3: - channels = img.shape[-1] - if isinstance(border_value, int): - border_value = tuple([border_value] * channels) - elif isinstance(border_value, tuple): - assert len(border_value) == channels, ( - 'Expected the num of elements in tuple equals the channels' - 'of input image. Found {} vs {}'.format(len(border_value), channels) - ) - else: - raise ValueError(f'Invalid type {type(border_value)} for `border_value`.') - translate_matrix = _get_translate_matrix(offset, direction) - translated = cv2.warpAffine( - img, - translate_matrix, - (width, height), - # Note case when the number elements in `border_value` - # greater than 3 (e.g. translating masks whose channels - # large than 3) will raise TypeError in `cv2.warpAffine`. - # Here simply slice the first 3 values in `border_value`. - borderValue=border_value[:3], - flags=cv2_interp_codes[interpolation], - ) - return translated diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py deleted file mode 100644 index eebffaac43d7..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py +++ /dev/null @@ -1,256 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import io -import os.path as osp -from pathlib import Path - -import cv2 -import numpy as np -from cv2 import IMREAD_COLOR, IMREAD_GRAYSCALE, IMREAD_IGNORE_ORIENTATION, IMREAD_UNCHANGED - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import check_file_exist, is_str, mkdir_or_exist - -try: - from turbojpeg import TJCS_RGB, TJPF_BGR, TJPF_GRAY, TurboJPEG -except ImportError: - TJCS_RGB = TJPF_GRAY = TJPF_BGR = TurboJPEG = None - -try: - from PIL import Image, ImageOps -except ImportError: - Image = None - -try: - import tifffile -except ImportError: - tifffile = None - -jpeg = None -supported_backends = ['cv2', 'turbojpeg', 'pillow', 'tifffile'] - -imread_flags = { - 'color': IMREAD_COLOR, - 'grayscale': IMREAD_GRAYSCALE, - 'unchanged': IMREAD_UNCHANGED, - 'color_ignore_orientation': IMREAD_IGNORE_ORIENTATION | IMREAD_COLOR, - 'grayscale_ignore_orientation': IMREAD_IGNORE_ORIENTATION | IMREAD_GRAYSCALE, -} - -imread_backend = 'cv2' - - -def use_backend(backend): - """Select a backend for image decoding. - - Args: - backend (str): The image decoding backend type. Options are `cv2`, - `pillow`, `turbojpeg` (see https://github.com/lilohuang/PyTurboJPEG) - and `tifffile`. `turbojpeg` is faster but it only supports `.jpeg` - file format. - """ - assert backend in supported_backends - global imread_backend - imread_backend = backend - if imread_backend == 'turbojpeg': - if TurboJPEG is None: - raise ImportError('`PyTurboJPEG` is not installed') - global jpeg - if jpeg is None: - jpeg = TurboJPEG() - elif imread_backend == 'pillow': - if Image is None: - raise ImportError('`Pillow` is not installed') - elif imread_backend == 'tifffile': - if tifffile is None: - raise ImportError('`tifffile` is not installed') - - -def _jpegflag(flag='color', channel_order='bgr'): - channel_order = channel_order.lower() - if channel_order not in ['rgb', 'bgr']: - raise ValueError('channel order must be either "rgb" or "bgr"') - - if flag == 'color': - if channel_order == 'bgr': - return TJPF_BGR - elif channel_order == 'rgb': - return TJCS_RGB - elif flag == 'grayscale': - return TJPF_GRAY - else: - raise ValueError('flag must be "color" or "grayscale"') - - -def _pillow2array(img, flag='color', channel_order='bgr'): - """Convert a pillow image to numpy array. - - Args: - img (:obj:`PIL.Image.Image`): The image loaded using PIL - flag (str): Flags specifying the color type of a loaded image, - candidates are 'color', 'grayscale' and 'unchanged'. - Default to 'color'. - channel_order (str): The channel order of the output image array, - candidates are 'bgr' and 'rgb'. Default to 'bgr'. - - Returns: - np.ndarray: The converted numpy array - """ - channel_order = channel_order.lower() - if channel_order not in ['rgb', 'bgr']: - raise ValueError('channel order must be either "rgb" or "bgr"') - - if flag == 'unchanged': - array = np.array(img) - if array.ndim >= 3 and array.shape[2] >= 3: # color image - array[:, :, :3] = array[:, :, (2, 1, 0)] # RGB to BGR - else: - # Handle exif orientation tag - if flag in ['color', 'grayscale']: - img = ImageOps.exif_transpose(img) - # If the image mode is not 'RGB', convert it to 'RGB' first. - if img.mode != 'RGB': - if img.mode != 'LA': - # Most formats except 'LA' can be directly converted to RGB - img = img.convert('RGB') - else: - # When the mode is 'LA', the default conversion will fill in - # the canvas with black, which sometimes shadows black objects - # in the foreground. - # - # Therefore, a random color (124, 117, 104) is used for canvas - img_rgba = img.convert('RGBA') - img = Image.new('RGB', img_rgba.size, (124, 117, 104)) - img.paste(img_rgba, mask=img_rgba.split()[3]) # 3 is alpha - if flag in ['color', 'color_ignore_orientation']: - array = np.array(img) - if channel_order != 'rgb': - array = array[:, :, ::-1] # RGB to BGR - elif flag in ['grayscale', 'grayscale_ignore_orientation']: - img = img.convert('L') - array = np.array(img) - else: - raise ValueError( - 'flag must be "color", "grayscale", "unchanged", ' - f'"color_ignore_orientation" or "grayscale_ignore_orientation"' - f' but got {flag}' - ) - return array - - -def imread(img_or_path, flag='color', channel_order='bgr', backend=None): - """Read an image. - - Args: - img_or_path (ndarray or str or Path): Either a numpy array or str or - pathlib.Path. If it is a numpy array (loaded image), then - it will be returned as is. - flag (str): Flags specifying the color type of a loaded image, - candidates are `color`, `grayscale`, `unchanged`, - `color_ignore_orientation` and `grayscale_ignore_orientation`. - By default, `cv2` and `pillow` backend would rotate the image - according to its EXIF info unless called with `unchanged` or - `*_ignore_orientation` flags. `turbojpeg` and `tifffile` backend - always ignore image's EXIF info regardless of the flag. - The `turbojpeg` backend only supports `color` and `grayscale`. - channel_order (str): Order of channel, candidates are `bgr` and `rgb`. - backend (str | None): The image decoding backend type. Options are - `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`. - If backend is None, the global imread_backend specified by - ``mmcv.use_backend()`` will be used. Default: None. - - Returns: - ndarray: Loaded image array. - """ - - if backend is None: - backend = imread_backend - if backend not in supported_backends: - raise ValueError( - f'backend: {backend} is not supported. Supported ' "backends are 'cv2', 'turbojpeg', 'pillow'" - ) - if isinstance(img_or_path, Path): - img_or_path = str(img_or_path) - - if isinstance(img_or_path, np.ndarray): - return img_or_path - elif is_str(img_or_path): - check_file_exist(img_or_path, f'img file does not exist: {img_or_path}') - if backend == 'turbojpeg': - with open(img_or_path, 'rb') as in_file: - img = jpeg.decode(in_file.read(), _jpegflag(flag, channel_order)) - if img.shape[-1] == 1: - img = img[:, :, 0] - return img - elif backend == 'pillow': - img = Image.open(img_or_path) - img = _pillow2array(img, flag, channel_order) - return img - elif backend == 'tifffile': - img = tifffile.imread(img_or_path) - return img - else: - flag = imread_flags[flag] if is_str(flag) else flag - img = cv2.imread(img_or_path, flag) - if flag == IMREAD_COLOR and channel_order == 'rgb': - cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) - return img - else: - raise TypeError('"img" must be a numpy array or a str or ' 'a pathlib.Path object') - - -def imfrombytes(content, flag='color', channel_order='bgr', backend=None): - """Read an image from bytes. - - Args: - content (bytes): Image bytes got from files or other streams. - flag (str): Same as :func:`imread`. - backend (str | None): The image decoding backend type. Options are - `cv2`, `pillow`, `turbojpeg`, `None`. If backend is None, the - global imread_backend specified by ``mmcv.use_backend()`` will be - used. Default: None. - - Returns: - ndarray: Loaded image array. - """ - - if backend is None: - backend = imread_backend - if backend not in supported_backends: - raise ValueError( - f'backend: {backend} is not supported. Supported ' "backends are 'cv2', 'turbojpeg', 'pillow'" - ) - if backend == 'turbojpeg': - img = jpeg.decode(content, _jpegflag(flag, channel_order)) - if img.shape[-1] == 1: - img = img[:, :, 0] - return img - elif backend == 'pillow': - buff = io.BytesIO(content) - img = Image.open(buff) - img = _pillow2array(img, flag, channel_order) - return img - else: - img_np = np.frombuffer(content, np.uint8) - flag = imread_flags[flag] if is_str(flag) else flag - img = cv2.imdecode(img_np, flag) - if flag == IMREAD_COLOR and channel_order == 'rgb': - cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) - return img - - -def imwrite(img, file_path, params=None, auto_mkdir=True): - """Write image to file. - - Args: - img (ndarray): Image array to be written. - file_path (str): Image file path. - params (None or list): Same as opencv :func:`imwrite` interface. - auto_mkdir (bool): If the parent folder of `file_path` does not exist, - whether to create it automatically. - - Returns: - bool: Successful or not. - """ - if auto_mkdir: - dir_name = osp.abspath(osp.dirname(file_path)) - mkdir_or_exist(dir_name) - return cv2.imwrite(file_path, img, params) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py deleted file mode 100644 index a66ed60474b9..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import numpy as np - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - -try: - import torch -except ImportError: - torch = None - - -def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): - """Convert tensor to 3-channel images. - - Args: - tensor (torch.Tensor): Tensor that contains multiple images, shape ( - N, C, H, W). - mean (tuple[float], optional): Mean of images. Defaults to (0, 0, 0). - std (tuple[float], optional): Standard deviation of images. - Defaults to (1, 1, 1). - to_rgb (bool, optional): Whether the tensor was converted to RGB - format in the first place. If so, convert it back to BGR. - Defaults to True. - - Returns: - list[np.ndarray]: A list that contains multiple images. - """ - - if torch is None: - raise RuntimeError('pytorch is not installed') - assert torch.is_tensor(tensor) and tensor.ndim == 4 - assert len(mean) == 3 - assert len(std) == 3 - - num_imgs = tensor.size(0) - mean = np.array(mean, dtype=np.float32) - std = np.array(std, dtype=np.float32) - imgs = [] - for img_id in range(num_imgs): - img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) - img = mmcv.imdenormalize(img, mean, std, to_bgr=to_rgb).astype(np.uint8) - imgs.append(np.ascontiguousarray(img)) - return imgs diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py deleted file mode 100644 index a68b8f49cade..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py +++ /dev/null @@ -1,422 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import cv2 -import numpy as np - -from ..utils import is_tuple_of -from .colorspace import bgr2gray, gray2bgr - - -def imnormalize(img, mean, std, to_rgb=True): - """Normalize an image with mean and std. - - Args: - img (ndarray): Image to be normalized. - mean (ndarray): The mean to be used for normalize. - std (ndarray): The std to be used for normalize. - to_rgb (bool): Whether to convert to rgb. - - Returns: - ndarray: The normalized image. - """ - img = img.copy().astype(np.float32) - return imnormalize_(img, mean, std, to_rgb) - - -def imnormalize_(img, mean, std, to_rgb=True): - """Inplace normalize an image with mean and std. - - Args: - img (ndarray): Image to be normalized. - mean (ndarray): The mean to be used for normalize. - std (ndarray): The std to be used for normalize. - to_rgb (bool): Whether to convert to rgb. - - Returns: - ndarray: The normalized image. - """ - # cv2 inplace normalization does not accept uint8 - assert img.dtype != np.uint8 - mean = np.float64(mean.reshape(1, -1)) - stdinv = 1 / np.float64(std.reshape(1, -1)) - if to_rgb: - cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace - cv2.subtract(img, mean, img) # inplace - cv2.multiply(img, stdinv, img) # inplace - return img - - -def imdenormalize(img, mean, std, to_bgr=True): - assert img.dtype != np.uint8 - mean = mean.reshape(1, -1).astype(np.float64) - std = std.reshape(1, -1).astype(np.float64) - img = cv2.multiply(img, std) # make a copy - cv2.add(img, mean, img) # inplace - if to_bgr: - cv2.cvtColor(img, cv2.COLOR_RGB2BGR, img) # inplace - return img - - -def iminvert(img): - """Invert (negate) an image. - - Args: - img (ndarray): Image to be inverted. - - Returns: - ndarray: The inverted image. - """ - return np.full_like(img, 255) - img - - -def solarize(img, thr=128): - """Solarize an image (invert all pixel values above a threshold) - - Args: - img (ndarray): Image to be solarized. - thr (int): Threshold for solarizing (0 - 255). - - Returns: - ndarray: The solarized image. - """ - img = np.where(img < thr, img, 255 - img) - return img - - -def posterize(img, bits): - """Posterize an image (reduce the number of bits for each color channel) - - Args: - img (ndarray): Image to be posterized. - bits (int): Number of bits (1 to 8) to use for posterizing. - - Returns: - ndarray: The posterized image. - """ - shift = 8 - bits - img = np.left_shift(np.right_shift(img, shift), shift) - return img - - -def adjust_color(img, alpha=1, beta=None, gamma=0): - r"""It blends the source image and its gray image: - - .. math:: - output = img * alpha + gray\_img * beta + gamma - - Args: - img (ndarray): The input source image. - alpha (int | float): Weight for the source image. Default 1. - beta (int | float): Weight for the converted gray image. - If None, it's assigned the value (1 - `alpha`). - gamma (int | float): Scalar added to each sum. - Same as :func:`cv2.addWeighted`. Default 0. - - Returns: - ndarray: Colored image which has the same size and dtype as input. - """ - gray_img = bgr2gray(img) - gray_img = np.tile(gray_img[..., None], [1, 1, 3]) - if beta is None: - beta = 1 - alpha - colored_img = cv2.addWeighted(img, alpha, gray_img, beta, gamma) - if not colored_img.dtype == np.uint8: - # Note when the dtype of `img` is not the default `np.uint8` - # (e.g. np.float32), the value in `colored_img` got from cv2 - # is not guaranteed to be in range [0, 255], so here clip - # is needed. - colored_img = np.clip(colored_img, 0, 255) - return colored_img - - -def imequalize(img): - """Equalize the image histogram. - - This function applies a non-linear mapping to the input image, - in order to create a uniform distribution of grayscale values - in the output image. - - Args: - img (ndarray): Image to be equalized. - - Returns: - ndarray: The equalized image. - """ - - def _scale_channel(im, c): - """Scale the data in the corresponding channel.""" - im = im[:, :, c] - # Compute the histogram of the image channel. - histo = np.histogram(im, 256, (0, 255))[0] - # For computing the step, filter out the nonzeros. - nonzero_histo = histo[histo > 0] - step = (np.sum(nonzero_histo) - nonzero_histo[-1]) // 255 - if not step: - lut = np.array(range(256)) - else: - # Compute the cumulative sum, shifted by step // 2 - # and then normalized by step. - lut = (np.cumsum(histo) + (step // 2)) // step - # Shift lut, prepending with 0. - lut = np.concatenate([[0], lut[:-1]], 0) - # handle potential integer overflow - lut[lut > 255] = 255 - # If step is zero, return the original image. - # Otherwise, index from lut. - return np.where(np.equal(step, 0), im, lut[im]) - - # Scales each channel independently and then stacks - # the result. - s1 = _scale_channel(img, 0) - s2 = _scale_channel(img, 1) - s3 = _scale_channel(img, 2) - equalized_img = np.stack([s1, s2, s3], axis=-1) - return equalized_img.astype(img.dtype) - - -def adjust_brightness(img, factor=1.0): - """Adjust image brightness. - - This function controls the brightness of an image. An - enhancement factor of 0.0 gives a black image. - A factor of 1.0 gives the original image. This function - blends the source image and the degenerated black image: - - .. math:: - output = img * factor + degenerated * (1 - factor) - - Args: - img (ndarray): Image to be brightened. - factor (float): A value controls the enhancement. - Factor 1.0 returns the original image, lower - factors mean less color (brightness, contrast, - etc), and higher values more. Default 1. - - Returns: - ndarray: The brightened image. - """ - degenerated = np.zeros_like(img) - # Note manually convert the dtype to np.float32, to - # achieve as close results as PIL.ImageEnhance.Brightness. - # Set beta=1-factor, and gamma=0 - brightened_img = cv2.addWeighted(img.astype(np.float32), factor, degenerated.astype(np.float32), 1 - factor, 0) - brightened_img = np.clip(brightened_img, 0, 255) - return brightened_img.astype(img.dtype) - - -def adjust_contrast(img, factor=1.0): - """Adjust image contrast. - - This function controls the contrast of an image. An - enhancement factor of 0.0 gives a solid grey - image. A factor of 1.0 gives the original image. It - blends the source image and the degenerated mean image: - - .. math:: - output = img * factor + degenerated * (1 - factor) - - Args: - img (ndarray): Image to be contrasted. BGR order. - factor (float): Same as :func:`mmcv.adjust_brightness`. - - Returns: - ndarray: The contrasted image. - """ - gray_img = bgr2gray(img) - hist = np.histogram(gray_img, 256, (0, 255))[0] - mean = round(np.sum(gray_img) / np.sum(hist)) - degenerated = (np.ones_like(img[..., 0]) * mean).astype(img.dtype) - degenerated = gray2bgr(degenerated) - contrasted_img = cv2.addWeighted(img.astype(np.float32), factor, degenerated.astype(np.float32), 1 - factor, 0) - contrasted_img = np.clip(contrasted_img, 0, 255) - return contrasted_img.astype(img.dtype) - - -def auto_contrast(img, cutoff=0): - """Auto adjust image contrast. - - This function maximize (normalize) image contrast by first removing cutoff - percent of the lightest and darkest pixels from the histogram and remapping - the image so that the darkest pixel becomes black (0), and the lightest - becomes white (255). - - Args: - img (ndarray): Image to be contrasted. BGR order. - cutoff (int | float | tuple): The cutoff percent of the lightest and - darkest pixels to be removed. If given as tuple, it shall be - (low, high). Otherwise, the single value will be used for both. - Defaults to 0. - - Returns: - ndarray: The contrasted image. - """ - - def _auto_contrast_channel(im, c, cutoff): - im = im[:, :, c] - # Compute the histogram of the image channel. - histo = np.histogram(im, 256, (0, 255))[0] - # Remove cut-off percent pixels from histo - histo_sum = np.cumsum(histo) - cut_low = histo_sum[-1] * cutoff[0] // 100 - cut_high = histo_sum[-1] - histo_sum[-1] * cutoff[1] // 100 - histo_sum = np.clip(histo_sum, cut_low, cut_high) - cut_low - histo = np.concatenate([[histo_sum[0]], np.diff(histo_sum)], 0) - - # Compute mapping - low, high = np.nonzero(histo)[0][0], np.nonzero(histo)[0][-1] - # If all the values have been cut off, return the origin img - if low >= high: - return im - scale = 255.0 / (high - low) - offset = -low * scale - lut = np.array(range(256)) - lut = lut * scale + offset - lut = np.clip(lut, 0, 255) - return lut[im] - - if isinstance(cutoff, (int, float)): - cutoff = (cutoff, cutoff) - else: - assert isinstance(cutoff, tuple), ( - 'cutoff must be of type int, ' f'float or tuple, but got {type(cutoff)} instead.' - ) - # Auto adjusts contrast for each channel independently and then stacks - # the result. - s1 = _auto_contrast_channel(img, 0, cutoff) - s2 = _auto_contrast_channel(img, 1, cutoff) - s3 = _auto_contrast_channel(img, 2, cutoff) - contrasted_img = np.stack([s1, s2, s3], axis=-1) - return contrasted_img.astype(img.dtype) - - -def adjust_sharpness(img, factor=1.0, kernel=None): - """Adjust image sharpness. - - This function controls the sharpness of an image. An - enhancement factor of 0.0 gives a blurred image. A - factor of 1.0 gives the original image. And a factor - of 2.0 gives a sharpened image. It blends the source - image and the degenerated mean image: - - .. math:: - output = img * factor + degenerated * (1 - factor) - - Args: - img (ndarray): Image to be sharpened. BGR order. - factor (float): Same as :func:`mmcv.adjust_brightness`. - kernel (np.ndarray, optional): Filter kernel to be applied on the img - to obtain the degenerated img. Defaults to None. - - Note: - No value sanity check is enforced on the kernel set by users. So with - an inappropriate kernel, the ``adjust_sharpness`` may fail to perform - the function its name indicates but end up performing whatever - transform determined by the kernel. - - Returns: - ndarray: The sharpened image. - """ - - if kernel is None: - # adopted from PIL.ImageFilter.SMOOTH - kernel = np.array([[1.0, 1.0, 1.0], [1.0, 5.0, 1.0], [1.0, 1.0, 1.0]]) / 13 - assert isinstance(kernel, np.ndarray), f'kernel must be of type np.ndarray, but got {type(kernel)} instead.' - assert kernel.ndim == 2, f'kernel must have a dimension of 2, but got {kernel.ndim} instead.' - - degenerated = cv2.filter2D(img, -1, kernel) - sharpened_img = cv2.addWeighted(img.astype(np.float32), factor, degenerated.astype(np.float32), 1 - factor, 0) - sharpened_img = np.clip(sharpened_img, 0, 255) - return sharpened_img.astype(img.dtype) - - -def adjust_lighting(img, eigval, eigvec, alphastd=0.1, to_rgb=True): - """AlexNet-style PCA jitter. - - This data augmentation is proposed in `ImageNet Classification with Deep - Convolutional Neural Networks - `_. - - Args: - img (ndarray): Image to be adjusted lighting. BGR order. - eigval (ndarray): the eigenvalue of the convariance matrix of pixel - values, respectively. - eigvec (ndarray): the eigenvector of the convariance matrix of pixel - values, respectively. - alphastd (float): The standard deviation for distribution of alpha. - Defaults to 0.1 - to_rgb (bool): Whether to convert img to rgb. - - Returns: - ndarray: The adjusted image. - """ - assert isinstance(eigval, np.ndarray) and isinstance(eigvec, np.ndarray), ( - f'eigval and eigvec should both be of type np.ndarray, got ' f'{type(eigval)} and {type(eigvec)} instead.' - ) - - assert eigval.ndim == 1 and eigvec.ndim == 2 - assert eigvec.shape == (3, eigval.shape[0]) - n_eigval = eigval.shape[0] - assert isinstance(alphastd, float), 'alphastd should be of type float, ' f'got {type(alphastd)} instead.' - - img = img.copy().astype(np.float32) - if to_rgb: - cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace - - alpha = np.random.normal(0, alphastd, n_eigval) - alter = ( - eigvec - * np.broadcast_to(alpha.reshape(1, n_eigval), (3, n_eigval)) - * np.broadcast_to(eigval.reshape(1, n_eigval), (3, n_eigval)) - ) - alter = np.broadcast_to(alter.sum(axis=1).reshape(1, 1, 3), img.shape) - img_adjusted = img + alter - return img_adjusted - - -def lut_transform(img, lut_table): - """Transform array by look-up table. - - The function lut_transform fills the output array with values from the - look-up table. Indices of the entries are taken from the input array. - - Args: - img (ndarray): Image to be transformed. - lut_table (ndarray): look-up table of 256 elements; in case of - multi-channel input array, the table should either have a single - channel (in this case the same table is used for all channels) or - the same number of channels as in the input array. - - Returns: - ndarray: The transformed image. - """ - assert isinstance(img, np.ndarray) - assert 0 <= np.min(img) and np.max(img) <= 255 - assert isinstance(lut_table, np.ndarray) - assert lut_table.shape == (256,) - - return cv2.LUT(np.array(img, dtype=np.uint8), lut_table) - - -def clahe(img, clip_limit=40.0, tile_grid_size=(8, 8)): - """Use CLAHE method to process the image. - - See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J]. - Graphics Gems, 1994:474-485.` for more information. - - Args: - img (ndarray): Image to be processed. - clip_limit (float): Threshold for contrast limiting. Default: 40.0. - tile_grid_size (tuple[int]): Size of grid for histogram equalization. - Input image will be divided into equally sized rectangular tiles. - It defines the number of tiles in row and column. Default: (8, 8). - - Returns: - ndarray: The processed image. - """ - assert isinstance(img, np.ndarray) - assert img.ndim == 2 - assert isinstance(clip_limit, (float, int)) - assert is_tuple_of(tile_grid_size, int) - assert len(tile_grid_size) == 2 - - clahe = cv2.createCLAHE(clip_limit, tile_grid_size) - return clahe.apply(np.array(img, dtype=np.uint8)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/deprecated.json b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/deprecated.json deleted file mode 100644 index 25cf6f28caec..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/deprecated.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "resnet50_caffe": "detectron/resnet50_caffe", - "resnet50_caffe_bgr": "detectron2/resnet50_caffe_bgr", - "resnet101_caffe": "detectron/resnet101_caffe", - "resnet101_caffe_bgr": "detectron2/resnet101_caffe_bgr" -} diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/mmcls.json b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/mmcls.json deleted file mode 100644 index bdb311d9fe6d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/mmcls.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "vgg11": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_batch256_imagenet_20210208-4271cd6c.pth", - "vgg13": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_batch256_imagenet_20210208-4d1d6080.pth", - "vgg16": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_batch256_imagenet_20210208-db26f1a5.pth", - "vgg19": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_batch256_imagenet_20210208-e6920e4a.pth", - "vgg11_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_bn_batch256_imagenet_20210207-f244902c.pth", - "vgg13_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_bn_batch256_imagenet_20210207-1a8b7864.pth", - "vgg16_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_bn_batch256_imagenet_20210208-7e55cd29.pth", - "vgg19_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_bn_batch256_imagenet_20210208-da620c4f.pth", - "resnet18": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_batch256_imagenet_20200708-34ab8f90.pth", - "resnet34": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_batch256_imagenet_20200708-32ffb4f7.pth", - "resnet50": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_batch256_imagenet_20200708-cfb998bf.pth", - "resnet101": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_batch256_imagenet_20200708-753f3608.pth", - "resnet152": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_batch256_imagenet_20200708-ec25b1f9.pth", - "resnet50_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_batch256_imagenet_20200708-1ad0ce94.pth", - "resnet101_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d101_batch256_imagenet_20200708-9cb302ef.pth", - "resnet152_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d152_batch256_imagenet_20200708-e79cb6a2.pth", - "resnext50_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_b32x8_imagenet_20210429-56066e27.pth", - "resnext101_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x4d_b32x8_imagenet_20210506-e0fa3dd5.pth", - "resnext101_32x8d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x8d_b32x8_imagenet_20210506-23a247d5.pth", - "resnext152_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext152_32x4d_b32x8_imagenet_20210524-927787be.pth", - "se-resnet50": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet50_batch256_imagenet_20200804-ae206104.pth", - "se-resnet101": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet101_batch256_imagenet_20200804-ba5b51d4.pth", - "resnest50": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest50_imagenet_converted-1ebf0afe.pth", - "resnest101": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest101_imagenet_converted-032caa52.pth", - "resnest200": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest200_imagenet_converted-581a60f2.pth", - "resnest269": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest269_imagenet_converted-59930960.pth", - "shufflenet_v1": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth", - "shufflenet_v2": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v2/shufflenet_v2_batch1024_imagenet_20200812-5bf4721e.pth", - "mobilenet_v2": "https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth" -} diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/open_mmlab.json b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/open_mmlab.json deleted file mode 100644 index 8311db4feef9..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/open_mmlab.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "vgg16_caffe": "https://download.openmmlab.com/pretrain/third_party/vgg16_caffe-292e1171.pth", - "detectron/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_caffe-788b5fa3.pth", - "detectron2/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_msra-5891d200.pth", - "detectron/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_caffe-3ad79236.pth", - "detectron2/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_msra-6cc46731.pth", - "detectron2/resnext101_32x8d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x8d-1516f1aa.pth", - "resnext50_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext50-32x4d-0ab1a123.pth", - "resnext101_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d-a5af3160.pth", - "resnext101_64x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_64x4d-ee2c6f71.pth", - "contrib/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_thangvubk-ad1730dd.pth", - "detectron/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn-9186a21c.pth", - "detectron/resnet101_gn": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn-cac0ab98.pth", - "jhu/resnet50_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_ws-15beedd8.pth", - "jhu/resnet101_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn_ws-3e3c308c.pth", - "jhu/resnext50_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn_ws-0d87ac85.pth", - "jhu/resnext101_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn_ws-34ac1a9e.pth", - "jhu/resnext50_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn-c7e8b754.pth", - "jhu/resnext101_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn-ac3bb84e.pth", - "msra/hrnetv2_w18_small": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18_small-b5a04e21.pth", - "msra/hrnetv2_w18": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18-00eb2006.pth", - "msra/hrnetv2_w32": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w32-dc9eeb4f.pth", - "msra/hrnetv2_w40": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w40-ed0b031c.pth", - "msra/hrnetv2_w48": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w48-d2186c55.pth", - "bninception_caffe": "https://download.openmmlab.com/pretrain/third_party/bn_inception_caffe-ed2e8665.pth", - "kin400/i3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/i3d_r50_f32s2_k400-2c57e077.pth", - "kin400/nl3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/nl3d_r50_f32s2_k400-fa7e7caa.pth", - "res2net101_v1d_26w_4s": "https://download.openmmlab.com/pretrain/third_party/res2net101_v1d_26w_4s_mmdetv2-f0a600f9.pth", - "regnetx_400mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_400mf-a5b10d96.pth", - "regnetx_800mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_800mf-1f4be4c7.pth", - "regnetx_1.6gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_1.6gf-5791c176.pth", - "regnetx_3.2gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_3.2gf-c2599b0f.pth", - "regnetx_4.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_4.0gf-a88f671e.pth", - "regnetx_6.4gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_6.4gf-006af45d.pth", - "regnetx_8.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_8.0gf-3c68abe7.pth", - "regnetx_12gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_12gf-4c2a3350.pth", - "resnet18_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet18_v1c-b5776b93.pth", - "resnet50_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet50_v1c-2cccc1ad.pth", - "resnet101_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet101_v1c-e67eebb6.pth", - "mmedit/vgg16": "https://download.openmmlab.com/mmediting/third_party/vgg_state_dict.pth", - "mmedit/res34_en_nomixup": "https://download.openmmlab.com/mmediting/third_party/model_best_resnet34_En_nomixup.pth", - "mmedit/mobilenet_v2": "https://download.openmmlab.com/mmediting/third_party/mobilenet_v2.pth", - "contrib/mobilenet_v3_large": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_large-bc2c3fd3.pth", - "contrib/mobilenet_v3_small": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_small-47085aa1.pth", - "resnest50": "https://download.openmmlab.com/pretrain/third_party/resnest50_d2-7497a55b.pth", - "resnest101": "https://download.openmmlab.com/pretrain/third_party/resnest101_d2-f3b931b2.pth", - "resnest200": "https://download.openmmlab.com/pretrain/third_party/resnest200_d2-ca88e41f.pth", - "darknet53": "https://download.openmmlab.com/pretrain/third_party/darknet53-a628ea1b.pth", - "mmdet/mobilenet_v2": "https://download.openmmlab.com/mmdetection/v2.0/third_party/mobilenet_v2_batch256_imagenet-ff34753d.pth" -} diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py deleted file mode 100644 index ecee97e0c0cb..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .assign_score_withk import assign_score_withk -from .ball_query import ball_query -from .bbox import bbox_overlaps -from .border_align import BorderAlign, border_align -from .box_iou_rotated import box_iou_rotated -from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive -from .cc_attention import CrissCrossAttention -from .contour_expand import contour_expand -from .corner_pool import CornerPool -from .correlation import Correlation -from .deform_conv import DeformConv2d, DeformConv2dPack, deform_conv2d -from .deform_roi_pool import DeformRoIPool, DeformRoIPoolPack, ModulatedDeformRoIPoolPack, deform_roi_pool -from .deprecated_wrappers import Conv2d_deprecated as Conv2d -from .deprecated_wrappers import ConvTranspose2d_deprecated as ConvTranspose2d -from .deprecated_wrappers import Linear_deprecated as Linear -from .deprecated_wrappers import MaxPool2d_deprecated as MaxPool2d -from .focal_loss import SigmoidFocalLoss, SoftmaxFocalLoss, sigmoid_focal_loss, softmax_focal_loss -from .furthest_point_sample import furthest_point_sample, furthest_point_sample_with_dist -from .fused_bias_leakyrelu import FusedBiasLeakyReLU, fused_bias_leakyrelu -from .gather_points import gather_points -from .group_points import GroupAll, QueryAndGroup, grouping_operation -from .info import get_compiler_version, get_compiling_cuda_version, get_onnxruntime_op_path -from .iou3d import boxes_iou_bev, nms_bev, nms_normal_bev -from .knn import knn -from .masked_conv import MaskedConv2d, masked_conv2d -from .modulated_deform_conv import ModulatedDeformConv2d, ModulatedDeformConv2dPack, modulated_deform_conv2d -from .multi_scale_deform_attn import MultiScaleDeformableAttention -from .nms import batched_nms, nms, nms_match, nms_rotated, soft_nms -from .pixel_group import pixel_group -from .point_sample import SimpleRoIAlign, point_sample, rel_roi_point_to_rel_img_point -from .points_in_boxes import points_in_boxes_all, points_in_boxes_cpu, points_in_boxes_part -from .points_sampler import PointsSampler -from .psa_mask import PSAMask -from .roi_align import RoIAlign, roi_align -from .roi_align_rotated import RoIAlignRotated, roi_align_rotated -from .roi_pool import RoIPool, roi_pool -from .roiaware_pool3d import RoIAwarePool3d -from .roipoint_pool3d import RoIPointPool3d -from .saconv import SAConv2d -from .scatter_points import DynamicScatter, dynamic_scatter -from .sync_bn import SyncBatchNorm -from .three_interpolate import three_interpolate -from .three_nn import three_nn -from .tin_shift import TINShift, tin_shift -from .upfirdn2d import upfirdn2d -from .voxelize import Voxelization, voxelization - -__all__ = [ - 'bbox_overlaps', - 'CARAFE', - 'CARAFENaive', - 'CARAFEPack', - 'carafe', - 'carafe_naive', - 'CornerPool', - 'DeformConv2d', - 'DeformConv2dPack', - 'deform_conv2d', - 'DeformRoIPool', - 'DeformRoIPoolPack', - 'ModulatedDeformRoIPoolPack', - 'deform_roi_pool', - 'SigmoidFocalLoss', - 'SoftmaxFocalLoss', - 'sigmoid_focal_loss', - 'softmax_focal_loss', - 'get_compiler_version', - 'get_compiling_cuda_version', - 'get_onnxruntime_op_path', - 'MaskedConv2d', - 'masked_conv2d', - 'ModulatedDeformConv2d', - 'ModulatedDeformConv2dPack', - 'modulated_deform_conv2d', - 'batched_nms', - 'nms', - 'soft_nms', - 'nms_match', - 'RoIAlign', - 'roi_align', - 'RoIPool', - 'roi_pool', - 'SyncBatchNorm', - 'Conv2d', - 'ConvTranspose2d', - 'Linear', - 'MaxPool2d', - 'CrissCrossAttention', - 'PSAMask', - 'point_sample', - 'rel_roi_point_to_rel_img_point', - 'SimpleRoIAlign', - 'SAConv2d', - 'TINShift', - 'tin_shift', - 'assign_score_withk', - 'box_iou_rotated', - 'RoIPointPool3d', - 'nms_rotated', - 'knn', - 'ball_query', - 'upfirdn2d', - 'FusedBiasLeakyReLU', - 'fused_bias_leakyrelu', - 'RoIAlignRotated', - 'roi_align_rotated', - 'pixel_group', - 'QueryAndGroup', - 'GroupAll', - 'grouping_operation', - 'contour_expand', - 'three_nn', - 'three_interpolate', - 'MultiScaleDeformableAttention', - 'BorderAlign', - 'border_align', - 'gather_points', - 'furthest_point_sample', - 'furthest_point_sample_with_dist', - 'PointsSampler', - 'Correlation', - 'boxes_iou_bev', - 'nms_bev', - 'nms_normal_bev', - 'Voxelization', - 'voxelization', - 'dynamic_scatter', - 'DynamicScatter', - 'RoIAwarePool3d', - 'points_in_boxes_part', - 'points_in_boxes_cpu', - 'points_in_boxes_all', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py deleted file mode 100644 index 399600eb812b..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py +++ /dev/null @@ -1,117 +0,0 @@ -from torch.autograd import Function - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['assign_score_withk_forward', 'assign_score_withk_backward']) - - -class AssignScoreWithK(Function): - r"""Perform weighted sum to generate output features according to scores. - Modified from `PAConv `_. - - This is a memory-efficient CUDA implementation of assign_scores operation, - which first transform all point features with weight bank, then assemble - neighbor features with ``knn_idx`` and perform weighted sum of ``scores``. - - See the `paper `_ appendix Sec. D for - more detailed descriptions. - - Note: - This implementation assumes using ``neighbor`` kernel input, which is - (point_features - center_features, point_features). - See https://github.com/CVMI-Lab/PAConv/blob/main/scene_seg/model/ - pointnet2/paconv.py#L128 for more details. - """ - - @staticmethod - def forward(ctx, scores, point_features, center_features, knn_idx, aggregate='sum'): - """ - Args: - scores (torch.Tensor): (B, npoint, K, M), predicted scores to - aggregate weight matrices in the weight bank. - ``npoint`` is the number of sampled centers. - ``K`` is the number of queried neighbors. - ``M`` is the number of weight matrices in the weight bank. - point_features (torch.Tensor): (B, N, M, out_dim) - Pre-computed point features to be aggregated. - center_features (torch.Tensor): (B, N, M, out_dim) - Pre-computed center features to be aggregated. - knn_idx (torch.Tensor): (B, npoint, K), index of sampled kNN. - We assume the first idx in each row is the idx of the center. - aggregate (str, optional): Aggregation method. - Can be 'sum', 'avg' or 'max'. Defaults: 'sum'. - - Returns: - torch.Tensor: (B, out_dim, npoint, K), the aggregated features. - """ - agg = {'sum': 0, 'avg': 1, 'max': 2} - - B, N, M, out_dim = point_features.size() - _, npoint, K, _ = scores.size() - - output = point_features.new_zeros((B, out_dim, npoint, K)) - ext_module.assign_score_withk_forward( - point_features.contiguous(), - center_features.contiguous(), - scores.contiguous(), - knn_idx.contiguous(), - output, - B=B, - N0=N, - N1=npoint, - M=M, - K=K, - O=out_dim, - aggregate=agg[aggregate], - ) - - ctx.save_for_backward(output, point_features, center_features, scores, knn_idx) - ctx.agg = agg[aggregate] - - return output - - @staticmethod - def backward(ctx, grad_out): - """ - Args: - grad_out (torch.Tensor): (B, out_dim, npoint, K) - - Returns: - grad_scores (torch.Tensor): (B, npoint, K, M) - grad_point_features (torch.Tensor): (B, N, M, out_dim) - grad_center_features (torch.Tensor): (B, N, M, out_dim) - """ - _, point_features, center_features, scores, knn_idx = ctx.saved_tensors - - agg = ctx.agg - - B, N, M, out_dim = point_features.size() - _, npoint, K, _ = scores.size() - - grad_point_features = point_features.new_zeros(point_features.shape) - grad_center_features = center_features.new_zeros(center_features.shape) - grad_scores = scores.new_zeros(scores.shape) - - ext_module.assign_score_withk_backward( - grad_out.contiguous(), - point_features.contiguous(), - center_features.contiguous(), - scores.contiguous(), - knn_idx.contiguous(), - grad_point_features, - grad_center_features, - grad_scores, - B=B, - N0=N, - N1=npoint, - M=M, - K=K, - O=out_dim, - aggregate=agg, - ) - - return grad_scores, grad_point_features, grad_center_features, None, None - - -assign_score_withk = AssignScoreWithK.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py deleted file mode 100644 index 51c403292391..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -from torch.autograd import Function - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['ball_query_forward']) - - -class BallQuery(Function): - """Find nearby points in spherical space.""" - - @staticmethod - def forward( - ctx, min_radius: float, max_radius: float, sample_num: int, xyz: torch.Tensor, center_xyz: torch.Tensor - ) -> torch.Tensor: - """ - Args: - min_radius (float): minimum radius of the balls. - max_radius (float): maximum radius of the balls. - sample_num (int): maximum number of features in the balls. - xyz (Tensor): (B, N, 3) xyz coordinates of the features. - center_xyz (Tensor): (B, npoint, 3) centers of the ball query. - - Returns: - Tensor: (B, npoint, nsample) tensor with the indices of - the features that form the query balls. - """ - assert center_xyz.is_contiguous() - assert xyz.is_contiguous() - assert min_radius < max_radius - - B, N, _ = xyz.size() - npoint = center_xyz.size(1) - idx = xyz.new_zeros(B, npoint, sample_num, dtype=torch.int) - - ext_module.ball_query_forward( - center_xyz, xyz, idx, b=B, n=N, m=npoint, min_radius=min_radius, max_radius=max_radius, nsample=sample_num - ) - if torch.__version__ != 'parrots': - ctx.mark_non_differentiable(idx) - return idx - - @staticmethod - def backward(ctx, a=None): - return None, None, None, None - - -ball_query = BallQuery.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py deleted file mode 100644 index 44aa88881385..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['bbox_overlaps']) - - -def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0): - """Calculate overlap between two set of bboxes. - - If ``aligned`` is ``False``, then calculate the ious between each bbox - of bboxes1 and bboxes2, otherwise the ious between each aligned pair of - bboxes1 and bboxes2. - - Args: - bboxes1 (Tensor): shape (m, 4) in format or empty. - bboxes2 (Tensor): shape (n, 4) in format or empty. - If aligned is ``True``, then m and n must be equal. - mode (str): "iou" (intersection over union) or iof (intersection over - foreground). - - Returns: - ious(Tensor): shape (m, n) if aligned == False else shape (m, 1) - - Example: - >>> bboxes1 = torch.FloatTensor([ - >>> [0, 0, 10, 10], - >>> [10, 10, 20, 20], - >>> [32, 32, 38, 42], - >>> ]) - >>> bboxes2 = torch.FloatTensor([ - >>> [0, 0, 10, 20], - >>> [0, 10, 10, 19], - >>> [10, 10, 20, 20], - >>> ]) - >>> bbox_overlaps(bboxes1, bboxes2) - tensor([[0.5000, 0.0000, 0.0000], - [0.0000, 0.0000, 1.0000], - [0.0000, 0.0000, 0.0000]]) - - Example: - >>> empty = torch.FloatTensor([]) - >>> nonempty = torch.FloatTensor([ - >>> [0, 0, 10, 9], - >>> ]) - >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1) - >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0) - >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0) - """ - - mode_dict = {'iou': 0, 'iof': 1} - assert mode in mode_dict.keys() - mode_flag = mode_dict[mode] - # Either the boxes are empty or the length of boxes' last dimension is 4 - assert bboxes1.size(-1) == 4 or bboxes1.size(0) == 0 - assert bboxes2.size(-1) == 4 or bboxes2.size(0) == 0 - assert offset == 1 or offset == 0 - - rows = bboxes1.size(0) - cols = bboxes2.size(0) - if aligned: - assert rows == cols - - if rows * cols == 0: - return bboxes1.new(rows, 1) if aligned else bboxes1.new(rows, cols) - - if aligned: - ious = bboxes1.new_zeros(rows) - else: - ious = bboxes1.new_zeros((rows, cols)) - ext_module.bbox_overlaps(bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset) - return ious diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py deleted file mode 100644 index beea1a66e997..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -# modified from -# https://github.com/Megvii-BaseDetection/cvpods/blob/master/cvpods/layers/border_align.py - -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['border_align_forward', 'border_align_backward']) - - -class BorderAlignFunction(Function): - @staticmethod - def symbolic(g, input, boxes, pool_size): - return g.op('mmcv::MMCVBorderAlign', input, boxes, pool_size_i=pool_size) - - @staticmethod - def forward(ctx, input, boxes, pool_size): - ctx.pool_size = pool_size - ctx.input_shape = input.size() - - assert boxes.ndim == 3, 'boxes must be with shape [B, H*W, 4]' - assert boxes.size(2) == 4, 'the last dimension of boxes must be (x1, y1, x2, y2)' - assert input.size(1) % 4 == 0, 'the channel for input feature must be divisible by factor 4' - - # [B, C//4, H*W, 4] - output_shape = (input.size(0), input.size(1) // 4, boxes.size(1), 4) - output = input.new_zeros(output_shape) - # `argmax_idx` only used for backward - argmax_idx = input.new_zeros(output_shape).to(torch.int) - - ext_module.border_align_forward(input, boxes, output, argmax_idx, pool_size=ctx.pool_size) - - ctx.save_for_backward(boxes, argmax_idx) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - boxes, argmax_idx = ctx.saved_tensors - grad_input = grad_output.new_zeros(ctx.input_shape) - # complex head architecture may cause grad_output uncontiguous - grad_output = grad_output.contiguous() - ext_module.border_align_backward(grad_output, boxes, argmax_idx, grad_input, pool_size=ctx.pool_size) - return grad_input, None, None - - -border_align = BorderAlignFunction.apply - - -class BorderAlign(nn.Module): - r"""Border align pooling layer. - - Applies border_align over the input feature based on predicted bboxes. - The details were described in the paper - `BorderDet: Border Feature for Dense Object Detection - `_. - - For each border line (e.g. top, left, bottom or right) of each box, - border_align does the following: - 1. uniformly samples `pool_size`+1 positions on this line, involving \ - the start and end points. - 2. the corresponding features on these points are computed by \ - bilinear interpolation. - 3. max pooling over all the `pool_size`+1 positions are used for \ - computing pooled feature. - - Args: - pool_size (int): number of positions sampled over the boxes' borders - (e.g. top, bottom, left, right). - - """ - - def __init__(self, pool_size): - super(BorderAlign, self).__init__() - self.pool_size = pool_size - - def forward(self, input, boxes): - """ - Args: - input: Features with shape [N,4C,H,W]. Channels ranged in [0,C), - [C,2C), [2C,3C), [3C,4C) represent the top, left, bottom, - right features respectively. - boxes: Boxes with shape [N,H*W,4]. Coordinate format (x1,y1,x2,y2). - - Returns: - Tensor: Pooled features with shape [N,C,H*W,4]. The order is - (top,left,bottom,right) for the last dimension. - """ - return border_align(input, boxes, self.pool_size) - - def __repr__(self): - s = self.__class__.__name__ - s += f'(pool_size={self.pool_size})' - return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py deleted file mode 100644 index dfadb39c715c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['box_iou_rotated']) - - -def box_iou_rotated(bboxes1, bboxes2, mode='iou', aligned=False): - """Return intersection-over-union (Jaccard index) of boxes. - - Both sets of boxes are expected to be in - (x_center, y_center, width, height, angle) format. - - If ``aligned`` is ``False``, then calculate the ious between each bbox - of bboxes1 and bboxes2, otherwise the ious between each aligned pair of - bboxes1 and bboxes2. - - Arguments: - boxes1 (Tensor): rotated bboxes 1. \ - It has shape (N, 5), indicating (x, y, w, h, theta) for each row. - Note that theta is in radian. - boxes2 (Tensor): rotated bboxes 2. \ - It has shape (M, 5), indicating (x, y, w, h, theta) for each row. - Note that theta is in radian. - mode (str): "iou" (intersection over union) or iof (intersection over - foreground). - - Returns: - ious(Tensor): shape (N, M) if aligned == False else shape (N,) - """ - assert mode in ['iou', 'iof'] - mode_dict = {'iou': 0, 'iof': 1} - mode_flag = mode_dict[mode] - rows = bboxes1.size(0) - cols = bboxes2.size(0) - if aligned: - ious = bboxes1.new_zeros(rows) - else: - ious = bboxes1.new_zeros((rows * cols)) - bboxes1 = bboxes1.contiguous() - bboxes2 = bboxes2.contiguous() - ext_module.box_iou_rotated(bboxes1, bboxes2, ious, mode_flag=mode_flag, aligned=aligned) - if not aligned: - ious = ious.view(rows, cols) - return ious diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py deleted file mode 100644 index bc0eb0d32f71..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py +++ /dev/null @@ -1,281 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.autograd import Function -from torch.nn.modules.module import Module - -from ..cnn import UPSAMPLE_LAYERS, normal_init, xavier_init -from ..utils import ext_loader - -ext_module = ext_loader.load_ext( - '_ext', ['carafe_naive_forward', 'carafe_naive_backward', 'carafe_forward', 'carafe_backward'] -) - - -class CARAFENaiveFunction(Function): - @staticmethod - def symbolic(g, features, masks, kernel_size, group_size, scale_factor): - return g.op( - 'mmcv::MMCVCARAFENaive', - features, - masks, - kernel_size_i=kernel_size, - group_size_i=group_size, - scale_factor_f=scale_factor, - ) - - @staticmethod - def forward(ctx, features, masks, kernel_size, group_size, scale_factor): - assert scale_factor >= 1 - assert masks.size(1) == kernel_size * kernel_size * group_size - assert masks.size(-1) == features.size(-1) * scale_factor - assert masks.size(-2) == features.size(-2) * scale_factor - assert features.size(1) % group_size == 0 - assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1 - ctx.kernel_size = kernel_size - ctx.group_size = group_size - ctx.scale_factor = scale_factor - ctx.feature_size = features.size() - ctx.mask_size = masks.size() - - n, c, h, w = features.size() - output = features.new_zeros((n, c, h * scale_factor, w * scale_factor)) - ext_module.carafe_naive_forward( - features, masks, output, kernel_size=kernel_size, group_size=group_size, scale_factor=scale_factor - ) - - if features.requires_grad or masks.requires_grad: - ctx.save_for_backward(features, masks) - return output - - @staticmethod - def backward(ctx, grad_output): - assert grad_output.is_cuda - - features, masks = ctx.saved_tensors - kernel_size = ctx.kernel_size - group_size = ctx.group_size - scale_factor = ctx.scale_factor - - grad_input = torch.zeros_like(features) - grad_masks = torch.zeros_like(masks) - ext_module.carafe_naive_backward( - grad_output.contiguous(), - features, - masks, - grad_input, - grad_masks, - kernel_size=kernel_size, - group_size=group_size, - scale_factor=scale_factor, - ) - - return grad_input, grad_masks, None, None, None - - -carafe_naive = CARAFENaiveFunction.apply - - -class CARAFENaive(Module): - def __init__(self, kernel_size, group_size, scale_factor): - super(CARAFENaive, self).__init__() - - assert isinstance(kernel_size, int) and isinstance(group_size, int) and isinstance(scale_factor, int) - self.kernel_size = kernel_size - self.group_size = group_size - self.scale_factor = scale_factor - - def forward(self, features, masks): - return carafe_naive(features, masks, self.kernel_size, self.group_size, self.scale_factor) - - -class CARAFEFunction(Function): - @staticmethod - def symbolic(g, features, masks, kernel_size, group_size, scale_factor): - return g.op( - 'mmcv::MMCVCARAFE', - features, - masks, - kernel_size_i=kernel_size, - group_size_i=group_size, - scale_factor_f=scale_factor, - ) - - @staticmethod - def forward(ctx, features, masks, kernel_size, group_size, scale_factor): - assert scale_factor >= 1 - assert masks.size(1) == kernel_size * kernel_size * group_size - assert masks.size(-1) == features.size(-1) * scale_factor - assert masks.size(-2) == features.size(-2) * scale_factor - assert features.size(1) % group_size == 0 - assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1 - ctx.kernel_size = kernel_size - ctx.group_size = group_size - ctx.scale_factor = scale_factor - ctx.feature_size = features.size() - ctx.mask_size = masks.size() - - n, c, h, w = features.size() - output = features.new_zeros((n, c, h * scale_factor, w * scale_factor)) - routput = features.new_zeros(output.size(), requires_grad=False) - rfeatures = features.new_zeros(features.size(), requires_grad=False) - rmasks = masks.new_zeros(masks.size(), requires_grad=False) - ext_module.carafe_forward( - features, - masks, - rfeatures, - routput, - rmasks, - output, - kernel_size=kernel_size, - group_size=group_size, - scale_factor=scale_factor, - ) - - if features.requires_grad or masks.requires_grad: - ctx.save_for_backward(features, masks, rfeatures) - return output - - @staticmethod - def backward(ctx, grad_output): - assert grad_output.is_cuda - - features, masks, rfeatures = ctx.saved_tensors - kernel_size = ctx.kernel_size - group_size = ctx.group_size - scale_factor = ctx.scale_factor - - rgrad_output = torch.zeros_like(grad_output, requires_grad=False) - rgrad_input_hs = torch.zeros_like(grad_output, requires_grad=False) - rgrad_input = torch.zeros_like(features, requires_grad=False) - rgrad_masks = torch.zeros_like(masks, requires_grad=False) - grad_input = torch.zeros_like(features, requires_grad=False) - grad_masks = torch.zeros_like(masks, requires_grad=False) - ext_module.carafe_backward( - grad_output.contiguous(), - rfeatures, - masks, - rgrad_output, - rgrad_input_hs, - rgrad_input, - rgrad_masks, - grad_input, - grad_masks, - kernel_size=kernel_size, - group_size=group_size, - scale_factor=scale_factor, - ) - return grad_input, grad_masks, None, None, None - - -carafe = CARAFEFunction.apply - - -class CARAFE(Module): - """ CARAFE: Content-Aware ReAssembly of FEatures - - Please refer to https://arxiv.org/abs/1905.02188 for more details. - - Args: - kernel_size (int): reassemble kernel size - group_size (int): reassemble group size - scale_factor (int): upsample ratio - - Returns: - upsampled feature map - """ - - def __init__(self, kernel_size, group_size, scale_factor): - super(CARAFE, self).__init__() - - assert isinstance(kernel_size, int) and isinstance(group_size, int) and isinstance(scale_factor, int) - self.kernel_size = kernel_size - self.group_size = group_size - self.scale_factor = scale_factor - - def forward(self, features, masks): - return carafe(features, masks, self.kernel_size, self.group_size, self.scale_factor) - - -@UPSAMPLE_LAYERS.register_module(name='carafe') -class CARAFEPack(nn.Module): - """A unified package of CARAFE upsampler that contains: 1) channel - compressor 2) content encoder 3) CARAFE op. - - Official implementation of ICCV 2019 paper - CARAFE: Content-Aware ReAssembly of FEatures - Please refer to https://arxiv.org/abs/1905.02188 for more details. - - Args: - channels (int): input feature channels - scale_factor (int): upsample ratio - up_kernel (int): kernel size of CARAFE op - up_group (int): group size of CARAFE op - encoder_kernel (int): kernel size of content encoder - encoder_dilation (int): dilation of content encoder - compressed_channels (int): output channels of channels compressor - - Returns: - upsampled feature map - """ - - def __init__( - self, - channels, - scale_factor, - up_kernel=5, - up_group=1, - encoder_kernel=3, - encoder_dilation=1, - compressed_channels=64, - ): - super(CARAFEPack, self).__init__() - self.channels = channels - self.scale_factor = scale_factor - self.up_kernel = up_kernel - self.up_group = up_group - self.encoder_kernel = encoder_kernel - self.encoder_dilation = encoder_dilation - self.compressed_channels = compressed_channels - self.channel_compressor = nn.Conv2d(channels, self.compressed_channels, 1) - self.content_encoder = nn.Conv2d( - self.compressed_channels, - self.up_kernel * self.up_kernel * self.up_group * self.scale_factor * self.scale_factor, - self.encoder_kernel, - padding=int((self.encoder_kernel - 1) * self.encoder_dilation / 2), - dilation=self.encoder_dilation, - groups=1, - ) - self.init_weights() - - def init_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - xavier_init(m, distribution='uniform') - normal_init(self.content_encoder, std=0.001) - - def kernel_normalizer(self, mask): - mask = F.pixel_shuffle(mask, self.scale_factor) - n, mask_c, h, w = mask.size() - # use float division explicitly, - # to void inconsistency while exporting to onnx - mask_channel = int(mask_c / float(self.up_kernel ** 2)) - mask = mask.view(n, mask_channel, -1, h, w) - - mask = F.softmax(mask, dim=2, dtype=mask.dtype) - mask = mask.view(n, mask_c, h, w).contiguous() - - return mask - - def feature_reassemble(self, x, mask): - x = carafe(x, mask, self.up_kernel, self.up_group, self.scale_factor) - return x - - def forward(self, x): - compressed_x = self.channel_compressor(x) - mask = self.content_encoder(compressed_x) - mask = self.kernel_normalizer(mask) - - x = self.feature_reassemble(x, mask) - return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py deleted file mode 100644 index 48fe50696acb..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.nn as nn -import torch.nn.functional as F - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import PLUGIN_LAYERS, Scale - - -def NEG_INF_DIAG(n, device): - """Returns a diagonal matrix of size [n, n]. - - The diagonal are all "-inf". This is for avoiding calculating the - overlapped element in the Criss-Cross twice. - """ - return torch.diag(torch.tensor(float('-inf')).to(device).repeat(n), 0) - - -@PLUGIN_LAYERS.register_module() -class CrissCrossAttention(nn.Module): - """Criss-Cross Attention Module. - - .. note:: - Before v1.3.13, we use a CUDA op. Since v1.3.13, we switch - to a pure PyTorch and equivalent implementation. For more - details, please refer to https://github.com/open-mmlab/mmcv/pull/1201. - - Speed comparison for one forward pass - - - Input size: [2,512,97,97] - - Device: 1 NVIDIA GeForce RTX 2080 Ti - - +-----------------------+---------------+------------+---------------+ - | |PyTorch version|CUDA version|Relative speed | - +=======================+===============+============+===============+ - |with torch.no_grad() |0.00554402 s |0.0299619 s |5.4x | - +-----------------------+---------------+------------+---------------+ - |no with torch.no_grad()|0.00562803 s |0.0301349 s |5.4x | - +-----------------------+---------------+------------+---------------+ - - Args: - in_channels (int): Channels of the input feature map. - """ - - def __init__(self, in_channels): - super().__init__() - self.query_conv = nn.Conv2d(in_channels, in_channels // 8, 1) - self.key_conv = nn.Conv2d(in_channels, in_channels // 8, 1) - self.value_conv = nn.Conv2d(in_channels, in_channels, 1) - self.gamma = Scale(0.0) - self.in_channels = in_channels - - def forward(self, x): - """forward function of Criss-Cross Attention. - - Args: - x (Tensor): Input feature. \ - shape (batch_size, in_channels, height, width) - Returns: - Tensor: Output of the layer, with shape of \ - (batch_size, in_channels, height, width) - """ - B, C, H, W = x.size() - query = self.query_conv(x) - key = self.key_conv(x) - value = self.value_conv(x) - energy_H = torch.einsum('bchw,bciw->bwhi', query, key) + NEG_INF_DIAG(H, query.device) - energy_H = energy_H.transpose(1, 2) - energy_W = torch.einsum('bchw,bchj->bhwj', query, key) - attn = F.softmax(torch.cat([energy_H, energy_W], dim=-1), dim=-1) # [B,H,W,(H+W)] - out = torch.einsum('bciw,bhwi->bchw', value, attn[..., :H]) - out += torch.einsum('bchj,bhwj->bchw', value, attn[..., H:]) - - out = self.gamma(out) + x - out = out.contiguous() - - return out - - def __repr__(self): - s = self.__class__.__name__ - s += f'(in_channels={self.in_channels})' - return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py deleted file mode 100644 index 14281d4c5d63..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import numpy as np -import torch - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['contour_expand']) - - -def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area, kernel_num): - """Expand kernel contours so that foreground pixels are assigned into - instances. - - Arguments: - kernel_mask (np.array or Tensor): The instance kernel mask with - size hxw. - internal_kernel_label (np.array or Tensor): The instance internal - kernel label with size hxw. - min_kernel_area (int): The minimum kernel area. - kernel_num (int): The instance kernel number. - - Returns: - label (list): The instance index map with size hxw. - """ - assert isinstance(kernel_mask, (torch.Tensor, np.ndarray)) - assert isinstance(internal_kernel_label, (torch.Tensor, np.ndarray)) - assert isinstance(min_kernel_area, int) - assert isinstance(kernel_num, int) - - if isinstance(kernel_mask, np.ndarray): - kernel_mask = torch.from_numpy(kernel_mask) - if isinstance(internal_kernel_label, np.ndarray): - internal_kernel_label = torch.from_numpy(internal_kernel_label) - - if torch.__version__ == 'parrots': - if kernel_mask.shape[0] == 0 or internal_kernel_label.shape[0] == 0: - label = [] - else: - label = ext_module.contour_expand( - kernel_mask, internal_kernel_label, min_kernel_area=min_kernel_area, kernel_num=kernel_num - ) - label = label.tolist() - else: - label = ext_module.contour_expand(kernel_mask, internal_kernel_label, min_kernel_area, kernel_num) - return label diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py deleted file mode 100644 index ede2266be45c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -from torch import nn -from torch.autograd import Function - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext( - '_ext', - [ - 'top_pool_forward', - 'top_pool_backward', - 'bottom_pool_forward', - 'bottom_pool_backward', - 'left_pool_forward', - 'left_pool_backward', - 'right_pool_forward', - 'right_pool_backward', - ], -) - -_mode_dict = {'top': 0, 'bottom': 1, 'left': 2, 'right': 3} - - -class TopPoolFunction(Function): - @staticmethod - def symbolic(g, input): - output = g.op('mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['top'])) - return output - - @staticmethod - def forward(ctx, input): - output = ext_module.top_pool_forward(input) - ctx.save_for_backward(input) - return output - - @staticmethod - def backward(ctx, grad_output): - (input,) = ctx.saved_tensors - output = ext_module.top_pool_backward(input, grad_output) - return output - - -class BottomPoolFunction(Function): - @staticmethod - def symbolic(g, input): - output = g.op('mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['bottom'])) - return output - - @staticmethod - def forward(ctx, input): - output = ext_module.bottom_pool_forward(input) - ctx.save_for_backward(input) - return output - - @staticmethod - def backward(ctx, grad_output): - (input,) = ctx.saved_tensors - output = ext_module.bottom_pool_backward(input, grad_output) - return output - - -class LeftPoolFunction(Function): - @staticmethod - def symbolic(g, input): - output = g.op('mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['left'])) - return output - - @staticmethod - def forward(ctx, input): - output = ext_module.left_pool_forward(input) - ctx.save_for_backward(input) - return output - - @staticmethod - def backward(ctx, grad_output): - (input,) = ctx.saved_tensors - output = ext_module.left_pool_backward(input, grad_output) - return output - - -class RightPoolFunction(Function): - @staticmethod - def symbolic(g, input): - output = g.op('mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['right'])) - return output - - @staticmethod - def forward(ctx, input): - output = ext_module.right_pool_forward(input) - ctx.save_for_backward(input) - return output - - @staticmethod - def backward(ctx, grad_output): - (input,) = ctx.saved_tensors - output = ext_module.right_pool_backward(input, grad_output) - return output - - -class CornerPool(nn.Module): - """Corner Pooling. - - Corner Pooling is a new type of pooling layer that helps a - convolutional network better localize corners of bounding boxes. - - Please refer to https://arxiv.org/abs/1808.01244 for more details. - Code is modified from https://github.com/princeton-vl/CornerNet-Lite. - - Args: - mode(str): Pooling orientation for the pooling layer - - - 'bottom': Bottom Pooling - - 'left': Left Pooling - - 'right': Right Pooling - - 'top': Top Pooling - - Returns: - Feature map after pooling. - """ - - pool_functions = { - 'bottom': BottomPoolFunction, - 'left': LeftPoolFunction, - 'right': RightPoolFunction, - 'top': TopPoolFunction, - } - - cummax_dim_flip = { - 'bottom': (2, False), - 'left': (3, True), - 'right': (3, False), - 'top': (2, True), - } - - def __init__(self, mode): - super(CornerPool, self).__init__() - assert mode in self.pool_functions - self.mode = mode - self.corner_pool = self.pool_functions[mode] - - def forward(self, x): - if torch.__version__ != 'parrots' and torch.__version__ >= '1.5.0': - if torch.onnx.is_in_onnx_export(): - assert torch.__version__ >= '1.7.0', ( - 'When `cummax` serves as an intermediate component whose ' - 'outputs is used as inputs for another modules, it\'s ' - 'expected that pytorch version must be >= 1.7.0, ' - 'otherwise Error appears like: `RuntimeError: tuple ' - 'appears in op that does not forward tuples, unsupported ' - 'kind: prim::PythonOp`.' - ) - - dim, flip = self.cummax_dim_flip[self.mode] - if flip: - x = x.flip(dim) - pool_tensor, _ = torch.cummax(x, dim=dim) - if flip: - pool_tensor = pool_tensor.flip(dim) - return pool_tensor - else: - return self.corner_pool.apply(x) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py deleted file mode 100644 index a5f89fa68576..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -from torch import Tensor, nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['correlation_forward', 'correlation_backward']) - - -class CorrelationFunction(Function): - @staticmethod - def forward( - ctx, input1, input2, kernel_size=1, max_displacement=1, stride=1, padding=1, dilation=1, dilation_patch=1 - ): - - ctx.save_for_backward(input1, input2) - - kH, kW = ctx.kernel_size = _pair(kernel_size) - patch_size = max_displacement * 2 + 1 - ctx.patch_size = patch_size - dH, dW = ctx.stride = _pair(stride) - padH, padW = ctx.padding = _pair(padding) - dilationH, dilationW = ctx.dilation = _pair(dilation) - dilation_patchH, dilation_patchW = ctx.dilation_patch = _pair(dilation_patch) - - output_size = CorrelationFunction._output_size(ctx, input1) - - output = input1.new_zeros(output_size) - - ext_module.correlation_forward( - input1, - input2, - output, - kH=kH, - kW=kW, - patchH=patch_size, - patchW=patch_size, - padH=padH, - padW=padW, - dilationH=dilationH, - dilationW=dilationW, - dilation_patchH=dilation_patchH, - dilation_patchW=dilation_patchW, - dH=dH, - dW=dW, - ) - - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - input1, input2 = ctx.saved_tensors - - kH, kW = ctx.kernel_size - patch_size = ctx.patch_size - padH, padW = ctx.padding - dilationH, dilationW = ctx.dilation - dilation_patchH, dilation_patchW = ctx.dilation_patch - dH, dW = ctx.stride - grad_input1 = torch.zeros_like(input1) - grad_input2 = torch.zeros_like(input2) - - ext_module.correlation_backward( - grad_output, - input1, - input2, - grad_input1, - grad_input2, - kH=kH, - kW=kW, - patchH=patch_size, - patchW=patch_size, - padH=padH, - padW=padW, - dilationH=dilationH, - dilationW=dilationW, - dilation_patchH=dilation_patchH, - dilation_patchW=dilation_patchW, - dH=dH, - dW=dW, - ) - return grad_input1, grad_input2, None, None, None, None, None, None - - @staticmethod - def _output_size(ctx, input1): - iH, iW = input1.size(2), input1.size(3) - batch_size = input1.size(0) - kH, kW = ctx.kernel_size - patch_size = ctx.patch_size - dH, dW = ctx.stride - padH, padW = ctx.padding - dilationH, dilationW = ctx.dilation - dilatedKH = (kH - 1) * dilationH + 1 - dilatedKW = (kW - 1) * dilationW + 1 - - oH = int((iH + 2 * padH - dilatedKH) / dH + 1) - oW = int((iW + 2 * padW - dilatedKW) / dW + 1) - - output_size = (batch_size, patch_size, patch_size, oH, oW) - return output_size - - -class Correlation(nn.Module): - r"""Correlation operator - - This correlation operator works for optical flow correlation computation. - - There are two batched tensors with shape :math:`(N, C, H, W)`, - and the correlation output's shape is :math:`(N, max\_displacement \times - 2 + 1, max\_displacement * 2 + 1, H_{out}, W_{out})` - - where - - .. math:: - H_{out} = \left\lfloor\frac{H_{in} + 2 \times padding - - dilation \times (kernel\_size - 1) - 1} - {stride} + 1\right\rfloor - - .. math:: - W_{out} = \left\lfloor\frac{W_{in} + 2 \times padding - dilation - \times (kernel\_size - 1) - 1} - {stride} + 1\right\rfloor - - the correlation item :math:`(N_i, dy, dx)` is formed by taking the sliding - window convolution between input1 and shifted input2, - - .. math:: - Corr(N_i, dx, dy) = - \sum_{c=0}^{C-1} - input1(N_i, c) \star - \mathcal{S}(input2(N_i, c), dy, dx) - - where :math:`\star` is the valid 2d sliding window convolution operator, - and :math:`\mathcal{S}` means shifting the input features (auto-complete - zero marginal), and :math:`dx, dy` are shifting distance, :math:`dx, dy \in - [-max\_displacement \times dilation\_patch, max\_displacement \times - dilation\_patch]`. - - Args: - kernel_size (int): The size of sliding window i.e. local neighborhood - representing the center points and involved in correlation - computation. Defaults to 1. - max_displacement (int): The radius for computing correlation volume, - but the actual working space can be dilated by dilation_patch. - Defaults to 1. - stride (int): The stride of the sliding blocks in the input spatial - dimensions. Defaults to 1. - padding (int): Zero padding added to all four sides of the input1. - Defaults to 0. - dilation (int): The spacing of local neighborhood that will involved - in correlation. Defaults to 1. - dilation_patch (int): The spacing between position need to compute - correlation. Defaults to 1. - """ - - def __init__( - self, - kernel_size: int = 1, - max_displacement: int = 1, - stride: int = 1, - padding: int = 0, - dilation: int = 1, - dilation_patch: int = 1, - ) -> None: - super().__init__() - self.kernel_size = kernel_size - self.max_displacement = max_displacement - self.stride = stride - self.padding = padding - self.dilation = dilation - self.dilation_patch = dilation_patch - - def forward(self, input1: Tensor, input2: Tensor) -> Tensor: - return CorrelationFunction.apply( - input1, - input2, - self.kernel_size, - self.max_displacement, - self.stride, - self.padding, - self.dilation, - self.dilation_patch, - ) - - def __repr__(self) -> str: - s = self.__class__.__name__ - s += f'(kernel_size={self.kernel_size}, ' - s += f'max_displacement={self.max_displacement}, ' - s += f'stride={self.stride}, ' - s += f'padding={self.padding}, ' - s += f'dilation={self.dilation}, ' - s += f'dilation_patch={self.dilation_patch})' - return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py deleted file mode 100644 index 6c6d14243d22..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py +++ /dev/null @@ -1,406 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from typing import Tuple, Union - -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch import Tensor -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair, _single - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import deprecated_api_warning -from ..cnn import CONV_LAYERS -from ..utils import ext_loader, print_log - -ext_module = ext_loader.load_ext( - '_ext', ['deform_conv_forward', 'deform_conv_backward_input', 'deform_conv_backward_parameters'] -) - - -class DeformConv2dFunction(Function): - @staticmethod - def symbolic( - g, input, offset, weight, stride, padding, dilation, groups, deform_groups, bias=False, im2col_step=32 - ): - return g.op( - 'mmcv::MMCVDeformConv2d', - input, - offset, - weight, - stride_i=stride, - padding_i=padding, - dilation_i=dilation, - groups_i=groups, - deform_groups_i=deform_groups, - bias_i=bias, - im2col_step_i=im2col_step, - ) - - @staticmethod - def forward( - ctx, - input, - offset, - weight, - stride=1, - padding=0, - dilation=1, - groups=1, - deform_groups=1, - bias=False, - im2col_step=32, - ): - if input is not None and input.dim() != 4: - raise ValueError( - f'Expected 4D tensor as input, got {input.dim()}D tensor \ - instead.' - ) - assert bias is False, 'Only support bias is False.' - ctx.stride = _pair(stride) - ctx.padding = _pair(padding) - ctx.dilation = _pair(dilation) - ctx.groups = groups - ctx.deform_groups = deform_groups - ctx.im2col_step = im2col_step - - # When pytorch version >= 1.6.0, amp is adopted for fp16 mode; - # amp won't cast the type of model (float32), but "offset" is cast - # to float16 by nn.Conv2d automatically, leading to the type - # mismatch with input (when it is float32) or weight. - # The flag for whether to use fp16 or amp is the type of "offset", - # we cast weight and input to temporarily support fp16 and amp - # whatever the pytorch version is. - input = input.type_as(offset) - weight = weight.type_as(input) - ctx.save_for_backward(input, offset, weight) - - output = input.new_empty(DeformConv2dFunction._output_size(ctx, input, weight)) - - ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones - - cur_im2col_step = min(ctx.im2col_step, input.size(0)) - assert (input.size(0) % cur_im2col_step) == 0, 'im2col step must divide batchsize' - ext_module.deform_conv_forward( - input, - weight, - offset, - output, - ctx.bufs_[0], - ctx.bufs_[1], - kW=weight.size(3), - kH=weight.size(2), - dW=ctx.stride[1], - dH=ctx.stride[0], - padW=ctx.padding[1], - padH=ctx.padding[0], - dilationW=ctx.dilation[1], - dilationH=ctx.dilation[0], - group=ctx.groups, - deformable_group=ctx.deform_groups, - im2col_step=cur_im2col_step, - ) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - input, offset, weight = ctx.saved_tensors - - grad_input = grad_offset = grad_weight = None - - cur_im2col_step = min(ctx.im2col_step, input.size(0)) - assert (input.size(0) % cur_im2col_step) == 0, 'batch size must be divisible by im2col_step' - - grad_output = grad_output.contiguous() - if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: - grad_input = torch.zeros_like(input) - grad_offset = torch.zeros_like(offset) - ext_module.deform_conv_backward_input( - input, - offset, - grad_output, - grad_input, - grad_offset, - weight, - ctx.bufs_[0], - kW=weight.size(3), - kH=weight.size(2), - dW=ctx.stride[1], - dH=ctx.stride[0], - padW=ctx.padding[1], - padH=ctx.padding[0], - dilationW=ctx.dilation[1], - dilationH=ctx.dilation[0], - group=ctx.groups, - deformable_group=ctx.deform_groups, - im2col_step=cur_im2col_step, - ) - - if ctx.needs_input_grad[2]: - grad_weight = torch.zeros_like(weight) - ext_module.deform_conv_backward_parameters( - input, - offset, - grad_output, - grad_weight, - ctx.bufs_[0], - ctx.bufs_[1], - kW=weight.size(3), - kH=weight.size(2), - dW=ctx.stride[1], - dH=ctx.stride[0], - padW=ctx.padding[1], - padH=ctx.padding[0], - dilationW=ctx.dilation[1], - dilationH=ctx.dilation[0], - group=ctx.groups, - deformable_group=ctx.deform_groups, - scale=1, - im2col_step=cur_im2col_step, - ) - - return grad_input, grad_offset, grad_weight, None, None, None, None, None, None, None - - @staticmethod - def _output_size(ctx, input, weight): - channels = weight.size(0) - output_size = (input.size(0), channels) - for d in range(input.dim() - 2): - in_size = input.size(d + 2) - pad = ctx.padding[d] - kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1 - stride_ = ctx.stride[d] - output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,) - if not all(map(lambda s: s > 0, output_size)): - raise ValueError( - 'convolution input is too small (output would be ' + 'x'.join(map(str, output_size)) + ')' - ) - return output_size - - -deform_conv2d = DeformConv2dFunction.apply - - -class DeformConv2d(nn.Module): - r"""Deformable 2D convolution. - - Applies a deformable 2D convolution over an input signal composed of - several input planes. DeformConv2d was described in the paper - `Deformable Convolutional Networks - `_ - - Note: - The argument ``im2col_step`` was added in version 1.3.17, which means - number of samples processed by the ``im2col_cuda_kernel`` per call. - It enables users to define ``batch_size`` and ``im2col_step`` more - flexibly and solved `issue mmcv#1440 - `_. - - Args: - in_channels (int): Number of channels in the input image. - out_channels (int): Number of channels produced by the convolution. - kernel_size(int, tuple): Size of the convolving kernel. - stride(int, tuple): Stride of the convolution. Default: 1. - padding (int or tuple): Zero-padding added to both sides of the input. - Default: 0. - dilation (int or tuple): Spacing between kernel elements. Default: 1. - groups (int): Number of blocked connections from input. - channels to output channels. Default: 1. - deform_groups (int): Number of deformable group partitions. - bias (bool): If True, adds a learnable bias to the output. - Default: False. - im2col_step (int): Number of samples processed by im2col_cuda_kernel - per call. It will work when ``batch_size`` > ``im2col_step``, but - ``batch_size`` must be divisible by ``im2col_step``. Default: 32. - `New in version 1.3.17.` - """ - - @deprecated_api_warning({'deformable_groups': 'deform_groups'}, cls_name='DeformConv2d') - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: Union[int, Tuple[int, ...]], - stride: Union[int, Tuple[int, ...]] = 1, - padding: Union[int, Tuple[int, ...]] = 0, - dilation: Union[int, Tuple[int, ...]] = 1, - groups: int = 1, - deform_groups: int = 1, - bias: bool = False, - im2col_step: int = 32, - ) -> None: - super(DeformConv2d, self).__init__() - - assert not bias, f'bias={bias} is not supported in DeformConv2d.' - assert in_channels % groups == 0, f'in_channels {in_channels} cannot be divisible by groups {groups}' - assert ( - out_channels % groups == 0 - ), f'out_channels {out_channels} cannot be divisible by groups \ - {groups}' - - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.stride = _pair(stride) - self.padding = _pair(padding) - self.dilation = _pair(dilation) - self.groups = groups - self.deform_groups = deform_groups - self.im2col_step = im2col_step - # enable compatibility with nn.Conv2d - self.transposed = False - self.output_padding = _single(0) - - # only weight, no bias - self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size)) - - self.reset_parameters() - - def reset_parameters(self): - # switch the initialization of `self.weight` to the standard kaiming - # method described in `Delving deep into rectifiers: Surpassing - # human-level performance on ImageNet classification` - He, K. et al. - # (2015), using a uniform distribution - nn.init.kaiming_uniform_(self.weight, nonlinearity='relu') - - def forward(self, x: Tensor, offset: Tensor) -> Tensor: - """Deformable Convolutional forward function. - - Args: - x (Tensor): Input feature, shape (B, C_in, H_in, W_in) - offset (Tensor): Offset for deformable convolution, shape - (B, deform_groups*kernel_size[0]*kernel_size[1]*2, - H_out, W_out), H_out, W_out are equal to the output's. - - An offset is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`. - The spatial arrangement is like: - - .. code:: text - - (x0, y0) (x1, y1) (x2, y2) - (x3, y3) (x4, y4) (x5, y5) - (x6, y6) (x7, y7) (x8, y8) - - Returns: - Tensor: Output of the layer. - """ - # To fix an assert error in deform_conv_cuda.cpp:128 - # input image is smaller than kernel - input_pad = (x.size(2) < self.kernel_size[0]) or (x.size(3) < self.kernel_size[1]) - if input_pad: - pad_h = max(self.kernel_size[0] - x.size(2), 0) - pad_w = max(self.kernel_size[1] - x.size(3), 0) - x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() - offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0) - offset = offset.contiguous() - out = deform_conv2d( - x, - offset, - self.weight, - self.stride, - self.padding, - self.dilation, - self.groups, - self.deform_groups, - False, - self.im2col_step, - ) - if input_pad: - out = out[:, :, : out.size(2) - pad_h, : out.size(3) - pad_w].contiguous() - return out - - def __repr__(self): - s = self.__class__.__name__ - s += f'(in_channels={self.in_channels},\n' - s += f'out_channels={self.out_channels},\n' - s += f'kernel_size={self.kernel_size},\n' - s += f'stride={self.stride},\n' - s += f'padding={self.padding},\n' - s += f'dilation={self.dilation},\n' - s += f'groups={self.groups},\n' - s += f'deform_groups={self.deform_groups},\n' - # bias is not supported in DeformConv2d. - s += 'bias=False)' - return s - - -@CONV_LAYERS.register_module('DCN') -class DeformConv2dPack(DeformConv2d): - """A Deformable Conv Encapsulation that acts as normal Conv layers. - - The offset tensor is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`. - The spatial arrangement is like: - - .. code:: text - - (x0, y0) (x1, y1) (x2, y2) - (x3, y3) (x4, y4) (x5, y5) - (x6, y6) (x7, y7) (x8, y8) - - Args: - in_channels (int): Same as nn.Conv2d. - out_channels (int): Same as nn.Conv2d. - kernel_size (int or tuple[int]): Same as nn.Conv2d. - stride (int or tuple[int]): Same as nn.Conv2d. - padding (int or tuple[int]): Same as nn.Conv2d. - dilation (int or tuple[int]): Same as nn.Conv2d. - groups (int): Same as nn.Conv2d. - bias (bool or str): If specified as `auto`, it will be decided by the - norm_cfg. Bias will be set as True if norm_cfg is None, otherwise - False. - """ - - _version = 2 - - def __init__(self, *args, **kwargs): - super(DeformConv2dPack, self).__init__(*args, **kwargs) - self.conv_offset = nn.Conv2d( - self.in_channels, - self.deform_groups * 2 * self.kernel_size[0] * self.kernel_size[1], - kernel_size=self.kernel_size, - stride=_pair(self.stride), - padding=_pair(self.padding), - dilation=_pair(self.dilation), - bias=True, - ) - self.init_offset() - - def init_offset(self): - self.conv_offset.weight.data.zero_() - self.conv_offset.bias.data.zero_() - - def forward(self, x): - offset = self.conv_offset(x) - return deform_conv2d( - x, - offset, - self.weight, - self.stride, - self.padding, - self.dilation, - self.groups, - self.deform_groups, - False, - self.im2col_step, - ) - - def _load_from_state_dict( - self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs - ): - version = local_metadata.get('version', None) - - if version is None or version < 2: - # the key is different in early versions - # In version < 2, DeformConvPack loads previous benchmark models. - if prefix + 'conv_offset.weight' not in state_dict and prefix[:-1] + '_offset.weight' in state_dict: - state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(prefix[:-1] + '_offset.weight') - if prefix + 'conv_offset.bias' not in state_dict and prefix[:-1] + '_offset.bias' in state_dict: - state_dict[prefix + 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + '_offset.bias') - - if version is not None and version > 1: - print_log(f'DeformConv2dPack {prefix.rstrip(".")} is upgraded to ' 'version 2.', logger='root') - - super()._load_from_state_dict( - state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs - ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py deleted file mode 100644 index 1528a0748922..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from torch import nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['deform_roi_pool_forward', 'deform_roi_pool_backward']) - - -class DeformRoIPoolFunction(Function): - @staticmethod - def symbolic(g, input, rois, offset, output_size, spatial_scale, sampling_ratio, gamma): - return g.op( - 'mmcv::MMCVDeformRoIPool', - input, - rois, - offset, - pooled_height_i=output_size[0], - pooled_width_i=output_size[1], - spatial_scale_f=spatial_scale, - sampling_ratio_f=sampling_ratio, - gamma_f=gamma, - ) - - @staticmethod - def forward(ctx, input, rois, offset, output_size, spatial_scale=1.0, sampling_ratio=0, gamma=0.1): - if offset is None: - offset = input.new_zeros(0) - ctx.output_size = _pair(output_size) - ctx.spatial_scale = float(spatial_scale) - ctx.sampling_ratio = int(sampling_ratio) - ctx.gamma = float(gamma) - - assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' - - output_shape = (rois.size(0), input.size(1), ctx.output_size[0], ctx.output_size[1]) - output = input.new_zeros(output_shape) - - ext_module.deform_roi_pool_forward( - input, - rois, - offset, - output, - pooled_height=ctx.output_size[0], - pooled_width=ctx.output_size[1], - spatial_scale=ctx.spatial_scale, - sampling_ratio=ctx.sampling_ratio, - gamma=ctx.gamma, - ) - - ctx.save_for_backward(input, rois, offset) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - input, rois, offset = ctx.saved_tensors - grad_input = grad_output.new_zeros(input.shape) - grad_offset = grad_output.new_zeros(offset.shape) - - ext_module.deform_roi_pool_backward( - grad_output, - input, - rois, - offset, - grad_input, - grad_offset, - pooled_height=ctx.output_size[0], - pooled_width=ctx.output_size[1], - spatial_scale=ctx.spatial_scale, - sampling_ratio=ctx.sampling_ratio, - gamma=ctx.gamma, - ) - if grad_offset.numel() == 0: - grad_offset = None - return grad_input, None, grad_offset, None, None, None, None - - -deform_roi_pool = DeformRoIPoolFunction.apply - - -class DeformRoIPool(nn.Module): - def __init__(self, output_size, spatial_scale=1.0, sampling_ratio=0, gamma=0.1): - super(DeformRoIPool, self).__init__() - self.output_size = _pair(output_size) - self.spatial_scale = float(spatial_scale) - self.sampling_ratio = int(sampling_ratio) - self.gamma = float(gamma) - - def forward(self, input, rois, offset=None): - return deform_roi_pool( - input, rois, offset, self.output_size, self.spatial_scale, self.sampling_ratio, self.gamma - ) - - -class DeformRoIPoolPack(DeformRoIPool): - def __init__( - self, output_size, output_channels, deform_fc_channels=1024, spatial_scale=1.0, sampling_ratio=0, gamma=0.1 - ): - super(DeformRoIPoolPack, self).__init__(output_size, spatial_scale, sampling_ratio, gamma) - - self.output_channels = output_channels - self.deform_fc_channels = deform_fc_channels - - self.offset_fc = nn.Sequential( - nn.Linear(self.output_size[0] * self.output_size[1] * self.output_channels, self.deform_fc_channels), - nn.ReLU(inplace=True), - nn.Linear(self.deform_fc_channels, self.deform_fc_channels), - nn.ReLU(inplace=True), - nn.Linear(self.deform_fc_channels, self.output_size[0] * self.output_size[1] * 2), - ) - self.offset_fc[-1].weight.data.zero_() - self.offset_fc[-1].bias.data.zero_() - - def forward(self, input, rois): - assert input.size(1) == self.output_channels - x = deform_roi_pool(input, rois, None, self.output_size, self.spatial_scale, self.sampling_ratio, self.gamma) - rois_num = rois.size(0) - offset = self.offset_fc(x.view(rois_num, -1)) - offset = offset.view(rois_num, 2, self.output_size[0], self.output_size[1]) - return deform_roi_pool( - input, rois, offset, self.output_size, self.spatial_scale, self.sampling_ratio, self.gamma - ) - - -class ModulatedDeformRoIPoolPack(DeformRoIPool): - def __init__( - self, output_size, output_channels, deform_fc_channels=1024, spatial_scale=1.0, sampling_ratio=0, gamma=0.1 - ): - super(ModulatedDeformRoIPoolPack, self).__init__(output_size, spatial_scale, sampling_ratio, gamma) - - self.output_channels = output_channels - self.deform_fc_channels = deform_fc_channels - - self.offset_fc = nn.Sequential( - nn.Linear(self.output_size[0] * self.output_size[1] * self.output_channels, self.deform_fc_channels), - nn.ReLU(inplace=True), - nn.Linear(self.deform_fc_channels, self.deform_fc_channels), - nn.ReLU(inplace=True), - nn.Linear(self.deform_fc_channels, self.output_size[0] * self.output_size[1] * 2), - ) - self.offset_fc[-1].weight.data.zero_() - self.offset_fc[-1].bias.data.zero_() - - self.mask_fc = nn.Sequential( - nn.Linear(self.output_size[0] * self.output_size[1] * self.output_channels, self.deform_fc_channels), - nn.ReLU(inplace=True), - nn.Linear(self.deform_fc_channels, self.output_size[0] * self.output_size[1] * 1), - nn.Sigmoid(), - ) - self.mask_fc[2].weight.data.zero_() - self.mask_fc[2].bias.data.zero_() - - def forward(self, input, rois): - assert input.size(1) == self.output_channels - x = deform_roi_pool(input, rois, None, self.output_size, self.spatial_scale, self.sampling_ratio, self.gamma) - rois_num = rois.size(0) - offset = self.offset_fc(x.view(rois_num, -1)) - offset = offset.view(rois_num, 2, self.output_size[0], self.output_size[1]) - mask = self.mask_fc(x.view(rois_num, -1)) - mask = mask.view(rois_num, 1, self.output_size[0], self.output_size[1]) - d = deform_roi_pool(input, rois, offset, self.output_size, self.spatial_scale, self.sampling_ratio, self.gamma) - return d * mask diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py deleted file mode 100644 index 47d87b75d87f..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -# This file is for backward compatibility. -# Module wrappers for empty tensor have been moved to mmcv.cnn.bricks. -import warnings - -from ..cnn.bricks.wrappers import Conv2d, ConvTranspose2d, Linear, MaxPool2d - - -class Conv2d_deprecated(Conv2d): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - warnings.warn( - 'Importing Conv2d wrapper from "mmcv.ops" will be deprecated in' - ' the future. Please import them from "mmcv.cnn" instead' - ) - - -class ConvTranspose2d_deprecated(ConvTranspose2d): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - warnings.warn( - 'Importing ConvTranspose2d wrapper from "mmcv.ops" will be ' - 'deprecated in the future. Please import them from "mmcv.cnn" ' - 'instead' - ) - - -class MaxPool2d_deprecated(MaxPool2d): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - warnings.warn( - 'Importing MaxPool2d wrapper from "mmcv.ops" will be deprecated in' - ' the future. Please import them from "mmcv.cnn" instead' - ) - - -class Linear_deprecated(Linear): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - warnings.warn( - 'Importing Linear wrapper from "mmcv.ops" will be deprecated in' - ' the future. Please import them from "mmcv.cnn" instead' - ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py deleted file mode 100644 index b218ed24ebc1..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext( - '_ext', - [ - 'sigmoid_focal_loss_forward', - 'sigmoid_focal_loss_backward', - 'softmax_focal_loss_forward', - 'softmax_focal_loss_backward', - ], -) - - -class SigmoidFocalLossFunction(Function): - @staticmethod - def symbolic(g, input, target, gamma, alpha, weight, reduction): - return g.op( - 'mmcv::MMCVSigmoidFocalLoss', - input, - target, - gamma_f=gamma, - alpha_f=alpha, - weight_f=weight, - reduction_s=reduction, - ) - - @staticmethod - def forward(ctx, input, target, gamma=2.0, alpha=0.25, weight=None, reduction='mean'): - - assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor)) - assert input.dim() == 2 - assert target.dim() == 1 - assert input.size(0) == target.size(0) - if weight is None: - weight = input.new_empty(0) - else: - assert weight.dim() == 1 - assert input.size(1) == weight.size(0) - ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2} - assert reduction in ctx.reduction_dict.keys() - - ctx.gamma = float(gamma) - ctx.alpha = float(alpha) - ctx.reduction = ctx.reduction_dict[reduction] - - output = input.new_zeros(input.size()) - - ext_module.sigmoid_focal_loss_forward(input, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha) - if ctx.reduction == ctx.reduction_dict['mean']: - output = output.sum() / input.size(0) - elif ctx.reduction == ctx.reduction_dict['sum']: - output = output.sum() - ctx.save_for_backward(input, target, weight) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - input, target, weight = ctx.saved_tensors - - grad_input = input.new_zeros(input.size()) - - ext_module.sigmoid_focal_loss_backward(input, target, weight, grad_input, gamma=ctx.gamma, alpha=ctx.alpha) - - grad_input *= grad_output - if ctx.reduction == ctx.reduction_dict['mean']: - grad_input /= input.size(0) - return grad_input, None, None, None, None, None - - -sigmoid_focal_loss = SigmoidFocalLossFunction.apply - - -class SigmoidFocalLoss(nn.Module): - def __init__(self, gamma, alpha, weight=None, reduction='mean'): - super(SigmoidFocalLoss, self).__init__() - self.gamma = gamma - self.alpha = alpha - self.register_buffer('weight', weight) - self.reduction = reduction - - def forward(self, input, target): - return sigmoid_focal_loss(input, target, self.gamma, self.alpha, self.weight, self.reduction) - - def __repr__(self): - s = self.__class__.__name__ - s += f'(gamma={self.gamma}, ' - s += f'alpha={self.alpha}, ' - s += f'reduction={self.reduction})' - return s - - -class SoftmaxFocalLossFunction(Function): - @staticmethod - def symbolic(g, input, target, gamma, alpha, weight, reduction): - return g.op( - 'mmcv::MMCVSoftmaxFocalLoss', - input, - target, - gamma_f=gamma, - alpha_f=alpha, - weight_f=weight, - reduction_s=reduction, - ) - - @staticmethod - def forward(ctx, input, target, gamma=2.0, alpha=0.25, weight=None, reduction='mean'): - - assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor)) - assert input.dim() == 2 - assert target.dim() == 1 - assert input.size(0) == target.size(0) - if weight is None: - weight = input.new_empty(0) - else: - assert weight.dim() == 1 - assert input.size(1) == weight.size(0) - ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2} - assert reduction in ctx.reduction_dict.keys() - - ctx.gamma = float(gamma) - ctx.alpha = float(alpha) - ctx.reduction = ctx.reduction_dict[reduction] - - channel_stats, _ = torch.max(input, dim=1) - input_softmax = input - channel_stats.unsqueeze(1).expand_as(input) - input_softmax.exp_() - - channel_stats = input_softmax.sum(dim=1) - input_softmax /= channel_stats.unsqueeze(1).expand_as(input) - - output = input.new_zeros(input.size(0)) - ext_module.softmax_focal_loss_forward(input_softmax, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha) - - if ctx.reduction == ctx.reduction_dict['mean']: - output = output.sum() / input.size(0) - elif ctx.reduction == ctx.reduction_dict['sum']: - output = output.sum() - ctx.save_for_backward(input_softmax, target, weight) - return output - - @staticmethod - def backward(ctx, grad_output): - input_softmax, target, weight = ctx.saved_tensors - buff = input_softmax.new_zeros(input_softmax.size(0)) - grad_input = input_softmax.new_zeros(input_softmax.size()) - - ext_module.softmax_focal_loss_backward( - input_softmax, target, weight, buff, grad_input, gamma=ctx.gamma, alpha=ctx.alpha - ) - - grad_input *= grad_output - if ctx.reduction == ctx.reduction_dict['mean']: - grad_input /= input_softmax.size(0) - return grad_input, None, None, None, None, None - - -softmax_focal_loss = SoftmaxFocalLossFunction.apply - - -class SoftmaxFocalLoss(nn.Module): - def __init__(self, gamma, alpha, weight=None, reduction='mean'): - super(SoftmaxFocalLoss, self).__init__() - self.gamma = gamma - self.alpha = alpha - self.register_buffer('weight', weight) - self.reduction = reduction - - def forward(self, input, target): - return softmax_focal_loss(input, target, self.gamma, self.alpha, self.weight, self.reduction) - - def __repr__(self): - s = self.__class__.__name__ - s += f'(gamma={self.gamma}, ' - s += f'alpha={self.alpha}, ' - s += f'reduction={self.reduction})' - return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py deleted file mode 100644 index 606855fef5f9..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py +++ /dev/null @@ -1,74 +0,0 @@ -import torch -from torch.autograd import Function - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext( - '_ext', ['furthest_point_sampling_forward', 'furthest_point_sampling_with_dist_forward'] -) - - -class FurthestPointSampling(Function): - """Uses iterative furthest point sampling to select a set of features whose - corresponding points have the furthest distance.""" - - @staticmethod - def forward(ctx, points_xyz: torch.Tensor, num_points: int) -> torch.Tensor: - """ - Args: - points_xyz (Tensor): (B, N, 3) where N > num_points. - num_points (int): Number of points in the sampled set. - - Returns: - Tensor: (B, num_points) indices of the sampled points. - """ - assert points_xyz.is_contiguous() - - B, N = points_xyz.size()[:2] - output = torch.cuda.IntTensor(B, num_points) - temp = torch.cuda.FloatTensor(B, N).fill_(1e10) - - ext_module.furthest_point_sampling_forward( - points_xyz, temp, output, b=B, n=N, m=num_points, - ) - if torch.__version__ != 'parrots': - ctx.mark_non_differentiable(output) - return output - - @staticmethod - def backward(xyz, a=None): - return None, None - - -class FurthestPointSamplingWithDist(Function): - """Uses iterative furthest point sampling to select a set of features whose - corresponding points have the furthest distance.""" - - @staticmethod - def forward(ctx, points_dist: torch.Tensor, num_points: int) -> torch.Tensor: - """ - Args: - points_dist (Tensor): (B, N, N) Distance between each point pair. - num_points (int): Number of points in the sampled set. - - Returns: - Tensor: (B, num_points) indices of the sampled points. - """ - assert points_dist.is_contiguous() - - B, N, _ = points_dist.size() - output = points_dist.new_zeros([B, num_points], dtype=torch.int32) - temp = points_dist.new_zeros([B, N]).fill_(1e10) - - ext_module.furthest_point_sampling_with_dist_forward(points_dist, temp, output, b=B, n=N, m=num_points) - if torch.__version__ != 'parrots': - ctx.mark_non_differentiable(output) - return output - - @staticmethod - def backward(xyz, a=None): - return None, None - - -furthest_point_sample = FurthestPointSampling.apply -furthest_point_sample_with_dist = FurthestPointSamplingWithDist.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py deleted file mode 100644 index a1f89dd27ebe..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py +++ /dev/null @@ -1,249 +0,0 @@ -# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501 - -# Copyright (c) 2021, NVIDIA Corporation. All rights reserved. -# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator -# Augmentation (ADA) -# ======================================================================= - -# 1. Definitions - -# "Licensor" means any person or entity that distributes its Work. - -# "Software" means the original work of authorship made available under -# this License. - -# "Work" means the Software and any additions to or derivative works of -# the Software that are made available under this License. - -# The terms "reproduce," "reproduction," "derivative works," and -# "distribution" have the meaning as provided under U.S. copyright law; -# provided, however, that for the purposes of this License, derivative -# works shall not include works that remain separable from, or merely -# link (or bind by name) to the interfaces of, the Work. - -# Works, including the Software, are "made available" under this License -# by including in or with the Work either (a) a copyright notice -# referencing the applicability of this License to the Work, or (b) a -# copy of this License. - -# 2. License Grants - -# 2.1 Copyright Grant. Subject to the terms and conditions of this -# License, each Licensor grants to you a perpetual, worldwide, -# non-exclusive, royalty-free, copyright license to reproduce, -# prepare derivative works of, publicly display, publicly perform, -# sublicense and distribute its Work and any resulting derivative -# works in any form. - -# 3. Limitations - -# 3.1 Redistribution. You may reproduce or distribute the Work only -# if (a) you do so under this License, (b) you include a complete -# copy of this License with your distribution, and (c) you retain -# without modification any copyright, patent, trademark, or -# attribution notices that are present in the Work. - -# 3.2 Derivative Works. You may specify that additional or different -# terms apply to the use, reproduction, and distribution of your -# derivative works of the Work ("Your Terms") only if (a) Your Terms -# provide that the use limitation in Section 3.3 applies to your -# derivative works, and (b) you identify the specific derivative -# works that are subject to Your Terms. Notwithstanding Your Terms, -# this License (including the redistribution requirements in Section -# 3.1) will continue to apply to the Work itself. - -# 3.3 Use Limitation. The Work and any derivative works thereof only -# may be used or intended for use non-commercially. Notwithstanding -# the foregoing, NVIDIA and its affiliates may use the Work and any -# derivative works commercially. As used herein, "non-commercially" -# means for research or evaluation purposes only. - -# 3.4 Patent Claims. If you bring or threaten to bring a patent claim -# against any Licensor (including any claim, cross-claim or -# counterclaim in a lawsuit) to enforce any patents that you allege -# are infringed by any Work, then your rights under this License from -# such Licensor (including the grant in Section 2.1) will terminate -# immediately. - -# 3.5 Trademarks. This License does not grant any rights to use any -# Licensor’s or its affiliates’ names, logos, or trademarks, except -# as necessary to reproduce the notices described in this License. - -# 3.6 Termination. If you violate any term of this License, then your -# rights under this License (including the grant in Section 2.1) will -# terminate immediately. - -# 4. Disclaimer of Warranty. - -# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR -# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER -# THIS LICENSE. - -# 5. Limitation of Liability. - -# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL -# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE -# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, -# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF -# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK -# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, -# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER -# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGES. - -# ======================================================================= - -import torch -import torch.nn.functional as F -from torch import nn -from torch.autograd import Function - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['fused_bias_leakyrelu']) - - -class FusedBiasLeakyReLUFunctionBackward(Function): - """Calculate second order deviation. - - This function is to compute the second order deviation for the fused leaky - relu operation. - """ - - @staticmethod - def forward(ctx, grad_output, out, negative_slope, scale): - ctx.save_for_backward(out) - ctx.negative_slope = negative_slope - ctx.scale = scale - - empty = grad_output.new_empty(0) - - grad_input = ext_module.fused_bias_leakyrelu( - grad_output, empty, out, act=3, grad=1, alpha=negative_slope, scale=scale - ) - - dim = [0] - - if grad_input.ndim > 2: - dim += list(range(2, grad_input.ndim)) - - grad_bias = grad_input.sum(dim).detach() - - return grad_input, grad_bias - - @staticmethod - def backward(ctx, gradgrad_input, gradgrad_bias): - (out,) = ctx.saved_tensors - - # The second order deviation, in fact, contains two parts, while the - # the first part is zero. Thus, we direct consider the second part - # which is similar with the first order deviation in implementation. - gradgrad_out = ext_module.fused_bias_leakyrelu( - gradgrad_input, gradgrad_bias.to(out.dtype), out, act=3, grad=1, alpha=ctx.negative_slope, scale=ctx.scale - ) - - return gradgrad_out, None, None, None - - -class FusedBiasLeakyReLUFunction(Function): - @staticmethod - def forward(ctx, input, bias, negative_slope, scale): - empty = input.new_empty(0) - - out = ext_module.fused_bias_leakyrelu(input, bias, empty, act=3, grad=0, alpha=negative_slope, scale=scale) - ctx.save_for_backward(out) - ctx.negative_slope = negative_slope - ctx.scale = scale - - return out - - @staticmethod - def backward(ctx, grad_output): - (out,) = ctx.saved_tensors - - grad_input, grad_bias = FusedBiasLeakyReLUFunctionBackward.apply( - grad_output, out, ctx.negative_slope, ctx.scale - ) - - return grad_input, grad_bias, None, None - - -class FusedBiasLeakyReLU(nn.Module): - """Fused bias leaky ReLU. - - This function is introduced in the StyleGAN2: - http://arxiv.org/abs/1912.04958 - - The bias term comes from the convolution operation. In addition, to keep - the variance of the feature map or gradients unchanged, they also adopt a - scale similarly with Kaiming initialization. However, since the - :math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the - final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501 - your own scale. - - TODO: Implement the CPU version. - - Args: - channel (int): The channel number of the feature map. - negative_slope (float, optional): Same as nn.LeakyRelu. - Defaults to 0.2. - scale (float, optional): A scalar to adjust the variance of the feature - map. Defaults to 2**0.5. - """ - - def __init__(self, num_channels, negative_slope=0.2, scale=2 ** 0.5): - super(FusedBiasLeakyReLU, self).__init__() - - self.bias = nn.Parameter(torch.zeros(num_channels)) - self.negative_slope = negative_slope - self.scale = scale - - def forward(self, input): - return fused_bias_leakyrelu(input, self.bias, self.negative_slope, self.scale) - - -def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2 ** 0.5): - """Fused bias leaky ReLU function. - - This function is introduced in the StyleGAN2: - http://arxiv.org/abs/1912.04958 - - The bias term comes from the convolution operation. In addition, to keep - the variance of the feature map or gradients unchanged, they also adopt a - scale similarly with Kaiming initialization. However, since the - :math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the - final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501 - your own scale. - - Args: - input (torch.Tensor): Input feature map. - bias (nn.Parameter): The bias from convolution operation. - negative_slope (float, optional): Same as nn.LeakyRelu. - Defaults to 0.2. - scale (float, optional): A scalar to adjust the variance of the feature - map. Defaults to 2**0.5. - - Returns: - torch.Tensor: Feature map after non-linear activation. - """ - - if not input.is_cuda: - return bias_leakyrelu_ref(input, bias, negative_slope, scale) - - return FusedBiasLeakyReLUFunction.apply(input, bias.to(input.dtype), negative_slope, scale) - - -def bias_leakyrelu_ref(x, bias, negative_slope=0.2, scale=2 ** 0.5): - - if bias is not None: - assert bias.ndim == 1 - assert bias.shape[0] == x.shape[1] - x = x + bias.reshape([-1 if i == 1 else 1 for i in range(x.ndim)]) - - x = F.leaky_relu(x, negative_slope) - if scale != 1: - x = x * scale - - return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py deleted file mode 100644 index b6aa89d50279..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py +++ /dev/null @@ -1,47 +0,0 @@ -import torch -from torch.autograd import Function - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['gather_points_forward', 'gather_points_backward']) - - -class GatherPoints(Function): - """Gather points with given index.""" - - @staticmethod - def forward(ctx, features: torch.Tensor, indices: torch.Tensor) -> torch.Tensor: - """ - Args: - features (Tensor): (B, C, N) features to gather. - indices (Tensor): (B, M) where M is the number of points. - - Returns: - Tensor: (B, C, M) where M is the number of points. - """ - assert features.is_contiguous() - assert indices.is_contiguous() - - B, npoint = indices.size() - _, C, N = features.size() - output = torch.cuda.FloatTensor(B, C, npoint) - - ext_module.gather_points_forward(features, indices, output, b=B, c=C, n=N, npoints=npoint) - - ctx.for_backwards = (indices, C, N) - if torch.__version__ != 'parrots': - ctx.mark_non_differentiable(indices) - return output - - @staticmethod - def backward(ctx, grad_out): - idx, C, N = ctx.for_backwards - B, npoint = idx.size() - - grad_features = torch.cuda.FloatTensor(B, C, N).zero_() - grad_out_data = grad_out.data.contiguous() - ext_module.gather_points_backward(grad_out_data, idx, grad_features.data, b=B, c=C, n=N, npoints=npoint) - return grad_features, None - - -gather_points = GatherPoints.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py deleted file mode 100644 index 85e8956baa99..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from typing import Tuple - -import torch -from torch import nn as nn -from torch.autograd import Function - -from ..utils import ext_loader -from .ball_query import ball_query -from .knn import knn - -ext_module = ext_loader.load_ext('_ext', ['group_points_forward', 'group_points_backward']) - - -class QueryAndGroup(nn.Module): - """Groups points with a ball query of radius. - - Args: - max_radius (float): The maximum radius of the balls. - If None is given, we will use kNN sampling instead of ball query. - sample_num (int): Maximum number of features to gather in the ball. - min_radius (float, optional): The minimum radius of the balls. - Default: 0. - use_xyz (bool, optional): Whether to use xyz. - Default: True. - return_grouped_xyz (bool, optional): Whether to return grouped xyz. - Default: False. - normalize_xyz (bool, optional): Whether to normalize xyz. - Default: False. - uniform_sample (bool, optional): Whether to sample uniformly. - Default: False - return_unique_cnt (bool, optional): Whether to return the count of - unique samples. Default: False. - return_grouped_idx (bool, optional): Whether to return grouped idx. - Default: False. - """ - - def __init__( - self, - max_radius, - sample_num, - min_radius=0, - use_xyz=True, - return_grouped_xyz=False, - normalize_xyz=False, - uniform_sample=False, - return_unique_cnt=False, - return_grouped_idx=False, - ): - super().__init__() - self.max_radius = max_radius - self.min_radius = min_radius - self.sample_num = sample_num - self.use_xyz = use_xyz - self.return_grouped_xyz = return_grouped_xyz - self.normalize_xyz = normalize_xyz - self.uniform_sample = uniform_sample - self.return_unique_cnt = return_unique_cnt - self.return_grouped_idx = return_grouped_idx - if self.return_unique_cnt: - assert self.uniform_sample, 'uniform_sample should be True when ' 'returning the count of unique samples' - if self.max_radius is None: - assert not self.normalize_xyz, 'can not normalize grouped xyz when max_radius is None' - - def forward(self, points_xyz, center_xyz, features=None): - """ - Args: - points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. - center_xyz (Tensor): (B, npoint, 3) coordinates of the centriods. - features (Tensor): (B, C, N) Descriptors of the features. - - Returns: - Tensor: (B, 3 + C, npoint, sample_num) Grouped feature. - """ - # if self.max_radius is None, we will perform kNN instead of ball query - # idx is of shape [B, npoint, sample_num] - if self.max_radius is None: - idx = knn(self.sample_num, points_xyz, center_xyz, False) - idx = idx.transpose(1, 2).contiguous() - else: - idx = ball_query(self.min_radius, self.max_radius, self.sample_num, points_xyz, center_xyz) - - if self.uniform_sample: - unique_cnt = torch.zeros((idx.shape[0], idx.shape[1])) - for i_batch in range(idx.shape[0]): - for i_region in range(idx.shape[1]): - unique_ind = torch.unique(idx[i_batch, i_region, :]) - num_unique = unique_ind.shape[0] - unique_cnt[i_batch, i_region] = num_unique - sample_ind = torch.randint(0, num_unique, (self.sample_num - num_unique,), dtype=torch.long) - all_ind = torch.cat((unique_ind, unique_ind[sample_ind])) - idx[i_batch, i_region, :] = all_ind - - xyz_trans = points_xyz.transpose(1, 2).contiguous() - # (B, 3, npoint, sample_num) - grouped_xyz = grouping_operation(xyz_trans, idx) - grouped_xyz_diff = grouped_xyz - center_xyz.transpose(1, 2).unsqueeze(-1) # relative offsets - if self.normalize_xyz: - grouped_xyz_diff /= self.max_radius - - if features is not None: - grouped_features = grouping_operation(features, idx) - if self.use_xyz: - # (B, C + 3, npoint, sample_num) - new_features = torch.cat([grouped_xyz_diff, grouped_features], dim=1) - else: - new_features = grouped_features - else: - assert self.use_xyz, 'Cannot have not features and not use xyz as a feature!' - new_features = grouped_xyz_diff - - ret = [new_features] - if self.return_grouped_xyz: - ret.append(grouped_xyz) - if self.return_unique_cnt: - ret.append(unique_cnt) - if self.return_grouped_idx: - ret.append(idx) - if len(ret) == 1: - return ret[0] - else: - return tuple(ret) - - -class GroupAll(nn.Module): - """Group xyz with feature. - - Args: - use_xyz (bool): Whether to use xyz. - """ - - def __init__(self, use_xyz: bool = True): - super().__init__() - self.use_xyz = use_xyz - - def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None): - """ - Args: - xyz (Tensor): (B, N, 3) xyz coordinates of the features. - new_xyz (Tensor): new xyz coordinates of the features. - features (Tensor): (B, C, N) features to group. - - Returns: - Tensor: (B, C + 3, 1, N) Grouped feature. - """ - grouped_xyz = xyz.transpose(1, 2).unsqueeze(2) - if features is not None: - grouped_features = features.unsqueeze(2) - if self.use_xyz: - # (B, 3 + C, 1, N) - new_features = torch.cat([grouped_xyz, grouped_features], dim=1) - else: - new_features = grouped_features - else: - new_features = grouped_xyz - - return new_features - - -class GroupingOperation(Function): - """Group feature with given index.""" - - @staticmethod - def forward(ctx, features: torch.Tensor, indices: torch.Tensor) -> torch.Tensor: - """ - Args: - features (Tensor): (B, C, N) tensor of features to group. - indices (Tensor): (B, npoint, nsample) the indices of - features to group with. - - Returns: - Tensor: (B, C, npoint, nsample) Grouped features. - """ - features = features.contiguous() - indices = indices.contiguous() - - B, nfeatures, nsample = indices.size() - _, C, N = features.size() - output = torch.cuda.FloatTensor(B, C, nfeatures, nsample) - - ext_module.group_points_forward(B, C, N, nfeatures, nsample, features, indices, output) - - ctx.for_backwards = (indices, N) - return output - - @staticmethod - def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Args: - grad_out (Tensor): (B, C, npoint, nsample) tensor of the gradients - of the output from forward. - - Returns: - Tensor: (B, C, N) gradient of the features. - """ - idx, N = ctx.for_backwards - - B, C, npoint, nsample = grad_out.size() - grad_features = torch.cuda.FloatTensor(B, C, N).zero_() - - grad_out_data = grad_out.data.contiguous() - ext_module.group_points_backward(B, C, N, npoint, nsample, grad_out_data, idx, grad_features.data) - return grad_features, None - - -grouping_operation = GroupingOperation.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py deleted file mode 100644 index 5be5ea46aa91..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import glob -import os - -import torch - -if torch.__version__ == 'parrots': - import parrots - - def get_compiler_version(): - return 'GCC ' + parrots.version.compiler - - def get_compiling_cuda_version(): - return parrots.version.cuda - - -else: - from ..utils import ext_loader - - ext_module = ext_loader.load_ext('_ext', ['get_compiler_version', 'get_compiling_cuda_version']) - - def get_compiler_version(): - return ext_module.get_compiler_version() - - def get_compiling_cuda_version(): - return ext_module.get_compiling_cuda_version() - - -def get_onnxruntime_op_path(): - wildcard = os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), '_ext_ort.*.so') - - paths = glob.glob(wildcard) - if len(paths) > 0: - return paths[0] - else: - return '' diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py deleted file mode 100644 index 35a098e50995..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext( - '_ext', ['iou3d_boxes_iou_bev_forward', 'iou3d_nms_forward', 'iou3d_nms_normal_forward'] -) - - -def boxes_iou_bev(boxes_a, boxes_b): - """Calculate boxes IoU in the Bird's Eye View. - - Args: - boxes_a (torch.Tensor): Input boxes a with shape (M, 5). - boxes_b (torch.Tensor): Input boxes b with shape (N, 5). - - Returns: - ans_iou (torch.Tensor): IoU result with shape (M, N). - """ - ans_iou = boxes_a.new_zeros(torch.Size((boxes_a.shape[0], boxes_b.shape[0]))) - - ext_module.iou3d_boxes_iou_bev_forward(boxes_a.contiguous(), boxes_b.contiguous(), ans_iou) - - return ans_iou - - -def nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None): - """NMS function GPU implementation (for BEV boxes). The overlap of two - boxes for IoU calculation is defined as the exact overlapping area of the - two boxes. In this function, one can also set ``pre_max_size`` and - ``post_max_size``. - - Args: - boxes (torch.Tensor): Input boxes with the shape of [N, 5] - ([x1, y1, x2, y2, ry]). - scores (torch.Tensor): Scores of boxes with the shape of [N]. - thresh (float): Overlap threshold of NMS. - pre_max_size (int, optional): Max size of boxes before NMS. - Default: None. - post_max_size (int, optional): Max size of boxes after NMS. - Default: None. - - Returns: - torch.Tensor: Indexes after NMS. - """ - assert boxes.size(1) == 5, 'Input boxes shape should be [N, 5]' - order = scores.sort(0, descending=True)[1] - - if pre_max_size is not None: - order = order[:pre_max_size] - boxes = boxes[order].contiguous() - - keep = torch.zeros(boxes.size(0), dtype=torch.long) - num_out = ext_module.iou3d_nms_forward(boxes, keep, thresh) - keep = order[keep[:num_out].cuda(boxes.device)].contiguous() - if post_max_size is not None: - keep = keep[:post_max_size] - return keep - - -def nms_normal_bev(boxes, scores, thresh): - """Normal NMS function GPU implementation (for BEV boxes). The overlap of - two boxes for IoU calculation is defined as the exact overlapping area of - the two boxes WITH their yaw angle set to 0. - - Args: - boxes (torch.Tensor): Input boxes with shape (N, 5). - scores (torch.Tensor): Scores of predicted boxes with shape (N). - thresh (float): Overlap threshold of NMS. - - Returns: - torch.Tensor: Remaining indices with scores in descending order. - """ - assert boxes.shape[1] == 5, 'Input boxes shape should be [N, 5]' - order = scores.sort(0, descending=True)[1] - - boxes = boxes[order].contiguous() - - keep = torch.zeros(boxes.size(0), dtype=torch.long) - num_out = ext_module.iou3d_nms_normal_forward(boxes, keep, thresh) - return order[keep[:num_out].cuda(boxes.device)].contiguous() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py deleted file mode 100644 index 66be24b2c0db..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py +++ /dev/null @@ -1,73 +0,0 @@ -import torch -from torch.autograd import Function - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['knn_forward']) - - -class KNN(Function): - r"""KNN (CUDA) based on heap data structure. - Modified from `PAConv `_. - - Find k-nearest points. - """ - - @staticmethod - def forward( - ctx, k: int, xyz: torch.Tensor, center_xyz: torch.Tensor = None, transposed: bool = False - ) -> torch.Tensor: - """ - Args: - k (int): number of nearest neighbors. - xyz (Tensor): (B, N, 3) if transposed == False, else (B, 3, N). - xyz coordinates of the features. - center_xyz (Tensor, optional): (B, npoint, 3) if transposed == - False, else (B, 3, npoint). centers of the knn query. - Default: None. - transposed (bool, optional): whether the input tensors are - transposed. Should not explicitly use this keyword when - calling knn (=KNN.apply), just add the fourth param. - Default: False. - - Returns: - Tensor: (B, k, npoint) tensor with the indices of - the features that form k-nearest neighbours. - """ - assert (k > 0) & (k < 100), 'k should be in range(0, 100)' - - if center_xyz is None: - center_xyz = xyz - - if transposed: - xyz = xyz.transpose(2, 1).contiguous() - center_xyz = center_xyz.transpose(2, 1).contiguous() - - assert xyz.is_contiguous() # [B, N, 3] - assert center_xyz.is_contiguous() # [B, npoint, 3] - - center_xyz_device = center_xyz.get_device() - assert center_xyz_device == xyz.get_device(), 'center_xyz and xyz should be put on the same device' - if torch.cuda.current_device() != center_xyz_device: - torch.cuda.set_device(center_xyz_device) - - B, npoint, _ = center_xyz.shape - N = xyz.shape[1] - - idx = center_xyz.new_zeros((B, npoint, k)).int() - dist2 = center_xyz.new_zeros((B, npoint, k)).float() - - ext_module.knn_forward(xyz, center_xyz, idx, dist2, b=B, n=N, m=npoint, nsample=k) - # idx shape to [B, k, npoint] - idx = idx.transpose(2, 1).contiguous() - if torch.__version__ != 'parrots': - ctx.mark_non_differentiable(idx) - return idx - - @staticmethod - def backward(ctx, a=None): - return None, None, None - - -knn = KNN.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py deleted file mode 100644 index c067f11ca8c6..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import math - -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['masked_im2col_forward', 'masked_col2im_forward']) - - -class MaskedConv2dFunction(Function): - @staticmethod - def symbolic(g, features, mask, weight, bias, padding, stride): - return g.op('mmcv::MMCVMaskedConv2d', features, mask, weight, bias, padding_i=padding, stride_i=stride) - - @staticmethod - def forward(ctx, features, mask, weight, bias, padding=0, stride=1): - assert mask.dim() == 3 and mask.size(0) == 1 - assert features.dim() == 4 and features.size(0) == 1 - assert features.size()[2:] == mask.size()[1:] - pad_h, pad_w = _pair(padding) - stride_h, stride_w = _pair(stride) - if stride_h != 1 or stride_w != 1: - raise ValueError('Stride could not only be 1 in masked_conv2d currently.') - out_channel, in_channel, kernel_h, kernel_w = weight.size() - - batch_size = features.size(0) - out_h = int(math.floor((features.size(2) + 2 * pad_h - (kernel_h - 1) - 1) / stride_h + 1)) - out_w = int(math.floor((features.size(3) + 2 * pad_w - (kernel_h - 1) - 1) / stride_w + 1)) - mask_inds = torch.nonzero(mask[0] > 0, as_tuple=False) - output = features.new_zeros(batch_size, out_channel, out_h, out_w) - if mask_inds.numel() > 0: - mask_h_idx = mask_inds[:, 0].contiguous() - mask_w_idx = mask_inds[:, 1].contiguous() - data_col = features.new_zeros(in_channel * kernel_h * kernel_w, mask_inds.size(0)) - ext_module.masked_im2col_forward( - features, - mask_h_idx, - mask_w_idx, - data_col, - kernel_h=kernel_h, - kernel_w=kernel_w, - pad_h=pad_h, - pad_w=pad_w, - ) - - masked_output = torch.addmm(1, bias[:, None], 1, weight.view(out_channel, -1), data_col) - ext_module.masked_col2im_forward( - masked_output, mask_h_idx, mask_w_idx, output, height=out_h, width=out_w, channels=out_channel - ) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - return (None,) * 5 - - -masked_conv2d = MaskedConv2dFunction.apply - - -class MaskedConv2d(nn.Conv2d): - """A MaskedConv2d which inherits the official Conv2d. - - The masked forward doesn't implement the backward function and only - supports the stride parameter to be 1 currently. - """ - - def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True): - super(MaskedConv2d, self).__init__( - in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias - ) - - def forward(self, input, mask=None): - if mask is None: # fallback to the normal Conv2d - return super(MaskedConv2d, self).forward(input) - else: - return masked_conv2d(input, mask, self.weight, self.bias, self.padding) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py deleted file mode 100644 index ed7f7ece8e33..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from abc import abstractmethod - -import torch -import torch.nn as nn -import torch.nn.functional as F - -from ..cnn import ConvModule - - -class BaseMergeCell(nn.Module): - """The basic class for cells used in NAS-FPN and NAS-FCOS. - - BaseMergeCell takes 2 inputs. After applying convolution - on them, they are resized to the target size. Then, - they go through binary_op, which depends on the type of cell. - If with_out_conv is True, the result of output will go through - another convolution layer. - - Args: - in_channels (int): number of input channels in out_conv layer. - out_channels (int): number of output channels in out_conv layer. - with_out_conv (bool): Whether to use out_conv layer - out_conv_cfg (dict): Config dict for convolution layer, which should - contain "groups", "kernel_size", "padding", "bias" to build - out_conv layer. - out_norm_cfg (dict): Config dict for normalization layer in out_conv. - out_conv_order (tuple): The order of conv/norm/activation layers in - out_conv. - with_input1_conv (bool): Whether to use convolution on input1. - with_input2_conv (bool): Whether to use convolution on input2. - input_conv_cfg (dict): Config dict for building input1_conv layer and - input2_conv layer, which is expected to contain the type of - convolution. - Default: None, which means using conv2d. - input_norm_cfg (dict): Config dict for normalization layer in - input1_conv and input2_conv layer. Default: None. - upsample_mode (str): Interpolation method used to resize the output - of input1_conv and input2_conv to target size. Currently, we - support ['nearest', 'bilinear']. Default: 'nearest'. - """ - - def __init__( - self, - fused_channels=256, - out_channels=256, - with_out_conv=True, - out_conv_cfg=dict(groups=1, kernel_size=3, padding=1, bias=True), - out_norm_cfg=None, - out_conv_order=('act', 'conv', 'norm'), - with_input1_conv=False, - with_input2_conv=False, - input_conv_cfg=None, - input_norm_cfg=None, - upsample_mode='nearest', - ): - super(BaseMergeCell, self).__init__() - assert upsample_mode in ['nearest', 'bilinear'] - self.with_out_conv = with_out_conv - self.with_input1_conv = with_input1_conv - self.with_input2_conv = with_input2_conv - self.upsample_mode = upsample_mode - - if self.with_out_conv: - self.out_conv = ConvModule( - fused_channels, out_channels, **out_conv_cfg, norm_cfg=out_norm_cfg, order=out_conv_order - ) - - self.input1_conv = ( - self._build_input_conv(out_channels, input_conv_cfg, input_norm_cfg) - if with_input1_conv - else nn.Sequential() - ) - self.input2_conv = ( - self._build_input_conv(out_channels, input_conv_cfg, input_norm_cfg) - if with_input2_conv - else nn.Sequential() - ) - - def _build_input_conv(self, channel, conv_cfg, norm_cfg): - return ConvModule(channel, channel, 3, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, bias=True) - - @abstractmethod - def _binary_op(self, x1, x2): - pass - - def _resize(self, x, size): - if x.shape[-2:] == size: - return x - elif x.shape[-2:] < size: - return F.interpolate(x, size=size, mode=self.upsample_mode) - else: - assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0 - kernel_size = x.shape[-1] // size[-1] - x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size) - return x - - def forward(self, x1, x2, out_size=None): - assert x1.shape[:2] == x2.shape[:2] - assert out_size is None or len(out_size) == 2 - if out_size is None: # resize to larger one - out_size = max(x1.size()[2:], x2.size()[2:]) - - x1 = self.input1_conv(x1) - x2 = self.input2_conv(x2) - - x1 = self._resize(x1, out_size) - x2 = self._resize(x2, out_size) - - x = self._binary_op(x1, x2) - if self.with_out_conv: - x = self.out_conv(x) - return x - - -class SumCell(BaseMergeCell): - def __init__(self, in_channels, out_channels, **kwargs): - super(SumCell, self).__init__(in_channels, out_channels, **kwargs) - - def _binary_op(self, x1, x2): - return x1 + x2 - - -class ConcatCell(BaseMergeCell): - def __init__(self, in_channels, out_channels, **kwargs): - super(ConcatCell, self).__init__(in_channels * 2, out_channels, **kwargs) - - def _binary_op(self, x1, x2): - ret = torch.cat([x1, x2], dim=1) - return ret - - -class GlobalPoolingCell(BaseMergeCell): - def __init__(self, in_channels=None, out_channels=None, **kwargs): - super().__init__(in_channels, out_channels, **kwargs) - self.global_pool = nn.AdaptiveAvgPool2d((1, 1)) - - def _binary_op(self, x1, x2): - x2_att = self.global_pool(x2).sigmoid() - return x2 + x2_att * x1 diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py deleted file mode 100644 index f70b7d356cf5..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py +++ /dev/null @@ -1,280 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import math - -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair, _single - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import deprecated_api_warning -from ..cnn import CONV_LAYERS -from ..utils import ext_loader, print_log - -ext_module = ext_loader.load_ext('_ext', ['modulated_deform_conv_forward', 'modulated_deform_conv_backward']) - - -class ModulatedDeformConv2dFunction(Function): - @staticmethod - def symbolic(g, input, offset, mask, weight, bias, stride, padding, dilation, groups, deform_groups): - input_tensors = [input, offset, mask, weight] - if bias is not None: - input_tensors.append(bias) - return g.op( - 'mmcv::MMCVModulatedDeformConv2d', - *input_tensors, - stride_i=stride, - padding_i=padding, - dilation_i=dilation, - groups_i=groups, - deform_groups_i=deform_groups, - ) - - @staticmethod - def forward( - ctx, input, offset, mask, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, deform_groups=1 - ): - if input is not None and input.dim() != 4: - raise ValueError( - f'Expected 4D tensor as input, got {input.dim()}D tensor \ - instead.' - ) - ctx.stride = _pair(stride) - ctx.padding = _pair(padding) - ctx.dilation = _pair(dilation) - ctx.groups = groups - ctx.deform_groups = deform_groups - ctx.with_bias = bias is not None - if not ctx.with_bias: - bias = input.new_empty(0) # fake tensor - # When pytorch version >= 1.6.0, amp is adopted for fp16 mode; - # amp won't cast the type of model (float32), but "offset" is cast - # to float16 by nn.Conv2d automatically, leading to the type - # mismatch with input (when it is float32) or weight. - # The flag for whether to use fp16 or amp is the type of "offset", - # we cast weight and input to temporarily support fp16 and amp - # whatever the pytorch version is. - input = input.type_as(offset) - weight = weight.type_as(input) - ctx.save_for_backward(input, offset, mask, weight, bias) - output = input.new_empty(ModulatedDeformConv2dFunction._output_size(ctx, input, weight)) - ctx._bufs = [input.new_empty(0), input.new_empty(0)] - ext_module.modulated_deform_conv_forward( - input, - weight, - bias, - ctx._bufs[0], - offset, - mask, - output, - ctx._bufs[1], - kernel_h=weight.size(2), - kernel_w=weight.size(3), - stride_h=ctx.stride[0], - stride_w=ctx.stride[1], - pad_h=ctx.padding[0], - pad_w=ctx.padding[1], - dilation_h=ctx.dilation[0], - dilation_w=ctx.dilation[1], - group=ctx.groups, - deformable_group=ctx.deform_groups, - with_bias=ctx.with_bias, - ) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - input, offset, mask, weight, bias = ctx.saved_tensors - grad_input = torch.zeros_like(input) - grad_offset = torch.zeros_like(offset) - grad_mask = torch.zeros_like(mask) - grad_weight = torch.zeros_like(weight) - grad_bias = torch.zeros_like(bias) - grad_output = grad_output.contiguous() - ext_module.modulated_deform_conv_backward( - input, - weight, - bias, - ctx._bufs[0], - offset, - mask, - ctx._bufs[1], - grad_input, - grad_weight, - grad_bias, - grad_offset, - grad_mask, - grad_output, - kernel_h=weight.size(2), - kernel_w=weight.size(3), - stride_h=ctx.stride[0], - stride_w=ctx.stride[1], - pad_h=ctx.padding[0], - pad_w=ctx.padding[1], - dilation_h=ctx.dilation[0], - dilation_w=ctx.dilation[1], - group=ctx.groups, - deformable_group=ctx.deform_groups, - with_bias=ctx.with_bias, - ) - if not ctx.with_bias: - grad_bias = None - - return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, None, None, None, None, None) - - @staticmethod - def _output_size(ctx, input, weight): - channels = weight.size(0) - output_size = (input.size(0), channels) - for d in range(input.dim() - 2): - in_size = input.size(d + 2) - pad = ctx.padding[d] - kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1 - stride_ = ctx.stride[d] - output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,) - if not all(map(lambda s: s > 0, output_size)): - raise ValueError( - 'convolution input is too small (output would be ' + 'x'.join(map(str, output_size)) + ')' - ) - return output_size - - -modulated_deform_conv2d = ModulatedDeformConv2dFunction.apply - - -class ModulatedDeformConv2d(nn.Module): - @deprecated_api_warning({'deformable_groups': 'deform_groups'}, cls_name='ModulatedDeformConv2d') - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - deform_groups=1, - bias=True, - ): - super(ModulatedDeformConv2d, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.stride = _pair(stride) - self.padding = _pair(padding) - self.dilation = _pair(dilation) - self.groups = groups - self.deform_groups = deform_groups - # enable compatibility with nn.Conv2d - self.transposed = False - self.output_padding = _single(0) - - self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)) - if bias: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.register_parameter('bias', None) - self.init_weights() - - def init_weights(self): - n = self.in_channels - for k in self.kernel_size: - n *= k - stdv = 1.0 / math.sqrt(n) - self.weight.data.uniform_(-stdv, stdv) - if self.bias is not None: - self.bias.data.zero_() - - def forward(self, x, offset, mask): - return modulated_deform_conv2d( - x, - offset, - mask, - self.weight, - self.bias, - self.stride, - self.padding, - self.dilation, - self.groups, - self.deform_groups, - ) - - -@CONV_LAYERS.register_module('DCNv2') -class ModulatedDeformConv2dPack(ModulatedDeformConv2d): - """A ModulatedDeformable Conv Encapsulation that acts as normal Conv - layers. - - Args: - in_channels (int): Same as nn.Conv2d. - out_channels (int): Same as nn.Conv2d. - kernel_size (int or tuple[int]): Same as nn.Conv2d. - stride (int): Same as nn.Conv2d, while tuple is not supported. - padding (int): Same as nn.Conv2d, while tuple is not supported. - dilation (int): Same as nn.Conv2d, while tuple is not supported. - groups (int): Same as nn.Conv2d. - bias (bool or str): If specified as `auto`, it will be decided by the - norm_cfg. Bias will be set as True if norm_cfg is None, otherwise - False. - """ - - _version = 2 - - def __init__(self, *args, **kwargs): - super(ModulatedDeformConv2dPack, self).__init__(*args, **kwargs) - self.conv_offset = nn.Conv2d( - self.in_channels, - self.deform_groups * 3 * self.kernel_size[0] * self.kernel_size[1], - kernel_size=self.kernel_size, - stride=self.stride, - padding=self.padding, - dilation=self.dilation, - bias=True, - ) - self.init_weights() - - def init_weights(self): - super(ModulatedDeformConv2dPack, self).init_weights() - if hasattr(self, 'conv_offset'): - self.conv_offset.weight.data.zero_() - self.conv_offset.bias.data.zero_() - - def forward(self, x): - out = self.conv_offset(x) - o1, o2, mask = torch.chunk(out, 3, dim=1) - offset = torch.cat((o1, o2), dim=1) - mask = torch.sigmoid(mask) - return modulated_deform_conv2d( - x, - offset, - mask, - self.weight, - self.bias, - self.stride, - self.padding, - self.dilation, - self.groups, - self.deform_groups, - ) - - def _load_from_state_dict( - self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs - ): - version = local_metadata.get('version', None) - - if version is None or version < 2: - # the key is different in early versions - # In version < 2, ModulatedDeformConvPack - # loads previous benchmark models. - if prefix + 'conv_offset.weight' not in state_dict and prefix[:-1] + '_offset.weight' in state_dict: - state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(prefix[:-1] + '_offset.weight') - if prefix + 'conv_offset.bias' not in state_dict and prefix[:-1] + '_offset.bias' in state_dict: - state_dict[prefix + 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + '_offset.bias') - - if version is not None and version > 1: - print_log(f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to ' 'version 2.', logger='root') - - super()._load_from_state_dict( - state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs - ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py deleted file mode 100644 index e4ef4f9ab2d7..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py +++ /dev/null @@ -1,348 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import math -import warnings - -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.autograd.function import Function, once_differentiable - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import deprecated_api_warning -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import constant_init, xavier_init -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn.bricks.registry import ATTENTION -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import BaseModule -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward']) - - -class MultiScaleDeformableAttnFunction(Function): - @staticmethod - def forward( - ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step - ): - """GPU version of multi-scale deformable attention. - - Args: - value (Tensor): The value has shape - (bs, num_keys, mum_heads, embed_dims//num_heads) - value_spatial_shapes (Tensor): Spatial shape of - each feature map, has shape (num_levels, 2), - last dimension 2 represent (h, w) - sampling_locations (Tensor): The location of sampling points, - has shape - (bs ,num_queries, num_heads, num_levels, num_points, 2), - the last dimension 2 represent (x, y). - attention_weights (Tensor): The weight of sampling points used - when calculate the attention, has shape - (bs ,num_queries, num_heads, num_levels, num_points), - im2col_step (Tensor): The step used in image to column. - - Returns: - Tensor: has shape (bs, num_queries, embed_dims) - """ - - ctx.im2col_step = im2col_step - output = ext_module.ms_deform_attn_forward( - value, - value_spatial_shapes, - value_level_start_index, - sampling_locations, - attention_weights, - im2col_step=ctx.im2col_step, - ) - ctx.save_for_backward( - value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights - ) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - """GPU version of backward function. - - Args: - grad_output (Tensor): Gradient - of output tensor of forward. - - Returns: - Tuple[Tensor]: Gradient - of input tensors in forward. - """ - value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors - grad_value = torch.zeros_like(value) - grad_sampling_loc = torch.zeros_like(sampling_locations) - grad_attn_weight = torch.zeros_like(attention_weights) - - ext_module.ms_deform_attn_backward( - value, - value_spatial_shapes, - value_level_start_index, - sampling_locations, - attention_weights, - grad_output.contiguous(), - grad_value, - grad_sampling_loc, - grad_attn_weight, - im2col_step=ctx.im2col_step, - ) - - return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None - - -def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights): - """CPU version of multi-scale deformable attention. - - Args: - value (Tensor): The value has shape - (bs, num_keys, mum_heads, embed_dims//num_heads) - value_spatial_shapes (Tensor): Spatial shape of - each feature map, has shape (num_levels, 2), - last dimension 2 represent (h, w) - sampling_locations (Tensor): The location of sampling points, - has shape - (bs ,num_queries, num_heads, num_levels, num_points, 2), - the last dimension 2 represent (x, y). - attention_weights (Tensor): The weight of sampling points used - when calculate the attention, has shape - (bs ,num_queries, num_heads, num_levels, num_points), - - Returns: - Tensor: has shape (bs, num_queries, embed_dims) - """ - - bs, _, num_heads, embed_dims = value.shape - _, num_queries, num_heads, num_levels, num_points, _ = sampling_locations.shape - value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1) - sampling_grids = 2 * sampling_locations - 1 - sampling_value_list = [] - for level, (H_, W_) in enumerate(value_spatial_shapes): - # bs, H_*W_, num_heads, embed_dims -> - # bs, H_*W_, num_heads*embed_dims -> - # bs, num_heads*embed_dims, H_*W_ -> - # bs*num_heads, embed_dims, H_, W_ - value_l_ = value_list[level].flatten(2).transpose(1, 2).reshape(bs * num_heads, embed_dims, H_, W_) - # bs, num_queries, num_heads, num_points, 2 -> - # bs, num_heads, num_queries, num_points, 2 -> - # bs*num_heads, num_queries, num_points, 2 - sampling_grid_l_ = sampling_grids[:, :, :, level].transpose(1, 2).flatten(0, 1) - # bs*num_heads, embed_dims, num_queries, num_points - sampling_value_l_ = F.grid_sample( - value_l_, sampling_grid_l_, mode='bilinear', padding_mode='zeros', align_corners=False - ) - sampling_value_list.append(sampling_value_l_) - # (bs, num_queries, num_heads, num_levels, num_points) -> - # (bs, num_heads, num_queries, num_levels, num_points) -> - # (bs, num_heads, 1, num_queries, num_levels*num_points) - attention_weights = attention_weights.transpose(1, 2).reshape( - bs * num_heads, 1, num_queries, num_levels * num_points - ) - output = ( - (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights) - .sum(-1) - .view(bs, num_heads * embed_dims, num_queries) - ) - return output.transpose(1, 2).contiguous() - - -@ATTENTION.register_module() -class MultiScaleDeformableAttention(BaseModule): - """An attention module used in Deformable-Detr. - - `Deformable DETR: Deformable Transformers for End-to-End Object Detection. - `_. - - Args: - embed_dims (int): The embedding dimension of Attention. - Default: 256. - num_heads (int): Parallel attention heads. Default: 64. - num_levels (int): The number of feature map used in - Attention. Default: 4. - num_points (int): The number of sampling points for - each query in each head. Default: 4. - im2col_step (int): The step used in image_to_column. - Default: 64. - dropout (float): A Dropout layer on `inp_identity`. - Default: 0.1. - batch_first (bool): Key, Query and Value are shape of - (batch, n, embed_dim) - or (n, batch, embed_dim). Default to False. - norm_cfg (dict): Config dict for normalization layer. - Default: None. - init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. - Default: None. - """ - - def __init__( - self, - embed_dims=256, - num_heads=8, - num_levels=4, - num_points=4, - im2col_step=64, - dropout=0.1, - batch_first=False, - norm_cfg=None, - init_cfg=None, - ): - super().__init__(init_cfg) - if embed_dims % num_heads != 0: - raise ValueError(f'embed_dims must be divisible by num_heads, ' f'but got {embed_dims} and {num_heads}') - dim_per_head = embed_dims // num_heads - self.norm_cfg = norm_cfg - self.dropout = nn.Dropout(dropout) - self.batch_first = batch_first - - # you'd better set dim_per_head to a power of 2 - # which is more efficient in the CUDA implementation - def _is_power_of_2(n): - if (not isinstance(n, int)) or (n < 0): - raise ValueError('invalid input for _is_power_of_2: {} (type: {})'.format(n, type(n))) - return (n & (n - 1) == 0) and n != 0 - - if not _is_power_of_2(dim_per_head): - warnings.warn( - "You'd better set embed_dims in " - 'MultiScaleDeformAttention to make ' - 'the dimension of each attention head a power of 2 ' - 'which is more efficient in our CUDA implementation.' - ) - - self.im2col_step = im2col_step - self.embed_dims = embed_dims - self.num_levels = num_levels - self.num_heads = num_heads - self.num_points = num_points - self.sampling_offsets = nn.Linear(embed_dims, num_heads * num_levels * num_points * 2) - self.attention_weights = nn.Linear(embed_dims, num_heads * num_levels * num_points) - self.value_proj = nn.Linear(embed_dims, embed_dims) - self.output_proj = nn.Linear(embed_dims, embed_dims) - self.init_weights() - - def init_weights(self): - """Default initialization for Parameters of Module.""" - constant_init(self.sampling_offsets, 0.0) - thetas = torch.arange(self.num_heads, dtype=torch.float32) * (2.0 * math.pi / self.num_heads) - grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) - grid_init = ( - (grid_init / grid_init.abs().max(-1, keepdim=True)[0]) - .view(self.num_heads, 1, 1, 2) - .repeat(1, self.num_levels, self.num_points, 1) - ) - for i in range(self.num_points): - grid_init[:, :, i, :] *= i + 1 - - self.sampling_offsets.bias.data = grid_init.view(-1) - constant_init(self.attention_weights, val=0.0, bias=0.0) - xavier_init(self.value_proj, distribution='uniform', bias=0.0) - xavier_init(self.output_proj, distribution='uniform', bias=0.0) - self._is_init = True - - @deprecated_api_warning({'residual': 'identity'}, cls_name='MultiScaleDeformableAttention') - def forward( - self, - query, - key=None, - value=None, - identity=None, - query_pos=None, - key_padding_mask=None, - reference_points=None, - spatial_shapes=None, - level_start_index=None, - **kwargs, - ): - """Forward Function of MultiScaleDeformAttention. - - Args: - query (Tensor): Query of Transformer with shape - (num_query, bs, embed_dims). - key (Tensor): The key tensor with shape - `(num_key, bs, embed_dims)`. - value (Tensor): The value tensor with shape - `(num_key, bs, embed_dims)`. - identity (Tensor): The tensor used for addition, with the - same shape as `query`. Default None. If None, - `query` will be used. - query_pos (Tensor): The positional encoding for `query`. - Default: None. - key_pos (Tensor): The positional encoding for `key`. Default - None. - reference_points (Tensor): The normalized reference - points with shape (bs, num_query, num_levels, 2), - all elements is range in [0, 1], top-left (0,0), - bottom-right (1, 1), including padding area. - or (N, Length_{query}, num_levels, 4), add - additional two dimensions is (w, h) to - form reference boxes. - key_padding_mask (Tensor): ByteTensor for `query`, with - shape [bs, num_key]. - spatial_shapes (Tensor): Spatial shape of features in - different levels. With shape (num_levels, 2), - last dimension represents (h, w). - level_start_index (Tensor): The start index of each level. - A tensor has shape ``(num_levels, )`` and can be represented - as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...]. - - Returns: - Tensor: forwarded results with shape [num_query, bs, embed_dims]. - """ - - if value is None: - value = query - - if identity is None: - identity = query - if query_pos is not None: - query = query + query_pos - if not self.batch_first: - # change to (bs, num_query ,embed_dims) - query = query.permute(1, 0, 2) - value = value.permute(1, 0, 2) - - bs, num_query, _ = query.shape - bs, num_value, _ = value.shape - assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value - - value = self.value_proj(value) - if key_padding_mask is not None: - value = value.masked_fill(key_padding_mask[..., None], 0.0) - value = value.view(bs, num_value, self.num_heads, -1) - sampling_offsets = self.sampling_offsets(query).view( - bs, num_query, self.num_heads, self.num_levels, self.num_points, 2 - ) - attention_weights = self.attention_weights(query).view( - bs, num_query, self.num_heads, self.num_levels * self.num_points - ) - attention_weights = attention_weights.softmax(-1) - - attention_weights = attention_weights.view(bs, num_query, self.num_heads, self.num_levels, self.num_points) - if reference_points.shape[-1] == 2: - offset_normalizer = torch.stack([spatial_shapes[..., 1], spatial_shapes[..., 0]], -1) - sampling_locations = ( - reference_points[:, :, None, :, None, :] - + sampling_offsets / offset_normalizer[None, None, None, :, None, :] - ) - elif reference_points.shape[-1] == 4: - sampling_locations = ( - reference_points[:, :, None, :, None, :2] - + sampling_offsets / self.num_points * reference_points[:, :, None, :, None, 2:] * 0.5 - ) - else: - raise ValueError( - f'Last dim of reference_points must be' f' 2 or 4, but get {reference_points.shape[-1]} instead.' - ) - if torch.cuda.is_available() and value.is_cuda: - output = MultiScaleDeformableAttnFunction.apply( - value, spatial_shapes, level_start_index, sampling_locations, attention_weights, self.im2col_step - ) - else: - output = multi_scale_deformable_attn_pytorch(value, spatial_shapes, sampling_locations, attention_weights) - - output = self.output_proj(output) - - if not self.batch_first: - # (num_query, bs ,embed_dims) - output = output.permute(1, 0, 2) - - return self.dropout(output) + identity diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py deleted file mode 100644 index 4914c139427b..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py +++ /dev/null @@ -1,383 +0,0 @@ -import os - -import numpy as np -import torch - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import deprecated_api_warning -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['nms', 'softnms', 'nms_match', 'nms_rotated']) - - -# This function is modified from: https://github.com/pytorch/vision/ -class NMSop(torch.autograd.Function): - @staticmethod - def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold, max_num): - is_filtering_by_score = score_threshold > 0 - if is_filtering_by_score: - valid_mask = scores > score_threshold - bboxes, scores = bboxes[valid_mask], scores[valid_mask] - valid_inds = torch.nonzero(valid_mask, as_tuple=False).squeeze(dim=1) - - inds = ext_module.nms(bboxes, scores, iou_threshold=float(iou_threshold), offset=offset) - - if max_num > 0: - inds = inds[:max_num] - if is_filtering_by_score: - inds = valid_inds[inds] - return inds - - @staticmethod - def symbolic(g, bboxes, scores, iou_threshold, offset, score_threshold, max_num): - from ..onnx import is_custom_op_loaded - - has_custom_op = is_custom_op_loaded() - # TensorRT nms plugin is aligned with original nms in ONNXRuntime - is_trt_backend = os.environ.get('ONNX_BACKEND') == 'MMCVTensorRT' - if has_custom_op and (not is_trt_backend): - return g.op( - 'mmcv::NonMaxSuppression', bboxes, scores, iou_threshold_f=float(iou_threshold), offset_i=int(offset) - ) - else: - from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze - from ..onnx.onnx_utils.symbolic_helper import _size_helper - - boxes = unsqueeze(g, bboxes, 0) - scores = unsqueeze(g, unsqueeze(g, scores, 0), 0) - - if max_num > 0: - max_num = g.op('Constant', value_t=torch.tensor(max_num, dtype=torch.long)) - else: - dim = g.op('Constant', value_t=torch.tensor(0)) - max_num = _size_helper(g, bboxes, dim) - max_output_per_class = max_num - iou_threshold = g.op('Constant', value_t=torch.tensor([iou_threshold], dtype=torch.float)) - score_threshold = g.op('Constant', value_t=torch.tensor([score_threshold], dtype=torch.float)) - nms_out = g.op('NonMaxSuppression', boxes, scores, max_output_per_class, iou_threshold, score_threshold) - return squeeze(g, select(g, nms_out, 1, g.op('Constant', value_t=torch.tensor([2], dtype=torch.long))), 1) - - -class SoftNMSop(torch.autograd.Function): - @staticmethod - def forward(ctx, boxes, scores, iou_threshold, sigma, min_score, method, offset): - dets = boxes.new_empty((boxes.size(0), 5), device='cpu') - inds = ext_module.softnms( - boxes.cpu(), - scores.cpu(), - dets.cpu(), - iou_threshold=float(iou_threshold), - sigma=float(sigma), - min_score=float(min_score), - method=int(method), - offset=int(offset), - ) - return dets, inds - - @staticmethod - def symbolic(g, boxes, scores, iou_threshold, sigma, min_score, method, offset): - from packaging import version - - assert version.parse(torch.__version__) >= version.parse('1.7.0') - nms_out = g.op( - 'mmcv::SoftNonMaxSuppression', - boxes, - scores, - iou_threshold_f=float(iou_threshold), - sigma_f=float(sigma), - min_score_f=float(min_score), - method_i=int(method), - offset_i=int(offset), - outputs=2, - ) - return nms_out - - -@deprecated_api_warning({'iou_thr': 'iou_threshold'}) -def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1): - """Dispatch to either CPU or GPU NMS implementations. - - The input can be either torch tensor or numpy array. GPU NMS will be used - if the input is gpu tensor, otherwise CPU NMS - will be used. The returned type will always be the same as inputs. - - Arguments: - boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4). - scores (torch.Tensor or np.ndarray): scores in shape (N, ). - iou_threshold (float): IoU threshold for NMS. - offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset). - score_threshold (float): score threshold for NMS. - max_num (int): maximum number of boxes after NMS. - - Returns: - tuple: kept dets(boxes and scores) and indice, which is always the \ - same data type as the input. - - Example: - >>> boxes = np.array([[49.1, 32.4, 51.0, 35.9], - >>> [49.3, 32.9, 51.0, 35.3], - >>> [49.2, 31.8, 51.0, 35.4], - >>> [35.1, 11.5, 39.1, 15.7], - >>> [35.6, 11.8, 39.3, 14.2], - >>> [35.3, 11.5, 39.9, 14.5], - >>> [35.2, 11.7, 39.7, 15.7]], dtype=np.float32) - >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.5, 0.4, 0.3],\ - dtype=np.float32) - >>> iou_threshold = 0.6 - >>> dets, inds = nms(boxes, scores, iou_threshold) - >>> assert len(inds) == len(dets) == 3 - """ - assert isinstance(boxes, (torch.Tensor, np.ndarray)) - assert isinstance(scores, (torch.Tensor, np.ndarray)) - is_numpy = False - if isinstance(boxes, np.ndarray): - is_numpy = True - boxes = torch.from_numpy(boxes) - if isinstance(scores, np.ndarray): - scores = torch.from_numpy(scores) - assert boxes.size(1) == 4 - assert boxes.size(0) == scores.size(0) - assert offset in (0, 1) - - if torch.__version__ == 'parrots': - indata_list = [boxes, scores] - indata_dict = {'iou_threshold': float(iou_threshold), 'offset': int(offset)} - inds = ext_module.nms(*indata_list, **indata_dict) - else: - inds = NMSop.apply(boxes, scores, iou_threshold, offset, score_threshold, max_num) - dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1) - if is_numpy: - dets = dets.cpu().numpy() - inds = inds.cpu().numpy() - return dets, inds - - -@deprecated_api_warning({'iou_thr': 'iou_threshold'}) -def soft_nms(boxes, scores, iou_threshold=0.3, sigma=0.5, min_score=1e-3, method='linear', offset=0): - """Dispatch to only CPU Soft NMS implementations. - - The input can be either a torch tensor or numpy array. - The returned type will always be the same as inputs. - - Arguments: - boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4). - scores (torch.Tensor or np.ndarray): scores in shape (N, ). - iou_threshold (float): IoU threshold for NMS. - sigma (float): hyperparameter for gaussian method - min_score (float): score filter threshold - method (str): either 'linear' or 'gaussian' - offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset). - - Returns: - tuple: kept dets(boxes and scores) and indice, which is always the \ - same data type as the input. - - Example: - >>> boxes = np.array([[4., 3., 5., 3.], - >>> [4., 3., 5., 4.], - >>> [3., 1., 3., 1.], - >>> [3., 1., 3., 1.], - >>> [3., 1., 3., 1.], - >>> [3., 1., 3., 1.]], dtype=np.float32) - >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.4, 0.0], dtype=np.float32) - >>> iou_threshold = 0.6 - >>> dets, inds = soft_nms(boxes, scores, iou_threshold, sigma=0.5) - >>> assert len(inds) == len(dets) == 5 - """ - - assert isinstance(boxes, (torch.Tensor, np.ndarray)) - assert isinstance(scores, (torch.Tensor, np.ndarray)) - is_numpy = False - if isinstance(boxes, np.ndarray): - is_numpy = True - boxes = torch.from_numpy(boxes) - if isinstance(scores, np.ndarray): - scores = torch.from_numpy(scores) - assert boxes.size(1) == 4 - assert boxes.size(0) == scores.size(0) - assert offset in (0, 1) - method_dict = {'naive': 0, 'linear': 1, 'gaussian': 2} - assert method in method_dict.keys() - - if torch.__version__ == 'parrots': - dets = boxes.new_empty((boxes.size(0), 5), device='cpu') - indata_list = [boxes.cpu(), scores.cpu(), dets.cpu()] - indata_dict = { - 'iou_threshold': float(iou_threshold), - 'sigma': float(sigma), - 'min_score': min_score, - 'method': method_dict[method], - 'offset': int(offset), - } - inds = ext_module.softnms(*indata_list, **indata_dict) - else: - dets, inds = SoftNMSop.apply( - boxes.cpu(), - scores.cpu(), - float(iou_threshold), - float(sigma), - float(min_score), - method_dict[method], - int(offset), - ) - - dets = dets[: inds.size(0)] - - if is_numpy: - dets = dets.cpu().numpy() - inds = inds.cpu().numpy() - return dets, inds - else: - return dets.to(device=boxes.device), inds.to(device=boxes.device) - - -def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): - """Performs non-maximum suppression in a batched fashion. - - Modified from https://github.com/pytorch/vision/blob - /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39. - In order to perform NMS independently per class, we add an offset to all - the boxes. The offset is dependent only on the class idx, and is large - enough so that boxes from different classes do not overlap. - - Arguments: - boxes (torch.Tensor): boxes in shape (N, 4). - scores (torch.Tensor): scores in shape (N, ). - idxs (torch.Tensor): each index value correspond to a bbox cluster, - and NMS will not be applied between elements of different idxs, - shape (N, ). - nms_cfg (dict): specify nms type and other parameters like iou_thr. - Possible keys includes the following. - - - iou_thr (float): IoU threshold used for NMS. - - split_thr (float): threshold number of boxes. In some cases the - number of boxes is large (e.g., 200k). To avoid OOM during - training, the users could set `split_thr` to a small value. - If the number of boxes is greater than the threshold, it will - perform NMS on each group of boxes separately and sequentially. - Defaults to 10000. - class_agnostic (bool): if true, nms is class agnostic, - i.e. IoU thresholding happens over all boxes, - regardless of the predicted class. - - Returns: - tuple: kept dets and indice. - """ - nms_cfg_ = nms_cfg.copy() - class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic) - if class_agnostic: - boxes_for_nms = boxes - else: - max_coordinate = boxes.max() - offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes)) - boxes_for_nms = boxes + offsets[:, None] - - nms_type = nms_cfg_.pop('type', 'nms') - nms_op = eval(nms_type) - - split_thr = nms_cfg_.pop('split_thr', 10000) - # Won't split to multiple nms nodes when exporting to onnx - if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export(): - dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_) - boxes = boxes[keep] - # -1 indexing works abnormal in TensorRT - # This assumes `dets` has 5 dimensions where - # the last dimension is score. - # TODO: more elegant way to handle the dimension issue. - # Some type of nms would reweight the score, such as SoftNMS - scores = dets[:, 4] - else: - max_num = nms_cfg_.pop('max_num', -1) - total_mask = scores.new_zeros(scores.size(), dtype=torch.bool) - # Some type of nms would reweight the score, such as SoftNMS - scores_after_nms = scores.new_zeros(scores.size()) - for id in torch.unique(idxs): - mask = (idxs == id).nonzero(as_tuple=False).view(-1) - dets, keep = nms_op(boxes_for_nms[mask], scores[mask], **nms_cfg_) - total_mask[mask[keep]] = True - scores_after_nms[mask[keep]] = dets[:, -1] - keep = total_mask.nonzero(as_tuple=False).view(-1) - - scores, inds = scores_after_nms[keep].sort(descending=True) - keep = keep[inds] - boxes = boxes[keep] - - if max_num > 0: - keep = keep[:max_num] - boxes = boxes[:max_num] - scores = scores[:max_num] - - return torch.cat([boxes, scores[:, None]], -1), keep - - -def nms_match(dets, iou_threshold): - """Matched dets into different groups by NMS. - - NMS match is Similar to NMS but when a bbox is suppressed, nms match will - record the indice of suppressed bbox and form a group with the indice of - kept bbox. In each group, indice is sorted as score order. - - Arguments: - dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5). - iou_thr (float): IoU thresh for NMS. - - Returns: - List[torch.Tensor | np.ndarray]: The outer list corresponds different - matched group, the inner Tensor corresponds the indices for a group - in score order. - """ - if dets.shape[0] == 0: - matched = [] - else: - assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' f'but get {dets.shape}' - if isinstance(dets, torch.Tensor): - dets_t = dets.detach().cpu() - else: - dets_t = torch.from_numpy(dets) - indata_list = [dets_t] - indata_dict = {'iou_threshold': float(iou_threshold)} - matched = ext_module.nms_match(*indata_list, **indata_dict) - if torch.__version__ == 'parrots': - matched = matched.tolist() - - if isinstance(dets, torch.Tensor): - return [dets.new_tensor(m, dtype=torch.long) for m in matched] - else: - return [np.array(m, dtype=np.int) for m in matched] - - -def nms_rotated(dets, scores, iou_threshold, labels=None): - """Performs non-maximum suppression (NMS) on the rotated boxes according to - their intersection-over-union (IoU). - - Rotated NMS iteratively removes lower scoring rotated boxes which have an - IoU greater than iou_threshold with another (higher scoring) rotated box. - - Args: - boxes (Tensor): Rotated boxes in shape (N, 5). They are expected to \ - be in (x_ctr, y_ctr, width, height, angle_radian) format. - scores (Tensor): scores in shape (N, ). - iou_threshold (float): IoU thresh for NMS. - labels (Tensor): boxes' label in shape (N,). - - Returns: - tuple: kept dets(boxes and scores) and indice, which is always the \ - same data type as the input. - """ - if dets.shape[0] == 0: - return dets, None - multi_label = labels is not None - if multi_label: - dets_wl = torch.cat((dets, labels.unsqueeze(1)), 1) - else: - dets_wl = dets - _, order = scores.sort(0, descending=True) - dets_sorted = dets_wl.index_select(0, order) - - if torch.__version__ == 'parrots': - keep_inds = ext_module.nms_rotated( - dets_wl, scores, order, dets_sorted, iou_threshold=iou_threshold, multi_label=multi_label - ) - else: - keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted, iou_threshold, multi_label) - dets = torch.cat((dets[keep_inds], scores[keep_inds].reshape(-1, 1)), dim=1) - return dets, keep_inds diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py deleted file mode 100644 index 228769d37089..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import numpy as np -import torch - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['pixel_group']) - - -def pixel_group(score, mask, embedding, kernel_label, kernel_contour, kernel_region_num, distance_threshold): - """Group pixels into text instances, which is widely used text detection - methods. - - Arguments: - score (np.array or Tensor): The foreground score with size hxw. - mask (np.array or Tensor): The foreground mask with size hxw. - embedding (np.array or Tensor): The embedding with size hxwxc to - distinguish instances. - kernel_label (np.array or Tensor): The instance kernel index with - size hxw. - kernel_contour (np.array or Tensor): The kernel contour with size hxw. - kernel_region_num (int): The instance kernel region number. - distance_threshold (float): The embedding distance threshold between - kernel and pixel in one instance. - - Returns: - pixel_assignment (List[List[float]]): The instance coordinate list. - Each element consists of averaged confidence, pixel number, and - coordinates (x_i, y_i for all pixels) in order. - """ - assert isinstance(score, (torch.Tensor, np.ndarray)) - assert isinstance(mask, (torch.Tensor, np.ndarray)) - assert isinstance(embedding, (torch.Tensor, np.ndarray)) - assert isinstance(kernel_label, (torch.Tensor, np.ndarray)) - assert isinstance(kernel_contour, (torch.Tensor, np.ndarray)) - assert isinstance(kernel_region_num, int) - assert isinstance(distance_threshold, float) - - if isinstance(score, np.ndarray): - score = torch.from_numpy(score) - if isinstance(mask, np.ndarray): - mask = torch.from_numpy(mask) - if isinstance(embedding, np.ndarray): - embedding = torch.from_numpy(embedding) - if isinstance(kernel_label, np.ndarray): - kernel_label = torch.from_numpy(kernel_label) - if isinstance(kernel_contour, np.ndarray): - kernel_contour = torch.from_numpy(kernel_contour) - - if torch.__version__ == 'parrots': - label = ext_module.pixel_group( - score, - mask, - embedding, - kernel_label, - kernel_contour, - kernel_region_num=kernel_region_num, - distance_threshold=distance_threshold, - ) - label = label.tolist() - label = label[0] - list_index = kernel_region_num - pixel_assignment = [] - for x in range(kernel_region_num): - pixel_assignment.append(np.array(label[list_index : list_index + int(label[x])], dtype=np.float)) - list_index = list_index + int(label[x]) - else: - pixel_assignment = ext_module.pixel_group( - score, mask, embedding, kernel_label, kernel_contour, kernel_region_num, distance_threshold - ) - return pixel_assignment diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py deleted file mode 100644 index 204ff1c74e12..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py +++ /dev/null @@ -1,317 +0,0 @@ -# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend # noqa - -from os import path as osp - -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.nn.modules.utils import _pair -from torch.onnx.operators import shape_as_tensor - - -def bilinear_grid_sample(im, grid, align_corners=False): - """Given an input and a flow-field grid, computes the output using input - values and pixel locations from grid. Supported only bilinear interpolation - method to sample the input pixels. - - Args: - im (torch.Tensor): Input feature map, shape (N, C, H, W) - grid (torch.Tensor): Point coordinates, shape (N, Hg, Wg, 2) - align_corners {bool}: If set to True, the extrema (-1 and 1) are - considered as referring to the center points of the input’s - corner pixels. If set to False, they are instead considered as - referring to the corner points of the input’s corner pixels, - making the sampling more resolution agnostic. - Returns: - torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg) - """ - n, c, h, w = im.shape - gn, gh, gw, _ = grid.shape - assert n == gn - - x = grid[:, :, :, 0] - y = grid[:, :, :, 1] - - if align_corners: - x = ((x + 1) / 2) * (w - 1) - y = ((y + 1) / 2) * (h - 1) - else: - x = ((x + 1) * w - 1) / 2 - y = ((y + 1) * h - 1) / 2 - - x = x.view(n, -1) - y = y.view(n, -1) - - x0 = torch.floor(x).long() - y0 = torch.floor(y).long() - x1 = x0 + 1 - y1 = y0 + 1 - - wa = ((x1 - x) * (y1 - y)).unsqueeze(1) - wb = ((x1 - x) * (y - y0)).unsqueeze(1) - wc = ((x - x0) * (y1 - y)).unsqueeze(1) - wd = ((x - x0) * (y - y0)).unsqueeze(1) - - # Apply default for grid_sample function zero padding - im_padded = F.pad(im, pad=[1, 1, 1, 1], mode='constant', value=0) - padded_h = h + 2 - padded_w = w + 2 - # save points positions after padding - x0, x1, y0, y1 = x0 + 1, x1 + 1, y0 + 1, y1 + 1 - - # Clip coordinates to padded image size - x0 = torch.where(x0 < 0, torch.tensor(0), x0) - x0 = torch.where(x0 > padded_w - 1, torch.tensor(padded_w - 1), x0) - x1 = torch.where(x1 < 0, torch.tensor(0), x1) - x1 = torch.where(x1 > padded_w - 1, torch.tensor(padded_w - 1), x1) - y0 = torch.where(y0 < 0, torch.tensor(0), y0) - y0 = torch.where(y0 > padded_h - 1, torch.tensor(padded_h - 1), y0) - y1 = torch.where(y1 < 0, torch.tensor(0), y1) - y1 = torch.where(y1 > padded_h - 1, torch.tensor(padded_h - 1), y1) - - im_padded = im_padded.view(n, c, -1) - - x0_y0 = (x0 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1) - x0_y1 = (x0 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1) - x1_y0 = (x1 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1) - x1_y1 = (x1 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1) - - Ia = torch.gather(im_padded, 2, x0_y0) - Ib = torch.gather(im_padded, 2, x0_y1) - Ic = torch.gather(im_padded, 2, x1_y0) - Id = torch.gather(im_padded, 2, x1_y1) - - return (Ia * wa + Ib * wb + Ic * wc + Id * wd).reshape(n, c, gh, gw) - - -def is_in_onnx_export_without_custom_ops(): - from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import get_onnxruntime_op_path - - ort_custom_op_path = get_onnxruntime_op_path() - return torch.onnx.is_in_onnx_export() and not osp.exists(ort_custom_op_path) - - -def normalize(grid): - """Normalize input grid from [-1, 1] to [0, 1] - Args: - grid (Tensor): The grid to be normalize, range [-1, 1]. - Returns: - Tensor: Normalized grid, range [0, 1]. - """ - - return (grid + 1.0) / 2.0 - - -def denormalize(grid): - """Denormalize input grid from range [0, 1] to [-1, 1] - Args: - grid (Tensor): The grid to be denormalize, range [0, 1]. - Returns: - Tensor: Denormalized grid, range [-1, 1]. - """ - - return grid * 2.0 - 1.0 - - -def generate_grid(num_grid, size, device): - """Generate regular square grid of points in [0, 1] x [0, 1] coordinate - space. - - Args: - num_grid (int): The number of grids to sample, one for each region. - size (tuple(int, int)): The side size of the regular grid. - device (torch.device): Desired device of returned tensor. - - Returns: - (torch.Tensor): A tensor of shape (num_grid, size[0]*size[1], 2) that - contains coordinates for the regular grids. - """ - - affine_trans = torch.tensor([[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]], device=device) - grid = F.affine_grid(affine_trans, torch.Size((1, 1, *size)), align_corners=False) - grid = normalize(grid) - return grid.view(1, -1, 2).expand(num_grid, -1, -1) - - -def rel_roi_point_to_abs_img_point(rois, rel_roi_points): - """Convert roi based relative point coordinates to image based absolute - point coordinates. - - Args: - rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) - rel_roi_points (Tensor): Point coordinates inside RoI, relative to - RoI, location, range (0, 1), shape (N, P, 2) - Returns: - Tensor: Image based absolute point coordinates, shape (N, P, 2) - """ - - with torch.no_grad(): - assert rel_roi_points.size(0) == rois.size(0) - assert rois.dim() == 2 - assert rel_roi_points.dim() == 3 - assert rel_roi_points.size(2) == 2 - # remove batch idx - if rois.size(1) == 5: - rois = rois[:, 1:] - abs_img_points = rel_roi_points.clone() - # To avoid an error during exporting to onnx use independent - # variables instead inplace computation - xs = abs_img_points[:, :, 0] * (rois[:, None, 2] - rois[:, None, 0]) - ys = abs_img_points[:, :, 1] * (rois[:, None, 3] - rois[:, None, 1]) - xs += rois[:, None, 0] - ys += rois[:, None, 1] - abs_img_points = torch.stack([xs, ys], dim=2) - return abs_img_points - - -def get_shape_from_feature_map(x): - """Get spatial resolution of input feature map considering exporting to - onnx mode. - - Args: - x (torch.Tensor): Input tensor, shape (N, C, H, W) - Returns: - torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2) - """ - if torch.onnx.is_in_onnx_export(): - img_shape = shape_as_tensor(x)[2:].flip(0).view(1, 1, 2).to(x.device).float() - else: - img_shape = torch.tensor(x.shape[2:]).flip(0).view(1, 1, 2).to(x.device).float() - return img_shape - - -def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.0): - """Convert image based absolute point coordinates to image based relative - coordinates for sampling. - - Args: - abs_img_points (Tensor): Image based absolute point coordinates, - shape (N, P, 2) - img (tuple/Tensor): (height, width) of image or feature map. - spatial_scale (float): Scale points by this factor. Default: 1. - - Returns: - Tensor: Image based relative point coordinates for sampling, - shape (N, P, 2) - """ - - assert (isinstance(img, tuple) and len(img) == 2) or (isinstance(img, torch.Tensor) and len(img.shape) == 4) - - if isinstance(img, tuple): - h, w = img - scale = torch.tensor([w, h], dtype=torch.float, device=abs_img_points.device) - scale = scale.view(1, 1, 2) - else: - scale = get_shape_from_feature_map(img) - - return abs_img_points / scale * spatial_scale - - -def rel_roi_point_to_rel_img_point(rois, rel_roi_points, img, spatial_scale=1.0): - """Convert roi based relative point coordinates to image based absolute - point coordinates. - - Args: - rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) - rel_roi_points (Tensor): Point coordinates inside RoI, relative to - RoI, location, range (0, 1), shape (N, P, 2) - img (tuple/Tensor): (height, width) of image or feature map. - spatial_scale (float): Scale points by this factor. Default: 1. - - Returns: - Tensor: Image based relative point coordinates for sampling, - shape (N, P, 2) - """ - - abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points) - rel_img_point = abs_img_point_to_rel_img_point(abs_img_point, img, spatial_scale) - - return rel_img_point - - -def point_sample(input, points, align_corners=False, **kwargs): - """A wrapper around :func:`grid_sample` to support 3D point_coords tensors - Unlike :func:`torch.nn.functional.grid_sample` it assumes point_coords to - lie inside ``[0, 1] x [0, 1]`` square. - - Args: - input (Tensor): Feature map, shape (N, C, H, W). - points (Tensor): Image based absolute point coordinates (normalized), - range [0, 1] x [0, 1], shape (N, P, 2) or (N, Hgrid, Wgrid, 2). - align_corners (bool): Whether align_corners. Default: False - - Returns: - Tensor: Features of `point` on `input`, shape (N, C, P) or - (N, C, Hgrid, Wgrid). - """ - - add_dim = False - if points.dim() == 3: - add_dim = True - points = points.unsqueeze(2) - if is_in_onnx_export_without_custom_ops(): - # If custom ops for onnx runtime not compiled use python - # implementation of grid_sample function to make onnx graph - # with supported nodes - output = bilinear_grid_sample(input, denormalize(points), align_corners=align_corners) - else: - output = F.grid_sample(input, denormalize(points), align_corners=align_corners, **kwargs) - if add_dim: - output = output.squeeze(3) - return output - - -class SimpleRoIAlign(nn.Module): - def __init__(self, output_size, spatial_scale, aligned=True): - """Simple RoI align in PointRend, faster than standard RoIAlign. - - Args: - output_size (tuple[int]): h, w - spatial_scale (float): scale the input boxes by this number - aligned (bool): if False, use the legacy implementation in - MMDetection, align_corners=True will be used in F.grid_sample. - If True, align the results more perfectly. - """ - - super(SimpleRoIAlign, self).__init__() - self.output_size = _pair(output_size) - self.spatial_scale = float(spatial_scale) - # to be consistent with other RoI ops - self.use_torchvision = False - self.aligned = aligned - - def forward(self, features, rois): - num_imgs = features.size(0) - num_rois = rois.size(0) - rel_roi_points = generate_grid(num_rois, self.output_size, device=rois.device) - - if torch.onnx.is_in_onnx_export(): - rel_img_points = rel_roi_point_to_rel_img_point(rois, rel_roi_points, features, self.spatial_scale) - rel_img_points = rel_img_points.reshape(num_imgs, -1, *rel_img_points.shape[1:]) - point_feats = point_sample(features, rel_img_points, align_corners=not self.aligned) - point_feats = point_feats.transpose(1, 2) - else: - point_feats = [] - for batch_ind in range(num_imgs): - # unravel batch dim - feat = features[batch_ind].unsqueeze(0) - inds = rois[:, 0].long() == batch_ind - if inds.any(): - rel_img_points = rel_roi_point_to_rel_img_point( - rois[inds], rel_roi_points[inds], feat, self.spatial_scale - ).unsqueeze(0) - point_feat = point_sample(feat, rel_img_points, align_corners=not self.aligned) - point_feat = point_feat.squeeze(0).transpose(0, 1) - point_feats.append(point_feat) - - point_feats = torch.cat(point_feats, dim=0) - - channels = features.size(1) - roi_feats = point_feats.reshape(num_rois, channels, *self.output_size) - - return roi_feats - - def __repr__(self): - format_str = self.__class__.__name__ - format_str += '(output_size={}, spatial_scale={}'.format(self.output_size, self.spatial_scale) - return format_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py deleted file mode 100644 index bf01e1d7746e..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py +++ /dev/null @@ -1,111 +0,0 @@ -import torch - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext( - '_ext', ['points_in_boxes_part_forward', 'points_in_boxes_cpu_forward', 'points_in_boxes_all_forward'] -) - - -def points_in_boxes_part(points, boxes): - """Find the box in which each point is (CUDA). - - Args: - points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate - boxes (torch.Tensor): [B, T, 7], - num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz] in - LiDAR/DEPTH coordinate, (x, y, z) is the bottom center - - Returns: - box_idxs_of_pts (torch.Tensor): (B, M), default background = -1 - """ - assert points.shape[0] == boxes.shape[0], ( - 'Points and boxes should have the same batch size, ' f'but got {points.shape[0]} and {boxes.shape[0]}' - ) - assert boxes.shape[2] == 7, 'boxes dimension should be 7, ' f'but got unexpected shape {boxes.shape[2]}' - assert points.shape[2] == 3, 'points dimension should be 3, ' f'but got unexpected shape {points.shape[2]}' - batch_size, num_points, _ = points.shape - - box_idxs_of_pts = points.new_zeros((batch_size, num_points), dtype=torch.int).fill_(-1) - - # If manually put the tensor 'points' or 'boxes' on a device - # which is not the current device, some temporary variables - # will be created on the current device in the cuda op, - # and the output will be incorrect. - # Therefore, we force the current device to be the same - # as the device of the tensors if it was not. - # Please refer to https://github.com/open-mmlab/mmdetection3d/issues/305 - # for the incorrect output before the fix. - points_device = points.get_device() - assert points_device == boxes.get_device(), 'Points and boxes should be put on the same device' - if torch.cuda.current_device() != points_device: - torch.cuda.set_device(points_device) - - ext_module.points_in_boxes_part_forward(boxes.contiguous(), points.contiguous(), box_idxs_of_pts) - - return box_idxs_of_pts - - -def points_in_boxes_cpu(points, boxes): - """Find all boxes in which each point is (CPU). The CPU version of - :meth:`points_in_boxes_all`. - - Args: - points (torch.Tensor): [B, M, 3], [x, y, z] in - LiDAR/DEPTH coordinate - boxes (torch.Tensor): [B, T, 7], - num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz], - (x, y, z) is the bottom center. - - Returns: - box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. - """ - assert points.shape[0] == boxes.shape[0], ( - 'Points and boxes should have the same batch size, ' f'but got {points.shape[0]} and {boxes.shape[0]}' - ) - assert boxes.shape[2] == 7, 'boxes dimension should be 7, ' f'but got unexpected shape {boxes.shape[2]}' - assert points.shape[2] == 3, 'points dimension should be 3, ' f'but got unexpected shape {points.shape[2]}' - batch_size, num_points, _ = points.shape - num_boxes = boxes.shape[1] - - point_indices = points.new_zeros((batch_size, num_boxes, num_points), dtype=torch.int) - for b in range(batch_size): - ext_module.points_in_boxes_cpu_forward( - boxes[b].float().contiguous(), points[b].float().contiguous(), point_indices[b] - ) - point_indices = point_indices.transpose(1, 2) - - return point_indices - - -def points_in_boxes_all(points, boxes): - """Find all boxes in which each point is (CUDA). - - Args: - points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate - boxes (torch.Tensor): [B, T, 7], - num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz], - (x, y, z) is the bottom center. - - Returns: - box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. - """ - assert boxes.shape[0] == points.shape[0], ( - 'Points and boxes should have the same batch size, ' f'but got {boxes.shape[0]} and {boxes.shape[0]}' - ) - assert boxes.shape[2] == 7, 'boxes dimension should be 7, ' f'but got unexpected shape {boxes.shape[2]}' - assert points.shape[2] == 3, 'points dimension should be 3, ' f'but got unexpected shape {points.shape[2]}' - batch_size, num_points, _ = points.shape - num_boxes = boxes.shape[1] - - box_idxs_of_pts = points.new_zeros((batch_size, num_points, num_boxes), dtype=torch.int).fill_(0) - - # Same reason as line 25-32 - points_device = points.get_device() - assert points_device == boxes.get_device(), 'Points and boxes should be put on the same device' - if torch.cuda.current_device() != points_device: - torch.cuda.set_device(points_device) - - ext_module.points_in_boxes_all_forward(boxes.contiguous(), points.contiguous(), box_idxs_of_pts) - - return box_idxs_of_pts diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py deleted file mode 100644 index 5e6d0078813f..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py +++ /dev/null @@ -1,169 +0,0 @@ -from typing import List - -import torch -from torch import nn as nn - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import force_fp32 - -from .furthest_point_sample import furthest_point_sample, furthest_point_sample_with_dist - - -def calc_square_dist(point_feat_a, point_feat_b, norm=True): - """Calculating square distance between a and b. - - Args: - point_feat_a (Tensor): (B, N, C) Feature vector of each point. - point_feat_b (Tensor): (B, M, C) Feature vector of each point. - norm (Bool, optional): Whether to normalize the distance. - Default: True. - - Returns: - Tensor: (B, N, M) Distance between each pair points. - """ - num_channel = point_feat_a.shape[-1] - # [bs, n, 1] - a_square = torch.sum(point_feat_a.unsqueeze(dim=2).pow(2), dim=-1) - # [bs, 1, m] - b_square = torch.sum(point_feat_b.unsqueeze(dim=1).pow(2), dim=-1) - - corr_matrix = torch.matmul(point_feat_a, point_feat_b.transpose(1, 2)) - - dist = a_square + b_square - 2 * corr_matrix - if norm: - dist = torch.sqrt(dist) / num_channel - return dist - - -def get_sampler_cls(sampler_type): - """Get the type and mode of points sampler. - - Args: - sampler_type (str): The type of points sampler. - The valid value are "D-FPS", "F-FPS", or "FS". - - Returns: - class: Points sampler type. - """ - sampler_mappings = { - 'D-FPS': DFPSSampler, - 'F-FPS': FFPSSampler, - 'FS': FSSampler, - } - try: - return sampler_mappings[sampler_type] - except KeyError: - raise KeyError( - f'Supported `sampler_type` are {sampler_mappings.keys()}, but got \ - {sampler_type}' - ) - - -class PointsSampler(nn.Module): - """Points sampling. - - Args: - num_point (list[int]): Number of sample points. - fps_mod_list (list[str], optional): Type of FPS method, valid mod - ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS']. - F-FPS: using feature distances for FPS. - D-FPS: using Euclidean distances of points for FPS. - FS: using F-FPS and D-FPS simultaneously. - fps_sample_range_list (list[int], optional): - Range of points to apply FPS. Default: [-1]. - """ - - def __init__( - self, num_point: List[int], fps_mod_list: List[str] = ['D-FPS'], fps_sample_range_list: List[int] = [-1] - ): - super().__init__() - # FPS would be applied to different fps_mod in the list, - # so the length of the num_point should be equal to - # fps_mod_list and fps_sample_range_list. - assert len(num_point) == len(fps_mod_list) == len(fps_sample_range_list) - self.num_point = num_point - self.fps_sample_range_list = fps_sample_range_list - self.samplers = nn.ModuleList() - for fps_mod in fps_mod_list: - self.samplers.append(get_sampler_cls(fps_mod)()) - self.fp16_enabled = False - - @force_fp32() - def forward(self, points_xyz, features): - """ - Args: - points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. - features (Tensor): (B, C, N) Descriptors of the features. - - Returns: - Tensor: (B, npoint, sample_num) Indices of sampled points. - """ - indices = [] - last_fps_end_index = 0 - - for fps_sample_range, sampler, npoint in zip(self.fps_sample_range_list, self.samplers, self.num_point): - assert fps_sample_range < points_xyz.shape[1] - - if fps_sample_range == -1: - sample_points_xyz = points_xyz[:, last_fps_end_index:] - if features is not None: - sample_features = features[:, :, last_fps_end_index:] - else: - sample_features = None - else: - sample_points_xyz = points_xyz[:, last_fps_end_index:fps_sample_range] - if features is not None: - sample_features = features[:, :, last_fps_end_index:fps_sample_range] - else: - sample_features = None - - fps_idx = sampler(sample_points_xyz.contiguous(), sample_features, npoint) - - indices.append(fps_idx + last_fps_end_index) - last_fps_end_index += fps_sample_range - indices = torch.cat(indices, dim=1) - - return indices - - -class DFPSSampler(nn.Module): - """Using Euclidean distances of points for FPS.""" - - def __init__(self): - super().__init__() - - def forward(self, points, features, npoint): - """Sampling points with D-FPS.""" - fps_idx = furthest_point_sample(points.contiguous(), npoint) - return fps_idx - - -class FFPSSampler(nn.Module): - """Using feature distances for FPS.""" - - def __init__(self): - super().__init__() - - def forward(self, points, features, npoint): - """Sampling points with F-FPS.""" - assert features is not None, 'feature input to FFPS_Sampler should not be None' - features_for_fps = torch.cat([points, features.transpose(1, 2)], dim=2) - features_dist = calc_square_dist(features_for_fps, features_for_fps, norm=False) - fps_idx = furthest_point_sample_with_dist(features_dist, npoint) - return fps_idx - - -class FSSampler(nn.Module): - """Using F-FPS and D-FPS simultaneously.""" - - def __init__(self): - super().__init__() - - def forward(self, points, features, npoint): - """Sampling points with FS_Sampling.""" - assert features is not None, 'feature input to FS_Sampler should not be None' - ffps_sampler = FFPSSampler() - dfps_sampler = DFPSSampler() - fps_idx_ffps = ffps_sampler(points, features, npoint) - fps_idx_dfps = dfps_sampler(points, features, npoint) - fps_idx = torch.cat([fps_idx_ffps, fps_idx_dfps], dim=1) - return fps_idx diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py deleted file mode 100644 index e49546cb9059..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py +++ /dev/null @@ -1,85 +0,0 @@ -# Modified from https://github.com/hszhao/semseg/blob/master/lib/psa -from torch import nn -from torch.autograd import Function -from torch.nn.modules.utils import _pair - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['psamask_forward', 'psamask_backward']) - - -class PSAMaskFunction(Function): - @staticmethod - def symbolic(g, input, psa_type, mask_size): - return g.op('mmcv::MMCVPSAMask', input, psa_type_i=psa_type, mask_size_i=mask_size) - - @staticmethod - def forward(ctx, input, psa_type, mask_size): - ctx.psa_type = psa_type - ctx.mask_size = _pair(mask_size) - ctx.save_for_backward(input) - - h_mask, w_mask = ctx.mask_size - batch_size, channels, h_feature, w_feature = input.size() - assert channels == h_mask * w_mask - output = input.new_zeros((batch_size, h_feature * w_feature, h_feature, w_feature)) - - ext_module.psamask_forward( - input, - output, - psa_type=psa_type, - num_=batch_size, - h_feature=h_feature, - w_feature=w_feature, - h_mask=h_mask, - w_mask=w_mask, - half_h_mask=(h_mask - 1) // 2, - half_w_mask=(w_mask - 1) // 2, - ) - return output - - @staticmethod - def backward(ctx, grad_output): - input = ctx.saved_tensors[0] - psa_type = ctx.psa_type - h_mask, w_mask = ctx.mask_size - batch_size, channels, h_feature, w_feature = input.size() - grad_input = grad_output.new_zeros((batch_size, channels, h_feature, w_feature)) - ext_module.psamask_backward( - grad_output, - grad_input, - psa_type=psa_type, - num_=batch_size, - h_feature=h_feature, - w_feature=w_feature, - h_mask=h_mask, - w_mask=w_mask, - half_h_mask=(h_mask - 1) // 2, - half_w_mask=(w_mask - 1) // 2, - ) - return grad_input, None, None, None - - -psa_mask = PSAMaskFunction.apply - - -class PSAMask(nn.Module): - def __init__(self, psa_type, mask_size=None): - super(PSAMask, self).__init__() - assert psa_type in ['collect', 'distribute'] - if psa_type == 'collect': - psa_type_enum = 0 - else: - psa_type_enum = 1 - self.psa_type_enum = psa_type_enum - self.mask_size = mask_size - self.psa_type = psa_type - - def forward(self, input): - return psa_mask(input, self.psa_type_enum, self.mask_size) - - def __repr__(self): - s = self.__class__.__name__ - s += f'(psa_type={self.psa_type}, ' - s += f'mask_size={self.mask_size})' - return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py deleted file mode 100644 index 15be8de4db83..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from ..utils import deprecated_api_warning, ext_loader - -ext_module = ext_loader.load_ext('_ext', ['roi_align_forward', 'roi_align_backward']) - - -class RoIAlignFunction(Function): - @staticmethod - def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio, pool_mode, aligned): - from ..onnx import is_custom_op_loaded - - has_custom_op = is_custom_op_loaded() - if has_custom_op: - return g.op( - 'mmcv::MMCVRoiAlign', - input, - rois, - output_height_i=output_size[0], - output_width_i=output_size[1], - spatial_scale_f=spatial_scale, - sampling_ratio_i=sampling_ratio, - mode_s=pool_mode, - aligned_i=aligned, - ) - else: - from torch.onnx import TensorProtoDataType - from torch.onnx.symbolic_helper import _slice_helper - from torch.onnx.symbolic_opset9 import squeeze, sub - - # batch_indices = rois[:, 0].long() - batch_indices = _slice_helper(g, rois, axes=[1], starts=[0], ends=[1]) - batch_indices = squeeze(g, batch_indices, 1) - batch_indices = g.op('Cast', batch_indices, to_i=TensorProtoDataType.INT64) - # rois = rois[:, 1:] - rois = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5]) - if aligned: - # rois -= 0.5/spatial_scale - aligned_offset = g.op('Constant', value_t=torch.tensor([0.5 / spatial_scale], dtype=torch.float32)) - rois = sub(g, rois, aligned_offset) - # roi align - return g.op( - 'RoiAlign', - input, - rois, - batch_indices, - output_height_i=output_size[0], - output_width_i=output_size[1], - spatial_scale_f=spatial_scale, - sampling_ratio_i=max(0, sampling_ratio), - mode_s=pool_mode, - ) - - @staticmethod - def forward(ctx, input, rois, output_size, spatial_scale=1.0, sampling_ratio=0, pool_mode='avg', aligned=True): - ctx.output_size = _pair(output_size) - ctx.spatial_scale = spatial_scale - ctx.sampling_ratio = sampling_ratio - assert pool_mode in ('max', 'avg') - ctx.pool_mode = 0 if pool_mode == 'max' else 1 - ctx.aligned = aligned - ctx.input_shape = input.size() - - assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' - - output_shape = (rois.size(0), input.size(1), ctx.output_size[0], ctx.output_size[1]) - output = input.new_zeros(output_shape) - if ctx.pool_mode == 0: - argmax_y = input.new_zeros(output_shape) - argmax_x = input.new_zeros(output_shape) - else: - argmax_y = input.new_zeros(0) - argmax_x = input.new_zeros(0) - - ext_module.roi_align_forward( - input, - rois, - output, - argmax_y, - argmax_x, - aligned_height=ctx.output_size[0], - aligned_width=ctx.output_size[1], - spatial_scale=ctx.spatial_scale, - sampling_ratio=ctx.sampling_ratio, - pool_mode=ctx.pool_mode, - aligned=ctx.aligned, - ) - - ctx.save_for_backward(rois, argmax_y, argmax_x) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - rois, argmax_y, argmax_x = ctx.saved_tensors - grad_input = grad_output.new_zeros(ctx.input_shape) - # complex head architecture may cause grad_output uncontiguous. - grad_output = grad_output.contiguous() - ext_module.roi_align_backward( - grad_output, - rois, - argmax_y, - argmax_x, - grad_input, - aligned_height=ctx.output_size[0], - aligned_width=ctx.output_size[1], - spatial_scale=ctx.spatial_scale, - sampling_ratio=ctx.sampling_ratio, - pool_mode=ctx.pool_mode, - aligned=ctx.aligned, - ) - return grad_input, None, None, None, None, None, None - - -roi_align = RoIAlignFunction.apply - - -class RoIAlign(nn.Module): - """RoI align pooling layer. - - Args: - output_size (tuple): h, w - spatial_scale (float): scale the input boxes by this number - sampling_ratio (int): number of inputs samples to take for each - output sample. 0 to take samples densely for current models. - pool_mode (str, 'avg' or 'max'): pooling mode in each bin. - aligned (bool): if False, use the legacy implementation in - MMDetection. If True, align the results more perfectly. - use_torchvision (bool): whether to use roi_align from torchvision. - - Note: - The implementation of RoIAlign when aligned=True is modified from - https://github.com/facebookresearch/detectron2/ - - The meaning of aligned=True: - - Given a continuous coordinate c, its two neighboring pixel - indices (in our pixel model) are computed by floor(c - 0.5) and - ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete - indices [0] and [1] (which are sampled from the underlying signal - at continuous coordinates 0.5 and 1.5). But the original roi_align - (aligned=False) does not subtract the 0.5 when computing - neighboring pixel indices and therefore it uses pixels with a - slightly incorrect alignment (relative to our pixel model) when - performing bilinear interpolation. - - With `aligned=True`, - we first appropriately scale the ROI and then shift it by -0.5 - prior to calling roi_align. This produces the correct neighbors; - - The difference does not make a difference to the model's - performance if ROIAlign is used together with conv layers. - """ - - @deprecated_api_warning({'out_size': 'output_size', 'sample_num': 'sampling_ratio'}, cls_name='RoIAlign') - def __init__( - self, output_size, spatial_scale=1.0, sampling_ratio=0, pool_mode='avg', aligned=True, use_torchvision=False - ): - super(RoIAlign, self).__init__() - - self.output_size = _pair(output_size) - self.spatial_scale = float(spatial_scale) - self.sampling_ratio = int(sampling_ratio) - self.pool_mode = pool_mode - self.aligned = aligned - self.use_torchvision = use_torchvision - - def forward(self, input, rois): - """ - Args: - input: NCHW images - rois: Bx5 boxes. First column is the index into N.\ - The other 4 columns are xyxy. - """ - if self.use_torchvision: - from torchvision.ops import roi_align as tv_roi_align - - if 'aligned' in tv_roi_align.__code__.co_varnames: - return tv_roi_align( - input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned - ) - else: - if self.aligned: - rois -= rois.new_tensor([0.0] + [0.5 / self.spatial_scale] * 4) - return tv_roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio) - else: - return roi_align( - input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.pool_mode, self.aligned - ) - - def __repr__(self): - s = self.__class__.__name__ - s += f'(output_size={self.output_size}, ' - s += f'spatial_scale={self.spatial_scale}, ' - s += f'sampling_ratio={self.sampling_ratio}, ' - s += f'pool_mode={self.pool_mode}, ' - s += f'aligned={self.aligned}, ' - s += f'use_torchvision={self.use_torchvision})' - return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py deleted file mode 100644 index 07108d2bc888..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch.nn as nn -from torch.autograd import Function - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['roi_align_rotated_forward', 'roi_align_rotated_backward']) - - -class RoIAlignRotatedFunction(Function): - @staticmethod - def symbolic(g, features, rois, out_size, spatial_scale, sample_num, aligned, clockwise): - if isinstance(out_size, int): - out_h = out_size - out_w = out_size - elif isinstance(out_size, tuple): - assert len(out_size) == 2 - assert isinstance(out_size[0], int) - assert isinstance(out_size[1], int) - out_h, out_w = out_size - else: - raise TypeError('"out_size" must be an integer or tuple of integers') - return g.op( - 'mmcv::MMCVRoIAlignRotated', - features, - rois, - output_height_i=out_h, - output_width_i=out_h, - spatial_scale_f=spatial_scale, - sampling_ratio_i=sample_num, - aligned_i=aligned, - clockwise_i=clockwise, - ) - - @staticmethod - def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0, aligned=True, clockwise=False): - if isinstance(out_size, int): - out_h = out_size - out_w = out_size - elif isinstance(out_size, tuple): - assert len(out_size) == 2 - assert isinstance(out_size[0], int) - assert isinstance(out_size[1], int) - out_h, out_w = out_size - else: - raise TypeError('"out_size" must be an integer or tuple of integers') - ctx.spatial_scale = spatial_scale - ctx.sample_num = sample_num - ctx.aligned = aligned - ctx.clockwise = clockwise - ctx.save_for_backward(rois) - ctx.feature_size = features.size() - - batch_size, num_channels, data_height, data_width = features.size() - num_rois = rois.size(0) - - output = features.new_zeros(num_rois, num_channels, out_h, out_w) - ext_module.roi_align_rotated_forward( - features, - rois, - output, - pooled_height=out_h, - pooled_width=out_w, - spatial_scale=spatial_scale, - sample_num=sample_num, - aligned=aligned, - clockwise=clockwise, - ) - return output - - @staticmethod - def backward(ctx, grad_output): - feature_size = ctx.feature_size - spatial_scale = ctx.spatial_scale - aligned = ctx.aligned - clockwise = ctx.clockwise - sample_num = ctx.sample_num - rois = ctx.saved_tensors[0] - assert feature_size is not None - batch_size, num_channels, data_height, data_width = feature_size - - out_w = grad_output.size(3) - out_h = grad_output.size(2) - - grad_input = grad_rois = None - - if ctx.needs_input_grad[0]: - grad_input = rois.new_zeros(batch_size, num_channels, data_height, data_width) - ext_module.roi_align_rotated_backward( - grad_output.contiguous(), - rois, - grad_input, - pooled_height=out_h, - pooled_width=out_w, - spatial_scale=spatial_scale, - sample_num=sample_num, - aligned=aligned, - clockwise=clockwise, - ) - return grad_input, grad_rois, None, None, None, None, None - - -roi_align_rotated = RoIAlignRotatedFunction.apply - - -class RoIAlignRotated(nn.Module): - """RoI align pooling layer for rotated proposals. - - It accepts a feature map of shape (N, C, H, W) and rois with shape - (n, 6) with each roi decoded as (batch_index, center_x, center_y, - w, h, angle). The angle is in radian. - - Args: - out_size (tuple): h, w - spatial_scale (float): scale the input boxes by this number - sample_num (int): number of inputs samples to take for each - output sample. 0 to take samples densely for current models. - aligned (bool): if False, use the legacy implementation in - MMDetection. If True, align the results more perfectly. - Default: True. - clockwise (bool): If True, the angle in each proposal follows a - clockwise fashion in image space, otherwise, the angle is - counterclockwise. Default: False. - - Note: - The implementation of RoIAlign when aligned=True is modified from - https://github.com/facebookresearch/detectron2/ - - The meaning of aligned=True: - - Given a continuous coordinate c, its two neighboring pixel - indices (in our pixel model) are computed by floor(c - 0.5) and - ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete - indices [0] and [1] (which are sampled from the underlying signal - at continuous coordinates 0.5 and 1.5). But the original roi_align - (aligned=False) does not subtract the 0.5 when computing - neighboring pixel indices and therefore it uses pixels with a - slightly incorrect alignment (relative to our pixel model) when - performing bilinear interpolation. - - With `aligned=True`, - we first appropriately scale the ROI and then shift it by -0.5 - prior to calling roi_align. This produces the correct neighbors; - - The difference does not make a difference to the model's - performance if ROIAlign is used together with conv layers. - """ - - def __init__(self, out_size, spatial_scale, sample_num=0, aligned=True, clockwise=False): - super(RoIAlignRotated, self).__init__() - - self.out_size = out_size - self.spatial_scale = float(spatial_scale) - self.sample_num = int(sample_num) - self.aligned = aligned - self.clockwise = clockwise - - def forward(self, features, rois): - return RoIAlignRotatedFunction.apply( - features, rois, self.out_size, self.spatial_scale, self.sample_num, self.aligned, self.clockwise - ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py deleted file mode 100644 index 04e3d55a3a5c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['roi_pool_forward', 'roi_pool_backward']) - - -class RoIPoolFunction(Function): - @staticmethod - def symbolic(g, input, rois, output_size, spatial_scale): - return g.op('MaxRoiPool', input, rois, pooled_shape_i=output_size, spatial_scale_f=spatial_scale) - - @staticmethod - def forward(ctx, input, rois, output_size, spatial_scale=1.0): - ctx.output_size = _pair(output_size) - ctx.spatial_scale = spatial_scale - ctx.input_shape = input.size() - - assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' - - output_shape = (rois.size(0), input.size(1), ctx.output_size[0], ctx.output_size[1]) - output = input.new_zeros(output_shape) - argmax = input.new_zeros(output_shape, dtype=torch.int) - - ext_module.roi_pool_forward( - input, - rois, - output, - argmax, - pooled_height=ctx.output_size[0], - pooled_width=ctx.output_size[1], - spatial_scale=ctx.spatial_scale, - ) - - ctx.save_for_backward(rois, argmax) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - rois, argmax = ctx.saved_tensors - grad_input = grad_output.new_zeros(ctx.input_shape) - - ext_module.roi_pool_backward( - grad_output, - rois, - argmax, - grad_input, - pooled_height=ctx.output_size[0], - pooled_width=ctx.output_size[1], - spatial_scale=ctx.spatial_scale, - ) - - return grad_input, None, None, None - - -roi_pool = RoIPoolFunction.apply - - -class RoIPool(nn.Module): - def __init__(self, output_size, spatial_scale=1.0): - super(RoIPool, self).__init__() - - self.output_size = _pair(output_size) - self.spatial_scale = float(spatial_scale) - - def forward(self, input, rois): - return roi_pool(input, rois, self.output_size, self.spatial_scale) - - def __repr__(self): - s = self.__class__.__name__ - s += f'(output_size={self.output_size}, ' - s += f'spatial_scale={self.spatial_scale})' - return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py deleted file mode 100644 index befb3a757324..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -from torch import nn as nn -from torch.autograd import Function - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['roiaware_pool3d_forward', 'roiaware_pool3d_backward']) - - -class RoIAwarePool3d(nn.Module): - """Encode the geometry-specific features of each 3D proposal. - - Please refer to `PartA2 `_ for more - details. - - Args: - out_size (int or tuple): The size of output features. n or - [n1, n2, n3]. - max_pts_per_voxel (int, optional): The maximum number of points per - voxel. Default: 128. - mode (str, optional): Pooling method of RoIAware, 'max' or 'avg'. - Default: 'max'. - """ - - def __init__(self, out_size, max_pts_per_voxel=128, mode='max'): - super().__init__() - - self.out_size = out_size - self.max_pts_per_voxel = max_pts_per_voxel - assert mode in ['max', 'avg'] - pool_mapping = {'max': 0, 'avg': 1} - self.mode = pool_mapping[mode] - - def forward(self, rois, pts, pts_feature): - """ - Args: - rois (torch.Tensor): [N, 7], in LiDAR coordinate, - (x, y, z) is the bottom center of rois. - pts (torch.Tensor): [npoints, 3], coordinates of input points. - pts_feature (torch.Tensor): [npoints, C], features of input points. - - Returns: - pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C] - """ - - return RoIAwarePool3dFunction.apply(rois, pts, pts_feature, self.out_size, self.max_pts_per_voxel, self.mode) - - -class RoIAwarePool3dFunction(Function): - @staticmethod - def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel, mode): - """ - Args: - rois (torch.Tensor): [N, 7], in LiDAR coordinate, - (x, y, z) is the bottom center of rois. - pts (torch.Tensor): [npoints, 3], coordinates of input points. - pts_feature (torch.Tensor): [npoints, C], features of input points. - out_size (int or tuple): The size of output features. n or - [n1, n2, n3]. - max_pts_per_voxel (int): The maximum number of points per voxel. - Default: 128. - mode (int): Pooling method of RoIAware, 0 (max pool) or 1 (average - pool). - - Returns: - pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C], output - pooled features. - """ - - if isinstance(out_size, int): - out_x = out_y = out_z = out_size - else: - assert len(out_size) == 3 - assert mmcv.is_tuple_of(out_size, int) - out_x, out_y, out_z = out_size - - num_rois = rois.shape[0] - num_channels = pts_feature.shape[-1] - num_pts = pts.shape[0] - - pooled_features = pts_feature.new_zeros((num_rois, out_x, out_y, out_z, num_channels)) - argmax = pts_feature.new_zeros((num_rois, out_x, out_y, out_z, num_channels), dtype=torch.int) - pts_idx_of_voxels = pts_feature.new_zeros((num_rois, out_x, out_y, out_z, max_pts_per_voxel), dtype=torch.int) - - ext_module.roiaware_pool3d_forward(rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features, mode) - - ctx.roiaware_pool3d_for_backward = (pts_idx_of_voxels, argmax, mode, num_pts, num_channels) - return pooled_features - - @staticmethod - def backward(ctx, grad_out): - ret = ctx.roiaware_pool3d_for_backward - pts_idx_of_voxels, argmax, mode, num_pts, num_channels = ret - - grad_in = grad_out.new_zeros((num_pts, num_channels)) - ext_module.roiaware_pool3d_backward(pts_idx_of_voxels, argmax, grad_out.contiguous(), grad_in, mode) - - return None, None, grad_in, None, None, None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py deleted file mode 100644 index c24c4844bd24..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py +++ /dev/null @@ -1,75 +0,0 @@ -from torch import nn as nn -from torch.autograd import Function - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['roipoint_pool3d_forward']) - - -class RoIPointPool3d(nn.Module): - """Encode the geometry-specific features of each 3D proposal. - - Please refer to `Paper of PartA2 `_ - for more details. - - Args: - num_sampled_points (int, optional): Number of samples in each roi. - Default: 512. - """ - - def __init__(self, num_sampled_points=512): - super().__init__() - self.num_sampled_points = num_sampled_points - - def forward(self, points, point_features, boxes3d): - """ - Args: - points (torch.Tensor): Input points whose shape is (B, N, C). - point_features (torch.Tensor): Features of input points whose shape - is (B, N, C). - boxes3d (B, M, 7), Input bounding boxes whose shape is (B, M, 7). - - Returns: - pooled_features (torch.Tensor): The output pooled features whose - shape is (B, M, 512, 3 + C). - pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M). - """ - return RoIPointPool3dFunction.apply(points, point_features, boxes3d, self.num_sampled_points) - - -class RoIPointPool3dFunction(Function): - @staticmethod - def forward(ctx, points, point_features, boxes3d, num_sampled_points=512): - """ - Args: - points (torch.Tensor): Input points whose shape is (B, N, C). - point_features (torch.Tensor): Features of input points whose shape - is (B, N, C). - boxes3d (B, M, 7), Input bounding boxes whose shape is (B, M, 7). - num_sampled_points (int, optional): The num of sampled points. - Default: 512. - - Returns: - pooled_features (torch.Tensor): The output pooled features whose - shape is (B, M, 512, 3 + C). - pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M). - """ - assert len(points.shape) == 3 and points.shape[2] == 3 - batch_size, boxes_num, feature_len = points.shape[0], boxes3d.shape[1], point_features.shape[2] - pooled_boxes3d = boxes3d.view(batch_size, -1, 7) - pooled_features = point_features.new_zeros((batch_size, boxes_num, num_sampled_points, 3 + feature_len)) - pooled_empty_flag = point_features.new_zeros((batch_size, boxes_num)).int() - - ext_module.roipoint_pool3d_forward( - points.contiguous(), - pooled_boxes3d.contiguous(), - point_features.contiguous(), - pooled_features, - pooled_empty_flag, - ) - - return pooled_features, pooled_empty_flag - - @staticmethod - def backward(ctx, grad_out): - raise NotImplementedError diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py deleted file mode 100644 index c72bbb8a502d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.nn as nn -import torch.nn.functional as F - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import CONV_LAYERS, ConvAWS2d, constant_init -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops.deform_conv import deform_conv2d -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version - - -@CONV_LAYERS.register_module(name='SAC') -class SAConv2d(ConvAWS2d): - """SAC (Switchable Atrous Convolution) - - This is an implementation of SAC in DetectoRS - (https://arxiv.org/pdf/2006.02334.pdf). - - Args: - in_channels (int): Number of channels in the input image - out_channels (int): Number of channels produced by the convolution - kernel_size (int or tuple): Size of the convolving kernel - stride (int or tuple, optional): Stride of the convolution. Default: 1 - padding (int or tuple, optional): Zero-padding added to both sides of - the input. Default: 0 - padding_mode (string, optional): ``'zeros'``, ``'reflect'``, - ``'replicate'`` or ``'circular'``. Default: ``'zeros'`` - dilation (int or tuple, optional): Spacing between kernel elements. - Default: 1 - groups (int, optional): Number of blocked connections from input - channels to output channels. Default: 1 - bias (bool, optional): If ``True``, adds a learnable bias to the - output. Default: ``True`` - use_deform: If ``True``, replace convolution with deformable - convolution. Default: ``False``. - """ - - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - bias=True, - use_deform=False, - ): - super().__init__( - in_channels, - out_channels, - kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups, - bias=bias, - ) - self.use_deform = use_deform - self.switch = nn.Conv2d(self.in_channels, 1, kernel_size=1, stride=stride, bias=True) - self.weight_diff = nn.Parameter(torch.Tensor(self.weight.size())) - self.pre_context = nn.Conv2d(self.in_channels, self.in_channels, kernel_size=1, bias=True) - self.post_context = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=1, bias=True) - if self.use_deform: - self.offset_s = nn.Conv2d(self.in_channels, 18, kernel_size=3, padding=1, stride=stride, bias=True) - self.offset_l = nn.Conv2d(self.in_channels, 18, kernel_size=3, padding=1, stride=stride, bias=True) - self.init_weights() - - def init_weights(self): - constant_init(self.switch, 0, bias=1) - self.weight_diff.data.zero_() - constant_init(self.pre_context, 0) - constant_init(self.post_context, 0) - if self.use_deform: - constant_init(self.offset_s, 0) - constant_init(self.offset_l, 0) - - def forward(self, x): - # pre-context - avg_x = F.adaptive_avg_pool2d(x, output_size=1) - avg_x = self.pre_context(avg_x) - avg_x = avg_x.expand_as(x) - x = x + avg_x - # switch - avg_x = F.pad(x, pad=(2, 2, 2, 2), mode='reflect') - avg_x = F.avg_pool2d(avg_x, kernel_size=5, stride=1, padding=0) - switch = self.switch(avg_x) - # sac - weight = self._get_weight(self.weight) - zero_bias = torch.zeros(self.out_channels, device=weight.device, dtype=weight.dtype) - - if self.use_deform: - offset = self.offset_s(avg_x) - out_s = deform_conv2d(x, offset, weight, self.stride, self.padding, self.dilation, self.groups, 1) - else: - if TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.5.0'): - out_s = super().conv2d_forward(x, weight) - elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'): - # bias is a required argument of _conv_forward in torch 1.8.0 - out_s = super()._conv_forward(x, weight, zero_bias) - else: - out_s = super()._conv_forward(x, weight) - ori_p = self.padding - ori_d = self.dilation - self.padding = tuple(3 * p for p in self.padding) - self.dilation = tuple(3 * d for d in self.dilation) - weight = weight + self.weight_diff - if self.use_deform: - offset = self.offset_l(avg_x) - out_l = deform_conv2d(x, offset, weight, self.stride, self.padding, self.dilation, self.groups, 1) - else: - if TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.5.0'): - out_l = super().conv2d_forward(x, weight) - elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'): - # bias is a required argument of _conv_forward in torch 1.8.0 - out_l = super()._conv_forward(x, weight, zero_bias) - else: - out_l = super()._conv_forward(x, weight) - - out = switch * out_s + (1 - switch) * out_l - self.padding = ori_p - self.dilation = ori_d - # post-context - avg_x = F.adaptive_avg_pool2d(out, output_size=1) - avg_x = self.post_context(avg_x) - avg_x = avg_x.expand_as(out) - out = out + avg_x - return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py deleted file mode 100644 index 6d5866dcf2c6..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -from torch import nn -from torch.autograd import Function - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['dynamic_point_to_voxel_forward', 'dynamic_point_to_voxel_backward']) - - -class _DynamicScatter(Function): - @staticmethod - def forward(ctx, feats, coors, reduce_type='max'): - """convert kitti points(N, >=3) to voxels. - - Args: - feats (torch.Tensor): [N, C]. Points features to be reduced - into voxels. - coors (torch.Tensor): [N, ndim]. Corresponding voxel coordinates - (specifically multi-dim voxel index) of each points. - reduce_type (str, optional): Reduce op. support 'max', 'sum' and - 'mean'. Default: 'max'. - - Returns: - voxel_feats (torch.Tensor): [M, C]. Reduced features, input - features that shares the same voxel coordinates are reduced to - one row. - voxel_coors (torch.Tensor): [M, ndim]. Voxel coordinates. - """ - results = ext_module.dynamic_point_to_voxel_forward(feats, coors, reduce_type) - (voxel_feats, voxel_coors, point2voxel_map, voxel_points_count) = results - ctx.reduce_type = reduce_type - ctx.save_for_backward(feats, voxel_feats, point2voxel_map, voxel_points_count) - ctx.mark_non_differentiable(voxel_coors) - return voxel_feats, voxel_coors - - @staticmethod - def backward(ctx, grad_voxel_feats, grad_voxel_coors=None): - (feats, voxel_feats, point2voxel_map, voxel_points_count) = ctx.saved_tensors - grad_feats = torch.zeros_like(feats) - # TODO: whether to use index put or use cuda_backward - # To use index put, need point to voxel index - ext_module.dynamic_point_to_voxel_backward( - grad_feats, - grad_voxel_feats.contiguous(), - feats, - voxel_feats, - point2voxel_map, - voxel_points_count, - ctx.reduce_type, - ) - return grad_feats, None, None - - -dynamic_scatter = _DynamicScatter.apply - - -class DynamicScatter(nn.Module): - """Scatters points into voxels, used in the voxel encoder with dynamic - voxelization. - - Note: - The CPU and GPU implementation get the same output, but have numerical - difference after summation and division (e.g., 5e-7). - - Args: - voxel_size (list): list [x, y, z] size of three dimension. - point_cloud_range (list): The coordinate range of points, [x_min, - y_min, z_min, x_max, y_max, z_max]. - average_points (bool): whether to use avg pooling to scatter points - into voxel. - """ - - def __init__(self, voxel_size, point_cloud_range, average_points: bool): - super().__init__() - - self.voxel_size = voxel_size - self.point_cloud_range = point_cloud_range - self.average_points = average_points - - def forward_single(self, points, coors): - """Scatters points into voxels. - - Args: - points (torch.Tensor): Points to be reduced into voxels. - coors (torch.Tensor): Corresponding voxel coordinates (specifically - multi-dim voxel index) of each points. - - Returns: - voxel_feats (torch.Tensor): Reduced features, input features that - shares the same voxel coordinates are reduced to one row. - voxel_coors (torch.Tensor): Voxel coordinates. - """ - reduce = 'mean' if self.average_points else 'max' - return dynamic_scatter(points.contiguous(), coors.contiguous(), reduce) - - def forward(self, points, coors): - """Scatters points/features into voxels. - - Args: - points (torch.Tensor): Points to be reduced into voxels. - coors (torch.Tensor): Corresponding voxel coordinates (specifically - multi-dim voxel index) of each points. - - Returns: - voxel_feats (torch.Tensor): Reduced features, input features that - shares the same voxel coordinates are reduced to one row. - voxel_coors (torch.Tensor): Voxel coordinates. - """ - if coors.size(-1) == 3: - return self.forward_single(points, coors) - else: - batch_size = coors[-1, 0] + 1 - voxels, voxel_coors = [], [] - for i in range(batch_size): - inds = torch.where(coors[:, 0] == i) - voxel, voxel_coor = self.forward_single(points[inds], coors[inds][:, 1:]) - coor_pad = nn.functional.pad(voxel_coor, (1, 0), mode='constant', value=i) - voxel_coors.append(coor_pad) - voxels.append(voxel) - features = torch.cat(voxels, dim=0) - feature_coors = torch.cat(voxel_coors, dim=0) - - return features, feature_coors - - def __repr__(self): - s = self.__class__.__name__ + '(' - s += 'voxel_size=' + str(self.voxel_size) - s += ', point_cloud_range=' + str(self.point_cloud_range) - s += ', average_points=' + str(self.average_points) - s += ')' - return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py deleted file mode 100644 index 28a609585caf..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py +++ /dev/null @@ -1,288 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.distributed as dist -import torch.nn.functional as F -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.module import Module -from torch.nn.parameter import Parameter - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import NORM_LAYERS -from ..utils import ext_loader - -ext_module = ext_loader.load_ext( - '_ext', - [ - 'sync_bn_forward_mean', - 'sync_bn_forward_var', - 'sync_bn_forward_output', - 'sync_bn_backward_param', - 'sync_bn_backward_data', - ], -) - - -class SyncBatchNormFunction(Function): - @staticmethod - def symbolic(g, input, running_mean, running_var, weight, bias, momentum, eps, group, group_size, stats_mode): - return g.op( - 'mmcv::MMCVSyncBatchNorm', - input, - running_mean, - running_var, - weight, - bias, - momentum_f=momentum, - eps_f=eps, - group_i=group, - group_size_i=group_size, - stats_mode=stats_mode, - ) - - @staticmethod - def forward(self, input, running_mean, running_var, weight, bias, momentum, eps, group, group_size, stats_mode): - self.momentum = momentum - self.eps = eps - self.group = group - self.group_size = group_size - self.stats_mode = stats_mode - - assert isinstance( - input, (torch.HalfTensor, torch.FloatTensor, torch.cuda.HalfTensor, torch.cuda.FloatTensor) - ), f'only support Half or Float Tensor, but {input.type()}' - output = torch.zeros_like(input) - input3d = input.flatten(start_dim=2) - output3d = output.view_as(input3d) - num_channels = input3d.size(1) - - # ensure mean/var/norm/std are initialized as zeros - # ``torch.empty()`` does not guarantee that - mean = torch.zeros(num_channels, dtype=torch.float, device=input3d.device) - var = torch.zeros(num_channels, dtype=torch.float, device=input3d.device) - norm = torch.zeros_like(input3d, dtype=torch.float, device=input3d.device) - std = torch.zeros(num_channels, dtype=torch.float, device=input3d.device) - - batch_size = input3d.size(0) - if batch_size > 0: - ext_module.sync_bn_forward_mean(input3d, mean) - batch_flag = torch.ones([1], device=mean.device, dtype=mean.dtype) - else: - # skip updating mean and leave it as zeros when the input is empty - batch_flag = torch.zeros([1], device=mean.device, dtype=mean.dtype) - - # synchronize mean and the batch flag - vec = torch.cat([mean, batch_flag]) - if self.stats_mode == 'N': - vec *= batch_size - if self.group_size > 1: - dist.all_reduce(vec, group=self.group) - total_batch = vec[-1].detach() - mean = vec[:num_channels] - - if self.stats_mode == 'default': - mean = mean / self.group_size - elif self.stats_mode == 'N': - mean = mean / total_batch.clamp(min=1) - else: - raise NotImplementedError - - # leave var as zeros when the input is empty - if batch_size > 0: - ext_module.sync_bn_forward_var(input3d, mean, var) - - if self.stats_mode == 'N': - var *= batch_size - if self.group_size > 1: - dist.all_reduce(var, group=self.group) - - if self.stats_mode == 'default': - var /= self.group_size - elif self.stats_mode == 'N': - var /= total_batch.clamp(min=1) - else: - raise NotImplementedError - - # if the total batch size over all the ranks is zero, - # we should not update the statistics in the current batch - update_flag = total_batch.clamp(max=1) - momentum = update_flag * self.momentum - ext_module.sync_bn_forward_output( - input3d, - mean, - var, - weight, - bias, - running_mean, - running_var, - norm, - std, - output3d, - eps=self.eps, - momentum=momentum, - group_size=self.group_size, - ) - self.save_for_backward(norm, std, weight) - return output - - @staticmethod - @once_differentiable - def backward(self, grad_output): - norm, std, weight = self.saved_tensors - grad_weight = torch.zeros_like(weight) - grad_bias = torch.zeros_like(weight) - grad_input = torch.zeros_like(grad_output) - grad_output3d = grad_output.flatten(start_dim=2) - grad_input3d = grad_input.view_as(grad_output3d) - - batch_size = grad_input3d.size(0) - if batch_size > 0: - ext_module.sync_bn_backward_param(grad_output3d, norm, grad_weight, grad_bias) - - # all reduce - if self.group_size > 1: - dist.all_reduce(grad_weight, group=self.group) - dist.all_reduce(grad_bias, group=self.group) - grad_weight /= self.group_size - grad_bias /= self.group_size - - if batch_size > 0: - ext_module.sync_bn_backward_data(grad_output3d, weight, grad_weight, grad_bias, norm, std, grad_input3d) - - return grad_input, None, None, grad_weight, grad_bias, None, None, None, None, None - - -@NORM_LAYERS.register_module(name='MMSyncBN') -class SyncBatchNorm(Module): - """Synchronized Batch Normalization. - - Args: - num_features (int): number of features/chennels in input tensor - eps (float, optional): a value added to the denominator for numerical - stability. Defaults to 1e-5. - momentum (float, optional): the value used for the running_mean and - running_var computation. Defaults to 0.1. - affine (bool, optional): whether to use learnable affine parameters. - Defaults to True. - track_running_stats (bool, optional): whether to track the running - mean and variance during training. When set to False, this - module does not track such statistics, and initializes statistics - buffers ``running_mean`` and ``running_var`` as ``None``. When - these buffers are ``None``, this module always uses batch - statistics in both training and eval modes. Defaults to True. - group (int, optional): synchronization of stats happen within - each process group individually. By default it is synchronization - across the whole world. Defaults to None. - stats_mode (str, optional): The statistical mode. Available options - includes ``'default'`` and ``'N'``. Defaults to 'default'. - When ``stats_mode=='default'``, it computes the overall statistics - using those from each worker with equal weight, i.e., the - statistics are synchronized and simply divied by ``group``. This - mode will produce inaccurate statistics when empty tensors occur. - When ``stats_mode=='N'``, it compute the overall statistics using - the total number of batches in each worker ignoring the number of - group, i.e., the statistics are synchronized and then divied by - the total batch ``N``. This mode is beneficial when empty tensors - occur during training, as it average the total mean by the real - number of batch. - """ - - def __init__( - self, - num_features, - eps=1e-5, - momentum=0.1, - affine=True, - track_running_stats=True, - group=None, - stats_mode='default', - ): - super(SyncBatchNorm, self).__init__() - self.num_features = num_features - self.eps = eps - self.momentum = momentum - self.affine = affine - self.track_running_stats = track_running_stats - group = dist.group.WORLD if group is None else group - self.group = group - self.group_size = dist.get_world_size(group) - assert stats_mode in ['default', 'N'], f'"stats_mode" only accepts "default" and "N", got "{stats_mode}"' - self.stats_mode = stats_mode - if self.affine: - self.weight = Parameter(torch.Tensor(num_features)) - self.bias = Parameter(torch.Tensor(num_features)) - else: - self.register_parameter('weight', None) - self.register_parameter('bias', None) - if self.track_running_stats: - self.register_buffer('running_mean', torch.zeros(num_features)) - self.register_buffer('running_var', torch.ones(num_features)) - self.register_buffer('num_batches_tracked', torch.tensor(0, dtype=torch.long)) - else: - self.register_buffer('running_mean', None) - self.register_buffer('running_var', None) - self.register_buffer('num_batches_tracked', None) - self.reset_parameters() - - def reset_running_stats(self): - if self.track_running_stats: - self.running_mean.zero_() - self.running_var.fill_(1) - self.num_batches_tracked.zero_() - - def reset_parameters(self): - self.reset_running_stats() - if self.affine: - self.weight.data.uniform_() # pytorch use ones_() - self.bias.data.zero_() - - def forward(self, input): - if input.dim() < 2: - raise ValueError(f'expected at least 2D input, got {input.dim()}D input') - if self.momentum is None: - exponential_average_factor = 0.0 - else: - exponential_average_factor = self.momentum - - if self.training and self.track_running_stats: - if self.num_batches_tracked is not None: - self.num_batches_tracked += 1 - if self.momentum is None: # use cumulative moving average - exponential_average_factor = 1.0 / float(self.num_batches_tracked) - else: # use exponential moving average - exponential_average_factor = self.momentum - - if self.training or not self.track_running_stats: - return SyncBatchNormFunction.apply( - input, - self.running_mean, - self.running_var, - self.weight, - self.bias, - exponential_average_factor, - self.eps, - self.group, - self.group_size, - self.stats_mode, - ) - else: - return F.batch_norm( - input, - self.running_mean, - self.running_var, - self.weight, - self.bias, - False, - exponential_average_factor, - self.eps, - ) - - def __repr__(self): - s = self.__class__.__name__ - s += f'({self.num_features}, ' - s += f'eps={self.eps}, ' - s += f'momentum={self.momentum}, ' - s += f'affine={self.affine}, ' - s += f'track_running_stats={self.track_running_stats}, ' - s += f'group_size={self.group_size},' - s += f'stats_mode={self.stats_mode})' - return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py deleted file mode 100644 index 09333e484221..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py +++ /dev/null @@ -1,62 +0,0 @@ -from typing import Tuple - -import torch -from torch.autograd import Function - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['three_interpolate_forward', 'three_interpolate_backward']) - - -class ThreeInterpolate(Function): - """Performs weighted linear interpolation on 3 features. - - Please refer to `Paper of PointNet++ `_ - for more details. - """ - - @staticmethod - def forward(ctx, features: torch.Tensor, indices: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: - """ - Args: - features (Tensor): (B, C, M) Features descriptors to be - interpolated - indices (Tensor): (B, n, 3) index three nearest neighbors - of the target features in features - weight (Tensor): (B, n, 3) weights of interpolation - - Returns: - Tensor: (B, C, N) tensor of the interpolated features - """ - assert features.is_contiguous() - assert indices.is_contiguous() - assert weight.is_contiguous() - - B, c, m = features.size() - n = indices.size(1) - ctx.three_interpolate_for_backward = (indices, weight, m) - output = torch.cuda.FloatTensor(B, c, n) - - ext_module.three_interpolate_forward(features, indices, weight, output, b=B, c=c, m=m, n=n) - return output - - @staticmethod - def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Args: - grad_out (Tensor): (B, C, N) tensor with gradients of outputs - - Returns: - Tensor: (B, C, M) tensor with gradients of features - """ - idx, weight, m = ctx.three_interpolate_for_backward - B, c, n = grad_out.size() - - grad_features = torch.cuda.FloatTensor(B, c, m).zero_() - grad_out_data = grad_out.data.contiguous() - - ext_module.three_interpolate_backward(grad_out_data, idx, weight, grad_features.data, b=B, c=c, n=n, m=m) - return grad_features, None, None - - -three_interpolate = ThreeInterpolate.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py deleted file mode 100644 index 384d91534d17..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py +++ /dev/null @@ -1,50 +0,0 @@ -from typing import Tuple - -import torch -from torch.autograd import Function - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['three_nn_forward']) - - -class ThreeNN(Function): - """Find the top-3 nearest neighbors of the target set from the source set. - - Please refer to `Paper of PointNet++ `_ - for more details. - """ - - @staticmethod - def forward(ctx, target: torch.Tensor, source: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Args: - target (Tensor): shape (B, N, 3), points set that needs to - find the nearest neighbors. - source (Tensor): shape (B, M, 3), points set that is used - to find the nearest neighbors of points in target set. - - Returns: - Tensor: shape (B, N, 3), L2 distance of each point in target - set to their corresponding nearest neighbors. - """ - target = target.contiguous() - source = source.contiguous() - - B, N, _ = target.size() - m = source.size(1) - dist2 = torch.cuda.FloatTensor(B, N, 3) - idx = torch.cuda.IntTensor(B, N, 3) - - ext_module.three_nn_forward(target, source, dist2, idx, b=B, n=N, m=m) - if torch.__version__ != 'parrots': - ctx.mark_non_differentiable(idx) - - return torch.sqrt(dist2), idx - - @staticmethod - def backward(ctx, a=None, b=None): - return None, None - - -three_nn = ThreeNN.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py deleted file mode 100644 index 4b0a8162e811..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -# Code reference from "Temporal Interlacing Network" -# https://github.com/deepcs233/TIN/blob/master/cuda_shift/rtc_wrap.py -# Hao Shao, Shengju Qian, Yu Liu -# shaoh19@mails.tsinghua.edu.cn, sjqian@cse.cuhk.edu.hk, yuliu@ee.cuhk.edu.hk - -import torch -import torch.nn as nn -from torch.autograd import Function - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['tin_shift_forward', 'tin_shift_backward']) - - -class TINShiftFunction(Function): - @staticmethod - def forward(ctx, input, shift): - C = input.size(2) - num_segments = shift.size(1) - if C // num_segments <= 0 or C % num_segments != 0: - raise ValueError( - 'C should be a multiple of num_segments, ' f'but got C={C} and num_segments={num_segments}.' - ) - - ctx.save_for_backward(shift) - - out = torch.zeros_like(input) - ext_module.tin_shift_forward(input, shift, out) - - return out - - @staticmethod - def backward(ctx, grad_output): - - shift = ctx.saved_tensors[0] - data_grad_input = grad_output.new(*grad_output.size()).zero_() - shift_grad_input = shift.new(*shift.size()).zero_() - ext_module.tin_shift_backward(grad_output, shift, data_grad_input) - - return data_grad_input, shift_grad_input - - -tin_shift = TINShiftFunction.apply - - -class TINShift(nn.Module): - """Temporal Interlace Shift. - - Temporal Interlace shift is a differentiable temporal-wise frame shifting - which is proposed in "Temporal Interlacing Network" - - Please refer to https://arxiv.org/abs/2001.06499 for more details. - Code is modified from https://github.com/mit-han-lab/temporal-shift-module - """ - - def forward(self, input, shift): - """Perform temporal interlace shift. - - Args: - input (Tensor): Feature map with shape [N, num_segments, C, H * W]. - shift (Tensor): Shift tensor with shape [N, num_segments]. - - Returns: - Feature map after temporal interlace shift. - """ - return tin_shift(input, shift) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py deleted file mode 100644 index c1f330686b9c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py +++ /dev/null @@ -1,307 +0,0 @@ -# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.py # noqa:E501 - -# Copyright (c) 2021, NVIDIA Corporation. All rights reserved. -# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator -# Augmentation (ADA) -# ======================================================================= - -# 1. Definitions - -# "Licensor" means any person or entity that distributes its Work. - -# "Software" means the original work of authorship made available under -# this License. - -# "Work" means the Software and any additions to or derivative works of -# the Software that are made available under this License. - -# The terms "reproduce," "reproduction," "derivative works," and -# "distribution" have the meaning as provided under U.S. copyright law; -# provided, however, that for the purposes of this License, derivative -# works shall not include works that remain separable from, or merely -# link (or bind by name) to the interfaces of, the Work. - -# Works, including the Software, are "made available" under this License -# by including in or with the Work either (a) a copyright notice -# referencing the applicability of this License to the Work, or (b) a -# copy of this License. - -# 2. License Grants - -# 2.1 Copyright Grant. Subject to the terms and conditions of this -# License, each Licensor grants to you a perpetual, worldwide, -# non-exclusive, royalty-free, copyright license to reproduce, -# prepare derivative works of, publicly display, publicly perform, -# sublicense and distribute its Work and any resulting derivative -# works in any form. - -# 3. Limitations - -# 3.1 Redistribution. You may reproduce or distribute the Work only -# if (a) you do so under this License, (b) you include a complete -# copy of this License with your distribution, and (c) you retain -# without modification any copyright, patent, trademark, or -# attribution notices that are present in the Work. - -# 3.2 Derivative Works. You may specify that additional or different -# terms apply to the use, reproduction, and distribution of your -# derivative works of the Work ("Your Terms") only if (a) Your Terms -# provide that the use limitation in Section 3.3 applies to your -# derivative works, and (b) you identify the specific derivative -# works that are subject to Your Terms. Notwithstanding Your Terms, -# this License (including the redistribution requirements in Section -# 3.1) will continue to apply to the Work itself. - -# 3.3 Use Limitation. The Work and any derivative works thereof only -# may be used or intended for use non-commercially. Notwithstanding -# the foregoing, NVIDIA and its affiliates may use the Work and any -# derivative works commercially. As used herein, "non-commercially" -# means for research or evaluation purposes only. - -# 3.4 Patent Claims. If you bring or threaten to bring a patent claim -# against any Licensor (including any claim, cross-claim or -# counterclaim in a lawsuit) to enforce any patents that you allege -# are infringed by any Work, then your rights under this License from -# such Licensor (including the grant in Section 2.1) will terminate -# immediately. - -# 3.5 Trademarks. This License does not grant any rights to use any -# Licensor’s or its affiliates’ names, logos, or trademarks, except -# as necessary to reproduce the notices described in this License. - -# 3.6 Termination. If you violate any term of this License, then your -# rights under this License (including the grant in Section 2.1) will -# terminate immediately. - -# 4. Disclaimer of Warranty. - -# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR -# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER -# THIS LICENSE. - -# 5. Limitation of Liability. - -# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL -# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE -# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, -# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF -# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK -# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, -# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER -# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGES. - -# ======================================================================= - -import torch -from torch.autograd import Function -from torch.nn import functional as F - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import to_2tuple -from ..utils import ext_loader - -upfirdn2d_ext = ext_loader.load_ext('_ext', ['upfirdn2d']) - - -class UpFirDn2dBackward(Function): - @staticmethod - def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad, in_size, out_size): - - up_x, up_y = up - down_x, down_y = down - g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad - - grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1) - - grad_input = upfirdn2d_ext.upfirdn2d( - grad_output, - grad_kernel, - up_x=down_x, - up_y=down_y, - down_x=up_x, - down_y=up_y, - pad_x0=g_pad_x0, - pad_x1=g_pad_x1, - pad_y0=g_pad_y0, - pad_y1=g_pad_y1, - ) - grad_input = grad_input.view(in_size[0], in_size[1], in_size[2], in_size[3]) - - ctx.save_for_backward(kernel) - - pad_x0, pad_x1, pad_y0, pad_y1 = pad - - ctx.up_x = up_x - ctx.up_y = up_y - ctx.down_x = down_x - ctx.down_y = down_y - ctx.pad_x0 = pad_x0 - ctx.pad_x1 = pad_x1 - ctx.pad_y0 = pad_y0 - ctx.pad_y1 = pad_y1 - ctx.in_size = in_size - ctx.out_size = out_size - - return grad_input - - @staticmethod - def backward(ctx, gradgrad_input): - (kernel,) = ctx.saved_tensors - - gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2], ctx.in_size[3], 1) - - gradgrad_out = upfirdn2d_ext.upfirdn2d( - gradgrad_input, - kernel, - up_x=ctx.up_x, - up_y=ctx.up_y, - down_x=ctx.down_x, - down_y=ctx.down_y, - pad_x0=ctx.pad_x0, - pad_x1=ctx.pad_x1, - pad_y0=ctx.pad_y0, - pad_y1=ctx.pad_y1, - ) - # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0], - # ctx.out_size[1], ctx.in_size[3]) - gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.in_size[1], ctx.out_size[0], ctx.out_size[1]) - - return gradgrad_out, None, None, None, None, None, None, None, None - - -class UpFirDn2d(Function): - @staticmethod - def forward(ctx, input, kernel, up, down, pad): - up_x, up_y = up - down_x, down_y = down - pad_x0, pad_x1, pad_y0, pad_y1 = pad - - kernel_h, kernel_w = kernel.shape - batch, channel, in_h, in_w = input.shape - ctx.in_size = input.shape - - input = input.reshape(-1, in_h, in_w, 1) - - ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1])) - - out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 - out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 - ctx.out_size = (out_h, out_w) - - ctx.up = (up_x, up_y) - ctx.down = (down_x, down_y) - ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1) - - g_pad_x0 = kernel_w - pad_x0 - 1 - g_pad_y0 = kernel_h - pad_y0 - 1 - g_pad_x1 = in_w * up_x - out_w * down_x + pad_x0 - up_x + 1 - g_pad_y1 = in_h * up_y - out_h * down_y + pad_y0 - up_y + 1 - - ctx.g_pad = (g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1) - - out = upfirdn2d_ext.upfirdn2d( - input, - kernel, - up_x=up_x, - up_y=up_y, - down_x=down_x, - down_y=down_y, - pad_x0=pad_x0, - pad_x1=pad_x1, - pad_y0=pad_y0, - pad_y1=pad_y1, - ) - # out = out.view(major, out_h, out_w, minor) - out = out.view(-1, channel, out_h, out_w) - - return out - - @staticmethod - def backward(ctx, grad_output): - kernel, grad_kernel = ctx.saved_tensors - - grad_input = UpFirDn2dBackward.apply( - grad_output, kernel, grad_kernel, ctx.up, ctx.down, ctx.pad, ctx.g_pad, ctx.in_size, ctx.out_size, - ) - - return grad_input, None, None, None, None - - -def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)): - """UpFRIDn for 2d features. - - UpFIRDn is short for upsample, apply FIR filter and downsample. More - details can be found in: - https://www.mathworks.com/help/signal/ref/upfirdn.html - - Args: - input (Tensor): Tensor with shape of (n, c, h, w). - kernel (Tensor): Filter kernel. - up (int | tuple[int], optional): Upsampling factor. If given a number, - we will use this factor for the both height and width side. - Defaults to 1. - down (int | tuple[int], optional): Downsampling factor. If given a - number, we will use this factor for the both height and width side. - Defaults to 1. - pad (tuple[int], optional): Padding for tensors, (x_pad, y_pad) or - (x_pad_0, x_pad_1, y_pad_0, y_pad_1). Defaults to (0, 0). - - Returns: - Tensor: Tensor after UpFIRDn. - """ - if input.device.type == 'cpu': - if len(pad) == 2: - pad = (pad[0], pad[1], pad[0], pad[1]) - - up = to_2tuple(up) - - down = to_2tuple(down) - - out = upfirdn2d_native(input, kernel, up[0], up[1], down[0], down[1], pad[0], pad[1], pad[2], pad[3]) - else: - _up = to_2tuple(up) - - _down = to_2tuple(down) - - if len(pad) == 4: - _pad = pad - elif len(pad) == 2: - _pad = (pad[0], pad[1], pad[0], pad[1]) - - out = UpFirDn2d.apply(input, kernel, _up, _down, _pad) - - return out - - -def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1): - _, channel, in_h, in_w = input.shape - input = input.reshape(-1, in_h, in_w, 1) - - _, in_h, in_w, minor = input.shape - kernel_h, kernel_w = kernel.shape - - out = input.view(-1, in_h, 1, in_w, 1, minor) - out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1]) - out = out.view(-1, in_h * up_y, in_w * up_x, minor) - - out = F.pad(out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]) - out = out[ - :, max(-pad_y0, 0) : out.shape[1] - max(-pad_y1, 0), max(-pad_x0, 0) : out.shape[2] - max(-pad_x1, 0), :, - ] - - out = out.permute(0, 3, 1, 2) - out = out.reshape([-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]) - w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w) - out = F.conv2d(out, w) - out = out.reshape( - -1, minor, in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1, in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1, - ) - out = out.permute(0, 2, 3, 1) - out = out[:, ::down_y, ::down_x, :] - - out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 - out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 - - return out.view(-1, channel, out_h, out_w) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py deleted file mode 100644 index 60e23663270d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -from torch import nn -from torch.autograd import Function -from torch.nn.modules.utils import _pair - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['dynamic_voxelize_forward', 'hard_voxelize_forward']) - - -class _Voxelization(Function): - @staticmethod - def forward(ctx, points, voxel_size, coors_range, max_points=35, max_voxels=20000): - """Convert kitti points(N, >=3) to voxels. - - Args: - points (torch.Tensor): [N, ndim]. Points[:, :3] contain xyz points - and points[:, 3:] contain other information like reflectivity. - voxel_size (tuple or float): The size of voxel with the shape of - [3]. - coors_range (tuple or float): The coordinate range of voxel with - the shape of [6]. - max_points (int, optional): maximum points contained in a voxel. if - max_points=-1, it means using dynamic_voxelize. Default: 35. - max_voxels (int, optional): maximum voxels this function create. - for second, 20000 is a good choice. Users should shuffle points - before call this function because max_voxels may drop points. - Default: 20000. - - Returns: - voxels_out (torch.Tensor): Output voxels with the shape of [M, - max_points, ndim]. Only contain points and returned when - max_points != -1. - coors_out (torch.Tensor): Output coordinates with the shape of - [M, 3]. - num_points_per_voxel_out (torch.Tensor): Num points per voxel with - the shape of [M]. Only returned when max_points != -1. - """ - if max_points == -1 or max_voxels == -1: - coors = points.new_zeros(size=(points.size(0), 3), dtype=torch.int) - ext_module.dynamic_voxelize_forward(points, coors, voxel_size, coors_range, 3) - return coors - else: - voxels = points.new_zeros(size=(max_voxels, max_points, points.size(1))) - coors = points.new_zeros(size=(max_voxels, 3), dtype=torch.int) - num_points_per_voxel = points.new_zeros(size=(max_voxels,), dtype=torch.int) - voxel_num = ext_module.hard_voxelize_forward( - points, voxels, coors, num_points_per_voxel, voxel_size, coors_range, max_points, max_voxels, 3 - ) - # select the valid voxels - voxels_out = voxels[:voxel_num] - coors_out = coors[:voxel_num] - num_points_per_voxel_out = num_points_per_voxel[:voxel_num] - return voxels_out, coors_out, num_points_per_voxel_out - - -voxelization = _Voxelization.apply - - -class Voxelization(nn.Module): - """Convert kitti points(N, >=3) to voxels. - - Please refer to `PVCNN `_ for more - details. - - Args: - voxel_size (tuple or float): The size of voxel with the shape of [3]. - point_cloud_range (tuple or float): The coordinate range of voxel with - the shape of [6]. - max_num_points (int): maximum points contained in a voxel. if - max_points=-1, it means using dynamic_voxelize. - max_voxels (int, optional): maximum voxels this function create. - for second, 20000 is a good choice. Users should shuffle points - before call this function because max_voxels may drop points. - Default: 20000. - """ - - def __init__(self, voxel_size, point_cloud_range, max_num_points, max_voxels=20000): - super().__init__() - - self.voxel_size = voxel_size - self.point_cloud_range = point_cloud_range - self.max_num_points = max_num_points - if isinstance(max_voxels, tuple): - self.max_voxels = max_voxels - else: - self.max_voxels = _pair(max_voxels) - - point_cloud_range = torch.tensor(point_cloud_range, dtype=torch.float32) - voxel_size = torch.tensor(voxel_size, dtype=torch.float32) - grid_size = (point_cloud_range[3:] - point_cloud_range[:3]) / voxel_size - grid_size = torch.round(grid_size).long() - input_feat_shape = grid_size[:2] - self.grid_size = grid_size - # the origin shape is as [x-len, y-len, z-len] - # [w, h, d] -> [d, h, w] - self.pcd_shape = [*input_feat_shape, 1][::-1] - - def forward(self, input): - if self.training: - max_voxels = self.max_voxels[0] - else: - max_voxels = self.max_voxels[1] - - return voxelization(input, self.voxel_size, self.point_cloud_range, self.max_num_points, max_voxels) - - def __repr__(self): - s = self.__class__.__name__ + '(' - s += 'voxel_size=' + str(self.voxel_size) - s += ', point_cloud_range=' + str(self.point_cloud_range) - s += ', max_num_points=' + str(self.max_num_points) - s += ', max_voxels=' + str(self.max_voxels) - s += ')' - return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py deleted file mode 100644 index da4f1557d34a..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .collate import collate -from .data_container import DataContainer -from .data_parallel import MMDataParallel -from .distributed import MMDistributedDataParallel -from .registry import MODULE_WRAPPERS -from .scatter_gather import scatter, scatter_kwargs -from .utils import is_module_wrapper - -__all__ = [ - 'collate', - 'DataContainer', - 'MMDataParallel', - 'MMDistributedDataParallel', - 'scatter', - 'scatter_kwargs', - 'is_module_wrapper', - 'MODULE_WRAPPERS', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py deleted file mode 100644 index 154a0302584c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -from torch.nn.parallel._functions import _get_stream - - -def scatter(input, devices, streams=None): - """Scatters tensor across multiple GPUs.""" - if streams is None: - streams = [None] * len(devices) - - if isinstance(input, list): - chunk_size = (len(input) - 1) // len(devices) + 1 - outputs = [ - scatter(input[i], [devices[i // chunk_size]], [streams[i // chunk_size]]) for i in range(len(input)) - ] - return outputs - elif isinstance(input, torch.Tensor): - output = input.contiguous() - # TODO: copy to a pinned buffer first (if copying from CPU) - stream = streams[0] if output.numel() > 0 else None - if devices != [-1]: - with torch.cuda.device(devices[0]), torch.cuda.stream(stream): - output = output.cuda(devices[0], non_blocking=True) - else: - # unsqueeze the first dimension thus the tensor's shape is the - # same as those scattered with GPU. - output = output.unsqueeze(0) - return output - else: - raise Exception(f'Unknown type {type(input)}.') - - -def synchronize_stream(output, devices, streams): - if isinstance(output, list): - chunk_size = len(output) // len(devices) - for i in range(len(devices)): - for j in range(chunk_size): - synchronize_stream(output[i * chunk_size + j], [devices[i]], [streams[i]]) - elif isinstance(output, torch.Tensor): - if output.numel() != 0: - with torch.cuda.device(devices[0]): - main_stream = torch.cuda.current_stream() - main_stream.wait_stream(streams[0]) - output.record_stream(main_stream) - else: - raise Exception(f'Unknown type {type(output)}.') - - -def get_input_device(input): - if isinstance(input, list): - for item in input: - input_device = get_input_device(item) - if input_device != -1: - return input_device - return -1 - elif isinstance(input, torch.Tensor): - return input.get_device() if input.is_cuda else -1 - else: - raise Exception(f'Unknown type {type(input)}.') - - -class Scatter: - @staticmethod - def forward(target_gpus, input): - input_device = get_input_device(input) - streams = None - if input_device == -1 and target_gpus != [-1]: - # Perform CPU to GPU copies in a background stream - streams = [_get_stream(device) for device in target_gpus] - - outputs = scatter(input, target_gpus, streams) - # Synchronize with the copy stream - if streams is not None: - synchronize_stream(outputs, target_gpus, streams) - - return tuple(outputs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py deleted file mode 100644 index 9607ce7efb6d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from collections.abc import Mapping, Sequence - -import torch -import torch.nn.functional as F -from torch.utils.data.dataloader import default_collate - -from .data_container import DataContainer - - -def collate(batch, samples_per_gpu=1): - """Puts each data field into a tensor/DataContainer with outer dimension - batch size. - - Extend default_collate to add support for - :type:`~mmcv.parallel.DataContainer`. There are 3 cases. - - 1. cpu_only = True, e.g., meta data - 2. cpu_only = False, stack = True, e.g., images tensors - 3. cpu_only = False, stack = False, e.g., gt bboxes - """ - - if not isinstance(batch, Sequence): - raise TypeError(f'{batch.dtype} is not supported.') - - if isinstance(batch[0], DataContainer): - stacked = [] - if batch[0].cpu_only: - for i in range(0, len(batch), samples_per_gpu): - stacked.append([sample.data for sample in batch[i : i + samples_per_gpu]]) - return DataContainer(stacked, batch[0].stack, batch[0].padding_value, cpu_only=True) - elif batch[0].stack: - for i in range(0, len(batch), samples_per_gpu): - assert isinstance(batch[i].data, torch.Tensor) - - if batch[i].pad_dims is not None: - ndim = batch[i].dim() - assert ndim > batch[i].pad_dims - max_shape = [0 for _ in range(batch[i].pad_dims)] - for dim in range(1, batch[i].pad_dims + 1): - max_shape[dim - 1] = batch[i].size(-dim) - for sample in batch[i : i + samples_per_gpu]: - for dim in range(0, ndim - batch[i].pad_dims): - assert batch[i].size(dim) == sample.size(dim) - for dim in range(1, batch[i].pad_dims + 1): - max_shape[dim - 1] = max(max_shape[dim - 1], sample.size(-dim)) - padded_samples = [] - for sample in batch[i : i + samples_per_gpu]: - pad = [0 for _ in range(batch[i].pad_dims * 2)] - for dim in range(1, batch[i].pad_dims + 1): - pad[2 * dim - 1] = max_shape[dim - 1] - sample.size(-dim) - padded_samples.append(F.pad(sample.data, pad, value=sample.padding_value)) - stacked.append(default_collate(padded_samples)) - elif batch[i].pad_dims is None: - stacked.append(default_collate([sample.data for sample in batch[i : i + samples_per_gpu]])) - else: - raise ValueError('pad_dims should be either None or integers (1-3)') - - else: - for i in range(0, len(batch), samples_per_gpu): - stacked.append([sample.data for sample in batch[i : i + samples_per_gpu]]) - return DataContainer(stacked, batch[0].stack, batch[0].padding_value) - elif isinstance(batch[0], Sequence): - transposed = zip(*batch) - return [collate(samples, samples_per_gpu) for samples in transposed] - elif isinstance(batch[0], Mapping): - return {key: collate([d[key] for d in batch], samples_per_gpu) for key in batch[0]} - else: - return default_collate(batch) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py deleted file mode 100644 index 120f68b8fa60..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import functools - -import torch - - -def assert_tensor_type(func): - @functools.wraps(func) - def wrapper(*args, **kwargs): - if not isinstance(args[0].data, torch.Tensor): - raise AttributeError( - f'{args[0].__class__.__name__} has no attribute ' f'{func.__name__} for type {args[0].datatype}' - ) - return func(*args, **kwargs) - - return wrapper - - -class DataContainer: - """A container for any type of objects. - - Typically tensors will be stacked in the collate function and sliced along - some dimension in the scatter function. This behavior has some limitations. - 1. All tensors have to be the same size. - 2. Types are limited (numpy array or Tensor). - - We design `DataContainer` and `MMDataParallel` to overcome these - limitations. The behavior can be either of the following. - - - copy to GPU, pad all tensors to the same size and stack them - - copy to GPU without stacking - - leave the objects as is and pass it to the model - - pad_dims specifies the number of last few dimensions to do padding - """ - - def __init__(self, data, stack=False, padding_value=0, cpu_only=False, pad_dims=2): - self._data = data - self._cpu_only = cpu_only - self._stack = stack - self._padding_value = padding_value - assert pad_dims in [None, 1, 2, 3] - self._pad_dims = pad_dims - - def __repr__(self): - return f'{self.__class__.__name__}({repr(self.data)})' - - def __len__(self): - return len(self._data) - - @property - def data(self): - return self._data - - @property - def datatype(self): - if isinstance(self.data, torch.Tensor): - return self.data.type() - else: - return type(self.data) - - @property - def cpu_only(self): - return self._cpu_only - - @property - def stack(self): - return self._stack - - @property - def padding_value(self): - return self._padding_value - - @property - def pad_dims(self): - return self._pad_dims - - @assert_tensor_type - def size(self, *args, **kwargs): - return self.data.size(*args, **kwargs) - - @assert_tensor_type - def dim(self): - return self.data.dim() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py deleted file mode 100644 index bd0715da94ad..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from itertools import chain - -from torch.nn.parallel import DataParallel - -from .scatter_gather import scatter_kwargs - - -class MMDataParallel(DataParallel): - """The DataParallel module that supports DataContainer. - - MMDataParallel has two main differences with PyTorch DataParallel: - - - It supports a custom type :class:`DataContainer` which allows more - flexible control of input data during both GPU and CPU inference. - - It implement two more APIs ``train_step()`` and ``val_step()``. - - Args: - module (:class:`nn.Module`): Module to be encapsulated. - device_ids (list[int]): Device IDS of modules to be scattered to. - Defaults to None when GPU is not available. - output_device (str | int): Device ID for output. Defaults to None. - dim (int): Dimension used to scatter the data. Defaults to 0. - """ - - def __init__(self, *args, dim=0, **kwargs): - super(MMDataParallel, self).__init__(*args, dim=dim, **kwargs) - self.dim = dim - - def forward(self, *inputs, **kwargs): - """Override the original forward function. - - The main difference lies in the CPU inference where the data in - :class:`DataContainers` will still be gathered. - """ - if not self.device_ids: - # We add the following line thus the module could gather and - # convert data containers as those in GPU inference - inputs, kwargs = self.scatter(inputs, kwargs, [-1]) - return self.module(*inputs[0], **kwargs[0]) - else: - return super().forward(*inputs, **kwargs) - - def scatter(self, inputs, kwargs, device_ids): - return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) - - def train_step(self, *inputs, **kwargs): - if not self.device_ids: - # We add the following line thus the module could gather and - # convert data containers as those in GPU inference - inputs, kwargs = self.scatter(inputs, kwargs, [-1]) - return self.module.train_step(*inputs[0], **kwargs[0]) - - assert len(self.device_ids) == 1, ( - 'MMDataParallel only supports single GPU training, if you need to' - ' train with multiple GPUs, please use MMDistributedDataParallel' - 'instead.' - ) - - for t in chain(self.module.parameters(), self.module.buffers()): - if t.device != self.src_device_obj: - raise RuntimeError( - 'module must have its parameters and buffers ' - f'on device {self.src_device_obj} (device_ids[0]) but ' - f'found one of them on device: {t.device}' - ) - - inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) - return self.module.train_step(*inputs[0], **kwargs[0]) - - def val_step(self, *inputs, **kwargs): - if not self.device_ids: - # We add the following line thus the module could gather and - # convert data containers as those in GPU inference - inputs, kwargs = self.scatter(inputs, kwargs, [-1]) - return self.module.val_step(*inputs[0], **kwargs[0]) - - assert len(self.device_ids) == 1, ( - 'MMDataParallel only supports single GPU training, if you need to' - ' train with multiple GPUs, please use MMDistributedDataParallel' - ' instead.' - ) - - for t in chain(self.module.parameters(), self.module.buffers()): - if t.device != self.src_device_obj: - raise RuntimeError( - 'module must have its parameters and buffers ' - f'on device {self.src_device_obj} (device_ids[0]) but ' - f'found one of them on device: {t.device}' - ) - - inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) - return self.module.val_step(*inputs[0], **kwargs[0]) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py deleted file mode 100644 index 755c4398fcdc..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -from torch.nn.parallel.distributed import DistributedDataParallel, _find_tensors - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import print_log -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version - -from .scatter_gather import scatter_kwargs - - -class MMDistributedDataParallel(DistributedDataParallel): - """The DDP module that supports DataContainer. - - MMDDP has two main differences with PyTorch DDP: - - - It supports a custom type :class:`DataContainer` which allows more - flexible control of input data. - - It implement two APIs ``train_step()`` and ``val_step()``. - """ - - def to_kwargs(self, inputs, kwargs, device_id): - # Use `self.to_kwargs` instead of `self.scatter` in pytorch1.8 - # to move all tensors to device_id - return scatter_kwargs(inputs, kwargs, [device_id], dim=self.dim) - - def scatter(self, inputs, kwargs, device_ids): - return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) - - def train_step(self, *inputs, **kwargs): - """train_step() API for module wrapped by DistributedDataParallel. - - This method is basically the same as - ``DistributedDataParallel.forward()``, while replacing - ``self.module.forward()`` with ``self.module.train_step()``. - It is compatible with PyTorch 1.1 - 1.5. - """ - - # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the - # end of backward to the beginning of forward. - if ( - 'parrots' not in TORCH_VERSION - and digit_version(TORCH_VERSION) >= digit_version('1.7') - and self.reducer._rebuild_buckets() - ): - print_log('Reducer buckets have been rebuilt in this iteration.', logger='mmcv') - - if getattr(self, 'require_forward_param_sync', True): - self._sync_params() - if self.device_ids: - inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) - if len(self.device_ids) == 1: - output = self.module.train_step(*inputs[0], **kwargs[0]) - else: - outputs = self.parallel_apply(self._module_copies[: len(inputs)], inputs, kwargs) - output = self.gather(outputs, self.output_device) - else: - output = self.module.train_step(*inputs, **kwargs) - - if torch.is_grad_enabled() and getattr(self, 'require_backward_grad_sync', True): - if self.find_unused_parameters: - self.reducer.prepare_for_backward(list(_find_tensors(output))) - else: - self.reducer.prepare_for_backward([]) - else: - if 'parrots' not in TORCH_VERSION and digit_version(TORCH_VERSION) > digit_version('1.2'): - self.require_forward_param_sync = False - return output - - def val_step(self, *inputs, **kwargs): - """val_step() API for module wrapped by DistributedDataParallel. - - This method is basically the same as - ``DistributedDataParallel.forward()``, while replacing - ``self.module.forward()`` with ``self.module.val_step()``. - It is compatible with PyTorch 1.1 - 1.5. - """ - # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the - # end of backward to the beginning of forward. - if ( - 'parrots' not in TORCH_VERSION - and digit_version(TORCH_VERSION) >= digit_version('1.7') - and self.reducer._rebuild_buckets() - ): - print_log('Reducer buckets have been rebuilt in this iteration.', logger='mmcv') - - if getattr(self, 'require_forward_param_sync', True): - self._sync_params() - if self.device_ids: - inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) - if len(self.device_ids) == 1: - output = self.module.val_step(*inputs[0], **kwargs[0]) - else: - outputs = self.parallel_apply(self._module_copies[: len(inputs)], inputs, kwargs) - output = self.gather(outputs, self.output_device) - else: - output = self.module.val_step(*inputs, **kwargs) - - if torch.is_grad_enabled() and getattr(self, 'require_backward_grad_sync', True): - if self.find_unused_parameters: - self.reducer.prepare_for_backward(list(_find_tensors(output))) - else: - self.reducer.prepare_for_backward([]) - else: - if 'parrots' not in TORCH_VERSION and digit_version(TORCH_VERSION) > digit_version('1.2'): - self.require_forward_param_sync = False - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py deleted file mode 100644 index 7e8a47648e1e..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.distributed as dist -import torch.nn as nn -from torch._utils import _flatten_dense_tensors, _take_tensors, _unflatten_dense_tensors - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version - -from .registry import MODULE_WRAPPERS -from .scatter_gather import scatter_kwargs - - -@MODULE_WRAPPERS.register_module() -class MMDistributedDataParallel(nn.Module): - def __init__(self, module, dim=0, broadcast_buffers=True, bucket_cap_mb=25): - super(MMDistributedDataParallel, self).__init__() - self.module = module - self.dim = dim - self.broadcast_buffers = broadcast_buffers - - self.broadcast_bucket_size = bucket_cap_mb * 1024 * 1024 - self._sync_params() - - def _dist_broadcast_coalesced(self, tensors, buffer_size): - for tensors in _take_tensors(tensors, buffer_size): - flat_tensors = _flatten_dense_tensors(tensors) - dist.broadcast(flat_tensors, 0) - for tensor, synced in zip(tensors, _unflatten_dense_tensors(flat_tensors, tensors)): - tensor.copy_(synced) - - def _sync_params(self): - module_states = list(self.module.state_dict().values()) - if len(module_states) > 0: - self._dist_broadcast_coalesced(module_states, self.broadcast_bucket_size) - if self.broadcast_buffers: - if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) < digit_version('1.0'): - buffers = [b.data for b in self.module._all_buffers()] - else: - buffers = [b.data for b in self.module.buffers()] - if len(buffers) > 0: - self._dist_broadcast_coalesced(buffers, self.broadcast_bucket_size) - - def scatter(self, inputs, kwargs, device_ids): - return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) - - def forward(self, *inputs, **kwargs): - inputs, kwargs = self.scatter(inputs, kwargs, [torch.cuda.current_device()]) - return self.module(*inputs[0], **kwargs[0]) - - def train_step(self, *inputs, **kwargs): - inputs, kwargs = self.scatter(inputs, kwargs, [torch.cuda.current_device()]) - output = self.module.train_step(*inputs[0], **kwargs[0]) - return output - - def val_step(self, *inputs, **kwargs): - inputs, kwargs = self.scatter(inputs, kwargs, [torch.cuda.current_device()]) - output = self.module.val_step(*inputs[0], **kwargs[0]) - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/registry.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/registry.py deleted file mode 100644 index 4d31bc1d08e6..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/registry.py +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from torch.nn.parallel import DataParallel, DistributedDataParallel - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry - -MODULE_WRAPPERS = Registry('module wrapper') -MODULE_WRAPPERS.register_module(module=DataParallel) -MODULE_WRAPPERS.register_module(module=DistributedDataParallel) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/scatter_gather.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/scatter_gather.py deleted file mode 100644 index 900ff88566f8..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/scatter_gather.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -from torch.nn.parallel._functions import Scatter as OrigScatter - -from ._functions import Scatter -from .data_container import DataContainer - - -def scatter(inputs, target_gpus, dim=0): - """Scatter inputs to target gpus. - - The only difference from original :func:`scatter` is to add support for - :type:`~mmcv.parallel.DataContainer`. - """ - - def scatter_map(obj): - if isinstance(obj, torch.Tensor): - if target_gpus != [-1]: - return OrigScatter.apply(target_gpus, None, dim, obj) - else: - # for CPU inference we use self-implemented scatter - return Scatter.forward(target_gpus, obj) - if isinstance(obj, DataContainer): - if obj.cpu_only: - return obj.data - else: - return Scatter.forward(target_gpus, obj.data) - if isinstance(obj, tuple) and len(obj) > 0: - return list(zip(*map(scatter_map, obj))) - if isinstance(obj, list) and len(obj) > 0: - out = list(map(list, zip(*map(scatter_map, obj)))) - return out - if isinstance(obj, dict) and len(obj) > 0: - out = list(map(type(obj), zip(*map(scatter_map, obj.items())))) - return out - return [obj for targets in target_gpus] - - # After scatter_map is called, a scatter_map cell will exist. This cell - # has a reference to the actual function scatter_map, which has references - # to a closure that has a reference to the scatter_map cell (because the - # fn is recursive). To avoid this reference cycle, we set the function to - # None, clearing the cell - try: - return scatter_map(inputs) - finally: - scatter_map = None - - -def scatter_kwargs(inputs, kwargs, target_gpus, dim=0): - """Scatter with support for kwargs dictionary.""" - inputs = scatter(inputs, target_gpus, dim) if inputs else [] - kwargs = scatter(kwargs, target_gpus, dim) if kwargs else [] - if len(inputs) < len(kwargs): - inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) - elif len(kwargs) < len(inputs): - kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) - inputs = tuple(inputs) - kwargs = tuple(kwargs) - return inputs, kwargs diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/utils.py deleted file mode 100644 index 0f5712cb42c3..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/utils.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .registry import MODULE_WRAPPERS - - -def is_module_wrapper(module): - """Check if a module is a module wrapper. - - The following 3 modules in MMCV (and their subclasses) are regarded as - module wrappers: DataParallel, DistributedDataParallel, - MMDistributedDataParallel (the deprecated version). You may add you own - module wrapper by registering it to mmcv.parallel.MODULE_WRAPPERS. - - Args: - module (nn.Module): The module to be checked. - - Returns: - bool: True if the input module is a module wrapper. - """ - module_wrappers = tuple(MODULE_WRAPPERS.module_dict.values()) - return isinstance(module, module_wrappers) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py deleted file mode 100644 index 8edd3a098aed..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .base_module import BaseModule, ModuleList, Sequential -from .base_runner import BaseRunner -from .builder import RUNNERS, build_runner -from .checkpoint import ( - CheckpointLoader, - _load_checkpoint, - _load_checkpoint_with_prefix, - load_checkpoint, - load_state_dict, - save_checkpoint, - weights_to_cpu, -) -from .default_constructor import DefaultRunnerConstructor -from .dist_utils import allreduce_grads, allreduce_params, get_dist_info, init_dist, master_only -from .epoch_based_runner import EpochBasedRunner, Runner -from .fp16_utils import LossScaler, auto_fp16, force_fp32, wrap_fp16_model -from .hooks import ( - HOOKS, - CheckpointHook, - ClosureHook, - DistEvalHook, - DistSamplerSeedHook, - DvcliveLoggerHook, - EMAHook, - EvalHook, - Fp16OptimizerHook, - GradientCumulativeFp16OptimizerHook, - GradientCumulativeOptimizerHook, - Hook, - IterTimerHook, - LoggerHook, - LrUpdaterHook, - MlflowLoggerHook, - NeptuneLoggerHook, - OptimizerHook, - PaviLoggerHook, - SyncBuffersHook, - TensorboardLoggerHook, - TextLoggerHook, - WandbLoggerHook, -) -from .iter_based_runner import IterBasedRunner, IterLoader -from .log_buffer import LogBuffer -from .optimizer import ( - OPTIMIZER_BUILDERS, - OPTIMIZERS, - DefaultOptimizerConstructor, - build_optimizer, - build_optimizer_constructor, -) -from .priority import Priority, get_priority -from .utils import get_host_info, get_time_str, obj_from_dict, set_random_seed - -__all__ = [ - 'BaseRunner', - 'Runner', - 'EpochBasedRunner', - 'IterBasedRunner', - 'LogBuffer', - 'HOOKS', - 'Hook', - 'CheckpointHook', - 'ClosureHook', - 'LrUpdaterHook', - 'OptimizerHook', - 'IterTimerHook', - 'DistSamplerSeedHook', - 'LoggerHook', - 'PaviLoggerHook', - 'TextLoggerHook', - 'TensorboardLoggerHook', - 'NeptuneLoggerHook', - 'WandbLoggerHook', - 'MlflowLoggerHook', - 'DvcliveLoggerHook', - '_load_checkpoint', - 'load_state_dict', - 'load_checkpoint', - 'weights_to_cpu', - 'save_checkpoint', - 'Priority', - 'get_priority', - 'get_host_info', - 'get_time_str', - 'obj_from_dict', - 'init_dist', - 'get_dist_info', - 'master_only', - 'OPTIMIZER_BUILDERS', - 'OPTIMIZERS', - 'DefaultOptimizerConstructor', - 'build_optimizer', - 'build_optimizer_constructor', - 'IterLoader', - 'set_random_seed', - 'auto_fp16', - 'force_fp32', - 'wrap_fp16_model', - 'Fp16OptimizerHook', - 'SyncBuffersHook', - 'EMAHook', - 'build_runner', - 'RUNNERS', - 'allreduce_grads', - 'allreduce_params', - 'LossScaler', - 'CheckpointLoader', - 'BaseModule', - '_load_checkpoint_with_prefix', - 'EvalHook', - 'DistEvalHook', - 'Sequential', - 'ModuleList', - 'GradientCumulativeOptimizerHook', - 'GradientCumulativeFp16OptimizerHook', - 'DefaultRunnerConstructor', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py deleted file mode 100644 index c0c66594dccb..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import copy -import warnings -from abc import ABCMeta -from collections import defaultdict -from logging import FileHandler - -import torch.nn as nn - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner.dist_utils import master_only -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.logging import ( - get_logger, - logger_initialized, - print_log, -) - - -class BaseModule(nn.Module, metaclass=ABCMeta): - """Base module for all modules in openmmlab. - - ``BaseModule`` is a wrapper of ``torch.nn.Module`` with additional - functionality of parameter initialization. Compared with - ``torch.nn.Module``, ``BaseModule`` mainly adds three attributes. - - - ``init_cfg``: the config to control the initialization. - - ``init_weights``: The function of parameter - initialization and recording initialization - information. - - ``_params_init_info``: Used to track the parameter - initialization information. This attribute only - exists during executing the ``init_weights``. - - Args: - init_cfg (dict, optional): Initialization config dict. - """ - - def __init__(self, init_cfg=None): - """Initialize BaseModule, inherited from `torch.nn.Module`""" - - # NOTE init_cfg can be defined in different levels, but init_cfg - # in low levels has a higher priority. - - super(BaseModule, self).__init__() - # define default value of init_cfg instead of hard code - # in init_weights() function - self._is_init = False - - self.init_cfg = copy.deepcopy(init_cfg) - - # Backward compatibility in derived classes - # if pretrained is not None: - # warnings.warn('DeprecationWarning: pretrained is a deprecated \ - # key, please consider using init_cfg') - # self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) - - @property - def is_init(self): - return self._is_init - - def init_weights(self): - """Initialize the weights.""" - - is_top_level_module = False - # check if it is top-level module - if not hasattr(self, '_params_init_info'): - # The `_params_init_info` is used to record the initialization - # information of the parameters - # the key should be the obj:`nn.Parameter` of model and the value - # should be a dict containing - # - init_info (str): The string that describes the initialization. - # - tmp_mean_value (FloatTensor): The mean of the parameter, - # which indicates whether the parameter has been modified. - # this attribute would be deleted after all parameters - # is initialized. - self._params_init_info = defaultdict(dict) - is_top_level_module = True - - # Initialize the `_params_init_info`, - # When detecting the `tmp_mean_value` of - # the corresponding parameter is changed, update related - # initialization information - for name, param in self.named_parameters(): - self._params_init_info[param]['init_info'] = ( - f'The value is the same before and ' - f'after calling `init_weights` ' - f'of {self.__class__.__name__} ' - ) - self._params_init_info[param]['tmp_mean_value'] = param.data.mean() - - # pass `params_init_info` to all submodules - # All submodules share the same `params_init_info`, - # so it will be updated when parameters are - # modified at any level of the model. - for sub_module in self.modules(): - sub_module._params_init_info = self._params_init_info - - # Get the initialized logger, if not exist, - # create a logger named `mmcv` - logger_names = list(logger_initialized.keys()) - logger_name = logger_names[0] if logger_names else 'mmcv' - - from ..cnn import initialize - from ..cnn.utils.weight_init import update_init_info - - module_name = self.__class__.__name__ - if not self._is_init: - if self.init_cfg: - print_log(f'initialize {module_name} with init_cfg {self.init_cfg}', logger=logger_name) - initialize(self, self.init_cfg) - if isinstance(self.init_cfg, dict): - # prevent the parameters of - # the pre-trained model - # from being overwritten by - # the `init_weights` - if self.init_cfg['type'] == 'Pretrained': - return - - for m in self.children(): - if hasattr(m, 'init_weights'): - m.init_weights() - # users may overload the `init_weights` - update_init_info( - m, init_info=f'Initialized by ' f'user-defined `init_weights`' f' in {m.__class__.__name__} ' - ) - - self._is_init = True - else: - warnings.warn(f'init_weights of {self.__class__.__name__} has ' f'been called more than once.') - - if is_top_level_module: - self._dump_init_info(logger_name) - - for sub_module in self.modules(): - del sub_module._params_init_info - - @master_only - def _dump_init_info(self, logger_name): - """Dump the initialization information to a file named - `initialization.log.json` in workdir. - - Args: - logger_name (str): The name of logger. - """ - - logger = get_logger(logger_name) - - with_file_handler = False - # dump the information to the logger file if there is a `FileHandler` - for handler in logger.handlers: - if isinstance(handler, FileHandler): - handler.stream.write('Name of parameter - Initialization information\n') - for name, param in self.named_parameters(): - handler.stream.write( - f'\n{name} - {param.shape}: ' f"\n{self._params_init_info[param]['init_info']} \n" - ) - handler.stream.flush() - with_file_handler = True - if not with_file_handler: - for name, param in self.named_parameters(): - print_log( - f'\n{name} - {param.shape}: ' f"\n{self._params_init_info[param]['init_info']} \n ", - logger=logger_name, - ) - - def __repr__(self): - s = super().__repr__() - if self.init_cfg: - s += f'\ninit_cfg={self.init_cfg}' - return s - - -class Sequential(BaseModule, nn.Sequential): - """Sequential module in openmmlab. - - Args: - init_cfg (dict, optional): Initialization config dict. - """ - - def __init__(self, *args, init_cfg=None): - BaseModule.__init__(self, init_cfg) - nn.Sequential.__init__(self, *args) - - -class ModuleList(BaseModule, nn.ModuleList): - """ModuleList in openmmlab. - - Args: - modules (iterable, optional): an iterable of modules to add. - init_cfg (dict, optional): Initialization config dict. - """ - - def __init__(self, modules=None, init_cfg=None): - BaseModule.__init__(self, init_cfg) - nn.ModuleList.__init__(self, modules) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py deleted file mode 100644 index 92948a64963c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py +++ /dev/null @@ -1,515 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import copy -import logging -import os.path as osp -import warnings -from abc import ABCMeta, abstractmethod - -import torch -from torch.optim import Optimizer - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from ..parallel import is_module_wrapper -from .checkpoint import load_checkpoint -from .dist_utils import get_dist_info -from .hooks import HOOKS, Hook -from .log_buffer import LogBuffer -from .priority import Priority, get_priority -from .utils import get_time_str - - -class BaseRunner(metaclass=ABCMeta): - """The base class of Runner, a training helper for PyTorch. - - All subclasses should implement the following APIs: - - - ``run()`` - - ``train()`` - - ``val()`` - - ``save_checkpoint()`` - - Args: - model (:obj:`torch.nn.Module`): The model to be run. - batch_processor (callable): A callable method that process a data - batch. The interface of this method should be - `batch_processor(model, data, train_mode) -> dict` - optimizer (dict or :obj:`torch.optim.Optimizer`): It can be either an - optimizer (in most cases) or a dict of optimizers (in models that - requires more than one optimizer, e.g., GAN). - work_dir (str, optional): The working directory to save checkpoints - and logs. Defaults to None. - logger (:obj:`logging.Logger`): Logger used during training. - Defaults to None. (The default value is just for backward - compatibility) - meta (dict | None): A dict records some import information such as - environment info and seed, which will be logged in logger hook. - Defaults to None. - max_epochs (int, optional): Total training epochs. - max_iters (int, optional): Total training iterations. - """ - - def __init__( - self, - model, - batch_processor=None, - optimizer=None, - work_dir=None, - logger=None, - meta=None, - max_iters=None, - max_epochs=None, - ): - if batch_processor is not None: - if not callable(batch_processor): - raise TypeError('batch_processor must be callable, ' f'but got {type(batch_processor)}') - warnings.warn( - 'batch_processor is deprecated, please implement ' 'train_step() and val_step() in the model instead.' - ) - # raise an error is `batch_processor` is not None and - # `model.train_step()` exists. - if is_module_wrapper(model): - _model = model.module - else: - _model = model - if hasattr(_model, 'train_step') or hasattr(_model, 'val_step'): - raise RuntimeError( - 'batch_processor and model.train_step()/model.val_step() ' 'cannot be both available.' - ) - else: - assert hasattr(model, 'train_step') - - # check the type of `optimizer` - if isinstance(optimizer, dict): - for name, optim in optimizer.items(): - if not isinstance(optim, Optimizer): - raise TypeError( - f'optimizer must be a dict of torch.optim.Optimizers, ' - f'but optimizer["{name}"] is a {type(optim)}' - ) - elif not isinstance(optimizer, Optimizer) and optimizer is not None: - raise TypeError( - f'optimizer must be a torch.optim.Optimizer object ' f'or dict or None, but got {type(optimizer)}' - ) - - # check the type of `logger` - if not isinstance(logger, logging.Logger): - raise TypeError(f'logger must be a logging.Logger object, ' f'but got {type(logger)}') - - # check the type of `meta` - if meta is not None and not isinstance(meta, dict): - raise TypeError(f'meta must be a dict or None, but got {type(meta)}') - - self.model = model - self.batch_processor = batch_processor - self.optimizer = optimizer - self.logger = logger - self.meta = meta - # create work_dir - if mmcv.is_str(work_dir): - self.work_dir = osp.abspath(work_dir) - mmcv.mkdir_or_exist(self.work_dir) - elif work_dir is None: - self.work_dir = None - else: - raise TypeError('"work_dir" must be a str or None') - - # get model name from the model class - if hasattr(self.model, 'module'): - self._model_name = self.model.module.__class__.__name__ - else: - self._model_name = self.model.__class__.__name__ - - self._rank, self._world_size = get_dist_info() - self.timestamp = get_time_str() - self.mode = None - self._hooks = [] - self._epoch = 0 - self._iter = 0 - self._inner_iter = 0 - - if max_epochs is not None and max_iters is not None: - raise ValueError('Only one of `max_epochs` or `max_iters` can be set.') - - self._max_epochs = max_epochs - self._max_iters = max_iters - # TODO: Redesign LogBuffer, it is not flexible and elegant enough - self.log_buffer = LogBuffer() - - @property - def model_name(self): - """str: Name of the model, usually the module class name.""" - return self._model_name - - @property - def rank(self): - """int: Rank of current process. (distributed training)""" - return self._rank - - @property - def world_size(self): - """int: Number of processes participating in the job. - (distributed training)""" - return self._world_size - - @property - def hooks(self): - """list[:obj:`Hook`]: A list of registered hooks.""" - return self._hooks - - @property - def epoch(self): - """int: Current epoch.""" - return self._epoch - - @property - def iter(self): - """int: Current iteration.""" - return self._iter - - @property - def inner_iter(self): - """int: Iteration in an epoch.""" - return self._inner_iter - - @property - def max_epochs(self): - """int: Maximum training epochs.""" - return self._max_epochs - - @property - def max_iters(self): - """int: Maximum training iterations.""" - return self._max_iters - - @abstractmethod - def train(self): - pass - - @abstractmethod - def val(self): - pass - - @abstractmethod - def run(self, data_loaders, workflow, **kwargs): - pass - - @abstractmethod - def save_checkpoint(self, out_dir, filename_tmpl, save_optimizer=True, meta=None, create_symlink=True): - pass - - def current_lr(self): - """Get current learning rates. - - Returns: - list[float] | dict[str, list[float]]: Current learning rates of all - param groups. If the runner has a dict of optimizers, this - method will return a dict. - """ - if isinstance(self.optimizer, torch.optim.Optimizer): - lr = [group['lr'] for group in self.optimizer.param_groups] - elif isinstance(self.optimizer, dict): - lr = dict() - for name, optim in self.optimizer.items(): - lr[name] = [group['lr'] for group in optim.param_groups] - else: - raise RuntimeError('lr is not applicable because optimizer does not exist.') - return lr - - def current_momentum(self): - """Get current momentums. - - Returns: - list[float] | dict[str, list[float]]: Current momentums of all - param groups. If the runner has a dict of optimizers, this - method will return a dict. - """ - - def _get_momentum(optimizer): - momentums = [] - for group in optimizer.param_groups: - if 'momentum' in group.keys(): - momentums.append(group['momentum']) - elif 'betas' in group.keys(): - momentums.append(group['betas'][0]) - else: - momentums.append(0) - return momentums - - if self.optimizer is None: - raise RuntimeError('momentum is not applicable because optimizer does not exist.') - elif isinstance(self.optimizer, torch.optim.Optimizer): - momentums = _get_momentum(self.optimizer) - elif isinstance(self.optimizer, dict): - momentums = dict() - for name, optim in self.optimizer.items(): - momentums[name] = _get_momentum(optim) - return momentums - - def register_hook(self, hook, priority='NORMAL'): - """Register a hook into the hook list. - - The hook will be inserted into a priority queue, with the specified - priority (See :class:`Priority` for details of priorities). - For hooks with the same priority, they will be triggered in the same - order as they are registered. - - Args: - hook (:obj:`Hook`): The hook to be registered. - priority (int or str or :obj:`Priority`): Hook priority. - Lower value means higher priority. - """ - assert isinstance(hook, Hook) - if hasattr(hook, 'priority'): - raise ValueError('"priority" is a reserved attribute for hooks') - priority = get_priority(priority) - hook.priority = priority - # insert the hook to a sorted list - inserted = False - for i in range(len(self._hooks) - 1, -1, -1): - if priority >= self._hooks[i].priority: - self._hooks.insert(i + 1, hook) - inserted = True - break - if not inserted: - self._hooks.insert(0, hook) - - def register_hook_from_cfg(self, hook_cfg): - """Register a hook from its cfg. - - Args: - hook_cfg (dict): Hook config. It should have at least keys 'type' - and 'priority' indicating its type and priority. - - Notes: - The specific hook class to register should not use 'type' and - 'priority' arguments during initialization. - """ - hook_cfg = hook_cfg.copy() - priority = hook_cfg.pop('priority', 'NORMAL') - hook = mmcv.build_from_cfg(hook_cfg, HOOKS) - self.register_hook(hook, priority=priority) - - def call_hook(self, fn_name): - """Call all hooks. - - Args: - fn_name (str): The function name in each hook to be called, such as - "before_train_epoch". - """ - for hook in self._hooks: - getattr(hook, fn_name)(self) - - def get_hook_info(self): - # Get hooks info in each stage - stage_hook_map = {stage: [] for stage in Hook.stages} - for hook in self.hooks: - try: - priority = Priority(hook.priority).name - except ValueError: - priority = hook.priority - classname = hook.__class__.__name__ - hook_info = f'({priority:<12}) {classname:<35}' - for trigger_stage in hook.get_triggered_stages(): - stage_hook_map[trigger_stage].append(hook_info) - - stage_hook_infos = [] - for stage in Hook.stages: - hook_infos = stage_hook_map[stage] - if len(hook_infos) > 0: - info = f'{stage}:\n' - info += '\n'.join(hook_infos) - info += '\n -------------------- ' - stage_hook_infos.append(info) - return '\n'.join(stage_hook_infos) - - def load_checkpoint(self, filename, map_location='cpu', strict=False, revise_keys=[(r'^module.', '')]): - return load_checkpoint(self.model, filename, map_location, strict, self.logger, revise_keys=revise_keys) - - def resume(self, checkpoint, resume_optimizer=True, map_location='default'): - if map_location == 'default': - if torch.cuda.is_available(): - device_id = torch.cuda.current_device() - checkpoint = self.load_checkpoint( - checkpoint, map_location=lambda storage, loc: storage.cuda(device_id) - ) - else: - checkpoint = self.load_checkpoint(checkpoint) - else: - checkpoint = self.load_checkpoint(checkpoint, map_location=map_location) - - self._epoch = checkpoint['meta']['epoch'] - self._iter = checkpoint['meta']['iter'] - if self.meta is None: - self.meta = {} - self.meta.setdefault('hook_msgs', {}) - # load `last_ckpt`, `best_score`, `best_ckpt`, etc. for hook messages - self.meta['hook_msgs'].update(checkpoint['meta'].get('hook_msgs', {})) - - # Re-calculate the number of iterations when resuming - # models with different number of GPUs - if 'config' in checkpoint['meta']: - config = mmcv.Config.fromstring(checkpoint['meta']['config'], file_format='.py') - previous_gpu_ids = config.get('gpu_ids', None) - if previous_gpu_ids and len(previous_gpu_ids) > 0 and len(previous_gpu_ids) != self.world_size: - self._iter = int(self._iter * len(previous_gpu_ids) / self.world_size) - self.logger.info('the iteration number is changed due to ' 'change of GPU number') - - # resume meta information meta - self.meta = checkpoint['meta'] - - if 'optimizer' in checkpoint and resume_optimizer: - if isinstance(self.optimizer, Optimizer): - self.optimizer.load_state_dict(checkpoint['optimizer']) - elif isinstance(self.optimizer, dict): - for k in self.optimizer.keys(): - self.optimizer[k].load_state_dict(checkpoint['optimizer'][k]) - else: - raise TypeError('Optimizer should be dict or torch.optim.Optimizer ' f'but got {type(self.optimizer)}') - - self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) - - def register_lr_hook(self, lr_config): - if lr_config is None: - return - elif isinstance(lr_config, dict): - assert 'policy' in lr_config - policy_type = lr_config.pop('policy') - # If the type of policy is all in lower case, e.g., 'cyclic', - # then its first letter will be capitalized, e.g., to be 'Cyclic'. - # This is for the convenient usage of Lr updater. - # Since this is not applicable for ` - # CosineAnnealingLrUpdater`, - # the string will not be changed if it contains capital letters. - if policy_type == policy_type.lower(): - policy_type = policy_type.title() - hook_type = policy_type + 'LrUpdaterHook' - lr_config['type'] = hook_type - hook = mmcv.build_from_cfg(lr_config, HOOKS) - else: - hook = lr_config - self.register_hook(hook, priority='VERY_HIGH') - - def register_momentum_hook(self, momentum_config): - if momentum_config is None: - return - if isinstance(momentum_config, dict): - assert 'policy' in momentum_config - policy_type = momentum_config.pop('policy') - # If the type of policy is all in lower case, e.g., 'cyclic', - # then its first letter will be capitalized, e.g., to be 'Cyclic'. - # This is for the convenient usage of momentum updater. - # Since this is not applicable for - # `CosineAnnealingMomentumUpdater`, - # the string will not be changed if it contains capital letters. - if policy_type == policy_type.lower(): - policy_type = policy_type.title() - hook_type = policy_type + 'MomentumUpdaterHook' - momentum_config['type'] = hook_type - hook = mmcv.build_from_cfg(momentum_config, HOOKS) - else: - hook = momentum_config - self.register_hook(hook, priority='HIGH') - - def register_optimizer_hook(self, optimizer_config): - if optimizer_config is None: - return - if isinstance(optimizer_config, dict): - optimizer_config.setdefault('type', 'OptimizerHook') - hook = mmcv.build_from_cfg(optimizer_config, HOOKS) - else: - hook = optimizer_config - self.register_hook(hook, priority='ABOVE_NORMAL') - - def register_checkpoint_hook(self, checkpoint_config): - if checkpoint_config is None: - return - if isinstance(checkpoint_config, dict): - checkpoint_config.setdefault('type', 'CheckpointHook') - hook = mmcv.build_from_cfg(checkpoint_config, HOOKS) - else: - hook = checkpoint_config - self.register_hook(hook, priority='NORMAL') - - def register_logger_hooks(self, log_config): - if log_config is None: - return - log_interval = log_config['interval'] - for info in log_config['hooks']: - logger_hook = mmcv.build_from_cfg(info, HOOKS, default_args=dict(interval=log_interval)) - self.register_hook(logger_hook, priority='VERY_LOW') - - def register_timer_hook(self, timer_config): - if timer_config is None: - return - if isinstance(timer_config, dict): - timer_config_ = copy.deepcopy(timer_config) - hook = mmcv.build_from_cfg(timer_config_, HOOKS) - else: - hook = timer_config - self.register_hook(hook, priority='LOW') - - def register_custom_hooks(self, custom_config): - if custom_config is None: - return - - if not isinstance(custom_config, list): - custom_config = [custom_config] - - for item in custom_config: - if isinstance(item, dict): - self.register_hook_from_cfg(item) - else: - self.register_hook(item, priority='NORMAL') - - def register_profiler_hook(self, profiler_config): - if profiler_config is None: - return - if isinstance(profiler_config, dict): - profiler_config.setdefault('type', 'ProfilerHook') - hook = mmcv.build_from_cfg(profiler_config, HOOKS) - else: - hook = profiler_config - self.register_hook(hook) - - def register_training_hooks( - self, - lr_config, - optimizer_config=None, - checkpoint_config=None, - log_config=None, - momentum_config=None, - timer_config=dict(type='IterTimerHook'), - custom_hooks_config=None, - ): - """Register default and custom hooks for training. - - Default and custom hooks include: - - +----------------------+-------------------------+ - | Hooks | Priority | - +======================+=========================+ - | LrUpdaterHook | VERY_HIGH (10) | - +----------------------+-------------------------+ - | MomentumUpdaterHook | HIGH (30) | - +----------------------+-------------------------+ - | OptimizerStepperHook | ABOVE_NORMAL (40) | - +----------------------+-------------------------+ - | CheckpointSaverHook | NORMAL (50) | - +----------------------+-------------------------+ - | IterTimerHook | LOW (70) | - +----------------------+-------------------------+ - | LoggerHook(s) | VERY_LOW (90) | - +----------------------+-------------------------+ - | CustomHook(s) | defaults to NORMAL (50) | - +----------------------+-------------------------+ - - If custom hooks have same priority with default hooks, custom hooks - will be triggered after default hooks. - """ - self.register_lr_hook(lr_config) - self.register_momentum_hook(momentum_config) - self.register_optimizer_hook(optimizer_config) - self.register_checkpoint_hook(checkpoint_config) - self.register_timer_hook(timer_config) - self.register_logger_hooks(log_config) - self.register_custom_hooks(custom_hooks_config) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py deleted file mode 100644 index aaebf844ced3..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import copy - -from ..utils import Registry - -RUNNERS = Registry('runner') -RUNNER_BUILDERS = Registry('runner builder') - - -def build_runner_constructor(cfg): - return RUNNER_BUILDERS.build(cfg) - - -def build_runner(cfg, default_args=None): - runner_cfg = copy.deepcopy(cfg) - constructor_type = runner_cfg.pop('constructor', 'DefaultRunnerConstructor') - runner_constructor = build_runner_constructor( - dict(type=constructor_type, runner_cfg=runner_cfg, default_args=default_args) - ) - runner = runner_constructor() - return runner diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py deleted file mode 100644 index 479da4f51b26..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import io -import os -import os.path as osp -import pkgutil -import re -import time -import warnings -from collections import OrderedDict -from importlib import import_module -from tempfile import TemporaryDirectory - -import torch -import torchvision -from torch.optim import Optimizer -from torch.utils import model_zoo - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - -from ..fileio import FileClient -from ..fileio import load as load_file -from ..parallel import is_module_wrapper -from ..utils import mkdir_or_exist -from .dist_utils import get_dist_info - -ENV_MMCV_HOME = 'MMCV_HOME' -ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' -DEFAULT_CACHE_DIR = '~/.cache' - - -def _get_mmcv_home(): - mmcv_home = os.path.expanduser( - os.getenv(ENV_MMCV_HOME, os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv')) - ) - - mkdir_or_exist(mmcv_home) - return mmcv_home - - -def load_state_dict(module, state_dict, strict=False, logger=None): - """Load state_dict to a module. - - This method is modified from :meth:`torch.nn.Module.load_state_dict`. - Default value for ``strict`` is set to ``False`` and the message for - param mismatch will be shown even if strict is False. - - Args: - module (Module): Module that receives the state_dict. - state_dict (OrderedDict): Weights. - strict (bool): whether to strictly enforce that the keys - in :attr:`state_dict` match the keys returned by this module's - :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. - logger (:obj:`logging.Logger`, optional): Logger to log the error - message. If not specified, print function will be used. - """ - unexpected_keys = [] - all_missing_keys = [] - err_msg = [] - - metadata = getattr(state_dict, '_metadata', None) - state_dict = state_dict.copy() - if metadata is not None: - state_dict._metadata = metadata - - # use _load_from_state_dict to enable checkpoint version control - def load(module, prefix=''): - # recursively check parallel module in case that the model has a - # complicated structure, e.g., nn.Module(nn.Module(DDP)) - if is_module_wrapper(module): - module = module.module - local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) - module._load_from_state_dict( - state_dict, prefix, local_metadata, True, all_missing_keys, unexpected_keys, err_msg - ) - for name, child in module._modules.items(): - if child is not None: - load(child, prefix + name + '.') - - load(module) - load = None # break load->load reference cycle - - # ignore "num_batches_tracked" of BN layers - missing_keys = [key for key in all_missing_keys if 'num_batches_tracked' not in key] - - if unexpected_keys: - err_msg.append('unexpected key in source ' f'state_dict: {", ".join(unexpected_keys)}\n') - if missing_keys: - err_msg.append(f'missing keys in source state_dict: {", ".join(missing_keys)}\n') - - rank, _ = get_dist_info() - if len(err_msg) > 0 and rank == 0: - err_msg.insert(0, 'The model and loaded state dict do not match exactly\n') - err_msg = '\n'.join(err_msg) - if strict: - raise RuntimeError(err_msg) - elif logger is not None: - logger.warning(err_msg) - else: - print(err_msg) - - -def get_torchvision_models(): - model_urls = dict() - for _, name, ispkg in pkgutil.walk_packages(torchvision.models.__path__): - if ispkg: - continue - _zoo = import_module(f'torchvision.models.{name}') - if hasattr(_zoo, 'model_urls'): - _urls = getattr(_zoo, 'model_urls') - model_urls.update(_urls) - return model_urls - - -def get_external_models(): - mmcv_home = _get_mmcv_home() - default_json_path = osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json') - default_urls = load_file(default_json_path) - assert isinstance(default_urls, dict) - external_json_path = osp.join(mmcv_home, 'open_mmlab.json') - if osp.exists(external_json_path): - external_urls = load_file(external_json_path) - assert isinstance(external_urls, dict) - default_urls.update(external_urls) - - return default_urls - - -def get_mmcls_models(): - mmcls_json_path = osp.join(mmcv.__path__[0], 'model_zoo/mmcls.json') - mmcls_urls = load_file(mmcls_json_path) - - return mmcls_urls - - -def get_deprecated_model_names(): - deprecate_json_path = osp.join(mmcv.__path__[0], 'model_zoo/deprecated.json') - deprecate_urls = load_file(deprecate_json_path) - assert isinstance(deprecate_urls, dict) - - return deprecate_urls - - -def _process_mmcls_checkpoint(checkpoint): - state_dict = checkpoint['state_dict'] - new_state_dict = OrderedDict() - for k, v in state_dict.items(): - if k.startswith('backbone.'): - new_state_dict[k[9:]] = v - new_checkpoint = dict(state_dict=new_state_dict) - - return new_checkpoint - - -class CheckpointLoader: - """A general checkpoint loader to manage all schemes.""" - - _schemes = {} - - @classmethod - def _register_scheme(cls, prefixes, loader, force=False): - if isinstance(prefixes, str): - prefixes = [prefixes] - else: - assert isinstance(prefixes, (list, tuple)) - for prefix in prefixes: - if (prefix not in cls._schemes) or force: - cls._schemes[prefix] = loader - else: - raise KeyError( - f'{prefix} is already registered as a loader backend, ' - 'add "force=True" if you want to override it' - ) - # sort, longer prefixes take priority - cls._schemes = OrderedDict(sorted(cls._schemes.items(), key=lambda t: t[0], reverse=True)) - - @classmethod - def register_scheme(cls, prefixes, loader=None, force=False): - """Register a loader to CheckpointLoader. - - This method can be used as a normal class method or a decorator. - - Args: - prefixes (str or list[str] or tuple[str]): - The prefix of the registered loader. - loader (function, optional): The loader function to be registered. - When this method is used as a decorator, loader is None. - Defaults to None. - force (bool, optional): Whether to override the loader - if the prefix has already been registered. Defaults to False. - """ - - if loader is not None: - cls._register_scheme(prefixes, loader, force=force) - return - - def _register(loader_cls): - cls._register_scheme(prefixes, loader_cls, force=force) - return loader_cls - - return _register - - @classmethod - def _get_checkpoint_loader(cls, path): - """Finds a loader that supports the given path. Falls back to the local - loader if no other loader is found. - - Args: - path (str): checkpoint path - - Returns: - loader (function): checkpoint loader - """ - - for p in cls._schemes: - if path.startswith(p): - return cls._schemes[p] - - @classmethod - def load_checkpoint(cls, filename, map_location=None, logger=None): - """load checkpoint through URL scheme path. - - Args: - filename (str): checkpoint file name with given prefix - map_location (str, optional): Same as :func:`torch.load`. - Default: None - logger (:mod:`logging.Logger`, optional): The logger for message. - Default: None - - Returns: - dict or OrderedDict: The loaded checkpoint. - """ - - checkpoint_loader = cls._get_checkpoint_loader(filename) - class_name = checkpoint_loader.__name__ - mmcv.print_log(f'load checkpoint from {class_name[10:]} path: {filename}', logger) - return checkpoint_loader(filename, map_location) - - -@CheckpointLoader.register_scheme(prefixes='') -def load_from_local(filename, map_location): - """load checkpoint by local file path. - - Args: - filename (str): local checkpoint file path - map_location (str, optional): Same as :func:`torch.load`. - - Returns: - dict or OrderedDict: The loaded checkpoint. - """ - - if not osp.isfile(filename): - raise IOError(f'{filename} is not a checkpoint file') - checkpoint = torch.load(filename, map_location=map_location) - return checkpoint - - -@CheckpointLoader.register_scheme(prefixes=('http://', 'https://')) -def load_from_http(filename, map_location=None, model_dir=None): - """load checkpoint through HTTP or HTTPS scheme path. In distributed - setting, this function only download checkpoint at local rank 0. - - Args: - filename (str): checkpoint file path with modelzoo or - torchvision prefix - map_location (str, optional): Same as :func:`torch.load`. - model_dir (string, optional): directory in which to save the object, - Default: None - - Returns: - dict or OrderedDict: The loaded checkpoint. - """ - rank, world_size = get_dist_info() - rank = int(os.environ.get('LOCAL_RANK', rank)) - if rank == 0: - checkpoint = model_zoo.load_url(filename, model_dir=model_dir, map_location=map_location) - if world_size > 1: - torch.distributed.barrier() - if rank > 0: - checkpoint = model_zoo.load_url(filename, model_dir=model_dir, map_location=map_location) - return checkpoint - - -@CheckpointLoader.register_scheme(prefixes='pavi://') -def load_from_pavi(filename, map_location=None): - """load checkpoint through the file path prefixed with pavi. In distributed - setting, this function download ckpt at all ranks to different temporary - directories. - - Args: - filename (str): checkpoint file path with pavi prefix - map_location (str, optional): Same as :func:`torch.load`. - Default: None - - Returns: - dict or OrderedDict: The loaded checkpoint. - """ - assert filename.startswith('pavi://'), f'Expected filename startswith `pavi://`, but get {filename}' - model_path = filename[7:] - - try: - from pavi import modelcloud - except ImportError: - raise ImportError('Please install pavi to load checkpoint from modelcloud.') - - model = modelcloud.get(model_path) - with TemporaryDirectory() as tmp_dir: - downloaded_file = osp.join(tmp_dir, model.name) - model.download(downloaded_file) - checkpoint = torch.load(downloaded_file, map_location=map_location) - return checkpoint - - -@CheckpointLoader.register_scheme(prefixes='s3://') -def load_from_ceph(filename, map_location=None, backend='petrel'): - """load checkpoint through the file path prefixed with s3. In distributed - setting, this function download ckpt at all ranks to different temporary - directories. - - Args: - filename (str): checkpoint file path with s3 prefix - map_location (str, optional): Same as :func:`torch.load`. - backend (str, optional): The storage backend type. Options are 'ceph', - 'petrel'. Default: 'petrel'. - - .. warning:: - :class:`mmcv.fileio.file_client.CephBackend` will be deprecated, - please use :class:`mmcv.fileio.file_client.PetrelBackend` instead. - - Returns: - dict or OrderedDict: The loaded checkpoint. - """ - allowed_backends = ['ceph', 'petrel'] - if backend not in allowed_backends: - raise ValueError(f'Load from Backend {backend} is not supported.') - - if backend == 'ceph': - warnings.warn('CephBackend will be deprecated, please use PetrelBackend instead') - - # CephClient and PetrelBackend have the same prefix 's3://' and the latter - # will be chosen as default. If PetrelBackend can not be instantiated - # successfully, the CephClient will be chosen. - try: - file_client = FileClient(backend=backend) - except ImportError: - allowed_backends.remove(backend) - file_client = FileClient(backend=allowed_backends[0]) - - with io.BytesIO(file_client.get(filename)) as buffer: - checkpoint = torch.load(buffer, map_location=map_location) - return checkpoint - - -@CheckpointLoader.register_scheme(prefixes=('modelzoo://', 'torchvision://')) -def load_from_torchvision(filename, map_location=None): - """load checkpoint through the file path prefixed with modelzoo or - torchvision. - - Args: - filename (str): checkpoint file path with modelzoo or - torchvision prefix - map_location (str, optional): Same as :func:`torch.load`. - - Returns: - dict or OrderedDict: The loaded checkpoint. - """ - model_urls = get_torchvision_models() - if filename.startswith('modelzoo://'): - warnings.warn('The URL scheme of "modelzoo://" is deprecated, please ' 'use "torchvision://" instead') - model_name = filename[11:] - else: - model_name = filename[14:] - return load_from_http(model_urls[model_name], map_location=map_location) - - -@CheckpointLoader.register_scheme(prefixes=('open-mmlab://', 'openmmlab://')) -def load_from_openmmlab(filename, map_location=None): - """load checkpoint through the file path prefixed with open-mmlab or - openmmlab. - - Args: - filename (str): checkpoint file path with open-mmlab or - openmmlab prefix - map_location (str, optional): Same as :func:`torch.load`. - Default: None - - Returns: - dict or OrderedDict: The loaded checkpoint. - """ - - model_urls = get_external_models() - prefix_str = 'open-mmlab://' - if filename.startswith(prefix_str): - model_name = filename[13:] - else: - model_name = filename[12:] - prefix_str = 'openmmlab://' - - deprecated_urls = get_deprecated_model_names() - if model_name in deprecated_urls: - warnings.warn( - f'{prefix_str}{model_name} is deprecated in favor ' f'of {prefix_str}{deprecated_urls[model_name]}' - ) - model_name = deprecated_urls[model_name] - model_url = model_urls[model_name] - # check if is url - if model_url.startswith(('http://', 'https://')): - checkpoint = load_from_http(model_url, map_location=map_location) - else: - filename = osp.join(_get_mmcv_home(), model_url) - if not osp.isfile(filename): - raise IOError(f'{filename} is not a checkpoint file') - checkpoint = torch.load(filename, map_location=map_location) - return checkpoint - - -@CheckpointLoader.register_scheme(prefixes='mmcls://') -def load_from_mmcls(filename, map_location=None): - """load checkpoint through the file path prefixed with mmcls. - - Args: - filename (str): checkpoint file path with mmcls prefix - map_location (str, optional): Same as :func:`torch.load`. - - Returns: - dict or OrderedDict: The loaded checkpoint. - """ - - model_urls = get_mmcls_models() - model_name = filename[8:] - checkpoint = load_from_http(model_urls[model_name], map_location=map_location) - checkpoint = _process_mmcls_checkpoint(checkpoint) - return checkpoint - - -def _load_checkpoint(filename, map_location=None, logger=None): - """Load checkpoint from somewhere (modelzoo, file, url). - - Args: - filename (str): Accept local filepath, URL, ``torchvision://xxx``, - ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for - details. - map_location (str, optional): Same as :func:`torch.load`. - Default: None. - logger (:mod:`logging.Logger`, optional): The logger for error message. - Default: None - - Returns: - dict or OrderedDict: The loaded checkpoint. It can be either an - OrderedDict storing model weights or a dict containing other - information, which depends on the checkpoint. - """ - return CheckpointLoader.load_checkpoint(filename, map_location, logger) - - -def _load_checkpoint_with_prefix(prefix, filename, map_location=None): - """Load partial pretrained model with specific prefix. - - Args: - prefix (str): The prefix of sub-module. - filename (str): Accept local filepath, URL, ``torchvision://xxx``, - ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for - details. - map_location (str | None): Same as :func:`torch.load`. Default: None. - - Returns: - dict or OrderedDict: The loaded checkpoint. - """ - - checkpoint = _load_checkpoint(filename, map_location=map_location) - - if 'state_dict' in checkpoint: - state_dict = checkpoint['state_dict'] - else: - state_dict = checkpoint - if not prefix.endswith('.'): - prefix += '.' - prefix_len = len(prefix) - - state_dict = {k[prefix_len:]: v for k, v in state_dict.items() if k.startswith(prefix)} - - assert state_dict, f'{prefix} is not in the pretrained model' - return state_dict - - -def load_checkpoint(model, filename, map_location=None, strict=False, logger=None, revise_keys=[(r'^module\.', '')]): - """Load checkpoint from a file or URI. - - Args: - model (Module): Module to load checkpoint. - filename (str): Accept local filepath, URL, ``torchvision://xxx``, - ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for - details. - map_location (str): Same as :func:`torch.load`. - strict (bool): Whether to allow different params for the model and - checkpoint. - logger (:mod:`logging.Logger` or None): The logger for error message. - revise_keys (list): A list of customized keywords to modify the - state_dict in checkpoint. Each item is a (pattern, replacement) - pair of the regular expression operations. Default: strip - the prefix 'module.' by [(r'^module\\.', '')]. - - Returns: - dict or OrderedDict: The loaded checkpoint. - """ - checkpoint = _load_checkpoint(filename, map_location, logger) - # OrderedDict is a subclass of dict - if not isinstance(checkpoint, dict): - raise RuntimeError(f'No state_dict found in checkpoint file {filename}') - # get state_dict from checkpoint - if 'state_dict' in checkpoint: - state_dict = checkpoint['state_dict'] - else: - state_dict = checkpoint - - # strip prefix of state_dict - metadata = getattr(state_dict, '_metadata', OrderedDict()) - for p, r in revise_keys: - state_dict = OrderedDict({re.sub(p, r, k): v for k, v in state_dict.items()}) - # Keep metadata in state_dict - state_dict._metadata = metadata - - # load state_dict - load_state_dict(model, state_dict, strict, logger) - return checkpoint - - -def weights_to_cpu(state_dict): - """Copy a model state_dict to cpu. - - Args: - state_dict (OrderedDict): Model weights on GPU. - - Returns: - OrderedDict: Model weights on GPU. - """ - state_dict_cpu = OrderedDict() - for key, val in state_dict.items(): - state_dict_cpu[key] = val.cpu() - # Keep metadata in state_dict - state_dict_cpu._metadata = getattr(state_dict, '_metadata', OrderedDict()) - return state_dict_cpu - - -def _save_to_state_dict(module, destination, prefix, keep_vars): - """Saves module state to `destination` dictionary. - - This method is modified from :meth:`torch.nn.Module._save_to_state_dict`. - - Args: - module (nn.Module): The module to generate state_dict. - destination (dict): A dict where state will be stored. - prefix (str): The prefix for parameters and buffers used in this - module. - """ - for name, param in module._parameters.items(): - if param is not None: - destination[prefix + name] = param if keep_vars else param.detach() - for name, buf in module._buffers.items(): - # remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d - if buf is not None: - destination[prefix + name] = buf if keep_vars else buf.detach() - - -def get_state_dict(module, destination=None, prefix='', keep_vars=False): - """Returns a dictionary containing a whole state of the module. - - Both parameters and persistent buffers (e.g. running averages) are - included. Keys are corresponding parameter and buffer names. - - This method is modified from :meth:`torch.nn.Module.state_dict` to - recursively check parallel module in case that the model has a complicated - structure, e.g., nn.Module(nn.Module(DDP)). - - Args: - module (nn.Module): The module to generate state_dict. - destination (OrderedDict): Returned dict for the state of the - module. - prefix (str): Prefix of the key. - keep_vars (bool): Whether to keep the variable property of the - parameters. Default: False. - - Returns: - dict: A dictionary containing a whole state of the module. - """ - # recursively check parallel module in case that the model has a - # complicated structure, e.g., nn.Module(nn.Module(DDP)) - if is_module_wrapper(module): - module = module.module - - # below is the same as torch.nn.Module.state_dict() - if destination is None: - destination = OrderedDict() - destination._metadata = OrderedDict() - destination._metadata[prefix[:-1]] = local_metadata = dict(version=module._version) - _save_to_state_dict(module, destination, prefix, keep_vars) - for name, child in module._modules.items(): - if child is not None: - get_state_dict(child, destination, prefix + name + '.', keep_vars=keep_vars) - for hook in module._state_dict_hooks.values(): - hook_result = hook(module, destination, prefix, local_metadata) - if hook_result is not None: - destination = hook_result - return destination - - -def save_checkpoint(model, filename, optimizer=None, meta=None, file_client_args=None): - """Save checkpoint to file. - - The checkpoint will have 3 fields: ``meta``, ``state_dict`` and - ``optimizer``. By default ``meta`` will contain version and time info. - - Args: - model (Module): Module whose params are to be saved. - filename (str): Checkpoint filename. - optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. - meta (dict, optional): Metadata to be saved in checkpoint. - file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. - Default: None. - `New in version 1.3.16.` - """ - if meta is None: - meta = {} - elif not isinstance(meta, dict): - raise TypeError(f'meta must be a dict or None, but got {type(meta)}') - meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) - - if is_module_wrapper(model): - model = model.module - - if hasattr(model, 'CLASSES') and model.CLASSES is not None: - # save class name to the meta - meta.update(CLASSES=model.CLASSES) - - checkpoint = {'meta': meta, 'state_dict': weights_to_cpu(get_state_dict(model))} - # save optimizer state dict in the checkpoint - if isinstance(optimizer, Optimizer): - checkpoint['optimizer'] = optimizer.state_dict() - elif isinstance(optimizer, dict): - checkpoint['optimizer'] = {} - for name, optim in optimizer.items(): - checkpoint['optimizer'][name] = optim.state_dict() - - if filename.startswith('pavi://'): - if file_client_args is not None: - raise ValueError( - 'file_client_args should be "None" if filename starts with' f'"pavi://", but got {file_client_args}' - ) - try: - from pavi import exception, modelcloud - except ImportError: - raise ImportError('Please install pavi to load checkpoint from modelcloud.') - model_path = filename[7:] - root = modelcloud.Folder() - model_dir, model_name = osp.split(model_path) - try: - model = modelcloud.get(model_dir) - except exception.NodeNotFoundError: - model = root.create_training_model(model_dir) - with TemporaryDirectory() as tmp_dir: - checkpoint_file = osp.join(tmp_dir, model_name) - with open(checkpoint_file, 'wb') as f: - torch.save(checkpoint, f) - f.flush() - model.create_file(checkpoint_file, name=model_name) - else: - file_client = FileClient.infer_client(file_client_args, filename) - with io.BytesIO() as f: - torch.save(checkpoint, f) - file_client.put(f.getvalue(), filename) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py deleted file mode 100644 index c840d803f743..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py +++ /dev/null @@ -1,43 +0,0 @@ -from .builder import RUNNER_BUILDERS, RUNNERS - - -@RUNNER_BUILDERS.register_module() -class DefaultRunnerConstructor: - """Default constructor for runners. - - Custom existing `Runner` like `EpocBasedRunner` though `RunnerConstructor`. - For example, We can inject some new properties and functions for `Runner`. - - Example: - >>> from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import RUNNER_BUILDERS, build_runner - >>> # Define a new RunnerReconstructor - >>> @RUNNER_BUILDERS.register_module() - >>> class MyRunnerConstructor: - ... def __init__(self, runner_cfg, default_args=None): - ... if not isinstance(runner_cfg, dict): - ... raise TypeError('runner_cfg should be a dict', - ... f'but got {type(runner_cfg)}') - ... self.runner_cfg = runner_cfg - ... self.default_args = default_args - ... - ... def __call__(self): - ... runner = RUNNERS.build(self.runner_cfg, - ... default_args=self.default_args) - ... # Add new properties for existing runner - ... runner.my_name = 'my_runner' - ... runner.my_function = lambda self: print(self.my_name) - ... ... - >>> # build your runner - >>> runner_cfg = dict(type='EpochBasedRunner', max_epochs=40, - ... constructor='MyRunnerConstructor') - >>> runner = build_runner(runner_cfg) - """ - - def __init__(self, runner_cfg, default_args=None): - if not isinstance(runner_cfg, dict): - raise TypeError('runner_cfg should be a dict', f'but got {type(runner_cfg)}') - self.runner_cfg = runner_cfg - self.default_args = default_args - - def __call__(self): - return RUNNERS.build(self.runner_cfg, default_args=self.default_args) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py deleted file mode 100644 index 19799b785be9..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import functools -import os -import subprocess -from collections import OrderedDict - -import torch -import torch.multiprocessing as mp -from torch import distributed as dist -from torch._utils import _flatten_dense_tensors, _take_tensors, _unflatten_dense_tensors - - -def init_dist(launcher, backend='nccl', **kwargs): - if mp.get_start_method(allow_none=True) is None: - mp.set_start_method('spawn') - if launcher == 'pytorch': - _init_dist_pytorch(backend, **kwargs) - elif launcher == 'mpi': - _init_dist_mpi(backend, **kwargs) - elif launcher == 'slurm': - _init_dist_slurm(backend, **kwargs) - else: - raise ValueError(f'Invalid launcher type: {launcher}') - - -def _init_dist_pytorch(backend, **kwargs): - # TODO: use local_rank instead of rank % num_gpus - rank = int(os.environ['RANK']) - num_gpus = torch.cuda.device_count() - torch.cuda.set_device(rank % num_gpus) - dist.init_process_group(backend=backend, **kwargs) - - -def _init_dist_mpi(backend, **kwargs): - # TODO: use local_rank instead of rank % num_gpus - rank = int(os.environ['OMPI_COMM_WORLD_RANK']) - num_gpus = torch.cuda.device_count() - torch.cuda.set_device(rank % num_gpus) - dist.init_process_group(backend=backend, **kwargs) - - -def _init_dist_slurm(backend, port=None): - """Initialize slurm distributed training environment. - - If argument ``port`` is not specified, then the master port will be system - environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system - environment variable, then a default port ``29500`` will be used. - - Args: - backend (str): Backend of torch.distributed. - port (int, optional): Master port. Defaults to None. - """ - proc_id = int(os.environ['SLURM_PROCID']) - ntasks = int(os.environ['SLURM_NTASKS']) - node_list = os.environ['SLURM_NODELIST'] - num_gpus = torch.cuda.device_count() - torch.cuda.set_device(proc_id % num_gpus) - addr = subprocess.getoutput(f'scontrol show hostname {node_list} | head -n1') - # specify master port - if port is not None: - os.environ['MASTER_PORT'] = str(port) - elif 'MASTER_PORT' in os.environ: - pass # use MASTER_PORT in the environment variable - else: - # 29500 is torch.distributed default port - os.environ['MASTER_PORT'] = '29500' - # use MASTER_ADDR in the environment variable if it already exists - if 'MASTER_ADDR' not in os.environ: - os.environ['MASTER_ADDR'] = addr - os.environ['WORLD_SIZE'] = str(ntasks) - os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) - os.environ['RANK'] = str(proc_id) - dist.init_process_group(backend=backend) - - -def get_dist_info(): - if dist.is_available() and dist.is_initialized(): - rank = dist.get_rank() - world_size = dist.get_world_size() - else: - rank = 0 - world_size = 1 - return rank, world_size - - -def master_only(func): - @functools.wraps(func) - def wrapper(*args, **kwargs): - rank, _ = get_dist_info() - if rank == 0: - return func(*args, **kwargs) - - return wrapper - - -def allreduce_params(params, coalesce=True, bucket_size_mb=-1): - """Allreduce parameters. - - Args: - params (list[torch.Parameters]): List of parameters or buffers of a - model. - coalesce (bool, optional): Whether allreduce parameters as a whole. - Defaults to True. - bucket_size_mb (int, optional): Size of bucket, the unit is MB. - Defaults to -1. - """ - _, world_size = get_dist_info() - if world_size == 1: - return - params = [param.data for param in params] - if coalesce: - _allreduce_coalesced(params, world_size, bucket_size_mb) - else: - for tensor in params: - dist.all_reduce(tensor.div_(world_size)) - - -def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): - """Allreduce gradients. - - Args: - params (list[torch.Parameters]): List of parameters of a model - coalesce (bool, optional): Whether allreduce parameters as a whole. - Defaults to True. - bucket_size_mb (int, optional): Size of bucket, the unit is MB. - Defaults to -1. - """ - grads = [param.grad.data for param in params if param.requires_grad and param.grad is not None] - _, world_size = get_dist_info() - if world_size == 1: - return - if coalesce: - _allreduce_coalesced(grads, world_size, bucket_size_mb) - else: - for tensor in grads: - dist.all_reduce(tensor.div_(world_size)) - - -def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): - if bucket_size_mb > 0: - bucket_size_bytes = bucket_size_mb * 1024 * 1024 - buckets = _take_tensors(tensors, bucket_size_bytes) - else: - buckets = OrderedDict() - for tensor in tensors: - tp = tensor.type() - if tp not in buckets: - buckets[tp] = [] - buckets[tp].append(tensor) - buckets = buckets.values() - - for bucket in buckets: - flat_tensors = _flatten_dense_tensors(bucket) - dist.all_reduce(flat_tensors) - flat_tensors.div_(world_size) - for tensor, synced in zip(bucket, _unflatten_dense_tensors(flat_tensors, bucket)): - tensor.copy_(synced) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py deleted file mode 100644 index ba7a97fa0241..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os.path as osp -import platform -import shutil -import time -import warnings - -import torch - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from .base_runner import BaseRunner -from .builder import RUNNERS -from .checkpoint import save_checkpoint -from .utils import get_host_info - - -@RUNNERS.register_module() -class EpochBasedRunner(BaseRunner): - """Epoch-based Runner. - - This runner train models epoch by epoch. - """ - - def run_iter(self, data_batch, train_mode, **kwargs): - if self.batch_processor is not None: - outputs = self.batch_processor(self.model, data_batch, train_mode=train_mode, **kwargs) - elif train_mode: - outputs = self.model.train_step(data_batch, self.optimizer, **kwargs) - else: - outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) - if not isinstance(outputs, dict): - raise TypeError('"batch_processor()" or "model.train_step()"' 'and "model.val_step()" must return a dict') - if 'log_vars' in outputs: - self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) - self.outputs = outputs - - def train(self, data_loader, **kwargs): - self.model.train() - self.mode = 'train' - self.data_loader = data_loader - self._max_iters = self._max_epochs * len(self.data_loader) - self.call_hook('before_train_epoch') - time.sleep(2) # Prevent possible deadlock during epoch transition - for i, data_batch in enumerate(self.data_loader): - self._inner_iter = i - self.call_hook('before_train_iter') - self.run_iter(data_batch, train_mode=True, **kwargs) - self.call_hook('after_train_iter') - self._iter += 1 - - self.call_hook('after_train_epoch') - self._epoch += 1 - - @torch.no_grad() - def val(self, data_loader, **kwargs): - self.model.eval() - self.mode = 'val' - self.data_loader = data_loader - self.call_hook('before_val_epoch') - time.sleep(2) # Prevent possible deadlock during epoch transition - for i, data_batch in enumerate(self.data_loader): - self._inner_iter = i - self.call_hook('before_val_iter') - self.run_iter(data_batch, train_mode=False) - self.call_hook('after_val_iter') - - self.call_hook('after_val_epoch') - - def run(self, data_loaders, workflow, max_epochs=None, **kwargs): - """Start running. - - Args: - data_loaders (list[:obj:`DataLoader`]): Dataloaders for training - and validation. - workflow (list[tuple]): A list of (phase, epochs) to specify the - running order and epochs. E.g, [('train', 2), ('val', 1)] means - running 2 epochs for training and 1 epoch for validation, - iteratively. - """ - assert isinstance(data_loaders, list) - assert mmcv.is_list_of(workflow, tuple) - assert len(data_loaders) == len(workflow) - if max_epochs is not None: - warnings.warn( - 'setting max_epochs in run is deprecated, ' 'please set max_epochs in runner_config', - DeprecationWarning, - ) - self._max_epochs = max_epochs - - assert self._max_epochs is not None, 'max_epochs must be specified during instantiation' - - for i, flow in enumerate(workflow): - mode, epochs = flow - if mode == 'train': - self._max_iters = self._max_epochs * len(data_loaders[i]) - break - - work_dir = self.work_dir if self.work_dir is not None else 'NONE' - self.logger.info('Start running, host: %s, work_dir: %s', get_host_info(), work_dir) - self.logger.info('Hooks will be executed in the following order:\n%s', self.get_hook_info()) - self.logger.info('workflow: %s, max: %d epochs', workflow, self._max_epochs) - self.call_hook('before_run') - - while self.epoch < self._max_epochs: - for i, flow in enumerate(workflow): - mode, epochs = flow - if isinstance(mode, str): # self.train() - if not hasattr(self, mode): - raise ValueError(f'runner has no method named "{mode}" to run an ' 'epoch') - epoch_runner = getattr(self, mode) - else: - raise TypeError('mode in workflow must be a str, but got {}'.format(type(mode))) - - for _ in range(epochs): - if mode == 'train' and self.epoch >= self._max_epochs: - break - epoch_runner(data_loaders[i], **kwargs) - - time.sleep(1) # wait for some hooks like loggers to finish - self.call_hook('after_run') - - def save_checkpoint( - self, out_dir, filename_tmpl='epoch_{}.pth', save_optimizer=True, meta=None, create_symlink=True - ): - """Save the checkpoint. - - Args: - out_dir (str): The directory that checkpoints are saved. - filename_tmpl (str, optional): The checkpoint filename template, - which contains a placeholder for the epoch number. - Defaults to 'epoch_{}.pth'. - save_optimizer (bool, optional): Whether to save the optimizer to - the checkpoint. Defaults to True. - meta (dict, optional): The meta information to be saved in the - checkpoint. Defaults to None. - create_symlink (bool, optional): Whether to create a symlink - "latest.pth" to point to the latest checkpoint. - Defaults to True. - """ - if meta is None: - meta = {} - elif not isinstance(meta, dict): - raise TypeError(f'meta should be a dict or None, but got {type(meta)}') - if self.meta is not None: - meta.update(self.meta) - # Note: meta.update(self.meta) should be done before - # meta.update(epoch=self.epoch + 1, iter=self.iter) otherwise - # there will be problems with resumed checkpoints. - # More details in https://github.com/open-mmlab/mmcv/pull/1108 - meta.update(epoch=self.epoch + 1, iter=self.iter) - - filename = filename_tmpl.format(self.epoch + 1) - filepath = osp.join(out_dir, filename) - optimizer = self.optimizer if save_optimizer else None - save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) - # in some environments, `os.symlink` is not supported, you may need to - # set `create_symlink` to False - if create_symlink: - dst_file = osp.join(out_dir, 'latest.pth') - if platform.system() != 'Windows': - mmcv.symlink(filename, dst_file) - else: - shutil.copy(filepath, dst_file) - - -@RUNNERS.register_module() -class Runner(EpochBasedRunner): - """Deprecated name of EpochBasedRunner.""" - - def __init__(self, *args, **kwargs): - warnings.warn('Runner was deprecated, please use EpochBasedRunner instead') - super().__init__(*args, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py deleted file mode 100644 index e205ab42af90..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py +++ /dev/null @@ -1,388 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import functools -import warnings -from collections import abc -from inspect import getfullargspec - -import numpy as np -import torch -import torch.nn as nn - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version -from .dist_utils import allreduce_grads as _allreduce_grads - -try: - # If PyTorch version >= 1.6.0, torch.cuda.amp.autocast would be imported - # and used; otherwise, auto fp16 will adopt mmcv's implementation. - # Note that when PyTorch >= 1.6.0, we still cast tensor types to fp16 - # manually, so the behavior may not be consistent with real amp. - from torch.cuda.amp import autocast -except ImportError: - pass - - -def cast_tensor_type(inputs, src_type, dst_type): - """Recursively convert Tensor in inputs from src_type to dst_type. - - Args: - inputs: Inputs that to be casted. - src_type (torch.dtype): Source type.. - dst_type (torch.dtype): Destination type. - - Returns: - The same type with inputs, but all contained Tensors have been cast. - """ - if isinstance(inputs, nn.Module): - return inputs - elif isinstance(inputs, torch.Tensor): - return inputs.to(dst_type) - elif isinstance(inputs, str): - return inputs - elif isinstance(inputs, np.ndarray): - return inputs - elif isinstance(inputs, abc.Mapping): - return type(inputs)({k: cast_tensor_type(v, src_type, dst_type) for k, v in inputs.items()}) - elif isinstance(inputs, abc.Iterable): - return type(inputs)(cast_tensor_type(item, src_type, dst_type) for item in inputs) - else: - return inputs - - -def auto_fp16(apply_to=None, out_fp32=False): - """Decorator to enable fp16 training automatically. - - This decorator is useful when you write custom modules and want to support - mixed precision training. If inputs arguments are fp32 tensors, they will - be converted to fp16 automatically. Arguments other than fp32 tensors are - ignored. If you are using PyTorch >= 1.6, torch.cuda.amp is used as the - backend, otherwise, original mmcv implementation will be adopted. - - Args: - apply_to (Iterable, optional): The argument names to be converted. - `None` indicates all arguments. - out_fp32 (bool): Whether to convert the output back to fp32. - - Example: - - >>> import torch.nn as nn - >>> class MyModule1(nn.Module): - >>> - >>> # Convert x and y to fp16 - >>> @auto_fp16() - >>> def forward(self, x, y): - >>> pass - - >>> import torch.nn as nn - >>> class MyModule2(nn.Module): - >>> - >>> # convert pred to fp16 - >>> @auto_fp16(apply_to=('pred', )) - >>> def do_something(self, pred, others): - >>> pass - """ - - def auto_fp16_wrapper(old_func): - @functools.wraps(old_func) - def new_func(*args, **kwargs): - # check if the module has set the attribute `fp16_enabled`, if not, - # just fallback to the original method. - if not isinstance(args[0], torch.nn.Module): - raise TypeError('@auto_fp16 can only be used to decorate the ' 'method of nn.Module') - if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): - return old_func(*args, **kwargs) - - # get the arg spec of the decorated method - args_info = getfullargspec(old_func) - # get the argument names to be casted - args_to_cast = args_info.args if apply_to is None else apply_to - # convert the args that need to be processed - new_args = [] - # NOTE: default args are not taken into consideration - if args: - arg_names = args_info.args[: len(args)] - for i, arg_name in enumerate(arg_names): - if arg_name in args_to_cast: - new_args.append(cast_tensor_type(args[i], torch.float, torch.half)) - else: - new_args.append(args[i]) - # convert the kwargs that need to be processed - new_kwargs = {} - if kwargs: - for arg_name, arg_value in kwargs.items(): - if arg_name in args_to_cast: - new_kwargs[arg_name] = cast_tensor_type(arg_value, torch.float, torch.half) - else: - new_kwargs[arg_name] = arg_value - # apply converted arguments to the decorated method - if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) >= digit_version('1.6.0'): - with autocast(enabled=True): - output = old_func(*new_args, **new_kwargs) - else: - output = old_func(*new_args, **new_kwargs) - # cast the results back to fp32 if necessary - if out_fp32: - output = cast_tensor_type(output, torch.half, torch.float) - return output - - return new_func - - return auto_fp16_wrapper - - -def force_fp32(apply_to=None, out_fp16=False): - """Decorator to convert input arguments to fp32 in force. - - This decorator is useful when you write custom modules and want to support - mixed precision training. If there are some inputs that must be processed - in fp32 mode, then this decorator can handle it. If inputs arguments are - fp16 tensors, they will be converted to fp32 automatically. Arguments other - than fp16 tensors are ignored. If you are using PyTorch >= 1.6, - torch.cuda.amp is used as the backend, otherwise, original mmcv - implementation will be adopted. - - Args: - apply_to (Iterable, optional): The argument names to be converted. - `None` indicates all arguments. - out_fp16 (bool): Whether to convert the output back to fp16. - - Example: - - >>> import torch.nn as nn - >>> class MyModule1(nn.Module): - >>> - >>> # Convert x and y to fp32 - >>> @force_fp32() - >>> def loss(self, x, y): - >>> pass - - >>> import torch.nn as nn - >>> class MyModule2(nn.Module): - >>> - >>> # convert pred to fp32 - >>> @force_fp32(apply_to=('pred', )) - >>> def post_process(self, pred, others): - >>> pass - """ - - def force_fp32_wrapper(old_func): - @functools.wraps(old_func) - def new_func(*args, **kwargs): - # check if the module has set the attribute `fp16_enabled`, if not, - # just fallback to the original method. - if not isinstance(args[0], torch.nn.Module): - raise TypeError('@force_fp32 can only be used to decorate the ' 'method of nn.Module') - if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): - return old_func(*args, **kwargs) - # get the arg spec of the decorated method - args_info = getfullargspec(old_func) - # get the argument names to be casted - args_to_cast = args_info.args if apply_to is None else apply_to - # convert the args that need to be processed - new_args = [] - if args: - arg_names = args_info.args[: len(args)] - for i, arg_name in enumerate(arg_names): - if arg_name in args_to_cast: - new_args.append(cast_tensor_type(args[i], torch.half, torch.float)) - else: - new_args.append(args[i]) - # convert the kwargs that need to be processed - new_kwargs = dict() - if kwargs: - for arg_name, arg_value in kwargs.items(): - if arg_name in args_to_cast: - new_kwargs[arg_name] = cast_tensor_type(arg_value, torch.half, torch.float) - else: - new_kwargs[arg_name] = arg_value - # apply converted arguments to the decorated method - if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) >= digit_version('1.6.0'): - with autocast(enabled=False): - output = old_func(*new_args, **new_kwargs) - else: - output = old_func(*new_args, **new_kwargs) - # cast the results back to fp32 if necessary - if out_fp16: - output = cast_tensor_type(output, torch.float, torch.half) - return output - - return new_func - - return force_fp32_wrapper - - -def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): - warnings.warning( - '"mmcv.runner.fp16_utils.allreduce_grads" is deprecated, and will be ' - 'removed in v2.8. Please switch to "mmcv.runner.allreduce_grads' - ) - _allreduce_grads(params, coalesce=coalesce, bucket_size_mb=bucket_size_mb) - - -def wrap_fp16_model(model): - """Wrap the FP32 model to FP16. - - If you are using PyTorch >= 1.6, torch.cuda.amp is used as the - backend, otherwise, original mmcv implementation will be adopted. - - For PyTorch >= 1.6, this function will - 1. Set fp16 flag inside the model to True. - - Otherwise: - 1. Convert FP32 model to FP16. - 2. Remain some necessary layers to be FP32, e.g., normalization layers. - 3. Set `fp16_enabled` flag inside the model to True. - - Args: - model (nn.Module): Model in FP32. - """ - if TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.6.0'): - # convert model to fp16 - model.half() - # patch the normalization layers to make it work in fp32 mode - patch_norm_fp32(model) - # set `fp16_enabled` flag - for m in model.modules(): - if hasattr(m, 'fp16_enabled'): - m.fp16_enabled = True - - -def patch_norm_fp32(module): - """Recursively convert normalization layers from FP16 to FP32. - - Args: - module (nn.Module): The modules to be converted in FP16. - - Returns: - nn.Module: The converted module, the normalization layers have been - converted to FP32. - """ - if isinstance(module, (nn.modules.batchnorm._BatchNorm, nn.GroupNorm)): - module.float() - if isinstance(module, nn.GroupNorm) or torch.__version__ < '1.3': - module.forward = patch_forward_method(module.forward, torch.half, torch.float) - for child in module.children(): - patch_norm_fp32(child) - return module - - -def patch_forward_method(func, src_type, dst_type, convert_output=True): - """Patch the forward method of a module. - - Args: - func (callable): The original forward method. - src_type (torch.dtype): Type of input arguments to be converted from. - dst_type (torch.dtype): Type of input arguments to be converted to. - convert_output (bool): Whether to convert the output back to src_type. - - Returns: - callable: The patched forward method. - """ - - def new_forward(*args, **kwargs): - output = func(*cast_tensor_type(args, src_type, dst_type), **cast_tensor_type(kwargs, src_type, dst_type)) - if convert_output: - output = cast_tensor_type(output, dst_type, src_type) - return output - - return new_forward - - -class LossScaler: - """Class that manages loss scaling in mixed precision training which - supports both dynamic or static mode. - - The implementation refers to - https://github.com/NVIDIA/apex/blob/master/apex/fp16_utils/loss_scaler.py. - Indirectly, by supplying ``mode='dynamic'`` for dynamic loss scaling. - It's important to understand how :class:`LossScaler` operates. - Loss scaling is designed to combat the problem of underflowing - gradients encountered at long times when training fp16 networks. - Dynamic loss scaling begins by attempting a very high loss - scale. Ironically, this may result in OVERflowing gradients. - If overflowing gradients are encountered, :class:`FP16_Optimizer` then - skips the update step for this particular iteration/minibatch, - and :class:`LossScaler` adjusts the loss scale to a lower value. - If a certain number of iterations occur without overflowing gradients - detected,:class:`LossScaler` increases the loss scale once more. - In this way :class:`LossScaler` attempts to "ride the edge" of always - using the highest loss scale possible without incurring overflow. - - Args: - init_scale (float): Initial loss scale value, default: 2**32. - scale_factor (float): Factor used when adjusting the loss scale. - Default: 2. - mode (str): Loss scaling mode. 'dynamic' or 'static' - scale_window (int): Number of consecutive iterations without an - overflow to wait before increasing the loss scale. Default: 1000. - """ - - def __init__(self, init_scale=2 ** 32, mode='dynamic', scale_factor=2.0, scale_window=1000): - self.cur_scale = init_scale - self.cur_iter = 0 - assert mode in ('dynamic', 'static'), 'mode can only be dynamic or static' - self.mode = mode - self.last_overflow_iter = -1 - self.scale_factor = scale_factor - self.scale_window = scale_window - - def has_overflow(self, params): - """Check if params contain overflow.""" - if self.mode != 'dynamic': - return False - for p in params: - if p.grad is not None and LossScaler._has_inf_or_nan(p.grad.data): - return True - return False - - def _has_inf_or_nan(x): - """Check if params contain NaN.""" - try: - cpu_sum = float(x.float().sum()) - except RuntimeError as instance: - if 'value cannot be converted' not in instance.args[0]: - raise - return True - else: - if cpu_sum == float('inf') or cpu_sum == -float('inf') or cpu_sum != cpu_sum: - return True - return False - - def update_scale(self, overflow): - """update the current loss scale value when overflow happens.""" - if self.mode != 'dynamic': - return - if overflow: - self.cur_scale = max(self.cur_scale / self.scale_factor, 1) - self.last_overflow_iter = self.cur_iter - else: - if (self.cur_iter - self.last_overflow_iter) % self.scale_window == 0: - self.cur_scale *= self.scale_factor - self.cur_iter += 1 - - def state_dict(self): - """Returns the state of the scaler as a :class:`dict`.""" - return dict( - cur_scale=self.cur_scale, - cur_iter=self.cur_iter, - mode=self.mode, - last_overflow_iter=self.last_overflow_iter, - scale_factor=self.scale_factor, - scale_window=self.scale_window, - ) - - def load_state_dict(self, state_dict): - """Loads the loss_scaler state dict. - - Args: - state_dict (dict): scaler state. - """ - self.cur_scale = state_dict['cur_scale'] - self.cur_iter = state_dict['cur_iter'] - self.mode = state_dict['mode'] - self.last_overflow_iter = state_dict['last_overflow_iter'] - self.scale_factor = state_dict['scale_factor'] - self.scale_window = state_dict['scale_window'] - - @property - def loss_scale(self): - return self.cur_scale diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py deleted file mode 100644 index 6b1b86fba36e..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .checkpoint import CheckpointHook -from .closure import ClosureHook -from .ema import EMAHook -from .evaluation import DistEvalHook, EvalHook -from .hook import HOOKS, Hook -from .iter_timer import IterTimerHook -from .logger import ( - DvcliveLoggerHook, - LoggerHook, - MlflowLoggerHook, - NeptuneLoggerHook, - PaviLoggerHook, - TensorboardLoggerHook, - TextLoggerHook, - WandbLoggerHook, -) -from .lr_updater import LrUpdaterHook -from .memory import EmptyCacheHook -from .momentum_updater import MomentumUpdaterHook -from .optimizer import ( - Fp16OptimizerHook, - GradientCumulativeFp16OptimizerHook, - GradientCumulativeOptimizerHook, - OptimizerHook, -) -from .profiler import ProfilerHook -from .sampler_seed import DistSamplerSeedHook -from .sync_buffer import SyncBuffersHook - -__all__ = [ - 'HOOKS', - 'Hook', - 'CheckpointHook', - 'ClosureHook', - 'LrUpdaterHook', - 'OptimizerHook', - 'Fp16OptimizerHook', - 'IterTimerHook', - 'DistSamplerSeedHook', - 'EmptyCacheHook', - 'LoggerHook', - 'MlflowLoggerHook', - 'PaviLoggerHook', - 'TextLoggerHook', - 'TensorboardLoggerHook', - 'NeptuneLoggerHook', - 'WandbLoggerHook', - 'DvcliveLoggerHook', - 'MomentumUpdaterHook', - 'SyncBuffersHook', - 'EMAHook', - 'EvalHook', - 'DistEvalHook', - 'ProfilerHook', - 'GradientCumulativeOptimizerHook', - 'GradientCumulativeFp16OptimizerHook', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py deleted file mode 100644 index 24381d6876ef..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os.path as osp -import warnings - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio import FileClient -from ..dist_utils import allreduce_params, master_only -from .hook import HOOKS, Hook - - -@HOOKS.register_module() -class CheckpointHook(Hook): - """Save checkpoints periodically. - - Args: - interval (int): The saving period. If ``by_epoch=True``, interval - indicates epochs, otherwise it indicates iterations. - Default: -1, which means "never". - by_epoch (bool): Saving checkpoints by epoch or by iteration. - Default: True. - save_optimizer (bool): Whether to save optimizer state_dict in the - checkpoint. It is usually used for resuming experiments. - Default: True. - out_dir (str, optional): The root directory to save checkpoints. If not - specified, ``runner.work_dir`` will be used by default. If - specified, the ``out_dir`` will be the concatenation of ``out_dir`` - and the last level directory of ``runner.work_dir``. - `Changed in version 1.3.16.` - max_keep_ckpts (int, optional): The maximum checkpoints to keep. - In some cases we want only the latest few checkpoints and would - like to delete old ones to save the disk space. - Default: -1, which means unlimited. - save_last (bool, optional): Whether to force the last checkpoint to be - saved regardless of interval. Default: True. - sync_buffer (bool, optional): Whether to synchronize buffers in - different gpus. Default: False. - file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. - Default: None. - `New in version 1.3.16.` - - .. warning:: - Before v1.3.16, the ``out_dir`` argument indicates the path where the - checkpoint is stored. However, since v1.3.16, ``out_dir`` indicates the - root directory and the final path to save checkpoint is the - concatenation of ``out_dir`` and the last level directory of - ``runner.work_dir``. Suppose the value of ``out_dir`` is "/path/of/A" - and the value of ``runner.work_dir`` is "/path/of/B", then the final - path will be "/path/of/A/B". - """ - - def __init__( - self, - interval=-1, - by_epoch=True, - save_optimizer=True, - out_dir=None, - max_keep_ckpts=-1, - save_last=True, - sync_buffer=False, - file_client_args=None, - **kwargs, - ): - self.interval = interval - self.by_epoch = by_epoch - self.save_optimizer = save_optimizer - self.out_dir = out_dir - self.max_keep_ckpts = max_keep_ckpts - self.save_last = save_last - self.args = kwargs - self.sync_buffer = sync_buffer - self.file_client_args = file_client_args - - def before_run(self, runner): - if not self.out_dir: - self.out_dir = runner.work_dir - - self.file_client = FileClient.infer_client(self.file_client_args, self.out_dir) - - # if `self.out_dir` is not equal to `runner.work_dir`, it means that - # `self.out_dir` is set so the final `self.out_dir` is the - # concatenation of `self.out_dir` and the last level directory of - # `runner.work_dir` - if self.out_dir != runner.work_dir: - basename = osp.basename(runner.work_dir.rstrip(osp.sep)) - self.out_dir = self.file_client.join_path(self.out_dir, basename) - - runner.logger.info((f'Checkpoints will be saved to {self.out_dir} by ' f'{self.file_client.name}.')) - - # disable the create_symlink option because some file backends do not - # allow to create a symlink - if 'create_symlink' in self.args: - if self.args['create_symlink'] and not self.file_client.allow_symlink: - self.args['create_symlink'] = False - warnings.warn( - ( - 'create_symlink is set as True by the user but is changed' - 'to be False because creating symbolic link is not ' - f'allowed in {self.file_client.name}' - ) - ) - else: - self.args['create_symlink'] = self.file_client.allow_symlink - - def after_train_epoch(self, runner): - if not self.by_epoch: - return - - # save checkpoint for following cases: - # 1. every ``self.interval`` epochs - # 2. reach the last epoch of training - if self.every_n_epochs(runner, self.interval) or (self.save_last and self.is_last_epoch(runner)): - runner.logger.info(f'Saving checkpoint at {runner.epoch + 1} epochs') - if self.sync_buffer: - allreduce_params(runner.model.buffers()) - self._save_checkpoint(runner) - - @master_only - def _save_checkpoint(self, runner): - """Save the current checkpoint and delete unwanted checkpoint.""" - runner.save_checkpoint(self.out_dir, save_optimizer=self.save_optimizer, **self.args) - if runner.meta is not None: - if self.by_epoch: - cur_ckpt_filename = self.args.get('filename_tmpl', 'epoch_{}.pth').format(runner.epoch + 1) - else: - cur_ckpt_filename = self.args.get('filename_tmpl', 'iter_{}.pth').format(runner.iter + 1) - runner.meta.setdefault('hook_msgs', dict()) - runner.meta['hook_msgs']['last_ckpt'] = self.file_client.join_path(self.out_dir, cur_ckpt_filename) - # remove other checkpoints - if self.max_keep_ckpts > 0: - if self.by_epoch: - name = 'epoch_{}.pth' - current_ckpt = runner.epoch + 1 - else: - name = 'iter_{}.pth' - current_ckpt = runner.iter + 1 - redundant_ckpts = range(current_ckpt - self.max_keep_ckpts * self.interval, 0, -self.interval) - filename_tmpl = self.args.get('filename_tmpl', name) - for _step in redundant_ckpts: - ckpt_path = self.file_client.join_path(self.out_dir, filename_tmpl.format(_step)) - if self.file_client.isfile(ckpt_path): - self.file_client.remove(ckpt_path) - else: - break - - def after_train_iter(self, runner): - if self.by_epoch: - return - - # save checkpoint for following cases: - # 1. every ``self.interval`` iterations - # 2. reach the last iteration of training - if self.every_n_iters(runner, self.interval) or (self.save_last and self.is_last_iter(runner)): - runner.logger.info(f'Saving checkpoint at {runner.iter + 1} iterations') - if self.sync_buffer: - allreduce_params(runner.model.buffers()) - self._save_checkpoint(runner) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py deleted file mode 100644 index 0781664b46a3..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .hook import HOOKS, Hook - - -@HOOKS.register_module() -class ClosureHook(Hook): - def __init__(self, fn_name, fn): - assert hasattr(self, fn_name) - assert callable(fn) - setattr(self, fn_name, fn) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py deleted file mode 100644 index 8114b106bf3c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from ...parallel import is_module_wrapper -from ..hooks.hook import HOOKS, Hook - - -@HOOKS.register_module() -class EMAHook(Hook): - r"""Exponential Moving Average Hook. - - Use Exponential Moving Average on all parameters of model in training - process. All parameters have a ema backup, which update by the formula - as below. EMAHook takes priority over EvalHook and CheckpointSaverHook. - - .. math:: - - \text{Xema\_{t+1}} = (1 - \text{momentum}) \times - \text{Xema\_{t}} + \text{momentum} \times X_t - - Args: - momentum (float): The momentum used for updating ema parameter. - Defaults to 0.0002. - interval (int): Update ema parameter every interval iteration. - Defaults to 1. - warm_up (int): During first warm_up steps, we may use smaller momentum - to update ema parameters more slowly. Defaults to 100. - resume_from (str): The checkpoint path. Defaults to None. - """ - - def __init__(self, momentum=0.0002, interval=1, warm_up=100, resume_from=None): - assert isinstance(interval, int) and interval > 0 - self.warm_up = warm_up - self.interval = interval - assert momentum > 0 and momentum < 1 - self.momentum = momentum ** interval - self.checkpoint = resume_from - - def before_run(self, runner): - """To resume model with it's ema parameters more friendly. - - Register ema parameter as ``named_buffer`` to model - """ - model = runner.model - if is_module_wrapper(model): - model = model.module - self.param_ema_buffer = {} - self.model_parameters = dict(model.named_parameters(recurse=True)) - for name, value in self.model_parameters.items(): - # "." is not allowed in module's buffer name - buffer_name = f"ema_{name.replace('.', '_')}" - self.param_ema_buffer[name] = buffer_name - model.register_buffer(buffer_name, value.data.clone()) - self.model_buffers = dict(model.named_buffers(recurse=True)) - if self.checkpoint is not None: - runner.resume(self.checkpoint) - - def after_train_iter(self, runner): - """Update ema parameter every self.interval iterations.""" - curr_step = runner.iter - # We warm up the momentum considering the instability at beginning - momentum = min(self.momentum, (1 + curr_step) / (self.warm_up + curr_step)) - if curr_step % self.interval != 0: - return - for name, parameter in self.model_parameters.items(): - buffer_name = self.param_ema_buffer[name] - buffer_parameter = self.model_buffers[buffer_name] - buffer_parameter.mul_(1 - momentum).add_(momentum, parameter.data) - - def after_train_epoch(self, runner): - """We load parameter values from ema backup to model before the - EvalHook.""" - self._swap_ema_parameters() - - def before_train_epoch(self, runner): - """We recover model's parameter from ema backup after last epoch's - EvalHook.""" - self._swap_ema_parameters() - - def _swap_ema_parameters(self): - """Swap the parameter of model with parameter in ema_buffer.""" - for name, value in self.model_parameters.items(): - temp = value.data.clone() - ema_buffer = self.model_buffers[self.param_ema_buffer[name]] - value.data.copy_(ema_buffer.data) - ema_buffer.data.copy_(temp) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py deleted file mode 100644 index 1431bb39a665..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py +++ /dev/null @@ -1,493 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os.path as osp -import warnings -from math import inf - -import torch.distributed as dist -from torch.nn.modules.batchnorm import _BatchNorm -from torch.utils.data import DataLoader - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio import FileClient -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_seq_of -from .hook import Hook -from .logger import LoggerHook - - -class EvalHook(Hook): - """Non-Distributed evaluation hook. - - This hook will regularly perform evaluation in a given interval when - performing in non-distributed environment. - - Args: - dataloader (DataLoader): A PyTorch dataloader, whose dataset has - implemented ``evaluate`` function. - start (int | None, optional): Evaluation starting epoch. It enables - evaluation before the training starts if ``start`` <= the resuming - epoch. If None, whether to evaluate is merely decided by - ``interval``. Default: None. - interval (int): Evaluation interval. Default: 1. - by_epoch (bool): Determine perform evaluation by epoch or by iteration. - If set to True, it will perform by epoch. Otherwise, by iteration. - Default: True. - save_best (str, optional): If a metric is specified, it would measure - the best checkpoint during evaluation. The information about best - checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep - best score value and best checkpoint path, which will be also - loaded when resume checkpoint. Options are the evaluation metrics - on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox - detection and instance segmentation. ``AR@100`` for proposal - recall. If ``save_best`` is ``auto``, the first key of the returned - ``OrderedDict`` result will be used. Default: None. - rule (str | None, optional): Comparison rule for best score. If set to - None, it will infer a reasonable rule. Keys such as 'acc', 'top' - .etc will be inferred by 'greater' rule. Keys contain 'loss' will - be inferred by 'less' rule. Options are 'greater', 'less', None. - Default: None. - test_fn (callable, optional): test a model with samples from a - dataloader, and return the test results. If ``None``, the default - test function ``mmcv.engine.single_gpu_test`` will be used. - (default: ``None``) - greater_keys (List[str] | None, optional): Metric keys that will be - inferred by 'greater' comparison rule. If ``None``, - _default_greater_keys will be used. (default: ``None``) - less_keys (List[str] | None, optional): Metric keys that will be - inferred by 'less' comparison rule. If ``None``, _default_less_keys - will be used. (default: ``None``) - out_dir (str, optional): The root directory to save checkpoints. If not - specified, `runner.work_dir` will be used by default. If specified, - the `out_dir` will be the concatenation of `out_dir` and the last - level directory of `runner.work_dir`. - `New in version 1.3.16.` - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmcv.fileio.FileClient` for details. Default: None. - `New in version 1.3.16.` - **eval_kwargs: Evaluation arguments fed into the evaluate function of - the dataset. - - Notes: - If new arguments are added for EvalHook, tools/test.py, - tools/eval_metric.py may be affected. - """ - - # Since the key for determine greater or less is related to the downstream - # tasks, downstream repos may need to overwrite the following inner - # variable accordingly. - - rule_map = {'greater': lambda x, y: x > y, 'less': lambda x, y: x < y} - init_value_map = {'greater': -inf, 'less': inf} - _default_greater_keys = ['acc', 'top', 'AR@', 'auc', 'precision', 'mAP', 'mDice', 'mIoU', 'mAcc', 'aAcc'] - _default_less_keys = ['loss'] - - def __init__( - self, - dataloader, - start=None, - interval=1, - by_epoch=True, - save_best=None, - rule=None, - test_fn=None, - greater_keys=None, - less_keys=None, - out_dir=None, - file_client_args=None, - **eval_kwargs, - ): - if not isinstance(dataloader, DataLoader): - raise TypeError(f'dataloader must be a pytorch DataLoader, ' f'but got {type(dataloader)}') - - if interval <= 0: - raise ValueError(f'interval must be a positive number, ' f'but got {interval}') - - assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean' - - if start is not None and start < 0: - raise ValueError(f'The evaluation start epoch {start} is smaller ' f'than 0') - - self.dataloader = dataloader - self.interval = interval - self.start = start - self.by_epoch = by_epoch - - assert isinstance(save_best, str) or save_best is None, ( - '""save_best"" should be a str or None ' f'rather than {type(save_best)}' - ) - self.save_best = save_best - self.eval_kwargs = eval_kwargs - self.initial_flag = True - - if test_fn is None: - from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.engine import single_gpu_test - - self.test_fn = single_gpu_test - else: - self.test_fn = test_fn - - if greater_keys is None: - self.greater_keys = self._default_greater_keys - else: - if not isinstance(greater_keys, (list, tuple)): - greater_keys = (greater_keys,) - assert is_seq_of(greater_keys, str) - self.greater_keys = greater_keys - - if less_keys is None: - self.less_keys = self._default_less_keys - else: - if not isinstance(less_keys, (list, tuple)): - less_keys = (less_keys,) - assert is_seq_of(less_keys, str) - self.less_keys = less_keys - - if self.save_best is not None: - self.best_ckpt_path = None - self._init_rule(rule, self.save_best) - - self.out_dir = out_dir - self.file_client_args = file_client_args - - def _init_rule(self, rule, key_indicator): - """Initialize rule, key_indicator, comparison_func, and best score. - - Here is the rule to determine which rule is used for key indicator - when the rule is not specific (note that the key indicator matching - is case-insensitive): - 1. If the key indicator is in ``self.greater_keys``, the rule will be - specified as 'greater'. - 2. Or if the key indicator is in ``self.less_keys``, the rule will be - specified as 'less'. - 3. Or if the key indicator is equal to the substring in any one item - in ``self.greater_keys``, the rule will be specified as 'greater'. - 4. Or if the key indicator is equal to the substring in any one item - in ``self.less_keys``, the rule will be specified as 'less'. - - Args: - rule (str | None): Comparison rule for best score. - key_indicator (str | None): Key indicator to determine the - comparison rule. - """ - if rule not in self.rule_map and rule is not None: - raise KeyError(f'rule must be greater, less or None, ' f'but got {rule}.') - - if rule is None: - if key_indicator != 'auto': - # `_lc` here means we use the lower case of keys for - # case-insensitive matching - key_indicator_lc = key_indicator.lower() - greater_keys = [key.lower() for key in self.greater_keys] - less_keys = [key.lower() for key in self.less_keys] - - if key_indicator_lc in greater_keys: - rule = 'greater' - elif key_indicator_lc in less_keys: - rule = 'less' - elif any(key in key_indicator_lc for key in greater_keys): - rule = 'greater' - elif any(key in key_indicator_lc for key in less_keys): - rule = 'less' - else: - raise ValueError( - f'Cannot infer the rule for key ' - f'{key_indicator}, thus a specific rule ' - f'must be specified.' - ) - self.rule = rule - self.key_indicator = key_indicator - if self.rule is not None: - self.compare_func = self.rule_map[self.rule] - - def before_run(self, runner): - if not self.out_dir: - self.out_dir = runner.work_dir - - self.file_client = FileClient.infer_client(self.file_client_args, self.out_dir) - - # if `self.out_dir` is not equal to `runner.work_dir`, it means that - # `self.out_dir` is set so the final `self.out_dir` is the - # concatenation of `self.out_dir` and the last level directory of - # `runner.work_dir` - if self.out_dir != runner.work_dir: - basename = osp.basename(runner.work_dir.rstrip(osp.sep)) - self.out_dir = self.file_client.join_path(self.out_dir, basename) - runner.logger.info((f'The best checkpoint will be saved to {self.out_dir} by ' f'{self.file_client.name}')) - - if self.save_best is not None: - if runner.meta is None: - warnings.warn('runner.meta is None. Creating an empty one.') - runner.meta = dict() - runner.meta.setdefault('hook_msgs', dict()) - self.best_ckpt_path = runner.meta['hook_msgs'].get('best_ckpt', None) - - def before_train_iter(self, runner): - """Evaluate the model only at the start of training by iteration.""" - if self.by_epoch or not self.initial_flag: - return - if self.start is not None and runner.iter >= self.start: - self.after_train_iter(runner) - self.initial_flag = False - - def before_train_epoch(self, runner): - """Evaluate the model only at the start of training by epoch.""" - if not (self.by_epoch and self.initial_flag): - return - if self.start is not None and runner.epoch >= self.start: - self.after_train_epoch(runner) - self.initial_flag = False - - def after_train_iter(self, runner): - """Called after every training iter to evaluate the results.""" - if not self.by_epoch and self._should_evaluate(runner): - # Because the priority of EvalHook is higher than LoggerHook, the - # training log and the evaluating log are mixed. Therefore, - # we need to dump the training log and clear it before evaluating - # log is generated. In addition, this problem will only appear in - # `IterBasedRunner` whose `self.by_epoch` is False, because - # `EpochBasedRunner` whose `self.by_epoch` is True calls - # `_do_evaluate` in `after_train_epoch` stage, and at this stage - # the training log has been printed, so it will not cause any - # problem. more details at - # https://github.com/open-mmlab/mmsegmentation/issues/694 - for hook in runner._hooks: - if isinstance(hook, LoggerHook): - hook.after_train_iter(runner) - runner.log_buffer.clear() - - self._do_evaluate(runner) - - def after_train_epoch(self, runner): - """Called after every training epoch to evaluate the results.""" - if self.by_epoch and self._should_evaluate(runner): - self._do_evaluate(runner) - - def _do_evaluate(self, runner): - """perform evaluation and save ckpt.""" - results = self.test_fn(runner.model, self.dataloader) - runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) - key_score = self.evaluate(runner, results) - # the key_score may be `None` so it needs to skip the action to save - # the best checkpoint - if self.save_best and key_score: - self._save_ckpt(runner, key_score) - - def _should_evaluate(self, runner): - """Judge whether to perform evaluation. - - Here is the rule to judge whether to perform evaluation: - 1. It will not perform evaluation during the epoch/iteration interval, - which is determined by ``self.interval``. - 2. It will not perform evaluation if the start time is larger than - current time. - 3. It will not perform evaluation when current time is larger than - the start time but during epoch/iteration interval. - - Returns: - bool: The flag indicating whether to perform evaluation. - """ - if self.by_epoch: - current = runner.epoch - check_time = self.every_n_epochs - else: - current = runner.iter - check_time = self.every_n_iters - - if self.start is None: - if not check_time(runner, self.interval): - # No evaluation during the interval. - return False - elif (current + 1) < self.start: - # No evaluation if start is larger than the current time. - return False - else: - # Evaluation only at epochs/iters 3, 5, 7... - # if start==3 and interval==2 - if (current + 1 - self.start) % self.interval: - return False - return True - - def _save_ckpt(self, runner, key_score): - """Save the best checkpoint. - - It will compare the score according to the compare function, write - related information (best score, best checkpoint path) and save the - best checkpoint into ``work_dir``. - """ - if self.by_epoch: - current = f'epoch_{runner.epoch + 1}' - cur_type, cur_time = 'epoch', runner.epoch + 1 - else: - current = f'iter_{runner.iter + 1}' - cur_type, cur_time = 'iter', runner.iter + 1 - - best_score = runner.meta['hook_msgs'].get('best_score', self.init_value_map[self.rule]) - if self.compare_func(key_score, best_score): - best_score = key_score - runner.meta['hook_msgs']['best_score'] = best_score - - if self.best_ckpt_path and self.file_client.isfile(self.best_ckpt_path): - self.file_client.remove(self.best_ckpt_path) - runner.logger.info((f'The previous best checkpoint {self.best_ckpt_path} was ' 'removed')) - - best_ckpt_name = f'best_{self.key_indicator}_{current}.pth' - self.best_ckpt_path = self.file_client.join_path(self.out_dir, best_ckpt_name) - runner.meta['hook_msgs']['best_ckpt'] = self.best_ckpt_path - - runner.save_checkpoint(self.out_dir, best_ckpt_name, create_symlink=False) - runner.logger.info(f'Now best checkpoint is saved as {best_ckpt_name}.') - runner.logger.info(f'Best {self.key_indicator} is {best_score:0.4f} ' f'at {cur_time} {cur_type}.') - - def evaluate(self, runner, results): - """Evaluate the results. - - Args: - runner (:obj:`mmcv.Runner`): The underlined training runner. - results (list): Output results. - """ - eval_res = self.dataloader.dataset.evaluate(results, logger=runner.logger, **self.eval_kwargs) - - for name, val in eval_res.items(): - runner.log_buffer.output[name] = val - runner.log_buffer.ready = True - - if self.save_best is not None: - # If the performance of model is pool, the `eval_res` may be an - # empty dict and it will raise exception when `self.save_best` is - # not None. More details at - # https://github.com/open-mmlab/mmdetection/issues/6265. - if not eval_res: - warnings.warn( - 'Since `eval_res` is an empty dict, the behavior to save ' - 'the best checkpoint will be skipped in this evaluation.' - ) - return None - - if self.key_indicator == 'auto': - # infer from eval_results - self._init_rule(self.rule, list(eval_res.keys())[0]) - return eval_res[self.key_indicator] - - return None - - -class DistEvalHook(EvalHook): - """Distributed evaluation hook. - - This hook will regularly perform evaluation in a given interval when - performing in distributed environment. - - Args: - dataloader (DataLoader): A PyTorch dataloader, whose dataset has - implemented ``evaluate`` function. - start (int | None, optional): Evaluation starting epoch. It enables - evaluation before the training starts if ``start`` <= the resuming - epoch. If None, whether to evaluate is merely decided by - ``interval``. Default: None. - interval (int): Evaluation interval. Default: 1. - by_epoch (bool): Determine perform evaluation by epoch or by iteration. - If set to True, it will perform by epoch. Otherwise, by iteration. - default: True. - save_best (str, optional): If a metric is specified, it would measure - the best checkpoint during evaluation. The information about best - checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep - best score value and best checkpoint path, which will be also - loaded when resume checkpoint. Options are the evaluation metrics - on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox - detection and instance segmentation. ``AR@100`` for proposal - recall. If ``save_best`` is ``auto``, the first key of the returned - ``OrderedDict`` result will be used. Default: None. - rule (str | None, optional): Comparison rule for best score. If set to - None, it will infer a reasonable rule. Keys such as 'acc', 'top' - .etc will be inferred by 'greater' rule. Keys contain 'loss' will - be inferred by 'less' rule. Options are 'greater', 'less', None. - Default: None. - test_fn (callable, optional): test a model with samples from a - dataloader in a multi-gpu manner, and return the test results. If - ``None``, the default test function ``mmcv.engine.multi_gpu_test`` - will be used. (default: ``None``) - tmpdir (str | None): Temporary directory to save the results of all - processes. Default: None. - gpu_collect (bool): Whether to use gpu or cpu to collect results. - Default: False. - broadcast_bn_buffer (bool): Whether to broadcast the - buffer(running_mean and running_var) of rank 0 to other rank - before evaluation. Default: True. - out_dir (str, optional): The root directory to save checkpoints. If not - specified, `runner.work_dir` will be used by default. If specified, - the `out_dir` will be the concatenation of `out_dir` and the last - level directory of `runner.work_dir`. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmcv.fileio.FileClient` for details. Default: None. - **eval_kwargs: Evaluation arguments fed into the evaluate function of - the dataset. - """ - - def __init__( - self, - dataloader, - start=None, - interval=1, - by_epoch=True, - save_best=None, - rule=None, - test_fn=None, - greater_keys=None, - less_keys=None, - broadcast_bn_buffer=True, - tmpdir=None, - gpu_collect=False, - out_dir=None, - file_client_args=None, - **eval_kwargs, - ): - - if test_fn is None: - from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.engine import multi_gpu_test - - test_fn = multi_gpu_test - - super().__init__( - dataloader, - start=start, - interval=interval, - by_epoch=by_epoch, - save_best=save_best, - rule=rule, - test_fn=test_fn, - greater_keys=greater_keys, - less_keys=less_keys, - out_dir=out_dir, - file_client_args=file_client_args, - **eval_kwargs, - ) - - self.broadcast_bn_buffer = broadcast_bn_buffer - self.tmpdir = tmpdir - self.gpu_collect = gpu_collect - - def _do_evaluate(self, runner): - """perform evaluation and save ckpt.""" - # Synchronization of BatchNorm's buffer (running_mean - # and running_var) is not supported in the DDP of pytorch, - # which may cause the inconsistent performance of models in - # different ranks, so we broadcast BatchNorm's buffers - # of rank 0 to other ranks to avoid this. - if self.broadcast_bn_buffer: - model = runner.model - for name, module in model.named_modules(): - if isinstance(module, _BatchNorm) and module.track_running_stats: - dist.broadcast(module.running_var, 0) - dist.broadcast(module.running_mean, 0) - - tmpdir = self.tmpdir - if tmpdir is None: - tmpdir = osp.join(runner.work_dir, '.eval_hook') - - results = self.test_fn(runner.model, self.dataloader, tmpdir=tmpdir, gpu_collect=self.gpu_collect) - if runner.rank == 0: - print('\n') - runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) - key_score = self.evaluate(runner, results) - # the key_score may be `None` so it needs to skip the action to - # save the best checkpoint - if self.save_best and key_score: - self._save_ckpt(runner, key_score) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py deleted file mode 100644 index 730cb0f21e7c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry, is_method_overridden - -HOOKS = Registry('hook') - - -class Hook: - stages = ( - 'before_run', - 'before_train_epoch', - 'before_train_iter', - 'after_train_iter', - 'after_train_epoch', - 'before_val_epoch', - 'before_val_iter', - 'after_val_iter', - 'after_val_epoch', - 'after_run', - ) - - def before_run(self, runner): - pass - - def after_run(self, runner): - pass - - def before_epoch(self, runner): - pass - - def after_epoch(self, runner): - pass - - def before_iter(self, runner): - pass - - def after_iter(self, runner): - pass - - def before_train_epoch(self, runner): - self.before_epoch(runner) - - def before_val_epoch(self, runner): - self.before_epoch(runner) - - def after_train_epoch(self, runner): - self.after_epoch(runner) - - def after_val_epoch(self, runner): - self.after_epoch(runner) - - def before_train_iter(self, runner): - self.before_iter(runner) - - def before_val_iter(self, runner): - self.before_iter(runner) - - def after_train_iter(self, runner): - self.after_iter(runner) - - def after_val_iter(self, runner): - self.after_iter(runner) - - def every_n_epochs(self, runner, n): - return (runner.epoch + 1) % n == 0 if n > 0 else False - - def every_n_inner_iters(self, runner, n): - return (runner.inner_iter + 1) % n == 0 if n > 0 else False - - def every_n_iters(self, runner, n): - return (runner.iter + 1) % n == 0 if n > 0 else False - - def end_of_epoch(self, runner): - return runner.inner_iter + 1 == len(runner.data_loader) - - def is_last_epoch(self, runner): - return runner.epoch + 1 == runner._max_epochs - - def is_last_iter(self, runner): - return runner.iter + 1 == runner._max_iters - - def get_triggered_stages(self): - trigger_stages = set() - for stage in Hook.stages: - if is_method_overridden(stage, Hook, self): - trigger_stages.add(stage) - - # some methods will be triggered in multi stages - # use this dict to map method to stages. - method_stages_map = { - 'before_epoch': ['before_train_epoch', 'before_val_epoch'], - 'after_epoch': ['after_train_epoch', 'after_val_epoch'], - 'before_iter': ['before_train_iter', 'before_val_iter'], - 'after_iter': ['after_train_iter', 'after_val_iter'], - } - - for method, map_stages in method_stages_map.items(): - if is_method_overridden(method, Hook, self): - trigger_stages.update(map_stages) - - return [stage for stage in Hook.stages if stage in trigger_stages] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py deleted file mode 100644 index 734404f95c9f..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import time - -from .hook import HOOKS, Hook - - -@HOOKS.register_module() -class IterTimerHook(Hook): - def before_epoch(self, runner): - self.t = time.time() - - def before_iter(self, runner): - runner.log_buffer.update({'data_time': time.time() - self.t}) - - def after_iter(self, runner): - runner.log_buffer.update({'time': time.time() - self.t}) - self.t = time.time() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py deleted file mode 100644 index 17da656e176e..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .base import LoggerHook -from .dvclive import DvcliveLoggerHook -from .mlflow import MlflowLoggerHook -from .neptune import NeptuneLoggerHook -from .pavi import PaviLoggerHook -from .tensorboard import TensorboardLoggerHook -from .text import TextLoggerHook -from .wandb import WandbLoggerHook - -__all__ = [ - 'LoggerHook', - 'MlflowLoggerHook', - 'PaviLoggerHook', - 'TensorboardLoggerHook', - 'TextLoggerHook', - 'WandbLoggerHook', - 'NeptuneLoggerHook', - 'DvcliveLoggerHook', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py deleted file mode 100644 index cb873734d28e..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import numbers -from abc import ABCMeta, abstractmethod - -import numpy as np -import torch - -from ..hook import Hook - - -class LoggerHook(Hook): - """Base class for logger hooks. - - Args: - interval (int): Logging interval (every k iterations). - ignore_last (bool): Ignore the log of last iterations in each epoch - if less than `interval`. - reset_flag (bool): Whether to clear the output buffer after logging. - by_epoch (bool): Whether EpochBasedRunner is used. - """ - - __metaclass__ = ABCMeta - - def __init__(self, interval=10, ignore_last=True, reset_flag=False, by_epoch=True): - self.interval = interval - self.ignore_last = ignore_last - self.reset_flag = reset_flag - self.by_epoch = by_epoch - - @abstractmethod - def log(self, runner): - pass - - @staticmethod - def is_scalar(val, include_np=True, include_torch=True): - """Tell the input variable is a scalar or not. - - Args: - val: Input variable. - include_np (bool): Whether include 0-d np.ndarray as a scalar. - include_torch (bool): Whether include 0-d torch.Tensor as a scalar. - - Returns: - bool: True or False. - """ - if isinstance(val, numbers.Number): - return True - elif include_np and isinstance(val, np.ndarray) and val.ndim == 0: - return True - elif include_torch and isinstance(val, torch.Tensor) and len(val) == 1: - return True - else: - return False - - def get_mode(self, runner): - if runner.mode == 'train': - if 'time' in runner.log_buffer.output: - mode = 'train' - else: - mode = 'val' - elif runner.mode == 'val': - mode = 'val' - else: - raise ValueError(f"runner mode should be 'train' or 'val', " f'but got {runner.mode}') - return mode - - def get_epoch(self, runner): - if runner.mode == 'train': - epoch = runner.epoch + 1 - elif runner.mode == 'val': - # normal val mode - # runner.epoch += 1 has been done before val workflow - epoch = runner.epoch - else: - raise ValueError(f"runner mode should be 'train' or 'val', " f'but got {runner.mode}') - return epoch - - def get_iter(self, runner, inner_iter=False): - """Get the current training iteration step.""" - if self.by_epoch and inner_iter: - current_iter = runner.inner_iter + 1 - else: - current_iter = runner.iter + 1 - return current_iter - - def get_lr_tags(self, runner): - tags = {} - lrs = runner.current_lr() - if isinstance(lrs, dict): - for name, value in lrs.items(): - tags[f'learning_rate/{name}'] = value[0] - else: - tags['learning_rate'] = lrs[0] - return tags - - def get_momentum_tags(self, runner): - tags = {} - momentums = runner.current_momentum() - if isinstance(momentums, dict): - for name, value in momentums.items(): - tags[f'momentum/{name}'] = value[0] - else: - tags['momentum'] = momentums[0] - return tags - - def get_loggable_tags( - self, runner, allow_scalar=True, allow_text=False, add_mode=True, tags_to_skip=('time', 'data_time') - ): - tags = {} - for var, val in runner.log_buffer.output.items(): - if var in tags_to_skip: - continue - if self.is_scalar(val) and not allow_scalar: - continue - if isinstance(val, str) and not allow_text: - continue - if add_mode: - var = f'{self.get_mode(runner)}/{var}' - tags[var] = val - tags.update(self.get_lr_tags(runner)) - tags.update(self.get_momentum_tags(runner)) - return tags - - def before_run(self, runner): - for hook in runner.hooks[::-1]: - if isinstance(hook, LoggerHook): - hook.reset_flag = True - break - - def before_epoch(self, runner): - runner.log_buffer.clear() # clear logs of last epoch - - def after_train_iter(self, runner): - if self.by_epoch and self.every_n_inner_iters(runner, self.interval): - runner.log_buffer.average(self.interval) - elif not self.by_epoch and self.every_n_iters(runner, self.interval): - runner.log_buffer.average(self.interval) - elif self.end_of_epoch(runner) and not self.ignore_last: - # not precise but more stable - runner.log_buffer.average(self.interval) - - if runner.log_buffer.ready: - self.log(runner) - if self.reset_flag: - runner.log_buffer.clear_output() - - def after_train_epoch(self, runner): - if runner.log_buffer.ready: - self.log(runner) - if self.reset_flag: - runner.log_buffer.clear_output() - - def after_val_epoch(self, runner): - runner.log_buffer.average() - self.log(runner) - if self.reset_flag: - runner.log_buffer.clear_output() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py deleted file mode 100644 index d92f1696909d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from ...dist_utils import master_only -from ..hook import HOOKS -from .base import LoggerHook - - -@HOOKS.register_module() -class DvcliveLoggerHook(LoggerHook): - """Class to log metrics with dvclive. - - It requires `dvclive`_ to be installed. - - Args: - path (str): Directory where dvclive will write TSV log files. - interval (int): Logging interval (every k iterations). - Default 10. - ignore_last (bool): Ignore the log of last iterations in each epoch - if less than `interval`. - Default: True. - reset_flag (bool): Whether to clear the output buffer after logging. - Default: True. - by_epoch (bool): Whether EpochBasedRunner is used. - Default: True. - - .. _dvclive: - https://dvc.org/doc/dvclive - """ - - def __init__(self, path, interval=10, ignore_last=True, reset_flag=True, by_epoch=True): - - super(DvcliveLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) - self.path = path - self.import_dvclive() - - def import_dvclive(self): - try: - import dvclive - except ImportError: - raise ImportError('Please run "pip install dvclive" to install dvclive') - self.dvclive = dvclive - - @master_only - def before_run(self, runner): - self.dvclive.init(self.path) - - @master_only - def log(self, runner): - tags = self.get_loggable_tags(runner) - if tags: - for k, v in tags.items(): - self.dvclive.log(k, v, step=self.get_iter(runner)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py deleted file mode 100644 index 3392baa8f43d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from ...dist_utils import master_only -from ..hook import HOOKS -from .base import LoggerHook - - -@HOOKS.register_module() -class MlflowLoggerHook(LoggerHook): - def __init__( - self, exp_name=None, tags=None, log_model=True, interval=10, ignore_last=True, reset_flag=False, by_epoch=True - ): - """Class to log metrics and (optionally) a trained model to MLflow. - - It requires `MLflow`_ to be installed. - - Args: - exp_name (str, optional): Name of the experiment to be used. - Default None. - If not None, set the active experiment. - If experiment does not exist, an experiment with provided name - will be created. - tags (dict of str: str, optional): Tags for the current run. - Default None. - If not None, set tags for the current run. - log_model (bool, optional): Whether to log an MLflow artifact. - Default True. - If True, log runner.model as an MLflow artifact - for the current run. - interval (int): Logging interval (every k iterations). - ignore_last (bool): Ignore the log of last iterations in each epoch - if less than `interval`. - reset_flag (bool): Whether to clear the output buffer after logging - by_epoch (bool): Whether EpochBasedRunner is used. - - .. _MLflow: - https://www.mlflow.org/docs/latest/index.html - """ - super(MlflowLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) - self.import_mlflow() - self.exp_name = exp_name - self.tags = tags - self.log_model = log_model - - def import_mlflow(self): - try: - import mlflow - import mlflow.pytorch as mlflow_pytorch - except ImportError: - raise ImportError('Please run "pip install mlflow" to install mlflow') - self.mlflow = mlflow - self.mlflow_pytorch = mlflow_pytorch - - @master_only - def before_run(self, runner): - super(MlflowLoggerHook, self).before_run(runner) - if self.exp_name is not None: - self.mlflow.set_experiment(self.exp_name) - if self.tags is not None: - self.mlflow.set_tags(self.tags) - - @master_only - def log(self, runner): - tags = self.get_loggable_tags(runner) - if tags: - self.mlflow.log_metrics(tags, step=self.get_iter(runner)) - - @master_only - def after_run(self, runner): - if self.log_model: - self.mlflow_pytorch.log_model(runner.model, 'models') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py deleted file mode 100644 index 25e6f1d85ebb..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from ...dist_utils import master_only -from ..hook import HOOKS -from .base import LoggerHook - - -@HOOKS.register_module() -class NeptuneLoggerHook(LoggerHook): - """Class to log metrics to NeptuneAI. - - It requires `neptune-client` to be installed. - - Args: - init_kwargs (dict): a dict contains the initialization keys as below: - - project (str): Name of a project in a form of - namespace/project_name. If None, the value of - NEPTUNE_PROJECT environment variable will be taken. - - api_token (str): User’s API token. - If None, the value of NEPTUNE_API_TOKEN environment - variable will be taken. Note: It is strongly recommended - to use NEPTUNE_API_TOKEN environment variable rather than - placing your API token in plain text in your source code. - - name (str, optional, default is 'Untitled'): Editable name of - the run. Name is displayed in the run's Details and in - Runs table as a column. - Check https://docs.neptune.ai/api-reference/neptune#init for - more init arguments. - interval (int): Logging interval (every k iterations). - ignore_last (bool): Ignore the log of last iterations in each epoch - if less than `interval`. - reset_flag (bool): Whether to clear the output buffer after logging - by_epoch (bool): Whether EpochBasedRunner is used. - - .. _NeptuneAI: - https://docs.neptune.ai/you-should-know/logging-metadata - """ - - def __init__( - self, init_kwargs=None, interval=10, ignore_last=True, reset_flag=True, with_step=True, by_epoch=True - ): - - super(NeptuneLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) - self.import_neptune() - self.init_kwargs = init_kwargs - self.with_step = with_step - - def import_neptune(self): - try: - import neptune.new as neptune - except ImportError: - raise ImportError('Please run "pip install neptune-client" to install neptune') - self.neptune = neptune - self.run = None - - @master_only - def before_run(self, runner): - if self.init_kwargs: - self.run = self.neptune.init(**self.init_kwargs) - else: - self.run = self.neptune.init() - - @master_only - def log(self, runner): - tags = self.get_loggable_tags(runner) - if tags: - for tag_name, tag_value in tags.items(): - if self.with_step: - self.run[tag_name].log(tag_value, step=self.get_iter(runner)) - else: - tags['global_step'] = self.get_iter(runner) - self.run[tag_name].log(tags) - - @master_only - def after_run(self, runner): - self.run.stop() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py deleted file mode 100644 index 1f79cb0f305e..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import json -import os -import os.path as osp - -import torch -import yaml - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from ....parallel.utils import is_module_wrapper -from ...dist_utils import master_only -from ..hook import HOOKS -from .base import LoggerHook - - -@HOOKS.register_module() -class PaviLoggerHook(LoggerHook): - def __init__( - self, - init_kwargs=None, - add_graph=False, - add_last_ckpt=False, - interval=10, - ignore_last=True, - reset_flag=False, - by_epoch=True, - img_key='img_info', - ): - super(PaviLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) - self.init_kwargs = init_kwargs - self.add_graph = add_graph - self.add_last_ckpt = add_last_ckpt - self.img_key = img_key - - @master_only - def before_run(self, runner): - super(PaviLoggerHook, self).before_run(runner) - try: - from pavi import SummaryWriter - except ImportError: - raise ImportError('Please run "pip install pavi" to install pavi.') - - self.run_name = runner.work_dir.split('/')[-1] - - if not self.init_kwargs: - self.init_kwargs = dict() - self.init_kwargs['name'] = self.run_name - self.init_kwargs['model'] = runner._model_name - if runner.meta is not None: - if 'config_dict' in runner.meta: - config_dict = runner.meta['config_dict'] - assert isinstance(config_dict, dict), ( - 'meta["config_dict"] has to be of a dict, ' f'but got {type(config_dict)}' - ) - elif 'config_file' in runner.meta: - config_file = runner.meta['config_file'] - config_dict = dict(mmcv.Config.fromfile(config_file)) - else: - config_dict = None - if config_dict is not None: - # 'max_.*iter' is parsed in pavi sdk as the maximum iterations - # to properly set up the progress bar. - config_dict = config_dict.copy() - config_dict.setdefault('max_iter', runner.max_iters) - # non-serializable values are first converted in - # mmcv.dump to json - config_dict = json.loads(mmcv.dump(config_dict, file_format='json')) - session_text = yaml.dump(config_dict) - self.init_kwargs['session_text'] = session_text - self.writer = SummaryWriter(**self.init_kwargs) - - def get_step(self, runner): - """Get the total training step/epoch.""" - if self.get_mode(runner) == 'val' and self.by_epoch: - return self.get_epoch(runner) - else: - return self.get_iter(runner) - - @master_only - def log(self, runner): - tags = self.get_loggable_tags(runner, add_mode=False) - if tags: - self.writer.add_scalars(self.get_mode(runner), tags, self.get_step(runner)) - - @master_only - def after_run(self, runner): - if self.add_last_ckpt: - ckpt_path = osp.join(runner.work_dir, 'latest.pth') - if osp.islink(ckpt_path): - ckpt_path = osp.join(runner.work_dir, os.readlink(ckpt_path)) - - if osp.isfile(ckpt_path): - # runner.epoch += 1 has been done before `after_run`. - iteration = runner.epoch if self.by_epoch else runner.iter - return self.writer.add_snapshot_file( - tag=self.run_name, snapshot_file_path=ckpt_path, iteration=iteration - ) - - # flush the buffer and send a task ending signal to Pavi - self.writer.close() - - @master_only - def before_epoch(self, runner): - if runner.epoch == 0 and self.add_graph: - if is_module_wrapper(runner.model): - _model = runner.model.module - else: - _model = runner.model - device = next(_model.parameters()).device - data = next(iter(runner.data_loader)) - image = data[self.img_key][0:1].to(device) - with torch.no_grad(): - self.writer.add_graph(_model, image) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py deleted file mode 100644 index ccd0c5b5aac0..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os.path as osp - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version -from ...dist_utils import master_only -from ..hook import HOOKS -from .base import LoggerHook - - -@HOOKS.register_module() -class TensorboardLoggerHook(LoggerHook): - def __init__(self, log_dir=None, interval=10, ignore_last=True, reset_flag=False, by_epoch=True): - super(TensorboardLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) - self.log_dir = log_dir - - @master_only - def before_run(self, runner): - super(TensorboardLoggerHook, self).before_run(runner) - if TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.1'): - try: - from tensorboardX import SummaryWriter - except ImportError: - raise ImportError('Please install tensorboardX to use ' 'TensorboardLoggerHook.') - else: - try: - from torch.utils.tensorboard import SummaryWriter - except ImportError: - raise ImportError( - 'Please run "pip install future tensorboard" to install ' - 'the dependencies to use torch.utils.tensorboard ' - '(applicable to PyTorch 1.1 or higher)' - ) - - if self.log_dir is None: - self.log_dir = osp.join(runner.work_dir, 'tf_logs') - self.writer = SummaryWriter(self.log_dir) - - @master_only - def log(self, runner): - tags = self.get_loggable_tags(runner, allow_text=True) - for tag, val in tags.items(): - if isinstance(val, str): - self.writer.add_text(tag, val, self.get_iter(runner)) - else: - self.writer.add_scalar(tag, val, self.get_iter(runner)) - - @master_only - def after_run(self, runner): - self.writer.close() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py deleted file mode 100644 index da54f3d56059..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import datetime -import os -import os.path as osp -from collections import OrderedDict - -import torch -import torch.distributed as dist - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio.file_client import FileClient -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_tuple_of, scandir -from ..hook import HOOKS -from .base import LoggerHook - - -@HOOKS.register_module() -class TextLoggerHook(LoggerHook): - """Logger hook in text. - - In this logger hook, the information will be printed on terminal and - saved in json file. - - Args: - by_epoch (bool, optional): Whether EpochBasedRunner is used. - Default: True. - interval (int, optional): Logging interval (every k iterations). - Default: 10. - ignore_last (bool, optional): Ignore the log of last iterations in each - epoch if less than :attr:`interval`. Default: True. - reset_flag (bool, optional): Whether to clear the output buffer after - logging. Default: False. - interval_exp_name (int, optional): Logging interval for experiment - name. This feature is to help users conveniently get the experiment - information from screen or log file. Default: 1000. - out_dir (str, optional): Logs are saved in ``runner.work_dir`` default. - If ``out_dir`` is specified, logs will be copied to a new directory - which is the concatenation of ``out_dir`` and the last level - directory of ``runner.work_dir``. Default: None. - `New in version 1.3.16.` - out_suffix (str or tuple[str], optional): Those filenames ending with - ``out_suffix`` will be copied to ``out_dir``. - Default: ('.log.json', '.log', '.py'). - `New in version 1.3.16.` - keep_local (bool, optional): Whether to keep local log when - :attr:`out_dir` is specified. If False, the local log will be - removed. Default: True. - `New in version 1.3.16.` - file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. - Default: None. - `New in version 1.3.16.` - """ - - def __init__( - self, - by_epoch=True, - interval=10, - ignore_last=True, - reset_flag=False, - interval_exp_name=1000, - out_dir=None, - out_suffix=('.log.json', '.log', '.py'), - keep_local=True, - file_client_args=None, - ): - super(TextLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) - self.by_epoch = by_epoch - self.time_sec_tot = 0 - self.interval_exp_name = interval_exp_name - - if out_dir is None and file_client_args is not None: - raise ValueError('file_client_args should be "None" when `out_dir` is not' 'specified.') - self.out_dir = out_dir - - if not (out_dir is None or isinstance(out_dir, str) or is_tuple_of(out_dir, str)): - raise TypeError('out_dir should be "None" or string or tuple of ' 'string, but got {out_dir}') - self.out_suffix = out_suffix - - self.keep_local = keep_local - self.file_client_args = file_client_args - if self.out_dir is not None: - self.file_client = FileClient.infer_client(file_client_args, self.out_dir) - - def before_run(self, runner): - super(TextLoggerHook, self).before_run(runner) - - if self.out_dir is not None: - self.file_client = FileClient.infer_client(self.file_client_args, self.out_dir) - # The final `self.out_dir` is the concatenation of `self.out_dir` - # and the last level directory of `runner.work_dir` - basename = osp.basename(runner.work_dir.rstrip(osp.sep)) - self.out_dir = self.file_client.join_path(self.out_dir, basename) - runner.logger.info( - ( - f'Text logs will be saved to {self.out_dir} by ' - f'{self.file_client.name} after the training process.' - ) - ) - - self.start_iter = runner.iter - self.json_log_path = osp.join(runner.work_dir, f'{runner.timestamp}.log.json') - if runner.meta is not None: - self._dump_log(runner.meta, runner) - - def _get_max_memory(self, runner): - device = getattr(runner.model, 'output_device', None) - mem = torch.cuda.max_memory_allocated(device=device) - mem_mb = torch.tensor([mem / (1024 * 1024)], dtype=torch.int, device=device) - if runner.world_size > 1: - dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX) - return mem_mb.item() - - def _log_info(self, log_dict, runner): - # print exp name for users to distinguish experiments - # at every ``interval_exp_name`` iterations and the end of each epoch - if runner.meta is not None and 'exp_name' in runner.meta: - if (self.every_n_iters(runner, self.interval_exp_name)) or (self.by_epoch and self.end_of_epoch(runner)): - exp_info = f'Exp name: {runner.meta["exp_name"]}' - runner.logger.info(exp_info) - - if log_dict['mode'] == 'train': - if isinstance(log_dict['lr'], dict): - lr_str = [] - for k, val in log_dict['lr'].items(): - lr_str.append(f'lr_{k}: {val:.3e}') - lr_str = ' '.join(lr_str) - else: - lr_str = f'lr: {log_dict["lr"]:.3e}' - - # by epoch: Epoch [4][100/1000] - # by iter: Iter [100/100000] - if self.by_epoch: - log_str = f'Epoch [{log_dict["epoch"]}]' f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t' - else: - log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t' - log_str += f'{lr_str}, ' - - if 'time' in log_dict.keys(): - self.time_sec_tot += log_dict['time'] * self.interval - time_sec_avg = self.time_sec_tot / (runner.iter - self.start_iter + 1) - eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1) - eta_str = str(datetime.timedelta(seconds=int(eta_sec))) - log_str += f'eta: {eta_str}, ' - log_str += f'time: {log_dict["time"]:.3f}, ' f'data_time: {log_dict["data_time"]:.3f}, ' - # statistic memory - if torch.cuda.is_available(): - log_str += f'memory: {log_dict["memory"]}, ' - else: - # val/test time - # here 1000 is the length of the val dataloader - # by epoch: Epoch[val] [4][1000] - # by iter: Iter[val] [1000] - if self.by_epoch: - log_str = f'Epoch({log_dict["mode"]}) ' f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t' - else: - log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t' - - log_items = [] - for name, val in log_dict.items(): - # TODO: resolve this hack - # these items have been in log_str - if name in ['mode', 'Epoch', 'iter', 'lr', 'time', 'data_time', 'memory', 'epoch']: - continue - if isinstance(val, float): - val = f'{val:.4f}' - log_items.append(f'{name}: {val}') - log_str += ', '.join(log_items) - - runner.logger.info(log_str) - - def _dump_log(self, log_dict, runner): - # dump log in json format - json_log = OrderedDict() - for k, v in log_dict.items(): - json_log[k] = self._round_float(v) - # only append log at last line - if runner.rank == 0: - with open(self.json_log_path, 'a+') as f: - mmcv.dump(json_log, f, file_format='json') - f.write('\n') - - def _round_float(self, items): - if isinstance(items, list): - return [self._round_float(item) for item in items] - elif isinstance(items, float): - return round(items, 5) - else: - return items - - def log(self, runner): - if 'eval_iter_num' in runner.log_buffer.output: - # this doesn't modify runner.iter and is regardless of by_epoch - cur_iter = runner.log_buffer.output.pop('eval_iter_num') - else: - cur_iter = self.get_iter(runner, inner_iter=True) - - log_dict = OrderedDict(mode=self.get_mode(runner), epoch=self.get_epoch(runner), iter=cur_iter) - - # only record lr of the first param group - cur_lr = runner.current_lr() - if isinstance(cur_lr, list): - log_dict['lr'] = cur_lr[0] - else: - assert isinstance(cur_lr, dict) - log_dict['lr'] = {} - for k, lr_ in cur_lr.items(): - assert isinstance(lr_, list) - log_dict['lr'].update({k: lr_[0]}) - - if 'time' in runner.log_buffer.output: - # statistic memory - if torch.cuda.is_available(): - log_dict['memory'] = self._get_max_memory(runner) - - log_dict = dict(log_dict, **runner.log_buffer.output) - - self._log_info(log_dict, runner) - self._dump_log(log_dict, runner) - return log_dict - - def after_run(self, runner): - # copy or upload logs to self.out_dir - if self.out_dir is not None: - for filename in scandir(runner.work_dir, self.out_suffix, True): - local_filepath = osp.join(runner.work_dir, filename) - out_filepath = self.file_client.join_path(self.out_dir, filename) - with open(local_filepath, 'r') as f: - self.file_client.put_text(f.read(), out_filepath) - - runner.logger.info((f'The file {local_filepath} has been uploaded to ' f'{out_filepath}.')) - - if not self.keep_local: - os.remove(local_filepath) - runner.logger.info((f'{local_filepath} was removed due to the ' '`self.keep_local=False`')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py deleted file mode 100644 index c94d8391711c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from ...dist_utils import master_only -from ..hook import HOOKS -from .base import LoggerHook - - -@HOOKS.register_module() -class WandbLoggerHook(LoggerHook): - def __init__( - self, - init_kwargs=None, - interval=10, - ignore_last=True, - reset_flag=False, - commit=True, - by_epoch=True, - with_step=True, - ): - super(WandbLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) - self.import_wandb() - self.init_kwargs = init_kwargs - self.commit = commit - self.with_step = with_step - - def import_wandb(self): - try: - import wandb - except ImportError: - raise ImportError('Please run "pip install wandb" to install wandb') - self.wandb = wandb - - @master_only - def before_run(self, runner): - super(WandbLoggerHook, self).before_run(runner) - if self.wandb is None: - self.import_wandb() - if self.init_kwargs: - self.wandb.init(**self.init_kwargs) - else: - self.wandb.init() - - @master_only - def log(self, runner): - tags = self.get_loggable_tags(runner) - if tags: - if self.with_step: - self.wandb.log(tags, step=self.get_iter(runner), commit=self.commit) - else: - tags['global_step'] = self.get_iter(runner) - self.wandb.log(tags, commit=self.commit) - - @master_only - def after_run(self, runner): - self.wandb.join() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py deleted file mode 100644 index 8f92871c64cd..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py +++ /dev/null @@ -1,615 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import numbers -from math import cos, pi - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from .hook import HOOKS, Hook - - -class LrUpdaterHook(Hook): - """LR Scheduler in MMCV. - - Args: - by_epoch (bool): LR changes epoch by epoch - warmup (string): Type of warmup used. It can be None(use no warmup), - 'constant', 'linear' or 'exp' - warmup_iters (int): The number of iterations or epochs that warmup - lasts - warmup_ratio (float): LR used at the beginning of warmup equals to - warmup_ratio * initial_lr - warmup_by_epoch (bool): When warmup_by_epoch == True, warmup_iters - means the number of epochs that warmup lasts, otherwise means the - number of iteration that warmup lasts - """ - - def __init__(self, by_epoch=True, warmup=None, warmup_iters=0, warmup_ratio=0.1, warmup_by_epoch=False): - # validate the "warmup" argument - if warmup is not None: - if warmup not in ['constant', 'linear', 'exp']: - raise ValueError( - f'"{warmup}" is not a supported type for warming up, valid' ' types are "constant" and "linear"' - ) - if warmup is not None: - assert warmup_iters > 0, '"warmup_iters" must be a positive integer' - assert 0 < warmup_ratio <= 1.0, '"warmup_ratio" must be in range (0,1]' - - self.by_epoch = by_epoch - self.warmup = warmup - self.warmup_iters = warmup_iters - self.warmup_ratio = warmup_ratio - self.warmup_by_epoch = warmup_by_epoch - - if self.warmup_by_epoch: - self.warmup_epochs = self.warmup_iters - self.warmup_iters = None - else: - self.warmup_epochs = None - - self.base_lr = [] # initial lr for all param groups - self.regular_lr = [] # expected lr if no warming up is performed - - def _set_lr(self, runner, lr_groups): - if isinstance(runner.optimizer, dict): - for k, optim in runner.optimizer.items(): - for param_group, lr in zip(optim.param_groups, lr_groups[k]): - param_group['lr'] = lr - else: - for param_group, lr in zip(runner.optimizer.param_groups, lr_groups): - param_group['lr'] = lr - - def get_lr(self, runner, base_lr): - raise NotImplementedError - - def get_regular_lr(self, runner): - if isinstance(runner.optimizer, dict): - lr_groups = {} - for k in runner.optimizer.keys(): - _lr_group = [self.get_lr(runner, _base_lr) for _base_lr in self.base_lr[k]] - lr_groups.update({k: _lr_group}) - - return lr_groups - else: - return [self.get_lr(runner, _base_lr) for _base_lr in self.base_lr] - - def get_warmup_lr(self, cur_iters): - def _get_warmup_lr(cur_iters, regular_lr): - if self.warmup == 'constant': - warmup_lr = [_lr * self.warmup_ratio for _lr in regular_lr] - elif self.warmup == 'linear': - k = (1 - cur_iters / self.warmup_iters) * (1 - self.warmup_ratio) - warmup_lr = [_lr * (1 - k) for _lr in regular_lr] - elif self.warmup == 'exp': - k = self.warmup_ratio ** (1 - cur_iters / self.warmup_iters) - warmup_lr = [_lr * k for _lr in regular_lr] - return warmup_lr - - if isinstance(self.regular_lr, dict): - lr_groups = {} - for key, regular_lr in self.regular_lr.items(): - lr_groups[key] = _get_warmup_lr(cur_iters, regular_lr) - return lr_groups - else: - return _get_warmup_lr(cur_iters, self.regular_lr) - - def before_run(self, runner): - # NOTE: when resuming from a checkpoint, if 'initial_lr' is not saved, - # it will be set according to the optimizer params - if isinstance(runner.optimizer, dict): - self.base_lr = {} - for k, optim in runner.optimizer.items(): - for group in optim.param_groups: - group.setdefault('initial_lr', group['lr']) - _base_lr = [group['initial_lr'] for group in optim.param_groups] - self.base_lr.update({k: _base_lr}) - else: - for group in runner.optimizer.param_groups: - group.setdefault('initial_lr', group['lr']) - self.base_lr = [group['initial_lr'] for group in runner.optimizer.param_groups] - - def before_train_epoch(self, runner): - if self.warmup_iters is None: - epoch_len = len(runner.data_loader) - self.warmup_iters = self.warmup_epochs * epoch_len - - if not self.by_epoch: - return - - self.regular_lr = self.get_regular_lr(runner) - self._set_lr(runner, self.regular_lr) - - def before_train_iter(self, runner): - cur_iter = runner.iter - if not self.by_epoch: - self.regular_lr = self.get_regular_lr(runner) - if self.warmup is None or cur_iter >= self.warmup_iters: - self._set_lr(runner, self.regular_lr) - else: - warmup_lr = self.get_warmup_lr(cur_iter) - self._set_lr(runner, warmup_lr) - elif self.by_epoch: - if self.warmup is None or cur_iter > self.warmup_iters: - return - elif cur_iter == self.warmup_iters: - self._set_lr(runner, self.regular_lr) - else: - warmup_lr = self.get_warmup_lr(cur_iter) - self._set_lr(runner, warmup_lr) - - -@HOOKS.register_module() -class FixedLrUpdaterHook(LrUpdaterHook): - def __init__(self, **kwargs): - super(FixedLrUpdaterHook, self).__init__(**kwargs) - - def get_lr(self, runner, base_lr): - return base_lr - - -@HOOKS.register_module() -class StepLrUpdaterHook(LrUpdaterHook): - """Step LR scheduler with min_lr clipping. - - Args: - step (int | list[int]): Step to decay the LR. If an int value is given, - regard it as the decay interval. If a list is given, decay LR at - these steps. - gamma (float, optional): Decay LR ratio. Default: 0.1. - min_lr (float, optional): Minimum LR value to keep. If LR after decay - is lower than `min_lr`, it will be clipped to this value. If None - is given, we don't perform lr clipping. Default: None. - """ - - def __init__(self, step, gamma=0.1, min_lr=None, **kwargs): - if isinstance(step, list): - assert mmcv.is_list_of(step, int) - assert all([s > 0 for s in step]) - elif isinstance(step, int): - assert step > 0 - else: - raise TypeError('"step" must be a list or integer') - self.step = step - self.gamma = gamma - self.min_lr = min_lr - super(StepLrUpdaterHook, self).__init__(**kwargs) - - def get_lr(self, runner, base_lr): - progress = runner.epoch if self.by_epoch else runner.iter - - # calculate exponential term - if isinstance(self.step, int): - exp = progress // self.step - else: - exp = len(self.step) - for i, s in enumerate(self.step): - if progress < s: - exp = i - break - - lr = base_lr * (self.gamma ** exp) - if self.min_lr is not None: - # clip to a minimum value - lr = max(lr, self.min_lr) - return lr - - -@HOOKS.register_module() -class ExpLrUpdaterHook(LrUpdaterHook): - def __init__(self, gamma, **kwargs): - self.gamma = gamma - super(ExpLrUpdaterHook, self).__init__(**kwargs) - - def get_lr(self, runner, base_lr): - progress = runner.epoch if self.by_epoch else runner.iter - return base_lr * self.gamma ** progress - - -@HOOKS.register_module() -class PolyLrUpdaterHook(LrUpdaterHook): - def __init__(self, power=1.0, min_lr=0.0, **kwargs): - self.power = power - self.min_lr = min_lr - super(PolyLrUpdaterHook, self).__init__(**kwargs) - - def get_lr(self, runner, base_lr): - if self.by_epoch: - progress = runner.epoch - max_progress = runner.max_epochs - else: - progress = runner.iter - max_progress = runner.max_iters - coeff = (1 - progress / max_progress) ** self.power - return (base_lr - self.min_lr) * coeff + self.min_lr - - -@HOOKS.register_module() -class InvLrUpdaterHook(LrUpdaterHook): - def __init__(self, gamma, power=1.0, **kwargs): - self.gamma = gamma - self.power = power - super(InvLrUpdaterHook, self).__init__(**kwargs) - - def get_lr(self, runner, base_lr): - progress = runner.epoch if self.by_epoch else runner.iter - return base_lr * (1 + self.gamma * progress) ** (-self.power) - - -@HOOKS.register_module() -class CosineAnnealingLrUpdaterHook(LrUpdaterHook): - def __init__(self, min_lr=None, min_lr_ratio=None, **kwargs): - assert (min_lr is None) ^ (min_lr_ratio is None) - self.min_lr = min_lr - self.min_lr_ratio = min_lr_ratio - super(CosineAnnealingLrUpdaterHook, self).__init__(**kwargs) - - def get_lr(self, runner, base_lr): - if self.by_epoch: - progress = runner.epoch - max_progress = runner.max_epochs - else: - progress = runner.iter - max_progress = runner.max_iters - - if self.min_lr_ratio is not None: - target_lr = base_lr * self.min_lr_ratio - else: - target_lr = self.min_lr - return annealing_cos(base_lr, target_lr, progress / max_progress) - - -@HOOKS.register_module() -class FlatCosineAnnealingLrUpdaterHook(LrUpdaterHook): - """Flat + Cosine lr schedule. - - Modified from https://github.com/fastai/fastai/blob/master/fastai/callback/schedule.py#L128 # noqa: E501 - - Args: - start_percent (float): When to start annealing the learning rate - after the percentage of the total training steps. - The value should be in range [0, 1). - Default: 0.75 - min_lr (float, optional): The minimum lr. Default: None. - min_lr_ratio (float, optional): The ratio of minimum lr to the base lr. - Either `min_lr` or `min_lr_ratio` should be specified. - Default: None. - """ - - def __init__(self, start_percent=0.75, min_lr=None, min_lr_ratio=None, **kwargs): - assert (min_lr is None) ^ (min_lr_ratio is None) - if start_percent < 0 or start_percent > 1 or not isinstance(start_percent, float): - raise ValueError('expected float between 0 and 1 start_percent, but ' f'got {start_percent}') - self.start_percent = start_percent - self.min_lr = min_lr - self.min_lr_ratio = min_lr_ratio - super(FlatCosineAnnealingLrUpdaterHook, self).__init__(**kwargs) - - def get_lr(self, runner, base_lr): - if self.by_epoch: - start = round(runner.max_epochs * self.start_percent) - progress = runner.epoch - start - max_progress = runner.max_epochs - start - else: - start = round(runner.max_iters * self.start_percent) - progress = runner.iter - start - max_progress = runner.max_iters - start - - if self.min_lr_ratio is not None: - target_lr = base_lr * self.min_lr_ratio - else: - target_lr = self.min_lr - - if progress < 0: - return base_lr - else: - return annealing_cos(base_lr, target_lr, progress / max_progress) - - -@HOOKS.register_module() -class CosineRestartLrUpdaterHook(LrUpdaterHook): - """Cosine annealing with restarts learning rate scheme. - - Args: - periods (list[int]): Periods for each cosine anneling cycle. - restart_weights (list[float], optional): Restart weights at each - restart iteration. Default: [1]. - min_lr (float, optional): The minimum lr. Default: None. - min_lr_ratio (float, optional): The ratio of minimum lr to the base lr. - Either `min_lr` or `min_lr_ratio` should be specified. - Default: None. - """ - - def __init__(self, periods, restart_weights=[1], min_lr=None, min_lr_ratio=None, **kwargs): - assert (min_lr is None) ^ (min_lr_ratio is None) - self.periods = periods - self.min_lr = min_lr - self.min_lr_ratio = min_lr_ratio - self.restart_weights = restart_weights - assert len(self.periods) == len( - self.restart_weights - ), 'periods and restart_weights should have the same length.' - super(CosineRestartLrUpdaterHook, self).__init__(**kwargs) - - self.cumulative_periods = [sum(self.periods[0 : i + 1]) for i in range(0, len(self.periods))] - - def get_lr(self, runner, base_lr): - if self.by_epoch: - progress = runner.epoch - else: - progress = runner.iter - - if self.min_lr_ratio is not None: - target_lr = base_lr * self.min_lr_ratio - else: - target_lr = self.min_lr - - idx = get_position_from_periods(progress, self.cumulative_periods) - current_weight = self.restart_weights[idx] - nearest_restart = 0 if idx == 0 else self.cumulative_periods[idx - 1] - current_periods = self.periods[idx] - - alpha = min((progress - nearest_restart) / current_periods, 1) - return annealing_cos(base_lr, target_lr, alpha, current_weight) - - -def get_position_from_periods(iteration, cumulative_periods): - """Get the position from a period list. - - It will return the index of the right-closest number in the period list. - For example, the cumulative_periods = [100, 200, 300, 400], - if iteration == 50, return 0; - if iteration == 210, return 2; - if iteration == 300, return 3. - - Args: - iteration (int): Current iteration. - cumulative_periods (list[int]): Cumulative period list. - - Returns: - int: The position of the right-closest number in the period list. - """ - for i, period in enumerate(cumulative_periods): - if iteration < period: - return i - raise ValueError(f'Current iteration {iteration} exceeds ' f'cumulative_periods {cumulative_periods}') - - -@HOOKS.register_module() -class CyclicLrUpdaterHook(LrUpdaterHook): - """Cyclic LR Scheduler. - - Implement the cyclical learning rate policy (CLR) described in - https://arxiv.org/pdf/1506.01186.pdf - - Different from the original paper, we use cosine annealing rather than - triangular policy inside a cycle. This improves the performance in the - 3D detection area. - - Args: - by_epoch (bool): Whether to update LR by epoch. - target_ratio (tuple[float]): Relative ratio of the highest LR and the - lowest LR to the initial LR. - cyclic_times (int): Number of cycles during training - step_ratio_up (float): The ratio of the increasing process of LR in - the total cycle. - anneal_strategy (str): {'cos', 'linear'} - Specifies the annealing strategy: 'cos' for cosine annealing, - 'linear' for linear annealing. Default: 'cos'. - """ - - def __init__( - self, - by_epoch=False, - target_ratio=(10, 1e-4), - cyclic_times=1, - step_ratio_up=0.4, - anneal_strategy='cos', - **kwargs, - ): - if isinstance(target_ratio, float): - target_ratio = (target_ratio, target_ratio / 1e5) - elif isinstance(target_ratio, tuple): - target_ratio = (target_ratio[0], target_ratio[0] / 1e5) if len(target_ratio) == 1 else target_ratio - else: - raise ValueError('target_ratio should be either float ' f'or tuple, got {type(target_ratio)}') - - assert len(target_ratio) == 2, '"target_ratio" must be list or tuple of two floats' - assert 0 <= step_ratio_up < 1.0, '"step_ratio_up" must be in range [0,1)' - - self.target_ratio = target_ratio - self.cyclic_times = cyclic_times - self.step_ratio_up = step_ratio_up - self.lr_phases = [] # init lr_phases - # validate anneal_strategy - if anneal_strategy not in ['cos', 'linear']: - raise ValueError('anneal_strategy must be one of "cos" or ' f'"linear", instead got {anneal_strategy}') - elif anneal_strategy == 'cos': - self.anneal_func = annealing_cos - elif anneal_strategy == 'linear': - self.anneal_func = annealing_linear - - assert not by_epoch, 'currently only support "by_epoch" = False' - super(CyclicLrUpdaterHook, self).__init__(by_epoch, **kwargs) - - def before_run(self, runner): - super(CyclicLrUpdaterHook, self).before_run(runner) - # initiate lr_phases - # total lr_phases are separated as up and down - max_iter_per_phase = runner.max_iters // self.cyclic_times - iter_up_phase = int(self.step_ratio_up * max_iter_per_phase) - self.lr_phases.append([0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]]) - self.lr_phases.append( - [iter_up_phase, max_iter_per_phase, max_iter_per_phase, self.target_ratio[0], self.target_ratio[1]] - ) - - def get_lr(self, runner, base_lr): - curr_iter = runner.iter - for (start_iter, end_iter, max_iter_per_phase, start_ratio, end_ratio) in self.lr_phases: - curr_iter %= max_iter_per_phase - if start_iter <= curr_iter < end_iter: - progress = curr_iter - start_iter - return self.anneal_func(base_lr * start_ratio, base_lr * end_ratio, progress / (end_iter - start_iter)) - - -@HOOKS.register_module() -class OneCycleLrUpdaterHook(LrUpdaterHook): - """One Cycle LR Scheduler. - - The 1cycle learning rate policy changes the learning rate after every - batch. The one cycle learning rate policy is described in - https://arxiv.org/pdf/1708.07120.pdf - - Args: - max_lr (float or list): Upper learning rate boundaries in the cycle - for each parameter group. - total_steps (int, optional): The total number of steps in the cycle. - Note that if a value is not provided here, it will be the max_iter - of runner. Default: None. - pct_start (float): The percentage of the cycle (in number of steps) - spent increasing the learning rate. - Default: 0.3 - anneal_strategy (str): {'cos', 'linear'} - Specifies the annealing strategy: 'cos' for cosine annealing, - 'linear' for linear annealing. - Default: 'cos' - div_factor (float): Determines the initial learning rate via - initial_lr = max_lr/div_factor - Default: 25 - final_div_factor (float): Determines the minimum learning rate via - min_lr = initial_lr/final_div_factor - Default: 1e4 - three_phase (bool): If three_phase is True, use a third phase of the - schedule to annihilate the learning rate according to - final_div_factor instead of modifying the second phase (the first - two phases will be symmetrical about the step indicated by - pct_start). - Default: False - """ - - def __init__( - self, - max_lr, - total_steps=None, - pct_start=0.3, - anneal_strategy='cos', - div_factor=25, - final_div_factor=1e4, - three_phase=False, - **kwargs, - ): - # validate by_epoch, currently only support by_epoch = False - if 'by_epoch' not in kwargs: - kwargs['by_epoch'] = False - else: - assert not kwargs['by_epoch'], 'currently only support "by_epoch" = False' - if not isinstance(max_lr, (numbers.Number, list, dict)): - raise ValueError('the type of max_lr must be the one of list or ' f'dict, but got {type(max_lr)}') - self._max_lr = max_lr - if total_steps is not None: - if not isinstance(total_steps, int): - raise ValueError('the type of total_steps must be int, but' f'got {type(total_steps)}') - self.total_steps = total_steps - # validate pct_start - if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float): - raise ValueError('expected float between 0 and 1 pct_start, but ' f'got {pct_start}') - self.pct_start = pct_start - # validate anneal_strategy - if anneal_strategy not in ['cos', 'linear']: - raise ValueError('anneal_strategy must be one of "cos" or ' f'"linear", instead got {anneal_strategy}') - elif anneal_strategy == 'cos': - self.anneal_func = annealing_cos - elif anneal_strategy == 'linear': - self.anneal_func = annealing_linear - self.div_factor = div_factor - self.final_div_factor = final_div_factor - self.three_phase = three_phase - self.lr_phases = [] # init lr_phases - super(OneCycleLrUpdaterHook, self).__init__(**kwargs) - - def before_run(self, runner): - if hasattr(self, 'total_steps'): - total_steps = self.total_steps - else: - total_steps = runner.max_iters - if total_steps < runner.max_iters: - raise ValueError( - 'The total steps must be greater than or equal to max ' - f'iterations {runner.max_iters} of runner, but total steps ' - f'is {total_steps}.' - ) - - if isinstance(runner.optimizer, dict): - self.base_lr = {} - for k, optim in runner.optimizer.items(): - _max_lr = format_param(k, optim, self._max_lr) - self.base_lr[k] = [lr / self.div_factor for lr in _max_lr] - for group, lr in zip(optim.param_groups, self.base_lr[k]): - group.setdefault('initial_lr', lr) - else: - k = type(runner.optimizer).__name__ - _max_lr = format_param(k, runner.optimizer, self._max_lr) - self.base_lr = [lr / self.div_factor for lr in _max_lr] - for group, lr in zip(runner.optimizer.param_groups, self.base_lr): - group.setdefault('initial_lr', lr) - - if self.three_phase: - self.lr_phases.append([float(self.pct_start * total_steps) - 1, 1, self.div_factor]) - self.lr_phases.append([float(2 * self.pct_start * total_steps) - 2, self.div_factor, 1]) - self.lr_phases.append([total_steps - 1, 1, 1 / self.final_div_factor]) - else: - self.lr_phases.append([float(self.pct_start * total_steps) - 1, 1, self.div_factor]) - self.lr_phases.append([total_steps - 1, self.div_factor, 1 / self.final_div_factor]) - - def get_lr(self, runner, base_lr): - curr_iter = runner.iter - start_iter = 0 - for i, (end_iter, start_lr, end_lr) in enumerate(self.lr_phases): - if curr_iter <= end_iter: - pct = (curr_iter - start_iter) / (end_iter - start_iter) - lr = self.anneal_func(base_lr * start_lr, base_lr * end_lr, pct) - break - start_iter = end_iter - return lr - - -def annealing_cos(start, end, factor, weight=1): - """Calculate annealing cos learning rate. - - Cosine anneal from `weight * start + (1 - weight) * end` to `end` as - percentage goes from 0.0 to 1.0. - - Args: - start (float): The starting learning rate of the cosine annealing. - end (float): The ending learing rate of the cosine annealing. - factor (float): The coefficient of `pi` when calculating the current - percentage. Range from 0.0 to 1.0. - weight (float, optional): The combination factor of `start` and `end` - when calculating the actual starting learning rate. Default to 1. - """ - cos_out = cos(pi * factor) + 1 - return end + 0.5 * weight * (start - end) * cos_out - - -def annealing_linear(start, end, factor): - """Calculate annealing linear learning rate. - - Linear anneal from `start` to `end` as percentage goes from 0.0 to 1.0. - - Args: - start (float): The starting learning rate of the linear annealing. - end (float): The ending learing rate of the linear annealing. - factor (float): The coefficient of `pi` when calculating the current - percentage. Range from 0.0 to 1.0. - """ - return start + (end - start) * factor - - -def format_param(name, optim, param): - if isinstance(param, numbers.Number): - return [param] * len(optim.param_groups) - elif isinstance(param, (list, tuple)): # multi param groups - if len(param) != len(optim.param_groups): - raise ValueError(f'expected {len(optim.param_groups)} ' f'values for {name}, got {len(param)}') - return param - else: # multi optimizers - if name not in param: - raise KeyError(f'{name} is not found in {param.keys()}') - return param[name] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py deleted file mode 100644 index d483c16b512c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch - -from .hook import HOOKS, Hook - - -@HOOKS.register_module() -class EmptyCacheHook(Hook): - def __init__(self, before_epoch=False, after_epoch=True, after_iter=False): - self._before_epoch = before_epoch - self._after_epoch = after_epoch - self._after_iter = after_iter - - def after_iter(self, runner): - if self._after_iter: - torch.cuda.empty_cache() - - def before_epoch(self, runner): - if self._before_epoch: - torch.cuda.empty_cache() - - def after_epoch(self, runner): - if self._after_epoch: - torch.cuda.empty_cache() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py deleted file mode 100644 index b366fa8e6817..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py +++ /dev/null @@ -1,421 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from .hook import HOOKS, Hook -from .lr_updater import annealing_cos, annealing_linear, format_param - - -class MomentumUpdaterHook(Hook): - def __init__(self, by_epoch=True, warmup=None, warmup_iters=0, warmup_ratio=0.9): - # validate the "warmup" argument - if warmup is not None: - if warmup not in ['constant', 'linear', 'exp']: - raise ValueError( - f'"{warmup}" is not a supported type for warming up, valid' ' types are "constant" and "linear"' - ) - if warmup is not None: - assert warmup_iters > 0, '"warmup_iters" must be a positive integer' - assert 0 < warmup_ratio <= 1.0, '"warmup_momentum" must be in range (0,1]' - - self.by_epoch = by_epoch - self.warmup = warmup - self.warmup_iters = warmup_iters - self.warmup_ratio = warmup_ratio - - self.base_momentum = [] # initial momentum for all param groups - self.regular_momentum = [] # expected momentum if no warming up is performed - - def _set_momentum(self, runner, momentum_groups): - if isinstance(runner.optimizer, dict): - for k, optim in runner.optimizer.items(): - for param_group, mom in zip(optim.param_groups, momentum_groups[k]): - if 'momentum' in param_group.keys(): - param_group['momentum'] = mom - elif 'betas' in param_group.keys(): - param_group['betas'] = (mom, param_group['betas'][1]) - else: - for param_group, mom in zip(runner.optimizer.param_groups, momentum_groups): - if 'momentum' in param_group.keys(): - param_group['momentum'] = mom - elif 'betas' in param_group.keys(): - param_group['betas'] = (mom, param_group['betas'][1]) - - def get_momentum(self, runner, base_momentum): - raise NotImplementedError - - def get_regular_momentum(self, runner): - if isinstance(runner.optimizer, dict): - momentum_groups = {} - for k in runner.optimizer.keys(): - _momentum_group = [ - self.get_momentum(runner, _base_momentum) for _base_momentum in self.base_momentum[k] - ] - momentum_groups.update({k: _momentum_group}) - return momentum_groups - else: - return [self.get_momentum(runner, _base_momentum) for _base_momentum in self.base_momentum] - - def get_warmup_momentum(self, cur_iters): - def _get_warmup_momentum(cur_iters, regular_momentum): - if self.warmup == 'constant': - warmup_momentum = [_momentum / self.warmup_ratio for _momentum in self.regular_momentum] - elif self.warmup == 'linear': - k = (1 - cur_iters / self.warmup_iters) * (1 - self.warmup_ratio) - warmup_momentum = [_momentum / (1 - k) for _momentum in self.regular_mom] - elif self.warmup == 'exp': - k = self.warmup_ratio ** (1 - cur_iters / self.warmup_iters) - warmup_momentum = [_momentum / k for _momentum in self.regular_mom] - return warmup_momentum - - if isinstance(self.regular_momentum, dict): - momentum_groups = {} - for key, regular_momentum in self.regular_momentum.items(): - momentum_groups[key] = _get_warmup_momentum(cur_iters, regular_momentum) - return momentum_groups - else: - return _get_warmup_momentum(cur_iters, self.regular_momentum) - - def before_run(self, runner): - # NOTE: when resuming from a checkpoint, - # if 'initial_momentum' is not saved, - # it will be set according to the optimizer params - if isinstance(runner.optimizer, dict): - self.base_momentum = {} - for k, optim in runner.optimizer.items(): - for group in optim.param_groups: - if 'momentum' in group.keys(): - group.setdefault('initial_momentum', group['momentum']) - else: - group.setdefault('initial_momentum', group['betas'][0]) - _base_momentum = [group['initial_momentum'] for group in optim.param_groups] - self.base_momentum.update({k: _base_momentum}) - else: - for group in runner.optimizer.param_groups: - if 'momentum' in group.keys(): - group.setdefault('initial_momentum', group['momentum']) - else: - group.setdefault('initial_momentum', group['betas'][0]) - self.base_momentum = [group['initial_momentum'] for group in runner.optimizer.param_groups] - - def before_train_epoch(self, runner): - if not self.by_epoch: - return - self.regular_mom = self.get_regular_momentum(runner) - self._set_momentum(runner, self.regular_mom) - - def before_train_iter(self, runner): - cur_iter = runner.iter - if not self.by_epoch: - self.regular_mom = self.get_regular_momentum(runner) - if self.warmup is None or cur_iter >= self.warmup_iters: - self._set_momentum(runner, self.regular_mom) - else: - warmup_momentum = self.get_warmup_momentum(cur_iter) - self._set_momentum(runner, warmup_momentum) - elif self.by_epoch: - if self.warmup is None or cur_iter > self.warmup_iters: - return - elif cur_iter == self.warmup_iters: - self._set_momentum(runner, self.regular_mom) - else: - warmup_momentum = self.get_warmup_momentum(cur_iter) - self._set_momentum(runner, warmup_momentum) - - -@HOOKS.register_module() -class StepMomentumUpdaterHook(MomentumUpdaterHook): - """Step momentum scheduler with min value clipping. - - Args: - step (int | list[int]): Step to decay the momentum. If an int value is - given, regard it as the decay interval. If a list is given, decay - momentum at these steps. - gamma (float, optional): Decay momentum ratio. Default: 0.5. - min_momentum (float, optional): Minimum momentum value to keep. If - momentum after decay is lower than this value, it will be clipped - accordingly. If None is given, we don't perform lr clipping. - Default: None. - """ - - def __init__(self, step, gamma=0.5, min_momentum=None, **kwargs): - if isinstance(step, list): - assert mmcv.is_list_of(step, int) - assert all([s > 0 for s in step]) - elif isinstance(step, int): - assert step > 0 - else: - raise TypeError('"step" must be a list or integer') - self.step = step - self.gamma = gamma - self.min_momentum = min_momentum - super(StepMomentumUpdaterHook, self).__init__(**kwargs) - - def get_momentum(self, runner, base_momentum): - progress = runner.epoch if self.by_epoch else runner.iter - - # calculate exponential term - if isinstance(self.step, int): - exp = progress // self.step - else: - exp = len(self.step) - for i, s in enumerate(self.step): - if progress < s: - exp = i - break - - momentum = base_momentum * (self.gamma ** exp) - if self.min_momentum is not None: - # clip to a minimum value - momentum = max(momentum, self.min_momentum) - return momentum - - -@HOOKS.register_module() -class CosineAnnealingMomentumUpdaterHook(MomentumUpdaterHook): - def __init__(self, min_momentum=None, min_momentum_ratio=None, **kwargs): - assert (min_momentum is None) ^ (min_momentum_ratio is None) - self.min_momentum = min_momentum - self.min_momentum_ratio = min_momentum_ratio - super(CosineAnnealingMomentumUpdaterHook, self).__init__(**kwargs) - - def get_momentum(self, runner, base_momentum): - if self.by_epoch: - progress = runner.epoch - max_progress = runner.max_epochs - else: - progress = runner.iter - max_progress = runner.max_iters - if self.min_momentum_ratio is not None: - target_momentum = base_momentum * self.min_momentum_ratio - else: - target_momentum = self.min_momentum - return annealing_cos(base_momentum, target_momentum, progress / max_progress) - - -@HOOKS.register_module() -class CyclicMomentumUpdaterHook(MomentumUpdaterHook): - """Cyclic momentum Scheduler. - - Implement the cyclical momentum scheduler policy described in - https://arxiv.org/pdf/1708.07120.pdf - - This momentum scheduler usually used together with the CyclicLRUpdater - to improve the performance in the 3D detection area. - - Attributes: - target_ratio (tuple[float]): Relative ratio of the lowest momentum and - the highest momentum to the initial momentum. - cyclic_times (int): Number of cycles during training - step_ratio_up (float): The ratio of the increasing process of momentum - in the total cycle. - by_epoch (bool): Whether to update momentum by epoch. - """ - - def __init__(self, by_epoch=False, target_ratio=(0.85 / 0.95, 1), cyclic_times=1, step_ratio_up=0.4, **kwargs): - if isinstance(target_ratio, float): - target_ratio = (target_ratio, target_ratio / 1e5) - elif isinstance(target_ratio, tuple): - target_ratio = (target_ratio[0], target_ratio[0] / 1e5) if len(target_ratio) == 1 else target_ratio - else: - raise ValueError('target_ratio should be either float ' f'or tuple, got {type(target_ratio)}') - - assert len(target_ratio) == 2, '"target_ratio" must be list or tuple of two floats' - assert 0 <= step_ratio_up < 1.0, '"step_ratio_up" must be in range [0,1)' - - self.target_ratio = target_ratio - self.cyclic_times = cyclic_times - self.step_ratio_up = step_ratio_up - self.momentum_phases = [] # init momentum_phases - # currently only support by_epoch=False - assert not by_epoch, 'currently only support "by_epoch" = False' - super(CyclicMomentumUpdaterHook, self).__init__(by_epoch, **kwargs) - - def before_run(self, runner): - super(CyclicMomentumUpdaterHook, self).before_run(runner) - # initiate momentum_phases - # total momentum_phases are separated as up and down - max_iter_per_phase = runner.max_iters // self.cyclic_times - iter_up_phase = int(self.step_ratio_up * max_iter_per_phase) - self.momentum_phases.append([0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]]) - self.momentum_phases.append( - [iter_up_phase, max_iter_per_phase, max_iter_per_phase, self.target_ratio[0], self.target_ratio[1]] - ) - - def get_momentum(self, runner, base_momentum): - curr_iter = runner.iter - for (start_iter, end_iter, max_iter_per_phase, start_ratio, end_ratio) in self.momentum_phases: - curr_iter %= max_iter_per_phase - if start_iter <= curr_iter < end_iter: - progress = curr_iter - start_iter - return annealing_cos( - base_momentum * start_ratio, base_momentum * end_ratio, progress / (end_iter - start_iter) - ) - - -@HOOKS.register_module() -class OneCycleMomentumUpdaterHook(MomentumUpdaterHook): - """OneCycle momentum Scheduler. - - This momentum scheduler usually used together with the OneCycleLrUpdater - to improve the performance. - - Args: - base_momentum (float or list): Lower momentum boundaries in the cycle - for each parameter group. Note that momentum is cycled inversely - to learning rate; at the peak of a cycle, momentum is - 'base_momentum' and learning rate is 'max_lr'. - Default: 0.85 - max_momentum (float or list): Upper momentum boundaries in the cycle - for each parameter group. Functionally, - it defines the cycle amplitude (max_momentum - base_momentum). - Note that momentum is cycled inversely - to learning rate; at the start of a cycle, momentum is - 'max_momentum' and learning rate is 'base_lr' - Default: 0.95 - pct_start (float): The percentage of the cycle (in number of steps) - spent increasing the learning rate. - Default: 0.3 - anneal_strategy (str): {'cos', 'linear'} - Specifies the annealing strategy: 'cos' for cosine annealing, - 'linear' for linear annealing. - Default: 'cos' - three_phase (bool): If three_phase is True, use a third phase of the - schedule to annihilate the learning rate according to - final_div_factor instead of modifying the second phase (the first - two phases will be symmetrical about the step indicated by - pct_start). - Default: False - """ - - def __init__( - self, base_momentum=0.85, max_momentum=0.95, pct_start=0.3, anneal_strategy='cos', three_phase=False, **kwargs - ): - # validate by_epoch, currently only support by_epoch=False - if 'by_epoch' not in kwargs: - kwargs['by_epoch'] = False - else: - assert not kwargs['by_epoch'], 'currently only support "by_epoch" = False' - if not isinstance(base_momentum, (float, list, dict)): - raise ValueError('base_momentum must be the type among of float,' 'list or dict.') - self._base_momentum = base_momentum - if not isinstance(max_momentum, (float, list, dict)): - raise ValueError('max_momentum must be the type among of float,' 'list or dict.') - self._max_momentum = max_momentum - # validate pct_start - if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float): - raise ValueError('Expected float between 0 and 1 pct_start, but ' f'got {pct_start}') - self.pct_start = pct_start - # validate anneal_strategy - if anneal_strategy not in ['cos', 'linear']: - raise ValueError('anneal_strategy must by one of "cos" or ' f'"linear", instead got {anneal_strategy}') - elif anneal_strategy == 'cos': - self.anneal_func = annealing_cos - elif anneal_strategy == 'linear': - self.anneal_func = annealing_linear - self.three_phase = three_phase - self.momentum_phases = [] # init momentum_phases - super(OneCycleMomentumUpdaterHook, self).__init__(**kwargs) - - def before_run(self, runner): - if isinstance(runner.optimizer, dict): - for k, optim in runner.optimizer.items(): - if 'momentum' not in optim.defaults and 'betas' not in optim.defaults: - raise ValueError('optimizer must support momentum with' 'option enabled') - self.use_beta1 = 'betas' in optim.defaults - _base_momentum = format_param(k, optim, self._base_momentum) - _max_momentum = format_param(k, optim, self._max_momentum) - for group, b_momentum, m_momentum in zip(optim.param_groups, _base_momentum, _max_momentum): - if self.use_beta1: - _, beta2 = group['betas'] - group['betas'] = (m_momentum, beta2) - else: - group['momentum'] = m_momentum - group['base_momentum'] = b_momentum - group['max_momentum'] = m_momentum - else: - optim = runner.optimizer - if 'momentum' not in optim.defaults and 'betas' not in optim.defaults: - raise ValueError('optimizer must support momentum with' 'option enabled') - self.use_beta1 = 'betas' in optim.defaults - k = type(optim).__name__ - _base_momentum = format_param(k, optim, self._base_momentum) - _max_momentum = format_param(k, optim, self._max_momentum) - for group, b_momentum, m_momentum in zip(optim.param_groups, _base_momentum, _max_momentum): - if self.use_beta1: - _, beta2 = group['betas'] - group['betas'] = (m_momentum, beta2) - else: - group['momentum'] = m_momentum - group['base_momentum'] = b_momentum - group['max_momentum'] = m_momentum - - if self.three_phase: - self.momentum_phases.append( - { - 'end_iter': float(self.pct_start * runner.max_iters) - 1, - 'start_momentum': 'max_momentum', - 'end_momentum': 'base_momentum', - } - ) - self.momentum_phases.append( - { - 'end_iter': float(2 * self.pct_start * runner.max_iters) - 2, - 'start_momentum': 'base_momentum', - 'end_momentum': 'max_momentum', - } - ) - self.momentum_phases.append( - {'end_iter': runner.max_iters - 1, 'start_momentum': 'max_momentum', 'end_momentum': 'max_momentum'} - ) - else: - self.momentum_phases.append( - { - 'end_iter': float(self.pct_start * runner.max_iters) - 1, - 'start_momentum': 'max_momentum', - 'end_momentum': 'base_momentum', - } - ) - self.momentum_phases.append( - {'end_iter': runner.max_iters - 1, 'start_momentum': 'base_momentum', 'end_momentum': 'max_momentum'} - ) - - def _set_momentum(self, runner, momentum_groups): - if isinstance(runner.optimizer, dict): - for k, optim in runner.optimizer.items(): - for param_group, mom in zip(optim.param_groups, momentum_groups[k]): - if 'momentum' in param_group.keys(): - param_group['momentum'] = mom - elif 'betas' in param_group.keys(): - param_group['betas'] = (mom, param_group['betas'][1]) - else: - for param_group, mom in zip(runner.optimizer.param_groups, momentum_groups): - if 'momentum' in param_group.keys(): - param_group['momentum'] = mom - elif 'betas' in param_group.keys(): - param_group['betas'] = (mom, param_group['betas'][1]) - - def get_momentum(self, runner, param_group): - curr_iter = runner.iter - start_iter = 0 - for i, phase in enumerate(self.momentum_phases): - end_iter = phase['end_iter'] - if curr_iter <= end_iter or i == len(self.momentum_phases) - 1: - pct = (curr_iter - start_iter) / (end_iter - start_iter) - momentum = self.anneal_func( - param_group[phase['start_momentum']], param_group[phase['end_momentum']], pct - ) - break - start_iter = end_iter - return momentum - - def get_regular_momentum(self, runner): - if isinstance(runner.optimizer, dict): - momentum_groups = {} - for k, optim in runner.optimizer.items(): - _momentum_group = [self.get_momentum(runner, param_group) for param_group in optim.param_groups] - momentum_groups.update({k: _momentum_group}) - return momentum_groups - else: - momentum_groups = [] - for param_group in runner.optimizer.param_groups: - momentum_groups.append(self.get_momentum(runner, param_group)) - return momentum_groups diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py deleted file mode 100644 index 03090c2e97ff..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py +++ /dev/null @@ -1,461 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import copy -from collections import defaultdict -from itertools import chain - -from torch.nn.utils import clip_grad - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, _BatchNorm, digit_version -from ..dist_utils import allreduce_grads -from ..fp16_utils import LossScaler, wrap_fp16_model -from .hook import HOOKS, Hook - -try: - # If PyTorch version >= 1.6.0, torch.cuda.amp.GradScaler would be imported - # and used; otherwise, auto fp16 will adopt mmcv's implementation. - from torch.cuda.amp import GradScaler -except ImportError: - pass - - -@HOOKS.register_module() -class OptimizerHook(Hook): - def __init__(self, grad_clip=None): - self.grad_clip = grad_clip - - def clip_grads(self, params): - params = list(filter(lambda p: p.requires_grad and p.grad is not None, params)) - if len(params) > 0: - return clip_grad.clip_grad_norm_(params, **self.grad_clip) - - def after_train_iter(self, runner): - runner.optimizer.zero_grad() - runner.outputs['loss'].backward() - if self.grad_clip is not None: - grad_norm = self.clip_grads(runner.model.parameters()) - if grad_norm is not None: - # Add grad norm to the logger - runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) - runner.optimizer.step() - - -@HOOKS.register_module() -class GradientCumulativeOptimizerHook(OptimizerHook): - """Optimizer Hook implements multi-iters gradient cumulating. - - Args: - cumulative_iters (int, optional): Num of gradient cumulative iters. - The optimizer will step every `cumulative_iters` iters. - Defaults to 1. - - Examples: - >>> # Use cumulative_iters to simulate a large batch size - >>> # It is helpful when the hardware cannot handle a large batch size. - >>> loader = DataLoader(data, batch_size=64) - >>> optim_hook = GradientCumulativeOptimizerHook(cumulative_iters=4) - >>> # almost equals to - >>> loader = DataLoader(data, batch_size=256) - >>> optim_hook = OptimizerHook() - """ - - def __init__(self, cumulative_iters=1, **kwargs): - super(GradientCumulativeOptimizerHook, self).__init__(**kwargs) - - assert isinstance(cumulative_iters, int) and cumulative_iters > 0, ( - f'cumulative_iters only accepts positive int, but got ' f'{type(cumulative_iters)} instead.' - ) - - self.cumulative_iters = cumulative_iters - self.divisible_iters = 0 - self.remainder_iters = 0 - self.initialized = False - - def has_batch_norm(self, module): - if isinstance(module, _BatchNorm): - return True - for m in module.children(): - if self.has_batch_norm(m): - return True - return False - - def _init(self, runner): - if runner.iter % self.cumulative_iters != 0: - runner.logger.warning( - 'Resume iter number is not divisible by cumulative_iters in ' - 'GradientCumulativeOptimizerHook, which means the gradient of ' - 'some iters is lost and the result may be influenced slightly.' - ) - - if self.has_batch_norm(runner.model) and self.cumulative_iters > 1: - runner.logger.warning( - 'GradientCumulativeOptimizerHook may slightly decrease ' - 'performance if the model has BatchNorm layers.' - ) - - residual_iters = runner.max_iters - runner.iter - - self.divisible_iters = residual_iters // self.cumulative_iters * self.cumulative_iters - self.remainder_iters = residual_iters - self.divisible_iters - - self.initialized = True - - def after_train_iter(self, runner): - if not self.initialized: - self._init(runner) - - if runner.iter < self.divisible_iters: - loss_factor = self.cumulative_iters - else: - loss_factor = self.remainder_iters - loss = runner.outputs['loss'] - loss = loss / loss_factor - loss.backward() - - if self.every_n_iters(runner, self.cumulative_iters) or self.is_last_iter(runner): - - if self.grad_clip is not None: - grad_norm = self.clip_grads(runner.model.parameters()) - if grad_norm is not None: - # Add grad norm to the logger - runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) - runner.optimizer.step() - runner.optimizer.zero_grad() - - -if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) >= digit_version('1.6.0'): - - @HOOKS.register_module() - class Fp16OptimizerHook(OptimizerHook): - """FP16 optimizer hook (using PyTorch's implementation). - - If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend, - to take care of the optimization procedure. - - Args: - loss_scale (float | str | dict): Scale factor configuration. - If loss_scale is a float, static loss scaling will be used with - the specified scale. If loss_scale is a string, it must be - 'dynamic', then dynamic loss scaling will be used. - It can also be a dict containing arguments of GradScalar. - Defaults to 512. For Pytorch >= 1.6, mmcv uses official - implementation of GradScaler. If you use a dict version of - loss_scale to create GradScaler, please refer to: - https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler - for the parameters. - - Examples: - >>> loss_scale = dict( - ... init_scale=65536.0, - ... growth_factor=2.0, - ... backoff_factor=0.5, - ... growth_interval=2000 - ... ) - >>> optimizer_hook = Fp16OptimizerHook(loss_scale=loss_scale) - """ - - def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1, loss_scale=512.0, distributed=True): - self.grad_clip = grad_clip - self.coalesce = coalesce - self.bucket_size_mb = bucket_size_mb - self.distributed = distributed - self._scale_update_param = None - if loss_scale == 'dynamic': - self.loss_scaler = GradScaler() - elif isinstance(loss_scale, float): - self._scale_update_param = loss_scale - self.loss_scaler = GradScaler(init_scale=loss_scale) - elif isinstance(loss_scale, dict): - self.loss_scaler = GradScaler(**loss_scale) - else: - raise ValueError('loss_scale must be of type float, dict, or ' f'"dynamic", got {loss_scale}') - - def before_run(self, runner): - """Preparing steps before Mixed Precision Training.""" - # wrap model mode to fp16 - wrap_fp16_model(runner.model) - # resume from state dict - if 'fp16' in runner.meta and 'loss_scaler' in runner.meta['fp16']: - scaler_state_dict = runner.meta['fp16']['loss_scaler'] - self.loss_scaler.load_state_dict(scaler_state_dict) - - def copy_grads_to_fp32(self, fp16_net, fp32_weights): - """Copy gradients from fp16 model to fp32 weight copy.""" - for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()): - if fp16_param.grad is not None: - if fp32_param.grad is None: - fp32_param.grad = fp32_param.data.new(fp32_param.size()) - fp32_param.grad.copy_(fp16_param.grad) - - def copy_params_to_fp16(self, fp16_net, fp32_weights): - """Copy updated params from fp32 weight copy to fp16 model.""" - for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights): - fp16_param.data.copy_(fp32_param.data) - - def after_train_iter(self, runner): - """Backward optimization steps for Mixed Precision Training. For - dynamic loss scaling, please refer to - https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler. - - 1. Scale the loss by a scale factor. - 2. Backward the loss to obtain the gradients. - 3. Unscale the optimizer’s gradient tensors. - 4. Call optimizer.step() and update scale factor. - 5. Save loss_scaler state_dict for resume purpose. - """ - # clear grads of last iteration - runner.model.zero_grad() - runner.optimizer.zero_grad() - - self.loss_scaler.scale(runner.outputs['loss']).backward() - self.loss_scaler.unscale_(runner.optimizer) - # grad clip - if self.grad_clip is not None: - grad_norm = self.clip_grads(runner.model.parameters()) - if grad_norm is not None: - # Add grad norm to the logger - runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) - # backward and update scaler - self.loss_scaler.step(runner.optimizer) - self.loss_scaler.update(self._scale_update_param) - - # save state_dict of loss_scaler - runner.meta.setdefault('fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() - - @HOOKS.register_module() - class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook, Fp16OptimizerHook): - """Fp16 optimizer Hook (using PyTorch's implementation) implements - multi-iters gradient cumulating. - - If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend, - to take care of the optimization procedure. - """ - - def __init__(self, *args, **kwargs): - super(GradientCumulativeFp16OptimizerHook, self).__init__(*args, **kwargs) - - def after_train_iter(self, runner): - if not self.initialized: - self._init(runner) - - if runner.iter < self.divisible_iters: - loss_factor = self.cumulative_iters - else: - loss_factor = self.remainder_iters - loss = runner.outputs['loss'] - loss = loss / loss_factor - - self.loss_scaler.scale(loss).backward() - - if self.every_n_iters(runner, self.cumulative_iters) or self.is_last_iter(runner): - - # copy fp16 grads in the model to fp32 params in the optimizer - self.loss_scaler.unscale_(runner.optimizer) - - if self.grad_clip is not None: - grad_norm = self.clip_grads(runner.model.parameters()) - if grad_norm is not None: - # Add grad norm to the logger - runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) - - # backward and update scaler - self.loss_scaler.step(runner.optimizer) - self.loss_scaler.update(self._scale_update_param) - - # save state_dict of loss_scaler - runner.meta.setdefault('fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() - - # clear grads - runner.model.zero_grad() - runner.optimizer.zero_grad() - - -else: - - @HOOKS.register_module() - class Fp16OptimizerHook(OptimizerHook): - """FP16 optimizer hook (mmcv's implementation). - - The steps of fp16 optimizer is as follows. - 1. Scale the loss value. - 2. BP in the fp16 model. - 2. Copy gradients from fp16 model to fp32 weights. - 3. Update fp32 weights. - 4. Copy updated parameters from fp32 weights to fp16 model. - - Refer to https://arxiv.org/abs/1710.03740 for more details. - - Args: - loss_scale (float | str | dict): Scale factor configuration. - If loss_scale is a float, static loss scaling will be used with - the specified scale. If loss_scale is a string, it must be - 'dynamic', then dynamic loss scaling will be used. - It can also be a dict containing arguments of LossScaler. - Defaults to 512. - """ - - def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1, loss_scale=512.0, distributed=True): - self.grad_clip = grad_clip - self.coalesce = coalesce - self.bucket_size_mb = bucket_size_mb - self.distributed = distributed - if loss_scale == 'dynamic': - self.loss_scaler = LossScaler(mode='dynamic') - elif isinstance(loss_scale, float): - self.loss_scaler = LossScaler(init_scale=loss_scale, mode='static') - elif isinstance(loss_scale, dict): - self.loss_scaler = LossScaler(**loss_scale) - else: - raise ValueError('loss_scale must be of type float, dict, or ' f'"dynamic", got {loss_scale}') - - def before_run(self, runner): - """Preparing steps before Mixed Precision Training. - - 1. Make a master copy of fp32 weights for optimization. - 2. Convert the main model from fp32 to fp16. - """ - # keep a copy of fp32 weights - old_groups = runner.optimizer.param_groups - runner.optimizer.param_groups = copy.deepcopy(runner.optimizer.param_groups) - state = defaultdict(dict) - p_map = { - old_p: p - for old_p, p in zip( - chain(*(g['params'] for g in old_groups)), - chain(*(g['params'] for g in runner.optimizer.param_groups)), - ) - } - for k, v in runner.optimizer.state.items(): - state[p_map[k]] = v - runner.optimizer.state = state - # convert model to fp16 - wrap_fp16_model(runner.model) - # resume from state dict - if 'fp16' in runner.meta and 'loss_scaler' in runner.meta['fp16']: - scaler_state_dict = runner.meta['fp16']['loss_scaler'] - self.loss_scaler.load_state_dict(scaler_state_dict) - - def copy_grads_to_fp32(self, fp16_net, fp32_weights): - """Copy gradients from fp16 model to fp32 weight copy.""" - for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()): - if fp16_param.grad is not None: - if fp32_param.grad is None: - fp32_param.grad = fp32_param.data.new(fp32_param.size()) - fp32_param.grad.copy_(fp16_param.grad) - - def copy_params_to_fp16(self, fp16_net, fp32_weights): - """Copy updated params from fp32 weight copy to fp16 model.""" - for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights): - fp16_param.data.copy_(fp32_param.data) - - def after_train_iter(self, runner): - """Backward optimization steps for Mixed Precision Training. For - dynamic loss scaling, please refer `loss_scalar.py` - - 1. Scale the loss by a scale factor. - 2. Backward the loss to obtain the gradients (fp16). - 3. Copy gradients from the model to the fp32 weight copy. - 4. Scale the gradients back and update the fp32 weight copy. - 5. Copy back the params from fp32 weight copy to the fp16 model. - 6. Save loss_scaler state_dict for resume purpose. - """ - # clear grads of last iteration - runner.model.zero_grad() - runner.optimizer.zero_grad() - # scale the loss value - scaled_loss = runner.outputs['loss'] * self.loss_scaler.loss_scale - scaled_loss.backward() - # copy fp16 grads in the model to fp32 params in the optimizer - - fp32_weights = [] - for param_group in runner.optimizer.param_groups: - fp32_weights += param_group['params'] - self.copy_grads_to_fp32(runner.model, fp32_weights) - # allreduce grads - if self.distributed: - allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb) - - has_overflow = self.loss_scaler.has_overflow(fp32_weights) - # if has overflow, skip this iteration - if not has_overflow: - # scale the gradients back - for param in fp32_weights: - if param.grad is not None: - param.grad.div_(self.loss_scaler.loss_scale) - if self.grad_clip is not None: - grad_norm = self.clip_grads(fp32_weights) - if grad_norm is not None: - # Add grad norm to the logger - runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) - # update fp32 params - runner.optimizer.step() - # copy fp32 params to the fp16 model - self.copy_params_to_fp16(runner.model, fp32_weights) - self.loss_scaler.update_scale(has_overflow) - if has_overflow: - runner.logger.warning('Check overflow, downscale loss scale ' f'to {self.loss_scaler.cur_scale}') - - # save state_dict of loss_scaler - runner.meta.setdefault('fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() - - @HOOKS.register_module() - class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook, Fp16OptimizerHook): - """Fp16 optimizer Hook (using mmcv implementation) implements multi- - iters gradient cumulating.""" - - def __init__(self, *args, **kwargs): - super(GradientCumulativeFp16OptimizerHook, self).__init__(*args, **kwargs) - - def after_train_iter(self, runner): - if not self.initialized: - self._init(runner) - - if runner.iter < self.divisible_iters: - loss_factor = self.cumulative_iters - else: - loss_factor = self.remainder_iters - - loss = runner.outputs['loss'] - loss = loss / loss_factor - - # scale the loss value - scaled_loss = loss * self.loss_scaler.loss_scale - scaled_loss.backward() - - if self.every_n_iters(runner, self.cumulative_iters) or self.is_last_iter(runner): - - # copy fp16 grads in the model to fp32 params in the optimizer - fp32_weights = [] - for param_group in runner.optimizer.param_groups: - fp32_weights += param_group['params'] - self.copy_grads_to_fp32(runner.model, fp32_weights) - # allreduce grads - if self.distributed: - allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb) - - has_overflow = self.loss_scaler.has_overflow(fp32_weights) - # if has overflow, skip this iteration - if not has_overflow: - # scale the gradients back - for param in fp32_weights: - if param.grad is not None: - param.grad.div_(self.loss_scaler.loss_scale) - if self.grad_clip is not None: - grad_norm = self.clip_grads(fp32_weights) - if grad_norm is not None: - # Add grad norm to the logger - runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) - # update fp32 params - runner.optimizer.step() - # copy fp32 params to the fp16 model - self.copy_params_to_fp16(runner.model, fp32_weights) - else: - runner.logger.warning('Check overflow, downscale loss scale ' f'to {self.loss_scaler.cur_scale}') - - self.loss_scaler.update_scale(has_overflow) - - # save state_dict of loss_scaler - runner.meta.setdefault('fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() - - # clear grads - runner.model.zero_grad() - runner.optimizer.zero_grad() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py deleted file mode 100644 index ad58c981b2be..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import warnings -from typing import Callable, List, Optional, Union - -import torch - -from ..dist_utils import master_only -from .hook import HOOKS, Hook - - -@HOOKS.register_module() -class ProfilerHook(Hook): - """Profiler to analyze performance during training. - - PyTorch Profiler is a tool that allows the collection of the performance - metrics during the training. More details on Profiler can be found at - https://pytorch.org/docs/1.8.1/profiler.html#torch.profiler.profile - - Args: - by_epoch (bool): Profile performance by epoch or by iteration. - Default: True. - profile_iters (int): Number of iterations for profiling. - If ``by_epoch=True``, profile_iters indicates that they are the - first profile_iters epochs at the beginning of the - training, otherwise it indicates the first profile_iters - iterations. Default: 1. - activities (list[str]): List of activity groups (CPU, CUDA) to use in - profiling. Default: ['cpu', 'cuda']. - schedule (dict, optional): Config of generating the callable schedule. - if schedule is None, profiler will not add step markers into the - trace and table view. Default: None. - on_trace_ready (callable, dict): Either a handler or a dict of generate - handler. Default: None. - record_shapes (bool): Save information about operator's input shapes. - Default: False. - profile_memory (bool): Track tensor memory allocation/deallocation. - Default: False. - with_stack (bool): Record source information (file and line number) - for the ops. Default: False. - with_flops (bool): Use formula to estimate the FLOPS of specific - operators (matrix multiplication and 2D convolution). - Default: False. - json_trace_path (str, optional): Exports the collected trace in Chrome - JSON format. Default: None. - - Example: - >>> runner = ... # instantiate a Runner - >>> # tensorboard trace - >>> trace_config = dict(type='tb_trace', dir_name='work_dir') - >>> profiler_config = dict(on_trace_ready=trace_config) - >>> runner.register_profiler_hook(profiler_config) - >>> runner.run(data_loaders=[trainloader], workflow=[('train', 1)]) - """ - - def __init__( - self, - by_epoch: bool = True, - profile_iters: int = 1, - activities: List[str] = ['cpu', 'cuda'], - schedule: Optional[dict] = None, - on_trace_ready: Optional[Union[Callable, dict]] = None, - record_shapes: bool = False, - profile_memory: bool = False, - with_stack: bool = False, - with_flops: bool = False, - json_trace_path: Optional[str] = None, - ) -> None: - try: - from torch import profiler # torch version >= 1.8.1 - except ImportError: - raise ImportError('profiler is the new feature of torch1.8.1, ' f'but your version is {torch.__version__}') - - assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean.' - self.by_epoch = by_epoch - - if profile_iters < 1: - raise ValueError('profile_iters should be greater than 0, but got ' f'{profile_iters}') - self.profile_iters = profile_iters - - if not isinstance(activities, list): - raise ValueError(f'activities should be list, but got {type(activities)}') - self.activities = [] - for activity in activities: - activity = activity.lower() - if activity == 'cpu': - self.activities.append(profiler.ProfilerActivity.CPU) - elif activity == 'cuda': - self.activities.append(profiler.ProfilerActivity.CUDA) - else: - raise ValueError(f'activity should be "cpu" or "cuda", but got {activity}') - - if schedule is not None: - self.schedule = profiler.schedule(**schedule) - else: - self.schedule = None - - self.on_trace_ready = on_trace_ready - self.record_shapes = record_shapes - self.profile_memory = profile_memory - self.with_stack = with_stack - self.with_flops = with_flops - self.json_trace_path = json_trace_path - - @master_only - def before_run(self, runner): - if self.by_epoch and runner.max_epochs < self.profile_iters: - raise ValueError('self.profile_iters should not be greater than ' f'{runner.max_epochs}') - - if not self.by_epoch and runner.max_iters < self.profile_iters: - raise ValueError('self.profile_iters should not be greater than ' f'{runner.max_iters}') - - if callable(self.on_trace_ready): # handler - _on_trace_ready = self.on_trace_ready - elif isinstance(self.on_trace_ready, dict): # config of handler - trace_cfg = self.on_trace_ready.copy() - trace_type = trace_cfg.pop('type') # log_trace handler - if trace_type == 'log_trace': - - def _log_handler(prof): - print(prof.key_averages().table(**trace_cfg)) - - _on_trace_ready = _log_handler - elif trace_type == 'tb_trace': # tensorboard_trace handler - try: - import torch_tb_profiler # noqa: F401 - except ImportError: - raise ImportError('please run "pip install ' 'torch-tb-profiler" to install ' 'torch_tb_profiler') - _on_trace_ready = torch.profiler.tensorboard_trace_handler(**trace_cfg) - else: - raise ValueError('trace_type should be "log_trace" or ' f'"tb_trace", but got {trace_type}') - elif self.on_trace_ready is None: - _on_trace_ready = None # type: ignore - else: - raise ValueError('on_trace_ready should be handler, dict or None, ' f'but got {type(self.on_trace_ready)}') - - if runner.max_epochs > 1: - warnings.warn( - f'profiler will profile {runner.max_epochs} epochs ' - 'instead of 1 epoch. Since profiler will slow down ' - 'the training, it is recommended to train 1 epoch ' - 'with ProfilerHook and adjust your setting according' - ' to the profiler summary. During normal training ' - '(epoch > 1), you may disable the ProfilerHook.' - ) - - self.profiler = torch.profiler.profile( - activities=self.activities, - schedule=self.schedule, - on_trace_ready=_on_trace_ready, - record_shapes=self.record_shapes, - profile_memory=self.profile_memory, - with_stack=self.with_stack, - with_flops=self.with_flops, - ) - - self.profiler.__enter__() - runner.logger.info('profiler is profiling...') - - @master_only - def after_train_epoch(self, runner): - if self.by_epoch and runner.epoch == self.profile_iters - 1: - runner.logger.info('profiler may take a few minutes...') - self.profiler.__exit__(None, None, None) - if self.json_trace_path is not None: - self.profiler.export_chrome_trace(self.json_trace_path) - - @master_only - def after_train_iter(self, runner): - self.profiler.step() - if not self.by_epoch and runner.iter == self.profile_iters - 1: - runner.logger.info('profiler may take a few minutes...') - self.profiler.__exit__(None, None, None) - if self.json_trace_path is not None: - self.profiler.export_chrome_trace(self.json_trace_path) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sampler_seed.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sampler_seed.py deleted file mode 100644 index ee0dc6bdd8df..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sampler_seed.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .hook import HOOKS, Hook - - -@HOOKS.register_module() -class DistSamplerSeedHook(Hook): - """Data-loading sampler for distributed training. - - When distributed training, it is only useful in conjunction with - :obj:`EpochBasedRunner`, while :obj:`IterBasedRunner` achieves the same - purpose with :obj:`IterLoader`. - """ - - def before_epoch(self, runner): - if hasattr(runner.data_loader.sampler, 'set_epoch'): - # in case the data loader uses `SequentialSampler` in Pytorch - runner.data_loader.sampler.set_epoch(runner.epoch) - elif hasattr(runner.data_loader.batch_sampler.sampler, 'set_epoch'): - # batch sampler in pytorch warps the sampler as its attributes. - runner.data_loader.batch_sampler.sampler.set_epoch(runner.epoch) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sync_buffer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sync_buffer.py deleted file mode 100644 index 6376b7ff8942..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sync_buffer.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from ..dist_utils import allreduce_params -from .hook import HOOKS, Hook - - -@HOOKS.register_module() -class SyncBuffersHook(Hook): - """Synchronize model buffers such as running_mean and running_var in BN at - the end of each epoch. - - Args: - distributed (bool): Whether distributed training is used. It is - effective only for distributed training. Defaults to True. - """ - - def __init__(self, distributed=True): - self.distributed = distributed - - def after_epoch(self, runner): - """All-reduce model buffers at the end of each epoch.""" - if self.distributed: - allreduce_params(runner.model.buffers()) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py deleted file mode 100644 index f73f8ca649f1..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py +++ /dev/null @@ -1,256 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os.path as osp -import platform -import shutil -import time -import warnings - -import torch -from torch.optim import Optimizer - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from .base_runner import BaseRunner -from .builder import RUNNERS -from .checkpoint import save_checkpoint -from .hooks import IterTimerHook -from .utils import get_host_info - - -class IterLoader: - def __init__(self, dataloader): - self._dataloader = dataloader - self.iter_loader = iter(self._dataloader) - self._epoch = 0 - - @property - def epoch(self): - return self._epoch - - def __next__(self): - try: - data = next(self.iter_loader) - except StopIteration: - self._epoch += 1 - if hasattr(self._dataloader.sampler, 'set_epoch'): - self._dataloader.sampler.set_epoch(self._epoch) - time.sleep(2) # Prevent possible deadlock during epoch transition - self.iter_loader = iter(self._dataloader) - data = next(self.iter_loader) - - return data - - def __len__(self): - return len(self._dataloader) - - -@RUNNERS.register_module() -class IterBasedRunner(BaseRunner): - """Iteration-based Runner. - - This runner train models iteration by iteration. - """ - - def train(self, data_loader, **kwargs): - self.model.train() - self.mode = 'train' - self.data_loader = data_loader - self._epoch = data_loader.epoch - data_batch = next(data_loader) - self.call_hook('before_train_iter') - outputs = self.model.train_step(data_batch, self.optimizer, **kwargs) - if not isinstance(outputs, dict): - raise TypeError('model.train_step() must return a dict') - if 'log_vars' in outputs: - self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) - self.outputs = outputs - self.call_hook('after_train_iter') - self._inner_iter += 1 - self._iter += 1 - - @torch.no_grad() - def val(self, data_loader, **kwargs): - self.model.eval() - self.mode = 'val' - self.data_loader = data_loader - data_batch = next(data_loader) - self.call_hook('before_val_iter') - outputs = self.model.val_step(data_batch, **kwargs) - if not isinstance(outputs, dict): - raise TypeError('model.val_step() must return a dict') - if 'log_vars' in outputs: - self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) - self.outputs = outputs - self.call_hook('after_val_iter') - self._inner_iter += 1 - - def run(self, data_loaders, workflow, max_iters=None, **kwargs): - """Start running. - - Args: - data_loaders (list[:obj:`DataLoader`]): Dataloaders for training - and validation. - workflow (list[tuple]): A list of (phase, iters) to specify the - running order and iterations. E.g, [('train', 10000), - ('val', 1000)] means running 10000 iterations for training and - 1000 iterations for validation, iteratively. - """ - assert isinstance(data_loaders, list) - assert mmcv.is_list_of(workflow, tuple) - assert len(data_loaders) == len(workflow) - if max_iters is not None: - warnings.warn( - 'setting max_iters in run is deprecated, ' 'please set max_iters in runner_config', DeprecationWarning - ) - self._max_iters = max_iters - assert self._max_iters is not None, 'max_iters must be specified during instantiation' - - work_dir = self.work_dir if self.work_dir is not None else 'NONE' - self.logger.info('Start running, host: %s, work_dir: %s', get_host_info(), work_dir) - self.logger.info('Hooks will be executed in the following order:\n%s', self.get_hook_info()) - self.logger.info('workflow: %s, max: %d iters', workflow, self._max_iters) - self.call_hook('before_run') - - iter_loaders = [IterLoader(x) for x in data_loaders] - - self.call_hook('before_epoch') - - while self.iter < self._max_iters: - for i, flow in enumerate(workflow): - self._inner_iter = 0 - mode, iters = flow - if not isinstance(mode, str) or not hasattr(self, mode): - raise ValueError('runner has no method named "{}" to run a workflow'.format(mode)) - iter_runner = getattr(self, mode) - for _ in range(iters): - if mode == 'train' and self.iter >= self._max_iters: - break - iter_runner(iter_loaders[i], **kwargs) - - time.sleep(1) # wait for some hooks like loggers to finish - self.call_hook('after_epoch') - self.call_hook('after_run') - - def resume(self, checkpoint, resume_optimizer=True, map_location='default'): - """Resume model from checkpoint. - - Args: - checkpoint (str): Checkpoint to resume from. - resume_optimizer (bool, optional): Whether resume the optimizer(s) - if the checkpoint file includes optimizer(s). Default to True. - map_location (str, optional): Same as :func:`torch.load`. - Default to 'default'. - """ - if map_location == 'default': - device_id = torch.cuda.current_device() - checkpoint = self.load_checkpoint(checkpoint, map_location=lambda storage, loc: storage.cuda(device_id)) - else: - checkpoint = self.load_checkpoint(checkpoint, map_location=map_location) - - self._epoch = checkpoint['meta']['epoch'] - self._iter = checkpoint['meta']['iter'] - self._inner_iter = checkpoint['meta']['iter'] - if 'optimizer' in checkpoint and resume_optimizer: - if isinstance(self.optimizer, Optimizer): - self.optimizer.load_state_dict(checkpoint['optimizer']) - elif isinstance(self.optimizer, dict): - for k in self.optimizer.keys(): - self.optimizer[k].load_state_dict(checkpoint['optimizer'][k]) - else: - raise TypeError('Optimizer should be dict or torch.optim.Optimizer ' f'but got {type(self.optimizer)}') - - self.logger.info(f'resumed from epoch: {self.epoch}, iter {self.iter}') - - def save_checkpoint( - self, out_dir, filename_tmpl='iter_{}.pth', meta=None, save_optimizer=True, create_symlink=True - ): - """Save checkpoint to file. - - Args: - out_dir (str): Directory to save checkpoint files. - filename_tmpl (str, optional): Checkpoint file template. - Defaults to 'iter_{}.pth'. - meta (dict, optional): Metadata to be saved in checkpoint. - Defaults to None. - save_optimizer (bool, optional): Whether save optimizer. - Defaults to True. - create_symlink (bool, optional): Whether create symlink to the - latest checkpoint file. Defaults to True. - """ - if meta is None: - meta = {} - elif not isinstance(meta, dict): - raise TypeError(f'meta should be a dict or None, but got {type(meta)}') - if self.meta is not None: - meta.update(self.meta) - # Note: meta.update(self.meta) should be done before - # meta.update(epoch=self.epoch + 1, iter=self.iter) otherwise - # there will be problems with resumed checkpoints. - # More details in https://github.com/open-mmlab/mmcv/pull/1108 - meta.update(epoch=self.epoch + 1, iter=self.iter) - - filename = filename_tmpl.format(self.iter + 1) - filepath = osp.join(out_dir, filename) - optimizer = self.optimizer if save_optimizer else None - save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) - # in some environments, `os.symlink` is not supported, you may need to - # set `create_symlink` to False - if create_symlink: - dst_file = osp.join(out_dir, 'latest.pth') - if platform.system() != 'Windows': - mmcv.symlink(filename, dst_file) - else: - shutil.copy(filepath, dst_file) - - def register_training_hooks( - self, - lr_config, - optimizer_config=None, - checkpoint_config=None, - log_config=None, - momentum_config=None, - custom_hooks_config=None, - ): - """Register default hooks for iter-based training. - - Checkpoint hook, optimizer stepper hook and logger hooks will be set to - `by_epoch=False` by default. - - Default hooks include: - - +----------------------+-------------------------+ - | Hooks | Priority | - +======================+=========================+ - | LrUpdaterHook | VERY_HIGH (10) | - +----------------------+-------------------------+ - | MomentumUpdaterHook | HIGH (30) | - +----------------------+-------------------------+ - | OptimizerStepperHook | ABOVE_NORMAL (40) | - +----------------------+-------------------------+ - | CheckpointSaverHook | NORMAL (50) | - +----------------------+-------------------------+ - | IterTimerHook | LOW (70) | - +----------------------+-------------------------+ - | LoggerHook(s) | VERY_LOW (90) | - +----------------------+-------------------------+ - | CustomHook(s) | defaults to NORMAL (50) | - +----------------------+-------------------------+ - - If custom hooks have same priority with default hooks, custom hooks - will be triggered after default hooks. - """ - if checkpoint_config is not None: - checkpoint_config.setdefault('by_epoch', False) - if lr_config is not None: - lr_config.setdefault('by_epoch', False) - if log_config is not None: - for info in log_config['hooks']: - info.setdefault('by_epoch', False) - super(IterBasedRunner, self).register_training_hooks( - lr_config=lr_config, - momentum_config=momentum_config, - optimizer_config=optimizer_config, - checkpoint_config=checkpoint_config, - log_config=log_config, - timer_config=IterTimerHook(), - custom_hooks_config=custom_hooks_config, - ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py deleted file mode 100644 index 5a08dfb3b937..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from collections import OrderedDict - -import numpy as np - - -class LogBuffer: - def __init__(self): - self.val_history = OrderedDict() - self.n_history = OrderedDict() - self.output = OrderedDict() - self.ready = False - - def clear(self): - self.val_history.clear() - self.n_history.clear() - self.clear_output() - - def clear_output(self): - self.output.clear() - self.ready = False - - def update(self, vars, count=1): - assert isinstance(vars, dict) - for key, var in vars.items(): - if key not in self.val_history: - self.val_history[key] = [] - self.n_history[key] = [] - self.val_history[key].append(var) - self.n_history[key].append(count) - - def average(self, n=0): - """Average latest n values or all values.""" - assert n >= 0 - for key in self.val_history: - values = np.array(self.val_history[key][-n:]) - nums = np.array(self.n_history[key][-n:]) - avg = np.sum(values * nums) / np.sum(nums) - self.output[key] = avg - self.ready = True diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py deleted file mode 100644 index c5a0041381c9..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .builder import OPTIMIZER_BUILDERS, OPTIMIZERS, build_optimizer, build_optimizer_constructor -from .default_constructor import DefaultOptimizerConstructor - -__all__ = [ - 'OPTIMIZER_BUILDERS', - 'OPTIMIZERS', - 'DefaultOptimizerConstructor', - 'build_optimizer', - 'build_optimizer_constructor', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py deleted file mode 100644 index d305b1a6eadd..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import copy -import inspect - -import torch - -from ...utils import Registry, build_from_cfg - -OPTIMIZERS = Registry('optimizer') -OPTIMIZER_BUILDERS = Registry('optimizer builder') - - -def register_torch_optimizers(): - torch_optimizers = [] - for module_name in dir(torch.optim): - if module_name.startswith('__'): - continue - _optim = getattr(torch.optim, module_name) - if inspect.isclass(_optim) and issubclass(_optim, torch.optim.Optimizer): - OPTIMIZERS.register_module()(_optim) - torch_optimizers.append(module_name) - return torch_optimizers - - -TORCH_OPTIMIZERS = register_torch_optimizers() - - -def build_optimizer_constructor(cfg): - return build_from_cfg(cfg, OPTIMIZER_BUILDERS) - - -def build_optimizer(model, cfg): - optimizer_cfg = copy.deepcopy(cfg) - constructor_type = optimizer_cfg.pop('constructor', 'DefaultOptimizerConstructor') - paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None) - optim_constructor = build_optimizer_constructor( - dict(type=constructor_type, optimizer_cfg=optimizer_cfg, paramwise_cfg=paramwise_cfg) - ) - optimizer = optim_constructor(model) - return optimizer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py deleted file mode 100644 index c0721ccad28f..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py +++ /dev/null @@ -1,246 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import warnings - -import torch -from torch.nn import GroupNorm, LayerNorm - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import ( - _BatchNorm, - _InstanceNorm, - build_from_cfg, - is_list_of, -) -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.ext_loader import check_ops_exist - -from .builder import OPTIMIZER_BUILDERS, OPTIMIZERS - - -@OPTIMIZER_BUILDERS.register_module() -class DefaultOptimizerConstructor: - """Default constructor for optimizers. - - By default each parameter share the same optimizer settings, and we - provide an argument ``paramwise_cfg`` to specify parameter-wise settings. - It is a dict and may contain the following fields: - - - ``custom_keys`` (dict): Specified parameters-wise settings by keys. If - one of the keys in ``custom_keys`` is a substring of the name of one - parameter, then the setting of the parameter will be specified by - ``custom_keys[key]`` and other setting like ``bias_lr_mult`` etc. will - be ignored. It should be noted that the aforementioned ``key`` is the - longest key that is a substring of the name of the parameter. If there - are multiple matched keys with the same length, then the key with lower - alphabet order will be chosen. - ``custom_keys[key]`` should be a dict and may contain fields ``lr_mult`` - and ``decay_mult``. See Example 2 below. - - ``bias_lr_mult`` (float): It will be multiplied to the learning - rate for all bias parameters (except for those in normalization - layers and offset layers of DCN). - - ``bias_decay_mult`` (float): It will be multiplied to the weight - decay for all bias parameters (except for those in - normalization layers, depthwise conv layers, offset layers of DCN). - - ``norm_decay_mult`` (float): It will be multiplied to the weight - decay for all weight and bias parameters of normalization - layers. - - ``dwconv_decay_mult`` (float): It will be multiplied to the weight - decay for all weight and bias parameters of depthwise conv - layers. - - ``dcn_offset_lr_mult`` (float): It will be multiplied to the learning - rate for parameters of offset layer in the deformable convs - of a model. - - ``bypass_duplicate`` (bool): If true, the duplicate parameters - would not be added into optimizer. Default: False. - - Note: - 1. If the option ``dcn_offset_lr_mult`` is used, the constructor will - override the effect of ``bias_lr_mult`` in the bias of offset - layer. So be careful when using both ``bias_lr_mult`` and - ``dcn_offset_lr_mult``. If you wish to apply both of them to the - offset layer in deformable convs, set ``dcn_offset_lr_mult`` - to the original ``dcn_offset_lr_mult`` * ``bias_lr_mult``. - 2. If the option ``dcn_offset_lr_mult`` is used, the constructor will - apply it to all the DCN layers in the model. So be careful when - the model contains multiple DCN layers in places other than - backbone. - - Args: - model (:obj:`nn.Module`): The model with parameters to be optimized. - optimizer_cfg (dict): The config dict of the optimizer. - Positional fields are - - - `type`: class name of the optimizer. - - Optional fields are - - - any arguments of the corresponding optimizer type, e.g., - lr, weight_decay, momentum, etc. - paramwise_cfg (dict, optional): Parameter-wise options. - - Example 1: - >>> model = torch.nn.modules.Conv1d(1, 1, 1) - >>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9, - >>> weight_decay=0.0001) - >>> paramwise_cfg = dict(norm_decay_mult=0.) - >>> optim_builder = DefaultOptimizerConstructor( - >>> optimizer_cfg, paramwise_cfg) - >>> optimizer = optim_builder(model) - - Example 2: - >>> # assume model have attribute model.backbone and model.cls_head - >>> optimizer_cfg = dict(type='SGD', lr=0.01, weight_decay=0.95) - >>> paramwise_cfg = dict(custom_keys={ - '.backbone': dict(lr_mult=0.1, decay_mult=0.9)}) - >>> optim_builder = DefaultOptimizerConstructor( - >>> optimizer_cfg, paramwise_cfg) - >>> optimizer = optim_builder(model) - >>> # Then the `lr` and `weight_decay` for model.backbone is - >>> # (0.01 * 0.1, 0.95 * 0.9). `lr` and `weight_decay` for - >>> # model.cls_head is (0.01, 0.95). - """ - - def __init__(self, optimizer_cfg, paramwise_cfg=None): - if not isinstance(optimizer_cfg, dict): - raise TypeError('optimizer_cfg should be a dict', f'but got {type(optimizer_cfg)}') - self.optimizer_cfg = optimizer_cfg - self.paramwise_cfg = {} if paramwise_cfg is None else paramwise_cfg - self.base_lr = optimizer_cfg.get('lr', None) - self.base_wd = optimizer_cfg.get('weight_decay', None) - self._validate_cfg() - - def _validate_cfg(self): - if not isinstance(self.paramwise_cfg, dict): - raise TypeError('paramwise_cfg should be None or a dict, ' f'but got {type(self.paramwise_cfg)}') - - if 'custom_keys' in self.paramwise_cfg: - if not isinstance(self.paramwise_cfg['custom_keys'], dict): - raise TypeError( - 'If specified, custom_keys must be a dict, ' f'but got {type(self.paramwise_cfg["custom_keys"])}' - ) - if self.base_wd is None: - for key in self.paramwise_cfg['custom_keys']: - if 'decay_mult' in self.paramwise_cfg['custom_keys'][key]: - raise ValueError('base_wd should not be None') - - # get base lr and weight decay - # weight_decay must be explicitly specified if mult is specified - if ( - 'bias_decay_mult' in self.paramwise_cfg - or 'norm_decay_mult' in self.paramwise_cfg - or 'dwconv_decay_mult' in self.paramwise_cfg - ): - if self.base_wd is None: - raise ValueError('base_wd should not be None') - - def _is_in(self, param_group, param_group_list): - assert is_list_of(param_group_list, dict) - param = set(param_group['params']) - param_set = set() - for group in param_group_list: - param_set.update(set(group['params'])) - - return not param.isdisjoint(param_set) - - def add_params(self, params, module, prefix='', is_dcn_module=None): - """Add all parameters of module to the params list. - - The parameters of the given module will be added to the list of param - groups, with specific rules defined by paramwise_cfg. - - Args: - params (list[dict]): A list of param groups, it will be modified - in place. - module (nn.Module): The module to be added. - prefix (str): The prefix of the module - is_dcn_module (int|float|None): If the current module is a - submodule of DCN, `is_dcn_module` will be passed to - control conv_offset layer's learning rate. Defaults to None. - """ - # get param-wise options - custom_keys = self.paramwise_cfg.get('custom_keys', {}) - # first sort with alphabet order and then sort with reversed len of str - sorted_keys = sorted(sorted(custom_keys.keys()), key=len, reverse=True) - - bias_lr_mult = self.paramwise_cfg.get('bias_lr_mult', 1.0) - bias_decay_mult = self.paramwise_cfg.get('bias_decay_mult', 1.0) - norm_decay_mult = self.paramwise_cfg.get('norm_decay_mult', 1.0) - dwconv_decay_mult = self.paramwise_cfg.get('dwconv_decay_mult', 1.0) - bypass_duplicate = self.paramwise_cfg.get('bypass_duplicate', False) - dcn_offset_lr_mult = self.paramwise_cfg.get('dcn_offset_lr_mult', 1.0) - - # special rules for norm layers and depth-wise conv layers - is_norm = isinstance(module, (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm)) - is_dwconv = isinstance(module, torch.nn.Conv2d) and module.in_channels == module.groups - - for name, param in module.named_parameters(recurse=False): - param_group = {'params': [param]} - if not param.requires_grad: - params.append(param_group) - continue - if bypass_duplicate and self._is_in(param_group, params): - warnings.warn(f'{prefix} is duplicate. It is skipped since ' f'bypass_duplicate={bypass_duplicate}') - continue - # if the parameter match one of the custom keys, ignore other rules - is_custom = False - for key in sorted_keys: - if key in f'{prefix}.{name}': - is_custom = True - lr_mult = custom_keys[key].get('lr_mult', 1.0) - param_group['lr'] = self.base_lr * lr_mult - if self.base_wd is not None: - decay_mult = custom_keys[key].get('decay_mult', 1.0) - param_group['weight_decay'] = self.base_wd * decay_mult - break - - if not is_custom: - # bias_lr_mult affects all bias parameters - # except for norm.bias dcn.conv_offset.bias - if name == 'bias' and not (is_norm or is_dcn_module): - param_group['lr'] = self.base_lr * bias_lr_mult - - if prefix.find('conv_offset') != -1 and is_dcn_module and isinstance(module, torch.nn.Conv2d): - # deal with both dcn_offset's bias & weight - param_group['lr'] = self.base_lr * dcn_offset_lr_mult - - # apply weight decay policies - if self.base_wd is not None: - # norm decay - if is_norm: - param_group['weight_decay'] = self.base_wd * norm_decay_mult - # depth-wise conv - elif is_dwconv: - param_group['weight_decay'] = self.base_wd * dwconv_decay_mult - # bias lr and decay - elif name == 'bias' and not is_dcn_module: - # TODO: current bias_decay_mult will have affect on DCN - param_group['weight_decay'] = self.base_wd * bias_decay_mult - params.append(param_group) - - if check_ops_exist(): - from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import ( - DeformConv2d, - ModulatedDeformConv2d, - ) - - is_dcn_module = isinstance(module, (DeformConv2d, ModulatedDeformConv2d)) - else: - is_dcn_module = False - for child_name, child_mod in module.named_children(): - child_prefix = f'{prefix}.{child_name}' if prefix else child_name - self.add_params(params, child_mod, prefix=child_prefix, is_dcn_module=is_dcn_module) - - def __call__(self, model): - if hasattr(model, 'module'): - model = model.module - - optimizer_cfg = self.optimizer_cfg.copy() - # if no paramwise option is specified, just use the global setting - if not self.paramwise_cfg: - optimizer_cfg['params'] = model.parameters() - return build_from_cfg(optimizer_cfg, OPTIMIZERS) - - # set param-wise lr and weight decay recursively - params = [] - self.add_params(params, model) - optimizer_cfg['params'] = params - - return build_from_cfg(optimizer_cfg, OPTIMIZERS) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/priority.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/priority.py deleted file mode 100644 index 64cc4e3a05f8..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/priority.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from enum import Enum - - -class Priority(Enum): - """Hook priority levels. - - +--------------+------------+ - | Level | Value | - +==============+============+ - | HIGHEST | 0 | - +--------------+------------+ - | VERY_HIGH | 10 | - +--------------+------------+ - | HIGH | 30 | - +--------------+------------+ - | ABOVE_NORMAL | 40 | - +--------------+------------+ - | NORMAL | 50 | - +--------------+------------+ - | BELOW_NORMAL | 60 | - +--------------+------------+ - | LOW | 70 | - +--------------+------------+ - | VERY_LOW | 90 | - +--------------+------------+ - | LOWEST | 100 | - +--------------+------------+ - """ - - HIGHEST = 0 - VERY_HIGH = 10 - HIGH = 30 - ABOVE_NORMAL = 40 - NORMAL = 50 - BELOW_NORMAL = 60 - LOW = 70 - VERY_LOW = 90 - LOWEST = 100 - - -def get_priority(priority): - """Get priority value. - - Args: - priority (int or str or :obj:`Priority`): Priority. - - Returns: - int: The priority value. - """ - if isinstance(priority, int): - if priority < 0 or priority > 100: - raise ValueError('priority must be between 0 and 100') - return priority - elif isinstance(priority, Priority): - return priority.value - elif isinstance(priority, str): - return Priority[priority.upper()].value - else: - raise TypeError('priority must be an integer or Priority enum value') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py deleted file mode 100644 index 4ac2ec3e88ff..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os -import random -import sys -import time -import warnings -from getpass import getuser -from socket import gethostname - -import numpy as np -import torch - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - - -def get_host_info(): - """Get hostname and username. - - Return empty string if exception raised, e.g. ``getpass.getuser()`` will - lead to error in docker container - """ - host = '' - try: - host = f'{getuser()}@{gethostname()}' - except Exception as e: - warnings.warn(f'Host or user not found: {str(e)}') - finally: - return host - - -def get_time_str(): - return time.strftime('%Y%m%d_%H%M%S', time.localtime()) - - -def obj_from_dict(info, parent=None, default_args=None): - """Initialize an object from dict. - - The dict must contain the key "type", which indicates the object type, it - can be either a string or type, such as "list" or ``list``. Remaining - fields are treated as the arguments for constructing the object. - - Args: - info (dict): Object types and arguments. - parent (:class:`module`): Module which may containing expected object - classes. - default_args (dict, optional): Default arguments for initializing the - object. - - Returns: - any type: Object built from the dict. - """ - assert isinstance(info, dict) and 'type' in info - assert isinstance(default_args, dict) or default_args is None - args = info.copy() - obj_type = args.pop('type') - if mmcv.is_str(obj_type): - if parent is not None: - obj_type = getattr(parent, obj_type) - else: - obj_type = sys.modules[obj_type] - elif not isinstance(obj_type, type): - raise TypeError('type must be a str or valid type, but ' f'got {type(obj_type)}') - if default_args is not None: - for name, value in default_args.items(): - args.setdefault(name, value) - return obj_type(**args) - - -def set_random_seed(seed, deterministic=False, use_rank_shift=False): - """Set random seed. - - Args: - seed (int): Seed to be used. - deterministic (bool): Whether to set the deterministic option for - CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` - to True and `torch.backends.cudnn.benchmark` to False. - Default: False. - rank_shift (bool): Whether to add rank number to the random seed to - have different random seed in different threads. Default: False. - """ - if use_rank_shift: - rank, _ = mmcv.runner.get_dist_info() - seed += rank - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - torch.cuda.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - os.environ['PYTHONHASHSEED'] = str(seed) - if deterministic: - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py deleted file mode 100644 index f0eb61d01a3d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py +++ /dev/null @@ -1,185 +0,0 @@ -# flake8: noqa -# Copyright (c) OpenMMLab. All rights reserved. -from .config import Config, ConfigDict, DictAction -from .misc import ( - check_prerequisites, - concat_list, - deprecated_api_warning, - has_method, - import_modules_from_strings, - is_list_of, - is_method_overridden, - is_seq_of, - is_str, - is_tuple_of, - iter_cast, - list_cast, - requires_executable, - requires_package, - slice_list, - to_1tuple, - to_2tuple, - to_3tuple, - to_4tuple, - to_ntuple, - tuple_cast, -) -from .path import check_file_exist, fopen, is_filepath, mkdir_or_exist, scandir, symlink -from .progressbar import ProgressBar, track_iter_progress, track_parallel_progress, track_progress -from .testing import ( - assert_attrs_equal, - assert_dict_contains_subset, - assert_dict_has_keys, - assert_is_norm_layer, - assert_keys_equal, - assert_params_all_zeros, - check_python_script, -) -from .timer import Timer, TimerError, check_time -from .version_utils import digit_version, get_git_hash - -try: - import torch -except ImportError: - __all__ = [ - 'Config', - 'ConfigDict', - 'DictAction', - 'is_str', - 'iter_cast', - 'list_cast', - 'tuple_cast', - 'is_seq_of', - 'is_list_of', - 'is_tuple_of', - 'slice_list', - 'concat_list', - 'check_prerequisites', - 'requires_package', - 'requires_executable', - 'is_filepath', - 'fopen', - 'check_file_exist', - 'mkdir_or_exist', - 'symlink', - 'scandir', - 'ProgressBar', - 'track_progress', - 'track_iter_progress', - 'track_parallel_progress', - 'Timer', - 'TimerError', - 'check_time', - 'deprecated_api_warning', - 'digit_version', - 'get_git_hash', - 'import_modules_from_strings', - 'assert_dict_contains_subset', - 'assert_attrs_equal', - 'assert_dict_has_keys', - 'assert_keys_equal', - 'check_python_script', - 'to_1tuple', - 'to_2tuple', - 'to_3tuple', - 'to_4tuple', - 'to_ntuple', - 'is_method_overridden', - 'has_method', - ] -else: - from .env import collect_env - from .logging import get_logger, print_log - from .parrots_jit import jit, skip_no_elena - from .parrots_wrapper import ( - TORCH_VERSION, - BuildExtension, - CppExtension, - CUDAExtension, - DataLoader, - PoolDataLoader, - SyncBatchNorm, - _AdaptiveAvgPoolNd, - _AdaptiveMaxPoolNd, - _AvgPoolNd, - _BatchNorm, - _ConvNd, - _ConvTransposeMixin, - _get_cuda_home, - _InstanceNorm, - _MaxPoolNd, - get_build_config, - is_rocm_pytorch, - ) - from .registry import Registry, build_from_cfg - from .trace import is_jit_tracing - - __all__ = [ - 'Config', - 'ConfigDict', - 'DictAction', - 'collect_env', - 'get_logger', - 'print_log', - 'is_str', - 'iter_cast', - 'list_cast', - 'tuple_cast', - 'is_seq_of', - 'is_list_of', - 'is_tuple_of', - 'slice_list', - 'concat_list', - 'check_prerequisites', - 'requires_package', - 'requires_executable', - 'is_filepath', - 'fopen', - 'check_file_exist', - 'mkdir_or_exist', - 'symlink', - 'scandir', - 'ProgressBar', - 'track_progress', - 'track_iter_progress', - 'track_parallel_progress', - 'Registry', - 'build_from_cfg', - 'Timer', - 'TimerError', - 'check_time', - 'SyncBatchNorm', - '_AdaptiveAvgPoolNd', - '_AdaptiveMaxPoolNd', - '_AvgPoolNd', - '_BatchNorm', - '_ConvNd', - '_ConvTransposeMixin', - '_InstanceNorm', - '_MaxPoolNd', - 'get_build_config', - 'BuildExtension', - 'CppExtension', - 'CUDAExtension', - 'DataLoader', - 'PoolDataLoader', - 'TORCH_VERSION', - 'deprecated_api_warning', - 'digit_version', - 'get_git_hash', - 'import_modules_from_strings', - 'jit', - 'skip_no_elena', - 'assert_dict_contains_subset', - 'assert_attrs_equal', - 'assert_dict_has_keys', - 'assert_keys_equal', - 'assert_is_norm_layer', - 'assert_params_all_zeros', - 'check_python_script', - 'is_method_overridden', - 'is_jit_tracing', - 'is_rocm_pytorch', - '_get_cuda_home', - 'has_method', - ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py deleted file mode 100644 index 2d8eb6858bc4..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py +++ /dev/null @@ -1,658 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import ast -import copy -import os -import os.path as osp -import platform -import shutil -import sys -import tempfile -import uuid -import warnings -from argparse import Action, ArgumentParser -from collections import abc -from importlib import import_module - -from addict import Dict -from yapf.yapflib.yapf_api import FormatCode - -from .misc import import_modules_from_strings -from .path import check_file_exist - -if platform.system() == 'Windows': - import regex as re -else: - import re - -BASE_KEY = '_base_' -DELETE_KEY = '_delete_' -DEPRECATION_KEY = '_deprecation_' -RESERVED_KEYS = ['filename', 'text', 'pretty_text'] - - -class ConfigDict(Dict): - def __missing__(self, name): - raise KeyError(name) - - def __getattr__(self, name): - try: - value = super(ConfigDict, self).__getattr__(name) - except KeyError: - ex = AttributeError(f"'{self.__class__.__name__}' object has no " f"attribute '{name}'") - except Exception as e: - ex = e - else: - return value - raise ex - - -def add_args(parser, cfg, prefix=''): - for k, v in cfg.items(): - if isinstance(v, str): - parser.add_argument('--' + prefix + k) - elif isinstance(v, int): - parser.add_argument('--' + prefix + k, type=int) - elif isinstance(v, float): - parser.add_argument('--' + prefix + k, type=float) - elif isinstance(v, bool): - parser.add_argument('--' + prefix + k, action='store_true') - elif isinstance(v, dict): - add_args(parser, v, prefix + k + '.') - elif isinstance(v, abc.Iterable): - parser.add_argument('--' + prefix + k, type=type(v[0]), nargs='+') - else: - print(f'cannot parse key {prefix + k} of type {type(v)}') - return parser - - -class Config: - """A facility for config and config files. - - It supports common file formats as configs: python/json/yaml. The interface - is the same as a dict object and also allows access config values as - attributes. - - Example: - >>> cfg = Config(dict(a=1, b=dict(b1=[0, 1]))) - >>> cfg.a - 1 - >>> cfg.b - {'b1': [0, 1]} - >>> cfg.b.b1 - [0, 1] - >>> cfg = Config.fromfile('tests/data/config/a.py') - >>> cfg.filename - "/home/kchen/projects/mmcv/tests/data/config/a.py" - >>> cfg.item4 - 'test' - >>> cfg - "Config [path: /home/kchen/projects/mmcv/tests/data/config/a.py]: " - "{'item1': [1, 2], 'item2': {'a': 0}, 'item3': True, 'item4': 'test'}" - """ - - @staticmethod - def _validate_py_syntax(filename): - with open(filename, 'r', encoding='utf-8') as f: - # Setting encoding explicitly to resolve coding issue on windows - content = f.read() - try: - ast.parse(content) - except SyntaxError as e: - raise SyntaxError('There are syntax errors in config ' f'file {filename}: {e}') - - @staticmethod - def _substitute_predefined_vars(filename, temp_config_name): - file_dirname = osp.dirname(filename) - file_basename = osp.basename(filename) - file_basename_no_extension = osp.splitext(file_basename)[0] - file_extname = osp.splitext(filename)[1] - support_templates = dict( - fileDirname=file_dirname, - fileBasename=file_basename, - fileBasenameNoExtension=file_basename_no_extension, - fileExtname=file_extname, - ) - with open(filename, 'r', encoding='utf-8') as f: - # Setting encoding explicitly to resolve coding issue on windows - config_file = f.read() - for key, value in support_templates.items(): - regexp = r'\{\{\s*' + str(key) + r'\s*\}\}' - value = value.replace('\\', '/') - config_file = re.sub(regexp, value, config_file) - with open(temp_config_name, 'w', encoding='utf-8') as tmp_config_file: - tmp_config_file.write(config_file) - - @staticmethod - def _pre_substitute_base_vars(filename, temp_config_name): - """Substitute base variable placehoders to string, so that parsing - would work.""" - with open(filename, 'r', encoding='utf-8') as f: - # Setting encoding explicitly to resolve coding issue on windows - config_file = f.read() - base_var_dict = {} - regexp = r'\{\{\s*' + BASE_KEY + r'\.([\w\.]+)\s*\}\}' - base_vars = set(re.findall(regexp, config_file)) - for base_var in base_vars: - randstr = f'_{base_var}_{uuid.uuid4().hex.lower()[:6]}' - base_var_dict[randstr] = base_var - regexp = r'\{\{\s*' + BASE_KEY + r'\.' + base_var + r'\s*\}\}' - config_file = re.sub(regexp, f'"{randstr}"', config_file) - with open(temp_config_name, 'w', encoding='utf-8') as tmp_config_file: - tmp_config_file.write(config_file) - return base_var_dict - - @staticmethod - def _substitute_base_vars(cfg, base_var_dict, base_cfg): - """Substitute variable strings to their actual values.""" - cfg = copy.deepcopy(cfg) - - if isinstance(cfg, dict): - for k, v in cfg.items(): - if isinstance(v, str) and v in base_var_dict: - new_v = base_cfg - for new_k in base_var_dict[v].split('.'): - new_v = new_v[new_k] - cfg[k] = new_v - elif isinstance(v, (list, tuple, dict)): - cfg[k] = Config._substitute_base_vars(v, base_var_dict, base_cfg) - elif isinstance(cfg, tuple): - cfg = tuple(Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg) - elif isinstance(cfg, list): - cfg = [Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg] - elif isinstance(cfg, str) and cfg in base_var_dict: - new_v = base_cfg - for new_k in base_var_dict[cfg].split('.'): - new_v = new_v[new_k] - cfg = new_v - - return cfg - - @staticmethod - def _file2dict(filename, use_predefined_variables=True): - filename = osp.abspath(osp.expanduser(filename)) - check_file_exist(filename) - fileExtname = osp.splitext(filename)[1] - if fileExtname not in ['.py', '.json', '.yaml', '.yml']: - raise IOError('Only py/yml/yaml/json type are supported now!') - - with tempfile.TemporaryDirectory() as temp_config_dir: - temp_config_file = tempfile.NamedTemporaryFile(dir=temp_config_dir, suffix=fileExtname) - if platform.system() == 'Windows': - temp_config_file.close() - temp_config_name = osp.basename(temp_config_file.name) - # Substitute predefined variables - if use_predefined_variables: - Config._substitute_predefined_vars(filename, temp_config_file.name) - else: - shutil.copyfile(filename, temp_config_file.name) - # Substitute base variables from placeholders to strings - base_var_dict = Config._pre_substitute_base_vars(temp_config_file.name, temp_config_file.name) - - if filename.endswith('.py'): - temp_module_name = osp.splitext(temp_config_name)[0] - sys.path.insert(0, temp_config_dir) - Config._validate_py_syntax(filename) - mod = import_module(temp_module_name) - sys.path.pop(0) - cfg_dict = {name: value for name, value in mod.__dict__.items() if not name.startswith('__')} - # delete imported module - del sys.modules[temp_module_name] - elif filename.endswith(('.yml', '.yaml', '.json')): - import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - - cfg_dict = mmcv.load(temp_config_file.name) - # close temp file - temp_config_file.close() - - # check deprecation information - if DEPRECATION_KEY in cfg_dict: - deprecation_info = cfg_dict.pop(DEPRECATION_KEY) - warning_msg = f'The config file {filename} will be deprecated ' 'in the future.' - if 'expected' in deprecation_info: - warning_msg += f' Please use {deprecation_info["expected"]} ' 'instead.' - if 'reference' in deprecation_info: - warning_msg += ' More information can be found at ' f'{deprecation_info["reference"]}' - warnings.warn(warning_msg) - - cfg_text = filename + '\n' - with open(filename, 'r', encoding='utf-8') as f: - # Setting encoding explicitly to resolve coding issue on windows - cfg_text += f.read() - - if BASE_KEY in cfg_dict: - cfg_dir = osp.dirname(filename) - base_filename = cfg_dict.pop(BASE_KEY) - base_filename = base_filename if isinstance(base_filename, list) else [base_filename] - - cfg_dict_list = list() - cfg_text_list = list() - for f in base_filename: - _cfg_dict, _cfg_text = Config._file2dict(osp.join(cfg_dir, f)) - cfg_dict_list.append(_cfg_dict) - cfg_text_list.append(_cfg_text) - - base_cfg_dict = dict() - for c in cfg_dict_list: - duplicate_keys = base_cfg_dict.keys() & c.keys() - if len(duplicate_keys) > 0: - raise KeyError('Duplicate key is not allowed among bases. ' f'Duplicate keys: {duplicate_keys}') - base_cfg_dict.update(c) - - # Substitute base variables from strings to their actual values - cfg_dict = Config._substitute_base_vars(cfg_dict, base_var_dict, base_cfg_dict) - - base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict) - cfg_dict = base_cfg_dict - - # merge cfg_text - cfg_text_list.append(cfg_text) - cfg_text = '\n'.join(cfg_text_list) - - return cfg_dict, cfg_text - - @staticmethod - def _merge_a_into_b(a, b, allow_list_keys=False): - """merge dict ``a`` into dict ``b`` (non-inplace). - - Values in ``a`` will overwrite ``b``. ``b`` is copied first to avoid - in-place modifications. - - Args: - a (dict): The source dict to be merged into ``b``. - b (dict): The origin dict to be fetch keys from ``a``. - allow_list_keys (bool): If True, int string keys (e.g. '0', '1') - are allowed in source ``a`` and will replace the element of the - corresponding index in b if b is a list. Default: False. - - Returns: - dict: The modified dict of ``b`` using ``a``. - - Examples: - # Normally merge a into b. - >>> Config._merge_a_into_b( - ... dict(obj=dict(a=2)), dict(obj=dict(a=1))) - {'obj': {'a': 2}} - - # Delete b first and merge a into b. - >>> Config._merge_a_into_b( - ... dict(obj=dict(_delete_=True, a=2)), dict(obj=dict(a=1))) - {'obj': {'a': 2}} - - # b is a list - >>> Config._merge_a_into_b( - ... {'0': dict(a=2)}, [dict(a=1), dict(b=2)], True) - [{'a': 2}, {'b': 2}] - """ - b = b.copy() - for k, v in a.items(): - if allow_list_keys and k.isdigit() and isinstance(b, list): - k = int(k) - if len(b) <= k: - raise KeyError(f'Index {k} exceeds the length of list {b}') - b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) - elif isinstance(v, dict) and k in b and not v.pop(DELETE_KEY, False): - allowed_types = (dict, list) if allow_list_keys else dict - if not isinstance(b[k], allowed_types): - raise TypeError( - f'{k}={v} in child config cannot inherit from base ' - f'because {k} is a dict in the child config but is of ' - f'type {type(b[k])} in base config. You may set ' - f'`{DELETE_KEY}=True` to ignore the base config' - ) - b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) - else: - b[k] = v - return b - - @staticmethod - def fromfile(filename, use_predefined_variables=True, import_custom_modules=True): - cfg_dict, cfg_text = Config._file2dict(filename, use_predefined_variables) - if import_custom_modules and cfg_dict.get('custom_imports', None): - import_modules_from_strings(**cfg_dict['custom_imports']) - return Config(cfg_dict, cfg_text=cfg_text, filename=filename) - - @staticmethod - def fromstring(cfg_str, file_format): - """Generate config from config str. - - Args: - cfg_str (str): Config str. - file_format (str): Config file format corresponding to the - config str. Only py/yml/yaml/json type are supported now! - - Returns: - obj:`Config`: Config obj. - """ - if file_format not in ['.py', '.json', '.yaml', '.yml']: - raise IOError('Only py/yml/yaml/json type are supported now!') - if file_format != '.py' and 'dict(' in cfg_str: - # check if users specify a wrong suffix for python - warnings.warn('Please check "file_format", the file format may be .py') - with tempfile.NamedTemporaryFile('w', encoding='utf-8', suffix=file_format, delete=False) as temp_file: - temp_file.write(cfg_str) - # on windows, previous implementation cause error - # see PR 1077 for details - cfg = Config.fromfile(temp_file.name) - os.remove(temp_file.name) - return cfg - - @staticmethod - def auto_argparser(description=None): - """Generate argparser from config file automatically (experimental)""" - partial_parser = ArgumentParser(description=description) - partial_parser.add_argument('config', help='config file path') - cfg_file = partial_parser.parse_known_args()[0].config - cfg = Config.fromfile(cfg_file) - parser = ArgumentParser(description=description) - parser.add_argument('config', help='config file path') - add_args(parser, cfg) - return parser, cfg - - def __init__(self, cfg_dict=None, cfg_text=None, filename=None): - if cfg_dict is None: - cfg_dict = dict() - elif not isinstance(cfg_dict, dict): - raise TypeError('cfg_dict must be a dict, but ' f'got {type(cfg_dict)}') - for key in cfg_dict: - if key in RESERVED_KEYS: - raise KeyError(f'{key} is reserved for config file') - - super(Config, self).__setattr__('_cfg_dict', ConfigDict(cfg_dict)) - super(Config, self).__setattr__('_filename', filename) - if cfg_text: - text = cfg_text - elif filename: - with open(filename, 'r') as f: - text = f.read() - else: - text = '' - super(Config, self).__setattr__('_text', text) - - @property - def filename(self): - return self._filename - - @property - def text(self): - return self._text - - @property - def pretty_text(self): - - indent = 4 - - def _indent(s_, num_spaces): - s = s_.split('\n') - if len(s) == 1: - return s_ - first = s.pop(0) - s = [(num_spaces * ' ') + line for line in s] - s = '\n'.join(s) - s = first + '\n' + s - return s - - def _format_basic_types(k, v, use_mapping=False): - if isinstance(v, str): - v_str = f"'{v}'" - else: - v_str = str(v) - - if use_mapping: - k_str = f"'{k}'" if isinstance(k, str) else str(k) - attr_str = f'{k_str}: {v_str}' - else: - attr_str = f'{str(k)}={v_str}' - attr_str = _indent(attr_str, indent) - - return attr_str - - def _format_list(k, v, use_mapping=False): - # check if all items in the list are dict - if all(isinstance(_, dict) for _ in v): - v_str = '[\n' - v_str += '\n'.join(f'dict({_indent(_format_dict(v_), indent)}),' for v_ in v).rstrip(',') - if use_mapping: - k_str = f"'{k}'" if isinstance(k, str) else str(k) - attr_str = f'{k_str}: {v_str}' - else: - attr_str = f'{str(k)}={v_str}' - attr_str = _indent(attr_str, indent) + ']' - else: - attr_str = _format_basic_types(k, v, use_mapping) - return attr_str - - def _contain_invalid_identifier(dict_str): - contain_invalid_identifier = False - for key_name in dict_str: - contain_invalid_identifier |= not str(key_name).isidentifier() - return contain_invalid_identifier - - def _format_dict(input_dict, outest_level=False): - r = '' - s = [] - - use_mapping = _contain_invalid_identifier(input_dict) - if use_mapping: - r += '{' - for idx, (k, v) in enumerate(input_dict.items()): - is_last = idx >= len(input_dict) - 1 - end = '' if outest_level or is_last else ',' - if isinstance(v, dict): - v_str = '\n' + _format_dict(v) - if use_mapping: - k_str = f"'{k}'" if isinstance(k, str) else str(k) - attr_str = f'{k_str}: dict({v_str}' - else: - attr_str = f'{str(k)}=dict({v_str}' - attr_str = _indent(attr_str, indent) + ')' + end - elif isinstance(v, list): - attr_str = _format_list(k, v, use_mapping) + end - else: - attr_str = _format_basic_types(k, v, use_mapping) + end - - s.append(attr_str) - r += '\n'.join(s) - if use_mapping: - r += '}' - return r - - cfg_dict = self._cfg_dict.to_dict() - text = _format_dict(cfg_dict, outest_level=True) - # copied from setup.cfg - yapf_style = dict( - based_on_style='pep8', - blank_line_before_nested_class_or_def=True, - split_before_expression_after_opening_paren=True, - ) - text, _ = FormatCode(text, style_config=yapf_style, verify=True) - - return text - - def __repr__(self): - return f'Config (path: {self.filename}): {self._cfg_dict.__repr__()}' - - def __len__(self): - return len(self._cfg_dict) - - def __getattr__(self, name): - return getattr(self._cfg_dict, name) - - def __getitem__(self, name): - return self._cfg_dict.__getitem__(name) - - def __setattr__(self, name, value): - if isinstance(value, dict): - value = ConfigDict(value) - self._cfg_dict.__setattr__(name, value) - - def __setitem__(self, name, value): - if isinstance(value, dict): - value = ConfigDict(value) - self._cfg_dict.__setitem__(name, value) - - def __iter__(self): - return iter(self._cfg_dict) - - def __getstate__(self): - return (self._cfg_dict, self._filename, self._text) - - def __setstate__(self, state): - _cfg_dict, _filename, _text = state - super(Config, self).__setattr__('_cfg_dict', _cfg_dict) - super(Config, self).__setattr__('_filename', _filename) - super(Config, self).__setattr__('_text', _text) - - def dump(self, file=None): - cfg_dict = super(Config, self).__getattribute__('_cfg_dict').to_dict() - if self.filename.endswith('.py'): - if file is None: - return self.pretty_text - else: - with open(file, 'w', encoding='utf-8') as f: - f.write(self.pretty_text) - else: - import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - - if file is None: - file_format = self.filename.split('.')[-1] - return mmcv.dump(cfg_dict, file_format=file_format) - else: - mmcv.dump(cfg_dict, file) - - def merge_from_dict(self, options, allow_list_keys=True): - """Merge list into cfg_dict. - - Merge the dict parsed by MultipleKVAction into this cfg. - - Examples: - >>> options = {'model.backbone.depth': 50, - ... 'model.backbone.with_cp':True} - >>> cfg = Config(dict(model=dict(backbone=dict(type='ResNet')))) - >>> cfg.merge_from_dict(options) - >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') - >>> assert cfg_dict == dict( - ... model=dict(backbone=dict(depth=50, with_cp=True))) - - # Merge list element - >>> cfg = Config(dict(pipeline=[ - ... dict(type='LoadImage'), dict(type='LoadAnnotations')])) - >>> options = dict(pipeline={'0': dict(type='SelfLoadImage')}) - >>> cfg.merge_from_dict(options, allow_list_keys=True) - >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') - >>> assert cfg_dict == dict(pipeline=[ - ... dict(type='SelfLoadImage'), dict(type='LoadAnnotations')]) - - Args: - options (dict): dict of configs to merge from. - allow_list_keys (bool): If True, int string keys (e.g. '0', '1') - are allowed in ``options`` and will replace the element of the - corresponding index in the config if the config is a list. - Default: True. - """ - option_cfg_dict = {} - for full_key, v in options.items(): - d = option_cfg_dict - key_list = full_key.split('.') - for subkey in key_list[:-1]: - d.setdefault(subkey, ConfigDict()) - d = d[subkey] - subkey = key_list[-1] - d[subkey] = v - - cfg_dict = super(Config, self).__getattribute__('_cfg_dict') - super(Config, self).__setattr__( - '_cfg_dict', Config._merge_a_into_b(option_cfg_dict, cfg_dict, allow_list_keys=allow_list_keys) - ) - - -class DictAction(Action): - """ - argparse action to split an argument into KEY=VALUE form - on the first = and append to a dictionary. List options can - be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit - brackets, i.e. 'KEY=[V1,V2,V3]'. It also support nested brackets to build - list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]' - """ - - @staticmethod - def _parse_int_float_bool(val): - try: - return int(val) - except ValueError: - pass - try: - return float(val) - except ValueError: - pass - if val.lower() in ['true', 'false']: - return True if val.lower() == 'true' else False - return val - - @staticmethod - def _parse_iterable(val): - """Parse iterable values in the string. - - All elements inside '()' or '[]' are treated as iterable values. - - Args: - val (str): Value string. - - Returns: - list | tuple: The expanded list or tuple from the string. - - Examples: - >>> DictAction._parse_iterable('1,2,3') - [1, 2, 3] - >>> DictAction._parse_iterable('[a, b, c]') - ['a', 'b', 'c'] - >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]') - [(1, 2, 3), ['a', 'b'], 'c'] - """ - - def find_next_comma(string): - """Find the position of next comma in the string. - - If no ',' is found in the string, return the string length. All - chars inside '()' and '[]' are treated as one element and thus ',' - inside these brackets are ignored. - """ - assert (string.count('(') == string.count(')')) and ( - string.count('[') == string.count(']') - ), f'Imbalanced brackets exist in {string}' - end = len(string) - for idx, char in enumerate(string): - pre = string[:idx] - # The string before this ',' is balanced - if (char == ',') and (pre.count('(') == pre.count(')')) and (pre.count('[') == pre.count(']')): - end = idx - break - return end - - # Strip ' and " characters and replace whitespace. - val = val.strip('\'\"').replace(' ', '') - is_tuple = False - if val.startswith('(') and val.endswith(')'): - is_tuple = True - val = val[1:-1] - elif val.startswith('[') and val.endswith(']'): - val = val[1:-1] - elif ',' not in val: - # val is a single value - return DictAction._parse_int_float_bool(val) - - values = [] - while len(val) > 0: - comma_idx = find_next_comma(val) - element = DictAction._parse_iterable(val[:comma_idx]) - values.append(element) - val = val[comma_idx + 1 :] - if is_tuple: - values = tuple(values) - return values - - def __call__(self, parser, namespace, values, option_string=None): - options = {} - for kv in values: - key, val = kv.split('=', maxsplit=1) - options[key] = self._parse_iterable(val) - setattr(namespace, self.dest, options) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py deleted file mode 100644 index 484c17be1767..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -"""This file holding some environment constant for sharing by other files.""" - -import os.path as osp -import subprocess -import sys -from collections import defaultdict - -import cv2 -import torch - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - -from .parrots_wrapper import get_build_config - - -def collect_env(): - """Collect the information of the running environments. - - Returns: - dict: The environment information. The following fields are contained. - - - sys.platform: The variable of ``sys.platform``. - - Python: Python version. - - CUDA available: Bool, indicating if CUDA is available. - - GPU devices: Device type of each GPU. - - CUDA_HOME (optional): The env var ``CUDA_HOME``. - - NVCC (optional): NVCC version. - - GCC: GCC version, "n/a" if GCC is not installed. - - PyTorch: PyTorch version. - - PyTorch compiling details: The output of \ - ``torch.__config__.show()``. - - TorchVision (optional): TorchVision version. - - OpenCV: OpenCV version. - - MMCV: MMCV version. - - MMCV Compiler: The GCC version for compiling MMCV ops. - - MMCV CUDA Compiler: The CUDA version for compiling MMCV ops. - """ - env_info = {} - env_info['sys.platform'] = sys.platform - env_info['Python'] = sys.version.replace('\n', '') - - cuda_available = torch.cuda.is_available() - env_info['CUDA available'] = cuda_available - - if cuda_available: - devices = defaultdict(list) - for k in range(torch.cuda.device_count()): - devices[torch.cuda.get_device_name(k)].append(str(k)) - for name, device_ids in devices.items(): - env_info['GPU ' + ','.join(device_ids)] = name - - from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _get_cuda_home - - CUDA_HOME = _get_cuda_home() - env_info['CUDA_HOME'] = CUDA_HOME - - if CUDA_HOME is not None and osp.isdir(CUDA_HOME): - try: - nvcc = osp.join(CUDA_HOME, 'bin/nvcc') - nvcc = subprocess.check_output(f'"{nvcc}" -V | tail -n1', shell=True) - nvcc = nvcc.decode('utf-8').strip() - except subprocess.SubprocessError: - nvcc = 'Not Available' - env_info['NVCC'] = nvcc - - try: - gcc = subprocess.check_output('gcc --version | head -n1', shell=True) - gcc = gcc.decode('utf-8').strip() - env_info['GCC'] = gcc - except subprocess.CalledProcessError: # gcc is unavailable - env_info['GCC'] = 'n/a' - - env_info['PyTorch'] = torch.__version__ - env_info['PyTorch compiling details'] = get_build_config() - - try: - import torchvision - - env_info['TorchVision'] = torchvision.__version__ - except ModuleNotFoundError: - pass - - env_info['OpenCV'] = cv2.__version__ - - env_info['MMCV'] = mmcv.__version__ - - try: - from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import ( - get_compiler_version, - get_compiling_cuda_version, - ) - except ModuleNotFoundError: - env_info['MMCV Compiler'] = 'n/a' - env_info['MMCV CUDA Compiler'] = 'n/a' - else: - env_info['MMCV Compiler'] = get_compiler_version() - env_info['MMCV CUDA Compiler'] = get_compiling_cuda_version() - - return env_info diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py deleted file mode 100644 index 6e2217c7e99d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import importlib -import os -import pkgutil -import warnings -from collections import namedtuple - -import torch - -if torch.__version__ != 'parrots': - - def load_ext(name, funcs): - ext = importlib.import_module('mmcv.' + name) - for fun in funcs: - assert hasattr(ext, fun), f'{fun} miss in module {name}' - return ext - - -else: - from parrots import extension - from parrots.base import ParrotsException - - has_return_value_ops = [ - 'nms', - 'softnms', - 'nms_match', - 'nms_rotated', - 'top_pool_forward', - 'top_pool_backward', - 'bottom_pool_forward', - 'bottom_pool_backward', - 'left_pool_forward', - 'left_pool_backward', - 'right_pool_forward', - 'right_pool_backward', - 'fused_bias_leakyrelu', - 'upfirdn2d', - 'ms_deform_attn_forward', - 'pixel_group', - 'contour_expand', - ] - - def get_fake_func(name, e): - def fake_func(*args, **kwargs): - warnings.warn(f'{name} is not supported in parrots now') - raise e - - return fake_func - - def load_ext(name, funcs): - ExtModule = namedtuple('ExtModule', funcs) - ext_list = [] - lib_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) - for fun in funcs: - try: - ext_fun = extension.load(fun, name, lib_dir=lib_root) - except ParrotsException as e: - if 'No element registered' not in e.message: - warnings.warn(e.message) - ext_fun = get_fake_func(fun, e) - ext_list.append(ext_fun) - else: - if fun in has_return_value_ops: - ext_list.append(ext_fun.op) - else: - ext_list.append(ext_fun.op_) - return ExtModule(*ext_list) - - -def check_ops_exist(): - ext_loader = pkgutil.find_loader('mmcv._ext') - return ext_loader is not None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py deleted file mode 100644 index 403a1ad7aa77..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import logging - -import torch.distributed as dist - -logger_initialized = {} - - -def get_logger(name, log_file=None, log_level=logging.INFO, file_mode='w'): - """Initialize and get a logger by name. - - If the logger has not been initialized, this method will initialize the - logger by adding one or two handlers, otherwise the initialized logger will - be directly returned. During initialization, a StreamHandler will always be - added. If `log_file` is specified and the process rank is 0, a FileHandler - will also be added. - - Args: - name (str): Logger name. - log_file (str | None): The log filename. If specified, a FileHandler - will be added to the logger. - log_level (int): The logger level. Note that only the process of - rank 0 is affected, and other processes will set the level to - "Error" thus be silent most of the time. - file_mode (str): The file mode used in opening log file. - Defaults to 'w'. - - Returns: - logging.Logger: The expected logger. - """ - logger = logging.getLogger(name) - if name in logger_initialized: - return logger - # handle hierarchical names - # e.g., logger "a" is initialized, then logger "a.b" will skip the - # initialization since it is a child of "a". - for logger_name in logger_initialized: - if name.startswith(logger_name): - return logger - - # handle duplicate logs to the console - # Starting in 1.8.0, PyTorch DDP attaches a StreamHandler (NOTSET) - # to the root logger. As logger.propagate is True by default, this root - # level handler causes logging messages from rank>0 processes to - # unexpectedly show up on the console, creating much unwanted clutter. - # To fix this issue, we set the root logger's StreamHandler, if any, to log - # at the ERROR level. - for handler in logger.root.handlers: - if type(handler) is logging.StreamHandler: - handler.setLevel(logging.ERROR) - - stream_handler = logging.StreamHandler() - handlers = [stream_handler] - - if dist.is_available() and dist.is_initialized(): - rank = dist.get_rank() - else: - rank = 0 - - # only rank 0 will add a FileHandler - if rank == 0 and log_file is not None: - # Here, the default behaviour of the official logger is 'a'. Thus, we - # provide an interface to change the file mode to the default - # behaviour. - file_handler = logging.FileHandler(log_file, file_mode) - handlers.append(file_handler) - - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') - for handler in handlers: - handler.setFormatter(formatter) - handler.setLevel(log_level) - logger.addHandler(handler) - - if rank == 0: - logger.setLevel(log_level) - else: - logger.setLevel(logging.ERROR) - - logger_initialized[name] = True - - return logger - - -def print_log(msg, logger=None, level=logging.INFO): - """Print a log message. - - Args: - msg (str): The message to be logged. - logger (logging.Logger | str | None): The logger to be used. - Some special loggers are: - - "silent": no message will be printed. - - other str: the logger obtained with `get_root_logger(logger)`. - - None: The `print()` method will be used to print log messages. - level (int): Logging level. Only available when `logger` is a Logger - object or "root". - """ - if logger is None: - print(msg) - elif isinstance(logger, logging.Logger): - logger.log(level, msg) - elif logger == 'silent': - pass - elif isinstance(logger, str): - _logger = get_logger(logger) - _logger.log(level, msg) - else: - raise TypeError( - 'logger should be either a logging.Logger object, str, ' f'"silent" or None, but got {type(logger)}' - ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py deleted file mode 100644 index 01204666f985..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py +++ /dev/null @@ -1,371 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import collections.abc -import functools -import itertools -import subprocess -import warnings -from collections import abc -from importlib import import_module -from inspect import getfullargspec -from itertools import repeat - - -# From PyTorch internals -def _ntuple(n): - def parse(x): - if isinstance(x, collections.abc.Iterable): - return x - return tuple(repeat(x, n)) - - return parse - - -to_1tuple = _ntuple(1) -to_2tuple = _ntuple(2) -to_3tuple = _ntuple(3) -to_4tuple = _ntuple(4) -to_ntuple = _ntuple - - -def is_str(x): - """Whether the input is an string instance. - - Note: This method is deprecated since python 2 is no longer supported. - """ - return isinstance(x, str) - - -def import_modules_from_strings(imports, allow_failed_imports=False): - """Import modules from the given list of strings. - - Args: - imports (list | str | None): The given module names to be imported. - allow_failed_imports (bool): If True, the failed imports will return - None. Otherwise, an ImportError is raise. Default: False. - - Returns: - list[module] | module | None: The imported modules. - - Examples: - >>> osp, sys = import_modules_from_strings( - ... ['os.path', 'sys']) - >>> import os.path as osp_ - >>> import sys as sys_ - >>> assert osp == osp_ - >>> assert sys == sys_ - """ - if not imports: - return - single_import = False - if isinstance(imports, str): - single_import = True - imports = [imports] - if not isinstance(imports, list): - raise TypeError(f'custom_imports must be a list but got type {type(imports)}') - imported = [] - for imp in imports: - if not isinstance(imp, str): - raise TypeError(f'{imp} is of type {type(imp)} and cannot be imported.') - try: - imported_tmp = import_module(imp) - except ImportError: - if allow_failed_imports: - warnings.warn(f'{imp} failed to import and is ignored.', UserWarning) - imported_tmp = None - else: - raise ImportError - imported.append(imported_tmp) - if single_import: - imported = imported[0] - return imported - - -def iter_cast(inputs, dst_type, return_type=None): - """Cast elements of an iterable object into some type. - - Args: - inputs (Iterable): The input object. - dst_type (type): Destination type. - return_type (type, optional): If specified, the output object will be - converted to this type, otherwise an iterator. - - Returns: - iterator or specified type: The converted object. - """ - if not isinstance(inputs, abc.Iterable): - raise TypeError('inputs must be an iterable object') - if not isinstance(dst_type, type): - raise TypeError('"dst_type" must be a valid type') - - out_iterable = map(dst_type, inputs) - - if return_type is None: - return out_iterable - else: - return return_type(out_iterable) - - -def list_cast(inputs, dst_type): - """Cast elements of an iterable object into a list of some type. - - A partial method of :func:`iter_cast`. - """ - return iter_cast(inputs, dst_type, return_type=list) - - -def tuple_cast(inputs, dst_type): - """Cast elements of an iterable object into a tuple of some type. - - A partial method of :func:`iter_cast`. - """ - return iter_cast(inputs, dst_type, return_type=tuple) - - -def is_seq_of(seq, expected_type, seq_type=None): - """Check whether it is a sequence of some type. - - Args: - seq (Sequence): The sequence to be checked. - expected_type (type): Expected type of sequence items. - seq_type (type, optional): Expected sequence type. - - Returns: - bool: Whether the sequence is valid. - """ - if seq_type is None: - exp_seq_type = abc.Sequence - else: - assert isinstance(seq_type, type) - exp_seq_type = seq_type - if not isinstance(seq, exp_seq_type): - return False - for item in seq: - if not isinstance(item, expected_type): - return False - return True - - -def is_list_of(seq, expected_type): - """Check whether it is a list of some type. - - A partial method of :func:`is_seq_of`. - """ - return is_seq_of(seq, expected_type, seq_type=list) - - -def is_tuple_of(seq, expected_type): - """Check whether it is a tuple of some type. - - A partial method of :func:`is_seq_of`. - """ - return is_seq_of(seq, expected_type, seq_type=tuple) - - -def slice_list(in_list, lens): - """Slice a list into several sub lists by a list of given length. - - Args: - in_list (list): The list to be sliced. - lens(int or list): The expected length of each out list. - - Returns: - list: A list of sliced list. - """ - if isinstance(lens, int): - assert len(in_list) % lens == 0 - lens = [lens] * int(len(in_list) / lens) - if not isinstance(lens, list): - raise TypeError('"indices" must be an integer or a list of integers') - elif sum(lens) != len(in_list): - raise ValueError('sum of lens and list length does not ' f'match: {sum(lens)} != {len(in_list)}') - out_list = [] - idx = 0 - for i in range(len(lens)): - out_list.append(in_list[idx : idx + lens[i]]) - idx += lens[i] - return out_list - - -def concat_list(in_list): - """Concatenate a list of list into a single list. - - Args: - in_list (list): The list of list to be merged. - - Returns: - list: The concatenated flat list. - """ - return list(itertools.chain(*in_list)) - - -def check_prerequisites( - prerequisites, - checker, - msg_tmpl='Prerequisites "{}" are required in method "{}" but not ' 'found, please install them first.', -): # yapf: disable - """A decorator factory to check if prerequisites are satisfied. - - Args: - prerequisites (str of list[str]): Prerequisites to be checked. - checker (callable): The checker method that returns True if a - prerequisite is meet, False otherwise. - msg_tmpl (str): The message template with two variables. - - Returns: - decorator: A specific decorator. - """ - - def wrap(func): - @functools.wraps(func) - def wrapped_func(*args, **kwargs): - requirements = [prerequisites] if isinstance(prerequisites, str) else prerequisites - missing = [] - for item in requirements: - if not checker(item): - missing.append(item) - if missing: - print(msg_tmpl.format(', '.join(missing), func.__name__)) - raise RuntimeError('Prerequisites not meet.') - else: - return func(*args, **kwargs) - - return wrapped_func - - return wrap - - -def _check_py_package(package): - try: - import_module(package) - except ImportError: - return False - else: - return True - - -def _check_executable(cmd): - if subprocess.call(f'which {cmd}', shell=True) != 0: - return False - else: - return True - - -def requires_package(prerequisites): - """A decorator to check if some python packages are installed. - - Example: - >>> @requires_package('numpy') - >>> func(arg1, args): - >>> return numpy.zeros(1) - array([0.]) - >>> @requires_package(['numpy', 'non_package']) - >>> func(arg1, args): - >>> return numpy.zeros(1) - ImportError - """ - return check_prerequisites(prerequisites, checker=_check_py_package) - - -def requires_executable(prerequisites): - """A decorator to check if some executable files are installed. - - Example: - >>> @requires_executable('ffmpeg') - >>> func(arg1, args): - >>> print(1) - 1 - """ - return check_prerequisites(prerequisites, checker=_check_executable) - - -def deprecated_api_warning(name_dict, cls_name=None): - """A decorator to check if some arguments are deprecate and try to replace - deprecate src_arg_name to dst_arg_name. - - Args: - name_dict(dict): - key (str): Deprecate argument names. - val (str): Expected argument names. - - Returns: - func: New function. - """ - - def api_warning_wrapper(old_func): - @functools.wraps(old_func) - def new_func(*args, **kwargs): - # get the arg spec of the decorated method - args_info = getfullargspec(old_func) - # get name of the function - func_name = old_func.__name__ - if cls_name is not None: - func_name = f'{cls_name}.{func_name}' - if args: - arg_names = args_info.args[: len(args)] - for src_arg_name, dst_arg_name in name_dict.items(): - if src_arg_name in arg_names: - warnings.warn( - f'"{src_arg_name}" is deprecated in ' - f'`{func_name}`, please use "{dst_arg_name}" ' - 'instead' - ) - arg_names[arg_names.index(src_arg_name)] = dst_arg_name - if kwargs: - for src_arg_name, dst_arg_name in name_dict.items(): - if src_arg_name in kwargs: - - assert dst_arg_name not in kwargs, ( - f'The expected behavior is to replace ' - f'the deprecated key `{src_arg_name}` to ' - f'new key `{dst_arg_name}`, but got them ' - f'in the arguments at the same time, which ' - f'is confusing. `{src_arg_name} will be ' - f'deprecated in the future, please ' - f'use `{dst_arg_name}` instead.' - ) - - warnings.warn( - f'"{src_arg_name}" is deprecated in ' - f'`{func_name}`, please use "{dst_arg_name}" ' - 'instead' - ) - kwargs[dst_arg_name] = kwargs.pop(src_arg_name) - - # apply converted arguments to the decorated method - output = old_func(*args, **kwargs) - return output - - return new_func - - return api_warning_wrapper - - -def is_method_overridden(method, base_class, derived_class): - """Check if a method of base class is overridden in derived class. - - Args: - method (str): the method name to check. - base_class (type): the class of the base class. - derived_class (type | Any): the class or instance of the derived class. - """ - assert isinstance(base_class, type), "base_class doesn't accept instance, Please pass class instead." - - if not isinstance(derived_class, type): - derived_class = derived_class.__class__ - - base_method = getattr(base_class, method) - derived_method = getattr(derived_class, method) - return derived_method != base_method - - -def has_method(obj: object, method: str) -> bool: - """Check whether the object has a method. - - Args: - method (str): The method name to check. - obj (object): The object to check. - - Returns: - bool: True if the object has the method else False. - """ - return hasattr(obj, method) and callable(getattr(obj, method)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py deleted file mode 100644 index e68d315d4a23..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os - -from .parrots_wrapper import TORCH_VERSION - -parrots_jit_option = os.getenv('PARROTS_JIT_OPTION') - -if TORCH_VERSION == 'parrots' and parrots_jit_option == 'ON': - from parrots.jit import pat as jit -else: - - def jit(func=None, check_input=None, full_shape=True, derivate=False, coderize=False, optimize=False): - def wrapper(func): - def wrapper_inner(*args, **kargs): - return func(*args, **kargs) - - return wrapper_inner - - if func is None: - return wrapper - else: - return func - - -if TORCH_VERSION == 'parrots': - from parrots.utils.tester import skip_no_elena -else: - - def skip_no_elena(func): - def wrapper(*args, **kargs): - return func(*args, **kargs) - - return wrapper diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py deleted file mode 100644 index 5f57625d7971..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from functools import partial - -import torch - -TORCH_VERSION = torch.__version__ - - -def is_rocm_pytorch() -> bool: - is_rocm = False - if TORCH_VERSION != 'parrots': - try: - from torch.utils.cpp_extension import ROCM_HOME - - is_rocm = True if ((torch.version.hip is not None) and (ROCM_HOME is not None)) else False - except ImportError: - pass - return is_rocm - - -def _get_cuda_home(): - if TORCH_VERSION == 'parrots': - from parrots.utils.build_extension import CUDA_HOME - else: - if is_rocm_pytorch(): - from torch.utils.cpp_extension import ROCM_HOME - - CUDA_HOME = ROCM_HOME - else: - from torch.utils.cpp_extension import CUDA_HOME - return CUDA_HOME - - -def get_build_config(): - if TORCH_VERSION == 'parrots': - from parrots.config import get_build_info - - return get_build_info() - else: - return torch.__config__.show() - - -def _get_conv(): - if TORCH_VERSION == 'parrots': - from parrots.nn.modules.conv import _ConvNd, _ConvTransposeMixin - else: - from torch.nn.modules.conv import _ConvNd, _ConvTransposeMixin - return _ConvNd, _ConvTransposeMixin - - -def _get_dataloader(): - if TORCH_VERSION == 'parrots': - from torch.utils.data import DataLoader, PoolDataLoader - else: - from torch.utils.data import DataLoader - - PoolDataLoader = DataLoader - return DataLoader, PoolDataLoader - - -def _get_extension(): - if TORCH_VERSION == 'parrots': - from parrots.utils.build_extension import BuildExtension, Extension - - CppExtension = partial(Extension, cuda=False) - CUDAExtension = partial(Extension, cuda=True) - else: - from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension - return BuildExtension, CppExtension, CUDAExtension - - -def _get_pool(): - if TORCH_VERSION == 'parrots': - from parrots.nn.modules.pool import _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd - else: - from torch.nn.modules.pooling import _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd - return _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd - - -def _get_norm(): - if TORCH_VERSION == 'parrots': - from parrots.nn.modules.batchnorm import _BatchNorm, _InstanceNorm - - SyncBatchNorm_ = torch.nn.SyncBatchNorm2d - else: - from torch.nn.modules.batchnorm import _BatchNorm - from torch.nn.modules.instancenorm import _InstanceNorm - - SyncBatchNorm_ = torch.nn.SyncBatchNorm - return _BatchNorm, _InstanceNorm, SyncBatchNorm_ - - -_ConvNd, _ConvTransposeMixin = _get_conv() -DataLoader, PoolDataLoader = _get_dataloader() -BuildExtension, CppExtension, CUDAExtension = _get_extension() -_BatchNorm, _InstanceNorm, SyncBatchNorm_ = _get_norm() -_AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd = _get_pool() - - -class SyncBatchNorm(SyncBatchNorm_): - def _check_input_dim(self, input): - if TORCH_VERSION == 'parrots': - if input.dim() < 2: - raise ValueError(f'expected at least 2D input (got {input.dim()}D input)') - else: - super()._check_input_dim(input) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py deleted file mode 100644 index 56eb66140d73..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os -import os.path as osp -from pathlib import Path - -from .misc import is_str - - -def is_filepath(x): - return is_str(x) or isinstance(x, Path) - - -def fopen(filepath, *args, **kwargs): - if is_str(filepath): - return open(filepath, *args, **kwargs) - elif isinstance(filepath, Path): - return filepath.open(*args, **kwargs) - raise ValueError('`filepath` should be a string or a Path') - - -def check_file_exist(filename, msg_tmpl='file "{}" does not exist'): - if not osp.isfile(filename): - raise FileNotFoundError(msg_tmpl.format(filename)) - - -def mkdir_or_exist(dir_name, mode=0o777): - if dir_name == '': - return - dir_name = osp.expanduser(dir_name) - os.makedirs(dir_name, mode=mode, exist_ok=True) - - -def symlink(src, dst, overwrite=True, **kwargs): - if os.path.lexists(dst) and overwrite: - os.remove(dst) - os.symlink(src, dst, **kwargs) - - -def scandir(dir_path, suffix=None, recursive=False, case_sensitive=True): - """Scan a directory to find the interested files. - - Args: - dir_path (str | obj:`Path`): Path of the directory. - suffix (str | tuple(str), optional): File suffix that we are - interested in. Default: None. - recursive (bool, optional): If set to True, recursively scan the - directory. Default: False. - case_sensitive (bool, optional) : If set to False, ignore the case of - suffix. Default: True. - - Returns: - A generator for all the interested files with relative paths. - """ - if isinstance(dir_path, (str, Path)): - dir_path = str(dir_path) - else: - raise TypeError('"dir_path" must be a string or Path object') - - if (suffix is not None) and not isinstance(suffix, (str, tuple)): - raise TypeError('"suffix" must be a string or tuple of strings') - - if suffix is not None and not case_sensitive: - suffix = suffix.lower() if isinstance(suffix, str) else tuple(item.lower() for item in suffix) - - root = dir_path - - def _scandir(dir_path, suffix, recursive, case_sensitive): - for entry in os.scandir(dir_path): - if not entry.name.startswith('.') and entry.is_file(): - rel_path = osp.relpath(entry.path, root) - _rel_path = rel_path if case_sensitive else rel_path.lower() - if suffix is None or _rel_path.endswith(suffix): - yield rel_path - elif recursive and os.path.isdir(entry.path): - # scan recursively if entry.path is a directory - yield from _scandir(entry.path, suffix, recursive, case_sensitive) - - return _scandir(dir_path, suffix, recursive, case_sensitive) - - -def find_vcs_root(path, markers=('.git',)): - """Finds the root directory (including itself) of specified markers. - - Args: - path (str): Path of directory or file. - markers (list[str], optional): List of file or directory names. - - Returns: - The directory contained one of the markers or None if not found. - """ - if osp.isfile(path): - path = osp.dirname(path) - - prev, cur = None, osp.abspath(osp.expanduser(path)) - while cur != prev: - if any(osp.exists(osp.join(cur, marker)) for marker in markers): - return cur - prev, cur = cur, osp.split(cur)[0] - return None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py deleted file mode 100644 index 68d2e2383dcf..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import sys -from collections.abc import Iterable -from multiprocessing import Pool -from shutil import get_terminal_size - -from .timer import Timer - - -class ProgressBar: - """A progress bar which can print the progress.""" - - def __init__(self, task_num=0, bar_width=50, start=True, file=sys.stdout): - self.task_num = task_num - self.bar_width = bar_width - self.completed = 0 - self.file = file - if start: - self.start() - - @property - def terminal_width(self): - width, _ = get_terminal_size() - return width - - def start(self): - if self.task_num > 0: - self.file.write(f'[{" " * self.bar_width}] 0/{self.task_num}, ' 'elapsed: 0s, ETA:') - else: - self.file.write('completed: 0, elapsed: 0s') - self.file.flush() - self.timer = Timer() - - def update(self, num_tasks=1): - assert num_tasks > 0 - self.completed += num_tasks - elapsed = self.timer.since_start() - if elapsed > 0: - fps = self.completed / elapsed - else: - fps = float('inf') - if self.task_num > 0: - percentage = self.completed / float(self.task_num) - eta = int(elapsed * (1 - percentage) / percentage + 0.5) - msg = ( - f'\r[{{}}] {self.completed}/{self.task_num}, ' - f'{fps:.1f} task/s, elapsed: {int(elapsed + 0.5)}s, ' - f'ETA: {eta:5}s' - ) - - bar_width = min(self.bar_width, int(self.terminal_width - len(msg)) + 2, int(self.terminal_width * 0.6)) - bar_width = max(2, bar_width) - mark_width = int(bar_width * percentage) - bar_chars = '>' * mark_width + ' ' * (bar_width - mark_width) - self.file.write(msg.format(bar_chars)) - else: - self.file.write(f'completed: {self.completed}, elapsed: {int(elapsed + 0.5)}s,' f' {fps:.1f} tasks/s') - self.file.flush() - - -def track_progress(func, tasks, bar_width=50, file=sys.stdout, **kwargs): - """Track the progress of tasks execution with a progress bar. - - Tasks are done with a simple for-loop. - - Args: - func (callable): The function to be applied to each task. - tasks (list or tuple[Iterable, int]): A list of tasks or - (tasks, total num). - bar_width (int): Width of progress bar. - - Returns: - list: The task results. - """ - if isinstance(tasks, tuple): - assert len(tasks) == 2 - assert isinstance(tasks[0], Iterable) - assert isinstance(tasks[1], int) - task_num = tasks[1] - tasks = tasks[0] - elif isinstance(tasks, Iterable): - task_num = len(tasks) - else: - raise TypeError('"tasks" must be an iterable object or a (iterator, int) tuple') - prog_bar = ProgressBar(task_num, bar_width, file=file) - results = [] - for task in tasks: - results.append(func(task, **kwargs)) - prog_bar.update() - prog_bar.file.write('\n') - return results - - -def init_pool(process_num, initializer=None, initargs=None): - if initializer is None: - return Pool(process_num) - elif initargs is None: - return Pool(process_num, initializer) - else: - if not isinstance(initargs, tuple): - raise TypeError('"initargs" must be a tuple') - return Pool(process_num, initializer, initargs) - - -def track_parallel_progress( - func, - tasks, - nproc, - initializer=None, - initargs=None, - bar_width=50, - chunksize=1, - skip_first=False, - keep_order=True, - file=sys.stdout, -): - """Track the progress of parallel task execution with a progress bar. - - The built-in :mod:`multiprocessing` module is used for process pools and - tasks are done with :func:`Pool.map` or :func:`Pool.imap_unordered`. - - Args: - func (callable): The function to be applied to each task. - tasks (list or tuple[Iterable, int]): A list of tasks or - (tasks, total num). - nproc (int): Process (worker) number. - initializer (None or callable): Refer to :class:`multiprocessing.Pool` - for details. - initargs (None or tuple): Refer to :class:`multiprocessing.Pool` for - details. - chunksize (int): Refer to :class:`multiprocessing.Pool` for details. - bar_width (int): Width of progress bar. - skip_first (bool): Whether to skip the first sample for each worker - when estimating fps, since the initialization step may takes - longer. - keep_order (bool): If True, :func:`Pool.imap` is used, otherwise - :func:`Pool.imap_unordered` is used. - - Returns: - list: The task results. - """ - if isinstance(tasks, tuple): - assert len(tasks) == 2 - assert isinstance(tasks[0], Iterable) - assert isinstance(tasks[1], int) - task_num = tasks[1] - tasks = tasks[0] - elif isinstance(tasks, Iterable): - task_num = len(tasks) - else: - raise TypeError('"tasks" must be an iterable object or a (iterator, int) tuple') - pool = init_pool(nproc, initializer, initargs) - start = not skip_first - task_num -= nproc * chunksize * int(skip_first) - prog_bar = ProgressBar(task_num, bar_width, start, file=file) - results = [] - if keep_order: - gen = pool.imap(func, tasks, chunksize) - else: - gen = pool.imap_unordered(func, tasks, chunksize) - for result in gen: - results.append(result) - if skip_first: - if len(results) < nproc * chunksize: - continue - elif len(results) == nproc * chunksize: - prog_bar.start() - continue - prog_bar.update() - prog_bar.file.write('\n') - pool.close() - pool.join() - return results - - -def track_iter_progress(tasks, bar_width=50, file=sys.stdout): - """Track the progress of tasks iteration or enumeration with a progress - bar. - - Tasks are yielded with a simple for-loop. - - Args: - tasks (list or tuple[Iterable, int]): A list of tasks or - (tasks, total num). - bar_width (int): Width of progress bar. - - Yields: - list: The task results. - """ - if isinstance(tasks, tuple): - assert len(tasks) == 2 - assert isinstance(tasks[0], Iterable) - assert isinstance(tasks[1], int) - task_num = tasks[1] - tasks = tasks[0] - elif isinstance(tasks, Iterable): - task_num = len(tasks) - else: - raise TypeError('"tasks" must be an iterable object or a (iterator, int) tuple') - prog_bar = ProgressBar(task_num, bar_width, file=file) - for task in tasks: - yield task - prog_bar.update() - prog_bar.file.write('\n') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py deleted file mode 100644 index d5433ed2b063..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py +++ /dev/null @@ -1,303 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import inspect -import warnings -from functools import partial - -from .misc import is_seq_of - - -def build_from_cfg(cfg, registry, default_args=None): - """Build a module from config dict. - - Args: - cfg (dict): Config dict. It should at least contain the key "type". - registry (:obj:`Registry`): The registry to search the type from. - default_args (dict, optional): Default initialization arguments. - - Returns: - object: The constructed object. - """ - if not isinstance(cfg, dict): - raise TypeError(f'cfg must be a dict, but got {type(cfg)}') - if 'type' not in cfg: - if default_args is None or 'type' not in default_args: - raise KeyError('`cfg` or `default_args` must contain the key "type", ' f'but got {cfg}\n{default_args}') - if not isinstance(registry, Registry): - raise TypeError('registry must be an mmcv.Registry object, ' f'but got {type(registry)}') - if not (isinstance(default_args, dict) or default_args is None): - raise TypeError('default_args must be a dict or None, ' f'but got {type(default_args)}') - - args = cfg.copy() - - if default_args is not None: - for name, value in default_args.items(): - args.setdefault(name, value) - - obj_type = args.pop('type') - if isinstance(obj_type, str): - obj_cls = registry.get(obj_type) - if obj_cls is None: - raise KeyError(f'{obj_type} is not in the {registry.name} registry') - elif inspect.isclass(obj_type): - obj_cls = obj_type - else: - raise TypeError(f'type must be a str or valid type, but got {type(obj_type)}') - try: - return obj_cls(**args) - except Exception as e: - # Normal TypeError does not print class name. - raise type(e)(f'{obj_cls.__name__}: {e}') - - -class Registry: - """A registry to map strings to classes. - - Registered object could be built from registry. - Example: - >>> MODELS = Registry('models') - >>> @MODELS.register_module() - >>> class ResNet: - >>> pass - >>> resnet = MODELS.build(dict(type='ResNet')) - - Please refer to - https://mmcv.readthedocs.io/en/latest/understand_mmcv/registry.html for - advanced usage. - - Args: - name (str): Registry name. - build_func(func, optional): Build function to construct instance from - Registry, func:`build_from_cfg` is used if neither ``parent`` or - ``build_func`` is specified. If ``parent`` is specified and - ``build_func`` is not given, ``build_func`` will be inherited - from ``parent``. Default: None. - parent (Registry, optional): Parent registry. The class registered in - children registry could be built from parent. Default: None. - scope (str, optional): The scope of registry. It is the key to search - for children registry. If not specified, scope will be the name of - the package where class is defined, e.g. mmdet, mmcls, mmseg. - Default: None. - """ - - def __init__(self, name, build_func=None, parent=None, scope=None): - self._name = name - self._module_dict = dict() - self._children = dict() - self._scope = self.infer_scope() if scope is None else scope - - # self.build_func will be set with the following priority: - # 1. build_func - # 2. parent.build_func - # 3. build_from_cfg - if build_func is None: - if parent is not None: - self.build_func = parent.build_func - else: - self.build_func = build_from_cfg - else: - self.build_func = build_func - if parent is not None: - assert isinstance(parent, Registry) - parent._add_children(self) - self.parent = parent - else: - self.parent = None - - def __len__(self): - return len(self._module_dict) - - def __contains__(self, key): - return self.get(key) is not None - - def __repr__(self): - format_str = self.__class__.__name__ + f'(name={self._name}, ' f'items={self._module_dict})' - return format_str - - @staticmethod - def infer_scope(): - """Infer the scope of registry. - - The name of the package where registry is defined will be returned. - - Example: - # in mmdet/models/backbone/resnet.py - >>> MODELS = Registry('models') - >>> @MODELS.register_module() - >>> class ResNet: - >>> pass - The scope of ``ResNet`` will be ``mmdet``. - - - Returns: - scope (str): The inferred scope name. - """ - # inspect.stack() trace where this function is called, the index-2 - # indicates the frame where `infer_scope()` is called - filename = inspect.getmodule(inspect.stack()[2][0]).__name__ - split_filename = filename.split('.') - return split_filename[0] - - @staticmethod - def split_scope_key(key): - """Split scope and key. - - The first scope will be split from key. - - Examples: - >>> Registry.split_scope_key('mmdet.ResNet') - 'mmdet', 'ResNet' - >>> Registry.split_scope_key('ResNet') - None, 'ResNet' - - Return: - scope (str, None): The first scope. - key (str): The remaining key. - """ - split_index = key.find('.') - if split_index != -1: - return key[:split_index], key[split_index + 1 :] - else: - return None, key - - @property - def name(self): - return self._name - - @property - def scope(self): - return self._scope - - @property - def module_dict(self): - return self._module_dict - - @property - def children(self): - return self._children - - def get(self, key): - """Get the registry record. - - Args: - key (str): The class name in string format. - - Returns: - class: The corresponding class. - """ - scope, real_key = self.split_scope_key(key) - if scope is None or scope == self._scope: - # get from self - if real_key in self._module_dict: - return self._module_dict[real_key] - else: - # get from self._children - if scope in self._children: - return self._children[scope].get(real_key) - else: - # goto root - parent = self.parent - while parent.parent is not None: - parent = parent.parent - return parent.get(key) - - def build(self, *args, **kwargs): - return self.build_func(*args, **kwargs, registry=self) - - def _add_children(self, registry): - """Add children for a registry. - - The ``registry`` will be added as children based on its scope. - The parent registry could build objects from children registry. - - Example: - >>> models = Registry('models') - >>> mmdet_models = Registry('models', parent=models) - >>> @mmdet_models.register_module() - >>> class ResNet: - >>> pass - >>> resnet = models.build(dict(type='mmdet.ResNet')) - """ - - assert isinstance(registry, Registry) - assert registry.scope is not None - assert registry.scope not in self.children, f'scope {registry.scope} exists in {self.name} registry' - self.children[registry.scope] = registry - - def _register_module(self, module_class, module_name=None, force=False): - if not inspect.isclass(module_class): - raise TypeError('module must be a class, ' f'but got {type(module_class)}') - - if module_name is None: - module_name = module_class.__name__ - if isinstance(module_name, str): - module_name = [module_name] - for name in module_name: - if not force and name in self._module_dict: - raise KeyError(f'{name} is already registered ' f'in {self.name}') - self._module_dict[name] = module_class - - def deprecated_register_module(self, cls=None, force=False): - warnings.warn( - 'The old API of register_module(module, force=False) ' - 'is deprecated and will be removed, please use the new API ' - 'register_module(name=None, force=False, module=None) instead.' - ) - if cls is None: - return partial(self.deprecated_register_module, force=force) - self._register_module(cls, force=force) - return cls - - def register_module(self, name=None, force=False, module=None): - """Register a module. - - A record will be added to `self._module_dict`, whose key is the class - name or the specified name, and value is the class itself. - It can be used as a decorator or a normal function. - - Example: - >>> backbones = Registry('backbone') - >>> @backbones.register_module() - >>> class ResNet: - >>> pass - - >>> backbones = Registry('backbone') - >>> @backbones.register_module(name='mnet') - >>> class MobileNet: - >>> pass - - >>> backbones = Registry('backbone') - >>> class ResNet: - >>> pass - >>> backbones.register_module(ResNet) - - Args: - name (str | None): The module name to be registered. If not - specified, the class name will be used. - force (bool, optional): Whether to override an existing class with - the same name. Default: False. - module (type): Module class to be registered. - """ - if not isinstance(force, bool): - raise TypeError(f'force must be a boolean, but got {type(force)}') - # NOTE: This is a walkaround to be compatible with the old api, - # while it may introduce unexpected bugs. - if isinstance(name, type): - return self.deprecated_register_module(name, force=force) - - # raise the error ahead of time - if not (name is None or isinstance(name, str) or is_seq_of(name, str)): - raise TypeError( - 'name must be either of None, an instance of str or a sequence' f' of str, but got {type(name)}' - ) - - # use it as a normal method: x.register_module(module=SomeClass) - if module is not None: - self._register_module(module_class=module, module_name=name, force=force) - return module - - # use it as a decorator: @x.register_module() - def _register(cls): - self._register_module(module_class=cls, module_name=name, force=force) - return cls - - return _register diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py deleted file mode 100644 index 4ba7d184d326..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright (c) Open-MMLab. -import sys -from collections.abc import Iterable -from runpy import run_path -from shlex import split -from typing import Any, Dict, List -from unittest.mock import patch - - -def check_python_script(cmd): - """Run the python cmd script with `__main__`. The difference between - `os.system` is that, this function exectues code in the current process, so - that it can be tracked by coverage tools. Currently it supports two forms: - - - ./tests/data/scripts/hello.py zz - - python tests/data/scripts/hello.py zz - """ - args = split(cmd) - if args[0] == 'python': - args = args[1:] - with patch.object(sys, 'argv', args): - run_path(args[0], run_name='__main__') - - -def _any(judge_result): - """Since built-in ``any`` works only when the element of iterable is not - iterable, implement the function.""" - if not isinstance(judge_result, Iterable): - return judge_result - - try: - for element in judge_result: - if _any(element): - return True - except TypeError: - # Maybe encounter the case: torch.tensor(True) | torch.tensor(False) - if judge_result: - return True - return False - - -def assert_dict_contains_subset(dict_obj: Dict[Any, Any], expected_subset: Dict[Any, Any]) -> bool: - """Check if the dict_obj contains the expected_subset. - - Args: - dict_obj (Dict[Any, Any]): Dict object to be checked. - expected_subset (Dict[Any, Any]): Subset expected to be contained in - dict_obj. - - Returns: - bool: Whether the dict_obj contains the expected_subset. - """ - - for key, value in expected_subset.items(): - if key not in dict_obj.keys() or _any(dict_obj[key] != value): - return False - return True - - -def assert_attrs_equal(obj: Any, expected_attrs: Dict[str, Any]) -> bool: - """Check if attribute of class object is correct. - - Args: - obj (object): Class object to be checked. - expected_attrs (Dict[str, Any]): Dict of the expected attrs. - - Returns: - bool: Whether the attribute of class object is correct. - """ - for attr, value in expected_attrs.items(): - if not hasattr(obj, attr) or _any(getattr(obj, attr) != value): - return False - return True - - -def assert_dict_has_keys(obj: Dict[str, Any], expected_keys: List[str]) -> bool: - """Check if the obj has all the expected_keys. - - Args: - obj (Dict[str, Any]): Object to be checked. - expected_keys (List[str]): Keys expected to contained in the keys of - the obj. - - Returns: - bool: Whether the obj has the expected keys. - """ - return set(expected_keys).issubset(set(obj.keys())) - - -def assert_keys_equal(result_keys: List[str], target_keys: List[str]) -> bool: - """Check if target_keys is equal to result_keys. - - Args: - result_keys (List[str]): Result keys to be checked. - target_keys (List[str]): Target keys to be checked. - - Returns: - bool: Whether target_keys is equal to result_keys. - """ - return set(result_keys) == set(target_keys) - - -def assert_is_norm_layer(module) -> bool: - """Check if the module is a norm layer. - - Args: - module (nn.Module): The module to be checked. - - Returns: - bool: Whether the module is a norm layer. - """ - from torch.nn import GroupNorm, LayerNorm - - from .parrots_wrapper import _BatchNorm, _InstanceNorm - - norm_layer_candidates = (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm) - return isinstance(module, norm_layer_candidates) - - -def assert_params_all_zeros(module) -> bool: - """Check if the parameters of the module is all zeros. - - Args: - module (nn.Module): The module to be checked. - - Returns: - bool: Whether the parameters of the module is all zeros. - """ - weight_data = module.weight.data - is_weight_zero = weight_data.allclose(weight_data.new_zeros(weight_data.size())) - - if hasattr(module, 'bias') and module.bias is not None: - bias_data = module.bias.data - is_bias_zero = bias_data.allclose(bias_data.new_zeros(bias_data.size())) - else: - is_bias_zero = True - - return is_weight_zero and is_bias_zero diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py deleted file mode 100644 index c20892b088e0..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from time import time - - -class TimerError(Exception): - def __init__(self, message): - self.message = message - super(TimerError, self).__init__(message) - - -class Timer: - """A flexible Timer class. - - :Example: - - >>> import time - >>> import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - >>> with mmcv.Timer(): - >>> # simulate a code block that will run for 1s - >>> time.sleep(1) - 1.000 - >>> with mmcv.Timer(print_tmpl='it takes {:.1f} seconds'): - >>> # simulate a code block that will run for 1s - >>> time.sleep(1) - it takes 1.0 seconds - >>> timer = mmcv.Timer() - >>> time.sleep(0.5) - >>> print(timer.since_start()) - 0.500 - >>> time.sleep(0.5) - >>> print(timer.since_last_check()) - 0.500 - >>> print(timer.since_start()) - 1.000 - """ - - def __init__(self, start=True, print_tmpl=None): - self._is_running = False - self.print_tmpl = print_tmpl if print_tmpl else '{:.3f}' - if start: - self.start() - - @property - def is_running(self): - """bool: indicate whether the timer is running""" - return self._is_running - - def __enter__(self): - self.start() - return self - - def __exit__(self, type, value, traceback): - print(self.print_tmpl.format(self.since_last_check())) - self._is_running = False - - def start(self): - """Start the timer.""" - if not self._is_running: - self._t_start = time() - self._is_running = True - self._t_last = time() - - def since_start(self): - """Total time since the timer is started. - - Returns (float): Time in seconds. - """ - if not self._is_running: - raise TimerError('timer is not running') - self._t_last = time() - return self._t_last - self._t_start - - def since_last_check(self): - """Time since the last checking. - - Either :func:`since_start` or :func:`since_last_check` is a checking - operation. - - Returns (float): Time in seconds. - """ - if not self._is_running: - raise TimerError('timer is not running') - dur = time() - self._t_last - self._t_last = time() - return dur - - -_g_timers = {} # global timers - - -def check_time(timer_id): - """Add check points in a single line. - - This method is suitable for running a task on a list of items. A timer will - be registered when the method is called for the first time. - - :Example: - - >>> import time - >>> import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - >>> for i in range(1, 6): - >>> # simulate a code block - >>> time.sleep(i) - >>> mmcv.check_time('task1') - 2.000 - 3.000 - 4.000 - 5.000 - - Args: - timer_id (str): Timer identifier. - """ - if timer_id not in _g_timers: - _g_timers[timer_id] = Timer() - return 0 - else: - return _g_timers[timer_id].since_last_check() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py deleted file mode 100644 index 12f297ee2eaa..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py +++ /dev/null @@ -1,24 +0,0 @@ -import warnings - -import torch - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import digit_version - - -def is_jit_tracing() -> bool: - if torch.__version__ != 'parrots' and digit_version(torch.__version__) >= digit_version('1.6.0'): - on_trace = torch.jit.is_tracing() - # In PyTorch 1.6, torch.jit.is_tracing has a bug. - # Refers to https://github.com/pytorch/pytorch/issues/42448 - if isinstance(on_trace, bool): - return on_trace - else: - return torch._C._is_tracing() - else: - warnings.warn( - 'torch.jit.is_tracing is only supported after v1.6.0. ' - 'Therefore is_tracing returns False automatically. Please ' - 'set on_trace manually if you are using trace.', - UserWarning, - ) - return False diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py deleted file mode 100644 index a0abd9d4596e..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os -import subprocess -import warnings - -from packaging.version import parse - - -def digit_version(version_str: str, length: int = 4): - """Convert a version string into a tuple of integers. - - This method is usually used for comparing two versions. For pre-release - versions: alpha < beta < rc. - - Args: - version_str (str): The version string. - length (int): The maximum number of version levels. Default: 4. - - Returns: - tuple[int]: The version info in digits (integers). - """ - assert 'parrots' not in version_str - version = parse(version_str) - assert version.release, f'failed to parse version {version_str}' - release = list(version.release) - release = release[:length] - if len(release) < length: - release = release + [0] * (length - len(release)) - if version.is_prerelease: - mapping = {'a': -3, 'b': -2, 'rc': -1} - val = -4 - # version.pre can be None - if version.pre: - if version.pre[0] not in mapping: - warnings.warn(f'unknown prerelease version {version.pre[0]}, ' 'version checking may go wrong') - else: - val = mapping[version.pre[0]] - release.extend([val, version.pre[-1]]) - else: - release.extend([val, 0]) - - elif version.is_postrelease: - release.extend([1, version.post]) - else: - release.extend([0, 0]) - return tuple(release) - - -def _minimal_ext_cmd(cmd): - # construct minimal environment - env = {} - for k in ['SYSTEMROOT', 'PATH', 'HOME']: - v = os.environ.get(k) - if v is not None: - env[k] = v - # LANGUAGE is used on win32 - env['LANGUAGE'] = 'C' - env['LANG'] = 'C' - env['LC_ALL'] = 'C' - out = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env).communicate()[0] - return out - - -def get_git_hash(fallback='unknown', digits=None): - """Get the git hash of the current repo. - - Args: - fallback (str, optional): The fallback string when git hash is - unavailable. Defaults to 'unknown'. - digits (int, optional): kept digits of the hash. Defaults to None, - meaning all digits are kept. - - Returns: - str: Git commit hash. - """ - - if digits is not None and not isinstance(digits, int): - raise TypeError('digits must be None or an integer') - - try: - out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) - sha = out.strip().decode('ascii') - if digits is not None: - sha = sha[:digits] - except OSError: - sha = fallback - - return sha diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py deleted file mode 100644 index 50d390de11c6..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -__version__ = '1.3.17' - - -def parse_version_info(version_str: str, length: int = 4) -> tuple: - """Parse a version string into a tuple. - - Args: - version_str (str): The version string. - length (int): The maximum number of version levels. Default: 4. - - Returns: - tuple[int | str]: The version info, e.g., "1.3.0" is parsed into - (1, 3, 0, 0, 0, 0), and "2.0.0rc1" is parsed into - (2, 0, 0, 0, 'rc', 1) (when length is set to 4). - """ - from packaging.version import parse - - version = parse(version_str) - assert version.release, f'failed to parse version {version_str}' - release = list(version.release) - release = release[:length] - if len(release) < length: - release = release + [0] * (length - len(release)) - if version.is_prerelease: - release.extend(list(version.pre)) - elif version.is_postrelease: - release.extend(list(version.post)) - else: - release.extend([0, 0]) - return tuple(release) - - -version_info = tuple(int(x) for x in __version__.split('.')[:3]) - -__all__ = ['__version__', 'version_info', 'parse_version_info'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py deleted file mode 100644 index 71e5ece71438..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .io import Cache, VideoReader, frames2video -from .optflow import ( - dequantize_flow, - flow_from_bytes, - flow_warp, - flowread, - flowwrite, - quantize_flow, - sparse_flow_from_bytes, -) -from .processing import concat_video, convert_video, cut_video, resize_video - -__all__ = [ - 'Cache', - 'VideoReader', - 'frames2video', - 'convert_video', - 'resize_video', - 'cut_video', - 'concat_video', - 'flowread', - 'flowwrite', - 'quantize_flow', - 'dequantize_flow', - 'flow_warp', - 'flow_from_bytes', - 'sparse_flow_from_bytes', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py deleted file mode 100644 index 43363f2dd8ed..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py +++ /dev/null @@ -1,310 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os.path as osp -from collections import OrderedDict - -import cv2 -from cv2 import ( - CAP_PROP_FOURCC, - CAP_PROP_FPS, - CAP_PROP_FRAME_COUNT, - CAP_PROP_FRAME_HEIGHT, - CAP_PROP_FRAME_WIDTH, - CAP_PROP_POS_FRAMES, - VideoWriter_fourcc, -) - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import ( - check_file_exist, - mkdir_or_exist, - scandir, - track_progress, -) - - -class Cache: - def __init__(self, capacity): - self._cache = OrderedDict() - self._capacity = int(capacity) - if capacity <= 0: - raise ValueError('capacity must be a positive integer') - - @property - def capacity(self): - return self._capacity - - @property - def size(self): - return len(self._cache) - - def put(self, key, val): - if key in self._cache: - return - if len(self._cache) >= self.capacity: - self._cache.popitem(last=False) - self._cache[key] = val - - def get(self, key, default=None): - val = self._cache[key] if key in self._cache else default - return val - - -class VideoReader: - """Video class with similar usage to a list object. - - This video warpper class provides convenient apis to access frames. - There exists an issue of OpenCV's VideoCapture class that jumping to a - certain frame may be inaccurate. It is fixed in this class by checking - the position after jumping each time. - Cache is used when decoding videos. So if the same frame is visited for - the second time, there is no need to decode again if it is stored in the - cache. - - :Example: - - >>> import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - >>> v = mmcv.VideoReader('sample.mp4') - >>> len(v) # get the total frame number with `len()` - 120 - >>> for img in v: # v is iterable - >>> mmcv.imshow(img) - >>> v[5] # get the 6th frame - """ - - def __init__(self, filename, cache_capacity=10): - # Check whether the video path is a url - if not filename.startswith(('https://', 'http://')): - check_file_exist(filename, 'Video file not found: ' + filename) - self._vcap = cv2.VideoCapture(filename) - assert cache_capacity > 0 - self._cache = Cache(cache_capacity) - self._position = 0 - # get basic info - self._width = int(self._vcap.get(CAP_PROP_FRAME_WIDTH)) - self._height = int(self._vcap.get(CAP_PROP_FRAME_HEIGHT)) - self._fps = self._vcap.get(CAP_PROP_FPS) - self._frame_cnt = int(self._vcap.get(CAP_PROP_FRAME_COUNT)) - self._fourcc = self._vcap.get(CAP_PROP_FOURCC) - - @property - def vcap(self): - """:obj:`cv2.VideoCapture`: The raw VideoCapture object.""" - return self._vcap - - @property - def opened(self): - """bool: Indicate whether the video is opened.""" - return self._vcap.isOpened() - - @property - def width(self): - """int: Width of video frames.""" - return self._width - - @property - def height(self): - """int: Height of video frames.""" - return self._height - - @property - def resolution(self): - """tuple: Video resolution (width, height).""" - return (self._width, self._height) - - @property - def fps(self): - """float: FPS of the video.""" - return self._fps - - @property - def frame_cnt(self): - """int: Total frames of the video.""" - return self._frame_cnt - - @property - def fourcc(self): - """str: "Four character code" of the video.""" - return self._fourcc - - @property - def position(self): - """int: Current cursor position, indicating frame decoded.""" - return self._position - - def _get_real_position(self): - return int(round(self._vcap.get(CAP_PROP_POS_FRAMES))) - - def _set_real_position(self, frame_id): - self._vcap.set(CAP_PROP_POS_FRAMES, frame_id) - pos = self._get_real_position() - for _ in range(frame_id - pos): - self._vcap.read() - self._position = frame_id - - def read(self): - """Read the next frame. - - If the next frame have been decoded before and in the cache, then - return it directly, otherwise decode, cache and return it. - - Returns: - ndarray or None: Return the frame if successful, otherwise None. - """ - # pos = self._position - if self._cache: - img = self._cache.get(self._position) - if img is not None: - ret = True - else: - if self._position != self._get_real_position(): - self._set_real_position(self._position) - ret, img = self._vcap.read() - if ret: - self._cache.put(self._position, img) - else: - ret, img = self._vcap.read() - if ret: - self._position += 1 - return img - - def get_frame(self, frame_id): - """Get frame by index. - - Args: - frame_id (int): Index of the expected frame, 0-based. - - Returns: - ndarray or None: Return the frame if successful, otherwise None. - """ - if frame_id < 0 or frame_id >= self._frame_cnt: - raise IndexError(f'"frame_id" must be between 0 and {self._frame_cnt - 1}') - if frame_id == self._position: - return self.read() - if self._cache: - img = self._cache.get(frame_id) - if img is not None: - self._position = frame_id + 1 - return img - self._set_real_position(frame_id) - ret, img = self._vcap.read() - if ret: - if self._cache: - self._cache.put(self._position, img) - self._position += 1 - return img - - def current_frame(self): - """Get the current frame (frame that is just visited). - - Returns: - ndarray or None: If the video is fresh, return None, otherwise - return the frame. - """ - if self._position == 0: - return None - return self._cache.get(self._position - 1) - - def cvt2frames(self, frame_dir, file_start=0, filename_tmpl='{:06d}.jpg', start=0, max_num=0, show_progress=True): - """Convert a video to frame images. - - Args: - frame_dir (str): Output directory to store all the frame images. - file_start (int): Filenames will start from the specified number. - filename_tmpl (str): Filename template with the index as the - placeholder. - start (int): The starting frame index. - max_num (int): Maximum number of frames to be written. - show_progress (bool): Whether to show a progress bar. - """ - mkdir_or_exist(frame_dir) - if max_num == 0: - task_num = self.frame_cnt - start - else: - task_num = min(self.frame_cnt - start, max_num) - if task_num <= 0: - raise ValueError('start must be less than total frame number') - if start > 0: - self._set_real_position(start) - - def write_frame(file_idx): - img = self.read() - if img is None: - return - filename = osp.join(frame_dir, filename_tmpl.format(file_idx)) - cv2.imwrite(filename, img) - - if show_progress: - track_progress(write_frame, range(file_start, file_start + task_num)) - else: - for i in range(task_num): - write_frame(file_start + i) - - def __len__(self): - return self.frame_cnt - - def __getitem__(self, index): - if isinstance(index, slice): - return [self.get_frame(i) for i in range(*index.indices(self.frame_cnt))] - # support negative indexing - if index < 0: - index += self.frame_cnt - if index < 0: - raise IndexError('index out of range') - return self.get_frame(index) - - def __iter__(self): - self._set_real_position(0) - return self - - def __next__(self): - img = self.read() - if img is not None: - return img - else: - raise StopIteration - - next = __next__ - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - self._vcap.release() - - -def frames2video( - frame_dir, video_file, fps=30, fourcc='XVID', filename_tmpl='{:06d}.jpg', start=0, end=0, show_progress=True -): - """Read the frame images from a directory and join them as a video. - - Args: - frame_dir (str): The directory containing video frames. - video_file (str): Output filename. - fps (float): FPS of the output video. - fourcc (str): Fourcc of the output video, this should be compatible - with the output file type. - filename_tmpl (str): Filename template with the index as the variable. - start (int): Starting frame index. - end (int): Ending frame index. - show_progress (bool): Whether to show a progress bar. - """ - if end == 0: - ext = filename_tmpl.split('.')[-1] - end = len([name for name in scandir(frame_dir, ext)]) - first_file = osp.join(frame_dir, filename_tmpl.format(start)) - check_file_exist(first_file, 'The start frame not found: ' + first_file) - img = cv2.imread(first_file) - height, width = img.shape[:2] - resolution = (width, height) - vwriter = cv2.VideoWriter(video_file, VideoWriter_fourcc(*fourcc), fps, resolution) - - def write_frame(file_idx): - filename = osp.join(frame_dir, filename_tmpl.format(file_idx)) - img = cv2.imread(filename) - vwriter.write(img) - - if show_progress: - track_progress(write_frame, range(start, end)) - else: - for i in range(start, end): - write_frame(i) - vwriter.release() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py deleted file mode 100644 index 90b81c714f05..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import warnings - -import cv2 -import numpy as np - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.arraymisc import dequantize, quantize -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.image import imread, imwrite -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_str - - -def flowread(flow_or_path, quantize=False, concat_axis=0, *args, **kwargs): - """Read an optical flow map. - - Args: - flow_or_path (ndarray or str): A flow map or filepath. - quantize (bool): whether to read quantized pair, if set to True, - remaining args will be passed to :func:`dequantize_flow`. - concat_axis (int): The axis that dx and dy are concatenated, - can be either 0 or 1. Ignored if quantize is False. - - Returns: - ndarray: Optical flow represented as a (h, w, 2) numpy array - """ - if isinstance(flow_or_path, np.ndarray): - if (flow_or_path.ndim != 3) or (flow_or_path.shape[-1] != 2): - raise ValueError(f'Invalid flow with shape {flow_or_path.shape}') - return flow_or_path - elif not is_str(flow_or_path): - raise TypeError(f'"flow_or_path" must be a filename or numpy array, ' f'not {type(flow_or_path)}') - - if not quantize: - with open(flow_or_path, 'rb') as f: - try: - header = f.read(4).decode('utf-8') - except Exception: - raise IOError(f'Invalid flow file: {flow_or_path}') - else: - if header != 'PIEH': - raise IOError(f'Invalid flow file: {flow_or_path}, ' 'header does not contain PIEH') - - w = np.fromfile(f, np.int32, 1).squeeze() - h = np.fromfile(f, np.int32, 1).squeeze() - flow = np.fromfile(f, np.float32, w * h * 2).reshape((h, w, 2)) - else: - assert concat_axis in [0, 1] - cat_flow = imread(flow_or_path, flag='unchanged') - if cat_flow.ndim != 2: - raise IOError(f'{flow_or_path} is not a valid quantized flow file, ' f'its dimension is {cat_flow.ndim}.') - assert cat_flow.shape[concat_axis] % 2 == 0 - dx, dy = np.split(cat_flow, 2, axis=concat_axis) - flow = dequantize_flow(dx, dy, *args, **kwargs) - - return flow.astype(np.float32) - - -def flowwrite(flow, filename, quantize=False, concat_axis=0, *args, **kwargs): - """Write optical flow to file. - - If the flow is not quantized, it will be saved as a .flo file losslessly, - otherwise a jpeg image which is lossy but of much smaller size. (dx and dy - will be concatenated horizontally into a single image if quantize is True.) - - Args: - flow (ndarray): (h, w, 2) array of optical flow. - filename (str): Output filepath. - quantize (bool): Whether to quantize the flow and save it to 2 jpeg - images. If set to True, remaining args will be passed to - :func:`quantize_flow`. - concat_axis (int): The axis that dx and dy are concatenated, - can be either 0 or 1. Ignored if quantize is False. - """ - if not quantize: - with open(filename, 'wb') as f: - f.write('PIEH'.encode('utf-8')) - np.array([flow.shape[1], flow.shape[0]], dtype=np.int32).tofile(f) - flow = flow.astype(np.float32) - flow.tofile(f) - f.flush() - else: - assert concat_axis in [0, 1] - dx, dy = quantize_flow(flow, *args, **kwargs) - dxdy = np.concatenate((dx, dy), axis=concat_axis) - imwrite(dxdy, filename) - - -def quantize_flow(flow, max_val=0.02, norm=True): - """Quantize flow to [0, 255]. - - After this step, the size of flow will be much smaller, and can be - dumped as jpeg images. - - Args: - flow (ndarray): (h, w, 2) array of optical flow. - max_val (float): Maximum value of flow, values beyond - [-max_val, max_val] will be truncated. - norm (bool): Whether to divide flow values by image width/height. - - Returns: - tuple[ndarray]: Quantized dx and dy. - """ - h, w, _ = flow.shape - dx = flow[..., 0] - dy = flow[..., 1] - if norm: - dx = dx / w # avoid inplace operations - dy = dy / h - # use 255 levels instead of 256 to make sure 0 is 0 after dequantization. - flow_comps = [quantize(d, -max_val, max_val, 255, np.uint8) for d in [dx, dy]] - return tuple(flow_comps) - - -def dequantize_flow(dx, dy, max_val=0.02, denorm=True): - """Recover from quantized flow. - - Args: - dx (ndarray): Quantized dx. - dy (ndarray): Quantized dy. - max_val (float): Maximum value used when quantizing. - denorm (bool): Whether to multiply flow values with width/height. - - Returns: - ndarray: Dequantized flow. - """ - assert dx.shape == dy.shape - assert dx.ndim == 2 or (dx.ndim == 3 and dx.shape[-1] == 1) - - dx, dy = [dequantize(d, -max_val, max_val, 255) for d in [dx, dy]] - - if denorm: - dx *= dx.shape[1] - dy *= dx.shape[0] - flow = np.dstack((dx, dy)) - return flow - - -def flow_warp(img, flow, filling_value=0, interpolate_mode='nearest'): - """Use flow to warp img. - - Args: - img (ndarray, float or uint8): Image to be warped. - flow (ndarray, float): Optical Flow. - filling_value (int): The missing pixels will be set with filling_value. - interpolate_mode (str): bilinear -> Bilinear Interpolation; - nearest -> Nearest Neighbor. - - Returns: - ndarray: Warped image with the same shape of img - """ - warnings.warn('This function is just for prototyping and cannot ' 'guarantee the computational efficiency.') - assert flow.ndim == 3, 'Flow must be in 3D arrays.' - height = flow.shape[0] - width = flow.shape[1] - channels = img.shape[2] - - output = np.ones((height, width, channels), dtype=img.dtype) * filling_value - - grid = np.indices((height, width)).swapaxes(0, 1).swapaxes(1, 2) - dx = grid[:, :, 0] + flow[:, :, 1] - dy = grid[:, :, 1] + flow[:, :, 0] - sx = np.floor(dx).astype(int) - sy = np.floor(dy).astype(int) - valid = (sx >= 0) & (sx < height - 1) & (sy >= 0) & (sy < width - 1) - - if interpolate_mode == 'nearest': - output[valid, :] = img[dx[valid].round().astype(int), dy[valid].round().astype(int), :] - elif interpolate_mode == 'bilinear': - # dirty walkround for integer positions - eps_ = 1e-6 - dx, dy = dx + eps_, dy + eps_ - left_top_ = ( - img[np.floor(dx[valid]).astype(int), np.floor(dy[valid]).astype(int), :] - * (np.ceil(dx[valid]) - dx[valid])[:, None] - * (np.ceil(dy[valid]) - dy[valid])[:, None] - ) - left_down_ = ( - img[np.ceil(dx[valid]).astype(int), np.floor(dy[valid]).astype(int), :] - * (dx[valid] - np.floor(dx[valid]))[:, None] - * (np.ceil(dy[valid]) - dy[valid])[:, None] - ) - right_top_ = ( - img[np.floor(dx[valid]).astype(int), np.ceil(dy[valid]).astype(int), :] - * (np.ceil(dx[valid]) - dx[valid])[:, None] - * (dy[valid] - np.floor(dy[valid]))[:, None] - ) - right_down_ = ( - img[np.ceil(dx[valid]).astype(int), np.ceil(dy[valid]).astype(int), :] - * (dx[valid] - np.floor(dx[valid]))[:, None] - * (dy[valid] - np.floor(dy[valid]))[:, None] - ) - output[valid, :] = left_top_ + left_down_ + right_top_ + right_down_ - else: - raise NotImplementedError( - 'We only support interpolation modes of nearest and bilinear, ' f'but got {interpolate_mode}.' - ) - return output.astype(img.dtype) - - -def flow_from_bytes(content): - """Read dense optical flow from bytes. - - .. note:: - This load optical flow function works for FlyingChairs, FlyingThings3D, - Sintel, FlyingChairsOcc datasets, but cannot load the data from - ChairsSDHom. - - Args: - content (bytes): Optical flow bytes got from files or other streams. - - Returns: - ndarray: Loaded optical flow with the shape (H, W, 2). - """ - - # header in first 4 bytes - header = content[:4] - if header.decode('utf-8') != 'PIEH': - raise Exception('Flow file header does not contain PIEH') - # width in second 4 bytes - width = np.frombuffer(content[4:], np.int32, 1).squeeze() - # height in third 4 bytes - height = np.frombuffer(content[8:], np.int32, 1).squeeze() - # after first 12 bytes, all bytes are flow - flow = np.frombuffer(content[12:], np.float32, width * height * 2).reshape((height, width, 2)) - - return flow - - -def sparse_flow_from_bytes(content): - """Read the optical flow in KITTI datasets from bytes. - - This function is modified from RAFT load the `KITTI datasets - `_. - - Args: - content (bytes): Optical flow bytes got from files or other streams. - - Returns: - Tuple(ndarray, ndarray): Loaded optical flow with the shape (H, W, 2) - and flow valid mask with the shape (H, W). - """ # nopa - - content = np.frombuffer(content, np.uint8) - flow = cv2.imdecode(content, cv2.IMREAD_ANYDEPTH | cv2.IMREAD_COLOR) - flow = flow[:, :, ::-1].astype(np.float32) - # flow shape (H, W, 2) valid shape (H, W) - flow, valid = flow[:, :, :2], flow[:, :, 2] - flow = (flow - 2 ** 15) / 64.0 - return flow, valid diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py deleted file mode 100644 index 8933708ef744..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os -import os.path as osp -import subprocess -import tempfile - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import requires_executable - - -@requires_executable('ffmpeg') -def convert_video(in_file, out_file, print_cmd=False, pre_options='', **kwargs): - """Convert a video with ffmpeg. - - This provides a general api to ffmpeg, the executed command is:: - - `ffmpeg -y -i ` - - Options(kwargs) are mapped to ffmpeg commands with the following rules: - - - key=val: "-key val" - - key=True: "-key" - - key=False: "" - - Args: - in_file (str): Input video filename. - out_file (str): Output video filename. - pre_options (str): Options appears before "-i ". - print_cmd (bool): Whether to print the final ffmpeg command. - """ - options = [] - for k, v in kwargs.items(): - if isinstance(v, bool): - if v: - options.append(f'-{k}') - elif k == 'log_level': - assert v in ['quiet', 'panic', 'fatal', 'error', 'warning', 'info', 'verbose', 'debug', 'trace'] - options.append(f'-loglevel {v}') - else: - options.append(f'-{k} {v}') - cmd = f'ffmpeg -y {pre_options} -i {in_file} {" ".join(options)} ' f'{out_file}' - if print_cmd: - print(cmd) - subprocess.call(cmd, shell=True) - - -@requires_executable('ffmpeg') -def resize_video(in_file, out_file, size=None, ratio=None, keep_ar=False, log_level='info', print_cmd=False): - """Resize a video. - - Args: - in_file (str): Input video filename. - out_file (str): Output video filename. - size (tuple): Expected size (w, h), eg, (320, 240) or (320, -1). - ratio (tuple or float): Expected resize ratio, (2, 0.5) means - (w*2, h*0.5). - keep_ar (bool): Whether to keep original aspect ratio. - log_level (str): Logging level of ffmpeg. - print_cmd (bool): Whether to print the final ffmpeg command. - """ - if size is None and ratio is None: - raise ValueError('expected size or ratio must be specified') - if size is not None and ratio is not None: - raise ValueError('size and ratio cannot be specified at the same time') - options = {'log_level': log_level} - if size: - if not keep_ar: - options['vf'] = f'scale={size[0]}:{size[1]}' - else: - options['vf'] = f'scale=w={size[0]}:h={size[1]}:' 'force_original_aspect_ratio=decrease' - else: - if not isinstance(ratio, tuple): - ratio = (ratio, ratio) - options['vf'] = f'scale="trunc(iw*{ratio[0]}):trunc(ih*{ratio[1]})"' - convert_video(in_file, out_file, print_cmd, **options) - - -@requires_executable('ffmpeg') -def cut_video(in_file, out_file, start=None, end=None, vcodec=None, acodec=None, log_level='info', print_cmd=False): - """Cut a clip from a video. - - Args: - in_file (str): Input video filename. - out_file (str): Output video filename. - start (None or float): Start time (in seconds). - end (None or float): End time (in seconds). - vcodec (None or str): Output video codec, None for unchanged. - acodec (None or str): Output audio codec, None for unchanged. - log_level (str): Logging level of ffmpeg. - print_cmd (bool): Whether to print the final ffmpeg command. - """ - options = {'log_level': log_level} - if vcodec is None: - options['vcodec'] = 'copy' - if acodec is None: - options['acodec'] = 'copy' - if start: - options['ss'] = start - else: - start = 0 - if end: - options['t'] = end - start - convert_video(in_file, out_file, print_cmd, **options) - - -@requires_executable('ffmpeg') -def concat_video(video_list, out_file, vcodec=None, acodec=None, log_level='info', print_cmd=False): - """Concatenate multiple videos into a single one. - - Args: - video_list (list): A list of video filenames - out_file (str): Output video filename - vcodec (None or str): Output video codec, None for unchanged - acodec (None or str): Output audio codec, None for unchanged - log_level (str): Logging level of ffmpeg. - print_cmd (bool): Whether to print the final ffmpeg command. - """ - tmp_filehandler, tmp_filename = tempfile.mkstemp(suffix='.txt', text=True) - with open(tmp_filename, 'w') as f: - for filename in video_list: - f.write(f'file {osp.abspath(filename)}\n') - options = {'log_level': log_level} - if vcodec is None: - options['vcodec'] = 'copy' - if acodec is None: - options['acodec'] = 'copy' - convert_video(tmp_filename, out_file, print_cmd, pre_options='-f concat -safe 0', **options) - os.close(tmp_filehandler) - os.remove(tmp_filename) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py deleted file mode 100644 index f336d6ce01b4..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .color import Color, color_val -from .image import imshow, imshow_bboxes, imshow_det_bboxes -from .optflow import flow2rgb, flowshow, make_color_wheel - -__all__ = [ - 'Color', - 'color_val', - 'imshow', - 'imshow_bboxes', - 'imshow_det_bboxes', - 'flowshow', - 'flow2rgb', - 'make_color_wheel', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py deleted file mode 100644 index d2290a315f9e..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from enum import Enum - -import numpy as np - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_str - - -class Color(Enum): - """An enum that defines common colors. - - Contains red, green, blue, cyan, yellow, magenta, white and black. - """ - - red = (0, 0, 255) - green = (0, 255, 0) - blue = (255, 0, 0) - cyan = (255, 255, 0) - yellow = (0, 255, 255) - magenta = (255, 0, 255) - white = (255, 255, 255) - black = (0, 0, 0) - - -def color_val(color): - """Convert various input to color tuples. - - Args: - color (:obj:`Color`/str/tuple/int/ndarray): Color inputs - - Returns: - tuple[int]: A tuple of 3 integers indicating BGR channels. - """ - if is_str(color): - return Color[color].value - elif isinstance(color, Color): - return color.value - elif isinstance(color, tuple): - assert len(color) == 3 - for channel in color: - assert 0 <= channel <= 255 - return color - elif isinstance(color, int): - assert 0 <= color <= 255 - return color, color, color - elif isinstance(color, np.ndarray): - assert color.ndim == 1 and color.size == 3 - assert np.all((color >= 0) & (color <= 255)) - color = color.astype(np.uint8) - return tuple(color) - else: - raise TypeError(f'Invalid type for color: {type(color)}') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py deleted file mode 100644 index feda6fa59520..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import cv2 -import numpy as np - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.image import imread, imwrite -from .color import color_val - - -def imshow(img, win_name='', wait_time=0): - """Show an image. - - Args: - img (str or ndarray): The image to be displayed. - win_name (str): The window name. - wait_time (int): Value of waitKey param. - """ - cv2.imshow(win_name, imread(img)) - if wait_time == 0: # prevent from hanging if windows was closed - while True: - ret = cv2.waitKey(1) - - closed = cv2.getWindowProperty(win_name, cv2.WND_PROP_VISIBLE) < 1 - # if user closed window or if some key pressed - if closed or ret != -1: - break - else: - ret = cv2.waitKey(wait_time) - - -def imshow_bboxes( - img, bboxes, colors='green', top_k=-1, thickness=1, show=True, win_name='', wait_time=0, out_file=None -): - """Draw bboxes on an image. - - Args: - img (str or ndarray): The image to be displayed. - bboxes (list or ndarray): A list of ndarray of shape (k, 4). - colors (list[str or tuple or Color]): A list of colors. - top_k (int): Plot the first k bboxes only if set positive. - thickness (int): Thickness of lines. - show (bool): Whether to show the image. - win_name (str): The window name. - wait_time (int): Value of waitKey param. - out_file (str, optional): The filename to write the image. - - Returns: - ndarray: The image with bboxes drawn on it. - """ - img = imread(img) - img = np.ascontiguousarray(img) - - if isinstance(bboxes, np.ndarray): - bboxes = [bboxes] - if not isinstance(colors, list): - colors = [colors for _ in range(len(bboxes))] - colors = [color_val(c) for c in colors] - assert len(bboxes) == len(colors) - - for i, _bboxes in enumerate(bboxes): - _bboxes = _bboxes.astype(np.int32) - if top_k <= 0: - _top_k = _bboxes.shape[0] - else: - _top_k = min(top_k, _bboxes.shape[0]) - for j in range(_top_k): - left_top = (_bboxes[j, 0], _bboxes[j, 1]) - right_bottom = (_bboxes[j, 2], _bboxes[j, 3]) - cv2.rectangle(img, left_top, right_bottom, colors[i], thickness=thickness) - - if show: - imshow(img, win_name, wait_time) - if out_file is not None: - imwrite(img, out_file) - return img - - -def imshow_det_bboxes( - img, - bboxes, - labels, - class_names=None, - score_thr=0, - bbox_color='green', - text_color='green', - thickness=1, - font_scale=0.5, - show=True, - win_name='', - wait_time=0, - out_file=None, -): - """Draw bboxes and class labels (with scores) on an image. - - Args: - img (str or ndarray): The image to be displayed. - bboxes (ndarray): Bounding boxes (with scores), shaped (n, 4) or - (n, 5). - labels (ndarray): Labels of bboxes. - class_names (list[str]): Names of each classes. - score_thr (float): Minimum score of bboxes to be shown. - bbox_color (str or tuple or :obj:`Color`): Color of bbox lines. - text_color (str or tuple or :obj:`Color`): Color of texts. - thickness (int): Thickness of lines. - font_scale (float): Font scales of texts. - show (bool): Whether to show the image. - win_name (str): The window name. - wait_time (int): Value of waitKey param. - out_file (str or None): The filename to write the image. - - Returns: - ndarray: The image with bboxes drawn on it. - """ - assert bboxes.ndim == 2 - assert labels.ndim == 1 - assert bboxes.shape[0] == labels.shape[0] - assert bboxes.shape[1] == 4 or bboxes.shape[1] == 5 - img = imread(img) - img = np.ascontiguousarray(img) - - if score_thr > 0: - assert bboxes.shape[1] == 5 - scores = bboxes[:, -1] - inds = scores > score_thr - bboxes = bboxes[inds, :] - labels = labels[inds] - - bbox_color = color_val(bbox_color) - text_color = color_val(text_color) - - for bbox, label in zip(bboxes, labels): - bbox_int = bbox.astype(np.int32) - left_top = (bbox_int[0], bbox_int[1]) - right_bottom = (bbox_int[2], bbox_int[3]) - cv2.rectangle(img, left_top, right_bottom, bbox_color, thickness=thickness) - label_text = class_names[label] if class_names is not None else f'cls {label}' - if len(bbox) > 4: - label_text += f'|{bbox[-1]:.02f}' - cv2.putText(img, label_text, (bbox_int[0], bbox_int[1] - 2), cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color) - - if show: - imshow(img, win_name, wait_time) - if out_file is not None: - imwrite(img, out_file) - return img diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py deleted file mode 100644 index 1954452dcda1..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from __future__ import division - -import numpy as np - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.image import rgb2bgr -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.video import flowread -from .image import imshow - - -def flowshow(flow, win_name='', wait_time=0): - """Show optical flow. - - Args: - flow (ndarray or str): The optical flow to be displayed. - win_name (str): The window name. - wait_time (int): Value of waitKey param. - """ - flow = flowread(flow) - flow_img = flow2rgb(flow) - imshow(rgb2bgr(flow_img), win_name, wait_time) - - -def flow2rgb(flow, color_wheel=None, unknown_thr=1e6): - """Convert flow map to RGB image. - - Args: - flow (ndarray): Array of optical flow. - color_wheel (ndarray or None): Color wheel used to map flow field to - RGB colorspace. Default color wheel will be used if not specified. - unknown_thr (str): Values above this threshold will be marked as - unknown and thus ignored. - - Returns: - ndarray: RGB image that can be visualized. - """ - assert flow.ndim == 3 and flow.shape[-1] == 2 - if color_wheel is None: - color_wheel = make_color_wheel() - assert color_wheel.ndim == 2 and color_wheel.shape[1] == 3 - num_bins = color_wheel.shape[0] - - dx = flow[:, :, 0].copy() - dy = flow[:, :, 1].copy() - - ignore_inds = np.isnan(dx) | np.isnan(dy) | (np.abs(dx) > unknown_thr) | (np.abs(dy) > unknown_thr) - dx[ignore_inds] = 0 - dy[ignore_inds] = 0 - - rad = np.sqrt(dx ** 2 + dy ** 2) - if np.any(rad > np.finfo(float).eps): - max_rad = np.max(rad) - dx /= max_rad - dy /= max_rad - - rad = np.sqrt(dx ** 2 + dy ** 2) - angle = np.arctan2(-dy, -dx) / np.pi - - bin_real = (angle + 1) / 2 * (num_bins - 1) - bin_left = np.floor(bin_real).astype(int) - bin_right = (bin_left + 1) % num_bins - w = (bin_real - bin_left.astype(np.float32))[..., None] - flow_img = (1 - w) * color_wheel[bin_left, :] + w * color_wheel[bin_right, :] - small_ind = rad <= 1 - flow_img[small_ind] = 1 - rad[small_ind, None] * (1 - flow_img[small_ind]) - flow_img[np.logical_not(small_ind)] *= 0.75 - - flow_img[ignore_inds, :] = 0 - - return flow_img - - -def make_color_wheel(bins=None): - """Build a color wheel. - - Args: - bins(list or tuple, optional): Specify the number of bins for each - color range, corresponding to six ranges: red -> yellow, - yellow -> green, green -> cyan, cyan -> blue, blue -> magenta, - magenta -> red. [15, 6, 4, 11, 13, 6] is used for default - (see Middlebury). - - Returns: - ndarray: Color wheel of shape (total_bins, 3). - """ - if bins is None: - bins = [15, 6, 4, 11, 13, 6] - assert len(bins) == 6 - - RY, YG, GC, CB, BM, MR = tuple(bins) - - ry = [1, np.arange(RY) / RY, 0] - yg = [1 - np.arange(YG) / YG, 1, 0] - gc = [0, 1, np.arange(GC) / GC] - cb = [0, 1 - np.arange(CB) / CB, 1] - bm = [np.arange(BM) / BM, 0, 1] - mr = [1, 0, 1 - np.arange(MR) / MR] - - num_bins = RY + YG + GC + CB + BM + MR - - color_wheel = np.zeros((3, num_bins), dtype=np.float32) - - col = 0 - for i, color in enumerate([ry, yg, gc, cb, bm, mr]): - for j in range(3): - color_wheel[j, col : col + bins[i]] = color[j] - col += bins[i] - - return color_wheel.T diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py deleted file mode 100644 index 7e0e39b03e2a..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- - -from .checkpoint import load_checkpoint - -__all__ = ['load_checkpoint'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py deleted file mode 100644 index 9f27d7fea454..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py +++ /dev/null @@ -1,477 +0,0 @@ -# Copyright (c) Open-MMLab. All rights reserved. -import io -import os -import os.path as osp -import pkgutil -import time -import warnings -from collections import OrderedDict -from importlib import import_module -from tempfile import TemporaryDirectory - -import torch -import torchvision -from torch.nn import functional as F -from torch.optim import Optimizer -from torch.utils import model_zoo - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio import FileClient -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio import load as load_file -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import is_module_wrapper -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import get_dist_info -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import mkdir_or_exist - -ENV_MMCV_HOME = 'MMCV_HOME' -ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' -DEFAULT_CACHE_DIR = '~/.cache' - - -def _get_mmcv_home(): - mmcv_home = os.path.expanduser( - os.getenv(ENV_MMCV_HOME, os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv')) - ) - - mkdir_or_exist(mmcv_home) - return mmcv_home - - -def load_state_dict(module, state_dict, strict=False, logger=None): - """Load state_dict to a module. - - This method is modified from :meth:`torch.nn.Module.load_state_dict`. - Default value for ``strict`` is set to ``False`` and the message for - param mismatch will be shown even if strict is False. - - Args: - module (Module): Module that receives the state_dict. - state_dict (OrderedDict): Weights. - strict (bool): whether to strictly enforce that the keys - in :attr:`state_dict` match the keys returned by this module's - :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. - logger (:obj:`logging.Logger`, optional): Logger to log the error - message. If not specified, print function will be used. - """ - unexpected_keys = [] - all_missing_keys = [] - err_msg = [] - - metadata = getattr(state_dict, '_metadata', None) - state_dict = state_dict.copy() - if metadata is not None: - state_dict._metadata = metadata - - # use _load_from_state_dict to enable checkpoint version control - def load(module, prefix=''): - # recursively check parallel module in case that the model has a - # complicated structure, e.g., nn.Module(nn.Module(DDP)) - if is_module_wrapper(module): - module = module.module - local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) - module._load_from_state_dict( - state_dict, prefix, local_metadata, True, all_missing_keys, unexpected_keys, err_msg - ) - for name, child in module._modules.items(): - if child is not None: - load(child, prefix + name + '.') - - load(module) - load = None # break load->load reference cycle - - # ignore "num_batches_tracked" of BN layers - missing_keys = [key for key in all_missing_keys if 'num_batches_tracked' not in key] - - if unexpected_keys: - err_msg.append('unexpected key in source ' f'state_dict: {", ".join(unexpected_keys)}\n') - if missing_keys: - err_msg.append(f'missing keys in source state_dict: {", ".join(missing_keys)}\n') - - rank, _ = get_dist_info() - if len(err_msg) > 0 and rank == 0: - err_msg.insert(0, 'The model and loaded state dict do not match exactly\n') - err_msg = '\n'.join(err_msg) - if strict: - raise RuntimeError(err_msg) - elif logger is not None: - logger.warning(err_msg) - else: - print(err_msg) - - -def load_url_dist(url, model_dir=None): - """In distributed setting, this function only download checkpoint at local - rank 0.""" - rank, world_size = get_dist_info() - rank = int(os.environ.get('LOCAL_RANK', rank)) - if rank == 0: - checkpoint = model_zoo.load_url(url, model_dir=model_dir) - if world_size > 1: - torch.distributed.barrier() - if rank > 0: - checkpoint = model_zoo.load_url(url, model_dir=model_dir) - return checkpoint - - -def load_pavimodel_dist(model_path, map_location=None): - """In distributed setting, this function only download checkpoint at local - rank 0.""" - try: - from pavi import modelcloud - except ImportError: - raise ImportError('Please install pavi to load checkpoint from modelcloud.') - rank, world_size = get_dist_info() - rank = int(os.environ.get('LOCAL_RANK', rank)) - if rank == 0: - model = modelcloud.get(model_path) - with TemporaryDirectory() as tmp_dir: - downloaded_file = osp.join(tmp_dir, model.name) - model.download(downloaded_file) - checkpoint = torch.load(downloaded_file, map_location=map_location) - if world_size > 1: - torch.distributed.barrier() - if rank > 0: - model = modelcloud.get(model_path) - with TemporaryDirectory() as tmp_dir: - downloaded_file = osp.join(tmp_dir, model.name) - model.download(downloaded_file) - checkpoint = torch.load(downloaded_file, map_location=map_location) - return checkpoint - - -def load_fileclient_dist(filename, backend, map_location): - """In distributed setting, this function only download checkpoint at local - rank 0.""" - rank, world_size = get_dist_info() - rank = int(os.environ.get('LOCAL_RANK', rank)) - allowed_backends = ['ceph'] - if backend not in allowed_backends: - raise ValueError(f'Load from Backend {backend} is not supported.') - if rank == 0: - fileclient = FileClient(backend=backend) - buffer = io.BytesIO(fileclient.get(filename)) - checkpoint = torch.load(buffer, map_location=map_location) - if world_size > 1: - torch.distributed.barrier() - if rank > 0: - fileclient = FileClient(backend=backend) - buffer = io.BytesIO(fileclient.get(filename)) - checkpoint = torch.load(buffer, map_location=map_location) - return checkpoint - - -def get_torchvision_models(): - model_urls = dict() - for _, name, ispkg in pkgutil.walk_packages(torchvision.models.__path__): - if ispkg: - continue - _zoo = import_module(f'torchvision.models.{name}') - if hasattr(_zoo, 'model_urls'): - _urls = getattr(_zoo, 'model_urls') - model_urls.update(_urls) - return model_urls - - -def get_external_models(): - mmcv_home = _get_mmcv_home() - default_json_path = osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json') - default_urls = load_file(default_json_path) - assert isinstance(default_urls, dict) - external_json_path = osp.join(mmcv_home, 'open_mmlab.json') - if osp.exists(external_json_path): - external_urls = load_file(external_json_path) - assert isinstance(external_urls, dict) - default_urls.update(external_urls) - - return default_urls - - -def get_mmcls_models(): - mmcls_json_path = osp.join(mmcv.__path__[0], 'model_zoo/mmcls.json') - mmcls_urls = load_file(mmcls_json_path) - - return mmcls_urls - - -def get_deprecated_model_names(): - deprecate_json_path = osp.join(mmcv.__path__[0], 'model_zoo/deprecated.json') - deprecate_urls = load_file(deprecate_json_path) - assert isinstance(deprecate_urls, dict) - - return deprecate_urls - - -def _process_mmcls_checkpoint(checkpoint): - state_dict = checkpoint['state_dict'] - new_state_dict = OrderedDict() - for k, v in state_dict.items(): - if k.startswith('backbone.'): - new_state_dict[k[9:]] = v - new_checkpoint = dict(state_dict=new_state_dict) - - return new_checkpoint - - -def _load_checkpoint(filename, map_location=None): - """Load checkpoint from somewhere (modelzoo, file, url). - - Args: - filename (str): Accept local filepath, URL, ``torchvision://xxx``, - ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for - details. - map_location (str | None): Same as :func:`torch.load`. Default: None. - - Returns: - dict | OrderedDict: The loaded checkpoint. It can be either an - OrderedDict storing model weights or a dict containing other - information, which depends on the checkpoint. - """ - if filename.startswith('modelzoo://'): - warnings.warn('The URL scheme of "modelzoo://" is deprecated, please ' 'use "torchvision://" instead') - model_urls = get_torchvision_models() - model_name = filename[11:] - checkpoint = load_url_dist(model_urls[model_name]) - elif filename.startswith('torchvision://'): - model_urls = get_torchvision_models() - model_name = filename[14:] - checkpoint = load_url_dist(model_urls[model_name]) - elif filename.startswith('open-mmlab://'): - model_urls = get_external_models() - model_name = filename[13:] - deprecated_urls = get_deprecated_model_names() - if model_name in deprecated_urls: - warnings.warn( - f'open-mmlab://{model_name} is deprecated in favor ' f'of open-mmlab://{deprecated_urls[model_name]}' - ) - model_name = deprecated_urls[model_name] - model_url = model_urls[model_name] - # check if is url - if model_url.startswith(('http://', 'https://')): - checkpoint = load_url_dist(model_url) - else: - filename = osp.join(_get_mmcv_home(), model_url) - if not osp.isfile(filename): - raise IOError(f'{filename} is not a checkpoint file') - checkpoint = torch.load(filename, map_location=map_location) - elif filename.startswith('mmcls://'): - model_urls = get_mmcls_models() - model_name = filename[8:] - checkpoint = load_url_dist(model_urls[model_name]) - checkpoint = _process_mmcls_checkpoint(checkpoint) - elif filename.startswith(('http://', 'https://')): - checkpoint = load_url_dist(filename) - elif filename.startswith('pavi://'): - model_path = filename[7:] - checkpoint = load_pavimodel_dist(model_path, map_location=map_location) - elif filename.startswith('s3://'): - checkpoint = load_fileclient_dist(filename, backend='ceph', map_location=map_location) - else: - if not osp.isfile(filename): - raise IOError(f'{filename} is not a checkpoint file') - checkpoint = torch.load(filename, map_location=map_location) - return checkpoint - - -def load_checkpoint(model, filename, map_location='cpu', strict=False, logger=None): - """Load checkpoint from a file or URI. - - Args: - model (Module): Module to load checkpoint. - filename (str): Accept local filepath, URL, ``torchvision://xxx``, - ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for - details. - map_location (str): Same as :func:`torch.load`. - strict (bool): Whether to allow different params for the model and - checkpoint. - logger (:mod:`logging.Logger` or None): The logger for error message. - - Returns: - dict or OrderedDict: The loaded checkpoint. - """ - checkpoint = _load_checkpoint(filename, map_location) - # OrderedDict is a subclass of dict - if not isinstance(checkpoint, dict): - raise RuntimeError(f'No state_dict found in checkpoint file {filename}') - # get state_dict from checkpoint - if 'state_dict' in checkpoint: - state_dict = checkpoint['state_dict'] - elif 'model' in checkpoint: - state_dict = checkpoint['model'] - else: - state_dict = checkpoint - # strip prefix of state_dict - if list(state_dict.keys())[0].startswith('module.'): - state_dict = {k[7:]: v for k, v in state_dict.items()} - - # for MoBY, load model of online branch - if sorted(list(state_dict.keys()))[0].startswith('encoder'): - state_dict = {k.replace('encoder.', ''): v for k, v in state_dict.items() if k.startswith('encoder.')} - - # reshape absolute position embedding - if state_dict.get('absolute_pos_embed') is not None: - absolute_pos_embed = state_dict['absolute_pos_embed'] - N1, L, C1 = absolute_pos_embed.size() - N2, C2, H, W = model.absolute_pos_embed.size() - if N1 != N2 or C1 != C2 or L != H * W: - logger.warning("Error in loading absolute_pos_embed, pass") - else: - state_dict['absolute_pos_embed'] = absolute_pos_embed.view(N2, H, W, C2).permute(0, 3, 1, 2) - - # interpolate position bias table if needed - relative_position_bias_table_keys = [k for k in state_dict.keys() if "relative_position_bias_table" in k] - for table_key in relative_position_bias_table_keys: - table_pretrained = state_dict[table_key] - table_current = model.state_dict()[table_key] - L1, nH1 = table_pretrained.size() - L2, nH2 = table_current.size() - if nH1 != nH2: - logger.warning(f"Error in loading {table_key}, pass") - else: - if L1 != L2: - S1 = int(L1 ** 0.5) - S2 = int(L2 ** 0.5) - table_pretrained_resized = F.interpolate( - table_pretrained.permute(1, 0).view(1, nH1, S1, S1), size=(S2, S2), mode='bicubic' - ) - state_dict[table_key] = table_pretrained_resized.view(nH2, L2).permute(1, 0) - - # load state_dict - load_state_dict(model, state_dict, strict, logger) - return checkpoint - - -def weights_to_cpu(state_dict): - """Copy a model state_dict to cpu. - - Args: - state_dict (OrderedDict): Model weights on GPU. - - Returns: - OrderedDict: Model weights on GPU. - """ - state_dict_cpu = OrderedDict() - for key, val in state_dict.items(): - state_dict_cpu[key] = val.cpu() - return state_dict_cpu - - -def _save_to_state_dict(module, destination, prefix, keep_vars): - """Saves module state to `destination` dictionary. - - This method is modified from :meth:`torch.nn.Module._save_to_state_dict`. - - Args: - module (nn.Module): The module to generate state_dict. - destination (dict): A dict where state will be stored. - prefix (str): The prefix for parameters and buffers used in this - module. - """ - for name, param in module._parameters.items(): - if param is not None: - destination[prefix + name] = param if keep_vars else param.detach() - for name, buf in module._buffers.items(): - # remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d - if buf is not None: - destination[prefix + name] = buf if keep_vars else buf.detach() - - -def get_state_dict(module, destination=None, prefix='', keep_vars=False): - """Returns a dictionary containing a whole state of the module. - - Both parameters and persistent buffers (e.g. running averages) are - included. Keys are corresponding parameter and buffer names. - - This method is modified from :meth:`torch.nn.Module.state_dict` to - recursively check parallel module in case that the model has a complicated - structure, e.g., nn.Module(nn.Module(DDP)). - - Args: - module (nn.Module): The module to generate state_dict. - destination (OrderedDict): Returned dict for the state of the - module. - prefix (str): Prefix of the key. - keep_vars (bool): Whether to keep the variable property of the - parameters. Default: False. - - Returns: - dict: A dictionary containing a whole state of the module. - """ - # recursively check parallel module in case that the model has a - # complicated structure, e.g., nn.Module(nn.Module(DDP)) - if is_module_wrapper(module): - module = module.module - - # below is the same as torch.nn.Module.state_dict() - if destination is None: - destination = OrderedDict() - destination._metadata = OrderedDict() - destination._metadata[prefix[:-1]] = local_metadata = dict(version=module._version) - _save_to_state_dict(module, destination, prefix, keep_vars) - for name, child in module._modules.items(): - if child is not None: - get_state_dict(child, destination, prefix + name + '.', keep_vars=keep_vars) - for hook in module._state_dict_hooks.values(): - hook_result = hook(module, destination, prefix, local_metadata) - if hook_result is not None: - destination = hook_result - return destination - - -def save_checkpoint(model, filename, optimizer=None, meta=None): - """Save checkpoint to file. - - The checkpoint will have 3 fields: ``meta``, ``state_dict`` and - ``optimizer``. By default ``meta`` will contain version and time info. - - Args: - model (Module): Module whose params are to be saved. - filename (str): Checkpoint filename. - optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. - meta (dict, optional): Metadata to be saved in checkpoint. - """ - if meta is None: - meta = {} - elif not isinstance(meta, dict): - raise TypeError(f'meta must be a dict or None, but got {type(meta)}') - meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) - - if is_module_wrapper(model): - model = model.module - - if hasattr(model, 'CLASSES') and model.CLASSES is not None: - # save class name to the meta - meta.update(CLASSES=model.CLASSES) - - checkpoint = {'meta': meta, 'state_dict': weights_to_cpu(get_state_dict(model))} - # save optimizer state dict in the checkpoint - if isinstance(optimizer, Optimizer): - checkpoint['optimizer'] = optimizer.state_dict() - elif isinstance(optimizer, dict): - checkpoint['optimizer'] = {} - for name, optim in optimizer.items(): - checkpoint['optimizer'][name] = optim.state_dict() - - if filename.startswith('pavi://'): - try: - from pavi import modelcloud - from pavi.exception import NodeNotFoundError - except ImportError: - raise ImportError('Please install pavi to load checkpoint from modelcloud.') - model_path = filename[7:] - root = modelcloud.Folder() - model_dir, model_name = osp.split(model_path) - try: - model = modelcloud.get(model_dir) - except NodeNotFoundError: - model = root.create_training_model(model_dir) - with TemporaryDirectory() as tmp_dir: - checkpoint_file = osp.join(tmp_dir, model_name) - with open(checkpoint_file, 'wb') as f: - torch.save(checkpoint, f) - f.flush() - model.create_file(checkpoint_file, name=model_name) - else: - mmcv.mkdir_or_exist(osp.dirname(filename)) - # immediately flush buffer - with open(filename, 'wb') as f: - torch.save(checkpoint, f) - f.flush() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py deleted file mode 100644 index 1752e7fc7969..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -from .inference import inference_segmentor, init_segmentor, show_result_pyplot -from .test import multi_gpu_test, single_gpu_test -from .train import get_root_logger, set_random_seed, train_segmentor - -__all__ = [ - 'get_root_logger', - 'set_random_seed', - 'train_segmentor', - 'init_segmentor', - 'inference_segmentor', - 'multi_gpu_test', - 'single_gpu_test', - 'show_result_pyplot', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py deleted file mode 100644 index 32c6db9f1ccb..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py +++ /dev/null @@ -1,127 +0,0 @@ -import matplotlib.pyplot as plt -import torch - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import collate, scatter -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.datasets.pipelines import Compose -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models import build_segmentor - - -def init_segmentor(config, checkpoint=None, device='cuda:0'): - """Initialize a segmentor from config file. - - Args: - config (str or :obj:`mmcv.Config`): Config file path or the config - object. - checkpoint (str, optional): Checkpoint path. If left as None, the model - will not load any weights. - device (str, optional) CPU/CUDA device option. Default 'cuda:0'. - Use 'cpu' for loading model on CPU. - Returns: - nn.Module: The constructed segmentor. - """ - if isinstance(config, str): - config = mmcv.Config.fromfile(config) - elif not isinstance(config, mmcv.Config): - raise TypeError('config must be a filename or Config object, ' 'but got {}'.format(type(config))) - config.model.pretrained = None - config.model.train_cfg = None - model = build_segmentor(config.model, test_cfg=config.get('test_cfg')) - if checkpoint is not None: - checkpoint = load_checkpoint(model, checkpoint, map_location='cpu') - model.CLASSES = checkpoint['meta']['CLASSES'] - model.PALETTE = checkpoint['meta']['PALETTE'] - model.cfg = config # save the config in the model for convenience - model.to(device) - model.eval() - return model - - -class LoadImage: - """A simple pipeline to load image.""" - - def __call__(self, results): - """Call function to load images into results. - - Args: - results (dict): A result dict contains the file name - of the image to be read. - - Returns: - dict: ``results`` will be returned containing loaded image. - """ - - if isinstance(results['img'], str): - results['filename'] = results['img'] - results['ori_filename'] = results['img'] - else: - results['filename'] = None - results['ori_filename'] = None - img = mmcv.imread(results['img']) - results['img'] = img - results['img_shape'] = img.shape - results['ori_shape'] = img.shape - return results - - -def inference_segmentor(model, img): - """Inference image(s) with the segmentor. - - Args: - model (nn.Module): The loaded segmentor. - imgs (str/ndarray or list[str/ndarray]): Either image files or loaded - images. - - Returns: - (list[Tensor]): The segmentation result. - """ - cfg = model.cfg - device = next(model.parameters()).device # model device - # build the data pipeline - test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:] - test_pipeline = Compose(test_pipeline) - # prepare data - data = dict(img=img) - data = test_pipeline(data) - data = collate([data], samples_per_gpu=1) - if next(model.parameters()).is_cuda: - # scatter to specified GPU - data = scatter(data, [device])[0] - else: - data['img_metas'] = [i.data[0] for i in data['img_metas']] - - # forward the model - with torch.no_grad(): - result = model(return_loss=False, rescale=True, **data) - return result - - -def show_result_pyplot(model, img, result, palette=None, fig_size=(15, 10), opacity=0.5, title='', block=True): - """Visualize the segmentation results on the image. - - Args: - model (nn.Module): The loaded segmentor. - img (str or np.ndarray): Image filename or loaded image. - result (list): The segmentation result. - palette (list[list[int]]] | None): The palette of segmentation - map. If None is given, random palette will be generated. - Default: None - fig_size (tuple): Figure size of the pyplot figure. - opacity(float): Opacity of painted segmentation map. - Default 0.5. - Must be in (0, 1] range. - title (str): The title of pyplot figure. - Default is ''. - block (bool): Whether to block the pyplot figure. - Default is True. - """ - if hasattr(model, 'module'): - model = model.module - img = model.show_result(img, result, palette=palette, show=False, opacity=opacity) - # plt.figure(figsize=fig_size) - # plt.imshow(mmcv.bgr2rgb(img)) - # plt.title(title) - # plt.tight_layout() - # plt.show(block=block) - return mmcv.bgr2rgb(img) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py deleted file mode 100644 index 961b5e0a781b..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py +++ /dev/null @@ -1,217 +0,0 @@ -import os.path as osp -import pickle -import shutil -import tempfile - -import numpy as np -import torch -import torch.distributed as dist - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.image import tensor2imgs -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import get_dist_info - - -def np2tmp(array, temp_file_name=None): - """Save ndarray to local numpy file. - - Args: - array (ndarray): Ndarray to save. - temp_file_name (str): Numpy file name. If 'temp_file_name=None', this - function will generate a file name with tempfile.NamedTemporaryFile - to save ndarray. Default: None. - - Returns: - str: The numpy file name. - """ - - if temp_file_name is None: - temp_file_name = tempfile.NamedTemporaryFile(suffix='.npy', delete=False).name - np.save(temp_file_name, array) - return temp_file_name - - -def single_gpu_test(model, data_loader, show=False, out_dir=None, efficient_test=False, opacity=0.5): - """Test with single GPU. - - Args: - model (nn.Module): Model to be tested. - data_loader (utils.data.Dataloader): Pytorch data loader. - show (bool): Whether show results during inference. Default: False. - out_dir (str, optional): If specified, the results will be dumped into - the directory to save output results. - efficient_test (bool): Whether save the results as local numpy files to - save CPU memory during evaluation. Default: False. - opacity(float): Opacity of painted segmentation map. - Default 0.5. - Must be in (0, 1] range. - Returns: - list: The prediction results. - """ - - model.eval() - results = [] - dataset = data_loader.dataset - prog_bar = mmcv.ProgressBar(len(dataset)) - for i, data in enumerate(data_loader): - with torch.no_grad(): - result = model(return_loss=False, **data) - - if show or out_dir: - img_tensor = data['img'][0] - img_metas = data['img_metas'][0].data[0] - imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) - assert len(imgs) == len(img_metas) - - for img, img_meta in zip(imgs, img_metas): - h, w, _ = img_meta['img_shape'] - img_show = img[:h, :w, :] - - ori_h, ori_w = img_meta['ori_shape'][:-1] - img_show = mmcv.imresize(img_show, (ori_w, ori_h)) - - if out_dir: - out_file = osp.join(out_dir, img_meta['ori_filename']) - else: - out_file = None - - model.module.show_result( - img_show, result, palette=dataset.PALETTE, show=show, out_file=out_file, opacity=opacity - ) - - if isinstance(result, list): - if efficient_test: - result = [np2tmp(_) for _ in result] - results.extend(result) - else: - if efficient_test: - result = np2tmp(result) - results.append(result) - - batch_size = len(result) - for _ in range(batch_size): - prog_bar.update() - return results - - -def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False, efficient_test=False): - """Test model with multiple gpus. - - This method tests model with multiple gpus and collects the results - under two different modes: gpu and cpu modes. By setting 'gpu_collect=True' - it encodes results to gpu tensors and use gpu communication for results - collection. On cpu mode it saves the results on different gpus to 'tmpdir' - and collects them by the rank 0 worker. - - Args: - model (nn.Module): Model to be tested. - data_loader (utils.data.Dataloader): Pytorch data loader. - tmpdir (str): Path of directory to save the temporary results from - different gpus under cpu mode. - gpu_collect (bool): Option to use either gpu or cpu to collect results. - efficient_test (bool): Whether save the results as local numpy files to - save CPU memory during evaluation. Default: False. - - Returns: - list: The prediction results. - """ - - model.eval() - results = [] - dataset = data_loader.dataset - rank, world_size = get_dist_info() - if rank == 0: - prog_bar = mmcv.ProgressBar(len(dataset)) - for i, data in enumerate(data_loader): - with torch.no_grad(): - result = model(return_loss=False, rescale=True, **data) - - if isinstance(result, list): - if efficient_test: - result = [np2tmp(_) for _ in result] - results.extend(result) - else: - if efficient_test: - result = np2tmp(result) - results.append(result) - - if rank == 0: - batch_size = data['img'][0].size(0) - for _ in range(batch_size * world_size): - prog_bar.update() - - # collect results from all ranks - if gpu_collect: - results = collect_results_gpu(results, len(dataset)) - else: - results = collect_results_cpu(results, len(dataset), tmpdir) - return results - - -def collect_results_cpu(result_part, size, tmpdir=None): - """Collect results with CPU.""" - rank, world_size = get_dist_info() - # create a tmp dir if it is not specified - if tmpdir is None: - MAX_LEN = 512 - # 32 is whitespace - dir_tensor = torch.full((MAX_LEN,), 32, dtype=torch.uint8, device='cuda') - if rank == 0: - tmpdir = tempfile.mkdtemp() - tmpdir = torch.tensor(bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') - dir_tensor[: len(tmpdir)] = tmpdir - dist.broadcast(dir_tensor, 0) - tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() - else: - mmcv.mkdir_or_exist(tmpdir) - # dump the part result to the dir - mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank))) - dist.barrier() - # collect all parts - if rank != 0: - return None - else: - # load results of all parts from tmp dir - part_list = [] - for i in range(world_size): - part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i)) - part_list.append(mmcv.load(part_file)) - # sort the results - ordered_results = [] - for res in zip(*part_list): - ordered_results.extend(list(res)) - # the dataloader may pad some samples - ordered_results = ordered_results[:size] - # remove tmp dir - shutil.rmtree(tmpdir) - return ordered_results - - -def collect_results_gpu(result_part, size): - """Collect results with GPU.""" - rank, world_size = get_dist_info() - # dump result part to tensor with pickle - part_tensor = torch.tensor(bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') - # gather all result part tensor shape - shape_tensor = torch.tensor(part_tensor.shape, device='cuda') - shape_list = [shape_tensor.clone() for _ in range(world_size)] - dist.all_gather(shape_list, shape_tensor) - # padding result part tensor to max length - shape_max = torch.tensor(shape_list).max() - part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') - part_send[: shape_tensor[0]] = part_tensor - part_recv_list = [part_tensor.new_zeros(shape_max) for _ in range(world_size)] - # gather all result part - dist.all_gather(part_recv_list, part_send) - - if rank == 0: - part_list = [] - for recv, shape in zip(part_recv_list, shape_list): - part_list.append(pickle.loads(recv[: shape[0]].cpu().numpy().tobytes())) - # sort the results - ordered_results = [] - for res in zip(*part_list): - ordered_results.extend(list(res)) - # the dataloader may pad some samples - ordered_results = ordered_results[:size] - return ordered_results diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py deleted file mode 100644 index 1ed5228bcfb6..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py +++ /dev/null @@ -1,109 +0,0 @@ -import random -import warnings - -import numpy as np -import torch - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import ( - MMDataParallel, - MMDistributedDataParallel, -) -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import build_optimizer, build_runner -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import DistEvalHook, EvalHook -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.datasets import build_dataloader, build_dataset -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger - - -def set_random_seed(seed, deterministic=False): - """Set random seed. - - Args: - seed (int): Seed to be used. - deterministic (bool): Whether to set the deterministic option for - CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` - to True and `torch.backends.cudnn.benchmark` to False. - Default: False. - """ - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - if deterministic: - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - - -def train_segmentor(model, dataset, cfg, distributed=False, validate=False, timestamp=None, meta=None): - """Launch segmentor training.""" - logger = get_root_logger(cfg.log_level) - - # prepare data loaders - dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] - data_loaders = [ - build_dataloader( - ds, - cfg.data.samples_per_gpu, - cfg.data.workers_per_gpu, - # cfg.gpus will be ignored if distributed - len(cfg.gpu_ids), - dist=distributed, - seed=cfg.seed, - drop_last=True, - ) - for ds in dataset - ] - - # put model on gpus - if distributed: - find_unused_parameters = cfg.get('find_unused_parameters', False) - # Sets the `find_unused_parameters` parameter in - # torch.nn.parallel.DistributedDataParallel - model = MMDistributedDataParallel( - model.cuda(), - device_ids=[torch.cuda.current_device()], - broadcast_buffers=False, - find_unused_parameters=find_unused_parameters, - ) - else: - model = MMDataParallel(model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) - - # build runner - optimizer = build_optimizer(model, cfg.optimizer) - - if cfg.get('runner') is None: - cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters} - warnings.warn( - 'config is now expected to have a `runner` section, ' 'please set `runner` in your config.', UserWarning - ) - - runner = build_runner( - cfg.runner, - default_args=dict( - model=model, batch_processor=None, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta - ), - ) - - # register hooks - runner.register_training_hooks( - cfg.lr_config, cfg.optimizer_config, cfg.checkpoint_config, cfg.log_config, cfg.get('momentum_config', None) - ) - - # an ugly walkaround to make the .log and .log.json filenames the same - runner.timestamp = timestamp - - # register eval hooks - if validate: - val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) - val_dataloader = build_dataloader( - val_dataset, samples_per_gpu=1, workers_per_gpu=cfg.data.workers_per_gpu, dist=distributed, shuffle=False - ) - eval_cfg = cfg.get('evaluation', {}) - eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner' - eval_hook = DistEvalHook if distributed else EvalHook - runner.register_hook(eval_hook(val_dataloader, **eval_cfg), priority='LOW') - - if cfg.resume_from: - runner.resume(cfg.resume_from) - elif cfg.load_from: - runner.load_checkpoint(cfg.load_from) - runner.run(data_loaders, cfg.workflow) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/__init__.py deleted file mode 100644 index 965605587211..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .evaluation import * # noqa: F401, F403 -from .seg import * # noqa: F401, F403 -from .utils import * # noqa: F401, F403 diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py deleted file mode 100644 index c77282a68a12..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -from .class_names import get_classes, get_palette -from .eval_hooks import DistEvalHook, EvalHook -from .metrics import eval_metrics, mean_dice, mean_fscore, mean_iou - -__all__ = [ - 'EvalHook', - 'DistEvalHook', - 'mean_dice', - 'mean_iou', - 'mean_fscore', - 'eval_metrics', - 'get_classes', - 'get_palette', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py deleted file mode 100644 index 7ebbe83b1851..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py +++ /dev/null @@ -1,458 +0,0 @@ -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - - -def cityscapes_classes(): - """Cityscapes class names for external use.""" - return [ - 'road', - 'sidewalk', - 'building', - 'wall', - 'fence', - 'pole', - 'traffic light', - 'traffic sign', - 'vegetation', - 'terrain', - 'sky', - 'person', - 'rider', - 'car', - 'truck', - 'bus', - 'train', - 'motorcycle', - 'bicycle', - ] - - -def ade_classes(): - """ADE20K class names for external use.""" - return [ - 'wall', - 'building', - 'sky', - 'floor', - 'tree', - 'ceiling', - 'road', - 'bed ', - 'windowpane', - 'grass', - 'cabinet', - 'sidewalk', - 'person', - 'earth', - 'door', - 'table', - 'mountain', - 'plant', - 'curtain', - 'chair', - 'car', - 'water', - 'painting', - 'sofa', - 'shelf', - 'house', - 'sea', - 'mirror', - 'rug', - 'field', - 'armchair', - 'seat', - 'fence', - 'desk', - 'rock', - 'wardrobe', - 'lamp', - 'bathtub', - 'railing', - 'cushion', - 'base', - 'box', - 'column', - 'signboard', - 'chest of drawers', - 'counter', - 'sand', - 'sink', - 'skyscraper', - 'fireplace', - 'refrigerator', - 'grandstand', - 'path', - 'stairs', - 'runway', - 'case', - 'pool table', - 'pillow', - 'screen door', - 'stairway', - 'river', - 'bridge', - 'bookcase', - 'blind', - 'coffee table', - 'toilet', - 'flower', - 'book', - 'hill', - 'bench', - 'countertop', - 'stove', - 'palm', - 'kitchen island', - 'computer', - 'swivel chair', - 'boat', - 'bar', - 'arcade machine', - 'hovel', - 'bus', - 'towel', - 'light', - 'truck', - 'tower', - 'chandelier', - 'awning', - 'streetlight', - 'booth', - 'television receiver', - 'airplane', - 'dirt track', - 'apparel', - 'pole', - 'land', - 'bannister', - 'escalator', - 'ottoman', - 'bottle', - 'buffet', - 'poster', - 'stage', - 'van', - 'ship', - 'fountain', - 'conveyer belt', - 'canopy', - 'washer', - 'plaything', - 'swimming pool', - 'stool', - 'barrel', - 'basket', - 'waterfall', - 'tent', - 'bag', - 'minibike', - 'cradle', - 'oven', - 'ball', - 'food', - 'step', - 'tank', - 'trade name', - 'microwave', - 'pot', - 'animal', - 'bicycle', - 'lake', - 'dishwasher', - 'screen', - 'blanket', - 'sculpture', - 'hood', - 'sconce', - 'vase', - 'traffic light', - 'tray', - 'ashcan', - 'fan', - 'pier', - 'crt screen', - 'plate', - 'monitor', - 'bulletin board', - 'shower', - 'radiator', - 'glass', - 'clock', - 'flag', - ] - - -def voc_classes(): - """Pascal VOC class names for external use.""" - return [ - 'background', - 'aeroplane', - 'bicycle', - 'bird', - 'boat', - 'bottle', - 'bus', - 'car', - 'cat', - 'chair', - 'cow', - 'diningtable', - 'dog', - 'horse', - 'motorbike', - 'person', - 'pottedplant', - 'sheep', - 'sofa', - 'train', - 'tvmonitor', - ] - - -def cityscapes_palette(): - """Cityscapes palette for external use.""" - return [ - [128, 64, 128], - [244, 35, 232], - [70, 70, 70], - [102, 102, 156], - [190, 153, 153], - [153, 153, 153], - [250, 170, 30], - [220, 220, 0], - [107, 142, 35], - [152, 251, 152], - [70, 130, 180], - [220, 20, 60], - [255, 0, 0], - [0, 0, 142], - [0, 0, 70], - [0, 60, 100], - [0, 80, 100], - [0, 0, 230], - [119, 11, 32], - ] - - -def ade_palette(): - """ADE20K palette for external use.""" - return [ - [120, 120, 120], - [180, 120, 120], - [6, 230, 230], - [80, 50, 50], - [4, 200, 3], - [120, 120, 80], - [140, 140, 140], - [204, 5, 255], - [230, 230, 230], - [4, 250, 7], - [224, 5, 255], - [235, 255, 7], - [150, 5, 61], - [120, 120, 70], - [8, 255, 51], - [255, 6, 82], - [143, 255, 140], - [204, 255, 4], - [255, 51, 7], - [204, 70, 3], - [0, 102, 200], - [61, 230, 250], - [255, 6, 51], - [11, 102, 255], - [255, 7, 71], - [255, 9, 224], - [9, 7, 230], - [220, 220, 220], - [255, 9, 92], - [112, 9, 255], - [8, 255, 214], - [7, 255, 224], - [255, 184, 6], - [10, 255, 71], - [255, 41, 10], - [7, 255, 255], - [224, 255, 8], - [102, 8, 255], - [255, 61, 6], - [255, 194, 7], - [255, 122, 8], - [0, 255, 20], - [255, 8, 41], - [255, 5, 153], - [6, 51, 255], - [235, 12, 255], - [160, 150, 20], - [0, 163, 255], - [140, 140, 140], - [250, 10, 15], - [20, 255, 0], - [31, 255, 0], - [255, 31, 0], - [255, 224, 0], - [153, 255, 0], - [0, 0, 255], - [255, 71, 0], - [0, 235, 255], - [0, 173, 255], - [31, 0, 255], - [11, 200, 200], - [255, 82, 0], - [0, 255, 245], - [0, 61, 255], - [0, 255, 112], - [0, 255, 133], - [255, 0, 0], - [255, 163, 0], - [255, 102, 0], - [194, 255, 0], - [0, 143, 255], - [51, 255, 0], - [0, 82, 255], - [0, 255, 41], - [0, 255, 173], - [10, 0, 255], - [173, 255, 0], - [0, 255, 153], - [255, 92, 0], - [255, 0, 255], - [255, 0, 245], - [255, 0, 102], - [255, 173, 0], - [255, 0, 20], - [255, 184, 184], - [0, 31, 255], - [0, 255, 61], - [0, 71, 255], - [255, 0, 204], - [0, 255, 194], - [0, 255, 82], - [0, 10, 255], - [0, 112, 255], - [51, 0, 255], - [0, 194, 255], - [0, 122, 255], - [0, 255, 163], - [255, 153, 0], - [0, 255, 10], - [255, 112, 0], - [143, 255, 0], - [82, 0, 255], - [163, 255, 0], - [255, 235, 0], - [8, 184, 170], - [133, 0, 255], - [0, 255, 92], - [184, 0, 255], - [255, 0, 31], - [0, 184, 255], - [0, 214, 255], - [255, 0, 112], - [92, 255, 0], - [0, 224, 255], - [112, 224, 255], - [70, 184, 160], - [163, 0, 255], - [153, 0, 255], - [71, 255, 0], - [255, 0, 163], - [255, 204, 0], - [255, 0, 143], - [0, 255, 235], - [133, 255, 0], - [255, 0, 235], - [245, 0, 255], - [255, 0, 122], - [255, 245, 0], - [10, 190, 212], - [214, 255, 0], - [0, 204, 255], - [20, 0, 255], - [255, 255, 0], - [0, 153, 255], - [0, 41, 255], - [0, 255, 204], - [41, 0, 255], - [41, 255, 0], - [173, 0, 255], - [0, 245, 255], - [71, 0, 255], - [122, 0, 255], - [0, 255, 184], - [0, 92, 255], - [184, 255, 0], - [0, 133, 255], - [255, 214, 0], - [25, 194, 194], - [102, 255, 0], - [92, 0, 255], - ] - - -def voc_palette(): - """Pascal VOC palette for external use.""" - return [ - [0, 0, 0], - [128, 0, 0], - [0, 128, 0], - [128, 128, 0], - [0, 0, 128], - [128, 0, 128], - [0, 128, 128], - [128, 128, 128], - [64, 0, 0], - [192, 0, 0], - [64, 128, 0], - [192, 128, 0], - [64, 0, 128], - [192, 0, 128], - [64, 128, 128], - [192, 128, 128], - [0, 64, 0], - [128, 64, 0], - [0, 192, 0], - [128, 192, 0], - [0, 64, 128], - ] - - -dataset_aliases = { - 'cityscapes': ['cityscapes'], - 'ade': ['ade', 'ade20k'], - 'voc': ['voc', 'pascal_voc', 'voc12', 'voc12aug'], -} - - -def get_classes(dataset): - """Get class names of a dataset.""" - alias2name = {} - for name, aliases in dataset_aliases.items(): - for alias in aliases: - alias2name[alias] = name - - if mmcv.is_str(dataset): - if dataset in alias2name: - labels = eval(alias2name[dataset] + '_classes()') - else: - raise ValueError(f'Unrecognized dataset: {dataset}') - else: - raise TypeError(f'dataset must a str, but got {type(dataset)}') - return labels - - -def get_palette(dataset): - """Get class palette (RGB) of a dataset.""" - alias2name = {} - for name, aliases in dataset_aliases.items(): - for alias in aliases: - alias2name[alias] = name - - if mmcv.is_str(dataset): - if dataset in alias2name: - labels = eval(alias2name[dataset] + '_palette()') - else: - raise ValueError(f'Unrecognized dataset: {dataset}') - else: - raise TypeError(f'dataset must a str, but got {type(dataset)}') - return labels diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py deleted file mode 100644 index 34b01f515383..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py +++ /dev/null @@ -1,108 +0,0 @@ -import os.path as osp - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import DistEvalHook as _DistEvalHook -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import EvalHook as _EvalHook - - -class EvalHook(_EvalHook): - """Single GPU EvalHook, with efficient test support. - - Args: - by_epoch (bool): Determine perform evaluation by epoch or by iteration. - If set to True, it will perform by epoch. Otherwise, by iteration. - Default: False. - efficient_test (bool): Whether save the results as local numpy files to - save CPU memory during evaluation. Default: False. - Returns: - list: The prediction results. - """ - - greater_keys = ['mIoU', 'mAcc', 'aAcc'] - - def __init__(self, *args, by_epoch=False, efficient_test=False, **kwargs): - super().__init__(*args, by_epoch=by_epoch, **kwargs) - self.efficient_test = efficient_test - - def after_train_iter(self, runner): - """After train epoch hook. - - Override default ``single_gpu_test``. - """ - if self.by_epoch or not self.every_n_iters(runner, self.interval): - return - from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import single_gpu_test - - runner.log_buffer.clear() - results = single_gpu_test(runner.model, self.dataloader, show=False, efficient_test=self.efficient_test) - self.evaluate(runner, results) - - def after_train_epoch(self, runner): - """After train epoch hook. - - Override default ``single_gpu_test``. - """ - if not self.by_epoch or not self.every_n_epochs(runner, self.interval): - return - from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import single_gpu_test - - runner.log_buffer.clear() - results = single_gpu_test(runner.model, self.dataloader, show=False) - self.evaluate(runner, results) - - -class DistEvalHook(_DistEvalHook): - """Distributed EvalHook, with efficient test support. - - Args: - by_epoch (bool): Determine perform evaluation by epoch or by iteration. - If set to True, it will perform by epoch. Otherwise, by iteration. - Default: False. - efficient_test (bool): Whether save the results as local numpy files to - save CPU memory during evaluation. Default: False. - Returns: - list: The prediction results. - """ - - greater_keys = ['mIoU', 'mAcc', 'aAcc'] - - def __init__(self, *args, by_epoch=False, efficient_test=False, **kwargs): - super().__init__(*args, by_epoch=by_epoch, **kwargs) - self.efficient_test = efficient_test - - def after_train_iter(self, runner): - """After train epoch hook. - - Override default ``multi_gpu_test``. - """ - if self.by_epoch or not self.every_n_iters(runner, self.interval): - return - from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import multi_gpu_test - - runner.log_buffer.clear() - results = multi_gpu_test( - runner.model, - self.dataloader, - tmpdir=osp.join(runner.work_dir, '.eval_hook'), - gpu_collect=self.gpu_collect, - efficient_test=self.efficient_test, - ) - if runner.rank == 0: - print('\n') - self.evaluate(runner, results) - - def after_train_epoch(self, runner): - """After train epoch hook. - - Override default ``multi_gpu_test``. - """ - if not self.by_epoch or not self.every_n_epochs(runner, self.interval): - return - from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import multi_gpu_test - - runner.log_buffer.clear() - results = multi_gpu_test( - runner.model, self.dataloader, tmpdir=osp.join(runner.work_dir, '.eval_hook'), gpu_collect=self.gpu_collect - ) - if runner.rank == 0: - print('\n') - self.evaluate(runner, results) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py deleted file mode 100644 index 06b9755207e1..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py +++ /dev/null @@ -1,297 +0,0 @@ -from collections import OrderedDict - -import numpy as np -import torch - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - - -def f_score(precision, recall, beta=1): - """calcuate the f-score value. - - Args: - precision (float | torch.Tensor): The precision value. - recall (float | torch.Tensor): The recall value. - beta (int): Determines the weight of recall in the combined score. - Default: False. - - Returns: - [torch.tensor]: The f-score value. - """ - score = (1 + beta ** 2) * (precision * recall) / ((beta ** 2 * precision) + recall) - return score - - -def intersect_and_union(pred_label, label, num_classes, ignore_index, label_map=dict(), reduce_zero_label=False): - """Calculate intersection and Union. - - Args: - pred_label (ndarray | str): Prediction segmentation map - or predict result filename. - label (ndarray | str): Ground truth segmentation map - or label filename. - num_classes (int): Number of categories. - ignore_index (int): Index that will be ignored in evaluation. - label_map (dict): Mapping old labels to new labels. The parameter will - work only when label is str. Default: dict(). - reduce_zero_label (bool): Wether ignore zero label. The parameter will - work only when label is str. Default: False. - - Returns: - torch.Tensor: The intersection of prediction and ground truth - histogram on all classes. - torch.Tensor: The union of prediction and ground truth histogram on - all classes. - torch.Tensor: The prediction histogram on all classes. - torch.Tensor: The ground truth histogram on all classes. - """ - - if isinstance(pred_label, str): - pred_label = torch.from_numpy(np.load(pred_label)) - else: - pred_label = torch.from_numpy((pred_label)) - - if isinstance(label, str): - label = torch.from_numpy(mmcv.imread(label, flag='unchanged', backend='pillow')) - else: - label = torch.from_numpy(label) - - if label_map is not None: - for old_id, new_id in label_map.items(): - label[label == old_id] = new_id - if reduce_zero_label: - label[label == 0] = 255 - label = label - 1 - label[label == 254] = 255 - - mask = label != ignore_index - pred_label = pred_label[mask] - label = label[mask] - - intersect = pred_label[pred_label == label] - area_intersect = torch.histc(intersect.float(), bins=(num_classes), min=0, max=num_classes - 1) - area_pred_label = torch.histc(pred_label.float(), bins=(num_classes), min=0, max=num_classes - 1) - area_label = torch.histc(label.float(), bins=(num_classes), min=0, max=num_classes - 1) - area_union = area_pred_label + area_label - area_intersect - return area_intersect, area_union, area_pred_label, area_label - - -def total_intersect_and_union( - results, gt_seg_maps, num_classes, ignore_index, label_map=dict(), reduce_zero_label=False -): - """Calculate Total Intersection and Union. - - Args: - results (list[ndarray] | list[str]): List of prediction segmentation - maps or list of prediction result filenames. - gt_seg_maps (list[ndarray] | list[str]): list of ground truth - segmentation maps or list of label filenames. - num_classes (int): Number of categories. - ignore_index (int): Index that will be ignored in evaluation. - label_map (dict): Mapping old labels to new labels. Default: dict(). - reduce_zero_label (bool): Wether ignore zero label. Default: False. - - Returns: - ndarray: The intersection of prediction and ground truth histogram - on all classes. - ndarray: The union of prediction and ground truth histogram on all - classes. - ndarray: The prediction histogram on all classes. - ndarray: The ground truth histogram on all classes. - """ - num_imgs = len(results) - assert len(gt_seg_maps) == num_imgs - total_area_intersect = torch.zeros((num_classes,), dtype=torch.float64) - total_area_union = torch.zeros((num_classes,), dtype=torch.float64) - total_area_pred_label = torch.zeros((num_classes,), dtype=torch.float64) - total_area_label = torch.zeros((num_classes,), dtype=torch.float64) - for i in range(num_imgs): - area_intersect, area_union, area_pred_label, area_label = intersect_and_union( - results[i], gt_seg_maps[i], num_classes, ignore_index, label_map, reduce_zero_label - ) - total_area_intersect += area_intersect - total_area_union += area_union - total_area_pred_label += area_pred_label - total_area_label += area_label - return total_area_intersect, total_area_union, total_area_pred_label, total_area_label - - -def mean_iou( - results, gt_seg_maps, num_classes, ignore_index, nan_to_num=None, label_map=dict(), reduce_zero_label=False -): - """Calculate Mean Intersection and Union (mIoU) - - Args: - results (list[ndarray] | list[str]): List of prediction segmentation - maps or list of prediction result filenames. - gt_seg_maps (list[ndarray] | list[str]): list of ground truth - segmentation maps or list of label filenames. - num_classes (int): Number of categories. - ignore_index (int): Index that will be ignored in evaluation. - nan_to_num (int, optional): If specified, NaN values will be replaced - by the numbers defined by the user. Default: None. - label_map (dict): Mapping old labels to new labels. Default: dict(). - reduce_zero_label (bool): Wether ignore zero label. Default: False. - - Returns: - dict[str, float | ndarray]: - float: Overall accuracy on all images. - ndarray: Per category accuracy, shape (num_classes, ). - ndarray: Per category IoU, shape (num_classes, ). - """ - iou_result = eval_metrics( - results=results, - gt_seg_maps=gt_seg_maps, - num_classes=num_classes, - ignore_index=ignore_index, - metrics=['mIoU'], - nan_to_num=nan_to_num, - label_map=label_map, - reduce_zero_label=reduce_zero_label, - ) - return iou_result - - -def mean_dice( - results, gt_seg_maps, num_classes, ignore_index, nan_to_num=None, label_map=dict(), reduce_zero_label=False -): - """Calculate Mean Dice (mDice) - - Args: - results (list[ndarray] | list[str]): List of prediction segmentation - maps or list of prediction result filenames. - gt_seg_maps (list[ndarray] | list[str]): list of ground truth - segmentation maps or list of label filenames. - num_classes (int): Number of categories. - ignore_index (int): Index that will be ignored in evaluation. - nan_to_num (int, optional): If specified, NaN values will be replaced - by the numbers defined by the user. Default: None. - label_map (dict): Mapping old labels to new labels. Default: dict(). - reduce_zero_label (bool): Wether ignore zero label. Default: False. - - Returns: - dict[str, float | ndarray]: Default metrics. - float: Overall accuracy on all images. - ndarray: Per category accuracy, shape (num_classes, ). - ndarray: Per category dice, shape (num_classes, ). - """ - - dice_result = eval_metrics( - results=results, - gt_seg_maps=gt_seg_maps, - num_classes=num_classes, - ignore_index=ignore_index, - metrics=['mDice'], - nan_to_num=nan_to_num, - label_map=label_map, - reduce_zero_label=reduce_zero_label, - ) - return dice_result - - -def mean_fscore( - results, gt_seg_maps, num_classes, ignore_index, nan_to_num=None, label_map=dict(), reduce_zero_label=False, beta=1 -): - """Calculate Mean Intersection and Union (mIoU) - - Args: - results (list[ndarray] | list[str]): List of prediction segmentation - maps or list of prediction result filenames. - gt_seg_maps (list[ndarray] | list[str]): list of ground truth - segmentation maps or list of label filenames. - num_classes (int): Number of categories. - ignore_index (int): Index that will be ignored in evaluation. - nan_to_num (int, optional): If specified, NaN values will be replaced - by the numbers defined by the user. Default: None. - label_map (dict): Mapping old labels to new labels. Default: dict(). - reduce_zero_label (bool): Wether ignore zero label. Default: False. - beta (int): Determines the weight of recall in the combined score. - Default: False. - - - Returns: - dict[str, float | ndarray]: Default metrics. - float: Overall accuracy on all images. - ndarray: Per category recall, shape (num_classes, ). - ndarray: Per category precision, shape (num_classes, ). - ndarray: Per category f-score, shape (num_classes, ). - """ - fscore_result = eval_metrics( - results=results, - gt_seg_maps=gt_seg_maps, - num_classes=num_classes, - ignore_index=ignore_index, - metrics=['mFscore'], - nan_to_num=nan_to_num, - label_map=label_map, - reduce_zero_label=reduce_zero_label, - beta=beta, - ) - return fscore_result - - -def eval_metrics( - results, - gt_seg_maps, - num_classes, - ignore_index, - metrics=['mIoU'], - nan_to_num=None, - label_map=dict(), - reduce_zero_label=False, - beta=1, -): - """Calculate evaluation metrics - Args: - results (list[ndarray] | list[str]): List of prediction segmentation - maps or list of prediction result filenames. - gt_seg_maps (list[ndarray] | list[str]): list of ground truth - segmentation maps or list of label filenames. - num_classes (int): Number of categories. - ignore_index (int): Index that will be ignored in evaluation. - metrics (list[str] | str): Metrics to be evaluated, 'mIoU' and 'mDice'. - nan_to_num (int, optional): If specified, NaN values will be replaced - by the numbers defined by the user. Default: None. - label_map (dict): Mapping old labels to new labels. Default: dict(). - reduce_zero_label (bool): Wether ignore zero label. Default: False. - Returns: - float: Overall accuracy on all images. - ndarray: Per category accuracy, shape (num_classes, ). - ndarray: Per category evaluation metrics, shape (num_classes, ). - """ - if isinstance(metrics, str): - metrics = [metrics] - allowed_metrics = ['mIoU', 'mDice', 'mFscore'] - if not set(metrics).issubset(set(allowed_metrics)): - raise KeyError('metrics {} is not supported'.format(metrics)) - - total_area_intersect, total_area_union, total_area_pred_label, total_area_label = total_intersect_and_union( - results, gt_seg_maps, num_classes, ignore_index, label_map, reduce_zero_label - ) - all_acc = total_area_intersect.sum() / total_area_label.sum() - ret_metrics = OrderedDict({'aAcc': all_acc}) - for metric in metrics: - if metric == 'mIoU': - iou = total_area_intersect / total_area_union - acc = total_area_intersect / total_area_label - ret_metrics['IoU'] = iou - ret_metrics['Acc'] = acc - elif metric == 'mDice': - dice = 2 * total_area_intersect / (total_area_pred_label + total_area_label) - acc = total_area_intersect / total_area_label - ret_metrics['Dice'] = dice - ret_metrics['Acc'] = acc - elif metric == 'mFscore': - precision = total_area_intersect / total_area_pred_label - recall = total_area_intersect / total_area_label - f_value = torch.tensor([f_score(x[0], x[1], beta) for x in zip(precision, recall)]) - ret_metrics['Fscore'] = f_value - ret_metrics['Precision'] = precision - ret_metrics['Recall'] = recall - - ret_metrics = {metric: value.numpy() for metric, value in ret_metrics.items()} - if nan_to_num is not None: - ret_metrics = OrderedDict( - {metric: np.nan_to_num(metric_value, nan=nan_to_num) for metric, metric_value in ret_metrics.items()} - ) - return ret_metrics diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/__init__.py deleted file mode 100644 index 93bc129b685e..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .builder import build_pixel_sampler -from .sampler import BasePixelSampler, OHEMPixelSampler - -__all__ = ['build_pixel_sampler', 'BasePixelSampler', 'OHEMPixelSampler'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/builder.py deleted file mode 100644 index 908e885cb71d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/builder.py +++ /dev/null @@ -1,8 +0,0 @@ -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry, build_from_cfg - -PIXEL_SAMPLERS = Registry('pixel sampler') - - -def build_pixel_sampler(cfg, **default_args): - """Build pixel sampler for segmentation map.""" - return build_from_cfg(cfg, PIXEL_SAMPLERS, default_args) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/__init__.py deleted file mode 100644 index 332b242c03d1..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .base_pixel_sampler import BasePixelSampler -from .ohem_pixel_sampler import OHEMPixelSampler - -__all__ = ['BasePixelSampler', 'OHEMPixelSampler'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py deleted file mode 100644 index b75b1566c9f1..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py +++ /dev/null @@ -1,12 +0,0 @@ -from abc import ABCMeta, abstractmethod - - -class BasePixelSampler(metaclass=ABCMeta): - """Base class of pixel sampler.""" - - def __init__(self, **kwargs): - pass - - @abstractmethod - def sample(self, seg_logit, seg_label): - """Placeholder for sample function.""" diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py deleted file mode 100644 index cfab50a07df6..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py +++ /dev/null @@ -1,76 +0,0 @@ -import torch -import torch.nn.functional as F - -from ..builder import PIXEL_SAMPLERS -from .base_pixel_sampler import BasePixelSampler - - -@PIXEL_SAMPLERS.register_module() -class OHEMPixelSampler(BasePixelSampler): - """Online Hard Example Mining Sampler for segmentation. - - Args: - context (nn.Module): The context of sampler, subclass of - :obj:`BaseDecodeHead`. - thresh (float, optional): The threshold for hard example selection. - Below which, are prediction with low confidence. If not - specified, the hard examples will be pixels of top ``min_kept`` - loss. Default: None. - min_kept (int, optional): The minimum number of predictions to keep. - Default: 100000. - """ - - def __init__(self, context, thresh=None, min_kept=100000): - super(OHEMPixelSampler, self).__init__() - self.context = context - assert min_kept > 1 - self.thresh = thresh - self.min_kept = min_kept - - def sample(self, seg_logit, seg_label): - """Sample pixels that have high loss or with low prediction confidence. - - Args: - seg_logit (torch.Tensor): segmentation logits, shape (N, C, H, W) - seg_label (torch.Tensor): segmentation label, shape (N, 1, H, W) - - Returns: - torch.Tensor: segmentation weight, shape (N, H, W) - """ - with torch.no_grad(): - assert seg_logit.shape[2:] == seg_label.shape[2:] - assert seg_label.shape[1] == 1 - seg_label = seg_label.squeeze(1).long() - batch_kept = self.min_kept * seg_label.size(0) - valid_mask = seg_label != self.context.ignore_index - seg_weight = seg_logit.new_zeros(size=seg_label.size()) - valid_seg_weight = seg_weight[valid_mask] - if self.thresh is not None: - seg_prob = F.softmax(seg_logit, dim=1) - - tmp_seg_label = seg_label.clone().unsqueeze(1) - tmp_seg_label[tmp_seg_label == self.context.ignore_index] = 0 - seg_prob = seg_prob.gather(1, tmp_seg_label).squeeze(1) - sort_prob, sort_indices = seg_prob[valid_mask].sort() - - if sort_prob.numel() > 0: - min_threshold = sort_prob[min(batch_kept, sort_prob.numel() - 1)] - else: - min_threshold = 0.0 - threshold = max(min_threshold, self.thresh) - valid_seg_weight[seg_prob[valid_mask] < threshold] = 1.0 - else: - losses = self.context.loss_decode( - seg_logit, - seg_label, - weight=None, - ignore_index=self.context.ignore_index, - reduction_override='none', - ) - # faster than topk according to https://github.com/pytorch/pytorch/issues/22812 # noqa - _, sort_indices = losses[valid_mask].sort(descending=True) - valid_seg_weight[sort_indices[:batch_kept]] = 1.0 - - seg_weight[valid_mask] = valid_seg_weight - - return seg_weight diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/__init__.py deleted file mode 100644 index f2678b321c29..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .misc import add_prefix - -__all__ = ['add_prefix'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/misc.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/misc.py deleted file mode 100644 index eb862a82bd47..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/misc.py +++ /dev/null @@ -1,17 +0,0 @@ -def add_prefix(inputs, prefix): - """Add prefix for dict. - - Args: - inputs (dict): The input dict with str keys. - prefix (str): The prefix to add. - - Returns: - - dict: The dict with keys updated with ``prefix``. - """ - - outputs = dict() - for name, value in inputs.items(): - outputs[f'{prefix}.{name}'] = value - - return outputs diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py deleted file mode 100644 index 3612a6e86e94..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -from .ade import ADE20KDataset -from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset -from .chase_db1 import ChaseDB1Dataset -from .cityscapes import CityscapesDataset -from .custom import CustomDataset -from .dataset_wrappers import ConcatDataset, RepeatDataset -from .drive import DRIVEDataset -from .hrf import HRFDataset -from .pascal_context import PascalContextDataset, PascalContextDataset59 -from .stare import STAREDataset -from .voc import PascalVOCDataset - -__all__ = [ - 'CustomDataset', - 'build_dataloader', - 'ConcatDataset', - 'RepeatDataset', - 'DATASETS', - 'build_dataset', - 'PIPELINES', - 'CityscapesDataset', - 'PascalVOCDataset', - 'ADE20KDataset', - 'PascalContextDataset', - 'PascalContextDataset59', - 'ChaseDB1Dataset', - 'DRIVEDataset', - 'HRFDataset', - 'STAREDataset', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py deleted file mode 100644 index 6a69943b1ce7..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py +++ /dev/null @@ -1,322 +0,0 @@ -from .builder import DATASETS -from .custom import CustomDataset - - -@DATASETS.register_module() -class ADE20KDataset(CustomDataset): - """ADE20K dataset. - - In segmentation map annotation for ADE20K, 0 stands for background, which - is not included in 150 categories. ``reduce_zero_label`` is fixed to True. - The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed to - '.png'. - """ - - CLASSES = ( - 'wall', - 'building', - 'sky', - 'floor', - 'tree', - 'ceiling', - 'road', - 'bed ', - 'windowpane', - 'grass', - 'cabinet', - 'sidewalk', - 'person', - 'earth', - 'door', - 'table', - 'mountain', - 'plant', - 'curtain', - 'chair', - 'car', - 'water', - 'painting', - 'sofa', - 'shelf', - 'house', - 'sea', - 'mirror', - 'rug', - 'field', - 'armchair', - 'seat', - 'fence', - 'desk', - 'rock', - 'wardrobe', - 'lamp', - 'bathtub', - 'railing', - 'cushion', - 'base', - 'box', - 'column', - 'signboard', - 'chest of drawers', - 'counter', - 'sand', - 'sink', - 'skyscraper', - 'fireplace', - 'refrigerator', - 'grandstand', - 'path', - 'stairs', - 'runway', - 'case', - 'pool table', - 'pillow', - 'screen door', - 'stairway', - 'river', - 'bridge', - 'bookcase', - 'blind', - 'coffee table', - 'toilet', - 'flower', - 'book', - 'hill', - 'bench', - 'countertop', - 'stove', - 'palm', - 'kitchen island', - 'computer', - 'swivel chair', - 'boat', - 'bar', - 'arcade machine', - 'hovel', - 'bus', - 'towel', - 'light', - 'truck', - 'tower', - 'chandelier', - 'awning', - 'streetlight', - 'booth', - 'television receiver', - 'airplane', - 'dirt track', - 'apparel', - 'pole', - 'land', - 'bannister', - 'escalator', - 'ottoman', - 'bottle', - 'buffet', - 'poster', - 'stage', - 'van', - 'ship', - 'fountain', - 'conveyer belt', - 'canopy', - 'washer', - 'plaything', - 'swimming pool', - 'stool', - 'barrel', - 'basket', - 'waterfall', - 'tent', - 'bag', - 'minibike', - 'cradle', - 'oven', - 'ball', - 'food', - 'step', - 'tank', - 'trade name', - 'microwave', - 'pot', - 'animal', - 'bicycle', - 'lake', - 'dishwasher', - 'screen', - 'blanket', - 'sculpture', - 'hood', - 'sconce', - 'vase', - 'traffic light', - 'tray', - 'ashcan', - 'fan', - 'pier', - 'crt screen', - 'plate', - 'monitor', - 'bulletin board', - 'shower', - 'radiator', - 'glass', - 'clock', - 'flag', - ) - - PALETTE = [ - [120, 120, 120], - [180, 120, 120], - [6, 230, 230], - [80, 50, 50], - [4, 200, 3], - [120, 120, 80], - [140, 140, 140], - [204, 5, 255], - [230, 230, 230], - [4, 250, 7], - [224, 5, 255], - [235, 255, 7], - [150, 5, 61], - [120, 120, 70], - [8, 255, 51], - [255, 6, 82], - [143, 255, 140], - [204, 255, 4], - [255, 51, 7], - [204, 70, 3], - [0, 102, 200], - [61, 230, 250], - [255, 6, 51], - [11, 102, 255], - [255, 7, 71], - [255, 9, 224], - [9, 7, 230], - [220, 220, 220], - [255, 9, 92], - [112, 9, 255], - [8, 255, 214], - [7, 255, 224], - [255, 184, 6], - [10, 255, 71], - [255, 41, 10], - [7, 255, 255], - [224, 255, 8], - [102, 8, 255], - [255, 61, 6], - [255, 194, 7], - [255, 122, 8], - [0, 255, 20], - [255, 8, 41], - [255, 5, 153], - [6, 51, 255], - [235, 12, 255], - [160, 150, 20], - [0, 163, 255], - [140, 140, 140], - [250, 10, 15], - [20, 255, 0], - [31, 255, 0], - [255, 31, 0], - [255, 224, 0], - [153, 255, 0], - [0, 0, 255], - [255, 71, 0], - [0, 235, 255], - [0, 173, 255], - [31, 0, 255], - [11, 200, 200], - [255, 82, 0], - [0, 255, 245], - [0, 61, 255], - [0, 255, 112], - [0, 255, 133], - [255, 0, 0], - [255, 163, 0], - [255, 102, 0], - [194, 255, 0], - [0, 143, 255], - [51, 255, 0], - [0, 82, 255], - [0, 255, 41], - [0, 255, 173], - [10, 0, 255], - [173, 255, 0], - [0, 255, 153], - [255, 92, 0], - [255, 0, 255], - [255, 0, 245], - [255, 0, 102], - [255, 173, 0], - [255, 0, 20], - [255, 184, 184], - [0, 31, 255], - [0, 255, 61], - [0, 71, 255], - [255, 0, 204], - [0, 255, 194], - [0, 255, 82], - [0, 10, 255], - [0, 112, 255], - [51, 0, 255], - [0, 194, 255], - [0, 122, 255], - [0, 255, 163], - [255, 153, 0], - [0, 255, 10], - [255, 112, 0], - [143, 255, 0], - [82, 0, 255], - [163, 255, 0], - [255, 235, 0], - [8, 184, 170], - [133, 0, 255], - [0, 255, 92], - [184, 0, 255], - [255, 0, 31], - [0, 184, 255], - [0, 214, 255], - [255, 0, 112], - [92, 255, 0], - [0, 224, 255], - [112, 224, 255], - [70, 184, 160], - [163, 0, 255], - [153, 0, 255], - [71, 255, 0], - [255, 0, 163], - [255, 204, 0], - [255, 0, 143], - [0, 255, 235], - [133, 255, 0], - [255, 0, 235], - [245, 0, 255], - [255, 0, 122], - [255, 245, 0], - [10, 190, 212], - [214, 255, 0], - [0, 204, 255], - [20, 0, 255], - [255, 255, 0], - [0, 153, 255], - [0, 41, 255], - [0, 255, 204], - [41, 0, 255], - [41, 255, 0], - [173, 0, 255], - [0, 245, 255], - [71, 0, 255], - [122, 0, 255], - [0, 255, 184], - [0, 92, 255], - [184, 255, 0], - [0, 133, 255], - [255, 214, 0], - [25, 194, 194], - [102, 255, 0], - [92, 0, 255], - ] - - def __init__(self, **kwargs): - super(ADE20KDataset, self).__init__(img_suffix='.jpg', seg_map_suffix='.png', reduce_zero_label=True, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py deleted file mode 100644 index c076a55fe358..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py +++ /dev/null @@ -1,172 +0,0 @@ -import copy -import platform -import random -from functools import partial - -import numpy as np -from torch.utils.data import DistributedSampler - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import collate -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import get_dist_info -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry, build_from_cfg -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import ( - DataLoader, - PoolDataLoader, -) - -if platform.system() != 'Windows': - # https://github.com/pytorch/pytorch/issues/973 - import resource - - rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) - hard_limit = rlimit[1] - soft_limit = min(4096, hard_limit) - resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit)) - -DATASETS = Registry('dataset') -PIPELINES = Registry('pipeline') - - -def _concat_dataset(cfg, default_args=None): - """Build :obj:`ConcatDataset by.""" - from .dataset_wrappers import ConcatDataset - - img_dir = cfg['img_dir'] - ann_dir = cfg.get('ann_dir', None) - split = cfg.get('split', None) - num_img_dir = len(img_dir) if isinstance(img_dir, (list, tuple)) else 1 - if ann_dir is not None: - num_ann_dir = len(ann_dir) if isinstance(ann_dir, (list, tuple)) else 1 - else: - num_ann_dir = 0 - if split is not None: - num_split = len(split) if isinstance(split, (list, tuple)) else 1 - else: - num_split = 0 - if num_img_dir > 1: - assert num_img_dir == num_ann_dir or num_ann_dir == 0 - assert num_img_dir == num_split or num_split == 0 - else: - assert num_split == num_ann_dir or num_ann_dir <= 1 - num_dset = max(num_split, num_img_dir) - - datasets = [] - for i in range(num_dset): - data_cfg = copy.deepcopy(cfg) - if isinstance(img_dir, (list, tuple)): - data_cfg['img_dir'] = img_dir[i] - if isinstance(ann_dir, (list, tuple)): - data_cfg['ann_dir'] = ann_dir[i] - if isinstance(split, (list, tuple)): - data_cfg['split'] = split[i] - datasets.append(build_dataset(data_cfg, default_args)) - - return ConcatDataset(datasets) - - -def build_dataset(cfg, default_args=None): - """Build datasets.""" - from .dataset_wrappers import ConcatDataset, RepeatDataset - - if isinstance(cfg, (list, tuple)): - dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg]) - elif cfg['type'] == 'RepeatDataset': - dataset = RepeatDataset(build_dataset(cfg['dataset'], default_args), cfg['times']) - elif isinstance(cfg.get('img_dir'), (list, tuple)) or isinstance(cfg.get('split', None), (list, tuple)): - dataset = _concat_dataset(cfg, default_args) - else: - dataset = build_from_cfg(cfg, DATASETS, default_args) - - return dataset - - -def build_dataloader( - dataset, - samples_per_gpu, - workers_per_gpu, - num_gpus=1, - dist=True, - shuffle=True, - seed=None, - drop_last=False, - pin_memory=True, - dataloader_type='PoolDataLoader', - **kwargs, -): - """Build PyTorch DataLoader. - - In distributed training, each GPU/process has a dataloader. - In non-distributed training, there is only one dataloader for all GPUs. - - Args: - dataset (Dataset): A PyTorch dataset. - samples_per_gpu (int): Number of training samples on each GPU, i.e., - batch size of each GPU. - workers_per_gpu (int): How many subprocesses to use for data loading - for each GPU. - num_gpus (int): Number of GPUs. Only used in non-distributed training. - dist (bool): Distributed training/test or not. Default: True. - shuffle (bool): Whether to shuffle the data at every epoch. - Default: True. - seed (int | None): Seed to be used. Default: None. - drop_last (bool): Whether to drop the last incomplete batch in epoch. - Default: False - pin_memory (bool): Whether to use pin_memory in DataLoader. - Default: True - dataloader_type (str): Type of dataloader. Default: 'PoolDataLoader' - kwargs: any keyword argument to be used to initialize DataLoader - - Returns: - DataLoader: A PyTorch dataloader. - """ - rank, world_size = get_dist_info() - if dist: - sampler = DistributedSampler(dataset, world_size, rank, shuffle=shuffle) - shuffle = False - batch_size = samples_per_gpu - num_workers = workers_per_gpu - else: - sampler = None - batch_size = num_gpus * samples_per_gpu - num_workers = num_gpus * workers_per_gpu - - init_fn = partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed) if seed is not None else None - - assert dataloader_type in ('DataLoader', 'PoolDataLoader'), f'unsupported dataloader {dataloader_type}' - - if dataloader_type == 'PoolDataLoader': - dataloader = PoolDataLoader - elif dataloader_type == 'DataLoader': - dataloader = DataLoader - - data_loader = dataloader( - dataset, - batch_size=batch_size, - sampler=sampler, - num_workers=num_workers, - collate_fn=partial(collate, samples_per_gpu=samples_per_gpu), - pin_memory=pin_memory, - shuffle=shuffle, - worker_init_fn=init_fn, - drop_last=drop_last, - **kwargs, - ) - - return data_loader - - -def worker_init_fn(worker_id, num_workers, rank, seed): - """Worker init func for dataloader. - - The seed of each worker equals to num_worker * rank + worker_id + user_seed - - Args: - worker_id (int): Worker id. - num_workers (int): Number of workers. - rank (int): The rank of current process. - seed (int): The random seed to use. - """ - - worker_seed = num_workers * rank + worker_id + seed - np.random.seed(worker_seed) - random.seed(worker_seed) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py deleted file mode 100644 index 906e51485f72..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py +++ /dev/null @@ -1,25 +0,0 @@ -import os.path as osp - -from .builder import DATASETS -from .custom import CustomDataset - - -@DATASETS.register_module() -class ChaseDB1Dataset(CustomDataset): - """Chase_db1 dataset. - - In segmentation map annotation for Chase_db1, 0 stands for background, - which is included in 2 categories. ``reduce_zero_label`` is fixed to False. - The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to - '_1stHO.png'. - """ - - CLASSES = ('background', 'vessel') - - PALETTE = [[120, 120, 120], [6, 230, 230]] - - def __init__(self, **kwargs): - super(ChaseDB1Dataset, self).__init__( - img_suffix='.png', seg_map_suffix='_1stHO.png', reduce_zero_label=False, **kwargs - ) - assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py deleted file mode 100644 index 4a8a0ecd589b..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py +++ /dev/null @@ -1,241 +0,0 @@ -import os.path as osp -import tempfile - -import numpy as np -from PIL import Image - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import print_log - -from .builder import DATASETS -from .custom import CustomDataset - - -@DATASETS.register_module() -class CityscapesDataset(CustomDataset): - """Cityscapes dataset. - - The ``img_suffix`` is fixed to '_leftImg8bit.png' and ``seg_map_suffix`` is - fixed to '_gtFine_labelTrainIds.png' for Cityscapes dataset. - """ - - CLASSES = ( - 'road', - 'sidewalk', - 'building', - 'wall', - 'fence', - 'pole', - 'traffic light', - 'traffic sign', - 'vegetation', - 'terrain', - 'sky', - 'person', - 'rider', - 'car', - 'truck', - 'bus', - 'train', - 'motorcycle', - 'bicycle', - ) - - PALETTE = [ - [128, 64, 128], - [244, 35, 232], - [70, 70, 70], - [102, 102, 156], - [190, 153, 153], - [153, 153, 153], - [250, 170, 30], - [220, 220, 0], - [107, 142, 35], - [152, 251, 152], - [70, 130, 180], - [220, 20, 60], - [255, 0, 0], - [0, 0, 142], - [0, 0, 70], - [0, 60, 100], - [0, 80, 100], - [0, 0, 230], - [119, 11, 32], - ] - - def __init__(self, **kwargs): - super(CityscapesDataset, self).__init__( - img_suffix='_leftImg8bit.png', seg_map_suffix='_gtFine_labelTrainIds.png', **kwargs - ) - - @staticmethod - def _convert_to_label_id(result): - """Convert trainId to id for cityscapes.""" - if isinstance(result, str): - result = np.load(result) - import cityscapesscripts.helpers.labels as CSLabels - - result_copy = result.copy() - for trainId, label in CSLabels.trainId2label.items(): - result_copy[result == trainId] = label.id - - return result_copy - - def results2img(self, results, imgfile_prefix, to_label_id): - """Write the segmentation results to images. - - Args: - results (list[list | tuple | ndarray]): Testing results of the - dataset. - imgfile_prefix (str): The filename prefix of the png files. - If the prefix is "somepath/xxx", - the png files will be named "somepath/xxx.png". - to_label_id (bool): whether convert output to label_id for - submission - - Returns: - list[str: str]: result txt files which contains corresponding - semantic segmentation images. - """ - mmcv.mkdir_or_exist(imgfile_prefix) - result_files = [] - prog_bar = mmcv.ProgressBar(len(self)) - for idx in range(len(self)): - result = results[idx] - if to_label_id: - result = self._convert_to_label_id(result) - filename = self.img_infos[idx]['filename'] - basename = osp.splitext(osp.basename(filename))[0] - - png_filename = osp.join(imgfile_prefix, f'{basename}.png') - - output = Image.fromarray(result.astype(np.uint8)).convert('P') - import cityscapesscripts.helpers.labels as CSLabels - - palette = np.zeros((len(CSLabels.id2label), 3), dtype=np.uint8) - for label_id, label in CSLabels.id2label.items(): - palette[label_id] = label.color - - output.putpalette(palette) - output.save(png_filename) - result_files.append(png_filename) - prog_bar.update() - - return result_files - - def format_results(self, results, imgfile_prefix=None, to_label_id=True): - """Format the results into dir (standard format for Cityscapes - evaluation). - - Args: - results (list): Testing results of the dataset. - imgfile_prefix (str | None): The prefix of images files. It - includes the file path and the prefix of filename, e.g., - "a/b/prefix". If not specified, a temp file will be created. - Default: None. - to_label_id (bool): whether convert output to label_id for - submission. Default: False - - Returns: - tuple: (result_files, tmp_dir), result_files is a list containing - the image paths, tmp_dir is the temporal directory created - for saving json/png files when img_prefix is not specified. - """ - - assert isinstance(results, list), 'results must be a list' - assert len(results) == len(self), ( - 'The length of results is not equal to the dataset len: ' f'{len(results)} != {len(self)}' - ) - - if imgfile_prefix is None: - tmp_dir = tempfile.TemporaryDirectory() - imgfile_prefix = tmp_dir.name - else: - tmp_dir = None - result_files = self.results2img(results, imgfile_prefix, to_label_id) - - return result_files, tmp_dir - - def evaluate(self, results, metric='mIoU', logger=None, imgfile_prefix=None, efficient_test=False): - """Evaluation in Cityscapes/default protocol. - - Args: - results (list): Testing results of the dataset. - metric (str | list[str]): Metrics to be evaluated. - logger (logging.Logger | None | str): Logger used for printing - related information during evaluation. Default: None. - imgfile_prefix (str | None): The prefix of output image file, - for cityscapes evaluation only. It includes the file path and - the prefix of filename, e.g., "a/b/prefix". - If results are evaluated with cityscapes protocol, it would be - the prefix of output png files. The output files would be - png images under folder "a/b/prefix/xxx.png", where "xxx" is - the image name of cityscapes. If not specified, a temp file - will be created for evaluation. - Default: None. - - Returns: - dict[str, float]: Cityscapes/default metrics. - """ - - eval_results = dict() - metrics = metric.copy() if isinstance(metric, list) else [metric] - if 'cityscapes' in metrics: - eval_results.update(self._evaluate_cityscapes(results, logger, imgfile_prefix)) - metrics.remove('cityscapes') - if len(metrics) > 0: - eval_results.update(super(CityscapesDataset, self).evaluate(results, metrics, logger, efficient_test)) - - return eval_results - - def _evaluate_cityscapes(self, results, logger, imgfile_prefix): - """Evaluation in Cityscapes protocol. - - Args: - results (list): Testing results of the dataset. - logger (logging.Logger | str | None): Logger used for printing - related information during evaluation. Default: None. - imgfile_prefix (str | None): The prefix of output image file - - Returns: - dict[str: float]: Cityscapes evaluation results. - """ - try: - import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as CSEval # noqa - except ImportError: - raise ImportError('Please run "pip install cityscapesscripts" to ' 'install cityscapesscripts first.') - msg = 'Evaluating in Cityscapes style' - if logger is None: - msg = '\n' + msg - print_log(msg, logger=logger) - - result_files, tmp_dir = self.format_results(results, imgfile_prefix) - - if tmp_dir is None: - result_dir = imgfile_prefix - else: - result_dir = tmp_dir.name - - eval_results = dict() - print_log(f'Evaluating results under {result_dir} ...', logger=logger) - - CSEval.args.evalInstLevelScore = True - CSEval.args.predictionPath = osp.abspath(result_dir) - CSEval.args.evalPixelAccuracy = True - CSEval.args.JSONOutput = False - - seg_map_list = [] - pred_list = [] - - # when evaluating with official cityscapesscripts, - # **_gtFine_labelIds.png is used - for seg_map in mmcv.scandir(self.ann_dir, 'gtFine_labelIds.png', recursive=True): - seg_map_list.append(osp.join(self.ann_dir, seg_map)) - pred_list.append(CSEval.getPrediction(CSEval.args, seg_map)) - - eval_results.update(CSEval.evaluateImgLists(pred_list, seg_map_list, CSEval.args)) - - if tmp_dir is not None: - tmp_dir.cleanup() - - return eval_results diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py deleted file mode 100644 index 28680a832ca5..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py +++ /dev/null @@ -1,392 +0,0 @@ -import os -import os.path as osp -from collections import OrderedDict -from functools import reduce - -import numpy as np -from prettytable import PrettyTable -from torch.utils.data import Dataset - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import print_log -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import eval_metrics -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger - -from .builder import DATASETS -from .pipelines import Compose - - -@DATASETS.register_module() -class CustomDataset(Dataset): - """Custom dataset for semantic segmentation. An example of file structure - is as followed. - - .. code-block:: none - - ├── data - │ ├── my_dataset - │ │ ├── img_dir - │ │ │ ├── train - │ │ │ │ ├── xxx{img_suffix} - │ │ │ │ ├── yyy{img_suffix} - │ │ │ │ ├── zzz{img_suffix} - │ │ │ ├── val - │ │ ├── ann_dir - │ │ │ ├── train - │ │ │ │ ├── xxx{seg_map_suffix} - │ │ │ │ ├── yyy{seg_map_suffix} - │ │ │ │ ├── zzz{seg_map_suffix} - │ │ │ ├── val - - The img/gt_semantic_seg pair of CustomDataset should be of the same - except suffix. A valid img/gt_semantic_seg filename pair should be like - ``xxx{img_suffix}`` and ``xxx{seg_map_suffix}`` (extension is also included - in the suffix). If split is given, then ``xxx`` is specified in txt file. - Otherwise, all files in ``img_dir/``and ``ann_dir`` will be loaded. - Please refer to ``docs/tutorials/new_dataset.md`` for more details. - - - Args: - pipeline (list[dict]): Processing pipeline - img_dir (str): Path to image directory - img_suffix (str): Suffix of images. Default: '.jpg' - ann_dir (str, optional): Path to annotation directory. Default: None - seg_map_suffix (str): Suffix of segmentation maps. Default: '.png' - split (str, optional): Split txt file. If split is specified, only - file with suffix in the splits will be loaded. Otherwise, all - images in img_dir/ann_dir will be loaded. Default: None - data_root (str, optional): Data root for img_dir/ann_dir. Default: - None. - test_mode (bool): If test_mode=True, gt wouldn't be loaded. - ignore_index (int): The label index to be ignored. Default: 255 - reduce_zero_label (bool): Whether to mark label zero as ignored. - Default: False - classes (str | Sequence[str], optional): Specify classes to load. - If is None, ``cls.CLASSES`` will be used. Default: None. - palette (Sequence[Sequence[int]]] | np.ndarray | None): - The palette of segmentation map. If None is given, and - self.PALETTE is None, random palette will be generated. - Default: None - """ - - CLASSES = None - - PALETTE = None - - def __init__( - self, - pipeline, - img_dir, - img_suffix='.jpg', - ann_dir=None, - seg_map_suffix='.png', - split=None, - data_root=None, - test_mode=False, - ignore_index=255, - reduce_zero_label=False, - classes=None, - palette=None, - ): - self.pipeline = Compose(pipeline) - self.img_dir = img_dir - self.img_suffix = img_suffix - self.ann_dir = ann_dir - self.seg_map_suffix = seg_map_suffix - self.split = split - self.data_root = data_root - self.test_mode = test_mode - self.ignore_index = ignore_index - self.reduce_zero_label = reduce_zero_label - self.label_map = None - self.CLASSES, self.PALETTE = self.get_classes_and_palette(classes, palette) - - # join paths if data_root is specified - if self.data_root is not None: - if not osp.isabs(self.img_dir): - self.img_dir = osp.join(self.data_root, self.img_dir) - if not (self.ann_dir is None or osp.isabs(self.ann_dir)): - self.ann_dir = osp.join(self.data_root, self.ann_dir) - if not (self.split is None or osp.isabs(self.split)): - self.split = osp.join(self.data_root, self.split) - - # load annotations - self.img_infos = self.load_annotations( - self.img_dir, self.img_suffix, self.ann_dir, self.seg_map_suffix, self.split - ) - - def __len__(self): - """Total number of samples of data.""" - return len(self.img_infos) - - def load_annotations(self, img_dir, img_suffix, ann_dir, seg_map_suffix, split): - """Load annotation from directory. - - Args: - img_dir (str): Path to image directory - img_suffix (str): Suffix of images. - ann_dir (str|None): Path to annotation directory. - seg_map_suffix (str|None): Suffix of segmentation maps. - split (str|None): Split txt file. If split is specified, only file - with suffix in the splits will be loaded. Otherwise, all images - in img_dir/ann_dir will be loaded. Default: None - - Returns: - list[dict]: All image info of dataset. - """ - - img_infos = [] - if split is not None: - with open(split) as f: - for line in f: - img_name = line.strip() - img_info = dict(filename=img_name + img_suffix) - if ann_dir is not None: - seg_map = img_name + seg_map_suffix - img_info['ann'] = dict(seg_map=seg_map) - img_infos.append(img_info) - else: - for img in mmcv.scandir(img_dir, img_suffix, recursive=True): - img_info = dict(filename=img) - if ann_dir is not None: - seg_map = img.replace(img_suffix, seg_map_suffix) - img_info['ann'] = dict(seg_map=seg_map) - img_infos.append(img_info) - - print_log(f'Loaded {len(img_infos)} images', logger=get_root_logger()) - return img_infos - - def get_ann_info(self, idx): - """Get annotation by index. - - Args: - idx (int): Index of data. - - Returns: - dict: Annotation info of specified index. - """ - - return self.img_infos[idx]['ann'] - - def pre_pipeline(self, results): - """Prepare results dict for pipeline.""" - results['seg_fields'] = [] - results['img_prefix'] = self.img_dir - results['seg_prefix'] = self.ann_dir - if self.custom_classes: - results['label_map'] = self.label_map - - def __getitem__(self, idx): - """Get training/test data after pipeline. - - Args: - idx (int): Index of data. - - Returns: - dict: Training/test data (with annotation if `test_mode` is set - False). - """ - - if self.test_mode: - return self.prepare_test_img(idx) - else: - return self.prepare_train_img(idx) - - def prepare_train_img(self, idx): - """Get training data and annotations after pipeline. - - Args: - idx (int): Index of data. - - Returns: - dict: Training data and annotation after pipeline with new keys - introduced by pipeline. - """ - - img_info = self.img_infos[idx] - ann_info = self.get_ann_info(idx) - results = dict(img_info=img_info, ann_info=ann_info) - self.pre_pipeline(results) - return self.pipeline(results) - - def prepare_test_img(self, idx): - """Get testing data after pipeline. - - Args: - idx (int): Index of data. - - Returns: - dict: Testing data after pipeline with new keys introduced by - pipeline. - """ - - img_info = self.img_infos[idx] - results = dict(img_info=img_info) - self.pre_pipeline(results) - return self.pipeline(results) - - def format_results(self, results, **kwargs): - """Place holder to format result to dataset specific output.""" - - def get_gt_seg_maps(self, efficient_test=False): - """Get ground truth segmentation maps for evaluation.""" - gt_seg_maps = [] - for img_info in self.img_infos: - seg_map = osp.join(self.ann_dir, img_info['ann']['seg_map']) - if efficient_test: - gt_seg_map = seg_map - else: - gt_seg_map = mmcv.imread(seg_map, flag='unchanged', backend='pillow') - gt_seg_maps.append(gt_seg_map) - return gt_seg_maps - - def get_classes_and_palette(self, classes=None, palette=None): - """Get class names of current dataset. - - Args: - classes (Sequence[str] | str | None): If classes is None, use - default CLASSES defined by builtin dataset. If classes is a - string, take it as a file name. The file contains the name of - classes where each line contains one class name. If classes is - a tuple or list, override the CLASSES defined by the dataset. - palette (Sequence[Sequence[int]]] | np.ndarray | None): - The palette of segmentation map. If None is given, random - palette will be generated. Default: None - """ - if classes is None: - self.custom_classes = False - return self.CLASSES, self.PALETTE - - self.custom_classes = True - if isinstance(classes, str): - # take it as a file path - class_names = mmcv.list_from_file(classes) - elif isinstance(classes, (tuple, list)): - class_names = classes - else: - raise ValueError(f'Unsupported type {type(classes)} of classes.') - - if self.CLASSES: - if not set(classes).issubset(self.CLASSES): - raise ValueError('classes is not a subset of CLASSES.') - - # dictionary, its keys are the old label ids and its values - # are the new label ids. - # used for changing pixel labels in load_annotations. - self.label_map = {} - for i, c in enumerate(self.CLASSES): - if c not in class_names: - self.label_map[i] = -1 - else: - self.label_map[i] = classes.index(c) - - palette = self.get_palette_for_custom_classes(class_names, palette) - - return class_names, palette - - def get_palette_for_custom_classes(self, class_names, palette=None): - - if self.label_map is not None: - # return subset of palette - palette = [] - for old_id, new_id in sorted(self.label_map.items(), key=lambda x: x[1]): - if new_id != -1: - palette.append(self.PALETTE[old_id]) - palette = type(self.PALETTE)(palette) - - elif palette is None: - if self.PALETTE is None: - palette = np.random.randint(0, 255, size=(len(class_names), 3)) - else: - palette = self.PALETTE - - return palette - - def evaluate(self, results, metric='mIoU', logger=None, efficient_test=False, **kwargs): - """Evaluate the dataset. - - Args: - results (list): Testing results of the dataset. - metric (str | list[str]): Metrics to be evaluated. 'mIoU', - 'mDice' and 'mFscore' are supported. - logger (logging.Logger | None | str): Logger used for printing - related information during evaluation. Default: None. - - Returns: - dict[str, float]: Default metrics. - """ - - if isinstance(metric, str): - metric = [metric] - allowed_metrics = ['mIoU', 'mDice', 'mFscore'] - if not set(metric).issubset(set(allowed_metrics)): - raise KeyError('metric {} is not supported'.format(metric)) - eval_results = {} - gt_seg_maps = self.get_gt_seg_maps(efficient_test) - if self.CLASSES is None: - num_classes = len(reduce(np.union1d, [np.unique(_) for _ in gt_seg_maps])) - else: - num_classes = len(self.CLASSES) - ret_metrics = eval_metrics( - results, - gt_seg_maps, - num_classes, - self.ignore_index, - metric, - label_map=self.label_map, - reduce_zero_label=self.reduce_zero_label, - ) - - if self.CLASSES is None: - class_names = tuple(range(num_classes)) - else: - class_names = self.CLASSES - - # summary table - ret_metrics_summary = OrderedDict( - { - ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2) - for ret_metric, ret_metric_value in ret_metrics.items() - } - ) - - # each class table - ret_metrics.pop('aAcc', None) - ret_metrics_class = OrderedDict( - {ret_metric: np.round(ret_metric_value * 100, 2) for ret_metric, ret_metric_value in ret_metrics.items()} - ) - ret_metrics_class.update({'Class': class_names}) - ret_metrics_class.move_to_end('Class', last=False) - - # for logger - class_table_data = PrettyTable() - for key, val in ret_metrics_class.items(): - class_table_data.add_column(key, val) - - summary_table_data = PrettyTable() - for key, val in ret_metrics_summary.items(): - if key == 'aAcc': - summary_table_data.add_column(key, [val]) - else: - summary_table_data.add_column('m' + key, [val]) - - print_log('per class results:', logger) - print_log('\n' + class_table_data.get_string(), logger=logger) - print_log('Summary:', logger) - print_log('\n' + summary_table_data.get_string(), logger=logger) - - # each metric dict - for key, value in ret_metrics_summary.items(): - if key == 'aAcc': - eval_results[key] = value / 100.0 - else: - eval_results['m' + key] = value / 100.0 - - ret_metrics_class.pop('Class', None) - for key, value in ret_metrics_class.items(): - eval_results.update({key + '.' + str(name): value[idx] / 100.0 for idx, name in enumerate(class_names)}) - - if mmcv.is_list_of(results, str): - for file_name in results: - os.remove(file_name) - return eval_results diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/dataset_wrappers.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/dataset_wrappers.py deleted file mode 100644 index d6a5e957ec3b..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/dataset_wrappers.py +++ /dev/null @@ -1,50 +0,0 @@ -from torch.utils.data.dataset import ConcatDataset as _ConcatDataset - -from .builder import DATASETS - - -@DATASETS.register_module() -class ConcatDataset(_ConcatDataset): - """A wrapper of concatenated dataset. - - Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but - concat the group flag for image aspect ratio. - - Args: - datasets (list[:obj:`Dataset`]): A list of datasets. - """ - - def __init__(self, datasets): - super(ConcatDataset, self).__init__(datasets) - self.CLASSES = datasets[0].CLASSES - self.PALETTE = datasets[0].PALETTE - - -@DATASETS.register_module() -class RepeatDataset(object): - """A wrapper of repeated dataset. - - The length of repeated dataset will be `times` larger than the original - dataset. This is useful when the data loading time is long but the dataset - is small. Using RepeatDataset can reduce the data loading time between - epochs. - - Args: - dataset (:obj:`Dataset`): The dataset to be repeated. - times (int): Repeat times. - """ - - def __init__(self, dataset, times): - self.dataset = dataset - self.times = times - self.CLASSES = dataset.CLASSES - self.PALETTE = dataset.PALETTE - self._ori_len = len(self.dataset) - - def __getitem__(self, idx): - """Get item from original dataset.""" - return self.dataset[idx % self._ori_len] - - def __len__(self): - """The length is multiplied by ``times``""" - return self.times * self._ori_len diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py deleted file mode 100644 index 9cb073329ef0..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py +++ /dev/null @@ -1,25 +0,0 @@ -import os.path as osp - -from .builder import DATASETS -from .custom import CustomDataset - - -@DATASETS.register_module() -class DRIVEDataset(CustomDataset): - """DRIVE dataset. - - In segmentation map annotation for DRIVE, 0 stands for background, which is - included in 2 categories. ``reduce_zero_label`` is fixed to False. The - ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to - '_manual1.png'. - """ - - CLASSES = ('background', 'vessel') - - PALETTE = [[120, 120, 120], [6, 230, 230]] - - def __init__(self, **kwargs): - super(DRIVEDataset, self).__init__( - img_suffix='.png', seg_map_suffix='_manual1.png', reduce_zero_label=False, **kwargs - ) - assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py deleted file mode 100644 index b67616f5f58f..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py +++ /dev/null @@ -1,23 +0,0 @@ -import os.path as osp - -from .builder import DATASETS -from .custom import CustomDataset - - -@DATASETS.register_module() -class HRFDataset(CustomDataset): - """HRF dataset. - - In segmentation map annotation for HRF, 0 stands for background, which is - included in 2 categories. ``reduce_zero_label`` is fixed to False. The - ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to - '.png'. - """ - - CLASSES = ('background', 'vessel') - - PALETTE = [[120, 120, 120], [6, 230, 230]] - - def __init__(self, **kwargs): - super(HRFDataset, self).__init__(img_suffix='.png', seg_map_suffix='.png', reduce_zero_label=False, **kwargs) - assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py deleted file mode 100644 index 35028ac9b15a..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py +++ /dev/null @@ -1,294 +0,0 @@ -import os.path as osp - -from .builder import DATASETS -from .custom import CustomDataset - - -@DATASETS.register_module() -class PascalContextDataset(CustomDataset): - """PascalContext dataset. - - In segmentation map annotation for PascalContext, 0 stands for background, - which is included in 60 categories. ``reduce_zero_label`` is fixed to - False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is - fixed to '.png'. - - Args: - split (str): Split txt file for PascalContext. - """ - - CLASSES = ( - 'background', - 'aeroplane', - 'bag', - 'bed', - 'bedclothes', - 'bench', - 'bicycle', - 'bird', - 'boat', - 'book', - 'bottle', - 'building', - 'bus', - 'cabinet', - 'car', - 'cat', - 'ceiling', - 'chair', - 'cloth', - 'computer', - 'cow', - 'cup', - 'curtain', - 'dog', - 'door', - 'fence', - 'floor', - 'flower', - 'food', - 'grass', - 'ground', - 'horse', - 'keyboard', - 'light', - 'motorbike', - 'mountain', - 'mouse', - 'person', - 'plate', - 'platform', - 'pottedplant', - 'road', - 'rock', - 'sheep', - 'shelves', - 'sidewalk', - 'sign', - 'sky', - 'snow', - 'sofa', - 'table', - 'track', - 'train', - 'tree', - 'truck', - 'tvmonitor', - 'wall', - 'water', - 'window', - 'wood', - ) - - PALETTE = [ - [120, 120, 120], - [180, 120, 120], - [6, 230, 230], - [80, 50, 50], - [4, 200, 3], - [120, 120, 80], - [140, 140, 140], - [204, 5, 255], - [230, 230, 230], - [4, 250, 7], - [224, 5, 255], - [235, 255, 7], - [150, 5, 61], - [120, 120, 70], - [8, 255, 51], - [255, 6, 82], - [143, 255, 140], - [204, 255, 4], - [255, 51, 7], - [204, 70, 3], - [0, 102, 200], - [61, 230, 250], - [255, 6, 51], - [11, 102, 255], - [255, 7, 71], - [255, 9, 224], - [9, 7, 230], - [220, 220, 220], - [255, 9, 92], - [112, 9, 255], - [8, 255, 214], - [7, 255, 224], - [255, 184, 6], - [10, 255, 71], - [255, 41, 10], - [7, 255, 255], - [224, 255, 8], - [102, 8, 255], - [255, 61, 6], - [255, 194, 7], - [255, 122, 8], - [0, 255, 20], - [255, 8, 41], - [255, 5, 153], - [6, 51, 255], - [235, 12, 255], - [160, 150, 20], - [0, 163, 255], - [140, 140, 140], - [250, 10, 15], - [20, 255, 0], - [31, 255, 0], - [255, 31, 0], - [255, 224, 0], - [153, 255, 0], - [0, 0, 255], - [255, 71, 0], - [0, 235, 255], - [0, 173, 255], - [31, 0, 255], - ] - - def __init__(self, split, **kwargs): - super(PascalContextDataset, self).__init__( - img_suffix='.jpg', seg_map_suffix='.png', split=split, reduce_zero_label=False, **kwargs - ) - assert osp.exists(self.img_dir) and self.split is not None - - -@DATASETS.register_module() -class PascalContextDataset59(CustomDataset): - """PascalContext dataset. - - In segmentation map annotation for PascalContext, 0 stands for background, - which is included in 60 categories. ``reduce_zero_label`` is fixed to - False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is - fixed to '.png'. - - Args: - split (str): Split txt file for PascalContext. - """ - - CLASSES = ( - 'aeroplane', - 'bag', - 'bed', - 'bedclothes', - 'bench', - 'bicycle', - 'bird', - 'boat', - 'book', - 'bottle', - 'building', - 'bus', - 'cabinet', - 'car', - 'cat', - 'ceiling', - 'chair', - 'cloth', - 'computer', - 'cow', - 'cup', - 'curtain', - 'dog', - 'door', - 'fence', - 'floor', - 'flower', - 'food', - 'grass', - 'ground', - 'horse', - 'keyboard', - 'light', - 'motorbike', - 'mountain', - 'mouse', - 'person', - 'plate', - 'platform', - 'pottedplant', - 'road', - 'rock', - 'sheep', - 'shelves', - 'sidewalk', - 'sign', - 'sky', - 'snow', - 'sofa', - 'table', - 'track', - 'train', - 'tree', - 'truck', - 'tvmonitor', - 'wall', - 'water', - 'window', - 'wood', - ) - - PALETTE = [ - [180, 120, 120], - [6, 230, 230], - [80, 50, 50], - [4, 200, 3], - [120, 120, 80], - [140, 140, 140], - [204, 5, 255], - [230, 230, 230], - [4, 250, 7], - [224, 5, 255], - [235, 255, 7], - [150, 5, 61], - [120, 120, 70], - [8, 255, 51], - [255, 6, 82], - [143, 255, 140], - [204, 255, 4], - [255, 51, 7], - [204, 70, 3], - [0, 102, 200], - [61, 230, 250], - [255, 6, 51], - [11, 102, 255], - [255, 7, 71], - [255, 9, 224], - [9, 7, 230], - [220, 220, 220], - [255, 9, 92], - [112, 9, 255], - [8, 255, 214], - [7, 255, 224], - [255, 184, 6], - [10, 255, 71], - [255, 41, 10], - [7, 255, 255], - [224, 255, 8], - [102, 8, 255], - [255, 61, 6], - [255, 194, 7], - [255, 122, 8], - [0, 255, 20], - [255, 8, 41], - [255, 5, 153], - [6, 51, 255], - [235, 12, 255], - [160, 150, 20], - [0, 163, 255], - [140, 140, 140], - [250, 10, 15], - [20, 255, 0], - [31, 255, 0], - [255, 31, 0], - [255, 224, 0], - [153, 255, 0], - [0, 0, 255], - [255, 71, 0], - [0, 235, 255], - [0, 173, 255], - [31, 0, 255], - ] - - def __init__(self, split, **kwargs): - super(PascalContextDataset59, self).__init__( - img_suffix='.jpg', seg_map_suffix='.png', split=split, reduce_zero_label=True, **kwargs - ) - assert osp.exists(self.img_dir) and self.split is not None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py deleted file mode 100644 index 52eb533242b3..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py +++ /dev/null @@ -1,43 +0,0 @@ -from .compose import Compose -from .formating import Collect, ImageToTensor, ToDataContainer, ToTensor, Transpose, to_tensor -from .loading import LoadAnnotations, LoadImageFromFile -from .test_time_aug import MultiScaleFlipAug -from .transforms import ( - CLAHE, - AdjustGamma, - Normalize, - Pad, - PhotoMetricDistortion, - RandomCrop, - RandomFlip, - RandomRotate, - Rerange, - Resize, - RGB2Gray, - SegRescale, -) - -__all__ = [ - 'Compose', - 'to_tensor', - 'ToTensor', - 'ImageToTensor', - 'ToDataContainer', - 'Transpose', - 'Collect', - 'LoadAnnotations', - 'LoadImageFromFile', - 'MultiScaleFlipAug', - 'Resize', - 'RandomFlip', - 'Pad', - 'RandomCrop', - 'Normalize', - 'SegRescale', - 'PhotoMetricDistortion', - 'RandomRotate', - 'AdjustGamma', - 'CLAHE', - 'Rerange', - 'RGB2Gray', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/compose.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/compose.py deleted file mode 100644 index c3b11a9870a5..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/compose.py +++ /dev/null @@ -1,51 +0,0 @@ -import collections - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import build_from_cfg - -from ..builder import PIPELINES - - -@PIPELINES.register_module() -class Compose(object): - """Compose multiple transforms sequentially. - - Args: - transforms (Sequence[dict | callable]): Sequence of transform object or - config dict to be composed. - """ - - def __init__(self, transforms): - assert isinstance(transforms, collections.abc.Sequence) - self.transforms = [] - for transform in transforms: - if isinstance(transform, dict): - transform = build_from_cfg(transform, PIPELINES) - self.transforms.append(transform) - elif callable(transform): - self.transforms.append(transform) - else: - raise TypeError('transform must be callable or a dict') - - def __call__(self, data): - """Call function to apply transforms sequentially. - - Args: - data (dict): A result dict contains the data to transform. - - Returns: - dict: Transformed data. - """ - - for t in self.transforms: - data = t(data) - if data is None: - return None - return data - - def __repr__(self): - format_string = self.__class__.__name__ + '(' - for t in self.transforms: - format_string += '\n' - format_string += f' {t}' - format_string += '\n)' - return format_string diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py deleted file mode 100644 index e5222a69bec6..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py +++ /dev/null @@ -1,294 +0,0 @@ -from collections.abc import Sequence - -import numpy as np -import torch - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import DataContainer as DC - -from ..builder import PIPELINES - - -def to_tensor(data): - """Convert objects of various python types to :obj:`torch.Tensor`. - - Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, - :class:`Sequence`, :class:`int` and :class:`float`. - - Args: - data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to - be converted. - """ - - if isinstance(data, torch.Tensor): - return data - elif isinstance(data, np.ndarray): - return torch.from_numpy(data) - elif isinstance(data, Sequence) and not mmcv.is_str(data): - return torch.tensor(data) - elif isinstance(data, int): - return torch.LongTensor([data]) - elif isinstance(data, float): - return torch.FloatTensor([data]) - else: - raise TypeError(f'type {type(data)} cannot be converted to tensor.') - - -@PIPELINES.register_module() -class ToTensor(object): - """Convert some results to :obj:`torch.Tensor` by given keys. - - Args: - keys (Sequence[str]): Keys that need to be converted to Tensor. - """ - - def __init__(self, keys): - self.keys = keys - - def __call__(self, results): - """Call function to convert data in results to :obj:`torch.Tensor`. - - Args: - results (dict): Result dict contains the data to convert. - - Returns: - dict: The result dict contains the data converted - to :obj:`torch.Tensor`. - """ - - for key in self.keys: - results[key] = to_tensor(results[key]) - return results - - def __repr__(self): - return self.__class__.__name__ + f'(keys={self.keys})' - - -@PIPELINES.register_module() -class ImageToTensor(object): - """Convert image to :obj:`torch.Tensor` by given keys. - - The dimension order of input image is (H, W, C). The pipeline will convert - it to (C, H, W). If only 2 dimension (H, W) is given, the output would be - (1, H, W). - - Args: - keys (Sequence[str]): Key of images to be converted to Tensor. - """ - - def __init__(self, keys): - self.keys = keys - - def __call__(self, results): - """Call function to convert image in results to :obj:`torch.Tensor` and - transpose the channel order. - - Args: - results (dict): Result dict contains the image data to convert. - - Returns: - dict: The result dict contains the image converted - to :obj:`torch.Tensor` and transposed to (C, H, W) order. - """ - - for key in self.keys: - img = results[key] - if len(img.shape) < 3: - img = np.expand_dims(img, -1) - results[key] = to_tensor(img.transpose(2, 0, 1)) - return results - - def __repr__(self): - return self.__class__.__name__ + f'(keys={self.keys})' - - -@PIPELINES.register_module() -class Transpose(object): - """Transpose some results by given keys. - - Args: - keys (Sequence[str]): Keys of results to be transposed. - order (Sequence[int]): Order of transpose. - """ - - def __init__(self, keys, order): - self.keys = keys - self.order = order - - def __call__(self, results): - """Call function to convert image in results to :obj:`torch.Tensor` and - transpose the channel order. - - Args: - results (dict): Result dict contains the image data to convert. - - Returns: - dict: The result dict contains the image converted - to :obj:`torch.Tensor` and transposed to (C, H, W) order. - """ - - for key in self.keys: - results[key] = results[key].transpose(self.order) - return results - - def __repr__(self): - return self.__class__.__name__ + f'(keys={self.keys}, order={self.order})' - - -@PIPELINES.register_module() -class ToDataContainer(object): - """Convert results to :obj:`mmcv.DataContainer` by given fields. - - Args: - fields (Sequence[dict]): Each field is a dict like - ``dict(key='xxx', **kwargs)``. The ``key`` in result will - be converted to :obj:`mmcv.DataContainer` with ``**kwargs``. - Default: ``(dict(key='img', stack=True), - dict(key='gt_semantic_seg'))``. - """ - - def __init__(self, fields=(dict(key='img', stack=True), dict(key='gt_semantic_seg'))): - self.fields = fields - - def __call__(self, results): - """Call function to convert data in results to - :obj:`mmcv.DataContainer`. - - Args: - results (dict): Result dict contains the data to convert. - - Returns: - dict: The result dict contains the data converted to - :obj:`mmcv.DataContainer`. - """ - - for field in self.fields: - field = field.copy() - key = field.pop('key') - results[key] = DC(results[key], **field) - return results - - def __repr__(self): - return self.__class__.__name__ + f'(fields={self.fields})' - - -@PIPELINES.register_module() -class DefaultFormatBundle(object): - """Default formatting bundle. - - It simplifies the pipeline of formatting common fields, including "img" - and "gt_semantic_seg". These fields are formatted as follows. - - - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) - - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor, - (3)to DataContainer (stack=True) - """ - - def __call__(self, results): - """Call function to transform and format common fields in results. - - Args: - results (dict): Result dict contains the data to convert. - - Returns: - dict: The result dict contains the data that is formatted with - default bundle. - """ - - if 'img' in results: - img = results['img'] - if len(img.shape) < 3: - img = np.expand_dims(img, -1) - img = np.ascontiguousarray(img.transpose(2, 0, 1)) - results['img'] = DC(to_tensor(img), stack=True) - if 'gt_semantic_seg' in results: - # convert to long - results['gt_semantic_seg'] = DC( - to_tensor(results['gt_semantic_seg'][None, ...].astype(np.int64)), stack=True - ) - return results - - def __repr__(self): - return self.__class__.__name__ - - -@PIPELINES.register_module() -class Collect(object): - """Collect data from the loader relevant to the specific task. - - This is usually the last stage of the data loader pipeline. Typically keys - is set to some subset of "img", "gt_semantic_seg". - - The "img_meta" item is always populated. The contents of the "img_meta" - dictionary depends on "meta_keys". By default this includes: - - - "img_shape": shape of the image input to the network as a tuple - (h, w, c). Note that images may be zero padded on the bottom/right - if the batch tensor is larger than this shape. - - - "scale_factor": a float indicating the preprocessing scale - - - "flip": a boolean indicating if image flip transform was used - - - "filename": path to the image file - - - "ori_shape": original shape of the image as a tuple (h, w, c) - - - "pad_shape": image shape after padding - - - "img_norm_cfg": a dict of normalization information: - - mean - per channel mean subtraction - - std - per channel std divisor - - to_rgb - bool indicating if bgr was converted to rgb - - Args: - keys (Sequence[str]): Keys of results to be collected in ``data``. - meta_keys (Sequence[str], optional): Meta keys to be converted to - ``mmcv.DataContainer`` and collected in ``data[img_metas]``. - Default: ``('filename', 'ori_filename', 'ori_shape', 'img_shape', - 'pad_shape', 'scale_factor', 'flip', 'flip_direction', - 'img_norm_cfg')`` - """ - - def __init__( - self, - keys, - meta_keys=( - 'filename', - 'ori_filename', - 'ori_shape', - 'img_shape', - 'pad_shape', - 'scale_factor', - 'flip', - 'flip_direction', - 'img_norm_cfg', - ), - ): - self.keys = keys - self.meta_keys = meta_keys - - def __call__(self, results): - """Call function to collect keys in results. The keys in ``meta_keys`` - will be converted to :obj:mmcv.DataContainer. - - Args: - results (dict): Result dict contains the data to collect. - - Returns: - dict: The result dict contains the following keys - - keys in``self.keys`` - - ``img_metas`` - """ - - data = {} - img_meta = {} - for key in self.meta_keys: - img_meta[key] = results[key] - data['img_metas'] = DC(img_meta, cpu_only=True) - for key in self.keys: - data[key] = results[key] - return data - - def __repr__(self): - return self.__class__.__name__ + f'(keys={self.keys}, meta_keys={self.meta_keys})' diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py deleted file mode 100644 index 5d2e2a51a1bf..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py +++ /dev/null @@ -1,145 +0,0 @@ -import os.path as osp - -import numpy as np - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - -from ..builder import PIPELINES - - -@PIPELINES.register_module() -class LoadImageFromFile(object): - """Load an image from file. - - Required keys are "img_prefix" and "img_info" (a dict that must contain the - key "filename"). Added or updated keys are "filename", "img", "img_shape", - "ori_shape" (same as `img_shape`), "pad_shape" (same as `img_shape`), - "scale_factor" (1.0) and "img_norm_cfg" (means=0 and stds=1). - - Args: - to_float32 (bool): Whether to convert the loaded image to a float32 - numpy array. If set to False, the loaded image is an uint8 array. - Defaults to False. - color_type (str): The flag argument for :func:`mmcv.imfrombytes`. - Defaults to 'color'. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmcv.fileio.FileClient` for details. - Defaults to ``dict(backend='disk')``. - imdecode_backend (str): Backend for :func:`mmcv.imdecode`. Default: - 'cv2' - """ - - def __init__( - self, to_float32=False, color_type='color', file_client_args=dict(backend='disk'), imdecode_backend='cv2' - ): - self.to_float32 = to_float32 - self.color_type = color_type - self.file_client_args = file_client_args.copy() - self.file_client = None - self.imdecode_backend = imdecode_backend - - def __call__(self, results): - """Call functions to load image and get image meta information. - - Args: - results (dict): Result dict from :obj:`mmseg.CustomDataset`. - - Returns: - dict: The dict contains loaded image and meta information. - """ - - if self.file_client is None: - self.file_client = mmcv.FileClient(**self.file_client_args) - - if results.get('img_prefix') is not None: - filename = osp.join(results['img_prefix'], results['img_info']['filename']) - else: - filename = results['img_info']['filename'] - img_bytes = self.file_client.get(filename) - img = mmcv.imfrombytes(img_bytes, flag=self.color_type, backend=self.imdecode_backend) - if self.to_float32: - img = img.astype(np.float32) - - results['filename'] = filename - results['ori_filename'] = results['img_info']['filename'] - results['img'] = img - results['img_shape'] = img.shape - results['ori_shape'] = img.shape - # Set initial values for default meta_keys - results['pad_shape'] = img.shape - results['scale_factor'] = 1.0 - num_channels = 1 if len(img.shape) < 3 else img.shape[2] - results['img_norm_cfg'] = dict( - mean=np.zeros(num_channels, dtype=np.float32), std=np.ones(num_channels, dtype=np.float32), to_rgb=False - ) - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += f'(to_float32={self.to_float32},' - repr_str += f"color_type='{self.color_type}'," - repr_str += f"imdecode_backend='{self.imdecode_backend}')" - return repr_str - - -@PIPELINES.register_module() -class LoadAnnotations(object): - """Load annotations for semantic segmentation. - - Args: - reduce_zero_label (bool): Whether reduce all label value by 1. - Usually used for datasets where 0 is background label. - Default: False. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmcv.fileio.FileClient` for details. - Defaults to ``dict(backend='disk')``. - imdecode_backend (str): Backend for :func:`mmcv.imdecode`. Default: - 'pillow' - """ - - def __init__(self, reduce_zero_label=False, file_client_args=dict(backend='disk'), imdecode_backend='pillow'): - self.reduce_zero_label = reduce_zero_label - self.file_client_args = file_client_args.copy() - self.file_client = None - self.imdecode_backend = imdecode_backend - - def __call__(self, results): - """Call function to load multiple types annotations. - - Args: - results (dict): Result dict from :obj:`mmseg.CustomDataset`. - - Returns: - dict: The dict contains loaded semantic segmentation annotations. - """ - - if self.file_client is None: - self.file_client = mmcv.FileClient(**self.file_client_args) - - if results.get('seg_prefix', None) is not None: - filename = osp.join(results['seg_prefix'], results['ann_info']['seg_map']) - else: - filename = results['ann_info']['seg_map'] - img_bytes = self.file_client.get(filename) - gt_semantic_seg = ( - mmcv.imfrombytes(img_bytes, flag='unchanged', backend=self.imdecode_backend).squeeze().astype(np.uint8) - ) - # modify if custom classes - if results.get('label_map', None) is not None: - for old_id, new_id in results['label_map'].items(): - gt_semantic_seg[gt_semantic_seg == old_id] = new_id - # reduce zero_label - if self.reduce_zero_label: - # avoid using underflow conversion - gt_semantic_seg[gt_semantic_seg == 0] = 255 - gt_semantic_seg = gt_semantic_seg - 1 - gt_semantic_seg[gt_semantic_seg == 254] = 255 - results['gt_semantic_seg'] = gt_semantic_seg - results['seg_fields'].append('gt_semantic_seg') - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += f'(reduce_zero_label={self.reduce_zero_label},' - repr_str += f"imdecode_backend='{self.imdecode_backend}')" - return repr_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py deleted file mode 100644 index e8675fb4c872..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py +++ /dev/null @@ -1,118 +0,0 @@ -import warnings - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - -from ..builder import PIPELINES -from .compose import Compose - - -@PIPELINES.register_module() -class MultiScaleFlipAug(object): - """Test-time augmentation with multiple scales and flipping. - - An example configuration is as followed: - - .. code-block:: - - img_scale=(2048, 1024), - img_ratios=[0.5, 1.0], - flip=True, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ] - - After MultiScaleFLipAug with above configuration, the results are wrapped - into lists of the same length as followed: - - .. code-block:: - - dict( - img=[...], - img_shape=[...], - scale=[(1024, 512), (1024, 512), (2048, 1024), (2048, 1024)] - flip=[False, True, False, True] - ... - ) - - Args: - transforms (list[dict]): Transforms to apply in each augmentation. - img_scale (None | tuple | list[tuple]): Images scales for resizing. - img_ratios (float | list[float]): Image ratios for resizing - flip (bool): Whether apply flip augmentation. Default: False. - flip_direction (str | list[str]): Flip augmentation directions, - options are "horizontal" and "vertical". If flip_direction is list, - multiple flip augmentations will be applied. - It has no effect when flip == False. Default: "horizontal". - """ - - def __init__(self, transforms, img_scale, img_ratios=None, flip=False, flip_direction='horizontal'): - self.transforms = Compose(transforms) - if img_ratios is not None: - img_ratios = img_ratios if isinstance(img_ratios, list) else [img_ratios] - assert mmcv.is_list_of(img_ratios, float) - if img_scale is None: - # mode 1: given img_scale=None and a range of image ratio - self.img_scale = None - assert mmcv.is_list_of(img_ratios, float) - elif isinstance(img_scale, tuple) and mmcv.is_list_of(img_ratios, float): - assert len(img_scale) == 2 - # mode 2: given a scale and a range of image ratio - self.img_scale = [(int(img_scale[0] * ratio), int(img_scale[1] * ratio)) for ratio in img_ratios] - else: - # mode 3: given multiple scales - self.img_scale = img_scale if isinstance(img_scale, list) else [img_scale] - assert mmcv.is_list_of(self.img_scale, tuple) or self.img_scale is None - self.flip = flip - self.img_ratios = img_ratios - self.flip_direction = flip_direction if isinstance(flip_direction, list) else [flip_direction] - assert mmcv.is_list_of(self.flip_direction, str) - if not self.flip and self.flip_direction != ['horizontal']: - warnings.warn('flip_direction has no effect when flip is set to False') - if self.flip and not any([t['type'] == 'RandomFlip' for t in transforms]): - warnings.warn('flip has no effect when RandomFlip is not in transforms') - - def __call__(self, results): - """Call function to apply test time augment transforms on results. - - Args: - results (dict): Result dict contains the data to transform. - - Returns: - dict[str: list]: The augmented data, where each value is wrapped - into a list. - """ - - aug_data = [] - if self.img_scale is None and mmcv.is_list_of(self.img_ratios, float): - h, w = results['img'].shape[:2] - img_scale = [(int(w * ratio), int(h * ratio)) for ratio in self.img_ratios] - else: - img_scale = self.img_scale - flip_aug = [False, True] if self.flip else [False] - for scale in img_scale: - for flip in flip_aug: - for direction in self.flip_direction: - _results = results.copy() - _results['scale'] = scale - _results['flip'] = flip - _results['flip_direction'] = direction - data = self.transforms(_results) - aug_data.append(data) - # list of dict to dict of list - aug_data_dict = {key: [] for key in aug_data[0]} - for data in aug_data: - for key, val in data.items(): - aug_data_dict[key].append(val) - return aug_data_dict - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += f'(transforms={self.transforms}, ' - repr_str += f'img_scale={self.img_scale}, flip={self.flip})' - repr_str += f'flip_direction={self.flip_direction}' - return repr_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py deleted file mode 100644 index 12bf591cff32..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py +++ /dev/null @@ -1,839 +0,0 @@ -import numpy as np -from numpy import random - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import deprecated_api_warning, is_tuple_of - -from ..builder import PIPELINES - - -@PIPELINES.register_module() -class Resize(object): - """Resize images & seg. - - This transform resizes the input image to some scale. If the input dict - contains the key "scale", then the scale in the input dict is used, - otherwise the specified scale in the init method is used. - - ``img_scale`` can be None, a tuple (single-scale) or a list of tuple - (multi-scale). There are 4 multiscale modes: - - - ``ratio_range is not None``: - 1. When img_scale is None, img_scale is the shape of image in results - (img_scale = results['img'].shape[:2]) and the image is resized based - on the original size. (mode 1) - 2. When img_scale is a tuple (single-scale), randomly sample a ratio from - the ratio range and multiply it with the image scale. (mode 2) - - - ``ratio_range is None and multiscale_mode == "range"``: randomly sample a - scale from the a range. (mode 3) - - - ``ratio_range is None and multiscale_mode == "value"``: randomly sample a - scale from multiple scales. (mode 4) - - Args: - img_scale (tuple or list[tuple]): Images scales for resizing. - multiscale_mode (str): Either "range" or "value". - ratio_range (tuple[float]): (min_ratio, max_ratio) - keep_ratio (bool): Whether to keep the aspect ratio when resizing the - image. - """ - - def __init__(self, img_scale=None, multiscale_mode='range', ratio_range=None, keep_ratio=True): - if img_scale is None: - self.img_scale = None - else: - if isinstance(img_scale, list): - self.img_scale = img_scale - else: - self.img_scale = [img_scale] - assert mmcv.is_list_of(self.img_scale, tuple) - - if ratio_range is not None: - # mode 1: given img_scale=None and a range of image ratio - # mode 2: given a scale and a range of image ratio - assert self.img_scale is None or len(self.img_scale) == 1 - else: - # mode 3 and 4: given multiple scales or a range of scales - assert multiscale_mode in ['value', 'range'] - - self.multiscale_mode = multiscale_mode - self.ratio_range = ratio_range - self.keep_ratio = keep_ratio - - @staticmethod - def random_select(img_scales): - """Randomly select an img_scale from given candidates. - - Args: - img_scales (list[tuple]): Images scales for selection. - - Returns: - (tuple, int): Returns a tuple ``(img_scale, scale_dix)``, - where ``img_scale`` is the selected image scale and - ``scale_idx`` is the selected index in the given candidates. - """ - - assert mmcv.is_list_of(img_scales, tuple) - scale_idx = np.random.randint(len(img_scales)) - img_scale = img_scales[scale_idx] - return img_scale, scale_idx - - @staticmethod - def random_sample(img_scales): - """Randomly sample an img_scale when ``multiscale_mode=='range'``. - - Args: - img_scales (list[tuple]): Images scale range for sampling. - There must be two tuples in img_scales, which specify the lower - and upper bound of image scales. - - Returns: - (tuple, None): Returns a tuple ``(img_scale, None)``, where - ``img_scale`` is sampled scale and None is just a placeholder - to be consistent with :func:`random_select`. - """ - - assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2 - img_scale_long = [max(s) for s in img_scales] - img_scale_short = [min(s) for s in img_scales] - long_edge = np.random.randint(min(img_scale_long), max(img_scale_long) + 1) - short_edge = np.random.randint(min(img_scale_short), max(img_scale_short) + 1) - img_scale = (long_edge, short_edge) - return img_scale, None - - @staticmethod - def random_sample_ratio(img_scale, ratio_range): - """Randomly sample an img_scale when ``ratio_range`` is specified. - - A ratio will be randomly sampled from the range specified by - ``ratio_range``. Then it would be multiplied with ``img_scale`` to - generate sampled scale. - - Args: - img_scale (tuple): Images scale base to multiply with ratio. - ratio_range (tuple[float]): The minimum and maximum ratio to scale - the ``img_scale``. - - Returns: - (tuple, None): Returns a tuple ``(scale, None)``, where - ``scale`` is sampled ratio multiplied with ``img_scale`` and - None is just a placeholder to be consistent with - :func:`random_select`. - """ - - assert isinstance(img_scale, tuple) and len(img_scale) == 2 - min_ratio, max_ratio = ratio_range - assert min_ratio <= max_ratio - ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio - scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio) - return scale, None - - def _random_scale(self, results): - """Randomly sample an img_scale according to ``ratio_range`` and - ``multiscale_mode``. - - If ``ratio_range`` is specified, a ratio will be sampled and be - multiplied with ``img_scale``. - If multiple scales are specified by ``img_scale``, a scale will be - sampled according to ``multiscale_mode``. - Otherwise, single scale will be used. - - Args: - results (dict): Result dict from :obj:`dataset`. - - Returns: - dict: Two new keys 'scale` and 'scale_idx` are added into - ``results``, which would be used by subsequent pipelines. - """ - - if self.ratio_range is not None: - if self.img_scale is None: - h, w = results['img'].shape[:2] - scale, scale_idx = self.random_sample_ratio((w, h), self.ratio_range) - else: - scale, scale_idx = self.random_sample_ratio(self.img_scale[0], self.ratio_range) - elif len(self.img_scale) == 1: - scale, scale_idx = self.img_scale[0], 0 - elif self.multiscale_mode == 'range': - scale, scale_idx = self.random_sample(self.img_scale) - elif self.multiscale_mode == 'value': - scale, scale_idx = self.random_select(self.img_scale) - else: - raise NotImplementedError - - results['scale'] = scale - results['scale_idx'] = scale_idx - - def _resize_img(self, results): - """Resize images with ``results['scale']``.""" - if self.keep_ratio: - img, scale_factor = mmcv.imrescale(results['img'], results['scale'], return_scale=True) - # the w_scale and h_scale has minor difference - # a real fix should be done in the mmcv.imrescale in the future - new_h, new_w = img.shape[:2] - h, w = results['img'].shape[:2] - w_scale = new_w / w - h_scale = new_h / h - else: - img, w_scale, h_scale = mmcv.imresize(results['img'], results['scale'], return_scale=True) - scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], dtype=np.float32) - results['img'] = img - results['img_shape'] = img.shape - results['pad_shape'] = img.shape # in case that there is no padding - results['scale_factor'] = scale_factor - results['keep_ratio'] = self.keep_ratio - - def _resize_seg(self, results): - """Resize semantic segmentation map with ``results['scale']``.""" - for key in results.get('seg_fields', []): - if self.keep_ratio: - gt_seg = mmcv.imrescale(results[key], results['scale'], interpolation='nearest') - else: - gt_seg = mmcv.imresize(results[key], results['scale'], interpolation='nearest') - results[key] = gt_seg - - def __call__(self, results): - """Call function to resize images, bounding boxes, masks, semantic - segmentation map. - - Args: - results (dict): Result dict from loading pipeline. - - Returns: - dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', - 'keep_ratio' keys are added into result dict. - """ - - if 'scale' not in results: - self._random_scale(results) - self._resize_img(results) - self._resize_seg(results) - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += ( - f'(img_scale={self.img_scale}, ' - f'multiscale_mode={self.multiscale_mode}, ' - f'ratio_range={self.ratio_range}, ' - f'keep_ratio={self.keep_ratio})' - ) - return repr_str - - -@PIPELINES.register_module() -class RandomFlip(object): - """Flip the image & seg. - - If the input dict contains the key "flip", then the flag will be used, - otherwise it will be randomly decided by a ratio specified in the init - method. - - Args: - prob (float, optional): The flipping probability. Default: None. - direction(str, optional): The flipping direction. Options are - 'horizontal' and 'vertical'. Default: 'horizontal'. - """ - - @deprecated_api_warning({'flip_ratio': 'prob'}, cls_name='RandomFlip') - def __init__(self, prob=None, direction='horizontal'): - self.prob = prob - self.direction = direction - if prob is not None: - assert prob >= 0 and prob <= 1 - assert direction in ['horizontal', 'vertical'] - - def __call__(self, results): - """Call function to flip bounding boxes, masks, semantic segmentation - maps. - - Args: - results (dict): Result dict from loading pipeline. - - Returns: - dict: Flipped results, 'flip', 'flip_direction' keys are added into - result dict. - """ - - if 'flip' not in results: - flip = True if np.random.rand() < self.prob else False - results['flip'] = flip - if 'flip_direction' not in results: - results['flip_direction'] = self.direction - if results['flip']: - # flip image - results['img'] = mmcv.imflip(results['img'], direction=results['flip_direction']) - - # flip segs - for key in results.get('seg_fields', []): - # use copy() to make numpy stride positive - results[key] = mmcv.imflip(results[key], direction=results['flip_direction']).copy() - return results - - def __repr__(self): - return self.__class__.__name__ + f'(prob={self.prob})' - - -@PIPELINES.register_module() -class Pad(object): - """Pad the image & mask. - - There are two padding modes: (1) pad to a fixed size and (2) pad to the - minimum size that is divisible by some number. - Added keys are "pad_shape", "pad_fixed_size", "pad_size_divisor", - - Args: - size (tuple, optional): Fixed padding size. - size_divisor (int, optional): The divisor of padded size. - pad_val (float, optional): Padding value. Default: 0. - seg_pad_val (float, optional): Padding value of segmentation map. - Default: 255. - """ - - def __init__(self, size=None, size_divisor=None, pad_val=0, seg_pad_val=255): - self.size = size - self.size_divisor = size_divisor - self.pad_val = pad_val - self.seg_pad_val = seg_pad_val - # only one of size and size_divisor should be valid - assert size is not None or size_divisor is not None - assert size is None or size_divisor is None - - def _pad_img(self, results): - """Pad images according to ``self.size``.""" - if self.size is not None: - padded_img = mmcv.impad(results['img'], shape=self.size, pad_val=self.pad_val) - elif self.size_divisor is not None: - padded_img = mmcv.impad_to_multiple(results['img'], self.size_divisor, pad_val=self.pad_val) - results['img'] = padded_img - results['pad_shape'] = padded_img.shape - results['pad_fixed_size'] = self.size - results['pad_size_divisor'] = self.size_divisor - - def _pad_seg(self, results): - """Pad masks according to ``results['pad_shape']``.""" - for key in results.get('seg_fields', []): - results[key] = mmcv.impad(results[key], shape=results['pad_shape'][:2], pad_val=self.seg_pad_val) - - def __call__(self, results): - """Call function to pad images, masks, semantic segmentation maps. - - Args: - results (dict): Result dict from loading pipeline. - - Returns: - dict: Updated result dict. - """ - - self._pad_img(results) - self._pad_seg(results) - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += f'(size={self.size}, size_divisor={self.size_divisor}, ' f'pad_val={self.pad_val})' - return repr_str - - -@PIPELINES.register_module() -class Normalize(object): - """Normalize the image. - - Added key is "img_norm_cfg". - - Args: - mean (sequence): Mean values of 3 channels. - std (sequence): Std values of 3 channels. - to_rgb (bool): Whether to convert the image from BGR to RGB, - default is true. - """ - - def __init__(self, mean, std, to_rgb=True): - self.mean = np.array(mean, dtype=np.float32) - self.std = np.array(std, dtype=np.float32) - self.to_rgb = to_rgb - - def __call__(self, results): - """Call function to normalize images. - - Args: - results (dict): Result dict from loading pipeline. - - Returns: - dict: Normalized results, 'img_norm_cfg' key is added into - result dict. - """ - - results['img'] = mmcv.imnormalize(results['img'], self.mean, self.std, self.to_rgb) - results['img_norm_cfg'] = dict(mean=self.mean, std=self.std, to_rgb=self.to_rgb) - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += f'(mean={self.mean}, std={self.std}, to_rgb=' f'{self.to_rgb})' - return repr_str - - -@PIPELINES.register_module() -class Rerange(object): - """Rerange the image pixel value. - - Args: - min_value (float or int): Minimum value of the reranged image. - Default: 0. - max_value (float or int): Maximum value of the reranged image. - Default: 255. - """ - - def __init__(self, min_value=0, max_value=255): - assert isinstance(min_value, float) or isinstance(min_value, int) - assert isinstance(max_value, float) or isinstance(max_value, int) - assert min_value < max_value - self.min_value = min_value - self.max_value = max_value - - def __call__(self, results): - """Call function to rerange images. - - Args: - results (dict): Result dict from loading pipeline. - Returns: - dict: Reranged results. - """ - - img = results['img'] - img_min_value = np.min(img) - img_max_value = np.max(img) - - assert img_min_value < img_max_value - # rerange to [0, 1] - img = (img - img_min_value) / (img_max_value - img_min_value) - # rerange to [min_value, max_value] - img = img * (self.max_value - self.min_value) + self.min_value - results['img'] = img - - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += f'(min_value={self.min_value}, max_value={self.max_value})' - return repr_str - - -@PIPELINES.register_module() -class CLAHE(object): - """Use CLAHE method to process the image. - - See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J]. - Graphics Gems, 1994:474-485.` for more information. - - Args: - clip_limit (float): Threshold for contrast limiting. Default: 40.0. - tile_grid_size (tuple[int]): Size of grid for histogram equalization. - Input image will be divided into equally sized rectangular tiles. - It defines the number of tiles in row and column. Default: (8, 8). - """ - - def __init__(self, clip_limit=40.0, tile_grid_size=(8, 8)): - assert isinstance(clip_limit, (float, int)) - self.clip_limit = clip_limit - assert is_tuple_of(tile_grid_size, int) - assert len(tile_grid_size) == 2 - self.tile_grid_size = tile_grid_size - - def __call__(self, results): - """Call function to Use CLAHE method process images. - - Args: - results (dict): Result dict from loading pipeline. - - Returns: - dict: Processed results. - """ - - for i in range(results['img'].shape[2]): - results['img'][:, :, i] = mmcv.clahe( - np.array(results['img'][:, :, i], dtype=np.uint8), self.clip_limit, self.tile_grid_size - ) - - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += f'(clip_limit={self.clip_limit}, ' f'tile_grid_size={self.tile_grid_size})' - return repr_str - - -@PIPELINES.register_module() -class RandomCrop(object): - """Random crop the image & seg. - - Args: - crop_size (tuple): Expected size after cropping, (h, w). - cat_max_ratio (float): The maximum ratio that single category could - occupy. - """ - - def __init__(self, crop_size, cat_max_ratio=1.0, ignore_index=255): - assert crop_size[0] > 0 and crop_size[1] > 0 - self.crop_size = crop_size - self.cat_max_ratio = cat_max_ratio - self.ignore_index = ignore_index - - def get_crop_bbox(self, img): - """Randomly get a crop bounding box.""" - margin_h = max(img.shape[0] - self.crop_size[0], 0) - margin_w = max(img.shape[1] - self.crop_size[1], 0) - offset_h = np.random.randint(0, margin_h + 1) - offset_w = np.random.randint(0, margin_w + 1) - crop_y1, crop_y2 = offset_h, offset_h + self.crop_size[0] - crop_x1, crop_x2 = offset_w, offset_w + self.crop_size[1] - - return crop_y1, crop_y2, crop_x1, crop_x2 - - def crop(self, img, crop_bbox): - """Crop from ``img``""" - crop_y1, crop_y2, crop_x1, crop_x2 = crop_bbox - img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...] - return img - - def __call__(self, results): - """Call function to randomly crop images, semantic segmentation maps. - - Args: - results (dict): Result dict from loading pipeline. - - Returns: - dict: Randomly cropped results, 'img_shape' key in result dict is - updated according to crop size. - """ - - img = results['img'] - crop_bbox = self.get_crop_bbox(img) - if self.cat_max_ratio < 1.0: - # Repeat 10 times - for _ in range(10): - seg_temp = self.crop(results['gt_semantic_seg'], crop_bbox) - labels, cnt = np.unique(seg_temp, return_counts=True) - cnt = cnt[labels != self.ignore_index] - if len(cnt) > 1 and np.max(cnt) / np.sum(cnt) < self.cat_max_ratio: - break - crop_bbox = self.get_crop_bbox(img) - - # crop the image - img = self.crop(img, crop_bbox) - img_shape = img.shape - results['img'] = img - results['img_shape'] = img_shape - - # crop semantic seg - for key in results.get('seg_fields', []): - results[key] = self.crop(results[key], crop_bbox) - - return results - - def __repr__(self): - return self.__class__.__name__ + f'(crop_size={self.crop_size})' - - -@PIPELINES.register_module() -class RandomRotate(object): - """Rotate the image & seg. - - Args: - prob (float): The rotation probability. - degree (float, tuple[float]): Range of degrees to select from. If - degree is a number instead of tuple like (min, max), - the range of degree will be (``-degree``, ``+degree``) - pad_val (float, optional): Padding value of image. Default: 0. - seg_pad_val (float, optional): Padding value of segmentation map. - Default: 255. - center (tuple[float], optional): Center point (w, h) of the rotation in - the source image. If not specified, the center of the image will be - used. Default: None. - auto_bound (bool): Whether to adjust the image size to cover the whole - rotated image. Default: False - """ - - def __init__(self, prob, degree, pad_val=0, seg_pad_val=255, center=None, auto_bound=False): - self.prob = prob - assert prob >= 0 and prob <= 1 - if isinstance(degree, (float, int)): - assert degree > 0, f'degree {degree} should be positive' - self.degree = (-degree, degree) - else: - self.degree = degree - assert len(self.degree) == 2, f'degree {self.degree} should be a ' f'tuple of (min, max)' - self.pal_val = pad_val - self.seg_pad_val = seg_pad_val - self.center = center - self.auto_bound = auto_bound - - def __call__(self, results): - """Call function to rotate image, semantic segmentation maps. - - Args: - results (dict): Result dict from loading pipeline. - - Returns: - dict: Rotated results. - """ - - rotate = True if np.random.rand() < self.prob else False - degree = np.random.uniform(min(*self.degree), max(*self.degree)) - if rotate: - # rotate image - results['img'] = mmcv.imrotate( - results['img'], angle=degree, border_value=self.pal_val, center=self.center, auto_bound=self.auto_bound - ) - - # rotate segs - for key in results.get('seg_fields', []): - results[key] = mmcv.imrotate( - results[key], - angle=degree, - border_value=self.seg_pad_val, - center=self.center, - auto_bound=self.auto_bound, - interpolation='nearest', - ) - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += ( - f'(prob={self.prob}, ' - f'degree={self.degree}, ' - f'pad_val={self.pal_val}, ' - f'seg_pad_val={self.seg_pad_val}, ' - f'center={self.center}, ' - f'auto_bound={self.auto_bound})' - ) - return repr_str - - -@PIPELINES.register_module() -class RGB2Gray(object): - """Convert RGB image to grayscale image. - - This transform calculate the weighted mean of input image channels with - ``weights`` and then expand the channels to ``out_channels``. When - ``out_channels`` is None, the number of output channels is the same as - input channels. - - Args: - out_channels (int): Expected number of output channels after - transforming. Default: None. - weights (tuple[float]): The weights to calculate the weighted mean. - Default: (0.299, 0.587, 0.114). - """ - - def __init__(self, out_channels=None, weights=(0.299, 0.587, 0.114)): - assert out_channels is None or out_channels > 0 - self.out_channels = out_channels - assert isinstance(weights, tuple) - for item in weights: - assert isinstance(item, (float, int)) - self.weights = weights - - def __call__(self, results): - """Call function to convert RGB image to grayscale image. - - Args: - results (dict): Result dict from loading pipeline. - - Returns: - dict: Result dict with grayscale image. - """ - img = results['img'] - assert len(img.shape) == 3 - assert img.shape[2] == len(self.weights) - weights = np.array(self.weights).reshape((1, 1, -1)) - img = (img * weights).sum(2, keepdims=True) - if self.out_channels is None: - img = img.repeat(weights.shape[2], axis=2) - else: - img = img.repeat(self.out_channels, axis=2) - - results['img'] = img - results['img_shape'] = img.shape - - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += f'(out_channels={self.out_channels}, ' f'weights={self.weights})' - return repr_str - - -@PIPELINES.register_module() -class AdjustGamma(object): - """Using gamma correction to process the image. - - Args: - gamma (float or int): Gamma value used in gamma correction. - Default: 1.0. - """ - - def __init__(self, gamma=1.0): - assert isinstance(gamma, float) or isinstance(gamma, int) - assert gamma > 0 - self.gamma = gamma - inv_gamma = 1.0 / gamma - self.table = np.array([(i / 255.0) ** inv_gamma * 255 for i in np.arange(256)]).astype('uint8') - - def __call__(self, results): - """Call function to process the image with gamma correction. - - Args: - results (dict): Result dict from loading pipeline. - - Returns: - dict: Processed results. - """ - - results['img'] = mmcv.lut_transform(np.array(results['img'], dtype=np.uint8), self.table) - - return results - - def __repr__(self): - return self.__class__.__name__ + f'(gamma={self.gamma})' - - -@PIPELINES.register_module() -class SegRescale(object): - """Rescale semantic segmentation maps. - - Args: - scale_factor (float): The scale factor of the final output. - """ - - def __init__(self, scale_factor=1): - self.scale_factor = scale_factor - - def __call__(self, results): - """Call function to scale the semantic segmentation map. - - Args: - results (dict): Result dict from loading pipeline. - - Returns: - dict: Result dict with semantic segmentation map scaled. - """ - for key in results.get('seg_fields', []): - if self.scale_factor != 1: - results[key] = mmcv.imrescale(results[key], self.scale_factor, interpolation='nearest') - return results - - def __repr__(self): - return self.__class__.__name__ + f'(scale_factor={self.scale_factor})' - - -@PIPELINES.register_module() -class PhotoMetricDistortion(object): - """Apply photometric distortion to image sequentially, every transformation - is applied with a probability of 0.5. The position of random contrast is in - second or second to last. - - 1. random brightness - 2. random contrast (mode 0) - 3. convert color from BGR to HSV - 4. random saturation - 5. random hue - 6. convert color from HSV to BGR - 7. random contrast (mode 1) - - Args: - brightness_delta (int): delta of brightness. - contrast_range (tuple): range of contrast. - saturation_range (tuple): range of saturation. - hue_delta (int): delta of hue. - """ - - def __init__(self, brightness_delta=32, contrast_range=(0.5, 1.5), saturation_range=(0.5, 1.5), hue_delta=18): - self.brightness_delta = brightness_delta - self.contrast_lower, self.contrast_upper = contrast_range - self.saturation_lower, self.saturation_upper = saturation_range - self.hue_delta = hue_delta - - def convert(self, img, alpha=1, beta=0): - """Multiple with alpha and add beat with clip.""" - img = img.astype(np.float32) * alpha + beta - img = np.clip(img, 0, 255) - return img.astype(np.uint8) - - def brightness(self, img): - """Brightness distortion.""" - if random.randint(2): - return self.convert(img, beta=random.uniform(-self.brightness_delta, self.brightness_delta)) - return img - - def contrast(self, img): - """Contrast distortion.""" - if random.randint(2): - return self.convert(img, alpha=random.uniform(self.contrast_lower, self.contrast_upper)) - return img - - def saturation(self, img): - """Saturation distortion.""" - if random.randint(2): - img = mmcv.bgr2hsv(img) - img[:, :, 1] = self.convert( - img[:, :, 1], alpha=random.uniform(self.saturation_lower, self.saturation_upper) - ) - img = mmcv.hsv2bgr(img) - return img - - def hue(self, img): - """Hue distortion.""" - if random.randint(2): - img = mmcv.bgr2hsv(img) - img[:, :, 0] = (img[:, :, 0].astype(int) + random.randint(-self.hue_delta, self.hue_delta)) % 180 - img = mmcv.hsv2bgr(img) - return img - - def __call__(self, results): - """Call function to perform photometric distortion on images. - - Args: - results (dict): Result dict from loading pipeline. - - Returns: - dict: Result dict with images distorted. - """ - - img = results['img'] - # random brightness - img = self.brightness(img) - - # mode == 0 --> do random contrast first - # mode == 1 --> do random contrast last - mode = random.randint(2) - if mode == 1: - img = self.contrast(img) - - # random saturation - img = self.saturation(img) - - # random hue - img = self.hue(img) - - # random contrast - if mode == 0: - img = self.contrast(img) - - results['img'] = img - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += ( - f'(brightness_delta={self.brightness_delta}, ' - f'contrast_range=({self.contrast_lower}, ' - f'{self.contrast_upper}), ' - f'saturation_range=({self.saturation_lower}, ' - f'{self.saturation_upper}), ' - f'hue_delta={self.hue_delta})' - ) - return repr_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py deleted file mode 100644 index a94d01763980..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py +++ /dev/null @@ -1,25 +0,0 @@ -import os.path as osp - -from .builder import DATASETS -from .custom import CustomDataset - - -@DATASETS.register_module() -class STAREDataset(CustomDataset): - """STARE dataset. - - In segmentation map annotation for STARE, 0 stands for background, which is - included in 2 categories. ``reduce_zero_label`` is fixed to False. The - ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to - '.ah.png'. - """ - - CLASSES = ('background', 'vessel') - - PALETTE = [[120, 120, 120], [6, 230, 230]] - - def __init__(self, **kwargs): - super(STAREDataset, self).__init__( - img_suffix='.png', seg_map_suffix='.ah.png', reduce_zero_label=False, **kwargs - ) - assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py deleted file mode 100644 index 5fd6641b33e1..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py +++ /dev/null @@ -1,65 +0,0 @@ -import os.path as osp - -from .builder import DATASETS -from .custom import CustomDataset - - -@DATASETS.register_module() -class PascalVOCDataset(CustomDataset): - """Pascal VOC dataset. - - Args: - split (str): Split txt file for Pascal VOC. - """ - - CLASSES = ( - 'background', - 'aeroplane', - 'bicycle', - 'bird', - 'boat', - 'bottle', - 'bus', - 'car', - 'cat', - 'chair', - 'cow', - 'diningtable', - 'dog', - 'horse', - 'motorbike', - 'person', - 'pottedplant', - 'sheep', - 'sofa', - 'train', - 'tvmonitor', - ) - - PALETTE = [ - [0, 0, 0], - [128, 0, 0], - [0, 128, 0], - [128, 128, 0], - [0, 0, 128], - [128, 0, 128], - [0, 128, 128], - [128, 128, 128], - [64, 0, 0], - [192, 0, 0], - [64, 128, 0], - [192, 128, 0], - [64, 0, 128], - [192, 0, 128], - [64, 128, 128], - [192, 128, 128], - [0, 64, 0], - [128, 64, 0], - [0, 192, 0], - [128, 192, 0], - [0, 64, 128], - ] - - def __init__(self, split, **kwargs): - super(PascalVOCDataset, self).__init__(img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs) - assert osp.exists(self.img_dir) and self.split is not None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py deleted file mode 100644 index 130f6c12914e..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -from .backbones import * # noqa: F401,F403 -from .builder import BACKBONES, HEADS, LOSSES, SEGMENTORS, build_backbone, build_head, build_loss, build_segmentor -from .decode_heads import * # noqa: F401,F403 -from .losses import * # noqa: F401,F403 -from .necks import * # noqa: F401,F403 -from .segmentors import * # noqa: F401,F403 - -__all__ = [ - 'BACKBONES', - 'HEADS', - 'LOSSES', - 'SEGMENTORS', - 'build_backbone', - 'build_head', - 'build_loss', - 'build_segmentor', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py deleted file mode 100644 index ceb46fade97e..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from .cgnet import CGNet - -# from .fast_scnn import FastSCNN -from .hrnet import HRNet -from .mobilenet_v2 import MobileNetV2 -from .mobilenet_v3 import MobileNetV3 -from .resnest import ResNeSt -from .resnet import ResNet, ResNetV1c, ResNetV1d -from .resnext import ResNeXt -from .unet import UNet -from .uniformer import UniFormer -from .vit import VisionTransformer - -__all__ = [ - 'ResNet', - 'ResNetV1c', - 'ResNetV1d', - 'ResNeXt', - 'HRNet', - 'ResNeSt', - 'MobileNetV2', - 'UNet', - 'CGNet', - 'MobileNetV3', - 'VisionTransformer', - 'UniFormer', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py deleted file mode 100644 index 37a147de274d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py +++ /dev/null @@ -1,362 +0,0 @@ -import torch -import torch.nn as nn -import torch.utils.checkpoint as cp - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( - ConvModule, - build_conv_layer, - build_norm_layer, - constant_init, - kaiming_init, -) -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger - -from ..builder import BACKBONES - - -class GlobalContextExtractor(nn.Module): - """Global Context Extractor for CGNet. - - This class is employed to refine the joint feature of both local feature - and surrounding context. - - Args: - channel (int): Number of input feature channels. - reduction (int): Reductions for global context extractor. Default: 16. - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. Default: False. - """ - - def __init__(self, channel, reduction=16, with_cp=False): - super(GlobalContextExtractor, self).__init__() - self.channel = channel - self.reduction = reduction - assert reduction >= 1 and channel >= reduction - self.with_cp = with_cp - self.avg_pool = nn.AdaptiveAvgPool2d(1) - self.fc = nn.Sequential( - nn.Linear(channel, channel // reduction), - nn.ReLU(inplace=True), - nn.Linear(channel // reduction, channel), - nn.Sigmoid(), - ) - - def forward(self, x): - def _inner_forward(x): - num_batch, num_channel = x.size()[:2] - y = self.avg_pool(x).view(num_batch, num_channel) - y = self.fc(y).view(num_batch, num_channel, 1, 1) - return x * y - - if self.with_cp and x.requires_grad: - out = cp.checkpoint(_inner_forward, x) - else: - out = _inner_forward(x) - - return out - - -class ContextGuidedBlock(nn.Module): - """Context Guided Block for CGNet. - - This class consists of four components: local feature extractor, - surrounding feature extractor, joint feature extractor and global - context extractor. - - Args: - in_channels (int): Number of input feature channels. - out_channels (int): Number of output feature channels. - dilation (int): Dilation rate for surrounding context extractor. - Default: 2. - reduction (int): Reduction for global context extractor. Default: 16. - skip_connect (bool): Add input to output or not. Default: True. - downsample (bool): Downsample the input to 1/2 or not. Default: False. - conv_cfg (dict): Config dict for convolution layer. - Default: None, which means using conv2d. - norm_cfg (dict): Config dict for normalization layer. - Default: dict(type='BN', requires_grad=True). - act_cfg (dict): Config dict for activation layer. - Default: dict(type='PReLU'). - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. Default: False. - """ - - def __init__( - self, - in_channels, - out_channels, - dilation=2, - reduction=16, - skip_connect=True, - downsample=False, - conv_cfg=None, - norm_cfg=dict(type='BN', requires_grad=True), - act_cfg=dict(type='PReLU'), - with_cp=False, - ): - super(ContextGuidedBlock, self).__init__() - self.with_cp = with_cp - self.downsample = downsample - - channels = out_channels if downsample else out_channels // 2 - if 'type' in act_cfg and act_cfg['type'] == 'PReLU': - act_cfg['num_parameters'] = channels - kernel_size = 3 if downsample else 1 - stride = 2 if downsample else 1 - padding = (kernel_size - 1) // 2 - - self.conv1x1 = ConvModule( - in_channels, channels, kernel_size, stride, padding, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg - ) - - self.f_loc = build_conv_layer( - conv_cfg, channels, channels, kernel_size=3, padding=1, groups=channels, bias=False - ) - self.f_sur = build_conv_layer( - conv_cfg, - channels, - channels, - kernel_size=3, - padding=dilation, - groups=channels, - dilation=dilation, - bias=False, - ) - - self.bn = build_norm_layer(norm_cfg, 2 * channels)[1] - self.activate = nn.PReLU(2 * channels) - - if downsample: - self.bottleneck = build_conv_layer(conv_cfg, 2 * channels, out_channels, kernel_size=1, bias=False) - - self.skip_connect = skip_connect and not downsample - self.f_glo = GlobalContextExtractor(out_channels, reduction, with_cp) - - def forward(self, x): - def _inner_forward(x): - out = self.conv1x1(x) - loc = self.f_loc(out) - sur = self.f_sur(out) - - joi_feat = torch.cat([loc, sur], 1) # the joint feature - joi_feat = self.bn(joi_feat) - joi_feat = self.activate(joi_feat) - if self.downsample: - joi_feat = self.bottleneck(joi_feat) # channel = out_channels - # f_glo is employed to refine the joint feature - out = self.f_glo(joi_feat) - - if self.skip_connect: - return x + out - else: - return out - - if self.with_cp and x.requires_grad: - out = cp.checkpoint(_inner_forward, x) - else: - out = _inner_forward(x) - - return out - - -class InputInjection(nn.Module): - """Downsampling module for CGNet.""" - - def __init__(self, num_downsampling): - super(InputInjection, self).__init__() - self.pool = nn.ModuleList() - for i in range(num_downsampling): - self.pool.append(nn.AvgPool2d(3, stride=2, padding=1)) - - def forward(self, x): - for pool in self.pool: - x = pool(x) - return x - - -@BACKBONES.register_module() -class CGNet(nn.Module): - """CGNet backbone. - - A Light-weight Context Guided Network for Semantic Segmentation - arXiv: https://arxiv.org/abs/1811.08201 - - Args: - in_channels (int): Number of input image channels. Normally 3. - num_channels (tuple[int]): Numbers of feature channels at each stages. - Default: (32, 64, 128). - num_blocks (tuple[int]): Numbers of CG blocks at stage 1 and stage 2. - Default: (3, 21). - dilations (tuple[int]): Dilation rate for surrounding context - extractors at stage 1 and stage 2. Default: (2, 4). - reductions (tuple[int]): Reductions for global context extractors at - stage 1 and stage 2. Default: (8, 16). - conv_cfg (dict): Config dict for convolution layer. - Default: None, which means using conv2d. - norm_cfg (dict): Config dict for normalization layer. - Default: dict(type='BN', requires_grad=True). - act_cfg (dict): Config dict for activation layer. - Default: dict(type='PReLU'). - norm_eval (bool): Whether to set norm layers to eval mode, namely, - freeze running stats (mean and var). Note: Effect on Batch Norm - and its variants only. Default: False. - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. Default: False. - """ - - def __init__( - self, - in_channels=3, - num_channels=(32, 64, 128), - num_blocks=(3, 21), - dilations=(2, 4), - reductions=(8, 16), - conv_cfg=None, - norm_cfg=dict(type='BN', requires_grad=True), - act_cfg=dict(type='PReLU'), - norm_eval=False, - with_cp=False, - ): - - super(CGNet, self).__init__() - self.in_channels = in_channels - self.num_channels = num_channels - assert isinstance(self.num_channels, tuple) and len(self.num_channels) == 3 - self.num_blocks = num_blocks - assert isinstance(self.num_blocks, tuple) and len(self.num_blocks) == 2 - self.dilations = dilations - assert isinstance(self.dilations, tuple) and len(self.dilations) == 2 - self.reductions = reductions - assert isinstance(self.reductions, tuple) and len(self.reductions) == 2 - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.act_cfg = act_cfg - if 'type' in self.act_cfg and self.act_cfg['type'] == 'PReLU': - self.act_cfg['num_parameters'] = num_channels[0] - self.norm_eval = norm_eval - self.with_cp = with_cp - - cur_channels = in_channels - self.stem = nn.ModuleList() - for i in range(3): - self.stem.append( - ConvModule( - cur_channels, - num_channels[0], - 3, - 2 if i == 0 else 1, - padding=1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - ) - cur_channels = num_channels[0] - - self.inject_2x = InputInjection(1) # down-sample for Input, factor=2 - self.inject_4x = InputInjection(2) # down-sample for Input, factor=4 - - cur_channels += in_channels - self.norm_prelu_0 = nn.Sequential(build_norm_layer(norm_cfg, cur_channels)[1], nn.PReLU(cur_channels)) - - # stage 1 - self.level1 = nn.ModuleList() - for i in range(num_blocks[0]): - self.level1.append( - ContextGuidedBlock( - cur_channels if i == 0 else num_channels[1], - num_channels[1], - dilations[0], - reductions[0], - downsample=(i == 0), - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - with_cp=with_cp, - ) - ) # CG block - - cur_channels = 2 * num_channels[1] + in_channels - self.norm_prelu_1 = nn.Sequential(build_norm_layer(norm_cfg, cur_channels)[1], nn.PReLU(cur_channels)) - - # stage 2 - self.level2 = nn.ModuleList() - for i in range(num_blocks[1]): - self.level2.append( - ContextGuidedBlock( - cur_channels if i == 0 else num_channels[2], - num_channels[2], - dilations[1], - reductions[1], - downsample=(i == 0), - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - with_cp=with_cp, - ) - ) # CG block - - cur_channels = 2 * num_channels[2] - self.norm_prelu_2 = nn.Sequential(build_norm_layer(norm_cfg, cur_channels)[1], nn.PReLU(cur_channels)) - - def forward(self, x): - output = [] - - # stage 0 - inp_2x = self.inject_2x(x) - inp_4x = self.inject_4x(x) - for layer in self.stem: - x = layer(x) - x = self.norm_prelu_0(torch.cat([x, inp_2x], 1)) - output.append(x) - - # stage 1 - for i, layer in enumerate(self.level1): - x = layer(x) - if i == 0: - down1 = x - x = self.norm_prelu_1(torch.cat([x, down1, inp_4x], 1)) - output.append(x) - - # stage 2 - for i, layer in enumerate(self.level2): - x = layer(x) - if i == 0: - down2 = x - x = self.norm_prelu_2(torch.cat([down2, x], 1)) - output.append(x) - - return output - - def init_weights(self, pretrained=None): - """Initialize the weights in backbone. - - Args: - pretrained (str, optional): Path to pre-trained weights. - Defaults to None. - """ - if isinstance(pretrained, str): - logger = get_root_logger() - load_checkpoint(self, pretrained, strict=False, logger=logger) - elif pretrained is None: - for m in self.modules(): - if isinstance(m, (nn.Conv2d, nn.Linear)): - kaiming_init(m) - elif isinstance(m, (_BatchNorm, nn.GroupNorm)): - constant_init(m, 1) - elif isinstance(m, nn.PReLU): - constant_init(m, 0) - else: - raise TypeError('pretrained must be a str or None') - - def train(self, mode=True): - """Convert the model into training mode will keeping the normalization - layer freezed.""" - super(CGNet, self).train(mode) - if mode and self.norm_eval: - for m in self.modules(): - # trick: eval have effect on BatchNorm only - if isinstance(m, _BatchNorm): - m.eval() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py deleted file mode 100644 index 532d781d7ffa..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py +++ /dev/null @@ -1,355 +0,0 @@ -import torch -import torch.nn as nn -from torch.nn.modules.batchnorm import _BatchNorm - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( - ConvModule, - DepthwiseSeparableConvModule, - constant_init, - kaiming_init, -) -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models.decode_heads.psp_head import PPM -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize - -from ..builder import BACKBONES -from ..utils.inverted_residual import InvertedResidual - - -class LearningToDownsample(nn.Module): - """Learning to downsample module. - - Args: - in_channels (int): Number of input channels. - dw_channels (tuple[int]): Number of output channels of the first and - the second depthwise conv (dwconv) layers. - out_channels (int): Number of output channels of the whole - 'learning to downsample' module. - conv_cfg (dict | None): Config of conv layers. Default: None - norm_cfg (dict | None): Config of norm layers. Default: - dict(type='BN') - act_cfg (dict): Config of activation layers. Default: - dict(type='ReLU') - """ - - def __init__( - self, - in_channels, - dw_channels, - out_channels, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - ): - super(LearningToDownsample, self).__init__() - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.act_cfg = act_cfg - dw_channels1 = dw_channels[0] - dw_channels2 = dw_channels[1] - - self.conv = ConvModule( - in_channels, - dw_channels1, - 3, - stride=2, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - self.dsconv1 = DepthwiseSeparableConvModule( - dw_channels1, dw_channels2, kernel_size=3, stride=2, padding=1, norm_cfg=self.norm_cfg - ) - self.dsconv2 = DepthwiseSeparableConvModule( - dw_channels2, out_channels, kernel_size=3, stride=2, padding=1, norm_cfg=self.norm_cfg - ) - - def forward(self, x): - x = self.conv(x) - x = self.dsconv1(x) - x = self.dsconv2(x) - return x - - -class GlobalFeatureExtractor(nn.Module): - """Global feature extractor module. - - Args: - in_channels (int): Number of input channels of the GFE module. - Default: 64 - block_channels (tuple[int]): Tuple of ints. Each int specifies the - number of output channels of each Inverted Residual module. - Default: (64, 96, 128) - out_channels(int): Number of output channels of the GFE module. - Default: 128 - expand_ratio (int): Adjusts number of channels of the hidden layer - in InvertedResidual by this amount. - Default: 6 - num_blocks (tuple[int]): Tuple of ints. Each int specifies the - number of times each Inverted Residual module is repeated. - The repeated Inverted Residual modules are called a 'group'. - Default: (3, 3, 3) - strides (tuple[int]): Tuple of ints. Each int specifies - the downsampling factor of each 'group'. - Default: (2, 2, 1) - pool_scales (tuple[int]): Tuple of ints. Each int specifies - the parameter required in 'global average pooling' within PPM. - Default: (1, 2, 3, 6) - conv_cfg (dict | None): Config of conv layers. Default: None - norm_cfg (dict | None): Config of norm layers. Default: - dict(type='BN') - act_cfg (dict): Config of activation layers. Default: - dict(type='ReLU') - align_corners (bool): align_corners argument of F.interpolate. - Default: False - """ - - def __init__( - self, - in_channels=64, - block_channels=(64, 96, 128), - out_channels=128, - expand_ratio=6, - num_blocks=(3, 3, 3), - strides=(2, 2, 1), - pool_scales=(1, 2, 3, 6), - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - align_corners=False, - ): - super(GlobalFeatureExtractor, self).__init__() - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.act_cfg = act_cfg - assert len(block_channels) == len(num_blocks) == 3 - self.bottleneck1 = self._make_layer(in_channels, block_channels[0], num_blocks[0], strides[0], expand_ratio) - self.bottleneck2 = self._make_layer( - block_channels[0], block_channels[1], num_blocks[1], strides[1], expand_ratio - ) - self.bottleneck3 = self._make_layer( - block_channels[1], block_channels[2], num_blocks[2], strides[2], expand_ratio - ) - self.ppm = PPM( - pool_scales, - block_channels[2], - block_channels[2] // 4, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - align_corners=align_corners, - ) - self.out = ConvModule( - block_channels[2] * 2, - out_channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - - def _make_layer(self, in_channels, out_channels, blocks, stride=1, expand_ratio=6): - layers = [InvertedResidual(in_channels, out_channels, stride, expand_ratio, norm_cfg=self.norm_cfg)] - for i in range(1, blocks): - layers.append(InvertedResidual(out_channels, out_channels, 1, expand_ratio, norm_cfg=self.norm_cfg)) - return nn.Sequential(*layers) - - def forward(self, x): - x = self.bottleneck1(x) - x = self.bottleneck2(x) - x = self.bottleneck3(x) - x = torch.cat([x, *self.ppm(x)], dim=1) - x = self.out(x) - return x - - -class FeatureFusionModule(nn.Module): - """Feature fusion module. - - Args: - higher_in_channels (int): Number of input channels of the - higher-resolution branch. - lower_in_channels (int): Number of input channels of the - lower-resolution branch. - out_channels (int): Number of output channels. - conv_cfg (dict | None): Config of conv layers. Default: None - norm_cfg (dict | None): Config of norm layers. Default: - dict(type='BN') - act_cfg (dict): Config of activation layers. Default: - dict(type='ReLU') - align_corners (bool): align_corners argument of F.interpolate. - Default: False - """ - - def __init__( - self, - higher_in_channels, - lower_in_channels, - out_channels, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - align_corners=False, - ): - super(FeatureFusionModule, self).__init__() - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.act_cfg = act_cfg - self.align_corners = align_corners - self.dwconv = ConvModule( - lower_in_channels, out_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg - ) - self.conv_lower_res = ConvModule( - out_channels, out_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=None - ) - self.conv_higher_res = ConvModule( - higher_in_channels, out_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=None - ) - self.relu = nn.ReLU(True) - - def forward(self, higher_res_feature, lower_res_feature): - lower_res_feature = resize( - lower_res_feature, size=higher_res_feature.size()[2:], mode='bilinear', align_corners=self.align_corners - ) - lower_res_feature = self.dwconv(lower_res_feature) - lower_res_feature = self.conv_lower_res(lower_res_feature) - - higher_res_feature = self.conv_higher_res(higher_res_feature) - out = higher_res_feature + lower_res_feature - return self.relu(out) - - -@BACKBONES.register_module() -class FastSCNN(nn.Module): - """Fast-SCNN Backbone. - - Args: - in_channels (int): Number of input image channels. Default: 3. - downsample_dw_channels (tuple[int]): Number of output channels after - the first conv layer & the second conv layer in - Learning-To-Downsample (LTD) module. - Default: (32, 48). - global_in_channels (int): Number of input channels of - Global Feature Extractor(GFE). - Equal to number of output channels of LTD. - Default: 64. - global_block_channels (tuple[int]): Tuple of integers that describe - the output channels for each of the MobileNet-v2 bottleneck - residual blocks in GFE. - Default: (64, 96, 128). - global_block_strides (tuple[int]): Tuple of integers - that describe the strides (downsampling factors) for each of the - MobileNet-v2 bottleneck residual blocks in GFE. - Default: (2, 2, 1). - global_out_channels (int): Number of output channels of GFE. - Default: 128. - higher_in_channels (int): Number of input channels of the higher - resolution branch in FFM. - Equal to global_in_channels. - Default: 64. - lower_in_channels (int): Number of input channels of the lower - resolution branch in FFM. - Equal to global_out_channels. - Default: 128. - fusion_out_channels (int): Number of output channels of FFM. - Default: 128. - out_indices (tuple): Tuple of indices of list - [higher_res_features, lower_res_features, fusion_output]. - Often set to (0,1,2) to enable aux. heads. - Default: (0, 1, 2). - conv_cfg (dict | None): Config of conv layers. Default: None - norm_cfg (dict | None): Config of norm layers. Default: - dict(type='BN') - act_cfg (dict): Config of activation layers. Default: - dict(type='ReLU') - align_corners (bool): align_corners argument of F.interpolate. - Default: False - """ - - def __init__( - self, - in_channels=3, - downsample_dw_channels=(32, 48), - global_in_channels=64, - global_block_channels=(64, 96, 128), - global_block_strides=(2, 2, 1), - global_out_channels=128, - higher_in_channels=64, - lower_in_channels=128, - fusion_out_channels=128, - out_indices=(0, 1, 2), - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - align_corners=False, - ): - - super(FastSCNN, self).__init__() - if global_in_channels != higher_in_channels: - raise AssertionError( - 'Global Input Channels must be the same \ - with Higher Input Channels!' - ) - elif global_out_channels != lower_in_channels: - raise AssertionError( - 'Global Output Channels must be the same \ - with Lower Input Channels!' - ) - - self.in_channels = in_channels - self.downsample_dw_channels1 = downsample_dw_channels[0] - self.downsample_dw_channels2 = downsample_dw_channels[1] - self.global_in_channels = global_in_channels - self.global_block_channels = global_block_channels - self.global_block_strides = global_block_strides - self.global_out_channels = global_out_channels - self.higher_in_channels = higher_in_channels - self.lower_in_channels = lower_in_channels - self.fusion_out_channels = fusion_out_channels - self.out_indices = out_indices - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.act_cfg = act_cfg - self.align_corners = align_corners - self.learning_to_downsample = LearningToDownsample( - in_channels, - downsample_dw_channels, - global_in_channels, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - self.global_feature_extractor = GlobalFeatureExtractor( - global_in_channels, - global_block_channels, - global_out_channels, - strides=self.global_block_strides, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - align_corners=self.align_corners, - ) - self.feature_fusion = FeatureFusionModule( - higher_in_channels, - lower_in_channels, - fusion_out_channels, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - align_corners=self.align_corners, - ) - - def init_weights(self, pretrained=None): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - kaiming_init(m) - elif isinstance(m, (_BatchNorm, nn.GroupNorm)): - constant_init(m, 1) - - def forward(self, x): - higher_res_features = self.learning_to_downsample(x) - lower_res_features = self.global_feature_extractor(higher_res_features) - fusion_output = self.feature_fusion(higher_res_features, lower_res_features) - - outs = [higher_res_features, lower_res_features, fusion_output] - outs = [outs[i] for i in self.out_indices] - return tuple(outs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py deleted file mode 100644 index 03dd604869ea..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py +++ /dev/null @@ -1,536 +0,0 @@ -import torch.nn as nn - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( - build_conv_layer, - build_norm_layer, - constant_init, - kaiming_init, -) -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import Upsample, resize -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger - -from ..builder import BACKBONES -from .resnet import BasicBlock, Bottleneck - - -class HRModule(nn.Module): - """High-Resolution Module for HRNet. - - In this module, every branch has 4 BasicBlocks/Bottlenecks. Fusion/Exchange - is in this module. - """ - - def __init__( - self, - num_branches, - blocks, - num_blocks, - in_channels, - num_channels, - multiscale_output=True, - with_cp=False, - conv_cfg=None, - norm_cfg=dict(type='BN', requires_grad=True), - ): - super(HRModule, self).__init__() - self._check_branches(num_branches, num_blocks, in_channels, num_channels) - - self.in_channels = in_channels - self.num_branches = num_branches - - self.multiscale_output = multiscale_output - self.norm_cfg = norm_cfg - self.conv_cfg = conv_cfg - self.with_cp = with_cp - self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels) - self.fuse_layers = self._make_fuse_layers() - self.relu = nn.ReLU(inplace=False) - - def _check_branches(self, num_branches, num_blocks, in_channels, num_channels): - """Check branches configuration.""" - if num_branches != len(num_blocks): - error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_BLOCKS(' f'{len(num_blocks)})' - raise ValueError(error_msg) - - if num_branches != len(num_channels): - error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_CHANNELS(' f'{len(num_channels)})' - raise ValueError(error_msg) - - if num_branches != len(in_channels): - error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_INCHANNELS(' f'{len(in_channels)})' - raise ValueError(error_msg) - - def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1): - """Build one branch.""" - downsample = None - if stride != 1 or self.in_channels[branch_index] != num_channels[branch_index] * block.expansion: - downsample = nn.Sequential( - build_conv_layer( - self.conv_cfg, - self.in_channels[branch_index], - num_channels[branch_index] * block.expansion, - kernel_size=1, - stride=stride, - bias=False, - ), - build_norm_layer(self.norm_cfg, num_channels[branch_index] * block.expansion)[1], - ) - - layers = [] - layers.append( - block( - self.in_channels[branch_index], - num_channels[branch_index], - stride, - downsample=downsample, - with_cp=self.with_cp, - norm_cfg=self.norm_cfg, - conv_cfg=self.conv_cfg, - ) - ) - self.in_channels[branch_index] = num_channels[branch_index] * block.expansion - for i in range(1, num_blocks[branch_index]): - layers.append( - block( - self.in_channels[branch_index], - num_channels[branch_index], - with_cp=self.with_cp, - norm_cfg=self.norm_cfg, - conv_cfg=self.conv_cfg, - ) - ) - - return nn.Sequential(*layers) - - def _make_branches(self, num_branches, block, num_blocks, num_channels): - """Build multiple branch.""" - branches = [] - - for i in range(num_branches): - branches.append(self._make_one_branch(i, block, num_blocks, num_channels)) - - return nn.ModuleList(branches) - - def _make_fuse_layers(self): - """Build fuse layer.""" - if self.num_branches == 1: - return None - - num_branches = self.num_branches - in_channels = self.in_channels - fuse_layers = [] - num_out_branches = num_branches if self.multiscale_output else 1 - for i in range(num_out_branches): - fuse_layer = [] - for j in range(num_branches): - if j > i: - fuse_layer.append( - nn.Sequential( - build_conv_layer( - self.conv_cfg, - in_channels[j], - in_channels[i], - kernel_size=1, - stride=1, - padding=0, - bias=False, - ), - build_norm_layer(self.norm_cfg, in_channels[i])[1], - # we set align_corners=False for HRNet - Upsample(scale_factor=2 ** (j - i), mode='bilinear', align_corners=False), - ) - ) - elif j == i: - fuse_layer.append(None) - else: - conv_downsamples = [] - for k in range(i - j): - if k == i - j - 1: - conv_downsamples.append( - nn.Sequential( - build_conv_layer( - self.conv_cfg, - in_channels[j], - in_channels[i], - kernel_size=3, - stride=2, - padding=1, - bias=False, - ), - build_norm_layer(self.norm_cfg, in_channels[i])[1], - ) - ) - else: - conv_downsamples.append( - nn.Sequential( - build_conv_layer( - self.conv_cfg, - in_channels[j], - in_channels[j], - kernel_size=3, - stride=2, - padding=1, - bias=False, - ), - build_norm_layer(self.norm_cfg, in_channels[j])[1], - nn.ReLU(inplace=False), - ) - ) - fuse_layer.append(nn.Sequential(*conv_downsamples)) - fuse_layers.append(nn.ModuleList(fuse_layer)) - - return nn.ModuleList(fuse_layers) - - def forward(self, x): - """Forward function.""" - if self.num_branches == 1: - return [self.branches[0](x[0])] - - for i in range(self.num_branches): - x[i] = self.branches[i](x[i]) - - x_fuse = [] - for i in range(len(self.fuse_layers)): - y = 0 - for j in range(self.num_branches): - if i == j: - y += x[j] - elif j > i: - y = y + resize( - self.fuse_layers[i][j](x[j]), size=x[i].shape[2:], mode='bilinear', align_corners=False - ) - else: - y += self.fuse_layers[i][j](x[j]) - x_fuse.append(self.relu(y)) - return x_fuse - - -@BACKBONES.register_module() -class HRNet(nn.Module): - """HRNet backbone. - - High-Resolution Representations for Labeling Pixels and Regions - arXiv: https://arxiv.org/abs/1904.04514 - - Args: - extra (dict): detailed configuration for each stage of HRNet. - in_channels (int): Number of input image channels. Normally 3. - conv_cfg (dict): dictionary to construct and config conv layer. - norm_cfg (dict): dictionary to construct and config norm layer. - norm_eval (bool): Whether to set norm layers to eval mode, namely, - freeze running stats (mean and var). Note: Effect on Batch Norm - and its variants only. - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. - zero_init_residual (bool): whether to use zero init for last norm layer - in resblocks to let them behave as identity. - - Example: - >>> from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models import HRNet - >>> import torch - >>> extra = dict( - >>> stage1=dict( - >>> num_modules=1, - >>> num_branches=1, - >>> block='BOTTLENECK', - >>> num_blocks=(4, ), - >>> num_channels=(64, )), - >>> stage2=dict( - >>> num_modules=1, - >>> num_branches=2, - >>> block='BASIC', - >>> num_blocks=(4, 4), - >>> num_channels=(32, 64)), - >>> stage3=dict( - >>> num_modules=4, - >>> num_branches=3, - >>> block='BASIC', - >>> num_blocks=(4, 4, 4), - >>> num_channels=(32, 64, 128)), - >>> stage4=dict( - >>> num_modules=3, - >>> num_branches=4, - >>> block='BASIC', - >>> num_blocks=(4, 4, 4, 4), - >>> num_channels=(32, 64, 128, 256))) - >>> self = HRNet(extra, in_channels=1) - >>> self.eval() - >>> inputs = torch.rand(1, 1, 32, 32) - >>> level_outputs = self.forward(inputs) - >>> for level_out in level_outputs: - ... print(tuple(level_out.shape)) - (1, 32, 8, 8) - (1, 64, 4, 4) - (1, 128, 2, 2) - (1, 256, 1, 1) - """ - - blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck} - - def __init__( - self, - extra, - in_channels=3, - conv_cfg=None, - norm_cfg=dict(type='BN', requires_grad=True), - norm_eval=False, - with_cp=False, - zero_init_residual=False, - ): - super(HRNet, self).__init__() - self.extra = extra - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.norm_eval = norm_eval - self.with_cp = with_cp - self.zero_init_residual = zero_init_residual - - # stem net - self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1) - self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2) - - self.conv1 = build_conv_layer(self.conv_cfg, in_channels, 64, kernel_size=3, stride=2, padding=1, bias=False) - - self.add_module(self.norm1_name, norm1) - self.conv2 = build_conv_layer(self.conv_cfg, 64, 64, kernel_size=3, stride=2, padding=1, bias=False) - - self.add_module(self.norm2_name, norm2) - self.relu = nn.ReLU(inplace=True) - - # stage 1 - self.stage1_cfg = self.extra['stage1'] - num_channels = self.stage1_cfg['num_channels'][0] - block_type = self.stage1_cfg['block'] - num_blocks = self.stage1_cfg['num_blocks'][0] - - block = self.blocks_dict[block_type] - stage1_out_channels = num_channels * block.expansion - self.layer1 = self._make_layer(block, 64, num_channels, num_blocks) - - # stage 2 - self.stage2_cfg = self.extra['stage2'] - num_channels = self.stage2_cfg['num_channels'] - block_type = self.stage2_cfg['block'] - - block = self.blocks_dict[block_type] - num_channels = [channel * block.expansion for channel in num_channels] - self.transition1 = self._make_transition_layer([stage1_out_channels], num_channels) - self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels) - - # stage 3 - self.stage3_cfg = self.extra['stage3'] - num_channels = self.stage3_cfg['num_channels'] - block_type = self.stage3_cfg['block'] - - block = self.blocks_dict[block_type] - num_channels = [channel * block.expansion for channel in num_channels] - self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels) - self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels) - - # stage 4 - self.stage4_cfg = self.extra['stage4'] - num_channels = self.stage4_cfg['num_channels'] - block_type = self.stage4_cfg['block'] - - block = self.blocks_dict[block_type] - num_channels = [channel * block.expansion for channel in num_channels] - self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels) - self.stage4, pre_stage_channels = self._make_stage(self.stage4_cfg, num_channels) - - @property - def norm1(self): - """nn.Module: the normalization layer named "norm1" """ - return getattr(self, self.norm1_name) - - @property - def norm2(self): - """nn.Module: the normalization layer named "norm2" """ - return getattr(self, self.norm2_name) - - def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer): - """Make transition layer.""" - num_branches_cur = len(num_channels_cur_layer) - num_branches_pre = len(num_channels_pre_layer) - - transition_layers = [] - for i in range(num_branches_cur): - if i < num_branches_pre: - if num_channels_cur_layer[i] != num_channels_pre_layer[i]: - transition_layers.append( - nn.Sequential( - build_conv_layer( - self.conv_cfg, - num_channels_pre_layer[i], - num_channels_cur_layer[i], - kernel_size=3, - stride=1, - padding=1, - bias=False, - ), - build_norm_layer(self.norm_cfg, num_channels_cur_layer[i])[1], - nn.ReLU(inplace=True), - ) - ) - else: - transition_layers.append(None) - else: - conv_downsamples = [] - for j in range(i + 1 - num_branches_pre): - in_channels = num_channels_pre_layer[-1] - out_channels = num_channels_cur_layer[i] if j == i - num_branches_pre else in_channels - conv_downsamples.append( - nn.Sequential( - build_conv_layer( - self.conv_cfg, - in_channels, - out_channels, - kernel_size=3, - stride=2, - padding=1, - bias=False, - ), - build_norm_layer(self.norm_cfg, out_channels)[1], - nn.ReLU(inplace=True), - ) - ) - transition_layers.append(nn.Sequential(*conv_downsamples)) - - return nn.ModuleList(transition_layers) - - def _make_layer(self, block, inplanes, planes, blocks, stride=1): - """Make each layer.""" - downsample = None - if stride != 1 or inplanes != planes * block.expansion: - downsample = nn.Sequential( - build_conv_layer( - self.conv_cfg, inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False - ), - build_norm_layer(self.norm_cfg, planes * block.expansion)[1], - ) - - layers = [] - layers.append( - block( - inplanes, - planes, - stride, - downsample=downsample, - with_cp=self.with_cp, - norm_cfg=self.norm_cfg, - conv_cfg=self.conv_cfg, - ) - ) - inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append( - block(inplanes, planes, with_cp=self.with_cp, norm_cfg=self.norm_cfg, conv_cfg=self.conv_cfg) - ) - - return nn.Sequential(*layers) - - def _make_stage(self, layer_config, in_channels, multiscale_output=True): - """Make each stage.""" - num_modules = layer_config['num_modules'] - num_branches = layer_config['num_branches'] - num_blocks = layer_config['num_blocks'] - num_channels = layer_config['num_channels'] - block = self.blocks_dict[layer_config['block']] - - hr_modules = [] - for i in range(num_modules): - # multi_scale_output is only used for the last module - if not multiscale_output and i == num_modules - 1: - reset_multiscale_output = False - else: - reset_multiscale_output = True - - hr_modules.append( - HRModule( - num_branches, - block, - num_blocks, - in_channels, - num_channels, - reset_multiscale_output, - with_cp=self.with_cp, - norm_cfg=self.norm_cfg, - conv_cfg=self.conv_cfg, - ) - ) - - return nn.Sequential(*hr_modules), in_channels - - def init_weights(self, pretrained=None): - """Initialize the weights in backbone. - - Args: - pretrained (str, optional): Path to pre-trained weights. - Defaults to None. - """ - if isinstance(pretrained, str): - logger = get_root_logger() - load_checkpoint(self, pretrained, strict=False, logger=logger) - elif pretrained is None: - for m in self.modules(): - if isinstance(m, nn.Conv2d): - kaiming_init(m) - elif isinstance(m, (_BatchNorm, nn.GroupNorm)): - constant_init(m, 1) - - if self.zero_init_residual: - for m in self.modules(): - if isinstance(m, Bottleneck): - constant_init(m.norm3, 0) - elif isinstance(m, BasicBlock): - constant_init(m.norm2, 0) - else: - raise TypeError('pretrained must be a str or None') - - def forward(self, x): - """Forward function.""" - - x = self.conv1(x) - x = self.norm1(x) - x = self.relu(x) - x = self.conv2(x) - x = self.norm2(x) - x = self.relu(x) - x = self.layer1(x) - - x_list = [] - for i in range(self.stage2_cfg['num_branches']): - if self.transition1[i] is not None: - x_list.append(self.transition1[i](x)) - else: - x_list.append(x) - y_list = self.stage2(x_list) - - x_list = [] - for i in range(self.stage3_cfg['num_branches']): - if self.transition2[i] is not None: - x_list.append(self.transition2[i](y_list[-1])) - else: - x_list.append(y_list[i]) - y_list = self.stage3(x_list) - - x_list = [] - for i in range(self.stage4_cfg['num_branches']): - if self.transition3[i] is not None: - x_list.append(self.transition3[i](y_list[-1])) - else: - x_list.append(y_list[i]) - y_list = self.stage4(x_list) - - return y_list - - def train(self, mode=True): - """Convert the model into training mode will keeping the normalization - layer freezed.""" - super(HRNet, self).train(mode) - if mode and self.norm_eval: - for m in self.modules(): - # trick: eval have effect on BatchNorm only - if isinstance(m, _BatchNorm): - m.eval() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py deleted file mode 100644 index cda42da943f5..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py +++ /dev/null @@ -1,183 +0,0 @@ -import logging - -import torch.nn as nn -from torch.nn.modules.batchnorm import _BatchNorm - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, constant_init, kaiming_init -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint - -from ..builder import BACKBONES -from ..utils import InvertedResidual, make_divisible - - -@BACKBONES.register_module() -class MobileNetV2(nn.Module): - """MobileNetV2 backbone. - - Args: - widen_factor (float): Width multiplier, multiply number of - channels in each layer by this amount. Default: 1.0. - strides (Sequence[int], optional): Strides of the first block of each - layer. If not specified, default config in ``arch_setting`` will - be used. - dilations (Sequence[int]): Dilation of each layer. - out_indices (None or Sequence[int]): Output from which stages. - Default: (7, ). - frozen_stages (int): Stages to be frozen (all param fixed). - Default: -1, which means not freezing any parameters. - conv_cfg (dict): Config dict for convolution layer. - Default: None, which means using conv2d. - norm_cfg (dict): Config dict for normalization layer. - Default: dict(type='BN'). - act_cfg (dict): Config dict for activation layer. - Default: dict(type='ReLU6'). - norm_eval (bool): Whether to set norm layers to eval mode, namely, - freeze running stats (mean and var). Note: Effect on Batch Norm - and its variants only. Default: False. - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. Default: False. - """ - - # Parameters to build layers. 3 parameters are needed to construct a - # layer, from left to right: expand_ratio, channel, num_blocks. - arch_settings = [[1, 16, 1], [6, 24, 2], [6, 32, 3], [6, 64, 4], [6, 96, 3], [6, 160, 3], [6, 320, 1]] - - def __init__( - self, - widen_factor=1.0, - strides=(1, 2, 2, 2, 1, 2, 1), - dilations=(1, 1, 1, 1, 1, 1, 1), - out_indices=(1, 2, 4, 6), - frozen_stages=-1, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU6'), - norm_eval=False, - with_cp=False, - ): - super(MobileNetV2, self).__init__() - self.widen_factor = widen_factor - self.strides = strides - self.dilations = dilations - assert len(strides) == len(dilations) == len(self.arch_settings) - self.out_indices = out_indices - for index in out_indices: - if index not in range(0, 7): - raise ValueError('the item in out_indices must in ' f'range(0, 8). But received {index}') - - if frozen_stages not in range(-1, 7): - raise ValueError('frozen_stages must be in range(-1, 7). ' f'But received {frozen_stages}') - self.out_indices = out_indices - self.frozen_stages = frozen_stages - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.act_cfg = act_cfg - self.norm_eval = norm_eval - self.with_cp = with_cp - - self.in_channels = make_divisible(32 * widen_factor, 8) - - self.conv1 = ConvModule( - in_channels=3, - out_channels=self.in_channels, - kernel_size=3, - stride=2, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - - self.layers = [] - - for i, layer_cfg in enumerate(self.arch_settings): - expand_ratio, channel, num_blocks = layer_cfg - stride = self.strides[i] - dilation = self.dilations[i] - out_channels = make_divisible(channel * widen_factor, 8) - inverted_res_layer = self.make_layer( - out_channels=out_channels, - num_blocks=num_blocks, - stride=stride, - dilation=dilation, - expand_ratio=expand_ratio, - ) - layer_name = f'layer{i + 1}' - self.add_module(layer_name, inverted_res_layer) - self.layers.append(layer_name) - - def make_layer(self, out_channels, num_blocks, stride, dilation, expand_ratio): - """Stack InvertedResidual blocks to build a layer for MobileNetV2. - - Args: - out_channels (int): out_channels of block. - num_blocks (int): Number of blocks. - stride (int): Stride of the first block. - dilation (int): Dilation of the first block. - expand_ratio (int): Expand the number of channels of the - hidden layer in InvertedResidual by this ratio. - """ - layers = [] - for i in range(num_blocks): - layers.append( - InvertedResidual( - self.in_channels, - out_channels, - stride if i == 0 else 1, - expand_ratio=expand_ratio, - dilation=dilation if i == 0 else 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - with_cp=self.with_cp, - ) - ) - self.in_channels = out_channels - - return nn.Sequential(*layers) - - def init_weights(self, pretrained=None): - if isinstance(pretrained, str): - logger = logging.getLogger() - load_checkpoint(self, pretrained, strict=False, logger=logger) - elif pretrained is None: - for m in self.modules(): - if isinstance(m, nn.Conv2d): - kaiming_init(m) - elif isinstance(m, (_BatchNorm, nn.GroupNorm)): - constant_init(m, 1) - else: - raise TypeError('pretrained must be a str or None') - - def forward(self, x): - x = self.conv1(x) - - outs = [] - for i, layer_name in enumerate(self.layers): - layer = getattr(self, layer_name) - x = layer(x) - if i in self.out_indices: - outs.append(x) - - if len(outs) == 1: - return outs[0] - else: - return tuple(outs) - - def _freeze_stages(self): - if self.frozen_stages >= 0: - for param in self.conv1.parameters(): - param.requires_grad = False - for i in range(1, self.frozen_stages + 1): - layer = getattr(self, f'layer{i}') - layer.eval() - for param in layer.parameters(): - param.requires_grad = False - - def train(self, mode=True): - super(MobileNetV2, self).train(mode) - self._freeze_stages() - if mode and self.norm_eval: - for m in self.modules(): - if isinstance(m, _BatchNorm): - m.eval() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py deleted file mode 100644 index 11c665237e1c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py +++ /dev/null @@ -1,267 +0,0 @@ -import logging - -import torch.nn as nn -from torch.nn.modules.batchnorm import _BatchNorm - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, constant_init, kaiming_init -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn.bricks import Conv2dAdaptivePadding -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint - -from ..builder import BACKBONES -from ..utils import InvertedResidualV3 as InvertedResidual - - -@BACKBONES.register_module() -class MobileNetV3(nn.Module): - """MobileNetV3 backbone. - - This backbone is the improved implementation of `Searching for MobileNetV3 - `_. - - Args: - arch (str): Architecture of mobilnetv3, from {'small', 'large'}. - Default: 'small'. - conv_cfg (dict): Config dict for convolution layer. - Default: None, which means using conv2d. - norm_cfg (dict): Config dict for normalization layer. - Default: dict(type='BN'). - out_indices (tuple[int]): Output from which layer. - Default: (0, 1, 12). - frozen_stages (int): Stages to be frozen (all param fixed). - Default: -1, which means not freezing any parameters. - norm_eval (bool): Whether to set norm layers to eval mode, namely, - freeze running stats (mean and var). Note: Effect on Batch Norm - and its variants only. Default: False. - with_cp (bool): Use checkpoint or not. Using checkpoint will save - some memory while slowing down the training speed. - Default: False. - """ - - # Parameters to build each block: - # [kernel size, mid channels, out channels, with_se, act type, stride] - arch_settings = { - 'small': [ - [3, 16, 16, True, 'ReLU', 2], # block0 layer1 os=4 - [3, 72, 24, False, 'ReLU', 2], # block1 layer2 os=8 - [3, 88, 24, False, 'ReLU', 1], - [5, 96, 40, True, 'HSwish', 2], # block2 layer4 os=16 - [5, 240, 40, True, 'HSwish', 1], - [5, 240, 40, True, 'HSwish', 1], - [5, 120, 48, True, 'HSwish', 1], # block3 layer7 os=16 - [5, 144, 48, True, 'HSwish', 1], - [5, 288, 96, True, 'HSwish', 2], # block4 layer9 os=32 - [5, 576, 96, True, 'HSwish', 1], - [5, 576, 96, True, 'HSwish', 1], - ], - 'large': [ - [3, 16, 16, False, 'ReLU', 1], # block0 layer1 os=2 - [3, 64, 24, False, 'ReLU', 2], # block1 layer2 os=4 - [3, 72, 24, False, 'ReLU', 1], - [5, 72, 40, True, 'ReLU', 2], # block2 layer4 os=8 - [5, 120, 40, True, 'ReLU', 1], - [5, 120, 40, True, 'ReLU', 1], - [3, 240, 80, False, 'HSwish', 2], # block3 layer7 os=16 - [3, 200, 80, False, 'HSwish', 1], - [3, 184, 80, False, 'HSwish', 1], - [3, 184, 80, False, 'HSwish', 1], - [3, 480, 112, True, 'HSwish', 1], # block4 layer11 os=16 - [3, 672, 112, True, 'HSwish', 1], - [5, 672, 160, True, 'HSwish', 2], # block5 layer13 os=32 - [5, 960, 160, True, 'HSwish', 1], - [5, 960, 160, True, 'HSwish', 1], - ], - } # yapf: disable - - def __init__( - self, - arch='small', - conv_cfg=None, - norm_cfg=dict(type='BN'), - out_indices=(0, 1, 12), - frozen_stages=-1, - reduction_factor=1, - norm_eval=False, - with_cp=False, - ): - super(MobileNetV3, self).__init__() - assert arch in self.arch_settings - assert isinstance(reduction_factor, int) and reduction_factor > 0 - assert mmcv.is_tuple_of(out_indices, int) - for index in out_indices: - if index not in range(0, len(self.arch_settings[arch]) + 2): - raise ValueError( - 'the item in out_indices must in ' - f'range(0, {len(self.arch_settings[arch])+2}). ' - f'But received {index}' - ) - - if frozen_stages not in range(-1, len(self.arch_settings[arch]) + 2): - raise ValueError( - 'frozen_stages must be in range(-1, ' - f'{len(self.arch_settings[arch])+2}). ' - f'But received {frozen_stages}' - ) - self.arch = arch - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.out_indices = out_indices - self.frozen_stages = frozen_stages - self.reduction_factor = reduction_factor - self.norm_eval = norm_eval - self.with_cp = with_cp - self.layers = self._make_layer() - - def _make_layer(self): - layers = [] - - # build the first layer (layer0) - in_channels = 16 - layer = ConvModule( - in_channels=3, - out_channels=in_channels, - kernel_size=3, - stride=2, - padding=1, - conv_cfg=dict(type='Conv2dAdaptivePadding'), - norm_cfg=self.norm_cfg, - act_cfg=dict(type='HSwish'), - ) - self.add_module('layer0', layer) - layers.append('layer0') - - layer_setting = self.arch_settings[self.arch] - for i, params in enumerate(layer_setting): - (kernel_size, mid_channels, out_channels, with_se, act, stride) = params - - if self.arch == 'large' and i >= 12 or self.arch == 'small' and i >= 8: - mid_channels = mid_channels // self.reduction_factor - out_channels = out_channels // self.reduction_factor - - if with_se: - se_cfg = dict( - channels=mid_channels, - ratio=4, - act_cfg=(dict(type='ReLU'), dict(type='HSigmoid', bias=3.0, divisor=6.0)), - ) - else: - se_cfg = None - - layer = InvertedResidual( - in_channels=in_channels, - out_channels=out_channels, - mid_channels=mid_channels, - kernel_size=kernel_size, - stride=stride, - se_cfg=se_cfg, - with_expand_conv=(in_channels != mid_channels), - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=dict(type=act), - with_cp=self.with_cp, - ) - in_channels = out_channels - layer_name = 'layer{}'.format(i + 1) - self.add_module(layer_name, layer) - layers.append(layer_name) - - # build the last layer - # block5 layer12 os=32 for small model - # block6 layer16 os=32 for large model - layer = ConvModule( - in_channels=in_channels, - out_channels=576 if self.arch == 'small' else 960, - kernel_size=1, - stride=1, - dilation=4, - padding=0, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=dict(type='HSwish'), - ) - layer_name = 'layer{}'.format(len(layer_setting) + 1) - self.add_module(layer_name, layer) - layers.append(layer_name) - - # next, convert backbone MobileNetV3 to a semantic segmentation version - if self.arch == 'small': - self.layer4.depthwise_conv.conv.stride = (1, 1) - self.layer9.depthwise_conv.conv.stride = (1, 1) - for i in range(4, len(layers)): - layer = getattr(self, layers[i]) - if isinstance(layer, InvertedResidual): - modified_module = layer.depthwise_conv.conv - else: - modified_module = layer.conv - - if i < 9: - modified_module.dilation = (2, 2) - pad = 2 - else: - modified_module.dilation = (4, 4) - pad = 4 - - if not isinstance(modified_module, Conv2dAdaptivePadding): - # Adjust padding - pad *= (modified_module.kernel_size[0] - 1) // 2 - modified_module.padding = (pad, pad) - else: - self.layer7.depthwise_conv.conv.stride = (1, 1) - self.layer13.depthwise_conv.conv.stride = (1, 1) - for i in range(7, len(layers)): - layer = getattr(self, layers[i]) - if isinstance(layer, InvertedResidual): - modified_module = layer.depthwise_conv.conv - else: - modified_module = layer.conv - - if i < 13: - modified_module.dilation = (2, 2) - pad = 2 - else: - modified_module.dilation = (4, 4) - pad = 4 - - if not isinstance(modified_module, Conv2dAdaptivePadding): - # Adjust padding - pad *= (modified_module.kernel_size[0] - 1) // 2 - modified_module.padding = (pad, pad) - - return layers - - def init_weights(self, pretrained=None): - if isinstance(pretrained, str): - logger = logging.getLogger() - load_checkpoint(self, pretrained, strict=False, logger=logger) - elif pretrained is None: - for m in self.modules(): - if isinstance(m, nn.Conv2d): - kaiming_init(m) - elif isinstance(m, nn.BatchNorm2d): - constant_init(m, 1) - else: - raise TypeError('pretrained must be a str or None') - - def forward(self, x): - outs = [] - for i, layer_name in enumerate(self.layers): - layer = getattr(self, layer_name) - x = layer(x) - if i in self.out_indices: - outs.append(x) - return outs - - def _freeze_stages(self): - for i in range(self.frozen_stages + 1): - layer = getattr(self, f'layer{i}') - layer.eval() - for param in layer.parameters(): - param.requires_grad = False - - def train(self, mode=True): - super(MobileNetV3, self).train(mode) - self._freeze_stages() - if mode and self.norm_eval: - for m in self.modules(): - if isinstance(m, _BatchNorm): - m.eval() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py deleted file mode 100644 index 83915384db3a..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py +++ /dev/null @@ -1,299 +0,0 @@ -import math - -import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.utils.checkpoint as cp -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import build_conv_layer, build_norm_layer - -from ..builder import BACKBONES -from ..utils import ResLayer -from .resnet import Bottleneck as _Bottleneck -from .resnet import ResNetV1d - - -class RSoftmax(nn.Module): - """Radix Softmax module in ``SplitAttentionConv2d``. - - Args: - radix (int): Radix of input. - groups (int): Groups of input. - """ - - def __init__(self, radix, groups): - super().__init__() - self.radix = radix - self.groups = groups - - def forward(self, x): - batch = x.size(0) - if self.radix > 1: - x = x.view(batch, self.groups, self.radix, -1).transpose(1, 2) - x = F.softmax(x, dim=1) - x = x.reshape(batch, -1) - else: - x = torch.sigmoid(x) - return x - - -class SplitAttentionConv2d(nn.Module): - """Split-Attention Conv2d in ResNeSt. - - Args: - in_channels (int): Same as nn.Conv2d. - out_channels (int): Same as nn.Conv2d. - kernel_size (int | tuple[int]): Same as nn.Conv2d. - stride (int | tuple[int]): Same as nn.Conv2d. - padding (int | tuple[int]): Same as nn.Conv2d. - dilation (int | tuple[int]): Same as nn.Conv2d. - groups (int): Same as nn.Conv2d. - radix (int): Radix of SpltAtConv2d. Default: 2 - reduction_factor (int): Reduction factor of inter_channels. Default: 4. - conv_cfg (dict): Config dict for convolution layer. Default: None, - which means using conv2d. - norm_cfg (dict): Config dict for normalization layer. Default: None. - dcn (dict): Config dict for DCN. Default: None. - """ - - def __init__( - self, - in_channels, - channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - radix=2, - reduction_factor=4, - conv_cfg=None, - norm_cfg=dict(type='BN'), - dcn=None, - ): - super(SplitAttentionConv2d, self).__init__() - inter_channels = max(in_channels * radix // reduction_factor, 32) - self.radix = radix - self.groups = groups - self.channels = channels - self.with_dcn = dcn is not None - self.dcn = dcn - fallback_on_stride = False - if self.with_dcn: - fallback_on_stride = self.dcn.pop('fallback_on_stride', False) - if self.with_dcn and not fallback_on_stride: - assert conv_cfg is None, 'conv_cfg must be None for DCN' - conv_cfg = dcn - self.conv = build_conv_layer( - conv_cfg, - in_channels, - channels * radix, - kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups * radix, - bias=False, - ) - self.norm0_name, norm0 = build_norm_layer(norm_cfg, channels * radix, postfix=0) - self.add_module(self.norm0_name, norm0) - self.relu = nn.ReLU(inplace=True) - self.fc1 = build_conv_layer(None, channels, inter_channels, 1, groups=self.groups) - self.norm1_name, norm1 = build_norm_layer(norm_cfg, inter_channels, postfix=1) - self.add_module(self.norm1_name, norm1) - self.fc2 = build_conv_layer(None, inter_channels, channels * radix, 1, groups=self.groups) - self.rsoftmax = RSoftmax(radix, groups) - - @property - def norm0(self): - """nn.Module: the normalization layer named "norm0" """ - return getattr(self, self.norm0_name) - - @property - def norm1(self): - """nn.Module: the normalization layer named "norm1" """ - return getattr(self, self.norm1_name) - - def forward(self, x): - x = self.conv(x) - x = self.norm0(x) - x = self.relu(x) - - batch, rchannel = x.shape[:2] - batch = x.size(0) - if self.radix > 1: - splits = x.view(batch, self.radix, -1, *x.shape[2:]) - gap = splits.sum(dim=1) - else: - gap = x - gap = F.adaptive_avg_pool2d(gap, 1) - gap = self.fc1(gap) - - gap = self.norm1(gap) - gap = self.relu(gap) - - atten = self.fc2(gap) - atten = self.rsoftmax(atten).view(batch, -1, 1, 1) - - if self.radix > 1: - attens = atten.view(batch, self.radix, -1, *atten.shape[2:]) - out = torch.sum(attens * splits, dim=1) - else: - out = atten * x - return out.contiguous() - - -class Bottleneck(_Bottleneck): - """Bottleneck block for ResNeSt. - - Args: - inplane (int): Input planes of this block. - planes (int): Middle planes of this block. - groups (int): Groups of conv2. - width_per_group (int): Width per group of conv2. 64x4d indicates - ``groups=64, width_per_group=4`` and 32x8d indicates - ``groups=32, width_per_group=8``. - radix (int): Radix of SpltAtConv2d. Default: 2 - reduction_factor (int): Reduction factor of inter_channels in - SplitAttentionConv2d. Default: 4. - avg_down_stride (bool): Whether to use average pool for stride in - Bottleneck. Default: True. - kwargs (dict): Key word arguments for base class. - """ - - expansion = 4 - - def __init__( - self, - inplanes, - planes, - groups=1, - base_width=4, - base_channels=64, - radix=2, - reduction_factor=4, - avg_down_stride=True, - **kwargs - ): - """Bottleneck block for ResNeSt.""" - super(Bottleneck, self).__init__(inplanes, planes, **kwargs) - - if groups == 1: - width = self.planes - else: - width = math.floor(self.planes * (base_width / base_channels)) * groups - - self.avg_down_stride = avg_down_stride and self.conv2_stride > 1 - - self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, width, postfix=1) - self.norm3_name, norm3 = build_norm_layer(self.norm_cfg, self.planes * self.expansion, postfix=3) - - self.conv1 = build_conv_layer( - self.conv_cfg, self.inplanes, width, kernel_size=1, stride=self.conv1_stride, bias=False - ) - self.add_module(self.norm1_name, norm1) - self.with_modulated_dcn = False - self.conv2 = SplitAttentionConv2d( - width, - width, - kernel_size=3, - stride=1 if self.avg_down_stride else self.conv2_stride, - padding=self.dilation, - dilation=self.dilation, - groups=groups, - radix=radix, - reduction_factor=reduction_factor, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - dcn=self.dcn, - ) - delattr(self, self.norm2_name) - - if self.avg_down_stride: - self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1) - - self.conv3 = build_conv_layer(self.conv_cfg, width, self.planes * self.expansion, kernel_size=1, bias=False) - self.add_module(self.norm3_name, norm3) - - def forward(self, x): - def _inner_forward(x): - identity = x - - out = self.conv1(x) - out = self.norm1(out) - out = self.relu(out) - - if self.with_plugins: - out = self.forward_plugin(out, self.after_conv1_plugin_names) - - out = self.conv2(out) - - if self.avg_down_stride: - out = self.avd_layer(out) - - if self.with_plugins: - out = self.forward_plugin(out, self.after_conv2_plugin_names) - - out = self.conv3(out) - out = self.norm3(out) - - if self.with_plugins: - out = self.forward_plugin(out, self.after_conv3_plugin_names) - - if self.downsample is not None: - identity = self.downsample(x) - - out += identity - - return out - - if self.with_cp and x.requires_grad: - out = cp.checkpoint(_inner_forward, x) - else: - out = _inner_forward(x) - - out = self.relu(out) - - return out - - -@BACKBONES.register_module() -class ResNeSt(ResNetV1d): - """ResNeSt backbone. - - Args: - groups (int): Number of groups of Bottleneck. Default: 1 - base_width (int): Base width of Bottleneck. Default: 4 - radix (int): Radix of SpltAtConv2d. Default: 2 - reduction_factor (int): Reduction factor of inter_channels in - SplitAttentionConv2d. Default: 4. - avg_down_stride (bool): Whether to use average pool for stride in - Bottleneck. Default: True. - kwargs (dict): Keyword arguments for ResNet. - """ - - arch_settings = { - 50: (Bottleneck, (3, 4, 6, 3)), - 101: (Bottleneck, (3, 4, 23, 3)), - 152: (Bottleneck, (3, 8, 36, 3)), - 200: (Bottleneck, (3, 24, 36, 3)), - } - - def __init__(self, groups=1, base_width=4, radix=2, reduction_factor=4, avg_down_stride=True, **kwargs): - self.groups = groups - self.base_width = base_width - self.radix = radix - self.reduction_factor = reduction_factor - self.avg_down_stride = avg_down_stride - super(ResNeSt, self).__init__(**kwargs) - - def make_res_layer(self, **kwargs): - """Pack all blocks in a stage into a ``ResLayer``.""" - return ResLayer( - groups=self.groups, - base_width=self.base_width, - base_channels=self.base_channels, - radix=self.radix, - reduction_factor=self.reduction_factor, - avg_down_stride=self.avg_down_stride, - **kwargs - ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py deleted file mode 100644 index 8b418aad171d..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py +++ /dev/null @@ -1,650 +0,0 @@ -import torch.nn as nn -import torch.utils.checkpoint as cp - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( - build_conv_layer, - build_norm_layer, - build_plugin_layer, - constant_init, - kaiming_init, -) -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger - -from ..builder import BACKBONES -from ..utils import ResLayer - - -class BasicBlock(nn.Module): - """Basic block for ResNet.""" - - expansion = 1 - - def __init__( - self, - inplanes, - planes, - stride=1, - dilation=1, - downsample=None, - style='pytorch', - with_cp=False, - conv_cfg=None, - norm_cfg=dict(type='BN'), - dcn=None, - plugins=None, - ): - super(BasicBlock, self).__init__() - assert dcn is None, 'Not implemented yet.' - assert plugins is None, 'Not implemented yet.' - - self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) - self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) - - self.conv1 = build_conv_layer( - conv_cfg, inplanes, planes, 3, stride=stride, padding=dilation, dilation=dilation, bias=False - ) - self.add_module(self.norm1_name, norm1) - self.conv2 = build_conv_layer(conv_cfg, planes, planes, 3, padding=1, bias=False) - self.add_module(self.norm2_name, norm2) - - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - self.dilation = dilation - self.with_cp = with_cp - - @property - def norm1(self): - """nn.Module: normalization layer after the first convolution layer""" - return getattr(self, self.norm1_name) - - @property - def norm2(self): - """nn.Module: normalization layer after the second convolution layer""" - return getattr(self, self.norm2_name) - - def forward(self, x): - """Forward function.""" - - def _inner_forward(x): - identity = x - - out = self.conv1(x) - out = self.norm1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.norm2(out) - - if self.downsample is not None: - identity = self.downsample(x) - - out += identity - - return out - - if self.with_cp and x.requires_grad: - out = cp.checkpoint(_inner_forward, x) - else: - out = _inner_forward(x) - - out = self.relu(out) - - return out - - -class Bottleneck(nn.Module): - """Bottleneck block for ResNet. - - If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is - "caffe", the stride-two layer is the first 1x1 conv layer. - """ - - expansion = 4 - - def __init__( - self, - inplanes, - planes, - stride=1, - dilation=1, - downsample=None, - style='pytorch', - with_cp=False, - conv_cfg=None, - norm_cfg=dict(type='BN'), - dcn=None, - plugins=None, - ): - super(Bottleneck, self).__init__() - assert style in ['pytorch', 'caffe'] - assert dcn is None or isinstance(dcn, dict) - assert plugins is None or isinstance(plugins, list) - if plugins is not None: - allowed_position = ['after_conv1', 'after_conv2', 'after_conv3'] - assert all(p['position'] in allowed_position for p in plugins) - - self.inplanes = inplanes - self.planes = planes - self.stride = stride - self.dilation = dilation - self.style = style - self.with_cp = with_cp - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.dcn = dcn - self.with_dcn = dcn is not None - self.plugins = plugins - self.with_plugins = plugins is not None - - if self.with_plugins: - # collect plugins for conv1/conv2/conv3 - self.after_conv1_plugins = [plugin['cfg'] for plugin in plugins if plugin['position'] == 'after_conv1'] - self.after_conv2_plugins = [plugin['cfg'] for plugin in plugins if plugin['position'] == 'after_conv2'] - self.after_conv3_plugins = [plugin['cfg'] for plugin in plugins if plugin['position'] == 'after_conv3'] - - if self.style == 'pytorch': - self.conv1_stride = 1 - self.conv2_stride = stride - else: - self.conv1_stride = stride - self.conv2_stride = 1 - - self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) - self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) - self.norm3_name, norm3 = build_norm_layer(norm_cfg, planes * self.expansion, postfix=3) - - self.conv1 = build_conv_layer(conv_cfg, inplanes, planes, kernel_size=1, stride=self.conv1_stride, bias=False) - self.add_module(self.norm1_name, norm1) - fallback_on_stride = False - if self.with_dcn: - fallback_on_stride = dcn.pop('fallback_on_stride', False) - if not self.with_dcn or fallback_on_stride: - self.conv2 = build_conv_layer( - conv_cfg, - planes, - planes, - kernel_size=3, - stride=self.conv2_stride, - padding=dilation, - dilation=dilation, - bias=False, - ) - else: - assert self.conv_cfg is None, 'conv_cfg must be None for DCN' - self.conv2 = build_conv_layer( - dcn, - planes, - planes, - kernel_size=3, - stride=self.conv2_stride, - padding=dilation, - dilation=dilation, - bias=False, - ) - - self.add_module(self.norm2_name, norm2) - self.conv3 = build_conv_layer(conv_cfg, planes, planes * self.expansion, kernel_size=1, bias=False) - self.add_module(self.norm3_name, norm3) - - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - - if self.with_plugins: - self.after_conv1_plugin_names = self.make_block_plugins(planes, self.after_conv1_plugins) - self.after_conv2_plugin_names = self.make_block_plugins(planes, self.after_conv2_plugins) - self.after_conv3_plugin_names = self.make_block_plugins(planes * self.expansion, self.after_conv3_plugins) - - def make_block_plugins(self, in_channels, plugins): - """make plugins for block. - - Args: - in_channels (int): Input channels of plugin. - plugins (list[dict]): List of plugins cfg to build. - - Returns: - list[str]: List of the names of plugin. - """ - assert isinstance(plugins, list) - plugin_names = [] - for plugin in plugins: - plugin = plugin.copy() - name, layer = build_plugin_layer(plugin, in_channels=in_channels, postfix=plugin.pop('postfix', '')) - assert not hasattr(self, name), f'duplicate plugin {name}' - self.add_module(name, layer) - plugin_names.append(name) - return plugin_names - - def forward_plugin(self, x, plugin_names): - """Forward function for plugins.""" - out = x - for name in plugin_names: - out = getattr(self, name)(x) - return out - - @property - def norm1(self): - """nn.Module: normalization layer after the first convolution layer""" - return getattr(self, self.norm1_name) - - @property - def norm2(self): - """nn.Module: normalization layer after the second convolution layer""" - return getattr(self, self.norm2_name) - - @property - def norm3(self): - """nn.Module: normalization layer after the third convolution layer""" - return getattr(self, self.norm3_name) - - def forward(self, x): - """Forward function.""" - - def _inner_forward(x): - identity = x - - out = self.conv1(x) - out = self.norm1(out) - out = self.relu(out) - - if self.with_plugins: - out = self.forward_plugin(out, self.after_conv1_plugin_names) - - out = self.conv2(out) - out = self.norm2(out) - out = self.relu(out) - - if self.with_plugins: - out = self.forward_plugin(out, self.after_conv2_plugin_names) - - out = self.conv3(out) - out = self.norm3(out) - - if self.with_plugins: - out = self.forward_plugin(out, self.after_conv3_plugin_names) - - if self.downsample is not None: - identity = self.downsample(x) - - out += identity - - return out - - if self.with_cp and x.requires_grad: - out = cp.checkpoint(_inner_forward, x) - else: - out = _inner_forward(x) - - out = self.relu(out) - - return out - - -@BACKBONES.register_module() -class ResNet(nn.Module): - """ResNet backbone. - - Args: - depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. - in_channels (int): Number of input image channels. Default" 3. - stem_channels (int): Number of stem channels. Default: 64. - base_channels (int): Number of base channels of res layer. Default: 64. - num_stages (int): Resnet stages, normally 4. - strides (Sequence[int]): Strides of the first block of each stage. - dilations (Sequence[int]): Dilation of each stage. - out_indices (Sequence[int]): Output from which stages. - style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two - layer is the 3x3 conv layer, otherwise the stride-two layer is - the first 1x1 conv layer. - deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv - avg_down (bool): Use AvgPool instead of stride conv when - downsampling in the bottleneck. - frozen_stages (int): Stages to be frozen (stop grad and set eval mode). - -1 means not freezing any parameters. - norm_cfg (dict): Dictionary to construct and config norm layer. - norm_eval (bool): Whether to set norm layers to eval mode, namely, - freeze running stats (mean and var). Note: Effect on Batch Norm - and its variants only. - plugins (list[dict]): List of plugins for stages, each dict contains: - - - cfg (dict, required): Cfg dict to build plugin. - - - position (str, required): Position inside block to insert plugin, - options: 'after_conv1', 'after_conv2', 'after_conv3'. - - - stages (tuple[bool], optional): Stages to apply plugin, length - should be same as 'num_stages' - multi_grid (Sequence[int]|None): Multi grid dilation rates of last - stage. Default: None - contract_dilation (bool): Whether contract first dilation of each layer - Default: False - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. - zero_init_residual (bool): Whether to use zero init for last norm layer - in resblocks to let them behave as identity. - - Example: - >>> from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models import ResNet - >>> import torch - >>> self = ResNet(depth=18) - >>> self.eval() - >>> inputs = torch.rand(1, 3, 32, 32) - >>> level_outputs = self.forward(inputs) - >>> for level_out in level_outputs: - ... print(tuple(level_out.shape)) - (1, 64, 8, 8) - (1, 128, 4, 4) - (1, 256, 2, 2) - (1, 512, 1, 1) - """ - - arch_settings = { - 18: (BasicBlock, (2, 2, 2, 2)), - 34: (BasicBlock, (3, 4, 6, 3)), - 50: (Bottleneck, (3, 4, 6, 3)), - 101: (Bottleneck, (3, 4, 23, 3)), - 152: (Bottleneck, (3, 8, 36, 3)), - } - - def __init__( - self, - depth, - in_channels=3, - stem_channels=64, - base_channels=64, - num_stages=4, - strides=(1, 2, 2, 2), - dilations=(1, 1, 1, 1), - out_indices=(0, 1, 2, 3), - style='pytorch', - deep_stem=False, - avg_down=False, - frozen_stages=-1, - conv_cfg=None, - norm_cfg=dict(type='BN', requires_grad=True), - norm_eval=False, - dcn=None, - stage_with_dcn=(False, False, False, False), - plugins=None, - multi_grid=None, - contract_dilation=False, - with_cp=False, - zero_init_residual=True, - ): - super(ResNet, self).__init__() - if depth not in self.arch_settings: - raise KeyError(f'invalid depth {depth} for resnet') - self.depth = depth - self.stem_channels = stem_channels - self.base_channels = base_channels - self.num_stages = num_stages - assert num_stages >= 1 and num_stages <= 4 - self.strides = strides - self.dilations = dilations - assert len(strides) == len(dilations) == num_stages - self.out_indices = out_indices - assert max(out_indices) < num_stages - self.style = style - self.deep_stem = deep_stem - self.avg_down = avg_down - self.frozen_stages = frozen_stages - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.with_cp = with_cp - self.norm_eval = norm_eval - self.dcn = dcn - self.stage_with_dcn = stage_with_dcn - if dcn is not None: - assert len(stage_with_dcn) == num_stages - self.plugins = plugins - self.multi_grid = multi_grid - self.contract_dilation = contract_dilation - self.zero_init_residual = zero_init_residual - self.block, stage_blocks = self.arch_settings[depth] - self.stage_blocks = stage_blocks[:num_stages] - self.inplanes = stem_channels - - self._make_stem_layer(in_channels, stem_channels) - - self.res_layers = [] - for i, num_blocks in enumerate(self.stage_blocks): - stride = strides[i] - dilation = dilations[i] - dcn = self.dcn if self.stage_with_dcn[i] else None - if plugins is not None: - stage_plugins = self.make_stage_plugins(plugins, i) - else: - stage_plugins = None - # multi grid is applied to last layer only - stage_multi_grid = multi_grid if i == len(self.stage_blocks) - 1 else None - planes = base_channels * 2 ** i - res_layer = self.make_res_layer( - block=self.block, - inplanes=self.inplanes, - planes=planes, - num_blocks=num_blocks, - stride=stride, - dilation=dilation, - style=self.style, - avg_down=self.avg_down, - with_cp=with_cp, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - dcn=dcn, - plugins=stage_plugins, - multi_grid=stage_multi_grid, - contract_dilation=contract_dilation, - ) - self.inplanes = planes * self.block.expansion - layer_name = f'layer{i+1}' - self.add_module(layer_name, res_layer) - self.res_layers.append(layer_name) - - self._freeze_stages() - - self.feat_dim = self.block.expansion * base_channels * 2 ** (len(self.stage_blocks) - 1) - - def make_stage_plugins(self, plugins, stage_idx): - """make plugins for ResNet 'stage_idx'th stage . - - Currently we support to insert 'context_block', - 'empirical_attention_block', 'nonlocal_block' into the backbone like - ResNet/ResNeXt. They could be inserted after conv1/conv2/conv3 of - Bottleneck. - - An example of plugins format could be : - >>> plugins=[ - ... dict(cfg=dict(type='xxx', arg1='xxx'), - ... stages=(False, True, True, True), - ... position='after_conv2'), - ... dict(cfg=dict(type='yyy'), - ... stages=(True, True, True, True), - ... position='after_conv3'), - ... dict(cfg=dict(type='zzz', postfix='1'), - ... stages=(True, True, True, True), - ... position='after_conv3'), - ... dict(cfg=dict(type='zzz', postfix='2'), - ... stages=(True, True, True, True), - ... position='after_conv3') - ... ] - >>> self = ResNet(depth=18) - >>> stage_plugins = self.make_stage_plugins(plugins, 0) - >>> assert len(stage_plugins) == 3 - - Suppose 'stage_idx=0', the structure of blocks in the stage would be: - conv1-> conv2->conv3->yyy->zzz1->zzz2 - Suppose 'stage_idx=1', the structure of blocks in the stage would be: - conv1-> conv2->xxx->conv3->yyy->zzz1->zzz2 - - If stages is missing, the plugin would be applied to all stages. - - Args: - plugins (list[dict]): List of plugins cfg to build. The postfix is - required if multiple same type plugins are inserted. - stage_idx (int): Index of stage to build - - Returns: - list[dict]: Plugins for current stage - """ - stage_plugins = [] - for plugin in plugins: - plugin = plugin.copy() - stages = plugin.pop('stages', None) - assert stages is None or len(stages) == self.num_stages - # whether to insert plugin into current stage - if stages is None or stages[stage_idx]: - stage_plugins.append(plugin) - - return stage_plugins - - def make_res_layer(self, **kwargs): - """Pack all blocks in a stage into a ``ResLayer``.""" - return ResLayer(**kwargs) - - @property - def norm1(self): - """nn.Module: the normalization layer named "norm1" """ - return getattr(self, self.norm1_name) - - def _make_stem_layer(self, in_channels, stem_channels): - """Make stem layer for ResNet.""" - if self.deep_stem: - self.stem = nn.Sequential( - build_conv_layer( - self.conv_cfg, in_channels, stem_channels // 2, kernel_size=3, stride=2, padding=1, bias=False - ), - build_norm_layer(self.norm_cfg, stem_channels // 2)[1], - nn.ReLU(inplace=True), - build_conv_layer( - self.conv_cfg, - stem_channels // 2, - stem_channels // 2, - kernel_size=3, - stride=1, - padding=1, - bias=False, - ), - build_norm_layer(self.norm_cfg, stem_channels // 2)[1], - nn.ReLU(inplace=True), - build_conv_layer( - self.conv_cfg, stem_channels // 2, stem_channels, kernel_size=3, stride=1, padding=1, bias=False - ), - build_norm_layer(self.norm_cfg, stem_channels)[1], - nn.ReLU(inplace=True), - ) - else: - self.conv1 = build_conv_layer( - self.conv_cfg, in_channels, stem_channels, kernel_size=7, stride=2, padding=3, bias=False - ) - self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, stem_channels, postfix=1) - self.add_module(self.norm1_name, norm1) - self.relu = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - - def _freeze_stages(self): - """Freeze stages param and norm stats.""" - if self.frozen_stages >= 0: - if self.deep_stem: - self.stem.eval() - for param in self.stem.parameters(): - param.requires_grad = False - else: - self.norm1.eval() - for m in [self.conv1, self.norm1]: - for param in m.parameters(): - param.requires_grad = False - - for i in range(1, self.frozen_stages + 1): - m = getattr(self, f'layer{i}') - m.eval() - for param in m.parameters(): - param.requires_grad = False - - def init_weights(self, pretrained=None): - """Initialize the weights in backbone. - - Args: - pretrained (str, optional): Path to pre-trained weights. - Defaults to None. - """ - if isinstance(pretrained, str): - logger = get_root_logger() - load_checkpoint(self, pretrained, strict=False, logger=logger) - elif pretrained is None: - for m in self.modules(): - if isinstance(m, nn.Conv2d): - kaiming_init(m) - elif isinstance(m, (_BatchNorm, nn.GroupNorm)): - constant_init(m, 1) - - if self.dcn is not None: - for m in self.modules(): - if isinstance(m, Bottleneck) and hasattr(m, 'conv2_offset'): - constant_init(m.conv2_offset, 0) - - if self.zero_init_residual: - for m in self.modules(): - if isinstance(m, Bottleneck): - constant_init(m.norm3, 0) - elif isinstance(m, BasicBlock): - constant_init(m.norm2, 0) - else: - raise TypeError('pretrained must be a str or None') - - def forward(self, x): - """Forward function.""" - if self.deep_stem: - x = self.stem(x) - else: - x = self.conv1(x) - x = self.norm1(x) - x = self.relu(x) - x = self.maxpool(x) - outs = [] - for i, layer_name in enumerate(self.res_layers): - res_layer = getattr(self, layer_name) - x = res_layer(x) - if i in self.out_indices: - outs.append(x) - return tuple(outs) - - def train(self, mode=True): - """Convert the model into training mode while keep normalization layer - freezed.""" - super(ResNet, self).train(mode) - self._freeze_stages() - if mode and self.norm_eval: - for m in self.modules(): - # trick: eval have effect on BatchNorm only - if isinstance(m, _BatchNorm): - m.eval() - - -@BACKBONES.register_module() -class ResNetV1c(ResNet): - """ResNetV1c variant described in [1]_. - - Compared with default ResNet(ResNetV1b), ResNetV1c replaces the 7x7 conv - in the input stem with three 3x3 convs. - - References: - .. [1] https://arxiv.org/pdf/1812.01187.pdf - """ - - def __init__(self, **kwargs): - super(ResNetV1c, self).__init__(deep_stem=True, avg_down=False, **kwargs) - - -@BACKBONES.register_module() -class ResNetV1d(ResNet): - """ResNetV1d variant described in [1]_. - - Compared with default ResNet(ResNetV1b), ResNetV1d replaces the 7x7 conv in - the input stem with three 3x3 convs. And in the downsampling block, a 2x2 - avg_pool with stride 2 is added before conv, whose stride is changed to 1. - """ - - def __init__(self, **kwargs): - super(ResNetV1d, self).__init__(deep_stem=True, avg_down=True, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py deleted file mode 100644 index 5ee1d26d15a6..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py +++ /dev/null @@ -1,124 +0,0 @@ -import math - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import build_conv_layer, build_norm_layer - -from ..builder import BACKBONES -from ..utils import ResLayer -from .resnet import Bottleneck as _Bottleneck -from .resnet import ResNet - - -class Bottleneck(_Bottleneck): - """Bottleneck block for ResNeXt. - - If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is - "caffe", the stride-two layer is the first 1x1 conv layer. - """ - - def __init__(self, inplanes, planes, groups=1, base_width=4, base_channels=64, **kwargs): - super(Bottleneck, self).__init__(inplanes, planes, **kwargs) - - if groups == 1: - width = self.planes - else: - width = math.floor(self.planes * (base_width / base_channels)) * groups - - self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, width, postfix=1) - self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, width, postfix=2) - self.norm3_name, norm3 = build_norm_layer(self.norm_cfg, self.planes * self.expansion, postfix=3) - - self.conv1 = build_conv_layer( - self.conv_cfg, self.inplanes, width, kernel_size=1, stride=self.conv1_stride, bias=False - ) - self.add_module(self.norm1_name, norm1) - fallback_on_stride = False - self.with_modulated_dcn = False - if self.with_dcn: - fallback_on_stride = self.dcn.pop('fallback_on_stride', False) - if not self.with_dcn or fallback_on_stride: - self.conv2 = build_conv_layer( - self.conv_cfg, - width, - width, - kernel_size=3, - stride=self.conv2_stride, - padding=self.dilation, - dilation=self.dilation, - groups=groups, - bias=False, - ) - else: - assert self.conv_cfg is None, 'conv_cfg must be None for DCN' - self.conv2 = build_conv_layer( - self.dcn, - width, - width, - kernel_size=3, - stride=self.conv2_stride, - padding=self.dilation, - dilation=self.dilation, - groups=groups, - bias=False, - ) - - self.add_module(self.norm2_name, norm2) - self.conv3 = build_conv_layer(self.conv_cfg, width, self.planes * self.expansion, kernel_size=1, bias=False) - self.add_module(self.norm3_name, norm3) - - -@BACKBONES.register_module() -class ResNeXt(ResNet): - """ResNeXt backbone. - - Args: - depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. - in_channels (int): Number of input image channels. Normally 3. - num_stages (int): Resnet stages, normally 4. - groups (int): Group of resnext. - base_width (int): Base width of resnext. - strides (Sequence[int]): Strides of the first block of each stage. - dilations (Sequence[int]): Dilation of each stage. - out_indices (Sequence[int]): Output from which stages. - style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two - layer is the 3x3 conv layer, otherwise the stride-two layer is - the first 1x1 conv layer. - frozen_stages (int): Stages to be frozen (all param fixed). -1 means - not freezing any parameters. - norm_cfg (dict): dictionary to construct and config norm layer. - norm_eval (bool): Whether to set norm layers to eval mode, namely, - freeze running stats (mean and var). Note: Effect on Batch Norm - and its variants only. - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. - zero_init_residual (bool): whether to use zero init for last norm layer - in resblocks to let them behave as identity. - - Example: - >>> from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models import ResNeXt - >>> import torch - >>> self = ResNeXt(depth=50) - >>> self.eval() - >>> inputs = torch.rand(1, 3, 32, 32) - >>> level_outputs = self.forward(inputs) - >>> for level_out in level_outputs: - ... print(tuple(level_out.shape)) - (1, 256, 8, 8) - (1, 512, 4, 4) - (1, 1024, 2, 2) - (1, 2048, 1, 1) - """ - - arch_settings = { - 50: (Bottleneck, (3, 4, 6, 3)), - 101: (Bottleneck, (3, 4, 23, 3)), - 152: (Bottleneck, (3, 8, 36, 3)), - } - - def __init__(self, groups=1, base_width=4, **kwargs): - self.groups = groups - self.base_width = base_width - super(ResNeXt, self).__init__(**kwargs) - - def make_res_layer(self, **kwargs): - """Pack all blocks in a stage into a ``ResLayer``""" - return ResLayer(groups=self.groups, base_width=self.base_width, base_channels=self.base_channels, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py deleted file mode 100644 index e3a5a76e39f3..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py +++ /dev/null @@ -1,451 +0,0 @@ -import torch.nn as nn -import torch.utils.checkpoint as cp - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( - UPSAMPLE_LAYERS, - ConvModule, - build_activation_layer, - build_norm_layer, - constant_init, - kaiming_init, -) -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger - -from ..builder import BACKBONES -from ..utils import UpConvBlock - - -class BasicConvBlock(nn.Module): - """Basic convolutional block for UNet. - - This module consists of several plain convolutional layers. - - Args: - in_channels (int): Number of input channels. - out_channels (int): Number of output channels. - num_convs (int): Number of convolutional layers. Default: 2. - stride (int): Whether use stride convolution to downsample - the input feature map. If stride=2, it only uses stride convolution - in the first convolutional layer to downsample the input feature - map. Options are 1 or 2. Default: 1. - dilation (int): Whether use dilated convolution to expand the - receptive field. Set dilation rate of each convolutional layer and - the dilation rate of the first convolutional layer is always 1. - Default: 1. - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. Default: False. - conv_cfg (dict | None): Config dict for convolution layer. - Default: None. - norm_cfg (dict | None): Config dict for normalization layer. - Default: dict(type='BN'). - act_cfg (dict | None): Config dict for activation layer in ConvModule. - Default: dict(type='ReLU'). - dcn (bool): Use deformable convolution in convolutional layer or not. - Default: None. - plugins (dict): plugins for convolutional layers. Default: None. - """ - - def __init__( - self, - in_channels, - out_channels, - num_convs=2, - stride=1, - dilation=1, - with_cp=False, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - dcn=None, - plugins=None, - ): - super(BasicConvBlock, self).__init__() - assert dcn is None, 'Not implemented yet.' - assert plugins is None, 'Not implemented yet.' - - self.with_cp = with_cp - convs = [] - for i in range(num_convs): - convs.append( - ConvModule( - in_channels=in_channels if i == 0 else out_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride if i == 0 else 1, - dilation=1 if i == 0 else dilation, - padding=1 if i == 0 else dilation, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - ) - - self.convs = nn.Sequential(*convs) - - def forward(self, x): - """Forward function.""" - - if self.with_cp and x.requires_grad: - out = cp.checkpoint(self.convs, x) - else: - out = self.convs(x) - return out - - -@UPSAMPLE_LAYERS.register_module() -class DeconvModule(nn.Module): - """Deconvolution upsample module in decoder for UNet (2X upsample). - - This module uses deconvolution to upsample feature map in the decoder - of UNet. - - Args: - in_channels (int): Number of input channels. - out_channels (int): Number of output channels. - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. Default: False. - norm_cfg (dict | None): Config dict for normalization layer. - Default: dict(type='BN'). - act_cfg (dict | None): Config dict for activation layer in ConvModule. - Default: dict(type='ReLU'). - kernel_size (int): Kernel size of the convolutional layer. Default: 4. - """ - - def __init__( - self, - in_channels, - out_channels, - with_cp=False, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - *, - kernel_size=4, - scale_factor=2, - ): - super(DeconvModule, self).__init__() - - assert (kernel_size - scale_factor >= 0) and (kernel_size - scale_factor) % 2 == 0, ( - f'kernel_size should be greater than or equal to scale_factor ' - f'and (kernel_size - scale_factor) should be even numbers, ' - f'while the kernel size is {kernel_size} and scale_factor is ' - f'{scale_factor}.' - ) - - stride = scale_factor - padding = (kernel_size - scale_factor) // 2 - self.with_cp = with_cp - deconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding) - - norm_name, norm = build_norm_layer(norm_cfg, out_channels) - activate = build_activation_layer(act_cfg) - self.deconv_upsamping = nn.Sequential(deconv, norm, activate) - - def forward(self, x): - """Forward function.""" - - if self.with_cp and x.requires_grad: - out = cp.checkpoint(self.deconv_upsamping, x) - else: - out = self.deconv_upsamping(x) - return out - - -@UPSAMPLE_LAYERS.register_module() -class InterpConv(nn.Module): - """Interpolation upsample module in decoder for UNet. - - This module uses interpolation to upsample feature map in the decoder - of UNet. It consists of one interpolation upsample layer and one - convolutional layer. It can be one interpolation upsample layer followed - by one convolutional layer (conv_first=False) or one convolutional layer - followed by one interpolation upsample layer (conv_first=True). - - Args: - in_channels (int): Number of input channels. - out_channels (int): Number of output channels. - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. Default: False. - norm_cfg (dict | None): Config dict for normalization layer. - Default: dict(type='BN'). - act_cfg (dict | None): Config dict for activation layer in ConvModule. - Default: dict(type='ReLU'). - conv_cfg (dict | None): Config dict for convolution layer. - Default: None. - conv_first (bool): Whether convolutional layer or interpolation - upsample layer first. Default: False. It means interpolation - upsample layer followed by one convolutional layer. - kernel_size (int): Kernel size of the convolutional layer. Default: 1. - stride (int): Stride of the convolutional layer. Default: 1. - padding (int): Padding of the convolutional layer. Default: 1. - upsample_cfg (dict): Interpolation config of the upsample layer. - Default: dict( - scale_factor=2, mode='bilinear', align_corners=False). - """ - - def __init__( - self, - in_channels, - out_channels, - with_cp=False, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - *, - conv_cfg=None, - conv_first=False, - kernel_size=1, - stride=1, - padding=0, - upsample_cfg=dict(scale_factor=2, mode='bilinear', align_corners=False), - ): - super(InterpConv, self).__init__() - - self.with_cp = with_cp - conv = ConvModule( - in_channels, - out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - upsample = nn.Upsample(**upsample_cfg) - if conv_first: - self.interp_upsample = nn.Sequential(conv, upsample) - else: - self.interp_upsample = nn.Sequential(upsample, conv) - - def forward(self, x): - """Forward function.""" - - if self.with_cp and x.requires_grad: - out = cp.checkpoint(self.interp_upsample, x) - else: - out = self.interp_upsample(x) - return out - - -@BACKBONES.register_module() -class UNet(nn.Module): - """UNet backbone. - U-Net: Convolutional Networks for Biomedical Image Segmentation. - https://arxiv.org/pdf/1505.04597.pdf - - Args: - in_channels (int): Number of input image channels. Default" 3. - base_channels (int): Number of base channels of each stage. - The output channels of the first stage. Default: 64. - num_stages (int): Number of stages in encoder, normally 5. Default: 5. - strides (Sequence[int 1 | 2]): Strides of each stage in encoder. - len(strides) is equal to num_stages. Normally the stride of the - first stage in encoder is 1. If strides[i]=2, it uses stride - convolution to downsample in the correspondence encoder stage. - Default: (1, 1, 1, 1, 1). - enc_num_convs (Sequence[int]): Number of convolutional layers in the - convolution block of the correspondence encoder stage. - Default: (2, 2, 2, 2, 2). - dec_num_convs (Sequence[int]): Number of convolutional layers in the - convolution block of the correspondence decoder stage. - Default: (2, 2, 2, 2). - downsamples (Sequence[int]): Whether use MaxPool to downsample the - feature map after the first stage of encoder - (stages: [1, num_stages)). If the correspondence encoder stage use - stride convolution (strides[i]=2), it will never use MaxPool to - downsample, even downsamples[i-1]=True. - Default: (True, True, True, True). - enc_dilations (Sequence[int]): Dilation rate of each stage in encoder. - Default: (1, 1, 1, 1, 1). - dec_dilations (Sequence[int]): Dilation rate of each stage in decoder. - Default: (1, 1, 1, 1). - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. Default: False. - conv_cfg (dict | None): Config dict for convolution layer. - Default: None. - norm_cfg (dict | None): Config dict for normalization layer. - Default: dict(type='BN'). - act_cfg (dict | None): Config dict for activation layer in ConvModule. - Default: dict(type='ReLU'). - upsample_cfg (dict): The upsample config of the upsample module in - decoder. Default: dict(type='InterpConv'). - norm_eval (bool): Whether to set norm layers to eval mode, namely, - freeze running stats (mean and var). Note: Effect on Batch Norm - and its variants only. Default: False. - dcn (bool): Use deformable convolution in convolutional layer or not. - Default: None. - plugins (dict): plugins for convolutional layers. Default: None. - - Notice: - The input image size should be divisible by the whole downsample rate - of the encoder. More detail of the whole downsample rate can be found - in UNet._check_input_divisible. - - """ - - def __init__( - self, - in_channels=3, - base_channels=64, - num_stages=5, - strides=(1, 1, 1, 1, 1), - enc_num_convs=(2, 2, 2, 2, 2), - dec_num_convs=(2, 2, 2, 2), - downsamples=(True, True, True, True), - enc_dilations=(1, 1, 1, 1, 1), - dec_dilations=(1, 1, 1, 1), - with_cp=False, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - upsample_cfg=dict(type='InterpConv'), - norm_eval=False, - dcn=None, - plugins=None, - ): - super(UNet, self).__init__() - assert dcn is None, 'Not implemented yet.' - assert plugins is None, 'Not implemented yet.' - assert len(strides) == num_stages, ( - 'The length of strides should be equal to num_stages, ' - f'while the strides is {strides}, the length of ' - f'strides is {len(strides)}, and the num_stages is ' - f'{num_stages}.' - ) - assert len(enc_num_convs) == num_stages, ( - 'The length of enc_num_convs should be equal to num_stages, ' - f'while the enc_num_convs is {enc_num_convs}, the length of ' - f'enc_num_convs is {len(enc_num_convs)}, and the num_stages is ' - f'{num_stages}.' - ) - assert len(dec_num_convs) == (num_stages - 1), ( - 'The length of dec_num_convs should be equal to (num_stages-1), ' - f'while the dec_num_convs is {dec_num_convs}, the length of ' - f'dec_num_convs is {len(dec_num_convs)}, and the num_stages is ' - f'{num_stages}.' - ) - assert len(downsamples) == (num_stages - 1), ( - 'The length of downsamples should be equal to (num_stages-1), ' - f'while the downsamples is {downsamples}, the length of ' - f'downsamples is {len(downsamples)}, and the num_stages is ' - f'{num_stages}.' - ) - assert len(enc_dilations) == num_stages, ( - 'The length of enc_dilations should be equal to num_stages, ' - f'while the enc_dilations is {enc_dilations}, the length of ' - f'enc_dilations is {len(enc_dilations)}, and the num_stages is ' - f'{num_stages}.' - ) - assert len(dec_dilations) == (num_stages - 1), ( - 'The length of dec_dilations should be equal to (num_stages-1), ' - f'while the dec_dilations is {dec_dilations}, the length of ' - f'dec_dilations is {len(dec_dilations)}, and the num_stages is ' - f'{num_stages}.' - ) - self.num_stages = num_stages - self.strides = strides - self.downsamples = downsamples - self.norm_eval = norm_eval - self.base_channels = base_channels - - self.encoder = nn.ModuleList() - self.decoder = nn.ModuleList() - - for i in range(num_stages): - enc_conv_block = [] - if i != 0: - if strides[i] == 1 and downsamples[i - 1]: - enc_conv_block.append(nn.MaxPool2d(kernel_size=2)) - upsample = strides[i] != 1 or downsamples[i - 1] - self.decoder.append( - UpConvBlock( - conv_block=BasicConvBlock, - in_channels=base_channels * 2 ** i, - skip_channels=base_channels * 2 ** (i - 1), - out_channels=base_channels * 2 ** (i - 1), - num_convs=dec_num_convs[i - 1], - stride=1, - dilation=dec_dilations[i - 1], - with_cp=with_cp, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - upsample_cfg=upsample_cfg if upsample else None, - dcn=None, - plugins=None, - ) - ) - - enc_conv_block.append( - BasicConvBlock( - in_channels=in_channels, - out_channels=base_channels * 2 ** i, - num_convs=enc_num_convs[i], - stride=strides[i], - dilation=enc_dilations[i], - with_cp=with_cp, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - dcn=None, - plugins=None, - ) - ) - self.encoder.append((nn.Sequential(*enc_conv_block))) - in_channels = base_channels * 2 ** i - - def forward(self, x): - self._check_input_divisible(x) - enc_outs = [] - for enc in self.encoder: - x = enc(x) - enc_outs.append(x) - dec_outs = [x] - for i in reversed(range(len(self.decoder))): - x = self.decoder[i](enc_outs[i], x) - dec_outs.append(x) - - return dec_outs - - def train(self, mode=True): - """Convert the model into training mode while keep normalization layer - freezed.""" - super(UNet, self).train(mode) - if mode and self.norm_eval: - for m in self.modules(): - # trick: eval have effect on BatchNorm only - if isinstance(m, _BatchNorm): - m.eval() - - def _check_input_divisible(self, x): - h, w = x.shape[-2:] - whole_downsample_rate = 1 - for i in range(1, self.num_stages): - if self.strides[i] == 2 or self.downsamples[i - 1]: - whole_downsample_rate *= 2 - assert (h % whole_downsample_rate == 0) and (w % whole_downsample_rate == 0), ( - f'The input image size {(h, w)} should be divisible by the whole ' - f'downsample rate {whole_downsample_rate}, when num_stages is ' - f'{self.num_stages}, strides is {self.strides}, and downsamples ' - f'is {self.downsamples}.' - ) - - def init_weights(self, pretrained=None): - """Initialize the weights in backbone. - - Args: - pretrained (str, optional): Path to pre-trained weights. - Defaults to None. - """ - if isinstance(pretrained, str): - logger = get_root_logger() - load_checkpoint(self, pretrained, strict=False, logger=logger) - elif pretrained is None: - for m in self.modules(): - if isinstance(m, nn.Conv2d): - kaiming_init(m) - elif isinstance(m, (_BatchNorm, nn.GroupNorm)): - constant_init(m, 1) - else: - raise TypeError('pretrained must be a str or None') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py deleted file mode 100644 index 7dcf93aa357b..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py +++ /dev/null @@ -1,554 +0,0 @@ -# -------------------------------------------------------- -# UniFormer -# Copyright (c) 2022 SenseTime X-Lab -# Licensed under The MIT License [see LICENSE for details] -# Written by Kunchang Li -# -------------------------------------------------------- - -import math -from collections import OrderedDict -from functools import partial - -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.utils.checkpoint as checkpoint -from timm.models.layers import DropPath, to_2tuple, trunc_normal_ - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv_custom import load_checkpoint -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger - -from ..builder import BACKBONES - - -class Mlp(nn.Module): - def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.0): - super().__init__() - out_features = out_features or in_features - hidden_features = hidden_features or in_features - self.fc1 = nn.Linear(in_features, hidden_features) - self.act = act_layer() - self.fc2 = nn.Linear(hidden_features, out_features) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - x = self.drop(x) - x = self.fc2(x) - x = self.drop(x) - return x - - -class CMlp(nn.Module): - def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.0): - super().__init__() - out_features = out_features or in_features - hidden_features = hidden_features or in_features - self.fc1 = nn.Conv2d(in_features, hidden_features, 1) - self.act = act_layer() - self.fc2 = nn.Conv2d(hidden_features, out_features, 1) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - x = self.drop(x) - x = self.fc2(x) - x = self.drop(x) - return x - - -class CBlock(nn.Module): - def __init__( - self, - dim, - num_heads, - mlp_ratio=4.0, - qkv_bias=False, - qk_scale=None, - drop=0.0, - attn_drop=0.0, - drop_path=0.0, - act_layer=nn.GELU, - norm_layer=nn.LayerNorm, - ): - super().__init__() - self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) - self.norm1 = nn.BatchNorm2d(dim) - self.conv1 = nn.Conv2d(dim, dim, 1) - self.conv2 = nn.Conv2d(dim, dim, 1) - self.attn = nn.Conv2d(dim, dim, 5, padding=2, groups=dim) - # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here - self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() - self.norm2 = nn.BatchNorm2d(dim) - mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp = CMlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) - - def forward(self, x): - x = x + self.pos_embed(x) - x = x + self.drop_path(self.conv2(self.attn(self.conv1(self.norm1(x))))) - x = x + self.drop_path(self.mlp(self.norm2(x))) - return x - - -class Attention(nn.Module): - def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.0, proj_drop=0.0): - super().__init__() - self.num_heads = num_heads - head_dim = dim // num_heads - # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights - self.scale = qk_scale or head_dim ** -0.5 - - self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) - self.attn_drop = nn.Dropout(attn_drop) - self.proj = nn.Linear(dim, dim) - self.proj_drop = nn.Dropout(proj_drop) - - def forward(self, x): - B, N, C = x.shape - qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) - q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) - - attn = (q @ k.transpose(-2, -1)) * self.scale - attn = attn.softmax(dim=-1) - attn = self.attn_drop(attn) - - x = (attn @ v).transpose(1, 2).reshape(B, N, C) - x = self.proj(x) - x = self.proj_drop(x) - return x - - -class SABlock(nn.Module): - def __init__( - self, - dim, - num_heads, - mlp_ratio=4.0, - qkv_bias=False, - qk_scale=None, - drop=0.0, - attn_drop=0.0, - drop_path=0.0, - act_layer=nn.GELU, - norm_layer=nn.LayerNorm, - ): - super().__init__() - self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) - self.norm1 = norm_layer(dim) - self.attn = Attention( - dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop - ) - # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here - self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() - self.norm2 = norm_layer(dim) - mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) - - def forward(self, x): - x = x + self.pos_embed(x) - B, N, H, W = x.shape - x = x.flatten(2).transpose(1, 2) - x = x + self.drop_path(self.attn(self.norm1(x))) - x = x + self.drop_path(self.mlp(self.norm2(x))) - x = x.transpose(1, 2).reshape(B, N, H, W) - return x - - -def window_partition(x, window_size): - """ - Args: - x: (B, H, W, C) - window_size (int): window size - Returns: - windows: (num_windows*B, window_size, window_size, C) - """ - B, H, W, C = x.shape - x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) - windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) - return windows - - -def window_reverse(windows, window_size, H, W): - """ - Args: - windows: (num_windows*B, window_size, window_size, C) - window_size (int): Window size - H (int): Height of image - W (int): Width of image - Returns: - x: (B, H, W, C) - """ - B = int(windows.shape[0] / (H * W / window_size / window_size)) - x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) - x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) - return x - - -class SABlock_Windows(nn.Module): - def __init__( - self, - dim, - num_heads, - window_size=14, - mlp_ratio=4.0, - qkv_bias=False, - qk_scale=None, - drop=0.0, - attn_drop=0.0, - drop_path=0.0, - act_layer=nn.GELU, - norm_layer=nn.LayerNorm, - ): - super().__init__() - self.window_size = window_size - self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) - self.norm1 = norm_layer(dim) - self.attn = Attention( - dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop - ) - # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here - self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() - self.norm2 = norm_layer(dim) - mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) - - def forward(self, x): - x = x + self.pos_embed(x) - x = x.permute(0, 2, 3, 1) - B, H, W, C = x.shape - shortcut = x - x = self.norm1(x) - - pad_l = pad_t = 0 - pad_r = (self.window_size - W % self.window_size) % self.window_size - pad_b = (self.window_size - H % self.window_size) % self.window_size - x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) - _, Hp, Wp, _ = x.shape - - x_windows = window_partition(x, self.window_size) # nW*B, window_size, window_size, C - x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C - - # W-MSA/SW-MSA - attn_windows = self.attn(x_windows) # nW*B, window_size*window_size, C - - # merge windows - attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) - x = window_reverse(attn_windows, self.window_size, Hp, Wp) # B H' W' C - - # reverse cyclic shift - if pad_r > 0 or pad_b > 0: - x = x[:, :H, :W, :].contiguous() - - x = shortcut + self.drop_path(x) - x = x + self.drop_path(self.mlp(self.norm2(x))) - x = x.permute(0, 3, 1, 2).reshape(B, C, H, W) - return x - - -class PatchEmbed(nn.Module): - """ Image to Patch Embedding - """ - - def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): - super().__init__() - img_size = to_2tuple(img_size) - patch_size = to_2tuple(patch_size) - num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) - self.img_size = img_size - self.patch_size = patch_size - self.num_patches = num_patches - self.norm = nn.LayerNorm(embed_dim) - self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) - - def forward(self, x): - B, _, H, W = x.shape - x = self.proj(x) - B, _, H, W = x.shape - x = x.flatten(2).transpose(1, 2) - x = self.norm(x) - x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() - return x - - -@BACKBONES.register_module() -class UniFormer(nn.Module): - """ Vision Transformer - A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - - https://arxiv.org/abs/2010.11929 - """ - - def __init__( - self, - layers=[3, 4, 8, 3], - img_size=224, - in_chans=3, - num_classes=80, - embed_dim=[64, 128, 320, 512], - head_dim=64, - mlp_ratio=4.0, - qkv_bias=True, - qk_scale=None, - representation_size=None, - drop_rate=0.0, - attn_drop_rate=0.0, - drop_path_rate=0.0, - norm_layer=partial(nn.LayerNorm, eps=1e-6), - pretrained_path=None, - use_checkpoint=False, - checkpoint_num=[0, 0, 0, 0], - windows=False, - hybrid=False, - window_size=14, - ): - """ - Args: - layer (list): number of block in each layer - img_size (int, tuple): input image size - in_chans (int): number of input channels - num_classes (int): number of classes for classification head - embed_dim (int): embedding dimension - head_dim (int): dimension of attention heads - mlp_ratio (int): ratio of mlp hidden dim to embedding dim - qkv_bias (bool): enable bias for qkv if True - qk_scale (float): override default qk scale of head_dim ** -0.5 if set - representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set - drop_rate (float): dropout rate - attn_drop_rate (float): attention dropout rate - drop_path_rate (float): stochastic depth rate - norm_layer (nn.Module): normalization layer - pretrained_path (str): path of pretrained model - use_checkpoint (bool): whether use checkpoint - checkpoint_num (list): index for using checkpoint in every stage - windows (bool): whether use window MHRA - hybrid (bool): whether use hybrid MHRA - window_size (int): size of window (>14) - """ - super().__init__() - self.num_classes = num_classes - self.use_checkpoint = use_checkpoint - self.checkpoint_num = checkpoint_num - self.windows = windows - print(f'Use Checkpoint: {self.use_checkpoint}') - print(f'Checkpoint Number: {self.checkpoint_num}') - self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models - norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6) - - self.patch_embed1 = PatchEmbed(img_size=img_size, patch_size=4, in_chans=in_chans, embed_dim=embed_dim[0]) - self.patch_embed2 = PatchEmbed( - img_size=img_size // 4, patch_size=2, in_chans=embed_dim[0], embed_dim=embed_dim[1] - ) - self.patch_embed3 = PatchEmbed( - img_size=img_size // 8, patch_size=2, in_chans=embed_dim[1], embed_dim=embed_dim[2] - ) - self.patch_embed4 = PatchEmbed( - img_size=img_size // 16, patch_size=2, in_chans=embed_dim[2], embed_dim=embed_dim[3] - ) - - self.pos_drop = nn.Dropout(p=drop_rate) - dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(layers))] # stochastic depth decay rule - num_heads = [dim // head_dim for dim in embed_dim] - self.blocks1 = nn.ModuleList( - [ - CBlock( - dim=embed_dim[0], - num_heads=num_heads[0], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - attn_drop=attn_drop_rate, - drop_path=dpr[i], - norm_layer=norm_layer, - ) - for i in range(layers[0]) - ] - ) - self.norm1 = norm_layer(embed_dim[0]) - self.blocks2 = nn.ModuleList( - [ - CBlock( - dim=embed_dim[1], - num_heads=num_heads[1], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - attn_drop=attn_drop_rate, - drop_path=dpr[i + layers[0]], - norm_layer=norm_layer, - ) - for i in range(layers[1]) - ] - ) - self.norm2 = norm_layer(embed_dim[1]) - if self.windows: - print('Use local window for all blocks in stage3') - self.blocks3 = nn.ModuleList( - [ - SABlock_Windows( - dim=embed_dim[2], - num_heads=num_heads[2], - window_size=window_size, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - attn_drop=attn_drop_rate, - drop_path=dpr[i + layers[0] + layers[1]], - norm_layer=norm_layer, - ) - for i in range(layers[2]) - ] - ) - elif hybrid: - print('Use hybrid window for blocks in stage3') - block3 = [] - for i in range(layers[2]): - if (i + 1) % 4 == 0: - block3.append( - SABlock( - dim=embed_dim[2], - num_heads=num_heads[2], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - attn_drop=attn_drop_rate, - drop_path=dpr[i + layers[0] + layers[1]], - norm_layer=norm_layer, - ) - ) - else: - block3.append( - SABlock_Windows( - dim=embed_dim[2], - num_heads=num_heads[2], - window_size=window_size, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - attn_drop=attn_drop_rate, - drop_path=dpr[i + layers[0] + layers[1]], - norm_layer=norm_layer, - ) - ) - self.blocks3 = nn.ModuleList(block3) - else: - print('Use global window for all blocks in stage3') - self.blocks3 = nn.ModuleList( - [ - SABlock( - dim=embed_dim[2], - num_heads=num_heads[2], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - attn_drop=attn_drop_rate, - drop_path=dpr[i + layers[0] + layers[1]], - norm_layer=norm_layer, - ) - for i in range(layers[2]) - ] - ) - self.norm3 = norm_layer(embed_dim[2]) - self.blocks4 = nn.ModuleList( - [ - SABlock( - dim=embed_dim[3], - num_heads=num_heads[3], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - attn_drop=attn_drop_rate, - drop_path=dpr[i + layers[0] + layers[1] + layers[2]], - norm_layer=norm_layer, - ) - for i in range(layers[3]) - ] - ) - self.norm4 = norm_layer(embed_dim[3]) - - # Representation layer - if representation_size: - self.num_features = representation_size - self.pre_logits = nn.Sequential( - OrderedDict([('fc', nn.Linear(embed_dim, representation_size)), ('act', nn.Tanh())]) - ) - else: - self.pre_logits = nn.Identity() - - self.apply(self._init_weights) - self.init_weights(pretrained=pretrained_path) - - def init_weights(self, pretrained): - if isinstance(pretrained, str): - logger = get_root_logger() - load_checkpoint(self, pretrained, map_location='cpu', strict=False, logger=logger) - print(f'Load pretrained model from {pretrained}') - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - trunc_normal_(m.weight, std=0.02) - if isinstance(m, nn.Linear) and m.bias is not None: - nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.LayerNorm): - nn.init.constant_(m.bias, 0) - nn.init.constant_(m.weight, 1.0) - - @torch.jit.ignore - def no_weight_decay(self): - return {'pos_embed', 'cls_token'} - - def get_classifier(self): - return self.head - - def reset_classifier(self, num_classes, global_pool=''): - self.num_classes = num_classes - self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() - - def forward_features(self, x): - out = [] - x = self.patch_embed1(x) - x = self.pos_drop(x) - for i, blk in enumerate(self.blocks1): - if self.use_checkpoint and i < self.checkpoint_num[0]: - x = checkpoint.checkpoint(blk, x) - else: - x = blk(x) - x_out = self.norm1(x.permute(0, 2, 3, 1)) - out.append(x_out.permute(0, 3, 1, 2).contiguous()) - x = self.patch_embed2(x) - for i, blk in enumerate(self.blocks2): - if self.use_checkpoint and i < self.checkpoint_num[1]: - x = checkpoint.checkpoint(blk, x) - else: - x = blk(x) - x_out = self.norm2(x.permute(0, 2, 3, 1)) - out.append(x_out.permute(0, 3, 1, 2).contiguous()) - x = self.patch_embed3(x) - for i, blk in enumerate(self.blocks3): - if self.use_checkpoint and i < self.checkpoint_num[2]: - x = checkpoint.checkpoint(blk, x) - else: - x = blk(x) - x_out = self.norm3(x.permute(0, 2, 3, 1)) - out.append(x_out.permute(0, 3, 1, 2).contiguous()) - x = self.patch_embed4(x) - for i, blk in enumerate(self.blocks4): - if self.use_checkpoint and i < self.checkpoint_num[3]: - x = checkpoint.checkpoint(blk, x) - else: - x = blk(x) - x_out = self.norm4(x.permute(0, 2, 3, 1)) - out.append(x_out.permute(0, 3, 1, 2).contiguous()) - return tuple(out) - - def forward(self, x): - x = self.forward_features(x) - return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py deleted file mode 100644 index 883d56fd5bc9..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py +++ /dev/null @@ -1,443 +0,0 @@ -"""Modified from https://github.com/rwightman/pytorch-image- -models/blob/master/timm/models/vision_transformer.py.""" - -import math - -import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.utils.checkpoint as cp - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( - Conv2d, - Linear, - build_activation_layer, - build_norm_layer, - constant_init, - kaiming_init, - normal_init, -) -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import _load_checkpoint -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger - -from ..builder import BACKBONES -from ..utils import DropPath, trunc_normal_ - - -class Mlp(nn.Module): - """MLP layer for Encoder block. - - Args: - in_features(int): Input dimension for the first fully - connected layer. - hidden_features(int): Output dimension for the first fully - connected layer. - out_features(int): Output dementsion for the second fully - connected layer. - act_cfg(dict): Config dict for activation layer. - Default: dict(type='GELU'). - drop(float): Drop rate for the dropout layer. Dropout rate has - to be between 0 and 1. Default: 0. - """ - - def __init__(self, in_features, hidden_features=None, out_features=None, act_cfg=dict(type='GELU'), drop=0.0): - super(Mlp, self).__init__() - out_features = out_features or in_features - hidden_features = hidden_features or in_features - self.fc1 = Linear(in_features, hidden_features) - self.act = build_activation_layer(act_cfg) - self.fc2 = Linear(hidden_features, out_features) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - x = self.drop(x) - x = self.fc2(x) - x = self.drop(x) - return x - - -class Attention(nn.Module): - """Attention layer for Encoder block. - - Args: - dim (int): Dimension for the input vector. - num_heads (int): Number of parallel attention heads. - qkv_bias (bool): Enable bias for qkv if True. Default: False. - qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. - attn_drop (float): Drop rate for attention output weights. - Default: 0. - proj_drop (float): Drop rate for output weights. Default: 0. - """ - - def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.0, proj_drop=0.0): - super(Attention, self).__init__() - self.num_heads = num_heads - head_dim = dim // num_heads - self.scale = qk_scale or head_dim ** -0.5 - - self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) - self.attn_drop = nn.Dropout(attn_drop) - self.proj = Linear(dim, dim) - self.proj_drop = nn.Dropout(proj_drop) - - def forward(self, x): - b, n, c = x.shape - qkv = self.qkv(x).reshape(b, n, 3, self.num_heads, c // self.num_heads).permute(2, 0, 3, 1, 4) - q, k, v = qkv[0], qkv[1], qkv[2] - - attn = (q @ k.transpose(-2, -1)) * self.scale - attn = attn.softmax(dim=-1) - attn = self.attn_drop(attn) - - x = (attn @ v).transpose(1, 2).reshape(b, n, c) - x = self.proj(x) - x = self.proj_drop(x) - return x - - -class Block(nn.Module): - """Implements encoder block with residual connection. - - Args: - dim (int): The feature dimension. - num_heads (int): Number of parallel attention heads. - mlp_ratio (int): Ratio of mlp hidden dim to embedding dim. - qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. - drop (float): Drop rate for mlp output weights. Default: 0. - attn_drop (float): Drop rate for attention output weights. - Default: 0. - proj_drop (float): Drop rate for attn layer output weights. - Default: 0. - drop_path (float): Drop rate for paths of model. - Default: 0. - act_cfg (dict): Config dict for activation layer. - Default: dict(type='GELU'). - norm_cfg (dict): Config dict for normalization layer. - Default: dict(type='LN', requires_grad=True). - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. Default: False. - """ - - def __init__( - self, - dim, - num_heads, - mlp_ratio=4, - qkv_bias=False, - qk_scale=None, - drop=0.0, - attn_drop=0.0, - proj_drop=0.0, - drop_path=0.0, - act_cfg=dict(type='GELU'), - norm_cfg=dict(type='LN', eps=1e-6), - with_cp=False, - ): - super(Block, self).__init__() - self.with_cp = with_cp - _, self.norm1 = build_norm_layer(norm_cfg, dim) - self.attn = Attention(dim, num_heads, qkv_bias, qk_scale, attn_drop, proj_drop) - self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() - _, self.norm2 = build_norm_layer(norm_cfg, dim) - mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_cfg=act_cfg, drop=drop) - - def forward(self, x): - def _inner_forward(x): - out = x + self.drop_path(self.attn(self.norm1(x))) - out = out + self.drop_path(self.mlp(self.norm2(out))) - return out - - if self.with_cp and x.requires_grad: - out = cp.checkpoint(_inner_forward, x) - else: - out = _inner_forward(x) - - return out - - -class PatchEmbed(nn.Module): - """Image to Patch Embedding. - - Args: - img_size (int | tuple): Input image size. - default: 224. - patch_size (int): Width and height for a patch. - default: 16. - in_channels (int): Input channels for images. Default: 3. - embed_dim (int): The embedding dimension. Default: 768. - """ - - def __init__(self, img_size=224, patch_size=16, in_channels=3, embed_dim=768): - super(PatchEmbed, self).__init__() - if isinstance(img_size, int): - self.img_size = (img_size, img_size) - elif isinstance(img_size, tuple): - self.img_size = img_size - else: - raise TypeError('img_size must be type of int or tuple') - h, w = self.img_size - self.patch_size = (patch_size, patch_size) - self.num_patches = (h // patch_size) * (w // patch_size) - self.proj = Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size) - - def forward(self, x): - return self.proj(x).flatten(2).transpose(1, 2) - - -@BACKBONES.register_module() -class VisionTransformer(nn.Module): - """Vision transformer backbone. - - A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for - Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 - - Args: - img_size (tuple): input image size. Default: (224, 224). - patch_size (int, tuple): patch size. Default: 16. - in_channels (int): number of input channels. Default: 3. - embed_dim (int): embedding dimension. Default: 768. - depth (int): depth of transformer. Default: 12. - num_heads (int): number of attention heads. Default: 12. - mlp_ratio (int): ratio of mlp hidden dim to embedding dim. - Default: 4. - out_indices (list | tuple | int): Output from which stages. - Default: -1. - qkv_bias (bool): enable bias for qkv if True. Default: True. - qk_scale (float): override default qk scale of head_dim ** -0.5 if set. - drop_rate (float): dropout rate. Default: 0. - attn_drop_rate (float): attention dropout rate. Default: 0. - drop_path_rate (float): Rate of DropPath. Default: 0. - norm_cfg (dict): Config dict for normalization layer. - Default: dict(type='LN', eps=1e-6, requires_grad=True). - act_cfg (dict): Config dict for activation layer. - Default: dict(type='GELU'). - norm_eval (bool): Whether to set norm layers to eval mode, namely, - freeze running stats (mean and var). Note: Effect on Batch Norm - and its variants only. Default: False. - final_norm (bool): Whether to add a additional layer to normalize - final feature map. Default: False. - interpolate_mode (str): Select the interpolate mode for position - embeding vector resize. Default: bicubic. - with_cls_token (bool): If concatenating class token into image tokens - as transformer input. Default: True. - with_cp (bool): Use checkpoint or not. Using checkpoint - will save some memory while slowing down the training speed. - Default: False. - """ - - def __init__( - self, - img_size=(224, 224), - patch_size=16, - in_channels=3, - embed_dim=768, - depth=12, - num_heads=12, - mlp_ratio=4, - out_indices=11, - qkv_bias=True, - qk_scale=None, - drop_rate=0.0, - attn_drop_rate=0.0, - drop_path_rate=0.0, - norm_cfg=dict(type='LN', eps=1e-6, requires_grad=True), - act_cfg=dict(type='GELU'), - norm_eval=False, - final_norm=False, - with_cls_token=True, - interpolate_mode='bicubic', - with_cp=False, - ): - super(VisionTransformer, self).__init__() - self.img_size = img_size - self.patch_size = patch_size - self.features = self.embed_dim = embed_dim - self.patch_embed = PatchEmbed( - img_size=img_size, patch_size=patch_size, in_channels=in_channels, embed_dim=embed_dim - ) - - self.with_cls_token = with_cls_token - self.cls_token = nn.Parameter(torch.zeros(1, 1, self.embed_dim)) - self.pos_embed = nn.Parameter(torch.zeros(1, self.patch_embed.num_patches + 1, embed_dim)) - self.pos_drop = nn.Dropout(p=drop_rate) - - if isinstance(out_indices, int): - self.out_indices = [out_indices] - elif isinstance(out_indices, list) or isinstance(out_indices, tuple): - self.out_indices = out_indices - else: - raise TypeError('out_indices must be type of int, list or tuple') - - dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule - self.blocks = nn.ModuleList( - [ - Block( - dim=embed_dim, - num_heads=num_heads, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=dpr[i], - attn_drop=attn_drop_rate, - act_cfg=act_cfg, - norm_cfg=norm_cfg, - with_cp=with_cp, - ) - for i in range(depth) - ] - ) - - self.interpolate_mode = interpolate_mode - self.final_norm = final_norm - if final_norm: - _, self.norm = build_norm_layer(norm_cfg, embed_dim) - - self.norm_eval = norm_eval - self.with_cp = with_cp - - def init_weights(self, pretrained=None): - if isinstance(pretrained, str): - logger = get_root_logger() - checkpoint = _load_checkpoint(pretrained, logger=logger) - if 'state_dict' in checkpoint: - state_dict = checkpoint['state_dict'] - else: - state_dict = checkpoint - - if 'pos_embed' in state_dict.keys(): - if self.pos_embed.shape != state_dict['pos_embed'].shape: - logger.info( - msg=f'Resize the pos_embed shape from \ -{state_dict["pos_embed"].shape} to {self.pos_embed.shape}' - ) - h, w = self.img_size - pos_size = int(math.sqrt(state_dict['pos_embed'].shape[1] - 1)) - state_dict['pos_embed'] = self.resize_pos_embed( - state_dict['pos_embed'], (h, w), (pos_size, pos_size), self.patch_size, self.interpolate_mode - ) - - self.load_state_dict(state_dict, False) - - elif pretrained is None: - # We only implement the 'jax_impl' initialization implemented at - # https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501 - trunc_normal_(self.pos_embed, std=0.02) - trunc_normal_(self.cls_token, std=0.02) - for n, m in self.named_modules(): - if isinstance(m, Linear): - trunc_normal_(m.weight, std=0.02) - if m.bias is not None: - if 'mlp' in n: - normal_init(m.bias, std=1e-6) - else: - constant_init(m.bias, 0) - elif isinstance(m, Conv2d): - kaiming_init(m.weight, mode='fan_in') - if m.bias is not None: - constant_init(m.bias, 0) - elif isinstance(m, (_BatchNorm, nn.GroupNorm, nn.LayerNorm)): - constant_init(m.bias, 0) - constant_init(m.weight, 1.0) - else: - raise TypeError('pretrained must be a str or None') - - def _pos_embeding(self, img, patched_img, pos_embed): - """Positiong embeding method. - - Resize the pos_embed, if the input image size doesn't match - the training size. - Args: - img (torch.Tensor): The inference image tensor, the shape - must be [B, C, H, W]. - patched_img (torch.Tensor): The patched image, it should be - shape of [B, L1, C]. - pos_embed (torch.Tensor): The pos_embed weighs, it should be - shape of [B, L2, c]. - Return: - torch.Tensor: The pos encoded image feature. - """ - assert ( - patched_img.ndim == 3 and pos_embed.ndim == 3 - ), 'the shapes of patched_img and pos_embed must be [B, L, C]' - x_len, pos_len = patched_img.shape[1], pos_embed.shape[1] - if x_len != pos_len: - if pos_len == (self.img_size[0] // self.patch_size) * (self.img_size[1] // self.patch_size) + 1: - pos_h = self.img_size[0] // self.patch_size - pos_w = self.img_size[1] // self.patch_size - else: - raise ValueError('Unexpected shape of pos_embed, got {}.'.format(pos_embed.shape)) - pos_embed = self.resize_pos_embed( - pos_embed, img.shape[2:], (pos_h, pos_w), self.patch_size, self.interpolate_mode - ) - return self.pos_drop(patched_img + pos_embed) - - @staticmethod - def resize_pos_embed(pos_embed, input_shpae, pos_shape, patch_size, mode): - """Resize pos_embed weights. - - Resize pos_embed using bicubic interpolate method. - Args: - pos_embed (torch.Tensor): pos_embed weights. - input_shpae (tuple): Tuple for (input_h, intput_w). - pos_shape (tuple): Tuple for (pos_h, pos_w). - patch_size (int): Patch size. - Return: - torch.Tensor: The resized pos_embed of shape [B, L_new, C] - """ - assert pos_embed.ndim == 3, 'shape of pos_embed must be [B, L, C]' - input_h, input_w = input_shpae - pos_h, pos_w = pos_shape - cls_token_weight = pos_embed[:, 0] - pos_embed_weight = pos_embed[:, (-1 * pos_h * pos_w) :] - pos_embed_weight = pos_embed_weight.reshape(1, pos_h, pos_w, pos_embed.shape[2]).permute(0, 3, 1, 2) - pos_embed_weight = F.interpolate( - pos_embed_weight, size=[input_h // patch_size, input_w // patch_size], align_corners=False, mode=mode - ) - cls_token_weight = cls_token_weight.unsqueeze(1) - pos_embed_weight = torch.flatten(pos_embed_weight, 2).transpose(1, 2) - pos_embed = torch.cat((cls_token_weight, pos_embed_weight), dim=1) - return pos_embed - - def forward(self, inputs): - B = inputs.shape[0] - - x = self.patch_embed(inputs) - - cls_tokens = self.cls_token.expand(B, -1, -1) - x = torch.cat((cls_tokens, x), dim=1) - x = self._pos_embeding(inputs, x, self.pos_embed) - - if not self.with_cls_token: - # Remove class token for transformer input - x = x[:, 1:] - - outs = [] - for i, blk in enumerate(self.blocks): - x = blk(x) - if i == len(self.blocks) - 1: - if self.final_norm: - x = self.norm(x) - if i in self.out_indices: - if self.with_cls_token: - # Remove class token and reshape token for decoder head - out = x[:, 1:] - else: - out = x - B, _, C = out.shape - out = out.reshape( - B, inputs.shape[2] // self.patch_size, inputs.shape[3] // self.patch_size, C - ).permute(0, 3, 1, 2) - outs.append(out) - - return tuple(outs) - - def train(self, mode=True): - super(VisionTransformer, self).train(mode) - if mode and self.norm_eval: - for m in self.modules(): - if isinstance(m, nn.LayerNorm): - m.eval() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py deleted file mode 100644 index 4cc391e48a34..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py +++ /dev/null @@ -1,43 +0,0 @@ -import warnings - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import MODELS as MMCV_MODELS -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry - -MODELS = Registry('models', parent=MMCV_MODELS) - -BACKBONES = MODELS -NECKS = MODELS -HEADS = MODELS -LOSSES = MODELS -SEGMENTORS = MODELS - - -def build_backbone(cfg): - """Build backbone.""" - return BACKBONES.build(cfg) - - -def build_neck(cfg): - """Build neck.""" - return NECKS.build(cfg) - - -def build_head(cfg): - """Build head.""" - return HEADS.build(cfg) - - -def build_loss(cfg): - """Build loss.""" - return LOSSES.build(cfg) - - -def build_segmentor(cfg, train_cfg=None, test_cfg=None): - """Build segmentor.""" - if train_cfg is not None or test_cfg is not None: - warnings.warn('train_cfg and test_cfg is deprecated, ' 'please specify them in model', UserWarning) - assert ( - cfg.get('train_cfg') is None or train_cfg is None - ), 'train_cfg specified in both outer field and model field ' - assert cfg.get('test_cfg') is None or test_cfg is None, 'test_cfg specified in both outer field and model field ' - return SEGMENTORS.build(cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py deleted file mode 100644 index 1c4ab285953c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py +++ /dev/null @@ -1,45 +0,0 @@ -from .ann_head import ANNHead -from .apc_head import APCHead -from .aspp_head import ASPPHead -from .cc_head import CCHead -from .da_head import DAHead -from .dm_head import DMHead -from .dnl_head import DNLHead -from .ema_head import EMAHead -from .enc_head import EncHead -from .fcn_head import FCNHead -from .fpn_head import FPNHead -from .gc_head import GCHead -from .lraspp_head import LRASPPHead -from .nl_head import NLHead -from .ocr_head import OCRHead - -# from .point_head import PointHead -from .psa_head import PSAHead -from .psp_head import PSPHead -from .sep_aspp_head import DepthwiseSeparableASPPHead -from .sep_fcn_head import DepthwiseSeparableFCNHead -from .uper_head import UPerHead - -__all__ = [ - 'FCNHead', - 'PSPHead', - 'ASPPHead', - 'PSAHead', - 'NLHead', - 'GCHead', - 'CCHead', - 'UPerHead', - 'DepthwiseSeparableASPPHead', - 'ANNHead', - 'DAHead', - 'OCRHead', - 'EncHead', - 'DepthwiseSeparableFCNHead', - 'FPNHead', - 'EMAHead', - 'DNLHead', - 'APCHead', - 'DMHead', - 'LRASPPHead', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py deleted file mode 100644 index 363c155b214b..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py +++ /dev/null @@ -1,259 +0,0 @@ -import torch -import torch.nn as nn -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule - -from ..builder import HEADS -from ..utils import SelfAttentionBlock as _SelfAttentionBlock -from .decode_head import BaseDecodeHead - - -class PPMConcat(nn.ModuleList): - """Pyramid Pooling Module that only concat the features of each layer. - - Args: - pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid - Module. - """ - - def __init__(self, pool_scales=(1, 3, 6, 8)): - super(PPMConcat, self).__init__([nn.AdaptiveAvgPool2d(pool_scale) for pool_scale in pool_scales]) - - def forward(self, feats): - """Forward function.""" - ppm_outs = [] - for ppm in self: - ppm_out = ppm(feats) - ppm_outs.append(ppm_out.view(*feats.shape[:2], -1)) - concat_outs = torch.cat(ppm_outs, dim=2) - return concat_outs - - -class SelfAttentionBlock(_SelfAttentionBlock): - """Make a ANN used SelfAttentionBlock. - - Args: - low_in_channels (int): Input channels of lower level feature, - which is the key feature for self-attention. - high_in_channels (int): Input channels of higher level feature, - which is the query feature for self-attention. - channels (int): Output channels of key/query transform. - out_channels (int): Output channels. - share_key_query (bool): Whether share projection weight between key - and query projection. - query_scale (int): The scale of query feature map. - key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid - Module of key feature. - conv_cfg (dict|None): Config of conv layers. - norm_cfg (dict|None): Config of norm layers. - act_cfg (dict|None): Config of activation layers. - """ - - def __init__( - self, - low_in_channels, - high_in_channels, - channels, - out_channels, - share_key_query, - query_scale, - key_pool_scales, - conv_cfg, - norm_cfg, - act_cfg, - ): - key_psp = PPMConcat(key_pool_scales) - if query_scale > 1: - query_downsample = nn.MaxPool2d(kernel_size=query_scale) - else: - query_downsample = None - super(SelfAttentionBlock, self).__init__( - key_in_channels=low_in_channels, - query_in_channels=high_in_channels, - channels=channels, - out_channels=out_channels, - share_key_query=share_key_query, - query_downsample=query_downsample, - key_downsample=key_psp, - key_query_num_convs=1, - key_query_norm=True, - value_out_num_convs=1, - value_out_norm=False, - matmul_norm=True, - with_out=True, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - - -class AFNB(nn.Module): - """Asymmetric Fusion Non-local Block(AFNB) - - Args: - low_in_channels (int): Input channels of lower level feature, - which is the key feature for self-attention. - high_in_channels (int): Input channels of higher level feature, - which is the query feature for self-attention. - channels (int): Output channels of key/query transform. - out_channels (int): Output channels. - and query projection. - query_scales (tuple[int]): The scales of query feature map. - Default: (1,) - key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid - Module of key feature. - conv_cfg (dict|None): Config of conv layers. - norm_cfg (dict|None): Config of norm layers. - act_cfg (dict|None): Config of activation layers. - """ - - def __init__( - self, - low_in_channels, - high_in_channels, - channels, - out_channels, - query_scales, - key_pool_scales, - conv_cfg, - norm_cfg, - act_cfg, - ): - super(AFNB, self).__init__() - self.stages = nn.ModuleList() - for query_scale in query_scales: - self.stages.append( - SelfAttentionBlock( - low_in_channels=low_in_channels, - high_in_channels=high_in_channels, - channels=channels, - out_channels=out_channels, - share_key_query=False, - query_scale=query_scale, - key_pool_scales=key_pool_scales, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - ) - self.bottleneck = ConvModule( - out_channels + high_in_channels, out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None - ) - - def forward(self, low_feats, high_feats): - """Forward function.""" - priors = [stage(high_feats, low_feats) for stage in self.stages] - context = torch.stack(priors, dim=0).sum(dim=0) - output = self.bottleneck(torch.cat([context, high_feats], 1)) - return output - - -class APNB(nn.Module): - """Asymmetric Pyramid Non-local Block (APNB) - - Args: - in_channels (int): Input channels of key/query feature, - which is the key feature for self-attention. - channels (int): Output channels of key/query transform. - out_channels (int): Output channels. - query_scales (tuple[int]): The scales of query feature map. - Default: (1,) - key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid - Module of key feature. - conv_cfg (dict|None): Config of conv layers. - norm_cfg (dict|None): Config of norm layers. - act_cfg (dict|None): Config of activation layers. - """ - - def __init__( - self, in_channels, channels, out_channels, query_scales, key_pool_scales, conv_cfg, norm_cfg, act_cfg - ): - super(APNB, self).__init__() - self.stages = nn.ModuleList() - for query_scale in query_scales: - self.stages.append( - SelfAttentionBlock( - low_in_channels=in_channels, - high_in_channels=in_channels, - channels=channels, - out_channels=out_channels, - share_key_query=True, - query_scale=query_scale, - key_pool_scales=key_pool_scales, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - ) - self.bottleneck = ConvModule( - 2 * in_channels, out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg - ) - - def forward(self, feats): - """Forward function.""" - priors = [stage(feats, feats) for stage in self.stages] - context = torch.stack(priors, dim=0).sum(dim=0) - output = self.bottleneck(torch.cat([context, feats], 1)) - return output - - -@HEADS.register_module() -class ANNHead(BaseDecodeHead): - """Asymmetric Non-local Neural Networks for Semantic Segmentation. - - This head is the implementation of `ANNNet - `_. - - Args: - project_channels (int): Projection channels for Nonlocal. - query_scales (tuple[int]): The scales of query feature map. - Default: (1,) - key_pool_scales (tuple[int]): The pooling scales of key feature map. - Default: (1, 3, 6, 8). - """ - - def __init__(self, project_channels, query_scales=(1,), key_pool_scales=(1, 3, 6, 8), **kwargs): - super(ANNHead, self).__init__(input_transform='multiple_select', **kwargs) - assert len(self.in_channels) == 2 - low_in_channels, high_in_channels = self.in_channels - self.project_channels = project_channels - self.fusion = AFNB( - low_in_channels=low_in_channels, - high_in_channels=high_in_channels, - out_channels=high_in_channels, - channels=project_channels, - query_scales=query_scales, - key_pool_scales=key_pool_scales, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - self.bottleneck = ConvModule( - high_in_channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - self.context = APNB( - in_channels=self.channels, - out_channels=self.channels, - channels=project_channels, - query_scales=query_scales, - key_pool_scales=key_pool_scales, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - - def forward(self, inputs): - """Forward function.""" - low_feats, high_feats = self._transform_inputs(inputs) - output = self.fusion(low_feats, high_feats) - output = self.dropout(output) - output = self.bottleneck(output) - output = self.context(output) - output = self.cls_seg(output) - - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py deleted file mode 100644 index 04721c1d46f6..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py +++ /dev/null @@ -1,141 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule - -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize -from ..builder import HEADS -from .decode_head import BaseDecodeHead - - -class ACM(nn.Module): - """Adaptive Context Module used in APCNet. - - Args: - pool_scale (int): Pooling scale used in Adaptive Context - Module to extract region features. - fusion (bool): Add one conv to fuse residual feature. - in_channels (int): Input channels. - channels (int): Channels after modules, before conv_seg. - conv_cfg (dict | None): Config of conv layers. - norm_cfg (dict | None): Config of norm layers. - act_cfg (dict): Config of activation layers. - """ - - def __init__(self, pool_scale, fusion, in_channels, channels, conv_cfg, norm_cfg, act_cfg): - super(ACM, self).__init__() - self.pool_scale = pool_scale - self.fusion = fusion - self.in_channels = in_channels - self.channels = channels - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.act_cfg = act_cfg - self.pooled_redu_conv = ConvModule( - self.in_channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg - ) - - self.input_redu_conv = ConvModule( - self.in_channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg - ) - - self.global_info = ConvModule( - self.channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg - ) - - self.gla = nn.Conv2d(self.channels, self.pool_scale ** 2, 1, 1, 0) - - self.residual_conv = ConvModule( - self.channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg - ) - - if self.fusion: - self.fusion_conv = ConvModule( - self.channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg - ) - - def forward(self, x): - """Forward function.""" - pooled_x = F.adaptive_avg_pool2d(x, self.pool_scale) - # [batch_size, channels, h, w] - x = self.input_redu_conv(x) - # [batch_size, channels, pool_scale, pool_scale] - pooled_x = self.pooled_redu_conv(pooled_x) - batch_size = x.size(0) - # [batch_size, pool_scale * pool_scale, channels] - pooled_x = pooled_x.view(batch_size, self.channels, -1).permute(0, 2, 1).contiguous() - # [batch_size, h * w, pool_scale * pool_scale] - affinity_matrix = ( - self.gla(x + resize(self.global_info(F.adaptive_avg_pool2d(x, 1)), size=x.shape[2:])) - .permute(0, 2, 3, 1) - .reshape(batch_size, -1, self.pool_scale ** 2) - ) - affinity_matrix = F.sigmoid(affinity_matrix) - # [batch_size, h * w, channels] - z_out = torch.matmul(affinity_matrix, pooled_x) - # [batch_size, channels, h * w] - z_out = z_out.permute(0, 2, 1).contiguous() - # [batch_size, channels, h, w] - z_out = z_out.view(batch_size, self.channels, x.size(2), x.size(3)) - z_out = self.residual_conv(z_out) - z_out = F.relu(z_out + x) - if self.fusion: - z_out = self.fusion_conv(z_out) - - return z_out - - -@HEADS.register_module() -class APCHead(BaseDecodeHead): - """Adaptive Pyramid Context Network for Semantic Segmentation. - - This head is the implementation of - `APCNet `_. - - Args: - pool_scales (tuple[int]): Pooling scales used in Adaptive Context - Module. Default: (1, 2, 3, 6). - fusion (bool): Add one conv to fuse residual feature. - """ - - def __init__(self, pool_scales=(1, 2, 3, 6), fusion=True, **kwargs): - super(APCHead, self).__init__(**kwargs) - assert isinstance(pool_scales, (list, tuple)) - self.pool_scales = pool_scales - self.fusion = fusion - acm_modules = [] - for pool_scale in self.pool_scales: - acm_modules.append( - ACM( - pool_scale, - self.fusion, - self.in_channels, - self.channels, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - ) - self.acm_modules = nn.ModuleList(acm_modules) - self.bottleneck = ConvModule( - self.in_channels + len(pool_scales) * self.channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - - def forward(self, inputs): - """Forward function.""" - x = self._transform_inputs(inputs) - acm_outs = [x] - for acm_module in self.acm_modules: - acm_outs.append(acm_module(x)) - acm_outs = torch.cat(acm_outs, dim=1) - output = self.bottleneck(acm_outs) - output = self.cls_seg(output) - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py deleted file mode 100644 index 8d121ca61222..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py +++ /dev/null @@ -1,106 +0,0 @@ -import torch -import torch.nn as nn -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule - -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize -from ..builder import HEADS -from .decode_head import BaseDecodeHead - - -class ASPPModule(nn.ModuleList): - """Atrous Spatial Pyramid Pooling (ASPP) Module. - - Args: - dilations (tuple[int]): Dilation rate of each layer. - in_channels (int): Input channels. - channels (int): Channels after modules, before conv_seg. - conv_cfg (dict|None): Config of conv layers. - norm_cfg (dict|None): Config of norm layers. - act_cfg (dict): Config of activation layers. - """ - - def __init__(self, dilations, in_channels, channels, conv_cfg, norm_cfg, act_cfg): - super(ASPPModule, self).__init__() - self.dilations = dilations - self.in_channels = in_channels - self.channels = channels - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.act_cfg = act_cfg - for dilation in dilations: - self.append( - ConvModule( - self.in_channels, - self.channels, - 1 if dilation == 1 else 3, - dilation=dilation, - padding=0 if dilation == 1 else dilation, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - ) - - def forward(self, x): - """Forward function.""" - aspp_outs = [] - for aspp_module in self: - aspp_outs.append(aspp_module(x)) - - return aspp_outs - - -@HEADS.register_module() -class ASPPHead(BaseDecodeHead): - """Rethinking Atrous Convolution for Semantic Image Segmentation. - - This head is the implementation of `DeepLabV3 - `_. - - Args: - dilations (tuple[int]): Dilation rates for ASPP module. - Default: (1, 6, 12, 18). - """ - - def __init__(self, dilations=(1, 6, 12, 18), **kwargs): - super(ASPPHead, self).__init__(**kwargs) - assert isinstance(dilations, (list, tuple)) - self.dilations = dilations - self.image_pool = nn.Sequential( - nn.AdaptiveAvgPool2d(1), - ConvModule( - self.in_channels, - self.channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ), - ) - self.aspp_modules = ASPPModule( - dilations, - self.in_channels, - self.channels, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - self.bottleneck = ConvModule( - (len(dilations) + 1) * self.channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - - def forward(self, inputs): - """Forward function.""" - x = self._transform_inputs(inputs) - aspp_outs = [resize(self.image_pool(x), size=x.size()[2:], mode='bilinear', align_corners=self.align_corners)] - aspp_outs.extend(self.aspp_modules(x)) - aspp_outs = torch.cat(aspp_outs, dim=1) - output = self.bottleneck(aspp_outs) - output = self.cls_seg(output) - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py deleted file mode 100644 index 40f498d3679c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py +++ /dev/null @@ -1,56 +0,0 @@ -from abc import ABCMeta, abstractmethod - -from .decode_head import BaseDecodeHead - - -class BaseCascadeDecodeHead(BaseDecodeHead, metaclass=ABCMeta): - """Base class for cascade decode head used in - :class:`CascadeEncoderDecoder.""" - - def __init__(self, *args, **kwargs): - super(BaseCascadeDecodeHead, self).__init__(*args, **kwargs) - - @abstractmethod - def forward(self, inputs, prev_output): - """Placeholder of forward function.""" - pass - - def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, train_cfg): - """Forward function for training. - Args: - inputs (list[Tensor]): List of multi-level img features. - prev_output (Tensor): The output of previous decode head. - img_metas (list[dict]): List of image info dict where each dict - has: 'img_shape', 'scale_factor', 'flip', and may also contain - 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. - For details on the values of these keys see - `mmseg/datasets/pipelines/formatting.py:Collect`. - gt_semantic_seg (Tensor): Semantic segmentation masks - used if the architecture supports semantic segmentation task. - train_cfg (dict): The training config. - - Returns: - dict[str, Tensor]: a dictionary of loss components - """ - seg_logits = self.forward(inputs, prev_output) - losses = self.losses(seg_logits, gt_semantic_seg) - - return losses - - def forward_test(self, inputs, prev_output, img_metas, test_cfg): - """Forward function for testing. - - Args: - inputs (list[Tensor]): List of multi-level img features. - prev_output (Tensor): The output of previous decode head. - img_metas (list[dict]): List of image info dict where each dict - has: 'img_shape', 'scale_factor', 'flip', and may also contain - 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. - For details on the values of these keys see - `mmseg/datasets/pipelines/formatting.py:Collect`. - test_cfg (dict): The testing config. - - Returns: - Tensor: Output segmentation map. - """ - return self.forward(inputs, prev_output) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py deleted file mode 100644 index 98e0340501d5..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py +++ /dev/null @@ -1,41 +0,0 @@ -import torch - -from ..builder import HEADS -from .fcn_head import FCNHead - -try: - from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import CrissCrossAttention -except ModuleNotFoundError: - CrissCrossAttention = None - - -@HEADS.register_module() -class CCHead(FCNHead): - """CCNet: Criss-Cross Attention for Semantic Segmentation. - - This head is the implementation of `CCNet - `_. - - Args: - recurrence (int): Number of recurrence of Criss Cross Attention - module. Default: 2. - """ - - def __init__(self, recurrence=2, **kwargs): - if CrissCrossAttention is None: - raise RuntimeError('Please install mmcv-full for ' 'CrissCrossAttention ops') - super(CCHead, self).__init__(num_convs=2, **kwargs) - self.recurrence = recurrence - self.cca = CrissCrossAttention(self.channels) - - def forward(self, inputs): - """Forward function.""" - x = self._transform_inputs(inputs) - output = self.convs[0](x) - for _ in range(self.recurrence): - output = self.cca(output) - output = self.convs[1](output) - if self.concat_input: - output = self.conv_cat(torch.cat([x, output], dim=1)) - output = self.cls_seg(output) - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py deleted file mode 100644 index d63ed0e84dd5..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py +++ /dev/null @@ -1,174 +0,0 @@ -import torch -import torch.nn.functional as F -from torch import nn - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, Scale -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import add_prefix - -from ..builder import HEADS -from ..utils import SelfAttentionBlock as _SelfAttentionBlock -from .decode_head import BaseDecodeHead - - -class PAM(_SelfAttentionBlock): - """Position Attention Module (PAM) - - Args: - in_channels (int): Input channels of key/query feature. - channels (int): Output channels of key/query transform. - """ - - def __init__(self, in_channels, channels): - super(PAM, self).__init__( - key_in_channels=in_channels, - query_in_channels=in_channels, - channels=channels, - out_channels=in_channels, - share_key_query=False, - query_downsample=None, - key_downsample=None, - key_query_num_convs=1, - key_query_norm=False, - value_out_num_convs=1, - value_out_norm=False, - matmul_norm=False, - with_out=False, - conv_cfg=None, - norm_cfg=None, - act_cfg=None, - ) - - self.gamma = Scale(0) - - def forward(self, x): - """Forward function.""" - out = super(PAM, self).forward(x, x) - - out = self.gamma(out) + x - return out - - -class CAM(nn.Module): - """Channel Attention Module (CAM)""" - - def __init__(self): - super(CAM, self).__init__() - self.gamma = Scale(0) - - def forward(self, x): - """Forward function.""" - batch_size, channels, height, width = x.size() - proj_query = x.view(batch_size, channels, -1) - proj_key = x.view(batch_size, channels, -1).permute(0, 2, 1) - energy = torch.bmm(proj_query, proj_key) - energy_new = torch.max(energy, -1, keepdim=True)[0].expand_as(energy) - energy - attention = F.softmax(energy_new, dim=-1) - proj_value = x.view(batch_size, channels, -1) - - out = torch.bmm(attention, proj_value) - out = out.view(batch_size, channels, height, width) - - out = self.gamma(out) + x - return out - - -@HEADS.register_module() -class DAHead(BaseDecodeHead): - """Dual Attention Network for Scene Segmentation. - - This head is the implementation of `DANet - `_. - - Args: - pam_channels (int): The channels of Position Attention Module(PAM). - """ - - def __init__(self, pam_channels, **kwargs): - super(DAHead, self).__init__(**kwargs) - self.pam_channels = pam_channels - self.pam_in_conv = ConvModule( - self.in_channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - self.pam = PAM(self.channels, pam_channels) - self.pam_out_conv = ConvModule( - self.channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - self.pam_conv_seg = nn.Conv2d(self.channels, self.num_classes, kernel_size=1) - - self.cam_in_conv = ConvModule( - self.in_channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - self.cam = CAM() - self.cam_out_conv = ConvModule( - self.channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - self.cam_conv_seg = nn.Conv2d(self.channels, self.num_classes, kernel_size=1) - - def pam_cls_seg(self, feat): - """PAM feature classification.""" - if self.dropout is not None: - feat = self.dropout(feat) - output = self.pam_conv_seg(feat) - return output - - def cam_cls_seg(self, feat): - """CAM feature classification.""" - if self.dropout is not None: - feat = self.dropout(feat) - output = self.cam_conv_seg(feat) - return output - - def forward(self, inputs): - """Forward function.""" - x = self._transform_inputs(inputs) - pam_feat = self.pam_in_conv(x) - pam_feat = self.pam(pam_feat) - pam_feat = self.pam_out_conv(pam_feat) - pam_out = self.pam_cls_seg(pam_feat) - - cam_feat = self.cam_in_conv(x) - cam_feat = self.cam(cam_feat) - cam_feat = self.cam_out_conv(cam_feat) - cam_out = self.cam_cls_seg(cam_feat) - - feat_sum = pam_feat + cam_feat - pam_cam_out = self.cls_seg(feat_sum) - - return pam_cam_out, pam_out, cam_out - - def forward_test(self, inputs, img_metas, test_cfg): - """Forward function for testing, only ``pam_cam`` is used.""" - return self.forward(inputs)[0] - - def losses(self, seg_logit, seg_label): - """Compute ``pam_cam``, ``pam``, ``cam`` loss.""" - pam_cam_seg_logit, pam_seg_logit, cam_seg_logit = seg_logit - loss = dict() - loss.update(add_prefix(super(DAHead, self).losses(pam_cam_seg_logit, seg_label), 'pam_cam')) - loss.update(add_prefix(super(DAHead, self).losses(pam_seg_logit, seg_label), 'pam')) - loss.update(add_prefix(super(DAHead, self).losses(cam_seg_logit, seg_label), 'cam')) - return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py deleted file mode 100644 index e1aa23944d86..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py +++ /dev/null @@ -1,226 +0,0 @@ -from abc import ABCMeta, abstractmethod - -import torch -import torch.nn as nn -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import normal_init -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import auto_fp16, force_fp32 - -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import build_pixel_sampler -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize -from ..builder import build_loss -from ..losses import accuracy - - -class BaseDecodeHead(nn.Module, metaclass=ABCMeta): - """Base class for BaseDecodeHead. - - Args: - in_channels (int|Sequence[int]): Input channels. - channels (int): Channels after modules, before conv_seg. - num_classes (int): Number of classes. - dropout_ratio (float): Ratio of dropout layer. Default: 0.1. - conv_cfg (dict|None): Config of conv layers. Default: None. - norm_cfg (dict|None): Config of norm layers. Default: None. - act_cfg (dict): Config of activation layers. - Default: dict(type='ReLU') - in_index (int|Sequence[int]): Input feature index. Default: -1 - input_transform (str|None): Transformation type of input features. - Options: 'resize_concat', 'multiple_select', None. - 'resize_concat': Multiple feature maps will be resize to the - same size as first one and than concat together. - Usually used in FCN head of HRNet. - 'multiple_select': Multiple feature maps will be bundle into - a list and passed into decode head. - None: Only one select feature map is allowed. - Default: None. - loss_decode (dict): Config of decode loss. - Default: dict(type='CrossEntropyLoss'). - ignore_index (int | None): The label index to be ignored. When using - masked BCE loss, ignore_index should be set to None. Default: 255 - sampler (dict|None): The config of segmentation map sampler. - Default: None. - align_corners (bool): align_corners argument of F.interpolate. - Default: False. - """ - - def __init__( - self, - in_channels, - channels, - *, - num_classes, - dropout_ratio=0.1, - conv_cfg=None, - norm_cfg=None, - act_cfg=dict(type='ReLU'), - in_index=-1, - input_transform=None, - loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - ignore_index=255, - sampler=None, - align_corners=False, - ): - super(BaseDecodeHead, self).__init__() - self._init_inputs(in_channels, in_index, input_transform) - self.channels = channels - self.num_classes = num_classes - self.dropout_ratio = dropout_ratio - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.act_cfg = act_cfg - self.in_index = in_index - self.loss_decode = build_loss(loss_decode) - self.ignore_index = ignore_index - self.align_corners = align_corners - if sampler is not None: - self.sampler = build_pixel_sampler(sampler, context=self) - else: - self.sampler = None - - self.conv_seg = nn.Conv2d(channels, num_classes, kernel_size=1) - if dropout_ratio > 0: - self.dropout = nn.Dropout2d(dropout_ratio) - else: - self.dropout = None - self.fp16_enabled = False - - def extra_repr(self): - """Extra repr.""" - s = ( - f'input_transform={self.input_transform}, ' - f'ignore_index={self.ignore_index}, ' - f'align_corners={self.align_corners}' - ) - return s - - def _init_inputs(self, in_channels, in_index, input_transform): - """Check and initialize input transforms. - - The in_channels, in_index and input_transform must match. - Specifically, when input_transform is None, only single feature map - will be selected. So in_channels and in_index must be of type int. - When input_transform - - Args: - in_channels (int|Sequence[int]): Input channels. - in_index (int|Sequence[int]): Input feature index. - input_transform (str|None): Transformation type of input features. - Options: 'resize_concat', 'multiple_select', None. - 'resize_concat': Multiple feature maps will be resize to the - same size as first one and than concat together. - Usually used in FCN head of HRNet. - 'multiple_select': Multiple feature maps will be bundle into - a list and passed into decode head. - None: Only one select feature map is allowed. - """ - - if input_transform is not None: - assert input_transform in ['resize_concat', 'multiple_select'] - self.input_transform = input_transform - self.in_index = in_index - if input_transform is not None: - assert isinstance(in_channels, (list, tuple)) - assert isinstance(in_index, (list, tuple)) - assert len(in_channels) == len(in_index) - if input_transform == 'resize_concat': - self.in_channels = sum(in_channels) - else: - self.in_channels = in_channels - else: - assert isinstance(in_channels, int) - assert isinstance(in_index, int) - self.in_channels = in_channels - - def init_weights(self): - """Initialize weights of classification layer.""" - normal_init(self.conv_seg, mean=0, std=0.01) - - def _transform_inputs(self, inputs): - """Transform inputs for decoder. - - Args: - inputs (list[Tensor]): List of multi-level img features. - - Returns: - Tensor: The transformed inputs - """ - - if self.input_transform == 'resize_concat': - inputs = [inputs[i] for i in self.in_index] - upsampled_inputs = [ - resize(input=x, size=inputs[0].shape[2:], mode='bilinear', align_corners=self.align_corners) - for x in inputs - ] - inputs = torch.cat(upsampled_inputs, dim=1) - elif self.input_transform == 'multiple_select': - inputs = [inputs[i] for i in self.in_index] - else: - inputs = inputs[self.in_index] - - return inputs - - @auto_fp16() - @abstractmethod - def forward(self, inputs): - """Placeholder of forward function.""" - pass - - def forward_train(self, inputs, img_metas, gt_semantic_seg, train_cfg): - """Forward function for training. - Args: - inputs (list[Tensor]): List of multi-level img features. - img_metas (list[dict]): List of image info dict where each dict - has: 'img_shape', 'scale_factor', 'flip', and may also contain - 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. - For details on the values of these keys see - `mmseg/datasets/pipelines/formatting.py:Collect`. - gt_semantic_seg (Tensor): Semantic segmentation masks - used if the architecture supports semantic segmentation task. - train_cfg (dict): The training config. - - Returns: - dict[str, Tensor]: a dictionary of loss components - """ - seg_logits = self.forward(inputs) - losses = self.losses(seg_logits, gt_semantic_seg) - return losses - - def forward_test(self, inputs, img_metas, test_cfg): - """Forward function for testing. - - Args: - inputs (list[Tensor]): List of multi-level img features. - img_metas (list[dict]): List of image info dict where each dict - has: 'img_shape', 'scale_factor', 'flip', and may also contain - 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. - For details on the values of these keys see - `mmseg/datasets/pipelines/formatting.py:Collect`. - test_cfg (dict): The testing config. - - Returns: - Tensor: Output segmentation map. - """ - return self.forward(inputs) - - def cls_seg(self, feat): - """Classify each pixel.""" - if self.dropout is not None: - feat = self.dropout(feat) - output = self.conv_seg(feat) - return output - - @force_fp32(apply_to=('seg_logit',)) - def losses(self, seg_logit, seg_label): - """Compute segmentation loss.""" - loss = dict() - seg_logit = resize( - input=seg_logit, size=seg_label.shape[2:], mode='bilinear', align_corners=self.align_corners - ) - if self.sampler is not None: - seg_weight = self.sampler.sample(seg_logit, seg_label) - else: - seg_weight = None - seg_label = seg_label.squeeze(1) - loss['loss_seg'] = self.loss_decode(seg_logit, seg_label, weight=seg_weight, ignore_index=self.ignore_index) - loss['acc_seg'] = accuracy(seg_logit, seg_label) - return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py deleted file mode 100644 index a5a58165b326..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py +++ /dev/null @@ -1,137 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( - ConvModule, - build_activation_layer, - build_norm_layer, -) - -from ..builder import HEADS -from .decode_head import BaseDecodeHead - - -class DCM(nn.Module): - """Dynamic Convolutional Module used in DMNet. - - Args: - filter_size (int): The filter size of generated convolution kernel - used in Dynamic Convolutional Module. - fusion (bool): Add one conv to fuse DCM output feature. - in_channels (int): Input channels. - channels (int): Channels after modules, before conv_seg. - conv_cfg (dict | None): Config of conv layers. - norm_cfg (dict | None): Config of norm layers. - act_cfg (dict): Config of activation layers. - """ - - def __init__(self, filter_size, fusion, in_channels, channels, conv_cfg, norm_cfg, act_cfg): - super(DCM, self).__init__() - self.filter_size = filter_size - self.fusion = fusion - self.in_channels = in_channels - self.channels = channels - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.act_cfg = act_cfg - self.filter_gen_conv = nn.Conv2d(self.in_channels, self.channels, 1, 1, 0) - - self.input_redu_conv = ConvModule( - self.in_channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg - ) - - if self.norm_cfg is not None: - self.norm = build_norm_layer(self.norm_cfg, self.channels)[1] - else: - self.norm = None - self.activate = build_activation_layer(self.act_cfg) - - if self.fusion: - self.fusion_conv = ConvModule( - self.channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg - ) - - def forward(self, x): - """Forward function.""" - generated_filter = self.filter_gen_conv(F.adaptive_avg_pool2d(x, self.filter_size)) - x = self.input_redu_conv(x) - b, c, h, w = x.shape - # [1, b * c, h, w], c = self.channels - x = x.view(1, b * c, h, w) - # [b * c, 1, filter_size, filter_size] - generated_filter = generated_filter.view(b * c, 1, self.filter_size, self.filter_size) - pad = (self.filter_size - 1) // 2 - if (self.filter_size - 1) % 2 == 0: - p2d = (pad, pad, pad, pad) - else: - p2d = (pad + 1, pad, pad + 1, pad) - x = F.pad(input=x, pad=p2d, mode='constant', value=0) - # [1, b * c, h, w] - output = F.conv2d(input=x, weight=generated_filter, groups=b * c) - # [b, c, h, w] - output = output.view(b, c, h, w) - if self.norm is not None: - output = self.norm(output) - output = self.activate(output) - - if self.fusion: - output = self.fusion_conv(output) - - return output - - -@HEADS.register_module() -class DMHead(BaseDecodeHead): - """Dynamic Multi-scale Filters for Semantic Segmentation. - - This head is the implementation of - `DMNet `_. - - Args: - filter_sizes (tuple[int]): The size of generated convolutional filters - used in Dynamic Convolutional Module. Default: (1, 3, 5, 7). - fusion (bool): Add one conv to fuse DCM output feature. - """ - - def __init__(self, filter_sizes=(1, 3, 5, 7), fusion=False, **kwargs): - super(DMHead, self).__init__(**kwargs) - assert isinstance(filter_sizes, (list, tuple)) - self.filter_sizes = filter_sizes - self.fusion = fusion - dcm_modules = [] - for filter_size in self.filter_sizes: - dcm_modules.append( - DCM( - filter_size, - self.fusion, - self.in_channels, - self.channels, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - ) - self.dcm_modules = nn.ModuleList(dcm_modules) - self.bottleneck = ConvModule( - self.in_channels + len(filter_sizes) * self.channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - - def forward(self, inputs): - """Forward function.""" - x = self._transform_inputs(inputs) - dcm_outs = [x] - for dcm_module in self.dcm_modules: - dcm_outs.append(dcm_module(x)) - dcm_outs = torch.cat(dcm_outs, dim=1) - output = self.bottleneck(dcm_outs) - output = self.cls_seg(output) - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py deleted file mode 100644 index 2ecd75787808..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py +++ /dev/null @@ -1,126 +0,0 @@ -import torch -from torch import nn - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import NonLocal2d - -from ..builder import HEADS -from .fcn_head import FCNHead - - -class DisentangledNonLocal2d(NonLocal2d): - """Disentangled Non-Local Blocks. - - Args: - temperature (float): Temperature to adjust attention. Default: 0.05 - """ - - def __init__(self, *arg, temperature, **kwargs): - super().__init__(*arg, **kwargs) - self.temperature = temperature - self.conv_mask = nn.Conv2d(self.in_channels, 1, kernel_size=1) - - def embedded_gaussian(self, theta_x, phi_x): - """Embedded gaussian with temperature.""" - - # NonLocal2d pairwise_weight: [N, HxW, HxW] - pairwise_weight = torch.matmul(theta_x, phi_x) - if self.use_scale: - # theta_x.shape[-1] is `self.inter_channels` - pairwise_weight /= theta_x.shape[-1] ** 0.5 - pairwise_weight /= self.temperature - pairwise_weight = pairwise_weight.softmax(dim=-1) - return pairwise_weight - - def forward(self, x): - # x: [N, C, H, W] - n = x.size(0) - - # g_x: [N, HxW, C] - g_x = self.g(x).view(n, self.inter_channels, -1) - g_x = g_x.permute(0, 2, 1) - - # theta_x: [N, HxW, C], phi_x: [N, C, HxW] - if self.mode == 'gaussian': - theta_x = x.view(n, self.in_channels, -1) - theta_x = theta_x.permute(0, 2, 1) - if self.sub_sample: - phi_x = self.phi(x).view(n, self.in_channels, -1) - else: - phi_x = x.view(n, self.in_channels, -1) - elif self.mode == 'concatenation': - theta_x = self.theta(x).view(n, self.inter_channels, -1, 1) - phi_x = self.phi(x).view(n, self.inter_channels, 1, -1) - else: - theta_x = self.theta(x).view(n, self.inter_channels, -1) - theta_x = theta_x.permute(0, 2, 1) - phi_x = self.phi(x).view(n, self.inter_channels, -1) - - # subtract mean - theta_x -= theta_x.mean(dim=-2, keepdim=True) - phi_x -= phi_x.mean(dim=-1, keepdim=True) - - pairwise_func = getattr(self, self.mode) - # pairwise_weight: [N, HxW, HxW] - pairwise_weight = pairwise_func(theta_x, phi_x) - - # y: [N, HxW, C] - y = torch.matmul(pairwise_weight, g_x) - # y: [N, C, H, W] - y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, *x.size()[2:]) - - # unary_mask: [N, 1, HxW] - unary_mask = self.conv_mask(x) - unary_mask = unary_mask.view(n, 1, -1) - unary_mask = unary_mask.softmax(dim=-1) - # unary_x: [N, 1, C] - unary_x = torch.matmul(unary_mask, g_x) - # unary_x: [N, C, 1, 1] - unary_x = unary_x.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, 1, 1) - - output = x + self.conv_out(y + unary_x) - - return output - - -@HEADS.register_module() -class DNLHead(FCNHead): - """Disentangled Non-Local Neural Networks. - - This head is the implementation of `DNLNet - `_. - - Args: - reduction (int): Reduction factor of projection transform. Default: 2. - use_scale (bool): Whether to scale pairwise_weight by - sqrt(1/inter_channels). Default: False. - mode (str): The nonlocal mode. Options are 'embedded_gaussian', - 'dot_product'. Default: 'embedded_gaussian.'. - temperature (float): Temperature to adjust attention. Default: 0.05 - """ - - def __init__(self, reduction=2, use_scale=True, mode='embedded_gaussian', temperature=0.05, **kwargs): - super(DNLHead, self).__init__(num_convs=2, **kwargs) - self.reduction = reduction - self.use_scale = use_scale - self.mode = mode - self.temperature = temperature - self.dnl_block = DisentangledNonLocal2d( - in_channels=self.channels, - reduction=self.reduction, - use_scale=self.use_scale, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - mode=self.mode, - temperature=self.temperature, - ) - - def forward(self, inputs): - """Forward function.""" - x = self._transform_inputs(inputs) - output = self.convs[0](x) - output = self.dnl_block(output) - output = self.convs[1](output) - if self.concat_input: - output = self.conv_cat(torch.cat([x, output], dim=1)) - output = self.cls_seg(output) - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py deleted file mode 100644 index f66406d38ff8..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py +++ /dev/null @@ -1,155 +0,0 @@ -import math - -import torch -import torch.distributed as dist -import torch.nn as nn -import torch.nn.functional as F -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule - -from ..builder import HEADS -from .decode_head import BaseDecodeHead - - -def reduce_mean(tensor): - """Reduce mean when distributed training.""" - if not (dist.is_available() and dist.is_initialized()): - return tensor - tensor = tensor.clone() - dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM) - return tensor - - -class EMAModule(nn.Module): - """Expectation Maximization Attention Module used in EMANet. - - Args: - channels (int): Channels of the whole module. - num_bases (int): Number of bases. - num_stages (int): Number of the EM iterations. - """ - - def __init__(self, channels, num_bases, num_stages, momentum): - super(EMAModule, self).__init__() - assert num_stages >= 1, 'num_stages must be at least 1!' - self.num_bases = num_bases - self.num_stages = num_stages - self.momentum = momentum - - bases = torch.zeros(1, channels, self.num_bases) - bases.normal_(0, math.sqrt(2.0 / self.num_bases)) - # [1, channels, num_bases] - bases = F.normalize(bases, dim=1, p=2) - self.register_buffer('bases', bases) - - def forward(self, feats): - """Forward function.""" - batch_size, channels, height, width = feats.size() - # [batch_size, channels, height*width] - feats = feats.view(batch_size, channels, height * width) - # [batch_size, channels, num_bases] - bases = self.bases.repeat(batch_size, 1, 1) - - with torch.no_grad(): - for i in range(self.num_stages): - # [batch_size, height*width, num_bases] - attention = torch.einsum('bcn,bck->bnk', feats, bases) - attention = F.softmax(attention, dim=2) - # l1 norm - attention_normed = F.normalize(attention, dim=1, p=1) - # [batch_size, channels, num_bases] - bases = torch.einsum('bcn,bnk->bck', feats, attention_normed) - # l2 norm - bases = F.normalize(bases, dim=1, p=2) - - feats_recon = torch.einsum('bck,bnk->bcn', bases, attention) - feats_recon = feats_recon.view(batch_size, channels, height, width) - - if self.training: - bases = bases.mean(dim=0, keepdim=True) - bases = reduce_mean(bases) - # l2 norm - bases = F.normalize(bases, dim=1, p=2) - self.bases = (1 - self.momentum) * self.bases + self.momentum * bases - - return feats_recon - - -@HEADS.register_module() -class EMAHead(BaseDecodeHead): - """Expectation Maximization Attention Networks for Semantic Segmentation. - - This head is the implementation of `EMANet - `_. - - Args: - ema_channels (int): EMA module channels - num_bases (int): Number of bases. - num_stages (int): Number of the EM iterations. - concat_input (bool): Whether concat the input and output of convs - before classification layer. Default: True - momentum (float): Momentum to update the base. Default: 0.1. - """ - - def __init__(self, ema_channels, num_bases, num_stages, concat_input=True, momentum=0.1, **kwargs): - super(EMAHead, self).__init__(**kwargs) - self.ema_channels = ema_channels - self.num_bases = num_bases - self.num_stages = num_stages - self.concat_input = concat_input - self.momentum = momentum - self.ema_module = EMAModule(self.ema_channels, self.num_bases, self.num_stages, self.momentum) - - self.ema_in_conv = ConvModule( - self.in_channels, - self.ema_channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - # project (0, inf) -> (-inf, inf) - self.ema_mid_conv = ConvModule( - self.ema_channels, self.ema_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=None, act_cfg=None - ) - for param in self.ema_mid_conv.parameters(): - param.requires_grad = False - - self.ema_out_conv = ConvModule( - self.ema_channels, self.ema_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=None - ) - self.bottleneck = ConvModule( - self.ema_channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - if self.concat_input: - self.conv_cat = ConvModule( - self.in_channels + self.channels, - self.channels, - kernel_size=3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - - def forward(self, inputs): - """Forward function.""" - x = self._transform_inputs(inputs) - feats = self.ema_in_conv(x) - identity = feats - feats = self.ema_mid_conv(feats) - recon = self.ema_module(feats) - recon = F.relu(recon, inplace=True) - recon = self.ema_out_conv(recon) - output = F.relu(identity + recon, inplace=True) - output = self.bottleneck(output) - if self.concat_input: - output = self.conv_cat(torch.cat([x, output], dim=1)) - output = self.cls_seg(output) - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py deleted file mode 100644 index 8e94db9cebb4..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py +++ /dev/null @@ -1,174 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, build_norm_layer - -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import Encoding, resize -from ..builder import HEADS, build_loss -from .decode_head import BaseDecodeHead - - -class EncModule(nn.Module): - """Encoding Module used in EncNet. - - Args: - in_channels (int): Input channels. - num_codes (int): Number of code words. - conv_cfg (dict|None): Config of conv layers. - norm_cfg (dict|None): Config of norm layers. - act_cfg (dict): Config of activation layers. - """ - - def __init__(self, in_channels, num_codes, conv_cfg, norm_cfg, act_cfg): - super(EncModule, self).__init__() - self.encoding_project = ConvModule( - in_channels, in_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg - ) - # TODO: resolve this hack - # change to 1d - if norm_cfg is not None: - encoding_norm_cfg = norm_cfg.copy() - if encoding_norm_cfg['type'] in ['BN', 'IN']: - encoding_norm_cfg['type'] += '1d' - else: - encoding_norm_cfg['type'] = encoding_norm_cfg['type'].replace('2d', '1d') - else: - # fallback to BN1d - encoding_norm_cfg = dict(type='BN1d') - self.encoding = nn.Sequential( - Encoding(channels=in_channels, num_codes=num_codes), - build_norm_layer(encoding_norm_cfg, num_codes)[1], - nn.ReLU(inplace=True), - ) - self.fc = nn.Sequential(nn.Linear(in_channels, in_channels), nn.Sigmoid()) - - def forward(self, x): - """Forward function.""" - encoding_projection = self.encoding_project(x) - encoding_feat = self.encoding(encoding_projection).mean(dim=1) - batch_size, channels, _, _ = x.size() - gamma = self.fc(encoding_feat) - y = gamma.view(batch_size, channels, 1, 1) - output = F.relu_(x + x * y) - return encoding_feat, output - - -@HEADS.register_module() -class EncHead(BaseDecodeHead): - """Context Encoding for Semantic Segmentation. - - This head is the implementation of `EncNet - `_. - - Args: - num_codes (int): Number of code words. Default: 32. - use_se_loss (bool): Whether use Semantic Encoding Loss (SE-loss) to - regularize the training. Default: True. - add_lateral (bool): Whether use lateral connection to fuse features. - Default: False. - loss_se_decode (dict): Config of decode loss. - Default: dict(type='CrossEntropyLoss', use_sigmoid=True). - """ - - def __init__( - self, - num_codes=32, - use_se_loss=True, - add_lateral=False, - loss_se_decode=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2), - **kwargs - ): - super(EncHead, self).__init__(input_transform='multiple_select', **kwargs) - self.use_se_loss = use_se_loss - self.add_lateral = add_lateral - self.num_codes = num_codes - self.bottleneck = ConvModule( - self.in_channels[-1], - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - if add_lateral: - self.lateral_convs = nn.ModuleList() - for in_channels in self.in_channels[:-1]: # skip the last one - self.lateral_convs.append( - ConvModule( - in_channels, - self.channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - ) - self.fusion = ConvModule( - len(self.in_channels) * self.channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - self.enc_module = EncModule( - self.channels, num_codes=num_codes, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg - ) - if self.use_se_loss: - self.loss_se_decode = build_loss(loss_se_decode) - self.se_layer = nn.Linear(self.channels, self.num_classes) - - def forward(self, inputs): - """Forward function.""" - inputs = self._transform_inputs(inputs) - feat = self.bottleneck(inputs[-1]) - if self.add_lateral: - laterals = [ - resize(lateral_conv(inputs[i]), size=feat.shape[2:], mode='bilinear', align_corners=self.align_corners) - for i, lateral_conv in enumerate(self.lateral_convs) - ] - feat = self.fusion(torch.cat([feat, *laterals], 1)) - encode_feat, output = self.enc_module(feat) - output = self.cls_seg(output) - if self.use_se_loss: - se_output = self.se_layer(encode_feat) - return output, se_output - else: - return output - - def forward_test(self, inputs, img_metas, test_cfg): - """Forward function for testing, ignore se_loss.""" - if self.use_se_loss: - return self.forward(inputs)[0] - else: - return self.forward(inputs) - - @staticmethod - def _convert_to_onehot_labels(seg_label, num_classes): - """Convert segmentation label to onehot. - - Args: - seg_label (Tensor): Segmentation label of shape (N, H, W). - num_classes (int): Number of classes. - - Returns: - Tensor: Onehot labels of shape (N, num_classes). - """ - - batch_size = seg_label.size(0) - onehot_labels = seg_label.new_zeros((batch_size, num_classes)) - for i in range(batch_size): - hist = seg_label[i].float().histc(bins=num_classes, min=0, max=num_classes - 1) - onehot_labels[i] = hist > 0 - return onehot_labels - - def losses(self, seg_logit, seg_label): - """Compute segmentation and semantic encoding loss.""" - seg_logit, se_seg_logit = seg_logit - loss = dict() - loss.update(super(EncHead, self).losses(seg_logit, seg_label)) - se_loss = self.loss_se_decode(se_seg_logit, self._convert_to_onehot_labels(seg_label, self.num_classes)) - loss['loss_se'] = se_loss - return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py deleted file mode 100644 index 7e1a34a2a416..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py +++ /dev/null @@ -1,81 +0,0 @@ -import torch -import torch.nn as nn -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule - -from ..builder import HEADS -from .decode_head import BaseDecodeHead - - -@HEADS.register_module() -class FCNHead(BaseDecodeHead): - """Fully Convolution Networks for Semantic Segmentation. - - This head is implemented of `FCNNet `_. - - Args: - num_convs (int): Number of convs in the head. Default: 2. - kernel_size (int): The kernel size for convs in the head. Default: 3. - concat_input (bool): Whether concat the input and output of convs - before classification layer. - dilation (int): The dilation rate for convs in the head. Default: 1. - """ - - def __init__(self, num_convs=2, kernel_size=3, concat_input=True, dilation=1, **kwargs): - assert num_convs >= 0 and dilation > 0 and isinstance(dilation, int) - self.num_convs = num_convs - self.concat_input = concat_input - self.kernel_size = kernel_size - super(FCNHead, self).__init__(**kwargs) - if num_convs == 0: - assert self.in_channels == self.channels - - conv_padding = (kernel_size // 2) * dilation - convs = [] - convs.append( - ConvModule( - self.in_channels, - self.channels, - kernel_size=kernel_size, - padding=conv_padding, - dilation=dilation, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - ) - for i in range(num_convs - 1): - convs.append( - ConvModule( - self.channels, - self.channels, - kernel_size=kernel_size, - padding=conv_padding, - dilation=dilation, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - ) - if num_convs == 0: - self.convs = nn.Identity() - else: - self.convs = nn.Sequential(*convs) - if self.concat_input: - self.conv_cat = ConvModule( - self.in_channels + self.channels, - self.channels, - kernel_size=kernel_size, - padding=kernel_size // 2, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - - def forward(self, inputs): - """Forward function.""" - x = self._transform_inputs(inputs) - output = self.convs(x) - if self.concat_input: - output = self.conv_cat(torch.cat([x, output], dim=1)) - output = self.cls_seg(output) - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py deleted file mode 100644 index 28637489e7a7..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py +++ /dev/null @@ -1,61 +0,0 @@ -import numpy as np -import torch.nn as nn -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule - -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize -from ..builder import HEADS -from .decode_head import BaseDecodeHead - - -@HEADS.register_module() -class FPNHead(BaseDecodeHead): - """Panoptic Feature Pyramid Networks. - - This head is the implementation of `Semantic FPN - `_. - - Args: - feature_strides (tuple[int]): The strides for input feature maps. - stack_lateral. All strides suppose to be power of 2. The first - one is of largest resolution. - """ - - def __init__(self, feature_strides, **kwargs): - super(FPNHead, self).__init__(input_transform='multiple_select', **kwargs) - assert len(feature_strides) == len(self.in_channels) - assert min(feature_strides) == feature_strides[0] - self.feature_strides = feature_strides - - self.scale_heads = nn.ModuleList() - for i in range(len(feature_strides)): - head_length = max(1, int(np.log2(feature_strides[i]) - np.log2(feature_strides[0]))) - scale_head = [] - for k in range(head_length): - scale_head.append( - ConvModule( - self.in_channels[i] if k == 0 else self.channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - ) - if feature_strides[i] != feature_strides[0]: - scale_head.append(nn.Upsample(scale_factor=2, mode='bilinear', align_corners=self.align_corners)) - self.scale_heads.append(nn.Sequential(*scale_head)) - - def forward(self, inputs): - - x = self._transform_inputs(inputs) - - output = self.scale_heads[0](x[0]) - for i in range(1, len(self.feature_strides)): - # non inplace - output = output + resize( - self.scale_heads[i](x[i]), size=output.shape[2:], mode='bilinear', align_corners=self.align_corners - ) - - output = self.cls_seg(output) - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py deleted file mode 100644 index 8898bdffe5c6..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py +++ /dev/null @@ -1,41 +0,0 @@ -import torch -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ContextBlock - -from ..builder import HEADS -from .fcn_head import FCNHead - - -@HEADS.register_module() -class GCHead(FCNHead): - """GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond. - - This head is the implementation of `GCNet - `_. - - Args: - ratio (float): Multiplier of channels ratio. Default: 1/4. - pooling_type (str): The pooling type of context aggregation. - Options are 'att', 'avg'. Default: 'avg'. - fusion_types (tuple[str]): The fusion type for feature fusion. - Options are 'channel_add', 'channel_mul'. Default: ('channel_add',) - """ - - def __init__(self, ratio=1 / 4.0, pooling_type='att', fusion_types=('channel_add',), **kwargs): - super(GCHead, self).__init__(num_convs=2, **kwargs) - self.ratio = ratio - self.pooling_type = pooling_type - self.fusion_types = fusion_types - self.gc_block = ContextBlock( - in_channels=self.channels, ratio=self.ratio, pooling_type=self.pooling_type, fusion_types=self.fusion_types - ) - - def forward(self, inputs): - """Forward function.""" - x = self._transform_inputs(inputs) - output = self.convs[0](x) - output = self.gc_block(output) - output = self.convs[1](output) - if self.concat_input: - output = self.conv_cat(torch.cat([x, output], dim=1)) - output = self.cls_seg(output) - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py deleted file mode 100644 index 75e2fa4b1e19..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py +++ /dev/null @@ -1,77 +0,0 @@ -import torch -import torch.nn as nn -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import is_tuple_of -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule - -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize -from ..builder import HEADS -from .decode_head import BaseDecodeHead - - -@HEADS.register_module() -class LRASPPHead(BaseDecodeHead): - """Lite R-ASPP (LRASPP) head is proposed in Searching for MobileNetV3. - - This head is the improved implementation of `Searching for MobileNetV3 - `_. - - Args: - branch_channels (tuple[int]): The number of output channels in every - each branch. Default: (32, 64). - """ - - def __init__(self, branch_channels=(32, 64), **kwargs): - super(LRASPPHead, self).__init__(**kwargs) - if self.input_transform != 'multiple_select': - raise ValueError( - 'in Lite R-ASPP (LRASPP) head, input_transform ' - f'must be \'multiple_select\'. But received ' - f'\'{self.input_transform}\'' - ) - assert is_tuple_of(branch_channels, int) - assert len(branch_channels) == len(self.in_channels) - 1 - self.branch_channels = branch_channels - - self.convs = nn.Sequential() - self.conv_ups = nn.Sequential() - for i in range(len(branch_channels)): - self.convs.add_module(f'conv{i}', nn.Conv2d(self.in_channels[i], branch_channels[i], 1, bias=False)) - self.conv_ups.add_module( - f'conv_up{i}', - ConvModule( - self.channels + branch_channels[i], - self.channels, - 1, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - bias=False, - ), - ) - - self.conv_up_input = nn.Conv2d(self.channels, self.channels, 1) - - self.aspp_conv = ConvModule( - self.in_channels[-1], self.channels, 1, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, bias=False - ) - self.image_pool = nn.Sequential( - nn.AvgPool2d(kernel_size=49, stride=(16, 20)), - ConvModule(self.in_channels[2], self.channels, 1, act_cfg=dict(type='Sigmoid'), bias=False), - ) - - def forward(self, inputs): - """Forward function.""" - inputs = self._transform_inputs(inputs) - - x = inputs[-1] - - x = self.aspp_conv(x) * resize( - self.image_pool(x), size=x.size()[2:], mode='bilinear', align_corners=self.align_corners - ) - x = self.conv_up_input(x) - - for i in range(len(self.branch_channels) - 1, -1, -1): - x = resize(x, size=inputs[i].size()[2:], mode='bilinear', align_corners=self.align_corners) - x = torch.cat([x, self.convs[i](inputs[i])], 1) - x = self.conv_ups[i](x) - - return self.cls_seg(x) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py deleted file mode 100644 index 0f3def19ccc5..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py +++ /dev/null @@ -1,46 +0,0 @@ -import torch -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import NonLocal2d - -from ..builder import HEADS -from .fcn_head import FCNHead - - -@HEADS.register_module() -class NLHead(FCNHead): - """Non-local Neural Networks. - - This head is the implementation of `NLNet - `_. - - Args: - reduction (int): Reduction factor of projection transform. Default: 2. - use_scale (bool): Whether to scale pairwise_weight by - sqrt(1/inter_channels). Default: True. - mode (str): The nonlocal mode. Options are 'embedded_gaussian', - 'dot_product'. Default: 'embedded_gaussian.'. - """ - - def __init__(self, reduction=2, use_scale=True, mode='embedded_gaussian', **kwargs): - super(NLHead, self).__init__(num_convs=2, **kwargs) - self.reduction = reduction - self.use_scale = use_scale - self.mode = mode - self.nl_block = NonLocal2d( - in_channels=self.channels, - reduction=self.reduction, - use_scale=self.use_scale, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - mode=self.mode, - ) - - def forward(self, inputs): - """Forward function.""" - x = self._transform_inputs(inputs) - output = self.convs[0](x) - output = self.nl_block(output) - output = self.convs[1](output) - if self.concat_input: - output = self.conv_cat(torch.cat([x, output], dim=1)) - output = self.cls_seg(output) - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py deleted file mode 100644 index c5d9c3bfa89b..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py +++ /dev/null @@ -1,124 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule - -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize -from ..builder import HEADS -from ..utils import SelfAttentionBlock as _SelfAttentionBlock -from .cascade_decode_head import BaseCascadeDecodeHead - - -class SpatialGatherModule(nn.Module): - """Aggregate the context features according to the initial predicted - probability distribution. - - Employ the soft-weighted method to aggregate the context. - """ - - def __init__(self, scale): - super(SpatialGatherModule, self).__init__() - self.scale = scale - - def forward(self, feats, probs): - """Forward function.""" - batch_size, num_classes, height, width = probs.size() - channels = feats.size(1) - probs = probs.view(batch_size, num_classes, -1) - feats = feats.view(batch_size, channels, -1) - # [batch_size, height*width, num_classes] - feats = feats.permute(0, 2, 1) - # [batch_size, channels, height*width] - probs = F.softmax(self.scale * probs, dim=2) - # [batch_size, channels, num_classes] - ocr_context = torch.matmul(probs, feats) - ocr_context = ocr_context.permute(0, 2, 1).contiguous().unsqueeze(3) - return ocr_context - - -class ObjectAttentionBlock(_SelfAttentionBlock): - """Make a OCR used SelfAttentionBlock.""" - - def __init__(self, in_channels, channels, scale, conv_cfg, norm_cfg, act_cfg): - if scale > 1: - query_downsample = nn.MaxPool2d(kernel_size=scale) - else: - query_downsample = None - super(ObjectAttentionBlock, self).__init__( - key_in_channels=in_channels, - query_in_channels=in_channels, - channels=channels, - out_channels=in_channels, - share_key_query=False, - query_downsample=query_downsample, - key_downsample=None, - key_query_num_convs=2, - key_query_norm=True, - value_out_num_convs=1, - value_out_norm=True, - matmul_norm=True, - with_out=True, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - self.bottleneck = ConvModule( - in_channels * 2, in_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg - ) - - def forward(self, query_feats, key_feats): - """Forward function.""" - context = super(ObjectAttentionBlock, self).forward(query_feats, key_feats) - output = self.bottleneck(torch.cat([context, query_feats], dim=1)) - if self.query_downsample is not None: - output = resize(query_feats) - - return output - - -@HEADS.register_module() -class OCRHead(BaseCascadeDecodeHead): - """Object-Contextual Representations for Semantic Segmentation. - - This head is the implementation of `OCRNet - `_. - - Args: - ocr_channels (int): The intermediate channels of OCR block. - scale (int): The scale of probability map in SpatialGatherModule in - Default: 1. - """ - - def __init__(self, ocr_channels, scale=1, **kwargs): - super(OCRHead, self).__init__(**kwargs) - self.ocr_channels = ocr_channels - self.scale = scale - self.object_context_block = ObjectAttentionBlock( - self.channels, - self.ocr_channels, - self.scale, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - self.spatial_gather_module = SpatialGatherModule(self.scale) - - self.bottleneck = ConvModule( - self.in_channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - - def forward(self, inputs, prev_output): - """Forward function.""" - x = self._transform_inputs(inputs) - feats = self.bottleneck(x) - context = self.spatial_gather_module(feats, prev_output) - object_context = self.object_context_block(feats, context) - output = self.cls_seg(object_context) - - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py deleted file mode 100644 index 7e4c0ef8e10f..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py +++ /dev/null @@ -1,312 +0,0 @@ -# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend/point_head/point_head.py # noqa - -import torch -import torch.nn as nn -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, normal_init -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import point_sample - -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models.builder import HEADS -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize -from ..losses import accuracy -from .cascade_decode_head import BaseCascadeDecodeHead - - -def calculate_uncertainty(seg_logits): - """Estimate uncertainty based on seg logits. - - For each location of the prediction ``seg_logits`` we estimate - uncertainty as the difference between top first and top second - predicted logits. - - Args: - seg_logits (Tensor): Semantic segmentation logits, - shape (batch_size, num_classes, height, width). - - Returns: - scores (Tensor): T uncertainty scores with the most uncertain - locations having the highest uncertainty score, shape ( - batch_size, 1, height, width) - """ - top2_scores = torch.topk(seg_logits, k=2, dim=1)[0] - return (top2_scores[:, 1] - top2_scores[:, 0]).unsqueeze(1) - - -@HEADS.register_module() -class PointHead(BaseCascadeDecodeHead): - """A mask point head use in PointRend. - - ``PointHead`` use shared multi-layer perceptron (equivalent to - nn.Conv1d) to predict the logit of input points. The fine-grained feature - and coarse feature will be concatenate together for predication. - - Args: - num_fcs (int): Number of fc layers in the head. Default: 3. - in_channels (int): Number of input channels. Default: 256. - fc_channels (int): Number of fc channels. Default: 256. - num_classes (int): Number of classes for logits. Default: 80. - class_agnostic (bool): Whether use class agnostic classification. - If so, the output channels of logits will be 1. Default: False. - coarse_pred_each_layer (bool): Whether concatenate coarse feature with - the output of each fc layer. Default: True. - conv_cfg (dict|None): Dictionary to construct and config conv layer. - Default: dict(type='Conv1d')) - norm_cfg (dict|None): Dictionary to construct and config norm layer. - Default: None. - loss_point (dict): Dictionary to construct and config loss layer of - point head. Default: dict(type='CrossEntropyLoss', use_mask=True, - loss_weight=1.0). - """ - - def __init__( - self, - num_fcs=3, - coarse_pred_each_layer=True, - conv_cfg=dict(type='Conv1d'), - norm_cfg=None, - act_cfg=dict(type='ReLU', inplace=False), - **kwargs - ): - super(PointHead, self).__init__( - input_transform='multiple_select', conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, **kwargs - ) - - self.num_fcs = num_fcs - self.coarse_pred_each_layer = coarse_pred_each_layer - - fc_in_channels = sum(self.in_channels) + self.num_classes - fc_channels = self.channels - self.fcs = nn.ModuleList() - for k in range(num_fcs): - fc = ConvModule( - fc_in_channels, - fc_channels, - kernel_size=1, - stride=1, - padding=0, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - self.fcs.append(fc) - fc_in_channels = fc_channels - fc_in_channels += self.num_classes if self.coarse_pred_each_layer else 0 - self.fc_seg = nn.Conv1d(fc_in_channels, self.num_classes, kernel_size=1, stride=1, padding=0) - if self.dropout_ratio > 0: - self.dropout = nn.Dropout(self.dropout_ratio) - delattr(self, 'conv_seg') - - def init_weights(self): - """Initialize weights of classification layer.""" - normal_init(self.fc_seg, std=0.001) - - def cls_seg(self, feat): - """Classify each pixel with fc.""" - if self.dropout is not None: - feat = self.dropout(feat) - output = self.fc_seg(feat) - return output - - def forward(self, fine_grained_point_feats, coarse_point_feats): - x = torch.cat([fine_grained_point_feats, coarse_point_feats], dim=1) - for fc in self.fcs: - x = fc(x) - if self.coarse_pred_each_layer: - x = torch.cat((x, coarse_point_feats), dim=1) - return self.cls_seg(x) - - def _get_fine_grained_point_feats(self, x, points): - """Sample from fine grained features. - - Args: - x (list[Tensor]): Feature pyramid from by neck or backbone. - points (Tensor): Point coordinates, shape (batch_size, - num_points, 2). - - Returns: - fine_grained_feats (Tensor): Sampled fine grained feature, - shape (batch_size, sum(channels of x), num_points). - """ - - fine_grained_feats_list = [point_sample(_, points, align_corners=self.align_corners) for _ in x] - if len(fine_grained_feats_list) > 1: - fine_grained_feats = torch.cat(fine_grained_feats_list, dim=1) - else: - fine_grained_feats = fine_grained_feats_list[0] - - return fine_grained_feats - - def _get_coarse_point_feats(self, prev_output, points): - """Sample from fine grained features. - - Args: - prev_output (list[Tensor]): Prediction of previous decode head. - points (Tensor): Point coordinates, shape (batch_size, - num_points, 2). - - Returns: - coarse_feats (Tensor): Sampled coarse feature, shape (batch_size, - num_classes, num_points). - """ - - coarse_feats = point_sample(prev_output, points, align_corners=self.align_corners) - - return coarse_feats - - def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, train_cfg): - """Forward function for training. - Args: - inputs (list[Tensor]): List of multi-level img features. - prev_output (Tensor): The output of previous decode head. - img_metas (list[dict]): List of image info dict where each dict - has: 'img_shape', 'scale_factor', 'flip', and may also contain - 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. - For details on the values of these keys see - `mmseg/datasets/pipelines/formatting.py:Collect`. - gt_semantic_seg (Tensor): Semantic segmentation masks - used if the architecture supports semantic segmentation task. - train_cfg (dict): The training config. - - Returns: - dict[str, Tensor]: a dictionary of loss components - """ - x = self._transform_inputs(inputs) - with torch.no_grad(): - points = self.get_points_train(prev_output, calculate_uncertainty, cfg=train_cfg) - fine_grained_point_feats = self._get_fine_grained_point_feats(x, points) - coarse_point_feats = self._get_coarse_point_feats(prev_output, points) - point_logits = self.forward(fine_grained_point_feats, coarse_point_feats) - point_label = point_sample(gt_semantic_seg.float(), points, mode='nearest', align_corners=self.align_corners) - point_label = point_label.squeeze(1).long() - - losses = self.losses(point_logits, point_label) - - return losses - - def forward_test(self, inputs, prev_output, img_metas, test_cfg): - """Forward function for testing. - - Args: - inputs (list[Tensor]): List of multi-level img features. - prev_output (Tensor): The output of previous decode head. - img_metas (list[dict]): List of image info dict where each dict - has: 'img_shape', 'scale_factor', 'flip', and may also contain - 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. - For details on the values of these keys see - `mmseg/datasets/pipelines/formatting.py:Collect`. - test_cfg (dict): The testing config. - - Returns: - Tensor: Output segmentation map. - """ - - x = self._transform_inputs(inputs) - refined_seg_logits = prev_output.clone() - for _ in range(test_cfg.subdivision_steps): - refined_seg_logits = resize( - refined_seg_logits, - scale_factor=test_cfg.scale_factor, - mode='bilinear', - align_corners=self.align_corners, - ) - batch_size, channels, height, width = refined_seg_logits.shape - point_indices, points = self.get_points_test(refined_seg_logits, calculate_uncertainty, cfg=test_cfg) - fine_grained_point_feats = self._get_fine_grained_point_feats(x, points) - coarse_point_feats = self._get_coarse_point_feats(prev_output, points) - point_logits = self.forward(fine_grained_point_feats, coarse_point_feats) - - point_indices = point_indices.unsqueeze(1).expand(-1, channels, -1) - refined_seg_logits = refined_seg_logits.reshape(batch_size, channels, height * width) - refined_seg_logits = refined_seg_logits.scatter_(2, point_indices, point_logits) - refined_seg_logits = refined_seg_logits.view(batch_size, channels, height, width) - - return refined_seg_logits - - def losses(self, point_logits, point_label): - """Compute segmentation loss.""" - loss = dict() - loss['loss_point'] = self.loss_decode(point_logits, point_label, ignore_index=self.ignore_index) - loss['acc_point'] = accuracy(point_logits, point_label) - return loss - - def get_points_train(self, seg_logits, uncertainty_func, cfg): - """Sample points for training. - - Sample points in [0, 1] x [0, 1] coordinate space based on their - uncertainty. The uncertainties are calculated for each point using - 'uncertainty_func' function that takes point's logit prediction as - input. - - Args: - seg_logits (Tensor): Semantic segmentation logits, shape ( - batch_size, num_classes, height, width). - uncertainty_func (func): uncertainty calculation function. - cfg (dict): Training config of point head. - - Returns: - point_coords (Tensor): A tensor of shape (batch_size, num_points, - 2) that contains the coordinates of ``num_points`` sampled - points. - """ - num_points = cfg.num_points - oversample_ratio = cfg.oversample_ratio - importance_sample_ratio = cfg.importance_sample_ratio - assert oversample_ratio >= 1 - assert 0 <= importance_sample_ratio <= 1 - batch_size = seg_logits.shape[0] - num_sampled = int(num_points * oversample_ratio) - point_coords = torch.rand(batch_size, num_sampled, 2, device=seg_logits.device) - point_logits = point_sample(seg_logits, point_coords) - # It is crucial to calculate uncertainty based on the sampled - # prediction value for the points. Calculating uncertainties of the - # coarse predictions first and sampling them for points leads to - # incorrect results. To illustrate this: assume uncertainty func( - # logits)=-abs(logits), a sampled point between two coarse - # predictions with -1 and 1 logits has 0 logits, and therefore 0 - # uncertainty value. However, if we calculate uncertainties for the - # coarse predictions first, both will have -1 uncertainty, - # and sampled point will get -1 uncertainty. - point_uncertainties = uncertainty_func(point_logits) - num_uncertain_points = int(importance_sample_ratio * num_points) - num_random_points = num_points - num_uncertain_points - idx = torch.topk(point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1] - shift = num_sampled * torch.arange(batch_size, dtype=torch.long, device=seg_logits.device) - idx += shift[:, None] - point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view(batch_size, num_uncertain_points, 2) - if num_random_points > 0: - rand_point_coords = torch.rand(batch_size, num_random_points, 2, device=seg_logits.device) - point_coords = torch.cat((point_coords, rand_point_coords), dim=1) - return point_coords - - def get_points_test(self, seg_logits, uncertainty_func, cfg): - """Sample points for testing. - - Find ``num_points`` most uncertain points from ``uncertainty_map``. - - Args: - seg_logits (Tensor): A tensor of shape (batch_size, num_classes, - height, width) for class-specific or class-agnostic prediction. - uncertainty_func (func): uncertainty calculation function. - cfg (dict): Testing config of point head. - - Returns: - point_indices (Tensor): A tensor of shape (batch_size, num_points) - that contains indices from [0, height x width) of the most - uncertain points. - point_coords (Tensor): A tensor of shape (batch_size, num_points, - 2) that contains [0, 1] x [0, 1] normalized coordinates of the - most uncertain points from the ``height x width`` grid . - """ - - num_points = cfg.subdivision_num_points - uncertainty_map = uncertainty_func(seg_logits) - batch_size, _, height, width = uncertainty_map.shape - h_step = 1.0 / height - w_step = 1.0 / width - - uncertainty_map = uncertainty_map.view(batch_size, height * width) - num_points = min(height * width, num_points) - point_indices = uncertainty_map.topk(num_points, dim=1)[1] - point_coords = torch.zeros(batch_size, num_points, 2, dtype=torch.float, device=seg_logits.device) - point_coords[:, :, 0] = w_step / 2.0 + (point_indices % width).float() * w_step - point_coords[:, :, 1] = h_step / 2.0 + (point_indices // width).float() * h_step - return point_indices, point_coords diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py deleted file mode 100644 index 3ef4088a23e4..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py +++ /dev/null @@ -1,186 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule - -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize -from ..builder import HEADS -from .decode_head import BaseDecodeHead - -try: - from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import PSAMask -except ModuleNotFoundError: - PSAMask = None - - -@HEADS.register_module() -class PSAHead(BaseDecodeHead): - """Point-wise Spatial Attention Network for Scene Parsing. - - This head is the implementation of `PSANet - `_. - - Args: - mask_size (tuple[int]): The PSA mask size. It usually equals input - size. - psa_type (str): The type of psa module. Options are 'collect', - 'distribute', 'bi-direction'. Default: 'bi-direction' - compact (bool): Whether use compact map for 'collect' mode. - Default: True. - shrink_factor (int): The downsample factors of psa mask. Default: 2. - normalization_factor (float): The normalize factor of attention. - psa_softmax (bool): Whether use softmax for attention. - """ - - def __init__( - self, - mask_size, - psa_type='bi-direction', - compact=False, - shrink_factor=2, - normalization_factor=1.0, - psa_softmax=True, - **kwargs - ): - if PSAMask is None: - raise RuntimeError('Please install mmcv-full for PSAMask ops') - super(PSAHead, self).__init__(**kwargs) - assert psa_type in ['collect', 'distribute', 'bi-direction'] - self.psa_type = psa_type - self.compact = compact - self.shrink_factor = shrink_factor - self.mask_size = mask_size - mask_h, mask_w = mask_size - self.psa_softmax = psa_softmax - if normalization_factor is None: - normalization_factor = mask_h * mask_w - self.normalization_factor = normalization_factor - - self.reduce = ConvModule( - self.in_channels, - self.channels, - kernel_size=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - self.attention = nn.Sequential( - ConvModule( - self.channels, - self.channels, - kernel_size=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ), - nn.Conv2d(self.channels, mask_h * mask_w, kernel_size=1, bias=False), - ) - if psa_type == 'bi-direction': - self.reduce_p = ConvModule( - self.in_channels, - self.channels, - kernel_size=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - self.attention_p = nn.Sequential( - ConvModule( - self.channels, - self.channels, - kernel_size=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ), - nn.Conv2d(self.channels, mask_h * mask_w, kernel_size=1, bias=False), - ) - self.psamask_collect = PSAMask('collect', mask_size) - self.psamask_distribute = PSAMask('distribute', mask_size) - else: - self.psamask = PSAMask(psa_type, mask_size) - self.proj = ConvModule( - self.channels * (2 if psa_type == 'bi-direction' else 1), - self.in_channels, - kernel_size=1, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - self.bottleneck = ConvModule( - self.in_channels * 2, - self.channels, - kernel_size=3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - - def forward(self, inputs): - """Forward function.""" - x = self._transform_inputs(inputs) - identity = x - align_corners = self.align_corners - if self.psa_type in ['collect', 'distribute']: - out = self.reduce(x) - n, c, h, w = out.size() - if self.shrink_factor != 1: - if h % self.shrink_factor and w % self.shrink_factor: - h = (h - 1) // self.shrink_factor + 1 - w = (w - 1) // self.shrink_factor + 1 - align_corners = True - else: - h = h // self.shrink_factor - w = w // self.shrink_factor - align_corners = False - out = resize(out, size=(h, w), mode='bilinear', align_corners=align_corners) - y = self.attention(out) - if self.compact: - if self.psa_type == 'collect': - y = y.view(n, h * w, h * w).transpose(1, 2).view(n, h * w, h, w) - else: - y = self.psamask(y) - if self.psa_softmax: - y = F.softmax(y, dim=1) - out = torch.bmm(out.view(n, c, h * w), y.view(n, h * w, h * w)).view(n, c, h, w) * ( - 1.0 / self.normalization_factor - ) - else: - x_col = self.reduce(x) - x_dis = self.reduce_p(x) - n, c, h, w = x_col.size() - if self.shrink_factor != 1: - if h % self.shrink_factor and w % self.shrink_factor: - h = (h - 1) // self.shrink_factor + 1 - w = (w - 1) // self.shrink_factor + 1 - align_corners = True - else: - h = h // self.shrink_factor - w = w // self.shrink_factor - align_corners = False - x_col = resize(x_col, size=(h, w), mode='bilinear', align_corners=align_corners) - x_dis = resize(x_dis, size=(h, w), mode='bilinear', align_corners=align_corners) - y_col = self.attention(x_col) - y_dis = self.attention_p(x_dis) - if self.compact: - y_dis = y_dis.view(n, h * w, h * w).transpose(1, 2).view(n, h * w, h, w) - else: - y_col = self.psamask_collect(y_col) - y_dis = self.psamask_distribute(y_dis) - if self.psa_softmax: - y_col = F.softmax(y_col, dim=1) - y_dis = F.softmax(y_dis, dim=1) - x_col = torch.bmm(x_col.view(n, c, h * w), y_col.view(n, h * w, h * w)).view(n, c, h, w) * ( - 1.0 / self.normalization_factor - ) - x_dis = torch.bmm(x_dis.view(n, c, h * w), y_dis.view(n, h * w, h * w)).view(n, c, h, w) * ( - 1.0 / self.normalization_factor - ) - out = torch.cat([x_col, x_dis], 1) - out = self.proj(out) - out = resize(out, size=identity.shape[2:], mode='bilinear', align_corners=align_corners) - out = self.bottleneck(torch.cat((identity, out), dim=1)) - out = self.cls_seg(out) - return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py deleted file mode 100644 index ad87e1514885..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py +++ /dev/null @@ -1,101 +0,0 @@ -import torch -import torch.nn as nn -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule - -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize -from ..builder import HEADS -from .decode_head import BaseDecodeHead - - -class PPM(nn.ModuleList): - """Pooling Pyramid Module used in PSPNet. - - Args: - pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid - Module. - in_channels (int): Input channels. - channels (int): Channels after modules, before conv_seg. - conv_cfg (dict|None): Config of conv layers. - norm_cfg (dict|None): Config of norm layers. - act_cfg (dict): Config of activation layers. - align_corners (bool): align_corners argument of F.interpolate. - """ - - def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg, act_cfg, align_corners): - super(PPM, self).__init__() - self.pool_scales = pool_scales - self.align_corners = align_corners - self.in_channels = in_channels - self.channels = channels - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.act_cfg = act_cfg - for pool_scale in pool_scales: - self.append( - nn.Sequential( - nn.AdaptiveAvgPool2d(pool_scale), - ConvModule( - self.in_channels, - self.channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ), - ) - ) - - def forward(self, x): - """Forward function.""" - ppm_outs = [] - for ppm in self: - ppm_out = ppm(x) - upsampled_ppm_out = resize(ppm_out, size=x.size()[2:], mode='bilinear', align_corners=self.align_corners) - ppm_outs.append(upsampled_ppm_out) - return ppm_outs - - -@HEADS.register_module() -class PSPHead(BaseDecodeHead): - """Pyramid Scene Parsing Network. - - This head is the implementation of - `PSPNet `_. - - Args: - pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid - Module. Default: (1, 2, 3, 6). - """ - - def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): - super(PSPHead, self).__init__(**kwargs) - assert isinstance(pool_scales, (list, tuple)) - self.pool_scales = pool_scales - self.psp_modules = PPM( - self.pool_scales, - self.in_channels, - self.channels, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - align_corners=self.align_corners, - ) - self.bottleneck = ConvModule( - self.in_channels + len(pool_scales) * self.channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - - def forward(self, inputs): - """Forward function.""" - x = self._transform_inputs(inputs) - psp_outs = [x] - psp_outs.extend(self.psp_modules(x)) - psp_outs = torch.cat(psp_outs, dim=1) - output = self.bottleneck(psp_outs) - output = self.cls_seg(output) - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py deleted file mode 100644 index 6a6db6e93945..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py +++ /dev/null @@ -1,82 +0,0 @@ -import torch -import torch.nn as nn -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, DepthwiseSeparableConvModule - -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize -from ..builder import HEADS -from .aspp_head import ASPPHead, ASPPModule - - -class DepthwiseSeparableASPPModule(ASPPModule): - """Atrous Spatial Pyramid Pooling (ASPP) Module with depthwise separable - conv.""" - - def __init__(self, **kwargs): - super(DepthwiseSeparableASPPModule, self).__init__(**kwargs) - for i, dilation in enumerate(self.dilations): - if dilation > 1: - self[i] = DepthwiseSeparableConvModule( - self.in_channels, - self.channels, - 3, - dilation=dilation, - padding=dilation, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - - -@HEADS.register_module() -class DepthwiseSeparableASPPHead(ASPPHead): - """Encoder-Decoder with Atrous Separable Convolution for Semantic Image - Segmentation. - - This head is the implementation of `DeepLabV3+ - `_. - - Args: - c1_in_channels (int): The input channels of c1 decoder. If is 0, - the no decoder will be used. - c1_channels (int): The intermediate channels of c1 decoder. - """ - - def __init__(self, c1_in_channels, c1_channels, **kwargs): - super(DepthwiseSeparableASPPHead, self).__init__(**kwargs) - assert c1_in_channels >= 0 - self.aspp_modules = DepthwiseSeparableASPPModule( - dilations=self.dilations, - in_channels=self.in_channels, - channels=self.channels, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - if c1_in_channels > 0: - self.c1_bottleneck = ConvModule( - c1_in_channels, c1_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg - ) - else: - self.c1_bottleneck = None - self.sep_bottleneck = nn.Sequential( - DepthwiseSeparableConvModule( - self.channels + c1_channels, self.channels, 3, padding=1, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg - ), - DepthwiseSeparableConvModule( - self.channels, self.channels, 3, padding=1, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg - ), - ) - - def forward(self, inputs): - """Forward function.""" - x = self._transform_inputs(inputs) - aspp_outs = [resize(self.image_pool(x), size=x.size()[2:], mode='bilinear', align_corners=self.align_corners)] - aspp_outs.extend(self.aspp_modules(x)) - aspp_outs = torch.cat(aspp_outs, dim=1) - output = self.bottleneck(aspp_outs) - if self.c1_bottleneck is not None: - c1_output = self.c1_bottleneck(inputs[0]) - output = resize(input=output, size=c1_output.shape[2:], mode='bilinear', align_corners=self.align_corners) - output = torch.cat([output, c1_output], dim=1) - output = self.sep_bottleneck(output) - output = self.cls_seg(output) - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py deleted file mode 100644 index 1df80ae7b4ef..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py +++ /dev/null @@ -1,54 +0,0 @@ -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import DepthwiseSeparableConvModule - -from ..builder import HEADS -from .fcn_head import FCNHead - - -@HEADS.register_module() -class DepthwiseSeparableFCNHead(FCNHead): - """Depthwise-Separable Fully Convolutional Network for Semantic - Segmentation. - - This head is implemented according to Fast-SCNN paper. - Args: - in_channels(int): Number of output channels of FFM. - channels(int): Number of middle-stage channels in the decode head. - concat_input(bool): Whether to concatenate original decode input into - the result of several consecutive convolution layers. - Default: True. - num_classes(int): Used to determine the dimension of - final prediction tensor. - in_index(int): Correspond with 'out_indices' in FastSCNN backbone. - norm_cfg (dict | None): Config of norm layers. - align_corners (bool): align_corners argument of F.interpolate. - Default: False. - loss_decode(dict): Config of loss type and some - relevant additional options. - """ - - def __init__(self, **kwargs): - super(DepthwiseSeparableFCNHead, self).__init__(**kwargs) - self.convs[0] = DepthwiseSeparableConvModule( - self.in_channels, - self.channels, - kernel_size=self.kernel_size, - padding=self.kernel_size // 2, - norm_cfg=self.norm_cfg, - ) - for i in range(1, self.num_convs): - self.convs[i] = DepthwiseSeparableConvModule( - self.channels, - self.channels, - kernel_size=self.kernel_size, - padding=self.kernel_size // 2, - norm_cfg=self.norm_cfg, - ) - - if self.concat_input: - self.conv_cat = DepthwiseSeparableConvModule( - self.in_channels + self.channels, - self.channels, - kernel_size=self.kernel_size, - padding=self.kernel_size // 2, - norm_cfg=self.norm_cfg, - ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py deleted file mode 100644 index 35148c150fa7..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py +++ /dev/null @@ -1,118 +0,0 @@ -import torch -import torch.nn as nn -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule - -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize -from ..builder import HEADS -from .decode_head import BaseDecodeHead -from .psp_head import PPM - - -@HEADS.register_module() -class UPerHead(BaseDecodeHead): - """Unified Perceptual Parsing for Scene Understanding. - - This head is the implementation of `UPerNet - `_. - - Args: - pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid - Module applied on the last feature. Default: (1, 2, 3, 6). - """ - - def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): - super(UPerHead, self).__init__(input_transform='multiple_select', **kwargs) - # PSP Module - self.psp_modules = PPM( - pool_scales, - self.in_channels[-1], - self.channels, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - align_corners=self.align_corners, - ) - self.bottleneck = ConvModule( - self.in_channels[-1] + len(pool_scales) * self.channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - # FPN Module - self.lateral_convs = nn.ModuleList() - self.fpn_convs = nn.ModuleList() - for in_channels in self.in_channels[:-1]: # skip the top layer - l_conv = ConvModule( - in_channels, - self.channels, - 1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - inplace=False, - ) - fpn_conv = ConvModule( - self.channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - inplace=False, - ) - self.lateral_convs.append(l_conv) - self.fpn_convs.append(fpn_conv) - - self.fpn_bottleneck = ConvModule( - len(self.in_channels) * self.channels, - self.channels, - 3, - padding=1, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg, - ) - - def psp_forward(self, inputs): - """Forward function of PSP module.""" - x = inputs[-1] - psp_outs = [x] - psp_outs.extend(self.psp_modules(x)) - psp_outs = torch.cat(psp_outs, dim=1) - output = self.bottleneck(psp_outs) - - return output - - def forward(self, inputs): - """Forward function.""" - - inputs = self._transform_inputs(inputs) - - # build laterals - laterals = [lateral_conv(inputs[i]) for i, lateral_conv in enumerate(self.lateral_convs)] - - laterals.append(self.psp_forward(inputs)) - - # build top-down path - used_backbone_levels = len(laterals) - for i in range(used_backbone_levels - 1, 0, -1): - prev_shape = laterals[i - 1].shape[2:] - laterals[i - 1] += resize(laterals[i], size=prev_shape, mode='bilinear', align_corners=self.align_corners) - - # build outputs - fpn_outs = [self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels - 1)] - # append psp feature - fpn_outs.append(laterals[-1]) - - for i in range(used_backbone_levels - 1, 0, -1): - fpn_outs[i] = resize( - fpn_outs[i], size=fpn_outs[0].shape[2:], mode='bilinear', align_corners=self.align_corners - ) - fpn_outs = torch.cat(fpn_outs, dim=1) - output = self.fpn_bottleneck(fpn_outs) - output = self.cls_seg(output) - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py deleted file mode 100644 index aaf307b3eaa1..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from .accuracy import Accuracy, accuracy -from .cross_entropy_loss import CrossEntropyLoss, binary_cross_entropy, cross_entropy, mask_cross_entropy -from .dice_loss import DiceLoss -from .lovasz_loss import LovaszLoss -from .utils import reduce_loss, weight_reduce_loss, weighted_loss - -__all__ = [ - 'accuracy', - 'Accuracy', - 'cross_entropy', - 'binary_cross_entropy', - 'mask_cross_entropy', - 'CrossEntropyLoss', - 'reduce_loss', - 'weight_reduce_loss', - 'weighted_loss', - 'LovaszLoss', - 'DiceLoss', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py deleted file mode 100644 index 85b13399ee70..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py +++ /dev/null @@ -1,77 +0,0 @@ -import torch.nn as nn - - -def accuracy(pred, target, topk=1, thresh=None): - """Calculate accuracy according to the prediction and target. - - Args: - pred (torch.Tensor): The model prediction, shape (N, num_class, ...) - target (torch.Tensor): The target of each prediction, shape (N, , ...) - topk (int | tuple[int], optional): If the predictions in ``topk`` - matches the target, the predictions will be regarded as - correct ones. Defaults to 1. - thresh (float, optional): If not None, predictions with scores under - this threshold are considered incorrect. Default to None. - - Returns: - float | tuple[float]: If the input ``topk`` is a single integer, - the function will return a single float as accuracy. If - ``topk`` is a tuple containing multiple integers, the - function will return a tuple containing accuracies of - each ``topk`` number. - """ - assert isinstance(topk, (int, tuple)) - if isinstance(topk, int): - topk = (topk,) - return_single = True - else: - return_single = False - - maxk = max(topk) - if pred.size(0) == 0: - accu = [pred.new_tensor(0.0) for i in range(len(topk))] - return accu[0] if return_single else accu - assert pred.ndim == target.ndim + 1 - assert pred.size(0) == target.size(0) - assert maxk <= pred.size(1), f'maxk {maxk} exceeds pred dimension {pred.size(1)}' - pred_value, pred_label = pred.topk(maxk, dim=1) - # transpose to shape (maxk, N, ...) - pred_label = pred_label.transpose(0, 1) - correct = pred_label.eq(target.unsqueeze(0).expand_as(pred_label)) - if thresh is not None: - # Only prediction values larger than thresh are counted as correct - correct = correct & (pred_value > thresh).t() - res = [] - for k in topk: - correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / target.numel())) - return res[0] if return_single else res - - -class Accuracy(nn.Module): - """Accuracy calculation module.""" - - def __init__(self, topk=(1,), thresh=None): - """Module to calculate the accuracy. - - Args: - topk (tuple, optional): The criterion used to calculate the - accuracy. Defaults to (1,). - thresh (float, optional): If not None, predictions with scores - under this threshold are considered incorrect. Default to None. - """ - super().__init__() - self.topk = topk - self.thresh = thresh - - def forward(self, pred, target): - """Forward function to calculate accuracy. - - Args: - pred (torch.Tensor): Prediction of models. - target (torch.Tensor): Target for each prediction. - - Returns: - tuple[float]: The accuracies under different topk criterions. - """ - return accuracy(pred, target, self.topk, self.thresh) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py deleted file mode 100644 index 766812eb5221..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py +++ /dev/null @@ -1,155 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F - -from ..builder import LOSSES -from .utils import get_class_weight, weight_reduce_loss - - -def cross_entropy(pred, label, weight=None, class_weight=None, reduction='mean', avg_factor=None, ignore_index=-100): - """The wrapper function for :func:`F.cross_entropy`""" - # class_weight is a manual rescaling weight given to each class. - # If given, has to be a Tensor of size C element-wise losses - loss = F.cross_entropy(pred, label, weight=class_weight, reduction='none', ignore_index=ignore_index) - - # apply weights and do the reduction - if weight is not None: - weight = weight.float() - loss = weight_reduce_loss(loss, weight=weight, reduction=reduction, avg_factor=avg_factor) - - return loss - - -def _expand_onehot_labels(labels, label_weights, target_shape, ignore_index): - """Expand onehot labels to match the size of prediction.""" - bin_labels = labels.new_zeros(target_shape) - valid_mask = (labels >= 0) & (labels != ignore_index) - inds = torch.nonzero(valid_mask, as_tuple=True) - - if inds[0].numel() > 0: - if labels.dim() == 3: - bin_labels[inds[0], labels[valid_mask], inds[1], inds[2]] = 1 - else: - bin_labels[inds[0], labels[valid_mask]] = 1 - - valid_mask = valid_mask.unsqueeze(1).expand(target_shape).float() - if label_weights is None: - bin_label_weights = valid_mask - else: - bin_label_weights = label_weights.unsqueeze(1).expand(target_shape) - bin_label_weights *= valid_mask - - return bin_labels, bin_label_weights - - -def binary_cross_entropy( - pred, label, weight=None, reduction='mean', avg_factor=None, class_weight=None, ignore_index=255 -): - """Calculate the binary CrossEntropy loss. - - Args: - pred (torch.Tensor): The prediction with shape (N, 1). - label (torch.Tensor): The learning label of the prediction. - weight (torch.Tensor, optional): Sample-wise loss weight. - reduction (str, optional): The method used to reduce the loss. - Options are "none", "mean" and "sum". - avg_factor (int, optional): Average factor that is used to average - the loss. Defaults to None. - class_weight (list[float], optional): The weight for each class. - ignore_index (int | None): The label index to be ignored. Default: 255 - - Returns: - torch.Tensor: The calculated loss - """ - if pred.dim() != label.dim(): - assert (pred.dim() == 2 and label.dim() == 1) or (pred.dim() == 4 and label.dim() == 3), ( - 'Only pred shape [N, C], label shape [N] or pred shape [N, C, ' - 'H, W], label shape [N, H, W] are supported' - ) - label, weight = _expand_onehot_labels(label, weight, pred.shape, ignore_index) - - # weighted element-wise losses - if weight is not None: - weight = weight.float() - loss = F.binary_cross_entropy_with_logits(pred, label.float(), pos_weight=class_weight, reduction='none') - # do the reduction for the weighted loss - loss = weight_reduce_loss(loss, weight, reduction=reduction, avg_factor=avg_factor) - - return loss - - -def mask_cross_entropy(pred, target, label, reduction='mean', avg_factor=None, class_weight=None, ignore_index=None): - """Calculate the CrossEntropy loss for masks. - - Args: - pred (torch.Tensor): The prediction with shape (N, C), C is the number - of classes. - target (torch.Tensor): The learning label of the prediction. - label (torch.Tensor): ``label`` indicates the class label of the mask' - corresponding object. This will be used to select the mask in the - of the class which the object belongs to when the mask prediction - if not class-agnostic. - reduction (str, optional): The method used to reduce the loss. - Options are "none", "mean" and "sum". - avg_factor (int, optional): Average factor that is used to average - the loss. Defaults to None. - class_weight (list[float], optional): The weight for each class. - ignore_index (None): Placeholder, to be consistent with other loss. - Default: None. - - Returns: - torch.Tensor: The calculated loss - """ - assert ignore_index is None, 'BCE loss does not support ignore_index' - # TODO: handle these two reserved arguments - assert reduction == 'mean' and avg_factor is None - num_rois = pred.size()[0] - inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) - pred_slice = pred[inds, label].squeeze(1) - return F.binary_cross_entropy_with_logits(pred_slice, target, weight=class_weight, reduction='mean')[None] - - -@LOSSES.register_module() -class CrossEntropyLoss(nn.Module): - """CrossEntropyLoss. - - Args: - use_sigmoid (bool, optional): Whether the prediction uses sigmoid - of softmax. Defaults to False. - use_mask (bool, optional): Whether to use mask cross entropy loss. - Defaults to False. - reduction (str, optional): . Defaults to 'mean'. - Options are "none", "mean" and "sum". - class_weight (list[float] | str, optional): Weight of each class. If in - str format, read them from a file. Defaults to None. - loss_weight (float, optional): Weight of the loss. Defaults to 1.0. - """ - - def __init__(self, use_sigmoid=False, use_mask=False, reduction='mean', class_weight=None, loss_weight=1.0): - super(CrossEntropyLoss, self).__init__() - assert (use_sigmoid is False) or (use_mask is False) - self.use_sigmoid = use_sigmoid - self.use_mask = use_mask - self.reduction = reduction - self.loss_weight = loss_weight - self.class_weight = get_class_weight(class_weight) - - if self.use_sigmoid: - self.cls_criterion = binary_cross_entropy - elif self.use_mask: - self.cls_criterion = mask_cross_entropy - else: - self.cls_criterion = cross_entropy - - def forward(self, cls_score, label, weight=None, avg_factor=None, reduction_override=None, **kwargs): - """Forward function.""" - assert reduction_override in (None, 'none', 'mean', 'sum') - reduction = reduction_override if reduction_override else self.reduction - if self.class_weight is not None: - class_weight = cls_score.new_tensor(self.class_weight) - else: - class_weight = None - loss_cls = self.loss_weight * self.cls_criterion( - cls_score, label, weight, class_weight=class_weight, reduction=reduction, avg_factor=avg_factor, **kwargs - ) - return loss_cls diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py deleted file mode 100644 index 9384e60bd048..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py +++ /dev/null @@ -1,98 +0,0 @@ -"""Modified from https://github.com/LikeLy-Journey/SegmenTron/blob/master/ -segmentron/solver/loss.py (Apache-2.0 License)""" -import torch -import torch.nn as nn -import torch.nn.functional as F - -from ..builder import LOSSES -from .utils import get_class_weight, weighted_loss - - -@weighted_loss -def dice_loss(pred, target, valid_mask, smooth=1, exponent=2, class_weight=None, ignore_index=255): - assert pred.shape[0] == target.shape[0] - total_loss = 0 - num_classes = pred.shape[1] - for i in range(num_classes): - if i != ignore_index: - dice_loss = binary_dice_loss( - pred[:, i], target[..., i], valid_mask=valid_mask, smooth=smooth, exponent=exponent - ) - if class_weight is not None: - dice_loss *= class_weight[i] - total_loss += dice_loss - return total_loss / num_classes - - -@weighted_loss -def binary_dice_loss(pred, target, valid_mask, smooth=1, exponent=2, **kwards): - assert pred.shape[0] == target.shape[0] - pred = pred.reshape(pred.shape[0], -1) - target = target.reshape(target.shape[0], -1) - valid_mask = valid_mask.reshape(valid_mask.shape[0], -1) - - num = torch.sum(torch.mul(pred, target) * valid_mask, dim=1) * 2 + smooth - den = torch.sum(pred.pow(exponent) + target.pow(exponent), dim=1) + smooth - - return 1 - num / den - - -@LOSSES.register_module() -class DiceLoss(nn.Module): - """DiceLoss. - - This loss is proposed in `V-Net: Fully Convolutional Neural Networks for - Volumetric Medical Image Segmentation `_. - - Args: - loss_type (str, optional): Binary or multi-class loss. - Default: 'multi_class'. Options are "binary" and "multi_class". - smooth (float): A float number to smooth loss, and avoid NaN error. - Default: 1 - exponent (float): An float number to calculate denominator - value: \\sum{x^exponent} + \\sum{y^exponent}. Default: 2. - reduction (str, optional): The method used to reduce the loss. Options - are "none", "mean" and "sum". This parameter only works when - per_image is True. Default: 'mean'. - class_weight (list[float] | str, optional): Weight of each class. If in - str format, read them from a file. Defaults to None. - loss_weight (float, optional): Weight of the loss. Default to 1.0. - ignore_index (int | None): The label index to be ignored. Default: 255. - """ - - def __init__( - self, smooth=1, exponent=2, reduction='mean', class_weight=None, loss_weight=1.0, ignore_index=255, **kwards - ): - super(DiceLoss, self).__init__() - self.smooth = smooth - self.exponent = exponent - self.reduction = reduction - self.class_weight = get_class_weight(class_weight) - self.loss_weight = loss_weight - self.ignore_index = ignore_index - - def forward(self, pred, target, avg_factor=None, reduction_override=None, **kwards): - assert reduction_override in (None, 'none', 'mean', 'sum') - reduction = reduction_override if reduction_override else self.reduction - if self.class_weight is not None: - class_weight = pred.new_tensor(self.class_weight) - else: - class_weight = None - - pred = F.softmax(pred, dim=1) - num_classes = pred.shape[1] - one_hot_target = F.one_hot(torch.clamp(target.long(), 0, num_classes - 1), num_classes=num_classes) - valid_mask = (target != self.ignore_index).long() - - loss = self.loss_weight * dice_loss( - pred, - one_hot_target, - valid_mask=valid_mask, - reduction=reduction, - avg_factor=avg_factor, - smooth=self.smooth, - exponent=self.exponent, - class_weight=class_weight, - ignore_index=self.ignore_index, - ) - return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py deleted file mode 100644 index e1c049874490..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py +++ /dev/null @@ -1,304 +0,0 @@ -"""Modified from https://github.com/bermanmaxim/LovaszSoftmax/blob/master/pytor -ch/lovasz_losses.py Lovasz-Softmax and Jaccard hinge loss in PyTorch Maxim -Berman 2018 ESAT-PSI KU Leuven (MIT License)""" - -import torch -import torch.nn as nn -import torch.nn.functional as F - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - -from ..builder import LOSSES -from .utils import get_class_weight, weight_reduce_loss - - -def lovasz_grad(gt_sorted): - """Computes gradient of the Lovasz extension w.r.t sorted errors. - - See Alg. 1 in paper. - """ - p = len(gt_sorted) - gts = gt_sorted.sum() - intersection = gts - gt_sorted.float().cumsum(0) - union = gts + (1 - gt_sorted).float().cumsum(0) - jaccard = 1.0 - intersection / union - if p > 1: # cover 1-pixel case - jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] - return jaccard - - -def flatten_binary_logits(logits, labels, ignore_index=None): - """Flattens predictions in the batch (binary case) Remove labels equal to - 'ignore_index'.""" - logits = logits.view(-1) - labels = labels.view(-1) - if ignore_index is None: - return logits, labels - valid = labels != ignore_index - vlogits = logits[valid] - vlabels = labels[valid] - return vlogits, vlabels - - -def flatten_probs(probs, labels, ignore_index=None): - """Flattens predictions in the batch.""" - if probs.dim() == 3: - # assumes output of a sigmoid layer - B, H, W = probs.size() - probs = probs.view(B, 1, H, W) - B, C, H, W = probs.size() - probs = probs.permute(0, 2, 3, 1).contiguous().view(-1, C) # B*H*W, C=P,C - labels = labels.view(-1) - if ignore_index is None: - return probs, labels - valid = labels != ignore_index - vprobs = probs[valid.nonzero().squeeze()] - vlabels = labels[valid] - return vprobs, vlabels - - -def lovasz_hinge_flat(logits, labels): - """Binary Lovasz hinge loss. - - Args: - logits (torch.Tensor): [P], logits at each prediction - (between -infty and +infty). - labels (torch.Tensor): [P], binary ground truth labels (0 or 1). - - Returns: - torch.Tensor: The calculated loss. - """ - if len(labels) == 0: - # only void pixels, the gradients should be 0 - return logits.sum() * 0.0 - signs = 2.0 * labels.float() - 1.0 - errors = 1.0 - logits * signs - errors_sorted, perm = torch.sort(errors, dim=0, descending=True) - perm = perm.data - gt_sorted = labels[perm] - grad = lovasz_grad(gt_sorted) - loss = torch.dot(F.relu(errors_sorted), grad) - return loss - - -def lovasz_hinge( - logits, - labels, - classes='present', - per_image=False, - class_weight=None, - reduction='mean', - avg_factor=None, - ignore_index=255, -): - """Binary Lovasz hinge loss. - - Args: - logits (torch.Tensor): [B, H, W], logits at each pixel - (between -infty and +infty). - labels (torch.Tensor): [B, H, W], binary ground truth masks (0 or 1). - classes (str | list[int], optional): Placeholder, to be consistent with - other loss. Default: None. - per_image (bool, optional): If per_image is True, compute the loss per - image instead of per batch. Default: False. - class_weight (list[float], optional): Placeholder, to be consistent - with other loss. Default: None. - reduction (str, optional): The method used to reduce the loss. Options - are "none", "mean" and "sum". This parameter only works when - per_image is True. Default: 'mean'. - avg_factor (int, optional): Average factor that is used to average - the loss. This parameter only works when per_image is True. - Default: None. - ignore_index (int | None): The label index to be ignored. Default: 255. - - Returns: - torch.Tensor: The calculated loss. - """ - if per_image: - loss = [ - lovasz_hinge_flat(*flatten_binary_logits(logit.unsqueeze(0), label.unsqueeze(0), ignore_index)) - for logit, label in zip(logits, labels) - ] - loss = weight_reduce_loss(torch.stack(loss), None, reduction, avg_factor) - else: - loss = lovasz_hinge_flat(*flatten_binary_logits(logits, labels, ignore_index)) - return loss - - -def lovasz_softmax_flat(probs, labels, classes='present', class_weight=None): - """Multi-class Lovasz-Softmax loss. - - Args: - probs (torch.Tensor): [P, C], class probabilities at each prediction - (between 0 and 1). - labels (torch.Tensor): [P], ground truth labels (between 0 and C - 1). - classes (str | list[int], optional): Classes chosen to calculate loss. - 'all' for all classes, 'present' for classes present in labels, or - a list of classes to average. Default: 'present'. - class_weight (list[float], optional): The weight for each class. - Default: None. - - Returns: - torch.Tensor: The calculated loss. - """ - if probs.numel() == 0: - # only void pixels, the gradients should be 0 - return probs * 0.0 - C = probs.size(1) - losses = [] - class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes - for c in class_to_sum: - fg = (labels == c).float() # foreground for class c - if classes == 'present' and fg.sum() == 0: - continue - if C == 1: - if len(classes) > 1: - raise ValueError('Sigmoid output possible only with 1 class') - class_pred = probs[:, 0] - else: - class_pred = probs[:, c] - errors = (fg - class_pred).abs() - errors_sorted, perm = torch.sort(errors, 0, descending=True) - perm = perm.data - fg_sorted = fg[perm] - loss = torch.dot(errors_sorted, lovasz_grad(fg_sorted)) - if class_weight is not None: - loss *= class_weight[c] - losses.append(loss) - return torch.stack(losses).mean() - - -def lovasz_softmax( - probs, - labels, - classes='present', - per_image=False, - class_weight=None, - reduction='mean', - avg_factor=None, - ignore_index=255, -): - """Multi-class Lovasz-Softmax loss. - - Args: - probs (torch.Tensor): [B, C, H, W], class probabilities at each - prediction (between 0 and 1). - labels (torch.Tensor): [B, H, W], ground truth labels (between 0 and - C - 1). - classes (str | list[int], optional): Classes chosen to calculate loss. - 'all' for all classes, 'present' for classes present in labels, or - a list of classes to average. Default: 'present'. - per_image (bool, optional): If per_image is True, compute the loss per - image instead of per batch. Default: False. - class_weight (list[float], optional): The weight for each class. - Default: None. - reduction (str, optional): The method used to reduce the loss. Options - are "none", "mean" and "sum". This parameter only works when - per_image is True. Default: 'mean'. - avg_factor (int, optional): Average factor that is used to average - the loss. This parameter only works when per_image is True. - Default: None. - ignore_index (int | None): The label index to be ignored. Default: 255. - - Returns: - torch.Tensor: The calculated loss. - """ - - if per_image: - loss = [ - lovasz_softmax_flat( - *flatten_probs(prob.unsqueeze(0), label.unsqueeze(0), ignore_index), - classes=classes, - class_weight=class_weight - ) - for prob, label in zip(probs, labels) - ] - loss = weight_reduce_loss(torch.stack(loss), None, reduction, avg_factor) - else: - loss = lovasz_softmax_flat( - *flatten_probs(probs, labels, ignore_index), classes=classes, class_weight=class_weight - ) - return loss - - -@LOSSES.register_module() -class LovaszLoss(nn.Module): - """LovaszLoss. - - This loss is proposed in `The Lovasz-Softmax loss: A tractable surrogate - for the optimization of the intersection-over-union measure in neural - networks `_. - - Args: - loss_type (str, optional): Binary or multi-class loss. - Default: 'multi_class'. Options are "binary" and "multi_class". - classes (str | list[int], optional): Classes chosen to calculate loss. - 'all' for all classes, 'present' for classes present in labels, or - a list of classes to average. Default: 'present'. - per_image (bool, optional): If per_image is True, compute the loss per - image instead of per batch. Default: False. - reduction (str, optional): The method used to reduce the loss. Options - are "none", "mean" and "sum". This parameter only works when - per_image is True. Default: 'mean'. - class_weight (list[float] | str, optional): Weight of each class. If in - str format, read them from a file. Defaults to None. - loss_weight (float, optional): Weight of the loss. Defaults to 1.0. - """ - - def __init__( - self, - loss_type='multi_class', - classes='present', - per_image=False, - reduction='mean', - class_weight=None, - loss_weight=1.0, - ): - super(LovaszLoss, self).__init__() - assert loss_type in ( - 'binary', - 'multi_class', - ), "loss_type should be \ - 'binary' or 'multi_class'." - - if loss_type == 'binary': - self.cls_criterion = lovasz_hinge - else: - self.cls_criterion = lovasz_softmax - assert classes in ('all', 'present') or mmcv.is_list_of(classes, int) - if not per_image: - assert ( - reduction == 'none' - ), "reduction should be 'none' when \ - per_image is False." - - self.classes = classes - self.per_image = per_image - self.reduction = reduction - self.loss_weight = loss_weight - self.class_weight = get_class_weight(class_weight) - - def forward(self, cls_score, label, weight=None, avg_factor=None, reduction_override=None, **kwargs): - """Forward function.""" - assert reduction_override in (None, 'none', 'mean', 'sum') - reduction = reduction_override if reduction_override else self.reduction - if self.class_weight is not None: - class_weight = cls_score.new_tensor(self.class_weight) - else: - class_weight = None - - # if multi-class loss, transform logits to probs - if self.cls_criterion == lovasz_softmax: - cls_score = F.softmax(cls_score, dim=1) - - loss_cls = self.loss_weight * self.cls_criterion( - cls_score, - label, - self.classes, - self.per_image, - class_weight=class_weight, - reduction=reduction, - avg_factor=avg_factor, - **kwargs - ) - return loss_cls diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py deleted file mode 100644 index e1719c276160..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py +++ /dev/null @@ -1,117 +0,0 @@ -import functools - -import numpy as np -import torch.nn.functional as F - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv - - -def get_class_weight(class_weight): - """Get class weight for loss function. - - Args: - class_weight (list[float] | str | None): If class_weight is a str, - take it as a file name and read from it. - """ - if isinstance(class_weight, str): - # take it as a file path - if class_weight.endswith('.npy'): - class_weight = np.load(class_weight) - else: - # pkl, json or yaml - class_weight = mmcv.load(class_weight) - - return class_weight - - -def reduce_loss(loss, reduction): - """Reduce loss as specified. - - Args: - loss (Tensor): Elementwise loss tensor. - reduction (str): Options are "none", "mean" and "sum". - - Return: - Tensor: Reduced loss tensor. - """ - reduction_enum = F._Reduction.get_enum(reduction) - # none: 0, elementwise_mean:1, sum: 2 - if reduction_enum == 0: - return loss - elif reduction_enum == 1: - return loss.mean() - elif reduction_enum == 2: - return loss.sum() - - -def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): - """Apply element-wise weight and reduce loss. - - Args: - loss (Tensor): Element-wise loss. - weight (Tensor): Element-wise weights. - reduction (str): Same as built-in losses of PyTorch. - avg_factor (float): Avarage factor when computing the mean of losses. - - Returns: - Tensor: Processed loss values. - """ - # if weight is specified, apply element-wise weight - if weight is not None: - assert weight.dim() == loss.dim() - if weight.dim() > 1: - assert weight.size(1) == 1 or weight.size(1) == loss.size(1) - loss = loss * weight - - # if avg_factor is not specified, just reduce the loss - if avg_factor is None: - loss = reduce_loss(loss, reduction) - else: - # if reduction is mean, then average the loss by avg_factor - if reduction == 'mean': - loss = loss.sum() / avg_factor - # if reduction is 'none', then do nothing, otherwise raise an error - elif reduction != 'none': - raise ValueError('avg_factor can not be used with reduction="sum"') - return loss - - -def weighted_loss(loss_func): - """Create a weighted version of a given loss function. - - To use this decorator, the loss function must have the signature like - `loss_func(pred, target, **kwargs)`. The function only needs to compute - element-wise loss without any reduction. This decorator will add weight - and reduction arguments to the function. The decorated function will have - the signature like `loss_func(pred, target, weight=None, reduction='mean', - avg_factor=None, **kwargs)`. - - :Example: - - >>> import torch - >>> @weighted_loss - >>> def l1_loss(pred, target): - >>> return (pred - target).abs() - - >>> pred = torch.Tensor([0, 2, 3]) - >>> target = torch.Tensor([1, 1, 1]) - >>> weight = torch.Tensor([1, 0, 1]) - - >>> l1_loss(pred, target) - tensor(1.3333) - >>> l1_loss(pred, target, weight) - tensor(1.) - >>> l1_loss(pred, target, reduction='none') - tensor([1., 1., 2.]) - >>> l1_loss(pred, target, weight, avg_factor=2) - tensor(1.5000) - """ - - @functools.wraps(loss_func) - def wrapper(pred, target, weight=None, reduction='mean', avg_factor=None, **kwargs): - # get element-wise loss - loss = loss_func(pred, target, **kwargs) - loss = weight_reduce_loss(loss, weight, reduction, avg_factor) - return loss - - return wrapper diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/__init__.py deleted file mode 100644 index 9b9d3d5b3fe8..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .fpn import FPN -from .multilevel_neck import MultiLevelNeck - -__all__ = ['FPN', 'MultiLevelNeck'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py deleted file mode 100644 index fd5ca4c087b4..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py +++ /dev/null @@ -1,210 +0,0 @@ -import torch.nn as nn -import torch.nn.functional as F -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, xavier_init - -from ..builder import NECKS - - -@NECKS.register_module() -class FPN(nn.Module): - """Feature Pyramid Network. - - This is an implementation of - Feature Pyramid Networks for Object - Detection (https://arxiv.org/abs/1612.03144) - - Args: - in_channels (List[int]): Number of input channels per scale. - out_channels (int): Number of output channels (used at each scale) - num_outs (int): Number of output scales. - start_level (int): Index of the start input backbone level used to - build the feature pyramid. Default: 0. - end_level (int): Index of the end input backbone level (exclusive) to - build the feature pyramid. Default: -1, which means the last level. - add_extra_convs (bool | str): If bool, it decides whether to add conv - layers on top of the original feature maps. Default to False. - If True, its actual mode is specified by `extra_convs_on_inputs`. - If str, it specifies the source feature map of the extra convs. - Only the following options are allowed - - - 'on_input': Last feat map of neck inputs (i.e. backbone feature). - - 'on_lateral': Last feature map after lateral convs. - - 'on_output': The last output feature map after fpn convs. - extra_convs_on_inputs (bool, deprecated): Whether to apply extra convs - on the original feature from the backbone. If True, - it is equivalent to `add_extra_convs='on_input'`. If False, it is - equivalent to set `add_extra_convs='on_output'`. Default to True. - relu_before_extra_convs (bool): Whether to apply relu before the extra - conv. Default: False. - no_norm_on_lateral (bool): Whether to apply norm on lateral. - Default: False. - conv_cfg (dict): Config dict for convolution layer. Default: None. - norm_cfg (dict): Config dict for normalization layer. Default: None. - act_cfg (str): Config dict for activation layer in ConvModule. - Default: None. - upsample_cfg (dict): Config dict for interpolate layer. - Default: `dict(mode='nearest')` - - Example: - >>> import torch - >>> in_channels = [2, 3, 5, 7] - >>> scales = [340, 170, 84, 43] - >>> inputs = [torch.rand(1, c, s, s) - ... for c, s in zip(in_channels, scales)] - >>> self = FPN(in_channels, 11, len(in_channels)).eval() - >>> outputs = self.forward(inputs) - >>> for i in range(len(outputs)): - ... print(f'outputs[{i}].shape = {outputs[i].shape}') - outputs[0].shape = torch.Size([1, 11, 340, 340]) - outputs[1].shape = torch.Size([1, 11, 170, 170]) - outputs[2].shape = torch.Size([1, 11, 84, 84]) - outputs[3].shape = torch.Size([1, 11, 43, 43]) - """ - - def __init__( - self, - in_channels, - out_channels, - num_outs, - start_level=0, - end_level=-1, - add_extra_convs=False, - extra_convs_on_inputs=False, - relu_before_extra_convs=False, - no_norm_on_lateral=False, - conv_cfg=None, - norm_cfg=None, - act_cfg=None, - upsample_cfg=dict(mode='nearest'), - ): - super(FPN, self).__init__() - assert isinstance(in_channels, list) - self.in_channels = in_channels - self.out_channels = out_channels - self.num_ins = len(in_channels) - self.num_outs = num_outs - self.relu_before_extra_convs = relu_before_extra_convs - self.no_norm_on_lateral = no_norm_on_lateral - self.fp16_enabled = False - self.upsample_cfg = upsample_cfg.copy() - - if end_level == -1: - self.backbone_end_level = self.num_ins - assert num_outs >= self.num_ins - start_level - else: - # if end_level < inputs, no extra level is allowed - self.backbone_end_level = end_level - assert end_level <= len(in_channels) - assert num_outs == end_level - start_level - self.start_level = start_level - self.end_level = end_level - self.add_extra_convs = add_extra_convs - assert isinstance(add_extra_convs, (str, bool)) - if isinstance(add_extra_convs, str): - # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' - assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') - elif add_extra_convs: # True - if extra_convs_on_inputs: - # For compatibility with previous release - # TODO: deprecate `extra_convs_on_inputs` - self.add_extra_convs = 'on_input' - else: - self.add_extra_convs = 'on_output' - - self.lateral_convs = nn.ModuleList() - self.fpn_convs = nn.ModuleList() - - for i in range(self.start_level, self.backbone_end_level): - l_conv = ConvModule( - in_channels[i], - out_channels, - 1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, - act_cfg=act_cfg, - inplace=False, - ) - fpn_conv = ConvModule( - out_channels, - out_channels, - 3, - padding=1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - inplace=False, - ) - - self.lateral_convs.append(l_conv) - self.fpn_convs.append(fpn_conv) - - # add extra conv layers (e.g., RetinaNet) - extra_levels = num_outs - self.backbone_end_level + self.start_level - if self.add_extra_convs and extra_levels >= 1: - for i in range(extra_levels): - if i == 0 and self.add_extra_convs == 'on_input': - in_channels = self.in_channels[self.backbone_end_level - 1] - else: - in_channels = out_channels - extra_fpn_conv = ConvModule( - in_channels, - out_channels, - 3, - stride=2, - padding=1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - inplace=False, - ) - self.fpn_convs.append(extra_fpn_conv) - - # default init_weights for conv(msra) and norm in ConvModule - def init_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - xavier_init(m, distribution='uniform') - - def forward(self, inputs): - assert len(inputs) == len(self.in_channels) - - # build laterals - laterals = [lateral_conv(inputs[i + self.start_level]) for i, lateral_conv in enumerate(self.lateral_convs)] - - # build top-down path - used_backbone_levels = len(laterals) - for i in range(used_backbone_levels - 1, 0, -1): - # In some cases, fixing `scale factor` (e.g. 2) is preferred, but - # it cannot co-exist with `size` in `F.interpolate`. - if 'scale_factor' in self.upsample_cfg: - laterals[i - 1] += F.interpolate(laterals[i], **self.upsample_cfg) - else: - prev_shape = laterals[i - 1].shape[2:] - laterals[i - 1] += F.interpolate(laterals[i], size=prev_shape, **self.upsample_cfg) - - # build outputs - # part 1: from original levels - outs = [self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)] - # part 2: add extra levels - if self.num_outs > len(outs): - # use max pool to get more levels on top of outputs - # (e.g., Faster R-CNN, Mask R-CNN) - if not self.add_extra_convs: - for i in range(self.num_outs - used_backbone_levels): - outs.append(F.max_pool2d(outs[-1], 1, stride=2)) - # add conv layers on top of original feature maps (RetinaNet) - else: - if self.add_extra_convs == 'on_input': - extra_source = inputs[self.backbone_end_level - 1] - elif self.add_extra_convs == 'on_lateral': - extra_source = laterals[-1] - elif self.add_extra_convs == 'on_output': - extra_source = outs[-1] - else: - raise NotImplementedError - outs.append(self.fpn_convs[used_backbone_levels](extra_source)) - for i in range(used_backbone_levels + 1, self.num_outs): - if self.relu_before_extra_convs: - outs.append(self.fpn_convs[i](F.relu(outs[-1]))) - else: - outs.append(self.fpn_convs[i](outs[-1])) - return tuple(outs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py deleted file mode 100644 index 395bcd5ed655..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py +++ /dev/null @@ -1,53 +0,0 @@ -import torch.nn as nn -import torch.nn.functional as F -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule - -from ..builder import NECKS - - -@NECKS.register_module() -class MultiLevelNeck(nn.Module): - """MultiLevelNeck. - - A neck structure connect vit backbone and decoder_heads. - Args: - in_channels (List[int]): Number of input channels per scale. - out_channels (int): Number of output channels (used at each scale). - scales (List[int]): Scale factors for each input feature map. - norm_cfg (dict): Config dict for normalization layer. Default: None. - act_cfg (dict): Config dict for activation layer in ConvModule. - Default: None. - """ - - def __init__(self, in_channels, out_channels, scales=[0.5, 1, 2, 4], norm_cfg=None, act_cfg=None): - super(MultiLevelNeck, self).__init__() - assert isinstance(in_channels, list) - self.in_channels = in_channels - self.out_channels = out_channels - self.scales = scales - self.num_outs = len(scales) - self.lateral_convs = nn.ModuleList() - self.convs = nn.ModuleList() - for in_channel in in_channels: - self.lateral_convs.append( - ConvModule(in_channel, out_channels, kernel_size=1, norm_cfg=norm_cfg, act_cfg=act_cfg) - ) - for _ in range(self.num_outs): - self.convs.append( - ConvModule( - out_channels, out_channels, kernel_size=3, padding=1, stride=1, norm_cfg=norm_cfg, act_cfg=act_cfg - ) - ) - - def forward(self, inputs): - assert len(inputs) == len(self.in_channels) - print(inputs[0].shape) - inputs = [lateral_conv(inputs[i]) for i, lateral_conv in enumerate(self.lateral_convs)] - # for len(inputs) not equal to self.num_outs - if len(inputs) == 1: - inputs = [inputs[0] for _ in range(self.num_outs)] - outs = [] - for i in range(self.num_outs): - x_resize = F.interpolate(inputs[i], scale_factor=self.scales[i], mode='bilinear') - outs.append(self.convs[i](x_resize)) - return tuple(outs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/__init__.py deleted file mode 100644 index dca2f0940533..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .base import BaseSegmentor -from .cascade_encoder_decoder import CascadeEncoderDecoder -from .encoder_decoder import EncoderDecoder - -__all__ = ['BaseSegmentor', 'EncoderDecoder', 'CascadeEncoderDecoder'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py deleted file mode 100644 index bb1eb40a38d7..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py +++ /dev/null @@ -1,256 +0,0 @@ -import logging -import warnings -from abc import ABCMeta, abstractmethod -from collections import OrderedDict - -import numpy as np -import torch -import torch.distributed as dist -import torch.nn as nn - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import auto_fp16 - - -class BaseSegmentor(nn.Module): - """Base class for segmentors.""" - - __metaclass__ = ABCMeta - - def __init__(self): - super(BaseSegmentor, self).__init__() - self.fp16_enabled = False - - @property - def with_neck(self): - """bool: whether the segmentor has neck""" - return hasattr(self, 'neck') and self.neck is not None - - @property - def with_auxiliary_head(self): - """bool: whether the segmentor has auxiliary head""" - return hasattr(self, 'auxiliary_head') and self.auxiliary_head is not None - - @property - def with_decode_head(self): - """bool: whether the segmentor has decode head""" - return hasattr(self, 'decode_head') and self.decode_head is not None - - @abstractmethod - def extract_feat(self, imgs): - """Placeholder for extract features from images.""" - pass - - @abstractmethod - def encode_decode(self, img, img_metas): - """Placeholder for encode images with backbone and decode into a - semantic segmentation map of the same size as input.""" - pass - - @abstractmethod - def forward_train(self, imgs, img_metas, **kwargs): - """Placeholder for Forward function for training.""" - pass - - @abstractmethod - def simple_test(self, img, img_meta, **kwargs): - """Placeholder for single image test.""" - pass - - @abstractmethod - def aug_test(self, imgs, img_metas, **kwargs): - """Placeholder for augmentation test.""" - pass - - def init_weights(self, pretrained=None): - """Initialize the weights in segmentor. - - Args: - pretrained (str, optional): Path to pre-trained weights. - Defaults to None. - """ - if pretrained is not None: - logger = logging.getLogger() - logger.info(f'load model from: {pretrained}') - - def forward_test(self, imgs, img_metas, **kwargs): - """ - Args: - imgs (List[Tensor]): the outer list indicates test-time - augmentations and inner Tensor should have a shape NxCxHxW, - which contains all images in the batch. - img_metas (List[List[dict]]): the outer list indicates test-time - augs (multiscale, flip, etc.) and the inner list indicates - images in a batch. - """ - for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: - if not isinstance(var, list): - raise TypeError(f'{name} must be a list, but got ' f'{type(var)}') - - num_augs = len(imgs) - if num_augs != len(img_metas): - raise ValueError(f'num of augmentations ({len(imgs)}) != ' f'num of image meta ({len(img_metas)})') - # all images in the same aug batch all of the same ori_shape and pad - # shape - for img_meta in img_metas: - ori_shapes = [_['ori_shape'] for _ in img_meta] - assert all(shape == ori_shapes[0] for shape in ori_shapes) - img_shapes = [_['img_shape'] for _ in img_meta] - assert all(shape == img_shapes[0] for shape in img_shapes) - pad_shapes = [_['pad_shape'] for _ in img_meta] - assert all(shape == pad_shapes[0] for shape in pad_shapes) - - if num_augs == 1: - return self.simple_test(imgs[0], img_metas[0], **kwargs) - else: - return self.aug_test(imgs, img_metas, **kwargs) - - @auto_fp16(apply_to=('img',)) - def forward(self, img, img_metas, return_loss=True, **kwargs): - """Calls either :func:`forward_train` or :func:`forward_test` depending - on whether ``return_loss`` is ``True``. - - Note this setting will change the expected inputs. When - ``return_loss=True``, img and img_meta are single-nested (i.e. Tensor - and List[dict]), and when ``resturn_loss=False``, img and img_meta - should be double nested (i.e. List[Tensor], List[List[dict]]), with - the outer list indicating test time augmentations. - """ - if return_loss: - return self.forward_train(img, img_metas, **kwargs) - else: - return self.forward_test(img, img_metas, **kwargs) - - def train_step(self, data_batch, optimizer, **kwargs): - """The iteration step during training. - - This method defines an iteration step during training, except for the - back propagation and optimizer updating, which are done in an optimizer - hook. Note that in some complicated cases or models, the whole process - including back propagation and optimizer updating is also defined in - this method, such as GAN. - - Args: - data (dict): The output of dataloader. - optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of - runner is passed to ``train_step()``. This argument is unused - and reserved. - - Returns: - dict: It should contain at least 3 keys: ``loss``, ``log_vars``, - ``num_samples``. - ``loss`` is a tensor for back propagation, which can be a - weighted sum of multiple losses. - ``log_vars`` contains all the variables to be sent to the - logger. - ``num_samples`` indicates the batch size (when the model is - DDP, it means the batch size on each GPU), which is used for - averaging the logs. - """ - losses = self(**data_batch) - loss, log_vars = self._parse_losses(losses) - - outputs = dict(loss=loss, log_vars=log_vars, num_samples=len(data_batch['img_metas'])) - - return outputs - - def val_step(self, data_batch, **kwargs): - """The iteration step during validation. - - This method shares the same signature as :func:`train_step`, but used - during val epochs. Note that the evaluation after training epochs is - not implemented with this method, but an evaluation hook. - """ - output = self(**data_batch, **kwargs) - return output - - @staticmethod - def _parse_losses(losses): - """Parse the raw outputs (losses) of the network. - - Args: - losses (dict): Raw output of the network, which usually contain - losses and other necessary information. - - Returns: - tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor - which may be a weighted sum of all losses, log_vars contains - all the variables to be sent to the logger. - """ - log_vars = OrderedDict() - for loss_name, loss_value in losses.items(): - if isinstance(loss_value, torch.Tensor): - log_vars[loss_name] = loss_value.mean() - elif isinstance(loss_value, list): - log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) - else: - raise TypeError(f'{loss_name} is not a tensor or list of tensors') - - loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key) - - log_vars['loss'] = loss - for loss_name, loss_value in log_vars.items(): - # reduce loss when distributed training - if dist.is_available() and dist.is_initialized(): - loss_value = loss_value.data.clone() - dist.all_reduce(loss_value.div_(dist.get_world_size())) - log_vars[loss_name] = loss_value.item() - - return loss, log_vars - - def show_result(self, img, result, palette=None, win_name='', show=False, wait_time=0, out_file=None, opacity=0.5): - """Draw `result` over `img`. - - Args: - img (str or Tensor): The image to be displayed. - result (Tensor): The semantic segmentation results to draw over - `img`. - palette (list[list[int]]] | np.ndarray | None): The palette of - segmentation map. If None is given, random palette will be - generated. Default: None - win_name (str): The window name. - wait_time (int): Value of waitKey param. - Default: 0. - show (bool): Whether to show the image. - Default: False. - out_file (str or None): The filename to write the image. - Default: None. - opacity(float): Opacity of painted segmentation map. - Default 0.5. - Must be in (0, 1] range. - Returns: - img (Tensor): Only if not `show` or `out_file` - """ - img = mmcv.imread(img) - img = img.copy() - seg = result[0] - if palette is None: - if self.PALETTE is None: - palette = np.random.randint(0, 255, size=(len(self.CLASSES), 3)) - else: - palette = self.PALETTE - palette = np.array(palette) - assert palette.shape[0] == len(self.CLASSES) - assert palette.shape[1] == 3 - assert len(palette.shape) == 2 - assert 0 < opacity <= 1.0 - color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) - for label, color in enumerate(palette): - color_seg[seg == label, :] = color - # convert to BGR - color_seg = color_seg[..., ::-1] - - img = img * (1 - opacity) + color_seg * opacity - img = img.astype(np.uint8) - # if out_file specified, do not show image in window - if out_file is not None: - show = False - - if show: - mmcv.imshow(img, win_name, wait_time) - if out_file is not None: - mmcv.imwrite(img, out_file) - - if not (show or out_file): - warnings.warn('show==False and out_file is not specified, only ' 'result image will be returned') - return img diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py deleted file mode 100644 index 2f53cfb9e41c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py +++ /dev/null @@ -1,95 +0,0 @@ -from torch import nn - -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import add_prefix -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize -from .. import builder -from ..builder import SEGMENTORS -from .encoder_decoder import EncoderDecoder - - -@SEGMENTORS.register_module() -class CascadeEncoderDecoder(EncoderDecoder): - """Cascade Encoder Decoder segmentors. - - CascadeEncoderDecoder almost the same as EncoderDecoder, while decoders of - CascadeEncoderDecoder are cascaded. The output of previous decoder_head - will be the input of next decoder_head. - """ - - def __init__( - self, - num_stages, - backbone, - decode_head, - neck=None, - auxiliary_head=None, - train_cfg=None, - test_cfg=None, - pretrained=None, - ): - self.num_stages = num_stages - super(CascadeEncoderDecoder, self).__init__( - backbone=backbone, - decode_head=decode_head, - neck=neck, - auxiliary_head=auxiliary_head, - train_cfg=train_cfg, - test_cfg=test_cfg, - pretrained=pretrained, - ) - - def _init_decode_head(self, decode_head): - """Initialize ``decode_head``""" - assert isinstance(decode_head, list) - assert len(decode_head) == self.num_stages - self.decode_head = nn.ModuleList() - for i in range(self.num_stages): - self.decode_head.append(builder.build_head(decode_head[i])) - self.align_corners = self.decode_head[-1].align_corners - self.num_classes = self.decode_head[-1].num_classes - - def init_weights(self, pretrained=None): - """Initialize the weights in backbone and heads. - - Args: - pretrained (str, optional): Path to pre-trained weights. - Defaults to None. - """ - self.backbone.init_weights(pretrained=pretrained) - for i in range(self.num_stages): - self.decode_head[i].init_weights() - if self.with_auxiliary_head: - if isinstance(self.auxiliary_head, nn.ModuleList): - for aux_head in self.auxiliary_head: - aux_head.init_weights() - else: - self.auxiliary_head.init_weights() - - def encode_decode(self, img, img_metas): - """Encode images with backbone and decode into a semantic segmentation - map of the same size as input.""" - x = self.extract_feat(img) - out = self.decode_head[0].forward_test(x, img_metas, self.test_cfg) - for i in range(1, self.num_stages): - out = self.decode_head[i].forward_test(x, out, img_metas, self.test_cfg) - out = resize(input=out, size=img.shape[2:], mode='bilinear', align_corners=self.align_corners) - return out - - def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): - """Run forward function and calculate loss for decode head in - training.""" - losses = dict() - - loss_decode = self.decode_head[0].forward_train(x, img_metas, gt_semantic_seg, self.train_cfg) - - losses.update(add_prefix(loss_decode, 'decode_0')) - - for i in range(1, self.num_stages): - # forward test again, maybe unnecessary for most methods. - prev_outputs = self.decode_head[i - 1].forward_test(x, img_metas, self.test_cfg) - loss_decode = self.decode_head[i].forward_train( - x, prev_outputs, img_metas, gt_semantic_seg, self.train_cfg - ) - losses.update(add_prefix(loss_decode, f'decode_{i}')) - - return losses diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py deleted file mode 100644 index 198bf34de1e8..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py +++ /dev/null @@ -1,275 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F - -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import add_prefix -from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize -from .. import builder -from ..builder import SEGMENTORS -from .base import BaseSegmentor - - -@SEGMENTORS.register_module() -class EncoderDecoder(BaseSegmentor): - """Encoder Decoder segmentors. - - EncoderDecoder typically consists of backbone, decode_head, auxiliary_head. - Note that auxiliary_head is only used for deep supervision during training, - which could be dumped during inference. - """ - - def __init__( - self, backbone, decode_head, neck=None, auxiliary_head=None, train_cfg=None, test_cfg=None, pretrained=None - ): - super(EncoderDecoder, self).__init__() - self.backbone = builder.build_backbone(backbone) - if neck is not None: - self.neck = builder.build_neck(neck) - self._init_decode_head(decode_head) - self._init_auxiliary_head(auxiliary_head) - - self.train_cfg = train_cfg - self.test_cfg = test_cfg - - self.init_weights(pretrained=pretrained) - - assert self.with_decode_head - - def _init_decode_head(self, decode_head): - """Initialize ``decode_head``""" - self.decode_head = builder.build_head(decode_head) - self.align_corners = self.decode_head.align_corners - self.num_classes = self.decode_head.num_classes - - def _init_auxiliary_head(self, auxiliary_head): - """Initialize ``auxiliary_head``""" - if auxiliary_head is not None: - if isinstance(auxiliary_head, list): - self.auxiliary_head = nn.ModuleList() - for head_cfg in auxiliary_head: - self.auxiliary_head.append(builder.build_head(head_cfg)) - else: - self.auxiliary_head = builder.build_head(auxiliary_head) - - def init_weights(self, pretrained=None): - """Initialize the weights in backbone and heads. - - Args: - pretrained (str, optional): Path to pre-trained weights. - Defaults to None. - """ - - super(EncoderDecoder, self).init_weights(pretrained) - self.backbone.init_weights(pretrained=pretrained) - self.decode_head.init_weights() - if self.with_auxiliary_head: - if isinstance(self.auxiliary_head, nn.ModuleList): - for aux_head in self.auxiliary_head: - aux_head.init_weights() - else: - self.auxiliary_head.init_weights() - - def extract_feat(self, img): - """Extract features from images.""" - x = self.backbone(img) - if self.with_neck: - x = self.neck(x) - return x - - def encode_decode(self, img, img_metas): - """Encode images with backbone and decode into a semantic segmentation - map of the same size as input.""" - x = self.extract_feat(img) - out = self._decode_head_forward_test(x, img_metas) - out = resize(input=out, size=img.shape[2:], mode='bilinear', align_corners=self.align_corners) - return out - - def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): - """Run forward function and calculate loss for decode head in - training.""" - losses = dict() - loss_decode = self.decode_head.forward_train(x, img_metas, gt_semantic_seg, self.train_cfg) - - losses.update(add_prefix(loss_decode, 'decode')) - return losses - - def _decode_head_forward_test(self, x, img_metas): - """Run forward function and calculate loss for decode head in - inference.""" - seg_logits = self.decode_head.forward_test(x, img_metas, self.test_cfg) - return seg_logits - - def _auxiliary_head_forward_train(self, x, img_metas, gt_semantic_seg): - """Run forward function and calculate loss for auxiliary head in - training.""" - losses = dict() - if isinstance(self.auxiliary_head, nn.ModuleList): - for idx, aux_head in enumerate(self.auxiliary_head): - loss_aux = aux_head.forward_train(x, img_metas, gt_semantic_seg, self.train_cfg) - losses.update(add_prefix(loss_aux, f'aux_{idx}')) - else: - loss_aux = self.auxiliary_head.forward_train(x, img_metas, gt_semantic_seg, self.train_cfg) - losses.update(add_prefix(loss_aux, 'aux')) - - return losses - - def forward_dummy(self, img): - """Dummy forward function.""" - seg_logit = self.encode_decode(img, None) - - return seg_logit - - def forward_train(self, img, img_metas, gt_semantic_seg): - """Forward function for training. - - Args: - img (Tensor): Input images. - img_metas (list[dict]): List of image info dict where each dict - has: 'img_shape', 'scale_factor', 'flip', and may also contain - 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. - For details on the values of these keys see - `mmseg/datasets/pipelines/formatting.py:Collect`. - gt_semantic_seg (Tensor): Semantic segmentation masks - used if the architecture supports semantic segmentation task. - - Returns: - dict[str, Tensor]: a dictionary of loss components - """ - - x = self.extract_feat(img) - - losses = dict() - - loss_decode = self._decode_head_forward_train(x, img_metas, gt_semantic_seg) - losses.update(loss_decode) - - if self.with_auxiliary_head: - loss_aux = self._auxiliary_head_forward_train(x, img_metas, gt_semantic_seg) - losses.update(loss_aux) - - return losses - - # TODO refactor - def slide_inference(self, img, img_meta, rescale): - """Inference by sliding-window with overlap. - - If h_crop > h_img or w_crop > w_img, the small patch will be used to - decode without padding. - """ - - h_stride, w_stride = self.test_cfg.stride - h_crop, w_crop = self.test_cfg.crop_size - batch_size, _, h_img, w_img = img.size() - num_classes = self.num_classes - h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1 - w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1 - preds = img.new_zeros((batch_size, num_classes, h_img, w_img)) - count_mat = img.new_zeros((batch_size, 1, h_img, w_img)) - for h_idx in range(h_grids): - for w_idx in range(w_grids): - y1 = h_idx * h_stride - x1 = w_idx * w_stride - y2 = min(y1 + h_crop, h_img) - x2 = min(x1 + w_crop, w_img) - y1 = max(y2 - h_crop, 0) - x1 = max(x2 - w_crop, 0) - crop_img = img[:, :, y1:y2, x1:x2] - crop_seg_logit = self.encode_decode(crop_img, img_meta) - preds += F.pad(crop_seg_logit, (int(x1), int(preds.shape[3] - x2), int(y1), int(preds.shape[2] - y2))) - - count_mat[:, :, y1:y2, x1:x2] += 1 - assert (count_mat == 0).sum() == 0 - if torch.onnx.is_in_onnx_export(): - # cast count_mat to constant while exporting to ONNX - count_mat = torch.from_numpy(count_mat.cpu().detach().numpy()).to(device=img.device) - preds = preds / count_mat - if rescale: - preds = resize( - preds, - size=img_meta[0]['ori_shape'][:2], - mode='bilinear', - align_corners=self.align_corners, - warning=False, - ) - return preds - - def whole_inference(self, img, img_meta, rescale): - """Inference with full image.""" - - seg_logit = self.encode_decode(img, img_meta) - if rescale: - # support dynamic shape for onnx - if torch.onnx.is_in_onnx_export(): - size = img.shape[2:] - else: - size = img_meta[0]['ori_shape'][:2] - seg_logit = resize(seg_logit, size=size, mode='bilinear', align_corners=self.align_corners, warning=False) - - return seg_logit - - def inference(self, img, img_meta, rescale): - """Inference with slide/whole style. - - Args: - img (Tensor): The input image of shape (N, 3, H, W). - img_meta (dict): Image info dict where each dict has: 'img_shape', - 'scale_factor', 'flip', and may also contain - 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. - For details on the values of these keys see - `mmseg/datasets/pipelines/formatting.py:Collect`. - rescale (bool): Whether rescale back to original shape. - - Returns: - Tensor: The output segmentation map. - """ - - assert self.test_cfg.mode in ['slide', 'whole'] - ori_shape = img_meta[0]['ori_shape'] - assert all(_['ori_shape'] == ori_shape for _ in img_meta) - if self.test_cfg.mode == 'slide': - seg_logit = self.slide_inference(img, img_meta, rescale) - else: - seg_logit = self.whole_inference(img, img_meta, rescale) - output = F.softmax(seg_logit, dim=1) - flip = img_meta[0]['flip'] - if flip: - flip_direction = img_meta[0]['flip_direction'] - assert flip_direction in ['horizontal', 'vertical'] - if flip_direction == 'horizontal': - output = output.flip(dims=(3,)) - elif flip_direction == 'vertical': - output = output.flip(dims=(2,)) - - return output - - def simple_test(self, img, img_meta, rescale=True): - """Simple test with single image.""" - seg_logit = self.inference(img, img_meta, rescale) - seg_pred = seg_logit.argmax(dim=1) - if torch.onnx.is_in_onnx_export(): - # our inference backend only support 4D output - seg_pred = seg_pred.unsqueeze(0) - return seg_pred - seg_pred = seg_pred.cpu().numpy() - # unravel batch dim - seg_pred = list(seg_pred) - return seg_pred - - def aug_test(self, imgs, img_metas, rescale=True): - """Test with augmentations. - - Only rescale=True is supported. - """ - # aug_test rescale all imgs back to ori_shape for now - assert rescale - # to save memory, we get augmented seg logit inplace - seg_logit = self.inference(imgs[0], img_metas[0], rescale) - for i in range(1, len(imgs)): - cur_seg_logit = self.inference(imgs[i], img_metas[i], rescale) - seg_logit += cur_seg_logit - seg_logit /= len(imgs) - seg_pred = seg_logit.argmax(dim=1) - seg_pred = seg_pred.cpu().numpy() - # unravel batch dim - seg_pred = list(seg_pred) - return seg_pred diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py deleted file mode 100644 index be7a5bd7a676..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from .drop import DropPath -from .inverted_residual import InvertedResidual, InvertedResidualV3 -from .make_divisible import make_divisible -from .res_layer import ResLayer -from .se_layer import SELayer -from .self_attention_block import SelfAttentionBlock -from .up_conv_block import UpConvBlock -from .weight_init import trunc_normal_ - -__all__ = [ - 'ResLayer', - 'SelfAttentionBlock', - 'make_divisible', - 'InvertedResidual', - 'UpConvBlock', - 'InvertedResidualV3', - 'SELayer', - 'DropPath', - 'trunc_normal_', -] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py deleted file mode 100644 index cf9492f1c324..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Modified from https://github.com/rwightman/pytorch-image- -models/blob/master/timm/models/layers/drop.py.""" - -import torch -from torch import nn - - -class DropPath(nn.Module): - """Drop paths (Stochastic Depth) per sample (when applied in main path of - residual blocks). - - Args: - drop_prob (float): Drop rate for paths of model. Dropout rate has - to be between 0 and 1. Default: 0. - """ - - def __init__(self, drop_prob=0.0): - super(DropPath, self).__init__() - self.drop_prob = drop_prob - self.keep_prob = 1 - drop_prob - - def forward(self, x): - if self.drop_prob == 0.0 or not self.training: - return x - shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets - random_tensor = self.keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) - random_tensor.floor_() # binarize - output = x.div(self.keep_prob) * random_tensor - return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py deleted file mode 100644 index 6c2262f7922c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py +++ /dev/null @@ -1,218 +0,0 @@ -from torch import nn -from torch.utils import checkpoint as cp - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule - -from .se_layer import SELayer - - -class InvertedResidual(nn.Module): - """InvertedResidual block for MobileNetV2. - - Args: - in_channels (int): The input channels of the InvertedResidual block. - out_channels (int): The output channels of the InvertedResidual block. - stride (int): Stride of the middle (first) 3x3 convolution. - expand_ratio (int): Adjusts number of channels of the hidden layer - in InvertedResidual by this amount. - dilation (int): Dilation rate of depthwise conv. Default: 1 - conv_cfg (dict): Config dict for convolution layer. - Default: None, which means using conv2d. - norm_cfg (dict): Config dict for normalization layer. - Default: dict(type='BN'). - act_cfg (dict): Config dict for activation layer. - Default: dict(type='ReLU6'). - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. Default: False. - - Returns: - Tensor: The output tensor. - """ - - def __init__( - self, - in_channels, - out_channels, - stride, - expand_ratio, - dilation=1, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU6'), - with_cp=False, - ): - super(InvertedResidual, self).__init__() - self.stride = stride - assert stride in [1, 2], f'stride must in [1, 2]. ' f'But received {stride}.' - self.with_cp = with_cp - self.use_res_connect = self.stride == 1 and in_channels == out_channels - hidden_dim = int(round(in_channels * expand_ratio)) - - layers = [] - if expand_ratio != 1: - layers.append( - ConvModule( - in_channels=in_channels, - out_channels=hidden_dim, - kernel_size=1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - ) - layers.extend( - [ - ConvModule( - in_channels=hidden_dim, - out_channels=hidden_dim, - kernel_size=3, - stride=stride, - padding=dilation, - dilation=dilation, - groups=hidden_dim, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ), - ConvModule( - in_channels=hidden_dim, - out_channels=out_channels, - kernel_size=1, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=None, - ), - ] - ) - self.conv = nn.Sequential(*layers) - - def forward(self, x): - def _inner_forward(x): - if self.use_res_connect: - return x + self.conv(x) - else: - return self.conv(x) - - if self.with_cp and x.requires_grad: - out = cp.checkpoint(_inner_forward, x) - else: - out = _inner_forward(x) - - return out - - -class InvertedResidualV3(nn.Module): - """Inverted Residual Block for MobileNetV3. - - Args: - in_channels (int): The input channels of this Module. - out_channels (int): The output channels of this Module. - mid_channels (int): The input channels of the depthwise convolution. - kernel_size (int): The kernel size of the depthwise convolution. - Default: 3. - stride (int): The stride of the depthwise convolution. Default: 1. - se_cfg (dict): Config dict for se layer. Default: None, which means no - se layer. - with_expand_conv (bool): Use expand conv or not. If set False, - mid_channels must be the same with in_channels. Default: True. - conv_cfg (dict): Config dict for convolution layer. Default: None, - which means using conv2d. - norm_cfg (dict): Config dict for normalization layer. - Default: dict(type='BN'). - act_cfg (dict): Config dict for activation layer. - Default: dict(type='ReLU'). - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. Default: False. - - Returns: - Tensor: The output tensor. - """ - - def __init__( - self, - in_channels, - out_channels, - mid_channels, - kernel_size=3, - stride=1, - se_cfg=None, - with_expand_conv=True, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - with_cp=False, - ): - super(InvertedResidualV3, self).__init__() - self.with_res_shortcut = stride == 1 and in_channels == out_channels - assert stride in [1, 2] - self.with_cp = with_cp - self.with_se = se_cfg is not None - self.with_expand_conv = with_expand_conv - - if self.with_se: - assert isinstance(se_cfg, dict) - if not self.with_expand_conv: - assert mid_channels == in_channels - - if self.with_expand_conv: - self.expand_conv = ConvModule( - in_channels=in_channels, - out_channels=mid_channels, - kernel_size=1, - stride=1, - padding=0, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - self.depthwise_conv = ConvModule( - in_channels=mid_channels, - out_channels=mid_channels, - kernel_size=kernel_size, - stride=stride, - padding=kernel_size // 2, - groups=mid_channels, - conv_cfg=dict(type='Conv2dAdaptivePadding') if stride == 2 else conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - - if self.with_se: - self.se = SELayer(**se_cfg) - - self.linear_conv = ConvModule( - in_channels=mid_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - padding=0, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=None, - ) - - def forward(self, x): - def _inner_forward(x): - out = x - - if self.with_expand_conv: - out = self.expand_conv(out) - - out = self.depthwise_conv(out) - - if self.with_se: - out = self.se(out) - - out = self.linear_conv(out) - - if self.with_res_shortcut: - return x + out - else: - return out - - if self.with_cp and x.requires_grad: - out = cp.checkpoint(_inner_forward, x) - else: - out = _inner_forward(x) - - return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/make_divisible.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/make_divisible.py deleted file mode 100644 index 75ad75605252..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/make_divisible.py +++ /dev/null @@ -1,27 +0,0 @@ -def make_divisible(value, divisor, min_value=None, min_ratio=0.9): - """Make divisible function. - - This function rounds the channel number to the nearest value that can be - divisible by the divisor. It is taken from the original tf repo. It ensures - that all layers have a channel number that is divisible by divisor. It can - be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py # noqa - - Args: - value (int): The original channel number. - divisor (int): The divisor to fully divide the channel number. - min_value (int): The minimum value of the output channel. - Default: None, means that the minimum value equal to the divisor. - min_ratio (float): The minimum ratio of the rounded channel number to - the original channel number. Default: 0.9. - - Returns: - int: The modified output channel number. - """ - - if min_value is None: - min_value = divisor - new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than (1-min_ratio). - if new_value < min_ratio * value: - new_value += divisor - return new_value diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py deleted file mode 100644 index 370d078863f1..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py +++ /dev/null @@ -1,96 +0,0 @@ -from torch import nn as nn - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import build_conv_layer, build_norm_layer - - -class ResLayer(nn.Sequential): - """ResLayer to build ResNet style backbone. - - Args: - block (nn.Module): block used to build ResLayer. - inplanes (int): inplanes of block. - planes (int): planes of block. - num_blocks (int): number of blocks. - stride (int): stride of the first block. Default: 1 - avg_down (bool): Use AvgPool instead of stride conv when - downsampling in the bottleneck. Default: False - conv_cfg (dict): dictionary to construct and config conv layer. - Default: None - norm_cfg (dict): dictionary to construct and config norm layer. - Default: dict(type='BN') - multi_grid (int | None): Multi grid dilation rates of last - stage. Default: None - contract_dilation (bool): Whether contract first dilation of each layer - Default: False - """ - - def __init__( - self, - block, - inplanes, - planes, - num_blocks, - stride=1, - dilation=1, - avg_down=False, - conv_cfg=None, - norm_cfg=dict(type='BN'), - multi_grid=None, - contract_dilation=False, - **kwargs - ): - self.block = block - - downsample = None - if stride != 1 or inplanes != planes * block.expansion: - downsample = [] - conv_stride = stride - if avg_down: - conv_stride = 1 - downsample.append( - nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False) - ) - downsample.extend( - [ - build_conv_layer( - conv_cfg, inplanes, planes * block.expansion, kernel_size=1, stride=conv_stride, bias=False - ), - build_norm_layer(norm_cfg, planes * block.expansion)[1], - ] - ) - downsample = nn.Sequential(*downsample) - - layers = [] - if multi_grid is None: - if dilation > 1 and contract_dilation: - first_dilation = dilation // 2 - else: - first_dilation = dilation - else: - first_dilation = multi_grid[0] - layers.append( - block( - inplanes=inplanes, - planes=planes, - stride=stride, - dilation=first_dilation, - downsample=downsample, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - **kwargs - ) - ) - inplanes = planes * block.expansion - for i in range(1, num_blocks): - layers.append( - block( - inplanes=inplanes, - planes=planes, - stride=1, - dilation=dilation if multi_grid is None else multi_grid[i], - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - **kwargs - ) - ) - super(ResLayer, self).__init__(*layers) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py deleted file mode 100644 index b00aaeade295..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py +++ /dev/null @@ -1,61 +0,0 @@ -import torch.nn as nn - -import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule - -from .make_divisible import make_divisible - - -class SELayer(nn.Module): - """Squeeze-and-Excitation Module. - - Args: - channels (int): The input (and output) channels of the SE layer. - ratio (int): Squeeze ratio in SELayer, the intermediate channel will be - ``int(channels/ratio)``. Default: 16. - conv_cfg (None or dict): Config dict for convolution layer. - Default: None, which means using conv2d. - act_cfg (dict or Sequence[dict]): Config dict for activation layer. - If act_cfg is a dict, two activation layers will be configured - by this dict. If act_cfg is a sequence of dicts, the first - activation layer will be configured by the first dict and the - second activation layer will be configured by the second dict. - Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0, - divisor=6.0)). - """ - - def __init__( - self, - channels, - ratio=16, - conv_cfg=None, - act_cfg=(dict(type='ReLU'), dict(type='HSigmoid', bias=3.0, divisor=6.0)), - ): - super(SELayer, self).__init__() - if isinstance(act_cfg, dict): - act_cfg = (act_cfg, act_cfg) - assert len(act_cfg) == 2 - assert mmcv.is_tuple_of(act_cfg, dict) - self.global_avgpool = nn.AdaptiveAvgPool2d(1) - self.conv1 = ConvModule( - in_channels=channels, - out_channels=make_divisible(channels // ratio, 8), - kernel_size=1, - stride=1, - conv_cfg=conv_cfg, - act_cfg=act_cfg[0], - ) - self.conv2 = ConvModule( - in_channels=make_divisible(channels // ratio, 8), - out_channels=channels, - kernel_size=1, - stride=1, - conv_cfg=conv_cfg, - act_cfg=act_cfg[1], - ) - - def forward(self, x): - out = self.global_avgpool(x) - out = self.conv1(out) - out = self.conv2(out) - return x * out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py deleted file mode 100644 index 52f37c728381..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py +++ /dev/null @@ -1,162 +0,0 @@ -import torch -from torch import nn as nn -from torch.nn import functional as F - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, constant_init - - -class SelfAttentionBlock(nn.Module): - """General self-attention block/non-local block. - - Please refer to https://arxiv.org/abs/1706.03762 for details about key, - query and value. - - Args: - key_in_channels (int): Input channels of key feature. - query_in_channels (int): Input channels of query feature. - channels (int): Output channels of key/query transform. - out_channels (int): Output channels. - share_key_query (bool): Whether share projection weight between key - and query projection. - query_downsample (nn.Module): Query downsample module. - key_downsample (nn.Module): Key downsample module. - key_query_num_convs (int): Number of convs for key/query projection. - value_num_convs (int): Number of convs for value projection. - matmul_norm (bool): Whether normalize attention map with sqrt of - channels - with_out (bool): Whether use out projection. - conv_cfg (dict|None): Config of conv layers. - norm_cfg (dict|None): Config of norm layers. - act_cfg (dict|None): Config of activation layers. - """ - - def __init__( - self, - key_in_channels, - query_in_channels, - channels, - out_channels, - share_key_query, - query_downsample, - key_downsample, - key_query_num_convs, - value_out_num_convs, - key_query_norm, - value_out_norm, - matmul_norm, - with_out, - conv_cfg, - norm_cfg, - act_cfg, - ): - super(SelfAttentionBlock, self).__init__() - if share_key_query: - assert key_in_channels == query_in_channels - self.key_in_channels = key_in_channels - self.query_in_channels = query_in_channels - self.out_channels = out_channels - self.channels = channels - self.share_key_query = share_key_query - self.conv_cfg = conv_cfg - self.norm_cfg = norm_cfg - self.act_cfg = act_cfg - self.key_project = self.build_project( - key_in_channels, - channels, - num_convs=key_query_num_convs, - use_conv_module=key_query_norm, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - if share_key_query: - self.query_project = self.key_project - else: - self.query_project = self.build_project( - query_in_channels, - channels, - num_convs=key_query_num_convs, - use_conv_module=key_query_norm, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - self.value_project = self.build_project( - key_in_channels, - channels if with_out else out_channels, - num_convs=value_out_num_convs, - use_conv_module=value_out_norm, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - if with_out: - self.out_project = self.build_project( - channels, - out_channels, - num_convs=value_out_num_convs, - use_conv_module=value_out_norm, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - else: - self.out_project = None - - self.query_downsample = query_downsample - self.key_downsample = key_downsample - self.matmul_norm = matmul_norm - - self.init_weights() - - def init_weights(self): - """Initialize weight of later layer.""" - if self.out_project is not None: - if not isinstance(self.out_project, ConvModule): - constant_init(self.out_project, 0) - - def build_project(self, in_channels, channels, num_convs, use_conv_module, conv_cfg, norm_cfg, act_cfg): - """Build projection layer for key/query/value/out.""" - if use_conv_module: - convs = [ConvModule(in_channels, channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)] - for _ in range(num_convs - 1): - convs.append(ConvModule(channels, channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)) - else: - convs = [nn.Conv2d(in_channels, channels, 1)] - for _ in range(num_convs - 1): - convs.append(nn.Conv2d(channels, channels, 1)) - if len(convs) > 1: - convs = nn.Sequential(*convs) - else: - convs = convs[0] - return convs - - def forward(self, query_feats, key_feats): - """Forward function.""" - batch_size = query_feats.size(0) - query = self.query_project(query_feats) - if self.query_downsample is not None: - query = self.query_downsample(query) - query = query.reshape(*query.shape[:2], -1) - query = query.permute(0, 2, 1).contiguous() - - key = self.key_project(key_feats) - value = self.value_project(key_feats) - if self.key_downsample is not None: - key = self.key_downsample(key) - value = self.key_downsample(value) - key = key.reshape(*key.shape[:2], -1) - value = value.reshape(*value.shape[:2], -1) - value = value.permute(0, 2, 1).contiguous() - - sim_map = torch.matmul(query, key) - if self.matmul_norm: - sim_map = (self.channels ** -0.5) * sim_map - sim_map = F.softmax(sim_map, dim=-1) - - context = torch.matmul(sim_map, value) - context = context.permute(0, 2, 1).contiguous() - context = context.reshape(batch_size, -1, *query_feats.shape[2:]) - if self.out_project is not None: - context = self.out_project(context) - return context diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py deleted file mode 100644 index 8558925074e1..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py +++ /dev/null @@ -1,106 +0,0 @@ -import torch -import torch.nn as nn -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, build_upsample_layer - - -class UpConvBlock(nn.Module): - """Upsample convolution block in decoder for UNet. - - This upsample convolution block consists of one upsample module - followed by one convolution block. The upsample module expands the - high-level low-resolution feature map and the convolution block fuses - the upsampled high-level low-resolution feature map and the low-level - high-resolution feature map from encoder. - - Args: - conv_block (nn.Sequential): Sequential of convolutional layers. - in_channels (int): Number of input channels of the high-level - skip_channels (int): Number of input channels of the low-level - high-resolution feature map from encoder. - out_channels (int): Number of output channels. - num_convs (int): Number of convolutional layers in the conv_block. - Default: 2. - stride (int): Stride of convolutional layer in conv_block. Default: 1. - dilation (int): Dilation rate of convolutional layer in conv_block. - Default: 1. - with_cp (bool): Use checkpoint or not. Using checkpoint will save some - memory while slowing down the training speed. Default: False. - conv_cfg (dict | None): Config dict for convolution layer. - Default: None. - norm_cfg (dict | None): Config dict for normalization layer. - Default: dict(type='BN'). - act_cfg (dict | None): Config dict for activation layer in ConvModule. - Default: dict(type='ReLU'). - upsample_cfg (dict): The upsample config of the upsample module in - decoder. Default: dict(type='InterpConv'). If the size of - high-level feature map is the same as that of skip feature map - (low-level feature map from encoder), it does not need upsample the - high-level feature map and the upsample_cfg is None. - dcn (bool): Use deformable convolution in convolutional layer or not. - Default: None. - plugins (dict): plugins for convolutional layers. Default: None. - """ - - def __init__( - self, - conv_block, - in_channels, - skip_channels, - out_channels, - num_convs=2, - stride=1, - dilation=1, - with_cp=False, - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - upsample_cfg=dict(type='InterpConv'), - dcn=None, - plugins=None, - ): - super(UpConvBlock, self).__init__() - assert dcn is None, 'Not implemented yet.' - assert plugins is None, 'Not implemented yet.' - - self.conv_block = conv_block( - in_channels=2 * skip_channels, - out_channels=out_channels, - num_convs=num_convs, - stride=stride, - dilation=dilation, - with_cp=with_cp, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - dcn=None, - plugins=None, - ) - if upsample_cfg is not None: - self.upsample = build_upsample_layer( - cfg=upsample_cfg, - in_channels=in_channels, - out_channels=skip_channels, - with_cp=with_cp, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - else: - self.upsample = ConvModule( - in_channels, - skip_channels, - kernel_size=1, - stride=1, - padding=0, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - ) - - def forward(self, skip, x): - """Forward function.""" - - x = self.upsample(x) - out = torch.cat([skip, x], dim=1) - out = self.conv_block(out) - - return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py deleted file mode 100644 index fc3419e9a74c..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py +++ /dev/null @@ -1,63 +0,0 @@ -"""Modified from https://github.com/rwightman/pytorch-image- -models/blob/master/timm/models/layers/drop.py.""" - -import math -import warnings - -import torch - - -def _no_grad_trunc_normal_(tensor, mean, std, a, b): - """Reference: https://people.sc.fsu.edu/~jburkardt/presentations - /truncated_normal.pdf""" - - def norm_cdf(x): - # Computes standard normal cumulative distribution function - return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 - - if (mean < a - 2 * std) or (mean > b + 2 * std): - warnings.warn( - 'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. ' - 'The distribution of values may be incorrect.', - stacklevel=2, - ) - - with torch.no_grad(): - # Values are generated by using a truncated uniform distribution and - # then using the inverse CDF for the normal distribution. - # Get upper and lower cdf values - lower_bound = norm_cdf((a - mean) / std) - upper_bound = norm_cdf((b - mean) / std) - - # Uniformly fill tensor with values from [l, u], then translate to - # [2l-1, 2u-1]. - tensor.uniform_(2 * lower_bound - 1, 2 * upper_bound - 1) - - # Use inverse cdf transform for normal distribution to get truncated - # standard normal - tensor.erfinv_() - - # Transform to proper mean, std - tensor.mul_(std * math.sqrt(2.0)) - tensor.add_(mean) - - # Clamp to ensure it's in the proper range - tensor.clamp_(min=a, max=b) - return tensor - - -def trunc_normal_(tensor, mean=0.0, std=1.0, a=-2.0, b=2.0): - r"""Fills the input Tensor with values drawn from a truncated - normal distribution. The values are effectively drawn from the - normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` - with values outside :math:`[a, b]` redrawn until they are within - the bounds. The method used for generating the random values works - best when :math:`a \leq \text{mean} \leq b`. - Args: - tensor (``torch.Tensor``): an n-dimensional `torch.Tensor` - mean (float): the mean of the normal distribution - std (float): the standard deviation of the normal distribution - a (float): the minimum cutoff value - b (float): the maximum cutoff value - """ - return _no_grad_trunc_normal_(tensor, mean, std, a, b) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/__init__.py deleted file mode 100644 index bec51c75b936..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .encoding import Encoding -from .wrappers import Upsample, resize - -__all__ = ['Upsample', 'resize', 'Encoding'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py deleted file mode 100644 index ea4a06ba9297..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -from torch import nn -from torch.nn import functional as F - - -class Encoding(nn.Module): - """Encoding Layer: a learnable residual encoder. - - Input is of shape (batch_size, channels, height, width). - Output is of shape (batch_size, num_codes, channels). - - Args: - channels: dimension of the features or feature channels - num_codes: number of code words - """ - - def __init__(self, channels, num_codes): - super(Encoding, self).__init__() - # init codewords and smoothing factor - self.channels, self.num_codes = channels, num_codes - std = 1.0 / ((num_codes * channels) ** 0.5) - # [num_codes, channels] - self.codewords = nn.Parameter( - torch.empty(num_codes, channels, dtype=torch.float).uniform_(-std, std), requires_grad=True - ) - # [num_codes] - self.scale = nn.Parameter(torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0), requires_grad=True) - - @staticmethod - def scaled_l2(x, codewords, scale): - num_codes, channels = codewords.size() - batch_size = x.size(0) - reshaped_scale = scale.view((1, 1, num_codes)) - expanded_x = x.unsqueeze(2).expand((batch_size, x.size(1), num_codes, channels)) - reshaped_codewords = codewords.view((1, 1, num_codes, channels)) - - scaled_l2_norm = reshaped_scale * (expanded_x - reshaped_codewords).pow(2).sum(dim=3) - return scaled_l2_norm - - @staticmethod - def aggregate(assignment_weights, x, codewords): - num_codes, channels = codewords.size() - reshaped_codewords = codewords.view((1, 1, num_codes, channels)) - batch_size = x.size(0) - - expanded_x = x.unsqueeze(2).expand((batch_size, x.size(1), num_codes, channels)) - encoded_feat = (assignment_weights.unsqueeze(3) * (expanded_x - reshaped_codewords)).sum(dim=1) - return encoded_feat - - def forward(self, x): - assert x.dim() == 4 and x.size(1) == self.channels - # [batch_size, channels, height, width] - batch_size = x.size(0) - # [batch_size, height x width, channels] - x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous() - # assignment_weights: [batch_size, channels, num_codes] - assignment_weights = F.softmax(self.scaled_l2(x, self.codewords, self.scale), dim=2) - # aggregate - encoded_feat = self.aggregate(assignment_weights, x, self.codewords) - return encoded_feat - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += f'(Nx{self.channels}xHxW =>Nx{self.num_codes}' f'x{self.channels})' - return repr_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py deleted file mode 100644 index d366cf693b49..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py +++ /dev/null @@ -1,43 +0,0 @@ -import warnings - -import torch.nn as nn -import torch.nn.functional as F - - -def resize(input, size=None, scale_factor=None, mode='nearest', align_corners=None, warning=True): - if warning: - if size is not None and align_corners: - input_h, input_w = tuple(int(x) for x in input.shape[2:]) - output_h, output_w = tuple(int(x) for x in size) - if output_h > input_h or output_w > output_h: - if ( - (output_h > 1 and output_w > 1 and input_h > 1 and input_w > 1) - and (output_h - 1) % (input_h - 1) - and (output_w - 1) % (input_w - 1) - ): - warnings.warn( - f'When align_corners={align_corners}, ' - 'the output would more aligned if ' - f'input size {(input_h, input_w)} is `x+1` and ' - f'out size {(output_h, output_w)} is `nx+1`' - ) - return F.interpolate(input, size, scale_factor, mode, align_corners) - - -class Upsample(nn.Module): - def __init__(self, size=None, scale_factor=None, mode='nearest', align_corners=None): - super(Upsample, self).__init__() - self.size = size - if isinstance(scale_factor, tuple): - self.scale_factor = tuple(float(factor) for factor in scale_factor) - else: - self.scale_factor = float(scale_factor) if scale_factor else None - self.mode = mode - self.align_corners = align_corners - - def forward(self, x): - if not self.size: - size = [int(t * self.scale_factor) for t in x.shape[-2:]] - else: - size = self.size - return resize(x, size, None, self.mode, self.align_corners) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/__init__.py deleted file mode 100644 index ac489e2dbbc0..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .collect_env import collect_env -from .logger import get_root_logger - -__all__ = ['get_root_logger', 'collect_env'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py deleted file mode 100644 index 8ad826babee2..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py +++ /dev/null @@ -1,16 +0,0 @@ -import nemo.collections.multimodal.models.controlnet.uniformer.mmseg as mmseg -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import collect_env as collect_base_env -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import get_git_hash - - -def collect_env(): - """Collect the information of the running environments.""" - env_info = collect_base_env() - env_info['MMSegmentation'] = f'{mmseg.__version__}+{get_git_hash()[:7]}' - - return env_info - - -if __name__ == '__main__': - for name, val in collect_env().items(): - print('{}: {}'.format(name, val)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/logger.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/logger.py deleted file mode 100644 index 6167a6f88ea4..000000000000 --- a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/logger.py +++ /dev/null @@ -1,27 +0,0 @@ -import logging - -from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import get_logger - - -def get_root_logger(log_file=None, log_level=logging.INFO): - """Get the root logger. - - The logger will be initialized if it has not been initialized. By default a - StreamHandler will be added. If `log_file` is specified, a FileHandler will - also be added. The name of the root logger is the top-level package name, - e.g., "mmseg". - - Args: - log_file (str | None): The log filename. If specified, a FileHandler - will be added to the root logger. - log_level (int): The root logger level. Note that only the process of - rank 0 is affected, while other processes will set the level to - "Error" and be silent most of the time. - - Returns: - logging.Logger: The root logger. - """ - - logger = get_logger(name='mmseg', log_file=log_file, log_level=log_level) - - return logger diff --git a/nemo/collections/multimodal/models/controlnet/util.py b/nemo/collections/multimodal/models/controlnet/util.py deleted file mode 100644 index 4ffd6d16e4ff..000000000000 --- a/nemo/collections/multimodal/models/controlnet/util.py +++ /dev/null @@ -1,98 +0,0 @@ -import os - -import numpy as np -import torch -import torchvision -from PIL import Image -from pytorch_lightning import Callback -from pytorch_lightning.utilities.rank_zero import rank_zero_only -from nemo.collections.multimodal.models.controlnet.uniformer import UniformerDetector - - -class ImageLogger(Callback): - def __init__( - self, - batch_frequency=2000, - max_images=4, - clamp=True, - increase_log_steps=True, - rescale=True, - disabled=False, - log_on_batch_idx=False, - log_first_step=False, - log_images_kwargs=None, - ): - super().__init__() - self.rescale = rescale - self.batch_freq = batch_frequency - self.max_images = max_images - if not increase_log_steps: - self.log_steps = [self.batch_freq] - self.clamp = clamp - self.disabled = disabled - self.log_on_batch_idx = log_on_batch_idx - self.log_images_kwargs = log_images_kwargs if log_images_kwargs else {} - self.log_first_step = log_first_step - - @rank_zero_only - def log_local(self, save_dir, split, images, global_step, current_epoch, batch_idx): - root = os.path.join(save_dir, "image_log", split) - for k in images: - grid = torchvision.utils.make_grid(images[k], nrow=4) - if self.rescale: - grid = (grid + 1.0) / 2.0 # -1,1 -> 0,1; c,h,w - grid = grid.transpose(0, 1).transpose(1, 2).squeeze(-1) - grid = grid.numpy() - grid = (grid * 255).astype(np.uint8) - filename = "{}_gs-{:06}_e-{:06}_b-{:06}.png".format(k, global_step, current_epoch, batch_idx) - path = os.path.join(root, filename) - os.makedirs(os.path.split(path)[0], exist_ok=True) - Image.fromarray(grid).save(path) - - def log_img(self, pl_module, batch, batch_idx, split="train"): - check_idx = batch_idx # if self.log_on_batch_idx else pl_module.global_step - if ( - self.check_frequency(check_idx) - and hasattr(pl_module, "log_images") # batch_idx % self.batch_freq == 0 - and callable(pl_module.log_images) - and self.max_images > 0 - ): - logger = type(pl_module.logger) - - is_train = pl_module.training - if is_train: - pl_module.eval() - - with torch.no_grad(): - images = pl_module.log_images(batch, split=split, **self.log_images_kwargs) - - for k in images: - N = min(images[k].shape[0], self.max_images) - images[k] = images[k][:N] - if isinstance(images[k], torch.Tensor): - images[k] = images[k].detach().cpu() - if self.clamp: - images[k] = torch.clamp(images[k], -1.0, 1.0) - - self.log_local( - pl_module.logger.save_dir, split, images, pl_module.global_step, pl_module.current_epoch, batch_idx - ) - - if is_train: - pl_module.train() - - def check_frequency(self, check_idx): - return check_idx % self.batch_freq == 0 - - def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx): - if not self.disabled: - self.log_img(pl_module, batch, batch_idx, split="train") - - -def get_preprocessing_function(name): - if name == 'seg2img': - apply_uniformer = UniformerDetector() - return apply_uniformer - else: - print("The application is not yet supported") - raise NotImplementedError diff --git a/nemo/collections/multimodal/models/dreambooth/__init__.py b/nemo/collections/multimodal/models/dreambooth/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/models/dreambooth/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/models/dreambooth/dreambooth.py b/nemo/collections/multimodal/models/dreambooth/dreambooth.py deleted file mode 100644 index 5e4abd8522d7..000000000000 --- a/nemo/collections/multimodal/models/dreambooth/dreambooth.py +++ /dev/null @@ -1,654 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -from abc import ABC -from functools import partial -from typing import Any, Dict, Optional, Union - -import numpy as np -import pytorch_lightning as pl -import torch -from omegaconf import DictConfig, OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.utilities import GradClipAlgorithmType -from torch._inductor import config as inductor_config -from torch.optim.lr_scheduler import LambdaLR - -from nemo.collections.multimodal.data.dreambooth.dreambooth_dataset import DreamBoothDataset -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( - extract_into_tensor, - make_beta_schedule, - noise_like, -) -from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import ( - DiagonalGaussianDistribution, -) -from nemo.collections.multimodal.parts.stable_diffusion.utils import default, exists -from nemo.collections.multimodal.parts.utils import randn_like -from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import MegatronPretrainingRandomSampler -from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel -from nemo.collections.nlp.modules.common.megatron.module import Float16Module -from nemo.collections.nlp.parts.utils_funcs import get_last_rank, is_last_rank -from nemo.core.classes import ModelPT -from nemo.core.classes.common import Serialization -from nemo.core.config import hydra_runner -from nemo.utils import logging -from nemo.utils.exp_manager import exp_manager - -try: - from apex import amp - from apex.transformer.enums import AttnMaskType - from apex.transformer.pipeline_parallel.utils import get_num_microbatches - - HAVE_APEX = True -except (ImportError, ModuleNotFoundError): - HAVE_APEX = False - -try: - from megatron.core import parallel_state - from megatron.core.pipeline_parallel.schedules import get_forward_backward_func - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - - -def disabled_train(self, mode=True): - """Overwrite model.train with this function to make sure train/eval mode - does not change anymore.""" - return self - - -def _collate_fn(examples, with_prior_preservation=False): - if with_prior_preservation: - prompts = [[example["instance_prompt"], example["reg_prompt"]] for example in examples] - images = [example["instance_images"] for example in examples] + [example["reg_images"] for example in examples] - else: - prompts = [[example["instance_prompt"]] for example in examples] - images = [example["instance_images"] for example in examples] - - images = torch.stack(images) - images = images.to(memory_format=torch.contiguous_format).float() - - return prompts, images - - -class DreamBooth(torch.nn.Module, Serialization): - def __init__(self, cfg, model_parallel_config): - super().__init__() - self.cfg = cfg - self.config = model_parallel_config - self.with_prior_preservation = self.cfg.with_prior_preservation - self.num_reg_images = self.cfg.data.num_reg_images - self.prior_loss_weight = self.cfg.prior_loss_weight - self.num_images_per_prompt = self.cfg.data.num_images_per_prompt - - self.train_text_encoder = self.cfg.train_text_encoder - self.instantiate_text_encoder(self.cfg.cond_stage_config) - - self.inductor = self.cfg.inductor - self.inductor_cudagraphs = self.cfg.inductor_cudagraphs - - self.instantiate_vae(self.cfg.first_stage_config) - self.instantiate_unet(self.cfg.unet_config) - - self.scale_factor = self.cfg.scale_factor - self.num_timesteps = self.cfg.noise_scheduler.timesteps - self.parameterization = self.cfg.noise_scheduler.parameterization - self.get_noise_scheduler(self.cfg.noise_scheduler) - - self.model_type = None - self.rng = torch.Generator(device=torch.cuda.current_device(),) - - self.use_cached_latents = self.cfg.use_cached_latents - - if self.cfg.channels_last: - self.unet = self.unet.to(memory_format=torch.channels_last) - - def instantiate_unet(self, cfg): - self.unet = DreamBooth.from_config_dict(cfg) - self.unet.train() - if self.inductor: - # TorchInductor with CUDA graph can lead to OOM - inductor_config.triton.cudagraphs = self.inductor_cudagraphs - torch._dynamo.config.dynamic_shapes = False - torch._dynamo.config.automatic_dynamic_shapes = False - self.unet = torch.compile(self.unet) - - def instantiate_vae(self, cfg): - model = DreamBooth.from_config_dict(cfg) - self.vae = model.eval() - self.vae.train = disabled_train - for param in self.vae.parameters(): - param.requires_grad = False - - def instantiate_text_encoder(self, cfg): - model = DreamBooth.from_config_dict(cfg) - if self.train_text_encoder: - self.text_encoder = model.train() - for param in self.text_encoder.parameters(): - param.requires_grad = True - else: - self.text_encoder = model.eval() - self.text_encoder.train = disabled_train - for param in self.text_encoder.parameters(): - param.requires_grad = False - - def get_noise_scheduler(self, cfg): - model = DreamBooth.from_config_dict(cfg) - self.noise_scheduler = model.eval() - - def forward(self, batch): - - x, cond = batch - if self.use_cached_latents: - x = DiagonalGaussianDistribution(x) - latents = x.sample().detach() * self.scale_factor - else: - latents = self.vae.encode(x).sample().detach() - latents = latents * self.scale_factor - - noise = randn_like(latents, generator=self.rng) - t = torch.randint(0, self.num_timesteps, (latents.shape[0],), generator=self.rng, device=latents.device).long() - x_noisy = self.noise_scheduler(x_start=latents, t=t, noise=noise) - - # cond = self.text_encoder([t[0] for t in batch["prompts"]]) - # if self.with_prior_preservation: - # cond_prior = self.text_encoder([t[1] for t in batch["prompts"]]) - # cond = torch.cat([cond, cond_prior], dim=0) - - model_output = self.unet(x_noisy, t, cond) - - if self.parameterization == "x0": - target = latents - elif self.parameterization == "eps": - target = noise - else: - raise NotImplementedError() - - if self.with_prior_preservation: - model_pred, model_pred_prior = torch.chunk(model_output, 2, dim=0) - target, target_prior = torch.chunk(target, 2, dim=0) - loss = torch.nn.functional.mse_loss(model_pred.float(), target.float(), reduction="mean") - prior_loss = torch.nn.functional.mse_loss(model_pred_prior.float(), target_prior.float(), reduction="mean") - loss = loss + prior_loss * self.prior_loss_weight - - else: - loss = torch.nn.functional.mse_loss(target.float(), model_output.float(), reduction="mean") - return loss - - def parameters(self): - params = list(self.unet.parameters()) - if self.train_text_encoder: - # print(f"{self.__class__.__name__}: Also optimizing conditioner params!") - params = params + list(self.text_encoder.parameters()) - return params - - def set_input_tensor(self, input_tensor): - """See megatron.model.transformer.set_input_tensor()""" - pass - - -class MegatronDreamBooth(MegatronBaseModel): - def __init__(self, cfg: DictConfig, trainer: Trainer): - if not HAVE_APEX: - raise ImportError( - "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." - ) - if not HAVE_MEGATRON_CORE: - raise ImportError( - "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." - ) - - # this prevents base constructor from initializing tokenizer - self.tokenizer = None - super().__init__(cfg, trainer=trainer) - - self._validate_trainer() - - # megatron_amp_O2 is not yet supported in diffusion models - self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) - self.model = self.model_provider_func() - - if self.trainer.precision in ['bf16', 'bf16-mixed']: - self.autocast_dtype = torch.bfloat16 - elif self.trainer.precision in [32, '32', '32-true']: - self.autocast_dtype = torch.float - elif self.trainer.precision in [16, '16', '16-mixed']: - self.autocast_dtype = torch.half - else: - raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') - - def get_module_list(self): - if isinstance(self.model, list): - return [model.module if isinstance(model, Float16Module) else model for model in self.model] - elif isinstance(self.model, Float16Module): - return [self.model.module] - else: - return [self.model] - - def model_provider_func(self, pre_process=True, post_process=True): - """Model depends on pipeline paralellism.""" - model = DreamBooth(cfg=self.cfg, model_parallel_config=self.model_parallel_config) - return model - - def forward(self, batch): - output_tensor = self.model(batch) - return output_tensor - - def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): - tensor_shape = None # Placeholder - - # handle asynchronous grad reduction - no_sync_func = None - if not forward_only and self.with_distributed_adam: - no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) - - # pipeline schedules will get these from self.model.config - for module in self.get_module_list(): - module.config.no_sync_func = no_sync_func - - # run forward and backwards passes for an entire global batch - # we do this inside training_step to support pipeline parallelism - fwd_bwd_function = get_forward_backward_func() - - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(), - data_iterator=dataloader_iter, - model=self.model, - num_microbatches=get_num_microbatches(), - forward_only=forward_only, - seq_length=None, - micro_batch_size=self.cfg.micro_batch_size, - ) - - # losses_reduced_per_micro_batch is a list of dictionaries - # [{"loss": 0.1}, {"loss": 0.2}, ...] which are from gradient accumulation steps - # only the last stages of the pipeline return losses - loss_dict = {} - if losses_reduced_per_micro_batch: - if (not forward_only) or self.cfg.data.get('validation_drop_last', True): - # average loss across micro batches - prefix = 'train' - for key in losses_reduced_per_micro_batch[0]: - loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.stack(loss_tensors_list) - loss_dict[f'{prefix}/{key}'] = loss_tensor.mean() - loss_mean = loss_dict["train/loss"] - else: - raise NotImplementedError("Losses of micro batches sizes must be uniform!") - else: - if forward_only: - loss_mean = [] - else: - loss_mean = torch.tensor(0.0, device=torch.cuda.current_device()) - - return loss_mean, loss_dict - - def training_step(self, dataloader_iter, batch_idx): - """ - Our dataloaders produce a micro-batch and then we fetch - a number of microbatches depending on the global batch size and model parallel size - from the dataloader to produce a list of microbatches. - Batch should be a list of microbatches and those microbatches should on CPU. - Microbatches are then moved to GPU during the pipeline. - The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. - """ - - # we zero grads here because we also call backward in the apex fwd/bwd functions - self._optimizer.zero_grad() - - loss_mean, loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, False) - - torch.distributed.broadcast(loss_mean, get_last_rank()) - - # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced - if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): - self.allreduce_sequence_parallel_gradients() - - if self.with_distributed_adam: - # gradients are reduced internally in distributed optimizer - pass - elif self.megatron_amp_O2: - # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) - # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): - # # main grads are stored in the MainParamsOptimizer wrapper - # self._optimizer.allreduce_main_grads() - self._optimizer.allreduce_main_grads() - elif not self.cfg.get('ddp_overlap', True): - # async grad allreduce is not currently implemented for O1/autocasting mixed precision training - # so we all-reduce gradients after the pipeline - self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) - - if self.cfg.precision in [16, '16', '16-mixed']: - loss_scale = self.trainer.precision_plugin.scaler._scale - if loss_scale is not None: - self.log('loss_scale', loss_scale, prog_bar=True, batch_size=1) - - self.log_dict(loss_dict, prog_bar=False, logger=True, on_step=True, rank_zero_only=True, batch_size=1) - self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) - lr = self._optimizer.param_groups[0]['lr'] - self.log('lr', lr, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log( - 'consumed_samples', - self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), - prog_bar=True, - rank_zero_only=True, - batch_size=1, - ) - return loss_mean - - def validation_step(self, dataloader_iter, batch_idx): - loss, val_loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, True) - - self.log_dict(val_loss_dict, prog_bar=False, logger=True, on_step=False, on_epoch=True, batch_size=1) - - return loss - - def backward(self, *args, **kwargs): - """ LightningModule hook to do backward. - We want this to do nothing since we run backward in the fwd/bwd functions from apex. - No need to call it here. - """ - pass - - def optimizer_zero_grad(self, *args, **kwargs): - """ LightningModule hook to zero grad. - We want this to do nothing as we are zeroing grads during the training_step. - """ - pass - - def _append_sequence_parallel_module_grads(self, module, grads): - """ Helper method for allreduce_sequence_parallel_gradients""" - - for param in module.parameters(): - sequence_parallel_param = getattr(param, 'sequence_parallel', False) - if sequence_parallel_param and param.requires_grad: - if self.megatron_amp_O2: - grad = param.main_grad - else: - grad = param.grad - grads.append(grad.data) - - def get_forward_output_and_loss_func(self): - def process_batch(batch): - # noise_map, condition - prompts, images = batch - # DB has more dedicated structure for encoding, so we enable autocasting here as well - with torch.cuda.amp.autocast( - self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, - ): - images = images.cuda(non_blocking=True) - - cond = self.model.text_encoder([t[0] for t in prompts]) - if self.cfg.with_prior_preservation: - cond_prior = self.model.text_encoder([t[1] for t in prompts]) - cond = torch.cat([cond, cond_prior], dim=0) - - return images, cond - - def fwd_output_and_loss_func(dataloader_iter, model): - batch = next(dataloader_iter) - batch = process_batch(batch) - batch = [x.cuda(non_blocking=True) for x in batch] - loss = model(batch) - - def dummy(output_tensor): - return loss, {'loss': loss} - - return loss, dummy - - return fwd_output_and_loss_func - - def get_forward_output_only_func(self): - def fwd_output_only_func(batch, model): - raise NotImplementedError - - return fwd_output_only_func - - def setup(self, stage=None): - """ PTL hook that is executed after DDP spawns. - We setup datasets here as megatron datasets require DDP to instantiate. - See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. - Args: - stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. - """ - self.model.rng.manual_seed(self.cfg.seed + 100 * parallel_state.get_data_parallel_rank()) - - # log number of parameters - if isinstance(self.model, list): - num_parameters_on_device = sum( - [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] - ) - else: - num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) - - # to be summed across data parallel group - total_num_parameters = torch.tensor(num_parameters_on_device).cuda(non_blocking=True) - - torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) - - logging.info( - f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' - f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' - f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' - f'Total number of model parameters: {total_num_parameters:.2e}.' - ) - - resume_checkpoint_path = self.trainer.ckpt_path - if resume_checkpoint_path: - init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) - else: - init_consumed_samples = 0 - self.init_consumed_samples = init_consumed_samples - self.init_global_step = self.trainer.global_step - - # Batch size need to be provided for webdatset - self._num_micro_batches = get_num_microbatches() - self._micro_batch_size = self.cfg.micro_batch_size - - self.setup_training_data(self.cfg.data) - - def setup_training_data(self, cfg): - if self.cfg.with_prior_preservation: - if cfg.regularization_dir is None: - raise ValueError("Regularization images must be provided to train with prior preservation loss") - if cfg.regularization_prompt is None: - raise ValueError("Regularization prompts must be provided to train with prior preservation loss") - - self.train_dataset = DreamBoothDataset( - instance_data_root=cfg.instance_dir, - instance_prompt=cfg.instance_prompt, - with_prior_preservation=self.cfg.with_prior_preservation, - reg_data_root=cfg.regularization_dir if self.cfg.with_prior_preservation else None, - reg_prompt=cfg.regularization_prompt if self.cfg.with_prior_preservation else None, - size=cfg.resolution, - center_crop=cfg.center_crop, - load_cache_latents=self.model.use_cached_latents, - cached_instance_data_root=self.cfg.data.get("cached_instance_dir", None), - cached_reg_data_root=self.cfg.data.get("cached_reg_dir", None) - if self.cfg.with_prior_preservation - else None, - vae=self.model.vae, - text_encoder=self.model.text_encoder, - ) - - batch_sampler = MegatronPretrainingRandomSampler( - total_samples=len(self.train_dataset), - consumed_samples=self.compute_consumed_samples(0), - micro_batch_size=self.cfg.micro_batch_size, - global_batch_size=self.cfg.global_batch_size, - data_parallel_rank=parallel_state.get_data_parallel_rank(), - data_parallel_size=parallel_state.get_data_parallel_world_size(), - drop_last=False, - ) - - self._train_dl = torch.utils.data.DataLoader( - self.train_dataset, - batch_sampler=batch_sampler, - collate_fn=partial(_collate_fn, with_prior_preservation=self.cfg.with_prior_preservation), - num_workers=cfg.num_workers, - pin_memory=True, - persistent_workers=True, - ) - - def setup_validation_data(self, cfg): - pass - - def setup_test_data(self, cfg): - pass - - def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: - """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device - When using pipeline parallelism, we need the global batch to remain on the CPU, - since the memory overhead will be too high when using a large number of microbatches. - Microbatches are transferred from CPU to GPU inside the pipeline. - """ - return batch - - def _validate_trainer(self): - """ Certain trainer configurations can break training. - Here we try to catch them and raise an error. - """ - if self.trainer.accumulate_grad_batches > 1: - raise ValueError( - f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' - ) - - @classmethod - def list_available_models(cls): - return None - - def parameters(self): - if isinstance(self.model, list): - return itertools.chain.from_iterable(module.parameters() for module in self.model) - else: - return self.model.parameters() - - @classmethod - def load_from_checkpoint( - cls, - checkpoint_path: str, - map_location: Any = None, - hparams_file: Optional[str] = None, - strict: bool = True, - **kwargs, - ): - """ - Loads ModelPT from checkpoint, with some maintenance of restoration. - For documentation, please refer to LightningModule.load_from_checkpoin() documentation. - """ - checkpoint = None - try: - cls._set_model_restore_state(is_being_restored=True) - # TODO: replace with proper PTL API - with pl_legacy_patch(): - if map_location is not None: - checkpoint = pl_load(checkpoint_path, map_location=map_location) - else: - checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage) - - if hparams_file is not None: - extension = hparams_file.split(".")[-1] - if extension.lower() == "csv": - hparams = load_hparams_from_tags_csv(hparams_file) - elif extension.lower() in ("yml", "yaml"): - hparams = load_hparams_from_yaml(hparams_file) - else: - raise ValueError(".csv, .yml or .yaml is required for `hparams_file`") - - hparams["on_gpu"] = False - - # overwrite hparams by the given file - checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = hparams - - # for past checkpoint need to add the new key - if cls.CHECKPOINT_HYPER_PARAMS_KEY not in checkpoint: - checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = {} - # override the hparams with values that were passed in - cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].get('cfg', checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY]) - # TODO: can we do this without overriding? - config_kwargs = kwargs.copy() - if 'trainer' in config_kwargs: - config_kwargs.pop('trainer') - cfg.update(config_kwargs) - - # Disable individual unet/vae weights loading otherwise the model will look for these partial ckpts and raise error - if cfg: - if cfg.get('unet_config') and cfg.get('unet_config').get('from_pretrained'): - cfg.unet_config.from_pretrained = None - if cfg.get('first_stage_config') and cfg.get('first_stage_config').get('from_pretrained'): - cfg.first_stage_config.from_pretrained = None - ## Now when we covert ckpt to nemo, let's always get rid of those _orig_mod - if cfg.get('inductor'): - cfg.inductor = False - ## Append some dummy configs that DB didn't support - if not cfg.get('channels_last'): - cfg.channels_last = True - if not cfg.get('capture_cudagraph_iters'): - cfg.capture_cudagraph_iters = -1 - - # compatibility for stable diffusion old checkpoint tweaks - first_key = list(checkpoint['state_dict'].keys())[0] - if first_key == "betas": - # insert "model." into for megatron wrapper - new_state_dict = {} - for key in checkpoint['state_dict'].keys(): - new_key = "model." + key - new_state_dict[new_key] = checkpoint['state_dict'][key] - checkpoint['state_dict'] = new_state_dict - elif ( - first_key == 'model.text_encoder.transformer.text_model.embeddings.position_ids' - or first_key == 'model.text_encoder.model.language_model.embedding.position_embeddings' - ): - # remap state keys from dreambooth when using HF clip - new_state_dict = {} - for key in checkpoint['state_dict'].keys(): - new_key = key.replace('._orig_mod', "") - new_key = new_key.replace('unet', 'model.diffusion_model') - new_key = new_key.replace('vae', 'first_stage_model') - new_key = new_key.replace('text_encoder', 'cond_stage_model') - new_key = new_key.replace('.noise_scheduler', '') - new_state_dict[new_key] = checkpoint['state_dict'][key] - checkpoint['state_dict'] = new_state_dict - - # compatibility for inductor in inference - if not cfg.get('inductor', False): - new_state_dict = {} - for key in checkpoint['state_dict'].keys(): - new_key = key.replace('._orig_mod', '', 1) - new_state_dict[new_key] = checkpoint['state_dict'][key] - checkpoint['state_dict'] = new_state_dict - - if cfg.get('megatron_amp_O2', False): - new_state_dict = {} - for key in checkpoint['state_dict'].keys(): - new_key = key.replace('model.', 'model.module.', 1) - new_state_dict[new_key] = checkpoint['state_dict'][key] - checkpoint['state_dict'] = new_state_dict - - if 'cfg' in kwargs: - model = ptl_load_state(cls, checkpoint, strict=strict, **kwargs) - else: - model = ptl_load_state(cls, checkpoint, strict=strict, cfg=cfg, **kwargs) - # cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].cfg - - checkpoint = model - - finally: - cls._set_model_restore_state(is_being_restored=False) - return checkpoint diff --git a/nemo/collections/multimodal/models/dreambooth/util.py b/nemo/collections/multimodal/models/dreambooth/util.py deleted file mode 100644 index 8f8a142f99f3..000000000000 --- a/nemo/collections/multimodal/models/dreambooth/util.py +++ /dev/null @@ -1,169 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from functools import partial - -import numpy as np -import pytorch_lightning as pl -import torch -import torch.nn as nn - -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( - extract_into_tensor, - make_beta_schedule, - noise_like, -) -from nemo.collections.multimodal.parts.stable_diffusion.utils import default, exists -from nemo.core.classes.common import Serialization - - -class DiffusionWrapper(torch.nn.Module, Serialization): - def __init__(self, diff_model_config, conditioning_key): - super().__init__() - if isinstance(diff_model_config, nn.Module): - self.diffusion_model = diff_model_config - else: - self.diffusion_model = DiffusionWrapper.from_config_dict(diff_model_config) - self.conditioning_key = conditioning_key - assert self.conditioning_key in [None, 'concat', 'crossattn', 'hybrid', 'adm'] - - def forward(self, x_noisy, t, cond, return_ids=False): - if isinstance(cond, dict): - # hybrid case, cond is exptected to be a dict - pass - else: - if not isinstance(cond, list): - cond = [cond] - key = 'c_concat' if self.conditioning_key == 'concat' else 'c_crossattn' - cond = {key: cond} - x_recon = self.apply_step(x_noisy, t, **cond) - return x_recon - - def apply_step(self, x, t, c_concat: list = None, c_crossattn: list = None): - if self.conditioning_key is None: - out = self.diffusion_model(x, t) - elif self.conditioning_key == 'concat': - xc = torch.cat([x] + c_concat, dim=1) - out = self.diffusion_model(xc, t) - elif self.conditioning_key == 'crossattn': - cc = torch.cat(c_crossattn, 1) - out = self.diffusion_model(x, t, context=cc) - elif self.conditioning_key == 'hybrid': - xc = torch.cat([x] + c_concat, dim=1) - cc = torch.cat(c_crossattn, 1) - out = self.diffusion_model(xc, t, context=cc) - elif self.conditioning_key == 'adm': - cc = c_crossattn[0] - out = self.diffusion_model(x, t, y=cc) - else: - raise NotImplementedError() - - return out - - -class sd_noise_scheduler(nn.Module): - def __init__( - self, - parameterization='eps', - v_posterior=0, - given_betas=None, - beta_schedule='linear', - timesteps=1000, - linear_start=0.00085, - linear_end=0.012, - cosine_s=8e-3, - ): - super().__init__() - self.parameterization = parameterization - self.v_posterior = v_posterior - self.register_schedule( - given_betas=given_betas, - beta_schedule=beta_schedule, - timesteps=timesteps, - linear_start=linear_start, - linear_end=linear_end, - cosine_s=cosine_s, - ) - - def register_schedule( - self, - given_betas=None, - beta_schedule="linear", - timesteps=1000, - linear_start=1e-4, - linear_end=2e-2, - cosine_s=8e-3, - ): - if exists(given_betas): - betas = given_betas - else: - betas = make_beta_schedule( - beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s - ) - alphas = 1.0 - betas - alphas_cumprod = np.cumprod(alphas, axis=0) - alphas_cumprod_prev = np.append(1.0, alphas_cumprod[:-1]) - - (timesteps,) = betas.shape - self.num_timesteps = int(timesteps) - self.linear_start = linear_start - self.linear_end = linear_end - assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' - - to_torch = partial(torch.tensor, dtype=torch.float32) - - self.register_buffer('betas', to_torch(betas)) - self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) - self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) - - # calculations for diffusion q(x_t | x_{t-1}) and others - self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) - self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1.0 - alphas_cumprod))) - self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1.0 - alphas_cumprod))) - self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod))) - self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod - 1))) - - # calculations for posterior q(x_{t-1} | x_t, x_0) - posterior_variance = (1 - self.v_posterior) * betas * (1.0 - alphas_cumprod_prev) / ( - 1.0 - alphas_cumprod - ) + self.v_posterior * betas - # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) - self.register_buffer('posterior_variance', to_torch(posterior_variance)) - # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain - self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20)))) - self.register_buffer( - 'posterior_mean_coef1', to_torch(betas * np.sqrt(alphas_cumprod_prev) / (1.0 - alphas_cumprod)) - ) - self.register_buffer( - 'posterior_mean_coef2', to_torch((1.0 - alphas_cumprod_prev) * np.sqrt(alphas) / (1.0 - alphas_cumprod)) - ) - - if self.parameterization == "eps": - lvlb_weights = self.betas ** 2 / ( - 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod) - ) - elif self.parameterization == "x0": - lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2.0 * 1 - torch.Tensor(alphas_cumprod)) - else: - raise NotImplementedError("mu not supported") - # TODO how to choose this term - lvlb_weights[0] = lvlb_weights[1] - self.register_buffer('lvlb_weights', lvlb_weights, persistent=False) - assert not torch.isnan(self.lvlb_weights).all() - - def forward(self, x_start, t, noise=None): - noise = default(noise, lambda: torch.randn_like(x_start)) - return ( - extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start - + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise - ) diff --git a/nemo/collections/multimodal/models/imagen/__init__.py b/nemo/collections/multimodal/models/imagen/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/models/imagen/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/models/imagen/imagen.py b/nemo/collections/multimodal/models/imagen/imagen.py deleted file mode 100644 index 64c1382e2d54..000000000000 --- a/nemo/collections/multimodal/models/imagen/imagen.py +++ /dev/null @@ -1,602 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import itertools -from datetime import datetime -from functools import partial -from typing import Any - -import numpy as np -import torch -import torch.nn.functional as F -from einops import rearrange -from omegaconf import DictConfig, open_dict -from pytorch_lightning import Trainer -from tqdm import tqdm - -from nemo.collections.multimodal.data.imagen.imagen_dataset import build_train_valid_datasets -from nemo.collections.multimodal.models.imagen.precond import ContinousDDPMPrecond, EDMPrecond -from nemo.collections.multimodal.modules.imagen.diffusionmodules.nets import EfficientUNetModel, UNetModel -from nemo.collections.multimodal.modules.imagen.encoder.t5encoder import T5Encoder -from nemo.collections.multimodal.modules.imagen.sampler.sampler import DDPMSampler, EDMSampler -from nemo.collections.multimodal.parts.imagen.utils import random_dropout -from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel -from nemo.collections.nlp.modules.common.megatron.module import Float16Module -from nemo.collections.nlp.parts.utils_funcs import get_last_rank -from nemo.core.classes.common import Serialization -from nemo.utils import logging - -try: - from apex import amp - from apex.transformer.pipeline_parallel.utils import get_num_microbatches - - HAVE_APEX = True -except (ImportError, ModuleNotFoundError): - HAVE_APEX = False - -try: - from megatron.core import parallel_state - from megatron.core.pipeline_parallel.schedules import get_forward_backward_func - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - HAVE_MEGATRON_CORE = False - -try: - from group_norm import GroupNormOpt - - OPT_GROUP_NORM = True -except Exception: - print('Fused optimized group norm has not been installed.') - OPT_GROUP_NORM = False - -DUMMY_TENSOR = torch.tensor([1.0]) - - -class Imagen(torch.nn.Module, Serialization): - def __init__(self, cfg, model_parallel_config): - super().__init__() - self.cfg = cfg - self.config = model_parallel_config - # Make sure the initialization on different GPUs are the same - self.unet_type = cfg.get('unet_type', 'base') - self.noise_cond_aug = cfg.get('noise_cond_aug', False) - if self.unet_type == 'base': - logging.info('Initializing UNet.') - unet = UNetModel(**cfg.unet, text_embed_dim=cfg.conditioning.embed_dim) - elif self.unet_type == 'sr': - logging.info('Initializing Efficient-UNet.') - unet = EfficientUNetModel( - **cfg.unet, text_embed_dim=cfg.conditioning.embed_dim, noise_cond_aug=self.noise_cond_aug - ) - elif self.unet_type == 'sr-unet': - logging.info('Initializing UNet for SR model.') - unet = UNetModel(**cfg.unet, text_embed_dim=cfg.conditioning.embed_dim, noise_cond_aug=self.noise_cond_aug) - else: - raise NotImplemented(f'{self.unet_type} UNet is not implemented.') - - self.channels_last = cfg.get('channels_last', False) - if self.channels_last: - assert OPT_GROUP_NORM, 'Training in channels last format requires optmized group norm implementation.' - logging.info('Training in torch channels last format.') - unet = unet.to(memory_format=torch.channels_last) - - # Preconditioning - self.preconditioning_type = cfg.get('preconditioning_type', 'DDPM') - if self.preconditioning_type == 'DDPM': - logging.info('Preconditioned with Continous DDPM') - self.model = ContinousDDPMPrecond(unet=unet, **cfg.preconditioning, noise_cond_aug=self.noise_cond_aug) - self.sampler = DDPMSampler(unet_type=self.unet_type, denoiser=self.model.scheduler) - elif self.preconditioning_type == 'EDM': - logging.info('Preconditioned with EDM') - self.model = EDMPrecond(unet=unet, **cfg.preconditioning, noise_cond_aug=self.noise_cond_aug) - self.sampler = EDMSampler(unet_type=self.unet_type) - else: - raise NotImplemented(f'{self.preconditioning_type} preconditioning is not implemented.') - - self.rng = None - self.conditioning = cfg.conditioning - self.text_drop_rate = cfg.conditioning.drop_rate - self.model_type = None - self.image_size = cfg.unet.image_size - - def setup_rng(self): - # We need to set different rng seed for different GPUs/ different runs; - # otherwise, the noise map and time will be exactly the same. - self.rng = torch.Generator(device=torch.cuda.current_device()) - self.rng_seed = int(datetime.now().timestamp()) + self.cfg.seed + parallel_state.get_data_parallel_rank() - logging.info(f'RNG seed set as {self.rng_seed} for rank {parallel_state.get_data_parallel_rank()}') - self.rng.manual_seed(self.rng_seed) - self.model.set_rng(self.rng) - - @property - def unet(self): - return self.model.unet - - def get_text_encoder(self, encoder_path=None): - # TODO Assume using T5 for all - return T5Encoder(max_seq_len=self.conditioning.token_length, encoder_path=encoder_path) - - def forward(self, x_start, text_embed, text_mask, x_lowres=None): - if self.unet_type == 'base': - assert x_lowres[0].item() == DUMMY_TENSOR.item(), 'Base model should have no low-resolution conditioning' - x_lowres = None - else: - assert x_lowres[0].dim() not in [0, 1], 'SR model should have low-resolution conditioning' - - if self.channels_last: - x_start = x_start.to(memory_format=torch.channels_last) - if x_lowres is not None: - x_lowres = x_lowres.to(memory_format=torch.channels_last) - - # Apply random dropout to text embedding - text_embed = random_dropout(text_embed, drop_rate=self.text_drop_rate) - # UNet Forward Pass - low_res_cond = {'x_low_res': x_lowres} if x_lowres is not None else {} - # UNet Forward Pass and compute loss - loss = self.model.compute_loss( - x0=x_start, - text_embed=text_embed, - text_mask=text_mask, - time=None, # Randomly Sample - noise=None, # Randomly Sample - **low_res_cond, - ) - return loss, {'train/loss': loss} - - @torch.no_grad() - def sample_image( - self, - noise_map, - text_encoding, - text_mask, - x_low_res=None, - cond_scale=1.0, - sampling_steps=None, - thresholding_method='dynamic', - ): - return self.sampler( - self.model, noise_map, text_encoding, text_mask, x_low_res, cond_scale, sampling_steps, thresholding_method - ) - - def set_input_tensor(self, input_tensor): - """See megatron.model.transformer.set_input_tensor()""" - # only required for pipeline parallelism - pass - - -class MegatronImagen(MegatronBaseModel): - def __init__(self, cfg: DictConfig, trainer: Trainer): - if not HAVE_APEX: - raise ImportError( - "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." - ) - - with open_dict(cfg): - cfg.hidden_size = cfg.unet.embed_dim - # this prevents base constructor from initializing tokenizer - self.tokenizer = None - super().__init__(cfg, trainer=trainer) - - self._validate_trainer() - # megatron_amp_O2 is not yet supported in diffusion models - self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) - - self.model = self.model_provider_func() - - if self.trainer.precision in ['bf16', 'bf16-mixed']: - self.autocast_dtype = torch.bfloat16 - elif self.trainer.precision in [32, '32', '32-true']: - self.autocast_dtype = torch.float - elif self.trainer.precision in [16, '16', '16-mixed']: - self.autocast_dtype = torch.half - else: - raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') - - self.online_encoding = cfg.conditioning.get("online_encoding", False) - self.text_encoder_path = cfg.conditioning.get("encoder_path", None) - - def get_module_list(self): - if isinstance(self.model, list): - return [model.module if isinstance(model, Float16Module) else model for model in self.model] - elif isinstance(self.model, Float16Module): - return [self.model.module] - else: - return [self.model] - - def model_provider_func(self, pre_process=True, post_process=True): - """Model depends on pipeline paralellism.""" - model = Imagen(cfg=self.cfg, model_parallel_config=self.model_parallel_config) - return model - - def get_forward_output_and_loss_func(self): - def process_batch(batch): - """ Prepares the batch for megatron fwd/bwd functions. - Global batch is a list of micro batches. - """ - # Base model and SR models have slightly different batch input: - # Base model would only require images (64x64), - # while SR models (both SR256 and SR1024) require low-res image (64x64) and - # actual (cropped) image (256x256) - if self.cfg.unet_type == 'base': - x_start = batch['images'] - # Pass in DUMMY_TENSOR because megatron requires each input to be - # tensor (not None) with same batch size (first dim) - x_lowres = DUMMY_TENSOR.repeat(x_start.shape[0]) - elif self.cfg.unet_type == 'sr' or self.cfg.unet_type == 'sr-unet': - x_start = batch['images_256'] - x_lowres = batch['images_64'] - else: - raise NotImplemented(f'Unknown UNet type: {self.cfg.unet_type}') - - if self.cfg.conditioning.get("online_encoding", False): - input_text = batch["raw_text"] - # Encode the text embeddings using text encoder. - with torch.no_grad(): - text_embed, text_mask = self.text_encoder.encode(input_text) - else: - text_conditioning_key = self.cfg.conditioning.out_key - text_embed = batch[f'{text_conditioning_key}_embeddings'] - text_mask = batch[f'{text_conditioning_key}_mask'] - return [x_start, text_embed, text_mask, x_lowres] - - def fwd_output_and_loss_func(dataloader_iter, model): - batch = next(dataloader_iter) - batch = process_batch(batch) - batch = [x.cuda(non_blocking=True) for x in batch] - loss, loss_dict = model(*batch) - - def dummy(output_tensor): - return loss, loss_dict - - # output_tensor, and a function to convert output_tensor to loss + loss_dict - return loss, dummy - - return fwd_output_and_loss_func - - def get_forward_output_only_func(self): - def fwd_output_only_func(batch, model): - raise NotImplementedError - - return fwd_output_only_func - - def build_train_valid_test_datasets(self): - logging.info('Building datasets for Imagen...') - if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): - raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") - self._train_ds, self._validation_ds = build_train_valid_datasets( - model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0) - ) - # We do not have test dataset - self._test_ds = None - - if self._train_ds is not None: - logging.info(f'Length of train dataset: {len(self._train_ds)}') - if self._validation_ds is not None: - logging.info(f'Length of val dataset: {len(self._validation_ds)}') - if self._test_ds is not None: - logging.info(f'Length of test dataset: {len(self._test_ds)}') - logging.info(f'Finished building datasets for LatentDiffusion.') - return self._train_ds, self._validation_ds, self._test_ds - - def setup_training_data(self, cfg): - if hasattr(self, '_train_ds') and self._train_ds is not None: - consumed_samples = self.compute_consumed_samples(0) - logging.info( - f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' - ) - self._train_dl = torch.utils.data.DataLoader( - self._train_ds, - batch_size=self._micro_batch_size, - num_workers=cfg.num_workers, - pin_memory=True, - drop_last=True, - persistent_workers=True, - ) - - def setup_validation_data(self, cfg): - if hasattr(self, '_validation_ds') and self._validation_ds is not None: - consumed_samples = 0 - logging.info( - f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' - ) - self._validation_dl = torch.utils.data.DataLoader( - self._validation_ds, - batch_size=self._micro_batch_size, - num_workers=cfg.num_workers, - pin_memory=True, - drop_last=False, - persistent_workers=True, - ) - - def setup_test_data(self, cfg): - if hasattr(self, '_test_ds') and self._test_ds is not None: - consumed_samples = 0 - logging.info( - f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' - ) - self._test_dl = torch.utils.data.DataLoader( - self._test_ds, batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, - ) - - def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): - tensor_shape = None - - # handle asynchronous grad reduction - no_sync_func = None - if not forward_only and self.with_distributed_adam: - no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) - - # pipeline schedules will get these from self.model.config - for module in self.get_module_list(): - module.config.no_sync_func = no_sync_func - - # run forward and backwards passes for an entire global batch - # we do this inside training_step to support pipeline parallelism - fwd_bwd_function = get_forward_backward_func() - - # TODO @akhattar: add num_micro_batches_with_partial_activation_checkpoints when ready - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(), - data_iterator=dataloader_iter, - model=self.model, - num_microbatches=get_num_microbatches(), - forward_only=forward_only, - seq_length=None, - micro_batch_size=self.cfg.micro_batch_size, - ) - - # losses_reduced_per_micro_batch is a list of dictionaries - # [{"loss": 0.1}, {"loss": 0.2}, ...] which are from gradient accumulation steps - # only the last stages of the pipeline return losses - loss_dict = {} - if losses_reduced_per_micro_batch: - if (not forward_only) or self.cfg.data.get('validation_drop_last', True): - # average loss across micro batches - for key in losses_reduced_per_micro_batch[0]: - loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.stack(loss_tensors_list) - loss_dict[key] = loss_tensor.mean() - loss_mean = loss_dict["train/loss"] - else: - # Get the total loss since micro batches sizes are not uniform - raise NotImplementedError("Losses of micro batches sizes must be uniform!") - else: - # we're not on the last pipeline stage so no losses - if forward_only: - loss_mean = [] - else: - loss_mean = torch.tensor(0.0).cuda() - - return loss_mean, loss_dict - - def training_step(self, dataloader_iter, batch_idx): - """ - Our dataloaders produce a micro-batch and then we fetch - a number of microbatches depending on the global batch size and model parallel size - from the dataloader to produce a list of microbatches. - Batch should be a list of microbatches and those microbatches should on CPU. - Microbatches are then moved to GPU during the pipeline. - The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. - """ - - # we zero grads here because we also call backward in the megatron-core fwd/bwd functions - self._optimizer.zero_grad() - - loss_mean, loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, False) - - torch.distributed.broadcast(loss_mean, get_last_rank()) - - # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced - if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): - self.allreduce_sequence_parallel_gradients() - - if self.with_distributed_adam: - # synchronize asynchronous grad reductions - # note: not necessary, but reduces performance degradation - # from multiple simultaneous NCCL calls - self._optimizer._finish_bucket_grad_sync() - elif self.megatron_amp_O2: - # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) - # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): - # # main grads are stored in the MainParamsOptimizer wrapper - # self._optimizer.allreduce_main_grads() - self._optimizer.allreduce_main_grads() - elif not self.cfg.get('ddp_overlap', True): - # async grad allreduce is not currently implemented for O1/autocasting mixed precision training - # so we all-reduce gradients after the pipeline - self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) - - if self.cfg.precision in [16, '16', '16-mixed']: - loss_scale = self.trainer.precision_plugin.scaler._scale - if loss_scale is not None: - self.log('loss_scale', loss_scale, batch_size=1) - - self.log_dict(loss_dict, prog_bar=False, logger=True, on_step=True, rank_zero_only=True, batch_size=1) - self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) - lr = self._optimizer.param_groups[0]['lr'] - self.log('lr', lr, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log( - 'consumed_samples', - self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), - prog_bar=True, - rank_zero_only=True, - batch_size=1, - ) - return loss_mean - - def backward(self, *args, **kwargs): - """ LightningModule hook to do backward. - We want this to do nothing since we run backward in the fwd/bwd functions from apex. - No need to call it here. - """ - pass - - def optimizer_zero_grad(self, *args, **kwargs): - """ LightningModule hook to zero grad. - We want this to do nothing as we are zeroing grads during the training_step. - """ - pass - - def _append_sequence_parallel_module_grads(self, module, grads): - """ Helper method for allreduce_sequence_parallel_gradients""" - - for param in module.parameters(): - sequence_parallel_param = getattr(param, 'sequence_parallel', False) - if sequence_parallel_param and param.requires_grad: - if self.megatron_amp_O2: - grad = param.main_grad - else: - grad = param.grad - grads.append(grad.data) - - def validation_step(self, dataloader_iter, batch_idx): - """ - Our dataloaders produce a micro-batch and then we fetch - a number of microbatches depending on the global batch size and model parallel size - from the dataloader to produce a list of microbatches. - The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. """ - - loss, val_loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, True) - - self.log_dict(val_loss_dict, prog_bar=False, logger=True, on_step=False, on_epoch=True, batch_size=1) - return loss - - def setup(self, stage=None): - """ PTL hook that is executed after DDP spawns. - We setup datasets here as megatron datasets require DDP to instantiate. - See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. - Args: - stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. - """ - - # log number of parameters - if isinstance(self.model, list): - num_parameters_on_device = sum( - [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] - ) - else: - num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) - - # to be summed across data parallel group - total_num_parameters = torch.tensor(num_parameters_on_device).cuda(non_blocking=True) - - torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) - - logging.info( - f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' - f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' - f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' - f'Total number of model parameters: {total_num_parameters:.2e}.' - ) - - resume_checkpoint_path = self.trainer.ckpt_path - if resume_checkpoint_path: - init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) - else: - init_consumed_samples = 0 - self.init_consumed_samples = init_consumed_samples - self.init_global_step = self.trainer.global_step - - # allowing restored models to optionally setup datasets - self.build_train_valid_test_datasets() - - # Batch size need to be provided for webdatset - self._num_micro_batches = get_num_microbatches() - self._micro_batch_size = self.cfg.micro_batch_size - - self.setup_training_data(self.cfg.data) - self.setup_validation_data(self.cfg.data) - self.setup_test_data(self.cfg.data) - # Setup RNG seed in model - self.model.setup_rng() - - def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: - """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device - When using pipeline parallelism, we need the global batch to remain on the CPU, - since the memory overhead will be too high when using a large number of microbatches. - Microbatches are transferred from CPU to GPU inside the pipeline. - """ - return batch - - def _validate_trainer(self): - """ Certain trainer configurations can break training. - Here we try to catch them and raise an error. - """ - if self.trainer.accumulate_grad_batches > 1: - raise ValueError( - f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' - ) - - @classmethod - def list_available_models(cls): - return None - - def parameters(self): - if isinstance(self.model, list): - return itertools.chain.from_iterable(module.parameters() for module in self.model) - else: - return self.model.parameters() - - def on_save_checkpoint(self, checkpoint) -> None: - if self.online_encoding: - # Removing the weights relating to Text encoder when saving the checkpoints - frozen_weights_keys = [k for k in checkpoint['state_dict'].keys() if k.startswith("text_encoder")] - for k in frozen_weights_keys: - del checkpoint['state_dict'][k] - - def on_load_checkpoint(self, checkpoint) -> None: - # make sure inductor naming is consistent with checkpoint's - inductor_enabled = self.cfg.get('inductor', False) - state_dict = checkpoint['state_dict'] - inductor_checkpoint = False - for k, v, in state_dict.items(): - if '_orig_mod' in k: - inductor_checkpoint = True - break - - if inductor_enabled and not inductor_checkpoint: - # ckpt needs to be converted to inductor-format weights (add .orig_mod) - logging.info('Add .orig_mod to all weight keys.') - new_state_dict = {} - for k, v in state_dict.items(): - idx = k.find('._orig_mod') - new_key = k[:idx] + k[idx + len('._orig_mod') :] - new_state_dict[new_key] = v - checkpoint['state_dict'] = new_state_dict - elif not inductor_enabled and inductor_checkpoint: - # ckpt needs to be converted to non-inductor-format weights (remove .orig_mod) - logging.info('Remove .orig_mod to all weight keys.') - new_state_dict = {} - for k, v in state_dict.items(): - new_key = k.replace("._orig_mod", "") - new_state_dict[new_key] = v - checkpoint['state_dict'] = new_state_dict - super().on_load_checkpoint(checkpoint) - - def on_fit_start(self) -> None: - if self.online_encoding: - # if encoding text online, set up text_encoder here (after loading checkpoints) instead of in __init__. - # This is because text encoder weights are not saved, so the encoder must be loaded after other weights - # are loaded. - logging.info( - f'Setting up pretrained text encoder: {self.text_encoder_path or "download or use cached t5-11b"}' - ) - self.text_encoder = self.model.get_text_encoder(encoder_path=self.text_encoder_path).to( - torch.cuda.current_device() - ) - self.text_encoder.eval() - for param in self.text_encoder.parameters(): - param.requires_grad = False diff --git a/nemo/collections/multimodal/models/imagen/imagen_pipeline.py b/nemo/collections/multimodal/models/imagen/imagen_pipeline.py deleted file mode 100644 index 15916907dbfc..000000000000 --- a/nemo/collections/multimodal/models/imagen/imagen_pipeline.py +++ /dev/null @@ -1,355 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import time -from dataclasses import dataclass, field -from typing import Callable, Dict, List, Literal, Optional, Union - -import torch -from omegaconf.omegaconf import OmegaConf -from pytorch_lightning import Trainer -from torch.cuda.amp import autocast - -from nemo.collections.multimodal.models.imagen.imagen import Imagen, MegatronImagen -from nemo.collections.multimodal.parts.utils import numpy_to_pil, setup_trainer_and_models_for_inference - - -@dataclass -class ImagenCustomizedModelConfig: - base_ckpt: Optional[str] = None - base_cfg: Optional[str] = None - sr256_ckpt: Optional[str] = None - sr256_cfg: Optional[str] = None - sr1024_ckpt: Optional[str] = None - sr1024_cfg: Optional[str] = None - - -@dataclass -class ImagenSamplingConfig: - step: Optional[int] = None - cfg: Optional[float] = 1 - - -@dataclass -class ImagenPipelineConfig: - model_name: Optional[str] = None - run_ema_model: Optional[bool] = True - customized_model: Optional[ImagenCustomizedModelConfig] = None - num_images_per_promt: Optional[int] = 8 - texts: Optional[List[str]] = field(default_factory=lambda: []) - output_path: Optional[str] = 'output/imagen_inference' - record_time: Optional[bool] = False - encoder_path: Optional[str] = None - target_resolution: Optional[int] = 256 - inference_precision: Optional[str] = '32' - thresholding_method: Optional[str] = 'dynamic' - samplings: Optional[List[ImagenSamplingConfig]] = field(default_factory=lambda: list()) - part: Optional[int] = 0 - - -class ImagenPipeline(Callable): - def __init__(self, models: List[Imagen], text_encoder, cfg, device): - self.models = [model.to(device) for model in models] - self.text_encoder = text_encoder.to(device) - self.cfg = cfg - self.device = device - - def _load_model(model_ckpt: str, model_cfg: str, eval_mode: bool = True, trainer: Trainer = None): - assert model_ckpt is not None, 'model ckpt cannot be None' - if model_ckpt.endswith('.nemo'): - model_cfg = MegatronImagen.restore_from(restore_path=model_ckpt, trainer=trainer, return_config=True) - model_cfg.unet.flash_attention = False - model_cfg.micro_batch_size = 1 - model_cfg.global_batch_size = 1 - model = MegatronImagen.restore_from( - restore_path=model_ckpt, override_config_path=model_cfg, trainer=trainer, - ) - elif model_ckpt.endswith('.ckpt'): - model_cfg = OmegaConf.load(model_cfg) - model_cfg.model.unet.flash_attention = False - model_cfg.model.micro_batch_size = 1 - model_cfg.model.global_batch_size = 1 - model = MegatronImagen(cfg=model_cfg.model, trainer=trainer) - checkpoint = torch.load(model_ckpt, map_location=lambda storage, loc: storage) - - # Change weight keys if training using TorchInductor - state_dict = checkpoint['state_dict'] - del_keys = [] - for k, v in state_dict.items(): - if '._orig_mod' in k: - del_keys.append(k) - if len(del_keys) != 0: - print('ckpt was saved with TorchInductor. Renaming weights..') - for k in del_keys: - new_k = k.replace("._orig_mod", "") - state_dict[new_k] = state_dict[k] - del state_dict[k] - model.load_state_dict(state_dict, strict=True) - else: - raise Exception('Invalid ckpt type. Should be either .nemo or .ckpt with cfg') - - model = model.model # We do not need Megatron Instance for inference - model.model.set_inference_mode(True) # Used for adding the least noise for EDM inference for SR model. - if eval_mode: - model.unet.cuda().eval() - return model - - def _load_customized_model(cfg: ImagenPipelineConfig, trainer=None, megatron_loading=False, megatron_cfg=None): - if megatron_loading: - assert megatron_cfg - - def model_cfg_modifier(model_cfg): - model_cfg.inductor = False - model_cfg.unet.flash_attention = False - model_cfg.micro_batch_size = megatron_cfg.fid.ncaptions_per_batch - model_cfg.global_batch_size = model_cfg.micro_batch_size * megatron_cfg.fid.ntasks_per_node - - trainer, megatron_models = setup_trainer_and_models_for_inference( - MegatronImagen, cfg=megatron_cfg, model_cfg_modifier=model_cfg_modifier - ) - models = [mm.model for mm in megatron_models] - for model in models: - model.cuda().eval() - model.model.set_inference_mode(True) - return models - customized_models = cfg.customized_model - models = [] - print('Load base model.') - model = ImagenPipeline._load_model( - model_ckpt=customized_models.base_ckpt, model_cfg=customized_models.base_cfg, trainer=trainer, - ) - models.append(model) - - if cfg.target_resolution >= 256: - print('Load SR256 model.') - model = ImagenPipeline._load_model( - model_ckpt=customized_models.sr256_ckpt, model_cfg=customized_models.sr256_cfg, trainer=trainer - ) - models.append(model) - - if cfg.target_resolution >= 1024: - print('Load SR1024 model.') - model = ImagenPipeline._load_model( - model_ckpt=customized_models.sr1024_ckpt, model_cfg=customized_models.sr1024_cfg, trainer=trainer - ) - models.append(model) - return models - - @classmethod - def from_pretrained( - cls, cfg: ImagenPipelineConfig, trainer=None, device='cuda', megatron_loading=False, megatron_cfg=None - ): - target_resolution = cfg.target_resolution - assert target_resolution in [64, 256, 1024] - - # Set encoder_path which will be used when inst the model - if cfg.encoder_path is not None: - os.environ['ENCODER_PATH'] = cfg.encoder_path - - assert cfg.model_name is None, 'No predefined model for now' - assert cfg.customized_model is not None, 'Need to provide customized models for inference' - models = ImagenPipeline._load_customized_model(cfg, trainer, megatron_loading, megatron_cfg) - assert len(models) >= 1, 'Need to load at least one model' - if cfg.inference_precision == '16': - print('Running Inference in FP16.') - print('Converting all difussion models to FP16..') - for model in models: - model.half() - - print('Loading text encoder') - text_encoder = models[0].get_text_encoder(encoder_path=cfg.encoder_path) - if cfg.inference_precision == '16': - print('Converting text encoders to FP16..') - text_encoder.half() - return ImagenPipeline(models=models, text_encoder=text_encoder, cfg=cfg, device=device) - - @torch.no_grad() - def get_text_encodings(self, input_text, repeat=1): - # Repeat the inputs so that we generate multiple samples per query - if isinstance(input_text, str): - inp_text_batch = [input_text] - else: - inp_text_batch = input_text - # Encode the text embeddings using text encoder. - text_encodings, text_mask = self.text_encoder.encode(inp_text_batch, device=self.device) - if repeat != 1: - assert len(inp_text_batch) == 1, 'Repeat should only be applied if we feed single text to encoder.' - text_encodings = text_encodings.repeat(repeat, 1, 1) - text_mask = text_mask.repeat(repeat, 1) - return text_encodings, text_mask - - @torch.no_grad() - def __call__( - self, - prompts: Union[str, List[str]] = None, - inference_steps: Union[int, List[int]] = None, - classifier_free_guidance: Union[float, List[float]] = None, - num_images_per_promt: Optional[int] = 0, - thresholding_method: bool = None, - output_type: Optional[str] = 'pil', - seed: Union[int, List[int]] = 2000, - single_batch_mode: bool = False, - output_res: Optional[int] = None, - low_res_input: Optional[torch.Tensor] = None, - ): - if prompts is None: - prompts = OmegaConf.to_object(self.cfg.texts) - if num_images_per_promt == 0: - num_images_per_promt = self.cfg.num_images_per_promt - if thresholding_method is None: - thresholding_method = self.cfg.thresholding_method - device = self.device - inference_precision = self.cfg.inference_precision - assert inference_precision in ['16', '32', 'AMP'], "Inference Precision should be one of ['16', '32', 'AMP']" - print(f'Running inference in {inference_precision} mode.') - amp_enabled = inference_precision == 'AMP' - - # Based on output_res and low_res_input, determine which models to run - if output_res is not None or low_res_input is not None: - models = [] - if output_res is not None: - for model in self.models: - models.append(model) - if model.image_size == output_res: - break - else: - models = self.models - if low_res_input is not None: - print(f'Low-res input shape: {low_res_input.shape}') - low_res_dim = low_res_input.shape[-1] - num_images_per_promt = low_res_input.shape[0] - for idx, model in enumerate(models): - if model.image_size == low_res_dim: - models = models[idx + 1 :] - break - print(f'Running inference on {len(models)} models.') - else: - models = self.models - - if classifier_free_guidance is None: - cfgs = [each.cfg for each in self.cfg.samplings] - cfgs = cfgs[: len(models)] - else: - cfgs = classifier_free_guidance - if isinstance(cfgs, int) or isinstance(cfgs, float): - cfgs = [cfgs] * len(models) - - if inference_steps is None: - steps = [each.step for each in self.cfg.samplings] - steps = steps[: len(models)] - else: - steps = inference_steps - if isinstance(steps, int): - steps = [steps] * len(models) - - assert len(steps) == len(cfgs) == len(models) - - output = [] - all_res_output = [[] for _ in range(len(models))] - if single_batch_mode: - num_images_per_promt = len(prompts) - - throughputs = {'text-encoding': []} - for idx in range(len(models)): - throughputs[f'stage-{idx+1}'] = [] - for prompt in prompts: - if single_batch_mode: - text_input = prompts - else: - text_input = prompt.strip('\n') - print('Input caption: {}'.format(text_input)) - tic = time.perf_counter() - text_encodings, text_mask = self.get_text_encodings( - text_input, repeat=num_images_per_promt if not single_batch_mode else 1 - ) - throughputs['text-encoding'].append(time.perf_counter() - tic) - - # Set seed - noise_maps = [] - if isinstance(seed, int): - # Single seed for the batch - torch.random.manual_seed(seed) - # Generate noise maps - for model in models: - noise_map = torch.randn( - (num_images_per_promt, 3, model.unet.image_size, model.unet.image_size), device=device - ) - noise_map = noise_map.half() if inference_precision == '16' else noise_map - noise_maps.append(noise_map) - elif isinstance(seed, list): - assert len(seed) == num_images_per_promt - for model in models: - noise_map_batch = [] - for single_seed in seed: - torch.random.manual_seed(single_seed) - noise_map_single = torch.randn( - (1, 3, model.unet.image_size, model.unet.image_size), device=device - ) - noise_map_batch.append(noise_map_single) - noise_map_batch = torch.cat(noise_map_batch, dim=0) - noise_map_batch = noise_map_batch.half() if inference_precision == '16' else noise_map_batch - noise_maps.append(noise_map_batch) - else: - raise RuntimeError('Seed type incorrect.') - - x_low_res = low_res_input - all_res = [] - for idx, (model, noise_map, cfg, step) in enumerate(zip(models, noise_maps, cfgs, steps)): - tic = time.perf_counter() - with autocast(enabled=amp_enabled): - generated_images = model.sample_image( - noise_map=noise_map, - text_encoding=text_encodings, - text_mask=text_mask, - x_low_res=x_low_res, - cond_scale=cfg, - sampling_steps=step, - thresholding_method=thresholding_method, - ) - x_low_res = generated_images - all_res.append(generated_images) - throughputs[f'stage-{idx+1}'].append(time.perf_counter() - tic) - # recenter from [-1, 1] to [0, 1] - assert generated_images is not None - generated_images = ((generated_images + 1) / 2).clamp_(0, 1) - all_res = [((each + 1) / 2).clamp_(0, 1) for each in all_res] - output.append(generated_images) - for idx, each in enumerate(all_res): - all_res_output[idx].append(each) - if single_batch_mode: - break - - if output_type == 'torch': - return torch.cat(output, dim=0), [torch.cat(each, dim=0) for each in all_res_output] - output_new = [] - for x_samples_image in output: - # Convert to numpy - x_samples_image = x_samples_image.cpu().permute(0, 2, 3, 1).numpy() - if output_type == 'pil': - x_samples_image = numpy_to_pil(x_samples_image) - output_new.append(x_samples_image) - - all_res_output_new = [[] for each in range(len(models))] - for idx, res_output in enumerate(all_res_output): - for x_samples_image in res_output: - # Convert to numpy - x_samples_image = x_samples_image.cpu().permute(0, 2, 3, 1).numpy() - if output_type == 'pil': - x_samples_image = numpy_to_pil(x_samples_image) - all_res_output_new[idx].append(x_samples_image) - - for item in throughputs: - throughputs[item] = sum(throughputs[item]) / len(throughputs[item]) - - return output_new, all_res_output_new, throughputs diff --git a/nemo/collections/multimodal/models/imagen/precond.py b/nemo/collections/multimodal/models/imagen/precond.py deleted file mode 100644 index fc3b3ed7d18d..000000000000 --- a/nemo/collections/multimodal/models/imagen/precond.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn.functional as F - -from nemo.collections.multimodal.modules.imagen.sampler.batch_ops import batch_mul -from nemo.collections.multimodal.modules.imagen.sampler.continuous_ddpm import GaussianDiffusionContinuousTimes -from nemo.collections.multimodal.parts.utils import randn_like - - -class PrecondModel(torch.nn.Module): - def __init__(self, unet, loss_type): - super().__init__() - self.unet = unet - self.rng = None - self.inference = False - if loss_type == 'l1': - self.loss_fn = F.l1_loss - elif loss_type == 'l2': - self.loss_fn = F.mse_loss - elif loss_type == 'huber': - self.loss_fn = F.smooth_l1_loss - else: - raise NotImplementedError(f'{loss_type} loss is not supported') - - def set_inference_mode(self, value): - self.inference = value - - def forward(self, **model_kwargs): - return self.unet(**model_kwargs) - - def forward_with_cond_scale(self, *args, text_embed=None, cond_scale=1.0, **kwargs): - logits = self.forward(*args, text_embed=text_embed, **kwargs) - if cond_scale == 1.0: - return logits - null_logits = self.forward(*args, text_embed=torch.zeros_like(text_embed), **kwargs) - return null_logits + (logits - null_logits) * cond_scale - - def set_rng(self, generator): - self.rng = generator - - -class ContinousDDPMPrecond(PrecondModel): - def __init__( - self, - unet, - loss_type='l2', - pred_objective='noise', - noise_schedule='cosine', - timesteps=1000, - noise_cond_aug=False, - ): - super().__init__(unet, loss_type) - self.scheduler = GaussianDiffusionContinuousTimes(noise_schedule=noise_schedule, timesteps=timesteps) - self.pred_objective = pred_objective - assert noise_cond_aug == False, 'noise cond aug currently not supported for DDPM' - - def sample_time(self, batch_size, device=None): - return self.scheduler.sample_random_times(batch_size=batch_size, device=device) - - def get_xt(self, x0, t=None, epsilon=None): - if epsilon is None: - epsilon = randn_like(x0, generator=self.rng) - if t is None: - t = self.sample_time(batch_size=x0.shape[0], device=x0.device) - x_noisy, log_snr, alpha, sigma = self.scheduler.q_sample(x_start=x0, t=t, noise=epsilon,) - return x_noisy, t, epsilon - - def forward(self, x, time, text_embed, text_mask, **model_kwargs): - # Convert time to FP32 for calculating time embedding due to FP16 overflow - time = time.float() - time = self.scheduler.get_condition(time) - time = time.type_as(x) - - return self.unet(x=x, time=time, text_embed=text_embed, text_mask=text_mask, **model_kwargs) - - def compute_loss(self, x0, text_embed, text_mask, time=None, noise=None, **model_kwargs): - x_noisy, time, noise = self.get_xt(x0=x0, t=time, epsilon=noise) - pred = self.forward(x_noisy, time, text_embed, text_mask, **model_kwargs) - # Determine target - if self.pred_objective == 'noise': - target = noise - elif self.pred_objective == 'x_start': - target = x0 - else: - raise ValueError(f'unknown objective {self.pred_objective}') - return self.loss_fn(pred, target) - - def set_rng(self, generator): - self.scheduler.rng = generator - self.rng = generator - - -class EDMPrecond(PrecondModel): - def __init__( - self, - unet, # Underlying model. - loss_type='l2', - sigma_data=0.5, # Expected standard deviation of the training data. - p_mean=-1.2, - p_std=1.2, - noise_cond_aug=False, - ): - super().__init__(unet, loss_type) - self.sigma_data = sigma_data - self.p_mean = p_mean - self.p_std = p_std - self.noise_cond_aug = noise_cond_aug - - def forward(self, x, time, text_embed, text_mask, **model_kwargs): - bs = x.shape[0] - assert time.ndim <= 1, 'time should be in shape of either [bs] or scalar' - sigma = time - c_skip = self.sigma_data ** 2 / (sigma ** 2 + self.sigma_data ** 2) - c_out = sigma * self.sigma_data / (sigma ** 2 + self.sigma_data ** 2).sqrt() - c_in = 1 / (self.sigma_data ** 2 + sigma ** 2).sqrt() - c_noise = sigma.log() / 4 - - if c_noise.ndim < 1: - c_noise = c_noise.repeat(bs,) - - if self.noise_cond_aug: - # Applying noise conditioning augmentation - assert 'x_low_res' in model_kwargs, 'x_low_res does not exist when attemping to apply noise augmentation' - x_low_res = model_kwargs['x_low_res'] - if self.inference: - batch_size = x_low_res.shape[0] - time_low_res = torch.ones(batch_size, device=x_low_res.device) * 0.002 - x_low_res_noisy, time_low_res = self.get_xt(x0=x_low_res, t=time_low_res, epsilon=None) - else: - x_low_res_noisy, time_low_res = self.get_xt(x0=x_low_res, t=None, epsilon=None) - c_in_noise = 1 / (self.sigma_data ** 2 + time_low_res ** 2).sqrt() - c_noise_noise = time_low_res.log() / 4 - model_kwargs['x_low_res'] = batch_mul(c_in_noise, x_low_res_noisy) - model_kwargs['time_low_res'] = c_noise_noise - - F_x = self.unet(batch_mul(c_in, x), c_noise, text_embed, text_mask, **model_kwargs) - D_x = batch_mul(c_skip, x) + batch_mul(c_out, F_x) - return D_x - - def sample_time(self, batch_size, device=None): - return (torch.randn(batch_size, device=device, generator=self.rng) * self.p_std + self.p_mean).exp() - - def get_xt(self, x0, t=None, epsilon=None): - if epsilon is None: - epsilon = randn_like(x0, generator=self.rng) - assert epsilon.shape == x0.shape - if t is None: - t = self.sample_time(batch_size=x0.shape[0], device=x0.device) - sigma = t - noise = batch_mul(epsilon, sigma) - return x0 + noise, sigma - - def compute_loss(self, x0, text_embed, text_mask, time=None, noise=None, **model_kwargs): - x_noisy, time = self.get_xt(x0=x0, t=None, epsilon=noise) - pred = self.forward(x_noisy, time, text_embed, text_mask, **model_kwargs) - sigma = time - weight = ((sigma ** 2 + self.sigma_data ** 2) / (sigma * self.sigma_data) ** 2).sqrt() - target = x0 - return self.loss_fn(batch_mul(weight, target), batch_mul(weight, pred),) - - def set_rng(self, generator): - self.rng = generator diff --git a/nemo/collections/multimodal/models/instruct_pix2pix/__init__.py b/nemo/collections/multimodal/models/instruct_pix2pix/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/models/instruct_pix2pix/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/__init__.py b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py deleted file mode 100644 index e25b0ecbe041..000000000000 --- a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py +++ /dev/null @@ -1,268 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -https://github.com/timothybrooks/instruct-pix2pix/blob/2afcb7e45bd350765f21a58a0c135871e9dc5a78/stable_diffusion/ldm/models/diffusion/ddpm_edit.py -""" - -from contextlib import contextmanager -from functools import partial - -import numpy as np -import pytorch_lightning as pl -import torch -import torch.nn as nn -from einops import rearrange, repeat -from torch.optim.lr_scheduler import LambdaLR -from torchvision.utils import make_grid -from tqdm import tqdm - -from nemo.collections.multimodal.data.instruct_pix2pix.edit_dataset import EditDataset -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion, MegatronLatentDiffusion -from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( - MegatronPretrainingRandomSampler, - MegatronPretrainingSampler, -) -from nemo.utils import logging - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - - -class LatentDiffusionEdit(LatentDiffusion): - def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): - pl_sd = torch.load(path, map_location="cpu") - if "state_dict" in list(pl_sd.keys()): - pl_sd = pl_sd["state_dict"] - sd = {} - - first_key = list(pl_sd.keys())[0] - # State keys of model trained with TorchDynamo changed from - # "model.xxx" to "model._orig_mod.xxx" - for k, v in pl_sd.items(): - new_k = k.replace("._orig_mod", "") - # compatibility for stable diffusion old checkpoint - # remove megatron wrapper prefix - if first_key == "model.betas": - new_k = new_k.lstrip("model.") - sd[new_k] = v - keys = list(sd.keys()) - - # Our model adds additional channels to the first layer to condition on an input image. - # For the first layer, copy existing channel weights and initialize new channel weights to zero. - input_keys = [ - "model.diffusion_model.input_blocks.0.0.weight", - ] - - self_sd = self.state_dict() - for input_key in input_keys: - if input_key not in sd or input_key not in self_sd: - continue - - input_weight = self_sd[input_key] - if input_weight.size() != sd[input_key].size(): - print(f"Manual init: {input_key}") - input_weight.zero_() - input_weight[:, :4, :, :].copy_(sd[input_key]) - ignore_keys.append(input_key) - - for k in keys: - for ik in ignore_keys: - if k.startswith(ik): - print("Deleting key {} from state_dict.".format(k)) - del sd[k] - missing, unexpected = ( - self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict(sd, strict=False) - ) - print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") - if len(missing) > 0: - print(f"Missing Keys: {missing}") - if len(unexpected) > 0: - print(f"Unexpected Keys: {unexpected}") - - @torch.no_grad() - def get_input( - self, - batch, - k, - return_first_stage_outputs=False, - force_c_encode=False, - cond_key=None, - return_original_cond=False, - bs=None, - uncond=0.05, - ): - x = batch[k] - if bs is not None: - x = x[:bs] - - encoder_posterior = self.encode_first_stage(x) - z = self.get_first_stage_encoding(encoder_posterior).detach() - cond_key = cond_key or self.cond_stage_key - xc = batch[cond_key] - if bs is not None: - xc["c_crossattn"] = xc["c_crossattn"][:bs] - xc["c_concat"] = xc["c_concat"][:bs] - cond = {} - - # To support classifier-free guidance, randomly drop out only text conditioning 5%, only image conditioning 5%, and both 5%. - random = torch.rand(x.size(0), device=x.device) - prompt_mask = rearrange(random < 2 * uncond, "n -> n 1 1") - input_mask = 1 - rearrange((random >= uncond).float() * (random < 3 * uncond).float(), "n -> n 1 1 1") - - null_prompt = self.get_learned_conditioning([""]) - cond["c_crossattn"] = torch.where( - prompt_mask, null_prompt, self.get_learned_conditioning(xc["c_crossattn"]).detach() - ) - cond["c_concat"] = input_mask * self.encode_first_stage((xc["c_concat"].to(x.device))).mode().detach() - - out = [z, cond] - if return_first_stage_outputs: - xrec = self.decode_first_stage(z) - out.extend([x, xrec]) - if return_original_cond: - out.append(xc) - return out - - -class MegatronLatentDiffusionEdit(MegatronLatentDiffusion): - def model_provider_func(self, pre_process=True, post_process=True): - """Model depends on pipeline paralellism.""" - model = LatentDiffusionEdit(cfg=self.cfg, model_parallel_config=self.model_parallel_config) - return model - - def setup(self, stage=None): - """ PTL hook that is executed after DDP spawns. - We setup datasets here as megatron datasets require DDP to instantiate. - See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. - Args: - stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. - """ - self.model.rng.manual_seed(self.cfg.seed + 100 * parallel_state.get_data_parallel_rank()) - - # log number of parameters - if isinstance(self.model, list): - num_parameters_on_device = sum( - [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] - ) - else: - num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) - - # to be summed across data parallel group - total_num_parameters = torch.tensor(num_parameters_on_device).cuda(non_blocking=True) - - torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) - - logging.info( - f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' - f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' - f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' - f'Total number of model parameters: {total_num_parameters:.2e}.' - ) - - resume_checkpoint_path = self.trainer.ckpt_path - if resume_checkpoint_path: - init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) - else: - init_consumed_samples = 0 - self.init_consumed_samples = init_consumed_samples - self.init_global_step = self.trainer.global_step - - self.build_train_valid_test_datasets() - self.setup_training_data(self.cfg.data) - self.setup_validation_data(self.cfg.data) - self.setup_test_data(self.cfg.data) - - def build_train_valid_test_datasets(self): - # TODO (yuya): set up splits ratio and other params - if self.cfg.data.data_path is not None: - self._train_ds = EditDataset(path=self.cfg.data.data_path, split="train", flip_prob=0.5) - self._validation_ds = EditDataset(path=self.cfg.data.data_path, split="val") - self._test_ds = EditDataset(path=self.cfg.data.data_path, split="test") - - def setup_training_data(self, cfg): - if hasattr(self, '_train_ds') and self._train_ds is not None: - consumed_samples = self.compute_consumed_samples(0) - logging.info( - f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' - ) - self._train_dl = self.build_pretraining_data_loader(self._train_ds, consumed_samples) - - def setup_validation_data(self, cfg): - if hasattr(self, '_validation_ds') and self._validation_ds is not None: - consumed_samples = 0 - logging.info( - f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' - ) - drop_last = True - if not self.cfg.get('validation_drop_last', True): - logging.info(f'Drop last in validation dataset is set to False') - drop_last = False - self._validation_dl = self.build_pretraining_data_loader(self._validation_ds, consumed_samples, drop_last) - - def setup_test_data(self, cfg): - if hasattr(self, '_test_ds') and self._test_ds is not None: - consumed_samples = 0 - logging.info( - f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' - ) - drop_last = True - if not self.cfg.get('validation_drop_last', True): - logging.info(f'Drop last in validation dataset is set to False') - drop_last = False - self._test_dl = self.build_pretraining_data_loader(self._test_ds, consumed_samples, drop_last) - - def build_pretraining_data_loader(self, dataset, consumed_samples, drop_last=True): - """Build dataloader given an input dataset.""" - - if dataset is None: - return None - logging.info(f'Building dataloader with consumed samples: {consumed_samples}') - # Megatron sampler - if hasattr(self._cfg.data, 'dataloader_type') and self._cfg.data.dataloader_type is not None: - # TODO (yuya): fix this - if self._cfg.data.dataloader_type == 'single': - batch_sampler = MegatronPretrainingSampler( - total_samples=len(dataset), - consumed_samples=consumed_samples, - micro_batch_size=self._cfg.micro_batch_size, - global_batch_size=self._cfg.global_batch_size, - data_parallel_rank=parallel_state.get_data_parallel_rank(), - data_parallel_size=parallel_state.get_data_parallel_world_size(), - drop_last=drop_last, - ) - elif self._cfg.data.dataloader_type == 'cyclic': - batch_sampler = MegatronPretrainingRandomSampler( - total_samples=len(dataset), - consumed_samples=consumed_samples, - micro_batch_size=self._cfg.micro_batch_size, - global_batch_size=self._cfg.global_batch_size, - data_parallel_rank=parallel_state.get_data_parallel_rank(), - data_parallel_size=parallel_state.get_data_parallel_world_size(), - drop_last=drop_last, - ) - else: - raise Exception(f'{self._cfg.dataloader_type} dataloader type is not supported.') - else: - raise ValueError('cfg.data.dataloader_type not found. Must be "single" or "cyclic"') - - # Torch dataloader. - return torch.utils.data.DataLoader( - dataset, batch_sampler=batch_sampler, num_workers=self._cfg.data.num_workers, pin_memory=True, - ) diff --git a/nemo/collections/multimodal/models/kosmos/__init__.py b/nemo/collections/multimodal/models/kosmos/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py b/nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py deleted file mode 100644 index e4aaddd3214a..000000000000 --- a/nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py +++ /dev/null @@ -1,1154 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import itertools -import math -import os -import random -import tempfile -from functools import partial -from typing import Any, List, Optional, Union - -import numpy as np -import pandas as pd -import torch -from einops import rearrange, repeat -from omegaconf.dictconfig import DictConfig -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning.accelerators import CPUAccelerator -from pytorch_lightning.trainer.trainer import Trainer - -from nemo.collections.multimodal.data.kosmos.kosmos_dataset import MAX_NUM_IMAGES, MergedKosmosDataLoader -from nemo.collections.multimodal.data.kosmos.kosmos_dataset import ( - build_train_valid_datasets as build_media_train_valid_datasets, -) -from nemo.collections.multimodal.models.clip.megatron_clip_models import CLIPVisionTransformer -from nemo.collections.multimodal.models.kosmos.perceiver_resampler import PerceiverResampler -from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( - MegatronPretrainingRandomSampler, - MegatronPretrainingSampler, -) -from nemo.collections.nlp.data.language_modeling.megatron.gpt_dataset import ( - build_train_valid_test_datasets as build_text_train_valid_test_datasets, -) -from nemo.collections.nlp.models.language_modeling.megatron.gpt_model import GPTModel, post_language_model_processing -from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel -from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel -from nemo.collections.nlp.modules.common.megatron.build_model import build_model -from nemo.collections.nlp.modules.common.megatron.language_model import get_language_model -from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule -from nemo.collections.nlp.modules.common.megatron.utils import ( - ApexGuardDefaults, - average_losses_across_data_parallel_group, - get_all_params_for_weight_decay_optimization, - get_params_for_weight_decay_optimization, - init_method_normal, - parallel_lm_logits, - scaled_init_method_normal, -) -from nemo.collections.nlp.modules.common.text_generation_utils import ( - generate, - get_computeprob_response, - get_default_length_params, - get_default_sampling_params, - megatron_gpt_generate, -) -from nemo.collections.nlp.modules.common.transformer.text_generation import ( - LengthParam, - OutputType, - SamplingParam, - TextGeneration, -) -from nemo.collections.nlp.parts.nlp_overrides import GradScaler, NLPSaveRestoreConnector -from nemo.collections.nlp.parts.utils_funcs import get_last_rank -from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone -from nemo.core.classes.common import PretrainedModelInfo -from nemo.utils import logging - -try: - import apex.transformer.pipeline_parallel.utils - from apex.transformer.enums import AttnMaskType - from apex.transformer.pipeline_parallel.utils import get_num_microbatches - - HAVE_APEX = True - -except (ImportError, ModuleNotFoundError): - - HAVE_APEX = False - -try: - from megatron.core import parallel_state - from megatron.core.pipeline_parallel.schedules import get_forward_backward_func - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - -try: - import transformer_engine - - HAVE_TE = True - -except (ImportError, ModuleNotFoundError): - HAVE_TE = False - - -class FrozenCLIPVisionTransformer(CLIPVisionTransformer): - def __init__(self, model_cfg, pre_process=True, post_process=True): - super().__init__( - model_cfg, pre_process=pre_process, post_process=post_process, skip_head=True, - ) - self.frozen = False - - def train(self, mode): - if self.frozen: - return self - - super().train(mode) - return self - - def forward(self, input): - assert self.training == False - hidden_states = self.backbone(input) - # Do not add header after backbone - return hidden_states - - def freeze(self) -> None: - for param in self.parameters(): - param.requires_grad = False - - self.eval() - self.frozen = True - - -class KosmosModel(MegatronModule): - def __init__( - self, model_cfg, vocab_size, media_start_id=None, media_end_id=None, pre_process=True, post_process=True, - ): - super(KosmosModel, self).__init__() - - llm_cfg = model_cfg.llm - vision_cfg = model_cfg.vision - - self.parallel_output = True # TODO (yuya): Fix this hard-code - self.media_start_id = media_start_id - self.media_end_id = media_end_id - self.pre_process = pre_process - self.post_process = post_process - self.fp16_lm_cross_entropy = llm_cfg.get('fp16_lm_cross_entropy', False) - self.sequence_parallel = llm_cfg.sequence_parallel - self.gradient_accumulation_fusion = llm_cfg.gradient_accumulation_fusion - self.share_embeddings_and_output_weights = llm_cfg.share_embeddings_and_output_weights - self.position_embedding_type = llm_cfg.get('position_embedding_type', 'learned_absolute') - - use_scaled_init_method = llm_cfg.get('use_scaled_init_method', True) - kv_channels = llm_cfg.get('kv_channels', None) - hidden_size = llm_cfg.hidden_size - num_attention_heads = llm_cfg.num_attention_heads - num_layers = llm_cfg.num_layers - init_method_std = llm_cfg.init_method_std - - if kv_channels is None: - assert ( - hidden_size % num_attention_heads == 0 - ), 'hidden_size must be divisible by num_attention_heads if kv_channels is None' - kv_channels = hidden_size // num_attention_heads - - scaled_init_method = ( - scaled_init_method_normal(init_method_std, num_layers) - if use_scaled_init_method - else init_method_normal(init_method_std) - ) - self.language_model, self._language_model_key = get_language_model( - vocab_size=vocab_size, - hidden_size=llm_cfg.hidden_size, - max_position_embeddings=llm_cfg.max_position_embeddings, - num_layers=llm_cfg.num_layers, - num_attention_heads=llm_cfg.num_attention_heads, - apply_query_key_layer_scaling=llm_cfg.get('apply_query_key_layer_scaling', True), - kv_channels=kv_channels, - ffn_hidden_size=llm_cfg.ffn_hidden_size, - num_tokentypes=0, - add_pooler=False, - encoder_attn_mask_type=AttnMaskType.causal, - pre_process=pre_process, - post_process=post_process, - init_method_std=llm_cfg.get('init_method_std', 0.02), - scaled_init_method=scaled_init_method, - use_cpu_initialization=llm_cfg.get('use_cpu_initialization', False), - hidden_dropout=llm_cfg.get('hidden_dropout', 0.1), - attention_dropout=llm_cfg.get('attention_dropout', 0.1), - ffn_dropout=llm_cfg.get('ffn_dropout', 0.0), - precision=llm_cfg.get('precision', 16), - fp32_residual_connection=llm_cfg.get('fp32_residual_connection', False), - activations_checkpoint_granularity=llm_cfg.get('activations_checkpoint_granularity', None), - activations_checkpoint_method=llm_cfg.get('activations_checkpoint_method', None), - activations_checkpoint_num_layers=llm_cfg.get('activations_checkpoint_num_layers', 1), - activations_checkpoint_layers_per_pipeline=llm_cfg.get('activations_checkpoint_layers_per_pipeline', None), - normalization=llm_cfg.get('normalization', 'layernorm'), - layernorm_epsilon=llm_cfg.get('layernorm_epsilon', 1e-5), - onnx_safe=llm_cfg.get('onnx_safe', False), - bias=llm_cfg.get('bias', True), - bias_activation_fusion=llm_cfg.get('bias_activation_fusion', True), - bias_dropout_add_fusion=llm_cfg.get('bias_dropout_add_fusion', True), - activation=llm_cfg.get('activation', 'gelu'), - headscale=llm_cfg.get('headscale', False), - transformer_block_type=llm_cfg.get('transformer_block_type', 'pre_ln'), - openai_gelu=llm_cfg.get('openai_gelu', False), - normalize_attention_scores=llm_cfg.get('normalize_attention_scores', True), - position_embedding_type=llm_cfg.get('position_embedding_type', 'learned_absolute'), - rotary_percentage=llm_cfg.get('rotary_percentage', 1.0), - share_embeddings_and_output_weights=llm_cfg.get('share_embeddings_and_output_weights', True), - attention_type=llm_cfg.get('attention_type', 'multihead'), - masked_softmax_fusion=llm_cfg.get('masked_softmax_fusion', True), - gradient_accumulation_fusion=llm_cfg.get('gradient_accumulation_fusion', False), - persist_layer_norm=llm_cfg.get('persist_layer_norm', False), - sequence_parallel=llm_cfg.get('sequence_parallel', False), - transformer_engine=llm_cfg.get('transformer_engine', False), - fp8=llm_cfg.get('fp8', False), - fp8_e4m3=llm_cfg.get('fp8_e4m3', False), - fp8_hybrid=llm_cfg.get('fp8_hybrid', False), - fp8_margin=llm_cfg.get('fp8_margin', 0), - fp8_interval=llm_cfg.get('fp8_interval', 1), - fp8_amax_history_len=llm_cfg.get('fp8_amax_history_len', 1), - fp8_amax_compute_algo=llm_cfg.get('fp8_amax_compute_algo', 'most_recent'), - reduce_amax=llm_cfg.get('reduce_amax', True), - use_emha=llm_cfg.get('use_emha', False), - ) - - if self.share_embeddings_and_output_weights: - self.initialize_word_embeddings( - init_method=init_method_normal(init_method_std), vocab_size=vocab_size, hidden_size=hidden_size - ) - - # TODO (yuya): check when PP is added - self.vision_encoder = FrozenCLIPVisionTransformer( - vision_cfg, pre_process=vision_cfg.pre_process, post_process=vision_cfg.post_process, - ) - if vision_cfg.from_pretrained is not None: - logging.info(f"Loading CLIP vision encoder weights from checkpoint {vision_cfg.from_pretrained}") - self.load_vision_encoder_weights(vision_cfg.from_pretrained) - self.perceiver = PerceiverResampler(dim=vision_cfg.hidden_size, num_latents=model_cfg.num_media_latents) - self.vision_connector = torch.nn.Linear(vision_cfg.hidden_size, llm_cfg.hidden_size, bias=False,) - - def set_input_tensor(self, input_tensor): - """See megatron.model.transformer.set_input_tensor()""" - self.language_model.set_input_tensor(input_tensor) - - def encode_vision_x(self, vision_x: torch.Tensor): - """ - Compute media tokens from vision input by passing it through vision encoder and conditioning language model. - Args: - vision_x (torch.Tensor): Vision input - shape (B, T_img, F, C, H, W) - Images in the same chunk are collated along T_img, and frames are collated along F - Currently only F=1 is supported (single-frame videos) - - rearrange code based on https://github.com/dhansmair/flamingo-mini - """ - - assert vision_x.ndim == 6, "vision_x should be of shape (b, T_img, F, C, H, W)" - b, T, F = vision_x.shape[:3] - assert F == 1, "Only single frame supported" - - vision_x = rearrange(vision_x, "b T F c h w -> (b T F) c h w") - with torch.no_grad(): - vision_x = self.vision_encoder(vision_x) - vision_x = rearrange(vision_x, "(b T F) v d -> b T F v d", b=b, T=T, F=F) - vision_x = self.perceiver(vision_x) # reshapes to (b, T, n, d) - vision_x = self.vision_connector(vision_x) - return vision_x - - def replace_media_embeddings(self, input_ids, inputs_embeds, media=None): - if media is None: - return inputs_embeds - - batch_size, sequence_length, hidden_size = inputs_embeds.shape - - # calculate media features without gradients - with torch.no_grad(): - media_features = self.encode_vision_x(media) - num_images_per_sample = media_features.size(1) - num_patches = media_features.size(2) - - # flatten patches - media_features = media_features.view(batch_size, -1, hidden_size) - - # create an indices matrix used in torch.scatter - padded_media_indices = torch.ones( - (batch_size, num_images_per_sample), dtype=torch.long, device=input_ids.device - ) - padded_media_indices *= sequence_length - for idx, input_id in enumerate(input_ids): - media_end_positions = torch.where(input_id == self.media_end_id)[0] - # locate the first media token positions - padded_media_indices[idx, : len(media_end_positions)] = media_end_positions - num_patches - - # use indices to create a span - padded_media_indices = padded_media_indices.unsqueeze(-1) + torch.arange( - num_patches, device=padded_media_indices.device - ).repeat(*padded_media_indices.shape, 1) - padded_media_indices = padded_media_indices.reshape(batch_size, -1) - padded_media_indices = repeat(padded_media_indices, 'b s -> b s h', h=hidden_size) - - # concat placeholder - updated_input_embeds = torch.cat( - (inputs_embeds, torch.zeros((batch_size, num_patches, hidden_size), device=inputs_embeds.device)), dim=1 - ) - updated_input_embeds = updated_input_embeds.type(media_features.dtype) - # scatter media_features - updated_input_embeds.scatter_(1, padded_media_indices, media_features) - - # chop off placeholder - updated_input_embeds = updated_input_embeds[:, :sequence_length] - - return updated_input_embeds - - def forward( - self, - input_ids, - position_ids, - attention_mask, - labels=None, - media=None, - token_type_ids=None, - layer_past=None, - get_key_value=False, - forward_method_parallel_output=None, - encoder_input=None, - set_inference_key_value_memory=False, - inference_max_sequence_len=None, - checkpoint_activations_all_layers=None, - ): - # input_ids: [b, s] - # position_ids: [b, s] - # attention_mask: [1, 1, s, s] - - # Multimodal uses different forward pass. Vision tower must be inserted. - enc_input_ids, enc_position_ids, enc_attn_mask = input_ids, position_ids, attention_mask - - # Embeddings. - if self.pre_process and encoder_input is None: - embedding_module = self.language_model.embedding - - words_embeddings = embedding_module.word_embeddings(enc_input_ids) - words_embeddings = self.replace_media_embeddings(enc_input_ids, words_embeddings, media=media) - - if self.position_embedding_type == 'learned_absolute': - assert position_ids is not None - position_embeddings = embedding_module.position_embeddings(position_ids) - embeddings = words_embeddings + position_embeddings - elif self.position_embedding_type == 'learned_parameters': - embeddings = words_embeddings + embedding_module.position_embeddings - else: - embeddings = words_embeddings - - if token_type_ids is not None: - assert embedding_module.tokentype_embeddings is not None - embeddings = embeddings + embedding_module.tokentype_embeddings(token_type_ids) - else: - assert embedding_module.tokentype_embeddings is None - - # Data format change to avoid explicit tranposes : [b s h] --> [s b h]. - if embedding_module.transpose_batch_sequence: - embeddings = embeddings.transpose(0, 1).contiguous() - - # If the input flag for fp32 residual connection is set, convert for float. - if embedding_module.fp32_residual_connection: - embeddings = embeddings.float() - - # Dropout. - if self.sequence_parallel: - embeddings = tensor_parallel.mappings.scatter_to_sequence_parallel_region(embeddings) - with tensor_parallel.random.get_cuda_rng_tracker().fork(): - embeddings = embedding_module.embedding_dropout(embeddings) - else: - embeddings = embedding_module.embedding_dropout(embeddings) - - encoder_input = embeddings - else: - pass - - # enc_attn_mask: [1, 1, s, s] - - if self.position_embedding_type == 'rope': - if inference_max_sequence_len is not None: - rotary_pos_emb = self.language_model.rotary_pos_emb(inference_max_sequence_len) - elif self.language_model.encoder.input_tensor is not None: - if self.sequence_parallel: - rotary_pos_emb = self.language_model.rotary_pos_emb( - self.language_model.encoder.input_tensor.size(0) - * parallel_state.get_tensor_model_parallel_world_size() - ) - else: - rotary_pos_emb = self.language_model.rotary_pos_emb(self.encoder.input_tensor.size(0)) - else: - if self.sequence_parallel: - rotary_pos_emb = self.language_model.rotary_pos_emb( - encoder_input.size(0) * parallel_state.get_tensor_model_parallel_world_size() - ) - else: - rotary_pos_emb = self.language_model.rotary_pos_emb(encoder_input.size(0)) - else: - rotary_pos_emb = None - - # encoder but decoder for GPT - encoder_output = self.language_model.encoder( - encoder_input, - enc_attn_mask, - layer_past=layer_past, - get_key_value=get_key_value, - set_inference_key_value_memory=set_inference_key_value_memory, - inference_max_sequence_len=inference_max_sequence_len, - checkpoint_activations_all_layers=checkpoint_activations_all_layers, - rotary_pos_emb=(rotary_pos_emb, None, None) - if rotary_pos_emb is not None - else None, # This assumes that this being used as a GPT/BERT model only (no cross-attention) - ) - - lm_output = encoder_output - - if self.post_process: - return post_language_model_processing( - lm_output, - labels, - self.language_model.output_layer.weight - if not self.share_embeddings_and_output_weights - else self.word_embeddings_weight(), - get_key_value, - self.parallel_output, - forward_method_parallel_output, - self.fp16_lm_cross_entropy, - return_logits=False, - sequence_parallel=self.sequence_parallel, - gradient_accumulation_fusion=self.gradient_accumulation_fusion, - ) - else: - return lm_output - - def load_vision_encoder_weights(self, nemo_path): - if torch.cuda.is_available(): - map_location = torch.device('cuda') - else: - map_location = torch.device('cpu') - save_restore_connector = NLPSaveRestoreConnector() - cwd = os.getcwd() - - with tempfile.TemporaryDirectory() as tmpdir: - try: - save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) - - # Change current working directory to - os.chdir(tmpdir) - config_yaml = os.path.join(tmpdir, save_restore_connector.model_config_yaml) - cfg = OmegaConf.load(config_yaml) - - model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) - state_dict = save_restore_connector._load_state_dict_from_disk( - model_weights, map_location=map_location - ) - finally: - os.chdir(cwd) - - new_state_dict = {} - for k, v in state_dict.items(): - if k.startswith("model.vision_encoder."): - new_k = k.lstrip("model.vision_encoder.") - new_state_dict[new_k] = v - - missing, unexpected = self.vision_encoder.load_state_dict(new_state_dict, strict=False) - print(f"Restored from {nemo_path} with {len(missing)} missing and {len(unexpected)} unexpected keys") - if len(missing) > 0: - print(f"Missing Keys: {missing}") - if len(unexpected) > 0: - print(f"Unexpected Keys: {unexpected}") - - def state_dict_for_save_checkpoint(self, destination=None, prefix='', keep_vars=False): - - state_dict_ = {} - state_dict_[self._language_model_key] = self.language_model.state_dict_for_save_checkpoint( - destination, prefix, keep_vars - ) - # Save word_embeddings. - if self.post_process and not self.pre_process: - state_dict_[self._word_embeddings_for_head_key] = self.word_embeddings.state_dict( - destination, prefix, keep_vars - ) - return state_dict_ - - def load_state_dict(self, state_dict, strict=True): - """Customized load.""" - - # Load word_embeddings. - if self.post_process and not self.pre_process: - self.word_embeddings.load_state_dict(state_dict[self._word_embeddings_for_head_key], strict=strict) - if self._language_model_key in state_dict: - state_dict = state_dict[self._language_model_key] - self.language_model.load_state_dict(state_dict, strict=strict) - - -class MegatronKosmosModel(MegatronGPTModel): - """ - Megatron Kosmos pretraining - """ - - def __init__(self, cfg: DictConfig, trainer: Trainer): - super().__init__(cfg, trainer) - - self.image_size = (self.cfg.vision.img_h, self.cfg.vision.img_w) - self.megatron_amp_O2 = getattr(self, 'megatron_amp_O2', False) - self.enabled_data_types = self.cfg.get("enabled_data_types", []) - logging.info(f"Data types enabled in Kosmos training: {self.enabled_data_types}") - self.per_type_micro_batch_size = self.cfg.per_type_micro_batch_size - self.per_type_global_batch_size = {} - self.per_type_loss_weights = {} - for data_type in self.enabled_data_types: - self.per_type_global_batch_size[data_type] = ( - self.per_type_micro_batch_size[data_type] * self.cfg.global_batch_size // self.cfg.micro_batch_size - ) - self.per_type_loss_weights[data_type] = self.cfg.per_type_loss_weights[data_type] - - def get_gpt_module_list(self): - if isinstance(self.model, list): - return [model.module if isinstance(model, Float16Module) else model for model in self.model] - elif isinstance(self.model, Float16Module): - return [self.model.module] - else: - return [self.model] - - def set_inference_config(self, inference_config): - self._inference_config = inference_config - - def get_inference_config(self): - return self._inference_config - - def model_provider_func(self, pre_process, post_process): - """Model depends on pipeline paralellism.""" - media_start_id = self.tokenizer.token_to_id(self.cfg.media_start_token) - media_end_id = self.tokenizer.token_to_id(self.cfg.media_end_token) - - model = KosmosModel( - model_cfg=self.cfg, - vocab_size=self.padded_vocab_size, - media_start_id=media_start_id, - media_end_id=media_end_id, - pre_process=pre_process, - post_process=post_process, - ) - - # Freeze vit - model.vision_encoder.freeze() - - logging.info( - f"Kosmos model initialized with {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable parameters" - ) - - return model - - def forward(self, tokens, text_position_ids, attention_mask, labels, media=None): - output_tensor = self.model(tokens, text_position_ids, attention_mask, labels=labels, media=media) - return output_tensor - - def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): - - tensor_shape = [self.cfg.llm.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.llm.hidden_size] - - # handle asynchronous grad reduction - no_sync_func = None - grad_sync_func = None - param_sync_func = None - if not forward_only and self.with_distributed_adam: - no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) - grad_sync_func = self.reduce_overlap_gradients - param_sync_func = self.sync_overlap_parameters - - # run forward and backwards passes for an entire global batch - # we do this inside training_step to support pipeline parallelism - fwd_bwd_function = get_forward_backward_func() - - # TODO @akhattar: remove sync related stuff from config, add num_micro_batches_with_partial_activation_checkpoints when ready - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(), - data_iterator=dataloader_iter, - model=[self.model], - num_microbatches=get_num_microbatches(), - forward_only=forward_only, - tensor_shape=tensor_shape, - dtype=self.autocast_dtype, - grad_scaler=self.trainer.precision_plugin.scaler.scale - if self.cfg.precision in [16, '16', '16-mixed'] - else None, - sequence_parallel=self.cfg.get('sequence_parallel', False), - enable_autocast=self.enable_autocast, - no_sync_func=no_sync_func, - grad_sync_func=grad_sync_func, - param_sync_func=param_sync_func, - ) - - # only the last stages of the pipeline return losses - loss_dict = {} - if losses_reduced_per_micro_batch: - # average loss across micro batches - loss_tensors_list = [loss_reduced['avg'] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.stack(loss_tensors_list) - loss_mean = loss_tensor.mean() - for data_type in self.enabled_data_types: - loss_tensors_list = [loss_reduced[data_type] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.stack(loss_tensors_list) - loss_dict[data_type] = loss_tensor.mean() - else: - loss_mean = torch.tensor(0.0).cuda() - - return loss_mean, loss_dict - - def training_step(self, dataloader_iter, batch_idx): - """ - We pass the dataloader iterator function to the micro-batch scheduler. - The input batch to each micro-batch is fetched using the dataloader function - in the micro-batch fwd function. - """ - - # we zero grads here because we also call backward in the megatron-core fwd/bwd functions - self._optimizer.zero_grad() - - if self.with_distributed_adam: - # hack to enable overlapping param sync and forward compute - # note: the distributed optimizer monkey-patches each - # parameter's __getattribute__ function so that it can - # launch parameter all-gathers the first time the - # parameter is accessed after the optimizer step. However, - # PyTorch directly passes embedding parameters into a C++, - # bypassing this process. A quick-and-dirty hack is to - # manually interact with the parameter. - modules = self.model if isinstance(self.model, list) else [self.model] - for module in modules: - if isinstance(module, Float16Module): - module = module.module - module = module.language_model - if hasattr(module, 'embedding'): - for param in module.embedding.parameters(): - param.data_ptr() - - loss_mean, loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, False) - - # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced - if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): - self.allreduce_sequence_parallel_gradients() - - if self.with_distributed_adam: - # synchronize asynchronous grad reductions - # note: not necessary, but reduces performance degradation - # from multiple simultaneous NCCL calls - self._optimizer._finish_bucket_grad_sync() - elif self.megatron_amp_O2: - # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) - if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): - # main grads are stored in the MainParamsOptimizer wrapper - self._optimizer.allreduce_main_grads() - else: - # async grad allreduce is not currently implemented for O1/autocasting mixed precision training - # so we all-reduce gradients after the pipeline - self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) - - if self.cfg.get('pipeline_model_parallel_size', 1) > 1 and self.cfg.get( - 'share_embeddings_and_output_weights', True - ): - # when using pipeline parallelism the first and last stage must keep embeddings in sync - self.allreduce_first_last_embeddings() - - ## logging - # we can only log on one rank if it is rank zero so we broadcast from last rank - # we can avoid this broadcast by updating the PTL log function to accept specific ranks - torch.distributed.broadcast(loss_mean, get_last_rank()) - - if self.cfg.precision in [16, '16', '16-mixed']: - loss_scale = self.trainer.precision_plugin.scaler._scale - if loss_scale is not None: - self.log('loss_scale', loss_scale, batch_size=1) - - self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log_dict({'train/' + k: v for k, v in loss_dict.items()}, rank_zero_only=True, batch_size=1) - lr = self._optimizer.param_groups[0]['lr'] - self.log('lr', lr, rank_zero_only=True, batch_size=1) - self.log( - 'global_step', self.trainer.global_step, prog_bar=True, rank_zero_only=True, batch_size=1, - ) - - consumed_samples = self.compute_consumed_samples(self.trainer.global_step - self.init_global_step) - # TODO: make sure compute_consumed_samples works for pipeline parallelism - self.log( - 'consumed_samples', consumed_samples, prog_bar=True, rank_zero_only=True, batch_size=1, - ) - - if self.cfg.get('rampup_batch_size', None): - micro_batch_size = self.cfg.get('micro_batch_size', 1) - total_gpus_number = self.trainer.num_devices * self.trainer.num_nodes - current_global_batch_size = get_num_microbatches() * micro_batch_size * total_gpus_number - self.log('global_batch_size', current_global_batch_size, prog_bar=True, rank_zero_only=True, batch_size=1) - - num_microbatch_calculator = apex.transformer.pipeline_parallel.utils._GLOBAL_NUM_MICROBATCHES_CALCULATOR - num_microbatch_calculator.update( - consumed_samples=consumed_samples, consistency_check=True, - ) - - return loss_mean - - def get_forward_output_and_loss_func(self, validation_step=False): - def loss_func(output_tensors, loss_masks): - loss_list = [] - loss_for_ub = 0 - for data_type in self.enabled_data_types: - output_tensor = output_tensors[data_type] - loss_mask = loss_masks[data_type] - # Loss for a micro-batch (ub) - loss_list.append(self.loss_func(loss_mask, output_tensor)) - loss_for_ub += loss_list[-1] * self.per_type_loss_weights[data_type] - loss_for_ub /= sum(self.per_type_loss_weights.values()) - - if validation_step and not self.cfg.data.get('validation_drop_last', True): - raise NotImplementedError(f"`validation_drop_last=False` is not implemented in Kosmos!") - # num_valid_tokens_in_ub = loss_mask.sum() - # if loss_for_ub.isnan(): - # assert loss_mask.count_nonzero() == 0, 'Got NaN loss with non-empty input' - # loss_sum_for_ub = torch.zeros_like(num_valid_tokens_in_ub) - # else: - # loss_sum_for_ub = num_valid_tokens_in_ub * loss_for_ub - # - # loss_sum_and_ub_size_all_gpu = torch.cat( - # [ - # loss_sum_for_ub.clone().detach().view(1), - # torch.tensor([num_valid_tokens_in_ub]).cuda().clone().detach(), - # ] - # ) - # # Could potentially reduce num_valid_samples_in_microbatch and use that to aggregate instead of len(self._validation_ds) - # torch.distributed.all_reduce( - # loss_sum_and_ub_size_all_gpu, group=parallel_state.get_data_parallel_group() - # ) - # return loss_for_ub, {'loss_sum_and_ub_size': loss_sum_and_ub_size_all_gpu} - else: - reduced_loss = average_losses_across_data_parallel_group([loss_for_ub] + loss_list) - loss_dict = {data_type: reduced_loss[i + 1] for i, data_type in enumerate(self.enabled_data_types)} - loss_dict['avg'] = reduced_loss[0] - return loss_for_ub, loss_dict - - def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_layers=None): - output_tensors = {} - loss_masks = {} - combined_batch = next(dataloader_iter) - for data_type in self.enabled_data_types: - if parallel_state.get_pipeline_model_parallel_world_size() == 1: - batch = combined_batch[data_type] - for k in batch.keys(): - if self.get_attention_mask_from_fusion: - batch[k] = batch[k].cuda(non_blocking=True) if k not in ['attention_mask'] else None - else: - batch[k] = batch[k].cuda(non_blocking=True) - else: - if parallel_state.is_pipeline_first_stage(): - batch = combined_batch[data_type] - # First pipeline stage needs tokens, position_ids, and attention_mask - for k in batch.keys(): - if self.get_attention_mask_from_fusion: - batch[k] = ( - batch[k].cuda(non_blocking=True) - if k in ['tokens', 'position_ids', 'media'] - else None - ) - else: - batch[k] = ( - batch[k].cuda(non_blocking=True) - if k in ['tokens', 'position_ids', 'attention_mask', 'media'] - else None - ) - elif parallel_state.is_pipeline_last_stage(): - batch = combined_batch[data_type] - # Last pipeline stage needs the labels, loss_mask, and attention_mask - for k in batch.keys(): - if self.get_attention_mask_from_fusion: - batch[k] = batch[k].cuda(non_blocking=True) if k in ['labels', 'loss_mask'] else None - else: - batch[k] = ( - batch[k].cuda(non_blocking=True) - if k in ['labels', 'loss_mask', 'attention_mask'] - else None - ) - else: - # Intermediate pipeline stage doesn't need any inputs - batch = {k: None for k in ['tokens', 'position_ids', 'attention_mask', 'labels', 'media']} - - output_tensor = model( - batch['tokens'], - batch['position_ids'], - batch['attention_mask'], - batch['labels'], - batch.get('media'), - checkpoint_activations_all_layers=checkpoint_activations_all_layers, - ) - output_tensors[data_type] = output_tensor - loss_masks[data_type] = batch['loss_mask'] - - return output_tensors, partial(loss_func, loss_masks=loss_masks) - - return fwd_output_and_loss_func - - def get_forward_output_only_func(self): - def fwd_output_only_func(batch, model): - extra_arg = {} - if len(batch) == 3: - batch = [x.cuda() for x in batch] - tokens, attention_mask, position_ids = batch - attention_mask = attention_mask[0:1] - else: - ( - tokens, - attention_mask, - position_ids, - set_inference_key_value_memory, - inference_max_sequence_len, - ) = batch - tokens = tokens.cuda() - attention_mask = attention_mask.cuda() - position_ids = position_ids.cuda() - attention_mask = attention_mask[0:1] - extra_arg['set_inference_key_value_memory'] = set_inference_key_value_memory[0].item() - extra_arg['inference_max_sequence_len'] = inference_max_sequence_len[0].item() - output_tensor = model(tokens, position_ids, attention_mask, **extra_arg) - - def id_func(output_tensor): - return output_tensor, {'logits': output_tensor} - - return output_tensor, id_func - - return fwd_output_only_func - - def validation_step(self, dataloader_iter, batch_idx): - """ - Our dataloaders produce a micro-batch and then we fetch - a number of microbatches depending on the global batch size and model parallel size - from the dataloader to produce a list of microbatches. - The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. - """ - loss_mean, loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, True) - loss_dict['avg'] = loss_mean - return loss_dict - - def validation_epoch_end(self, outputs): - loss_dict = {} - if parallel_state.is_pipeline_last_stage(): - # only the last pipeline parallel stages return loss with their batch size - if self.cfg.data.get('validation_drop_last', True): - averaged_loss = torch.stack([loss['avg'] for loss in outputs]).mean() - for data_type in self.enabled_data_types: - loss_dict[data_type] = torch.stack([loss[data_type] for loss in outputs]).mean() - else: - # Compute the avg loss by total_loss across all samples / total number of samples - # total_loss_and_total_samples = torch.vstack(outputs).sum(axis=0) - # avg_loss = total_loss_and_total_samples[0] / total_loss_and_total_samples[1] - # averaged_loss = avg_loss.type(torch.float32).cuda() - raise NotImplementedError("`validation_drop_last=False` is not supported!") - else: - averaged_loss = torch.tensor(0.0, dtype=torch.float32).cuda() - for data_type in self.enabled_data_types: - loss_dict[data_type] = torch.tensor(0.0, dtype=torch.float32).cuda() - - # we can only log on one rank if it is rank zero so we broadcast from last rank - torch.distributed.broadcast(averaged_loss, get_last_rank()) - for data_type in self.enabled_data_types: - torch.distributed.broadcast(loss_dict[data_type], get_last_rank()) - - self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log_dict({'val/' + k: v for k, v in loss_dict.items()}, rank_zero_only=True, batch_size=1) - - return averaged_loss - - def test_step(self, batch, batch_idx): - return self.validation_step(batch, batch_idx) - - def test_epoch_end(self, outputs): - averaged_loss = average_losses_across_data_parallel_group(outputs) - logging.info(f'test_loss: {averaged_loss[0]}') - - def loss_func(self, loss_mask, output_tensor): - losses = output_tensor.float() - loss_mask = loss_mask.view(-1).float() - # TODO: add nemo version here - loss = torch.sum(losses.view(-1) * loss_mask) / loss_mask.sum() # sequence level nll - return loss - - def setup(self, stage=None): - """ PTL hook that is executed after DDP spawns. - We setup datasets here as megatron datasets require DDP to instantiate. - See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. - Args: - stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. - """ - num_parameters_on_device, total_num_parameters = self._get_total_params_across_model_parallel_groups_gpt_bert( - self.model - ) - - logging.info( - f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' - f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' - f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' - f'Total number of model parameters: {total_num_parameters:.2e}.' - ) - - resume_checkpoint_path = self.trainer.ckpt_path - if resume_checkpoint_path: - init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) - else: - init_consumed_samples = 0 - self.init_consumed_samples = init_consumed_samples - self.init_global_step = self.trainer.global_step - - rampup_batch_size = self.cfg.get('rampup_batch_size', None) - if rampup_batch_size: - start_batch_size = rampup_batch_size[0] - batch_size_increment = rampup_batch_size[1] - total_gpus_number = self.trainer.num_devices * self.trainer.num_nodes - - assert start_batch_size % (total_gpus_number) == 0, ( - 'expected' - ' start batch size ({}) to be divisible by total number of GPUs' - ' ({})'.format(start_batch_size, total_gpus_number) - ) - - micro_batch_size = self.cfg.get('micro_batch_size', 1) - tensor_model_parallel_size = self.cfg.get('tensor_model_parallel_size', 1) - pipeline_model_parallel_size = self.cfg.get('pipeline_model_parallel_size', 1) - total_data_parallel_size = total_gpus_number // (tensor_model_parallel_size * pipeline_model_parallel_size) - - assert batch_size_increment % (micro_batch_size * total_data_parallel_size) == 0, ( - 'expected' - ' batch size increment ({}) to be divisible by micro_batch_size ({}) times total data parallel size' - ' ({})'.format(batch_size_increment, micro_batch_size, total_data_parallel_size) - ) - - if stage == 'predict': - return - else: - # TODO: consider adding a ModelPT guard to check if model is being restored. - # allowing restored models to optionally setup datasets - self.build_train_valid_test_datasets() - self.setup_training_data(self.cfg.data) - self.setup_validation_data(self.cfg.data) - self.setup_test_data(self.cfg.data) - - # when using pipeline model parallel the final stage need to initialize word embeddings - if parallel_state.get_pipeline_model_parallel_world_size() > 1: - if isinstance(self.model, list): - for i, module in enumerate(self.model): - parallel_state.set_virtual_pipeline_model_parallel_rank(i) - if self.cfg.get('share_embeddings_and_output_weights', True): - module.sync_initial_word_embeddings() - parallel_state.set_virtual_pipeline_model_parallel_rank(0) - else: - if self.cfg.get('share_embeddings_and_output_weights', True): - self.model.sync_initial_word_embeddings() - - if self.cfg.get('transformer_engine', False): - self.setup_transformer_engine_tp_groups() - - def build_train_valid_test_datasets(self): - logging.info('Building Kosmos datasets.') - - if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): - raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") - - global_batch_size = self.cfg.global_batch_size - max_train_steps = self.trainer.max_steps - eval_iters = (max_train_steps // self.trainer.val_check_interval + 1) * self.trainer.limit_val_batches - test_iters = self.trainer.limit_test_batches - - train_valid_test_num_samples = [ - max_train_steps * global_batch_size, - eval_iters * global_batch_size, - test_iters * global_batch_size, - ] - - if self.trainer.limit_val_batches <= 1.0 and isinstance(self.trainer.limit_val_batches, float): - train_valid_test_num_samples[ - 1 - ] = 1 # This is to make sure we only have one epoch on every validation iteration - - self._train_ds, self._validation_ds, self._test_ds = {}, {}, {} - - for data_type in self.enabled_data_types: - if data_type == "text": - ( - self._train_ds[data_type], - self._validation_ds[data_type], - self._test_ds[data_type], - ) = build_text_train_valid_test_datasets( - cfg=self.cfg, - trainer=self.trainer, - data_prefix=self.cfg.data.data_prefix, - data_impl=self.cfg.data.data_impl, - splits_string=self.cfg.data.splits_string, - train_valid_test_num_samples=train_valid_test_num_samples, - seq_length=self.cfg.data.seq_length, - seed=self.cfg.seed, - skip_warmup=self.cfg.data.get('skip_warmup', True), - tokenizer=self.tokenizer, - ) - - if data_type in ["image_caption", "image_interleaved"]: - self._train_ds[data_type], self._validation_ds[data_type] = build_media_train_valid_datasets( - model_cfg=self.cfg, - consumed_samples=self.compute_consumed_samples(0) - * self.per_type_micro_batch_size[data_type] - // self.cfg.micro_batch_size, - tokenizer=self.tokenizer, - data_type=data_type, - ) - self._test_ds[data_type] = None - - data = [] - for ds_name, ds in [("Train", self._train_ds), ("Validation", self._validation_ds), ("Test", self._test_ds)]: - for key in self.enabled_data_types: - # Append the name of the dataset, the key, and the length of the data under that key to the list - if ds_name == "Train": - consumed_samples = ( - self.compute_consumed_samples(0) - * self.per_type_micro_batch_size[key] - // self.cfg.micro_batch_size - ) - else: - consumed_samples = 0 - data.append([ds_name, key, len(ds[key]) if ds[key] is not None else 0, consumed_samples]) - - df = pd.DataFrame(data, columns=["Dataset", "Type", "Length", "Consumed"]) - df['Length'] = df['Length'].apply(lambda x: "{:,}".format(x)) - df['Consumed'] = df['Consumed'].apply(lambda x: "{:,}".format(x)) - - logging.info(f"\nFinished Building Kosmos Dataset:\n{df}") - return self._train_ds, self._validation_ds, self._test_ds - - def build_pretraining_text_data_loader( - self, - dataset, - consumed_samples, - micro_batch_size, - global_batch_size, - drop_last=True, - pad_samples_to_global_batch_size=False, - ): - """Buld dataloader given an input dataset.""" - - logging.info(f'Building dataloader with consumed samples: {consumed_samples}') - # Megatron sampler - if hasattr(self.cfg.data, 'dataloader_type') and self.cfg.data.dataloader_type is not None: - if self.cfg.data.dataloader_type == 'single': - batch_sampler = MegatronPretrainingSampler( - total_samples=len(dataset), - consumed_samples=consumed_samples, - micro_batch_size=micro_batch_size, - data_parallel_rank=parallel_state.get_data_parallel_rank(), - data_parallel_size=parallel_state.get_data_parallel_world_size(), - drop_last=drop_last, - global_batch_size=global_batch_size, - pad_samples_to_global_batch_size=pad_samples_to_global_batch_size, - ) - elif self.cfg.data.dataloader_type == 'cyclic': - batch_sampler = MegatronPretrainingRandomSampler( - total_samples=len(dataset), - consumed_samples=consumed_samples, - micro_batch_size=micro_batch_size, - data_parallel_rank=parallel_state.get_data_parallel_rank(), - data_parallel_size=parallel_state.get_data_parallel_world_size(), - drop_last=self.cfg.get('drop_last', True), - global_batch_size=global_batch_size, - ) - else: - raise ValueError('cfg.data.dataloader_type must be "single" or "cyclic"') - else: - raise ValueError('cfg.data.dataloader_type not found. Must be "single" or "cyclic"') - - return torch.utils.data.DataLoader( - dataset, - batch_sampler=batch_sampler, - num_workers=self.cfg.data.num_workers, - pin_memory=True, - persistent_workers=True if self.cfg.data.num_workers > 0 else False, - ) - - @classmethod - def list_available_models(cls) -> Optional[PretrainedModelInfo]: - """ - This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud. - Returns: - List of available pre-trained models. - """ - return [] - - def setup_training_data(self, cfg): - consumed_samples = self.compute_consumed_samples(0) - - train_dls = {} - for data_type in self.enabled_data_types: - if hasattr(self, '_train_ds') and self._train_ds.get(data_type) is not None: - if data_type == "text": - train_dls[data_type] = self.build_pretraining_text_data_loader( - self._train_ds[data_type], - consumed_samples=consumed_samples - * self.per_type_micro_batch_size[data_type] - // self.cfg.micro_batch_size, - micro_batch_size=self.per_type_micro_batch_size[data_type], - global_batch_size=self.per_type_global_batch_size[data_type], - ) - elif data_type in ["image_caption", "image_interleaved"]: - train_dls[data_type] = torch.utils.data.DataLoader( - self._train_ds[data_type], - batch_size=self.per_type_micro_batch_size[data_type], - num_workers=cfg.get(data_type).num_workers, - pin_memory=True, - drop_last=True, - persistent_workers=True, - ) - else: - raise ValueError(f"Unrecognized dataset type {data_type}") - - self._train_dl = MergedKosmosDataLoader(train_dls) - - def setup_validation_data(self, cfg): - consumed_samples = 0 - - validation_dls = {} - for data_type in self.enabled_data_types: - if hasattr(self, '_validation_ds') and self._validation_ds.get(data_type) is not None: - if data_type == "text": - validation_dls[data_type] = self.build_pretraining_text_data_loader( - self._validation_ds[data_type], - consumed_samples=consumed_samples, - micro_batch_size=self.per_type_micro_batch_size[data_type], - global_batch_size=self.per_type_global_batch_size[data_type], - ) - elif data_type in ["image_caption", "image_interleaved"]: - validation_dls[data_type] = torch.utils.data.DataLoader( - self._validation_ds[data_type], - batch_size=self.per_type_micro_batch_size[data_type], - num_workers=cfg.num_workers, - pin_memory=True, - drop_last=True, - persistent_workers=True, - ) - else: - raise ValueError(f"Unrecognized dataset type {data_type}") - - self._validation_dl = MergedKosmosDataLoader(validation_dls) - - def setup_test_data(self, cfg): - pass diff --git a/nemo/collections/multimodal/models/kosmos/perceiver_resampler.py b/nemo/collections/multimodal/models/kosmos/perceiver_resampler.py deleted file mode 100644 index 14bdedc5324f..000000000000 --- a/nemo/collections/multimodal/models/kosmos/perceiver_resampler.py +++ /dev/null @@ -1,131 +0,0 @@ -""" -Taken from https://github.com/lucidrains/flamingo-pytorch -""" - -""" -# Usage: -perceive = PerceiverResampler( - dim = 1024, - depth = 2, - dim_head = 64, - heads = 8, - num_latents = 64, # the number of latents to shrink your media sequence to, perceiver style - num_time_embeds = 4 # say you have 4 images maximum in your dialogue -) - -medias = torch.randn(1, 2, 256, 1024) # (batch, time, sequence length, dimension) -perceived = perceive(medias) # (1, 2, 64, 1024) - (batch, time, num latents, dimension) -""" - -import torch -from einops import rearrange, repeat -from einops_exts import rearrange_many -from torch import einsum, nn - - -def exists(val): - return val is not None - - -def FeedForward(dim, mult=4): - inner_dim = int(dim * mult) - return nn.Sequential( - nn.LayerNorm(dim), nn.Linear(dim, inner_dim, bias=False), nn.GELU(), nn.Linear(inner_dim, dim, bias=False), - ) - - -class PerceiverAttention(nn.Module): - def __init__(self, *, dim, dim_head=64, heads=8): - super().__init__() - self.scale = dim_head ** -0.5 - self.heads = heads - inner_dim = dim_head * heads - - self.norm_media = nn.LayerNorm(dim) - self.norm_latents = nn.LayerNorm(dim) - - self.to_q = nn.Linear(dim, inner_dim, bias=False) - self.to_kv = nn.Linear(dim, inner_dim * 2, bias=False) - self.to_out = nn.Linear(inner_dim, dim, bias=False) - - def forward(self, x, latents): - """ - Args: - x (torch.Tensor): image features - shape (b, T, n1, D) - latent (torch.Tensor): latent features - shape (b, T, n2, D) - """ - x = self.norm_media(x) - latents = self.norm_latents(latents) - - h = self.heads - - q = self.to_q(latents) - kv_input = torch.cat((x, latents), dim=-2) - k, v = self.to_kv(kv_input).chunk(2, dim=-1) - q, k, v = rearrange_many((q, k, v), "b t n (h d) -> b h t n d", h=h) - q = q * self.scale - - # attention - sim = einsum("... i d, ... j d -> ... i j", q, k) - sim = sim - sim.amax(dim=-1, keepdim=True).detach() - attn = sim.softmax(dim=-1) - - out = einsum("... i j, ... j d -> ... i d", attn, v) - out = rearrange(out, "b h t n d -> b t n (h d)", h=h) - return self.to_out(out) - - -class PerceiverResampler(nn.Module): - def __init__( - self, - *, - dim, - depth=6, - dim_head=64, - heads=8, - num_latents=64, - max_num_media=None, - max_num_frames=None, - ff_mult=4, - ): - super().__init__() - self.latents = nn.Parameter(torch.randn(num_latents, dim)) - self.frame_embs = nn.Parameter(torch.randn(max_num_frames, dim)) if exists(max_num_frames) else None - self.media_time_embs = nn.Parameter(torch.randn(max_num_media, 1, dim)) if exists(max_num_media) else None - - self.layers = nn.ModuleList([]) - for _ in range(depth): - self.layers.append( - nn.ModuleList( - [PerceiverAttention(dim=dim, dim_head=dim_head, heads=heads), FeedForward(dim=dim, mult=ff_mult),] - ) - ) - - self.norm = nn.LayerNorm(dim) - - def forward(self, x): - """ - Args: - x (torch.Tensor): image features - shape (b, T, F, v, D) - Returns: - shape (b, T, n, D) where n is self.num_latents - """ - b, T, F, v = x.shape[:4] - - # frame and media time embeddings - if exists(self.frame_embs): - frame_embs = repeat(self.frame_embs[:F], "F d -> b T F v d", b=b, T=T, v=v) - x = x + frame_embs - x = rearrange(x, "b T F v d -> b T (F v) d") # flatten the frame and spatial dimensions - if exists(self.media_time_embs): - x = x + self.media_time_embs[:T] - - # blocks - latents = repeat(self.latents, "n d -> b T n d", b=b, T=T) - for attn, ff in self.layers: - latents = attn(x, latents) + latents - latents = ff(latents) + latents - return self.norm(latents) diff --git a/nemo/collections/multimodal/models/nerf/base.py b/nemo/collections/multimodal/models/nerf/base.py deleted file mode 100644 index d1908080e90c..000000000000 --- a/nemo/collections/multimodal/models/nerf/base.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytorch_lightning as pl - -from nemo.core.classes.common import Serialization -from nemo.core.classes.modelPT import ModelPT - - -class NerfModelBase(ModelPT, Serialization): - def __init__(self, cfg): - super().__init__(cfg=cfg) - self.save_hyperparameters() - self._cfg = cfg - - @staticmethod - def is_module_updatable(module): - return hasattr(module, 'update_step') and callable(module.update_step) - - def list_available_models(self): - pass - - def setup_training_data(self): - pass - - def setup_validation_data(self): - pass diff --git a/nemo/collections/multimodal/models/nerf/dreamfusion.py b/nemo/collections/multimodal/models/nerf/dreamfusion.py deleted file mode 100644 index 6b7784b002ff..000000000000 --- a/nemo/collections/multimodal/models/nerf/dreamfusion.py +++ /dev/null @@ -1,313 +0,0 @@ -import logging -import os -import random - -import cv2 -import imageio -import numpy as np -import torch - -from nemo.collections.multimodal.models.nerf.txt2nerf_base import Txt2NerfBase -from nemo.collections.multimodal.modules.nerf.loss.laplacian_smooth_loss import LaplacianSmoothLoss -from nemo.collections.multimodal.modules.nerf.loss.normal_consistency_loss import NormalConsistencyLoss -from nemo.collections.multimodal.modules.nerf.materials.materials_base import ShadingEnum -from nemo.core import optim - - -# TODO(ahmadki): split dmtet from dreamfusion -class DreamFusion(Txt2NerfBase): - def __init__(self, cfg): - super(DreamFusion, self).__init__(cfg) - - self.guidance_scale = cfg.guidance_scale - - self.iters = cfg.iters - self.latent_iter_ratio = cfg.latent_iter_ratio - self.albedo_iter_ratio = cfg.albedo_iter_ratio - self.min_ambient_ratio = cfg.min_ambient_ratio - self.textureless_ratio = cfg.textureless_ratio - - # Lambdas - self.lambda_sds = cfg.loss.lambda_sds - self.lambda_opacity = cfg.loss.lambda_opacity - self.lambda_entropy = cfg.loss.lambda_entropy - self.lambda_orientation = cfg.loss.lambda_orientation - self.lambda_2d_normal_smooth = cfg.loss.lambda_2d_normal_smooth - self.lambda_3d_normal_smooth = cfg.loss.lambda_3d_normal_smooth - self.lambda_mesh_normal = cfg.loss.lambda_mesh_normal - self.lambda_mesh_laplacian = cfg.loss.lambda_mesh_laplacian - - if self.lambda_mesh_normal > 0: - self.normal_consistency_loss_fn = NormalConsistencyLoss() - if self.lambda_mesh_laplacian > 0: - self.laplacian_smooth_loss_fn = LaplacianSmoothLoss() - - # Video - self.test_images = [] - self.test_depths = [] - - def training_step(self, batch, batch_idx): - # experiment iterations ratio - # i.e. what proportion of this experiment have we completed (in terms of iterations) so far? - exp_iter_ratio = self.global_step / self.iters - - # TODO(ahmadki): move to database - if exp_iter_ratio < self.latent_iter_ratio: - ambient_ratio = 1.0 - shading_type = ShadingEnum.NORMAL - as_latent = True - else: - if exp_iter_ratio <= self.albedo_iter_ratio: - ambient_ratio = 1.0 - shading_type = None - else: - # random shading - ambient_ratio = self.min_ambient_ratio + (1.0 - self.min_ambient_ratio) * random.random() - rand = random.random() - if rand >= (1.0 - self.textureless_ratio): - shading_type = ShadingEnum.TEXTURELESS - else: - shading_type = ShadingEnum.LAMBERTIAN - - as_latent = False - - return_normal_image = bool(self.lambda_2d_normal_smooth) - return_normal_perturb = bool(self.lambda_3d_normal_smooth) - return_vertices = bool(self.lambda_mesh_laplacian) - return_faces = bool(self.lambda_mesh_normal) or bool(self.lambda_mesh_laplacian) - return_faces_normals = bool(self.lambda_mesh_normal) - outputs = self( - rays_o=batch['rays_o'], # [B, H, W, 3] - rays_d=batch['rays_d'], # [B, H, W, 3] - mvp=batch['mvp'], # [B, 4, 4] - perturb=True, - ambient_ratio=ambient_ratio, - shading_type=shading_type, - binarize=False, - return_normal_image=return_normal_image, - return_normal_perturb=return_normal_perturb, - return_vertices=return_vertices, - return_faces=return_faces, - return_faces_normals=return_faces_normals, - ) - - if as_latent: - pred_rgb = ( - torch.cat([outputs['image'], outputs['opacity']], dim=-1).permute(0, 3, 1, 2).contiguous() - ) # [B, 4, H, W] - else: - pred_rgb = outputs['image'].permute(0, 3, 1, 2).contiguous() # [B, 3, H, W] - - # TODO(ahmadki): move into guidance - azimuth = batch['azimuth'] - text_z = [self.text_z['uncond']] * azimuth.shape[0] - for b in range(azimuth.shape[0]): - if azimuth[b] >= -90 and azimuth[b] < 90: - if azimuth[b] >= 0: - r = 1 - azimuth[b] / 90 - else: - r = 1 + azimuth[b] / 90 - start_z = self.text_z['front'] - end_z = self.text_z['side'] - else: - if azimuth[b] >= 0: - r = 1 - (azimuth[b] - 90) / 90 - else: - r = 1 + (azimuth[b] + 90) / 90 - start_z = self.text_z['side'] - end_z = self.text_z['back'] - pos_z = r * start_z + (1 - r) * end_z - text_z.append(pos_z) - text_z = torch.cat(text_z, dim=0) - - loss_dict = {} - - # SDS loss - guidance_loss = self.guidance.train_step( - text_z, pred_rgb, as_latent=as_latent, guidance_scale=self.guidance_scale - ) - loss_dict['lambda_sds'] = guidance_loss * self.lambda_sds - - # opacity loss - if self.lambda_opacity > 0 and 'opacity' in outputs: - loss_opacity = (outputs['opacity'] ** 2).mean() - loss_dict['loss_opacity'] = self.lambda_opacity * loss_opacity - - # entropy loss - if self.lambda_entropy > 0 and 'weights' in outputs: - alphas = outputs['weights'].clamp(1e-5, 1 - 1e-5) - loss_entropy = (-alphas * torch.log2(alphas) - (1 - alphas) * torch.log2(1 - alphas)).mean() - lambda_entropy = self.lambda_entropy * min(1, 2 * self.global_step / self.iters) - loss_dict['loss_entropy'] = lambda_entropy * loss_entropy - - if self.lambda_2d_normal_smooth > 0 and 'normal_image' in outputs: - pred_normal = outputs['normal_image'] - loss_smooth = (pred_normal[:, 1:, :, :] - pred_normal[:, :-1, :, :]).square().mean() + ( - pred_normal[:, :, 1:, :] - pred_normal[:, :, :-1, :] - ).square().mean() - loss_dict['loss_smooth'] = self.lambda_2d_normal_smooth * loss_smooth - - # orientation loss - if self.lambda_orientation > 0 and all(key in outputs for key in ['weights', 'normals', 'dirs']): - loss_orientation = ( - outputs['weights'].detach() * (outputs['normals'] * outputs['dirs']).sum(-1).clamp(min=0) ** 2 - ) - loss_orientation = loss_orientation.mean() - loss_dict['loss_orientation'] = self.lambda_orientation * loss_orientation - - if self.lambda_3d_normal_smooth > 0 and all(key in outputs for key in ['normals', 'normal_perturb']): - loss_normal_perturb = (outputs['normal_perturb'] - outputs['normals']).abs().mean() - loss_dict['loss_normal_smooth'] = self.lambda_3d_normal_smooth * loss_normal_perturb - - if self.lambda_mesh_normal > 0 and all(key in outputs for key in ['face_normals', 'faces']): - normal_consistency_loss = self.normal_consistency_loss_fn( - face_normals=outputs['face_normals'], t_pos_idx=outputs['faces'] - ) - loss_dict['normal_consistency_loss'] = self.lambda_mesh_normal * normal_consistency_loss - - if self.lambda_mesh_laplacian > 0 and all(key in outputs for key in ['verts', 'faces']): - laplacian_loss = self.laplacian_smooth_loss_fn(verts=outputs['verts'], faces=outputs['faces']) - loss_dict['laplacian_loss'] = self.lambda_mesh_laplacian * laplacian_loss - - loss = sum(loss_dict.values()) - - self.log_dict(loss_dict, prog_bar=False, rank_zero_only=True) - self.log('loss', loss, prog_bar=True, rank_zero_only=True) - - # TODO(ahmadki): LearningRateMonitor - lr = self._optimizer.param_groups[0]['lr'] - self.log('lr', lr, prog_bar=True, rank_zero_only=True) - - self.log('global_step', self.global_step + 1, prog_bar=True, rank_zero_only=True) - - return loss - - def validation_step(self, batch, batch_idx): - # save image - images, depths = self._shared_predict(batch) - - save_path = os.path.join(self.trainer.log_dir, 'validation') - os.makedirs(save_path, exist_ok=True) - for i, (image, depth) in enumerate(zip(images, depths)): - # Save image - cv2.imwrite( - os.path.join( - save_path, - f'{self.current_epoch:04d}_{self.global_step:04d}_{self.global_rank:04d}_{batch_idx:04d}_{i:04d}_rgb.png', - ), - cv2.cvtColor(image, cv2.COLOR_RGB2BGR), - ) - # Save depth - cv2.imwrite( - os.path.join( - save_path, - f'{self.current_epoch:04d}_{self.global_step:04d}_{self.global_rank:04d}_{batch_idx:04d}_{i:04d}_depth.png', - ), - depth, - ) - - def test_step(self, batch, batch_idx): - # save image - images, depths = self._shared_predict(batch) - self.test_images.append(images) - self.test_depths.append(depths) - - def on_test_epoch_end(self): - save_path = os.path.join(self.trainer.log_dir, 'test') - os.makedirs(save_path, exist_ok=True) - - images = np.concatenate(self.test_images, axis=0) - imageio.mimwrite( - os.path.join(os.path.join(save_path, f'{self.current_epoch:04d}_{self.global_step:04d}_rgb.mp4')), - images, - fps=25, - quality=8, - macro_block_size=1, - ) - - depths = np.concatenate(self.test_depths, axis=0) - imageio.mimwrite( - os.path.join(os.path.join(save_path, f'{self.current_epoch:04d}_{self.global_step:04d}_depth.mp4')), - depths, - fps=25, - quality=8, - macro_block_size=1, - ) - - self.test_images.clear() - self.test_depths.clear() - - def predict_step(self, batch, batch_idx): - return self._shared_predict(self, batch) - - def forward( - self, - rays_o, - rays_d, - mvp, - perturb, - ambient_ratio, - shading_type, - binarize, - return_normal_image, - return_normal_perturb, - return_vertices, - return_faces, - return_faces_normals, - ): - outputs = self.renderer( - rays_o=rays_o, - rays_d=rays_d, - mvp=mvp, - perturb=perturb, - ambient_ratio=ambient_ratio, - shading_type=shading_type, - binarize=binarize, - return_normal_image=return_normal_image, - return_normal_perturb=return_normal_perturb, - return_vertices=return_vertices, - return_faces=return_faces, - return_faces_normals=return_faces_normals, - ) - return outputs - - def _shared_predict(self, data): - outputs = self( - rays_o=data['rays_o'], # [B, H, W, 3] - rays_d=data['rays_d'], # [B, H, W, 3] - mvp=data['mvp'], - perturb=False, - ambient_ratio=data['ambient_ratio'] if 'ambient_ratio' in data else 1.0, # TODO(ahmadki): move to dataset - shading_type=data['shading_type'] if 'shading_type' in data else None, # TODO(ahmadki): move to dataset - binarize=False, - return_normal_image=False, - return_normal_perturb=False, - return_vertices=False, - return_faces=False, - return_faces_normals=False, - ) - - images_np = outputs['image'].detach().cpu().numpy() - images_np = (images_np * 255).astype(np.uint8) - - depths_np = outputs['depth'].detach().cpu().numpy() - depths_np = (depths_np - depths_np.min()) / (np.ptp(depths_np) + 1e-6) - depths_np = (depths_np * 255).astype(np.uint8) - - return images_np, depths_np - - # TODO(ahmadki): rework - def setup_optimization(self): - cfg = self._cfg.optim - optimizer_args = dict(cfg) - optimizer_args.pop('name', None) - - optimizer = optim.get_optimizer(cfg.name) - - optimizer = optimizer(params=self.parameters(), **optimizer_args) - - self._optimizer = optimizer - - def configure_optimizers(self): - self.setup_optimization() - return self._optimizer diff --git a/nemo/collections/multimodal/models/nerf/txt2nerf_base.py b/nemo/collections/multimodal/models/nerf/txt2nerf_base.py deleted file mode 100644 index 19a393aa4774..000000000000 --- a/nemo/collections/multimodal/models/nerf/txt2nerf_base.py +++ /dev/null @@ -1,81 +0,0 @@ -import logging - -from nemo.collections.multimodal.models.nerf.base import NerfModelBase - - -class Txt2NerfBase(NerfModelBase): - def __init__(self, cfg): - super().__init__(cfg) - self.prompt = cfg.prompt - self.negative_prompt = cfg.negative_prompt - self.front_prompt = cfg.front_prompt - self.side_prompt = cfg.side_prompt - self.back_prompt = cfg.back_prompt - - self.nerf_cfg = cfg.nerf - self.renderer_cfg = cfg.renderer - self.guidance_cfg = cfg.guidance - - nerf = self.from_config_dict(cfg.nerf) - material = self.from_config_dict(cfg.material) - background = self.from_config_dict(cfg.background) - self.renderer = self.build_renderer(cfg.renderer, nerf, material, background) - self.guidance = None - - def build_renderer(self, cfg, nerf, material, background): - renderer = self.from_config_dict(cfg) - renderer.nerf = nerf - renderer.material = material - renderer.background = background - return renderer - - def build_guidance(self, cfg): - self.guidance = self.from_config_dict(cfg) - self.guidance.eval() - for p in self.guidance.parameters(): - p.requires_grad = False - - def prepare_embeddings(self): - # TODO(ahmadki): add top view ? - self.text_z = { - "default": self.guidance.get_text_embeds([self.prompt]), - "uncond": self.guidance.get_text_embeds([self.negative_prompt]), - "front": self.guidance.get_text_embeds([f"{self.prompt}{self.front_prompt}"]), - "side": self.guidance.get_text_embeds([f"{self.prompt}{self.side_prompt}"]), - "back": self.guidance.get_text_embeds([f"{self.prompt}{self.back_prompt}"]), - } - - def on_fit_start(self) -> None: - self.build_guidance(self.guidance_cfg) - self.prepare_embeddings() - - def on_train_batch_start(self, batch, batch_idx, unused=0): - if self.is_module_updatable(self.guidance): - self.guidance.update_step(epoch=self.current_epoch, global_step=self.global_step) - - if self.is_module_updatable(self.renderer.nerf): - self.renderer.nerf.update_step(epoch=self.current_epoch, global_step=self.global_step) - - if self.is_module_updatable(self.renderer.material): - self.renderer.material.update_step(epoch=self.current_epoch, global_step=self.global_step) - - if self.is_module_updatable(self.renderer.background): - self.renderer.background.update_step(epoch=self.current_epoch, global_step=self.global_step) - - if self.is_module_updatable(self.renderer): - self.renderer.update_step(epoch=self.current_epoch, global_step=self.global_step) - - dataset = self.trainer.train_dataloader.dataset - if self.is_module_updatable(dataset): - dataset.update_step(epoch=self.current_epoch, global_step=self.global_step) - - def mesh(self, resolution, batch_size=128, density_thresh=None): - return self.nerf.mesh(resolution=resolution, batch_size=batch_size, density_thresh=density_thresh) - - def on_save_checkpoint(self, checkpoint): - # remove guidance from checkpoint. - # We can still laod the model without guidance checkpoints because the module is not initalized - # at __init__ time. - keys_to_remove = [key for key in checkpoint['state_dict'].keys() if key.startswith('guidance.')] - for key in keys_to_remove: - del checkpoint['state_dict'][key] diff --git a/nemo/collections/multimodal/models/neva/__init__.py b/nemo/collections/multimodal/models/neva/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/models/neva/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/models/neva/neva_model.py b/nemo/collections/multimodal/models/neva/neva_model.py deleted file mode 100644 index d6b8e2336375..000000000000 --- a/nemo/collections/multimodal/models/neva/neva_model.py +++ /dev/null @@ -1,1076 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -import os -import random -import re -import tempfile -from functools import partial -from itertools import chain -from typing import Any, List, Optional, Union - -import numpy as np -import pandas as pd -import torch -from einops import rearrange, repeat -from omegaconf.dictconfig import DictConfig -from omegaconf.omegaconf import OmegaConf, open_dict -from pytorch_lightning.accelerators import CPUAccelerator -from pytorch_lightning.trainer.trainer import Trainer -from transformers import CLIPVisionModel - -from nemo.collections.multimodal.data.neva.neva_dataset import ( - DEFAULT_BOS_TOKEN, - DEFAULT_EOS_TOKEN, - DEFAULT_IM_END_TOKEN, - DEFAULT_IM_START_TOKEN, - DataCollatorForSupervisedDataset, - make_supervised_data_module, -) -from nemo.collections.multimodal.models.clip.megatron_clip_models import CLIPVisionTransformer, MegatronCLIPModel -from nemo.collections.multimodal.models.kosmos.perceiver_resampler import PerceiverResampler -from nemo.collections.multimodal.parts.utils import extend_instance -from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( - MegatronPretrainingRandomSampler, - MegatronPretrainingSampler, -) -from nemo.collections.nlp.data.language_modeling.megatron.gpt_dataset import ( - build_train_valid_test_datasets as build_text_train_valid_test_datasets, -) -from nemo.collections.nlp.models.language_modeling.megatron.gpt_model import GPTModel, post_language_model_processing -from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel -from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel -from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import MegatronGPTPEFTModel -from nemo.collections.nlp.models.nlp_model import NLPModel -from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import AdapterName, MMLinearAdapterConfig -from nemo.collections.nlp.modules.common.megatron.build_model import build_model -from nemo.collections.nlp.modules.common.megatron.language_model import Embedding, get_language_model -from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule -from nemo.collections.nlp.modules.common.megatron.utils import ( - ApexGuardDefaults, - average_losses_across_data_parallel_group, - get_all_params_for_weight_decay_optimization, - get_params_for_weight_decay_optimization, - init_method_normal, - parallel_lm_logits, - scaled_init_method_normal, -) -from nemo.collections.nlp.modules.common.text_generation_utils import ( - generate, - get_computeprob_response, - get_default_length_params, - get_default_sampling_params, - megatron_neva_generate, -) -from nemo.collections.nlp.modules.common.transformer.text_generation import ( - LengthParam, - OutputType, - SamplingParam, - TextGeneration, -) -from nemo.collections.nlp.parts.mixins.multimodal_adapter_mixins import MultimodalAdapterModelMixin -from nemo.collections.nlp.parts.nlp_overrides import GradScaler, NLPSaveRestoreConnector -from nemo.collections.nlp.parts.utils_funcs import get_last_rank -from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone -from nemo.core import adapter_mixins -from nemo.core.classes.common import PretrainedModelInfo -from nemo.utils import AppState, logging, model_utils - -try: - import apex.transformer.pipeline_parallel.utils - from apex.transformer.enums import AttnMaskType - from apex.transformer.pipeline_parallel.utils import get_num_microbatches - - HAVE_APEX = True - -except (ImportError, ModuleNotFoundError): - - HAVE_APEX = False - -try: - from megatron.core import dist_checkpointing, parallel_state - from megatron.core.models.gpt import GPTModel as MCoreGPTModel - from megatron.core.pipeline_parallel.schedules import get_forward_backward_func - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - -try: - import transformer_engine - - HAVE_TE = True - -except (ImportError, ModuleNotFoundError): - HAVE_TE = False - - -class FrozenCLIPVisionTransformer(CLIPVisionTransformer): - def __init__(self, model_cfg, model_parallel_config, pre_process=True, post_process=True): - super().__init__( - model_cfg, model_parallel_config, pre_process=pre_process, post_process=post_process, skip_head=True, - ) - self.frozen = False - - def train(self, mode): - if self.frozen: - return self - - super().train(mode) - return self - - def forward(self, input): - assert self.training == False - hidden_states = self.backbone(input) - # Do not add header after backbone - return hidden_states - - def freeze(self) -> None: - for param in self.parameters(): - param.requires_grad = False - - self.eval() - self.frozen = True - - -class NevaWordEmbeddingMixin(torch.nn.Module, adapter_mixins.AdapterModuleMixin): - def init_vision( - self, - vision_encoder, - media_start_id, - media_end_id, - vision_select_layer=-1, - class_token_length=1, - use_im_start_end=False, - llama_tricks=False, - ): - self.vision_encoder = vision_encoder - self.from_hf = isinstance(vision_encoder, CLIPVisionModel) - self.media_start_id = media_start_id - self.media_end_id = media_end_id - self.class_token_length = class_token_length - self.use_im_start_end = use_im_start_end - self.vision_select_layer = vision_select_layer - self.media = None - self.set_accepted_adapter_types([MMLinearAdapterConfig._target_]) - self.llama_tricks = llama_tricks - - def set_media(self, media): - self.media = media - - def forward(self, input_ids, **kwargs): - media = self.media # avoid change the signature of embedding forward function - if self.llama_tricks and not self.use_im_start_end: - masked_input_ids = input_ids.detach().clone() - if self.num_embeddings < 32000: - raise ValueError("Not supported tokenizer with llama 2!") - else: - masked_input_ids[masked_input_ids >= 32000] = 0 - words_embeddings = super().forward(masked_input_ids, **kwargs) - - else: - words_embeddings = super().forward(input_ids, **kwargs) - - return self.replace_media_embeddings(input_ids, words_embeddings, media) - - def encode_vision_x(self, vision_x: torch.Tensor): - """ - Compute media tokens from vision input by passing it through vision encoder and conditioning language model. - Args: - vision_x (torch.Tensor): Vision input - shape (B, T_img, F, C, H, W) - Images in the same chunk are collated along T_img, and frames are collated along F - Currently only F=1 is supported (single-frame videos) - - rearrange code based on https://github.com/dhansmair/flamingo-mini - """ - - assert vision_x.ndim == 6, "vision_x should be of shape (b, T_img, F, C, H, W)" - b, T, F = vision_x.shape[:3] - assert F == 1, "Only single frame supported" - - vision_x = rearrange(vision_x, "b T F c h w -> (b T F) c h w") - with torch.no_grad(): - if self.from_hf: - vision_x = self.vision_encoder(vision_x, output_hidden_states=True) - vision_x = vision_x.hidden_states[self.vision_select_layer] - else: - self.vision_encoder.backbone.transformer.return_select_layer = self.vision_select_layer - vision_x = self.vision_encoder(vision_x) - vision_x = rearrange(vision_x, "(b T F) v d -> b T F v d", b=b, T=T, F=F) - vision_x = vision_x[:, :, :, self.class_token_length :] - assert self.is_adapter_available(), "Cannot find multimodal vision adapter!" - vision_connector = self.get_adapter_module(AdapterName.MM_LINEAR_ADAPTER) - vision_x = vision_connector(vision_x) - return vision_x - - def replace_media_embeddings(self, input_ids, inputs_embeds, media): - if media is None: - return inputs_embeds - - batch_size, sequence_length, hidden_size = inputs_embeds.shape - - # calculate media features without gradients - media_features = self.encode_vision_x(media) # b T F S(eq) H(idden) - num_images_per_sample = media_features.size(1) - num_patches = media_features.size(3) - # flatten patches - media_features = media_features.view(batch_size, -1, hidden_size) - - # create an indices matrix used in torch.scatter - padded_media_indices = torch.ones( - (batch_size, num_images_per_sample), dtype=torch.long, device=input_ids.device - ) - padded_media_indices *= sequence_length - for idx, input_id in enumerate(input_ids): - media_end_positions = torch.where(input_id == self.media_end_id)[0] - if self.use_im_start_end: - # locate the first media token positions - padded_media_indices[idx, : len(media_end_positions)] = media_end_positions - num_patches - assert ( - input_id[padded_media_indices[idx, : len(media_end_positions)] - 1] == self.media_start_id - ).all() - else: - padded_media_indices[idx, : len(media_end_positions)] = media_end_positions - num_patches + 1 - assert (input_id[padded_media_indices[idx, : len(media_end_positions)]] == self.media_start_id).all() - - # use indices to create a span - padded_media_indices = padded_media_indices.unsqueeze(-1) + torch.arange( - num_patches, device=padded_media_indices.device - ).repeat(*padded_media_indices.shape, 1) - padded_media_indices = padded_media_indices.reshape(batch_size, -1) - padded_media_indices = repeat(padded_media_indices, 'b s -> b s h', h=hidden_size) - - # concat placeholder - updated_input_embeds = torch.cat( - (inputs_embeds, torch.zeros((batch_size, num_patches, hidden_size), device=inputs_embeds.device)), dim=1 - ) - updated_input_embeds = updated_input_embeds.type(media_features.dtype) - # scatter media_features - updated_input_embeds.scatter_(1, padded_media_indices, media_features) - - # chop off placeholder - updated_input_embeds = updated_input_embeds[:, :sequence_length] - - return updated_input_embeds - - -class MCoreNevaModel(MCoreGPTModel): - def __init__( - self, mm_cfg, media_start_id, media_end_id, **kwargs, - ): - super(MCoreNevaModel, self).__init__(**kwargs,) - - self.mm_cfg = mm_cfg - self.media_start_id = media_start_id - self.media_end_id = media_end_id - self.dist_ckpt = False - - if mm_cfg.llm.from_pretrained is not None: - logging.info(f"Loading LLM weights from checkpoint {mm_cfg.llm.from_pretrained}") - self.load_llm_weights(mm_cfg.llm.from_pretrained) - - if mm_cfg.llm.freeze: - for param in chain( - self.embedding.parameters(), self.decoder.parameters(), self.output_layer.parameters(), - ): - param.requires_grad = False - self.embedding = self.embedding.eval() - self.decoder = self.decoder.eval() - self.output_layer = self.output_layer.eval() - - # Initialize vision encoder and freeze it - if mm_cfg.vision_encoder.from_hf: - vision_encoder = CLIPVisionModel.from_pretrained( - mm_cfg.vision_encoder.from_pretrained, torch_dtype=torch.bfloat16, - ).cuda() - vision_encoder = vision_encoder.to(torch.bfloat16) - if mm_cfg.vision_encoder.freeze: - for param in vision_encoder.parameters(): - param.requires_grad = False - vision_encoder = vision_encoder.eval() - else: - vision_cfg = MegatronCLIPModel.restore_from( - mm_cfg.vision_encoder.from_pretrained, return_config=True - ).vision - vision_encoder = FrozenCLIPVisionTransformer(vision_cfg, self.config) - self.load_vision_encoder_weights(vision_encoder, mm_cfg.vision_encoder.from_pretrained) - if mm_cfg.vision_encoder.freeze: - vision_encoder.freeze() - - model_type = self.mm_cfg.llm.get("model_type", "nvgpt") - # Monkey patch embedding - if kwargs.get("pre_process", True): - extend_instance(self.embedding.word_embeddings, NevaWordEmbeddingMixin) - self.embedding.word_embeddings.init_vision( - vision_encoder, - media_start_id, - media_end_id, - vision_select_layer=mm_cfg.vision_encoder.get("vision_select_layer", -2), - class_token_length=mm_cfg.vision_encoder.get("class_token_length", 1), - use_im_start_end=mm_cfg.get("use_im_start_end", False), - llama_tricks=(model_type == "llama_2"), - ) - - def forward( - self, *args, **kwargs, - ): - media = kwargs.pop('media', None) - self.embedding.word_embeddings.set_media(media) - return super().forward(*args, **kwargs) - - def _load_model_weights(self, nemo_path): - """ - Shared method to load model weights from a given nemo_path. - """ - if torch.cuda.is_available(): - map_location = torch.device('cuda') - else: - map_location = torch.device('cpu') - - save_restore_connector = NLPSaveRestoreConnector() - cwd = os.getcwd() - app_state = AppState() - - with tempfile.TemporaryDirectory() as tmpdir: - try: - if os.path.isfile(nemo_path): - save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) - else: - tmpdir = nemo_path - os.chdir(tmpdir) - if app_state.model_parallel_size is not None and app_state.model_parallel_size > 1: - model_weights = save_restore_connector._inject_model_parallel_rank_for_ckpt( - tmpdir, save_restore_connector.model_weights_ckpt - ) - else: - model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) - - state_dict = save_restore_connector._load_state_dict_from_disk( - model_weights, map_location=map_location - ) - - # distributed checkpointing - if state_dict is None: - self.dist_ckpt = True - sharded_state_dict = self.sharded_state_dict(prefix="model.") - checkpoint = dict(state_dict=sharded_state_dict) - tmp_model_weights_ckpt = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) - tmp_model_weights_dir = os.path.splitext(tmp_model_weights_ckpt)[0] - assert os.path.isdir(tmp_model_weights_dir), f'Expected {tmp_model_weights_dir} to be a directory.' - checkpoint = dist_checkpointing.load( - sharded_state_dict=checkpoint, checkpoint_dir=tmp_model_weights_dir, - ) - state_dict = checkpoint["state_dict"] - - finally: - os.chdir(cwd) - - return state_dict - - def load_vision_encoder_weights(self, vision_encoder, nemo_path): - state_dict = self._load_model_weights(nemo_path) - - new_state_dict = {} - for k, v in state_dict.items(): - if k.startswith("model.vision_encoder."): - new_k = k.replace("model.vision_encoder.", "") - new_state_dict[new_k] = v - - missing, unexpected = vision_encoder.load_state_dict(new_state_dict, strict=False) - print(f"Restored from {nemo_path} with {len(missing)} missing and {len(unexpected)} unexpected keys") - if len(missing) > 0: - print(f"Missing Keys: {missing}") - if len(unexpected) > 0: - print(f"Unexpected Keys: {unexpected}") - - def load_llm_weights(self, nemo_path): - state_dict = self._load_model_weights(nemo_path) - - new_state_dict = {} - if self.dist_ckpt: - for k, v in state_dict.items(): - new_k = k - if k.startswith("model."): - new_k = k.replace("model.", "", 1) - new_state_dict[new_k] = v - self.load_state_dict(new_state_dict, strict=True) - else: - for k, v in state_dict.items(): - if k.startswith("model.language_model."): - new_k = k.replace("model.language_model.", "", 1) - module_key, param_key = new_k.split(".", 1) - if module_key not in new_state_dict: - new_state_dict[module_key] = {} - new_state_dict[module_key][param_key] = v - self.language_model.load_state_dict(new_state_dict, strict=True) - print(f"Restored LLM weights from {nemo_path}.") - - -class NevaModel(GPTModel): - def __init__( - self, mm_cfg, media_start_id, media_end_id, **kwargs, - ): - super(NevaModel, self).__init__(**kwargs,) - - self.mm_cfg = mm_cfg - self.media_start_id = media_start_id - self.media_end_id = media_end_id - - if mm_cfg.llm.from_pretrained is not None: - logging.info(f"Loading LLM weights from checkpoint {mm_cfg.llm.from_pretrained}") - self.load_llm_weights(self.language_model, mm_cfg.llm.from_pretrained) - if mm_cfg.llm.freeze: - for param in self.language_model.parameters(): - param.requires_grad = False - self.language_model = self.language_model.eval() - - # Initialize vision encoder and freeze it - if mm_cfg.vision_encoder.from_hf: - vision_encoder = CLIPVisionModel.from_pretrained( - mm_cfg.vision_encoder.from_pretrained, torch_dtype=torch.bfloat16, - ).cuda() - vision_encoder = vision_encoder.to(torch.bfloat16) - if mm_cfg.vision_encoder.freeze: - for param in vision_encoder.parameters(): - param.requires_grad = False - vision_encoder = vision_encoder.eval() - else: - vision_cfg = MegatronCLIPModel.restore_from( - mm_cfg.vision_encoder.from_pretrained, return_config=True - ).vision - vision_encoder = FrozenCLIPVisionTransformer(vision_cfg, self.config) - self.load_vision_encoder_weights(vision_encoder, mm_cfg.vision_encoder.from_pretrained) - if mm_cfg.vision_encoder.freeze: - vision_encoder.freeze() - - model_type = self.mm_cfg.llm.get("model_type", "nvgpt") - # Monkey patch embedding - if kwargs.get("pre_process", True): - extend_instance(self.language_model.embedding.word_embeddings, NevaWordEmbeddingMixin) - self.language_model.embedding.word_embeddings.init_vision( - vision_encoder, - media_start_id, - media_end_id, - vision_select_layer=mm_cfg.vision_encoder.get("vision_select_layer", -2), - class_token_length=mm_cfg.vision_encoder.get("class_token_length", 1), - use_im_start_end=mm_cfg.get("use_im_start_end", False), - llama_tricks=(model_type == "llama_2"), - ) - - def forward( - self, *args, **kwargs, - ): - media = kwargs.pop('media', None) - self.language_model.embedding.word_embeddings.set_media(media) - return super().forward(*args, **kwargs) - - def _load_model_weights(self, nemo_path): - """ - Shared method to load model weights from a given nemo_path. - """ - if torch.cuda.is_available(): - map_location = torch.device('cuda') - else: - map_location = torch.device('cpu') - - save_restore_connector = NLPSaveRestoreConnector() - cwd = os.getcwd() - app_state = AppState() - - with tempfile.TemporaryDirectory() as tmpdir: - try: - if os.path.isfile(nemo_path): - save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) - else: - tmpdir = nemo_path - os.chdir(tmpdir) - if app_state.model_parallel_size is not None and app_state.model_parallel_size > 1: - model_weights = save_restore_connector._inject_model_parallel_rank_for_ckpt( - tmpdir, save_restore_connector.model_weights_ckpt - ) - else: - model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) - - state_dict = save_restore_connector._load_state_dict_from_disk( - model_weights, map_location=map_location - ) - finally: - os.chdir(cwd) - - return state_dict - - def load_vision_encoder_weights(self, vision_encoder, nemo_path): - state_dict = self._load_model_weights(nemo_path) - - new_state_dict = {} - for k, v in state_dict.items(): - if k.startswith("model.vision_encoder."): - new_k = k.replace("model.vision_encoder.", "") - new_state_dict[new_k] = v - - missing, unexpected = vision_encoder.load_state_dict(new_state_dict, strict=False) - print(f"Restored from {nemo_path} with {len(missing)} missing and {len(unexpected)} unexpected keys") - if len(missing) > 0: - print(f"Missing Keys: {missing}") - if len(unexpected) > 0: - print(f"Unexpected Keys: {unexpected}") - - def load_llm_weights(self, language_model, nemo_path): - state_dict = self._load_model_weights(nemo_path) - - new_state_dict = {} - for k, v in state_dict.items(): - if k.startswith("model.language_model."): - new_k = k.replace("model.language_model.", "", 1) - module_key, param_key = new_k.split(".", 1) - if module_key not in new_state_dict: - new_state_dict[module_key] = {} - new_state_dict[module_key][param_key] = v - - language_model.load_state_dict(new_state_dict, strict=True) - print(f"Restored LLM weights from {nemo_path}.") - - -class MegatronNevaModel(MultimodalAdapterModelMixin, MegatronGPTModel): - """ - Megatron Neva pretraining - """ - - def __init__(self, cfg: DictConfig, trainer: Trainer): - # MegatronGPTModel.__init__(self, cfg, trainer) - super().__init__(cfg, trainer) - self.init_neva_adapter() - - def init_neva_adapter(self): - self.base_keys = self._get_all_keys() - adapter_name = AdapterName.MM_LINEAR_ADAPTER - adapter_cfg = MMLinearAdapterConfig( - in_features=self.cfg.mm_cfg.vision_encoder.hidden_size, out_features=self.cfg.hidden_size, bias=True, - ) - for name, module in self.named_modules(): - self._check_and_add_adapter( - name, module, adapter_name, adapter_cfg, autocast_dtype=self.autocast_dtype, - ) - self.adapter_keys = self._get_all_keys() - self.base_keys - - def model_provider_func(self, pre_process, post_process): - """Model depends on pipeline paralellism.""" - media_start_id = self.tokenizer.token_to_id(DEFAULT_IM_START_TOKEN) - media_end_id = self.tokenizer.token_to_id(DEFAULT_IM_END_TOKEN) - - if self.mcore_gpt: - if parallel_state.is_unitialized(): - - def dummy(): - return - - if self.trainer.strategy.launcher is not None: - self.trainer.strategy.launcher.launch(dummy, trainer=self.trainer) - self.trainer.strategy.setup_environment() - - model = MCoreNevaModel( - mm_cfg=self.cfg.mm_cfg, - media_start_id=media_start_id, - media_end_id=media_end_id, - config=self.transformer_config, - vocab_size=self.cfg.get('override_vocab_size', self.padded_vocab_size), - max_sequence_length=self.cfg.get('encoder_seq_length', 512), - pre_process=pre_process, - post_process=post_process, - parallel_output=True, - share_embeddings_and_output_weights=self.cfg.get('share_embeddings_and_output_weights', True), - position_embedding_type=self.cfg.get('position_embedding_type', 'learned_absolute'), - rotary_percent=self.cfg.get('rotary_percentage', 1.0), - seq_len_interpolation_factor=self.cfg.get('seq_len_interpolation_factor', None), - ) - else: - model = NevaModel( - mm_cfg=self.cfg.mm_cfg, - media_start_id=media_start_id, - media_end_id=media_end_id, - config=self.model_parallel_config, - vocab_size=self.cfg.get('override_vocab_size', self.padded_vocab_size), - hidden_size=self.cfg.hidden_size, - max_position_embeddings=self.cfg.max_position_embeddings, - num_layers=self.cfg.num_layers, - num_attention_heads=self.cfg.num_attention_heads, - apply_query_key_layer_scaling=self.cfg.get('apply_query_key_layer_scaling', True), - kv_channels=self.cfg.get('kv_channels', None), - ffn_hidden_size=self.cfg.ffn_hidden_size, - num_tokentypes=0, - parallel_output=True, - pre_process=pre_process, - post_process=post_process, - init_method_std=self.cfg.get('init_method_std', 0.02), - use_scaled_init_method=self.cfg.get('use_scaled_init_method', True), - fp16_lm_cross_entropy=self.cfg.get('fp16_lm_cross_entropy', False), - megatron_amp_O2=self.cfg.get('megatron_amp_O2', False), - hidden_dropout=self.cfg.get('hidden_dropout', 0.1), - attention_dropout=self.cfg.get('attention_dropout', 0.1), - ffn_dropout=self.cfg.get('ffn_dropout', 0.0), - precision=self.cfg.get('precision', 16), - fp32_residual_connection=self.cfg.get('fp32_residual_connection', False), - activations_checkpoint_granularity=self.cfg.get('activations_checkpoint_granularity', None), - activations_checkpoint_method=self.cfg.get('activations_checkpoint_method', None), - activations_checkpoint_num_layers=self.cfg.get('activations_checkpoint_num_layers', 1), - activations_checkpoint_layers_per_pipeline=self.cfg.get( - 'activations_checkpoint_layers_per_pipeline', None - ), - normalization=self.cfg.get('normalization', 'layernorm'), - layernorm_epsilon=self.cfg.get('layernorm_epsilon', 1e-5), - onnx_safe=self.cfg.get('onnx_safe', False), - bias=self.cfg.get('bias', True), - bias_activation_fusion=self.cfg.get('bias_activation_fusion', True), - bias_dropout_add_fusion=self.cfg.get('bias_dropout_add_fusion', True), - activation=self.cfg.get('activation', 'gelu'), - headscale=self.cfg.get('headscale', False), - transformer_block_type=self.cfg.get('transformer_block_type', 'pre_ln'), - openai_gelu=self.cfg.get('openai_gelu', False), - normalize_attention_scores=self.cfg.get('normalize_attention_scores', True), - position_embedding_type=self.cfg.get('position_embedding_type', 'learned_absolute'), - rotary_percentage=self.cfg.get('rotary_percentage', 1.0), - share_embeddings_and_output_weights=self.cfg.get('share_embeddings_and_output_weights', True), - attention_type=self.cfg.get('attention_type', 'multihead'), - masked_softmax_fusion=self.cfg.get('masked_softmax_fusion', True), - persist_layer_norm=self.cfg.get('persist_layer_norm', False), - transformer_engine=self.cfg.get('transformer_engine', False), - fp8=self.cfg.get('fp8', False), - fp8_e4m3=self.cfg.get('fp8_e4m3', False), - fp8_hybrid=self.cfg.get('fp8_hybrid', False), - fp8_margin=self.cfg.get('fp8_margin', 0), - fp8_interval=self.cfg.get('fp8_interval', 1), - fp8_amax_history_len=self.cfg.get('fp8_amax_history_len', 1), - fp8_amax_compute_algo=self.cfg.get('fp8_amax_compute_algo', 'most_recent'), - reduce_amax=self.cfg.get('reduce_amax', True), - use_emha=self.cfg.get('use_emha', False), - ub_tp_comm_overlap=self.cfg.get('ub_tp_comm_overlap', False), - use_flash_attention=self.cfg.get('use_flash_attention', False), - megatron_legacy=self.cfg.get('megatron_legacy', False), - seq_len_interpolation_factor=self.cfg.get('seq_len_interpolation_factor', None), - ) - - logging.info( - f"Neva model initialized with {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable parameters" - ) - - return model - - def setup_optimizer_param_groups(self): - """ModelPT override. Optimizer will get self._optimizer_param_groups""" - if self.cfg.mm_cfg.llm.freeze: - super().setup_optimizer_param_groups() - else: - MegatronGPTModel.setup_optimizer_param_groups(self) - - # filter out params doesn't have grad - for param_group in self._optimizer_param_groups: - params_with_grad = [param for param in param_group['params'] if param.requires_grad] - param_group['params'] = params_with_grad - - def forward(self, tokens, text_position_ids, attention_mask, labels, media=None): - output_tensor = self.model(tokens, text_position_ids, attention_mask, labels, media) - return output_tensor - - def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): - return MegatronGPTModel.fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only) - - def training_step(self, dataloader_iter, batch_idx): - """ - We pass the dataloader iterator function to the micro-batch scheduler. - The input batch to each micro-batch is fetched using the dataloader function - in the micro-batch fwd function. - """ - return MegatronGPTModel.training_step(self, dataloader_iter, batch_idx) - - def get_forward_output_and_loss_func(self, validation_step=False): - def loss_func(output_tensor, loss_mask): - loss_for_ub = self.loss_func(loss_mask, output_tensor) - if validation_step and not self.cfg.data.get('validation_drop_last', True): - raise NotImplementedError(f"`validation_drop_last=False` is not implemented in Neva!") - else: - reduced_loss = average_losses_across_data_parallel_group([loss_for_ub]) - return loss_for_ub, dict(avg=reduced_loss[0].unsqueeze(0)) - - def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_layers=None): - batch = next(dataloader_iter) - if parallel_state.get_pipeline_model_parallel_world_size() == 1: - for k in batch.keys(): - if self.get_attention_mask_from_fusion: - batch[k] = batch[k].cuda(non_blocking=True) if k not in ['attention_mask'] else None - else: - batch[k] = batch[k].cuda(non_blocking=True) - else: - if parallel_state.is_pipeline_first_stage(): - # First pipeline stage needs tokens, position_ids, and attention_mask - for k in batch.keys(): - if self.get_attention_mask_from_fusion: - batch[k] = ( - batch[k].cuda(non_blocking=True) if k in ['tokens', 'position_ids', 'media'] else None - ) - else: - batch[k] = ( - batch[k].cuda(non_blocking=True) - if k in ['tokens', 'position_ids', 'attention_mask', 'media'] - else None - ) - elif parallel_state.is_pipeline_last_stage(): - # Last pipeline stage needs the labels, loss_mask, and attention_mask - for k in batch.keys(): - if self.get_attention_mask_from_fusion: - batch[k] = batch[k].cuda(non_blocking=True) if k in ['labels', 'loss_mask'] else None - else: - batch[k] = ( - batch[k].cuda(non_blocking=True) - if k in ['labels', 'loss_mask', 'attention_mask'] - else None - ) - else: - # Intermediate pipeline stage doesn't need any inputs - batch = {k: None for k in ['tokens', 'position_ids', 'attention_mask', 'labels', 'media']} - - forward_args = { - 'input_ids': batch['tokens'], - 'position_ids': batch['position_ids'], - 'attention_mask': batch['attention_mask'], - 'labels': batch['labels'], - 'media': batch.get('media', None), - } - if not self.mcore_gpt: - if self.use_loss_mask: - forward_args['loss_mask'] = batch['loss_mask'] - forward_args['checkpoint_activations_all_layers'] = checkpoint_activations_all_layers - - output_tensor = model(**forward_args) - - return output_tensor, partial(loss_func, loss_mask=batch['loss_mask']) - - return fwd_output_and_loss_func - - def get_forward_output_only_func(self): - def fwd_output_only_func(dataloader_iter, model): - batch = next(dataloader_iter) - extra_arg = {} - ( - tokens, - attention_mask, - position_ids, - media, - set_inference_key_value_memory, - inference_max_sequence_len, - ) = batch - tokens = tokens.cuda() - attention_mask = attention_mask.cuda() - position_ids = position_ids.cuda() - attention_mask = attention_mask[0:1] - if media is not None: - media = media.cuda() - labels = None - extra_arg['set_inference_key_value_memory'] = set_inference_key_value_memory[0].item() - extra_arg['inference_max_sequence_len'] = inference_max_sequence_len[0].item() - # TODO : Should I add labels ? - output_tensor = model(tokens, position_ids, attention_mask, labels, media, **extra_arg) - - def id_func(output_tensor): - return output_tensor, {'logits': output_tensor} - - return output_tensor, id_func - - return fwd_output_only_func - - def validation_step(self, dataloader_iter, batch_idx): - return MegatronGPTModel.validation_step(self, dataloader_iter, batch_idx) - - def on_validation_epoch_end(self): - if not self.validation_step_outputs: - return - - if parallel_state.is_pipeline_last_stage(): - # only the last pipeline parallel stages return loss with their batch size - if self.cfg.data.get('validation_drop_last', True): - averaged_loss = torch.stack(self.validation_step_outputs).mean() - else: - # Compute the avg loss by total_loss across all samples / total number of samples - # total_loss_and_total_samples = torch.vstack(outputs).sum(axis=0) - # avg_loss = total_loss_and_total_samples[0] / total_loss_and_total_samples[1] - # averaged_loss = avg_loss.type(torch.float32).cuda() - raise NotImplementedError("`validation_drop_last=False` is not supported!") - else: - averaged_loss = torch.tensor(0.0, dtype=torch.float32).cuda() - - # we can only log on one rank if it is rank zero so we broadcast from last rank - torch.distributed.broadcast(averaged_loss, get_last_rank()) - self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True, batch_size=1) - self.validation_step_outputs.clear() # free memory - - return averaged_loss - - def on_validation_epoch_start(self): - pass - - def test_step(self, batch, batch_idx): - return self.validation_step(batch, batch_idx) - - def test_epoch_end(self, outputs): - averaged_loss = average_losses_across_data_parallel_group(outputs) - logging.info(f'test_loss: {averaged_loss[0]}') - - def loss_func(self, loss_mask, output_tensor): - losses = output_tensor.float() - loss_mask = loss_mask.view(-1).float() - # TODO: add nemo version here - loss = torch.sum(losses.view(-1) * loss_mask) / loss_mask.sum() # sequence level nll - return loss - - def setup(self, stage=None): - """ PTL hook that is executed after DDP spawns. - We setup datasets here as megatron datasets require DDP to instantiate. - See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. - Args: - stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. - """ - num_parameters_on_device, total_num_parameters = self._get_total_params_across_model_parallel_groups_gpt_bert( - self.model - ) - - logging.info( - f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' - f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' - f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' - f'Total number of model parameters: {total_num_parameters:.2e}.' - ) - - resume_checkpoint_path = self.trainer.ckpt_path - if resume_checkpoint_path: - init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) - else: - init_consumed_samples = 0 - self.init_consumed_samples = init_consumed_samples - self.init_global_step = self.trainer.global_step - - rampup_batch_size = self.cfg.get('rampup_batch_size', None) - if rampup_batch_size: - start_batch_size = rampup_batch_size[0] - batch_size_increment = rampup_batch_size[1] - total_gpus_number = self.trainer.num_devices * self.trainer.num_nodes - - assert start_batch_size % (total_gpus_number) == 0, ( - 'expected' - ' start batch size ({}) to be divisible by total number of GPUs' - ' ({})'.format(start_batch_size, total_gpus_number) - ) - - micro_batch_size = self.cfg.get('micro_batch_size', 1) - tensor_model_parallel_size = self.cfg.get('tensor_model_parallel_size', 1) - pipeline_model_parallel_size = self.cfg.get('pipeline_model_parallel_size', 1) - total_data_parallel_size = total_gpus_number // (tensor_model_parallel_size * pipeline_model_parallel_size) - - assert batch_size_increment % (micro_batch_size * total_data_parallel_size) == 0, ( - 'expected' - ' batch size increment ({}) to be divisible by micro_batch_size ({}) times total data parallel size' - ' ({})'.format(batch_size_increment, micro_batch_size, total_data_parallel_size) - ) - - if stage == 'predict': - return - else: - # TODO: consider adding a ModelPT guard to check if model is being restored. - # allowing restored models to optionally setup datasets - self.build_train_valid_test_datasets() - self.setup_training_data(self.cfg.data) - self.setup_validation_data(self.cfg.data) - self.setup_test_data(self.cfg.data) - - # when using pipeline model parallel the final stage need to initialize word embeddings - if parallel_state.get_pipeline_model_parallel_world_size() > 1: - if isinstance(self.model, list): - for i, module in enumerate(self.model): - parallel_state.set_virtual_pipeline_model_parallel_rank(i) - if self.cfg.get('share_embeddings_and_output_weights', True): - module.sync_initial_word_embeddings() - parallel_state.set_virtual_pipeline_model_parallel_rank(0) - else: - if self.cfg.get('share_embeddings_and_output_weights', True): - self.model.sync_initial_word_embeddings() - - if self.cfg.get('transformer_engine', False): - self.setup_transformer_engine_tp_groups() - - def build_train_valid_test_datasets(self): - logging.info('Building Neva datasets.') - ds_dict = make_supervised_data_module(tokenizer=self.tokenizer, model_cfg=self.cfg,) - self._train_ds = ds_dict["train_dataset"] - self._validation_ds = ds_dict["eval_dataset"] - - return self._train_ds, self._validation_ds - - def build_pretraining_data_loader( - self, dataset, consumed_samples, dataset_type=None, drop_last=True, pad_samples_to_global_batch_size=False - ): - """Buld dataloader given an input dataset.""" - - logging.info(f'Building dataloader with consumed samples: {consumed_samples}') - # Megatron sampler - if hasattr(self.cfg.data, 'dataloader_type') and self.cfg.data.dataloader_type is not None: - if self.cfg.data.dataloader_type == 'single': - batch_sampler = MegatronPretrainingSampler( - total_samples=len(dataset), - consumed_samples=consumed_samples, - micro_batch_size=self.cfg.micro_batch_size, - data_parallel_rank=parallel_state.get_data_parallel_rank(), - data_parallel_size=parallel_state.get_data_parallel_world_size(), - drop_last=drop_last, - global_batch_size=self.cfg.global_batch_size, - pad_samples_to_global_batch_size=pad_samples_to_global_batch_size, - ) - elif self.cfg.data.dataloader_type == 'cyclic': - batch_sampler = MegatronPretrainingRandomSampler( - total_samples=len(dataset), - consumed_samples=consumed_samples, - micro_batch_size=self.cfg.micro_batch_size, - data_parallel_rank=parallel_state.get_data_parallel_rank(), - data_parallel_size=parallel_state.get_data_parallel_world_size(), - drop_last=self.cfg.get('drop_last', True), - ) - else: - raise ValueError('cfg.data.dataloader_type must be "single" or "cyclic"') - else: - raise ValueError('cfg.data.dataloader_type not found. Must be "single" or "cyclic"') - - collate_func = DataCollatorForSupervisedDataset(self.cfg, self.tokenizer) - return torch.utils.data.DataLoader( - dataset, - batch_sampler=batch_sampler, - collate_fn=collate_func, - num_workers=self.cfg.data.num_workers, - pin_memory=True, - persistent_workers=True if self.cfg.data.num_workers > 0 else False, - ) - - @classmethod - def list_available_models(cls) -> Optional[PretrainedModelInfo]: - """ - This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud. - Returns: - List of available pre-trained models. - """ - return [] - - def setup_test_data(self, cfg): - pass - - def state_dict(self, destination=None, prefix='', keep_vars=False): - # Get the original state dictionary - original_state_dict = super().state_dict(destination=destination, prefix=prefix, keep_vars=keep_vars) - keys_to_keep = list(self.adapter_keys) - # TODO(yuya): maybe not hard-code vision_encoder keys here - if self.megatron_amp_O2: - vision_encoder_keys = [ - k.replace("model.module.", "model.", 1) for k in self.base_keys if "vision_encoder" in k - ] - llm_keys = [k.replace("model.module.", "model.", 1) for k in self.base_keys if "vision_encoder" not in k] - else: - vision_encoder_keys = [k for k in self.base_keys if "vision_encoder" in k] - llm_keys = [k for k in self.base_keys if "vision_encoder" not in k] - if not self.cfg.mm_cfg.llm.freeze: - keys_to_keep += llm_keys - if not self.cfg.mm_cfg.vision_encoder.freeze: - keys_to_keep += vision_encoder_keys - return {k: original_state_dict[k] for k in keys_to_keep if k in original_state_dict} - - def load_state_dict(self, state_dict, strict=False): - logging.warning('Loading state dict for MegatronNevaModel...') - missing_keys, unexpected_keys = NLPModel.load_state_dict(self, state_dict, strict=False) - - if len(missing_keys) > 0: - logging.warning('Missing keys were detected during the load. Please double check.') - logging.warning(f'Missing keys: \n{missing_keys}') - if len(unexpected_keys) > 0: - logging.critical('Unexpected keys were detected during the load. Please double check.') - logging.critical(f'Unexpected keys: \n{unexpected_keys}') - - def on_load_checkpoint(self, checkpoint) -> None: - if self.mcore_gpt: - state_dict = checkpoint["state_dict"] - self.load_state_dict(state_dict) - - def sharded_state_dict(self, prefix: str = ''): - return None - # sharded_state_dict = MegatronGPTModel.sharded_state_dict(self, prefix) - # return sharded_state_dict - - def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any: - inference_config = self.get_inference_config() - - if inference_config is None: - return None - else: - # need to overwrite some configuration, make it immutable - image = os.path.join(inference_config['images_base_path'], batch['image'][0]) - prompt = batch['prompt'][0] - inference_config = inference_config.copy() - compute_logprob = inference_config['compute_logprob'] - if compute_logprob: - inference_config['inputs'] = prompt - inference_config['tokens_to_generate'] = 1 - inference_config['all_probs'] = True - inference_config["add_BOS"] = False - inference_config['greedy'] = True - inference_config['image_list'] = image - response = generate(self, **inference_config) - compute_prob_response = get_computeprob_response(self.tokenizer, response, prompt) - return compute_prob_response - else: - inference_config['inputs'] = prompt - inference_config['image_list'] = image - return generate(self, **inference_config) - - def generate( - self, input_prompts, inference_config, length_params: LengthParam, sampling_params: SamplingParam = None, - ) -> OutputType: - - # check whether the DDP is initialized - if parallel_state.is_unitialized(): - - def dummy(): - return - - import os - - if self.trainer.strategy.launcher is not None: - self.trainer.strategy.launcher.launch(dummy, trainer=self.trainer) - self.trainer.strategy.setup_environment() - - # set the default sampling params if it is None. - # default do greedy sampling - if sampling_params is None: - sampling_params = get_default_sampling_params() - - # set the default length params if it is None. - # default do greedy sampling - if length_params is None: - length_params = get_default_length_params() - - import time - - start = time.time() - # Supports only one prompt at a time - result = megatron_neva_generate(self.cuda(), input_prompts, length_params, sampling_params, inference_config) - end = time.time() - # print(f'Time taken {end - start}') - - return result diff --git a/nemo/collections/multimodal/models/neva/neva_peft_models.py b/nemo/collections/multimodal/models/neva/neva_peft_models.py deleted file mode 100644 index ac03b5983430..000000000000 --- a/nemo/collections/multimodal/models/neva/neva_peft_models.py +++ /dev/null @@ -1,60 +0,0 @@ -from omegaconf.dictconfig import DictConfig -from pytorch_lightning.trainer.trainer import Trainer - -from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel -from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import ( - AdapterName, - InfusedAdapterConfig, - LoraKQVAdapterConfig, - MLPInfusedAdapterConfig, - ParallelLinearAdapterConfig, - PromptEncoderAdapterConfig, -) -from nemo.core.classes.mixins import adapter_mixins -from nemo.utils import logging, model_utils - - -class MegatronNevaLoRAModel(MegatronNevaModel): - """ - MegatronNevaLoRAModel is a model that combines a base model (MegatronNevaModel) with a low-rank adapters. - The lora adapters will be added in `nemo/collections/nlp/modules/common/megatron/attention.py` - The implementation is based on Hu et al. nemo/collections/nlp/modules/common/megatron/attention.py - - A single low-rank feedfowrad layer is used in parallel with the KQV projection layer. - TODO: Add support to also include an option to adda low-rank adapter in the output projection layer. - """ - - def __init__( - self, cfg: DictConfig, trainer: Trainer, - ): - self.peft_name_keys = [ - AdapterName.LORA_KQV_ADAPTER, - ] - lora_cfg = cfg.peft.lora_tuning - if cfg.get("kv_channels", None) is None: - assert ( - cfg.hidden_size % cfg.num_attention_heads == 0 - ), 'hidden_size must be divisible by num_attention_heads if kv_channels is None' - kv_channels = cfg.hidden_size // cfg.num_attention_heads - else: - kv_channels = cfg.kv_channels - projection_size = kv_channels * cfg.num_attention_heads - - adapter_cfg = LoraKQVAdapterConfig( - in_features=cfg.hidden_size, - out_features=3 * projection_size, - dim=lora_cfg.adapter_dim, - norm_position="none", - norm_type="none", - activation="identity", - column_init_method=lora_cfg.get("column_init_method", "normal"), - row_init_method=lora_cfg.get("row_init_method", "zero"), - gather_output=False, - dropout=lora_cfg.adapter_dropout, - ) - - self.name_key_to_cfg = {} - for k in self.peft_name_keys: - self.name_key_to_cfg[k] = adapter_cfg - - super().__init__(cfg, trainer) diff --git a/nemo/collections/multimodal/models/stable_diffusion/__init__.py b/nemo/collections/multimodal/models/stable_diffusion/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/models/stable_diffusion/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py b/nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py deleted file mode 100644 index c3ca34b35233..000000000000 --- a/nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import re -from abc import ABC, abstractclassmethod -from typing import Any, Optional - -import torch - -from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo -from nemo.core.classes import ModelPT -from nemo.utils import logging - - -class DiffusionModel(ModelPT, ABC): - @abstractclassmethod - def get_conditioning(self, c: Any) -> Any: - """ - Encode conditioning c. - For txt2img use-case, the input conditioning would be the plain text, - and output would be the encoded embedding for the corresponding text; - For img2img use-case, the input conditioning would be the raw image, - and output would be the corresponding image embedding - - Args: - c: conditioning - - Returns: - encoded conditioning - """ - pass - - @abstractclassmethod - def apply_model(self, x_t: torch.Tensor, t: torch.Tensor, c: Optional[torch.Tensor]) -> torch.Tensor: - """ - Apply Diffusion model. - If c is not given, the model acts as an unconditional diffusion model. - For diffusion model that applies on the pixel space, x_t should be in the pixel space; - for diffusion model that applies on the latent space, x_t is in latent space. - - Args: - x_t: noisy input x at timestamp t - t: timestamp - c: conditioning - - Returns: - Predicted result that has the same shape as x_t - """ - - def on_train_start(self) -> None: - super().on_train_start() - self.init_global_step = self.trainer.global_step - - def _extract_consumed_samples_from_ckpt(self, ckpt_path): - try: - init_consumed_samples = int(float(re.findall(r"consumed_samples\=([0-9]+.[0-9]+)", ckpt_path)[0])) - except (ValueError, TypeError, IndexError): - logging.warning("Cannot parse the checkpoint file to get the consumed samples. assume it is zero.") - init_consumed_samples = 0 - - return init_consumed_samples - - def compute_consumed_samples(self, steps_since_resume=0): - consumed_samples = ( - self.init_consumed_samples - + steps_since_resume - * self.trainer.world_size - * self.cfg.micro_batch_size - * self.trainer.accumulate_grad_batches - ) - return int(consumed_samples) diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/__init__.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py deleted file mode 100644 index 6f2dd37424d0..000000000000 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py +++ /dev/null @@ -1,608 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from contextlib import contextmanager - -import pytorch_lightning as pl -import torch -import torch.nn.functional as F -from taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer - -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.model import Decoder, Encoder -from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import ( - DiagonalGaussianDistribution, -) -from nemo.collections.multimodal.parts.stable_diffusion.utils import instantiate_from_config - - -class VQModel(pl.LightningModule): - def __init__( - self, - ddconfig, - lossconfig, - n_embed, - embed_dim, - ckpt_path=None, - ignore_keys=[], - image_key="image", - colorize_nlabels=None, - monitor=None, - batch_resize_range=None, - scheduler_config=None, - lr_g_factor=1.0, - remap=None, - sane_index_shape=False, # tell vector quantizer to return indices as bhw - ): - super().__init__() - self.embed_dim = embed_dim - self.n_embed = n_embed - self.image_key = image_key - self.encoder = Encoder(**ddconfig) - self.decoder = Decoder(**ddconfig) - self.loss = instantiate_from_config(lossconfig) - self.quantize = VectorQuantizer(n_embed, embed_dim, beta=0.25, remap=remap, sane_index_shape=sane_index_shape) - self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1) - self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1) - if colorize_nlabels is not None: - assert type(colorize_nlabels) == int - self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1)) - if monitor is not None: - self.monitor = monitor - self.batch_resize_range = batch_resize_range - if self.batch_resize_range is not None: - print(f"{self.__class__.__name__}: Using per-batch resizing in range {batch_resize_range}.") - - if ckpt_path is not None: - self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys) - self.scheduler_config = scheduler_config - self.lr_g_factor = lr_g_factor - - @contextmanager - def ema_scope(self, context=None): - if self.use_ema: - self.model_ema.store(self.parameters()) - self.model_ema.copy_to(self) - if context is not None: - print(f"{context}: Switched to EMA weights") - try: - yield None - finally: - if self.use_ema: - self.model_ema.restore(self.parameters()) - if context is not None: - print(f"{context}: Restored training weights") - - def init_from_ckpt(self, path, ignore_keys=list()): - sd = torch.load(path, map_location="cpu")["state_dict"] - keys = list(sd.keys()) - for k in keys: - for ik in ignore_keys: - if k.startswith(ik): - print("Deleting key {} from state_dict.".format(k)) - del sd[k] - missing, unexpected = self.load_state_dict(sd, strict=False) - print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") - if len(missing) > 0: - print(f"Missing Keys: {missing}") - print(f"Unexpected Keys: {unexpected}") - - def on_train_batch_end(self, *args, **kwargs): - if self.use_ema: - self.model_ema(self) - - def encode(self, x): - h = self.encoder(x) - h = self.quant_conv(h) - quant, emb_loss, info = self.quantize(h) - return quant, emb_loss, info - - def encode_to_prequant(self, x): - h = self.encoder(x) - h = self.quant_conv(h) - return h - - def decode(self, quant): - quant = self.post_quant_conv(quant) - dec = self.decoder(quant) - return dec - - def decode_code(self, code_b): - quant_b = self.quantize.embed_code(code_b) - dec = self.decode(quant_b) - return dec - - def forward(self, input, return_pred_indices=False): - quant, diff, (_, _, ind) = self.encode(input) - dec = self.decode(quant) - if return_pred_indices: - return dec, diff, ind - return dec, diff - - def get_input(self, batch, k): - x = batch[k] - if len(x.shape) == 3: - x = x[..., None] - x = x.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format).float() - if self.batch_resize_range is not None: - lower_size = self.batch_resize_range[0] - upper_size = self.batch_resize_range[1] - if self.global_step <= 4: - # do the first few batches with max size to avoid later oom - new_resize = upper_size - else: - new_resize = np.random.choice(np.arange(lower_size, upper_size + 16, 16)) - if new_resize != x.shape[2]: - x = F.interpolate(x, size=new_resize, mode="bicubic") - x = x.detach() - return x - - def training_step(self, batch, batch_idx, optimizer_idx): - # https://github.com/pytorch/pytorch/issues/37142 - # try not to fool the heuristics - x = self.get_input(batch, self.image_key) - xrec, qloss, ind = self(x, return_pred_indices=True) - - if optimizer_idx == 0: - # autoencode - aeloss, log_dict_ae = self.loss( - qloss, - x, - xrec, - optimizer_idx, - self.global_step, - last_layer=self.get_last_layer(), - split="train", - predicted_indices=ind, - ) - - self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=True) - return aeloss - - if optimizer_idx == 1: - # discriminator - discloss, log_dict_disc = self.loss( - qloss, x, xrec, optimizer_idx, self.global_step, last_layer=self.get_last_layer(), split="train" - ) - self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=True) - return discloss - - def validation_step(self, batch, batch_idx): - log_dict = self._validation_step(batch, batch_idx) - with self.ema_scope(): - log_dict_ema = self._validation_step(batch, batch_idx, suffix="_ema") - return log_dict - - def _validation_step(self, batch, batch_idx, suffix=""): - x = self.get_input(batch, self.image_key) - xrec, qloss, ind = self(x, return_pred_indices=True) - aeloss, log_dict_ae = self.loss( - qloss, - x, - xrec, - 0, - self.global_step, - last_layer=self.get_last_layer(), - split="val" + suffix, - predicted_indices=ind, - ) - - discloss, log_dict_disc = self.loss( - qloss, - x, - xrec, - 1, - self.global_step, - last_layer=self.get_last_layer(), - split="val" + suffix, - predicted_indices=ind, - ) - rec_loss = log_dict_ae[f"val{suffix}/rec_loss"] - self.log( - f"val{suffix}/rec_loss", rec_loss, prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True - ) - self.log( - f"val{suffix}/aeloss", aeloss, prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True - ) - if version.parse(pl.__version__) >= version.parse('1.4.0'): - del log_dict_ae[f"val{suffix}/rec_loss"] - self.log_dict(log_dict_ae) - self.log_dict(log_dict_disc) - return self.log_dict - - def configure_optimizers(self): - lr_d = self.learning_rate - lr_g = self.lr_g_factor * self.learning_rate - print("lr_d", lr_d) - print("lr_g", lr_g) - opt_ae = torch.optim.Adam( - list(self.encoder.parameters()) - + list(self.decoder.parameters()) - + list(self.quantize.parameters()) - + list(self.quant_conv.parameters()) - + list(self.post_quant_conv.parameters()), - lr=lr_g, - betas=(0.5, 0.9), - ) - opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(), lr=lr_d, betas=(0.5, 0.9)) - - if self.scheduler_config is not None: - scheduler = instantiate_from_config(self.scheduler_config) - - print("Setting up LambdaLR scheduler...") - scheduler = [ - {'scheduler': LambdaLR(opt_ae, lr_lambda=scheduler.schedule), 'interval': 'step', 'frequency': 1}, - {'scheduler': LambdaLR(opt_disc, lr_lambda=scheduler.schedule), 'interval': 'step', 'frequency': 1}, - ] - return [opt_ae, opt_disc], scheduler - return [opt_ae, opt_disc], [] - - def get_last_layer(self): - return self.decoder.conv_out.weight - - def log_images(self, batch, only_inputs=False, plot_ema=False, **kwargs): - log = dict() - x = self.get_input(batch, self.image_key) - x = x.to(self.device) - if only_inputs: - log["inputs"] = x - return log - xrec, _ = self(x) - if x.shape[1] > 3: - # colorize with random projection - assert xrec.shape[1] > 3 - x = self.to_rgb(x) - xrec = self.to_rgb(xrec) - log["inputs"] = x - log["reconstructions"] = xrec - if plot_ema: - with self.ema_scope(): - xrec_ema, _ = self(x) - if x.shape[1] > 3: - xrec_ema = self.to_rgb(xrec_ema) - log["reconstructions_ema"] = xrec_ema - return log - - def to_rgb(self, x): - assert self.image_key == "segmentation" - if not hasattr(self, "colorize"): - self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x)) - x = F.conv2d(x, weight=self.colorize) - x = 2.0 * (x - x.min()) / (x.max() - x.min()) - 1.0 - return x - - -class VQModelInterface(VQModel): - def __init__(self, embed_dim, *args, **kwargs): - super().__init__(embed_dim=embed_dim, *args, **kwargs) - self.embed_dim = embed_dim - - def encode(self, x): - h = self.encoder(x) - h = self.quant_conv(h) - return h - - def decode(self, h, force_not_quantize=False): - # also go through quantization layer - if not force_not_quantize: - quant, emb_loss, info = self.quantize(h) - else: - quant = h - quant = self.post_quant_conv(quant) - dec = self.decoder(quant) - return dec - - -class AutoencoderKL(pl.LightningModule): - def __init__( - self, - ddconfig, - embed_dim, - lossconfig=None, # TODO make it configurable - ckpt_path=None, - ignore_keys=[], - image_key="image", - colorize_nlabels=None, - monitor=None, - from_pretrained: str = None, - capture_cudagraph_iters=-1, - ): - super().__init__() - self.image_key = image_key - self.encoder = Encoder(**ddconfig) - self.decoder = Decoder(**ddconfig) - self.loss = torch.nn.Identity() # instantiate_from_config(lossconfig) - assert ddconfig["double_z"] - self.quant_conv = torch.nn.Conv2d(2 * ddconfig["z_channels"], 2 * embed_dim, 1) - self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1) - self.embed_dim = embed_dim - if colorize_nlabels is not None: - assert type(colorize_nlabels) == int - self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1)) - if monitor is not None: - self.monitor = monitor - if ckpt_path is not None: - self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys) - - if from_pretrained is not None: - state_dict = torch.load(from_pretrained) - self._load_pretrained_model(state_dict) - - # CUDA graph captured sub-modules - self.capture_cudagraph_iters = capture_cudagraph_iters - self.stream = torch.cuda.Stream() - self.encoder_iterations = self.decoder_iterations = 0 - self.encoder_graph = torch.cuda.CUDAGraph() # eval - self.decoder_graph = torch.cuda.CUDAGraph() # eval - self.graphed_encoder = self.graphed_decoder = None # train - self.static_x = self.static_moments = None - self.static_z = self.static_dec = None - - def _state_key_mapping(self, state_dict: dict): - import re - - res_dict = {} - key_list = state_dict.keys() - key_str = " ".join(key_list) - up_block_pattern = re.compile('upsamplers') - p1 = re.compile('mid.block_[0-9]') - p2 = re.compile('decoder.up.[0-9]') - up_blocks_count = int(len(re.findall(up_block_pattern, key_str)) / 2 + 1) - for key_, val_ in state_dict.items(): - key_ = ( - key_.replace("up_blocks", "up") - .replace("down_blocks", "down") - .replace('resnets', 'block') - .replace('mid_block', 'mid') - .replace("mid.block.", "mid.block_") - .replace('mid.attentions.0.key', 'mid.attn_1.k') - .replace('mid.attentions.0.query', 'mid.attn_1.q') - .replace('mid.attentions.0.value', 'mid.attn_1.v') - .replace('mid.attentions.0.group_norm', 'mid.attn_1.norm') - .replace('mid.attentions.0.proj_attn', 'mid.attn_1.proj_out') - .replace('upsamplers.0', 'upsample') - .replace('downsamplers.0', 'downsample') - .replace('conv_shortcut', 'nin_shortcut') - .replace('conv_norm_out', 'norm_out') - ) - - mid_list = re.findall(p1, key_) - if len(mid_list) != 0: - mid_str = mid_list[0] - mid_id = int(mid_str[-1]) + 1 - key_ = key_.replace(mid_str, mid_str[:-1] + str(mid_id)) - - up_list = re.findall(p2, key_) - if len(up_list) != 0: - up_str = up_list[0] - up_id = up_blocks_count - 1 - int(up_str[-1]) - key_ = key_.replace(up_str, up_str[:-1] + str(up_id)) - res_dict[key_] = val_ - return res_dict - - def _load_pretrained_model(self, state_dict, ignore_mismatched_sizes=False): - state_dict = self._state_key_mapping(state_dict) - model_state_dict = self.state_dict() - loaded_keys = [k for k in state_dict.keys()] - expected_keys = list(model_state_dict.keys()) - original_loaded_keys = loaded_keys - missing_keys = list(set(expected_keys) - set(loaded_keys)) - unexpected_keys = list(set(loaded_keys) - set(expected_keys)) - - def _find_mismatched_keys( - state_dict, model_state_dict, loaded_keys, ignore_mismatched_sizes, - ): - mismatched_keys = [] - if ignore_mismatched_sizes: - for checkpoint_key in loaded_keys: - model_key = checkpoint_key - - if ( - model_key in model_state_dict - and state_dict[checkpoint_key].shape != model_state_dict[model_key].shape - ): - mismatched_keys.append( - (checkpoint_key, state_dict[checkpoint_key].shape, model_state_dict[model_key].shape) - ) - del state_dict[checkpoint_key] - return mismatched_keys - - if state_dict['encoder.mid.attn_1.q.weight'].shape == torch.Size([512, 512]): - for key in [ - 'encoder.mid.attn_1.q.weight', - 'decoder.mid.attn_1.q.weight', - 'encoder.mid.attn_1.v.weight', - 'decoder.mid.attn_1.v.weight', - 'encoder.mid.attn_1.k.weight', - 'decoder.mid.attn_1.k.weight', - 'encoder.mid.attn_1.proj_out.weight', - 'decoder.mid.attn_1.proj_out.weight', - ]: - state_dict[key] = state_dict[key].unsqueeze(2).unsqueeze(3) - - if state_dict is not None: - # Whole checkpoint - mismatched_keys = _find_mismatched_keys( - state_dict, model_state_dict, original_loaded_keys, ignore_mismatched_sizes, - ) - error_msgs = self._load_state_dict_into_model(state_dict) - return missing_keys, unexpected_keys, mismatched_keys, error_msgs - - def _load_state_dict_into_model(self, state_dict): - # Convert old format to new format if needed from a PyTorch state_dict - # copy state_dict so _load_from_state_dict can modify it - state_dict = state_dict.copy() - error_msgs = [] - - # PyTorch's `_load_from_state_dict` does not copy parameters in a module's descendants - # so we need to apply the function recursively. - def load(module: torch.nn.Module, prefix=""): - args = (state_dict, prefix, {}, True, [], [], error_msgs) - module._load_from_state_dict(*args) - - for name, child in module._modules.items(): - if child is not None: - load(child, prefix + name + ".") - - load(self) - - return error_msgs - - def init_from_ckpt(self, path, ignore_keys=list()): - sd = torch.load(path, map_location="cpu")["state_dict"] - keys = list(sd.keys()) - for k in keys: - for ik in ignore_keys: - if k.startswith(ik): - print("Deleting key {} from state_dict.".format(k)) - del sd[k] - self.load_state_dict(sd, strict=False) - print(f"Restored from {path}") - - def encode(self, x): - h = self.encoder(x) - moments = self.quant_conv(h) - posterior = DiagonalGaussianDistribution(moments) - return posterior - - def decode(self, z): - z = self.post_quant_conv(z) - dec = self.decoder(z) - return dec - - def forward(self, input, sample_posterior=True): - posterior = self.encode(input) - if sample_posterior: - z = posterior.sample() - else: - z = posterior.mode() - dec = self.decode(z) - return dec, posterior - - def get_input(self, batch, k): - x = batch[k] - if len(x.shape) == 3: - x = x[..., None] - x = x.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format).float() - return x - - def training_step(self, batch, batch_idx, optimizer_idx): - inputs = self.get_input(batch, self.image_key) - reconstructions, posterior = self(inputs) - - if optimizer_idx == 0: - # train encoder+decoder+logvar - aeloss, log_dict_ae = self.loss( - inputs, - reconstructions, - posterior, - optimizer_idx, - self.global_step, - last_layer=self.get_last_layer(), - split="train", - ) - self.log("aeloss", aeloss, prog_bar=True, logger=True, on_step=True, on_epoch=True) - self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=False) - return aeloss - - if optimizer_idx == 1: - # train the discriminator - discloss, log_dict_disc = self.loss( - inputs, - reconstructions, - posterior, - optimizer_idx, - self.global_step, - last_layer=self.get_last_layer(), - split="train", - ) - - self.log("discloss", discloss, prog_bar=True, logger=True, on_step=True, on_epoch=True) - self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=False) - return discloss - - def validation_step(self, batch, batch_idx): - inputs = self.get_input(batch, self.image_key) - reconstructions, posterior = self(inputs) - aeloss, log_dict_ae = self.loss( - inputs, reconstructions, posterior, 0, self.global_step, last_layer=self.get_last_layer(), split="val" - ) - - discloss, log_dict_disc = self.loss( - inputs, reconstructions, posterior, 1, self.global_step, last_layer=self.get_last_layer(), split="val" - ) - - self.log("val/rec_loss", log_dict_ae["val/rec_loss"]) - self.log_dict(log_dict_ae) - self.log_dict(log_dict_disc) - return self.log_dict - - def configure_optimizers(self): - lr = self.learning_rate - opt_ae = torch.optim.Adam( - list(self.encoder.parameters()) - + list(self.decoder.parameters()) - + list(self.quant_conv.parameters()) - + list(self.post_quant_conv.parameters()), - lr=lr, - betas=(0.5, 0.9), - ) - opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(), lr=lr, betas=(0.5, 0.9)) - return [opt_ae, opt_disc], [] - - def get_last_layer(self): - return self.decoder.conv_out.weight - - @torch.no_grad() - def log_images(self, batch, only_inputs=False, **kwargs): - log = dict() - x = self.get_input(batch, self.image_key) - x = x.to(self.device) - if not only_inputs: - xrec, posterior = self(x) - if x.shape[1] > 3: - # colorize with random projection - assert xrec.shape[1] > 3 - x = self.to_rgb(x) - xrec = self.to_rgb(xrec) - log["samples"] = self.decode(torch.randn_like(posterior.sample())) - log["reconstructions"] = xrec - log["inputs"] = x - return log - - def to_rgb(self, x): - assert self.image_key == "segmentation" - if not hasattr(self, "colorize"): - self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x)) - x = F.conv2d(x, weight=self.colorize) - x = 2.0 * (x - x.min()) / (x.max() - x.min()) - 1.0 - return x - - -class IdentityFirstStage(torch.nn.Module): - def __init__(self, *args, vq_interface=False, **kwargs): - self.vq_interface = vq_interface # TODO: Should be true by default but check to not break older stuff - super().__init__() - - def encode(self, x, *args, **kwargs): - return x - - def decode(self, x, *args, **kwargs): - return x - - def quantize(self, x, *args, **kwargs): - if self.vq_interface: - return x, None, [None, None, None] - return x - - def forward(self, x, *args, **kwargs): - return x diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py deleted file mode 100644 index c88c7f932339..000000000000 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py +++ /dev/null @@ -1,2170 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import itertools -from contextlib import contextmanager -from functools import partial -from typing import Any, Dict, Optional, Union - -import numpy as np -import pytorch_lightning as pl -import torch -import torch.nn as nn -from einops import rearrange, repeat -from lightning_fabric.utilities.cloud_io import _load as pl_load -from omegaconf import DictConfig, OmegaConf, open_dict -from pytorch_lightning import Trainer -from pytorch_lightning.accelerators import CPUAccelerator -from pytorch_lightning.core.saving import _load_state as ptl_load_state -from pytorch_lightning.core.saving import load_hparams_from_tags_csv, load_hparams_from_yaml -from pytorch_lightning.utilities.migration import pl_legacy_patch -from pytorch_lightning.utilities.rank_zero import rank_zero_only -from torch._dynamo import optimize -from torch._inductor import config as inductor_config -from torch.optim.lr_scheduler import LambdaLR -from torchvision.utils import make_grid -from tqdm import tqdm - -from nemo.collections.multimodal.data.stable_diffusion.stable_diffusion_dataset import ( - build_train_valid_datasets, - build_train_valid_precached_datasets, -) -from nemo.collections.multimodal.models.stable_diffusion.diffusion_model import DiffusionModel -from nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder import ( - AutoencoderKL, - IdentityFirstStage, - VQModelInterface, -) -from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( - extract_into_tensor, - make_beta_schedule, - noise_like, -) -from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import ( - DiagonalGaussianDistribution, - normal_kl, -) -from nemo.collections.multimodal.parts.stable_diffusion.utils import ( - count_params, - default, - exists, - isimage, - ismap, - log_txt_as_img, - mean_flat, -) -from nemo.collections.multimodal.parts.utils import randn_like -from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel -from nemo.collections.nlp.modules.common.megatron.build_model import build_model -from nemo.collections.nlp.modules.common.megatron.module import Float16Module -from nemo.collections.nlp.parts.utils_funcs import get_last_rank -from nemo.core.classes.common import Serialization -from nemo.utils import logging - -try: - from apex import amp - from apex.transformer.enums import AttnMaskType - from apex.transformer.pipeline_parallel.utils import get_num_microbatches - - HAVE_APEX = True -except (ImportError, ModuleNotFoundError): - HAVE_APEX = False - -try: - from megatron.core import parallel_state - from megatron.core.pipeline_parallel.schedules import get_forward_backward_func - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - -__conditioning_keys__ = {'concat': 'c_concat', 'crossattn': 'c_crossattn', 'adm': 'y'} - - -def random_dropout(embeddings, drop_rate): - r""" - Function to perform random dropout for embeddings. - When we drop embeddings, we zero them out. - Args: - embeddings (tensor): Input embeddings - drop_rate (float): Rate of dropping the embedding. - """ - nsamples = embeddings.shape[0] - zero_flag = torch.ones(nsamples, 1, 1, device=torch.cuda.current_device()).to(embeddings.dtype) * (1 - drop_rate) - zero_flag = torch.bernoulli(zero_flag).cuda(non_blocking=True) - embeddings = embeddings * zero_flag - return embeddings - - -def disabled_train(self, mode=True): - """Overwrite model.train with this function to make sure train/eval mode - does not change anymore.""" - return self - - -def uniform_on_device(r1, r2, shape, device): - return (r1 - r2) * torch.rand(*shape, device=device) + r2 - - -class DDPM(torch.nn.Module): - def __init__(self, cfg): - super().__init__() - assert cfg.parameterization in ["eps", "x0", "v"], 'currently only supporting "eps" and "x0" and "v"' - self.parameterization = cfg.parameterization - logging.info(f"{self.__class__.__name__}: Running in {self.parameterization}-prediction mode") - self.cond_stage_model = None - self.clip_denoised = cfg.clip_denoised - self.log_every_t = cfg.log_every_t - self.first_stage_key = cfg.first_stage_key - self.image_size = cfg.image_size # try conv? - self.channels = cfg.channels - self.channels_last = cfg.get("channels_last", False) - self.use_positional_encodings = cfg.use_positional_encodings - self.model = DiffusionWrapper( - cfg.unet_config, - cfg.conditioning_key, - cfg.inductor, - cfg.inductor_cudagraphs, - cfg.get("capture_cudagraph_iters", -1), - ) - self.model_type = None - count_params(self.model, verbose=True) - - self.v_posterior = cfg.v_posterior - self.original_elbo_weight = cfg.original_elbo_weight - self.l_simple_weight = cfg.l_simple_weight - - self.register_schedule( - given_betas=cfg.given_betas, - beta_schedule=cfg.beta_schedule, - timesteps=cfg.timesteps, - linear_start=cfg.linear_start, - linear_end=cfg.linear_end, - cosine_s=cfg.cosine_s, - ) - - self.loss_type = cfg.loss_type - - self.learn_logvar = cfg.learn_logvar - self.logvar = torch.full(fill_value=cfg.logvar_init, size=(self.num_timesteps,)) - if self.learn_logvar: - self.logvar = nn.Parameter(self.logvar, requires_grad=True) - - self.rng = torch.Generator(device=torch.cuda.current_device(),) - - def register_schedule( - self, - given_betas=None, - beta_schedule="linear", - timesteps=1000, - linear_start=1e-4, - linear_end=2e-2, - cosine_s=8e-3, - ): - if exists(given_betas): - betas = given_betas - else: - betas = make_beta_schedule( - beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s - ) - alphas = 1.0 - betas - alphas_cumprod = np.cumprod(alphas, axis=0) - alphas_cumprod_prev = np.append(1.0, alphas_cumprod[:-1]) - - (timesteps,) = betas.shape - self.num_timesteps = int(timesteps) - self.linear_start = linear_start - self.linear_end = linear_end - assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' - - to_torch = partial(torch.tensor, dtype=torch.float32) - - self.register_buffer('betas', to_torch(betas)) - self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) - self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) - - # calculations for diffusion q(x_t | x_{t-1}) and others - self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) - self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1.0 - alphas_cumprod))) - self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1.0 - alphas_cumprod))) - self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod))) - self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod - 1))) - - # calculations for posterior q(x_{t-1} | x_t, x_0) - posterior_variance = (1 - self.v_posterior) * betas * (1.0 - alphas_cumprod_prev) / ( - 1.0 - alphas_cumprod - ) + self.v_posterior * betas - # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) - self.register_buffer('posterior_variance', to_torch(posterior_variance)) - # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain - self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20)))) - self.register_buffer( - 'posterior_mean_coef1', to_torch(betas * np.sqrt(alphas_cumprod_prev) / (1.0 - alphas_cumprod)) - ) - self.register_buffer( - 'posterior_mean_coef2', to_torch((1.0 - alphas_cumprod_prev) * np.sqrt(alphas) / (1.0 - alphas_cumprod)) - ) - - if self.parameterization == "eps": - lvlb_weights = self.betas ** 2 / ( - 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod) - ) - elif self.parameterization == "x0": - lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2.0 * 1 - torch.Tensor(alphas_cumprod)) - elif self.parameterization == "v": - lvlb_weights = torch.ones_like( - self.betas ** 2 / (2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod)) - ) - else: - raise NotImplementedError("mu not supported") - # TODO how to choose this term - lvlb_weights[0] = lvlb_weights[1] - self.register_buffer('lvlb_weights', lvlb_weights, persistent=False) - assert not torch.isnan(self.lvlb_weights).all() - - def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): - pl_sd = torch.load(path, map_location="cpu") - if "state_dict" in list(pl_sd.keys()): - pl_sd = pl_sd["state_dict"] - - sd = {} - first_key = list(pl_sd.keys())[0] - # State keys of model trained with TorchDynamo changed from - # "model.xxx" to "model._orig_mod.xxx" - for k, v in pl_sd.items(): - new_k = k.replace("._orig_mod", "") - # compatibility for stable diffusion old checkpoint - # remove megatron wrapper prefix - if first_key == "model.betas": - new_k = new_k.lstrip("model.") - sd[new_k] = v - - keys = list(sd.keys()) - for k in keys: - for ik in ignore_keys: - if k.startswith(ik): - logging.info("Deleting key {} from state_dict.".format(k)) - del sd[k] - missing, unexpected = ( - self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict(sd, strict=False) - ) - logging.info(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") - if len(missing) > 0: - logging.info(f"Missing Keys: {missing}") - if len(unexpected) > 0: - logging.info(f"Unexpected Keys: {unexpected}") - - def q_mean_variance(self, x_start, t): - """ - Get the distribution q(x_t | x_0). - :param x_start: the [N x C x ...] tensor of noiseless inputs. - :param t: the number of diffusion steps (minus 1). Here, 0 means one step. - :return: A tuple (mean, variance, log_variance), all of x_start's shape. - """ - mean = extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start - variance = extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape) - log_variance = extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape) - return mean, variance, log_variance - - def predict_start_from_noise(self, x_t, t, noise): - return ( - extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - - extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise - ) - - def predict_start_from_z_and_v(self, x_t, t, v): - return ( - extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * x_t - - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * v - ) - - def predict_eps_from_z_and_v(self, x_t, t, v): - return ( - extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * v - + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * x_t - ) - - def q_posterior(self, x_start, x_t, t): - posterior_mean = ( - extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start - + extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t - ) - posterior_variance = extract_into_tensor(self.posterior_variance, t, x_t.shape) - posterior_log_variance_clipped = extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape) - return posterior_mean, posterior_variance, posterior_log_variance_clipped - - def p_mean_variance(self, x, t, clip_denoised: bool): - model_out = self.model(x, t) - if self.parameterization == "eps": - x_recon = self.predict_start_from_noise(x, t=t, noise=model_out) - elif self.parameterization == "x0": - x_recon = model_out - if clip_denoised: - x_recon.clamp_(-1.0, 1.0) - - model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) - return model_mean, posterior_variance, posterior_log_variance - - @torch.no_grad() - def p_sample(self, x, t, clip_denoised=True, repeat_noise=False): - b, *_, device = *x.shape, x.device - model_mean, _, model_log_variance = self.p_mean_variance(x=x, t=t, clip_denoised=clip_denoised) - noise = noise_like(x.shape, device, repeat_noise) - # no noise when t == 0 - nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))) - return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise - - @torch.no_grad() - def p_sample_loop(self, shape, return_intermediates=False): - device = self.betas.device - b = shape[0] - img = torch.randn(shape, generator=self.rng, device=device) - intermediates = [img] - for i in tqdm(reversed(range(0, self.num_timesteps)), desc='Sampling t', total=self.num_timesteps): - img = self.p_sample( - img, torch.full((b,), i, device=device, dtype=torch.long), clip_denoised=self.clip_denoised - ) - if i % self.log_every_t == 0 or i == self.num_timesteps - 1: - intermediates.append(img) - if return_intermediates: - return img, intermediates - return img - - @torch.no_grad() - def sample(self, batch_size=16, return_intermediates=False): - image_size = self.image_size - channels = self.channels - return self.p_sample_loop( - (batch_size, channels, image_size, image_size), return_intermediates=return_intermediates - ) - - def q_sample(self, x_start, t, noise=None): - noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) - return ( - extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start - + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise - ) - - def get_v(self, x, noise, t): - return ( - extract_into_tensor(self.sqrt_alphas_cumprod, t, x.shape) * noise - - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x.shape) * x - ) - - def get_loss(self, pred, target, mean=True): - if self.loss_type == 'l1': - loss = (target - pred).abs() - if mean: - loss = loss.mean() - elif self.loss_type == 'l2': - if mean: - loss = torch.nn.functional.mse_loss(target, pred) - else: - loss = torch.nn.functional.mse_loss(target, pred, reduction='none') - else: - raise NotImplementedError("unknown loss type '{loss_type}'") - - return loss - - def p_losses(self, x_start, t, noise=None): - noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) - x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) - model_out = self.model(x_noisy, t) - - loss_dict = {} - if self.parameterization == "eps": - target = noise - elif self.parameterization == "x0": - target = x_start - elif self.parameterization == "v": - target = self.get_v(x_start, noise, t) - else: - raise NotImplementedError(f"Paramterization {self.parameterization} not yet supported") - - loss = self.get_loss(model_out, target, mean=False).mean(dim=[1, 2, 3]) - - log_prefix = 'train' if self.training else 'val' - - loss_dict.update({f'{log_prefix}/loss_simple': loss.mean()}) - loss_simple = loss.mean() * self.l_simple_weight - - loss_vlb = (self.lvlb_weights[t] * loss).mean() - loss_dict.update({f'{log_prefix}/loss_vlb': loss_vlb}) - - loss = loss_simple + self.original_elbo_weight * loss_vlb - - loss_dict.update({f'{log_prefix}/loss': loss}) - - return loss, loss_dict - - def forward(self, x, *args, **kwargs): - # b, c, h, w, device, img_size, = *x.shape, x.device, self.image_size - # assert h == img_size and w == img_size, f'height and width of image must be {img_size}' - t = torch.randint(0, self.num_timesteps, (x.shape[0],), generator=self.rng, device=x.device).long() - return self.p_losses(x, t, *args, **kwargs) - - def get_input(self, batch, k): - x = batch[k] - if len(x.shape) == 3: - x = x[..., None] - if self.channels_last: - x = x.permute(0, 3, 1, 2).to(non_blocking=True) - else: - x = rearrange(x, "b h w c -> b c h w") - x = x.to(memory_format=torch.contiguous_format, non_blocking=True) - return x - - def shared_step(self, batch): - x = self.get_input(batch, self.first_stage_key) - loss, loss_dict = self(x) - return loss, loss_dict - - def _get_rows_from_list(self, samples): - n_imgs_per_row = len(samples) - denoise_grid = rearrange(samples, 'n b c h w -> b n c h w') - denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w') - denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row) - return denoise_grid - - @torch.no_grad() - def log_images(self, batch, N=8, n_row=2, sample=True, return_keys=None, **kwargs): - log = dict() - x = self.get_input(batch, self.first_stage_key) - N = min(x.shape[0], N) - n_row = min(x.shape[0], n_row) - x = x[:N] - log["inputs"] = x - - # get diffusion row - diffusion_row = list() - x_start = x[:n_row] - - for t in range(self.num_timesteps): - if t % self.log_every_t == 0 or t == self.num_timesteps - 1: - t = repeat(torch.tensor([t]), '1 -> b', b=n_row) - t = t.long() - noise = randn_like(x_start, generator=self.rng) - x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) - diffusion_row.append(x_noisy) - - log["diffusion_row"] = self._get_rows_from_list(diffusion_row) - - if sample: - # get denoise row - with self.ema_scope("Plotting"): - samples, denoise_row = self.sample(batch_size=N, return_intermediates=True) - - log["samples"] = samples - log["denoise_row"] = self._get_rows_from_list(denoise_row) - - if return_keys: - if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0: - return log - else: - return {key: log[key] for key in return_keys} - return log - - -class LatentDiffusion(DDPM, Serialization): - """main class""" - - def __init__(self, cfg, model_parallel_config): - self.config = model_parallel_config - self.num_timesteps_cond = default(cfg.num_timesteps_cond, 1) - self.scale_by_std = cfg.scale_by_std - assert self.num_timesteps_cond <= cfg.timesteps - # for backwards compatibility after implementation of DiffusionWrapper - if cfg.conditioning_key is None: - conditioning_key = 'concat' if cfg.concat_mode else 'crossattn' - else: - conditioning_key = cfg.conditioning_key - if cfg.cond_stage_config == '__is_unconditional__': - conditioning_key = None - ckpt_path = cfg.ckpt_path - ignore_keys = cfg.ignore_keys - cfg.conditioning_key = conditioning_key - super().__init__(cfg=cfg) - self.precision = cfg.precision - self.concat_mode = cfg.concat_mode - self.cond_stage_trainable = cfg.cond_stage_trainable - self.cond_stage_key = cfg.cond_stage_key - - self.num_downs = 0 - if "ddconfig" in cfg.first_stage_config and "ch_mult" in cfg.first_stage_config.ddconfig: - self.num_downs = len(cfg.first_stage_config.ddconfig.ch_mult) - 1 - if not cfg.scale_by_std: - self.scale_factor = cfg.scale_factor - else: - self.register_buffer('scale_factor', torch.tensor(cfg.scale_factor)) - self.instantiate_first_stage(cfg.first_stage_config) - self.instantiate_cond_stage(cfg.cond_stage_config) - self.cond_stage_forward = cfg.cond_stage_forward - self.clip_denoised = False - self.bbox_tokenizer = None - self.text_embedding_dropout_rate = cfg.text_embedding_dropout_rate - self.fused_opt = cfg.fused_opt - - self.restarted_from_ckpt = False - if ckpt_path is not None: - self.init_from_ckpt(ckpt_path, ignore_keys) - self.restarted_from_ckpt = True - - if self.channels_last: - self.first_stage_model = self.first_stage_model.to(memory_format=torch.channels_last) - self.model = self.model.to(memory_format=torch.channels_last) - - def make_cond_schedule(self,): - self.cond_ids = torch.full(size=(self.num_timesteps,), fill_value=self.num_timesteps - 1, dtype=torch.long) - ids = torch.round(torch.linspace(0, self.num_timesteps - 1, self.num_timesteps_cond)).long() - self.cond_ids[: self.num_timesteps_cond] = ids - - def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): - # only for very first batch - # set rescale weight to 1./std of encodings - logging.info("### USING STD-RESCALING ###") - x = super().get_input(batch, self.first_stage_key) - encoder_posterior = self.encode_first_stage(x) - z = self.get_first_stage_encoding(encoder_posterior).detach() - del self.scale_factor - self.register_buffer('scale_factor', 1.0 / z.flatten().std()) - logging.info(f"setting self.scale_factor to {self.scale_factor}") - logging.info("### USING STD-RESCALING ###") - - def register_schedule( - self, - given_betas=None, - beta_schedule="linear", - timesteps=1000, - linear_start=1e-4, - linear_end=2e-2, - cosine_s=8e-3, - ): - super().register_schedule(given_betas, beta_schedule, timesteps, linear_start, linear_end, cosine_s) - - self.shorten_cond_schedule = self.num_timesteps_cond > 1 - if self.shorten_cond_schedule: - self.make_cond_schedule() - - def instantiate_first_stage(self, config): - model = LatentDiffusion.from_config_dict(config) - self.first_stage_model = model.eval() - self.first_stage_model.train = disabled_train - for param in self.first_stage_model.parameters(): - param.requires_grad = False - - def instantiate_cond_stage(self, config): - if not self.cond_stage_trainable: - if config == "__is_first_stage__": - logging.info("Using first stage also as cond stage.") - self.cond_stage_model = self.first_stage_model - elif config == "__is_unconditional__": - logging.info(f"Training {self.__class__.__name__} as an unconditional model.") - self.cond_stage_model = None - # self.be_unconditional = True - else: - model = LatentDiffusion.from_config_dict(config) - self.cond_stage_model = model.eval() - self.cond_stage_model.train = disabled_train - for param in self.cond_stage_model.parameters(): - param.requires_grad = False - else: - assert config != '__is_first_stage__' - assert config != '__is_unconditional__' - model = LatentDiffusion.from_config_dict(config) - self.cond_stage_model = model - - def _get_denoise_row_from_list(self, samples, desc='', force_no_decoder_quantization=False): - denoise_row = [] - for zd in tqdm(samples, desc=desc): - denoise_row.append(self.decode_first_stage(zd, force_not_quantize=force_no_decoder_quantization)) - n_imgs_per_row = len(denoise_row) - denoise_row = torch.stack(denoise_row) # n_log_step, n_row, C, H, W - denoise_grid = rearrange(denoise_row, 'n b c h w -> b n c h w') - denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w') - denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row) - return denoise_grid - - def get_first_stage_encoding(self, encoder_posterior): - if isinstance(encoder_posterior, DiagonalGaussianDistribution): - z = encoder_posterior.sample() - elif isinstance(encoder_posterior, torch.Tensor): - z = encoder_posterior - else: - raise NotImplementedError(f"encoder_posterior of type '{type(encoder_posterior)}' not yet implemented") - return self.scale_factor * z - - def get_learned_conditioning(self, c): - if self.cond_stage_forward is None: - if hasattr(self.cond_stage_model, 'encode') and callable(self.cond_stage_model.encode): - c = self.cond_stage_model.encode(c) - if isinstance(c, DiagonalGaussianDistribution): - c = c.mode() - else: - c = self.cond_stage_model(c) - else: - assert hasattr(self.cond_stage_model, self.cond_stage_forward) - c = getattr(self.cond_stage_model, self.cond_stage_forward)(c) - return c - - def meshgrid(self, h, w): - y = torch.arange(0, h).view(h, 1, 1).repeat(1, w, 1) - x = torch.arange(0, w).view(1, w, 1).repeat(h, 1, 1) - - arr = torch.cat([y, x], dim=-1) - return arr - - def delta_border(self, h, w): - """ - :param h: height - :param w: width - :return: normalized distance to image border, - wtith min distance = 0 at border and max dist = 0.5 at image center - """ - lower_right_corner = torch.tensor([h - 1, w - 1]).view(1, 1, 2) - arr = self.meshgrid(h, w) / lower_right_corner - dist_left_up = torch.min(arr, dim=-1, keepdims=True)[0] - dist_right_down = torch.min(1 - arr, dim=-1, keepdims=True)[0] - edge_dist = torch.min(torch.cat([dist_left_up, dist_right_down], dim=-1), dim=-1)[0] - return edge_dist - - def get_weighting(self, h, w, Ly, Lx, device): - weighting = self.delta_border(h, w) - weighting = torch.clip( - weighting, self.split_input_params["clip_min_weight"], self.split_input_params["clip_max_weight"], - ) - weighting = weighting.view(1, h * w, 1).repeat(1, 1, Ly * Lx).to(device) - - if self.split_input_params["tie_braker"]: - L_weighting = self.delta_border(Ly, Lx) - L_weighting = torch.clip( - L_weighting, - self.split_input_params["clip_min_tie_weight"], - self.split_input_params["clip_max_tie_weight"], - ) - - L_weighting = L_weighting.view(1, 1, Ly * Lx).to(device) - weighting = weighting * L_weighting - return weighting - - def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo load once not every time, shorten code - """ - :param x: img of size (bs, c, h, w) - :return: n img crops of size (n, bs, c, kernel_size[0], kernel_size[1]) - """ - bs, nc, h, w = x.shape - - # number of crops in image - Ly = (h - kernel_size[0]) // stride[0] + 1 - Lx = (w - kernel_size[1]) // stride[1] + 1 - - if uf == 1 and df == 1: - fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) - unfold = torch.nn.Unfold(**fold_params) - - fold = torch.nn.Fold(output_size=x.shape[2:], **fold_params) - - weighting = self.get_weighting(kernel_size[0], kernel_size[1], Ly, Lx, x.device).to(x.dtype) - normalization = fold(weighting).view(1, 1, h, w) # normalizes the overlap - weighting = weighting.view((1, 1, kernel_size[0], kernel_size[1], Ly * Lx)) - - elif uf > 1 and df == 1: - fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) - unfold = torch.nn.Unfold(**fold_params) - - fold_params2 = dict( - kernel_size=(kernel_size[0] * uf, kernel_size[0] * uf), - dilation=1, - padding=0, - stride=(stride[0] * uf, stride[1] * uf), - ) - fold = torch.nn.Fold(output_size=(x.shape[2] * uf, x.shape[3] * uf), **fold_params2) - - weighting = self.get_weighting(kernel_size[0] * uf, kernel_size[1] * uf, Ly, Lx, x.device).to(x.dtype) - normalization = fold(weighting).view(1, 1, h * uf, w * uf) # normalizes the overlap - weighting = weighting.view((1, 1, kernel_size[0] * uf, kernel_size[1] * uf, Ly * Lx)) - - elif df > 1 and uf == 1: - fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) - unfold = torch.nn.Unfold(**fold_params) - - fold_params2 = dict( - kernel_size=(kernel_size[0] // df, kernel_size[0] // df), - dilation=1, - padding=0, - stride=(stride[0] // df, stride[1] // df), - ) - fold = torch.nn.Fold(output_size=(x.shape[2] // df, x.shape[3] // df), **fold_params2) - - weighting = self.get_weighting(kernel_size[0] // df, kernel_size[1] // df, Ly, Lx, x.device).to(x.dtype) - normalization = fold(weighting).view(1, 1, h // df, w // df) # normalizes the overlap - weighting = weighting.view((1, 1, kernel_size[0] // df, kernel_size[1] // df, Ly * Lx)) - - else: - raise NotImplementedError - - return fold, unfold, normalization, weighting - - @torch.no_grad() - def get_input( - self, - batch, - k, - return_first_stage_outputs=False, - force_c_encode=False, - cond_key=None, - return_original_cond=False, - bs=None, - ): - if self.first_stage_key.endswith('encoded'): - gaussian_parameters = batch[self.first_stage_key] - encoder_posterior = DiagonalGaussianDistribution(gaussian_parameters) - else: - x = super().get_input(batch, k) - if bs is not None: - x = x[:bs] - - encoder_posterior = self.encode_first_stage(x) - z = self.get_first_stage_encoding(encoder_posterior).detach() - - if self.model.conditioning_key is not None: - if cond_key is None: - cond_key = self.cond_stage_key - if cond_key != self.first_stage_key: - if cond_key in ['captions', 'coordinates_bbox', 'txt'] or cond_key.endswith("encoded"): - xc = batch[cond_key] - elif cond_key == 'class_label': - xc = batch - else: - xc = super().get_input(batch, cond_key) - else: - xc = x - if (not self.cond_stage_trainable or force_c_encode) and (not cond_key.endswith('encoded')): - if isinstance(xc, dict) or isinstance(xc, list): - # import pudb; pudb.set_trace() - c = self.get_learned_conditioning(xc) - else: - c = self.get_learned_conditioning(xc) - else: - c = xc - if bs is not None: - c = c[:bs] - - if self.use_positional_encodings: - pos_x, pos_y = self.compute_latent_shifts(batch) - ckey = __conditioning_keys__[self.model.conditioning_key] - c = {ckey: c, 'pos_x': pos_x, 'pos_y': pos_y} - - if self.text_embedding_dropout_rate > 0: - assert self.text_embedding_dropout_rate < 1.0 - c = random_dropout(c, drop_rate=self.text_embedding_dropout_rate) - - else: - c = None - xc = None - if self.use_positional_encodings: - pos_x, pos_y = self.compute_latent_shifts(batch) - c = {'pos_x': pos_x, 'pos_y': pos_y} - out = [z, c] - if return_first_stage_outputs: - xrec = self.decode_first_stage(z) - out.extend([x, xrec]) - if return_original_cond: - out.append(xc) - return out - - @torch.no_grad() - def decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): - if predict_cids: - if z.dim() == 4: - z = torch.argmax(z.exp(), dim=1).long() - z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) - z = rearrange(z, 'b h w c -> b c h w').contiguous() - - z = 1.0 / self.scale_factor * z - - if hasattr(self, "split_input_params"): - if self.split_input_params["patch_distributed_vq"]: - ks = self.split_input_params["ks"] # eg. (128, 128) - stride = self.split_input_params["stride"] # eg. (64, 64) - uf = self.split_input_params["vqf"] - bs, nc, h, w = z.shape - if ks[0] > h or ks[1] > w: - ks = (min(ks[0], h), min(ks[1], w)) - logging.info("reducing Kernel") - - if stride[0] > h or stride[1] > w: - stride = (min(stride[0], h), min(stride[1], w)) - logging.info("reducing stride") - - fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf) - - z = unfold(z) # (bn, nc * prod(**ks), L) - # 1. Reshape to img shape - z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) - - # 2. apply model loop over last dim - if isinstance(self.first_stage_model, VQModelInterface): - output_list = [ - self.first_stage_model.decode( - z[:, :, :, :, i], force_not_quantize=predict_cids or force_not_quantize - ) - for i in range(z.shape[-1]) - ] - else: - - output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) for i in range(z.shape[-1])] - - o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) - o = o * weighting - # Reverse 1. reshape to img shape - o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) - # stitch crops together - decoded = fold(o) - decoded = decoded / normalization # norm is shape (1, 1, h, w) - return decoded - else: - if isinstance(self.first_stage_model, VQModelInterface): - return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) - else: - return self.first_stage_model.decode(z) - - else: - if isinstance(self.first_stage_model, VQModelInterface): - return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) - else: - return self.first_stage_model.decode(z) - - # same as above but without decorator - def differentiable_decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): - if predict_cids: - if z.dim() == 4: - z = torch.argmax(z.exp(), dim=1).long() - z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) - z = rearrange(z, 'b h w c -> b c h w').contiguous() - - z = 1.0 / self.scale_factor * z - - if hasattr(self, "split_input_params"): - if self.split_input_params["patch_distributed_vq"]: - ks = self.split_input_params["ks"] # eg. (128, 128) - stride = self.split_input_params["stride"] # eg. (64, 64) - uf = self.split_input_params["vqf"] - bs, nc, h, w = z.shape - if ks[0] > h or ks[1] > w: - ks = (min(ks[0], h), min(ks[1], w)) - logging.info("reducing Kernel") - - if stride[0] > h or stride[1] > w: - stride = (min(stride[0], h), min(stride[1], w)) - logging.info("reducing stride") - - fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf) - - z = unfold(z) # (bn, nc * prod(**ks), L) - # 1. Reshape to img shape - z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) - - # 2. apply model loop over last dim - if isinstance(self.first_stage_model, VQModelInterface): - output_list = [ - self.first_stage_model.decode( - z[:, :, :, :, i], force_not_quantize=predict_cids or force_not_quantize - ) - for i in range(z.shape[-1]) - ] - else: - - output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) for i in range(z.shape[-1])] - - o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) - o = o * weighting - # Reverse 1. reshape to img shape - o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) - # stitch crops together - decoded = fold(o) - decoded = decoded / normalization # norm is shape (1, 1, h, w) - return decoded - else: - if isinstance(self.first_stage_model, VQModelInterface): - return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) - else: - return self.first_stage_model.decode(z) - - else: - if isinstance(self.first_stage_model, VQModelInterface): - return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) - else: - return self.first_stage_model.decode(z) - - @torch.no_grad() - def encode_first_stage(self, x): - if hasattr(self, "split_input_params"): - if self.split_input_params["patch_distributed_vq"]: - ks = self.split_input_params["ks"] # eg. (128, 128) - stride = self.split_input_params["stride"] # eg. (64, 64) - df = self.split_input_params["vqf"] - self.split_input_params['original_image_size'] = x.shape[-2:] - bs, nc, h, w = x.shape - if ks[0] > h or ks[1] > w: - ks = (min(ks[0], h), min(ks[1], w)) - logging.info("reducing Kernel") - - if stride[0] > h or stride[1] > w: - stride = (min(stride[0], h), min(stride[1], w)) - logging.info("reducing stride") - - fold, unfold, normalization, weighting = self.get_fold_unfold(x, ks, stride, df=df) - z = unfold(x) # (bn, nc * prod(**ks), L) - # Reshape to img shape - z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) - - output_list = [self.first_stage_model.encode(z[:, :, :, :, i]) for i in range(z.shape[-1])] - - o = torch.stack(output_list, axis=-1) - o = o * weighting - - # Reverse reshape to img shape - o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) - # stitch crops together - decoded = fold(o) - decoded = decoded / normalization - return decoded - - else: - return self.first_stage_model.encode(x) - else: - return self.first_stage_model.encode(x) - - def shared_step(self, batch, **kwargs): - x, c = self.get_input(batch, self.first_stage_key) - loss = self(x, c) - return loss - - def forward(self, x, c, *args, **kwargs): - t = torch.randint(0, self.num_timesteps, (x.shape[0],), generator=self.rng, device=x.device).long() - if self.model.conditioning_key is not None: - assert c is not None - if self.cond_stage_trainable: - c = self.get_learned_conditioning(c) - if self.shorten_cond_schedule: # TODO: drop this option - tc = self.cond_ids[t] - c = self.q_sample(x_start=c, t=tc, noise=randn_like(c.float(), generator=self.rng)) - return self.p_losses(x, c, t, *args, **kwargs) - - def _rescale_annotations(self, bboxes, crop_coordinates): # TODO: move to dataset - def rescale_bbox(bbox): - x0 = clamp((bbox[0] - crop_coordinates[0]) / crop_coordinates[2]) - y0 = clamp((bbox[1] - crop_coordinates[1]) / crop_coordinates[3]) - w = min(bbox[2] / crop_coordinates[2], 1 - x0) - h = min(bbox[3] / crop_coordinates[3], 1 - y0) - return x0, y0, w, h - - return [rescale_bbox(b) for b in bboxes] - - def apply_model(self, x_noisy, t, cond, return_ids=False): - - if isinstance(cond, dict): - # hybrid case, cond is exptected to be a dict - for key in cond: - if not isinstance(cond[key], list): - cond[key] = [cond[key]] - else: - if not isinstance(cond, list): - cond = [cond] - key = 'c_concat' if self.model.conditioning_key == 'concat' else 'c_crossattn' - cond = {key: cond} - - if hasattr(self, "split_input_params"): - assert len(cond) == 1 # todo can only deal with one conditioning atm - assert not return_ids - ks = self.split_input_params["ks"] # eg. (128, 128) - stride = self.split_input_params["stride"] # eg. (64, 64) - - h, w = x_noisy.shape[-2:] - - fold, unfold, normalization, weighting = self.get_fold_unfold(x_noisy, ks, stride) - - z = unfold(x_noisy) # (bn, nc * prod(**ks), L) - # Reshape to img shape - z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) - z_list = [z[:, :, :, :, i] for i in range(z.shape[-1])] - - if ( - self.cond_stage_key in ["image", "LR_image", "segmentation", 'bbox_img'] - and self.model.conditioning_key - ): # todo check for completeness - c_key = next(iter(cond.keys())) # get key - c = next(iter(cond.values())) # get value - assert len(c) == 1 # todo extend to list with more than one elem - c = c[0] # get element - - c = unfold(c) - c = c.view((c.shape[0], -1, ks[0], ks[1], c.shape[-1])) # (bn, nc, ks[0], ks[1], L ) - - cond_list = [{c_key: [c[:, :, :, :, i]]} for i in range(c.shape[-1])] - - elif self.cond_stage_key == 'coordinates_bbox': - assert ( - 'original_image_size' in self.split_input_params - ), 'BoudingBoxRescaling is missing original_image_size' - - # assuming padding of unfold is always 0 and its dilation is always 1 - n_patches_per_row = int((w - ks[0]) / stride[0] + 1) - full_img_h, full_img_w = self.split_input_params['original_image_size'] - # as we are operating on latents, we need the factor from the original image size to the - # spatial latent size to properly rescale the crops for regenerating the bbox annotations - num_downs = self.first_stage_model.encoder.num_resolutions - 1 - rescale_latent = 2 ** (num_downs) - - # get top left postions of patches as conforming for the bbbox tokenizer, therefore we - # need to rescale the tl patch coordinates to be in between (0,1) - tl_patch_coordinates = [ - ( - rescale_latent * stride[0] * (patch_nr % n_patches_per_row) / full_img_w, - rescale_latent * stride[1] * (patch_nr // n_patches_per_row) / full_img_h, - ) - for patch_nr in range(z.shape[-1]) - ] - - # patch_limits are tl_coord, width and height coordinates as (x_tl, y_tl, h, w) - patch_limits = [ - (x_tl, y_tl, rescale_latent * ks[0] / full_img_w, rescale_latent * ks[1] / full_img_h) - for x_tl, y_tl in tl_patch_coordinates - ] - # patch_values = [(np.arange(x_tl,min(x_tl+ks, 1.)),np.arange(y_tl,min(y_tl+ks, 1.))) for x_tl, y_tl in tl_patch_coordinates] - - # tokenize crop coordinates for the bounding boxes of the respective patches - patch_limits_tknzd = [ - torch.LongTensor(self.bbox_tokenizer._crop_encoder(bbox))[None] for bbox in patch_limits - ] # list of length l with tensors of shape (1, 2) - logging.info(patch_limits_tknzd[0].shape) - # cut tknzd crop position from conditioning - assert isinstance(cond, dict), 'cond must be dict to be fed into model' - cut_cond = cond['c_crossattn'][0][..., :-2] - logging.info(cut_cond.shape) - - adapted_cond = torch.stack([torch.cat([cut_cond, p], dim=1) for p in patch_limits_tknzd]) - adapted_cond = rearrange(adapted_cond, 'l b n -> (l b) n') - logging.info(adapted_cond.shape) - adapted_cond = self.get_learned_conditioning(adapted_cond) - logging.info(adapted_cond.shape) - adapted_cond = rearrange(adapted_cond, '(l b) n d -> l b n d', l=z.shape[-1]) - logging.info(adapted_cond.shape) - - cond_list = [{'c_crossattn': [e]} for e in adapted_cond] - - else: - cond_list = [cond for i in range(z.shape[-1])] # Todo make this more efficient - - # apply model by loop over crops - output_list = [self.model(z_list[i], t, **cond_list[i]) for i in range(z.shape[-1])] - assert not isinstance( - output_list[0], tuple - ) # todo cant deal with multiple model outputs check this never happens - - o = torch.stack(output_list, axis=-1) - o = o * weighting - # Reverse reshape to img shape - o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) - # stitch crops together - x_recon = fold(o) / normalization - - else: - x_recon = self.model(x_noisy, t, **cond) - - if isinstance(x_recon, tuple) and not return_ids: - return x_recon[0] - else: - return x_recon - - def _predict_eps_from_xstart(self, x_t, t, pred_xstart): - return ( - extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart - ) / extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) - - def _prior_bpd(self, x_start): - """ - Get the prior KL term for the variational lower-bound, measured in - bits-per-dim. - This term can't be optimized, as it only depends on the encoder. - :param x_start: the [N x C x ...] tensor of inputs. - :return: a batch of [N] KL values (in bits), one per batch element. - """ - batch_size = x_start.shape[0] - t = torch.tensor([self.num_timesteps - 1] * batch_size, device=x_start.device) - qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t) - kl_prior = normal_kl(mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0) - return mean_flat(kl_prior) / np.log(2.0) - - def p_losses(self, x_start, cond, t, noise=None): - noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) - x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) - model_output = self.apply_model(x_noisy, t, cond) - - loss_dict = {} - prefix = 'train' if self.training else 'val' - - if self.parameterization == "x0": - target = x_start - elif self.parameterization == "eps": - target = noise - elif self.parameterization == "v": - target = self.get_v(x_start, noise, t) - else: - raise NotImplementedError() - - if (self.precision in ['bf16', 'bf16-mixed']) or (self.precision in [16, '16', '16-mixed']): - model_output = model_output.type(torch.float32) - loss_simple = self.get_loss(model_output, target, mean=False).mean([1, 2, 3]) - loss_dict.update({f'{prefix}/loss_simple': loss_simple.mean()}) - self.logvar = self.logvar.cuda(non_blocking=True) - logvar_t = self.logvar[t].cuda(non_blocking=True) - loss = loss_simple / torch.exp(logvar_t) + logvar_t - # loss = loss_simple / torch.exp(self.logvar) + self.logvar - if self.learn_logvar: - loss_dict.update({f'{prefix}/loss_gamma': loss.mean()}) - loss_dict.update({'logvar': self.logvar.data.mean()}) - - loss = self.l_simple_weight * loss.mean() - - loss_vlb = self.get_loss(model_output, target, mean=False).mean(dim=(1, 2, 3)) - loss_vlb = (self.lvlb_weights[t] * loss_vlb).mean() - loss_dict.update({f'{prefix}/loss_vlb': loss_vlb}) - loss += self.original_elbo_weight * loss_vlb - loss_dict.update({f'{prefix}/loss': loss}) - - return loss, loss_dict - - def p_mean_variance( - self, - x, - c, - t, - clip_denoised: bool, - return_codebook_ids=False, - quantize_denoised=False, - return_x0=False, - score_corrector=None, - corrector_kwargs=None, - ): - t_in = t - model_out = self.apply_model(x, t_in, c, return_ids=return_codebook_ids) - - if score_corrector is not None: - assert self.parameterization == "eps" - model_out = score_corrector.modify_score(self, model_out, x, t, c, **corrector_kwargs) - - if return_codebook_ids: - model_out, logits = model_out - - if self.parameterization == "eps": - x_recon = self.predict_start_from_noise(x, t=t, noise=model_out) - elif self.parameterization == "x0": - x_recon = model_out - else: - raise NotImplementedError() - - if clip_denoised: - x_recon.clamp_(-1.0, 1.0) - if quantize_denoised: - x_recon, _, [_, _, indices] = self.first_stage_model.quantize(x_recon) - model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) - if return_codebook_ids: - return model_mean, posterior_variance, posterior_log_variance, logits - elif return_x0: - return model_mean, posterior_variance, posterior_log_variance, x_recon - else: - return model_mean, posterior_variance, posterior_log_variance - - @torch.no_grad() - def p_sample( - self, - x, - c, - t, - clip_denoised=False, - repeat_noise=False, - return_codebook_ids=False, - quantize_denoised=False, - return_x0=False, - temperature=1.0, - noise_dropout=0.0, - score_corrector=None, - corrector_kwargs=None, - ): - b, *_, device = *x.shape, x.device - outputs = self.p_mean_variance( - x=x, - c=c, - t=t, - clip_denoised=clip_denoised, - return_codebook_ids=return_codebook_ids, - quantize_denoised=quantize_denoised, - return_x0=return_x0, - score_corrector=score_corrector, - corrector_kwargs=corrector_kwargs, - ) - if return_codebook_ids: - raise DeprecationWarning("Support dropped.") - model_mean, _, model_log_variance, logits = outputs - elif return_x0: - model_mean, _, model_log_variance, x0 = outputs - else: - model_mean, _, model_log_variance = outputs - - noise = noise_like(x.shape, device, repeat_noise) * temperature - if noise_dropout > 0.0: - noise = torch.nn.functional.dropout(noise, p=noise_dropout) - # no noise when t == 0 - nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))) - - if return_codebook_ids: - return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, logits.argmax(dim=1) - if return_x0: - return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, x0 - else: - return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise - - @torch.no_grad() - def progressive_denoising( - self, - cond, - shape, - verbose=True, - callback=None, - quantize_denoised=False, - img_callback=None, - mask=None, - x0=None, - temperature=1.0, - noise_dropout=0.0, - score_corrector=None, - corrector_kwargs=None, - batch_size=None, - x_T=None, - start_T=None, - log_every_t=None, - ): - if not log_every_t: - log_every_t = self.log_every_t - timesteps = self.num_timesteps - if batch_size is not None: - b = batch_size if batch_size is not None else shape[0] - shape = [batch_size] + list(shape) - else: - b = batch_size = shape[0] - if x_T is None: - img = torch.randn(shape, generator=self.rng, device=torch.cuda.current_device()) - else: - img = x_T - intermediates = [] - if cond is not None: - if isinstance(cond, dict): - cond = { - key: cond[key][:batch_size] - if not isinstance(cond[key], list) - else list(map(lambda x: x[:batch_size], cond[key])) - for key in cond - } - else: - cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] - - if start_T is not None: - timesteps = min(timesteps, start_T) - iterator = ( - tqdm(reversed(range(0, timesteps)), desc='Progressive Generation', total=timesteps) - if verbose - else reversed(range(0, timesteps)) - ) - if type(temperature) == float: - temperature = [temperature] * timesteps - - for i in iterator: - ts = torch.full((b,), i, device=torch.cuda.current_device(), dtype=torch.long) - if self.shorten_cond_schedule: - assert self.model.conditioning_key != 'hybrid' - tc = self.cond_ids[ts].to(cond.device) - cond = self.q_sample(x_start=cond, t=tc, noise=randn_like(cond, generator=self.rng)) - - img, x0_partial = self.p_sample( - img, - cond, - ts, - clip_denoised=self.clip_denoised, - quantize_denoised=quantize_denoised, - return_x0=True, - temperature=temperature[i], - noise_dropout=noise_dropout, - score_corrector=score_corrector, - corrector_kwargs=corrector_kwargs, - ) - if mask is not None: - assert x0 is not None - img_orig = self.q_sample(x0, ts) - img = img_orig * mask + (1.0 - mask) * img - - if i % log_every_t == 0 or i == timesteps - 1: - intermediates.append(x0_partial) - if callback: - callback(i) - if img_callback: - img_callback(img, i) - return img, intermediates - - @torch.no_grad() - def p_sample_loop( - self, - cond, - shape, - return_intermediates=False, - x_T=None, - verbose=True, - callback=None, - timesteps=None, - quantize_denoised=False, - mask=None, - x0=None, - img_callback=None, - start_T=None, - log_every_t=None, - ): - - if not log_every_t: - log_every_t = self.log_every_t - device = self.betas.device - b = shape[0] - if x_T is None: - img = torch.randn(shape, generator=self.rng, device=device) - else: - img = x_T - - intermediates = [img] - if timesteps is None: - timesteps = self.num_timesteps - - if start_T is not None: - timesteps = min(timesteps, start_T) - iterator = ( - tqdm(reversed(range(0, timesteps)), desc='Sampling t', total=timesteps) - if verbose - else reversed(range(0, timesteps)) - ) - - if mask is not None: - assert x0 is not None - assert x0.shape[2:3] == mask.shape[2:3] # spatial size has to match - - for i in iterator: - ts = torch.full((b,), i, device=device, dtype=torch.long) - if self.shorten_cond_schedule: - assert self.model.conditioning_key != 'hybrid' - tc = self.cond_ids[ts].to(cond.device) - cond = self.q_sample(x_start=cond, t=tc, noise=randn_like(cond, generator=self.rng)) - - img = self.p_sample(img, cond, ts, clip_denoised=self.clip_denoised, quantize_denoised=quantize_denoised) - if mask is not None: - img_orig = self.q_sample(x0, ts) - img = img_orig * mask + (1.0 - mask) * img - - if i % log_every_t == 0 or i == timesteps - 1: - intermediates.append(img) - if callback: - callback(i) - if img_callback: - img_callback(img, i) - - if return_intermediates: - return img, intermediates - return img - - @torch.no_grad() - def sample( - self, - cond, - batch_size=16, - return_intermediates=False, - x_T=None, - verbose=True, - timesteps=None, - quantize_denoised=False, - mask=None, - x0=None, - shape=None, - **kwargs, - ): - if shape is None: - shape = (batch_size, self.channels, self.image_size, self.image_size) - if cond is not None: - if isinstance(cond, dict): - cond = { - key: cond[key][:batch_size] - if not isinstance(cond[key], list) - else list(map(lambda x: x[:batch_size], cond[key])) - for key in cond - } - else: - cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] - return self.p_sample_loop( - cond, - shape, - return_intermediates=return_intermediates, - x_T=x_T, - verbose=verbose, - timesteps=timesteps, - quantize_denoised=quantize_denoised, - mask=mask, - x0=x0, - ) - - @torch.no_grad() - def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): - - if ddim: - ddim_sampler = DDIMSampler(self) - shape = (self.channels, self.image_size, self.image_size) - samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, shape, cond, verbose=False, **kwargs) - - else: - samples, intermediates = self.sample(cond=cond, batch_size=batch_size, return_intermediates=True, **kwargs) - - return samples, intermediates - - @torch.no_grad() - def log_images( - self, - batch, - N=8, - n_row=4, - sample=True, - ddim_steps=200, - ddim_eta=1.0, - return_keys=None, - quantize_denoised=True, - inpaint=True, - plot_denoise_rows=False, - plot_progressive_rows=True, - plot_diffusion_rows=True, - **kwargs, - ): - - use_ddim = ddim_steps is not None - - log = dict() - z, c, x, xrec, xc = self.get_input( - batch, - self.first_stage_key, - return_first_stage_outputs=True, - force_c_encode=True, - return_original_cond=True, - bs=N, - ) - N = min(x.shape[0], N) - n_row = min(x.shape[0], n_row) - log["inputs"] = x - log["reconstruction"] = xrec - if self.model.conditioning_key is not None: - if hasattr(self.cond_stage_model, "decode"): - xc = self.cond_stage_model.decode(c) - log["conditioning"] = xc - elif self.cond_stage_key in ["caption"]: - xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["caption"]) - log["conditioning"] = xc - elif self.cond_stage_key == 'class_label': - xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["human_label"]) - log['conditioning'] = xc - elif isimage(xc): - log["conditioning"] = xc - if ismap(xc): - log["original_conditioning"] = self.to_rgb(xc) - - if plot_diffusion_rows: - # get diffusion row - diffusion_row = list() - z_start = z[:n_row] - for t in range(self.num_timesteps): - if t % self.log_every_t == 0 or t == self.num_timesteps - 1: - t = repeat(torch.tensor([t]), '1 -> b', b=n_row) - t = t.long() - noise = randn_like(z_start, generator=self.rng) - z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise) - diffusion_row.append(self.decode_first_stage(z_noisy)) - - diffusion_row = torch.stack(diffusion_row) # n_log_step, n_row, C, H, W - diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w') - diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w') - diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0]) - log["diffusion_row"] = diffusion_grid - - if sample: - # get denoise row - with self.ema_scope("Plotting"): - samples, z_denoise_row = self.sample_log( - cond=c, batch_size=N, ddim=use_ddim, ddim_steps=ddim_steps, eta=ddim_eta - ) - # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True) - x_samples = self.decode_first_stage(samples) - log["samples"] = x_samples - if plot_denoise_rows: - denoise_grid = self._get_denoise_row_from_list(z_denoise_row) - log["denoise_row"] = denoise_grid - - if ( - quantize_denoised - and not isinstance(self.first_stage_model, AutoencoderKL) - and not isinstance(self.first_stage_model, IdentityFirstStage) - ): - # also display when quantizing x0 while sampling - with self.ema_scope("Plotting Quantized Denoised"): - samples, z_denoise_row = self.sample_log( - cond=c, - batch_size=N, - ddim=use_ddim, - ddim_steps=ddim_steps, - eta=ddim_eta, - quantize_denoised=True, - ) - # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True, - # quantize_denoised=True) - x_samples = self.decode_first_stage(samples) - log["samples_x0_quantized"] = x_samples - - if inpaint: - # make a simple center square - b, h, w = z.shape[0], z.shape[2], z.shape[3] - mask = torch.ones(N, h, w) - # zeros will be filled in - mask[:, h // 4 : 3 * h // 4, w // 4 : 3 * w // 4] = 0.0 - mask = mask[:, None, ...] - with self.ema_scope("Plotting Inpaint"): - samples, _ = self.sample_log( - cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, ddim_steps=ddim_steps, x0=z[:N], mask=mask - ) - x_samples = self.decode_first_stage(samples) - log["samples_inpainting"] = x_samples - log["mask"] = mask - - # outpaint - with self.ema_scope("Plotting Outpaint"): - samples, _ = self.sample_log( - cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, ddim_steps=ddim_steps, x0=z[:N], mask=mask - ) - x_samples = self.decode_first_stage(samples) - log["samples_outpainting"] = x_samples - - if plot_progressive_rows: - with self.ema_scope("Plotting Progressives"): - img, progressives = self.progressive_denoising( - c, shape=(self.channels, self.image_size, self.image_size), batch_size=N - ) - prog_row = self._get_denoise_row_from_list(progressives, desc="Progressive Generation") - log["progressive_row"] = prog_row - - if return_keys: - if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0: - return log - else: - return {key: log[key] for key in return_keys} - return log - - def parameters(self): - params = list(self.model.parameters()) - if self.cond_stage_trainable: - logging.info(f"{self.__class__.__name__}: Also optimizing conditioner params!") - params = params + list(self.cond_stage_model.parameters()) - if self.learn_logvar: - logging.info('Diffusion model optimizing logvar') - params.append(self.logvar) - return params - - @torch.no_grad() - def to_rgb(self, x): - x = x.float() - if not hasattr(self, "colorize"): - self.colorize = torch.randn(3, x.shape[1], 1, 1, generator=self.rng).to(x) - x = nn.functional.conv2d(x, weight=self.colorize) - x = 2.0 * (x - x.min()) / (x.max() - x.min()) - 1.0 - return x - - def set_input_tensor(self, input_tensor): - """See megatron.model.transformer.set_input_tensor()""" - # only required for pipeline parallelism - pass - - -class MegatronLatentDiffusion(MegatronBaseModel): - """Megatron LatentDiffusion Model.""" - - def __init__(self, cfg: DictConfig, trainer: Trainer): - if not HAVE_APEX: - raise ImportError( - "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." - ) - if not HAVE_MEGATRON_CORE: - raise ImportError( - "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." - ) - - # this prevents base constructor from initializing tokenizer - self.tokenizer = None - super().__init__(cfg, trainer=trainer) - - self._validate_trainer() - - # megatron_amp_O2 is not yet supported in diffusion models - self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) - - self.model = self.model_provider_func() - - self.conditioning_keys = [] - - if self.trainer.precision in ['bf16', 'bf16-mixed']: - self.autocast_dtype = torch.bfloat16 - elif self.trainer.precision in [32, '32', '32-true']: - self.autocast_dtype = torch.float - elif self.trainer.precision in [16, '16', '16-mixed']: - self.autocast_dtype = torch.half - else: - raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') - - def get_module_list(self): - if isinstance(self.model, list): - return [model.module if isinstance(model, Float16Module) else model for model in self.model] - elif isinstance(self.model, Float16Module): - return [self.model.module] - else: - return [self.model] - - def model_provider_func(self, pre_process=True, post_process=True): - """Model depends on pipeline paralellism.""" - model = LatentDiffusion(cfg=self.cfg, model_parallel_config=self.model_parallel_config) - return model - - def forward(self, x, c, *args, **kwargs): - output_tensor = self.model(x, c, *args, **kwargs) - return output_tensor - - @rank_zero_only - @torch.no_grad() - def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): - if self.cfg.scale_by_std and self.current_epoch == 0 and self.global_step == 0 and batch_idx == 0: - assert self.cfg.scale_factor == 1.0, 'rather not use custom rescaling and std-rescaling simultaneously' - batch[self.cfg.first_stage_key] = batch[self.cfg.first_stage_key].cuda(non_blocking=True) - self.model.on_train_batch_start(batch, batch_idx) - - def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): - tensor_shape = None # Placeholder - - # handle asynchronous grad reduction - no_sync_func = None - if not forward_only and self.with_distributed_adam: - no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) - - # pipeline schedules will get these from self.model.config - for module in self.get_module_list(): - module.config.no_sync_func = no_sync_func - - # run forward and backwards passes for an entire global batch - # we do this inside training_step to support pipeline parallelism - fwd_bwd_function = get_forward_backward_func() - - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(), - data_iterator=dataloader_iter, - model=self.model, - num_microbatches=get_num_microbatches(), - forward_only=forward_only, - seq_length=None, - micro_batch_size=self.cfg.micro_batch_size, - ) - - # losses_reduced_per_micro_batch is a list of dictionaries - # [{"loss": 0.1}, {"loss": 0.2}, ...] which are from gradient accumulation steps - # only the last stages of the pipeline return losses - loss_dict = {} - if losses_reduced_per_micro_batch: - if (not forward_only) or self.cfg.data.get('validation_drop_last', True): - # average loss across micro batches - for key in losses_reduced_per_micro_batch[0]: - loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.stack(loss_tensors_list) - loss_dict[key] = loss_tensor.mean() - loss_mean = loss_dict["val/loss"] if forward_only else loss_dict["train/loss"] - else: - raise NotImplementedError("Losses of micro batches sizes must be uniform!") - else: - if forward_only: - loss_mean = [] - else: - loss_mean = torch.tensor(0.0, device=torch.cuda.current_device()) - - return loss_mean, loss_dict - - def training_step(self, dataloader_iter, batch_idx): - """ - Our dataloaders produce a micro-batch and then we fetch - a number of microbatches depending on the global batch size and model parallel size - from the dataloader to produce a list of microbatches. - Batch should be a list of microbatches and those microbatches should on CPU. - Microbatches are then moved to GPU during the pipeline. - The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. - """ - - # we zero grads here because we also call backward in the megatron-core fwd/bwd functions - self._optimizer.zero_grad() - - loss_mean, loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, False) - - torch.distributed.broadcast(loss_mean, get_last_rank()) - - # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced - if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): - self.allreduce_sequence_parallel_gradients() - - if self.with_distributed_adam: - # gradients are reduced internally in distributed optimizer - pass - elif self.megatron_amp_O2: - # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) - # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): - # # main grads are stored in the MainParamsOptimizer wrapper - # self._optimizer.allreduce_main_grads() - self._optimizer.allreduce_main_grads() - elif not self.cfg.get('ddp_overlap', True): - # async grad allreduce is not currently implemented for O1/autocasting mixed precision training - # so we all-reduce gradients after the pipeline - self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) - - if self.cfg.precision in [16, '16', '16-mixed']: - loss_scale = self.trainer.precision_plugin.scaler._scale - if loss_scale is not None: - self.log('loss_scale', loss_scale, batch_size=1) - - self.log_dict(loss_dict, prog_bar=False, logger=True, on_step=True, rank_zero_only=True, batch_size=1) - self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) - lr = self._optimizer.param_groups[0]['lr'] - self.log('lr', lr, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log( - 'consumed_samples', - self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), - prog_bar=True, - rank_zero_only=True, - batch_size=1, - ) - return loss_mean - - def backward(self, *args, **kwargs): - """ LightningModule hook to do backward. - We want this to do nothing since we run backward in the fwd/bwd functions from apex. - No need to call it here. - """ - pass - - def optimizer_zero_grad(self, *args, **kwargs): - """ LightningModule hook to zero grad. - We want this to do nothing as we are zeroing grads during the training_step. - """ - pass - - def _append_sequence_parallel_module_grads(self, module, grads): - """ Helper method for allreduce_sequence_parallel_gradients""" - - for param in module.parameters(): - sequence_parallel_param = getattr(param, 'sequence_parallel', False) - if sequence_parallel_param and param.requires_grad: - if self.megatron_amp_O2: - grad = param.main_grad - else: - grad = param.grad - grads.append(grad.data) - - def get_forward_output_and_loss_func(self): - def process_batch(batch): - """ Prepares the global batch for apex fwd/bwd functions. - Global batch is a list of micro batches. - """ - # noise_map, condition - batch[self.cfg.first_stage_key] = batch[self.cfg.first_stage_key].cuda(non_blocking=True) - if isinstance(batch[self.cfg.cond_stage_key], torch.Tensor): - # in the case of precached text embeddings, cond_stage is also a tensor - batch[self.cfg.cond_stage_key] = batch[self.cfg.cond_stage_key].cuda(non_blocking=True) - - # SD has more dedicated structure for encoding, so we enable autocasting here as well - with torch.cuda.amp.autocast( - self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, - ): - x, c = self.model.get_input(batch, self.cfg.first_stage_key) - - if not isinstance(c, dict): - return [x, c] - - if len(self.conditioning_keys) == 0: - self.conditioning_keys = list(c.keys()) - c_list = [c[key] for key in self.conditioning_keys] - return [x, *c_list] - - def fwd_output_and_loss_func(dataloader_iter, model): - batch = next(dataloader_iter) - batch = process_batch(batch) - batch = [x.cuda(non_blocking=True) for x in batch] - if len(self.conditioning_keys) == 0: - x, c = batch - else: - x = batch[0] - c = {} - for idx, key in enumerate(self.conditioning_keys): - c[key] = batch[1 + idx] - loss, loss_dict = model(x, c) - - def dummy(output_tensor): - return loss, loss_dict - - # output_tensor, and a function to convert output_tensor to loss + loss_dict - return loss, dummy - - return fwd_output_and_loss_func - - def get_forward_output_only_func(self): - def fwd_output_only_func(batch, model): - raise NotImplementedError - - return fwd_output_only_func - - def validation_step(self, dataloader_iter, batch_idx): - loss, val_loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, True) - - self.log_dict(val_loss_dict, prog_bar=False, logger=True, on_step=False, on_epoch=True, batch_size=1) - - return loss - - def setup(self, stage=None): - """ PTL hook that is executed after DDP spawns. - We setup datasets here as megatron datasets require DDP to instantiate. - See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. - Args: - stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. - """ - self.model.rng.manual_seed(self.cfg.seed + 100 * parallel_state.get_data_parallel_rank()) - - # log number of parameters - if isinstance(self.model, list): - num_parameters_on_device = sum( - [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] - ) - else: - num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) - - # to be summed across data parallel group - total_num_parameters = torch.tensor(num_parameters_on_device).cuda(non_blocking=True) - - torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) - - logging.info( - f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' - f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' - f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' - f'Total number of model parameters: {total_num_parameters:.2e}.' - ) - - resume_checkpoint_path = self.trainer.ckpt_path - if resume_checkpoint_path: - init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) - else: - init_consumed_samples = 0 - self.init_consumed_samples = init_consumed_samples - self.init_global_step = self.trainer.global_step - - # allowing restored models to optionally setup datasets - self.build_train_valid_test_datasets() - - # Batch size need to be provided for webdatset - self._num_micro_batches = get_num_microbatches() - self._micro_batch_size = self.cfg.micro_batch_size - - self.setup_training_data(self.cfg.data) - self.setup_validation_data(self.cfg.data) - self.setup_test_data(self.cfg.data) - - def build_train_valid_test_datasets(self): - logging.info('Building datasets for Stable Diffusion...') - if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): - raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") - - if self.cfg.first_stage_key.endswith("encoded"): - self._train_ds, self._validation_ds = build_train_valid_precached_datasets( - model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0), - ) - else: - self._train_ds, self._validation_ds = build_train_valid_datasets( - model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0) - ) - self._test_ds = None - - if self._train_ds is not None: - logging.info(f'Length of train dataset: {len(self._train_ds)}') - if self._validation_ds is not None: - logging.info(f'Length of val dataset: {len(self._validation_ds)}') - if self._test_ds is not None: - logging.info(f'Length of test dataset: {len(self._test_ds)}') - logging.info(f'Finished building datasets for LatentDiffusion.') - return self._train_ds, self._validation_ds, self._test_ds - - def setup_training_data(self, cfg): - if hasattr(self, '_train_ds') and self._train_ds is not None: - consumed_samples = self.compute_consumed_samples(0) - logging.info( - f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' - ) - self._train_dl = torch.utils.data.DataLoader( - self._train_ds, - batch_size=self._micro_batch_size, - num_workers=cfg.num_workers, - pin_memory=True, - drop_last=True, - persistent_workers=True, - ) - - def setup_validation_data(self, cfg): - if hasattr(self, '_validation_ds') and self._validation_ds is not None: - consumed_samples = 0 - logging.info( - f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' - ) - self._validation_dl = torch.utils.data.DataLoader( - self._validation_ds, - batch_size=self._micro_batch_size, - num_workers=cfg.num_workers, - pin_memory=True, - drop_last=False, - persistent_workers=True, - ) - - def setup_test_data(self, cfg): - if hasattr(self, '_test_ds') and self._test_ds is not None: - consumed_samples = 0 - logging.info( - f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' - ) - self._test_dl = torch.utils.data.DataLoader( - self._test_ds, batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, - ) - - def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: - """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device - When using pipeline parallelism, we need the global batch to remain on the CPU, - since the memory overhead will be too high when using a large number of microbatches. - Microbatches are transferred from CPU to GPU inside the pipeline. - """ - return batch - - def _validate_trainer(self): - """ Certain trainer configurations can break training. - Here we try to catch them and raise an error. - """ - if self.trainer.accumulate_grad_batches > 1: - raise ValueError( - f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' - ) - - @classmethod - def list_available_models(cls): - return None - - def parameters(self): - if isinstance(self.model, list): - return itertools.chain.from_iterable(module.parameters() for module in self.model) - else: - return self.model.parameters() - - def save_to(self, save_path: str): - # Replace .nemo path in config for NeMo CLIP - cfg = self._cfg - if cfg.get('cond_stage_config').get('restore_from_path'): - with open_dict(cfg): - cfg.cond_stage_config.restore_from_path = None - cfg.cond_stage_config.cfg = self.model.cond_stage_model.cfg - self._cfg = cfg - super().save_to(save_path) - - @classmethod - def load_from_checkpoint( - cls, - checkpoint_path: str, - map_location: Any = None, - hparams_file: Optional[str] = None, - strict: bool = True, - **kwargs, - ): - """ - Loads ModelPT from checkpoint, with some maintenance of restoration. - For documentation, please refer to LightningModule.load_from_checkpoin() documentation. - """ - checkpoint = None - try: - cls._set_model_restore_state(is_being_restored=True) - # TODO: replace with proper PTL API - with pl_legacy_patch(): - if map_location is not None: - checkpoint = pl_load(checkpoint_path, map_location=map_location) - else: - checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage) - - if hparams_file is not None: - extension = hparams_file.split(".")[-1] - if extension.lower() == "csv": - hparams = load_hparams_from_tags_csv(hparams_file) - elif extension.lower() in ("yml", "yaml"): - hparams = load_hparams_from_yaml(hparams_file) - else: - raise ValueError(".csv, .yml or .yaml is required for `hparams_file`") - - hparams["on_gpu"] = False - - # overwrite hparams by the given file - checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = hparams - - # for past checkpoint need to add the new key - if cls.CHECKPOINT_HYPER_PARAMS_KEY not in checkpoint: - checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = {} - # override the hparams with values that were passed in - cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].get('cfg', checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY]) - # TODO: can we do this without overriding? - config_kwargs = kwargs.copy() - if 'trainer' in config_kwargs: - config_kwargs.pop('trainer') - cfg.update(config_kwargs) - - # Disable individual unet/vae weights loading otherwise the model will look for these partial ckpts and raise error - if cfg: - if cfg.get('unet_config') and cfg.get('unet_config').get('from_pretrained'): - cfg.unet_config.from_pretrained = None - if cfg.get('first_stage_config') and cfg.get('first_stage_config').get('from_pretrained'): - cfg.first_stage_config.from_pretrained = None - ## Now when we covert ckpt to nemo, let's always get rid of those _orig_mod - if cfg.get('inductor'): - cfg.inductor = False - ## Append some dummy configs that DB didn't support - if not cfg.get('channels_last'): - cfg.channels_last = True - if not cfg.get('capture_cudagraph_iters'): - cfg.capture_cudagraph_iters = -1 - - # compatibility for stable diffusion old checkpoint tweaks - first_key = list(checkpoint['state_dict'].keys())[0] - if first_key == "betas": - # insert "model." into for megatron wrapper - new_state_dict = {} - for key in checkpoint['state_dict'].keys(): - new_key = "model." + key - new_state_dict[new_key] = checkpoint['state_dict'][key] - checkpoint['state_dict'] = new_state_dict - elif ( - first_key == 'model.text_encoder.transformer.text_model.embeddings.position_ids' - or first_key == 'model.text_encoder.model.language_model.embedding.position_embeddings' - ): - # remap state keys from dreambooth when using HF clip - new_state_dict = {} - for key in checkpoint['state_dict'].keys(): - new_key = key.replace('._orig_mod', "") - new_key = new_key.replace('unet', 'model.diffusion_model') - new_key = new_key.replace('vae', 'first_stage_model') - new_key = new_key.replace('text_encoder', 'cond_stage_model') - new_key = new_key.replace('.noise_scheduler', '') - new_state_dict[new_key] = checkpoint['state_dict'][key] - checkpoint['state_dict'] = new_state_dict - - # compatibility for inductor in inference - if not cfg.get('inductor', False): - new_state_dict = {} - for key in checkpoint['state_dict'].keys(): - new_key = key.replace('._orig_mod', '', 1) - new_state_dict[new_key] = checkpoint['state_dict'][key] - checkpoint['state_dict'] = new_state_dict - - if cfg.get('megatron_amp_O2', False): - new_state_dict = {} - for key in checkpoint['state_dict'].keys(): - new_key = key.replace('model.', 'model.module.', 1) - new_state_dict[new_key] = checkpoint['state_dict'][key] - checkpoint['state_dict'] = new_state_dict - - if 'cfg' in kwargs: - model = ptl_load_state(cls, checkpoint, strict=strict, **kwargs) - else: - model = ptl_load_state(cls, checkpoint, strict=strict, cfg=cfg, **kwargs) - # cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].cfg - - checkpoint = model - - finally: - cls._set_model_restore_state(is_being_restored=False) - return checkpoint - - -class DiffusionWrapper(pl.LightningModule, Serialization): - def __init__( - self, - diff_model_config, - conditioning_key, - inductor: bool = False, - inductor_cudagraphs: bool = False, - capture_cudagraph_iters: int = -1, - ): - super().__init__() - self.diffusion_model = DiffusionWrapper.from_config_dict(diff_model_config) - self.conditioning_key = conditioning_key - assert self.conditioning_key in [None, 'concat', 'crossattn', 'hybrid', 'adm'] - - # Fusing VAE and CLIP doesn't give benefit - if inductor: - # TorchInductor with CUDA graph can lead to OOM - torch._dynamo.config.dynamic_shapes = False - torch._dynamo.config.automatic_dynamic_shapes = False - inductor_config.triton.cudagraphs = inductor_cudagraphs - self.diffusion_model = torch.compile(self.diffusion_model) - # CUDA graph - self.capture_cudagraph_iters = capture_cudagraph_iters - self.iterations = 0 - self.graphed_diffusion_model = None - - def forward(self, x, t, c_concat: list = None, c_crossattn: list = None): - if self.conditioning_key is None: - out = self.diffusion_model(x, t) - elif self.conditioning_key == 'concat': - xc = torch.cat([x] + c_concat, dim=1) - out = self.diffusion_model(xc, t) - elif self.conditioning_key == 'crossattn': - cc = torch.cat(c_crossattn, 1) - if self.iterations == self.capture_cudagraph_iters: - logging.info("Capturing CUDA graph for module: %s", self.diffusion_model.__class__.__name__) - self.graphed_diffusion_model = torch.cuda.make_graphed_callables(self.diffusion_model, (x, t, cc)) - - if 0 <= self.capture_cudagraph_iters <= self.iterations: - out = self.graphed_diffusion_model(x, t, cc) - else: - out = self.diffusion_model(x, t, context=cc) - self.iterations += 1 - elif self.conditioning_key == 'hybrid': - xc = torch.cat([x] + c_concat, dim=1) - cc = torch.cat(c_crossattn, 1) - out = self.diffusion_model(xc, t, context=cc) - elif self.conditioning_key == 'adm': - cc = c_crossattn[0] - out = self.diffusion_model(x, t, y=cc) - else: - raise NotImplementedError() - - return out diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm_config.py b/nemo/collections/multimodal/models/stable_diffusion/ldm_config.py deleted file mode 100644 index 2f2acb40ed43..000000000000 --- a/nemo/collections/multimodal/models/stable_diffusion/ldm_config.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from dataclasses import dataclass, field -from typing import Any, List, Optional - -from nemo.core.config import modelPT as model_cfg - - -@dataclass -class LDMUnetConfig: - cls: Optional[str] = 'nemo.collections.multimodal.modules.diffusionmodules.openaimodel.UNetModel' - image_size: Optional[int] = 32 # unused - in_channels: Optional[int] = 4 - out_channels: Optional[int] = 4 - model_channels: Optional[int] = 320 - attention_resolutions: Optional[List[int]] = field(default_factory=lambda: [4, 2, 1]) - num_res_blocks: Optional[int] = 2 - channel_mult: Optional[List[int]] = field(default_factory=lambda: [1, 2, 4, 4]) - num_heads: Optional[int] = 8 - use_spatial_transformer: Optional[bool] = True - transformer_depth: Optional[int] = 1 - context_dim: Optional[int] = 768 - use_checkpoint: Optional[bool] = True - legacy: Optional[bool] = False - use_flash_attention: Optional[bool] = False - - -@dataclass -class SchedulerConfig: - cls: Optional[str] = 'nemo.collections.multimodal.parts.lr_scheduler.LambdaLinearScheduler' - warm_up_steps: Optional[List[int]] = field(default_factory=lambda: [10000]) - cycle_lengths: Optional[List[int]] = field( - default_factory=lambda: [10000000000000] - ) # incredibly large number to prevent corner cases - f_start: Optional[List[float]] = field(default_factory=lambda: [1.0e-6]) - f_max: Optional[List[float]] = field(default_factory=lambda: [1.0]) - f_min: Optional[List[float]] = field(default_factory=lambda: [1.0]) - - -@dataclass -class CLIPEmbedderConfig: - cls: Optional[str] = 'nemo.collections.multimodal.modules.encoders.modules.FrozenCLIPEmbedder' - version: Optional[str] = 'openai/clip-vit-large-patch14' - device: Optional[str] = 'cuda' - max_length: Optional[int] = 77 - - -@dataclass -class LDMEncoderConfig: - double_z: Optional[bool] = True - z_channels: Optional[int] = 4 - resolution: Optional[int] = 256 - in_channels: Optional[int] = 3 - out_ch: Optional[int] = 3 - ch: Optional[int] = 128 - ch_mult: Optional[List[int]] = field(default_factory=lambda: [1, 2, 4, 4]) - num_res_blocks: Optional[int] = 2 - attn_resolutions: Optional[List[int]] = field(default_factory=lambda: []) - dropout: Optional[float] = 0.0 - - -@dataclass -class LDMFirstStageConfig: # Autoencoder - cls: Optional[str] = 'nemo.collections.multimodal.models.ldm.autoencoder.AutoencoderKL' - embed_dim: Optional[int] = 4 - monitor: Optional[str] = 'val/rec_loss' - ddconfig: Optional[LDMEncoderConfig] = LDMEncoderConfig() - - -@dataclass -class DDPMDiffusionModelConfig(model_cfg.ModelConfig): - unet_config: Optional[LDMUnetConfig] = LDMUnetConfig() - timesteps: Optional[int] = 1000 - beta_schedule: Optional[str] = 'linear' - loss_type: Optional[str] = 'l2' - ckpt_path: Optional[str] = None - ignore_keys: Optional[List[str]] = field(default_factory=list) - load_only_unet: Optional[bool] = False - monitor: Optional[str] = 'val/loss' - use_ema: Optional[bool] = True - first_stage_key: Optional[str] = 'image' - image_size: Optional[int] = 256 - channels: Optional[int] = 3 - log_every_t: Optional[int] = 100 - clip_denoised: Optional[bool] = True - linear_start: Optional[float] = 1e-4 - linear_end: Optional[float] = 2e-2 - cosine_s: Optional[float] = 8e-3 - given_betas: Optional[float] = None - original_elbo_weight: Optional[float] = 0.0 - v_posterior: Optional[ - float - ] = 0.0 # weight for choosing posterior variance as sigma = (1-v) * beta_tilde + v * beta - l_simple_weight: Optional[float] = 1.0 - conditioning_key: Optional[str] = None - parameterization: Optional[str] = 'eps' # all assuming fixed variance schedules - scheduler_config: Optional[Any] = None - use_positional_encodings: Optional[bool] = False - learn_logvar: Optional[bool] = False - logvar_init: Optional[float] = 0.0 - learning_rate: Optional[float] = 1.0e-04 - - -@dataclass -class LatentDiffusionModelConfig(DDPMDiffusionModelConfig): - # Overrite Default values - linear_start: Optional[float] = 0.00085 - linear_end: Optional[float] = 0.0120 - num_timesteps_cond: Optional[int] = 1 - log_every_t: Optional[int] = 200 - timesteps: Optional[int] = 1000 - first_stage_key: Optional[str] = 'jpg' - cond_stage_key: Optional[str] = 'txt' - image_size: Optional[int] = 64 - channels: Optional[int] = 4 - cond_stage_trainable: Optional[bool] = False - conditioning_key: Optional[str] = 'crossattn' - monitor: Optional[str] = 'val/loss_simple_ema' - scale_factor: Optional[float] = 0.18215 - use_ema: Optional[bool] = False # TODO - unet_config: Optional[LDMUnetConfig] = LDMUnetConfig() - first_stage_config: Optional[LDMFirstStageConfig] = LDMFirstStageConfig() - scheduler_config: Optional[SchedulerConfig] = SchedulerConfig() - # New attributes in additon to DDPMDiffusionModel - concat_mode: Optional[bool] = True - trainable: Optional[bool] = False - cond_stage_config: Optional[CLIPEmbedderConfig] = CLIPEmbedderConfig() - cond_stage_forward: Optional[Any] = None - scale_by_std: Optional[bool] = False - text_embedding_dropout_rate: Optional[float] = 0 - fused_opt: Optional[bool] = False - inductor: Optional[bool] = False - inductor_cudagraphs: Optional[bool] = False diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py deleted file mode 100644 index 70256058631d..000000000000 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from enum import Enum - -Sampler = Enum('Sampler', ['PLMS', 'DDIM', 'DPM', 'PARA_DDIM']) diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py deleted file mode 100644 index 1a4ebed123c9..000000000000 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py +++ /dev/null @@ -1,339 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from abc import ABC, abstractmethod - -import numpy as np -import torch -from tqdm import tqdm - -from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( - make_ddim_sampling_parameters, - make_ddim_timesteps, - noise_like, -) - - -class AbstractBaseSampler(ABC): - def __init__(self, model, sampler, schedule="linear", **kwargs): - super().__init__() - self.model = model - self.ddpm_num_timesteps = model.num_timesteps - self.schedule = schedule - assert isinstance(sampler, Sampler), "Sampler should be of ENUM type Sampler" - self.sampler = sampler - - def register_buffer(self, name, attr): - if type(attr) == torch.Tensor: - if attr.device != torch.device("cuda"): - attr = attr.to(torch.device("cuda")) - setattr(self, name, attr) - - def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, verbose=True): - self.ddim_timesteps = make_ddim_timesteps( - ddim_discr_method=ddim_discretize, - num_ddim_timesteps=ddim_num_steps, - num_ddpm_timesteps=self.ddpm_num_timesteps, - verbose=verbose, - ) - alphas_cumprod = self.model.alphas_cumprod - assert alphas_cumprod.shape[0] == self.ddpm_num_timesteps, "alphas have to be defined for each timestep" - to_torch = lambda x: x.clone().detach().to(torch.float32).to(torch.cuda.current_device()) - self.register_buffer("betas", to_torch(self.model.betas)) - self.register_buffer("alphas_cumprod", to_torch(alphas_cumprod)) - self.register_buffer("alphas_cumprod_prev", to_torch(self.model.alphas_cumprod_prev)) - # calculations for diffusion q(x_t | x_{t-1}) and others - self.register_buffer("sqrt_alphas_cumprod", to_torch(np.sqrt(alphas_cumprod.cpu()))) - self.register_buffer( - "sqrt_one_minus_alphas_cumprod", to_torch(np.sqrt(1.0 - alphas_cumprod.cpu())), - ) - self.register_buffer("log_one_minus_alphas_cumprod", to_torch(np.log(1.0 - alphas_cumprod.cpu()))) - self.register_buffer("sqrt_recip_alphas_cumprod", to_torch(np.sqrt(1.0 / alphas_cumprod.cpu()))) - self.register_buffer( - "sqrt_recipm1_alphas_cumprod", to_torch(np.sqrt(1.0 / alphas_cumprod.cpu() - 1)), - ) - # ddim sampling parameters - ddim_sigmas, ddim_alphas, ddim_alphas_prev, ddim_variance = make_ddim_sampling_parameters( - alphacums=alphas_cumprod.cpu(), ddim_timesteps=self.ddim_timesteps, eta=ddim_eta, verbose=verbose, - ) - self.register_buffer("ddim_sigmas", ddim_sigmas) - self.register_buffer("ddim_alphas", ddim_alphas) - self.register_buffer("ddim_alphas_prev", ddim_alphas_prev) - self.register_buffer("ddim_variance", ddim_variance) - self.register_buffer("ddim_sqrt_one_minus_alphas", np.sqrt(1.0 - ddim_alphas)) - sigmas_for_original_sampling_steps = ddim_eta * torch.sqrt( - (1 - self.alphas_cumprod_prev) - / (1 - self.alphas_cumprod) - * (1 - self.alphas_cumprod / self.alphas_cumprod_prev) - ) - self.register_buffer("ddim_sigmas_for_original_num_steps", sigmas_for_original_sampling_steps) - - @abstractmethod - def p_sampling_fn(self): - pass - - def dpm_sampling_fn(self): - pass - - def para_ddim_sampling_fn(self): - pass - - @torch.no_grad() - def sample( - self, - S, - batch_size, - shape, - conditioning=None, - callback=None, - normals_sequence=None, - img_callback=None, - quantize_x0=False, - eta=0.0, - mask=None, - x0=None, - temperature=1.0, - noise_dropout=0.0, - score_corrector=None, - corrector_kwargs=None, - verbose=True, - x_T=None, - log_every_t=100, - unconditional_guidance_scale=1.0, - unconditional_conditioning=None, - parallelism=8, - tolerance=0.1, - # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ... - **kwargs, - ): - if conditioning is not None: - if isinstance(conditioning, dict): - ctmp = conditioning[list(conditioning.keys())[0]] - while isinstance(ctmp, list): - ctmp = ctmp[0] - cbs = ctmp.shape[0] - if cbs != batch_size: - print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}") - else: - if conditioning.shape[0] != batch_size: - print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}") - self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose) - # sampling - C, H, W = shape - size = (batch_size, C, H, W) - print(f"Data shape for sampling is {size}, eta {eta}") - - if self.sampler is Sampler.DPM: - return self.dpm_sampling_fn( - shape=shape, - steps=S, - conditioning=conditioning, - unconditional_conditioning=unconditional_conditioning, - unconditional_guidance_scale=unconditional_guidance_scale, - x_T=x_T, - ) - - if self.sampler is Sampler.PARA_DDIM: - return self.para_ddim_sampling_fn( - cond=conditioning, - batch_size=batch_size, - per_latent_shape=shape, - x_T=x_T, - steps=S, - parallelism=parallelism, - tolerance=tolerance, - temperature=temperature, - noise_dropout=noise_dropout, - quantize_denoised=quantize_x0, - unconditional_guidance_scale=unconditional_guidance_scale, - unconditional_conditioning=unconditional_conditioning, - score_corrector=score_corrector, - corrector_kwargs=corrector_kwargs, - ) - - samples, intermediates = self.sampling_fn( - conditioning, - size, - callback=callback, - img_callback=img_callback, - quantize_denoised=quantize_x0, - mask=mask, - x0=x0, - ddim_use_original_steps=False, - noise_dropout=noise_dropout, - temperature=temperature, - score_corrector=score_corrector, - corrector_kwargs=corrector_kwargs, - x_T=x_T, - log_every_t=log_every_t, - unconditional_guidance_scale=unconditional_guidance_scale, - unconditional_conditioning=unconditional_conditioning, - ) - return samples, intermediates - - @torch.no_grad() - def sampling_fn( - self, - cond, - shape, - x_T=None, - ddim_use_original_steps=False, - callback=None, - timesteps=None, - quantize_denoised=False, - mask=None, - x0=None, - img_callback=None, - log_every_t=100, - temperature=1.0, - noise_dropout=0.0, - score_corrector=None, - corrector_kwargs=None, - unconditional_guidance_scale=1.0, - unconditional_conditioning=None, - ): - device = self.model.betas.device - b = shape[0] - if x_T is None: - img = torch.randn(shape, generator=self.model.rng, device=device) - else: - img = x_T - - if timesteps is None: - timesteps = self.ddpm_num_timesteps if ddim_use_original_steps else self.ddim_timesteps - elif timesteps is not None and not ddim_use_original_steps: - subset_end = int(min(timesteps / self.ddim_timesteps.shape[0], 1) * self.ddim_timesteps.shape[0]) - 1 - timesteps = self.ddim_timesteps[:subset_end] - intermediates = {"x_inter": [img], "pred_x0": [img]} - - # TODO: Is this needed - if self.sampler is Sampler.PLMS: - time_range = list(reversed(range(0, timesteps))) if ddim_use_original_steps else np.flip(timesteps) - else: - time_range = reversed(range(0, timesteps)) if ddim_use_original_steps else np.flip(timesteps) - total_steps = timesteps if ddim_use_original_steps else timesteps.shape[0] - print(f"Running {self.sampler.name} Sampling with {total_steps} timesteps") - iterator = tqdm(time_range, desc=f"{self.sampler.name} Sampler", total=total_steps) - old_eps = [] - for i, step in enumerate(iterator): - index = total_steps - i - 1 - ts = torch.full((b,), step, device=device, dtype=torch.long) - if self.sampler is Sampler.PLMS: - ts_next = torch.full( - (b,), time_range[min(i + 1, len(time_range) - 1)], device=device, dtype=torch.long, - ) - else: - old_eps = None - ts_next = None - if mask is not None: - assert x0 is not None - img_orig = self.model.q_sample(x0, ts) # TODO: deterministic forward pass? - img = img_orig * mask + (1.0 - mask) * img - outs = self.p_sampling_fn( - img, - cond, - ts, - index=index, - use_original_steps=ddim_use_original_steps, - quantize_denoised=quantize_denoised, - temperature=temperature, - noise_dropout=noise_dropout, - score_corrector=score_corrector, - corrector_kwargs=corrector_kwargs, - unconditional_guidance_scale=unconditional_guidance_scale, - unconditional_conditioning=unconditional_conditioning, - old_eps=old_eps, - t_next=ts_next, - ) - img, pred_x0 = outs[0], outs[1] - if self.sampler is Sampler.PLMS: - e_t = outs[2] - old_eps.append(e_t) - if len(old_eps) >= 4: - old_eps.pop(0) - if callback: - callback(i) - if img_callback: - img_callback(pred_x0, i) - if index % log_every_t == 0 or index == total_steps - 1: - intermediates["x_inter"].append(img) - intermediates["pred_x0"].append(pred_x0) - return img, intermediates - - def _get_model_output( - self, x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, corrector_kwargs, - ): - if unconditional_conditioning is None or unconditional_guidance_scale == 1.0: - model_output = self.model.apply_model(x, t, c) - elif isinstance(c, dict): - ### Contolnet conditioning is dict format - model_t = self.model.apply_model(x, t, c) - model_uncond = self.model.apply_model(x, t, unconditional_conditioning) - model_output = model_uncond + unconditional_guidance_scale * (model_t - model_uncond) - else: - x_in = torch.cat([x] * 2) - t_in = torch.cat([t] * 2) - c_in = torch.cat([unconditional_conditioning, c]) - e_t_uncond, model_t = self.model.apply_model(x_in, t_in, c_in).chunk(2) - model_output = e_t_uncond + unconditional_guidance_scale * (model_t - e_t_uncond) - if self.model.parameterization == "v": - e_t = self.model.predict_eps_from_z_and_v(x, t, model_output) - else: - e_t = model_output - if score_corrector is not None: - assert self.model.parameterization == "eps" - e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs) - return e_t, model_output - - def _get_x_prev_and_pred_x0( - self, - use_original_steps, - b, - index, - device, - x, - t, - model_output, - e_t, - quantize_denoised, - repeat_noise, - temperature, - noise_dropout, - ): - alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas - alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev - sqrt_one_minus_alphas = ( - self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas - ) - sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas - - # select parameters corresponding to the currently considered timestep - a_t = torch.full((b, 1, 1, 1), alphas[index], device=device) - a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device) - sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device) - sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index], device=device) - # current prediction for x_0 - if self.model.parameterization != "v": - pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt() - else: - pred_x0 = self.model.predict_start_from_z_and_v(x, t, model_output) - if quantize_denoised: - pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0) - # direction pointing to x_t - dir_xt = (1.0 - a_prev - sigma_t ** 2).sqrt() * e_t - noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature - if noise_dropout > 0.0: - noise = torch.nn.functional.dropout(noise, p=noise_dropout) - x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise - return x_prev, pred_x0 diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py deleted file mode 100644 index 2d6b121dced4..000000000000 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""SAMPLING ONLY.""" - -import numpy as np -import torch -from tqdm import tqdm - -from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler -from nemo.collections.multimodal.models.stable_diffusion.samplers.base_sampler import AbstractBaseSampler -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import extract_into_tensor -from nemo.collections.multimodal.parts.utils import randn_like - - -class DDIMSampler(AbstractBaseSampler): - def __init__(self, model, schedule="linear", **kwargs): - super().__init__(model, sampler=Sampler.DDIM, schedule="linear", **kwargs) - - @torch.no_grad() - def p_sampling_fn( - self, - x, - c, - t, - index, - repeat_noise=False, - use_original_steps=False, - quantize_denoised=False, - temperature=1.0, - noise_dropout=0.0, - score_corrector=None, - corrector_kwargs=None, - unconditional_guidance_scale=1.0, - unconditional_conditioning=None, - old_eps=None, - t_next=None, - ): - b, *_, device = *x.shape, x.device - e_t, model_output = self._get_model_output( - x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, corrector_kwargs - ) - x_prev, pred_x0 = self._get_x_prev_and_pred_x0( - use_original_steps, - b, - index, - device, - x, - t, - model_output, - e_t, - quantize_denoised, - repeat_noise, - temperature, - noise_dropout, - ) - return x_prev, pred_x0 - - @torch.no_grad() - def stochastic_encode(self, x0, t, use_original_steps=False, noise=None): - # fast, but does not allow for exact reconstruction - # t serves as an index to gather the correct alphas - if use_original_steps: - sqrt_alphas_cumprod = self.sqrt_alphas_cumprod - sqrt_one_minus_alphas_cumprod = self.sqrt_one_minus_alphas_cumprod - else: - sqrt_alphas_cumprod = torch.sqrt(self.ddim_alphas) - sqrt_one_minus_alphas_cumprod = self.ddim_sqrt_one_minus_alphas - - if noise is None: - noise = randn_like(x0, generator=self.model.rng) - return ( - extract_into_tensor(sqrt_alphas_cumprod, t, x0.shape) * x0 - + extract_into_tensor(sqrt_one_minus_alphas_cumprod, t, x0.shape) * noise - ) - - @torch.no_grad() - def decode( - self, - x_latent, - cond, - t_start, - unconditional_guidance_scale=1.0, - unconditional_conditioning=None, - use_original_steps=False, - ): - - timesteps = np.arange(self.ddpm_num_timesteps) if use_original_steps else self.ddim_timesteps - timesteps = timesteps[:t_start] - - time_range = np.flip(timesteps) - total_steps = timesteps.shape[0] - print(f"Running DDIM Sampling with {total_steps} timesteps") - - iterator = tqdm(time_range, desc='Decoding image', total=total_steps) - x_dec = x_latent - for i, step in enumerate(iterator): - index = total_steps - i - 1 - ts = torch.full((x_latent.shape[0],), step, device=x_latent.device, dtype=torch.long) - x_dec, _ = self.p_sample_ddim( - x_dec, - cond, - ts, - index=index, - use_original_steps=use_original_steps, - unconditional_guidance_scale=unconditional_guidance_scale, - unconditional_conditioning=unconditional_conditioning, - ) - return x_dec diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py deleted file mode 100644 index b1b046a2c5db..000000000000 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py +++ /dev/null @@ -1,493 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import math - -import torch - -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import expand_dims, interpolate_fn - - -class NoiseScheduleVP: - def __init__( - self, schedule="discrete", betas=None, alphas_cumprod=None, continuous_beta_0=0.1, continuous_beta_1=20.0, - ): - """Create a wrapper class for the forward SDE.""" - - if schedule not in ["discrete", "linear", "cosine"]: - raise ValueError( - "Unsupported noise schedule {}. The schedule needs to be 'discrete' or 'linear' or 'cosine'".format( - schedule - ) - ) - - self.schedule = schedule - if schedule == "discrete": - if betas is not None: - log_alphas = 0.5 * torch.log(1 - betas).cumsum(dim=0) - else: - assert alphas_cumprod is not None - log_alphas = 0.5 * torch.log(alphas_cumprod) - self.total_N = len(log_alphas) - self.T = 1.0 - self.t_array = torch.linspace(0.0, 1.0, self.total_N + 1)[1:].reshape((1, -1)) - self.log_alpha_array = log_alphas.reshape((1, -1,)) - else: - self.total_N = 1000 - self.beta_0 = continuous_beta_0 - self.beta_1 = continuous_beta_1 - self.cosine_s = 0.008 - self.cosine_beta_max = 999.0 - self.cosine_t_max = ( - math.atan(self.cosine_beta_max * (1.0 + self.cosine_s) / math.pi) - * 2.0 - * (1.0 + self.cosine_s) - / math.pi - - self.cosine_s - ) - self.cosine_log_alpha_0 = math.log(math.cos(self.cosine_s / (1.0 + self.cosine_s) * math.pi / 2.0)) - self.schedule = schedule - if schedule == "cosine": - self.T = 0.9946 - else: - self.T = 1.0 - - def marginal_log_mean_coeff(self, t): - """ - Compute log(alpha_t) of a given continuous-time label t in [0, T]. - """ - if self.schedule == "discrete": - return interpolate_fn( - t.reshape((-1, 1)), self.t_array.to(t.device), self.log_alpha_array.to(t.device), - ).reshape((-1)) - elif self.schedule == "linear": - return -0.25 * t ** 2 * (self.beta_1 - self.beta_0) - 0.5 * t * self.beta_0 - elif self.schedule == "cosine": - - def log_alpha_fn(s): - return torch.log(torch.cos((s + self.cosine_s) / (1.0 + self.cosine_s) * math.pi / 2.0)) - - log_alpha_t = log_alpha_fn(t) - self.cosine_log_alpha_0 - return log_alpha_t - - def marginal_alpha(self, t): - """ - Compute alpha_t of a given continuous-time label t in [0, T]. - """ - return torch.exp(self.marginal_log_mean_coeff(t)) - - def marginal_std(self, t): - """ - Compute sigma_t of a given continuous-time label t in [0, T]. - """ - return torch.sqrt(1.0 - torch.exp(2.0 * self.marginal_log_mean_coeff(t))) - - def marginal_lambda(self, t): - """ - Compute lambda_t = log(alpha_t) - log(sigma_t) of a given continuous-time label t in [0, T]. - """ - log_mean_coeff = self.marginal_log_mean_coeff(t) - log_std = 0.5 * torch.log(1.0 - torch.exp(2.0 * log_mean_coeff)) - return log_mean_coeff - log_std - - def inverse_lambda(self, lamb): - """ - Compute the continuous-time label t in [0, T] of a given half-logSNR lambda_t. - """ - if self.schedule == "linear": - tmp = 2.0 * (self.beta_1 - self.beta_0) * torch.logaddexp(-2.0 * lamb, torch.zeros((1,)).to(lamb)) - Delta = self.beta_0 ** 2 + tmp - return tmp / (torch.sqrt(Delta) + self.beta_0) / (self.beta_1 - self.beta_0) - elif self.schedule == "discrete": - log_alpha = -0.5 * torch.logaddexp(torch.zeros((1,)).to(lamb.device), -2.0 * lamb) - t = interpolate_fn( - log_alpha.reshape((-1, 1)), - torch.flip(self.log_alpha_array.to(lamb.device), [1]), - torch.flip(self.t_array.to(lamb.device), [1]), - ) - return t.reshape((-1,)) - else: - log_alpha = -0.5 * torch.logaddexp(-2.0 * lamb, torch.zeros((1,)).to(lamb)) - - def t_fn(log_alpha_t): - return ( - torch.arccos(torch.exp(log_alpha_t + self.cosine_log_alpha_0)) - * 2.0 - * (1.0 + self.cosine_s) - / math.pi - - self.cosine_s - ) - - t = t_fn(log_alpha) - return t - - -def model_wrapper( - model, - noise_schedule, - model_type="noise", - model_kwargs={}, - guidance_type="uncond", - condition=None, - unconditional_condition=None, - guidance_scale=1.0, - classifier_fn=None, - classifier_kwargs={}, -): - """Create a wrapper function for the noise prediction model.""" - - def get_model_input_time(t_continuous): - if noise_schedule.schedule == "discrete": - return (t_continuous - 1.0 / noise_schedule.total_N) * 1000.0 - else: - return t_continuous - - def noise_pred_fn(x, t_continuous, cond=None): - if t_continuous.reshape((-1,)).shape[0] == 1: - t_continuous = t_continuous.expand((x.shape[0])) - t_input = get_model_input_time(t_continuous) - if cond is None: - output = model(x, t_input, **model_kwargs) - else: - output = model(x, t_input, cond, **model_kwargs) - if model_type == "noise": - return output - elif model_type == "x_start": - alpha_t, sigma_t = ( - noise_schedule.marginal_alpha(t_continuous), - noise_schedule.marginal_std(t_continuous), - ) - dims = x.dim() - return (x - expand_dims(alpha_t, dims) * output) / expand_dims(sigma_t, dims) - elif model_type == "v": - alpha_t, sigma_t = ( - noise_schedule.marginal_alpha(t_continuous), - noise_schedule.marginal_std(t_continuous), - ) - dims = x.dim() - return expand_dims(alpha_t, dims) * output + expand_dims(sigma_t, dims) * x - - def cond_grad_fn(x, t_input): - """ - Compute the gradient of the classifier, i.e. nabla_{x} log p_t(cond | x_t). - """ - with torch.enable_grad(): - x_in = x.detach().requires_grad_(True) - log_prob = classifier_fn(x_in, t_input, condition, **classifier_kwargs) - return torch.autograd.grad(log_prob.sum(), x_in)[0] - - def model_fn(x, t_continuous): - """ - The noise predicition model function that is used for DPM-Solver. - """ - if t_continuous.reshape((-1,)).shape[0] == 1: - t_continuous = t_continuous.expand((x.shape[0])) - if guidance_type == "uncond": - return noise_pred_fn(x, t_continuous) - elif guidance_type == "classifier": - assert classifier_fn is not None - t_input = get_model_input_time(t_continuous) - cond_grad = cond_grad_fn(x, t_input) - sigma_t = noise_schedule.marginal_std(t_continuous) - noise = noise_pred_fn(x, t_continuous) - return noise - guidance_scale * expand_dims(sigma_t, dims=cond_grad.dim()) * cond_grad - elif guidance_type == "classifier-free": - if guidance_scale == 1.0 or unconditional_condition is None: - return noise_pred_fn(x, t_continuous, cond=condition) - else: - x_in = torch.cat([x] * 2) - t_in = torch.cat([t_continuous] * 2) - c_in = torch.cat([unconditional_condition, condition]) - noise_uncond, noise = noise_pred_fn(x_in, t_in, cond=c_in).chunk(2) - return noise_uncond + guidance_scale * (noise - noise_uncond) - - assert model_type in ["noise", "x_start", "v"] - assert guidance_type in ["uncond", "classifier", "classifier-free"] - return model_fn - - -class DPMSolver: - def __init__( - self, model_fn, noise_schedule, predict_x0=False, thresholding=False, max_val=1.0, - ): - """Construct a DPM-Solver.""" - self.model = model_fn - self.noise_schedule = noise_schedule - self.predict_x0 = predict_x0 - self.thresholding = thresholding - self.max_val = max_val - - def noise_prediction_fn(self, x, t): - """ - Return the noise prediction model. - """ - return self.model(x, t) - - def data_prediction_fn(self, x, t): - """ - Return the data prediction model (with thresholding). - """ - noise = self.noise_prediction_fn(x, t) - dims = x.dim() - alpha_t, sigma_t = ( - self.noise_schedule.marginal_alpha(t), - self.noise_schedule.marginal_std(t), - ) - x0 = (x - expand_dims(sigma_t, dims) * noise) / expand_dims(alpha_t, dims) - if self.thresholding: - p = 0.995 # A hyperparameter in the paper of "Imagen" [1]. - s = torch.quantile(torch.abs(x0).reshape((x0.shape[0], -1)), p, dim=1) - s = expand_dims(torch.maximum(s, self.max_val * torch.ones_like(s).to(s.device)), dims) - x0 = torch.clamp(x0, -s, s) / s - return x0 - - def model_fn(self, x, t): - """ - Convert the model to the noise prediction model or the data prediction model. - """ - if self.predict_x0: - return self.data_prediction_fn(x, t) - else: - return self.noise_prediction_fn(x, t) - - def get_time_steps(self, skip_type, t_T, t_0, N, device): - """Compute the intermediate time steps for sampling.""" - if skip_type == "logSNR": - lambda_T = self.noise_schedule.marginal_lambda(torch.tensor(t_T).to(device)) - lambda_0 = self.noise_schedule.marginal_lambda(torch.tensor(t_0).to(device)) - logSNR_steps = torch.linspace(lambda_T.cpu().item(), lambda_0.cpu().item(), N + 1).to(device) - return self.noise_schedule.inverse_lambda(logSNR_steps) - elif skip_type == "time_uniform": - return torch.linspace(t_T, t_0, N + 1).to(device) - elif skip_type == "time_quadratic": - t_order = 2 - t = torch.linspace(t_T ** (1.0 / t_order), t_0 ** (1.0 / t_order), N + 1).pow(t_order).to(device) - return t - else: - raise ValueError( - "Unsupported skip_type {}, need to be 'logSNR' or 'time_uniform' or 'time_quadratic'".format(skip_type) - ) - - def denoise_to_zero_fn(self, x, s): - """ - Denoise at the final step, which is equivalent to solve the ODE from lambda_s to infty by first-order discretization. - """ - return self.data_prediction_fn(x, s) - - def dpm_solver_first_update(self, x, s, t, model_s=None, return_intermediate=False): - """ - DPM-Solver-1 (equivalent to DDIM) from time `s` to time `t`. - """ - ns = self.noise_schedule - dims = x.dim() - lambda_s, lambda_t = ns.marginal_lambda(s), ns.marginal_lambda(t) - h = lambda_t - lambda_s - log_alpha_s, log_alpha_t = ( - ns.marginal_log_mean_coeff(s), - ns.marginal_log_mean_coeff(t), - ) - sigma_s, sigma_t = ns.marginal_std(s), ns.marginal_std(t) - alpha_t = torch.exp(log_alpha_t) - - if self.predict_x0: - phi_1 = torch.expm1(-h) - if model_s is None: - model_s = self.model_fn(x, s) - x_t = expand_dims(sigma_t / sigma_s, dims) * x - expand_dims(alpha_t * phi_1, dims) * model_s - if return_intermediate: - return x_t, {"model_s": model_s} - else: - return x_t - else: - phi_1 = torch.expm1(h) - if model_s is None: - model_s = self.model_fn(x, s) - x_t = ( - expand_dims(torch.exp(log_alpha_t - log_alpha_s), dims) * x - - expand_dims(sigma_t * phi_1, dims) * model_s - ) - if return_intermediate: - return x_t, {"model_s": model_s} - else: - return x_t - - def multistep_dpm_solver_second_update(self, x, model_prev_list, t_prev_list, t, solver_type="dpm_solver"): - """ - Multistep solver DPM-Solver-2 from time `t_prev_list[-1]` to time `t`. - """ - if solver_type not in ["dpm_solver", "taylor"]: - raise ValueError("'solver_type' must be either 'dpm_solver' or 'taylor', got {}".format(solver_type)) - ns = self.noise_schedule - dims = x.dim() - model_prev_1, model_prev_0 = model_prev_list - t_prev_1, t_prev_0 = t_prev_list - lambda_prev_1, lambda_prev_0, lambda_t = ( - ns.marginal_lambda(t_prev_1), - ns.marginal_lambda(t_prev_0), - ns.marginal_lambda(t), - ) - log_alpha_prev_0, log_alpha_t = ( - ns.marginal_log_mean_coeff(t_prev_0), - ns.marginal_log_mean_coeff(t), - ) - sigma_prev_0, sigma_t = ns.marginal_std(t_prev_0), ns.marginal_std(t) - alpha_t = torch.exp(log_alpha_t) - - h_0 = lambda_prev_0 - lambda_prev_1 - h = lambda_t - lambda_prev_0 - r0 = h_0 / h - D1_0 = expand_dims(1.0 / r0, dims) * (model_prev_0 - model_prev_1) - if self.predict_x0: - if solver_type == "dpm_solver": - x_t = ( - expand_dims(sigma_t / sigma_prev_0, dims) * x - - expand_dims(alpha_t * (torch.exp(-h) - 1.0), dims) * model_prev_0 - - 0.5 * expand_dims(alpha_t * (torch.exp(-h) - 1.0), dims) * D1_0 - ) - elif solver_type == "taylor": - x_t = ( - expand_dims(sigma_t / sigma_prev_0, dims) * x - - expand_dims(alpha_t * (torch.exp(-h) - 1.0), dims) * model_prev_0 - + expand_dims(alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0), dims) * D1_0 - ) - else: - if solver_type == "dpm_solver": - x_t = ( - expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x - - expand_dims(sigma_t * (torch.exp(h) - 1.0), dims) * model_prev_0 - - 0.5 * expand_dims(sigma_t * (torch.exp(h) - 1.0), dims) * D1_0 - ) - elif solver_type == "taylor": - x_t = ( - expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x - - expand_dims(sigma_t * (torch.exp(h) - 1.0), dims) * model_prev_0 - - expand_dims(sigma_t * ((torch.exp(h) - 1.0) / h - 1.0), dims) * D1_0 - ) - return x_t - - def multistep_dpm_solver_third_update(self, x, model_prev_list, t_prev_list, t, solver_type="dpm_solver"): - """ - Multistep solver DPM-Solver-3 from time `t_prev_list[-1]` to time `t`. - """ - ns = self.noise_schedule - dims = x.dim() - model_prev_2, model_prev_1, model_prev_0 = model_prev_list - t_prev_2, t_prev_1, t_prev_0 = t_prev_list - lambda_prev_2, lambda_prev_1, lambda_prev_0, lambda_t = ( - ns.marginal_lambda(t_prev_2), - ns.marginal_lambda(t_prev_1), - ns.marginal_lambda(t_prev_0), - ns.marginal_lambda(t), - ) - log_alpha_prev_0, log_alpha_t = ( - ns.marginal_log_mean_coeff(t_prev_0), - ns.marginal_log_mean_coeff(t), - ) - sigma_prev_0, sigma_t = ns.marginal_std(t_prev_0), ns.marginal_std(t) - alpha_t = torch.exp(log_alpha_t) - - h_1 = lambda_prev_1 - lambda_prev_2 - h_0 = lambda_prev_0 - lambda_prev_1 - h = lambda_t - lambda_prev_0 - r0, r1 = h_0 / h, h_1 / h - D1_0 = expand_dims(1.0 / r0, dims) * (model_prev_0 - model_prev_1) - D1_1 = expand_dims(1.0 / r1, dims) * (model_prev_1 - model_prev_2) - D1 = D1_0 + expand_dims(r0 / (r0 + r1), dims) * (D1_0 - D1_1) - D2 = expand_dims(1.0 / (r0 + r1), dims) * (D1_0 - D1_1) - if self.predict_x0: - x_t = ( - expand_dims(sigma_t / sigma_prev_0, dims) * x - - expand_dims(alpha_t * (torch.exp(-h) - 1.0), dims) * model_prev_0 - + expand_dims(alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0), dims) * D1 - - expand_dims(alpha_t * ((torch.exp(-h) - 1.0 + h) / h ** 2 - 0.5), dims) * D2 - ) - else: - x_t = ( - expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x - - expand_dims(sigma_t * (torch.exp(h) - 1.0), dims) * model_prev_0 - - expand_dims(sigma_t * ((torch.exp(h) - 1.0) / h - 1.0), dims) * D1 - - expand_dims(sigma_t * ((torch.exp(h) - 1.0 - h) / h ** 2 - 0.5), dims) * D2 - ) - return x_t - - def multistep_dpm_solver_update(self, x, model_prev_list, t_prev_list, t, order, solver_type="dpm_solver"): - """ - Multistep DPM-Solver with the order `order` from time `t_prev_list[-1]` to time `t`. - """ - if order == 1: - return self.dpm_solver_first_update(x, t_prev_list[-1], t, model_s=model_prev_list[-1]) - elif order == 2: - return self.multistep_dpm_solver_second_update(x, model_prev_list, t_prev_list, t, solver_type=solver_type) - elif order == 3: - return self.multistep_dpm_solver_third_update(x, model_prev_list, t_prev_list, t, solver_type=solver_type) - else: - raise ValueError("Solver order must be 1 or 2 or 3, got {}".format(order)) - - def sample( - self, - x, - steps=20, - t_start=None, - t_end=None, - order=3, - skip_type="time_uniform", - method="singlestep", - lower_order_final=True, - denoise_to_zero=False, - solver_type="dpm_solver", - atol=0.0078, - rtol=0.05, - ): - """ - Compute the sample at time `t_end` by DPM-Solver, given the initial `x` at time `t_start`. - """ - t_0 = 1.0 / self.noise_schedule.total_N if t_end is None else t_end - t_T = self.noise_schedule.T if t_start is None else t_start - device = x.device - - if method == "multistep": - assert steps >= order - timesteps = self.get_time_steps(skip_type=skip_type, t_T=t_T, t_0=t_0, N=steps, device=device) - assert timesteps.shape[0] - 1 == steps - with torch.no_grad(): - vec_t = timesteps[0].expand((x.shape[0])) - model_prev_list = [self.model_fn(x, vec_t)] - t_prev_list = [vec_t] - # Init the first `order` values by lower order multistep DPM-Solver. - for init_order in range(1, order): - vec_t = timesteps[init_order].expand(x.shape[0]) - x = self.multistep_dpm_solver_update( - x, model_prev_list, t_prev_list, vec_t, init_order, solver_type=solver_type, - ) - model_prev_list.append(self.model_fn(x, vec_t)) - t_prev_list.append(vec_t) - # Compute the remaining values by `order`-th order multistep DPM-Solver. - for step in range(order, steps + 1): - vec_t = timesteps[step].expand(x.shape[0]) - if lower_order_final and steps < 15: - step_order = min(order, steps + 1 - step) - else: - step_order = order - x = self.multistep_dpm_solver_update( - x, model_prev_list, t_prev_list, vec_t, step_order, solver_type=solver_type, - ) - for i in range(order - 1): - t_prev_list[i] = t_prev_list[i + 1] - model_prev_list[i] = model_prev_list[i + 1] - t_prev_list[-1] = vec_t - # We do not need to evaluate the final model value. - if step < steps: - model_prev_list[-1] = self.model_fn(x, vec_t) - if denoise_to_zero: - x = self.denoise_to_zero_fn(x, torch.ones((x.shape[0],)).to(device) * t_0) - return x diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py deleted file mode 100644 index ac4f8f7ad73d..000000000000 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py +++ /dev/null @@ -1,838 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import math - -import torch -import torchsde -from scipy import integrate -from torch import nn -from torchdiffeq import odeint -from tqdm.auto import tqdm, trange - - -def append_zero(x): - return torch.cat([x, x.new_zeros([1])]) - - -def append_dims(x, target_dims): - """Appends dimensions to the end of a tensor until it has target_dims dimensions.""" - dims_to_append = target_dims - x.ndim - if dims_to_append < 0: - raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less') - return x[(...,) + (None,) * dims_to_append] - - -def get_sigmas_karras(n, sigma_min, sigma_max, rho=7.0, device='cpu'): - """Constructs the noise schedule of Karras et al. (2022).""" - ramp = torch.linspace(0, 1, n) - min_inv_rho = sigma_min ** (1 / rho) - max_inv_rho = sigma_max ** (1 / rho) - sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho - return append_zero(sigmas).to(device) - - -def get_sigmas_exponential(n, sigma_min, sigma_max, device='cpu'): - """Constructs an exponential noise schedule.""" - sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), n, device=device).exp() - return append_zero(sigmas) - - -def get_sigmas_polyexponential(n, sigma_min, sigma_max, rho=1.0, device='cpu'): - """Constructs an polynomial in log sigma noise schedule.""" - ramp = torch.linspace(1, 0, n, device=device) ** rho - sigmas = torch.exp(ramp * (math.log(sigma_max) - math.log(sigma_min)) + math.log(sigma_min)) - return append_zero(sigmas) - - -def get_sigmas_vp(n, beta_d=19.9, beta_min=0.1, eps_s=1e-3, device='cpu'): - """Constructs a continuous VP noise schedule.""" - t = torch.linspace(1, eps_s, n, device=device) - sigmas = torch.sqrt(torch.exp(beta_d * t ** 2 / 2 + beta_min * t) - 1) - return append_zero(sigmas) - - -def to_d(x, sigma, denoised): - """Converts a denoiser output to a Karras ODE derivative.""" - return (x - denoised) / append_dims(sigma, x.ndim) - - -def get_ancestral_step(sigma_from, sigma_to, eta=1.0): - """Calculates the noise level (sigma_down) to step down to and the amount - of noise to add (sigma_up) when doing an ancestral sampling step.""" - if not eta: - return sigma_to, 0.0 - sigma_up = min(sigma_to, eta * (sigma_to ** 2 * (sigma_from ** 2 - sigma_to ** 2) / sigma_from ** 2) ** 0.5) - sigma_down = (sigma_to ** 2 - sigma_up ** 2) ** 0.5 - return sigma_down, sigma_up - - -def default_noise_sampler(x): - return lambda sigma, sigma_next: torch.randn_like(x) - - -class BatchedBrownianTree: - """A wrapper around torchsde.BrownianTree that enables batches of entropy.""" - - def __init__(self, x, t0, t1, seed=None, **kwargs): - t0, t1, self.sign = self.sort(t0, t1) - w0 = kwargs.get('w0', torch.zeros_like(x)) - if seed is None: - seed = torch.randint(0, 2 ** 63 - 1, []).item() - self.batched = True - try: - assert len(seed) == x.shape[0] - w0 = w0[0] - except TypeError: - seed = [seed] - self.batched = False - self.trees = [torchsde.BrownianTree(t0, w0, t1, entropy=s, **kwargs) for s in seed] - - @staticmethod - def sort(a, b): - return (a, b, 1) if a < b else (b, a, -1) - - def __call__(self, t0, t1): - t0, t1, sign = self.sort(t0, t1) - w = torch.stack([tree(t0, t1) for tree in self.trees]) * (self.sign * sign) - return w if self.batched else w[0] - - -class BrownianTreeNoiseSampler: - """A noise sampler backed by a torchsde.BrownianTree. - - Args: - x (Tensor): The tensor whose shape, device and dtype to use to generate - random samples. - sigma_min (float): The low end of the valid interval. - sigma_max (float): The high end of the valid interval. - seed (int or List[int]): The random seed. If a list of seeds is - supplied instead of a single integer, then the noise sampler will - use one BrownianTree per batch item, each with its own seed. - transform (callable): A function that maps sigma to the sampler's - internal timestep. - """ - - def __init__(self, x, sigma_min, sigma_max, seed=None, transform=lambda x: x): - self.transform = transform - t0, t1 = self.transform(torch.as_tensor(sigma_min)), self.transform(torch.as_tensor(sigma_max)) - self.tree = BatchedBrownianTree(x, t0, t1, seed) - - def __call__(self, sigma, sigma_next): - t0, t1 = self.transform(torch.as_tensor(sigma)), self.transform(torch.as_tensor(sigma_next)) - return self.tree(t0, t1) / (t1 - t0).abs().sqrt() - - -@torch.no_grad() -def sample_euler( - model, - x, - sigmas, - extra_args=None, - callback=None, - disable=None, - s_churn=0.0, - s_tmin=0.0, - s_tmax=float('inf'), - s_noise=1.0, -): - """Implements Algorithm 2 (Euler steps) from Karras et al. (2022).""" - extra_args = {} if extra_args is None else extra_args - s_in = x.new_ones([x.shape[0]]) - for i in trange(len(sigmas) - 1, disable=disable): - gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.0 - eps = torch.randn_like(x) * s_noise - sigma_hat = sigmas[i] * (gamma + 1) - if gamma > 0: - x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5 - denoised = model(x, sigma_hat * s_in, **extra_args) - d = to_d(x, sigma_hat, denoised) - if callback is not None: - callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised}) - dt = sigmas[i + 1] - sigma_hat - # Euler method - x = x + d * dt - return x - - -@torch.no_grad() -def sample_euler_ancestral( - model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1.0, s_noise=1.0, noise_sampler=None -): - """Ancestral sampling with Euler method steps.""" - extra_args = {} if extra_args is None else extra_args - noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler - s_in = x.new_ones([x.shape[0]]) - for i in trange(len(sigmas) - 1, disable=disable): - denoised = model(x, sigmas[i] * s_in, **extra_args) - sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta) - if callback is not None: - callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) - d = to_d(x, sigmas[i], denoised) - # Euler method - dt = sigma_down - sigmas[i] - x = x + d * dt - if sigmas[i + 1] > 0: - x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up - return x - - -@torch.no_grad() -def sample_heun( - model, - x, - sigmas, - extra_args=None, - callback=None, - disable=None, - s_churn=0.0, - s_tmin=0.0, - s_tmax=float('inf'), - s_noise=1.0, -): - """Implements Algorithm 2 (Heun steps) from Karras et al. (2022).""" - extra_args = {} if extra_args is None else extra_args - s_in = x.new_ones([x.shape[0]]) - for i in trange(len(sigmas) - 1, disable=disable): - gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.0 - eps = torch.randn_like(x) * s_noise - sigma_hat = sigmas[i] * (gamma + 1) - if gamma > 0: - x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5 - denoised = model(x, sigma_hat * s_in, **extra_args) - d = to_d(x, sigma_hat, denoised) - if callback is not None: - callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised}) - dt = sigmas[i + 1] - sigma_hat - if sigmas[i + 1] == 0: - # Euler method - x = x + d * dt - else: - # Heun's method - x_2 = x + d * dt - denoised_2 = model(x_2, sigmas[i + 1] * s_in, **extra_args) - d_2 = to_d(x_2, sigmas[i + 1], denoised_2) - d_prime = (d + d_2) / 2 - x = x + d_prime * dt - return x - - -@torch.no_grad() -def sample_dpm_2( - model, - x, - sigmas, - extra_args=None, - callback=None, - disable=None, - s_churn=0.0, - s_tmin=0.0, - s_tmax=float('inf'), - s_noise=1.0, -): - """A sampler inspired by DPM-Solver-2 and Algorithm 2 from Karras et al. (2022).""" - extra_args = {} if extra_args is None else extra_args - s_in = x.new_ones([x.shape[0]]) - for i in trange(len(sigmas) - 1, disable=disable): - gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.0 - eps = torch.randn_like(x) * s_noise - sigma_hat = sigmas[i] * (gamma + 1) - if gamma > 0: - x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5 - denoised = model(x, sigma_hat * s_in, **extra_args) - d = to_d(x, sigma_hat, denoised) - if callback is not None: - callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised}) - if sigmas[i + 1] == 0: - # Euler method - dt = sigmas[i + 1] - sigma_hat - x = x + d * dt - else: - # DPM-Solver-2 - sigma_mid = sigma_hat.log().lerp(sigmas[i + 1].log(), 0.5).exp() - dt_1 = sigma_mid - sigma_hat - dt_2 = sigmas[i + 1] - sigma_hat - x_2 = x + d * dt_1 - denoised_2 = model(x_2, sigma_mid * s_in, **extra_args) - d_2 = to_d(x_2, sigma_mid, denoised_2) - x = x + d_2 * dt_2 - return x - - -@torch.no_grad() -def sample_dpm_2_ancestral( - model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1.0, s_noise=1.0, noise_sampler=None -): - """Ancestral sampling with DPM-Solver second-order steps.""" - extra_args = {} if extra_args is None else extra_args - noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler - s_in = x.new_ones([x.shape[0]]) - for i in trange(len(sigmas) - 1, disable=disable): - denoised = model(x, sigmas[i] * s_in, **extra_args) - sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta) - if callback is not None: - callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) - d = to_d(x, sigmas[i], denoised) - if sigma_down == 0: - # Euler method - dt = sigma_down - sigmas[i] - x = x + d * dt - else: - # DPM-Solver-2 - sigma_mid = sigmas[i].log().lerp(sigma_down.log(), 0.5).exp() - dt_1 = sigma_mid - sigmas[i] - dt_2 = sigma_down - sigmas[i] - x_2 = x + d * dt_1 - denoised_2 = model(x_2, sigma_mid * s_in, **extra_args) - d_2 = to_d(x_2, sigma_mid, denoised_2) - x = x + d_2 * dt_2 - x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up - return x - - -def linear_multistep_coeff(order, t, i, j): - if order - 1 > i: - raise ValueError(f'Order {order} too high for step {i}') - - def fn(tau): - prod = 1.0 - for k in range(order): - if j == k: - continue - prod *= (tau - t[i - k]) / (t[i - j] - t[i - k]) - return prod - - return integrate.quad(fn, t[i], t[i + 1], epsrel=1e-4)[0] - - -@torch.no_grad() -def sample_lms(model, x, sigmas, extra_args=None, callback=None, disable=None, order=4): - extra_args = {} if extra_args is None else extra_args - s_in = x.new_ones([x.shape[0]]) - sigmas_cpu = sigmas.detach().cpu().numpy() - ds = [] - for i in trange(len(sigmas) - 1, disable=disable): - denoised = model(x, sigmas[i] * s_in, **extra_args) - d = to_d(x, sigmas[i], denoised) - ds.append(d) - if len(ds) > order: - ds.pop(0) - if callback is not None: - callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) - cur_order = min(i + 1, order) - coeffs = [linear_multistep_coeff(cur_order, sigmas_cpu, i, j) for j in range(cur_order)] - x = x + sum(coeff * d for coeff, d in zip(coeffs, reversed(ds))) - return x - - -@torch.no_grad() -def log_likelihood(model, x, sigma_min, sigma_max, extra_args=None, atol=1e-4, rtol=1e-4): - extra_args = {} if extra_args is None else extra_args - s_in = x.new_ones([x.shape[0]]) - v = torch.randint_like(x, 2) * 2 - 1 - fevals = 0 - - def ode_fn(sigma, x): - nonlocal fevals - with torch.enable_grad(): - x = x[0].detach().requires_grad_() - denoised = model(x, sigma * s_in, **extra_args) - d = to_d(x, sigma, denoised) - fevals += 1 - grad = torch.autograd.grad((d * v).sum(), x)[0] - d_ll = (v * grad).flatten(1).sum(1) - return d.detach(), d_ll - - x_min = x, x.new_zeros([x.shape[0]]) - t = x.new_tensor([sigma_min, sigma_max]) - sol = odeint(ode_fn, x_min, t, atol=atol, rtol=rtol, method='dopri5') - latent, delta_ll = sol[0][-1], sol[1][-1] - ll_prior = torch.distributions.Normal(0, sigma_max).log_prob(latent).flatten(1).sum(1) - return ll_prior + delta_ll, {'fevals': fevals} - - -class PIDStepSizeController: - """A PID controller for ODE adaptive step size control.""" - - def __init__(self, h, pcoeff, icoeff, dcoeff, order=1, accept_safety=0.81, eps=1e-8): - self.h = h - self.b1 = (pcoeff + icoeff + dcoeff) / order - self.b2 = -(pcoeff + 2 * dcoeff) / order - self.b3 = dcoeff / order - self.accept_safety = accept_safety - self.eps = eps - self.errs = [] - - def limiter(self, x): - return 1 + math.atan(x - 1) - - def propose_step(self, error): - inv_error = 1 / (float(error) + self.eps) - if not self.errs: - self.errs = [inv_error, inv_error, inv_error] - self.errs[0] = inv_error - factor = self.errs[0] ** self.b1 * self.errs[1] ** self.b2 * self.errs[2] ** self.b3 - factor = self.limiter(factor) - accept = factor >= self.accept_safety - if accept: - self.errs[2] = self.errs[1] - self.errs[1] = self.errs[0] - self.h *= factor - return accept - - -class DPMSolver(nn.Module): - """DPM-Solver. See https://arxiv.org/abs/2206.00927.""" - - def __init__(self, model, extra_args=None, eps_callback=None, info_callback=None): - super().__init__() - self.model = model - self.extra_args = {} if extra_args is None else extra_args - self.eps_callback = eps_callback - self.info_callback = info_callback - - def t(self, sigma): - return -sigma.log() - - def sigma(self, t): - return t.neg().exp() - - def eps(self, eps_cache, key, x, t, *args, **kwargs): - if key in eps_cache: - return eps_cache[key], eps_cache - sigma = self.sigma(t) * x.new_ones([x.shape[0]]) - eps = (x - self.model(x, sigma, *args, **self.extra_args, **kwargs)) / self.sigma(t) - if self.eps_callback is not None: - self.eps_callback() - return eps, {key: eps, **eps_cache} - - def dpm_solver_1_step(self, x, t, t_next, eps_cache=None): - eps_cache = {} if eps_cache is None else eps_cache - h = t_next - t - eps, eps_cache = self.eps(eps_cache, 'eps', x, t) - x_1 = x - self.sigma(t_next) * h.expm1() * eps - return x_1, eps_cache - - def dpm_solver_2_step(self, x, t, t_next, r1=1 / 2, eps_cache=None): - eps_cache = {} if eps_cache is None else eps_cache - h = t_next - t - eps, eps_cache = self.eps(eps_cache, 'eps', x, t) - s1 = t + r1 * h - u1 = x - self.sigma(s1) * (r1 * h).expm1() * eps - eps_r1, eps_cache = self.eps(eps_cache, 'eps_r1', u1, s1) - x_2 = x - self.sigma(t_next) * h.expm1() * eps - self.sigma(t_next) / (2 * r1) * h.expm1() * (eps_r1 - eps) - return x_2, eps_cache - - def dpm_solver_3_step(self, x, t, t_next, r1=1 / 3, r2=2 / 3, eps_cache=None): - eps_cache = {} if eps_cache is None else eps_cache - h = t_next - t - eps, eps_cache = self.eps(eps_cache, 'eps', x, t) - s1 = t + r1 * h - s2 = t + r2 * h - u1 = x - self.sigma(s1) * (r1 * h).expm1() * eps - eps_r1, eps_cache = self.eps(eps_cache, 'eps_r1', u1, s1) - u2 = ( - x - - self.sigma(s2) * (r2 * h).expm1() * eps - - self.sigma(s2) * (r2 / r1) * ((r2 * h).expm1() / (r2 * h) - 1) * (eps_r1 - eps) - ) - eps_r2, eps_cache = self.eps(eps_cache, 'eps_r2', u2, s2) - x_3 = x - self.sigma(t_next) * h.expm1() * eps - self.sigma(t_next) / r2 * (h.expm1() / h - 1) * (eps_r2 - eps) - return x_3, eps_cache - - def dpm_solver_fast(self, x, t_start, t_end, nfe, eta=0.0, s_noise=1.0, noise_sampler=None): - noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler - if not t_end > t_start and eta: - raise ValueError('eta must be 0 for reverse sampling') - - m = math.floor(nfe / 3) + 1 - ts = torch.linspace(t_start, t_end, m + 1, device=x.device) - - if nfe % 3 == 0: - orders = [3] * (m - 2) + [2, 1] - else: - orders = [3] * (m - 1) + [nfe % 3] - - for i in range(len(orders)): - eps_cache = {} - t, t_next = ts[i], ts[i + 1] - if eta: - sd, su = get_ancestral_step(self.sigma(t), self.sigma(t_next), eta) - t_next_ = torch.minimum(t_end, self.t(sd)) - su = (self.sigma(t_next) ** 2 - self.sigma(t_next_) ** 2) ** 0.5 - else: - t_next_, su = t_next, 0.0 - - eps, eps_cache = self.eps(eps_cache, 'eps', x, t) - denoised = x - self.sigma(t) * eps - if self.info_callback is not None: - self.info_callback({'x': x, 'i': i, 't': ts[i], 't_up': t, 'denoised': denoised}) - - if orders[i] == 1: - x, eps_cache = self.dpm_solver_1_step(x, t, t_next_, eps_cache=eps_cache) - elif orders[i] == 2: - x, eps_cache = self.dpm_solver_2_step(x, t, t_next_, eps_cache=eps_cache) - else: - x, eps_cache = self.dpm_solver_3_step(x, t, t_next_, eps_cache=eps_cache) - - x = x + su * s_noise * noise_sampler(self.sigma(t), self.sigma(t_next)) - - return x - - def dpm_solver_adaptive( - self, - x, - t_start, - t_end, - order=3, - rtol=0.05, - atol=0.0078, - h_init=0.05, - pcoeff=0.0, - icoeff=1.0, - dcoeff=0.0, - accept_safety=0.81, - eta=0.0, - s_noise=1.0, - noise_sampler=None, - ): - noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler - if order not in {2, 3}: - raise ValueError('order should be 2 or 3') - forward = t_end > t_start - if not forward and eta: - raise ValueError('eta must be 0 for reverse sampling') - h_init = abs(h_init) * (1 if forward else -1) - atol = torch.tensor(atol) - rtol = torch.tensor(rtol) - s = t_start - x_prev = x - accept = True - pid = PIDStepSizeController(h_init, pcoeff, icoeff, dcoeff, 1.5 if eta else order, accept_safety) - info = {'steps': 0, 'nfe': 0, 'n_accept': 0, 'n_reject': 0} - - while s < t_end - 1e-5 if forward else s > t_end + 1e-5: - eps_cache = {} - t = torch.minimum(t_end, s + pid.h) if forward else torch.maximum(t_end, s + pid.h) - if eta: - sd, su = get_ancestral_step(self.sigma(s), self.sigma(t), eta) - t_ = torch.minimum(t_end, self.t(sd)) - su = (self.sigma(t) ** 2 - self.sigma(t_) ** 2) ** 0.5 - else: - t_, su = t, 0.0 - - eps, eps_cache = self.eps(eps_cache, 'eps', x, s) - denoised = x - self.sigma(s) * eps - - if order == 2: - x_low, eps_cache = self.dpm_solver_1_step(x, s, t_, eps_cache=eps_cache) - x_high, eps_cache = self.dpm_solver_2_step(x, s, t_, eps_cache=eps_cache) - else: - x_low, eps_cache = self.dpm_solver_2_step(x, s, t_, r1=1 / 3, eps_cache=eps_cache) - x_high, eps_cache = self.dpm_solver_3_step(x, s, t_, eps_cache=eps_cache) - delta = torch.maximum(atol, rtol * torch.maximum(x_low.abs(), x_prev.abs())) - error = torch.linalg.norm((x_low - x_high) / delta) / x.numel() ** 0.5 - accept = pid.propose_step(error) - if accept: - x_prev = x_low - x = x_high + su * s_noise * noise_sampler(self.sigma(s), self.sigma(t)) - s = t - info['n_accept'] += 1 - else: - info['n_reject'] += 1 - info['nfe'] += order - info['steps'] += 1 - - if self.info_callback is not None: - self.info_callback( - { - 'x': x, - 'i': info['steps'] - 1, - 't': s, - 't_up': s, - 'denoised': denoised, - 'error': error, - 'h': pid.h, - **info, - } - ) - - return x, info - - -@torch.no_grad() -def sample_dpm_fast( - model, - x, - sigma_min, - sigma_max, - n, - extra_args=None, - callback=None, - disable=None, - eta=0.0, - s_noise=1.0, - noise_sampler=None, -): - """DPM-Solver-Fast (fixed step size). See https://arxiv.org/abs/2206.00927.""" - if sigma_min <= 0 or sigma_max <= 0: - raise ValueError('sigma_min and sigma_max must not be 0') - with tqdm(total=n, disable=disable) as pbar: - dpm_solver = DPMSolver(model, extra_args, eps_callback=pbar.update) - if callback is not None: - dpm_solver.info_callback = lambda info: callback( - {'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info} - ) - return dpm_solver.dpm_solver_fast( - x, - dpm_solver.t(torch.tensor(sigma_max)), - dpm_solver.t(torch.tensor(sigma_min)), - n, - eta, - s_noise, - noise_sampler, - ) - - -@torch.no_grad() -def sample_dpm_adaptive( - model, - x, - sigma_min, - sigma_max, - extra_args=None, - callback=None, - disable=None, - order=3, - rtol=0.05, - atol=0.0078, - h_init=0.05, - pcoeff=0.0, - icoeff=1.0, - dcoeff=0.0, - accept_safety=0.81, - eta=0.0, - s_noise=1.0, - noise_sampler=None, - return_info=False, -): - """DPM-Solver-12 and 23 (adaptive step size). See https://arxiv.org/abs/2206.00927.""" - if sigma_min <= 0 or sigma_max <= 0: - raise ValueError('sigma_min and sigma_max must not be 0') - with tqdm(disable=disable) as pbar: - dpm_solver = DPMSolver(model, extra_args, eps_callback=pbar.update) - if callback is not None: - dpm_solver.info_callback = lambda info: callback( - {'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info} - ) - x, info = dpm_solver.dpm_solver_adaptive( - x, - dpm_solver.t(torch.tensor(sigma_max)), - dpm_solver.t(torch.tensor(sigma_min)), - order, - rtol, - atol, - h_init, - pcoeff, - icoeff, - dcoeff, - accept_safety, - eta, - s_noise, - noise_sampler, - ) - if return_info: - return x, info - return x - - -@torch.no_grad() -def sample_dpmpp_2s_ancestral( - model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1.0, s_noise=1.0, noise_sampler=None -): - """Ancestral sampling with DPM-Solver++(2S) second-order steps.""" - extra_args = {} if extra_args is None else extra_args - noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler - s_in = x.new_ones([x.shape[0]]) - sigma_fn = lambda t: t.neg().exp() - t_fn = lambda sigma: sigma.log().neg() - - for i in trange(len(sigmas) - 1, disable=disable): - denoised = model(x, sigmas[i] * s_in, **extra_args) - sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta) - if callback is not None: - callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) - if sigma_down == 0: - # Euler method - d = to_d(x, sigmas[i], denoised) - dt = sigma_down - sigmas[i] - x = x + d * dt - else: - # DPM-Solver++(2S) - t, t_next = t_fn(sigmas[i]), t_fn(sigma_down) - r = 1 / 2 - h = t_next - t - s = t + r * h - x_2 = (sigma_fn(s) / sigma_fn(t)) * x - (-h * r).expm1() * denoised - denoised_2 = model(x_2, sigma_fn(s) * s_in, **extra_args) - x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised_2 - # Noise addition - if sigmas[i + 1] > 0: - x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up - return x - - -@torch.no_grad() -def sample_dpmpp_sde( - model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1.0, s_noise=1.0, noise_sampler=None, r=1 / 2 -): - """DPM-Solver++ (stochastic).""" - sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() - noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max) if noise_sampler is None else noise_sampler - extra_args = {} if extra_args is None else extra_args - s_in = x.new_ones([x.shape[0]]) - sigma_fn = lambda t: t.neg().exp() - t_fn = lambda sigma: sigma.log().neg() - - for i in trange(len(sigmas) - 1, disable=disable): - denoised = model(x, sigmas[i] * s_in, **extra_args) - if callback is not None: - callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) - if sigmas[i + 1] == 0: - # Euler method - d = to_d(x, sigmas[i], denoised) - dt = sigmas[i + 1] - sigmas[i] - x = x + d * dt - else: - # DPM-Solver++ - t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1]) - h = t_next - t - s = t + h * r - fac = 1 / (2 * r) - - # Step 1 - sd, su = get_ancestral_step(sigma_fn(t), sigma_fn(s), eta) - s_ = t_fn(sd) - x_2 = (sigma_fn(s_) / sigma_fn(t)) * x - (t - s_).expm1() * denoised - x_2 = x_2 + noise_sampler(sigma_fn(t), sigma_fn(s)) * s_noise * su - denoised_2 = model(x_2, sigma_fn(s) * s_in, **extra_args) - - # Step 2 - sd, su = get_ancestral_step(sigma_fn(t), sigma_fn(t_next), eta) - t_next_ = t_fn(sd) - denoised_d = (1 - fac) * denoised + fac * denoised_2 - x = (sigma_fn(t_next_) / sigma_fn(t)) * x - (t - t_next_).expm1() * denoised_d - x = x + noise_sampler(sigma_fn(t), sigma_fn(t_next)) * s_noise * su - return x - - -@torch.no_grad() -def sample_dpmpp_2m(model, x, sigmas, extra_args=None, callback=None, disable=None): - """DPM-Solver++(2M).""" - extra_args = {} if extra_args is None else extra_args - s_in = x.new_ones([x.shape[0]]) - sigma_fn = lambda t: t.neg().exp() - t_fn = lambda sigma: sigma.log().neg() - old_denoised = None - - for i in trange(len(sigmas) - 1, disable=disable): - denoised = model(x, sigmas[i] * s_in, **extra_args) - if callback is not None: - callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) - t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1]) - h = t_next - t - if old_denoised is None or sigmas[i + 1] == 0: - x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised - else: - h_last = t - t_fn(sigmas[i - 1]) - r = h_last / h - denoised_d = (1 + 1 / (2 * r)) * denoised - (1 / (2 * r)) * old_denoised - x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised_d - old_denoised = denoised - return x - - -class DiscreteSchedule(nn.Module): - """A mapping between continuous noise levels (sigmas) and a list of discrete noise - levels.""" - - def __init__(self, sigmas, quantize): - super().__init__() - self.register_buffer('sigmas', sigmas) - self.register_buffer('log_sigmas', sigmas.log()) - self.quantize = quantize - - @property - def sigma_min(self): - return self.sigmas[0] - - @property - def sigma_max(self): - return self.sigmas[-1] - - def get_sigmas(self, n=None): - if n is None: - return append_zero(self.sigmas.flip(0)) - t_max = len(self.sigmas) - 1 - t = torch.linspace(t_max, 0, n, device=self.sigmas.device) - return append_zero(self.t_to_sigma(t)) - - def sigma_to_t(self, sigma, quantize=None): - quantize = self.quantize if quantize is None else quantize - log_sigma = sigma.log() - dists = log_sigma - self.log_sigmas[:, None] - if quantize: - return dists.abs().argmin(dim=0).view(sigma.shape) - low_idx = dists.ge(0).cumsum(dim=0).argmax(dim=0).clamp(max=self.log_sigmas.shape[0] - 2) - high_idx = low_idx + 1 - low, high = self.log_sigmas[low_idx], self.log_sigmas[high_idx] - w = (low - log_sigma) / (low - high) - w = w.clamp(0, 1) - t = (1 - w) * low_idx + w * high_idx - return t.view(sigma.shape) - - def t_to_sigma(self, t): - t = t.float() - low_idx, high_idx, w = t.floor().long(), t.ceil().long(), t.frac() - log_sigma = (1 - w) * self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx] - return log_sigma.exp() - - -class DiscreteEpsDDPMDenoiser(DiscreteSchedule): - """A wrapper for discrete schedule DDPM models that output eps (the predicted - noise).""" - - def __init__(self, model, quantize=False): - alphas_cumprod = model.alphas_cumprod - super().__init__(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5, quantize) - self.inner_model = model - self.sigma_data = 1.0 - - def get_scalings(self, sigma): - c_out = -sigma - c_in = 1 / (sigma ** 2 + self.sigma_data ** 2) ** 0.5 - return c_out, c_in - - def get_eps(self, *args, **kwargs): - return self.inner_model.apply_model(*args, **kwargs) - - def loss(self, input, noise, sigma, **kwargs): - c_out, c_in = [append_dims(x, input.ndim) for x in self.get_scalings(sigma)] - noised_input = input + noise * append_dims(sigma, input.ndim) - eps = self.get_eps(noised_input * c_in, self.sigma_to_t(sigma), **kwargs) - return (eps - noise).pow(2).flatten(1).mean(1) - - def forward(self, input, sigma, **kwargs): - c_out, c_in = [append_dims(x, input.ndim) for x in self.get_scalings(sigma)] - eps = self.get_eps(input * c_in, self.sigma_to_t(sigma), **kwargs) - return input + eps * c_out diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/para_ddim.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/para_ddim.py deleted file mode 100644 index af5988f79502..000000000000 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/para_ddim.py +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Any, Tuple - -import numpy as np -import torch -import torch.nn as nn -from tqdm import tqdm - -from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler -from nemo.collections.multimodal.models.stable_diffusion.samplers.base_sampler import AbstractBaseSampler -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import noise_like - - -class ParaDDIMSampler(AbstractBaseSampler): - """ Parallel version of DDIM sampler. Utilizes Parallel Sampling (https://arxiv.org/abs/2305.16317). - It reduces the latency of a model, but the total compute cost is increased. - - The main three parameters that affect the performance of the algorithm are: - Parallelism (int): Defines the maximal size of the window. That many diffusion steps can happen in - parallel. - Tolerance (float): Sets the maximal error tolerance defined as a ratio between drift of the trajectory - and noise. The larger the tolerance the faster the method is. The smaller the tolerance the better - quality output is achieved. - Number of GPUs (int): Number of GPUs utilizing DataParallel parallelism to compute diffusion steps in - parallel. - - Different combination of these parameters values can result in different latency-quality-compute trade-off. - For more details please refer to the Parallel Sampling paper (https://arxiv.org/abs/2305.16317). - """ - - def __init__(self, model, **kwargs): - super().__init__(model, sampler=Sampler.PARA_DDIM, **kwargs) - - @torch.no_grad() - def p_sampling_fn(self): - pass - - @torch.no_grad() - def para_ddim_sampling_fn( - self, - cond: torch.tensor, - batch_size: int, - per_latent_shape: Tuple[int, ...], - x_T: torch.tensor = None, - steps: int = 50, - parallelism: int = 8, - tolerance: float = 0.1, - temperature: float = 0.0, - noise_dropout: float = 0.0, - quantize_denoised: bool = False, - unconditional_guidance_scale: float = 1.0, - unconditional_conditioning: torch.tensor = None, - score_corrector=None, - corrector_kwargs=None, - ): - print( - f"Running {self.sampler.name} with {steps} timesteps, " - f"parallelism={parallelism}, " - f"and tolerance={tolerance}" - ) - - device = self.model.betas.device - size = (batch_size, *per_latent_shape) - x_T = torch.randn(size, generator=self.model.rng, device=device) if x_T is None else x_T - time_range = np.flip(self.ddim_timesteps).copy() # Make a copy to resolve issue with negative strides - - # Processing window of timesteps [window_start, window_end) in parallel - window_start = 0 - window_size = min(parallelism, steps) - window_end = window_size - - # Store the whole trajectory in memory; it will be iteratively improved - latents = torch.stack([x_T] * (steps + 1)) - - # Pre-computing noises to ensure noise is sampled once per diffusion step - noises = torch.zeros_like(latents) - for i in range(steps - 1, -1, -1): - gaussian_noise = torch.randn_like(x_T) - noise = (self.ddim_variance[i] ** 0.5) * gaussian_noise - noises[i] = noise.clone() - - # Store inverse of the variance to avoid division at every iteration - variance = [self.ddim_variance[i] for i in range(steps - 1, -1, -1)] + [0] - inverse_variance = 1.0 / torch.tensor(variance).to(noises.device) - latent_dim = noises[0, 0].numel() - inverse_variance_norm = inverse_variance[:, None] / latent_dim - - scaled_tolerance = tolerance ** 2 - - with tqdm(total=steps) as progress_bar: - while window_start < steps: - window_size = window_end - window_start - - # Prepare the input to the model. Model will perform window_size noise predictions in parallel - window_cond = torch.stack([cond] * window_size) - window_uncond_cond = torch.stack([unconditional_conditioning] * window_size) - window_latents = latents[window_start:window_end] - window_timesteps = torch.tensor(time_range[window_start:window_end], device=device).repeat( - 1, batch_size - ) - - # Reshape (w, b, ...) -> (w * b, ...) - latents_input = window_latents.flatten(0, 1) - timesteps_input = window_timesteps.flatten(0, 1) - cond_input = window_cond.flatten(0, 1) - uncond_cond_input = window_uncond_cond.flatten(0, 1) - - # Model call - e_t, _ = self._get_model_output( - latents_input, - timesteps_input, - uncond_cond_input, - unconditional_guidance_scale, - score_corrector, - cond_input, - corrector_kwargs, - ) - # Reshape back (w * b, ...) -> (w, b, ...) - e_t = e_t.reshape(window_size, batch_size, *per_latent_shape) - - # Perform Picard iteration - window_latents_picard_iteration = self._get_x_prev( - batch_size=batch_size, - steps=steps, - x=window_latents, - e_t=e_t, - temperature=temperature, - noise_dropout=noise_dropout, - quantize_denoised=quantize_denoised, - window_start=window_start, - window_end=window_end, - device=device, - ).reshape(window_latents.shape) - - # Calculate cumulative drift - delta = window_latents_picard_iteration - window_latents - delta_cum = torch.cumsum(delta, dim=0) - block_latents_new = latents[window_start][None,] + delta_cum - - # Calculate the error - error = torch.linalg.norm( - (block_latents_new - latents[window_start + 1 : window_end + 1]).reshape( - window_size, batch_size, -1 - ), - dim=-1, - ).pow(2) - - # Calculate error magnitude - error_magnitude = error * inverse_variance_norm[window_start + 1 : window_end + 1] - # Pad so at least one value exceeds tolerance - error_magnitude = nn.functional.pad(error_magnitude, (0, 0, 0, 1), value=1e9) - error_exceeding = torch.max(error_magnitude > scaled_tolerance, dim=1).values.int() - - # Find how many diffusion steps have error below given threshold tolerance and shift the window - ind = torch.argmax(error_exceeding).item() - new_window_start = window_start + min(1 + ind, window_size) - new_window_end = min(new_window_start + window_size, steps) - - # Update the trajectory - latents[window_start + 1 : window_end + 1] = block_latents_new - latents[window_end : new_window_end + 1] = latents[window_end][ - None, - ] - - progress_bar.update(new_window_start - window_start) - window_start = new_window_start - window_end = new_window_end - - intermediates = {"x_inter": [latents[i] for i in range(steps)]} - return latents[-1], intermediates - - def _get_x_prev( - self, - batch_size: int, - steps: int, - x: torch.tensor, - e_t: torch.tensor, - temperature: float, - noise_dropout: float, - quantize_denoised: bool, - window_start: int, - window_end: int, - device: Any, - ): - alphas = self.ddim_alphas - alphas_prev = self.ddim_alphas_prev - sqrt_one_minus_alphas = self.ddim_sqrt_one_minus_alphas - sigmas = self.ddim_sigmas - window_size = window_end - window_start - - def prepare_tensor(x): - x = torch.tensor(x, device=device).flip(dims=[0]) - x = x.unsqueeze(1).repeat(1, batch_size).reshape(window_size, batch_size, 1, 1, 1) - return x - - # Select parameters corresponding to the currently considered timesteps. Note that index_end < index_start, - # because during diffusion the time is reversed (we go from timestep step to 0) - index_start = steps - window_start - index_end = steps - window_end - a_t = prepare_tensor(alphas[index_end:index_start]) - a_prev = prepare_tensor(alphas_prev[index_end:index_start]) - sigma_t = prepare_tensor(sigmas[index_end:index_start]) - sqrt_one_minus_at = prepare_tensor(sqrt_one_minus_alphas[index_end:index_start]) - - # Current prediction for x_0 - pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt() - if quantize_denoised: - pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0) - - # Direction pointing to x_t - dir_xt = (1.0 - a_prev - sigma_t ** 2).sqrt() * e_t - - noise = sigma_t * noise_like(x.shape, device) * temperature - if noise_dropout > 0.0: - noise = torch.nn.functional.dropout(noise, p=noise_dropout) - - x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise - return x_prev diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py deleted file mode 100644 index 1602ec6245d4..000000000000 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""SAMPLING ONLY.""" - -import torch - -from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler -from nemo.collections.multimodal.models.stable_diffusion.samplers.base_sampler import AbstractBaseSampler - - -class PLMSSampler(AbstractBaseSampler): - def __init__(self, model, schedule="linear", **kwargs): - super().__init__(model, sampler=Sampler.PLMS, schedule="linear", **kwargs) - - def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, verbose=False): - if ddim_eta != 0: - raise ValueError('ddim_eta must be 0 for PLMS') - super().make_schedule(ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, verbose=False) - - @torch.no_grad() - def p_sampling_fn( - self, - x, - c, - t, - index, - repeat_noise=False, - use_original_steps=False, - quantize_denoised=False, - temperature=1.0, - noise_dropout=0.0, - score_corrector=None, - corrector_kwargs=None, - unconditional_guidance_scale=1.0, - unconditional_conditioning=None, - old_eps=None, - t_next=None, - ): - b, *_, device = *x.shape, x.device - e_t, model_output = self._get_model_output( - x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, corrector_kwargs - ) - if len(old_eps) == 0: - # Pseudo Improved Euler (2nd order) - x_prev, pred_x0 = self._get_x_prev_and_pred_x0( - use_original_steps, - b, - index, - device, - x, - t, - model_output, - e_t, - quantize_denoised, - repeat_noise, - temperature, - noise_dropout, - ) - e_t_next, model_output = self._get_model_output( - x_prev, - t_next, - unconditional_conditioning, - unconditional_guidance_scale, - score_corrector, - c, - corrector_kwargs, - ) - e_t_prime = (e_t + e_t_next) / 2 - elif len(old_eps) == 1: - # 2nd order Pseudo Linear Multistep (Adams-Bashforth) - e_t_prime = (3 * e_t - old_eps[-1]) / 2 - elif len(old_eps) == 2: - # 3nd order Pseudo Linear Multistep (Adams-Bashforth) - e_t_prime = (23 * e_t - 16 * old_eps[-1] + 5 * old_eps[-2]) / 12 - elif len(old_eps) >= 3: - # 4nd order Pseudo Linear Multistep (Adams-Bashforth) - e_t_prime = (55 * e_t - 59 * old_eps[-1] + 37 * old_eps[-2] - 9 * old_eps[-3]) / 24 - - x_prev, pred_x0 = self._get_x_prev_and_pred_x0( - use_original_steps, - b, - index, - device, - x, - t, - model_output, - e_t_prime, - quantize_denoised, - repeat_noise, - temperature, - noise_dropout, - ) - - return x_prev, pred_x0, e_t diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py deleted file mode 100644 index 1da34e16508b..000000000000 --- a/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""SAMPLING ONLY.""" - -import torch - -from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler -from nemo.collections.multimodal.models.stable_diffusion.samplers.base_sampler import AbstractBaseSampler - -from .dpmsolver import DPMSolver, NoiseScheduleVP, model_wrapper - -MODEL_TYPES = {"eps": "noise", "v": "v"} - - -class DPMSolverSampler(AbstractBaseSampler): - def __init__(self, model, **kwargs): - - super().__init__(model, sampler=Sampler.DPM, **kwargs) - - def to_torch(x, model): - x_copy = x.clone() - x_detached = x_copy.detach() - x_float32 = x_detached.to(torch.float32) - x_device = x_float32.to(model.betas.device) - return x_device - - self.register_buffer("alphas_cumprod", to_torch(model.alphas_cumprod, model)) - - @torch.no_grad() - def p_sampling_fn(self): - pass - - @torch.no_grad() - def dpm_sampling_fn( - self, - shape, - steps, - conditioning=None, - unconditional_conditioning=None, - unconditional_guidance_scale=1.0, - x_T=None, - ): - - device = self.model.betas.device - if x_T is None: - img = torch.randn(shape, generator=self.model.rng, device=device) - else: - img = x_T - - ns = NoiseScheduleVP("discrete", alphas_cumprod=self.alphas_cumprod) - - model_fn = model_wrapper( - lambda x, t, c: self.model.apply_model(x, t, c), - ns, - model_type=MODEL_TYPES[self.model.parameterization], - guidance_type="classifier-free", - condition=conditioning, - unconditional_condition=unconditional_conditioning, - guidance_scale=unconditional_guidance_scale, - ) - dpm_solver = DPMSolver(model_fn, ns, predict_x0=True, thresholding=False) - x = dpm_solver.sample( - img, steps=steps, skip_type="time_uniform", method="multistep", order=2, lower_order_final=True, - ) - - return x.to(device), None diff --git a/nemo/collections/multimodal/modules/__init__.py b/nemo/collections/multimodal/modules/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/modules/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention.py deleted file mode 100644 index de301e0bc038..000000000000 --- a/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention.py +++ /dev/null @@ -1,317 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Adapted from: -https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py -""" -import math - -import numpy as np -import torch -import torch.nn as nn -from torch.cuda.amp import custom_bwd, custom_fwd - - -def count_flops_attn(model, _x, y): - """ - A counter for the `thop` package to count the operations in an - attention operation. - Meant to be used like: - macs, params = thop.profile( - model, - inputs=(inputs, timestamps), - custom_ops={QKVAttention: QKVAttention.count_flops}, - ) - """ - b, c, *spatial = y[0].shape - num_spatial = int(np.prod(spatial)) - # We perform two matmuls with the same number of ops. - # The first computes the weight matrix, the second computes - # the combination of the value vectors. - matmul_ops = 2 * b * (num_spatial ** 2) * c - model.total_ops += torch.DoubleTensor([matmul_ops]) - - -# Stable attention -class StableAttentionOp(torch.autograd.Function): - # This function defines the attention weight computation in a stable way - # The idea is to scale the gradients of weight matrix by the maximum absolute value. - # In case of overflow, this will prevent weight gradients from exploding. - # In case of underflow, since we clipped the scale to 1e-4, this will prevent underflow. - - @staticmethod - def forward(ctx, q, k): - w = torch.einsum('ncq,nck->nqk', q, k / math.sqrt(k.shape[1])).softmax(dim=2) - ctx.save_for_backward(q, k, w) - return w - - @staticmethod - def backward(ctx, dw): - q, k, w = ctx.saved_tensors - - s = dw.detach().norm(float('inf'), dim=[1, 2], keepdim=True).clip(min=1e-4) - dw = dw / s - - # Due to softmax, w is fp32, making db fp32. - # Type casting is required for amp to work. - db = torch._softmax_backward_data(grad_output=dw, output=w, dim=2, input_dtype=dw.dtype).to(q.dtype) - s = s / math.sqrt(k.shape[1]) - - dq = torch.einsum('nck,nqk->ncq', k, db) * s - dk = torch.einsum('ncq,nqk->nck', q, db) * s - - return dq, dk - - -class QKVStableAttention(nn.Module): - """ - A module which performs QKV attention and splits in a different order. - """ - - def __init__(self, n_heads): - super().__init__() - self.n_heads = n_heads - - def forward(self, qkv): - """ - Apply QKV attention. - - :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. - :return: an [N x (H * C) x T] tensor after attention. - """ - bs, width, length = qkv.shape - assert width % (3 * self.n_heads) == 0 - ch = width // (3 * self.n_heads) - q, k, v = qkv.chunk(3, dim=1) - - # Reshaping q and k - # try: - # q = q.view(bs * self.n_heads, ch, length) - # k = k.view(bs * self.n_heads, ch, length) - # except Exception: - q = q.reshape(bs * self.n_heads, ch, length) - k = k.reshape(bs * self.n_heads, ch, length) - - weight = StableAttentionOp.apply(q, k) - a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) - return a.reshape(bs, -1, length), weight - - @staticmethod - def count_flops(model, _x, y): - return count_flops_attn(model, _x, y) - - -class QKVAttention(nn.Module): - """ - A module which performs QKV attention and splits in a different order. - """ - - def __init__(self, n_heads): - super().__init__() - self.n_heads = n_heads - - def forward(self, qkv): - """ - Apply QKV attention. - - :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. - :return: an [N x (H * C) x T] tensor after attention. - """ - bs, width, length = qkv.shape - assert width % (3 * self.n_heads) == 0 - ch = width // (3 * self.n_heads) - q, k, v = qkv.chunk(3, dim=1) - scale = 1 / math.sqrt(math.sqrt(ch)) - weight = torch.einsum( - "bct,bcs->bts", - (q * scale).view(bs * self.n_heads, ch, length), - (k * scale).view(bs * self.n_heads, ch, length), - ) # More stable with f16 than dividing afterwards - weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) - a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) - return a.reshape(bs, -1, length), weight - - @staticmethod - def count_flops(model, _x, y): - return count_flops_attn(model, _x, y) - - -class StableMaskedAttentionOp(torch.autograd.Function): - # Robust attention operation in case of masked attention - @staticmethod - @custom_fwd - def forward(ctx, q, k, mask): - max_neg_value = -float('inf') - w = torch.einsum('ncq,nck->nqk', q, k / math.sqrt(k.shape[1])) - w = w.masked_fill(mask, max_neg_value) - w = w.softmax(dim=2) - - # When we use an arbitrary mask, there is a possibility that we get nans in softmax. - # In this case, use nan_to_num to make it a stable number. - w = w.nan_to_num_() - ctx.save_for_backward(q, k, w, mask) - return w - - @staticmethod - @custom_bwd - def backward(ctx, dw): - q, k, w, mask = ctx.saved_tensors - max_neg_value = -torch.finfo(q.dtype).max - s = dw.detach().norm(float('inf'), dim=[1, 2], keepdim=True).clip(min=1e-4) - dw = dw / s - db = torch._softmax_backward_data(grad_output=dw, output=w, dim=2, input_dtype=dw.dtype) - - # Masking db - db_in = db.clone().masked_fill_(mask, 0) - - s = s / math.sqrt(k.shape[1]) - dq = torch.einsum('nck,nqk->ncq', k, db_in) * s - dk = torch.einsum('ncq,nqk->nck', q, db_in) * s - - # These are dummy derivatives since mask is a constant - dmask = (max_neg_value - w) * db.clone() * s - - return dq, dk, dmask - - -class QKVMaskedAttention(nn.Module): - """ - A module which performs QKV attention. - Attention mask is accepted as input. - """ - - def __init__(self, n_heads): - super().__init__() - self.n_heads = n_heads - - def forward(self, q, k, v, mask): - r""" - Apply QKV attention with attention mask. - - Args: - q: an [N x d x n_seq1] of queries. - k: an [N x d x n_seq2] of keys. - v: an [N x d x n_seq2] of values. - mask: Attention mask of size N x n_seq1 x n_seq2 - - Returns: an [N x d x n_seq1] tensor after attention. - """ - - bs, width, length_q = q.shape - _, _, length_k = k.shape - - assert width % self.n_heads == 0 - ch = width // self.n_heads - - scale = 1 / math.sqrt(math.sqrt(ch)) - weight = torch.einsum( - "bct,bcs->bts", - (q * scale).view(bs * self.n_heads, ch, length_q), - (k * scale).view(bs * self.n_heads, ch, length_k), - ) # More stable with f16 than dividing afterwards - - # Duplicate mask n_heads times - mask = mask.repeat_interleave(self.n_heads, dim=0) - assert mask.shape == weight.shape - max_neg_value = -float('inf') - weight = weight.masked_fill(~mask, max_neg_value) - - weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) - - # When we use an arbitrary mask, there is a possibility that we get nans in softmax. - # In this case, use nan_to_num to make it a non-nan number. - weight = weight.nan_to_num_() - a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length_k)) - # We also return weight here for attention visualization. - return a.reshape(bs, -1, length_q), weight - - @staticmethod - def count_flops(model, _x, y): - return count_flops_attn(model, _x, y) - - -class QKVStableMaskedAttention(nn.Module): - """ - A module which performs QKV attention. - Attention mask is accepted as input. - """ - - def __init__(self, n_heads): - super().__init__() - self.n_heads = n_heads - - def forward(self, q, k, v, mask): - r""" - Apply QKV attention with attention mask. - - Args: - q: an [N x d x n_seq1] of queries. - k: an [N x d x n_seq2] of keys. - v: an [N x d x n_seq2] of values. - mask: Attention mask of size N x n_seq1 x n_seq2 - - Returns: an [N x d x n_seq1] tensor after attention. - """ - - bs, width, length_q = q.shape - _, _, length_k = k.shape - - assert width % self.n_heads == 0 - ch = width // self.n_heads - - q = q.view(bs * self.n_heads, ch, length_q) - k = k.view(bs * self.n_heads, ch, length_k) - - # Forming attention mask - mask = mask.repeat_interleave(self.n_heads, dim=0) - - weight = StableMaskedAttentionOp.apply(q, k, ~mask) - - a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length_k)) - # We also return weight here for attention visualization. - return a.reshape(bs, -1, length_q), weight - - @staticmethod - def count_flops(model, _x, y): - return count_flops_attn(model, _x, y) - - -class SelfAttentionPooling(nn.Module): - """ - Implementation of SelfAttentionPooling - Original Paper: Self-Attention Encoding and Pooling for Speaker Recognition - https://arxiv.org/pdf/2008.01077v1.pdf - Taken from: https://gist.github.com/pohanchi/c77f6dbfbcbc21c5215acde4f62e4362 - """ - - def __init__(self, input_dim): - super(SelfAttentionPooling, self).__init__() - self.W = nn.Linear(input_dim, 1) - - def forward(self, batch_rep): - """ - input: - batch_rep : size (N, T, H), N: batch size, T: sequence length, H: Hidden dimension - - attention_weight: - att_w : size (N, T, 1) - - return: - utter_rep: size (N, H) - """ - softmax = nn.functional.softmax - att_w = softmax(self.W(batch_rep).squeeze(-1), dim=1).unsqueeze(-1) - utter_rep = torch.sum(batch_rep * att_w, dim=1) - - return utter_rep diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention_alt.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention_alt.py deleted file mode 100644 index 8927226c818e..000000000000 --- a/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention_alt.py +++ /dev/null @@ -1,321 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Adapted from: -https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py -""" -import math - -import numpy as np -import torch -import torch.nn as nn -from torch.cuda.amp import custom_bwd, custom_fwd - -USE_ALT = False - - -def count_flops_attn(model, _x, y): - """ - A counter for the `thop` package to count the operations in an - attention operation. - Meant to be used like: - macs, params = thop.profile( - model, - inputs=(inputs, timestamps), - custom_ops={QKVAttention: QKVAttention.count_flops}, - ) - """ - b, c, *spatial = y[0].shape - num_spatial = int(np.prod(spatial)) - # We perform two matmuls with the same number of ops. - # The first computes the weight matrix, the second computes - # the combination of the value vectors. - matmul_ops = 2 * b * (num_spatial ** 2) * c - model.total_ops += torch.DoubleTensor([matmul_ops]) - - -# Stable attention -class StableAttentionOp(torch.autograd.Function): - # This function defines the attention weight computation in a stable way - # The idea is to scale the gradients of weight matrix by the maximum absolute value. - # In case of overflow, this will prevent weight gradients from exploding. - # In case of underflow, since we clipped the scale to 1e-4, this will prevent underflow. - - @staticmethod - def forward(ctx, q, k): - w = torch.einsum('ncq,nck->nqk', q, k / math.sqrt(k.shape[1])).softmax(dim=2) - ctx.save_for_backward(q, k, w) - return w - - @staticmethod - def backward(ctx, dw): - q, k, w = ctx.saved_tensors - - s = dw.detach().norm(float('inf'), dim=[1, 2], keepdim=True).clip(min=1e-4) - dw = dw / s - - # Due to softmax, w is fp32, making db fp32. - # Type casting is required for amp to work. - db = torch._softmax_backward_data(grad_output=dw, output=w, dim=2, input_dtype=dw.dtype).to(q.dtype) - s = s / math.sqrt(k.shape[1]) - - dq = torch.einsum('nck,nqk->ncq', k, db) * s - dk = torch.einsum('ncq,nqk->nck', q, db) * s - - return dq, dk - - -class QKVStableAttention(nn.Module): - """ - A module which performs QKV attention and splits in a different order. - """ - - def __init__(self, n_heads): - super().__init__() - self.n_heads = n_heads - - def forward(self, qkv): - """ - Apply QKV attention. - - :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. - :return: an [N x (H * C) x T] tensor after attention. - """ - bs, width, length = qkv.shape - assert width % (3 * self.n_heads) == 0 - ch = width // (3 * self.n_heads) - q, k, v = qkv.chunk(3, dim=1) - - # Reshaping q and k - # try: - # q = q.view(bs * self.n_heads, ch, length) - # k = k.view(bs * self.n_heads, ch, length) - # except Exception: - q = q.reshape(bs * self.n_heads, ch, length) - k = k.reshape(bs * self.n_heads, ch, length) - - weight = StableAttentionOp.apply(q, k) - a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) - return a.reshape(bs, -1, length), weight - - @staticmethod - def count_flops(model, _x, y): - return count_flops_attn(model, _x, y) - - -class QKVAttention(nn.Module): - """ - A module which performs QKV attention and splits in a different order. - """ - - def __init__(self, n_heads): - super().__init__() - self.n_heads = n_heads - - def forward(self, qkv): - """ - Apply QKV attention. - - :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. - :return: an [N x (H * C) x T] tensor after attention. - """ - bs, width, length = qkv.shape - assert width % (3 * self.n_heads) == 0 - ch = width // (3 * self.n_heads) - q, k, v = qkv.chunk(3, dim=1) - scale = 1 / math.sqrt(math.sqrt(ch)) - weight = torch.einsum( - "bct,bcs->bts", - (q * scale).view(bs * self.n_heads, ch, length), - (k * scale).view(bs * self.n_heads, ch, length), - ) # More stable with f16 than dividing afterwards - weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) - a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) - return a.reshape(bs, -1, length), weight - - @staticmethod - def count_flops(model, _x, y): - return count_flops_attn(model, _x, y) - - -class StableMaskedAttentionOp(torch.autograd.Function): - # Robust attention operation in case of masked attention - @staticmethod - @custom_fwd - def forward(ctx, q, k, mask): - max_neg_value = -float('inf') - w = torch.einsum('ncq,nck->nqk', q, k / math.sqrt(k.shape[1])) - w = w.masked_fill(mask, max_neg_value) - w = w.softmax(dim=2) - - # When we use an arbitrary mask, there is a possibility that we get nans in softmax. - # In this case, use nan_to_num to make it a stable number. - # w = w.nan_to_num_() - ctx.save_for_backward(q, k, w, mask) - return w - - @staticmethod - @custom_bwd - def backward(ctx, dw): - q, k, w, mask = ctx.saved_tensors - max_neg_value = -torch.finfo(q.dtype).max - s = dw.detach().norm(float('inf'), dim=[1, 2], keepdim=True).clip(min=1e-4) - dw = dw / s - db = torch._softmax_backward_data(grad_output=dw, output=w, dim=2, input_dtype=dw.dtype) - - # Masking db - db_in = db.clone().masked_fill_(mask, 0) - - s = s / math.sqrt(k.shape[1]) - dq = torch.einsum('nck,nqk->ncq', k, db_in) * s - dk = torch.einsum('ncq,nqk->nck', q, db_in) * s - - # These are dummy derivatives since mask is a constant - dmask = (max_neg_value - w) * db.clone() * s - - return dq, dk, dmask - - -class QKVMaskedAttention(nn.Module): - """ - A module which performs QKV attention. - Attention mask is accepted as input. - """ - - def __init__(self, n_heads): - super().__init__() - self.n_heads = n_heads - - def forward(self, q, k, v, mask): - r""" - Apply QKV attention with attention mask. - - Args: - q: an [N x d x n_seq1] of queries. - k: an [N x d x n_seq2] of keys. - v: an [N x d x n_seq2] of values. - mask: Attention mask of size N x n_seq1 x n_seq2 - - Returns: an [N x d x n_seq1] tensor after attention. - """ - - bs, width, length_q = q.shape - _, _, length_k = k.shape - - assert width % self.n_heads == 0 - ch = width // self.n_heads - - scale = 1 / math.sqrt(math.sqrt(ch)) - weight = torch.einsum( - "bct,bcs->bts", - (q * scale).view(bs * self.n_heads, ch, length_q), - (k * scale).view(bs * self.n_heads, ch, length_k), - ) # More stable with f16 than dividing afterwards - - # Duplicate mask n_heads times - # mask = mask.repeat_interleave(self.n_heads, dim=0) - mask = mask.unsqueeze(0).repeat(self.n_heads, 1, 1, 1).transpose(0, 1).flatten(0, 1) - assert mask.shape == weight.shape - max_neg_value = -float('inf') - weight = weight.masked_fill(~mask, max_neg_value) - - weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) - - # When we use an arbitrary mask, there is a possibility that we get nans in softmax. - # In this case, use nan_to_num to make it a non-nan number. - # weight = weight.nan_to_num_() - a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length_k)) - # We also return weight here for attention visualization. - return a.reshape(bs, -1, length_q), weight - - @staticmethod - def count_flops(model, _x, y): - return count_flops_attn(model, _x, y) - - -class QKVStableMaskedAttention(nn.Module): - """ - A module which performs QKV attention. - Attention mask is accepted as input. - """ - - def __init__(self, n_heads): - super().__init__() - self.n_heads = n_heads - - def forward(self, q, k, v, mask): - r""" - Apply QKV attention with attention mask. - - Args: - q: an [N x d x n_seq1] of queries. - k: an [N x d x n_seq2] of keys. - v: an [N x d x n_seq2] of values. - mask: Attention mask of size N x n_seq1 x n_seq2 - - Returns: an [N x d x n_seq1] tensor after attention. - """ - - bs, width, length_q = q.shape - _, _, length_k = k.shape - - assert width % self.n_heads == 0 - ch = width // self.n_heads - - q = q.view(bs * self.n_heads, ch, length_q) - k = k.view(bs * self.n_heads, ch, length_k) - - # Forming attention mask - # mask = mask.repeat_interleave(self.n_heads, dim=0) - mask = mask.unsqueeze(0).repeat(self.n_heads, 1, 1, 1).transpose(0, 1).flatten(0, 1) - - weight = StableMaskedAttentionOp.apply(q, k, ~mask) - - a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length_k)) - # We also return weight here for attention visualization. - return a.reshape(bs, -1, length_q), weight - - @staticmethod - def count_flops(model, _x, y): - return count_flops_attn(model, _x, y) - - -class SelfAttentionPooling(nn.Module): - """ - Implementation of SelfAttentionPooling - Original Paper: Self-Attention Encoding and Pooling for Speaker Recognition - https://arxiv.org/pdf/2008.01077v1.pdf - Taken from: https://gist.github.com/pohanchi/c77f6dbfbcbc21c5215acde4f62e4362 - """ - - def __init__(self, input_dim): - super(SelfAttentionPooling, self).__init__() - self.W = nn.Linear(input_dim, 1) - - def forward(self, batch_rep): - """ - input: - batch_rep : size (N, T, H), N: batch size, T: sequence length, H: Hidden dimension - - attention_weight: - att_w : size (N, T, 1) - - return: - utter_rep: size (N, H) - """ - softmax = nn.functional.softmax - att_w = softmax(self.W(batch_rep).squeeze(-1), dim=1).unsqueeze(-1) - utter_rep = torch.sum(batch_rep * att_w, dim=1) - - return utter_rep diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py deleted file mode 100644 index 1d6b8395a58f..000000000000 --- a/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py +++ /dev/null @@ -1,906 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Adapted from: -https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py -""" -import math -from abc import abstractmethod - -import torch as th -import torch.nn as nn -import torch.nn.functional as F -import torch.utils.checkpoint as checkpoint -from einops import rearrange - -from nemo.collections.multimodal.modules.imagen.diffusionmodules import attention_alt - -if attention_alt.USE_ALT: - from nemo.collections.multimodal.modules.imagen.diffusionmodules.attention_alt import ( - QKVAttention, - QKVMaskedAttention, - QKVStableAttention, - QKVStableMaskedAttention, - ) -else: - from nemo.collections.multimodal.modules.imagen.diffusionmodules.attention import ( - QKVAttention, - QKVMaskedAttention, - QKVStableAttention, - QKVStableMaskedAttention, - ) -from nemo.collections.multimodal.modules.imagen.diffusionmodules.layers import ( - Downsample, - Upsample, - UpsampleLearnable, - conv_nd, - linear, - normalization, - zero_module, -) - - -def check_cuda(): - if not th.cuda.is_available(): - raise RuntimeError('CUDA is not available') - cur_device = th.cuda.current_device() - dprops = th.cuda.get_device_properties(cur_device) - - is_sm75 = dprops.major == 7 and dprops.minor == 5 - is_sm8x = dprops.major == 8 and dprops.minor >= 0 - is_sm90 = dprops.major == 9 and dprops.minor >= 0 - - return is_sm8x or is_sm75 or is_sm90 - - -try: - from flash_attn import flash_attn_varlen_func, flash_attn_varlen_kvpacked_func - - flash_attn_installed = check_cuda() -except ImportError: - flash_attn_installed = False - - -class TextConditionedBlock(nn.Module): - r""" - Any module where forward() takes text embeddings as arguments. - """ - - @abstractmethod - def forward(self, x, text_emb, text_mask): - """ - Apply the module to `x` given `text_emb` text embedding and 'text_mask' text valid mask. - """ - - -class TimestepBlock(nn.Module): - """ - Any module where forward() takes timestep embeddings as a second argument. - """ - - @abstractmethod - def forward(self, x, emb): - """ - Apply the module to `x` given `emb` timestep embeddings. - """ - - -class ConditionalSequential(nn.Sequential, TimestepBlock, TextConditionedBlock): - r""" - A sequential module that accepts timestep embeddings, text embedding and text mask in addition to the input x. - Depending on the type of block, we either pass timestep embedding or text embeddings as inputs. - """ - - def forward(self, x, emb, text_emb, text_mask): - for layer in self: - if isinstance(layer, TimestepBlock): - x = layer(x, emb) - elif isinstance(layer, TextConditionedBlock): - x = layer(x, text_emb, text_mask) - else: - x = layer(x) - return x - - -class ResBlock(TimestepBlock): - """ - A residual block that can optionally change the number of channels. - - :param channels: the number of input channels. - :param emb_channels: the number of timestep embedding channels. - :param dropout: the rate of dropout. - :param out_channels: if specified, the number of out channels. - :param use_conv: if True and out_channels is specified, use a spatial - convolution instead of a smaller 1x1 convolution to change the - channels in the skip connection. - :param dims: determines if the signal is 1D, 2D, or 3D. - :param use_checkpoint: if True, use gradient checkpointing on this module. - :param up: if True, use this block for upsampling. - :param down: if True, use this block for downsampling. - """ - - def __init__( - self, - channels, - emb_channels, - dropout, - out_channels=None, - use_conv=False, - use_scale_shift_norm=False, - dims=2, - use_checkpoint=False, - up=False, - down=False, - learnable_upsampling=False, - ): - super().__init__() - self.channels = channels - self.emb_channels = emb_channels - self.dropout = dropout - self.out_channels = out_channels or channels - self.use_conv = use_conv - self.use_checkpoint = use_checkpoint - self.use_scale_shift_norm = use_scale_shift_norm - - self.in_layers = nn.Sequential( - normalization(channels), nn.SiLU(), conv_nd(dims, channels, self.out_channels, 3, padding=1), - ) - - self.updown = up or down - if learnable_upsampling: - upsample_fn = UpsampleLearnable - else: - upsample_fn = Upsample - - if up: - self.h_upd = upsample_fn(channels, False, dims) - self.x_upd = upsample_fn(channels, False, dims) - elif down: - self.h_upd = Downsample(channels, False, dims) - self.x_upd = Downsample(channels, False, dims) - else: - self.h_upd = self.x_upd = nn.Identity() - - self.emb_layers = nn.Sequential( - nn.SiLU(), linear(emb_channels, 2 * self.out_channels if use_scale_shift_norm else self.out_channels,), - ) - self.out_layers = nn.Sequential( - normalization(self.out_channels), - nn.SiLU(), - nn.Dropout(p=dropout), - zero_module(conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1)), - ) - - if self.out_channels == channels: - self.skip_connection = nn.Identity() - elif use_conv: - self.skip_connection = conv_nd(dims, channels, self.out_channels, 3, padding=1) - else: - self.skip_connection = conv_nd(dims, channels, self.out_channels, 1) - - def forward(self, x, emb): - """ - Apply the block to a Tensor, conditioned on a timestep embedding. - - :param x: an [N x C x ...] Tensor of features. - :param emb: an [N x emb_channels] Tensor of timestep embeddings. - :return: an [N x C x ...] Tensor of outputs. - """ - if self.use_checkpoint: - return checkpoint.checkpoint(self._forward, x, emb) - else: - return self._forward(x, emb) - - def _forward(self, x, emb): - if self.updown: - in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1] - h = in_rest(x) - h = self.h_upd(h) - x = self.x_upd(x) - h = in_conv(h) - else: - h = self.in_layers(x) - emb_out = self.emb_layers(emb) - while len(emb_out.shape) < len(h.shape): - emb_out = emb_out[..., None] - if self.use_scale_shift_norm: - out_norm, out_rest = self.out_layers[0], self.out_layers[1:] - scale, shift = th.chunk(emb_out, 2, dim=1) - h = out_norm(h) * (1 + scale) + shift - h = out_rest(h) - else: - h = h + emb_out - h = self.out_layers(h) - return self.skip_connection(x) + h - - -class EfficientResBlock(TimestepBlock): - """ - A residual block that can optionally change the number of channels. - Follow Figure A.27 in Imagen Paper. - :param channels: the number of input channels. - :param emb_channels: the number of timestep embedding channels. - :param out_channels: if specified, the number of out channels. - :param use_conv: if True and out_channels is specified, use a spatial - convolution instead of a smaller 1x1 convolution to change the - channels in the skip connection. - :param dims: determines if the signal is 1D, 2D, or 3D. - :param use_checkpoint: if True, use gradient checkpointing on this module. - :param up: if True, use this block for upsampling. - :param down: if True, use this block for downsampling. - """ - - def __init__( - self, - channels, - emb_channels, - out_channels=None, - use_scale_shift_norm=False, - dims=2, - use_checkpoint=False, - skip_connection_scaling=False, - ): - super().__init__() - - out_channels = out_channels or channels - - self.use_scale_shift_norm = use_scale_shift_norm - self.use_checkpoint = use_checkpoint - - self.in_layers = nn.Sequential( - normalization(channels), nn.SiLU(), conv_nd(dims, channels, out_channels, 3, padding=1) - ) - - self.emb_layers = nn.Sequential( - nn.SiLU(), nn.Linear(emb_channels, 2 * out_channels if use_scale_shift_norm else out_channels,), - ) - - self.out_layers = nn.Sequential( - normalization(out_channels), - nn.SiLU(), - zero_module(conv_nd(dims, out_channels, out_channels, 3, padding=1)), - ) - - self.shortcut = conv_nd(dims, channels, out_channels, 1) - self.shortcut_scale = 1 / math.sqrt(2) if skip_connection_scaling else 1 - - def forward(self, x, emb): - """ - Apply the block to a Tensor, conditioned on a timestep embedding. - - :param x: an [N x C x ...] Tensor of features. - :param emb: an [N x emb_channels] Tensor of timestep embeddings. - :return: an [N x C x ...] Tensor of outputs. - """ - if self.use_checkpoint: - return checkpoint.checkpoint(self._forward, x, emb) - else: - return self._forward(x, emb) - - def _forward(self, x, emb): - h = self.in_layers(x) - emb_out = self.emb_layers(emb) - while len(emb_out.shape) < len(h.shape): - emb_out = emb_out[..., None] - if self.use_scale_shift_norm: - out_norm, out_rest = self.out_layers[0], self.out_layers[1:] - scale, shift = th.chunk(emb_out, 2, dim=1) - h = out_norm(h) * (1 + scale) + shift - h = out_rest(h) - else: - h = h + emb_out - h = self.out_layers(h) - - return h + self.shortcut(x) * self.shortcut_scale - - -class Block(nn.Module): - def __init__( - self, - channels, - emb_channels, - out_channels=None, - use_scale_shift_norm=True, - num_resblocks=2, - attention_type=None, - text_embed_dim=0, - stable_attention=True, - flash_attention=False, - num_head_channels=-1, - num_heads=8, - dims=2, - use_checkpoint=False, - skip_connection_scaling=False, - ): - super().__init__() - - out_channels = out_channels or channels - - self.attention_type = attention_type - self.text_embed_dim = text_embed_dim - - blocks = [ - EfficientResBlock( - channels, - emb_channels, - out_channels=out_channels, - use_scale_shift_norm=use_scale_shift_norm, - dims=dims, - use_checkpoint=use_checkpoint, - skip_connection_scaling=skip_connection_scaling, - ) - ] - - blocks += [ - EfficientResBlock( - out_channels, - emb_channels, - out_channels=out_channels, - use_scale_shift_norm=use_scale_shift_norm, - dims=dims, - use_checkpoint=use_checkpoint, - skip_connection_scaling=skip_connection_scaling, - ) - for _ in range(num_resblocks - 1) - ] - - self.blocks = nn.ModuleList(blocks) - - # Attention blocks - # Self - Self-attention blocks - # fused - Single attention layer for fusing self and cross attention. - if self.attention_type is not None: - assert self.attention_type in ('self', 'cross', 'fused', 'stacked') - attention_kwargs = dict() - - if self.attention_type == 'self': - attention_fn = SelfAttentionBlock - elif self.attention_type == 'cross': - attention_fn = CrossAttentionBlock - attention_kwargs['context_dim'] = self.text_embed_dim - elif self.attention_type == 'stacked': - attention_fn = StackedCrossAttentionBlock - attention_kwargs['context_dim'] = self.text_embed_dim - else: - attention_fn = FusedCrossAttentionBlock - attention_kwargs['context_dim'] = self.text_embed_dim - - self.attention_layer = attention_fn( - out_channels, - num_heads=num_heads, - num_head_channels=num_head_channels, - use_checkpoint=use_checkpoint, - stable_attention=stable_attention, - flash_attention=flash_attention, - **attention_kwargs, - ) - - @abstractmethod - def forward(self, x, emb, text_embed=None, text_mask=None): - pass - - -class DBlock(Block): - def __init__( - self, - channels, - emb_channels, - out_channels=None, - use_scale_shift_norm=True, - conv_down=True, - stride=2, - num_resblocks=2, - attention_type=None, - text_embed_dim=0, - stable_attention=True, - flash_attention=False, - num_head_channels=-1, - num_heads=8, - dims=2, - use_checkpoint=False, - skip_connection_scaling=False, - ): - super().__init__( - channels, - emb_channels, - out_channels=out_channels, - use_scale_shift_norm=use_scale_shift_norm, - num_resblocks=num_resblocks, - attention_type=attention_type, - text_embed_dim=text_embed_dim, - stable_attention=stable_attention, - flash_attention=flash_attention, - num_head_channels=num_head_channels, - num_heads=num_heads, - dims=dims, - use_checkpoint=use_checkpoint, - skip_connection_scaling=skip_connection_scaling, - ) - - self.conv_down = conv_down - if self.conv_down: - # self.conv = nn.Conv2d(channels, channels, 3, stride=stride, padding=1) - self.conv = nn.Conv2d(channels, channels, 4, stride=stride, padding=1) - - def forward(self, x, emb, text_embed=None, text_mask=None): - if self.conv_down: - x = self.conv(x) - - for block in self.blocks: - x = block(x, emb) - - if self.attention_type in ('cross', 'fused', 'stacked'): - x = self.attention_layer(x, text_embed, text_mask) - elif self.attention_type == 'self': - x = self.attention_layer(x) - - return x - - -class UBlock(Block): - def __init__( - self, - channels, - emb_channels, - out_channels=None, - use_scale_shift_norm=True, - conv_up=True, - stride=2, - num_resblocks=2, - attention_type=None, - text_embed_dim=0, - stable_attention=True, - flash_attention=False, - num_head_channels=-1, - num_heads=8, - dims=2, - use_checkpoint=False, - skip_connection_scaling=False, - ): - super().__init__( - channels, - emb_channels, - out_channels=out_channels, - use_scale_shift_norm=use_scale_shift_norm, - num_resblocks=num_resblocks, - attention_type=attention_type, - text_embed_dim=text_embed_dim, - stable_attention=stable_attention, - flash_attention=flash_attention, - num_head_channels=num_head_channels, - num_heads=num_heads, - dims=dims, - use_checkpoint=use_checkpoint, - skip_connection_scaling=skip_connection_scaling, - ) - - self.conv_up = conv_up - if self.conv_up: - self.conv = nn.ConvTranspose2d(out_channels, out_channels, 4, stride, 1) - - def forward(self, x, emb, text_embed=None, text_mask=None): - for block in self.blocks: - x = block(x, emb) - - if self.attention_type in ('cross', 'fused', 'stacked'): - x = self.attention_layer(x, text_embed, text_mask) - elif self.attention_type == 'self': - x = self.attention_layer(x) - - if self.conv_up: - x = self.conv(x) - - return x - - -class FusedCrossAttentionBlock(TextConditionedBlock): - """ - An attention block that fuses self-attention and cross-attention - in a single block. - """ - - def __init__( - self, - channels, - context_dim, - num_heads=1, - num_head_channels=-1, - use_checkpoint=False, - stable_attention=True, - flash_attention=False, - ): - super().__init__() - self.channels = channels - if num_head_channels == -1: - self.num_heads = num_heads - else: - assert ( - channels % num_head_channels == 0 - ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" - self.num_heads = channels // num_head_channels - self.use_checkpoint = use_checkpoint - self.flash_attention = flash_attention - self.norm = normalization(channels) - self.norm_context = normalization(context_dim) - self.norm_self = normalization(channels) - - # For image features - self.q = conv_nd(1, channels, channels, 1) - - # For context - self.kv_context = conv_nd(1, context_dim, channels * 2, 1) - - # For spatial - self.kv_self = conv_nd(1, channels, channels * 2, 1) - - if flash_attention: - assert flash_attn_installed, "FlashAttention is not installed." - assert not stable_attention, "FlashAttention doesn't support the stable form." - - elif stable_attention: - self.attention = QKVStableMaskedAttention(self.num_heads) - else: - self.attention = QKVMaskedAttention(self.num_heads) - - self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) - - def forward(self, x, context, mask): - if self.use_checkpoint: - return checkpoint.checkpoint(self._forward, x, context, mask) - else: - return self._forward(x, context, mask) - - def _forward(self, x, context, mask): - - b, c, *spatial = x.shape - x = x.reshape(b, c, -1) - - q = self.q(self.norm(x)) - - # Key-value pairs for self-attention - kv_self = self.kv_self(self.norm_self(x)) - k_self, v_self = kv_self.chunk(2, dim=1) - k_self = k_self.contiguous() - v_self = v_self.contiguous() - - # Key-value pairs for cross-attention - context = th.permute(context, (0, 2, 1)) - context_n = self.norm_context(context) - kv_context = self.kv_context(context_n) - k_context, v_context = kv_context.chunk(2, dim=1) - k_context = k_context.contiguous() - v_context = v_context.contiguous() - - # Appending key-value pairs - k_full = th.cat([k_self, k_context], dim=2) - v_full = th.cat([v_self, v_context], dim=2) - - if self.flash_attention: - # q: b (h d) s, k_context: b (h d) s - batch_size = q.shape[0] - max_seqlen_q, max_seqlen_k = q.shape[2], q.shape[2] + k_context.shape[2] - q = rearrange(q, 'b (h d) s -> (b s) h d', h=self.num_heads) - - mask_self = th.ones((batch_size, max_seqlen_q), device=q.device, dtype=th.bool) - mask_context = mask.bool() - mask_full = th.cat([mask_self, mask_context], dim=1) - - k_full_unpadded = k_full.transpose(1, 2)[mask_full] - total_k = k_full_unpadded.shape[0] - k_full_unpadded = k_full_unpadded.view(total_k, self.num_heads, -1) - - v_full_unpadded = v_full.transpose(1, 2)[mask_full] - v_full_unpadded = v_full_unpadded.view(total_k, self.num_heads, -1) - - # (b s) t h d - kv_full_unpadded = th.stack([k_full_unpadded, v_full_unpadded], dim=1) - - cu_seqlens_q = th.arange( - 0, (batch_size + 1) * max_seqlen_q, step=max_seqlen_q, dtype=th.int32, device=q.device - ) - cu_seqlens_k = th.zeros((batch_size + 1), dtype=th.int32, device=k_full.device) - cu_seqlens_k[1:] = th.cumsum(mask.sum(dim=1), dim=0) - cu_seqlens_k += cu_seqlens_q - - out = flash_attn_varlen_kvpacked_func( - q, kv_full_unpadded, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, 0.0 - ) - h = rearrange(out, '(b s) h d -> b (h d) s', b=batch_size, h=self.num_heads) - else: - # Computing mask for self attention - mask_self = th.ones(k_self.shape[0], q.shape[2], k_self.shape[2], device=mask.device) - - # Mask for cross attention - mask_context = mask.view(mask.shape[0], 1, mask.shape[1]) - mask_context = mask_context.repeat(1, q.shape[2], 1) - - # Fused mask - mask_full = th.cat([mask_self, mask_context], dim=2) - mask_full = mask_full.to(th.bool) - - h, _ = self.attention(q, k_full, v_full, mask_full) - - h = self.proj_out(h) - return (x + h).reshape(b, c, *spatial) - - -class SelfAttentionBlock(nn.Module): - """ - An attention block that allows spatial positions to attend to each other. - - Originally ported from here, but adapted to the N-d case. - https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. - """ - - def __init__( - self, - channels, - num_heads=1, - num_head_channels=-1, - use_checkpoint=False, - stable_attention=False, - flash_attention=False, - ): - super().__init__() - self.channels = channels - if num_head_channels == -1: - self.num_heads = num_heads - else: - assert ( - channels % num_head_channels == 0 - ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" - self.num_heads = channels // num_head_channels - self.use_checkpoint = use_checkpoint - self.norm = normalization(channels) - self.qkv = conv_nd(1, channels, channels * 3, 1) - self.flash_attention = flash_attention - if flash_attention: - assert flash_attn_installed, "FlashAttention is not installed." - assert not stable_attention, "FlashAttention doesn't support the stable form." - elif stable_attention: - self.attention = QKVStableAttention(self.num_heads) - else: - self.attention = QKVAttention(self.num_heads) - - self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) - - def forward(self, x): - if self.use_checkpoint: - return checkpoint.checkpoint(self._forward, x) - else: - return self._forward(x) - - def _forward(self, x): - - b, c, *spatial = x.shape - x = x.reshape(b, c, -1) - qkv = self.qkv(self.norm(x)) - - if self.flash_attention: - # qkv shape: (b, (3 h d) s), need to reshape to (b, s, h, d) for each q, k, v - b, _, _ = qkv.shape - h = self.num_heads - q, k, v = qkv.chunk(3, dim=1) - max_seqlen_q, max_seqlen_k = q.shape[2], k.shape[2] - q = rearrange(q, 'b (h d) s -> (b s) h d', h=self.num_heads) - k = rearrange(k, 'b (h d) s -> (b s) h d', h=self.num_heads) - v = rearrange(v, 'b (h d) s -> (b s) h d', h=self.num_heads) - cu_seqlens_q = th.arange(0, (b + 1) * max_seqlen_q, step=max_seqlen_q, dtype=th.int32, device=q.device) - cu_seqlens_k = th.arange(0, (b + 1) * max_seqlen_k, step=max_seqlen_k, dtype=th.int32, device=k.device) - h = flash_attn_varlen_func(q, k, v, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, 0.0) - h = rearrange(h, '(b s) h d -> b (h d) s', b=b, h=self.num_heads) - else: - h, _ = self.attention(qkv) - h = self.proj_out(h) - return (x + h).reshape(b, c, *spatial) - - -######################################################################### -# These are the attention blocks as implemented by Stable Diffusion -# https://github.com/CompVis/stable-diffusion/blob/69ae4b35e0a0f6ee1af8bb9a5d0016ccb27e36dc/ldm/modules/attention.py#L196 - - -class CrossAttentionBlock(TextConditionedBlock): - """ - An attention block that allows spatial positions to attend to context. - In our case, context is the token-wise text embeddings. - """ - - def __init__( - self, - channels, - context_dim, - num_heads=1, - num_head_channels=-1, - use_checkpoint=False, - stable_attention=True, - flash_attention=False, - ): - super().__init__() - self.channels = channels - if num_head_channels == -1: - self.num_heads = num_heads - else: - assert ( - channels % num_head_channels == 0 - ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" - self.num_heads = channels // num_head_channels - self.use_checkpoint = use_checkpoint - self.norm = normalization(channels) - self.norm_context = normalization(context_dim) - self.flash_attention = flash_attention - # For image features - self.q = conv_nd(1, channels, channels, 1) - - # For context - self.kv = conv_nd(1, context_dim, channels * 2, 1) - - if flash_attention: - assert flash_attn_installed, "FlashAttention is not installed." - assert not stable_attention, "FlashAttention doesn't support the stable form." - elif stable_attention: - self.attention = QKVStableMaskedAttention(self.num_heads) - else: - self.attention = QKVMaskedAttention(self.num_heads) - - self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) - - def forward(self, x, context, mask): - if self.use_checkpoint: - return checkpoint.checkpoint(self._forward, x, context, mask) - else: - return self._forward(x, context, mask) - - def _forward(self, x, context, mask): - - b, c, *spatial = x.shape - x = x.reshape(b, c, -1) - - q = self.q(self.norm(x)) - context = th.permute(context, (0, 2, 1)) - context_n = self.norm_context(context) - kv = self.kv(context_n) - k, v = kv.chunk(2, dim=1) - k = k.contiguous() - v = v.contiguous() - - if self.flash_attention: - batch_size = q.shape[0] - max_seqlen_q, max_seqlen_k = q.shape[2], k.shape[2] - q = rearrange(q, 'b (h d) s -> (b s) h d', h=self.num_heads) - mask = mask.to(th.bool) - k_unpadded = k.transpose(1, 2)[mask] - total_k = k_unpadded.shape[0] - k_unpadded = k_unpadded.view(total_k, self.num_heads, -1) - v_unpadded = v.transpose(1, 2)[mask] - v_unpadded = v_unpadded.view(total_k, self.num_heads, -1) - kv_unpadded = th.stack([k_unpadded, v_unpadded], dim=1) - cu_seqlens_q = th.arange( - 0, (batch_size + 1) * max_seqlen_q, step=max_seqlen_q, dtype=th.int32, device=q.device - ) - cu_seqlens_k = th.zeros((batch_size + 1), dtype=th.int32, device=q.device) - cu_seqlens_k[1:] = th.cumsum(mask.sum(dim=1), dim=0) - - out = flash_attn_varlen_kvpacked_func( - q, kv_unpadded, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, 0.0 - ) - h = rearrange(out, '(b s) h d -> b (h d) s', b=batch_size, h=self.num_heads) - else: - # Computing mask for cross attention - mask = mask.view(mask.shape[0], 1, mask.shape[1]) - mask = mask.repeat(1, q.shape[-1], 1) - mask = mask.to(th.bool) - - h, _ = self.attention(q, k, v, mask) - h = self.proj_out(h) - return (x + h).reshape(b, c, *spatial) - - -class GEGLU(nn.Module): - def __init__(self, dim_in, dim_out): - super().__init__() - self.proj = nn.Linear(dim_in, dim_out * 2) - - def forward(self, x): - x, gate = self.proj(x).chunk(2, dim=-1) - return x * F.gelu(gate) - - -class FeedForward(nn.Module): - def __init__(self, dim, mult=4, glu=False, dropout=0.0): - super().__init__() - inner_dim = int(dim * mult) - project_in = nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU()) if not glu else GEGLU(dim, inner_dim) - - self.norm = normalization(dim) - self.net = nn.Sequential(project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim)) - - def forward(self, x): - b, c, *spatial = x.shape - x = x.reshape(b, c, -1) - - h = self.norm(x) - - # Reshape so that the channel dim moves to last - # Linear function operates on the last dimension - h = th.permute(h, (0, 2, 1)) - - h = self.net(h) - - # Permute it back - h = th.permute(h, (0, 2, 1)) - - return (x + h).reshape(b, c, *spatial) - - -class StackedCrossAttentionBlock(TextConditionedBlock): - """ - An attention block that stacks self-attention and cross-attention layers - in a single block. - """ - - def __init__( - self, - channels, - context_dim, - num_heads=1, - num_head_channels=-1, - use_checkpoint=False, - stable_attention=True, - flash_attention=False, - ): - super().__init__() - self.proj_in = conv_nd(2, channels, channels, 1) - self.norm = normalization(channels) - self.use_checkpoint = use_checkpoint - - self.self_attention_block = SelfAttentionBlock( - channels=channels, - num_heads=num_heads, - num_head_channels=num_head_channels, - use_checkpoint=use_checkpoint, - stable_attention=stable_attention, - flash_attention=flash_attention, - ) - - self.cross_attention_block = CrossAttentionBlock( - channels=channels, - context_dim=context_dim, - num_heads=num_heads, - num_head_channels=num_head_channels, - use_checkpoint=use_checkpoint, - stable_attention=stable_attention, - flash_attention=flash_attention, - ) - - self.ff = FeedForward(dim=channels, glu=True) - self.proj_out = zero_module(conv_nd(2, channels, channels, 1)) - - def forward(self, x, context, mask): - if self.use_checkpoint: - return checkpoint.checkpoint(self._forward, x, context, mask) - else: - return self._forward(x, context, mask) - - def _forward(self, x, context, mask): - - h = self.norm(x) - h = self.proj_in(h) - - h = self.self_attention_block(h) - h = self.cross_attention_block(h, context, mask) - h = self.ff(h) - - h = self.proj_out(h) - return h + x diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/embs.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/embs.py deleted file mode 100644 index 6d5f50023166..000000000000 --- a/nemo/collections/multimodal/modules/imagen/diffusionmodules/embs.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import math -import torch -import torch.nn as nn -from einops import rearrange - - -class LearnedSinusoidalPosEmb(nn.Module): - """ following @crowsonkb 's lead with learned sinusoidal pos emb """ - - """ https://github.com/crowsonkb/v-diffusion-jax/blob/master/diffusion/models/danbooru_128.py#L8 """ - - def __init__(self, dim): - super().__init__() - assert (dim % 2) == 0 - half_dim = dim // 2 - self.weights = nn.Parameter(torch.randn(half_dim)) - - def forward(self, x): - x = rearrange(x, 'b -> b 1') - freqs = x * rearrange(self.weights, 'd -> 1 d') * 2 * math.pi - fouriered = torch.cat((freqs.sin(), freqs.cos()), dim=-1) - fouriered = torch.cat((x, fouriered), dim=-1) - return fouriered - - -class UnLearnedSinusoidalPosEmb(nn.Module): - def __init__(self, dim, max_period=10000): - """ - Create sinusoidal timestep embeddings. - - :param timesteps: a 1-D Tensor of N indices, one per batch element. - These may be fractional. - :param dim: the dimension of the output. - :param max_period: controls the minimum frequency of the embeddings. - :return: an [N x dim] Tensor of positional embeddings. - """ - super().__init__() - self.dim = dim - self.max_period = max_period - print(f'Unlearned Timestep Embedding Schedule: dim={dim}, max_period={max_period}') - - def forward(self, timesteps): - dim = self.dim - half = dim // 2 - max_period = self.max_period - dtype = timesteps.dtype - freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half).to( - device=timesteps.device - ) - args = timesteps[:, None].float() * freqs[None] - freqs = freqs.to(dtype=dtype) - args = args.to(dtype=dtype) - embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) - if dim % 2: - embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) - return embedding diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py deleted file mode 100644 index a5cb19444057..000000000000 --- a/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py +++ /dev/null @@ -1,251 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Copyright (c) 2021 OpenAI -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -""" -Brought from: -https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/nn.py - -Various utilities for neural networks. -""" - -import math - -import torch as th -import torch.nn as nn -import torch.nn.functional as F - -try: - from group_norm import GroupNormOpt - - OPT_GROUP_NORM = True -except Exception: - print('Fused optimized group norm has not been installed.') - OPT_GROUP_NORM = False - - -def conv_nd(dims, *args, **kwargs): - """ - Create a 1D, 2D, or 3D convolution module. - """ - if dims == 1: - return nn.Conv1d(*args, **kwargs) - elif dims == 2: - return nn.Conv2d(*args, **kwargs) - elif dims == 3: - return nn.Conv3d(*args, **kwargs) - raise ValueError(f"unsupported dimensions: {dims}") - - -def linear(*args, **kwargs): - """ - Create a linear module. - """ - return nn.Linear(*args, **kwargs) - - -def avg_pool_nd(dims, *args, **kwargs): - """ - Create a 1D, 2D, or 3D average pooling module. - """ - if dims == 1: - return nn.AvgPool1d(*args, **kwargs) - elif dims == 2: - return nn.AvgPool2d(*args, **kwargs) - elif dims == 3: - return nn.AvgPool3d(*args, **kwargs) - raise ValueError(f"unsupported dimensions: {dims}") - - -def update_ema(target_params, source_params, rate=0.99): - """ - Update target parameters to be closer to those of source parameters using - an exponential moving average. - - :param target_params: the target parameter sequence. - :param source_params: the source parameter sequence. - :param rate: the EMA rate (closer to 1 means slower). - """ - for targ, src in zip(target_params, source_params): - targ.detach().mul_(rate).add_(src, alpha=1 - rate) - - -def zero_module(module): - """ - Zero out the parameters of a module and return it. - """ - for p in module.parameters(): - p.detach().zero_() - return module - - -def scale_module(module, scale): - """ - Scale the parameters of a module and return it. - """ - for p in module.parameters(): - p.detach().mul_(scale) - return module - - -def mean_flat(tensor): - """ - Take the mean over all non-batch dimensions. - """ - return tensor.mean(dim=list(range(1, len(tensor.shape)))) - - -def normalization(channels, act=""): - """ - Make a standard normalization layer. - - :param channels: number of input channels. - :return: an nn.Module for normalization. - """ - if OPT_GROUP_NORM: - return GroupNormOpt(32, channels, act=act) - - return nn.GroupNorm(32, channels) - - -def timestep_embedding(timesteps, dim, max_period=10000, dtype=th.float32): - """ - Create sinusoidal timestep embeddings. - - :param timesteps: a 1-D Tensor of N indices, one per batch element. - These may be fractional. - :param dim: the dimension of the output. - :param max_period: controls the minimum frequency of the embeddings. - :return: an [N x dim] Tensor of positional embeddings. - """ - half = dim // 2 - freqs = th.exp(-math.log(max_period) * th.arange(start=0, end=half, dtype=th.float32) / half).to( - device=timesteps.device - ) - args = timesteps[:, None].float() * freqs[None] - freqs = freqs.to(dtype=dtype) - args = args.to(dtype=dtype) - embedding = th.cat([th.cos(args), th.sin(args)], dim=-1) - if dim % 2: - embedding = th.cat([embedding, th.zeros_like(embedding[:, :1])], dim=-1) - return embedding - - -# Native ADM nearest neighbor upsampling -class Upsample(nn.Module): - """ - An upsampling layer with an optional convolution. - - :param channels: channels in the inputs and outputs. - :param use_conv: a bool determining if a convolution is applied. - :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then - upsampling occurs in the inner-two dimensions. - """ - - def __init__(self, channels, use_conv, dims=2, out_channels=None): - super().__init__() - self.channels = channels - self.out_channels = out_channels or channels - self.use_conv = use_conv - self.dims = dims - if use_conv: - self.conv = conv_nd(dims, self.channels, self.out_channels, 3, padding=1) - - def forward(self, x): - assert x.shape[1] == self.channels - if self.dims == 3: - x = F.interpolate(x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest") - else: - x = F.interpolate(x, scale_factor=2, mode="nearest") - if self.use_conv: - x = self.conv(x) - return x - - -class UpsampleLearnable(nn.Module): - """ - Upsampling based on ConvTranspose2d. This is needed for bfloat support. - - :param channels: channels in the inputs and outputs. - :param use_conv: a bool determining if a convolution is applied. - :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then - upsampling occurs in the inner-two dimensions. - """ - - def __init__(self, channels, use_conv, dims=2, out_channels=None): - super().__init__() - self.channels = channels - self.out_channels = out_channels or channels - self.use_conv = use_conv - self.dims = dims - - if self.dims == 2: - self.conv = nn.ConvTranspose2d(self.channels, self.out_channels, 4, 2, 1) - elif self.dims == 3: - self.conv = nn.ConvTranspose3d( - self.channels, self.out_channels, kernel_size=(1, 4, 4), stride=(1, 2, 2), padding=(0, 1, 1) - ) - else: - raise ValueError('Upsampling support only for 2D and 3D') - - def forward(self, x): - assert x.shape[1] == self.channels - x = self.conv(x) - return x - - -class Downsample(nn.Module): - """ - A downsampling layer with an optional convolution. - - :param channels: channels in the inputs and outputs. - :param use_conv: a bool determining if a convolution is applied. - :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then - downsampling occurs in the inner-two dimensions. - """ - - def __init__(self, channels, use_conv, dims=2, out_channels=None): - super().__init__() - self.channels = channels - self.out_channels = out_channels or channels - self.use_conv = use_conv - self.dims = dims - stride = 2 if dims != 3 else (1, 2, 2) - if use_conv: - self.op = conv_nd(dims, self.channels, self.out_channels, 3, stride=stride, padding=1) - else: - assert self.channels == self.out_channels - self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride) - - def forward(self, x): - assert x.shape[1] == self.channels - return self.op(x) diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py deleted file mode 100644 index 0ce1a46a5884..000000000000 --- a/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py +++ /dev/null @@ -1,642 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import torch.nn.functional as F - -from nemo.collections.multimodal.modules.imagen.diffusionmodules.attention import SelfAttentionPooling -from nemo.collections.multimodal.modules.imagen.diffusionmodules.blocks import ( - ConditionalSequential, - DBlock, - FusedCrossAttentionBlock, - ResBlock, - StackedCrossAttentionBlock, - UBlock, -) -from nemo.collections.multimodal.modules.imagen.diffusionmodules.embs import ( - LearnedSinusoidalPosEmb, - UnLearnedSinusoidalPosEmb, -) -from nemo.collections.multimodal.modules.imagen.diffusionmodules.layers import Downsample -from nemo.collections.multimodal.modules.imagen.diffusionmodules.layers import UpsampleLearnable as Upsample -from nemo.collections.multimodal.modules.imagen.diffusionmodules.layers import linear, normalization, zero_module - - -class UNetModel(nn.Module): - def __init__( - self, - embed_dim, # Dimension of embeddings. Also used to calculate the number of channels in ResBlock - image_size, # Input image size. Used to calculate where to inject attention layers in UNet - channels=3, # Input channel number - text_embed_dim=512, # Dimension of conditioned text embedding. Different text encoders and different model versions have different values - num_res_blocks=3, # Number of ResBlock in each level of UNet - channel_mult=[1, 2, 3, 4], # Used with embed_dim to calculate the number of channels for each level of UNet - num_attn_heads=4, # The number of heads in the attention layer - per_head_channels=64, # The number of channels per attention head - cond_dim=512, # Dimension of Conditioning projections - attention_type='fused', # Type of attention layer - feature_pooling_type='attention', # Type of pooling - learned_sinu_pos_emb_dim=16, # Dimension of learned time positional embedding. 0 for unlearned timestep embeddings. - attention_resolutions=[8, 16, 32], # List of resolutions to inject attention layers - dropout=False, # The rate of dropout - use_null_token=False, # Whether to create a learned null token for attention - init_conv_kernel_size=3, # Initial Conv kernel size. imagen_pytorch uses 7 - gradient_checkpointing=False, # Whether to use gradient checkpointing - scale_shift_norm=True, # Whether to use scale shift norm - stable_attention=True, # Whether to use numerically-stable attention calculation - flash_attention=False, # Whether to use flash attention calculation - resblock_updown=False, # Whether to use ResBlock or Downsample/Upsample - resample_with_conv=True, # When resblock_updown=False, whether to use conv in addition to Pooling&ConvTranspose - low_res_cond=False, - noise_cond_aug=False, - ): - super().__init__() - - # Attention Class - if attention_type == 'stacked': - attention_fn = StackedCrossAttentionBlock - elif attention_type == 'fused': - attention_fn = FusedCrossAttentionBlock - else: - raise ValueError('Attention {} not defined'.format(attention_type)) - - # Time embedding for log(snr) noise from continous version - time_embed_dim = embed_dim * 4 - assert learned_sinu_pos_emb_dim >= 0 - if learned_sinu_pos_emb_dim > 0: - sinu_pos_emb = LearnedSinusoidalPosEmb(learned_sinu_pos_emb_dim) - sinu_pos_emb_input_dim = learned_sinu_pos_emb_dim + 1 - self.time_embed = nn.Sequential( - sinu_pos_emb, - nn.Linear(sinu_pos_emb_input_dim, time_embed_dim), - nn.SiLU(), - nn.Linear(time_embed_dim, time_embed_dim), - ) - else: - # Unlearned Time Embedding - sinu_pos_emb = UnLearnedSinusoidalPosEmb(embed_dim) - self.time_embed = nn.Sequential( - sinu_pos_emb, linear(embed_dim, time_embed_dim), nn.SiLU(), linear(time_embed_dim, time_embed_dim) - ) - - # Pooling - assert feature_pooling_type == 'attention' or feature_pooling_type == 'mean' - self.feature_pooling_type = feature_pooling_type - if feature_pooling_type == 'attention': - self.attention_pooling = nn.Sequential( - SelfAttentionPooling(input_dim=text_embed_dim), - nn.LayerNorm(text_embed_dim), - nn.Linear(text_embed_dim, cond_dim), - ) - - # Context Projections - self.text_to_cond = linear(text_embed_dim, cond_dim) - self.to_text_non_attn_cond = nn.Sequential( - nn.LayerNorm(cond_dim), - nn.Linear(cond_dim, time_embed_dim), - nn.SiLU(), - nn.Linear(time_embed_dim, time_embed_dim), - ) - - # Register for Null Token - if use_null_token: - self.null_text_embedding = nn.Parameter(torch.randn(1, 1, cond_dim, dtype=self.text_to_cond.weight.dtype)) - self.use_null_token = use_null_token - - # Converting attention resolutions to downsampling factor - attention_ds = [] - attention_resolutions = sorted(attention_resolutions) - self.image_size = image_size - for res in attention_resolutions: - attention_ds.append(image_size // int(res)) - - self.low_res_cond = low_res_cond - # Low res noise conditioning augmentation - self.noise_cond_aug = noise_cond_aug - if self.noise_cond_aug: - assert ( - self.low_res_cond - ), 'noise conditioning augmentation should only be enabled when training with low-res cond' - if learned_sinu_pos_emb_dim > 0: - lowres_sinu_pos_emb = LearnedSinusoidalPosEmb(learned_sinu_pos_emb_dim) - lowres_sinu_pos_emb_dim = learned_sinu_pos_emb_dim + 1 - else: - lowres_sinu_pos_emb = UnLearnedSinusoidalPosEmb(embed_dim) - lowres_sinu_pos_emb_dim = embed_dim - self.lowres_time_embed = nn.Sequential( - lowres_sinu_pos_emb, - nn.Linear(lowres_sinu_pos_emb_dim, time_embed_dim), - nn.SiLU(), - nn.Linear(time_embed_dim, time_embed_dim), - ) - - # Initial Convolution - in_channels = 2 * channels if low_res_cond else channels - init_dim = embed_dim * channel_mult[0] - self.init_conv = ConditionalSequential( - nn.Conv2d(in_channels, init_dim, init_conv_kernel_size, padding=init_conv_kernel_size // 2) - ) - - if isinstance(num_res_blocks, int): - res_blocks_list = [num_res_blocks] * len(channel_mult) - else: - res_blocks_list = num_res_blocks - # UNet Init - # Downsampling Layers - # We use Conv2D for UNet - CONV_DIM = 2 - ch = init_dim - ds = 1 - self.input_blocks = nn.ModuleList([self.init_conv]) - num_input_block_channels = [ch] - for level, mult in enumerate(channel_mult): - num_res_blocks = res_blocks_list[level] - for _ in range(num_res_blocks): - out_channels = mult * embed_dim - layers = [ - ResBlock( - channels=ch, - emb_channels=time_embed_dim, - dropout=dropout, - out_channels=out_channels, - dims=CONV_DIM, - use_checkpoint=gradient_checkpointing, - use_scale_shift_norm=scale_shift_norm, - learnable_upsampling=True, - ) - ] - ch = out_channels - if ds in attention_ds: - layers.append( - attention_fn( - channels=ch, - num_heads=num_attn_heads, - num_head_channels=per_head_channels, - use_checkpoint=gradient_checkpointing, - stable_attention=stable_attention, - flash_attention=flash_attention, - context_dim=cond_dim, - ) - ) - self.input_blocks.append(ConditionalSequential(*layers)) - num_input_block_channels.append(ch) - is_last_level = level == len(channel_mult) - 1 - if not is_last_level: - # DownSampling - self.input_blocks.append( - ConditionalSequential( - ResBlock( - channels=ch, - emb_channels=time_embed_dim, - dropout=dropout, - out_channels=ch, - dims=CONV_DIM, - use_checkpoint=gradient_checkpointing, - use_scale_shift_norm=scale_shift_norm, - down=True, - learnable_upsampling=True, - ) - if resblock_updown - else Downsample(channels=ch, use_conv=resample_with_conv, dims=CONV_DIM, out_channels=ch,) - ) - ) - num_input_block_channels.append(ch) - ds *= 2 - - # Middle Layers - self.middle_block = ConditionalSequential( - # Mid Block 1 - ResBlock( - channels=ch, - emb_channels=time_embed_dim, - dropout=dropout, - dims=CONV_DIM, - use_checkpoint=gradient_checkpointing, - use_scale_shift_norm=scale_shift_norm, - learnable_upsampling=True, - ), - # Attention Layer - attention_fn( - channels=ch, - num_heads=num_attn_heads, - num_head_channels=per_head_channels, - use_checkpoint=gradient_checkpointing, - stable_attention=stable_attention, - flash_attention=flash_attention, - context_dim=cond_dim, - ), - # Mid Block 2 - ResBlock( - channels=ch, - emb_channels=time_embed_dim, - dropout=dropout, - dims=CONV_DIM, - use_checkpoint=gradient_checkpointing, - use_scale_shift_norm=scale_shift_norm, - learnable_upsampling=True, - ), - ) - - # Upsampling Layers - self.output_blocks = nn.ModuleList([]) - for level, mult in list(enumerate(channel_mult))[::-1]: - num_res_blocks = res_blocks_list[level] - for i in range(num_res_blocks + 1): - ich = num_input_block_channels.pop() - out_channels = embed_dim * mult - layers = [ - ResBlock( - channels=ch + ich, - emb_channels=time_embed_dim, - dropout=dropout, - out_channels=out_channels, - dims=CONV_DIM, - use_checkpoint=gradient_checkpointing, - use_scale_shift_norm=scale_shift_norm, - learnable_upsampling=True, - ) - ] - ch = out_channels - - if ds in attention_ds: - layers.append( - attention_fn( - channels=ch, - num_heads=-1, # TODO - num_head_channels=per_head_channels, - use_checkpoint=gradient_checkpointing, - stable_attention=stable_attention, - flash_attention=flash_attention, - context_dim=cond_dim, - ) - ) - is_last_block = i == num_res_blocks - if level and is_last_block: - layers.append( - ResBlock( - channels=ch, - emb_channels=time_embed_dim, - dropout=dropout, - out_channels=ch, - dims=CONV_DIM, - use_checkpoint=gradient_checkpointing, - use_scale_shift_norm=scale_shift_norm, - up=True, - learnable_upsampling=True, - ) - if resblock_updown - else Upsample(channels=ch, use_conv=resample_with_conv, dims=CONV_DIM, out_channels=ch) - ) - ds //= 2 - self.output_blocks.append(ConditionalSequential(*layers)) - - self.out = nn.Sequential( - normalization(ch), - nn.SiLU(), - zero_module(nn.Conv2d(init_dim, channels, init_conv_kernel_size, padding=init_conv_kernel_size // 2)), - ) - - def forward( - self, x, time, text_embed=None, text_mask=None, x_low_res=None, time_low_res=None, - ): - if self.low_res_cond: - assert x_low_res is not None, 'x_low_res cannot be None' - else: - assert x_low_res is None, 'x_low_res cannot be presented' - if self.noise_cond_aug: - assert time_low_res is not None, 'time_low_res cannot be None when training with noise conditioning aug' - else: - assert time_low_res is None, 'time_low_res cannot be presented' - # Concatenating low resolution images - if x_low_res is not None: - if x_low_res.shape != x.shape: - # Upscale if not done in the trainer - _, _, new_height, new_width = x.shape - x_low_res = F.interpolate(x_low_res, (new_height, new_width), mode="bicubic") - x = torch.cat([x, x_low_res], dim=1) - batch_size, device = x.shape[0], x.device - - if x.dtype != time.dtype or time.dtype != text_embed.dtype: - dtype = text_embed.dtype - x = x.to(dtype=dtype) - time = time.to(dtype=dtype) - if x_low_res is not None: - x_low_res = x_low_res.to(dtype=dtype) - if time_low_res is not None: - time_low_res = time_low_res.to(dtype=dtype) - # Time Conditioning - t = self.time_embed(time) - # Add lowres time conditioning - if self.noise_cond_aug: - lowres_t = self.lowres_time_embed(time_low_res) - t += lowres_t - # Text Conditioning - text_cond = self.text_to_cond(text_embed) - - # Context Embedding - # TODO We may want to concat time token here - if self.use_null_token: - # Null Context (Helpful when text_embed is drop) - null_context = self.null_text_embedding.repeat(batch_size, 1, 1) - context_emb = torch.cat([text_cond, null_context], dim=1) - context_mask = torch.cat([text_mask, torch.ones(batch_size, 1).to(device)], dim=1) - else: - context_emb = text_cond - context_mask = text_mask - - # Add pooled text embeddings to the diffusion timestep - # TODO We may only want to calculated the pooled feature based on text token length - if self.feature_pooling_type == 'mean': - pooled_text_cond = text_cond.mean(dim=-2) - elif self.feature_pooling_type == 'attention': - pooled_text_cond = self.attention_pooling(text_embed) - text_hiddens = self.to_text_non_attn_cond(pooled_text_cond) - t += text_hiddens - - h = x - hs = [] - # UNet Forward - for module in self.input_blocks: - h = module(h, t, context_emb, context_mask) - hs.append(h) - h = self.middle_block(h, t, context_emb, context_mask) - for module in self.output_blocks: - h_prev = hs.pop() - h = torch.cat([h, h_prev], dim=1) - h = module(h, t, context_emb, context_mask) - return self.out(h) - - def forward_with_cond_scale(self, *args, text_embed=None, cond_scale=1.0, **kwargs): - logits = self.forward(*args, text_embed=text_embed, **kwargs) - if cond_scale == 1.0: - return logits - null_logits = self.forward(*args, text_embed=torch.zeros_like(text_embed), **kwargs) - return null_logits + (logits - null_logits) * cond_scale - - -class EfficientUNetModel(nn.Module): - def __init__( - self, - embed_dim, - image_size, - channels=3, - text_embed_dim=512, # Dimension of conditioned text embedding. Different text encoders and different model versions have different values - channel_mult=[ - 1, - 1, - 2, - 4, - 8, - ], # Used with embed_dim to calculate the number of channels for each level of Efficient-UNet - num_attn_heads=8, # The number of heads in the attention layer - per_head_channels=64, # The number of channels per attention head - attention_type='fused', # Type of attention layer - atnn_enabled_at=[0, 0, 0, 0, 1], # Whether to enable attention at each level - feature_pooling_type='attention', # Type of pooling - stride=2, # Stride in ResBlock - num_resblocks=[ - 1, - 2, - 4, - 8, - 8, - ], # Used with num_res_blocks to calculate the number of residual blocks at each level of Efficient-UNet - learned_sinu_pos_emb_dim=16, # Dimension of learned time positional embedding. 0 for unlearned timestep embeddings. - use_null_token=False, # Whether to create a learned null token for attention - init_conv_kernel_size=3, # Initial Conv kernel size. imagen_pytorch uses 7 - gradient_checkpointing=False, # Whether to use gradient checkpointing - scale_shift_norm=True, # Whether to use scale shift norm - stable_attention=True, # Whether to use numerically-stable attention calculation - flash_attention=False, # Whether to use flash attention calculation - skip_connection_scaling=False, # Whether to use 1/sqrt(2) scaling for ResBlock skip connection - noise_cond_aug=False, - ): - super().__init__() - - self.n_levels = len(channel_mult) - self.image_size = image_size - # Time embedding for log(snr) noise from continous version - time_embed_dim = embed_dim * 4 - assert learned_sinu_pos_emb_dim >= 0 - if learned_sinu_pos_emb_dim > 0: - sinu_pos_emb = LearnedSinusoidalPosEmb(learned_sinu_pos_emb_dim) - sinu_pos_emb_input_dim = learned_sinu_pos_emb_dim + 1 - self.time_embed = nn.Sequential( - sinu_pos_emb, - nn.Linear(sinu_pos_emb_input_dim, time_embed_dim), - nn.SiLU(), - nn.Linear(time_embed_dim, time_embed_dim), - ) - else: - # Unlearned Time Embedding - sinu_pos_emb = UnLearnedSinusoidalPosEmb(embed_dim) - self.time_embed = nn.Sequential( - sinu_pos_emb, linear(embed_dim, time_embed_dim), nn.SiLU(), linear(time_embed_dim, time_embed_dim) - ) - - self.noise_cond_aug = noise_cond_aug - if self.noise_cond_aug: - if learned_sinu_pos_emb_dim > 0: - lowres_sinu_pos_emb = LearnedSinusoidalPosEmb(learned_sinu_pos_emb_dim) - lowres_sinu_pos_emb_dim = learned_sinu_pos_emb_dim + 1 - else: - lowres_sinu_pos_emb = UnLearnedSinusoidalPosEmb(embed_dim) - lowres_sinu_pos_emb_dim = embed_dim - self.lowres_time_embed = nn.Sequential( - lowres_sinu_pos_emb, - nn.Linear(lowres_sinu_pos_emb_dim, time_embed_dim), - nn.SiLU(), - nn.Linear(time_embed_dim, time_embed_dim), - ) - cond_dim = text_embed_dim # time_embed_dim - # Pooling - assert feature_pooling_type == 'attention' or feature_pooling_type == 'mean' - self.feature_pooling_type = feature_pooling_type - if feature_pooling_type == 'attention': - self.attention_pooling = nn.Sequential( - SelfAttentionPooling(input_dim=text_embed_dim), - nn.LayerNorm(text_embed_dim), - nn.Linear(text_embed_dim, cond_dim), - ) - - # Context Projections - self.text_to_cond = linear(text_embed_dim, cond_dim) - self.to_text_non_attn_cond = nn.Sequential( - nn.LayerNorm(cond_dim), - nn.Linear(cond_dim, time_embed_dim), - nn.SiLU(), - nn.Linear(time_embed_dim, time_embed_dim), - ) - # Register for Null Token - if use_null_token: - self.null_text_embedding = nn.Parameter(torch.randn(1, 1, cond_dim, dtype=self.text_to_cond.weight.dtype)) - self.use_null_token = use_null_token - - # Initial Convolution - # Multiply in_channels by 2 because we concatenate with low res inputs. - in_channels = channels * 2 - init_dim = embed_dim * channel_mult[0] - self.init_conv = nn.Conv2d(in_channels, init_dim, init_conv_kernel_size, padding=init_conv_kernel_size // 2) - # Efficient-UNet Init - self.DBlocks = nn.ModuleDict() - self.UBlocks = nn.ModuleDict() - ch = init_dim - for level, mult in enumerate(channel_mult): - # Different level has different num of res blocks - num_resblock = num_resblocks[level] - # Only perform upsample/downsample if it is not the last (deepest) level - is_last_level = level == len(channel_mult) - 1 - level_attention_type = attention_type if atnn_enabled_at[level] else None - - level_key = str(level) # TODO Change to more meaningful naming - self.DBlocks[level_key] = DBlock( - channels=ch, - emb_channels=time_embed_dim, - out_channels=int(mult * embed_dim), - use_scale_shift_norm=scale_shift_norm, - conv_down=not is_last_level, - stride=stride, - num_resblocks=num_resblock, - attention_type=level_attention_type, - text_embed_dim=cond_dim, - num_heads=num_attn_heads, - num_head_channels=per_head_channels, - use_checkpoint=gradient_checkpointing, - stable_attention=stable_attention, - flash_attention=flash_attention, - skip_connection_scaling=skip_connection_scaling, - ) - self.UBlocks[level_key] = UBlock( - channels=int(mult * embed_dim), - emb_channels=time_embed_dim, - out_channels=ch, - use_scale_shift_norm=scale_shift_norm, - conv_up=not is_last_level, - stride=stride, - num_resblocks=num_resblock, - attention_type=level_attention_type, - text_embed_dim=cond_dim, - num_heads=num_attn_heads, - num_head_channels=per_head_channels, - use_checkpoint=gradient_checkpointing, - stable_attention=stable_attention, - flash_attention=flash_attention, - skip_connection_scaling=skip_connection_scaling, - ) - ch = int(mult * embed_dim) - self.out = nn.Conv2d(channel_mult[0] * embed_dim, channels, 1) - - def forward( - self, x, time, text_embed, text_mask, x_low_res, time_low_res=None, - ): - if self.noise_cond_aug: - assert time_low_res is not None, 'time_low_res cannot be None when training with noise conditioning aug' - else: - assert time_low_res is None, 'time_low_res cannot be presented' - - if x.dtype != time.dtype or time.dtype != text_embed.dtype: - dtype = text_embed.dtype - x = x.to(dtype=dtype) - time = time.to(dtype=dtype) - if x_low_res is not None: - x_low_res = x_low_res.to(dtype=dtype) - if time_low_res is not None: - time_low_res = time_low_res.to(dtype=dtype) - - batch_size, device = x.shape[0], x.device - # Time Conditioning - t = self.time_embed(time) - # Text Conditioning - text_cond = self.text_to_cond(text_embed) - # Concatenating low resolution images - if x_low_res.shape != x.shape: - # Upscale if not done in the trainer - _, _, new_height, new_width = x.shape - x_low_res = F.interpolate(x_low_res, (new_height, new_width), mode="bicubic") - x = torch.cat([x, x_low_res], dim=1) - - # Add lowres time conditioning - if self.noise_cond_aug: - lowres_t = self.lowres_time_embed(time_low_res) - t += lowres_t - # Context Embedding - # TODO We may want to concat time token here - if self.use_null_token: - # Null Context (Helpful when text_embed is drop) - null_context = self.null_text_embedding.repeat(batch_size, 1, 1) - context_emb = torch.cat([text_cond, null_context], dim=1) - context_mask = torch.cat([text_mask, torch.ones(batch_size, 1).to(device)], dim=1) - else: - context_emb = text_cond - context_mask = text_mask - - # Add pooled text embeddings to the diffusion timestep - # TODO We may only want to calculated the pooled feature based on text token length - if self.feature_pooling_type == 'mean': - pooled_text_cond = text_cond.mean(dim=-2) - elif self.feature_pooling_type == 'attention': - pooled_text_cond = self.attention_pooling(text_embed) - text_hiddens = self.to_text_non_attn_cond(pooled_text_cond) - t += text_hiddens - - # UNet forward - x = self.init_conv(x) - feats = dict() - for level in range(self.n_levels): - level_key = str(level) - x = self.DBlocks[level_key](x, t, context_emb, context_mask) - # Save feats for UBlocks - if level < self.n_levels - 1: - feats[level_key] = x - for level in range(self.n_levels - 1, -1, -1): - level_key = str(level) - if level < self.n_levels - 1: - x = x + feats[level_key] - x = self.UBlocks[level_key](x, t, context_emb, context_mask) - return self.out(x) - - def forward_with_cond_scale(self, *args, text_embed=None, cond_scale=1.0, **kwargs): - logits = self.forward(*args, text_embed=text_embed, **kwargs) - if cond_scale == 1.0: - return logits - null_logits = self.forward(*args, text_embed=torch.zeros_like(text_embed), **kwargs) - return null_logits + (logits - null_logits) * cond_scale - - -if __name__ == '__main__': - model = UNetModel(embed_dim=512, image_size=64,) - - pytorch_total_params = sum(p.numel() for p in model.parameters()) - print(pytorch_total_params) - - image_batch = torch.rand(4, 3, 64, 64) - text_cond = torch.rand(4, 88, 512) - text_mask = torch.ones(4, 88) - time = torch.ones(4) - - output = model(image_batch, time, text_cond, text_mask,) - - print(output.shape) - - model_sr = EfficientUNetModel(embed_dim=128, image_size=256) - pytorch_total_params = sum(p.numel() for p in model_sr.parameters()) - print(pytorch_total_params) - output = model_sr( - torch.randn(4, 3, 256, 256), - torch.randn(4, 3, 256, 256), - torch.ones(4), - torch.randn(4, 88, 512), - torch.ones(4, 88), - ) - print(output.shape) diff --git a/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.json b/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.json deleted file mode 100644 index 3fb4ffdac7f1..000000000000 --- a/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "architectures": [ - "T5WithLMHeadModel" - ], - "d_ff": 65536, - "d_kv": 128, - "d_model": 1024, - "decoder_start_token_id": 0, - "dropout_rate": 0.1, - "eos_token_id": 1, - "initializer_factor": 1.0, - "is_encoder_decoder": true, - "layer_norm_epsilon": 1e-06, - "model_type": "t5", - "n_positions": 512, - "num_heads": 128, - "num_layers": 24, - "output_past": true, - "pad_token_id": 0, - "relative_attention_num_buckets": 32, - "task_specific_params": { - "summarization": { - "early_stopping": true, - "length_penalty": 2.0, - "max_length": 200, - "min_length": 30, - "no_repeat_ngram_size": 3, - "num_beams": 4, - "prefix": "summarize: " - }, - "translation_en_to_de": { - "early_stopping": true, - "max_length": 300, - "num_beams": 4, - "prefix": "translate English to German: " - }, - "translation_en_to_fr": { - "early_stopping": true, - "max_length": 300, - "num_beams": 4, - "prefix": "translate English to French: " - }, - "translation_en_to_ro": { - "early_stopping": true, - "max_length": 300, - "num_beams": 4, - "prefix": "translate English to Romanian: " - } - }, - "vocab_size": 32128 -} diff --git a/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py b/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py deleted file mode 100644 index 56472db3f052..000000000000 --- a/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os - -import torch -from transformers import T5Config, T5EncoderModel, T5Tokenizer - - -class T5Encoder(torch.nn.Module): - def __init__(self, max_seq_len=512, encoder_path=None): - super().__init__() - self.max_seq_len = max_seq_len - - self.model_seq_len = 512 - # Initializing T5 model - self.tokenizer = T5Tokenizer.from_pretrained("t5-11b", model_max_length=self.model_seq_len) - - if encoder_path is None: - self.model = T5EncoderModel.from_pretrained("t5-11b", low_cpu_mem_usage=True) - else: - print(f'Load T5 encoder from {encoder_path}') - hard_coded_encoder_weight_location = os.path.join(encoder_path, "t5xxl-encoder.bin") - hard_coded_encoder_config_location = os.path.join(os.path.dirname(__file__), "t5encoder.json") - self.model = T5EncoderModel.from_pretrained( - hard_coded_encoder_weight_location, - config=T5Config.from_json_file(hard_coded_encoder_config_location), - low_cpu_mem_usage=True, - ) - - def encode(self, text_batch, device='cuda'): - encoded = self.tokenizer.batch_encode_plus( - text_batch, return_tensors="pt", padding="max_length", max_length=self.model_seq_len, truncation=True - ) - # We expect all the processing is done in GPU. - input_ids = encoded.input_ids.to(device=device) - attn_mask = encoded.attention_mask.to(device=device) - - with torch.no_grad(): - output = self.model(input_ids=input_ids, attention_mask=attn_mask) - encoded_text = output.last_hidden_state.detach() - - encoded_text = encoded_text[:, 0 : self.max_seq_len] - attn_mask = attn_mask[:, 0 : self.max_seq_len] - for bnum in range(encoded_text.shape[0]): - nvalid_elem = attn_mask[bnum].sum().item() - encoded_text[bnum][nvalid_elem:] = 0 - - return encoded_text, attn_mask diff --git a/nemo/collections/multimodal/modules/imagen/sampler/batch_ops.py b/nemo/collections/multimodal/modules/imagen/sampler/batch_ops.py deleted file mode 100644 index 029bbf60ffbc..000000000000 --- a/nemo/collections/multimodal/modules/imagen/sampler/batch_ops.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Functions for performing operations with broadcasting to the right axis -# -# Example -# input1: tensor of size (N1, N2) -# input2: tensor of size (N1, N2, N3, N4) -# batch_mul(input1, input2) = input1[:, :, None, None] * input2 -# -# If the common dimensions don't match, we raise an assertion error. - - -def common_broadcast(x, y): - ndims1 = x.ndim - ndims2 = y.ndim - - common_ndims = min(ndims1, ndims2) - for axis in range(common_ndims): - assert x.shape[axis] == y.shape[axis], 'Dimensions not equal at axis {}'.format(axis) - - if ndims1 < ndims2: - x = x.reshape(x.shape + (1,) * (ndims2 - ndims1)) - elif ndims2 < ndims1: - y = y.reshape(y.shape + (1,) * (ndims1 - ndims2)) - - return x, y - - -def batch_add(x, y): - x, y = common_broadcast(x, y) - return x + y - - -def batch_mul(x, y): - x, y = common_broadcast(x, y) - return x * y - - -def batch_sub(x, y): - x, y = common_broadcast(x, y) - return x - y - - -def batch_div(x, y): - x, y = common_broadcast(x, y) - return x / y diff --git a/nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py b/nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py deleted file mode 100644 index 2b48f28ce9c9..000000000000 --- a/nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import math -from functools import partial, wraps - -import torch -import torch.nn as nn -from einops import repeat -from torch.special import expm1 - -from nemo.collections.multimodal.parts.utils import randn_like - - -def exists(val): - return val is not None - - -def default(val, d): - if exists(val): - return val - return d() if callable(d) else d - - -def maybe(fn): - @wraps(fn) - def inner(x): - if not exists(x): - return x - return fn(x) - - return inner - - -def log(t, eps: float = 1e-12): - return torch.log(t.clamp(min=eps)) - - -def right_pad_dims_to(x, t): - padding_dims = x.ndim - t.ndim - if padding_dims <= 0: - return t - return t.view(*t.shape, *((1,) * padding_dims)) - - -@torch.jit.script -def beta_linear_log_snr(t): - return -torch.log(expm1(1e-4 + 10 * (t ** 2))) - - -@torch.jit.script -def alpha_cosine_log_snr(t, s: float = 0.008): - return -log( - (torch.cos((t + s) / (1 + s) * math.pi * 0.5) ** -2) - 1, eps=1e-5 - ) # not sure if this accounts for beta being clipped to 0.999 in discrete version - - -def log_snr_to_alpha_sigma(log_snr): - return torch.sqrt(torch.sigmoid(log_snr)), torch.sqrt(torch.sigmoid(-log_snr)) - - -class GaussianDiffusionContinuousTimes(nn.Module): - def __init__(self, *, noise_schedule, timesteps=1000, rng=None): - super().__init__() - - if noise_schedule == "linear": - self.log_snr = beta_linear_log_snr - elif noise_schedule == "cosine": - self.log_snr = alpha_cosine_log_snr - else: - raise ValueError(f'invalid noise schedule {noise_schedule}') - - self.num_timesteps = timesteps - self.rng = rng - - def get_times(self, batch_size, noise_level, *, device): - return torch.full((batch_size,), noise_level, device=device, dtype=torch.float32) - - def sample_random_times(self, batch_size, *, device): - return torch.rand((batch_size,), device=device, generator=self.rng, dtype=torch.float32) - - def get_condition(self, times): - return maybe(self.log_snr)(times) - - def get_sampling_timesteps(self, batch, *, device): - times = torch.linspace(1.0, 0.0, self.num_timesteps + 1, device=device) - times = repeat(times, 't -> b t', b=batch) - times = torch.stack((times[:, :-1], times[:, 1:]), dim=0) - times = times.unbind(dim=-1) - return times - - def q_posterior(self, x_start, x_t, t, *, t_next=None): - t_next = default(t_next, lambda: (t - 1.0 / self.num_timesteps).clamp(min=0.0)) - - """ https://openreview.net/attachment?id=2LdBqxc1Yv&name=supplementary_material """ - log_snr = self.log_snr(t) - log_snr_next = self.log_snr(t_next) - log_snr, log_snr_next = map(partial(right_pad_dims_to, x_t), (log_snr, log_snr_next)) - - alpha, sigma = log_snr_to_alpha_sigma(log_snr) - alpha_next, sigma_next = log_snr_to_alpha_sigma(log_snr_next) - - # c - as defined near eq 33 - c = -expm1(log_snr - log_snr_next) - posterior_mean = alpha_next * (x_t * (1 - c) / alpha + c * x_start) - - # following (eq. 33) - posterior_variance = (sigma_next ** 2) * c - posterior_log_variance_clipped = log(posterior_variance, eps=1e-20) - return posterior_mean, posterior_variance, posterior_log_variance_clipped - - def q_sample(self, x_start, t, noise=None): - dtype = x_start.dtype - - if isinstance(t, float): - batch = x_start.shape[0] - t = torch.full((batch,), t, device=x_start.device, dtype=dtype) - - noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) - log_snr = self.log_snr(t).type(dtype) - log_snr_padded_dim = right_pad_dims_to(x_start, log_snr) - alpha, sigma = log_snr_to_alpha_sigma(log_snr_padded_dim) - - return alpha * x_start + sigma * noise, log_snr, alpha, sigma - - def q_sample_from_to(self, x_from, from_t, to_t, noise=None): - shape, device, dtype = x_from.shape, x_from.device, x_from.dtype - batch = shape[0] - - if isinstance(from_t, float): - from_t = torch.full((batch,), from_t, device=device, dtype=dtype) - - if isinstance(to_t, float): - to_t = torch.full((batch,), to_t, device=device, dtype=dtype) - - noise = default(noise, lambda: randn_like(x_from, generator=self.rng)) - - log_snr = self.log_snr(from_t) - log_snr_padded_dim = right_pad_dims_to(x_from, log_snr) - alpha, sigma = log_snr_to_alpha_sigma(log_snr_padded_dim) - - log_snr_to = self.log_snr(to_t) - log_snr_padded_dim_to = right_pad_dims_to(x_from, log_snr_to) - alpha_to, sigma_to = log_snr_to_alpha_sigma(log_snr_padded_dim_to) - - return x_from * (alpha_to / alpha) + noise * (sigma_to * alpha - sigma * alpha_to) / alpha - - def predict_start_from_v(self, x_t, t, v): - log_snr = self.log_snr(t) - log_snr = right_pad_dims_to(x_t, log_snr) - alpha, sigma = log_snr_to_alpha_sigma(log_snr) - return alpha * x_t - sigma * v - - def predict_start_from_noise(self, x_t, t, noise): - log_snr = self.log_snr(t) - log_snr = right_pad_dims_to(x_t, log_snr) - alpha, sigma = log_snr_to_alpha_sigma(log_snr) - return (x_t - sigma * noise) / alpha.clamp(min=1e-8) diff --git a/nemo/collections/multimodal/modules/imagen/sampler/sampler.py b/nemo/collections/multimodal/modules/imagen/sampler/sampler.py deleted file mode 100644 index 2fd05faf814d..000000000000 --- a/nemo/collections/multimodal/modules/imagen/sampler/sampler.py +++ /dev/null @@ -1,250 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import torch -from einops import rearrange -from tqdm import tqdm - -from nemo.collections.multimodal.modules.imagen.sampler.batch_ops import batch_div, batch_mul -from nemo.collections.multimodal.modules.imagen.sampler.continuous_ddpm import GaussianDiffusionContinuousTimes - - -def right_pad_dims_to(x, t): - padding_dims = x.ndim - t.ndim - if padding_dims <= 0: - return t - return t.view(*t.shape, *((1,) * padding_dims)) - - -def thresholding_x0(x0, method='dynamic', th=0.995): - if method is None: - return x0 - elif method == 'static': - return x0.clamp(-1.0, 1.0) - elif method == 'dynamic': - # torch.quantile only suppoprt either float or double dtype - # we need to manual cast it if running in FP16/AMP mode - original_dtype = x0.dtype - if original_dtype not in [torch.float, torch.double]: - x0 = x0.float() - s = torch.quantile(rearrange(x0, 'b ... -> b (...)').abs(), th, dim=-1) # From Figure A.10 (b) - s.clamp_(min=1.0) - s = right_pad_dims_to(x0, s) - x0 = x0.clamp(-s, s) / s - return x0.type(original_dtype) - else: - raise RuntimeError(f'Thresholding method: {method} not supported.') - - -def thresholding_derivative(x, t, d, thresholding_method='dynamic'): - x0 = x - batch_mul(d, t) - corrected_x0 = thresholding_x0(x0, thresholding_method) - corrected_d = batch_div(x - corrected_x0, t) - return corrected_d - - -class Sampler(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, model, model_kwargs, shape, z=None): - pass - - -class DDPMSampler(Sampler): - def __init__(self, unet_type, denoiser): - super().__init__() - self.unet_type = unet_type - self.noise_scheduler = denoiser - self.pred_objective = 'noise' - - def p_mean_variance( - self, unet, x, t, t_next, text_embeds, text_mask, x_low_res=None, cond_scale=1.0, thresholding_method='dynamic' - ): - - if self.unet_type == 'base': - pred = unet.forward_with_cond_scale( - x=x, time=t, text_embed=text_embeds, text_mask=text_mask, cond_scale=cond_scale - ) - elif self.unet_type == 'sr': - pred = unet.forward_with_cond_scale( - x=x, x_low_res=x_low_res, time=t, text_embed=text_embeds, text_mask=text_mask, cond_scale=cond_scale - ) - - if self.pred_objective == 'noise': - x_start = self.noise_scheduler.predict_start_from_noise(x, t=t, noise=pred) - elif self.pred_objective == 'x_start': - x_start = pred - elif self.pred_objective == 'v': - x_start = self.noise_scheduler.predict_start_from_v(x, t=t, v=pred) - else: - raise ValueError(f'unknown objective {self.pred_objective}') - - x_start = thresholding_x0(x_start, method=thresholding_method) - mean_and_variance = self.noise_scheduler.q_posterior(x_start=x_start, x_t=x, t=t, t_next=t_next) - return mean_and_variance, x_start - - @torch.no_grad() - def p_sample( - self, unet, x, t, t_next, text_embeds, text_mask, x_low_res=None, cond_scale=1.0, thresholding_method='dynamic' - ): - (model_mean, _, model_log_variance), x_start = self.p_mean_variance( - unet=unet, - x=x, - t=t, - t_next=t_next, - text_embeds=text_embeds, - text_mask=text_mask, - cond_scale=cond_scale, - x_low_res=x_low_res, - thresholding_method=thresholding_method, - ) - noise = torch.randn_like(x) - # no noise when t == 0 - b = x.shape[0] - is_last_sampling_timestep = ( - (t_next == 0) if isinstance(self.noise_scheduler, GaussianDiffusionContinuousTimes) else (t == 0) - ) - nonzero_mask = (1 - is_last_sampling_timestep.type_as(x)).reshape(b, *((1,) * (len(x.shape) - 1))) - pred = model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise - return pred, x_start - - def forward( - self, - model, - noise_map, - text_encoding, - text_mask, - x_low_res=None, - cond_scale=1.0, - sampling_steps=None, - thresholding_method='dynamic', - ): - batch = noise_map.shape[0] - device = noise_map.device - dtype = noise_map.dtype - original_steps = self.noise_scheduler.num_timesteps - if sampling_steps: - self.noise_scheduler.num_timesteps = sampling_steps - timesteps = self.noise_scheduler.get_sampling_timesteps(batch, device=device) - img = noise_map - for times, times_next in tqdm(timesteps, total=len(timesteps)): - img, x_start = self.p_sample( - unet=model, - x=img.type(dtype), - t=times.type(dtype), - t_next=times_next.type(dtype), - text_embeds=text_encoding, - text_mask=text_mask, - cond_scale=cond_scale, - x_low_res=x_low_res.type(dtype) if x_low_res is not None else None, - thresholding_method=thresholding_method, - ) - self.noise_scheduler.num_timesteps = original_steps - return img - - -class EDMSampler(Sampler): - def __init__( - self, - unet_type, - num_steps=50, - sigma_min=0.002, - sigma_max=80, - rho=7, - S_churn=0, - S_min=0, - S_max=float('inf'), - S_noise=1, - ): - super().__init__() - self.unet_type = unet_type - self.sigma_min = sigma_min - self.sigma_max = sigma_max - self.rho = rho - self.S_churn = S_churn - self.S_min = S_min - self.S_max = S_max - self.S_noise = S_noise - self.num_steps = num_steps - - def forward( - self, - unet, - noise_map, - text_encoding, - text_mask, - x_low_res=None, - cond_scale=1.0, - sampling_steps=None, - thresholding_method='dynamic', - ): - if self.unet_type == 'base': - assert x_low_res is None - elif self.unet_type == 'sr': - assert x_low_res is not None - low_res_cond = {'x_low_res': x_low_res} if x_low_res is not None else {} - thresholding_method = 'dynamic' - sigma_min = self.sigma_min - sigma_max = self.sigma_max - print(f'Sampling with sigma in [{sigma_min}, {sigma_max}], cfg={cond_scale}') - # Time step discretization - num_steps = sampling_steps if sampling_steps else self.num_steps - step_indices = torch.arange(num_steps, device=noise_map.device) - # Table 1: Sampling - Time steps - t_steps = ( - sigma_max ** (1 / self.rho) - + step_indices / (num_steps - 1) * (sigma_min ** (1 / self.rho) - sigma_max ** (1 / self.rho)) - ) ** self.rho - t_steps = torch.cat([t_steps, torch.zeros_like(t_steps[:1])]) # t_N = 0 - - # Main sampling loop. - x_next = noise_map * t_steps[0] - for i, (t_cur, t_next) in tqdm( - enumerate(zip(t_steps[:-1], t_steps[1:])), total=len(t_steps[:-1]) - ): # 0, ..., N-1 - x_cur = x_next - - # Increase noise temporarily. - gamma = min(self.S_churn / num_steps, np.sqrt(2) - 1) if self.S_min <= t_cur <= self.S_max else 0 - t_hat = (t_cur + gamma * t_cur).to(x_cur.device) - x_hat = x_cur + (t_hat ** 2 - t_cur ** 2).sqrt() * self.S_noise * torch.randn_like(x_cur) - - # Euler step. - denoised = unet.forward_with_cond_scale( - x=x_hat.to(torch.float32), - time=t_hat.to(torch.float32), - text_embed=text_encoding, - text_mask=text_mask, - cond_scale=cond_scale, - **low_res_cond, - ) - d_cur = (x_hat - denoised) / t_hat - d_cur = thresholding_derivative(x_hat, t_hat, d_cur, thresholding_method=thresholding_method) - x_next = x_hat + (t_next - t_hat) * d_cur - - # Apply 2nd order correction. - if i < num_steps - 1: - denoised = unet.forward_with_cond_scale( - x=x_next.to(torch.float32), - time=t_next.to(torch.float32), - text_embed=text_encoding, - text_mask=text_mask, - cond_scale=cond_scale, - **low_res_cond, - ) - d_prime = (x_next - denoised) / t_next - d_prime = thresholding_derivative(x_next, t_next, d_prime, thresholding_method=thresholding_method) - x_next = x_hat + (t_next - t_hat) * (0.5 * d_cur + 0.5 * d_prime) - return x_next diff --git a/nemo/collections/multimodal/modules/nerf/__init__.py b/nemo/collections/multimodal/modules/nerf/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/nemo/collections/multimodal/modules/nerf/background/nerf_background_base.py b/nemo/collections/multimodal/modules/nerf/background/nerf_background_base.py deleted file mode 100644 index 3d03d14694be..000000000000 --- a/nemo/collections/multimodal/modules/nerf/background/nerf_background_base.py +++ /dev/null @@ -1,22 +0,0 @@ -import torch -import torch.nn as nn - -# TODO(ahmadki): abstract class -class NeRFBackgroundBase(nn.Module): - def __init__(self): - super().__init__() - - def encode(self, rays_d: torch.Tensor) -> torch.Tensor: - """ - positions = [B*N, 3] - """ - raise NotImplementedError - - def forward_net(self, rays_d_encoding: torch.Tensor) -> torch.Tensor: - raise NotImplementedError - - def forward(self, rays_d: torch.Tensor) -> torch.Tensor: - rays_d_encoding = self.encode(rays_d) - features = self.forward_net(rays_d_encoding) - features = torch.sigmoid(features) - return features diff --git a/nemo/collections/multimodal/modules/nerf/background/random_background.py b/nemo/collections/multimodal/modules/nerf/background/random_background.py deleted file mode 100644 index 5f7f77d99596..000000000000 --- a/nemo/collections/multimodal/modules/nerf/background/random_background.py +++ /dev/null @@ -1,19 +0,0 @@ -import random -from typing import Tuple - -import torch -import torch.nn as nn - - -class RandomBackground(nn.Module): - def __init__(self, base_background: Tuple, random_ratio: float) -> None: - super().__init__() - self.random_ratio = random_ratio - self.num_output_dims = len(base_background) - self.register_buffer("base_background", torch.tensor(base_background)) - - def forward(self, rays_d: torch.Tensor) -> torch.Tensor: - if random.random() < self.random_ratio: - return torch.rand(rays_d.shape[0], self.num_output_dims).to(rays_d) - else: - return self.base_background.to(rays_d).expand(rays_d.shape[0], -1) diff --git a/nemo/collections/multimodal/modules/nerf/background/static_background.py b/nemo/collections/multimodal/modules/nerf/background/static_background.py deleted file mode 100644 index 955884161626..000000000000 --- a/nemo/collections/multimodal/modules/nerf/background/static_background.py +++ /dev/null @@ -1,14 +0,0 @@ -from typing import Tuple - -import torch -import torch.nn as nn - - -class StaticBackground(nn.Module): - def __init__(self, background: Tuple) -> None: - super().__init__() - self.register_buffer("background", torch.tensor(background)) - - def forward(self, rays_d: torch.Tensor) -> torch.Tensor: - background = self.background.to(rays_d) - return background.expand(rays_d.shape[0], -1) diff --git a/nemo/collections/multimodal/modules/nerf/background/tcnn_background.py b/nemo/collections/multimodal/modules/nerf/background/tcnn_background.py deleted file mode 100644 index 3b45a60717a5..000000000000 --- a/nemo/collections/multimodal/modules/nerf/background/tcnn_background.py +++ /dev/null @@ -1,32 +0,0 @@ -from typing import Dict - -import numpy as np -import tinycudann as tcnn -import torch - -from nemo.collections.multimodal.modules.nerf.background.nerf_background_base import NeRFBackgroundBase - - -class TCNNBackground(NeRFBackgroundBase): - def __init__( - self, - bound: int, - encoder_num_input_dims: int, - encoder_cfg: Dict, - background_net_num_output_dims: int, - background_net_cfg: Dict, - ): - super().__init__() - self.bound = bound - if encoder_cfg.get('per_level_scale') is None: - encoder_cfg['per_level_scale'] = np.exp2(np.log2(2048 * self.bound / 16) / (16 - 1)) - self.encoder = tcnn.Encoding(n_input_dims=encoder_num_input_dims, encoding_config=dict(encoder_cfg)) - self.background_net = tcnn.Network( - self.encoder.n_output_dims, background_net_num_output_dims, network_config=dict(background_net_cfg) - ) - - def encode(self, rays_d: torch.Tensor) -> torch.Tensor: - return self.encoder(rays_d) - - def forward_net(self, rays_d_encoding: torch.Tensor) -> torch.Tensor: - return self.background_net(rays_d_encoding) diff --git a/nemo/collections/multimodal/modules/nerf/background/torchngp_background.py b/nemo/collections/multimodal/modules/nerf/background/torchngp_background.py deleted file mode 100644 index e792858cacce..000000000000 --- a/nemo/collections/multimodal/modules/nerf/background/torchngp_background.py +++ /dev/null @@ -1,31 +0,0 @@ -from typing import Dict - -import torch - -from nemo.collections.multimodal.modules.nerf.background.nerf_background_base import NeRFBackgroundBase -from nemo.collections.multimodal.modules.nerf.geometry.layers import MLP -from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.encoding import get_encoder - - -class TorchNGPBackground(NeRFBackgroundBase): - def __init__( - self, encoder_type: str, encoder_input_dims: int, encoder_multi_res: int, num_output_dims: int, net_cfg: Dict - ): - super().__init__() - - self.encoder, self.encoder_output_dims = get_encoder( - encoder_type, input_dim=encoder_input_dims, multires=encoder_multi_res - ) - self.background_net = MLP( - num_input_dims=self.encoder_output_dims, - num_output_dims=num_output_dims, - num_hidden_dims=net_cfg.num_hidden_dims, - num_layers=net_cfg.num_layers, - bias=net_cfg.bias, - ) - - def encode(self, rays_d: torch.Tensor) -> torch.Tensor: - return self.encoder(rays_d) - - def forward_net(self, rays_d_encoding: torch.Tensor) -> torch.Tensor: - return self.background_net(rays_d_encoding) diff --git a/nemo/collections/multimodal/modules/nerf/geometry/__init__.py b/nemo/collections/multimodal/modules/nerf/geometry/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/nemo/collections/multimodal/modules/nerf/geometry/dmtet.py b/nemo/collections/multimodal/modules/nerf/geometry/dmtet.py deleted file mode 100644 index c04ac342cec0..000000000000 --- a/nemo/collections/multimodal/modules/nerf/geometry/dmtet.py +++ /dev/null @@ -1,150 +0,0 @@ -import torch - - -class DeepMarchingTetrahedra: - """ - Class for Deep Marching Tetrahedra (DMTet). - - Attributes: - device (torch.device): Device to place the tensors. - triangle_table (Tensor): Lookup table for the triangles. - num_triangles_table (Tensor): Table for the number of triangles. - base_tet_edges (Tensor): The base edges for the tetrahedrons. - """ - - def __init__(self, device: torch.device) -> None: - """Initialize DMTet instance with the given device. - - Args: - device (torch.device): The device to place the tensors on. - """ - self.device = device - self.triangle_table = self._create_triangle_table() - self.num_triangles_table = self._create_num_triangles_table() - self.base_tet_edges = self._create_base_tet_edges() - - def _create_triangle_table(self) -> torch.Tensor: - """Create the lookup table for triangles. - - Returns: - Tensor: The triangle lookup table. - """ - return torch.tensor( - [ - [-1, -1, -1, -1, -1, -1], - [1, 0, 2, -1, -1, -1], - [4, 0, 3, -1, -1, -1], - [1, 4, 2, 1, 3, 4], - [3, 1, 5, -1, -1, -1], - [2, 3, 0, 2, 5, 3], - [1, 4, 0, 1, 5, 4], - [4, 2, 5, -1, -1, -1], - [4, 5, 2, -1, -1, -1], - [4, 1, 0, 4, 5, 1], - [3, 2, 0, 3, 5, 2], - [1, 3, 5, -1, -1, -1], - [4, 1, 2, 4, 3, 1], - [3, 0, 4, -1, -1, -1], - [2, 0, 1, -1, -1, -1], - [-1, -1, -1, -1, -1, -1], - ], - dtype=torch.long, - device=self.device, - ) - - def _create_num_triangles_table(self) -> torch.Tensor: - """Create the table for number of triangles. - - Returns: - Tensor: The number of triangles table. - """ - return torch.tensor([0, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 0], dtype=torch.long, device=self.device) - - def _create_base_tet_edges(self) -> torch.Tensor: - """Create the base edges for the tetrahedrons. - - Returns: - Tensor: The base edges for tetrahedrons. - """ - return torch.tensor([0, 1, 0, 2, 0, 3, 1, 2, 1, 3, 2, 3], dtype=torch.long, device=self.device) - - def _sort_edges(self, edges_ex2: torch.Tensor) -> torch.Tensor: - """Sort the given edges. - - Args: - edges_ex2 (Tensor): The edges to be sorted. - - Returns: - Tensor: The sorted edges. - """ - with torch.no_grad(): - order = (edges_ex2[:, 0] > edges_ex2[:, 1]).long() - order = order.unsqueeze(dim=1) - a = torch.gather(input=edges_ex2, index=order, dim=1) - b = torch.gather(input=edges_ex2, index=1 - order, dim=1) - return torch.stack([a, b], -1) - - # TODO(ahmadki): rename to forward ? return mesh ? - def __call__(self, positions: torch.Tensor, sdf_n: torch.Tensor, tet_fx4: torch.Tensor) -> tuple: - """ - Process the provided data to generate vertices and faces. - - Args: - positions (Tensor): Position tensor with shape [N, 3]. - sdf_n (Tensor): SDF tensor with shape [N]. - tet_fx4 (Tensor): Tetrahedron faces tensor with shape [F, 4]. - - Returns: - tuple: Vertices and faces tensors. - """ - with torch.no_grad(): - occ_n = sdf_n > 0 - occ_fx4 = occ_n[tet_fx4.reshape(-1)].reshape(-1, 4) - occ_sum = torch.sum(occ_fx4, -1) - valid_tets = (occ_sum > 0) & (occ_sum < 4) - occ_sum = occ_sum[valid_tets] - - # find all vertices - all_edges = tet_fx4[valid_tets][:, self.base_tet_edges].reshape(-1, 2) - all_edges = self._sort_edges(all_edges) - unique_edges, idx_map = torch.unique(all_edges, dim=0, return_inverse=True) - - unique_edges = unique_edges.long() - mask_edges = occ_n[unique_edges.reshape(-1)].reshape(-1, 2).sum(-1) == 1 - mapping = torch.ones((unique_edges.shape[0]), dtype=torch.long, device=self.device) * -1 - mapping[mask_edges] = torch.arange(mask_edges.sum(), dtype=torch.long, device=self.device) - idx_map = mapping[idx_map] # map edges to verts - - interp_v = unique_edges[mask_edges] - - edges_to_interp = positions[interp_v.reshape(-1)].reshape(-1, 2, 3) - edges_to_interp_sdf = sdf_n[interp_v.reshape(-1)].reshape(-1, 2, 1) - edges_to_interp_sdf[:, -1] *= -1 - - denominator = edges_to_interp_sdf.sum(1, keepdim=True) - edges_to_interp_sdf = torch.flip(edges_to_interp_sdf, [1]) / denominator - verts = (edges_to_interp * edges_to_interp_sdf).sum(1) - - idx_map = idx_map.reshape(-1, 6) - v_id = torch.pow(2, torch.arange(4, dtype=torch.long, device=self.device)) - tetindex = (occ_fx4[valid_tets] * v_id.unsqueeze(0)).sum(-1) - num_triangles = self.num_triangles_table[tetindex] - - # Generate triangle indices - faces = torch.cat( - ( - torch.gather( - input=idx_map[num_triangles == 1], - dim=1, - index=self.triangle_table[tetindex[num_triangles == 1]][:, :3], - ).reshape(-1, 3), - torch.gather( - input=idx_map[num_triangles == 2], - dim=1, - index=self.triangle_table[tetindex[num_triangles == 2]][:, :6], - ).reshape(-1, 3), - ), - dim=0, - ) - - return verts, faces diff --git a/nemo/collections/multimodal/modules/nerf/geometry/layers.py b/nemo/collections/multimodal/modules/nerf/geometry/layers.py deleted file mode 100644 index c80696bd170c..000000000000 --- a/nemo/collections/multimodal/modules/nerf/geometry/layers.py +++ /dev/null @@ -1,129 +0,0 @@ -from typing import Callable, List, Type, Union - -import torch -import torch.nn as nn - -BlockBuilder = Union[Callable[[int, int, bool], nn.Module], Type[nn.Module], None] - - -class MLP(nn.Module): - """ - A Multi-Layer Perceptron (MLP) module. - - Args: - num_input_dims (int): Number of input dimensions. - num_output_dims (int): Number of output dimensions. - num_hidden_dims (int): Number of hidden dimensions. - num_layers (int): Number of layers in the MLP. - bias (bool): If True, enables the bias in Linear layers. Default is True. - block (BlockBuilder): A callable or class for constructing a block. Default is None. - """ - - def __init__( - self, - num_input_dims: int, - num_output_dims: int, - num_hidden_dims: int, - num_layers: int, - bias: bool = True, - block: BlockBuilder = None, - ): - super().__init__() - - # Initialize the network as an empty list - network = [] - - # Add input layer - network.append(nn.Linear(num_input_dims, num_hidden_dims, bias=bias)) - network.append(nn.ReLU(inplace=True)) - - # Add hidden layers - for _ in range(1, num_layers - 1): - network.extend(self.build_layer(num_hidden_dims, num_hidden_dims, bias, block)) - - # Add output layer - network.append(nn.Linear(num_hidden_dims, num_output_dims, bias=bias)) - - # Wrap layers in ModuleList for proper registration - self.net = nn.ModuleList(network) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Forward pass through the MLP. - - Args: - x (torch.Tensor): Input tensor. - - Returns: - torch.Tensor: Output tensor. - """ - for module in self.net: - x = module(x) - return x - - @staticmethod - def build_layer( - num_input_dims: int, num_output_dims: int, bias: bool = True, block_builder: BlockBuilder = None - ) -> List[nn.Module]: - """ - Build a single layer for the MLP. - - Args: - num_input_dims (int): Number of input dimensions. - num_output_dims (int): Number of output dimensions. - bias (bool): If True, enables the bias in Linear layers. Default is True. - block_builder (BlockBuilder): A callable or class for constructing a block. Default is None. - - Returns: - List[nn.Module]: A list containing the layer's modules. - """ - if block_builder is None: - return [nn.Linear(num_input_dims, num_output_dims, bias=bias), nn.ReLU(inplace=True)] - else: - return [block_builder(num_input_dims, num_output_dims, bias=bias)] - - -class ResBlock(nn.Module): - """ - A residual block module. - - Args: - num_input_dims (int): Number of input dimensions. - num_output_dims (int): Number of output dimensions. - bias (bool): If True, enables the bias in Linear layers. Default is True. - """ - - def __init__(self, num_input_dims: int, num_output_dims: int, bias: bool = True): - super().__init__() - - self.dense = nn.Linear(num_input_dims, num_output_dims, bias=bias) - self.norm = nn.LayerNorm(num_output_dims) - self.activation = nn.SiLU(inplace=True) - - if num_input_dims != num_output_dims: - self.skip = nn.Linear(num_input_dims, num_output_dims, bias=False) - else: - self.skip = None - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Forward pass through the residual block. - - Args: - x (torch.Tensor): Input tensor. - - Returns: - torch.Tensor: Output tensor. - """ - identity = x - - out = self.dense(x) - out = self.norm(out) - - if self.skip is not None: - identity = self.skip(identity) - - out += identity - out = self.activation(out) - - return out diff --git a/nemo/collections/multimodal/modules/nerf/geometry/nerf_base.py b/nemo/collections/multimodal/modules/nerf/geometry/nerf_base.py deleted file mode 100644 index 0b5eb6b6f260..000000000000 --- a/nemo/collections/multimodal/modules/nerf/geometry/nerf_base.py +++ /dev/null @@ -1,360 +0,0 @@ -from enum import Enum -from typing import Callable, Optional, Tuple, Union - -import mcubes -import numpy as np -import pymeshlab -import torch -import torch.nn as nn -import torch.nn.functional as F -import trimesh - -from nemo.collections.multimodal.modules.nerf.utils.activation import trunc_exp - - -class DensityActivationEnum(str, Enum): - EXP = "exp" - SOFTPLUS = "softplus" - - -class NormalTypeEnum(str, Enum): - AUTOGRAD = "autograd" - FORWARD_FINITE_DIFFERENCE = "forward_finite_difference" - BACKWARD_FINITE_DIFFERENCE = "backward_finite_difference" - CENTRAL_FINITE_DIFFERENCE = "central_finite_difference" - - -# TODO(ahmadki): make abstract -class NeRFBase(nn.Module): - """ - A base class for Neural Radiance Fields (NeRF) models. - - Args: - num_input_dims (int): Number of input dimensions. - bound (torch.Tensor): The bounding box tensor. - density_activation (DensityActivationEnum): Activation function for density. - blob_radius (float): Radius for the blob. - blob_density (float): Density for the blob. - normal_type (Optional[NormalTypeEnum]): Method to compute normals. - """ - - def __init__( - self, - num_input_dims: int, - bound: torch.Tensor, - density_activation: DensityActivationEnum, - blob_radius: float, - blob_density: float, - normal_type: Optional[NormalTypeEnum] = NormalTypeEnum.CENTRAL_FINITE_DIFFERENCE, - ) -> None: - super().__init__() - self.num_input_dims = num_input_dims - self.bound = bound - self.density_activation = density_activation - self.blob_radius = blob_radius - self.blob_density = blob_density - self.normal_type = normal_type - - def encode(self, positions: torch.Tensor) -> torch.Tensor: - """Encode 3D positions. To be implemented by subclasses.""" - raise NotImplementedError - - def sigma_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: - """Calculate sigma (density). To be implemented by subclasses.""" - raise NotImplementedError - - def features_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: - """Calculate features. To be implemented by subclasses.""" - raise NotImplementedError - - def forward( - self, positions: torch.Tensor, return_normal: bool = True - ) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: - """ - Forward pass for the NeRF model. - - Args: - positions (torch.Tensor): The positions. - return_normal (bool): Flag to indicate whether to return normals or not. - - Returns: - Tuple containing density, features, and possibly normals. - """ - - if return_normal: - if self.normal_type == NormalTypeEnum.AUTOGRAD: - with torch.enable_grad(): - positions.requires_grad_(True) - sigma, features = self.forward_density_features(positions) - normal = -torch.autograd.grad(torch.sum(sigma), positions, create_graph=True)[0] # [N, D] - elif self.normal_type in [ - NormalTypeEnum.CENTRAL_FINITE_DIFFERENCE, - NormalTypeEnum.FORWARD_FINITE_DIFFERENCE, - NormalTypeEnum.BACKWARD_FINITE_DIFFERENCE, - ]: - sigma, features = self.forward_density_features(positions) - normal = self.normal_finite_differences(positions) - else: - raise NotImplementedError("Invalid normal type.") - - normal = F.normalize(normal) - normal = torch.nan_to_num(normal) - else: - sigma, features = self.forward_density_features(positions) - normal = None - - return sigma, features, normal - - def forward_density_features(self, positions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Calculate both density and features based on the input positions. - - This function takes into account edge cases like empty input tensors and calculates - the density and features accordingly. See GitHub issues for details: - - https://github.com/KAIR-BAIR/nerfacc/issues/207#issuecomment-1653621720 - - https://github.com/ashawkey/torch-ngp/issues/176 - - Args: - positions (torch.Tensor): Input positions tensor with shape [B*N, D]. - - Returns: - Tuple[torch.Tensor, torch.Tensor]: Tuple containing density and features tensors. - """ - - # Handle empty positions - if positions.shape[0] == 0: - sigma = torch.zeros(0, device=positions.device) - features = torch.zeros(0, self.num_input_dims, device=positions.device) - return sigma, features - - # Encode positions - positions_encoding = self.encode(positions) - - # Compute density - density = self.forward_density(positions, positions_encoding) - - # Compute features - features = self.forward_features(positions, positions_encoding) - - return density, features - - def forward_density( - self, positions: torch.Tensor, positions_encoding: Optional[torch.Tensor] = None - ) -> torch.Tensor: - """ - Calculate the density based on the input positions and their encoding. - - Args: - positions (torch.Tensor): Input positions tensor with shape [B*N, D]. - positions_encoding (Optional[torch.Tensor]): Optional encoded positions. - Will be computed from `positions` if not provided. - - Returns: - torch.Tensor: Density tensor. - """ - - # Handle empty positions - if positions.shape[0] == 0: - sigma = torch.zeros(0, device=positions.device) - return sigma - - # Compute encoded positions if not provided - if positions_encoding is None: - positions_encoding = self.encode(positions) - - # Compute sigma using the neural network - sigma = self.sigma_net(positions_encoding) - - # Compute density using activation function - if self.density_activation == DensityActivationEnum.EXP: - density = trunc_exp(sigma + self.density_blob(positions)) - elif self.density_activation == DensityActivationEnum.SOFTPLUS: - density = F.softplus(sigma + self.density_blob(positions)) - else: - raise NotImplementedError("Invalid density activation.") - - return density - - def forward_features( - self, positions: torch.Tensor, positions_encoding: Optional[torch.Tensor] = None - ) -> torch.Tensor: - """ - Compute the features based on the input positions and their encoding. - - Args: - positions (torch.Tensor): Input positions tensor with shape [B*N, D]. - positions_encoding (Optional[torch.Tensor]): Optional encoded positions. - Will be computed from `positions` if not provided. - - Returns: - torch.Tensor: Features tensor with shape [B*N, num_features_dims]. - """ - - # Handle empty positions - if positions.shape[0] == 0: - features = torch.zeros(0, self.num_features_dims, device=positions.device) - return features - - # Compute encoded positions if not provided - if positions_encoding is None: - positions_encoding = self.encode(positions) - - # Compute features using the neural network - features = self.features_net(positions_encoding) - - # Apply the sigmoid activation function to the features - features = torch.sigmoid(features) - - return features - - @torch.no_grad() - def density_blob(self, positions: torch.Tensor) -> torch.Tensor: - """ - Compute the density blob for the given positions. - - This method computes a density blob for each position in the tensor. It is - used to add a density value based on the distance of each position from the origin. - - Args: - positions (torch.Tensor): Input positions tensor with shape [B*N, D]. - - Returns: - torch.Tensor: Density blob tensor with shape [B*N, 1]. - """ - - # Compute the squared distance for each position - d = (positions ** 2).sum(-1) - - # Compute the density blob based on the activation function - if self.density_activation == DensityActivationEnum.EXP: - g = self.blob_density * torch.exp(-d / (2 * self.blob_radius ** 2)) - elif self.density_activation == DensityActivationEnum.SOFTPLUS: - g = self.blob_density * (1 - torch.sqrt(d) / self.blob_radius) - else: - raise NotImplementedError("Invalid density activation.") - - return g - - def normal_finite_differences(self, positions: torch.Tensor, eps: float = 1e-2) -> torch.Tensor: - """ - Calculate normals using finite differences. - - Args: - positions (torch.Tensor): Input positions tensor with shape [B*N, D]. - eps (float): A small value for finite difference calculation. Default is 1e-2. - - Returns: - torch.Tensor: Calculated normals tensor [B*N, D] - """ - # Create perturbation tensor - perturb = torch.eye(self.num_input_dims).to(positions.device).float() * eps # Shape (D, D) - - # Expand dims for batched operation - positions_expanded = positions[:, None, :] # (B*N, 1, D) - perturb_expanded = perturb[None, :, :] # (1, D, D) - - # Compute perturbed points - if self.normal_type == NormalTypeEnum.FORWARD_FINITE_DIFFERENCE: - positions_perturbed = positions_expanded + perturb_expanded # (B*N, D, D) - elif self.normal_type == NormalTypeEnum.BACKWARD_FINITE_DIFFERENCE: - positions_perturbed = positions_expanded - perturb_expanded # (B*N, D, D) - elif self.normal_type == NormalTypeEnum.CENTRAL_FINITE_DIFFERENCE: - positions_perturbed_pos = positions_expanded + perturb_expanded # (B*N, D, D) - positions_perturbed_neg = positions_expanded - perturb_expanded # (B*N, D, D) - positions_perturbed = torch.cat([positions_perturbed_pos, positions_perturbed_neg], dim=1) # (B*N, 2*D, D) - - # Reshape perturbed points for batched function call - positions_perturbed_reshaped = positions_perturbed.view(-1, self.num_input_dims) # (B*N * {D or 2*D}, D) - - # Evaluate function at perturbed points - perturbed_sigma = self.forward_density(positions_perturbed_reshaped) # (B*N * {D or 2*D}, 1) - - # Reshape function values - if self.normal_type == NormalTypeEnum.CENTRAL_FINITE_DIFFERENCE: - perturbed_sigma = perturbed_sigma.view(-1, 2 * self.num_input_dims) # (B*N, 2*D) - sigma_pos, sigma_neg = torch.chunk(perturbed_sigma, 2, dim=1) # (B*N, D) each - normal = 0.5 * (sigma_pos - sigma_neg) / eps # (B*N, D) - else: - perturbed_sigma = perturbed_sigma.view(-1, self.num_input_dims) # (B*N, D) - sigma = self.forward_density(positions) # (B*N,) # TODO(ahmadki): use the value from forward ? - if self.normal_type == NormalTypeEnum.FORWARD_FINITE_DIFFERENCE: - normal = (perturbed_sigma - sigma[:, None]) / eps # (B*N, D) - else: # self.normal_type == BACKWARD_FINITE_DIFFERENCE - normal = (sigma[:, None] - perturbed_sigma) / eps # (B*N, D) - - return -normal - - # TODO(ahmadki): needs ar ework: - # 1. texture/vertices are off-axis, needs a fix. - # 2. device='cuda' is hardcoded - # 3. DMTet needs to go through a different code path ? create a base volume nerf, and a base dmtet nerf class ? - @torch.no_grad() - def mesh( - self, resolution: Optional[int] = 128, batch_size: int = 128, density_thresh: Optional[float] = None - ) -> pymeshlab.Mesh: - """ - Generate a mesh from the nerf. - - Args: - resolution (Optional[int]): Resolution of the mesh grid. Default is 128. - batch_size (int): Batch size for the mesh generation. Default is 128. - density_thresh (Optional[float]): Density threshold for the mesh generation. Default is None, will be calculated from mean density. - - Returns: - pymeshlab.Mesh: Mesh object. - """ - # Generate a grid of 3D points - x = np.linspace(-self.bound, self.bound, resolution) - y = np.linspace(-self.bound, self.bound, resolution) - z = np.linspace(-self.bound, self.bound, resolution) - xx, yy, zz = np.meshgrid(x, y, z) - - grid = np.stack((xx, yy, zz), axis=-1) # Shape (resolution, resolution, resolution, 3) - torch_grid = torch.tensor(grid, dtype=torch.float32).reshape(-1, 3).to(device="cuda") - - def batch_process(fn, input, batch_size): - num_points = input.shape[0] - batches = [input[i : i + batch_size] for i in range(0, num_points, batch_size)] - results = [fn(batch) for batch in batches] - results = [result.detach().cpu().numpy() for result in results] - return np.concatenate(results, axis=0) - - density = batch_process(fn=self.forward_density, input=torch_grid, batch_size=batch_size) - density = density.reshape(resolution, resolution, resolution) - - # If not provided set density_thresh based on mean density - if density_thresh is None: - density_thresh = density[density > 1e-3].mean().item() - - # Apply Marching Cubes - vertices, triangles = mcubes.marching_cubes(density, density_thresh) - - # Create a new Mesh - ms = pymeshlab.MeshSet() - - # Create Mesh using vertices and faces - m = pymeshlab.Mesh(vertices.copy(), triangles.copy()) - - # Add mesh to the MeshSet - ms.add_mesh(m, "generated_mesh") - - # Filters - ms.meshing_remove_unreferenced_vertices() - ms.meshing_remove_duplicate_faces() - ms.meshing_remove_null_faces() - ms.meshing_repair_non_manifold_edges(method=0) - ms.meshing_repair_non_manifold_vertices(vertdispratio=0) - - m = ms.current_mesh() - vertices = m.vertex_matrix() - faces = m.face_matrix() - - scaled_vertice = ( - -self.bound + (vertices / resolution) * 2 * self.bound - ) # scale vertices back to [-self.bound, self.bound] - scaled_vertices_torch = torch.tensor(scaled_vertice, dtype=torch.float32).to(device="cuda") - color = batch_process(fn=self.forward_features, input=scaled_vertices_torch, batch_size=batch_size) - - # Create the final mesh from cleaned vertices and faces and with color - mesh = trimesh.Trimesh(vertices=vertices, faces=faces, vertex_colors=color) - return mesh diff --git a/nemo/collections/multimodal/modules/nerf/geometry/tcnn_nerf.py b/nemo/collections/multimodal/modules/nerf/geometry/tcnn_nerf.py deleted file mode 100644 index 2922df999d15..000000000000 --- a/nemo/collections/multimodal/modules/nerf/geometry/tcnn_nerf.py +++ /dev/null @@ -1,108 +0,0 @@ -from typing import Dict, Optional - -import numpy as np -import tinycudann as tcnn -import torch - -from nemo.collections.multimodal.modules.nerf.geometry.nerf_base import DensityActivationEnum, NeRFBase, NormalTypeEnum - - -# Don't fuse sigma_net with features_net: -# 1. performance benefit is questionable, especially that we sometimes require only density or features -# 2. we sacrifice generality -class TCNNNerf(NeRFBase): - """ - NeRF model with TCNN encoding and MLPs for sigma and features. - - Args: - num_input_dims (int): Number of input dimensions. - bound (torch.Tensor): The bounding box tensor. - density_activation (DensityActivationEnum): Activation function for density. - blob_radius (float): Radius for the blob. - blob_density (float): Density for the blob. - normal_type (Optional[NormalTypeEnum]): Method to compute normals. - encoder_cfg (Dict): Configuration for the TCNN encoder. - sigma_net_num_output_dims (int): Number of output dimensions for the sigma network. - sigma_net_cfg (Dict): Configuration for the sigma network. - features_net_num_output_dims (int): Number of output dimensions for the features network. - features_net_cfg (Optional[Dict]): Configuration for the features network. - """ - - def __init__( - self, - num_input_dims: int, - bound: torch.Tensor, - density_activation: DensityActivationEnum, - blob_radius: float, - blob_density: float, - normal_type: Optional[NormalTypeEnum], - encoder_cfg: Dict, - sigma_net_num_output_dims: int, - sigma_net_cfg: Dict, - features_net_num_output_dims: int, - features_net_cfg: Optional[Dict], - ) -> None: - super().__init__( - num_input_dims=num_input_dims, - bound=bound, - density_activation=density_activation, - blob_radius=blob_radius, - blob_density=blob_density, - normal_type=normal_type, - ) - - # Set per_level_scale if not set - if encoder_cfg.get('per_level_scale') is None: - encoder_cfg['per_level_scale'] = np.exp2(np.log2(2048 * self.bound / 16) / (16 - 1)) - # Build the TCNN encoder - self.encoder = tcnn.Encoding(n_input_dims=num_input_dims, encoding_config=dict(encoder_cfg)) - - # Build the sigma network - assert sigma_net_num_output_dims == 1, "sigma_net_num_output_dims!=1 is not supported" - self.sigma_tcnn = tcnn.Network( - self.encoder.n_output_dims, sigma_net_num_output_dims, network_config=dict(sigma_net_cfg) - ) - - # Build the features network - self.features_tcnn = None - if features_net_cfg is not None: - self.features_tcnn = tcnn.Network( - self.encoder.n_output_dims, features_net_num_output_dims, network_config=dict(features_net_cfg) - ) - - def encode(self, positions: torch.Tensor) -> torch.Tensor: - """ - Encode the positions using the TCNN encoder. - - Args: - positions (torch.Tensor): The positions tensor. - - Returns: - torch.Tensor: The encoded positions tensor. - """ - # TODO(ahmadki): is it safe to do with FP16 ? - return self.encoder((positions + self.bound) / (2 * self.bound)) - - def sigma_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: - """ - Compute the sigma using the TCNN network. - - Args: - positions_encoding (torch.Tensor): The encoded positions tensor. - - Returns: - torch.Tensor: The sigma tensor. - """ - return self.sigma_tcnn(positions_encoding).squeeze() - - def features_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: - """ - Compute the features using the TCNN network. - - Args: - positions_encoding (torch.Tensor): The encoded positions tensor. - - Returns: - torch.Tensor: The features tensor. - """ - return self.features_tcnn(positions_encoding) diff --git a/nemo/collections/multimodal/modules/nerf/geometry/torchngp_nerf.py b/nemo/collections/multimodal/modules/nerf/geometry/torchngp_nerf.py deleted file mode 100644 index b831b94ef84b..000000000000 --- a/nemo/collections/multimodal/modules/nerf/geometry/torchngp_nerf.py +++ /dev/null @@ -1,114 +0,0 @@ -from typing import Dict, Optional - -import torch - -from nemo.collections.multimodal.modules.nerf.geometry.layers import MLP -from nemo.collections.multimodal.modules.nerf.geometry.nerf_base import DensityActivationEnum, NeRFBase, NormalTypeEnum -from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.encoding import get_encoder - - -# Don't fuse sigma_net with features_net: -# 1. performance benefit is questionable, especially that we sometimes require only density or features -# 2. we sacrifice generality -class TorchNGPNerf(NeRFBase): - """ - NeRF model with Torch-NGP encoding and MLPs for sigma and features. - - Args: - num_input_dims (int): Number of input dimensions. - bound (torch.Tensor): The bounding box tensor. - density_activation (DensityActivationEnum): Activation function for density. - blob_radius (float): Radius for the blob. - blob_density (float): Density for the blob. - normal_type (Optional[NormalTypeEnum]): Method to compute normals. - encoder_type (str): Type of the encoder. - encoder_max_level (int): Maximum level of the encoder. - sigma_net_num_output_dims (int): Number of output dimensions for the sigma network. - sigma_net_cfg (Dict): Configuration for the sigma network. - features_net_num_output_dims (int): Number of output dimensions for the features network. - features_net_cfg (Optional[Dict]): Configuration for the features network. - """ - - def __init__( - self, - num_input_dims: int, - bound: torch.Tensor, - density_activation: DensityActivationEnum, - blob_radius: float, - blob_density: float, - normal_type: Optional[NormalTypeEnum], - encoder_cfg: Dict, - sigma_net_num_output_dims: int, - sigma_net_cfg: Dict, - features_net_num_output_dims: int, - features_net_cfg: Optional[Dict], - ): - super().__init__( - num_input_dims=num_input_dims, - bound=bound, - density_activation=density_activation, - blob_radius=blob_radius, - blob_density=blob_density, - normal_type=normal_type, - ) - - # Build the Torch-NGP encoder - self.encoder_max_level = encoder_cfg.get('encoder_max_level', None) - self.encoder, self.encoder_output_dims = get_encoder(input_dim=num_input_dims, **encoder_cfg) - - # Build the sigma network - assert sigma_net_num_output_dims == 1, "sigma_net_num_output_dims must be equal to 1" - self.sigma_mlp = MLP( - num_input_dims=self.encoder_output_dims, - num_output_dims=sigma_net_num_output_dims, - num_hidden_dims=sigma_net_cfg.num_hidden_dims, - num_layers=sigma_net_cfg.num_layers, - bias=sigma_net_cfg.bias, - ) - - # Build the features network - self.features_mlp = None - if features_net_cfg is not None: - self.features_mlp = MLP( - num_input_dims=self.encoder_output_dims, - num_output_dims=features_net_num_output_dims, - num_hidden_dims=features_net_cfg.num_hidden_dims, - num_layers=features_net_cfg.num_layers, - bias=features_net_cfg.bias, - ) - - def encode(self, positions: torch.Tensor) -> torch.Tensor: - """ - Encode the positions. - - Args: - positions (torch.Tensor): The positions tensor. - - Returns: - torch.Tensor: The encoded positions tensor. - """ - return self.encoder(positions, bound=self.bound, max_level=self.encoder_max_level) - - def sigma_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: - """ - Compute the sigma using the sigma network. - - Args: - positions_encoding (torch.Tensor): The encoded positions tensor. - - Returns: - torch.Tensor: The sigma tensor. - """ - return self.sigma_mlp(positions_encoding).squeeze() - - def features_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: - """ - Compute the features using the features network. - - Args: - positions_encoding (torch.Tensor): The encoded positions tensor. - - Returns: - torch.Tensor: The features tensor. - """ - return self.features_mlp(positions_encoding) diff --git a/nemo/collections/multimodal/modules/nerf/guidance/__init__.py b/nemo/collections/multimodal/modules/nerf/guidance/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_huggingface_pipeline.py b/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_huggingface_pipeline.py deleted file mode 100644 index 008a7b3d3627..000000000000 --- a/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_huggingface_pipeline.py +++ /dev/null @@ -1,142 +0,0 @@ -from typing import List, Union - -import torch -import torch.nn.functional as F -from diffusers import DDIMScheduler, StableDiffusionPipeline - -from nemo.collections.multimodal.modules.nerf.guidance.txt2img_guidance_base import Txt2ImgGuidanceBase - - -class StableDiffusion(Txt2ImgGuidanceBase): - def __init__( - self, - model_key: str = "stabilityai/stable-diffusion-2-1-base", - t_range: List[float] = [0.02, 0.98], - precision: str = "16", - device: torch.device = torch.device('cuda'), - ): - """ - Initialize StableDiffusion with model_key, t_range, precision and device. - - Parameters: - model_key (str): Pre-trained model key. - t_range (List[float]): Range for timesteps. - precision (str): Model precision ("16", "bf16" or other for float32). - device (torch.device): Device for torch tensor. - """ - super().__init__() - - self.device = device - self.model_key = model_key - self.precision_t = self._get_precision_type(precision) - - # Create model - pipe = StableDiffusionPipeline.from_pretrained(model_key, torch_dtype=self.precision_t).to(self.device) - if self.precision_t in [torch.float16, torch.bfloat16]: - pipe.unet.to(memory_format=torch.channels_last) - - self.vae = pipe.vae - self.tokenizer = pipe.tokenizer - self.text_encoder = pipe.text_encoder - self.unet = pipe.unet - self.scheduler = DDIMScheduler.from_pretrained(model_key, subfolder="scheduler", torch_dtype=self.precision_t) - - del pipe - - self.num_train_timesteps = self.scheduler.config.num_train_timesteps - self.min_step = int(self.num_train_timesteps * t_range[0]) - self.max_step = int(self.num_train_timesteps * t_range[1]) - self.alphas = self.scheduler.alphas_cumprod.to(self.device) - - def _get_precision_type(self, precision: str) -> torch.dtype: - """ - Map string precision representation to torch dtype. - - Parameters: - precision (str): String representation of precision. - - Returns: - torch.dtype: Corresponding torch dtype. - """ - precision_map = {"16": torch.float16, "bf16": torch.bfloat16} - return precision_map.get(precision, torch.float32) - - @torch.no_grad() - def get_text_embeds(self, prompt: str) -> torch.Tensor: - """ - Get text embeddings from the given prompt. - - Parameters: - prompt (str): Input text. - - Returns: - torch.Tensor: Text embeddings tensor [B, 77, 1024]. - """ - inputs = self.tokenizer( - prompt, padding='max_length', max_length=self.tokenizer.model_max_length, return_tensors='pt' - ) - embeddings = self.text_encoder(inputs.input_ids.to(self.device))[0] - return embeddings - - # @torch.compile() # TODO(ahmadki) - def train_step( - self, - text_embeddings: torch.Tensor, - pred_rgb: torch.Tensor, - guidance_scale: float = 100.0, - as_latent: bool = False, - ) -> float: - """ - Train step function for StableDiffusion. - - Parameters: - text_embeddings (torch.Tensor): Embeddings tensor [B, 512]. - pred_rgb (torch.Tensor): Predicted RGB tensor [B, 3, 512, 512]. - guidance_scale (float): Guidance scaling factor. - as_latent (bool): If True, considers pred_rgb as latent. - - Returns: - float: Loss value. - """ - if as_latent: - latents = F.interpolate(pred_rgb, (64, 64), mode='bilinear', align_corners=False) * 2 - 1 - else: - pred_rgb_512 = F.interpolate(pred_rgb, (512, 512), mode='bilinear', align_corners=False) - latents = self.encode_imgs(pred_rgb_512) - - t = torch.randint(self.min_step, self.max_step + 1, (latents.shape[0],), dtype=torch.long, device=self.device) - - with torch.no_grad(): - # add noise - noise = torch.randn_like(latents) - latents_noisy = self.scheduler.add_noise(latents, noise, t) - # pred noise - latent_model_input = torch.cat([latents_noisy] * 2) - td = torch.cat([t] * 2) - noise_pred = self.unet(latent_model_input, td, encoder_hidden_states=text_embeddings).sample - - noise_pred_uncond, noise_pred_pos = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_pos - noise_pred_uncond) - - w = 1 - self.alphas[t] - grad = w[:, None, None, None] * (noise_pred - noise) - grad = torch.nan_to_num(grad) - - targets = (latents - grad).detach() - loss = 0.5 * F.mse_loss(latents.float(), targets, reduction='sum') / latents.shape[0] - return loss - - def encode_imgs(self, imgs: torch.Tensor) -> torch.Tensor: - """ - Encode images into latent representations. - - Parameters: - imgs (torch.Tensor): Image tensor [B, 3, H, W]. - - Returns: - torch.Tensor: Encoded latent tensor. - """ - imgs = 2 * imgs - 1 - posterior = self.vae.encode(imgs).latent_dist - latents = posterior.sample() * self.vae.config.scaling_factor - return latents diff --git a/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_nemo_pipeline.py b/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_nemo_pipeline.py deleted file mode 100644 index a605391b9c92..000000000000 --- a/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_nemo_pipeline.py +++ /dev/null @@ -1,129 +0,0 @@ -import os -import tempfile - -import torch -import torch.nn as nn -import torch.nn.functional as F -from omegaconf import OmegaConf - -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion -from nemo.collections.multimodal.modules.nerf.guidance.txt2img_guidance_base import Txt2ImgGuidanceBase -from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import ( - DiagonalGaussianDistribution, -) -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector - - -class StableDiffusion(Txt2ImgGuidanceBase): - def __init__( - self, checkpoint, sampler_type="DDIM", t_range=[0.02, 0.98], precision="16", device=torch.device('cuda') - ): - super().__init__() - - self.device = device - self.checkpoint = checkpoint - self.sampler_type = sampler_type - - cfg, state_dict = self.load_config_and_state_from_nemo(checkpoint) - - cfg.precision = precision - cfg.ckpt_path = None - cfg.unet_config.from_pretrained = None - cfg.first_stage_config.from_pretrained = None - - self.model = LatentDiffusion(cfg).to(device) - - sd_state_dict = {} - # Remove Megatron wrapper and inductor - for key, value in state_dict.items(): - key = key[6:] - sd_state_dict[key] = value - self.model.load_state_dict(sd_state_dict) - self.first_stage_model = self.model.first_stage_model - self.text_encoder = self.model.cond_stage_model.encode - - self.num_train_timesteps = self.model.num_timesteps - self.min_step = int(self.num_train_timesteps * t_range[0]) - self.max_step = int(self.num_train_timesteps * t_range[1]) - self.alphas = self.model.alphas_cumprod.to(self.device) - - @torch.no_grad() - def get_text_embeds(self, prompt): - return self.text_encoder(prompt) - - @torch.autocast(device_type="cuda") - def train_step(self, text_embeddings, pred_rgb, guidance_scale=100, as_latent=False): - - if as_latent: - latents = F.interpolate(pred_rgb, (64, 64), mode='bilinear', align_corners=False) * 2 - 1 - else: - # interp to 512x512 to be fed into vae. - pred_rgb_512 = F.interpolate(pred_rgb, (512, 512), mode='bilinear', align_corners=False) - # encode image into latents with vae, requires grad! - latents = self.encode_imgs(pred_rgb_512) - - # timestep ~ U(0.02, 0.98) to avoid very high/low noise level - t = torch.randint(self.min_step, self.max_step + 1, (latents.shape[0],), dtype=torch.long, device=self.device) - - with torch.no_grad(): - noise = torch.randn_like(latents) - latents_noisy = self.model.q_sample(x_start=latents, t=t, noise=noise) - latent_model_input = torch.cat([latents_noisy] * 2) - td = torch.cat([t] * 2) - noise_pred = self.model.apply_model(latent_model_input, td, text_embeddings) - - noise_pred_uncond, noise_pred_pos = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_pos - noise_pred_uncond) - - # w(t), sigma_t^2 - w = 1 - self.alphas[t] - grad = w[:, None, None, None] * (noise_pred - noise) - grad = torch.nan_to_num(grad) - - targets = (latents - grad).detach() - loss = 0.5 * F.mse_loss(latents.float(), targets, reduction='sum') / latents.shape[0] - return loss - - def image_encoder(self, x): - h = self.first_stage_model.encoder(x) - moments = self.first_stage_model.quant_conv(h) - posterior = DiagonalGaussianDistribution(moments) - return posterior - - def encode_imgs(self, imgs): - # imgs: [B, 3, H, W] - - imgs = 2 * imgs - 1 - - posterior = self.image_encoder(imgs) - latents = ( - posterior.sample() * self.image_encoder.config.scaling_factor - ) # self.vae.config.scaling_factor==0.18215 - - return latents - - def load_config_and_state_from_nemo(self, nemo_path): - if torch.cuda.is_available(): - map_location = torch.device('cuda') - else: - map_location = torch.device('cpu') - save_restore_connector = NLPSaveRestoreConnector() - cwd = os.getcwd() - - with tempfile.TemporaryDirectory() as tmpdir: - try: - save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) - - # Change current working directory to - os.chdir(tmpdir) - config_yaml = os.path.join(tmpdir, save_restore_connector.model_config_yaml) - cfg = OmegaConf.load(config_yaml) - - model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) - state_dict = save_restore_connector._load_state_dict_from_disk( - model_weights, map_location=map_location - ) - finally: - os.chdir(cwd) - - return cfg, state_dict diff --git a/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_trt_pipeline.py b/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_trt_pipeline.py deleted file mode 100644 index bf8c5e971002..000000000000 --- a/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_trt_pipeline.py +++ /dev/null @@ -1,221 +0,0 @@ -import logging -import os -import tempfile - -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -from omegaconf import OmegaConf -from polygraphy import cuda -from transformers import CLIPTokenizer - -from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion -from nemo.collections.multimodal.modules.nerf.guidance.txt2img_guidance_base import Txt2ImgGuidanceBase -from nemo.collections.multimodal.modules.nerf.utils.trt_engine import Engine, device_view -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( - extract_into_tensor, - make_beta_schedule, -) -from nemo.collections.multimodal.parts.stable_diffusion.utils import default -from nemo.collections.multimodal.parts.utils import randn_like -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector - - -class LatentDiffusionWrapper(Txt2ImgGuidanceBase): - def __init__(self, plan_dir, checkpoint): - super().__init__() - with open(os.path.join(plan_dir, "conf.yaml"), "rb") as fp: - config = OmegaConf.load(fp.name) - max_batch_size = config.batch_size - - self.tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14") - self.max_length = config.clip.max_length - self.rng = torch.Generator(device=torch.cuda.current_device(),) - - self.set_beta_schedule() - - stream = cuda.Stream() - - self.image_encoder = self.load_vae_from_checkpoint(checkpoint) - - self.text_encoder = Engine(os.path.join(plan_dir, "clip.plan")) - shape_dict = {'tokens': config.clip.tokens, 'logits': config.clip.logits} - self.text_encoder.set_engine(stream, shape_dict) - - self.unet = Engine(os.path.join(plan_dir, "unet.plan")) - shape_dict = { - 'x': config.unet.x, - 't': (max_batch_size * 2,), - 'context': config.unet.context, - 'logits': config.unet.logits, - } - self.unet.set_engine(stream, shape_dict) - - def set_beta_schedule(self): - betas = make_beta_schedule("linear", 1000, linear_start=0.00085, linear_end=0.0120, cosine_s=0.008) - alphas = 1.0 - betas - alphas_cumprod = np.cumprod(alphas, axis=0) - betas = torch.tensor(betas) - alphas = torch.tensor(alphas) - alphas_cumprod = torch.tensor(alphas_cumprod) - to_torch = lambda x: x.clone().detach().to(torch.float32).to(torch.cuda.current_device()) - self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) - self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod.cpu()))) - self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1.0 - alphas_cumprod.cpu()))) - (timesteps,) = betas.shape - self.num_timesteps = int(timesteps) - - def q_sample(self, x_start, t, noise=None): - noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) - return ( - extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start - + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise - ) - - def encode_imgs(self, imgs): - imgs = 2 * imgs - 1 - posterior = self.image_encoder(imgs) - latents = posterior.sample() * 0.18215 - return latents - - def clip_encode(self, text): - batch_encoding = self.tokenizer( - text, - truncation=True, - max_length=self.max_length, - return_length=True, - return_overflowing_tokens=False, - padding="max_length", - return_tensors="pt", - ) - tokens = batch_encoding["input_ids"].to("cuda", non_blocking=True) - z = self.text_encoder.infer({"tokens": device_view(tokens.type(torch.int32))})['logits'].clone() - seq_len = (z.shape[1] + 8 - 1) // 8 * 8 - z = torch.nn.functional.pad(z, (0, 0, 0, seq_len - z.shape[1]), value=0.0) - return z - - def apply_model(self, x, t, cond, return_ids=False): - self.conditioning_key = "crossattn" - if isinstance(cond, dict): - # hybrid case, cond is exptected to be a dict - pass - else: - if not isinstance(cond, list): - cond = [cond] - # key = 'c_concat' if self.model.conditioning_key == 'concat' else 'c_crossattn' - key = 'c_crossattn' - cond = {key: cond} - # UNET TRT - cc = torch.cat(cond['c_crossattn'], 1) # needs to be changed I think - out = self.unet.infer( - { - "x": device_view(x.contiguous()), - "t": device_view(t.type(torch.int32).contiguous()), - "context": device_view(cc.contiguous()), - } - )['logits'].clone() - if isinstance(out, tuple) and not return_ids: - return out[0] - else: - return out - - def load_vae_from_checkpoint(self, checkpoint): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - cfg, state_dict = self.load_config_and_state_from_nemo(checkpoint) - - if cfg.get('unet_config') and cfg.get('unet_config').get('from_pretrained'): - cfg.unet_config.from_pretrained = None - if cfg.get('first_stage_config') and cfg.get('first_stage_config').get('from_pretrained'): - cfg.first_stage_config.from_pretrained = None - - model = LatentDiffusion(cfg).to(device) - - sd_state_dict = {} - for key, value in state_dict.items(): - key = key[6:] - sd_state_dict[key] = value - model.load_state_dict(sd_state_dict) - - return model.first_stage_model.encode - - def load_config_and_state_from_nemo(self, nemo_path): - if torch.cuda.is_available(): - map_location = torch.device('cuda') - else: - map_location = torch.device('cpu') - save_restore_connector = NLPSaveRestoreConnector() - cwd = os.getcwd() - - with tempfile.TemporaryDirectory() as tmpdir: - try: - save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) - - # Change current working directory to - os.chdir(tmpdir) - config_yaml = os.path.join(tmpdir, save_restore_connector.model_config_yaml) - cfg = OmegaConf.load(config_yaml) - - model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) - state_dict = save_restore_connector._load_state_dict_from_disk( - model_weights, map_location=map_location - ) - finally: - os.chdir(cwd) - - return cfg, state_dict - - -class StableDiffusion(nn.Module): - def __init__(self, plan_dir, checkpoint, sampler_type="DDIM", t_range=[0.02, 0.98], device=torch.device('cuda')): - super().__init__() - logging.info(f'loading stable diffusion...') - - self.device = device - self.sampler_type = sampler_type - self.model = LatentDiffusionWrapper(plan_dir, checkpoint) - - self.text_encoder = self.model.clip_encode - - self.num_train_timesteps = self.model.num_timesteps - self.min_step = int(self.num_train_timesteps * t_range[0]) - self.max_step = int(self.num_train_timesteps * t_range[1]) - self.alphas = self.model.alphas_cumprod.to(self.device) # for convenience - - logging.info(f'loaded stable diffusion!') - - @torch.no_grad() - def get_text_embeds(self, prompt): - return self.text_encoder(prompt) - - def train_step(self, text_embeddings, pred_rgb, guidance_scale=100, as_latent=False): - - if as_latent: - latents = F.interpolate(pred_rgb, (64, 64), mode='bilinear', align_corners=False) * 2 - 1 - else: - # interp to 512x512 to be fed into vae. - pred_rgb_512 = F.interpolate(pred_rgb, (512, 512), mode='bilinear', align_corners=False) - # encode image into latents with vae, requires grad! - latents = self.model.encode_imgs(pred_rgb_512) - - # timestep ~ U(0.02, 0.98) to avoid very high/low noise level - t = torch.randint(self.min_step, self.max_step + 1, (latents.shape[0],), dtype=torch.long, device=self.device) - - with torch.no_grad(): - noise = torch.randn_like(latents) - latents_noisy = self.model.q_sample(x_start=latents, t=t, noise=noise) - latent_model_input = torch.cat([latents_noisy] * 2) - td = torch.cat([t] * 2) - noise_pred = self.model.apply_model(latent_model_input, td, text_embeddings) - - noise_pred_uncond, noise_pred_pos = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_pos - noise_pred_uncond) - - # w(t), sigma_t^2 - w = 1 - self.alphas[t] - grad = w[:, None, None, None] * (noise_pred - noise) - grad = torch.nan_to_num(grad) - - targets = (latents - grad).detach() - loss = 0.5 * F.mse_loss(latents.float(), targets, reduction='sum') / latents.shape[0] - return loss diff --git a/nemo/collections/multimodal/modules/nerf/guidance/txt2img_guidance_base.py b/nemo/collections/multimodal/modules/nerf/guidance/txt2img_guidance_base.py deleted file mode 100644 index 8e03ffb41d71..000000000000 --- a/nemo/collections/multimodal/modules/nerf/guidance/txt2img_guidance_base.py +++ /dev/null @@ -1,6 +0,0 @@ -import torch.nn as nn - - -class Txt2ImgGuidanceBase(nn.Module): - def __init__(self): - super().__init__() diff --git a/nemo/collections/multimodal/modules/nerf/loss/laplacian_smooth_loss.py b/nemo/collections/multimodal/modules/nerf/loss/laplacian_smooth_loss.py deleted file mode 100644 index 2240f0aee8ce..000000000000 --- a/nemo/collections/multimodal/modules/nerf/loss/laplacian_smooth_loss.py +++ /dev/null @@ -1,38 +0,0 @@ -import torch -import torch.nn as nn - - -class LaplacianSmoothLoss(nn.Module): - def __init__(self): - super(LaplacianSmoothLoss, self).__init__() - - @torch.cuda.amp.autocast(enabled=False) - def forward(self, verts, faces): - with torch.no_grad(): - L = self.laplacian_uniform(verts, faces.long()) - loss = L.mm(verts) - loss = loss.norm(dim=1) - loss = loss.mean() - return loss - - # TODO(ahmadki): should be moved to a separate mesh class - def laplacian_uniform(self, verts, faces): - V = verts.shape[0] - F = faces.shape[0] - - # Neighbor indices - ii = faces[:, [1, 2, 0]].flatten() - jj = faces[:, [2, 0, 1]].flatten() - adj = torch.stack([torch.cat([ii, jj]), torch.cat([jj, ii])], dim=0).unique(dim=1) - adj_values = torch.ones(adj.shape[1], device=verts.device, dtype=torch.float) - - # Diagonal indices - diag_idx = adj[0] - - # Build the sparse matrix - idx = torch.cat((adj, torch.stack((diag_idx, diag_idx), dim=0)), dim=1) - values = torch.cat((-adj_values, adj_values)) - - # The coalesce operation sums the duplicate indices, resulting in the - # correct diagonal - return torch.sparse_coo_tensor(idx, values, (V, V)).coalesce() diff --git a/nemo/collections/multimodal/modules/nerf/loss/normal_consistency_loss.py b/nemo/collections/multimodal/modules/nerf/loss/normal_consistency_loss.py deleted file mode 100644 index 4459c7003fd4..000000000000 --- a/nemo/collections/multimodal/modules/nerf/loss/normal_consistency_loss.py +++ /dev/null @@ -1,56 +0,0 @@ -import torch -import torch.nn as nn - - -class NormalConsistencyLoss(nn.Module): - def __init__(self): - super(NormalConsistencyLoss, self).__init__() - - # TODO(ahmadki): is this safe to do in FP16 ? - def forward(self, face_normals, t_pos_idx): - tris_per_edge = self.compute_edge_to_face_mapping(t_pos_idx) - - # Fetch normals for both faces sharind an edge - n0 = face_normals[tris_per_edge[:, 0], :] - n1 = face_normals[tris_per_edge[:, 1], :] - - # Compute error metric based on normal difference - term = torch.clamp(torch.sum(n0 * n1, -1, keepdim=True), min=-1.0, max=1.0) - term = 1.0 - term - - return torch.mean(torch.abs(term)) - - # TODO(ahmadki): should belog to mesh class - def compute_edge_to_face_mapping(self, attr_idx): - with torch.no_grad(): - # Get unique edges - # Create all edges, packed by triangle - all_edges = torch.cat( - ( - torch.stack((attr_idx[:, 0], attr_idx[:, 1]), dim=-1), - torch.stack((attr_idx[:, 1], attr_idx[:, 2]), dim=-1), - torch.stack((attr_idx[:, 2], attr_idx[:, 0]), dim=-1), - ), - dim=-1, - ).view(-1, 2) - - # Swap edge order so min index is always first - order = (all_edges[:, 0] > all_edges[:, 1]).long().unsqueeze(dim=1) - sorted_edges = torch.cat( - (torch.gather(all_edges, 1, order), torch.gather(all_edges, 1, 1 - order)), dim=-1 - ) - - # Elliminate duplicates and return inverse mapping - unique_edges, idx_map = torch.unique(sorted_edges, dim=0, return_inverse=True) - - tris = torch.arange(attr_idx.shape[0]).repeat_interleave(3).cuda() - - tris_per_edge = torch.zeros((unique_edges.shape[0], 2), dtype=torch.int64).cuda() - - # Compute edge to face table - mask0 = order[:, 0] == 0 - mask1 = order[:, 0] == 1 - tris_per_edge[idx_map[mask0], 0] = tris[mask0] - tris_per_edge[idx_map[mask1], 1] = tris[mask1] - - return tris_per_edge diff --git a/nemo/collections/multimodal/modules/nerf/materials/__init__.py b/nemo/collections/multimodal/modules/nerf/materials/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/nemo/collections/multimodal/modules/nerf/materials/basic_shading.py b/nemo/collections/multimodal/modules/nerf/materials/basic_shading.py deleted file mode 100644 index 434f58552a05..000000000000 --- a/nemo/collections/multimodal/modules/nerf/materials/basic_shading.py +++ /dev/null @@ -1,66 +0,0 @@ -from typing import Optional - -import torch - -from nemo.collections.multimodal.modules.nerf.materials.materials_base import MaterialsBase, ShadingEnum - - -class BasicShading(MaterialsBase): - """ - Material model for handling various shading types. - """ - - def __init__(self): - super(BasicShading, self).__init__() - self.specular = torch.nn.Parameter(torch.rand(3)) - self.shininess = torch.nn.Parameter(torch.rand(1)) - - def forward( - self, - albedo: torch.Tensor, - normals: torch.Tensor, - light_d: torch.Tensor, - ambient_ratio: float, - shading_type: Optional[ShadingEnum] = None, - ) -> torch.Tensor: - """ - Apply material and shading to the input RGB tensor. - - Args: - albedo (Tensor): Base albedo values. - normals (Tensor): Normal vectors at each ray intersection. - light_d (Tensor): Light direction. - ambient_ratio (float): Ratio for ambient lighting. - shading_type (ShadingEnum): The type of shading to apply - - Returns: - Tensor: The output RGB tensor after applying material and shading. - """ - if shading_type is None: - return albedo - elif shading_type == ShadingEnum.TEXTURELESS: - return torch.ones_like(albedo) * ambient_ratio - elif shading_type == ShadingEnum.NORMAL: - return (normals + 1) / 2 # Map normals from [-1, 1] to [0, 1] - elif shading_type in [ShadingEnum.LAMBERTIAN, ShadingEnum.PHONG]: - # Ambient light - ambient_light = ambient_ratio * albedo - # Dot product between light direction and normals - dot_product = torch.sum(normals * light_d, dim=1, keepdim=True) - # Lambertian term - diffuse_term = albedo * torch.clamp(dot_product, min=0) - - if shading_type == ShadingEnum.LAMBERTIAN: - return ambient_light + diffuse_term - elif shading_type == ShadingEnum.PHONG: - # Phong specular term - specular_term = ( - self.specular - * (self.shininess + 2) - * torch.pow(torch.clamp(dot_product, min=0), self.shininess) - / (2 * 3.14159) - ) - - return ambient_light + diffuse_term + specular_term - else: - raise ValueError(f"Unknown shading_type: {shading_type}") diff --git a/nemo/collections/multimodal/modules/nerf/materials/materials_base.py b/nemo/collections/multimodal/modules/nerf/materials/materials_base.py deleted file mode 100644 index 393a5ffcc4fb..000000000000 --- a/nemo/collections/multimodal/modules/nerf/materials/materials_base.py +++ /dev/null @@ -1,29 +0,0 @@ -from enum import Enum -from typing import Literal, Optional - -from torch import nn - - -class ShadingEnum(str, Enum): - TEXTURELESS = "textureless" - NORMAL = "normal" - LAMBERTIAN = "lambertian" - PHONG = "phong" - - # TODO(ahmadki): - # Oren–Nayar - # Minnaert - # Cook–Torrance - # Ward anisotropic - # Hanrahan–Krueger - # Cel shading - # Gooch shading - - -class MaterialsBase(nn.Module): - """ - Base class for materials. - """ - - def __init__(self): - super(MaterialsBase, self).__init__() diff --git a/nemo/collections/multimodal/modules/nerf/renderers/__init__.py b/nemo/collections/multimodal/modules/nerf/renderers/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/nemo/collections/multimodal/modules/nerf/renderers/base_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/base_renderer.py deleted file mode 100644 index 36b78218a695..000000000000 --- a/nemo/collections/multimodal/modules/nerf/renderers/base_renderer.py +++ /dev/null @@ -1,18 +0,0 @@ -import torch -import torch.nn as nn - -# TODO(ahmadki): make abstract -class BaseRenderer(nn.Module): - def __init__(self, bound, update_interval): - super().__init__() - self.bound = bound - aabb = torch.FloatTensor([-bound, -bound, -bound, bound, bound, bound]) - self.register_buffer('aabb', aabb) - self.update_interval = update_interval - - @torch.no_grad() - def update_step(self, epoch: int, global_step: int, decay: float = 0.95, **kwargs): - raise NotImplementedError - - def forward(self, rays_o, rays_d, return_normal_image=False, return_normal_perturb=False, **kwargs): - raise NotImplementedError diff --git a/nemo/collections/multimodal/modules/nerf/renderers/base_sdf_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/base_sdf_renderer.py deleted file mode 100644 index 511908e826be..000000000000 --- a/nemo/collections/multimodal/modules/nerf/renderers/base_sdf_renderer.py +++ /dev/null @@ -1,20 +0,0 @@ -import torch - -from nemo.collections.multimodal.modules.renderer.base_renderer import RendererBase - - -class BaseSDFRenderer(RendererBase): - def __init__(self, bound): - super().__init__(bound) - - # TODO(ahmadki): needs a rework - @torch.no_grad() - def get_vertices_and_triangles(self, resolution=None, S=128): - deform = torch.tanh(self.deform) / self.grid_size - - vertices, triangles = self.dmtet(self.verts + deform, self.sdf, self.indices) - - vertices = vertices.detach().cpu().numpy() - triangles = triangles.detach().cpu().numpy() - - return vertices, triangles diff --git a/nemo/collections/multimodal/modules/nerf/renderers/base_volume_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/base_volume_renderer.py deleted file mode 100644 index a49f37dd0741..000000000000 --- a/nemo/collections/multimodal/modules/nerf/renderers/base_volume_renderer.py +++ /dev/null @@ -1,7 +0,0 @@ -from nemo.collections.multimodal.modules.nerf.geometry.nerf_base import DensityActivationEnum -from nemo.collections.multimodal.modules.renderer.base_renderer import RendererBase - - -class BaseVolumeRenderer(RendererBase): - def __init__(self, bound, update_interval): - super().__init__(bound, update_interval) diff --git a/nemo/collections/multimodal/modules/nerf/renderers/nerfacc_volume_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/nerfacc_volume_renderer.py deleted file mode 100644 index 103c7b82ad23..000000000000 --- a/nemo/collections/multimodal/modules/nerf/renderers/nerfacc_volume_renderer.py +++ /dev/null @@ -1,364 +0,0 @@ -import collections -import math -from typing import Optional - -import torch -from nerfacc.estimators.occ_grid import OccGridEstimator -from nerfacc.grid import ray_aabb_intersect, traverse_grids -from nerfacc.volrend import accumulate_along_rays_, render_weight_from_density, rendering - -from nemo.collections.multimodal.modules.renderer.base_renderer import BaseRenderer - -Rays = collections.namedtuple("Rays", ("origins", "viewdirs")) - - -def namedtuple_map(fn, tup): - """Apply `fn` to each element of `tup` and cast to `tup`'s namedtuple.""" - return type(tup)(*(None if x is None else fn(x) for x in tup)) - - -def render_image_with_occgrid( - # scene - nerf: torch.nn.Module, - estimator: OccGridEstimator, - rays: Rays, - # rendering options - near_plane: float = 0.0, - far_plane: float = 1e10, - render_step_size: float = 1e-3, - render_bkgd: Optional[torch.Tensor] = None, - cone_angle: float = 0.0, - alpha_thre: float = 0.0, - # test options - test_chunk_size: int = 8192, -): - """Render the pixels of an image.""" - rays_shape = rays.origins.shape - if len(rays_shape) == 3: - height, width, _ = rays_shape - num_rays = height * width - rays = namedtuple_map(lambda r: r.reshape([num_rays] + list(r.shape[2:])), rays) - else: - num_rays, _ = rays_shape - - # TODO(ahmadki): optimize, cache result between sigma_fn and rgb_sigma_fn - def sigma_fn(t_starts, t_ends, ray_indices): - t_origins = chunk_rays.origins[ray_indices] - t_dirs = chunk_rays.viewdirs[ray_indices] - positions = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0 - sigmas = nerf.density(positions)['sigma'] - return sigmas - - def rgb_sigma_fn(t_starts, t_ends, ray_indices): - t_origins = chunk_rays.origins[ray_indices] - t_dirs = chunk_rays.viewdirs[ray_indices] - positions = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0 - sigmas, rgbs, normal = nerf( - positions=positions, view_dirs=None, light_dirs=t_dirs - ) # TODO(ahmadki): t_dirs is incorrect - return rgbs, sigmas - - results = [] - chunk = torch.iinfo(torch.int32).max if nerf.training else test_chunk_size - - for i in range(0, num_rays, chunk): - chunk_rays = namedtuple_map(lambda r: r[i : i + chunk], rays) - ray_indices, t_starts, t_ends = estimator.sampling( - chunk_rays.origins, - chunk_rays.viewdirs, - sigma_fn=sigma_fn, - near_plane=near_plane, - far_plane=far_plane, - render_step_size=render_step_size, - stratified=nerf.training, - cone_angle=cone_angle, - alpha_thre=alpha_thre, - ) - rgb, opacity, depth, extras = rendering( - t_starts, - t_ends, - ray_indices, - n_rays=chunk_rays.origins.shape[0], - rgb_sigma_fn=rgb_sigma_fn, - render_bkgd=render_bkgd, - ) - - weight = extras["weights"] - alpha = extras["alphas"] - - chunk_results = [rgb, opacity, depth, weight, alpha, len(t_starts)] - results.append(chunk_results) - - colors, opacities, depths, weights, alphas, n_rendering_samples = [ - torch.cat(r, dim=0) if isinstance(r[0], torch.Tensor) else r for r in zip(*results) - ] - - return ( - colors.view((*rays_shape[:-1], -1)), - opacities.view((*rays_shape[:-1], -1)), - depths.view((*rays_shape[:-1], -1)), - weights, - alphas, - sum(n_rendering_samples), - ) - - -@torch.no_grad() -def render_image_with_occgrid_test( - max_samples: int, - # scene - nerf: torch.nn.Module, - estimator: OccGridEstimator, - rays: Rays, - # rendering options - near_plane: float = 0.0, - far_plane: float = 1e10, - render_step_size: float = 1e-3, - render_bkgd: Optional[torch.Tensor] = None, - cone_angle: float = 0.0, - alpha_thre: float = 0.0, - early_stop_eps: float = 1e-4, -): - """Render the pixels of an image.""" - rays_shape = rays.origins.shape - if len(rays_shape) == 3: - height, width, _ = rays_shape - num_rays = height * width - rays = namedtuple_map(lambda r: r.reshape([num_rays] + list(r.shape[2:])), rays) - else: - num_rays, _ = rays_shape - - def rgb_sigma_fn(t_starts, t_ends, ray_indices): - t_origins = rays.origins[ray_indices] - t_dirs = rays.viewdirs[ray_indices] - positions = t_origins + t_dirs * (t_starts[:, None] + t_ends[:, None]) / 2.0 - sigmas, rgbs, normal = nerf( - positions=positions, view_dirs=None, light_dirs=t_dirs - ) # TODO(ahmadki): t_dirs is incorrect ? - return rgbs, sigmas - - device = rays.origins.device - opacity = torch.zeros(num_rays, 1, device=device) - depth = torch.zeros(num_rays, 1, device=device) - rgb = torch.zeros(num_rays, 3, device=device) - - ray_mask = torch.ones(num_rays, device=device).bool() - - # 1 for synthetic scenes, 4 for real scenes - min_samples = 1 if cone_angle == 0 else 4 - - iter_samples = total_samples = 0 - - rays_o = rays.origins - rays_d = rays.viewdirs - - near_planes = torch.full_like(rays_o[..., 0], fill_value=near_plane) - far_planes = torch.full_like(rays_o[..., 0], fill_value=far_plane) - - t_mins, t_maxs, hits = ray_aabb_intersect(rays_o, rays_d, estimator.aabbs) - - n_grids = estimator.binaries.size(0) - - if n_grids > 1: - t_sorted, t_indices = torch.sort(torch.cat([t_mins, t_maxs], -1), -1) - else: - t_sorted = torch.cat([t_mins, t_maxs], -1) - t_indices = torch.arange(0, n_grids * 2, device=t_mins.device, dtype=torch.int64).expand(num_rays, n_grids * 2) - - opc_thre = 1 - early_stop_eps - - while iter_samples < max_samples: - - n_alive = ray_mask.sum().item() - if n_alive == 0: - break - - # the number of samples to add on each ray - n_samples = max(min(num_rays // n_alive, 64), min_samples) - iter_samples += n_samples - - # ray marching - (intervals, samples, termination_planes) = traverse_grids( - # rays - rays_o, # [n_rays, 3] - rays_d, # [n_rays, 3] - # grids - estimator.binaries, # [m, resx, resy, resz] - estimator.aabbs, # [m, 6] - # options - near_planes, # [n_rays] - far_planes, # [n_rays] - render_step_size, - cone_angle, - n_samples, - True, - ray_mask, - # pre-compute intersections - t_sorted, # [n_rays, m*2] - t_indices, # [n_rays, m*2] - hits, # [n_rays, m] - ) - t_starts = intervals.vals[intervals.is_left] - t_ends = intervals.vals[intervals.is_right] - ray_indices = samples.ray_indices[samples.is_valid] - packed_info = samples.packed_info - - # get rgb and sigma from radiance field - rgbs, sigmas = rgb_sigma_fn(t_starts, t_ends, ray_indices) - # volume rendering using native cuda scan - weights, _, alphas = render_weight_from_density( - t_starts, - t_ends, - sigmas, - ray_indices=ray_indices, - n_rays=num_rays, - prefix_trans=1 - opacity[ray_indices].squeeze(-1), - ) - if alpha_thre > 0: - vis_mask = alphas >= alpha_thre - ray_indices, rgbs, weights, t_starts, t_ends = ( - ray_indices[vis_mask], - rgbs[vis_mask], - weights[vis_mask], - t_starts[vis_mask], - t_ends[vis_mask], - ) - - accumulate_along_rays_( - weights, values=rgbs, ray_indices=ray_indices, outputs=rgb, - ) - accumulate_along_rays_( - weights, values=None, ray_indices=ray_indices, outputs=opacity, - ) - accumulate_along_rays_( - weights, values=(t_starts + t_ends)[..., None] / 2.0, ray_indices=ray_indices, outputs=depth, - ) - # update near_planes using termination planes - near_planes = termination_planes - # update rays status - ray_mask = torch.logical_and( - # early stopping - opacity.view(-1) <= opc_thre, - # remove rays that have reached the far plane - packed_info[:, 1] == n_samples, - ) - total_samples += ray_indices.shape[0] - - if render_bkgd is not None: - rgb = rgb + render_bkgd * (1.0 - opacity) - - depth = depth / opacity.clamp_min(torch.finfo(rgbs.dtype).eps) - - return ( - rgb.view((*rays_shape[:-1], -1)), - opacity.view((*rays_shape[:-1], -1)), - depth.view((*rays_shape[:-1], -1)), - weights, - alphas, - total_samples, - ) - - -class NerfaccVolumeBaseRenderer(BaseRenderer): - def __init__( - self, - bound, - grid_resolution, - grid_levels, - render_step_size=1e-3, - near_plane=0.2, - cone_angle=0.004, - alpha_thre=1e-2, - ): - - super().__init__(bound) - - self.grid_resolution = grid_resolution - self.grid_levels = grid_levels - self.render_step_size = render_step_size - self.near_plane = near_plane - self.cone_angle = cone_angle - self.alpha_thre = alpha_thre - self.nerf = None - - self.estimator = OccGridEstimator(roi_aabb=self.aabb, resolution=self.grid_resolution, levels=self.grid_levels) - - @torch.no_grad() # TODO(ahmadki) - def update_step( - self, - epoch: int, - global_step: int, - update_interval: int = 16, - decay: float = 0.95, - occ_thre: float = 0.01, - warmup_steps: int = 256, - **kwargs - ): - def occ_eval_fn(x): - density = self.nerf.forward_density(x) - return density * self.render_step_size - - self.estimator.update_every_n_steps( - step=global_step, - occ_eval_fn=occ_eval_fn, - occ_thre=occ_thre, - ema_decay=decay, - warmup_steps=warmup_steps, - n=update_interval, - ) - - def forward(self, rays_o, rays_d, mvp, h, w, staged=False, max_ray_batch=4096, step=None, **kwargs): - return self._render(rays_o=rays_o, rays_d=rays_d, step=step, **kwargs) - - def _render( - self, - rays_o, - rays_d, - light_d=None, - ambient_ratio=1.0, - shading='albedo', - bg_color=None, - perturb=False, - T_thresh=1e-4, - binarize=False, - step=None, - **kwargs - ): - rays_o = rays_o.contiguous().view(-1, 3) - rays_d = rays_d.contiguous().view(-1, 3) - - N = rays_o.shape[0] # N = B * N, in fact - - rays = Rays(origins=rays_o, viewdirs=rays_d) - - if self.training: - rgb, acc, depth, weights, alphas, n_rendering_samples = render_image_with_occgrid( - nerf=self.nerf, - estimator=self.estimator, - rays=rays, - near_plane=self.near_plane, - render_step_size=self.render_step_size, - render_bkgd=bg_color, - cone_angle=self.cone_angle, - alpha_thre=self.alpha_thre, - ) - else: - rgb, acc, depth, weights, alphas, n_rendering_samples = render_image_with_occgrid_test( - max_samples=1024, - nerf=self.nerf, - estimator=self.estimator, - rays=rays, - near_plane=self.near_plane, - render_step_size=self.render_step_size, - render_bkgd=bg_color, - cone_angle=self.cone_angle, - alpha_thre=self.alpha_thre, - ) - - results = {} - results['weights'] = weights - results['image'] = rgb.view(1, -1, 3) - results['depth'] = depth.view(1, -1) - results['weights_sum'] = acc.view(1, -1) - - return results diff --git a/nemo/collections/multimodal/modules/nerf/renderers/nvdiffrast_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/nvdiffrast_renderer.py deleted file mode 100644 index 9b23e1db890c..000000000000 --- a/nemo/collections/multimodal/modules/nerf/renderers/nvdiffrast_renderer.py +++ /dev/null @@ -1,222 +0,0 @@ -import math - -import numpy as np -import nvdiffrast.torch as dr -import torch -import torch.nn.functional as F - -from nemo.collections.multimodal.modules.nerf.geometry.dmtet import DeepMarchingTetrahedra -from nemo.collections.multimodal.modules.nerf.geometry.nerf_base import DensityActivationEnum -from nemo.collections.multimodal.modules.nerf.renderers.base_renderer import BaseRenderer - - -# TODO: self.density_thresh, self.mean_density need a rework, they can be infered at run time -# and shouldn't be loaded from the checkpoint -class NVDiffRastRenderer(BaseRenderer): - def __init__(self, bound, update_interval, grid_resolution, density_thresh, quartet_file): - - super().__init__(bound, update_interval) - - self.grid_resolution = grid_resolution - self.density_thresh = density_thresh - self.quartet_file = quartet_file - - self.cascade = 1 + math.ceil(math.log2(bound)) - density_grid = torch.zeros([self.cascade, self.grid_resolution ** 3]) # [CAS, H * H * H] - density_bitfield = torch.zeros( - self.cascade * self.grid_resolution ** 3 // 8, dtype=torch.uint8 - ) # [CAS * H * H * H // 8] - self.register_buffer('density_grid', density_grid) - self.register_buffer('density_bitfield', density_bitfield) - self.mean_density = 0 - self.iter_density = 0 - - # load dmtet vertices - # TODO(ahmadki): hard coded devices - tets = np.load(quartet_file) - self.verts = -torch.tensor(tets['vertices'], dtype=torch.float32, device='cuda') * 2 # covers [-1, 1] - self.indices = torch.tensor(tets['indices'], dtype=torch.long, device='cuda') - self.tet_scale = torch.tensor([1, 1, 1], dtype=torch.float32, device='cuda') - self.dmtet = DeepMarchingTetrahedra(device='cuda') - - # vert sdf and deform - sdf = torch.nn.Parameter(torch.zeros_like(self.verts[..., 0]), requires_grad=True) - self.register_parameter('sdf', sdf) - deform = torch.nn.Parameter(torch.zeros_like(self.verts), requires_grad=True) - self.register_parameter('deform', deform) - - edges = torch.tensor( - [0, 1, 0, 2, 0, 3, 1, 2, 1, 3, 2, 3], dtype=torch.long, device="cuda" - ) # six edges for each tetrahedron. - all_edges = self.indices[:, edges].reshape(-1, 2) # [M * 6, 2] - all_edges_sorted = torch.sort(all_edges, dim=1)[0] - self.all_edges = torch.unique(all_edges_sorted, dim=0) - - self.initialized = False # TODO(ahmadki): not a good approach - - self.glctx = dr.RasterizeCudaContext() - - # TODO(ahmadki): not a good approach - self.nerf = None - self.material = None - self.background = None - - # TODO(ahmkadi): doesn't look good to me !! - @torch.no_grad() - def update_step(self, epoch: int, global_step: int, decay: float = 0.95, S: int = 128, **kwargs): - pass - - @torch.no_grad() - def init_tet(self): - # TODO(ahmadki): a better approach would be to have a global nerf representation (mesh) that - # we can init the tets from. this would work with checkpoints. - - # TODO(ahmadki): a placeholder, but it works for now - self.mean_density = 300 - density_thresh = min(self.mean_density, self.density_thresh) - - if self.nerf.density_activation == DensityActivationEnum.SOFTPLUS: - density_thresh = density_thresh * 25 - - # Get initial sigma - sigma = self.nerf.forward_density(positions=self.verts) - mask = sigma > density_thresh - valid_verts = self.verts[mask] - self.tet_scale = valid_verts.abs().amax(dim=0) + 1e-1 - - # Scale vertices - self.verts = self.verts * self.tet_scale - - # get sigma using the scaled vertices - sigma = self.nerf.forward_density(positions=self.verts) - self.sdf.data += (sigma - density_thresh).clamp(-1, 1) - - def forward( - self, - rays_o, - rays_d, - mvp, - light_d=None, - ambient_ratio=1.0, - shading_type=None, - return_normal_image=False, - return_vertices=False, - return_faces=False, - return_faces_normals=False, - **kwargs - ): - if not self.initialized: - self.init_tet() - self.initialized = True - return self._render( - rays_o=rays_o, - rays_d=rays_d, - mvp=mvp, - light_d=light_d, - ambient_ratio=ambient_ratio, - shading_type=shading_type, - return_normal_image=return_normal_image, - return_vertices=return_vertices, - return_faces=return_faces, - return_faces_normals=return_faces_normals, - **kwargs - ) - - def _render( - self, - rays_o, - rays_d, - mvp, - light_d=None, - ambient_ratio=1.0, - shading_type=None, - return_normal_image=False, - return_vertices=False, - return_faces=False, - return_faces_normals=False, - **kwargs - ): - # mvp: [B, 4, 4] - B, H, W, _ = rays_o.shape - - # TODO(ahmadki): move to dataset - # random sample light_d if not provided - if light_d is None: - # gaussian noise around the ray origin, so the light always face the view dir (avoid dark face) - light_d = rays_o + torch.randn(3, device=rays_o.device) - light_d = F.normalize(light_d) - - results = {} - - # get mesh - deform = torch.tanh(self.deform) / self.grid_resolution - - verts, faces = self.dmtet(self.verts + deform, self.sdf, self.indices) - - # get normals - i0, i1, i2 = faces[:, 0], faces[:, 1], faces[:, 2] - v0, v1, v2 = verts[i0, :], verts[i1, :], verts[i2, :] - - faces = faces.int() - - face_normals = torch.cross(v1 - v0, v2 - v0) - face_normals = F.normalize(face_normals) - - vn = torch.zeros_like(verts) - vn.scatter_add_(0, i0[:, None].repeat(1, 3), face_normals) - vn.scatter_add_(0, i1[:, None].repeat(1, 3), face_normals) - vn.scatter_add_(0, i2[:, None].repeat(1, 3), face_normals) - - vn = torch.where( - torch.sum(vn * vn, -1, keepdim=True) > 1e-20, - vn, - torch.tensor([0.0, 0.0, 1.0], dtype=torch.float32, device=vn.device), - ) - - # rasterization - verts_clip = torch.bmm( - F.pad(verts, pad=(0, 1), mode='constant', value=1.0).unsqueeze(0).repeat(mvp.shape[0], 1, 1), - mvp.permute(0, 2, 1), - ).float() # [B, N, 4] - rast, _ = dr.rasterize(self.glctx, verts_clip, faces, (H, W)) - - alpha = (rast[..., 3:] > 0).float() - xyzs, _ = dr.interpolate(verts.unsqueeze(0), rast, faces) # [B, H, W, 3] - normal, _ = dr.interpolate(vn.unsqueeze(0).contiguous(), rast, faces) - normal = F.normalize(normal) - - xyzs = xyzs.view(-1, 3) - mask = (rast[..., 3:] > 0).view(-1).detach() - - # do the lighting here since we have normal from mesh now. - albedo = torch.zeros_like(xyzs, dtype=torch.float32) - if mask.any(): - masked_albedo = self.nerf.forward_features(positions=xyzs[mask]) - albedo[mask] = masked_albedo.float() - albedo = albedo.view(B, H, W, 3) - fg_color = self.material( - albedo=albedo, normals=normal, light_d=light_d, ambient_ratio=ambient_ratio, shading_type=shading_type - ) - - fg_color = dr.antialias(fg_color, rast, verts_clip, faces).clamp(0, 1) # [B, H, W, 3] - alpha = dr.antialias(alpha, rast, verts_clip, faces).clamp(0, 1) # [B, H, W, 1] - - # mix background color - bg_color = self.background(rays_d=rays_d) # [N, 3] - - depth = rast[:, :, :, [2]] # [B, H, W] - color = fg_color + (1 - alpha) * bg_color - - results['depth'] = depth - results['image'] = color - if return_normal_image: - results['normal_image'] = dr.antialias((normal + 1) / 2, rast, verts_clip, faces).clamp( - 0, 1 - ) # [B, H, W, 3] - if return_vertices: - results['vertices'] = verts - if return_faces: - results['faces'] = faces - if return_faces_normals: - results['face_normals'] = face_normals - return results diff --git a/nemo/collections/multimodal/modules/nerf/renderers/torchngp_volume_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/torchngp_volume_renderer.py deleted file mode 100644 index 46096857a773..000000000000 --- a/nemo/collections/multimodal/modules/nerf/renderers/torchngp_volume_renderer.py +++ /dev/null @@ -1,275 +0,0 @@ -import math - -import torch -import torch.nn.functional as F - -import nemo.collections.multimodal.modules.nerf.utils.torch_ngp.raymarching as raymarching -from nemo.collections.multimodal.modules.nerf.materials.materials_base import ShadingEnum -from nemo.collections.multimodal.modules.nerf.renderers.base_renderer import BaseRenderer - - -class TorchNGPVolumeRenderer(BaseRenderer): - def __init__(self, bound, update_interval, grid_resolution, density_thresh, max_steps, dt_gamma): - - super().__init__(bound, update_interval) - - self.cascade = 1 + math.ceil(math.log2(bound)) - self.grid_resolution = grid_resolution - self.density_thresh = density_thresh - self.dt_gamma = dt_gamma - self.max_steps = max_steps - - # density grid - # TODO(ahmadki): needs rework - density_grid = torch.zeros([self.cascade, self.grid_resolution ** 3]) # [CAS, H * H * H] - density_bitfield = torch.zeros( - self.cascade * self.grid_resolution ** 3 // 8, dtype=torch.uint8 - ) # [CAS * H * H * H // 8] - self.register_buffer('density_grid', density_grid) - self.register_buffer('density_bitfield', density_bitfield) - self.mean_density = 0 - self.iter_density = 0 - - # TODO(ahmadki): needs rework - self.nerf = None - self.material = None - self.background = None - - @torch.no_grad() - def update_step(self, epoch: int, global_step: int, decay: float = 0.95, S: int = 128, **kwargs): - if global_step % self.update_interval != 0: - return - - ### update density grid - tmp_grid = -torch.ones_like(self.density_grid) - - X = torch.arange(self.grid_resolution, dtype=torch.int32, device=self.aabb.device).split(S) - Y = torch.arange(self.grid_resolution, dtype=torch.int32, device=self.aabb.device).split(S) - Z = torch.arange(self.grid_resolution, dtype=torch.int32, device=self.aabb.device).split(S) - - for xs in X: - for ys in Y: - for zs in Z: - - # construct points - xx, yy, zz = torch.meshgrid(xs, ys, zs, indexing='ij') - coords = torch.cat( - [xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1 - ) # [N, 3], in [0, 128) - indices = raymarching.morton3D(coords).long() # [N] - xyzs = 2 * coords.float() / (self.grid_resolution - 1) - 1 # [N, 3] in [-1, 1] - - # cascading - for cas in range(self.cascade): - bound = min(2 ** cas, self.bound) - half_grid_resolution = bound / self.grid_resolution - # scale to current cascade's resolution - cas_xyzs = xyzs * (bound - half_grid_resolution) - # add noise in [-hgs, hgs] - cas_xyzs += (torch.rand_like(cas_xyzs) * 2 - 1) * half_grid_resolution - # query density - density = self.nerf.forward_density(cas_xyzs).reshape(-1).detach() - # assign - tmp_grid[cas, indices] = density - # ema update - valid_mask = self.density_grid >= 0 - self.density_grid[valid_mask] = torch.maximum(self.density_grid[valid_mask] * decay, tmp_grid[valid_mask]) - self.mean_density = torch.mean(self.density_grid[valid_mask]).item() - self.iter_density += 1 - - # convert to bitfield - density_thresh = min(self.mean_density, self.density_thresh) - self.density_bitfield = raymarching.packbits(self.density_grid, density_thresh, self.density_bitfield) - - def forward( - self, - rays_o, - rays_d, - light_d=None, - ambient_ratio=1.0, - shading_type=None, - return_normal_image=False, - return_normal_perturb=False, - **kwargs - ): - return self._render( - rays_o=rays_o, - rays_d=rays_d, - light_d=light_d, - ambient_ratio=ambient_ratio, - shading_type=shading_type, - return_normal_image=return_normal_image, - return_normal_perturb=return_normal_perturb, - **kwargs - ) - - # TODO(ahmadki): return_normal_image is always False ? - def _render( - self, - rays_o, - rays_d, - light_d=None, - ambient_ratio=1.0, - shading_type=None, - return_normal_image=False, - return_normal_perturb=False, - perturb=False, - T_thresh=1e-4, - binarize=False, - **kwargs - ): - # rays_o, rays_d: [B, H, W, 3] - B, H, W, _ = rays_o.shape - - # group all rays into a single batch - rays_o = rays_o.contiguous().view(-1, 3) - rays_d = rays_d.contiguous().view(-1, 3) - num_rays = rays_o.shape[0] # num_rays = B * H * W - - # pre-calculate near far - nears, fars = raymarching.near_far_from_aabb(rays_o, rays_d, self.aabb) - - # random sample light_d if not provided - # TODO(ahmadki): move to dataset - if light_d is None: - # gaussian noise around the ray origin, so the light always face the view dir (avoid dark face) - light_d = rays_o + torch.randn(3, device=rays_o.device) - light_d = F.normalize(light_d) - - normal_image = None - normals_perturb = None - weights = None - - if self.training: - positions, dirs, ts, rays = raymarching.march_rays_train( - rays_o, - rays_d, - self.bound, - self.density_bitfield, - self.cascade, - self.grid_resolution, - nears, - fars, - perturb, - self.dt_gamma, - self.max_steps, - ) - dirs = F.normalize(dirs) - - if light_d.shape[0] > 1: - flatten_rays = raymarching.flatten_rays(rays, positions.shape[0]).long() - light_d = light_d[flatten_rays] - - return_normal = (shading_type is not None) or return_normal_image - sigmas, albedo, normals = self.nerf(positions=positions, return_normal=return_normal) - - fg_color = self.material( - albedo=albedo, normals=normals, light_d=light_d, ambient_ratio=ambient_ratio, shading_type=shading_type - ) - - weights, opacity, depth, image = raymarching.composite_rays_train( - sigmas, fg_color, ts, rays, T_thresh, binarize - ) - - if return_normal_image and normals is not None: - _, _, _, normal_image = raymarching.composite_rays_train( - sigmas.detach(), (normals + 1) / 2, ts, rays, T_thresh, binarize - ) - - if return_normal_perturb: - perturb_positions = positions + torch.randn_like(positions) * 1e-2 - normals_perturb = self.normal(positions=perturb_positions) - - else: - # allocate tensors - image = torch.zeros(num_rays, 3, device=rays_o.device) - depth = torch.zeros(num_rays, device=rays_o.device) - opacity = torch.zeros(num_rays, device=rays_o.device) - - n_alive = num_rays - rays_alive = torch.arange(n_alive, dtype=torch.int32, device=rays_o.device) - rays_t = nears.clone() - - step = 0 - - while step < self.max_steps: # hard coded max step - # count alive rays - n_alive = rays_alive.shape[0] - - # exit loop - if n_alive <= 0: - break - - # decide compact_steps - n_step = max(min(num_rays // n_alive, 8), 1) - - positions, dirs, ts = raymarching.march_rays( - n_alive, - n_step, - rays_alive, - rays_t, - rays_o, - rays_d, - self.bound, - self.density_bitfield, - self.cascade, - self.grid_resolution, - nears, - fars, - perturb if step == 0 else False, - self.dt_gamma, - self.max_steps, - ) - dirs = F.normalize(dirs) - - return_normal = shading_type not in [None, ShadingEnum.TEXTURELESS] - sigmas, albedo, normals = self.nerf(positions=positions, return_normal=return_normal) - - fg_color = self.material( - albedo=albedo, - normals=normals, - light_d=light_d, - ambient_ratio=ambient_ratio, - shading_type=shading_type, - ) - raymarching.composite_rays( - n_alive, - n_step, - rays_alive, - rays_t, - sigmas, - fg_color, - ts, - opacity, - depth, - image, - T_thresh, - binarize, - ) - - # TODO(ahmadki): add optoin to return normal_image, like in training - - rays_alive = rays_alive[rays_alive >= 0] - - step += n_step - - # mix background color - bg_color = self.background(rays_d) # [N, 3] - image = image + (1 - opacity).unsqueeze(-1) * bg_color - - results = { - "image": image.view(B, H, W, 3), - "depth": depth.view(B, H, W, 1), - "opacity": opacity.view(B, H, W, 1), - "dirs": dirs, - } - if normals is not None: - results["normals"] = normals - if weights is not None: - results["weights"] = weights - if normal_image is not None: - results["normal_image"] = normal_image.view(B, H, W, 3) - if normals_perturb is not None: - results["normal_perturb"] = normals_perturb - - return results diff --git a/nemo/collections/multimodal/modules/nerf/utils/__init__.py b/nemo/collections/multimodal/modules/nerf/utils/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/nemo/collections/multimodal/modules/nerf/utils/activation.py b/nemo/collections/multimodal/modules/nerf/utils/activation.py deleted file mode 100644 index fa7f3c60829a..000000000000 --- a/nemo/collections/multimodal/modules/nerf/utils/activation.py +++ /dev/null @@ -1,20 +0,0 @@ -import torch -from torch.autograd import Function -from torch.cuda.amp import custom_bwd, custom_fwd - - -class _trunc_exp(Function): - @staticmethod - @custom_fwd(cast_inputs=torch.float) - def forward(ctx, x): - ctx.save_for_backward(x) - return torch.exp(x) - - @staticmethod - @custom_bwd - def backward(ctx, g): - x = ctx.saved_tensors[0] - return g * torch.exp(x.clamp(max=15)) - - -trunc_exp = _trunc_exp.apply diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/__init__.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/encoding.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/encoding.py deleted file mode 100644 index 3d2c1e8c74e6..000000000000 --- a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/encoding.py +++ /dev/null @@ -1,137 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class FreqEncoder_torch(nn.Module): - def __init__( - self, - input_dim, - max_freq_log2, - N_freqs, - log_sampling=True, - include_input=True, - periodic_fns=(torch.sin, torch.cos), - ): - - super().__init__() - - self.input_dim = input_dim - self.include_input = include_input - self.periodic_fns = periodic_fns - self.N_freqs = N_freqs - - self.output_dim = 0 - if self.include_input: - self.output_dim += self.input_dim - - self.output_dim += self.input_dim * N_freqs * len(self.periodic_fns) - - if log_sampling: - self.freq_bands = 2 ** torch.linspace(0, max_freq_log2, N_freqs) - else: - self.freq_bands = torch.linspace(2 ** 0, 2 ** max_freq_log2, N_freqs) - - self.freq_bands = self.freq_bands.numpy().tolist() - - def forward(self, input, max_level=None, **kwargs): - - if max_level is None: - max_level = self.N_freqs - else: - max_level = int(max_level * self.N_freqs) - - out = [] - if self.include_input: - out.append(input) - - for i in range(max_level): - freq = self.freq_bands[i] - for p_fn in self.periodic_fns: - out.append(p_fn(input * freq)) - - # append 0 - if self.N_freqs - max_level > 0: - out.append( - torch.zeros( - input.shape[0], - (self.N_freqs - max_level) * 2 * input.shape[1], - device=input.device, - dtype=input.dtype, - ) - ) - - out = torch.cat(out, dim=-1) - - return out - - -def get_encoder( - encoder_type, - input_dim=3, - multires=6, - degree=4, - num_levels=16, - level_dim=2, - base_resolution=16, - log2_hashmap_size=19, - desired_resolution=2048, - align_corners=False, - interpolation='linear', - **kwargs -): - - if encoder_type is None: - return lambda x, **kwargs: x, input_dim - - elif encoder_type == 'frequency_torch': - encoder = FreqEncoder_torch( - input_dim=input_dim, max_freq_log2=multires - 1, N_freqs=multires, log_sampling=True - ) - - elif encoder_type == 'frequency': # CUDA implementation, faster than torch. - from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.freqencoder import FreqEncoder - - encoder = FreqEncoder(input_dim=input_dim, degree=multires) - - elif encoder_type == 'sphere_harmonics': - from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.shencoder import SHEncoder - - encoder = SHEncoder(input_dim=input_dim, degree=degree) - - elif encoder_type == 'hashgrid': - from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.gridencoder import GridEncoder - - encoder = GridEncoder( - input_dim=input_dim, - num_levels=num_levels, - level_dim=level_dim, - base_resolution=base_resolution, - log2_hashmap_size=log2_hashmap_size, - desired_resolution=desired_resolution, - gridtype='hash', - align_corners=align_corners, - interpolation=interpolation, - ) - - elif encoder_type == 'tiledgrid': - from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.gridencoder import GridEncoder - - encoder = GridEncoder( - input_dim=input_dim, - num_levels=num_levels, - level_dim=level_dim, - base_resolution=base_resolution, - log2_hashmap_size=log2_hashmap_size, - desired_resolution=desired_resolution, - gridtype='tiled', - align_corners=align_corners, - interpolation=interpolation, - ) - - else: - raise NotImplementedError( - 'Unknown encoder type, choose from [None, frequency, sphere_harmonics, hashgrid, tiledgrid]' - ) - - return encoder, encoder.output_dim diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/freqencoder.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/freqencoder.py deleted file mode 100644 index 1c217f9c8b7d..000000000000 --- a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/freqencoder.py +++ /dev/null @@ -1,73 +0,0 @@ -import _freqencoder as _backend -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.cuda.amp import custom_bwd, custom_fwd - - -class _freq_encoder(Function): - @staticmethod - @custom_fwd(cast_inputs=torch.float32) # force float32 for better precision - def forward(ctx, inputs, degree, output_dim): - # inputs: [B, input_dim], float - # RETURN: [B, F], float - - if not inputs.is_cuda: - inputs = inputs.cuda() - inputs = inputs.contiguous() - - B, input_dim = inputs.shape # batch size, coord dim - - outputs = torch.empty(B, output_dim, dtype=inputs.dtype, device=inputs.device) - - _backend.freq_encode_forward(inputs, B, input_dim, degree, output_dim, outputs) - - ctx.save_for_backward(inputs, outputs) - ctx.dims = [B, input_dim, degree, output_dim] - - return outputs - - @staticmethod - # @once_differentiable - @custom_bwd - def backward(ctx, grad): - # grad: [B, C * C] - - grad = grad.contiguous() - inputs, outputs = ctx.saved_tensors - B, input_dim, degree, output_dim = ctx.dims - - grad_inputs = torch.zeros_like(inputs) - _backend.freq_encode_backward(grad, outputs, B, input_dim, degree, output_dim, grad_inputs) - - return grad_inputs, None, None - - -freq_encode = _freq_encoder.apply - - -class FreqEncoder(nn.Module): - def __init__(self, input_dim=3, degree=4): - super().__init__() - - self.input_dim = input_dim - self.degree = degree - self.output_dim = input_dim + input_dim * 2 * degree - - def __repr__(self): - return f"FreqEncoder: input_dim={self.input_dim} degree={self.degree} output_dim={self.output_dim}" - - def forward(self, inputs, **kwargs): - # inputs: [..., input_dim] - # return: [..., ] - - prefix_shape = list(inputs.shape[:-1]) - inputs = inputs.reshape(-1, self.input_dim) - - outputs = freq_encode(inputs, self.degree, self.output_dim) - - outputs = outputs.reshape(prefix_shape + [self.output_dim]) - - return outputs diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/gridencoder.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/gridencoder.py deleted file mode 100644 index 1a7487aee6f3..000000000000 --- a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/gridencoder.py +++ /dev/null @@ -1,287 +0,0 @@ -import math - -import _gridencoder as _backend -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.cuda.amp import custom_bwd, custom_fwd - -_gridtype_to_id = { - 'hash': 0, - 'tiled': 1, -} - -_interp_to_id = { - 'linear': 0, - 'smoothstep': 1, -} - - -class _grid_encode(Function): - @staticmethod - @custom_fwd - def forward( - ctx, - inputs, - embeddings, - offsets, - per_level_scale, - base_resolution, - calc_grad_inputs=False, - gridtype=0, - align_corners=False, - interpolation=0, - max_level=None, - ): - # inputs: [B, D], float in [0, 1] - # embeddings: [sO, C], float - # offsets: [L + 1], int - # RETURN: [B, F], float - - inputs = inputs.contiguous() - - B, D = inputs.shape # batch size, coord dim - L = offsets.shape[0] - 1 # level - C = embeddings.shape[1] # embedding dim for each level - S = np.log2(per_level_scale) # resolution multiplier at each level, apply log2 for later CUDA exp2f - H = base_resolution # base resolution - - max_level = L if max_level is None else max(min(int(math.ceil(max_level * L)), L), 1) - - # manually handle autocast (only use half precision embeddings, inputs must be float for enough precision) - # if C % 2 != 0, force float, since half for atomicAdd is very slow. - if torch.is_autocast_enabled() and C % 2 == 0: - embeddings = embeddings.to(torch.half) - - # L first, optimize cache for cuda kernel, but needs an extra permute later - outputs = torch.empty(L, B, C, device=inputs.device, dtype=embeddings.dtype) - - # zero init if we only calculate partial levels - if max_level < L: - outputs.zero_() - - if calc_grad_inputs: - dy_dx = torch.empty(B, L * D * C, device=inputs.device, dtype=embeddings.dtype) - if max_level < L: - dy_dx.zero_() - else: - dy_dx = None - - _backend.grid_encode_forward( - inputs, - embeddings, - offsets, - outputs, - B, - D, - C, - L, - max_level, - S, - H, - dy_dx, - gridtype, - align_corners, - interpolation, - ) - - # permute back to [B, L * C] - outputs = outputs.permute(1, 0, 2).reshape(B, L * C) - - ctx.save_for_backward(inputs, embeddings, offsets, dy_dx) - ctx.dims = [B, D, C, L, S, H, gridtype, interpolation, max_level] - ctx.align_corners = align_corners - - return outputs - - @staticmethod - # @once_differentiable - @custom_bwd - def backward(ctx, grad): - - inputs, embeddings, offsets, dy_dx = ctx.saved_tensors - B, D, C, L, S, H, gridtype, interpolation, max_level = ctx.dims - align_corners = ctx.align_corners - - # grad: [B, L * C] --> [L, B, C] - grad = grad.view(B, L, C).permute(1, 0, 2).contiguous() - - grad_embeddings = torch.zeros_like(embeddings) - - if dy_dx is not None: - grad_inputs = torch.zeros_like(inputs, dtype=embeddings.dtype) - else: - grad_inputs = None - - _backend.grid_encode_backward( - grad, - inputs, - embeddings, - offsets, - grad_embeddings, - B, - D, - C, - L, - max_level, - S, - H, - dy_dx, - grad_inputs, - gridtype, - align_corners, - interpolation, - ) - - if dy_dx is not None: - grad_inputs = grad_inputs.to(inputs.dtype) - - return grad_inputs, grad_embeddings, None, None, None, None, None, None, None, None - - -grid_encode = _grid_encode.apply - - -class GridEncoder(nn.Module): - def __init__( - self, - input_dim=3, - num_levels=16, - level_dim=2, - per_level_scale=2, - base_resolution=16, - log2_hashmap_size=19, - desired_resolution=None, - gridtype='hash', - align_corners=False, - interpolation='linear', - ): - super().__init__() - - # the finest resolution desired at the last level, if provided, overridee per_level_scale - if desired_resolution is not None: - per_level_scale = np.exp2(np.log2(desired_resolution / base_resolution) / (num_levels - 1)) - - self.input_dim = input_dim # coord dims, 2 or 3 - self.num_levels = num_levels # num levels, each level multiply resolution by 2 - self.level_dim = level_dim # encode channels per level - self.per_level_scale = per_level_scale # multiply resolution by this scale at each level. - self.log2_hashmap_size = log2_hashmap_size - self.base_resolution = base_resolution - self.output_dim = num_levels * level_dim - self.gridtype = gridtype - self.gridtype_id = _gridtype_to_id[gridtype] # "tiled" or "hash" - self.interpolation = interpolation - self.interp_id = _interp_to_id[interpolation] # "linear" or "smoothstep" - self.align_corners = align_corners - - # allocate parameters - offsets = [] - offset = 0 - self.max_params = 2 ** log2_hashmap_size - for i in range(num_levels): - resolution = int(np.ceil(base_resolution * per_level_scale ** i)) - params_in_level = min(self.max_params, (resolution) ** input_dim) # limit max number - params_in_level = int(np.ceil(params_in_level / 8) * 8) # make divisible - offsets.append(offset) - offset += params_in_level - offsets.append(offset) - offsets = torch.from_numpy(np.array(offsets, dtype=np.int32)) - self.register_buffer('offsets', offsets) - - self.n_params = offsets[-1] * level_dim - - # parameters - self.embeddings = nn.Parameter(torch.empty(offset, level_dim)) - - self.reset_parameters() - - def reset_parameters(self): - std = 1e-4 - self.embeddings.data.uniform_(-std, std) - - def __repr__(self): - return f"GridEncoder: input_dim={self.input_dim} num_levels={self.num_levels} level_dim={self.level_dim} resolution={self.base_resolution} -> {int(round(self.base_resolution * self.per_level_scale ** (self.num_levels - 1)))} per_level_scale={self.per_level_scale:.4f} params={tuple(self.embeddings.shape)} gridtype={self.gridtype} align_corners={self.align_corners} interpolation={self.interpolation}" - - def forward(self, inputs, bound=1, max_level=None): - # inputs: [..., input_dim], normalized real world positions in [-bound, bound] - # max_level: only calculate first max_level levels (None will use all levels) - # return: [..., num_levels * level_dim] - - inputs = (inputs + bound) / (2 * bound) # map to [0, 1] - - # print('inputs', inputs.shape, inputs.dtype, inputs.min().item(), inputs.max().item()) - - prefix_shape = list(inputs.shape[:-1]) - inputs = inputs.view(-1, self.input_dim) - - outputs = grid_encode( - inputs, - self.embeddings, - self.offsets, - self.per_level_scale, - self.base_resolution, - inputs.requires_grad, - self.gridtype_id, - self.align_corners, - self.interp_id, - max_level, - ) - outputs = outputs.view(prefix_shape + [self.output_dim]) - - # print('outputs', outputs.shape, outputs.dtype, outputs.min().item(), outputs.max().item()) - - return outputs - - # always run in float precision! - @torch.cuda.amp.autocast(enabled=False) - def grad_total_variation(self, weight=1e-7, inputs=None, bound=1, B=1000000): - # inputs: [..., input_dim], float in [-b, b], location to calculate TV loss. - - D = self.input_dim - C = self.embeddings.shape[1] # embedding dim for each level - L = self.offsets.shape[0] - 1 # level - S = np.log2(self.per_level_scale) # resolution multiplier at each level, apply log2 for later CUDA exp2f - H = self.base_resolution # base resolution - - if inputs is None: - # randomized in [0, 1] - inputs = torch.rand(B, self.input_dim, device=self.embeddings.device) - else: - inputs = (inputs + bound) / (2 * bound) # map to [0, 1] - inputs = inputs.view(-1, self.input_dim) - B = inputs.shape[0] - - if self.embeddings.grad is None: - raise ValueError('grad is None, should be called after loss.backward() and before optimizer.step()!') - - _backend.grad_total_variation( - inputs, - self.embeddings, - self.embeddings.grad, - self.offsets, - weight, - B, - D, - C, - L, - S, - H, - self.gridtype_id, - self.align_corners, - ) - - @torch.cuda.amp.autocast(enabled=False) - def grad_weight_decay(self, weight=0.1): - # level-wise meaned weight decay (ref: zip-nerf) - - B = self.embeddings.shape[0] # size of embedding - C = self.embeddings.shape[1] # embedding dim for each level - L = self.offsets.shape[0] - 1 # level - - if self.embeddings.grad is None: - raise ValueError('grad is None, should be called after loss.backward() and before optimizer.step()!') - - _backend.grad_weight_decay(self.embeddings, self.embeddings.grad, self.offsets, weight, B, C, L) diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/raymarching.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/raymarching.py deleted file mode 100644 index c0f15641fa5a..000000000000 --- a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/raymarching.py +++ /dev/null @@ -1,551 +0,0 @@ -import time - -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.cuda.amp import custom_bwd, custom_fwd - -# lazy building: -# `import raymarching` will not immediately build the extension, only if you actually call any functions. - -BACKEND = None - - -def get_backend(): - global BACKEND - - if BACKEND is None: - try: - import _raymarching as _backend - except ImportError: - from .backend import _backend - - BACKEND = _backend - - return BACKEND - - -# ---------------------------------------- -# utils -# ---------------------------------------- - - -class _near_far_from_aabb(Function): - @staticmethod - @custom_fwd(cast_inputs=torch.float32) - def forward(ctx, rays_o, rays_d, aabb, min_near=0.2): - ''' near_far_from_aabb, CUDA implementation - Calculate rays' intersection time (near and far) with aabb - Args: - rays_o: float, [N, 3] - rays_d: float, [N, 3] - aabb: float, [6], (xmin, ymin, zmin, xmax, ymax, zmax) - min_near: float, scalar - Returns: - nears: float, [N] - fars: float, [N] - ''' - if not rays_o.is_cuda: - rays_o = rays_o.cuda() - if not rays_d.is_cuda: - rays_d = rays_d.cuda() - - rays_o = rays_o.contiguous().view(-1, 3) - rays_d = rays_d.contiguous().view(-1, 3) - - N = rays_o.shape[0] # num rays - - nears = torch.empty(N, dtype=rays_o.dtype, device=rays_o.device) - fars = torch.empty(N, dtype=rays_o.dtype, device=rays_o.device) - - get_backend().near_far_from_aabb(rays_o, rays_d, aabb, N, min_near, nears, fars) - - return nears, fars - - -near_far_from_aabb = _near_far_from_aabb.apply - - -class _sph_from_ray(Function): - @staticmethod - @custom_fwd(cast_inputs=torch.float32) - def forward(ctx, rays_o, rays_d, radius): - ''' sph_from_ray, CUDA implementation - get spherical coordinate on the background sphere from rays. - Assume rays_o are inside the Sphere(radius). - Args: - rays_o: [N, 3] - rays_d: [N, 3] - radius: scalar, float - Return: - coords: [N, 2], in [-1, 1], theta and phi on a sphere. (further-surface) - ''' - if not rays_o.is_cuda: - rays_o = rays_o.cuda() - if not rays_d.is_cuda: - rays_d = rays_d.cuda() - - rays_o = rays_o.contiguous().view(-1, 3) - rays_d = rays_d.contiguous().view(-1, 3) - - N = rays_o.shape[0] # num rays - - coords = torch.empty(N, 2, dtype=rays_o.dtype, device=rays_o.device) - - get_backend().sph_from_ray(rays_o, rays_d, radius, N, coords) - - return coords - - -sph_from_ray = _sph_from_ray.apply - - -class _morton3D(Function): - @staticmethod - def forward(ctx, coords): - ''' morton3D, CUDA implementation - Args: - coords: [N, 3], int32, in [0, 128) (for some reason there is no uint32 tensor in torch...) - TODO: check if the coord range is valid! (current 128 is safe) - Returns: - indices: [N], int32, in [0, 128^3) - - ''' - if not coords.is_cuda: - coords = coords.cuda() - - N = coords.shape[0] - - indices = torch.empty(N, dtype=torch.int32, device=coords.device) - - get_backend().morton3D(coords.int(), N, indices) - - return indices - - -morton3D = _morton3D.apply - - -class _morton3D_invert(Function): - @staticmethod - def forward(ctx, indices): - ''' morton3D_invert, CUDA implementation - Args: - indices: [N], int32, in [0, 128^3) - Returns: - coords: [N, 3], int32, in [0, 128) - - ''' - if not indices.is_cuda: - indices = indices.cuda() - - N = indices.shape[0] - - coords = torch.empty(N, 3, dtype=torch.int32, device=indices.device) - - get_backend().morton3D_invert(indices.int(), N, coords) - - return coords - - -morton3D_invert = _morton3D_invert.apply - - -class _packbits(Function): - @staticmethod - @custom_fwd(cast_inputs=torch.float32) - def forward(ctx, grid, thresh, bitfield=None): - ''' packbits, CUDA implementation - Pack up the density grid into a bit field to accelerate ray marching. - Args: - grid: float, [C, H * H * H], assume H % 2 == 0 - thresh: float, threshold - Returns: - bitfield: uint8, [C, H * H * H / 8] - ''' - if not grid.is_cuda: - grid = grid.cuda() - grid = grid.contiguous() - - C = grid.shape[0] - H3 = grid.shape[1] - N = C * H3 // 8 - - if bitfield is None: - bitfield = torch.empty(N, dtype=torch.uint8, device=grid.device) - - get_backend().packbits(grid, N, thresh, bitfield) - - return bitfield - - -packbits = _packbits.apply - - -class _flatten_rays(Function): - @staticmethod - def forward(ctx, rays, M): - ''' flatten rays - Args: - rays: [N, 2], all rays' (point_offset, point_count), - M: scalar, int, count of points (we cannot get this info from rays unfortunately...) - Returns: - res: [M], flattened ray index. - ''' - if not rays.is_cuda: - rays = rays.cuda() - rays = rays.contiguous() - - N = rays.shape[0] - - res = torch.zeros(M, dtype=torch.int, device=rays.device) - - get_backend().flatten_rays(rays, N, M, res) - - return res - - -flatten_rays = _flatten_rays.apply - -# ---------------------------------------- -# train functions -# ---------------------------------------- - - -class _march_rays_train(Function): - @staticmethod - @custom_fwd(cast_inputs=torch.float32) - def forward( - ctx, - rays_o, - rays_d, - bound, - density_bitfield, - C, - H, - nears, - fars, - perturb=False, - dt_gamma=0, - max_steps=1024, - contract=False, - ): - ''' march rays to generate points (forward only) - Args: - rays_o/d: float, [N, 3] - bound: float, scalar - density_bitfield: uint8: [CHHH // 8] - C: int - H: int - nears/fars: float, [N] - step_counter: int32, (2), used to count the actual number of generated points. - mean_count: int32, estimated mean steps to accelerate training. (but will randomly drop rays if the actual point count exceeded this threshold.) - perturb: bool - align: int, pad output so its size is dividable by align, set to -1 to disable. - force_all_rays: bool, ignore step_counter and mean_count, always calculate all rays. Useful if rendering the whole image, instead of some rays. - dt_gamma: float, called cone_angle in instant-ngp, exponentially accelerate ray marching if > 0. (very significant effect, but generally lead to worse performance) - max_steps: int, max number of sampled points along each ray, also affect min_stepsize. - Returns: - xyzs: float, [M, 3], all generated points' coords. (all rays concated, need to use `rays` to extract points belonging to each ray) - dirs: float, [M, 3], all generated points' view dirs. - ts: float, [M, 2], all generated points' ts. - rays: int32, [N, 2], all rays' (point_offset, point_count), e.g., xyzs[rays[i, 0]:(rays[i, 0] + rays[i, 1])] --> points belonging to rays[i, 0] - ''' - - if not rays_o.is_cuda: - rays_o = rays_o.cuda() - if not rays_d.is_cuda: - rays_d = rays_d.cuda() - if not density_bitfield.is_cuda: - density_bitfield = density_bitfield.cuda() - - rays_o = rays_o.float().contiguous().view(-1, 3) - rays_d = rays_d.float().contiguous().view(-1, 3) - density_bitfield = density_bitfield.contiguous() - - N = rays_o.shape[0] # num rays - - step_counter = torch.zeros(1, dtype=torch.int32, device=rays_o.device) # point counter, ray counter - - if perturb: - noises = torch.rand(N, dtype=rays_o.dtype, device=rays_o.device) - else: - noises = torch.zeros(N, dtype=rays_o.dtype, device=rays_o.device) - - # first pass: write rays, get total number of points M to render - rays = torch.empty(N, 2, dtype=torch.int32, device=rays_o.device) # id, offset, num_steps - get_backend().march_rays_train( - rays_o, - rays_d, - density_bitfield, - bound, - contract, - dt_gamma, - max_steps, - N, - C, - H, - nears, - fars, - None, - None, - None, - rays, - step_counter, - noises, - ) - - # allocate based on M - M = step_counter.item() - # print(M, N) - # print(rays[:, 0].max()) - - xyzs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) - dirs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) - ts = torch.zeros(M, 2, dtype=rays_o.dtype, device=rays_o.device) - - # second pass: write outputs - get_backend().march_rays_train( - rays_o, - rays_d, - density_bitfield, - bound, - contract, - dt_gamma, - max_steps, - N, - C, - H, - nears, - fars, - xyzs, - dirs, - ts, - rays, - step_counter, - noises, - ) - - return xyzs, dirs, ts, rays - - -march_rays_train = _march_rays_train.apply - - -class _composite_rays_train(Function): - @staticmethod - @custom_fwd(cast_inputs=torch.float32) - def forward(ctx, sigmas, rgbs, ts, rays, T_thresh=1e-4, binarize=False): - ''' composite rays' rgbs, according to the ray marching formula. - Args: - rgbs: float, [M, 3] - sigmas: float, [M,] - ts: float, [M, 2] - rays: int32, [N, 3] - Returns: - weights: float, [M] - weights_sum: float, [N,], the alpha channel - depth: float, [N, ], the Depth - image: float, [N, 3], the RGB channel (after multiplying alpha!) - ''' - - sigmas = sigmas.float().contiguous() - rgbs = rgbs.float().contiguous() - - M = sigmas.shape[0] - N = rays.shape[0] - - weights = torch.zeros(M, dtype=sigmas.dtype, device=sigmas.device) # may leave unmodified, so init with 0 - weights_sum = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) - - depth = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) - image = torch.empty(N, 3, dtype=sigmas.dtype, device=sigmas.device) - - get_backend().composite_rays_train_forward( - sigmas, rgbs, ts, rays, M, N, T_thresh, binarize, weights, weights_sum, depth, image - ) - - ctx.save_for_backward(sigmas, rgbs, ts, rays, weights_sum, depth, image) - ctx.dims = [M, N, T_thresh, binarize] - - return weights, weights_sum, depth, image - - @staticmethod - @custom_bwd - def backward(ctx, grad_weights, grad_weights_sum, grad_depth, grad_image): - - grad_weights = grad_weights.contiguous() - grad_weights_sum = grad_weights_sum.contiguous() - grad_depth = grad_depth.contiguous() - grad_image = grad_image.contiguous() - - sigmas, rgbs, ts, rays, weights_sum, depth, image = ctx.saved_tensors - M, N, T_thresh, binarize = ctx.dims - - grad_sigmas = torch.zeros_like(sigmas) - grad_rgbs = torch.zeros_like(rgbs) - - get_backend().composite_rays_train_backward( - grad_weights, - grad_weights_sum, - grad_depth, - grad_image, - sigmas, - rgbs, - ts, - rays, - weights_sum, - depth, - image, - M, - N, - T_thresh, - binarize, - grad_sigmas, - grad_rgbs, - ) - - return grad_sigmas, grad_rgbs, None, None, None, None - - -composite_rays_train = _composite_rays_train.apply - -# ---------------------------------------- -# infer functions -# ---------------------------------------- - - -class _march_rays(Function): - @staticmethod - @custom_fwd(cast_inputs=torch.float32) - def forward( - ctx, - n_alive, - n_step, - rays_alive, - rays_t, - rays_o, - rays_d, - bound, - density_bitfield, - C, - H, - near, - far, - perturb=False, - dt_gamma=0, - max_steps=1024, - contract=False, - ): - ''' march rays to generate points (forward only, for inference) - Args: - n_alive: int, number of alive rays - n_step: int, how many steps we march - rays_alive: int, [N], the alive rays' IDs in N (N >= n_alive, but we only use first n_alive) - rays_t: float, [N], the alive rays' time, we only use the first n_alive. - rays_o/d: float, [N, 3] - bound: float, scalar - density_bitfield: uint8: [CHHH // 8] - C: int - H: int - nears/fars: float, [N] - align: int, pad output so its size is dividable by align, set to -1 to disable. - perturb: bool/int, int > 0 is used as the random seed. - dt_gamma: float, called cone_angle in instant-ngp, exponentially accelerate ray marching if > 0. (very significant effect, but generally lead to worse performance) - max_steps: int, max number of sampled points along each ray, also affect min_stepsize. - Returns: - xyzs: float, [n_alive * n_step, 3], all generated points' coords - dirs: float, [n_alive * n_step, 3], all generated points' view dirs. - ts: float, [n_alive * n_step, 2], all generated points' ts - ''' - - if not rays_o.is_cuda: - rays_o = rays_o.cuda() - if not rays_d.is_cuda: - rays_d = rays_d.cuda() - - rays_o = rays_o.float().contiguous().view(-1, 3) - rays_d = rays_d.float().contiguous().view(-1, 3) - - M = n_alive * n_step - - xyzs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) - dirs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) - ts = torch.zeros(M, 2, dtype=rays_o.dtype, device=rays_o.device) # 2 vals, one for rgb, one for depth - - if perturb: - # torch.manual_seed(perturb) # test_gui uses spp index as seed - noises = torch.rand(n_alive, dtype=rays_o.dtype, device=rays_o.device) - else: - noises = torch.zeros(n_alive, dtype=rays_o.dtype, device=rays_o.device) - - get_backend().march_rays( - n_alive, - n_step, - rays_alive, - rays_t, - rays_o, - rays_d, - bound, - contract, - dt_gamma, - max_steps, - C, - H, - density_bitfield, - near, - far, - xyzs, - dirs, - ts, - noises, - ) - - return xyzs, dirs, ts - - -march_rays = _march_rays.apply - - -class _composite_rays(Function): - @staticmethod - @custom_fwd(cast_inputs=torch.float32) # need to cast sigmas & rgbs to float - def forward( - ctx, - n_alive, - n_step, - rays_alive, - rays_t, - sigmas, - rgbs, - ts, - weights_sum, - depth, - image, - T_thresh=1e-2, - binarize=False, - ): - ''' composite rays' rgbs, according to the ray marching formula. (for inference) - Args: - n_alive: int, number of alive rays - n_step: int, how many steps we march - rays_alive: int, [n_alive], the alive rays' IDs in N (N >= n_alive) - rays_t: float, [N], the alive rays' time - sigmas: float, [n_alive * n_step,] - rgbs: float, [n_alive * n_step, 3] - ts: float, [n_alive * n_step, 2] - In-place Outputs: - weights_sum: float, [N,], the alpha channel - depth: float, [N,], the depth value - image: float, [N, 3], the RGB channel (after multiplying alpha!) - ''' - sigmas = sigmas.float().contiguous() - rgbs = rgbs.float().contiguous() - get_backend().composite_rays( - n_alive, n_step, T_thresh, binarize, rays_alive, rays_t, sigmas, rgbs, ts, weights_sum, depth, image - ) - return tuple() - - -composite_rays = _composite_rays.apply diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/shencoder.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/shencoder.py deleted file mode 100644 index a97332089e52..000000000000 --- a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/shencoder.py +++ /dev/null @@ -1,82 +0,0 @@ -import _shencoder as _backend -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.cuda.amp import custom_bwd, custom_fwd - - -class _sh_encoder(Function): - @staticmethod - @custom_fwd(cast_inputs=torch.float32) # force float32 for better precision - def forward(ctx, inputs, degree, calc_grad_inputs=False): - # inputs: [B, input_dim], float in [-1, 1] - # RETURN: [B, F], float - - inputs = inputs.contiguous() - B, input_dim = inputs.shape # batch size, coord dim - output_dim = degree ** 2 - - outputs = torch.empty(B, output_dim, dtype=inputs.dtype, device=inputs.device) - - if calc_grad_inputs: - dy_dx = torch.empty(B, input_dim * output_dim, dtype=inputs.dtype, device=inputs.device) - else: - dy_dx = None - - _backend.sh_encode_forward(inputs, outputs, B, input_dim, degree, dy_dx) - - ctx.save_for_backward(inputs, dy_dx) - ctx.dims = [B, input_dim, degree] - - return outputs - - @staticmethod - # @once_differentiable - @custom_bwd - def backward(ctx, grad): - # grad: [B, C * C] - - inputs, dy_dx = ctx.saved_tensors - - if dy_dx is not None: - grad = grad.contiguous() - B, input_dim, degree = ctx.dims - grad_inputs = torch.zeros_like(inputs) - _backend.sh_encode_backward(grad, inputs, B, input_dim, degree, dy_dx, grad_inputs) - return grad_inputs, None, None - else: - return None, None, None - - -sh_encode = _sh_encoder.apply - - -class SHEncoder(nn.Module): - def __init__(self, input_dim=3, degree=4): - super().__init__() - - self.input_dim = input_dim # coord dims, must be 3 - self.degree = degree # 0 ~ 4 - self.output_dim = degree ** 2 - - assert self.input_dim == 3, "SH encoder only support input dim == 3" - assert self.degree > 0 and self.degree <= 8, "SH encoder only supports degree in [1, 8]" - - def __repr__(self): - return f"SHEncoder: input_dim={self.input_dim} degree={self.degree}" - - def forward(self, inputs, size=1): - # inputs: [..., input_dim], normalized real world positions in [-size, size] - # return: [..., degree^2] - - inputs = inputs / size # [-1, 1] - - prefix_shape = list(inputs.shape[:-1]) - inputs = inputs.reshape(-1, self.input_dim) - - outputs = sh_encode(inputs, self.degree, inputs.requires_grad) - outputs = outputs.reshape(prefix_shape + [self.output_dim]) - - return outputs diff --git a/nemo/collections/multimodal/modules/nerf/utils/trt_engine.py b/nemo/collections/multimodal/modules/nerf/utils/trt_engine.py deleted file mode 100644 index ebf0a43da596..000000000000 --- a/nemo/collections/multimodal/modules/nerf/utils/trt_engine.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import math -import os -from collections import OrderedDict -from copy import copy - -import numpy as np -import tensorrt as trt -import torch -from einops import repeat -from polygraphy import cuda -from polygraphy.backend.common import bytes_from_path -from polygraphy.backend.trt import engine_from_bytes -from polygraphy.backend.trt import util as trt_util - -TRT_LOGGER = trt.Logger(trt.Logger.ERROR) - -# Map of numpy dtype -> torch dtype -numpy_to_torch_dtype_dict = { - np.uint8: torch.uint8, - np.int8: torch.int8, - np.int16: torch.int16, - np.int32: torch.int32, - np.int64: torch.int64, - np.float16: torch.float16, - np.float32: torch.float32, - np.float64: torch.float64, - np.complex64: torch.complex64, - np.complex128: torch.complex128, -} -if np.version.full_version >= "1.24.0": - numpy_to_torch_dtype_dict[np.bool_] = torch.bool -else: - numpy_to_torch_dtype_dict[np.bool] = torch.bool - -# Map of torch dtype -> numpy dtype -torch_to_numpy_dtype_dict = {value: key for (key, value) in numpy_to_torch_dtype_dict.items()} - - -def device_view(t): - return cuda.DeviceView(ptr=t.data_ptr(), shape=t.shape, dtype=torch_to_numpy_dtype_dict[t.dtype]) - - -class Engine: - def __init__( - self, engine_path, - ): - self.engine_path = engine_path - self.engine = None - self.context = None - self.buffers = OrderedDict() - self.tensors = OrderedDict() - - def __del__(self): - [buf.free() for buf in self.buffers.values() if isinstance(buf, cuda.DeviceArray)] - del self.engine - del self.context - del self.buffers - del self.tensors - - def set_engine(self, stream, shape_dict): - self.load() - self.activate() - self.stream = stream - self.allocate_buffers(shape_dict, device='cuda') - - def load(self): - print(f"Loading TensorRT engine: {self.engine_path}") - self.engine = engine_from_bytes(bytes_from_path(self.engine_path)) - - def activate(self): - self.context = self.engine.create_execution_context() - - def allocate_buffers(self, shape_dict=None, device="cuda"): - for idx in range(trt_util.get_bindings_per_profile(self.engine)): - binding = self.engine[idx] - if shape_dict and binding in shape_dict: - shape = shape_dict[binding] - else: - shape = self.engine.get_binding_shape(binding) - dtype = trt.nptype(self.engine.get_binding_dtype(binding)) - if self.engine.binding_is_input(binding): - self.context.set_binding_shape(idx, shape) - tensor = torch.empty(tuple(shape), dtype=numpy_to_torch_dtype_dict[dtype]).to(device=device) - self.tensors[binding] = tensor - self.buffers[binding] = cuda.DeviceView(ptr=tensor.data_ptr(), shape=shape, dtype=dtype) - - def infer(self, feed_dict): - stream = self.stream - start_binding, end_binding = trt_util.get_active_profile_bindings(self.context) - # shallow copy of ordered dict - device_buffers = copy(self.buffers) - for name, buf in feed_dict.items(): - assert isinstance(buf, cuda.DeviceView) - device_buffers[name] = buf - bindings = [0] * start_binding + [buf.ptr for buf in device_buffers.values()] - noerror = self.context.execute_async_v2(bindings=bindings, stream_handle=stream.ptr) - if not noerror: - raise ValueError(f"ERROR: inference failed.") - - return self.tensors - - -def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): - if schedule == "linear": - betas = torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2 - - elif schedule == "cosine": - timesteps = torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s - alphas = timesteps / (1 + cosine_s) * np.pi / 2 - alphas = torch.cos(alphas).pow(2) - alphas = alphas / alphas[0] - betas = 1 - alphas[1:] / alphas[:-1] - betas = np.clip(betas, a_min=0, a_max=0.999) - - elif schedule == "sqrt_linear": - betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) - elif schedule == "sqrt": - betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5 - else: - raise ValueError(f"schedule '{schedule}' unknown.") - return betas.numpy() - - -def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps, verbose=True): - if ddim_discr_method == 'uniform': - c = num_ddpm_timesteps // num_ddim_timesteps - ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c))) - elif ddim_discr_method == 'quad': - ddim_timesteps = ((np.linspace(0, np.sqrt(num_ddpm_timesteps * 0.8), num_ddim_timesteps)) ** 2).astype(int) - else: - raise NotImplementedError(f'There is no ddim discretization method called "{ddim_discr_method}"') - - # assert ddim_timesteps.shape[0] == num_ddim_timesteps - # add one to get the final alpha values right (the ones from first scale to data during sampling) - steps_out = ddim_timesteps + 1 - if verbose: - print(f'Selected timesteps for ddim sampler: {steps_out}') - return steps_out - - -def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbose=True): - # select alphas for computing the variance schedule - alphas = alphacums[ddim_timesteps] - alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_timesteps[:-1]].tolist()) - - # according the the formula provided in https://arxiv.org/abs/2010.02502 - sigmas = eta * np.sqrt((1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev)) - if verbose: - print(f'Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}') - print( - f'For the chosen value of eta, which is {eta}, ' - f'this results in the following sigma_t schedule for ddim sampler {sigmas}' - ) - return sigmas, alphas, alphas_prev - - -def noise_like(shape, device, repeat=False): - repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1))) - noise = lambda: torch.randn(shape, device=device) - return repeat_noise() if repeat else noise() diff --git a/nemo/collections/multimodal/modules/stable_diffusion/__init__.py b/nemo/collections/multimodal/modules/stable_diffusion/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/modules/stable_diffusion/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/modules/stable_diffusion/attention.py b/nemo/collections/multimodal/modules/stable_diffusion/attention.py deleted file mode 100644 index 07f37ece3d6f..000000000000 --- a/nemo/collections/multimodal/modules/stable_diffusion/attention.py +++ /dev/null @@ -1,408 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import math -from inspect import isfunction - -import torch -import torch.nn.functional as F -from einops import rearrange, repeat -from group_norm import GroupNormOpt -from torch import einsum, nn -from torch._dynamo import disable - -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import checkpoint - - -def check_cuda(): - if not torch.cuda.is_available(): - raise RuntimeError('CUDA is not available') - cur_device = torch.cuda.current_device() - dprops = torch.cuda.get_device_properties(cur_device) - - is_sm75 = dprops.major == 7 and dprops.minor == 5 - is_sm8x = dprops.major == 8 and dprops.minor >= 0 - is_sm90 = dprops.major == 9 and dprops.minor >= 0 - - return is_sm8x or is_sm75 or is_sm90 - - -try: - import torch.nn as nn - from flash_attn.modules.mha import FlashCrossAttention, FlashSelfAttention - - flash_attn_installed = check_cuda() - print("FlashAttention Installed") - - # Disable TorchDynamo on FlashAttention - FlashSelfAttention.forward = disable(FlashSelfAttention.forward) - FlashCrossAttention.forward = disable(FlashCrossAttention.forward) -except ImportError: - flash_attn_installed = False - - -def exists(val): - return val is not None - - -def uniq(arr): - return {el: True for el in arr}.keys() - - -def default(val, d): - if exists(val): - return val - if isinstance(d, (torch.Tensor, float, int)): - return d - return d() if isfunction(d) else d - - -def max_neg_value(t): - return -torch.finfo(t.dtype).max - - -def init_(tensor): - dim = tensor.shape[-1] - std = 1 / math.sqrt(dim) - tensor.uniform_(-std, std) - return tensor - - -# feedforward -class GEGLU(nn.Module): - def __init__(self, dim_in, dim_out): - super().__init__() - self.proj = nn.Linear(dim_in, dim_out * 2) - - def forward(self, x): - x, gate = self.proj(x).chunk(2, dim=-1) - return x * F.gelu(gate) - - -class FeedForward(nn.Module): - def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0): - super().__init__() - inner_dim = int(dim * mult) - dim_out = default(dim_out, dim) - project_in = nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU()) if not glu else GEGLU(dim, inner_dim) - - self.net = nn.Sequential(project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim_out)) - - def forward(self, x): - return self.net(x) - - -def zero_module(module): - """ - Zero out the parameters of a module and return it. - """ - for p in module.parameters(): - p.detach().zero_() - return module - - -def Normalize(in_channels): - return GroupNormOpt(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True) - - -class LinearAttention(nn.Module): - def __init__(self, dim, heads=4, dim_head=32): - super().__init__() - self.heads = heads - hidden_dim = dim_head * heads - self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias=False) - self.to_out = nn.Conv2d(hidden_dim, dim, 1) - - def forward(self, x): - b, c, h, w = x.shape - qkv = self.to_qkv(x) - q, k, v = rearrange(qkv, 'b (qkv heads c) h w -> qkv b heads c (h w)', heads=self.heads, qkv=3) - k = k.softmax(dim=-1) - context = torch.einsum('bhdn,bhen->bhde', k, v) - out = torch.einsum('bhde,bhdn->bhen', context, q) - out = rearrange(out, 'b heads c (h w) -> b (heads c) h w', heads=self.heads, h=h, w=w) - return self.to_out(out) - - -class SpatialSelfAttention(nn.Module): - def __init__(self, in_channels): - super().__init__() - self.in_channels = in_channels - - self.norm = Normalize(in_channels) - self.q = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) - self.k = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) - self.v = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) - self.proj_out = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) - - def forward(self, x): - h_ = x - h_ = self.norm(h_) - q = self.q(h_) - k = self.k(h_) - v = self.v(h_) - - # compute attention - b, c, h, w = q.shape - q = rearrange(q, 'b c h w -> b (h w) c') - k = rearrange(k, 'b c h w -> b c (h w)') - w_ = torch.einsum('bij,bjk->bik', q, k) - - w_ = w_ * (int(c) ** (-0.5)) - w_ = torch.nn.functional.softmax(w_, dim=2) - - # attend to values - v = rearrange(v, 'b c h w -> b c (h w)') - w_ = rearrange(w_, 'b i j -> b j i') - h_ = torch.einsum('bij,bjk->bik', v, w_) - h_ = rearrange(h_, 'b c (h w) -> b c h w', h=h) - h_ = self.proj_out(h_) - - return x + h_ - - -# b n (h d) -> (b h) n d -def rearrange_heads_outer(t: torch.Tensor, h: int) -> torch.Tensor: - b, n, ch = t.shape - return t.view(b, n, h, -1).transpose(1, 2).reshape(b * h, n, -1) - - -# (b h) n d -> b n (h d) -def rearrange_heads_inner(t: torch.Tensor, h: int) -> torch.Tensor: - b = t.shape[0] // h - n = t.shape[1] - return t.view(b, h, n, -1).transpose(1, 2).reshape(b, n, -1) - - -class CrossAttention(nn.Module): - def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.0, use_flash_attention=False): - super().__init__() - inner_dim = dim_head * heads - context_dim = default(context_dim, query_dim) - # make attention part be aware of self-attention/cross-attention - self.context_dim = context_dim - self.query_dim = query_dim - self.dim_head = dim_head - - self.scale = dim_head ** -0.5 - self.heads = heads - - self.to_q = nn.Linear(query_dim, inner_dim, bias=False) - self.to_k = nn.Linear(context_dim, inner_dim, bias=False) - self.to_v = nn.Linear(context_dim, inner_dim, bias=False) - - self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)) - self.use_flash_attention = use_flash_attention - - if dim_head <= 160 and (dim_head % 8) == 0 and flash_attn_installed: - if context_dim == query_dim: - self.flash_attn = FlashSelfAttention(softmax_scale=self.scale) - else: - self.flash_attn = FlashCrossAttention(softmax_scale=self.scale) - - def forward(self, x, context=None, mask=None): - h = self.heads - - q = self.to_q(x) - context = default(context, x) - k = self.to_k(context) - v = self.to_v(context) - - out = self._attention(q, k, v, mask) - - return self.to_out(out) - - def _attention(self, q, k, v, mask=None): - h = self.heads - - if ( - not flash_attn_installed - or not self.use_flash_attention - or q.dtype == torch.float32 - or (self.dim_head > 160 or (self.dim_head % 8) != 0) - or mask is not None - ): - # original implementation - # b n (h d) -> (b h) n d - q = rearrange_heads_outer(q, h) - k = rearrange_heads_outer(k, h) - v = rearrange_heads_outer(v, h) - - sim = einsum('b i d, b j d -> b i j', q, k) * self.scale - - if exists(mask): - # standard stable diffusion does not run into here - mask = mask.view(mask.shape[0], -1) - b, j = mask.shape - mask = mask.unsqueeze(1).expand(b, h, j).reshape(b * h, 1, j) # b j -> (b h) () j - sim.masked_fill_(~mask, self.max_neg[sim.dtype]) - - # attention, what we cannot get enough of - attn = sim.softmax(dim=-1) - - out = einsum('b i j, b j d -> b i d', attn, v) - - # (b h) n d -> b n (h d) - out = rearrange_heads_inner(out, h) - elif self.context_dim == self.query_dim: - # self-attention - qkv = torch.stack([q, k, v], dim=2) - b, s, t, hd = qkv.shape - d = hd // h - qkv = qkv.view(b, s, t, h, d) - - out = self.flash_attn(qkv) - out = out.view(b, s, hd) - else: - # cross-attention - kv = torch.stack([k, v], dim=2) - - s_q = q.shape[1] - b, s_kv, t, hd = kv.shape - d = hd // h - - q = q.view(b, s_q, h, d) - kv = kv.view(b, s_kv, t, h, d) - - out = self.flash_attn(q, kv) - out = out.view(b, s_q, hd) - - return out - - -class BasicTransformerBlock(nn.Module): - def __init__( - self, - dim, - n_heads, - d_head, - dropout=0.0, - context_dim=None, - gated_ff=True, - use_checkpoint=False, - use_flash_attention=False, - disable_self_attn=False, - ): - super().__init__() - self.disable_self_attn = disable_self_attn - self.attn1 = CrossAttention( - query_dim=dim, - heads=n_heads, - dim_head=d_head, - dropout=dropout, - use_flash_attention=use_flash_attention, - context_dim=context_dim if self.disable_self_attn else None, - ) # is a self-attention - self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff) - self.attn2 = CrossAttention( - query_dim=dim, - context_dim=context_dim, - heads=n_heads, - dim_head=d_head, - dropout=dropout, - use_flash_attention=use_flash_attention, - ) # is self-attn if context is none - self.norm1 = nn.LayerNorm(dim) - self.norm2 = nn.LayerNorm(dim) - self.norm3 = nn.LayerNorm(dim) - self.use_checkpoint = use_checkpoint - - def forward(self, x, context=None): - if self.use_checkpoint: - return checkpoint(self._forward, (x, context), self.parameters(), self.use_checkpoint) - else: - return self._forward(x, context) - - def _forward(self, x, context=None): - x = self.attn1(self.norm1(x), context=context if self.disable_self_attn else None) + x - x = self.attn2(self.norm2(x), context=context) + x - x = self.ff(self.norm3(x)) + x - return x - - -class SpatialTransformer(nn.Module): - """ - Transformer block for image-like data. - First, project the input (aka embedding) - and reshape to b, t, d. - Then apply standard transformer action. - Finally, reshape to image - """ - - def __init__( - self, - in_channels, - n_heads, - d_head, - depth=1, - dropout=0.0, - context_dim=None, - disable_self_attn=False, - use_linear=False, - use_checkpoint=False, - use_flash_attention=False, - ): - super().__init__() - if exists(context_dim) and not isinstance(context_dim, list): - context_dim = [context_dim] - self.in_channels = in_channels - inner_dim = n_heads * d_head - self.norm = Normalize(in_channels) - - if not use_linear: - self.proj_in = nn.Conv2d(in_channels, inner_dim, kernel_size=1, stride=1, padding=0) - else: - self.proj_in = nn.Linear(in_channels, inner_dim) - - self.transformer_blocks = nn.ModuleList( - [ - BasicTransformerBlock( - inner_dim, - n_heads, - d_head, - dropout=dropout, - context_dim=context_dim[d], - use_checkpoint=use_checkpoint, - use_flash_attention=use_flash_attention, - disable_self_attn=disable_self_attn, - ) - for d in range(depth) - ] - ) - - if not use_linear: - self.proj_out = zero_module(nn.Conv2d(inner_dim, in_channels, kernel_size=1, stride=1, padding=0)) - else: - self.proj_out = zero_module(nn.Linear(in_channels, inner_dim)) - self.use_linear = use_linear - - def forward(self, x, context=None): - # note: if no context is given, cross-attention defaults to self-attention - if not isinstance(context, list): - context = [context] - b, c, h, w = x.shape - x_in = x - x = self.norm(x) - if not self.use_linear: - x = self.proj_in(x) - x = x.view(b, c, -1).transpose(1, 2) # b c h w -> b (h w) c - if self.use_linear: - x = self.proj_in(x) - for i, block in enumerate(self.transformer_blocks): - x = block(x, context=context[i]) - if self.use_linear: - x = self.proj_out(x) - x = x.transpose(1, 2).view(b, c, h, w) # b (h w) c -> b c h w - if not self.use_linear: - x = self.proj_out(x) - return x + x_in diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/__init__.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py deleted file mode 100644 index dbfab3ab4b07..000000000000 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py +++ /dev/null @@ -1,878 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# pytorch_diffusion + derived encoder decoder -import math -import numpy as np -import torch -import torch.nn as nn -from einops import rearrange -from group_norm import GroupNormOpt - -from nemo.collections.multimodal.modules.stable_diffusion.attention import LinearAttention -from nemo.collections.multimodal.parts.stable_diffusion.utils import instantiate_from_config - - -def get_timestep_embedding(timesteps, embedding_dim): - """ - This matches the implementation in Denoising Diffusion Probabilistic Models: - From Fairseq. - Build sinusoidal embeddings. - This matches the implementation in tensor2tensor, but differs slightly - from the description in Section 3.5 of "Attention Is All You Need". - """ - assert len(timesteps.shape) == 1 - - half_dim = embedding_dim // 2 - emb = math.log(10000) / (half_dim - 1) - emb = torch.exp(torch.arange(half_dim, dtype=torch.float32) * -emb) - emb = emb.to(device=timesteps.device) - emb = timesteps.float()[:, None] * emb[None, :] - emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1) - if embedding_dim % 2 == 1: # zero pad - emb = torch.nn.functional.pad(emb, (0, 1, 0, 0)) - return emb - - -def nonlinearity(x): - # swish - return torch.nn.functional.silu(x) - - -def Normalize(in_channels, num_groups=32, act=""): - return GroupNormOpt(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True, act=act) - - -class Upsample(nn.Module): - def __init__(self, in_channels, with_conv): - super().__init__() - self.with_conv = with_conv - if self.with_conv: - self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) - - def forward(self, x): - # Cast to float32 to as 'upsample_nearest2d_out_frame' op does not support bfloat16 - # TODO(yuya): Remove this cast once the issue is fixed in PyTorch - # https://github.com/pytorch/pytorch/issues/86679 - dtype = x.dtype - if dtype == torch.bfloat16: - x = x.to(torch.float32) - x = torch.nn.functional.interpolate(x, scale_factor=2.0, mode="nearest") - if dtype == torch.bfloat16: - x = x.to(dtype) - if self.with_conv: - x = self.conv(x) - return x - - -class Downsample(nn.Module): - def __init__(self, in_channels, with_conv): - super().__init__() - self.with_conv = with_conv - if self.with_conv: - # no asymmetric padding in torch conv, must do it ourselves - self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0) - - def forward(self, x): - if self.with_conv: - pad = (0, 1, 0, 1) - x = torch.nn.functional.pad(x, pad, mode="constant", value=0) - x = self.conv(x) - else: - x = torch.nn.functional.avg_pool2d(x, kernel_size=2, stride=2) - return x - - -class ResnetBlock(nn.Module): - def __init__(self, *, in_channels, out_channels=None, conv_shortcut=False, dropout, temb_channels=512): - super().__init__() - self.in_channels = in_channels - out_channels = in_channels if out_channels is None else out_channels - self.out_channels = out_channels - self.use_conv_shortcut = conv_shortcut - - self.norm1 = Normalize(in_channels, act="silu") - self.conv1 = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) - if temb_channels > 0: - self.temb_proj = torch.nn.Linear(temb_channels, out_channels) - self.norm2 = Normalize(out_channels, act="silu") - self.dropout = torch.nn.Dropout(dropout) - self.conv2 = torch.nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1) - if self.in_channels != self.out_channels: - if self.use_conv_shortcut: - self.conv_shortcut = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) - else: - self.nin_shortcut = torch.nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0) - - def forward(self, x, temb): - h = x - h = self.norm1(h) - h = self.conv1(h) - - if temb is not None: - h = h + self.temb_proj(nonlinearity(temb))[:, :, None, None] - - h = self.norm2(h) - h = self.dropout(h) - h = self.conv2(h) - - if self.in_channels != self.out_channels: - if self.use_conv_shortcut: - x = self.conv_shortcut(x) - else: - x = self.nin_shortcut(x) - - return x + h - - -class LinAttnBlock(LinearAttention): - """to match AttnBlock usage""" - - def __init__(self, in_channels): - super().__init__(dim=in_channels, heads=1, dim_head=in_channels) - - -class AttnBlock(nn.Module): - def __init__(self, in_channels): - super().__init__() - self.in_channels = in_channels - - self.norm = Normalize(in_channels) - self.q = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) - self.k = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) - self.v = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) - self.proj_out = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) - - def forward(self, x): - h_ = x - h_ = self.norm(h_) - q = self.q(h_) - k = self.k(h_) - v = self.v(h_) - - # compute attention - b, c, h, w = q.shape - q = q.reshape(b, c, h * w) - q = q.permute(0, 2, 1) # b,hw,c - k = k.reshape(b, c, h * w) # b,c,hw - w_ = torch.bmm(q, k) # b,hw,hw w[b,i,j]=sum_c q[b,i,c]k[b,c,j] - w_ = w_ * (int(c) ** (-0.5)) - w_ = torch.nn.functional.softmax(w_, dim=2) - - # attend to values - v = v.reshape(b, c, h * w) - w_ = w_.permute(0, 2, 1) # b,hw,hw (first hw of k, second of q) - h_ = torch.bmm(v, w_) # b, c,hw (hw of q) h_[b,c,j] = sum_i v[b,c,i] w_[b,i,j] - h_ = h_.reshape(b, c, h, w) - - h_ = self.proj_out(h_) - - return x + h_ - - -def make_attn(in_channels, attn_type="vanilla"): - assert attn_type in ["vanilla", "linear", "none"], f'attn_type {attn_type} unknown' - print(f"making attention of type '{attn_type}' with {in_channels} in_channels") - if attn_type == "vanilla": - return AttnBlock(in_channels) - elif attn_type == "none": - return nn.Identity(in_channels) - else: - return LinAttnBlock(in_channels) - - -class Model(nn.Module): - def __init__( - self, - *, - ch, - out_ch, - ch_mult=(1, 2, 4, 8), - num_res_blocks, - attn_resolutions, - dropout=0.0, - resamp_with_conv=True, - in_channels, - resolution, - use_timestep=True, - use_linear_attn=False, - attn_type="vanilla", - ): - super().__init__() - if use_linear_attn: - attn_type = "linear" - self.ch = ch - self.temb_ch = self.ch * 4 - self.num_resolutions = len(ch_mult) - self.num_res_blocks = num_res_blocks - self.resolution = resolution - self.in_channels = in_channels - - self.use_timestep = use_timestep - if self.use_timestep: - # timestep embedding - self.temb = nn.Module() - self.temb.dense = nn.ModuleList( - [torch.nn.Linear(self.ch, self.temb_ch), torch.nn.Linear(self.temb_ch, self.temb_ch),] - ) - - # downsampling - self.conv_in = torch.nn.Conv2d(in_channels, self.ch, kernel_size=3, stride=1, padding=1) - - curr_res = resolution - in_ch_mult = (1,) + tuple(ch_mult) - self.down = nn.ModuleList() - for i_level in range(self.num_resolutions): - block = nn.ModuleList() - attn = nn.ModuleList() - block_in = ch * in_ch_mult[i_level] - block_out = ch * ch_mult[i_level] - for i_block in range(self.num_res_blocks): - block.append( - ResnetBlock( - in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch, dropout=dropout - ) - ) - block_in = block_out - if curr_res in attn_resolutions: - attn.append(make_attn(block_in, attn_type=attn_type)) - down = nn.Module() - down.block = block - down.attn = attn - if i_level != self.num_resolutions - 1: - down.downsample = Downsample(block_in, resamp_with_conv) - curr_res = curr_res // 2 - self.down.append(down) - - # middle - self.mid = nn.Module() - self.mid.block_1 = ResnetBlock( - in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout - ) - self.mid.attn_1 = make_attn(block_in, attn_type=attn_type) - self.mid.block_2 = ResnetBlock( - in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout - ) - - # upsampling - self.up = nn.ModuleList() - for i_level in reversed(range(self.num_resolutions)): - block = nn.ModuleList() - attn = nn.ModuleList() - block_out = ch * ch_mult[i_level] - skip_in = ch * ch_mult[i_level] - for i_block in range(self.num_res_blocks + 1): - if i_block == self.num_res_blocks: - skip_in = ch * in_ch_mult[i_level] - block.append( - ResnetBlock( - in_channels=block_in + skip_in, - out_channels=block_out, - temb_channels=self.temb_ch, - dropout=dropout, - ) - ) - block_in = block_out - if curr_res in attn_resolutions: - attn.append(make_attn(block_in, attn_type=attn_type)) - up = nn.Module() - up.block = block - up.attn = attn - if i_level != 0: - up.upsample = Upsample(block_in, resamp_with_conv) - curr_res = curr_res * 2 - self.up.insert(0, up) # prepend to get consistent order - - # end - self.norm_out = Normalize(block_in) - self.conv_out = torch.nn.Conv2d(block_in, out_ch, kernel_size=3, stride=1, padding=1) - - def forward(self, x, t=None, context=None): - # assert x.shape[2] == x.shape[3] == self.resolution - if context is not None: - # assume aligned context, cat along channel axis - x = torch.cat((x, context), dim=1) - if self.use_timestep: - # timestep embedding - assert t is not None - temb = get_timestep_embedding(t, self.ch) - temb = self.temb.dense[0](temb) - temb = nonlinearity(temb) - temb = self.temb.dense[1](temb) - else: - temb = None - - # downsampling - hs = [self.conv_in(x)] - for i_level in range(self.num_resolutions): - for i_block in range(self.num_res_blocks): - h = self.down[i_level].block[i_block](hs[-1], temb) - if len(self.down[i_level].attn) > 0: - h = self.down[i_level].attn[i_block](h) - hs.append(h) - if i_level != self.num_resolutions - 1: - hs.append(self.down[i_level].downsample(hs[-1])) - - # middle - h = hs[-1] - h = self.mid.block_1(h, temb) - h = self.mid.attn_1(h) - h = self.mid.block_2(h, temb) - - # upsampling - for i_level in reversed(range(self.num_resolutions)): - for i_block in range(self.num_res_blocks + 1): - h = self.up[i_level].block[i_block](torch.cat([h, hs.pop()], dim=1), temb) - if len(self.up[i_level].attn) > 0: - h = self.up[i_level].attn[i_block](h) - if i_level != 0: - h = self.up[i_level].upsample(h) - - # end - h = self.norm_out(h) - h = nonlinearity(h) - h = self.conv_out(h) - return h - - def get_last_layer(self): - return self.conv_out.weight - - -class Encoder(nn.Module): - def __init__( - self, - *, - ch, - out_ch, - ch_mult=(1, 2, 4, 8), - num_res_blocks, - attn_resolutions, - dropout=0.0, - resamp_with_conv=True, - in_channels, - resolution, - z_channels, - double_z=True, - use_linear_attn=False, - attn_type="vanilla", - **ignore_kwargs, - ): - super().__init__() - if use_linear_attn: - attn_type = "linear" - self.ch = ch - self.temb_ch = 0 - self.num_resolutions = len(ch_mult) - self.num_res_blocks = num_res_blocks - self.resolution = resolution - self.in_channels = in_channels - - # downsampling - self.conv_in = torch.nn.Conv2d(in_channels, self.ch, kernel_size=3, stride=1, padding=1) - - curr_res = resolution - in_ch_mult = (1,) + tuple(ch_mult) - self.in_ch_mult = in_ch_mult - self.down = nn.ModuleList() - for i_level in range(self.num_resolutions): - block = nn.ModuleList() - attn = nn.ModuleList() - block_in = ch * in_ch_mult[i_level] - block_out = ch * ch_mult[i_level] - for i_block in range(self.num_res_blocks): - block.append( - ResnetBlock( - in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch, dropout=dropout - ) - ) - block_in = block_out - if curr_res in attn_resolutions: - attn.append(make_attn(block_in, attn_type=attn_type)) - down = nn.Module() - down.block = block - down.attn = attn - if i_level != self.num_resolutions - 1: - down.downsample = Downsample(block_in, resamp_with_conv) - curr_res = curr_res // 2 - self.down.append(down) - - # middle - self.mid = nn.Module() - self.mid.block_1 = ResnetBlock( - in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout - ) - self.mid.attn_1 = make_attn(block_in, attn_type=attn_type) - self.mid.block_2 = ResnetBlock( - in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout - ) - - # end - self.norm_out = Normalize(block_in) - self.conv_out = torch.nn.Conv2d( - block_in, 2 * z_channels if double_z else z_channels, kernel_size=3, stride=1, padding=1 - ) - - def forward(self, x): - # timestep embedding - temb = None - - # downsampling - hs = [self.conv_in(x)] - for i_level in range(self.num_resolutions): - for i_block in range(self.num_res_blocks): - h = self.down[i_level].block[i_block](hs[-1], temb) - if len(self.down[i_level].attn) > 0: - h = self.down[i_level].attn[i_block](h) - hs.append(h) - if i_level != self.num_resolutions - 1: - hs.append(self.down[i_level].downsample(hs[-1])) - - # middle - h = hs[-1] - h = self.mid.block_1(h, temb) - h = self.mid.attn_1(h) - h = self.mid.block_2(h, temb) - - # end - h = self.norm_out(h) - h = nonlinearity(h) - h = self.conv_out(h) - return h - - -class Decoder(nn.Module): - def __init__( - self, - *, - ch, - out_ch, - ch_mult=(1, 2, 4, 8), - num_res_blocks, - attn_resolutions, - dropout=0.0, - resamp_with_conv=True, - in_channels, - resolution, - z_channels, - give_pre_end=False, - tanh_out=False, - use_linear_attn=False, - attn_type="vanilla", - **ignorekwargs, - ): - super().__init__() - if use_linear_attn: - attn_type = "linear" - self.ch = ch - self.temb_ch = 0 - self.num_resolutions = len(ch_mult) - self.num_res_blocks = num_res_blocks - self.resolution = resolution - self.in_channels = in_channels - self.give_pre_end = give_pre_end - self.tanh_out = tanh_out - - # compute in_ch_mult, block_in and curr_res at lowest res - in_ch_mult = (1,) + tuple(ch_mult) - block_in = ch * ch_mult[self.num_resolutions - 1] - curr_res = resolution // 2 ** (self.num_resolutions - 1) - self.z_shape = (1, z_channels, curr_res, curr_res) - print("Working with z of shape {} = {} dimensions.".format(self.z_shape, np.prod(self.z_shape))) - - # z to block_in - self.conv_in = torch.nn.Conv2d(z_channels, block_in, kernel_size=3, stride=1, padding=1) - - # middle - self.mid = nn.Module() - self.mid.block_1 = ResnetBlock( - in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout - ) - self.mid.attn_1 = make_attn(block_in, attn_type=attn_type) - self.mid.block_2 = ResnetBlock( - in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout - ) - - # upsampling - self.up = nn.ModuleList() - for i_level in reversed(range(self.num_resolutions)): - block = nn.ModuleList() - attn = nn.ModuleList() - block_out = ch * ch_mult[i_level] - for i_block in range(self.num_res_blocks + 1): - block.append( - ResnetBlock( - in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch, dropout=dropout - ) - ) - block_in = block_out - if curr_res in attn_resolutions: - attn.append(make_attn(block_in, attn_type=attn_type)) - up = nn.Module() - up.block = block - up.attn = attn - if i_level != 0: - up.upsample = Upsample(block_in, resamp_with_conv) - curr_res = curr_res * 2 - self.up.insert(0, up) # prepend to get consistent order - - # end - self.norm_out = Normalize(block_in) - self.conv_out = torch.nn.Conv2d(block_in, out_ch, kernel_size=3, stride=1, padding=1) - - def forward(self, z): - # assert z.shape[1:] == self.z_shape[1:] - self.last_z_shape = z.shape - - # timestep embedding - temb = None - - # z to block_in - h = self.conv_in(z) - - # middle - h = self.mid.block_1(h, temb) - h = self.mid.attn_1(h) - h = self.mid.block_2(h, temb) - - # upsampling - for i_level in reversed(range(self.num_resolutions)): - for i_block in range(self.num_res_blocks + 1): - h = self.up[i_level].block[i_block](h, temb) - if len(self.up[i_level].attn) > 0: - h = self.up[i_level].attn[i_block](h) - if i_level != 0: - h = self.up[i_level].upsample(h) - - # end - if self.give_pre_end: - return h - - h = self.norm_out(h) - h = nonlinearity(h) - h = self.conv_out(h) - if self.tanh_out: - h = torch.tanh(h) - return h - - -class SimpleDecoder(nn.Module): - def __init__(self, in_channels, out_channels, *args, **kwargs): - super().__init__() - self.model = nn.ModuleList( - [ - nn.Conv2d(in_channels, in_channels, 1), - ResnetBlock(in_channels=in_channels, out_channels=2 * in_channels, temb_channels=0, dropout=0.0), - ResnetBlock(in_channels=2 * in_channels, out_channels=4 * in_channels, temb_channels=0, dropout=0.0), - ResnetBlock(in_channels=4 * in_channels, out_channels=2 * in_channels, temb_channels=0, dropout=0.0), - nn.Conv2d(2 * in_channels, in_channels, 1), - Upsample(in_channels, with_conv=True), - ] - ) - # end - self.norm_out = Normalize(in_channels) - self.conv_out = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) - - def forward(self, x): - for i, layer in enumerate(self.model): - if i in [1, 2, 3]: - x = layer(x, None) - else: - x = layer(x) - - h = self.norm_out(x) - h = nonlinearity(h) - x = self.conv_out(h) - return x - - -class UpsampleDecoder(nn.Module): - def __init__(self, in_channels, out_channels, ch, num_res_blocks, resolution, ch_mult=(2, 2), dropout=0.0): - super().__init__() - # upsampling - self.temb_ch = 0 - self.num_resolutions = len(ch_mult) - self.num_res_blocks = num_res_blocks - block_in = in_channels - curr_res = resolution // 2 ** (self.num_resolutions - 1) - self.res_blocks = nn.ModuleList() - self.upsample_blocks = nn.ModuleList() - for i_level in range(self.num_resolutions): - res_block = [] - block_out = ch * ch_mult[i_level] - for i_block in range(self.num_res_blocks + 1): - res_block.append( - ResnetBlock( - in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch, dropout=dropout - ) - ) - block_in = block_out - self.res_blocks.append(nn.ModuleList(res_block)) - if i_level != self.num_resolutions - 1: - self.upsample_blocks.append(Upsample(block_in, True)) - curr_res = curr_res * 2 - - # end - self.norm_out = Normalize(block_in) - self.conv_out = torch.nn.Conv2d(block_in, out_channels, kernel_size=3, stride=1, padding=1) - - def forward(self, x): - # upsampling - h = x - for k, i_level in enumerate(range(self.num_resolutions)): - for i_block in range(self.num_res_blocks + 1): - h = self.res_blocks[i_level][i_block](h, None) - if i_level != self.num_resolutions - 1: - h = self.upsample_blocks[k](h) - h = self.norm_out(h) - h = nonlinearity(h) - h = self.conv_out(h) - return h - - -class LatentRescaler(nn.Module): - def __init__(self, factor, in_channels, mid_channels, out_channels, depth=2): - super().__init__() - # residual block, interpolate, residual block - self.factor = factor - self.conv_in = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1) - self.res_block1 = nn.ModuleList( - [ - ResnetBlock(in_channels=mid_channels, out_channels=mid_channels, temb_channels=0, dropout=0.0) - for _ in range(depth) - ] - ) - self.attn = AttnBlock(mid_channels) - self.res_block2 = nn.ModuleList( - [ - ResnetBlock(in_channels=mid_channels, out_channels=mid_channels, temb_channels=0, dropout=0.0) - for _ in range(depth) - ] - ) - - self.conv_out = nn.Conv2d(mid_channels, out_channels, kernel_size=1,) - - def forward(self, x): - x = self.conv_in(x) - for block in self.res_block1: - x = block(x, None) - x = torch.nn.functional.interpolate( - x, size=(int(round(x.shape[2] * self.factor)), int(round(x.shape[3] * self.factor))) - ) - x = self.attn(x) - for block in self.res_block2: - x = block(x, None) - x = self.conv_out(x) - return x - - -class MergedRescaleEncoder(nn.Module): - def __init__( - self, - in_channels, - ch, - resolution, - out_ch, - num_res_blocks, - attn_resolutions, - dropout=0.0, - resamp_with_conv=True, - ch_mult=(1, 2, 4, 8), - rescale_factor=1.0, - rescale_module_depth=1, - ): - super().__init__() - intermediate_chn = ch * ch_mult[-1] - self.encoder = Encoder( - in_channels=in_channels, - num_res_blocks=num_res_blocks, - ch=ch, - ch_mult=ch_mult, - z_channels=intermediate_chn, - double_z=False, - resolution=resolution, - attn_resolutions=attn_resolutions, - dropout=dropout, - resamp_with_conv=resamp_with_conv, - out_ch=None, - ) - self.rescaler = LatentRescaler( - factor=rescale_factor, - in_channels=intermediate_chn, - mid_channels=intermediate_chn, - out_channels=out_ch, - depth=rescale_module_depth, - ) - - def forward(self, x): - x = self.encoder(x) - x = self.rescaler(x) - return x - - -class MergedRescaleDecoder(nn.Module): - def __init__( - self, - z_channels, - out_ch, - resolution, - num_res_blocks, - attn_resolutions, - ch, - ch_mult=(1, 2, 4, 8), - dropout=0.0, - resamp_with_conv=True, - rescale_factor=1.0, - rescale_module_depth=1, - ): - super().__init__() - tmp_chn = z_channels * ch_mult[-1] - self.decoder = Decoder( - out_ch=out_ch, - z_channels=tmp_chn, - attn_resolutions=attn_resolutions, - dropout=dropout, - resamp_with_conv=resamp_with_conv, - in_channels=None, - num_res_blocks=num_res_blocks, - ch_mult=ch_mult, - resolution=resolution, - ch=ch, - ) - self.rescaler = LatentRescaler( - factor=rescale_factor, - in_channels=z_channels, - mid_channels=tmp_chn, - out_channels=tmp_chn, - depth=rescale_module_depth, - ) - - def forward(self, x): - x = self.rescaler(x) - x = self.decoder(x) - return x - - -class Upsampler(nn.Module): - def __init__(self, in_size, out_size, in_channels, out_channels, ch_mult=2): - super().__init__() - assert out_size >= in_size - num_blocks = int(np.log2(out_size // in_size)) + 1 - factor_up = 1.0 + (out_size % in_size) - print( - f"Building {self.__class__.__name__} with in_size: {in_size} --> out_size {out_size} and factor {factor_up}" - ) - self.rescaler = LatentRescaler( - factor=factor_up, in_channels=in_channels, mid_channels=2 * in_channels, out_channels=in_channels - ) - self.decoder = Decoder( - out_ch=out_channels, - resolution=out_size, - z_channels=in_channels, - num_res_blocks=2, - attn_resolutions=[], - in_channels=None, - ch=in_channels, - ch_mult=[ch_mult for _ in range(num_blocks)], - ) - - def forward(self, x): - x = self.rescaler(x) - x = self.decoder(x) - return x - - -class Resize(nn.Module): - def __init__(self, in_channels=None, learned=False, mode="bilinear"): - super().__init__() - self.with_conv = learned - self.mode = mode - if self.with_conv: - print(f"Note: {self.__class__.__name} uses learned downsampling and will ignore the fixed {mode} mode") - raise NotImplementedError() - assert in_channels is not None - # no asymmetric padding in torch conv, must do it ourselves - self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=4, stride=2, padding=1) - - def forward(self, x, scale_factor=1.0): - if scale_factor == 1.0: - return x - else: - x = torch.nn.functional.interpolate(x, mode=self.mode, align_corners=False, scale_factor=scale_factor) - return x - - -class FirstStagePostProcessor(nn.Module): - def __init__( - self, - ch_mult: list, - in_channels, - pretrained_model: nn.Module = None, - reshape=False, - n_channels=None, - dropout=0.0, - pretrained_config=None, - ): - super().__init__() - if pretrained_config is None: - assert pretrained_model is not None, 'Either "pretrained_model" or "pretrained_config" must not be None' - self.pretrained_model = pretrained_model - else: - assert pretrained_config is not None, 'Either "pretrained_model" or "pretrained_config" must not be None' - self.instantiate_pretrained(pretrained_config) - - self.do_reshape = reshape - - if n_channels is None: - n_channels = self.pretrained_model.encoder.ch - - self.proj_norm = Normalize(in_channels, num_groups=in_channels // 2) - self.proj = nn.Conv2d(in_channels, n_channels, kernel_size=3, stride=1, padding=1) - - blocks = [] - downs = [] - ch_in = n_channels - for m in ch_mult: - blocks.append(ResnetBlock(in_channels=ch_in, out_channels=m * n_channels, dropout=dropout)) - ch_in = m * n_channels - downs.append(Downsample(ch_in, with_conv=False)) - - self.model = nn.ModuleList(blocks) - self.downsampler = nn.ModuleList(downs) - - def instantiate_pretrained(self, config): - model = instantiate_from_config(config) - self.pretrained_model = model.eval() - # self.pretrained_model.train = False - for param in self.pretrained_model.parameters(): - param.requires_grad = False - - @torch.no_grad() - def encode_with_pretrained(self, x): - c = self.pretrained_model.encode(x) - if isinstance(c, DiagonalGaussianDistribution): - c = c.mode() - return c - - def forward(self, x): - z_fs = self.encode_with_pretrained(x) - z = self.proj_norm(z_fs) - z = self.proj(z) - z = nonlinearity(z) - - for submodel, downmodel in zip(self.model, self.downsampler): - z = submodel(z, temb=None) - z = downmodel(z) - - if self.do_reshape: - z = rearrange(z, 'b c h w -> b (h w) c') - return z diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py deleted file mode 100644 index 9c52198f1566..000000000000 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py +++ /dev/null @@ -1,1191 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import math -from abc import abstractmethod -from functools import partial -from typing import Iterable - -import numpy as np -import torch -import torch as th -import torch.nn as nn -import torch.nn.functional as F - -from nemo.collections.multimodal.modules.stable_diffusion.attention import SpatialTransformer -from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( - avg_pool_nd, - checkpoint, - conv_nd, - linear, - normalization, - timestep_embedding, - zero_module, -) - - -def convert_module_to_dtype(module, dtype): - # Convert module parameters to dtype - if isinstance(module, (torch.nn.Conv1d, torch.nn.Conv2d, torch.nn.Linear)): - module.weight.data = module.weight.data.to(dtype) - if module.bias is not None: - module.bias.data = module.bias.data.to(dtype) - - -def convert_module_to_fp16(module): - convert_module_to_dtype(module, torch.float16) - - -## go -class AttentionPool2d(nn.Module): - """ - Adapted from CLIP: https://github.com/openai/CLIP/blob/main/clip/model.py - """ - - def __init__( - self, spacial_dim: int, embed_dim: int, num_heads_channels: int, output_dim: int = None, - ): - super().__init__() - self.positional_embedding = nn.Parameter(th.randn(embed_dim, spacial_dim ** 2 + 1) / embed_dim ** 0.5) - self.qkv_proj = conv_nd(1, embed_dim, 3 * embed_dim, 1) - self.c_proj = conv_nd(1, embed_dim, output_dim or embed_dim, 1) - self.num_heads = embed_dim // num_heads_channels - self.attention = QKVAttention(self.num_heads) - - def forward(self, x): - b, c, *_spatial = x.shape - x = x.reshape(b, c, -1) # NC(HW) - x = th.cat([x.mean(dim=-1, keepdim=True), x], dim=-1) # NC(HW+1) - x = x + self.positional_embedding[None, :, :].to(x.dtype) # NC(HW+1) - x = self.qkv_proj(x) - x = self.attention(x) - x = self.c_proj(x) - return x[:, :, 0] - - -class TimestepBlock(nn.Module): - """ - Any module where forward() takes timestep embeddings as a second argument. - """ - - @abstractmethod - def forward(self, x, emb): - """ - Apply the module to `x` given `emb` timestep embeddings. - """ - - -class TimestepEmbedSequential(nn.Sequential, TimestepBlock): - """ - A sequential module that passes timestep embeddings to the children that - support it as an extra input. - """ - - def forward(self, x, emb, context=None): - for layer in self: - if isinstance(layer, TimestepBlock): - x = layer(x, emb) - elif isinstance(layer, SpatialTransformer): - x = layer(x, context) - else: - x = layer(x) - return x - - -class Upsample(nn.Module): - """ - An upsampling layer with an optional convolution. - :param channels: channels in the inputs and outputs. - :param use_conv: a bool determining if a convolution is applied. - :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then - upsampling occurs in the inner-two dimensions. - """ - - def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1): - super().__init__() - self.channels = channels - self.out_channels = out_channels or channels - self.use_conv = use_conv - self.dims = dims - if use_conv: - self.conv = conv_nd(dims, self.channels, self.out_channels, 3, padding=padding) - - def forward(self, x): - assert x.shape[1] == self.channels - - # Cast to float32 to as 'upsample_nearest2d_out_frame' op does not support bfloat16 - # TODO(yuya): Remove this cast once the issue is fixed in PyTorch - # https://github.com/pytorch/pytorch/issues/86679 - dtype = x.dtype - if dtype == torch.bfloat16: - x = x.to(torch.float32) - if self.dims == 3: - x = F.interpolate(x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest") - else: - x = F.interpolate(x, scale_factor=2, mode="nearest") - if dtype == torch.bfloat16: - x = x.to(dtype) - - if self.use_conv: - x = self.conv(x) - return x - - -class TransposedUpsample(nn.Module): - 'Learned 2x upsampling without padding' - - def __init__(self, channels, out_channels=None, ks=5): - super().__init__() - self.channels = channels - self.out_channels = out_channels or channels - - self.up = nn.ConvTranspose2d(self.channels, self.out_channels, kernel_size=ks, stride=2) - - def forward(self, x): - return self.up(x) - - -class Downsample(nn.Module): - """ - A downsampling layer with an optional convolution. - :param channels: channels in the inputs and outputs. - :param use_conv: a bool determining if a convolution is applied. - :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then - downsampling occurs in the inner-two dimensions. - """ - - def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1): - super().__init__() - self.channels = channels - self.out_channels = out_channels or channels - self.use_conv = use_conv - self.dims = dims - stride = 2 if dims != 3 else (1, 2, 2) - if use_conv: - self.op = conv_nd(dims, self.channels, self.out_channels, 3, stride=stride, padding=padding) - else: - assert self.channels == self.out_channels - self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride) - - def forward(self, x): - assert x.shape[1] == self.channels - return self.op(x) - - -class ResBlock(TimestepBlock): - """ - A residual block that can optionally change the number of channels. - :param channels: the number of input channels. - :param emb_channels: the number of timestep embedding channels. - :param dropout: the rate of dropout. - :param out_channels: if specified, the number of out channels. - :param use_conv: if True and out_channels is specified, use a spatial - convolution instead of a smaller 1x1 convolution to change the - channels in the skip connection. - :param dims: determines if the signal is 1D, 2D, or 3D. - :param use_checkpoint: if True, use gradient checkpointing on this module. - :param up: if True, use this block for upsampling. - :param down: if True, use this block for downsampling. - """ - - def __init__( - self, - channels, - emb_channels, - dropout, - out_channels=None, - use_conv=False, - use_scale_shift_norm=False, - dims=2, - use_checkpoint=False, - up=False, - down=False, - ): - super().__init__() - self.channels = channels - self.emb_channels = emb_channels - self.dropout = dropout - self.out_channels = out_channels or channels - self.use_conv = use_conv - self.use_checkpoint = use_checkpoint - self.use_scale_shift_norm = use_scale_shift_norm - - self.in_layers = nn.Sequential( - normalization(channels, act="silu"), conv_nd(dims, channels, self.out_channels, 3, padding=1), - ) - - self.updown = up or down - - if up: - self.h_upd = Upsample(channels, False, dims) - self.x_upd = Upsample(channels, False, dims) - elif down: - self.h_upd = Downsample(channels, False, dims) - self.x_upd = Downsample(channels, False, dims) - else: - self.h_upd = self.x_upd = nn.Identity() - - self.emb_layers = nn.Sequential( - nn.SiLU(), linear(emb_channels, 2 * self.out_channels if use_scale_shift_norm else self.out_channels,), - ) - self.out_layers = nn.Sequential( - normalization(self.out_channels, act="silu"), - nn.Dropout(p=dropout), - zero_module(conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1)), - ) - - if self.out_channels == channels: - self.skip_connection = nn.Identity() - elif use_conv: - self.skip_connection = conv_nd(dims, channels, self.out_channels, 3, padding=1) - else: - self.skip_connection = conv_nd(dims, channels, self.out_channels, 1) - - def forward(self, x, emb): - """ - Apply the block to a Tensor, conditioned on a timestep embedding. - :param x: an [N x C x ...] Tensor of features. - :param emb: an [N x emb_channels] Tensor of timestep embeddings. - :return: an [N x C x ...] Tensor of outputs. - """ - if self.use_checkpoint: - return checkpoint(self._forward, (x, emb), self.parameters(), self.use_checkpoint) - else: - return self._forward(x, emb) - - def _forward(self, x, emb): - if self.updown: - in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1] - h = in_rest(x) - h = self.h_upd(h) - x = self.x_upd(x) - h = in_conv(h) - else: - h = self.in_layers(x) - emb_out = self.emb_layers(emb).type(h.dtype) - while len(emb_out.shape) < len(h.shape): - emb_out = emb_out[..., None] - if self.use_scale_shift_norm: - out_norm, out_rest = self.out_layers[0], self.out_layers[1:] - scale, shift = th.chunk(emb_out, 2, dim=1) - h = out_norm(h) * (1 + scale) + shift - h = out_rest(h) - else: - h = h + emb_out - h = self.out_layers(h) - return self.skip_connection(x) + h - - -class AttentionBlock(nn.Module): - """ - An attention block that allows spatial positions to attend to each other. - Originally ported from here, but adapted to the N-d case. - https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. - """ - - def __init__( - self, channels, num_heads=1, num_head_channels=-1, use_checkpoint=False, use_new_attention_order=False, - ): - super().__init__() - self.channels = channels - if num_head_channels == -1: - self.num_heads = num_heads - else: - assert ( - channels % num_head_channels == 0 - ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" - self.num_heads = channels // num_head_channels - self.use_checkpoint = use_checkpoint - self.norm = normalization(channels) - self.qkv = conv_nd(1, channels, channels * 3, 1) - if use_new_attention_order: - # split qkv before split heads - self.attention = QKVAttention(self.num_heads) - else: - # split heads before split qkv - self.attention = QKVAttentionLegacy(self.num_heads) - - self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) - - def forward(self, x): - return checkpoint( - self._forward, (x,), self.parameters(), True - ) # TODO: check checkpoint usage, is True # TODO: fix the .half call!!! - # return pt_checkpoint(self._forward, x) # pytorch - - def _forward(self, x): - b, c, *spatial = x.shape - x = x.reshape(b, c, -1) - qkv = self.qkv(self.norm(x)) - h = self.attention(qkv) - h = self.proj_out(h) - return (x + h).reshape(b, c, *spatial) - - -def count_flops_attn(model, _x, y): - """ - A counter for the `thop` package to count the operations in an - attention operation. - Meant to be used like: - macs, params = thop.profile( - model, - inputs=(inputs, timestamps), - custom_ops={QKVAttention: QKVAttention.count_flops}, - ) - """ - b, c, *spatial = y[0].shape - num_spatial = int(np.prod(spatial)) - # We perform two matmuls with the same number of ops. - # The first computes the weight matrix, the second computes - # the combination of the value vectors. - matmul_ops = 2 * b * (num_spatial ** 2) * c - model.total_ops += th.DoubleTensor([matmul_ops]) - - -class QKVAttentionLegacy(nn.Module): - """ - A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping - """ - - def __init__(self, n_heads): - super().__init__() - self.n_heads = n_heads - - def forward(self, qkv): - """ - Apply QKV attention. - :param qkv: an [N x (H * 3 * C) x T] tensor of Qs, Ks, and Vs. - :return: an [N x (H * C) x T] tensor after attention. - """ - bs, width, length = qkv.shape - assert width % (3 * self.n_heads) == 0 - ch = width // (3 * self.n_heads) - q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split(ch, dim=1) - scale = 1 / math.sqrt(math.sqrt(ch)) - weight = th.einsum("bct,bcs->bts", q * scale, k * scale) # More stable with f16 than dividing afterwards - weight = th.softmax(weight.float(), dim=-1).type(weight.dtype) - a = th.einsum("bts,bcs->bct", weight, v) - return a.reshape(bs, -1, length) - - @staticmethod - def count_flops(model, _x, y): - return count_flops_attn(model, _x, y) - - -class QKVAttention(nn.Module): - """ - A module which performs QKV attention and splits in a different order. - """ - - def __init__(self, n_heads): - super().__init__() - self.n_heads = n_heads - - def forward(self, qkv): - """ - Apply QKV attention. - :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. - :return: an [N x (H * C) x T] tensor after attention. - """ - bs, width, length = qkv.shape - assert width % (3 * self.n_heads) == 0 - ch = width // (3 * self.n_heads) - q, k, v = qkv.chunk(3, dim=1) - scale = 1 / math.sqrt(math.sqrt(ch)) - weight = th.einsum( - "bct,bcs->bts", - (q * scale).view(bs * self.n_heads, ch, length), - (k * scale).view(bs * self.n_heads, ch, length), - ) # More stable with f16 than dividing afterwards - weight = th.softmax(weight.float(), dim=-1).type(weight.dtype) - a = th.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) - return a.reshape(bs, -1, length) - - @staticmethod - def count_flops(model, _x, y): - return count_flops_attn(model, _x, y) - - -class UNetModel(nn.Module): - """ - The full UNet model with attention and timestep embedding. - :param in_channels: channels in the input Tensor. - :param model_channels: base channel count for the model. - :param out_channels: channels in the output Tensor. - :param num_res_blocks: number of residual blocks per downsample. - :param attention_resolutions: a collection of downsample rates at which - attention will take place. May be a set, list, or tuple. - For example, if this contains 4, then at 4x downsampling, attention - will be used. - :param dropout: the dropout probability. - :param channel_mult: channel multiplier for each level of the UNet. - :param conv_resample: if True, use learned convolutions for upsampling and - downsampling. - :param dims: determines if the signal is 1D, 2D, or 3D. - :param num_classes: if specified (as an int), then this model will be - class-conditional with `num_classes` classes. - :param use_checkpoint: use gradient checkpointing to reduce memory usage. - :param num_heads: the number of attention heads in each attention layer. - :param num_heads_channels: if specified, ignore num_heads and instead use - a fixed channel width per attention head. - :param num_heads_upsample: works with num_heads to set a different number - of heads for upsampling. Deprecated. - :param use_scale_shift_norm: use a FiLM-like conditioning mechanism. - :param resblock_updown: use residual blocks for up/downsampling. - :param use_new_attention_order: use a different attention pattern for potentially - increased efficiency. - """ - - def __init__( - self, - image_size, - in_channels, - model_channels, - out_channels, - num_res_blocks, - attention_resolutions, - dropout=0, - channel_mult=(1, 2, 4, 8), - conv_resample=True, - dims=2, - num_classes=None, - use_checkpoint=False, - num_heads=-1, - num_head_channels=-1, - num_heads_upsample=-1, - use_scale_shift_norm=False, - resblock_updown=False, - use_new_attention_order=False, - use_spatial_transformer=False, # custom transformer support - transformer_depth=1, # custom transformer support - context_dim=None, # custom transformer support - n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model - legacy=True, - use_linear_in_transformer=False, - from_pretrained: str = None, - from_NeMo=False, - # It must be specified when from pretrained is not None. It indicates loading unet from NeMo trained ckpt or HF - use_flash_attention: bool = False, - enable_amp_o2_fp16: bool = False, - ): - super().__init__() - if use_spatial_transformer: - assert ( - context_dim is not None - ), 'Fool!! You forgot to include the dimension of your cross-attention conditioning...' - - if context_dim is not None: - assert ( - use_spatial_transformer - ), 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...' - from omegaconf.listconfig import ListConfig - - if type(context_dim) == ListConfig: - context_dim = list(context_dim) - - if num_heads_upsample == -1: - num_heads_upsample = num_heads - - if num_heads == -1: - assert num_head_channels != -1, 'Either num_heads or num_head_channels has to be set' - - if num_head_channels == -1: - assert num_heads != -1, 'Either num_heads or num_head_channels has to be set' - - self.image_size = image_size - self.in_channels = in_channels - self.model_channels = model_channels - self.out_channels = out_channels - self.num_res_blocks = num_res_blocks - self.attention_resolutions = attention_resolutions - self.dropout = dropout - self.channel_mult = channel_mult - self.conv_resample = conv_resample - self.num_classes = num_classes - self.use_checkpoint = use_checkpoint - self.num_heads = num_heads - self.num_head_channels = num_head_channels - self.num_heads_upsample = num_heads_upsample - self.predict_codebook_ids = n_embed is not None - time_embed_dim = model_channels * 4 - self.time_embed = nn.Sequential( - linear(model_channels, time_embed_dim), nn.SiLU(), linear(time_embed_dim, time_embed_dim), - ) - - if self.num_classes is not None: - self.label_emb = nn.Embedding(num_classes, time_embed_dim) - - self.input_blocks = nn.ModuleList( - [TimestepEmbedSequential(conv_nd(dims, in_channels, model_channels, 3, padding=1))] - ) - self._feature_size = model_channels - input_block_chans = [model_channels] - ch = model_channels - ds = 1 - for level, mult in enumerate(channel_mult): - for _ in range(num_res_blocks): - layers = [ - ResBlock( - ch, - time_embed_dim, - dropout, - out_channels=mult * model_channels, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - ) - ] - ch = mult * model_channels - if ds in attention_resolutions: - if num_head_channels == -1: - dim_head = ch // num_heads - else: - num_heads = ch // num_head_channels - dim_head = num_head_channels - if legacy: - # num_heads = 1 - dim_head = ch // num_heads if use_spatial_transformer else num_head_channels - layers.append( - AttentionBlock( - ch, - use_checkpoint=use_checkpoint, - num_heads=num_heads, - num_head_channels=dim_head, - use_new_attention_order=use_new_attention_order, - ) - if not use_spatial_transformer - else SpatialTransformer( - ch, - num_heads, - dim_head, - depth=transformer_depth, - context_dim=context_dim, - use_linear=use_linear_in_transformer, - use_checkpoint=use_checkpoint, - use_flash_attention=use_flash_attention, - ) - ) - self.input_blocks.append(TimestepEmbedSequential(*layers)) - self._feature_size += ch - input_block_chans.append(ch) - if level != len(channel_mult) - 1: - out_ch = ch - self.input_blocks.append( - TimestepEmbedSequential( - ResBlock( - ch, - time_embed_dim, - dropout, - out_channels=out_ch, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - down=True, - ) - if resblock_updown - else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch) - ) - ) - ch = out_ch - input_block_chans.append(ch) - ds *= 2 - self._feature_size += ch - - if num_head_channels == -1: - dim_head = ch // num_heads - else: - num_heads = ch // num_head_channels - dim_head = num_head_channels - if legacy: - # num_heads = 1 - dim_head = ch // num_heads if use_spatial_transformer else num_head_channels - self.middle_block = TimestepEmbedSequential( - ResBlock( - ch, - time_embed_dim, - dropout, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - ), - AttentionBlock( - ch, - use_checkpoint=use_checkpoint, - num_heads=num_heads, - num_head_channels=dim_head, - use_new_attention_order=use_new_attention_order, - ) - if not use_spatial_transformer - else SpatialTransformer( - ch, - num_heads, - dim_head, - depth=transformer_depth, - context_dim=context_dim, - use_linear=use_linear_in_transformer, - use_checkpoint=use_checkpoint, - use_flash_attention=use_flash_attention, - ), - ResBlock( - ch, - time_embed_dim, - dropout, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - ), - ) - self._feature_size += ch - - self.output_blocks = nn.ModuleList([]) - for level, mult in list(enumerate(channel_mult))[::-1]: - for i in range(num_res_blocks + 1): - ich = input_block_chans.pop() - layers = [ - ResBlock( - ch + ich, - time_embed_dim, - dropout, - out_channels=model_channels * mult, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - ) - ] - ch = model_channels * mult - if ds in attention_resolutions: - if num_head_channels == -1: - dim_head = ch // num_heads - else: - num_heads = ch // num_head_channels - dim_head = num_head_channels - if legacy: - # num_heads = 1 - dim_head = ch // num_heads if use_spatial_transformer else num_head_channels - layers.append( - AttentionBlock( - ch, - use_checkpoint=use_checkpoint, - num_heads=num_heads_upsample, - num_head_channels=dim_head, - use_new_attention_order=use_new_attention_order, - ) - if not use_spatial_transformer - else SpatialTransformer( - ch, - num_heads, - dim_head, - depth=transformer_depth, - context_dim=context_dim, - use_linear=use_linear_in_transformer, - use_checkpoint=use_checkpoint, - use_flash_attention=use_flash_attention, - ) - ) - if level and i == num_res_blocks: - out_ch = ch - layers.append( - ResBlock( - ch, - time_embed_dim, - dropout, - out_channels=out_ch, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - up=True, - ) - if resblock_updown - else Upsample(ch, conv_resample, dims=dims, out_channels=out_ch) - ) - ds //= 2 - self.output_blocks.append(TimestepEmbedSequential(*layers)) - self._feature_size += ch - - self.out = nn.Sequential( - normalization(ch), nn.SiLU(), zero_module(conv_nd(dims, model_channels, out_channels, 3, padding=1)), - ) - if self.predict_codebook_ids: - self.id_predictor = nn.Sequential( - normalization(ch), - conv_nd(dims, model_channels, n_embed, 1), - # nn.LogSoftmax(dim=1) # change to cross_entropy and produce non-normalized logits - ) - - if from_pretrained is not None: - state_dict = torch.load(from_pretrained, map_location='cpu') - if 'state_dict' in state_dict.keys(): - state_dict = state_dict['state_dict'] - missing_key, unexpected_keys, _, _ = self._load_pretrained_model(state_dict, from_NeMo=from_NeMo) - if len(missing_key) > 0: - print( - 'Following keys are missing during loading unet weights, which may lead to compromised image quality for a resumed training. Please check the checkpoint you provided.' - ) - print(f"Missing keys: {missing_key}") - print(f"Unexpected keys: {unexpected_keys}") - - if enable_amp_o2_fp16: - self.convert_to_fp16() - - def _input_blocks_mapping(self, input_dict): - res_dict = {} - for key_, value_ in input_dict.items(): - id_0 = int(key_[13]) - if "resnets" in key_: - id_1 = int(key_[23]) - target_id = 3 * id_0 + 1 + id_1 - post_fix = ( - key_[25:] - .replace('time_emb_proj', 'emb_layers.1') - .replace('norm1', 'in_layers.0') - .replace('norm2', 'out_layers.0') - .replace('conv1', 'in_layers.2') - .replace('conv2', 'out_layers.3') - .replace('conv_shortcut', 'skip_connection') - ) - res_dict["input_blocks." + str(target_id) + '.0.' + post_fix] = value_ - elif "attentions" in key_: - id_1 = int(key_[26]) - target_id = 3 * id_0 + 1 + id_1 - post_fix = key_[28:] - res_dict["input_blocks." + str(target_id) + '.1.' + post_fix] = value_ - elif "downsamplers" in key_: - post_fix = key_[35:] - target_id = 3 * (id_0 + 1) - res_dict["input_blocks." + str(target_id) + '.0.op.' + post_fix] = value_ - return res_dict - - def _mid_blocks_mapping(self, mid_dict): - res_dict = {} - for key_, value_ in mid_dict.items(): - if "resnets" in key_: - temp_key_ = ( - key_.replace('time_emb_proj', 'emb_layers.1') - .replace('norm1', 'in_layers.0') - .replace('norm2', 'out_layers.0') - .replace('conv1', 'in_layers.2') - .replace('conv2', 'out_layers.3') - .replace('conv_shortcut', 'skip_connection') - .replace('middle_block.resnets.0', 'middle_block.0') - .replace('middle_block.resnets.1', 'middle_block.2') - ) - res_dict[temp_key_] = value_ - elif "attentions" in key_: - res_dict[key_.replace('attentions.0', '1')] = value_ - return res_dict - - def _other_blocks_mapping(self, other_dict): - res_dict = {} - for key_, value_ in other_dict.items(): - tmp_key = ( - key_.replace('conv_in', 'input_blocks.0.0') - .replace('time_embedding.linear_1', 'time_embed.0') - .replace('time_embedding.linear_2', 'time_embed.2') - .replace('conv_norm_out', 'out.0') - .replace('conv_out', 'out.2') - ) - res_dict[tmp_key] = value_ - return res_dict - - def _output_blocks_mapping(self, output_dict): - res_dict = {} - for key_, value_ in output_dict.items(): - id_0 = int(key_[14]) - if "resnets" in key_: - id_1 = int(key_[24]) - target_id = 3 * id_0 + id_1 - post_fix = ( - key_[26:] - .replace('time_emb_proj', 'emb_layers.1') - .replace('norm1', 'in_layers.0') - .replace('norm2', 'out_layers.0') - .replace('conv1', 'in_layers.2') - .replace('conv2', 'out_layers.3') - .replace('conv_shortcut', 'skip_connection') - ) - res_dict["output_blocks." + str(target_id) + '.0.' + post_fix] = value_ - elif "attentions" in key_: - id_1 = int(key_[27]) - target_id = 3 * id_0 + id_1 - post_fix = key_[29:] - res_dict["output_blocks." + str(target_id) + '.1.' + post_fix] = value_ - elif "upsamplers" in key_: - post_fix = key_[34:] - target_id = 3 * (id_0 + 1) - 1 - mid_str = '.2.conv.' if target_id != 2 else '.1.conv.' - res_dict["output_blocks." + str(target_id) + mid_str + post_fix] = value_ - return res_dict - - def _state_key_mapping(self, state_dict: dict): - import re - - res_dict = {} - input_dict = {} - mid_dict = {} - output_dict = {} - other_dict = {} - for key_, value_ in state_dict.items(): - if "down_blocks" in key_: - input_dict[key_.replace('down_blocks', 'input_blocks')] = value_ - elif "up_blocks" in key_: - output_dict[key_.replace('up_blocks', 'output_blocks')] = value_ - elif "mid_block" in key_: - mid_dict[key_.replace('mid_block', 'middle_block')] = value_ - else: - other_dict[key_] = value_ - - input_dict = self._input_blocks_mapping(input_dict) - output_dict = self._output_blocks_mapping(output_dict) - mid_dict = self._mid_blocks_mapping(mid_dict) - other_dict = self._other_blocks_mapping(other_dict) - # key_list = state_dict.keys() - # key_str = " ".join(key_list) - - # for key_, val_ in state_dict.items(): - # key_ = key_.replace("down_blocks", "input_blocks")\ - # .replace("up_blocks", 'output_blocks') - # res_dict[key_] = val_ - res_dict.update(input_dict) - res_dict.update(output_dict) - res_dict.update(mid_dict) - res_dict.update(other_dict) - - return res_dict - - def _load_pretrained_model(self, state_dict, ignore_mismatched_sizes=False, from_NeMo=False): - if from_NeMo: - state_dict = self._strip_unet_key_prefix(state_dict) - else: - state_dict = self._state_key_mapping(state_dict) - model_state_dict = self.state_dict() - loaded_keys = [k for k in state_dict.keys()] - expected_keys = list(model_state_dict.keys()) - original_loaded_keys = loaded_keys - missing_keys = list(set(expected_keys) - set(loaded_keys)) - unexpected_keys = list(set(loaded_keys) - set(expected_keys)) - - if ( - 'input_blocks.1.0.in_layers.2.weight' in loaded_keys - and 'input_blocks.1.0.in_layers.1.weight' in expected_keys - ): - # GroupNormOpt fuses activation function to one layer, thus the indexing of weights are shifted for following - for key_ in missing_keys: - s = key_.split('.') - idx = int(s[-2]) - new_key_ = ".".join(s[:-2] + [str(int(idx + 1))] + [s[-1]]) - state_dict[key_] = state_dict[new_key_] - - loaded_keys = list(state_dict.keys()) - missing_keys = list(set(expected_keys) - set(loaded_keys)) - unexpected_keys = list(set(loaded_keys) - set(expected_keys)) - - def _find_mismatched_keys( - state_dict, model_state_dict, loaded_keys, ignore_mismatched_sizes, - ): - mismatched_keys = [] - if ignore_mismatched_sizes: - for checkpoint_key in loaded_keys: - model_key = checkpoint_key - - if ( - model_key in model_state_dict - and state_dict[checkpoint_key].shape != model_state_dict[model_key].shape - ): - mismatched_keys.append( - (checkpoint_key, state_dict[checkpoint_key].shape, model_state_dict[model_key].shape) - ) - del state_dict[checkpoint_key] - return mismatched_keys - - if state_dict is not None: - # Whole checkpoint - mismatched_keys = _find_mismatched_keys( - state_dict, model_state_dict, original_loaded_keys, ignore_mismatched_sizes, - ) - error_msgs = self._load_state_dict_into_model(state_dict) - return missing_keys, unexpected_keys, mismatched_keys, error_msgs - - # TODO MMY maybe combine these cases of key prefix - def _strip_unet_key_prefix(self, state_dict): - re_state_dict = {} - for key_, value_ in state_dict.items(): - if key_.startswith('model.diffusion_model'): - re_state_dict[key_.replace('model.diffusion_model.', '')] = value_ - if key_.startswith('model.model.diffusion_model'): - re_state_dict[key_.replace('model.model.diffusion_model.', '')] = value_ - if key_.startswith('model._orig_mod.diffusion_model.'): - re_state_dict[key_.replace('model._orig_mod.diffusion_model.', '')] = value_ - if key_.startswith('model.model._orig_mod.diffusion_model.'): - re_state_dict[key_.replace('model.model._orig_mod.diffusion_model.', '')] = value_ - if key_.startswith('model.model.diffusion_model._orig_mod.'): - re_state_dict[key_.replace('model.model.diffusion_model._orig_mod.', '')] = value_ - return re_state_dict - - def _load_state_dict_into_model(self, state_dict): - # Convert old format to new format if needed from a PyTorch state_dict - # copy state_dict so _load_from_state_dict can modify it - state_dict = state_dict.copy() - error_msgs = [] - - # PyTorch's `_load_from_state_dict` does not copy parameters in a module's descendants - # so we need to apply the function recursively. - def load(module: torch.nn.Module, prefix=""): - args = (state_dict, prefix, {}, True, [], [], error_msgs) - module._load_from_state_dict(*args) - - for name, child in module._modules.items(): - if child is not None: - load(child, prefix + name + ".") - - load(self) - - return error_msgs - - def convert_to_fp16(self): - """ - Convert the torso of the model to float16. - """ - self.apply(convert_module_to_fp16) - - def forward(self, x, timesteps=None, context=None, y=None, **kwargs): - """ - Apply the model to an input batch. - :param x: an [N x C x ...] Tensor of inputs. - :param timesteps: a 1-D batch of timesteps. - :param context: conditioning plugged in via crossattn - :param y: an [N] Tensor of labels, if class-conditional. - :return: an [N x C x ...] Tensor of outputs. - """ - assert (y is not None) == ( - self.num_classes is not None - ), "must specify y if and only if the model is class-conditional" - hs = [] - t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False) - emb = self.time_embed(t_emb) - - if self.num_classes is not None: - assert y.shape == (x.shape[0],) - emb = emb + self.label_emb(y) - - h = x.type(emb.dtype) - for module in self.input_blocks: - h = module(h, emb, context) - hs.append(h) - h = self.middle_block(h, emb, context) - for module in self.output_blocks: - h = th.cat([h, hs.pop()], dim=1) - h = module(h, emb, context) - if self.predict_codebook_ids: - return self.id_predictor(h) - else: - return self.out(h) - - -class EncoderUNetModel(nn.Module): - """ - The half UNet model with attention and timestep embedding. - For usage, see UNet. - """ - - def __init__( - self, - image_size, - in_channels, - model_channels, - out_channels, - num_res_blocks, - attention_resolutions, - dropout=0, - channel_mult=(1, 2, 4, 8), - conv_resample=True, - dims=2, - use_checkpoint=False, - use_fp16=False, - num_heads=1, - num_head_channels=-1, - num_heads_upsample=-1, - use_scale_shift_norm=False, - resblock_updown=False, - use_new_attention_order=False, - pool="adaptive", - *args, - **kwargs, - ): - super().__init__() - if num_heads_upsample == -1: - num_heads_upsample = num_heads - - self.in_channels = in_channels - self.model_channels = model_channels - self.out_channels = out_channels - self.num_res_blocks = num_res_blocks - self.attention_resolutions = attention_resolutions - self.dropout = dropout - self.channel_mult = channel_mult - self.conv_resample = conv_resample - self.use_checkpoint = use_checkpoint - self.dtype = th.float16 if use_fp16 else th.float32 - self.num_heads = num_heads - self.num_head_channels = num_head_channels - self.num_heads_upsample = num_heads_upsample - - time_embed_dim = model_channels * 4 - self.time_embed = nn.Sequential( - linear(model_channels, time_embed_dim), nn.SiLU(), linear(time_embed_dim, time_embed_dim), - ) - - self.input_blocks = nn.ModuleList( - [TimestepEmbedSequential(conv_nd(dims, in_channels, model_channels, 3, padding=1))] - ) - self._feature_size = model_channels - input_block_chans = [model_channels] - ch = model_channels - ds = 1 - for level, mult in enumerate(channel_mult): - for _ in range(num_res_blocks): - layers = [ - ResBlock( - ch, - time_embed_dim, - dropout, - out_channels=mult * model_channels, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - ) - ] - ch = mult * model_channels - if ds in attention_resolutions: - layers.append( - AttentionBlock( - ch, - use_checkpoint=use_checkpoint, - num_heads=num_heads, - num_head_channels=num_head_channels, - use_new_attention_order=use_new_attention_order, - ) - ) - self.input_blocks.append(TimestepEmbedSequential(*layers)) - self._feature_size += ch - input_block_chans.append(ch) - if level != len(channel_mult) - 1: - out_ch = ch - self.input_blocks.append( - TimestepEmbedSequential( - ResBlock( - ch, - time_embed_dim, - dropout, - out_channels=out_ch, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - down=True, - ) - if resblock_updown - else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch) - ) - ) - ch = out_ch - input_block_chans.append(ch) - ds *= 2 - self._feature_size += ch - - self.middle_block = TimestepEmbedSequential( - ResBlock( - ch, - time_embed_dim, - dropout, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - ), - AttentionBlock( - ch, - use_checkpoint=use_checkpoint, - num_heads=num_heads, - num_head_channels=num_head_channels, - use_new_attention_order=use_new_attention_order, - ), - ResBlock( - ch, - time_embed_dim, - dropout, - dims=dims, - use_checkpoint=use_checkpoint, - use_scale_shift_norm=use_scale_shift_norm, - ), - ) - self._feature_size += ch - self.pool = pool - if pool == "adaptive": - self.out = nn.Sequential( - normalization(ch), - nn.SiLU(), - nn.AdaptiveAvgPool2d((1, 1)), - zero_module(conv_nd(dims, ch, out_channels, 1)), - nn.Flatten(), - ) - elif pool == "attention": - assert num_head_channels != -1 - self.out = nn.Sequential( - normalization(ch), nn.SiLU(), AttentionPool2d((image_size // ds), ch, num_head_channels, out_channels), - ) - elif pool == "spatial": - self.out = nn.Sequential( - nn.Linear(self._feature_size, 2048), nn.ReLU(), nn.Linear(2048, self.out_channels), - ) - elif pool == "spatial_v2": - self.out = nn.Sequential( - nn.Linear(self._feature_size, 2048), - normalization(2048), - nn.SiLU(), - nn.Linear(2048, self.out_channels), - ) - else: - raise NotImplementedError(f"Unexpected {pool} pooling") - - def convert_to_fp16(self): - """ - Convert the torso of the model to float16. - """ - self.input_blocks.apply(convert_module_to_fp16) - self.middle_block.apply(convert_module_to_fp16) - - def forward(self, x, timesteps): - """ - Apply the model to an input batch. - :param x: an [N x C x ...] Tensor of inputs. - :param timesteps: a 1-D batch of timesteps. - :return: an [N x K] Tensor of outputs. - """ - emb = self.time_embed(timestep_embedding(timesteps, self.model_channels), use_fp16=self.use_fp16) - - # future support - if self.dtype == th.float32: - self.dtype == x.dtype - - results = [] - h = x.type(self.dtype) - for module in self.input_blocks: - h = module(h, emb) - if self.pool.startswith("spatial"): - results.append(h.type(x.dtype).mean(dim=(2, 3))) - h = self.middle_block(h, emb) - if self.pool.startswith("spatial"): - results.append(h.type(x.dtype).mean(dim=(2, 3))) - h = th.cat(results, axis=-1) - return self.out(h) - else: - h = h.type(x.dtype) - return self.out(h) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py deleted file mode 100644 index 2225d45a3a78..000000000000 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# adopted from -# https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/gaussian_diffusion.py -# and -# https://github.com/lucidrains/denoising-diffusion-pytorch/blob/7706bdfc6f527f58d33f84b7b522e61e6e3164b3/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py -# and -# https://github.com/openai/guided-diffusion/blob/0ba878e517b276c45d1195eb29f6f5f72659a05b/guided_diffusion/nn.py -# -# thanks! - - -import math - -import numpy as np -import torch -import torch.nn as nn -from einops import repeat -from group_norm import GroupNormOpt -from torch._dynamo import disable -from torch.cuda.amp import custom_bwd, custom_fwd - - -def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): - if schedule == "linear": - betas = torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2 - - elif schedule == "cosine": - timesteps = torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s - alphas = timesteps / (1 + cosine_s) * np.pi / 2 - alphas = torch.cos(alphas).pow(2) - alphas = alphas / alphas[0] - betas = 1 - alphas[1:] / alphas[:-1] - betas = np.clip(betas, a_min=0, a_max=0.999) - - elif schedule == "sqrt_linear": - betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) - elif schedule == "sqrt": - betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5 - else: - raise ValueError(f"schedule '{schedule}' unknown.") - return betas.numpy() - - -def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps, verbose=True): - if ddim_discr_method == "uniform": - c = num_ddpm_timesteps // num_ddim_timesteps - ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c))) - elif ddim_discr_method == "quad": - ddim_timesteps = ((np.linspace(0, np.sqrt(num_ddpm_timesteps * 0.8), num_ddim_timesteps)) ** 2).astype(int) - else: - raise NotImplementedError(f'There is no ddim discretization method called "{ddim_discr_method}"') - - # assert ddim_timesteps.shape[0] == num_ddim_timesteps - # add one to get the final alpha values right (the ones from first scale to data during sampling) - steps_out = ddim_timesteps + 1 - if verbose: - print(f"Selected timesteps for ddim sampler: {steps_out}") - return steps_out - - -def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbose=True): - # select alphas for computing the variance schedule - alphas = alphacums[ddim_timesteps] - alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_timesteps[:-1]].tolist()) - - # according the the formula provided in https://arxiv.org/abs/2010.02502 - variance = (1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev) - sigmas = eta * np.sqrt(variance) - if verbose: - print(f"Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}") - print( - f"For the chosen value of eta, which is {eta}, " - f"this results in the following sigma_t schedule for ddim sampler {sigmas}" - ) - return sigmas, alphas, alphas_prev, variance - - -def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999): - """ - Create a beta schedule that discretizes the given alpha_t_bar function, - which defines the cumulative product of (1-beta) over time from t = [0,1]. - :param num_diffusion_timesteps: the number of betas to produce. - :param alpha_bar: a lambda that takes an argument t from 0 to 1 and - produces the cumulative product of (1-beta) up to that - part of the diffusion process. - :param max_beta: the maximum beta to use; use values lower than 1 to - prevent singularities. - """ - betas = [] - for i in range(num_diffusion_timesteps): - t1 = i / num_diffusion_timesteps - t2 = (i + 1) / num_diffusion_timesteps - betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) - return np.array(betas) - - -def extract_into_tensor(a, t, x_shape): - b, *_ = t.shape - out = a.gather(-1, t) - return out.reshape(b, *((1,) * (len(x_shape) - 1))) - - -def checkpoint(func, inputs, params, flag): - """ - Evaluate a function without caching intermediate activations, allowing for - reduced memory at the expense of extra compute in the backward pass. - :param func: the function to evaluate. - :param inputs: the argument sequence to pass to `func`. - :param params: a sequence of parameters `func` depends on but does not - explicitly take as arguments. - :param flag: if False, disable gradient checkpointing. - """ - if flag: - args = tuple(inputs) + tuple(params) - return CheckpointFunction.apply(func, len(inputs), *args) - else: - return func(*inputs) - - -class CheckpointFunction(torch.autograd.Function): - @staticmethod - @custom_fwd - def forward(ctx, run_function, length, *args): - ctx.run_function = run_function - ctx.input_tensors = list(args[:length]) - ctx.input_params = list(args[length:]) - - with torch.no_grad(): - output_tensors = ctx.run_function(*ctx.input_tensors) - return output_tensors - - @staticmethod - @custom_bwd - def backward(ctx, *output_grads): - ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors] - with torch.enable_grad(): - # Fixes a bug where the first op in run_function modifies the - # Tensor storage in place, which is not allowed for detach()'d - # Tensors. - shallow_copies = [x.view_as(x) for x in ctx.input_tensors] - output_tensors = ctx.run_function(*shallow_copies) - input_grads = torch.autograd.grad( - output_tensors, ctx.input_tensors + ctx.input_params, output_grads, allow_unused=True, - ) - del ctx.input_tensors - del ctx.input_params - del output_tensors - return (None, None) + input_grads - - -# Temporary hack to get rid of TorchDynamo issue with DDP -# TODO: remove this if https://github.com/pytorch/pytorch/issues/94574 fixed -@disable -def get_idx(end, device): - return torch.arange(start=0, end=end, dtype=torch.float32, device=device) - - -def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False): - """ - Create sinusoidal timestep embeddings. - :param timesteps: a 1-D Tensor of N indices, one per batch element. - These may be fractional. - :param dim: the dimension of the output. - :param max_period: controls the minimum frequency of the embeddings. - :return: an [N x dim] Tensor of positional embeddings. - """ - if not repeat_only: - half = dim // 2 - idx = get_idx(half, timesteps.device) - freqs = torch.exp(-math.log(max_period) / half * idx) - args = timesteps[:, None].float() * freqs[None] - embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) - if dim % 2: - embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) - else: - embedding = repeat(timesteps, "b -> b d", d=dim) - return embedding - - -def zero_module(module): - """ - Zero out the parameters of a module and return it. - """ - for p in module.parameters(): - p.detach().zero_() - return module - - -def scale_module(module, scale): - """ - Scale the parameters of a module and return it. - """ - for p in module.parameters(): - p.detach().mul_(scale) - return module - - -def mean_flat(tensor): - """ - Take the mean over all non-batch dimensions. - """ - return tensor.mean(dim=list(range(1, len(tensor.shape)))) - - -def normalization(channels, act=""): - """ - Make a standard normalization layer. - :param channels: number of input channels. - :return: an nn.Module for normalization. - """ - return GroupNormOpt(32, channels, act=act) - - -# PyTorch 1.7 has SiLU, but we support PyTorch 1.5. -class SiLU(nn.Module): - def forward(self, x): - return x * torch.sigmoid(x) - - -class GroupNorm32(nn.GroupNorm): - def forward(self, x): - return super().forward(x.float()).type(x.dtype) - - -def conv_nd(dims, *args, **kwargs): - """ - Create a 1D, 2D, or 3D convolution module. - """ - if dims == 1: - return nn.Conv1d(*args, **kwargs) - elif dims == 2: - return nn.Conv2d(*args, **kwargs) - elif dims == 3: - return nn.Conv3d(*args, **kwargs) - raise ValueError(f"unsupported dimensions: {dims}") - - -def linear(*args, **kwargs): - """ - Create a linear module. - """ - return nn.Linear(*args, **kwargs) - - -def avg_pool_nd(dims, *args, **kwargs): - """ - Create a 1D, 2D, or 3D average pooling module. - """ - if dims == 1: - return nn.AvgPool1d(*args, **kwargs) - elif dims == 2: - return nn.AvgPool2d(*args, **kwargs) - elif dims == 3: - return nn.AvgPool3d(*args, **kwargs) - raise ValueError(f"unsupported dimensions: {dims}") - - -def noise_like(shape, device, repeat=False): - repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1))) - noise = lambda: torch.randn(shape, device=device) - return repeat_noise() if repeat else noise() - - -def interpolate_fn(x, xp, yp): - """ - A piecewise linear function y = f(x), using xp and yp as keypoints. - """ - N, K = x.shape[0], xp.shape[1] - all_x = torch.cat([x.unsqueeze(2), xp.unsqueeze(0).repeat((N, 1, 1))], dim=2) - sorted_all_x, x_indices = torch.sort(all_x, dim=2) - x_idx = torch.argmin(x_indices, dim=2) - cand_start_idx = x_idx - 1 - start_idx = torch.where( - torch.eq(x_idx, 0), - torch.tensor(1, device=x.device), - torch.where(torch.eq(x_idx, K), torch.tensor(K - 2, device=x.device), cand_start_idx,), - ) - end_idx = torch.where(torch.eq(start_idx, cand_start_idx), start_idx + 2, start_idx + 1) - start_x = torch.gather(sorted_all_x, dim=2, index=start_idx.unsqueeze(2)).squeeze(2) - end_x = torch.gather(sorted_all_x, dim=2, index=end_idx.unsqueeze(2)).squeeze(2) - start_idx2 = torch.where( - torch.eq(x_idx, 0), - torch.tensor(0, device=x.device), - torch.where(torch.eq(x_idx, K), torch.tensor(K - 2, device=x.device), cand_start_idx,), - ) - y_positions_expanded = yp.unsqueeze(0).expand(N, -1, -1) - start_y = torch.gather(y_positions_expanded, dim=2, index=start_idx2.unsqueeze(2)).squeeze(2) - end_y = torch.gather(y_positions_expanded, dim=2, index=(start_idx2 + 1).unsqueeze(2)).squeeze(2) - cand = start_y + (x - start_x) * (end_y - start_y) / (end_x - start_x) - return cand - - -def expand_dims(v, dims): - """ - Expand the tensor `v` to the dim `dims`. - """ - return v[(...,) + (None,) * (dims - 1)] diff --git a/nemo/collections/multimodal/modules/stable_diffusion/distributions/__init__.py b/nemo/collections/multimodal/modules/stable_diffusion/distributions/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/modules/stable_diffusion/distributions/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py b/nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py deleted file mode 100644 index 81d79ac5801a..000000000000 --- a/nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import torch - - -class AbstractDistribution: - def sample(self): - raise NotImplementedError() - - def mode(self): - raise NotImplementedError() - - -class DiracDistribution(AbstractDistribution): - def __init__(self, value): - self.value = value - - def sample(self): - return self.value - - def mode(self): - return self.value - - -class DiagonalGaussianDistribution(object): - def __init__(self, parameters, deterministic=False): - self.parameters = parameters - self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) - self.logvar = torch.clamp(self.logvar, -30.0, 20.0) - self.deterministic = deterministic - self.std = torch.exp(0.5 * self.logvar) - self.var = torch.exp(self.logvar) - if self.deterministic: - self.var = self.std = torch.zeros_like(self.mean, device=self.parameters.device) - - def sample(self): - x = self.mean + self.std * torch.randn(self.mean.shape, device=self.parameters.device) - return x - - def kl(self, other=None): - if self.deterministic: - return torch.Tensor([0.0]) - else: - if other is None: - return 0.5 * torch.sum(torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar, dim=[1, 2, 3]) - else: - return 0.5 * torch.sum( - torch.pow(self.mean - other.mean, 2) / other.var - + self.var / other.var - - 1.0 - - self.logvar - + other.logvar, - dim=[1, 2, 3], - ) - - def nll(self, sample, dims=[1, 2, 3]): - if self.deterministic: - return torch.Tensor([0.0]) - logtwopi = np.log(2.0 * np.pi) - return 0.5 * torch.sum(logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, dim=dims) - - def mode(self): - return self.mean - - -def normal_kl(mean1, logvar1, mean2, logvar2): - """ - source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 - Compute the KL divergence between two gaussians. - Shapes are automatically broadcasted, so batches can be compared to - scalars, among other use cases. - """ - tensor = None - for obj in (mean1, logvar1, mean2, logvar2): - if isinstance(obj, torch.Tensor): - tensor = obj - break - assert tensor is not None, "at least one argument must be a Tensor" - - # Force variances to be Tensors. Broadcasting helps convert scalars to - # Tensors, but it does not work for torch.exp(). - logvar1, logvar2 = [x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) for x in (logvar1, logvar2)] - - return 0.5 * ( - -1.0 + logvar2 - logvar1 + torch.exp(logvar1 - logvar2) + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) - ) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/__init__.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/modules/stable_diffusion/encoders/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py deleted file mode 100644 index 7b8f3c38d53f..000000000000 --- a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py +++ /dev/null @@ -1,471 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import tempfile -from functools import partial - -import kornia -import open_clip -import torch -import torch.nn as nn -from einops import rearrange, repeat -from omegaconf import OmegaConf -from torch.utils.checkpoint import checkpoint -from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer -from transformers.models.clip.modeling_clip import CLIPTextTransformer - -from nemo.collections.multimodal.data.clip.clip_dataset import get_preprocess_fns -from nemo.collections.multimodal.models.clip.megatron_clip_models import CLIPModel -from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import ( - TransformerWrapper, # TODO: can we directly rely on lucidrains code and simply add this as a reuirement? --> test -) -from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import Encoder -from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo -from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer -from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector -from nemo.utils import logging - -try: - from megatron.core import ModelParallelConfig, parallel_state - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - ModelParallelConfig = ApexGuardDefaults - - HAVE_MEGATRON_CORE = False - - -class AbstractEncoder(nn.Module): - def __init__(self): - super().__init__() - - def encode(self, *args, **kwargs): - raise NotImplementedError - - -class ClassEmbedder(nn.Module): - def __init__(self, embed_dim, n_classes=1000, key='class'): - super().__init__() - self.key = key - self.embedding = nn.Embedding(n_classes, embed_dim) - - def forward(self, batch, key=None): - if key is None: - key = self.key - # this is for use in crossattn - c = batch[key][:, None] - c = self.embedding(c) - return c - - -class TransformerEmbedder(AbstractEncoder): - """Some transformer encoder layers""" - - def __init__(self, n_embed, n_layer, vocab_size, max_seq_len=77, device="cuda"): - super().__init__() - self.device = device - self.transformer = TransformerWrapper( - num_tokens=vocab_size, max_seq_len=max_seq_len, attn_layers=Encoder(dim=n_embed, depth=n_layer) - ) - - def forward(self, tokens): - tokens = tokens.to(self.device) # meh - z = self.transformer(tokens, return_embeddings=True) - return z - - def encode(self, x): - return self(x) - - -class BERTTokenizer(AbstractEncoder): - """ Uses a pretrained BERT tokenizer by huggingface. Vocab size: 30522 (?)""" - - def __init__(self, device="cuda", vq_interface=True, max_length=77): - super().__init__() - from transformers import BertTokenizerFast # TODO: add to reuquirements - - self.tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased") - self.device = device - self.vq_interface = vq_interface - self.max_length = max_length - - def forward(self, text): - batch_encoding = self.tokenizer( - text, - truncation=True, - max_length=self.max_length, - return_length=True, - return_overflowing_tokens=False, - padding="max_length", - return_tensors="pt", - ) - tokens = batch_encoding["input_ids"].to(self.device) - return tokens - - @torch.no_grad() - def encode(self, text): - tokens = self(text) - if not self.vq_interface: - return tokens - return None, None, [None, None, tokens] - - def decode(self, text): - return text - - -class BERTEmbedder(AbstractEncoder): - """Uses the BERT tokenizr model and add some transformer encoder layers""" - - def __init__( - self, - n_embed, - n_layer, - vocab_size=30522, - max_seq_len=77, - device="cuda", - use_tokenizer=True, - embedding_dropout=0.0, - ): - super().__init__() - self.use_tknz_fn = use_tokenizer - if self.use_tknz_fn: - self.tknz_fn = BERTTokenizer(vq_interface=False, max_length=max_seq_len) - self.device = device - self.transformer = TransformerWrapper( - num_tokens=vocab_size, - max_seq_len=max_seq_len, - attn_layers=Encoder(dim=n_embed, depth=n_layer), - emb_dropout=embedding_dropout, - ) - - def forward(self, text): - if self.use_tknz_fn: - tokens = self.tknz_fn(text) # .to(self.device) - else: - tokens = text - z = self.transformer(tokens, return_embeddings=True) - return z - - def encode(self, text): - # output of length 77 - return self(text) - - -class SpatialRescaler(nn.Module): - def __init__(self, n_stages=1, method='bilinear', multiplier=0.5, in_channels=3, out_channels=None, bias=False): - super().__init__() - self.n_stages = n_stages - assert self.n_stages >= 0 - assert method in ['nearest', 'linear', 'bilinear', 'trilinear', 'bicubic', 'area'] - self.multiplier = multiplier - self.interpolator = partial(torch.nn.functional.interpolate, mode=method) - self.remap_output = out_channels is not None - if self.remap_output: - print(f'Spatial Rescaler mapping from {in_channels} to {out_channels} channels after resizing.') - self.channel_mapper = nn.Conv2d(in_channels, out_channels, 1, bias=bias) - - def forward(self, x): - for stage in range(self.n_stages): - x = self.interpolator(x, scale_factor=self.multiplier) - - if self.remap_output: - x = self.channel_mapper(x) - return x - - def encode(self, x): - return self(x) - - -class FrozenCLIPEmbedder(AbstractEncoder): - """Uses the CLIP transformer encoder for text (from Hugging Face)""" - - def __init__( - self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77, capture_cudagraph_iters: int = -1 - ): - super().__init__() - self.tokenizer = CLIPTokenizer.from_pretrained(version) - self.transformer = CLIPTextModel.from_pretrained(version) - self.device = device - self.max_length = max_length - self.freeze() - - # CUDA graph captured sub-modules - self.capture_cudagraph_iters = capture_cudagraph_iters - self.iterations = 0 - self.stream = torch.cuda.Stream() - self.transformer_graph = torch.cuda.CUDAGraph() - self.static_tokens = None - self.static_outputs = None - - def freeze(self): - self.transformer = self.transformer.eval() - for param in self.parameters(): - param.requires_grad = False - - def forward(self, text): - batch_encoding = self.tokenizer( - text, - truncation=True, - max_length=self.max_length, - return_length=True, - return_overflowing_tokens=False, - padding="max_length", - return_tensors="pt", - ) - if self.capture_cudagraph_iters < 0: - tokens = batch_encoding["input_ids"].to(self.device, non_blocking=True) - outputs = self.transformer(input_ids=tokens) - z = outputs.last_hidden_state - - else: - if self.static_tokens is None: - self.static_tokens = batch_encoding["input_ids"].to(device=self.device, non_blocking=True) - self.static_tokens.copy_(batch_encoding["input_ids"], non_blocking=True) - - if self.iterations == self.capture_cudagraph_iters: - # cuda graph capture - logging.info("Capturing CUDA graph for module: %s", self.transformer.__class__.__name__) - with torch.cuda.graph(self.transformer_graph): - self.static_outputs = self.transformer(input_ids=self.static_tokens) - - if 0 <= self.capture_cudagraph_iters <= self.iterations: - # cuda graph replay - self.transformer_graph.replay() - else: - # warmup - self.stream.wait_stream(torch.cuda.current_stream()) - with torch.cuda.stream(self.stream): - self.static_outputs = self.transformer(input_ids=self.static_tokens) - torch.cuda.current_stream().wait_stream(self.stream) - self.iterations += 1 - z = self.static_outputs.last_hidden_state - - # # Pad the seq length to multiple of 8 - seq_len = (z.shape[1] + 8 - 1) // 8 * 8 - z = torch.nn.functional.pad(z, (0, 0, 0, seq_len - z.shape[1]), value=0.0) - return z - - def encode(self, text): - return self(text) - - -class FrozenOpenCLIPEmbedder(AbstractEncoder): - """ - Uses the OpenCLIP transformer encoder for text - """ - - LAYERS = [ - # "pooled", - "last", - "penultimate", - ] - - def __init__( - self, - arch="ViT-H-14", - version="laion2b_s32b_b79k", - device="cuda", - max_length=77, - freeze=True, - layer="last", - use_fp16=False, - ): - super().__init__() - assert layer in self.LAYERS - model, _, _ = open_clip.create_model_and_transforms(arch, device=torch.device('cpu'), pretrained=version) - del model.visual - self.model = model - - self.device = device - self.max_length = max_length - if freeze: - self.freeze() - self.layer = layer - if self.layer == "last": - self.layer_idx = 0 - elif self.layer == "penultimate": - self.layer_idx = 1 - else: - raise NotImplementedError() - - def freeze(self): - self.model = self.model.eval() - for param in self.parameters(): - param.requires_grad = False - - def forward(self, text): - tokens = open_clip.tokenize(text) - z = self.encode_with_transformer(tokens.to(self.device)) - return z - - def encode_with_transformer(self, text): - x = self.model.token_embedding(text) # [batch_size, n_ctx, d_model] - x = x + self.model.positional_embedding - x = x.permute(1, 0, 2) # NLD -> LND - x = self.text_transformer_forward(x, attn_mask=self.model.attn_mask) - x = x.permute(1, 0, 2) # LND -> NLD - x = self.model.ln_final(x) - return x - - def text_transformer_forward(self, x: torch.Tensor, attn_mask=None): - for i, r in enumerate(self.model.transformer.resblocks): - if i == len(self.model.transformer.resblocks) - self.layer_idx: - break - if self.model.transformer.grad_checkpointing and not torch.jit.is_scripting(): - x = checkpoint(r, x, attn_mask) - else: - x = r(x, attn_mask=attn_mask) - return x - - def encode(self, text): - return self(text) - - -class FrozenMegatronCLIPEmbedder(AbstractEncoder): - def __init__(self, restore_from_path, device="cuda", layer="last", freeze=True, cfg=None, use_fp16=False): - super().__init__() - if restore_from_path is not None: - cfg, state_dict = self.load_config_and_state_from_nemo(restore_from_path) - elif cfg is not None: - state_dict = None - else: - raise ValueError("Either restore_from_path or cfg should not be None") - - self.cfg = cfg - self.build_tokenizer(cfg) - self.load_model(cfg, state_dict) - - self.device = device - if freeze: - self.freeze() - self.layer = layer - if self.layer == "last": - self.layer_idx = 0 - elif self.layer == "penultimate": - self.layer_idx = 1 - else: - raise NotImplementedError() - - def freeze(self): - self.model = self.model.eval() - for param in self.parameters(): - param.requires_grad = False - - def load_config_and_state_from_nemo(self, nemo_path): - if torch.cuda.is_available(): - map_location = torch.device('cuda') - else: - map_location = torch.device('cpu') - save_restore_connector = NLPSaveRestoreConnector() - cwd = os.getcwd() - - with tempfile.TemporaryDirectory() as tmpdir: - try: - save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) - - # Change current working directory to - os.chdir(tmpdir) - config_yaml = os.path.join(tmpdir, save_restore_connector.model_config_yaml) - cfg = OmegaConf.load(config_yaml) - - model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) - state_dict = save_restore_connector._load_state_dict_from_disk( - model_weights, map_location=map_location - ) - finally: - os.chdir(cwd) - - return cfg, state_dict - - def build_tokenizer(self, cfg): - legacy = cfg.tokenizer.sentencepiece_legacy - self.tokenizer = get_nmt_tokenizer( - library=cfg.tokenizer.library, - model_name=cfg.tokenizer.type, - tokenizer_model=cfg.tokenizer.model, - vocab_file=cfg.tokenizer.vocab_file, - merges_file=cfg.tokenizer.merge_file, - delimiter=cfg.tokenizer.get('delimiter', None), - legacy=legacy, - ) - - _, self.text_transform = get_preprocess_fns(cfg, self.tokenizer, is_train=False,) - self.max_length = cfg.text.get("max_position_embeddings") - - def load_model(self, cfg, state_dict): - padded_vocab_size = self._vocab_size_with_padding( - orig_vocab_size=self.tokenizer.vocab_size, - make_vocab_size_divisible_by=cfg.get('make_vocab_size_divisible_by', 128), - tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1), - ) - model = CLIPModel( - model_cfg=cfg, - model_parallel_config=ModelParallelConfig(), - padded_vocab_size=padded_vocab_size, - pre_process=cfg.text.pre_process, - post_process=cfg.text.post_process, - ) - - if state_dict is not None: - clip_state_dict = {} - for key, value in state_dict.items(): - key = key[6:] - clip_state_dict[key] = value - model.load_state_dict(clip_state_dict) - - del model.vision_encoder - self.model = model.text_encoder - - def _vocab_size_with_padding(self, orig_vocab_size, make_vocab_size_divisible_by, tensor_model_parallel_size): - after = orig_vocab_size - multiple = make_vocab_size_divisible_by * tensor_model_parallel_size - while (after % multiple) != 0: - after += 1 - return after - - def forward(self, text): - texts = self.text_transform(text) - z = self.encode_with_transformer(texts.to(self.device)) - # # Pad the seq length to multiple of 8 - seq_len = (z.shape[1] + 8 - 1) // 8 * 8 - z = torch.nn.functional.pad(z, (0, 0, 0, seq_len - z.shape[1]), value=0.0) - return z - - def encode_with_transformer(self, text): - x = self.model.language_model.embedding.word_embeddings(text) - x += self.model.language_model.embedding.position_embeddings - x = x.permute(1, 0, 2) # NLD -> LND - x = self.text_transformer_forward(x, attn_mask=self.model.attn_mask) - x = self.model.language_model.encoder.final_layernorm(x) - x = x.permute(1, 0, 2) # LND -> NLD - return x - - def text_transformer_forward(self, x: torch.Tensor, attn_mask=None): - for i, r in enumerate(self.model.language_model.encoder.layers): - if i == len(self.model.language_model.encoder.layers) - self.layer_idx: - break - x = r(x, attn_mask) - return x - - def encode(self, text): - return self(text) - - -if __name__ == "__main__": - from ldm.util import count_params - - model = FrozenCLIPEmbedder() - count_params(model, verbose=True) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py deleted file mode 100644 index ff47596f9932..000000000000 --- a/nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py +++ /dev/null @@ -1,629 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""shout-out to https://github.com/lucidrains/x-transformers/tree/main/x_transformers""" -from collections import namedtuple -from functools import partial -from inspect import isfunction - -import torch -import torch.nn.functional as F -from einops import rearrange, reduce, repeat -from torch import einsum, nn - -# constants - -DEFAULT_DIM_HEAD = 64 - -Intermediates = namedtuple('Intermediates', ['pre_softmax_attn', 'post_softmax_attn']) - -LayerIntermediates = namedtuple('Intermediates', ['hiddens', 'attn_intermediates']) - - -class AbsolutePositionalEmbedding(nn.Module): - def __init__(self, dim, max_seq_len): - super().__init__() - self.emb = nn.Embedding(max_seq_len, dim) - self.init_() - - def init_(self): - nn.init.normal_(self.emb.weight, std=0.02) - - def forward(self, x): - n = torch.arange(x.shape[1], device=x.device) - return self.emb(n)[None, :, :] - - -class FixedPositionalEmbedding(nn.Module): - def __init__(self, dim): - super().__init__() - inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim)) - self.register_buffer('inv_freq', inv_freq) - - def forward(self, x, seq_dim=1, offset=0): - t = torch.arange(x.shape[seq_dim], device=x.device).type_as(self.inv_freq) + offset - sinusoid_inp = torch.einsum('i , j -> i j', t, self.inv_freq) - emb = torch.cat((sinusoid_inp.sin(), sinusoid_inp.cos()), dim=-1) - return emb[None, :, :] - - -# helpers - - -def exists(val): - return val is not None - - -def default(val, d): - if exists(val): - return val - return d() if isfunction(d) else d - - -def always(val): - def inner(*args, **kwargs): - return val - - return inner - - -def not_equals(val): - def inner(x): - return x != val - - return inner - - -def equals(val): - def inner(x): - return x == val - - return inner - - -def max_neg_value(tensor): - return -torch.finfo(tensor.dtype).max - - -# keyword argument helpers - - -def pick_and_pop(keys, d): - values = list(map(lambda key: d.pop(key), keys)) - return dict(zip(keys, values)) - - -def group_dict_by_key(cond, d): - return_val = [dict(), dict()] - for key in d.keys(): - match = bool(cond(key)) - ind = int(not match) - return_val[ind][key] = d[key] - return (*return_val,) - - -def string_begins_with(prefix, str): - return str.startswith(prefix) - - -def group_by_key_prefix(prefix, d): - return group_dict_by_key(partial(string_begins_with, prefix), d) - - -def groupby_prefix_and_trim(prefix, d): - kwargs_with_prefix, kwargs = group_dict_by_key(partial(string_begins_with, prefix), d) - kwargs_without_prefix = dict(map(lambda x: (x[0][len(prefix) :], x[1]), tuple(kwargs_with_prefix.items()))) - return kwargs_without_prefix, kwargs - - -# classes -class Scale(nn.Module): - def __init__(self, value, fn): - super().__init__() - self.value = value - self.fn = fn - - def forward(self, x, **kwargs): - x, *rest = self.fn(x, **kwargs) - return (x * self.value, *rest) - - -class Rezero(nn.Module): - def __init__(self, fn): - super().__init__() - self.fn = fn - self.g = nn.Parameter(torch.zeros(1)) - - def forward(self, x, **kwargs): - x, *rest = self.fn(x, **kwargs) - return (x * self.g, *rest) - - -class ScaleNorm(nn.Module): - def __init__(self, dim, eps=1e-5): - super().__init__() - self.scale = dim ** -0.5 - self.eps = eps - self.g = nn.Parameter(torch.ones(1)) - - def forward(self, x): - norm = torch.norm(x, dim=-1, keepdim=True) * self.scale - return x / norm.clamp(min=self.eps) * self.g - - -class RMSNorm(nn.Module): - def __init__(self, dim, eps=1e-8): - super().__init__() - self.scale = dim ** -0.5 - self.eps = eps - self.g = nn.Parameter(torch.ones(dim)) - - def forward(self, x): - norm = torch.norm(x, dim=-1, keepdim=True) * self.scale - return x / norm.clamp(min=self.eps) * self.g - - -class Residual(nn.Module): - def forward(self, x, residual): - return x + residual - - -class GRUGating(nn.Module): - def __init__(self, dim): - super().__init__() - self.gru = nn.GRUCell(dim, dim) - - def forward(self, x, residual): - gated_output = self.gru(rearrange(x, 'b n d -> (b n) d'), rearrange(residual, 'b n d -> (b n) d')) - - return gated_output.reshape_as(x) - - -# feedforward - - -class GEGLU(nn.Module): - def __init__(self, dim_in, dim_out): - super().__init__() - self.proj = nn.Linear(dim_in, dim_out * 2) - - def forward(self, x): - x, gate = self.proj(x).chunk(2, dim=-1) - return x * F.gelu(gate) - - -class FeedForward(nn.Module): - def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0): - super().__init__() - inner_dim = int(dim * mult) - dim_out = default(dim_out, dim) - project_in = nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU()) if not glu else GEGLU(dim, inner_dim) - - self.net = nn.Sequential(project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim_out)) - - def forward(self, x): - return self.net(x) - - -# attention. -class Attention(nn.Module): - def __init__( - self, - dim, - dim_head=DEFAULT_DIM_HEAD, - heads=8, - causal=False, - mask=None, - talking_heads=False, - sparse_topk=None, - use_entmax15=False, - num_mem_kv=0, - dropout=0.0, - on_attn=False, - ): - super().__init__() - if use_entmax15: - raise NotImplementedError("Check out entmax activation instead of softmax activation!") - self.scale = dim_head ** -0.5 - self.heads = heads - self.causal = causal - self.mask = mask - - inner_dim = dim_head * heads - - self.to_q = nn.Linear(dim, inner_dim, bias=False) - self.to_k = nn.Linear(dim, inner_dim, bias=False) - self.to_v = nn.Linear(dim, inner_dim, bias=False) - self.dropout = nn.Dropout(dropout) - - # talking heads - self.talking_heads = talking_heads - if talking_heads: - self.pre_softmax_proj = nn.Parameter(torch.randn(heads, heads)) - self.post_softmax_proj = nn.Parameter(torch.randn(heads, heads)) - - # explicit topk sparse attention - self.sparse_topk = sparse_topk - - # entmax - # self.attn_fn = entmax15 if use_entmax15 else F.softmax - self.attn_fn = F.softmax - - # add memory key / values - self.num_mem_kv = num_mem_kv - if num_mem_kv > 0: - self.mem_k = nn.Parameter(torch.randn(heads, num_mem_kv, dim_head)) - self.mem_v = nn.Parameter(torch.randn(heads, num_mem_kv, dim_head)) - - # attention on attention - self.attn_on_attn = on_attn - self.to_out = nn.Sequential(nn.Linear(inner_dim, dim * 2), nn.GLU()) if on_attn else nn.Linear(inner_dim, dim) - - def forward( - self, - x, - context=None, - mask=None, - context_mask=None, - rel_pos=None, - sinusoidal_emb=None, - prev_attn=None, - mem=None, - ): - b, n, _, h, talking_heads, device = *x.shape, self.heads, self.talking_heads, x.device - kv_input = default(context, x) - - q_input = x - k_input = kv_input - v_input = kv_input - - if exists(mem): - k_input = torch.cat((mem, k_input), dim=-2) - v_input = torch.cat((mem, v_input), dim=-2) - - if exists(sinusoidal_emb): - # in shortformer, the query would start at a position offset depending on the past cached memory - offset = k_input.shape[-2] - q_input.shape[-2] - q_input = q_input + sinusoidal_emb(q_input, offset=offset) - k_input = k_input + sinusoidal_emb(k_input) - - q = self.to_q(q_input) - k = self.to_k(k_input) - v = self.to_v(v_input) - - q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=h), (q, k, v)) - - input_mask = None - if any(map(exists, (mask, context_mask))): - q_mask = default(mask, lambda: torch.ones((b, n), device=device).bool()) - k_mask = q_mask if not exists(context) else context_mask - k_mask = default(k_mask, lambda: torch.ones((b, k.shape[-2]), device=device).bool()) - q_mask = rearrange(q_mask, 'b i -> b () i ()') - k_mask = rearrange(k_mask, 'b j -> b () () j') - input_mask = q_mask * k_mask - - if self.num_mem_kv > 0: - mem_k, mem_v = map(lambda t: repeat(t, 'h n d -> b h n d', b=b), (self.mem_k, self.mem_v)) - k = torch.cat((mem_k, k), dim=-2) - v = torch.cat((mem_v, v), dim=-2) - if exists(input_mask): - input_mask = F.pad(input_mask, (self.num_mem_kv, 0), value=True) - - dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale - mask_value = max_neg_value(dots) - - if exists(prev_attn): - dots = dots + prev_attn - - pre_softmax_attn = dots - - if talking_heads: - dots = einsum('b h i j, h k -> b k i j', dots, self.pre_softmax_proj).contiguous() - - if exists(rel_pos): - dots = rel_pos(dots) - - if exists(input_mask): - dots.masked_fill_(~input_mask, mask_value) - del input_mask - - if self.causal: - i, j = dots.shape[-2:] - r = torch.arange(i, device=device) - mask = rearrange(r, 'i -> () () i ()') < rearrange(r, 'j -> () () () j') - mask = F.pad(mask, (j - i, 0), value=False) - dots.masked_fill_(mask, mask_value) - del mask - - if exists(self.sparse_topk) and self.sparse_topk < dots.shape[-1]: - top, _ = dots.topk(self.sparse_topk, dim=-1) - vk = top[..., -1].unsqueeze(-1).expand_as(dots) - mask = dots < vk - dots.masked_fill_(mask, mask_value) - del mask - - attn = self.attn_fn(dots, dim=-1) - post_softmax_attn = attn - - attn = self.dropout(attn) - - if talking_heads: - attn = einsum('b h i j, h k -> b k i j', attn, self.post_softmax_proj).contiguous() - - out = einsum('b h i j, b h j d -> b h i d', attn, v) - out = rearrange(out, 'b h n d -> b n (h d)') - - intermediates = Intermediates(pre_softmax_attn=pre_softmax_attn, post_softmax_attn=post_softmax_attn) - - return self.to_out(out), intermediates - - -class AttentionLayers(nn.Module): - def __init__( - self, - dim, - depth, - heads=8, - causal=False, - cross_attend=False, - only_cross=False, - use_scalenorm=False, - use_rmsnorm=False, - use_rezero=False, - rel_pos_num_buckets=32, - rel_pos_max_distance=128, - position_infused_attn=False, - custom_layers=None, - sandwich_coef=None, - par_ratio=None, - residual_attn=False, - cross_residual_attn=False, - macaron=False, - pre_norm=True, - gate_residual=False, - **kwargs, - ): - super().__init__() - ff_kwargs, kwargs = groupby_prefix_and_trim('ff_', kwargs) - attn_kwargs, _ = groupby_prefix_and_trim('attn_', kwargs) - - dim_head = attn_kwargs.get('dim_head', DEFAULT_DIM_HEAD) - - self.dim = dim - self.depth = depth - self.layers = nn.ModuleList([]) - - self.has_pos_emb = position_infused_attn - self.pia_pos_emb = FixedPositionalEmbedding(dim) if position_infused_attn else None - self.rotary_pos_emb = always(None) - - assert ( - rel_pos_num_buckets <= rel_pos_max_distance - ), 'number of relative position buckets must be less than the relative position max distance' - self.rel_pos = None - - self.pre_norm = pre_norm - - self.residual_attn = residual_attn - self.cross_residual_attn = cross_residual_attn - - norm_class = ScaleNorm if use_scalenorm else nn.LayerNorm - norm_class = RMSNorm if use_rmsnorm else norm_class - norm_fn = partial(norm_class, dim) - - norm_fn = nn.Identity if use_rezero else norm_fn - branch_fn = Rezero if use_rezero else None - - if cross_attend and not only_cross: - default_block = ('a', 'c', 'f') - elif cross_attend and only_cross: - default_block = ('c', 'f') - else: - default_block = ('a', 'f') - - if macaron: - default_block = ('f',) + default_block - - if exists(custom_layers): - layer_types = custom_layers - elif exists(par_ratio): - par_depth = depth * len(default_block) - assert 1 < par_ratio <= par_depth, 'par ratio out of range' - default_block = tuple(filter(not_equals('f'), default_block)) - par_attn = par_depth // par_ratio - depth_cut = par_depth * 2 // 3 # 2 / 3 attention layer cutoff suggested by PAR paper - par_width = (depth_cut + depth_cut // par_attn) // par_attn - assert len(default_block) <= par_width, 'default block is too large for par_ratio' - par_block = default_block + ('f',) * (par_width - len(default_block)) - par_head = par_block * par_attn - layer_types = par_head + ('f',) * (par_depth - len(par_head)) - elif exists(sandwich_coef): - assert sandwich_coef > 0 and sandwich_coef <= depth, 'sandwich coefficient should be less than the depth' - layer_types = ('a',) * sandwich_coef + default_block * (depth - sandwich_coef) + ('f',) * sandwich_coef - else: - layer_types = default_block * depth - - self.layer_types = layer_types - self.num_attn_layers = len(list(filter(equals('a'), layer_types))) - - for layer_type in self.layer_types: - if layer_type == 'a': - layer = Attention(dim, heads=heads, causal=causal, **attn_kwargs) - elif layer_type == 'c': - layer = Attention(dim, heads=heads, **attn_kwargs) - elif layer_type == 'f': - layer = FeedForward(dim, **ff_kwargs) - layer = layer if not macaron else Scale(0.5, layer) - else: - raise Exception(f'invalid layer type {layer_type}') - - if isinstance(layer, Attention) and exists(branch_fn): - layer = branch_fn(layer) - - if gate_residual: - residual_fn = GRUGating(dim) - else: - residual_fn = Residual() - - self.layers.append(nn.ModuleList([norm_fn(), layer, residual_fn])) - - def forward(self, x, context=None, mask=None, context_mask=None, mems=None, return_hiddens=False): - hiddens = [] - intermediates = [] - prev_attn = None - prev_cross_attn = None - - mems = mems.copy() if exists(mems) else [None] * self.num_attn_layers - - for ind, (layer_type, (norm, block, residual_fn)) in enumerate(zip(self.layer_types, self.layers)): - is_last = ind == (len(self.layers) - 1) - - if layer_type == 'a': - hiddens.append(x) - layer_mem = mems.pop(0) - - residual = x - - if self.pre_norm: - x = norm(x) - - if layer_type == 'a': - out, inter = block( - x, - mask=mask, - sinusoidal_emb=self.pia_pos_emb, - rel_pos=self.rel_pos, - prev_attn=prev_attn, - mem=layer_mem, - ) - elif layer_type == 'c': - out, inter = block(x, context=context, mask=mask, context_mask=context_mask, prev_attn=prev_cross_attn) - elif layer_type == 'f': - out = block(x) - - x = residual_fn(out, residual) - - if layer_type in ('a', 'c'): - intermediates.append(inter) - - if layer_type == 'a' and self.residual_attn: - prev_attn = inter.pre_softmax_attn - elif layer_type == 'c' and self.cross_residual_attn: - prev_cross_attn = inter.pre_softmax_attn - - if not self.pre_norm and not is_last: - x = norm(x) - - if return_hiddens: - intermediates = LayerIntermediates(hiddens=hiddens, attn_intermediates=intermediates) - - return x, intermediates - - return x - - -class Encoder(AttentionLayers): - def __init__(self, **kwargs): - assert 'causal' not in kwargs, 'cannot set causality on encoder' - super().__init__(causal=False, **kwargs) - - -class TransformerWrapper(nn.Module): - def __init__( - self, - *, - num_tokens, - max_seq_len, - attn_layers, - emb_dim=None, - max_mem_len=0.0, - emb_dropout=0.0, - num_memory_tokens=None, - tie_embedding=False, - use_pos_emb=True, - ): - super().__init__() - assert isinstance(attn_layers, AttentionLayers), 'attention layers must be one of Encoder or Decoder' - - dim = attn_layers.dim - emb_dim = default(emb_dim, dim) - - self.max_seq_len = max_seq_len - self.max_mem_len = max_mem_len - self.num_tokens = num_tokens - - self.token_emb = nn.Embedding(num_tokens, emb_dim) - self.pos_emb = ( - AbsolutePositionalEmbedding(emb_dim, max_seq_len) - if (use_pos_emb and not attn_layers.has_pos_emb) - else always(0) - ) - self.emb_dropout = nn.Dropout(emb_dropout) - - self.project_emb = nn.Linear(emb_dim, dim) if emb_dim != dim else nn.Identity() - self.attn_layers = attn_layers - self.norm = nn.LayerNorm(dim) - - self.init_() - - self.to_logits = nn.Linear(dim, num_tokens) if not tie_embedding else lambda t: t @ self.token_emb.weight.t() - - # memory tokens (like [cls]) from Memory Transformers paper - num_memory_tokens = default(num_memory_tokens, 0) - self.num_memory_tokens = num_memory_tokens - if num_memory_tokens > 0: - self.memory_tokens = nn.Parameter(torch.randn(num_memory_tokens, dim)) - - # let funnel encoder know number of memory tokens, if specified - if hasattr(attn_layers, 'num_memory_tokens'): - attn_layers.num_memory_tokens = num_memory_tokens - - def init_(self): - nn.init.normal_(self.token_emb.weight, std=0.02) - - def forward( - self, x, return_embeddings=False, mask=None, return_mems=False, return_attn=False, mems=None, **kwargs - ): - b, n, device, num_mem = *x.shape, x.device, self.num_memory_tokens - x = self.token_emb(x) - x += self.pos_emb(x) - x = self.emb_dropout(x) - - x = self.project_emb(x) - - if num_mem > 0: - mem = repeat(self.memory_tokens, 'n d -> b n d', b=b) - x = torch.cat((mem, x), dim=1) - - # auto-handle masking after appending memory tokens - if exists(mask): - mask = F.pad(mask, (num_mem, 0), value=True) - - x, intermediates = self.attn_layers(x, mask=mask, mems=mems, return_hiddens=True, **kwargs) - x = self.norm(x) - - mem, x = x[:, :num_mem], x[:, num_mem:] - - out = self.to_logits(x) if not return_embeddings else x - - if return_mems: - hiddens = intermediates.hiddens - new_mems = list(map(lambda pair: torch.cat(pair, dim=-2), zip(mems, hiddens))) if exists(mems) else hiddens - new_mems = list(map(lambda t: t[..., -self.max_mem_len :, :].detach(), new_mems)) - return out, new_mems - - if return_attn: - attn_maps = list(map(lambda t: t.post_softmax_attn, intermediates.attn_intermediates)) - return out, attn_maps - - return out diff --git a/nemo/collections/multimodal/parts/__init__.py b/nemo/collections/multimodal/parts/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/parts/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/parts/imagen/__init__.py b/nemo/collections/multimodal/parts/imagen/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/parts/imagen/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/parts/imagen/utils.py b/nemo/collections/multimodal/parts/imagen/utils.py deleted file mode 100644 index 565b1ed6a2b4..000000000000 --- a/nemo/collections/multimodal/parts/imagen/utils.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch - - -def random_dropout(embeddings, drop_rate): - r""" - Function to perform random dropout for embeddings. - When we drop embeddings, we zero them out. - Args: - embeddings (tensor): Input embeddings - drop_rate (float): Rate of dropping the embedding. - """ - nsamples = embeddings.shape[0] - zero_flag = torch.ones(nsamples, 1, 1).to(embeddings.dtype) * (1 - drop_rate) - zero_flag = torch.bernoulli(zero_flag).cuda() - embeddings = embeddings * zero_flag - return embeddings diff --git a/nemo/collections/multimodal/parts/stable_diffusion/__init__.py b/nemo/collections/multimodal/parts/stable_diffusion/__init__.py deleted file mode 100644 index 4fc50543f1d2..000000000000 --- a/nemo/collections/multimodal/parts/stable_diffusion/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py b/nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py deleted file mode 100644 index 620d1dcad41a..000000000000 --- a/nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np - - -class LambdaWarmUpCosineScheduler: - """ - note: use with a base_lr of 1.0 - """ - - def __init__(self, warm_up_steps, lr_min, lr_max, lr_start, max_decay_steps, verbosity_interval=0): - self.lr_warm_up_steps = warm_up_steps - self.lr_start = lr_start - self.lr_min = lr_min - self.lr_max = lr_max - self.lr_max_decay_steps = max_decay_steps - self.last_lr = 0.0 - self.verbosity_interval = verbosity_interval - - def schedule(self, n, **kwargs): - if self.verbosity_interval > 0: - if n % self.verbosity_interval == 0: - print(f"current step: {n}, recent lr-multiplier: {self.last_lr}") - if n < self.lr_warm_up_steps: - lr = (self.lr_max - self.lr_start) / self.lr_warm_up_steps * n + self.lr_start - self.last_lr = lr - return lr - else: - t = (n - self.lr_warm_up_steps) / (self.lr_max_decay_steps - self.lr_warm_up_steps) - t = min(t, 1.0) - lr = self.lr_min + 0.5 * (self.lr_max - self.lr_min) * (1 + np.cos(t * np.pi)) - self.last_lr = lr - return lr - - def __call__(self, n, **kwargs): - return self.schedule(n, **kwargs) - - -class LambdaWarmUpCosineScheduler2: - """ - supports repeated iterations, configurable via lists - note: use with a base_lr of 1.0. - """ - - def __init__(self, warm_up_steps, f_min, f_max, f_start, cycle_lengths, verbosity_interval=0): - assert len(warm_up_steps) == len(f_min) == len(f_max) == len(f_start) == len(cycle_lengths) - self.lr_warm_up_steps = warm_up_steps - self.f_start = f_start - self.f_min = f_min - self.f_max = f_max - self.cycle_lengths = cycle_lengths - self.cum_cycles = np.cumsum([0] + list(self.cycle_lengths)) - self.last_f = 0.0 - self.verbosity_interval = verbosity_interval - - def find_in_interval(self, n): - interval = 0 - for cl in self.cum_cycles[1:]: - if n <= cl: - return interval - interval += 1 - - def schedule(self, n, **kwargs): - cycle = self.find_in_interval(n) - n = n - self.cum_cycles[cycle] - if self.verbosity_interval > 0: - if n % self.verbosity_interval == 0: - print(f"current step: {n}, recent lr-multiplier: {self.last_f}, " f"current cycle {cycle}") - if n < self.lr_warm_up_steps[cycle]: - f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle] - self.last_f = f - return f - else: - t = (n - self.lr_warm_up_steps[cycle]) / (self.cycle_lengths[cycle] - self.lr_warm_up_steps[cycle]) - t = min(t, 1.0) - f = self.f_min[cycle] + 0.5 * (self.f_max[cycle] - self.f_min[cycle]) * (1 + np.cos(t * np.pi)) - self.last_f = f - return f - - def __call__(self, n, **kwargs): - return self.schedule(n, **kwargs) - - -class LambdaLinearScheduler(LambdaWarmUpCosineScheduler2): - def schedule(self, n, **kwargs): - cycle = self.find_in_interval(n) - n = n - self.cum_cycles[cycle] - if self.verbosity_interval > 0: - if n % self.verbosity_interval == 0: - print(f"current step: {n}, recent lr-multiplier: {self.last_f}, " f"current cycle {cycle}") - - if n < self.lr_warm_up_steps[cycle]: - f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle] - self.last_f = f - return f - else: - f = self.f_min[cycle] + (self.f_max[cycle] - self.f_min[cycle]) * (self.cycle_lengths[cycle] - n) / ( - self.cycle_lengths[cycle] - ) - self.last_f = f - return f diff --git a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py deleted file mode 100644 index cdfd3c37300e..000000000000 --- a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py +++ /dev/null @@ -1,202 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import pickle -import time - -import torch -from PIL import Image - -from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler -from nemo.collections.multimodal.models.stable_diffusion.samplers.para_ddim import ParaDDIMSampler -from nemo.collections.multimodal.models.stable_diffusion.samplers.plms import PLMSSampler -from nemo.collections.multimodal.models.stable_diffusion.samplers.sampler_dpm import DPMSolverSampler -from nemo.collections.multimodal.parts.stable_diffusion.utils import DataParallelWrapper - - -def encode_prompt(cond_stage_model, prompt, unconditional_guidance_scale, batch_size): - c = cond_stage_model.encode(batch_size * [prompt]) - if unconditional_guidance_scale != 1.0: - uc = cond_stage_model.encode(batch_size * [""]) - else: - uc = None - return c, uc - - -def initialize_sampler(model, sampler_type): - if sampler_type == 'DDIM': - sampler = DDIMSampler(model) - elif sampler_type == 'PLMS': - sampler = PLMSSampler(model) - elif sampler_type == 'DPM': - sampler = DPMSolverSampler(model) - elif sampler_type == 'PARA_DDIM': - sampler = ParaDDIMSampler(model) - else: - raise ValueError(f'Sampler {sampler_type} is not supported.') - return sampler - - -def decode_images(model, samples): - images = model.decode_first_stage(samples) - - images = torch.clamp((images + 1.0) / 2.0, min=0.0, max=1.0) - - return images - - -def numpy_to_pil(images): - """ - Convert a numpy image or a batch of images to a PIL image. - """ - if images.ndim == 3: - images = images[None, ...] - images = (images * 255).round().astype("uint8") - pil_images = [Image.fromarray(image) for image in images] - - return pil_images - - -def torch_to_numpy(images): - numpy_images = [x.float().cpu().permute(0, 2, 3, 1).numpy() for x in images] - return numpy_images - - -def pipeline(model, cfg, verbose=True, rng=None): - # setup default values for inference configs - unconditional_guidance_scale = cfg.infer.get("unconditional_guidance_scale", 7.5) - batch_size = cfg.infer.get('num_images_per_prompt', 1) - prompts = cfg.infer.get('prompts', []) - height = cfg.infer.get('height', 512) - width = cfg.infer.get('width', 512) - downsampling_factor = cfg.infer.get('down_factor', 8) - sampler_type = cfg.infer.get('sampler_type', 'DDIM') - sampler_parallelism = cfg.infer.get('sampler_parallelism', 1) - sampler_tolerance = cfg.infer.get('sampler_tolerance', 0.1) - inference_steps = cfg.infer.get('inference_steps', 50) - output_type = cfg.infer.get('output_type', 'pil') - save_to_file = cfg.infer.get('save_to_file', True) - out_path = cfg.infer.get('out_path', '') - eta = cfg.infer.get('eta', 0) - num_devices = cfg.infer.get('devices', 1) - - if sampler_parallelism > 1: - if not sampler_type.startswith('PARA'): - raise ValueError('Parallel sampler is required when parallelism > 1') - if not num_devices > 1: - print("It is recommended to run parallel sampler with multiple GPUs") - - if num_devices > 1: - print(f"Running DataParallel model with {num_devices} GPUs.") - model.model.diffusion_model = DataParallelWrapper( - model.model.diffusion_model, device_ids=list(range(num_devices)) - ) - - # get autocast_dtype - if cfg.trainer.precision in ['bf16', 'bf16-mixed']: - autocast_dtype = torch.bfloat16 - elif cfg.trainer.precision in [32, '32', '32-true']: - autocast_dtype = torch.float - elif cfg.trainer.precision in [16, '16', '16-mixed']: - autocast_dtype = torch.half - else: - raise ValueError('precision must be in [32, 16, "bf16"]') - - with torch.no_grad(), torch.cuda.amp.autocast( - enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, - ): - - in_channels = model.model.diffusion_model.in_channels - - sampler = initialize_sampler(model, sampler_type.upper()) - - output = [] - throughput = [] - - if isinstance(prompts, str): - prompts = [prompts] - - for prompt in prompts: - tic = time.perf_counter() - tic_total = tic - cond, u_cond = encode_prompt(model.cond_stage_model, prompt, unconditional_guidance_scale, batch_size) - toc = time.perf_counter() - conditioning_time = toc - tic - - latent_shape = [in_channels, height // downsampling_factor, width // downsampling_factor] - latents = torch.randn( - [batch_size, in_channels, height // downsampling_factor, width // downsampling_factor], generator=rng - ).to(torch.cuda.current_device()) - - tic = time.perf_counter() - samples, intermediates = sampler.sample( - S=inference_steps, - conditioning=cond, - batch_size=batch_size, - shape=latent_shape, - verbose=False, - unconditional_guidance_scale=unconditional_guidance_scale, - unconditional_conditioning=u_cond, - eta=eta, - x_T=latents, - parallelism=sampler_parallelism, - tolerance=sampler_tolerance, - ) - toc = time.perf_counter() - sampling_time = toc - tic - - tic = time.perf_counter() - images = decode_images(model, samples) - toc = time.perf_counter() - decode_time = toc - tic - - toc_total = time.perf_counter() - total_time = toc_total - tic_total - output.append(images) - - throughput.append( - { - 'text-conditioning-time': conditioning_time, - 'sampling-time': sampling_time, - 'decode-time': decode_time, - 'total-time': total_time, - 'sampling-steps': inference_steps, - } - ) - - # Convert output type and save to disk - if output_type == 'torch': - output = torch.cat(output, dim=0) - else: - output = torch_to_numpy(output) - if output_type == 'pil': - output = [numpy_to_pil(x) for x in output] - - if save_to_file: - os.makedirs(out_path, exist_ok=True) - if output_type == 'pil': - for text_prompt, pils in zip(prompts, output): - for idx, image in enumerate(pils): - image.save(os.path.join(out_path, f'{text_prompt[:50]}_{idx}.png')) - else: - with open(os.path.join(out_path, 'output.pkl'), 'wb') as f: - pickle.dump(output, f) - else: - return output - - ave_metrics = {} - for key in throughput[0].keys(): - ave_metrics[f'avg-{key}'] = sum([dicts[key] for dicts in throughput]) / len(throughput) - if verbose: - print(ave_metrics) diff --git a/nemo/collections/multimodal/parts/stable_diffusion/utils.py b/nemo/collections/multimodal/parts/stable_diffusion/utils.py deleted file mode 100644 index 7126283cb1e8..000000000000 --- a/nemo/collections/multimodal/parts/stable_diffusion/utils.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import importlib -import multiprocessing as mp -from collections import abc -from functools import partial -from inspect import isfunction -from queue import Queue -from threading import Thread - -import numpy as np -import torch -from einops import rearrange -from PIL import Image, ImageDraw, ImageFont - - -class DataParallelWrapper(torch.nn.DataParallel): - def __getattr__(self, name): - try: - return super().__getattr__(name) - except AttributeError: - return getattr(self.module, name) - - -def log_txt_as_img(wh, xc, size=10): - # wh a tuple of (width, height) - # xc a list of captions to plot - b = len(xc) - txts = list() - for bi in range(b): - txt = Image.new("RGB", wh, color="white") - draw = ImageDraw.Draw(txt) - nc = int(40 * (wh[0] / 256)) - lines = "\n".join(xc[bi][start : start + nc] for start in range(0, len(xc[bi]), nc)) - - try: - draw.text((0, 0), lines, fill="black") - except UnicodeEncodeError: - print("Cant encode string for logging. Skipping.") - - txt = np.array(txt).transpose(2, 0, 1) / 127.5 - 1.0 - txts.append(txt) - txts = np.stack(txts) - txts = torch.tensor(txts) - return txts - - -def ismap(x): - if not isinstance(x, torch.Tensor): - return False - return (len(x.shape) == 4) and (x.shape[1] > 3) - - -def isimage(x): - if not isinstance(x, torch.Tensor): - return False - return (len(x.shape) == 4) and (x.shape[1] == 3 or x.shape[1] == 1) - - -def exists(x): - return x is not None - - -def default(val, d): - if exists(val): - return val - return d() if isfunction(d) else d - - -def mean_flat(tensor): - """ - https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/nn.py#L86 - Take the mean over all non-batch dimensions. - """ - return tensor.mean(dim=list(range(1, len(tensor.shape)))) - - -def count_params(model, verbose=False): - total_params = sum(p.numel() for p in model.parameters()) - if verbose: - print(f"{model.__class__.__name__} has {total_params * 1.e-6:.2f} M params.") - return total_params - - -def instantiate_from_config(config): - if not "target" in config: - if config == '__is_first_stage__': - return None - elif config == "__is_unconditional__": - return None - raise KeyError("Expected key `target` to instantiate.") - return get_obj_from_str(config["target"])(**config.get("params", dict())) - - -def get_obj_from_str(string, reload=False): - module, cls = string.rsplit(".", 1) - print(f'Getting module=<{module}>, cls=<{cls}>') - if reload: - module_imp = importlib.import_module(module) - importlib.reload(module_imp) - return getattr(importlib.import_module(module, package=None), cls) - - -def _do_parallel_data_prefetch(func, Q, data, idx, idx_to_fn=False): - # create dummy dataset instance - - # run prefetching - if idx_to_fn: - res = func(data, worker_id=idx) - else: - res = func(data) - Q.put([idx, res]) - Q.put("Done") - - -def parallel_data_prefetch( - func: callable, data, n_proc, target_data_type="ndarray", cpu_intensive=True, use_worker_id=False -): - # if target_data_type not in ["ndarray", "list"]: - # raise ValueError( - # "Data, which is passed to parallel_data_prefetch has to be either of type list or ndarray." - # ) - if isinstance(data, np.ndarray) and target_data_type == "list": - raise ValueError("list expected but function got ndarray.") - elif isinstance(data, abc.Iterable): - if isinstance(data, dict): - print( - f'WARNING:"data" argument passed to parallel_data_prefetch is a dict: Using only its values and disregarding keys.' - ) - data = list(data.values()) - if target_data_type == "ndarray": - data = np.asarray(data) - else: - data = list(data) - else: - raise TypeError( - f"The data, that shall be processed parallel has to be either an np.ndarray or an Iterable, but is actually {type(data)}." - ) - - if cpu_intensive: - Q = mp.Queue(1000) - proc = mp.Process - else: - Q = Queue(1000) - proc = Thread - # spawn processes - if target_data_type == "ndarray": - arguments = [[func, Q, part, i, use_worker_id] for i, part in enumerate(np.array_split(data, n_proc))] - else: - step = int(len(data) / n_proc + 1) if len(data) % n_proc != 0 else int(len(data) / n_proc) - arguments = [ - [func, Q, part, i, use_worker_id] - for i, part in enumerate([data[i : i + step] for i in range(0, len(data), step)]) - ] - processes = [] - for i in range(n_proc): - p = proc(target=_do_parallel_data_prefetch, args=arguments[i]) - processes += [p] - - # start processes - print(f"Start prefetching...") - import time - - start = time.time() - gather_res = [[] for _ in range(n_proc)] - try: - for p in processes: - p.start() - - k = 0 - while k < n_proc: - # get result - res = Q.get() - if res == "Done": - k += 1 - else: - gather_res[res[0]] = res[1] - - except Exception as e: - print("Exception: ", e) - for p in processes: - p.terminate() - - raise e - finally: - for p in processes: - p.join() - print(f"Prefetching complete. [{time.time() - start} sec.]") - - if target_data_type == 'ndarray': - if not isinstance(gather_res[0], np.ndarray): - return np.concatenate([np.asarray(r) for r in gather_res], axis=0) - - # order outputs - return np.concatenate(gather_res, axis=0) - elif target_data_type == 'list': - out = [] - for r in gather_res: - out.extend(r) - return out - else: - return gather_res diff --git a/nemo/collections/multimodal/parts/utils.py b/nemo/collections/multimodal/parts/utils.py deleted file mode 100644 index 89a7e98cef00..000000000000 --- a/nemo/collections/multimodal/parts/utils.py +++ /dev/null @@ -1,267 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -from typing import Any, Callable, Dict, Tuple - -import torch -from omegaconf import DictConfig, OmegaConf, open_dict -from PIL import Image -from pytorch_lightning import Trainer -from pytorch_lightning.plugins.environments import TorchElasticEnvironment - -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector -from nemo.utils import AppState, logging -from nemo.utils.distributed import initialize_distributed - -try: - from megatron.core import parallel_state - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - - -def numpy_to_pil(images): - """ - Convert a numpy image or a batch of images to a PIL image. - """ - if images.ndim == 3: - images = images[None, ...] - images = (images * 255).round().astype("uint8") - pil_images = [Image.fromarray(image) for image in images] - - return pil_images - - -def randn_like(x, generator=None): - return torch.randn(x.shape, dtype=x.dtype, device=x.device, generator=generator) - - -def extend_instance(obj, mixin): - """Apply mixins to a class instance after creation""" - base_cls = obj.__class__ - base_cls_name = obj.__class__.__name__ - obj.__class__ = type( - base_cls_name, (mixin, base_cls), {} - ) # mixin needs to go first for our forward() logic to work - - -def getattr_recursive(obj, att): - """ - Return nested attribute of obj - Example: getattr_recursive(obj, 'a.b.c') is equivalent to obj.a.b.c - """ - if att == "": - return obj - i = att.find(".") - if i < 0: - return getattr(obj, att) - else: - return getattr_recursive(getattr(obj, att[:i]), att[i + 1 :]) - - -def setattr_recursive(obj, att, val): - """ - Set nested attribute of obj - Example: setattr_recursive(obj, 'a.b.c', val) is equivalent to obj.a.b.c = val - """ - if "." in att: - obj = getattr_recursive(obj, ".".join(att.split(".")[:-1])) - setattr(obj, att.split(".")[-1], val) - - -def apply_with_stopping_condition(module, apply_fn, apply_condition=None, stopping_condition=None, **other_args): - if stopping_condition(module): - return - if apply_condition(module): - apply_fn(module, **other_args) - for child in module.children(): - apply_with_stopping_condition( - child, apply_fn, apply_condition=apply_condition, stopping_condition=stopping_condition, **other_args - ) - - -def setup_trainer_and_models_for_inference( - model_provider: Any, cfg: DictConfig, model_cfg_modifier: Callable, -): - """ - Set up a trainer and NeMo model for inference. - - Args: - model_provider (Any): An object that provides the NeMo model. - cfg (DictConfig): The configuration dictionary, containing the - necessary settings for the trainer and the models. - model_cfg_modifier (Callable): A function that modifies the model - configuration for inference. - - Returns: - Tuple[Trainer, Any]: A tuple containing the trainer and the model. - """ - - # Check if we need to use the TorchElasticEnvironment plugin for the trainer. - plugins = [] - if cfg.get('cluster_type', None) == 'BCP': - plugins.append(TorchElasticEnvironment()) - - # Use the NLPDDPStrategy for the distributed data parallel strategy. - # We don't use DDP for async grad allreduce and don't find unused parameters. - strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=False,) - - # Set up the trainer with the specified plugins and strategy. - trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) - - # Create the NLPSaveRestoreConnector object for model saving and restoring. - save_restore_connector = NLPSaveRestoreConnector() - - print(f'Loading {cfg.models} models') - models = [] - for single_model_cfg in cfg.models: - if not single_model_cfg.restore_from_path: - continue - if single_model_cfg.restore_from_path.endswith(".nemo"): - # Set the model_extracted_dir attribute if the restore path is a directory. - if os.path.isdir(single_model_cfg.restore_from_path): - save_restore_connector.model_extracted_dir = single_model_cfg.restore_from_path - - # Restore the model configuration from the specified path and modify it for inference. - model_cfg = model_provider.restore_from( - restore_path=single_model_cfg.restore_from_path, - trainer=trainer, - save_restore_connector=save_restore_connector, - return_config=True, - ) - with open_dict(model_cfg): - model_cfg_modifier(model_cfg) # modify the configuration for inference - - # Restore the model from the specified path and configuration, and set it up for inference. - model = model_provider.restore_from( - restore_path=single_model_cfg.restore_from_path, - trainer=trainer, - override_config_path=model_cfg, - save_restore_connector=save_restore_connector, - strict=True, - ) - models.append(model) - - elif single_model_cfg.restore_from_path.endswith(".ckpt"): - logging.warning( - "Loading from .ckpt checkpoint for inference is experimental! It doesn't support models with model parallelism!" - ) - - model = model_provider.load_from_checkpoint( - single_model_cfg.restore_from_path, hparams_file=cfg.model.get("hparams_file"), trainer=trainer, - ) - models.append(model) - - else: - raise ValueError(f"Unrecognized checkpoint type: {single_model_cfg.restore_from_path}") - - # initialize apex DDP strategy - def dummy(): - return - - if trainer.strategy.launcher is not None: - trainer.strategy.launcher.launch(dummy, trainer=trainer) - trainer.strategy.setup_environment() - - models = [model.cuda() for model in models] # move the model to the GPU - for model in models: - model.eval().requires_grad_(False) # set the model to evaluation mode and disable gradients - - # Return the trainer and model objects. - return trainer, models - - -def setup_trainer_and_model_for_inference( - model_provider: Any, cfg: DictConfig, model_cfg_modifier: Callable, -) -> Tuple[Trainer, Any]: - """ - Set up a trainer and NeMo model for inference. - - Args: - model_provider (Any): An object that provides the NeMo model. - cfg (DictConfig): The configuration dictionary, containing the - necessary settings for the trainer and the model. - model_cfg_modifier (Callable): A function that modifies the model - configuration for inference. - - Returns: - Tuple[Trainer, Any]: A tuple containing the trainer and the model. - """ - - # Check if we need to use the TorchElasticEnvironment plugin for the trainer. - plugins = [] - plugins.append(TorchElasticEnvironment()) - - # Use the NLPDDPStrategy for the distributed data parallel strategy. - # We don't use DDP for async grad allreduce and don't find unused parameters. - strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=False,) - - # Set up the trainer with the specified plugins and strategy. - trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) - - # Create the NLPSaveRestoreConnector object for model saving and restoring. - save_restore_connector = NLPSaveRestoreConnector() - - if cfg.model.restore_from_path.endswith(".nemo") or os.path.isdir(cfg.model.restore_from_path): - # Set the model_extracted_dir attribute if the restore path is a directory. - if os.path.isdir(cfg.model.restore_from_path): - save_restore_connector.model_extracted_dir = cfg.model.restore_from_path - - # Restore the model configuration from the specified path and modify it for inference. - model_cfg = model_provider.restore_from( - restore_path=cfg.model.restore_from_path, - trainer=trainer, - save_restore_connector=save_restore_connector, - return_config=True, - ) - with open_dict(model_cfg): - model_cfg_modifier(model_cfg) # modify the configuration for inference - - # Restore the model from the specified path and configuration, and set it up for inference. - model = model_provider.restore_from( - restore_path=cfg.model.restore_from_path, - trainer=trainer, - override_config_path=model_cfg, - save_restore_connector=save_restore_connector, - strict=True, - ) - - elif cfg.model.restore_from_path.endswith(".ckpt"): - logging.warning( - "Loading from .ckpt checkpoint for inference is experimental! It doesn't support models with model parallelism!" - ) - - model = model_provider.load_from_checkpoint( - cfg.model.restore_from_path, hparams_file=cfg.model.get("hparams_file"), trainer=trainer, - ) - - else: - raise ValueError(f"Unrecognized checkpoint type: {cfg.model.restore_from_path}") - - # initialize apex DDP strategy - def dummy(): - return - - if trainer.strategy.launcher is not None: - trainer.strategy.launcher.launch(dummy, trainer=trainer) - trainer.strategy.setup_environment() - - model = model.cuda() # move the model to the GPU - model.eval().requires_grad_(False) # set the model to evaluation mode and disable gradients - - # Return the trainer and model objects. - return trainer, model diff --git a/nemo/collections/vision/__init__.py b/nemo/collections/vision/__init__.py deleted file mode 100644 index edf1849f216f..000000000000 --- a/nemo/collections/vision/__init__.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from nemo.collections.vision import data, losses, models, modules -from nemo.package_info import __version__ - -# Set collection version equal to NeMo version. -__version = __version__ - -# Authorship. -__author__ = "NVIDIA Corporation" - -# Set collection name. -__description__ = "Computer Vision collection" -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/vision/data/__init__.py b/nemo/collections/vision/data/__init__.py deleted file mode 100644 index 2db92b257416..000000000000 --- a/nemo/collections/vision/data/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/vision/data/imagenet_classnames.py b/nemo/collections/vision/data/imagenet_classnames.py deleted file mode 100644 index 4dcd3e595923..000000000000 --- a/nemo/collections/vision/data/imagenet_classnames.py +++ /dev/null @@ -1,1016 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -imagenet_classnames = [ - "tench", - "goldfish", - "great white shark", - "tiger shark", - "hammerhead shark", - "electric ray", - "stingray", - "rooster", - "hen", - "ostrich", - "brambling", - "goldfinch", - "house finch", - "junco", - "indigo bunting", - "American robin", - "bulbul", - "jay", - "magpie", - "chickadee", - "American dipper", - "kite (bird of prey)", - "bald eagle", - "vulture", - "great grey owl", - "fire salamander", - "smooth newt", - "newt", - "spotted salamander", - "axolotl", - "American bullfrog", - "tree frog", - "tailed frog", - "loggerhead sea turtle", - "leatherback sea turtle", - "mud turtle", - "terrapin", - "box turtle", - "banded gecko", - "green iguana", - "Carolina anole", - "desert grassland whiptail lizard", - "agama", - "frilled-necked lizard", - "alligator lizard", - "Gila monster", - "European green lizard", - "chameleon", - "Komodo dragon", - "Nile crocodile", - "American alligator", - "triceratops", - "worm snake", - "ring-necked snake", - "eastern hog-nosed snake", - "smooth green snake", - "kingsnake", - "garter snake", - "water snake", - "vine snake", - "night snake", - "boa constrictor", - "African rock python", - "Indian cobra", - "green mamba", - "sea snake", - "Saharan horned viper", - "eastern diamondback rattlesnake", - "sidewinder rattlesnake", - "trilobite", - "harvestman", - "scorpion", - "yellow garden spider", - "barn spider", - "European garden spider", - "southern black widow", - "tarantula", - "wolf spider", - "tick", - "centipede", - "black grouse", - "ptarmigan", - "ruffed grouse", - "prairie grouse", - "peafowl", - "quail", - "partridge", - "african grey parrot", - "macaw", - "sulphur-crested cockatoo", - "lorikeet", - "coucal", - "bee eater", - "hornbill", - "hummingbird", - "jacamar", - "toucan", - "duck", - "red-breasted merganser", - "goose", - "black swan", - "tusker", - "echidna", - "platypus", - "wallaby", - "koala", - "wombat", - "jellyfish", - "sea anemone", - "brain coral", - "flatworm", - "nematode", - "conch", - "snail", - "slug", - "sea slug", - "chiton", - "chambered nautilus", - "Dungeness crab", - "rock crab", - "fiddler crab", - "red king crab", - "American lobster", - "spiny lobster", - "crayfish", - "hermit crab", - "isopod", - "white stork", - "black stork", - "spoonbill", - "flamingo", - "little blue heron", - "great egret", - "bittern bird", - "crane bird", - "limpkin", - "common gallinule", - "American coot", - "bustard", - "ruddy turnstone", - "dunlin", - "common redshank", - "dowitcher", - "oystercatcher", - "pelican", - "king penguin", - "albatross", - "grey whale", - "killer whale", - "dugong", - "sea lion", - "Chihuahua", - "Japanese Chin", - "Maltese", - "Pekingese", - "Shih Tzu", - "King Charles Spaniel", - "Papillon", - "toy terrier", - "Rhodesian Ridgeback", - "Afghan Hound", - "Basset Hound", - "Beagle", - "Bloodhound", - "Bluetick Coonhound", - "Black and Tan Coonhound", - "Treeing Walker Coonhound", - "English foxhound", - "Redbone Coonhound", - "borzoi", - "Irish Wolfhound", - "Italian Greyhound", - "Whippet", - "Ibizan Hound", - "Norwegian Elkhound", - "Otterhound", - "Saluki", - "Scottish Deerhound", - "Weimaraner", - "Staffordshire Bull Terrier", - "American Staffordshire Terrier", - "Bedlington Terrier", - "Border Terrier", - "Kerry Blue Terrier", - "Irish Terrier", - "Norfolk Terrier", - "Norwich Terrier", - "Yorkshire Terrier", - "Wire Fox Terrier", - "Lakeland Terrier", - "Sealyham Terrier", - "Airedale Terrier", - "Cairn Terrier", - "Australian Terrier", - "Dandie Dinmont Terrier", - "Boston Terrier", - "Miniature Schnauzer", - "Giant Schnauzer", - "Standard Schnauzer", - "Scottish Terrier", - "Tibetan Terrier", - "Australian Silky Terrier", - "Soft-coated Wheaten Terrier", - "West Highland White Terrier", - "Lhasa Apso", - "Flat-Coated Retriever", - "Curly-coated Retriever", - "Golden Retriever", - "Labrador Retriever", - "Chesapeake Bay Retriever", - "German Shorthaired Pointer", - "Vizsla", - "English Setter", - "Irish Setter", - "Gordon Setter", - "Brittany dog", - "Clumber Spaniel", - "English Springer Spaniel", - "Welsh Springer Spaniel", - "Cocker Spaniel", - "Sussex Spaniel", - "Irish Water Spaniel", - "Kuvasz", - "Schipperke", - "Groenendael dog", - "Malinois", - "Briard", - "Australian Kelpie", - "Komondor", - "Old English Sheepdog", - "Shetland Sheepdog", - "collie", - "Border Collie", - "Bouvier des Flandres dog", - "Rottweiler", - "German Shepherd Dog", - "Dobermann", - "Miniature Pinscher", - "Greater Swiss Mountain Dog", - "Bernese Mountain Dog", - "Appenzeller Sennenhund", - "Entlebucher Sennenhund", - "Boxer", - "Bullmastiff", - "Tibetan Mastiff", - "French Bulldog", - "Great Dane", - "St. Bernard", - "husky", - "Alaskan Malamute", - "Siberian Husky", - "Dalmatian", - "Affenpinscher", - "Basenji", - "pug", - "Leonberger", - "Newfoundland dog", - "Great Pyrenees dog", - "Samoyed", - "Pomeranian", - "Chow Chow", - "Keeshond", - "brussels griffon", - "Pembroke Welsh Corgi", - "Cardigan Welsh Corgi", - "Toy Poodle", - "Miniature Poodle", - "Standard Poodle", - "Mexican hairless dog (xoloitzcuintli)", - "grey wolf", - "Alaskan tundra wolf", - "red wolf or maned wolf", - "coyote", - "dingo", - "dhole", - "African wild dog", - "hyena", - "red fox", - "kit fox", - "Arctic fox", - "grey fox", - "tabby cat", - "tiger cat", - "Persian cat", - "Siamese cat", - "Egyptian Mau", - "cougar", - "lynx", - "leopard", - "snow leopard", - "jaguar", - "lion", - "tiger", - "cheetah", - "brown bear", - "American black bear", - "polar bear", - "sloth bear", - "mongoose", - "meerkat", - "tiger beetle", - "ladybug", - "ground beetle", - "longhorn beetle", - "leaf beetle", - "dung beetle", - "rhinoceros beetle", - "weevil", - "fly", - "bee", - "ant", - "grasshopper", - "cricket insect", - "stick insect", - "cockroach", - "praying mantis", - "cicada", - "leafhopper", - "lacewing", - "dragonfly", - "damselfly", - "red admiral butterfly", - "ringlet butterfly", - "monarch butterfly", - "small white butterfly", - "sulphur butterfly", - "gossamer-winged butterfly", - "starfish", - "sea urchin", - "sea cucumber", - "cottontail rabbit", - "hare", - "Angora rabbit", - "hamster", - "porcupine", - "fox squirrel", - "marmot", - "beaver", - "guinea pig", - "common sorrel horse", - "zebra", - "pig", - "wild boar", - "warthog", - "hippopotamus", - "ox", - "water buffalo", - "bison", - "ram (adult male sheep)", - "bighorn sheep", - "Alpine ibex", - "hartebeest", - "impala (antelope)", - "gazelle", - "arabian camel", - "llama", - "weasel", - "mink", - "European polecat", - "black-footed ferret", - "otter", - "skunk", - "badger", - "armadillo", - "three-toed sloth", - "orangutan", - "gorilla", - "chimpanzee", - "gibbon", - "siamang", - "guenon", - "patas monkey", - "baboon", - "macaque", - "langur", - "black-and-white colobus", - "proboscis monkey", - "marmoset", - "white-headed capuchin", - "howler monkey", - "titi monkey", - "Geoffroy's spider monkey", - "common squirrel monkey", - "ring-tailed lemur", - "indri", - "Asian elephant", - "African bush elephant", - "red panda", - "giant panda", - "snoek fish", - "eel", - "silver salmon", - "rock beauty fish", - "clownfish", - "sturgeon", - "gar fish", - "lionfish", - "pufferfish", - "abacus", - "abaya", - "academic gown", - "accordion", - "acoustic guitar", - "aircraft carrier", - "airliner", - "airship", - "altar", - "ambulance", - "amphibious vehicle", - "analog clock", - "apiary", - "apron", - "trash can", - "assault rifle", - "backpack", - "bakery", - "balance beam", - "balloon", - "ballpoint pen", - "Band-Aid", - "banjo", - "baluster / handrail", - "barbell", - "barber chair", - "barbershop", - "barn", - "barometer", - "barrel", - "wheelbarrow", - "baseball", - "basketball", - "bassinet", - "bassoon", - "swimming cap", - "bath towel", - "bathtub", - "station wagon", - "lighthouse", - "beaker", - "military hat (bearskin or shako)", - "beer bottle", - "beer glass", - "bell tower", - "baby bib", - "tandem bicycle", - "bikini", - "ring binder", - "binoculars", - "birdhouse", - "boathouse", - "bobsleigh", - "bolo tie", - "poke bonnet", - "bookcase", - "bookstore", - "bottle cap", - "hunting bow", - "bow tie", - "brass memorial plaque", - "bra", - "breakwater", - "breastplate", - "broom", - "bucket", - "buckle", - "bulletproof vest", - "high-speed train", - "butcher shop", - "taxicab", - "cauldron", - "candle", - "cannon", - "canoe", - "can opener", - "cardigan", - "car mirror", - "carousel", - "tool kit", - "cardboard box / carton", - "car wheel", - "automated teller machine", - "cassette", - "cassette player", - "castle", - "catamaran", - "CD player", - "cello", - "mobile phone", - "chain", - "chain-link fence", - "chain mail", - "chainsaw", - "storage chest", - "chiffonier", - "bell or wind chime", - "china cabinet", - "Christmas stocking", - "church", - "movie theater", - "cleaver", - "cliff dwelling", - "cloak", - "clogs", - "cocktail shaker", - "coffee mug", - "coffeemaker", - "spiral or coil", - "combination lock", - "computer keyboard", - "candy store", - "container ship", - "convertible", - "corkscrew", - "cornet", - "cowboy boot", - "cowboy hat", - "cradle", - "construction crane", - "crash helmet", - "crate", - "infant bed", - "Crock Pot", - "croquet ball", - "crutch", - "cuirass", - "dam", - "desk", - "desktop computer", - "rotary dial telephone", - "diaper", - "digital clock", - "digital watch", - "dining table", - "dishcloth", - "dishwasher", - "disc brake", - "dock", - "dog sled", - "dome", - "doormat", - "drilling rig", - "drum", - "drumstick", - "dumbbell", - "Dutch oven", - "electric fan", - "electric guitar", - "electric locomotive", - "entertainment center", - "envelope", - "espresso machine", - "face powder", - "feather boa", - "filing cabinet", - "fireboat", - "fire truck", - "fire screen", - "flagpole", - "flute", - "folding chair", - "football helmet", - "forklift", - "fountain", - "fountain pen", - "four-poster bed", - "freight car", - "French horn", - "frying pan", - "fur coat", - "garbage truck", - "gas mask or respirator", - "gas pump", - "goblet", - "go-kart", - "golf ball", - "golf cart", - "gondola", - "gong", - "gown", - "grand piano", - "greenhouse", - "radiator grille", - "grocery store", - "guillotine", - "hair clip", - "hair spray", - "half-track", - "hammer", - "hamper", - "hair dryer", - "hand-held computer", - "handkerchief", - "hard disk drive", - "harmonica", - "harp", - "combine harvester", - "hatchet", - "holster", - "home theater", - "honeycomb", - "hook", - "hoop skirt", - "gymnastic horizontal bar", - "horse-drawn vehicle", - "hourglass", - "iPod", - "clothes iron", - "carved pumpkin", - "jeans", - "jeep", - "T-shirt", - "jigsaw puzzle", - "rickshaw", - "joystick", - "kimono", - "knee pad", - "knot", - "lab coat", - "ladle", - "lampshade", - "laptop computer", - "lawn mower", - "lens cap", - "letter opener", - "library", - "lifeboat", - "lighter", - "limousine", - "ocean liner", - "lipstick", - "slip-on shoe", - "lotion", - "music speaker", - "loupe magnifying glass", - "sawmill", - "magnetic compass", - "messenger bag", - "mailbox", - "tights", - "one-piece bathing suit", - "manhole cover", - "maraca", - "marimba", - "mask", - "matchstick", - "maypole", - "maze", - "measuring cup", - "medicine cabinet", - "megalith", - "microphone", - "microwave oven", - "military uniform", - "milk can", - "minibus", - "miniskirt", - "minivan", - "missile", - "mitten", - "mixing bowl", - "mobile home", - "ford model t", - "modem", - "monastery", - "monitor", - "moped", - "mortar and pestle", - "graduation cap", - "mosque", - "mosquito net", - "vespa", - "mountain bike", - "tent", - "computer mouse", - "mousetrap", - "moving van", - "muzzle", - "metal nail", - "neck brace", - "necklace", - "baby pacifier", - "notebook computer", - "obelisk", - "oboe", - "ocarina", - "odometer", - "oil filter", - "pipe organ", - "oscilloscope", - "overskirt", - "bullock cart", - "oxygen mask", - "product packet / packaging", - "paddle", - "paddle wheel", - "padlock", - "paintbrush", - "pajamas", - "palace", - "pan flute", - "paper towel", - "parachute", - "parallel bars", - "park bench", - "parking meter", - "railroad car", - "patio", - "payphone", - "pedestal", - "pencil case", - "pencil sharpener", - "perfume", - "Petri dish", - "photocopier", - "plectrum", - "Pickelhaube", - "picket fence", - "pickup truck", - "pier", - "piggy bank", - "pill bottle", - "pillow", - "ping-pong ball", - "pinwheel", - "pirate ship", - "drink pitcher", - "block plane", - "planetarium", - "plastic bag", - "plate rack", - "farm plow", - "plunger", - "Polaroid camera", - "pole", - "police van", - "poncho", - "pool table", - "soda bottle", - "plant pot", - "potter's wheel", - "power drill", - "prayer rug", - "printer", - "prison", - "missile", - "projector", - "hockey puck", - "punching bag", - "purse", - "quill", - "quilt", - "race car", - "racket", - "radiator", - "radio", - "radio telescope", - "rain barrel", - "recreational vehicle", - "fishing casting reel", - "reflex camera", - "refrigerator", - "remote control", - "restaurant", - "revolver", - "rifle", - "rocking chair", - "rotisserie", - "eraser", - "rugby ball", - "ruler measuring stick", - "sneaker", - "safe", - "safety pin", - "salt shaker", - "sandal", - "sarong", - "saxophone", - "scabbard", - "weighing scale", - "school bus", - "schooner", - "scoreboard", - "CRT monitor", - "screw", - "screwdriver", - "seat belt", - "sewing machine", - "shield", - "shoe store", - "shoji screen / room divider", - "shopping basket", - "shopping cart", - "shovel", - "shower cap", - "shower curtain", - "ski", - "balaclava ski mask", - "sleeping bag", - "slide rule", - "sliding door", - "slot machine", - "snorkel", - "snowmobile", - "snowplow", - "soap dispenser", - "soccer ball", - "sock", - "solar thermal collector", - "sombrero", - "soup bowl", - "keyboard space bar", - "space heater", - "space shuttle", - "spatula", - "motorboat", - "spider web", - "spindle", - "sports car", - "spotlight", - "stage", - "steam locomotive", - "through arch bridge", - "steel drum", - "stethoscope", - "scarf", - "stone wall", - "stopwatch", - "stove", - "strainer", - "tram", - "stretcher", - "couch", - "stupa", - "submarine", - "suit", - "sundial", - "sunglasses", - "sunglasses", - "sunscreen", - "suspension bridge", - "mop", - "sweatshirt", - "swim trunks / shorts", - "swing", - "electrical switch", - "syringe", - "table lamp", - "tank", - "tape player", - "teapot", - "teddy bear", - "television", - "tennis ball", - "thatched roof", - "front curtain", - "thimble", - "threshing machine", - "throne", - "tile roof", - "toaster", - "tobacco shop", - "toilet seat", - "torch", - "totem pole", - "tow truck", - "toy store", - "tractor", - "semi-trailer truck", - "tray", - "trench coat", - "tricycle", - "trimaran", - "tripod", - "triumphal arch", - "trolleybus", - "trombone", - "hot tub", - "turnstile", - "typewriter keyboard", - "umbrella", - "unicycle", - "upright piano", - "vacuum cleaner", - "vase", - "vaulted or arched ceiling", - "velvet fabric", - "vending machine", - "vestment", - "viaduct", - "violin", - "volleyball", - "waffle iron", - "wall clock", - "wallet", - "wardrobe", - "military aircraft", - "sink", - "washing machine", - "water bottle", - "water jug", - "water tower", - "whiskey jug", - "whistle", - "hair wig", - "window screen", - "window shade", - "Windsor tie", - "wine bottle", - "airplane wing", - "wok", - "wooden spoon", - "wool", - "split-rail fence", - "shipwreck", - "sailboat", - "yurt", - "website", - "comic book", - "crossword", - "traffic or street sign", - "traffic light", - "dust jacket", - "menu", - "plate", - "guacamole", - "consomme", - "hot pot", - "trifle", - "ice cream", - "popsicle", - "baguette", - "bagel", - "pretzel", - "cheeseburger", - "hot dog", - "mashed potatoes", - "cabbage", - "broccoli", - "cauliflower", - "zucchini", - "spaghetti squash", - "acorn squash", - "butternut squash", - "cucumber", - "artichoke", - "bell pepper", - "cardoon", - "mushroom", - "Granny Smith apple", - "strawberry", - "orange", - "lemon", - "fig", - "pineapple", - "banana", - "jackfruit", - "cherimoya (custard apple)", - "pomegranate", - "hay", - "carbonara", - "chocolate syrup", - "dough", - "meatloaf", - "pizza", - "pot pie", - "burrito", - "red wine", - "espresso", - "tea cup", - "eggnog", - "mountain", - "bubble", - "cliff", - "coral reef", - "geyser", - "lakeshore", - "promontory", - "sandbar", - "beach", - "valley", - "volcano", - "baseball player", - "bridegroom", - "scuba diver", - "rapeseed", - "daisy", - "yellow lady's slipper", - "corn", - "acorn", - "rose hip", - "horse chestnut seed", - "coral fungus", - "agaric", - "gyromitra", - "stinkhorn mushroom", - "earth star fungus", - "hen of the woods mushroom", - "bolete", - "corn cob", - "toilet paper", -] diff --git a/nemo/collections/vision/data/megatron/__init__.py b/nemo/collections/vision/data/megatron/__init__.py deleted file mode 100644 index 2db92b257416..000000000000 --- a/nemo/collections/vision/data/megatron/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/vision/data/megatron/autoaugment.py b/nemo/collections/vision/data/megatron/autoaugment.py deleted file mode 100644 index b55f395ed430..000000000000 --- a/nemo/collections/vision/data/megatron/autoaugment.py +++ /dev/null @@ -1,270 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Code adapted from https://github.com/DeepVoltaire/AutoAugment. - -This module implements the fixed AutoAugment data augmentation policy for ImageNet provided in -Appendix A, Table 9 of reference [1]. It does not include any of the search code for augmentation -policies. - -Reference: -[1] https://arxiv.org/abs/1805.09501 -""" - -import random - -import numpy as np -from PIL import Image, ImageEnhance, ImageOps - -_MAX_LEVEL = 10 # Maximum integer strength of an augmentation, if applicable. - - -class ImageNetPolicy: - """Definition of an ImageNetPolicy. - - Implements a fixed AutoAugment data augmentation policy targeted at - ImageNet training by randomly applying at runtime one of the 25 pre-defined - data augmentation sub-policies provided in Reference [1]. - - Usage example as a Pytorch Transform: - >>> transform=transforms.Compose([transforms.Resize(256), - >>> ImageNetPolicy(), - >>> transforms.ToTensor()]) - """ - - def __init__(self, fillcolor=(128, 128, 128)): - """Initialize an ImageNetPolicy. - - Args: - fillcolor (tuple): RGB color components of the color to be used for - filling when needed (default: (128, 128, 128), which - corresponds to gray). - """ - # Instantiate a list of sub-policies. - # Each entry of the list is a SubPolicy which consists of - # two augmentation operations, - # each of those parametrized as operation, probability, magnitude. - # Those two operations are applied sequentially on the image upon call. - self.policies = [ - SubPolicy("posterize", 0.4, 8, "rotate", 0.6, 9, fillcolor), - SubPolicy("solarize", 0.6, 5, "autocontrast", 0.6, 5, fillcolor), - SubPolicy("equalize", 0.8, 8, "equalize", 0.6, 3, fillcolor), - SubPolicy("posterize", 0.6, 7, "posterize", 0.6, 6, fillcolor), - SubPolicy("equalize", 0.4, 7, "solarize", 0.2, 4, fillcolor), - SubPolicy("equalize", 0.4, 4, "rotate", 0.8, 8, fillcolor), - SubPolicy("solarize", 0.6, 3, "equalize", 0.6, 7, fillcolor), - SubPolicy("posterize", 0.8, 5, "equalize", 1.0, 2, fillcolor), - SubPolicy("rotate", 0.2, 3, "solarize", 0.6, 8, fillcolor), - SubPolicy("equalize", 0.6, 8, "posterize", 0.4, 6, fillcolor), - SubPolicy("rotate", 0.8, 8, "color", 0.4, 0, fillcolor), - SubPolicy("rotate", 0.4, 9, "equalize", 0.6, 2, fillcolor), - SubPolicy("equalize", 0.0, 7, "equalize", 0.8, 8, fillcolor), - SubPolicy("invert", 0.6, 4, "equalize", 1.0, 8, fillcolor), - SubPolicy("color", 0.6, 4, "contrast", 1.0, 8, fillcolor), - SubPolicy("rotate", 0.8, 8, "color", 1.0, 2, fillcolor), - SubPolicy("color", 0.8, 8, "solarize", 0.8, 7, fillcolor), - SubPolicy("sharpness", 0.4, 7, "invert", 0.6, 8, fillcolor), - SubPolicy("shearX", 0.6, 5, "equalize", 1.0, 9, fillcolor), - SubPolicy("color", 0.4, 0, "equalize", 0.6, 3, fillcolor), - SubPolicy("equalize", 0.4, 7, "solarize", 0.2, 4, fillcolor), - SubPolicy("solarize", 0.6, 5, "autocontrast", 0.6, 5, fillcolor), - SubPolicy("invert", 0.6, 4, "equalize", 1.0, 8, fillcolor), - SubPolicy("color", 0.6, 4, "contrast", 1.0, 8, fillcolor), - SubPolicy("equalize", 0.8, 8, "equalize", 0.6, 3, fillcolor), - ] - - def __call__(self, img): - """Define call method for ImageNetPolicy class.""" - policy_idx = random.randint(0, len(self.policies) - 1) - return self.policies[policy_idx](img) - - def __repr__(self): - """Define repr method for ImageNetPolicy class.""" - return "ImageNetPolicy" - - -class SubPolicy: - """Definition of a SubPolicy. - - A SubPolicy consists of two augmentation operations, - each of those parametrized as operation, probability, magnitude. - The two operations are applied sequentially on the image upon call. - """ - - def __init__( - self, operation1, probability1, magnitude_idx1, operation2, probability2, magnitude_idx2, fillcolor, - ): - """Initialize a SubPolicy. - - Args: - operation1 (str): Key specifying the first augmentation operation. - There are fourteen key values altogether (see supported_ops below - listing supported operations). probability1 (float): Probability - within [0., 1.] of applying the first augmentation operation. - magnitude_idx1 (int): Integer specifiying the strength of the first - operation as an index further used to derive the magnitude from a - range of possible values. - operation2 (str): Key specifying the second augmentation operation. - probability2 (float): Probability within [0., 1.] of applying the - second augmentation operation. - magnitude_idx2 (int): Integer specifiying the strength of the - second operation as an index further used to derive the magnitude - from a range of possible values. - fillcolor (tuple): RGB color components of the color to be used for - filling. - Returns: - """ - # List of supported operations for operation1 and operation2. - supported_ops = [ - "shearX", - "shearY", - "translateX", - "translateY", - "rotate", - "color", - "posterize", - "solarize", - "contrast", - "sharpness", - "brightness", - "autocontrast", - "equalize", - "invert", - ] - assert (operation1 in supported_ops) and ( - operation2 in supported_ops - ), "SubPolicy:one of oper1 or oper2 refers to an unsupported operation." - - assert ( - 0.0 <= probability1 <= 1.0 and 0.0 <= probability2 <= 1.0 - ), "SubPolicy: prob1 and prob2 should be within [0., 1.]." - - assert ( - isinstance(magnitude_idx1, int) and 0 <= magnitude_idx1 <= 10 - ), "SubPolicy: idx1 should be specified as an integer within [0, 10]." - - assert ( - isinstance(magnitude_idx2, int) and 0 <= magnitude_idx2 <= 10 - ), "SubPolicy: idx2 should be specified as an integer within [0, 10]." - - # Define a dictionary where each key refers to a specific type of - # augmentation and the corresponding value is a range of ten possible - # magnitude values for that augmentation. - num_levels = _MAX_LEVEL + 1 - ranges = { - "shearX": np.linspace(0, 0.3, num_levels), - "shearY": np.linspace(0, 0.3, num_levels), - "translateX": np.linspace(0, 150 / 331, num_levels), - "translateY": np.linspace(0, 150 / 331, num_levels), - "rotate": np.linspace(0, 30, num_levels), - "color": np.linspace(0.0, 0.9, num_levels), - "posterize": np.round(np.linspace(8, 4, num_levels), 0).astype(np.int), - "solarize": np.linspace(256, 0, num_levels), # range [0, 256] - "contrast": np.linspace(0.0, 0.9, num_levels), - "sharpness": np.linspace(0.0, 0.9, num_levels), - "brightness": np.linspace(0.0, 0.9, num_levels), - "autocontrast": [0] * num_levels, # This augmentation doesn't use magnitude parameter. - "equalize": [0] * num_levels, # This augmentation doesn't use magnitude parameter. - "invert": [0] * num_levels, # This augmentation doesn't use magnitude parameter. - } - - def rotate_with_fill(img, magnitude): - """Define rotation transformation with fill. - - The input image is first rotated, then it is blended together with - a gray mask of the same size. Note that fillcolor as defined - elsewhere in this module doesn't apply here. - - Args: - magnitude (float): rotation angle in degrees. - Returns: - rotated_filled (PIL Image): rotated image with gray filling for - disoccluded areas unveiled by the rotation. - """ - rotated = img.convert("RGBA").rotate(magnitude) - rotated_filled = Image.composite(rotated, Image.new("RGBA", rotated.size, (128,) * 4), rotated) - return rotated_filled.convert(img.mode) - - # Define a dictionary of augmentation functions where each key refers - # to a specific type of augmentation and the corresponding value defines - # the augmentation itself using a lambda function. - # pylint: disable=unnecessary-lambda - func_dict = { - "shearX": lambda img, magnitude: img.transform( - img.size, - Image.AFFINE, - (1, magnitude * random.choice([-1, 1]), 0, 0, 1, 0), - Image.BICUBIC, - fillcolor=fillcolor, - ), - "shearY": lambda img, magnitude: img.transform( - img.size, - Image.AFFINE, - (1, 0, 0, magnitude * random.choice([-1, 1]), 1, 0), - Image.BICUBIC, - fillcolor=fillcolor, - ), - "translateX": lambda img, magnitude: img.transform( - img.size, - Image.AFFINE, - (1, 0, magnitude * img.size[0] * random.choice([-1, 1]), 0, 1, 0,), - fillcolor=fillcolor, - ), - "translateY": lambda img, magnitude: img.transform( - img.size, - Image.AFFINE, - (1, 0, 0, 0, 1, magnitude * img.size[1] * random.choice([-1, 1]),), - fillcolor=fillcolor, - ), - "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude), - "color": lambda img, magnitude: ImageEnhance.Color(img).enhance(1 + magnitude * random.choice([-1, 1])), - "posterize": lambda img, magnitude: ImageOps.posterize(img, magnitude), - "solarize": lambda img, magnitude: ImageOps.solarize(img, magnitude), - "contrast": lambda img, magnitude: ImageEnhance.Contrast(img).enhance( - 1 + magnitude * random.choice([-1, 1]) - ), - "sharpness": lambda img, magnitude: ImageEnhance.Sharpness(img).enhance( - 1 + magnitude * random.choice([-1, 1]) - ), - "brightness": lambda img, magnitude: ImageEnhance.Brightness(img).enhance( - 1 + magnitude * random.choice([-1, 1]) - ), - "autocontrast": lambda img, magnitude: ImageOps.autocontrast(img), - "equalize": lambda img, magnitude: ImageOps.equalize(img), - "invert": lambda img, magnitude: ImageOps.invert(img), - } - - # Store probability, function and magnitude of the first augmentation - # for the sub-policy. - self.probability1 = probability1 - self.operation1 = func_dict[operation1] - self.magnitude1 = ranges[operation1][magnitude_idx1] - - # Store probability, function and magnitude of the second augmentation - # for the sub-policy. - self.probability2 = probability2 - self.operation2 = func_dict[operation2] - self.magnitude2 = ranges[operation2][magnitude_idx2] - - def __call__(self, img): - """Define call method for SubPolicy class.""" - # Randomly apply operation 1. - if random.random() < self.probability1: - img = self.operation1(img, self.magnitude1) - - # Randomly apply operation 2. - if random.random() < self.probability2: - img = self.operation2(img, self.magnitude2) - - return img diff --git a/nemo/collections/vision/data/megatron/data_samplers.py b/nemo/collections/vision/data/megatron/data_samplers.py deleted file mode 100644 index 44cd8fb14149..000000000000 --- a/nemo/collections/vision/data/megatron/data_samplers.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from typing import Any, Dict, List, Optional - -import torch -from torch.utils.data import Dataset - -from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import MegatronPretrainingRandomSampler -from nemo.collections.vision.data.megatron.vit_dataset import RandomSeedDataset - - -class MegatronVisionPretrainingRandomSampler(MegatronPretrainingRandomSampler): - def __init__( - self, - dataset: Dataset, - total_samples: int, - consumed_samples: int, - micro_batch_size: int, - data_parallel_rank: int, - data_parallel_size: int, - data_sharding: bool, - drop_last: bool = True, - global_batch_size: Optional[int] = None, - pad_samples_to_global_batch_size: Optional[bool] = False, - ) -> None: - super().__init__( - total_samples=total_samples, - consumed_samples=consumed_samples, - micro_batch_size=micro_batch_size, - data_parallel_rank=data_parallel_rank, - data_parallel_size=data_parallel_size, - drop_last=drop_last, - global_batch_size=global_batch_size, - pad_samples_to_global_batch_size=pad_samples_to_global_batch_size, - ) - self.dataset = dataset - self.data_sharding = data_sharding - - def __iter__(self): - active_total_samples = self.total_samples - self.last_batch_size - self.epoch = self.consumed_samples // active_total_samples - current_epoch_samples = self.consumed_samples % active_total_samples - assert current_epoch_samples % self.micro_batch_times_data_parallel_size == 0 - - if isinstance(self.dataset, RandomSeedDataset): - self.dataset.set_epoch(self.epoch) - - # data sharding and random sampling - if self.data_sharding: - bucket_size = (self.total_samples // self.micro_batch_times_data_parallel_size) * self.micro_batch_size - bucket_offset = current_epoch_samples // self.data_parallel_size - start_idx = self.data_parallel_rank * bucket_size - - g = torch.Generator() - g.manual_seed(self.epoch) - random_idx = torch.randperm(bucket_size, generator=g).tolist() - idx_range = [start_idx + x for x in random_idx[bucket_offset:]] - else: - full_bucket_size = (self.total_samples // self.micro_batch_size) * self.micro_batch_size - full_bucket_offset = current_epoch_samples - g = torch.Generator() - g.manual_seed(self.epoch) - idx_range_total = torch.randperm(full_bucket_size, generator=g).tolist() - idx_range_active = idx_range_total[full_bucket_offset:] - idx_range = idx_range_active[self.data_parallel_rank :: self.data_parallel_size] - - batch = [] - # Last batch if not complete will be dropped. - for idx in idx_range: - batch.append(idx) - if len(batch) == self.micro_batch_size: - self.consumed_samples += self.micro_batch_times_data_parallel_size - yield batch - batch = [] - - # Check the last partial batch and see drop_last is set - if len(batch) > 0 and not self.drop_last: - yield batch diff --git a/nemo/collections/vision/data/megatron/image_folder.py b/nemo/collections/vision/data/megatron/image_folder.py deleted file mode 100644 index 44138dec3320..000000000000 --- a/nemo/collections/vision/data/megatron/image_folder.py +++ /dev/null @@ -1,286 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# code taken from -# https://github.com/pytorch/vision/blob/main/torchvision/datasets/folder.py -# added support for classes_fraction and data_per_class_fraction - -import os -import os.path -from typing import Any, Callable, Dict, List, Optional, Tuple, cast - -import numpy as np -from PIL import Image -from torchvision.datasets import VisionDataset - - -def has_file_allowed_extension(filename: str, extensions: Tuple[str, ...]) -> bool: - """Checks if a file is an allowed extension. - Args: - filename (string): path to a file - extensions (tuple of strings): extensions to consider (lowercase) - Returns: - bool: True if the filename ends with one of given extensions - """ - return filename.lower().endswith(extensions) - - -def is_image_file(filename: str) -> bool: - """Checks if a file is an allowed image extension. - Args: - filename (string): path to a file - Returns: - bool: True if the filename ends with a known image extension - """ - return has_file_allowed_extension(filename, IMG_EXTENSIONS) - - -def make_dataset( - directory: str, - class_to_idx: Dict[str, int], - data_per_class_fraction: float, - extensions: Optional[Tuple[str, ...]] = None, - is_valid_file: Optional[Callable[[str], bool]] = None, -) -> List[Tuple[str, int]]: - """Generates a list of samples of a form (path_to_sample, class). - Args: - directory (str): root dataset directory - class_to_idx (Dict[str, int]): dictionary mapping class name to class index - extensions (optional): A list of allowed extensions. - Either extensions or is_valid_file should be passed. Defaults to None. - is_valid_file (optional): A function that takes path of a file - and checks if the file is a valid file - (used to check of corrupt files) both extensions and - is_valid_file should not be passed. Defaults to None. - Raises: - ValueError: In case ``extensions`` and ``is_valid_file`` are None or both are not None. - Returns: - List[Tuple[str, int]]: samples of a form (path_to_sample, class) - """ - instances = [] - directory = os.path.expanduser(directory) - both_none = extensions is None and is_valid_file is None - both_something = extensions is not None and is_valid_file is not None - if both_none or both_something: - raise ValueError("Both extensions and is_valid_file cannot be None or not None at the same time") - if extensions is not None: - - def is_valid_file(x: str) -> bool: - return has_file_allowed_extension(x, cast(Tuple[str, ...], extensions)) - - is_valid_file = cast(Callable[[str], bool], is_valid_file) - for target_class in sorted(class_to_idx.keys()): - class_index = class_to_idx[target_class] - target_dir = os.path.join(directory, target_class) - if not os.path.isdir(target_dir): - continue - local_instances = [] - for root, _, fnames in sorted(os.walk(target_dir, followlinks=True)): - for fname in sorted(fnames): - path = os.path.join(root, fname) - if is_valid_file(path): - item = path, class_index - local_instances.append(item) - - instances.extend(local_instances[0 : int(len(local_instances) * data_per_class_fraction)]) - - return instances - - -class DatasetFolder(VisionDataset): - """A generic data loader where the samples are arranged in this way: :: - root/class_x/xxx.ext - root/class_x/xxy.ext - root/class_x/[...]/xxz.ext - root/class_y/123.ext - root/class_y/nsdf3.ext - root/class_y/[...]/asd932_.ext - Args: - root (string): Root directory path. - loader (callable): A function to load a sample given its path. - extensions (tuple[string]): A list of allowed extensions. - both extensions and is_valid_file should not be passed. - transform (callable, optional): A function/transform that takes in - a sample and returns a transformed version. - E.g, ``transforms.RandomCrop`` for images. - target_transform (callable, optional): A function/transform that takes - in the target and transforms it. - is_valid_file (callable, optional): A function that takes path of a file - and check if the file is a valid file (used to check of corrupt files) - both extensions and is_valid_file should not be passed. - Attributes: - classes (list): List of the class names sorted alphabetically. - class_to_idx (dict): Dict with items (class_name, class_index). - samples (list): List of (sample path, class_index) tuples - targets (list): The class_index value for each image in the dataset - """ - - def __init__( - self, - root: str, - loader: Callable[[str], Any], - extensions: Optional[Tuple[str, ...]] = None, - transform: Optional[Callable] = None, - target_transform: Optional[Callable] = None, - classes_fraction=1.0, - data_per_class_fraction=1.0, - is_valid_file: Optional[Callable[[str], bool]] = None, - ) -> None: - super(DatasetFolder, self).__init__(root, transform=transform, target_transform=target_transform) - self.classes_fraction = classes_fraction - self.data_per_class_fraction = data_per_class_fraction - classes, class_to_idx = self._find_classes(self.root) - samples = self.make_dataset(self.root, class_to_idx, self.data_per_class_fraction, extensions, is_valid_file) - if len(samples) == 0: - msg = "Found 0 files in subfolders of: {}\n".format(self.root) - if extensions is not None: - msg += "Supported extensions are: {}".format(",".join(extensions)) - raise RuntimeError(msg) - - self.loader = loader - self.extensions = extensions - self.total = len(samples) - self.classes = classes - self.class_to_idx = class_to_idx - self.samples = samples - self.targets = [s[1] for s in samples] - - @staticmethod - def make_dataset( - directory: str, - class_to_idx: Dict[str, int], - data_per_class_fraction: float, - extensions: Optional[Tuple[str, ...]] = None, - is_valid_file: Optional[Callable[[str], bool]] = None, - ) -> List[Tuple[str, int]]: - return make_dataset( - directory, class_to_idx, data_per_class_fraction, extensions=extensions, is_valid_file=is_valid_file - ) - - def _find_classes(self, dir: str) -> Tuple[List[str], Dict[str, int]]: - """ - Finds the class folders in a dataset. - Args: - dir (string): Root directory path. - Returns: - tuple: (classes, class_to_idx) where classes are relative to (dir), and class_to_idx is a dictionary. - Ensures: - No class is a subdirectory of another. - """ - all_classes = [d.name for d in os.scandir(dir) if d.is_dir()] - classes = all_classes[0 : int(len(all_classes) * self.classes_fraction)] - classes.sort() - class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)} - return classes, class_to_idx - - def __getitem__(self, index: int) -> Tuple[Any, Any]: - """ - Args: - index (int): Index - Returns: - tuple: (sample, target) where target is class_index of the target class. - """ - curr_index = index - for x in range(self.total): - try: - path, target = self.samples[curr_index] - sample = self.loader(path) - break - except Exception as e: - curr_index = np.random.randint(0, self.total) - - if self.transform is not None: - sample = self.transform(sample) - if self.target_transform is not None: - target = self.target_transform(target) - - return sample, target - - def __len__(self) -> int: - return len(self.samples) - - -IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp') - - -def pil_loader(path: str) -> Image.Image: - # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) - with open(path, 'rb') as f: - img = Image.open(f) - return img.convert('RGB') - - -# TODO: specify the return type -def accimage_loader(path: str) -> Any: - import accimage - - try: - return accimage.Image(path) - except IOError: - # Potentially a decoding problem, fall back to PIL.Image - return pil_loader(path) - - -def default_loader(path: str) -> Any: - from torchvision import get_image_backend - - if get_image_backend() == 'accimage': - return accimage_loader(path) - else: - return pil_loader(path) - - -class ImageFolder(DatasetFolder): - """A generic data loader where the images are arranged in this way: :: - root/dog/xxx.png - root/dog/xxy.png - root/dog/[...]/xxz.png - root/cat/123.png - root/cat/nsdf3.png - root/cat/[...]/asd932_.png - Args: - root (string): Root directory path. - transform (callable, optional): A function/transform that takes in an PIL image - and returns a transformed version. E.g, ``transforms.RandomCrop`` - target_transform (callable, optional): A function/transform that takes in the - target and transforms it. - loader (callable, optional): A function to load an image given its path. - is_valid_file (callable, optional): A function that takes path of an Image file - and check if the file is a valid file (used to check of corrupt files) - Attributes: - classes (list): List of the class names sorted alphabetically. - class_to_idx (dict): Dict with items (class_name, class_index). - imgs (list): List of (image path, class_index) tuples - """ - - def __init__( - self, - root: str, - transform: Optional[Callable] = None, - target_transform: Optional[Callable] = None, - classes_fraction=1.0, - data_per_class_fraction=1.0, - loader: Callable[[str], Any] = default_loader, - is_valid_file: Optional[Callable[[str], bool]] = None, - ): - super(ImageFolder, self).__init__( - root, - loader, - IMG_EXTENSIONS if is_valid_file is None else None, - transform=transform, - target_transform=target_transform, - classes_fraction=classes_fraction, - data_per_class_fraction=data_per_class_fraction, - is_valid_file=is_valid_file, - ) - self.imgs = self.samples diff --git a/nemo/collections/vision/data/megatron/vit_dataset.py b/nemo/collections/vision/data/megatron/vit_dataset.py deleted file mode 100644 index 5ba711dd0b28..000000000000 --- a/nemo/collections/vision/data/megatron/vit_dataset.py +++ /dev/null @@ -1,284 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import numpy as np -import torch -import torchvision.transforms as T -from PIL import Image, ImageFilter, ImageOps -from torch.utils.data import Dataset - -from nemo.collections.multimodal.data.common.data_samplers import SharedEpoch -from nemo.collections.vision.data.megatron.autoaugment import ImageNetPolicy -from nemo.collections.vision.data.megatron.image_folder import ImageFolder - - -def _to_torch_data_type(precision): - if precision in ['bf16', 'bf16-mixed']: - return torch.bfloat16 - elif precision in [16, '16', '16-mixed']: - return torch.float16 - elif precision in [32, '32', '32-true']: - return torch.float32 - else: - raise ValueError(f"Cannot recognize precision {precision}") - - -class RandomSeedDataset(Dataset): - def __init__(self, dataset, seed=1234): - self.base_seed = seed - self.dataset = dataset - self.epoch = SharedEpoch() - - def __len__(self): - return len(self.dataset) - - def set_epoch(self, epoch): - self.epoch.set_value(epoch) - - def __getitem__(self, idx): - seed = idx + self.base_seed + self.epoch.get_value() * 32768 - torch.manual_seed(seed) - random.seed(seed) - np.random.seed(seed) - return self.dataset[idx] - - -class GaussianBlur(object): - """ - Apply Gaussian Blur to the PIL image. - """ - - def __init__(self, p=0.5, radius_min=0.1, radius_max=2.0): - self.prob = p - self.radius_min = radius_min - self.radius_max = radius_max - - def __call__(self, img): - do_it = random.random() <= self.prob - if not do_it: - return img - - return img.filter(ImageFilter.GaussianBlur(radius=random.uniform(self.radius_min, self.radius_max))) - - -class Solarization(object): - """ - Apply Solarization to the PIL image. - """ - - def __init__(self, p): - self.p = p - - def __call__(self, img): - if random.random() < self.p: - return ImageOps.solarize(img) - else: - return img - - -class ClassificationTransform: - def __init__(self, model_cfg, image_size, train=True): - self.data_type = _to_torch_data_type(model_cfg.precision) - if train: - self.transform = T.Compose( - [ - T.RandomResizedCrop(image_size), - T.RandomHorizontalFlip(), - T.ColorJitter(0.4, 0.4, 0.4, 0.1), - ImageNetPolicy(), - T.ToTensor(), - T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), - T.ConvertImageDtype(self.data_type), - ] - ) - else: - self.transform = T.Compose( - [ - T.Resize(image_size), - T.CenterCrop(image_size), - T.ToTensor(), - T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), - T.ConvertImageDtype(self.data_type), - ] - ) - - def __call__(self, input): - output = self.transform(input) - return output - - -class InpaintingTransform: - def __init__(self, model_cfg, image_size, train=True): - self.mask_factor = model_cfg.mask_factor - self.mask_type = model_cfg.mask_type - self.image_size = image_size - self.patch_size = model_cfg.patch_dim - self.mask_size = int(self.mask_factor * (image_size[0] / self.patch_size) * (image_size[1] / self.patch_size)) - self.train = train - self.data_type = _to_torch_data_type(model_cfg.precision) - - if self.train: - self.transform = T.Compose( - [ - T.RandomResizedCrop(self.image_size), - T.RandomHorizontalFlip(), - T.ColorJitter(0.4, 0.4, 0.4, 0.1), - ImageNetPolicy(), - T.ToTensor(), - T.ConvertImageDtype(self.data_type), - ] - ) - else: - self.transform = T.Compose( - [ - T.Resize(self.image_size, interpolation=2), - T.CenterCrop(self.image_size), - T.ToTensor(), - T.ConvertImageDtype(self.data_type), - ] - ) - - def gen_mask(self, image_size, mask_size, mask_type, patch_size): - # output: mask as a list with indices for missing patches - action_list = [[0, 1], [0, -1], [1, 0], [-1, 0]] - assert image_size[0] == image_size[1] - img_size_patch = image_size[0] // patch_size - - # drop masked patches - mask = torch.zeros((image_size[0], image_size[1]), dtype=torch.float) - - if mask_type == 'random': - x = torch.randint(0, img_size_patch, ()) - y = torch.randint(0, img_size_patch, ()) - for i in range(mask_size): - r = torch.randint(0, len(action_list), ()) - x = torch.clamp(x + action_list[r][0], min=0, max=img_size_patch - 1) - y = torch.clamp(y + action_list[r][1], min=0, max=img_size_patch - 1) - x_offset = x * patch_size - y_offset = y * patch_size - mask[x_offset : x_offset + patch_size, y_offset : y_offset + patch_size] = 1 - else: - assert mask_type == 'row' - count = 0 - for x in reversed(range(img_size_patch)): - for y in reversed(range(img_size_patch)): - if count < mask_size: - count += 1 - x_offset = x * patch_size - y_offset = y * patch_size - mask[x_offset : x_offset + patch_size, y_offset : y_offset + patch_size] = 1 - return mask - - def __call__(self, input): - trans_input = self.transform(input) - mask = self.gen_mask(self.image_size, self.mask_size, self.mask_type, self.patch_size) - mask = mask.unsqueeze(dim=0) - return trans_input, mask - - -class DinoTransform(object): - def __init__(self, model_cfg, image_size, train=True): - self.data_type = _to_torch_data_type(model_cfg.precision) - flip_and_color_jitter = T.Compose( - [ - T.RandomHorizontalFlip(p=0.5), - T.RandomApply([T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.2, hue=0.1)], p=0.8), - T.RandomGrayscale(p=0.2), - ] - ) - - if model_cfg.precision in [16, "bf16"]: - normalize = T.Compose( - [ - T.ToTensor(), - T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), - T.ConvertImageDtype(self.data_type), - ] - ) - else: - normalize = T.Compose([T.ToTensor(), T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),]) - - # first global crop - scale_const = 0.4 - self.global_transform1 = T.Compose( - [ - T.RandomResizedCrop(image_size, scale=(scale_const, 1), interpolation=Image.BICUBIC), - flip_and_color_jitter, - GaussianBlur(1.0), - normalize, - ] - ) - # second global crop - self.global_transform2 = T.Compose( - [ - T.RandomResizedCrop(image_size, scale=(scale_const, 1), interpolation=Image.BICUBIC), - flip_and_color_jitter, - GaussianBlur(0.1), - Solarization(0.2), - normalize, - ] - ) - # transformation for the local small crops - self.local_crops_number = model_cfg.dino_local_crops_number - self.local_transform = T.Compose( - [ - T.RandomResizedCrop( - model_cfg.dino_local_img_size, scale=(0.05, scale_const), interpolation=Image.BICUBIC - ), - flip_and_color_jitter, - GaussianBlur(p=0.5), - normalize, - ] - ) - - def __call__(self, image): - crops = [] - crops.append(self.global_transform1(image)) - crops.append(self.global_transform2(image)) - for _ in range(self.local_crops_number): - crops.append(self.local_transform(image)) - return crops - - -def build_train_valid_datasets(model_cfg, data_path, image_size=224): - if model_cfg.vision_pretraining_type == 'classify': - train_transform = ClassificationTransform(model_cfg, image_size) - val_transform = ClassificationTransform(model_cfg, image_size, train=False) - elif model_cfg.vision_pretraining_type == 'inpaint': - train_transform = InpaintingTransform(model_cfg, image_size, train=False) - val_transform = InpaintingTransform(model_cfg, image_size, train=False) - elif model_cfg.vision_pretraining_type == 'dino': - train_transform = DinoTransform(model_cfg, image_size, train=True) - val_transform = ClassificationTransform(model_cfg, image_size, train=False) - else: - raise Exception('{} vit pretraining type is not supported.'.format(model_cfg.vit_pretraining_type)) - - # training dataset - train_data_path = data_path[0] if len(data_path) <= 2 else data_path[2] - train_data = ImageFolder( - root=train_data_path, - transform=train_transform, - classes_fraction=model_cfg.classes_fraction, - data_per_class_fraction=model_cfg.data_per_class_fraction, - ) - train_data = RandomSeedDataset(train_data) - - # validation dataset - val_data_path = data_path[1] - val_data = ImageFolder(root=val_data_path, transform=val_transform) - val_data = RandomSeedDataset(val_data) - - return train_data, val_data diff --git a/nemo/collections/vision/losses/__init__.py b/nemo/collections/vision/losses/__init__.py deleted file mode 100644 index 2db92b257416..000000000000 --- a/nemo/collections/vision/losses/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/vision/metrics/__init__.py b/nemo/collections/vision/metrics/__init__.py deleted file mode 100644 index 2db92b257416..000000000000 --- a/nemo/collections/vision/metrics/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/vision/models/__init__.py b/nemo/collections/vision/models/__init__.py deleted file mode 100644 index 2db92b257416..000000000000 --- a/nemo/collections/vision/models/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/vision/models/megatron_vit_classification_models.py b/nemo/collections/vision/models/megatron_vit_classification_models.py deleted file mode 100644 index 854e1d6b5a83..000000000000 --- a/nemo/collections/vision/models/megatron_vit_classification_models.py +++ /dev/null @@ -1,801 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import itertools -from functools import partial -from typing import Any, Dict, List, Optional - -import numpy as np -import torch -from omegaconf.dictconfig import DictConfig -from pytorch_lightning.accelerators import CPUAccelerator -from pytorch_lightning.trainer.trainer import Trainer - -from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import MegatronPretrainingSampler -from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel -from nemo.collections.nlp.modules.common.megatron.attention import HAVE_FLASH_ATTENTION -from nemo.collections.nlp.modules.common.megatron.build_model import build_model -from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule -from nemo.collections.nlp.modules.common.megatron.utils import ( - average_losses_across_data_parallel_group, - get_all_params_for_weight_decay_optimization, - get_linear_layer, - get_params_for_weight_decay_optimization, - init_method_normal, - scaled_init_method_normal, -) -from nemo.collections.nlp.parts.utils_funcs import get_last_rank -from nemo.collections.vision.data.megatron.data_samplers import MegatronVisionPretrainingRandomSampler -from nemo.collections.vision.data.megatron.vit_dataset import build_train_valid_datasets -from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone, VitMlpHead -from nemo.core.classes.common import PretrainedModelInfo -from nemo.core.neural_types import ChannelType, NeuralType -from nemo.utils import logging - -try: - import apex.transformer.pipeline_parallel.utils - from apex.transformer.pipeline_parallel.utils import get_num_microbatches - - HAVE_APEX = True - -except (ImportError, ModuleNotFoundError): - - HAVE_APEX = False - -try: - from megatron.core import parallel_state - from megatron.core.pipeline_parallel.schedules import get_forward_backward_func - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - - -class VitClassificationModel(MegatronModule): - """Vision Transformer Model.""" - - def __init__( - self, model_cfg, model_parallel_config, num_classes, finetune=False, pre_process=True, post_process=True - ): - super(VitClassificationModel, self).__init__() - - scaled_init_method = ( - scaled_init_method_normal(model_cfg.init_method_std, model_cfg.num_layers) - if model_cfg.use_scaled_init_method - else init_method_normal(model_cfg.init_method_std) - ) - - self.config = model_parallel_config - self.hidden_size = model_cfg.hidden_size - self.num_classes = num_classes - self.finetune = finetune - self.pre_process = pre_process - self.post_process = post_process - self.backbone = VitBackbone( - model_cfg, - model_parallel_config, - init_method=init_method_normal(model_cfg.init_method_std), - scaled_init_method=scaled_init_method, - pre_process=self.pre_process, - post_process=self.post_process, - single_token_output=True, - ) - - if self.post_process: - if not self.finetune: - self.head = VitMlpHead(self.hidden_size, self.num_classes) - else: - self.head = get_linear_layer(self.hidden_size, self.num_classes, torch.nn.init.zeros_) - - def set_input_tensor(self, input_tensor): - """See megatron.model.transformer.set_input_tensor()""" - self.backbone.set_input_tensor(input_tensor) - - def forward(self, input): - hidden_states = self.backbone(input) - - if self.post_process: - hidden_states = self.head(hidden_states) - hidden_states = hidden_states.contiguous() - return hidden_states - - -class MegatronVitClassificationModel(MegatronBaseModel): - """Megatron Vision Transformer Model.""" - - def __init__(self, cfg: DictConfig, trainer: Trainer): - if not HAVE_APEX: - raise ImportError( - "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." - ) - if not HAVE_MEGATRON_CORE: - raise ImportError( - "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." - ) - - super().__init__(cfg, trainer=trainer) - - self._validate_trainer() - - # TODO(yuya): clean up all default values - self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) - - if not self.megatron_amp_O2 and self.cfg.get('virtual_pipeline_model_parallel_size', None): - raise ValueError('Virtual pipeline model parallel is only supported when using megatron_amp_O2') - - # build_model returns a list of modules which are used for interleaved pipeline parallelism - if isinstance(self.trainer.accelerator, CPUAccelerator): - self.model = build_model( - model_provider_func=self.model_provider_func, - wrap_with_ddp=False, - on_cpu=True, - virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), - ) - else: - self.model = build_model( - model_provider_func=self.model_provider_func, - wrap_with_ddp=False, - virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), - ) - - # if we're not using interleaved, then self.model is a module. - if self.cfg.get('virtual_pipeline_model_parallel_size', None) is None: - self.model = self.model[0] - - if self.megatron_amp_O2: - - if not self.with_distributed_adam: - # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type - if isinstance(self.model, list): - for module in self.model: - module.cuda(torch.cuda.current_device()) - else: - self.model.cuda(torch.cuda.current_device()) - - # Model wrapper to convert both model and inputs to half precision - if isinstance(self.model, list): - converted_model = [] - for module in self.model: - converted_model.append( - Float16Module(config=self.model_parallel_config, module=module, precision=cfg.precision) - ) - self.model = converted_model - else: - self.model = Float16Module( - config=self.model_parallel_config, module=self.model, precision=cfg.precision - ) - - if self.trainer.precision in ['bf16', 'bf16-mixed']: - self.autocast_dtype = torch.bfloat16 - elif self.trainer.precision in [32, '32', '32-true']: - self.autocast_dtype = torch.float - elif self.trainer.precision in [16, '16', '16-mixed']: - self.autocast_dtype = torch.half - else: - raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') - - self.enable_autocast = ( - True if (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False - ) - - self.transformer_engine = cfg.get('transformer_engine', False) - - # Convert the global-batch-based profile index to micro-batch index - if hasattr(self, '_nsys_profile_enabled'): - mp_size = cfg.get('tensor_model_parallel_size', 1) * cfg.get('pipeline_model_parallel_size', 1) - data_parallel_world_size = trainer.world_size // mp_size - grad_accum_steps = cfg.get('global_batch_size') // (cfg.get('micro_batch_size') * data_parallel_world_size) - self._nsys_profile_start_step *= grad_accum_steps - self._nsys_profile_end_step *= grad_accum_steps - self.get_attention_mask_from_fusion = self.cfg.get('get_attention_mask_from_fusion', True) - self.initialize_ub = self.cfg.get('ub_tp_comm_overlap', False) - - def get_module_list(self): - if isinstance(self.model, list): - return [model.module if isinstance(model, Float16Module) else model for model in self.model] - elif isinstance(self.model, Float16Module): - return [self.model.module] - else: - return [self.model] - - def model_provider_func(self, pre_process, post_process): - """Model depends on pipeline paralellism.""" - model = VitClassificationModel( - model_cfg=self.cfg, - model_parallel_config=self.model_parallel_config, - num_classes=self.cfg.get("num_classes"), # TODO(yuya): clean this up - finetune=self.cfg.get("finetune", False), - pre_process=pre_process, - post_process=post_process, - ) - return model - - def setup_optimizer_param_groups(self): - """ModelPT override. Optimizer will get self._optimizer_param_groups""" - if self.cfg.get('do_layer_norm_weight_decay', False): - if isinstance(self.model, list): - self._optimizer_param_groups = get_all_params_for_weight_decay_optimization(self.model) - else: - self._optimizer_param_groups = get_all_params_for_weight_decay_optimization([self.model]) - - else: - self._optimizer_param_groups = get_params_for_weight_decay_optimization(self.model) - - def configure_optimizers(self): - - if self.with_distributed_adam: - - # Disable overlapped grad sync for embedding grad when - # pipeline parallelism is enabled - if parallel_state.get_pipeline_model_parallel_world_size() > 1: - if parallel_state.is_pipeline_first_stage(ignore_virtual=True): - if isinstance(self.model, list): - module = self.model[0] # only the first virtual rank has the embeddings - else: - module = self.model - - if parallel_state.is_pipeline_last_stage(ignore_virtual=True): - if isinstance(self.model, list): - module = self.model[-1] # only the last virtual rank has the embeddings - else: - module = self.model - - # Disable overlapped grad sync for layer norm grads when - # sequence parallelism is enabled - for param in self.parameters(): - if getattr(param, 'sequence_parallel_enabled', False): - param._disable_greedy_grad_copy = not self.megatron_amp_O2 - param._disable_overlap_grad_sync = True - - # KJJ - Copied this entire block, up to "return" here blindly from megatron_gpt_model.py - - # Initialize parameter buckets for overlapped grad and param syncs - # Note: Params with disabled overlapping are put in the - # last param bucket - buckets = [] - if self.cfg.get('virtual_pipeline_model_parallel_size', None) is not None: - # Initialize a bucket for each virtual pipeline stage - for module in self.model: - if isinstance(module, Float16Module): - module = module.module - stage_bucket = [] - # for layer in module.language_model.encoder.layers: - for layer in module.backbone.transformer.layers: - stage_bucket.extend( - p for p in layer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False) - ) - buckets.append(stage_bucket) - else: - # Initialize a bucket for each Transformer layer - modules = self.model if isinstance(self.model, list) else [self.model] - for module in modules: - if isinstance(module, Float16Module): - module = module.module - # for layer in module.language_model.encoder.layers: - for layer in module.backbone.transformer.layers: - - buckets.append( - [p for p in layer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False)] - ) - buckets.reverse() - used_params = set() - for bucket in buckets: - used_params.update(bucket) - buckets[-1].extend(p for p in self.parameters() if p not in used_params) - self.distributed_adam_buckets = buckets - - return super().configure_optimizers() - - def forward(self, tokens): - output_tensor = self.model(tokens) - return output_tensor - - def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): - - # handle asynchronous grad reduction - no_sync_func = None - grad_sync_func = None - param_sync_func = None - if not forward_only and self.with_distributed_adam: - no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) - grad_sync_func = self.reduce_overlap_gradients - param_sync_func = self.sync_overlap_parameters - - # pipeline schedules will get these from self.model.config - for module in self.get_module_list(): - module.config.no_sync_func = no_sync_func - module.config.grad_sync_func = grad_sync_func - module.config.param_sync_func = param_sync_func - - # run forward and backwards passes for an entire global batch - # we do this inside training_step to support pipeline parallelism - fwd_bwd_function = get_forward_backward_func() - - # TODO @akhattar: add num_micro_batches_with_partial_activation_checkpoints when ready - losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(), - data_iterator=dataloader_iter, - model=[self.model], - num_microbatches=get_num_microbatches(), - forward_only=forward_only, - seq_length=self.cfg.encoder_seq_length, - micro_batch_size=self.cfg.micro_batch_size, - ) - - # only the last stages of the pipeline return losses - if losses_reduced_per_micro_batch: - if (not forward_only) or self.cfg.data.get('validation_drop_last', True): - # average loss across micro batches - loss_tensors_list = [loss_reduced['loss'] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.stack(loss_tensors_list) - loss_mean = loss_tensor.mean() - acc_tensors_list = [loss_reduced['accuracy'] for loss_reduced in losses_reduced_per_micro_batch] - acc_tensor = torch.stack(acc_tensors_list) - accuracy_mean = acc_tensor.mean() - else: - # Get the total loss since micro batches sizes are not uniform - raise NotImplementedError("Losses of micro batches sizes must be uniform!") - else: - # we're not on the last pipeline stage so no losses - if forward_only: - loss_mean = [] - accuracy_mean = [] - else: - loss_mean = torch.tensor(0.0).cuda() - accuracy_mean = loss_mean.copy() - - return loss_mean, accuracy_mean - - def initialize_ub_func(self): - ub_cfgs = self.cfg.get('ub_tp_comm_overlap_cfg', None) - if ub_cfgs is None: - warnings.warn( - "Couldn't find TP config. Please check the path correctness. Initializing TP comm overlap with the default config." - ) - - input_shape = [ - self.cfg.get('encoder_seq_length') * self.cfg.get('micro_batch_size'), - self.cfg.get('hidden_size'), - ] - - te_module.base.initialize_ub( - shape=input_shape, - tp_size=self.cfg.get('tensor_model_parallel_size'), - use_fp8=self.cfg.get('fp8'), - ub_cfgs=ub_cfgs, - ) - self.initialize_ub = False - - def training_step(self, dataloader_iter, batch_idx): - """ - Our dataloaders produce a micro-batch and then we fetch - a number of microbatches depending on the global batch size and model parallel size - from the dataloader to produce a list of microbatches. - Batch should be a list of microbatches and those microbatches should on CPU. - Microbatches are then moved to GPU during the pipeline. - The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. - """ - # Initialize userbuffer communicators. - if self.initialize_ub: - self.initialize_ub_func() - - # we zero grads here because we also call backward in the megatron-core fwd/bwd functions - self._optimizer.zero_grad() - - loss_mean, _ = self.fwd_bwd_step(dataloader_iter, batch_idx, False) - - # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced - if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): - self.allreduce_sequence_parallel_gradients() - - if self.with_distributed_adam: - # KJJ - Added this block from megatron_gpt_model. It says it's not necessary - # and it's not clear if the remaining "if not" logic is still needed. - # keeping it for now, but might need to delete one or both of these. - - # synchronize asynchronous grad reductions - # note: not necessary, but reduces performance degradation - # from multiple simultaneous NCCL calls - self._optimizer._finish_bucket_grad_sync() - - # launch grad reductions - # Note: grads in first pipeline stage have already been - # reduced - if not parallel_state.is_pipeline_first_stage(): - self.reduce_overlap_gradients() - elif self.megatron_amp_O2: - # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) - # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): - # # main grads are stored in the MainParamsOptimizer wrapper - # self._optimizer.allreduce_main_grads() - self._optimizer.allreduce_main_grads() - else: - # async grad allreduce is not currently implemented for O1/autocasting mixed precision training - # so we all-reduce gradients after the pipeline - self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) - - # if self.cfg.get('pipeline_model_parallel_size', 1) > 1: - # # when using pipeline parallelism the first and last stage must keep embeddings in sync - # self.allreduce_first_last_embeddings() - - ## logging - # we can only log on one rank if it is rank zero so we broadcast from last rank - # we can avoid this broadcast by updating the PTL log function to accept specific ranks - torch.distributed.broadcast(loss_mean, get_last_rank()) - - if self.cfg.precision in [16, '16', '16-mixed']: - loss_scale = self.trainer.precision_plugin.scaler._scale - if loss_scale is not None: - self.log('loss_scale', loss_scale, batch_size=1) - - self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) - lr = self._optimizer.param_groups[0]['lr'] - self.log('lr', lr, rank_zero_only=True, batch_size=1) - self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log( - 'consumed_samples', - self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), - prog_bar=True, - rank_zero_only=True, - batch_size=1, - ) - - return loss_mean - - def backward(self, *args, **kwargs): - """ LightningModule hook to do backward. - We want this to do nothing since we run backward in the fwd/bwd functions from apex. - No need to call it here. - """ - pass - - def optimizer_zero_grad(self, *args, **kwargs): - """ LightningModule hook to zero grad. - We want this to do nothing as we are zeroing grads during the training_step. - """ - pass - - def _append_sequence_parallel_module_grads(self, module, grads): - """ Helper method for allreduce_sequence_parallel_gradients""" - - for param in module.parameters(): - sequence_parallel_param = getattr(param, 'sequence_parallel', False) - if sequence_parallel_param and param.requires_grad: - if self.megatron_amp_O2: - grad = param.main_grad - else: - grad = param.grad - grads.append(grad.data) - - def allreduce_sequence_parallel_gradients(self): - """ All-reduce layernorm parameters across model parallel nodes when sequence parallelism is used. - Modified from megatron-lm: - https://gitlab-master.nvidia.com/ADLR/megatron-lm/-/blob/3f91f09bb2ab32f9904b47f46f19d2fc3f518ed8/megatron/training.py#L425 - """ - - grads = [] - if isinstance(self.model, list): - for module in self.model: - self._append_sequence_parallel_module_grads(module, grads) - else: - self._append_sequence_parallel_module_grads(self.model, grads) - - coalesced = torch._utils._flatten_dense_tensors(grads) - torch.distributed.all_reduce(coalesced, group=parallel_state.get_tensor_model_parallel_group()) - for buf, synced in zip(grads, torch._utils._unflatten_dense_tensors(coalesced, grads)): - buf.copy_(synced) - - def get_forward_output_and_loss_func(self, validation_step=False): - def loss_func(labels, output_tensor): - logits = output_tensor.contiguous().float() - loss = torch.nn.functional.cross_entropy(logits, labels) - - outputs = torch.argmax(logits, -1) - correct = (outputs == labels).float() - accuracy = torch.mean(correct) - - averaged_loss = average_losses_across_data_parallel_group([loss, accuracy]) - - return loss, {"loss": averaged_loss[0], "accuracy": averaged_loss[1]} - - def fwd_output_and_loss_func(dataloader_iter, model): - batch = next(dataloader_iter) - if parallel_state.get_pipeline_model_parallel_world_size() == 1: - batch = [x.cuda(non_blocking=True) for x in batch] - tokens, labels = batch - else: - # Vision transformer doesn't need attention mask - if parallel_state.is_pipeline_first_stage(): - # Fist pipeline stage needs only the tokens and position_ids - tokens = batch[0].cuda(non_blocking=True) - labels = None - elif parallel_state.is_pipeline_last_stage(): - # Last pipeline stage needs only the labels and loss_mask - labels = batch[1].cuda(non_blocking=True) - tokens = None - else: - # Intermediate pipeline stage doesn't need any inputs - tokens, labels = None, None - - output_tensor = model(tokens) - return output_tensor, partial(loss_func, labels) - - return fwd_output_and_loss_func - - def get_forward_output_only_func(self): - def fwd_output_only_func(batch, model): - raise NotImplementedError - - return fwd_output_only_func - - def validation_step(self, dataloader_iter, batch_idx): - """ - Our dataloaders produce a micro-batch and then we fetch - a number of microbatches depending on the global batch size and model parallel size - from the dataloader to produce a list of microbatches. - The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. - """ - mode = 'test' if self.trainer.testing else 'val' - - # Initialize userbuffer communicators. - if self.initialize_ub: - self.initialize_ub_func() - - loss, accuracy = self.fwd_bwd_step(dataloader_iter, batch_idx, True) - - self.validation_step_outputs.append((loss, accuracy)) if mode == 'val' else self.test_step_outputs.append( - (loss, accuracy) - ) - return loss, accuracy - - def on_validation_epoch_end(self): - # TODO (yuya): need fix later, check with Sean - if not self.validation_step_outputs: - return - - if parallel_state.is_pipeline_last_stage(): - loss_outputs = [output[0] for output in self.validation_step_outputs] - acc_outputs = [output[1] for output in self.validation_step_outputs] - - averaged_metrics = torch.tensor( - [torch.stack(loss_outputs).mean(), torch.stack(acc_outputs).mean()], dtype=torch.float32, device='cuda' - ) - else: - averaged_metrics = torch.tensor([0.0, 0.0], dtype=torch.float32, device='cuda') - - # we can only log on one rank if it is rank zero so we broadcast from last rank - torch.distributed.broadcast(averaged_metrics, get_last_rank()) - - averaged_loss, averaged_acc = averaged_metrics - - self.log('global_step', self.trainer.global_step, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True, batch_size=1) - self.log('val_accuracy', averaged_acc, prog_bar=True, rank_zero_only=True, batch_size=1) - self.validation_step_outputs.clear() # free memory - - return averaged_loss - - def test_step(self, batch, batch_idx): - return self.validation_step(batch, batch_idx) - - def on_test_epoch_end(self): - pass - - def build_train_valid_test_datasets(self): - logging.info('Building datasets for ViT...') - if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): - raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") - - self._train_ds, self._validation_ds = build_train_valid_datasets( - model_cfg=self.cfg, data_path=self.cfg.data.data_path, image_size=(self.cfg.img_h, self.cfg.img_w), - ) - self._test_ds = None - - if self._train_ds is not None: - logging.info(f'Length of train dataset: {len(self._train_ds)}') - if self._validation_ds is not None: - logging.info(f'Length of val dataset: {len(self._validation_ds)}') - if self._test_ds is not None: - logging.info(f'Length of test dataset: {len(self._test_ds)}') - logging.info(f'Finished building datasets for ViT.') - - return self._train_ds, self._validation_ds, self._test_ds - - def build_pretraining_data_loader(self, dataset, consumed_samples, drop_last=True): - """Buld dataloader given an input dataset.""" - - logging.info(f'Building dataloader with consumed samples: {consumed_samples}') - # Megatron sampler - if hasattr(self.cfg.data, 'dataloader_type') and self.cfg.data.dataloader_type is not None: - if self.cfg.data.dataloader_type == 'single': - batch_sampler = MegatronPretrainingSampler( - total_samples=len(dataset), - consumed_samples=consumed_samples, - micro_batch_size=self.cfg.micro_batch_size, - global_batch_size=self.cfg.global_batch_size, - data_parallel_rank=parallel_state.get_data_parallel_rank(), - data_parallel_size=parallel_state.get_data_parallel_world_size(), - drop_last=drop_last, - ) - elif self.cfg.data.dataloader_type == 'cyclic': - batch_sampler = MegatronVisionPretrainingRandomSampler( - dataset=dataset, - total_samples=len(dataset), - consumed_samples=consumed_samples, - micro_batch_size=self.cfg.micro_batch_size, - global_batch_size=self.cfg.global_batch_size, - data_parallel_rank=parallel_state.get_data_parallel_rank(), - data_parallel_size=parallel_state.get_data_parallel_world_size(), - drop_last=drop_last, - data_sharding=self.cfg.data.get("data_sharding", True), - ) - else: - raise ValueError('cfg.data.dataloader_type must be "single" or "cyclic"') - else: - raise ValueError('cfg.data.dataloader_type not found. Must be "single" or "cyclic"') - - return torch.utils.data.DataLoader( - dataset, - batch_sampler=batch_sampler, - num_workers=self.cfg.data.num_workers, - pin_memory=True, - persistent_workers=True if self.cfg.data.num_workers > 0 else False, - ) - - def setup(self, stage=None): - """ PTL hook that is executed after DDP spawns. - We setup datasets here as megatron datasets require DDP to instantiate. - See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. - Args: - stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. - """ - - # log number of parameters - if isinstance(self.model, list): - num_parameters_on_device = sum( - [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] - ) - # if parallel_state.get_pipeline_model_parallel_world_size() > 1 and parallel_state.is_pipeline_last_stage( - # ignore_virtual=True - # ): - # # substract the embedding weights on the last virtual stage - # num_word_embedding_parameters = sum([p.nelement() for p in self.model[-1].word_embeddings_weight()]) - # num_parameters_on_device -= num_word_embedding_parameters - else: - num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) - - # if parallel_state.get_pipeline_model_parallel_world_size() > 1 and parallel_state.is_pipeline_last_stage( - # ignore_virtual=True - # ): - # # substract the embedding weights on the last stage - # num_word_embedding_parameters = sum([p.nelement() for p in self.model.word_embeddings_weight()]) - # - # num_parameters_on_device -= num_word_embedding_parameters - - # to be summed across data parallel group - total_num_parameters = torch.tensor(num_parameters_on_device).cuda() - - torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) - - logging.info( - f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' - f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' - f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' - f'Total number of model parameters: {total_num_parameters:.2e}.' - ) - - resume_checkpoint_path = self.trainer.ckpt_path - if resume_checkpoint_path: - init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) - else: - init_consumed_samples = 0 - self.init_consumed_samples = init_consumed_samples - self.init_global_step = self.trainer.global_step - - # allowing restored models to optionally setup datasets - self.build_train_valid_test_datasets() - self.setup_training_data(self.cfg.data) - self.setup_validation_data(self.cfg.data) - self.setup_test_data(self.cfg.data) - - # when using pipeline model parallel the final stage need to initialize word embeddings - if parallel_state.get_pipeline_model_parallel_world_size() > 1: - if isinstance(self.model, list): - for i, module in enumerate(self.model): - parallel_state.set_virtual_pipeline_model_parallel_rank(i) - # module.sync_initial_word_embeddings() - parallel_state.set_virtual_pipeline_model_parallel_rank(0) - else: - # self.model.sync_initial_word_embeddings() - pass - - def setup_training_data(self, cfg): - if hasattr(self, '_train_ds') and self._train_ds is not None: - consumed_samples = self.compute_consumed_samples(0) - logging.info( - f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' - ) - self._train_dl = self.build_pretraining_data_loader(self._train_ds, consumed_samples) - - def setup_validation_data(self, cfg): - if hasattr(self, '_validation_ds') and self._validation_ds is not None: - consumed_samples = 0 - logging.info( - f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' - ) - drop_last = True - if not self.cfg.data.get('validation_drop_last', True): - logging.info(f'Drop last in validation dataset is set to False') - drop_last = False - self._validation_dl = self.build_pretraining_data_loader(self._validation_ds, consumed_samples,) - - def setup_test_data(self, cfg): - if hasattr(self, '_test_ds') and self._test_ds is not None: - consumed_samples = 0 - logging.info( - f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' - ) - self._test_dl = self.build_pretraining_data_loader(self._test_ds, consumed_samples) - - def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any: - raise NotImplementedError - - def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: - """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device - When using pipeline parallelism, we need the global batch to remain on the CPU, - since the memory overhead will be too high when using a large number of microbatches. - Microbatches are transferred from CPU to GPU inside the pipeline. - """ - return batch - - def _validate_trainer(self): - """ Certain trainer configurations can break training. - Here we try to catch them and raise an error. - """ - if self.trainer.accumulate_grad_batches > 1: - raise ValueError( - f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' - ) - - @classmethod - def list_available_models(cls) -> Optional[PretrainedModelInfo]: - return None - - def on_save_checkpoint(self, checkpoint) -> None: - """LightningModule hook: - https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-save-checkpoint - """ - if isinstance(self.model, list): - for i in range(len(self.model)): - parallel_state.set_virtual_pipeline_model_parallel_rank(i) - checkpoint[f'model{i}'] = self.model[i].module.state_dict_for_save_checkpoint() - parallel_state.set_virtual_pipeline_model_parallel_rank(0) - - def on_load_checkpoint(self, checkpoint) -> None: - """LightningModule hook: - https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-load-checkpoint - """ - if isinstance(self.model, list): - for i in range(len(self.model)): - parallel_state.set_virtual_pipeline_model_parallel_rank(i) - self.model[i].module.load_state_dict(checkpoint[f'model{i}'], strict=True) - parallel_state.set_virtual_pipeline_model_parallel_rank(0) - - def parameters(self): - if isinstance(self.model, list): - return itertools.chain.from_iterable(module.parameters() for module in self.model) - else: - return self.model.parameters() diff --git a/nemo/collections/vision/modules/__init__.py b/nemo/collections/vision/modules/__init__.py deleted file mode 100644 index 2db92b257416..000000000000 --- a/nemo/collections/vision/modules/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/vision/modules/common/__init__.py b/nemo/collections/vision/modules/common/__init__.py deleted file mode 100644 index 2db92b257416..000000000000 --- a/nemo/collections/vision/modules/common/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/vision/modules/common/megatron/__init__.py b/nemo/collections/vision/modules/common/megatron/__init__.py deleted file mode 100644 index 2db92b257416..000000000000 --- a/nemo/collections/vision/modules/common/megatron/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/vision/modules/common/megatron/vision_transformer.py b/nemo/collections/vision/modules/common/megatron/vision_transformer.py deleted file mode 100644 index 792f0bdc4253..000000000000 --- a/nemo/collections/vision/modules/common/megatron/vision_transformer.py +++ /dev/null @@ -1,492 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# coding=utf-8 - - -"""Transformer.""" - -import torch - -from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType -from nemo.collections.nlp.modules.common.megatron.module import MegatronModule -from nemo.collections.nlp.modules.common.megatron.transformer import ParallelTransformer, ParallelTransformerLayer_ -from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults - -try: - from apex.normalization import MixedFusedRMSNorm - from apex.transformer.enums import AttnMaskType, AttnType, ModelType - from apex.transformer.utils import divide as safe_divide - - HAVE_APEX = True - -except (ImportError, ModuleNotFoundError): - - HAVE_APEX = False - - # fake missing classes with None attributes - ModelType = AttnMaskType = AttnType = LayerType = ApexGuardDefaults() - -try: - from megatron.core import parallel_state, tensor_parallel - from megatron.core.parallel_state import get_tensor_model_parallel_world_size - - HAVE_MEGATRON_CORE = True - -except (ImportError, ModuleNotFoundError): - - HAVE_MEGATRON_CORE = False - -""" We use the following notation throughout this file: - h: hidden size - n: number of attention heads - p: number of model parallel partitions - np: n/p - hp: h/p - hn: h/n - b: batch size - s: sequence length - l: number of layers - Transformer takes input of size [s, b, h] and returns a - tensor of the same size. We use the following arguments: - hyperparameters: transformer hyperparameters -""" - - -class DropPath(MegatronModule): - """Drop paths (Stochastic Depth) per sample - (when applied in main path of residual blocks). - """ - - def __init__(self, drop_prob=0.0): - super(DropPath, self).__init__() - self.drop_prob = drop_prob - - def forward(self, hidden_state): - if self.drop_prob == 0.0 or not self.training: - return hidden_state - keep_prob = 1 - self.drop_prob - # work with diff dim tensors, not just 2D ConvNets - # hidden_state: [s, b, h] - shape = (1,) + (hidden_state.shape[1],) + (1,) * (hidden_state.ndim - 2) - random_tensor = keep_prob + torch.rand(shape, dtype=hidden_state.dtype, device=hidden_state.device) - random_tensor.floor_() # binarize - output = hidden_state.div(keep_prob) * random_tensor - return output - - -class ParallelVisionTransformerLayer_(ParallelTransformerLayer_): - """A single transformer layer. - - Transformer layer takes input with size [s, b, h] and returns an - output of the same size. - """ - - def __init__( - self, - config, - init_method, - output_layer_init_method, - layer_number, - hidden_size, - ffn_hidden_size, - num_attention_heads, - layer_type=LayerType.encoder, - self_attn_mask_type=AttnMaskType.padding, - fp32_residual_connection=False, - precision=16, - apply_query_key_layer_scaling=True, - kv_channels=None, - layernorm_epsilon=1e-5, - hidden_dropout=0.1, - bias_dropout_add_fusion=True, - persist_layer_norm=False, - bias_activation_fusion=True, - openai_gelu=False, - onnx_safe=False, - masked_softmax_fusion=True, - attention_dropout=0.1, - ffn_dropout=0.0, - drop_path_rate=0.0, - activation='gelu', - megatron_legacy=False, - bias=True, - chunk_size=64, - normalization='layernorm', - transformer_block_type='pre_ln', - headscale=False, - activations_checkpoint_granularity=None, - normalize_attention_scores=True, - use_flash_attention=False, - ): - kwargs = locals() - for key in ["self", "__class__"]: - kwargs.pop(key) - drop_path_rate = kwargs.pop("drop_path_rate") - super(ParallelVisionTransformerLayer_, self).__init__(**kwargs) - - self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0.0 else None - - def forward( - self, - hidden_states, - attention_mask, - encoder_output=None, - enc_dec_attn_mask=None, - layer_past=None, - get_key_value=False, - set_inference_key_value_memory=False, - inference_max_sequence_len=None, - rotary_pos_emb=None, - # list of positional embedding tensors, first one self attention, second one and third one are for cross attention (q, k) - self_attention_relative_position_bias=None, - cross_attention_relative_position_bias=None, - checkpoint_core_attention=False, - ): - # Self attention. - if rotary_pos_emb is not None: - # self attention pos_emb is (q, q) - self_attention_pos_emb = (rotary_pos_emb[0], rotary_pos_emb[0]) - cross_attention_pos_emb = (rotary_pos_emb[1], rotary_pos_emb[2]) - else: - self_attention_pos_emb = None - cross_attention_pos_emb = None - - if self.layer_type != LayerType.retrieval_decoder_after_self_attn: - # hidden_states: [b, s, h] - - # Pre-LN: x -> LN -> MHA -> Residual -> LN -> MLP -> Residual - # Post-LN: x -> MHA -> Residual -> LN -> MLP -> Residual -> LN - # Normformer: x -> LN -> MHA -> LN -> Residual -> MLP (w/LN) -> Residual - - residual = hidden_states - # Layer norm at the beginning of the transformer layer. - if self.transformer_block_type in ['pre_ln', 'normformer']: - hidden_states = self.input_layernorm(hidden_states) - - attention_output, attention_bias = self.self_attention( - hidden_states, - attention_mask, - layer_past=layer_past, - get_key_value=get_key_value, - set_inference_key_value_memory=set_inference_key_value_memory, - inference_max_sequence_len=inference_max_sequence_len, - rotary_pos_emb=self_attention_pos_emb, - relative_position_bias=self_attention_relative_position_bias, - checkpoint_core_attention=checkpoint_core_attention, - ) - - if get_key_value: - attention_output, presents = attention_output - - # If normformer, apply norm on the output of the self attention. - if self.transformer_block_type == 'normformer': - # Normformer normalization - attention_output = ( - attention_output + attention_bias if attention_bias is not None else attention_output - ) - attention_output = self.post_attention_normformer_norm(attention_output) - attention_bias = None - - # jit scripting for a nn.module (with dropout) is not - # trigerring the fusion kernel. For now, we use two - # different nn.functional routines to account for varying - # dropout semantics during training and inference phases. - - if self.drop_path is None: - bias_dropout_add_func = self._get_bias_droput_add_func( - transformer_block_type=self.transformer_block_type, position_after='attention' - ) - if attention_bias is not None: - attention_bias = attention_bias.expand_as(residual) - - layernorm_input = bias_dropout_add_func( - attention_output, attention_bias, residual, self.hidden_dropout - ) - else: - assert self.transformer_block_type != 'normformer', "Normfomer doesn't support drop_path" - out = torch.nn.functional.dropout( - attention_output + attention_bias, p=self.hidden_dropout, training=self.training - ) - layernorm_input = residual + self.drop_path(out) - # print(f"Layer: {self.layer_number} Attention checksum {layernorm_input.sum()}") - - # Post-LN normalization after residual - if self.transformer_block_type == 'post_ln': - normalization_output = self.input_layernorm(layernorm_input) - layernorm_input = normalization_output - elif self.transformer_block_type in ['pre_ln', 'normformer']: - # Layer norm post the self attention. - normalization_output = self.post_attention_layernorm(layernorm_input) - else: - layernorm_input, normalization_output = hidden_states - - if self.layer_type == LayerType.decoder_pre_mlp: - return layernorm_input, normalization_output - - if ( - self.layer_type == LayerType.decoder - or self.layer_type == LayerType.retrieval_decoder - or self.layer_type == LayerType.retrieval_encoder - or self.layer_type == LayerType.retrieval_decoder_after_self_attn - ): - if ( - self.layer_type == LayerType.retrieval_decoder - or self.layer_type == LayerType.retrieval_decoder_after_self_attn - ): - attention_output, attention_bias = self.inter_attention( - normalization_output, - enc_dec_attn_mask, - encoder_output=encoder_output, - rotary_pos_emb=cross_attention_pos_emb, - set_inference_key_value_memory=set_inference_key_value_memory, - inference_max_sequence_len=inference_max_sequence_len, - checkpoint_core_attention=checkpoint_core_attention, - ) - else: - attention_output, attention_bias = self.inter_attention( - normalization_output, - enc_dec_attn_mask, - encoder_output=encoder_output, - rotary_pos_emb=cross_attention_pos_emb, - relative_position_bias=cross_attention_relative_position_bias, - checkpoint_core_attention=checkpoint_core_attention, - ) - - # If normformer, apply norm on the output of the self attention. - if self.transformer_block_type == 'normformer': - # Normformer normalization - attention_output = ( - attention_output + attention_bias if attention_bias is not None else attention_output - ) - attention_output = self.post_inter_attention_normformer_norm(attention_output) - attention_bias = None - - residual = layernorm_input - - bias_dropout_add_func = self._get_bias_droput_add_func( - transformer_block_type=self.transformer_block_type, position_after='attention' - ) - - layernorm_input = bias_dropout_add_func(attention_output, attention_bias, residual, self.hidden_dropout) - # print(f"Layer: {self.layer_number} Cross-Attention checksum {layernorm_input.sum()}") - normalization_output = self.post_inter_attention_layernorm(layernorm_input) - # Post-LN normalization after residual - if self.transformer_block_type == 'post_ln': - layernorm_input = normalization_output - # MLP. - mlp_output, mlp_bias = self.mlp(normalization_output) - - residual = layernorm_input - - if self.drop_path is None: - bias_dropout_add_func = self._get_bias_droput_add_func( - transformer_block_type=self.transformer_block_type, position_after='mlp' - ) - - output = bias_dropout_add_func(mlp_output, mlp_bias, residual, self.hidden_dropout) - - else: - out = torch.nn.functional.dropout(mlp_output + mlp_bias, p=self.hidden_dropout, training=self.training) - output = residual + self.drop_path(out) - # print(f"Layer: {self.layer_number} MLP + Dropout + Residual checksum {output.sum()}") - - if self.transformer_block_type == 'post_ln': - output = self.post_attention_layernorm(output) - - if get_key_value: - output = [output, presents] - - return output - - -class ParallelVisionTransformerLayer(ParallelVisionTransformerLayer_): - def __init__(self, **kwargs): - super(ParallelVisionTransformerLayer, self).__init__(**kwargs) - precision = kwargs['precision'] - if precision in ['bf16', 'bf16-mixed']: - self.dtype = torch.bfloat16 - elif precision in [16, '16', '16-mixed']: - self.dtype = torch.float16 - elif precision in [32, '32', '32-true']: - self.dtype = torch.float32 - else: - raise ValueError(f"Cannot recognize precision {precision}") - - def forward( - self, - hidden_states, - attention_mask, - encoder_output=None, - enc_dec_attn_mask=None, - rotary_pos_emb=None, - layer_past=None, - get_key_value=False, - set_inference_key_value_memory=False, - inference_max_sequence_len=None, - self_attention_relative_position_bias=None, - cross_attention_relative_position_bias=None, - checkpoint_core_attention=False, - ): - kwargs = locals() - for key in ["self", "__class__"]: - kwargs.pop(key) - if self.dtype == torch.float32: - return super().forward(**kwargs) - with torch.autocast(device_type="cuda", dtype=self.dtype): - return super().forward(**kwargs) - - -class ParallelVisionTransformer(ParallelTransformer): - """Transformer class.""" - - def __init__( - self, - config, - init_method, - output_layer_init_method, - num_layers, - hidden_size, - ffn_hidden_size, - num_attention_heads, - apply_query_key_layer_scaling=True, - kv_channels=None, - layer_type=LayerType.encoder, # it can be a list of types or single type - self_attn_mask_type=AttnMaskType.padding, - pre_process=True, - post_process=True, - precision=16, - fp32_residual_connection=False, - activations_checkpoint_method=None, - activations_checkpoint_num_layers=None, - layernorm_epsilon=1e-5, - hidden_dropout=0.1, - attention_dropout=0.1, - ffn_dropout=0.0, - drop_path_rate=0.0, - bias_activation_fusion=True, - bias_dropout_add_fusion=True, - masked_softmax_fusion=True, - persist_layer_norm=False, - openai_gelu=False, - onnx_safe=False, - activation='gelu', - model_type=ModelType.encoder_or_decoder, - megatron_legacy=False, - bias=True, - chunk_size=64, - normalization='layernorm', - transformer_block_type='pre_ln', - headscale=False, - layer_number_offset=0, # this is use only for attention norm_factor scaling - activations_checkpoint_granularity=None, - normalize_attention_scores=True, - ub_tp_comm_overlap=False, - use_flash_attention=False, - ): - kwargs = locals() - for key in ["self", "__class__"]: - kwargs.pop(key) - self.drop_path_rate = kwargs.pop("drop_path_rate") - super(ParallelVisionTransformer, self).__init__(**kwargs) - - self.num_layers = self.get_num_layers(num_layers) - - self.drop_path_rates = [ - rate.item() - for rate in torch.linspace( - 0, self.drop_path_rate, self.num_layers * parallel_state.get_pipeline_model_parallel_world_size() - ) - ] - - # Rebuild with vision transformer layers. - def build_layer(layer_number): - if isinstance(layer_type, list): - lt = layer_type[layer_number - 1] - else: - lt = layer_type - return ParallelVisionTransformerLayer( - config=config, - init_method=init_method, - output_layer_init_method=output_layer_init_method, - layer_number=layer_number + layer_number_offset, - hidden_size=hidden_size, - ffn_hidden_size=ffn_hidden_size, - num_attention_heads=num_attention_heads, - apply_query_key_layer_scaling=apply_query_key_layer_scaling, - kv_channels=kv_channels, - layer_type=lt, - self_attn_mask_type=self_attn_mask_type, - precision=precision, - fp32_residual_connection=fp32_residual_connection, - layernorm_epsilon=layernorm_epsilon, - hidden_dropout=hidden_dropout, - attention_dropout=attention_dropout, - ffn_dropout=ffn_dropout, - drop_path_rate=self.drop_path_rates[layer_number - 1], - bias_activation_fusion=bias_activation_fusion, - bias_dropout_add_fusion=bias_dropout_add_fusion, - masked_softmax_fusion=masked_softmax_fusion, - persist_layer_norm=persist_layer_norm, - openai_gelu=openai_gelu, - onnx_safe=onnx_safe, - activation=activation, - megatron_legacy=megatron_legacy, - bias=bias, - chunk_size=chunk_size, - normalization=normalization, - transformer_block_type=transformer_block_type, - headscale=headscale, - activations_checkpoint_granularity=activations_checkpoint_granularity, - normalize_attention_scores=normalize_attention_scores, - use_flash_attention=use_flash_attention, - ) - - if parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None: - assert num_layers % parallel_state.get_virtual_pipeline_model_parallel_world_size() == 0, ( - 'num_layers_per_stage must be divisible by ' 'virtual_pipeline_model_parallel_size' - ) - - # self.model_type != ModelType.encoder_and_decoder - assert self.model_type.value != 2, f'virtual pipeline parallel currently only supported for GPT' - - # Number of layers in each model chunk is the number of layers in the stage, - # divided by the number of model chunks in a stage. - self.num_layers = self.num_layers // parallel_state.get_virtual_pipeline_model_parallel_world_size() - # With 8 layers, 2 stages, and 4 model chunks, we want an assignment of - # layers to stages like (each list is a model chunk): - # Stage 0: [0] [2] [4] [6] - # Stage 1: [1] [3] [5] [7] - # With 8 layers, 2 stages, and 2 virtual stages, we want an assignment of - # layers to stages like (each list is a model chunk): - # Stage 0: [0, 1] [4, 5] - # Stage 1: [2, 3] [6, 7] - offset = parallel_state.get_virtual_pipeline_model_parallel_rank() * ( - num_layers // parallel_state.get_virtual_pipeline_model_parallel_world_size() - ) + (parallel_state.get_pipeline_model_parallel_rank() * self.num_layers) - else: - # Each stage gets a contiguous set of layers. - if ( - self.model_type == ModelType.encoder_and_decoder - and parallel_state.get_pipeline_model_parallel_world_size() > 1 - ): - pipeline_rank = parallel_state.get_pipeline_model_parallel_rank() - if layer_type == LayerType.encoder: - offset = pipeline_rank * self.num_layers - else: - num_ranks_in_enc = parallel_state.get_pipeline_model_parallel_split_rank() - offset = (pipeline_rank - num_ranks_in_enc) * self.num_layers - else: - offset = parallel_state.get_pipeline_model_parallel_rank() * self.num_layers - - self.layers = torch.nn.ModuleList([build_layer(i + 1 + offset) for i in range(self.num_layers)]) diff --git a/nemo/collections/vision/modules/vit/__init__.py b/nemo/collections/vision/modules/vit/__init__.py deleted file mode 100644 index 2db92b257416..000000000000 --- a/nemo/collections/vision/modules/vit/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/vision/modules/vit/vit_backbone.py b/nemo/collections/vision/modules/vit/vit_backbone.py deleted file mode 100644 index 5758a9ff7cdb..000000000000 --- a/nemo/collections/vision/modules/vit/vit_backbone.py +++ /dev/null @@ -1,361 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Vision Transformer(VIT) model.""" - -import math -from functools import partial - -import einops -import torch -import torch.nn.functional as F - -from nemo.collections.nlp.modules.common.megatron.fused_layer_norm import get_layer_norm -from nemo.collections.nlp.modules.common.megatron.module import MegatronModule -from nemo.collections.nlp.modules.common.megatron.utils import ( - ApexGuardDefaults, - init_method_normal, - scaled_init_method_normal, -) -from nemo.collections.vision.modules.common.megatron.vision_transformer import ParallelVisionTransformer - -try: - import apex - from apex.transformer.enums import AttnMaskType - - HAVE_APEX = True -except (ImportError, ModuleNotFoundError): - HAVE_APEX = False - - # fake missing classes with None attributes - AttnMaskType = ApexGuardDefaults() - LayerType = ApexGuardDefaults() - - -class DropPatch(MegatronModule): - """ - https://arxiv.org/abs/2212.00794 - """ - - def __init__(self, prob, class_token_length=8, exclude_cls_tokens=True): - assert 0 <= prob < 1.0 - super(DropPatch, self).__init__() - self.prob = prob - self.class_token_length = class_token_length - self.exclude_cls_tokens = exclude_cls_tokens # exclude CLS token - - def __call__(self, x): - if self.prob == 0.0 or not self.training: - return x - - class_token_length = self.class_token_length - if self.exclude_cls_tokens: - cls_tokens, x = x[:, :class_token_length], x[:, class_token_length:] - - batch, num_tokens, _, device = *x.shape, x.device - - batch_indices = torch.arange(batch, device=device) - batch_indices = batch_indices[..., None] - - keep_prob = 1 - self.prob - num_patches_keep = max(1, int(num_tokens * keep_prob)) - - rand = torch.randn(batch, num_tokens, device=device) - patch_indices_keep = rand.topk(num_patches_keep, dim=-1).indices - - x = x[batch_indices, patch_indices_keep] - - if self.exclude_cls_tokens: - x = torch.cat((cls_tokens, x), dim=1) - - return x - - -class VitMlpHead(MegatronModule): - """Pooler layer. - - Pool hidden states of a specific token (for example start of the - sequence) and add a linear transformation followed by a tanh. - - Arguments: - hidden_size: hidden size - init_method: weight initialization method for the linear layer. - bias is set to zero. - """ - - def __init__(self, hidden_size, num_classes): - super(VitMlpHead, self).__init__() - self.dense_in = torch.nn.Linear(hidden_size, hidden_size) - self.relu = torch.nn.ReLU() - self.dense_out = torch.nn.Linear(hidden_size, num_classes) - torch.nn.init.constant_(self.dense_out.bias, -10) - - def forward(self, hidden_states): - # hidden_states: [b, 1, h] - # sequence_index: index of the token to pool. - dense_in_result = self.dense_in(hidden_states) - tanh_result = torch.tanh(dense_in_result) - dense_out_result = self.dense_out(tanh_result) - return dense_out_result - - -def isPerfectSquare(x): - if x >= 0: - sr = math.sqrt(x) - return int(sr) * int(sr) == x - return False - - -def twod_interpolate_position_embeddings_hook( - model_cfg, - class_token_present, - state_dict, - prefix, - local_metadata, - strict, - missing_keys, - unexpected_keys, - error_msgs, -): - num_patches_per_dim_h = model_cfg.img_h // model_cfg.patch_dim - num_patches_per_dim_w = model_cfg.img_w // model_cfg.patch_dim - num_patches = num_patches_per_dim_h * num_patches_per_dim_w - hidden_size = model_cfg.hidden_size - class_token_length = model_cfg.get("class_token_length", 8) - - key = prefix + "weight" - - assert key in state_dict, f"{key} not in {state_dict.keys()}" - if key in state_dict: - input_param = state_dict[key] - - input_seq_len = input_param.shape[0] - assert isPerfectSquare(input_seq_len) or isPerfectSquare(input_seq_len - class_token_length) - input_has_class_token = not isPerfectSquare(input_seq_len) - num_tok_input = input_seq_len - class_token_length if input_has_class_token else input_seq_len - num_tok_output = num_patches - output_has_class_token = class_token_present - - # update input_param and load it to state_dict[key] - if input_has_class_token: - input_param_tok = input_param[:class_token_length, :] - input_param_grid = input_param[class_token_length:, :] - else: - input_param_tok = torch.zeros(class_token_length, hidden_size, device=input_param.device) - input_param_grid = input_param - - assert input_param.shape[1] == hidden_size - - if num_tok_input != num_tok_output: - gs_input = int(math.sqrt(num_tok_input)) - gs_new = (num_patches_per_dim_h, num_patches_per_dim_w) - - input_param_grid = input_param_grid.transpose(0, 1).contiguous() - input_param_grid = input_param_grid.reshape((1, -1, gs_input, gs_input)) - input_param_grid = input_param_grid.float() - scale_factor = (gs_new[0] / gs_input, gs_new[1] / gs_input) - - input_param_grid = F.interpolate(input_param_grid, scale_factor=scale_factor, mode="bilinear") - - input_param_grid = input_param_grid.half() - input_param_grid = input_param_grid.reshape((-1, num_tok_output)) - input_param_grid = input_param_grid.transpose(0, 1).contiguous() - - assert input_param_grid.shape[1] == hidden_size - - input_param = input_param_grid - assert input_param.shape[0] == num_tok_output and input_param.shape[1] == hidden_size - - if output_has_class_token: - input_param = torch.cat((input_param_tok, input_param), dim=0) - - state_dict[key] = input_param - - -class VitBackbone(MegatronModule): - """Vision Transformer Model.""" - - def __init__( - self, - model_cfg, - model_parallel_config, - init_method=None, - scaled_init_method=None, - pre_process=True, - post_process=True, - class_token=True, - single_token_output=False, - ): - super(VitBackbone, self).__init__(share_token_embeddings=False) - - self.fp16_lm_cross_entropy = model_cfg.fp16_lm_cross_entropy - num_layers = model_cfg.num_layers - init_method_std = model_cfg.init_method_std - if init_method is None: - init_method = init_method_normal(init_method_std) - if scaled_init_method is None: - scaled_init_method = scaled_init_method_normal(init_method_std, num_layers) - - self.pre_process = pre_process - self.post_process = post_process - self.class_token = class_token - self.hidden_size = model_cfg.hidden_size - self.patch_dim = model_cfg.patch_dim - self.img_h = model_cfg.img_h - self.img_w = model_cfg.img_w - self.single_token_output = single_token_output - self.drop_patch_rate = model_cfg.get("drop_patch_rate", 0.0) - self.drop_path_rate = model_cfg.get("drop_path_rate", 0.0) - preprocess_layernorm = model_cfg.get("preprocess_layernorm", False) - - assert self.img_h % self.patch_dim == 0 - assert self.img_w % self.patch_dim == 0 - self.num_patches_per_dim_h = self.img_h // self.patch_dim - self.num_patches_per_dim_w = self.img_w // self.patch_dim - self.num_patches = self.num_patches_per_dim_h * self.num_patches_per_dim_w - class_token_length = model_cfg.get("class_token_length", 8) - self.seq_length = self.num_patches + (class_token_length if self.class_token else 0) - self.flatten_dim = self.patch_dim * self.patch_dim * model_cfg.num_channels - self.input_tensor = None - self.position_ids = None - self.preprocess_layernorm = None - - if self.pre_process: - # cls_token - if self.class_token: - self.cls_token = torch.nn.Parameter(torch.randn(1, class_token_length, self.hidden_size)) - torch.nn.init.zeros_(self.cls_token) - self.position_ids = torch.arange(self.seq_length).expand(1, -1).cuda() - - # Linear encoder - self.linear_encoder = torch.nn.Linear(self.flatten_dim, self.hidden_size) - - # embedding - self.position_embedding_type = model_cfg.get("position_embedding_type", "learned_absolute") - - if self.position_embedding_type == "learned_absolute": - self.position_embeddings = torch.nn.Embedding(self.seq_length, self.hidden_size) - init_method_normal(model_cfg.init_method_std)(self.position_embeddings.weight) - - class_token_present = self.class_token - self.position_embeddings._register_load_state_dict_pre_hook( - partial(twod_interpolate_position_embeddings_hook, model_cfg, class_token_present) - ) - elif self.position_embedding_type == "learned_parameters": - self.position_embeddings = torch.nn.Parameter(torch.empty(self.seq_length, self.hidden_size)) - init_method_normal(model_cfg.init_method_std)(self.position_embeddings) - else: - raise ValueError(f"Unrecognized positional embedding type {self.position_embedding_type}!") - - self.embedding_dropout = torch.nn.Dropout(model_cfg.hidden_dropout) - self.drop_patch = DropPatch( - self.drop_patch_rate, class_token_length=class_token_length, exclude_cls_tokens=self.class_token - ) - - if preprocess_layernorm: - self.preprocess_layernorm = get_layer_norm( - model_cfg.hidden_size, - model_cfg.layernorm_epsilon, - model_cfg.persist_layer_norm, - sequence_parallel=model_cfg.sequence_parallel, - ) - - self.transformer = ParallelVisionTransformer( - config=model_parallel_config, - init_method=init_method, - output_layer_init_method=scaled_init_method, - num_layers=model_cfg.num_layers, - hidden_size=model_cfg.hidden_size, - num_attention_heads=model_cfg.num_attention_heads, - apply_query_key_layer_scaling=model_cfg.apply_query_key_layer_scaling, - kv_channels=model_cfg.kv_channels, - ffn_hidden_size=model_cfg.ffn_hidden_size, - # self_attn_mask_type=self.encoder_attn_mask_type, # TODO (yuya) - pre_process=self.pre_process, - post_process=self.post_process, - precision=model_cfg.precision, - fp32_residual_connection=model_cfg.fp32_residual_connection, - activations_checkpoint_method=model_cfg.activations_checkpoint_method, - activations_checkpoint_num_layers=model_cfg.activations_checkpoint_num_layers, - normalization=model_cfg.normalization, - layernorm_epsilon=model_cfg.layernorm_epsilon, - hidden_dropout=model_cfg.hidden_dropout, - attention_dropout=model_cfg.attention_dropout, - drop_path_rate=model_cfg.drop_path_rate, - bias_activation_fusion=model_cfg.get("bias_activation_fusion", False), - persist_layer_norm=model_cfg.persist_layer_norm, - openai_gelu=model_cfg.openai_gelu, - onnx_safe=model_cfg.onnx_safe, - masked_softmax_fusion=model_cfg.masked_softmax_fusion, - megatron_legacy=model_cfg.megatron_legacy, - activations_checkpoint_granularity=model_cfg.activations_checkpoint_granularity, - activation=model_cfg.get('activation', 'gelu'), - ub_tp_comm_overlap=model_cfg.get('ub_tp_comm_overlap', False), - use_flash_attention=model_cfg.get('use_flash_attention', False), - ) - - def set_input_tensor(self, input_tensor): - """See megatron.model.transformer.set_input_tensor()""" - self.transformer.set_input_tensor(input_tensor) - - def forward(self, input): - - if self.pre_process: - rearranged_input = einops.rearrange( - input, "b c (h p1) (w p2) -> b (h w) (p1 p2 c)", p1=self.patch_dim, p2=self.patch_dim, - ) - - # [b num_patch patch_dim*patch_dim*c] -> [b, s, h]; s:=num_patch, h:=hidden - encoder_output = self.linear_encoder(rearranged_input) - - concatenated_tokens = encoder_output - if self.class_token: - cls_tokens = self.cls_token.expand(encoder_output.shape[0], -1, -1) - concatenated_tokens = torch.cat((cls_tokens, encoder_output), dim=1) - - if self.position_embedding_type == "learned_absolute": - token_embeddings = concatenated_tokens + self.position_embeddings( - self.position_ids[:, : concatenated_tokens.shape[1]] - ) - elif self.position_embedding_type == "learned_parameters": - token_embeddings = concatenated_tokens + self.position_embeddings - - # a patch_dropout of 0. would mean it is disabled and this function would do nothing but return what was passed in - token_embeddings = self.drop_patch(token_embeddings) - - if self.preprocess_layernorm is not None: - token_embeddings = self.preprocess_layernorm(token_embeddings) - - # [b s h] => [s b h] - token_embeddings = token_embeddings.transpose(0, 1).contiguous() - hidden_states = self.embedding_dropout(token_embeddings) - else: - hidden_states = input - - # 0 represents masking, 1 represents not masking - # attention_mask = torch.zeros( - # [1, 1, hidden_states.shape[0], hidden_states.shape[0]], - # device=hidden_states.device, - # dtype=torch.bool, - # ) - hidden_states = self.transformer(hidden_states, None) - - if self.post_process: - # [s b h] => [b s h] - if self.single_token_output: - hidden_states = hidden_states[0] - else: - hidden_states = hidden_states.transpose(0, 1).contiguous() - - return hidden_states diff --git a/nemo/collections/vision/parts/__init__.py b/nemo/collections/vision/parts/__init__.py deleted file mode 100644 index 2db92b257416..000000000000 --- a/nemo/collections/vision/parts/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/requirements/requirements_vision.txt b/requirements/requirements_vision.txt deleted file mode 100644 index d3d4b29db16f..000000000000 --- a/requirements/requirements_vision.txt +++ /dev/null @@ -1,8 +0,0 @@ -boto3 -einops -flask_restful -ftfy -gdown -matplotlib>=3.3.2 -nltk>=3.6.5 -numpy diff --git a/scripts/fid-eval-text2img/TFinception_V3.py b/scripts/fid-eval-text2img/TFinception_V3.py deleted file mode 100644 index 6cb212f73ab0..000000000000 --- a/scripts/fid-eval-text2img/TFinception_V3.py +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# share: outside-ok - -""" -Modified from -https://github.com/mseitzer/pytorch-fid - -Code adapted from https://github.com/bioinf-jku/TTUR to use PyTorch instead -of Tensorflow -Copyright 2018 Institute of Bioinformatics, JKU Linz -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torch -import torch.nn.functional as F -from torch import nn - -# from imaginaire.utils.misc import apply_imagenet_normalization - - -try: - from torchvision.models.utils import load_state_dict_from_url -except ImportError: - from torch.utils.model_zoo import load_url as load_state_dict_from_url - -from torchvision.models import inception, inception_v3, vgg16 - -# Inception weights ported to Pytorch from -# http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz -FID_WEIGHTS_URL = ( - 'https://github.com/mseitzer/pytorch-fid/releases' '/download/fid_weights/pt_inception-2015-12-05-6726825d.pth' -) - - -class SwAV(nn.Module): - def __init__(self): - super().__init__() - self.model = torch.hub.load('facebookresearch/swav', 'resnet50', pretrained=True) - self.model.fc = torch.nn.Sequential() - - def forward(self, x, align_corners=True): - y = self.model(F.interpolate(x, size=(224, 224), mode='bicubic', align_corners=align_corners)) - return y - - -class Vgg16(nn.Module): - def __init__(self): - super().__init__() - self.model = vgg16(pretrained=True, init_weights=False) - self.model.classifier = torch.nn.Sequential(*[self.model.classifier[i] for i in range(4)]) - - def forward(self, x, align_corners=True): - y = self.model(F.interpolate(x, size=(224, 224), mode='bicubic', align_corners=align_corners)) - return y - - -class InceptionV3(nn.Module): - def __init__(self): - super().__init__() - self.model = inception_v3(transform_input=False, pretrained=True, init_weights=False) - self.model.fc = torch.nn.Sequential() - - def forward(self, x, align_corners=True): - y = self.model(F.interpolate(x, size=(299, 299), mode='bicubic', align_corners=align_corners)) - return y - - -class TFInceptionV3(nn.Module): - def __init__(self): - super().__init__() - self.model = inception_v3( - transform_input=False, num_classes=1008, aux_logits=False, pretrained=False, init_weights=False - ) - self.model.Mixed_5b = FIDInceptionA(192, pool_features=32) - self.model.Mixed_5c = FIDInceptionA(256, pool_features=64) - self.model.Mixed_5d = FIDInceptionA(288, pool_features=64) - self.model.Mixed_6b = FIDInceptionC(768, channels_7x7=128) - self.model.Mixed_6c = FIDInceptionC(768, channels_7x7=160) - self.model.Mixed_6d = FIDInceptionC(768, channels_7x7=160) - self.model.Mixed_6e = FIDInceptionC(768, channels_7x7=192) - self.model.Mixed_7b = FIDInceptionE_1(1280) - self.model.Mixed_7c = FIDInceptionE_2(2048) - - state_dict = load_state_dict_from_url(FID_WEIGHTS_URL, progress=True, map_location='cpu') - self.model.load_state_dict(state_dict) - self.model.fc = torch.nn.Sequential() - - def forward(self, x, align_corners=True): - # x = apply_imagenet_normalization(x) - y = self.model(F.interpolate(x, size=(299, 299), mode='bicubic', align_corners=align_corners)) - return y - - -class FIDInceptionA(inception.InceptionA): - """InceptionA block patched for FID computation""" - - def __init__(self, in_channels, pool_features): - super(FIDInceptionA, self).__init__(in_channels, pool_features) - - def forward(self, x): - branch1x1 = self.branch1x1(x) - - branch5x5 = self.branch5x5_1(x) - branch5x5 = self.branch5x5_2(branch5x5) - - branch3x3dbl = self.branch3x3dbl_1(x) - branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) - branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) - - # Patch: Tensorflow's average pool does not use the padded zero's in - # its average calculation - branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, count_include_pad=False) - branch_pool = self.branch_pool(branch_pool) - - outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] - return torch.cat(outputs, 1) - - -class FIDInceptionC(inception.InceptionC): - """InceptionC block patched for FID computation""" - - def __init__(self, in_channels, channels_7x7): - super(FIDInceptionC, self).__init__(in_channels, channels_7x7) - - def forward(self, x): - branch1x1 = self.branch1x1(x) - - branch7x7 = self.branch7x7_1(x) - branch7x7 = self.branch7x7_2(branch7x7) - branch7x7 = self.branch7x7_3(branch7x7) - - branch7x7dbl = self.branch7x7dbl_1(x) - branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) - branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) - branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) - branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) - - # Patch: Tensorflow's average pool does not use the padded zero's in - # its average calculation - branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, count_include_pad=False) - branch_pool = self.branch_pool(branch_pool) - - outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool] - return torch.cat(outputs, 1) - - -class FIDInceptionE_1(inception.InceptionE): - """First InceptionE block patched for FID computation""" - - def __init__(self, in_channels): - super(FIDInceptionE_1, self).__init__(in_channels) - - def forward(self, x): - branch1x1 = self.branch1x1(x) - - branch3x3 = self.branch3x3_1(x) - branch3x3 = [ - self.branch3x3_2a(branch3x3), - self.branch3x3_2b(branch3x3), - ] - branch3x3 = torch.cat(branch3x3, 1) - - branch3x3dbl = self.branch3x3dbl_1(x) - branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) - branch3x3dbl = [ - self.branch3x3dbl_3a(branch3x3dbl), - self.branch3x3dbl_3b(branch3x3dbl), - ] - branch3x3dbl = torch.cat(branch3x3dbl, 1) - - # Patch: Tensorflow's average pool does not use the padded zero's in - # its average calculation - branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, count_include_pad=False) - branch_pool = self.branch_pool(branch_pool) - - outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] - return torch.cat(outputs, 1) - - -class FIDInceptionE_2(inception.InceptionE): - """Second InceptionE block patched for FID computation""" - - def __init__(self, in_channels): - super(FIDInceptionE_2, self).__init__(in_channels) - - def forward(self, x): - branch1x1 = self.branch1x1(x) - - branch3x3 = self.branch3x3_1(x) - branch3x3 = [ - self.branch3x3_2a(branch3x3), - self.branch3x3_2b(branch3x3), - ] - branch3x3 = torch.cat(branch3x3, 1) - - branch3x3dbl = self.branch3x3dbl_1(x) - branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) - branch3x3dbl = [ - self.branch3x3dbl_3a(branch3x3dbl), - self.branch3x3dbl_3b(branch3x3dbl), - ] - branch3x3dbl = torch.cat(branch3x3dbl, 1) - - # Patch: The FID Inception model uses max pooling instead of average - # pooling. This is likely an error in this specific Inception - # implementation, as other Inception models use average pooling here - # (which matches the description in the paper). - branch_pool = F.max_pool2d(x, kernel_size=3, stride=1, padding=1) - branch_pool = self.branch_pool(branch_pool) - - outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] - return torch.cat(outputs, 1) diff --git a/scripts/fid-eval-text2img/compute_clip_score.py b/scripts/fid-eval-text2img/compute_clip_score.py deleted file mode 100644 index da587a9c3c32..000000000000 --- a/scripts/fid-eval-text2img/compute_clip_score.py +++ /dev/null @@ -1,120 +0,0 @@ -""" -python clip_script.py --captions_path /path/to/coco2014_val/captions \ - --fid_images_path /path/to/synthetic_images \ - --output_path /path/to/output/clip_scores.csv - -1. `--captions_path`: The path to the real images captions directory. In this example, - it is set to `/path/to/coco2014_val/captions`. This path should point to the - directory containing the COCO 2014 validation dataset captions. - -2. `--fid_images_path`: The path to the directory containing subfolders with synthetic - images. In this example, it is set to `/path/to/synthetic_images`. Each subfolder - should contain a set of synthetic images for which you want to compute CLIP scores - against the captions from `--captions_path`. - -3. `--output_path`: The path to the output CSV file where the CLIP scores will be saved. - In this example, it is set to `/path/to/output/clip_scores.csv`. This file will - contain a table with two columns: `cfg` and `clip_score`. The `cfg` - column lists the names of the subfolders in `--fid_images_path`, and the - `clip_score` column lists the corresponding average CLIP scores between the synthetic - images in each subfolder and the captions from `--captions_path`. -""" - -import argparse -import csv -import os -from glob import glob - -import open_clip -import torch -import torch.nn as nn -from PIL import Image -from tqdm import tqdm - - -class CLIPEncoder(nn.Module): - def __init__(self, clip_version='ViT-B/32', pretrained='', cache_dir=None, device='cuda'): - super().__init__() - - self.clip_version = clip_version - if not pretrained: - if self.clip_version == 'ViT-H-14': - self.pretrained = 'laion2b_s32b_b79k' - elif self.clip_version == 'ViT-g-14': - self.pretrained = 'laion2b_s12b_b42k' - else: - self.pretrained = 'openai' - - self.model, _, self.preprocess = open_clip.create_model_and_transforms( - self.clip_version, pretrained=self.pretrained, cache_dir=cache_dir - ) - - self.model.eval() - self.model.to(device) - - self.device = device - - @torch.no_grad() - def get_clip_score(self, text, image): - if isinstance(image, str): # filenmae - image = Image.open(image) - if isinstance(image, Image.Image): # PIL Image - image = self.preprocess(image).unsqueeze(0).to(self.device) - image_features = self.model.encode_image(image).float() - image_features /= image_features.norm(dim=-1, keepdim=True) - - if not isinstance(text, (list, tuple)): - text = [text] - text = open_clip.tokenize(text).to(self.device) - text_features = self.model.encode_text(text).float() - text_features /= text_features.norm(dim=-1, keepdim=True) - similarity = image_features @ text_features.T - - return similarity - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--captions_path', default='/coco2014/coco2014_val_sampled_30k/captions/', type=str) - parser.add_argument('--fid_images_path', default=None, type=str) - parser.add_argument('--output_path', default='./clip_scores.csv', type=str) - args = parser.parse_args() - - captions_path = args.captions_path - print('Init CLIP Encoder..') - encoder = CLIPEncoder(clip_version='ViT-L-14') - - # Create output CSV file - with open(args.output_path, 'w', newline='') as csvfile: - fieldnames = ['cfg', 'clip_score'] - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - - # Iterate through subfolders in fid_images_path - for subfolder in os.listdir(args.fid_images_path): - subfolder_path = os.path.join(args.fid_images_path, subfolder) - if os.path.isdir(subfolder_path): - images = sorted( - glob(f'{subfolder_path}/*.png'), key=lambda x: (int(x.split('/')[-1].strip('.png').split('_')[1])) - ) - texts = sorted(glob(f'{captions_path}/*.txt')) - print(images[:5], texts[:5]) - assert len(images) == len(texts) - print(f'Number of images text pairs: {len(images)}') - - ave_sim = 0.0 - count = 0 - for text, img in zip(tqdm(texts), images): - with open(text, 'r') as f: - text = f.read().strip() - sim = encoder.get_clip_score(text, img) - ave_sim += sim - count += 1 - if count % 2000 == 0: - print(ave_sim / count) - - ave_sim /= count - print(f'The CLIP similarity for CFG {subfolder}: {ave_sim}') - - # Write CLIP score to output CSV file - writer.writerow({'cfg': subfolder, 'clip_score': ave_sim}) diff --git a/scripts/fid-eval-text2img/compute_fid.py b/scripts/fid-eval-text2img/compute_fid.py deleted file mode 100644 index cbeb81e1e4a7..000000000000 --- a/scripts/fid-eval-text2img/compute_fid.py +++ /dev/null @@ -1,409 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import collections -import os - -import numpy as np -import torch -import torch.distributed as dist -import torch.nn.functional as F -from scipy import linalg -from TFinception_V3 import InceptionV3, SwAV, TFInceptionV3, Vgg16 -from torch import nn - - -def network_init(network='inception'): - # inception = inception_v3(pretrained=True, transform_input=False) - # inception = inception.to('cuda') - # inception.eval() - # inception.fc = torch.nn.Sequential() - - if dist.is_initialized() and not is_local_master(): - # Make sure only the first process in distributed training downloads - # the model, and the others will use the cache - # noinspection PyUnresolvedReferences - torch.distributed.barrier() - - if network == 'tf_inception': - model = TFInceptionV3() - elif network == 'inception': - model = InceptionV3() - elif network == 'vgg16': - model = Vgg16() - elif network == 'swav': - model = SwAV() - elif network == 'clean_inception': - model = CleanInceptionV3() - else: - raise NotImplementedError(f'Network "{network}" is not supported!') - - if dist.is_initialized() and is_local_master(): - # Make sure only the first process in distributed training downloads - # the model, and the others will use the cache - # noinspection PyUnresolvedReferences - dist.barrier() - - model = model.to('cuda').eval() - return model - - -def _calculate_frechet_distance(act_1, act_2, eps=1e-6): - mu1 = np.mean(act_1.cpu().numpy(), axis=0) - sigma1 = np.cov(act_1.cpu().numpy(), rowvar=False) - mu2 = np.mean(act_2.cpu().numpy(), axis=0) - sigma2 = np.cov(act_2.cpu().numpy(), rowvar=False) - mu1 = np.atleast_1d(mu1) - mu2 = np.atleast_1d(mu2) - sigma1 = np.atleast_2d(sigma1) - sigma2 = np.atleast_2d(sigma2) - assert mu1.shape == mu2.shape, 'Training and test mean vectors have different lengths' - assert sigma1.shape == sigma2.shape, 'Training and test covariances have different dimensions' - diff = mu1 - mu2 - # Product might be almost singular - covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False) - if not np.isfinite(covmean).all(): - msg = ('fid calculation produces singular product; ' 'adding %s to diagonal of cov estimates') % eps - print(msg) - offset = np.eye(sigma1.shape[0]) * eps - covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset)) - - # Numerical error might give slight imaginary component - if np.iscomplexobj(covmean): - if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3): - m = np.max(np.abs(covmean.imag)) - print('Imaginary component {}'.format(m)) - # raise ValueError('Imaginary component {}'.format(m)) - covmean = covmean.real - tr_covmean = np.trace(covmean) - return {"FID": (diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean)} - - -def is_master(): - r"""check if current process is the master""" - return get_rank() == 0 - - -def get_rank(): - r"""Get rank of the thread.""" - rank = 0 - if dist.is_available(): - if dist.is_initialized(): - rank = dist.get_rank() - return rank - - -def is_local_master(): - return torch.cuda.current_device() == 0 - - -def load_or_compute_activations( - act_path, - data_loader, - key_real, - key_fake, - generator=None, - sample_size=None, - preprocess=None, - is_video=False, - few_shot_video=False, - network='inception', - **kwargs, -): - r"""Load mean and covariance from saved npy file if exists. Otherwise, compute the mean and covariance. - - Args: - act_path (str or None): Location for the numpy file to store or to load the activations. - data_loader (obj): PyTorch dataloader object. - key_real (str): Dictionary key value for the real data. - key_fake (str): Dictionary key value for the fake data. - generator (obj): PyTorch trainer network. - sample_size (int): How many samples to be used for computing the KID. - preprocess (func): The preprocess function to be applied to the data. - is_video (bool): Whether we are handling video sequences. - few_shot_video (bool): If ``True``, uses few-shot video synthesis. - network (str): Which recognition backbone to use. - Returns: - (torch.Tensor) Feature activations. - """ - if act_path is not None and os.path.exists(act_path): - # Loading precomputed activations. - print('Load activations from {}'.format(act_path)) - act = torch.load(act_path, map_location='cpu').cuda() - else: - # Compute activations. - if is_video: - act = get_video_activations( - data_loader, key_real, key_fake, generator, sample_size, preprocess, few_shot_video, network, **kwargs - ) - else: - act = get_activations( - data_loader, key_real, key_fake, generator, sample_size, preprocess, True, network, **kwargs - ) - if act_path is not None and is_local_master(): - print('Save activations to {}'.format(act_path)) - if not os.path.exists(os.path.dirname(act_path)): - os.makedirs(os.path.dirname(act_path), exist_ok=True) - torch.save(act, act_path) - return act - - -@torch.no_grad() -def compute_fid( - fid_path, - data_loader, - net_G, - key_real='images', - key_fake='fake_images', - sample_size=None, - preprocess=None, - return_act=False, - is_video=False, - few_shot_video=False, - **kwargs, -): - r"""Compute the fid score. - - Args: - fid_path (str): Location for the numpy file to store or to load the statistics. - data_loader (obj): PyTorch dataloader object. - net_G (obj): For image generation modes, net_G is the generator network. - For video generation models, net_G is the trainer. - key_real (str): Dictionary key value for the real data. - key_fake (str): Dictionary key value for the fake data. - sample_size (int or tuple): How many samples to be used. - preprocess (func): The preprocess function to be applied to the data. - return_act (bool): If ``True``, also returns feature activations of - real and fake data. - is_video (bool): Whether we are handling video sequences. - few_shot_video (bool): If ``True``, uses few-shot video synthesis. - Returns: - (float): FID value. - """ - print('Computing FID.') - act_path = os.path.join(os.path.dirname(fid_path), 'activations_real.npy') - # Get the fake mean and covariance. - fake_act = load_or_compute_activations( - None, - data_loader, - key_real, - key_fake, - net_G, - sample_size, - preprocess, - is_video=is_video, - few_shot_video=few_shot_video, - **kwargs, - ) - - # Get the ground truth mean and covariance. - real_act = load_or_compute_activations( - act_path, - data_loader, - key_real, - key_fake, - None, - sample_size, - preprocess, - is_video=is_video, - few_shot_video=few_shot_video, - **kwargs, - ) - - if is_master(): - fid = _calculate_frechet_distance(fake_act, real_act)["FID"] - if return_act: - return fid, real_act, fake_act - else: - return fid - elif return_act: - return None, None, None - else: - return None - - -def get_world_size(): - r"""Get world size. How many GPUs are available in this job.""" - world_size = 1 - if dist.is_available(): - if dist.is_initialized(): - world_size = dist.get_world_size() - return world_size - - -def dist_all_gather_tensor(tensor): - r""" gather to all ranks """ - world_size = get_world_size() - if world_size < 2: - return [tensor] - tensor_list = [torch.ones_like(tensor) for _ in range(dist.get_world_size())] - with torch.no_grad(): - dist.all_gather(tensor_list, tensor) - return tensor_list - - -def to_device(data, device): - r"""Move all tensors inside data to device. - - Args: - data (dict, list, or tensor): Input data. - device (str): 'cpu' or 'cuda'. - """ - assert device in ['cpu', 'cuda'] - string_classes = (str, bytes) - if isinstance(data, torch.Tensor): - data = data.to(torch.device(device)) - return data - elif isinstance(data, collections.abc.Mapping): - return type(data)({key: to_device(data[key], device) for key in data}) - elif isinstance(data, collections.abc.Sequence) and not isinstance(data, string_classes): - return type(data)([to_device(d, device) for d in data]) - else: - return data - - -def to_cuda(data): - r"""Move all tensors inside data to gpu. - - Args: - data (dict, list, or tensor): Input data. - """ - return to_device(data, 'cuda') - - -@torch.no_grad() -def get_activations( - data_loader, - key_real, - key_fake, - generator=None, - sample_size=None, - preprocess=None, - align_corners=True, - network='inception', - **kwargs, -): - r"""Compute activation values and pack them in a list. - - Args: - data_loader (obj): PyTorch dataloader object. - key_real (str): Dictionary key value for the real data. - key_fake (str): Dictionary key value for the fake data. - generator (obj): PyTorch trainer network. - sample_size (int): How many samples to use for FID. - preprocess (func): Pre-processing function to use. - align_corners (bool): The ``'align_corners'`` parameter to be used for `torch.nn.functional.interpolate`. - network (str): Which recognition backbone to use. - Returns: - batch_y (tensor): Inception features of the current batch. Note that only the master gpu will get it. - """ - model = network_init(network) - batch_y = [] - world_size = get_world_size() - - # Iterate through the dataset to compute the activation. - for it, data in enumerate(data_loader): - data = to_cuda(data) - # Preprocess the data. - if preprocess is not None: - data = preprocess(data) - # Load real data if the generator is not specified. - if generator is None: - images = data[key_real] - if torch.max(images) > 1: - images = images / 255.0 # convert RGB to (0,1) - else: - # Compute the generated image. - text = data[1]['caption'] ### input is captions - net_G_output = generator(text, **kwargs) - images = net_G_output - # Clamp the image for models that do not set the output to between - # -1, 1. For models that employ tanh, this has no effect. - images.clamp_(-1, 1) - y = model(images, align_corners=align_corners) - # y = network_forward(model, images, align_corners=align_corners) - batch_y.append(y) - if sample_size is not None and data_loader.batch_size * world_size * (it + 1) >= sample_size: - # Reach the number of samples we need. - break - - batch_y = torch.cat(dist_all_gather_tensor(torch.cat(batch_y))) - if sample_size is not None: - batch_y = batch_y[:sample_size] - print(f"Computed feature activations of size {batch_y.shape}") - return batch_y - - -@torch.no_grad() -def compute_fid_data( - folder_to_store_real_act, - data_loader_a, - data_loader_b, - key_a='images', - key_b='images', - sample_size=None, - is_video=False, - few_shot_video=False, - network='inception', - **kwargs, -): - r"""Compute the fid score between two datasets. - - Args: - folder_to_store_real_act (str): Location to store the statistics or to load the statistics. - data_loader_a (obj): PyTorch dataloader object for dataset a. - data_loader_b (obj): PyTorch dataloader object for dataset b. - key_a (str): Dictionary key value for images in the dataset a. - key_b (str): Dictionary key value for images in the dataset b. - sample_size (int or None): How many samples to be used for computing the FID. - is_video (bool): Whether we are handling video sequences. - few_shot_video (bool): If ``True``, uses few-shot video synthesis. - network (str): Which recognition backbone to use. - Returns: - (float): FID value. - """ - print('Computing FID.') - if folder_to_store_real_act is None: - path_a = None - else: - path_a = os.path.join(os.path.dirname(folder_to_store_real_act), 'activations_a.npy') - # min_data_size = min(len(data_loader_a.dataset), len(data_loader_b.dataset)) - # sample_size = min_data_size if sample_size is None else min(sample_size, min_data_size) - - act_a = load_or_compute_activations( - path_a, - data_loader_a, - key_a, - key_b, - None, - sample_size=sample_size, - is_video=is_video, - few_shot_video=few_shot_video, - network=network, - **kwargs, - ) - act_b = load_or_compute_activations( - None, - data_loader_b, - key_a, - key_b, - None, - sample_size=sample_size, - is_video=is_video, - few_shot_video=few_shot_video, - network=network, - **kwargs, - ) - print(act_a.shape, act_b.shape) - if is_master(): - return _calculate_frechet_distance(act_a, act_b)["FID"] diff --git a/scripts/fid-eval-text2img/eval_fid.py b/scripts/fid-eval-text2img/eval_fid.py deleted file mode 100644 index d6312fad843a..000000000000 --- a/scripts/fid-eval-text2img/eval_fid.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Example usage: - python eval_fid.py \ - --coco_images_path /path/to/coco2014_val \ - --fid_images_path /path/to/synthetic_images \ - --output_path /path/to/output/fid_scores.csv - -1. `--coco_images_path`: The path to the real images directory. In this example, - it is set to `/path/to/coco2014_val`. This path should point to the - directory containing the COCO 2014 validation dataset images, resized - to 256x256 pixels. - -2. `--fid_images_path`: The path to the directory containing subfolders - with synthetic images. In this example, it is set to - `/path/to/synthetic_images`. Each subfolder should contain a - set of synthetic images for which you want to compute FID scores - against the real images from `--coco_images_path`. - -3. `--output_path`: The path to the output CSV file where the FID scores - will be saved. In this example, it is set to - `/path/to/output/fid_scores.csv`. This file will contain a table with - two columns: `cfg` and `fid`. The `cfg` column lists the - names of the subfolders in `--fid_images_path`, and the `fid` column - lists the corresponding FID scores between the synthetic images in - each subfolder and the real images from `--coco_images_path`. -""" - -import argparse -import csv -import os -import torch - -from compute_fid import compute_fid_data -from fid_dataset import CustomDataset - -if __name__ == '__main__': - # Argument parsing - parser = argparse.ArgumentParser() - parser.add_argument('--coco_images_path', default='/coco2014/coco2014_val/images_256', type=str) - parser.add_argument('--fid_images_path', default=None, type=str) - parser.add_argument('--output_path', default='./fid_scores.csv', type=str) - args = parser.parse_args() - - # Set paths for synthetic images and real images - fid_images_path = args.fid_images_path - real_path = args.coco_images_path - - # Create dataset and data loader for real images - real_dataset = CustomDataset(real_path) - loader_real = torch.utils.data.DataLoader( - real_dataset, batch_size=32, num_workers=0, pin_memory=True, drop_last=False - ) - - # Create output CSV file - with open(args.output_path, 'w', newline='') as csvfile: - fieldnames = ['cfg', 'fid'] - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - - # Iterate through subfolders in fid_images_path - for subfolder in os.listdir(fid_images_path): - subfolder_path = os.path.join(fid_images_path, subfolder) - if os.path.isdir(subfolder_path): - # Create dataset and data loader for synthetic images in subfolder - synthetic_dataset = CustomDataset(subfolder_path, target_size=256) - loader_synthetic = torch.utils.data.DataLoader( - synthetic_dataset, batch_size=32, num_workers=0, pin_memory=True, drop_last=False - ) - - # Compute FID score between synthetic images in subfolder and real images - fid = compute_fid_data( - './', - loader_real, - loader_synthetic, - key_a=0, - key_b=0, - sample_size=None, - is_video=False, - few_shot_video=False, - network='tf_inception', - interpolation_mode='bilinear', - ) - - print(f"The FID score between {subfolder_path} and {real_path} is {fid}") - - # Write FID score to output CSV file - writer.writerow({'cfg': subfolder, 'fid': fid}) diff --git a/scripts/fid-eval-text2img/fid_dataset.py b/scripts/fid-eval-text2img/fid_dataset.py deleted file mode 100644 index 6da1db7cd00c..000000000000 --- a/scripts/fid-eval-text2img/fid_dataset.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os - -import numpy as np -import torch -import torch.utils.data as data -import torchvision.transforms as transforms -from PIL import Image -from pycocotools.coco import COCO -from torchvision.io import ImageReadMode, read_image - - -def _pil_interp(method): - if method == 'bicubic': - return Image.BICUBIC - elif method == 'lanczos': - return Image.LANCZOS - elif method == 'hamming': - return Image.HAMMING - else: - # default bilinear, do we want to allow nearest? - return Image.BILINEAR - - -def _size_tuple(size): - if isinstance(size, int): - return size, size - else: - assert len(size) == 2 - return size - - -class CenterCropResize: - def __init__(self, target_size: int, interpolation: str = 'bilinear', fill_color: tuple = (0, 0, 0)): - self.target_size = _size_tuple(target_size) - self.interpolation = interpolation - self.fill_color = fill_color - - def __call__(self, img): - w, h = img.size - img = np.array(img).astype(np.uint8) - crop = min(w, h) - img = img[(h - crop) // 2 : (h + crop) // 2, (w - crop) // 2 : (w + crop) // 2] - image = Image.fromarray(img) - if self.target_size is not None: - interp_method = _pil_interp(self.interpolation) - new_img = image.resize(self.target_size, resample=interp_method) - return new_img - - -class CustomDataset(data.Dataset): - def __init__(self, root, target_size=None): - self.root = root - self.files = [f for f in os.listdir(self.root) if os.path.isfile(os.path.join(self.root, f))] - self.transform = transforms.ToTensor() - self.target_size = target_size - - def __len__(self): - return len(self.files) - - def __getitem__(self, index): - file = self.files[index] - # image = read_image(os.path.join(self.root, file), mode=ImageReadMode.RGB).type(torch.float32) / 255 - image = Image.open(os.path.join(self.root, file)).convert('RGB') - if self.target_size is not None: - image = image.resize((self.target_size, self.target_size), resample=Image.BICUBIC) - image = self.transform(image) - image = 2 * image - 1 - return image, file - - -class CocoDataset(data.Dataset): - def __init__(self, root, ann_file, captions, transform=None, target_size=None): - self.root = root - self.coco = None - self.captions = captions - self.img_ids = [x['image_id'] for x in self.captions] - self.has_annotations = 'image_info' not in ann_file - self.transforms = [transforms.ToTensor()] - if transform is not None: - self.transforms.append(transform) - self.target_size = target_size - self.img_ids_invalid = [] - self.img_infos = [] - self._load_annotations(ann_file) - - def _load_annotations(self, ann_file): - assert self.coco is None - self.coco = COCO(ann_file) - img_ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values()) - for img_id in self.img_ids: - info = self.coco.loadImgs([img_id])[0] - valid_annotation = not self.has_annotations or img_id in img_ids_with_ann - if valid_annotation and min(info['width'], info['height']) >= 32: - self.img_infos.append(info) - else: - self.img_ids_invalid.append(img_id) - - def __len__(self): - return len(self.img_infos) - - def _compose(self, image): - for t in self.transforms[::-1]: - image = t(image) - return image - - def __getitem__(self, index): - img_id = self.img_ids[index] - img_info = self.img_infos[index] - cap = self.captions[index] - path = img_info['file_name'] - image = Image.open(os.path.join(self.root, path)).convert('RGB') - if self.target_size is not None: - image = image.resize((512, 512)) - image = self._compose(image) - return image, cap diff --git a/scripts/fid-eval-text2img/plot.py b/scripts/fid-eval-text2img/plot.py deleted file mode 100644 index e9217f4d6e72..000000000000 --- a/scripts/fid-eval-text2img/plot.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -python plot_fid_vs_clip.py \ - --fid_scores_csv path/to/fid_scores.csv \ - --clip_scores_csv path/to/clip_scores.csv -Replace path/to/fid_scores.csv and path/to/clip_scores.csv with the paths -to the respective CSV files. The script will display the plot with FID -scores against CLIP scores, with cfg values annotated on each point. -""" - -import argparse - -import matplotlib.pyplot as plt -import pandas as pd - - -def plot_fid_vs_clip(fid_scores_csv, clip_scores_csv): - fid_scores = pd.read_csv(fid_scores_csv) - clip_scores = pd.read_csv(clip_scores_csv) - merged_data = pd.merge(fid_scores, clip_scores, on='cfg') - - fig, ax = plt.subplots() - ax.plot(merged_data['clip_score'], merged_data['fid'], marker='o', linestyle='-') # Connect points with a line - - for i, txt in enumerate(merged_data['cfg']): - ax.annotate(txt, (merged_data['clip_score'][i], merged_data['fid'][i])) - - ax.set_xlabel('CLIP Score') - ax.set_ylabel('FID') - ax.set_title('FID vs CLIP Score') - - plt.show() - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--fid_scores_csv', required=True, type=str, help='Path to the FID scores CSV file') - parser.add_argument('--clip_scores_csv', required=True, type=str, help='Path to the CLIP scores CSV file') - args = parser.parse_args() - - plot_fid_vs_clip(args.fid_scores_csv, args.clip_scores_csv) diff --git a/tests/collections/multimodal/test_clip_model.py b/tests/collections/multimodal/test_clip_model.py deleted file mode 100644 index 781757c5869f..000000000000 --- a/tests/collections/multimodal/test_clip_model.py +++ /dev/null @@ -1,482 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import pytest -import torch -from omegaconf import DictConfig, OmegaConf -from pytorch_lightning import Trainer - -from nemo.collections.multimodal.data.clip.clip_dataset import build_train_valid_datasets -from nemo.collections.multimodal.models.clip.megatron_clip_models import ( - CLIPModel, - CLIPTextTransformer, - CLIPVisionTransformer, - MegatronCLIPModel, -) -from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy - -DEVICE_CAPABILITY = None -if torch.cuda.is_available(): - DEVICE_CAPABILITY = torch.cuda.get_device_capability() - - -@pytest.fixture() -def model_cfg(): - - model_cfg_string = """ - precision: 16 - micro_batch_size: 2 # limited by GPU memory - global_batch_size: 2 # will use more micro batches to reach global batch size - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - virtual_pipeline_model_parallel_size: null # interleaved pipeline - - restore_from_path: null # used in fine-tuning - # multimodal configs - output_dim: 64 - local_loss: False # calculate loss w/ local features @ global (instead of realizing full global @ global matrix) - gather_with_grad: True # enable full distributed gradient for feature gather, set this to False may cause convergence issue - - vision: - precision: 16 - # vision configs - patch_dim: 16 - img_h: 224 - img_w: 224 - image_mean: null - image_std: null - num_channels: 3 - drop_patch_rate: 0.0 - drop_path_rate: 0.0 - global_average_pool: False - output_dim: 64 - class_token_length: 8 - preprocess_layernorm: True # apply layer norm to embedded tokens - - # model architecture - encoder_seq_length: 196 - max_position_embeddings: 196 - position_embedding_type: learned_absolute - num_layers: 2 - hidden_size: 768 - ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. - num_attention_heads: 12 - init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') - use_scaled_init_method: True # use scaled residuals initialization - hidden_dropout: 0. # Dropout probability for hidden state transformer. - attention_dropout: 0. - kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. - normalization: layernorm # Type of normalization layers - layernorm_epsilon: 1e-5 - do_layer_norm_weight_decay: False # True means weight decay on all params - pre_process: True # add embedding - post_process: True # add pooler - persist_layer_norm: True # Use of persistent fused layer norm kernel. - - ## Activation Checkpointing - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - activations_checkpoint_num_layers: null # not used with 'selective' - sequence_parallel: False - - # precision - native_amp_init_scale: 4294967296 # 2 ** 32 - native_amp_growth_interval: 1000 - hysteresis: 2 # Gradient scale hysteresis - fp32_residual_connection: False # Move residual connections to fp32 - fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 - - # model fusions - masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. - bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. - - use_cpu_initialization: False # Init weights on the CPU (slow for large models) - onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. - gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. - openai_gelu: False - bias_activation_fusion: False - megatron_legacy: False - - - text: - precision: 16 - # text configs - output_dim: 64 - - # model architecture - encoder_seq_length: 77 - max_position_embeddings: 77 - position_embedding_type: learned_absolute - num_layers: 2 - hidden_size: 512 - ffn_hidden_size: 2048 # Transformer FFN hidden size. Usually 4 * hidden_size. - num_attention_heads: 8 - init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') - use_scaled_init_method: True # use scaled residuals initialization - hidden_dropout: 0. # Dropout probability for hidden state transformer. - attention_dropout: 0. - kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. - normalization: layernorm # Type of normalization layers - layernorm_epsilon: 1e-5 - do_layer_norm_weight_decay: False # True means weight decay on all params - pre_process: True # add embedding - post_process: True # add pooler - persist_layer_norm: True # Use of persistent fused layer norm kernel. - - ## Activation Checkpointing - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - activations_checkpoint_num_layers: null # not used with 'selective' - num_micro_batches_with_partial_activation_checkpoints: null - activations_checkpoint_layers_per_pipeline: null - sequence_parallel: False - - # precision - native_amp_init_scale: 4294967296 # 2 ** 32 - native_amp_growth_interval: 1000 - hysteresis: 2 # Gradient scale hysteresis - fp32_residual_connection: False # Move residual connections to fp32 - fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 - - # model fusions - masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. - bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. - - use_cpu_initialization: False # Init weights on the CPU (slow for large models) - onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. - gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. - openai_gelu: False - bias_activation_fusion: False - megatron_legacy: False - - transformer_engine: False - fp8: False # enables fp8 in TransformerLayer forward - fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 - fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID - fp8_margin: 0 # scaling margin - fp8_interval: 1 # scaling update interval - fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor - fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history - use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. - - # Megatron O2-style half-precision - megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters - grad_allreduce_chunk_size_mb: 125 - grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - - tokenizer: - library: 'huggingface' - type: 'openai/clip-vit-large-patch14' - model: null - vocab_file: null - merge_file: null - delimiter: null # only used for tabular tokenizer - sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. - make_vocab_size_divisible_by: 128 # Pad the vocab size to be divisible by this value for computation efficiency. - - data: - num_workers: 1 - dataset_type: webdataset - - train: - data_path: # List of paths to pkl files or tar files - - /lustre/fsw/joc/multimodal/datasets/cc3m/00000-00008_{000000..000001}.tar - drop_last: True # drop_last = False is not implemented yet - validation: # List of paths to pkl files or tar files - data_path: - - /lustre/fsw/joc/multimodal/datasets/cc3m/00000-00008_000002.tar - drop_last: True # drop_last = False is not implemented yet - webdataset: - object_store: False - bucket: datasets - pbss_credentials_file: pbss_credential - local_root_path: / # tar files local root path - chunk_size: 1000 # if data path is list of tar files, chunk_size needs to be provided - - imagenet_val: null # Path to imagenet val set for conducting zero shot evaluation. - - # Nsys profiling options - nsys_profile: - enabled: False - start_step: 10 # Global batch to start profiling - end_step: 10 # Global batch to end profiling - ranks: [ 0 ] # Global rank IDs to profile - gen_shape: False # Generate model and kernel details including input shapes - - optim: - name: fused_adam - lr: 1e-3 - weight_decay: 0.2 - betas: - - 0.9 - - 0.98 - sched: - name: CosineAnnealing - warmup_steps: 2000 - constant_steps: 0 - min_lr: 1e-5 - """ - model_cfg = OmegaConf.create(model_cfg_string) - return model_cfg - - -@pytest.fixture() -def trainer_cfg(): - - trainer_cfg_string = """ - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: 16 - logger: False - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 - max_steps: 4 - log_every_n_steps: 1 - val_check_interval: 4 - limit_val_batches: 2 - limit_test_batches: 2 - accumulate_grad_batches: 1 - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: False - """ - trainer_cfg = OmegaConf.create(trainer_cfg_string) - - return trainer_cfg - - -@pytest.fixture() -def exp_manager_cfg(): - - exp_manager_cfg_string = """ - explicit_log_dir: null - exp_dir: null - name: megatron_clip - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: False - resume_ignore_no_checkpoint: True - create_checkpoint_callback: False - checkpoint_callback_params: - monitor: val_loss - save_top_k: 10 - mode: min - always_save_nemo: False # saves nemo file during validation, not implemented for model parallel - save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits - filename: 'megatron_vit_classify--{val_loss:.2f}-{step}-{consumed_samples}' - model_parallel_size: 1 - ema: - enable: False - decay: 0.9999 - validate_original_weights: False - every_n_steps: 1 - cpu_offload: False - """ - exp_manager_cfg = OmegaConf.create(exp_manager_cfg_string) - - return exp_manager_cfg - - -@pytest.fixture() -def precision(): - return 32 - - -@pytest.fixture() -def clip_trainer_and_model(model_cfg, trainer_cfg, precision): - model_cfg['vision']['precision'] = precision - model_cfg['text']['precision'] = precision - trainer_cfg['precision'] = precision - - strategy = NLPDDPStrategy() - - trainer = Trainer(strategy=strategy, **trainer_cfg) - - cfg = DictConfig(model_cfg) - - model = MegatronCLIPModel(cfg=cfg, trainer=trainer) - - def dummy(): - return - - if model.trainer.strategy.launcher is not None: - model.trainer.strategy.launcher.launch(dummy, trainer=model.trainer) - model.trainer.strategy.setup_environment() - - return trainer, model - - -def build_datasets(cfg, tokenizer): - return build_train_valid_datasets(model_cfg=cfg, consumed_samples=0, tokenizer=tokenizer,) - - -@pytest.mark.run_only_on('GPU') -class TestMegatronCLIPModel: - @pytest.mark.unit - def test_constructor(self, clip_trainer_and_model): - clip_model = clip_trainer_and_model[1] - assert isinstance(clip_model, MegatronCLIPModel) - - num_weights = clip_model.num_weights - assert num_weights == 46643969 - - @pytest.mark.unit - def test_build_dataset(self, clip_trainer_and_model, test_data_dir): - clip_model = clip_trainer_and_model[1] - train_ds, validation_ds = build_train_valid_datasets( - model_cfg=clip_model.cfg, consumed_samples=0, tokenizer=clip_model.tokenizer, - ) - assert len(train_ds) == 2000 - assert len(validation_ds) == 1000 - sample = next(iter(train_ds)) - assert "captions" in sample - assert "images" in sample - - @pytest.mark.parametrize( - "precision", - [ - 32, - 16, - pytest.param( - "bf16", - marks=pytest.mark.skipif( - not DEVICE_CAPABILITY or DEVICE_CAPABILITY[0] < 8, - reason='bfloat16 is not supported on this device', - ), - ), - ], - ) - @pytest.mark.unit - def test_forward(self, clip_trainer_and_model, test_data_dir, precision=None): - trainer, clip_model = clip_trainer_and_model - - dtype = None - if clip_model.cfg['precision'] in [32, '32', '32-true']: - dtype = torch.float - elif clip_model.cfg['precision'] in [16, '16', '16-mixed']: - dtype = torch.float16 - elif clip_model.cfg['precision'] in ['bf16', 'bf16-mixed']: - dtype = torch.bfloat16 - else: - raise ValueError(f"precision: {clip_model.cfg['precision']} is not supported.") - - clip_model.eval() - _, validation_ds = build_datasets(clip_model.cfg, clip_model.tokenizer) - - val_loader = torch.utils.data.DataLoader(validation_ds, batch_size=4) - batch = next(iter(val_loader)) - - tokens = batch["images"] - texts = batch["captions"] - with torch.no_grad(): - B, C, H, W = tokens.shape - assert H == W - with torch.autocast('cuda', dtype=dtype): - output_tensor = clip_model(image=tokens.cuda(), text=texts.cuda(),) - # output is (B, #classes) - # assert output_tensor.shape == torch.Size([B, clip_model.cfg['num_classes']]) - # assert output_tensor.dtype == dtype - - # @pytest.mark.unit - # def test_vit_backbone(self, model_cfg, trainer_cfg, precision): - # initialize_model_parallel_for_nemo( - # world_size=1, - # global_rank=0, - # local_rank=0, - # tensor_model_parallel_size=model_cfg.get('tensor_model_parallel_size', 1), - # pipeline_model_parallel_size=model_cfg.get('pipeline_model_parallel_size', 1), - # virtual_pipeline_model_parallel_size=model_cfg.get('virtual_pipeline_model_parallel_size', None), - # pipeline_model_parallel_split_rank=model_cfg.get('pipeline_model_parallel_split_rank', 0), - # micro_batch_size=model_cfg.get('micro_batch_size'), - # global_batch_size=model_cfg.get('global_batch_size'), - # seed=model_cfg.get('seed', 1234), - # apex_transformer_log_level=model_cfg.get('apex_transformer_log_level', 30), - # ) - # - # dtype = None - # if trainer_cfg['precision'] in [32, '32', '32-true']: - # dtype = torch.float - # elif trainer_cfg['precision'] in [16, '16', '16-mixed']: - # dtype = torch.float16 - # elif trainer_cfg['precision'] in ['bf16', 'bf16-mixed']: - # dtype = torch.bfloat16 - # else: - # raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") - # - # vit_backbone = VitBackbone( - # model_cfg, - # init_method=None, - # scaled_init_method=None, - # pre_process=True, - # post_process=True, - # single_token_output=True - # ).cuda() - # vit_backbone.eval() - # - # # shape: (B, C, H, W) - # tokens = torch.rand((6, 3, 224, 224)) - # - # with torch.no_grad(): - # B, C, H, W = tokens.shape - # assert H == W - # with torch.autocast('cuda', dtype=dtype): - # output_tensor = vit_backbone( - # tokens.cuda(), - # ) - # # output is (B, #classes) - # assert output_tensor.shape == torch.Size([B, model_cfg['hidden_size']]) - # assert output_tensor.dtype == dtype - # - # @pytest.mark.unit - # def test_vit_head(self, model_cfg, trainer_cfg, precision): - # dtype = None - # if trainer_cfg['precision'] in [32, '32', '32-true']: - # dtype = torch.float - # elif trainer_cfg['precision'] in [16, '16', '16-mixed']: - # dtype = torch.float16 - # elif trainer_cfg['precision'] in ['bf16', 'bf16-mixed']: - # dtype = torch.bfloat16 - # else: - # raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") - # - # vit_head = VitMlpHead( - # 24, 50, - # ).cuda() - # vit_head.eval() - # - # hidden = torch.rand((6, 24)) - # - # with torch.no_grad(): - # with torch.autocast('cuda', dtype=dtype): - # output_tensor = vit_head( - # hidden.cuda(), - # ) - # # output is (B, #classes) - # assert output_tensor.shape == torch.Size([6, 50]) - # assert output_tensor.dtype == dtype diff --git a/tests/collections/vision/test_vit_model.py b/tests/collections/vision/test_vit_model.py deleted file mode 100644 index e50106957679..000000000000 --- a/tests/collections/vision/test_vit_model.py +++ /dev/null @@ -1,379 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import pytest -import torch -from omegaconf import DictConfig, OmegaConf -from pytorch_lightning import Trainer - -from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo -from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy -from nemo.collections.vision.data.megatron.vit_dataset import build_train_valid_datasets -from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel -from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone, VitMlpHead - -DEVICE_CAPABILITY = None -if torch.cuda.is_available(): - DEVICE_CAPABILITY = torch.cuda.get_device_capability() - - -@pytest.fixture() -def model_cfg(): - model_cfg_string = """ - precision: 16 - micro_batch_size: 2 # limited by GPU memory - global_batch_size: 4 # will use more micro batches to reach global batch size - tensor_model_parallel_size: 1 # intra-layer model parallelism - pipeline_model_parallel_size: 1 # inter-layer model parallelism - virtual_pipeline_model_parallel_size: null # interleaved pipeline - - restore_from_path: null # used in fine-tuning - - # vision configs - vision_pretraining_type: "classify" - num_classes: 1000 - patch_dim: 16 - img_h: 224 - img_w: 224 - classes_fraction: 1.0 - data_per_class_fraction: 1.0 - num_channels: 3 - drop_path_rate: 0.0 - - # model architecture - encoder_seq_length: 4 - max_position_embeddings: ${.encoder_seq_length} - num_layers: 12 - hidden_size: 768 - ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. - num_attention_heads: 12 - init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') - use_scaled_init_method: True # use scaled residuals initialization - hidden_dropout: 0.1 # Dropout probability for hidden state transformer. - attention_dropout: 0. - kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. - normalization: layernorm # Type of normalization layers - layernorm_epsilon: 1e-5 - do_layer_norm_weight_decay: False # True means weight decay on all params - pre_process: True # add embedding - post_process: True # add pooler - persist_layer_norm: True # Use of persistent fused layer norm kernel. - - # precision - native_amp_init_scale: 4294967296 # 2 ** 32 - native_amp_growth_interval: 1000 - hysteresis: 2 # Gradient scale hysteresis - fp32_residual_connection: False # Move residual connections to fp32 - fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 - - # Megatron O2-style half-precision - megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters - grad_allreduce_chunk_size_mb: 125 - grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce - masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. - bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. - - # miscellaneous - seed: 1234 - resume_from_checkpoint: null # manually set the checkpoint file to load from - use_cpu_initialization: False # Init weights on the CPU (slow for large models) - onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. - apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this - gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) - gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. - openai_gelu: False - bias_activation_fusion: False - megatron_legacy: False - - ## Activation Checkpointing - # NeMo Megatron supports 'selective' activation checkpointing where only the memory intensive part of attention is checkpointed. - # These memory intensive activations are also less compute intensive which makes activation checkpointing more efficient for LLMs (20B+). - # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. - # 'full' will checkpoint the entire transformer layer. - activations_checkpoint_granularity: null # 'selective' or 'full' - activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' - # 'uniform' divides the total number of transformer layers and checkpoints the input activation - # of each chunk at the specified granularity - # 'block' checkpoints the specified number of layers per pipeline stage at the specified granularity - activations_checkpoint_num_layers: null # not used with 'selective' - # when using 'uniform' this creates groups of transformer layers to checkpoint. Usually set to 1. Increase to save more memory. - # when using 'block' this this will checkpoint the first activations_checkpoint_num_layers per pipeline stage. - - ## Sequence Parallelism - # Makes tensor parallelism more memory efficient for LLMs (20B+) by parallelizing layer norms and dropout sequentially - # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. - sequence_parallel: False - - data: - # Path to image dataset must be specified by the user. - # Supports List - # List: can override from the CLI: "model.data.data_prefix=[/path/to/train, /path/to/val]", - data_path: "dummy/path" - num_workers: 2 - dataloader_type: cyclic # cyclic - validation_drop_last: True # Set to false if the last partial validation samples is to be consumed - data_sharding: False - - # Nsys profiling options - nsys_profile: - enabled: False - start_step: 10 # Global batch to start profiling - end_step: 10 # Global batch to end profiling - ranks: [0] # Global rank IDs to profile - gen_shape: False # Generate model and kernel details including input shapes - - optim: - name: fused_adam - lr: 5e-4 - weight_decay: 0.1 - betas: - - 0.9 - - 0.999 - sched: - name: CosineAnnealing - warmup_steps: 10000 - constant_steps: 0 - min_lr: 1e-5 - """ - model_cfg = OmegaConf.create(model_cfg_string) - return model_cfg - - -@pytest.fixture() -def trainer_cfg(): - trainer_cfg_string = """ - devices: 1 - num_nodes: 1 - accelerator: gpu - precision: 16 - logger: False - enable_checkpointing: False - use_distributed_sampler: False - max_epochs: -1 - max_steps: 4 - log_every_n_steps: 1 - val_check_interval: 4 - limit_val_batches: 2 - limit_test_batches: 2 - accumulate_grad_batches: 1 - gradient_clip_val: 1.0 - benchmark: False - enable_model_summary: False - """ - trainer_cfg = OmegaConf.create(trainer_cfg_string) - - return trainer_cfg - - -@pytest.fixture() -def exp_manager_cfg(): - exp_manager_cfg_string = """ - explicit_log_dir: null - exp_dir: null - name: megatron_vit_classify - create_wandb_logger: False - wandb_logger_kwargs: - project: null - name: null - resume_if_exists: False - resume_ignore_no_checkpoint: True - create_checkpoint_callback: False - checkpoint_callback_params: - monitor: val_loss - save_top_k: 10 - mode: min - always_save_nemo: False # saves nemo file during validation, not implemented for model parallel - save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits - filename: 'megatron_vit_classify--{val_loss:.2f}-{step}-{consumed_samples}' - model_parallel_size: 1 - """ - exp_manager_cfg = OmegaConf.create(exp_manager_cfg_string) - - return exp_manager_cfg - - -@pytest.fixture() -def precision(): - return 32 - - -@pytest.fixture() -def vit_classification_trainer_and_model(model_cfg, trainer_cfg, precision): - model_cfg['precision'] = precision - trainer_cfg['precision'] = precision - - strategy = NLPDDPStrategy() - - trainer = Trainer(strategy=strategy, **trainer_cfg) - - cfg = DictConfig(model_cfg) - - model = MegatronVitClassificationModel(cfg=cfg, trainer=trainer) - - return trainer, model - - -def build_datasets(cfg, test_data_dir): - data_path = [ - os.path.join(test_data_dir, "vision/tiny_imagenet/train"), - os.path.join(test_data_dir, "vision/tiny_imagenet/val"), - ] - return build_train_valid_datasets(model_cfg=cfg, data_path=data_path, image_size=(cfg.img_h, cfg.img_w),) - - -@pytest.mark.run_only_on('GPU') -class TestMegatronVitClassificationModel: - @pytest.mark.unit - def test_constructor(self, vit_classification_trainer_and_model): - vit_classification_model = vit_classification_trainer_and_model[1] - assert isinstance(vit_classification_model, MegatronVitClassificationModel) - - num_weights = vit_classification_model.num_weights - assert num_weights == 87169000 - - @pytest.mark.unit - def test_build_dataset(self, vit_classification_trainer_and_model, test_data_dir): - vit_classification_model = vit_classification_trainer_and_model[1] - data_path = [ - os.path.join(test_data_dir, "vision/tiny_imagenet/train"), - os.path.join(test_data_dir, "vision/tiny_imagenet/val"), - ] - train_ds, validation_ds = build_train_valid_datasets( - model_cfg=vit_classification_model.cfg, - data_path=data_path, - image_size=(vit_classification_model.cfg.img_h, vit_classification_model.cfg.img_w), - ) - assert len(train_ds) == 20 - assert len(validation_ds) == 20 - assert train_ds[0][0].shape == torch.Size([3, 224, 224]) - assert validation_ds[0][0].shape == torch.Size([3, 224, 224]) - - @pytest.mark.parametrize( - "precision", - [ - 32, - 16, - pytest.param( - "bf16", - marks=pytest.mark.skipif( - not DEVICE_CAPABILITY or DEVICE_CAPABILITY[0] < 8, - reason='bfloat16 is not supported on this device', - ), - ), - ], - ) - @pytest.mark.unit - def test_forward(self, vit_classification_trainer_and_model, test_data_dir): - trainer, vit_classification_model = vit_classification_trainer_and_model - - dtype = None - if vit_classification_model.cfg['precision'] in [32, '32', '32-true']: - dtype = torch.float - elif vit_classification_model.cfg['precision'] in [16, '16', '16-mixed']: - dtype = torch.float16 - elif vit_classification_model.cfg['precision'] in ['bf16', 'bf16-mixed']: - dtype = torch.bfloat16 - else: - raise ValueError(f"precision: {vit_classification_model.cfg['precision']} is not supported.") - - vit_classification_model.eval() - _, validation_ds = build_datasets(vit_classification_model.cfg, test_data_dir) - - # shape: (B, C, H, W) - images = [validation_ds[i][0] for i in range(4)] - tokens = torch.stack(images, dim=0) - - with torch.no_grad(): - B, C, H, W = tokens.shape - assert H == W - with torch.autocast('cuda', dtype=dtype): - output_tensor = vit_classification_model.forward(tokens=tokens.cuda(),) - # output is (B, #classes) - assert output_tensor.shape == torch.Size([B, vit_classification_model.cfg['num_classes']]) - assert output_tensor.dtype == dtype - - @pytest.mark.unit - def test_vit_backbone(self, model_cfg, trainer_cfg, precision): - initialize_model_parallel_for_nemo( - world_size=1, - global_rank=0, - local_rank=0, - tensor_model_parallel_size=model_cfg.get('tensor_model_parallel_size', 1), - pipeline_model_parallel_size=model_cfg.get('pipeline_model_parallel_size', 1), - virtual_pipeline_model_parallel_size=model_cfg.get('virtual_pipeline_model_parallel_size', None), - pipeline_model_parallel_split_rank=model_cfg.get('pipeline_model_parallel_split_rank', 0), - micro_batch_size=model_cfg.get('micro_batch_size'), - global_batch_size=model_cfg.get('global_batch_size'), - seed=model_cfg.get('seed', 1234), - apex_transformer_log_level=model_cfg.get('apex_transformer_log_level', 30), - ) - - dtype = None - if trainer_cfg['precision'] in [32, '32', '32-true']: - dtype = torch.float - elif trainer_cfg['precision'] in [16, '16', '16-mixed']: - dtype = torch.float16 - elif trainer_cfg['precision'] in ['bf16', 'bf16-mixed']: - dtype = torch.bfloat16 - else: - raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") - - vit_backbone = VitBackbone( - model_cfg, - init_method=None, - scaled_init_method=None, - pre_process=True, - post_process=True, - single_token_output=True, - ).cuda() - vit_backbone.eval() - - # shape: (B, C, H, W) - tokens = torch.rand((6, 3, 224, 224)) - - with torch.no_grad(): - B, C, H, W = tokens.shape - assert H == W - with torch.autocast('cuda', dtype=dtype): - output_tensor = vit_backbone(tokens.cuda(),) - # output is (B, #classes) - assert output_tensor.shape == torch.Size([B, model_cfg['hidden_size']]) - assert output_tensor.dtype == dtype - - @pytest.mark.unit - def test_vit_head(self, model_cfg, trainer_cfg, precision): - dtype = None - if trainer_cfg['precision'] in [32, '32', '32-true']: - dtype = torch.float - elif trainer_cfg['precision'] in [16, '16', '16-mixed']: - dtype = torch.float16 - elif trainer_cfg['precision'] in ['bf16', 'bf16-mixed']: - dtype = torch.bfloat16 - else: - raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") - - vit_head = VitMlpHead(24, 50,).cuda() - vit_head.eval() - - hidden = torch.rand((6, 24)) - - with torch.no_grad(): - with torch.autocast('cuda', dtype=dtype): - output_tensor = vit_head(hidden.cuda(),) - # output is (B, #classes) - assert output_tensor.shape == torch.Size([6, 50]) - assert output_tensor.dtype == dtype diff --git a/utils/flash-attention.patch b/utils/flash-attention.patch deleted file mode 100644 index 3587ffd57257..000000000000 --- a/utils/flash-attention.patch +++ /dev/null @@ -1,87 +0,0 @@ -diff --git a/csrc/flash_attn/fmha_api.cpp b/csrc/flash_attn/fmha_api.cpp -index 6602a6c..19d1551 100644 ---- a/csrc/flash_attn/fmha_api.cpp -+++ b/csrc/flash_attn/fmha_api.cpp -@@ -207,6 +207,11 @@ mha_fwd(const at::Tensor &q, // total_q x num_heads x head_size, total_q - bool is_sm75 = dprops->major == 7 && dprops->minor == 5; - bool is_sm80 = dprops->major == 8 && dprops->minor == 0; - bool is_sm8x = dprops->major == 8 && dprops->minor >= 0; -+ bool is_sm90 = dprops->major == 9 && dprops->minor == 0; -+ if (is_sm90){ -+ // use sm8x codes for h100 -+ is_sm8x = 1; -+ } - TORCH_CHECK(is_sm8x || is_sm75); - auto stream = at::cuda::getCurrentCUDAStream().stream(); - bool is_dropout = p_dropout > 0.0; -@@ -359,6 +364,11 @@ mha_bwd(const at::Tensor &dout, // total_q x num_heads, x head_size - bool is_sm75 = dprops->major == 7 && dprops->minor == 5; - bool is_sm80 = dprops->major == 8 && dprops->minor == 0; - bool is_sm8x = dprops->major == 8 && dprops->minor >= 0; -+ bool is_sm90 = dprops->major == 9 && dprops->minor == 0; -+ if (is_sm90){ -+ // use sm8x codes for h100 -+ is_sm8x = 1; -+ } - TORCH_CHECK(is_sm8x || is_sm75); - auto launch = &run_fmha_bwd; - -@@ -407,7 +417,7 @@ mha_bwd(const at::Tensor &dout, // total_q x num_heads, x head_size - TORCH_CHECK(batch_size > 0); - TORCH_CHECK((head_size % 8 == 0) && (head_size <= 128)); - if (head_size > 64) { // TODO: eventually we should support SM86 and SM70 with d=128 as well -- TORCH_CHECK(is_sm80); -+ TORCH_CHECK(is_sm80 || is_sm90); - } - - CHECK_SHAPE(q, total_q, num_heads, head_size); -@@ -650,7 +660,12 @@ mha_bwd_block(const at::Tensor &dout, // total x num_heads, x head_size - auto dprops = at::cuda::getCurrentDeviceProperties(); - bool is_sm80 = dprops->major == 8 && dprops->minor == 0; - bool is_sm8x = dprops->major == 8 && dprops->minor >= 0; -- TORCH_CHECK(dprops->major == 8 && dprops->minor >= 0); -+ bool is_sm90 = dprops->major == 9 && dprops->minor == 0; -+ if (is_sm90){ -+ // use sm8x codes for h100 -+ is_sm8x = 1; -+ } -+ TORCH_CHECK(is_sm8x); - auto launch = &run_fmha_block_dgrad_fp16_sm80; - - bool is_dropout = p_dropout > 0.0; -@@ -700,7 +715,7 @@ mha_bwd_block(const at::Tensor &dout, // total x num_heads, x head_size - TORCH_CHECK(batch_size > 0); - TORCH_CHECK(head_size == 16 || head_size == 32 || head_size == 64 || head_size == 128); - if (head_size == 128) { // TODO: eventually we should support SM86 and SM70 with d=128 as well -- TORCH_CHECK(is_sm80); -+ TORCH_CHECK(is_sm80 || is_sm90); - } - - CHECK_SHAPE(q, total_q, num_heads, head_size); -diff --git a/csrc/flash_attn/src/fmha_bwd_hdim64.cu b/csrc/flash_attn/src/fmha_bwd_hdim64.cu -index 7dd8650..d039726 100644 ---- a/csrc/flash_attn/src/fmha_bwd_hdim64.cu -+++ b/csrc/flash_attn/src/fmha_bwd_hdim64.cu -@@ -24,6 +24,9 @@ void run_fmha_bwd_hdim64(FMHA_dgrad_params ¶ms, cudaStream_t stream, const b - } else if (dprops->major == 7 && dprops->minor == 5) { - using Kernel_traits = FMHA_kernel_traits<128, 64, 16, 1, 8, 0x08u, elem_type>; - run_fmha_bwd_loop(params, stream, configure); -+ } else if (dprops->major == 9 && dprops->minor == 0) { -+ using Kernel_traits = FMHA_kernel_traits<256, 64, 16, 1, 8, 0x100u, elem_type>; -+ run_fmha_bwd_loop(params, stream, configure); - } - } - })); -diff --git a/setup.py b/setup.py -index 5516804..a21a903 100644 ---- a/setup.py -+++ b/setup.py -@@ -112,6 +112,8 @@ cc_flag.append("-gencode") - cc_flag.append("arch=compute_75,code=sm_75") - cc_flag.append("-gencode") - cc_flag.append("arch=compute_80,code=sm_80") -+cc_flag.append("-gencode") -+cc_flag.append("arch=compute_90,code=sm_90") - - subprocess.run(["git", "submodule", "update", "--init", "csrc/flash_attn/cutlass"]) - ext_modules.append( diff --git a/utils/triton.patch b/utils/triton.patch deleted file mode 100644 index 66c2ac97609e..000000000000 --- a/utils/triton.patch +++ /dev/null @@ -1,53 +0,0 @@ -diff --git a/lib/driver/llvm.cc b/lib/driver/llvm.cc -index a73e6541d..01e38f825 100644 ---- a/lib/driver/llvm.cc -+++ b/lib/driver/llvm.cc -@@ -25,6 +25,7 @@ - #endif - #include - #include -+#include - #include "triton/driver/llvm.h" - #include "triton/driver/dispatch.h" - #include "triton/driver/error.h" -@@ -148,7 +149,9 @@ namespace triton - - int vptx(int version) - { -- if (version >= 11040) -+ if (version >= 12000) -+ return 80; -+ else if (version >= 11040) - return 74; - // if(version >= 11030) return 73; - // if(version >= 11020) return 72; -@@ -163,15 +166,15 @@ namespace triton - std::string llir_to_ptx(llvm::Module *module, int cc, int version) - { - // LLVM version in use may not officially support target hardware -- int max_nvvm_cc = 75; -- int max_nvvm_ptx = 74; -+ int max_nvvm_cc = 90; -+ int max_nvvm_ptx = 80; - // options - auto options = llvm::cl::getRegisteredOptions(); - auto *short_ptr = static_cast *>(options["nvptx-short-ptr"]); - assert(short_ptr); - short_ptr->setValue(true); - // compute capability -- std::string sm = "sm_" + std::to_string(cc); -+ std::string sm = cc == 90 ? "sm_90a" : "sm_" + std::to_string(cc); - // max PTX version - int ptx = vptx(version); - int ptx_major = ptx / 10; -@@ -244,7 +247,9 @@ namespace triton - ofs.close(); - std::string cmd; - int err; -- cmd = ptxas + " -v --gpu-name=sm_" + std::to_string(cc) + " " + fsrc + " -o " + fsrc + ".o 2> " + flog; -+ cmd = cc == 90 ? -+ ptxas + " -v --gpu-name=sm_90a " + fsrc + " -o " + fsrc + ".o 2> " + flog : -+ ptxas + " -v --gpu-name=sm_" + std::to_string(cc) + " " + fsrc + " -o " + fsrc + ".o 2> " + flog; - err = system(cmd.c_str()); - if (err != 0) - { From 3b1b802c387e9189d563e9c51b2c931bdbcfa14c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Oct 2023 03:17:33 +0000 Subject: [PATCH 346/512] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- examples/tts/g2p/g2p_inference.py | 2 +- examples/tts/g2p/g2p_train_and_evaluate.py | 2 +- tools/asr_evaluator/asr_evaluator.py | 2 +- tools/ctc_segmentation/scripts/run_ctc_segmentation.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/tts/g2p/g2p_inference.py b/examples/tts/g2p/g2p_inference.py index 3bb241659e48..e7bffa888653 100644 --- a/examples/tts/g2p/g2p_inference.py +++ b/examples/tts/g2p/g2p_inference.py @@ -19,11 +19,11 @@ import pytorch_lightning as pl import torch from omegaconf import OmegaConf +from utils import get_metrics from nemo.collections.tts.models.base import G2PModel from nemo.core.config import hydra_runner from nemo.utils import logging -from utils import get_metrics """ python g2p_inference.py \ diff --git a/examples/tts/g2p/g2p_train_and_evaluate.py b/examples/tts/g2p/g2p_train_and_evaluate.py index 0f67aa11e09b..ff7b2b0675ea 100644 --- a/examples/tts/g2p/g2p_train_and_evaluate.py +++ b/examples/tts/g2p/g2p_train_and_evaluate.py @@ -16,13 +16,13 @@ import pytorch_lightning as pl import torch +from utils import get_model from nemo.collections.common.callbacks import LogEpochTimeCallback from nemo.collections.tts.models.base import G2PModel from nemo.core.config import hydra_runner from nemo.utils import logging, model_utils from nemo.utils.exp_manager import exp_manager -from utils import get_model """ This script supports training of G2PModels diff --git a/tools/asr_evaluator/asr_evaluator.py b/tools/asr_evaluator/asr_evaluator.py index 82b61290e66f..de05088a55b9 100644 --- a/tools/asr_evaluator/asr_evaluator.py +++ b/tools/asr_evaluator/asr_evaluator.py @@ -15,11 +15,11 @@ import git from omegaconf import OmegaConf, open_dict +from utils import cal_target_metadata_wer, run_asr_inference from nemo.collections.asr.parts.utils.eval_utils import cal_write_wer from nemo.core.config import hydra_runner from nemo.utils import logging -from utils import cal_target_metadata_wer, run_asr_inference """ This script serves as evaluator of ASR models diff --git a/tools/ctc_segmentation/scripts/run_ctc_segmentation.py b/tools/ctc_segmentation/scripts/run_ctc_segmentation.py index 884b6186e249..90e67ab844c7 100644 --- a/tools/ctc_segmentation/scripts/run_ctc_segmentation.py +++ b/tools/ctc_segmentation/scripts/run_ctc_segmentation.py @@ -24,9 +24,9 @@ import torch from joblib import Parallel, delayed from tqdm import tqdm +from utils import get_segments import nemo.collections.asr as nemo_asr -from utils import get_segments parser = argparse.ArgumentParser(description="CTC Segmentation") parser.add_argument("--output_dir", default="output", type=str, help="Path to output directory") From a1f7296e44af2c0febd7d7eb86f645df13ecb3c7 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Mon, 23 Oct 2023 10:28:06 -0700 Subject: [PATCH 347/512] Fix Eric's comments --- .../optim/{fused_adam.py => megatron_fused_adam.py} | 0 nemo/core/optim/optimizers.py | 2 +- nemo/utils/trt_utils.py | 12 ++++++++++-- tests/collections/asr/test_asr_ctcencdec_model.py | 4 ++-- 4 files changed, 13 insertions(+), 5 deletions(-) rename nemo/core/optim/{fused_adam.py => megatron_fused_adam.py} (100%) diff --git a/nemo/core/optim/fused_adam.py b/nemo/core/optim/megatron_fused_adam.py similarity index 100% rename from nemo/core/optim/fused_adam.py rename to nemo/core/optim/megatron_fused_adam.py diff --git a/nemo/core/optim/optimizers.py b/nemo/core/optim/optimizers.py index a19676517fbd..69bcb46641a2 100644 --- a/nemo/core/optim/optimizers.py +++ b/nemo/core/optim/optimizers.py @@ -68,7 +68,7 @@ try: # Try importing wrapper for Apex FusedAdam optimizer - from nemo.core.optim.fused_adam import MegatronFusedAdam + from nemo.core.optim.megatron_fused_adam import MegatronFusedAdam AVAILABLE_OPTIMIZERS['megatron_fused_adam'] = MegatronFusedAdam except (ImportError, ModuleNotFoundError): diff --git a/nemo/utils/trt_utils.py b/nemo/utils/trt_utils.py index 492ca3c2a6e0..73e899532691 100644 --- a/nemo/utils/trt_utils.py +++ b/nemo/utils/trt_utils.py @@ -11,8 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import tensorrt as trt -from polygraphy.backend.trt import CreateConfig, Profile, engine_from_network, network_from_onnx_path, save_engine + +try: + import tensorrt as trt + from polygraphy.backend.trt import CreateConfig, Profile, engine_from_network, network_from_onnx_path, save_engine + + HAVE_TRT = True + +except (ImportError, ModuleNotFoundError): + + HAVE_TRT = False def build_engine( diff --git a/tests/collections/asr/test_asr_ctcencdec_model.py b/tests/collections/asr/test_asr_ctcencdec_model.py index 02b2cadc8ab1..8d90079d0c51 100644 --- a/tests/collections/asr/test_asr_ctcencdec_model.py +++ b/tests/collections/asr/test_asr_ctcencdec_model.py @@ -167,11 +167,11 @@ def test_change_conv_asr_se_context_window(self, asr_model): new_config = asr_model.cfg assert old_cfg.encoder.jasper[0].se_context_size == -1 - assert new_config.encoder.jasper[0].se_context_size in [32, '32', '32-true'] + assert new_config.encoder.jasper[0].se_context_size == 32 for name, m in asr_model.encoder.named_modules(): if type(m).__class__.__name__ == 'SqueezeExcite': - assert m.context_window in [32, '32', '32-true'] + assert m.context_window == 32 @pytest.mark.unit def test_change_conv_asr_se_context_window_no_config_update(self, asr_model): From 41632c66eacf2f1ff3272ae68bb3aac113463929 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Mon, 23 Oct 2023 16:47:46 -0700 Subject: [PATCH 348/512] Revert "Hide multimodal code changes" This reverts commit d6900f9bc1922d086e2e388dcec6e3bd2b0f59dc. --- .../nsfw/conf/megatron_nsfw_config.yaml | 230 ++ .../nsfw/conf/megatron_nsfw_infer.yaml | 12 + .../nsfw/megatron_nsfw_infer.py | 78 + .../nsfw/megatron_nsfw_pretrain.py | 60 + examples/multimodal/convert_ckpt_to_nemo.py | 197 ++ .../clip/conf/megatron_clip_config.yaml | 250 ++ .../conf/megatron_clip_imagenet_zeroshot.yaml | 17 + .../clip/conf/megatron_clip_infer.yaml | 13 + .../clip/convert_external_clip_to_nemo.py | 284 +++ .../clip/megatron_clip_imagenet_zeroshot.py | 142 ++ .../foundation/clip/megatron_clip_infer.py | 89 + .../foundation/clip/megatron_clip_pretrain.py | 49 + .../controlnet/conf/controlnet_infer.yaml | 36 + .../controlnet/conf/controlnet_v1-5.yaml | 220 ++ .../generative/controlnet/controlnet_infer.py | 247 ++ .../generative/controlnet/controlnet_train.py | 54 + .../generative/convert_hf_ckpt_to_nemo.py | 226 ++ .../dreambooth/conf/dreambooth.yaml | 224 ++ .../dreambooth/conf/dreambooth_infer.yaml | 32 + .../generative/dreambooth/dreambooth.py | 119 + .../generative/dreambooth/dreambooth_infer.py | 44 + .../multimodal/generative/imagen/README.md | 104 + .../generative/imagen/conf/base64-2b.yaml | 142 ++ .../imagen/conf/base64-500m-edm.yaml | 136 ++ .../generative/imagen/conf/base64-500m.yaml | 144 ++ .../conf/base64-500m_online_encoding.yaml | 137 ++ .../generative/imagen/conf/fid_inference.yaml | 26 + .../imagen/conf/imagen_fid_images.yaml | 57 + .../imagen/conf/inference_pipeline.yaml | 42 + .../generative/imagen/conf/sr1024-600m.yaml | 145 ++ .../imagen/conf/sr256-400m-edm.yaml | 222 ++ .../generative/imagen/conf/sr256-400m.yaml | 150 ++ .../imagen/conf/sr256-450m-edm.yaml | 222 ++ .../imagen/conf/sr256-600m-edm-noise.yaml | 142 ++ .../imagen/conf/sr256-600m-edm.yaml | 219 ++ .../generative/imagen/conf/sr256-600m.yaml | 146 ++ .../generative/imagen/generate_fid_images.py | 104 + .../imagen/imagen_generate_images.py | 62 + .../generative/imagen/imagen_infer.py | 45 + .../generative/imagen/imagen_training.py | 71 + .../instruct_pix2pix/conf/sd_edit.yaml | 23 + .../instruct_pix2pix/conf/sd_finetune.yaml | 168 ++ .../instruct_pix2pix/sd_edit_cli.py | 174 ++ .../instruct_pix2pix/sd_finetune.py | 45 + .../generative/nerf/benchmark_callback.py | 82 + .../generative/nerf/config/config.yaml | 52 + .../nerf/config/model/background/random.yaml | 3 + .../nerf/config/model/background/static.yaml | 2 + .../nerf/config/model/background/tcnn.yaml | 19 + .../config/model/background/torchngp.yaml | 11 + .../nerf/config/model/data/data.yaml | 41 + .../nerf/config/model/dreamfusion-dmtet.yaml | 40 + .../nerf/config/model/dreamfusion.yaml | 40 + .../config/model/guidance/sd_huggingface.yaml | 4 + .../nerf/config/model/guidance/sd_nemo.yaml | 4 + .../nerf/config/model/guidance/sd_trt.yaml | 5 + .../nerf/config/model/loss/dmtet.yaml | 8 + .../nerf/config/model/loss/dreamfusion.yaml | 8 + .../config/model/material/basic_shading.yaml | 1 + .../nerf/config/model/nerf/tcnn.yaml | 32 + .../nerf/config/model/nerf/torchngp.yaml | 26 + .../nerf/config/model/optim/adan.yaml | 6 + .../nerf/config/model/renderer/nerfacc.yaml | 8 + .../config/model/renderer/nvdiffrast.yaml | 6 + .../model/renderer/torchngp_raymarching.yaml | 7 + examples/multimodal/generative/nerf/data.py | 73 + examples/multimodal/generative/nerf/main.py | 71 + .../stable_diffusion/conf/sd2_train.yaml | 192 ++ .../stable_diffusion/conf/sd_fid_images.yaml | 45 + .../stable_diffusion/conf/sd_infer.yaml | 31 + .../stable_diffusion/conf/sd_train.yaml | 207 ++ .../stable_diffusion/generate_fid_images.py | 83 + .../generative/stable_diffusion/sd_infer.py | 44 + .../generative/stable_diffusion/sd_train.py | 87 + .../mllm/kosmos/conf/kosmos_config.yaml | 317 +++ .../multimodal/mllm/kosmos/kosmos_pretrain.py | 51 + .../mllm/neva/conf/llava_config.yaml | 213 ++ .../mllm/neva/conf/neva_config.yaml | 212 ++ .../mllm/neva/conf/neva_finetune.yaml | 209 ++ .../mllm/neva/conf/neva_inference.yaml | 52 + .../multimodal/mllm/neva/conf/neva_peft.yaml | 215 ++ .../mllm/neva/convert_hf_llava_to_neva.py | 343 +++ .../multimodal/mllm/neva/neva_evaluation.py | 353 +++ .../multimodal/mllm/neva/neva_finetune.py | 55 + examples/multimodal/mllm/neva/neva_peft.py | 56 + .../multimodal/mllm/neva/neva_pretrain.py | 47 + examples/vision/convert_ckpt_to_nemo.py | 160 ++ .../megatron_vit_classification_config.yaml | 163 ++ .../megatron_vit_classification_evaluate.yaml | 15 + .../megatron_vit_classification_infer.yaml | 12 + .../megatron_vit_classification_evaluate.py | 124 + .../megatron_vit_classification_finetune.py | 52 + .../megatron_vit_classification_infer.py | 145 ++ .../megatron_vit_classification_pretrain.py | 44 + nemo/collections/multimodal/data/__init__.py | 13 + .../multimodal/data/clip/__init__.py | 13 + .../data/clip/augmentations/__init__.py | 13 + .../data/clip/augmentations/augmentations.py | 108 + .../multimodal/data/clip/clip_dataset.py | 194 ++ .../data/clip/imagenet_zeroshot_data.py | 1100 +++++++++ .../multimodal/data/common/__init__.py | 13 + .../multimodal/data/common/data_samplers.py | 133 + .../multimodal/data/common/webdataset.py | 286 +++ .../multimodal/data/common/webdataset_s3.py | 237 ++ .../data/controlnet/controlnet_dataset.py | 100 + .../data/dreambooth/dreambooth_dataset.py | 148 ++ .../multimodal/data/imagen/__init__.py | 13 + .../data/imagen/augmentations/__init__.py | 13 + .../imagen/augmentations/augmentations.py | 76 + .../data/imagen/augmentations/corruption.py | 33 + .../multimodal/data/imagen/imagen_dataset.py | 156 ++ .../data/instruct_pix2pix/__init__.py | 13 + .../data/instruct_pix2pix/edit_dataset.py | 132 + .../multimodal/data/kosmos/__init__.py | 0 .../multimodal/data/kosmos/kosmos_dataset.py | 366 +++ .../multimodal/data/nerf/__init__.py | 0 .../multimodal/data/nerf/cameras.py | 178 ++ .../multimodal/data/nerf/circle_poses.py | 214 ++ .../multimodal/data/nerf/random_poses.py | 436 ++++ .../collections/multimodal/data/nerf/utils.py | 204 ++ .../multimodal/data/neva/__init__.py | 0 .../multimodal/data/neva/conversation.py | 406 +++ .../multimodal/data/neva/neva_dataset.py | 584 +++++ .../multimodal/data/nsfw/__init__.py | 13 + .../multimodal/data/nsfw/nsfw_dataset.py | 53 + .../data/stable_diffusion/__init__.py | 13 + .../stable_diffusion/augmentation/__init__.py | 13 + .../augmentation/augmentations.py | 71 + .../stable_diffusion_dataset.py | 184 ++ .../collections/multimodal/losses/__init__.py | 13 + .../multimodal/losses/clip_loss.py | 112 + .../collections/multimodal/models/__init__.py | 13 + .../multimodal/models/clip/__init__.py | 13 + .../models/clip/megatron_clip_models.py | 1017 ++++++++ .../models/content_filter/__init__.py | 13 + .../megatron_nsfw_clip_models.py | 398 +++ .../multimodal/models/controlnet/__init__.py | 13 + .../models/controlnet/controlnet.py | 1002 ++++++++ .../models/controlnet/uniformer/LICENSE | 203 ++ .../models/controlnet/uniformer/__init__.py | 33 + .../configs/_base_/datasets/ade20k.py | 58 + .../configs/_base_/datasets/chase_db1.py | 64 + .../configs/_base_/datasets/cityscapes.py | 50 + .../_base_/datasets/cityscapes_769x769.py | 32 + .../configs/_base_/datasets/drive.py | 64 + .../uniformer/configs/_base_/datasets/hrf.py | 64 + .../configs/_base_/datasets/pascal_context.py | 64 + .../_base_/datasets/pascal_context_59.py | 64 + .../configs/_base_/datasets/pascal_voc12.py | 61 + .../_base_/datasets/pascal_voc12_aug.py | 8 + .../configs/_base_/datasets/stare.py | 64 + .../configs/_base_/default_runtime.py | 15 + .../configs/_base_/models/ann_r50-d8.py | 48 + .../configs/_base_/models/apcnet_r50-d8.py | 46 + .../configs/_base_/models/ccnet_r50-d8.py | 46 + .../uniformer/configs/_base_/models/cgnet.py | 54 + .../configs/_base_/models/danet_r50-d8.py | 46 + .../configs/_base_/models/deeplabv3_r50-d8.py | 46 + .../_base_/models/deeplabv3_unet_s5-d16.py | 52 + .../_base_/models/deeplabv3plus_r50-d8.py | 48 + .../configs/_base_/models/dmnet_r50-d8.py | 46 + .../configs/_base_/models/dnl_r50-d8.py | 48 + .../configs/_base_/models/emanet_r50-d8.py | 49 + .../configs/_base_/models/encnet_r50-d8.py | 49 + .../configs/_base_/models/fast_scnn.py | 59 + .../configs/_base_/models/fcn_hr18.py | 37 + .../configs/_base_/models/fcn_r50-d8.py | 47 + .../configs/_base_/models/fcn_unet_s5-d16.py | 53 + .../configs/_base_/models/fpn_r50.py | 34 + .../configs/_base_/models/fpn_uniformer.py | 32 + .../configs/_base_/models/gcnet_r50-d8.py | 48 + .../configs/_base_/models/lraspp_m-v3-d8.py | 22 + .../configs/_base_/models/nonlocal_r50-d8.py | 48 + .../configs/_base_/models/ocrnet_hr18.py | 53 + .../configs/_base_/models/ocrnet_r50-d8.py | 49 + .../configs/_base_/models/pointrend_r50.py | 49 + .../configs/_base_/models/psanet_r50-d8.py | 51 + .../configs/_base_/models/pspnet_r50-d8.py | 46 + .../_base_/models/pspnet_unet_s5-d16.py | 52 + .../configs/_base_/models/upernet_r50.py | 46 + .../_base_/models/upernet_uniformer.py | 45 + .../configs/_base_/schedules/schedule_160k.py | 9 + .../configs/_base_/schedules/schedule_20k.py | 9 + .../configs/_base_/schedules/schedule_40k.py | 9 + .../configs/_base_/schedules/schedule_80k.py | 9 + .../exp/upernet_global_small/config.py | 48 + .../uniformer/exp/upernet_global_small/run.sh | 10 + .../exp/upernet_global_small/test.sh | 10 + .../exp/upernet_global_small/test_config_g.py | 48 + .../upernet_global_small/test_config_h32.py | 49 + .../upernet_global_small/test_config_w32.py | 49 + .../controlnet/uniformer/mmcv/__init__.py | 15 + .../uniformer/mmcv/arraymisc/__init__.py | 4 + .../uniformer/mmcv/arraymisc/quantization.py | 49 + .../controlnet/uniformer/mmcv/cnn/__init__.py | 131 + .../controlnet/uniformer/mmcv/cnn/alexnet.py | 62 + .../uniformer/mmcv/cnn/bricks/__init__.py | 61 + .../uniformer/mmcv/cnn/bricks/activation.py | 93 + .../mmcv/cnn/bricks/context_block.py | 123 + .../uniformer/mmcv/cnn/bricks/conv.py | 44 + .../cnn/bricks/conv2d_adaptive_padding.py | 46 + .../uniformer/mmcv/cnn/bricks/conv_module.py | 206 ++ .../uniformer/mmcv/cnn/bricks/conv_ws.py | 121 + .../bricks/depthwise_separable_conv_module.py | 95 + .../uniformer/mmcv/cnn/bricks/drop.py | 64 + .../mmcv/cnn/bricks/generalized_attention.py | 346 +++ .../uniformer/mmcv/cnn/bricks/hsigmoid.py | 34 + .../uniformer/mmcv/cnn/bricks/hswish.py | 29 + .../uniformer/mmcv/cnn/bricks/non_local.py | 272 +++ .../uniformer/mmcv/cnn/bricks/norm.py | 148 ++ .../uniformer/mmcv/cnn/bricks/padding.py | 36 + .../uniformer/mmcv/cnn/bricks/plugin.py | 87 + .../uniformer/mmcv/cnn/bricks/registry.py | 16 + .../uniformer/mmcv/cnn/bricks/scale.py | 21 + .../uniformer/mmcv/cnn/bricks/swish.py | 25 + .../uniformer/mmcv/cnn/bricks/transformer.py | 607 +++++ .../uniformer/mmcv/cnn/bricks/upsample.py | 83 + .../uniformer/mmcv/cnn/bricks/wrappers.py | 173 ++ .../controlnet/uniformer/mmcv/cnn/builder.py | 28 + .../controlnet/uniformer/mmcv/cnn/resnet.py | 271 ++ .../uniformer/mmcv/cnn/utils/__init__.py | 48 + .../uniformer/mmcv/cnn/utils/flops_counter.py | 580 +++++ .../uniformer/mmcv/cnn/utils/fuse_conv_bn.py | 56 + .../uniformer/mmcv/cnn/utils/sync_bn.py | 59 + .../uniformer/mmcv/cnn/utils/weight_init.py | 644 +++++ .../controlnet/uniformer/mmcv/cnn/vgg.py | 159 ++ .../uniformer/mmcv/engine/__init__.py | 4 + .../controlnet/uniformer/mmcv/engine/test.py | 195 ++ .../uniformer/mmcv/fileio/__init__.py | 19 + .../uniformer/mmcv/fileio/file_client.py | 1127 +++++++++ .../mmcv/fileio/handlers/__init__.py | 7 + .../uniformer/mmcv/fileio/handlers/base.py | 30 + .../mmcv/fileio/handlers/json_handler.py | 35 + .../mmcv/fileio/handlers/pickle_handler.py | 26 + .../mmcv/fileio/handlers/yaml_handler.py | 24 + .../controlnet/uniformer/mmcv/fileio/io.py | 148 ++ .../controlnet/uniformer/mmcv/fileio/parse.py | 89 + .../uniformer/mmcv/image/__init__.py | 106 + .../uniformer/mmcv/image/colorspace.py | 304 +++ .../uniformer/mmcv/image/geometric.py | 671 +++++ .../controlnet/uniformer/mmcv/image/io.py | 256 ++ .../controlnet/uniformer/mmcv/image/misc.py | 43 + .../uniformer/mmcv/image/photometric.py | 422 ++++ .../uniformer/mmcv/model_zoo/deprecated.json | 6 + .../uniformer/mmcv/model_zoo/mmcls.json | 31 + .../uniformer/mmcv/model_zoo/open_mmlab.json | 50 + .../controlnet/uniformer/mmcv/ops/__init__.py | 134 + .../uniformer/mmcv/ops/assign_score_withk.py | 117 + .../uniformer/mmcv/ops/ball_query.py | 49 + .../controlnet/uniformer/mmcv/ops/bbox.py | 71 + .../uniformer/mmcv/ops/border_align.py | 98 + .../uniformer/mmcv/ops/box_iou_rotated.py | 44 + .../controlnet/uniformer/mmcv/ops/carafe.py | 281 +++ .../uniformer/mmcv/ops/cc_attention.py | 81 + .../uniformer/mmcv/ops/contour_expand.py | 45 + .../uniformer/mmcv/ops/corner_pool.py | 162 ++ .../uniformer/mmcv/ops/correlation.py | 197 ++ .../uniformer/mmcv/ops/deform_conv.py | 406 +++ .../uniformer/mmcv/ops/deform_roi_pool.py | 165 ++ .../uniformer/mmcv/ops/deprecated_wrappers.py | 43 + .../uniformer/mmcv/ops/focal_loss.py | 183 ++ .../mmcv/ops/furthest_point_sample.py | 74 + .../mmcv/ops/fused_bias_leakyrelu.py | 249 ++ .../uniformer/mmcv/ops/gather_points.py | 47 + .../uniformer/mmcv/ops/group_points.py | 206 ++ .../controlnet/uniformer/mmcv/ops/info.py | 36 + .../controlnet/uniformer/mmcv/ops/iou3d.py | 82 + .../controlnet/uniformer/mmcv/ops/knn.py | 73 + .../uniformer/mmcv/ops/masked_conv.py | 82 + .../uniformer/mmcv/ops/merge_cells.py | 140 ++ .../mmcv/ops/modulated_deform_conv.py | 280 +++ .../mmcv/ops/multi_scale_deform_attn.py | 348 +++ .../controlnet/uniformer/mmcv/ops/nms.py | 383 +++ .../uniformer/mmcv/ops/pixel_group.py | 71 + .../uniformer/mmcv/ops/point_sample.py | 317 +++ .../uniformer/mmcv/ops/points_in_boxes.py | 111 + .../uniformer/mmcv/ops/points_sampler.py | 169 ++ .../controlnet/uniformer/mmcv/ops/psa_mask.py | 85 + .../uniformer/mmcv/ops/roi_align.py | 204 ++ .../uniformer/mmcv/ops/roi_align_rotated.py | 161 ++ .../controlnet/uniformer/mmcv/ops/roi_pool.py | 79 + .../uniformer/mmcv/ops/roiaware_pool3d.py | 100 + .../uniformer/mmcv/ops/roipoint_pool3d.py | 75 + .../controlnet/uniformer/mmcv/ops/saconv.py | 128 + .../uniformer/mmcv/ops/scatter_points.py | 132 + .../controlnet/uniformer/mmcv/ops/sync_bn.py | 288 +++ .../uniformer/mmcv/ops/three_interpolate.py | 62 + .../controlnet/uniformer/mmcv/ops/three_nn.py | 50 + .../uniformer/mmcv/ops/tin_shift.py | 67 + .../uniformer/mmcv/ops/upfirdn2d.py | 307 +++ .../controlnet/uniformer/mmcv/ops/voxelize.py | 115 + .../uniformer/mmcv/parallel/__init__.py | 19 + .../uniformer/mmcv/parallel/_functions.py | 76 + .../uniformer/mmcv/parallel/collate.py | 69 + .../uniformer/mmcv/parallel/data_container.py | 83 + .../uniformer/mmcv/parallel/data_parallel.py | 93 + .../uniformer/mmcv/parallel/distributed.py | 106 + .../mmcv/parallel/distributed_deprecated.py | 58 + .../uniformer/mmcv/parallel/registry.py | 8 + .../uniformer/mmcv/parallel/scatter_gather.py | 59 + .../uniformer/mmcv/parallel/utils.py | 20 + .../uniformer/mmcv/runner/__init__.py | 118 + .../uniformer/mmcv/runner/base_module.py | 194 ++ .../uniformer/mmcv/runner/base_runner.py | 515 ++++ .../uniformer/mmcv/runner/builder.py | 21 + .../uniformer/mmcv/runner/checkpoint.py | 670 +++++ .../mmcv/runner/default_constructor.py | 43 + .../uniformer/mmcv/runner/dist_utils.py | 157 ++ .../mmcv/runner/epoch_based_runner.py | 172 ++ .../uniformer/mmcv/runner/fp16_utils.py | 388 +++ .../uniformer/mmcv/runner/hooks/__init__.py | 58 + .../uniformer/mmcv/runner/hooks/checkpoint.py | 156 ++ .../uniformer/mmcv/runner/hooks/closure.py | 10 + .../uniformer/mmcv/runner/hooks/ema.py | 84 + .../uniformer/mmcv/runner/hooks/evaluation.py | 493 ++++ .../uniformer/mmcv/runner/hooks/hook.py | 100 + .../uniformer/mmcv/runner/hooks/iter_timer.py | 17 + .../mmcv/runner/hooks/logger/__init__.py | 20 + .../mmcv/runner/hooks/logger/base.py | 157 ++ .../mmcv/runner/hooks/logger/dvclive.py | 51 + .../mmcv/runner/hooks/logger/mlflow.py | 70 + .../mmcv/runner/hooks/logger/neptune.py | 75 + .../mmcv/runner/hooks/logger/pavi.py | 113 + .../mmcv/runner/hooks/logger/tensorboard.py | 49 + .../mmcv/runner/hooks/logger/text.py | 235 ++ .../mmcv/runner/hooks/logger/wandb.py | 54 + .../uniformer/mmcv/runner/hooks/lr_updater.py | 615 +++++ .../uniformer/mmcv/runner/hooks/memory.py | 24 + .../mmcv/runner/hooks/momentum_updater.py | 421 ++++ .../uniformer/mmcv/runner/hooks/optimizer.py | 461 ++++ .../uniformer/mmcv/runner/hooks/profiler.py | 174 ++ .../mmcv/runner/hooks/sampler_seed.py | 20 + .../mmcv/runner/hooks/sync_buffer.py | 22 + .../mmcv/runner/iter_based_runner.py | 256 ++ .../uniformer/mmcv/runner/log_buffer.py | 40 + .../mmcv/runner/optimizer/__init__.py | 11 + .../mmcv/runner/optimizer/builder.py | 40 + .../runner/optimizer/default_constructor.py | 246 ++ .../uniformer/mmcv/runner/priority.py | 60 + .../controlnet/uniformer/mmcv/runner/utils.py | 92 + .../uniformer/mmcv/utils/__init__.py | 185 ++ .../controlnet/uniformer/mmcv/utils/config.py | 658 +++++ .../controlnet/uniformer/mmcv/utils/env.py | 100 + .../uniformer/mmcv/utils/ext_loader.py | 72 + .../uniformer/mmcv/utils/logging.py | 109 + .../controlnet/uniformer/mmcv/utils/misc.py | 371 +++ .../uniformer/mmcv/utils/parrots_jit.py | 33 + .../uniformer/mmcv/utils/parrots_wrapper.py | 106 + .../controlnet/uniformer/mmcv/utils/path.py | 99 + .../uniformer/mmcv/utils/progressbar.py | 204 ++ .../uniformer/mmcv/utils/registry.py | 303 +++ .../uniformer/mmcv/utils/testing.py | 138 ++ .../controlnet/uniformer/mmcv/utils/timer.py | 117 + .../controlnet/uniformer/mmcv/utils/trace.py | 24 + .../uniformer/mmcv/utils/version_utils.py | 88 + .../controlnet/uniformer/mmcv/version.py | 36 + .../uniformer/mmcv/video/__init__.py | 29 + .../controlnet/uniformer/mmcv/video/io.py | 310 +++ .../uniformer/mmcv/video/optflow.py | 248 ++ .../uniformer/mmcv/video/processing.py | 128 + .../uniformer/mmcv/visualization/__init__.py | 15 + .../uniformer/mmcv/visualization/color.py | 52 + .../uniformer/mmcv/visualization/image.py | 144 ++ .../uniformer/mmcv/visualization/optflow.py | 109 + .../uniformer/mmcv_custom/__init__.py | 5 + .../uniformer/mmcv_custom/checkpoint.py | 477 ++++ .../uniformer/mmseg/apis/__init__.py | 14 + .../uniformer/mmseg/apis/inference.py | 127 + .../controlnet/uniformer/mmseg/apis/test.py | 217 ++ .../controlnet/uniformer/mmseg/apis/train.py | 109 + .../uniformer/mmseg/core/__init__.py | 3 + .../mmseg/core/evaluation/__init__.py | 14 + .../mmseg/core/evaluation/class_names.py | 458 ++++ .../mmseg/core/evaluation/eval_hooks.py | 108 + .../mmseg/core/evaluation/metrics.py | 297 +++ .../uniformer/mmseg/core/seg/__init__.py | 4 + .../uniformer/mmseg/core/seg/builder.py | 8 + .../mmseg/core/seg/sampler/__init__.py | 4 + .../core/seg/sampler/base_pixel_sampler.py | 12 + .../core/seg/sampler/ohem_pixel_sampler.py | 76 + .../uniformer/mmseg/core/utils/__init__.py | 3 + .../uniformer/mmseg/core/utils/misc.py | 17 + .../uniformer/mmseg/datasets/__init__.py | 30 + .../uniformer/mmseg/datasets/ade.py | 322 +++ .../uniformer/mmseg/datasets/builder.py | 172 ++ .../uniformer/mmseg/datasets/chase_db1.py | 25 + .../uniformer/mmseg/datasets/cityscapes.py | 241 ++ .../uniformer/mmseg/datasets/custom.py | 392 +++ .../mmseg/datasets/dataset_wrappers.py | 50 + .../uniformer/mmseg/datasets/drive.py | 25 + .../uniformer/mmseg/datasets/hrf.py | 23 + .../mmseg/datasets/pascal_context.py | 294 +++ .../mmseg/datasets/pipelines/__init__.py | 43 + .../mmseg/datasets/pipelines/compose.py | 51 + .../mmseg/datasets/pipelines/formating.py | 294 +++ .../mmseg/datasets/pipelines/loading.py | 145 ++ .../mmseg/datasets/pipelines/test_time_aug.py | 118 + .../mmseg/datasets/pipelines/transforms.py | 839 +++++++ .../uniformer/mmseg/datasets/stare.py | 25 + .../uniformer/mmseg/datasets/voc.py | 65 + .../uniformer/mmseg/models/__init__.py | 17 + .../mmseg/models/backbones/__init__.py | 27 + .../uniformer/mmseg/models/backbones/cgnet.py | 362 +++ .../mmseg/models/backbones/fast_scnn.py | 355 +++ .../uniformer/mmseg/models/backbones/hrnet.py | 536 ++++ .../mmseg/models/backbones/mobilenet_v2.py | 183 ++ .../mmseg/models/backbones/mobilenet_v3.py | 267 ++ .../mmseg/models/backbones/resnest.py | 299 +++ .../mmseg/models/backbones/resnet.py | 650 +++++ .../mmseg/models/backbones/resnext.py | 124 + .../uniformer/mmseg/models/backbones/unet.py | 451 ++++ .../mmseg/models/backbones/uniformer.py | 554 +++++ .../uniformer/mmseg/models/backbones/vit.py | 443 ++++ .../uniformer/mmseg/models/builder.py | 43 + .../mmseg/models/decode_heads/__init__.py | 45 + .../mmseg/models/decode_heads/ann_head.py | 259 ++ .../mmseg/models/decode_heads/apc_head.py | 141 ++ .../mmseg/models/decode_heads/aspp_head.py | 106 + .../decode_heads/cascade_decode_head.py | 56 + .../mmseg/models/decode_heads/cc_head.py | 41 + .../mmseg/models/decode_heads/da_head.py | 174 ++ .../mmseg/models/decode_heads/decode_head.py | 226 ++ .../mmseg/models/decode_heads/dm_head.py | 137 ++ .../mmseg/models/decode_heads/dnl_head.py | 126 + .../mmseg/models/decode_heads/ema_head.py | 155 ++ .../mmseg/models/decode_heads/enc_head.py | 174 ++ .../mmseg/models/decode_heads/fcn_head.py | 81 + .../mmseg/models/decode_heads/fpn_head.py | 61 + .../mmseg/models/decode_heads/gc_head.py | 41 + .../mmseg/models/decode_heads/lraspp_head.py | 77 + .../mmseg/models/decode_heads/nl_head.py | 46 + .../mmseg/models/decode_heads/ocr_head.py | 124 + .../mmseg/models/decode_heads/point_head.py | 312 +++ .../mmseg/models/decode_heads/psa_head.py | 186 ++ .../mmseg/models/decode_heads/psp_head.py | 101 + .../models/decode_heads/sep_aspp_head.py | 82 + .../mmseg/models/decode_heads/sep_fcn_head.py | 54 + .../mmseg/models/decode_heads/uper_head.py | 118 + .../uniformer/mmseg/models/losses/__init__.py | 19 + .../uniformer/mmseg/models/losses/accuracy.py | 77 + .../mmseg/models/losses/cross_entropy_loss.py | 155 ++ .../mmseg/models/losses/dice_loss.py | 98 + .../mmseg/models/losses/lovasz_loss.py | 304 +++ .../uniformer/mmseg/models/losses/utils.py | 117 + .../uniformer/mmseg/models/necks/__init__.py | 4 + .../uniformer/mmseg/models/necks/fpn.py | 210 ++ .../mmseg/models/necks/multilevel_neck.py | 53 + .../mmseg/models/segmentors/__init__.py | 5 + .../uniformer/mmseg/models/segmentors/base.py | 256 ++ .../segmentors/cascade_encoder_decoder.py | 95 + .../models/segmentors/encoder_decoder.py | 275 +++ .../uniformer/mmseg/models/utils/__init__.py | 20 + .../uniformer/mmseg/models/utils/drop.py | 29 + .../mmseg/models/utils/inverted_residual.py | 218 ++ .../mmseg/models/utils/make_divisible.py | 27 + .../uniformer/mmseg/models/utils/res_layer.py | 96 + .../uniformer/mmseg/models/utils/se_layer.py | 61 + .../models/utils/self_attention_block.py | 162 ++ .../mmseg/models/utils/up_conv_block.py | 106 + .../mmseg/models/utils/weight_init.py | 63 + .../uniformer/mmseg/ops/__init__.py | 4 + .../uniformer/mmseg/ops/encoding.py | 65 + .../uniformer/mmseg/ops/wrappers.py | 43 + .../uniformer/mmseg/utils/__init__.py | 4 + .../uniformer/mmseg/utils/collect_env.py | 16 + .../uniformer/mmseg/utils/logger.py | 27 + .../multimodal/models/controlnet/util.py | 98 + .../multimodal/models/dreambooth/__init__.py | 13 + .../models/dreambooth/dreambooth.py | 654 +++++ .../multimodal/models/dreambooth/util.py | 169 ++ .../multimodal/models/imagen/__init__.py | 13 + .../multimodal/models/imagen/imagen.py | 602 +++++ .../models/imagen/imagen_pipeline.py | 355 +++ .../multimodal/models/imagen/precond.py | 174 ++ .../models/instruct_pix2pix/__init__.py | 13 + .../models/instruct_pix2pix/ldm/__init__.py | 13 + .../models/instruct_pix2pix/ldm/ddpm_edit.py | 268 ++ .../multimodal/models/kosmos/__init__.py | 0 .../models/kosmos/megatron_kosmos_model.py | 1154 +++++++++ .../models/kosmos/perceiver_resampler.py | 131 + .../multimodal/models/nerf/base.py | 24 + .../multimodal/models/nerf/dreamfusion.py | 313 +++ .../multimodal/models/nerf/txt2nerf_base.py | 81 + .../multimodal/models/neva/__init__.py | 13 + .../multimodal/models/neva/neva_model.py | 1076 ++++++++ .../models/neva/neva_peft_models.py | 60 + .../models/stable_diffusion/__init__.py | 13 + .../stable_diffusion/diffusion_model.py | 81 + .../models/stable_diffusion/ldm/__init__.py | 13 + .../stable_diffusion/ldm/autoencoder.py | 608 +++++ .../models/stable_diffusion/ldm/ddpm.py | 2170 +++++++++++++++++ .../models/stable_diffusion/ldm_config.py | 144 ++ .../stable_diffusion/samplers/__init__.py | 16 + .../stable_diffusion/samplers/base_sampler.py | 339 +++ .../models/stable_diffusion/samplers/ddim.py | 119 + .../stable_diffusion/samplers/dpmsolver.py | 493 ++++ .../stable_diffusion/samplers/k_diffusion.py | 838 +++++++ .../stable_diffusion/samplers/para_ddim.py | 231 ++ .../models/stable_diffusion/samplers/plms.py | 105 + .../stable_diffusion/samplers/sampler_dpm.py | 77 + .../multimodal/modules/__init__.py | 13 + .../imagen/diffusionmodules/attention.py | 317 +++ .../imagen/diffusionmodules/attention_alt.py | 321 +++ .../modules/imagen/diffusionmodules/blocks.py | 906 +++++++ .../modules/imagen/diffusionmodules/embs.py | 69 + .../modules/imagen/diffusionmodules/layers.py | 251 ++ .../modules/imagen/diffusionmodules/nets.py | 642 +++++ .../modules/imagen/encoder/t5encoder.json | 51 + .../modules/imagen/encoder/t5encoder.py | 59 + .../modules/imagen/sampler/batch_ops.py | 57 + .../modules/imagen/sampler/continuous_ddpm.py | 168 ++ .../modules/imagen/sampler/sampler.py | 250 ++ .../multimodal/modules/nerf/__init__.py | 0 .../nerf/background/nerf_background_base.py | 22 + .../nerf/background/random_background.py | 19 + .../nerf/background/static_background.py | 14 + .../nerf/background/tcnn_background.py | 32 + .../nerf/background/torchngp_background.py | 31 + .../modules/nerf/geometry/__init__.py | 0 .../multimodal/modules/nerf/geometry/dmtet.py | 150 ++ .../modules/nerf/geometry/layers.py | 129 + .../modules/nerf/geometry/nerf_base.py | 360 +++ .../modules/nerf/geometry/tcnn_nerf.py | 108 + .../modules/nerf/geometry/torchngp_nerf.py | 114 + .../modules/nerf/guidance/__init__.py | 0 .../stablediffusion_huggingface_pipeline.py | 142 ++ .../guidance/stablediffusion_nemo_pipeline.py | 129 + .../guidance/stablediffusion_trt_pipeline.py | 221 ++ .../nerf/guidance/txt2img_guidance_base.py | 6 + .../nerf/loss/laplacian_smooth_loss.py | 38 + .../nerf/loss/normal_consistency_loss.py | 56 + .../modules/nerf/materials/__init__.py | 0 .../modules/nerf/materials/basic_shading.py | 66 + .../modules/nerf/materials/materials_base.py | 29 + .../modules/nerf/renderers/__init__.py | 0 .../modules/nerf/renderers/base_renderer.py | 18 + .../nerf/renderers/base_sdf_renderer.py | 20 + .../nerf/renderers/base_volume_renderer.py | 7 + .../nerf/renderers/nerfacc_volume_renderer.py | 364 +++ .../nerf/renderers/nvdiffrast_renderer.py | 222 ++ .../renderers/torchngp_volume_renderer.py | 275 +++ .../multimodal/modules/nerf/utils/__init__.py | 0 .../modules/nerf/utils/activation.py | 20 + .../modules/nerf/utils/torch_ngp/__init__.py | 0 .../modules/nerf/utils/torch_ngp/encoding.py | 137 ++ .../nerf/utils/torch_ngp/freqencoder.py | 73 + .../nerf/utils/torch_ngp/gridencoder.py | 287 +++ .../nerf/utils/torch_ngp/raymarching.py | 551 +++++ .../modules/nerf/utils/torch_ngp/shencoder.py | 82 + .../modules/nerf/utils/trt_engine.py | 173 ++ .../modules/stable_diffusion/__init__.py | 13 + .../modules/stable_diffusion/attention.py | 408 ++++ .../diffusionmodules/__init__.py | 13 + .../diffusionmodules/model.py | 878 +++++++ .../diffusionmodules/openaimodel.py | 1191 +++++++++ .../stable_diffusion/diffusionmodules/util.py | 309 +++ .../distributions/__init__.py | 13 + .../distributions/distributions.py | 98 + .../stable_diffusion/encoders/__init__.py | 13 + .../stable_diffusion/encoders/modules.py | 471 ++++ .../encoders/x_transformer.py | 629 +++++ nemo/collections/multimodal/parts/__init__.py | 13 + .../multimodal/parts/imagen/__init__.py | 13 + .../multimodal/parts/imagen/utils.py | 29 + .../parts/stable_diffusion/__init__.py | 13 + .../parts/stable_diffusion/lr_scheduler.py | 112 + .../parts/stable_diffusion/pipeline.py | 202 ++ .../parts/stable_diffusion/utils.py | 213 ++ nemo/collections/multimodal/parts/utils.py | 267 ++ nemo/collections/vision/__init__.py | 38 + nemo/collections/vision/data/__init__.py | 13 + .../vision/data/imagenet_classnames.py | 1016 ++++++++ .../vision/data/megatron/__init__.py | 13 + .../vision/data/megatron/autoaugment.py | 270 ++ .../vision/data/megatron/data_samplers.py | 89 + .../vision/data/megatron/image_folder.py | 286 +++ .../vision/data/megatron/vit_dataset.py | 284 +++ nemo/collections/vision/losses/__init__.py | 13 + nemo/collections/vision/metrics/__init__.py | 13 + nemo/collections/vision/models/__init__.py | 13 + .../megatron_vit_classification_models.py | 801 ++++++ nemo/collections/vision/modules/__init__.py | 13 + .../vision/modules/common/__init__.py | 13 + .../modules/common/megatron/__init__.py | 13 + .../common/megatron/vision_transformer.py | 492 ++++ .../vision/modules/vit/__init__.py | 13 + .../vision/modules/vit/vit_backbone.py | 361 +++ nemo/collections/vision/parts/__init__.py | 13 + requirements/requirements_vision.txt | 8 + scripts/fid-eval-text2img/TFinception_V3.py | 231 ++ .../fid-eval-text2img/compute_clip_score.py | 120 + scripts/fid-eval-text2img/compute_fid.py | 409 ++++ scripts/fid-eval-text2img/eval_fid.py | 100 + scripts/fid-eval-text2img/fid_dataset.py | 128 + scripts/fid-eval-text2img/plot.py | 40 + .../collections/multimodal/test_clip_model.py | 482 ++++ tests/collections/vision/test_vit_model.py | 379 +++ utils/flash-attention.patch | 87 + utils/triton.patch | 53 + 599 files changed, 88910 insertions(+) create mode 100644 examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_config.yaml create mode 100755 examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_infer.yaml create mode 100644 examples/multimodal/content_filtering/nsfw/megatron_nsfw_infer.py create mode 100644 examples/multimodal/content_filtering/nsfw/megatron_nsfw_pretrain.py create mode 100644 examples/multimodal/convert_ckpt_to_nemo.py create mode 100644 examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml create mode 100755 examples/multimodal/foundation/clip/conf/megatron_clip_imagenet_zeroshot.yaml create mode 100755 examples/multimodal/foundation/clip/conf/megatron_clip_infer.yaml create mode 100644 examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py create mode 100644 examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py create mode 100644 examples/multimodal/foundation/clip/megatron_clip_infer.py create mode 100644 examples/multimodal/foundation/clip/megatron_clip_pretrain.py create mode 100644 examples/multimodal/generative/controlnet/conf/controlnet_infer.yaml create mode 100644 examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml create mode 100644 examples/multimodal/generative/controlnet/controlnet_infer.py create mode 100644 examples/multimodal/generative/controlnet/controlnet_train.py create mode 100644 examples/multimodal/generative/convert_hf_ckpt_to_nemo.py create mode 100644 examples/multimodal/generative/dreambooth/conf/dreambooth.yaml create mode 100644 examples/multimodal/generative/dreambooth/conf/dreambooth_infer.yaml create mode 100644 examples/multimodal/generative/dreambooth/dreambooth.py create mode 100644 examples/multimodal/generative/dreambooth/dreambooth_infer.py create mode 100644 examples/multimodal/generative/imagen/README.md create mode 100644 examples/multimodal/generative/imagen/conf/base64-2b.yaml create mode 100644 examples/multimodal/generative/imagen/conf/base64-500m-edm.yaml create mode 100644 examples/multimodal/generative/imagen/conf/base64-500m.yaml create mode 100644 examples/multimodal/generative/imagen/conf/base64-500m_online_encoding.yaml create mode 100644 examples/multimodal/generative/imagen/conf/fid_inference.yaml create mode 100644 examples/multimodal/generative/imagen/conf/imagen_fid_images.yaml create mode 100644 examples/multimodal/generative/imagen/conf/inference_pipeline.yaml create mode 100644 examples/multimodal/generative/imagen/conf/sr1024-600m.yaml create mode 100644 examples/multimodal/generative/imagen/conf/sr256-400m-edm.yaml create mode 100644 examples/multimodal/generative/imagen/conf/sr256-400m.yaml create mode 100644 examples/multimodal/generative/imagen/conf/sr256-450m-edm.yaml create mode 100644 examples/multimodal/generative/imagen/conf/sr256-600m-edm-noise.yaml create mode 100644 examples/multimodal/generative/imagen/conf/sr256-600m-edm.yaml create mode 100644 examples/multimodal/generative/imagen/conf/sr256-600m.yaml create mode 100644 examples/multimodal/generative/imagen/generate_fid_images.py create mode 100644 examples/multimodal/generative/imagen/imagen_generate_images.py create mode 100644 examples/multimodal/generative/imagen/imagen_infer.py create mode 100644 examples/multimodal/generative/imagen/imagen_training.py create mode 100644 examples/multimodal/generative/instruct_pix2pix/conf/sd_edit.yaml create mode 100644 examples/multimodal/generative/instruct_pix2pix/conf/sd_finetune.yaml create mode 100644 examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py create mode 100644 examples/multimodal/generative/instruct_pix2pix/sd_finetune.py create mode 100644 examples/multimodal/generative/nerf/benchmark_callback.py create mode 100644 examples/multimodal/generative/nerf/config/config.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/background/random.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/background/static.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/background/tcnn.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/background/torchngp.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/data/data.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/dreamfusion-dmtet.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/dreamfusion.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/guidance/sd_huggingface.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/guidance/sd_nemo.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/guidance/sd_trt.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/loss/dmtet.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/loss/dreamfusion.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/material/basic_shading.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/nerf/tcnn.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/nerf/torchngp.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/optim/adan.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/renderer/nerfacc.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/renderer/nvdiffrast.yaml create mode 100644 examples/multimodal/generative/nerf/config/model/renderer/torchngp_raymarching.yaml create mode 100644 examples/multimodal/generative/nerf/data.py create mode 100644 examples/multimodal/generative/nerf/main.py create mode 100644 examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml create mode 100644 examples/multimodal/generative/stable_diffusion/conf/sd_fid_images.yaml create mode 100644 examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml create mode 100644 examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml create mode 100644 examples/multimodal/generative/stable_diffusion/generate_fid_images.py create mode 100644 examples/multimodal/generative/stable_diffusion/sd_infer.py create mode 100644 examples/multimodal/generative/stable_diffusion/sd_train.py create mode 100644 examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml create mode 100644 examples/multimodal/mllm/kosmos/kosmos_pretrain.py create mode 100644 examples/multimodal/mllm/neva/conf/llava_config.yaml create mode 100644 examples/multimodal/mllm/neva/conf/neva_config.yaml create mode 100644 examples/multimodal/mllm/neva/conf/neva_finetune.yaml create mode 100644 examples/multimodal/mllm/neva/conf/neva_inference.yaml create mode 100644 examples/multimodal/mllm/neva/conf/neva_peft.yaml create mode 100644 examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py create mode 100644 examples/multimodal/mllm/neva/neva_evaluation.py create mode 100644 examples/multimodal/mllm/neva/neva_finetune.py create mode 100644 examples/multimodal/mllm/neva/neva_peft.py create mode 100644 examples/multimodal/mllm/neva/neva_pretrain.py create mode 100644 examples/vision/convert_ckpt_to_nemo.py create mode 100755 examples/vision/vision_transformer/conf/megatron_vit_classification_config.yaml create mode 100755 examples/vision/vision_transformer/conf/megatron_vit_classification_evaluate.yaml create mode 100755 examples/vision/vision_transformer/conf/megatron_vit_classification_infer.yaml create mode 100644 examples/vision/vision_transformer/megatron_vit_classification_evaluate.py create mode 100644 examples/vision/vision_transformer/megatron_vit_classification_finetune.py create mode 100644 examples/vision/vision_transformer/megatron_vit_classification_infer.py create mode 100644 examples/vision/vision_transformer/megatron_vit_classification_pretrain.py create mode 100644 nemo/collections/multimodal/data/__init__.py create mode 100644 nemo/collections/multimodal/data/clip/__init__.py create mode 100644 nemo/collections/multimodal/data/clip/augmentations/__init__.py create mode 100644 nemo/collections/multimodal/data/clip/augmentations/augmentations.py create mode 100644 nemo/collections/multimodal/data/clip/clip_dataset.py create mode 100644 nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py create mode 100644 nemo/collections/multimodal/data/common/__init__.py create mode 100644 nemo/collections/multimodal/data/common/data_samplers.py create mode 100644 nemo/collections/multimodal/data/common/webdataset.py create mode 100644 nemo/collections/multimodal/data/common/webdataset_s3.py create mode 100644 nemo/collections/multimodal/data/controlnet/controlnet_dataset.py create mode 100644 nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py create mode 100644 nemo/collections/multimodal/data/imagen/__init__.py create mode 100644 nemo/collections/multimodal/data/imagen/augmentations/__init__.py create mode 100644 nemo/collections/multimodal/data/imagen/augmentations/augmentations.py create mode 100644 nemo/collections/multimodal/data/imagen/augmentations/corruption.py create mode 100644 nemo/collections/multimodal/data/imagen/imagen_dataset.py create mode 100644 nemo/collections/multimodal/data/instruct_pix2pix/__init__.py create mode 100644 nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py create mode 100644 nemo/collections/multimodal/data/kosmos/__init__.py create mode 100644 nemo/collections/multimodal/data/kosmos/kosmos_dataset.py create mode 100644 nemo/collections/multimodal/data/nerf/__init__.py create mode 100644 nemo/collections/multimodal/data/nerf/cameras.py create mode 100644 nemo/collections/multimodal/data/nerf/circle_poses.py create mode 100644 nemo/collections/multimodal/data/nerf/random_poses.py create mode 100644 nemo/collections/multimodal/data/nerf/utils.py create mode 100644 nemo/collections/multimodal/data/neva/__init__.py create mode 100644 nemo/collections/multimodal/data/neva/conversation.py create mode 100644 nemo/collections/multimodal/data/neva/neva_dataset.py create mode 100644 nemo/collections/multimodal/data/nsfw/__init__.py create mode 100644 nemo/collections/multimodal/data/nsfw/nsfw_dataset.py create mode 100644 nemo/collections/multimodal/data/stable_diffusion/__init__.py create mode 100644 nemo/collections/multimodal/data/stable_diffusion/augmentation/__init__.py create mode 100644 nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py create mode 100644 nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py create mode 100644 nemo/collections/multimodal/losses/__init__.py create mode 100644 nemo/collections/multimodal/losses/clip_loss.py create mode 100644 nemo/collections/multimodal/models/__init__.py create mode 100644 nemo/collections/multimodal/models/clip/__init__.py create mode 100644 nemo/collections/multimodal/models/clip/megatron_clip_models.py create mode 100644 nemo/collections/multimodal/models/content_filter/__init__.py create mode 100644 nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py create mode 100644 nemo/collections/multimodal/models/controlnet/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/controlnet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/LICENSE create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_160k.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_20k.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_40k.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_80k.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/run.sh create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test.sh create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hsigmoid.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hswish.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/padding.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/registry.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/scale.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/swish.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/base.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/deprecated.json create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/mmcls.json create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/open_mmlab.json create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/registry.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/scatter_gather.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/utils.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sampler_seed.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sync_buffer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/priority.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/builder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/misc.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/dataset_wrappers.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/compose.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/make_divisible.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/__init__.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py create mode 100644 nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/logger.py create mode 100644 nemo/collections/multimodal/models/controlnet/util.py create mode 100644 nemo/collections/multimodal/models/dreambooth/__init__.py create mode 100644 nemo/collections/multimodal/models/dreambooth/dreambooth.py create mode 100644 nemo/collections/multimodal/models/dreambooth/util.py create mode 100644 nemo/collections/multimodal/models/imagen/__init__.py create mode 100644 nemo/collections/multimodal/models/imagen/imagen.py create mode 100644 nemo/collections/multimodal/models/imagen/imagen_pipeline.py create mode 100644 nemo/collections/multimodal/models/imagen/precond.py create mode 100644 nemo/collections/multimodal/models/instruct_pix2pix/__init__.py create mode 100644 nemo/collections/multimodal/models/instruct_pix2pix/ldm/__init__.py create mode 100644 nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py create mode 100644 nemo/collections/multimodal/models/kosmos/__init__.py create mode 100644 nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py create mode 100644 nemo/collections/multimodal/models/kosmos/perceiver_resampler.py create mode 100644 nemo/collections/multimodal/models/nerf/base.py create mode 100644 nemo/collections/multimodal/models/nerf/dreamfusion.py create mode 100644 nemo/collections/multimodal/models/nerf/txt2nerf_base.py create mode 100644 nemo/collections/multimodal/models/neva/__init__.py create mode 100644 nemo/collections/multimodal/models/neva/neva_model.py create mode 100644 nemo/collections/multimodal/models/neva/neva_peft_models.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/__init__.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/ldm/__init__.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/ldm_config.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/para_ddim.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py create mode 100644 nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py create mode 100644 nemo/collections/multimodal/modules/__init__.py create mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/attention.py create mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/attention_alt.py create mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py create mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/embs.py create mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py create mode 100644 nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py create mode 100644 nemo/collections/multimodal/modules/imagen/encoder/t5encoder.json create mode 100644 nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py create mode 100644 nemo/collections/multimodal/modules/imagen/sampler/batch_ops.py create mode 100644 nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py create mode 100644 nemo/collections/multimodal/modules/imagen/sampler/sampler.py create mode 100644 nemo/collections/multimodal/modules/nerf/__init__.py create mode 100644 nemo/collections/multimodal/modules/nerf/background/nerf_background_base.py create mode 100644 nemo/collections/multimodal/modules/nerf/background/random_background.py create mode 100644 nemo/collections/multimodal/modules/nerf/background/static_background.py create mode 100644 nemo/collections/multimodal/modules/nerf/background/tcnn_background.py create mode 100644 nemo/collections/multimodal/modules/nerf/background/torchngp_background.py create mode 100644 nemo/collections/multimodal/modules/nerf/geometry/__init__.py create mode 100644 nemo/collections/multimodal/modules/nerf/geometry/dmtet.py create mode 100644 nemo/collections/multimodal/modules/nerf/geometry/layers.py create mode 100644 nemo/collections/multimodal/modules/nerf/geometry/nerf_base.py create mode 100644 nemo/collections/multimodal/modules/nerf/geometry/tcnn_nerf.py create mode 100644 nemo/collections/multimodal/modules/nerf/geometry/torchngp_nerf.py create mode 100644 nemo/collections/multimodal/modules/nerf/guidance/__init__.py create mode 100644 nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_huggingface_pipeline.py create mode 100644 nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_nemo_pipeline.py create mode 100644 nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_trt_pipeline.py create mode 100644 nemo/collections/multimodal/modules/nerf/guidance/txt2img_guidance_base.py create mode 100644 nemo/collections/multimodal/modules/nerf/loss/laplacian_smooth_loss.py create mode 100644 nemo/collections/multimodal/modules/nerf/loss/normal_consistency_loss.py create mode 100644 nemo/collections/multimodal/modules/nerf/materials/__init__.py create mode 100644 nemo/collections/multimodal/modules/nerf/materials/basic_shading.py create mode 100644 nemo/collections/multimodal/modules/nerf/materials/materials_base.py create mode 100644 nemo/collections/multimodal/modules/nerf/renderers/__init__.py create mode 100644 nemo/collections/multimodal/modules/nerf/renderers/base_renderer.py create mode 100644 nemo/collections/multimodal/modules/nerf/renderers/base_sdf_renderer.py create mode 100644 nemo/collections/multimodal/modules/nerf/renderers/base_volume_renderer.py create mode 100644 nemo/collections/multimodal/modules/nerf/renderers/nerfacc_volume_renderer.py create mode 100644 nemo/collections/multimodal/modules/nerf/renderers/nvdiffrast_renderer.py create mode 100644 nemo/collections/multimodal/modules/nerf/renderers/torchngp_volume_renderer.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/__init__.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/activation.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/__init__.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/encoding.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/freqencoder.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/gridencoder.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/raymarching.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/torch_ngp/shencoder.py create mode 100644 nemo/collections/multimodal/modules/nerf/utils/trt_engine.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/__init__.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/attention.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/__init__.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/distributions/__init__.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/encoders/__init__.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py create mode 100644 nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py create mode 100644 nemo/collections/multimodal/parts/__init__.py create mode 100644 nemo/collections/multimodal/parts/imagen/__init__.py create mode 100644 nemo/collections/multimodal/parts/imagen/utils.py create mode 100644 nemo/collections/multimodal/parts/stable_diffusion/__init__.py create mode 100644 nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py create mode 100644 nemo/collections/multimodal/parts/stable_diffusion/pipeline.py create mode 100644 nemo/collections/multimodal/parts/stable_diffusion/utils.py create mode 100644 nemo/collections/multimodal/parts/utils.py create mode 100644 nemo/collections/vision/__init__.py create mode 100644 nemo/collections/vision/data/__init__.py create mode 100644 nemo/collections/vision/data/imagenet_classnames.py create mode 100644 nemo/collections/vision/data/megatron/__init__.py create mode 100644 nemo/collections/vision/data/megatron/autoaugment.py create mode 100644 nemo/collections/vision/data/megatron/data_samplers.py create mode 100644 nemo/collections/vision/data/megatron/image_folder.py create mode 100644 nemo/collections/vision/data/megatron/vit_dataset.py create mode 100644 nemo/collections/vision/losses/__init__.py create mode 100644 nemo/collections/vision/metrics/__init__.py create mode 100644 nemo/collections/vision/models/__init__.py create mode 100644 nemo/collections/vision/models/megatron_vit_classification_models.py create mode 100644 nemo/collections/vision/modules/__init__.py create mode 100644 nemo/collections/vision/modules/common/__init__.py create mode 100644 nemo/collections/vision/modules/common/megatron/__init__.py create mode 100644 nemo/collections/vision/modules/common/megatron/vision_transformer.py create mode 100644 nemo/collections/vision/modules/vit/__init__.py create mode 100644 nemo/collections/vision/modules/vit/vit_backbone.py create mode 100644 nemo/collections/vision/parts/__init__.py create mode 100644 requirements/requirements_vision.txt create mode 100644 scripts/fid-eval-text2img/TFinception_V3.py create mode 100644 scripts/fid-eval-text2img/compute_clip_score.py create mode 100644 scripts/fid-eval-text2img/compute_fid.py create mode 100644 scripts/fid-eval-text2img/eval_fid.py create mode 100644 scripts/fid-eval-text2img/fid_dataset.py create mode 100644 scripts/fid-eval-text2img/plot.py create mode 100644 tests/collections/multimodal/test_clip_model.py create mode 100644 tests/collections/vision/test_vit_model.py create mode 100644 utils/flash-attention.patch create mode 100644 utils/triton.patch diff --git a/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_config.yaml b/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_config.yaml new file mode 100644 index 000000000000..11dc65155cf5 --- /dev/null +++ b/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_config.yaml @@ -0,0 +1,230 @@ +name: megatron_clip +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + max_epochs: 10 + max_steps: 375000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + use_distributed_sampler: False + check_val_every_n_epoch: 1 + limit_val_batches: 1.0 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: megatron_nsfw + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 64 # limited by GPU memory + global_batch_size: 64 # will use more micro batches to reach global batch size + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_pretrained: null # used in fine-tuning + # multimodal configs + output_dim: 768 + # As the number of devices used to train increases, so does the space complexity of + # the logit matrix. Using a naïve all-gather scheme, space complexity will be + # `O(n^2)`. Instead, complexity may become effectively linear if the flags + # `--gather-with-grad` and `--local-loss` are used. This alteration results in one-to-one + # numerical results as the naïve method. + local_loss: False # calculate loss w/ local features @ global (instead of realizing full global @ global matrix) + gather_with_grad: True # enable full distributed gradient for feature gather, set this to False may cause convergence issue + + vision: + precision: ${trainer.precision} + patch_dim: 14 + img_h: 224 + img_w: 224 + image_mean: null + image_std: null + num_channels: 3 + drop_patch_rate: 0.0 + drop_path_rate: 0.0 + global_average_pool: false + output_dim: ${model.output_dim} + class_token_length: 1 + preprocess_layernorm: true + encoder_seq_length: 196 + max_position_embeddings: 196 + position_embedding_type: learned_parameters + num_layers: 24 + hidden_size: 1024 + ffn_hidden_size: 4096 + num_attention_heads: 16 + init_method_std: 0.02 + use_scaled_init_method: true + hidden_dropout: 0.0 + attention_dropout: 0.0 + kv_channels: null + apply_query_key_layer_scaling: true + normalization: layernorm + layernorm_epsilon: 1.0e-05 + do_layer_norm_weight_decay: false + pre_process: true + post_process: true + persist_layer_norm: true + activations_checkpoint_granularity: null + activations_checkpoint_method: null + activations_checkpoint_num_layers: null + sequence_parallel: false + native_amp_init_scale: 4294967296 + native_amp_growth_interval: 1000 + hysteresis: 2 + fp32_residual_connection: false + fp16_lm_cross_entropy: false + masked_softmax_fusion: true + bias_dropout_add_fusion: true + use_cpu_initialization: false + onnx_safe: false + gradient_accumulation_fusion: false + openai_gelu: false + bias_activation_fusion: false + megatron_legacy: true + activation: quick-gelu + + text: + precision: ${trainer.precision} + # text configs + output_dim: ${model.output_dim} + + encoder_seq_length: 77 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: learned_parameters + num_layers: 12 + hidden_size: 768 + ffn_hidden_size: 3072 + num_attention_heads: 12 + init_method_std: 0.02 + use_scaled_init_method: true + hidden_dropout: 0.0 + attention_dropout: 0.0 + kv_channels: null + apply_query_key_layer_scaling: true + normalization: layernorm + layernorm_epsilon: 1.0e-05 + do_layer_norm_weight_decay: false + pre_process: true + post_process: true + persist_layer_norm: true + activations_checkpoint_granularity: null + activations_checkpoint_method: null + activations_checkpoint_num_layers: null + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: false + native_amp_init_scale: 4294967296 + native_amp_growth_interval: 1000 + hysteresis: 2 + fp32_residual_connection: false + fp16_lm_cross_entropy: false + masked_softmax_fusion: true + bias_dropout_add_fusion: true + use_cpu_initialization: false + onnx_safe: false + gradient_accumulation_fusion: false + openai_gelu: false + bias_activation_fusion: false + megatron_legacy: true + transformer_engine: false + fp8: false + fp8_e4m3: false + fp8_hybrid: false + fp8_margin: 0 + fp8_interval: 1 + fp8_amax_history_len: 1 + fp8_amax_compute_algo: most_recent + use_emha: false + activation: quick-gelu + + # Megatron O2-style half-precision + megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + sim_hidden_dim: 64 + cls_hidden_dim: 64 + + tokenizer: + library: 'huggingface' + type: 'openai/clip-vit-large-patch14' + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + make_vocab_size_divisible_by: 128 # Pad the vocab size to be divisible by this value for computation efficiency. + + data: + num_workers: 8 + train: + dataset_path: /datasets/coyo/test.pkl + validation: # List of paths to pkl files or tar files + dataset_path: /datasets/coyo/test.pkl + webdataset: + infinite_sampler: False + local_root_path: /datasets/coyo + + imagenet_val: null # Path to imagenet val set for conducting zero shot evaluation. + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: adam + lr: 1e-3 + weight_decay: 0.0 + sched: + name: CosineAnnealing + warmup_steps: 200 + constant_steps: 0 + min_lr: 1e-5 + concepts: ??? + diff --git a/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_infer.yaml b/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_infer.yaml new file mode 100755 index 000000000000..f78eba0bdc96 --- /dev/null +++ b/examples/multimodal/content_filtering/nsfw/conf/megatron_nsfw_infer.yaml @@ -0,0 +1,12 @@ +image_path: ??? # Path to a image for inference + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +model: + restore_from_path: null # Path to a trained ViT .nemo file + precision: ${trainer.precision} diff --git a/examples/multimodal/content_filtering/nsfw/megatron_nsfw_infer.py b/examples/multimodal/content_filtering/nsfw/megatron_nsfw_infer.py new file mode 100644 index 000000000000..d6b4bed6d01a --- /dev/null +++ b/examples/multimodal/content_filtering/nsfw/megatron_nsfw_infer.py @@ -0,0 +1,78 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import torch +from omegaconf.omegaconf import OmegaConf +from PIL import Image + +from nemo.collections.multimodal.data.clip.augmentations.augmentations import image_transform +from nemo.collections.multimodal.models.content_filter.megatron_nsfw_clip_models import MegatronContentFilteringModel +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.get_rank import is_global_rank_zero + + +def _get_autocast_dtype(precision: str): + if precision in ["bf16", "bf16-mixed"]: + return torch.bfloat16 + if precision in [32, "32", "32-true"]: + return torch.float + if precision in [16, "16", "16-mixed"]: + return torch.half + raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') + + +@hydra_runner(config_path="conf", config_name="megatron_nsfw_infer") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + # These configs are required to be off during inference. + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.vision.precision = cfg.trainer.precision + if cfg.trainer.precision != "bf16": + model_cfg.megatron_amp_O2 = False + model_cfg.sequence_parallel = False + model_cfg.activations_checkpoint_granularity = None + model_cfg.activations_checkpoint_method = None + + trainer, model = setup_trainer_and_model_for_inference( + model_provider=MegatronContentFilteringModel, cfg=cfg, model_cfg_modifier=model_cfg_modifier, + ) + image_transform_fn = image_transform( + (model.cfg.vision.img_h, model.cfg.vision.img_w), + is_train=False, + mean=model.cfg.vision.image_mean, + std=model.cfg.vision.image_std, + resize_longest_max=True, + ) + + autocast_dtype = _get_autocast_dtype(trainer.precision) + image = Image.open(cfg.image_path).convert('RGB') + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): + image = image_transform_fn(image).unsqueeze(0).cuda() + probability = model(image).sigmoid() + + if is_global_rank_zero: + print("Given image's NSFW probability: ", probability.cpu().item()) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/content_filtering/nsfw/megatron_nsfw_pretrain.py b/examples/multimodal/content_filtering/nsfw/megatron_nsfw_pretrain.py new file mode 100644 index 000000000000..51ccd596431a --- /dev/null +++ b/examples/multimodal/content_filtering/nsfw/megatron_nsfw_pretrain.py @@ -0,0 +1,60 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from omegaconf.omegaconf import OmegaConf, open_dict + +from nemo.collections.multimodal.models.content_filter.megatron_nsfw_clip_models import MegatronContentFilteringModel +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="conf", config_name="megatron_nsfw_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + assert ( + cfg.trainer.devices * cfg.trainer.num_nodes + ) * cfg.model.micro_batch_size == cfg.model.global_batch_size, ( + "Gradient accumulation is not supported in CLIP yet." + ) + + trainer = MegatronTrainerBuilder(cfg).create_trainer() + exp_manager(trainer, cfg.exp_manager) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + model = MegatronContentFilteringModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + override_config_path=cfg.model, + save_restore_connector=NLPSaveRestoreConnector(), + strict=False, + ) + + trainer.fit(model) + + if "save_path" in cfg.model: + logging.info(f"Saving model to path: {cfg.model.save_path}") + model.save_to(cfg.model.save_path) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/convert_ckpt_to_nemo.py b/examples/multimodal/convert_ckpt_to_nemo.py new file mode 100644 index 000000000000..d640e0c11ede --- /dev/null +++ b/examples/multimodal/convert_ckpt_to_nemo.py @@ -0,0 +1,197 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r""" +Conversion script to convert PTL checkpoints into nemo checkpoint. + Example to run this conversion script: + python -m torch.distributed.launch --nproc_per_node= * \ + convert_ckpt_to_nemo.py \ + --checkpoint_folder \ + --checkpoint_name \ + --nemo_file_path \ + --tensor_model_parallel_size \ + --pipeline_model_parallel_size +""" + +import os +from argparse import ArgumentParser + +import torch +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel +from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet +from nemo.collections.multimodal.models.dreambooth.dreambooth import MegatronDreamBooth +from nemo.collections.multimodal.models.imagen.imagen import MegatronImagen +from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit +from nemo.collections.multimodal.models.kosmos.megatron_kosmos_model import MegatronKosmosModel +from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.utils import AppState, logging +from nemo.utils.distributed import initialize_distributed +from nemo.utils.model_utils import inject_model_parallel_rank + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +def get_args(): + parser = ArgumentParser() + parser.add_argument( + "--checkpoint_folder", + type=str, + default=None, + required=True, + help="Path to PTL checkpoints saved during training. Ex: /raid/nemo_experiments/megatron_gpt/checkpoints", + ) + parser.add_argument( + "--checkpoint_name", + type=str, + default=None, + required=True, + help="Name of checkpoint to be used. Ex: megatron_gpt--val_loss=6.34-step=649-last.ckpt", + ) + + parser.add_argument( + "--hparams_file", + type=str, + default=None, + required=False, + help="Path config for restoring. It's created during training and may need to be modified during restore if restore environment is different than training. Ex: /raid/nemo_experiments/megatron_gpt/hparams.yaml", + ) + parser.add_argument("--nemo_file_path", type=str, default=None, required=True, help="Path to output .nemo file.") + parser.add_argument("--gpus_per_node", type=int, required=False, default=1) + parser.add_argument("--tensor_model_parallel_size", type=int, required=False, default=1) + parser.add_argument("--pipeline_model_parallel_size", type=int, required=False, default=1) + parser.add_argument( + "--pipeline_model_parallel_split_rank", + type=int, + required=False, + default=None, + help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", + ) + parser.add_argument("--model_type", type=str, required=False, default="megatron_clip") + parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) + parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") + + args = parser.parse_args() + return args + + +def convert(local_rank, rank, world_size, args): + app_state = AppState() + app_state.data_parallel_rank = 0 + num_nodes = world_size // args.gpus_per_node + + cfg = OmegaConf.load(args.hparams_file) + with open_dict(cfg): + cfg['model'] = cfg['cfg'] + cfg['trainer'] = {'precision': cfg['model']['precision']} + if args.bcp: + cfg['cluster_type'] = 'BCP' + trainer = MegatronTrainerBuilder(cfg).create_trainer() + + app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = args.tensor_model_parallel_size + + # no use atm, use to split ranks in encoder/decoder models. + if args.pipeline_model_parallel_size > 1 and args.model_type in []: + if args.pipeline_model_parallel_split_rank is not None: + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_split_rank + else: + if args.pipeline_model_parallel_size % 2 != 0: + raise ValueError( + f"Pipeline model parallel size {args.pipeline_model_parallel_size} must be even if split rank is not specified." + ) + else: + # If split rank is not set, then we set it to be pipeline_model_parallel_size // 2 - this is because in most cases we have the same number of enc/dec layers. + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_size // 2 + else: + app_state.pipeline_model_parallel_split_rank = None + + app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size + + parallel_state.initialize_model_parallel( + tensor_model_parallel_size=app_state.tensor_model_parallel_size, + pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, + ) + + app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() + app_state.tensor_model_parallel_rank = parallel_state.get_tensor_model_parallel_rank() + + # inject model parallel rank + checkpoint_path = inject_model_parallel_rank(os.path.join(args.checkpoint_folder, args.checkpoint_name)) + + logging.info( + f'rank: {rank}, local_rank: {local_rank}, is loading checkpoint: {checkpoint_path} for tp_rank: {app_state.tensor_model_parallel_rank} and pp_rank: {app_state.pipeline_model_parallel_rank}' + ) + + if args.model_type == 'megatron_clip': + model = MegatronCLIPModel.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) + elif args.model_type == 'stable_diffusion': + model = MegatronLatentDiffusion.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) + elif args.model_type == 'instruct_pix2pix': + model = MegatronLatentDiffusionEdit.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) + elif args.model_type == 'dreambooth': + model = MegatronLatentDiffusion.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) + elif args.model_type == 'imagen': + model = MegatronImagen.load_from_checkpoint(checkpoint_path, hparams_file=args.hparams_file, trainer=trainer) + elif args.model_type == 'controlnet': + model = MegatronControlNet.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) + elif args.model_type == 'kosmos': + model = MegatronKosmosModel.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) + elif args.model_type == 'neva': + model = MegatronNevaModel.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) + else: + raise ValueError(f"Unrecognized model_type {args.model_type}.") + + model._save_restore_connector = NLPSaveRestoreConnector() + + if torch.distributed.is_initialized(): + torch.distributed.barrier() + + model.save_to(args.nemo_file_path) + + logging.info(f'NeMo model saved to: {args.nemo_file_path}') + + +if __name__ == '__main__': + args = get_args() + local_rank, rank, world_size = initialize_distributed(args) + convert(local_rank, rank, world_size, args) diff --git a/examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml b/examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml new file mode 100644 index 000000000000..a6b1928ef13f --- /dev/null +++ b/examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml @@ -0,0 +1,250 @@ +name: megatron_clip +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 375000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + val_check_interval: 100 + check_val_every_n_epoch: null + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: megatron_clip + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 32 # limited by GPU memory + global_batch_size: 32 # will use more micro batches to reach global batch size + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_path: null # used in fine-tuning + # multimodal configs + output_dim: 512 + # As the number of devices used to train increases, so does the space complexity of + # the logit matrix. Using a naïve all-gather scheme, space complexity will be + # `O(n^2)`. Instead, complexity may become effectively linear if the flags + # `--gather-with-grad` and `--local-loss` are used. This alteration results in one-to-one + # numerical results as the naïve method. + local_loss: False # calculate loss w/ local features @ global (instead of realizing full global @ global matrix) + gather_with_grad: True # enable full distributed gradient for feature gather, set this to False may cause convergence issue + + vision: + precision: ${trainer.precision} + # vision configs + patch_dim: 16 + img_h: 224 + img_w: 224 + image_mean: null + image_std: null + num_channels: 3 + drop_patch_rate: 0.0 + drop_path_rate: 0.0 + global_average_pool: False + output_dim: ${model.output_dim} + class_token_length: 8 + preprocess_layernorm: True # apply layer norm to embedded tokens + + # model architecture + encoder_seq_length: 196 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: learned_absolute + num_layers: 12 + hidden_size: 768 + ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 12 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + + text: + precision: ${trainer.precision} + # text configs + output_dim: ${model.output_dim} + + # model architecture + encoder_seq_length: 77 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: learned_absolute + num_layers: 12 + hidden_size: 512 + ffn_hidden_size: 2048 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 8 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + + # Megatron O2-style half-precision + megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'huggingface' + type: 'openai/clip-vit-large-patch14' + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + make_vocab_size_divisible_by: 128 # Pad the vocab size to be divisible by this value for computation efficiency. + + data: + num_workers: 8 + train: + dataset_path: # List of paths to pkl files or tar files + - /datasets/coyo/test.pkl + validation: # List of paths to pkl files or tar files + dataset_path: + - /datasets/coyo/test.pkl + webdataset: + infinite_sampler: False + local_root_path: /datasets/coyo + + imagenet_val: null # Path to imagenet val set for conducting zero shot evaluation. + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 1e-3 + weight_decay: 0.2 + betas: + - 0.9 + - 0.98 + sched: + name: CosineAnnealing + warmup_steps: 2000 + constant_steps: 0 + min_lr: 1e-5 \ No newline at end of file diff --git a/examples/multimodal/foundation/clip/conf/megatron_clip_imagenet_zeroshot.yaml b/examples/multimodal/foundation/clip/conf/megatron_clip_imagenet_zeroshot.yaml new file mode 100755 index 000000000000..79bdac888887 --- /dev/null +++ b/examples/multimodal/foundation/clip/conf/megatron_clip_imagenet_zeroshot.yaml @@ -0,0 +1,17 @@ +trainer: + devices: 8 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: bf16 # 16, 32, or bf16 + +model: + restore_from_path: null # Path to a trained ViT .nemo file + precision: ${trainer.precision} + micro_batch_size: 1000 + global_batch_size: 8000 + + data: + num_workers: 2 + imagenet_val: ??? # path to imagenet val folder + diff --git a/examples/multimodal/foundation/clip/conf/megatron_clip_infer.yaml b/examples/multimodal/foundation/clip/conf/megatron_clip_infer.yaml new file mode 100755 index 000000000000..215cd17841ae --- /dev/null +++ b/examples/multimodal/foundation/clip/conf/megatron_clip_infer.yaml @@ -0,0 +1,13 @@ +image_path: ??? # Path to a image for inference +texts: ??? # List of texts to compute similarity + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +model: + restore_from_path: null # Path to a trained ViT .nemo file + precision: ${trainer.precision} diff --git a/examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py b/examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py new file mode 100644 index 000000000000..67151d95e971 --- /dev/null +++ b/examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py @@ -0,0 +1,284 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Usage example: + python /opt/NeMo/examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py + --arch=ViT-H-14 + --version=laion2b_s32b_b79k + --hparams_file=path/to/saved.yaml + --nemo_file_path=open_clip.nemo + +If converting from OpenCLIP, specify the architecture (`arch`) and version (`version`) from the OpenCLIP model list (https://github.com/mlfoundations/open_clip#usage). + +If converting from Hugging Face, set the version to `huggingface` and the architecture (`arch`) to the Hugging Face model name (e.g., `yuvalkirstain/PickScore_v1`). + +Additionally, provide a NeMo hparams file with the correct model architecture arguments. Refer to examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml. +""" + +import os +from argparse import ArgumentParser + +import einops +import open_clip +import torch +from omegaconf import OmegaConf +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.trainer import Trainer +from transformers import CLIPModel + +from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.utils import AppState, logging +from nemo.utils.distributed import initialize_distributed +from nemo.utils.model_utils import inject_model_parallel_rank + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +def get_args(): + parser = ArgumentParser() + parser.add_argument("--arch", type=str, default="ViT-H-14") + parser.add_argument("--version", type=str, default="laion2b_s32b_b79k") + + parser.add_argument( + "--hparams_file", + type=str, + default=None, + required=False, + help="Path config for restoring. It's created during training and may need to be modified during restore if restore environment is different than training. Ex: /raid/nemo_experiments/megatron_gpt/hparams.yaml", + ) + parser.add_argument("--nemo_file_path", type=str, default=None, required=True, help="Path to output .nemo file.") + parser.add_argument("--gpus_per_node", type=int, required=False, default=1) + parser.add_argument("--tensor_model_parallel_size", type=int, required=False, default=1) + parser.add_argument("--pipeline_model_parallel_size", type=int, required=False, default=1) + parser.add_argument( + "--pipeline_model_parallel_split_rank", + type=int, + required=False, + default=None, + help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", + ) + parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) + parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") + + args = parser.parse_args() + return args + + +def mapping_openclip_state_dict(open_model): + open_state_dict = open_model.state_dict() + key_mapping = { + "positional_embedding": "text_encoder.language_model.embedding.position_embeddings", + "token_embedding.weight": "text_encoder.language_model.embedding.word_embeddings.weight", + "ln_final.weight": "text_encoder.language_model.encoder.final_layernorm.weight", + "ln_final.bias": "text_encoder.language_model.encoder.final_layernorm.bias", + "text_projection": "text_encoder.head.weight", + } + layer_mapping = { + ".ln_1.weight": ".input_layernorm.weight", + ".ln_1.bias": ".input_layernorm.bias", + ".attn.in_proj_weight": ".self_attention.query_key_value.weight", + ".attn.in_proj_bias": ".self_attention.query_key_value.bias", + ".attn.out_proj.weight": ".self_attention.dense.weight", + ".attn.out_proj.bias": ".self_attention.dense.bias", + ".ln_2.weight": ".post_attention_layernorm.weight", + ".ln_2.bias": ".post_attention_layernorm.bias", + ".mlp.c_fc.weight": ".mlp.dense_h_to_4h.weight", + ".mlp.c_fc.bias": ".mlp.dense_h_to_4h.bias", + ".mlp.c_proj.weight": ".mlp.dense_4h_to_h.weight", + ".mlp.c_proj.bias": ".mlp.dense_4h_to_h.bias", + ".ln_pre.weight": ".preprocess_layernorm.weight", + ".ln_pre.bias": ".preprocess_layernorm.bias", + ".ln_post.weight": ".transformer.final_layernorm.weight", + ".ln_post.bias": ".transformer.final_layernorm.bias", + ".positional_embedding": ".position_embeddings", + ".backbone.proj": ".head.weight", + ".class_embedding": ".cls_token", + ".backbone.conv1.weight": ".backbone.linear_encoder.weight", + } + + nemo_state_dict = {} + for key in open_state_dict.keys(): + if key.startswith("transformer.resblocks."): + key_ = key.replace("transformer.resblocks.", "text_encoder.language_model.encoder.layers.") + elif key.startswith("visual.transformer.resblocks."): + key_ = key.replace("visual.transformer.resblocks.", "vision_encoder.backbone.transformer.layers.") + elif key.startswith('visual.'): + key_ = key.replace("visual.", "vision_encoder.backbone.") + else: + key_ = key + for pat in key_mapping: + if key_ == pat: + key_ = key_.replace(pat, key_mapping[pat]) + for pat in layer_mapping: + if key_.endswith(pat): + key_ = key_[: -len(pat)] + layer_mapping[pat] + break + nemo_state_dict[key_] = open_state_dict[key] + + nemo_state_dict["text_encoder.head.weight"] = nemo_state_dict["text_encoder.head.weight"].T + nemo_state_dict["vision_encoder.head.weight"] = nemo_state_dict["vision_encoder.head.weight"].T + nemo_state_dict["vision_encoder.backbone.cls_token"] = nemo_state_dict[ + "vision_encoder.backbone.cls_token" + ].reshape(1, 1, -1) + w = nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] + nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] = einops.rearrange(w, "b c p1 p2 -> b (p1 p2 c)",) + nemo_state_dict["vision_encoder.backbone.linear_encoder.bias"] = torch.zeros(w.shape[0]) + + return nemo_state_dict + + +def mapping_hf_state_dict(hf_model): + hf_state_dict = hf_model.state_dict() + key_mapping = { + "text_projection.weight": "text_encoder.head.weight", + "visual_projection.weight": "vision_encoder.head.weight", + } + + layer_mapping = { + ".layer_norm1.weight": ".input_layernorm.weight", + ".layer_norm1.bias": ".input_layernorm.bias", + ".self_attn.out_proj.weight": ".self_attention.dense.weight", + ".self_attn.out_proj.bias": ".self_attention.dense.bias", + ".layer_norm2.weight": ".post_attention_layernorm.weight", + ".layer_norm2.bias": ".post_attention_layernorm.bias", + ".mlp.fc1.weight": ".mlp.dense_h_to_4h.weight", + ".mlp.fc1.bias": ".mlp.dense_h_to_4h.bias", + ".mlp.fc2.weight": ".mlp.dense_4h_to_h.weight", + ".mlp.fc2.bias": ".mlp.dense_4h_to_h.bias", + ".pre_layrnorm.weight": ".preprocess_layernorm.weight", + ".pre_layrnorm.bias": ".preprocess_layernorm.bias", + ".post_layernorm.weight": ".transformer.final_layernorm.weight", + ".post_layernorm.bias": ".transformer.final_layernorm.bias", + ".backbone.embeddings.position_embedding.weight": ".backbone.position_embeddings", + ".language_model.embeddings.position_embedding.weight": ".language_model.embedding.position_embeddings", + ".embeddings.class_embedding": ".cls_token", + ".backbone.embeddings.patch_embedding.weight": ".backbone.linear_encoder.weight", + ".final_layer_norm.weight": ".encoder.final_layernorm.weight", + ".final_layer_norm.bias": ".encoder.final_layernorm.bias", + ".embeddings.token_embedding.weight": ".embedding.word_embeddings.weight", + } + + nemo_state_dict = {} + for key in hf_state_dict.keys(): + if key.startswith("text_model.encoder.layers"): + key_ = key.replace("text_model.encoder.layers", "text_encoder.language_model.encoder.layers") + elif key.startswith("vision_model.encoder.layers"): + key_ = key.replace("vision_model.encoder.layers", "vision_encoder.backbone.transformer.layers") + elif key.startswith('vision_model.'): + key_ = key.replace("vision_model.", "vision_encoder.backbone.") + elif key.startswith('text_model.'): + key_ = key.replace('text_model.', 'text_encoder.language_model.') + else: + key_ = key + for pat in key_mapping: + if key_ == pat: + key_ = key_.replace(pat, key_mapping[pat]) + for pat in layer_mapping: + if key_.endswith(pat): + key_ = key_[: -len(pat)] + layer_mapping[pat] + break + if 'q_proj' in key_: + key_k = key.replace('q_proj', 'k_proj') + key_v = key.replace('q_proj', 'v_proj') + key_new = key_.replace('self_attn.q_proj', 'self_attention.query_key_value') + value_new = torch.concat((hf_state_dict[key], hf_state_dict[key_k], hf_state_dict[key_v]), dim=0) + nemo_state_dict[key_new] = value_new + elif not ('k_proj' in key_ or 'v_proj' in key_ or 'position_ids' in key_): + nemo_state_dict[key_] = hf_state_dict[key] + + nemo_state_dict["vision_encoder.backbone.cls_token"] = nemo_state_dict[ + "vision_encoder.backbone.cls_token" + ].reshape(1, 1, -1) + w = nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] + nemo_state_dict["vision_encoder.backbone.linear_encoder.weight"] = einops.rearrange(w, "b c p1 p2 -> b (p1 p2 c)",) + nemo_state_dict["vision_encoder.backbone.linear_encoder.bias"] = torch.zeros(w.shape[0]) + + return nemo_state_dict + + +def convert(local_rank, rank, world_size, args): + app_state = AppState() + app_state.data_parallel_rank = 0 + num_nodes = world_size // args.gpus_per_node + if args.bcp: + trainer = Trainer( + devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu', plugins=[TorchElasticEnvironment()] + ) + else: + trainer = Trainer(devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu') + + app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = args.tensor_model_parallel_size + + # no use atm, use to split ranks in encoder/decoder models. + if args.pipeline_model_parallel_size > 1 and args.model_type in []: + if args.pipeline_model_parallel_split_rank is not None: + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_split_rank + else: + if args.pipeline_model_parallel_size % 2 != 0: + raise ValueError( + f"Pipeline model parallel size {args.pipeline_model_parallel_size} must be even if split rank is not specified." + ) + else: + # If split rank is not set, then we set it to be pipeline_model_parallel_size // 2 - this is because in most cases we have the same number of enc/dec layers. + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_size // 2 + else: + app_state.pipeline_model_parallel_split_rank = None + + app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size + + parallel_state.initialize_model_parallel( + tensor_model_parallel_size=app_state.tensor_model_parallel_size, + pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, + ) + + app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() + app_state.tensor_model_parallel_rank = parallel_state.get_tensor_model_parallel_rank() + + cfg = OmegaConf.load(args.hparams_file) + model = MegatronCLIPModel(cfg.model, trainer) + + if args.version == "huggingface": + hf_model = CLIPModel.from_pretrained(args.arch) + state_dict = mapping_hf_state_dict(hf_model) + else: + open_model, _, _ = open_clip.create_model_and_transforms(args.arch, pretrained=args.version) + state_dict = mapping_openclip_state_dict(open_model) + + model.model.load_state_dict(state_dict) + + model._save_restore_connector = NLPSaveRestoreConnector() + + if torch.distributed.is_initialized(): + torch.distributed.barrier() + + model.save_to(args.nemo_file_path) + + logging.info(f'NeMo model saved to: {args.nemo_file_path}') + + +if __name__ == '__main__': + args = get_args() + local_rank, rank, world_size = initialize_distributed(args) + convert(local_rank, rank, world_size, args) diff --git a/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py b/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py new file mode 100644 index 000000000000..2c536ca7b5bb --- /dev/null +++ b/examples/multimodal/foundation/clip/megatron_clip_imagenet_zeroshot.py @@ -0,0 +1,142 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import torch +import torch.nn.functional as F +from omegaconf.omegaconf import OmegaConf, open_dict +from PIL import Image +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from tqdm import tqdm + +from nemo.collections.multimodal.data.clip.clip_dataset import ( + ImagenetClassnameDataset, + build_imagenet_validation_dataloader, + get_preprocess_fns, + tokenize, +) +from nemo.collections.multimodal.data.clip.imagenet_zeroshot_data import imagenet_classnames, openai_imagenet_template +from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.collections.nlp.modules.common.megatron.utils import average_losses_across_data_parallel_group +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.get_rank import is_global_rank_zero + +try: + from megatron.core import parallel_state, tensor_parallel + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +def accuracy(output, target, topk=(1,)): + pred = output.topk(max(topk), 1, True, True)[1].t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + return [float(correct[:k].reshape(-1).float().sum(0, keepdim=True).cpu().numpy()) for k in topk] + + +@hydra_runner(config_path="conf", config_name="megatron_clip_imagenet_zeroshot") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + # These configs are required to be off during inference. + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.vision.precision = cfg.trainer.precision + model_cfg.text.precision = cfg.trainer.precision + if cfg.trainer.precision != "bf16": + model_cfg.megatron_amp_O2 = False + model_cfg.sequence_parallel = False + model_cfg.activations_checkpoint_granularity = None + model_cfg.activations_checkpoint_method = None + + trainer, model = setup_trainer_and_model_for_inference( + model_provider=MegatronCLIPModel, cfg=cfg, model_cfg_modifier=model_cfg_modifier, + ) + + if model.cfg.get("megatron_amp_O2", False): + vision_encoder = model.model.module.vision_encoder + text_encoder = model.model.module.text_encoder + else: + vision_encoder = model.model.vision_encoder + text_encoder = model.model.text_encoder + + # get autocast_dtype + if trainer.precision in ['bf16', 'bf16-mixed']: + autocast_dtype = torch.bfloat16 + elif trainer.precision in [32, '32', '32-true']: + autocast_dtype = torch.float + elif trainer.precision in [16, '16', '16-mixed']: + autocast_dtype = torch.half + else: + raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') + + with open_dict(cfg): + cfg.model["vision"] = model.cfg.vision + cfg.model["text"] = model.cfg.text + + imagenet_val = build_imagenet_validation_dataloader(cfg.model, model.tokenizer) + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): + # build imagenet classification classifier + classifier = [] + for texts in imagenet_val["texts"]: + texts = texts.cuda(non_blocking=True) + class_embeddings = text_encoder(texts) + class_embedding = F.normalize(class_embeddings, dim=-1).mean(dim=0) + class_embedding /= class_embedding.norm() + classifier.append(class_embedding) + classifier = torch.stack(classifier, dim=1) + + top1, top5, n = 0.0, 0.0, 0.0 + for images, target in tqdm(imagenet_val["images"], desc="Imagenet Zero-shot Evaluation", leave=False): + if images is None or target is None: + continue + + images = images.cuda(non_blocking=True) + target = target.cuda(non_blocking=True) + # predict + image_features = vision_encoder(images) + image_features = F.normalize(image_features, dim=-1) + logits = 100.0 * image_features @ classifier + + # measure accuracy + acc1, acc5 = accuracy(logits, target, topk=(1, 5)) + top1 += acc1 + top5 += acc5 + n += images.size(0) + + logging.info('Finished zero-shot imagenet.') + top1 = top1 / n + top5 = top5 / n + + imagenet_metric = torch.zeros(2).cuda() + imagenet_metric[0], imagenet_metric[1] = top1, top5 + imagenet_metric = average_losses_across_data_parallel_group(imagenet_metric) + + if is_global_rank_zero: + logging.info(f"Zero-shot CLIP accuracy Top-1: {imagenet_metric[0]:.4f}; Top-5: {imagenet_metric[1]:.4f}") + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/foundation/clip/megatron_clip_infer.py b/examples/multimodal/foundation/clip/megatron_clip_infer.py new file mode 100644 index 000000000000..06f37081b9be --- /dev/null +++ b/examples/multimodal/foundation/clip/megatron_clip_infer.py @@ -0,0 +1,89 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import torch +from omegaconf.omegaconf import OmegaConf, open_dict +from PIL import Image +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment + +from nemo.collections.multimodal.data.clip.clip_dataset import get_preprocess_fns +from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.get_rank import is_global_rank_zero + + +@hydra_runner(config_path="conf", config_name="megatron_clip_infer") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + # These configs are required to be off during inference. + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.vision.precision = cfg.trainer.precision + model_cfg.text.precision = cfg.trainer.precision + if cfg.trainer.precision != "bf16": + model_cfg.megatron_amp_O2 = False + model_cfg.sequence_parallel = False + model_cfg.activations_checkpoint_granularity = None + model_cfg.activations_checkpoint_method = None + + trainer, model = setup_trainer_and_model_for_inference( + model_provider=MegatronCLIPModel, cfg=cfg, model_cfg_modifier=model_cfg_modifier, + ) + + if model.cfg.get("megatron_amp_O2", False): + vision_encoder = model.model.module.vision_encoder + text_encoder = model.model.module.text_encoder + else: + vision_encoder = model.model.vision_encoder + text_encoder = model.model.text_encoder + + val_image_transform, text_transform = get_preprocess_fns(model.cfg, model.tokenizer, is_train=False,) + + # get autocast_dtype + if trainer.precision in ['bf16', 'bf16-mixed']: + autocast_dtype = torch.bfloat16 + elif trainer.precision in [32, '32', '32-true']: + autocast_dtype = torch.float + elif trainer.precision in [16, '16', '16-mixed']: + autocast_dtype = torch.half + else: + raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') + + image = Image.open(cfg.image_path).convert('RGB') + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): + image = val_image_transform(image).unsqueeze(0).cuda() + texts = text_transform(cfg.texts).cuda() + image_features = vision_encoder(image) + text_features = text_encoder(texts) + image_features /= image_features.norm(dim=-1, keepdim=True) + text_features /= text_features.norm(dim=-1, keepdim=True) + + text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1) + + if is_global_rank_zero: + print(f"Given image's CLIP text probability: ", list(zip(cfg.texts, text_probs[0].cpu().numpy()))) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/foundation/clip/megatron_clip_pretrain.py b/examples/multimodal/foundation/clip/megatron_clip_pretrain.py new file mode 100644 index 000000000000..d0dcc07ffe3e --- /dev/null +++ b/examples/multimodal/foundation/clip/megatron_clip_pretrain.py @@ -0,0 +1,49 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from omegaconf.omegaconf import OmegaConf, open_dict + +from nemo.collections.multimodal.models.clip.megatron_clip_models import MegatronCLIPModel +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="conf", config_name="megatron_clip_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + assert ( + cfg.trainer.devices * cfg.trainer.num_nodes + ) * cfg.model.micro_batch_size == cfg.model.global_batch_size, ( + "Gradient accumulation is not supported in CLIP yet." + ) + + trainer = MegatronTrainerBuilder(cfg).create_trainer() + exp_manager(trainer, cfg.exp_manager) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + model = MegatronCLIPModel(cfg.model, trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/generative/controlnet/conf/controlnet_infer.yaml b/examples/multimodal/generative/controlnet/conf/controlnet_infer.yaml new file mode 100644 index 000000000000..0012e272aac4 --- /dev/null +++ b/examples/multimodal/generative/controlnet/conf/controlnet_infer.yaml @@ -0,0 +1,36 @@ +name: stable-diffusion-train + +infer: + unconditional_guidance_scale: 3 + num_images_per_prompt: 4 + hint_image_size: 512 + height: 512 + width: 512 + down_factor: 8 + inference_steps: 50 + sampler_type: 'DDIM' + eta: 0 + output_type: 'pil' + save_to_file: True + out_path: 'controlnet' + seed: 355 + prompts: + - high quality picture of a house in oil painting style + control: + - /datasets/coco-stuff/house.png #images/val2017/000000001584.jpg + # Depending on the input control, if the input control is already the conditioning image, null should be passed here + # If a reconstruction target is used as control, then preprocessing function that turns it into a conditioning image needs to be specified + control_image_preprocess: seg2img + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + +model: + restore_from_path: /ckpts/controlnet/30k.nemo + precision: ${trainer.precision} + strength: 2.0 + guess_mode: False \ No newline at end of file diff --git a/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml b/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml new file mode 100644 index 000000000000..beb4fd80ee84 --- /dev/null +++ b/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml @@ -0,0 +1,220 @@ +trainer: + devices: 2 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: True + max_epochs: 3 # PTL default. In practice, max_steps will be reached first. + max_steps: -1 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + limit_val_batches: 0 + + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: controlnet + create_wandb_logger: False + wandb_logger_kwargs: + project: stable-diffusion + group: controlnet + name: controlnet-v1.5 + resume: True + create_checkpoint_callback: True + create_tensorboard_logger: True + checkpoint_callback_params: + save_top_k: -1 + every_n_train_steps: 5000 + every_n_epochs: 0 + monitor: reduced_train_loss + filename: 'controlnet--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 4 # limited by GPU memory + global_batch_size: 8 + + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: images + cond_stage_key: captions + control_key: hint + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + scale_by_std: False + ckpt_path: + ignore_keys: [ ] + parameterization: eps + clip_denoised: True + load_only_unet: False + cosine_s: 8e-3 + given_betas: + original_elbo_weight: 0 + v_posterior: 0 + l_simple_weight: 1 + use_positional_encodings: False + learn_logvar: False + logvar_init: 0 + beta_schedule: linear + loss_type: l2 + learning_rate: 1.0e-04 + concat_mode: True + cond_stage_forward: + text_embedding_dropout_rate: 0.0 + fused_opt: True + inductor: False + inductor_cudagraphs: False + capture_cudagraph_iters: -1 # -1 to disable + channels_last: True + only_mid_control: False + sd_locked: True + + control_stage_config: + _target_: nemo.collections.multimodal.models.controlnet.controlnet.ControlNet + params: + from_pretrained_unet: /ckpts/v1-5-pruned.ckpt + from_NeMo: True + image_size: 32 # unused + in_channels: 4 + hint_channels: 3 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + use_linear_in_transformer: False + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: False + + unet_config: + _target_: nemo.collections.multimodal.models.controlnet.controlnet.ControlledUnetModel + from_pretrained: /ckpts/v1-5-pruned.ckpt + from_NeMo: True + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: + - 4 + - 2 + - 1 + num_res_blocks: 2 + channel_mult: + - 1 + - 2 + - 4 + - 4 + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: False + + first_stage_config: + _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL + from_pretrained: /ckpts/vae.bin + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder + version: openai/clip-vit-large-patch14 + device: cuda + max_length: 77 + + data: + num_workers: 16 + train: + dataset_path: + #- /datasets/tarfiles/fill50k.pkl + - /datasets/coco-stuff/coco-stuff-tarfiles/wdinfo-coco-stuff.pkl + augmentations: + resize_smallest_side: 512 + center_crop_h_w: 512, 512 + horizontal_flip: False + filterings: + + webdataset: + infinite_sampler: False + local_root_path: /datasets/coco-stuff/coco-stuff-tarfiles + + optim: + name: fused_adam + lr: 2e-5 + weight_decay: 0. + betas: + - 0.9 + - 0.999 + sched: + name: WarmupHoldPolicy + warmup_steps: 0 + hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + image_logger: + batch_frequency: 1000 + max_images: 4 + + #miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) diff --git a/examples/multimodal/generative/controlnet/controlnet_infer.py b/examples/multimodal/generative/controlnet/controlnet_infer.py new file mode 100644 index 000000000000..c050010a73c0 --- /dev/null +++ b/examples/multimodal/generative/controlnet/controlnet_infer.py @@ -0,0 +1,247 @@ +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import time + +import cv2 +import einops +import torch +from PIL import Image + +from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet +from nemo.collections.multimodal.models.controlnet.util import get_preprocessing_function +from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler +from nemo.collections.multimodal.models.stable_diffusion.samplers.plms import PLMSSampler +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.config import hydra_runner + + +def get_control_input(image_path, batch_size, hint_image_size, control_image_preprocess=None): + image = cv2.imread(image_path) + if control_image_preprocess: + # More applications can be supported here + process = get_preprocessing_function(control_image_preprocess) + image = process(image) + image = cv2.resize(image, (hint_image_size, hint_image_size)) + control = torch.from_numpy(image).float() / 255.0 + control = torch.stack([control for _ in range(batch_size)], dim=0) + control = einops.rearrange(control, 'b h w c -> b c h w') + return control + + +def encode_prompt(cond_stage_model, prompt, unconditional_guidance_scale, batch_size): + c = cond_stage_model.encode(batch_size * [prompt]) + if unconditional_guidance_scale != 1.0: + uc = cond_stage_model.encode(batch_size * [""]) + else: + uc = None + return c, uc + + +def initialize_sampler(model, sampler_type): + if sampler_type == 'DDIM': + sampler = DDIMSampler(model) + elif sampler_type == 'PLMS': + sampler = PLMSSampler(model) + else: + raise ValueError(f'Sampler {sampler_type} is not supported for {cls.__name__}') + return sampler + + +def decode_images(model, samples): + images = model.decode_first_stage(samples) + + images = torch.clamp((images + 1.0) / 2.0, min=0.0, max=1.0) + + return images + + +def torch_to_numpy(images): + numpy_images = [x.float().cpu().permute(0, 2, 3, 1).numpy() for x in images] + return numpy_images + + +def numpy_to_pil(images): + """ + Convert a numpy image or a batch of images to a PIL image. + """ + if images.ndim == 3: + images = images[None, ...] + images = (images * 255).round().astype("uint8") + pil_images = [Image.fromarray(image) for image in images] + + return pil_images + + +def pipeline(model, cfg, rng=None, verbose=True): + # setup default values for inference configs + unconditional_guidance_scale = cfg.infer.get("unconditional_guidance_scale", 7.5) + batch_size = cfg.infer.get('num_images_per_prompt', 1) + prompts = cfg.infer.get('prompts', []) + control = cfg.infer.get('control', []) + height = cfg.infer.get('height', 512) + width = cfg.infer.get('width', 512) + downsampling_factor = cfg.infer.get('down_factor', 8) + sampler_type = cfg.infer.get('sampler_type', 'DDIM') + inference_steps = cfg.infer.get('inference_steps', 50) + output_type = cfg.infer.get('output_type', 'pil') + save_to_file = cfg.infer.get('save_to_file', True) + out_path = cfg.infer.get('out_path', '') + eta = cfg.infer.get('eta', 0) + guess_mode = cfg.model.get('guess_mode', False) + hint_image_size = cfg.infer.get('hint_image_size', 512) + control_image_preprocess = cfg.infer.get('control_image_preprocess', None) + + # get autocast_dtype + if cfg.trainer.precision in ['bf16', 'bf16-mixed']: + autocast_dtype = torch.bfloat16 + elif cfg.trainer.precision in [32, '32', '32-true']: + autocast_dtype = torch.float + elif cfg.trainer.precision in [16, '16', '16-mixed']: + autocast_dtype = torch.half + else: + raise ValueError('precision must be in [32, 16, "bf16"]') + + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): + + in_channels = model.model.diffusion_model.in_channels + + sampler = initialize_sampler(model, sampler_type.upper()) + + output = [] + throughput = [] + + if isinstance(prompts, str): + prompts = [prompts] + + assert len(prompts) == len(control) + + for control, prompt in zip(control, prompts): + tic = time.perf_counter() + tic_total = tic + txt_cond, txt_u_cond = encode_prompt( + model.cond_stage_model, prompt, unconditional_guidance_scale, batch_size + ) + + control = get_control_input(control, batch_size, hint_image_size, control_image_preprocess).to( + torch.cuda.current_device(), dtype=autocast_dtype + ) + + cond = {"c_concat": control, "c_crossattn": txt_cond} + u_cond = {"c_concat": None if guess_mode else control, "c_crossattn": txt_u_cond} + + toc = time.perf_counter() + conditioning_time = toc - tic + + latent_shape = [batch_size, height // downsampling_factor, width // downsampling_factor] + latents = torch.randn( + [batch_size, in_channels, height // downsampling_factor, width // downsampling_factor], generator=rng + ).to(torch.cuda.current_device()) + + tic = time.perf_counter() + samples, intermediates = sampler.sample( + S=inference_steps, + conditioning=cond, + batch_size=batch_size, + shape=latent_shape, + verbose=False, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=u_cond, + eta=eta, + x_T=latents, + ) + toc = time.perf_counter() + sampling_time = toc - tic + + tic = time.perf_counter() + images = decode_images(model, samples) + toc = time.perf_counter() + decode_time = toc - tic + + toc_total = time.perf_counter() + total_time = toc_total - tic_total + output.append(images) + + throughput.append( + { + 'text-conditioning-time': conditioning_time, + 'sampling-time': sampling_time, + 'decode-time': decode_time, + 'total-time': total_time, + 'sampling-steps': inference_steps, + } + ) + + # Convert output type and save to disk + if output_type == 'torch': + output = torch.cat(output, dim=0) + else: + output = torch_to_numpy(output) + if output_type == 'pil': + output = [numpy_to_pil(x) for x in output] + + if save_to_file: + os.makedirs(out_path, exist_ok=True) + # Saving control map + control_image = control[0].float().cpu().permute(1, 2, 0).numpy() + control_image = Image.fromarray((control_image * 255).round().astype("uint8")) + control_image.save(os.path.join(out_path, f'{prompt[:50]}_control.png')) + if output_type == 'pil': + for text_prompt, pils in zip(prompts, output): + for idx, image in enumerate(pils): + image.save(os.path.join(out_path, f'{text_prompt[:50]}_{idx}.png')) + else: + with open(os.path.join(out_path, 'output.pkl'), 'wb') as f: + pickle.dump(output, f) + else: + return output + + ave_metrics = {} + for key in throughput[0].keys(): + ave_metrics[f'avg-{key}'] = sum([dicts[key] for dicts in throughput]) / len(throughput) + if verbose: + print(ave_metrics) + + +@hydra_runner(config_path='conf', config_name='controlnet_infer') +def main(cfg): + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + model_cfg.unet_config.from_pretrained = None + model_cfg.first_stage_config.from_pretrained = None + model_cfg.control_stage_config.from_pretrained_unet = None + model_cfg.channels_last = True + model_cfg.capture_cudagraph_iters = -1 + + torch.backends.cuda.matmul.allow_tf32 = True + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronControlNet, cfg=cfg, model_cfg_modifier=model_cfg_modifier + ) + model = megatron_diffusion_model.model + model.cuda().eval() + + guess_mode = cfg.model.guess_mode + model.contol_scales = ( + [cfg.model.strength * (0.825 ** float(12 - i)) for i in range(13)] + if guess_mode + else ([cfg.model.strength] * 13) + ) + + rng = torch.Generator().manual_seed(cfg.infer.seed) + pipeline(model, cfg, rng=rng) + + +if __name__ == "__main__": + main() diff --git a/examples/multimodal/generative/controlnet/controlnet_train.py b/examples/multimodal/generative/controlnet/controlnet_train.py new file mode 100644 index 000000000000..8a7a46b79480 --- /dev/null +++ b/examples/multimodal/generative/controlnet/controlnet_train.py @@ -0,0 +1,54 @@ +from datetime import timedelta + +import pytorch_lightning as pl +import torch +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.strategies.ddp import DDPStrategy + +from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon +from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import ( + construct_image_augmentations, + identical_transform, +) +from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet +from nemo.collections.multimodal.models.controlnet.util import ImageLogger +from nemo.collections.multimodal.parts.stable_diffusion.utils import instantiate_from_config +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils.exp_manager import StatelessTimer, exp_manager + + +class MegatronControlNetTrainerBuilder(MegatronTrainerBuilder): + """Builder for T5 model Trainer with overrides.""" + + def create_trainer(self, callbacks=[]) -> Trainer: + strategy = self._training_strategy() + plugins = self._plugins() + return Trainer(plugins=plugins, strategy=strategy, **self.cfg.trainer, callbacks=callbacks) + + +@hydra_runner(config_path='conf', config_name='controlnet_v1-5.yaml') +def main(cfg): + callbacks = [] + + if cfg.model.get('image_logger', None): + callbacks.append(ImageLogger(**cfg.model.image_logger)) + + trainer = MegatronControlNetTrainerBuilder(cfg).create_trainer(callbacks=callbacks) + + exp_manager(trainer, cfg.get("exp_manager", None)) + + model = MegatronControlNet(cfg.model, trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/generative/convert_hf_ckpt_to_nemo.py b/examples/multimodal/generative/convert_hf_ckpt_to_nemo.py new file mode 100644 index 000000000000..cd7e97ded64f --- /dev/null +++ b/examples/multimodal/generative/convert_hf_ckpt_to_nemo.py @@ -0,0 +1,226 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Usage example: + python /opt/NeMo/examples/multimodal/generative/stable_diffusion/convert_hf_ckpt_to_nemo.py + --ckpt_path=path/to/hf.ckpt + --hparams_file=path/to/saved.yaml + --nemo_file_path=hf2sd.nemo + +Additionally, provide a NeMo hparams file with the correct model architecture arguments. Refer to examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml. +""" + +import os +import tempfile +from argparse import ArgumentParser + +import torch +from lightning_fabric.utilities.cloud_io import _load as pl_load +from omegaconf import OmegaConf +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.utils import AppState, logging +from nemo.utils.distributed import initialize_distributed + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +def get_args(): + parser = ArgumentParser() + parser.add_argument("--ckpt_path", type=str, default=None, required=True, help="Path to checkpoint.") + + parser.add_argument( + "--hparams_file", + type=str, + default=None, + required=False, + help="Path config for restoring. It's created during training and may need to be modified during restore if restore environment is different than training. Ex: /raid/nemo_experiments/megatron_gpt/hparams.yaml", + ) + parser.add_argument("--nemo_file_path", type=str, default=None, required=True, help="Path to output .nemo file.") + parser.add_argument("--gpus_per_node", type=int, required=False, default=1) + parser.add_argument("--tensor_model_parallel_size", type=int, required=False, default=1) + parser.add_argument("--pipeline_model_parallel_size", type=int, required=False, default=1) + parser.add_argument( + "--pipeline_model_parallel_split_rank", + type=int, + required=False, + default=None, + help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", + ) + parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) + parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") + parser.add_argument("--model_type", type=str, required=False, default="stable_diffusion") + parser.add_argument("--nemo_clip_path", type=str, required=False, help="Path to clip ckpt file in .nemo format") + + args = parser.parse_args() + return args + + +def load_config_and_state_from_nemo(nemo_path): + if torch.cuda.is_available(): + map_location = torch.device('cuda') + else: + map_location = torch.device('cpu') + save_restore_connector = NLPSaveRestoreConnector() + cwd = os.getcwd() + + with tempfile.TemporaryDirectory() as tmpdir: + try: + save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) + + # Change current working directory to + os.chdir(tmpdir) + config_yaml = os.path.join(tmpdir, save_restore_connector.model_config_yaml) + cfg = OmegaConf.load(config_yaml) + + model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) + state_dict = save_restore_connector._load_state_dict_from_disk(model_weights, map_location=map_location) + finally: + os.chdir(cwd) + + return cfg, state_dict + + +def mapping_hf_state_dict(hf_state_dict, model, clip_dict=None): + nemo_state = model.state_dict() + new_state_dict = {} + for k, v in hf_state_dict.items(): + k = 'model.' + k + # This is not necessary when you turn off model.inductor in config file + # if 'diffusion_model' in k: + # k = k.replace('diffusion_model', 'diffusion_model._orig_mod') + if 'in_layers' in k or 'out_layers' in k: + s = k.split('.') + idx = int(s[-2]) + if idx != 0: + k = ".".join(s[:-2] + [str(int(idx - 1))] + [s[-1]]) + if k in nemo_state: + new_state_dict[k] = v + if clip_dict: + for k, v in clip_dict.items(): + k = k.replace("model.text_encoder", "model.cond_stage_model.model") + if k in nemo_state: + new_state_dict[k] = v + for k in [ + 'betas', + 'alphas_cumprod', + 'alphas_cumprod_prev', + 'sqrt_alphas_cumprod', + 'sqrt_one_minus_alphas_cumprod', + 'log_one_minus_alphas_cumprod', + 'sqrt_recip_alphas_cumprod', + 'sqrt_recipm1_alphas_cumprod', + 'posterior_variance', + 'posterior_log_variance_clipped', + 'posterior_mean_coef1', + 'posterior_mean_coef2', + ]: + new_state_dict['model.' + k] = nemo_state['model.' + k] + + return new_state_dict + + +def convert(local_rank, rank, world_size, args): + app_state = AppState() + app_state.data_parallel_rank = 0 + num_nodes = world_size // args.gpus_per_node + if args.bcp: + trainer = Trainer( + devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu', plugins=[TorchElasticEnvironment()] + ) + else: + trainer = Trainer(devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu') + + app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = args.tensor_model_parallel_size + + # no use atm, use to split ranks in encoder/decoder models. + if args.pipeline_model_parallel_size > 1 and args.model_type in []: + if args.pipeline_model_parallel_split_rank is not None: + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_split_rank + else: + if args.pipeline_model_parallel_size % 2 != 0: + raise ValueError( + f"Pipeline model parallel size {args.pipeline_model_parallel_size} must be even if split rank is not specified." + ) + else: + # If split rank is not set, then we set it to be pipeline_model_parallel_size // 2 - this is because in most cases we have the same number of enc/dec layers. + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_size // 2 + else: + app_state.pipeline_model_parallel_split_rank = None + + app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size + + parallel_state.initialize_model_parallel( + tensor_model_parallel_size=app_state.tensor_model_parallel_size, + pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, + ) + + app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() + app_state.tensor_model_parallel_rank = parallel_state.get_tensor_model_parallel_rank() + + if args.ckpt_path.endswith('safetensors'): + from safetensors.torch import load_file as load_safetensors + + checkpoint = load_safetensors(args.ckpt_path) + else: + checkpoint = pl_load(args.ckpt_path, map_location='cpu') + if 'state_dict' in checkpoint.keys(): + checkpoint = checkpoint['state_dict'] + cfg = OmegaConf.load(args.hparams_file) + cfg.model.inductor = False + if args.model_type == 'stable_diffusion': + model = MegatronLatentDiffusion(cfg.model, trainer) + elif args.model_type == 'controlnet': + model = MegatronControlNet(cfg.model, trainer) + + if 'nemo' in model.cfg.cond_stage_config._target_: + assert ( + args.nemo_clip_path is not None + ), "To align with current hparams file, you need to provide .nemo checkpoint of clip model for stable diffusion. If you want to convert HF clip checkpoint to .nemo checkpoint first, please refer to /opt/NeMo/examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py" + _, clip_dict = load_config_and_state_from_nemo(args.nemo_clip_path) + else: + clip_dict = None + + state_dict = mapping_hf_state_dict(checkpoint, model, clip_dict=clip_dict) + + model._save_restore_connector = NLPSaveRestoreConnector() + + model.load_state_dict(state_dict) + + if torch.distributed.is_initialized(): + torch.distributed.barrier() + + model.save_to(args.nemo_file_path) + + logging.info(f'NeMo model saved to: {args.nemo_file_path}') + + +if __name__ == '__main__': + args = get_args() + local_rank, rank, world_size = initialize_distributed(args) + convert(local_rank, rank, world_size, args) diff --git a/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml b/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml new file mode 100644 index 000000000000..37e9b284e219 --- /dev/null +++ b/examples/multimodal/generative/dreambooth/conf/dreambooth.yaml @@ -0,0 +1,224 @@ +name: Dreambooth + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: bf16-mixed + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 400 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + limit_val_batches: 0 + +exp_manager: + exp_dir: null + name: ${name} + create_checkpoint_callback: True + create_tensorboard_logger: True + checkpoint_callback_params: + every_n_train_steps: 200 + every_n_epochs: 0 + monitor: reduced_train_loss + save_on_train_epoch_end: False + filename: '${name}-{step}' + save_top_k: -1 + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 2 # limited by GPU memory + global_batch_size: 2 # will use more micro batches to reach global batch size + + with_prior_preservation: False + use_cached_latents: True + prior_loss_weight: 0.5 + train_text_encoder: False + restore_from_path: /ckpts/nemo-v1-5-188000-ema.nemo #This ckpt is only used to generate regularization images, thus .nemo ckpt is needed + + + + + linear_start: 0.00085 + linear_end: 0.012 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: images + cond_stage_key: captions + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn # check + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + scale_by_std: False + ckpt_path: + ignore_keys: [ ] + parameterization: eps + clip_denoised: True + load_only_unet: False + cosine_s: 8e-3 + given_betas: + original_elbo_weight: 0 + v_posterior: 0 + l_simple_weight: 1 + use_positional_encodings: False + learn_logvar: False + logvar_init: 0 + beta_schedule: linear + loss_type: l2 + + concat_mode: True + cond_stage_forward: + text_embedding_dropout_rate: 0.1 + fused_opt: True + inductor: False + inductor_cudagraphs: False + channels_last: False + + unet_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel + from_pretrained: /ckpts/unet.bin #load unet weights for finetuning, can use .ckpt ckpts from various sources + from_NeMo: False #Must be specified when from pretrained is not None, False means loading unet from HF ckpt + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: + - 4 + - 2 + - 1 + num_res_blocks: 2 + channel_mult: + - 1 + - 2 + - 4 + - 4 + num_heads: 8 + use_spatial_transformer: true + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: False + + first_stage_config: + _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL + from_pretrained: /ckpts/vae.bin + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 #Never used + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [ ] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder + restore_from_path: /ckpts/openai.nemo + device: cuda + freeze: True + layer: "last" + # For compatibility of history version that uses HF clip model + # _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder + # version: openai/clip-vit-large-patch14 + # device: cuda + # max_length: 77 + + noise_scheduler: + _target_: nemo.collections.multimodal.models.dreambooth.util.sd_noise_scheduler + parameterization: eps + v_posterior: 0 + given_betas: + beta_schedule: linear + timesteps: 1000 + linear_start: 0.00085 + linear_end: 0.012 + cosine_s: 8e-3 + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + optim: + name: fused_adam + lr: 1e-6 + weight_decay: 0. + betas: + - 0.9 + - 0.999 + sched: + name: WarmupHoldPolicy + warmup_steps: 1 + hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + data: + name: pbss + num_workers: 4 + instance_dir: /datasets/instance_dir + instance_prompt: a photo of a sks dog + regularization_dir: /datasets/nemo_dogs + regularization_prompt: a photo of a dog + num_reg_images: 10 + num_images_per_prompt: 4 + resolution: 512 + center_crop: True + cached_instance_dir: #/datasets/instance_dir_cached + cached_reg_dir: #/datasets/nemo_dogs_cached + +##The below infer config is to use inference script generating regularization images +infer: + unconditional_guidance_scale: 7.5 + num_images_per_prompt: ${model.data.num_images_per_prompt} + height: 512 + width: 512 + down_factor: 8 + inference_steps: 50 + sampler_type: 'PLMS' + eta: 0 + output_type: 'pil' + save_to_file: False + out_path: ${model.data.regularization_dir} + prompts: ${model.data.regularization_prompt} \ No newline at end of file diff --git a/examples/multimodal/generative/dreambooth/conf/dreambooth_infer.yaml b/examples/multimodal/generative/dreambooth/conf/dreambooth_infer.yaml new file mode 100644 index 000000000000..fc8d35443767 --- /dev/null +++ b/examples/multimodal/generative/dreambooth/conf/dreambooth_infer.yaml @@ -0,0 +1,32 @@ +name: stable-diffusion-train + +infer: + unconditional_guidance_scale: 7.5 + num_images_per_prompt: 4 + height: 512 + width: 512 + down_factor: 8 + inference_steps: 100 + sampler_type: 'DDIM' + eta: 0 + output_type: 'pil' + save_to_file: True + out_path: 'dreambooth' + seed: 123 + prompts: + - 'a photo of a sks dog' + - 'a photo of a sks dog in the Acropolis' + - 'a photo of a sks dog in front of eiffel tower' + - 'a photo of sks dog sleeping' + - 'a photo of a sks dog riding a bike' + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + +model: + restore_from_path: null + precision: ${trainer.precision} \ No newline at end of file diff --git a/examples/multimodal/generative/dreambooth/dreambooth.py b/examples/multimodal/generative/dreambooth/dreambooth.py new file mode 100644 index 000000000000..2b6212f0bba9 --- /dev/null +++ b/examples/multimodal/generative/dreambooth/dreambooth.py @@ -0,0 +1,119 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import pytorch_lightning as pl +import torch + +from omegaconf import DictConfig, OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.connectors.checkpoint_connector import _CheckpointConnector + +from nemo.collections.multimodal.models.dreambooth.dreambooth import MegatronDreamBooth +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.multimodal.parts.stable_diffusion.pipeline import pipeline +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder + +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +def prepare_reg_data(cfg): + reg_dir = cfg.model.data.regularization_dir + num_reg_images = cfg.model.data.num_reg_images + num_images_per_prompt = cfg.model.data.num_images_per_prompt + reg_prompt = cfg.model.data.regularization_prompt + os.makedirs(reg_dir, exist_ok=True) + NUM_REG_IMAGES = len(os.listdir(reg_dir)) + if NUM_REG_IMAGES < num_reg_images: + + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + model_cfg.unet_config.use_flash_attention = False + model_cfg.micro_batch_size = cfg.model.micro_batch_size + model_cfg.global_batch_size = cfg.model.global_batch_size + model_cfg.unet_config.from_pretrained = None + model_cfg.first_stage_config.from_pretrained = None + model_cfg.target = 'nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion' + + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier + ) + model = megatron_diffusion_model.model + rng = torch.Generator() + rng.manual_seed(trainer.global_rank * 100 + cfg.model.seed) + images_to_generate = cfg.model.data.num_reg_images - NUM_REG_IMAGES + images_to_generate = images_to_generate // trainer.world_size + + logging.info( + f"No enough images in regularization folder, generating {images_to_generate} from provided ckpt on each device" + ) + + for i in range(images_to_generate // num_images_per_prompt + 1): + output = pipeline(model, cfg, verbose=False, rng=rng) + for text_prompt, pils in zip(reg_prompt, output): + for idx, image in enumerate(pils): + image.save( + os.path.join( + cfg.infer.out_path, + f'{reg_prompt}_{trainer.global_rank}_{NUM_REG_IMAGES + i * num_images_per_prompt + idx}.png', + ) + ) + del model + del trainer + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + +@hydra_runner(config_path='conf', config_name='dreambooth.yaml') +def main(cfg): + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + torch.backends.cuda.matmul.allow_tf32 = True + + if cfg.model.with_prior_preservation: + prepare_reg_data(cfg) + parallel_state.destroy_model_parallel() + + trainer = MegatronTrainerBuilder(cfg).create_trainer() + + exp_manager(trainer, cfg.exp_manager) + + model = MegatronDreamBooth(cfg.model, trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/generative/dreambooth/dreambooth_infer.py b/examples/multimodal/generative/dreambooth/dreambooth_infer.py new file mode 100644 index 000000000000..e652fa68ddcd --- /dev/null +++ b/examples/multimodal/generative/dreambooth/dreambooth_infer.py @@ -0,0 +1,44 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.multimodal.parts.stable_diffusion.pipeline import pipeline +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.config import hydra_runner + + +@hydra_runner(config_path='conf', config_name='dreambooth_infer') +def main(cfg): + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + model_cfg.unet_config.use_flash_attention = False + model_cfg.unet_config.from_pretrained = None + model_cfg.first_stage_config.from_pretrained = None + model_cfg.target = 'nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion' + + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier + ) + model = megatron_diffusion_model.model + model.cuda().eval() + + rng = torch.Generator().manual_seed(cfg.infer.seed) + pipeline(model, cfg, rng=rng) + + +if __name__ == "__main__": + main() diff --git a/examples/multimodal/generative/imagen/README.md b/examples/multimodal/generative/imagen/README.md new file mode 100644 index 000000000000..ba33b649cb35 --- /dev/null +++ b/examples/multimodal/generative/imagen/README.md @@ -0,0 +1,104 @@ +# Imagen +## A. Overview + +Imagen is a multi-stage text-to-image diffusion model with an unprecedented degree of photorealism and a deep level of language understanding. Given a text prompt, Imagen first generates an image at a 64x64 resolution and then upsamples the generated image to 256x256 and 1024x1024 resolutions, all using diffusion models. + +**Table of Contents:** +- [Imagen](#imagen) + - [A. Overview](#a-overview) + - [B. Imagen Pipeline](#b-imagen-pipeline) + - [C. Files in this folder](#c-files-in-this-folder) + - [D. Imagen Training](#d-imagen-training) + - [D.1 Training Dataset](#d1-training-dataset) + - [D.2 Training configs](#d2-training-configs) + - [E. Imagen Inference](#e-imagen-inference) + - [E.1 Inference Settings](#e1-inference-settings) + - [E.2 Running the sample inference code](#e2-running-the-sample-inference-code) + - [E.3 Inference GPU Memory Usage](#e3-inference-gpu-memory-usage) + - [E.3.1 FP16 Inference](#e31-fp16-inference) + - [E.3.2 FP32 Inference](#e32-fp32-inference) + - [E.3.3 AMP Inference (Autocast Enabled)](#e33-amp-inference-autocast-enabled) + - [F. UNet Architecture](#f-unet-architecture) + - [F.1 U-Net (used for base model)](#f1-u-net-used-for-base-model) + - [F.2 Efficient U-Net (used for SR models)](#f2-efficient-u-net-used-for-sr-models) + +## B. Imagen Pipeline + +Imagen comprises a frozen text encoder (e.g. T5-XXL) to map input text into a sequence of embeddings, and a 64x64 image diffusion model, followed by two super-resolution diffusion models for generating 256x256 and 1024x1024 images. All diffusion models are conditioned on the text embedding sequence and use classifier-free guidance. + +## C. Files in this folder + +- [imagen_training.py](imagen_training.py): Script for running inference +- [imagen_generate_images.py](imagen_generate_images.py): Script for generating images for FID-CLIP analysis +- [imagen_infer.py](imagen_infer.py): Script for running inference + +## D. Imagen Training + +All three diffusion models (64x64, 256x256, 1024x1024) can be trained independently. + +### D.1 Training Dataset + +### D.2 Training configs +| configs | Description | +|---|---| +| base64-2b.yaml | 2b-parameter base 64x64 model as described in Imagen paper | +| base64-500m.yaml | 500m-parameter base 64x64 model with decreased number of embedding channels| +|sr256-400m.yaml| 400m-parameter sr 256x256 model as described in Imagen paper | +|sr1024-400m.yaml| 400m-parameter sr 1024x1024 model as described in Imagen paper | + +## E. Imagen Inference + +### E.1 Inference Settings + +[inference_pipeline.yaml](conf/inference_pipeline.yaml) specifies every config for running the sample inference code. Specifically: +- num_images_per_promt: The number of images you want to generate for each text prompt +- model_name: Different pre-defined configs (not used for now) +- run_ema_model: Either run reg/ema model for pretrained models +- customized_model: Instead of loading pre-defined models, load specified checkpoint. .ckpt checkpoint (generated during in-the-middle of training) and .nemo checkpoint (generated once training completed) are both acceptable +- target_resolution: should be one of [64, 256, 1024] +- inference_precision: Running inference in one of [16, 32, AMP] mode +- dynamic_thresholding: Whether to use dynamic thresholding when generating images +- texts: List of text prompts that are used to generate images +- output_path: The path to save generate images +- encoder_path: If not set (null), it will download text encoder first time running the inference code (and will be saved to HF_HOME), you can also load it offline by setting it to the prepared folder +- samplers: List of sampler settings that are used for each model. `step` (the number of iterations to denoise the image, ideally the larger the better, but also consume more time) and `cfg` for classifier free guidance value. You can tweak these values for better visual quality. + +### E.2 Running the sample inference code +``` +(inside NeMo root folder) +python examples/multimodal/generative/imagen/imagen_infer.py +``` + +### E.3 Inference GPU Memory Usage + +#### E.3.1 FP16 Inference +| Output\Batch size | 1 | 8 | +|-------------------|-------|-------| +| 64x64 | 11.7G | 11.9G | +| 256x256 | 12.5G | 13.0G | +| 1024x1024 | 14.1G | 21.6G | + +#### E.3.2 FP32 Inference +| Output\Batch size | 1 | 8 | +|-------------------|-------|-------| +| 64x64 | 21.7G | 22.6G | +| 256x256 | 23.4G | 24.5G | +| 1024x1024 | 26.6G | 40.6G | + +#### E.3.3 AMP Inference (Autocast Enabled) +| Output\Batch size | 1 | 8 | +|-------------------|-------|-------| +| 64x64 | 22.4G | 23.4G | +| 256x256 | 24.0G | 25.1G | +| 1024x1024 | 26.4G | 33.7G | + +## F. UNet Architecture + +We have prepared two types of UNet for Imagen according to the paper. Base model (64x64) and SR models (256x256, 1024x1024) are using different UNet models. + +### F.1 U-Net (used for base model) + + + +### F.2 Efficient U-Net (used for SR models) + diff --git a/examples/multimodal/generative/imagen/conf/base64-2b.yaml b/examples/multimodal/generative/imagen/conf/base64-2b.yaml new file mode 100644 index 000000000000..4c02c97c9e4e --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/base64-2b.yaml @@ -0,0 +1,142 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-base64 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-base64-nf512 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 32 # limited by GPU memory + global_batch_size: 32 # will use more micro batches to reach global batch size + inductor: True + inductor_cudagraphs: False + unet_type: base + channels_last: True + + unet: + embed_dim: 512 + image_size: 64 + channels: 3 + num_res_blocks: 3 + channel_mult: [ 1, 2, 3, 4 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 2048 + attention_type: fused + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [ 8, 16, 32 ] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: True + resblock_updown: False + resample_with_conv: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + ddp_overlap: True # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce + + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + # If want to switch to continuous DDPM training, + # use the following config: + # preconditioning_type: DDPM + # preconditioning: + # loss_type: l2 + # pred_objective: noise + # noise_schedule: cosine + # timesteps: 1000 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 64 + center_crop_h_w: 64, 64 + horizontal_flip: False + filterings: null + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: False + local_root_path: /datasets + verbose: False + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/base64-500m-edm.yaml b/examples/multimodal/generative/imagen/conf/base64-500m-edm.yaml new file mode 100644 index 000000000000..11224e3b84d2 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/base64-500m-edm.yaml @@ -0,0 +1,136 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-base64 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-base64-nf256 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 100 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 48 # limited by GPU memory + global_batch_size: 48 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + unet_type: base + + unet: + embed_dim: 256 + image_size: 64 + channels: 3 + num_res_blocks: 3 + channel_mult: [ 1, 2, 3, 4 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 512 + attention_type: fused + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [ 8, 16, 32 ] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: False + resblock_updown: False + resample_with_conv: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 64 + center_crop_h_w: 64, 64 + horizontal_flip: False + filterings: null + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: False + local_root_path: /datasets + verbose: False + pbss_checkpoint_saving: + enable: False + pbss_credentials_file: pbss_credentials_joc.secret + save_frequency: 1000 + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null diff --git a/examples/multimodal/generative/imagen/conf/base64-500m.yaml b/examples/multimodal/generative/imagen/conf/base64-500m.yaml new file mode 100644 index 000000000000..4541110caf98 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/base64-500m.yaml @@ -0,0 +1,144 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + limit_val_batches: 0 + log_every_n_steps: 5 # Interval of logging. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-base64 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-base64-nf256 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 128 # limited by GPU memory + global_batch_size: 128 # will use more micro batches to reach global batch size + inductor: True + inductor_cudagraphs: False + unet_type: base + channels_last: True + + unet: + embed_dim: 256 + image_size: 64 + channels: 3 + num_res_blocks: 3 + channel_mult: [ 1, 2, 3, 4 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 512 + attention_type: fused + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [ 8, 16, 32 ] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: True + resblock_updown: False + resample_with_conv: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + ddp_overlap: False # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce + + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + # If want to switch to continuous DDPM training, + # use the following config: + # preconditioning_type: DDPM + # preconditioning: + # loss_type: l2 + # pred_objective: noise + # noise_schedule: cosine + # timesteps: 1000 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + synthetic_data: False + synthetic_data_length: 800000 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 64 + center_crop_h_w: 64, 64 + horizontal_flip: False + filterings: null + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: False + local_root_path: /datasets + verbose: False + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null diff --git a/examples/multimodal/generative/imagen/conf/base64-500m_online_encoding.yaml b/examples/multimodal/generative/imagen/conf/base64-500m_online_encoding.yaml new file mode 100644 index 000000000000..efbab7bc1ca8 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/base64-500m_online_encoding.yaml @@ -0,0 +1,137 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-base64 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-base64-nf256 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 100 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 48 # limited by GPU memory + global_batch_size: 48 # will use more micro batches to reach global batch size + + unet_type: base + unet: + embed_dim: 256 + image_size: 64 + channels: 3 + num_res_blocks: 3 + channel_mult: [ 1, 2, 3, 4 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 512 + attention_type: fused + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [ 8, 16, 32 ] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: True + flash_attention: False + resblock_updown: False + resample_with_conv: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + preconditioning_type: DDPM + preconditioning: + loss_type: l2 + pred_objective: noise + noise_schedule: cosine + timesteps: 1000 + + conditioning: + online_encoding: True # defaults to False (use precached encodings) if not specified + # Online encoding increases training time by about 3-4x, and is only for users who want to do a quick dev run of + # Imagen, and/or those who do not have the disk space to store precached embeddings. + # Optionally specify encoder_path if online_encoding; else, specify precached_key and out_key + encoder_path: # folder path to t5xxl-encoder.bin, or leave empty to download (and cache) t5-11b weights + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 64 + center_crop_h_w: 64, 64 + horizontal_flip: False + filterings: null + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: False + local_root_path: /datasets + verbose: False + pbss_checkpoint_saving: + enable: False + pbss_credentials_file: pbss_credentials_joc.secret + save_frequency: 1000 + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/fid_inference.yaml b/examples/multimodal/generative/imagen/conf/fid_inference.yaml new file mode 100644 index 000000000000..413da2b8eeac --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/fid_inference.yaml @@ -0,0 +1,26 @@ +num_images_per_promt: 8 # The number of images generated for each promt text +model_name: null # Avaliable model_name defined in pretrained_models.yaml +run_ema_model: True # Whether load the reg/ema model when using pretrained models +customized_model: # Mutually exclusive with model_name + base_ckpt: /aot/exp/nemo-megatron-stacked-ddpm-16n/imagen-nemo/checkpoints/imagen-nemo--reduced_train_loss=0.03-step=100000-consumed_samples=512000000.0.ckpt # Either .ckpt or .nemo is accepatable + base_cfg: examples/multimodal/generative/imagen/conf/base64-500m.yaml # Must provided if loading .ckpt checkpoint + sr256_ckpt: null + sr256_cfg: examples/multimodal/generative/imagen/conf/sr256-400m.yaml + sr1024_ckpt: null + sr1024_cfg: null +target_resolution: 64 # in [64, 256, 1024] +inference_precision: '32' # [16, 32, AMP] +thresholding_method: 'dynamic' +output_path: 'output/imagen-megatron-pipeline-fid' # Save location +record_time: True # Whether to record inference time meta +encoder_path: '/ckpts/encoders' # Set to null if you wish to download encoders on the fly +samplings: + - + step: 250 + cfg: 7.5 + - + step: 20 + cfg: 7.5 + + + diff --git a/examples/multimodal/generative/imagen/conf/imagen_fid_images.yaml b/examples/multimodal/generative/imagen/conf/imagen_fid_images.yaml new file mode 100644 index 000000000000..5a5867cfae50 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/imagen_fid_images.yaml @@ -0,0 +1,57 @@ +name: imagen_fid_images + +fid: + classifier_free_guidance: + - 1 + - 1.5 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + nnodes_per_cfg: 1 + ntasks_per_node: 8 + local_task_id: null + num_images_to_eval: 30000 + coco_captions_path: /aot/datasets/coco2014/coco2014_val_sampled_30k/captions + coco_images_path: /aot/datasets/coco2014/coco2014_val/images_256 + save_path: output/fid-launcher-test + ncaptions_per_batch: 4 + save_all_res: False + save_text: False + +infer: + num_images_per_promt: 1 # The number of images generated for each promt text + model_name: null # Avaliable model_name defined in pretrained_models.yaml + run_ema_model: True # Whether load the reg/ema model when using pretrained models + customized_model: # Mutually exclusive with model_name + base_ckpt: /aot/exp/ckpts/imagen-megatron/edm-fused-1150k-ema.nemo # Either .ckpt or .nemo is accepatable + base_cfg: null # Must provided if loading .ckpt checkpoint + sr256_ckpt: /aot/exp/ckpts/imagen-megatron/sr-noise-aug-280k.nemo + sr256_cfg: null + sr1024_ckpt: null + sr1024_cfg: null + target_resolution: 256 # in [64, 256, 1024] + inference_precision: '32' # [16, 32, AMP] + thresholding_method: 'dynamic' + record_time: True # Whether to record inference time meta + encoder_path: '/ckpts/encoders' # Set to null if you wish to download encoders on the fly + samplings: + - + step: 30 + - + step: 20 + +models: + - + restore_from_path: /aot/exp/ckpts/imagen-megatron/edm-fused-1150k-ema.nemo + - + restore_from_path: /aot/exp/ckpts/imagen-megatron/sr-noise-aug-280k.nemo + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 32 + logger: False # logger provided by exp_manager diff --git a/examples/multimodal/generative/imagen/conf/inference_pipeline.yaml b/examples/multimodal/generative/imagen/conf/inference_pipeline.yaml new file mode 100644 index 000000000000..1b4bbd9e5a17 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/inference_pipeline.yaml @@ -0,0 +1,42 @@ +num_images_per_promt: 4 # The number of images generated for each promt text +model_name: null # Avaliable model_name defined in pretrained_models.yaml +run_ema_model: True # Whether load the reg/ema model when using pretrained models +customized_model: # Mutually exclusive with model_name + base_ckpt: null # Either .ckpt or .nemo is accepatable + base_cfg: examples/multimodal/generative/imagen/conf/base64-500m.yaml # Must provided if loading .ckpt checkpoint + sr256_ckpt: null + sr256_cfg: examples/multimodal/generative/imagen/conf/sr256-400m.yaml + sr1024_ckpt: null + sr1024_cfg: examples/multimodal/generative/imagen/conf/sr1024-400m.yaml +target_resolution: 64 # in [64, 256, 1024] +inference_precision: 32 # [16, 32, AMP] +thresholding_method: dynamic +texts: + - 'a photograph of an astronaut riding a horse' + - 'a highly detailed digital painting of a portal in a mystic forest with many beautiful trees. A person is standing in front of the portal' + - A photo of a Shiba Inu dog with a backpack riding a bike. It is wearing sunglasses and a beach hat. + - A cute corgi lives in a house made out of sushi. + - A high contrast portrait of a very happy fuzzy panda dressed as a chef in a high end kitchen making dough. There is a painting of flowers on the wall behind him. + - A brain riding a rocketship heading towards the moon. + - One cat and two dogs sitting on the grass. + - A wine glass on top of a dog. + - A blue coloured pizza. + - A transparent sculpture of a duck made out of glass. There is a painting on the wall behind it. + - A raccoon wearing cowboy hat and black leather jacket is behind the backyard window. Rain droplets on the window. + +output_path: 'output/imagen_output' # Save location +record_time: True # Whether to record inference time meta +encoder_path: '/ckpts/encoders' # Set to null if you wish to download encoders on the fly +samplings: + - # Base64 + step: 30 + cfg: 7.5 + - # SR256 + step: 20 + cfg: 8 + - # SR1024 + step: 20 + cfg: 7.5 + + + diff --git a/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml b/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml new file mode 100644 index 000000000000..3652267193b1 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/sr1024-600m.yaml @@ -0,0 +1,145 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-1024 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-sr1024-nf128 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False +model: + + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 64 # limited by GPU memory + global_batch_size: 64 # will use more micro batches to reach global batch size + inductor: True + inductor_cudagraphs: False + unet_type: sr + channels_last: True + + unet: + embed_dim: 128 + image_size: 1024 + channels: 3 + channel_mult: [ 1, 2, 4, 8, 8 ] + num_attn_heads: 8 + per_head_channels: 64 + attention_type: cross + atnn_enabled_at: [ 0, 0, 0, 1, 1 ] + feature_pooling_type: attention + stride: 2 + num_resblocks: [ 2, 4, 8, 8, 8 ] + learned_sinu_pos_emb_dim: 0 + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: True + flash_attention: False + skip_connection_scaling: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + ddp_overlap: True # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce + + noise_cond_aug: True + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + # If want to switch to continuous DDPM training, + # use the following config: + # preconditioning_type: DDPM + # preconditioning: + # loss_type: l2 + # pred_objective: noise + # noise_schedule: cosine + # timesteps: 1000 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 1024 + center_crop_h_w: 256, 256 + horizontal_flip: False + filterings: + resolution: + method: larger + value: 1024 + estimated_portion: 0.2 # Estimated % of examples left after filtering. This is use to estimate # epoch + target_resolutions: [64, 256] + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: True + local_root_path: /datasets + verbose: False + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-400m-edm.yaml b/examples/multimodal/generative/imagen/conf/sr256-400m-edm.yaml new file mode 100644 index 000000000000..22ab0672e577 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/sr256-400m-edm.yaml @@ -0,0 +1,222 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-sr256-nf128 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 16 # limited by GPU memory + global_batch_size: 16 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + + unet_type: sr-unet + unet: + embed_dim: 128 + image_size: 256 + channels: 3 + num_res_blocks: [2, 2, 3, 4, 3] + channel_mult: [ 1, 2, 4, 6, 6 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 512 + attention_type: fused + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [32, 16] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: True + resblock_updown: False + resample_with_conv: True + low_res_cond: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + # - datasets/improved-aesthetic/wdinfo-selene.pkl + augmentations: + resize_smallest_side: 256 + center_crop_h_w: 256, 256 + horizontal_flip: False + filterings: + resolution: + method: larger + value: 256 + estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch + corruption_aug: + target_resolution: [ 64, 256 ] + kernel_radius_dict: # used for blurring & resizing, otherwise, not necessary. + 8: 1 + 16: 2 + 32: 3 + 64: 6 + 128: 11 + 256: 22 + 512: 44 + 1024: 88 + 2048: 176 + 4096: 352 + + blur: + add_random_blur: True + blur_prob1: 0.2 + blur_prob2: 0.2 + + blur_sigma_dict: + 8: 0.25 + 16: 0.5 + 32: 0.75 + 64: 1.5 + 128: 3 + 256: 6 + 512: 12 + 1024: 24 + 2048: 48 + 4096: 96 + + resize: + add_random_resize: True + + resize_prob1: + up: 0.2 + down: 0.2 + keep: 0.6 + resize_prob2: + up: 0.2 + down: 0.2 + keep: 0.6 + + resize_range1: + - 0.8 + - 1.2 + resize_range2: + - 0.8 + - 1.2 + + noise: + add_random_noise: True + gaussian_noise_prob1: 1.0 # 0.5 + gaussian_noise_prob2: 1.0 # 0.5 + gray_noise_prob1: 0.0 # 0.4 + gray_noise_prob2: 0.0 # 0.4 + + gaussian_sigma_range1: + - 0 + - 3 + gaussian_sigma_range2: + - 0 + - 2.5 + + poisson_scale_range1: + - 0.005 + - 3 + poisson_scale_range2: + - 0.005 + - 2.5 + + jpeg: + add_random_compression: False + jpeg_range1: + - 75 + - 95 + jpeg_range2: + - 75 + - 95 + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: True + local_root_path: /datasets + verbose: False + pbss_checkpoint_saving: + enable: False + pbss_credentials_file: pbss_credentials_joc.secret + save_frequency: 1000 + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-400m.yaml b/examples/multimodal/generative/imagen/conf/sr256-400m.yaml new file mode 100644 index 000000000000..984bddda2c55 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/sr256-400m.yaml @@ -0,0 +1,150 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-sr256-nf128 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 16 # limited by GPU memory + global_batch_size: 16 # will use more micro batches to reach global batch size + inductor: True + inductor_cudagraphs: False + channels_last: True + + unet_type: sr-unet + unet: + embed_dim: 128 + image_size: 256 + channels: 3 + num_res_blocks: [2, 2, 3, 4, 3] + channel_mult: [ 1, 2, 4, 6, 6 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 512 + attention_type: fused + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [32, 16] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: True + resblock_updown: False + resample_with_conv: True + low_res_cond: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + ddp_overlap: True # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce + + noise_cond_aug: True + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + # If want to switch to continuous DDPM training, + # use the following config: + # preconditioning_type: DDPM + # preconditioning: + # loss_type: l2 + # pred_objective: noise + # noise_schedule: cosine + # timesteps: 1000 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 256 + center_crop_h_w: 256, 256 + horizontal_flip: False + filterings: + resolution: + method: larger + value: 256 + estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch + target_resolutions: [ 64, 256 ] + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: True + local_root_path: /datasets + verbose: False + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-450m-edm.yaml b/examples/multimodal/generative/imagen/conf/sr256-450m-edm.yaml new file mode 100644 index 000000000000..cbee92a40a58 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/sr256-450m-edm.yaml @@ -0,0 +1,222 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-sr256-nf128 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 16 # limited by GPU memory + global_batch_size: 16 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + + unet_type: sr-unet + unet: + embed_dim: 128 + image_size: 256 + channels: 3 + num_res_blocks: [2, 2, 3, 4, 3] + channel_mult: [ 1, 2, 4, 6, 6 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 512 + attention_type: stacked + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [32, 16] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: True + resblock_updown: False + resample_with_conv: True + low_res_cond: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + # - datasets/improved-aesthetic/wdinfo-selene.pkl + augmentations: + resize_smallest_side: 256 + center_crop_h_w: 256, 256 + horizontal_flip: False + filterings: + resolution: + method: larger + value: 256 + estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch + corruption_aug: + target_resolution: [ 64, 256 ] + kernel_radius_dict: # used for blurring & resizing, otherwise, not necessary. + 8: 1 + 16: 2 + 32: 3 + 64: 6 + 128: 11 + 256: 22 + 512: 44 + 1024: 88 + 2048: 176 + 4096: 352 + + blur: + add_random_blur: True + blur_prob1: 0.2 + blur_prob2: 0.2 + + blur_sigma_dict: + 8: 0.25 + 16: 0.5 + 32: 0.75 + 64: 1.5 + 128: 3 + 256: 6 + 512: 12 + 1024: 24 + 2048: 48 + 4096: 96 + + resize: + add_random_resize: True + + resize_prob1: + up: 0.2 + down: 0.2 + keep: 0.6 + resize_prob2: + up: 0.2 + down: 0.2 + keep: 0.6 + + resize_range1: + - 0.8 + - 1.2 + resize_range2: + - 0.8 + - 1.2 + + noise: + add_random_noise: True + gaussian_noise_prob1: 1.0 # 0.5 + gaussian_noise_prob2: 1.0 # 0.5 + gray_noise_prob1: 0.0 # 0.4 + gray_noise_prob2: 0.0 # 0.4 + + gaussian_sigma_range1: + - 0 + - 3 + gaussian_sigma_range2: + - 0 + - 2.5 + + poisson_scale_range1: + - 0.005 + - 3 + poisson_scale_range2: + - 0.005 + - 2.5 + + jpeg: + add_random_compression: False + jpeg_range1: + - 75 + - 95 + jpeg_range2: + - 75 + - 95 + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: True + local_root_path: /datasets + verbose: False + pbss_checkpoint_saving: + enable: False + pbss_credentials_file: pbss_credentials_joc.secret + save_frequency: 1000 + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-600m-edm-noise.yaml b/examples/multimodal/generative/imagen/conf/sr256-600m-edm-noise.yaml new file mode 100644 index 000000000000..3e5318186961 --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/sr256-600m-edm-noise.yaml @@ -0,0 +1,142 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-sr256-nf128 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 32 # limited by GPU memory + global_batch_size: 32 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + + unet_type: sr + unet: + embed_dim: 128 + image_size: 256 + channels: 3 + channel_mult: [ 1, 2, 4, 8, 8 ] + num_attn_heads: 8 + per_head_channels: 64 + attention_type: stacked + atnn_enabled_at: [ 0, 0, 0, 1, 1 ] + feature_pooling_type: attention + stride: 2 + num_resblocks: [ 2, 4, 8, 8, 8 ] + learned_sinu_pos_emb_dim: 0 + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: False + skip_connection_scaling: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + noise_cond_aug: True + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 256 + center_crop_h_w: 256, 256 + horizontal_flip: False + filterings: + resolution: + method: larger + value: 256 + estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch + corruption_aug: + target_resolution: [ 64, 256 ] + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: True + local_root_path: /datasets + verbose: False + pbss_checkpoint_saving: + enable: False + pbss_credentials_file: pbss_credentials_joc.secret + save_frequency: 1000 + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-600m-edm.yaml b/examples/multimodal/generative/imagen/conf/sr256-600m-edm.yaml new file mode 100644 index 000000000000..67f05c52ff6e --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/sr256-600m-edm.yaml @@ -0,0 +1,219 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-sr256-nf128 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 32 # limited by GPU memory + global_batch_size: 32 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + + unet_type: sr + unet: + embed_dim: 128 + image_size: 256 + channels: 3 + channel_mult: [ 1, 2, 4, 8, 8 ] + num_attn_heads: 8 + per_head_channels: 64 + attention_type: stacked + atnn_enabled_at: [ 0, 0, 0, 1, 1 ] + feature_pooling_type: attention + stride: 2 + num_resblocks: [ 2, 4, 8, 8, 8 ] + learned_sinu_pos_emb_dim: 0 + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: False + skip_connection_scaling: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + # - datasets/improved-aesthetic/wdinfo-selene.pkl + augmentations: + resize_smallest_side: 256 + center_crop_h_w: 256, 256 + horizontal_flip: False + filterings: + resolution: + method: larger + value: 256 + estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch + corruption_aug: + target_resolution: [ 64, 256 ] + kernel_radius_dict: # used for blurring & resizing, otherwise, not necessary. + 8: 1 + 16: 2 + 32: 3 + 64: 6 + 128: 11 + 256: 22 + 512: 44 + 1024: 88 + 2048: 176 + 4096: 352 + + blur: + add_random_blur: True + blur_prob1: 0.2 + blur_prob2: 0.2 + + blur_sigma_dict: + 8: 0.25 + 16: 0.5 + 32: 0.75 + 64: 1.5 + 128: 3 + 256: 6 + 512: 12 + 1024: 24 + 2048: 48 + 4096: 96 + + resize: + add_random_resize: True + + resize_prob1: + up: 0.2 + down: 0.2 + keep: 0.6 + resize_prob2: + up: 0.2 + down: 0.2 + keep: 0.6 + + resize_range1: + - 0.8 + - 1.2 + resize_range2: + - 0.8 + - 1.2 + + noise: + add_random_noise: True + gaussian_noise_prob1: 1.0 # 0.5 + gaussian_noise_prob2: 1.0 # 0.5 + gray_noise_prob1: 0.0 # 0.4 + gray_noise_prob2: 0.0 # 0.4 + + gaussian_sigma_range1: + - 0 + - 3 + gaussian_sigma_range2: + - 0 + - 2.5 + + poisson_scale_range1: + - 0.005 + - 3 + poisson_scale_range2: + - 0.005 + - 2.5 + + jpeg: + add_random_compression: False + jpeg_range1: + - 75 + - 95 + jpeg_range2: + - 75 + - 95 + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: True + local_root_path: /datasets + verbose: False + pbss_checkpoint_saving: + enable: False + pbss_credentials_file: pbss_credentials_joc.secret + save_frequency: 1000 + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/conf/sr256-600m.yaml b/examples/multimodal/generative/imagen/conf/sr256-600m.yaml new file mode 100644 index 000000000000..115e9dd3099c --- /dev/null +++ b/examples/multimodal/generative/imagen/conf/sr256-600m.yaml @@ -0,0 +1,146 @@ +name: imagen-nemo # The name of your model +allow_tf32: True + +trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + + +exp_manager: + exp_dir: /train/imagen-256 # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: imagen-sr256-nf128 + project: imagen + group: nemo-imagen + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 64 # limited by GPU memory + global_batch_size: 64 # will use more micro batches to reach global batch size + inductor: True + inductor_cudagraphs: False + channels_last: True + + unet_type: sr + unet: + embed_dim: 128 + image_size: 256 + channels: 3 + channel_mult: [ 1, 2, 4, 8, 8 ] + num_attn_heads: 8 + per_head_channels: 64 + attention_type: fused + atnn_enabled_at: [ 0, 0, 0, 1, 1 ] + feature_pooling_type: attention + stride: 2 + num_resblocks: [ 2, 4, 8, 8, 8 ] + learned_sinu_pos_emb_dim: 0 + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: True + skip_connection_scaling: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + ddp_overlap: True # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce + + noise_cond_aug: True + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + # If want to switch to continuous DDPM training, + # use the following config: + # preconditioning_type: DDPM + # preconditioning: + # loss_type: l2 + # pred_objective: noise + # noise_schedule: cosine + # timesteps: 1000 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + train: + dataset_path: + - datasets/laion_aesthetic/wdinfo-selene.pkl # 48,874,000 + - datasets/coyo-700m/wdinfo-selene.pkl # 627,172,000 + augmentations: + resize_smallest_side: 256 + center_crop_h_w: 256, 256 + horizontal_flip: False + filterings: + resolution: + method: larger + value: 256 + estimated_portion: 0.8 # Estimated % of examples left after filtering. This is use to estimate # epoch + target_resolutions: [64, 256] + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: True + local_root_path: /datasets + verbose: False + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null \ No newline at end of file diff --git a/examples/multimodal/generative/imagen/generate_fid_images.py b/examples/multimodal/generative/imagen/generate_fid_images.py new file mode 100644 index 000000000000..55ca92ace8bd --- /dev/null +++ b/examples/multimodal/generative/imagen/generate_fid_images.py @@ -0,0 +1,104 @@ +import os +import time + +import torch +from omegaconf.omegaconf import open_dict +from pytorch_lightning import Trainer + +from nemo.collections.multimodal.models.imagen.imagen_pipeline import ImagenPipeline +from nemo.core.config import hydra_runner + + +@hydra_runner(config_path='conf', config_name='imagen_fid_images') +def main(cfg): + # Read configuration parameters + nnodes_per_cfg = cfg.fid.nnodes_per_cfg + ntasks_per_node = cfg.fid.ntasks_per_node + local_task_id = cfg.fid.local_task_id + num_images_to_eval = cfg.fid.num_images_to_eval + path = cfg.fid.coco_captions_path + save_text = cfg.fid.save_text + + node_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0)) + node_id_per_cfg = node_id % nnodes_per_cfg + + current_node_cfg = cfg.fid.classifier_free_guidance[node_id // nnodes_per_cfg] + save_path = os.path.join(cfg.fid.save_path, str(current_node_cfg)) + + # Read and store captions + captions = [] + caption_files = sorted(os.listdir(path)) + assert len(caption_files) >= num_images_to_eval + for file in caption_files[:num_images_to_eval]: + with open(os.path.join(path, file), 'r') as f: + captions += f.readlines() + print(f"The total number of captions to generate is: {len(captions)}") + + # Calculate partition sizes and select the partition for the current node + partition_size_per_node = num_images_to_eval // nnodes_per_cfg + start_idx = node_id_per_cfg * partition_size_per_node + end_idx = (node_id_per_cfg + 1) * partition_size_per_node if node_id_per_cfg != nnodes_per_cfg - 1 else None + captions = captions[start_idx:end_idx] + print(f"Current node {node_id} will generate images from {start_idx} to {end_idx}") + + local_task_id = int(local_task_id) if local_task_id is not None else int(os.environ.get("SLURM_LOCALID", 0)) + partition_size_per_task = int(len(captions) // ntasks_per_node) + + # Select the partition for the current task + start_idx = local_task_id * partition_size_per_task + end_idx = (local_task_id + 1) * partition_size_per_task if local_task_id != ntasks_per_node - 1 else None + input = captions[start_idx:end_idx] + chunk_size = len(input) + + print(f"Current worker {node_id}:{local_task_id} will generate {len(input)} images") + os.makedirs(save_path, exist_ok=True) + + trainer = Trainer() + pipeline = ImagenPipeline.from_pretrained(cfg=cfg.infer, trainer=trainer, megatron_loading=True, megatron_cfg=cfg) + + # Generate images using the model and save them + batch_idx = 0 + batch_size = cfg.fid.ncaptions_per_batch + while True: + if batch_idx * batch_size >= len(input): + break + batch_captions = input[batch_idx * batch_size : (batch_idx + 1) * batch_size] + # Different seed for every image + seeds = [local_task_id * chunk_size + batch_idx * batch_size + idx for idx in range(len(batch_captions))] + with torch.no_grad(): + images, all_res_images, *_ = pipeline( + prompts=batch_captions, seed=seeds, single_batch_mode=True, classifier_free_guidance=current_node_cfg, + ) + + if cfg.fid.save_all_res: + all_res = [f'_RES{model.image_size}' for model in pipeline.models] + outpaths = [] + # for the highest resolution we save as its original name so that + # we can automate the CLIP & FID calculation process from Megatron-Launcher + all_res[-1] = '' + for res in all_res: + outpath = f"{save_path}{res}" + os.makedirs(outpath, exist_ok=True) + outpaths.append(outpath) + for outpath, one_res in zip(outpaths, all_res_images): + for idx, (caption, image) in enumerate(zip(batch_captions, one_res[0])): + image_idx = local_task_id * chunk_size + batch_idx * batch_size + idx + image.save(os.path.join(outpath, f'image{image_idx:06d}.png')) + if save_text: + with open(os.path.join(outpath, f'image{image_idx:06d}.txt'), 'w') as f: + f.writelines(caption) + else: + for idx, (caption, image) in enumerate(zip(batch_captions, images[0])): + image_idx = local_task_id * chunk_size + batch_idx * batch_size + idx + image.save(os.path.join(save_path, f'image{image_idx:06d}.png')) + if save_text: + with open(os.path.join(save_path, f'image{image_idx:06d}.txt'), 'w') as f: + f.writelines(caption) + print( + f'Save {len(images[0])} images to {save_path} with name from image{(local_task_id*chunk_size+batch_idx*batch_size):06d}.png to image{image_idx:06d}.png' + ) + batch_idx += 1 + + +if __name__ == "__main__": + main() diff --git a/examples/multimodal/generative/imagen/imagen_generate_images.py b/examples/multimodal/generative/imagen/imagen_generate_images.py new file mode 100644 index 000000000000..b7e4c857decc --- /dev/null +++ b/examples/multimodal/generative/imagen/imagen_generate_images.py @@ -0,0 +1,62 @@ +import os +import pickle + +import torch +from omegaconf import OmegaConf +from pytorch_lightning import Trainer + +from nemo.collections.multimodal.models.imagen.imagen_pipeline import ImagenPipeline, ImagenPipelineConfig +from nemo.core.config import hydra_runner + + +@hydra_runner(config_path='conf', config_name='fid_inference.yaml') +def main(inference_config): + inference_config: ImagenPipelineConfig = OmegaConf.merge(ImagenPipelineConfig(), inference_config) + captions = pickle.load(open('coco_captions.pkl', 'rb')) + ntasks = 8 + if os.environ.get('CUDA_VISIBLE_DEVICES'): + # Multi-GPU + task_id = int(os.environ.get("CUDA_VISIBLE_DEVICES", 0)) + else: + # Single GPU + task_id = 0 + chuncksize = int(len(captions) // ntasks) + if task_id != ntasks - 1: + input = captions[task_id * chuncksize : (task_id + 1) * chuncksize] + else: + input = captions[task_id * chuncksize :] + captions = input + + trainer = Trainer() + pipeline = ImagenPipeline.from_pretrained(cfg=inference_config, trainer=trainer) + batch_size = 16 + batch_idx = 0 + + possible_res = [64, 256] # [64, 256] + outpaths = [] + for res in possible_res: + outpath = f'{inference_config.output_path}_RES{res}' + os.makedirs(outpath, exist_ok=True) + outpaths.append(outpath) + while True: + if batch_idx * batch_size >= len(captions): + break + batch_captions = captions[batch_idx * batch_size : (batch_idx + 1) * batch_size] + + # Different seed for every image + seeds = [task_id * chuncksize + batch_idx * batch_size + idx for idx in range(len(batch_captions))] + seed = batch_idx + chuncksize + + with torch.no_grad(): + images, all_res_images, throughput = pipeline(prompts=batch_captions, seed=seeds, single_batch_mode=True,) + + for outpath, one_res in zip(outpaths, all_res_images): + for idx, (caption, image) in enumerate(zip(batch_captions, one_res[0])): + image.save(os.path.join(outpath, f'image_{task_id*chuncksize+batch_idx*batch_size+idx}.png')) + with open(os.path.join(outpath, f'image_{task_id*chuncksize+batch_idx*batch_size+idx}.txt'), 'w') as f: + f.writelines(caption) + batch_idx += 1 + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/generative/imagen/imagen_infer.py b/examples/multimodal/generative/imagen/imagen_infer.py new file mode 100644 index 000000000000..97402b3ee500 --- /dev/null +++ b/examples/multimodal/generative/imagen/imagen_infer.py @@ -0,0 +1,45 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from omegaconf import OmegaConf +from pytorch_lightning import Trainer + +from nemo.collections.multimodal.models.imagen.imagen_pipeline import ImagenPipeline, ImagenPipelineConfig +from nemo.core.config import hydra_runner + + +@hydra_runner(config_path='conf', config_name='inference_pipeline.yaml') +def main(inference_config): + if inference_config.get('infer'): + # invoking from launcher + trainer = Trainer(**inference_config.trainer) + inference_config = inference_config.infer + else: + trainer = Trainer() + inference_config: ImagenPipelineConfig = OmegaConf.merge(ImagenPipelineConfig(), inference_config) + pipeline = ImagenPipeline.from_pretrained(cfg=inference_config, trainer=trainer) + + # Texts are passed in the config files + images, all_res, throughput = pipeline() + + # Save images + outpath = inference_config.output_path + os.makedirs(outpath, exist_ok=True) + for text, pils in zip(inference_config.texts, images): + for idx, image in enumerate(pils): + image.save(os.path.join(outpath, f'{text}_{idx}.png')) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/generative/imagen/imagen_training.py b/examples/multimodal/generative/imagen/imagen_training.py new file mode 100644 index 000000000000..66a1f0aedefb --- /dev/null +++ b/examples/multimodal/generative/imagen/imagen_training.py @@ -0,0 +1,71 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytorch_lightning as pl +import torch +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.connectors.checkpoint_connector import _CheckpointConnector +from torch._dynamo import disable +from torch._inductor import config as inductor_config + +from nemo.collections.multimodal.models.imagen.imagen import MegatronImagen +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path='conf', config_name='base64-500m') +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + trainer = MegatronTrainerBuilder(cfg).create_trainer() + exp_manager(trainer, cfg.exp_manager) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + model = MegatronImagen(cfg.model, trainer) + + if cfg.model.get("inductor", False): + # Temporary hack to get rid of TorchDynamo issue with DDP + # TODO: remove these if https://github.com/pytorch/pytorch/issues/94574 fixed + torch.arange = disable(torch.arange) + torch.ones = disable(torch.ones) + torch.zeros = disable(torch.zeros) + + # TODO: remove this if latest TorchDynamo fixed `t.uniform_(0, 1)` failure + torch.Tensor.uniform_ = disable(torch.Tensor.uniform_) + + # Disable TorchDynamo for unsupported function + pl.core.LightningModule.log = disable(pl.core.LightningModule.log) + + # TorchInductor with CUDA graph can lead to OOM + inductor_config.triton.cudagraphs = cfg.model.inductor_cudagraphs + model.model.model.unet = torch.compile(model.model.model.unet) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/generative/instruct_pix2pix/conf/sd_edit.yaml b/examples/multimodal/generative/instruct_pix2pix/conf/sd_edit.yaml new file mode 100644 index 000000000000..75eed9d9b6bf --- /dev/null +++ b/examples/multimodal/generative/instruct_pix2pix/conf/sd_edit.yaml @@ -0,0 +1,23 @@ +edit: + resolution: 256 + steps: 100 + input: path/to/input/picture + outpath: path/to/output/folder + prompt: "" + cfg_text: 7.5 + cfg_image: 1.2 + num_images_per_prompt: 8 + combine_images: [ 2, 4 ] # [row, column] + seed: 1234 + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +model: + restore_from_path: null # Path to a trained instruct pix2pix .nemo file + precision: ${trainer.precision} + diff --git a/examples/multimodal/generative/instruct_pix2pix/conf/sd_finetune.yaml b/examples/multimodal/generative/instruct_pix2pix/conf/sd_finetune.yaml new file mode 100644 index 000000000000..34ef1f436cd6 --- /dev/null +++ b/examples/multimodal/generative/instruct_pix2pix/conf/sd_finetune.yaml @@ -0,0 +1,168 @@ +name: instruct-pix2pix-train + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 10000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 1 + val_check_interval: 100 + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: instruct-pix2pix + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + create_checkpoint_callback: True + create_tensorboard_logger: True + checkpoint_callback_params: + save_top_k: 4 + mode: min + monitor: val/loss + filename: 'instruct-pix2pix--{val/loss:.4f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + ckpt_path: null # load checkpoint weights from previous stages for fine-tuning + precision: ${trainer.precision} + micro_batch_size: 32 + global_batch_size: 32 # `= micro_batch_size * total_devices` fake global batch size for sampler + + linear_start: 0.00085 + linear_end: 0.012 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: edited + cond_stage_key: edit # txt for cifar, caption for pbss + image_size: 32 + channels: 4 + cond_stage_trainable: false + conditioning_key: hybrid + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + scale_by_std: False + + ignore_keys: [ ] + parameterization: eps + clip_denoised: True + load_only_unet: False + cosine_s: 8e-3 + given_betas: + original_elbo_weight: 0 + v_posterior: 0 + l_simple_weight: 1 + use_positional_encodings: False + learn_logvar: False + logvar_init: 0 + beta_schedule: linear + loss_type: l2 + concat_mode: True + cond_stage_forward: + text_embedding_dropout_rate: 0 + fused_opt: True + inductor: False + inductor_cudagraphs: False + + unet_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel + from_pretrained: + image_size: 32 # unused + in_channels: 8 + out_channels: 4 + model_channels: 320 + attention_resolutions: + - 4 + - 2 + - 1 + num_res_blocks: 2 + channel_mult: + - 1 + - 2 + - 4 + - 4 + num_heads: 8 + use_spatial_transformer: true + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: False + + first_stage_config: + _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL + from_pretrained: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [ ] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder + version: openai/clip-vit-large-patch14 + device: cuda + max_length: 77 + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + optim: + name: fused_adam + lr: 1e-4 + weight_decay: 0. + betas: + - 0.9 + - 0.999 + sched: + name: WarmupHoldPolicy + warmup_steps: 100 + hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant + + data: + # Path to instruct-pix2pix dataset must be specified by the user. + # https://github.com/timothybrooks/instruct-pix2pix#generated-dataset + data_path: ??? + num_workers: 2 + dataloader_type: cyclic # cyclic + validation_drop_last: True # Set to false if the last partial validation samples is to be consumed diff --git a/examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py b/examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py new file mode 100644 index 000000000000..83658fd1194a --- /dev/null +++ b/examples/multimodal/generative/instruct_pix2pix/sd_edit_cli.py @@ -0,0 +1,174 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import math +import os +import random +import sys +from argparse import ArgumentParser + +import einops +import numpy as np +import torch +import torch.nn as nn +from einops import rearrange, repeat +from omegaconf import OmegaConf, open_dict +from PIL import Image, ImageOps +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from torch import autocast + +from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit +from nemo.collections.multimodal.models.stable_diffusion.samplers.k_diffusion import ( + DiscreteEpsDDPMDenoiser, + sample_euler_ancestral, +) +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector +from nemo.core.config import hydra_runner +from nemo.utils import logging + + +class CFGDenoiser(nn.Module): + def __init__(self, model): + super().__init__() + self.inner_model = model + + def forward(self, z, sigma, cond, uncond, text_cfg_scale, image_cfg_scale): + cfg_z = einops.repeat(z, "b ... -> (n b) ...", n=3) + cfg_sigma = einops.repeat(sigma, "b ... -> (n b) ...", n=3) + cfg_cond = { + "c_crossattn": [torch.cat([cond["c_crossattn"][0], uncond["c_crossattn"][0], uncond["c_crossattn"][0]])], + "c_concat": [torch.cat([cond["c_concat"][0], cond["c_concat"][0], uncond["c_concat"][0]])], + } + out_cond, out_img_cond, out_uncond = self.inner_model(cfg_z, cfg_sigma, cond=cfg_cond).chunk(3) + out = out_uncond + text_cfg_scale * (out_cond - out_img_cond) + image_cfg_scale * (out_img_cond - out_uncond) + return out + + +@hydra_runner(config_path='conf', config_name='sd_edit') +def main(cfg): + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + with open_dict(cfg): + edit_cfg = cfg.pop("edit") + + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronLatentDiffusionEdit, cfg=cfg, model_cfg_modifier=model_cfg_modifier, + ) + + # inference use the latent diffusion part of megatron wrapper + model = megatron_diffusion_model.model + model_wrap = DiscreteEpsDDPMDenoiser(model) + model_wrap_cfg = CFGDenoiser(model_wrap) + null_token = model.get_learned_conditioning([""]) + + seed = random.randint(0, 100000) if edit_cfg.seed is None else edit_cfg.seed + input_image = Image.open(edit_cfg.input).convert("RGB") + width, height = input_image.size + factor = edit_cfg.resolution / max(width, height) + factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height) + width = int((width * factor) // 64) * 64 + height = int((height * factor) // 64) * 64 + input_image = ImageOps.fit(input_image, (width, height), method=Image.Resampling.LANCZOS) + + if edit_cfg.prompt == "": + input_image.save(edit_cfg.output) + return + + # get autocast_dtype + if trainer.precision in ['bf16', 'bf16-mixed']: + autocast_dtype = torch.bfloat16 + elif trainer.precision in [32, '32', '32-true']: + autocast_dtype = torch.float + elif trainer.precision in [16, '16', '16-mixed']: + autocast_dtype = torch.half + else: + raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') + + num_images_per_prompt = edit_cfg.num_images_per_prompt + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): + cond = {} + cond["c_crossattn"] = [ + repeat(model.get_learned_conditioning([edit_cfg.prompt]), "1 ... -> n ...", n=num_images_per_prompt) + ] + input_image = 2 * torch.tensor(np.array(input_image)).float() / 255 - 1 + input_image = rearrange(input_image, "h w c -> 1 c h w").cuda(non_blocking=True) + cond["c_concat"] = [ + repeat(model.encode_first_stage(input_image).mode(), "1 ... -> n ...", n=num_images_per_prompt) + ] + + uncond = {} + uncond["c_crossattn"] = [repeat(null_token, "1 ... -> n ...", n=num_images_per_prompt)] + uncond["c_concat"] = [torch.zeros_like(cond["c_concat"][0])] + + sigmas = model_wrap.get_sigmas(edit_cfg.steps) + + extra_args = { + "cond": cond, + "uncond": uncond, + "text_cfg_scale": edit_cfg.cfg_text, + "image_cfg_scale": edit_cfg.cfg_image, + } + torch.manual_seed(seed) + z = torch.randn_like(cond["c_concat"][0]) + z = z * sigmas[0] + z = sample_euler_ancestral(model_wrap_cfg, z, sigmas, extra_args=extra_args) + x = model.decode_first_stage(z) + x = torch.clamp((x + 1.0) / 2.0, min=0.0, max=1.0) + x = 255.0 * rearrange(x, "n c h w -> n h w c") + + os.makedirs(edit_cfg.outpath, exist_ok=True) + if edit_cfg.get("combine_images") is None: + for idx, image in enumerate(x): + edited_image = Image.fromarray(image.type(torch.uint8).cpu().numpy()) + save_path = os.path.join( + edit_cfg.outpath, + f'{edit_cfg.prompt.replace(" ", "_")}_{edit_cfg.cfg_text}_{edit_cfg.cfg_image}_{seed}_{idx}.jpg', + ) + edited_image.save(save_path) + logging.info(f"Edited image saved to: {save_path}") + else: + row, column = edit_cfg.combine_images + width, height = x.size(2), x.size(1) + total_width, total_height = width * column, height * row + edited_image = Image.new('RGB', (total_width, total_height)) + x_offset = 0 + y_offset = 0 + for idx, image in enumerate(x): + image = Image.fromarray(image.type(torch.uint8).cpu().numpy()) + edited_image.paste(image, (x_offset, y_offset)) + x_offset += image.size[0] + if (idx + 1) % column == 0: + x_offset = 0 + y_offset += height + save_path = os.path.join( + edit_cfg.outpath, + f'{edit_cfg.prompt.replace(" ", "_")}_{edit_cfg.cfg_text}_{edit_cfg.cfg_image}_{seed}_combine.jpg', + ) + edited_image.save(save_path) + logging.info(f"Edited image saved to: {save_path}") + + +if __name__ == "__main__": + main() diff --git a/examples/multimodal/generative/instruct_pix2pix/sd_finetune.py b/examples/multimodal/generative/instruct_pix2pix/sd_finetune.py new file mode 100644 index 000000000000..430fc5af0ec9 --- /dev/null +++ b/examples/multimodal/generative/instruct_pix2pix/sd_finetune.py @@ -0,0 +1,45 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from omegaconf.omegaconf import OmegaConf, open_dict + +from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector + +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="conf", config_name="sd_finetune") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + trainer = MegatronTrainerBuilder(cfg).create_trainer() + exp_manager(trainer, cfg.exp_manager) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + model = MegatronLatentDiffusionEdit(cfg.model, trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/generative/nerf/benchmark_callback.py b/examples/multimodal/generative/nerf/benchmark_callback.py new file mode 100644 index 000000000000..7d216a6679c4 --- /dev/null +++ b/examples/multimodal/generative/nerf/benchmark_callback.py @@ -0,0 +1,82 @@ +import time +from typing import Optional + +from pytorch_lightning import Callback, LightningModule, Trainer + +from nemo.utils import logging + + +class BenchmarkCallback(Callback): + def __init__( + self, + start_benchmark_at_step: int = 0, + stop_benchmark_at_step: Optional[int] = None, + log_every_n_steps: int = 10, + ): + super().__init__() + self.start_benchmark_at_step = start_benchmark_at_step + self.stop_benchmark_at_step = stop_benchmark_at_step + self.log_every_n_steps = log_every_n_steps + self.train_times = [] + self.val_times = [] + self.train_steps_times = [] + self.val_steps_times = [] + + def should_benchmark(self, trainer: Trainer): + if self.stop_benchmark_at_step is None: + return trainer.global_step >= self.start_benchmark_at_step + return self.start_benchmark_at_step <= trainer.global_step <= self.stop_benchmark_at_step + + def on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule): + self.epoch_start_time = time.time() + + def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule): + if self.should_benchmark(trainer): + epoch_time = time.time() - self.epoch_start_time + self.train_times.append(epoch_time) + logging.info(f'Training-Epoch-{trainer.current_epoch}-Time: {epoch_time} [sec]') + + def on_train_batch_start(self, trainer: Trainer, pl_module: LightningModule, batch, batch_idx: int): + self.step_start_time = time.time() + + def on_train_batch_end(self, trainer: Trainer, pl_module: LightningModule, outputs, batch, batch_idx: int): + if self.should_benchmark(trainer): + step_time = time.time() - self.step_start_time + self.train_steps_times.append(step_time) + if trainer.global_step % self.log_every_n_steps == 0: + logging.info(f'Training-Step-{trainer.global_step}-Time: {step_time} [sec]') + + def on_validation_epoch_start(self, trainer: Trainer, pl_module: LightningModule): + self.val_start_time = time.time() + + def on_validation_epoch_end(self, trainer: Trainer, pl_module: LightningModule): + if self.should_benchmark(trainer): + val_time = time.time() - self.val_start_time + self.val_times.append(val_time) + logging.info(f'Validation-Epoch-{trainer.current_epoch}-Time: {val_time} [sec]') + + def on_validation_batch_start( + self, trainer: Trainer, pl_module: LightningModule, batch, batch_idx: int, dataloader_idx: int + ): + self.val_step_start_time = time.time() + + def on_validation_batch_end( + self, trainer: Trainer, pl_module: LightningModule, outputs, batch, batch_idx: int, dataloader_idx: int + ): + if self.should_benchmark(trainer): + val_step_time = time.time() - self.val_step_start_time + self.val_steps_times.append(val_step_time) + if trainer.global_step % self.log_every_n_steps == 0: + logging.info(f'Validation-Step-{trainer.global_step}-Time: {val_step_time} [sec]') + + def on_fit_end(self, trainer: Trainer, pl_module: LightningModule): + if self.should_benchmark(trainer): + avg_train_time = sum(self.train_times) / len(self.train_times) + avg_val_time = sum(self.val_times) / len(self.val_times) + avg_train_step_time = sum(self.train_steps_times) / len(self.train_steps_times) + avg_val_step_time = sum(self.val_steps_times) / len(self.val_steps_times) + + logging.info(f'Average-Training-Epoch-Time: {avg_train_time} [sec]') + logging.info(f'Average-Validation-Epoch-Time: {avg_val_time} [sec]') + logging.info(f'Average-Training-Step-Time: {avg_train_step_time} [sec]') + logging.info(f'Average-Validation-Step-Time: {avg_val_step_time} [sec]') diff --git a/examples/multimodal/generative/nerf/config/config.yaml b/examples/multimodal/generative/nerf/config/config.yaml new file mode 100644 index 000000000000..1adcbae72c26 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/config.yaml @@ -0,0 +1,52 @@ +defaults: + - model: dreamfusion + - _self_ + +name: DreamFusion +seed: 2023 +mode: fit # fit, validate, test, export-mesh + +# export-mesh options +mesh_fname: /results/mesh.obj # mesh file name when mode=export-mesh +mesh_resolution: 128 # Mesh resolution when mode=export-mesh + +# benchmark options +enable_benchmark: False +benchmark_callback: + _target_: benchmark_callback.BenchmarkCallback + log_every_n_steps: 1 + +trainer: + devices: 1 + num_nodes: 1 + precision: 16 + max_steps: 10000 # example configs: dreamfuions=10000, dmtet=5000 + accelerator: gpu + enable_checkpointing: False + logger: False + log_every_n_steps: 1 + val_check_interval: 100 + accumulate_grad_batches: 1 + benchmark: False + enable_model_summary: True + +exp_manager: + name: ${name} + exp_dir: /results + create_tensorboard_logger: False + create_wandb_logger: False + wandb_logger_kwargs: + project: dreamfusion + group: nemo-df + name: ${name} + resume: True + create_checkpoint_callback: True + checkpoint_callback_params: + every_n_epochs: 0 + every_n_train_steps: 1000 # TODO(ahmadki): being ignored ? + monitor: loss + filename: '${name}-{step}' + save_top_k: -1 + always_save_nemo: False + resume_if_exists: True + resume_ignore_no_checkpoint: True diff --git a/examples/multimodal/generative/nerf/config/model/background/random.yaml b/examples/multimodal/generative/nerf/config/model/background/random.yaml new file mode 100644 index 000000000000..9cfb09fc6eca --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/background/random.yaml @@ -0,0 +1,3 @@ +_target_: nemo.collections.multimodal.modules.nerf.background.random_background.RandomBackground +base_background: [1, 1, 1] +random_ratio: 0.5 diff --git a/examples/multimodal/generative/nerf/config/model/background/static.yaml b/examples/multimodal/generative/nerf/config/model/background/static.yaml new file mode 100644 index 000000000000..eb82f9944991 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/background/static.yaml @@ -0,0 +1,2 @@ +_target_: nemo.collections.multimodal.modules.nerf.background.static_background.StaticBackground +background: [0, 0, 1] # rgb diff --git a/examples/multimodal/generative/nerf/config/model/background/tcnn.yaml b/examples/multimodal/generative/nerf/config/model/background/tcnn.yaml new file mode 100644 index 000000000000..8daf7bcd8349 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/background/tcnn.yaml @@ -0,0 +1,19 @@ +_target_: nemo.collections.multimodal.modules.nerf.background.tcnn_background.TCNNBackground +bound: 1 +encoder_num_input_dims: 3 # 3 directions +encoder_cfg: + otype: "HashGrid" + n_levels: 16 + n_features_per_level: 2 + log2_hashmap_size: 19 + base_resolution: 16 + interpolation: "Smoothstep" + per_level_scale: # default is np.exp2(np.log2(2048 * bound / 16) / (16 - 1)) + +background_net_num_output_dims: 3 # rgb +background_net_cfg: + otype: "FullyFusedMLP" + activation: "ReLU" + output_activation: "None" + n_neurons: 32 + n_hidden_layers: 2 diff --git a/examples/multimodal/generative/nerf/config/model/background/torchngp.yaml b/examples/multimodal/generative/nerf/config/model/background/torchngp.yaml new file mode 100644 index 000000000000..b77778099e79 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/background/torchngp.yaml @@ -0,0 +1,11 @@ +_target_: nemo.collections.multimodal.modules.nerf.background.torchngp_background.TorchNGPBackground + +encoder_type: "frequency" +encoder_input_dims: 3 +encoder_multi_res: 6 + +num_output_dims: 3 +net_cfg: + num_hidden_dims: 32 + num_layers: 2 + bias: True diff --git a/examples/multimodal/generative/nerf/config/model/data/data.yaml b/examples/multimodal/generative/nerf/config/model/data/data.yaml new file mode 100644 index 000000000000..0b5f88b9f1fb --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/data/data.yaml @@ -0,0 +1,41 @@ +_target_: data.AggregatorDataModule + +train_batch_size: 1 +train_shuffle: false +train_dataset: + _target_: nemo.collections.multimodal.data.nerf.random_poses.RandomPosesDataset + internal_batch_size: 100 + width: 64 + height: 64 + radius_range: [3.0, 3.5] + theta_range: [45, 105] + phi_range: [-180, 180] + fovx_range: [10, 30] + fovy_range: [10, 30] + jitter: False + jitter_center: 0.2 + jitter_target: 0.2 + jitter_up: 0.02 + uniform_sphere_rate: 0 + angle_overhead: 30 + angle_front: 60 + +val_batch_size: 1 +val_shuffle: false +val_dataset: + _target_: nemo.collections.multimodal.data.nerf.circle_poses.CirclePosesDataset + size: 5 + width: 800 + height: 800 + angle_overhead: 30 + angle_front: 60 + +test_batch_size: 1 +test_shuffle: false +test_dataset: + _target_: nemo.collections.multimodal.data.nerf.circle_poses.CirclePosesDataset + size: 100 + width: 800 + height: 800 + angle_overhead: 30 + angle_front: 60 diff --git a/examples/multimodal/generative/nerf/config/model/dreamfusion-dmtet.yaml b/examples/multimodal/generative/nerf/config/model/dreamfusion-dmtet.yaml new file mode 100644 index 000000000000..bfadd4f426b3 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/dreamfusion-dmtet.yaml @@ -0,0 +1,40 @@ +_target_: nemo.collections.multimodal.models.nerf.dreamfusion.DreamFusion # TODO(ahmadki): dreamfusion-dmetet should have it's own class +defaults: + - nerf: torchngp + - background: torchngp + - material: basic_shading + - renderer: nvdiffrast + - guidance: sd_huggingface + - optim: adan + - loss: dmtet + - data: data + - _self_ + +### model options +resume_from_checkpoint: +prompt: 'a hamburger' +negative_prompt: '' +front_prompt: ', front view' +side_prompt: ', side view' +back_prompt: ', back view' +update_extra_interval: 16 +guidance_scale: 100 +export_video: False + +iters: ${trainer.max_steps} +# TODO(ahmadki): move to database +latent_iter_ratio: 0.0 +albedo_iter_ratio: 0 +min_ambient_ratio: 0.1 +textureless_ratio: 0.2 + +data: + train_dataset: + width: 512 + height: 512 + val_dataset: + width: 800 + height: 800 + test_dataset: + width: 800 + height: 800 diff --git a/examples/multimodal/generative/nerf/config/model/dreamfusion.yaml b/examples/multimodal/generative/nerf/config/model/dreamfusion.yaml new file mode 100644 index 000000000000..a67393341b53 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/dreamfusion.yaml @@ -0,0 +1,40 @@ +_target_: nemo.collections.multimodal.models.nerf.dreamfusion.DreamFusion +defaults: + - nerf: torchngp + - background: static + - material: basic_shading + - renderer: torchngp_raymarching + - guidance: sd_huggingface + - optim: adan + - loss: dreamfusion + - data: data + - _self_ + +### model options +resume_from_checkpoint: +prompt: 'a hamburger' +negative_prompt: '' +front_prompt: ', front view' +side_prompt: ', side view' +back_prompt: ', back view' +update_extra_interval: 16 +guidance_scale: 100 +export_video: False + +iters: ${trainer.max_steps} +# TODO(ahmadki): move to database +latent_iter_ratio: 0.2 +albedo_iter_ratio: 0.0 +min_ambient_ratio: 0.1 +textureless_ratio: 0.2 + +data: + train_dataset: + width: 64 + height: 64 + val_dataset: + width: 800 + height: 800 + test_dataset: + width: 800 + height: 800 diff --git a/examples/multimodal/generative/nerf/config/model/guidance/sd_huggingface.yaml b/examples/multimodal/generative/nerf/config/model/guidance/sd_huggingface.yaml new file mode 100644 index 000000000000..a8b7adca3c55 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/guidance/sd_huggingface.yaml @@ -0,0 +1,4 @@ +_target_: nemo.collections.multimodal.modules.nerf.guidance.stablediffusion_huggingface_pipeline.StableDiffusion +precision: ${trainer.precision} +model_key: stabilityai/stable-diffusion-2-1-base +t_range: [0.02, 0.98] diff --git a/examples/multimodal/generative/nerf/config/model/guidance/sd_nemo.yaml b/examples/multimodal/generative/nerf/config/model/guidance/sd_nemo.yaml new file mode 100644 index 000000000000..fd4517ec1f7c --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/guidance/sd_nemo.yaml @@ -0,0 +1,4 @@ +_target_: nemo.collections.multimodal.modules.nerf.guidance.stablediffusion_nemo_pipeline.StableDiffusion +checkpoint: /sd_checkpoints/nemo-1.5/sd-1.5.nemo +sampler_type: 'DDIM' +t_range: [0.02, 0.98] diff --git a/examples/multimodal/generative/nerf/config/model/guidance/sd_trt.yaml b/examples/multimodal/generative/nerf/config/model/guidance/sd_trt.yaml new file mode 100644 index 000000000000..45c1e2ac8fb5 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/guidance/sd_trt.yaml @@ -0,0 +1,5 @@ +_target_: nemo.collections.multimodal.modules.nerf.guidance.stablediffusion_trt_pipeline.StableDiffusion +checkpoint: /sd_checkpoints/nemo-1.5/sd-1.5.nemo +plan_dir: /sd_checkpoints/nemo-1.5/plan +sampler_type=: DDIM" +t_range: [0.02, 0.98] diff --git a/examples/multimodal/generative/nerf/config/model/loss/dmtet.yaml b/examples/multimodal/generative/nerf/config/model/loss/dmtet.yaml new file mode 100644 index 000000000000..188c1034fc27 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/loss/dmtet.yaml @@ -0,0 +1,8 @@ +lambda_sds: 1.0 +lambda_opacity: 0.0 +lambda_entropy: 0.0 +lambda_orientation: 0.0 +lambda_2d_normal_smooth: 0.0 +lambda_3d_normal_smooth: 0.0 +lambda_mesh_normal: 0.5 +lambda_mesh_laplacian: 0.5 diff --git a/examples/multimodal/generative/nerf/config/model/loss/dreamfusion.yaml b/examples/multimodal/generative/nerf/config/model/loss/dreamfusion.yaml new file mode 100644 index 000000000000..8cfd4b47eb51 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/loss/dreamfusion.yaml @@ -0,0 +1,8 @@ +lambda_sds: 1.0 +lambda_opacity: 0.0 +lambda_entropy: 1e-3 +lambda_orientation: 1e-2 +lambda_2d_normal_smooth: 0.0 +lambda_3d_normal_smooth: 0.0 +lambda_mesh_normal: 0.0 +lambda_mesh_laplacian: 0.0 diff --git a/examples/multimodal/generative/nerf/config/model/material/basic_shading.yaml b/examples/multimodal/generative/nerf/config/model/material/basic_shading.yaml new file mode 100644 index 000000000000..802defad1637 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/material/basic_shading.yaml @@ -0,0 +1 @@ +_target_: nemo.collections.multimodal.modules.nerf.materials.basic_shading.BasicShading diff --git a/examples/multimodal/generative/nerf/config/model/nerf/tcnn.yaml b/examples/multimodal/generative/nerf/config/model/nerf/tcnn.yaml new file mode 100644 index 000000000000..0bf5ed6c5e2f --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/nerf/tcnn.yaml @@ -0,0 +1,32 @@ +_target_: nemo.collections.multimodal.modules.nerf.geometry.tcnn_nerf.TCNNNerf +num_input_dims: 3 # 3D space +bound: 1 +density_activation: softplus # softplus, exp +blob_radius: 0.5 +blob_density: 10 +normal_type: central_finite_difference + +encoder_cfg: + otype: "HashGrid" + n_levels: 16 + n_features_per_level: 2 + log2_hashmap_size: 19 + base_resolution: 16 + interpolation: "Smoothstep" + per_level_scale: # default is np.exp2(np.log2(2048 * bound / 16) / (16 - 1)) + +sigma_net_num_output_dims: 1 # density +sigma_net_cfg: + otype: "FullyFusedMLP" + activation: "ReLU" + output_activation: "None" + n_neurons: 64 + n_hidden_layers: 3 + +features_net_num_output_dims: 3 # rgb +features_net_cfg: + otype: "FullyFusedMLP" + activation: "ReLU" + output_activation: "None" + n_neurons: 64 + n_hidden_layers: 3 diff --git a/examples/multimodal/generative/nerf/config/model/nerf/torchngp.yaml b/examples/multimodal/generative/nerf/config/model/nerf/torchngp.yaml new file mode 100644 index 000000000000..48877dcfa871 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/nerf/torchngp.yaml @@ -0,0 +1,26 @@ +_target_: nemo.collections.multimodal.modules.nerf.geometry.torchngp_nerf.TorchNGPNerf +num_input_dims: 3 # 3D space +bound: 1 +density_activation: exp # softplus, exp +blob_radius: 0.2 +blob_density: 5 +normal_type: central_finite_difference + +encoder_cfg: + encoder_type: 'hashgrid' + encoder_max_level: + log2_hashmap_size: 19 + desired_resolution: 2048 + interpolation: smoothstep + +sigma_net_num_output_dims: 1 # density +sigma_net_cfg: + num_hidden_dims: 64 + num_layers: 3 + bias: True + +features_net_num_output_dims: 3 # rgb +features_net_cfg: + num_hidden_dims: 64 + num_layers: 3 + bias: True diff --git a/examples/multimodal/generative/nerf/config/model/optim/adan.yaml b/examples/multimodal/generative/nerf/config/model/optim/adan.yaml new file mode 100644 index 000000000000..885c13fcca8a --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/optim/adan.yaml @@ -0,0 +1,6 @@ +name: adan +lr: 5e-3 +eps: 1e-8 +weight_decay: 2e-5 +max_grad_norm: 5.0 +foreach: False diff --git a/examples/multimodal/generative/nerf/config/model/renderer/nerfacc.yaml b/examples/multimodal/generative/nerf/config/model/renderer/nerfacc.yaml new file mode 100644 index 000000000000..73f48a7a0ca9 --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/renderer/nerfacc.yaml @@ -0,0 +1,8 @@ +_target_: nemo.collections.multimodal.modules.nerf.renderers.nerfacc_volume_renderer.NerfaccVolumeBaseRenderer +grid_resolution: 128 +grid_levels: 3 +bound: ${model.nerf.bound} +render_step_size: 1.e-3 +near_plane: 0.2 +cone_angle: 0.004 +alpha_thre: 1.e-2 diff --git a/examples/multimodal/generative/nerf/config/model/renderer/nvdiffrast.yaml b/examples/multimodal/generative/nerf/config/model/renderer/nvdiffrast.yaml new file mode 100644 index 000000000000..fefc217f4aec --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/renderer/nvdiffrast.yaml @@ -0,0 +1,6 @@ +_target_: nemo.collections.multimodal.modules.nerf.renderers.nvdiffrast_renderer.NVDiffRastRenderer +bound: ${model.nerf.bound} +grid_resolution: 128 +density_thresh: 10.0 +update_interval: 16 +quartet_file: "/results/tets/128_tets.npz" diff --git a/examples/multimodal/generative/nerf/config/model/renderer/torchngp_raymarching.yaml b/examples/multimodal/generative/nerf/config/model/renderer/torchngp_raymarching.yaml new file mode 100644 index 000000000000..5075a5fbc85c --- /dev/null +++ b/examples/multimodal/generative/nerf/config/model/renderer/torchngp_raymarching.yaml @@ -0,0 +1,7 @@ +_target_: nemo.collections.multimodal.modules.nerf.renderers.torchngp_volume_renderer.TorchNGPVolumeRenderer +bound: ${model.nerf.bound} +update_interval: 16 +grid_resolution: 128 +density_thresh: 10 +max_steps: 1024 +dt_gamma: 0 diff --git a/examples/multimodal/generative/nerf/data.py b/examples/multimodal/generative/nerf/data.py new file mode 100644 index 000000000000..1126a816a97e --- /dev/null +++ b/examples/multimodal/generative/nerf/data.py @@ -0,0 +1,73 @@ +import pytorch_lightning as pl +from hydra.utils import instantiate +from omegaconf.omegaconf import DictConfig +from torch.utils.data import DataLoader + + +# TODO(ahmadki): multi-GPU needs more work, we currently don't shard data +# across GPUs, which is OK for trainnig, but needs fixing for validation and testing. +class AggregatorDataModule(pl.LightningDataModule): + def __init__( + self, + train_dataset: DictConfig = None, + train_batch_size: int = 1, + train_shuffle: bool = False, + val_dataset: DictConfig = None, + val_batch_size: int = 1, + val_shuffle: bool = False, + test_dataset: DictConfig = None, + test_batch_size: int = 1, + test_shuffle: bool = False, + ): + super().__init__() + + self.train_dataset = train_dataset + self.train_batch_size = train_batch_size + self.train_shuffle = train_shuffle + self.val_dataset = val_dataset + self.val_batch_size = val_batch_size + self.val_shuffle = val_shuffle + self.test_dataset = test_dataset + self.test_batch_size = test_batch_size + self.test_shuffle = test_shuffle + + # TODO(ahmadki): lazy init + # def setup(self, stage=None) -> None: + # if stage in [None, "fit"]: + # self.train_dataset = instantiate(self.train_dataset) + # if stage in [None, "fit", "validate"]: + # self.val_dataset = instantiate(self.val_dataset) + # if stage in [None, "test", "predict"]: + # self.test_dataset = instantiate(self.test_dataset) + + def train_dataloader(self) -> DataLoader: + loader = DataLoader( + self.train_dataset, + batch_size=self.train_batch_size, + collate_fn=self.train_dataset.collate_fn, + pin_memory=True, + num_workers=4, + ) + return loader + + def val_dataloader(self) -> DataLoader: + loader = DataLoader( + self.val_dataset, + batch_size=self.val_batch_size, + collate_fn=self.val_dataset.collate_fn, + shuffle=self.val_shuffle, + pin_memory=True, + num_workers=0, + ) + return loader + + def test_dataloader(self) -> DataLoader: + loader = DataLoader( + self.test_dataset, + batch_size=self.test_batch_size, + collate_fn=self.test_dataset.collate_fn, + shuffle=self.test_shuffle, + pin_memory=True, + num_workers=0, + ) + return loader diff --git a/examples/multimodal/generative/nerf/main.py b/examples/multimodal/generative/nerf/main.py new file mode 100644 index 000000000000..35b80052ef7f --- /dev/null +++ b/examples/multimodal/generative/nerf/main.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from hydra.utils import get_class, instantiate +from omegaconf.omegaconf import DictConfig, OmegaConf +from pytorch_lightning import Trainer, seed_everything + +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path='config', config_name='config') +def main(cfg: DictConfig) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + seed_everything(cfg.seed, workers=True) + + mode = cfg.mode + logging.info(f"{mode=}") + + model = None + model_cls = get_class(cfg.model._target_) + if cfg.model.resume_from_checkpoint is None: + model = model_cls(cfg=cfg.model) + else: + logging.info(f"Loading model from checkpoint: {cfg.model.resume_from_checkpoint}") + model = model_cls.load_from_checkpoint(cfg.model.resume_from_checkpoint, strict=False, cfg=cfg.model) + + if mode == "export-mesh": + mesh = model.mesh(resolution=cfg.mesh_resolution) + mesh.export(cfg.mesh_fname) + return + + # Prepare callbacks + callbacks = [] + if cfg.enable_benchmark: + callbacks.append(instantiate(cfg.benchmark_callback)) + + # Setup trainer + trainer = Trainer(callbacks=callbacks, **cfg.trainer) + exp_manager(trainer, cfg.exp_manager) + + # Setup datamodule + dm = instantiate(cfg.model.data) + + if mode == "fit": + trainer.fit(model, datamodule=dm) + elif mode == "validate": + trainer.validate(model, datamodule=dm) + elif mode == "test": + trainer.test(model, datamodule=dm) + else: + raise ValueError(f"Invalid mode: {mode}") + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml new file mode 100644 index 000000000000..3cfc822f8462 --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/conf/sd2_train.yaml @@ -0,0 +1,192 @@ +name: stable-diffusion2-train + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 140000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + limit_val_batches: 0 + +exp_manager: + exp_dir: null + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: + project: stable-diffusion + group: nemo-sd + name: ${name} + resume: True + create_checkpoint_callback: True + create_tensorboard_logger: True + checkpoint_callback_params: + every_n_train_steps: 1000 + every_n_epochs: 0 + monitor: reduced_train_loss + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 4 # limited by GPU memory + global_batch_size: 16 # will use more micro batches to reach global batch size + + linear_start: 0.00085 + linear_end: 0.012 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: images + cond_stage_key: captions # txt for cifar, caption for pbss + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn # check + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + scale_by_std: False + ckpt_path: + ignore_keys: [] + parameterization: eps + clip_denoised: True + load_only_unet: False + cosine_s: 8e-3 + given_betas: + original_elbo_weight: 0 + v_posterior: 0 + l_simple_weight: 1 + use_positional_encodings: False + learn_logvar: False + logvar_init: 0 + beta_schedule: linear + loss_type: l2 + + concat_mode: True + cond_stage_forward: + text_embedding_dropout_rate: 0.1 + fused_opt: True + inductor: True + inductor_cudagraphs: False + capture_cudagraph_iters: -1 # -1 to disable + channels_last: True + + unet_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel + from_pretrained: + from_NeMo: #Must be specified when from pretrained is not None, False means loading unet from HF ckpt + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: + - 4 + - 2 + - 1 + num_res_blocks: 2 + channel_mult: + - 1 + - 2 + - 4 + - 4 + num_head_channels: 64 + use_spatial_transformer: true + use_linear_in_transformer: true + transformer_depth: 1 + context_dim: 1024 + use_checkpoint: False + legacy: False + use_flash_attention: False + + first_stage_config: + _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL + from_pretrained: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 #Never used + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder + restore_from_path: /path/to/clip.nemo + device: cuda + freeze: True + layer: "penultimate" + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + optim: + name: fused_adam + lr: 1e-4 + weight_decay: 0. + betas: + - 0.9 + - 0.999 + sched: + name: WarmupHoldPolicy + warmup_steps: 10000 + hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + data: + num_workers: 16 + synthetic_data: False # dataset_path and local_root_path can be empty when using synthetic data + synthetic_data_length: 10000 + train: + dataset_path: + - /datasets/coyo/test.pkl + augmentations: + resize_smallest_side: 512 + center_crop_h_w: 512, 512 + horizontal_flip: False + filterings: + + webdataset: + infinite_sampler: False + local_root_path: /datasets/coyo diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_fid_images.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_fid_images.yaml new file mode 100644 index 000000000000..e526bc52d673 --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_fid_images.yaml @@ -0,0 +1,45 @@ +name: stable-diffusion-train + +fid: + classifier_free_guidance: + - 1.5 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + nnodes_per_cfg: 1 + ntasks_per_node: 8 + local_task_id: null + num_images_to_eval: 30000 + coco_captions_path: /coco2014/coco2014_val_sampled_30k/captions + coco_images_path: /coco2014/coco2014_val/images_256 + save_path: output + +infer: + unconditional_guidance_scale: null + num_images_per_prompt: 1 + height: 512 + width: 512 + down_factor: 8 + inference_steps: 50 + sampler_type: 'PLMS' + eta: 0 + output_type: 'pil' + save_to_file: False # We need to rename and maintain the order of images for clip score calculation, so we will save it outside the inference pipeline + out_path: ${fid.save_path} + seed: 123 + prompts: + +trainer: + devices: ${fid.ntasks_per_node} + num_nodes: 1 + accelerator: gpu + precision: 32 + logger: False # logger provided by exp_manager + +model: + restore_from_path: null + precision: ${trainer.precision} \ No newline at end of file diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml new file mode 100644 index 000000000000..dbe384dd2566 --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_infer.yaml @@ -0,0 +1,31 @@ +name: stable-diffusion-train + +infer: + unconditional_guidance_scale: 7.5 + num_images_per_prompt: 4 + height: 512 + width: 512 + down_factor: 8 + inference_steps: 25 + sampler_type: 'DPM' + eta: 0 + output_type: 'pil' + save_to_file: True + out_path: 'stable-diffusion' + seed: 123 + prompts: + - 'A photo of a Shiba Inu dog with a backpack riding a bike. It is wearing sunglasses and a beach hat.' + - 'A cute corgi lives in a house made out of sushi.' + - 'A high contrast portrait of a very happy fuzzy panda dressed as a chef in a high end kitchen making dough. There is a painting of flowers on the wall behind him.' + - 'A brain riding a rocketship heading towards the moon.' + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + +model: + restore_from_path: null + precision: ${trainer.precision} \ No newline at end of file diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml new file mode 100644 index 000000000000..78a8dedccbc2 --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml @@ -0,0 +1,207 @@ +name: stable-diffusion-train + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: 2 # PTL default. In practice, max_steps will be reached first. + max_steps: -1 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + limit_val_batches: 0 + + +exp_manager: + exp_dir: null + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: + project: stable-diffusion + group: nemo-sd + name: ${name} + resume: True + create_checkpoint_callback: True + create_tensorboard_logger: True + checkpoint_callback_params: + every_n_train_steps: 1000 + every_n_epochs: 0 + monitor: reduced_train_loss + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 1 # limited by GPU memory + global_batch_size: 1 # will use more micro batches to reach global batch size + native_amp_init_scale: 65536.0 # Init scale for grad scaler used at fp16 + + + linear_start: 0.00085 + linear_end: 0.012 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: images + cond_stage_key: captions # txt for cifar, caption for pbss + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn # check + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + scale_by_std: False + ckpt_path: + ignore_keys: [] + parameterization: eps + clip_denoised: True + load_only_unet: False + cosine_s: 8e-3 + given_betas: + original_elbo_weight: 0 + v_posterior: 0 + l_simple_weight: 1 + use_positional_encodings: False + learn_logvar: False + logvar_init: 0 + beta_schedule: linear + loss_type: l2 + + concat_mode: True + cond_stage_forward: + text_embedding_dropout_rate: 0.1 + fused_opt: True + inductor: False + inductor_cudagraphs: False + capture_cudagraph_iters: -1 # -1 to disable + channels_last: True + + unet_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel + from_pretrained: #/ckpts/nemo-v1-2.ckpt + from_NeMo: True #Must be specified when from pretrained is not None, False means loading unet from HF ckpt + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: + - 4 + - 2 + - 1 + num_res_blocks: 2 + channel_mult: + - 1 + - 2 + - 4 + - 4 + num_heads: 8 + use_spatial_transformer: true + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: True + enable_amp_o2_fp16: True + + first_stage_config: + _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL + from_pretrained: /ckpts/vae.bin + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 #Never used + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + capture_cudagraph_iters: ${model.capture_cudagraph_iters} + + cond_stage_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder + restore_from_path: /ckpts/openai.nemo + device: cuda + freeze: True + layer: "last" + # For compatibility of history version that uses HF clip model + # _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder + # version: openai/clip-vit-large-patch14 + # device: cuda + # max_length: 77 + # capture_cudagraph_iters: {$model.capture_cudagraph_iters} + + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + ddp_overlap: True # True for using PyTorch DDP overlap. + + optim: + name: megatron_fused_adam + lr: null + weight_decay: 0. + betas: + - 0.9 + - 0.999 + sched: + name: WarmupHoldPolicy + warmup_steps: 10000 + hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant + capturable: True + master_weights: True + max_norm: ${trainer.gradient_clip_val} + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + data: + num_workers: 16 + synthetic_data: False # dataset_path and local_root_path can be empty when using synthetic data + synthetic_data_length: 10000 + train: + dataset_path: + - /datasets/coyo/wdinfo.pkl + augmentations: + resize_smallest_side: 512 + center_crop_h_w: 512, 512 + horizontal_flip: False + filterings: + + webdataset: + infinite_sampler: False + local_root_path: /datasets/coyo diff --git a/examples/multimodal/generative/stable_diffusion/generate_fid_images.py b/examples/multimodal/generative/stable_diffusion/generate_fid_images.py new file mode 100644 index 000000000000..c1a37cd953a6 --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/generate_fid_images.py @@ -0,0 +1,83 @@ +import os +import time +import torch +from omegaconf.omegaconf import open_dict + +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.multimodal.parts.stable_diffusion.pipeline import pipeline +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.config import hydra_runner + + +@hydra_runner(config_path='conf', config_name='sd_fid_images') +def main(cfg): + # Read configuration parameters + nnodes_per_cfg = cfg.fid.nnodes_per_cfg + ntasks_per_node = cfg.fid.ntasks_per_node + local_task_id = cfg.fid.local_task_id + num_images_to_eval = cfg.fid.num_images_to_eval + path = cfg.fid.coco_captions_path + + node_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0)) + node_id_per_cfg = node_id % nnodes_per_cfg + + current_node_cfg = cfg.fid.classifier_free_guidance[node_id // nnodes_per_cfg] + with open_dict(cfg): + cfg.infer.unconditional_guidance_scale = current_node_cfg + save_path = os.path.join(cfg.fid.save_path, str(current_node_cfg)) + + # Read and store captions + captions = [] + caption_files = sorted(os.listdir(path)) + assert len(caption_files) >= num_images_to_eval + for file in caption_files[:num_images_to_eval]: + with open(os.path.join(path, file), 'r') as f: + captions += f.readlines() + + # Calculate partition sizes and select the partition for the current node + partition_size_per_node = num_images_to_eval // nnodes_per_cfg + start_idx = node_id_per_cfg * partition_size_per_node + end_idx = (node_id_per_cfg + 1) * partition_size_per_node if node_id_per_cfg != nnodes_per_cfg - 1 else None + captions = captions[start_idx:end_idx] + + local_task_id = int(local_task_id) if local_task_id is not None else int(os.environ.get("SLURM_LOCALID", 0)) + partition_size_per_task = int(len(captions) // ntasks_per_node) + + # Select the partition for the current task + start_idx = local_task_id * partition_size_per_task + end_idx = (local_task_id + 1) * partition_size_per_task if local_task_id != ntasks_per_node - 1 else None + input = captions[start_idx:end_idx] + + print(f"Current worker {node_id}:{local_task_id} will generate {len(input)} images") + + os.makedirs(save_path, exist_ok=True) + + # Modify the model configuration + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + model_cfg.unet_config.use_flash_attention = False + model_cfg.unet_config.from_pretrained = None + model_cfg.first_stage_config.from_pretrained = None + model_cfg.global_batch_size = model_cfg.micro_batch_size * ntasks_per_node + + # Set up the trainer and model for inference + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier + ) + model = megatron_diffusion_model.model + model.cuda().eval() + + # Generate images using the model and save them + for i, prompt in enumerate(input): + cfg.infer.prompts = [prompt] + rng = torch.Generator().manual_seed(cfg.infer.seed + local_task_id * 10 + node_id_per_cfg * 100 + i * 1000) + output = pipeline(model, cfg, rng=rng) + for image in output[0]: + image_num = i + partition_size_per_node * node_id_per_cfg + partition_size_per_task * local_task_id + image.save(os.path.join(save_path, f'image{image_num:06d}.png')) + + +if __name__ == "__main__": + main() diff --git a/examples/multimodal/generative/stable_diffusion/sd_infer.py b/examples/multimodal/generative/stable_diffusion/sd_infer.py new file mode 100644 index 000000000000..0fe9a0064e47 --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/sd_infer.py @@ -0,0 +1,44 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.multimodal.parts.stable_diffusion.pipeline import pipeline +from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference +from nemo.core.config import hydra_runner + + +@hydra_runner(config_path='conf', config_name='sd_infer') +def main(cfg): + def model_cfg_modifier(model_cfg): + model_cfg.precision = cfg.trainer.precision + model_cfg.ckpt_path = None + model_cfg.inductor = False + model_cfg.unet_config.use_flash_attention = False + model_cfg.unet_config.from_pretrained = None + model_cfg.first_stage_config.from_pretrained = None + + torch.backends.cuda.matmul.allow_tf32 = True + trainer, megatron_diffusion_model = setup_trainer_and_model_for_inference( + model_provider=MegatronLatentDiffusion, cfg=cfg, model_cfg_modifier=model_cfg_modifier + ) + model = megatron_diffusion_model.model + model.cuda().eval() + + rng = torch.Generator().manual_seed(cfg.infer.seed) + pipeline(model, cfg, rng=rng) + + +if __name__ == "__main__": + main() diff --git a/examples/multimodal/generative/stable_diffusion/sd_train.py b/examples/multimodal/generative/stable_diffusion/sd_train.py new file mode 100644 index 000000000000..b741af3d76e6 --- /dev/null +++ b/examples/multimodal/generative/stable_diffusion/sd_train.py @@ -0,0 +1,87 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from datetime import timedelta + +import torch +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer + +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +class MegatronStableDiffusionTrainerBuilder(MegatronTrainerBuilder): + """Builder for SD model Trainer with overrides.""" + + def _training_strategy(self) -> NLPDDPStrategy: + """ + Returns a ddp strategy passed to Trainer.strategy. + """ + ddp_overlap = self.cfg.model.get('ddp_overlap', True) + if ddp_overlap: + return NLPDDPStrategy( + no_ddp_communication_hook=False, + gradient_as_bucket_view=self.cfg.model.gradient_as_bucket_view, + find_unused_parameters=True, + bucket_cap_mb=256, + ) + else: + return NLPDDPStrategy( + no_ddp_communication_hook=True, + gradient_as_bucket_view=self.cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + + +@hydra_runner(config_path='conf', config_name='sd_train') +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + torch.backends.cuda.matmul.allow_tf32 = True + + if cfg.model.capture_cudagraph_iters >= 0: + # Required by CUDA graph with DDP + os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "0" + + # Hack to avoid CUDA graph issue with AMP, PyTorch Lightning doesn't support + # changing autocast arguments for now. + # https://github.com/pytorch/pytorch/blob/v1.13.1/torch/cuda/graphs.py#L234 + def amp_autocast_init(self, *args, **kwargs): + if "cache_enabled" not in kwargs: + kwargs["cache_enabled"] = False + return self.__orig_init__(*args, **kwargs) + + torch.cuda.amp.autocast.__orig_init__ = torch.cuda.amp.autocast.__init__ + torch.cuda.amp.autocast.__init__ = amp_autocast_init + torch.autocast.__orig_init__ = torch.autocast.__init__ + torch.autocast.__init__ = amp_autocast_init + + trainer = MegatronStableDiffusionTrainerBuilder(cfg).create_trainer() + + exp_manager(trainer, cfg.exp_manager) + + model = MegatronLatentDiffusion(cfg.model, trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml b/examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml new file mode 100644 index 000000000000..fbe1883276fe --- /dev/null +++ b/examples/multimodal/mllm/kosmos/conf/kosmos_config.yaml @@ -0,0 +1,317 @@ +name: nemo_kosmos +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: bf16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 375000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + val_check_interval: 100 + check_val_every_n_epoch: null + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: nemo_kosmos + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + precision: ${trainer.precision} + + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + + # Batch size guideline for different types of dataset + micro_batch_size: 1 # limited by GPU memory + global_batch_size: 256 # will use more micro batches to reach global batch size + + media_start_token: "" + media_end_token: "" + + enabled_data_types: ["text", "image_caption"] + per_type_micro_batch_size: + text: 1 + image_caption: 32 + image_interleaved: 1 + per_type_sequence_length: + text: 2048 # placeholder + image_caption: 128 + image_interleaved: 2048 + per_type_loss_weights: + text: 1 + image_caption: 1 + image_interleaved: 1 + + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_path: null # used in fine-tuning + # multimodal configs + num_media_latents: 64 # each media is encoded and sampled into `num_media_latents` LM embeddings + + llm: + precision: ${trainer.precision} + + # model architecture + encoder_seq_length: 2048 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: learned_absolute + num_layers: 12 + hidden_size: 2048 + ffn_hidden_size: 5440 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 16 + init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm1p # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + bias: True # Whether to use bias terms in all weight matrices. + activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] + headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. + transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] + normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. + rotary_percentage: 0.5 # If using position_embedding_type=rope, then the per head dim is multiplied by this. + attention_type: 'multihead' # Attention type. Options ['multihead'] + share_embeddings_and_output_weights: False # Share embedding and output layer weights. + overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + + vision: + from_pretrained: /path/to/clip_model.nemo + precision: ${trainer.precision} + # vision configs + patch_dim: 14 + img_h: 224 + img_w: 224 + image_mean: null + image_std: null + num_channels: 3 + drop_patch_rate: 0.0 + drop_path_rate: 0.0 + global_average_pool: False + class_token_length: 1 + preprocess_layernorm: True # apply layer norm to embedded tokens + + # model architecture + encoder_seq_length: 196 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: learned_parameters + num_layers: 24 + hidden_size: 1024 + ffn_hidden_size: 4096 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 16 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add final layer norm + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + # Megatron O2-style half-precision + megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'megatron' + type: 'GPT2BPETokenizer' + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + + data: + # TEXT DATA + # ================================================================================== + # Path to data must be specified by the user. + # Supports List, String and Dictionary + # List : can override from the CLI: "model.data.data_prefix=[.5,/raid/data/pile/my-gpt3_00_text_document,.5,/raid/data/pile/my-gpt3_01_text_document]", + # Or see example below: + # data_prefix: + # - .5 + # - /raid/data/pile/my-gpt3_00_text_document + # - .5 + # - /raid/data/pile/my-gpt3_01_text_document + # Dictionary: can override from CLI "model.data.data_prefix"={"train":[1.0, /path/to/data], "validation":/path/to/data, "test":/path/to/test} + # Or see example below: + # "model.data.data_prefix: {train:[1.0,/path/to/data], validation:[/path/to/data], test:[/path/to/test]}" + num_workers: 2 + data_prefix: ??? + index_mapping_dir: null # path to save index mapping .npy files, by default will save in the same location as data_prefix + data_impl: mmap + splits_string: 950,50,0 + seq_length: ${model.llm.encoder_seq_length} + skip_warmup: True + dataloader_type: single # cyclic + reset_position_ids: False # Reset position ids after end-of-document token + reset_attention_mask: False # Reset attention mask after end-of-document token + eod_mask_loss: False # Mask loss for the end of document tokens + validation_drop_last: True # Set to false if the last partial validation samples is to be consumed + no_seqlen_plus_one_input_tokens: False # Set to True to disable fetching (sequence length + 1) input tokens, instead get (sequence length) input tokens and mask the last token + pad_samples_to_global_batch_size: False # Set to True if you want to pad the last partial batch with -1's to equal global batch size + shuffle_documents: True # Set to False to disable documents shuffling. Sample index will still be shuffled + + # ================================================================================= + # MEDIA DATA + image_caption: + num_workers: 8 + train: + dataset_path: # List of paths to pkl files or tar files + - /datasets/coyo/test.pkl + validation: # List of paths to pkl files or tar files + dataset_path: + - /datasets/coyo/test.pkl + webdataset: + infinite_sampler: False + local_root_path: /datasets/coyo + + image_interleaved: + num_workers: 8 + train: + dataset_path: # List of paths to pkl files or tar files + - /datasets/coyo/test.pkl + validation: # List of paths to pkl files or tar files + dataset_path: + - /datasets/coyo/test.pkl + webdataset: + chunk_size: 190 + infinite_sampler: False + local_root_path: /datasets/coyo +# boto3: +# credentials_file: /lustre/fsw/joc/yuya/kosmos/s3_cred +# bucket: webdataset + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 2e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + sched: + name: CosineAnnealing + warmup_steps: 2000 + constant_steps: 0 + min_lr: 2e-5 \ No newline at end of file diff --git a/examples/multimodal/mllm/kosmos/kosmos_pretrain.py b/examples/multimodal/mllm/kosmos/kosmos_pretrain.py new file mode 100644 index 000000000000..8ac7679ade74 --- /dev/null +++ b/examples/multimodal/mllm/kosmos/kosmos_pretrain.py @@ -0,0 +1,51 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.connectors.checkpoint_connector import _CheckpointConnector + +from nemo.collections.multimodal.models.kosmos.megatron_kosmos_model import MegatronKosmosModel +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="conf", config_name="kosmos_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + trainer = MegatronTrainerBuilder(cfg).create_trainer() + exp_manager(trainer, cfg.exp_manager) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + model = MegatronKosmosModel(cfg.model, trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/mllm/neva/conf/llava_config.yaml b/examples/multimodal/mllm/neva/conf/llava_config.yaml new file mode 100644 index 000000000000..0b2cf826c606 --- /dev/null +++ b/examples/multimodal/mllm/neva/conf/llava_config.yaml @@ -0,0 +1,213 @@ +name: nemo_neva +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: bf16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 4650 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + val_check_interval: 100 + check_val_every_n_epoch: null + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: nemo_neva + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + precision: ${trainer.precision} + + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + + # Batch size guideline for different types of dataset + micro_batch_size: 16 # limited by GPU memory + global_batch_size: 128 # will use more micro batches to reach global batch size + + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_path: null # used in fine-tuning + + # Multimodal configs + mm_cfg: + llm: + from_pretrained: null # path to nemo checkpoint + freeze: False + model_type: llama_2 # Only support nvgpt or llama_2 + vision_encoder: + from_pretrained: "openai/clip-vit-large-patch14" # path or name + from_hf: True + patch_dim: 14 + hidden_size: 1024 # could be found from model but tricky in code + vision_select_layer: -2 # default to the last layer + class_token_length: 1 + freeze: True + pretrain_mm_mlp_adapter: null # path to pretrained mm adapter + use_im_start_end: False + + + # LLM configs + # use GPTModel from megatron.core + mcore_gpt: False + + # model architecture + encoder_seq_length: 4096 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: rope + num_layers: 24 + hidden_size: 2048 + ffn_hidden_size: 5440 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 16 + init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0.0 # Dropout probability for hidden state transformer. + attention_dropout: 0.0 # Dropout probability for attention + ffn_dropout: 0.0 # Dropout probability in the feed-forward layer. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: 'rmsnorm' # Normalization layer to use. Options are 'layernorm', 'rmsnorm' + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + make_vocab_size_divisible_by: 128 # Pad the vocab size to be divisible by this value for computation efficiency. + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + bias: False # Whether to use bias terms in all weight matrices. + activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] + headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. + transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] + normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. + rotary_percentage: 1.0 # If using position_embedding_type=rope, then the per head dim is multiplied by this. + attention_type: 'multihead' # Attention type. Options ['multihead'] + share_embeddings_and_output_weights: False # Share embedding and output layer weights. + overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + seq_len_interpolation_factor: null # RoPE Interpolation factor for sequence length. This is used to build long-context models with RoPE ex: https://arxiv.org/abs/2306.15595. + num_query_groups: null # Number of query groups for group query attention. If None, normal attention is used. + override_vocab_size: 32000 + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: False # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + + # Megatron O2-style half-precision + megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'sentencepiece' + type: null + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + additional_special_tokens: null # ["", "", "", "", "", ""] + + data: + num_workers: 8 + dataloader_type: cyclic + data_path: + lazy_preprocess: True + is_multimodal: True + sep_image_conv_front: False + image_token_len: 256 + conv_template: llama_2 # check `nemo/collections/multimodal/data/neva/conversation.py` + image_folder: null + image_aspect_ratio: 'square' + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 2e-3 + weight_decay: 0. + betas: + - 0.9 + - 0.95 + sched: + name: CosineAnnealing + warmup_steps: 140 + constant_steps: 0 + min_lr: 2e-5 \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/conf/neva_config.yaml b/examples/multimodal/mllm/neva/conf/neva_config.yaml new file mode 100644 index 000000000000..c2f44de2c1b9 --- /dev/null +++ b/examples/multimodal/mllm/neva/conf/neva_config.yaml @@ -0,0 +1,212 @@ +name: nemo_neva +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: bf16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 4650 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + val_check_interval: 100 + check_val_every_n_epoch: null + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: nemo_neva + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + precision: ${trainer.precision} + + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + + # Batch size guideline for different types of dataset + micro_batch_size: 16 # limited by GPU memory + global_batch_size: 128 # will use more micro batches to reach global batch size + + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_path: null # used in fine-tuning + + # Multimodal configs + mm_cfg: + llm: + from_pretrained: null # path to nemo checkpoint + freeze: True + model_type: llama_2 # `nvgpt` or `llama_2` supported + vision_encoder: + from_pretrained: "" # path or name + from_hf: True + patch_dim: 14 + hidden_size: 1024 # could be found from model but tricky in code + vision_select_layer: -2 # default to the last layer + class_token_length: 1 + freeze: True + pretrain_mm_mlp_adapter: null # path to pretrained mm adapter + use_im_start_end: False + + + # LLM configs + # use GPTModel from megatron.core + mcore_gpt: True + + # model architecture + encoder_seq_length: 4096 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: rope + num_layers: 40 + hidden_size: 5120 + ffn_hidden_size: 13824 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 40 + init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: rmsnorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + bias: False # Whether to use bias terms in all weight matrices. + activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] + headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. + transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] + normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. + rotary_percentage: 0.5 # If using position_embedding_type=rope, then the per head dim is multiplied by this. + attention_type: 'multihead' # Attention type. Options ['multihead'] + share_embeddings_and_output_weights: False # Share embedding and output layer weights. + overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + seq_len_interpolation_factor: null # RoPE Interpolation factor for sequence length. This is used to build long-context models with RoPE ex: https://arxiv.org/abs/2306.15595. + num_query_groups: null # Number of query groups for group query attention. If None, normal attention is used. + use_flash_attention: True + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: False # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + + # Megatron O2-style half-precision + megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters + async_grad_allreduce: False + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'sentencepiece' + type: null + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + additional_special_tokens: null # ["", "", "", "", "", ""] + + data: + num_workers: 8 + dataloader_type: cyclic + data_path: + lazy_preprocess: True + is_multimodal: True + sep_image_conv_front: False + image_token_len: 256 + conv_template: ${model.mm_cfg.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` + image_folder: null + image_aspect_ratio: 'square' + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 2e-3 + weight_decay: 0. + betas: + - 0.9 + - 0.95 + sched: + name: CosineAnnealing + warmup_steps: 140 + constant_steps: 0 + min_lr: 2e-5 \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/conf/neva_finetune.yaml b/examples/multimodal/mllm/neva/conf/neva_finetune.yaml new file mode 100644 index 000000000000..bd902b9f5d15 --- /dev/null +++ b/examples/multimodal/mllm/neva/conf/neva_finetune.yaml @@ -0,0 +1,209 @@ +name: nemo_neva +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: bf16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 4900 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + val_check_interval: 100 + check_val_every_n_epoch: null + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: nemo_neva + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + precision: ${trainer.precision} + + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + + # Batch size guideline for different types of dataset + micro_batch_size: 4 # limited by GPU memory + global_batch_size: 32 # will use more micro batches to reach global batch size + + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_path: null # used in fine-tuning + + # Multimodal configs + mm_cfg: + llm: + from_pretrained: null # path to nemo checkpoint + freeze: False + model_type: nvgpt # Only support nvgpt or llama_2 + vision_encoder: + from_pretrained: "" # path or name + from_hf: True + patch_dim: 14 + hidden_size: 1024 # could be found from model but tricky in code + vision_select_layer: -2 # default to the last layer + class_token_length: 1 + freeze: True + pretrain_mm_mlp_adapter: null # path to pretrained mm adapter + use_im_start_end: True # only support True now + + + # LLM configs + # use GPTModel from megatron.core + mcore_gpt: False + + # model architecture + encoder_seq_length: 4096 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: rope + num_layers: 24 + hidden_size: 2048 + ffn_hidden_size: 5440 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 16 + init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm1p # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + bias: False # Whether to use bias terms in all weight matrices. + activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] + headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. + transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] + normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. + rotary_percentage: 0.5 # If using position_embedding_type=rope, then the per head dim is multiplied by this. + attention_type: 'multihead' # Attention type. Options ['multihead'] + share_embeddings_and_output_weights: False # Share embedding and output layer weights. + overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + seq_len_interpolation_factor: null # RoPE Interpolation factor for sequence length. This is used to build long-context models with RoPE ex: https://arxiv.org/abs/2306.15595. + num_query_groups: null # Number of query groups for group query attention. If None, normal attention is used. + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: False # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + + # Megatron O2-style half-precision + megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'megatron' + type: 'GPT2BPETokenizer' + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + + data: + num_workers: 8 + dataloader_type: cyclic + data_path: + lazy_preprocess: True + is_multimodal: True + sep_image_conv_front: False + image_token_len: 256 + conv_template: ${model.mm_cfg.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` + image_folder: null + image_aspect_ratio: 'square' + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 2e-5 + weight_decay: 0. + betas: + - 0.9 + - 0.95 + sched: + name: CosineAnnealing + warmup_steps: 200 + constant_steps: 0 + min_lr: 2e-7 \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/conf/neva_inference.yaml b/examples/multimodal/mllm/neva/conf/neva_inference.yaml new file mode 100644 index 000000000000..35ca1e179f98 --- /dev/null +++ b/examples/multimodal/mllm/neva/conf/neva_inference.yaml @@ -0,0 +1,52 @@ +inference: + greedy: False # Whether or not to use sampling ; use greedy decoding otherwise + top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. + top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. + temperature: 0.2 # sampling temperature + add_BOS: False # add the bos token at the begining of the prompt + tokens_to_generate: 256 # The minimum length of the sequence to be generated. + all_probs: False # whether return the log prob for all the tokens in vocab + repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. + min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. + compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False + end_strings: ["","",] # generation will stop when one of these tokens is generated + images_base_path: /pwd/images + +trainer: + devices: 8 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: bf16 # 16, 32, or bf16 + +cluster_type: BCP +tensor_model_parallel_size: 8 +pipeline_model_parallel_size: 1 +pipeline_model_parallel_split_rank: 0 # used for encoder and decoder model (0 for others) +neva_model_file: /pwd/nemo_experiments/nemo_llava.nemo #neva_22b_tp8_finetuned_v1.nemo neva_8b_tp4_finetuned_v1.nemo +checkpoint_dir: null #/pwd/nemo_multimodal/nemo_experiments/nemo_llava_finetune/checkpoints # checkpoint file dir. This is used to load the PTL checkpoint generated during the Kosmos training +checkpoint_name: null #megatron_clip--val_loss=0.41-step=13499-consumed_samples=431904.0.ckpt # PTL checkpoint file name, only used for PTL checkpoint loading +hparams_file: null #/pwd/nemo_multimodal/nemo_experiments/nemo_llava_finetune/version_0/hparams.yaml # model configuration file, only used for PTL checkpoint loading +quality: 9 +toxicity: 0 +humor: 6 +creativity: 6 +violence: 0 +helpfulness: 6 +not_appropriate: 0 + +# MORE THAN ONE INFERENCE IS NOT RUNNING PROPERLY NEED TO CHECK WHY SECOND IS OUTPUTING JUNK N +prompt_file: /pwd/nemo_experiments/input_prompts.jsonl +output_file: /pwd/nemo_experiments/results.jsonl + +server: False # whether launch the API server +port: 5555 # the port number for the inference server +web_server: False # whether launch the web inference server +share: False # whether create a public URL +username: test # user name for web client +password: test2 # password for web client +web_port: 9889 # the port number of the web server + +quantization: + algorithm: awq # int8_sq, fp8, int8, awq + enable: False \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/conf/neva_peft.yaml b/examples/multimodal/mllm/neva/conf/neva_peft.yaml new file mode 100644 index 000000000000..0099d1d8c4d4 --- /dev/null +++ b/examples/multimodal/mllm/neva/conf/neva_peft.yaml @@ -0,0 +1,215 @@ +name: nemo_neva +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: bf16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 4900 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + val_check_interval: 100 + check_val_every_n_epoch: null + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: nemo_neva + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_clip--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +model: + precision: ${trainer.precision} + + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + + # Batch size guideline for different types of dataset + micro_batch_size: 4 # limited by GPU memory + global_batch_size: 32 # will use more micro batches to reach global batch size + + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_path: null # used in fine-tuning + + # Multimodal configs + mm_cfg: + llm: + from_pretrained: null # path to nemo checkpoint + freeze: True # Set this to True in adapter learning! + model_type: nvgpt # Only support nvgpt or llama_2 + vision_encoder: + from_pretrained: "" # path or name + from_hf: True + patch_dim: 14 + hidden_size: 1024 # could be found from model but tricky in code + vision_select_layer: -2 # default to the last layer + class_token_length: 1 + freeze: True + pretrain_mm_mlp_adapter: null # path to pretrained mm adapter + use_im_start_end: True # only support True now + + peft: + lora_tuning: + adapter_dim: 32 + adapter_dropout: 0.0 + column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal + row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal + + # LLM configs + # use GPTModel from megatron.core + mcore_gpt: False + + # model architecture + encoder_seq_length: 4096 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: rope + num_layers: 24 + hidden_size: 2048 + ffn_hidden_size: 5440 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 16 + init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm1p # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + bias: False # Whether to use bias terms in all weight matrices. + activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] + headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. + transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] + normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. + rotary_percentage: 0.5 # If using position_embedding_type=rope, then the per head dim is multiplied by this. + attention_type: 'multihead' # Attention type. Options ['multihead'] + share_embeddings_and_output_weights: False # Share embedding and output layer weights. + overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + seq_len_interpolation_factor: null # RoPE Interpolation factor for sequence length. This is used to build long-context models with RoPE ex: https://arxiv.org/abs/2306.15595. + num_query_groups: null # Number of query groups for group query attention. If None, normal attention is used. + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: False # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + + # Megatron O2-style half-precision + megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'megatron' + type: 'GPT2BPETokenizer' + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + + data: + num_workers: 8 + dataloader_type: cyclic + data_path: + lazy_preprocess: True + is_multimodal: True + sep_image_conv_front: False + image_token_len: 256 + conv_template: ${model.mm_cfg.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` + image_folder: null + image_aspect_ratio: 'square' + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 2e-5 + weight_decay: 0. + betas: + - 0.9 + - 0.95 + sched: + name: CosineAnnealing + warmup_steps: 200 + constant_steps: 0 + min_lr: 2e-7 \ No newline at end of file diff --git a/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py b/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py new file mode 100644 index 000000000000..b70faf61a413 --- /dev/null +++ b/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py @@ -0,0 +1,343 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r""" +Conversion script to convert Huggingface LLaMA checkpoints into nemo checkpoint. + Example to run this conversion script: + python convert_hf_llava_to_nevo.py \ + --in-file \ + --out-file \ + --tokenizer-model +""" + +import os +from argparse import ArgumentParser +from collections import OrderedDict + +import torch +from llava import LlavaLlamaForCausalLM +from omegaconf import OmegaConf +from pytorch_lightning.core.saving import _load_state as ptl_load_state +from pytorch_lightning.trainer.trainer import Trainer +from transformers import LlamaTokenizer + +from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + NLPSaveRestoreConnector, + PipelineMixedPrecisionPlugin, +) +from nemo.utils import logging + + +def get_args(): + parser = ArgumentParser() + parser.add_argument( + "--in-file", type=str, default=None, required=True, help="Path to Huggingface LLaMA checkpoints", + ) + parser.add_argument("--out-file", type=str, default=None, required=True, help="Path to output .nemo file.") + parser.add_argument( + "--tokenizer-model", type=str, default=None, required=False, help="Path to sentencepiece tokenizer model." + ) + parser.add_argument("--precision", type=str, default="32", help="Model precision") + args = parser.parse_args() + return args + + +def load_model(cls, checkpoint, strict, **kwargs): + try: + if 'cfg' in kwargs: + model = ptl_load_state(cls, checkpoint, strict=strict, **kwargs) + else: + # model = ptl_load_state( + # cls, checkpoint, strict=strict, cfg=checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY], **kwargs + # ) + model = cls(cfg=checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY], **kwargs) + for name, module in model.named_parameters(): + if name in checkpoint['state_dict']: + module.data = checkpoint['state_dict'][name] + checkpoint['state_dict'].pop(name) + else: + print(f"Unexpected key: {name} not in checkpoint but in model.") + + for name, buffer in model.named_buffers(): + if name in checkpoint['state_dict']: + buffer.data = checkpoint['state_dict'][name] + checkpoint['state_dict'].pop(name) + + if len(checkpoint['state_dict'].keys()) != 0: + raise RuntimeError( + f"Additional keys: {checkpoint['state_dict'].keys()} in checkpoint but not in model." + ) + + # register the artifacts + cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] + if cfg.tokenizer.model is not None: + model.register_artifact("tokenizer.tokenizer_model", cfg.tokenizer.model) + if cfg.tokenizer.vocab_file is not None: + model.register_artifact("tokenizer.vocab_file", cfg.tokenizer.vocab_file) + if cfg.tokenizer.merge_file is not None: + model.register_artifact("tokenizer.merge_file", cfg.tokenizer.merge_file) + finally: + cls._set_model_restore_state(is_being_restored=False) + return model + + +def load_config(args, llama_config): + nemo_config = OmegaConf.load(os.path.join(os.path.dirname(__file__), 'conf/llava_config.yaml')).model + nemo_config.encoder_seq_length = llama_config['max_position_embeddings'] + nemo_config.num_layers = int(llama_config['num_hidden_layers']) + nemo_config.hidden_size = llama_config['hidden_size'] + nemo_config.ffn_hidden_size = llama_config['intermediate_size'] + nemo_config.num_attention_heads = llama_config['num_attention_heads'] + nemo_config.max_position_embeddings = llama_config['max_position_embeddings'] + nemo_config.init_method_std = llama_config['initializer_range'] + nemo_config.layernorm_epsilon = llama_config['rms_norm_eps'] + if 'num_key_value_heads' in llama_config: + nemo_config.num_query_groups = llama_config['num_key_value_heads'] + nemo_config.use_cpu_initialization = True + nemo_config.activation = 'fast-swiglu' + if args.tokenizer_model is None: + nemo_config.tokenizer.model = llama_config['tokenizer_model'] + else: + nemo_config.tokenizer.model = args.tokenizer_model + if llama_config['rope_scaling'] is not None: + if llama_config['rope_scaling']['type'] == 'linear': + nemo_config['seq_len_interpolation_factor'] = llama_config['rope_scaling']['factor'] + else: + raise ValueError("Only linear rope scaling type is supported now") + + base = 128 + while llama_config['vocab_size'] % base != 0: + base //= 2 + nemo_config.make_vocab_size_divisible_by = base + + return nemo_config + + +def convert(args): + logging.info(f"loading checkpoint {args.in_file}") + model = LlavaLlamaForCausalLM.from_pretrained(args.in_file) + tokenizer = LlamaTokenizer.from_pretrained(args.in_file) + hf_config = vars(model.config) + hf_config['tokenizer_model'] = str(tokenizer.vocab_file) + print(f"hf_config: {hf_config}") + print("named parameters:") + for name, param in model.named_parameters(): + print(f"- {name}") + + nemo_config = load_config(args, hf_config) + print(nemo_config) + + if args.precision in ["32", "16"]: + precision = int(float(args.precision)) + elif args.precision in ["bf16", "bf16-mixed"]: + if torch.cuda.is_available() and torch.cuda.is_bf16_supported(): + precision = args.precision + else: + logging.warning("BF16 is not supported on this device. Using FP16 instead.") + precision = args.precision[2:] # prune bf in string + else: + precision = args.precision + + plugins = [] + if precision in [16, '16', 'bf16', '16-mixed', 'bf16-mixed']: + scaler = None + if precision in [16, '16', '16-mixed']: + scaler = GradScaler( + init_scale=nemo_config.get('native_amp_init_scale', 2 ** 32), + growth_interval=nemo_config.get('native_amp_growth_interval', 1000), + hysteresis=nemo_config.get('hysteresis', 2), + ) + # MixedPrecisionPlugin in PTL >= 2.0 requires precision to be 16-mixed or bf16-mixed + plugin_precision = '16-mixed' + else: + plugin_precision = 'bf16-mixed' + + if nemo_config.get('megatron_amp_O2', False): + plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler)) + + if precision == 32: + dtype = torch.float32 + elif precision in [16, "16", "16-mixed"]: + dtype = torch.float16 + elif precision in ["bf16", "bf16-mixed"]: + dtype = torch.bfloat16 + else: + dtype = torch.float32 # fallback + + nemo_config.precision = precision + print(f"nemo_config: {nemo_config}") + + trainer = Trainer(plugins=plugins, accelerator='cpu', precision=precision, strategy=NLPDDPStrategy()) + + hidden_size = hf_config["hidden_size"] + head_num = hf_config["num_attention_heads"] + head_size = hidden_size // head_num + num_layers = hf_config["num_hidden_layers"] + + mcore_gpt = nemo_config.mcore_gpt + + assert mcore_gpt == nemo_config.get( + 'transformer_engine', False + ), "mcore_gpt transformer_engine must be enabled (or disabled) together." + + param_to_weights = lambda param: param.float() + + checkpoint = OrderedDict() + checkpoint['state_dict'] = OrderedDict() + + # Multimodal projection + if mcore_gpt: + raise NotImplementedError + else: + mm_projection_layer_base_name = ( + f'model.language_model.embedding.word_embeddings.adapter_layer.mm_linear_adapter.linear' + ) + checkpoint['state_dict'][f'{mm_projection_layer_base_name}.weight'] = param_to_weights( + model.state_dict()[f'model.mm_projector.weight'] + ) + checkpoint['state_dict'][f'{mm_projection_layer_base_name}.bias'] = param_to_weights( + model.state_dict()[f'model.mm_projector.bias'] + ) + + embed_weight = model.state_dict()[f'model.embed_tokens.weight'] + if mcore_gpt: + embed_weights_base_name = f'model.embedding.word_embeddings.weight' + else: + embed_weights_base_name = f'model.language_model.embedding.word_embeddings.weight' + checkpoint['state_dict'][embed_weights_base_name] = param_to_weights(embed_weight) + + # in hf, this is defined as register_buffer(..., persistent=False) so it won't be in the state dict + if f'model.layers.0.self_attn.rotary_emb.inv_freq' in model.state_dict(): + rotary_embed_weight = model.state_dict()[f'model.layers.0.self_attn.rotary_emb.inv_freq'] + if mcore_gpt: + rotary_embed_weight_base_name = f'model.rotary_pos_emb.inv_freq' + else: + rotary_embed_weight_base_name = f'model.language_model.rotary_pos_emb.inv_freq' + checkpoint['state_dict'][rotary_embed_weight_base_name] = param_to_weights(rotary_embed_weight) + + if nemo_config.num_query_groups is None or nemo_config.num_query_groups == head_num: + num_query_groups = head_num + else: + num_query_groups = nemo_config.num_query_groups + assert head_num % num_query_groups == 0, 'head_num must be divisible by num_query_groups' + if mcore_gpt: + assert nemo_config.activation.startswith('fast-'), 'mcore only supports fast version of gated linear unit.' + + for l in range(int(num_layers)): + print(f"converting layer {l}") + old_tensor_shape = model.state_dict()[f'model.layers.{l}.self_attn.q_proj.weight'].size() + new_q_tensor_shape = (head_num, head_size) + old_tensor_shape[1:] + new_kv_tensor_shape = (num_query_groups, head_size) + old_tensor_shape[1:] + q = model.state_dict()[f'model.layers.{l}.self_attn.q_proj.weight'].view(*new_q_tensor_shape) + k = model.state_dict()[f'model.layers.{l}.self_attn.k_proj.weight'].view(*new_kv_tensor_shape) + v = model.state_dict()[f'model.layers.{l}.self_attn.v_proj.weight'].view(*new_kv_tensor_shape) + qkv_weights = torch.empty((0, head_size) + old_tensor_shape[1:]) + heads_per_group = head_num // num_query_groups + for i in range(num_query_groups): + qkv_weights = torch.cat((qkv_weights, q[i * heads_per_group : (i + 1) * heads_per_group, :, :])) + qkv_weights = torch.cat((qkv_weights, k[i : i + 1, :, :])) + qkv_weights = torch.cat((qkv_weights, v[i : i + 1, :, :])) + qkv_weights = qkv_weights.reshape([head_size * (head_num + 2 * num_query_groups), hidden_size]) + if mcore_gpt: + qkv_weights_base_name = f'model.decoder.layers.{l}.self_attention.linear_qkv.weight' + else: + qkv_weights_base_name = f'model.language_model.encoder.layers.{l}.self_attention.query_key_value.weight' + checkpoint['state_dict'][qkv_weights_base_name] = param_to_weights(qkv_weights) + + # attention dense + o_weight = model.state_dict()[f'model.layers.{l}.self_attn.o_proj.weight'] + if mcore_gpt: + o_weight_base_name = f'model.decoder.layers.{l}.self_attention.linear_proj.weight' + else: + o_weight_base_name = f'model.language_model.encoder.layers.{l}.self_attention.dense.weight' + checkpoint['state_dict'][o_weight_base_name] = param_to_weights(o_weight) + + # MLP + mlp_down_weight = model.state_dict()[f'model.layers.{l}.mlp.gate_proj.weight'] + mlp_gate_weight = model.state_dict()[f'model.layers.{l}.mlp.up_proj.weight'] + if mcore_gpt: + mlp_down_base_name = f'model.decoder.layers.{l}.mlp.linear_fc1.weight' + else: + mlp_down_base_name = f'model.language_model.encoder.layers.{l}.mlp.dense_h_to_4h.weight' + mlp_down_weight = torch.cat((mlp_down_weight, mlp_gate_weight), axis=0) + checkpoint['state_dict'][mlp_down_base_name] = param_to_weights(mlp_down_weight) + + mlp_up_weight = model.state_dict()[f'model.layers.{l}.mlp.down_proj.weight'] + if mcore_gpt: + mlp_up_base_name = f'model.decoder.layers.{l}.mlp.linear_fc2.weight' + else: + mlp_up_base_name = f'model.language_model.encoder.layers.{l}.mlp.dense_4h_to_h.weight' + checkpoint['state_dict'][mlp_up_base_name] = param_to_weights(mlp_up_weight) + + # LayerNorm + input_ln_weight = model.state_dict()[f'model.layers.{l}.input_layernorm.weight'] + if mcore_gpt: + input_ln_base_name = f'model.decoder.layers.{l}.self_attention.linear_qkv.layer_norm_weight' + else: + input_ln_base_name = f'model.language_model.encoder.layers.{l}.input_layernorm.weight' + checkpoint['state_dict'][input_ln_base_name] = param_to_weights(input_ln_weight) + + post_attn_ln_weight = model.state_dict()[f'model.layers.{l}.post_attention_layernorm.weight'] + if mcore_gpt: + post_attn_ln_base_name = f'model.decoder.layers.{l}.mlp.linear_fc1.layer_norm_weight' + else: + post_attn_ln_base_name = f'model.language_model.encoder.layers.{l}.post_attention_layernorm.weight' + checkpoint['state_dict'][post_attn_ln_base_name] = param_to_weights(post_attn_ln_weight) + + print(f"done layer {l}") + + final_ln_weight = model.state_dict()[f'model.norm.weight'] + if mcore_gpt: + final_ln_base_name = f'model.decoder.final_layernorm.weight' + else: + final_ln_base_name = f'model.language_model.encoder.final_layernorm.weight' + checkpoint['state_dict'][final_ln_base_name] = param_to_weights(final_ln_weight) + + output_layer_weight = model.state_dict()[f'lm_head.weight'] + if mcore_gpt: + output_layer_base_name = f'model.output_layer.weight' + else: + output_layer_base_name = f'model.language_model.output_layer.weight' + checkpoint['state_dict'][output_layer_base_name] = param_to_weights(output_layer_weight) + + checkpoint[MegatronNevaModel.CHECKPOINT_HYPER_PARAMS_KEY] = nemo_config + + del model + + if nemo_config.get('megatron_amp_O2', False): + keys = list(checkpoint['state_dict'].keys()) + for key in keys: + checkpoint['state_dict'][key.replace('model.', 'model.module.', 1)] = checkpoint['state_dict'].pop(key) + + model = load_model(MegatronNevaModel, checkpoint, strict=False, trainer=trainer) + + model._save_restore_connector = NLPSaveRestoreConnector() + + # cast to target precision and disable cpu init + model = model.to(dtype=dtype) + model.cfg.use_cpu_initialization = False + + model.save_to(args.out_file) + logging.info(f'NeMo model saved to: {args.out_file}') + + +if __name__ == '__main__': + args = get_args() + convert(args) diff --git a/examples/multimodal/mllm/neva/neva_evaluation.py b/examples/multimodal/mllm/neva/neva_evaluation.py new file mode 100644 index 000000000000..256d58018c9b --- /dev/null +++ b/examples/multimodal/mllm/neva/neva_evaluation.py @@ -0,0 +1,353 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +import json +import os +import re +import threading + +import torch +from omegaconf import OmegaConf, open_dict +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.trainer import Trainer +from torch.utils.data import DataLoader, Dataset + +from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel +from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel +from nemo.collections.nlp.modules.common.megatron_web_server import get_demo +from nemo.collections.nlp.modules.common.text_generation_server import MegatronServer +from nemo.collections.nlp.modules.common.text_generation_utils import generate +from nemo.collections.nlp.modules.common.transformer.text_generation import LengthParam, SamplingParam +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector +from nemo.core.config import hydra_runner +from nemo.utils.app_state import AppState +from nemo.utils.model_utils import inject_model_parallel_rank + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + +try: + import ammo.torch.quantization as atq + + HAVE_AMMO = True + +except (ImportError, ModuleNotFoundError): + + HAVE_AMMO = False + + +""" +This is the script to run GPT text generation. + +Usage: + Assume the model has TP=1, PP=1 in the following use cases. + a. run greedy inference from a nemo file: + python neva_evaluation.py \ + neva_model_file=PATH_TO_MODEL \ + inference.greedy=True \ + inference.add_BOS=True \ + trainer.devices=1 \ + trainer.num_nodes=1 \ + tensor_model_parallel_size=-1 \ + pipeline_model_parallel_size=-1 \ + prompts=[prompt1,prompt2] + + b. run greedy inference from a PTL checkpoint file: + python neva_evaluation.py \ + checkpoint_dir=PATH_TO_CHECKPOINT_FILE \ + checkpoint_name=CHECKPOINT_FILE_NAME \ + hparams_file=HPARAMS_FILE \ + inference.greedy=True \ + inference.add_BOS=True \ + trainer.devices=1 \ + trainer.num_nodes=1 \ + tensor_model_parallel_size=-1 \ + pipeline_model_parallel_size=-1 \ + prompts=[prompt1,prompt2] + + c. run top_p inference from a nemo file: + python neva_evaluation.py \ + neva_model_file=PATH_TO_MODEL \ + inference.greedy=False \ + inference.top_k=0 \ + inference.top_p=0.9 \ + inference.repetition_penalty=1.2 \ + inference.add_BOS=True \ + trainer.devices=1 \ + trainer.num_nodes=1 \ + tensor_model_parallel_size=-1 \ + pipeline_model_parallel_size=-1 \ + prompts=[prompt1,prompt2] + + d. If you don't need to generate tokens and need model to compute logprobs: + python neva_evaluation.py \ + neva_model_file=PATH_TO_MODEL \ + inference.compute_logprob=True \ + trainer.devices=1 \ + trainer.num_nodes=1 \ + tensor_model_parallel_size=-1 \ + pipeline_model_parallel_size=-1 \ + prompts=[text to get logprob] + + e. Launch the inference server + python neva_evaluation.py \ + neva_model_file=PATH_TO_MODEL \ + trainer.devices=1 \ + trainer.num_nodes=1 \ + tensor_model_parallel_size=-1 \ + pipeline_model_parallel_size=-1 \ + server=True + + To send a request to the server, here is one example code: + ```python + import json + import requests + + batch_size = 8 + port_num = 5555 + headers = {"Content-Type": "application/json"} + + + def request_data(data): + resp = requests.put('http://localhost:{}/generate'.format(port_num), + data=json.dumps(data), + headers=headers) + sentences = resp.json()['sentences'] + return sentences + + + data = { + "sentences": [""] * batch_size, + "images" : [] * batch_size, + "tokens_to_generate": 300, + "temperature": 1.0, + "add_BOS": True, + "top_k": 0, + "top_p": 0.9, + "greedy": False, + "all_probs": False, + "repetition_penalty": 1.2, + "min_tokens_to_generate": 2, + } + + sentences = request_data(data) + ``` +""" + +if not torch.cuda.is_available(): + raise EnvironmentError("GPU is needed for the inference") + + +class RequestDataSet(Dataset): + def __init__(self, sentences): + super().__init__() + self.sentences = sentences + + def __len__(self,): + return len(self.sentences) + + def __getitem__(self, idx): + return self.sentences[idx] + + +@hydra_runner(config_path="conf", config_name="neva_inference") +def main(cfg) -> None: + + plugins = [] + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + # trainer required for restoring model parallel models + trainer = Trainer(plugins=plugins, strategy=NLPDDPStrategy(), **cfg.trainer) + + if ( + cfg.tensor_model_parallel_size < 0 + or cfg.pipeline_model_parallel_size < 0 + or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 + ): + model_config = MegatronNevaModel.restore_from( + restore_path=cfg.neva_model_file, trainer=trainer, return_config=True, + ) + + with open_dict(cfg): + cfg.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) + cfg.pipeline_model_parallel_size = model_config.get('pipeline_model_parallel_size', 1) + cfg.pipeline_model_parallel_split_rank = model_config.get('pipeline_model_parallel_split_rank', 0) + + assert ( + cfg.trainer.devices * cfg.trainer.num_nodes + == cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size + ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" + + if cfg.neva_model_file: + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.neva_model_file): + save_restore_connector.model_extracted_dir = cfg.neva_model_file + + pretrained_cfg = MegatronNevaModel.restore_from( + restore_path=cfg.neva_model_file, + trainer=trainer, + return_config=True, + save_restore_connector=save_restore_connector, + ) + OmegaConf.set_struct(pretrained_cfg, True) + with open_dict(pretrained_cfg): + pretrained_cfg.sequence_parallel = False + pretrained_cfg.activations_checkpoint_granularity = None + pretrained_cfg.activations_checkpoint_method = None + pretrained_cfg.precision = trainer.precision + pretrained_cfg.mm_cfg.llm.from_pretrained = None + # pretrained_cfg.mm_cfg.vision_encoder.from_pretrained = None + + model = MegatronNevaModel.restore_from( + restore_path=cfg.neva_model_file, + trainer=trainer, + override_config_path=pretrained_cfg, + save_restore_connector=save_restore_connector, + ) + + elif cfg.checkpoint_dir: + app_state = AppState() + if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: + app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = cfg.tensor_model_parallel_size + app_state.pipeline_model_parallel_size = cfg.pipeline_model_parallel_size + ( + app_state.tensor_model_parallel_rank, + app_state.pipeline_model_parallel_rank, + app_state.model_parallel_size, + app_state.data_parallel_size, + app_state.pipeline_model_parallel_split_rank, + app_state.virtual_pipeline_model_parallel_rank, + ) = fake_initialize_model_parallel( + world_size=app_state.model_parallel_size, + rank=trainer.global_rank, + tensor_model_parallel_size_=cfg.tensor_model_parallel_size, + pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank, + ) + checkpoint_path = inject_model_parallel_rank(os.path.join(cfg.checkpoint_dir, cfg.checkpoint_name)) + # TODO: This wont work properly (We need to set model.llm.from_pretrained model.vision.from_pretrained to nul) + model = MegatronNevaModel.load_from_checkpoint(checkpoint_path, hparams_file=cfg.hparams_file, trainer=trainer) + else: + raise ValueError("need at least a nemo file or checkpoint dir") + + model.freeze() + + # Have to turn off activations_checkpoint_method for inference + # Have to turn off activations_checkpoint_method for inference + try: + model.model.language_model.encoder.activations_checkpoint_method = None + except AttributeError: + pass + try: + model.model.module.language_model.encoder.activations_checkpoint_method = None + except AttributeError: + pass + + length_params: LengthParam = { + "max_length": cfg.inference.tokens_to_generate, + "min_length": cfg.inference.min_tokens_to_generate, + } + + sampling_params: SamplingParam = { + "use_greedy": cfg.inference.greedy, + "temperature": cfg.inference.temperature, + "top_k": cfg.inference.top_k, + "top_p": cfg.inference.top_p, + "repetition_penalty": cfg.inference.repetition_penalty, + "add_BOS": cfg.inference.add_BOS, + "all_probs": cfg.inference.all_probs, + "compute_logprob": cfg.inference.compute_logprob, + "end_strings": cfg.inference.end_strings, + } + + with open(cfg.prompt_file, 'r') as f: + lines = f.readlines() + + final_prompts = [] + for line in lines: + prompt_dict = json.loads(line) + final_prompts.append(prompt_dict) + + responses = model.generate( + input_prompts=final_prompts, length_params=length_params, sampling_params=sampling_params, inference_config=cfg + ) + + # =================== Start Quantization ==================== + # see https://gitlab-master.nvidia.com/omniml/ammo/-/tree/main/examples/nemo/neva for details + if HAVE_AMMO and cfg.quantization.enable == True: + print(f"Using quantization algorithm: {cfg.quantization.algorithm}") + if cfg.quantization.algorithm == "int8_sq": + atq_config = atq.INT8_SMOOTHQUANT_CFG + elif cfg.quantization.algorithm == "fp8": + atq_config = atq.FP8_DEFAULT_CFG + elif cfg.quantization.algorithm == "awq": + atq_config = atq.INT4_AWQ_CFG + else: + raise ValueError(f"Unsupported quantization algorithm: {cfg.quantization.algorithm}") + + def forward_loop(): + model.generate( + input_prompts=final_prompts, + length_params=length_params, + sampling_params=sampling_params, + inference_config=cfg, + ) + + atq.quantize(model, atq_config, forward_loop) + + responses = model.generate( + input_prompts=final_prompts, + length_params=length_params, + sampling_params=sampling_params, + inference_config=cfg, + ) + # ============== Quantization End ========================= + + results = [] + for response, prompt in zip(responses, final_prompts): + prompt['full_text'] = response["clean_text"] + prompt['text'] = response["clean_response"] + prompt['model_id'] = cfg.neva_model_file + prompt['answer_id'] = 0 + prompt['metadata'] = {} + results.append(prompt) + + with open(cfg.output_file, 'w') as f: + for result in results: + f.write(json.dumps(result) + '\n') + + """ + # Second method of running text generation, call trainer.predict + ds = RequestDataSet(final_prompts) + request_dl = DataLoader(dataset=ds, batch_size=1) + config = OmegaConf.to_container(cfg.inference) + model.set_inference_config(config) + response = trainer.predict(model, request_dl) + + print("***************************") + print(response) + print("***************************") + """ + + +if __name__ == '__main__': + main() # noqa pylint: disable=no-value-for-parameter diff --git a/examples/multimodal/mllm/neva/neva_finetune.py b/examples/multimodal/mllm/neva/neva_finetune.py new file mode 100644 index 000000000000..fa32e5e2d24b --- /dev/null +++ b/examples/multimodal/mllm/neva/neva_finetune.py @@ -0,0 +1,55 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch.multiprocessing as mp +from omegaconf.omegaconf import OmegaConf, open_dict + +from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + +mp.set_start_method("spawn", force=True) + + +@hydra_runner(config_path="conf", config_name="neva_finetune") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + trainer = MegatronTrainerBuilder(cfg).create_trainer() + exp_manager(trainer, cfg.exp_manager) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + if cfg.model.restore_from_path is None: + model = MegatronNevaModel(cfg.model, trainer) + else: + model = MegatronNevaModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + override_config_path=cfg.model, + save_restore_connector=NLPSaveRestoreConnector(), + strict=False, + ) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/mllm/neva/neva_peft.py b/examples/multimodal/mllm/neva/neva_peft.py new file mode 100644 index 000000000000..1738c41c2e48 --- /dev/null +++ b/examples/multimodal/mllm/neva/neva_peft.py @@ -0,0 +1,56 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch.multiprocessing as mp +from omegaconf.omegaconf import OmegaConf, open_dict + +from nemo.collections.multimodal.models.neva.neva_peft_models import MegatronNevaLoRAModel +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector + +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + +mp.set_start_method("spawn", force=True) + + +@hydra_runner(config_path="conf", config_name="neva_peft") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + trainer = MegatronTrainerBuilder(cfg).create_trainer() + exp_manager(trainer, cfg.exp_manager) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + if cfg.model.restore_from_path is None: + model = MegatronNevaLoRAModel(cfg.model, trainer) + else: + model = MegatronNevaLoRAModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + override_config_path=cfg.model, + save_restore_connector=NLPSaveRestoreConnector(), + strict=False, + ) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/multimodal/mllm/neva/neva_pretrain.py b/examples/multimodal/mllm/neva/neva_pretrain.py new file mode 100644 index 000000000000..b7d23532c1b0 --- /dev/null +++ b/examples/multimodal/mllm/neva/neva_pretrain.py @@ -0,0 +1,47 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import torch.multiprocessing as mp +from omegaconf.omegaconf import OmegaConf, open_dict + +from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder + +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + +mp.set_start_method("spawn", force=True) + + +@hydra_runner(config_path="conf", config_name="neva_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + trainer = MegatronTrainerBuilder(cfg).create_trainer() + exp_manager(trainer, cfg.exp_manager) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + model = MegatronNevaModel(cfg.model, trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/vision/convert_ckpt_to_nemo.py b/examples/vision/convert_ckpt_to_nemo.py new file mode 100644 index 000000000000..27782f34d0bb --- /dev/null +++ b/examples/vision/convert_ckpt_to_nemo.py @@ -0,0 +1,160 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r""" +Conversion script to convert PTL checkpoints into nemo checkpoint. + Example to run this conversion script: + python -m torch.distributed.launch --nproc_per_node= * \ + convert_ckpt_to_nemo.py \ + --checkpoint_folder \ + --checkpoint_name \ + --nemo_file_path \ + --tensor_model_parallel_size \ + --pipeline_model_parallel_size +""" + +import os +from argparse import ArgumentParser + +import torch +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel +from nemo.utils import AppState, logging +from nemo.utils.distributed import initialize_distributed +from nemo.utils.model_utils import inject_model_parallel_rank + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +def get_args(): + parser = ArgumentParser() + parser.add_argument( + "--checkpoint_folder", + type=str, + default=None, + required=True, + help="Path to PTL checkpoints saved during training. Ex: /raid/nemo_experiments/megatron_gpt/checkpoints", + ) + parser.add_argument( + "--checkpoint_name", + type=str, + default=None, + required=True, + help="Name of checkpoint to be used. Ex: megatron_gpt--val_loss=6.34-step=649-last.ckpt", + ) + + parser.add_argument( + "--hparams_file", + type=str, + default=None, + required=False, + help="Path config for restoring. It's created during training and may need to be modified during restore if restore environment is different than training. Ex: /raid/nemo_experiments/megatron_gpt/hparams.yaml", + ) + parser.add_argument("--nemo_file_path", type=str, default=None, required=True, help="Path to output .nemo file.") + parser.add_argument("--gpus_per_node", type=int, required=True, default=None) + parser.add_argument("--tensor_model_parallel_size", type=int, required=True, default=None) + parser.add_argument("--pipeline_model_parallel_size", type=int, required=True, default=None) + parser.add_argument( + "--pipeline_model_parallel_split_rank", + type=int, + required=False, + default=None, + help="If pipeline parallel size > 1, this is the rank at which the encoder ends and the decoder begins.", + ) + parser.add_argument("--model_type", type=str, required=True, default="vit_classification") + parser.add_argument("--local_rank", type=int, required=False, default=os.getenv('LOCAL_RANK', -1)) + parser.add_argument("--bcp", action="store_true", help="Whether on BCP platform") + + args = parser.parse_args() + return args + + +def convert(local_rank, rank, world_size, args): + app_state = AppState() + app_state.data_parallel_rank = 0 + num_nodes = world_size // args.gpus_per_node + if args.bcp: + trainer = Trainer( + devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu', plugins=[TorchElasticEnvironment()] + ) + else: + trainer = Trainer(devices=args.gpus_per_node, num_nodes=num_nodes, accelerator='gpu') + + app_state.pipeline_model_parallel_size = args.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = args.tensor_model_parallel_size + + # no use atm, use to split ranks in encoder/decoder models. + if args.pipeline_model_parallel_size > 1 and args.model_type in []: + if args.pipeline_model_parallel_split_rank is not None: + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_split_rank + else: + if args.pipeline_model_parallel_size % 2 != 0: + raise ValueError( + f"Pipeline model parallel size {args.pipeline_model_parallel_size} must be even if split rank is not specified." + ) + else: + # If split rank is not set, then we set it to be pipeline_model_parallel_size // 2 - this is because in most cases we have the same number of enc/dec layers. + app_state.pipeline_model_parallel_split_rank = args.pipeline_model_parallel_size // 2 + else: + app_state.pipeline_model_parallel_split_rank = None + + app_state.model_parallel_size = app_state.tensor_model_parallel_size * app_state.pipeline_model_parallel_size + + parallel_state.initialize_model_parallel( + tensor_model_parallel_size=app_state.tensor_model_parallel_size, + pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, + ) + + app_state.pipeline_model_parallel_rank = parallel_state.get_pipeline_model_parallel_rank() + app_state.tensor_model_parallel_rank = parallel_state.get_tensor_model_parallel_rank() + + # inject model parallel rank + checkpoint_path = inject_model_parallel_rank(os.path.join(args.checkpoint_folder, args.checkpoint_name)) + + logging.info( + f'rank: {rank}, local_rank: {local_rank}, is loading checkpoint: {checkpoint_path} for tp_rank: {app_state.tensor_model_parallel_rank} and pp_rank: {app_state.pipeline_model_parallel_rank}' + ) + + if args.model_type == 'vit_classification': + model = MegatronVitClassificationModel.load_from_checkpoint( + checkpoint_path, hparams_file=args.hparams_file, trainer=trainer + ) + else: + raise ValueError(f"Unrecognized model_type {args.model_type}.") + + model._save_restore_connector = NLPSaveRestoreConnector() + + if torch.distributed.is_initialized(): + torch.distributed.barrier() + + model.save_to(args.nemo_file_path) + + logging.info(f'NeMo model saved to: {args.nemo_file_path}') + + +if __name__ == '__main__': + args = get_args() + local_rank, rank, world_size = initialize_distributed(args) + convert(local_rank, rank, world_size, args) diff --git a/examples/vision/vision_transformer/conf/megatron_vit_classification_config.yaml b/examples/vision/vision_transformer/conf/megatron_vit_classification_config.yaml new file mode 100755 index 000000000000..264b49a0ba37 --- /dev/null +++ b/examples/vision/vision_transformer/conf/megatron_vit_classification_config.yaml @@ -0,0 +1,163 @@ +# shared by ViT classification pretraining and fine-tuning + +name: megatron_vit_classify +restore_from_path: null # used when starting from a .nemo file + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 95000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + val_check_interval: 100 + limit_val_batches: 50 + limit_test_batches: 500 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually + +exp_manager: + explicit_log_dir: null + exp_dir: null + name: megatron_vit_classification + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_vit_classification--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}} + + +model: + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 256 # limited by GPU memory + global_batch_size: 4096 # will use more micro batches to reach global batch size + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_path: null # used in fine-tuning + + # vision configs + vision_pretraining_type: "classify" + num_classes: 1000 + patch_dim: 16 + img_h: 224 + img_w: 224 + classes_fraction: 1.0 + data_per_class_fraction: 1.0 + num_channels: 3 + drop_path_rate: 0.0 + + # model architecture + encoder_seq_length: 196 + max_position_embeddings: ${.encoder_seq_length} + num_layers: 12 + hidden_size: 768 + ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 12 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0.1 # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # Megatron O2-style half-precision + megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + ## Activation Checkpointing + # NeMo Megatron supports 'selective' activation checkpointing where only the memory intensive part of attention is checkpointed. + # These memory intensive activations are also less compute intensive which makes activation checkpointing more efficient for LLMs (20B+). + # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. + # 'full' will checkpoint the entire transformer layer. + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + # 'uniform' divides the total number of transformer layers and checkpoints the input activation + # of each chunk at the specified granularity + # 'block' checkpoints the specified number of layers per pipeline stage at the specified granularity + activations_checkpoint_num_layers: null # not used with 'selective' + # when using 'uniform' this creates groups of transformer layers to checkpoint. Usually set to 1. Increase to save more memory. + # when using 'block' this this will checkpoint the first activations_checkpoint_num_layers per pipeline stage. + + ## Sequence Parallelism + # Makes tensor parallelism more memory efficient for LLMs (20B+) by parallelizing layer norms and dropout sequentially + # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. + sequence_parallel: False + + data: + # Path to image dataset must be specified by the user. + # Supports List + # List: can override from the CLI: "model.data.data_prefix=[/path/to/train, /path/to/val]", + data_path: ??? + num_workers: 2 + dataloader_type: cyclic # cyclic + validation_drop_last: True # Set to false if the last partial validation samples is to be consumed + data_sharding: False + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.999 + sched: + name: CosineAnnealing + warmup_steps: 10000 + constant_steps: 0 + min_lr: 1e-5 \ No newline at end of file diff --git a/examples/vision/vision_transformer/conf/megatron_vit_classification_evaluate.yaml b/examples/vision/vision_transformer/conf/megatron_vit_classification_evaluate.yaml new file mode 100755 index 000000000000..4b9a71bedc7d --- /dev/null +++ b/examples/vision/vision_transformer/conf/megatron_vit_classification_evaluate.yaml @@ -0,0 +1,15 @@ +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +model: + restore_from_path: null # Path to a trained ViT .nemo file + precision: ${trainer.precision} + micro_batch_size: 512 # we only supports DP=1 eval at the moment, GBS=MBS + + data: + num_workers: 2 + imagenet_val: ??? # path to imagenet val folder \ No newline at end of file diff --git a/examples/vision/vision_transformer/conf/megatron_vit_classification_infer.yaml b/examples/vision/vision_transformer/conf/megatron_vit_classification_infer.yaml new file mode 100755 index 000000000000..553abb5bc23b --- /dev/null +++ b/examples/vision/vision_transformer/conf/megatron_vit_classification_infer.yaml @@ -0,0 +1,12 @@ +data_path: ??? # Path to a image folder for inference + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +model: + restore_from_path: null # Path to a trained ViT .nemo file + precision: ${trainer.precision} diff --git a/examples/vision/vision_transformer/megatron_vit_classification_evaluate.py b/examples/vision/vision_transformer/megatron_vit_classification_evaluate.py new file mode 100644 index 000000000000..d1e596ad1bce --- /dev/null +++ b/examples/vision/vision_transformer/megatron_vit_classification_evaluate.py @@ -0,0 +1,124 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import glob +import os + +import torch +from omegaconf.omegaconf import OmegaConf, open_dict +from PIL import Image +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from torch.utils.data import DataLoader, Dataset +from tqdm import tqdm + +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector +from nemo.collections.vision.data.imagenet_classnames import imagenet_classnames +from nemo.collections.vision.data.megatron.image_folder import ImageFolder +from nemo.collections.vision.data.megatron.vit_dataset import ClassificationTransform +from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.get_rank import is_global_rank_zero + + +@hydra_runner(config_path="conf", config_name="megatron_vit_classification_evaluate") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, find_unused_parameters=False, # we don't use DDP for async grad allreduce + ) + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + # trainer required for restoring model parallel models + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.model.restore_from_path): + save_restore_connector.model_extracted_dir = cfg.model.restore_from_path + + model_cfg = MegatronVitClassificationModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + save_restore_connector=save_restore_connector, + return_config=True, + ) + + assert ( + cfg.trainer.devices * cfg.trainer.num_nodes + == model_cfg.tensor_model_parallel_size * model_cfg.pipeline_model_parallel_size + ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" + + # These configs are required to be off during inference. + with open_dict(model_cfg): + model_cfg.precision = trainer.precision + if trainer.precision != "bf16": + model_cfg.megatron_amp_O2 = False + model_cfg.sequence_parallel = False + model_cfg.activations_checkpoint_granularity = None + model_cfg.activations_checkpoint_method = None + + model = MegatronVitClassificationModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + override_config_path=model_cfg, + save_restore_connector=save_restore_connector, + strict=True, + ) + + model.eval() + + val_transform = ClassificationTransform(model.cfg, (model.cfg.img_h, model.cfg.img_w), train=False) + val_data = ImageFolder(root=cfg.model.data.imagenet_val, transform=val_transform,) + + # initialize apex DDP strategy + def dummy(): + return + + if trainer.strategy.launcher is not None: + trainer.strategy.launcher.launch(dummy, trainer=trainer) + trainer.strategy.setup_environment() + + test_loader = DataLoader(val_data, batch_size=cfg.model.micro_batch_size, num_workers=cfg.model.data.num_workers,) + + # get autocast_dtype + if trainer.precision in ['bf16', 'bf16-mixed']: + autocast_dtype = torch.bfloat16 + elif trainer.precision in [32, '32', '32-true']: + autocast_dtype = torch.float + elif trainer.precision in [16, '16', '16-mixed']: + autocast_dtype = torch.half + else: + raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') + + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): + total = correct = 0.0 + for tokens, labels in tqdm(test_loader): + logits = model(tokens.cuda()) + class_indices = torch.argmax(logits, -1) + correct += (class_indices == labels.cuda()).float().sum() + total += len(labels) + + if is_global_rank_zero: + print(f"ViT Imagenet 1K Evaluation Accuracy: {correct / total:.4f}") + + +if __name__ == '__main__': + main() diff --git a/examples/vision/vision_transformer/megatron_vit_classification_finetune.py b/examples/vision/vision_transformer/megatron_vit_classification_finetune.py new file mode 100644 index 000000000000..cc81cda2c477 --- /dev/null +++ b/examples/vision/vision_transformer/megatron_vit_classification_finetune.py @@ -0,0 +1,52 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from omegaconf.omegaconf import OmegaConf, open_dict + +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel + +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="conf", config_name="megatron_vit_classification_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + trainer = MegatronTrainerBuilder(cfg).create_trainer() + exp_manager(trainer, cfg.exp_manager) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.finetune = True + cfg.model.precision = cfg.trainer.precision + + model = MegatronVitClassificationModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + override_config_path=cfg.model, + save_restore_connector=NLPSaveRestoreConnector(), + strict=False, + ) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/vision/vision_transformer/megatron_vit_classification_infer.py b/examples/vision/vision_transformer/megatron_vit_classification_infer.py new file mode 100644 index 000000000000..c48090d31475 --- /dev/null +++ b/examples/vision/vision_transformer/megatron_vit_classification_infer.py @@ -0,0 +1,145 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import glob +import os + +import torch +from omegaconf.omegaconf import OmegaConf, open_dict +from PIL import Image +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from torch.utils.data import DataLoader, Dataset + +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector +from nemo.collections.vision.data.imagenet_classnames import imagenet_classnames +from nemo.collections.vision.data.megatron.vit_dataset import ClassificationTransform +from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.get_rank import is_global_rank_zero + +_IMG_EXTENSIONS = "jpg jpeg png ppm pgm pbm pnm".split() + + +class ImageFolderDataset(Dataset): + def __init__(self, folder_path, transform=None): + self.folder_path = folder_path + self.transform = transform + # Use glob to find all image files in folder_path + image_paths = [] + for ext in _IMG_EXTENSIONS + [x.upper() for x in _IMG_EXTENSIONS]: + search_pattern = os.path.join(folder_path, f"*.{ext}") + image_paths += glob.glob(search_pattern) + self.image_paths = image_paths + + def __len__(self): + return len(self.image_paths) + + def __getitem__(self, idx): + image_path = self.image_paths[idx] + image = Image.open(image_path).convert('RGB') + if self.transform is not None: + image = self.transform(image) + return image + + +@hydra_runner(config_path="conf", config_name="megatron_vit_classification_infer") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, find_unused_parameters=False, # we don't use DDP for async grad allreduce + ) + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + # trainer required for restoring model parallel models + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.model.restore_from_path): + save_restore_connector.model_extracted_dir = cfg.model.restore_from_path + + model_cfg = MegatronVitClassificationModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + save_restore_connector=save_restore_connector, + return_config=True, + ) + + assert ( + cfg.trainer.devices * cfg.trainer.num_nodes + == model_cfg.tensor_model_parallel_size * model_cfg.pipeline_model_parallel_size + ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" + + # These configs are required to be off during inference. + with open_dict(model_cfg): + model_cfg.precision = trainer.precision + if trainer.precision != "bf16": + model_cfg.megatron_amp_O2 = False + model_cfg.sequence_parallel = False + model_cfg.activations_checkpoint_granularity = None + model_cfg.activations_checkpoint_method = None + + model = MegatronVitClassificationModel.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + override_config_path=model_cfg, + save_restore_connector=save_restore_connector, + strict=True, + ) + + model.eval() + + test_transform = ClassificationTransform(cfg.model, (model_cfg.img_h, model_cfg.img_w), train=False) + test_data = ImageFolderDataset(folder_path=cfg.data_path, transform=test_transform,) + test_loader = DataLoader(test_data, batch_size=8) + + # initialize apex DDP strategy + def dummy(): + return + + if trainer.strategy.launcher is not None: + trainer.strategy.launcher.launch(dummy, trainer=trainer) + trainer.strategy.setup_environment() + + # get autocast_dtype + if trainer.precision in ['bf16', 'bf16-mixed']: + autocast_dtype = torch.bfloat16 + elif trainer.precision in [32, '32', '32-true']: + autocast_dtype = torch.float + elif trainer.precision in [16, '16', '16-mixed']: + autocast_dtype = torch.half + else: + raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') + + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): + class_names = [] + for tokens in test_loader: + logits = model(tokens.cuda()) + class_indices = torch.argmax(logits, -1) + class_names += [imagenet_classnames[x] for x in class_indices] + + if is_global_rank_zero: + filenames = [os.path.basename(f) for f in test_data.image_paths] + print(f"Predicted classes: ", list(zip(filenames, class_names))) + + +if __name__ == '__main__': + main() diff --git a/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py b/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py new file mode 100644 index 000000000000..d39df23e6ba1 --- /dev/null +++ b/examples/vision/vision_transformer/megatron_vit_classification_pretrain.py @@ -0,0 +1,44 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from omegaconf.omegaconf import OmegaConf, open_dict + +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder +from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel + +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="conf", config_name="megatron_vit_classification_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + trainer = MegatronTrainerBuilder(cfg).create_trainer() + exp_manager(trainer, cfg.exp_manager) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + model = MegatronVitClassificationModel(cfg.model, trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/nemo/collections/multimodal/data/__init__.py b/nemo/collections/multimodal/data/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/clip/__init__.py b/nemo/collections/multimodal/data/clip/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/clip/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/clip/augmentations/__init__.py b/nemo/collections/multimodal/data/clip/augmentations/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/clip/augmentations/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/clip/augmentations/augmentations.py b/nemo/collections/multimodal/data/clip/augmentations/augmentations.py new file mode 100644 index 000000000000..2cf3dad64464 --- /dev/null +++ b/nemo/collections/multimodal/data/clip/augmentations/augmentations.py @@ -0,0 +1,108 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is adapted from public repo +https://github.com/mlfoundations/open_clip/blob/28c994406e39a5babc749c76871d92f33e9c558d/src/open_clip/transform.py +by @yaoyu-33 +""" +from typing import Optional, Sequence, Tuple + +import torch +import torch.nn as nn +import torchvision.transforms.functional as F +from torchvision.transforms import ( + CenterCrop, + Compose, + InterpolationMode, + Normalize, + RandomResizedCrop, + Resize, + ToTensor, +) + +OPENAI_DATASET_MEAN = (0.48145466, 0.4578275, 0.40821073) +OPENAI_DATASET_STD = (0.26862954, 0.26130258, 0.27577711) + + +class ResizeMaxSize(nn.Module): + def __init__(self, max_size, interpolation=InterpolationMode.BICUBIC, fn='max', fill=0): + super().__init__() + if not isinstance(max_size, int): + raise TypeError(f"Size should be int. Got {type(max_size)}") + self.max_size = max_size + self.interpolation = interpolation + self.fn = min if fn == 'min' else min + self.fill = fill + + def forward(self, img): + if isinstance(img, torch.Tensor): + height, width = img.shape[:2] + else: + width, height = img.size + scale = self.max_size / float(max(height, width)) + if scale != 1.0: + new_size = tuple(round(dim * scale) for dim in (height, width)) + img = F.resize(img, new_size, self.interpolation) + pad_h = self.max_size - new_size[0] + pad_w = self.max_size - new_size[1] + img = F.pad(img, padding=[pad_w // 2, pad_h // 2, pad_w - pad_w // 2, pad_h - pad_h // 2], fill=self.fill) + return img + + +def _convert_to_rgb(image): + return image.convert('RGB') + + +def image_transform( + image_size: int, + is_train: bool, + mean: Optional[Tuple[float, ...]] = None, + std: Optional[Tuple[float, ...]] = None, + resize_longest_max: bool = False, + fill_color: int = 0, +): + mean = mean or OPENAI_DATASET_MEAN + if not isinstance(mean, (list, tuple)): + mean = (mean,) * 3 + + std = std or OPENAI_DATASET_STD + if not isinstance(std, (list, tuple)): + std = (std,) * 3 + + if isinstance(image_size, (list, tuple)) and image_size[0] == image_size[1]: + # for square size, pass size as int so that Resize() uses aspect preserving shortest edge + image_size = image_size[0] + + normalize = Normalize(mean=mean, std=std) + if is_train: + return Compose( + [ + RandomResizedCrop(image_size, scale=(0.9, 1.0), interpolation=InterpolationMode.BICUBIC), + _convert_to_rgb, + ToTensor(), + normalize, + ] + ) + else: + if resize_longest_max: + transforms = [ResizeMaxSize(image_size, fill=fill_color)] + else: + transforms = [ + Resize(image_size, interpolation=InterpolationMode.BICUBIC), + CenterCrop(image_size), + ] + transforms.extend( + [_convert_to_rgb, ToTensor(), normalize,] + ) + return Compose(transforms) diff --git a/nemo/collections/multimodal/data/clip/clip_dataset.py b/nemo/collections/multimodal/data/clip/clip_dataset.py new file mode 100644 index 000000000000..f63a86dc9174 --- /dev/null +++ b/nemo/collections/multimodal/data/clip/clip_dataset.py @@ -0,0 +1,194 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import partial +from typing import Any, Dict, List, Optional, Union + +import torch +from torch.utils.data import Dataset, default_collate + +from nemo.collections.multimodal.data.clip.augmentations.augmentations import image_transform +from nemo.collections.multimodal.data.clip.imagenet_zeroshot_data import imagenet_classnames, openai_imagenet_template +from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( + MegatronPretrainingRandomSampler, + MegatronPretrainingSampler, +) +from nemo.collections.vision.data.megatron.image_folder import ImageFolder +from nemo.collections.vision.data.megatron.vit_dataset import RandomSeedDataset + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +def tokenize(texts: Union[str, List[str]], tokenizer: Any, context_length: int = 77) -> torch.LongTensor: + """ + Returns the tokenized representation of given input string(s) + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + tokenizer: + Tokenizer loaded in NeMo NeMo + context_length : int + The context length to use; all CLIP models use 77 as the context length + + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length] + """ + texts_is_str = False + if isinstance(texts, str): + texts = [texts] + texts_is_str = True + + bos_id = tokenizer.bos_id + eos_id = tokenizer.eos_id + all_tokens = [[bos_id] + tokenizer.text_to_ids(text) + [eos_id] for text in texts] + result = torch.zeros(len(all_tokens), context_length, dtype=torch.long) + + for i, tokens in enumerate(all_tokens): + if len(tokens) > context_length: + tokens = tokens[:context_length] # Truncate + tokens[-1] = eos_id + result[i, : len(tokens)] = torch.tensor(tokens) + + if texts_is_str: + result = result[0] + return result + + +def get_preprocess_fns(model_cfg, tokenizer=None, is_train=True): + # Define transforms + img_size = (model_cfg.vision.get("img_h"), model_cfg.vision.get("img_w")) + img_mean = model_cfg.vision.get("img_mean") + img_std = model_cfg.vision.get("img_std") + img_transform = image_transform(img_size, is_train=is_train, mean=img_mean, std=img_std,) + text_transform = lambda x: x + if tokenizer is not None: + text_transform = partial( + tokenize, tokenizer=tokenizer, context_length=model_cfg.text.get("max_position_embeddings"), + ) + return img_transform, text_transform + + +def build_train_valid_datasets( + model_cfg, consumed_samples, tokenizer=None, +): + data_cfg = model_cfg.data + + # This function maps data that are tuples to dictionary. + def tuple_to_dict(inp): + for input in inp: + out_dict = dict() + out_dict['images'] = input[0] + out_dict['captions'] = input[1] + yield out_dict + + def transform_fn(sample, img_transform, text_transform): + image, text = sample["jpg"], sample["txt"] + return img_transform(image), text_transform(text) + + train_img_transform, text_transform = get_preprocess_fns(model_cfg, tokenizer, is_train=True) + train_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=partial(transform_fn, img_transform=train_img_transform, text_transform=text_transform), + compose_fn=tuple_to_dict, + is_train=True, + ) + + val_data = None + if data_cfg.get("validation") is not None and data_cfg.validation.get("dataset_path"): + val_img_transform, text_transform = get_preprocess_fns(model_cfg, tokenizer, is_train=False) + val_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=0, + map_fn=partial(transform_fn, img_transform=val_img_transform, text_transform=text_transform), + compose_fn=tuple_to_dict, + is_train=False, + ) + + return train_data, val_data + + +# For zero-shot imagenet validation +def build_imagenet_validation_dataloader(model_cfg, tokenizer=None): + val_image_transform, text_transform = get_preprocess_fns(model_cfg, tokenizer, is_train=False) + data_cfg = model_cfg.data + + imagenet_val = {} + + imagenet_path = data_cfg.get("imagenet_val") + if imagenet_path is None: + return None + + image_dataset = ImageFolder(root=imagenet_path, transform=val_image_transform,) + + image_batch_sampler = MegatronPretrainingSampler( + total_samples=len(image_dataset), + consumed_samples=0, + micro_batch_size=model_cfg.micro_batch_size, + global_batch_size=model_cfg.global_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=False, + ) + + def custom_collate(batch): + if len(batch) == 0: + return None, None + else: + return default_collate(batch) + + imagenet_val["images"] = torch.utils.data.DataLoader( + image_dataset, + batch_sampler=image_batch_sampler, + num_workers=min(data_cfg.num_workers, 2), + collate_fn=custom_collate, + pin_memory=True, + persistent_workers=True, + ) + + text_dataset = ImagenetClassnameDataset(imagenet_classnames, openai_imagenet_template, text_transform) + imagenet_val["texts"] = torch.utils.data.DataLoader( + text_dataset, + batch_size=text_dataset.num_templates, + num_workers=0, + pin_memory=True, + persistent_workers=False, + drop_last=False, + ) + return imagenet_val + + +class ImagenetClassnameDataset(Dataset): + def __init__(self, classnames, templates, text_transform): + self.num_templates = len(templates) + self.samples = [] + for classname in classnames: + texts = [template(classname) for template in templates] + self.samples.extend(text_transform(texts)) + + def __getitem__(self, index): + return self.samples[index] + + def __len__(self): + return len(self.samples) diff --git a/nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py b/nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py new file mode 100644 index 000000000000..c7387d37eba7 --- /dev/null +++ b/nemo/collections/multimodal/data/clip/imagenet_zeroshot_data.py @@ -0,0 +1,1100 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +imagenet_classnames = [ + "tench", + "goldfish", + "great white shark", + "tiger shark", + "hammerhead shark", + "electric ray", + "stingray", + "rooster", + "hen", + "ostrich", + "brambling", + "goldfinch", + "house finch", + "junco", + "indigo bunting", + "American robin", + "bulbul", + "jay", + "magpie", + "chickadee", + "American dipper", + "kite (bird of prey)", + "bald eagle", + "vulture", + "great grey owl", + "fire salamander", + "smooth newt", + "newt", + "spotted salamander", + "axolotl", + "American bullfrog", + "tree frog", + "tailed frog", + "loggerhead sea turtle", + "leatherback sea turtle", + "mud turtle", + "terrapin", + "box turtle", + "banded gecko", + "green iguana", + "Carolina anole", + "desert grassland whiptail lizard", + "agama", + "frilled-necked lizard", + "alligator lizard", + "Gila monster", + "European green lizard", + "chameleon", + "Komodo dragon", + "Nile crocodile", + "American alligator", + "triceratops", + "worm snake", + "ring-necked snake", + "eastern hog-nosed snake", + "smooth green snake", + "kingsnake", + "garter snake", + "water snake", + "vine snake", + "night snake", + "boa constrictor", + "African rock python", + "Indian cobra", + "green mamba", + "sea snake", + "Saharan horned viper", + "eastern diamondback rattlesnake", + "sidewinder rattlesnake", + "trilobite", + "harvestman", + "scorpion", + "yellow garden spider", + "barn spider", + "European garden spider", + "southern black widow", + "tarantula", + "wolf spider", + "tick", + "centipede", + "black grouse", + "ptarmigan", + "ruffed grouse", + "prairie grouse", + "peafowl", + "quail", + "partridge", + "african grey parrot", + "macaw", + "sulphur-crested cockatoo", + "lorikeet", + "coucal", + "bee eater", + "hornbill", + "hummingbird", + "jacamar", + "toucan", + "duck", + "red-breasted merganser", + "goose", + "black swan", + "tusker", + "echidna", + "platypus", + "wallaby", + "koala", + "wombat", + "jellyfish", + "sea anemone", + "brain coral", + "flatworm", + "nematode", + "conch", + "snail", + "slug", + "sea slug", + "chiton", + "chambered nautilus", + "Dungeness crab", + "rock crab", + "fiddler crab", + "red king crab", + "American lobster", + "spiny lobster", + "crayfish", + "hermit crab", + "isopod", + "white stork", + "black stork", + "spoonbill", + "flamingo", + "little blue heron", + "great egret", + "bittern bird", + "crane bird", + "limpkin", + "common gallinule", + "American coot", + "bustard", + "ruddy turnstone", + "dunlin", + "common redshank", + "dowitcher", + "oystercatcher", + "pelican", + "king penguin", + "albatross", + "grey whale", + "killer whale", + "dugong", + "sea lion", + "Chihuahua", + "Japanese Chin", + "Maltese", + "Pekingese", + "Shih Tzu", + "King Charles Spaniel", + "Papillon", + "toy terrier", + "Rhodesian Ridgeback", + "Afghan Hound", + "Basset Hound", + "Beagle", + "Bloodhound", + "Bluetick Coonhound", + "Black and Tan Coonhound", + "Treeing Walker Coonhound", + "English foxhound", + "Redbone Coonhound", + "borzoi", + "Irish Wolfhound", + "Italian Greyhound", + "Whippet", + "Ibizan Hound", + "Norwegian Elkhound", + "Otterhound", + "Saluki", + "Scottish Deerhound", + "Weimaraner", + "Staffordshire Bull Terrier", + "American Staffordshire Terrier", + "Bedlington Terrier", + "Border Terrier", + "Kerry Blue Terrier", + "Irish Terrier", + "Norfolk Terrier", + "Norwich Terrier", + "Yorkshire Terrier", + "Wire Fox Terrier", + "Lakeland Terrier", + "Sealyham Terrier", + "Airedale Terrier", + "Cairn Terrier", + "Australian Terrier", + "Dandie Dinmont Terrier", + "Boston Terrier", + "Miniature Schnauzer", + "Giant Schnauzer", + "Standard Schnauzer", + "Scottish Terrier", + "Tibetan Terrier", + "Australian Silky Terrier", + "Soft-coated Wheaten Terrier", + "West Highland White Terrier", + "Lhasa Apso", + "Flat-Coated Retriever", + "Curly-coated Retriever", + "Golden Retriever", + "Labrador Retriever", + "Chesapeake Bay Retriever", + "German Shorthaired Pointer", + "Vizsla", + "English Setter", + "Irish Setter", + "Gordon Setter", + "Brittany dog", + "Clumber Spaniel", + "English Springer Spaniel", + "Welsh Springer Spaniel", + "Cocker Spaniel", + "Sussex Spaniel", + "Irish Water Spaniel", + "Kuvasz", + "Schipperke", + "Groenendael dog", + "Malinois", + "Briard", + "Australian Kelpie", + "Komondor", + "Old English Sheepdog", + "Shetland Sheepdog", + "collie", + "Border Collie", + "Bouvier des Flandres dog", + "Rottweiler", + "German Shepherd Dog", + "Dobermann", + "Miniature Pinscher", + "Greater Swiss Mountain Dog", + "Bernese Mountain Dog", + "Appenzeller Sennenhund", + "Entlebucher Sennenhund", + "Boxer", + "Bullmastiff", + "Tibetan Mastiff", + "French Bulldog", + "Great Dane", + "St. Bernard", + "husky", + "Alaskan Malamute", + "Siberian Husky", + "Dalmatian", + "Affenpinscher", + "Basenji", + "pug", + "Leonberger", + "Newfoundland dog", + "Great Pyrenees dog", + "Samoyed", + "Pomeranian", + "Chow Chow", + "Keeshond", + "brussels griffon", + "Pembroke Welsh Corgi", + "Cardigan Welsh Corgi", + "Toy Poodle", + "Miniature Poodle", + "Standard Poodle", + "Mexican hairless dog (xoloitzcuintli)", + "grey wolf", + "Alaskan tundra wolf", + "red wolf or maned wolf", + "coyote", + "dingo", + "dhole", + "African wild dog", + "hyena", + "red fox", + "kit fox", + "Arctic fox", + "grey fox", + "tabby cat", + "tiger cat", + "Persian cat", + "Siamese cat", + "Egyptian Mau", + "cougar", + "lynx", + "leopard", + "snow leopard", + "jaguar", + "lion", + "tiger", + "cheetah", + "brown bear", + "American black bear", + "polar bear", + "sloth bear", + "mongoose", + "meerkat", + "tiger beetle", + "ladybug", + "ground beetle", + "longhorn beetle", + "leaf beetle", + "dung beetle", + "rhinoceros beetle", + "weevil", + "fly", + "bee", + "ant", + "grasshopper", + "cricket insect", + "stick insect", + "cockroach", + "praying mantis", + "cicada", + "leafhopper", + "lacewing", + "dragonfly", + "damselfly", + "red admiral butterfly", + "ringlet butterfly", + "monarch butterfly", + "small white butterfly", + "sulphur butterfly", + "gossamer-winged butterfly", + "starfish", + "sea urchin", + "sea cucumber", + "cottontail rabbit", + "hare", + "Angora rabbit", + "hamster", + "porcupine", + "fox squirrel", + "marmot", + "beaver", + "guinea pig", + "common sorrel horse", + "zebra", + "pig", + "wild boar", + "warthog", + "hippopotamus", + "ox", + "water buffalo", + "bison", + "ram (adult male sheep)", + "bighorn sheep", + "Alpine ibex", + "hartebeest", + "impala (antelope)", + "gazelle", + "arabian camel", + "llama", + "weasel", + "mink", + "European polecat", + "black-footed ferret", + "otter", + "skunk", + "badger", + "armadillo", + "three-toed sloth", + "orangutan", + "gorilla", + "chimpanzee", + "gibbon", + "siamang", + "guenon", + "patas monkey", + "baboon", + "macaque", + "langur", + "black-and-white colobus", + "proboscis monkey", + "marmoset", + "white-headed capuchin", + "howler monkey", + "titi monkey", + "Geoffroy's spider monkey", + "common squirrel monkey", + "ring-tailed lemur", + "indri", + "Asian elephant", + "African bush elephant", + "red panda", + "giant panda", + "snoek fish", + "eel", + "silver salmon", + "rock beauty fish", + "clownfish", + "sturgeon", + "gar fish", + "lionfish", + "pufferfish", + "abacus", + "abaya", + "academic gown", + "accordion", + "acoustic guitar", + "aircraft carrier", + "airliner", + "airship", + "altar", + "ambulance", + "amphibious vehicle", + "analog clock", + "apiary", + "apron", + "trash can", + "assault rifle", + "backpack", + "bakery", + "balance beam", + "balloon", + "ballpoint pen", + "Band-Aid", + "banjo", + "baluster / handrail", + "barbell", + "barber chair", + "barbershop", + "barn", + "barometer", + "barrel", + "wheelbarrow", + "baseball", + "basketball", + "bassinet", + "bassoon", + "swimming cap", + "bath towel", + "bathtub", + "station wagon", + "lighthouse", + "beaker", + "military hat (bearskin or shako)", + "beer bottle", + "beer glass", + "bell tower", + "baby bib", + "tandem bicycle", + "bikini", + "ring binder", + "binoculars", + "birdhouse", + "boathouse", + "bobsleigh", + "bolo tie", + "poke bonnet", + "bookcase", + "bookstore", + "bottle cap", + "hunting bow", + "bow tie", + "brass memorial plaque", + "bra", + "breakwater", + "breastplate", + "broom", + "bucket", + "buckle", + "bulletproof vest", + "high-speed train", + "butcher shop", + "taxicab", + "cauldron", + "candle", + "cannon", + "canoe", + "can opener", + "cardigan", + "car mirror", + "carousel", + "tool kit", + "cardboard box / carton", + "car wheel", + "automated teller machine", + "cassette", + "cassette player", + "castle", + "catamaran", + "CD player", + "cello", + "mobile phone", + "chain", + "chain-link fence", + "chain mail", + "chainsaw", + "storage chest", + "chiffonier", + "bell or wind chime", + "china cabinet", + "Christmas stocking", + "church", + "movie theater", + "cleaver", + "cliff dwelling", + "cloak", + "clogs", + "cocktail shaker", + "coffee mug", + "coffeemaker", + "spiral or coil", + "combination lock", + "computer keyboard", + "candy store", + "container ship", + "convertible", + "corkscrew", + "cornet", + "cowboy boot", + "cowboy hat", + "cradle", + "construction crane", + "crash helmet", + "crate", + "infant bed", + "Crock Pot", + "croquet ball", + "crutch", + "cuirass", + "dam", + "desk", + "desktop computer", + "rotary dial telephone", + "diaper", + "digital clock", + "digital watch", + "dining table", + "dishcloth", + "dishwasher", + "disc brake", + "dock", + "dog sled", + "dome", + "doormat", + "drilling rig", + "drum", + "drumstick", + "dumbbell", + "Dutch oven", + "electric fan", + "electric guitar", + "electric locomotive", + "entertainment center", + "envelope", + "espresso machine", + "face powder", + "feather boa", + "filing cabinet", + "fireboat", + "fire truck", + "fire screen", + "flagpole", + "flute", + "folding chair", + "football helmet", + "forklift", + "fountain", + "fountain pen", + "four-poster bed", + "freight car", + "French horn", + "frying pan", + "fur coat", + "garbage truck", + "gas mask or respirator", + "gas pump", + "goblet", + "go-kart", + "golf ball", + "golf cart", + "gondola", + "gong", + "gown", + "grand piano", + "greenhouse", + "radiator grille", + "grocery store", + "guillotine", + "hair clip", + "hair spray", + "half-track", + "hammer", + "hamper", + "hair dryer", + "hand-held computer", + "handkerchief", + "hard disk drive", + "harmonica", + "harp", + "combine harvester", + "hatchet", + "holster", + "home theater", + "honeycomb", + "hook", + "hoop skirt", + "gymnastic horizontal bar", + "horse-drawn vehicle", + "hourglass", + "iPod", + "clothes iron", + "carved pumpkin", + "jeans", + "jeep", + "T-shirt", + "jigsaw puzzle", + "rickshaw", + "joystick", + "kimono", + "knee pad", + "knot", + "lab coat", + "ladle", + "lampshade", + "laptop computer", + "lawn mower", + "lens cap", + "letter opener", + "library", + "lifeboat", + "lighter", + "limousine", + "ocean liner", + "lipstick", + "slip-on shoe", + "lotion", + "music speaker", + "loupe magnifying glass", + "sawmill", + "magnetic compass", + "messenger bag", + "mailbox", + "tights", + "one-piece bathing suit", + "manhole cover", + "maraca", + "marimba", + "mask", + "matchstick", + "maypole", + "maze", + "measuring cup", + "medicine cabinet", + "megalith", + "microphone", + "microwave oven", + "military uniform", + "milk can", + "minibus", + "miniskirt", + "minivan", + "missile", + "mitten", + "mixing bowl", + "mobile home", + "ford model t", + "modem", + "monastery", + "monitor", + "moped", + "mortar and pestle", + "graduation cap", + "mosque", + "mosquito net", + "vespa", + "mountain bike", + "tent", + "computer mouse", + "mousetrap", + "moving van", + "muzzle", + "metal nail", + "neck brace", + "necklace", + "baby pacifier", + "notebook computer", + "obelisk", + "oboe", + "ocarina", + "odometer", + "oil filter", + "pipe organ", + "oscilloscope", + "overskirt", + "bullock cart", + "oxygen mask", + "product packet / packaging", + "paddle", + "paddle wheel", + "padlock", + "paintbrush", + "pajamas", + "palace", + "pan flute", + "paper towel", + "parachute", + "parallel bars", + "park bench", + "parking meter", + "railroad car", + "patio", + "payphone", + "pedestal", + "pencil case", + "pencil sharpener", + "perfume", + "Petri dish", + "photocopier", + "plectrum", + "Pickelhaube", + "picket fence", + "pickup truck", + "pier", + "piggy bank", + "pill bottle", + "pillow", + "ping-pong ball", + "pinwheel", + "pirate ship", + "drink pitcher", + "block plane", + "planetarium", + "plastic bag", + "plate rack", + "farm plow", + "plunger", + "Polaroid camera", + "pole", + "police van", + "poncho", + "pool table", + "soda bottle", + "plant pot", + "potter's wheel", + "power drill", + "prayer rug", + "printer", + "prison", + "missile", + "projector", + "hockey puck", + "punching bag", + "purse", + "quill", + "quilt", + "race car", + "racket", + "radiator", + "radio", + "radio telescope", + "rain barrel", + "recreational vehicle", + "fishing casting reel", + "reflex camera", + "refrigerator", + "remote control", + "restaurant", + "revolver", + "rifle", + "rocking chair", + "rotisserie", + "eraser", + "rugby ball", + "ruler measuring stick", + "sneaker", + "safe", + "safety pin", + "salt shaker", + "sandal", + "sarong", + "saxophone", + "scabbard", + "weighing scale", + "school bus", + "schooner", + "scoreboard", + "CRT monitor", + "screw", + "screwdriver", + "seat belt", + "sewing machine", + "shield", + "shoe store", + "shoji screen / room divider", + "shopping basket", + "shopping cart", + "shovel", + "shower cap", + "shower curtain", + "ski", + "balaclava ski mask", + "sleeping bag", + "slide rule", + "sliding door", + "slot machine", + "snorkel", + "snowmobile", + "snowplow", + "soap dispenser", + "soccer ball", + "sock", + "solar thermal collector", + "sombrero", + "soup bowl", + "keyboard space bar", + "space heater", + "space shuttle", + "spatula", + "motorboat", + "spider web", + "spindle", + "sports car", + "spotlight", + "stage", + "steam locomotive", + "through arch bridge", + "steel drum", + "stethoscope", + "scarf", + "stone wall", + "stopwatch", + "stove", + "strainer", + "tram", + "stretcher", + "couch", + "stupa", + "submarine", + "suit", + "sundial", + "sunglasses", + "sunglasses", + "sunscreen", + "suspension bridge", + "mop", + "sweatshirt", + "swim trunks / shorts", + "swing", + "electrical switch", + "syringe", + "table lamp", + "tank", + "tape player", + "teapot", + "teddy bear", + "television", + "tennis ball", + "thatched roof", + "front curtain", + "thimble", + "threshing machine", + "throne", + "tile roof", + "toaster", + "tobacco shop", + "toilet seat", + "torch", + "totem pole", + "tow truck", + "toy store", + "tractor", + "semi-trailer truck", + "tray", + "trench coat", + "tricycle", + "trimaran", + "tripod", + "triumphal arch", + "trolleybus", + "trombone", + "hot tub", + "turnstile", + "typewriter keyboard", + "umbrella", + "unicycle", + "upright piano", + "vacuum cleaner", + "vase", + "vaulted or arched ceiling", + "velvet fabric", + "vending machine", + "vestment", + "viaduct", + "violin", + "volleyball", + "waffle iron", + "wall clock", + "wallet", + "wardrobe", + "military aircraft", + "sink", + "washing machine", + "water bottle", + "water jug", + "water tower", + "whiskey jug", + "whistle", + "hair wig", + "window screen", + "window shade", + "Windsor tie", + "wine bottle", + "airplane wing", + "wok", + "wooden spoon", + "wool", + "split-rail fence", + "shipwreck", + "sailboat", + "yurt", + "website", + "comic book", + "crossword", + "traffic or street sign", + "traffic light", + "dust jacket", + "menu", + "plate", + "guacamole", + "consomme", + "hot pot", + "trifle", + "ice cream", + "popsicle", + "baguette", + "bagel", + "pretzel", + "cheeseburger", + "hot dog", + "mashed potatoes", + "cabbage", + "broccoli", + "cauliflower", + "zucchini", + "spaghetti squash", + "acorn squash", + "butternut squash", + "cucumber", + "artichoke", + "bell pepper", + "cardoon", + "mushroom", + "Granny Smith apple", + "strawberry", + "orange", + "lemon", + "fig", + "pineapple", + "banana", + "jackfruit", + "cherimoya (custard apple)", + "pomegranate", + "hay", + "carbonara", + "chocolate syrup", + "dough", + "meatloaf", + "pizza", + "pot pie", + "burrito", + "red wine", + "espresso", + "tea cup", + "eggnog", + "mountain", + "bubble", + "cliff", + "coral reef", + "geyser", + "lakeshore", + "promontory", + "sandbar", + "beach", + "valley", + "volcano", + "baseball player", + "bridegroom", + "scuba diver", + "rapeseed", + "daisy", + "yellow lady's slipper", + "corn", + "acorn", + "rose hip", + "horse chestnut seed", + "coral fungus", + "agaric", + "gyromitra", + "stinkhorn mushroom", + "earth star fungus", + "hen of the woods mushroom", + "bolete", + "corn cob", + "toilet paper", +] + +openai_imagenet_template = [ + lambda c: f'a bad photo of a {c}.', + lambda c: f'a photo of many {c}.', + lambda c: f'a sculpture of a {c}.', + lambda c: f'a photo of the hard to see {c}.', + lambda c: f'a low resolution photo of the {c}.', + lambda c: f'a rendering of a {c}.', + lambda c: f'graffiti of a {c}.', + lambda c: f'a bad photo of the {c}.', + lambda c: f'a cropped photo of the {c}.', + lambda c: f'a tattoo of a {c}.', + lambda c: f'the embroidered {c}.', + lambda c: f'a photo of a hard to see {c}.', + lambda c: f'a bright photo of a {c}.', + lambda c: f'a photo of a clean {c}.', + lambda c: f'a photo of a dirty {c}.', + lambda c: f'a dark photo of the {c}.', + lambda c: f'a drawing of a {c}.', + lambda c: f'a photo of my {c}.', + lambda c: f'the plastic {c}.', + lambda c: f'a photo of the cool {c}.', + lambda c: f'a close-up photo of a {c}.', + lambda c: f'a black and white photo of the {c}.', + lambda c: f'a painting of the {c}.', + lambda c: f'a painting of a {c}.', + lambda c: f'a pixelated photo of the {c}.', + lambda c: f'a sculpture of the {c}.', + lambda c: f'a bright photo of the {c}.', + lambda c: f'a cropped photo of a {c}.', + lambda c: f'a plastic {c}.', + lambda c: f'a photo of the dirty {c}.', + lambda c: f'a jpeg corrupted photo of a {c}.', + lambda c: f'a blurry photo of the {c}.', + lambda c: f'a photo of the {c}.', + lambda c: f'a good photo of the {c}.', + lambda c: f'a rendering of the {c}.', + lambda c: f'a {c} in a video game.', + lambda c: f'a photo of one {c}.', + lambda c: f'a doodle of a {c}.', + lambda c: f'a close-up photo of the {c}.', + lambda c: f'a photo of a {c}.', + lambda c: f'the origami {c}.', + lambda c: f'the {c} in a video game.', + lambda c: f'a sketch of a {c}.', + lambda c: f'a doodle of the {c}.', + lambda c: f'a origami {c}.', + lambda c: f'a low resolution photo of a {c}.', + lambda c: f'the toy {c}.', + lambda c: f'a rendition of the {c}.', + lambda c: f'a photo of the clean {c}.', + lambda c: f'a photo of a large {c}.', + lambda c: f'a rendition of a {c}.', + lambda c: f'a photo of a nice {c}.', + lambda c: f'a photo of a weird {c}.', + lambda c: f'a blurry photo of a {c}.', + lambda c: f'a cartoon {c}.', + lambda c: f'art of a {c}.', + lambda c: f'a sketch of the {c}.', + lambda c: f'a embroidered {c}.', + lambda c: f'a pixelated photo of a {c}.', + lambda c: f'itap of the {c}.', + lambda c: f'a jpeg corrupted photo of the {c}.', + lambda c: f'a good photo of a {c}.', + lambda c: f'a plushie {c}.', + lambda c: f'a photo of the nice {c}.', + lambda c: f'a photo of the small {c}.', + lambda c: f'a photo of the weird {c}.', + lambda c: f'the cartoon {c}.', + lambda c: f'art of the {c}.', + lambda c: f'a drawing of the {c}.', + lambda c: f'a photo of the large {c}.', + lambda c: f'a black and white photo of a {c}.', + lambda c: f'the plushie {c}.', + lambda c: f'a dark photo of a {c}.', + lambda c: f'itap of a {c}.', + lambda c: f'graffiti of the {c}.', + lambda c: f'a toy {c}.', + lambda c: f'itap of my {c}.', + lambda c: f'a photo of a cool {c}.', + lambda c: f'a photo of a small {c}.', + lambda c: f'a tattoo of the {c}.', +] diff --git a/nemo/collections/multimodal/data/common/__init__.py b/nemo/collections/multimodal/data/common/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/common/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/common/data_samplers.py b/nemo/collections/multimodal/data/common/data_samplers.py new file mode 100644 index 000000000000..1cfd3d046a76 --- /dev/null +++ b/nemo/collections/multimodal/data/common/data_samplers.py @@ -0,0 +1,133 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from multiprocessing import Value + +import torch +from webdataset.pytorch import IterableDataset +from nemo.utils import logging + + +class SharedEpoch: + def __init__(self, epoch: int = 0): + self.shared_epoch = Value('i', epoch) + + def set_value(self, epoch): + self.shared_epoch.value = epoch + + def get_value(self): + return self.shared_epoch.value + + +class WDSUrlsRandomSampler(IterableDataset): + def __init__( + self, + urls, + total_urls: int, + chunk_size: int, + consumed_samples: int, + data_parallel_rank: int, + data_parallel_size: int, + num_workers: int, + drop_last: bool, + data_sharding: bool, + ): + r"""Sampler for WebDataset Urls with data parallelism. + Args: + urls : The urls of the tar files from which to sample. + total_urls (int): Total number of urls in the dataset. + chunk_size (int): Number of objects per tar file. + consumed_samples (int): Number of samples consumed so far by the training process. + **Note samples here is not urls.** + data_parallel_rank (int): Rank of the current data parallel process. + data_parallel_size (int): Number of data parallel processes. + drop_last (bool): If True, drop the remaining urls if the number is smaller than `data_parallel_size`. + If False, pad the urls until its size is divisible by `data_parallel_size`. + data_sharding (bool): If True, use data sharding before data shuffling, i.e. only shuffle within the data parallel group. + """ + super().__init__() + self.urls = urls + self.total_urls = total_urls + self.chunk_size = chunk_size + + if consumed_samples % data_parallel_size == 0: + logging.warning("Multimodal data resuming will be approximate!") + self.consumed_urls = ( + consumed_samples // (data_parallel_size * num_workers) // chunk_size * (data_parallel_size * num_workers) + ) + self.consumed_samples = self.consumed_urls * chunk_size + + self.data_parallel_rank = data_parallel_rank + self.data_parallel_size = data_parallel_size + self.drop_last = drop_last + self.data_sharding = data_sharding + self.epoch = SharedEpoch() + + self.remaining_urls = self.total_urls % self.data_parallel_size + + def __len__(self): + if self.drop_last: + return self.total_urls // self.data_parallel_size + else: + return (self.total_urls + self.data_parallel_size - 1) // self.data_parallel_size + + def __iter__(self): + worker_id, num_workers = 0, 1 + worker_info = torch.utils.data.get_worker_info() + if worker_info is not None: + worker_id, num_workers = worker_info.id, worker_info.num_workers + + self.consumed_urls = ( + self.consumed_samples + // (self.data_parallel_size * num_workers) + // self.chunk_size + * (self.data_parallel_size * num_workers) + ) + + if self.drop_last or self.remaining_urls == 0: + active_total_urls = self.total_urls - self.remaining_urls + else: + active_total_urls = self.total_urls + self.data_parallel_size - self.remaining_urls + + self.epoch.set_value(self.consumed_urls // active_total_urls) + current_epoch_urls = self.consumed_urls % active_total_urls + + # data sharding and random sampling + if self.data_sharding: + bucket_size = active_total_urls // self.data_parallel_size + bucket_offset = current_epoch_urls // self.data_parallel_size + start_idx = self.data_parallel_rank * bucket_size + + g = torch.Generator() + g.manual_seed(self.epoch.get_value()) + random_idx = torch.randperm(bucket_size, generator=g).tolist() + idx_range = [start_idx + x for x in random_idx[bucket_offset:]] + else: + full_bucket_size = active_total_urls + full_bucket_offset = current_epoch_urls + g = torch.Generator() + g.manual_seed(self.epoch.get_value()) + idx_range_total = torch.randperm(full_bucket_size, generator=g).tolist() + idx_range_active = idx_range_total[full_bucket_offset:] + idx_range = idx_range_active[self.data_parallel_rank :: self.data_parallel_size] + + # Use additional permutation to replace out-of-range indices when drop_last is False + additional_random_idx = torch.randperm(self.total_urls, generator=g).tolist() + for n, idx in enumerate(idx_range): + self.consumed_samples += self.data_parallel_size * self.chunk_size + if worker_info is not None and n % num_workers != worker_id: + continue + if idx < self.total_urls: + yield dict(url=self.urls[idx]) + else: + yield dict(url=self.urls[additional_random_idx[idx - self.total_urls]]) diff --git a/nemo/collections/multimodal/data/common/webdataset.py b/nemo/collections/multimodal/data/common/webdataset.py new file mode 100644 index 000000000000..785f834b34f9 --- /dev/null +++ b/nemo/collections/multimodal/data/common/webdataset.py @@ -0,0 +1,286 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import io +import itertools +import json +import os +import pickle +import random +import re +from typing import Callable, List, Union + +import boto3 +import torch.distributed as dist +import webdataset as wds +from botocore.config import Config +from PIL import Image +from webdataset import WebDataset, warn_and_continue +from webdataset.filters import _shuffle +from webdataset.utils import pytorch_worker_info + +from nemo.collections.multimodal.data.common.data_samplers import SharedEpoch, WDSUrlsRandomSampler +from nemo.collections.multimodal.data.common.webdataset_s3 import WebDataset as WebDatasetS3 +from nemo.core.classes import IterableDataset as NeMoIterableDataset +from nemo.utils import logging + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + +Image.MAX_IMAGE_PIXELS = 933120000 +_IMG_EXTENSIONS = "jpg jpeg png ppm pgm pbm pnm".split() +from webdataset import warn_and_continue + + +class detshuffle2(wds.PipelineStage): + def __init__( + self, bufsize=1000, initial=100, seed=0, epoch=-1, + ): + self.bufsize = bufsize + self.initial = initial + self.seed = seed + self.epoch = epoch + + def run(self, src): + if isinstance(self.epoch, SharedEpoch): + epoch = self.epoch.get_value() + else: + # NOTE: this is epoch tracking is problematic in a multiprocess (dataloader workers or train) + # situation as different workers may wrap at different times (or not at all). + self.epoch += 1 + epoch = self.epoch + rng = random.Random() + # This seed to be deterministic AND the same across all nodes/workers in each epoch + if parallel_state.is_unitialized(): + seed = self.seed + epoch + else: + seed = self.seed + epoch + (100 * parallel_state.get_data_parallel_rank()) + rng.seed(seed) + return _shuffle(src, self.bufsize, self.initial, rng) + + +def pil_loader(key, data): + r""" + Function to load an image. + If the image is corrupt, it returns a black image. + Args: + key: Image key. + data: Image data stream. + """ + extension = re.sub(r".*[.]", "", key) + if extension.lower() not in _IMG_EXTENSIONS: + return None + + with io.BytesIO(data) as stream: + img = Image.open(stream) + img.load() + img = img.convert("RGB") + + return img + + +def get_world_size(): + r"""Get world size. How many GPUs are available in this job.""" + world_size = 1 + if dist.is_available(): + if dist.is_initialized(): + world_size = dist.get_world_size() + return world_size + + +class WebDatasetCommon(NeMoIterableDataset): + def __init__( + self, + dataset_cfg, + map_fn: Callable, + compose_fn: Union[Callable, List[Callable]], + consumed_samples: int, + filter_fn: Callable = None, + gen_cfg=None, + decode_fn: Callable = None, + is_train=True, + ): + + super().__init__() + self.dataset_cfg = dataset_cfg + self.num_workers = dataset_cfg.num_workers + self.world_size = get_world_size() + self.webdata_cfg = dataset_cfg.webdataset + self.infinite_sampler = self.webdata_cfg.get("infinite_sampler", False) + self.gen_cfg = gen_cfg + self.consumed_samples = consumed_samples + + self.local_root_path = self.webdata_cfg.local_root_path + if is_train: + dataset_path = dataset_cfg.train.dataset_path + self.augmentations = dataset_cfg.train.get("augmentations", None) + self.filterings = dataset_cfg.train.get("filterings", None) + else: + dataset_path = dataset_cfg.validation.dataset_path + self.augmentations = dataset_cfg.validation.get("augmentations", None) + self.filterings = dataset_cfg.validation.get("filterings", None) + + if "boto3" in dataset_cfg: + logging.info(f'Init boto3 using credentials file at {dataset_cfg.boto3.credentials_file}') + self.use_boto3 = True + assert dataset_cfg.boto3.credentials_file is not None + with open(dataset_cfg.boto3.credentials_file) as fin: + self.credentials = json.load(fin) + config = Config(connect_timeout=30, signature_version="s3", retries={"max_attempts": 999999}) + self.s3 = boto3.client('s3', **self.credentials, config=config) + self.bucket = dataset_cfg.boto3.bucket + self.local_root_path = "" + else: + logging.info(f'Read Webdataset locally. Data stores at {self.local_root_path}') + self.use_boto3 = False + self.s3 = None + self.bucket = None + + # wdinfo in a dict containing webdata information + self.wdinfo = dict() + if dataset_path[0].endswith(".pkl"): + for dset_info_path in dataset_path: + with open(dset_info_path, 'rb') as fp: + dset_info = pickle.load(fp) + if 'tar_files' not in self.wdinfo: + self.wdinfo['tar_files'] = dset_info['tar_files'] + self.wdinfo['total_key_count'] = dset_info['total_key_count'] + self.wdinfo['chunk_size'] = dset_info['chunk_size'] + else: + self.wdinfo['tar_files'].extend(dset_info['tar_files']) + self.wdinfo['total_key_count'] += dset_info['total_key_count'] + train_info = self.wdinfo + else: + train_info = self.wdinfo + train_info['tar_files'] = map(wds.shardlists.expand_urls, dataset_path) + train_info['tar_files'] = list(itertools.chain.from_iterable(train_info['tar_files'])) + train_info['chunk_size'] = self.webdata_cfg.get("chunk_size", 1000) + train_info['total_key_count'] = train_info['chunk_size'] * len(train_info['tar_files']) + + self.data_parallel_size = parallel_state.get_data_parallel_world_size() + chunk_size = train_info['chunk_size'] + + num_workers = dataset_cfg.get("num_workers") or 1 + self.consumed_urls = ( + consumed_samples + // (self.data_parallel_size * num_workers) + // chunk_size + * (self.data_parallel_size * num_workers) + ) + self.consumed_samples = self.consumed_urls * chunk_size + self.skip_ahead = consumed_samples - self.consumed_samples + + decode_fn = pil_loader if decode_fn is None else decode_fn + shards_train_list = train_info["tar_files"] + num_shards = len(shards_train_list) + assert num_shards > 0, "Did not find any training data." + + # Shuffle buffer: + shuffle_buffer_size = train_info["chunk_size"] + + if self.filterings is not None: + # TODO : Not a good way of estimating filtering (We expect user to give estimated portion) + # We should estimate in someway. This is anyway used only in progress bar + logging.info(f'Estimated {self.filterings.estimated_portion} will be remaining after filtering') + train_info["total_key_count"] = int(train_info["total_key_count"] * self.filterings.estimated_portion) + + # WDS Dataset Pipeline + # DetShuffle -> Decode -> Filter -> Map -> Compose + train_dataset, epoch = self._get_webdataset_and_epoch() + train_dataset = train_dataset.compose(detshuffle2(bufsize=shuffle_buffer_size, epoch=epoch)) + train_dataset = train_dataset.decode(decode_fn, handler=warn_and_continue) + + if self.filterings is not None: + if self.filterings.resolution is not None: + train_dataset = train_dataset.select(filter_fn) + + train_dataset = train_dataset.map(map_fn, handler=warn_and_continue) + if not isinstance(compose_fn, list): + compose_fn = [compose_fn] + for fn in compose_fn: + train_dataset = train_dataset.compose(fn) + train_dataset.total_images = train_info["total_key_count"] + + if train_info["total_key_count"] != train_info["chunk_size"] * len(train_info["tar_files"]): + logging.warning("Total image count is not equal to chunk_size * number of tar files.") + + if self.infinite_sampler: + rank, world_size, worker_id, num_workers = pytorch_worker_info() + nbatches = train_dataset.total_images // world_size // self.num_workers + logging.info(f'Setting nbatches={nbatches} for infinite sampler. world_size={world_size}') + train_dataset = train_dataset.with_epoch(nbatches=nbatches) + + logging.info("Total number of training shards: %d", num_shards) + logging.info("Total training key count: %d", train_dataset.total_images) + + self._dataset = train_dataset + + def _get_webdataset_and_epoch(self): + train_info = self.wdinfo + chunk_size = train_info["chunk_size"] + shards_train_list = train_info["tar_files"] + shards_train_list = [os.path.join(self.local_root_path, x) for x in shards_train_list] + epoch = 0 + + if not self.infinite_sampler: + logging.info(f'Initiating Webdataset Random Sampler..') + assert ( + self.filterings is None + ), 'Webdataset Random Sampler should not be used with filters. Switch to infinite sampler' + shards_train_list = WDSUrlsRandomSampler( + urls=shards_train_list, + total_urls=len(shards_train_list), + chunk_size=chunk_size, + consumed_samples=self.consumed_samples, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + num_workers=self.dataset_cfg.get("num_workers") or 1, + drop_last=True, + data_sharding=self.dataset_cfg.train.get("data_sharding", True), + ) + epoch = shards_train_list.epoch + + if self.use_boto3: + train_dataset = WebDatasetS3( + shards_train_list, + handler=warn_and_continue, + resampled=self.infinite_sampler or False, + load_from_object_store=self.use_boto3, + s3_client=self.s3, + s3_bucket_name=self.bucket, + ) + else: + train_dataset = WebDataset( + shards_train_list, handler=warn_and_continue, resampled=self.infinite_sampler or False, + ) + + return train_dataset, epoch + + def __iter__(self): + ds_iter = self._dataset.__iter__() + while self.skip_ahead > 0 and not self.infinite_sampler: + try: + _ = next(ds_iter) + self.skip_ahead -= self.data_parallel_size * self.num_workers + except StopIteration: + self.skip_ahead = 0 + return ds_iter + + def __len__(self): + return self._dataset.total_images diff --git a/nemo/collections/multimodal/data/common/webdataset_s3.py b/nemo/collections/multimodal/data/common/webdataset_s3.py new file mode 100644 index 000000000000..d1cc7c9278e3 --- /dev/null +++ b/nemo/collections/multimodal/data/common/webdataset_s3.py @@ -0,0 +1,237 @@ +import io +import os +import sys +from urllib.parse import urlparse + +import webdataset.gopen as gopen_webdata +import yaml +from webdataset import cache, filters, shardlists +from webdataset.compat import FluidInterface +from webdataset.handlers import reraise_exception +from webdataset.pipeline import DataPipeline +from webdataset.pytorch import IterableDataset +from webdataset.tariterators import group_by_keys, tar_file_expander + +# Number of attempts to read aws objects. +_NUM_OBJECT_STORE_READ_ATTEMPTS = 10 + + +def gopen(url, mode="rb", bufsize=8192, **kw): + r"""Open the URL. + This uses the `gopen_schemes` dispatch table to dispatch based + on scheme. + Support for the following schemes is built-in: pipe, file, + http, https, sftp, ftps, scp. + When no scheme is given the url is treated as a file. + You can use the OPEN_VERBOSE argument to get info about + files being opened. + + This implementation is based on webdataset's gopen, + with the modification of supporting reading from s3 object_store: + https://webdataset.github.io/webdataset/api/webdataset/gopen.html#gopen + Args: + url (list[str]): the source URL + mode (str): the mode ("rb", "r") + bufsize (int): the buffer size + """ + global fallback_gopen + verbose = int(os.environ.get("GOPEN_VERBOSE", 0)) + if verbose: + print("GOPEN", url, gopen_webdata.info, file=sys.stderr) + + assert mode in ["rb", "wb"], mode + if url == "-": + if mode == "rb": + return sys.stdin.buffer + elif mode == "wb": + return sys.stdout.buffer + else: + raise ValueError(f"unknown mode {mode}") + + # If we specify 'object_store' in keyword arguments, + # then we would load from AWS. + # In this case, you also need to specify s3_client and s3_bucket_name + # in arguments. + if 'object_store' in kw and kw['object_store']: + # Load from object store + attempt = 0 + + while attempt < _NUM_OBJECT_STORE_READ_ATTEMPTS: + try: + s3_response_object = kw['s3_client'].get_object(Bucket=kw['s3_bucket_name'], Key=url) + object_content = s3_response_object['Body'].read() + + # This is a check to verify is the object is fully read. + full_read = s3_response_object['ContentLength'] == len(object_content) + if full_read: + return io.BytesIO(object_content) + else: + attempt += 1 + except Exception as e: # noqa + # If there is an exception (usually connectivity error or protocol error), read again + attempt += 1 + print(e) + print('Retrying tar file download, attempt {}'.format(attempt)) + continue + raise ConnectionError('Unable to read {} from PBSS. {} attempts tried.'.format(url, attempt)) + + # Append root path to the url if dataset is stored on local disk system + elif 'local_root_path' in kw and kw['local_root_path'] is not None: + url = os.path.join(kw['local_root_path'], url) + + # For all other gopen schemes, use the native webdataset gopen functions. + pr = urlparse(url) + if pr.scheme == "": + bufsize = int(os.environ.get("GOPEN_BUFFER", -1)) + return open(url, mode, buffering=bufsize) + if pr.scheme == "file": + bufsize = int(os.environ.get("GOPEN_BUFFER", -1)) + return open(pr.path, mode, buffering=bufsize) + handler = gopen_webdata.gopen_schemes["__default__"] + handler = gopen_webdata.gopen_schemes.get(pr.scheme, handler) + return handler(url, mode, bufsize, **kw) + + +def url_opener(data, handler=reraise_exception, **kw): + r"""Given a stream of url names (packaged in `dict(url=url)`), yield opened streams. + + Args: + data: Iterator of dictionaires containing url paths. + handler: Exception handler. + """ + for sample in data: + assert isinstance(sample, dict), sample + assert "url" in sample + url = sample["url"] + try: + stream = gopen(url, **kw) + sample.update(stream=stream) + yield sample + except Exception as exn: + exn.args = exn.args + (url,) + if handler(exn): + continue + else: + break + + +# Define a new tarfile_samples +def tarfile_samples( + src, + handler=reraise_exception, + load_from_object_store=False, + s3_client=None, + s3_bucket_name=None, + local_root_path=None, +): + r""" + Given an iterator of filenames, this function opens the URL streams + and groups data by keys. + + Args: + src: Iterator of data dictionaires containing URL names. + handler: Exception handler. + load_from_object_store (bool): A boolean flag to specify whether to load from + object store. + s3_client: If loading from object store, specify S3 client. + s3_bucket_name: If loading from object store, specify S3 bucket name. + local_root_path: If loading from local (or mounted) disk system, + specify the root path of the dataset. + """ + streams = url_opener( + src, + handler=handler, + object_store=load_from_object_store, + s3_client=s3_client, + s3_bucket_name=s3_bucket_name, + local_root_path=local_root_path, + ) + files = tar_file_expander(streams, handler=handler) + samples = group_by_keys(files, handler=handler) + return samples + + +tarfile_to_samples = filters.pipelinefilter(tarfile_samples) + + +class WebDataset(DataPipeline, FluidInterface): + r"""Webdataset class modified to support loading from object store.""" + + def __init__( + self, + urls, + handler=reraise_exception, + resampled=False, + shardshuffle=None, + cache_size=-1, + cache_dir=None, + detshuffle=False, + nodesplitter=shardlists.single_node_only, + verbose=False, + load_from_object_store=False, + s3_client=None, + s3_bucket_name=None, + local_root_path=None, + ): + r""" + Args: + urls: An iterator containing a list of url names. + handler: Exception handler. + resampled: If true, sample shards from shard list with replacement. + shardshuffle: If true, shuffles the entire shard list. + cache_size: Size of cache. + cache_dir: Path to store cache. + detshuffle: Whether to use deterministic shuffling when shardshuffle is True. + nodesplitter: Function for splitting urls among nodes. + verbose: If True, prints logs. + load_from_object_store (bool): A boolean flag to specify whether to load from + object store. + s3_client: If loading from object store, specify S3 client. + s3_bucket_name: If loading from object store, specify S3 bucket name. + local_root_path: If loading from local (or mounted) disk system, + specify the root path of the dataset. + """ + super().__init__() + if isinstance(urls, IterableDataset): + assert not resampled + self.append(urls) + elif isinstance(urls, str) and (urls.endswith(".yaml") or urls.endswith(".yml")): + with (open(urls)) as stream: + spec = yaml.safe_load(stream) + assert "datasets" in spec + self.append(shardlists.MultiShardSample(spec)) + elif isinstance(urls, dict): + assert "datasets" in urls + self.append(shardlists.MultiShardSample(urls)) + elif resampled: + self.append(shardlists.ResampledShards(urls)) + else: + self.append(shardlists.SimpleShardList(urls)) + self.append(nodesplitter) + self.append(shardlists.split_by_worker) + if shardshuffle is True: + shardshuffle = 100 + if shardshuffle is not None: + if detshuffle: + self.append(filters.detshuffle(shardshuffle)) + else: + self.append(filters.shuffle(shardshuffle)) + if cache_dir is None or cache_size == 0: + self.append( + tarfile_to_samples( + handler=handler, + load_from_object_store=load_from_object_store, + s3_client=s3_client, + s3_bucket_name=s3_bucket_name, + local_root_path=local_root_path, + ) + ) + else: + + # We dont use cache. + assert cache_size == -1 or cache_size > 0 + self.append( + cache.cached_tarfile_to_samples( + handler=handler, verbose=verbose, cache_size=cache_size, cache_dir=cache_dir, + ) + ) diff --git a/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py b/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py new file mode 100644 index 000000000000..301be555dad1 --- /dev/null +++ b/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py @@ -0,0 +1,100 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon +from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import ( + construct_image_augmentations, + identical_transform, +) + + +def build_train_valid_datasets( + model_cfg, consumed_samples, +): + data_cfg = model_cfg.data + + # This function maps data that are tuples to dictionary. + def tuple_to_dict(inp): + for input in inp: + out_dict = dict() + out_dict['images'] = input[0].permute(1, 2, 0) + out_dict['captions'] = input[1] + out_dict['hint'] = input[2].permute(1, 2, 0) + yield out_dict + + def transform_fn(sample): + + image, text, hint = sample["jpg"], sample["txt"], sample["png"] + # TODO : If no agumentations just return the image ? + img_transform = construct_image_augmentations(data_cfg.train.get("augmentations", None)) + text_transform = identical_transform + return img_transform(image), text_transform(text), img_transform(hint) + + train_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=True, + ) + + val_data = None + if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): + val_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=False, + ) + + return train_data, val_data + + +def build_train_valid_precached_datasets( + model_cfg, consumed_samples, +): + data_cfg = model_cfg.data + + # This function maps data that are tuples to dictionary. + def tuple_to_dict(inp): + for input in inp: + out_dict = dict() + out_dict[model_cfg.first_stage_key] = torch.tensor(input['autoencoderkl_image']) + out_dict[model_cfg.cond_stage_key] = torch.tensor(input['clip-vit-large-patch14_text']) + yield out_dict + + def transform_fn(sample): + return sample['pickle'] + + train_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=True, + ) + + val_data = None + if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): + val_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=False, + ) + + return train_data, val_data diff --git a/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py b/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py new file mode 100644 index 000000000000..e25dcfbecf6b --- /dev/null +++ b/nemo/collections/multimodal/data/dreambooth/dreambooth_dataset.py @@ -0,0 +1,148 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import pickle +from pathlib import Path + +import torch +from PIL import Image +from pytorch_lightning.utilities import rank_zero_only +from torch.utils.data import Dataset +from torchvision import transforms +from tqdm import tqdm + + +class DreamBoothDataset(Dataset): + """ + A dataset to prepare the instance and class images with the prompts for fine-tuning the model. + It pre-processes the images and the tokenizes prompts. + """ + + def __init__( + self, + instance_data_root, + instance_prompt, + with_prior_preservation=False, + reg_data_root=None, + reg_prompt=None, + size=512, + center_crop=True, + repeat=10000, + load_cache_latents=False, + cached_instance_data_root=None, + cached_reg_data_root=None, + vae=None, + text_encoder=None, + ): + self.size = size + self.center_crop = center_crop + + assert instance_data_root or cached_instance_data_root, "must provide instance images to start training." + self.instance_data_root = Path(instance_data_root) + self.cached_instance_data_root = cached_instance_data_root + self.cached_reg_data_root = cached_reg_data_root + + self.instance_images_path = list(Path(instance_data_root).iterdir()) + self.num_instance_images = len(self.instance_images_path) + self.instance_prompt = instance_prompt + self._length = self.num_instance_images * repeat + self.load_cache_latents = load_cache_latents + self.with_prior_preservation = with_prior_preservation + + if reg_data_root is not None: + self.reg_data_root = Path(reg_data_root) + self.reg_images_path = list(self.reg_data_root.iterdir()) + self.num_reg_images = len(self.reg_images_path) + self.reg_prompt = reg_prompt + else: + self.reg_data_root = None + + self.image_transforms = transforms.Compose( + [ + transforms.Resize(size, interpolation=transforms.InterpolationMode.BILINEAR), + transforms.CenterCrop(size) if center_crop else transforms.RandomCrop(size), + transforms.ToTensor(), + transforms.Normalize([0.5], [0.5]), + ] + ) + + if self.load_cache_latents: + if (self.cached_instance_data_root is None) or ( + self.with_prior_preservation and self.cached_reg_data_root is None + ): + self.cache_latents(vae, text_encoder) + + self.cached_instance_data_root = f'{self.instance_data_root}_cached' + self.cached_reg_data_root = f'{self.reg_data_root}_cached' + self.instance_images_path = list(Path(self.cached_instance_data_root).iterdir()) + self.num_instance_images = len(self.instance_images_path) + + if self.with_prior_preservation: + self.reg_images_path = list(Path(self.cached_reg_data_root).iterdir()) + self.num_reg_images = len(self.reg_images_path) + + if self.cached_instance_data_root: + self.instance_images_path = list(Path(self.cached_instance_data_root).iterdir()) + self.num_instance_images = len(self.instance_images_path) + if self.with_prior_preservation and self.cached_reg_data_root: + self.reg_images_path = list(Path(self.cached_reg_data_root).iterdir()) + self.num_reg_images = len(self.reg_images_path) + + def __len__(self): + return self._length + + def get_image(self, path): + image = Image.open(path) + if not image.mode == "RGB": + image = image.convert("RGB") + image = self.image_transforms(image) + return image + + def __getitem__(self, index): + example = {} + if self.load_cache_latents: + example["instance_images"] = torch.load(self.instance_images_path[index % self.num_instance_images]) + else: + example["instance_images"] = self.get_image(self.instance_images_path[index % self.num_instance_images]) + example["instance_prompt"] = self.instance_prompt + + if self.reg_data_root: + if self.load_cache_latents: + example["reg_images"] = torch.load(self.reg_images_path[index % self.num_reg_images]) + else: + example["reg_images"] = self.get_image(self.reg_images_path[index % self.num_reg_images]) + example["reg_prompt"] = self.reg_prompt + + return example + + @rank_zero_only + def cache_latents(self, vae, text_encoder): + os.makedirs(f'{self.instance_data_root}_cached', exist_ok=True) + self.cached_instance_data_root = f'{self.instance_data_root}_cached' + self.cached_reg_data_root = f'{self.reg_data_root}_cached' + if self.instance_data_root and (len(os.listdir(self.cached_instance_data_root)) < self.num_instance_images): + for i in tqdm(range(self.num_instance_images)): + x = torch.Tensor(self.get_image(self.instance_images_path[i % self.num_instance_images])) + x = torch.unsqueeze(x, dim=0) + params = vae.encode(x).parameters.squeeze(dim=0) + torch.save(params, f'{self.instance_data_root}_cached/instance_image_cache_{i}.pt') + + if self.with_prior_preservation: + os.makedirs(f'{self.reg_data_root}_cached', exist_ok=True) + if self.reg_data_root and (len(os.listdir(self.cached_reg_data_root)) < self.num_reg_images): + for i in tqdm(range(self.num_reg_images)): + x = torch.Tensor(self.get_image(self.reg_images_path[i % self.num_reg_images])) + x = torch.unsqueeze(x, dim=0) + params = vae.encode(x).parameters.squeeze(dim=0) + torch.save(params, f'{self.reg_data_root}_cached/reg_image_cache_{i}.pt') diff --git a/nemo/collections/multimodal/data/imagen/__init__.py b/nemo/collections/multimodal/data/imagen/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/imagen/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/imagen/augmentations/__init__.py b/nemo/collections/multimodal/data/imagen/augmentations/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/imagen/augmentations/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/imagen/augmentations/augmentations.py b/nemo/collections/multimodal/data/imagen/augmentations/augmentations.py new file mode 100644 index 000000000000..23f481bc8720 --- /dev/null +++ b/nemo/collections/multimodal/data/imagen/augmentations/augmentations.py @@ -0,0 +1,76 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import List, Optional + +import torch + +from nemo.utils import logging + + +def build_resolution_filter(value=None, method='larger', image_idx=0): + """ + Filter image based on its resolution. + value: filter threshold + method: Either larger or smaller + image_idx: idx of the image in the tuple input + """ + assert method == 'larger' or method == 'smaller' + if method == 'larger': + logging.info(f'Only Selecting images with resolution >= {value}') + return lambda x: x[image_idx].size[0] >= value and x[image_idx].size[1] >= value + + logging.info(f'Only Selecting images with resolution <= {value}') + return lambda x: x[image_idx].size[0] <= value and x[image_idx].size[1] <= value + + +class PickleTransform: + """ + Convert encodings stored in the pickle file to encoding and mask. + Transform the pad and resize the embedding to match the generator config. + """ + + def __init__(self, encoding_lengths: List[int], encoding_keys: List[str], out_keys: Optional[List[str]] = None): + assert len(encoding_keys) == len(encoding_lengths) + self.encoding_lengths = encoding_lengths + self.encoding_keys = encoding_keys + self.out_keys = out_keys if out_keys is not None else encoding_keys + + def _pad_and_resize(self, arr, ntokens): + # Function for padding and resizing a numpy array + + arr = torch.tensor(arr) + embed_dim = arr.shape[1] + + arr_padded = torch.zeros(ntokens, embed_dim, device=arr.device, dtype=torch.float32) + + # If the input text is larger than num_text_tokens, clip it. + if arr.shape[0] > ntokens: + arr = arr[0:ntokens] + + mask = torch.LongTensor(ntokens).zero_() + if len(arr.shape) > 1: + mask[0 : arr.shape[0]] = 1 + + if len(arr.shape) > 1: + arr_padded[0 : arr.shape[0]] = arr + + return arr_padded, mask + + def __call__(self, data): + out_dict = dict() + for token_length, encoding_key, out_key in zip(self.encoding_lengths, self.encoding_keys, self.out_keys): + embed, mask = self._pad_and_resize(data[encoding_key]['encodings'], token_length) + out_dict[f'{out_key}_embeddings'] = embed + out_dict[f'{out_key}_mask'] = mask + return out_dict diff --git a/nemo/collections/multimodal/data/imagen/augmentations/corruption.py b/nemo/collections/multimodal/data/imagen/augmentations/corruption.py new file mode 100644 index 000000000000..6c17066fd285 --- /dev/null +++ b/nemo/collections/multimodal/data/imagen/augmentations/corruption.py @@ -0,0 +1,33 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torchvision.transforms.functional as torchvision_F + + +class ImagePyramidNoCorruptions: + r""" + Only downsample image without any additional corruption. + """ + + def __init__(self, target_resolutions): + self.resolutions = target_resolutions + + def obtain_image_pyramid(self, image): + # Downsampling + data_dict = dict() + for res in self.resolutions: + image_downsampled = torchvision_F.resize( + image, res, interpolation=torchvision_F.InterpolationMode.BICUBIC, antialias=True + ) + data_dict[f'images_{res}'] = image_downsampled + return data_dict diff --git a/nemo/collections/multimodal/data/imagen/imagen_dataset.py b/nemo/collections/multimodal/data/imagen/imagen_dataset.py new file mode 100644 index 000000000000..c3db3b3a4612 --- /dev/null +++ b/nemo/collections/multimodal/data/imagen/imagen_dataset.py @@ -0,0 +1,156 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon +from nemo.collections.multimodal.data.imagen.augmentations.augmentations import ( + PickleTransform, + build_resolution_filter, +) +from nemo.collections.multimodal.data.imagen.augmentations.corruption import ImagePyramidNoCorruptions +from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import ( + construct_image_augmentations, + identical_transform, +) +from nemo.core.classes import Dataset as NeMoDataset +from nemo.utils import logging + + +class ImagenSyntheticDataset(NeMoDataset): + def __init__( + self, res, conditioning_cfg, fake_len=100000, no_embedding=False, + ): + super().__init__() + self.fake_len = fake_len + self.res = res + self.no_embedding = no_embedding + if not no_embedding: + self.out_key = conditioning_cfg.out_key if conditioning_cfg.out_key else conditioning_cfg.precached_key + self.token_length = conditioning_cfg.token_length + self.embed_dim = conditioning_cfg.embed_dim + + def __getitem__(self, index): + item = {} + if isinstance(self.res, list): + for resolution in self.res: + image_key = f'images_{resolution}' + item[image_key] = torch.randn(3, resolution, resolution) + else: + item['images'] = torch.randn(3, self.res, self.res) + + item['raw_text'] = f'fake text {index}' + if not self.no_embedding: + item[f'{self.out_key}_embeddings'] = torch.randn(self.token_length, self.embed_dim) + item[f'{self.out_key}_mask'] = torch.ones(self.token_length, dtype=torch.long) + return item + + def __len__(self): + return self.fake_len + + +def _build_functions_with_pickles(data_cfg, condition_cfg): + def tuple_to_dict(inp): + for input in inp: + out_dict = dict() + out_dict['images'] = input[0] + + # Output from pickle transform is already a dictionary + out_dict.update(input[1]) + + out_dict['raw_text'] = input[2] + yield out_dict + + def transform_fn(sample): + image, encodings, text = sample['jpg'], sample['pickle'], sample['txt'] + img_transform = construct_image_augmentations(data_cfg.train.get('augmentations'), normalize=True) + pickle_transform = PickleTransform( + encoding_keys=[condition_cfg.precached_key], + encoding_lengths=[condition_cfg.token_length], + out_keys=[condition_cfg.out_key], + ) + text_transform = identical_transform + return img_transform(image), pickle_transform(encodings), text_transform(text) + + return tuple_to_dict, transform_fn + + +def _build_functions_no_pickles(data_cfg): + def tuple_to_dict(inp): + for input in inp: + out_dict = dict() + out_dict['images'] = input[0] + out_dict['raw_text'] = input[1] + yield out_dict + + def transform_fn(sample): + image, text = sample['jpg'], sample['txt'] + img_transform = construct_image_augmentations(data_cfg.train.get('augmentations'), normalize=True) + text_transform = identical_transform + return img_transform(image), text_transform(text) + + return tuple_to_dict, transform_fn + + +def build_train_valid_datasets( + model_cfg, consumed_samples, +): + data_cfg = model_cfg.data + condition_cfg = model_cfg.conditioning + + if data_cfg.get('synthetic_data', False): + logging.info(f'Creating Synthetic Datasaet.') + train_data = ImagenSyntheticDataset( + res=data_cfg.train.get('target_resolutions', 64), + conditioning_cfg=condition_cfg, + fake_len=data_cfg.get('synthetic_data_length', 10000), + no_embedding=condition_cfg.get("online_encoding", False), + ) + return train_data, None + # This function maps data that are tuples to dictionary. + if condition_cfg.get("online_encoding", False): + tuple_to_dict, transform_fn = _build_functions_no_pickles(data_cfg) + else: + tuple_to_dict, transform_fn = _build_functions_with_pickles(data_cfg, condition_cfg) + + filter_cfg = data_cfg.train.get('filterings', None) + + # For adding corruptions and obtaining image pyramid + if model_cfg.unet_type.startswith('sr'): + assert data_cfg.train.get('target_resolutions'), 'SR model requires multiple resolution for training' + logging.info(f'Resizing input images into the follow resolutions: {data_cfg.train.target_resolutions}') + corruption_gen = ImagePyramidNoCorruptions(target_resolutions=data_cfg.train.target_resolutions) + else: + corruption_gen = None + + # This function is used for obtaining image pyramid + # in SR models for Imagen, we need to use low-res image as conditioning. + def obtain_image_pyramid(inp): + for data_dict in inp: + data_pyramid = corruption_gen.obtain_image_pyramid(data_dict['images']) + data_dict.update(data_pyramid) + yield data_dict + + compose_fn = [tuple_to_dict] + if corruption_gen: + compose_fn.append(obtain_image_pyramid) + + train_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=compose_fn, + filter_fn=build_resolution_filter(**filter_cfg.resolution, image_idx='jpg') if filter_cfg else None, + is_train=True, + ) + return train_data, None diff --git a/nemo/collections/multimodal/data/instruct_pix2pix/__init__.py b/nemo/collections/multimodal/data/instruct_pix2pix/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/instruct_pix2pix/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py b/nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py new file mode 100644 index 000000000000..37f4518528a9 --- /dev/null +++ b/nemo/collections/multimodal/data/instruct_pix2pix/edit_dataset.py @@ -0,0 +1,132 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import json +import math +from pathlib import Path +from typing import Any + +import numpy as np +import torch +import torchvision +from einops import rearrange +from PIL import Image +from torch.utils.data import Dataset + +from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import construct_image_augmentations + + +class EditDataset(Dataset): + def __init__( + self, + path: str, + split: str = "train", + splits: tuple[float, float, float] = (0.95, 0.04, 0.01), + min_resize_res: int = 256, + max_resize_res: int = 256, + crop_res: int = 256, + flip_prob: float = 0.0, + ): + assert split in ("train", "val", "test") + assert sum(splits) == 1 + self.path = path + self.min_resize_res = min_resize_res + self.max_resize_res = max_resize_res + self.crop_res = crop_res + self.flip_prob = flip_prob + + with open(Path(self.path, "seeds.json")) as f: + self.seeds = json.load(f) + + split_0, split_1 = { + "train": (0.0, splits[0]), + "val": (splits[0], splits[0] + splits[1]), + "test": (splits[0] + splits[1], 1.0), + }[split] + + idx_0 = math.floor(split_0 * len(self.seeds)) + idx_1 = math.floor(split_1 * len(self.seeds)) + self.seeds = self.seeds[idx_0:idx_1] + + def __len__(self) -> int: + return len(self.seeds) + + def __getitem__(self, i: int) -> dict[str, Any]: + name, seeds = self.seeds[i] + propt_dir = Path(self.path, name) + seed = seeds[torch.randint(0, len(seeds), ()).item()] + with open(propt_dir.joinpath("prompt.json")) as fp: + prompt = json.load(fp)["edit"] + + image_0 = Image.open(propt_dir.joinpath(f"{seed}_0.jpg")) + image_1 = Image.open(propt_dir.joinpath(f"{seed}_1.jpg")) + + resize_res = torch.randint(self.min_resize_res, self.max_resize_res + 1, ()).item() + image_0 = image_0.resize((resize_res, resize_res), Image.Resampling.LANCZOS) + image_1 = image_1.resize((resize_res, resize_res), Image.Resampling.LANCZOS) + + image_0 = rearrange(2 * torch.tensor(np.array(image_0)).float() / 255 - 1, "h w c -> c h w") + image_1 = rearrange(2 * torch.tensor(np.array(image_1)).float() / 255 - 1, "h w c -> c h w") + + crop = torchvision.transforms.RandomCrop(self.crop_res) + flip = torchvision.transforms.RandomHorizontalFlip(float(self.flip_prob)) + image_0, image_1 = flip(crop(torch.cat((image_0, image_1)))).chunk(2) + + return dict(edited=image_1, edit=dict(c_concat=image_0, c_crossattn=prompt)) + + +class EditDatasetEval(Dataset): + def __init__( + self, path: str, split: str = "train", splits: tuple[float, float, float] = (0.9, 0.05, 0.05), res: int = 256, + ): + assert split in ("train", "val", "test") + assert sum(splits) == 1 + self.path = path + self.res = res + + with open(Path(self.path, "seeds.json")) as f: + self.seeds = json.load(f) + + split_0, split_1 = { + "train": (0.0, splits[0]), + "val": (splits[0], splits[0] + splits[1]), + "test": (splits[0] + splits[1], 1.0), + }[split] + + idx_0 = math.floor(split_0 * len(self.seeds)) + idx_1 = math.floor(split_1 * len(self.seeds)) + self.seeds = self.seeds[idx_0:idx_1] + + def __len__(self) -> int: + return len(self.seeds) + + def __getitem__(self, i: int) -> dict[str, Any]: + name, seeds = self.seeds[i] + propt_dir = Path(self.path, name) + seed = seeds[torch.randint(0, len(seeds), ()).item()] + with open(propt_dir.joinpath("prompt.json")) as fp: + prompt = json.load(fp) + edit = prompt["edit"] + input_prompt = prompt["input"] + output_prompt = prompt["output"] + + image_0 = Image.open(propt_dir.joinpath(f"{seed}_0.jpg")) + + reize_res = torch.randint(self.res, self.res + 1, ()).item() + image_0 = image_0.resize((reize_res, reize_res), Image.Resampling.LANCZOS) + + image_0 = rearrange(2 * torch.tensor(np.array(image_0)).float() / 255 - 1, "h w c -> c h w") + + return dict(image_0=image_0, input_prompt=input_prompt, edit=edit, output_prompt=output_prompt) diff --git a/nemo/collections/multimodal/data/kosmos/__init__.py b/nemo/collections/multimodal/data/kosmos/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/data/kosmos/kosmos_dataset.py b/nemo/collections/multimodal/data/kosmos/kosmos_dataset.py new file mode 100644 index 000000000000..78b736ca36b8 --- /dev/null +++ b/nemo/collections/multimodal/data/kosmos/kosmos_dataset.py @@ -0,0 +1,366 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import io +import json +import re +from functools import partial +from typing import Any, Dict, List, Optional, Union + +import torch +from einops import rearrange +from PIL import Image +from torch.utils.data import Dataset, default_collate + +from nemo.collections.multimodal.data.clip.augmentations.augmentations import image_transform +from nemo.collections.multimodal.data.clip.imagenet_zeroshot_data import imagenet_classnames, openai_imagenet_template +from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( + MegatronPretrainingRandomSampler, + MegatronPretrainingSampler, +) +from nemo.collections.nlp.data.language_modeling.megatron.gpt_dataset import _create_ltor_masks_and_position_ids +from nemo.collections.vision.data.megatron.image_folder import ImageFolder +from nemo.collections.vision.data.megatron.vit_dataset import RandomSeedDataset + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + +MIN_KB = 10 +MAX_NUM_IMAGES = 6 +Image.MAX_IMAGE_PIXELS = 933120000 +_IMG_EXTENSIONS = "jpg jpeg png ppm pgm pbm pnm".split() +_DATASET_TYPES = ["image_caption", "image_interleaved"] + + +def pil_loader(key, data): + r""" + Function to load an image. + If the image is corrupt, it returns a black image. + Args: + key: Image key. + data: Image data stream. + """ + extension = re.sub(r".*[.]", "", key) + if extension.lower() not in _IMG_EXTENSIONS: + return None + if len(data) // 1000 <= MIN_KB: + return None + + with io.BytesIO(data) as stream: + img = Image.open(stream) + img.load() + img = img.convert("RGB") + + return img + + +def tokenize_and_insert_media_tokens( + texts: Union[str, List[str]], + tokenizer: Any, + context_length: int, + num_media_tokens: int, + add_extra_token: int, + media_start_id: str, + media_end_id: str, +) -> torch.LongTensor: + """ + Returns the tokenized representation of given input string(s) with media tokens inserted. + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize. + tokenizer : Any + A tokenizer to be used for tokenization. + context_length : int + The context length to be used for the output tensor. + num_media_tokens : int + The number of media latents to insert between media tokens. + + Returns + ------- + torch.LongTensor + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length]. + """ + assert add_extra_token == 0 or add_extra_token == 1, "`add_extra_token` should be either 0 or 1." + + texts_is_str = False + if isinstance(texts, str): + texts = [texts] + texts_is_str = True + + # bos token is never used + # bos_id = tokenizer.bos_id + eos_id = tokenizer.eos_id + + all_tokens = [] + for text in texts: + tokens = tokenizer.text_to_ids(text) + media_positions = [i for i, x in enumerate(tokens) if x == media_start_id] + for media_pos in media_positions[::-1]: + tokens[media_pos : media_pos + 1] = [media_start_id] + [-1] * num_media_tokens + [media_end_id] + tokens = tokens + [eos_id] + all_tokens.append(tokens) + + # truncate and padding + result = torch.zeros(len(all_tokens), context_length + add_extra_token, dtype=torch.long) + + for i, tokens in enumerate(all_tokens): + if len(tokens) > context_length + add_extra_token: + tokens = tokens[: context_length + add_extra_token] # Truncate + result[i, : len(tokens)] = torch.tensor(tokens) + + if texts_is_str: + result = result[0] + return result + + +def get_preprocess_fns( + model_cfg, data_type, tokenizer=None, is_train=True, add_extra_token=1, media_start_id=None, media_end_id=None, +): + assert ( + media_start_id is not None and media_end_id is not None + ), "`media_start_id` and `media_end_id` should be provided." + + # Define transforms + img_size = (model_cfg.vision.get("img_h"), model_cfg.vision.get("img_w")) + img_mean = model_cfg.vision.get("img_mean") + img_std = model_cfg.vision.get("img_std") + img_transform = image_transform(img_size, is_train=is_train, mean=img_mean, std=img_std,) + + text_transform = lambda x: x + if tokenizer is not None: + text_transform = partial( + tokenize_and_insert_media_tokens, + tokenizer=tokenizer, + context_length=model_cfg.per_type_sequence_length[data_type], + num_media_tokens=model_cfg.num_media_latents, + add_extra_token=add_extra_token, + media_start_id=media_start_id, + media_end_id=media_end_id, + ) + else: + raise ValueError("tokenizer should not be None here!") + + return img_transform, text_transform + + +def transform_fn_for_image_caption(sample, img_transform, text_transform, media_start_token): + image, text = sample["jpg"], sample["txt"] + caption_template = lambda x: f"{media_start_token}{x.strip()}" + text = caption_template(text) + return img_transform(image), text_transform(text) + + +def transform_fn_for_image_interleaved(sample, img_transform, text_transform, media_start_token, sim_threshold=0.3): + info = sample["json"] + sentences = info["text_list"] + + images, sentence_ixs = [], [] + for sample_image in info["image_info"]: + image = sample[sample_image["image_name"]] + # filter to images >= 10KB + if isinstance(image, bytes): + continue + if sample_image["matched_sim"] < sim_threshold: + continue + + images.append(image) + sentence_ixs.append(sample_image["matched_text_index"]) + + if len(images) == 0: + raise ValueError("No images in sample") + + keep_ixs = min(len(images), MAX_NUM_IMAGES) + images = images[:keep_ixs] + sentence_ixs = sentence_ixs[:keep_ixs] + + def interleaved_template(sentences, sentence_ixs): + for ix in sentence_ixs: + sentences[ix] = f"{media_start_token}{sentences[ix]}" + text = " ".join(sentences) + return text + + text = interleaved_template(sentences, sentence_ixs) + images_tensors = torch.stack([img_transform(image) for image in images]) + image_size = images_tensors.shape[1:] + if len(images_tensors) < MAX_NUM_IMAGES: + zero_padding = torch.zeros((MAX_NUM_IMAGES - len(images_tensors), *image_size), dtype=torch.float) + images_tensors = torch.cat((images_tensors, zero_padding), dim=0) + + return images_tensors, text_transform(text) + + +def compose_batch(inp, model_cfg, tokenizer, add_extra_token, media_start_id, media_end_id, newline_id): + pad_id = tokenizer.pad_id + for input in inp: + media = input[0] + + # vision_x should be of shape (b, T_img, F, C, H, W) + if len(media.shape) == 3: # image_caption + media = rearrange(media, "c h w -> 1 1 c h w") + elif len(media.shape) == 4: # image_interleaved + media = rearrange(media, "T c h w -> T 1 c h w") + else: + raise ValueError(f"Media shape length is not expected: {media.shape}.") + + tokens = input[1] + if add_extra_token: + tokens = input[1][:-1].contiguous() + labels = input[1][1:].contiguous().clone().detach() + else: + labels = torch.roll(tokens, shifts=-1, dims=0) + labels[-1] = -1 + + labels[labels == media_start_id] = newline_id + labels[labels == media_end_id] = -1 + labels[labels == pad_id] = -1 + + attention_mask, loss_mask, position_ids = _create_ltor_masks_and_position_ids( + tokens=tokens, + eod_token=tokenizer.eos_id, + eod_mask_loss=model_cfg.data.get("eod_mask_loss", False), + reset_attention_mask=False, + reset_position_ids=False, + ) + + loss_mask[labels == -1] = 0.0 + tokens[tokens == -1] = 0 + labels[labels == -1] = 0 + + yield { + 'tokens': tokens, + 'labels': labels, + 'attention_mask': attention_mask, + 'loss_mask': loss_mask, + 'position_ids': position_ids, + 'media': media, + } + + +def build_train_valid_datasets( + model_cfg, consumed_samples, tokenizer=None, data_type='image_caption', +): + assert data_type in _DATASET_TYPES, f"`data_type={data_type}` is not available: {_DATASET_TYPES}." + + media_start_token = model_cfg.media_start_token + media_end_token = model_cfg.media_end_token + assert ( + media_start_token in tokenizer.vocab and media_end_token in tokenizer.vocab + ), f"Cannot find media tokens in tokenizer vocab: {media_start_token} {media_end_token}" + media_start_id = tokenizer.token_to_id(media_start_token) + media_end_id = tokenizer.token_to_id(media_end_token) + newline_id = tokenizer.text_to_ids("\n")[-1] + + data_cfg = model_cfg.data.get(data_type) + + no_seqlen_plus_one_input_tokens = model_cfg.data.get('no_seqlen_plus_one_input_tokens', False) + add_extra_token = 0 if no_seqlen_plus_one_input_tokens else 1 + + compose_fn = compose_batch + if data_type == 'image_caption': + transform_fn = transform_fn_for_image_caption + elif data_type == 'image_interleaved': + transform_fn = transform_fn_for_image_interleaved + + train_img_transform, text_transform = get_preprocess_fns( + model_cfg, + data_type=data_type, + tokenizer=tokenizer, + is_train=True, + add_extra_token=add_extra_token, + media_start_id=media_start_id, + media_end_id=media_end_id, + ) + train_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + decode_fn=pil_loader if data_type == 'interleaved' else None, + map_fn=partial( + transform_fn, + img_transform=train_img_transform, + text_transform=text_transform, + media_start_token=media_start_token, + ), + compose_fn=partial( + compose_fn, + model_cfg=model_cfg, + tokenizer=tokenizer, + add_extra_token=add_extra_token, + media_start_id=media_start_id, + media_end_id=media_end_id, + newline_id=newline_id, + ), + is_train=True, + ) + + val_data = None + if data_cfg.get("validation") is not None and data_cfg.validation.get("dataset_path"): + val_img_transform, text_transform = get_preprocess_fns( + model_cfg, + data_type=data_type, + tokenizer=tokenizer, + is_train=False, + add_extra_token=add_extra_token, + media_start_id=media_start_id, + media_end_id=media_end_id, + ) + val_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=0, + decode_fn=pil_loader if data_type == 'interleaved' else None, + map_fn=partial( + transform_fn, + img_transform=train_img_transform, + text_transform=text_transform, + media_start_token=media_start_token, + ), + compose_fn=partial( + compose_fn, + model_cfg=model_cfg, + tokenizer=tokenizer, + add_extra_token=add_extra_token, + media_start_id=media_start_id, + media_end_id=media_end_id, + newline_id=newline_id, + ), + is_train=False, + ) + + return train_data, val_data + + +class MergedKosmosDataLoader: + def __init__(self, dataloaders): + self.dataloaders = dataloaders + self.dataloader_iters = {type: iter(dataloader) for type, dataloader in dataloaders.items()} + self.lengths = {type: len(dataloader) for type, dataloader in dataloaders.items()} + self.min_length = min(self.lengths.values()) + + def __iter__(self): + while True: + try: + batch = {type: next(iter) for type, iter in self.dataloader_iters.items()} + except StopIteration: + return + yield batch + + def __len__(self): + return self.min_length diff --git a/nemo/collections/multimodal/data/nerf/__init__.py b/nemo/collections/multimodal/data/nerf/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/data/nerf/cameras.py b/nemo/collections/multimodal/data/nerf/cameras.py new file mode 100644 index 000000000000..c1496b7eeaa3 --- /dev/null +++ b/nemo/collections/multimodal/data/nerf/cameras.py @@ -0,0 +1,178 @@ +from abc import ABC, abstractmethod +from typing import List + +import numpy as np +import torch + + +class Camera(ABC): + """ + Abstract base class for Camera models. + """ + + def __init__(self, width: int, height: int, device: torch.device = 'cuda') -> None: + """ + Initializes the Camera instance with given dimensions and device. + + Parameters: + width: int - Width of the camera frame. + height: int - Height of the camera frame. + device: torch.device - The device where tensor computations will be performed. + """ + self.width = width + self.height = height + self.device = device + + @abstractmethod + def compute_intrinsics(self) -> None: + """ + Abstract method to compute camera intrinsics. + """ + pass + + @abstractmethod + def compute_projection_matrix(self) -> None: + """ + Abstract method to compute the projection matrix. + """ + pass + + +class OrthographicCamera(Camera): + """ + Class for Orthographic Camera models. + """ + + def compute_projection_matrix(self) -> torch.Tensor: + """ + Computes the projection matrix for an Orthographic camera. + + Returns: + torch.Tensor: The projection matrix. + """ + projection = torch.tensor( + [[2 / self.width, 0, 0, 0], [0, -2 / self.height, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], + dtype=torch.float32, + device=self.device, + ).unsqueeze(0) + return projection + + +class PinholeCamera(Camera): + """ + Class for Pinhole Camera models. + """ + + def __init__(self, width: int, height: int, near: float, far: float, device: torch.device = 'cuda') -> None: + """ + Initializes the Pinhole Camera instance with given parameters. + + Parameters: + width: int - Width of the camera frame. + height: int - Height of the camera frame. + near: float - Near clipping plane. + far: float - Far clipping plane. + device: torch.device - The device where tensor computations will be performed. + """ + super().__init__(width, height, device) + self.near = near + self.far = far + + def compute_intrinsics(self, fovx: float, fovy: float) -> np.ndarray: + """ + Computes the intrinsic matrix for the camera based on field of views. + + Parameters: + fovx: float - Field of view in X direction. + fovy: float - Field of view in Y direction. + + Returns: + np.ndarray: The intrinsic matrix. + """ + focal_x = self.width / (2 * np.tan(np.deg2rad(fovx) / 2)) + focal_y = self.height / (2 * np.tan(np.deg2rad(fovy) / 2)) + cx, cy = self.width / 2, self.height / 2 + return np.array([focal_x, focal_y, cx, cy]) + + def compute_projection_matrix(self, focal_x: float, focal_y: float) -> torch.Tensor: + """ + Computes the projection matrix for the camera. + + Parameters: + focal_x: float - Focal length in X direction. + focal_y: float - Focal length in Y direction. + + Returns: + torch.Tensor: The projection matrix. + """ + projection = torch.tensor( + [ + [2 * focal_x / self.width, 0, 0, 0], + [0, -2 * focal_y / self.height, 0, 0], + [ + 0, + 0, + -(self.far + self.near) / (self.far - self.near), + -(2 * self.far * self.near) / (self.far - self.near), + ], + [0, 0, -1, 0], + ], + dtype=torch.float32, + device=self.device, + ).unsqueeze(0) + return projection + + +class CubeCamera(Camera): + """ + Class for Cube Camera models, which is essentially six pinhole cameras. + """ + + def __init__( + self, width: int, height: int, near: float = 0.01, far: float = 1000, device: torch.device = 'cuda' + ) -> None: + """ + Initializes the Cube Camera instance with given parameters. + + Parameters: + width: int - Width of each camera face. + height: int - Height of each camera face. + near: float - Near clipping plane. + far: float - Far clipping plane. + device: torch.device - The device where tensor computations will be performed. + """ + self.width = width + self.height = height + self.near = near + self.far = far + self.device = device + + def compute_intrinsics(self) -> List[np.ndarray]: + """ + Computes the intrinsic matrices for the six faces of the cube using a Pinhole camera model. + + Returns: + List[np.ndarray]: List of 6 intrinsic matrices, one for each face. + """ + # Similar to Pinhole but repeated six times for six faces of the cube + return [ + PinholeCamera( + width=self.width, height=self.height, near=self.near, far=self.far, device=self.device + ).compute_intrinsics(90, 90) + for _ in range(6) + ] + + def compute_projection_matrix(self) -> List[torch.Tensor]: + """ + Computes the projection matrices for the six faces of the cube using a Pinhole camera model. + + Returns: + List[torch.Tensor]: List of 6 projection matrices, one for each face. + """ + # Similar to Pinhole but repeated six times for six faces of the cube + return [ + PinholeCamera( + width=self.width, height=self.height, near=self.near, far=self.far, device=self.device + ).compute_projection_matrix(1, 1) + for _ in range(6) + ] diff --git a/nemo/collections/multimodal/data/nerf/circle_poses.py b/nemo/collections/multimodal/data/nerf/circle_poses.py new file mode 100644 index 000000000000..9c833ad94bab --- /dev/null +++ b/nemo/collections/multimodal/data/nerf/circle_poses.py @@ -0,0 +1,214 @@ +from typing import Dict, Union + +import numpy as np +import torch +from torch.utils.data import Dataset + +from nemo.collections.multimodal.data.nerf.cameras import PinholeCamera +from nemo.collections.multimodal.data.nerf.utils import ( + compute_look_at_vectors, + construct_poses, + get_rays, + get_view_direction, +) + + +def circle_poses( + radius: torch.Tensor = torch.tensor([3.2]), + theta: torch.Tensor = torch.tensor([60]), + phi: torch.Tensor = torch.tensor([0]), + angle_overhead: float = 30, + angle_front: float = 60, + return_dirs: bool = False, + device: torch.device = "cuda", +) -> torch.Tensor: + """ + Generate camera poses based on a circular arrangement. + + Parameters: + radius: torch.Tensor - Radii for the camera positions. + theta: torch.Tensor - Theta angles for the camera positions. + phi: torch.Tensor - Phi angles for the camera positions. + angle_overhead: float - Angle range of the overhead view. + angle_front: float - Angle range of the front view. + return_dirs: bool - Whether to return the view directions. + device: str - The device to allocate the tensor on (e.g., 'cuda' or 'cpu'). + + Returns: + Tuple: Contains the following: + - poses (torch.Tensor): Generated poses, shape [size, 4, 4]. + - dirs (torch.Tensor, optional): View directions, if requested. + """ + # Convert degrees to radians for theta and phi + theta = theta / 180 * np.pi + phi = phi / 180 * np.pi + angle_overhead = angle_overhead / 180 * np.pi + angle_front = angle_front / 180 * np.pi + + # Calculate camera centers in Cartesian coordinates + centers = torch.stack( + [ + radius * torch.sin(theta) * torch.sin(phi), + radius * torch.cos(theta), + radius * torch.sin(theta) * torch.cos(phi), + ], + dim=-1, + ) # [B, 3] + + # Compute camera look-at matrix + forward_vector, up_vector, right_vector = compute_look_at_vectors(centers=centers, device=device) + + # Construct the 4x4 pose matrices + poses = construct_poses( + centers=centers, right_vector=right_vector, up_vector=up_vector, forward_vector=forward_vector, device=device + ) + + dirs = get_view_direction(theta, phi, angle_overhead, angle_front) if return_dirs else None + + return poses, dirs + + +class CirclePosesDataset(Dataset): + """ + A dataset class to generate circle poses. + """ + + def __init__( + self, + size: int = 100, + height: int = 256, + width: int = 256, + default_fovx: float = 20.0, + default_fovy: float = 20.0, + default_radius: float = 3.2, + default_polar: float = 90.0, + default_azimuth: float = 0.0, + angle_overhead: float = 30.0, + angle_front: float = 60.0, + near: float = 0.01, + far: float = 1000.0, + device: torch.device = 'cpu', + ) -> None: + """ + Initializes a new CirclePosesDataset instance. + + Parameters: + size (int): Number of samples in the dataset. + height (int): Height of the image. + width (int): Width of the image. + default_fovx (float): Default field of view in x-direction. + default_fovy (float): Default field of view in y-direction. + default_radius (float): Default radius of the circle. + default_polar (float): Default polar angle. + default_azimuth (float): Default azimuth angle. + angle_overhead (float): Overhead angle. + angle_front (float): Frontal angle. + near (float): Near clipping distance. + far (float): Far clipping distance. + device (torch.device): Device to generate data on. + """ + super().__init__() + self.size = size + self.height = height + self.width = width + + self.default_fovx = default_fovx + self.default_fovy = default_fovy + self.default_radius = default_radius + self.default_polar = default_polar + self.default_azimuth = default_azimuth + + self.angle_overhead = angle_overhead + self.angle_front = angle_front + self.near = near + self.far = far + + self.device = device + + # TODO(ahmadki): make camera type a parameter + self.camera = PinholeCamera( + width=self.width, height=self.height, near=self.near, far=self.far, device=self.device + ) + + def __len__(self) -> int: + """Returns the number of samples in the dataset.""" + return self.size + + def __getitem__(self, idx: int) -> Dict[str, Union[int, torch.Tensor]]: + """Get an item from the dataset. + + Args: + idx (int): Index of the item to retrieve. + + Returns: + dict: Data dictionary containing the following: + - height (int): Height of the image. + - width (int): Width of the image. + - rays_o (torch.Tensor): Ray origins, shape [height, width, 3]. + - rays_d (torch.Tensor): Ray directions, shape [height, width, 3]. + - dir (torch.Tensor): View direction, shape [3]. + - mvp (torch.Tensor): Model-view-projection matrix, shape [4, 4]. + - azimuth (torch.Tensor): Azimuth angle, shape [1]. + """ + # Initialize circle pose parameters + thetas = torch.FloatTensor([self.default_polar]).to(self.device) + phis = torch.FloatTensor([(idx / self.size) * 360]).to(self.device) + radius = torch.FloatTensor([self.default_radius]).to(self.device) + + # Generate circle poses and directions + poses, dirs = circle_poses( + radius=radius, + theta=thetas, + phi=phis, + angle_overhead=self.angle_overhead, + angle_front=self.angle_front, + return_dirs=True, + device=self.device, + ) + + # Compute camera intrinsics + intrinsics = self.camera.compute_intrinsics(fovx=self.default_fovx, fovy=self.default_fovy) + + # Compute projection matrix + projection = self.camera.compute_projection_matrix(focal_x=intrinsics[0], focal_y=intrinsics[1]) + mvp = projection @ torch.inverse(poses) # [1, 4, 4] + + # Sample rays + rays_o, rays_d = get_rays( + poses=poses, intrinsics=intrinsics, height=self.height, width=self.width, device=poses.device + ) + + # Compute azimuth delta + delta_azimuth = phis - self.default_azimuth + delta_azimuth[delta_azimuth > 180] -= 360 # range in [-180, 180] + + data = { + 'height': self.height, + 'width': self.width, + 'rays_o': rays_o, + 'rays_d': rays_d, + 'dir': dirs, + 'mvp': mvp, + 'azimuth': delta_azimuth, + } + + return data + + def collate_fn(self, batch: list) -> Dict[str, Union[int, torch.Tensor]]: + """Collate function to combine multiple data points into batches. + + Args: + batch (list): List of data dictionaries. + + Returns: + dict: Collated data. + """ + return { + 'height': self.height, + 'width': self.width, + 'rays_o': torch.cat([item['rays_o'] for item in batch], dim=0), + 'rays_d': torch.cat([item['rays_d'] for item in batch], dim=0), + 'mvp': torch.cat([item['mvp'] for item in batch], dim=0), + 'dir': torch.cat([item['dir'] for item in batch], dim=0), + 'azimuth': torch.cat([item['azimuth'] for item in batch], dim=0), + } diff --git a/nemo/collections/multimodal/data/nerf/random_poses.py b/nemo/collections/multimodal/data/nerf/random_poses.py new file mode 100644 index 000000000000..5dd7121b7063 --- /dev/null +++ b/nemo/collections/multimodal/data/nerf/random_poses.py @@ -0,0 +1,436 @@ +import random +from typing import Any, Dict, Iterator, List, Optional, Tuple + +import numpy as np +import torch +import torch.nn.functional as F +from torch.utils.data import IterableDataset + +from nemo.collections.multimodal.data.nerf.cameras import PinholeCamera +from nemo.collections.multimodal.data.nerf.utils import ( + compute_look_at_vectors, + construct_poses, + get_rays, + get_view_direction, +) + + +def linear_normalization(x: float, lower_bound: float, upper_bound: float) -> float: + """ + Linearly normalize a value between lower_bound and upper_bound to a value between 0 and 1. + + Parameters: + x: The value to normalize. + lower_bound: The lower bound of the range of x. + upper_bound: The upper bound of the range of x. + + Returns: + The normalized value between 0 and 1. + """ + return min(1, max(0, (x - lower_bound) / (upper_bound - lower_bound))) + + +def rand_poses( + size: int, + radius_range: List[float] = [1, 1.5], + theta_range: List[float] = [0, 120], + phi_range: List[float] = [0, 360], + angle_overhead: float = 30, + angle_front: float = 60, + uniform_sphere_rate: float = 0.5, + jitter: bool = False, + jitter_center: float = 0.2, + jitter_target: float = 0.2, + jitter_up: float = 0.02, + return_dirs: bool = False, + device: torch.device = "cuda", +) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: + """ + Generate random poses from an orbit camera. + + Args: + size (int): Number of poses to generate. + radius_range (List[float]): Min and max radii for camera [min, max]. + theta_range (List[float]): Elevation angle range in degrees [min, max]. + phi_range (List[float]): Azimuth angle range in degrees [min, max]. + angle_overhead (float): Overhead angle in degrees. + angle_front (float): Front angle in degrees. + uniform_sphere_rate (float): The probability of sampling from a uniform sphere. + jitter (bool): Whether to add noise to the poses. + jitter_center (float): Noise range for the camera center. + jitter_target (float): Noise range for the camera target. + jitter_up (float): Noise range for the camera up vector. + return_dirs (bool): Whether to return the view directions. + device (torch.device): The device on which to allocate tensors. + + Returns: + Tuple: Contains the following: + - poses (torch.Tensor): Generated poses, shape [size, 4, 4]. + - thetas (torch.Tensor): Elevation angles in degrees, shape [size]. + - phis (torch.Tensor): Azimuth angles in degrees, shape [size]. + - radius (torch.Tensor): Radii of the camera orbits, shape [size]. + - dirs (torch.Tensor, optional): View directions, if requested. + """ + + # Convert angles from degrees to radians + theta_range = np.radians(theta_range) + phi_range = np.radians(phi_range) + angle_overhead = np.radians(angle_overhead) + angle_front = np.radians(angle_front) + + # Generate radius for each pose + radius = torch.rand(size, device=device) * (radius_range[1] - radius_range[0]) + radius_range[0] + + # Generate camera center positions + if random.random() < uniform_sphere_rate: + centers, thetas, phis = sample_uniform_sphere(size=size, radius=radius, device=device) + else: + centers, thetas, phis = sample_orbit( + size=size, radius=radius, theta_range=theta_range, phi_range=phi_range, device=device + ) + + # Initialize targets to 0 (assuming 0 is a point in 3D space that cameras are looking at) + targets = torch.zeros_like(centers) + + # Apply jitter + if jitter: + centers += torch.rand_like(centers) * jitter_center - jitter_center / 2.0 + targets = torch.randn_like(centers) * jitter_target + + # Compute camera look-at matrix + forward_vector, up_vector, right_vector = compute_look_at_vectors( + centers=centers - targets, jitter_up=jitter_up if jitter else 0, device=device + ) + + # Construct the 4x4 pose matrices + poses = construct_poses( + centers=centers, right_vector=right_vector, up_vector=up_vector, forward_vector=forward_vector, device=device + ) + + # Optionally compute view directions + dirs = get_view_direction(thetas, phis, angle_overhead, angle_front) if return_dirs else None + + # Convert back to degrees for thetas and phis + thetas, phis = torch.rad2deg(thetas), torch.rad2deg(phis) + + return poses, thetas, phis, radius, dirs + + +def sample_uniform_sphere( + size: int, radius: torch.Tensor, device: torch.device +) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Sample points uniformly on a sphere. + + Args: + size (int): Number of points to sample. + device (torch.device): Device to allocate tensors on. + radius (torch.Tensor): Radii for the points. + + Returns: + Tuple: Contains the following: + - centers (torch.Tensor): The Cartesian coordinates of the sampled points. + - thetas (torch.Tensor): Elevation angles in radians. + - phis (torch.Tensor): Azimuth angles in radians. + """ + # Generate unit vectors + unit_centers = F.normalize( + torch.stack( + [ + torch.randn(size, device=device), + torch.abs(torch.randn(size, device=device)), + torch.randn(size, device=device), + ], + dim=-1, + ), + p=2, + dim=1, + ) + # Generate radii and scale unit vectors + centers = unit_centers * radius.unsqueeze(-1) + # Calculate spherical coordinates + thetas = torch.acos(unit_centers[:, 1]) + phis = torch.atan2(unit_centers[:, 0], unit_centers[:, 2]) + phis[phis < 0] += 2 * np.pi + + return centers, thetas, phis + + +def sample_orbit( + size: int, radius: torch.Tensor, theta_range: np.ndarray, phi_range: np.ndarray, device: torch.device = "cuda" +) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Sample points on a spherical orbit. + + Args: + size (int): Number of points to sample. + radius (torch.Tensor): Radii for the points. + theta_range (np.ndarray): Elevation angle range in radians [min, max]. + phi_range (np.ndarray): Azimuth angle range in radians [min, max]. + device (torch.device): Device to allocate tensors on. + + Returns: + Tuple: Contains the following: + - centers (torch.Tensor): The Cartesian coordinates of the sampled points. + - thetas (torch.Tensor): Elevation angles in radians. + - phis (torch.Tensor): Azimuth angles in radians. + """ + thetas = torch.rand(size, device=device) * (theta_range[1] - theta_range[0]) + theta_range[0] + phis = torch.rand(size, device=device) * (phi_range[1] - phi_range[0]) + phi_range[0] + phis[phis < 0] += 2 * np.pi + + x = radius * torch.sin(thetas) * torch.sin(phis) + y = radius * torch.cos(thetas) + z = radius * torch.sin(thetas) * torch.cos(phis) + + centers = torch.stack([x, y, z], dim=-1) + + return centers, thetas, phis + + +class RandomPosesDataset(IterableDataset): + """ + A dataset class to generate random poses. + """ + + def __init__( + self, + internal_batch_size: int = 100, + height: int = 256, + width: int = 256, + radius_range: Tuple[float, float] = [3.0, 3.5], + theta_range: Tuple[float, float] = [45.0, 105.0], + phi_range: Tuple[float, float] = [-180.0, 180.0], + fovx_range: Tuple[float, float] = [10.0, 30.0], + default_fovx: float = 20.0, + fovy_range: Tuple[float, float] = [10.0, 30.0], + default_fovy: float = 20.0, + default_radius: float = 3.2, + default_polar: float = 90.0, + default_azimuth: float = 0.0, + jitter: bool = False, + jitter_center: float = 0.2, + jitter_target: float = 0.2, + jitter_up: float = 0.02, + angle_overhead: float = 30.0, + angle_front: float = 60.0, + uniform_sphere_rate: float = 0.0, + near: float = 0.01, + far: float = 1000.0, + device: torch.device = 'cpu', + ) -> None: + """ + Initializes a new RandomPosesDataset instance. + + Parameters: + internal_batch_size (int): Number of samples to pre-generate internally. + height (int): Height of the image. + width (int): Width of the image. + radius_range (Tuple[float, float]): Range of generated radii. + theta_range (Tuple[float, float]): Range of generated theta angles. + phi_range (Tuple[float, float]): Range of generated phi angles. + fovx_range (Tuple[float, float]): Range of generated field of view in x-direction. + default_fovx (float): Default field of view in x-direction. + fovy_range (Tuple[float, float]): Range of generated field of view angles in y-direction. + default_fovy (float): Default field of view in y-direction. + default_radius (float): Default radius of the circle. + default_polar (float): Default polar angle. + default_azimuth (float): Default azimuth angle. + jitter (bool): Whether to jitter the poses. + jitter_center (float): Jittering center range. + jitter_target (float): Jittering target range. + jitter_up (float): Jittering up range. + angle_overhead (float): Overhead angle. + angle_front (float): Frontal angle. + uniform_sphere_rate (float): Rate of sampling uniformly on a sphere. + near (float): Near clipping distance. + far (float): Far clipping distance. + device (torch.device): Device to generate data on. + """ + + super().__init__() + self.height = height + self.width = width + self.internal_batch_size = internal_batch_size + + # TODO(ahmadki): expose for models other than dreamfusion + self.progressive_view = False + self.progressive_view_start_step = 0 + self.progressive_view_end_step = 500 + + self.default_fovx = default_fovx + self.default_fovy = default_fovy + self.default_radius = default_radius + self.default_polar = default_polar + self.default_azimuth = default_azimuth + self.same_fov_random = True + + self.radius_range = radius_range + self.theta_range = theta_range + self.phi_range = phi_range + self.fovx_range = fovx_range + self.fovy_range = fovy_range + + self.current_radius_range = radius_range + self.current_theta_range = theta_range + self.current_phi_range = phi_range + self.current_fovx_range = fovx_range + self.current_fovy_range = fovy_range + + self.angle_overhead = angle_overhead + self.angle_front = angle_front + self.uniform_sphere_rate = uniform_sphere_rate + self.jitter = jitter + self.jitter_center = jitter_center + self.jitter_target = jitter_target + self.jitter_up = jitter_up + + self.near = near + self.far = far + + self.device = device + + # TODO(ahmadki): make camera type a parameter + self.camera = PinholeCamera( + width=self.width, height=self.height, near=self.near, far=self.far, device=self.device + ) + + def update_step(self, epoch: int, global_step: int) -> None: + """ + Update the dataset at the beginning of each epoch. + + Parameters: + epoch (int): Current epoch. + global_step (int): Current global step. + + """ + if self.progressive_view: + self.progressive_view_update_step(global_step=global_step) + + def progressive_view_update_step(self, global_step: int) -> None: + """ + progressively relaxing view range + + Parameters: + global_step (int): Current global step. + """ + # TODO(ahmadki): support non-linear progressive_views + r = linear_normalization( + x=global_step, lower_bound=self.progressive_view_start_step, upper_bound=self.progressive_view_end_step + ) + self.current_phi_range = [ + (1 - r) * self.default_azimuth + r * self.phi_range[0], + (1 - r) * self.default_azimuth + r * self.phi_range[1], + ] + self.current_theta_range = [ + (1 - r) * self.default_polar + r * self.theta_range[0], + (1 - r) * self.default_polar + r * self.theta_range[1], + ] + self.current_radius_range = [ + (1 - r) * self.default_radius + r * self.radius_range[0], + (1 - r) * self.default_radius + r * self.radius_range[1], + ] + self.current_fovy_range = [ + (1 - r) * self.default_fovy + r * self.fovy_range[0], + (1 - r) * self.default_fovy + r * self.fovy_range[1], + ] + + def __iter__(self) -> Iterator[Dict[str, torch.Tensor]]: + """ + Returns an iterator over the dataset. + + Returns: + Iterator: An iterator over the dataset. + + """ + while True: + # Generate samples + rays_o, rays_d, dirs, mvp, delta_azimuth = self.generate_samples() + for i in range(self.internal_batch_size): + # Yield one sample at a time from the internal batch + yield { + 'height': self.height, + 'width': self.width, + 'rays_o': rays_o[i].unsqueeze(0), + 'rays_d': rays_d[i].unsqueeze(0), + 'dir': dirs[i].unsqueeze(0), + 'mvp': mvp[i].unsqueeze(0), + 'azimuth': delta_azimuth[i].unsqueeze(0), + } + + def generate_samples(self): + """ + Generate a batch of random poses. + + Returns: + Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor], Dict[str, torch.Tensor], Dict[str, torch.Tensor], Dict[str, torch.Tensor]]: + A tuple containing: + - rays (Dict[str, torch.Tensor]): A dictionary containing the origin and direction of the rays. + - dirs (torch.Tensor): A tensor containing the directions of the rays. + - mvp (torch.Tensor): A tensor containing the model-view-projection matrix. + - azimuth (torch.Tensor): A A tensor containing the azimuth angle. + """ + # Generate random poses and directions + poses, dirs, thetas, phis, radius = rand_poses( + size=self.internal_batch_size, + radius_range=self.current_radius_range, + theta_range=self.current_theta_range, + phi_range=self.current_phi_range, + angle_overhead=self.angle_overhead, + angle_front=self.angle_front, + uniform_sphere_rate=self.uniform_sphere_rate, + jitter=self.jitter, + jitter_center=self.jitter_center, + jitter_target=self.jitter_target, + jitter_up=self.jitter_up, + return_dirs=True, + device=self.device, + ) + + # random focal + if self.same_fov_random: + fovx_random = random.random() + fovy_random = fovx_random + else: + fovx_random = random.random() + fovy_random = random.random() + fovx = fovx_random * (self.current_fovx_range[1] - self.current_fovx_range[0]) + self.current_fovx_range[0] + fovy = fovy_random * (self.current_fovy_range[1] - self.current_fovy_range[0]) + self.current_fovy_range[0] + + # Compute camera intrinsics + intrinsics = self.camera.compute_intrinsics(fovx=fovx, fovy=fovy) + + # Compute projection matrix + projection = self.camera.compute_projection_matrix(focal_x=intrinsics[0], focal_y=intrinsics[1]) + mvp = projection @ torch.inverse(poses) # [internal batch size, 4, 4] + + # Sample rays + rays_o, rays_d = get_rays( + poses=poses, intrinsics=intrinsics, height=self.height, width=self.width, device=poses.device + ) + + # Compute azimuth delta + delta_azimuth = phis - self.default_azimuth + delta_azimuth[delta_azimuth > 180] -= 360 # range in [-180, 180] + + return rays_o, rays_d, dirs, mvp, delta_azimuth + + def collate_fn(self, batch: List[Dict[str, Any]]) -> Dict[str, Any]: + """ + Collate function to bundle multiple samples into a single batch. + + Args: + batch (List[Dict]): List of samples to collate. + + Returns: + Dict: A dictionary containing the collated batch. + """ + return { + 'height': self.height, + 'width': self.width, + 'rays_o': torch.cat([item['rays_o'] for item in batch], dim=0), + 'rays_d': torch.cat([item['rays_d'] for item in batch], dim=0), + 'mvp': torch.cat([item['mvp'] for item in batch], dim=0), + 'dir': torch.cat([item['dir'] for item in batch], dim=0), + 'azimuth': torch.cat([item['azimuth'] for item in batch], dim=0), + } diff --git a/nemo/collections/multimodal/data/nerf/utils.py b/nemo/collections/multimodal/data/nerf/utils.py new file mode 100644 index 000000000000..117d0de701fb --- /dev/null +++ b/nemo/collections/multimodal/data/nerf/utils.py @@ -0,0 +1,204 @@ +from typing import Dict, Optional + +import numpy as np +import torch +import torch.nn.functional as F + + +def get_view_direction(thetas: torch.Tensor, phis: torch.Tensor, overhead: float, front: float) -> torch.Tensor: + """ + Get the view direction based on given theta and phi values. + + Parameters: + - thetas (torch.Tensor): Array of theta values with shape [B,] + - phis (torch.Tensor): Array of phi values with shape [B,] + - overhead (float): Threshold for determining top and bottom views. + - front (float): Threshold for determining front, back and side views. + + Returns: + - torch.Tensor: Array of view directions. Values can be: + 0: front + 1: side (camera left) + 2: back + 3: side (camera right) + 4: top + 5: bottom + + Notes: + - Phi and theta values are assumed to be in radians. + """ + + num_samples = thetas.shape[0] + res = torch.zeros(num_samples, dtype=torch.long) + + # Normalize phis values to [0, 2*pi] + phis = phis % (2 * np.pi) + + # Determine direction based on phis + res[(phis < front / 2) | (phis >= 2 * np.pi - front / 2)] = 0 + res[(phis >= front / 2) & (phis < np.pi - front / 2)] = 1 + res[(phis >= np.pi - front / 2) & (phis < np.pi + front / 2)] = 2 + res[(phis >= np.pi + front / 2) & (phis < 2 * np.pi - front / 2)] = 3 + + # Override directions based on thetas for top and bottom views + res[thetas <= overhead] = 4 + res[thetas >= (np.pi - overhead)] = 5 + + return res + + +def compute_look_at_vectors(centers: torch.Tensor, jitter_up: Optional[float] = None, device: torch.device = "cuda"): + """ + Compute the look-at vectors for camera poses. + + Parameters: + centers: The centers of the cameras. + jitter_up: The noise range for the up vector of the camera. + device: Device to allocate the output tensor. + + Returns: + Tuple: Contains the following: + - forward_vector: The forward vectors of the cameras, shape [B, 3]. + - up_vector: The up vectors of the cameras, shape [B, 3]. + - right_vector: The right vectors of the cameras, shape [B, 3]. + """ + forward_vector = F.normalize(centers) + up_vector = torch.FloatTensor([0, 1, 0]).to(device).unsqueeze(0).repeat(len(centers), 1) + right_vector = F.normalize(torch.cross(forward_vector, up_vector, dim=-1)) + up_noise = torch.randn_like(up_vector) * jitter_up if jitter_up is not None else 0 + up_vector = F.normalize(torch.cross(right_vector, forward_vector, dim=-1)) + up_vector = F.normalize(torch.cross(right_vector, forward_vector, dim=-1) + up_noise) + + return forward_vector, up_vector, right_vector + + +def construct_poses( + centers: torch.Tensor, + right_vector: torch.Tensor, + up_vector: torch.Tensor, + forward_vector: torch.Tensor, + device: torch.device, +) -> torch.Tensor: + """ + Construct the 4x4 pose matrices. + + Args: + size (int): Number of pose matrices to construct. + centers (torch.Tensor): The Cartesian coordinates of the camera centers. + right_vector (torch.Tensor): The right vectors of the cameras. + up_vector (torch.Tensor): The up vectors of the cameras. + forward_vector (torch.Tensor): The forward vectors of the cameras. + device (torch.device): Device to allocate tensors on. + + Returns: + torch.Tensor: The pose matrices, shape [size, 4, 4]. + """ + poses = torch.eye(4, dtype=torch.float32, device=device).unsqueeze(0).repeat(len(centers), 1, 1) + poses[:, :3, :3] = torch.stack([right_vector, up_vector, forward_vector], dim=-1) + poses[:, :3, 3] = centers + + return poses + + +@torch.cuda.amp.autocast(enabled=False) +def get_rays( + poses: torch.Tensor, + intrinsics: torch.Tensor, + height: int, + width: int, + num_samples: Optional[int] = None, + error_map: Optional[torch.Tensor] = None, + device: torch.device = "cuda", +) -> Dict[str, torch.Tensor]: + """ + Generates rays from camera poses and intrinsics. + + Args: + poses (torch.Tensor): Camera poses, shape [B, 4, 4] (cam2world). + intrinsics (torch.Tensor): Intrinsic camera parameters [fx, fy, cx, cy]. + height (int): Height of the image. + width (int): Width of the image. + num_samples: Number of rays to sample, default is None for all rays. + error_map: Optional tensor to use for non-uniform sampling of rays. + device (torch.device): Device on which to generate the rays. + + Returns: + Dict[str, torch.Tensor]: A dictionary containing the following keys: + - 'rays_o': Origin of the rays, shape [B, N, 3] + - 'rays_d': Directions of the rays, shape [B, N, 3] + - 'inds': Indices of the rays, shape [B, N] (if N > 0) + - 'inds_coarse': Coarse indices of the rays, shape [B, N] (if error_map is not None) + """ + + batch_size = poses.shape[0] + fx, fy, cx, cy = intrinsics + + i, j = torch.meshgrid( + torch.linspace(0, width - 1, width, device=device), + torch.linspace(0, height - 1, height, device=device), + indexing='ij', + ) + i = i.t().reshape([1, height * width]).expand([batch_size, height * width]) + 0.5 + j = j.t().reshape([1, height * width]).expand([batch_size, height * width]) + 0.5 + + results = {} + + if num_samples is not None: + num_samples = min(num_samples, height * width) + + if error_map is None: + sampled_indices = torch.randint(0, height * width, size=[num_samples], device=device) + sampled_indices = sampled_indices.expand([batch_size, num_samples]) + else: + sampled_indices, sampled_indices_coarse = non_uniform_sampling( + error_map=error_map, num_samples=num_samples, height=height, width=width, device=device + ) + results['sampled_indices_coarse'] = sampled_indices_coarse + + i = torch.gather(i, -1, sampled_indices) + j = torch.gather(j, -1, sampled_indices) + results['sampled_indices'] = sampled_indices + else: + sampled_indices = torch.arange(height * width, device=device).expand([batch_size, height * width]) + + zs = torch.full_like(i, -1.0) + xs = -(i - cx) / fx * zs + ys = (j - cy) / fy * zs + directions = torch.stack((xs, ys, zs), dim=-1) + + rays_d = directions @ poses[:, :3, :3].transpose(-1, -2) + rays_o = poses[..., :3, 3].unsqueeze(-2).expand_as(rays_d) + + rays_o = rays_o.view(-1, height, width, 3) + rays_d = rays_d.view(-1, height, width, 3) + + return rays_o, rays_d + + +def non_uniform_sampling( + error_map: torch.Tensor, batch_size: int, num_samples: int, height: int, width: int, device: torch.device = "cuda" +) -> torch.Tensor: + """ + Perform non-uniform sampling based on the provided error_map. + + Parameters: + error_map: The error map for non-uniform sampling. + batch_size (int): Batch size of the generated samples. + num_samples (int): Number of samples to pick. + height (int): Height of the image. + width (int): Width of the image. + device: Device on which tensors are stored. + + Returns: + A tensor containing the sampled indices. + """ + + sampled_indices_coarse = torch.multinomial(error_map.to(device), num_samples, replacement=False) + inds_x, inds_y = sampled_indices_coarse // 128, sampled_indices_coarse % 128 + sx, sy = height / 128, width / 128 + + inds_x = (inds_x * sx + torch.rand(batch_size, num_samples, device=device) * sx).long().clamp(max=height - 1) + inds_y = (inds_y * sy + torch.rand(batch_size, num_samples, device=device) * sy).long().clamp(max=width - 1) + sampled_indices = inds_x * width + inds_y + + return sampled_indices, sampled_indices_coarse diff --git a/nemo/collections/multimodal/data/neva/__init__.py b/nemo/collections/multimodal/data/neva/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/data/neva/conversation.py b/nemo/collections/multimodal/data/neva/conversation.py new file mode 100644 index 000000000000..4d46bf4decf2 --- /dev/null +++ b/nemo/collections/multimodal/data/neva/conversation.py @@ -0,0 +1,406 @@ +import dataclasses +import re +from enum import Enum, auto +from typing import List, Tuple + + +class SeparatorStyle(Enum): + """Different separator style.""" + + SINGLE = auto() + TWO = auto() + MPT = auto() + PLAIN = auto() + LLAMA_2 = auto() + NVGPT = auto() + + +@dataclasses.dataclass +class Conversation: + """A class that keeps all conversation history.""" + + system: str + roles: List[str] + messages: List[List[str]] + offset: int + sep_style: SeparatorStyle = SeparatorStyle.SINGLE + sep: str = "###" + sep2: str = None + version: str = "Unknown" + skip_next: bool = False + + def get_prompt(self): + messages = self.messages + if len(messages) > 0 and type(messages[0][1]) is tuple: + messages = self.messages.copy() + init_role, init_msg = messages[0].copy() + init_msg = init_msg[0].replace("", "").strip() + if 'mmtag' in self.version: + messages[0] = (init_role, init_msg) + messages.insert(0, (self.roles[0], "")) + messages.insert(1, (self.roles[1], "Received.")) + else: + messages[0] = (init_role, "\n" + init_msg) + + if self.sep_style == SeparatorStyle.SINGLE: + ret = self.system + self.sep + for role, message in messages: + if message: + if type(message) is tuple: + message, _, _ = message + ret += role + ": " + message + self.sep + else: + ret += role + ":" + elif self.sep_style == SeparatorStyle.TWO: + seps = [self.sep, self.sep2] + ret = self.system + seps[0] + for i, (role, message) in enumerate(messages): + if message: + if type(message) is tuple: + message, _, _ = message + ret += role + ": " + message + seps[i % 2] + else: + ret += role + ":" + elif self.sep_style == SeparatorStyle.MPT: + ret = self.system + self.sep + for role, message in messages: + if message: + if type(message) is tuple: + message, _, _ = message + ret += role + message + self.sep + else: + ret += role + elif self.sep_style == SeparatorStyle.LLAMA_2: + wrap_sys = lambda msg: f"<>\n{msg}\n<>\n\n" + wrap_inst = lambda msg: f"[INST] {msg} [/INST]" + ret = "" + + for i, (role, message) in enumerate(messages): + if i == 0: + assert message, "first message should not be none" + assert role == self.roles[0], "first message should come from user" + if message: + if type(message) is tuple: + message, _, _ = message + if i == 0: + message = wrap_sys(self.system) + message + if i % 2 == 0: + message = wrap_inst(message) + ret += self.sep + message + else: + ret += " " + message + " " + self.sep2 + else: + ret += "" + ret = ret.lstrip(self.sep) + elif self.sep_style == SeparatorStyle.PLAIN: + seps = [self.sep, self.sep2] + ret = self.system + for i, (role, message) in enumerate(messages): + if message: + if type(message) is tuple: + message, _, _ = message + ret += message + seps[i % 2] + else: + ret += "" + elif self.sep_style == SeparatorStyle.NVGPT: + ret = self.sep2 + self.system + self.sep + for role, message in messages: + if message: + if type(message) is tuple: + message, _, _ = message + ret += role + '\n' + message + '\n' + self.sep + else: + ret += role + '\n' + else: + raise ValueError(f"Invalid style: {self.sep_style}") + + return ret + + def append_message(self, role, message): + self.messages.append([role, message]) + + def get_images(self, return_pil=False): + images = [] + for i, (role, msg) in enumerate(self.messages[self.offset :]): + if i % 2 == 0: + if type(msg) is tuple: + import base64 + from io import BytesIO + + from PIL import Image + + msg, image, image_process_mode = msg + if image_process_mode == "Pad": + + def expand2square(pil_img, background_color=(122, 116, 104)): + width, height = pil_img.size + if width == height: + return pil_img + elif width > height: + result = Image.new(pil_img.mode, (width, width), background_color) + result.paste(pil_img, (0, (width - height) // 2)) + return result + else: + result = Image.new(pil_img.mode, (height, height), background_color) + result.paste(pil_img, ((height - width) // 2, 0)) + return result + + image = expand2square(image) + elif image_process_mode == "Crop": + pass + elif image_process_mode == "Resize": + image = image.resize((336, 336)) + else: + raise ValueError(f"Invalid image_process_mode: {image_process_mode}") + max_hw, min_hw = max(image.size), min(image.size) + aspect_ratio = max_hw / min_hw + max_len, min_len = 800, 400 + shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw)) + longest_edge = int(shortest_edge * aspect_ratio) + W, H = image.size + if H > W: + H, W = longest_edge, shortest_edge + else: + H, W = shortest_edge, longest_edge + image = image.resize((W, H)) + if return_pil: + images.append(image) + else: + buffered = BytesIO() + image.save(buffered, format="JPEG") + img_b64_str = base64.b64encode(buffered.getvalue()).decode() + images.append(img_b64_str) + return images + + def to_gradio_chatbot(self): + ret = [] + for i, (role, msg) in enumerate(self.messages[self.offset :]): + if i % 2 == 0: + if type(msg) is tuple: + import base64 + from io import BytesIO + + msg, image, image_process_mode = msg + max_hw, min_hw = max(image.size), min(image.size) + aspect_ratio = max_hw / min_hw + max_len, min_len = 800, 400 + shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw)) + longest_edge = int(shortest_edge * aspect_ratio) + W, H = image.size + if H > W: + H, W = longest_edge, shortest_edge + else: + H, W = shortest_edge, longest_edge + image = image.resize((W, H)) + # image = image.resize((224, 224)) + buffered = BytesIO() + image.save(buffered, format="JPEG") + img_b64_str = base64.b64encode(buffered.getvalue()).decode() + img_str = f'user upload image' + msg = msg.replace('', img_str) + ret.append([msg, None]) + else: + ret[-1][-1] = msg + return ret + + def copy(self): + return Conversation( + system=self.system, + roles=self.roles, + messages=[[x, y] for x, y in self.messages], + offset=self.offset, + sep_style=self.sep_style, + sep=self.sep, + sep2=self.sep2, + version=self.version, + ) + + def dict(self): + if len(self.get_images()) > 0: + return { + "system": self.system, + "roles": self.roles, + "messages": [[x, y[0] if type(y) is tuple else y] for x, y in self.messages], + "offset": self.offset, + "sep": self.sep, + "sep2": self.sep2, + } + return { + "system": self.system, + "roles": self.roles, + "messages": self.messages, + "offset": self.offset, + "sep": self.sep, + "sep2": self.sep2, + } + + +# . . +# NVGPT +# . . + +conv_nvgpt = Conversation( + system="""A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n\n""", + roles=("User", "Assistant"), + version="nvgpt", + messages=(), + offset=0, + sep_style=SeparatorStyle.NVGPT, + sep="", + sep2="System\n", +) + +conv_vicuna_v0 = Conversation( + system="A chat between a curious human and an artificial intelligence assistant. " + "The assistant gives helpful, detailed, and polite answers to the human's questions.", + roles=("Human", "Assistant"), + messages=( + ("Human", "What are the key differences between renewable and non-renewable energy sources?"), + ( + "Assistant", + "Renewable energy sources are those that can be replenished naturally in a relatively " + "short amount of time, such as solar, wind, hydro, geothermal, and biomass. " + "Non-renewable energy sources, on the other hand, are finite and will eventually be " + "depleted, such as coal, oil, and natural gas. Here are some key differences between " + "renewable and non-renewable energy sources:\n" + "1. Availability: Renewable energy sources are virtually inexhaustible, while non-renewable " + "energy sources are finite and will eventually run out.\n" + "2. Environmental impact: Renewable energy sources have a much lower environmental impact " + "than non-renewable sources, which can lead to air and water pollution, greenhouse gas emissions, " + "and other negative effects.\n" + "3. Cost: Renewable energy sources can be more expensive to initially set up, but they typically " + "have lower operational costs than non-renewable sources.\n" + "4. Reliability: Renewable energy sources are often more reliable and can be used in more remote " + "locations than non-renewable sources.\n" + "5. Flexibility: Renewable energy sources are often more flexible and can be adapted to different " + "situations and needs, while non-renewable sources are more rigid and inflexible.\n" + "6. Sustainability: Renewable energy sources are more sustainable over the long term, while " + "non-renewable sources are not, and their depletion can lead to economic and social instability.\n", + ), + ), + offset=2, + sep_style=SeparatorStyle.SINGLE, + sep="###", +) + +conv_vicuna_v1 = Conversation( + system="A chat between a curious user and an artificial intelligence assistant. " + "The assistant gives helpful, detailed, and polite answers to the user's questions.", + roles=("USER", "ASSISTANT"), + version="v1", + messages=(), + offset=0, + sep_style=SeparatorStyle.TWO, + sep=" ", + sep2="", +) + +conv_llama_2 = Conversation( + system="""You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. + +If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.""", + roles=("USER", "ASSISTANT"), + version="llama_v2", + messages=(), + offset=0, + sep_style=SeparatorStyle.LLAMA_2, + sep="", + sep2="", +) + +conv_llava_llama_2 = Conversation( + system="You are a helpful language and vision assistant. " + "You are able to understand the visual content that the user provides, " + "and assist the user with a variety of tasks using natural language.", + roles=("USER", "ASSISTANT"), + version="llama_v2", + messages=(), + offset=0, + sep_style=SeparatorStyle.LLAMA_2, + sep="", + sep2="", +) + +conv_mpt = Conversation( + system="""<|im_start|>system +A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.""", + roles=("<|im_start|>user\n", "<|im_start|>assistant\n"), + version="mpt", + messages=(), + offset=0, + sep_style=SeparatorStyle.MPT, + sep="<|im_end|>", +) + +conv_llava_plain = Conversation( + system="", roles=("", ""), messages=(), offset=0, sep_style=SeparatorStyle.PLAIN, sep="\n", +) + +conv_llava_v0 = Conversation( + system="A chat between a curious human and an artificial intelligence assistant. " + "The assistant gives helpful, detailed, and polite answers to the human's questions.", + roles=("Human", "Assistant"), + messages=(("Human", "Hi!"), ("Assistant", "Hi there! How can I help you today?")), + offset=2, + sep_style=SeparatorStyle.SINGLE, + sep="###", +) + +conv_llava_v0_mmtag = Conversation( + system="A chat between a curious user and an artificial intelligence assistant. " + "The assistant is able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language." + "The visual content will be provided with the following format: visual content.", + roles=("Human", "Assistant"), + messages=(), + offset=0, + sep_style=SeparatorStyle.SINGLE, + sep="###", + version="v0_mmtag", +) + +conv_llava_v1 = Conversation( + system="A chat between a curious human and an artificial intelligence assistant. " + "The assistant gives helpful, detailed, and polite answers to the human's questions.", + roles=("USER", "ASSISTANT"), + version="v1", + messages=(), + offset=0, + sep_style=SeparatorStyle.TWO, + sep=" ", + sep2="", +) + +conv_llava_v1_mmtag = Conversation( + system="A chat between a curious user and an artificial intelligence assistant. " + "The assistant is able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language." + "The visual content will be provided with the following format: visual content.", + roles=("USER", "ASSISTANT"), + messages=(), + offset=0, + sep_style=SeparatorStyle.TWO, + sep=" ", + sep2="", + version="v1_mmtag", +) + +default_conversation = conv_vicuna_v0 +conv_templates = { + "default": conv_vicuna_v0, + "v0": conv_vicuna_v0, + "v1": conv_vicuna_v1, + "vicuna_v1": conv_vicuna_v1, + "llama_2": conv_llama_2, + "plain": conv_llava_plain, + "v0_plain": conv_llava_plain, + "llava_v0": conv_llava_v0, + "v0_mmtag": conv_llava_v0_mmtag, + "llava_v1": conv_llava_v1, + "v1_mmtag": conv_llava_v1_mmtag, + "llava_llama_2": conv_llava_llama_2, + "mpt": conv_mpt, + "nvgpt": conv_nvgpt, +} + + +if __name__ == "__main__": + print(default_conversation.get_prompt()) diff --git a/nemo/collections/multimodal/data/neva/neva_dataset.py b/nemo/collections/multimodal/data/neva/neva_dataset.py new file mode 100644 index 000000000000..dc4a609f8d48 --- /dev/null +++ b/nemo/collections/multimodal/data/neva/neva_dataset.py @@ -0,0 +1,584 @@ +import copy +import json +import logging +import os +import pathlib +import re +import tarfile +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional, Sequence, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +import transformers +from einops import rearrange +from omegaconf import DictConfig +from PIL import Image +from torch.utils.data import Dataset, default_collate +from transformers import CLIPImageProcessor + +import nemo.collections.multimodal.data.neva.conversation as conversation_lib +from nemo.collections.multimodal.data.kosmos.kosmos_dataset import tokenize_and_insert_media_tokens +from nemo.collections.nlp.modules.common.megatron.utils import get_ltor_masks_and_position_ids + +MAX_NUM_IMAGES = 4 +IGNORE_INDEX = -1 +DEFAULT_PAD_TOKEN = "" +DEFAULT_BOS_TOKEN = "" +DEFAULT_EOS_TOKEN = "" +DEFAULT_UNK_TOKEN = "" +DEFAULT_IMAGE_TOKEN = "" +DEFAULT_SYSTEM_TOKEN = "" +DEFAULT_SEPARATOR_TOKEN = "" +DEFAULT_LABELS_TOKEN = "" +DEFAULT_IMAGE_PATCH_TOKEN = "" +DEFAULT_IM_START_TOKEN = "" +DEFAULT_IM_END_TOKEN = "" + + +class TarOrFolderImageLoader: + def __init__(self, image_folder): + self.image_folder = image_folder + self.tar_index = {} + if self.image_folder.endswith('.tar'): + self.build_index() + + def build_index(self): + with tarfile.open(self.image_folder, 'r') as tar: + for member in tar.getmembers(): + self.tar_index[member.name] = member + + def open_image(self, file_name): + if self.image_folder.endswith('.tar'): + with tarfile.open(self.image_folder, 'r') as tar: + member = self.tar_index.get(file_name) + if member: + f = tar.extractfile(member) + return Image.open(f).convert('RGB') + else: + return Image.open(os.path.join(self.image_folder, file_name)).convert('RGB') + return None + + +def tokenize( + texts: Union[str, List[str]], tokenizer: Any, context_length: int, add_extra_token: int, +) -> torch.LongTensor: + """ + Returns the tokenized representation of given input string(s). If the list of tokens exceeds the context + length plus the number of extra tokens, it gets truncated. If it's smaller, it gets padded with zeros. + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize. + tokenizer : Any + A tokenizer to be used for tokenization. + context_length : int + The context length to be used for the output tensor. + add_extra_token : int + Number of extra tokens to add, should be either 0 or 1. + + Returns + ------- + torch.LongTensor + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length + add_extra_token]. + """ + assert add_extra_token == 0 or add_extra_token == 1, "`add_extra_token` should be either 0 or 1." + + texts_is_str = False + if isinstance(texts, str): + texts = [texts] + texts_is_str = True + tokens = tokenizer.text_to_ids(texts) + max_len = max([len(token) for token in tokens]) + context_length = min(max_len - add_extra_token, context_length) + # truncate and padding + result = torch.zeros(len(tokens), context_length + add_extra_token, dtype=torch.long) + + for i, token in enumerate(tokens): + if len(token) > context_length + add_extra_token: + token = token[: context_length + add_extra_token] # Truncate + result[i, : len(token)] = torch.tensor(token) + if texts_is_str: + result = result[0] + return result + + +def preprocess_multimodal(sources: dict, multimodal_cfg: dict, cur_token_len: int,) -> Dict: + is_multimodal = multimodal_cfg['is_multimodal'] + image_token_len = cur_token_len + if not is_multimodal: + return sources + + for source in sources: + conversation = source['conversations'] + if multimodal_cfg['sep_image_conv_front']: + assert DEFAULT_IMAGE_TOKEN in conversation[0]['value'] + conversation[0]['value'] = conversation[0]['value'].replace(DEFAULT_IMAGE_TOKEN, '').strip() + conversation[0]['value'] = ( + DEFAULT_IMAGE_TOKEN + + conversation_lib.default_conversation.sep + + conversation_lib.default_conversation.roles[0] + + ": " + + conversation[0]['value'] + ) + for turn in conversation: + if multimodal_cfg['use_im_start_end']: + replace_token = DEFAULT_IMAGE_PATCH_TOKEN * image_token_len + else: + replace_token = DEFAULT_IMAGE_PATCH_TOKEN * (image_token_len - 2) + replace_token = DEFAULT_IM_START_TOKEN + replace_token + DEFAULT_IM_END_TOKEN + turn["value"] = turn["value"].replace(DEFAULT_IMAGE_TOKEN, replace_token) + + return sources + + +def preprocess_llama_2(sources: dict, tokenizer: transformers.PreTrainedTokenizer, cfg,) -> Dict: + conv = conversation_lib.conv_llava_llama_2.copy() + roles = {"human": conv.roles[0], "gpt": conv.roles[1]} + + # Apply prompt templates + conversations = [] + for i, source in enumerate(sources): + source = source['conversations'] + if roles[source[0]["from"]] != conv.roles[0]: + # Skip the first one if it is not from human + source = source[1:] + + conv.messages = [] + for j, sentence in enumerate(source): + role = roles[sentence["from"]] + assert role == conv.roles[j % 2], f"{i}" + conv.append_message(role, sentence["value"]) + conversations.append(conv.get_prompt()) + + add_extra_token = cfg.get("add_extra_token") + + # Tokenize conversations + tokens = tokenize( + texts=conversations, + tokenizer=tokenizer, + context_length=cfg.get("context_length"), + add_extra_token=add_extra_token, + ) + + # llama tricks + tokens[tokens == 32003] = 0 # DEFAULT_IMAGE_PATCH_TOKEN + tokens[tokens == 32006] = 1 # + tokens[tokens == 32007] = 2 # + labels = tokens.clone().detach() + + # Mask labels + sep = "[/INST] " + for conversation, target in zip(conversations, labels): + rounds = conversation.split(conv.sep2) + cur_len = 0 + for i, rou in enumerate(rounds): + + if rou == "": + break + + parts = rou.split(sep) + if len(parts) != 2: + break + parts[0] += sep + + round_len = len(tokenizer.text_to_ids(rou + conv.sep2)) + if i > 0: + round_len -= 1 # Remove extra token added by sp tokenizer + instruction_len = len(tokenizer.text_to_ids(parts[0])) - 1 + target[cur_len : cur_len + instruction_len] = IGNORE_INDEX + + cur_len += round_len + target[cur_len:] = IGNORE_INDEX + + # Check if masking working correctly + # print([x for x in zip(tokens[0].numpy().tolist(), labels[0].numpy().tolist())]) + + if add_extra_token: + tokens = tokens[:, :-1].contiguous() + labels = labels[:, 1:].contiguous() + else: + labels = torch.roll(labels, shifts=-1, dims=-1) + labels[:, -1] = IGNORE_INDEX + + return dict(tokens=tokens, labels=labels,) + + +def preprocess_v1(sources: dict, tokenizer: transformers.PreTrainedTokenizer, cfg,) -> Dict: + conv = conversation_lib.conv_vicuna_v1.copy() + roles = {"human": conv.roles[0], "gpt": conv.roles[1]} + + # Apply prompt templates + conversations = [] + for i, source in enumerate(sources): + source = source['conversations'] + if roles[source[0]["from"]] != conv.roles[0]: + # Skip the first one if it is not from human + source = source[1:] + + conv.messages = [] + for j, sentence in enumerate(source): + role = roles[sentence["from"]] + assert role == conv.roles[j % 2], f"{i}" + conv.append_message(role, sentence["value"]) + conversations.append(conv.get_prompt()) + + # Tokenize conversations + + add_extra_token = cfg.get("add_extra_token") + # Tokenize conversations + tokens = tokenize( + texts=conversations, + tokenizer=tokenizer, + context_length=cfg.get("context_length"), + add_extra_token=add_extra_token, + ) + + labels = tokens.clone().detach() + + # Mask labels + sep = conv.sep + conv.roles[1] + ": " + for conversation, target in zip(conversations, labels): + + rounds = conversation.split(conv.sep2) + cur_len = 1 + target[:cur_len] = IGNORE_INDEX + for i, rou in enumerate(rounds): + if rou == "": + break + + parts = rou.split(sep) + if len(parts) != 2: + break + parts[0] += sep + + round_len = len(tokenizer.text_to_ids(rou)) + instruction_len = len(tokenizer.text_to_ids(parts[0])) - 2 + + target[cur_len : cur_len + instruction_len] = IGNORE_INDEX + + cur_len += round_len + target[cur_len:] = IGNORE_INDEX + + if add_extra_token: + tokens = tokens[:, :-1].contiguous() + labels = labels[:, 1:].contiguous() + else: + labels = torch.roll(labels, shifts=-1, dims=-1) + labels[:, -1] = IGNORE_INDEX + + return dict(tokens=tokens, labels=labels,) + + +def preprocess_nvgpt(sources: dict, tokenizer: transformers.PreTrainedTokenizer, cfg,) -> Dict: + """ + Given a record this transform: + 1. Add signal '<>' at the beginning each sentence, with end signal '\n'; + 2. Concatenate conversations together; + 3. Tokenize the concatenated conversation; + 4. Make a deepcopy as the target. Mask human words with IGNORE_INDEX. + """ + + conv = conversation_lib.conv_nvgpt.copy() + + # Apply prompt templates + conversations = [] + for source in sources: + conv.messages = [] + conv.system = source.get('system', conv.system) + if len(source['conversations']) >= 2: + conv.roles = (source['conversations'][0]['from'], source['conversations'][1]['from']) + + strip_end_for_inference = False + for turn in source['conversations']: + if 'label' in turn: + value = DEFAULT_LABELS_TOKEN + turn['label'] + '\n' + turn['value'] + conv.append_message(turn['from'], value) + if not turn["value"]: + strip_end_for_inference = ( + True # in inference, current turn is empty, thus end tokens need to striped. + ) + else: + conv.append_message(turn['from'], turn['value']) + context = conv.get_prompt() + if strip_end_for_inference: + context = context.rstrip("\n") + "\n" + conversations.append(context) + + add_extra_token = cfg.get("add_extra_token") + # Tokenize conversations + tokens = tokenize( + texts=conversations, + tokenizer=tokenizer, + context_length=cfg.get("context_length"), + add_extra_token=add_extra_token, + ) + + labels = tokens.clone().detach() + + # Mask targets + sep = conv.sep + conv.roles[1] + "\n" + labels_str_regexp = re.compile(f"{DEFAULT_LABELS_TOKEN}quality:.*\n") + for conversation, target in zip(conversations, labels): + rounds = conversation.split(conv.sep) + re_rounds = [conv.sep.join(rounds[:3])] # system + user + gpt + + for conv_idx in range(3, len(rounds), 2): + re_rounds.append(conv.sep.join(rounds[conv_idx : conv_idx + 2])) # user + gpt + + cur_len = 0 + for i, rou in enumerate(re_rounds): + if rou == "": + break + parts = rou.split(sep) + if len(parts) != 2: + break + + # Match the pattern + match = labels_str_regexp.search(parts[1]) + labels_str = match.group() if match else "" + + instruction_len = len(tokenizer.text_to_ids(parts[0] + sep + labels_str)) + round_len = len(tokenizer.text_to_ids(rou + conv.sep)) + target[cur_len : cur_len + instruction_len] = IGNORE_INDEX + + cur_len += round_len + target[cur_len:] = IGNORE_INDEX + + if add_extra_token: + tokens = tokens[:, :-1].contiguous() + labels = labels[:, 1:].contiguous() + else: + labels = torch.roll(labels, shifts=-1, dims=-1) + labels[:, -1] = IGNORE_INDEX + + return dict(tokens=tokens, labels=labels,) + + +class LazySupervisedDataset(Dataset): + """Dataset for supervised fine-tuning.""" + + def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer, multimodal_cfg: dict): + super(LazySupervisedDataset, self).__init__() + logging.warning("Loading data...") + if data_path is not None: + logging.warning("Loading data...") + list_data_dict = json.load(open(data_path, "r")) + else: + list_data_dict = [] + + logging.warning("Formatting inputs...Skip in lazy mode") + self.tokenizer = tokenizer + self.list_data_dict = list_data_dict + self.multimodal_cfg = multimodal_cfg + self.conv_template = multimodal_cfg["conv_template"] + self.image_folder = multimodal_cfg['image_folder'] + self.processor = multimodal_cfg["image_processor"] + + self.image_loader = TarOrFolderImageLoader(self.image_folder) + + def __len__(self): + return len(self.list_data_dict) + + def __getitem__(self, i) -> Dict[str, torch.Tensor]: + sources = self.list_data_dict[i] + processor = self.processor + if isinstance(i, int): + sources = [sources] + assert len(sources) == 1, "Don't know why it is wrapped to a list" # FIXME + if 'image' in sources[0]: + if not isinstance(self.list_data_dict[i]['image'], list): + self.list_data_dict[i]['image'] = [self.list_data_dict[i]['image']] + + images = [] + for image_file in self.list_data_dict[i]['image']: + image = self.image_loader.open_image(image_file) + if image is None: + logging.warning(f"Image {image_file} could not be found!") + if self.multimodal_cfg['image_aspect_ratio'] == 'keep': + max_hw, min_hw = max(image.size), min(image.size) + aspect_ratio = max_hw / min_hw + max_len, min_len = 448, 224 + shortest_edge = int(min(max_len / aspect_ratio, min_len)) + image = processor.preprocess( + image, return_tensors='pt', do_center_crop=False, size={"shortest_edge": shortest_edge} + )['pixel_values'][0] + elif self.multimodal_cfg['image_aspect_ratio'] == 'pad': + + def expand2square(pil_img, background_color): + width, height = pil_img.size + if width == height: + return pil_img + elif width > height: + result = Image.new(pil_img.mode, (width, width), background_color) + result.paste(pil_img, (0, (width - height) // 2)) + return result + else: + result = Image.new(pil_img.mode, (height, height), background_color) + result.paste(pil_img, ((height - width) // 2, 0)) + return result + + image = expand2square(image, tuple(int(x * 255) for x in processor.image_mean)) + image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0] + else: + image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0] + images.append(image) + images_tensors = torch.tensor([]) + if images: + images_tensors = torch.stack(images) + cur_token_len = (images_tensors[0].shape[1] // 14) * ( + images_tensors[0].shape[2] // 14 + ) # FIXME: 14 is hardcoded patch size + sources = preprocess_multimodal(copy.deepcopy(sources), self.multimodal_cfg, cur_token_len) + else: + images_tensors = torch.tensor([]) + sources = copy.deepcopy(sources) + + if self.conv_template == "nvgpt": + data_dict = preprocess_nvgpt(sources, self.tokenizer, self.multimodal_cfg,) + elif self.conv_template == "v1": + data_dict = preprocess_v1(sources, self.tokenizer, self.multimodal_cfg,) + elif self.conv_template == "llama_2": + data_dict = preprocess_llama_2(sources, self.tokenizer, self.multimodal_cfg,) + else: + raise ValueError(f"Conversation template `{self.conv_template}` is not supported in Neva now.") + + if isinstance(i, int): + data_dict = dict(tokens=data_dict["tokens"][0], labels=data_dict["labels"][0]) + + # image exist in the data + if self.multimodal_cfg['is_multimodal']: + crop_size = self.processor.crop_size + # image does not exist in the data, but the model is multimodal + zero_padding = torch.zeros( + (MAX_NUM_IMAGES - len(images_tensors), 3, crop_size['height'], crop_size['width']), dtype=torch.float + ) + images_tensors = torch.cat((images_tensors, zero_padding), dim=0) + data_dict['image'] = images_tensors + return data_dict + + +class NevaDataset(LazySupervisedDataset): + """Dataset for supervised fine-tuning.""" + + def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer, multimodal_cfg: dict): + + if data_path.endswith(".json"): + super(NevaDataset, self).__init__(data_path, tokenizer, multimodal_cfg) + + elif data_path.endswith(".jsonl"): + super(NevaDataset, self).__init__(None, tokenizer, multimodal_cfg) + logging.warning("Loading image inputs from SteerLM Dataset") + image_folder = multimodal_cfg['image_folder'] + for line in open(data_path, "r"): + record = json.loads(line) + + # This currently supports only a single image + # search for tag + + record['image'] = [] + for turn in record['conversations']: + matches = re.finditer('', DEFAULT_IMAGE_TOKEN, turn['value']) + + self.list_data_dict.append(record) + + else: + raise ValueError(f"Formatting of {data_path} is not supported in Neva.") + + +@dataclass +class DataCollatorForSupervisedDataset(object): + """Collate examples for supervised fine-tuning.""" + + model_cfg: DictConfig + tokenizer: transformers.PreTrainedTokenizer + + def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]: + max_len = max(instance['tokens'].shape[0] for instance in instances) + max_len = (max_len - 1) // 4 * 4 + 4 + for instance in instances: + pad_len = max_len - instance['tokens'].shape[0] + instance['tokens'] = F.pad(instance['tokens'], (0, pad_len), 'constant', 0) + instance['labels'] = F.pad(instance['labels'], (0, pad_len), 'constant', -1) + + batch = default_collate(instances) + tokenizer = self.tokenizer + model_cfg = self.model_cfg + + tokens = batch['tokens'] + labels = batch['labels'] + media = batch.get('image') + + attention_mask, loss_mask, position_ids = get_ltor_masks_and_position_ids( + data=tokens, + eod_token=tokenizer.eos_id, + eod_mask_loss=model_cfg.data.get("eod_mask_loss", False), + reset_attention_mask=False, + reset_position_ids=False, + ) + + loss_mask[labels == -1] = 0.0 + tokens[tokens == -1] = 0 + labels[labels == -1] = 0 + + if media is None: + raise NotImplementedError + else: + media = rearrange(media, "b T c h w -> b T 1 c h w") + + batch = { + 'tokens': tokens, + 'labels': labels, + 'attention_mask': attention_mask, + 'loss_mask': loss_mask, + 'position_ids': position_ids, + 'media': media, + } + return batch + + +def make_supervised_data_module(tokenizer, model_cfg) -> Dict: + """Make dataset and collator for supervised fine-tuning.""" + data_cfg = model_cfg.data + mm_cfg = model_cfg.mm_cfg + add_extra_token = 1 + if getattr(model_cfg, 'no_seqlen_plus_one_input_tokens', False): + add_extra_token = 0 + if mm_cfg.vision_encoder.from_hf: + image_processor = CLIPImageProcessor.from_pretrained( + mm_cfg.vision_encoder.from_pretrained, torch_dtype=torch.bfloat16 + ) + else: + # TODO(yuya): Fix this hard-code for our own CLIP + image_processor = CLIPImageProcessor.from_pretrained( + "openai/clip-vit-large-patch14", torch_dtype=torch.bfloat16 + ) + train_dataset = NevaDataset( + tokenizer=tokenizer, + data_path=data_cfg.data_path, + multimodal_cfg=dict( + is_multimodal=data_cfg.is_multimodal, + sep_image_conv_front=data_cfg.sep_image_conv_front, + conv_template=data_cfg.get("conv_template", "nvgpt"), + image_token_len=data_cfg.image_token_len, + image_folder=data_cfg.image_folder, + image_aspect_ratio=data_cfg.image_aspect_ratio, + use_im_start_end=getattr(model_cfg.mm_cfg, 'use_im_start_end', False), + image_processor=image_processor, + add_extra_token=add_extra_token, + context_length=model_cfg.encoder_seq_length, + ), + ) + # data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer) + return dict(train_dataset=train_dataset, eval_dataset=train_dataset) diff --git a/nemo/collections/multimodal/data/nsfw/__init__.py b/nemo/collections/multimodal/data/nsfw/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/nsfw/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/nsfw/nsfw_dataset.py b/nemo/collections/multimodal/data/nsfw/nsfw_dataset.py new file mode 100644 index 000000000000..de2406012fbc --- /dev/null +++ b/nemo/collections/multimodal/data/nsfw/nsfw_dataset.py @@ -0,0 +1,53 @@ +import pathlib +from typing import Callable, List, Optional, Tuple + +import torch +from omegaconf.dictconfig import DictConfig +from PIL import Image + +from nemo.collections.multimodal.data.clip.augmentations.augmentations import image_transform + + +class DirectoryBasedDataset(torch.utils.data.Dataset): + def __init__(self, path: str, transform: Optional[Callable] = None): + super(DirectoryBasedDataset, self).__init__() + + self._transform = transform + self._samples = self._get_files(path, "nsfw", 1) + self._get_files(path, "safe", 0) + + def __getitem__(self, index: int) -> Tuple[torch.Tensor, int]: + if index >= len(self): + raise IndexError(f"Index {index} ot of bound {len(self)}") + + sample_path, category = self._samples[index] + + image = Image.open(sample_path) + + if self._transform is not None: + image = self._transform(image) + + return image, category + + def __len__(self) -> int: + return len(self._samples) + + def _get_files(self, path: str, subdir: str, category: int) -> List[Tuple[str, int]]: + globpath = pathlib.Path(path) / subdir + return [(x, category) for x in globpath.glob("*.*")] + + +def build_dataset(model_cfg: DictConfig, consumed_samples: int, is_train: bool): + img_fn = image_transform( + (model_cfg.vision.img_h, model_cfg.vision.img_w), + is_train=False, + mean=model_cfg.vision.image_mean, + std=model_cfg.vision.image_std, + resize_longest_max=True, + ) + + if is_train: + path = model_cfg.data.train.dataset_path + else: + path = model_cfg.data.validation.dataset_path + + return DirectoryBasedDataset(path, transform=img_fn) diff --git a/nemo/collections/multimodal/data/stable_diffusion/__init__.py b/nemo/collections/multimodal/data/stable_diffusion/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/stable_diffusion/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/stable_diffusion/augmentation/__init__.py b/nemo/collections/multimodal/data/stable_diffusion/augmentation/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/data/stable_diffusion/augmentation/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py b/nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py new file mode 100644 index 000000000000..eba00f96c0c2 --- /dev/null +++ b/nemo/collections/multimodal/data/stable_diffusion/augmentation/augmentations.py @@ -0,0 +1,71 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import torch +import torchvision.transforms as transforms + + +def construct_clip_augmentations(n_px=224): + def _convert_image_to_rgb(image): + return image.convert("RGB") + + return transforms.Compose( + [ + transforms.Resize(n_px, interpolation=transforms.InterpolationMode.BICUBIC), + transforms.CenterCrop(n_px), + _convert_image_to_rgb, + transforms.ToTensor(), + transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), + ] + ) + + +def construct_image_augmentations(augmentation_dict, normalize=True): + train_img_transform = [] + for aug in augmentation_dict: + if aug == 'resize_smallest_side': + img_size = int(augmentation_dict[aug]) + train_img_transform.append( + transforms.Resize(img_size, interpolation=transforms.InterpolationMode.BICUBIC, antialias=True) + ) + + elif aug == 'center_crop_h_w': + img_w, img_h = augmentation_dict[aug].split(',') + img_w = int(img_w) + img_h = int(img_h) + train_img_transform.append(transforms.CenterCrop((img_w, img_h))) + + elif aug == 'random_crop_h_w': + img_w, img_h = augmentation_dict[aug].split(',') + img_w = int(img_w) + img_h = int(img_h) + train_img_transform.append(transforms.RandomCrop((img_w, img_h))) + + elif aug == 'horizontal_flip': + enabled = augmentation_dict[aug] + if enabled: + train_img_transform.append(transforms.RandomHorizontalFlip(p=0.5)) + else: + raise ValueError('Augmentation not supported') + + # Always need to convert data to tensor + train_img_transform.append(transforms.ToTensor()) + if normalize: + train_img_transform.append(transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))) + train_img_transform = transforms.Compose(train_img_transform) + return train_img_transform + + +def identical_transform(x): + return x diff --git a/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py b/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py new file mode 100644 index 000000000000..963982e14cb6 --- /dev/null +++ b/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py @@ -0,0 +1,184 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from nemo.collections.multimodal.data.common.webdataset import WebDatasetCommon +from nemo.collections.multimodal.data.stable_diffusion.augmentation.augmentations import ( + construct_image_augmentations, + identical_transform, +) +from nemo.core.classes import Dataset as NeMoDataset +from nemo.utils import logging + + +class SDSyntheticDataset(NeMoDataset): + def __init__( + self, image_H, image_W, fake_len=100000, image_key='images', txt_key='txt', seq_len=80, context_dim=768 + ): + super().__init__() + self.fake_len = fake_len + self.H = image_H + self.W = image_W + self.image_key = image_key + self.txt_key = txt_key + assert image_key.endswith('encoded') == txt_key.endswith( + 'encoded' + ), 'In precached mode, first and second stage key must both end with "encoded"' + self.precached = self.image_key.endswith('encoded') + self.seq_len = seq_len + self.context_dim = context_dim + + def __getitem__(self, index): + item = {} + if self.precached: + item[self.image_key] = torch.randn(8, self.H // 8, self.W // 8) + item[self.txt_key] = torch.randn(self.seq_len, self.context_dim) + else: + item[self.image_key] = torch.randn(self.H, self.W, 3) + item[self.txt_key] = f'This is meaningless fake text No.{index}' + + return item + + def __len__(self): + return self.fake_len + + +def build_train_valid_datasets( + model_cfg, consumed_samples, +): + data_cfg = model_cfg.data + + def build_resolution_filter(value=None, method='larger'): + assert method == 'larger' or method == 'smaller' + if method == 'larger': + logging.info(f'Only Selecting images with resolution >= {value}') + return lambda x: x['jpg'].size[0] >= value and x['jpg'].size[1] >= value + logging.info(f'Only Selecting images with resolution <= {value}') + return lambda x: x['jpg'].size[0] <= value and x['jpg'].size[1] <= value + + # This function maps data that are tuples to dictionary. + def tuple_to_dict(inp): + for input in inp: + out_dict = dict() + out_dict[model_cfg.first_stage_key] = input[0].permute(1, 2, 0) + out_dict[model_cfg.cond_stage_key] = input[1] + yield out_dict + + def transform_fn(sample): + image, text = sample["jpg"], sample["txt"] + # TODO : If no agumentations just return the image ? + img_transform = construct_image_augmentations(data_cfg.train.get("augmentations", None)) + text_transform = identical_transform + return img_transform(image), text_transform(text) + + if data_cfg.get('synthetic_data', False): + H, W = data_cfg.train.augmentations.center_crop_h_w.split(',') + train_data = SDSyntheticDataset( + int(H), + int(W), + image_key=model_cfg.first_stage_key, + txt_key=model_cfg.cond_stage_key, + context_dim=model_cfg.unet_config.context_dim, + ) + + else: + filter_cfg = data_cfg.train.get('filterings', None) + filter_fn = build_resolution_filter(**filter_cfg.resolution) if filter_cfg else None + train_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + filter_fn=filter_fn, + is_train=True, + ) + + val_data = None + if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): + if data_cfg.get('synthetic_data', False): + val_data = SDSyntheticDataset( + int(H), + int(W), + image_key=model_cfg.first_stage_key, + txt_key=model_cfg.cond_stage_key, + context_dim=model_cfg.unet_config.context_dim, + ) + else: + val_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + filter_fn=filter_fn, + is_train=False, + ) + + return train_data, val_data + + +def build_train_valid_precached_datasets( + model_cfg, consumed_samples, +): + data_cfg = model_cfg.data + + # This function maps data that are tuples to dictionary. + def tuple_to_dict(inp): + for input in inp: + out_dict = dict() + out_dict[model_cfg.first_stage_key] = torch.tensor(input['autoencoderkl_image']) + out_dict[model_cfg.cond_stage_key] = torch.tensor(input['clip-vit-large-patch14_text']) + yield out_dict + + def transform_fn(sample): + return sample['pickle'] + + if data_cfg.get('synthetic_data', False): + H, W = data_cfg.train.augmentations.center_crop_h_w.split(',') + train_data = SDSyntheticDataset( + int(H), + int(W), + image_key=model_cfg.first_stage_key, + txt_key=model_cfg.cond_stage_key, + context_dim=model_cfg.unet_config.context_dim, + ) + else: + train_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=True, + ) + + val_data = None + if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): + if data_cfg.get('synthetic_data', False): + H, W = data_cfg.train.augmentations.center_crop_h_w.split(',') + train_data = SDSyntheticDataset( + int(H), + int(W), + image_key=model_cfg.first_stage_key, + txt_key=model_cfg.cond_stage_key, + context_dim=model_cfg.unet_config.context_dim, + ) + else: + val_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=False, + ) + + return train_data, val_data diff --git a/nemo/collections/multimodal/losses/__init__.py b/nemo/collections/multimodal/losses/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/losses/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/losses/clip_loss.py b/nemo/collections/multimodal/losses/clip_loss.py new file mode 100644 index 000000000000..5eb84b020aed --- /dev/null +++ b/nemo/collections/multimodal/losses/clip_loss.py @@ -0,0 +1,112 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.distributed.nn +import torch.nn as nn +from torch import distributed as dist +from torch.nn import functional as F + +from nemo.collections.nlp.modules.common.megatron.utils import average_losses_across_data_parallel_group + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +def gather_features( + image_features, text_features, local_loss=False, gather_with_grad=False, +): + data_parallel_world_size = parallel_state.get_data_parallel_world_size() + data_parallel_rank = parallel_state.get_data_parallel_rank() + data_parallel_group = parallel_state.get_data_parallel_group() + + if gather_with_grad: + # TODO (yuya): this is not working in current version of pytorch + # https://github.com/mlfoundations/open_clip/blob/main/src/open_clip/loss.py#L48 + all_image_features = torch.cat(torch.distributed.nn.all_gather(image_features), dim=0) + all_text_features = torch.cat(torch.distributed.nn.all_gather(text_features), dim=0) + + else: + gathered_image_features = [torch.zeros_like(image_features) for _ in range(data_parallel_world_size)] + gathered_text_features = [torch.zeros_like(text_features) for _ in range(data_parallel_world_size)] + dist.all_gather(gathered_image_features, image_features, group=data_parallel_group) + dist.all_gather(gathered_text_features, text_features, group=data_parallel_group) + # TODO (yuya): check what's this + if not local_loss: + # ensure grads for local rank when all_* features don't have a gradient + # https://amsword.medium.com/gradient-backpropagation-with-torch-distributed-all-gather-9f3941a381f8 + gathered_image_features[data_parallel_rank] = image_features + gathered_text_features[data_parallel_rank] = text_features + all_image_features = torch.cat(gathered_image_features, dim=0) + all_text_features = torch.cat(gathered_text_features, dim=0) + + return all_image_features, all_text_features + + +class ClipLoss(nn.Module): + def __init__( + self, local_loss=False, gather_with_grad=False, cache_labels=False, + ): + super().__init__() + self.local_loss = local_loss + self.gather_with_grad = gather_with_grad + self.cache_labels = cache_labels + + # cache state + self.prev_num_logits = 0 + self.labels = {} + + self.world_size = parallel_state.get_data_parallel_world_size() + self.rank = parallel_state.get_data_parallel_rank() + + def forward(self, output_tensor): + image_features, text_features, logit_scale = output_tensor + device = image_features.device + if self.world_size > 1: + all_image_features, all_text_features = gather_features( + image_features, text_features, self.local_loss, self.gather_with_grad + ) + + if self.local_loss: + logits_per_image = logit_scale * image_features @ all_text_features.T + logits_per_text = logit_scale * text_features @ all_image_features.T + else: + logits_per_image = logit_scale * all_image_features @ all_text_features.T + logits_per_text = logits_per_image.T + else: + logits_per_image = logit_scale * image_features @ text_features.T + logits_per_text = logit_scale * text_features @ image_features.T + + # calculated ground-truth and cache if enabled + num_logits = logits_per_image.shape[0] + if self.prev_num_logits != num_logits or device not in self.labels: + labels = torch.arange(num_logits, device=device, dtype=torch.long) + if self.world_size > 1 and self.local_loss: + labels = labels + num_logits * self.rank + if self.cache_labels: + self.labels[device] = labels + self.prev_num_logits = num_logits + else: + labels = self.labels[device] + + total_loss = (F.cross_entropy(logits_per_image, labels) + F.cross_entropy(logits_per_text, labels)) / 2 + + # TODO (yuya): this is not necessary; not necessary if global! + reduced_loss = average_losses_across_data_parallel_group([total_loss]) + return total_loss, {"loss": reduced_loss} diff --git a/nemo/collections/multimodal/models/__init__.py b/nemo/collections/multimodal/models/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/clip/__init__.py b/nemo/collections/multimodal/models/clip/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/clip/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/clip/megatron_clip_models.py b/nemo/collections/multimodal/models/clip/megatron_clip_models.py new file mode 100644 index 000000000000..e24e95e68af8 --- /dev/null +++ b/nemo/collections/multimodal/models/clip/megatron_clip_models.py @@ -0,0 +1,1017 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools +from functools import partial +from typing import Any, List, Optional, Union + +import numpy as np +import torch +import torch.nn.functional as F +from omegaconf.dictconfig import DictConfig +from pytorch_lightning.accelerators import CPUAccelerator +from pytorch_lightning.trainer.trainer import Trainer +from tqdm import tqdm + +from nemo.collections.multimodal.data.clip.clip_dataset import ( + build_imagenet_validation_dataloader, + build_train_valid_datasets, + tokenize, +) +from nemo.collections.multimodal.losses.clip_loss import ClipLoss +from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel +from nemo.collections.nlp.modules.common.megatron.build_model import build_model +from nemo.collections.nlp.modules.common.megatron.language_model import get_language_model +from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule +from nemo.collections.nlp.modules.common.megatron.utils import ( + ApexGuardDefaults, + average_losses_across_data_parallel_group, + get_all_params_for_weight_decay_optimization, + get_linear_layer, + get_params_for_weight_decay_optimization, + init_method_normal, + parallel_lm_logits, + scaled_init_method_normal, +) +from nemo.collections.nlp.parts.utils_funcs import get_last_rank, is_last_rank +from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone, VitMlpHead +from nemo.core.classes.common import PretrainedModelInfo +from nemo.utils import logging + +try: + from apex.transformer.enums import AttnMaskType + from apex.transformer.pipeline_parallel.utils import get_num_microbatches + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + +try: + from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +class CLIPVisionTransformer(MegatronModule): + """Vision Transformer Model.""" + + def __init__(self, model_cfg, model_parallel_config, pre_process=True, post_process=True, skip_head=False): + super(CLIPVisionTransformer, self).__init__() + + scaled_init_method = ( + scaled_init_method_normal(model_cfg.init_method_std, model_cfg.num_layers) + if model_cfg.use_scaled_init_method + else init_method_normal(model_cfg.init_method_std) + ) + + self.config = model_parallel_config + self.hidden_size = model_cfg.hidden_size + self.global_average_pool = model_cfg.global_average_pool + self.pre_process = pre_process + self.post_process = post_process + self.skip_head = skip_head + + if model_cfg.get("class_token_length") is None or model_cfg.get("class_token_length") <= 0: + class_token = False + else: + class_token = True + self.backbone = VitBackbone( + model_cfg, + model_parallel_config, + init_method=init_method_normal(model_cfg.init_method_std), + scaled_init_method=scaled_init_method, + pre_process=self.pre_process, + post_process=self.post_process, + class_token=class_token, + single_token_output=False, + ) + + if self.post_process and not skip_head: + self.output_dim = model_cfg.output_dim + self.head = torch.nn.Linear(self.hidden_size, self.output_dim, bias=False,) + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + self.backbone.set_input_tensor(input_tensor) + + def forward(self, input): + hidden_states = self.backbone(input) + + if self.post_process and not self.skip_head: + if self.global_average_pool: + hidden_states = hidden_states.mean(dim=1) + else: + hidden_states = hidden_states[:, 0] + hidden_states = self.head(hidden_states) + # print("vision_head", hidden_states.shape) + return hidden_states + + +class CLIPTextTransformer(MegatronModule): + """Text Transformer Model.""" + + def __init__(self, model_cfg, model_parallel_config, padded_vocab_size, pre_process=True, post_process=True): + super(CLIPTextTransformer, self).__init__() + + self.config = model_parallel_config + self.pre_process = pre_process + self.post_process = post_process + self.fp16_lm_cross_entropy = model_cfg.fp16_lm_cross_entropy + self.sequence_parallel = model_cfg.sequence_parallel + self.gradient_accumulation_fusion = model_cfg.gradient_accumulation_fusion + + scaled_init_method = ( + scaled_init_method_normal(model_cfg.init_method_std, model_cfg.num_layers) + if model_cfg.use_scaled_init_method + else init_method_normal(model_cfg.init_method_std) + ) + self.language_model, self._language_model_key = get_language_model( + config=model_parallel_config, + vocab_size=padded_vocab_size, + hidden_size=model_cfg.hidden_size, + hidden_dropout=model_cfg.hidden_dropout, + attention_dropout=model_cfg.attention_dropout, + num_tokentypes=0, + max_position_embeddings=model_cfg.max_position_embeddings, + num_layers=model_cfg.num_layers, + num_attention_heads=model_cfg.num_attention_heads, + apply_query_key_layer_scaling=model_cfg.apply_query_key_layer_scaling, + kv_channels=model_cfg.kv_channels, + ffn_hidden_size=model_cfg.ffn_hidden_size, + add_pooler=False, + encoder_attn_mask_type=AttnMaskType.causal, + position_embedding_type=model_cfg.get("position_embedding_type", "learned_absolute"), + init_method=init_method_normal(model_cfg.init_method_std), + scaled_init_method=scaled_init_method, + pre_process=self.pre_process, + post_process=self.post_process, + init_method_std=model_cfg.init_method_std, + precision=model_cfg.precision, + fp32_residual_connection=model_cfg.fp32_residual_connection, + activations_checkpoint_granularity=model_cfg.activations_checkpoint_granularity, + activations_checkpoint_method=model_cfg.activations_checkpoint_method, + activations_checkpoint_num_layers=model_cfg.activations_checkpoint_num_layers, + activations_checkpoint_layers_per_pipeline=model_cfg.activations_checkpoint_layers_per_pipeline, + normalization=model_cfg.normalization, + layernorm_epsilon=model_cfg.layernorm_epsilon, + bias_activation_fusion=model_cfg.bias_activation_fusion, + bias_dropout_add_fusion=model_cfg.bias_dropout_add_fusion, + masked_softmax_fusion=model_cfg.masked_softmax_fusion, + persist_layer_norm=model_cfg.persist_layer_norm, + openai_gelu=model_cfg.openai_gelu, + onnx_safe=model_cfg.onnx_safe, + megatron_legacy=model_cfg.megatron_legacy, + transformer_engine=model_cfg.transformer_engine, + fp8=model_cfg.fp8, + fp8_e4m3=model_cfg.fp8_e4m3, + fp8_hybrid=model_cfg.fp8_hybrid, + fp8_margin=model_cfg.fp8_margin, + fp8_interval=model_cfg.fp8_interval, + fp8_amax_history_len=model_cfg.fp8_amax_history_len, + fp8_amax_compute_algo=model_cfg.fp8_amax_compute_algo, + reduce_amax=model_cfg.get('reduce_amax', True), + use_emha=model_cfg.use_emha, + activation=model_cfg.get('activation', 'gelu'), + use_flash_attention=model_cfg.get('flash_attention', False), + ) + + self.initialize_word_embeddings( + init_method=init_method_normal(model_cfg.init_method_std), + vocab_size=padded_vocab_size, + hidden_size=model_cfg.hidden_size, + ) + + # TODO (yuya): check this position id + self.position_ids = None + if self.pre_process: + self.position_ids = torch.arange(model_cfg.max_position_embeddings).expand(1, -1).cuda() + + if self.post_process: + self.output_dim = model_cfg.output_dim + self.head = torch.nn.Linear(model_cfg.hidden_size, self.output_dim, bias=False,) + + self.attn_mask = self.build_attention_mask(model_cfg.max_position_embeddings) + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + self.language_model.set_input_tensor(input_tensor) + + def build_attention_mask(self, max_position_embeddings): + # lazily create causal attention mask, with full attention between the tokens + mask = torch.empty(max_position_embeddings, max_position_embeddings, dtype=bool, device='cuda') + mask.fill_(True) + mask.triu_(1) # zero out the lower diagonal + mask = mask.reshape(1, 1, max_position_embeddings, max_position_embeddings) + return mask + + def forward( + self, input_ids, + ): + # input_ids: [b, s] + # position_ids: [b, s] + # attention_mask: [1, 1, s, s] + + hidden_states = self.language_model( + input_ids, + self.position_ids, + self.attn_mask, + token_type_ids=None, + layer_past=None, + get_key_value=False, + encoder_input=None, + set_inference_key_value_memory=False, + inference_max_sequence_len=None, + checkpoint_activations_all_layers=None, + ) + + if self.post_process: + # shape = [seq, bsz, hidden] + # take features from the eot embedding (eot_token is the highest number in each sequence) + hidden_states = hidden_states[input_ids.argmax(dim=-1), torch.arange(hidden_states.shape[1])] + return self.head(hidden_states) + + return hidden_states + + +class CLIPModel(MegatronModule): + """CLIP Model""" + + def __init__(self, model_cfg, model_parallel_config, padded_vocab_size, pre_process=True, post_process=True): + super(CLIPModel, self).__init__() + + self.config = model_parallel_config + self.pre_process = pre_process + self.post_process = post_process + self.vision_encoder = CLIPVisionTransformer( + model_cfg.vision, model_parallel_config, pre_process=self.pre_process, post_process=self.post_process, + ) + self.text_encoder = CLIPTextTransformer( + model_cfg.text, + model_parallel_config, + padded_vocab_size, + pre_process=self.pre_process, + post_process=self.post_process, + ) + + self.logit_scale = torch.nn.Parameter(torch.ones([]) * np.log(1 / 0.07)) + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + # TODO (yuya): fix this + pass + + def forward(self, images, captions): + image_features = self.vision_encoder(images) + text_features = self.text_encoder(captions) + + if self.post_process: + return F.normalize(image_features, dim=-1), F.normalize(text_features, dim=-1), self.logit_scale.exp() + + return image_features, text_features + + +class MegatronCLIPModel(MegatronBaseModel): + """Megatron CLIP Model.""" + + def __init__(self, cfg: DictConfig, trainer: Trainer): + if not HAVE_APEX: + raise ImportError( + "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + if not HAVE_MEGATRON_CORE: + raise ImportError( + "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + + # this prevents base constructor from initializing tokenizer + self.tokenizer = None + self.imagenet_val = None + super().__init__(cfg, trainer=trainer) + + self._validate_trainer() + + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) + + if not self.megatron_amp_O2 and self.cfg.get('virtual_pipeline_model_parallel_size', None): + raise ValueError('Virtual pipeline model parallel is only supported when using megatron_amp_O2') + + # build_model returns a list of modules which are used for interleaved pipeline parallelism + if isinstance(self.trainer.accelerator, CPUAccelerator): + self.model = build_model( + model_provider_func=self.model_provider_func, + wrap_with_ddp=False, + on_cpu=True, + virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), + ) + else: + self.model = build_model( + model_provider_func=self.model_provider_func, + wrap_with_ddp=False, + virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), + ) + + # if we're not using interleaved, then self.model is a module. + if self.cfg.get('virtual_pipeline_model_parallel_size', None) is None: + self.model = self.model[0] + + if self.megatron_amp_O2: + + if not self.with_distributed_adam: + # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type + if isinstance(self.model, list): + for module in self.model: + module.cuda(torch.cuda.current_device()) + else: + self.model.cuda(torch.cuda.current_device()) + + # Model wrapper to convert both model and inputs to half precision + # TODO (yuya): check this; FP16 Module might not work; when self.model is a list? + if isinstance(self.model, list): + converted_model = [] + for module in self.model: + converted_model.append( + Float16Module(config=self.model_parallel_config, module=module, precision=cfg.precision) + ) + self.model = converted_model + else: + self.model = Float16Module( + config=self.model_parallel_config, module=self.model, precision=cfg.precision + ) + + if self.trainer.precision in ['bf16', 'bf16-mixed']: + self.autocast_dtype = torch.bfloat16 + elif self.trainer.precision in [32, '32', '32-true']: + self.autocast_dtype = torch.float + elif self.trainer.precision in [16, '16', '16-mixed']: + self.autocast_dtype = torch.half + else: + raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') + + self.enable_autocast = ( + True if (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + ) + + self.transformer_engine = cfg.get('transformer_engine', False) + + # Convert the global-batch-based profile index to micro-batch index + if hasattr(self, '_nsys_profile_enabled'): + mp_size = cfg.get('tensor_model_parallel_size', 1) * cfg.get('pipeline_model_parallel_size', 1) + data_parallel_world_size = trainer.world_size // mp_size + grad_accum_steps = cfg.get('global_batch_size') // (cfg.get('micro_batch_size') * data_parallel_world_size) + self._nsys_profile_start_step *= grad_accum_steps + self._nsys_profile_end_step *= grad_accum_steps + self.get_attention_mask_from_fusion = self.cfg.get('get_attention_mask_from_fusion', True) + self.initialize_ub = self.cfg.get('ub_tp_comm_overlap', False) + + def get_module_list(self): + if isinstance(self.model, list): + return [model.module if isinstance(model, Float16Module) else model for model in self.model] + elif isinstance(self.model, Float16Module): + return [self.model.module] + else: + return [self.model] + + def model_provider_func(self, pre_process, post_process): + """Model depends on pipeline paralellism.""" + model = CLIPModel( + model_cfg=self.cfg, + model_parallel_config=self.model_parallel_config, + padded_vocab_size=self.padded_vocab_size, + pre_process=pre_process, + post_process=post_process, + ) + return model + + def setup_optimizer_param_groups(self): + """ModelPT override. Optimizer will get self._optimizer_param_groups""" + if self.cfg.get('do_layer_norm_weight_decay', False): + if isinstance(self.model, list): + self._optimizer_param_groups = get_all_params_for_weight_decay_optimization(self.model) + else: + self._optimizer_param_groups = get_all_params_for_weight_decay_optimization([self.model]) + + else: + self._optimizer_param_groups = get_params_for_weight_decay_optimization(self.model) + + def configure_optimizers(self): + + if self.with_distributed_adam: + + # Disable overlapped grad sync for embedding grad when + # pipeline parallelism is enabled + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if parallel_state.is_pipeline_first_stage(ignore_virtual=True): + if isinstance(self.model, list): + module = self.model[0] # only the first virtual rank has the embeddings + else: + module = self.model + # TODO (yuya): text transformer's embedding needs to be taken care of when PP>1 + # if module.share_token_embeddings: + # param = module.word_embeddings_weight() + # param._disable_greedy_grad_copy = not self.megatron_amp_O2 + # param._disable_overlap_grad_sync = True + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + if isinstance(self.model, list): + module = self.model[-1] # only the last virtual rank has the embeddings + else: + module = self.model + # if module.share_token_embeddings: + # param = module.word_embeddings_weight() + # param._disable_greedy_grad_copy = not self.megatron_amp_O2 + # param._disable_overlap_grad_sync = True + + # Disable overlapped grad sync for layer norm grads when + # sequence parallelism is enabled + for param in self.parameters(): + if getattr(param, 'sequence_parallel', False): + param._disable_greedy_grad_copy = not self.megatron_amp_O2 + param._disable_overlap_grad_sync = True + + # Initialize parameter buckets for overlapped grad and param syncs + # Note: Params with disabled overlapping are put in the + # last param bucket + buckets = [] + if self.cfg.get('virtual_pipeline_model_parallel_size', None) is not None: + # Initialize a bucket for each virtual pipeline stage + for module in self.model: + if isinstance(module, Float16Module): + module = module.module + stage_bucket = [] + for layer in itertools.chain( + module.vision_encoder.backbone.transformer.layers, + module.text_encoder.language_model.encoder.layers, + ): + stage_bucket.extend( + p for p in layer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False) + ) + buckets.append(stage_bucket) + else: + # Initialize a bucket for each Transformer layer + modules = self.model if isinstance(self.model, list) else [self.model] + for module in modules: + if isinstance(module, Float16Module): + module = module.module + for layer in itertools.chain( + module.vision_encoder.backbone.transformer.layers, + module.text_encoder.language_model.encoder.layers, + ): + buckets.append( + [p for p in layer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False)] + ) + buckets.reverse() + used_params = set() + for bucket in buckets: + used_params.update(bucket) + buckets[-1].extend(p for p in self.parameters() if p not in used_params) + self.distributed_adam_buckets = buckets + + return super().configure_optimizers() + + def forward(self, image, text): + output_tensor = self.model(image, text) + return output_tensor + + def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): + + # handle asynchronous grad reduction + no_sync_func = None + grad_sync_func = None + param_sync_func = None + if not forward_only and self.with_distributed_adam: + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) + grad_sync_func = self.reduce_overlap_gradients + param_sync_func = self.sync_overlap_parameters + + # pipeline schedules will get these from self.model.config + for module in self.get_module_list(): + module.config.no_sync_func = no_sync_func + module.config.grad_sync_func = grad_sync_func + module.config.param_sync_func = param_sync_func + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + fwd_bwd_function = get_forward_backward_func() + + # TODO @akhattar: add num_micro_batches_with_partial_activation_checkpoints when ready + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=self.model, + num_microbatches=get_num_microbatches(), + forward_only=forward_only, + seq_length=None, + micro_batch_size=self.cfg.micro_batch_size, + ) + + # only the last stages of the pipeline return losses + if losses_reduced_per_micro_batch: + if (not forward_only) or self.cfg.data.get('validation_drop_last', True): + # average loss across micro batches + loss_tensors_list = [loss_reduced['loss'] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_mean = loss_tensor.mean() + else: + # Get the total loss since micro batches sizes are not uniform + raise NotImplementedError("Losses of micro batches sizes must be uniform!") + else: + # we're not on the last pipeline stage so no losses + if forward_only: + loss_mean = [] + else: + loss_mean = torch.tensor(0.0).cuda() + + return loss_mean + + def initialize_ub_func(self): + ub_cfgs = self.cfg.get('ub_tp_comm_overlap_cfg', None) + if ub_cfgs is None: + warnings.warn( + "Couldn't find TP config. Please check the path correctness. Initializing TP comm overlap with the default config." + ) + + input_shape = [ + self.cfg.get('encoder_seq_length') * self.cfg.get('micro_batch_size'), + self.cfg.get('hidden_size'), + ] + + te_module.base.initialize_ub( + shape=input_shape, + tp_size=self.cfg.get('tensor_model_parallel_size'), + use_fp8=self.cfg.get('fp8'), + ub_cfgs=ub_cfgs, + ) + self.initialize_ub = False + + def training_step(self, dataloader_iter, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + Batch should be a list of microbatches and those microbatches should on CPU. + Microbatches are then moved to GPU during the pipeline. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + # Initialize userbuffer communicators. + if self.initialize_ub: + self.initialize_ub_func() + + # we zero grads here because we also call backward in the megatron-core fwd/bwd functions + self._optimizer.zero_grad() + + if self.with_distributed_adam: + # hack to enable overlapping param sync and forward compute + # note: the distributed optimizer monkey-patches each + # parameter's __getattribute__ function so that it can + # launch parameter all-gathers the first time the + # parameter is accessed after the optimizer step. However, + # PyTorch directly passes embedding parameters into a C++, + # bypassing this process. A quick-and-dirty hack is to + # manually interact with the parameter. + modules = self.model if isinstance(self.model, list) else [self.model] + for module in modules: + if isinstance(module, Float16Module): + module = module.module + module = module.text_encoder.language_model + if hasattr(module, 'embedding'): + for param in module.embedding.parameters(): + param.data_ptr() + + loss_mean = self.fwd_bwd_step(dataloader_iter, batch_idx, False) + + # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced + if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): + self.allreduce_sequence_parallel_gradients() + + if self.with_distributed_adam: + # synchronize asynchronous grad reductions + # note: not necessary, but reduces performance degradation + # from multiple simultaneous NCCL calls + self._optimizer._finish_bucket_grad_sync() + elif self.megatron_amp_O2: + # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): + # # main grads are stored in the MainParamsOptimizer wrapper + self._optimizer.allreduce_main_grads() + else: + # async grad allreduce is not currently implemented for O1/autocasting mixed precision training + # so we all-reduce gradients after the pipeline + self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) + + # TODO (yuya): check if this is needed in text transformer when PP>1 + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1: + # # when using pipeline parallelism the first and last stage must keep embeddings in sync + # self.allreduce_first_last_embeddings() + + ## logging + # we can only log on one rank if it is rank zero so we broadcast from last rank + # we can avoid this broadcast by updating the PTL log function to accept specific ranks + torch.distributed.broadcast(loss_mean, get_last_rank()) + + if self.cfg.precision in [16, '16', '16-mixed']: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log('loss_scale', loss_scale, batch_size=1) + + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, rank_zero_only=True, batch_size=1) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log( + 'consumed_samples', + self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), + prog_bar=True, + rank_zero_only=True, + batch_size=1, + ) + + return loss_mean + + def backward(self, *args, **kwargs): + """ LightningModule hook to do backward. + We want this to do nothing since we run backward in the fwd/bwd functions from apex. + No need to call it here. + """ + pass + + def optimizer_zero_grad(self, *args, **kwargs): + """ LightningModule hook to zero grad. + We want this to do nothing as we are zeroing grads during the training_step. + """ + pass + + def _append_sequence_parallel_module_grads(self, module, grads): + """ Helper method for allreduce_sequence_parallel_gradients""" + + for param in module.parameters(): + sequence_parallel_param = getattr(param, 'sequence_parallel', False) + if sequence_parallel_param and param.requires_grad: + if self.megatron_amp_O2: + grad = param.main_grad + else: + grad = param.grad + grads.append(grad.data) + + def allreduce_sequence_parallel_gradients(self): + """ All-reduce layernorm parameters across model parallel nodes when sequence parallelism is used. + Modified from megatron-lm: + https://gitlab-master.nvidia.com/ADLR/megatron-lm/-/blob/3f91f09bb2ab32f9904b47f46f19d2fc3f518ed8/megatron/training.py#L425 + """ + + grads = [] + if isinstance(self.model, list): + for module in self.model: + self._append_sequence_parallel_module_grads(module, grads) + else: + self._append_sequence_parallel_module_grads(self.model, grads) + + coalesced = torch._utils._flatten_dense_tensors(grads) + torch.distributed.all_reduce(coalesced, group=parallel_state.get_tensor_model_parallel_group()) + for buf, synced in zip(grads, torch._utils._unflatten_dense_tensors(coalesced, grads)): + buf.copy_(synced) + + def get_forward_output_and_loss_func(self): + loss_func = ClipLoss(local_loss=self.cfg.local_loss, gather_with_grad=self.cfg.gather_with_grad,) + + def fwd_output_and_loss_func(dataloader_iter, model): + batch = next(dataloader_iter) + if parallel_state.get_pipeline_model_parallel_world_size() == 1: + images = batch["images"].cuda(non_blocking=True) + captions = batch["captions"].cuda(non_blocking=True) + else: + # GPT3 uses only causal mask, which doesn't need attention mask + if parallel_state.is_pipeline_first_stage(): + # Fist pipeline stage needs only the tokens and position_ids + images = batch["images"].cuda(non_blocking=True) + captions = batch["captions"].cuda(non_blocking=True) + else: + # Intermediate / Last pipeline stage doesn't need any inputs + images, captions = None, None + + output_tensor = model(images, captions) + return output_tensor, loss_func + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(batch, model): + raise NotImplementedError + + return fwd_output_only_func + + def zero_shot_classifier(self): + if self.cfg.get("megatron_amp_O2", False): + text_encoder = self.model.module.text_encoder + else: + text_encoder = self.model.text_encoder + + with torch.no_grad(): + zeroshot_weights = [] + for texts in self.imagenet_val["texts"]: + texts = texts.cuda(non_blocking=True) + # TODO (yuya): distributed not working + with torch.cuda.amp.autocast( + enabled=self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, + ): + class_embeddings = text_encoder(texts) + class_embedding = F.normalize(class_embeddings, dim=-1).mean(dim=0) + class_embedding /= class_embedding.norm() + zeroshot_weights.append(class_embedding) + zeroshot_weights = torch.stack(zeroshot_weights, dim=1) + return zeroshot_weights + + def zero_shot_eval(self): + def accuracy(output, target, topk=(1,)): + pred = output.topk(max(topk), 1, True, True)[1].t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + return [float(correct[:k].reshape(-1).float().sum(0, keepdim=True).cpu().numpy()) for k in topk] + + logging.info('Starting zero-shot imagenet.') + + logging.info('Building zero-shot classifier') + classifier = self.zero_shot_classifier() + + logging.info('Using classifier') + + if self.cfg.get("megatron_amp_O2", False): + vision_encoder = self.model.module.vision_encoder + else: + vision_encoder = self.model.vision_encoder + with torch.no_grad(): + top1, top5, n = 0.0, 0.0, 0.0 + for images, target in tqdm(self.imagenet_val["images"], desc="Imagenet Zero-shot Evaluation", leave=False): + if images is None or target is None: + continue + + images = images.cuda(non_blocking=True).to(self.autocast_dtype) + target = target.cuda(non_blocking=True) + # predict + with torch.cuda.amp.autocast( + enabled=self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, + ): + image_features = vision_encoder(images) + image_features = F.normalize(image_features, dim=-1) + logits = 100.0 * image_features @ classifier + + # measure accuracy + acc1, acc5 = accuracy(logits, target, topk=(1, 5)) + top1 += acc1 + top5 += acc5 + n += images.size(0) + + logging.info('Finished zero-shot imagenet.') + top1 = top1 / n + top5 = top5 / n + return top1, top5 + + def validation_step(self, dataloader_iter, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. """ + # Initialize userbuffer communicators. + if self.initialize_ub: + self.initialize_ub_func() + + loss = self.fwd_bwd_step(dataloader_iter, batch_idx, True) + self.validation_step_outputs.append(loss) + + return loss + + def on_validation_epoch_end(self): + # TODO (yuya): need fix later, check with Sean + if not self.validation_step_outputs: + return + + # Run zero shot imagenet evaluation + if self.imagenet_val is not None: + imagenet_metric = torch.zeros(2).cuda() + imagenet_metric[0], imagenet_metric[1] = self.zero_shot_eval() + imagenet_metric = average_losses_across_data_parallel_group(imagenet_metric) + self.log('imagenet_top1', imagenet_metric[0], prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('imagenet_top5', imagenet_metric[1], prog_bar=True, rank_zero_only=True, batch_size=1) + + if parallel_state.is_pipeline_last_stage(): + averaged_metrics = torch.tensor( + [torch.stack(self.validation_step_outputs).mean()], dtype=torch.float32, device='cuda' + ) + else: + averaged_metrics = torch.tensor([0.0], dtype=torch.float32, device='cuda') + + # we can only log on one rank if it is rank zero so we broadcast from last rank + torch.distributed.broadcast(averaged_metrics, get_last_rank()) + averaged_loss = averaged_metrics + + self.log('global_step', self.trainer.global_step, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True, batch_size=1) + self.validation_step_outputs.clear() # free memory + + return averaged_loss + + def test_step(self, batch, batch_idx): + return self.validation_step(batch, batch_idx) + + def test_epoch_end(self, outputs): + averaged_loss = average_losses_across_data_parallel_group(outputs) + logging.info(f'test_loss: {averaged_loss[0]}') + + def build_train_valid_test_datasets(self): + logging.info('Building datasets for CLIP...') + if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): + raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") + + self._train_ds, self._validation_ds = build_train_valid_datasets( + model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0), tokenizer=self.tokenizer, + ) + self._test_ds = None + + if self._train_ds is not None: + logging.info(f'Length of train dataset: {len(self._train_ds)}') + if self._validation_ds is not None: + logging.info(f'Length of val dataset: {len(self._validation_ds)}') + if self._test_ds is not None: + logging.info(f'Length of test dataset: {len(self._test_ds)}') + logging.info(f'Finished building datasets for CLIP.') + + return self._train_ds, self._validation_ds, self._test_ds + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + + # log number of parameters + if isinstance(self.model, list): + num_parameters_on_device = sum( + [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] + ) + # if parallel_state.get_pipeline_model_parallel_world_size() > 1 and parallel_state.is_pipeline_last_stage( + # ignore_virtual=True + # ): + # # substract the embedding weights on the last virtual stage + # num_word_embedding_parameters = sum([p.nelement() for p in self.model[-1].word_embeddings_weight()]) + # num_parameters_on_device -= num_word_embedding_parameters + else: + num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) + + # if parallel_state.get_pipeline_model_parallel_world_size() > 1 and parallel_state.is_pipeline_last_stage( + # ignore_virtual=True + # ): + # # substract the embedding weights on the last stage + # num_word_embedding_parameters = sum([p.nelement() for p in self.model.word_embeddings_weight()]) + # + # num_parameters_on_device -= num_word_embedding_parameters + + # to be summed across data parallel group + total_num_parameters = torch.tensor(num_parameters_on_device).cuda() + + torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer.ckpt_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + # allowing restored models to optionally setup datasets + self.build_train_valid_test_datasets() + + # Batch size need to be provided for webdatset + self._num_micro_batches = get_num_microbatches() + self._micro_batch_size = self.cfg.micro_batch_size + + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + + if self.cfg.data.get("imagenet_val") is not None: + self.imagenet_val = build_imagenet_validation_dataloader(self.cfg, self.tokenizer) + + # when using pipeline model parallel the final stage need to initialize word embeddings + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if isinstance(self.model, list): + for i, module in enumerate(self.model): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + # module.sync_initial_word_embeddings() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + else: + # self.model.sync_initial_word_embeddings() + pass + + def setup_training_data(self, cfg): + if hasattr(self, '_train_ds') and self._train_ds is not None: + consumed_samples = self.compute_consumed_samples(0) + logging.info( + f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' + ) + self._train_dl = torch.utils.data.DataLoader( + self._train_ds, + batch_size=self._micro_batch_size, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=cfg.train.get("drop_last", True), + persistent_workers=True, + ) + + def setup_validation_data(self, cfg): + if hasattr(self, '_validation_ds') and self._validation_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' + ) + self._validation_dl = torch.utils.data.DataLoader( + self._validation_ds, + batch_size=self._micro_batch_size, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=cfg.train.get("drop_last", True), + persistent_workers=True, + ) + + def setup_test_data(self, cfg): + if hasattr(self, '_test_ds') and self._test_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' + ) + self._test_dl = torch.utils.data.DataLoader( + self._test_ds, batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, + ) + + def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any: + raise NotImplementedError + + def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: + """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device + When using pipeline parallelism, we need the global batch to remain on the CPU, + since the memory overhead will be too high when using a large number of microbatches. + Microbatches are transferred from CPU to GPU inside the pipeline. + """ + return batch + + def _validate_trainer(self): + """ Certain trainer configurations can break training. + Here we try to catch them and raise an error. + """ + if self.trainer.accumulate_grad_batches > 1: + raise ValueError( + f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' + ) + + @classmethod + def list_available_models(cls) -> Optional[PretrainedModelInfo]: + return None + + def on_save_checkpoint(self, checkpoint) -> None: + """LightningModule hook: + https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-save-checkpoint + """ + if isinstance(self.model, list): + for i in range(len(self.model)): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + checkpoint[f'model{i}'] = self.model[i].module.state_dict_for_save_checkpoint() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + + def on_load_checkpoint(self, checkpoint) -> None: + """LightningModule hook: + https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-load-checkpoint + """ + if isinstance(self.model, list): + for i in range(len(self.model)): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + self.model[i].module.load_state_dict(checkpoint[f'model{i}'], strict=True) + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + + def parameters(self): + if isinstance(self.model, list): + return itertools.chain.from_iterable(module.parameters() for module in self.model) + else: + return self.model.parameters() diff --git a/nemo/collections/multimodal/models/content_filter/__init__.py b/nemo/collections/multimodal/models/content_filter/__init__.py new file mode 100644 index 000000000000..9ff638194e7a --- /dev/null +++ b/nemo/collections/multimodal/models/content_filter/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py b/nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py new file mode 100644 index 000000000000..d49e2bfafe6b --- /dev/null +++ b/nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py @@ -0,0 +1,398 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import itertools +from typing import List, Optional, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +from apex.transformer.pipeline_parallel.utils import get_num_microbatches +from megatron.core import parallel_state +from megatron.core.pipeline_parallel.schedules import get_forward_backward_func +from omegaconf.dictconfig import DictConfig +from pytorch_lightning.accelerators import CPUAccelerator +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.multimodal.data.clip.clip_dataset import tokenize +from nemo.collections.multimodal.data.nsfw.nsfw_dataset import build_dataset +from nemo.collections.multimodal.models.clip.megatron_clip_models import CLIPTextTransformer, CLIPVisionTransformer +from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel +from nemo.collections.nlp.modules.common.megatron.build_model import build_model +from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule +from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.core.classes.common import PretrainedModelInfo +from nemo.utils import logging + + +class ContentFilteringModel(MegatronModule): + """Clip based content filtering model for NSFW.""" + + def __init__(self, model_cfg: DictConfig, model_parallel_config, padded_vocab_size: int, tokenizer: Optional): + super(ContentFilteringModel, self).__init__() + self.cfg = model_cfg + self.config = model_parallel_config + self.tokenizer = tokenizer + + self.concept_list = self._load_concept_list(model_cfg.concepts) + self.concept_count = len(self.concept_list) + + self.vision_encoder = CLIPVisionTransformer( + model_cfg.vision, model_parallel_config, pre_process=True, post_process=True + ) + + if "text" in model_cfg and model_cfg.text is not None: + self.text_encoder = CLIPTextTransformer( + model_cfg.text, model_parallel_config, padded_vocab_size, pre_process=True, post_process=True + ) + else: + self.text_encoder = None + + self.mlp_similarity_model = nn.Sequential( + nn.Linear(model_cfg.output_dim * 2, model_cfg.sim_hidden_dim), + nn.ReLU(), + nn.Linear(model_cfg.sim_hidden_dim, 1), + ) + + self.nn_classifier = nn.Sequential( + nn.Linear(self.concept_count * 2 + model_cfg.output_dim, model_cfg.cls_hidden_dim), + nn.ReLU(), + nn.Linear(model_cfg.cls_hidden_dim, 1), + ) + + self.register_buffer("concepts", torch.zeros(self.concept_count, model_cfg.output_dim)) + + def initialize_concept_embeddings(self, concepts: torch.Tensor): + if self.text_encoder is None: + return + + self.concepts.copy_(concepts.detach()) + del self.text_encoder + self.text_encoder = None + + def forward(self, image: torch.Tensor, mlp_factor: float = 1.0, emb_factor: float = 1.0) -> torch.Tensor: + """Perform model forward pass for given image and factor. + While inferencing, factors should be equal to default value + """ + + with torch.no_grad(): + embedding = self.vision_encoder(image).detach() + cos_similarity = self.cosine_similarity(embedding, self.concepts) + mlp_similarity = self.mlp_similarity(embedding, self.concepts) + + features = torch.cat([cos_similarity, mlp_similarity * mlp_factor, embedding * emb_factor], dim=-1) + + return self.nn_classifier(features) + + def cosine_similarity(self, prediction: torch.Tensor, target: torch.Tensor) -> torch.Tensor: + """Compute cosine similarity between prediction tensor and target tensor + Args: + prediction: Tensor of shape [X, H] for prediction embedding + target: Tensor of shape [Y, H] for target to compare + Returns: + Similarity matrix of shape [X, Y] and value range [-1, 1] + """ + normalized_prediction = F.normalize(prediction) + normalized_target = F.normalize(target) + + return torch.matmul(normalized_prediction, normalized_target.t()) + + def mlp_similarity(self, prediction: torch.Tensor, target: torch.Tensor) -> torch.Tensor: + """Compute mlp based similarity between prediction tensor and target tensor + Args: + prediction: Tensor of shape [X, H] for prediction embedding + target: Tensor of shape [Y, H] for target to compare + Returns: + Similarity matrix of shape [X, Y] and value range [-1, 1] + """ + + prediction, target = torch.broadcast_tensors(prediction.unsqueeze(1), target.unsqueeze(0)) + + combined = torch.cat([prediction, target], dim=-1) + + return torch.tanh(self.mlp_similarity_model(combined).squeeze(-1)) + + def set_input_tensor(self, input_tensor: torch.Tensor): + pass + + def _load_concept_list(self, config: Union[str, List[str]]) -> List[str]: + if isinstance(config, str): + config = [config] + + result_list = [] + for concept_file in config: + with open(concept_file, "r") as f: + result_list += [x.strip() for x in f.readlines() if x.strip() != ""] + + return result_list + + +def _get_autocast_dtype(precision: str): + if precision in ["bf16", "bf16-mixed"]: + return torch.bfloat16 + if precision in [32, "32", "32-true"]: + return torch.float + if precision in [16, "16", "16-mixed"]: + return torch.half + raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') + + +class MegatronContentFilteringModel(MegatronBaseModel): + def __init__(self, cfg: DictConfig, trainer: Trainer): + super(MegatronContentFilteringModel, self).__init__(cfg, trainer) + + self.model = build_model( + model_provider_func=self.model_provider_func, + wrap_with_ddp=False, + on_cpu=isinstance(self.trainer.accelerator, CPUAccelerator), + virtual_pipeline_model_parallel_size=None, + ) + self.model = self.model[0] + + self.megatron_amp_O2 = cfg.get("megatron_amp_O2", False) + if self.megatron_amp_O2: + if isinstance(self.model, list): + self.model = [ + Float16Module(config=self.model_parallel_config, module=x, precision=cfg.precision) + for x in self.model + ] + else: + self.model = Float16Module( + config=self.model_parallel_config, module=self.model, precision=cfg.precision + ) + + self.autocast_dtype = _get_autocast_dtype(self.trainer.precision) + self.enable_autocast = (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) + + self.init_consumed_samples = 0 + self.mlp_factor = 1.0 + self.emb_factor = 1.0 + + self.validation_metrics = None + + def get_module_list(self): + if isinstance(self.model, Float16Module): + return [self.model.module] + else: + return [self.model] + + def model_provider_func(self, pre_process, post_process): + return ContentFilteringModel(self.cfg, self.model_parallel_config, self.padded_vocab_size, self.tokenizer) + + def forward(self, image: torch.Tensor, mlp_factor: float = 1.0, emb_factor: float = 1.0) -> torch.Tensor: + return self.model(image, mlp_factor, emb_factor) + + def get_forward_output_and_loss_func(self, with_accuracy: bool = False): + def loss_fn(prediction: torch.Tensor, target: torch.Tensor): + loss = F.binary_cross_entropy_with_logits(prediction, target) + out_dict = {"loss": loss} + + if with_accuracy: + accuracy_components = torch.stack( + [ + ((prediction > 0) & (target == 1.0)).sum(), # tp + ((prediction < 0) & (target == 0.0)).sum(), # tn + ((prediction > 0) & (target == 0.0)).sum(), # fp + ((prediction < 0) & (target == 1.0)).sum(), # fn + ] + ) + out_dict["accuracy"] = accuracy_components + + return loss, out_dict + + def forward_step(dataloader_iter, model): + images, labels = next(dataloader_iter) + + if ( + parallel_state.get_pipeline_model_parallel_world_size() == 1 + or parallel_state.is_pipeline_first_stage() + ): + images = images.cuda(non_blocking=True) + labels = labels.cuda(non_blocking=True) + else: + images, labels = None, None + + classification = model(images, mlp_factor=self.mlp_factor, emb_factor=self.emb_factor) + + return classification.squeeze(-1), functools.partial(loss_fn, target=labels.float()) + + return forward_step + + def get_forward_embedding_func(self): + def forward_step(dataloader_iter, model): + concepts = next(dataloader_iter) + concepts = tokenize(concepts, self.tokenizer, self.cfg.text.max_position_embeddings) + return (model.text_encoder(concepts.cuda(non_blocking=True)), lambda x: (0.0, {"concepts": x})) + + return forward_step + + def fwd_bwd_step(self, dataloader_iter, batch_idx: int, forward_only: bool): + fwd_bwd_function = get_forward_backward_func() + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(with_accuracy=forward_only), + data_iterator=dataloader_iter, + model=self.model, + num_microbatches=get_num_microbatches(), + forward_only=forward_only, + seq_length=None, + micro_batch_size=self.cfg.micro_batch_size, + ) + + metrics = None + if losses_reduced_per_micro_batch: + loss_mean = torch.stack([l["loss"] for l in losses_reduced_per_micro_batch]).mean() + if forward_only: + metrics = torch.stack([l["accuracy"] for l in losses_reduced_per_micro_batch]).sum(dim=0) + else: + loss_mean = 0.0 + + return loss_mean, metrics + + def training_step(self, dataloader_iter, batch_idx): + self._optimizer.zero_grad() + + loss_mean, _ = self.fwd_bwd_step(dataloader_iter, batch_idx, forward_only=False) + + if self.megatron_amp_O2: + self._optimizer.allreduce_main_grads() + else: + self.allreduce_gradients() + + torch.distributed.broadcast(loss_mean, get_last_rank()) + if self.cfg.precision == 16: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log("loss_scale", loss_scale, batch_size=1, prog_bar=True) + + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, rank_zero_only=True, batch_size=1, prog_bar=True) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log( + 'consumed_samples', + self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), + prog_bar=True, + rank_zero_only=True, + batch_size=1, + ) + + return loss_mean + + def validation_step(self, dataloader_iter, batch_idx): + loss, metrics = self.fwd_bwd_step(dataloader_iter, batch_idx, forward_only=True) + if self.validation_metrics is None: + self.validation_metrics = metrics + else: + self.validation_metrics += metrics + + self.validation_step_outputs.append(loss) + return loss + + def on_validation_epoch_end(self): + torch.distributed.all_reduce(self.validation_metrics, op=torch.distributed.ReduceOp.SUM) + accuracy = (self.validation_metrics[0] + self.validation_metrics[1]) / self.validation_metrics.sum() + self.validation_metrics = None + + averaged_metrics = 0 + if parallel_state.is_pipeline_last_stage(): + averaged_metrics = torch.stack(self.validation_step_outputs).mean() + torch.distributed.broadcast(averaged_metrics, get_last_rank()) + self.log("val_loss", averaged_metrics, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log("accuracy", accuracy, prog_bar=True, rank_zero_only=True, batch_size=1) + + logging.info(f"Current evaluation accuracy: {accuracy}") + + return averaged_metrics + + def test_step(self, dataloader_iter, batch_idx): + return self.validation_step(dataloader_iter, batch_idx) + + def backward(self, *args, **kwargs): + pass + + def optimizer_zero_grad(self, *args, **kwargs): + pass + + def on_fit_start(self): + if self.model.text_encoder is not None: + fwd_bwd_function = get_forward_backward_func() + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_embedding_func(), + data_iterator=iter([self.model.concept_list]), + model=self.model, + num_microbatches=get_num_microbatches(), + forward_only=True, + seq_length=None, + micro_batch_size=self.model.concept_count, + ) + + concepts = torch.cat([x["concepts"] for x in losses_reduced_per_micro_batch], dim=0) + self.model.initialize_concept_embeddings(concepts) + self._cfg["text"] = None + + def setup(self, stage): + resume_checkpoint_path = self.trainer.ckpt_path + self.init_consumed_samples = ( + self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) if resume_checkpoint_path else 0 + ) + self.setup_training_data(self.cfg) + self.setup_validation_data(self.cfg) + + def setup_training_data(self, cfg: DictConfig) -> None: + logging.info("Setting up training dataset.") + train_ds = build_dataset(cfg, self.compute_consumed_samples(0), is_train=True) + + sampler = torch.utils.data.distributed.DistributedSampler( + train_ds, num_replicas=self.trainer.world_size, rank=self.trainer.global_rank, shuffle=True + ) + + self._train_dl = torch.utils.data.DataLoader( + train_ds, + sampler=sampler, + batch_size=cfg.micro_batch_size, + num_workers=cfg.data.num_workers, + pin_memory=True, + drop_last=cfg.data.train.get("drop_last", True), + persistent_workers=True, + ) + + def setup_validation_data(self, cfg: DictConfig) -> None: + logging.info("Setting up validation dataset.") + val_ds = build_dataset(cfg, self.compute_consumed_samples(0), is_train=False) + + sampler = torch.utils.data.distributed.DistributedSampler( + val_ds, num_replicas=self.trainer.world_size, rank=self.trainer.global_rank, shuffle=True + ) + + self._validation_dl = torch.utils.data.DataLoader( + val_ds, + sampler=sampler, + batch_size=cfg.micro_batch_size, + num_workers=cfg.data.num_workers, + pin_memory=True, + drop_last=cfg.data.validation.get("drop_last", True), + persistent_workers=True, + ) + + def parameters(self): + return itertools.chain(self.model.mlp_similarity_model.parameters(), self.model.nn_classifier.parameters()) + + def on_load_checkpoint(self, checkpoint) -> None: + if "model.concepts" in checkpoint["state_dict"]: + self.model.text_encoder = None + + @classmethod + def list_available_models(cls) -> Optional[PretrainedModelInfo]: + return None diff --git a/nemo/collections/multimodal/models/controlnet/__init__.py b/nemo/collections/multimodal/models/controlnet/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/controlnet/controlnet.py b/nemo/collections/multimodal/models/controlnet/controlnet.py new file mode 100644 index 000000000000..0b0c7b291d69 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/controlnet.py @@ -0,0 +1,1002 @@ +from typing import Any, Dict, Optional, Union + +import einops +import torch +import torch.nn as nn +from einops import rearrange, repeat +from omegaconf import DictConfig, OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.utilities.rank_zero import rank_zero_only +from torch._dynamo import optimize +from torch._inductor import config as inductor_config +from torchvision.utils import make_grid + +from nemo.collections.multimodal.data.controlnet.controlnet_dataset import build_train_valid_datasets +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion +from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler +from nemo.collections.multimodal.modules.stable_diffusion.attention import SpatialTransformer +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel import ( + AttentionBlock, + Downsample, + ResBlock, + TimestepEmbedSequential, + UNetModel, +) +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + conv_nd, + linear, + timestep_embedding, + zero_module, +) +from nemo.collections.multimodal.parts.stable_diffusion.utils import exists, log_txt_as_img +from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel +from nemo.collections.nlp.modules.common.megatron.module import Float16Module +from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.utils import logging + +try: + from apex import amp + from apex.transformer.enums import AttnMaskType + from apex.transformer.pipeline_parallel.utils import get_num_microbatches + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + +try: + from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +class ControlledUnetModel(UNetModel): + def forward(self, x, timesteps=None, context=None, control=None, only_mid_control=False, **kwargs): + hs = [] + with torch.no_grad(): + t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False) + emb = self.time_embed(t_emb) + h = x.type(emb.dtype) + for module in self.input_blocks: + h = module(h, emb, context) + hs.append(h) + h = self.middle_block(h, emb, context) + + if control is not None: + h += control.pop() + + for i, module in enumerate(self.output_blocks): + if only_mid_control or control is None: + h = torch.cat([h, hs.pop()], dim=1) + else: + h = torch.cat([h, hs.pop() + control.pop()], dim=1) + h = module(h, emb, context) + + h = h.type(x.dtype) + return self.out(h) + + +class ControlLDM(LatentDiffusion): + def __init__(self, cfg, model_parallel_config): + super().__init__(cfg=cfg, model_parallel_config=model_parallel_config) + self.control_model = ControlLDM.from_config_dict(cfg.control_stage_config) + self.control_key = cfg.control_key + self.only_mid_control = cfg.only_mid_control + self.control_scales = [1.0] * 13 + self.sd_locked = cfg.sd_locked + self.channels_last = cfg.channels_last + + if cfg.get("inductor", False): + # TorchInductor with CUDA graph can lead to OOM + inductor_config.triton.cudagraphs = cfg.get("inductor_cudagraphs", False) + torch._dynamo.config.dynamic_shapes = False + torch._dynamo.config.automatic_dynamic_shapes = False + self.control_model = torch.compile(self.control_model) + + if self.channels_last: + self.control_model = self.control_model.to(memory_format=torch.channels_last) + + @torch.no_grad() + def get_input(self, batch, k, bs=None, *args, **kwargs): + x, c = super().get_input(batch, self.first_stage_key, *args, **kwargs) + control = batch[self.control_key] + if bs is not None: + control = control[:bs] + control = control.to(torch.cuda.current_device()) + if self.channels_last: + control = control.permute(0, 3, 1, 2).to(non_blocking=True) + else: + control = einops.rearrange(control, 'b h w c -> b c h w') + control = control.to(memory_format=torch.contiguous_format).float() + return x, dict(c_crossattn=c, c_concat=control) + + def apply_model(self, x_noisy, t, cond, *args, **kwargs): + assert isinstance(cond, dict) + diffusion_model = self.model.diffusion_model + + # cond_txt = torch.cat(cond['c_crossattn'], 1) ## Has removed this first dim in the get_input function, same for below hint input + cond_txt = cond['c_crossattn'] + + if cond['c_concat'] is None: + eps = diffusion_model( + x=x_noisy, timesteps=t, context=cond_txt, control=None, only_mid_control=self.only_mid_control + ) + else: + control = self.control_model(x=x_noisy, hint=cond['c_concat'], timesteps=t, context=cond_txt) + control = [c * scale for c, scale in zip(control, self.control_scales)] + eps = diffusion_model( + x=x_noisy, timesteps=t, context=cond_txt, control=control, only_mid_control=self.only_mid_control + ) + return eps + + @torch.no_grad() + def get_unconditional_conditioning(self, N): + return self.get_learned_conditioning([""] * N) + + @torch.no_grad() + def log_images( + self, + batch, + N=4, + n_row=2, + sample=False, + ddim_steps=50, + ddim_eta=0.0, + return_keys=None, + quantize_denoised=True, + inpaint=True, + plot_denoise_rows=False, + plot_progressive_rows=True, + plot_diffusion_rows=False, + unconditional_guidance_scale=9.0, + unconditional_guidance_label=None, + use_ema_scope=True, + **kwargs, + ): + use_ddim = ddim_steps is not None + + log = dict() + batch = next(batch) + batch['images'] = batch['images'].to(torch.cuda.current_device()) + batch['hint'] = batch['hint'].to(torch.cuda.current_device()) + N = batch['images'].shape[0] + z, c = self.get_input(batch, self.first_stage_key, bs=N) + c_cat, c = c["c_concat"][:N], c["c_crossattn"][:N] + N = min(z.shape[0], N) + n_row = min(z.shape[0], n_row) + log["reconstruction"] = self.decode_first_stage(z) + log["control"] = c_cat * 2.0 - 1.0 + log["conditioning"] = log_txt_as_img((512, 512), batch[self.cond_stage_key], size=16) + + if plot_diffusion_rows: + # get diffusion row + diffusion_row = list() + z_start = z[:n_row] + for t in range(self.num_timesteps): + if t % self.log_every_t == 0 or t == self.num_timesteps - 1: + t = repeat(torch.tensor([t]), '1 -> b', b=n_row) + t = t.to(self.device).long() + noise = torch.randn_like(z_start) + z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise) + diffusion_row.append(self.decode_first_stage(z_noisy)) + + diffusion_row = torch.stack(diffusion_row) # n_log_step, n_row, C, H, W + diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w') + diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w') + diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0]) + log["diffusion_row"] = diffusion_grid + + if sample: + # get denoise row + samples, z_denoise_row = self.sample_log( + cond={"c_concat": c_cat, "c_crossattn": c}, + batch_size=N, + ddim=use_ddim, + ddim_steps=ddim_steps, + eta=ddim_eta, + ) + x_samples = self.decode_first_stage(samples) + log["samples"] = x_samples + if plot_denoise_rows: + denoise_grid = self._get_denoise_row_from_list(z_denoise_row) + log["denoise_row"] = denoise_grid + + if unconditional_guidance_scale > 1.0: + uc_cross = self.get_unconditional_conditioning(N) + uc_cat = c_cat # torch.zeros_like(c_cat) + uc_full = {"c_concat": uc_cat, "c_crossattn": uc_cross} + samples_cfg, _ = self.sample_log( + cond={"c_concat": c_cat, "c_crossattn": c}, + batch_size=N, + ddim=use_ddim, + ddim_steps=ddim_steps, + eta=ddim_eta, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=uc_full, + ) + x_samples_cfg = self.decode_first_stage(samples_cfg) + log[f"samples_cfg_scale_{unconditional_guidance_scale:.2f}"] = x_samples_cfg + + return log + + @torch.no_grad() + def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): + ddim_sampler = DDIMSampler(self) + c, h, w = cond["c_concat"][0].shape + shape = (self.channels, h // 8, w // 8) + samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, shape, cond, verbose=False, **kwargs) + return samples, intermediates + + def parameters(self): + params = list(self.control_model.parameters()) + if not self.sd_locked: + params += list(self.model.diffusion_model.output_blocks.parameters()) + params += list(self.model.diffusion_model.out.parameters()) + return params + + def low_vram_shift(self, is_diffusing): + if is_diffusing: + self.model = self.model.cuda() + self.control_model = self.control_model.cuda() + self.first_stage_model = self.first_stage_model.cpu() + self.cond_stage_model = self.cond_stage_model.cpu() + else: + self.model = self.model.cpu() + self.control_model = self.control_model.cpu() + self.first_stage_model = self.first_stage_model.cuda() + self.cond_stage_model = self.cond_stage_model.cuda() + + +class ControlNet(nn.Module): + def __init__( + self, + image_size, + in_channels, + model_channels, + hint_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + use_checkpoint=False, + use_fp16=False, + num_heads=-1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + use_spatial_transformer=False, # custom transformer support + transformer_depth=1, # custom transformer support + context_dim=None, # custom transformer support + n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model + legacy=True, + disable_self_attentions=None, ###TODO MMY these are new + num_attention_blocks=None, + disable_middle_self_attn=False, + use_linear_in_transformer=False, + use_flash_attention=False, + from_pretrained_unet=None, + from_NeMo=True, + ): + super().__init__() + if use_spatial_transformer: + assert ( + context_dim is not None + ), 'Fool!! You forgot to include the dimension of your cross-attention conditioning...' + + if context_dim is not None: + assert ( + use_spatial_transformer + ), 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...' + from omegaconf.listconfig import ListConfig + + if type(context_dim) == ListConfig: + context_dim = list(context_dim) + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + if num_heads == -1: + assert num_head_channels != -1, 'Either num_heads or num_head_channels has to be set' + + if num_head_channels == -1: + assert num_heads != -1, 'Either num_heads or num_head_channels has to be set' + + self.dims = dims + self.image_size = image_size + self.in_channels = in_channels + self.model_channels = model_channels + if isinstance(num_res_blocks, int): + self.num_res_blocks = len(channel_mult) * [num_res_blocks] + else: + if len(num_res_blocks) != len(channel_mult): + raise ValueError( + "provide num_res_blocks either as an int (globally constant) or " + "as a list/tuple (per-level) with the same length as channel_mult" + ) + self.num_res_blocks = num_res_blocks + if disable_self_attentions is not None: + # should be a list of booleans, indicating whether to disable self-attention in TransformerBlocks or not + assert len(disable_self_attentions) == len(channel_mult) + if num_attention_blocks is not None: + assert len(num_attention_blocks) == len(self.num_res_blocks) + assert all( + map(lambda i: self.num_res_blocks[i] >= num_attention_blocks[i], range(len(num_attention_blocks))) + ) + print( + f"Constructor of UNetModel received num_attention_blocks={num_attention_blocks}. " + f"This option has LESS priority than attention_resolutions {attention_resolutions}, " + f"i.e., in cases where num_attention_blocks[i] > 0 but 2**i not in attention_resolutions, " + f"attention will still not be set." + ) + + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.use_checkpoint = use_checkpoint + self.dtype = torch.float16 if use_fp16 else torch.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + self.predict_codebook_ids = n_embed is not None + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), nn.SiLU(), linear(time_embed_dim, time_embed_dim), + ) + + self.input_blocks = nn.ModuleList( + [TimestepEmbedSequential(conv_nd(dims, in_channels, model_channels, 3, padding=1))] + ) + self.zero_convs = nn.ModuleList([self.make_zero_conv(model_channels)]) + + self.input_hint_block = TimestepEmbedSequential( + conv_nd(dims, hint_channels, 16, 3, padding=1), + nn.SiLU(), + conv_nd(dims, 16, 16, 3, padding=1), + nn.SiLU(), + conv_nd(dims, 16, 32, 3, padding=1, stride=2), + nn.SiLU(), + conv_nd(dims, 32, 32, 3, padding=1), + nn.SiLU(), + conv_nd(dims, 32, 96, 3, padding=1, stride=2), + nn.SiLU(), + conv_nd(dims, 96, 96, 3, padding=1), + nn.SiLU(), + conv_nd(dims, 96, 256, 3, padding=1, stride=2), + nn.SiLU(), + zero_module(conv_nd(dims, 256, model_channels, 3, padding=1)), + ) + + self._feature_size = model_channels + input_block_chans = [model_channels] + ch = model_channels + ds = 1 + for level, mult in enumerate(channel_mult): + for nr in range(self.num_res_blocks[level]): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=mult * model_channels, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = mult * model_channels + if ds in attention_resolutions: + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + # num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + if exists(disable_self_attentions): + disabled_sa = disable_self_attentions[level] + else: + disabled_sa = False + + if not exists(num_attention_blocks) or nr < num_attention_blocks[level]: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) + if not use_spatial_transformer + else SpatialTransformer( + ch, + num_heads, + dim_head, + depth=transformer_depth, + context_dim=context_dim, + disable_self_attn=disabled_sa, + use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint, + use_flash_attention=use_flash_attention, + ) + ) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self.zero_convs.append(self.make_zero_conv(ch)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) + if resblock_updown + else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch) + ) + ) + ch = out_ch + input_block_chans.append(ch) + self.zero_convs.append(self.make_zero_conv(ch)) + ds *= 2 + self._feature_size += ch + + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + # num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) + if not use_spatial_transformer + else SpatialTransformer( # always uses a self-attn + ch, + num_heads, + dim_head, + depth=transformer_depth, + context_dim=context_dim, + disable_self_attn=disable_middle_self_attn, + use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint, + use_flash_attention=use_flash_attention, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self.middle_block_out = self.make_zero_conv(ch) + self._feature_size += ch + + if from_pretrained_unet is not None: + self.load_from_unet(from_pretrained_unet=from_pretrained_unet, from_NeMo=from_NeMo) + + def load_from_unet(self, from_pretrained_unet, from_NeMo=True): + if not from_NeMo: + print('loading from other source of unet is experimental! Carefully check if keys are loaded correctly.') + else: + print("Loading unet blocks from sd") + + state_dict = torch.load(from_pretrained_unet, map_location='cpu') + state_dict = state_dict['state_dict'] + model_state_dict = self.state_dict() + + re_state_dict = {} + for key_, value_ in state_dict.items(): + if key_.startswith('model.model.diffusion_model'): + re_state_dict[key_.replace('model.model.diffusion_model.', '')] = value_ + if key_.startswith('model.diffusion_model'): + re_state_dict[key_.replace('model.diffusion_model.', '')] = value_ + if key_.startswith('model.model._orig_mod.diffusion_model'): + re_state_dict[key_.replace('model.model._orig_mod.diffusion_model.', '')] = value_ + if key_.startswith('model._orig_mod.diffusion_model'): + re_state_dict[key_.replace('model._orig_mod.diffusion_model.', '')] = value_ + + expected_keys = list(model_state_dict.keys()) + loaded_keys = list(re_state_dict.keys()) + missing_keys = list(set(expected_keys) - set(loaded_keys)) + unexpected_keys = list(set(loaded_keys) - set(expected_keys)) + + if ( + 'input_blocks.1.0.in_layers.2.weight' in loaded_keys + and 'input_blocks.1.0.in_layers.1.weight' in expected_keys + ): + # GroupNormOpt fuses activation function to one layer, thus the indexing of weights are shifted for following + for key_ in missing_keys: + if key_.startswith('input_blocks') or key_.startswith('middle_block.'): + s = key_.split('.') + idx = int(s[-2]) + new_key_ = ".".join(s[:-2] + [str(int(idx + 1))] + [s[-1]]) + re_state_dict[key_] = re_state_dict[new_key_] + + loaded_keys = list(re_state_dict.keys()) + missing_keys = list(set(expected_keys) - set(loaded_keys)) + unexpected_keys = list(set(loaded_keys) - set(expected_keys)) + + self.load_state_dict(re_state_dict, strict=False) + + if len(missing_keys) > 42: + print( + 'warning: only input hint blocks and zero conv layers are randomly initialized. This message indicates some unet blocks are not loaded correctly.' + ) + print(f'There is {len(missing_keys)} total missing keys') + print("Missing:", missing_keys) + print("Unexpected:", unexpected_keys) + else: + print("sd blocks loaded successfully") + + # Check if unet blocks are loaded + # for key, value in self.state_dict().items(): + # if key in missing_keys: + # continue + # if torch.allclose(value, re_state_dict[key], atol = 1e-5): + # pass + # else: + # print(f"{key} not matching after loading") + + def make_zero_conv(self, channels): + return TimestepEmbedSequential(zero_module(conv_nd(self.dims, channels, channels, 1, padding=0))) + + def forward(self, x, hint, timesteps, context, **kwargs): + t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False) + emb = self.time_embed(t_emb) + + guided_hint = self.input_hint_block(hint, emb, context) + outs = [] + + h = x.type(self.dtype) + for module, zero_conv in zip(self.input_blocks, self.zero_convs): + if guided_hint is not None: + h = module(h, emb, context) + h += guided_hint + guided_hint = None + else: + h = module(h, emb, context) + outs.append(zero_conv(h, emb, context)) + + h = self.middle_block(h, emb, context) + outs.append(self.middle_block_out(h, emb, context)) + + return outs + + +class MegatronControlNet(MegatronBaseModel): + def __init__(self, cfg: DictConfig, trainer: Trainer): + if not HAVE_APEX: + raise ImportError( + "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + + if not HAVE_MEGATRON_CORE: + raise ImportError( + "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + + # this prevents base constructor from initializing tokenizer + self.tokenizer = None + super().__init__(cfg, trainer=trainer) + + self._validate_trainer() + + # megatron_amp_O2 is not yet supported in diffusion models + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) + + self.model = self.model_provider_func() + + self.conditioning_keys = [] + + if self.trainer.precision in ['bf16', 'bf16-mixed']: + self.autocast_dtype = torch.bfloat16 + elif self.trainer.precision in [32, '32', '32-true']: + self.autocast_dtype = torch.float + elif self.trainer.precision in [16, '16', '16-mixed']: + self.autocast_dtype = torch.half + else: + raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') + + def get_module_list(self): + if isinstance(self.model, list): + return [model.module if isinstance(model, Float16Module) else model for model in self.model] + elif isinstance(self.model, Float16Module): + return [self.model.module] + else: + return [self.model] + + def model_provider_func(self, pre_process=True, post_process=True): + """Model depends on pipeline paralellism.""" + model = ControlLDM(cfg=self.cfg, model_parallel_config=self.model_parallel_config) + return model + + def forward(self, x, c, *args, **kwargs): + output_tensor = self.model(x, c, *args, **kwargs) + return output_tensor + + @rank_zero_only + @torch.no_grad() + def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): + if self.cfg.scale_by_std and self.current_epoch == 0 and self.global_step == 0 and batch_idx == 0: + assert self.cfg.scale_factor == 1.0, 'rather not use custom rescaling and std-rescaling simultaneously' + batch[self.cfg.first_stage_key] = batch[self.cfg.first_stage_key].cuda(non_blocking=True) + self.model.on_train_batch_start(batch, batch_idx) + + def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): + tensor_shape = None # Placeholder + + # handle asynchronous grad reduction + no_sync_func = None + if not forward_only and self.with_distributed_adam: + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) + + # pipeline schedules will get these from self.model.config + for module in self.get_module_list(): + module.config.no_sync_func = no_sync_func + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + fwd_bwd_function = get_forward_backward_func() + + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=self.model, + num_microbatches=get_num_microbatches(), + forward_only=forward_only, + seq_length=None, + micro_batch_size=self.cfg.micro_batch_size, + ) + + # losses_reduced_per_micro_batch is a list of dictionaries + # [{"loss": 0.1}, {"loss": 0.2}, ...] which are from gradient accumulation steps + # only the last stages of the pipeline return losses + loss_dict = {} + if losses_reduced_per_micro_batch: + if (not forward_only) or self.cfg.data.get('validation_drop_last', True): + # average loss across micro batches + for key in losses_reduced_per_micro_batch[0]: + loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_dict[key] = loss_tensor.mean() + loss_mean = loss_dict["train/loss"] + else: + raise NotImplementedError("Losses of micro batches sizes must be uniform!") + else: + if forward_only: + loss_mean = [] + else: + loss_mean = torch.tensor(0.0, device=torch.cuda.current_device()) + + return loss_mean, loss_dict + + def training_step(self, dataloader_iter, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + Batch should be a list of microbatches and those microbatches should on CPU. + Microbatches are then moved to GPU during the pipeline. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + # we zero grads here because we also call backward in the apex fwd/bwd functions + self._optimizer.zero_grad() + + loss_mean, loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, False) + + if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): + self.allreduce_sequence_parallel_gradients() + + if self.with_distributed_adam: + # gradients are reduced internally in distributed optimizer + pass + elif self.megatron_amp_O2: + # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): + # # main grads are stored in the MainParamsOptimizer wrapper + # self._optimizer.allreduce_main_grads() + self._optimizer.allreduce_main_grads() + else: + # async grad allreduce is not currently implemented for O1/autocasting mixed precision training + # so we all-reduce gradients after the pipeline + self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) + + if self.cfg.precision == [16, '16', '16-mixed']: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log('loss_scale', loss_scale, batch_size=1) + + self.log_dict(loss_dict, prog_bar=False, logger=True, on_step=True, rank_zero_only=True, batch_size=1) + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log( + 'consumed_samples', + self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), + prog_bar=True, + rank_zero_only=True, + batch_size=1, + ) + return loss_mean + + def backward(self, *args, **kwargs): + """ LightningModule hook to do backward. + We want this to do nothing since we run backward in the fwd/bwd functions from apex. + No need to call it here. + """ + pass + + def optimizer_zero_grad(self, *args, **kwargs): + """ LightningModule hook to zero grad. + We want this to do nothing as we are zeroing grads during the training_step. + """ + pass + + def _append_sequence_parallel_module_grads(self, module, grads): + """ Helper method for allreduce_sequence_parallel_gradients""" + + for param in module.parameters(): + sequence_parallel_param = getattr(param, 'sequence_parallel', False) + if sequence_parallel_param and param.requires_grad: + if self.megatron_amp_O2: + grad = param.main_grad + else: + grad = param.grad + grads.append(grad.data) + + def get_forward_output_and_loss_func(self): + def process_batch(batch): + """ Prepares the global batch for apex fwd/bwd functions. + Global batch is a list of micro batches. + """ + # noise_map, condition + batch[self.cfg.first_stage_key] = batch[self.cfg.first_stage_key].cuda(non_blocking=True) + if isinstance(batch[self.cfg.cond_stage_key], torch.Tensor): + # in the case of precached text embeddings, cond_stage is also a tensor + batch[self.cfg.cond_stage_key] = batch[self.cfg.cond_stage_key].cuda(non_blocking=True) + + # SD has more dedicated structure for encoding, so we enable autocasting here as well + with torch.cuda.amp.autocast( + self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, + ): + x, c = self.model.get_input(batch, self.cfg.first_stage_key) + + if not isinstance(c, dict): + return [x, c] + + if len(self.conditioning_keys) == 0: + self.conditioning_keys = list(c.keys()) + c_list = [c[key] for key in self.conditioning_keys] + return [x, *c_list] + + def fwd_output_and_loss_func(dataloader_iter, model): + batch = next(dataloader_iter) + batch = process_batch(batch) + batch = [x.cuda(non_blocking=True) for x in batch] + if len(self.conditioning_keys) == 0: + x, c = batch + else: + x = batch[0] + c = {} + for idx, key in enumerate(self.conditioning_keys): + c[key] = batch[1 + idx] + loss, loss_dict = model(x, c) + + def dummy(output_tensor): + return loss, loss_dict + + # output_tensor, and a function to convert output_tensor to loss + loss_dict + return loss, dummy + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(batch, model): + raise NotImplementedError + + return fwd_output_only_func + + @torch.no_grad() + def validation_step(self, batch, batch_idx): + tensor_shape = None # Placeholder + fwd_bwd_function = get_forward_backward_func() + + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), + forward_only=True, + tensor_shape=None, # required by pipeline parallelism + dtype=self.autocast_dtype, + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=True, + ) + # only the last stages of the pipeline return losses + val_loss_dict = {} + if losses_reduced_per_micro_batch: + # average loss across micro batches + for key in losses_reduced_per_micro_batch[0]: + loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + val_loss_dict[key] = loss_tensor.mean() + + self.log_dict(val_loss_dict, prog_bar=False, logger=True, on_step=False, on_epoch=True) + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + self.model.rng.manual_seed(self.cfg.seed + 100 * parallel_state.get_data_parallel_rank()) + + # log number of parameters + if isinstance(self.model, list): + num_parameters_on_device = sum( + [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] + ) + else: + num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) + + # to be summed across data parallel group + total_num_parameters = torch.tensor(num_parameters_on_device).cuda(non_blocking=True) + + torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer.ckpt_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + # allowing restored models to optionally setup datasets + self.build_train_valid_test_datasets() + + # Batch size need to be provided for webdatset + self._num_micro_batches = get_num_microbatches() + self._micro_batch_size = self.cfg.micro_batch_size + + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + + def build_train_valid_test_datasets(self): + logging.info('Building datasets for Stable Diffusion...') + if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): + raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") + + if self.cfg.first_stage_key.endswith("encoded"): + self._train_ds, self._validation_ds = build_train_valid_precached_datasets( + model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0), + ) + else: + self._train_ds, self._validation_ds = build_train_valid_datasets( + model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0) + ) + self._test_ds = None + + if self._train_ds is not None: + logging.info(f'Length of train dataset: {len(self._train_ds)}') + if self._validation_ds is not None: + logging.info(f'Length of val dataset: {len(self._validation_ds)}') + if self._test_ds is not None: + logging.info(f'Length of test dataset: {len(self._test_ds)}') + logging.info(f'Finished building datasets for LatentDiffusion.') + return self._train_ds, self._validation_ds, self._test_ds + + def setup_training_data(self, cfg): + if hasattr(self, '_train_ds') and self._train_ds is not None: + consumed_samples = self.compute_consumed_samples(0) + logging.info( + f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' + ) + self._train_dl = torch.utils.data.DataLoader( + self._train_ds, + batch_size=self._micro_batch_size, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=True, + persistent_workers=True, + ) + + def setup_validation_data(self, cfg): + if hasattr(self, '_validation_ds') and self._validation_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' + ) + self._validation_dl = torch.utils.data.DataLoader( + self._validation_ds, + batch_size=self._micro_batch_size, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=False, + persistent_workers=True, + ) + + def setup_test_data(self, cfg): + if hasattr(self, '_test_ds') and self._test_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' + ) + self._test_dl = torch.utils.data.DataLoader( + self._test_ds, batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, + ) + + def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: + """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device + When using pipeline parallelism, we need the global batch to remain on the CPU, + since the memory overhead will be too high when using a large number of microbatches. + Microbatches are transferred from CPU to GPU inside the pipeline. + """ + return batch + + def _validate_trainer(self): + """ Certain trainer configurations can break training. + Here we try to catch them and raise an error. + """ + if self.trainer.accumulate_grad_batches > 1: + raise ValueError( + f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' + ) + + @classmethod + def list_available_models(cls): + return None + + def log_images(self, *args, **kwargs): + return self.model.log_images(*args, **kwargs) + + def parameters(self): + if isinstance(self.model, list): + return itertools.chain.from_iterable(module.parameters() for module in self.model) + else: + return self.model.parameters() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/LICENSE b/nemo/collections/multimodal/models/controlnet/uniformer/LICENSE new file mode 100644 index 000000000000..c38dc639e6e2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/LICENSE @@ -0,0 +1,203 @@ +Copyright 2022 SenseTime X-Lab. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2022 SenseTime X-Lab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/__init__.py new file mode 100644 index 000000000000..a03ce9a4511d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/__init__.py @@ -0,0 +1,33 @@ +# Uniformer +# From https://github.com/Sense-X/UniFormer +# # Apache-2.0 license + +import os + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import ( + inference_segmentor, + init_segmentor, + show_result_pyplot, +) +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core.evaluation import get_palette + +checkpoint_file = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/upernet_global_small.pth" + + +class UniformerDetector: + def __init__(self): + annotator_ckpts_path = '/opt/NeMo/nemo/collections/multimodal/models/controlnet/uniformer' + modelpath = os.path.join(annotator_ckpts_path, "upernet_global_small.pth") + if not os.path.exists(modelpath): + from basicsr.utils.download_util import load_file_from_url + + load_file_from_url(checkpoint_file, model_dir=annotator_ckpts_path) + config_file = os.path.join( + os.path.dirname(annotator_ckpts_path), "uniformer", "exp", "upernet_global_small", "config.py" + ) + self.model = init_segmentor(config_file, modelpath).cuda() + + def __call__(self, img): + result = inference_segmentor(self.model, img) + res_img = show_result_pyplot(self.model, img, result, get_palette('ade'), opacity=1) + return res_img diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py new file mode 100644 index 000000000000..868ea7214c35 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/ade20k.py @@ -0,0 +1,58 @@ +# dataset settings +dataset_type = 'ADE20KDataset' +data_root = 'data/ade/ADEChallengeData2016' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ], + ), +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline, + ), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline, + ), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py new file mode 100644 index 000000000000..4a234cc4de85 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/chase_db1.py @@ -0,0 +1,64 @@ +# dataset settings +dataset_type = 'ChaseDB1Dataset' +data_root = 'data/CHASE_DB1' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (960, 999) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ], + ), +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline, + ), + ), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline, + ), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py new file mode 100644 index 000000000000..e44904a99a8d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes.py @@ -0,0 +1,50 @@ +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ], + ), +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline, + ), + val=dict( + type=dataset_type, data_root=data_root, img_dir='leftImg8bit/val', ann_dir='gtFine/val', pipeline=test_pipeline + ), + test=dict( + type=dataset_type, data_root=data_root, img_dir='leftImg8bit/val', ann_dir='gtFine/val', pipeline=test_pipeline + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py new file mode 100644 index 000000000000..f4a0def57ae7 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/cityscapes_769x769.py @@ -0,0 +1,32 @@ +_base_ = './cityscapes.py' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (769, 769) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2049, 1025), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ], + ), +] +data = dict(train=dict(pipeline=train_pipeline), val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py new file mode 100644 index 000000000000..51849ec17534 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/drive.py @@ -0,0 +1,64 @@ +# dataset settings +dataset_type = 'DRIVEDataset' +data_root = 'data/DRIVE' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (584, 565) +crop_size = (64, 64) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ], + ), +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline, + ), + ), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline, + ), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py new file mode 100644 index 000000000000..ef920a7e9491 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/hrf.py @@ -0,0 +1,64 @@ +# dataset settings +dataset_type = 'HRFDataset' +data_root = 'data/HRF' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (2336, 3504) +crop_size = (256, 256) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ], + ), +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline, + ), + ), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline, + ), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py new file mode 100644 index 000000000000..9b7a0d335b16 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context.py @@ -0,0 +1,64 @@ +# dataset settings +dataset_type = 'PascalContextDataset' +data_root = 'data/VOCdevkit/VOC2010/' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ], + ), +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline, + ), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline, + ), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py new file mode 100644 index 000000000000..8e757090c2a2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_context_59.py @@ -0,0 +1,64 @@ +# dataset settings +dataset_type = 'PascalContextDataset59' +data_root = 'data/VOCdevkit/VOC2010/' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ], + ), +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline, + ), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline, + ), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py new file mode 100644 index 000000000000..55d49f3b0156 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12.py @@ -0,0 +1,61 @@ +# dataset settings +dataset_type = 'PascalVOCDataset' +data_root = 'data/VOCdevkit/VOC2012' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ], + ), +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/train.txt', + pipeline=train_pipeline, + ), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline, + ), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py new file mode 100644 index 000000000000..5dfc7c2e640a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/pascal_voc12_aug.py @@ -0,0 +1,8 @@ +_base_ = './pascal_voc12.py' +# dataset settings +data = dict( + train=dict( + ann_dir=['SegmentationClass', 'SegmentationClassAug'], + split=['ImageSets/Segmentation/train.txt', 'ImageSets/Segmentation/aug.txt'], + ) +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py new file mode 100644 index 000000000000..c2e6bbc32e0b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/datasets/stare.py @@ -0,0 +1,64 @@ +# dataset settings +dataset_type = 'STAREDataset' +data_root = 'data/STARE' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (605, 700) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ], + ), +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline, + ), + ), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline, + ), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline, + ), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py new file mode 100644 index 000000000000..42ed60a779ae --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/default_runtime.py @@ -0,0 +1,15 @@ +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), + # dict(type='TensorboardLoggerHook') + ], +) +# yapf:enable +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] +cudnn_benchmark = True diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py new file mode 100644 index 000000000000..74d4d7851a59 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ann_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='ANNHead', + in_channels=[1024, 2048], + in_index=[2, 3], + channels=512, + project_channels=256, + query_scales=(1,), + key_pool_scales=(1, 3, 6, 8), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py new file mode 100644 index 000000000000..96ece2073821 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/apcnet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='APCHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py new file mode 100644 index 000000000000..b949aa80e45e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ccnet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='CCHead', + in_channels=2048, + in_index=3, + channels=512, + recurrence=2, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py new file mode 100644 index 000000000000..19f45463bbb9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/cgnet.py @@ -0,0 +1,54 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='CGNet', + norm_cfg=norm_cfg, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16), + ), + decode_head=dict( + type='FCNHead', + in_channels=256, + in_index=2, + channels=256, + num_convs=0, + concat_input=False, + dropout_ratio=0, + num_classes=19, + norm_cfg=norm_cfg, + loss_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0, + class_weight=[ + 2.5959933, + 6.7415504, + 3.5354059, + 9.8663225, + 9.690899, + 9.369352, + 10.289121, + 9.953208, + 4.3097677, + 9.490387, + 7.674431, + 9.396905, + 10.347791, + 6.3927646, + 10.226669, + 10.241062, + 10.280587, + 10.396974, + 10.055647, + ], + ), + ), + # model training and testing settings + train_cfg=dict(sampler=None), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py new file mode 100644 index 000000000000..758161a914a5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/danet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py new file mode 100644 index 000000000000..501b207c0de2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='ASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py new file mode 100644 index 000000000000..4f1a8536caf9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False, + ), + decode_head=dict( + type='ASPPHead', + in_channels=64, + in_index=4, + channels=16, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py new file mode 100644 index 000000000000..fbf847d8941d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='DepthwiseSeparableASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + c1_in_channels=256, + c1_channels=48, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py new file mode 100644 index 000000000000..42ab79c4ce82 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dmnet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='DMHead', + in_channels=2048, + in_index=3, + channels=512, + filter_sizes=(1, 3, 5, 7), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py new file mode 100644 index 000000000000..5e6656c49b78 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/dnl_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='DNLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py new file mode 100644 index 000000000000..ff8a84c1c491 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/emanet_r50-d8.py @@ -0,0 +1,49 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='EMAHead', + in_channels=2048, + in_index=3, + channels=256, + ema_channels=512, + num_bases=64, + num_stages=3, + momentum=0.1, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py new file mode 100644 index 000000000000..c61fb7d77e35 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/encnet_r50-d8.py @@ -0,0 +1,49 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(1, 2, 3), + channels=512, + num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py new file mode 100644 index 000000000000..d6a4fb7205dc --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fast_scnn.py @@ -0,0 +1,59 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='FastSCNN', + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + norm_cfg=norm_cfg, + align_corners=False, + ), + decode_head=dict( + type='DepthwiseSeparableFCNHead', + in_channels=128, + channels=128, + concat_input=False, + num_classes=19, + in_index=-1, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4), + ), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=32, + num_convs=1, + num_classes=19, + in_index=-2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4), + ), + dict( + type='FCNHead', + in_channels=64, + channels=32, + num_convs=1, + num_classes=19, + in_index=-3, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4), + ), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py new file mode 100644 index 000000000000..0c20335075a9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_hr18.py @@ -0,0 +1,37 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict(num_modules=1, num_branches=1, block='BOTTLENECK', num_blocks=(4,), num_channels=(64,)), + stage2=dict(num_modules=1, num_branches=2, block='BASIC', num_blocks=(4, 4), num_channels=(18, 36)), + stage3=dict(num_modules=4, num_branches=3, block='BASIC', num_blocks=(4, 4, 4), num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, num_branches=4, block='BASIC', num_blocks=(4, 4, 4, 4), num_channels=(18, 36, 72, 144) + ), + ), + ), + decode_head=dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + channels=sum([18, 36, 72, 144]), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py new file mode 100644 index 000000000000..43364899324f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='FCNHead', + in_channels=2048, + in_index=3, + channels=512, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py new file mode 100644 index 000000000000..ebfd9879787a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fcn_unet_s5-d16.py @@ -0,0 +1,53 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False, + ), + decode_head=dict( + type='FCNHead', + in_channels=64, + in_index=4, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py new file mode 100644 index 000000000000..a51398d3a5b0 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_r50.py @@ -0,0 +1,34 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + neck=dict(type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py new file mode 100644 index 000000000000..f81960a35c2d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/fpn_uniformer.py @@ -0,0 +1,32 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + mlp_ratio=4.0, + qkv_bias=True, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.1, + ), + neck=dict(type='FPN', in_channels=[64, 128, 320, 512], out_channels=256, num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py new file mode 100644 index 000000000000..c1d4477e0250 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/gcnet_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='GCHead', + in_channels=2048, + in_index=3, + channels=512, + ratio=1 / 4.0, + pooling_type='att', + fusion_types=('channel_add',), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py new file mode 100644 index 000000000000..2b2fa51f8d01 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/lraspp_m-v3-d8.py @@ -0,0 +1,22 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict(type='MobileNetV3', arch='large', out_indices=(1, 3, 16), norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 24, 960), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py new file mode 100644 index 000000000000..7477ac076da2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/nonlocal_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='NLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py new file mode 100644 index 000000000000..282f7d239eb5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_hr18.py @@ -0,0 +1,53 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict(num_modules=1, num_branches=1, block='BOTTLENECK', num_blocks=(4,), num_channels=(64,)), + stage2=dict(num_modules=1, num_branches=2, block='BASIC', num_blocks=(4, 4), num_channels=(18, 36)), + stage3=dict(num_modules=4, num_branches=3, block='BASIC', num_blocks=(4, 4, 4), num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, num_branches=4, block='BASIC', num_blocks=(4, 4, 4, 4), num_channels=(18, 36, 72, 144) + ), + ), + ), + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py new file mode 100644 index 000000000000..a5dcc09b6750 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/ocrnet_r50-d8.py @@ -0,0 +1,49 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=[ + dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + dict( + type='OCRHead', + in_channels=2048, + in_index=3, + channels=512, + ocr_channels=256, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py new file mode 100644 index 000000000000..88ec38a37a5c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pointrend_r50.py @@ -0,0 +1,49 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + neck=dict(type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=4), + decode_head=[ + dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + dict( + type='PointHead', + in_channels=[256], + in_index=[0], + channels=256, + num_fcs=3, + coarse_pred_each_layer=True, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + ], + # model training and testing settings + train_cfg=dict(num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75), + test_cfg=dict(mode='whole', subdivision_steps=2, subdivision_num_points=8196, scale_factor=2), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py new file mode 100644 index 000000000000..07aba72c3f7d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/psanet_r50-d8.py @@ -0,0 +1,51 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='PSAHead', + in_channels=2048, + in_index=3, + channels=512, + mask_size=(97, 97), + psa_type='bi-direction', + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py new file mode 100644 index 000000000000..e6c2a5534fc0 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py new file mode 100644 index 000000000000..7010b76bc4e0 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False, + ), + decode_head=dict( + type='PSPHead', + in_channels=64, + in_index=4, + channels=16, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py new file mode 100644 index 000000000000..bef6484ab3ae --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_r50.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True, + ), + decode_head=dict( + type='UPerHead', + in_channels=[256, 512, 1024, 2048], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py new file mode 100644 index 000000000000..df70f56cf9a0 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/models/upernet_uniformer.py @@ -0,0 +1,45 @@ +# model settings +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + mlp_ratio=4.0, + qkv_bias=True, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.1, + ), + decode_head=dict( + type='UPerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type='FCNHead', + in_channels=320, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_160k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_160k.py new file mode 100644 index 000000000000..52603890b10f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_160k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=160000) +checkpoint_config = dict(by_epoch=False, interval=16000) +evaluation = dict(interval=16000, metric='mIoU') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_20k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_20k.py new file mode 100644 index 000000000000..bf780a1b6f65 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_20k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=20000) +checkpoint_config = dict(by_epoch=False, interval=2000) +evaluation = dict(interval=2000, metric='mIoU') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_40k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_40k.py new file mode 100644 index 000000000000..cdbf841abcb2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_40k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=40000) +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric='mIoU') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_80k.py b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_80k.py new file mode 100644 index 000000000000..c190cee6bdc7 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/configs/_base_/schedules/schedule_80k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=80000) +checkpoint_config = dict(by_epoch=False, interval=8000) +evaluation = dict(interval=8000, metric='mIoU') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py new file mode 100644 index 000000000000..3d17fe03c602 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/config.py @@ -0,0 +1,48 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py', +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=False, + hybrid=False, + ), + decode_head=dict(in_channels=[64, 128, 320, 512], num_classes=150), + auxiliary_head=dict(in_channels=320, num_classes=150), +) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + } + ), +) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False, +) + +data = dict(samples_per_gpu=2) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/run.sh b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/run.sh new file mode 100644 index 000000000000..9fb22edfa7a3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/run.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +work_path=$(dirname $0) +PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=8 \ + tools/train.py ${work_path}/config.py \ + --launcher pytorch \ + --options model.backbone.pretrained_path='your_model_path/uniformer_small_in1k.pth' \ + --work-dir ${work_path}/ckpt \ + 2>&1 | tee -a ${work_path}/log.txt diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test.sh b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test.sh new file mode 100644 index 000000000000..d9a85e7a0d3b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +work_path=$(dirname $0) +PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=8 \ + tools/test.py ${work_path}/test_config_h32.py \ + ${work_path}/ckpt/latest.pth \ + --launcher pytorch \ + --eval mIoU \ + 2>&1 | tee -a ${work_path}/log.txt diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py new file mode 100644 index 000000000000..3d17fe03c602 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_g.py @@ -0,0 +1,48 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py', +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=False, + hybrid=False, + ), + decode_head=dict(in_channels=[64, 128, 320, 512], num_classes=150), + auxiliary_head=dict(in_channels=320, num_classes=150), +) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + } + ), +) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False, +) + +data = dict(samples_per_gpu=2) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py new file mode 100644 index 000000000000..4a5923cb210c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_h32.py @@ -0,0 +1,49 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py', +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=False, + hybrid=True, + window_size=32, + ), + decode_head=dict(in_channels=[64, 128, 320, 512], num_classes=150), + auxiliary_head=dict(in_channels=320, num_classes=150), +) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + } + ), +) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False, +) + +data = dict(samples_per_gpu=2) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py new file mode 100644 index 000000000000..4fde8ab1ebe4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/exp/upernet_global_small/test_config_w32.py @@ -0,0 +1,49 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py', +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=True, + hybrid=False, + window_size=32, + ), + decode_head=dict(in_channels=[64, 128, 320, 512], num_classes=150), + auxiliary_head=dict(in_channels=320, num_classes=150), +) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + } + ), +) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False, +) + +data = dict(samples_per_gpu=2) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/__init__.py new file mode 100644 index 000000000000..210a29891383 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# flake8: noqa +from .arraymisc import * +from .fileio import * +from .image import * +from .utils import * +from .version import * +from .video import * +from .visualization import * + +# The following modules are not imported to this level, so mmcv may be used +# without PyTorch. +# - runner +# - parallel +# - op diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/__init__.py new file mode 100644 index 000000000000..4b4700d6139a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .quantization import dequantize, quantize + +__all__ = ['quantize', 'dequantize'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py new file mode 100644 index 000000000000..87ba022c1ced --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/arraymisc/quantization.py @@ -0,0 +1,49 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np + + +def quantize(arr, min_val, max_val, levels, dtype=np.int64): + """Quantize an array of (-inf, inf) to [0, levels-1]. + + Args: + arr (ndarray): Input array. + min_val (scalar): Minimum value to be clipped. + max_val (scalar): Maximum value to be clipped. + levels (int): Quantization levels. + dtype (np.type): The type of the quantized array. + + Returns: + tuple: Quantized array. + """ + if not (isinstance(levels, int) and levels > 1): + raise ValueError(f'levels must be a positive integer, but got {levels}') + if min_val >= max_val: + raise ValueError(f'min_val ({min_val}) must be smaller than max_val ({max_val})') + + arr = np.clip(arr, min_val, max_val) - min_val + quantized_arr = np.minimum(np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1) + + return quantized_arr + + +def dequantize(arr, min_val, max_val, levels, dtype=np.float64): + """Dequantize an array. + + Args: + arr (ndarray): Input array. + min_val (scalar): Minimum value to be clipped. + max_val (scalar): Maximum value to be clipped. + levels (int): Quantization levels. + dtype (np.type): The type of the dequantized array. + + Returns: + tuple: Dequantized array. + """ + if not (isinstance(levels, int) and levels > 1): + raise ValueError(f'levels must be a positive integer, but got {levels}') + if min_val >= max_val: + raise ValueError(f'min_val ({min_val}) must be smaller than max_val ({max_val})') + + dequantized_arr = (arr + 0.5).astype(dtype) * (max_val - min_val) / levels + min_val + + return dequantized_arr diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py new file mode 100644 index 000000000000..f87bac5fafca --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/__init__.py @@ -0,0 +1,131 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .alexnet import AlexNet + +# yapf: disable +from .bricks import ( + ACTIVATION_LAYERS, + CONV_LAYERS, + NORM_LAYERS, + PADDING_LAYERS, + PLUGIN_LAYERS, + UPSAMPLE_LAYERS, + ContextBlock, + Conv2d, + Conv3d, + ConvAWS2d, + ConvModule, + ConvTranspose2d, + ConvTranspose3d, + ConvWS2d, + DepthwiseSeparableConvModule, + GeneralizedAttention, + HSigmoid, + HSwish, + Linear, + MaxPool2d, + MaxPool3d, + NonLocal1d, + NonLocal2d, + NonLocal3d, + Scale, + Swish, + build_activation_layer, + build_conv_layer, + build_norm_layer, + build_padding_layer, + build_plugin_layer, + build_upsample_layer, + conv_ws_2d, + is_norm, +) +from .builder import MODELS, build_model_from_cfg + +# yapf: enable +from .resnet import ResNet, make_res_layer +from .utils import ( + INITIALIZERS, + Caffe2XavierInit, + ConstantInit, + KaimingInit, + NormalInit, + PretrainedInit, + TruncNormalInit, + UniformInit, + XavierInit, + bias_init_with_prob, + caffe2_xavier_init, + constant_init, + fuse_conv_bn, + get_model_complexity_info, + initialize, + kaiming_init, + normal_init, + trunc_normal_init, + uniform_init, + xavier_init, +) +from .vgg import VGG, make_vgg_layer + +__all__ = [ + 'AlexNet', + 'VGG', + 'make_vgg_layer', + 'ResNet', + 'make_res_layer', + 'constant_init', + 'xavier_init', + 'normal_init', + 'trunc_normal_init', + 'uniform_init', + 'kaiming_init', + 'caffe2_xavier_init', + 'bias_init_with_prob', + 'ConvModule', + 'build_activation_layer', + 'build_conv_layer', + 'build_norm_layer', + 'build_padding_layer', + 'build_upsample_layer', + 'build_plugin_layer', + 'is_norm', + 'NonLocal1d', + 'NonLocal2d', + 'NonLocal3d', + 'ContextBlock', + 'HSigmoid', + 'Swish', + 'HSwish', + 'GeneralizedAttention', + 'ACTIVATION_LAYERS', + 'CONV_LAYERS', + 'NORM_LAYERS', + 'PADDING_LAYERS', + 'UPSAMPLE_LAYERS', + 'PLUGIN_LAYERS', + 'Scale', + 'get_model_complexity_info', + 'conv_ws_2d', + 'ConvAWS2d', + 'ConvWS2d', + 'fuse_conv_bn', + 'DepthwiseSeparableConvModule', + 'Linear', + 'Conv2d', + 'ConvTranspose2d', + 'MaxPool2d', + 'ConvTranspose3d', + 'MaxPool3d', + 'Conv3d', + 'initialize', + 'INITIALIZERS', + 'ConstantInit', + 'XavierInit', + 'NormalInit', + 'TruncNormalInit', + 'UniformInit', + 'KaimingInit', + 'PretrainedInit', + 'Caffe2XavierInit', + 'MODELS', + 'build_model_from_cfg', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py new file mode 100644 index 000000000000..e52d852bceaa --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/alexnet.py @@ -0,0 +1,62 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging + +import torch.nn as nn + + +class AlexNet(nn.Module): + """AlexNet backbone. + + Args: + num_classes (int): number of classes for classification. + """ + + def __init__(self, num_classes=-1): + super(AlexNet, self).__init__() + self.num_classes = num_classes + self.features = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(64, 192, kernel_size=5, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(192, 384, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(384, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + ) + if self.num_classes > 0: + self.classifier = nn.Sequential( + nn.Dropout(), + nn.Linear(256 * 6 * 6, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + nn.Linear(4096, num_classes), + ) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + from ..runner import load_checkpoint + + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + # use default initializer + pass + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + + x = self.features(x) + if self.num_classes > 0: + x = x.view(x.size(0), 256 * 6 * 6) + x = self.classifier(x) + + return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py new file mode 100644 index 000000000000..4405eb058c4c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/__init__.py @@ -0,0 +1,61 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .activation import build_activation_layer +from .context_block import ContextBlock +from .conv import build_conv_layer +from .conv2d_adaptive_padding import Conv2dAdaptivePadding +from .conv_module import ConvModule +from .conv_ws import ConvAWS2d, ConvWS2d, conv_ws_2d +from .depthwise_separable_conv_module import DepthwiseSeparableConvModule +from .drop import Dropout, DropPath +from .generalized_attention import GeneralizedAttention +from .hsigmoid import HSigmoid +from .hswish import HSwish +from .non_local import NonLocal1d, NonLocal2d, NonLocal3d +from .norm import build_norm_layer, is_norm +from .padding import build_padding_layer +from .plugin import build_plugin_layer +from .registry import ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS +from .scale import Scale +from .swish import Swish +from .upsample import build_upsample_layer +from .wrappers import Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d, Linear, MaxPool2d, MaxPool3d + +__all__ = [ + 'ConvModule', + 'build_activation_layer', + 'build_conv_layer', + 'build_norm_layer', + 'build_padding_layer', + 'build_upsample_layer', + 'build_plugin_layer', + 'is_norm', + 'HSigmoid', + 'HSwish', + 'NonLocal1d', + 'NonLocal2d', + 'NonLocal3d', + 'ContextBlock', + 'GeneralizedAttention', + 'ACTIVATION_LAYERS', + 'CONV_LAYERS', + 'NORM_LAYERS', + 'PADDING_LAYERS', + 'UPSAMPLE_LAYERS', + 'PLUGIN_LAYERS', + 'Scale', + 'ConvAWS2d', + 'ConvWS2d', + 'conv_ws_2d', + 'DepthwiseSeparableConvModule', + 'Swish', + 'Linear', + 'Conv2dAdaptivePadding', + 'Conv2d', + 'ConvTranspose2d', + 'MaxPool2d', + 'ConvTranspose3d', + 'MaxPool3d', + 'Conv3d', + 'Dropout', + 'DropPath', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py new file mode 100644 index 000000000000..74134627bd60 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/activation.py @@ -0,0 +1,93 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import ( + TORCH_VERSION, + build_from_cfg, + digit_version, +) + +from .registry import ACTIVATION_LAYERS + +for module in [nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.RReLU, nn.ReLU6, nn.ELU, nn.Sigmoid, nn.Tanh]: + ACTIVATION_LAYERS.register_module(module=module) + + +@ACTIVATION_LAYERS.register_module(name='Clip') +@ACTIVATION_LAYERS.register_module() +class Clamp(nn.Module): + """Clamp activation layer. + + This activation function is to clamp the feature map value within + :math:`[min, max]`. More details can be found in ``torch.clamp()``. + + Args: + min (Number | optional): Lower-bound of the range to be clamped to. + Default to -1. + max (Number | optional): Upper-bound of the range to be clamped to. + Default to 1. + """ + + def __init__(self, min=-1.0, max=1.0): + super(Clamp, self).__init__() + self.min = min + self.max = max + + def forward(self, x): + """Forward function. + + Args: + x (torch.Tensor): The input tensor. + + Returns: + torch.Tensor: Clamped tensor. + """ + return torch.clamp(x, min=self.min, max=self.max) + + +class GELU(nn.Module): + r"""Applies the Gaussian Error Linear Units function: + + .. math:: + \text{GELU}(x) = x * \Phi(x) + where :math:`\Phi(x)` is the Cumulative Distribution Function for + Gaussian Distribution. + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + .. image:: scripts/activation_images/GELU.png + + Examples:: + + >>> m = nn.GELU() + >>> input = torch.randn(2) + >>> output = m(input) + """ + + def forward(self, input): + return F.gelu(input) + + +if TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.4'): + ACTIVATION_LAYERS.register_module(module=GELU) +else: + ACTIVATION_LAYERS.register_module(module=nn.GELU) + + +def build_activation_layer(cfg): + """Build activation layer. + + Args: + cfg (dict): The activation layer config, which should contain: + - type (str): Layer type. + - layer args: Args needed to instantiate an activation layer. + + Returns: + nn.Module: Created activation layer. + """ + return build_from_cfg(cfg, ACTIVATION_LAYERS) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py new file mode 100644 index 000000000000..fd2a4b80ea18 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/context_block.py @@ -0,0 +1,123 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn + +from ..utils import constant_init, kaiming_init +from .registry import PLUGIN_LAYERS + + +def last_zero_init(m): + if isinstance(m, nn.Sequential): + constant_init(m[-1], val=0) + else: + constant_init(m, val=0) + + +@PLUGIN_LAYERS.register_module() +class ContextBlock(nn.Module): + """ContextBlock module in GCNet. + + See 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + (https://arxiv.org/abs/1904.11492) for details. + + Args: + in_channels (int): Channels of the input feature map. + ratio (float): Ratio of channels of transform bottleneck + pooling_type (str): Pooling method for context modeling. + Options are 'att' and 'avg', stand for attention pooling and + average pooling respectively. Default: 'att'. + fusion_types (Sequence[str]): Fusion method for feature fusion, + Options are 'channels_add', 'channel_mul', stand for channelwise + addition and multiplication respectively. Default: ('channel_add',) + """ + + _abbr_ = 'context_block' + + def __init__(self, in_channels, ratio, pooling_type='att', fusion_types=('channel_add',)): + super(ContextBlock, self).__init__() + assert pooling_type in ['avg', 'att'] + assert isinstance(fusion_types, (list, tuple)) + valid_fusion_types = ['channel_add', 'channel_mul'] + assert all([f in valid_fusion_types for f in fusion_types]) + assert len(fusion_types) > 0, 'at least one fusion should be used' + self.in_channels = in_channels + self.ratio = ratio + self.planes = int(in_channels * ratio) + self.pooling_type = pooling_type + self.fusion_types = fusion_types + if pooling_type == 'att': + self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=1) + self.softmax = nn.Softmax(dim=2) + else: + self.avg_pool = nn.AdaptiveAvgPool2d(1) + if 'channel_add' in fusion_types: + self.channel_add_conv = nn.Sequential( + nn.Conv2d(self.in_channels, self.planes, kernel_size=1), + nn.LayerNorm([self.planes, 1, 1]), + nn.ReLU(inplace=True), # yapf: disable + nn.Conv2d(self.planes, self.in_channels, kernel_size=1), + ) + else: + self.channel_add_conv = None + if 'channel_mul' in fusion_types: + self.channel_mul_conv = nn.Sequential( + nn.Conv2d(self.in_channels, self.planes, kernel_size=1), + nn.LayerNorm([self.planes, 1, 1]), + nn.ReLU(inplace=True), # yapf: disable + nn.Conv2d(self.planes, self.in_channels, kernel_size=1), + ) + else: + self.channel_mul_conv = None + self.reset_parameters() + + def reset_parameters(self): + if self.pooling_type == 'att': + kaiming_init(self.conv_mask, mode='fan_in') + self.conv_mask.inited = True + + if self.channel_add_conv is not None: + last_zero_init(self.channel_add_conv) + if self.channel_mul_conv is not None: + last_zero_init(self.channel_mul_conv) + + def spatial_pool(self, x): + batch, channel, height, width = x.size() + if self.pooling_type == 'att': + input_x = x + # [N, C, H * W] + input_x = input_x.view(batch, channel, height * width) + # [N, 1, C, H * W] + input_x = input_x.unsqueeze(1) + # [N, 1, H, W] + context_mask = self.conv_mask(x) + # [N, 1, H * W] + context_mask = context_mask.view(batch, 1, height * width) + # [N, 1, H * W] + context_mask = self.softmax(context_mask) + # [N, 1, H * W, 1] + context_mask = context_mask.unsqueeze(-1) + # [N, 1, C, 1] + context = torch.matmul(input_x, context_mask) + # [N, C, 1, 1] + context = context.view(batch, channel, 1, 1) + else: + # [N, C, 1, 1] + context = self.avg_pool(x) + + return context + + def forward(self, x): + # [N, C, 1, 1] + context = self.spatial_pool(x) + + out = x + if self.channel_mul_conv is not None: + # [N, C, 1, 1] + channel_mul_term = torch.sigmoid(self.channel_mul_conv(context)) + out = out * channel_mul_term + if self.channel_add_conv is not None: + # [N, C, 1, 1] + channel_add_term = self.channel_add_conv(context) + out = out + channel_add_term + + return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv.py new file mode 100644 index 000000000000..cf54491997a4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv.py @@ -0,0 +1,44 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from torch import nn + +from .registry import CONV_LAYERS + +CONV_LAYERS.register_module('Conv1d', module=nn.Conv1d) +CONV_LAYERS.register_module('Conv2d', module=nn.Conv2d) +CONV_LAYERS.register_module('Conv3d', module=nn.Conv3d) +CONV_LAYERS.register_module('Conv', module=nn.Conv2d) + + +def build_conv_layer(cfg, *args, **kwargs): + """Build convolution layer. + + Args: + cfg (None or dict): The conv layer config, which should contain: + - type (str): Layer type. + - layer args: Args needed to instantiate an conv layer. + args (argument list): Arguments passed to the `__init__` + method of the corresponding conv layer. + kwargs (keyword arguments): Keyword arguments passed to the `__init__` + method of the corresponding conv layer. + + Returns: + nn.Module: Created conv layer. + """ + if cfg is None: + cfg_ = dict(type='Conv2d') + else: + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in CONV_LAYERS: + raise KeyError(f'Unrecognized norm type {layer_type}') + else: + conv_layer = CONV_LAYERS.get(layer_type) + + layer = conv_layer(*args, **kwargs, **cfg_) + + return layer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py new file mode 100644 index 000000000000..39f9c01dd794 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py @@ -0,0 +1,46 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +from torch import nn +from torch.nn import functional as F + +from .registry import CONV_LAYERS + + +@CONV_LAYERS.register_module() +class Conv2dAdaptivePadding(nn.Conv2d): + """Implementation of 2D convolution in tensorflow with `padding` as "same", + which applies padding to input (if needed) so that input image gets fully + covered by filter and stride you specified. For stride 1, this will ensure + that output image size is same as input. For stride of 2, output dimensions + will be half, for example. + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. Default: 0 + dilation (int or tuple, optional): Spacing between kernel elements. + Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the + output. Default: ``True`` + """ + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True): + super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) + + def forward(self, x): + img_h, img_w = x.size()[-2:] + kernel_h, kernel_w = self.weight.size()[-2:] + stride_h, stride_w = self.stride + output_h = math.ceil(img_h / stride_h) + output_w = math.ceil(img_w / stride_w) + pad_h = max((output_h - 1) * self.stride[0] + (kernel_h - 1) * self.dilation[0] + 1 - img_h, 0) + pad_w = max((output_w - 1) * self.stride[1] + (kernel_w - 1) * self.dilation[1] + 1 - img_w, 0) + if pad_h > 0 or pad_w > 0: + x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) + return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py new file mode 100644 index 000000000000..3e9f76b3f9e5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_module.py @@ -0,0 +1,206 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch.nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import _BatchNorm, _InstanceNorm +from ..utils import constant_init, kaiming_init +from .activation import build_activation_layer +from .conv import build_conv_layer +from .norm import build_norm_layer +from .padding import build_padding_layer +from .registry import PLUGIN_LAYERS + + +@PLUGIN_LAYERS.register_module() +class ConvModule(nn.Module): + """A conv block that bundles conv/norm/activation layers. + + This block simplifies the usage of convolution layers, which are commonly + used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU). + It is based upon three build methods: `build_conv_layer()`, + `build_norm_layer()` and `build_activation_layer()`. + + Besides, we add some additional features in this module. + 1. Automatically set `bias` of the conv layer. + 2. Spectral norm is supported. + 3. More padding modes are supported. Before PyTorch 1.5, nn.Conv2d only + supports zero and circular padding, and we add "reflect" padding mode. + + Args: + in_channels (int): Number of channels in the input feature map. + Same as that in ``nn._ConvNd``. + out_channels (int): Number of channels produced by the convolution. + Same as that in ``nn._ConvNd``. + kernel_size (int | tuple[int]): Size of the convolving kernel. + Same as that in ``nn._ConvNd``. + stride (int | tuple[int]): Stride of the convolution. + Same as that in ``nn._ConvNd``. + padding (int | tuple[int]): Zero-padding added to both sides of + the input. Same as that in ``nn._ConvNd``. + dilation (int | tuple[int]): Spacing between kernel elements. + Same as that in ``nn._ConvNd``. + groups (int): Number of blocked connections from input channels to + output channels. Same as that in ``nn._ConvNd``. + bias (bool | str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise + False. Default: "auto". + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU'). + inplace (bool): Whether to use inplace mode for activation. + Default: True. + with_spectral_norm (bool): Whether use spectral norm in conv module. + Default: False. + padding_mode (str): If the `padding_mode` has not been supported by + current `Conv2d` in PyTorch, we will use our own padding layer + instead. Currently, we support ['zeros', 'circular'] with official + implementation and ['reflect'] with our own implementation. + Default: 'zeros'. + order (tuple[str]): The order of conv/norm/activation layers. It is a + sequence of "conv", "norm" and "act". Common examples are + ("conv", "norm", "act") and ("act", "conv", "norm"). + Default: ('conv', 'norm', 'act'). + """ + + _abbr_ = 'conv_block' + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias='auto', + conv_cfg=None, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + inplace=True, + with_spectral_norm=False, + padding_mode='zeros', + order=('conv', 'norm', 'act'), + ): + super(ConvModule, self).__init__() + assert conv_cfg is None or isinstance(conv_cfg, dict) + assert norm_cfg is None or isinstance(norm_cfg, dict) + assert act_cfg is None or isinstance(act_cfg, dict) + official_padding_mode = ['zeros', 'circular'] + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.inplace = inplace + self.with_spectral_norm = with_spectral_norm + self.with_explicit_padding = padding_mode not in official_padding_mode + self.order = order + assert isinstance(self.order, tuple) and len(self.order) == 3 + assert set(order) == set(['conv', 'norm', 'act']) + + self.with_norm = norm_cfg is not None + self.with_activation = act_cfg is not None + # if the conv layer is before a norm layer, bias is unnecessary. + if bias == 'auto': + bias = not self.with_norm + self.with_bias = bias + + if self.with_explicit_padding: + pad_cfg = dict(type=padding_mode) + self.padding_layer = build_padding_layer(pad_cfg, padding) + + # reset padding to 0 for conv module + conv_padding = 0 if self.with_explicit_padding else padding + # build convolution layer + self.conv = build_conv_layer( + conv_cfg, + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=conv_padding, + dilation=dilation, + groups=groups, + bias=bias, + ) + # export the attributes of self.conv to a higher level for convenience + self.in_channels = self.conv.in_channels + self.out_channels = self.conv.out_channels + self.kernel_size = self.conv.kernel_size + self.stride = self.conv.stride + self.padding = padding + self.dilation = self.conv.dilation + self.transposed = self.conv.transposed + self.output_padding = self.conv.output_padding + self.groups = self.conv.groups + + if self.with_spectral_norm: + self.conv = nn.utils.spectral_norm(self.conv) + + # build normalization layers + if self.with_norm: + # norm layer is after conv layer + if order.index('norm') > order.index('conv'): + norm_channels = out_channels + else: + norm_channels = in_channels + self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels) + self.add_module(self.norm_name, norm) + if self.with_bias: + if isinstance(norm, (_BatchNorm, _InstanceNorm)): + warnings.warn('Unnecessary conv bias before batch/instance norm') + else: + self.norm_name = None + + # build activation layer + if self.with_activation: + act_cfg_ = act_cfg.copy() + # nn.Tanh has no 'inplace' argument + if act_cfg_['type'] not in ['Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish']: + act_cfg_.setdefault('inplace', inplace) + self.activate = build_activation_layer(act_cfg_) + + # Use msra init by default + self.init_weights() + + @property + def norm(self): + if self.norm_name: + return getattr(self, self.norm_name) + else: + return None + + def init_weights(self): + # 1. It is mainly for customized conv layers with their own + # initialization manners by calling their own ``init_weights()``, + # and we do not want ConvModule to override the initialization. + # 2. For customized conv layers without their own initialization + # manners (that is, they don't have their own ``init_weights()``) + # and PyTorch's conv layers, they will be initialized by + # this method with default ``kaiming_init``. + # Note: For PyTorch's conv layers, they will be overwritten by our + # initialization implementation using default ``kaiming_init``. + if not hasattr(self.conv, 'init_weights'): + if self.with_activation and self.act_cfg['type'] == 'LeakyReLU': + nonlinearity = 'leaky_relu' + a = self.act_cfg.get('negative_slope', 0.01) + else: + nonlinearity = 'relu' + a = 0 + kaiming_init(self.conv, a=a, nonlinearity=nonlinearity) + if self.with_norm: + constant_init(self.norm, 1, bias=0) + + def forward(self, x, activate=True, norm=True): + for layer in self.order: + if layer == 'conv': + if self.with_explicit_padding: + x = self.padding_layer(x) + x = self.conv(x) + elif layer == 'norm' and norm and self.with_norm: + x = self.norm(x) + elif layer == 'act' and activate and self.with_activation: + x = self.activate(x) + return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py new file mode 100644 index 000000000000..ecd8ed0db777 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/conv_ws.py @@ -0,0 +1,121 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .registry import CONV_LAYERS + + +def conv_ws_2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, eps=1e-5): + c_in = weight.size(0) + weight_flat = weight.view(c_in, -1) + mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) + std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) + weight = (weight - mean) / (std + eps) + return F.conv2d(input, weight, bias, stride, padding, dilation, groups) + + +@CONV_LAYERS.register_module('ConvWS') +class ConvWS2d(nn.Conv2d): + def __init__( + self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, eps=1e-5 + ): + super(ConvWS2d, self).__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias, + ) + self.eps = eps + + def forward(self, x): + return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups, self.eps) + + +@CONV_LAYERS.register_module(name='ConvAWS') +class ConvAWS2d(nn.Conv2d): + """AWS (Adaptive Weight Standardization) + + This is a variant of Weight Standardization + (https://arxiv.org/pdf/1903.10520.pdf) + It is used in DetectoRS to avoid NaN + (https://arxiv.org/pdf/2006.02334.pdf) + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the conv kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. Default: 0 + dilation (int or tuple, optional): Spacing between kernel elements. + Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. Default: 1 + bias (bool, optional): If set True, adds a learnable bias to the + output. Default: True + """ + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True): + super().__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias, + ) + self.register_buffer('weight_gamma', torch.ones(self.out_channels, 1, 1, 1)) + self.register_buffer('weight_beta', torch.zeros(self.out_channels, 1, 1, 1)) + + def _get_weight(self, weight): + weight_flat = weight.view(weight.size(0), -1) + mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1) + std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) + weight = (weight - mean) / std + weight = self.weight_gamma * weight + self.weight_beta + return weight + + def forward(self, x): + weight = self._get_weight(self.weight) + return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) + + def _load_from_state_dict( + self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ): + """Override default load function. + + AWS overrides the function _load_from_state_dict to recover + weight_gamma and weight_beta if they are missing. If weight_gamma and + weight_beta are found in the checkpoint, this function will return + after super()._load_from_state_dict. Otherwise, it will compute the + mean and std of the pretrained weights and store them in weight_beta + and weight_gamma. + """ + + self.weight_gamma.data.fill_(-1) + local_missing_keys = [] + super()._load_from_state_dict( + state_dict, prefix, local_metadata, strict, local_missing_keys, unexpected_keys, error_msgs + ) + if self.weight_gamma.data.mean() > 0: + for k in local_missing_keys: + missing_keys.append(k) + return + weight = self.weight.data + weight_flat = weight.view(weight.size(0), -1) + mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1) + std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) + self.weight_beta.data.copy_(mean) + self.weight_gamma.data.copy_(std) + missing_gamma_beta = [k for k in local_missing_keys if k.endswith('weight_gamma') or k.endswith('weight_beta')] + for k in missing_gamma_beta: + local_missing_keys.remove(k) + for k in local_missing_keys: + missing_keys.append(k) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py new file mode 100644 index 000000000000..6e4b622aed59 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py @@ -0,0 +1,95 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn + +from .conv_module import ConvModule + + +class DepthwiseSeparableConvModule(nn.Module): + """Depthwise separable convolution module. + + See https://arxiv.org/pdf/1704.04861.pdf for details. + + This module can replace a ConvModule with the conv block replaced by two + conv block: depthwise conv block and pointwise conv block. The depthwise + conv block contains depthwise-conv/norm/activation layers. The pointwise + conv block contains pointwise-conv/norm/activation layers. It should be + noted that there will be norm/activation layer in the depthwise conv block + if `norm_cfg` and `act_cfg` are specified. + + Args: + in_channels (int): Number of channels in the input feature map. + Same as that in ``nn._ConvNd``. + out_channels (int): Number of channels produced by the convolution. + Same as that in ``nn._ConvNd``. + kernel_size (int | tuple[int]): Size of the convolving kernel. + Same as that in ``nn._ConvNd``. + stride (int | tuple[int]): Stride of the convolution. + Same as that in ``nn._ConvNd``. Default: 1. + padding (int | tuple[int]): Zero-padding added to both sides of + the input. Same as that in ``nn._ConvNd``. Default: 0. + dilation (int | tuple[int]): Spacing between kernel elements. + Same as that in ``nn._ConvNd``. Default: 1. + norm_cfg (dict): Default norm config for both depthwise ConvModule and + pointwise ConvModule. Default: None. + act_cfg (dict): Default activation config for both depthwise ConvModule + and pointwise ConvModule. Default: dict(type='ReLU'). + dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is + 'default', it will be the same as `norm_cfg`. Default: 'default'. + dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is + 'default', it will be the same as `act_cfg`. Default: 'default'. + pw_norm_cfg (dict): Norm config of pointwise ConvModule. If it is + 'default', it will be the same as `norm_cfg`. Default: 'default'. + pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is + 'default', it will be the same as `act_cfg`. Default: 'default'. + kwargs (optional): Other shared arguments for depthwise and pointwise + ConvModule. See ConvModule for ref. + """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + dw_norm_cfg='default', + dw_act_cfg='default', + pw_norm_cfg='default', + pw_act_cfg='default', + **kwargs + ): + super(DepthwiseSeparableConvModule, self).__init__() + assert 'groups' not in kwargs, 'groups should not be specified' + + # if norm/activation config of depthwise/pointwise ConvModule is not + # specified, use default config. + dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg + dw_act_cfg = dw_act_cfg if dw_act_cfg != 'default' else act_cfg + pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg + pw_act_cfg = pw_act_cfg if pw_act_cfg != 'default' else act_cfg + + # depthwise convolution + self.depthwise_conv = ConvModule( + in_channels, + in_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=in_channels, + norm_cfg=dw_norm_cfg, + act_cfg=dw_act_cfg, + **kwargs + ) + + self.pointwise_conv = ConvModule( + in_channels, out_channels, 1, norm_cfg=pw_norm_cfg, act_cfg=pw_act_cfg, **kwargs + ) + + def forward(self, x): + x = self.depthwise_conv(x) + x = self.pointwise_conv(x) + return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py new file mode 100644 index 000000000000..b7f1af30b38d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/drop.py @@ -0,0 +1,64 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import build_from_cfg +from .registry import DROPOUT_LAYERS + + +def drop_path(x, drop_prob=0.0, training=False): + """Drop paths (Stochastic Depth) per sample (when applied in main path of + residual blocks). + + We follow the implementation + https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py # noqa: E501 + """ + if drop_prob == 0.0 or not training: + return x + keep_prob = 1 - drop_prob + # handle tensors with different dimensions, not just 4D tensors. + shape = (x.shape[0],) + (1,) * (x.ndim - 1) + random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) + output = x.div(keep_prob) * random_tensor.floor() + return output + + +@DROPOUT_LAYERS.register_module() +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of + residual blocks). + + We follow the implementation + https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py # noqa: E501 + + Args: + drop_prob (float): Probability of the path to be zeroed. Default: 0.1 + """ + + def __init__(self, drop_prob=0.1): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training) + + +@DROPOUT_LAYERS.register_module() +class Dropout(nn.Dropout): + """A wrapper for ``torch.nn.Dropout``, We rename the ``p`` of + ``torch.nn.Dropout`` to ``drop_prob`` so as to be consistent with + ``DropPath`` + + Args: + drop_prob (float): Probability of the elements to be + zeroed. Default: 0.5. + inplace (bool): Do the operation inplace or not. Default: False. + """ + + def __init__(self, drop_prob=0.5, inplace=False): + super().__init__(p=drop_prob, inplace=inplace) + + +def build_dropout(cfg, default_args=None): + """Builder for drop out layers.""" + return build_from_cfg(cfg, DROPOUT_LAYERS, default_args) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py new file mode 100644 index 000000000000..3886a902c75c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/generalized_attention.py @@ -0,0 +1,346 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..utils import kaiming_init +from .registry import PLUGIN_LAYERS + + +@PLUGIN_LAYERS.register_module() +class GeneralizedAttention(nn.Module): + """GeneralizedAttention module. + + See 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks' + (https://arxiv.org/abs/1711.07971) for details. + + Args: + in_channels (int): Channels of the input feature map. + spatial_range (int): The spatial range. -1 indicates no spatial range + constraint. Default: -1. + num_heads (int): The head number of empirical_attention module. + Default: 9. + position_embedding_dim (int): The position embedding dimension. + Default: -1. + position_magnitude (int): A multiplier acting on coord difference. + Default: 1. + kv_stride (int): The feature stride acting on key/value feature map. + Default: 2. + q_stride (int): The feature stride acting on query feature map. + Default: 1. + attention_type (str): A binary indicator string for indicating which + items in generalized empirical_attention module are used. + Default: '1111'. + + - '1000' indicates 'query and key content' (appr - appr) item, + - '0100' indicates 'query content and relative position' + (appr - position) item, + - '0010' indicates 'key content only' (bias - appr) item, + - '0001' indicates 'relative position only' (bias - position) item. + """ + + _abbr_ = 'gen_attention_block' + + def __init__( + self, + in_channels, + spatial_range=-1, + num_heads=9, + position_embedding_dim=-1, + position_magnitude=1, + kv_stride=2, + q_stride=1, + attention_type='1111', + ): + + super(GeneralizedAttention, self).__init__() + + # hard range means local range for non-local operation + self.position_embedding_dim = position_embedding_dim if position_embedding_dim > 0 else in_channels + + self.position_magnitude = position_magnitude + self.num_heads = num_heads + self.in_channels = in_channels + self.spatial_range = spatial_range + self.kv_stride = kv_stride + self.q_stride = q_stride + self.attention_type = [bool(int(_)) for _ in attention_type] + self.qk_embed_dim = in_channels // num_heads + out_c = self.qk_embed_dim * num_heads + + if self.attention_type[0] or self.attention_type[1]: + self.query_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_c, kernel_size=1, bias=False) + self.query_conv.kaiming_init = True + + if self.attention_type[0] or self.attention_type[2]: + self.key_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_c, kernel_size=1, bias=False) + self.key_conv.kaiming_init = True + + self.v_dim = in_channels // num_heads + self.value_conv = nn.Conv2d( + in_channels=in_channels, out_channels=self.v_dim * num_heads, kernel_size=1, bias=False + ) + self.value_conv.kaiming_init = True + + if self.attention_type[1] or self.attention_type[3]: + self.appr_geom_fc_x = nn.Linear(self.position_embedding_dim // 2, out_c, bias=False) + self.appr_geom_fc_x.kaiming_init = True + + self.appr_geom_fc_y = nn.Linear(self.position_embedding_dim // 2, out_c, bias=False) + self.appr_geom_fc_y.kaiming_init = True + + if self.attention_type[2]: + stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2) + appr_bias_value = -2 * stdv * torch.rand(out_c) + stdv + self.appr_bias = nn.Parameter(appr_bias_value) + + if self.attention_type[3]: + stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2) + geom_bias_value = -2 * stdv * torch.rand(out_c) + stdv + self.geom_bias = nn.Parameter(geom_bias_value) + + self.proj_conv = nn.Conv2d( + in_channels=self.v_dim * num_heads, out_channels=in_channels, kernel_size=1, bias=True + ) + self.proj_conv.kaiming_init = True + self.gamma = nn.Parameter(torch.zeros(1)) + + if self.spatial_range >= 0: + # only works when non local is after 3*3 conv + if in_channels == 256: + max_len = 84 + elif in_channels == 512: + max_len = 42 + + max_len_kv = int((max_len - 1.0) / self.kv_stride + 1) + local_constraint_map = np.ones((max_len, max_len, max_len_kv, max_len_kv), dtype=np.int) + for iy in range(max_len): + for ix in range(max_len): + local_constraint_map[ + iy, + ix, + max((iy - self.spatial_range) // self.kv_stride, 0) : min( + (iy + self.spatial_range + 1) // self.kv_stride + 1, max_len + ), + max((ix - self.spatial_range) // self.kv_stride, 0) : min( + (ix + self.spatial_range + 1) // self.kv_stride + 1, max_len + ), + ] = 0 + + self.local_constraint_map = nn.Parameter( + torch.from_numpy(local_constraint_map).byte(), requires_grad=False + ) + + if self.q_stride > 1: + self.q_downsample = nn.AvgPool2d(kernel_size=1, stride=self.q_stride) + else: + self.q_downsample = None + + if self.kv_stride > 1: + self.kv_downsample = nn.AvgPool2d(kernel_size=1, stride=self.kv_stride) + else: + self.kv_downsample = None + + self.init_weights() + + def get_position_embedding(self, h, w, h_kv, w_kv, q_stride, kv_stride, device, dtype, feat_dim, wave_length=1000): + # the default type of Tensor is float32, leading to type mismatch + # in fp16 mode. Cast it to support fp16 mode. + h_idxs = torch.linspace(0, h - 1, h).to(device=device, dtype=dtype) + h_idxs = h_idxs.view((h, 1)) * q_stride + + w_idxs = torch.linspace(0, w - 1, w).to(device=device, dtype=dtype) + w_idxs = w_idxs.view((w, 1)) * q_stride + + h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).to(device=device, dtype=dtype) + h_kv_idxs = h_kv_idxs.view((h_kv, 1)) * kv_stride + + w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).to(device=device, dtype=dtype) + w_kv_idxs = w_kv_idxs.view((w_kv, 1)) * kv_stride + + # (h, h_kv, 1) + h_diff = h_idxs.unsqueeze(1) - h_kv_idxs.unsqueeze(0) + h_diff *= self.position_magnitude + + # (w, w_kv, 1) + w_diff = w_idxs.unsqueeze(1) - w_kv_idxs.unsqueeze(0) + w_diff *= self.position_magnitude + + feat_range = torch.arange(0, feat_dim / 4).to(device=device, dtype=dtype) + + dim_mat = torch.Tensor([wave_length]).to(device=device, dtype=dtype) + dim_mat = dim_mat ** ((4.0 / feat_dim) * feat_range) + dim_mat = dim_mat.view((1, 1, -1)) + + embedding_x = torch.cat(((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2) + + embedding_y = torch.cat(((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2) + + return embedding_x, embedding_y + + def forward(self, x_input): + num_heads = self.num_heads + + # use empirical_attention + if self.q_downsample is not None: + x_q = self.q_downsample(x_input) + else: + x_q = x_input + n, _, h, w = x_q.shape + + if self.kv_downsample is not None: + x_kv = self.kv_downsample(x_input) + else: + x_kv = x_input + _, _, h_kv, w_kv = x_kv.shape + + if self.attention_type[0] or self.attention_type[1]: + proj_query = self.query_conv(x_q).view((n, num_heads, self.qk_embed_dim, h * w)) + proj_query = proj_query.permute(0, 1, 3, 2) + + if self.attention_type[0] or self.attention_type[2]: + proj_key = self.key_conv(x_kv).view((n, num_heads, self.qk_embed_dim, h_kv * w_kv)) + + if self.attention_type[1] or self.attention_type[3]: + position_embed_x, position_embed_y = self.get_position_embedding( + h, + w, + h_kv, + w_kv, + self.q_stride, + self.kv_stride, + x_input.device, + x_input.dtype, + self.position_embedding_dim, + ) + # (n, num_heads, w, w_kv, dim) + position_feat_x = ( + self.appr_geom_fc_x(position_embed_x) + .view(1, w, w_kv, num_heads, self.qk_embed_dim) + .permute(0, 3, 1, 2, 4) + .repeat(n, 1, 1, 1, 1) + ) + + # (n, num_heads, h, h_kv, dim) + position_feat_y = ( + self.appr_geom_fc_y(position_embed_y) + .view(1, h, h_kv, num_heads, self.qk_embed_dim) + .permute(0, 3, 1, 2, 4) + .repeat(n, 1, 1, 1, 1) + ) + + position_feat_x /= math.sqrt(2) + position_feat_y /= math.sqrt(2) + + # accelerate for saliency only + if (np.sum(self.attention_type) == 1) and self.attention_type[2]: + appr_bias = self.appr_bias.view(1, num_heads, 1, self.qk_embed_dim).repeat(n, 1, 1, 1) + + energy = torch.matmul(appr_bias, proj_key).view(n, num_heads, 1, h_kv * w_kv) + + h = 1 + w = 1 + else: + # (n, num_heads, h*w, h_kv*w_kv), query before key, 540mb for + if not self.attention_type[0]: + energy = torch.zeros(n, num_heads, h, w, h_kv, w_kv, dtype=x_input.dtype, device=x_input.device) + + # attention_type[0]: appr - appr + # attention_type[1]: appr - position + # attention_type[2]: bias - appr + # attention_type[3]: bias - position + if self.attention_type[0] or self.attention_type[2]: + if self.attention_type[0] and self.attention_type[2]: + appr_bias = self.appr_bias.view(1, num_heads, 1, self.qk_embed_dim) + energy = torch.matmul(proj_query + appr_bias, proj_key).view(n, num_heads, h, w, h_kv, w_kv) + + elif self.attention_type[0]: + energy = torch.matmul(proj_query, proj_key).view(n, num_heads, h, w, h_kv, w_kv) + + elif self.attention_type[2]: + appr_bias = self.appr_bias.view(1, num_heads, 1, self.qk_embed_dim).repeat(n, 1, 1, 1) + + energy += torch.matmul(appr_bias, proj_key).view(n, num_heads, 1, 1, h_kv, w_kv) + + if self.attention_type[1] or self.attention_type[3]: + if self.attention_type[1] and self.attention_type[3]: + geom_bias = self.geom_bias.view(1, num_heads, 1, self.qk_embed_dim) + + proj_query_reshape = (proj_query + geom_bias).view(n, num_heads, h, w, self.qk_embed_dim) + + energy_x = torch.matmul( + proj_query_reshape.permute(0, 1, 3, 2, 4), position_feat_x.permute(0, 1, 2, 4, 3) + ) + energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4) + + energy_y = torch.matmul(proj_query_reshape, position_feat_y.permute(0, 1, 2, 4, 3)) + energy_y = energy_y.unsqueeze(5) + + energy += energy_x + energy_y + + elif self.attention_type[1]: + proj_query_reshape = proj_query.view(n, num_heads, h, w, self.qk_embed_dim) + proj_query_reshape = proj_query_reshape.permute(0, 1, 3, 2, 4) + position_feat_x_reshape = position_feat_x.permute(0, 1, 2, 4, 3) + position_feat_y_reshape = position_feat_y.permute(0, 1, 2, 4, 3) + + energy_x = torch.matmul(proj_query_reshape, position_feat_x_reshape) + energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4) + + energy_y = torch.matmul(proj_query_reshape, position_feat_y_reshape) + energy_y = energy_y.unsqueeze(5) + + energy += energy_x + energy_y + + elif self.attention_type[3]: + geom_bias = self.geom_bias.view(1, num_heads, self.qk_embed_dim, 1).repeat(n, 1, 1, 1) + + position_feat_x_reshape = position_feat_x.view(n, num_heads, w * w_kv, self.qk_embed_dim) + + position_feat_y_reshape = position_feat_y.view(n, num_heads, h * h_kv, self.qk_embed_dim) + + energy_x = torch.matmul(position_feat_x_reshape, geom_bias) + energy_x = energy_x.view(n, num_heads, 1, w, 1, w_kv) + + energy_y = torch.matmul(position_feat_y_reshape, geom_bias) + energy_y = energy_y.view(n, num_heads, h, 1, h_kv, 1) + + energy += energy_x + energy_y + + energy = energy.view(n, num_heads, h * w, h_kv * w_kv) + + if self.spatial_range >= 0: + cur_local_constraint_map = ( + self.local_constraint_map[:h, :w, :h_kv, :w_kv].contiguous().view(1, 1, h * w, h_kv * w_kv) + ) + + energy = energy.masked_fill_(cur_local_constraint_map, float('-inf')) + + attention = F.softmax(energy, 3) + + proj_value = self.value_conv(x_kv) + proj_value_reshape = proj_value.view((n, num_heads, self.v_dim, h_kv * w_kv)).permute(0, 1, 3, 2) + + out = ( + torch.matmul(attention, proj_value_reshape) + .permute(0, 1, 3, 2) + .contiguous() + .view(n, self.v_dim * self.num_heads, h, w) + ) + + out = self.proj_conv(out) + + # output is downsampled, upsample back to input size + if self.q_downsample is not None: + out = F.interpolate(out, size=x_input.shape[2:], mode='bilinear', align_corners=False) + + out = self.gamma * out + x_input + return out + + def init_weights(self): + for m in self.modules(): + if hasattr(m, 'kaiming_init') and m.kaiming_init: + kaiming_init(m, mode='fan_in', nonlinearity='leaky_relu', bias=0, distribution='uniform', a=1) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hsigmoid.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hsigmoid.py new file mode 100644 index 000000000000..30b1a3d6580c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hsigmoid.py @@ -0,0 +1,34 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn + +from .registry import ACTIVATION_LAYERS + + +@ACTIVATION_LAYERS.register_module() +class HSigmoid(nn.Module): + """Hard Sigmoid Module. Apply the hard sigmoid function: + Hsigmoid(x) = min(max((x + bias) / divisor, min_value), max_value) + Default: Hsigmoid(x) = min(max((x + 1) / 2, 0), 1) + + Args: + bias (float): Bias of the input feature map. Default: 1.0. + divisor (float): Divisor of the input feature map. Default: 2.0. + min_value (float): Lower bound value. Default: 0.0. + max_value (float): Upper bound value. Default: 1.0. + + Returns: + Tensor: The output tensor. + """ + + def __init__(self, bias=1.0, divisor=2.0, min_value=0.0, max_value=1.0): + super(HSigmoid, self).__init__() + self.bias = bias + self.divisor = divisor + assert self.divisor != 0 + self.min_value = min_value + self.max_value = max_value + + def forward(self, x): + x = (x + self.bias) / self.divisor + + return x.clamp_(self.min_value, self.max_value) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hswish.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hswish.py new file mode 100644 index 000000000000..7e0c090ff037 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/hswish.py @@ -0,0 +1,29 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn + +from .registry import ACTIVATION_LAYERS + + +@ACTIVATION_LAYERS.register_module() +class HSwish(nn.Module): + """Hard Swish Module. + + This module applies the hard swish function: + + .. math:: + Hswish(x) = x * ReLU6(x + 3) / 6 + + Args: + inplace (bool): can optionally do the operation in-place. + Default: False. + + Returns: + Tensor: The output tensor. + """ + + def __init__(self, inplace=False): + super(HSwish, self).__init__() + self.act = nn.ReLU6(inplace) + + def forward(self, x): + return x * self.act(x + 3) / 6 diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py new file mode 100644 index 000000000000..34a3602e2a84 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/non_local.py @@ -0,0 +1,272 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import ABCMeta + +import torch +import torch.nn as nn + +from ..utils import constant_init, normal_init +from .conv_module import ConvModule +from .registry import PLUGIN_LAYERS + + +class _NonLocalNd(nn.Module, metaclass=ABCMeta): + """Basic Non-local module. + + This module is proposed in + "Non-local Neural Networks" + Paper reference: https://arxiv.org/abs/1711.07971 + Code reference: https://github.com/AlexHex7/Non-local_pytorch + + Args: + in_channels (int): Channels of the input feature map. + reduction (int): Channel reduction ratio. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + `1/sqrt(inter_channels)` when the mode is `embedded_gaussian`. + Default: True. + conv_cfg (None | dict): The config dict for convolution layers. + If not specified, it will use `nn.Conv2d` for convolution layers. + Default: None. + norm_cfg (None | dict): The config dict for normalization layers. + Default: None. (This parameter is only applicable to conv_out.) + mode (str): Options are `gaussian`, `concatenation`, + `embedded_gaussian` and `dot_product`. Default: embedded_gaussian. + """ + + def __init__( + self, + in_channels, + reduction=2, + use_scale=True, + conv_cfg=None, + norm_cfg=None, + mode='embedded_gaussian', + **kwargs, + ): + super(_NonLocalNd, self).__init__() + self.in_channels = in_channels + self.reduction = reduction + self.use_scale = use_scale + self.inter_channels = max(in_channels // reduction, 1) + self.mode = mode + + if mode not in ['gaussian', 'embedded_gaussian', 'dot_product', 'concatenation']: + raise ValueError( + "Mode should be in 'gaussian', 'concatenation', " + f"'embedded_gaussian' or 'dot_product', but got " + f'{mode} instead.' + ) + + # g, theta, phi are defaulted as `nn.ConvNd`. + # Here we use ConvModule for potential usage. + self.g = ConvModule(self.in_channels, self.inter_channels, kernel_size=1, conv_cfg=conv_cfg, act_cfg=None) + self.conv_out = ConvModule( + self.inter_channels, self.in_channels, kernel_size=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None + ) + + if self.mode != 'gaussian': + self.theta = ConvModule( + self.in_channels, self.inter_channels, kernel_size=1, conv_cfg=conv_cfg, act_cfg=None + ) + self.phi = ConvModule( + self.in_channels, self.inter_channels, kernel_size=1, conv_cfg=conv_cfg, act_cfg=None + ) + + if self.mode == 'concatenation': + self.concat_project = ConvModule( + self.inter_channels * 2, 1, kernel_size=1, stride=1, padding=0, bias=False, act_cfg=dict(type='ReLU') + ) + + self.init_weights(**kwargs) + + def init_weights(self, std=0.01, zeros_init=True): + if self.mode != 'gaussian': + for m in [self.g, self.theta, self.phi]: + normal_init(m.conv, std=std) + else: + normal_init(self.g.conv, std=std) + if zeros_init: + if self.conv_out.norm_cfg is None: + constant_init(self.conv_out.conv, 0) + else: + constant_init(self.conv_out.norm, 0) + else: + if self.conv_out.norm_cfg is None: + normal_init(self.conv_out.conv, std=std) + else: + normal_init(self.conv_out.norm, std=std) + + def gaussian(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def embedded_gaussian(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + if self.use_scale: + # theta_x.shape[-1] is `self.inter_channels` + pairwise_weight /= theta_x.shape[-1] ** 0.5 + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def dot_product(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + pairwise_weight /= pairwise_weight.shape[-1] + return pairwise_weight + + def concatenation(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + h = theta_x.size(2) + w = phi_x.size(3) + theta_x = theta_x.repeat(1, 1, 1, w) + phi_x = phi_x.repeat(1, 1, h, 1) + + concat_feature = torch.cat([theta_x, phi_x], dim=1) + pairwise_weight = self.concat_project(concat_feature) + n, _, h, w = pairwise_weight.size() + pairwise_weight = pairwise_weight.view(n, h, w) + pairwise_weight /= pairwise_weight.shape[-1] + + return pairwise_weight + + def forward(self, x): + # Assume `reduction = 1`, then `inter_channels = C` + # or `inter_channels = C` when `mode="gaussian"` + + # NonLocal1d x: [N, C, H] + # NonLocal2d x: [N, C, H, W] + # NonLocal3d x: [N, C, T, H, W] + n = x.size(0) + + # NonLocal1d g_x: [N, H, C] + # NonLocal2d g_x: [N, HxW, C] + # NonLocal3d g_x: [N, TxHxW, C] + g_x = self.g(x).view(n, self.inter_channels, -1) + g_x = g_x.permute(0, 2, 1) + + # NonLocal1d theta_x: [N, H, C], phi_x: [N, C, H] + # NonLocal2d theta_x: [N, HxW, C], phi_x: [N, C, HxW] + # NonLocal3d theta_x: [N, TxHxW, C], phi_x: [N, C, TxHxW] + if self.mode == 'gaussian': + theta_x = x.view(n, self.in_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + if self.sub_sample: + phi_x = self.phi(x).view(n, self.in_channels, -1) + else: + phi_x = x.view(n, self.in_channels, -1) + elif self.mode == 'concatenation': + theta_x = self.theta(x).view(n, self.inter_channels, -1, 1) + phi_x = self.phi(x).view(n, self.inter_channels, 1, -1) + else: + theta_x = self.theta(x).view(n, self.inter_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + phi_x = self.phi(x).view(n, self.inter_channels, -1) + + pairwise_func = getattr(self, self.mode) + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = pairwise_func(theta_x, phi_x) + + # NonLocal1d y: [N, H, C] + # NonLocal2d y: [N, HxW, C] + # NonLocal3d y: [N, TxHxW, C] + y = torch.matmul(pairwise_weight, g_x) + # NonLocal1d y: [N, C, H] + # NonLocal2d y: [N, C, H, W] + # NonLocal3d y: [N, C, T, H, W] + y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, *x.size()[2:]) + + output = x + self.conv_out(y) + + return output + + +class NonLocal1d(_NonLocalNd): + """1D Non-local module. + + Args: + in_channels (int): Same as `NonLocalND`. + sub_sample (bool): Whether to apply max pooling after pairwise + function (Note that the `sub_sample` is applied on spatial only). + Default: False. + conv_cfg (None | dict): Same as `NonLocalND`. + Default: dict(type='Conv1d'). + """ + + def __init__(self, in_channels, sub_sample=False, conv_cfg=dict(type='Conv1d'), **kwargs): + super(NonLocal1d, self).__init__(in_channels, conv_cfg=conv_cfg, **kwargs) + + self.sub_sample = sub_sample + + if sub_sample: + max_pool_layer = nn.MaxPool1d(kernel_size=2) + self.g = nn.Sequential(self.g, max_pool_layer) + if self.mode != 'gaussian': + self.phi = nn.Sequential(self.phi, max_pool_layer) + else: + self.phi = max_pool_layer + + +@PLUGIN_LAYERS.register_module() +class NonLocal2d(_NonLocalNd): + """2D Non-local module. + + Args: + in_channels (int): Same as `NonLocalND`. + sub_sample (bool): Whether to apply max pooling after pairwise + function (Note that the `sub_sample` is applied on spatial only). + Default: False. + conv_cfg (None | dict): Same as `NonLocalND`. + Default: dict(type='Conv2d'). + """ + + _abbr_ = 'nonlocal_block' + + def __init__(self, in_channels, sub_sample=False, conv_cfg=dict(type='Conv2d'), **kwargs): + super(NonLocal2d, self).__init__(in_channels, conv_cfg=conv_cfg, **kwargs) + + self.sub_sample = sub_sample + + if sub_sample: + max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2)) + self.g = nn.Sequential(self.g, max_pool_layer) + if self.mode != 'gaussian': + self.phi = nn.Sequential(self.phi, max_pool_layer) + else: + self.phi = max_pool_layer + + +class NonLocal3d(_NonLocalNd): + """3D Non-local module. + + Args: + in_channels (int): Same as `NonLocalND`. + sub_sample (bool): Whether to apply max pooling after pairwise + function (Note that the `sub_sample` is applied on spatial only). + Default: False. + conv_cfg (None | dict): Same as `NonLocalND`. + Default: dict(type='Conv3d'). + """ + + def __init__(self, in_channels, sub_sample=False, conv_cfg=dict(type='Conv3d'), **kwargs): + super(NonLocal3d, self).__init__(in_channels, conv_cfg=conv_cfg, **kwargs) + self.sub_sample = sub_sample + + if sub_sample: + max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2)) + self.g = nn.Sequential(self.g, max_pool_layer) + if self.mode != 'gaussian': + self.phi = nn.Sequential(self.phi, max_pool_layer) + else: + self.phi = max_pool_layer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py new file mode 100644 index 000000000000..e3f5eaa8af18 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/norm.py @@ -0,0 +1,148 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import inspect + +import torch.nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_tuple_of +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import ( + SyncBatchNorm, + _BatchNorm, + _InstanceNorm, +) + +from .registry import NORM_LAYERS + +NORM_LAYERS.register_module('BN', module=nn.BatchNorm2d) +NORM_LAYERS.register_module('BN1d', module=nn.BatchNorm1d) +NORM_LAYERS.register_module('BN2d', module=nn.BatchNorm2d) +NORM_LAYERS.register_module('BN3d', module=nn.BatchNorm3d) +NORM_LAYERS.register_module('SyncBN', module=SyncBatchNorm) +NORM_LAYERS.register_module('GN', module=nn.GroupNorm) +NORM_LAYERS.register_module('LN', module=nn.LayerNorm) +NORM_LAYERS.register_module('IN', module=nn.InstanceNorm2d) +NORM_LAYERS.register_module('IN1d', module=nn.InstanceNorm1d) +NORM_LAYERS.register_module('IN2d', module=nn.InstanceNorm2d) +NORM_LAYERS.register_module('IN3d', module=nn.InstanceNorm3d) + + +def infer_abbr(class_type): + """Infer abbreviation from the class name. + + When we build a norm layer with `build_norm_layer()`, we want to preserve + the norm type in variable names, e.g, self.bn1, self.gn. This method will + infer the abbreviation to map class types to abbreviations. + + Rule 1: If the class has the property "_abbr_", return the property. + Rule 2: If the parent class is _BatchNorm, GroupNorm, LayerNorm or + InstanceNorm, the abbreviation of this layer will be "bn", "gn", "ln" and + "in" respectively. + Rule 3: If the class name contains "batch", "group", "layer" or "instance", + the abbreviation of this layer will be "bn", "gn", "ln" and "in" + respectively. + Rule 4: Otherwise, the abbreviation falls back to "norm". + + Args: + class_type (type): The norm layer type. + + Returns: + str: The inferred abbreviation. + """ + if not inspect.isclass(class_type): + raise TypeError(f'class_type must be a type, but got {type(class_type)}') + if hasattr(class_type, '_abbr_'): + return class_type._abbr_ + if issubclass(class_type, _InstanceNorm): # IN is a subclass of BN + return 'in' + elif issubclass(class_type, _BatchNorm): + return 'bn' + elif issubclass(class_type, nn.GroupNorm): + return 'gn' + elif issubclass(class_type, nn.LayerNorm): + return 'ln' + else: + class_name = class_type.__name__.lower() + if 'batch' in class_name: + return 'bn' + elif 'group' in class_name: + return 'gn' + elif 'layer' in class_name: + return 'ln' + elif 'instance' in class_name: + return 'in' + else: + return 'norm_layer' + + +def build_norm_layer(cfg, num_features, postfix=''): + """Build normalization layer. + + Args: + cfg (dict): The norm layer config, which should contain: + + - type (str): Layer type. + - layer args: Args needed to instantiate a norm layer. + - requires_grad (bool, optional): Whether stop gradient updates. + num_features (int): Number of input channels. + postfix (int | str): The postfix to be appended into norm abbreviation + to create named layer. + + Returns: + (str, nn.Module): The first element is the layer name consisting of + abbreviation and postfix, e.g., bn1, gn. The second element is the + created norm layer. + """ + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in NORM_LAYERS: + raise KeyError(f'Unrecognized norm type {layer_type}') + + norm_layer = NORM_LAYERS.get(layer_type) + abbr = infer_abbr(norm_layer) + + assert isinstance(postfix, (int, str)) + name = abbr + str(postfix) + + requires_grad = cfg_.pop('requires_grad', True) + cfg_.setdefault('eps', 1e-5) + if layer_type != 'GN': + layer = norm_layer(num_features, **cfg_) + if layer_type == 'SyncBN' and hasattr(layer, '_specify_ddp_gpu_num'): + layer._specify_ddp_gpu_num(1) + else: + assert 'num_groups' in cfg_ + layer = norm_layer(num_channels=num_features, **cfg_) + + for param in layer.parameters(): + param.requires_grad = requires_grad + + return name, layer + + +def is_norm(layer, exclude=None): + """Check if a layer is a normalization layer. + + Args: + layer (nn.Module): The layer to be checked. + exclude (type | tuple[type]): Types to be excluded. + + Returns: + bool: Whether the layer is a norm layer. + """ + if exclude is not None: + if not isinstance(exclude, tuple): + exclude = (exclude,) + if not is_tuple_of(exclude, type): + raise TypeError( + f'"exclude" must be either None or type or a tuple of types, ' f'but got {type(exclude)}: {exclude}' + ) + + if exclude and isinstance(layer, exclude): + return False + + all_norm_bases = (_BatchNorm, _InstanceNorm, nn.GroupNorm, nn.LayerNorm) + return isinstance(layer, all_norm_bases) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/padding.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/padding.py new file mode 100644 index 000000000000..e4ac6b28a178 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/padding.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn + +from .registry import PADDING_LAYERS + +PADDING_LAYERS.register_module('zero', module=nn.ZeroPad2d) +PADDING_LAYERS.register_module('reflect', module=nn.ReflectionPad2d) +PADDING_LAYERS.register_module('replicate', module=nn.ReplicationPad2d) + + +def build_padding_layer(cfg, *args, **kwargs): + """Build padding layer. + + Args: + cfg (None or dict): The padding layer config, which should contain: + - type (str): Layer type. + - layer args: Args needed to instantiate a padding layer. + + Returns: + nn.Module: Created padding layer. + """ + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + + cfg_ = cfg.copy() + padding_type = cfg_.pop('type') + if padding_type not in PADDING_LAYERS: + raise KeyError(f'Unrecognized padding type {padding_type}.') + else: + padding_layer = PADDING_LAYERS.get(padding_type) + + layer = padding_layer(*args, **kwargs, **cfg_) + + return layer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py new file mode 100644 index 000000000000..d1e6d6fb326a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/plugin.py @@ -0,0 +1,87 @@ +import inspect +import platform + +from .registry import PLUGIN_LAYERS + +if platform.system() == 'Windows': + import regex as re +else: + import re + + +def infer_abbr(class_type): + """Infer abbreviation from the class name. + + This method will infer the abbreviation to map class types to + abbreviations. + + Rule 1: If the class has the property "abbr", return the property. + Rule 2: Otherwise, the abbreviation falls back to snake case of class + name, e.g. the abbreviation of ``FancyBlock`` will be ``fancy_block``. + + Args: + class_type (type): The norm layer type. + + Returns: + str: The inferred abbreviation. + """ + + def camel2snack(word): + """Convert camel case word into snack case. + + Modified from `inflection lib + `_. + + Example:: + + >>> camel2snack("FancyBlock") + 'fancy_block' + """ + + word = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', word) + word = re.sub(r'([a-z\d])([A-Z])', r'\1_\2', word) + word = word.replace('-', '_') + return word.lower() + + if not inspect.isclass(class_type): + raise TypeError(f'class_type must be a type, but got {type(class_type)}') + if hasattr(class_type, '_abbr_'): + return class_type._abbr_ + else: + return camel2snack(class_type.__name__) + + +def build_plugin_layer(cfg, postfix='', **kwargs): + """Build plugin layer. + + Args: + cfg (None or dict): cfg should contain: + type (str): identify plugin layer type. + layer args: args needed to instantiate a plugin layer. + postfix (int, str): appended into norm abbreviation to + create named layer. Default: ''. + + Returns: + tuple[str, nn.Module]: + name (str): abbreviation + postfix + layer (nn.Module): created plugin layer + """ + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in PLUGIN_LAYERS: + raise KeyError(f'Unrecognized plugin type {layer_type}') + + plugin_layer = PLUGIN_LAYERS.get(layer_type) + abbr = infer_abbr(plugin_layer) + + assert isinstance(postfix, (int, str)) + name = abbr + str(postfix) + + layer = plugin_layer(**kwargs, **cfg_) + + return name, layer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/registry.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/registry.py new file mode 100644 index 000000000000..584e3b0870fc --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/registry.py @@ -0,0 +1,16 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry + +CONV_LAYERS = Registry('conv layer') +NORM_LAYERS = Registry('norm layer') +ACTIVATION_LAYERS = Registry('activation layer') +PADDING_LAYERS = Registry('padding layer') +UPSAMPLE_LAYERS = Registry('upsample layer') +PLUGIN_LAYERS = Registry('plugin layer') + +DROPOUT_LAYERS = Registry('drop out layers') +POSITIONAL_ENCODING = Registry('position encoding') +ATTENTION = Registry('attention') +FEEDFORWARD_NETWORK = Registry('feed-forward Network') +TRANSFORMER_LAYER = Registry('transformerLayer') +TRANSFORMER_LAYER_SEQUENCE = Registry('transformer-layers sequence') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/scale.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/scale.py new file mode 100644 index 000000000000..c905fffcc8bf --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/scale.py @@ -0,0 +1,21 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + + +class Scale(nn.Module): + """A learnable scale parameter. + + This layer scales the input by a learnable factor. It multiplies a + learnable scale parameter of shape (1,) with input of any shape. + + Args: + scale (float): Initial value of scale factor. Default: 1.0 + """ + + def __init__(self, scale=1.0): + super(Scale, self).__init__() + self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) + + def forward(self, x): + return x * self.scale diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/swish.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/swish.py new file mode 100644 index 000000000000..e2ca8ed7b749 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/swish.py @@ -0,0 +1,25 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + +from .registry import ACTIVATION_LAYERS + + +@ACTIVATION_LAYERS.register_module() +class Swish(nn.Module): + """Swish Module. + + This module applies the swish function: + + .. math:: + Swish(x) = x * Sigmoid(x) + + Returns: + Tensor: The output tensor. + """ + + def __init__(self): + super(Swish, self).__init__() + + def forward(self, x): + return x * torch.sigmoid(x) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py new file mode 100644 index 000000000000..7661266316c8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/transformer.py @@ -0,0 +1,607 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import warnings + +import torch +import torch.nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import ConfigDict, deprecated_api_warning +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + Linear, + build_activation_layer, + build_norm_layer, +) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner.base_module import ( + BaseModule, + ModuleList, + Sequential, +) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import build_from_cfg + +from .drop import build_dropout +from .registry import ( + ATTENTION, + FEEDFORWARD_NETWORK, + POSITIONAL_ENCODING, + TRANSFORMER_LAYER, + TRANSFORMER_LAYER_SEQUENCE, +) + +# Avoid BC-breaking of importing MultiScaleDeformableAttention from this file +try: + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops.multi_scale_deform_attn import ( # noqa F401 + MultiScaleDeformableAttention, + ) + + warnings.warn( + ImportWarning( + '``MultiScaleDeformableAttention`` has been moved to ' + '``mmcv.ops.multi_scale_deform_attn``, please change original path ' # noqa E501 + '``from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention`` ' # noqa E501 + 'to ``from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention`` ' # noqa E501 + ) + ) + +except ImportError: + warnings.warn( + 'Fail to import ``MultiScaleDeformableAttention`` from ' + '``mmcv.ops.multi_scale_deform_attn``, ' + 'You should install ``mmcv-full`` if you need this module. ' + ) + + +def build_positional_encoding(cfg, default_args=None): + """Builder for Position Encoding.""" + return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args) + + +def build_attention(cfg, default_args=None): + """Builder for attention.""" + return build_from_cfg(cfg, ATTENTION, default_args) + + +def build_feedforward_network(cfg, default_args=None): + """Builder for feed-forward network (FFN).""" + return build_from_cfg(cfg, FEEDFORWARD_NETWORK, default_args) + + +def build_transformer_layer(cfg, default_args=None): + """Builder for transformer layer.""" + return build_from_cfg(cfg, TRANSFORMER_LAYER, default_args) + + +def build_transformer_layer_sequence(cfg, default_args=None): + """Builder for transformer encoder and transformer decoder.""" + return build_from_cfg(cfg, TRANSFORMER_LAYER_SEQUENCE, default_args) + + +@ATTENTION.register_module() +class MultiheadAttention(BaseModule): + """A wrapper for ``torch.nn.MultiheadAttention``. + + This module implements MultiheadAttention with identity connection, + and positional encoding is also passed as input. + + Args: + embed_dims (int): The embedding dimension. + num_heads (int): Parallel attention heads. + attn_drop (float): A Dropout layer on attn_output_weights. + Default: 0.0. + proj_drop (float): A Dropout layer after `nn.MultiheadAttention`. + Default: 0.0. + dropout_layer (obj:`ConfigDict`): The dropout_layer used + when adding the shortcut. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + batch_first (bool): When it is True, Key, Query and Value are shape of + (batch, n, embed_dim), otherwise (n, batch, embed_dim). + Default to False. + """ + + def __init__( + self, + embed_dims, + num_heads, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=dict(type='Dropout', drop_prob=0.0), + init_cfg=None, + batch_first=False, + **kwargs, + ): + super(MultiheadAttention, self).__init__(init_cfg) + if 'dropout' in kwargs: + warnings.warn( + 'The arguments `dropout` in MultiheadAttention ' + 'has been deprecated, now you can separately ' + 'set `attn_drop`(float), proj_drop(float), ' + 'and `dropout_layer`(dict) ' + ) + attn_drop = kwargs['dropout'] + dropout_layer['drop_prob'] = kwargs.pop('dropout') + + self.embed_dims = embed_dims + self.num_heads = num_heads + self.batch_first = batch_first + + self.attn = nn.MultiheadAttention(embed_dims, num_heads, attn_drop, **kwargs) + + self.proj_drop = nn.Dropout(proj_drop) + self.dropout_layer = build_dropout(dropout_layer) if dropout_layer else nn.Identity() + + @deprecated_api_warning({'residual': 'identity'}, cls_name='MultiheadAttention') + def forward( + self, + query, + key=None, + value=None, + identity=None, + query_pos=None, + key_pos=None, + attn_mask=None, + key_padding_mask=None, + **kwargs, + ): + """Forward function for `MultiheadAttention`. + + **kwargs allow passing a more general data flow when combining + with other operations in `transformerlayer`. + + Args: + query (Tensor): The input query with shape [num_queries, bs, + embed_dims] if self.batch_first is False, else + [bs, num_queries embed_dims]. + key (Tensor): The key tensor with shape [num_keys, bs, + embed_dims] if self.batch_first is False, else + [bs, num_keys, embed_dims] . + If None, the ``query`` will be used. Defaults to None. + value (Tensor): The value tensor with same shape as `key`. + Same in `nn.MultiheadAttention.forward`. Defaults to None. + If None, the `key` will be used. + identity (Tensor): This tensor, with the same shape as x, + will be used for the identity link. + If None, `x` will be used. Defaults to None. + query_pos (Tensor): The positional encoding for query, with + the same shape as `x`. If not None, it will + be added to `x` before forward function. Defaults to None. + key_pos (Tensor): The positional encoding for `key`, with the + same shape as `key`. Defaults to None. If not None, it will + be added to `key` before forward function. If None, and + `query_pos` has the same shape as `key`, then `query_pos` + will be used for `key_pos`. Defaults to None. + attn_mask (Tensor): ByteTensor mask with shape [num_queries, + num_keys]. Same in `nn.MultiheadAttention.forward`. + Defaults to None. + key_padding_mask (Tensor): ByteTensor with shape [bs, num_keys]. + Defaults to None. + + Returns: + Tensor: forwarded results with shape + [num_queries, bs, embed_dims] + if self.batch_first is False, else + [bs, num_queries embed_dims]. + """ + + if key is None: + key = query + if value is None: + value = key + if identity is None: + identity = query + if key_pos is None: + if query_pos is not None: + # use query_pos if key_pos is not available + if query_pos.shape == key.shape: + key_pos = query_pos + else: + warnings.warn(f'position encoding of key is' f'missing in {self.__class__.__name__}.') + if query_pos is not None: + query = query + query_pos + if key_pos is not None: + key = key + key_pos + + # Because the dataflow('key', 'query', 'value') of + # ``torch.nn.MultiheadAttention`` is (num_query, batch, + # embed_dims), We should adjust the shape of dataflow from + # batch_first (batch, num_query, embed_dims) to num_query_first + # (num_query ,batch, embed_dims), and recover ``attn_output`` + # from num_query_first to batch_first. + if self.batch_first: + query = query.transpose(0, 1) + key = key.transpose(0, 1) + value = value.transpose(0, 1) + + out = self.attn(query=query, key=key, value=value, attn_mask=attn_mask, key_padding_mask=key_padding_mask)[0] + + if self.batch_first: + out = out.transpose(0, 1) + + return identity + self.dropout_layer(self.proj_drop(out)) + + +@FEEDFORWARD_NETWORK.register_module() +class FFN(BaseModule): + """Implements feed-forward networks (FFNs) with identity connection. + + Args: + embed_dims (int): The feature dimension. Same as + `MultiheadAttention`. Defaults: 256. + feedforward_channels (int): The hidden dimension of FFNs. + Defaults: 1024. + num_fcs (int, optional): The number of fully-connected layers in + FFNs. Default: 2. + act_cfg (dict, optional): The activation config for FFNs. + Default: dict(type='ReLU') + ffn_drop (float, optional): Probability of an element to be + zeroed in FFN. Default 0.0. + add_identity (bool, optional): Whether to add the + identity connection. Default: `True`. + dropout_layer (obj:`ConfigDict`): The dropout_layer used + when adding the shortcut. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + """ + + @deprecated_api_warning({'dropout': 'ffn_drop', 'add_residual': 'add_identity'}, cls_name='FFN') + def __init__( + self, + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + act_cfg=dict(type='ReLU', inplace=True), + ffn_drop=0.0, + dropout_layer=None, + add_identity=True, + init_cfg=None, + **kwargs, + ): + super(FFN, self).__init__(init_cfg) + assert num_fcs >= 2, 'num_fcs should be no less ' f'than 2. got {num_fcs}.' + self.embed_dims = embed_dims + self.feedforward_channels = feedforward_channels + self.num_fcs = num_fcs + self.act_cfg = act_cfg + self.activate = build_activation_layer(act_cfg) + + layers = [] + in_channels = embed_dims + for _ in range(num_fcs - 1): + layers.append(Sequential(Linear(in_channels, feedforward_channels), self.activate, nn.Dropout(ffn_drop))) + in_channels = feedforward_channels + layers.append(Linear(feedforward_channels, embed_dims)) + layers.append(nn.Dropout(ffn_drop)) + self.layers = Sequential(*layers) + self.dropout_layer = build_dropout(dropout_layer) if dropout_layer else torch.nn.Identity() + self.add_identity = add_identity + + @deprecated_api_warning({'residual': 'identity'}, cls_name='FFN') + def forward(self, x, identity=None): + """Forward function for `FFN`. + + The function would add x to the output tensor if residue is None. + """ + out = self.layers(x) + if not self.add_identity: + return self.dropout_layer(out) + if identity is None: + identity = x + return identity + self.dropout_layer(out) + + +@TRANSFORMER_LAYER.register_module() +class BaseTransformerLayer(BaseModule): + """Base `TransformerLayer` for vision transformer. + + It can be built from `mmcv.ConfigDict` and support more flexible + customization, for example, using any number of `FFN or LN ` and + use different kinds of `attention` by specifying a list of `ConfigDict` + named `attn_cfgs`. It is worth mentioning that it supports `prenorm` + when you specifying `norm` as the first element of `operation_order`. + More details about the `prenorm`: `On Layer Normalization in the + Transformer Architecture `_ . + + Args: + attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )): + Configs for `self_attention` or `cross_attention` modules, + The order of the configs in the list should be consistent with + corresponding attentions in operation_order. + If it is a dict, all of the attention modules in operation_order + will be built with this config. Default: None. + ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )): + Configs for FFN, The order of the configs in the list should be + consistent with corresponding ffn in operation_order. + If it is a dict, all of the attention modules in operation_order + will be built with this config. + operation_order (tuple[str]): The execution order of operation + in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm'). + Support `prenorm` when you specifying first element as `norm`. + Default:None. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + batch_first (bool): Key, Query and Value are shape + of (batch, n, embed_dim) + or (n, batch, embed_dim). Default to False. + """ + + def __init__( + self, + attn_cfgs=None, + ffn_cfgs=dict( + type='FFN', + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + ffn_drop=0.0, + act_cfg=dict(type='ReLU', inplace=True), + ), + operation_order=None, + norm_cfg=dict(type='LN'), + init_cfg=None, + batch_first=False, + **kwargs, + ): + + deprecated_args = dict( + feedforward_channels='feedforward_channels', ffn_dropout='ffn_drop', ffn_num_fcs='num_fcs' + ) + for ori_name, new_name in deprecated_args.items(): + if ori_name in kwargs: + warnings.warn( + f'The arguments `{ori_name}` in BaseTransformerLayer ' + f'has been deprecated, now you should set `{new_name}` ' + f'and other FFN related arguments ' + f'to a dict named `ffn_cfgs`. ' + ) + ffn_cfgs[new_name] = kwargs[ori_name] + + super(BaseTransformerLayer, self).__init__(init_cfg) + + self.batch_first = batch_first + + assert set(operation_order) & set(['self_attn', 'norm', 'ffn', 'cross_attn']) == set(operation_order), ( + f'The operation_order of' + f' {self.__class__.__name__} should ' + f'contains all four operation type ' + f"{['self_attn', 'norm', 'ffn', 'cross_attn']}" + ) + + num_attn = operation_order.count('self_attn') + operation_order.count('cross_attn') + if isinstance(attn_cfgs, dict): + attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)] + else: + assert num_attn == len(attn_cfgs), ( + f'The length ' + f'of attn_cfg {num_attn} is ' + f'not consistent with the number of attention' + f'in operation_order {operation_order}.' + ) + + self.num_attn = num_attn + self.operation_order = operation_order + self.norm_cfg = norm_cfg + self.pre_norm = operation_order[0] == 'norm' + self.attentions = ModuleList() + + index = 0 + for operation_name in operation_order: + if operation_name in ['self_attn', 'cross_attn']: + if 'batch_first' in attn_cfgs[index]: + assert self.batch_first == attn_cfgs[index]['batch_first'] + else: + attn_cfgs[index]['batch_first'] = self.batch_first + attention = build_attention(attn_cfgs[index]) + # Some custom attentions used as `self_attn` + # or `cross_attn` can have different behavior. + attention.operation_name = operation_name + self.attentions.append(attention) + index += 1 + + self.embed_dims = self.attentions[0].embed_dims + + self.ffns = ModuleList() + num_ffns = operation_order.count('ffn') + if isinstance(ffn_cfgs, dict): + ffn_cfgs = ConfigDict(ffn_cfgs) + if isinstance(ffn_cfgs, dict): + ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)] + assert len(ffn_cfgs) == num_ffns + for ffn_index in range(num_ffns): + if 'embed_dims' not in ffn_cfgs[ffn_index]: + ffn_cfgs['embed_dims'] = self.embed_dims + else: + assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims + self.ffns.append(build_feedforward_network(ffn_cfgs[ffn_index], dict(type='FFN'))) + + self.norms = ModuleList() + num_norms = operation_order.count('norm') + for _ in range(num_norms): + self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1]) + + def forward( + self, + query, + key=None, + value=None, + query_pos=None, + key_pos=None, + attn_masks=None, + query_key_padding_mask=None, + key_padding_mask=None, + **kwargs, + ): + """Forward function for `TransformerDecoderLayer`. + + **kwargs contains some specific arguments of attentions. + + Args: + query (Tensor): The input query with shape + [num_queries, bs, embed_dims] if + self.batch_first is False, else + [bs, num_queries embed_dims]. + key (Tensor): The key tensor with shape [num_keys, bs, + embed_dims] if self.batch_first is False, else + [bs, num_keys, embed_dims] . + value (Tensor): The value tensor with same shape as `key`. + query_pos (Tensor): The positional encoding for `query`. + Default: None. + key_pos (Tensor): The positional encoding for `key`. + Default: None. + attn_masks (List[Tensor] | None): 2D Tensor used in + calculation of corresponding attention. The length of + it should equal to the number of `attention` in + `operation_order`. Default: None. + query_key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_queries]. Only used in `self_attn` layer. + Defaults to None. + key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_keys]. Default: None. + + Returns: + Tensor: forwarded results with shape [num_queries, bs, embed_dims]. + """ + + norm_index = 0 + attn_index = 0 + ffn_index = 0 + identity = query + if attn_masks is None: + attn_masks = [None for _ in range(self.num_attn)] + elif isinstance(attn_masks, torch.Tensor): + attn_masks = [copy.deepcopy(attn_masks) for _ in range(self.num_attn)] + warnings.warn(f'Use same attn_mask in all attentions in ' f'{self.__class__.__name__} ') + else: + assert len(attn_masks) == self.num_attn, ( + f'The length of ' + f'attn_masks {len(attn_masks)} must be equal ' + f'to the number of attention in ' + f'operation_order {self.num_attn}' + ) + + for layer in self.operation_order: + if layer == 'self_attn': + temp_key = temp_value = query + query = self.attentions[attn_index]( + query, + temp_key, + temp_value, + identity if self.pre_norm else None, + query_pos=query_pos, + key_pos=query_pos, + attn_mask=attn_masks[attn_index], + key_padding_mask=query_key_padding_mask, + **kwargs, + ) + attn_index += 1 + identity = query + + elif layer == 'norm': + query = self.norms[norm_index](query) + norm_index += 1 + + elif layer == 'cross_attn': + query = self.attentions[attn_index]( + query, + key, + value, + identity if self.pre_norm else None, + query_pos=query_pos, + key_pos=key_pos, + attn_mask=attn_masks[attn_index], + key_padding_mask=key_padding_mask, + **kwargs, + ) + attn_index += 1 + identity = query + + elif layer == 'ffn': + query = self.ffns[ffn_index](query, identity if self.pre_norm else None) + ffn_index += 1 + + return query + + +@TRANSFORMER_LAYER_SEQUENCE.register_module() +class TransformerLayerSequence(BaseModule): + """Base class for TransformerEncoder and TransformerDecoder in vision + transformer. + + As base-class of Encoder and Decoder in vision transformer. + Support customization such as specifying different kind + of `transformer_layer` in `transformer_coder`. + + Args: + transformerlayer (list[obj:`mmcv.ConfigDict`] | + obj:`mmcv.ConfigDict`): Config of transformerlayer + in TransformerCoder. If it is obj:`mmcv.ConfigDict`, + it would be repeated `num_layer` times to a + list[`mmcv.ConfigDict`]. Default: None. + num_layers (int): The number of `TransformerLayer`. Default: None. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + """ + + def __init__(self, transformerlayers=None, num_layers=None, init_cfg=None): + super(TransformerLayerSequence, self).__init__(init_cfg) + if isinstance(transformerlayers, dict): + transformerlayers = [copy.deepcopy(transformerlayers) for _ in range(num_layers)] + else: + assert isinstance(transformerlayers, list) and len(transformerlayers) == num_layers + self.num_layers = num_layers + self.layers = ModuleList() + for i in range(num_layers): + self.layers.append(build_transformer_layer(transformerlayers[i])) + self.embed_dims = self.layers[0].embed_dims + self.pre_norm = self.layers[0].pre_norm + + def forward( + self, + query, + key, + value, + query_pos=None, + key_pos=None, + attn_masks=None, + query_key_padding_mask=None, + key_padding_mask=None, + **kwargs, + ): + """Forward function for `TransformerCoder`. + + Args: + query (Tensor): Input query with shape + `(num_queries, bs, embed_dims)`. + key (Tensor): The key tensor with shape + `(num_keys, bs, embed_dims)`. + value (Tensor): The value tensor with shape + `(num_keys, bs, embed_dims)`. + query_pos (Tensor): The positional encoding for `query`. + Default: None. + key_pos (Tensor): The positional encoding for `key`. + Default: None. + attn_masks (List[Tensor], optional): Each element is 2D Tensor + which is used in calculation of corresponding attention in + operation_order. Default: None. + query_key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_queries]. Only used in self-attention + Default: None. + key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_keys]. Default: None. + + Returns: + Tensor: results with shape [num_queries, bs, embed_dims]. + """ + for layer in self.layers: + query = layer( + query, + key, + value, + query_pos=query_pos, + key_pos=key_pos, + attn_masks=attn_masks, + query_key_padding_mask=query_key_padding_mask, + key_padding_mask=key_padding_mask, + **kwargs, + ) + return query diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py new file mode 100644 index 000000000000..f4d0f1fa8291 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/upsample.py @@ -0,0 +1,83 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +import torch.nn.functional as F + +from ..utils import xavier_init +from .registry import UPSAMPLE_LAYERS + +UPSAMPLE_LAYERS.register_module('nearest', module=nn.Upsample) +UPSAMPLE_LAYERS.register_module('bilinear', module=nn.Upsample) + + +@UPSAMPLE_LAYERS.register_module(name='pixel_shuffle') +class PixelShufflePack(nn.Module): + """Pixel Shuffle upsample layer. + + This module packs `F.pixel_shuffle()` and a nn.Conv2d module together to + achieve a simple upsampling with pixel shuffle. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + scale_factor (int): Upsample ratio. + upsample_kernel (int): Kernel size of the conv layer to expand the + channels. + """ + + def __init__(self, in_channels, out_channels, scale_factor, upsample_kernel): + super(PixelShufflePack, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.scale_factor = scale_factor + self.upsample_kernel = upsample_kernel + self.upsample_conv = nn.Conv2d( + self.in_channels, + self.out_channels * scale_factor * scale_factor, + self.upsample_kernel, + padding=(self.upsample_kernel - 1) // 2, + ) + self.init_weights() + + def init_weights(self): + xavier_init(self.upsample_conv, distribution='uniform') + + def forward(self, x): + x = self.upsample_conv(x) + x = F.pixel_shuffle(x, self.scale_factor) + return x + + +def build_upsample_layer(cfg, *args, **kwargs): + """Build upsample layer. + + Args: + cfg (dict): The upsample layer config, which should contain: + + - type (str): Layer type. + - scale_factor (int): Upsample ratio, which is not applicable to + deconv. + - layer args: Args needed to instantiate a upsample layer. + args (argument list): Arguments passed to the ``__init__`` + method of the corresponding conv layer. + kwargs (keyword arguments): Keyword arguments passed to the + ``__init__`` method of the corresponding conv layer. + + Returns: + nn.Module: Created upsample layer. + """ + if not isinstance(cfg, dict): + raise TypeError(f'cfg must be a dict, but got {type(cfg)}') + if 'type' not in cfg: + raise KeyError(f'the cfg dict must contain the key "type", but got {cfg}') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in UPSAMPLE_LAYERS: + raise KeyError(f'Unrecognized upsample type {layer_type}') + else: + upsample = UPSAMPLE_LAYERS.get(layer_type) + + if upsample is nn.Upsample: + cfg_['mode'] = layer_type + layer = upsample(*args, **kwargs, **cfg_) + return layer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py new file mode 100644 index 000000000000..9028d69d4480 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/bricks/wrappers.py @@ -0,0 +1,173 @@ +# Copyright (c) OpenMMLab. All rights reserved. +r"""Modified from https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/wrappers.py # noqa: E501 + +Wrap some nn modules to support empty tensor input. Currently, these wrappers +are mainly used in mask heads like fcn_mask_head and maskiou_heads since mask +heads are trained on only positive RoIs. +""" +import math + +import torch +import torch.nn as nn +from torch.nn.modules.utils import _pair, _triple + +from .registry import CONV_LAYERS, UPSAMPLE_LAYERS + +if torch.__version__ == 'parrots': + TORCH_VERSION = torch.__version__ +else: + # torch.__version__ could be 1.3.1+cu92, we only need the first two + # for comparison + TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2]) + + +def obsolete_torch_version(torch_version, version_threshold): + return torch_version == 'parrots' or torch_version <= version_threshold + + +class NewEmptyTensorOp(torch.autograd.Function): + @staticmethod + def forward(ctx, x, new_shape): + ctx.shape = x.shape + return x.new_empty(new_shape) + + @staticmethod + def backward(ctx, grad): + shape = ctx.shape + return NewEmptyTensorOp.apply(grad, shape), None + + +@CONV_LAYERS.register_module('Conv', force=True) +class Conv2d(nn.Conv2d): + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size, self.padding, self.stride, self.dilation): + o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1 + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +@CONV_LAYERS.register_module('Conv3d', force=True) +class Conv3d(nn.Conv3d): + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d in zip(x.shape[-3:], self.kernel_size, self.padding, self.stride, self.dilation): + o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1 + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +@CONV_LAYERS.register_module() +@CONV_LAYERS.register_module('deconv') +@UPSAMPLE_LAYERS.register_module('deconv', force=True) +class ConvTranspose2d(nn.ConvTranspose2d): + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d, op in zip( + x.shape[-2:], self.kernel_size, self.padding, self.stride, self.dilation, self.output_padding + ): + out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +@CONV_LAYERS.register_module() +@CONV_LAYERS.register_module('deconv3d') +@UPSAMPLE_LAYERS.register_module('deconv3d', force=True) +class ConvTranspose3d(nn.ConvTranspose3d): + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d, op in zip( + x.shape[-3:], self.kernel_size, self.padding, self.stride, self.dilation, self.output_padding + ): + out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +class MaxPool2d(nn.MaxPool2d): + def forward(self, x): + # PyTorch 1.9 does not support empty tensor inference yet + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): + out_shape = list(x.shape[:2]) + for i, k, p, s, d in zip( + x.shape[-2:], _pair(self.kernel_size), _pair(self.padding), _pair(self.stride), _pair(self.dilation) + ): + o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 + o = math.ceil(o) if self.ceil_mode else math.floor(o) + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + return empty + + return super().forward(x) + + +class MaxPool3d(nn.MaxPool3d): + def forward(self, x): + # PyTorch 1.9 does not support empty tensor inference yet + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): + out_shape = list(x.shape[:2]) + for i, k, p, s, d in zip( + x.shape[-3:], + _triple(self.kernel_size), + _triple(self.padding), + _triple(self.stride), + _triple(self.dilation), + ): + o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 + o = math.ceil(o) if self.ceil_mode else math.floor(o) + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + return empty + + return super().forward(x) + + +class Linear(torch.nn.Linear): + def forward(self, x): + # empty tensor forward of Linear layer is supported in Pytorch 1.6 + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 5)): + out_shape = [x.shape[0], self.out_features] + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py new file mode 100644 index 000000000000..64e378712149 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/builder.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ..runner import Sequential +from ..utils import Registry, build_from_cfg + + +def build_model_from_cfg(cfg, registry, default_args=None): + """Build a PyTorch model from config dict(s). Different from + ``build_from_cfg``, if cfg is a list, a ``nn.Sequential`` will be built. + + Args: + cfg (dict, list[dict]): The config of modules, is is either a config + dict or a list of config dicts. If cfg is a list, a + the built modules will be wrapped with ``nn.Sequential``. + registry (:obj:`Registry`): A registry the module belongs to. + default_args (dict, optional): Default arguments to build the module. + Defaults to None. + + Returns: + nn.Module: A built nn module. + """ + if isinstance(cfg, list): + modules = [build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg] + return Sequential(*modules) + else: + return build_from_cfg(cfg, registry, default_args) + + +MODELS = Registry('model', build_func=build_model_from_cfg) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py new file mode 100644 index 000000000000..a432cd00d4c5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/resnet.py @@ -0,0 +1,271 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging + +import torch.nn as nn +import torch.utils.checkpoint as cp + +from .utils import constant_init, kaiming_init + + +def conv3x3(in_planes, out_planes, stride=1, dilation=1): + """3x3 convolution with padding.""" + return nn.Conv2d( + in_planes, out_planes, kernel_size=3, stride=stride, padding=dilation, dilation=dilation, bias=False + ) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, style='pytorch', with_cp=False): + super(BasicBlock, self).__init__() + assert style in ['pytorch', 'caffe'] + self.conv1 = conv3x3(inplanes, planes, stride, dilation) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + assert not with_cp + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, style='pytorch', with_cp=False): + """Bottleneck block. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if + it is "caffe", the stride-two layer is the first 1x1 conv layer. + """ + super(Bottleneck, self).__init__() + assert style in ['pytorch', 'caffe'] + if style == 'pytorch': + conv1_stride = 1 + conv2_stride = stride + else: + conv1_stride = stride + conv2_stride = 1 + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=conv2_stride, padding=dilation, dilation=dilation, bias=False + ) + + self.bn1 = nn.BatchNorm2d(planes) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + def forward(self, x): + def _inner_forward(x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +def make_res_layer(block, inplanes, planes, blocks, stride=1, dilation=1, style='pytorch', with_cp=False): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(inplanes, planes, stride, dilation, downsample, style=style, with_cp=with_cp)) + inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp)) + + return nn.Sequential(*layers) + + +class ResNet(nn.Module): + """ResNet backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + num_stages (int): Resnet stages, normally 4. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze + running stats (mean and var). + bn_frozen (bool): Whether to freeze weight and bias of BN layers. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + """ + + arch_settings = { + 18: (BasicBlock, (2, 2, 2, 2)), + 34: (BasicBlock, (3, 4, 6, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)), + } + + def __init__( + self, + depth, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + frozen_stages=-1, + bn_eval=True, + bn_frozen=False, + with_cp=False, + ): + super(ResNet, self).__init__() + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for resnet') + assert num_stages >= 1 and num_stages <= 4 + block, stage_blocks = self.arch_settings[depth] + stage_blocks = stage_blocks[:num_stages] + assert len(strides) == len(dilations) == num_stages + assert max(out_indices) < num_stages + + self.out_indices = out_indices + self.style = style + self.frozen_stages = frozen_stages + self.bn_eval = bn_eval + self.bn_frozen = bn_frozen + self.with_cp = with_cp + + self.inplanes = 64 + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.res_layers = [] + for i, num_blocks in enumerate(stage_blocks): + stride = strides[i] + dilation = dilations[i] + planes = 64 * 2 ** i + res_layer = make_res_layer( + block, + self.inplanes, + planes, + num_blocks, + stride=stride, + dilation=dilation, + style=self.style, + with_cp=with_cp, + ) + self.inplanes = planes * block.expansion + layer_name = f'layer{i + 1}' + self.add_module(layer_name, res_layer) + self.res_layers.append(layer_name) + + self.feat_dim = block.expansion * 64 * 2 ** (len(stage_blocks) - 1) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + from ..runner import load_checkpoint + + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + outs = [] + for i, layer_name in enumerate(self.res_layers): + res_layer = getattr(self, layer_name) + x = res_layer(x) + if i in self.out_indices: + outs.append(x) + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def train(self, mode=True): + super(ResNet, self).train(mode) + if self.bn_eval: + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + if self.bn_frozen: + for params in m.parameters(): + params.requires_grad = False + if mode and self.frozen_stages >= 0: + for param in self.conv1.parameters(): + param.requires_grad = False + for param in self.bn1.parameters(): + param.requires_grad = False + self.bn1.eval() + self.bn1.weight.requires_grad = False + self.bn1.bias.requires_grad = False + for i in range(1, self.frozen_stages + 1): + mod = getattr(self, f'layer{i}') + mod.eval() + for param in mod.parameters(): + param.requires_grad = False diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py new file mode 100644 index 000000000000..2f1607650cb1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/__init__.py @@ -0,0 +1,48 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .flops_counter import get_model_complexity_info +from .fuse_conv_bn import fuse_conv_bn +from .sync_bn import revert_sync_batchnorm +from .weight_init import ( + INITIALIZERS, + Caffe2XavierInit, + ConstantInit, + KaimingInit, + NormalInit, + PretrainedInit, + TruncNormalInit, + UniformInit, + XavierInit, + bias_init_with_prob, + caffe2_xavier_init, + constant_init, + initialize, + kaiming_init, + normal_init, + trunc_normal_init, + uniform_init, + xavier_init, +) + +__all__ = [ + 'get_model_complexity_info', + 'bias_init_with_prob', + 'caffe2_xavier_init', + 'constant_init', + 'kaiming_init', + 'normal_init', + 'trunc_normal_init', + 'uniform_init', + 'xavier_init', + 'fuse_conv_bn', + 'initialize', + 'INITIALIZERS', + 'ConstantInit', + 'XavierInit', + 'NormalInit', + 'TruncNormalInit', + 'UniformInit', + 'KaimingInit', + 'PretrainedInit', + 'Caffe2XavierInit', + 'revert_sync_batchnorm', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py new file mode 100644 index 000000000000..afbba392fa97 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/flops_counter.py @@ -0,0 +1,580 @@ +# Modified from flops-counter.pytorch by Vladislav Sovrasov +# original repo: https://github.com/sovrasov/flops-counter.pytorch + +# MIT License + +# Copyright (c) 2018 Vladislav Sovrasov + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import sys +from functools import partial + +import numpy as np +import torch +import torch.nn as nn + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + + +def get_model_complexity_info( + model, input_shape, print_per_layer_stat=True, as_strings=True, input_constructor=None, flush=False, ost=sys.stdout +): + """Get complexity information of a model. + + This method can calculate FLOPs and parameter counts of a model with + corresponding input shape. It can also print complexity information for + each layer in a model. + + Supported layers are listed as below: + - Convolutions: ``nn.Conv1d``, ``nn.Conv2d``, ``nn.Conv3d``. + - Activations: ``nn.ReLU``, ``nn.PReLU``, ``nn.ELU``, ``nn.LeakyReLU``, + ``nn.ReLU6``. + - Poolings: ``nn.MaxPool1d``, ``nn.MaxPool2d``, ``nn.MaxPool3d``, + ``nn.AvgPool1d``, ``nn.AvgPool2d``, ``nn.AvgPool3d``, + ``nn.AdaptiveMaxPool1d``, ``nn.AdaptiveMaxPool2d``, + ``nn.AdaptiveMaxPool3d``, ``nn.AdaptiveAvgPool1d``, + ``nn.AdaptiveAvgPool2d``, ``nn.AdaptiveAvgPool3d``. + - BatchNorms: ``nn.BatchNorm1d``, ``nn.BatchNorm2d``, + ``nn.BatchNorm3d``, ``nn.GroupNorm``, ``nn.InstanceNorm1d``, + ``InstanceNorm2d``, ``InstanceNorm3d``, ``nn.LayerNorm``. + - Linear: ``nn.Linear``. + - Deconvolution: ``nn.ConvTranspose2d``. + - Upsample: ``nn.Upsample``. + + Args: + model (nn.Module): The model for complexity calculation. + input_shape (tuple): Input shape used for calculation. + print_per_layer_stat (bool): Whether to print complexity information + for each layer in a model. Default: True. + as_strings (bool): Output FLOPs and params counts in a string form. + Default: True. + input_constructor (None | callable): If specified, it takes a callable + method that generates input. otherwise, it will generate a random + tensor with input shape to calculate FLOPs. Default: None. + flush (bool): same as that in :func:`print`. Default: False. + ost (stream): same as ``file`` param in :func:`print`. + Default: sys.stdout. + + Returns: + tuple[float | str]: If ``as_strings`` is set to True, it will return + FLOPs and parameter counts in a string format. otherwise, it will + return those in a float number format. + """ + assert type(input_shape) is tuple + assert len(input_shape) >= 1 + assert isinstance(model, nn.Module) + flops_model = add_flops_counting_methods(model) + flops_model.eval() + flops_model.start_flops_count() + if input_constructor: + input = input_constructor(input_shape) + _ = flops_model(**input) + else: + try: + batch = torch.ones(()).new_empty( + (1, *input_shape), + dtype=next(flops_model.parameters()).dtype, + device=next(flops_model.parameters()).device, + ) + except StopIteration: + # Avoid StopIteration for models which have no parameters, + # like `nn.Relu()`, `nn.AvgPool2d`, etc. + batch = torch.ones(()).new_empty((1, *input_shape)) + + _ = flops_model(batch) + + flops_count, params_count = flops_model.compute_average_flops_cost() + if print_per_layer_stat: + print_model_with_flops(flops_model, flops_count, params_count, ost=ost, flush=flush) + flops_model.stop_flops_count() + + if as_strings: + return flops_to_string(flops_count), params_to_string(params_count) + + return flops_count, params_count + + +def flops_to_string(flops, units='GFLOPs', precision=2): + """Convert FLOPs number into a string. + + Note that Here we take a multiply-add counts as one FLOP. + + Args: + flops (float): FLOPs number to be converted. + units (str | None): Converted FLOPs units. Options are None, 'GFLOPs', + 'MFLOPs', 'KFLOPs', 'FLOPs'. If set to None, it will automatically + choose the most suitable unit for FLOPs. Default: 'GFLOPs'. + precision (int): Digit number after the decimal point. Default: 2. + + Returns: + str: The converted FLOPs number with units. + + Examples: + >>> flops_to_string(1e9) + '1.0 GFLOPs' + >>> flops_to_string(2e5, 'MFLOPs') + '0.2 MFLOPs' + >>> flops_to_string(3e-9, None) + '3e-09 FLOPs' + """ + if units is None: + if flops // 10 ** 9 > 0: + return str(round(flops / 10.0 ** 9, precision)) + ' GFLOPs' + elif flops // 10 ** 6 > 0: + return str(round(flops / 10.0 ** 6, precision)) + ' MFLOPs' + elif flops // 10 ** 3 > 0: + return str(round(flops / 10.0 ** 3, precision)) + ' KFLOPs' + else: + return str(flops) + ' FLOPs' + else: + if units == 'GFLOPs': + return str(round(flops / 10.0 ** 9, precision)) + ' ' + units + elif units == 'MFLOPs': + return str(round(flops / 10.0 ** 6, precision)) + ' ' + units + elif units == 'KFLOPs': + return str(round(flops / 10.0 ** 3, precision)) + ' ' + units + else: + return str(flops) + ' FLOPs' + + +def params_to_string(num_params, units=None, precision=2): + """Convert parameter number into a string. + + Args: + num_params (float): Parameter number to be converted. + units (str | None): Converted FLOPs units. Options are None, 'M', + 'K' and ''. If set to None, it will automatically choose the most + suitable unit for Parameter number. Default: None. + precision (int): Digit number after the decimal point. Default: 2. + + Returns: + str: The converted parameter number with units. + + Examples: + >>> params_to_string(1e9) + '1000.0 M' + >>> params_to_string(2e5) + '200.0 k' + >>> params_to_string(3e-9) + '3e-09' + """ + if units is None: + if num_params // 10 ** 6 > 0: + return str(round(num_params / 10 ** 6, precision)) + ' M' + elif num_params // 10 ** 3: + return str(round(num_params / 10 ** 3, precision)) + ' k' + else: + return str(num_params) + else: + if units == 'M': + return str(round(num_params / 10.0 ** 6, precision)) + ' ' + units + elif units == 'K': + return str(round(num_params / 10.0 ** 3, precision)) + ' ' + units + else: + return str(num_params) + + +def print_model_with_flops(model, total_flops, total_params, units='GFLOPs', precision=3, ost=sys.stdout, flush=False): + """Print a model with FLOPs for each layer. + + Args: + model (nn.Module): The model to be printed. + total_flops (float): Total FLOPs of the model. + total_params (float): Total parameter counts of the model. + units (str | None): Converted FLOPs units. Default: 'GFLOPs'. + precision (int): Digit number after the decimal point. Default: 3. + ost (stream): same as `file` param in :func:`print`. + Default: sys.stdout. + flush (bool): same as that in :func:`print`. Default: False. + + Example: + >>> class ExampleModel(nn.Module): + + >>> def __init__(self): + >>> super().__init__() + >>> self.conv1 = nn.Conv2d(3, 8, 3) + >>> self.conv2 = nn.Conv2d(8, 256, 3) + >>> self.conv3 = nn.Conv2d(256, 8, 3) + >>> self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) + >>> self.flatten = nn.Flatten() + >>> self.fc = nn.Linear(8, 1) + + >>> def forward(self, x): + >>> x = self.conv1(x) + >>> x = self.conv2(x) + >>> x = self.conv3(x) + >>> x = self.avg_pool(x) + >>> x = self.flatten(x) + >>> x = self.fc(x) + >>> return x + + >>> model = ExampleModel() + >>> x = (3, 16, 16) + to print the complexity information state for each layer, you can use + >>> get_model_complexity_info(model, x) + or directly use + >>> print_model_with_flops(model, 4579784.0, 37361) + ExampleModel( + 0.037 M, 100.000% Params, 0.005 GFLOPs, 100.000% FLOPs, + (conv1): Conv2d(0.0 M, 0.600% Params, 0.0 GFLOPs, 0.959% FLOPs, 3, 8, kernel_size=(3, 3), stride=(1, 1)) # noqa: E501 + (conv2): Conv2d(0.019 M, 50.020% Params, 0.003 GFLOPs, 58.760% FLOPs, 8, 256, kernel_size=(3, 3), stride=(1, 1)) + (conv3): Conv2d(0.018 M, 49.356% Params, 0.002 GFLOPs, 40.264% FLOPs, 256, 8, kernel_size=(3, 3), stride=(1, 1)) + (avg_pool): AdaptiveAvgPool2d(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.017% FLOPs, output_size=(1, 1)) + (flatten): Flatten(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.000% FLOPs, ) + (fc): Linear(0.0 M, 0.024% Params, 0.0 GFLOPs, 0.000% FLOPs, in_features=8, out_features=1, bias=True) + ) + """ + + def accumulate_params(self): + if is_supported_instance(self): + return self.__params__ + else: + sum = 0 + for m in self.children(): + sum += m.accumulate_params() + return sum + + def accumulate_flops(self): + if is_supported_instance(self): + return self.__flops__ / model.__batch_counter__ + else: + sum = 0 + for m in self.children(): + sum += m.accumulate_flops() + return sum + + def flops_repr(self): + accumulated_num_params = self.accumulate_params() + accumulated_flops_cost = self.accumulate_flops() + return ', '.join( + [ + params_to_string(accumulated_num_params, units='M', precision=precision), + '{:.3%} Params'.format(accumulated_num_params / total_params), + flops_to_string(accumulated_flops_cost, units=units, precision=precision), + '{:.3%} FLOPs'.format(accumulated_flops_cost / total_flops), + self.original_extra_repr(), + ] + ) + + def add_extra_repr(m): + m.accumulate_flops = accumulate_flops.__get__(m) + m.accumulate_params = accumulate_params.__get__(m) + flops_extra_repr = flops_repr.__get__(m) + if m.extra_repr != flops_extra_repr: + m.original_extra_repr = m.extra_repr + m.extra_repr = flops_extra_repr + assert m.extra_repr != m.original_extra_repr + + def del_extra_repr(m): + if hasattr(m, 'original_extra_repr'): + m.extra_repr = m.original_extra_repr + del m.original_extra_repr + if hasattr(m, 'accumulate_flops'): + del m.accumulate_flops + + model.apply(add_extra_repr) + print(model, file=ost, flush=flush) + model.apply(del_extra_repr) + + +def get_model_parameters_number(model): + """Calculate parameter number of a model. + + Args: + model (nn.module): The model for parameter number calculation. + + Returns: + float: Parameter number of the model. + """ + num_params = sum(p.numel() for p in model.parameters() if p.requires_grad) + return num_params + + +def add_flops_counting_methods(net_main_module): + # adding additional methods to the existing module object, + # this is done this way so that each function has access to self object + net_main_module.start_flops_count = start_flops_count.__get__(net_main_module) + net_main_module.stop_flops_count = stop_flops_count.__get__(net_main_module) + net_main_module.reset_flops_count = reset_flops_count.__get__(net_main_module) + net_main_module.compute_average_flops_cost = compute_average_flops_cost.__get__(net_main_module) # noqa: E501 + + net_main_module.reset_flops_count() + + return net_main_module + + +def compute_average_flops_cost(self): + """Compute average FLOPs cost. + + A method to compute average FLOPs cost, which will be available after + `add_flops_counting_methods()` is called on a desired net object. + + Returns: + float: Current mean flops consumption per image. + """ + batches_count = self.__batch_counter__ + flops_sum = 0 + for module in self.modules(): + if is_supported_instance(module): + flops_sum += module.__flops__ + params_sum = get_model_parameters_number(self) + return flops_sum / batches_count, params_sum + + +def start_flops_count(self): + """Activate the computation of mean flops consumption per image. + + A method to activate the computation of mean flops consumption per image. + which will be available after ``add_flops_counting_methods()`` is called on + a desired net object. It should be called before running the network. + """ + add_batch_counter_hook_function(self) + + def add_flops_counter_hook_function(module): + if is_supported_instance(module): + if hasattr(module, '__flops_handle__'): + return + + else: + handle = module.register_forward_hook(get_modules_mapping()[type(module)]) + + module.__flops_handle__ = handle + + self.apply(partial(add_flops_counter_hook_function)) + + +def stop_flops_count(self): + """Stop computing the mean flops consumption per image. + + A method to stop computing the mean flops consumption per image, which will + be available after ``add_flops_counting_methods()`` is called on a desired + net object. It can be called to pause the computation whenever. + """ + remove_batch_counter_hook_function(self) + self.apply(remove_flops_counter_hook_function) + + +def reset_flops_count(self): + """Reset statistics computed so far. + + A method to Reset computed statistics, which will be available after + `add_flops_counting_methods()` is called on a desired net object. + """ + add_batch_counter_variables_or_reset(self) + self.apply(add_flops_counter_variable_or_reset) + + +# ---- Internal functions +def empty_flops_counter_hook(module, input, output): + module.__flops__ += 0 + + +def upsample_flops_counter_hook(module, input, output): + output_size = output[0] + batch_size = output_size.shape[0] + output_elements_count = batch_size + for val in output_size.shape[1:]: + output_elements_count *= val + module.__flops__ += int(output_elements_count) + + +def relu_flops_counter_hook(module, input, output): + active_elements_count = output.numel() + module.__flops__ += int(active_elements_count) + + +def linear_flops_counter_hook(module, input, output): + input = input[0] + output_last_dim = output.shape[-1] # pytorch checks dimensions, so here we don't care much + module.__flops__ += int(np.prod(input.shape) * output_last_dim) + + +def pool_flops_counter_hook(module, input, output): + input = input[0] + module.__flops__ += int(np.prod(input.shape)) + + +def norm_flops_counter_hook(module, input, output): + input = input[0] + + batch_flops = np.prod(input.shape) + if getattr(module, 'affine', False) or getattr(module, 'elementwise_affine', False): + batch_flops *= 2 + module.__flops__ += int(batch_flops) + + +def deconv_flops_counter_hook(conv_module, input, output): + # Can have multiple inputs, getting the first one + input = input[0] + + batch_size = input.shape[0] + input_height, input_width = input.shape[2:] + + kernel_height, kernel_width = conv_module.kernel_size + in_channels = conv_module.in_channels + out_channels = conv_module.out_channels + groups = conv_module.groups + + filters_per_channel = out_channels // groups + conv_per_position_flops = kernel_height * kernel_width * in_channels * filters_per_channel + + active_elements_count = batch_size * input_height * input_width + overall_conv_flops = conv_per_position_flops * active_elements_count + bias_flops = 0 + if conv_module.bias is not None: + output_height, output_width = output.shape[2:] + bias_flops = out_channels * batch_size * output_height * output_height + overall_flops = overall_conv_flops + bias_flops + + conv_module.__flops__ += int(overall_flops) + + +def conv_flops_counter_hook(conv_module, input, output): + # Can have multiple inputs, getting the first one + input = input[0] + + batch_size = input.shape[0] + output_dims = list(output.shape[2:]) + + kernel_dims = list(conv_module.kernel_size) + in_channels = conv_module.in_channels + out_channels = conv_module.out_channels + groups = conv_module.groups + + filters_per_channel = out_channels // groups + conv_per_position_flops = int(np.prod(kernel_dims)) * in_channels * filters_per_channel + + active_elements_count = batch_size * int(np.prod(output_dims)) + + overall_conv_flops = conv_per_position_flops * active_elements_count + + bias_flops = 0 + + if conv_module.bias is not None: + + bias_flops = out_channels * active_elements_count + + overall_flops = overall_conv_flops + bias_flops + + conv_module.__flops__ += int(overall_flops) + + +def batch_counter_hook(module, input, output): + batch_size = 1 + if len(input) > 0: + # Can have multiple inputs, getting the first one + input = input[0] + batch_size = len(input) + else: + pass + print('Warning! No positional inputs found for a module, ' 'assuming batch size is 1.') + module.__batch_counter__ += batch_size + + +def add_batch_counter_variables_or_reset(module): + + module.__batch_counter__ = 0 + + +def add_batch_counter_hook_function(module): + if hasattr(module, '__batch_counter_handle__'): + return + + handle = module.register_forward_hook(batch_counter_hook) + module.__batch_counter_handle__ = handle + + +def remove_batch_counter_hook_function(module): + if hasattr(module, '__batch_counter_handle__'): + module.__batch_counter_handle__.remove() + del module.__batch_counter_handle__ + + +def add_flops_counter_variable_or_reset(module): + if is_supported_instance(module): + if hasattr(module, '__flops__') or hasattr(module, '__params__'): + print( + 'Warning: variables __flops__ or __params__ are already ' + 'defined for the module' + type(module).__name__ + ' ptflops can affect your code!' + ) + module.__flops__ = 0 + module.__params__ = get_model_parameters_number(module) + + +def is_supported_instance(module): + if type(module) in get_modules_mapping(): + return True + return False + + +def remove_flops_counter_hook_function(module): + if is_supported_instance(module): + if hasattr(module, '__flops_handle__'): + module.__flops_handle__.remove() + del module.__flops_handle__ + + +def get_modules_mapping(): + return { + # convolutions + nn.Conv1d: conv_flops_counter_hook, + nn.Conv2d: conv_flops_counter_hook, + mmcv.cnn.bricks.Conv2d: conv_flops_counter_hook, + nn.Conv3d: conv_flops_counter_hook, + mmcv.cnn.bricks.Conv3d: conv_flops_counter_hook, + # activations + nn.ReLU: relu_flops_counter_hook, + nn.PReLU: relu_flops_counter_hook, + nn.ELU: relu_flops_counter_hook, + nn.LeakyReLU: relu_flops_counter_hook, + nn.ReLU6: relu_flops_counter_hook, + # poolings + nn.MaxPool1d: pool_flops_counter_hook, + nn.AvgPool1d: pool_flops_counter_hook, + nn.AvgPool2d: pool_flops_counter_hook, + nn.MaxPool2d: pool_flops_counter_hook, + mmcv.cnn.bricks.MaxPool2d: pool_flops_counter_hook, + nn.MaxPool3d: pool_flops_counter_hook, + mmcv.cnn.bricks.MaxPool3d: pool_flops_counter_hook, + nn.AvgPool3d: pool_flops_counter_hook, + nn.AdaptiveMaxPool1d: pool_flops_counter_hook, + nn.AdaptiveAvgPool1d: pool_flops_counter_hook, + nn.AdaptiveMaxPool2d: pool_flops_counter_hook, + nn.AdaptiveAvgPool2d: pool_flops_counter_hook, + nn.AdaptiveMaxPool3d: pool_flops_counter_hook, + nn.AdaptiveAvgPool3d: pool_flops_counter_hook, + # normalizations + nn.BatchNorm1d: norm_flops_counter_hook, + nn.BatchNorm2d: norm_flops_counter_hook, + nn.BatchNorm3d: norm_flops_counter_hook, + nn.GroupNorm: norm_flops_counter_hook, + nn.InstanceNorm1d: norm_flops_counter_hook, + nn.InstanceNorm2d: norm_flops_counter_hook, + nn.InstanceNorm3d: norm_flops_counter_hook, + nn.LayerNorm: norm_flops_counter_hook, + # FC + nn.Linear: linear_flops_counter_hook, + mmcv.cnn.bricks.Linear: linear_flops_counter_hook, + # Upscale + nn.Upsample: upsample_flops_counter_hook, + # Deconvolution + nn.ConvTranspose2d: deconv_flops_counter_hook, + mmcv.cnn.bricks.ConvTranspose2d: deconv_flops_counter_hook, + } diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py new file mode 100644 index 000000000000..33dd13e18826 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/fuse_conv_bn.py @@ -0,0 +1,56 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + + +def _fuse_conv_bn(conv, bn): + """Fuse conv and bn into one module. + + Args: + conv (nn.Module): Conv to be fused. + bn (nn.Module): BN to be fused. + + Returns: + nn.Module: Fused module. + """ + conv_w = conv.weight + conv_b = conv.bias if conv.bias is not None else torch.zeros_like(bn.running_mean) + + factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) + conv.weight = nn.Parameter(conv_w * factor.reshape([conv.out_channels, 1, 1, 1])) + conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) + return conv + + +def fuse_conv_bn(module): + """Recursively fuse conv and bn in a module. + + During inference, the functionary of batch norm layers is turned off + but only the mean and var alone channels are used, which exposes the + chance to fuse it with the preceding conv layers to save computations and + simplify network structures. + + Args: + module (nn.Module): Module to be fused. + + Returns: + nn.Module: Fused module. + """ + last_conv = None + last_conv_name = None + + for name, child in module.named_children(): + if isinstance(child, (nn.modules.batchnorm._BatchNorm, nn.SyncBatchNorm)): + if last_conv is None: # only fuse BN that is after Conv + continue + fused_conv = _fuse_conv_bn(last_conv, child) + module._modules[last_conv_name] = fused_conv + # To reduce changes, set BN as Identity instead of deleting it. + module._modules[name] = nn.Identity() + last_conv = None + elif isinstance(child, nn.Conv2d): + last_conv = child + last_conv_name = name + else: + fuse_conv_bn(child) + return module diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py new file mode 100644 index 000000000000..d88b7e476317 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/sync_bn.py @@ -0,0 +1,59 @@ +import torch + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + + +class _BatchNormXd(torch.nn.modules.batchnorm._BatchNorm): + """A general BatchNorm layer without input dimension check. + + Reproduced from @kapily's work: + (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547) + The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc + is `_check_input_dim` that is designed for tensor sanity checks. + The check has been bypassed in this class for the convenience of converting + SyncBatchNorm. + """ + + def _check_input_dim(self, input): + return + + +def revert_sync_batchnorm(module): + """Helper function to convert all `SyncBatchNorm` (SyncBN) and + `mmcv.ops.sync_bn.SyncBatchNorm`(MMSyncBN) layers in the model to + `BatchNormXd` layers. + + Adapted from @kapily's work: + (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547) + + Args: + module (nn.Module): The module containing `SyncBatchNorm` layers. + + Returns: + module_output: The converted module with `BatchNormXd` layers. + """ + module_output = module + module_checklist = [torch.nn.modules.batchnorm.SyncBatchNorm] + if hasattr(mmcv, 'ops'): + module_checklist.append(mmcv.ops.SyncBatchNorm) + if isinstance(module, tuple(module_checklist)): + module_output = _BatchNormXd( + module.num_features, module.eps, module.momentum, module.affine, module.track_running_stats + ) + if module.affine: + # no_grad() may not be needed here but + # just to be consistent with `convert_sync_batchnorm()` + with torch.no_grad(): + module_output.weight = module.weight + module_output.bias = module.bias + module_output.running_mean = module.running_mean + module_output.running_var = module.running_var + module_output.num_batches_tracked = module.num_batches_tracked + module_output.training = module.training + # qconfig exists in quantized models + if hasattr(module, 'qconfig'): + module_output.qconfig = module.qconfig + for name, child in module.named_children(): + module_output.add_module(name, revert_sync_batchnorm(child)) + del module + return module_output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py new file mode 100644 index 000000000000..aa5047e743cb --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/utils/weight_init.py @@ -0,0 +1,644 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import math +import warnings + +import numpy as np +import torch +import torch.nn as nn +from torch import Tensor + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import ( + Registry, + build_from_cfg, + get_logger, + print_log, +) + +INITIALIZERS = Registry('initializer') + + +def update_init_info(module, init_info): + """Update the `_params_init_info` in the module if the value of parameters + are changed. + + Args: + module (obj:`nn.Module`): The module of PyTorch with a user-defined + attribute `_params_init_info` which records the initialization + information. + init_info (str): The string that describes the initialization. + """ + assert hasattr(module, '_params_init_info'), f'Can not find `_params_init_info` in {module}' + for name, param in module.named_parameters(): + + assert param in module._params_init_info, ( + f'Find a new :obj:`Parameter` ' + f'named `{name}` during executing the ' + f'`init_weights` of ' + f'`{module.__class__.__name__}`. ' + f'Please do not add or ' + f'replace parameters during executing ' + f'the `init_weights`. ' + ) + + # The parameter has been changed during executing the + # `init_weights` of module + mean_value = param.data.mean() + if module._params_init_info[param]['tmp_mean_value'] != mean_value: + module._params_init_info[param]['init_info'] = init_info + module._params_init_info[param]['tmp_mean_value'] = mean_value + + +def constant_init(module, val, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.constant_(module.weight, val) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def xavier_init(module, gain=1, bias=0, distribution='normal'): + assert distribution in ['uniform', 'normal'] + if hasattr(module, 'weight') and module.weight is not None: + if distribution == 'uniform': + nn.init.xavier_uniform_(module.weight, gain=gain) + else: + nn.init.xavier_normal_(module.weight, gain=gain) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def normal_init(module, mean=0, std=1, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.normal_(module.weight, mean, std) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def trunc_normal_init( + module: nn.Module, mean: float = 0, std: float = 1, a: float = -2, b: float = 2, bias: float = 0 +) -> None: + if hasattr(module, 'weight') and module.weight is not None: + trunc_normal_(module.weight, mean, std, a, b) # type: ignore + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) # type: ignore + + +def uniform_init(module, a=0, b=1, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.uniform_(module.weight, a, b) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def kaiming_init(module, a=0, mode='fan_out', nonlinearity='relu', bias=0, distribution='normal'): + assert distribution in ['uniform', 'normal'] + if hasattr(module, 'weight') and module.weight is not None: + if distribution == 'uniform': + nn.init.kaiming_uniform_(module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + else: + nn.init.kaiming_normal_(module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def caffe2_xavier_init(module, bias=0): + # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch + # Acknowledgment to FAIR's internal code + kaiming_init(module, a=1, mode='fan_in', nonlinearity='leaky_relu', bias=bias, distribution='uniform') + + +def bias_init_with_prob(prior_prob): + """initialize conv/fc bias value according to a given probability value.""" + bias_init = float(-np.log((1 - prior_prob) / prior_prob)) + return bias_init + + +def _get_bases_name(m): + return [b.__name__ for b in m.__class__.__bases__] + + +class BaseInit(object): + def __init__(self, *, bias=0, bias_prob=None, layer=None): + self.wholemodule = False + if not isinstance(bias, (int, float)): + raise TypeError(f'bias must be a number, but got a {type(bias)}') + + if bias_prob is not None: + if not isinstance(bias_prob, float): + raise TypeError( + f'bias_prob type must be float, \ + but got {type(bias_prob)}' + ) + + if layer is not None: + if not isinstance(layer, (str, list)): + raise TypeError( + f'layer must be a str or a list of str, \ + but got a {type(layer)}' + ) + else: + layer = [] + + if bias_prob is not None: + self.bias = bias_init_with_prob(bias_prob) + else: + self.bias = bias + self.layer = [layer] if isinstance(layer, str) else layer + + def _get_init_info(self): + info = f'{self.__class__.__name__}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Constant') +class ConstantInit(BaseInit): + """Initialize module parameters with constant values. + + Args: + val (int | float): the value to fill the weights in the module with + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + """ + + def __init__(self, val, **kwargs): + super().__init__(**kwargs) + self.val = val + + def __call__(self, module): + def init(m): + if self.wholemodule: + constant_init(m, self.val, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + constant_init(m, self.val, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: val={self.val}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Xavier') +class XavierInit(BaseInit): + r"""Initialize module parameters with values according to the method + described in `Understanding the difficulty of training deep feedforward + neural networks - Glorot, X. & Bengio, Y. (2010). + `_ + + Args: + gain (int | float): an optional scaling factor. Defaults to 1. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + distribution (str): distribution either be ``'normal'`` + or ``'uniform'``. Defaults to ``'normal'``. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + """ + + def __init__(self, gain=1, distribution='normal', **kwargs): + super().__init__(**kwargs) + self.gain = gain + self.distribution = distribution + + def __call__(self, module): + def init(m): + if self.wholemodule: + xavier_init(m, self.gain, self.bias, self.distribution) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + xavier_init(m, self.gain, self.bias, self.distribution) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: gain={self.gain}, ' f'distribution={self.distribution}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Normal') +class NormalInit(BaseInit): + r"""Initialize module parameters with the values drawn from the normal + distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`. + + Args: + mean (int | float):the mean of the normal distribution. Defaults to 0. + std (int | float): the standard deviation of the normal distribution. + Defaults to 1. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + + """ + + def __init__(self, mean=0, std=1, **kwargs): + super().__init__(**kwargs) + self.mean = mean + self.std = std + + def __call__(self, module): + def init(m): + if self.wholemodule: + normal_init(m, self.mean, self.std, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + normal_init(m, self.mean, self.std, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: mean={self.mean},' f' std={self.std}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='TruncNormal') +class TruncNormalInit(BaseInit): + r"""Initialize module parameters with the values drawn from the normal + distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` with values + outside :math:`[a, b]`. + + Args: + mean (float): the mean of the normal distribution. Defaults to 0. + std (float): the standard deviation of the normal distribution. + Defaults to 1. + a (float): The minimum cutoff value. + b ( float): The maximum cutoff value. + bias (float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + + """ + + def __init__(self, mean: float = 0, std: float = 1, a: float = -2, b: float = 2, **kwargs) -> None: + super().__init__(**kwargs) + self.mean = mean + self.std = std + self.a = a + self.b = b + + def __call__(self, module: nn.Module) -> None: + def init(m): + if self.wholemodule: + trunc_normal_init(m, self.mean, self.std, self.a, self.b, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + trunc_normal_init(m, self.mean, self.std, self.a, self.b, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = ( + f'{self.__class__.__name__}: a={self.a}, b={self.b},' + f' mean={self.mean}, std={self.std}, bias={self.bias}' + ) + return info + + +@INITIALIZERS.register_module(name='Uniform') +class UniformInit(BaseInit): + r"""Initialize module parameters with values drawn from the uniform + distribution :math:`\mathcal{U}(a, b)`. + + Args: + a (int | float): the lower bound of the uniform distribution. + Defaults to 0. + b (int | float): the upper bound of the uniform distribution. + Defaults to 1. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + """ + + def __init__(self, a=0, b=1, **kwargs): + super().__init__(**kwargs) + self.a = a + self.b = b + + def __call__(self, module): + def init(m): + if self.wholemodule: + uniform_init(m, self.a, self.b, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + uniform_init(m, self.a, self.b, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: a={self.a},' f' b={self.b}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Kaiming') +class KaimingInit(BaseInit): + r"""Initialize module parameters with the values according to the method + described in `Delving deep into rectifiers: Surpassing human-level + performance on ImageNet classification - He, K. et al. (2015). + `_ + + Args: + a (int | float): the negative slope of the rectifier used after this + layer (only used with ``'leaky_relu'``). Defaults to 0. + mode (str): either ``'fan_in'`` or ``'fan_out'``. Choosing + ``'fan_in'`` preserves the magnitude of the variance of the weights + in the forward pass. Choosing ``'fan_out'`` preserves the + magnitudes in the backwards pass. Defaults to ``'fan_out'``. + nonlinearity (str): the non-linear function (`nn.functional` name), + recommended to use only with ``'relu'`` or ``'leaky_relu'`` . + Defaults to 'relu'. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + distribution (str): distribution either be ``'normal'`` or + ``'uniform'``. Defaults to ``'normal'``. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + """ + + def __init__(self, a=0, mode='fan_out', nonlinearity='relu', distribution='normal', **kwargs): + super().__init__(**kwargs) + self.a = a + self.mode = mode + self.nonlinearity = nonlinearity + self.distribution = distribution + + def __call__(self, module): + def init(m): + if self.wholemodule: + kaiming_init(m, self.a, self.mode, self.nonlinearity, self.bias, self.distribution) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + kaiming_init(m, self.a, self.mode, self.nonlinearity, self.bias, self.distribution) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = ( + f'{self.__class__.__name__}: a={self.a}, mode={self.mode}, ' + f'nonlinearity={self.nonlinearity}, ' + f'distribution ={self.distribution}, bias={self.bias}' + ) + return info + + +@INITIALIZERS.register_module(name='Caffe2Xavier') +class Caffe2XavierInit(KaimingInit): + # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch + # Acknowledgment to FAIR's internal code + def __init__(self, **kwargs): + super().__init__(a=1, mode='fan_in', nonlinearity='leaky_relu', distribution='uniform', **kwargs) + + def __call__(self, module): + super().__call__(module) + + +@INITIALIZERS.register_module(name='Pretrained') +class PretrainedInit(object): + """Initialize module by loading a pretrained model. + + Args: + checkpoint (str): the checkpoint file of the pretrained model should + be load. + prefix (str, optional): the prefix of a sub-module in the pretrained + model. it is for loading a part of the pretrained model to + initialize. For example, if we would like to only load the + backbone of a detector model, we can set ``prefix='backbone.'``. + Defaults to None. + map_location (str): map tensors into proper locations. + """ + + def __init__(self, checkpoint, prefix=None, map_location=None): + self.checkpoint = checkpoint + self.prefix = prefix + self.map_location = map_location + + def __call__(self, module): + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import ( + _load_checkpoint_with_prefix, + load_checkpoint, + load_state_dict, + ) + + logger = get_logger('mmcv') + if self.prefix is None: + print_log(f'load model from: {self.checkpoint}', logger=logger) + load_checkpoint(module, self.checkpoint, map_location=self.map_location, strict=False, logger=logger) + else: + print_log(f'load {self.prefix} in model from: {self.checkpoint}', logger=logger) + state_dict = _load_checkpoint_with_prefix(self.prefix, self.checkpoint, map_location=self.map_location) + load_state_dict(module, state_dict, strict=False, logger=logger) + + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: load from {self.checkpoint}' + return info + + +def _initialize(module, cfg, wholemodule=False): + func = build_from_cfg(cfg, INITIALIZERS) + # wholemodule flag is for override mode, there is no layer key in override + # and initializer will give init values for the whole module with the name + # in override. + func.wholemodule = wholemodule + func(module) + + +def _initialize_override(module, override, cfg): + if not isinstance(override, (dict, list)): + raise TypeError( + f'override must be a dict or a list of dict, \ + but got {type(override)}' + ) + + override = [override] if isinstance(override, dict) else override + + for override_ in override: + + cp_override = copy.deepcopy(override_) + name = cp_override.pop('name', None) + if name is None: + raise ValueError('`override` must contain the key "name",' f'but got {cp_override}') + # if override only has name key, it means use args in init_cfg + if not cp_override: + cp_override.update(cfg) + # if override has name key and other args except type key, it will + # raise error + elif 'type' not in cp_override.keys(): + raise ValueError(f'`override` need "type" key, but got {cp_override}') + + if hasattr(module, name): + _initialize(getattr(module, name), cp_override, wholemodule=True) + else: + raise RuntimeError(f'module did not have attribute {name}, ' f'but init_cfg is {cp_override}.') + + +def initialize(module, init_cfg): + """Initialize a module. + + Args: + module (``torch.nn.Module``): the module will be initialized. + init_cfg (dict | list[dict]): initialization configuration dict to + define initializer. OpenMMLab has implemented 6 initializers + including ``Constant``, ``Xavier``, ``Normal``, ``Uniform``, + ``Kaiming``, and ``Pretrained``. + Example: + >>> module = nn.Linear(2, 3, bias=True) + >>> init_cfg = dict(type='Constant', layer='Linear', val =1 , bias =2) + >>> initialize(module, init_cfg) + + >>> module = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1,2)) + >>> # define key ``'layer'`` for initializing layer with different + >>> # configuration + >>> init_cfg = [dict(type='Constant', layer='Conv1d', val=1), + dict(type='Constant', layer='Linear', val=2)] + >>> initialize(module, init_cfg) + + >>> # define key``'override'`` to initialize some specific part in + >>> # module + >>> class FooNet(nn.Module): + >>> def __init__(self): + >>> super().__init__() + >>> self.feat = nn.Conv2d(3, 16, 3) + >>> self.reg = nn.Conv2d(16, 10, 3) + >>> self.cls = nn.Conv2d(16, 5, 3) + >>> model = FooNet() + >>> init_cfg = dict(type='Constant', val=1, bias=2, layer='Conv2d', + >>> override=dict(type='Constant', name='reg', val=3, bias=4)) + >>> initialize(model, init_cfg) + + >>> model = ResNet(depth=50) + >>> # Initialize weights with the pretrained model. + >>> init_cfg = dict(type='Pretrained', + checkpoint='torchvision://resnet50') + >>> initialize(model, init_cfg) + + >>> # Initialize weights of a sub-module with the specific part of + >>> # a pretrained model by using "prefix". + >>> url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\ + >>> 'retinanet_r50_fpn_1x_coco/'\ + >>> 'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth' + >>> init_cfg = dict(type='Pretrained', + checkpoint=url, prefix='backbone.') + """ + if not isinstance(init_cfg, (dict, list)): + raise TypeError( + f'init_cfg must be a dict or a list of dict, \ + but got {type(init_cfg)}' + ) + + if isinstance(init_cfg, dict): + init_cfg = [init_cfg] + + for cfg in init_cfg: + # should deeply copy the original config because cfg may be used by + # other modules, e.g., one init_cfg shared by multiple bottleneck + # blocks, the expected cfg will be changed after pop and will change + # the initialization behavior of other modules + cp_cfg = copy.deepcopy(cfg) + override = cp_cfg.pop('override', None) + _initialize(module, cp_cfg) + + if override is not None: + cp_cfg.pop('layer', None) + _initialize_override(module, override, cp_cfg) + else: + # All attributes in module have same initialization. + pass + + +def _no_grad_trunc_normal_(tensor: Tensor, mean: float, std: float, a: float, b: float) -> Tensor: + # Method based on + # https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf + # Modified from + # https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 + + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn( + 'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. ' + 'The distribution of values may be incorrect.', + stacklevel=2, + ) + + with torch.no_grad(): + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + lower = norm_cdf((a - mean) / std) + upper = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [lower, upper], then translate + # to [2lower-1, 2upper-1]. + tensor.uniform_(2 * lower - 1, 2 * upper - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + tensor.erfinv_() + + # Transform to proper mean, std + tensor.mul_(std * math.sqrt(2.0)) + tensor.add_(mean) + + # Clamp to ensure it's in the proper range + tensor.clamp_(min=a, max=b) + return tensor + + +def trunc_normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0, a: float = -2.0, b: float = 2.0) -> Tensor: + r"""Fills the input Tensor with values drawn from a truncated + normal distribution. The values are effectively drawn from the + normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` + with values outside :math:`[a, b]` redrawn until they are within + the bounds. The method used for generating the random values works + best when :math:`a \leq \text{mean} \leq b`. + + Modified from + https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py + + Args: + tensor (``torch.Tensor``): an n-dimensional `torch.Tensor`. + mean (float): the mean of the normal distribution. + std (float): the standard deviation of the normal distribution. + a (float): the minimum cutoff value. + b (float): the maximum cutoff value. + """ + return _no_grad_trunc_normal_(tensor, mean, std, a, b) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py new file mode 100644 index 000000000000..c430ff61db6e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/cnn/vgg.py @@ -0,0 +1,159 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging + +import torch.nn as nn + +from .utils import constant_init, kaiming_init, normal_init + + +def conv3x3(in_planes, out_planes, dilation=1): + """3x3 convolution with padding.""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, padding=dilation, dilation=dilation) + + +def make_vgg_layer(inplanes, planes, num_blocks, dilation=1, with_bn=False, ceil_mode=False): + layers = [] + for _ in range(num_blocks): + layers.append(conv3x3(inplanes, planes, dilation)) + if with_bn: + layers.append(nn.BatchNorm2d(planes)) + layers.append(nn.ReLU(inplace=True)) + inplanes = planes + layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode)) + + return layers + + +class VGG(nn.Module): + """VGG backbone. + + Args: + depth (int): Depth of vgg, from {11, 13, 16, 19}. + with_bn (bool): Use BatchNorm or not. + num_classes (int): number of classes for classification. + num_stages (int): VGG stages, normally 5. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze + running stats (mean and var). + bn_frozen (bool): Whether to freeze weight and bias of BN layers. + """ + + arch_settings = {11: (1, 1, 2, 2, 2), 13: (2, 2, 2, 2, 2), 16: (2, 2, 3, 3, 3), 19: (2, 2, 4, 4, 4)} + + def __init__( + self, + depth, + with_bn=False, + num_classes=-1, + num_stages=5, + dilations=(1, 1, 1, 1, 1), + out_indices=(0, 1, 2, 3, 4), + frozen_stages=-1, + bn_eval=True, + bn_frozen=False, + ceil_mode=False, + with_last_pool=True, + ): + super(VGG, self).__init__() + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for vgg') + assert num_stages >= 1 and num_stages <= 5 + stage_blocks = self.arch_settings[depth] + self.stage_blocks = stage_blocks[:num_stages] + assert len(dilations) == num_stages + assert max(out_indices) <= num_stages + + self.num_classes = num_classes + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.bn_eval = bn_eval + self.bn_frozen = bn_frozen + + self.inplanes = 3 + start_idx = 0 + vgg_layers = [] + self.range_sub_modules = [] + for i, num_blocks in enumerate(self.stage_blocks): + num_modules = num_blocks * (2 + with_bn) + 1 + end_idx = start_idx + num_modules + dilation = dilations[i] + planes = 64 * 2 ** i if i < 4 else 512 + vgg_layer = make_vgg_layer( + self.inplanes, planes, num_blocks, dilation=dilation, with_bn=with_bn, ceil_mode=ceil_mode + ) + vgg_layers.extend(vgg_layer) + self.inplanes = planes + self.range_sub_modules.append([start_idx, end_idx]) + start_idx = end_idx + if not with_last_pool: + vgg_layers.pop(-1) + self.range_sub_modules[-1][1] -= 1 + self.module_name = 'features' + self.add_module(self.module_name, nn.Sequential(*vgg_layers)) + + if self.num_classes > 0: + self.classifier = nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, num_classes), + ) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + from ..runner import load_checkpoint + + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + elif isinstance(m, nn.Linear): + normal_init(m, std=0.01) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + outs = [] + vgg_layers = getattr(self, self.module_name) + for i in range(len(self.stage_blocks)): + for j in range(*self.range_sub_modules[i]): + vgg_layer = vgg_layers[j] + x = vgg_layer(x) + if i in self.out_indices: + outs.append(x) + if self.num_classes > 0: + x = x.view(x.size(0), -1) + x = self.classifier(x) + outs.append(x) + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def train(self, mode=True): + super(VGG, self).train(mode) + if self.bn_eval: + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + if self.bn_frozen: + for params in m.parameters(): + params.requires_grad = False + vgg_layers = getattr(self, self.module_name) + if mode and self.frozen_stages >= 0: + for i in range(self.frozen_stages): + for j in range(*self.range_sub_modules[i]): + mod = vgg_layers[j] + mod.eval() + for param in mod.parameters(): + param.requires_grad = False diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py new file mode 100644 index 000000000000..91307c41c0a5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .test import collect_results_cpu, collect_results_gpu, multi_gpu_test, single_gpu_test + +__all__ = ['collect_results_cpu', 'collect_results_gpu', 'multi_gpu_test', 'single_gpu_test'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py new file mode 100644 index 000000000000..93f07f70ea4a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/engine/test.py @@ -0,0 +1,195 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import pickle +import shutil +import tempfile +import time + +import torch +import torch.distributed as dist + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import get_dist_info + + +def single_gpu_test(model, data_loader): + """Test model with a single gpu. + + This method tests model with a single gpu and displays test progress bar. + + Args: + model (nn.Module): Model to be tested. + data_loader (nn.Dataloader): Pytorch data loader. + + Returns: + list: The prediction results. + """ + model.eval() + results = [] + dataset = data_loader.dataset + prog_bar = mmcv.ProgressBar(len(dataset)) + for data in data_loader: + with torch.no_grad(): + result = model(return_loss=False, **data) + results.extend(result) + + # Assume result has the same length of batch_size + # refer to https://github.com/open-mmlab/mmcv/issues/985 + batch_size = len(result) + for _ in range(batch_size): + prog_bar.update() + return results + + +def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False): + """Test model with multiple gpus. + + This method tests model with multiple gpus and collects the results + under two different modes: gpu and cpu modes. By setting + ``gpu_collect=True``, it encodes results to gpu tensors and use gpu + communication for results collection. On cpu mode it saves the results on + different gpus to ``tmpdir`` and collects them by the rank 0 worker. + + Args: + model (nn.Module): Model to be tested. + data_loader (nn.Dataloader): Pytorch data loader. + tmpdir (str): Path of directory to save the temporary results from + different gpus under cpu mode. + gpu_collect (bool): Option to use either gpu or cpu to collect results. + + Returns: + list: The prediction results. + """ + model.eval() + results = [] + dataset = data_loader.dataset + rank, world_size = get_dist_info() + if rank == 0: + prog_bar = mmcv.ProgressBar(len(dataset)) + time.sleep(2) # This line can prevent deadlock problem in some cases. + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, **data) + results.extend(result) + + if rank == 0: + batch_size = len(result) + batch_size_all = batch_size * world_size + if batch_size_all + prog_bar.completed > len(dataset): + batch_size_all = len(dataset) - prog_bar.completed + for _ in range(batch_size_all): + prog_bar.update() + + # collect results from all ranks + if gpu_collect: + results = collect_results_gpu(results, len(dataset)) + else: + results = collect_results_cpu(results, len(dataset), tmpdir) + return results + + +def collect_results_cpu(result_part, size, tmpdir=None): + """Collect results under cpu mode. + + On cpu mode, this function will save the results on different gpus to + ``tmpdir`` and collect them by the rank 0 worker. + + Args: + result_part (list): Result list containing result parts + to be collected. + size (int): Size of the results, commonly equal to length of + the results. + tmpdir (str | None): temporal directory for collected results to + store. If set to None, it will create a random temporal directory + for it. + + Returns: + list: The collected results. + """ + rank, world_size = get_dist_info() + # create a tmp dir if it is not specified + if tmpdir is None: + MAX_LEN = 512 + # 32 is whitespace + dir_tensor = torch.full((MAX_LEN,), 32, dtype=torch.uint8, device='cuda') + if rank == 0: + mmcv.mkdir_or_exist('.dist_test') + tmpdir = tempfile.mkdtemp(dir='.dist_test') + tmpdir = torch.tensor(bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') + dir_tensor[: len(tmpdir)] = tmpdir + dist.broadcast(dir_tensor, 0) + tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() + else: + mmcv.mkdir_or_exist(tmpdir) + # dump the part result to the dir + mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl')) + dist.barrier() + # collect all parts + if rank != 0: + return None + else: + # load results of all parts from tmp dir + part_list = [] + for i in range(world_size): + part_file = osp.join(tmpdir, f'part_{i}.pkl') + part_result = mmcv.load(part_file) + # When data is severely insufficient, an empty part_result + # on a certain gpu could makes the overall outputs empty. + if part_result: + part_list.append(part_result) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + # remove tmp dir + shutil.rmtree(tmpdir) + return ordered_results + + +def collect_results_gpu(result_part, size): + """Collect results under gpu mode. + + On gpu mode, this function will encode results to gpu tensors and use gpu + communication for results collection. + + Args: + result_part (list): Result list containing result parts + to be collected. + size (int): Size of the results, commonly equal to length of + the results. + + Returns: + list: The collected results. + """ + rank, world_size = get_dist_info() + # dump result part to tensor with pickle + part_tensor = torch.tensor(bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') + # gather all result part tensor shape + shape_tensor = torch.tensor(part_tensor.shape, device='cuda') + shape_list = [shape_tensor.clone() for _ in range(world_size)] + dist.all_gather(shape_list, shape_tensor) + # padding result part tensor to max length + shape_max = torch.tensor(shape_list).max() + part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') + part_send[: shape_tensor[0]] = part_tensor + part_recv_list = [part_tensor.new_zeros(shape_max) for _ in range(world_size)] + # gather all result part + dist.all_gather(part_recv_list, part_send) + + if rank == 0: + part_list = [] + for recv, shape in zip(part_recv_list, shape_list): + part_result = pickle.loads(recv[: shape[0]].cpu().numpy().tobytes()) + # When data is severely insufficient, an empty part_result + # on a certain gpu could makes the overall outputs empty. + if part_result: + part_list.append(part_result) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + return ordered_results diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py new file mode 100644 index 000000000000..48c6ac0c6999 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .file_client import BaseStorageBackend, FileClient +from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler +from .io import dump, load, register_handler +from .parse import dict_from_file, list_from_file + +__all__ = [ + 'BaseStorageBackend', + 'FileClient', + 'load', + 'dump', + 'register_handler', + 'BaseFileHandler', + 'JsonHandler', + 'PickleHandler', + 'YamlHandler', + 'list_from_file', + 'dict_from_file', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py new file mode 100644 index 000000000000..fe088721ca25 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/file_client.py @@ -0,0 +1,1127 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import inspect +import os +import os.path as osp +import re +import tempfile +import warnings +from abc import ABCMeta, abstractmethod +from contextlib import contextmanager +from pathlib import Path +from typing import Iterable, Iterator, Optional, Tuple, Union +from urllib.request import urlopen + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.misc import has_method +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.path import is_filepath + + +class BaseStorageBackend(metaclass=ABCMeta): + """Abstract class of storage backends. + + All backends need to implement two apis: ``get()`` and ``get_text()``. + ``get()`` reads the file as a byte stream and ``get_text()`` reads the file + as texts. + """ + + # a flag to indicate whether the backend can create a symlink for a file + _allow_symlink = False + + @property + def name(self): + return self.__class__.__name__ + + @property + def allow_symlink(self): + return self._allow_symlink + + @abstractmethod + def get(self, filepath): + pass + + @abstractmethod + def get_text(self, filepath): + pass + + +class CephBackend(BaseStorageBackend): + """Ceph storage backend (for internal use). + + Args: + path_mapping (dict|None): path mapping dict from local path to Petrel + path. When ``path_mapping={'src': 'dst'}``, ``src`` in ``filepath`` + will be replaced by ``dst``. Default: None. + + .. warning:: + :class:`mmcv.fileio.file_client.CephBackend` will be deprecated, + please use :class:`mmcv.fileio.file_client.PetrelBackend` instead. + """ + + def __init__(self, path_mapping=None): + try: + import ceph + except ImportError: + raise ImportError('Please install ceph to enable CephBackend.') + + warnings.warn('CephBackend will be deprecated, please use PetrelBackend instead') + self._client = ceph.S3Client() + assert isinstance(path_mapping, dict) or path_mapping is None + self.path_mapping = path_mapping + + def get(self, filepath): + filepath = str(filepath) + if self.path_mapping is not None: + for k, v in self.path_mapping.items(): + filepath = filepath.replace(k, v) + value = self._client.Get(filepath) + value_buf = memoryview(value) + return value_buf + + def get_text(self, filepath, encoding=None): + raise NotImplementedError + + +class PetrelBackend(BaseStorageBackend): + """Petrel storage backend (for internal use). + + PetrelBackend supports reading and writing data to multiple clusters. + If the file path contains the cluster name, PetrelBackend will read data + from specified cluster or write data to it. Otherwise, PetrelBackend will + access the default cluster. + + Args: + path_mapping (dict, optional): Path mapping dict from local path to + Petrel path. When ``path_mapping={'src': 'dst'}``, ``src`` in + ``filepath`` will be replaced by ``dst``. Default: None. + enable_mc (bool, optional): Whether to enable memcached support. + Default: True. + + Examples: + >>> filepath1 = 's3://path/of/file' + >>> filepath2 = 'cluster-name:s3://path/of/file' + >>> client = PetrelBackend() + >>> client.get(filepath1) # get data from default cluster + >>> client.get(filepath2) # get data from 'cluster-name' cluster + """ + + def __init__(self, path_mapping: Optional[dict] = None, enable_mc: bool = True): + try: + from petrel_client import client + except ImportError: + raise ImportError('Please install petrel_client to enable ' 'PetrelBackend.') + + self._client = client.Client(enable_mc=enable_mc) + assert isinstance(path_mapping, dict) or path_mapping is None + self.path_mapping = path_mapping + + def _map_path(self, filepath: Union[str, Path]) -> str: + """Map ``filepath`` to a string path whose prefix will be replaced by + :attr:`self.path_mapping`. + + Args: + filepath (str): Path to be mapped. + """ + filepath = str(filepath) + if self.path_mapping is not None: + for k, v in self.path_mapping.items(): + filepath = filepath.replace(k, v) + return filepath + + def _format_path(self, filepath: str) -> str: + """Convert a ``filepath`` to standard format of petrel oss. + + If the ``filepath`` is concatenated by ``os.path.join``, in a Windows + environment, the ``filepath`` will be the format of + 's3://bucket_name\\image.jpg'. By invoking :meth:`_format_path`, the + above ``filepath`` will be converted to 's3://bucket_name/image.jpg'. + + Args: + filepath (str): Path to be formatted. + """ + return re.sub(r'\\+', '/', filepath) + + def get(self, filepath: Union[str, Path]) -> memoryview: + """Read data from a given ``filepath`` with 'rb' mode. + + Args: + filepath (str or Path): Path to read data. + + Returns: + memoryview: A memory view of expected bytes object to avoid + copying. The memoryview object can be converted to bytes by + ``value_buf.tobytes()``. + """ + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + value = self._client.Get(filepath) + value_buf = memoryview(value) + return value_buf + + def get_text(self, filepath: Union[str, Path], encoding: str = 'utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. + + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + return str(self.get(filepath), encoding=encoding) + + def put(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Save data to a given ``filepath``. + + Args: + obj (bytes): Data to be saved. + filepath (str or Path): Path to write data. + """ + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + self._client.put(filepath, obj) + + def put_text(self, obj: str, filepath: Union[str, Path], encoding: str = 'utf-8') -> None: + """Save data to a given ``filepath``. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str): The encoding format used to encode the ``obj``. + Default: 'utf-8'. + """ + self.put(bytes(obj, encoding=encoding), filepath) + + def remove(self, filepath: Union[str, Path]) -> None: + """Remove a file. + + Args: + filepath (str or Path): Path to be removed. + """ + if not has_method(self._client, 'delete'): + raise NotImplementedError( + ( + 'Current version of Petrel Python SDK has not supported ' + 'the `delete` method, please use a higher version or dev' + ' branch instead.' + ) + ) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + self._client.delete(filepath) + + def exists(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path exists. + + Args: + filepath (str or Path): Path to be checked whether exists. + + Returns: + bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. + """ + if not (has_method(self._client, 'contains') and has_method(self._client, 'isdir')): + raise NotImplementedError( + ( + 'Current version of Petrel Python SDK has not supported ' + 'the `contains` and `isdir` methods, please use a higher' + 'version or dev branch instead.' + ) + ) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + return self._client.contains(filepath) or self._client.isdir(filepath) + + def isdir(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a directory. + + Args: + filepath (str or Path): Path to be checked whether it is a + directory. + + Returns: + bool: Return ``True`` if ``filepath`` points to a directory, + ``False`` otherwise. + """ + if not has_method(self._client, 'isdir'): + raise NotImplementedError( + ( + 'Current version of Petrel Python SDK has not supported ' + 'the `isdir` method, please use a higher version or dev' + ' branch instead.' + ) + ) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + return self._client.isdir(filepath) + + def isfile(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a file. + + Args: + filepath (str or Path): Path to be checked whether it is a file. + + Returns: + bool: Return ``True`` if ``filepath`` points to a file, ``False`` + otherwise. + """ + if not has_method(self._client, 'contains'): + raise NotImplementedError( + ( + 'Current version of Petrel Python SDK has not supported ' + 'the `contains` method, please use a higher version or ' + 'dev branch instead.' + ) + ) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + return self._client.contains(filepath) + + def join_path(self, filepath: Union[str, Path], *filepaths: Union[str, Path]) -> str: + """Concatenate all file paths. + + Args: + filepath (str or Path): Path to be concatenated. + + Returns: + str: The result after concatenation. + """ + filepath = self._format_path(self._map_path(filepath)) + if filepath.endswith('/'): + filepath = filepath[:-1] + formatted_paths = [filepath] + for path in filepaths: + formatted_paths.append(self._format_path(self._map_path(path))) + return '/'.join(formatted_paths) + + @contextmanager + def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]: + """Download a file from ``filepath`` and return a temporary path. + + ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Args: + filepath (str | Path): Download a file from ``filepath``. + + Examples: + >>> client = PetrelBackend() + >>> # After existing from the ``with`` clause, + >>> # the path will be removed + >>> with client.get_local_path('s3://path/of/your/file') as path: + ... # do something here + + Yields: + Iterable[str]: Only yield one temporary path. + """ + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + assert self.isfile(filepath) + try: + f = tempfile.NamedTemporaryFile(delete=False) + f.write(self.get(filepath)) + f.close() + yield f.name + finally: + os.remove(f.name) + + def list_dir_or_file( + self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False, + ) -> Iterator[str]: + """Scan a directory to find the interested directories or files in + arbitrary order. + + Note: + Petrel has no concept of directories but it simulates the directory + hierarchy in the filesystem through public prefixes. In addition, + if the returned path ends with '/', it means the path is a public + prefix which is a logical directory. + + Note: + :meth:`list_dir_or_file` returns the path relative to ``dir_path``. + In addition, the returned path of directory will not contains the + suffix '/' which is consistent with other backends. + + Args: + dir_path (str | Path): Path of the directory. + list_dir (bool): List the directories. Default: True. + list_file (bool): List the path of files. Default: True. + suffix (str or tuple[str], optional): File suffix + that we are interested in. Default: None. + recursive (bool): If set to True, recursively scan the + directory. Default: False. + + Yields: + Iterable[str]: A relative path to ``dir_path``. + """ + if not has_method(self._client, 'list'): + raise NotImplementedError( + ( + 'Current version of Petrel Python SDK has not supported ' + 'the `list` method, please use a higher version or dev' + ' branch instead.' + ) + ) + + dir_path = self._map_path(dir_path) + dir_path = self._format_path(dir_path) + if list_dir and suffix is not None: + raise TypeError('`list_dir` should be False when `suffix` is not None') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('`suffix` must be a string or tuple of strings') + + # Petrel's simulated directory hierarchy assumes that directory paths + # should end with `/` + if not dir_path.endswith('/'): + dir_path += '/' + + root = dir_path + + def _list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive): + for path in self._client.list(dir_path): + # the `self.isdir` is not used here to determine whether path + # is a directory, because `self.isdir` relies on + # `self._client.list` + if path.endswith('/'): # a directory path + next_dir_path = self.join_path(dir_path, path) + if list_dir: + # get the relative path and exclude the last + # character '/' + rel_dir = next_dir_path[len(root) : -1] + yield rel_dir + if recursive: + yield from _list_dir_or_file(next_dir_path, list_dir, list_file, suffix, recursive) + else: # a file path + absolute_path = self.join_path(dir_path, path) + rel_path = absolute_path[len(root) :] + if (suffix is None or rel_path.endswith(suffix)) and list_file: + yield rel_path + + return _list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive) + + +class MemcachedBackend(BaseStorageBackend): + """Memcached storage backend. + + Attributes: + server_list_cfg (str): Config file for memcached server list. + client_cfg (str): Config file for memcached client. + sys_path (str | None): Additional path to be appended to `sys.path`. + Default: None. + """ + + def __init__(self, server_list_cfg, client_cfg, sys_path=None): + if sys_path is not None: + import sys + + sys.path.append(sys_path) + try: + import mc + except ImportError: + raise ImportError('Please install memcached to enable MemcachedBackend.') + + self.server_list_cfg = server_list_cfg + self.client_cfg = client_cfg + self._client = mc.MemcachedClient.GetInstance(self.server_list_cfg, self.client_cfg) + # mc.pyvector servers as a point which points to a memory cache + self._mc_buffer = mc.pyvector() + + def get(self, filepath): + filepath = str(filepath) + import mc + + self._client.Get(filepath, self._mc_buffer) + value_buf = mc.ConvertBuffer(self._mc_buffer) + return value_buf + + def get_text(self, filepath, encoding=None): + raise NotImplementedError + + +class LmdbBackend(BaseStorageBackend): + """Lmdb storage backend. + + Args: + db_path (str): Lmdb database path. + readonly (bool, optional): Lmdb environment parameter. If True, + disallow any write operations. Default: True. + lock (bool, optional): Lmdb environment parameter. If False, when + concurrent access occurs, do not lock the database. Default: False. + readahead (bool, optional): Lmdb environment parameter. If False, + disable the OS filesystem readahead mechanism, which may improve + random read performance when a database is larger than RAM. + Default: False. + + Attributes: + db_path (str): Lmdb database path. + """ + + def __init__(self, db_path, readonly=True, lock=False, readahead=False, **kwargs): + try: + import lmdb + except ImportError: + raise ImportError('Please install lmdb to enable LmdbBackend.') + + self.db_path = str(db_path) + self._client = lmdb.open(self.db_path, readonly=readonly, lock=lock, readahead=readahead, **kwargs) + + def get(self, filepath): + """Get values according to the filepath. + + Args: + filepath (str | obj:`Path`): Here, filepath is the lmdb key. + """ + filepath = str(filepath) + with self._client.begin(write=False) as txn: + value_buf = txn.get(filepath.encode('ascii')) + return value_buf + + def get_text(self, filepath, encoding=None): + raise NotImplementedError + + +class HardDiskBackend(BaseStorageBackend): + """Raw hard disks storage backend.""" + + _allow_symlink = True + + def get(self, filepath: Union[str, Path]) -> bytes: + """Read data from a given ``filepath`` with 'rb' mode. + + Args: + filepath (str or Path): Path to read data. + + Returns: + bytes: Expected bytes object. + """ + with open(filepath, 'rb') as f: + value_buf = f.read() + return value_buf + + def get_text(self, filepath: Union[str, Path], encoding: str = 'utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. + + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + with open(filepath, 'r', encoding=encoding) as f: + value_buf = f.read() + return value_buf + + def put(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'wb' mode. + + Note: + ``put`` will create a directory if the directory of ``filepath`` + does not exist. + + Args: + obj (bytes): Data to be written. + filepath (str or Path): Path to write data. + """ + mmcv.mkdir_or_exist(osp.dirname(filepath)) + with open(filepath, 'wb') as f: + f.write(obj) + + def put_text(self, obj: str, filepath: Union[str, Path], encoding: str = 'utf-8') -> None: + """Write data to a given ``filepath`` with 'w' mode. + + Note: + ``put_text`` will create a directory if the directory of + ``filepath`` does not exist. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + """ + mmcv.mkdir_or_exist(osp.dirname(filepath)) + with open(filepath, 'w', encoding=encoding) as f: + f.write(obj) + + def remove(self, filepath: Union[str, Path]) -> None: + """Remove a file. + + Args: + filepath (str or Path): Path to be removed. + """ + os.remove(filepath) + + def exists(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path exists. + + Args: + filepath (str or Path): Path to be checked whether exists. + + Returns: + bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. + """ + return osp.exists(filepath) + + def isdir(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a directory. + + Args: + filepath (str or Path): Path to be checked whether it is a + directory. + + Returns: + bool: Return ``True`` if ``filepath`` points to a directory, + ``False`` otherwise. + """ + return osp.isdir(filepath) + + def isfile(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a file. + + Args: + filepath (str or Path): Path to be checked whether it is a file. + + Returns: + bool: Return ``True`` if ``filepath`` points to a file, ``False`` + otherwise. + """ + return osp.isfile(filepath) + + def join_path(self, filepath: Union[str, Path], *filepaths: Union[str, Path]) -> str: + """Concatenate all file paths. + + Join one or more filepath components intelligently. The return value + is the concatenation of filepath and any members of *filepaths. + + Args: + filepath (str or Path): Path to be concatenated. + + Returns: + str: The result of concatenation. + """ + return osp.join(filepath, *filepaths) + + @contextmanager + def get_local_path(self, filepath: Union[str, Path]) -> Iterable[Union[str, Path]]: + """Only for unified API and do nothing.""" + yield filepath + + def list_dir_or_file( + self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False, + ) -> Iterator[str]: + """Scan a directory to find the interested directories or files in + arbitrary order. + + Note: + :meth:`list_dir_or_file` returns the path relative to ``dir_path``. + + Args: + dir_path (str | Path): Path of the directory. + list_dir (bool): List the directories. Default: True. + list_file (bool): List the path of files. Default: True. + suffix (str or tuple[str], optional): File suffix + that we are interested in. Default: None. + recursive (bool): If set to True, recursively scan the + directory. Default: False. + + Yields: + Iterable[str]: A relative path to ``dir_path``. + """ + if list_dir and suffix is not None: + raise TypeError('`suffix` should be None when `list_dir` is True') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('`suffix` must be a string or tuple of strings') + + root = dir_path + + def _list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive): + for entry in os.scandir(dir_path): + if not entry.name.startswith('.') and entry.is_file(): + rel_path = osp.relpath(entry.path, root) + if (suffix is None or rel_path.endswith(suffix)) and list_file: + yield rel_path + elif osp.isdir(entry.path): + if list_dir: + rel_dir = osp.relpath(entry.path, root) + yield rel_dir + if recursive: + yield from _list_dir_or_file(entry.path, list_dir, list_file, suffix, recursive) + + return _list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive) + + +class HTTPBackend(BaseStorageBackend): + """HTTP and HTTPS storage bachend.""" + + def get(self, filepath): + value_buf = urlopen(filepath).read() + return value_buf + + def get_text(self, filepath, encoding='utf-8'): + value_buf = urlopen(filepath).read() + return value_buf.decode(encoding) + + @contextmanager + def get_local_path(self, filepath: str) -> Iterable[str]: + """Download a file from ``filepath``. + + ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Args: + filepath (str): Download a file from ``filepath``. + + Examples: + >>> client = HTTPBackend() + >>> # After existing from the ``with`` clause, + >>> # the path will be removed + >>> with client.get_local_path('http://path/of/your/file') as path: + ... # do something here + """ + try: + f = tempfile.NamedTemporaryFile(delete=False) + f.write(self.get(filepath)) + f.close() + yield f.name + finally: + os.remove(f.name) + + +class FileClient: + """A general file client to access files in different backends. + + The client loads a file or text in a specified backend from its path + and returns it as a binary or text file. There are two ways to choose a + backend, the name of backend and the prefix of path. Although both of them + can be used to choose a storage backend, ``backend`` has a higher priority + that is if they are all set, the storage backend will be chosen by the + backend argument. If they are all `None`, the disk backend will be chosen. + Note that It can also register other backend accessor with a given name, + prefixes, and backend class. In addition, We use the singleton pattern to + avoid repeated object creation. If the arguments are the same, the same + object will be returned. + + Args: + backend (str, optional): The storage backend type. Options are "disk", + "ceph", "memcached", "lmdb", "http" and "petrel". Default: None. + prefix (str, optional): The prefix of the registered storage backend. + Options are "s3", "http", "https". Default: None. + + Examples: + >>> # only set backend + >>> file_client = FileClient(backend='petrel') + >>> # only set prefix + >>> file_client = FileClient(prefix='s3') + >>> # set both backend and prefix but use backend to choose client + >>> file_client = FileClient(backend='petrel', prefix='s3') + >>> # if the arguments are the same, the same object is returned + >>> file_client1 = FileClient(backend='petrel') + >>> file_client1 is file_client + True + + Attributes: + client (:obj:`BaseStorageBackend`): The backend object. + """ + + _backends = { + 'disk': HardDiskBackend, + 'ceph': CephBackend, + 'memcached': MemcachedBackend, + 'lmdb': LmdbBackend, + 'petrel': PetrelBackend, + 'http': HTTPBackend, + } + # This collection is used to record the overridden backends, and when a + # backend appears in the collection, the singleton pattern is disabled for + # that backend, because if the singleton pattern is used, then the object + # returned will be the backend before overwriting + _overridden_backends = set() + _prefix_to_backends = { + 's3': PetrelBackend, + 'http': HTTPBackend, + 'https': HTTPBackend, + } + _overridden_prefixes = set() + + _instances = {} + + def __new__(cls, backend=None, prefix=None, **kwargs): + if backend is None and prefix is None: + backend = 'disk' + if backend is not None and backend not in cls._backends: + raise ValueError( + f'Backend {backend} is not supported. Currently supported ones' f' are {list(cls._backends.keys())}' + ) + if prefix is not None and prefix not in cls._prefix_to_backends: + raise ValueError( + f'prefix {prefix} is not supported. Currently supported ones ' + f'are {list(cls._prefix_to_backends.keys())}' + ) + + # concatenate the arguments to a unique key for determining whether + # objects with the same arguments were created + arg_key = f'{backend}:{prefix}' + for key, value in kwargs.items(): + arg_key += f':{key}:{value}' + + # if a backend was overridden, it will create a new object + if ( + arg_key in cls._instances + and backend not in cls._overridden_backends + and prefix not in cls._overridden_prefixes + ): + _instance = cls._instances[arg_key] + else: + # create a new object and put it to _instance + _instance = super().__new__(cls) + if backend is not None: + _instance.client = cls._backends[backend](**kwargs) + else: + _instance.client = cls._prefix_to_backends[prefix](**kwargs) + + cls._instances[arg_key] = _instance + + return _instance + + @property + def name(self): + return self.client.name + + @property + def allow_symlink(self): + return self.client.allow_symlink + + @staticmethod + def parse_uri_prefix(uri: Union[str, Path]) -> Optional[str]: + """Parse the prefix of a uri. + + Args: + uri (str | Path): Uri to be parsed that contains the file prefix. + + Examples: + >>> FileClient.parse_uri_prefix('s3://path/of/your/file') + 's3' + + Returns: + str | None: Return the prefix of uri if the uri contains '://' + else ``None``. + """ + assert is_filepath(uri) + uri = str(uri) + if '://' not in uri: + return None + else: + prefix, _ = uri.split('://') + # In the case of PetrelBackend, the prefix may contains the cluster + # name like clusterName:s3 + if ':' in prefix: + _, prefix = prefix.split(':') + return prefix + + @classmethod + def infer_client( + cls, file_client_args: Optional[dict] = None, uri: Optional[Union[str, Path]] = None + ) -> 'FileClient': + """Infer a suitable file client based on the URI and arguments. + + Args: + file_client_args (dict, optional): Arguments to instantiate a + FileClient. Default: None. + uri (str | Path, optional): Uri to be parsed that contains the file + prefix. Default: None. + + Examples: + >>> uri = 's3://path/of/your/file' + >>> file_client = FileClient.infer_client(uri=uri) + >>> file_client_args = {'backend': 'petrel'} + >>> file_client = FileClient.infer_client(file_client_args) + + Returns: + FileClient: Instantiated FileClient object. + """ + assert file_client_args is not None or uri is not None + if file_client_args is None: + file_prefix = cls.parse_uri_prefix(uri) # type: ignore + return cls(prefix=file_prefix) + else: + return cls(**file_client_args) + + @classmethod + def _register_backend(cls, name, backend, force=False, prefixes=None): + if not isinstance(name, str): + raise TypeError('the backend name should be a string, ' f'but got {type(name)}') + if not inspect.isclass(backend): + raise TypeError(f'backend should be a class but got {type(backend)}') + if not issubclass(backend, BaseStorageBackend): + raise TypeError(f'backend {backend} is not a subclass of BaseStorageBackend') + if not force and name in cls._backends: + raise KeyError( + f'{name} is already registered as a storage backend, ' 'add "force=True" if you want to override it' + ) + + if name in cls._backends and force: + cls._overridden_backends.add(name) + cls._backends[name] = backend + + if prefixes is not None: + if isinstance(prefixes, str): + prefixes = [prefixes] + else: + assert isinstance(prefixes, (list, tuple)) + for prefix in prefixes: + if prefix not in cls._prefix_to_backends: + cls._prefix_to_backends[prefix] = backend + elif (prefix in cls._prefix_to_backends) and force: + cls._overridden_prefixes.add(prefix) + cls._prefix_to_backends[prefix] = backend + else: + raise KeyError( + f'{prefix} is already registered as a storage backend,' + ' add "force=True" if you want to override it' + ) + + @classmethod + def register_backend(cls, name, backend=None, force=False, prefixes=None): + """Register a backend to FileClient. + + This method can be used as a normal class method or a decorator. + + .. code-block:: python + + class NewBackend(BaseStorageBackend): + + def get(self, filepath): + return filepath + + def get_text(self, filepath): + return filepath + + FileClient.register_backend('new', NewBackend) + + or + + .. code-block:: python + + @FileClient.register_backend('new') + class NewBackend(BaseStorageBackend): + + def get(self, filepath): + return filepath + + def get_text(self, filepath): + return filepath + + Args: + name (str): The name of the registered backend. + backend (class, optional): The backend class to be registered, + which must be a subclass of :class:`BaseStorageBackend`. + When this method is used as a decorator, backend is None. + Defaults to None. + force (bool, optional): Whether to override the backend if the name + has already been registered. Defaults to False. + prefixes (str or list[str] or tuple[str], optional): The prefixes + of the registered storage backend. Default: None. + `New in version 1.3.15.` + """ + if backend is not None: + cls._register_backend(name, backend, force=force, prefixes=prefixes) + return + + def _register(backend_cls): + cls._register_backend(name, backend_cls, force=force, prefixes=prefixes) + return backend_cls + + return _register + + def get(self, filepath: Union[str, Path]) -> Union[bytes, memoryview]: + """Read data from a given ``filepath`` with 'rb' mode. + + Note: + There are two types of return values for ``get``, one is ``bytes`` + and the other is ``memoryview``. The advantage of using memoryview + is that you can avoid copying, and if you want to convert it to + ``bytes``, you can use ``.tobytes()``. + + Args: + filepath (str or Path): Path to read data. + + Returns: + bytes | memoryview: Expected bytes object or a memory view of the + bytes object. + """ + return self.client.get(filepath) + + def get_text(self, filepath: Union[str, Path], encoding='utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. + + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + return self.client.get_text(filepath, encoding) + + def put(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'wb' mode. + + Note: + ``put`` should create a directory if the directory of ``filepath`` + does not exist. + + Args: + obj (bytes): Data to be written. + filepath (str or Path): Path to write data. + """ + self.client.put(obj, filepath) + + def put_text(self, obj: str, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'w' mode. + + Note: + ``put_text`` should create a directory if the directory of + ``filepath`` does not exist. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str, optional): The encoding format used to open the + `filepath`. Default: 'utf-8'. + """ + self.client.put_text(obj, filepath) + + def remove(self, filepath: Union[str, Path]) -> None: + """Remove a file. + + Args: + filepath (str, Path): Path to be removed. + """ + self.client.remove(filepath) + + def exists(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path exists. + + Args: + filepath (str or Path): Path to be checked whether exists. + + Returns: + bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. + """ + return self.client.exists(filepath) + + def isdir(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a directory. + + Args: + filepath (str or Path): Path to be checked whether it is a + directory. + + Returns: + bool: Return ``True`` if ``filepath`` points to a directory, + ``False`` otherwise. + """ + return self.client.isdir(filepath) + + def isfile(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a file. + + Args: + filepath (str or Path): Path to be checked whether it is a file. + + Returns: + bool: Return ``True`` if ``filepath`` points to a file, ``False`` + otherwise. + """ + return self.client.isfile(filepath) + + def join_path(self, filepath: Union[str, Path], *filepaths: Union[str, Path]) -> str: + """Concatenate all file paths. + + Join one or more filepath components intelligently. The return value + is the concatenation of filepath and any members of *filepaths. + + Args: + filepath (str or Path): Path to be concatenated. + + Returns: + str: The result of concatenation. + """ + return self.client.join_path(filepath, *filepaths) + + @contextmanager + def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]: + """Download data from ``filepath`` and write the data to local path. + + ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Note: + If the ``filepath`` is a local path, just return itself. + + .. warning:: + ``get_local_path`` is an experimental interface that may change in + the future. + + Args: + filepath (str or Path): Path to be read data. + + Examples: + >>> file_client = FileClient(prefix='s3') + >>> with file_client.get_local_path('s3://bucket/abc.jpg') as path: + ... # do something here + + Yields: + Iterable[str]: Only yield one path. + """ + with self.client.get_local_path(str(filepath)) as local_path: + yield local_path + + def list_dir_or_file( + self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False, + ) -> Iterator[str]: + """Scan a directory to find the interested directories or files in + arbitrary order. + + Note: + :meth:`list_dir_or_file` returns the path relative to ``dir_path``. + + Args: + dir_path (str | Path): Path of the directory. + list_dir (bool): List the directories. Default: True. + list_file (bool): List the path of files. Default: True. + suffix (str or tuple[str], optional): File suffix + that we are interested in. Default: None. + recursive (bool): If set to True, recursively scan the + directory. Default: False. + + Yields: + Iterable[str]: A relative path to ``dir_path``. + """ + yield from self.client.list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/__init__.py new file mode 100644 index 000000000000..aa24d9197283 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .base import BaseFileHandler +from .json_handler import JsonHandler +from .pickle_handler import PickleHandler +from .yaml_handler import YamlHandler + +__all__ = ['BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/base.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/base.py new file mode 100644 index 000000000000..288878bc5728 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/base.py @@ -0,0 +1,30 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import ABCMeta, abstractmethod + + +class BaseFileHandler(metaclass=ABCMeta): + # `str_like` is a flag to indicate whether the type of file object is + # str-like object or bytes-like object. Pickle only processes bytes-like + # objects but json only processes str-like object. If it is str-like + # object, `StringIO` will be used to process the buffer. + str_like = True + + @abstractmethod + def load_from_fileobj(self, file, **kwargs): + pass + + @abstractmethod + def dump_to_fileobj(self, obj, file, **kwargs): + pass + + @abstractmethod + def dump_to_str(self, obj, **kwargs): + pass + + def load_from_path(self, filepath, mode='r', **kwargs): + with open(filepath, mode) as f: + return self.load_from_fileobj(f, **kwargs) + + def dump_to_path(self, obj, filepath, mode='w', **kwargs): + with open(filepath, mode) as f: + self.dump_to_fileobj(obj, f, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py new file mode 100644 index 000000000000..c95a8b72d202 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/json_handler.py @@ -0,0 +1,35 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import json + +import numpy as np + +from .base import BaseFileHandler + + +def set_default(obj): + """Set default json values for non-serializable values. + + It helps convert ``set``, ``range`` and ``np.ndarray`` data types to list. + It also converts ``np.generic`` (including ``np.int32``, ``np.float32``, + etc.) into plain numbers of plain python built-in types. + """ + if isinstance(obj, (set, range)): + return list(obj) + elif isinstance(obj, np.ndarray): + return obj.tolist() + elif isinstance(obj, np.generic): + return obj.item() + raise TypeError(f'{type(obj)} is unsupported for json dump') + + +class JsonHandler(BaseFileHandler): + def load_from_fileobj(self, file): + return json.load(file) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('default', set_default) + json.dump(obj, file, **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('default', set_default) + return json.dumps(obj, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py new file mode 100644 index 000000000000..fffd741130ff --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/pickle_handler.py @@ -0,0 +1,26 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pickle + +from .base import BaseFileHandler + + +class PickleHandler(BaseFileHandler): + + str_like = False + + def load_from_fileobj(self, file, **kwargs): + return pickle.load(file, **kwargs) + + def load_from_path(self, filepath, **kwargs): + return super(PickleHandler, self).load_from_path(filepath, mode='rb', **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('protocol', 2) + return pickle.dumps(obj, **kwargs) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('protocol', 2) + pickle.dump(obj, file, **kwargs) + + def dump_to_path(self, obj, filepath, **kwargs): + super(PickleHandler, self).dump_to_path(obj, filepath, mode='wb', **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py new file mode 100644 index 000000000000..cf89a1efa70c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/handlers/yaml_handler.py @@ -0,0 +1,24 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import yaml + +try: + from yaml import CDumper as Dumper + from yaml import CLoader as Loader +except ImportError: + from yaml import Loader, Dumper + +from .base import BaseFileHandler # isort:skip + + +class YamlHandler(BaseFileHandler): + def load_from_fileobj(self, file, **kwargs): + kwargs.setdefault('Loader', Loader) + return yaml.load(file, **kwargs) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('Dumper', Dumper) + yaml.dump(obj, file, **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('Dumper', Dumper) + return yaml.dump(obj, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py new file mode 100644 index 000000000000..bcbdc2eb3803 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/io.py @@ -0,0 +1,148 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from io import BytesIO, StringIO +from pathlib import Path + +from ..utils import is_list_of, is_str +from .file_client import FileClient +from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler + +file_handlers = { + 'json': JsonHandler(), + 'yaml': YamlHandler(), + 'yml': YamlHandler(), + 'pickle': PickleHandler(), + 'pkl': PickleHandler(), +} + + +def load(file, file_format=None, file_client_args=None, **kwargs): + """Load data from json/yaml/pickle files. + + This method provides a unified api for loading data from serialized files. + + Note: + In v1.3.16 and later, ``load`` supports loading data from serialized + files those can be storaged in different backends. + + Args: + file (str or :obj:`Path` or file-like object): Filename or a file-like + object. + file_format (str, optional): If not specified, the file format will be + inferred from the file extension, otherwise use the specified one. + Currently supported formats include "json", "yaml/yml" and + "pickle/pkl". + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> load('/path/of/your/file') # file is storaged in disk + >>> load('https://path/of/your/file') # file is storaged in Internet + >>> load('s3://path/of/your/file') # file is storaged in petrel + + Returns: + The content from the file. + """ + if isinstance(file, Path): + file = str(file) + if file_format is None and is_str(file): + file_format = file.split('.')[-1] + if file_format not in file_handlers: + raise TypeError(f'Unsupported format: {file_format}') + + handler = file_handlers[file_format] + if is_str(file): + file_client = FileClient.infer_client(file_client_args, file) + if handler.str_like: + with StringIO(file_client.get_text(file)) as f: + obj = handler.load_from_fileobj(f, **kwargs) + else: + with BytesIO(file_client.get(file)) as f: + obj = handler.load_from_fileobj(f, **kwargs) + elif hasattr(file, 'read'): + obj = handler.load_from_fileobj(file, **kwargs) + else: + raise TypeError('"file" must be a filepath str or a file-object') + return obj + + +def dump(obj, file=None, file_format=None, file_client_args=None, **kwargs): + """Dump data to json/yaml/pickle strings or files. + + This method provides a unified api for dumping data as strings or to files, + and also supports custom arguments for each file format. + + Note: + In v1.3.16 and later, ``dump`` supports dumping data as strings or to + files which is saved to different backends. + + Args: + obj (any): The python object to be dumped. + file (str or :obj:`Path` or file-like object, optional): If not + specified, then the object is dumped to a str, otherwise to a file + specified by the filename or file-like object. + file_format (str, optional): Same as :func:`load`. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> dump('hello world', '/path/of/your/file') # disk + >>> dump('hello world', 's3://path/of/your/file') # ceph or petrel + + Returns: + bool: True for success, False otherwise. + """ + if isinstance(file, Path): + file = str(file) + if file_format is None: + if is_str(file): + file_format = file.split('.')[-1] + elif file is None: + raise ValueError('file_format must be specified since file is None') + if file_format not in file_handlers: + raise TypeError(f'Unsupported format: {file_format}') + + handler = file_handlers[file_format] + if file is None: + return handler.dump_to_str(obj, **kwargs) + elif is_str(file): + file_client = FileClient.infer_client(file_client_args, file) + if handler.str_like: + with StringIO() as f: + handler.dump_to_fileobj(obj, f, **kwargs) + file_client.put_text(f.getvalue(), file) + else: + with BytesIO() as f: + handler.dump_to_fileobj(obj, f, **kwargs) + file_client.put(f.getvalue(), file) + elif hasattr(file, 'write'): + handler.dump_to_fileobj(obj, file, **kwargs) + else: + raise TypeError('"file" must be a filename str or a file-object') + + +def _register_handler(handler, file_formats): + """Register a handler for some file extensions. + + Args: + handler (:obj:`BaseFileHandler`): Handler to be registered. + file_formats (str or list[str]): File formats to be handled by this + handler. + """ + if not isinstance(handler, BaseFileHandler): + raise TypeError(f'handler must be a child of BaseFileHandler, not {type(handler)}') + if isinstance(file_formats, str): + file_formats = [file_formats] + if not is_list_of(file_formats, str): + raise TypeError('file_formats must be a str or a list of str') + for ext in file_formats: + file_handlers[ext] = handler + + +def register_handler(file_formats, **kwargs): + def wrap(cls): + _register_handler(cls(**kwargs), file_formats) + return cls + + return wrap diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py new file mode 100644 index 000000000000..19c618d9a034 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/fileio/parse.py @@ -0,0 +1,89 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +from io import StringIO + +from .file_client import FileClient + + +def list_from_file(filename, prefix='', offset=0, max_num=0, encoding='utf-8', file_client_args=None): + """Load a text file and parse the content as a list of strings. + + Note: + In v1.3.16 and later, ``list_from_file`` supports loading a text file + which can be storaged in different backends and parsing the content as + a list for strings. + + Args: + filename (str): Filename. + prefix (str): The prefix to be inserted to the beginning of each item. + offset (int): The offset of lines. + max_num (int): The maximum number of lines to be read, + zeros and negatives mean no limitation. + encoding (str): Encoding used to open the file. Default utf-8. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> list_from_file('/path/of/your/file') # disk + ['hello', 'world'] + >>> list_from_file('s3://path/of/your/file') # ceph or petrel + ['hello', 'world'] + + Returns: + list[str]: A list of strings. + """ + cnt = 0 + item_list = [] + file_client = FileClient.infer_client(file_client_args, filename) + with StringIO(file_client.get_text(filename, encoding)) as f: + for _ in range(offset): + f.readline() + for line in f: + if 0 < max_num <= cnt: + break + item_list.append(prefix + line.rstrip('\n\r')) + cnt += 1 + return item_list + + +def dict_from_file(filename, key_type=str, encoding='utf-8', file_client_args=None): + """Load a text file and parse the content as a dict. + + Each line of the text file will be two or more columns split by + whitespaces or tabs. The first column will be parsed as dict keys, and + the following columns will be parsed as dict values. + + Note: + In v1.3.16 and later, ``dict_from_file`` supports loading a text file + which can be storaged in different backends and parsing the content as + a dict. + + Args: + filename(str): Filename. + key_type(type): Type of the dict keys. str is user by default and + type conversion will be performed if specified. + encoding (str): Encoding used to open the file. Default utf-8. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> dict_from_file('/path/of/your/file') # disk + {'key1': 'value1', 'key2': 'value2'} + >>> dict_from_file('s3://path/of/your/file') # ceph or petrel + {'key1': 'value1', 'key2': 'value2'} + + Returns: + dict: The parsed contents. + """ + mapping = {} + file_client = FileClient.infer_client(file_client_args, filename) + with StringIO(file_client.get_text(filename, encoding)) as f: + for line in f: + items = line.rstrip('\n').split() + assert len(items) >= 2 + key = key_type(items[0]) + val = items[1:] if len(items) > 2 else items[1] + mapping[key] = val + return mapping diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py new file mode 100644 index 000000000000..bf63e993892c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/__init__.py @@ -0,0 +1,106 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .colorspace import ( + bgr2gray, + bgr2hls, + bgr2hsv, + bgr2rgb, + bgr2ycbcr, + gray2bgr, + gray2rgb, + hls2bgr, + hsv2bgr, + imconvert, + rgb2bgr, + rgb2gray, + rgb2ycbcr, + ycbcr2bgr, + ycbcr2rgb, +) +from .geometric import ( + cutout, + imcrop, + imflip, + imflip_, + impad, + impad_to_multiple, + imrescale, + imresize, + imresize_like, + imresize_to_multiple, + imrotate, + imshear, + imtranslate, + rescale_size, +) +from .io import imfrombytes, imread, imwrite, supported_backends, use_backend +from .misc import tensor2imgs +from .photometric import ( + adjust_brightness, + adjust_color, + adjust_contrast, + adjust_lighting, + adjust_sharpness, + auto_contrast, + clahe, + imdenormalize, + imequalize, + iminvert, + imnormalize, + imnormalize_, + lut_transform, + posterize, + solarize, +) + +__all__ = [ + 'bgr2gray', + 'bgr2hls', + 'bgr2hsv', + 'bgr2rgb', + 'gray2bgr', + 'gray2rgb', + 'hls2bgr', + 'hsv2bgr', + 'imconvert', + 'rgb2bgr', + 'rgb2gray', + 'imrescale', + 'imresize', + 'imresize_like', + 'imresize_to_multiple', + 'rescale_size', + 'imcrop', + 'imflip', + 'imflip_', + 'impad', + 'impad_to_multiple', + 'imrotate', + 'imfrombytes', + 'imread', + 'imwrite', + 'supported_backends', + 'use_backend', + 'imdenormalize', + 'imnormalize', + 'imnormalize_', + 'iminvert', + 'posterize', + 'solarize', + 'rgb2ycbcr', + 'bgr2ycbcr', + 'ycbcr2rgb', + 'ycbcr2bgr', + 'tensor2imgs', + 'imshear', + 'imtranslate', + 'adjust_color', + 'imequalize', + 'adjust_brightness', + 'adjust_contrast', + 'lut_transform', + 'clahe', + 'adjust_sharpness', + 'auto_contrast', + 'cutout', + 'adjust_lighting', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py new file mode 100644 index 000000000000..e167caac49f3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/colorspace.py @@ -0,0 +1,304 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import cv2 +import numpy as np + + +def imconvert(img, src, dst): + """Convert an image from the src colorspace to dst colorspace. + + Args: + img (ndarray): The input image. + src (str): The source colorspace, e.g., 'rgb', 'hsv'. + dst (str): The destination colorspace, e.g., 'rgb', 'hsv'. + + Returns: + ndarray: The converted image. + """ + code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}') + out_img = cv2.cvtColor(img, code) + return out_img + + +def bgr2gray(img, keepdim=False): + """Convert a BGR image to grayscale image. + + Args: + img (ndarray): The input image. + keepdim (bool): If False (by default), then return the grayscale image + with 2 dims, otherwise 3 dims. + + Returns: + ndarray: The converted grayscale image. + """ + out_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + if keepdim: + out_img = out_img[..., None] + return out_img + + +def rgb2gray(img, keepdim=False): + """Convert a RGB image to grayscale image. + + Args: + img (ndarray): The input image. + keepdim (bool): If False (by default), then return the grayscale image + with 2 dims, otherwise 3 dims. + + Returns: + ndarray: The converted grayscale image. + """ + out_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) + if keepdim: + out_img = out_img[..., None] + return out_img + + +def gray2bgr(img): + """Convert a grayscale image to BGR image. + + Args: + img (ndarray): The input image. + + Returns: + ndarray: The converted BGR image. + """ + img = img[..., None] if img.ndim == 2 else img + out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + return out_img + + +def gray2rgb(img): + """Convert a grayscale image to RGB image. + + Args: + img (ndarray): The input image. + + Returns: + ndarray: The converted RGB image. + """ + img = img[..., None] if img.ndim == 2 else img + out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) + return out_img + + +def _convert_input_type_range(img): + """Convert the type and range of the input image. + + It converts the input image to np.float32 type and range of [0, 1]. + It is mainly used for pre-processing the input image in colorspace + conversion functions such as rgb2ycbcr and ycbcr2rgb. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + + Returns: + (ndarray): The converted image with type of np.float32 and range of + [0, 1]. + """ + img_type = img.dtype + img = img.astype(np.float32) + if img_type == np.float32: + pass + elif img_type == np.uint8: + img /= 255.0 + else: + raise TypeError('The img type should be np.float32 or np.uint8, ' f'but got {img_type}') + return img + + +def _convert_output_type_range(img, dst_type): + """Convert the type and range of the image according to dst_type. + + It converts the image to desired type and range. If `dst_type` is np.uint8, + images will be converted to np.uint8 type with range [0, 255]. If + `dst_type` is np.float32, it converts the image to np.float32 type with + range [0, 1]. + It is mainly used for post-processing images in colorspace conversion + functions such as rgb2ycbcr and ycbcr2rgb. + + Args: + img (ndarray): The image to be converted with np.float32 type and + range [0, 255]. + dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it + converts the image to np.uint8 type with range [0, 255]. If + dst_type is np.float32, it converts the image to np.float32 type + with range [0, 1]. + + Returns: + (ndarray): The converted image with desired type and range. + """ + if dst_type not in (np.uint8, np.float32): + raise TypeError('The dst_type should be np.float32 or np.uint8, ' f'but got {dst_type}') + if dst_type == np.uint8: + img = img.round() + else: + img /= 255.0 + return img.astype(dst_type) + + +def rgb2ycbcr(img, y_only=False): + """Convert a RGB image to YCbCr image. + + This function produces the same results as Matlab's `rgb2ycbcr` function. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `RGB <-> YCrCb`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + y_only (bool): Whether to only return Y channel. Default: False. + + Returns: + ndarray: The converted YCbCr image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) + if y_only: + out_img = np.dot(img, [65.481, 128.553, 24.966]) + 16.0 + else: + out_img = np.matmul(img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786], [24.966, 112.0, -18.214]]) + [ + 16, + 128, + 128, + ] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def bgr2ycbcr(img, y_only=False): + """Convert a BGR image to YCbCr image. + + The bgr version of rgb2ycbcr. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + y_only (bool): Whether to only return Y channel. Default: False. + + Returns: + ndarray: The converted YCbCr image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) + if y_only: + out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0 + else: + out_img = np.matmul(img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786], [65.481, -37.797, 112.0]]) + [ + 16, + 128, + 128, + ] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def ycbcr2rgb(img): + """Convert a YCbCr image to RGB image. + + This function produces the same results as Matlab's ycbcr2rgb function. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `YCrCb <-> RGB`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + + Returns: + ndarray: The converted RGB image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) * 255 + out_img = np.matmul( + img, [[0.00456621, 0.00456621, 0.00456621], [0, -0.00153632, 0.00791071], [0.00625893, -0.00318811, 0]] + ) * 255.0 + [-222.921, 135.576, -276.836] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def ycbcr2bgr(img): + """Convert a YCbCr image to BGR image. + + The bgr version of ycbcr2rgb. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `YCrCb <-> BGR`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + + Returns: + ndarray: The converted BGR image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) * 255 + out_img = np.matmul( + img, [[0.00456621, 0.00456621, 0.00456621], [0.00791071, -0.00153632, 0], [0, -0.00318811, 0.00625893]] + ) * 255.0 + [-276.836, 135.576, -222.921] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def convert_color_factory(src, dst): + + code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}') + + def convert_color(img): + out_img = cv2.cvtColor(img, code) + return out_img + + convert_color.__doc__ = f"""Convert a {src.upper()} image to {dst.upper()} + image. + + Args: + img (ndarray or str): The input image. + + Returns: + ndarray: The converted {dst.upper()} image. + """ + + return convert_color + + +bgr2rgb = convert_color_factory('bgr', 'rgb') + +rgb2bgr = convert_color_factory('rgb', 'bgr') + +bgr2hsv = convert_color_factory('bgr', 'hsv') + +hsv2bgr = convert_color_factory('hsv', 'bgr') + +bgr2hls = convert_color_factory('bgr', 'hls') + +hls2bgr = convert_color_factory('hls', 'bgr') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py new file mode 100644 index 000000000000..2c96fae34feb --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/geometric.py @@ -0,0 +1,671 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numbers + +import cv2 +import numpy as np + +from ..utils import to_2tuple +from .io import imread_backend + +try: + from PIL import Image +except ImportError: + Image = None + + +def _scale_size(size, scale): + """Rescale a size by a ratio. + + Args: + size (tuple[int]): (w, h). + scale (float | tuple(float)): Scaling factor. + + Returns: + tuple[int]: scaled size. + """ + if isinstance(scale, (float, int)): + scale = (scale, scale) + w, h = size + return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5) + + +cv2_interp_codes = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'bicubic': cv2.INTER_CUBIC, + 'area': cv2.INTER_AREA, + 'lanczos': cv2.INTER_LANCZOS4, +} + +if Image is not None: + pillow_interp_codes = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING, + } + + +def imresize(img, size, return_scale=False, interpolation='bilinear', out=None, backend=None): + """Resize image to a given size. + + Args: + img (ndarray): The input image. + size (tuple[int]): Target size (w, h). + return_scale (bool): Whether to return `w_scale` and `h_scale`. + interpolation (str): Interpolation method, accepted values are + "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' + backend, "nearest", "bilinear" for 'pillow' backend. + out (ndarray): The output destination. + backend (str | None): The image resize backend type. Options are `cv2`, + `pillow`, `None`. If backend is None, the global imread_backend + specified by ``mmcv.use_backend()`` will be used. Default: None. + + Returns: + tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or + `resized_img`. + """ + h, w = img.shape[:2] + if backend is None: + backend = imread_backend + if backend not in ['cv2', 'pillow']: + raise ValueError(f'backend: {backend} is not supported for resize.' f"Supported backends are 'cv2', 'pillow'") + + if backend == 'pillow': + assert img.dtype == np.uint8, 'Pillow backend only support uint8 type' + pil_image = Image.fromarray(img) + pil_image = pil_image.resize(size, pillow_interp_codes[interpolation]) + resized_img = np.array(pil_image) + else: + resized_img = cv2.resize(img, size, dst=out, interpolation=cv2_interp_codes[interpolation]) + if not return_scale: + return resized_img + else: + w_scale = size[0] / w + h_scale = size[1] / h + return resized_img, w_scale, h_scale + + +def imresize_to_multiple( + img, + divisor, + size=None, + scale_factor=None, + keep_ratio=False, + return_scale=False, + interpolation='bilinear', + out=None, + backend=None, +): + """Resize image according to a given size or scale factor and then rounds + up the the resized or rescaled image size to the nearest value that can be + divided by the divisor. + + Args: + img (ndarray): The input image. + divisor (int | tuple): Resized image size will be a multiple of + divisor. If divisor is a tuple, divisor should be + (w_divisor, h_divisor). + size (None | int | tuple[int]): Target size (w, h). Default: None. + scale_factor (None | float | tuple[float]): Multiplier for spatial + size. Should match input size if it is a tuple and the 2D style is + (w_scale_factor, h_scale_factor). Default: None. + keep_ratio (bool): Whether to keep the aspect ratio when resizing the + image. Default: False. + return_scale (bool): Whether to return `w_scale` and `h_scale`. + interpolation (str): Interpolation method, accepted values are + "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' + backend, "nearest", "bilinear" for 'pillow' backend. + out (ndarray): The output destination. + backend (str | None): The image resize backend type. Options are `cv2`, + `pillow`, `None`. If backend is None, the global imread_backend + specified by ``mmcv.use_backend()`` will be used. Default: None. + + Returns: + tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or + `resized_img`. + """ + h, w = img.shape[:2] + if size is not None and scale_factor is not None: + raise ValueError('only one of size or scale_factor should be defined') + elif size is None and scale_factor is None: + raise ValueError('one of size or scale_factor should be defined') + elif size is not None: + size = to_2tuple(size) + if keep_ratio: + size = rescale_size((w, h), size, return_scale=False) + else: + size = _scale_size((w, h), scale_factor) + + divisor = to_2tuple(divisor) + size = tuple([int(np.ceil(s / d)) * d for s, d in zip(size, divisor)]) + resized_img, w_scale, h_scale = imresize( + img, size, return_scale=True, interpolation=interpolation, out=out, backend=backend + ) + if return_scale: + return resized_img, w_scale, h_scale + else: + return resized_img + + +def imresize_like(img, dst_img, return_scale=False, interpolation='bilinear', backend=None): + """Resize image to the same size of a given image. + + Args: + img (ndarray): The input image. + dst_img (ndarray): The target image. + return_scale (bool): Whether to return `w_scale` and `h_scale`. + interpolation (str): Same as :func:`resize`. + backend (str | None): Same as :func:`resize`. + + Returns: + tuple or ndarray: (`resized_img`, `w_scale`, `h_scale`) or + `resized_img`. + """ + h, w = dst_img.shape[:2] + return imresize(img, (w, h), return_scale, interpolation, backend=backend) + + +def rescale_size(old_size, scale, return_scale=False): + """Calculate the new size to be rescaled to. + + Args: + old_size (tuple[int]): The old size (w, h) of image. + scale (float | tuple[int]): The scaling factor or maximum size. + If it is a float number, then the image will be rescaled by this + factor, else if it is a tuple of 2 integers, then the image will + be rescaled as large as possible within the scale. + return_scale (bool): Whether to return the scaling factor besides the + rescaled image size. + + Returns: + tuple[int]: The new rescaled image size. + """ + w, h = old_size + if isinstance(scale, (float, int)): + if scale <= 0: + raise ValueError(f'Invalid scale {scale}, must be positive.') + scale_factor = scale + elif isinstance(scale, tuple): + max_long_edge = max(scale) + max_short_edge = min(scale) + scale_factor = min(max_long_edge / max(h, w), max_short_edge / min(h, w)) + else: + raise TypeError(f'Scale must be a number or tuple of int, but got {type(scale)}') + + new_size = _scale_size((w, h), scale_factor) + + if return_scale: + return new_size, scale_factor + else: + return new_size + + +def imrescale(img, scale, return_scale=False, interpolation='bilinear', backend=None): + """Resize image while keeping the aspect ratio. + + Args: + img (ndarray): The input image. + scale (float | tuple[int]): The scaling factor or maximum size. + If it is a float number, then the image will be rescaled by this + factor, else if it is a tuple of 2 integers, then the image will + be rescaled as large as possible within the scale. + return_scale (bool): Whether to return the scaling factor besides the + rescaled image. + interpolation (str): Same as :func:`resize`. + backend (str | None): Same as :func:`resize`. + + Returns: + ndarray: The rescaled image. + """ + h, w = img.shape[:2] + new_size, scale_factor = rescale_size((w, h), scale, return_scale=True) + rescaled_img = imresize(img, new_size, interpolation=interpolation, backend=backend) + if return_scale: + return rescaled_img, scale_factor + else: + return rescaled_img + + +def imflip(img, direction='horizontal'): + """Flip an image horizontally or vertically. + + Args: + img (ndarray): Image to be flipped. + direction (str): The flip direction, either "horizontal" or + "vertical" or "diagonal". + + Returns: + ndarray: The flipped image. + """ + assert direction in ['horizontal', 'vertical', 'diagonal'] + if direction == 'horizontal': + return np.flip(img, axis=1) + elif direction == 'vertical': + return np.flip(img, axis=0) + else: + return np.flip(img, axis=(0, 1)) + + +def imflip_(img, direction='horizontal'): + """Inplace flip an image horizontally or vertically. + + Args: + img (ndarray): Image to be flipped. + direction (str): The flip direction, either "horizontal" or + "vertical" or "diagonal". + + Returns: + ndarray: The flipped image (inplace). + """ + assert direction in ['horizontal', 'vertical', 'diagonal'] + if direction == 'horizontal': + return cv2.flip(img, 1, img) + elif direction == 'vertical': + return cv2.flip(img, 0, img) + else: + return cv2.flip(img, -1, img) + + +def imrotate(img, angle, center=None, scale=1.0, border_value=0, interpolation='bilinear', auto_bound=False): + """Rotate an image. + + Args: + img (ndarray): Image to be rotated. + angle (float): Rotation angle in degrees, positive values mean + clockwise rotation. + center (tuple[float], optional): Center point (w, h) of the rotation in + the source image. If not specified, the center of the image will be + used. + scale (float): Isotropic scale factor. + border_value (int): Border value. + interpolation (str): Same as :func:`resize`. + auto_bound (bool): Whether to adjust the image size to cover the whole + rotated image. + + Returns: + ndarray: The rotated image. + """ + if center is not None and auto_bound: + raise ValueError('`auto_bound` conflicts with `center`') + h, w = img.shape[:2] + if center is None: + center = ((w - 1) * 0.5, (h - 1) * 0.5) + assert isinstance(center, tuple) + + matrix = cv2.getRotationMatrix2D(center, -angle, scale) + if auto_bound: + cos = np.abs(matrix[0, 0]) + sin = np.abs(matrix[0, 1]) + new_w = h * sin + w * cos + new_h = h * cos + w * sin + matrix[0, 2] += (new_w - w) * 0.5 + matrix[1, 2] += (new_h - h) * 0.5 + w = int(np.round(new_w)) + h = int(np.round(new_h)) + rotated = cv2.warpAffine(img, matrix, (w, h), flags=cv2_interp_codes[interpolation], borderValue=border_value) + return rotated + + +def bbox_clip(bboxes, img_shape): + """Clip bboxes to fit the image shape. + + Args: + bboxes (ndarray): Shape (..., 4*k) + img_shape (tuple[int]): (height, width) of the image. + + Returns: + ndarray: Clipped bboxes. + """ + assert bboxes.shape[-1] % 4 == 0 + cmin = np.empty(bboxes.shape[-1], dtype=bboxes.dtype) + cmin[0::2] = img_shape[1] - 1 + cmin[1::2] = img_shape[0] - 1 + clipped_bboxes = np.maximum(np.minimum(bboxes, cmin), 0) + return clipped_bboxes + + +def bbox_scaling(bboxes, scale, clip_shape=None): + """Scaling bboxes w.r.t the box center. + + Args: + bboxes (ndarray): Shape(..., 4). + scale (float): Scaling factor. + clip_shape (tuple[int], optional): If specified, bboxes that exceed the + boundary will be clipped according to the given shape (h, w). + + Returns: + ndarray: Scaled bboxes. + """ + if float(scale) == 1.0: + scaled_bboxes = bboxes.copy() + else: + w = bboxes[..., 2] - bboxes[..., 0] + 1 + h = bboxes[..., 3] - bboxes[..., 1] + 1 + dw = (w * (scale - 1)) * 0.5 + dh = (h * (scale - 1)) * 0.5 + scaled_bboxes = bboxes + np.stack((-dw, -dh, dw, dh), axis=-1) + if clip_shape is not None: + return bbox_clip(scaled_bboxes, clip_shape) + else: + return scaled_bboxes + + +def imcrop(img, bboxes, scale=1.0, pad_fill=None): + """Crop image patches. + + 3 steps: scale the bboxes -> clip bboxes -> crop and pad. + + Args: + img (ndarray): Image to be cropped. + bboxes (ndarray): Shape (k, 4) or (4, ), location of cropped bboxes. + scale (float, optional): Scale ratio of bboxes, the default value + 1.0 means no padding. + pad_fill (Number | list[Number]): Value to be filled for padding. + Default: None, which means no padding. + + Returns: + list[ndarray] | ndarray: The cropped image patches. + """ + chn = 1 if img.ndim == 2 else img.shape[2] + if pad_fill is not None: + if isinstance(pad_fill, (int, float)): + pad_fill = [pad_fill for _ in range(chn)] + assert len(pad_fill) == chn + + _bboxes = bboxes[None, ...] if bboxes.ndim == 1 else bboxes + scaled_bboxes = bbox_scaling(_bboxes, scale).astype(np.int32) + clipped_bbox = bbox_clip(scaled_bboxes, img.shape) + + patches = [] + for i in range(clipped_bbox.shape[0]): + x1, y1, x2, y2 = tuple(clipped_bbox[i, :]) + if pad_fill is None: + patch = img[y1 : y2 + 1, x1 : x2 + 1, ...] + else: + _x1, _y1, _x2, _y2 = tuple(scaled_bboxes[i, :]) + if chn == 1: + patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1) + else: + patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1, chn) + patch = np.array(pad_fill, dtype=img.dtype) * np.ones(patch_shape, dtype=img.dtype) + x_start = 0 if _x1 >= 0 else -_x1 + y_start = 0 if _y1 >= 0 else -_y1 + w = x2 - x1 + 1 + h = y2 - y1 + 1 + patch[y_start : y_start + h, x_start : x_start + w, ...] = img[y1 : y1 + h, x1 : x1 + w, ...] + patches.append(patch) + + if bboxes.ndim == 1: + return patches[0] + else: + return patches + + +def impad(img, *, shape=None, padding=None, pad_val=0, padding_mode='constant'): + """Pad the given image to a certain shape or pad on all sides with + specified padding mode and padding value. + + Args: + img (ndarray): Image to be padded. + shape (tuple[int]): Expected padding shape (h, w). Default: None. + padding (int or tuple[int]): Padding on each border. If a single int is + provided this is used to pad all borders. If tuple of length 2 is + provided this is the padding on left/right and top/bottom + respectively. If a tuple of length 4 is provided this is the + padding for the left, top, right and bottom borders respectively. + Default: None. Note that `shape` and `padding` can not be both + set. + pad_val (Number | Sequence[Number]): Values to be filled in padding + areas when padding_mode is 'constant'. Default: 0. + padding_mode (str): Type of padding. Should be: constant, edge, + reflect or symmetric. Default: constant. + + - constant: pads with a constant value, this value is specified + with pad_val. + - edge: pads with the last value at the edge of the image. + - reflect: pads with reflection of image without repeating the + last value on the edge. For example, padding [1, 2, 3, 4] + with 2 elements on both sides in reflect mode will result + in [3, 2, 1, 2, 3, 4, 3, 2]. + - symmetric: pads with reflection of image repeating the last + value on the edge. For example, padding [1, 2, 3, 4] with + 2 elements on both sides in symmetric mode will result in + [2, 1, 1, 2, 3, 4, 4, 3] + + Returns: + ndarray: The padded image. + """ + + assert (shape is not None) ^ (padding is not None) + if shape is not None: + padding = (0, 0, shape[1] - img.shape[1], shape[0] - img.shape[0]) + + # check pad_val + if isinstance(pad_val, tuple): + assert len(pad_val) == img.shape[-1] + elif not isinstance(pad_val, numbers.Number): + raise TypeError('pad_val must be a int or a tuple. ' f'But received {type(pad_val)}') + + # check padding + if isinstance(padding, tuple) and len(padding) in [2, 4]: + if len(padding) == 2: + padding = (padding[0], padding[1], padding[0], padding[1]) + elif isinstance(padding, numbers.Number): + padding = (padding, padding, padding, padding) + else: + raise ValueError('Padding must be a int or a 2, or 4 element tuple.' f'But received {padding}') + + # check padding mode + assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'] + + border_type = { + 'constant': cv2.BORDER_CONSTANT, + 'edge': cv2.BORDER_REPLICATE, + 'reflect': cv2.BORDER_REFLECT_101, + 'symmetric': cv2.BORDER_REFLECT, + } + img = cv2.copyMakeBorder( + img, padding[1], padding[3], padding[0], padding[2], border_type[padding_mode], value=pad_val + ) + + return img + + +def impad_to_multiple(img, divisor, pad_val=0): + """Pad an image to ensure each edge to be multiple to some number. + + Args: + img (ndarray): Image to be padded. + divisor (int): Padded image edges will be multiple to divisor. + pad_val (Number | Sequence[Number]): Same as :func:`impad`. + + Returns: + ndarray: The padded image. + """ + pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor + pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor + return impad(img, shape=(pad_h, pad_w), pad_val=pad_val) + + +def cutout(img, shape, pad_val=0): + """Randomly cut out a rectangle from the original img. + + Args: + img (ndarray): Image to be cutout. + shape (int | tuple[int]): Expected cutout shape (h, w). If given as a + int, the value will be used for both h and w. + pad_val (int | float | tuple[int | float]): Values to be filled in the + cut area. Defaults to 0. + + Returns: + ndarray: The cutout image. + """ + + channels = 1 if img.ndim == 2 else img.shape[2] + if isinstance(shape, int): + cut_h, cut_w = shape, shape + else: + assert isinstance(shape, tuple) and len(shape) == 2, ( + f'shape must be a int or a tuple with length 2, but got type ' f'{type(shape)} instead.' + ) + cut_h, cut_w = shape + if isinstance(pad_val, (int, float)): + pad_val = tuple([pad_val] * channels) + elif isinstance(pad_val, tuple): + assert len(pad_val) == channels, ( + 'Expected the num of elements in tuple equals the channels' + 'of input image. Found {} vs {}'.format(len(pad_val), channels) + ) + else: + raise TypeError(f'Invalid type {type(pad_val)} for `pad_val`') + + img_h, img_w = img.shape[:2] + y0 = np.random.uniform(img_h) + x0 = np.random.uniform(img_w) + + y1 = int(max(0, y0 - cut_h / 2.0)) + x1 = int(max(0, x0 - cut_w / 2.0)) + y2 = min(img_h, y1 + cut_h) + x2 = min(img_w, x1 + cut_w) + + if img.ndim == 2: + patch_shape = (y2 - y1, x2 - x1) + else: + patch_shape = (y2 - y1, x2 - x1, channels) + + img_cutout = img.copy() + patch = np.array(pad_val, dtype=img.dtype) * np.ones(patch_shape, dtype=img.dtype) + img_cutout[y1:y2, x1:x2, ...] = patch + + return img_cutout + + +def _get_shear_matrix(magnitude, direction='horizontal'): + """Generate the shear matrix for transformation. + + Args: + magnitude (int | float): The magnitude used for shear. + direction (str): The flip direction, either "horizontal" + or "vertical". + + Returns: + ndarray: The shear matrix with dtype float32. + """ + if direction == 'horizontal': + shear_matrix = np.float32([[1, magnitude, 0], [0, 1, 0]]) + elif direction == 'vertical': + shear_matrix = np.float32([[1, 0, 0], [magnitude, 1, 0]]) + return shear_matrix + + +def imshear(img, magnitude, direction='horizontal', border_value=0, interpolation='bilinear'): + """Shear an image. + + Args: + img (ndarray): Image to be sheared with format (h, w) + or (h, w, c). + magnitude (int | float): The magnitude used for shear. + direction (str): The flip direction, either "horizontal" + or "vertical". + border_value (int | tuple[int]): Value used in case of a + constant border. + interpolation (str): Same as :func:`resize`. + + Returns: + ndarray: The sheared image. + """ + assert direction in ['horizontal', 'vertical'], f'Invalid direction: {direction}' + height, width = img.shape[:2] + if img.ndim == 2: + channels = 1 + elif img.ndim == 3: + channels = img.shape[-1] + if isinstance(border_value, int): + border_value = tuple([border_value] * channels) + elif isinstance(border_value, tuple): + assert len(border_value) == channels, ( + 'Expected the num of elements in tuple equals the channels' + 'of input image. Found {} vs {}'.format(len(border_value), channels) + ) + else: + raise ValueError(f'Invalid type {type(border_value)} for `border_value`') + shear_matrix = _get_shear_matrix(magnitude, direction) + sheared = cv2.warpAffine( + img, + shear_matrix, + (width, height), + # Note case when the number elements in `border_value` + # greater than 3 (e.g. shearing masks whose channels large + # than 3) will raise TypeError in `cv2.warpAffine`. + # Here simply slice the first 3 values in `border_value`. + borderValue=border_value[:3], + flags=cv2_interp_codes[interpolation], + ) + return sheared + + +def _get_translate_matrix(offset, direction='horizontal'): + """Generate the translate matrix. + + Args: + offset (int | float): The offset used for translate. + direction (str): The translate direction, either + "horizontal" or "vertical". + + Returns: + ndarray: The translate matrix with dtype float32. + """ + if direction == 'horizontal': + translate_matrix = np.float32([[1, 0, offset], [0, 1, 0]]) + elif direction == 'vertical': + translate_matrix = np.float32([[1, 0, 0], [0, 1, offset]]) + return translate_matrix + + +def imtranslate(img, offset, direction='horizontal', border_value=0, interpolation='bilinear'): + """Translate an image. + + Args: + img (ndarray): Image to be translated with format + (h, w) or (h, w, c). + offset (int | float): The offset used for translate. + direction (str): The translate direction, either "horizontal" + or "vertical". + border_value (int | tuple[int]): Value used in case of a + constant border. + interpolation (str): Same as :func:`resize`. + + Returns: + ndarray: The translated image. + """ + assert direction in ['horizontal', 'vertical'], f'Invalid direction: {direction}' + height, width = img.shape[:2] + if img.ndim == 2: + channels = 1 + elif img.ndim == 3: + channels = img.shape[-1] + if isinstance(border_value, int): + border_value = tuple([border_value] * channels) + elif isinstance(border_value, tuple): + assert len(border_value) == channels, ( + 'Expected the num of elements in tuple equals the channels' + 'of input image. Found {} vs {}'.format(len(border_value), channels) + ) + else: + raise ValueError(f'Invalid type {type(border_value)} for `border_value`.') + translate_matrix = _get_translate_matrix(offset, direction) + translated = cv2.warpAffine( + img, + translate_matrix, + (width, height), + # Note case when the number elements in `border_value` + # greater than 3 (e.g. translating masks whose channels + # large than 3) will raise TypeError in `cv2.warpAffine`. + # Here simply slice the first 3 values in `border_value`. + borderValue=border_value[:3], + flags=cv2_interp_codes[interpolation], + ) + return translated diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py new file mode 100644 index 000000000000..eebffaac43d7 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/io.py @@ -0,0 +1,256 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import io +import os.path as osp +from pathlib import Path + +import cv2 +import numpy as np +from cv2 import IMREAD_COLOR, IMREAD_GRAYSCALE, IMREAD_IGNORE_ORIENTATION, IMREAD_UNCHANGED + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import check_file_exist, is_str, mkdir_or_exist + +try: + from turbojpeg import TJCS_RGB, TJPF_BGR, TJPF_GRAY, TurboJPEG +except ImportError: + TJCS_RGB = TJPF_GRAY = TJPF_BGR = TurboJPEG = None + +try: + from PIL import Image, ImageOps +except ImportError: + Image = None + +try: + import tifffile +except ImportError: + tifffile = None + +jpeg = None +supported_backends = ['cv2', 'turbojpeg', 'pillow', 'tifffile'] + +imread_flags = { + 'color': IMREAD_COLOR, + 'grayscale': IMREAD_GRAYSCALE, + 'unchanged': IMREAD_UNCHANGED, + 'color_ignore_orientation': IMREAD_IGNORE_ORIENTATION | IMREAD_COLOR, + 'grayscale_ignore_orientation': IMREAD_IGNORE_ORIENTATION | IMREAD_GRAYSCALE, +} + +imread_backend = 'cv2' + + +def use_backend(backend): + """Select a backend for image decoding. + + Args: + backend (str): The image decoding backend type. Options are `cv2`, + `pillow`, `turbojpeg` (see https://github.com/lilohuang/PyTurboJPEG) + and `tifffile`. `turbojpeg` is faster but it only supports `.jpeg` + file format. + """ + assert backend in supported_backends + global imread_backend + imread_backend = backend + if imread_backend == 'turbojpeg': + if TurboJPEG is None: + raise ImportError('`PyTurboJPEG` is not installed') + global jpeg + if jpeg is None: + jpeg = TurboJPEG() + elif imread_backend == 'pillow': + if Image is None: + raise ImportError('`Pillow` is not installed') + elif imread_backend == 'tifffile': + if tifffile is None: + raise ImportError('`tifffile` is not installed') + + +def _jpegflag(flag='color', channel_order='bgr'): + channel_order = channel_order.lower() + if channel_order not in ['rgb', 'bgr']: + raise ValueError('channel order must be either "rgb" or "bgr"') + + if flag == 'color': + if channel_order == 'bgr': + return TJPF_BGR + elif channel_order == 'rgb': + return TJCS_RGB + elif flag == 'grayscale': + return TJPF_GRAY + else: + raise ValueError('flag must be "color" or "grayscale"') + + +def _pillow2array(img, flag='color', channel_order='bgr'): + """Convert a pillow image to numpy array. + + Args: + img (:obj:`PIL.Image.Image`): The image loaded using PIL + flag (str): Flags specifying the color type of a loaded image, + candidates are 'color', 'grayscale' and 'unchanged'. + Default to 'color'. + channel_order (str): The channel order of the output image array, + candidates are 'bgr' and 'rgb'. Default to 'bgr'. + + Returns: + np.ndarray: The converted numpy array + """ + channel_order = channel_order.lower() + if channel_order not in ['rgb', 'bgr']: + raise ValueError('channel order must be either "rgb" or "bgr"') + + if flag == 'unchanged': + array = np.array(img) + if array.ndim >= 3 and array.shape[2] >= 3: # color image + array[:, :, :3] = array[:, :, (2, 1, 0)] # RGB to BGR + else: + # Handle exif orientation tag + if flag in ['color', 'grayscale']: + img = ImageOps.exif_transpose(img) + # If the image mode is not 'RGB', convert it to 'RGB' first. + if img.mode != 'RGB': + if img.mode != 'LA': + # Most formats except 'LA' can be directly converted to RGB + img = img.convert('RGB') + else: + # When the mode is 'LA', the default conversion will fill in + # the canvas with black, which sometimes shadows black objects + # in the foreground. + # + # Therefore, a random color (124, 117, 104) is used for canvas + img_rgba = img.convert('RGBA') + img = Image.new('RGB', img_rgba.size, (124, 117, 104)) + img.paste(img_rgba, mask=img_rgba.split()[3]) # 3 is alpha + if flag in ['color', 'color_ignore_orientation']: + array = np.array(img) + if channel_order != 'rgb': + array = array[:, :, ::-1] # RGB to BGR + elif flag in ['grayscale', 'grayscale_ignore_orientation']: + img = img.convert('L') + array = np.array(img) + else: + raise ValueError( + 'flag must be "color", "grayscale", "unchanged", ' + f'"color_ignore_orientation" or "grayscale_ignore_orientation"' + f' but got {flag}' + ) + return array + + +def imread(img_or_path, flag='color', channel_order='bgr', backend=None): + """Read an image. + + Args: + img_or_path (ndarray or str or Path): Either a numpy array or str or + pathlib.Path. If it is a numpy array (loaded image), then + it will be returned as is. + flag (str): Flags specifying the color type of a loaded image, + candidates are `color`, `grayscale`, `unchanged`, + `color_ignore_orientation` and `grayscale_ignore_orientation`. + By default, `cv2` and `pillow` backend would rotate the image + according to its EXIF info unless called with `unchanged` or + `*_ignore_orientation` flags. `turbojpeg` and `tifffile` backend + always ignore image's EXIF info regardless of the flag. + The `turbojpeg` backend only supports `color` and `grayscale`. + channel_order (str): Order of channel, candidates are `bgr` and `rgb`. + backend (str | None): The image decoding backend type. Options are + `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`. + If backend is None, the global imread_backend specified by + ``mmcv.use_backend()`` will be used. Default: None. + + Returns: + ndarray: Loaded image array. + """ + + if backend is None: + backend = imread_backend + if backend not in supported_backends: + raise ValueError( + f'backend: {backend} is not supported. Supported ' "backends are 'cv2', 'turbojpeg', 'pillow'" + ) + if isinstance(img_or_path, Path): + img_or_path = str(img_or_path) + + if isinstance(img_or_path, np.ndarray): + return img_or_path + elif is_str(img_or_path): + check_file_exist(img_or_path, f'img file does not exist: {img_or_path}') + if backend == 'turbojpeg': + with open(img_or_path, 'rb') as in_file: + img = jpeg.decode(in_file.read(), _jpegflag(flag, channel_order)) + if img.shape[-1] == 1: + img = img[:, :, 0] + return img + elif backend == 'pillow': + img = Image.open(img_or_path) + img = _pillow2array(img, flag, channel_order) + return img + elif backend == 'tifffile': + img = tifffile.imread(img_or_path) + return img + else: + flag = imread_flags[flag] if is_str(flag) else flag + img = cv2.imread(img_or_path, flag) + if flag == IMREAD_COLOR and channel_order == 'rgb': + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) + return img + else: + raise TypeError('"img" must be a numpy array or a str or ' 'a pathlib.Path object') + + +def imfrombytes(content, flag='color', channel_order='bgr', backend=None): + """Read an image from bytes. + + Args: + content (bytes): Image bytes got from files or other streams. + flag (str): Same as :func:`imread`. + backend (str | None): The image decoding backend type. Options are + `cv2`, `pillow`, `turbojpeg`, `None`. If backend is None, the + global imread_backend specified by ``mmcv.use_backend()`` will be + used. Default: None. + + Returns: + ndarray: Loaded image array. + """ + + if backend is None: + backend = imread_backend + if backend not in supported_backends: + raise ValueError( + f'backend: {backend} is not supported. Supported ' "backends are 'cv2', 'turbojpeg', 'pillow'" + ) + if backend == 'turbojpeg': + img = jpeg.decode(content, _jpegflag(flag, channel_order)) + if img.shape[-1] == 1: + img = img[:, :, 0] + return img + elif backend == 'pillow': + buff = io.BytesIO(content) + img = Image.open(buff) + img = _pillow2array(img, flag, channel_order) + return img + else: + img_np = np.frombuffer(content, np.uint8) + flag = imread_flags[flag] if is_str(flag) else flag + img = cv2.imdecode(img_np, flag) + if flag == IMREAD_COLOR and channel_order == 'rgb': + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) + return img + + +def imwrite(img, file_path, params=None, auto_mkdir=True): + """Write image to file. + + Args: + img (ndarray): Image array to be written. + file_path (str): Image file path. + params (None or list): Same as opencv :func:`imwrite` interface. + auto_mkdir (bool): If the parent folder of `file_path` does not exist, + whether to create it automatically. + + Returns: + bool: Successful or not. + """ + if auto_mkdir: + dir_name = osp.abspath(osp.dirname(file_path)) + mkdir_or_exist(dir_name) + return cv2.imwrite(file_path, img, params) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py new file mode 100644 index 000000000000..a66ed60474b9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/misc.py @@ -0,0 +1,43 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + +try: + import torch +except ImportError: + torch = None + + +def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): + """Convert tensor to 3-channel images. + + Args: + tensor (torch.Tensor): Tensor that contains multiple images, shape ( + N, C, H, W). + mean (tuple[float], optional): Mean of images. Defaults to (0, 0, 0). + std (tuple[float], optional): Standard deviation of images. + Defaults to (1, 1, 1). + to_rgb (bool, optional): Whether the tensor was converted to RGB + format in the first place. If so, convert it back to BGR. + Defaults to True. + + Returns: + list[np.ndarray]: A list that contains multiple images. + """ + + if torch is None: + raise RuntimeError('pytorch is not installed') + assert torch.is_tensor(tensor) and tensor.ndim == 4 + assert len(mean) == 3 + assert len(std) == 3 + + num_imgs = tensor.size(0) + mean = np.array(mean, dtype=np.float32) + std = np.array(std, dtype=np.float32) + imgs = [] + for img_id in range(num_imgs): + img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) + img = mmcv.imdenormalize(img, mean, std, to_bgr=to_rgb).astype(np.uint8) + imgs.append(np.ascontiguousarray(img)) + return imgs diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py new file mode 100644 index 000000000000..a68b8f49cade --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/image/photometric.py @@ -0,0 +1,422 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import cv2 +import numpy as np + +from ..utils import is_tuple_of +from .colorspace import bgr2gray, gray2bgr + + +def imnormalize(img, mean, std, to_rgb=True): + """Normalize an image with mean and std. + + Args: + img (ndarray): Image to be normalized. + mean (ndarray): The mean to be used for normalize. + std (ndarray): The std to be used for normalize. + to_rgb (bool): Whether to convert to rgb. + + Returns: + ndarray: The normalized image. + """ + img = img.copy().astype(np.float32) + return imnormalize_(img, mean, std, to_rgb) + + +def imnormalize_(img, mean, std, to_rgb=True): + """Inplace normalize an image with mean and std. + + Args: + img (ndarray): Image to be normalized. + mean (ndarray): The mean to be used for normalize. + std (ndarray): The std to be used for normalize. + to_rgb (bool): Whether to convert to rgb. + + Returns: + ndarray: The normalized image. + """ + # cv2 inplace normalization does not accept uint8 + assert img.dtype != np.uint8 + mean = np.float64(mean.reshape(1, -1)) + stdinv = 1 / np.float64(std.reshape(1, -1)) + if to_rgb: + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace + cv2.subtract(img, mean, img) # inplace + cv2.multiply(img, stdinv, img) # inplace + return img + + +def imdenormalize(img, mean, std, to_bgr=True): + assert img.dtype != np.uint8 + mean = mean.reshape(1, -1).astype(np.float64) + std = std.reshape(1, -1).astype(np.float64) + img = cv2.multiply(img, std) # make a copy + cv2.add(img, mean, img) # inplace + if to_bgr: + cv2.cvtColor(img, cv2.COLOR_RGB2BGR, img) # inplace + return img + + +def iminvert(img): + """Invert (negate) an image. + + Args: + img (ndarray): Image to be inverted. + + Returns: + ndarray: The inverted image. + """ + return np.full_like(img, 255) - img + + +def solarize(img, thr=128): + """Solarize an image (invert all pixel values above a threshold) + + Args: + img (ndarray): Image to be solarized. + thr (int): Threshold for solarizing (0 - 255). + + Returns: + ndarray: The solarized image. + """ + img = np.where(img < thr, img, 255 - img) + return img + + +def posterize(img, bits): + """Posterize an image (reduce the number of bits for each color channel) + + Args: + img (ndarray): Image to be posterized. + bits (int): Number of bits (1 to 8) to use for posterizing. + + Returns: + ndarray: The posterized image. + """ + shift = 8 - bits + img = np.left_shift(np.right_shift(img, shift), shift) + return img + + +def adjust_color(img, alpha=1, beta=None, gamma=0): + r"""It blends the source image and its gray image: + + .. math:: + output = img * alpha + gray\_img * beta + gamma + + Args: + img (ndarray): The input source image. + alpha (int | float): Weight for the source image. Default 1. + beta (int | float): Weight for the converted gray image. + If None, it's assigned the value (1 - `alpha`). + gamma (int | float): Scalar added to each sum. + Same as :func:`cv2.addWeighted`. Default 0. + + Returns: + ndarray: Colored image which has the same size and dtype as input. + """ + gray_img = bgr2gray(img) + gray_img = np.tile(gray_img[..., None], [1, 1, 3]) + if beta is None: + beta = 1 - alpha + colored_img = cv2.addWeighted(img, alpha, gray_img, beta, gamma) + if not colored_img.dtype == np.uint8: + # Note when the dtype of `img` is not the default `np.uint8` + # (e.g. np.float32), the value in `colored_img` got from cv2 + # is not guaranteed to be in range [0, 255], so here clip + # is needed. + colored_img = np.clip(colored_img, 0, 255) + return colored_img + + +def imequalize(img): + """Equalize the image histogram. + + This function applies a non-linear mapping to the input image, + in order to create a uniform distribution of grayscale values + in the output image. + + Args: + img (ndarray): Image to be equalized. + + Returns: + ndarray: The equalized image. + """ + + def _scale_channel(im, c): + """Scale the data in the corresponding channel.""" + im = im[:, :, c] + # Compute the histogram of the image channel. + histo = np.histogram(im, 256, (0, 255))[0] + # For computing the step, filter out the nonzeros. + nonzero_histo = histo[histo > 0] + step = (np.sum(nonzero_histo) - nonzero_histo[-1]) // 255 + if not step: + lut = np.array(range(256)) + else: + # Compute the cumulative sum, shifted by step // 2 + # and then normalized by step. + lut = (np.cumsum(histo) + (step // 2)) // step + # Shift lut, prepending with 0. + lut = np.concatenate([[0], lut[:-1]], 0) + # handle potential integer overflow + lut[lut > 255] = 255 + # If step is zero, return the original image. + # Otherwise, index from lut. + return np.where(np.equal(step, 0), im, lut[im]) + + # Scales each channel independently and then stacks + # the result. + s1 = _scale_channel(img, 0) + s2 = _scale_channel(img, 1) + s3 = _scale_channel(img, 2) + equalized_img = np.stack([s1, s2, s3], axis=-1) + return equalized_img.astype(img.dtype) + + +def adjust_brightness(img, factor=1.0): + """Adjust image brightness. + + This function controls the brightness of an image. An + enhancement factor of 0.0 gives a black image. + A factor of 1.0 gives the original image. This function + blends the source image and the degenerated black image: + + .. math:: + output = img * factor + degenerated * (1 - factor) + + Args: + img (ndarray): Image to be brightened. + factor (float): A value controls the enhancement. + Factor 1.0 returns the original image, lower + factors mean less color (brightness, contrast, + etc), and higher values more. Default 1. + + Returns: + ndarray: The brightened image. + """ + degenerated = np.zeros_like(img) + # Note manually convert the dtype to np.float32, to + # achieve as close results as PIL.ImageEnhance.Brightness. + # Set beta=1-factor, and gamma=0 + brightened_img = cv2.addWeighted(img.astype(np.float32), factor, degenerated.astype(np.float32), 1 - factor, 0) + brightened_img = np.clip(brightened_img, 0, 255) + return brightened_img.astype(img.dtype) + + +def adjust_contrast(img, factor=1.0): + """Adjust image contrast. + + This function controls the contrast of an image. An + enhancement factor of 0.0 gives a solid grey + image. A factor of 1.0 gives the original image. It + blends the source image and the degenerated mean image: + + .. math:: + output = img * factor + degenerated * (1 - factor) + + Args: + img (ndarray): Image to be contrasted. BGR order. + factor (float): Same as :func:`mmcv.adjust_brightness`. + + Returns: + ndarray: The contrasted image. + """ + gray_img = bgr2gray(img) + hist = np.histogram(gray_img, 256, (0, 255))[0] + mean = round(np.sum(gray_img) / np.sum(hist)) + degenerated = (np.ones_like(img[..., 0]) * mean).astype(img.dtype) + degenerated = gray2bgr(degenerated) + contrasted_img = cv2.addWeighted(img.astype(np.float32), factor, degenerated.astype(np.float32), 1 - factor, 0) + contrasted_img = np.clip(contrasted_img, 0, 255) + return contrasted_img.astype(img.dtype) + + +def auto_contrast(img, cutoff=0): + """Auto adjust image contrast. + + This function maximize (normalize) image contrast by first removing cutoff + percent of the lightest and darkest pixels from the histogram and remapping + the image so that the darkest pixel becomes black (0), and the lightest + becomes white (255). + + Args: + img (ndarray): Image to be contrasted. BGR order. + cutoff (int | float | tuple): The cutoff percent of the lightest and + darkest pixels to be removed. If given as tuple, it shall be + (low, high). Otherwise, the single value will be used for both. + Defaults to 0. + + Returns: + ndarray: The contrasted image. + """ + + def _auto_contrast_channel(im, c, cutoff): + im = im[:, :, c] + # Compute the histogram of the image channel. + histo = np.histogram(im, 256, (0, 255))[0] + # Remove cut-off percent pixels from histo + histo_sum = np.cumsum(histo) + cut_low = histo_sum[-1] * cutoff[0] // 100 + cut_high = histo_sum[-1] - histo_sum[-1] * cutoff[1] // 100 + histo_sum = np.clip(histo_sum, cut_low, cut_high) - cut_low + histo = np.concatenate([[histo_sum[0]], np.diff(histo_sum)], 0) + + # Compute mapping + low, high = np.nonzero(histo)[0][0], np.nonzero(histo)[0][-1] + # If all the values have been cut off, return the origin img + if low >= high: + return im + scale = 255.0 / (high - low) + offset = -low * scale + lut = np.array(range(256)) + lut = lut * scale + offset + lut = np.clip(lut, 0, 255) + return lut[im] + + if isinstance(cutoff, (int, float)): + cutoff = (cutoff, cutoff) + else: + assert isinstance(cutoff, tuple), ( + 'cutoff must be of type int, ' f'float or tuple, but got {type(cutoff)} instead.' + ) + # Auto adjusts contrast for each channel independently and then stacks + # the result. + s1 = _auto_contrast_channel(img, 0, cutoff) + s2 = _auto_contrast_channel(img, 1, cutoff) + s3 = _auto_contrast_channel(img, 2, cutoff) + contrasted_img = np.stack([s1, s2, s3], axis=-1) + return contrasted_img.astype(img.dtype) + + +def adjust_sharpness(img, factor=1.0, kernel=None): + """Adjust image sharpness. + + This function controls the sharpness of an image. An + enhancement factor of 0.0 gives a blurred image. A + factor of 1.0 gives the original image. And a factor + of 2.0 gives a sharpened image. It blends the source + image and the degenerated mean image: + + .. math:: + output = img * factor + degenerated * (1 - factor) + + Args: + img (ndarray): Image to be sharpened. BGR order. + factor (float): Same as :func:`mmcv.adjust_brightness`. + kernel (np.ndarray, optional): Filter kernel to be applied on the img + to obtain the degenerated img. Defaults to None. + + Note: + No value sanity check is enforced on the kernel set by users. So with + an inappropriate kernel, the ``adjust_sharpness`` may fail to perform + the function its name indicates but end up performing whatever + transform determined by the kernel. + + Returns: + ndarray: The sharpened image. + """ + + if kernel is None: + # adopted from PIL.ImageFilter.SMOOTH + kernel = np.array([[1.0, 1.0, 1.0], [1.0, 5.0, 1.0], [1.0, 1.0, 1.0]]) / 13 + assert isinstance(kernel, np.ndarray), f'kernel must be of type np.ndarray, but got {type(kernel)} instead.' + assert kernel.ndim == 2, f'kernel must have a dimension of 2, but got {kernel.ndim} instead.' + + degenerated = cv2.filter2D(img, -1, kernel) + sharpened_img = cv2.addWeighted(img.astype(np.float32), factor, degenerated.astype(np.float32), 1 - factor, 0) + sharpened_img = np.clip(sharpened_img, 0, 255) + return sharpened_img.astype(img.dtype) + + +def adjust_lighting(img, eigval, eigvec, alphastd=0.1, to_rgb=True): + """AlexNet-style PCA jitter. + + This data augmentation is proposed in `ImageNet Classification with Deep + Convolutional Neural Networks + `_. + + Args: + img (ndarray): Image to be adjusted lighting. BGR order. + eigval (ndarray): the eigenvalue of the convariance matrix of pixel + values, respectively. + eigvec (ndarray): the eigenvector of the convariance matrix of pixel + values, respectively. + alphastd (float): The standard deviation for distribution of alpha. + Defaults to 0.1 + to_rgb (bool): Whether to convert img to rgb. + + Returns: + ndarray: The adjusted image. + """ + assert isinstance(eigval, np.ndarray) and isinstance(eigvec, np.ndarray), ( + f'eigval and eigvec should both be of type np.ndarray, got ' f'{type(eigval)} and {type(eigvec)} instead.' + ) + + assert eigval.ndim == 1 and eigvec.ndim == 2 + assert eigvec.shape == (3, eigval.shape[0]) + n_eigval = eigval.shape[0] + assert isinstance(alphastd, float), 'alphastd should be of type float, ' f'got {type(alphastd)} instead.' + + img = img.copy().astype(np.float32) + if to_rgb: + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace + + alpha = np.random.normal(0, alphastd, n_eigval) + alter = ( + eigvec + * np.broadcast_to(alpha.reshape(1, n_eigval), (3, n_eigval)) + * np.broadcast_to(eigval.reshape(1, n_eigval), (3, n_eigval)) + ) + alter = np.broadcast_to(alter.sum(axis=1).reshape(1, 1, 3), img.shape) + img_adjusted = img + alter + return img_adjusted + + +def lut_transform(img, lut_table): + """Transform array by look-up table. + + The function lut_transform fills the output array with values from the + look-up table. Indices of the entries are taken from the input array. + + Args: + img (ndarray): Image to be transformed. + lut_table (ndarray): look-up table of 256 elements; in case of + multi-channel input array, the table should either have a single + channel (in this case the same table is used for all channels) or + the same number of channels as in the input array. + + Returns: + ndarray: The transformed image. + """ + assert isinstance(img, np.ndarray) + assert 0 <= np.min(img) and np.max(img) <= 255 + assert isinstance(lut_table, np.ndarray) + assert lut_table.shape == (256,) + + return cv2.LUT(np.array(img, dtype=np.uint8), lut_table) + + +def clahe(img, clip_limit=40.0, tile_grid_size=(8, 8)): + """Use CLAHE method to process the image. + + See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J]. + Graphics Gems, 1994:474-485.` for more information. + + Args: + img (ndarray): Image to be processed. + clip_limit (float): Threshold for contrast limiting. Default: 40.0. + tile_grid_size (tuple[int]): Size of grid for histogram equalization. + Input image will be divided into equally sized rectangular tiles. + It defines the number of tiles in row and column. Default: (8, 8). + + Returns: + ndarray: The processed image. + """ + assert isinstance(img, np.ndarray) + assert img.ndim == 2 + assert isinstance(clip_limit, (float, int)) + assert is_tuple_of(tile_grid_size, int) + assert len(tile_grid_size) == 2 + + clahe = cv2.createCLAHE(clip_limit, tile_grid_size) + return clahe.apply(np.array(img, dtype=np.uint8)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/deprecated.json b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/deprecated.json new file mode 100644 index 000000000000..25cf6f28caec --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/deprecated.json @@ -0,0 +1,6 @@ +{ + "resnet50_caffe": "detectron/resnet50_caffe", + "resnet50_caffe_bgr": "detectron2/resnet50_caffe_bgr", + "resnet101_caffe": "detectron/resnet101_caffe", + "resnet101_caffe_bgr": "detectron2/resnet101_caffe_bgr" +} diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/mmcls.json b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/mmcls.json new file mode 100644 index 000000000000..bdb311d9fe6d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/mmcls.json @@ -0,0 +1,31 @@ +{ + "vgg11": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_batch256_imagenet_20210208-4271cd6c.pth", + "vgg13": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_batch256_imagenet_20210208-4d1d6080.pth", + "vgg16": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_batch256_imagenet_20210208-db26f1a5.pth", + "vgg19": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_batch256_imagenet_20210208-e6920e4a.pth", + "vgg11_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_bn_batch256_imagenet_20210207-f244902c.pth", + "vgg13_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_bn_batch256_imagenet_20210207-1a8b7864.pth", + "vgg16_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_bn_batch256_imagenet_20210208-7e55cd29.pth", + "vgg19_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_bn_batch256_imagenet_20210208-da620c4f.pth", + "resnet18": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_batch256_imagenet_20200708-34ab8f90.pth", + "resnet34": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_batch256_imagenet_20200708-32ffb4f7.pth", + "resnet50": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_batch256_imagenet_20200708-cfb998bf.pth", + "resnet101": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_batch256_imagenet_20200708-753f3608.pth", + "resnet152": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_batch256_imagenet_20200708-ec25b1f9.pth", + "resnet50_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_batch256_imagenet_20200708-1ad0ce94.pth", + "resnet101_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d101_batch256_imagenet_20200708-9cb302ef.pth", + "resnet152_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d152_batch256_imagenet_20200708-e79cb6a2.pth", + "resnext50_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_b32x8_imagenet_20210429-56066e27.pth", + "resnext101_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x4d_b32x8_imagenet_20210506-e0fa3dd5.pth", + "resnext101_32x8d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x8d_b32x8_imagenet_20210506-23a247d5.pth", + "resnext152_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext152_32x4d_b32x8_imagenet_20210524-927787be.pth", + "se-resnet50": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet50_batch256_imagenet_20200804-ae206104.pth", + "se-resnet101": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet101_batch256_imagenet_20200804-ba5b51d4.pth", + "resnest50": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest50_imagenet_converted-1ebf0afe.pth", + "resnest101": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest101_imagenet_converted-032caa52.pth", + "resnest200": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest200_imagenet_converted-581a60f2.pth", + "resnest269": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest269_imagenet_converted-59930960.pth", + "shufflenet_v1": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth", + "shufflenet_v2": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v2/shufflenet_v2_batch1024_imagenet_20200812-5bf4721e.pth", + "mobilenet_v2": "https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth" +} diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/open_mmlab.json b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/open_mmlab.json new file mode 100644 index 000000000000..8311db4feef9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/model_zoo/open_mmlab.json @@ -0,0 +1,50 @@ +{ + "vgg16_caffe": "https://download.openmmlab.com/pretrain/third_party/vgg16_caffe-292e1171.pth", + "detectron/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_caffe-788b5fa3.pth", + "detectron2/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_msra-5891d200.pth", + "detectron/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_caffe-3ad79236.pth", + "detectron2/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_msra-6cc46731.pth", + "detectron2/resnext101_32x8d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x8d-1516f1aa.pth", + "resnext50_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext50-32x4d-0ab1a123.pth", + "resnext101_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d-a5af3160.pth", + "resnext101_64x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_64x4d-ee2c6f71.pth", + "contrib/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_thangvubk-ad1730dd.pth", + "detectron/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn-9186a21c.pth", + "detectron/resnet101_gn": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn-cac0ab98.pth", + "jhu/resnet50_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_ws-15beedd8.pth", + "jhu/resnet101_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn_ws-3e3c308c.pth", + "jhu/resnext50_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn_ws-0d87ac85.pth", + "jhu/resnext101_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn_ws-34ac1a9e.pth", + "jhu/resnext50_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn-c7e8b754.pth", + "jhu/resnext101_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn-ac3bb84e.pth", + "msra/hrnetv2_w18_small": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18_small-b5a04e21.pth", + "msra/hrnetv2_w18": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18-00eb2006.pth", + "msra/hrnetv2_w32": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w32-dc9eeb4f.pth", + "msra/hrnetv2_w40": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w40-ed0b031c.pth", + "msra/hrnetv2_w48": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w48-d2186c55.pth", + "bninception_caffe": "https://download.openmmlab.com/pretrain/third_party/bn_inception_caffe-ed2e8665.pth", + "kin400/i3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/i3d_r50_f32s2_k400-2c57e077.pth", + "kin400/nl3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/nl3d_r50_f32s2_k400-fa7e7caa.pth", + "res2net101_v1d_26w_4s": "https://download.openmmlab.com/pretrain/third_party/res2net101_v1d_26w_4s_mmdetv2-f0a600f9.pth", + "regnetx_400mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_400mf-a5b10d96.pth", + "regnetx_800mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_800mf-1f4be4c7.pth", + "regnetx_1.6gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_1.6gf-5791c176.pth", + "regnetx_3.2gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_3.2gf-c2599b0f.pth", + "regnetx_4.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_4.0gf-a88f671e.pth", + "regnetx_6.4gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_6.4gf-006af45d.pth", + "regnetx_8.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_8.0gf-3c68abe7.pth", + "regnetx_12gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_12gf-4c2a3350.pth", + "resnet18_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet18_v1c-b5776b93.pth", + "resnet50_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet50_v1c-2cccc1ad.pth", + "resnet101_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet101_v1c-e67eebb6.pth", + "mmedit/vgg16": "https://download.openmmlab.com/mmediting/third_party/vgg_state_dict.pth", + "mmedit/res34_en_nomixup": "https://download.openmmlab.com/mmediting/third_party/model_best_resnet34_En_nomixup.pth", + "mmedit/mobilenet_v2": "https://download.openmmlab.com/mmediting/third_party/mobilenet_v2.pth", + "contrib/mobilenet_v3_large": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_large-bc2c3fd3.pth", + "contrib/mobilenet_v3_small": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_small-47085aa1.pth", + "resnest50": "https://download.openmmlab.com/pretrain/third_party/resnest50_d2-7497a55b.pth", + "resnest101": "https://download.openmmlab.com/pretrain/third_party/resnest101_d2-f3b931b2.pth", + "resnest200": "https://download.openmmlab.com/pretrain/third_party/resnest200_d2-ca88e41f.pth", + "darknet53": "https://download.openmmlab.com/pretrain/third_party/darknet53-a628ea1b.pth", + "mmdet/mobilenet_v2": "https://download.openmmlab.com/mmdetection/v2.0/third_party/mobilenet_v2_batch256_imagenet-ff34753d.pth" +} diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py new file mode 100644 index 000000000000..ecee97e0c0cb --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/__init__.py @@ -0,0 +1,134 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .assign_score_withk import assign_score_withk +from .ball_query import ball_query +from .bbox import bbox_overlaps +from .border_align import BorderAlign, border_align +from .box_iou_rotated import box_iou_rotated +from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive +from .cc_attention import CrissCrossAttention +from .contour_expand import contour_expand +from .corner_pool import CornerPool +from .correlation import Correlation +from .deform_conv import DeformConv2d, DeformConv2dPack, deform_conv2d +from .deform_roi_pool import DeformRoIPool, DeformRoIPoolPack, ModulatedDeformRoIPoolPack, deform_roi_pool +from .deprecated_wrappers import Conv2d_deprecated as Conv2d +from .deprecated_wrappers import ConvTranspose2d_deprecated as ConvTranspose2d +from .deprecated_wrappers import Linear_deprecated as Linear +from .deprecated_wrappers import MaxPool2d_deprecated as MaxPool2d +from .focal_loss import SigmoidFocalLoss, SoftmaxFocalLoss, sigmoid_focal_loss, softmax_focal_loss +from .furthest_point_sample import furthest_point_sample, furthest_point_sample_with_dist +from .fused_bias_leakyrelu import FusedBiasLeakyReLU, fused_bias_leakyrelu +from .gather_points import gather_points +from .group_points import GroupAll, QueryAndGroup, grouping_operation +from .info import get_compiler_version, get_compiling_cuda_version, get_onnxruntime_op_path +from .iou3d import boxes_iou_bev, nms_bev, nms_normal_bev +from .knn import knn +from .masked_conv import MaskedConv2d, masked_conv2d +from .modulated_deform_conv import ModulatedDeformConv2d, ModulatedDeformConv2dPack, modulated_deform_conv2d +from .multi_scale_deform_attn import MultiScaleDeformableAttention +from .nms import batched_nms, nms, nms_match, nms_rotated, soft_nms +from .pixel_group import pixel_group +from .point_sample import SimpleRoIAlign, point_sample, rel_roi_point_to_rel_img_point +from .points_in_boxes import points_in_boxes_all, points_in_boxes_cpu, points_in_boxes_part +from .points_sampler import PointsSampler +from .psa_mask import PSAMask +from .roi_align import RoIAlign, roi_align +from .roi_align_rotated import RoIAlignRotated, roi_align_rotated +from .roi_pool import RoIPool, roi_pool +from .roiaware_pool3d import RoIAwarePool3d +from .roipoint_pool3d import RoIPointPool3d +from .saconv import SAConv2d +from .scatter_points import DynamicScatter, dynamic_scatter +from .sync_bn import SyncBatchNorm +from .three_interpolate import three_interpolate +from .three_nn import three_nn +from .tin_shift import TINShift, tin_shift +from .upfirdn2d import upfirdn2d +from .voxelize import Voxelization, voxelization + +__all__ = [ + 'bbox_overlaps', + 'CARAFE', + 'CARAFENaive', + 'CARAFEPack', + 'carafe', + 'carafe_naive', + 'CornerPool', + 'DeformConv2d', + 'DeformConv2dPack', + 'deform_conv2d', + 'DeformRoIPool', + 'DeformRoIPoolPack', + 'ModulatedDeformRoIPoolPack', + 'deform_roi_pool', + 'SigmoidFocalLoss', + 'SoftmaxFocalLoss', + 'sigmoid_focal_loss', + 'softmax_focal_loss', + 'get_compiler_version', + 'get_compiling_cuda_version', + 'get_onnxruntime_op_path', + 'MaskedConv2d', + 'masked_conv2d', + 'ModulatedDeformConv2d', + 'ModulatedDeformConv2dPack', + 'modulated_deform_conv2d', + 'batched_nms', + 'nms', + 'soft_nms', + 'nms_match', + 'RoIAlign', + 'roi_align', + 'RoIPool', + 'roi_pool', + 'SyncBatchNorm', + 'Conv2d', + 'ConvTranspose2d', + 'Linear', + 'MaxPool2d', + 'CrissCrossAttention', + 'PSAMask', + 'point_sample', + 'rel_roi_point_to_rel_img_point', + 'SimpleRoIAlign', + 'SAConv2d', + 'TINShift', + 'tin_shift', + 'assign_score_withk', + 'box_iou_rotated', + 'RoIPointPool3d', + 'nms_rotated', + 'knn', + 'ball_query', + 'upfirdn2d', + 'FusedBiasLeakyReLU', + 'fused_bias_leakyrelu', + 'RoIAlignRotated', + 'roi_align_rotated', + 'pixel_group', + 'QueryAndGroup', + 'GroupAll', + 'grouping_operation', + 'contour_expand', + 'three_nn', + 'three_interpolate', + 'MultiScaleDeformableAttention', + 'BorderAlign', + 'border_align', + 'gather_points', + 'furthest_point_sample', + 'furthest_point_sample_with_dist', + 'PointsSampler', + 'Correlation', + 'boxes_iou_bev', + 'nms_bev', + 'nms_normal_bev', + 'Voxelization', + 'voxelization', + 'dynamic_scatter', + 'DynamicScatter', + 'RoIAwarePool3d', + 'points_in_boxes_part', + 'points_in_boxes_cpu', + 'points_in_boxes_all', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py new file mode 100644 index 000000000000..399600eb812b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/assign_score_withk.py @@ -0,0 +1,117 @@ +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['assign_score_withk_forward', 'assign_score_withk_backward']) + + +class AssignScoreWithK(Function): + r"""Perform weighted sum to generate output features according to scores. + Modified from `PAConv `_. + + This is a memory-efficient CUDA implementation of assign_scores operation, + which first transform all point features with weight bank, then assemble + neighbor features with ``knn_idx`` and perform weighted sum of ``scores``. + + See the `paper `_ appendix Sec. D for + more detailed descriptions. + + Note: + This implementation assumes using ``neighbor`` kernel input, which is + (point_features - center_features, point_features). + See https://github.com/CVMI-Lab/PAConv/blob/main/scene_seg/model/ + pointnet2/paconv.py#L128 for more details. + """ + + @staticmethod + def forward(ctx, scores, point_features, center_features, knn_idx, aggregate='sum'): + """ + Args: + scores (torch.Tensor): (B, npoint, K, M), predicted scores to + aggregate weight matrices in the weight bank. + ``npoint`` is the number of sampled centers. + ``K`` is the number of queried neighbors. + ``M`` is the number of weight matrices in the weight bank. + point_features (torch.Tensor): (B, N, M, out_dim) + Pre-computed point features to be aggregated. + center_features (torch.Tensor): (B, N, M, out_dim) + Pre-computed center features to be aggregated. + knn_idx (torch.Tensor): (B, npoint, K), index of sampled kNN. + We assume the first idx in each row is the idx of the center. + aggregate (str, optional): Aggregation method. + Can be 'sum', 'avg' or 'max'. Defaults: 'sum'. + + Returns: + torch.Tensor: (B, out_dim, npoint, K), the aggregated features. + """ + agg = {'sum': 0, 'avg': 1, 'max': 2} + + B, N, M, out_dim = point_features.size() + _, npoint, K, _ = scores.size() + + output = point_features.new_zeros((B, out_dim, npoint, K)) + ext_module.assign_score_withk_forward( + point_features.contiguous(), + center_features.contiguous(), + scores.contiguous(), + knn_idx.contiguous(), + output, + B=B, + N0=N, + N1=npoint, + M=M, + K=K, + O=out_dim, + aggregate=agg[aggregate], + ) + + ctx.save_for_backward(output, point_features, center_features, scores, knn_idx) + ctx.agg = agg[aggregate] + + return output + + @staticmethod + def backward(ctx, grad_out): + """ + Args: + grad_out (torch.Tensor): (B, out_dim, npoint, K) + + Returns: + grad_scores (torch.Tensor): (B, npoint, K, M) + grad_point_features (torch.Tensor): (B, N, M, out_dim) + grad_center_features (torch.Tensor): (B, N, M, out_dim) + """ + _, point_features, center_features, scores, knn_idx = ctx.saved_tensors + + agg = ctx.agg + + B, N, M, out_dim = point_features.size() + _, npoint, K, _ = scores.size() + + grad_point_features = point_features.new_zeros(point_features.shape) + grad_center_features = center_features.new_zeros(center_features.shape) + grad_scores = scores.new_zeros(scores.shape) + + ext_module.assign_score_withk_backward( + grad_out.contiguous(), + point_features.contiguous(), + center_features.contiguous(), + scores.contiguous(), + knn_idx.contiguous(), + grad_point_features, + grad_center_features, + grad_scores, + B=B, + N0=N, + N1=npoint, + M=M, + K=K, + O=out_dim, + aggregate=agg, + ) + + return grad_scores, grad_point_features, grad_center_features, None, None + + +assign_score_withk = AssignScoreWithK.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py new file mode 100644 index 000000000000..51c403292391 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/ball_query.py @@ -0,0 +1,49 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['ball_query_forward']) + + +class BallQuery(Function): + """Find nearby points in spherical space.""" + + @staticmethod + def forward( + ctx, min_radius: float, max_radius: float, sample_num: int, xyz: torch.Tensor, center_xyz: torch.Tensor + ) -> torch.Tensor: + """ + Args: + min_radius (float): minimum radius of the balls. + max_radius (float): maximum radius of the balls. + sample_num (int): maximum number of features in the balls. + xyz (Tensor): (B, N, 3) xyz coordinates of the features. + center_xyz (Tensor): (B, npoint, 3) centers of the ball query. + + Returns: + Tensor: (B, npoint, nsample) tensor with the indices of + the features that form the query balls. + """ + assert center_xyz.is_contiguous() + assert xyz.is_contiguous() + assert min_radius < max_radius + + B, N, _ = xyz.size() + npoint = center_xyz.size(1) + idx = xyz.new_zeros(B, npoint, sample_num, dtype=torch.int) + + ext_module.ball_query_forward( + center_xyz, xyz, idx, b=B, n=N, m=npoint, min_radius=min_radius, max_radius=max_radius, nsample=sample_num + ) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(idx) + return idx + + @staticmethod + def backward(ctx, a=None): + return None, None, None, None + + +ball_query = BallQuery.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py new file mode 100644 index 000000000000..44aa88881385 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/bbox.py @@ -0,0 +1,71 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['bbox_overlaps']) + + +def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0): + """Calculate overlap between two set of bboxes. + + If ``aligned`` is ``False``, then calculate the ious between each bbox + of bboxes1 and bboxes2, otherwise the ious between each aligned pair of + bboxes1 and bboxes2. + + Args: + bboxes1 (Tensor): shape (m, 4) in format or empty. + bboxes2 (Tensor): shape (n, 4) in format or empty. + If aligned is ``True``, then m and n must be equal. + mode (str): "iou" (intersection over union) or iof (intersection over + foreground). + + Returns: + ious(Tensor): shape (m, n) if aligned == False else shape (m, 1) + + Example: + >>> bboxes1 = torch.FloatTensor([ + >>> [0, 0, 10, 10], + >>> [10, 10, 20, 20], + >>> [32, 32, 38, 42], + >>> ]) + >>> bboxes2 = torch.FloatTensor([ + >>> [0, 0, 10, 20], + >>> [0, 10, 10, 19], + >>> [10, 10, 20, 20], + >>> ]) + >>> bbox_overlaps(bboxes1, bboxes2) + tensor([[0.5000, 0.0000, 0.0000], + [0.0000, 0.0000, 1.0000], + [0.0000, 0.0000, 0.0000]]) + + Example: + >>> empty = torch.FloatTensor([]) + >>> nonempty = torch.FloatTensor([ + >>> [0, 0, 10, 9], + >>> ]) + >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1) + >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0) + >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0) + """ + + mode_dict = {'iou': 0, 'iof': 1} + assert mode in mode_dict.keys() + mode_flag = mode_dict[mode] + # Either the boxes are empty or the length of boxes' last dimension is 4 + assert bboxes1.size(-1) == 4 or bboxes1.size(0) == 0 + assert bboxes2.size(-1) == 4 or bboxes2.size(0) == 0 + assert offset == 1 or offset == 0 + + rows = bboxes1.size(0) + cols = bboxes2.size(0) + if aligned: + assert rows == cols + + if rows * cols == 0: + return bboxes1.new(rows, 1) if aligned else bboxes1.new(rows, cols) + + if aligned: + ious = bboxes1.new_zeros(rows) + else: + ious = bboxes1.new_zeros((rows, cols)) + ext_module.bbox_overlaps(bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset) + return ious diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py new file mode 100644 index 000000000000..beea1a66e997 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/border_align.py @@ -0,0 +1,98 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# modified from +# https://github.com/Megvii-BaseDetection/cvpods/blob/master/cvpods/layers/border_align.py + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['border_align_forward', 'border_align_backward']) + + +class BorderAlignFunction(Function): + @staticmethod + def symbolic(g, input, boxes, pool_size): + return g.op('mmcv::MMCVBorderAlign', input, boxes, pool_size_i=pool_size) + + @staticmethod + def forward(ctx, input, boxes, pool_size): + ctx.pool_size = pool_size + ctx.input_shape = input.size() + + assert boxes.ndim == 3, 'boxes must be with shape [B, H*W, 4]' + assert boxes.size(2) == 4, 'the last dimension of boxes must be (x1, y1, x2, y2)' + assert input.size(1) % 4 == 0, 'the channel for input feature must be divisible by factor 4' + + # [B, C//4, H*W, 4] + output_shape = (input.size(0), input.size(1) // 4, boxes.size(1), 4) + output = input.new_zeros(output_shape) + # `argmax_idx` only used for backward + argmax_idx = input.new_zeros(output_shape).to(torch.int) + + ext_module.border_align_forward(input, boxes, output, argmax_idx, pool_size=ctx.pool_size) + + ctx.save_for_backward(boxes, argmax_idx) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + boxes, argmax_idx = ctx.saved_tensors + grad_input = grad_output.new_zeros(ctx.input_shape) + # complex head architecture may cause grad_output uncontiguous + grad_output = grad_output.contiguous() + ext_module.border_align_backward(grad_output, boxes, argmax_idx, grad_input, pool_size=ctx.pool_size) + return grad_input, None, None + + +border_align = BorderAlignFunction.apply + + +class BorderAlign(nn.Module): + r"""Border align pooling layer. + + Applies border_align over the input feature based on predicted bboxes. + The details were described in the paper + `BorderDet: Border Feature for Dense Object Detection + `_. + + For each border line (e.g. top, left, bottom or right) of each box, + border_align does the following: + 1. uniformly samples `pool_size`+1 positions on this line, involving \ + the start and end points. + 2. the corresponding features on these points are computed by \ + bilinear interpolation. + 3. max pooling over all the `pool_size`+1 positions are used for \ + computing pooled feature. + + Args: + pool_size (int): number of positions sampled over the boxes' borders + (e.g. top, bottom, left, right). + + """ + + def __init__(self, pool_size): + super(BorderAlign, self).__init__() + self.pool_size = pool_size + + def forward(self, input, boxes): + """ + Args: + input: Features with shape [N,4C,H,W]. Channels ranged in [0,C), + [C,2C), [2C,3C), [3C,4C) represent the top, left, bottom, + right features respectively. + boxes: Boxes with shape [N,H*W,4]. Coordinate format (x1,y1,x2,y2). + + Returns: + Tensor: Pooled features with shape [N,C,H*W,4]. The order is + (top,left,bottom,right) for the last dimension. + """ + return border_align(input, boxes, self.pool_size) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(pool_size={self.pool_size})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py new file mode 100644 index 000000000000..dfadb39c715c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/box_iou_rotated.py @@ -0,0 +1,44 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['box_iou_rotated']) + + +def box_iou_rotated(bboxes1, bboxes2, mode='iou', aligned=False): + """Return intersection-over-union (Jaccard index) of boxes. + + Both sets of boxes are expected to be in + (x_center, y_center, width, height, angle) format. + + If ``aligned`` is ``False``, then calculate the ious between each bbox + of bboxes1 and bboxes2, otherwise the ious between each aligned pair of + bboxes1 and bboxes2. + + Arguments: + boxes1 (Tensor): rotated bboxes 1. \ + It has shape (N, 5), indicating (x, y, w, h, theta) for each row. + Note that theta is in radian. + boxes2 (Tensor): rotated bboxes 2. \ + It has shape (M, 5), indicating (x, y, w, h, theta) for each row. + Note that theta is in radian. + mode (str): "iou" (intersection over union) or iof (intersection over + foreground). + + Returns: + ious(Tensor): shape (N, M) if aligned == False else shape (N,) + """ + assert mode in ['iou', 'iof'] + mode_dict = {'iou': 0, 'iof': 1} + mode_flag = mode_dict[mode] + rows = bboxes1.size(0) + cols = bboxes2.size(0) + if aligned: + ious = bboxes1.new_zeros(rows) + else: + ious = bboxes1.new_zeros((rows * cols)) + bboxes1 = bboxes1.contiguous() + bboxes2 = bboxes2.contiguous() + ext_module.box_iou_rotated(bboxes1, bboxes2, ious, mode_flag=mode_flag, aligned=aligned) + if not aligned: + ious = ious.view(rows, cols) + return ious diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py new file mode 100644 index 000000000000..bc0eb0d32f71 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/carafe.py @@ -0,0 +1,281 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Function +from torch.nn.modules.module import Module + +from ..cnn import UPSAMPLE_LAYERS, normal_init, xavier_init +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['carafe_naive_forward', 'carafe_naive_backward', 'carafe_forward', 'carafe_backward'] +) + + +class CARAFENaiveFunction(Function): + @staticmethod + def symbolic(g, features, masks, kernel_size, group_size, scale_factor): + return g.op( + 'mmcv::MMCVCARAFENaive', + features, + masks, + kernel_size_i=kernel_size, + group_size_i=group_size, + scale_factor_f=scale_factor, + ) + + @staticmethod + def forward(ctx, features, masks, kernel_size, group_size, scale_factor): + assert scale_factor >= 1 + assert masks.size(1) == kernel_size * kernel_size * group_size + assert masks.size(-1) == features.size(-1) * scale_factor + assert masks.size(-2) == features.size(-2) * scale_factor + assert features.size(1) % group_size == 0 + assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1 + ctx.kernel_size = kernel_size + ctx.group_size = group_size + ctx.scale_factor = scale_factor + ctx.feature_size = features.size() + ctx.mask_size = masks.size() + + n, c, h, w = features.size() + output = features.new_zeros((n, c, h * scale_factor, w * scale_factor)) + ext_module.carafe_naive_forward( + features, masks, output, kernel_size=kernel_size, group_size=group_size, scale_factor=scale_factor + ) + + if features.requires_grad or masks.requires_grad: + ctx.save_for_backward(features, masks) + return output + + @staticmethod + def backward(ctx, grad_output): + assert grad_output.is_cuda + + features, masks = ctx.saved_tensors + kernel_size = ctx.kernel_size + group_size = ctx.group_size + scale_factor = ctx.scale_factor + + grad_input = torch.zeros_like(features) + grad_masks = torch.zeros_like(masks) + ext_module.carafe_naive_backward( + grad_output.contiguous(), + features, + masks, + grad_input, + grad_masks, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor, + ) + + return grad_input, grad_masks, None, None, None + + +carafe_naive = CARAFENaiveFunction.apply + + +class CARAFENaive(Module): + def __init__(self, kernel_size, group_size, scale_factor): + super(CARAFENaive, self).__init__() + + assert isinstance(kernel_size, int) and isinstance(group_size, int) and isinstance(scale_factor, int) + self.kernel_size = kernel_size + self.group_size = group_size + self.scale_factor = scale_factor + + def forward(self, features, masks): + return carafe_naive(features, masks, self.kernel_size, self.group_size, self.scale_factor) + + +class CARAFEFunction(Function): + @staticmethod + def symbolic(g, features, masks, kernel_size, group_size, scale_factor): + return g.op( + 'mmcv::MMCVCARAFE', + features, + masks, + kernel_size_i=kernel_size, + group_size_i=group_size, + scale_factor_f=scale_factor, + ) + + @staticmethod + def forward(ctx, features, masks, kernel_size, group_size, scale_factor): + assert scale_factor >= 1 + assert masks.size(1) == kernel_size * kernel_size * group_size + assert masks.size(-1) == features.size(-1) * scale_factor + assert masks.size(-2) == features.size(-2) * scale_factor + assert features.size(1) % group_size == 0 + assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1 + ctx.kernel_size = kernel_size + ctx.group_size = group_size + ctx.scale_factor = scale_factor + ctx.feature_size = features.size() + ctx.mask_size = masks.size() + + n, c, h, w = features.size() + output = features.new_zeros((n, c, h * scale_factor, w * scale_factor)) + routput = features.new_zeros(output.size(), requires_grad=False) + rfeatures = features.new_zeros(features.size(), requires_grad=False) + rmasks = masks.new_zeros(masks.size(), requires_grad=False) + ext_module.carafe_forward( + features, + masks, + rfeatures, + routput, + rmasks, + output, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor, + ) + + if features.requires_grad or masks.requires_grad: + ctx.save_for_backward(features, masks, rfeatures) + return output + + @staticmethod + def backward(ctx, grad_output): + assert grad_output.is_cuda + + features, masks, rfeatures = ctx.saved_tensors + kernel_size = ctx.kernel_size + group_size = ctx.group_size + scale_factor = ctx.scale_factor + + rgrad_output = torch.zeros_like(grad_output, requires_grad=False) + rgrad_input_hs = torch.zeros_like(grad_output, requires_grad=False) + rgrad_input = torch.zeros_like(features, requires_grad=False) + rgrad_masks = torch.zeros_like(masks, requires_grad=False) + grad_input = torch.zeros_like(features, requires_grad=False) + grad_masks = torch.zeros_like(masks, requires_grad=False) + ext_module.carafe_backward( + grad_output.contiguous(), + rfeatures, + masks, + rgrad_output, + rgrad_input_hs, + rgrad_input, + rgrad_masks, + grad_input, + grad_masks, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor, + ) + return grad_input, grad_masks, None, None, None + + +carafe = CARAFEFunction.apply + + +class CARAFE(Module): + """ CARAFE: Content-Aware ReAssembly of FEatures + + Please refer to https://arxiv.org/abs/1905.02188 for more details. + + Args: + kernel_size (int): reassemble kernel size + group_size (int): reassemble group size + scale_factor (int): upsample ratio + + Returns: + upsampled feature map + """ + + def __init__(self, kernel_size, group_size, scale_factor): + super(CARAFE, self).__init__() + + assert isinstance(kernel_size, int) and isinstance(group_size, int) and isinstance(scale_factor, int) + self.kernel_size = kernel_size + self.group_size = group_size + self.scale_factor = scale_factor + + def forward(self, features, masks): + return carafe(features, masks, self.kernel_size, self.group_size, self.scale_factor) + + +@UPSAMPLE_LAYERS.register_module(name='carafe') +class CARAFEPack(nn.Module): + """A unified package of CARAFE upsampler that contains: 1) channel + compressor 2) content encoder 3) CARAFE op. + + Official implementation of ICCV 2019 paper + CARAFE: Content-Aware ReAssembly of FEatures + Please refer to https://arxiv.org/abs/1905.02188 for more details. + + Args: + channels (int): input feature channels + scale_factor (int): upsample ratio + up_kernel (int): kernel size of CARAFE op + up_group (int): group size of CARAFE op + encoder_kernel (int): kernel size of content encoder + encoder_dilation (int): dilation of content encoder + compressed_channels (int): output channels of channels compressor + + Returns: + upsampled feature map + """ + + def __init__( + self, + channels, + scale_factor, + up_kernel=5, + up_group=1, + encoder_kernel=3, + encoder_dilation=1, + compressed_channels=64, + ): + super(CARAFEPack, self).__init__() + self.channels = channels + self.scale_factor = scale_factor + self.up_kernel = up_kernel + self.up_group = up_group + self.encoder_kernel = encoder_kernel + self.encoder_dilation = encoder_dilation + self.compressed_channels = compressed_channels + self.channel_compressor = nn.Conv2d(channels, self.compressed_channels, 1) + self.content_encoder = nn.Conv2d( + self.compressed_channels, + self.up_kernel * self.up_kernel * self.up_group * self.scale_factor * self.scale_factor, + self.encoder_kernel, + padding=int((self.encoder_kernel - 1) * self.encoder_dilation / 2), + dilation=self.encoder_dilation, + groups=1, + ) + self.init_weights() + + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + xavier_init(m, distribution='uniform') + normal_init(self.content_encoder, std=0.001) + + def kernel_normalizer(self, mask): + mask = F.pixel_shuffle(mask, self.scale_factor) + n, mask_c, h, w = mask.size() + # use float division explicitly, + # to void inconsistency while exporting to onnx + mask_channel = int(mask_c / float(self.up_kernel ** 2)) + mask = mask.view(n, mask_channel, -1, h, w) + + mask = F.softmax(mask, dim=2, dtype=mask.dtype) + mask = mask.view(n, mask_c, h, w).contiguous() + + return mask + + def feature_reassemble(self, x, mask): + x = carafe(x, mask, self.up_kernel, self.up_group, self.scale_factor) + return x + + def forward(self, x): + compressed_x = self.channel_compressor(x) + mask = self.content_encoder(compressed_x) + mask = self.kernel_normalizer(mask) + + x = self.feature_reassemble(x, mask) + return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py new file mode 100644 index 000000000000..48fe50696acb --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/cc_attention.py @@ -0,0 +1,81 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import PLUGIN_LAYERS, Scale + + +def NEG_INF_DIAG(n, device): + """Returns a diagonal matrix of size [n, n]. + + The diagonal are all "-inf". This is for avoiding calculating the + overlapped element in the Criss-Cross twice. + """ + return torch.diag(torch.tensor(float('-inf')).to(device).repeat(n), 0) + + +@PLUGIN_LAYERS.register_module() +class CrissCrossAttention(nn.Module): + """Criss-Cross Attention Module. + + .. note:: + Before v1.3.13, we use a CUDA op. Since v1.3.13, we switch + to a pure PyTorch and equivalent implementation. For more + details, please refer to https://github.com/open-mmlab/mmcv/pull/1201. + + Speed comparison for one forward pass + + - Input size: [2,512,97,97] + - Device: 1 NVIDIA GeForce RTX 2080 Ti + + +-----------------------+---------------+------------+---------------+ + | |PyTorch version|CUDA version|Relative speed | + +=======================+===============+============+===============+ + |with torch.no_grad() |0.00554402 s |0.0299619 s |5.4x | + +-----------------------+---------------+------------+---------------+ + |no with torch.no_grad()|0.00562803 s |0.0301349 s |5.4x | + +-----------------------+---------------+------------+---------------+ + + Args: + in_channels (int): Channels of the input feature map. + """ + + def __init__(self, in_channels): + super().__init__() + self.query_conv = nn.Conv2d(in_channels, in_channels // 8, 1) + self.key_conv = nn.Conv2d(in_channels, in_channels // 8, 1) + self.value_conv = nn.Conv2d(in_channels, in_channels, 1) + self.gamma = Scale(0.0) + self.in_channels = in_channels + + def forward(self, x): + """forward function of Criss-Cross Attention. + + Args: + x (Tensor): Input feature. \ + shape (batch_size, in_channels, height, width) + Returns: + Tensor: Output of the layer, with shape of \ + (batch_size, in_channels, height, width) + """ + B, C, H, W = x.size() + query = self.query_conv(x) + key = self.key_conv(x) + value = self.value_conv(x) + energy_H = torch.einsum('bchw,bciw->bwhi', query, key) + NEG_INF_DIAG(H, query.device) + energy_H = energy_H.transpose(1, 2) + energy_W = torch.einsum('bchw,bchj->bhwj', query, key) + attn = F.softmax(torch.cat([energy_H, energy_W], dim=-1), dim=-1) # [B,H,W,(H+W)] + out = torch.einsum('bciw,bhwi->bchw', value, attn[..., :H]) + out += torch.einsum('bchj,bhwj->bchw', value, attn[..., H:]) + + out = self.gamma(out) + x + out = out.contiguous() + + return out + + def __repr__(self): + s = self.__class__.__name__ + s += f'(in_channels={self.in_channels})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py new file mode 100644 index 000000000000..14281d4c5d63 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/contour_expand.py @@ -0,0 +1,45 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np +import torch + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['contour_expand']) + + +def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area, kernel_num): + """Expand kernel contours so that foreground pixels are assigned into + instances. + + Arguments: + kernel_mask (np.array or Tensor): The instance kernel mask with + size hxw. + internal_kernel_label (np.array or Tensor): The instance internal + kernel label with size hxw. + min_kernel_area (int): The minimum kernel area. + kernel_num (int): The instance kernel number. + + Returns: + label (list): The instance index map with size hxw. + """ + assert isinstance(kernel_mask, (torch.Tensor, np.ndarray)) + assert isinstance(internal_kernel_label, (torch.Tensor, np.ndarray)) + assert isinstance(min_kernel_area, int) + assert isinstance(kernel_num, int) + + if isinstance(kernel_mask, np.ndarray): + kernel_mask = torch.from_numpy(kernel_mask) + if isinstance(internal_kernel_label, np.ndarray): + internal_kernel_label = torch.from_numpy(internal_kernel_label) + + if torch.__version__ == 'parrots': + if kernel_mask.shape[0] == 0 or internal_kernel_label.shape[0] == 0: + label = [] + else: + label = ext_module.contour_expand( + kernel_mask, internal_kernel_label, min_kernel_area=min_kernel_area, kernel_num=kernel_num + ) + label = label.tolist() + else: + label = ext_module.contour_expand(kernel_mask, internal_kernel_label, min_kernel_area, kernel_num) + return label diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py new file mode 100644 index 000000000000..ede2266be45c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/corner_pool.py @@ -0,0 +1,162 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', + [ + 'top_pool_forward', + 'top_pool_backward', + 'bottom_pool_forward', + 'bottom_pool_backward', + 'left_pool_forward', + 'left_pool_backward', + 'right_pool_forward', + 'right_pool_backward', + ], +) + +_mode_dict = {'top': 0, 'bottom': 1, 'left': 2, 'right': 3} + + +class TopPoolFunction(Function): + @staticmethod + def symbolic(g, input): + output = g.op('mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['top'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.top_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + (input,) = ctx.saved_tensors + output = ext_module.top_pool_backward(input, grad_output) + return output + + +class BottomPoolFunction(Function): + @staticmethod + def symbolic(g, input): + output = g.op('mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['bottom'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.bottom_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + (input,) = ctx.saved_tensors + output = ext_module.bottom_pool_backward(input, grad_output) + return output + + +class LeftPoolFunction(Function): + @staticmethod + def symbolic(g, input): + output = g.op('mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['left'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.left_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + (input,) = ctx.saved_tensors + output = ext_module.left_pool_backward(input, grad_output) + return output + + +class RightPoolFunction(Function): + @staticmethod + def symbolic(g, input): + output = g.op('mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['right'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.right_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + (input,) = ctx.saved_tensors + output = ext_module.right_pool_backward(input, grad_output) + return output + + +class CornerPool(nn.Module): + """Corner Pooling. + + Corner Pooling is a new type of pooling layer that helps a + convolutional network better localize corners of bounding boxes. + + Please refer to https://arxiv.org/abs/1808.01244 for more details. + Code is modified from https://github.com/princeton-vl/CornerNet-Lite. + + Args: + mode(str): Pooling orientation for the pooling layer + + - 'bottom': Bottom Pooling + - 'left': Left Pooling + - 'right': Right Pooling + - 'top': Top Pooling + + Returns: + Feature map after pooling. + """ + + pool_functions = { + 'bottom': BottomPoolFunction, + 'left': LeftPoolFunction, + 'right': RightPoolFunction, + 'top': TopPoolFunction, + } + + cummax_dim_flip = { + 'bottom': (2, False), + 'left': (3, True), + 'right': (3, False), + 'top': (2, True), + } + + def __init__(self, mode): + super(CornerPool, self).__init__() + assert mode in self.pool_functions + self.mode = mode + self.corner_pool = self.pool_functions[mode] + + def forward(self, x): + if torch.__version__ != 'parrots' and torch.__version__ >= '1.5.0': + if torch.onnx.is_in_onnx_export(): + assert torch.__version__ >= '1.7.0', ( + 'When `cummax` serves as an intermediate component whose ' + 'outputs is used as inputs for another modules, it\'s ' + 'expected that pytorch version must be >= 1.7.0, ' + 'otherwise Error appears like: `RuntimeError: tuple ' + 'appears in op that does not forward tuples, unsupported ' + 'kind: prim::PythonOp`.' + ) + + dim, flip = self.cummax_dim_flip[self.mode] + if flip: + x = x.flip(dim) + pool_tensor, _ = torch.cummax(x, dim=dim) + if flip: + pool_tensor = pool_tensor.flip(dim) + return pool_tensor + else: + return self.corner_pool.apply(x) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py new file mode 100644 index 000000000000..a5f89fa68576 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/correlation.py @@ -0,0 +1,197 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import Tensor, nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['correlation_forward', 'correlation_backward']) + + +class CorrelationFunction(Function): + @staticmethod + def forward( + ctx, input1, input2, kernel_size=1, max_displacement=1, stride=1, padding=1, dilation=1, dilation_patch=1 + ): + + ctx.save_for_backward(input1, input2) + + kH, kW = ctx.kernel_size = _pair(kernel_size) + patch_size = max_displacement * 2 + 1 + ctx.patch_size = patch_size + dH, dW = ctx.stride = _pair(stride) + padH, padW = ctx.padding = _pair(padding) + dilationH, dilationW = ctx.dilation = _pair(dilation) + dilation_patchH, dilation_patchW = ctx.dilation_patch = _pair(dilation_patch) + + output_size = CorrelationFunction._output_size(ctx, input1) + + output = input1.new_zeros(output_size) + + ext_module.correlation_forward( + input1, + input2, + output, + kH=kH, + kW=kW, + patchH=patch_size, + patchW=patch_size, + padH=padH, + padW=padW, + dilationH=dilationH, + dilationW=dilationW, + dilation_patchH=dilation_patchH, + dilation_patchW=dilation_patchW, + dH=dH, + dW=dW, + ) + + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input1, input2 = ctx.saved_tensors + + kH, kW = ctx.kernel_size + patch_size = ctx.patch_size + padH, padW = ctx.padding + dilationH, dilationW = ctx.dilation + dilation_patchH, dilation_patchW = ctx.dilation_patch + dH, dW = ctx.stride + grad_input1 = torch.zeros_like(input1) + grad_input2 = torch.zeros_like(input2) + + ext_module.correlation_backward( + grad_output, + input1, + input2, + grad_input1, + grad_input2, + kH=kH, + kW=kW, + patchH=patch_size, + patchW=patch_size, + padH=padH, + padW=padW, + dilationH=dilationH, + dilationW=dilationW, + dilation_patchH=dilation_patchH, + dilation_patchW=dilation_patchW, + dH=dH, + dW=dW, + ) + return grad_input1, grad_input2, None, None, None, None, None, None + + @staticmethod + def _output_size(ctx, input1): + iH, iW = input1.size(2), input1.size(3) + batch_size = input1.size(0) + kH, kW = ctx.kernel_size + patch_size = ctx.patch_size + dH, dW = ctx.stride + padH, padW = ctx.padding + dilationH, dilationW = ctx.dilation + dilatedKH = (kH - 1) * dilationH + 1 + dilatedKW = (kW - 1) * dilationW + 1 + + oH = int((iH + 2 * padH - dilatedKH) / dH + 1) + oW = int((iW + 2 * padW - dilatedKW) / dW + 1) + + output_size = (batch_size, patch_size, patch_size, oH, oW) + return output_size + + +class Correlation(nn.Module): + r"""Correlation operator + + This correlation operator works for optical flow correlation computation. + + There are two batched tensors with shape :math:`(N, C, H, W)`, + and the correlation output's shape is :math:`(N, max\_displacement \times + 2 + 1, max\_displacement * 2 + 1, H_{out}, W_{out})` + + where + + .. math:: + H_{out} = \left\lfloor\frac{H_{in} + 2 \times padding - + dilation \times (kernel\_size - 1) - 1} + {stride} + 1\right\rfloor + + .. math:: + W_{out} = \left\lfloor\frac{W_{in} + 2 \times padding - dilation + \times (kernel\_size - 1) - 1} + {stride} + 1\right\rfloor + + the correlation item :math:`(N_i, dy, dx)` is formed by taking the sliding + window convolution between input1 and shifted input2, + + .. math:: + Corr(N_i, dx, dy) = + \sum_{c=0}^{C-1} + input1(N_i, c) \star + \mathcal{S}(input2(N_i, c), dy, dx) + + where :math:`\star` is the valid 2d sliding window convolution operator, + and :math:`\mathcal{S}` means shifting the input features (auto-complete + zero marginal), and :math:`dx, dy` are shifting distance, :math:`dx, dy \in + [-max\_displacement \times dilation\_patch, max\_displacement \times + dilation\_patch]`. + + Args: + kernel_size (int): The size of sliding window i.e. local neighborhood + representing the center points and involved in correlation + computation. Defaults to 1. + max_displacement (int): The radius for computing correlation volume, + but the actual working space can be dilated by dilation_patch. + Defaults to 1. + stride (int): The stride of the sliding blocks in the input spatial + dimensions. Defaults to 1. + padding (int): Zero padding added to all four sides of the input1. + Defaults to 0. + dilation (int): The spacing of local neighborhood that will involved + in correlation. Defaults to 1. + dilation_patch (int): The spacing between position need to compute + correlation. Defaults to 1. + """ + + def __init__( + self, + kernel_size: int = 1, + max_displacement: int = 1, + stride: int = 1, + padding: int = 0, + dilation: int = 1, + dilation_patch: int = 1, + ) -> None: + super().__init__() + self.kernel_size = kernel_size + self.max_displacement = max_displacement + self.stride = stride + self.padding = padding + self.dilation = dilation + self.dilation_patch = dilation_patch + + def forward(self, input1: Tensor, input2: Tensor) -> Tensor: + return CorrelationFunction.apply( + input1, + input2, + self.kernel_size, + self.max_displacement, + self.stride, + self.padding, + self.dilation, + self.dilation_patch, + ) + + def __repr__(self) -> str: + s = self.__class__.__name__ + s += f'(kernel_size={self.kernel_size}, ' + s += f'max_displacement={self.max_displacement}, ' + s += f'stride={self.stride}, ' + s += f'padding={self.padding}, ' + s += f'dilation={self.dilation}, ' + s += f'dilation_patch={self.dilation_patch})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py new file mode 100644 index 000000000000..6c6d14243d22 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_conv.py @@ -0,0 +1,406 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Tuple, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair, _single + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import deprecated_api_warning +from ..cnn import CONV_LAYERS +from ..utils import ext_loader, print_log + +ext_module = ext_loader.load_ext( + '_ext', ['deform_conv_forward', 'deform_conv_backward_input', 'deform_conv_backward_parameters'] +) + + +class DeformConv2dFunction(Function): + @staticmethod + def symbolic( + g, input, offset, weight, stride, padding, dilation, groups, deform_groups, bias=False, im2col_step=32 + ): + return g.op( + 'mmcv::MMCVDeformConv2d', + input, + offset, + weight, + stride_i=stride, + padding_i=padding, + dilation_i=dilation, + groups_i=groups, + deform_groups_i=deform_groups, + bias_i=bias, + im2col_step_i=im2col_step, + ) + + @staticmethod + def forward( + ctx, + input, + offset, + weight, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1, + bias=False, + im2col_step=32, + ): + if input is not None and input.dim() != 4: + raise ValueError( + f'Expected 4D tensor as input, got {input.dim()}D tensor \ + instead.' + ) + assert bias is False, 'Only support bias is False.' + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deform_groups = deform_groups + ctx.im2col_step = im2col_step + + # When pytorch version >= 1.6.0, amp is adopted for fp16 mode; + # amp won't cast the type of model (float32), but "offset" is cast + # to float16 by nn.Conv2d automatically, leading to the type + # mismatch with input (when it is float32) or weight. + # The flag for whether to use fp16 or amp is the type of "offset", + # we cast weight and input to temporarily support fp16 and amp + # whatever the pytorch version is. + input = input.type_as(offset) + weight = weight.type_as(input) + ctx.save_for_backward(input, offset, weight) + + output = input.new_empty(DeformConv2dFunction._output_size(ctx, input, weight)) + + ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones + + cur_im2col_step = min(ctx.im2col_step, input.size(0)) + assert (input.size(0) % cur_im2col_step) == 0, 'im2col step must divide batchsize' + ext_module.deform_conv_forward( + input, + weight, + offset, + output, + ctx.bufs_[0], + ctx.bufs_[1], + kW=weight.size(3), + kH=weight.size(2), + dW=ctx.stride[1], + dH=ctx.stride[0], + padW=ctx.padding[1], + padH=ctx.padding[0], + dilationW=ctx.dilation[1], + dilationH=ctx.dilation[0], + group=ctx.groups, + deformable_group=ctx.deform_groups, + im2col_step=cur_im2col_step, + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, weight = ctx.saved_tensors + + grad_input = grad_offset = grad_weight = None + + cur_im2col_step = min(ctx.im2col_step, input.size(0)) + assert (input.size(0) % cur_im2col_step) == 0, 'batch size must be divisible by im2col_step' + + grad_output = grad_output.contiguous() + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + ext_module.deform_conv_backward_input( + input, + offset, + grad_output, + grad_input, + grad_offset, + weight, + ctx.bufs_[0], + kW=weight.size(3), + kH=weight.size(2), + dW=ctx.stride[1], + dH=ctx.stride[0], + padW=ctx.padding[1], + padH=ctx.padding[0], + dilationW=ctx.dilation[1], + dilationH=ctx.dilation[0], + group=ctx.groups, + deformable_group=ctx.deform_groups, + im2col_step=cur_im2col_step, + ) + + if ctx.needs_input_grad[2]: + grad_weight = torch.zeros_like(weight) + ext_module.deform_conv_backward_parameters( + input, + offset, + grad_output, + grad_weight, + ctx.bufs_[0], + ctx.bufs_[1], + kW=weight.size(3), + kH=weight.size(2), + dW=ctx.stride[1], + dH=ctx.stride[0], + padW=ctx.padding[1], + padH=ctx.padding[0], + dilationW=ctx.dilation[1], + dilationH=ctx.dilation[0], + group=ctx.groups, + deformable_group=ctx.deform_groups, + scale=1, + im2col_step=cur_im2col_step, + ) + + return grad_input, grad_offset, grad_weight, None, None, None, None, None, None, None + + @staticmethod + def _output_size(ctx, input, weight): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = ctx.padding[d] + kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = ctx.stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError( + 'convolution input is too small (output would be ' + 'x'.join(map(str, output_size)) + ')' + ) + return output_size + + +deform_conv2d = DeformConv2dFunction.apply + + +class DeformConv2d(nn.Module): + r"""Deformable 2D convolution. + + Applies a deformable 2D convolution over an input signal composed of + several input planes. DeformConv2d was described in the paper + `Deformable Convolutional Networks + `_ + + Note: + The argument ``im2col_step`` was added in version 1.3.17, which means + number of samples processed by the ``im2col_cuda_kernel`` per call. + It enables users to define ``batch_size`` and ``im2col_step`` more + flexibly and solved `issue mmcv#1440 + `_. + + Args: + in_channels (int): Number of channels in the input image. + out_channels (int): Number of channels produced by the convolution. + kernel_size(int, tuple): Size of the convolving kernel. + stride(int, tuple): Stride of the convolution. Default: 1. + padding (int or tuple): Zero-padding added to both sides of the input. + Default: 0. + dilation (int or tuple): Spacing between kernel elements. Default: 1. + groups (int): Number of blocked connections from input. + channels to output channels. Default: 1. + deform_groups (int): Number of deformable group partitions. + bias (bool): If True, adds a learnable bias to the output. + Default: False. + im2col_step (int): Number of samples processed by im2col_cuda_kernel + per call. It will work when ``batch_size`` > ``im2col_step``, but + ``batch_size`` must be divisible by ``im2col_step``. Default: 32. + `New in version 1.3.17.` + """ + + @deprecated_api_warning({'deformable_groups': 'deform_groups'}, cls_name='DeformConv2d') + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int, ...]], + stride: Union[int, Tuple[int, ...]] = 1, + padding: Union[int, Tuple[int, ...]] = 0, + dilation: Union[int, Tuple[int, ...]] = 1, + groups: int = 1, + deform_groups: int = 1, + bias: bool = False, + im2col_step: int = 32, + ) -> None: + super(DeformConv2d, self).__init__() + + assert not bias, f'bias={bias} is not supported in DeformConv2d.' + assert in_channels % groups == 0, f'in_channels {in_channels} cannot be divisible by groups {groups}' + assert ( + out_channels % groups == 0 + ), f'out_channels {out_channels} cannot be divisible by groups \ + {groups}' + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deform_groups = deform_groups + self.im2col_step = im2col_step + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + # only weight, no bias + self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size)) + + self.reset_parameters() + + def reset_parameters(self): + # switch the initialization of `self.weight` to the standard kaiming + # method described in `Delving deep into rectifiers: Surpassing + # human-level performance on ImageNet classification` - He, K. et al. + # (2015), using a uniform distribution + nn.init.kaiming_uniform_(self.weight, nonlinearity='relu') + + def forward(self, x: Tensor, offset: Tensor) -> Tensor: + """Deformable Convolutional forward function. + + Args: + x (Tensor): Input feature, shape (B, C_in, H_in, W_in) + offset (Tensor): Offset for deformable convolution, shape + (B, deform_groups*kernel_size[0]*kernel_size[1]*2, + H_out, W_out), H_out, W_out are equal to the output's. + + An offset is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`. + The spatial arrangement is like: + + .. code:: text + + (x0, y0) (x1, y1) (x2, y2) + (x3, y3) (x4, y4) (x5, y5) + (x6, y6) (x7, y7) (x8, y8) + + Returns: + Tensor: Output of the layer. + """ + # To fix an assert error in deform_conv_cuda.cpp:128 + # input image is smaller than kernel + input_pad = (x.size(2) < self.kernel_size[0]) or (x.size(3) < self.kernel_size[1]) + if input_pad: + pad_h = max(self.kernel_size[0] - x.size(2), 0) + pad_w = max(self.kernel_size[1] - x.size(3), 0) + x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() + offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0) + offset = offset.contiguous() + out = deform_conv2d( + x, + offset, + self.weight, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deform_groups, + False, + self.im2col_step, + ) + if input_pad: + out = out[:, :, : out.size(2) - pad_h, : out.size(3) - pad_w].contiguous() + return out + + def __repr__(self): + s = self.__class__.__name__ + s += f'(in_channels={self.in_channels},\n' + s += f'out_channels={self.out_channels},\n' + s += f'kernel_size={self.kernel_size},\n' + s += f'stride={self.stride},\n' + s += f'padding={self.padding},\n' + s += f'dilation={self.dilation},\n' + s += f'groups={self.groups},\n' + s += f'deform_groups={self.deform_groups},\n' + # bias is not supported in DeformConv2d. + s += 'bias=False)' + return s + + +@CONV_LAYERS.register_module('DCN') +class DeformConv2dPack(DeformConv2d): + """A Deformable Conv Encapsulation that acts as normal Conv layers. + + The offset tensor is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`. + The spatial arrangement is like: + + .. code:: text + + (x0, y0) (x1, y1) (x2, y2) + (x3, y3) (x4, y4) (x5, y5) + (x6, y6) (x7, y7) (x8, y8) + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int or tuple[int]): Same as nn.Conv2d. + padding (int or tuple[int]): Same as nn.Conv2d. + dilation (int or tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. + """ + + _version = 2 + + def __init__(self, *args, **kwargs): + super(DeformConv2dPack, self).__init__(*args, **kwargs) + self.conv_offset = nn.Conv2d( + self.in_channels, + self.deform_groups * 2 * self.kernel_size[0] * self.kernel_size[1], + kernel_size=self.kernel_size, + stride=_pair(self.stride), + padding=_pair(self.padding), + dilation=_pair(self.dilation), + bias=True, + ) + self.init_offset() + + def init_offset(self): + self.conv_offset.weight.data.zero_() + self.conv_offset.bias.data.zero_() + + def forward(self, x): + offset = self.conv_offset(x) + return deform_conv2d( + x, + offset, + self.weight, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deform_groups, + False, + self.im2col_step, + ) + + def _load_from_state_dict( + self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ): + version = local_metadata.get('version', None) + + if version is None or version < 2: + # the key is different in early versions + # In version < 2, DeformConvPack loads previous benchmark models. + if prefix + 'conv_offset.weight' not in state_dict and prefix[:-1] + '_offset.weight' in state_dict: + state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(prefix[:-1] + '_offset.weight') + if prefix + 'conv_offset.bias' not in state_dict and prefix[:-1] + '_offset.bias' in state_dict: + state_dict[prefix + 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + '_offset.bias') + + if version is not None and version > 1: + print_log(f'DeformConv2dPack {prefix.rstrip(".")} is upgraded to ' 'version 2.', logger='root') + + super()._load_from_state_dict( + state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py new file mode 100644 index 000000000000..1528a0748922 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deform_roi_pool.py @@ -0,0 +1,165 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['deform_roi_pool_forward', 'deform_roi_pool_backward']) + + +class DeformRoIPoolFunction(Function): + @staticmethod + def symbolic(g, input, rois, offset, output_size, spatial_scale, sampling_ratio, gamma): + return g.op( + 'mmcv::MMCVDeformRoIPool', + input, + rois, + offset, + pooled_height_i=output_size[0], + pooled_width_i=output_size[1], + spatial_scale_f=spatial_scale, + sampling_ratio_f=sampling_ratio, + gamma_f=gamma, + ) + + @staticmethod + def forward(ctx, input, rois, offset, output_size, spatial_scale=1.0, sampling_ratio=0, gamma=0.1): + if offset is None: + offset = input.new_zeros(0) + ctx.output_size = _pair(output_size) + ctx.spatial_scale = float(spatial_scale) + ctx.sampling_ratio = int(sampling_ratio) + ctx.gamma = float(gamma) + + assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' + + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], ctx.output_size[1]) + output = input.new_zeros(output_shape) + + ext_module.deform_roi_pool_forward( + input, + rois, + offset, + output, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + gamma=ctx.gamma, + ) + + ctx.save_for_backward(input, rois, offset) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, rois, offset = ctx.saved_tensors + grad_input = grad_output.new_zeros(input.shape) + grad_offset = grad_output.new_zeros(offset.shape) + + ext_module.deform_roi_pool_backward( + grad_output, + input, + rois, + offset, + grad_input, + grad_offset, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + gamma=ctx.gamma, + ) + if grad_offset.numel() == 0: + grad_offset = None + return grad_input, None, grad_offset, None, None, None, None + + +deform_roi_pool = DeformRoIPoolFunction.apply + + +class DeformRoIPool(nn.Module): + def __init__(self, output_size, spatial_scale=1.0, sampling_ratio=0, gamma=0.1): + super(DeformRoIPool, self).__init__() + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + self.sampling_ratio = int(sampling_ratio) + self.gamma = float(gamma) + + def forward(self, input, rois, offset=None): + return deform_roi_pool( + input, rois, offset, self.output_size, self.spatial_scale, self.sampling_ratio, self.gamma + ) + + +class DeformRoIPoolPack(DeformRoIPool): + def __init__( + self, output_size, output_channels, deform_fc_channels=1024, spatial_scale=1.0, sampling_ratio=0, gamma=0.1 + ): + super(DeformRoIPoolPack, self).__init__(output_size, spatial_scale, sampling_ratio, gamma) + + self.output_channels = output_channels + self.deform_fc_channels = deform_fc_channels + + self.offset_fc = nn.Sequential( + nn.Linear(self.output_size[0] * self.output_size[1] * self.output_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, self.output_size[0] * self.output_size[1] * 2), + ) + self.offset_fc[-1].weight.data.zero_() + self.offset_fc[-1].bias.data.zero_() + + def forward(self, input, rois): + assert input.size(1) == self.output_channels + x = deform_roi_pool(input, rois, None, self.output_size, self.spatial_scale, self.sampling_ratio, self.gamma) + rois_num = rois.size(0) + offset = self.offset_fc(x.view(rois_num, -1)) + offset = offset.view(rois_num, 2, self.output_size[0], self.output_size[1]) + return deform_roi_pool( + input, rois, offset, self.output_size, self.spatial_scale, self.sampling_ratio, self.gamma + ) + + +class ModulatedDeformRoIPoolPack(DeformRoIPool): + def __init__( + self, output_size, output_channels, deform_fc_channels=1024, spatial_scale=1.0, sampling_ratio=0, gamma=0.1 + ): + super(ModulatedDeformRoIPoolPack, self).__init__(output_size, spatial_scale, sampling_ratio, gamma) + + self.output_channels = output_channels + self.deform_fc_channels = deform_fc_channels + + self.offset_fc = nn.Sequential( + nn.Linear(self.output_size[0] * self.output_size[1] * self.output_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, self.output_size[0] * self.output_size[1] * 2), + ) + self.offset_fc[-1].weight.data.zero_() + self.offset_fc[-1].bias.data.zero_() + + self.mask_fc = nn.Sequential( + nn.Linear(self.output_size[0] * self.output_size[1] * self.output_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, self.output_size[0] * self.output_size[1] * 1), + nn.Sigmoid(), + ) + self.mask_fc[2].weight.data.zero_() + self.mask_fc[2].bias.data.zero_() + + def forward(self, input, rois): + assert input.size(1) == self.output_channels + x = deform_roi_pool(input, rois, None, self.output_size, self.spatial_scale, self.sampling_ratio, self.gamma) + rois_num = rois.size(0) + offset = self.offset_fc(x.view(rois_num, -1)) + offset = offset.view(rois_num, 2, self.output_size[0], self.output_size[1]) + mask = self.mask_fc(x.view(rois_num, -1)) + mask = mask.view(rois_num, 1, self.output_size[0], self.output_size[1]) + d = deform_roi_pool(input, rois, offset, self.output_size, self.spatial_scale, self.sampling_ratio, self.gamma) + return d * mask diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py new file mode 100644 index 000000000000..47d87b75d87f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/deprecated_wrappers.py @@ -0,0 +1,43 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# This file is for backward compatibility. +# Module wrappers for empty tensor have been moved to mmcv.cnn.bricks. +import warnings + +from ..cnn.bricks.wrappers import Conv2d, ConvTranspose2d, Linear, MaxPool2d + + +class Conv2d_deprecated(Conv2d): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing Conv2d wrapper from "mmcv.ops" will be deprecated in' + ' the future. Please import them from "mmcv.cnn" instead' + ) + + +class ConvTranspose2d_deprecated(ConvTranspose2d): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing ConvTranspose2d wrapper from "mmcv.ops" will be ' + 'deprecated in the future. Please import them from "mmcv.cnn" ' + 'instead' + ) + + +class MaxPool2d_deprecated(MaxPool2d): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing MaxPool2d wrapper from "mmcv.ops" will be deprecated in' + ' the future. Please import them from "mmcv.cnn" instead' + ) + + +class Linear_deprecated(Linear): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing Linear wrapper from "mmcv.ops" will be deprecated in' + ' the future. Please import them from "mmcv.cnn" instead' + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py new file mode 100644 index 000000000000..b218ed24ebc1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/focal_loss.py @@ -0,0 +1,183 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', + [ + 'sigmoid_focal_loss_forward', + 'sigmoid_focal_loss_backward', + 'softmax_focal_loss_forward', + 'softmax_focal_loss_backward', + ], +) + + +class SigmoidFocalLossFunction(Function): + @staticmethod + def symbolic(g, input, target, gamma, alpha, weight, reduction): + return g.op( + 'mmcv::MMCVSigmoidFocalLoss', + input, + target, + gamma_f=gamma, + alpha_f=alpha, + weight_f=weight, + reduction_s=reduction, + ) + + @staticmethod + def forward(ctx, input, target, gamma=2.0, alpha=0.25, weight=None, reduction='mean'): + + assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor)) + assert input.dim() == 2 + assert target.dim() == 1 + assert input.size(0) == target.size(0) + if weight is None: + weight = input.new_empty(0) + else: + assert weight.dim() == 1 + assert input.size(1) == weight.size(0) + ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2} + assert reduction in ctx.reduction_dict.keys() + + ctx.gamma = float(gamma) + ctx.alpha = float(alpha) + ctx.reduction = ctx.reduction_dict[reduction] + + output = input.new_zeros(input.size()) + + ext_module.sigmoid_focal_loss_forward(input, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha) + if ctx.reduction == ctx.reduction_dict['mean']: + output = output.sum() / input.size(0) + elif ctx.reduction == ctx.reduction_dict['sum']: + output = output.sum() + ctx.save_for_backward(input, target, weight) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, target, weight = ctx.saved_tensors + + grad_input = input.new_zeros(input.size()) + + ext_module.sigmoid_focal_loss_backward(input, target, weight, grad_input, gamma=ctx.gamma, alpha=ctx.alpha) + + grad_input *= grad_output + if ctx.reduction == ctx.reduction_dict['mean']: + grad_input /= input.size(0) + return grad_input, None, None, None, None, None + + +sigmoid_focal_loss = SigmoidFocalLossFunction.apply + + +class SigmoidFocalLoss(nn.Module): + def __init__(self, gamma, alpha, weight=None, reduction='mean'): + super(SigmoidFocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + self.register_buffer('weight', weight) + self.reduction = reduction + + def forward(self, input, target): + return sigmoid_focal_loss(input, target, self.gamma, self.alpha, self.weight, self.reduction) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(gamma={self.gamma}, ' + s += f'alpha={self.alpha}, ' + s += f'reduction={self.reduction})' + return s + + +class SoftmaxFocalLossFunction(Function): + @staticmethod + def symbolic(g, input, target, gamma, alpha, weight, reduction): + return g.op( + 'mmcv::MMCVSoftmaxFocalLoss', + input, + target, + gamma_f=gamma, + alpha_f=alpha, + weight_f=weight, + reduction_s=reduction, + ) + + @staticmethod + def forward(ctx, input, target, gamma=2.0, alpha=0.25, weight=None, reduction='mean'): + + assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor)) + assert input.dim() == 2 + assert target.dim() == 1 + assert input.size(0) == target.size(0) + if weight is None: + weight = input.new_empty(0) + else: + assert weight.dim() == 1 + assert input.size(1) == weight.size(0) + ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2} + assert reduction in ctx.reduction_dict.keys() + + ctx.gamma = float(gamma) + ctx.alpha = float(alpha) + ctx.reduction = ctx.reduction_dict[reduction] + + channel_stats, _ = torch.max(input, dim=1) + input_softmax = input - channel_stats.unsqueeze(1).expand_as(input) + input_softmax.exp_() + + channel_stats = input_softmax.sum(dim=1) + input_softmax /= channel_stats.unsqueeze(1).expand_as(input) + + output = input.new_zeros(input.size(0)) + ext_module.softmax_focal_loss_forward(input_softmax, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha) + + if ctx.reduction == ctx.reduction_dict['mean']: + output = output.sum() / input.size(0) + elif ctx.reduction == ctx.reduction_dict['sum']: + output = output.sum() + ctx.save_for_backward(input_softmax, target, weight) + return output + + @staticmethod + def backward(ctx, grad_output): + input_softmax, target, weight = ctx.saved_tensors + buff = input_softmax.new_zeros(input_softmax.size(0)) + grad_input = input_softmax.new_zeros(input_softmax.size()) + + ext_module.softmax_focal_loss_backward( + input_softmax, target, weight, buff, grad_input, gamma=ctx.gamma, alpha=ctx.alpha + ) + + grad_input *= grad_output + if ctx.reduction == ctx.reduction_dict['mean']: + grad_input /= input_softmax.size(0) + return grad_input, None, None, None, None, None + + +softmax_focal_loss = SoftmaxFocalLossFunction.apply + + +class SoftmaxFocalLoss(nn.Module): + def __init__(self, gamma, alpha, weight=None, reduction='mean'): + super(SoftmaxFocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + self.register_buffer('weight', weight) + self.reduction = reduction + + def forward(self, input, target): + return softmax_focal_loss(input, target, self.gamma, self.alpha, self.weight, self.reduction) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(gamma={self.gamma}, ' + s += f'alpha={self.alpha}, ' + s += f'reduction={self.reduction})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py new file mode 100644 index 000000000000..606855fef5f9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/furthest_point_sample.py @@ -0,0 +1,74 @@ +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['furthest_point_sampling_forward', 'furthest_point_sampling_with_dist_forward'] +) + + +class FurthestPointSampling(Function): + """Uses iterative furthest point sampling to select a set of features whose + corresponding points have the furthest distance.""" + + @staticmethod + def forward(ctx, points_xyz: torch.Tensor, num_points: int) -> torch.Tensor: + """ + Args: + points_xyz (Tensor): (B, N, 3) where N > num_points. + num_points (int): Number of points in the sampled set. + + Returns: + Tensor: (B, num_points) indices of the sampled points. + """ + assert points_xyz.is_contiguous() + + B, N = points_xyz.size()[:2] + output = torch.cuda.IntTensor(B, num_points) + temp = torch.cuda.FloatTensor(B, N).fill_(1e10) + + ext_module.furthest_point_sampling_forward( + points_xyz, temp, output, b=B, n=N, m=num_points, + ) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(output) + return output + + @staticmethod + def backward(xyz, a=None): + return None, None + + +class FurthestPointSamplingWithDist(Function): + """Uses iterative furthest point sampling to select a set of features whose + corresponding points have the furthest distance.""" + + @staticmethod + def forward(ctx, points_dist: torch.Tensor, num_points: int) -> torch.Tensor: + """ + Args: + points_dist (Tensor): (B, N, N) Distance between each point pair. + num_points (int): Number of points in the sampled set. + + Returns: + Tensor: (B, num_points) indices of the sampled points. + """ + assert points_dist.is_contiguous() + + B, N, _ = points_dist.size() + output = points_dist.new_zeros([B, num_points], dtype=torch.int32) + temp = points_dist.new_zeros([B, N]).fill_(1e10) + + ext_module.furthest_point_sampling_with_dist_forward(points_dist, temp, output, b=B, n=N, m=num_points) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(output) + return output + + @staticmethod + def backward(xyz, a=None): + return None, None + + +furthest_point_sample = FurthestPointSampling.apply +furthest_point_sample_with_dist = FurthestPointSamplingWithDist.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py new file mode 100644 index 000000000000..a1f89dd27ebe --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/fused_bias_leakyrelu.py @@ -0,0 +1,249 @@ +# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501 + +# Copyright (c) 2021, NVIDIA Corporation. All rights reserved. +# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator +# Augmentation (ADA) +# ======================================================================= + +# 1. Definitions + +# "Licensor" means any person or entity that distributes its Work. + +# "Software" means the original work of authorship made available under +# this License. + +# "Work" means the Software and any additions to or derivative works of +# the Software that are made available under this License. + +# The terms "reproduce," "reproduction," "derivative works," and +# "distribution" have the meaning as provided under U.S. copyright law; +# provided, however, that for the purposes of this License, derivative +# works shall not include works that remain separable from, or merely +# link (or bind by name) to the interfaces of, the Work. + +# Works, including the Software, are "made available" under this License +# by including in or with the Work either (a) a copyright notice +# referencing the applicability of this License to the Work, or (b) a +# copy of this License. + +# 2. License Grants + +# 2.1 Copyright Grant. Subject to the terms and conditions of this +# License, each Licensor grants to you a perpetual, worldwide, +# non-exclusive, royalty-free, copyright license to reproduce, +# prepare derivative works of, publicly display, publicly perform, +# sublicense and distribute its Work and any resulting derivative +# works in any form. + +# 3. Limitations + +# 3.1 Redistribution. You may reproduce or distribute the Work only +# if (a) you do so under this License, (b) you include a complete +# copy of this License with your distribution, and (c) you retain +# without modification any copyright, patent, trademark, or +# attribution notices that are present in the Work. + +# 3.2 Derivative Works. You may specify that additional or different +# terms apply to the use, reproduction, and distribution of your +# derivative works of the Work ("Your Terms") only if (a) Your Terms +# provide that the use limitation in Section 3.3 applies to your +# derivative works, and (b) you identify the specific derivative +# works that are subject to Your Terms. Notwithstanding Your Terms, +# this License (including the redistribution requirements in Section +# 3.1) will continue to apply to the Work itself. + +# 3.3 Use Limitation. The Work and any derivative works thereof only +# may be used or intended for use non-commercially. Notwithstanding +# the foregoing, NVIDIA and its affiliates may use the Work and any +# derivative works commercially. As used herein, "non-commercially" +# means for research or evaluation purposes only. + +# 3.4 Patent Claims. If you bring or threaten to bring a patent claim +# against any Licensor (including any claim, cross-claim or +# counterclaim in a lawsuit) to enforce any patents that you allege +# are infringed by any Work, then your rights under this License from +# such Licensor (including the grant in Section 2.1) will terminate +# immediately. + +# 3.5 Trademarks. This License does not grant any rights to use any +# Licensor’s or its affiliates’ names, logos, or trademarks, except +# as necessary to reproduce the notices described in this License. + +# 3.6 Termination. If you violate any term of this License, then your +# rights under this License (including the grant in Section 2.1) will +# terminate immediately. + +# 4. Disclaimer of Warranty. + +# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR +# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER +# THIS LICENSE. + +# 5. Limitation of Liability. + +# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL +# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE +# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, +# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF +# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK +# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, +# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER +# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGES. + +# ======================================================================= + +import torch +import torch.nn.functional as F +from torch import nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['fused_bias_leakyrelu']) + + +class FusedBiasLeakyReLUFunctionBackward(Function): + """Calculate second order deviation. + + This function is to compute the second order deviation for the fused leaky + relu operation. + """ + + @staticmethod + def forward(ctx, grad_output, out, negative_slope, scale): + ctx.save_for_backward(out) + ctx.negative_slope = negative_slope + ctx.scale = scale + + empty = grad_output.new_empty(0) + + grad_input = ext_module.fused_bias_leakyrelu( + grad_output, empty, out, act=3, grad=1, alpha=negative_slope, scale=scale + ) + + dim = [0] + + if grad_input.ndim > 2: + dim += list(range(2, grad_input.ndim)) + + grad_bias = grad_input.sum(dim).detach() + + return grad_input, grad_bias + + @staticmethod + def backward(ctx, gradgrad_input, gradgrad_bias): + (out,) = ctx.saved_tensors + + # The second order deviation, in fact, contains two parts, while the + # the first part is zero. Thus, we direct consider the second part + # which is similar with the first order deviation in implementation. + gradgrad_out = ext_module.fused_bias_leakyrelu( + gradgrad_input, gradgrad_bias.to(out.dtype), out, act=3, grad=1, alpha=ctx.negative_slope, scale=ctx.scale + ) + + return gradgrad_out, None, None, None + + +class FusedBiasLeakyReLUFunction(Function): + @staticmethod + def forward(ctx, input, bias, negative_slope, scale): + empty = input.new_empty(0) + + out = ext_module.fused_bias_leakyrelu(input, bias, empty, act=3, grad=0, alpha=negative_slope, scale=scale) + ctx.save_for_backward(out) + ctx.negative_slope = negative_slope + ctx.scale = scale + + return out + + @staticmethod + def backward(ctx, grad_output): + (out,) = ctx.saved_tensors + + grad_input, grad_bias = FusedBiasLeakyReLUFunctionBackward.apply( + grad_output, out, ctx.negative_slope, ctx.scale + ) + + return grad_input, grad_bias, None, None + + +class FusedBiasLeakyReLU(nn.Module): + """Fused bias leaky ReLU. + + This function is introduced in the StyleGAN2: + http://arxiv.org/abs/1912.04958 + + The bias term comes from the convolution operation. In addition, to keep + the variance of the feature map or gradients unchanged, they also adopt a + scale similarly with Kaiming initialization. However, since the + :math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the + final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501 + your own scale. + + TODO: Implement the CPU version. + + Args: + channel (int): The channel number of the feature map. + negative_slope (float, optional): Same as nn.LeakyRelu. + Defaults to 0.2. + scale (float, optional): A scalar to adjust the variance of the feature + map. Defaults to 2**0.5. + """ + + def __init__(self, num_channels, negative_slope=0.2, scale=2 ** 0.5): + super(FusedBiasLeakyReLU, self).__init__() + + self.bias = nn.Parameter(torch.zeros(num_channels)) + self.negative_slope = negative_slope + self.scale = scale + + def forward(self, input): + return fused_bias_leakyrelu(input, self.bias, self.negative_slope, self.scale) + + +def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2 ** 0.5): + """Fused bias leaky ReLU function. + + This function is introduced in the StyleGAN2: + http://arxiv.org/abs/1912.04958 + + The bias term comes from the convolution operation. In addition, to keep + the variance of the feature map or gradients unchanged, they also adopt a + scale similarly with Kaiming initialization. However, since the + :math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the + final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501 + your own scale. + + Args: + input (torch.Tensor): Input feature map. + bias (nn.Parameter): The bias from convolution operation. + negative_slope (float, optional): Same as nn.LeakyRelu. + Defaults to 0.2. + scale (float, optional): A scalar to adjust the variance of the feature + map. Defaults to 2**0.5. + + Returns: + torch.Tensor: Feature map after non-linear activation. + """ + + if not input.is_cuda: + return bias_leakyrelu_ref(input, bias, negative_slope, scale) + + return FusedBiasLeakyReLUFunction.apply(input, bias.to(input.dtype), negative_slope, scale) + + +def bias_leakyrelu_ref(x, bias, negative_slope=0.2, scale=2 ** 0.5): + + if bias is not None: + assert bias.ndim == 1 + assert bias.shape[0] == x.shape[1] + x = x + bias.reshape([-1 if i == 1 else 1 for i in range(x.ndim)]) + + x = F.leaky_relu(x, negative_slope) + if scale != 1: + x = x * scale + + return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py new file mode 100644 index 000000000000..b6aa89d50279 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/gather_points.py @@ -0,0 +1,47 @@ +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['gather_points_forward', 'gather_points_backward']) + + +class GatherPoints(Function): + """Gather points with given index.""" + + @staticmethod + def forward(ctx, features: torch.Tensor, indices: torch.Tensor) -> torch.Tensor: + """ + Args: + features (Tensor): (B, C, N) features to gather. + indices (Tensor): (B, M) where M is the number of points. + + Returns: + Tensor: (B, C, M) where M is the number of points. + """ + assert features.is_contiguous() + assert indices.is_contiguous() + + B, npoint = indices.size() + _, C, N = features.size() + output = torch.cuda.FloatTensor(B, C, npoint) + + ext_module.gather_points_forward(features, indices, output, b=B, c=C, n=N, npoints=npoint) + + ctx.for_backwards = (indices, C, N) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(indices) + return output + + @staticmethod + def backward(ctx, grad_out): + idx, C, N = ctx.for_backwards + B, npoint = idx.size() + + grad_features = torch.cuda.FloatTensor(B, C, N).zero_() + grad_out_data = grad_out.data.contiguous() + ext_module.gather_points_backward(grad_out_data, idx, grad_features.data, b=B, c=C, n=N, npoints=npoint) + return grad_features, None + + +gather_points = GatherPoints.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py new file mode 100644 index 000000000000..85e8956baa99 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/group_points.py @@ -0,0 +1,206 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Tuple + +import torch +from torch import nn as nn +from torch.autograd import Function + +from ..utils import ext_loader +from .ball_query import ball_query +from .knn import knn + +ext_module = ext_loader.load_ext('_ext', ['group_points_forward', 'group_points_backward']) + + +class QueryAndGroup(nn.Module): + """Groups points with a ball query of radius. + + Args: + max_radius (float): The maximum radius of the balls. + If None is given, we will use kNN sampling instead of ball query. + sample_num (int): Maximum number of features to gather in the ball. + min_radius (float, optional): The minimum radius of the balls. + Default: 0. + use_xyz (bool, optional): Whether to use xyz. + Default: True. + return_grouped_xyz (bool, optional): Whether to return grouped xyz. + Default: False. + normalize_xyz (bool, optional): Whether to normalize xyz. + Default: False. + uniform_sample (bool, optional): Whether to sample uniformly. + Default: False + return_unique_cnt (bool, optional): Whether to return the count of + unique samples. Default: False. + return_grouped_idx (bool, optional): Whether to return grouped idx. + Default: False. + """ + + def __init__( + self, + max_radius, + sample_num, + min_radius=0, + use_xyz=True, + return_grouped_xyz=False, + normalize_xyz=False, + uniform_sample=False, + return_unique_cnt=False, + return_grouped_idx=False, + ): + super().__init__() + self.max_radius = max_radius + self.min_radius = min_radius + self.sample_num = sample_num + self.use_xyz = use_xyz + self.return_grouped_xyz = return_grouped_xyz + self.normalize_xyz = normalize_xyz + self.uniform_sample = uniform_sample + self.return_unique_cnt = return_unique_cnt + self.return_grouped_idx = return_grouped_idx + if self.return_unique_cnt: + assert self.uniform_sample, 'uniform_sample should be True when ' 'returning the count of unique samples' + if self.max_radius is None: + assert not self.normalize_xyz, 'can not normalize grouped xyz when max_radius is None' + + def forward(self, points_xyz, center_xyz, features=None): + """ + Args: + points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. + center_xyz (Tensor): (B, npoint, 3) coordinates of the centriods. + features (Tensor): (B, C, N) Descriptors of the features. + + Returns: + Tensor: (B, 3 + C, npoint, sample_num) Grouped feature. + """ + # if self.max_radius is None, we will perform kNN instead of ball query + # idx is of shape [B, npoint, sample_num] + if self.max_radius is None: + idx = knn(self.sample_num, points_xyz, center_xyz, False) + idx = idx.transpose(1, 2).contiguous() + else: + idx = ball_query(self.min_radius, self.max_radius, self.sample_num, points_xyz, center_xyz) + + if self.uniform_sample: + unique_cnt = torch.zeros((idx.shape[0], idx.shape[1])) + for i_batch in range(idx.shape[0]): + for i_region in range(idx.shape[1]): + unique_ind = torch.unique(idx[i_batch, i_region, :]) + num_unique = unique_ind.shape[0] + unique_cnt[i_batch, i_region] = num_unique + sample_ind = torch.randint(0, num_unique, (self.sample_num - num_unique,), dtype=torch.long) + all_ind = torch.cat((unique_ind, unique_ind[sample_ind])) + idx[i_batch, i_region, :] = all_ind + + xyz_trans = points_xyz.transpose(1, 2).contiguous() + # (B, 3, npoint, sample_num) + grouped_xyz = grouping_operation(xyz_trans, idx) + grouped_xyz_diff = grouped_xyz - center_xyz.transpose(1, 2).unsqueeze(-1) # relative offsets + if self.normalize_xyz: + grouped_xyz_diff /= self.max_radius + + if features is not None: + grouped_features = grouping_operation(features, idx) + if self.use_xyz: + # (B, C + 3, npoint, sample_num) + new_features = torch.cat([grouped_xyz_diff, grouped_features], dim=1) + else: + new_features = grouped_features + else: + assert self.use_xyz, 'Cannot have not features and not use xyz as a feature!' + new_features = grouped_xyz_diff + + ret = [new_features] + if self.return_grouped_xyz: + ret.append(grouped_xyz) + if self.return_unique_cnt: + ret.append(unique_cnt) + if self.return_grouped_idx: + ret.append(idx) + if len(ret) == 1: + return ret[0] + else: + return tuple(ret) + + +class GroupAll(nn.Module): + """Group xyz with feature. + + Args: + use_xyz (bool): Whether to use xyz. + """ + + def __init__(self, use_xyz: bool = True): + super().__init__() + self.use_xyz = use_xyz + + def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None): + """ + Args: + xyz (Tensor): (B, N, 3) xyz coordinates of the features. + new_xyz (Tensor): new xyz coordinates of the features. + features (Tensor): (B, C, N) features to group. + + Returns: + Tensor: (B, C + 3, 1, N) Grouped feature. + """ + grouped_xyz = xyz.transpose(1, 2).unsqueeze(2) + if features is not None: + grouped_features = features.unsqueeze(2) + if self.use_xyz: + # (B, 3 + C, 1, N) + new_features = torch.cat([grouped_xyz, grouped_features], dim=1) + else: + new_features = grouped_features + else: + new_features = grouped_xyz + + return new_features + + +class GroupingOperation(Function): + """Group feature with given index.""" + + @staticmethod + def forward(ctx, features: torch.Tensor, indices: torch.Tensor) -> torch.Tensor: + """ + Args: + features (Tensor): (B, C, N) tensor of features to group. + indices (Tensor): (B, npoint, nsample) the indices of + features to group with. + + Returns: + Tensor: (B, C, npoint, nsample) Grouped features. + """ + features = features.contiguous() + indices = indices.contiguous() + + B, nfeatures, nsample = indices.size() + _, C, N = features.size() + output = torch.cuda.FloatTensor(B, C, nfeatures, nsample) + + ext_module.group_points_forward(B, C, N, nfeatures, nsample, features, indices, output) + + ctx.for_backwards = (indices, N) + return output + + @staticmethod + def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + grad_out (Tensor): (B, C, npoint, nsample) tensor of the gradients + of the output from forward. + + Returns: + Tensor: (B, C, N) gradient of the features. + """ + idx, N = ctx.for_backwards + + B, C, npoint, nsample = grad_out.size() + grad_features = torch.cuda.FloatTensor(B, C, N).zero_() + + grad_out_data = grad_out.data.contiguous() + ext_module.group_points_backward(B, C, N, npoint, nsample, grad_out_data, idx, grad_features.data) + return grad_features, None + + +grouping_operation = GroupingOperation.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py new file mode 100644 index 000000000000..5be5ea46aa91 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/info.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import glob +import os + +import torch + +if torch.__version__ == 'parrots': + import parrots + + def get_compiler_version(): + return 'GCC ' + parrots.version.compiler + + def get_compiling_cuda_version(): + return parrots.version.cuda + + +else: + from ..utils import ext_loader + + ext_module = ext_loader.load_ext('_ext', ['get_compiler_version', 'get_compiling_cuda_version']) + + def get_compiler_version(): + return ext_module.get_compiler_version() + + def get_compiling_cuda_version(): + return ext_module.get_compiling_cuda_version() + + +def get_onnxruntime_op_path(): + wildcard = os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), '_ext_ort.*.so') + + paths = glob.glob(wildcard) + if len(paths) > 0: + return paths[0] + else: + return '' diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py new file mode 100644 index 000000000000..35a098e50995 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/iou3d.py @@ -0,0 +1,82 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['iou3d_boxes_iou_bev_forward', 'iou3d_nms_forward', 'iou3d_nms_normal_forward'] +) + + +def boxes_iou_bev(boxes_a, boxes_b): + """Calculate boxes IoU in the Bird's Eye View. + + Args: + boxes_a (torch.Tensor): Input boxes a with shape (M, 5). + boxes_b (torch.Tensor): Input boxes b with shape (N, 5). + + Returns: + ans_iou (torch.Tensor): IoU result with shape (M, N). + """ + ans_iou = boxes_a.new_zeros(torch.Size((boxes_a.shape[0], boxes_b.shape[0]))) + + ext_module.iou3d_boxes_iou_bev_forward(boxes_a.contiguous(), boxes_b.contiguous(), ans_iou) + + return ans_iou + + +def nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None): + """NMS function GPU implementation (for BEV boxes). The overlap of two + boxes for IoU calculation is defined as the exact overlapping area of the + two boxes. In this function, one can also set ``pre_max_size`` and + ``post_max_size``. + + Args: + boxes (torch.Tensor): Input boxes with the shape of [N, 5] + ([x1, y1, x2, y2, ry]). + scores (torch.Tensor): Scores of boxes with the shape of [N]. + thresh (float): Overlap threshold of NMS. + pre_max_size (int, optional): Max size of boxes before NMS. + Default: None. + post_max_size (int, optional): Max size of boxes after NMS. + Default: None. + + Returns: + torch.Tensor: Indexes after NMS. + """ + assert boxes.size(1) == 5, 'Input boxes shape should be [N, 5]' + order = scores.sort(0, descending=True)[1] + + if pre_max_size is not None: + order = order[:pre_max_size] + boxes = boxes[order].contiguous() + + keep = torch.zeros(boxes.size(0), dtype=torch.long) + num_out = ext_module.iou3d_nms_forward(boxes, keep, thresh) + keep = order[keep[:num_out].cuda(boxes.device)].contiguous() + if post_max_size is not None: + keep = keep[:post_max_size] + return keep + + +def nms_normal_bev(boxes, scores, thresh): + """Normal NMS function GPU implementation (for BEV boxes). The overlap of + two boxes for IoU calculation is defined as the exact overlapping area of + the two boxes WITH their yaw angle set to 0. + + Args: + boxes (torch.Tensor): Input boxes with shape (N, 5). + scores (torch.Tensor): Scores of predicted boxes with shape (N). + thresh (float): Overlap threshold of NMS. + + Returns: + torch.Tensor: Remaining indices with scores in descending order. + """ + assert boxes.shape[1] == 5, 'Input boxes shape should be [N, 5]' + order = scores.sort(0, descending=True)[1] + + boxes = boxes[order].contiguous() + + keep = torch.zeros(boxes.size(0), dtype=torch.long) + num_out = ext_module.iou3d_nms_normal_forward(boxes, keep, thresh) + return order[keep[:num_out].cuda(boxes.device)].contiguous() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py new file mode 100644 index 000000000000..66be24b2c0db --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/knn.py @@ -0,0 +1,73 @@ +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['knn_forward']) + + +class KNN(Function): + r"""KNN (CUDA) based on heap data structure. + Modified from `PAConv `_. + + Find k-nearest points. + """ + + @staticmethod + def forward( + ctx, k: int, xyz: torch.Tensor, center_xyz: torch.Tensor = None, transposed: bool = False + ) -> torch.Tensor: + """ + Args: + k (int): number of nearest neighbors. + xyz (Tensor): (B, N, 3) if transposed == False, else (B, 3, N). + xyz coordinates of the features. + center_xyz (Tensor, optional): (B, npoint, 3) if transposed == + False, else (B, 3, npoint). centers of the knn query. + Default: None. + transposed (bool, optional): whether the input tensors are + transposed. Should not explicitly use this keyword when + calling knn (=KNN.apply), just add the fourth param. + Default: False. + + Returns: + Tensor: (B, k, npoint) tensor with the indices of + the features that form k-nearest neighbours. + """ + assert (k > 0) & (k < 100), 'k should be in range(0, 100)' + + if center_xyz is None: + center_xyz = xyz + + if transposed: + xyz = xyz.transpose(2, 1).contiguous() + center_xyz = center_xyz.transpose(2, 1).contiguous() + + assert xyz.is_contiguous() # [B, N, 3] + assert center_xyz.is_contiguous() # [B, npoint, 3] + + center_xyz_device = center_xyz.get_device() + assert center_xyz_device == xyz.get_device(), 'center_xyz and xyz should be put on the same device' + if torch.cuda.current_device() != center_xyz_device: + torch.cuda.set_device(center_xyz_device) + + B, npoint, _ = center_xyz.shape + N = xyz.shape[1] + + idx = center_xyz.new_zeros((B, npoint, k)).int() + dist2 = center_xyz.new_zeros((B, npoint, k)).float() + + ext_module.knn_forward(xyz, center_xyz, idx, dist2, b=B, n=N, m=npoint, nsample=k) + # idx shape to [B, k, npoint] + idx = idx.transpose(2, 1).contiguous() + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(idx) + return idx + + @staticmethod + def backward(ctx, a=None): + return None, None, None + + +knn = KNN.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py new file mode 100644 index 000000000000..c067f11ca8c6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/masked_conv.py @@ -0,0 +1,82 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['masked_im2col_forward', 'masked_col2im_forward']) + + +class MaskedConv2dFunction(Function): + @staticmethod + def symbolic(g, features, mask, weight, bias, padding, stride): + return g.op('mmcv::MMCVMaskedConv2d', features, mask, weight, bias, padding_i=padding, stride_i=stride) + + @staticmethod + def forward(ctx, features, mask, weight, bias, padding=0, stride=1): + assert mask.dim() == 3 and mask.size(0) == 1 + assert features.dim() == 4 and features.size(0) == 1 + assert features.size()[2:] == mask.size()[1:] + pad_h, pad_w = _pair(padding) + stride_h, stride_w = _pair(stride) + if stride_h != 1 or stride_w != 1: + raise ValueError('Stride could not only be 1 in masked_conv2d currently.') + out_channel, in_channel, kernel_h, kernel_w = weight.size() + + batch_size = features.size(0) + out_h = int(math.floor((features.size(2) + 2 * pad_h - (kernel_h - 1) - 1) / stride_h + 1)) + out_w = int(math.floor((features.size(3) + 2 * pad_w - (kernel_h - 1) - 1) / stride_w + 1)) + mask_inds = torch.nonzero(mask[0] > 0, as_tuple=False) + output = features.new_zeros(batch_size, out_channel, out_h, out_w) + if mask_inds.numel() > 0: + mask_h_idx = mask_inds[:, 0].contiguous() + mask_w_idx = mask_inds[:, 1].contiguous() + data_col = features.new_zeros(in_channel * kernel_h * kernel_w, mask_inds.size(0)) + ext_module.masked_im2col_forward( + features, + mask_h_idx, + mask_w_idx, + data_col, + kernel_h=kernel_h, + kernel_w=kernel_w, + pad_h=pad_h, + pad_w=pad_w, + ) + + masked_output = torch.addmm(1, bias[:, None], 1, weight.view(out_channel, -1), data_col) + ext_module.masked_col2im_forward( + masked_output, mask_h_idx, mask_w_idx, output, height=out_h, width=out_w, channels=out_channel + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + return (None,) * 5 + + +masked_conv2d = MaskedConv2dFunction.apply + + +class MaskedConv2d(nn.Conv2d): + """A MaskedConv2d which inherits the official Conv2d. + + The masked forward doesn't implement the backward function and only + supports the stride parameter to be 1 currently. + """ + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True): + super(MaskedConv2d, self).__init__( + in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias + ) + + def forward(self, input, mask=None): + if mask is None: # fallback to the normal Conv2d + return super(MaskedConv2d, self).forward(input) + else: + return masked_conv2d(input, mask, self.weight, self.bias, self.padding) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py new file mode 100644 index 000000000000..ed7f7ece8e33 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/merge_cells.py @@ -0,0 +1,140 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import abstractmethod + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..cnn import ConvModule + + +class BaseMergeCell(nn.Module): + """The basic class for cells used in NAS-FPN and NAS-FCOS. + + BaseMergeCell takes 2 inputs. After applying convolution + on them, they are resized to the target size. Then, + they go through binary_op, which depends on the type of cell. + If with_out_conv is True, the result of output will go through + another convolution layer. + + Args: + in_channels (int): number of input channels in out_conv layer. + out_channels (int): number of output channels in out_conv layer. + with_out_conv (bool): Whether to use out_conv layer + out_conv_cfg (dict): Config dict for convolution layer, which should + contain "groups", "kernel_size", "padding", "bias" to build + out_conv layer. + out_norm_cfg (dict): Config dict for normalization layer in out_conv. + out_conv_order (tuple): The order of conv/norm/activation layers in + out_conv. + with_input1_conv (bool): Whether to use convolution on input1. + with_input2_conv (bool): Whether to use convolution on input2. + input_conv_cfg (dict): Config dict for building input1_conv layer and + input2_conv layer, which is expected to contain the type of + convolution. + Default: None, which means using conv2d. + input_norm_cfg (dict): Config dict for normalization layer in + input1_conv and input2_conv layer. Default: None. + upsample_mode (str): Interpolation method used to resize the output + of input1_conv and input2_conv to target size. Currently, we + support ['nearest', 'bilinear']. Default: 'nearest'. + """ + + def __init__( + self, + fused_channels=256, + out_channels=256, + with_out_conv=True, + out_conv_cfg=dict(groups=1, kernel_size=3, padding=1, bias=True), + out_norm_cfg=None, + out_conv_order=('act', 'conv', 'norm'), + with_input1_conv=False, + with_input2_conv=False, + input_conv_cfg=None, + input_norm_cfg=None, + upsample_mode='nearest', + ): + super(BaseMergeCell, self).__init__() + assert upsample_mode in ['nearest', 'bilinear'] + self.with_out_conv = with_out_conv + self.with_input1_conv = with_input1_conv + self.with_input2_conv = with_input2_conv + self.upsample_mode = upsample_mode + + if self.with_out_conv: + self.out_conv = ConvModule( + fused_channels, out_channels, **out_conv_cfg, norm_cfg=out_norm_cfg, order=out_conv_order + ) + + self.input1_conv = ( + self._build_input_conv(out_channels, input_conv_cfg, input_norm_cfg) + if with_input1_conv + else nn.Sequential() + ) + self.input2_conv = ( + self._build_input_conv(out_channels, input_conv_cfg, input_norm_cfg) + if with_input2_conv + else nn.Sequential() + ) + + def _build_input_conv(self, channel, conv_cfg, norm_cfg): + return ConvModule(channel, channel, 3, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, bias=True) + + @abstractmethod + def _binary_op(self, x1, x2): + pass + + def _resize(self, x, size): + if x.shape[-2:] == size: + return x + elif x.shape[-2:] < size: + return F.interpolate(x, size=size, mode=self.upsample_mode) + else: + assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0 + kernel_size = x.shape[-1] // size[-1] + x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size) + return x + + def forward(self, x1, x2, out_size=None): + assert x1.shape[:2] == x2.shape[:2] + assert out_size is None or len(out_size) == 2 + if out_size is None: # resize to larger one + out_size = max(x1.size()[2:], x2.size()[2:]) + + x1 = self.input1_conv(x1) + x2 = self.input2_conv(x2) + + x1 = self._resize(x1, out_size) + x2 = self._resize(x2, out_size) + + x = self._binary_op(x1, x2) + if self.with_out_conv: + x = self.out_conv(x) + return x + + +class SumCell(BaseMergeCell): + def __init__(self, in_channels, out_channels, **kwargs): + super(SumCell, self).__init__(in_channels, out_channels, **kwargs) + + def _binary_op(self, x1, x2): + return x1 + x2 + + +class ConcatCell(BaseMergeCell): + def __init__(self, in_channels, out_channels, **kwargs): + super(ConcatCell, self).__init__(in_channels * 2, out_channels, **kwargs) + + def _binary_op(self, x1, x2): + ret = torch.cat([x1, x2], dim=1) + return ret + + +class GlobalPoolingCell(BaseMergeCell): + def __init__(self, in_channels=None, out_channels=None, **kwargs): + super().__init__(in_channels, out_channels, **kwargs) + self.global_pool = nn.AdaptiveAvgPool2d((1, 1)) + + def _binary_op(self, x1, x2): + x2_att = self.global_pool(x2).sigmoid() + return x2 + x2_att * x1 diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py new file mode 100644 index 000000000000..f70b7d356cf5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/modulated_deform_conv.py @@ -0,0 +1,280 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair, _single + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import deprecated_api_warning +from ..cnn import CONV_LAYERS +from ..utils import ext_loader, print_log + +ext_module = ext_loader.load_ext('_ext', ['modulated_deform_conv_forward', 'modulated_deform_conv_backward']) + + +class ModulatedDeformConv2dFunction(Function): + @staticmethod + def symbolic(g, input, offset, mask, weight, bias, stride, padding, dilation, groups, deform_groups): + input_tensors = [input, offset, mask, weight] + if bias is not None: + input_tensors.append(bias) + return g.op( + 'mmcv::MMCVModulatedDeformConv2d', + *input_tensors, + stride_i=stride, + padding_i=padding, + dilation_i=dilation, + groups_i=groups, + deform_groups_i=deform_groups, + ) + + @staticmethod + def forward( + ctx, input, offset, mask, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, deform_groups=1 + ): + if input is not None and input.dim() != 4: + raise ValueError( + f'Expected 4D tensor as input, got {input.dim()}D tensor \ + instead.' + ) + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deform_groups = deform_groups + ctx.with_bias = bias is not None + if not ctx.with_bias: + bias = input.new_empty(0) # fake tensor + # When pytorch version >= 1.6.0, amp is adopted for fp16 mode; + # amp won't cast the type of model (float32), but "offset" is cast + # to float16 by nn.Conv2d automatically, leading to the type + # mismatch with input (when it is float32) or weight. + # The flag for whether to use fp16 or amp is the type of "offset", + # we cast weight and input to temporarily support fp16 and amp + # whatever the pytorch version is. + input = input.type_as(offset) + weight = weight.type_as(input) + ctx.save_for_backward(input, offset, mask, weight, bias) + output = input.new_empty(ModulatedDeformConv2dFunction._output_size(ctx, input, weight)) + ctx._bufs = [input.new_empty(0), input.new_empty(0)] + ext_module.modulated_deform_conv_forward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + output, + ctx._bufs[1], + kernel_h=weight.size(2), + kernel_w=weight.size(3), + stride_h=ctx.stride[0], + stride_w=ctx.stride[1], + pad_h=ctx.padding[0], + pad_w=ctx.padding[1], + dilation_h=ctx.dilation[0], + dilation_w=ctx.dilation[1], + group=ctx.groups, + deformable_group=ctx.deform_groups, + with_bias=ctx.with_bias, + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, mask, weight, bias = ctx.saved_tensors + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + grad_mask = torch.zeros_like(mask) + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(bias) + grad_output = grad_output.contiguous() + ext_module.modulated_deform_conv_backward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + ctx._bufs[1], + grad_input, + grad_weight, + grad_bias, + grad_offset, + grad_mask, + grad_output, + kernel_h=weight.size(2), + kernel_w=weight.size(3), + stride_h=ctx.stride[0], + stride_w=ctx.stride[1], + pad_h=ctx.padding[0], + pad_w=ctx.padding[1], + dilation_h=ctx.dilation[0], + dilation_w=ctx.dilation[1], + group=ctx.groups, + deformable_group=ctx.deform_groups, + with_bias=ctx.with_bias, + ) + if not ctx.with_bias: + grad_bias = None + + return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, None, None, None, None, None) + + @staticmethod + def _output_size(ctx, input, weight): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = ctx.padding[d] + kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = ctx.stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError( + 'convolution input is too small (output would be ' + 'x'.join(map(str, output_size)) + ')' + ) + return output_size + + +modulated_deform_conv2d = ModulatedDeformConv2dFunction.apply + + +class ModulatedDeformConv2d(nn.Module): + @deprecated_api_warning({'deformable_groups': 'deform_groups'}, cls_name='ModulatedDeformConv2d') + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1, + bias=True, + ): + super(ModulatedDeformConv2d, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deform_groups = deform_groups + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.register_parameter('bias', None) + self.init_weights() + + def init_weights(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1.0 / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.zero_() + + def forward(self, x, offset, mask): + return modulated_deform_conv2d( + x, + offset, + mask, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deform_groups, + ) + + +@CONV_LAYERS.register_module('DCNv2') +class ModulatedDeformConv2dPack(ModulatedDeformConv2d): + """A ModulatedDeformable Conv Encapsulation that acts as normal Conv + layers. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int): Same as nn.Conv2d, while tuple is not supported. + padding (int): Same as nn.Conv2d, while tuple is not supported. + dilation (int): Same as nn.Conv2d, while tuple is not supported. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. + """ + + _version = 2 + + def __init__(self, *args, **kwargs): + super(ModulatedDeformConv2dPack, self).__init__(*args, **kwargs) + self.conv_offset = nn.Conv2d( + self.in_channels, + self.deform_groups * 3 * self.kernel_size[0] * self.kernel_size[1], + kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + bias=True, + ) + self.init_weights() + + def init_weights(self): + super(ModulatedDeformConv2dPack, self).init_weights() + if hasattr(self, 'conv_offset'): + self.conv_offset.weight.data.zero_() + self.conv_offset.bias.data.zero_() + + def forward(self, x): + out = self.conv_offset(x) + o1, o2, mask = torch.chunk(out, 3, dim=1) + offset = torch.cat((o1, o2), dim=1) + mask = torch.sigmoid(mask) + return modulated_deform_conv2d( + x, + offset, + mask, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deform_groups, + ) + + def _load_from_state_dict( + self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ): + version = local_metadata.get('version', None) + + if version is None or version < 2: + # the key is different in early versions + # In version < 2, ModulatedDeformConvPack + # loads previous benchmark models. + if prefix + 'conv_offset.weight' not in state_dict and prefix[:-1] + '_offset.weight' in state_dict: + state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(prefix[:-1] + '_offset.weight') + if prefix + 'conv_offset.bias' not in state_dict and prefix[:-1] + '_offset.bias' in state_dict: + state_dict[prefix + 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + '_offset.bias') + + if version is not None and version > 1: + print_log(f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to ' 'version 2.', logger='root') + + super()._load_from_state_dict( + state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py new file mode 100644 index 000000000000..e4ef4f9ab2d7 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/multi_scale_deform_attn.py @@ -0,0 +1,348 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math +import warnings + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd.function import Function, once_differentiable + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import deprecated_api_warning +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import constant_init, xavier_init +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn.bricks.registry import ATTENTION +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import BaseModule +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward']) + + +class MultiScaleDeformableAttnFunction(Function): + @staticmethod + def forward( + ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step + ): + """GPU version of multi-scale deformable attention. + + Args: + value (Tensor): The value has shape + (bs, num_keys, mum_heads, embed_dims//num_heads) + value_spatial_shapes (Tensor): Spatial shape of + each feature map, has shape (num_levels, 2), + last dimension 2 represent (h, w) + sampling_locations (Tensor): The location of sampling points, + has shape + (bs ,num_queries, num_heads, num_levels, num_points, 2), + the last dimension 2 represent (x, y). + attention_weights (Tensor): The weight of sampling points used + when calculate the attention, has shape + (bs ,num_queries, num_heads, num_levels, num_points), + im2col_step (Tensor): The step used in image to column. + + Returns: + Tensor: has shape (bs, num_queries, embed_dims) + """ + + ctx.im2col_step = im2col_step + output = ext_module.ms_deform_attn_forward( + value, + value_spatial_shapes, + value_level_start_index, + sampling_locations, + attention_weights, + im2col_step=ctx.im2col_step, + ) + ctx.save_for_backward( + value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + """GPU version of backward function. + + Args: + grad_output (Tensor): Gradient + of output tensor of forward. + + Returns: + Tuple[Tensor]: Gradient + of input tensors in forward. + """ + value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors + grad_value = torch.zeros_like(value) + grad_sampling_loc = torch.zeros_like(sampling_locations) + grad_attn_weight = torch.zeros_like(attention_weights) + + ext_module.ms_deform_attn_backward( + value, + value_spatial_shapes, + value_level_start_index, + sampling_locations, + attention_weights, + grad_output.contiguous(), + grad_value, + grad_sampling_loc, + grad_attn_weight, + im2col_step=ctx.im2col_step, + ) + + return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None + + +def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights): + """CPU version of multi-scale deformable attention. + + Args: + value (Tensor): The value has shape + (bs, num_keys, mum_heads, embed_dims//num_heads) + value_spatial_shapes (Tensor): Spatial shape of + each feature map, has shape (num_levels, 2), + last dimension 2 represent (h, w) + sampling_locations (Tensor): The location of sampling points, + has shape + (bs ,num_queries, num_heads, num_levels, num_points, 2), + the last dimension 2 represent (x, y). + attention_weights (Tensor): The weight of sampling points used + when calculate the attention, has shape + (bs ,num_queries, num_heads, num_levels, num_points), + + Returns: + Tensor: has shape (bs, num_queries, embed_dims) + """ + + bs, _, num_heads, embed_dims = value.shape + _, num_queries, num_heads, num_levels, num_points, _ = sampling_locations.shape + value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1) + sampling_grids = 2 * sampling_locations - 1 + sampling_value_list = [] + for level, (H_, W_) in enumerate(value_spatial_shapes): + # bs, H_*W_, num_heads, embed_dims -> + # bs, H_*W_, num_heads*embed_dims -> + # bs, num_heads*embed_dims, H_*W_ -> + # bs*num_heads, embed_dims, H_, W_ + value_l_ = value_list[level].flatten(2).transpose(1, 2).reshape(bs * num_heads, embed_dims, H_, W_) + # bs, num_queries, num_heads, num_points, 2 -> + # bs, num_heads, num_queries, num_points, 2 -> + # bs*num_heads, num_queries, num_points, 2 + sampling_grid_l_ = sampling_grids[:, :, :, level].transpose(1, 2).flatten(0, 1) + # bs*num_heads, embed_dims, num_queries, num_points + sampling_value_l_ = F.grid_sample( + value_l_, sampling_grid_l_, mode='bilinear', padding_mode='zeros', align_corners=False + ) + sampling_value_list.append(sampling_value_l_) + # (bs, num_queries, num_heads, num_levels, num_points) -> + # (bs, num_heads, num_queries, num_levels, num_points) -> + # (bs, num_heads, 1, num_queries, num_levels*num_points) + attention_weights = attention_weights.transpose(1, 2).reshape( + bs * num_heads, 1, num_queries, num_levels * num_points + ) + output = ( + (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights) + .sum(-1) + .view(bs, num_heads * embed_dims, num_queries) + ) + return output.transpose(1, 2).contiguous() + + +@ATTENTION.register_module() +class MultiScaleDeformableAttention(BaseModule): + """An attention module used in Deformable-Detr. + + `Deformable DETR: Deformable Transformers for End-to-End Object Detection. + `_. + + Args: + embed_dims (int): The embedding dimension of Attention. + Default: 256. + num_heads (int): Parallel attention heads. Default: 64. + num_levels (int): The number of feature map used in + Attention. Default: 4. + num_points (int): The number of sampling points for + each query in each head. Default: 4. + im2col_step (int): The step used in image_to_column. + Default: 64. + dropout (float): A Dropout layer on `inp_identity`. + Default: 0.1. + batch_first (bool): Key, Query and Value are shape of + (batch, n, embed_dim) + or (n, batch, embed_dim). Default to False. + norm_cfg (dict): Config dict for normalization layer. + Default: None. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + """ + + def __init__( + self, + embed_dims=256, + num_heads=8, + num_levels=4, + num_points=4, + im2col_step=64, + dropout=0.1, + batch_first=False, + norm_cfg=None, + init_cfg=None, + ): + super().__init__(init_cfg) + if embed_dims % num_heads != 0: + raise ValueError(f'embed_dims must be divisible by num_heads, ' f'but got {embed_dims} and {num_heads}') + dim_per_head = embed_dims // num_heads + self.norm_cfg = norm_cfg + self.dropout = nn.Dropout(dropout) + self.batch_first = batch_first + + # you'd better set dim_per_head to a power of 2 + # which is more efficient in the CUDA implementation + def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError('invalid input for _is_power_of_2: {} (type: {})'.format(n, type(n))) + return (n & (n - 1) == 0) and n != 0 + + if not _is_power_of_2(dim_per_head): + warnings.warn( + "You'd better set embed_dims in " + 'MultiScaleDeformAttention to make ' + 'the dimension of each attention head a power of 2 ' + 'which is more efficient in our CUDA implementation.' + ) + + self.im2col_step = im2col_step + self.embed_dims = embed_dims + self.num_levels = num_levels + self.num_heads = num_heads + self.num_points = num_points + self.sampling_offsets = nn.Linear(embed_dims, num_heads * num_levels * num_points * 2) + self.attention_weights = nn.Linear(embed_dims, num_heads * num_levels * num_points) + self.value_proj = nn.Linear(embed_dims, embed_dims) + self.output_proj = nn.Linear(embed_dims, embed_dims) + self.init_weights() + + def init_weights(self): + """Default initialization for Parameters of Module.""" + constant_init(self.sampling_offsets, 0.0) + thetas = torch.arange(self.num_heads, dtype=torch.float32) * (2.0 * math.pi / self.num_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = ( + (grid_init / grid_init.abs().max(-1, keepdim=True)[0]) + .view(self.num_heads, 1, 1, 2) + .repeat(1, self.num_levels, self.num_points, 1) + ) + for i in range(self.num_points): + grid_init[:, :, i, :] *= i + 1 + + self.sampling_offsets.bias.data = grid_init.view(-1) + constant_init(self.attention_weights, val=0.0, bias=0.0) + xavier_init(self.value_proj, distribution='uniform', bias=0.0) + xavier_init(self.output_proj, distribution='uniform', bias=0.0) + self._is_init = True + + @deprecated_api_warning({'residual': 'identity'}, cls_name='MultiScaleDeformableAttention') + def forward( + self, + query, + key=None, + value=None, + identity=None, + query_pos=None, + key_padding_mask=None, + reference_points=None, + spatial_shapes=None, + level_start_index=None, + **kwargs, + ): + """Forward Function of MultiScaleDeformAttention. + + Args: + query (Tensor): Query of Transformer with shape + (num_query, bs, embed_dims). + key (Tensor): The key tensor with shape + `(num_key, bs, embed_dims)`. + value (Tensor): The value tensor with shape + `(num_key, bs, embed_dims)`. + identity (Tensor): The tensor used for addition, with the + same shape as `query`. Default None. If None, + `query` will be used. + query_pos (Tensor): The positional encoding for `query`. + Default: None. + key_pos (Tensor): The positional encoding for `key`. Default + None. + reference_points (Tensor): The normalized reference + points with shape (bs, num_query, num_levels, 2), + all elements is range in [0, 1], top-left (0,0), + bottom-right (1, 1), including padding area. + or (N, Length_{query}, num_levels, 4), add + additional two dimensions is (w, h) to + form reference boxes. + key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_key]. + spatial_shapes (Tensor): Spatial shape of features in + different levels. With shape (num_levels, 2), + last dimension represents (h, w). + level_start_index (Tensor): The start index of each level. + A tensor has shape ``(num_levels, )`` and can be represented + as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...]. + + Returns: + Tensor: forwarded results with shape [num_query, bs, embed_dims]. + """ + + if value is None: + value = query + + if identity is None: + identity = query + if query_pos is not None: + query = query + query_pos + if not self.batch_first: + # change to (bs, num_query ,embed_dims) + query = query.permute(1, 0, 2) + value = value.permute(1, 0, 2) + + bs, num_query, _ = query.shape + bs, num_value, _ = value.shape + assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value + + value = self.value_proj(value) + if key_padding_mask is not None: + value = value.masked_fill(key_padding_mask[..., None], 0.0) + value = value.view(bs, num_value, self.num_heads, -1) + sampling_offsets = self.sampling_offsets(query).view( + bs, num_query, self.num_heads, self.num_levels, self.num_points, 2 + ) + attention_weights = self.attention_weights(query).view( + bs, num_query, self.num_heads, self.num_levels * self.num_points + ) + attention_weights = attention_weights.softmax(-1) + + attention_weights = attention_weights.view(bs, num_query, self.num_heads, self.num_levels, self.num_points) + if reference_points.shape[-1] == 2: + offset_normalizer = torch.stack([spatial_shapes[..., 1], spatial_shapes[..., 0]], -1) + sampling_locations = ( + reference_points[:, :, None, :, None, :] + + sampling_offsets / offset_normalizer[None, None, None, :, None, :] + ) + elif reference_points.shape[-1] == 4: + sampling_locations = ( + reference_points[:, :, None, :, None, :2] + + sampling_offsets / self.num_points * reference_points[:, :, None, :, None, 2:] * 0.5 + ) + else: + raise ValueError( + f'Last dim of reference_points must be' f' 2 or 4, but get {reference_points.shape[-1]} instead.' + ) + if torch.cuda.is_available() and value.is_cuda: + output = MultiScaleDeformableAttnFunction.apply( + value, spatial_shapes, level_start_index, sampling_locations, attention_weights, self.im2col_step + ) + else: + output = multi_scale_deformable_attn_pytorch(value, spatial_shapes, sampling_locations, attention_weights) + + output = self.output_proj(output) + + if not self.batch_first: + # (num_query, bs ,embed_dims) + output = output.permute(1, 0, 2) + + return self.dropout(output) + identity diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py new file mode 100644 index 000000000000..4914c139427b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/nms.py @@ -0,0 +1,383 @@ +import os + +import numpy as np +import torch + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import deprecated_api_warning +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['nms', 'softnms', 'nms_match', 'nms_rotated']) + + +# This function is modified from: https://github.com/pytorch/vision/ +class NMSop(torch.autograd.Function): + @staticmethod + def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold, max_num): + is_filtering_by_score = score_threshold > 0 + if is_filtering_by_score: + valid_mask = scores > score_threshold + bboxes, scores = bboxes[valid_mask], scores[valid_mask] + valid_inds = torch.nonzero(valid_mask, as_tuple=False).squeeze(dim=1) + + inds = ext_module.nms(bboxes, scores, iou_threshold=float(iou_threshold), offset=offset) + + if max_num > 0: + inds = inds[:max_num] + if is_filtering_by_score: + inds = valid_inds[inds] + return inds + + @staticmethod + def symbolic(g, bboxes, scores, iou_threshold, offset, score_threshold, max_num): + from ..onnx import is_custom_op_loaded + + has_custom_op = is_custom_op_loaded() + # TensorRT nms plugin is aligned with original nms in ONNXRuntime + is_trt_backend = os.environ.get('ONNX_BACKEND') == 'MMCVTensorRT' + if has_custom_op and (not is_trt_backend): + return g.op( + 'mmcv::NonMaxSuppression', bboxes, scores, iou_threshold_f=float(iou_threshold), offset_i=int(offset) + ) + else: + from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze + from ..onnx.onnx_utils.symbolic_helper import _size_helper + + boxes = unsqueeze(g, bboxes, 0) + scores = unsqueeze(g, unsqueeze(g, scores, 0), 0) + + if max_num > 0: + max_num = g.op('Constant', value_t=torch.tensor(max_num, dtype=torch.long)) + else: + dim = g.op('Constant', value_t=torch.tensor(0)) + max_num = _size_helper(g, bboxes, dim) + max_output_per_class = max_num + iou_threshold = g.op('Constant', value_t=torch.tensor([iou_threshold], dtype=torch.float)) + score_threshold = g.op('Constant', value_t=torch.tensor([score_threshold], dtype=torch.float)) + nms_out = g.op('NonMaxSuppression', boxes, scores, max_output_per_class, iou_threshold, score_threshold) + return squeeze(g, select(g, nms_out, 1, g.op('Constant', value_t=torch.tensor([2], dtype=torch.long))), 1) + + +class SoftNMSop(torch.autograd.Function): + @staticmethod + def forward(ctx, boxes, scores, iou_threshold, sigma, min_score, method, offset): + dets = boxes.new_empty((boxes.size(0), 5), device='cpu') + inds = ext_module.softnms( + boxes.cpu(), + scores.cpu(), + dets.cpu(), + iou_threshold=float(iou_threshold), + sigma=float(sigma), + min_score=float(min_score), + method=int(method), + offset=int(offset), + ) + return dets, inds + + @staticmethod + def symbolic(g, boxes, scores, iou_threshold, sigma, min_score, method, offset): + from packaging import version + + assert version.parse(torch.__version__) >= version.parse('1.7.0') + nms_out = g.op( + 'mmcv::SoftNonMaxSuppression', + boxes, + scores, + iou_threshold_f=float(iou_threshold), + sigma_f=float(sigma), + min_score_f=float(min_score), + method_i=int(method), + offset_i=int(offset), + outputs=2, + ) + return nms_out + + +@deprecated_api_warning({'iou_thr': 'iou_threshold'}) +def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1): + """Dispatch to either CPU or GPU NMS implementations. + + The input can be either torch tensor or numpy array. GPU NMS will be used + if the input is gpu tensor, otherwise CPU NMS + will be used. The returned type will always be the same as inputs. + + Arguments: + boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4). + scores (torch.Tensor or np.ndarray): scores in shape (N, ). + iou_threshold (float): IoU threshold for NMS. + offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset). + score_threshold (float): score threshold for NMS. + max_num (int): maximum number of boxes after NMS. + + Returns: + tuple: kept dets(boxes and scores) and indice, which is always the \ + same data type as the input. + + Example: + >>> boxes = np.array([[49.1, 32.4, 51.0, 35.9], + >>> [49.3, 32.9, 51.0, 35.3], + >>> [49.2, 31.8, 51.0, 35.4], + >>> [35.1, 11.5, 39.1, 15.7], + >>> [35.6, 11.8, 39.3, 14.2], + >>> [35.3, 11.5, 39.9, 14.5], + >>> [35.2, 11.7, 39.7, 15.7]], dtype=np.float32) + >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.5, 0.4, 0.3],\ + dtype=np.float32) + >>> iou_threshold = 0.6 + >>> dets, inds = nms(boxes, scores, iou_threshold) + >>> assert len(inds) == len(dets) == 3 + """ + assert isinstance(boxes, (torch.Tensor, np.ndarray)) + assert isinstance(scores, (torch.Tensor, np.ndarray)) + is_numpy = False + if isinstance(boxes, np.ndarray): + is_numpy = True + boxes = torch.from_numpy(boxes) + if isinstance(scores, np.ndarray): + scores = torch.from_numpy(scores) + assert boxes.size(1) == 4 + assert boxes.size(0) == scores.size(0) + assert offset in (0, 1) + + if torch.__version__ == 'parrots': + indata_list = [boxes, scores] + indata_dict = {'iou_threshold': float(iou_threshold), 'offset': int(offset)} + inds = ext_module.nms(*indata_list, **indata_dict) + else: + inds = NMSop.apply(boxes, scores, iou_threshold, offset, score_threshold, max_num) + dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1) + if is_numpy: + dets = dets.cpu().numpy() + inds = inds.cpu().numpy() + return dets, inds + + +@deprecated_api_warning({'iou_thr': 'iou_threshold'}) +def soft_nms(boxes, scores, iou_threshold=0.3, sigma=0.5, min_score=1e-3, method='linear', offset=0): + """Dispatch to only CPU Soft NMS implementations. + + The input can be either a torch tensor or numpy array. + The returned type will always be the same as inputs. + + Arguments: + boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4). + scores (torch.Tensor or np.ndarray): scores in shape (N, ). + iou_threshold (float): IoU threshold for NMS. + sigma (float): hyperparameter for gaussian method + min_score (float): score filter threshold + method (str): either 'linear' or 'gaussian' + offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset). + + Returns: + tuple: kept dets(boxes and scores) and indice, which is always the \ + same data type as the input. + + Example: + >>> boxes = np.array([[4., 3., 5., 3.], + >>> [4., 3., 5., 4.], + >>> [3., 1., 3., 1.], + >>> [3., 1., 3., 1.], + >>> [3., 1., 3., 1.], + >>> [3., 1., 3., 1.]], dtype=np.float32) + >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.4, 0.0], dtype=np.float32) + >>> iou_threshold = 0.6 + >>> dets, inds = soft_nms(boxes, scores, iou_threshold, sigma=0.5) + >>> assert len(inds) == len(dets) == 5 + """ + + assert isinstance(boxes, (torch.Tensor, np.ndarray)) + assert isinstance(scores, (torch.Tensor, np.ndarray)) + is_numpy = False + if isinstance(boxes, np.ndarray): + is_numpy = True + boxes = torch.from_numpy(boxes) + if isinstance(scores, np.ndarray): + scores = torch.from_numpy(scores) + assert boxes.size(1) == 4 + assert boxes.size(0) == scores.size(0) + assert offset in (0, 1) + method_dict = {'naive': 0, 'linear': 1, 'gaussian': 2} + assert method in method_dict.keys() + + if torch.__version__ == 'parrots': + dets = boxes.new_empty((boxes.size(0), 5), device='cpu') + indata_list = [boxes.cpu(), scores.cpu(), dets.cpu()] + indata_dict = { + 'iou_threshold': float(iou_threshold), + 'sigma': float(sigma), + 'min_score': min_score, + 'method': method_dict[method], + 'offset': int(offset), + } + inds = ext_module.softnms(*indata_list, **indata_dict) + else: + dets, inds = SoftNMSop.apply( + boxes.cpu(), + scores.cpu(), + float(iou_threshold), + float(sigma), + float(min_score), + method_dict[method], + int(offset), + ) + + dets = dets[: inds.size(0)] + + if is_numpy: + dets = dets.cpu().numpy() + inds = inds.cpu().numpy() + return dets, inds + else: + return dets.to(device=boxes.device), inds.to(device=boxes.device) + + +def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): + """Performs non-maximum suppression in a batched fashion. + + Modified from https://github.com/pytorch/vision/blob + /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39. + In order to perform NMS independently per class, we add an offset to all + the boxes. The offset is dependent only on the class idx, and is large + enough so that boxes from different classes do not overlap. + + Arguments: + boxes (torch.Tensor): boxes in shape (N, 4). + scores (torch.Tensor): scores in shape (N, ). + idxs (torch.Tensor): each index value correspond to a bbox cluster, + and NMS will not be applied between elements of different idxs, + shape (N, ). + nms_cfg (dict): specify nms type and other parameters like iou_thr. + Possible keys includes the following. + + - iou_thr (float): IoU threshold used for NMS. + - split_thr (float): threshold number of boxes. In some cases the + number of boxes is large (e.g., 200k). To avoid OOM during + training, the users could set `split_thr` to a small value. + If the number of boxes is greater than the threshold, it will + perform NMS on each group of boxes separately and sequentially. + Defaults to 10000. + class_agnostic (bool): if true, nms is class agnostic, + i.e. IoU thresholding happens over all boxes, + regardless of the predicted class. + + Returns: + tuple: kept dets and indice. + """ + nms_cfg_ = nms_cfg.copy() + class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic) + if class_agnostic: + boxes_for_nms = boxes + else: + max_coordinate = boxes.max() + offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes)) + boxes_for_nms = boxes + offsets[:, None] + + nms_type = nms_cfg_.pop('type', 'nms') + nms_op = eval(nms_type) + + split_thr = nms_cfg_.pop('split_thr', 10000) + # Won't split to multiple nms nodes when exporting to onnx + if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export(): + dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_) + boxes = boxes[keep] + # -1 indexing works abnormal in TensorRT + # This assumes `dets` has 5 dimensions where + # the last dimension is score. + # TODO: more elegant way to handle the dimension issue. + # Some type of nms would reweight the score, such as SoftNMS + scores = dets[:, 4] + else: + max_num = nms_cfg_.pop('max_num', -1) + total_mask = scores.new_zeros(scores.size(), dtype=torch.bool) + # Some type of nms would reweight the score, such as SoftNMS + scores_after_nms = scores.new_zeros(scores.size()) + for id in torch.unique(idxs): + mask = (idxs == id).nonzero(as_tuple=False).view(-1) + dets, keep = nms_op(boxes_for_nms[mask], scores[mask], **nms_cfg_) + total_mask[mask[keep]] = True + scores_after_nms[mask[keep]] = dets[:, -1] + keep = total_mask.nonzero(as_tuple=False).view(-1) + + scores, inds = scores_after_nms[keep].sort(descending=True) + keep = keep[inds] + boxes = boxes[keep] + + if max_num > 0: + keep = keep[:max_num] + boxes = boxes[:max_num] + scores = scores[:max_num] + + return torch.cat([boxes, scores[:, None]], -1), keep + + +def nms_match(dets, iou_threshold): + """Matched dets into different groups by NMS. + + NMS match is Similar to NMS but when a bbox is suppressed, nms match will + record the indice of suppressed bbox and form a group with the indice of + kept bbox. In each group, indice is sorted as score order. + + Arguments: + dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5). + iou_thr (float): IoU thresh for NMS. + + Returns: + List[torch.Tensor | np.ndarray]: The outer list corresponds different + matched group, the inner Tensor corresponds the indices for a group + in score order. + """ + if dets.shape[0] == 0: + matched = [] + else: + assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' f'but get {dets.shape}' + if isinstance(dets, torch.Tensor): + dets_t = dets.detach().cpu() + else: + dets_t = torch.from_numpy(dets) + indata_list = [dets_t] + indata_dict = {'iou_threshold': float(iou_threshold)} + matched = ext_module.nms_match(*indata_list, **indata_dict) + if torch.__version__ == 'parrots': + matched = matched.tolist() + + if isinstance(dets, torch.Tensor): + return [dets.new_tensor(m, dtype=torch.long) for m in matched] + else: + return [np.array(m, dtype=np.int) for m in matched] + + +def nms_rotated(dets, scores, iou_threshold, labels=None): + """Performs non-maximum suppression (NMS) on the rotated boxes according to + their intersection-over-union (IoU). + + Rotated NMS iteratively removes lower scoring rotated boxes which have an + IoU greater than iou_threshold with another (higher scoring) rotated box. + + Args: + boxes (Tensor): Rotated boxes in shape (N, 5). They are expected to \ + be in (x_ctr, y_ctr, width, height, angle_radian) format. + scores (Tensor): scores in shape (N, ). + iou_threshold (float): IoU thresh for NMS. + labels (Tensor): boxes' label in shape (N,). + + Returns: + tuple: kept dets(boxes and scores) and indice, which is always the \ + same data type as the input. + """ + if dets.shape[0] == 0: + return dets, None + multi_label = labels is not None + if multi_label: + dets_wl = torch.cat((dets, labels.unsqueeze(1)), 1) + else: + dets_wl = dets + _, order = scores.sort(0, descending=True) + dets_sorted = dets_wl.index_select(0, order) + + if torch.__version__ == 'parrots': + keep_inds = ext_module.nms_rotated( + dets_wl, scores, order, dets_sorted, iou_threshold=iou_threshold, multi_label=multi_label + ) + else: + keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted, iou_threshold, multi_label) + dets = torch.cat((dets[keep_inds], scores[keep_inds].reshape(-1, 1)), dim=1) + return dets, keep_inds diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py new file mode 100644 index 000000000000..228769d37089 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/pixel_group.py @@ -0,0 +1,71 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np +import torch + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['pixel_group']) + + +def pixel_group(score, mask, embedding, kernel_label, kernel_contour, kernel_region_num, distance_threshold): + """Group pixels into text instances, which is widely used text detection + methods. + + Arguments: + score (np.array or Tensor): The foreground score with size hxw. + mask (np.array or Tensor): The foreground mask with size hxw. + embedding (np.array or Tensor): The embedding with size hxwxc to + distinguish instances. + kernel_label (np.array or Tensor): The instance kernel index with + size hxw. + kernel_contour (np.array or Tensor): The kernel contour with size hxw. + kernel_region_num (int): The instance kernel region number. + distance_threshold (float): The embedding distance threshold between + kernel and pixel in one instance. + + Returns: + pixel_assignment (List[List[float]]): The instance coordinate list. + Each element consists of averaged confidence, pixel number, and + coordinates (x_i, y_i for all pixels) in order. + """ + assert isinstance(score, (torch.Tensor, np.ndarray)) + assert isinstance(mask, (torch.Tensor, np.ndarray)) + assert isinstance(embedding, (torch.Tensor, np.ndarray)) + assert isinstance(kernel_label, (torch.Tensor, np.ndarray)) + assert isinstance(kernel_contour, (torch.Tensor, np.ndarray)) + assert isinstance(kernel_region_num, int) + assert isinstance(distance_threshold, float) + + if isinstance(score, np.ndarray): + score = torch.from_numpy(score) + if isinstance(mask, np.ndarray): + mask = torch.from_numpy(mask) + if isinstance(embedding, np.ndarray): + embedding = torch.from_numpy(embedding) + if isinstance(kernel_label, np.ndarray): + kernel_label = torch.from_numpy(kernel_label) + if isinstance(kernel_contour, np.ndarray): + kernel_contour = torch.from_numpy(kernel_contour) + + if torch.__version__ == 'parrots': + label = ext_module.pixel_group( + score, + mask, + embedding, + kernel_label, + kernel_contour, + kernel_region_num=kernel_region_num, + distance_threshold=distance_threshold, + ) + label = label.tolist() + label = label[0] + list_index = kernel_region_num + pixel_assignment = [] + for x in range(kernel_region_num): + pixel_assignment.append(np.array(label[list_index : list_index + int(label[x])], dtype=np.float)) + list_index = list_index + int(label[x]) + else: + pixel_assignment = ext_module.pixel_group( + score, mask, embedding, kernel_label, kernel_contour, kernel_region_num, distance_threshold + ) + return pixel_assignment diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py new file mode 100644 index 000000000000..204ff1c74e12 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/point_sample.py @@ -0,0 +1,317 @@ +# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend # noqa + +from os import path as osp + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.utils import _pair +from torch.onnx.operators import shape_as_tensor + + +def bilinear_grid_sample(im, grid, align_corners=False): + """Given an input and a flow-field grid, computes the output using input + values and pixel locations from grid. Supported only bilinear interpolation + method to sample the input pixels. + + Args: + im (torch.Tensor): Input feature map, shape (N, C, H, W) + grid (torch.Tensor): Point coordinates, shape (N, Hg, Wg, 2) + align_corners {bool}: If set to True, the extrema (-1 and 1) are + considered as referring to the center points of the input’s + corner pixels. If set to False, they are instead considered as + referring to the corner points of the input’s corner pixels, + making the sampling more resolution agnostic. + Returns: + torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg) + """ + n, c, h, w = im.shape + gn, gh, gw, _ = grid.shape + assert n == gn + + x = grid[:, :, :, 0] + y = grid[:, :, :, 1] + + if align_corners: + x = ((x + 1) / 2) * (w - 1) + y = ((y + 1) / 2) * (h - 1) + else: + x = ((x + 1) * w - 1) / 2 + y = ((y + 1) * h - 1) / 2 + + x = x.view(n, -1) + y = y.view(n, -1) + + x0 = torch.floor(x).long() + y0 = torch.floor(y).long() + x1 = x0 + 1 + y1 = y0 + 1 + + wa = ((x1 - x) * (y1 - y)).unsqueeze(1) + wb = ((x1 - x) * (y - y0)).unsqueeze(1) + wc = ((x - x0) * (y1 - y)).unsqueeze(1) + wd = ((x - x0) * (y - y0)).unsqueeze(1) + + # Apply default for grid_sample function zero padding + im_padded = F.pad(im, pad=[1, 1, 1, 1], mode='constant', value=0) + padded_h = h + 2 + padded_w = w + 2 + # save points positions after padding + x0, x1, y0, y1 = x0 + 1, x1 + 1, y0 + 1, y1 + 1 + + # Clip coordinates to padded image size + x0 = torch.where(x0 < 0, torch.tensor(0), x0) + x0 = torch.where(x0 > padded_w - 1, torch.tensor(padded_w - 1), x0) + x1 = torch.where(x1 < 0, torch.tensor(0), x1) + x1 = torch.where(x1 > padded_w - 1, torch.tensor(padded_w - 1), x1) + y0 = torch.where(y0 < 0, torch.tensor(0), y0) + y0 = torch.where(y0 > padded_h - 1, torch.tensor(padded_h - 1), y0) + y1 = torch.where(y1 < 0, torch.tensor(0), y1) + y1 = torch.where(y1 > padded_h - 1, torch.tensor(padded_h - 1), y1) + + im_padded = im_padded.view(n, c, -1) + + x0_y0 = (x0 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1) + x0_y1 = (x0 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1) + x1_y0 = (x1 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1) + x1_y1 = (x1 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1) + + Ia = torch.gather(im_padded, 2, x0_y0) + Ib = torch.gather(im_padded, 2, x0_y1) + Ic = torch.gather(im_padded, 2, x1_y0) + Id = torch.gather(im_padded, 2, x1_y1) + + return (Ia * wa + Ib * wb + Ic * wc + Id * wd).reshape(n, c, gh, gw) + + +def is_in_onnx_export_without_custom_ops(): + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import get_onnxruntime_op_path + + ort_custom_op_path = get_onnxruntime_op_path() + return torch.onnx.is_in_onnx_export() and not osp.exists(ort_custom_op_path) + + +def normalize(grid): + """Normalize input grid from [-1, 1] to [0, 1] + Args: + grid (Tensor): The grid to be normalize, range [-1, 1]. + Returns: + Tensor: Normalized grid, range [0, 1]. + """ + + return (grid + 1.0) / 2.0 + + +def denormalize(grid): + """Denormalize input grid from range [0, 1] to [-1, 1] + Args: + grid (Tensor): The grid to be denormalize, range [0, 1]. + Returns: + Tensor: Denormalized grid, range [-1, 1]. + """ + + return grid * 2.0 - 1.0 + + +def generate_grid(num_grid, size, device): + """Generate regular square grid of points in [0, 1] x [0, 1] coordinate + space. + + Args: + num_grid (int): The number of grids to sample, one for each region. + size (tuple(int, int)): The side size of the regular grid. + device (torch.device): Desired device of returned tensor. + + Returns: + (torch.Tensor): A tensor of shape (num_grid, size[0]*size[1], 2) that + contains coordinates for the regular grids. + """ + + affine_trans = torch.tensor([[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]], device=device) + grid = F.affine_grid(affine_trans, torch.Size((1, 1, *size)), align_corners=False) + grid = normalize(grid) + return grid.view(1, -1, 2).expand(num_grid, -1, -1) + + +def rel_roi_point_to_abs_img_point(rois, rel_roi_points): + """Convert roi based relative point coordinates to image based absolute + point coordinates. + + Args: + rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) + rel_roi_points (Tensor): Point coordinates inside RoI, relative to + RoI, location, range (0, 1), shape (N, P, 2) + Returns: + Tensor: Image based absolute point coordinates, shape (N, P, 2) + """ + + with torch.no_grad(): + assert rel_roi_points.size(0) == rois.size(0) + assert rois.dim() == 2 + assert rel_roi_points.dim() == 3 + assert rel_roi_points.size(2) == 2 + # remove batch idx + if rois.size(1) == 5: + rois = rois[:, 1:] + abs_img_points = rel_roi_points.clone() + # To avoid an error during exporting to onnx use independent + # variables instead inplace computation + xs = abs_img_points[:, :, 0] * (rois[:, None, 2] - rois[:, None, 0]) + ys = abs_img_points[:, :, 1] * (rois[:, None, 3] - rois[:, None, 1]) + xs += rois[:, None, 0] + ys += rois[:, None, 1] + abs_img_points = torch.stack([xs, ys], dim=2) + return abs_img_points + + +def get_shape_from_feature_map(x): + """Get spatial resolution of input feature map considering exporting to + onnx mode. + + Args: + x (torch.Tensor): Input tensor, shape (N, C, H, W) + Returns: + torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2) + """ + if torch.onnx.is_in_onnx_export(): + img_shape = shape_as_tensor(x)[2:].flip(0).view(1, 1, 2).to(x.device).float() + else: + img_shape = torch.tensor(x.shape[2:]).flip(0).view(1, 1, 2).to(x.device).float() + return img_shape + + +def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.0): + """Convert image based absolute point coordinates to image based relative + coordinates for sampling. + + Args: + abs_img_points (Tensor): Image based absolute point coordinates, + shape (N, P, 2) + img (tuple/Tensor): (height, width) of image or feature map. + spatial_scale (float): Scale points by this factor. Default: 1. + + Returns: + Tensor: Image based relative point coordinates for sampling, + shape (N, P, 2) + """ + + assert (isinstance(img, tuple) and len(img) == 2) or (isinstance(img, torch.Tensor) and len(img.shape) == 4) + + if isinstance(img, tuple): + h, w = img + scale = torch.tensor([w, h], dtype=torch.float, device=abs_img_points.device) + scale = scale.view(1, 1, 2) + else: + scale = get_shape_from_feature_map(img) + + return abs_img_points / scale * spatial_scale + + +def rel_roi_point_to_rel_img_point(rois, rel_roi_points, img, spatial_scale=1.0): + """Convert roi based relative point coordinates to image based absolute + point coordinates. + + Args: + rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) + rel_roi_points (Tensor): Point coordinates inside RoI, relative to + RoI, location, range (0, 1), shape (N, P, 2) + img (tuple/Tensor): (height, width) of image or feature map. + spatial_scale (float): Scale points by this factor. Default: 1. + + Returns: + Tensor: Image based relative point coordinates for sampling, + shape (N, P, 2) + """ + + abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points) + rel_img_point = abs_img_point_to_rel_img_point(abs_img_point, img, spatial_scale) + + return rel_img_point + + +def point_sample(input, points, align_corners=False, **kwargs): + """A wrapper around :func:`grid_sample` to support 3D point_coords tensors + Unlike :func:`torch.nn.functional.grid_sample` it assumes point_coords to + lie inside ``[0, 1] x [0, 1]`` square. + + Args: + input (Tensor): Feature map, shape (N, C, H, W). + points (Tensor): Image based absolute point coordinates (normalized), + range [0, 1] x [0, 1], shape (N, P, 2) or (N, Hgrid, Wgrid, 2). + align_corners (bool): Whether align_corners. Default: False + + Returns: + Tensor: Features of `point` on `input`, shape (N, C, P) or + (N, C, Hgrid, Wgrid). + """ + + add_dim = False + if points.dim() == 3: + add_dim = True + points = points.unsqueeze(2) + if is_in_onnx_export_without_custom_ops(): + # If custom ops for onnx runtime not compiled use python + # implementation of grid_sample function to make onnx graph + # with supported nodes + output = bilinear_grid_sample(input, denormalize(points), align_corners=align_corners) + else: + output = F.grid_sample(input, denormalize(points), align_corners=align_corners, **kwargs) + if add_dim: + output = output.squeeze(3) + return output + + +class SimpleRoIAlign(nn.Module): + def __init__(self, output_size, spatial_scale, aligned=True): + """Simple RoI align in PointRend, faster than standard RoIAlign. + + Args: + output_size (tuple[int]): h, w + spatial_scale (float): scale the input boxes by this number + aligned (bool): if False, use the legacy implementation in + MMDetection, align_corners=True will be used in F.grid_sample. + If True, align the results more perfectly. + """ + + super(SimpleRoIAlign, self).__init__() + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + # to be consistent with other RoI ops + self.use_torchvision = False + self.aligned = aligned + + def forward(self, features, rois): + num_imgs = features.size(0) + num_rois = rois.size(0) + rel_roi_points = generate_grid(num_rois, self.output_size, device=rois.device) + + if torch.onnx.is_in_onnx_export(): + rel_img_points = rel_roi_point_to_rel_img_point(rois, rel_roi_points, features, self.spatial_scale) + rel_img_points = rel_img_points.reshape(num_imgs, -1, *rel_img_points.shape[1:]) + point_feats = point_sample(features, rel_img_points, align_corners=not self.aligned) + point_feats = point_feats.transpose(1, 2) + else: + point_feats = [] + for batch_ind in range(num_imgs): + # unravel batch dim + feat = features[batch_ind].unsqueeze(0) + inds = rois[:, 0].long() == batch_ind + if inds.any(): + rel_img_points = rel_roi_point_to_rel_img_point( + rois[inds], rel_roi_points[inds], feat, self.spatial_scale + ).unsqueeze(0) + point_feat = point_sample(feat, rel_img_points, align_corners=not self.aligned) + point_feat = point_feat.squeeze(0).transpose(0, 1) + point_feats.append(point_feat) + + point_feats = torch.cat(point_feats, dim=0) + + channels = features.size(1) + roi_feats = point_feats.reshape(num_rois, channels, *self.output_size) + + return roi_feats + + def __repr__(self): + format_str = self.__class__.__name__ + format_str += '(output_size={}, spatial_scale={}'.format(self.output_size, self.spatial_scale) + return format_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py new file mode 100644 index 000000000000..bf01e1d7746e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_in_boxes.py @@ -0,0 +1,111 @@ +import torch + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['points_in_boxes_part_forward', 'points_in_boxes_cpu_forward', 'points_in_boxes_all_forward'] +) + + +def points_in_boxes_part(points, boxes): + """Find the box in which each point is (CUDA). + + Args: + points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate + boxes (torch.Tensor): [B, T, 7], + num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz] in + LiDAR/DEPTH coordinate, (x, y, z) is the bottom center + + Returns: + box_idxs_of_pts (torch.Tensor): (B, M), default background = -1 + """ + assert points.shape[0] == boxes.shape[0], ( + 'Points and boxes should have the same batch size, ' f'but got {points.shape[0]} and {boxes.shape[0]}' + ) + assert boxes.shape[2] == 7, 'boxes dimension should be 7, ' f'but got unexpected shape {boxes.shape[2]}' + assert points.shape[2] == 3, 'points dimension should be 3, ' f'but got unexpected shape {points.shape[2]}' + batch_size, num_points, _ = points.shape + + box_idxs_of_pts = points.new_zeros((batch_size, num_points), dtype=torch.int).fill_(-1) + + # If manually put the tensor 'points' or 'boxes' on a device + # which is not the current device, some temporary variables + # will be created on the current device in the cuda op, + # and the output will be incorrect. + # Therefore, we force the current device to be the same + # as the device of the tensors if it was not. + # Please refer to https://github.com/open-mmlab/mmdetection3d/issues/305 + # for the incorrect output before the fix. + points_device = points.get_device() + assert points_device == boxes.get_device(), 'Points and boxes should be put on the same device' + if torch.cuda.current_device() != points_device: + torch.cuda.set_device(points_device) + + ext_module.points_in_boxes_part_forward(boxes.contiguous(), points.contiguous(), box_idxs_of_pts) + + return box_idxs_of_pts + + +def points_in_boxes_cpu(points, boxes): + """Find all boxes in which each point is (CPU). The CPU version of + :meth:`points_in_boxes_all`. + + Args: + points (torch.Tensor): [B, M, 3], [x, y, z] in + LiDAR/DEPTH coordinate + boxes (torch.Tensor): [B, T, 7], + num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz], + (x, y, z) is the bottom center. + + Returns: + box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. + """ + assert points.shape[0] == boxes.shape[0], ( + 'Points and boxes should have the same batch size, ' f'but got {points.shape[0]} and {boxes.shape[0]}' + ) + assert boxes.shape[2] == 7, 'boxes dimension should be 7, ' f'but got unexpected shape {boxes.shape[2]}' + assert points.shape[2] == 3, 'points dimension should be 3, ' f'but got unexpected shape {points.shape[2]}' + batch_size, num_points, _ = points.shape + num_boxes = boxes.shape[1] + + point_indices = points.new_zeros((batch_size, num_boxes, num_points), dtype=torch.int) + for b in range(batch_size): + ext_module.points_in_boxes_cpu_forward( + boxes[b].float().contiguous(), points[b].float().contiguous(), point_indices[b] + ) + point_indices = point_indices.transpose(1, 2) + + return point_indices + + +def points_in_boxes_all(points, boxes): + """Find all boxes in which each point is (CUDA). + + Args: + points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate + boxes (torch.Tensor): [B, T, 7], + num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz], + (x, y, z) is the bottom center. + + Returns: + box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. + """ + assert boxes.shape[0] == points.shape[0], ( + 'Points and boxes should have the same batch size, ' f'but got {boxes.shape[0]} and {boxes.shape[0]}' + ) + assert boxes.shape[2] == 7, 'boxes dimension should be 7, ' f'but got unexpected shape {boxes.shape[2]}' + assert points.shape[2] == 3, 'points dimension should be 3, ' f'but got unexpected shape {points.shape[2]}' + batch_size, num_points, _ = points.shape + num_boxes = boxes.shape[1] + + box_idxs_of_pts = points.new_zeros((batch_size, num_points, num_boxes), dtype=torch.int).fill_(0) + + # Same reason as line 25-32 + points_device = points.get_device() + assert points_device == boxes.get_device(), 'Points and boxes should be put on the same device' + if torch.cuda.current_device() != points_device: + torch.cuda.set_device(points_device) + + ext_module.points_in_boxes_all_forward(boxes.contiguous(), points.contiguous(), box_idxs_of_pts) + + return box_idxs_of_pts diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py new file mode 100644 index 000000000000..5e6d0078813f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/points_sampler.py @@ -0,0 +1,169 @@ +from typing import List + +import torch +from torch import nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import force_fp32 + +from .furthest_point_sample import furthest_point_sample, furthest_point_sample_with_dist + + +def calc_square_dist(point_feat_a, point_feat_b, norm=True): + """Calculating square distance between a and b. + + Args: + point_feat_a (Tensor): (B, N, C) Feature vector of each point. + point_feat_b (Tensor): (B, M, C) Feature vector of each point. + norm (Bool, optional): Whether to normalize the distance. + Default: True. + + Returns: + Tensor: (B, N, M) Distance between each pair points. + """ + num_channel = point_feat_a.shape[-1] + # [bs, n, 1] + a_square = torch.sum(point_feat_a.unsqueeze(dim=2).pow(2), dim=-1) + # [bs, 1, m] + b_square = torch.sum(point_feat_b.unsqueeze(dim=1).pow(2), dim=-1) + + corr_matrix = torch.matmul(point_feat_a, point_feat_b.transpose(1, 2)) + + dist = a_square + b_square - 2 * corr_matrix + if norm: + dist = torch.sqrt(dist) / num_channel + return dist + + +def get_sampler_cls(sampler_type): + """Get the type and mode of points sampler. + + Args: + sampler_type (str): The type of points sampler. + The valid value are "D-FPS", "F-FPS", or "FS". + + Returns: + class: Points sampler type. + """ + sampler_mappings = { + 'D-FPS': DFPSSampler, + 'F-FPS': FFPSSampler, + 'FS': FSSampler, + } + try: + return sampler_mappings[sampler_type] + except KeyError: + raise KeyError( + f'Supported `sampler_type` are {sampler_mappings.keys()}, but got \ + {sampler_type}' + ) + + +class PointsSampler(nn.Module): + """Points sampling. + + Args: + num_point (list[int]): Number of sample points. + fps_mod_list (list[str], optional): Type of FPS method, valid mod + ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS']. + F-FPS: using feature distances for FPS. + D-FPS: using Euclidean distances of points for FPS. + FS: using F-FPS and D-FPS simultaneously. + fps_sample_range_list (list[int], optional): + Range of points to apply FPS. Default: [-1]. + """ + + def __init__( + self, num_point: List[int], fps_mod_list: List[str] = ['D-FPS'], fps_sample_range_list: List[int] = [-1] + ): + super().__init__() + # FPS would be applied to different fps_mod in the list, + # so the length of the num_point should be equal to + # fps_mod_list and fps_sample_range_list. + assert len(num_point) == len(fps_mod_list) == len(fps_sample_range_list) + self.num_point = num_point + self.fps_sample_range_list = fps_sample_range_list + self.samplers = nn.ModuleList() + for fps_mod in fps_mod_list: + self.samplers.append(get_sampler_cls(fps_mod)()) + self.fp16_enabled = False + + @force_fp32() + def forward(self, points_xyz, features): + """ + Args: + points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. + features (Tensor): (B, C, N) Descriptors of the features. + + Returns: + Tensor: (B, npoint, sample_num) Indices of sampled points. + """ + indices = [] + last_fps_end_index = 0 + + for fps_sample_range, sampler, npoint in zip(self.fps_sample_range_list, self.samplers, self.num_point): + assert fps_sample_range < points_xyz.shape[1] + + if fps_sample_range == -1: + sample_points_xyz = points_xyz[:, last_fps_end_index:] + if features is not None: + sample_features = features[:, :, last_fps_end_index:] + else: + sample_features = None + else: + sample_points_xyz = points_xyz[:, last_fps_end_index:fps_sample_range] + if features is not None: + sample_features = features[:, :, last_fps_end_index:fps_sample_range] + else: + sample_features = None + + fps_idx = sampler(sample_points_xyz.contiguous(), sample_features, npoint) + + indices.append(fps_idx + last_fps_end_index) + last_fps_end_index += fps_sample_range + indices = torch.cat(indices, dim=1) + + return indices + + +class DFPSSampler(nn.Module): + """Using Euclidean distances of points for FPS.""" + + def __init__(self): + super().__init__() + + def forward(self, points, features, npoint): + """Sampling points with D-FPS.""" + fps_idx = furthest_point_sample(points.contiguous(), npoint) + return fps_idx + + +class FFPSSampler(nn.Module): + """Using feature distances for FPS.""" + + def __init__(self): + super().__init__() + + def forward(self, points, features, npoint): + """Sampling points with F-FPS.""" + assert features is not None, 'feature input to FFPS_Sampler should not be None' + features_for_fps = torch.cat([points, features.transpose(1, 2)], dim=2) + features_dist = calc_square_dist(features_for_fps, features_for_fps, norm=False) + fps_idx = furthest_point_sample_with_dist(features_dist, npoint) + return fps_idx + + +class FSSampler(nn.Module): + """Using F-FPS and D-FPS simultaneously.""" + + def __init__(self): + super().__init__() + + def forward(self, points, features, npoint): + """Sampling points with FS_Sampling.""" + assert features is not None, 'feature input to FS_Sampler should not be None' + ffps_sampler = FFPSSampler() + dfps_sampler = DFPSSampler() + fps_idx_ffps = ffps_sampler(points, features, npoint) + fps_idx_dfps = dfps_sampler(points, features, npoint) + fps_idx = torch.cat([fps_idx_ffps, fps_idx_dfps], dim=1) + return fps_idx diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py new file mode 100644 index 000000000000..e49546cb9059 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/psa_mask.py @@ -0,0 +1,85 @@ +# Modified from https://github.com/hszhao/semseg/blob/master/lib/psa +from torch import nn +from torch.autograd import Function +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['psamask_forward', 'psamask_backward']) + + +class PSAMaskFunction(Function): + @staticmethod + def symbolic(g, input, psa_type, mask_size): + return g.op('mmcv::MMCVPSAMask', input, psa_type_i=psa_type, mask_size_i=mask_size) + + @staticmethod + def forward(ctx, input, psa_type, mask_size): + ctx.psa_type = psa_type + ctx.mask_size = _pair(mask_size) + ctx.save_for_backward(input) + + h_mask, w_mask = ctx.mask_size + batch_size, channels, h_feature, w_feature = input.size() + assert channels == h_mask * w_mask + output = input.new_zeros((batch_size, h_feature * w_feature, h_feature, w_feature)) + + ext_module.psamask_forward( + input, + output, + psa_type=psa_type, + num_=batch_size, + h_feature=h_feature, + w_feature=w_feature, + h_mask=h_mask, + w_mask=w_mask, + half_h_mask=(h_mask - 1) // 2, + half_w_mask=(w_mask - 1) // 2, + ) + return output + + @staticmethod + def backward(ctx, grad_output): + input = ctx.saved_tensors[0] + psa_type = ctx.psa_type + h_mask, w_mask = ctx.mask_size + batch_size, channels, h_feature, w_feature = input.size() + grad_input = grad_output.new_zeros((batch_size, channels, h_feature, w_feature)) + ext_module.psamask_backward( + grad_output, + grad_input, + psa_type=psa_type, + num_=batch_size, + h_feature=h_feature, + w_feature=w_feature, + h_mask=h_mask, + w_mask=w_mask, + half_h_mask=(h_mask - 1) // 2, + half_w_mask=(w_mask - 1) // 2, + ) + return grad_input, None, None, None + + +psa_mask = PSAMaskFunction.apply + + +class PSAMask(nn.Module): + def __init__(self, psa_type, mask_size=None): + super(PSAMask, self).__init__() + assert psa_type in ['collect', 'distribute'] + if psa_type == 'collect': + psa_type_enum = 0 + else: + psa_type_enum = 1 + self.psa_type_enum = psa_type_enum + self.mask_size = mask_size + self.psa_type = psa_type + + def forward(self, input): + return psa_mask(input, self.psa_type_enum, self.mask_size) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(psa_type={self.psa_type}, ' + s += f'mask_size={self.mask_size})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py new file mode 100644 index 000000000000..15be8de4db83 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align.py @@ -0,0 +1,204 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import deprecated_api_warning, ext_loader + +ext_module = ext_loader.load_ext('_ext', ['roi_align_forward', 'roi_align_backward']) + + +class RoIAlignFunction(Function): + @staticmethod + def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio, pool_mode, aligned): + from ..onnx import is_custom_op_loaded + + has_custom_op = is_custom_op_loaded() + if has_custom_op: + return g.op( + 'mmcv::MMCVRoiAlign', + input, + rois, + output_height_i=output_size[0], + output_width_i=output_size[1], + spatial_scale_f=spatial_scale, + sampling_ratio_i=sampling_ratio, + mode_s=pool_mode, + aligned_i=aligned, + ) + else: + from torch.onnx import TensorProtoDataType + from torch.onnx.symbolic_helper import _slice_helper + from torch.onnx.symbolic_opset9 import squeeze, sub + + # batch_indices = rois[:, 0].long() + batch_indices = _slice_helper(g, rois, axes=[1], starts=[0], ends=[1]) + batch_indices = squeeze(g, batch_indices, 1) + batch_indices = g.op('Cast', batch_indices, to_i=TensorProtoDataType.INT64) + # rois = rois[:, 1:] + rois = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5]) + if aligned: + # rois -= 0.5/spatial_scale + aligned_offset = g.op('Constant', value_t=torch.tensor([0.5 / spatial_scale], dtype=torch.float32)) + rois = sub(g, rois, aligned_offset) + # roi align + return g.op( + 'RoiAlign', + input, + rois, + batch_indices, + output_height_i=output_size[0], + output_width_i=output_size[1], + spatial_scale_f=spatial_scale, + sampling_ratio_i=max(0, sampling_ratio), + mode_s=pool_mode, + ) + + @staticmethod + def forward(ctx, input, rois, output_size, spatial_scale=1.0, sampling_ratio=0, pool_mode='avg', aligned=True): + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.sampling_ratio = sampling_ratio + assert pool_mode in ('max', 'avg') + ctx.pool_mode = 0 if pool_mode == 'max' else 1 + ctx.aligned = aligned + ctx.input_shape = input.size() + + assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' + + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], ctx.output_size[1]) + output = input.new_zeros(output_shape) + if ctx.pool_mode == 0: + argmax_y = input.new_zeros(output_shape) + argmax_x = input.new_zeros(output_shape) + else: + argmax_y = input.new_zeros(0) + argmax_x = input.new_zeros(0) + + ext_module.roi_align_forward( + input, + rois, + output, + argmax_y, + argmax_x, + aligned_height=ctx.output_size[0], + aligned_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + pool_mode=ctx.pool_mode, + aligned=ctx.aligned, + ) + + ctx.save_for_backward(rois, argmax_y, argmax_x) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + rois, argmax_y, argmax_x = ctx.saved_tensors + grad_input = grad_output.new_zeros(ctx.input_shape) + # complex head architecture may cause grad_output uncontiguous. + grad_output = grad_output.contiguous() + ext_module.roi_align_backward( + grad_output, + rois, + argmax_y, + argmax_x, + grad_input, + aligned_height=ctx.output_size[0], + aligned_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + pool_mode=ctx.pool_mode, + aligned=ctx.aligned, + ) + return grad_input, None, None, None, None, None, None + + +roi_align = RoIAlignFunction.apply + + +class RoIAlign(nn.Module): + """RoI align pooling layer. + + Args: + output_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sampling_ratio (int): number of inputs samples to take for each + output sample. 0 to take samples densely for current models. + pool_mode (str, 'avg' or 'max'): pooling mode in each bin. + aligned (bool): if False, use the legacy implementation in + MMDetection. If True, align the results more perfectly. + use_torchvision (bool): whether to use roi_align from torchvision. + + Note: + The implementation of RoIAlign when aligned=True is modified from + https://github.com/facebookresearch/detectron2/ + + The meaning of aligned=True: + + Given a continuous coordinate c, its two neighboring pixel + indices (in our pixel model) are computed by floor(c - 0.5) and + ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete + indices [0] and [1] (which are sampled from the underlying signal + at continuous coordinates 0.5 and 1.5). But the original roi_align + (aligned=False) does not subtract the 0.5 when computing + neighboring pixel indices and therefore it uses pixels with a + slightly incorrect alignment (relative to our pixel model) when + performing bilinear interpolation. + + With `aligned=True`, + we first appropriately scale the ROI and then shift it by -0.5 + prior to calling roi_align. This produces the correct neighbors; + + The difference does not make a difference to the model's + performance if ROIAlign is used together with conv layers. + """ + + @deprecated_api_warning({'out_size': 'output_size', 'sample_num': 'sampling_ratio'}, cls_name='RoIAlign') + def __init__( + self, output_size, spatial_scale=1.0, sampling_ratio=0, pool_mode='avg', aligned=True, use_torchvision=False + ): + super(RoIAlign, self).__init__() + + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + self.sampling_ratio = int(sampling_ratio) + self.pool_mode = pool_mode + self.aligned = aligned + self.use_torchvision = use_torchvision + + def forward(self, input, rois): + """ + Args: + input: NCHW images + rois: Bx5 boxes. First column is the index into N.\ + The other 4 columns are xyxy. + """ + if self.use_torchvision: + from torchvision.ops import roi_align as tv_roi_align + + if 'aligned' in tv_roi_align.__code__.co_varnames: + return tv_roi_align( + input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned + ) + else: + if self.aligned: + rois -= rois.new_tensor([0.0] + [0.5 / self.spatial_scale] * 4) + return tv_roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio) + else: + return roi_align( + input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.pool_mode, self.aligned + ) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(output_size={self.output_size}, ' + s += f'spatial_scale={self.spatial_scale}, ' + s += f'sampling_ratio={self.sampling_ratio}, ' + s += f'pool_mode={self.pool_mode}, ' + s += f'aligned={self.aligned}, ' + s += f'use_torchvision={self.use_torchvision})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py new file mode 100644 index 000000000000..07108d2bc888 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_align_rotated.py @@ -0,0 +1,161 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['roi_align_rotated_forward', 'roi_align_rotated_backward']) + + +class RoIAlignRotatedFunction(Function): + @staticmethod + def symbolic(g, features, rois, out_size, spatial_scale, sample_num, aligned, clockwise): + if isinstance(out_size, int): + out_h = out_size + out_w = out_size + elif isinstance(out_size, tuple): + assert len(out_size) == 2 + assert isinstance(out_size[0], int) + assert isinstance(out_size[1], int) + out_h, out_w = out_size + else: + raise TypeError('"out_size" must be an integer or tuple of integers') + return g.op( + 'mmcv::MMCVRoIAlignRotated', + features, + rois, + output_height_i=out_h, + output_width_i=out_h, + spatial_scale_f=spatial_scale, + sampling_ratio_i=sample_num, + aligned_i=aligned, + clockwise_i=clockwise, + ) + + @staticmethod + def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0, aligned=True, clockwise=False): + if isinstance(out_size, int): + out_h = out_size + out_w = out_size + elif isinstance(out_size, tuple): + assert len(out_size) == 2 + assert isinstance(out_size[0], int) + assert isinstance(out_size[1], int) + out_h, out_w = out_size + else: + raise TypeError('"out_size" must be an integer or tuple of integers') + ctx.spatial_scale = spatial_scale + ctx.sample_num = sample_num + ctx.aligned = aligned + ctx.clockwise = clockwise + ctx.save_for_backward(rois) + ctx.feature_size = features.size() + + batch_size, num_channels, data_height, data_width = features.size() + num_rois = rois.size(0) + + output = features.new_zeros(num_rois, num_channels, out_h, out_w) + ext_module.roi_align_rotated_forward( + features, + rois, + output, + pooled_height=out_h, + pooled_width=out_w, + spatial_scale=spatial_scale, + sample_num=sample_num, + aligned=aligned, + clockwise=clockwise, + ) + return output + + @staticmethod + def backward(ctx, grad_output): + feature_size = ctx.feature_size + spatial_scale = ctx.spatial_scale + aligned = ctx.aligned + clockwise = ctx.clockwise + sample_num = ctx.sample_num + rois = ctx.saved_tensors[0] + assert feature_size is not None + batch_size, num_channels, data_height, data_width = feature_size + + out_w = grad_output.size(3) + out_h = grad_output.size(2) + + grad_input = grad_rois = None + + if ctx.needs_input_grad[0]: + grad_input = rois.new_zeros(batch_size, num_channels, data_height, data_width) + ext_module.roi_align_rotated_backward( + grad_output.contiguous(), + rois, + grad_input, + pooled_height=out_h, + pooled_width=out_w, + spatial_scale=spatial_scale, + sample_num=sample_num, + aligned=aligned, + clockwise=clockwise, + ) + return grad_input, grad_rois, None, None, None, None, None + + +roi_align_rotated = RoIAlignRotatedFunction.apply + + +class RoIAlignRotated(nn.Module): + """RoI align pooling layer for rotated proposals. + + It accepts a feature map of shape (N, C, H, W) and rois with shape + (n, 6) with each roi decoded as (batch_index, center_x, center_y, + w, h, angle). The angle is in radian. + + Args: + out_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sample_num (int): number of inputs samples to take for each + output sample. 0 to take samples densely for current models. + aligned (bool): if False, use the legacy implementation in + MMDetection. If True, align the results more perfectly. + Default: True. + clockwise (bool): If True, the angle in each proposal follows a + clockwise fashion in image space, otherwise, the angle is + counterclockwise. Default: False. + + Note: + The implementation of RoIAlign when aligned=True is modified from + https://github.com/facebookresearch/detectron2/ + + The meaning of aligned=True: + + Given a continuous coordinate c, its two neighboring pixel + indices (in our pixel model) are computed by floor(c - 0.5) and + ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete + indices [0] and [1] (which are sampled from the underlying signal + at continuous coordinates 0.5 and 1.5). But the original roi_align + (aligned=False) does not subtract the 0.5 when computing + neighboring pixel indices and therefore it uses pixels with a + slightly incorrect alignment (relative to our pixel model) when + performing bilinear interpolation. + + With `aligned=True`, + we first appropriately scale the ROI and then shift it by -0.5 + prior to calling roi_align. This produces the correct neighbors; + + The difference does not make a difference to the model's + performance if ROIAlign is used together with conv layers. + """ + + def __init__(self, out_size, spatial_scale, sample_num=0, aligned=True, clockwise=False): + super(RoIAlignRotated, self).__init__() + + self.out_size = out_size + self.spatial_scale = float(spatial_scale) + self.sample_num = int(sample_num) + self.aligned = aligned + self.clockwise = clockwise + + def forward(self, features, rois): + return RoIAlignRotatedFunction.apply( + features, rois, self.out_size, self.spatial_scale, self.sample_num, self.aligned, self.clockwise + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py new file mode 100644 index 000000000000..04e3d55a3a5c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roi_pool.py @@ -0,0 +1,79 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['roi_pool_forward', 'roi_pool_backward']) + + +class RoIPoolFunction(Function): + @staticmethod + def symbolic(g, input, rois, output_size, spatial_scale): + return g.op('MaxRoiPool', input, rois, pooled_shape_i=output_size, spatial_scale_f=spatial_scale) + + @staticmethod + def forward(ctx, input, rois, output_size, spatial_scale=1.0): + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.input_shape = input.size() + + assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' + + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], ctx.output_size[1]) + output = input.new_zeros(output_shape) + argmax = input.new_zeros(output_shape, dtype=torch.int) + + ext_module.roi_pool_forward( + input, + rois, + output, + argmax, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + ) + + ctx.save_for_backward(rois, argmax) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + rois, argmax = ctx.saved_tensors + grad_input = grad_output.new_zeros(ctx.input_shape) + + ext_module.roi_pool_backward( + grad_output, + rois, + argmax, + grad_input, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + ) + + return grad_input, None, None, None + + +roi_pool = RoIPoolFunction.apply + + +class RoIPool(nn.Module): + def __init__(self, output_size, spatial_scale=1.0): + super(RoIPool, self).__init__() + + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + + def forward(self, input, rois): + return roi_pool(input, rois, self.output_size, self.spatial_scale) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(output_size={self.output_size}, ' + s += f'spatial_scale={self.spatial_scale})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py new file mode 100644 index 000000000000..befb3a757324 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roiaware_pool3d.py @@ -0,0 +1,100 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn as nn +from torch.autograd import Function + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['roiaware_pool3d_forward', 'roiaware_pool3d_backward']) + + +class RoIAwarePool3d(nn.Module): + """Encode the geometry-specific features of each 3D proposal. + + Please refer to `PartA2 `_ for more + details. + + Args: + out_size (int or tuple): The size of output features. n or + [n1, n2, n3]. + max_pts_per_voxel (int, optional): The maximum number of points per + voxel. Default: 128. + mode (str, optional): Pooling method of RoIAware, 'max' or 'avg'. + Default: 'max'. + """ + + def __init__(self, out_size, max_pts_per_voxel=128, mode='max'): + super().__init__() + + self.out_size = out_size + self.max_pts_per_voxel = max_pts_per_voxel + assert mode in ['max', 'avg'] + pool_mapping = {'max': 0, 'avg': 1} + self.mode = pool_mapping[mode] + + def forward(self, rois, pts, pts_feature): + """ + Args: + rois (torch.Tensor): [N, 7], in LiDAR coordinate, + (x, y, z) is the bottom center of rois. + pts (torch.Tensor): [npoints, 3], coordinates of input points. + pts_feature (torch.Tensor): [npoints, C], features of input points. + + Returns: + pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C] + """ + + return RoIAwarePool3dFunction.apply(rois, pts, pts_feature, self.out_size, self.max_pts_per_voxel, self.mode) + + +class RoIAwarePool3dFunction(Function): + @staticmethod + def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel, mode): + """ + Args: + rois (torch.Tensor): [N, 7], in LiDAR coordinate, + (x, y, z) is the bottom center of rois. + pts (torch.Tensor): [npoints, 3], coordinates of input points. + pts_feature (torch.Tensor): [npoints, C], features of input points. + out_size (int or tuple): The size of output features. n or + [n1, n2, n3]. + max_pts_per_voxel (int): The maximum number of points per voxel. + Default: 128. + mode (int): Pooling method of RoIAware, 0 (max pool) or 1 (average + pool). + + Returns: + pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C], output + pooled features. + """ + + if isinstance(out_size, int): + out_x = out_y = out_z = out_size + else: + assert len(out_size) == 3 + assert mmcv.is_tuple_of(out_size, int) + out_x, out_y, out_z = out_size + + num_rois = rois.shape[0] + num_channels = pts_feature.shape[-1] + num_pts = pts.shape[0] + + pooled_features = pts_feature.new_zeros((num_rois, out_x, out_y, out_z, num_channels)) + argmax = pts_feature.new_zeros((num_rois, out_x, out_y, out_z, num_channels), dtype=torch.int) + pts_idx_of_voxels = pts_feature.new_zeros((num_rois, out_x, out_y, out_z, max_pts_per_voxel), dtype=torch.int) + + ext_module.roiaware_pool3d_forward(rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features, mode) + + ctx.roiaware_pool3d_for_backward = (pts_idx_of_voxels, argmax, mode, num_pts, num_channels) + return pooled_features + + @staticmethod + def backward(ctx, grad_out): + ret = ctx.roiaware_pool3d_for_backward + pts_idx_of_voxels, argmax, mode, num_pts, num_channels = ret + + grad_in = grad_out.new_zeros((num_pts, num_channels)) + ext_module.roiaware_pool3d_backward(pts_idx_of_voxels, argmax, grad_out.contiguous(), grad_in, mode) + + return None, None, grad_in, None, None, None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py new file mode 100644 index 000000000000..c24c4844bd24 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/roipoint_pool3d.py @@ -0,0 +1,75 @@ +from torch import nn as nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['roipoint_pool3d_forward']) + + +class RoIPointPool3d(nn.Module): + """Encode the geometry-specific features of each 3D proposal. + + Please refer to `Paper of PartA2 `_ + for more details. + + Args: + num_sampled_points (int, optional): Number of samples in each roi. + Default: 512. + """ + + def __init__(self, num_sampled_points=512): + super().__init__() + self.num_sampled_points = num_sampled_points + + def forward(self, points, point_features, boxes3d): + """ + Args: + points (torch.Tensor): Input points whose shape is (B, N, C). + point_features (torch.Tensor): Features of input points whose shape + is (B, N, C). + boxes3d (B, M, 7), Input bounding boxes whose shape is (B, M, 7). + + Returns: + pooled_features (torch.Tensor): The output pooled features whose + shape is (B, M, 512, 3 + C). + pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M). + """ + return RoIPointPool3dFunction.apply(points, point_features, boxes3d, self.num_sampled_points) + + +class RoIPointPool3dFunction(Function): + @staticmethod + def forward(ctx, points, point_features, boxes3d, num_sampled_points=512): + """ + Args: + points (torch.Tensor): Input points whose shape is (B, N, C). + point_features (torch.Tensor): Features of input points whose shape + is (B, N, C). + boxes3d (B, M, 7), Input bounding boxes whose shape is (B, M, 7). + num_sampled_points (int, optional): The num of sampled points. + Default: 512. + + Returns: + pooled_features (torch.Tensor): The output pooled features whose + shape is (B, M, 512, 3 + C). + pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M). + """ + assert len(points.shape) == 3 and points.shape[2] == 3 + batch_size, boxes_num, feature_len = points.shape[0], boxes3d.shape[1], point_features.shape[2] + pooled_boxes3d = boxes3d.view(batch_size, -1, 7) + pooled_features = point_features.new_zeros((batch_size, boxes_num, num_sampled_points, 3 + feature_len)) + pooled_empty_flag = point_features.new_zeros((batch_size, boxes_num)).int() + + ext_module.roipoint_pool3d_forward( + points.contiguous(), + pooled_boxes3d.contiguous(), + point_features.contiguous(), + pooled_features, + pooled_empty_flag, + ) + + return pooled_features, pooled_empty_flag + + @staticmethod + def backward(ctx, grad_out): + raise NotImplementedError diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py new file mode 100644 index 000000000000..c72bbb8a502d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/saconv.py @@ -0,0 +1,128 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import CONV_LAYERS, ConvAWS2d, constant_init +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops.deform_conv import deform_conv2d +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version + + +@CONV_LAYERS.register_module(name='SAC') +class SAConv2d(ConvAWS2d): + """SAC (Switchable Atrous Convolution) + + This is an implementation of SAC in DetectoRS + (https://arxiv.org/pdf/2006.02334.pdf). + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. Default: 0 + padding_mode (string, optional): ``'zeros'``, ``'reflect'``, + ``'replicate'`` or ``'circular'``. Default: ``'zeros'`` + dilation (int or tuple, optional): Spacing between kernel elements. + Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the + output. Default: ``True`` + use_deform: If ``True``, replace convolution with deformable + convolution. Default: ``False``. + """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + use_deform=False, + ): + super().__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias, + ) + self.use_deform = use_deform + self.switch = nn.Conv2d(self.in_channels, 1, kernel_size=1, stride=stride, bias=True) + self.weight_diff = nn.Parameter(torch.Tensor(self.weight.size())) + self.pre_context = nn.Conv2d(self.in_channels, self.in_channels, kernel_size=1, bias=True) + self.post_context = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=1, bias=True) + if self.use_deform: + self.offset_s = nn.Conv2d(self.in_channels, 18, kernel_size=3, padding=1, stride=stride, bias=True) + self.offset_l = nn.Conv2d(self.in_channels, 18, kernel_size=3, padding=1, stride=stride, bias=True) + self.init_weights() + + def init_weights(self): + constant_init(self.switch, 0, bias=1) + self.weight_diff.data.zero_() + constant_init(self.pre_context, 0) + constant_init(self.post_context, 0) + if self.use_deform: + constant_init(self.offset_s, 0) + constant_init(self.offset_l, 0) + + def forward(self, x): + # pre-context + avg_x = F.adaptive_avg_pool2d(x, output_size=1) + avg_x = self.pre_context(avg_x) + avg_x = avg_x.expand_as(x) + x = x + avg_x + # switch + avg_x = F.pad(x, pad=(2, 2, 2, 2), mode='reflect') + avg_x = F.avg_pool2d(avg_x, kernel_size=5, stride=1, padding=0) + switch = self.switch(avg_x) + # sac + weight = self._get_weight(self.weight) + zero_bias = torch.zeros(self.out_channels, device=weight.device, dtype=weight.dtype) + + if self.use_deform: + offset = self.offset_s(avg_x) + out_s = deform_conv2d(x, offset, weight, self.stride, self.padding, self.dilation, self.groups, 1) + else: + if TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.5.0'): + out_s = super().conv2d_forward(x, weight) + elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'): + # bias is a required argument of _conv_forward in torch 1.8.0 + out_s = super()._conv_forward(x, weight, zero_bias) + else: + out_s = super()._conv_forward(x, weight) + ori_p = self.padding + ori_d = self.dilation + self.padding = tuple(3 * p for p in self.padding) + self.dilation = tuple(3 * d for d in self.dilation) + weight = weight + self.weight_diff + if self.use_deform: + offset = self.offset_l(avg_x) + out_l = deform_conv2d(x, offset, weight, self.stride, self.padding, self.dilation, self.groups, 1) + else: + if TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.5.0'): + out_l = super().conv2d_forward(x, weight) + elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'): + # bias is a required argument of _conv_forward in torch 1.8.0 + out_l = super()._conv_forward(x, weight, zero_bias) + else: + out_l = super()._conv_forward(x, weight) + + out = switch * out_s + (1 - switch) * out_l + self.padding = ori_p + self.dilation = ori_d + # post-context + avg_x = F.adaptive_avg_pool2d(out, output_size=1) + avg_x = self.post_context(avg_x) + avg_x = avg_x.expand_as(out) + out = out + avg_x + return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py new file mode 100644 index 000000000000..6d5866dcf2c6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/scatter_points.py @@ -0,0 +1,132 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['dynamic_point_to_voxel_forward', 'dynamic_point_to_voxel_backward']) + + +class _DynamicScatter(Function): + @staticmethod + def forward(ctx, feats, coors, reduce_type='max'): + """convert kitti points(N, >=3) to voxels. + + Args: + feats (torch.Tensor): [N, C]. Points features to be reduced + into voxels. + coors (torch.Tensor): [N, ndim]. Corresponding voxel coordinates + (specifically multi-dim voxel index) of each points. + reduce_type (str, optional): Reduce op. support 'max', 'sum' and + 'mean'. Default: 'max'. + + Returns: + voxel_feats (torch.Tensor): [M, C]. Reduced features, input + features that shares the same voxel coordinates are reduced to + one row. + voxel_coors (torch.Tensor): [M, ndim]. Voxel coordinates. + """ + results = ext_module.dynamic_point_to_voxel_forward(feats, coors, reduce_type) + (voxel_feats, voxel_coors, point2voxel_map, voxel_points_count) = results + ctx.reduce_type = reduce_type + ctx.save_for_backward(feats, voxel_feats, point2voxel_map, voxel_points_count) + ctx.mark_non_differentiable(voxel_coors) + return voxel_feats, voxel_coors + + @staticmethod + def backward(ctx, grad_voxel_feats, grad_voxel_coors=None): + (feats, voxel_feats, point2voxel_map, voxel_points_count) = ctx.saved_tensors + grad_feats = torch.zeros_like(feats) + # TODO: whether to use index put or use cuda_backward + # To use index put, need point to voxel index + ext_module.dynamic_point_to_voxel_backward( + grad_feats, + grad_voxel_feats.contiguous(), + feats, + voxel_feats, + point2voxel_map, + voxel_points_count, + ctx.reduce_type, + ) + return grad_feats, None, None + + +dynamic_scatter = _DynamicScatter.apply + + +class DynamicScatter(nn.Module): + """Scatters points into voxels, used in the voxel encoder with dynamic + voxelization. + + Note: + The CPU and GPU implementation get the same output, but have numerical + difference after summation and division (e.g., 5e-7). + + Args: + voxel_size (list): list [x, y, z] size of three dimension. + point_cloud_range (list): The coordinate range of points, [x_min, + y_min, z_min, x_max, y_max, z_max]. + average_points (bool): whether to use avg pooling to scatter points + into voxel. + """ + + def __init__(self, voxel_size, point_cloud_range, average_points: bool): + super().__init__() + + self.voxel_size = voxel_size + self.point_cloud_range = point_cloud_range + self.average_points = average_points + + def forward_single(self, points, coors): + """Scatters points into voxels. + + Args: + points (torch.Tensor): Points to be reduced into voxels. + coors (torch.Tensor): Corresponding voxel coordinates (specifically + multi-dim voxel index) of each points. + + Returns: + voxel_feats (torch.Tensor): Reduced features, input features that + shares the same voxel coordinates are reduced to one row. + voxel_coors (torch.Tensor): Voxel coordinates. + """ + reduce = 'mean' if self.average_points else 'max' + return dynamic_scatter(points.contiguous(), coors.contiguous(), reduce) + + def forward(self, points, coors): + """Scatters points/features into voxels. + + Args: + points (torch.Tensor): Points to be reduced into voxels. + coors (torch.Tensor): Corresponding voxel coordinates (specifically + multi-dim voxel index) of each points. + + Returns: + voxel_feats (torch.Tensor): Reduced features, input features that + shares the same voxel coordinates are reduced to one row. + voxel_coors (torch.Tensor): Voxel coordinates. + """ + if coors.size(-1) == 3: + return self.forward_single(points, coors) + else: + batch_size = coors[-1, 0] + 1 + voxels, voxel_coors = [], [] + for i in range(batch_size): + inds = torch.where(coors[:, 0] == i) + voxel, voxel_coor = self.forward_single(points[inds], coors[inds][:, 1:]) + coor_pad = nn.functional.pad(voxel_coor, (1, 0), mode='constant', value=i) + voxel_coors.append(coor_pad) + voxels.append(voxel) + features = torch.cat(voxels, dim=0) + feature_coors = torch.cat(voxel_coors, dim=0) + + return features, feature_coors + + def __repr__(self): + s = self.__class__.__name__ + '(' + s += 'voxel_size=' + str(self.voxel_size) + s += ', point_cloud_range=' + str(self.point_cloud_range) + s += ', average_points=' + str(self.average_points) + s += ')' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py new file mode 100644 index 000000000000..28a609585caf --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/sync_bn.py @@ -0,0 +1,288 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.distributed as dist +import torch.nn.functional as F +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.module import Module +from torch.nn.parameter import Parameter + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import NORM_LAYERS +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', + [ + 'sync_bn_forward_mean', + 'sync_bn_forward_var', + 'sync_bn_forward_output', + 'sync_bn_backward_param', + 'sync_bn_backward_data', + ], +) + + +class SyncBatchNormFunction(Function): + @staticmethod + def symbolic(g, input, running_mean, running_var, weight, bias, momentum, eps, group, group_size, stats_mode): + return g.op( + 'mmcv::MMCVSyncBatchNorm', + input, + running_mean, + running_var, + weight, + bias, + momentum_f=momentum, + eps_f=eps, + group_i=group, + group_size_i=group_size, + stats_mode=stats_mode, + ) + + @staticmethod + def forward(self, input, running_mean, running_var, weight, bias, momentum, eps, group, group_size, stats_mode): + self.momentum = momentum + self.eps = eps + self.group = group + self.group_size = group_size + self.stats_mode = stats_mode + + assert isinstance( + input, (torch.HalfTensor, torch.FloatTensor, torch.cuda.HalfTensor, torch.cuda.FloatTensor) + ), f'only support Half or Float Tensor, but {input.type()}' + output = torch.zeros_like(input) + input3d = input.flatten(start_dim=2) + output3d = output.view_as(input3d) + num_channels = input3d.size(1) + + # ensure mean/var/norm/std are initialized as zeros + # ``torch.empty()`` does not guarantee that + mean = torch.zeros(num_channels, dtype=torch.float, device=input3d.device) + var = torch.zeros(num_channels, dtype=torch.float, device=input3d.device) + norm = torch.zeros_like(input3d, dtype=torch.float, device=input3d.device) + std = torch.zeros(num_channels, dtype=torch.float, device=input3d.device) + + batch_size = input3d.size(0) + if batch_size > 0: + ext_module.sync_bn_forward_mean(input3d, mean) + batch_flag = torch.ones([1], device=mean.device, dtype=mean.dtype) + else: + # skip updating mean and leave it as zeros when the input is empty + batch_flag = torch.zeros([1], device=mean.device, dtype=mean.dtype) + + # synchronize mean and the batch flag + vec = torch.cat([mean, batch_flag]) + if self.stats_mode == 'N': + vec *= batch_size + if self.group_size > 1: + dist.all_reduce(vec, group=self.group) + total_batch = vec[-1].detach() + mean = vec[:num_channels] + + if self.stats_mode == 'default': + mean = mean / self.group_size + elif self.stats_mode == 'N': + mean = mean / total_batch.clamp(min=1) + else: + raise NotImplementedError + + # leave var as zeros when the input is empty + if batch_size > 0: + ext_module.sync_bn_forward_var(input3d, mean, var) + + if self.stats_mode == 'N': + var *= batch_size + if self.group_size > 1: + dist.all_reduce(var, group=self.group) + + if self.stats_mode == 'default': + var /= self.group_size + elif self.stats_mode == 'N': + var /= total_batch.clamp(min=1) + else: + raise NotImplementedError + + # if the total batch size over all the ranks is zero, + # we should not update the statistics in the current batch + update_flag = total_batch.clamp(max=1) + momentum = update_flag * self.momentum + ext_module.sync_bn_forward_output( + input3d, + mean, + var, + weight, + bias, + running_mean, + running_var, + norm, + std, + output3d, + eps=self.eps, + momentum=momentum, + group_size=self.group_size, + ) + self.save_for_backward(norm, std, weight) + return output + + @staticmethod + @once_differentiable + def backward(self, grad_output): + norm, std, weight = self.saved_tensors + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(weight) + grad_input = torch.zeros_like(grad_output) + grad_output3d = grad_output.flatten(start_dim=2) + grad_input3d = grad_input.view_as(grad_output3d) + + batch_size = grad_input3d.size(0) + if batch_size > 0: + ext_module.sync_bn_backward_param(grad_output3d, norm, grad_weight, grad_bias) + + # all reduce + if self.group_size > 1: + dist.all_reduce(grad_weight, group=self.group) + dist.all_reduce(grad_bias, group=self.group) + grad_weight /= self.group_size + grad_bias /= self.group_size + + if batch_size > 0: + ext_module.sync_bn_backward_data(grad_output3d, weight, grad_weight, grad_bias, norm, std, grad_input3d) + + return grad_input, None, None, grad_weight, grad_bias, None, None, None, None, None + + +@NORM_LAYERS.register_module(name='MMSyncBN') +class SyncBatchNorm(Module): + """Synchronized Batch Normalization. + + Args: + num_features (int): number of features/chennels in input tensor + eps (float, optional): a value added to the denominator for numerical + stability. Defaults to 1e-5. + momentum (float, optional): the value used for the running_mean and + running_var computation. Defaults to 0.1. + affine (bool, optional): whether to use learnable affine parameters. + Defaults to True. + track_running_stats (bool, optional): whether to track the running + mean and variance during training. When set to False, this + module does not track such statistics, and initializes statistics + buffers ``running_mean`` and ``running_var`` as ``None``. When + these buffers are ``None``, this module always uses batch + statistics in both training and eval modes. Defaults to True. + group (int, optional): synchronization of stats happen within + each process group individually. By default it is synchronization + across the whole world. Defaults to None. + stats_mode (str, optional): The statistical mode. Available options + includes ``'default'`` and ``'N'``. Defaults to 'default'. + When ``stats_mode=='default'``, it computes the overall statistics + using those from each worker with equal weight, i.e., the + statistics are synchronized and simply divied by ``group``. This + mode will produce inaccurate statistics when empty tensors occur. + When ``stats_mode=='N'``, it compute the overall statistics using + the total number of batches in each worker ignoring the number of + group, i.e., the statistics are synchronized and then divied by + the total batch ``N``. This mode is beneficial when empty tensors + occur during training, as it average the total mean by the real + number of batch. + """ + + def __init__( + self, + num_features, + eps=1e-5, + momentum=0.1, + affine=True, + track_running_stats=True, + group=None, + stats_mode='default', + ): + super(SyncBatchNorm, self).__init__() + self.num_features = num_features + self.eps = eps + self.momentum = momentum + self.affine = affine + self.track_running_stats = track_running_stats + group = dist.group.WORLD if group is None else group + self.group = group + self.group_size = dist.get_world_size(group) + assert stats_mode in ['default', 'N'], f'"stats_mode" only accepts "default" and "N", got "{stats_mode}"' + self.stats_mode = stats_mode + if self.affine: + self.weight = Parameter(torch.Tensor(num_features)) + self.bias = Parameter(torch.Tensor(num_features)) + else: + self.register_parameter('weight', None) + self.register_parameter('bias', None) + if self.track_running_stats: + self.register_buffer('running_mean', torch.zeros(num_features)) + self.register_buffer('running_var', torch.ones(num_features)) + self.register_buffer('num_batches_tracked', torch.tensor(0, dtype=torch.long)) + else: + self.register_buffer('running_mean', None) + self.register_buffer('running_var', None) + self.register_buffer('num_batches_tracked', None) + self.reset_parameters() + + def reset_running_stats(self): + if self.track_running_stats: + self.running_mean.zero_() + self.running_var.fill_(1) + self.num_batches_tracked.zero_() + + def reset_parameters(self): + self.reset_running_stats() + if self.affine: + self.weight.data.uniform_() # pytorch use ones_() + self.bias.data.zero_() + + def forward(self, input): + if input.dim() < 2: + raise ValueError(f'expected at least 2D input, got {input.dim()}D input') + if self.momentum is None: + exponential_average_factor = 0.0 + else: + exponential_average_factor = self.momentum + + if self.training and self.track_running_stats: + if self.num_batches_tracked is not None: + self.num_batches_tracked += 1 + if self.momentum is None: # use cumulative moving average + exponential_average_factor = 1.0 / float(self.num_batches_tracked) + else: # use exponential moving average + exponential_average_factor = self.momentum + + if self.training or not self.track_running_stats: + return SyncBatchNormFunction.apply( + input, + self.running_mean, + self.running_var, + self.weight, + self.bias, + exponential_average_factor, + self.eps, + self.group, + self.group_size, + self.stats_mode, + ) + else: + return F.batch_norm( + input, + self.running_mean, + self.running_var, + self.weight, + self.bias, + False, + exponential_average_factor, + self.eps, + ) + + def __repr__(self): + s = self.__class__.__name__ + s += f'({self.num_features}, ' + s += f'eps={self.eps}, ' + s += f'momentum={self.momentum}, ' + s += f'affine={self.affine}, ' + s += f'track_running_stats={self.track_running_stats}, ' + s += f'group_size={self.group_size},' + s += f'stats_mode={self.stats_mode})' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py new file mode 100644 index 000000000000..09333e484221 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_interpolate.py @@ -0,0 +1,62 @@ +from typing import Tuple + +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['three_interpolate_forward', 'three_interpolate_backward']) + + +class ThreeInterpolate(Function): + """Performs weighted linear interpolation on 3 features. + + Please refer to `Paper of PointNet++ `_ + for more details. + """ + + @staticmethod + def forward(ctx, features: torch.Tensor, indices: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: + """ + Args: + features (Tensor): (B, C, M) Features descriptors to be + interpolated + indices (Tensor): (B, n, 3) index three nearest neighbors + of the target features in features + weight (Tensor): (B, n, 3) weights of interpolation + + Returns: + Tensor: (B, C, N) tensor of the interpolated features + """ + assert features.is_contiguous() + assert indices.is_contiguous() + assert weight.is_contiguous() + + B, c, m = features.size() + n = indices.size(1) + ctx.three_interpolate_for_backward = (indices, weight, m) + output = torch.cuda.FloatTensor(B, c, n) + + ext_module.three_interpolate_forward(features, indices, weight, output, b=B, c=c, m=m, n=n) + return output + + @staticmethod + def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Args: + grad_out (Tensor): (B, C, N) tensor with gradients of outputs + + Returns: + Tensor: (B, C, M) tensor with gradients of features + """ + idx, weight, m = ctx.three_interpolate_for_backward + B, c, n = grad_out.size() + + grad_features = torch.cuda.FloatTensor(B, c, m).zero_() + grad_out_data = grad_out.data.contiguous() + + ext_module.three_interpolate_backward(grad_out_data, idx, weight, grad_features.data, b=B, c=c, n=n, m=m) + return grad_features, None, None + + +three_interpolate = ThreeInterpolate.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py new file mode 100644 index 000000000000..384d91534d17 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/three_nn.py @@ -0,0 +1,50 @@ +from typing import Tuple + +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['three_nn_forward']) + + +class ThreeNN(Function): + """Find the top-3 nearest neighbors of the target set from the source set. + + Please refer to `Paper of PointNet++ `_ + for more details. + """ + + @staticmethod + def forward(ctx, target: torch.Tensor, source: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + target (Tensor): shape (B, N, 3), points set that needs to + find the nearest neighbors. + source (Tensor): shape (B, M, 3), points set that is used + to find the nearest neighbors of points in target set. + + Returns: + Tensor: shape (B, N, 3), L2 distance of each point in target + set to their corresponding nearest neighbors. + """ + target = target.contiguous() + source = source.contiguous() + + B, N, _ = target.size() + m = source.size(1) + dist2 = torch.cuda.FloatTensor(B, N, 3) + idx = torch.cuda.IntTensor(B, N, 3) + + ext_module.three_nn_forward(target, source, dist2, idx, b=B, n=N, m=m) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(idx) + + return torch.sqrt(dist2), idx + + @staticmethod + def backward(ctx, a=None, b=None): + return None, None + + +three_nn = ThreeNN.apply diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py new file mode 100644 index 000000000000..4b0a8162e811 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/tin_shift.py @@ -0,0 +1,67 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Code reference from "Temporal Interlacing Network" +# https://github.com/deepcs233/TIN/blob/master/cuda_shift/rtc_wrap.py +# Hao Shao, Shengju Qian, Yu Liu +# shaoh19@mails.tsinghua.edu.cn, sjqian@cse.cuhk.edu.hk, yuliu@ee.cuhk.edu.hk + +import torch +import torch.nn as nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['tin_shift_forward', 'tin_shift_backward']) + + +class TINShiftFunction(Function): + @staticmethod + def forward(ctx, input, shift): + C = input.size(2) + num_segments = shift.size(1) + if C // num_segments <= 0 or C % num_segments != 0: + raise ValueError( + 'C should be a multiple of num_segments, ' f'but got C={C} and num_segments={num_segments}.' + ) + + ctx.save_for_backward(shift) + + out = torch.zeros_like(input) + ext_module.tin_shift_forward(input, shift, out) + + return out + + @staticmethod + def backward(ctx, grad_output): + + shift = ctx.saved_tensors[0] + data_grad_input = grad_output.new(*grad_output.size()).zero_() + shift_grad_input = shift.new(*shift.size()).zero_() + ext_module.tin_shift_backward(grad_output, shift, data_grad_input) + + return data_grad_input, shift_grad_input + + +tin_shift = TINShiftFunction.apply + + +class TINShift(nn.Module): + """Temporal Interlace Shift. + + Temporal Interlace shift is a differentiable temporal-wise frame shifting + which is proposed in "Temporal Interlacing Network" + + Please refer to https://arxiv.org/abs/2001.06499 for more details. + Code is modified from https://github.com/mit-han-lab/temporal-shift-module + """ + + def forward(self, input, shift): + """Perform temporal interlace shift. + + Args: + input (Tensor): Feature map with shape [N, num_segments, C, H * W]. + shift (Tensor): Shift tensor with shape [N, num_segments]. + + Returns: + Feature map after temporal interlace shift. + """ + return tin_shift(input, shift) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py new file mode 100644 index 000000000000..c1f330686b9c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/upfirdn2d.py @@ -0,0 +1,307 @@ +# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.py # noqa:E501 + +# Copyright (c) 2021, NVIDIA Corporation. All rights reserved. +# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator +# Augmentation (ADA) +# ======================================================================= + +# 1. Definitions + +# "Licensor" means any person or entity that distributes its Work. + +# "Software" means the original work of authorship made available under +# this License. + +# "Work" means the Software and any additions to or derivative works of +# the Software that are made available under this License. + +# The terms "reproduce," "reproduction," "derivative works," and +# "distribution" have the meaning as provided under U.S. copyright law; +# provided, however, that for the purposes of this License, derivative +# works shall not include works that remain separable from, or merely +# link (or bind by name) to the interfaces of, the Work. + +# Works, including the Software, are "made available" under this License +# by including in or with the Work either (a) a copyright notice +# referencing the applicability of this License to the Work, or (b) a +# copy of this License. + +# 2. License Grants + +# 2.1 Copyright Grant. Subject to the terms and conditions of this +# License, each Licensor grants to you a perpetual, worldwide, +# non-exclusive, royalty-free, copyright license to reproduce, +# prepare derivative works of, publicly display, publicly perform, +# sublicense and distribute its Work and any resulting derivative +# works in any form. + +# 3. Limitations + +# 3.1 Redistribution. You may reproduce or distribute the Work only +# if (a) you do so under this License, (b) you include a complete +# copy of this License with your distribution, and (c) you retain +# without modification any copyright, patent, trademark, or +# attribution notices that are present in the Work. + +# 3.2 Derivative Works. You may specify that additional or different +# terms apply to the use, reproduction, and distribution of your +# derivative works of the Work ("Your Terms") only if (a) Your Terms +# provide that the use limitation in Section 3.3 applies to your +# derivative works, and (b) you identify the specific derivative +# works that are subject to Your Terms. Notwithstanding Your Terms, +# this License (including the redistribution requirements in Section +# 3.1) will continue to apply to the Work itself. + +# 3.3 Use Limitation. The Work and any derivative works thereof only +# may be used or intended for use non-commercially. Notwithstanding +# the foregoing, NVIDIA and its affiliates may use the Work and any +# derivative works commercially. As used herein, "non-commercially" +# means for research or evaluation purposes only. + +# 3.4 Patent Claims. If you bring or threaten to bring a patent claim +# against any Licensor (including any claim, cross-claim or +# counterclaim in a lawsuit) to enforce any patents that you allege +# are infringed by any Work, then your rights under this License from +# such Licensor (including the grant in Section 2.1) will terminate +# immediately. + +# 3.5 Trademarks. This License does not grant any rights to use any +# Licensor’s or its affiliates’ names, logos, or trademarks, except +# as necessary to reproduce the notices described in this License. + +# 3.6 Termination. If you violate any term of this License, then your +# rights under this License (including the grant in Section 2.1) will +# terminate immediately. + +# 4. Disclaimer of Warranty. + +# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR +# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER +# THIS LICENSE. + +# 5. Limitation of Liability. + +# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL +# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE +# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, +# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF +# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK +# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, +# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER +# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGES. + +# ======================================================================= + +import torch +from torch.autograd import Function +from torch.nn import functional as F + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import to_2tuple +from ..utils import ext_loader + +upfirdn2d_ext = ext_loader.load_ext('_ext', ['upfirdn2d']) + + +class UpFirDn2dBackward(Function): + @staticmethod + def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad, in_size, out_size): + + up_x, up_y = up + down_x, down_y = down + g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad + + grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1) + + grad_input = upfirdn2d_ext.upfirdn2d( + grad_output, + grad_kernel, + up_x=down_x, + up_y=down_y, + down_x=up_x, + down_y=up_y, + pad_x0=g_pad_x0, + pad_x1=g_pad_x1, + pad_y0=g_pad_y0, + pad_y1=g_pad_y1, + ) + grad_input = grad_input.view(in_size[0], in_size[1], in_size[2], in_size[3]) + + ctx.save_for_backward(kernel) + + pad_x0, pad_x1, pad_y0, pad_y1 = pad + + ctx.up_x = up_x + ctx.up_y = up_y + ctx.down_x = down_x + ctx.down_y = down_y + ctx.pad_x0 = pad_x0 + ctx.pad_x1 = pad_x1 + ctx.pad_y0 = pad_y0 + ctx.pad_y1 = pad_y1 + ctx.in_size = in_size + ctx.out_size = out_size + + return grad_input + + @staticmethod + def backward(ctx, gradgrad_input): + (kernel,) = ctx.saved_tensors + + gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2], ctx.in_size[3], 1) + + gradgrad_out = upfirdn2d_ext.upfirdn2d( + gradgrad_input, + kernel, + up_x=ctx.up_x, + up_y=ctx.up_y, + down_x=ctx.down_x, + down_y=ctx.down_y, + pad_x0=ctx.pad_x0, + pad_x1=ctx.pad_x1, + pad_y0=ctx.pad_y0, + pad_y1=ctx.pad_y1, + ) + # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0], + # ctx.out_size[1], ctx.in_size[3]) + gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.in_size[1], ctx.out_size[0], ctx.out_size[1]) + + return gradgrad_out, None, None, None, None, None, None, None, None + + +class UpFirDn2d(Function): + @staticmethod + def forward(ctx, input, kernel, up, down, pad): + up_x, up_y = up + down_x, down_y = down + pad_x0, pad_x1, pad_y0, pad_y1 = pad + + kernel_h, kernel_w = kernel.shape + batch, channel, in_h, in_w = input.shape + ctx.in_size = input.shape + + input = input.reshape(-1, in_h, in_w, 1) + + ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1])) + + out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 + out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 + ctx.out_size = (out_h, out_w) + + ctx.up = (up_x, up_y) + ctx.down = (down_x, down_y) + ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1) + + g_pad_x0 = kernel_w - pad_x0 - 1 + g_pad_y0 = kernel_h - pad_y0 - 1 + g_pad_x1 = in_w * up_x - out_w * down_x + pad_x0 - up_x + 1 + g_pad_y1 = in_h * up_y - out_h * down_y + pad_y0 - up_y + 1 + + ctx.g_pad = (g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1) + + out = upfirdn2d_ext.upfirdn2d( + input, + kernel, + up_x=up_x, + up_y=up_y, + down_x=down_x, + down_y=down_y, + pad_x0=pad_x0, + pad_x1=pad_x1, + pad_y0=pad_y0, + pad_y1=pad_y1, + ) + # out = out.view(major, out_h, out_w, minor) + out = out.view(-1, channel, out_h, out_w) + + return out + + @staticmethod + def backward(ctx, grad_output): + kernel, grad_kernel = ctx.saved_tensors + + grad_input = UpFirDn2dBackward.apply( + grad_output, kernel, grad_kernel, ctx.up, ctx.down, ctx.pad, ctx.g_pad, ctx.in_size, ctx.out_size, + ) + + return grad_input, None, None, None, None + + +def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)): + """UpFRIDn for 2d features. + + UpFIRDn is short for upsample, apply FIR filter and downsample. More + details can be found in: + https://www.mathworks.com/help/signal/ref/upfirdn.html + + Args: + input (Tensor): Tensor with shape of (n, c, h, w). + kernel (Tensor): Filter kernel. + up (int | tuple[int], optional): Upsampling factor. If given a number, + we will use this factor for the both height and width side. + Defaults to 1. + down (int | tuple[int], optional): Downsampling factor. If given a + number, we will use this factor for the both height and width side. + Defaults to 1. + pad (tuple[int], optional): Padding for tensors, (x_pad, y_pad) or + (x_pad_0, x_pad_1, y_pad_0, y_pad_1). Defaults to (0, 0). + + Returns: + Tensor: Tensor after UpFIRDn. + """ + if input.device.type == 'cpu': + if len(pad) == 2: + pad = (pad[0], pad[1], pad[0], pad[1]) + + up = to_2tuple(up) + + down = to_2tuple(down) + + out = upfirdn2d_native(input, kernel, up[0], up[1], down[0], down[1], pad[0], pad[1], pad[2], pad[3]) + else: + _up = to_2tuple(up) + + _down = to_2tuple(down) + + if len(pad) == 4: + _pad = pad + elif len(pad) == 2: + _pad = (pad[0], pad[1], pad[0], pad[1]) + + out = UpFirDn2d.apply(input, kernel, _up, _down, _pad) + + return out + + +def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1): + _, channel, in_h, in_w = input.shape + input = input.reshape(-1, in_h, in_w, 1) + + _, in_h, in_w, minor = input.shape + kernel_h, kernel_w = kernel.shape + + out = input.view(-1, in_h, 1, in_w, 1, minor) + out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1]) + out = out.view(-1, in_h * up_y, in_w * up_x, minor) + + out = F.pad(out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]) + out = out[ + :, max(-pad_y0, 0) : out.shape[1] - max(-pad_y1, 0), max(-pad_x0, 0) : out.shape[2] - max(-pad_x1, 0), :, + ] + + out = out.permute(0, 3, 1, 2) + out = out.reshape([-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]) + w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w) + out = F.conv2d(out, w) + out = out.reshape( + -1, minor, in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1, in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1, + ) + out = out.permute(0, 2, 3, 1) + out = out[:, ::down_y, ::down_x, :] + + out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 + out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 + + return out.view(-1, channel, out_h, out_w) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py new file mode 100644 index 000000000000..60e23663270d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/ops/voxelize.py @@ -0,0 +1,115 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn +from torch.autograd import Function +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['dynamic_voxelize_forward', 'hard_voxelize_forward']) + + +class _Voxelization(Function): + @staticmethod + def forward(ctx, points, voxel_size, coors_range, max_points=35, max_voxels=20000): + """Convert kitti points(N, >=3) to voxels. + + Args: + points (torch.Tensor): [N, ndim]. Points[:, :3] contain xyz points + and points[:, 3:] contain other information like reflectivity. + voxel_size (tuple or float): The size of voxel with the shape of + [3]. + coors_range (tuple or float): The coordinate range of voxel with + the shape of [6]. + max_points (int, optional): maximum points contained in a voxel. if + max_points=-1, it means using dynamic_voxelize. Default: 35. + max_voxels (int, optional): maximum voxels this function create. + for second, 20000 is a good choice. Users should shuffle points + before call this function because max_voxels may drop points. + Default: 20000. + + Returns: + voxels_out (torch.Tensor): Output voxels with the shape of [M, + max_points, ndim]. Only contain points and returned when + max_points != -1. + coors_out (torch.Tensor): Output coordinates with the shape of + [M, 3]. + num_points_per_voxel_out (torch.Tensor): Num points per voxel with + the shape of [M]. Only returned when max_points != -1. + """ + if max_points == -1 or max_voxels == -1: + coors = points.new_zeros(size=(points.size(0), 3), dtype=torch.int) + ext_module.dynamic_voxelize_forward(points, coors, voxel_size, coors_range, 3) + return coors + else: + voxels = points.new_zeros(size=(max_voxels, max_points, points.size(1))) + coors = points.new_zeros(size=(max_voxels, 3), dtype=torch.int) + num_points_per_voxel = points.new_zeros(size=(max_voxels,), dtype=torch.int) + voxel_num = ext_module.hard_voxelize_forward( + points, voxels, coors, num_points_per_voxel, voxel_size, coors_range, max_points, max_voxels, 3 + ) + # select the valid voxels + voxels_out = voxels[:voxel_num] + coors_out = coors[:voxel_num] + num_points_per_voxel_out = num_points_per_voxel[:voxel_num] + return voxels_out, coors_out, num_points_per_voxel_out + + +voxelization = _Voxelization.apply + + +class Voxelization(nn.Module): + """Convert kitti points(N, >=3) to voxels. + + Please refer to `PVCNN `_ for more + details. + + Args: + voxel_size (tuple or float): The size of voxel with the shape of [3]. + point_cloud_range (tuple or float): The coordinate range of voxel with + the shape of [6]. + max_num_points (int): maximum points contained in a voxel. if + max_points=-1, it means using dynamic_voxelize. + max_voxels (int, optional): maximum voxels this function create. + for second, 20000 is a good choice. Users should shuffle points + before call this function because max_voxels may drop points. + Default: 20000. + """ + + def __init__(self, voxel_size, point_cloud_range, max_num_points, max_voxels=20000): + super().__init__() + + self.voxel_size = voxel_size + self.point_cloud_range = point_cloud_range + self.max_num_points = max_num_points + if isinstance(max_voxels, tuple): + self.max_voxels = max_voxels + else: + self.max_voxels = _pair(max_voxels) + + point_cloud_range = torch.tensor(point_cloud_range, dtype=torch.float32) + voxel_size = torch.tensor(voxel_size, dtype=torch.float32) + grid_size = (point_cloud_range[3:] - point_cloud_range[:3]) / voxel_size + grid_size = torch.round(grid_size).long() + input_feat_shape = grid_size[:2] + self.grid_size = grid_size + # the origin shape is as [x-len, y-len, z-len] + # [w, h, d] -> [d, h, w] + self.pcd_shape = [*input_feat_shape, 1][::-1] + + def forward(self, input): + if self.training: + max_voxels = self.max_voxels[0] + else: + max_voxels = self.max_voxels[1] + + return voxelization(input, self.voxel_size, self.point_cloud_range, self.max_num_points, max_voxels) + + def __repr__(self): + s = self.__class__.__name__ + '(' + s += 'voxel_size=' + str(self.voxel_size) + s += ', point_cloud_range=' + str(self.point_cloud_range) + s += ', max_num_points=' + str(self.max_num_points) + s += ', max_voxels=' + str(self.max_voxels) + s += ')' + return s diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py new file mode 100644 index 000000000000..da4f1557d34a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .collate import collate +from .data_container import DataContainer +from .data_parallel import MMDataParallel +from .distributed import MMDistributedDataParallel +from .registry import MODULE_WRAPPERS +from .scatter_gather import scatter, scatter_kwargs +from .utils import is_module_wrapper + +__all__ = [ + 'collate', + 'DataContainer', + 'MMDataParallel', + 'MMDistributedDataParallel', + 'scatter', + 'scatter_kwargs', + 'is_module_wrapper', + 'MODULE_WRAPPERS', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py new file mode 100644 index 000000000000..154a0302584c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/_functions.py @@ -0,0 +1,76 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.nn.parallel._functions import _get_stream + + +def scatter(input, devices, streams=None): + """Scatters tensor across multiple GPUs.""" + if streams is None: + streams = [None] * len(devices) + + if isinstance(input, list): + chunk_size = (len(input) - 1) // len(devices) + 1 + outputs = [ + scatter(input[i], [devices[i // chunk_size]], [streams[i // chunk_size]]) for i in range(len(input)) + ] + return outputs + elif isinstance(input, torch.Tensor): + output = input.contiguous() + # TODO: copy to a pinned buffer first (if copying from CPU) + stream = streams[0] if output.numel() > 0 else None + if devices != [-1]: + with torch.cuda.device(devices[0]), torch.cuda.stream(stream): + output = output.cuda(devices[0], non_blocking=True) + else: + # unsqueeze the first dimension thus the tensor's shape is the + # same as those scattered with GPU. + output = output.unsqueeze(0) + return output + else: + raise Exception(f'Unknown type {type(input)}.') + + +def synchronize_stream(output, devices, streams): + if isinstance(output, list): + chunk_size = len(output) // len(devices) + for i in range(len(devices)): + for j in range(chunk_size): + synchronize_stream(output[i * chunk_size + j], [devices[i]], [streams[i]]) + elif isinstance(output, torch.Tensor): + if output.numel() != 0: + with torch.cuda.device(devices[0]): + main_stream = torch.cuda.current_stream() + main_stream.wait_stream(streams[0]) + output.record_stream(main_stream) + else: + raise Exception(f'Unknown type {type(output)}.') + + +def get_input_device(input): + if isinstance(input, list): + for item in input: + input_device = get_input_device(item) + if input_device != -1: + return input_device + return -1 + elif isinstance(input, torch.Tensor): + return input.get_device() if input.is_cuda else -1 + else: + raise Exception(f'Unknown type {type(input)}.') + + +class Scatter: + @staticmethod + def forward(target_gpus, input): + input_device = get_input_device(input) + streams = None + if input_device == -1 and target_gpus != [-1]: + # Perform CPU to GPU copies in a background stream + streams = [_get_stream(device) for device in target_gpus] + + outputs = scatter(input, target_gpus, streams) + # Synchronize with the copy stream + if streams is not None: + synchronize_stream(outputs, target_gpus, streams) + + return tuple(outputs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py new file mode 100644 index 000000000000..9607ce7efb6d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/collate.py @@ -0,0 +1,69 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from collections.abc import Mapping, Sequence + +import torch +import torch.nn.functional as F +from torch.utils.data.dataloader import default_collate + +from .data_container import DataContainer + + +def collate(batch, samples_per_gpu=1): + """Puts each data field into a tensor/DataContainer with outer dimension + batch size. + + Extend default_collate to add support for + :type:`~mmcv.parallel.DataContainer`. There are 3 cases. + + 1. cpu_only = True, e.g., meta data + 2. cpu_only = False, stack = True, e.g., images tensors + 3. cpu_only = False, stack = False, e.g., gt bboxes + """ + + if not isinstance(batch, Sequence): + raise TypeError(f'{batch.dtype} is not supported.') + + if isinstance(batch[0], DataContainer): + stacked = [] + if batch[0].cpu_only: + for i in range(0, len(batch), samples_per_gpu): + stacked.append([sample.data for sample in batch[i : i + samples_per_gpu]]) + return DataContainer(stacked, batch[0].stack, batch[0].padding_value, cpu_only=True) + elif batch[0].stack: + for i in range(0, len(batch), samples_per_gpu): + assert isinstance(batch[i].data, torch.Tensor) + + if batch[i].pad_dims is not None: + ndim = batch[i].dim() + assert ndim > batch[i].pad_dims + max_shape = [0 for _ in range(batch[i].pad_dims)] + for dim in range(1, batch[i].pad_dims + 1): + max_shape[dim - 1] = batch[i].size(-dim) + for sample in batch[i : i + samples_per_gpu]: + for dim in range(0, ndim - batch[i].pad_dims): + assert batch[i].size(dim) == sample.size(dim) + for dim in range(1, batch[i].pad_dims + 1): + max_shape[dim - 1] = max(max_shape[dim - 1], sample.size(-dim)) + padded_samples = [] + for sample in batch[i : i + samples_per_gpu]: + pad = [0 for _ in range(batch[i].pad_dims * 2)] + for dim in range(1, batch[i].pad_dims + 1): + pad[2 * dim - 1] = max_shape[dim - 1] - sample.size(-dim) + padded_samples.append(F.pad(sample.data, pad, value=sample.padding_value)) + stacked.append(default_collate(padded_samples)) + elif batch[i].pad_dims is None: + stacked.append(default_collate([sample.data for sample in batch[i : i + samples_per_gpu]])) + else: + raise ValueError('pad_dims should be either None or integers (1-3)') + + else: + for i in range(0, len(batch), samples_per_gpu): + stacked.append([sample.data for sample in batch[i : i + samples_per_gpu]]) + return DataContainer(stacked, batch[0].stack, batch[0].padding_value) + elif isinstance(batch[0], Sequence): + transposed = zip(*batch) + return [collate(samples, samples_per_gpu) for samples in transposed] + elif isinstance(batch[0], Mapping): + return {key: collate([d[key] for d in batch], samples_per_gpu) for key in batch[0]} + else: + return default_collate(batch) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py new file mode 100644 index 000000000000..120f68b8fa60 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_container.py @@ -0,0 +1,83 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import functools + +import torch + + +def assert_tensor_type(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not isinstance(args[0].data, torch.Tensor): + raise AttributeError( + f'{args[0].__class__.__name__} has no attribute ' f'{func.__name__} for type {args[0].datatype}' + ) + return func(*args, **kwargs) + + return wrapper + + +class DataContainer: + """A container for any type of objects. + + Typically tensors will be stacked in the collate function and sliced along + some dimension in the scatter function. This behavior has some limitations. + 1. All tensors have to be the same size. + 2. Types are limited (numpy array or Tensor). + + We design `DataContainer` and `MMDataParallel` to overcome these + limitations. The behavior can be either of the following. + + - copy to GPU, pad all tensors to the same size and stack them + - copy to GPU without stacking + - leave the objects as is and pass it to the model + - pad_dims specifies the number of last few dimensions to do padding + """ + + def __init__(self, data, stack=False, padding_value=0, cpu_only=False, pad_dims=2): + self._data = data + self._cpu_only = cpu_only + self._stack = stack + self._padding_value = padding_value + assert pad_dims in [None, 1, 2, 3] + self._pad_dims = pad_dims + + def __repr__(self): + return f'{self.__class__.__name__}({repr(self.data)})' + + def __len__(self): + return len(self._data) + + @property + def data(self): + return self._data + + @property + def datatype(self): + if isinstance(self.data, torch.Tensor): + return self.data.type() + else: + return type(self.data) + + @property + def cpu_only(self): + return self._cpu_only + + @property + def stack(self): + return self._stack + + @property + def padding_value(self): + return self._padding_value + + @property + def pad_dims(self): + return self._pad_dims + + @assert_tensor_type + def size(self, *args, **kwargs): + return self.data.size(*args, **kwargs) + + @assert_tensor_type + def dim(self): + return self.data.dim() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py new file mode 100644 index 000000000000..bd0715da94ad --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/data_parallel.py @@ -0,0 +1,93 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from itertools import chain + +from torch.nn.parallel import DataParallel + +from .scatter_gather import scatter_kwargs + + +class MMDataParallel(DataParallel): + """The DataParallel module that supports DataContainer. + + MMDataParallel has two main differences with PyTorch DataParallel: + + - It supports a custom type :class:`DataContainer` which allows more + flexible control of input data during both GPU and CPU inference. + - It implement two more APIs ``train_step()`` and ``val_step()``. + + Args: + module (:class:`nn.Module`): Module to be encapsulated. + device_ids (list[int]): Device IDS of modules to be scattered to. + Defaults to None when GPU is not available. + output_device (str | int): Device ID for output. Defaults to None. + dim (int): Dimension used to scatter the data. Defaults to 0. + """ + + def __init__(self, *args, dim=0, **kwargs): + super(MMDataParallel, self).__init__(*args, dim=dim, **kwargs) + self.dim = dim + + def forward(self, *inputs, **kwargs): + """Override the original forward function. + + The main difference lies in the CPU inference where the data in + :class:`DataContainers` will still be gathered. + """ + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module(*inputs[0], **kwargs[0]) + else: + return super().forward(*inputs, **kwargs) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def train_step(self, *inputs, **kwargs): + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module.train_step(*inputs[0], **kwargs[0]) + + assert len(self.device_ids) == 1, ( + 'MMDataParallel only supports single GPU training, if you need to' + ' train with multiple GPUs, please use MMDistributedDataParallel' + 'instead.' + ) + + for t in chain(self.module.parameters(), self.module.buffers()): + if t.device != self.src_device_obj: + raise RuntimeError( + 'module must have its parameters and buffers ' + f'on device {self.src_device_obj} (device_ids[0]) but ' + f'found one of them on device: {t.device}' + ) + + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + return self.module.train_step(*inputs[0], **kwargs[0]) + + def val_step(self, *inputs, **kwargs): + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module.val_step(*inputs[0], **kwargs[0]) + + assert len(self.device_ids) == 1, ( + 'MMDataParallel only supports single GPU training, if you need to' + ' train with multiple GPUs, please use MMDistributedDataParallel' + ' instead.' + ) + + for t in chain(self.module.parameters(), self.module.buffers()): + if t.device != self.src_device_obj: + raise RuntimeError( + 'module must have its parameters and buffers ' + f'on device {self.src_device_obj} (device_ids[0]) but ' + f'found one of them on device: {t.device}' + ) + + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + return self.module.val_step(*inputs[0], **kwargs[0]) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py new file mode 100644 index 000000000000..755c4398fcdc --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed.py @@ -0,0 +1,106 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.nn.parallel.distributed import DistributedDataParallel, _find_tensors + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import print_log +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version + +from .scatter_gather import scatter_kwargs + + +class MMDistributedDataParallel(DistributedDataParallel): + """The DDP module that supports DataContainer. + + MMDDP has two main differences with PyTorch DDP: + + - It supports a custom type :class:`DataContainer` which allows more + flexible control of input data. + - It implement two APIs ``train_step()`` and ``val_step()``. + """ + + def to_kwargs(self, inputs, kwargs, device_id): + # Use `self.to_kwargs` instead of `self.scatter` in pytorch1.8 + # to move all tensors to device_id + return scatter_kwargs(inputs, kwargs, [device_id], dim=self.dim) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def train_step(self, *inputs, **kwargs): + """train_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.train_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + + # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the + # end of backward to the beginning of forward. + if ( + 'parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) >= digit_version('1.7') + and self.reducer._rebuild_buckets() + ): + print_log('Reducer buckets have been rebuilt in this iteration.', logger='mmcv') + + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.train_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply(self._module_copies[: len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + output = self.module.train_step(*inputs, **kwargs) + + if torch.is_grad_enabled() and getattr(self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if 'parrots' not in TORCH_VERSION and digit_version(TORCH_VERSION) > digit_version('1.2'): + self.require_forward_param_sync = False + return output + + def val_step(self, *inputs, **kwargs): + """val_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.val_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the + # end of backward to the beginning of forward. + if ( + 'parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) >= digit_version('1.7') + and self.reducer._rebuild_buckets() + ): + print_log('Reducer buckets have been rebuilt in this iteration.', logger='mmcv') + + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.val_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply(self._module_copies[: len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + output = self.module.val_step(*inputs, **kwargs) + + if torch.is_grad_enabled() and getattr(self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if 'parrots' not in TORCH_VERSION and digit_version(TORCH_VERSION) > digit_version('1.2'): + self.require_forward_param_sync = False + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py new file mode 100644 index 000000000000..7e8a47648e1e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/distributed_deprecated.py @@ -0,0 +1,58 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.distributed as dist +import torch.nn as nn +from torch._utils import _flatten_dense_tensors, _take_tensors, _unflatten_dense_tensors + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version + +from .registry import MODULE_WRAPPERS +from .scatter_gather import scatter_kwargs + + +@MODULE_WRAPPERS.register_module() +class MMDistributedDataParallel(nn.Module): + def __init__(self, module, dim=0, broadcast_buffers=True, bucket_cap_mb=25): + super(MMDistributedDataParallel, self).__init__() + self.module = module + self.dim = dim + self.broadcast_buffers = broadcast_buffers + + self.broadcast_bucket_size = bucket_cap_mb * 1024 * 1024 + self._sync_params() + + def _dist_broadcast_coalesced(self, tensors, buffer_size): + for tensors in _take_tensors(tensors, buffer_size): + flat_tensors = _flatten_dense_tensors(tensors) + dist.broadcast(flat_tensors, 0) + for tensor, synced in zip(tensors, _unflatten_dense_tensors(flat_tensors, tensors)): + tensor.copy_(synced) + + def _sync_params(self): + module_states = list(self.module.state_dict().values()) + if len(module_states) > 0: + self._dist_broadcast_coalesced(module_states, self.broadcast_bucket_size) + if self.broadcast_buffers: + if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) < digit_version('1.0'): + buffers = [b.data for b in self.module._all_buffers()] + else: + buffers = [b.data for b in self.module.buffers()] + if len(buffers) > 0: + self._dist_broadcast_coalesced(buffers, self.broadcast_bucket_size) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def forward(self, *inputs, **kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, [torch.cuda.current_device()]) + return self.module(*inputs[0], **kwargs[0]) + + def train_step(self, *inputs, **kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, [torch.cuda.current_device()]) + output = self.module.train_step(*inputs[0], **kwargs[0]) + return output + + def val_step(self, *inputs, **kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, [torch.cuda.current_device()]) + output = self.module.val_step(*inputs[0], **kwargs[0]) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/registry.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/registry.py new file mode 100644 index 000000000000..4d31bc1d08e6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/registry.py @@ -0,0 +1,8 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from torch.nn.parallel import DataParallel, DistributedDataParallel + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry + +MODULE_WRAPPERS = Registry('module wrapper') +MODULE_WRAPPERS.register_module(module=DataParallel) +MODULE_WRAPPERS.register_module(module=DistributedDataParallel) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/scatter_gather.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/scatter_gather.py new file mode 100644 index 000000000000..900ff88566f8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/scatter_gather.py @@ -0,0 +1,59 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.nn.parallel._functions import Scatter as OrigScatter + +from ._functions import Scatter +from .data_container import DataContainer + + +def scatter(inputs, target_gpus, dim=0): + """Scatter inputs to target gpus. + + The only difference from original :func:`scatter` is to add support for + :type:`~mmcv.parallel.DataContainer`. + """ + + def scatter_map(obj): + if isinstance(obj, torch.Tensor): + if target_gpus != [-1]: + return OrigScatter.apply(target_gpus, None, dim, obj) + else: + # for CPU inference we use self-implemented scatter + return Scatter.forward(target_gpus, obj) + if isinstance(obj, DataContainer): + if obj.cpu_only: + return obj.data + else: + return Scatter.forward(target_gpus, obj.data) + if isinstance(obj, tuple) and len(obj) > 0: + return list(zip(*map(scatter_map, obj))) + if isinstance(obj, list) and len(obj) > 0: + out = list(map(list, zip(*map(scatter_map, obj)))) + return out + if isinstance(obj, dict) and len(obj) > 0: + out = list(map(type(obj), zip(*map(scatter_map, obj.items())))) + return out + return [obj for targets in target_gpus] + + # After scatter_map is called, a scatter_map cell will exist. This cell + # has a reference to the actual function scatter_map, which has references + # to a closure that has a reference to the scatter_map cell (because the + # fn is recursive). To avoid this reference cycle, we set the function to + # None, clearing the cell + try: + return scatter_map(inputs) + finally: + scatter_map = None + + +def scatter_kwargs(inputs, kwargs, target_gpus, dim=0): + """Scatter with support for kwargs dictionary.""" + inputs = scatter(inputs, target_gpus, dim) if inputs else [] + kwargs = scatter(kwargs, target_gpus, dim) if kwargs else [] + if len(inputs) < len(kwargs): + inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) + elif len(kwargs) < len(inputs): + kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) + inputs = tuple(inputs) + kwargs = tuple(kwargs) + return inputs, kwargs diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/utils.py new file mode 100644 index 000000000000..0f5712cb42c3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/parallel/utils.py @@ -0,0 +1,20 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .registry import MODULE_WRAPPERS + + +def is_module_wrapper(module): + """Check if a module is a module wrapper. + + The following 3 modules in MMCV (and their subclasses) are regarded as + module wrappers: DataParallel, DistributedDataParallel, + MMDistributedDataParallel (the deprecated version). You may add you own + module wrapper by registering it to mmcv.parallel.MODULE_WRAPPERS. + + Args: + module (nn.Module): The module to be checked. + + Returns: + bool: True if the input module is a module wrapper. + """ + module_wrappers = tuple(MODULE_WRAPPERS.module_dict.values()) + return isinstance(module, module_wrappers) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py new file mode 100644 index 000000000000..8edd3a098aed --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/__init__.py @@ -0,0 +1,118 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .base_module import BaseModule, ModuleList, Sequential +from .base_runner import BaseRunner +from .builder import RUNNERS, build_runner +from .checkpoint import ( + CheckpointLoader, + _load_checkpoint, + _load_checkpoint_with_prefix, + load_checkpoint, + load_state_dict, + save_checkpoint, + weights_to_cpu, +) +from .default_constructor import DefaultRunnerConstructor +from .dist_utils import allreduce_grads, allreduce_params, get_dist_info, init_dist, master_only +from .epoch_based_runner import EpochBasedRunner, Runner +from .fp16_utils import LossScaler, auto_fp16, force_fp32, wrap_fp16_model +from .hooks import ( + HOOKS, + CheckpointHook, + ClosureHook, + DistEvalHook, + DistSamplerSeedHook, + DvcliveLoggerHook, + EMAHook, + EvalHook, + Fp16OptimizerHook, + GradientCumulativeFp16OptimizerHook, + GradientCumulativeOptimizerHook, + Hook, + IterTimerHook, + LoggerHook, + LrUpdaterHook, + MlflowLoggerHook, + NeptuneLoggerHook, + OptimizerHook, + PaviLoggerHook, + SyncBuffersHook, + TensorboardLoggerHook, + TextLoggerHook, + WandbLoggerHook, +) +from .iter_based_runner import IterBasedRunner, IterLoader +from .log_buffer import LogBuffer +from .optimizer import ( + OPTIMIZER_BUILDERS, + OPTIMIZERS, + DefaultOptimizerConstructor, + build_optimizer, + build_optimizer_constructor, +) +from .priority import Priority, get_priority +from .utils import get_host_info, get_time_str, obj_from_dict, set_random_seed + +__all__ = [ + 'BaseRunner', + 'Runner', + 'EpochBasedRunner', + 'IterBasedRunner', + 'LogBuffer', + 'HOOKS', + 'Hook', + 'CheckpointHook', + 'ClosureHook', + 'LrUpdaterHook', + 'OptimizerHook', + 'IterTimerHook', + 'DistSamplerSeedHook', + 'LoggerHook', + 'PaviLoggerHook', + 'TextLoggerHook', + 'TensorboardLoggerHook', + 'NeptuneLoggerHook', + 'WandbLoggerHook', + 'MlflowLoggerHook', + 'DvcliveLoggerHook', + '_load_checkpoint', + 'load_state_dict', + 'load_checkpoint', + 'weights_to_cpu', + 'save_checkpoint', + 'Priority', + 'get_priority', + 'get_host_info', + 'get_time_str', + 'obj_from_dict', + 'init_dist', + 'get_dist_info', + 'master_only', + 'OPTIMIZER_BUILDERS', + 'OPTIMIZERS', + 'DefaultOptimizerConstructor', + 'build_optimizer', + 'build_optimizer_constructor', + 'IterLoader', + 'set_random_seed', + 'auto_fp16', + 'force_fp32', + 'wrap_fp16_model', + 'Fp16OptimizerHook', + 'SyncBuffersHook', + 'EMAHook', + 'build_runner', + 'RUNNERS', + 'allreduce_grads', + 'allreduce_params', + 'LossScaler', + 'CheckpointLoader', + 'BaseModule', + '_load_checkpoint_with_prefix', + 'EvalHook', + 'DistEvalHook', + 'Sequential', + 'ModuleList', + 'GradientCumulativeOptimizerHook', + 'GradientCumulativeFp16OptimizerHook', + 'DefaultRunnerConstructor', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py new file mode 100644 index 000000000000..c0c66594dccb --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_module.py @@ -0,0 +1,194 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import warnings +from abc import ABCMeta +from collections import defaultdict +from logging import FileHandler + +import torch.nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner.dist_utils import master_only +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.logging import ( + get_logger, + logger_initialized, + print_log, +) + + +class BaseModule(nn.Module, metaclass=ABCMeta): + """Base module for all modules in openmmlab. + + ``BaseModule`` is a wrapper of ``torch.nn.Module`` with additional + functionality of parameter initialization. Compared with + ``torch.nn.Module``, ``BaseModule`` mainly adds three attributes. + + - ``init_cfg``: the config to control the initialization. + - ``init_weights``: The function of parameter + initialization and recording initialization + information. + - ``_params_init_info``: Used to track the parameter + initialization information. This attribute only + exists during executing the ``init_weights``. + + Args: + init_cfg (dict, optional): Initialization config dict. + """ + + def __init__(self, init_cfg=None): + """Initialize BaseModule, inherited from `torch.nn.Module`""" + + # NOTE init_cfg can be defined in different levels, but init_cfg + # in low levels has a higher priority. + + super(BaseModule, self).__init__() + # define default value of init_cfg instead of hard code + # in init_weights() function + self._is_init = False + + self.init_cfg = copy.deepcopy(init_cfg) + + # Backward compatibility in derived classes + # if pretrained is not None: + # warnings.warn('DeprecationWarning: pretrained is a deprecated \ + # key, please consider using init_cfg') + # self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + + @property + def is_init(self): + return self._is_init + + def init_weights(self): + """Initialize the weights.""" + + is_top_level_module = False + # check if it is top-level module + if not hasattr(self, '_params_init_info'): + # The `_params_init_info` is used to record the initialization + # information of the parameters + # the key should be the obj:`nn.Parameter` of model and the value + # should be a dict containing + # - init_info (str): The string that describes the initialization. + # - tmp_mean_value (FloatTensor): The mean of the parameter, + # which indicates whether the parameter has been modified. + # this attribute would be deleted after all parameters + # is initialized. + self._params_init_info = defaultdict(dict) + is_top_level_module = True + + # Initialize the `_params_init_info`, + # When detecting the `tmp_mean_value` of + # the corresponding parameter is changed, update related + # initialization information + for name, param in self.named_parameters(): + self._params_init_info[param]['init_info'] = ( + f'The value is the same before and ' + f'after calling `init_weights` ' + f'of {self.__class__.__name__} ' + ) + self._params_init_info[param]['tmp_mean_value'] = param.data.mean() + + # pass `params_init_info` to all submodules + # All submodules share the same `params_init_info`, + # so it will be updated when parameters are + # modified at any level of the model. + for sub_module in self.modules(): + sub_module._params_init_info = self._params_init_info + + # Get the initialized logger, if not exist, + # create a logger named `mmcv` + logger_names = list(logger_initialized.keys()) + logger_name = logger_names[0] if logger_names else 'mmcv' + + from ..cnn import initialize + from ..cnn.utils.weight_init import update_init_info + + module_name = self.__class__.__name__ + if not self._is_init: + if self.init_cfg: + print_log(f'initialize {module_name} with init_cfg {self.init_cfg}', logger=logger_name) + initialize(self, self.init_cfg) + if isinstance(self.init_cfg, dict): + # prevent the parameters of + # the pre-trained model + # from being overwritten by + # the `init_weights` + if self.init_cfg['type'] == 'Pretrained': + return + + for m in self.children(): + if hasattr(m, 'init_weights'): + m.init_weights() + # users may overload the `init_weights` + update_init_info( + m, init_info=f'Initialized by ' f'user-defined `init_weights`' f' in {m.__class__.__name__} ' + ) + + self._is_init = True + else: + warnings.warn(f'init_weights of {self.__class__.__name__} has ' f'been called more than once.') + + if is_top_level_module: + self._dump_init_info(logger_name) + + for sub_module in self.modules(): + del sub_module._params_init_info + + @master_only + def _dump_init_info(self, logger_name): + """Dump the initialization information to a file named + `initialization.log.json` in workdir. + + Args: + logger_name (str): The name of logger. + """ + + logger = get_logger(logger_name) + + with_file_handler = False + # dump the information to the logger file if there is a `FileHandler` + for handler in logger.handlers: + if isinstance(handler, FileHandler): + handler.stream.write('Name of parameter - Initialization information\n') + for name, param in self.named_parameters(): + handler.stream.write( + f'\n{name} - {param.shape}: ' f"\n{self._params_init_info[param]['init_info']} \n" + ) + handler.stream.flush() + with_file_handler = True + if not with_file_handler: + for name, param in self.named_parameters(): + print_log( + f'\n{name} - {param.shape}: ' f"\n{self._params_init_info[param]['init_info']} \n ", + logger=logger_name, + ) + + def __repr__(self): + s = super().__repr__() + if self.init_cfg: + s += f'\ninit_cfg={self.init_cfg}' + return s + + +class Sequential(BaseModule, nn.Sequential): + """Sequential module in openmmlab. + + Args: + init_cfg (dict, optional): Initialization config dict. + """ + + def __init__(self, *args, init_cfg=None): + BaseModule.__init__(self, init_cfg) + nn.Sequential.__init__(self, *args) + + +class ModuleList(BaseModule, nn.ModuleList): + """ModuleList in openmmlab. + + Args: + modules (iterable, optional): an iterable of modules to add. + init_cfg (dict, optional): Initialization config dict. + """ + + def __init__(self, modules=None, init_cfg=None): + BaseModule.__init__(self, init_cfg) + nn.ModuleList.__init__(self, modules) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py new file mode 100644 index 000000000000..92948a64963c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/base_runner.py @@ -0,0 +1,515 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import logging +import os.path as osp +import warnings +from abc import ABCMeta, abstractmethod + +import torch +from torch.optim import Optimizer + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from ..parallel import is_module_wrapper +from .checkpoint import load_checkpoint +from .dist_utils import get_dist_info +from .hooks import HOOKS, Hook +from .log_buffer import LogBuffer +from .priority import Priority, get_priority +from .utils import get_time_str + + +class BaseRunner(metaclass=ABCMeta): + """The base class of Runner, a training helper for PyTorch. + + All subclasses should implement the following APIs: + + - ``run()`` + - ``train()`` + - ``val()`` + - ``save_checkpoint()`` + + Args: + model (:obj:`torch.nn.Module`): The model to be run. + batch_processor (callable): A callable method that process a data + batch. The interface of this method should be + `batch_processor(model, data, train_mode) -> dict` + optimizer (dict or :obj:`torch.optim.Optimizer`): It can be either an + optimizer (in most cases) or a dict of optimizers (in models that + requires more than one optimizer, e.g., GAN). + work_dir (str, optional): The working directory to save checkpoints + and logs. Defaults to None. + logger (:obj:`logging.Logger`): Logger used during training. + Defaults to None. (The default value is just for backward + compatibility) + meta (dict | None): A dict records some import information such as + environment info and seed, which will be logged in logger hook. + Defaults to None. + max_epochs (int, optional): Total training epochs. + max_iters (int, optional): Total training iterations. + """ + + def __init__( + self, + model, + batch_processor=None, + optimizer=None, + work_dir=None, + logger=None, + meta=None, + max_iters=None, + max_epochs=None, + ): + if batch_processor is not None: + if not callable(batch_processor): + raise TypeError('batch_processor must be callable, ' f'but got {type(batch_processor)}') + warnings.warn( + 'batch_processor is deprecated, please implement ' 'train_step() and val_step() in the model instead.' + ) + # raise an error is `batch_processor` is not None and + # `model.train_step()` exists. + if is_module_wrapper(model): + _model = model.module + else: + _model = model + if hasattr(_model, 'train_step') or hasattr(_model, 'val_step'): + raise RuntimeError( + 'batch_processor and model.train_step()/model.val_step() ' 'cannot be both available.' + ) + else: + assert hasattr(model, 'train_step') + + # check the type of `optimizer` + if isinstance(optimizer, dict): + for name, optim in optimizer.items(): + if not isinstance(optim, Optimizer): + raise TypeError( + f'optimizer must be a dict of torch.optim.Optimizers, ' + f'but optimizer["{name}"] is a {type(optim)}' + ) + elif not isinstance(optimizer, Optimizer) and optimizer is not None: + raise TypeError( + f'optimizer must be a torch.optim.Optimizer object ' f'or dict or None, but got {type(optimizer)}' + ) + + # check the type of `logger` + if not isinstance(logger, logging.Logger): + raise TypeError(f'logger must be a logging.Logger object, ' f'but got {type(logger)}') + + # check the type of `meta` + if meta is not None and not isinstance(meta, dict): + raise TypeError(f'meta must be a dict or None, but got {type(meta)}') + + self.model = model + self.batch_processor = batch_processor + self.optimizer = optimizer + self.logger = logger + self.meta = meta + # create work_dir + if mmcv.is_str(work_dir): + self.work_dir = osp.abspath(work_dir) + mmcv.mkdir_or_exist(self.work_dir) + elif work_dir is None: + self.work_dir = None + else: + raise TypeError('"work_dir" must be a str or None') + + # get model name from the model class + if hasattr(self.model, 'module'): + self._model_name = self.model.module.__class__.__name__ + else: + self._model_name = self.model.__class__.__name__ + + self._rank, self._world_size = get_dist_info() + self.timestamp = get_time_str() + self.mode = None + self._hooks = [] + self._epoch = 0 + self._iter = 0 + self._inner_iter = 0 + + if max_epochs is not None and max_iters is not None: + raise ValueError('Only one of `max_epochs` or `max_iters` can be set.') + + self._max_epochs = max_epochs + self._max_iters = max_iters + # TODO: Redesign LogBuffer, it is not flexible and elegant enough + self.log_buffer = LogBuffer() + + @property + def model_name(self): + """str: Name of the model, usually the module class name.""" + return self._model_name + + @property + def rank(self): + """int: Rank of current process. (distributed training)""" + return self._rank + + @property + def world_size(self): + """int: Number of processes participating in the job. + (distributed training)""" + return self._world_size + + @property + def hooks(self): + """list[:obj:`Hook`]: A list of registered hooks.""" + return self._hooks + + @property + def epoch(self): + """int: Current epoch.""" + return self._epoch + + @property + def iter(self): + """int: Current iteration.""" + return self._iter + + @property + def inner_iter(self): + """int: Iteration in an epoch.""" + return self._inner_iter + + @property + def max_epochs(self): + """int: Maximum training epochs.""" + return self._max_epochs + + @property + def max_iters(self): + """int: Maximum training iterations.""" + return self._max_iters + + @abstractmethod + def train(self): + pass + + @abstractmethod + def val(self): + pass + + @abstractmethod + def run(self, data_loaders, workflow, **kwargs): + pass + + @abstractmethod + def save_checkpoint(self, out_dir, filename_tmpl, save_optimizer=True, meta=None, create_symlink=True): + pass + + def current_lr(self): + """Get current learning rates. + + Returns: + list[float] | dict[str, list[float]]: Current learning rates of all + param groups. If the runner has a dict of optimizers, this + method will return a dict. + """ + if isinstance(self.optimizer, torch.optim.Optimizer): + lr = [group['lr'] for group in self.optimizer.param_groups] + elif isinstance(self.optimizer, dict): + lr = dict() + for name, optim in self.optimizer.items(): + lr[name] = [group['lr'] for group in optim.param_groups] + else: + raise RuntimeError('lr is not applicable because optimizer does not exist.') + return lr + + def current_momentum(self): + """Get current momentums. + + Returns: + list[float] | dict[str, list[float]]: Current momentums of all + param groups. If the runner has a dict of optimizers, this + method will return a dict. + """ + + def _get_momentum(optimizer): + momentums = [] + for group in optimizer.param_groups: + if 'momentum' in group.keys(): + momentums.append(group['momentum']) + elif 'betas' in group.keys(): + momentums.append(group['betas'][0]) + else: + momentums.append(0) + return momentums + + if self.optimizer is None: + raise RuntimeError('momentum is not applicable because optimizer does not exist.') + elif isinstance(self.optimizer, torch.optim.Optimizer): + momentums = _get_momentum(self.optimizer) + elif isinstance(self.optimizer, dict): + momentums = dict() + for name, optim in self.optimizer.items(): + momentums[name] = _get_momentum(optim) + return momentums + + def register_hook(self, hook, priority='NORMAL'): + """Register a hook into the hook list. + + The hook will be inserted into a priority queue, with the specified + priority (See :class:`Priority` for details of priorities). + For hooks with the same priority, they will be triggered in the same + order as they are registered. + + Args: + hook (:obj:`Hook`): The hook to be registered. + priority (int or str or :obj:`Priority`): Hook priority. + Lower value means higher priority. + """ + assert isinstance(hook, Hook) + if hasattr(hook, 'priority'): + raise ValueError('"priority" is a reserved attribute for hooks') + priority = get_priority(priority) + hook.priority = priority + # insert the hook to a sorted list + inserted = False + for i in range(len(self._hooks) - 1, -1, -1): + if priority >= self._hooks[i].priority: + self._hooks.insert(i + 1, hook) + inserted = True + break + if not inserted: + self._hooks.insert(0, hook) + + def register_hook_from_cfg(self, hook_cfg): + """Register a hook from its cfg. + + Args: + hook_cfg (dict): Hook config. It should have at least keys 'type' + and 'priority' indicating its type and priority. + + Notes: + The specific hook class to register should not use 'type' and + 'priority' arguments during initialization. + """ + hook_cfg = hook_cfg.copy() + priority = hook_cfg.pop('priority', 'NORMAL') + hook = mmcv.build_from_cfg(hook_cfg, HOOKS) + self.register_hook(hook, priority=priority) + + def call_hook(self, fn_name): + """Call all hooks. + + Args: + fn_name (str): The function name in each hook to be called, such as + "before_train_epoch". + """ + for hook in self._hooks: + getattr(hook, fn_name)(self) + + def get_hook_info(self): + # Get hooks info in each stage + stage_hook_map = {stage: [] for stage in Hook.stages} + for hook in self.hooks: + try: + priority = Priority(hook.priority).name + except ValueError: + priority = hook.priority + classname = hook.__class__.__name__ + hook_info = f'({priority:<12}) {classname:<35}' + for trigger_stage in hook.get_triggered_stages(): + stage_hook_map[trigger_stage].append(hook_info) + + stage_hook_infos = [] + for stage in Hook.stages: + hook_infos = stage_hook_map[stage] + if len(hook_infos) > 0: + info = f'{stage}:\n' + info += '\n'.join(hook_infos) + info += '\n -------------------- ' + stage_hook_infos.append(info) + return '\n'.join(stage_hook_infos) + + def load_checkpoint(self, filename, map_location='cpu', strict=False, revise_keys=[(r'^module.', '')]): + return load_checkpoint(self.model, filename, map_location, strict, self.logger, revise_keys=revise_keys) + + def resume(self, checkpoint, resume_optimizer=True, map_location='default'): + if map_location == 'default': + if torch.cuda.is_available(): + device_id = torch.cuda.current_device() + checkpoint = self.load_checkpoint( + checkpoint, map_location=lambda storage, loc: storage.cuda(device_id) + ) + else: + checkpoint = self.load_checkpoint(checkpoint) + else: + checkpoint = self.load_checkpoint(checkpoint, map_location=map_location) + + self._epoch = checkpoint['meta']['epoch'] + self._iter = checkpoint['meta']['iter'] + if self.meta is None: + self.meta = {} + self.meta.setdefault('hook_msgs', {}) + # load `last_ckpt`, `best_score`, `best_ckpt`, etc. for hook messages + self.meta['hook_msgs'].update(checkpoint['meta'].get('hook_msgs', {})) + + # Re-calculate the number of iterations when resuming + # models with different number of GPUs + if 'config' in checkpoint['meta']: + config = mmcv.Config.fromstring(checkpoint['meta']['config'], file_format='.py') + previous_gpu_ids = config.get('gpu_ids', None) + if previous_gpu_ids and len(previous_gpu_ids) > 0 and len(previous_gpu_ids) != self.world_size: + self._iter = int(self._iter * len(previous_gpu_ids) / self.world_size) + self.logger.info('the iteration number is changed due to ' 'change of GPU number') + + # resume meta information meta + self.meta = checkpoint['meta'] + + if 'optimizer' in checkpoint and resume_optimizer: + if isinstance(self.optimizer, Optimizer): + self.optimizer.load_state_dict(checkpoint['optimizer']) + elif isinstance(self.optimizer, dict): + for k in self.optimizer.keys(): + self.optimizer[k].load_state_dict(checkpoint['optimizer'][k]) + else: + raise TypeError('Optimizer should be dict or torch.optim.Optimizer ' f'but got {type(self.optimizer)}') + + self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) + + def register_lr_hook(self, lr_config): + if lr_config is None: + return + elif isinstance(lr_config, dict): + assert 'policy' in lr_config + policy_type = lr_config.pop('policy') + # If the type of policy is all in lower case, e.g., 'cyclic', + # then its first letter will be capitalized, e.g., to be 'Cyclic'. + # This is for the convenient usage of Lr updater. + # Since this is not applicable for ` + # CosineAnnealingLrUpdater`, + # the string will not be changed if it contains capital letters. + if policy_type == policy_type.lower(): + policy_type = policy_type.title() + hook_type = policy_type + 'LrUpdaterHook' + lr_config['type'] = hook_type + hook = mmcv.build_from_cfg(lr_config, HOOKS) + else: + hook = lr_config + self.register_hook(hook, priority='VERY_HIGH') + + def register_momentum_hook(self, momentum_config): + if momentum_config is None: + return + if isinstance(momentum_config, dict): + assert 'policy' in momentum_config + policy_type = momentum_config.pop('policy') + # If the type of policy is all in lower case, e.g., 'cyclic', + # then its first letter will be capitalized, e.g., to be 'Cyclic'. + # This is for the convenient usage of momentum updater. + # Since this is not applicable for + # `CosineAnnealingMomentumUpdater`, + # the string will not be changed if it contains capital letters. + if policy_type == policy_type.lower(): + policy_type = policy_type.title() + hook_type = policy_type + 'MomentumUpdaterHook' + momentum_config['type'] = hook_type + hook = mmcv.build_from_cfg(momentum_config, HOOKS) + else: + hook = momentum_config + self.register_hook(hook, priority='HIGH') + + def register_optimizer_hook(self, optimizer_config): + if optimizer_config is None: + return + if isinstance(optimizer_config, dict): + optimizer_config.setdefault('type', 'OptimizerHook') + hook = mmcv.build_from_cfg(optimizer_config, HOOKS) + else: + hook = optimizer_config + self.register_hook(hook, priority='ABOVE_NORMAL') + + def register_checkpoint_hook(self, checkpoint_config): + if checkpoint_config is None: + return + if isinstance(checkpoint_config, dict): + checkpoint_config.setdefault('type', 'CheckpointHook') + hook = mmcv.build_from_cfg(checkpoint_config, HOOKS) + else: + hook = checkpoint_config + self.register_hook(hook, priority='NORMAL') + + def register_logger_hooks(self, log_config): + if log_config is None: + return + log_interval = log_config['interval'] + for info in log_config['hooks']: + logger_hook = mmcv.build_from_cfg(info, HOOKS, default_args=dict(interval=log_interval)) + self.register_hook(logger_hook, priority='VERY_LOW') + + def register_timer_hook(self, timer_config): + if timer_config is None: + return + if isinstance(timer_config, dict): + timer_config_ = copy.deepcopy(timer_config) + hook = mmcv.build_from_cfg(timer_config_, HOOKS) + else: + hook = timer_config + self.register_hook(hook, priority='LOW') + + def register_custom_hooks(self, custom_config): + if custom_config is None: + return + + if not isinstance(custom_config, list): + custom_config = [custom_config] + + for item in custom_config: + if isinstance(item, dict): + self.register_hook_from_cfg(item) + else: + self.register_hook(item, priority='NORMAL') + + def register_profiler_hook(self, profiler_config): + if profiler_config is None: + return + if isinstance(profiler_config, dict): + profiler_config.setdefault('type', 'ProfilerHook') + hook = mmcv.build_from_cfg(profiler_config, HOOKS) + else: + hook = profiler_config + self.register_hook(hook) + + def register_training_hooks( + self, + lr_config, + optimizer_config=None, + checkpoint_config=None, + log_config=None, + momentum_config=None, + timer_config=dict(type='IterTimerHook'), + custom_hooks_config=None, + ): + """Register default and custom hooks for training. + + Default and custom hooks include: + + +----------------------+-------------------------+ + | Hooks | Priority | + +======================+=========================+ + | LrUpdaterHook | VERY_HIGH (10) | + +----------------------+-------------------------+ + | MomentumUpdaterHook | HIGH (30) | + +----------------------+-------------------------+ + | OptimizerStepperHook | ABOVE_NORMAL (40) | + +----------------------+-------------------------+ + | CheckpointSaverHook | NORMAL (50) | + +----------------------+-------------------------+ + | IterTimerHook | LOW (70) | + +----------------------+-------------------------+ + | LoggerHook(s) | VERY_LOW (90) | + +----------------------+-------------------------+ + | CustomHook(s) | defaults to NORMAL (50) | + +----------------------+-------------------------+ + + If custom hooks have same priority with default hooks, custom hooks + will be triggered after default hooks. + """ + self.register_lr_hook(lr_config) + self.register_momentum_hook(momentum_config) + self.register_optimizer_hook(optimizer_config) + self.register_checkpoint_hook(checkpoint_config) + self.register_timer_hook(timer_config) + self.register_logger_hooks(log_config) + self.register_custom_hooks(custom_hooks_config) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py new file mode 100644 index 000000000000..aaebf844ced3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/builder.py @@ -0,0 +1,21 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy + +from ..utils import Registry + +RUNNERS = Registry('runner') +RUNNER_BUILDERS = Registry('runner builder') + + +def build_runner_constructor(cfg): + return RUNNER_BUILDERS.build(cfg) + + +def build_runner(cfg, default_args=None): + runner_cfg = copy.deepcopy(cfg) + constructor_type = runner_cfg.pop('constructor', 'DefaultRunnerConstructor') + runner_constructor = build_runner_constructor( + dict(type=constructor_type, runner_cfg=runner_cfg, default_args=default_args) + ) + runner = runner_constructor() + return runner diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py new file mode 100644 index 000000000000..479da4f51b26 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/checkpoint.py @@ -0,0 +1,670 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import io +import os +import os.path as osp +import pkgutil +import re +import time +import warnings +from collections import OrderedDict +from importlib import import_module +from tempfile import TemporaryDirectory + +import torch +import torchvision +from torch.optim import Optimizer +from torch.utils import model_zoo + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + +from ..fileio import FileClient +from ..fileio import load as load_file +from ..parallel import is_module_wrapper +from ..utils import mkdir_or_exist +from .dist_utils import get_dist_info + +ENV_MMCV_HOME = 'MMCV_HOME' +ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' +DEFAULT_CACHE_DIR = '~/.cache' + + +def _get_mmcv_home(): + mmcv_home = os.path.expanduser( + os.getenv(ENV_MMCV_HOME, os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv')) + ) + + mkdir_or_exist(mmcv_home) + return mmcv_home + + +def load_state_dict(module, state_dict, strict=False, logger=None): + """Load state_dict to a module. + + This method is modified from :meth:`torch.nn.Module.load_state_dict`. + Default value for ``strict`` is set to ``False`` and the message for + param mismatch will be shown even if strict is False. + + Args: + module (Module): Module that receives the state_dict. + state_dict (OrderedDict): Weights. + strict (bool): whether to strictly enforce that the keys + in :attr:`state_dict` match the keys returned by this module's + :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. + logger (:obj:`logging.Logger`, optional): Logger to log the error + message. If not specified, print function will be used. + """ + unexpected_keys = [] + all_missing_keys = [] + err_msg = [] + + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + # use _load_from_state_dict to enable checkpoint version control + def load(module, prefix=''): + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, all_missing_keys, unexpected_keys, err_msg + ) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(module) + load = None # break load->load reference cycle + + # ignore "num_batches_tracked" of BN layers + missing_keys = [key for key in all_missing_keys if 'num_batches_tracked' not in key] + + if unexpected_keys: + err_msg.append('unexpected key in source ' f'state_dict: {", ".join(unexpected_keys)}\n') + if missing_keys: + err_msg.append(f'missing keys in source state_dict: {", ".join(missing_keys)}\n') + + rank, _ = get_dist_info() + if len(err_msg) > 0 and rank == 0: + err_msg.insert(0, 'The model and loaded state dict do not match exactly\n') + err_msg = '\n'.join(err_msg) + if strict: + raise RuntimeError(err_msg) + elif logger is not None: + logger.warning(err_msg) + else: + print(err_msg) + + +def get_torchvision_models(): + model_urls = dict() + for _, name, ispkg in pkgutil.walk_packages(torchvision.models.__path__): + if ispkg: + continue + _zoo = import_module(f'torchvision.models.{name}') + if hasattr(_zoo, 'model_urls'): + _urls = getattr(_zoo, 'model_urls') + model_urls.update(_urls) + return model_urls + + +def get_external_models(): + mmcv_home = _get_mmcv_home() + default_json_path = osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json') + default_urls = load_file(default_json_path) + assert isinstance(default_urls, dict) + external_json_path = osp.join(mmcv_home, 'open_mmlab.json') + if osp.exists(external_json_path): + external_urls = load_file(external_json_path) + assert isinstance(external_urls, dict) + default_urls.update(external_urls) + + return default_urls + + +def get_mmcls_models(): + mmcls_json_path = osp.join(mmcv.__path__[0], 'model_zoo/mmcls.json') + mmcls_urls = load_file(mmcls_json_path) + + return mmcls_urls + + +def get_deprecated_model_names(): + deprecate_json_path = osp.join(mmcv.__path__[0], 'model_zoo/deprecated.json') + deprecate_urls = load_file(deprecate_json_path) + assert isinstance(deprecate_urls, dict) + + return deprecate_urls + + +def _process_mmcls_checkpoint(checkpoint): + state_dict = checkpoint['state_dict'] + new_state_dict = OrderedDict() + for k, v in state_dict.items(): + if k.startswith('backbone.'): + new_state_dict[k[9:]] = v + new_checkpoint = dict(state_dict=new_state_dict) + + return new_checkpoint + + +class CheckpointLoader: + """A general checkpoint loader to manage all schemes.""" + + _schemes = {} + + @classmethod + def _register_scheme(cls, prefixes, loader, force=False): + if isinstance(prefixes, str): + prefixes = [prefixes] + else: + assert isinstance(prefixes, (list, tuple)) + for prefix in prefixes: + if (prefix not in cls._schemes) or force: + cls._schemes[prefix] = loader + else: + raise KeyError( + f'{prefix} is already registered as a loader backend, ' + 'add "force=True" if you want to override it' + ) + # sort, longer prefixes take priority + cls._schemes = OrderedDict(sorted(cls._schemes.items(), key=lambda t: t[0], reverse=True)) + + @classmethod + def register_scheme(cls, prefixes, loader=None, force=False): + """Register a loader to CheckpointLoader. + + This method can be used as a normal class method or a decorator. + + Args: + prefixes (str or list[str] or tuple[str]): + The prefix of the registered loader. + loader (function, optional): The loader function to be registered. + When this method is used as a decorator, loader is None. + Defaults to None. + force (bool, optional): Whether to override the loader + if the prefix has already been registered. Defaults to False. + """ + + if loader is not None: + cls._register_scheme(prefixes, loader, force=force) + return + + def _register(loader_cls): + cls._register_scheme(prefixes, loader_cls, force=force) + return loader_cls + + return _register + + @classmethod + def _get_checkpoint_loader(cls, path): + """Finds a loader that supports the given path. Falls back to the local + loader if no other loader is found. + + Args: + path (str): checkpoint path + + Returns: + loader (function): checkpoint loader + """ + + for p in cls._schemes: + if path.startswith(p): + return cls._schemes[p] + + @classmethod + def load_checkpoint(cls, filename, map_location=None, logger=None): + """load checkpoint through URL scheme path. + + Args: + filename (str): checkpoint file name with given prefix + map_location (str, optional): Same as :func:`torch.load`. + Default: None + logger (:mod:`logging.Logger`, optional): The logger for message. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + checkpoint_loader = cls._get_checkpoint_loader(filename) + class_name = checkpoint_loader.__name__ + mmcv.print_log(f'load checkpoint from {class_name[10:]} path: {filename}', logger) + return checkpoint_loader(filename, map_location) + + +@CheckpointLoader.register_scheme(prefixes='') +def load_from_local(filename, map_location): + """load checkpoint by local file path. + + Args: + filename (str): local checkpoint file path + map_location (str, optional): Same as :func:`torch.load`. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes=('http://', 'https://')) +def load_from_http(filename, map_location=None, model_dir=None): + """load checkpoint through HTTP or HTTPS scheme path. In distributed + setting, this function only download checkpoint at local rank 0. + + Args: + filename (str): checkpoint file path with modelzoo or + torchvision prefix + map_location (str, optional): Same as :func:`torch.load`. + model_dir (string, optional): directory in which to save the object, + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + if rank == 0: + checkpoint = model_zoo.load_url(filename, model_dir=model_dir, map_location=map_location) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + checkpoint = model_zoo.load_url(filename, model_dir=model_dir, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes='pavi://') +def load_from_pavi(filename, map_location=None): + """load checkpoint through the file path prefixed with pavi. In distributed + setting, this function download ckpt at all ranks to different temporary + directories. + + Args: + filename (str): checkpoint file path with pavi prefix + map_location (str, optional): Same as :func:`torch.load`. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + assert filename.startswith('pavi://'), f'Expected filename startswith `pavi://`, but get {filename}' + model_path = filename[7:] + + try: + from pavi import modelcloud + except ImportError: + raise ImportError('Please install pavi to load checkpoint from modelcloud.') + + model = modelcloud.get(model_path) + with TemporaryDirectory() as tmp_dir: + downloaded_file = osp.join(tmp_dir, model.name) + model.download(downloaded_file) + checkpoint = torch.load(downloaded_file, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes='s3://') +def load_from_ceph(filename, map_location=None, backend='petrel'): + """load checkpoint through the file path prefixed with s3. In distributed + setting, this function download ckpt at all ranks to different temporary + directories. + + Args: + filename (str): checkpoint file path with s3 prefix + map_location (str, optional): Same as :func:`torch.load`. + backend (str, optional): The storage backend type. Options are 'ceph', + 'petrel'. Default: 'petrel'. + + .. warning:: + :class:`mmcv.fileio.file_client.CephBackend` will be deprecated, + please use :class:`mmcv.fileio.file_client.PetrelBackend` instead. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + allowed_backends = ['ceph', 'petrel'] + if backend not in allowed_backends: + raise ValueError(f'Load from Backend {backend} is not supported.') + + if backend == 'ceph': + warnings.warn('CephBackend will be deprecated, please use PetrelBackend instead') + + # CephClient and PetrelBackend have the same prefix 's3://' and the latter + # will be chosen as default. If PetrelBackend can not be instantiated + # successfully, the CephClient will be chosen. + try: + file_client = FileClient(backend=backend) + except ImportError: + allowed_backends.remove(backend) + file_client = FileClient(backend=allowed_backends[0]) + + with io.BytesIO(file_client.get(filename)) as buffer: + checkpoint = torch.load(buffer, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes=('modelzoo://', 'torchvision://')) +def load_from_torchvision(filename, map_location=None): + """load checkpoint through the file path prefixed with modelzoo or + torchvision. + + Args: + filename (str): checkpoint file path with modelzoo or + torchvision prefix + map_location (str, optional): Same as :func:`torch.load`. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + model_urls = get_torchvision_models() + if filename.startswith('modelzoo://'): + warnings.warn('The URL scheme of "modelzoo://" is deprecated, please ' 'use "torchvision://" instead') + model_name = filename[11:] + else: + model_name = filename[14:] + return load_from_http(model_urls[model_name], map_location=map_location) + + +@CheckpointLoader.register_scheme(prefixes=('open-mmlab://', 'openmmlab://')) +def load_from_openmmlab(filename, map_location=None): + """load checkpoint through the file path prefixed with open-mmlab or + openmmlab. + + Args: + filename (str): checkpoint file path with open-mmlab or + openmmlab prefix + map_location (str, optional): Same as :func:`torch.load`. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + model_urls = get_external_models() + prefix_str = 'open-mmlab://' + if filename.startswith(prefix_str): + model_name = filename[13:] + else: + model_name = filename[12:] + prefix_str = 'openmmlab://' + + deprecated_urls = get_deprecated_model_names() + if model_name in deprecated_urls: + warnings.warn( + f'{prefix_str}{model_name} is deprecated in favor ' f'of {prefix_str}{deprecated_urls[model_name]}' + ) + model_name = deprecated_urls[model_name] + model_url = model_urls[model_name] + # check if is url + if model_url.startswith(('http://', 'https://')): + checkpoint = load_from_http(model_url, map_location=map_location) + else: + filename = osp.join(_get_mmcv_home(), model_url) + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes='mmcls://') +def load_from_mmcls(filename, map_location=None): + """load checkpoint through the file path prefixed with mmcls. + + Args: + filename (str): checkpoint file path with mmcls prefix + map_location (str, optional): Same as :func:`torch.load`. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + model_urls = get_mmcls_models() + model_name = filename[8:] + checkpoint = load_from_http(model_urls[model_name], map_location=map_location) + checkpoint = _process_mmcls_checkpoint(checkpoint) + return checkpoint + + +def _load_checkpoint(filename, map_location=None, logger=None): + """Load checkpoint from somewhere (modelzoo, file, url). + + Args: + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str, optional): Same as :func:`torch.load`. + Default: None. + logger (:mod:`logging.Logger`, optional): The logger for error message. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. It can be either an + OrderedDict storing model weights or a dict containing other + information, which depends on the checkpoint. + """ + return CheckpointLoader.load_checkpoint(filename, map_location, logger) + + +def _load_checkpoint_with_prefix(prefix, filename, map_location=None): + """Load partial pretrained model with specific prefix. + + Args: + prefix (str): The prefix of sub-module. + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str | None): Same as :func:`torch.load`. Default: None. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + checkpoint = _load_checkpoint(filename, map_location=map_location) + + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + if not prefix.endswith('.'): + prefix += '.' + prefix_len = len(prefix) + + state_dict = {k[prefix_len:]: v for k, v in state_dict.items() if k.startswith(prefix)} + + assert state_dict, f'{prefix} is not in the pretrained model' + return state_dict + + +def load_checkpoint(model, filename, map_location=None, strict=False, logger=None, revise_keys=[(r'^module\.', '')]): + """Load checkpoint from a file or URI. + + Args: + model (Module): Module to load checkpoint. + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str): Same as :func:`torch.load`. + strict (bool): Whether to allow different params for the model and + checkpoint. + logger (:mod:`logging.Logger` or None): The logger for error message. + revise_keys (list): A list of customized keywords to modify the + state_dict in checkpoint. Each item is a (pattern, replacement) + pair of the regular expression operations. Default: strip + the prefix 'module.' by [(r'^module\\.', '')]. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + checkpoint = _load_checkpoint(filename, map_location, logger) + # OrderedDict is a subclass of dict + if not isinstance(checkpoint, dict): + raise RuntimeError(f'No state_dict found in checkpoint file {filename}') + # get state_dict from checkpoint + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + # strip prefix of state_dict + metadata = getattr(state_dict, '_metadata', OrderedDict()) + for p, r in revise_keys: + state_dict = OrderedDict({re.sub(p, r, k): v for k, v in state_dict.items()}) + # Keep metadata in state_dict + state_dict._metadata = metadata + + # load state_dict + load_state_dict(model, state_dict, strict, logger) + return checkpoint + + +def weights_to_cpu(state_dict): + """Copy a model state_dict to cpu. + + Args: + state_dict (OrderedDict): Model weights on GPU. + + Returns: + OrderedDict: Model weights on GPU. + """ + state_dict_cpu = OrderedDict() + for key, val in state_dict.items(): + state_dict_cpu[key] = val.cpu() + # Keep metadata in state_dict + state_dict_cpu._metadata = getattr(state_dict, '_metadata', OrderedDict()) + return state_dict_cpu + + +def _save_to_state_dict(module, destination, prefix, keep_vars): + """Saves module state to `destination` dictionary. + + This method is modified from :meth:`torch.nn.Module._save_to_state_dict`. + + Args: + module (nn.Module): The module to generate state_dict. + destination (dict): A dict where state will be stored. + prefix (str): The prefix for parameters and buffers used in this + module. + """ + for name, param in module._parameters.items(): + if param is not None: + destination[prefix + name] = param if keep_vars else param.detach() + for name, buf in module._buffers.items(): + # remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d + if buf is not None: + destination[prefix + name] = buf if keep_vars else buf.detach() + + +def get_state_dict(module, destination=None, prefix='', keep_vars=False): + """Returns a dictionary containing a whole state of the module. + + Both parameters and persistent buffers (e.g. running averages) are + included. Keys are corresponding parameter and buffer names. + + This method is modified from :meth:`torch.nn.Module.state_dict` to + recursively check parallel module in case that the model has a complicated + structure, e.g., nn.Module(nn.Module(DDP)). + + Args: + module (nn.Module): The module to generate state_dict. + destination (OrderedDict): Returned dict for the state of the + module. + prefix (str): Prefix of the key. + keep_vars (bool): Whether to keep the variable property of the + parameters. Default: False. + + Returns: + dict: A dictionary containing a whole state of the module. + """ + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + + # below is the same as torch.nn.Module.state_dict() + if destination is None: + destination = OrderedDict() + destination._metadata = OrderedDict() + destination._metadata[prefix[:-1]] = local_metadata = dict(version=module._version) + _save_to_state_dict(module, destination, prefix, keep_vars) + for name, child in module._modules.items(): + if child is not None: + get_state_dict(child, destination, prefix + name + '.', keep_vars=keep_vars) + for hook in module._state_dict_hooks.values(): + hook_result = hook(module, destination, prefix, local_metadata) + if hook_result is not None: + destination = hook_result + return destination + + +def save_checkpoint(model, filename, optimizer=None, meta=None, file_client_args=None): + """Save checkpoint to file. + + The checkpoint will have 3 fields: ``meta``, ``state_dict`` and + ``optimizer``. By default ``meta`` will contain version and time info. + + Args: + model (Module): Module whose params are to be saved. + filename (str): Checkpoint filename. + optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. + meta (dict, optional): Metadata to be saved in checkpoint. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + `New in version 1.3.16.` + """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError(f'meta must be a dict or None, but got {type(meta)}') + meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) + + if is_module_wrapper(model): + model = model.module + + if hasattr(model, 'CLASSES') and model.CLASSES is not None: + # save class name to the meta + meta.update(CLASSES=model.CLASSES) + + checkpoint = {'meta': meta, 'state_dict': weights_to_cpu(get_state_dict(model))} + # save optimizer state dict in the checkpoint + if isinstance(optimizer, Optimizer): + checkpoint['optimizer'] = optimizer.state_dict() + elif isinstance(optimizer, dict): + checkpoint['optimizer'] = {} + for name, optim in optimizer.items(): + checkpoint['optimizer'][name] = optim.state_dict() + + if filename.startswith('pavi://'): + if file_client_args is not None: + raise ValueError( + 'file_client_args should be "None" if filename starts with' f'"pavi://", but got {file_client_args}' + ) + try: + from pavi import exception, modelcloud + except ImportError: + raise ImportError('Please install pavi to load checkpoint from modelcloud.') + model_path = filename[7:] + root = modelcloud.Folder() + model_dir, model_name = osp.split(model_path) + try: + model = modelcloud.get(model_dir) + except exception.NodeNotFoundError: + model = root.create_training_model(model_dir) + with TemporaryDirectory() as tmp_dir: + checkpoint_file = osp.join(tmp_dir, model_name) + with open(checkpoint_file, 'wb') as f: + torch.save(checkpoint, f) + f.flush() + model.create_file(checkpoint_file, name=model_name) + else: + file_client = FileClient.infer_client(file_client_args, filename) + with io.BytesIO() as f: + torch.save(checkpoint, f) + file_client.put(f.getvalue(), filename) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py new file mode 100644 index 000000000000..c840d803f743 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/default_constructor.py @@ -0,0 +1,43 @@ +from .builder import RUNNER_BUILDERS, RUNNERS + + +@RUNNER_BUILDERS.register_module() +class DefaultRunnerConstructor: + """Default constructor for runners. + + Custom existing `Runner` like `EpocBasedRunner` though `RunnerConstructor`. + For example, We can inject some new properties and functions for `Runner`. + + Example: + >>> from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import RUNNER_BUILDERS, build_runner + >>> # Define a new RunnerReconstructor + >>> @RUNNER_BUILDERS.register_module() + >>> class MyRunnerConstructor: + ... def __init__(self, runner_cfg, default_args=None): + ... if not isinstance(runner_cfg, dict): + ... raise TypeError('runner_cfg should be a dict', + ... f'but got {type(runner_cfg)}') + ... self.runner_cfg = runner_cfg + ... self.default_args = default_args + ... + ... def __call__(self): + ... runner = RUNNERS.build(self.runner_cfg, + ... default_args=self.default_args) + ... # Add new properties for existing runner + ... runner.my_name = 'my_runner' + ... runner.my_function = lambda self: print(self.my_name) + ... ... + >>> # build your runner + >>> runner_cfg = dict(type='EpochBasedRunner', max_epochs=40, + ... constructor='MyRunnerConstructor') + >>> runner = build_runner(runner_cfg) + """ + + def __init__(self, runner_cfg, default_args=None): + if not isinstance(runner_cfg, dict): + raise TypeError('runner_cfg should be a dict', f'but got {type(runner_cfg)}') + self.runner_cfg = runner_cfg + self.default_args = default_args + + def __call__(self): + return RUNNERS.build(self.runner_cfg, default_args=self.default_args) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py new file mode 100644 index 000000000000..19799b785be9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/dist_utils.py @@ -0,0 +1,157 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import functools +import os +import subprocess +from collections import OrderedDict + +import torch +import torch.multiprocessing as mp +from torch import distributed as dist +from torch._utils import _flatten_dense_tensors, _take_tensors, _unflatten_dense_tensors + + +def init_dist(launcher, backend='nccl', **kwargs): + if mp.get_start_method(allow_none=True) is None: + mp.set_start_method('spawn') + if launcher == 'pytorch': + _init_dist_pytorch(backend, **kwargs) + elif launcher == 'mpi': + _init_dist_mpi(backend, **kwargs) + elif launcher == 'slurm': + _init_dist_slurm(backend, **kwargs) + else: + raise ValueError(f'Invalid launcher type: {launcher}') + + +def _init_dist_pytorch(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['RANK']) + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(rank % num_gpus) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_mpi(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['OMPI_COMM_WORLD_RANK']) + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(rank % num_gpus) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_slurm(backend, port=None): + """Initialize slurm distributed training environment. + + If argument ``port`` is not specified, then the master port will be system + environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system + environment variable, then a default port ``29500`` will be used. + + Args: + backend (str): Backend of torch.distributed. + port (int, optional): Master port. Defaults to None. + """ + proc_id = int(os.environ['SLURM_PROCID']) + ntasks = int(os.environ['SLURM_NTASKS']) + node_list = os.environ['SLURM_NODELIST'] + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(proc_id % num_gpus) + addr = subprocess.getoutput(f'scontrol show hostname {node_list} | head -n1') + # specify master port + if port is not None: + os.environ['MASTER_PORT'] = str(port) + elif 'MASTER_PORT' in os.environ: + pass # use MASTER_PORT in the environment variable + else: + # 29500 is torch.distributed default port + os.environ['MASTER_PORT'] = '29500' + # use MASTER_ADDR in the environment variable if it already exists + if 'MASTER_ADDR' not in os.environ: + os.environ['MASTER_ADDR'] = addr + os.environ['WORLD_SIZE'] = str(ntasks) + os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) + os.environ['RANK'] = str(proc_id) + dist.init_process_group(backend=backend) + + +def get_dist_info(): + if dist.is_available() and dist.is_initialized(): + rank = dist.get_rank() + world_size = dist.get_world_size() + else: + rank = 0 + world_size = 1 + return rank, world_size + + +def master_only(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + rank, _ = get_dist_info() + if rank == 0: + return func(*args, **kwargs) + + return wrapper + + +def allreduce_params(params, coalesce=True, bucket_size_mb=-1): + """Allreduce parameters. + + Args: + params (list[torch.Parameters]): List of parameters or buffers of a + model. + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. + """ + _, world_size = get_dist_info() + if world_size == 1: + return + params = [param.data for param in params] + if coalesce: + _allreduce_coalesced(params, world_size, bucket_size_mb) + else: + for tensor in params: + dist.all_reduce(tensor.div_(world_size)) + + +def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): + """Allreduce gradients. + + Args: + params (list[torch.Parameters]): List of parameters of a model + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. + """ + grads = [param.grad.data for param in params if param.requires_grad and param.grad is not None] + _, world_size = get_dist_info() + if world_size == 1: + return + if coalesce: + _allreduce_coalesced(grads, world_size, bucket_size_mb) + else: + for tensor in grads: + dist.all_reduce(tensor.div_(world_size)) + + +def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): + if bucket_size_mb > 0: + bucket_size_bytes = bucket_size_mb * 1024 * 1024 + buckets = _take_tensors(tensors, bucket_size_bytes) + else: + buckets = OrderedDict() + for tensor in tensors: + tp = tensor.type() + if tp not in buckets: + buckets[tp] = [] + buckets[tp].append(tensor) + buckets = buckets.values() + + for bucket in buckets: + flat_tensors = _flatten_dense_tensors(bucket) + dist.all_reduce(flat_tensors) + flat_tensors.div_(world_size) + for tensor, synced in zip(bucket, _unflatten_dense_tensors(flat_tensors, bucket)): + tensor.copy_(synced) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py new file mode 100644 index 000000000000..ba7a97fa0241 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/epoch_based_runner.py @@ -0,0 +1,172 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import platform +import shutil +import time +import warnings + +import torch + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from .base_runner import BaseRunner +from .builder import RUNNERS +from .checkpoint import save_checkpoint +from .utils import get_host_info + + +@RUNNERS.register_module() +class EpochBasedRunner(BaseRunner): + """Epoch-based Runner. + + This runner train models epoch by epoch. + """ + + def run_iter(self, data_batch, train_mode, **kwargs): + if self.batch_processor is not None: + outputs = self.batch_processor(self.model, data_batch, train_mode=train_mode, **kwargs) + elif train_mode: + outputs = self.model.train_step(data_batch, self.optimizer, **kwargs) + else: + outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) + if not isinstance(outputs, dict): + raise TypeError('"batch_processor()" or "model.train_step()"' 'and "model.val_step()" must return a dict') + if 'log_vars' in outputs: + self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) + self.outputs = outputs + + def train(self, data_loader, **kwargs): + self.model.train() + self.mode = 'train' + self.data_loader = data_loader + self._max_iters = self._max_epochs * len(self.data_loader) + self.call_hook('before_train_epoch') + time.sleep(2) # Prevent possible deadlock during epoch transition + for i, data_batch in enumerate(self.data_loader): + self._inner_iter = i + self.call_hook('before_train_iter') + self.run_iter(data_batch, train_mode=True, **kwargs) + self.call_hook('after_train_iter') + self._iter += 1 + + self.call_hook('after_train_epoch') + self._epoch += 1 + + @torch.no_grad() + def val(self, data_loader, **kwargs): + self.model.eval() + self.mode = 'val' + self.data_loader = data_loader + self.call_hook('before_val_epoch') + time.sleep(2) # Prevent possible deadlock during epoch transition + for i, data_batch in enumerate(self.data_loader): + self._inner_iter = i + self.call_hook('before_val_iter') + self.run_iter(data_batch, train_mode=False) + self.call_hook('after_val_iter') + + self.call_hook('after_val_epoch') + + def run(self, data_loaders, workflow, max_epochs=None, **kwargs): + """Start running. + + Args: + data_loaders (list[:obj:`DataLoader`]): Dataloaders for training + and validation. + workflow (list[tuple]): A list of (phase, epochs) to specify the + running order and epochs. E.g, [('train', 2), ('val', 1)] means + running 2 epochs for training and 1 epoch for validation, + iteratively. + """ + assert isinstance(data_loaders, list) + assert mmcv.is_list_of(workflow, tuple) + assert len(data_loaders) == len(workflow) + if max_epochs is not None: + warnings.warn( + 'setting max_epochs in run is deprecated, ' 'please set max_epochs in runner_config', + DeprecationWarning, + ) + self._max_epochs = max_epochs + + assert self._max_epochs is not None, 'max_epochs must be specified during instantiation' + + for i, flow in enumerate(workflow): + mode, epochs = flow + if mode == 'train': + self._max_iters = self._max_epochs * len(data_loaders[i]) + break + + work_dir = self.work_dir if self.work_dir is not None else 'NONE' + self.logger.info('Start running, host: %s, work_dir: %s', get_host_info(), work_dir) + self.logger.info('Hooks will be executed in the following order:\n%s', self.get_hook_info()) + self.logger.info('workflow: %s, max: %d epochs', workflow, self._max_epochs) + self.call_hook('before_run') + + while self.epoch < self._max_epochs: + for i, flow in enumerate(workflow): + mode, epochs = flow + if isinstance(mode, str): # self.train() + if not hasattr(self, mode): + raise ValueError(f'runner has no method named "{mode}" to run an ' 'epoch') + epoch_runner = getattr(self, mode) + else: + raise TypeError('mode in workflow must be a str, but got {}'.format(type(mode))) + + for _ in range(epochs): + if mode == 'train' and self.epoch >= self._max_epochs: + break + epoch_runner(data_loaders[i], **kwargs) + + time.sleep(1) # wait for some hooks like loggers to finish + self.call_hook('after_run') + + def save_checkpoint( + self, out_dir, filename_tmpl='epoch_{}.pth', save_optimizer=True, meta=None, create_symlink=True + ): + """Save the checkpoint. + + Args: + out_dir (str): The directory that checkpoints are saved. + filename_tmpl (str, optional): The checkpoint filename template, + which contains a placeholder for the epoch number. + Defaults to 'epoch_{}.pth'. + save_optimizer (bool, optional): Whether to save the optimizer to + the checkpoint. Defaults to True. + meta (dict, optional): The meta information to be saved in the + checkpoint. Defaults to None. + create_symlink (bool, optional): Whether to create a symlink + "latest.pth" to point to the latest checkpoint. + Defaults to True. + """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError(f'meta should be a dict or None, but got {type(meta)}') + if self.meta is not None: + meta.update(self.meta) + # Note: meta.update(self.meta) should be done before + # meta.update(epoch=self.epoch + 1, iter=self.iter) otherwise + # there will be problems with resumed checkpoints. + # More details in https://github.com/open-mmlab/mmcv/pull/1108 + meta.update(epoch=self.epoch + 1, iter=self.iter) + + filename = filename_tmpl.format(self.epoch + 1) + filepath = osp.join(out_dir, filename) + optimizer = self.optimizer if save_optimizer else None + save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) + # in some environments, `os.symlink` is not supported, you may need to + # set `create_symlink` to False + if create_symlink: + dst_file = osp.join(out_dir, 'latest.pth') + if platform.system() != 'Windows': + mmcv.symlink(filename, dst_file) + else: + shutil.copy(filepath, dst_file) + + +@RUNNERS.register_module() +class Runner(EpochBasedRunner): + """Deprecated name of EpochBasedRunner.""" + + def __init__(self, *args, **kwargs): + warnings.warn('Runner was deprecated, please use EpochBasedRunner instead') + super().__init__(*args, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py new file mode 100644 index 000000000000..e205ab42af90 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/fp16_utils.py @@ -0,0 +1,388 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import functools +import warnings +from collections import abc +from inspect import getfullargspec + +import numpy as np +import torch +import torch.nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version +from .dist_utils import allreduce_grads as _allreduce_grads + +try: + # If PyTorch version >= 1.6.0, torch.cuda.amp.autocast would be imported + # and used; otherwise, auto fp16 will adopt mmcv's implementation. + # Note that when PyTorch >= 1.6.0, we still cast tensor types to fp16 + # manually, so the behavior may not be consistent with real amp. + from torch.cuda.amp import autocast +except ImportError: + pass + + +def cast_tensor_type(inputs, src_type, dst_type): + """Recursively convert Tensor in inputs from src_type to dst_type. + + Args: + inputs: Inputs that to be casted. + src_type (torch.dtype): Source type.. + dst_type (torch.dtype): Destination type. + + Returns: + The same type with inputs, but all contained Tensors have been cast. + """ + if isinstance(inputs, nn.Module): + return inputs + elif isinstance(inputs, torch.Tensor): + return inputs.to(dst_type) + elif isinstance(inputs, str): + return inputs + elif isinstance(inputs, np.ndarray): + return inputs + elif isinstance(inputs, abc.Mapping): + return type(inputs)({k: cast_tensor_type(v, src_type, dst_type) for k, v in inputs.items()}) + elif isinstance(inputs, abc.Iterable): + return type(inputs)(cast_tensor_type(item, src_type, dst_type) for item in inputs) + else: + return inputs + + +def auto_fp16(apply_to=None, out_fp32=False): + """Decorator to enable fp16 training automatically. + + This decorator is useful when you write custom modules and want to support + mixed precision training. If inputs arguments are fp32 tensors, they will + be converted to fp16 automatically. Arguments other than fp32 tensors are + ignored. If you are using PyTorch >= 1.6, torch.cuda.amp is used as the + backend, otherwise, original mmcv implementation will be adopted. + + Args: + apply_to (Iterable, optional): The argument names to be converted. + `None` indicates all arguments. + out_fp32 (bool): Whether to convert the output back to fp32. + + Example: + + >>> import torch.nn as nn + >>> class MyModule1(nn.Module): + >>> + >>> # Convert x and y to fp16 + >>> @auto_fp16() + >>> def forward(self, x, y): + >>> pass + + >>> import torch.nn as nn + >>> class MyModule2(nn.Module): + >>> + >>> # convert pred to fp16 + >>> @auto_fp16(apply_to=('pred', )) + >>> def do_something(self, pred, others): + >>> pass + """ + + def auto_fp16_wrapper(old_func): + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # check if the module has set the attribute `fp16_enabled`, if not, + # just fallback to the original method. + if not isinstance(args[0], torch.nn.Module): + raise TypeError('@auto_fp16 can only be used to decorate the ' 'method of nn.Module') + if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): + return old_func(*args, **kwargs) + + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get the argument names to be casted + args_to_cast = args_info.args if apply_to is None else apply_to + # convert the args that need to be processed + new_args = [] + # NOTE: default args are not taken into consideration + if args: + arg_names = args_info.args[: len(args)] + for i, arg_name in enumerate(arg_names): + if arg_name in args_to_cast: + new_args.append(cast_tensor_type(args[i], torch.float, torch.half)) + else: + new_args.append(args[i]) + # convert the kwargs that need to be processed + new_kwargs = {} + if kwargs: + for arg_name, arg_value in kwargs.items(): + if arg_name in args_to_cast: + new_kwargs[arg_name] = cast_tensor_type(arg_value, torch.float, torch.half) + else: + new_kwargs[arg_name] = arg_value + # apply converted arguments to the decorated method + if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) >= digit_version('1.6.0'): + with autocast(enabled=True): + output = old_func(*new_args, **new_kwargs) + else: + output = old_func(*new_args, **new_kwargs) + # cast the results back to fp32 if necessary + if out_fp32: + output = cast_tensor_type(output, torch.half, torch.float) + return output + + return new_func + + return auto_fp16_wrapper + + +def force_fp32(apply_to=None, out_fp16=False): + """Decorator to convert input arguments to fp32 in force. + + This decorator is useful when you write custom modules and want to support + mixed precision training. If there are some inputs that must be processed + in fp32 mode, then this decorator can handle it. If inputs arguments are + fp16 tensors, they will be converted to fp32 automatically. Arguments other + than fp16 tensors are ignored. If you are using PyTorch >= 1.6, + torch.cuda.amp is used as the backend, otherwise, original mmcv + implementation will be adopted. + + Args: + apply_to (Iterable, optional): The argument names to be converted. + `None` indicates all arguments. + out_fp16 (bool): Whether to convert the output back to fp16. + + Example: + + >>> import torch.nn as nn + >>> class MyModule1(nn.Module): + >>> + >>> # Convert x and y to fp32 + >>> @force_fp32() + >>> def loss(self, x, y): + >>> pass + + >>> import torch.nn as nn + >>> class MyModule2(nn.Module): + >>> + >>> # convert pred to fp32 + >>> @force_fp32(apply_to=('pred', )) + >>> def post_process(self, pred, others): + >>> pass + """ + + def force_fp32_wrapper(old_func): + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # check if the module has set the attribute `fp16_enabled`, if not, + # just fallback to the original method. + if not isinstance(args[0], torch.nn.Module): + raise TypeError('@force_fp32 can only be used to decorate the ' 'method of nn.Module') + if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): + return old_func(*args, **kwargs) + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get the argument names to be casted + args_to_cast = args_info.args if apply_to is None else apply_to + # convert the args that need to be processed + new_args = [] + if args: + arg_names = args_info.args[: len(args)] + for i, arg_name in enumerate(arg_names): + if arg_name in args_to_cast: + new_args.append(cast_tensor_type(args[i], torch.half, torch.float)) + else: + new_args.append(args[i]) + # convert the kwargs that need to be processed + new_kwargs = dict() + if kwargs: + for arg_name, arg_value in kwargs.items(): + if arg_name in args_to_cast: + new_kwargs[arg_name] = cast_tensor_type(arg_value, torch.half, torch.float) + else: + new_kwargs[arg_name] = arg_value + # apply converted arguments to the decorated method + if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) >= digit_version('1.6.0'): + with autocast(enabled=False): + output = old_func(*new_args, **new_kwargs) + else: + output = old_func(*new_args, **new_kwargs) + # cast the results back to fp32 if necessary + if out_fp16: + output = cast_tensor_type(output, torch.float, torch.half) + return output + + return new_func + + return force_fp32_wrapper + + +def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): + warnings.warning( + '"mmcv.runner.fp16_utils.allreduce_grads" is deprecated, and will be ' + 'removed in v2.8. Please switch to "mmcv.runner.allreduce_grads' + ) + _allreduce_grads(params, coalesce=coalesce, bucket_size_mb=bucket_size_mb) + + +def wrap_fp16_model(model): + """Wrap the FP32 model to FP16. + + If you are using PyTorch >= 1.6, torch.cuda.amp is used as the + backend, otherwise, original mmcv implementation will be adopted. + + For PyTorch >= 1.6, this function will + 1. Set fp16 flag inside the model to True. + + Otherwise: + 1. Convert FP32 model to FP16. + 2. Remain some necessary layers to be FP32, e.g., normalization layers. + 3. Set `fp16_enabled` flag inside the model to True. + + Args: + model (nn.Module): Model in FP32. + """ + if TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.6.0'): + # convert model to fp16 + model.half() + # patch the normalization layers to make it work in fp32 mode + patch_norm_fp32(model) + # set `fp16_enabled` flag + for m in model.modules(): + if hasattr(m, 'fp16_enabled'): + m.fp16_enabled = True + + +def patch_norm_fp32(module): + """Recursively convert normalization layers from FP16 to FP32. + + Args: + module (nn.Module): The modules to be converted in FP16. + + Returns: + nn.Module: The converted module, the normalization layers have been + converted to FP32. + """ + if isinstance(module, (nn.modules.batchnorm._BatchNorm, nn.GroupNorm)): + module.float() + if isinstance(module, nn.GroupNorm) or torch.__version__ < '1.3': + module.forward = patch_forward_method(module.forward, torch.half, torch.float) + for child in module.children(): + patch_norm_fp32(child) + return module + + +def patch_forward_method(func, src_type, dst_type, convert_output=True): + """Patch the forward method of a module. + + Args: + func (callable): The original forward method. + src_type (torch.dtype): Type of input arguments to be converted from. + dst_type (torch.dtype): Type of input arguments to be converted to. + convert_output (bool): Whether to convert the output back to src_type. + + Returns: + callable: The patched forward method. + """ + + def new_forward(*args, **kwargs): + output = func(*cast_tensor_type(args, src_type, dst_type), **cast_tensor_type(kwargs, src_type, dst_type)) + if convert_output: + output = cast_tensor_type(output, dst_type, src_type) + return output + + return new_forward + + +class LossScaler: + """Class that manages loss scaling in mixed precision training which + supports both dynamic or static mode. + + The implementation refers to + https://github.com/NVIDIA/apex/blob/master/apex/fp16_utils/loss_scaler.py. + Indirectly, by supplying ``mode='dynamic'`` for dynamic loss scaling. + It's important to understand how :class:`LossScaler` operates. + Loss scaling is designed to combat the problem of underflowing + gradients encountered at long times when training fp16 networks. + Dynamic loss scaling begins by attempting a very high loss + scale. Ironically, this may result in OVERflowing gradients. + If overflowing gradients are encountered, :class:`FP16_Optimizer` then + skips the update step for this particular iteration/minibatch, + and :class:`LossScaler` adjusts the loss scale to a lower value. + If a certain number of iterations occur without overflowing gradients + detected,:class:`LossScaler` increases the loss scale once more. + In this way :class:`LossScaler` attempts to "ride the edge" of always + using the highest loss scale possible without incurring overflow. + + Args: + init_scale (float): Initial loss scale value, default: 2**32. + scale_factor (float): Factor used when adjusting the loss scale. + Default: 2. + mode (str): Loss scaling mode. 'dynamic' or 'static' + scale_window (int): Number of consecutive iterations without an + overflow to wait before increasing the loss scale. Default: 1000. + """ + + def __init__(self, init_scale=2 ** 32, mode='dynamic', scale_factor=2.0, scale_window=1000): + self.cur_scale = init_scale + self.cur_iter = 0 + assert mode in ('dynamic', 'static'), 'mode can only be dynamic or static' + self.mode = mode + self.last_overflow_iter = -1 + self.scale_factor = scale_factor + self.scale_window = scale_window + + def has_overflow(self, params): + """Check if params contain overflow.""" + if self.mode != 'dynamic': + return False + for p in params: + if p.grad is not None and LossScaler._has_inf_or_nan(p.grad.data): + return True + return False + + def _has_inf_or_nan(x): + """Check if params contain NaN.""" + try: + cpu_sum = float(x.float().sum()) + except RuntimeError as instance: + if 'value cannot be converted' not in instance.args[0]: + raise + return True + else: + if cpu_sum == float('inf') or cpu_sum == -float('inf') or cpu_sum != cpu_sum: + return True + return False + + def update_scale(self, overflow): + """update the current loss scale value when overflow happens.""" + if self.mode != 'dynamic': + return + if overflow: + self.cur_scale = max(self.cur_scale / self.scale_factor, 1) + self.last_overflow_iter = self.cur_iter + else: + if (self.cur_iter - self.last_overflow_iter) % self.scale_window == 0: + self.cur_scale *= self.scale_factor + self.cur_iter += 1 + + def state_dict(self): + """Returns the state of the scaler as a :class:`dict`.""" + return dict( + cur_scale=self.cur_scale, + cur_iter=self.cur_iter, + mode=self.mode, + last_overflow_iter=self.last_overflow_iter, + scale_factor=self.scale_factor, + scale_window=self.scale_window, + ) + + def load_state_dict(self, state_dict): + """Loads the loss_scaler state dict. + + Args: + state_dict (dict): scaler state. + """ + self.cur_scale = state_dict['cur_scale'] + self.cur_iter = state_dict['cur_iter'] + self.mode = state_dict['mode'] + self.last_overflow_iter = state_dict['last_overflow_iter'] + self.scale_factor = state_dict['scale_factor'] + self.scale_window = state_dict['scale_window'] + + @property + def loss_scale(self): + return self.cur_scale diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py new file mode 100644 index 000000000000..6b1b86fba36e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/__init__.py @@ -0,0 +1,58 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .checkpoint import CheckpointHook +from .closure import ClosureHook +from .ema import EMAHook +from .evaluation import DistEvalHook, EvalHook +from .hook import HOOKS, Hook +from .iter_timer import IterTimerHook +from .logger import ( + DvcliveLoggerHook, + LoggerHook, + MlflowLoggerHook, + NeptuneLoggerHook, + PaviLoggerHook, + TensorboardLoggerHook, + TextLoggerHook, + WandbLoggerHook, +) +from .lr_updater import LrUpdaterHook +from .memory import EmptyCacheHook +from .momentum_updater import MomentumUpdaterHook +from .optimizer import ( + Fp16OptimizerHook, + GradientCumulativeFp16OptimizerHook, + GradientCumulativeOptimizerHook, + OptimizerHook, +) +from .profiler import ProfilerHook +from .sampler_seed import DistSamplerSeedHook +from .sync_buffer import SyncBuffersHook + +__all__ = [ + 'HOOKS', + 'Hook', + 'CheckpointHook', + 'ClosureHook', + 'LrUpdaterHook', + 'OptimizerHook', + 'Fp16OptimizerHook', + 'IterTimerHook', + 'DistSamplerSeedHook', + 'EmptyCacheHook', + 'LoggerHook', + 'MlflowLoggerHook', + 'PaviLoggerHook', + 'TextLoggerHook', + 'TensorboardLoggerHook', + 'NeptuneLoggerHook', + 'WandbLoggerHook', + 'DvcliveLoggerHook', + 'MomentumUpdaterHook', + 'SyncBuffersHook', + 'EMAHook', + 'EvalHook', + 'DistEvalHook', + 'ProfilerHook', + 'GradientCumulativeOptimizerHook', + 'GradientCumulativeFp16OptimizerHook', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py new file mode 100644 index 000000000000..24381d6876ef --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/checkpoint.py @@ -0,0 +1,156 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import warnings + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio import FileClient +from ..dist_utils import allreduce_params, master_only +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class CheckpointHook(Hook): + """Save checkpoints periodically. + + Args: + interval (int): The saving period. If ``by_epoch=True``, interval + indicates epochs, otherwise it indicates iterations. + Default: -1, which means "never". + by_epoch (bool): Saving checkpoints by epoch or by iteration. + Default: True. + save_optimizer (bool): Whether to save optimizer state_dict in the + checkpoint. It is usually used for resuming experiments. + Default: True. + out_dir (str, optional): The root directory to save checkpoints. If not + specified, ``runner.work_dir`` will be used by default. If + specified, the ``out_dir`` will be the concatenation of ``out_dir`` + and the last level directory of ``runner.work_dir``. + `Changed in version 1.3.16.` + max_keep_ckpts (int, optional): The maximum checkpoints to keep. + In some cases we want only the latest few checkpoints and would + like to delete old ones to save the disk space. + Default: -1, which means unlimited. + save_last (bool, optional): Whether to force the last checkpoint to be + saved regardless of interval. Default: True. + sync_buffer (bool, optional): Whether to synchronize buffers in + different gpus. Default: False. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + `New in version 1.3.16.` + + .. warning:: + Before v1.3.16, the ``out_dir`` argument indicates the path where the + checkpoint is stored. However, since v1.3.16, ``out_dir`` indicates the + root directory and the final path to save checkpoint is the + concatenation of ``out_dir`` and the last level directory of + ``runner.work_dir``. Suppose the value of ``out_dir`` is "/path/of/A" + and the value of ``runner.work_dir`` is "/path/of/B", then the final + path will be "/path/of/A/B". + """ + + def __init__( + self, + interval=-1, + by_epoch=True, + save_optimizer=True, + out_dir=None, + max_keep_ckpts=-1, + save_last=True, + sync_buffer=False, + file_client_args=None, + **kwargs, + ): + self.interval = interval + self.by_epoch = by_epoch + self.save_optimizer = save_optimizer + self.out_dir = out_dir + self.max_keep_ckpts = max_keep_ckpts + self.save_last = save_last + self.args = kwargs + self.sync_buffer = sync_buffer + self.file_client_args = file_client_args + + def before_run(self, runner): + if not self.out_dir: + self.out_dir = runner.work_dir + + self.file_client = FileClient.infer_client(self.file_client_args, self.out_dir) + + # if `self.out_dir` is not equal to `runner.work_dir`, it means that + # `self.out_dir` is set so the final `self.out_dir` is the + # concatenation of `self.out_dir` and the last level directory of + # `runner.work_dir` + if self.out_dir != runner.work_dir: + basename = osp.basename(runner.work_dir.rstrip(osp.sep)) + self.out_dir = self.file_client.join_path(self.out_dir, basename) + + runner.logger.info((f'Checkpoints will be saved to {self.out_dir} by ' f'{self.file_client.name}.')) + + # disable the create_symlink option because some file backends do not + # allow to create a symlink + if 'create_symlink' in self.args: + if self.args['create_symlink'] and not self.file_client.allow_symlink: + self.args['create_symlink'] = False + warnings.warn( + ( + 'create_symlink is set as True by the user but is changed' + 'to be False because creating symbolic link is not ' + f'allowed in {self.file_client.name}' + ) + ) + else: + self.args['create_symlink'] = self.file_client.allow_symlink + + def after_train_epoch(self, runner): + if not self.by_epoch: + return + + # save checkpoint for following cases: + # 1. every ``self.interval`` epochs + # 2. reach the last epoch of training + if self.every_n_epochs(runner, self.interval) or (self.save_last and self.is_last_epoch(runner)): + runner.logger.info(f'Saving checkpoint at {runner.epoch + 1} epochs') + if self.sync_buffer: + allreduce_params(runner.model.buffers()) + self._save_checkpoint(runner) + + @master_only + def _save_checkpoint(self, runner): + """Save the current checkpoint and delete unwanted checkpoint.""" + runner.save_checkpoint(self.out_dir, save_optimizer=self.save_optimizer, **self.args) + if runner.meta is not None: + if self.by_epoch: + cur_ckpt_filename = self.args.get('filename_tmpl', 'epoch_{}.pth').format(runner.epoch + 1) + else: + cur_ckpt_filename = self.args.get('filename_tmpl', 'iter_{}.pth').format(runner.iter + 1) + runner.meta.setdefault('hook_msgs', dict()) + runner.meta['hook_msgs']['last_ckpt'] = self.file_client.join_path(self.out_dir, cur_ckpt_filename) + # remove other checkpoints + if self.max_keep_ckpts > 0: + if self.by_epoch: + name = 'epoch_{}.pth' + current_ckpt = runner.epoch + 1 + else: + name = 'iter_{}.pth' + current_ckpt = runner.iter + 1 + redundant_ckpts = range(current_ckpt - self.max_keep_ckpts * self.interval, 0, -self.interval) + filename_tmpl = self.args.get('filename_tmpl', name) + for _step in redundant_ckpts: + ckpt_path = self.file_client.join_path(self.out_dir, filename_tmpl.format(_step)) + if self.file_client.isfile(ckpt_path): + self.file_client.remove(ckpt_path) + else: + break + + def after_train_iter(self, runner): + if self.by_epoch: + return + + # save checkpoint for following cases: + # 1. every ``self.interval`` iterations + # 2. reach the last iteration of training + if self.every_n_iters(runner, self.interval) or (self.save_last and self.is_last_iter(runner)): + runner.logger.info(f'Saving checkpoint at {runner.iter + 1} iterations') + if self.sync_buffer: + allreduce_params(runner.model.buffers()) + self._save_checkpoint(runner) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py new file mode 100644 index 000000000000..0781664b46a3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/closure.py @@ -0,0 +1,10 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class ClosureHook(Hook): + def __init__(self, fn_name, fn): + assert hasattr(self, fn_name) + assert callable(fn) + setattr(self, fn_name, fn) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py new file mode 100644 index 000000000000..8114b106bf3c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/ema.py @@ -0,0 +1,84 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...parallel import is_module_wrapper +from ..hooks.hook import HOOKS, Hook + + +@HOOKS.register_module() +class EMAHook(Hook): + r"""Exponential Moving Average Hook. + + Use Exponential Moving Average on all parameters of model in training + process. All parameters have a ema backup, which update by the formula + as below. EMAHook takes priority over EvalHook and CheckpointSaverHook. + + .. math:: + + \text{Xema\_{t+1}} = (1 - \text{momentum}) \times + \text{Xema\_{t}} + \text{momentum} \times X_t + + Args: + momentum (float): The momentum used for updating ema parameter. + Defaults to 0.0002. + interval (int): Update ema parameter every interval iteration. + Defaults to 1. + warm_up (int): During first warm_up steps, we may use smaller momentum + to update ema parameters more slowly. Defaults to 100. + resume_from (str): The checkpoint path. Defaults to None. + """ + + def __init__(self, momentum=0.0002, interval=1, warm_up=100, resume_from=None): + assert isinstance(interval, int) and interval > 0 + self.warm_up = warm_up + self.interval = interval + assert momentum > 0 and momentum < 1 + self.momentum = momentum ** interval + self.checkpoint = resume_from + + def before_run(self, runner): + """To resume model with it's ema parameters more friendly. + + Register ema parameter as ``named_buffer`` to model + """ + model = runner.model + if is_module_wrapper(model): + model = model.module + self.param_ema_buffer = {} + self.model_parameters = dict(model.named_parameters(recurse=True)) + for name, value in self.model_parameters.items(): + # "." is not allowed in module's buffer name + buffer_name = f"ema_{name.replace('.', '_')}" + self.param_ema_buffer[name] = buffer_name + model.register_buffer(buffer_name, value.data.clone()) + self.model_buffers = dict(model.named_buffers(recurse=True)) + if self.checkpoint is not None: + runner.resume(self.checkpoint) + + def after_train_iter(self, runner): + """Update ema parameter every self.interval iterations.""" + curr_step = runner.iter + # We warm up the momentum considering the instability at beginning + momentum = min(self.momentum, (1 + curr_step) / (self.warm_up + curr_step)) + if curr_step % self.interval != 0: + return + for name, parameter in self.model_parameters.items(): + buffer_name = self.param_ema_buffer[name] + buffer_parameter = self.model_buffers[buffer_name] + buffer_parameter.mul_(1 - momentum).add_(momentum, parameter.data) + + def after_train_epoch(self, runner): + """We load parameter values from ema backup to model before the + EvalHook.""" + self._swap_ema_parameters() + + def before_train_epoch(self, runner): + """We recover model's parameter from ema backup after last epoch's + EvalHook.""" + self._swap_ema_parameters() + + def _swap_ema_parameters(self): + """Swap the parameter of model with parameter in ema_buffer.""" + for name, value in self.model_parameters.items(): + temp = value.data.clone() + ema_buffer = self.model_buffers[self.param_ema_buffer[name]] + value.data.copy_(ema_buffer.data) + ema_buffer.data.copy_(temp) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py new file mode 100644 index 000000000000..1431bb39a665 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/evaluation.py @@ -0,0 +1,493 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import warnings +from math import inf + +import torch.distributed as dist +from torch.nn.modules.batchnorm import _BatchNorm +from torch.utils.data import DataLoader + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio import FileClient +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_seq_of +from .hook import Hook +from .logger import LoggerHook + + +class EvalHook(Hook): + """Non-Distributed evaluation hook. + + This hook will regularly perform evaluation in a given interval when + performing in non-distributed environment. + + Args: + dataloader (DataLoader): A PyTorch dataloader, whose dataset has + implemented ``evaluate`` function. + start (int | None, optional): Evaluation starting epoch. It enables + evaluation before the training starts if ``start`` <= the resuming + epoch. If None, whether to evaluate is merely decided by + ``interval``. Default: None. + interval (int): Evaluation interval. Default: 1. + by_epoch (bool): Determine perform evaluation by epoch or by iteration. + If set to True, it will perform by epoch. Otherwise, by iteration. + Default: True. + save_best (str, optional): If a metric is specified, it would measure + the best checkpoint during evaluation. The information about best + checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep + best score value and best checkpoint path, which will be also + loaded when resume checkpoint. Options are the evaluation metrics + on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox + detection and instance segmentation. ``AR@100`` for proposal + recall. If ``save_best`` is ``auto``, the first key of the returned + ``OrderedDict`` result will be used. Default: None. + rule (str | None, optional): Comparison rule for best score. If set to + None, it will infer a reasonable rule. Keys such as 'acc', 'top' + .etc will be inferred by 'greater' rule. Keys contain 'loss' will + be inferred by 'less' rule. Options are 'greater', 'less', None. + Default: None. + test_fn (callable, optional): test a model with samples from a + dataloader, and return the test results. If ``None``, the default + test function ``mmcv.engine.single_gpu_test`` will be used. + (default: ``None``) + greater_keys (List[str] | None, optional): Metric keys that will be + inferred by 'greater' comparison rule. If ``None``, + _default_greater_keys will be used. (default: ``None``) + less_keys (List[str] | None, optional): Metric keys that will be + inferred by 'less' comparison rule. If ``None``, _default_less_keys + will be used. (default: ``None``) + out_dir (str, optional): The root directory to save checkpoints. If not + specified, `runner.work_dir` will be used by default. If specified, + the `out_dir` will be the concatenation of `out_dir` and the last + level directory of `runner.work_dir`. + `New in version 1.3.16.` + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. Default: None. + `New in version 1.3.16.` + **eval_kwargs: Evaluation arguments fed into the evaluate function of + the dataset. + + Notes: + If new arguments are added for EvalHook, tools/test.py, + tools/eval_metric.py may be affected. + """ + + # Since the key for determine greater or less is related to the downstream + # tasks, downstream repos may need to overwrite the following inner + # variable accordingly. + + rule_map = {'greater': lambda x, y: x > y, 'less': lambda x, y: x < y} + init_value_map = {'greater': -inf, 'less': inf} + _default_greater_keys = ['acc', 'top', 'AR@', 'auc', 'precision', 'mAP', 'mDice', 'mIoU', 'mAcc', 'aAcc'] + _default_less_keys = ['loss'] + + def __init__( + self, + dataloader, + start=None, + interval=1, + by_epoch=True, + save_best=None, + rule=None, + test_fn=None, + greater_keys=None, + less_keys=None, + out_dir=None, + file_client_args=None, + **eval_kwargs, + ): + if not isinstance(dataloader, DataLoader): + raise TypeError(f'dataloader must be a pytorch DataLoader, ' f'but got {type(dataloader)}') + + if interval <= 0: + raise ValueError(f'interval must be a positive number, ' f'but got {interval}') + + assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean' + + if start is not None and start < 0: + raise ValueError(f'The evaluation start epoch {start} is smaller ' f'than 0') + + self.dataloader = dataloader + self.interval = interval + self.start = start + self.by_epoch = by_epoch + + assert isinstance(save_best, str) or save_best is None, ( + '""save_best"" should be a str or None ' f'rather than {type(save_best)}' + ) + self.save_best = save_best + self.eval_kwargs = eval_kwargs + self.initial_flag = True + + if test_fn is None: + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.engine import single_gpu_test + + self.test_fn = single_gpu_test + else: + self.test_fn = test_fn + + if greater_keys is None: + self.greater_keys = self._default_greater_keys + else: + if not isinstance(greater_keys, (list, tuple)): + greater_keys = (greater_keys,) + assert is_seq_of(greater_keys, str) + self.greater_keys = greater_keys + + if less_keys is None: + self.less_keys = self._default_less_keys + else: + if not isinstance(less_keys, (list, tuple)): + less_keys = (less_keys,) + assert is_seq_of(less_keys, str) + self.less_keys = less_keys + + if self.save_best is not None: + self.best_ckpt_path = None + self._init_rule(rule, self.save_best) + + self.out_dir = out_dir + self.file_client_args = file_client_args + + def _init_rule(self, rule, key_indicator): + """Initialize rule, key_indicator, comparison_func, and best score. + + Here is the rule to determine which rule is used for key indicator + when the rule is not specific (note that the key indicator matching + is case-insensitive): + 1. If the key indicator is in ``self.greater_keys``, the rule will be + specified as 'greater'. + 2. Or if the key indicator is in ``self.less_keys``, the rule will be + specified as 'less'. + 3. Or if the key indicator is equal to the substring in any one item + in ``self.greater_keys``, the rule will be specified as 'greater'. + 4. Or if the key indicator is equal to the substring in any one item + in ``self.less_keys``, the rule will be specified as 'less'. + + Args: + rule (str | None): Comparison rule for best score. + key_indicator (str | None): Key indicator to determine the + comparison rule. + """ + if rule not in self.rule_map and rule is not None: + raise KeyError(f'rule must be greater, less or None, ' f'but got {rule}.') + + if rule is None: + if key_indicator != 'auto': + # `_lc` here means we use the lower case of keys for + # case-insensitive matching + key_indicator_lc = key_indicator.lower() + greater_keys = [key.lower() for key in self.greater_keys] + less_keys = [key.lower() for key in self.less_keys] + + if key_indicator_lc in greater_keys: + rule = 'greater' + elif key_indicator_lc in less_keys: + rule = 'less' + elif any(key in key_indicator_lc for key in greater_keys): + rule = 'greater' + elif any(key in key_indicator_lc for key in less_keys): + rule = 'less' + else: + raise ValueError( + f'Cannot infer the rule for key ' + f'{key_indicator}, thus a specific rule ' + f'must be specified.' + ) + self.rule = rule + self.key_indicator = key_indicator + if self.rule is not None: + self.compare_func = self.rule_map[self.rule] + + def before_run(self, runner): + if not self.out_dir: + self.out_dir = runner.work_dir + + self.file_client = FileClient.infer_client(self.file_client_args, self.out_dir) + + # if `self.out_dir` is not equal to `runner.work_dir`, it means that + # `self.out_dir` is set so the final `self.out_dir` is the + # concatenation of `self.out_dir` and the last level directory of + # `runner.work_dir` + if self.out_dir != runner.work_dir: + basename = osp.basename(runner.work_dir.rstrip(osp.sep)) + self.out_dir = self.file_client.join_path(self.out_dir, basename) + runner.logger.info((f'The best checkpoint will be saved to {self.out_dir} by ' f'{self.file_client.name}')) + + if self.save_best is not None: + if runner.meta is None: + warnings.warn('runner.meta is None. Creating an empty one.') + runner.meta = dict() + runner.meta.setdefault('hook_msgs', dict()) + self.best_ckpt_path = runner.meta['hook_msgs'].get('best_ckpt', None) + + def before_train_iter(self, runner): + """Evaluate the model only at the start of training by iteration.""" + if self.by_epoch or not self.initial_flag: + return + if self.start is not None and runner.iter >= self.start: + self.after_train_iter(runner) + self.initial_flag = False + + def before_train_epoch(self, runner): + """Evaluate the model only at the start of training by epoch.""" + if not (self.by_epoch and self.initial_flag): + return + if self.start is not None and runner.epoch >= self.start: + self.after_train_epoch(runner) + self.initial_flag = False + + def after_train_iter(self, runner): + """Called after every training iter to evaluate the results.""" + if not self.by_epoch and self._should_evaluate(runner): + # Because the priority of EvalHook is higher than LoggerHook, the + # training log and the evaluating log are mixed. Therefore, + # we need to dump the training log and clear it before evaluating + # log is generated. In addition, this problem will only appear in + # `IterBasedRunner` whose `self.by_epoch` is False, because + # `EpochBasedRunner` whose `self.by_epoch` is True calls + # `_do_evaluate` in `after_train_epoch` stage, and at this stage + # the training log has been printed, so it will not cause any + # problem. more details at + # https://github.com/open-mmlab/mmsegmentation/issues/694 + for hook in runner._hooks: + if isinstance(hook, LoggerHook): + hook.after_train_iter(runner) + runner.log_buffer.clear() + + self._do_evaluate(runner) + + def after_train_epoch(self, runner): + """Called after every training epoch to evaluate the results.""" + if self.by_epoch and self._should_evaluate(runner): + self._do_evaluate(runner) + + def _do_evaluate(self, runner): + """perform evaluation and save ckpt.""" + results = self.test_fn(runner.model, self.dataloader) + runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) + key_score = self.evaluate(runner, results) + # the key_score may be `None` so it needs to skip the action to save + # the best checkpoint + if self.save_best and key_score: + self._save_ckpt(runner, key_score) + + def _should_evaluate(self, runner): + """Judge whether to perform evaluation. + + Here is the rule to judge whether to perform evaluation: + 1. It will not perform evaluation during the epoch/iteration interval, + which is determined by ``self.interval``. + 2. It will not perform evaluation if the start time is larger than + current time. + 3. It will not perform evaluation when current time is larger than + the start time but during epoch/iteration interval. + + Returns: + bool: The flag indicating whether to perform evaluation. + """ + if self.by_epoch: + current = runner.epoch + check_time = self.every_n_epochs + else: + current = runner.iter + check_time = self.every_n_iters + + if self.start is None: + if not check_time(runner, self.interval): + # No evaluation during the interval. + return False + elif (current + 1) < self.start: + # No evaluation if start is larger than the current time. + return False + else: + # Evaluation only at epochs/iters 3, 5, 7... + # if start==3 and interval==2 + if (current + 1 - self.start) % self.interval: + return False + return True + + def _save_ckpt(self, runner, key_score): + """Save the best checkpoint. + + It will compare the score according to the compare function, write + related information (best score, best checkpoint path) and save the + best checkpoint into ``work_dir``. + """ + if self.by_epoch: + current = f'epoch_{runner.epoch + 1}' + cur_type, cur_time = 'epoch', runner.epoch + 1 + else: + current = f'iter_{runner.iter + 1}' + cur_type, cur_time = 'iter', runner.iter + 1 + + best_score = runner.meta['hook_msgs'].get('best_score', self.init_value_map[self.rule]) + if self.compare_func(key_score, best_score): + best_score = key_score + runner.meta['hook_msgs']['best_score'] = best_score + + if self.best_ckpt_path and self.file_client.isfile(self.best_ckpt_path): + self.file_client.remove(self.best_ckpt_path) + runner.logger.info((f'The previous best checkpoint {self.best_ckpt_path} was ' 'removed')) + + best_ckpt_name = f'best_{self.key_indicator}_{current}.pth' + self.best_ckpt_path = self.file_client.join_path(self.out_dir, best_ckpt_name) + runner.meta['hook_msgs']['best_ckpt'] = self.best_ckpt_path + + runner.save_checkpoint(self.out_dir, best_ckpt_name, create_symlink=False) + runner.logger.info(f'Now best checkpoint is saved as {best_ckpt_name}.') + runner.logger.info(f'Best {self.key_indicator} is {best_score:0.4f} ' f'at {cur_time} {cur_type}.') + + def evaluate(self, runner, results): + """Evaluate the results. + + Args: + runner (:obj:`mmcv.Runner`): The underlined training runner. + results (list): Output results. + """ + eval_res = self.dataloader.dataset.evaluate(results, logger=runner.logger, **self.eval_kwargs) + + for name, val in eval_res.items(): + runner.log_buffer.output[name] = val + runner.log_buffer.ready = True + + if self.save_best is not None: + # If the performance of model is pool, the `eval_res` may be an + # empty dict and it will raise exception when `self.save_best` is + # not None. More details at + # https://github.com/open-mmlab/mmdetection/issues/6265. + if not eval_res: + warnings.warn( + 'Since `eval_res` is an empty dict, the behavior to save ' + 'the best checkpoint will be skipped in this evaluation.' + ) + return None + + if self.key_indicator == 'auto': + # infer from eval_results + self._init_rule(self.rule, list(eval_res.keys())[0]) + return eval_res[self.key_indicator] + + return None + + +class DistEvalHook(EvalHook): + """Distributed evaluation hook. + + This hook will regularly perform evaluation in a given interval when + performing in distributed environment. + + Args: + dataloader (DataLoader): A PyTorch dataloader, whose dataset has + implemented ``evaluate`` function. + start (int | None, optional): Evaluation starting epoch. It enables + evaluation before the training starts if ``start`` <= the resuming + epoch. If None, whether to evaluate is merely decided by + ``interval``. Default: None. + interval (int): Evaluation interval. Default: 1. + by_epoch (bool): Determine perform evaluation by epoch or by iteration. + If set to True, it will perform by epoch. Otherwise, by iteration. + default: True. + save_best (str, optional): If a metric is specified, it would measure + the best checkpoint during evaluation. The information about best + checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep + best score value and best checkpoint path, which will be also + loaded when resume checkpoint. Options are the evaluation metrics + on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox + detection and instance segmentation. ``AR@100`` for proposal + recall. If ``save_best`` is ``auto``, the first key of the returned + ``OrderedDict`` result will be used. Default: None. + rule (str | None, optional): Comparison rule for best score. If set to + None, it will infer a reasonable rule. Keys such as 'acc', 'top' + .etc will be inferred by 'greater' rule. Keys contain 'loss' will + be inferred by 'less' rule. Options are 'greater', 'less', None. + Default: None. + test_fn (callable, optional): test a model with samples from a + dataloader in a multi-gpu manner, and return the test results. If + ``None``, the default test function ``mmcv.engine.multi_gpu_test`` + will be used. (default: ``None``) + tmpdir (str | None): Temporary directory to save the results of all + processes. Default: None. + gpu_collect (bool): Whether to use gpu or cpu to collect results. + Default: False. + broadcast_bn_buffer (bool): Whether to broadcast the + buffer(running_mean and running_var) of rank 0 to other rank + before evaluation. Default: True. + out_dir (str, optional): The root directory to save checkpoints. If not + specified, `runner.work_dir` will be used by default. If specified, + the `out_dir` will be the concatenation of `out_dir` and the last + level directory of `runner.work_dir`. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. Default: None. + **eval_kwargs: Evaluation arguments fed into the evaluate function of + the dataset. + """ + + def __init__( + self, + dataloader, + start=None, + interval=1, + by_epoch=True, + save_best=None, + rule=None, + test_fn=None, + greater_keys=None, + less_keys=None, + broadcast_bn_buffer=True, + tmpdir=None, + gpu_collect=False, + out_dir=None, + file_client_args=None, + **eval_kwargs, + ): + + if test_fn is None: + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.engine import multi_gpu_test + + test_fn = multi_gpu_test + + super().__init__( + dataloader, + start=start, + interval=interval, + by_epoch=by_epoch, + save_best=save_best, + rule=rule, + test_fn=test_fn, + greater_keys=greater_keys, + less_keys=less_keys, + out_dir=out_dir, + file_client_args=file_client_args, + **eval_kwargs, + ) + + self.broadcast_bn_buffer = broadcast_bn_buffer + self.tmpdir = tmpdir + self.gpu_collect = gpu_collect + + def _do_evaluate(self, runner): + """perform evaluation and save ckpt.""" + # Synchronization of BatchNorm's buffer (running_mean + # and running_var) is not supported in the DDP of pytorch, + # which may cause the inconsistent performance of models in + # different ranks, so we broadcast BatchNorm's buffers + # of rank 0 to other ranks to avoid this. + if self.broadcast_bn_buffer: + model = runner.model + for name, module in model.named_modules(): + if isinstance(module, _BatchNorm) and module.track_running_stats: + dist.broadcast(module.running_var, 0) + dist.broadcast(module.running_mean, 0) + + tmpdir = self.tmpdir + if tmpdir is None: + tmpdir = osp.join(runner.work_dir, '.eval_hook') + + results = self.test_fn(runner.model, self.dataloader, tmpdir=tmpdir, gpu_collect=self.gpu_collect) + if runner.rank == 0: + print('\n') + runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) + key_score = self.evaluate(runner, results) + # the key_score may be `None` so it needs to skip the action to + # save the best checkpoint + if self.save_best and key_score: + self._save_ckpt(runner, key_score) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py new file mode 100644 index 000000000000..730cb0f21e7c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/hook.py @@ -0,0 +1,100 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry, is_method_overridden + +HOOKS = Registry('hook') + + +class Hook: + stages = ( + 'before_run', + 'before_train_epoch', + 'before_train_iter', + 'after_train_iter', + 'after_train_epoch', + 'before_val_epoch', + 'before_val_iter', + 'after_val_iter', + 'after_val_epoch', + 'after_run', + ) + + def before_run(self, runner): + pass + + def after_run(self, runner): + pass + + def before_epoch(self, runner): + pass + + def after_epoch(self, runner): + pass + + def before_iter(self, runner): + pass + + def after_iter(self, runner): + pass + + def before_train_epoch(self, runner): + self.before_epoch(runner) + + def before_val_epoch(self, runner): + self.before_epoch(runner) + + def after_train_epoch(self, runner): + self.after_epoch(runner) + + def after_val_epoch(self, runner): + self.after_epoch(runner) + + def before_train_iter(self, runner): + self.before_iter(runner) + + def before_val_iter(self, runner): + self.before_iter(runner) + + def after_train_iter(self, runner): + self.after_iter(runner) + + def after_val_iter(self, runner): + self.after_iter(runner) + + def every_n_epochs(self, runner, n): + return (runner.epoch + 1) % n == 0 if n > 0 else False + + def every_n_inner_iters(self, runner, n): + return (runner.inner_iter + 1) % n == 0 if n > 0 else False + + def every_n_iters(self, runner, n): + return (runner.iter + 1) % n == 0 if n > 0 else False + + def end_of_epoch(self, runner): + return runner.inner_iter + 1 == len(runner.data_loader) + + def is_last_epoch(self, runner): + return runner.epoch + 1 == runner._max_epochs + + def is_last_iter(self, runner): + return runner.iter + 1 == runner._max_iters + + def get_triggered_stages(self): + trigger_stages = set() + for stage in Hook.stages: + if is_method_overridden(stage, Hook, self): + trigger_stages.add(stage) + + # some methods will be triggered in multi stages + # use this dict to map method to stages. + method_stages_map = { + 'before_epoch': ['before_train_epoch', 'before_val_epoch'], + 'after_epoch': ['after_train_epoch', 'after_val_epoch'], + 'before_iter': ['before_train_iter', 'before_val_iter'], + 'after_iter': ['after_train_iter', 'after_val_iter'], + } + + for method, map_stages in method_stages_map.items(): + if is_method_overridden(method, Hook, self): + trigger_stages.update(map_stages) + + return [stage for stage in Hook.stages if stage in trigger_stages] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py new file mode 100644 index 000000000000..734404f95c9f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/iter_timer.py @@ -0,0 +1,17 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import time + +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class IterTimerHook(Hook): + def before_epoch(self, runner): + self.t = time.time() + + def before_iter(self, runner): + runner.log_buffer.update({'data_time': time.time() - self.t}) + + def after_iter(self, runner): + runner.log_buffer.update({'time': time.time() - self.t}) + self.t = time.time() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py new file mode 100644 index 000000000000..17da656e176e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .base import LoggerHook +from .dvclive import DvcliveLoggerHook +from .mlflow import MlflowLoggerHook +from .neptune import NeptuneLoggerHook +from .pavi import PaviLoggerHook +from .tensorboard import TensorboardLoggerHook +from .text import TextLoggerHook +from .wandb import WandbLoggerHook + +__all__ = [ + 'LoggerHook', + 'MlflowLoggerHook', + 'PaviLoggerHook', + 'TensorboardLoggerHook', + 'TextLoggerHook', + 'WandbLoggerHook', + 'NeptuneLoggerHook', + 'DvcliveLoggerHook', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py new file mode 100644 index 000000000000..cb873734d28e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/base.py @@ -0,0 +1,157 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numbers +from abc import ABCMeta, abstractmethod + +import numpy as np +import torch + +from ..hook import Hook + + +class LoggerHook(Hook): + """Base class for logger hooks. + + Args: + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging. + by_epoch (bool): Whether EpochBasedRunner is used. + """ + + __metaclass__ = ABCMeta + + def __init__(self, interval=10, ignore_last=True, reset_flag=False, by_epoch=True): + self.interval = interval + self.ignore_last = ignore_last + self.reset_flag = reset_flag + self.by_epoch = by_epoch + + @abstractmethod + def log(self, runner): + pass + + @staticmethod + def is_scalar(val, include_np=True, include_torch=True): + """Tell the input variable is a scalar or not. + + Args: + val: Input variable. + include_np (bool): Whether include 0-d np.ndarray as a scalar. + include_torch (bool): Whether include 0-d torch.Tensor as a scalar. + + Returns: + bool: True or False. + """ + if isinstance(val, numbers.Number): + return True + elif include_np and isinstance(val, np.ndarray) and val.ndim == 0: + return True + elif include_torch and isinstance(val, torch.Tensor) and len(val) == 1: + return True + else: + return False + + def get_mode(self, runner): + if runner.mode == 'train': + if 'time' in runner.log_buffer.output: + mode = 'train' + else: + mode = 'val' + elif runner.mode == 'val': + mode = 'val' + else: + raise ValueError(f"runner mode should be 'train' or 'val', " f'but got {runner.mode}') + return mode + + def get_epoch(self, runner): + if runner.mode == 'train': + epoch = runner.epoch + 1 + elif runner.mode == 'val': + # normal val mode + # runner.epoch += 1 has been done before val workflow + epoch = runner.epoch + else: + raise ValueError(f"runner mode should be 'train' or 'val', " f'but got {runner.mode}') + return epoch + + def get_iter(self, runner, inner_iter=False): + """Get the current training iteration step.""" + if self.by_epoch and inner_iter: + current_iter = runner.inner_iter + 1 + else: + current_iter = runner.iter + 1 + return current_iter + + def get_lr_tags(self, runner): + tags = {} + lrs = runner.current_lr() + if isinstance(lrs, dict): + for name, value in lrs.items(): + tags[f'learning_rate/{name}'] = value[0] + else: + tags['learning_rate'] = lrs[0] + return tags + + def get_momentum_tags(self, runner): + tags = {} + momentums = runner.current_momentum() + if isinstance(momentums, dict): + for name, value in momentums.items(): + tags[f'momentum/{name}'] = value[0] + else: + tags['momentum'] = momentums[0] + return tags + + def get_loggable_tags( + self, runner, allow_scalar=True, allow_text=False, add_mode=True, tags_to_skip=('time', 'data_time') + ): + tags = {} + for var, val in runner.log_buffer.output.items(): + if var in tags_to_skip: + continue + if self.is_scalar(val) and not allow_scalar: + continue + if isinstance(val, str) and not allow_text: + continue + if add_mode: + var = f'{self.get_mode(runner)}/{var}' + tags[var] = val + tags.update(self.get_lr_tags(runner)) + tags.update(self.get_momentum_tags(runner)) + return tags + + def before_run(self, runner): + for hook in runner.hooks[::-1]: + if isinstance(hook, LoggerHook): + hook.reset_flag = True + break + + def before_epoch(self, runner): + runner.log_buffer.clear() # clear logs of last epoch + + def after_train_iter(self, runner): + if self.by_epoch and self.every_n_inner_iters(runner, self.interval): + runner.log_buffer.average(self.interval) + elif not self.by_epoch and self.every_n_iters(runner, self.interval): + runner.log_buffer.average(self.interval) + elif self.end_of_epoch(runner) and not self.ignore_last: + # not precise but more stable + runner.log_buffer.average(self.interval) + + if runner.log_buffer.ready: + self.log(runner) + if self.reset_flag: + runner.log_buffer.clear_output() + + def after_train_epoch(self, runner): + if runner.log_buffer.ready: + self.log(runner) + if self.reset_flag: + runner.log_buffer.clear_output() + + def after_val_epoch(self, runner): + runner.log_buffer.average() + self.log(runner) + if self.reset_flag: + runner.log_buffer.clear_output() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py new file mode 100644 index 000000000000..d92f1696909d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/dvclive.py @@ -0,0 +1,51 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class DvcliveLoggerHook(LoggerHook): + """Class to log metrics with dvclive. + + It requires `dvclive`_ to be installed. + + Args: + path (str): Directory where dvclive will write TSV log files. + interval (int): Logging interval (every k iterations). + Default 10. + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + Default: True. + reset_flag (bool): Whether to clear the output buffer after logging. + Default: True. + by_epoch (bool): Whether EpochBasedRunner is used. + Default: True. + + .. _dvclive: + https://dvc.org/doc/dvclive + """ + + def __init__(self, path, interval=10, ignore_last=True, reset_flag=True, by_epoch=True): + + super(DvcliveLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) + self.path = path + self.import_dvclive() + + def import_dvclive(self): + try: + import dvclive + except ImportError: + raise ImportError('Please run "pip install dvclive" to install dvclive') + self.dvclive = dvclive + + @master_only + def before_run(self, runner): + self.dvclive.init(self.path) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + for k, v in tags.items(): + self.dvclive.log(k, v, step=self.get_iter(runner)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py new file mode 100644 index 000000000000..3392baa8f43d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/mlflow.py @@ -0,0 +1,70 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class MlflowLoggerHook(LoggerHook): + def __init__( + self, exp_name=None, tags=None, log_model=True, interval=10, ignore_last=True, reset_flag=False, by_epoch=True + ): + """Class to log metrics and (optionally) a trained model to MLflow. + + It requires `MLflow`_ to be installed. + + Args: + exp_name (str, optional): Name of the experiment to be used. + Default None. + If not None, set the active experiment. + If experiment does not exist, an experiment with provided name + will be created. + tags (dict of str: str, optional): Tags for the current run. + Default None. + If not None, set tags for the current run. + log_model (bool, optional): Whether to log an MLflow artifact. + Default True. + If True, log runner.model as an MLflow artifact + for the current run. + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging + by_epoch (bool): Whether EpochBasedRunner is used. + + .. _MLflow: + https://www.mlflow.org/docs/latest/index.html + """ + super(MlflowLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) + self.import_mlflow() + self.exp_name = exp_name + self.tags = tags + self.log_model = log_model + + def import_mlflow(self): + try: + import mlflow + import mlflow.pytorch as mlflow_pytorch + except ImportError: + raise ImportError('Please run "pip install mlflow" to install mlflow') + self.mlflow = mlflow + self.mlflow_pytorch = mlflow_pytorch + + @master_only + def before_run(self, runner): + super(MlflowLoggerHook, self).before_run(runner) + if self.exp_name is not None: + self.mlflow.set_experiment(self.exp_name) + if self.tags is not None: + self.mlflow.set_tags(self.tags) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + self.mlflow.log_metrics(tags, step=self.get_iter(runner)) + + @master_only + def after_run(self, runner): + if self.log_model: + self.mlflow_pytorch.log_model(runner.model, 'models') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py new file mode 100644 index 000000000000..25e6f1d85ebb --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/neptune.py @@ -0,0 +1,75 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class NeptuneLoggerHook(LoggerHook): + """Class to log metrics to NeptuneAI. + + It requires `neptune-client` to be installed. + + Args: + init_kwargs (dict): a dict contains the initialization keys as below: + - project (str): Name of a project in a form of + namespace/project_name. If None, the value of + NEPTUNE_PROJECT environment variable will be taken. + - api_token (str): User’s API token. + If None, the value of NEPTUNE_API_TOKEN environment + variable will be taken. Note: It is strongly recommended + to use NEPTUNE_API_TOKEN environment variable rather than + placing your API token in plain text in your source code. + - name (str, optional, default is 'Untitled'): Editable name of + the run. Name is displayed in the run's Details and in + Runs table as a column. + Check https://docs.neptune.ai/api-reference/neptune#init for + more init arguments. + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging + by_epoch (bool): Whether EpochBasedRunner is used. + + .. _NeptuneAI: + https://docs.neptune.ai/you-should-know/logging-metadata + """ + + def __init__( + self, init_kwargs=None, interval=10, ignore_last=True, reset_flag=True, with_step=True, by_epoch=True + ): + + super(NeptuneLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) + self.import_neptune() + self.init_kwargs = init_kwargs + self.with_step = with_step + + def import_neptune(self): + try: + import neptune.new as neptune + except ImportError: + raise ImportError('Please run "pip install neptune-client" to install neptune') + self.neptune = neptune + self.run = None + + @master_only + def before_run(self, runner): + if self.init_kwargs: + self.run = self.neptune.init(**self.init_kwargs) + else: + self.run = self.neptune.init() + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + for tag_name, tag_value in tags.items(): + if self.with_step: + self.run[tag_name].log(tag_value, step=self.get_iter(runner)) + else: + tags['global_step'] = self.get_iter(runner) + self.run[tag_name].log(tags) + + @master_only + def after_run(self, runner): + self.run.stop() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py new file mode 100644 index 000000000000..1f79cb0f305e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/pavi.py @@ -0,0 +1,113 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import json +import os +import os.path as osp + +import torch +import yaml + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from ....parallel.utils import is_module_wrapper +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class PaviLoggerHook(LoggerHook): + def __init__( + self, + init_kwargs=None, + add_graph=False, + add_last_ckpt=False, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True, + img_key='img_info', + ): + super(PaviLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) + self.init_kwargs = init_kwargs + self.add_graph = add_graph + self.add_last_ckpt = add_last_ckpt + self.img_key = img_key + + @master_only + def before_run(self, runner): + super(PaviLoggerHook, self).before_run(runner) + try: + from pavi import SummaryWriter + except ImportError: + raise ImportError('Please run "pip install pavi" to install pavi.') + + self.run_name = runner.work_dir.split('/')[-1] + + if not self.init_kwargs: + self.init_kwargs = dict() + self.init_kwargs['name'] = self.run_name + self.init_kwargs['model'] = runner._model_name + if runner.meta is not None: + if 'config_dict' in runner.meta: + config_dict = runner.meta['config_dict'] + assert isinstance(config_dict, dict), ( + 'meta["config_dict"] has to be of a dict, ' f'but got {type(config_dict)}' + ) + elif 'config_file' in runner.meta: + config_file = runner.meta['config_file'] + config_dict = dict(mmcv.Config.fromfile(config_file)) + else: + config_dict = None + if config_dict is not None: + # 'max_.*iter' is parsed in pavi sdk as the maximum iterations + # to properly set up the progress bar. + config_dict = config_dict.copy() + config_dict.setdefault('max_iter', runner.max_iters) + # non-serializable values are first converted in + # mmcv.dump to json + config_dict = json.loads(mmcv.dump(config_dict, file_format='json')) + session_text = yaml.dump(config_dict) + self.init_kwargs['session_text'] = session_text + self.writer = SummaryWriter(**self.init_kwargs) + + def get_step(self, runner): + """Get the total training step/epoch.""" + if self.get_mode(runner) == 'val' and self.by_epoch: + return self.get_epoch(runner) + else: + return self.get_iter(runner) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner, add_mode=False) + if tags: + self.writer.add_scalars(self.get_mode(runner), tags, self.get_step(runner)) + + @master_only + def after_run(self, runner): + if self.add_last_ckpt: + ckpt_path = osp.join(runner.work_dir, 'latest.pth') + if osp.islink(ckpt_path): + ckpt_path = osp.join(runner.work_dir, os.readlink(ckpt_path)) + + if osp.isfile(ckpt_path): + # runner.epoch += 1 has been done before `after_run`. + iteration = runner.epoch if self.by_epoch else runner.iter + return self.writer.add_snapshot_file( + tag=self.run_name, snapshot_file_path=ckpt_path, iteration=iteration + ) + + # flush the buffer and send a task ending signal to Pavi + self.writer.close() + + @master_only + def before_epoch(self, runner): + if runner.epoch == 0 and self.add_graph: + if is_module_wrapper(runner.model): + _model = runner.model.module + else: + _model = runner.model + device = next(_model.parameters()).device + data = next(iter(runner.data_loader)) + image = data[self.img_key][0:1].to(device) + with torch.no_grad(): + self.writer.add_graph(_model, image) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py new file mode 100644 index 000000000000..ccd0c5b5aac0 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/tensorboard.py @@ -0,0 +1,49 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, digit_version +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class TensorboardLoggerHook(LoggerHook): + def __init__(self, log_dir=None, interval=10, ignore_last=True, reset_flag=False, by_epoch=True): + super(TensorboardLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) + self.log_dir = log_dir + + @master_only + def before_run(self, runner): + super(TensorboardLoggerHook, self).before_run(runner) + if TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.1'): + try: + from tensorboardX import SummaryWriter + except ImportError: + raise ImportError('Please install tensorboardX to use ' 'TensorboardLoggerHook.') + else: + try: + from torch.utils.tensorboard import SummaryWriter + except ImportError: + raise ImportError( + 'Please run "pip install future tensorboard" to install ' + 'the dependencies to use torch.utils.tensorboard ' + '(applicable to PyTorch 1.1 or higher)' + ) + + if self.log_dir is None: + self.log_dir = osp.join(runner.work_dir, 'tf_logs') + self.writer = SummaryWriter(self.log_dir) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner, allow_text=True) + for tag, val in tags.items(): + if isinstance(val, str): + self.writer.add_text(tag, val, self.get_iter(runner)) + else: + self.writer.add_scalar(tag, val, self.get_iter(runner)) + + @master_only + def after_run(self, runner): + self.writer.close() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py new file mode 100644 index 000000000000..da54f3d56059 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/text.py @@ -0,0 +1,235 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import datetime +import os +import os.path as osp +from collections import OrderedDict + +import torch +import torch.distributed as dist + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio.file_client import FileClient +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_tuple_of, scandir +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class TextLoggerHook(LoggerHook): + """Logger hook in text. + + In this logger hook, the information will be printed on terminal and + saved in json file. + + Args: + by_epoch (bool, optional): Whether EpochBasedRunner is used. + Default: True. + interval (int, optional): Logging interval (every k iterations). + Default: 10. + ignore_last (bool, optional): Ignore the log of last iterations in each + epoch if less than :attr:`interval`. Default: True. + reset_flag (bool, optional): Whether to clear the output buffer after + logging. Default: False. + interval_exp_name (int, optional): Logging interval for experiment + name. This feature is to help users conveniently get the experiment + information from screen or log file. Default: 1000. + out_dir (str, optional): Logs are saved in ``runner.work_dir`` default. + If ``out_dir`` is specified, logs will be copied to a new directory + which is the concatenation of ``out_dir`` and the last level + directory of ``runner.work_dir``. Default: None. + `New in version 1.3.16.` + out_suffix (str or tuple[str], optional): Those filenames ending with + ``out_suffix`` will be copied to ``out_dir``. + Default: ('.log.json', '.log', '.py'). + `New in version 1.3.16.` + keep_local (bool, optional): Whether to keep local log when + :attr:`out_dir` is specified. If False, the local log will be + removed. Default: True. + `New in version 1.3.16.` + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + `New in version 1.3.16.` + """ + + def __init__( + self, + by_epoch=True, + interval=10, + ignore_last=True, + reset_flag=False, + interval_exp_name=1000, + out_dir=None, + out_suffix=('.log.json', '.log', '.py'), + keep_local=True, + file_client_args=None, + ): + super(TextLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) + self.by_epoch = by_epoch + self.time_sec_tot = 0 + self.interval_exp_name = interval_exp_name + + if out_dir is None and file_client_args is not None: + raise ValueError('file_client_args should be "None" when `out_dir` is not' 'specified.') + self.out_dir = out_dir + + if not (out_dir is None or isinstance(out_dir, str) or is_tuple_of(out_dir, str)): + raise TypeError('out_dir should be "None" or string or tuple of ' 'string, but got {out_dir}') + self.out_suffix = out_suffix + + self.keep_local = keep_local + self.file_client_args = file_client_args + if self.out_dir is not None: + self.file_client = FileClient.infer_client(file_client_args, self.out_dir) + + def before_run(self, runner): + super(TextLoggerHook, self).before_run(runner) + + if self.out_dir is not None: + self.file_client = FileClient.infer_client(self.file_client_args, self.out_dir) + # The final `self.out_dir` is the concatenation of `self.out_dir` + # and the last level directory of `runner.work_dir` + basename = osp.basename(runner.work_dir.rstrip(osp.sep)) + self.out_dir = self.file_client.join_path(self.out_dir, basename) + runner.logger.info( + ( + f'Text logs will be saved to {self.out_dir} by ' + f'{self.file_client.name} after the training process.' + ) + ) + + self.start_iter = runner.iter + self.json_log_path = osp.join(runner.work_dir, f'{runner.timestamp}.log.json') + if runner.meta is not None: + self._dump_log(runner.meta, runner) + + def _get_max_memory(self, runner): + device = getattr(runner.model, 'output_device', None) + mem = torch.cuda.max_memory_allocated(device=device) + mem_mb = torch.tensor([mem / (1024 * 1024)], dtype=torch.int, device=device) + if runner.world_size > 1: + dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX) + return mem_mb.item() + + def _log_info(self, log_dict, runner): + # print exp name for users to distinguish experiments + # at every ``interval_exp_name`` iterations and the end of each epoch + if runner.meta is not None and 'exp_name' in runner.meta: + if (self.every_n_iters(runner, self.interval_exp_name)) or (self.by_epoch and self.end_of_epoch(runner)): + exp_info = f'Exp name: {runner.meta["exp_name"]}' + runner.logger.info(exp_info) + + if log_dict['mode'] == 'train': + if isinstance(log_dict['lr'], dict): + lr_str = [] + for k, val in log_dict['lr'].items(): + lr_str.append(f'lr_{k}: {val:.3e}') + lr_str = ' '.join(lr_str) + else: + lr_str = f'lr: {log_dict["lr"]:.3e}' + + # by epoch: Epoch [4][100/1000] + # by iter: Iter [100/100000] + if self.by_epoch: + log_str = f'Epoch [{log_dict["epoch"]}]' f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t' + else: + log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t' + log_str += f'{lr_str}, ' + + if 'time' in log_dict.keys(): + self.time_sec_tot += log_dict['time'] * self.interval + time_sec_avg = self.time_sec_tot / (runner.iter - self.start_iter + 1) + eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1) + eta_str = str(datetime.timedelta(seconds=int(eta_sec))) + log_str += f'eta: {eta_str}, ' + log_str += f'time: {log_dict["time"]:.3f}, ' f'data_time: {log_dict["data_time"]:.3f}, ' + # statistic memory + if torch.cuda.is_available(): + log_str += f'memory: {log_dict["memory"]}, ' + else: + # val/test time + # here 1000 is the length of the val dataloader + # by epoch: Epoch[val] [4][1000] + # by iter: Iter[val] [1000] + if self.by_epoch: + log_str = f'Epoch({log_dict["mode"]}) ' f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t' + else: + log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t' + + log_items = [] + for name, val in log_dict.items(): + # TODO: resolve this hack + # these items have been in log_str + if name in ['mode', 'Epoch', 'iter', 'lr', 'time', 'data_time', 'memory', 'epoch']: + continue + if isinstance(val, float): + val = f'{val:.4f}' + log_items.append(f'{name}: {val}') + log_str += ', '.join(log_items) + + runner.logger.info(log_str) + + def _dump_log(self, log_dict, runner): + # dump log in json format + json_log = OrderedDict() + for k, v in log_dict.items(): + json_log[k] = self._round_float(v) + # only append log at last line + if runner.rank == 0: + with open(self.json_log_path, 'a+') as f: + mmcv.dump(json_log, f, file_format='json') + f.write('\n') + + def _round_float(self, items): + if isinstance(items, list): + return [self._round_float(item) for item in items] + elif isinstance(items, float): + return round(items, 5) + else: + return items + + def log(self, runner): + if 'eval_iter_num' in runner.log_buffer.output: + # this doesn't modify runner.iter and is regardless of by_epoch + cur_iter = runner.log_buffer.output.pop('eval_iter_num') + else: + cur_iter = self.get_iter(runner, inner_iter=True) + + log_dict = OrderedDict(mode=self.get_mode(runner), epoch=self.get_epoch(runner), iter=cur_iter) + + # only record lr of the first param group + cur_lr = runner.current_lr() + if isinstance(cur_lr, list): + log_dict['lr'] = cur_lr[0] + else: + assert isinstance(cur_lr, dict) + log_dict['lr'] = {} + for k, lr_ in cur_lr.items(): + assert isinstance(lr_, list) + log_dict['lr'].update({k: lr_[0]}) + + if 'time' in runner.log_buffer.output: + # statistic memory + if torch.cuda.is_available(): + log_dict['memory'] = self._get_max_memory(runner) + + log_dict = dict(log_dict, **runner.log_buffer.output) + + self._log_info(log_dict, runner) + self._dump_log(log_dict, runner) + return log_dict + + def after_run(self, runner): + # copy or upload logs to self.out_dir + if self.out_dir is not None: + for filename in scandir(runner.work_dir, self.out_suffix, True): + local_filepath = osp.join(runner.work_dir, filename) + out_filepath = self.file_client.join_path(self.out_dir, filename) + with open(local_filepath, 'r') as f: + self.file_client.put_text(f.read(), out_filepath) + + runner.logger.info((f'The file {local_filepath} has been uploaded to ' f'{out_filepath}.')) + + if not self.keep_local: + os.remove(local_filepath) + runner.logger.info((f'{local_filepath} was removed due to the ' '`self.keep_local=False`')) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py new file mode 100644 index 000000000000..c94d8391711c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/logger/wandb.py @@ -0,0 +1,54 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class WandbLoggerHook(LoggerHook): + def __init__( + self, + init_kwargs=None, + interval=10, + ignore_last=True, + reset_flag=False, + commit=True, + by_epoch=True, + with_step=True, + ): + super(WandbLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) + self.import_wandb() + self.init_kwargs = init_kwargs + self.commit = commit + self.with_step = with_step + + def import_wandb(self): + try: + import wandb + except ImportError: + raise ImportError('Please run "pip install wandb" to install wandb') + self.wandb = wandb + + @master_only + def before_run(self, runner): + super(WandbLoggerHook, self).before_run(runner) + if self.wandb is None: + self.import_wandb() + if self.init_kwargs: + self.wandb.init(**self.init_kwargs) + else: + self.wandb.init() + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + if self.with_step: + self.wandb.log(tags, step=self.get_iter(runner), commit=self.commit) + else: + tags['global_step'] = self.get_iter(runner) + self.wandb.log(tags, commit=self.commit) + + @master_only + def after_run(self, runner): + self.wandb.join() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py new file mode 100644 index 000000000000..8f92871c64cd --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/lr_updater.py @@ -0,0 +1,615 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numbers +from math import cos, pi + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from .hook import HOOKS, Hook + + +class LrUpdaterHook(Hook): + """LR Scheduler in MMCV. + + Args: + by_epoch (bool): LR changes epoch by epoch + warmup (string): Type of warmup used. It can be None(use no warmup), + 'constant', 'linear' or 'exp' + warmup_iters (int): The number of iterations or epochs that warmup + lasts + warmup_ratio (float): LR used at the beginning of warmup equals to + warmup_ratio * initial_lr + warmup_by_epoch (bool): When warmup_by_epoch == True, warmup_iters + means the number of epochs that warmup lasts, otherwise means the + number of iteration that warmup lasts + """ + + def __init__(self, by_epoch=True, warmup=None, warmup_iters=0, warmup_ratio=0.1, warmup_by_epoch=False): + # validate the "warmup" argument + if warmup is not None: + if warmup not in ['constant', 'linear', 'exp']: + raise ValueError( + f'"{warmup}" is not a supported type for warming up, valid' ' types are "constant" and "linear"' + ) + if warmup is not None: + assert warmup_iters > 0, '"warmup_iters" must be a positive integer' + assert 0 < warmup_ratio <= 1.0, '"warmup_ratio" must be in range (0,1]' + + self.by_epoch = by_epoch + self.warmup = warmup + self.warmup_iters = warmup_iters + self.warmup_ratio = warmup_ratio + self.warmup_by_epoch = warmup_by_epoch + + if self.warmup_by_epoch: + self.warmup_epochs = self.warmup_iters + self.warmup_iters = None + else: + self.warmup_epochs = None + + self.base_lr = [] # initial lr for all param groups + self.regular_lr = [] # expected lr if no warming up is performed + + def _set_lr(self, runner, lr_groups): + if isinstance(runner.optimizer, dict): + for k, optim in runner.optimizer.items(): + for param_group, lr in zip(optim.param_groups, lr_groups[k]): + param_group['lr'] = lr + else: + for param_group, lr in zip(runner.optimizer.param_groups, lr_groups): + param_group['lr'] = lr + + def get_lr(self, runner, base_lr): + raise NotImplementedError + + def get_regular_lr(self, runner): + if isinstance(runner.optimizer, dict): + lr_groups = {} + for k in runner.optimizer.keys(): + _lr_group = [self.get_lr(runner, _base_lr) for _base_lr in self.base_lr[k]] + lr_groups.update({k: _lr_group}) + + return lr_groups + else: + return [self.get_lr(runner, _base_lr) for _base_lr in self.base_lr] + + def get_warmup_lr(self, cur_iters): + def _get_warmup_lr(cur_iters, regular_lr): + if self.warmup == 'constant': + warmup_lr = [_lr * self.warmup_ratio for _lr in regular_lr] + elif self.warmup == 'linear': + k = (1 - cur_iters / self.warmup_iters) * (1 - self.warmup_ratio) + warmup_lr = [_lr * (1 - k) for _lr in regular_lr] + elif self.warmup == 'exp': + k = self.warmup_ratio ** (1 - cur_iters / self.warmup_iters) + warmup_lr = [_lr * k for _lr in regular_lr] + return warmup_lr + + if isinstance(self.regular_lr, dict): + lr_groups = {} + for key, regular_lr in self.regular_lr.items(): + lr_groups[key] = _get_warmup_lr(cur_iters, regular_lr) + return lr_groups + else: + return _get_warmup_lr(cur_iters, self.regular_lr) + + def before_run(self, runner): + # NOTE: when resuming from a checkpoint, if 'initial_lr' is not saved, + # it will be set according to the optimizer params + if isinstance(runner.optimizer, dict): + self.base_lr = {} + for k, optim in runner.optimizer.items(): + for group in optim.param_groups: + group.setdefault('initial_lr', group['lr']) + _base_lr = [group['initial_lr'] for group in optim.param_groups] + self.base_lr.update({k: _base_lr}) + else: + for group in runner.optimizer.param_groups: + group.setdefault('initial_lr', group['lr']) + self.base_lr = [group['initial_lr'] for group in runner.optimizer.param_groups] + + def before_train_epoch(self, runner): + if self.warmup_iters is None: + epoch_len = len(runner.data_loader) + self.warmup_iters = self.warmup_epochs * epoch_len + + if not self.by_epoch: + return + + self.regular_lr = self.get_regular_lr(runner) + self._set_lr(runner, self.regular_lr) + + def before_train_iter(self, runner): + cur_iter = runner.iter + if not self.by_epoch: + self.regular_lr = self.get_regular_lr(runner) + if self.warmup is None or cur_iter >= self.warmup_iters: + self._set_lr(runner, self.regular_lr) + else: + warmup_lr = self.get_warmup_lr(cur_iter) + self._set_lr(runner, warmup_lr) + elif self.by_epoch: + if self.warmup is None or cur_iter > self.warmup_iters: + return + elif cur_iter == self.warmup_iters: + self._set_lr(runner, self.regular_lr) + else: + warmup_lr = self.get_warmup_lr(cur_iter) + self._set_lr(runner, warmup_lr) + + +@HOOKS.register_module() +class FixedLrUpdaterHook(LrUpdaterHook): + def __init__(self, **kwargs): + super(FixedLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + return base_lr + + +@HOOKS.register_module() +class StepLrUpdaterHook(LrUpdaterHook): + """Step LR scheduler with min_lr clipping. + + Args: + step (int | list[int]): Step to decay the LR. If an int value is given, + regard it as the decay interval. If a list is given, decay LR at + these steps. + gamma (float, optional): Decay LR ratio. Default: 0.1. + min_lr (float, optional): Minimum LR value to keep. If LR after decay + is lower than `min_lr`, it will be clipped to this value. If None + is given, we don't perform lr clipping. Default: None. + """ + + def __init__(self, step, gamma=0.1, min_lr=None, **kwargs): + if isinstance(step, list): + assert mmcv.is_list_of(step, int) + assert all([s > 0 for s in step]) + elif isinstance(step, int): + assert step > 0 + else: + raise TypeError('"step" must be a list or integer') + self.step = step + self.gamma = gamma + self.min_lr = min_lr + super(StepLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + progress = runner.epoch if self.by_epoch else runner.iter + + # calculate exponential term + if isinstance(self.step, int): + exp = progress // self.step + else: + exp = len(self.step) + for i, s in enumerate(self.step): + if progress < s: + exp = i + break + + lr = base_lr * (self.gamma ** exp) + if self.min_lr is not None: + # clip to a minimum value + lr = max(lr, self.min_lr) + return lr + + +@HOOKS.register_module() +class ExpLrUpdaterHook(LrUpdaterHook): + def __init__(self, gamma, **kwargs): + self.gamma = gamma + super(ExpLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + progress = runner.epoch if self.by_epoch else runner.iter + return base_lr * self.gamma ** progress + + +@HOOKS.register_module() +class PolyLrUpdaterHook(LrUpdaterHook): + def __init__(self, power=1.0, min_lr=0.0, **kwargs): + self.power = power + self.min_lr = min_lr + super(PolyLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + if self.by_epoch: + progress = runner.epoch + max_progress = runner.max_epochs + else: + progress = runner.iter + max_progress = runner.max_iters + coeff = (1 - progress / max_progress) ** self.power + return (base_lr - self.min_lr) * coeff + self.min_lr + + +@HOOKS.register_module() +class InvLrUpdaterHook(LrUpdaterHook): + def __init__(self, gamma, power=1.0, **kwargs): + self.gamma = gamma + self.power = power + super(InvLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + progress = runner.epoch if self.by_epoch else runner.iter + return base_lr * (1 + self.gamma * progress) ** (-self.power) + + +@HOOKS.register_module() +class CosineAnnealingLrUpdaterHook(LrUpdaterHook): + def __init__(self, min_lr=None, min_lr_ratio=None, **kwargs): + assert (min_lr is None) ^ (min_lr_ratio is None) + self.min_lr = min_lr + self.min_lr_ratio = min_lr_ratio + super(CosineAnnealingLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + if self.by_epoch: + progress = runner.epoch + max_progress = runner.max_epochs + else: + progress = runner.iter + max_progress = runner.max_iters + + if self.min_lr_ratio is not None: + target_lr = base_lr * self.min_lr_ratio + else: + target_lr = self.min_lr + return annealing_cos(base_lr, target_lr, progress / max_progress) + + +@HOOKS.register_module() +class FlatCosineAnnealingLrUpdaterHook(LrUpdaterHook): + """Flat + Cosine lr schedule. + + Modified from https://github.com/fastai/fastai/blob/master/fastai/callback/schedule.py#L128 # noqa: E501 + + Args: + start_percent (float): When to start annealing the learning rate + after the percentage of the total training steps. + The value should be in range [0, 1). + Default: 0.75 + min_lr (float, optional): The minimum lr. Default: None. + min_lr_ratio (float, optional): The ratio of minimum lr to the base lr. + Either `min_lr` or `min_lr_ratio` should be specified. + Default: None. + """ + + def __init__(self, start_percent=0.75, min_lr=None, min_lr_ratio=None, **kwargs): + assert (min_lr is None) ^ (min_lr_ratio is None) + if start_percent < 0 or start_percent > 1 or not isinstance(start_percent, float): + raise ValueError('expected float between 0 and 1 start_percent, but ' f'got {start_percent}') + self.start_percent = start_percent + self.min_lr = min_lr + self.min_lr_ratio = min_lr_ratio + super(FlatCosineAnnealingLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + if self.by_epoch: + start = round(runner.max_epochs * self.start_percent) + progress = runner.epoch - start + max_progress = runner.max_epochs - start + else: + start = round(runner.max_iters * self.start_percent) + progress = runner.iter - start + max_progress = runner.max_iters - start + + if self.min_lr_ratio is not None: + target_lr = base_lr * self.min_lr_ratio + else: + target_lr = self.min_lr + + if progress < 0: + return base_lr + else: + return annealing_cos(base_lr, target_lr, progress / max_progress) + + +@HOOKS.register_module() +class CosineRestartLrUpdaterHook(LrUpdaterHook): + """Cosine annealing with restarts learning rate scheme. + + Args: + periods (list[int]): Periods for each cosine anneling cycle. + restart_weights (list[float], optional): Restart weights at each + restart iteration. Default: [1]. + min_lr (float, optional): The minimum lr. Default: None. + min_lr_ratio (float, optional): The ratio of minimum lr to the base lr. + Either `min_lr` or `min_lr_ratio` should be specified. + Default: None. + """ + + def __init__(self, periods, restart_weights=[1], min_lr=None, min_lr_ratio=None, **kwargs): + assert (min_lr is None) ^ (min_lr_ratio is None) + self.periods = periods + self.min_lr = min_lr + self.min_lr_ratio = min_lr_ratio + self.restart_weights = restart_weights + assert len(self.periods) == len( + self.restart_weights + ), 'periods and restart_weights should have the same length.' + super(CosineRestartLrUpdaterHook, self).__init__(**kwargs) + + self.cumulative_periods = [sum(self.periods[0 : i + 1]) for i in range(0, len(self.periods))] + + def get_lr(self, runner, base_lr): + if self.by_epoch: + progress = runner.epoch + else: + progress = runner.iter + + if self.min_lr_ratio is not None: + target_lr = base_lr * self.min_lr_ratio + else: + target_lr = self.min_lr + + idx = get_position_from_periods(progress, self.cumulative_periods) + current_weight = self.restart_weights[idx] + nearest_restart = 0 if idx == 0 else self.cumulative_periods[idx - 1] + current_periods = self.periods[idx] + + alpha = min((progress - nearest_restart) / current_periods, 1) + return annealing_cos(base_lr, target_lr, alpha, current_weight) + + +def get_position_from_periods(iteration, cumulative_periods): + """Get the position from a period list. + + It will return the index of the right-closest number in the period list. + For example, the cumulative_periods = [100, 200, 300, 400], + if iteration == 50, return 0; + if iteration == 210, return 2; + if iteration == 300, return 3. + + Args: + iteration (int): Current iteration. + cumulative_periods (list[int]): Cumulative period list. + + Returns: + int: The position of the right-closest number in the period list. + """ + for i, period in enumerate(cumulative_periods): + if iteration < period: + return i + raise ValueError(f'Current iteration {iteration} exceeds ' f'cumulative_periods {cumulative_periods}') + + +@HOOKS.register_module() +class CyclicLrUpdaterHook(LrUpdaterHook): + """Cyclic LR Scheduler. + + Implement the cyclical learning rate policy (CLR) described in + https://arxiv.org/pdf/1506.01186.pdf + + Different from the original paper, we use cosine annealing rather than + triangular policy inside a cycle. This improves the performance in the + 3D detection area. + + Args: + by_epoch (bool): Whether to update LR by epoch. + target_ratio (tuple[float]): Relative ratio of the highest LR and the + lowest LR to the initial LR. + cyclic_times (int): Number of cycles during training + step_ratio_up (float): The ratio of the increasing process of LR in + the total cycle. + anneal_strategy (str): {'cos', 'linear'} + Specifies the annealing strategy: 'cos' for cosine annealing, + 'linear' for linear annealing. Default: 'cos'. + """ + + def __init__( + self, + by_epoch=False, + target_ratio=(10, 1e-4), + cyclic_times=1, + step_ratio_up=0.4, + anneal_strategy='cos', + **kwargs, + ): + if isinstance(target_ratio, float): + target_ratio = (target_ratio, target_ratio / 1e5) + elif isinstance(target_ratio, tuple): + target_ratio = (target_ratio[0], target_ratio[0] / 1e5) if len(target_ratio) == 1 else target_ratio + else: + raise ValueError('target_ratio should be either float ' f'or tuple, got {type(target_ratio)}') + + assert len(target_ratio) == 2, '"target_ratio" must be list or tuple of two floats' + assert 0 <= step_ratio_up < 1.0, '"step_ratio_up" must be in range [0,1)' + + self.target_ratio = target_ratio + self.cyclic_times = cyclic_times + self.step_ratio_up = step_ratio_up + self.lr_phases = [] # init lr_phases + # validate anneal_strategy + if anneal_strategy not in ['cos', 'linear']: + raise ValueError('anneal_strategy must be one of "cos" or ' f'"linear", instead got {anneal_strategy}') + elif anneal_strategy == 'cos': + self.anneal_func = annealing_cos + elif anneal_strategy == 'linear': + self.anneal_func = annealing_linear + + assert not by_epoch, 'currently only support "by_epoch" = False' + super(CyclicLrUpdaterHook, self).__init__(by_epoch, **kwargs) + + def before_run(self, runner): + super(CyclicLrUpdaterHook, self).before_run(runner) + # initiate lr_phases + # total lr_phases are separated as up and down + max_iter_per_phase = runner.max_iters // self.cyclic_times + iter_up_phase = int(self.step_ratio_up * max_iter_per_phase) + self.lr_phases.append([0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]]) + self.lr_phases.append( + [iter_up_phase, max_iter_per_phase, max_iter_per_phase, self.target_ratio[0], self.target_ratio[1]] + ) + + def get_lr(self, runner, base_lr): + curr_iter = runner.iter + for (start_iter, end_iter, max_iter_per_phase, start_ratio, end_ratio) in self.lr_phases: + curr_iter %= max_iter_per_phase + if start_iter <= curr_iter < end_iter: + progress = curr_iter - start_iter + return self.anneal_func(base_lr * start_ratio, base_lr * end_ratio, progress / (end_iter - start_iter)) + + +@HOOKS.register_module() +class OneCycleLrUpdaterHook(LrUpdaterHook): + """One Cycle LR Scheduler. + + The 1cycle learning rate policy changes the learning rate after every + batch. The one cycle learning rate policy is described in + https://arxiv.org/pdf/1708.07120.pdf + + Args: + max_lr (float or list): Upper learning rate boundaries in the cycle + for each parameter group. + total_steps (int, optional): The total number of steps in the cycle. + Note that if a value is not provided here, it will be the max_iter + of runner. Default: None. + pct_start (float): The percentage of the cycle (in number of steps) + spent increasing the learning rate. + Default: 0.3 + anneal_strategy (str): {'cos', 'linear'} + Specifies the annealing strategy: 'cos' for cosine annealing, + 'linear' for linear annealing. + Default: 'cos' + div_factor (float): Determines the initial learning rate via + initial_lr = max_lr/div_factor + Default: 25 + final_div_factor (float): Determines the minimum learning rate via + min_lr = initial_lr/final_div_factor + Default: 1e4 + three_phase (bool): If three_phase is True, use a third phase of the + schedule to annihilate the learning rate according to + final_div_factor instead of modifying the second phase (the first + two phases will be symmetrical about the step indicated by + pct_start). + Default: False + """ + + def __init__( + self, + max_lr, + total_steps=None, + pct_start=0.3, + anneal_strategy='cos', + div_factor=25, + final_div_factor=1e4, + three_phase=False, + **kwargs, + ): + # validate by_epoch, currently only support by_epoch = False + if 'by_epoch' not in kwargs: + kwargs['by_epoch'] = False + else: + assert not kwargs['by_epoch'], 'currently only support "by_epoch" = False' + if not isinstance(max_lr, (numbers.Number, list, dict)): + raise ValueError('the type of max_lr must be the one of list or ' f'dict, but got {type(max_lr)}') + self._max_lr = max_lr + if total_steps is not None: + if not isinstance(total_steps, int): + raise ValueError('the type of total_steps must be int, but' f'got {type(total_steps)}') + self.total_steps = total_steps + # validate pct_start + if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float): + raise ValueError('expected float between 0 and 1 pct_start, but ' f'got {pct_start}') + self.pct_start = pct_start + # validate anneal_strategy + if anneal_strategy not in ['cos', 'linear']: + raise ValueError('anneal_strategy must be one of "cos" or ' f'"linear", instead got {anneal_strategy}') + elif anneal_strategy == 'cos': + self.anneal_func = annealing_cos + elif anneal_strategy == 'linear': + self.anneal_func = annealing_linear + self.div_factor = div_factor + self.final_div_factor = final_div_factor + self.three_phase = three_phase + self.lr_phases = [] # init lr_phases + super(OneCycleLrUpdaterHook, self).__init__(**kwargs) + + def before_run(self, runner): + if hasattr(self, 'total_steps'): + total_steps = self.total_steps + else: + total_steps = runner.max_iters + if total_steps < runner.max_iters: + raise ValueError( + 'The total steps must be greater than or equal to max ' + f'iterations {runner.max_iters} of runner, but total steps ' + f'is {total_steps}.' + ) + + if isinstance(runner.optimizer, dict): + self.base_lr = {} + for k, optim in runner.optimizer.items(): + _max_lr = format_param(k, optim, self._max_lr) + self.base_lr[k] = [lr / self.div_factor for lr in _max_lr] + for group, lr in zip(optim.param_groups, self.base_lr[k]): + group.setdefault('initial_lr', lr) + else: + k = type(runner.optimizer).__name__ + _max_lr = format_param(k, runner.optimizer, self._max_lr) + self.base_lr = [lr / self.div_factor for lr in _max_lr] + for group, lr in zip(runner.optimizer.param_groups, self.base_lr): + group.setdefault('initial_lr', lr) + + if self.three_phase: + self.lr_phases.append([float(self.pct_start * total_steps) - 1, 1, self.div_factor]) + self.lr_phases.append([float(2 * self.pct_start * total_steps) - 2, self.div_factor, 1]) + self.lr_phases.append([total_steps - 1, 1, 1 / self.final_div_factor]) + else: + self.lr_phases.append([float(self.pct_start * total_steps) - 1, 1, self.div_factor]) + self.lr_phases.append([total_steps - 1, self.div_factor, 1 / self.final_div_factor]) + + def get_lr(self, runner, base_lr): + curr_iter = runner.iter + start_iter = 0 + for i, (end_iter, start_lr, end_lr) in enumerate(self.lr_phases): + if curr_iter <= end_iter: + pct = (curr_iter - start_iter) / (end_iter - start_iter) + lr = self.anneal_func(base_lr * start_lr, base_lr * end_lr, pct) + break + start_iter = end_iter + return lr + + +def annealing_cos(start, end, factor, weight=1): + """Calculate annealing cos learning rate. + + Cosine anneal from `weight * start + (1 - weight) * end` to `end` as + percentage goes from 0.0 to 1.0. + + Args: + start (float): The starting learning rate of the cosine annealing. + end (float): The ending learing rate of the cosine annealing. + factor (float): The coefficient of `pi` when calculating the current + percentage. Range from 0.0 to 1.0. + weight (float, optional): The combination factor of `start` and `end` + when calculating the actual starting learning rate. Default to 1. + """ + cos_out = cos(pi * factor) + 1 + return end + 0.5 * weight * (start - end) * cos_out + + +def annealing_linear(start, end, factor): + """Calculate annealing linear learning rate. + + Linear anneal from `start` to `end` as percentage goes from 0.0 to 1.0. + + Args: + start (float): The starting learning rate of the linear annealing. + end (float): The ending learing rate of the linear annealing. + factor (float): The coefficient of `pi` when calculating the current + percentage. Range from 0.0 to 1.0. + """ + return start + (end - start) * factor + + +def format_param(name, optim, param): + if isinstance(param, numbers.Number): + return [param] * len(optim.param_groups) + elif isinstance(param, (list, tuple)): # multi param groups + if len(param) != len(optim.param_groups): + raise ValueError(f'expected {len(optim.param_groups)} ' f'values for {name}, got {len(param)}') + return param + else: # multi optimizers + if name not in param: + raise KeyError(f'{name} is not found in {param.keys()}') + return param[name] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py new file mode 100644 index 000000000000..d483c16b512c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/memory.py @@ -0,0 +1,24 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class EmptyCacheHook(Hook): + def __init__(self, before_epoch=False, after_epoch=True, after_iter=False): + self._before_epoch = before_epoch + self._after_epoch = after_epoch + self._after_iter = after_iter + + def after_iter(self, runner): + if self._after_iter: + torch.cuda.empty_cache() + + def before_epoch(self, runner): + if self._before_epoch: + torch.cuda.empty_cache() + + def after_epoch(self, runner): + if self._after_epoch: + torch.cuda.empty_cache() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py new file mode 100644 index 000000000000..b366fa8e6817 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/momentum_updater.py @@ -0,0 +1,421 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from .hook import HOOKS, Hook +from .lr_updater import annealing_cos, annealing_linear, format_param + + +class MomentumUpdaterHook(Hook): + def __init__(self, by_epoch=True, warmup=None, warmup_iters=0, warmup_ratio=0.9): + # validate the "warmup" argument + if warmup is not None: + if warmup not in ['constant', 'linear', 'exp']: + raise ValueError( + f'"{warmup}" is not a supported type for warming up, valid' ' types are "constant" and "linear"' + ) + if warmup is not None: + assert warmup_iters > 0, '"warmup_iters" must be a positive integer' + assert 0 < warmup_ratio <= 1.0, '"warmup_momentum" must be in range (0,1]' + + self.by_epoch = by_epoch + self.warmup = warmup + self.warmup_iters = warmup_iters + self.warmup_ratio = warmup_ratio + + self.base_momentum = [] # initial momentum for all param groups + self.regular_momentum = [] # expected momentum if no warming up is performed + + def _set_momentum(self, runner, momentum_groups): + if isinstance(runner.optimizer, dict): + for k, optim in runner.optimizer.items(): + for param_group, mom in zip(optim.param_groups, momentum_groups[k]): + if 'momentum' in param_group.keys(): + param_group['momentum'] = mom + elif 'betas' in param_group.keys(): + param_group['betas'] = (mom, param_group['betas'][1]) + else: + for param_group, mom in zip(runner.optimizer.param_groups, momentum_groups): + if 'momentum' in param_group.keys(): + param_group['momentum'] = mom + elif 'betas' in param_group.keys(): + param_group['betas'] = (mom, param_group['betas'][1]) + + def get_momentum(self, runner, base_momentum): + raise NotImplementedError + + def get_regular_momentum(self, runner): + if isinstance(runner.optimizer, dict): + momentum_groups = {} + for k in runner.optimizer.keys(): + _momentum_group = [ + self.get_momentum(runner, _base_momentum) for _base_momentum in self.base_momentum[k] + ] + momentum_groups.update({k: _momentum_group}) + return momentum_groups + else: + return [self.get_momentum(runner, _base_momentum) for _base_momentum in self.base_momentum] + + def get_warmup_momentum(self, cur_iters): + def _get_warmup_momentum(cur_iters, regular_momentum): + if self.warmup == 'constant': + warmup_momentum = [_momentum / self.warmup_ratio for _momentum in self.regular_momentum] + elif self.warmup == 'linear': + k = (1 - cur_iters / self.warmup_iters) * (1 - self.warmup_ratio) + warmup_momentum = [_momentum / (1 - k) for _momentum in self.regular_mom] + elif self.warmup == 'exp': + k = self.warmup_ratio ** (1 - cur_iters / self.warmup_iters) + warmup_momentum = [_momentum / k for _momentum in self.regular_mom] + return warmup_momentum + + if isinstance(self.regular_momentum, dict): + momentum_groups = {} + for key, regular_momentum in self.regular_momentum.items(): + momentum_groups[key] = _get_warmup_momentum(cur_iters, regular_momentum) + return momentum_groups + else: + return _get_warmup_momentum(cur_iters, self.regular_momentum) + + def before_run(self, runner): + # NOTE: when resuming from a checkpoint, + # if 'initial_momentum' is not saved, + # it will be set according to the optimizer params + if isinstance(runner.optimizer, dict): + self.base_momentum = {} + for k, optim in runner.optimizer.items(): + for group in optim.param_groups: + if 'momentum' in group.keys(): + group.setdefault('initial_momentum', group['momentum']) + else: + group.setdefault('initial_momentum', group['betas'][0]) + _base_momentum = [group['initial_momentum'] for group in optim.param_groups] + self.base_momentum.update({k: _base_momentum}) + else: + for group in runner.optimizer.param_groups: + if 'momentum' in group.keys(): + group.setdefault('initial_momentum', group['momentum']) + else: + group.setdefault('initial_momentum', group['betas'][0]) + self.base_momentum = [group['initial_momentum'] for group in runner.optimizer.param_groups] + + def before_train_epoch(self, runner): + if not self.by_epoch: + return + self.regular_mom = self.get_regular_momentum(runner) + self._set_momentum(runner, self.regular_mom) + + def before_train_iter(self, runner): + cur_iter = runner.iter + if not self.by_epoch: + self.regular_mom = self.get_regular_momentum(runner) + if self.warmup is None or cur_iter >= self.warmup_iters: + self._set_momentum(runner, self.regular_mom) + else: + warmup_momentum = self.get_warmup_momentum(cur_iter) + self._set_momentum(runner, warmup_momentum) + elif self.by_epoch: + if self.warmup is None or cur_iter > self.warmup_iters: + return + elif cur_iter == self.warmup_iters: + self._set_momentum(runner, self.regular_mom) + else: + warmup_momentum = self.get_warmup_momentum(cur_iter) + self._set_momentum(runner, warmup_momentum) + + +@HOOKS.register_module() +class StepMomentumUpdaterHook(MomentumUpdaterHook): + """Step momentum scheduler with min value clipping. + + Args: + step (int | list[int]): Step to decay the momentum. If an int value is + given, regard it as the decay interval. If a list is given, decay + momentum at these steps. + gamma (float, optional): Decay momentum ratio. Default: 0.5. + min_momentum (float, optional): Minimum momentum value to keep. If + momentum after decay is lower than this value, it will be clipped + accordingly. If None is given, we don't perform lr clipping. + Default: None. + """ + + def __init__(self, step, gamma=0.5, min_momentum=None, **kwargs): + if isinstance(step, list): + assert mmcv.is_list_of(step, int) + assert all([s > 0 for s in step]) + elif isinstance(step, int): + assert step > 0 + else: + raise TypeError('"step" must be a list or integer') + self.step = step + self.gamma = gamma + self.min_momentum = min_momentum + super(StepMomentumUpdaterHook, self).__init__(**kwargs) + + def get_momentum(self, runner, base_momentum): + progress = runner.epoch if self.by_epoch else runner.iter + + # calculate exponential term + if isinstance(self.step, int): + exp = progress // self.step + else: + exp = len(self.step) + for i, s in enumerate(self.step): + if progress < s: + exp = i + break + + momentum = base_momentum * (self.gamma ** exp) + if self.min_momentum is not None: + # clip to a minimum value + momentum = max(momentum, self.min_momentum) + return momentum + + +@HOOKS.register_module() +class CosineAnnealingMomentumUpdaterHook(MomentumUpdaterHook): + def __init__(self, min_momentum=None, min_momentum_ratio=None, **kwargs): + assert (min_momentum is None) ^ (min_momentum_ratio is None) + self.min_momentum = min_momentum + self.min_momentum_ratio = min_momentum_ratio + super(CosineAnnealingMomentumUpdaterHook, self).__init__(**kwargs) + + def get_momentum(self, runner, base_momentum): + if self.by_epoch: + progress = runner.epoch + max_progress = runner.max_epochs + else: + progress = runner.iter + max_progress = runner.max_iters + if self.min_momentum_ratio is not None: + target_momentum = base_momentum * self.min_momentum_ratio + else: + target_momentum = self.min_momentum + return annealing_cos(base_momentum, target_momentum, progress / max_progress) + + +@HOOKS.register_module() +class CyclicMomentumUpdaterHook(MomentumUpdaterHook): + """Cyclic momentum Scheduler. + + Implement the cyclical momentum scheduler policy described in + https://arxiv.org/pdf/1708.07120.pdf + + This momentum scheduler usually used together with the CyclicLRUpdater + to improve the performance in the 3D detection area. + + Attributes: + target_ratio (tuple[float]): Relative ratio of the lowest momentum and + the highest momentum to the initial momentum. + cyclic_times (int): Number of cycles during training + step_ratio_up (float): The ratio of the increasing process of momentum + in the total cycle. + by_epoch (bool): Whether to update momentum by epoch. + """ + + def __init__(self, by_epoch=False, target_ratio=(0.85 / 0.95, 1), cyclic_times=1, step_ratio_up=0.4, **kwargs): + if isinstance(target_ratio, float): + target_ratio = (target_ratio, target_ratio / 1e5) + elif isinstance(target_ratio, tuple): + target_ratio = (target_ratio[0], target_ratio[0] / 1e5) if len(target_ratio) == 1 else target_ratio + else: + raise ValueError('target_ratio should be either float ' f'or tuple, got {type(target_ratio)}') + + assert len(target_ratio) == 2, '"target_ratio" must be list or tuple of two floats' + assert 0 <= step_ratio_up < 1.0, '"step_ratio_up" must be in range [0,1)' + + self.target_ratio = target_ratio + self.cyclic_times = cyclic_times + self.step_ratio_up = step_ratio_up + self.momentum_phases = [] # init momentum_phases + # currently only support by_epoch=False + assert not by_epoch, 'currently only support "by_epoch" = False' + super(CyclicMomentumUpdaterHook, self).__init__(by_epoch, **kwargs) + + def before_run(self, runner): + super(CyclicMomentumUpdaterHook, self).before_run(runner) + # initiate momentum_phases + # total momentum_phases are separated as up and down + max_iter_per_phase = runner.max_iters // self.cyclic_times + iter_up_phase = int(self.step_ratio_up * max_iter_per_phase) + self.momentum_phases.append([0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]]) + self.momentum_phases.append( + [iter_up_phase, max_iter_per_phase, max_iter_per_phase, self.target_ratio[0], self.target_ratio[1]] + ) + + def get_momentum(self, runner, base_momentum): + curr_iter = runner.iter + for (start_iter, end_iter, max_iter_per_phase, start_ratio, end_ratio) in self.momentum_phases: + curr_iter %= max_iter_per_phase + if start_iter <= curr_iter < end_iter: + progress = curr_iter - start_iter + return annealing_cos( + base_momentum * start_ratio, base_momentum * end_ratio, progress / (end_iter - start_iter) + ) + + +@HOOKS.register_module() +class OneCycleMomentumUpdaterHook(MomentumUpdaterHook): + """OneCycle momentum Scheduler. + + This momentum scheduler usually used together with the OneCycleLrUpdater + to improve the performance. + + Args: + base_momentum (float or list): Lower momentum boundaries in the cycle + for each parameter group. Note that momentum is cycled inversely + to learning rate; at the peak of a cycle, momentum is + 'base_momentum' and learning rate is 'max_lr'. + Default: 0.85 + max_momentum (float or list): Upper momentum boundaries in the cycle + for each parameter group. Functionally, + it defines the cycle amplitude (max_momentum - base_momentum). + Note that momentum is cycled inversely + to learning rate; at the start of a cycle, momentum is + 'max_momentum' and learning rate is 'base_lr' + Default: 0.95 + pct_start (float): The percentage of the cycle (in number of steps) + spent increasing the learning rate. + Default: 0.3 + anneal_strategy (str): {'cos', 'linear'} + Specifies the annealing strategy: 'cos' for cosine annealing, + 'linear' for linear annealing. + Default: 'cos' + three_phase (bool): If three_phase is True, use a third phase of the + schedule to annihilate the learning rate according to + final_div_factor instead of modifying the second phase (the first + two phases will be symmetrical about the step indicated by + pct_start). + Default: False + """ + + def __init__( + self, base_momentum=0.85, max_momentum=0.95, pct_start=0.3, anneal_strategy='cos', three_phase=False, **kwargs + ): + # validate by_epoch, currently only support by_epoch=False + if 'by_epoch' not in kwargs: + kwargs['by_epoch'] = False + else: + assert not kwargs['by_epoch'], 'currently only support "by_epoch" = False' + if not isinstance(base_momentum, (float, list, dict)): + raise ValueError('base_momentum must be the type among of float,' 'list or dict.') + self._base_momentum = base_momentum + if not isinstance(max_momentum, (float, list, dict)): + raise ValueError('max_momentum must be the type among of float,' 'list or dict.') + self._max_momentum = max_momentum + # validate pct_start + if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float): + raise ValueError('Expected float between 0 and 1 pct_start, but ' f'got {pct_start}') + self.pct_start = pct_start + # validate anneal_strategy + if anneal_strategy not in ['cos', 'linear']: + raise ValueError('anneal_strategy must by one of "cos" or ' f'"linear", instead got {anneal_strategy}') + elif anneal_strategy == 'cos': + self.anneal_func = annealing_cos + elif anneal_strategy == 'linear': + self.anneal_func = annealing_linear + self.three_phase = three_phase + self.momentum_phases = [] # init momentum_phases + super(OneCycleMomentumUpdaterHook, self).__init__(**kwargs) + + def before_run(self, runner): + if isinstance(runner.optimizer, dict): + for k, optim in runner.optimizer.items(): + if 'momentum' not in optim.defaults and 'betas' not in optim.defaults: + raise ValueError('optimizer must support momentum with' 'option enabled') + self.use_beta1 = 'betas' in optim.defaults + _base_momentum = format_param(k, optim, self._base_momentum) + _max_momentum = format_param(k, optim, self._max_momentum) + for group, b_momentum, m_momentum in zip(optim.param_groups, _base_momentum, _max_momentum): + if self.use_beta1: + _, beta2 = group['betas'] + group['betas'] = (m_momentum, beta2) + else: + group['momentum'] = m_momentum + group['base_momentum'] = b_momentum + group['max_momentum'] = m_momentum + else: + optim = runner.optimizer + if 'momentum' not in optim.defaults and 'betas' not in optim.defaults: + raise ValueError('optimizer must support momentum with' 'option enabled') + self.use_beta1 = 'betas' in optim.defaults + k = type(optim).__name__ + _base_momentum = format_param(k, optim, self._base_momentum) + _max_momentum = format_param(k, optim, self._max_momentum) + for group, b_momentum, m_momentum in zip(optim.param_groups, _base_momentum, _max_momentum): + if self.use_beta1: + _, beta2 = group['betas'] + group['betas'] = (m_momentum, beta2) + else: + group['momentum'] = m_momentum + group['base_momentum'] = b_momentum + group['max_momentum'] = m_momentum + + if self.three_phase: + self.momentum_phases.append( + { + 'end_iter': float(self.pct_start * runner.max_iters) - 1, + 'start_momentum': 'max_momentum', + 'end_momentum': 'base_momentum', + } + ) + self.momentum_phases.append( + { + 'end_iter': float(2 * self.pct_start * runner.max_iters) - 2, + 'start_momentum': 'base_momentum', + 'end_momentum': 'max_momentum', + } + ) + self.momentum_phases.append( + {'end_iter': runner.max_iters - 1, 'start_momentum': 'max_momentum', 'end_momentum': 'max_momentum'} + ) + else: + self.momentum_phases.append( + { + 'end_iter': float(self.pct_start * runner.max_iters) - 1, + 'start_momentum': 'max_momentum', + 'end_momentum': 'base_momentum', + } + ) + self.momentum_phases.append( + {'end_iter': runner.max_iters - 1, 'start_momentum': 'base_momentum', 'end_momentum': 'max_momentum'} + ) + + def _set_momentum(self, runner, momentum_groups): + if isinstance(runner.optimizer, dict): + for k, optim in runner.optimizer.items(): + for param_group, mom in zip(optim.param_groups, momentum_groups[k]): + if 'momentum' in param_group.keys(): + param_group['momentum'] = mom + elif 'betas' in param_group.keys(): + param_group['betas'] = (mom, param_group['betas'][1]) + else: + for param_group, mom in zip(runner.optimizer.param_groups, momentum_groups): + if 'momentum' in param_group.keys(): + param_group['momentum'] = mom + elif 'betas' in param_group.keys(): + param_group['betas'] = (mom, param_group['betas'][1]) + + def get_momentum(self, runner, param_group): + curr_iter = runner.iter + start_iter = 0 + for i, phase in enumerate(self.momentum_phases): + end_iter = phase['end_iter'] + if curr_iter <= end_iter or i == len(self.momentum_phases) - 1: + pct = (curr_iter - start_iter) / (end_iter - start_iter) + momentum = self.anneal_func( + param_group[phase['start_momentum']], param_group[phase['end_momentum']], pct + ) + break + start_iter = end_iter + return momentum + + def get_regular_momentum(self, runner): + if isinstance(runner.optimizer, dict): + momentum_groups = {} + for k, optim in runner.optimizer.items(): + _momentum_group = [self.get_momentum(runner, param_group) for param_group in optim.param_groups] + momentum_groups.update({k: _momentum_group}) + return momentum_groups + else: + momentum_groups = [] + for param_group in runner.optimizer.param_groups: + momentum_groups.append(self.get_momentum(runner, param_group)) + return momentum_groups diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py new file mode 100644 index 000000000000..03090c2e97ff --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/optimizer.py @@ -0,0 +1,461 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +from collections import defaultdict +from itertools import chain + +from torch.nn.utils import clip_grad + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import TORCH_VERSION, _BatchNorm, digit_version +from ..dist_utils import allreduce_grads +from ..fp16_utils import LossScaler, wrap_fp16_model +from .hook import HOOKS, Hook + +try: + # If PyTorch version >= 1.6.0, torch.cuda.amp.GradScaler would be imported + # and used; otherwise, auto fp16 will adopt mmcv's implementation. + from torch.cuda.amp import GradScaler +except ImportError: + pass + + +@HOOKS.register_module() +class OptimizerHook(Hook): + def __init__(self, grad_clip=None): + self.grad_clip = grad_clip + + def clip_grads(self, params): + params = list(filter(lambda p: p.requires_grad and p.grad is not None, params)) + if len(params) > 0: + return clip_grad.clip_grad_norm_(params, **self.grad_clip) + + def after_train_iter(self, runner): + runner.optimizer.zero_grad() + runner.outputs['loss'].backward() + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) + runner.optimizer.step() + + +@HOOKS.register_module() +class GradientCumulativeOptimizerHook(OptimizerHook): + """Optimizer Hook implements multi-iters gradient cumulating. + + Args: + cumulative_iters (int, optional): Num of gradient cumulative iters. + The optimizer will step every `cumulative_iters` iters. + Defaults to 1. + + Examples: + >>> # Use cumulative_iters to simulate a large batch size + >>> # It is helpful when the hardware cannot handle a large batch size. + >>> loader = DataLoader(data, batch_size=64) + >>> optim_hook = GradientCumulativeOptimizerHook(cumulative_iters=4) + >>> # almost equals to + >>> loader = DataLoader(data, batch_size=256) + >>> optim_hook = OptimizerHook() + """ + + def __init__(self, cumulative_iters=1, **kwargs): + super(GradientCumulativeOptimizerHook, self).__init__(**kwargs) + + assert isinstance(cumulative_iters, int) and cumulative_iters > 0, ( + f'cumulative_iters only accepts positive int, but got ' f'{type(cumulative_iters)} instead.' + ) + + self.cumulative_iters = cumulative_iters + self.divisible_iters = 0 + self.remainder_iters = 0 + self.initialized = False + + def has_batch_norm(self, module): + if isinstance(module, _BatchNorm): + return True + for m in module.children(): + if self.has_batch_norm(m): + return True + return False + + def _init(self, runner): + if runner.iter % self.cumulative_iters != 0: + runner.logger.warning( + 'Resume iter number is not divisible by cumulative_iters in ' + 'GradientCumulativeOptimizerHook, which means the gradient of ' + 'some iters is lost and the result may be influenced slightly.' + ) + + if self.has_batch_norm(runner.model) and self.cumulative_iters > 1: + runner.logger.warning( + 'GradientCumulativeOptimizerHook may slightly decrease ' + 'performance if the model has BatchNorm layers.' + ) + + residual_iters = runner.max_iters - runner.iter + + self.divisible_iters = residual_iters // self.cumulative_iters * self.cumulative_iters + self.remainder_iters = residual_iters - self.divisible_iters + + self.initialized = True + + def after_train_iter(self, runner): + if not self.initialized: + self._init(runner) + + if runner.iter < self.divisible_iters: + loss_factor = self.cumulative_iters + else: + loss_factor = self.remainder_iters + loss = runner.outputs['loss'] + loss = loss / loss_factor + loss.backward() + + if self.every_n_iters(runner, self.cumulative_iters) or self.is_last_iter(runner): + + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) + runner.optimizer.step() + runner.optimizer.zero_grad() + + +if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) >= digit_version('1.6.0'): + + @HOOKS.register_module() + class Fp16OptimizerHook(OptimizerHook): + """FP16 optimizer hook (using PyTorch's implementation). + + If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend, + to take care of the optimization procedure. + + Args: + loss_scale (float | str | dict): Scale factor configuration. + If loss_scale is a float, static loss scaling will be used with + the specified scale. If loss_scale is a string, it must be + 'dynamic', then dynamic loss scaling will be used. + It can also be a dict containing arguments of GradScalar. + Defaults to 512. For Pytorch >= 1.6, mmcv uses official + implementation of GradScaler. If you use a dict version of + loss_scale to create GradScaler, please refer to: + https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler + for the parameters. + + Examples: + >>> loss_scale = dict( + ... init_scale=65536.0, + ... growth_factor=2.0, + ... backoff_factor=0.5, + ... growth_interval=2000 + ... ) + >>> optimizer_hook = Fp16OptimizerHook(loss_scale=loss_scale) + """ + + def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1, loss_scale=512.0, distributed=True): + self.grad_clip = grad_clip + self.coalesce = coalesce + self.bucket_size_mb = bucket_size_mb + self.distributed = distributed + self._scale_update_param = None + if loss_scale == 'dynamic': + self.loss_scaler = GradScaler() + elif isinstance(loss_scale, float): + self._scale_update_param = loss_scale + self.loss_scaler = GradScaler(init_scale=loss_scale) + elif isinstance(loss_scale, dict): + self.loss_scaler = GradScaler(**loss_scale) + else: + raise ValueError('loss_scale must be of type float, dict, or ' f'"dynamic", got {loss_scale}') + + def before_run(self, runner): + """Preparing steps before Mixed Precision Training.""" + # wrap model mode to fp16 + wrap_fp16_model(runner.model) + # resume from state dict + if 'fp16' in runner.meta and 'loss_scaler' in runner.meta['fp16']: + scaler_state_dict = runner.meta['fp16']['loss_scaler'] + self.loss_scaler.load_state_dict(scaler_state_dict) + + def copy_grads_to_fp32(self, fp16_net, fp32_weights): + """Copy gradients from fp16 model to fp32 weight copy.""" + for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()): + if fp16_param.grad is not None: + if fp32_param.grad is None: + fp32_param.grad = fp32_param.data.new(fp32_param.size()) + fp32_param.grad.copy_(fp16_param.grad) + + def copy_params_to_fp16(self, fp16_net, fp32_weights): + """Copy updated params from fp32 weight copy to fp16 model.""" + for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights): + fp16_param.data.copy_(fp32_param.data) + + def after_train_iter(self, runner): + """Backward optimization steps for Mixed Precision Training. For + dynamic loss scaling, please refer to + https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler. + + 1. Scale the loss by a scale factor. + 2. Backward the loss to obtain the gradients. + 3. Unscale the optimizer’s gradient tensors. + 4. Call optimizer.step() and update scale factor. + 5. Save loss_scaler state_dict for resume purpose. + """ + # clear grads of last iteration + runner.model.zero_grad() + runner.optimizer.zero_grad() + + self.loss_scaler.scale(runner.outputs['loss']).backward() + self.loss_scaler.unscale_(runner.optimizer) + # grad clip + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) + # backward and update scaler + self.loss_scaler.step(runner.optimizer) + self.loss_scaler.update(self._scale_update_param) + + # save state_dict of loss_scaler + runner.meta.setdefault('fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() + + @HOOKS.register_module() + class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook, Fp16OptimizerHook): + """Fp16 optimizer Hook (using PyTorch's implementation) implements + multi-iters gradient cumulating. + + If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend, + to take care of the optimization procedure. + """ + + def __init__(self, *args, **kwargs): + super(GradientCumulativeFp16OptimizerHook, self).__init__(*args, **kwargs) + + def after_train_iter(self, runner): + if not self.initialized: + self._init(runner) + + if runner.iter < self.divisible_iters: + loss_factor = self.cumulative_iters + else: + loss_factor = self.remainder_iters + loss = runner.outputs['loss'] + loss = loss / loss_factor + + self.loss_scaler.scale(loss).backward() + + if self.every_n_iters(runner, self.cumulative_iters) or self.is_last_iter(runner): + + # copy fp16 grads in the model to fp32 params in the optimizer + self.loss_scaler.unscale_(runner.optimizer) + + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) + + # backward and update scaler + self.loss_scaler.step(runner.optimizer) + self.loss_scaler.update(self._scale_update_param) + + # save state_dict of loss_scaler + runner.meta.setdefault('fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() + + # clear grads + runner.model.zero_grad() + runner.optimizer.zero_grad() + + +else: + + @HOOKS.register_module() + class Fp16OptimizerHook(OptimizerHook): + """FP16 optimizer hook (mmcv's implementation). + + The steps of fp16 optimizer is as follows. + 1. Scale the loss value. + 2. BP in the fp16 model. + 2. Copy gradients from fp16 model to fp32 weights. + 3. Update fp32 weights. + 4. Copy updated parameters from fp32 weights to fp16 model. + + Refer to https://arxiv.org/abs/1710.03740 for more details. + + Args: + loss_scale (float | str | dict): Scale factor configuration. + If loss_scale is a float, static loss scaling will be used with + the specified scale. If loss_scale is a string, it must be + 'dynamic', then dynamic loss scaling will be used. + It can also be a dict containing arguments of LossScaler. + Defaults to 512. + """ + + def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1, loss_scale=512.0, distributed=True): + self.grad_clip = grad_clip + self.coalesce = coalesce + self.bucket_size_mb = bucket_size_mb + self.distributed = distributed + if loss_scale == 'dynamic': + self.loss_scaler = LossScaler(mode='dynamic') + elif isinstance(loss_scale, float): + self.loss_scaler = LossScaler(init_scale=loss_scale, mode='static') + elif isinstance(loss_scale, dict): + self.loss_scaler = LossScaler(**loss_scale) + else: + raise ValueError('loss_scale must be of type float, dict, or ' f'"dynamic", got {loss_scale}') + + def before_run(self, runner): + """Preparing steps before Mixed Precision Training. + + 1. Make a master copy of fp32 weights for optimization. + 2. Convert the main model from fp32 to fp16. + """ + # keep a copy of fp32 weights + old_groups = runner.optimizer.param_groups + runner.optimizer.param_groups = copy.deepcopy(runner.optimizer.param_groups) + state = defaultdict(dict) + p_map = { + old_p: p + for old_p, p in zip( + chain(*(g['params'] for g in old_groups)), + chain(*(g['params'] for g in runner.optimizer.param_groups)), + ) + } + for k, v in runner.optimizer.state.items(): + state[p_map[k]] = v + runner.optimizer.state = state + # convert model to fp16 + wrap_fp16_model(runner.model) + # resume from state dict + if 'fp16' in runner.meta and 'loss_scaler' in runner.meta['fp16']: + scaler_state_dict = runner.meta['fp16']['loss_scaler'] + self.loss_scaler.load_state_dict(scaler_state_dict) + + def copy_grads_to_fp32(self, fp16_net, fp32_weights): + """Copy gradients from fp16 model to fp32 weight copy.""" + for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()): + if fp16_param.grad is not None: + if fp32_param.grad is None: + fp32_param.grad = fp32_param.data.new(fp32_param.size()) + fp32_param.grad.copy_(fp16_param.grad) + + def copy_params_to_fp16(self, fp16_net, fp32_weights): + """Copy updated params from fp32 weight copy to fp16 model.""" + for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights): + fp16_param.data.copy_(fp32_param.data) + + def after_train_iter(self, runner): + """Backward optimization steps for Mixed Precision Training. For + dynamic loss scaling, please refer `loss_scalar.py` + + 1. Scale the loss by a scale factor. + 2. Backward the loss to obtain the gradients (fp16). + 3. Copy gradients from the model to the fp32 weight copy. + 4. Scale the gradients back and update the fp32 weight copy. + 5. Copy back the params from fp32 weight copy to the fp16 model. + 6. Save loss_scaler state_dict for resume purpose. + """ + # clear grads of last iteration + runner.model.zero_grad() + runner.optimizer.zero_grad() + # scale the loss value + scaled_loss = runner.outputs['loss'] * self.loss_scaler.loss_scale + scaled_loss.backward() + # copy fp16 grads in the model to fp32 params in the optimizer + + fp32_weights = [] + for param_group in runner.optimizer.param_groups: + fp32_weights += param_group['params'] + self.copy_grads_to_fp32(runner.model, fp32_weights) + # allreduce grads + if self.distributed: + allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb) + + has_overflow = self.loss_scaler.has_overflow(fp32_weights) + # if has overflow, skip this iteration + if not has_overflow: + # scale the gradients back + for param in fp32_weights: + if param.grad is not None: + param.grad.div_(self.loss_scaler.loss_scale) + if self.grad_clip is not None: + grad_norm = self.clip_grads(fp32_weights) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) + # update fp32 params + runner.optimizer.step() + # copy fp32 params to the fp16 model + self.copy_params_to_fp16(runner.model, fp32_weights) + self.loss_scaler.update_scale(has_overflow) + if has_overflow: + runner.logger.warning('Check overflow, downscale loss scale ' f'to {self.loss_scaler.cur_scale}') + + # save state_dict of loss_scaler + runner.meta.setdefault('fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() + + @HOOKS.register_module() + class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook, Fp16OptimizerHook): + """Fp16 optimizer Hook (using mmcv implementation) implements multi- + iters gradient cumulating.""" + + def __init__(self, *args, **kwargs): + super(GradientCumulativeFp16OptimizerHook, self).__init__(*args, **kwargs) + + def after_train_iter(self, runner): + if not self.initialized: + self._init(runner) + + if runner.iter < self.divisible_iters: + loss_factor = self.cumulative_iters + else: + loss_factor = self.remainder_iters + + loss = runner.outputs['loss'] + loss = loss / loss_factor + + # scale the loss value + scaled_loss = loss * self.loss_scaler.loss_scale + scaled_loss.backward() + + if self.every_n_iters(runner, self.cumulative_iters) or self.is_last_iter(runner): + + # copy fp16 grads in the model to fp32 params in the optimizer + fp32_weights = [] + for param_group in runner.optimizer.param_groups: + fp32_weights += param_group['params'] + self.copy_grads_to_fp32(runner.model, fp32_weights) + # allreduce grads + if self.distributed: + allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb) + + has_overflow = self.loss_scaler.has_overflow(fp32_weights) + # if has overflow, skip this iteration + if not has_overflow: + # scale the gradients back + for param in fp32_weights: + if param.grad is not None: + param.grad.div_(self.loss_scaler.loss_scale) + if self.grad_clip is not None: + grad_norm = self.clip_grads(fp32_weights) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, runner.outputs['num_samples']) + # update fp32 params + runner.optimizer.step() + # copy fp32 params to the fp16 model + self.copy_params_to_fp16(runner.model, fp32_weights) + else: + runner.logger.warning('Check overflow, downscale loss scale ' f'to {self.loss_scaler.cur_scale}') + + self.loss_scaler.update_scale(has_overflow) + + # save state_dict of loss_scaler + runner.meta.setdefault('fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() + + # clear grads + runner.model.zero_grad() + runner.optimizer.zero_grad() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py new file mode 100644 index 000000000000..ad58c981b2be --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/profiler.py @@ -0,0 +1,174 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings +from typing import Callable, List, Optional, Union + +import torch + +from ..dist_utils import master_only +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class ProfilerHook(Hook): + """Profiler to analyze performance during training. + + PyTorch Profiler is a tool that allows the collection of the performance + metrics during the training. More details on Profiler can be found at + https://pytorch.org/docs/1.8.1/profiler.html#torch.profiler.profile + + Args: + by_epoch (bool): Profile performance by epoch or by iteration. + Default: True. + profile_iters (int): Number of iterations for profiling. + If ``by_epoch=True``, profile_iters indicates that they are the + first profile_iters epochs at the beginning of the + training, otherwise it indicates the first profile_iters + iterations. Default: 1. + activities (list[str]): List of activity groups (CPU, CUDA) to use in + profiling. Default: ['cpu', 'cuda']. + schedule (dict, optional): Config of generating the callable schedule. + if schedule is None, profiler will not add step markers into the + trace and table view. Default: None. + on_trace_ready (callable, dict): Either a handler or a dict of generate + handler. Default: None. + record_shapes (bool): Save information about operator's input shapes. + Default: False. + profile_memory (bool): Track tensor memory allocation/deallocation. + Default: False. + with_stack (bool): Record source information (file and line number) + for the ops. Default: False. + with_flops (bool): Use formula to estimate the FLOPS of specific + operators (matrix multiplication and 2D convolution). + Default: False. + json_trace_path (str, optional): Exports the collected trace in Chrome + JSON format. Default: None. + + Example: + >>> runner = ... # instantiate a Runner + >>> # tensorboard trace + >>> trace_config = dict(type='tb_trace', dir_name='work_dir') + >>> profiler_config = dict(on_trace_ready=trace_config) + >>> runner.register_profiler_hook(profiler_config) + >>> runner.run(data_loaders=[trainloader], workflow=[('train', 1)]) + """ + + def __init__( + self, + by_epoch: bool = True, + profile_iters: int = 1, + activities: List[str] = ['cpu', 'cuda'], + schedule: Optional[dict] = None, + on_trace_ready: Optional[Union[Callable, dict]] = None, + record_shapes: bool = False, + profile_memory: bool = False, + with_stack: bool = False, + with_flops: bool = False, + json_trace_path: Optional[str] = None, + ) -> None: + try: + from torch import profiler # torch version >= 1.8.1 + except ImportError: + raise ImportError('profiler is the new feature of torch1.8.1, ' f'but your version is {torch.__version__}') + + assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean.' + self.by_epoch = by_epoch + + if profile_iters < 1: + raise ValueError('profile_iters should be greater than 0, but got ' f'{profile_iters}') + self.profile_iters = profile_iters + + if not isinstance(activities, list): + raise ValueError(f'activities should be list, but got {type(activities)}') + self.activities = [] + for activity in activities: + activity = activity.lower() + if activity == 'cpu': + self.activities.append(profiler.ProfilerActivity.CPU) + elif activity == 'cuda': + self.activities.append(profiler.ProfilerActivity.CUDA) + else: + raise ValueError(f'activity should be "cpu" or "cuda", but got {activity}') + + if schedule is not None: + self.schedule = profiler.schedule(**schedule) + else: + self.schedule = None + + self.on_trace_ready = on_trace_ready + self.record_shapes = record_shapes + self.profile_memory = profile_memory + self.with_stack = with_stack + self.with_flops = with_flops + self.json_trace_path = json_trace_path + + @master_only + def before_run(self, runner): + if self.by_epoch and runner.max_epochs < self.profile_iters: + raise ValueError('self.profile_iters should not be greater than ' f'{runner.max_epochs}') + + if not self.by_epoch and runner.max_iters < self.profile_iters: + raise ValueError('self.profile_iters should not be greater than ' f'{runner.max_iters}') + + if callable(self.on_trace_ready): # handler + _on_trace_ready = self.on_trace_ready + elif isinstance(self.on_trace_ready, dict): # config of handler + trace_cfg = self.on_trace_ready.copy() + trace_type = trace_cfg.pop('type') # log_trace handler + if trace_type == 'log_trace': + + def _log_handler(prof): + print(prof.key_averages().table(**trace_cfg)) + + _on_trace_ready = _log_handler + elif trace_type == 'tb_trace': # tensorboard_trace handler + try: + import torch_tb_profiler # noqa: F401 + except ImportError: + raise ImportError('please run "pip install ' 'torch-tb-profiler" to install ' 'torch_tb_profiler') + _on_trace_ready = torch.profiler.tensorboard_trace_handler(**trace_cfg) + else: + raise ValueError('trace_type should be "log_trace" or ' f'"tb_trace", but got {trace_type}') + elif self.on_trace_ready is None: + _on_trace_ready = None # type: ignore + else: + raise ValueError('on_trace_ready should be handler, dict or None, ' f'but got {type(self.on_trace_ready)}') + + if runner.max_epochs > 1: + warnings.warn( + f'profiler will profile {runner.max_epochs} epochs ' + 'instead of 1 epoch. Since profiler will slow down ' + 'the training, it is recommended to train 1 epoch ' + 'with ProfilerHook and adjust your setting according' + ' to the profiler summary. During normal training ' + '(epoch > 1), you may disable the ProfilerHook.' + ) + + self.profiler = torch.profiler.profile( + activities=self.activities, + schedule=self.schedule, + on_trace_ready=_on_trace_ready, + record_shapes=self.record_shapes, + profile_memory=self.profile_memory, + with_stack=self.with_stack, + with_flops=self.with_flops, + ) + + self.profiler.__enter__() + runner.logger.info('profiler is profiling...') + + @master_only + def after_train_epoch(self, runner): + if self.by_epoch and runner.epoch == self.profile_iters - 1: + runner.logger.info('profiler may take a few minutes...') + self.profiler.__exit__(None, None, None) + if self.json_trace_path is not None: + self.profiler.export_chrome_trace(self.json_trace_path) + + @master_only + def after_train_iter(self, runner): + self.profiler.step() + if not self.by_epoch and runner.iter == self.profile_iters - 1: + runner.logger.info('profiler may take a few minutes...') + self.profiler.__exit__(None, None, None) + if self.json_trace_path is not None: + self.profiler.export_chrome_trace(self.json_trace_path) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sampler_seed.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sampler_seed.py new file mode 100644 index 000000000000..ee0dc6bdd8df --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sampler_seed.py @@ -0,0 +1,20 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class DistSamplerSeedHook(Hook): + """Data-loading sampler for distributed training. + + When distributed training, it is only useful in conjunction with + :obj:`EpochBasedRunner`, while :obj:`IterBasedRunner` achieves the same + purpose with :obj:`IterLoader`. + """ + + def before_epoch(self, runner): + if hasattr(runner.data_loader.sampler, 'set_epoch'): + # in case the data loader uses `SequentialSampler` in Pytorch + runner.data_loader.sampler.set_epoch(runner.epoch) + elif hasattr(runner.data_loader.batch_sampler.sampler, 'set_epoch'): + # batch sampler in pytorch warps the sampler as its attributes. + runner.data_loader.batch_sampler.sampler.set_epoch(runner.epoch) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sync_buffer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sync_buffer.py new file mode 100644 index 000000000000..6376b7ff8942 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/hooks/sync_buffer.py @@ -0,0 +1,22 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ..dist_utils import allreduce_params +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class SyncBuffersHook(Hook): + """Synchronize model buffers such as running_mean and running_var in BN at + the end of each epoch. + + Args: + distributed (bool): Whether distributed training is used. It is + effective only for distributed training. Defaults to True. + """ + + def __init__(self, distributed=True): + self.distributed = distributed + + def after_epoch(self, runner): + """All-reduce model buffers at the end of each epoch.""" + if self.distributed: + allreduce_params(runner.model.buffers()) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py new file mode 100644 index 000000000000..f73f8ca649f1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/iter_based_runner.py @@ -0,0 +1,256 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import platform +import shutil +import time +import warnings + +import torch +from torch.optim import Optimizer + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from .base_runner import BaseRunner +from .builder import RUNNERS +from .checkpoint import save_checkpoint +from .hooks import IterTimerHook +from .utils import get_host_info + + +class IterLoader: + def __init__(self, dataloader): + self._dataloader = dataloader + self.iter_loader = iter(self._dataloader) + self._epoch = 0 + + @property + def epoch(self): + return self._epoch + + def __next__(self): + try: + data = next(self.iter_loader) + except StopIteration: + self._epoch += 1 + if hasattr(self._dataloader.sampler, 'set_epoch'): + self._dataloader.sampler.set_epoch(self._epoch) + time.sleep(2) # Prevent possible deadlock during epoch transition + self.iter_loader = iter(self._dataloader) + data = next(self.iter_loader) + + return data + + def __len__(self): + return len(self._dataloader) + + +@RUNNERS.register_module() +class IterBasedRunner(BaseRunner): + """Iteration-based Runner. + + This runner train models iteration by iteration. + """ + + def train(self, data_loader, **kwargs): + self.model.train() + self.mode = 'train' + self.data_loader = data_loader + self._epoch = data_loader.epoch + data_batch = next(data_loader) + self.call_hook('before_train_iter') + outputs = self.model.train_step(data_batch, self.optimizer, **kwargs) + if not isinstance(outputs, dict): + raise TypeError('model.train_step() must return a dict') + if 'log_vars' in outputs: + self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) + self.outputs = outputs + self.call_hook('after_train_iter') + self._inner_iter += 1 + self._iter += 1 + + @torch.no_grad() + def val(self, data_loader, **kwargs): + self.model.eval() + self.mode = 'val' + self.data_loader = data_loader + data_batch = next(data_loader) + self.call_hook('before_val_iter') + outputs = self.model.val_step(data_batch, **kwargs) + if not isinstance(outputs, dict): + raise TypeError('model.val_step() must return a dict') + if 'log_vars' in outputs: + self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) + self.outputs = outputs + self.call_hook('after_val_iter') + self._inner_iter += 1 + + def run(self, data_loaders, workflow, max_iters=None, **kwargs): + """Start running. + + Args: + data_loaders (list[:obj:`DataLoader`]): Dataloaders for training + and validation. + workflow (list[tuple]): A list of (phase, iters) to specify the + running order and iterations. E.g, [('train', 10000), + ('val', 1000)] means running 10000 iterations for training and + 1000 iterations for validation, iteratively. + """ + assert isinstance(data_loaders, list) + assert mmcv.is_list_of(workflow, tuple) + assert len(data_loaders) == len(workflow) + if max_iters is not None: + warnings.warn( + 'setting max_iters in run is deprecated, ' 'please set max_iters in runner_config', DeprecationWarning + ) + self._max_iters = max_iters + assert self._max_iters is not None, 'max_iters must be specified during instantiation' + + work_dir = self.work_dir if self.work_dir is not None else 'NONE' + self.logger.info('Start running, host: %s, work_dir: %s', get_host_info(), work_dir) + self.logger.info('Hooks will be executed in the following order:\n%s', self.get_hook_info()) + self.logger.info('workflow: %s, max: %d iters', workflow, self._max_iters) + self.call_hook('before_run') + + iter_loaders = [IterLoader(x) for x in data_loaders] + + self.call_hook('before_epoch') + + while self.iter < self._max_iters: + for i, flow in enumerate(workflow): + self._inner_iter = 0 + mode, iters = flow + if not isinstance(mode, str) or not hasattr(self, mode): + raise ValueError('runner has no method named "{}" to run a workflow'.format(mode)) + iter_runner = getattr(self, mode) + for _ in range(iters): + if mode == 'train' and self.iter >= self._max_iters: + break + iter_runner(iter_loaders[i], **kwargs) + + time.sleep(1) # wait for some hooks like loggers to finish + self.call_hook('after_epoch') + self.call_hook('after_run') + + def resume(self, checkpoint, resume_optimizer=True, map_location='default'): + """Resume model from checkpoint. + + Args: + checkpoint (str): Checkpoint to resume from. + resume_optimizer (bool, optional): Whether resume the optimizer(s) + if the checkpoint file includes optimizer(s). Default to True. + map_location (str, optional): Same as :func:`torch.load`. + Default to 'default'. + """ + if map_location == 'default': + device_id = torch.cuda.current_device() + checkpoint = self.load_checkpoint(checkpoint, map_location=lambda storage, loc: storage.cuda(device_id)) + else: + checkpoint = self.load_checkpoint(checkpoint, map_location=map_location) + + self._epoch = checkpoint['meta']['epoch'] + self._iter = checkpoint['meta']['iter'] + self._inner_iter = checkpoint['meta']['iter'] + if 'optimizer' in checkpoint and resume_optimizer: + if isinstance(self.optimizer, Optimizer): + self.optimizer.load_state_dict(checkpoint['optimizer']) + elif isinstance(self.optimizer, dict): + for k in self.optimizer.keys(): + self.optimizer[k].load_state_dict(checkpoint['optimizer'][k]) + else: + raise TypeError('Optimizer should be dict or torch.optim.Optimizer ' f'but got {type(self.optimizer)}') + + self.logger.info(f'resumed from epoch: {self.epoch}, iter {self.iter}') + + def save_checkpoint( + self, out_dir, filename_tmpl='iter_{}.pth', meta=None, save_optimizer=True, create_symlink=True + ): + """Save checkpoint to file. + + Args: + out_dir (str): Directory to save checkpoint files. + filename_tmpl (str, optional): Checkpoint file template. + Defaults to 'iter_{}.pth'. + meta (dict, optional): Metadata to be saved in checkpoint. + Defaults to None. + save_optimizer (bool, optional): Whether save optimizer. + Defaults to True. + create_symlink (bool, optional): Whether create symlink to the + latest checkpoint file. Defaults to True. + """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError(f'meta should be a dict or None, but got {type(meta)}') + if self.meta is not None: + meta.update(self.meta) + # Note: meta.update(self.meta) should be done before + # meta.update(epoch=self.epoch + 1, iter=self.iter) otherwise + # there will be problems with resumed checkpoints. + # More details in https://github.com/open-mmlab/mmcv/pull/1108 + meta.update(epoch=self.epoch + 1, iter=self.iter) + + filename = filename_tmpl.format(self.iter + 1) + filepath = osp.join(out_dir, filename) + optimizer = self.optimizer if save_optimizer else None + save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) + # in some environments, `os.symlink` is not supported, you may need to + # set `create_symlink` to False + if create_symlink: + dst_file = osp.join(out_dir, 'latest.pth') + if platform.system() != 'Windows': + mmcv.symlink(filename, dst_file) + else: + shutil.copy(filepath, dst_file) + + def register_training_hooks( + self, + lr_config, + optimizer_config=None, + checkpoint_config=None, + log_config=None, + momentum_config=None, + custom_hooks_config=None, + ): + """Register default hooks for iter-based training. + + Checkpoint hook, optimizer stepper hook and logger hooks will be set to + `by_epoch=False` by default. + + Default hooks include: + + +----------------------+-------------------------+ + | Hooks | Priority | + +======================+=========================+ + | LrUpdaterHook | VERY_HIGH (10) | + +----------------------+-------------------------+ + | MomentumUpdaterHook | HIGH (30) | + +----------------------+-------------------------+ + | OptimizerStepperHook | ABOVE_NORMAL (40) | + +----------------------+-------------------------+ + | CheckpointSaverHook | NORMAL (50) | + +----------------------+-------------------------+ + | IterTimerHook | LOW (70) | + +----------------------+-------------------------+ + | LoggerHook(s) | VERY_LOW (90) | + +----------------------+-------------------------+ + | CustomHook(s) | defaults to NORMAL (50) | + +----------------------+-------------------------+ + + If custom hooks have same priority with default hooks, custom hooks + will be triggered after default hooks. + """ + if checkpoint_config is not None: + checkpoint_config.setdefault('by_epoch', False) + if lr_config is not None: + lr_config.setdefault('by_epoch', False) + if log_config is not None: + for info in log_config['hooks']: + info.setdefault('by_epoch', False) + super(IterBasedRunner, self).register_training_hooks( + lr_config=lr_config, + momentum_config=momentum_config, + optimizer_config=optimizer_config, + checkpoint_config=checkpoint_config, + log_config=log_config, + timer_config=IterTimerHook(), + custom_hooks_config=custom_hooks_config, + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py new file mode 100644 index 000000000000..5a08dfb3b937 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/log_buffer.py @@ -0,0 +1,40 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from collections import OrderedDict + +import numpy as np + + +class LogBuffer: + def __init__(self): + self.val_history = OrderedDict() + self.n_history = OrderedDict() + self.output = OrderedDict() + self.ready = False + + def clear(self): + self.val_history.clear() + self.n_history.clear() + self.clear_output() + + def clear_output(self): + self.output.clear() + self.ready = False + + def update(self, vars, count=1): + assert isinstance(vars, dict) + for key, var in vars.items(): + if key not in self.val_history: + self.val_history[key] = [] + self.n_history[key] = [] + self.val_history[key].append(var) + self.n_history[key].append(count) + + def average(self, n=0): + """Average latest n values or all values.""" + assert n >= 0 + for key in self.val_history: + values = np.array(self.val_history[key][-n:]) + nums = np.array(self.n_history[key][-n:]) + avg = np.sum(values * nums) / np.sum(nums) + self.output[key] = avg + self.ready = True diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py new file mode 100644 index 000000000000..c5a0041381c9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .builder import OPTIMIZER_BUILDERS, OPTIMIZERS, build_optimizer, build_optimizer_constructor +from .default_constructor import DefaultOptimizerConstructor + +__all__ = [ + 'OPTIMIZER_BUILDERS', + 'OPTIMIZERS', + 'DefaultOptimizerConstructor', + 'build_optimizer', + 'build_optimizer_constructor', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py new file mode 100644 index 000000000000..d305b1a6eadd --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/builder.py @@ -0,0 +1,40 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import inspect + +import torch + +from ...utils import Registry, build_from_cfg + +OPTIMIZERS = Registry('optimizer') +OPTIMIZER_BUILDERS = Registry('optimizer builder') + + +def register_torch_optimizers(): + torch_optimizers = [] + for module_name in dir(torch.optim): + if module_name.startswith('__'): + continue + _optim = getattr(torch.optim, module_name) + if inspect.isclass(_optim) and issubclass(_optim, torch.optim.Optimizer): + OPTIMIZERS.register_module()(_optim) + torch_optimizers.append(module_name) + return torch_optimizers + + +TORCH_OPTIMIZERS = register_torch_optimizers() + + +def build_optimizer_constructor(cfg): + return build_from_cfg(cfg, OPTIMIZER_BUILDERS) + + +def build_optimizer(model, cfg): + optimizer_cfg = copy.deepcopy(cfg) + constructor_type = optimizer_cfg.pop('constructor', 'DefaultOptimizerConstructor') + paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None) + optim_constructor = build_optimizer_constructor( + dict(type=constructor_type, optimizer_cfg=optimizer_cfg, paramwise_cfg=paramwise_cfg) + ) + optimizer = optim_constructor(model) + return optimizer diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py new file mode 100644 index 000000000000..c0721ccad28f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/optimizer/default_constructor.py @@ -0,0 +1,246 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch +from torch.nn import GroupNorm, LayerNorm + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import ( + _BatchNorm, + _InstanceNorm, + build_from_cfg, + is_list_of, +) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.ext_loader import check_ops_exist + +from .builder import OPTIMIZER_BUILDERS, OPTIMIZERS + + +@OPTIMIZER_BUILDERS.register_module() +class DefaultOptimizerConstructor: + """Default constructor for optimizers. + + By default each parameter share the same optimizer settings, and we + provide an argument ``paramwise_cfg`` to specify parameter-wise settings. + It is a dict and may contain the following fields: + + - ``custom_keys`` (dict): Specified parameters-wise settings by keys. If + one of the keys in ``custom_keys`` is a substring of the name of one + parameter, then the setting of the parameter will be specified by + ``custom_keys[key]`` and other setting like ``bias_lr_mult`` etc. will + be ignored. It should be noted that the aforementioned ``key`` is the + longest key that is a substring of the name of the parameter. If there + are multiple matched keys with the same length, then the key with lower + alphabet order will be chosen. + ``custom_keys[key]`` should be a dict and may contain fields ``lr_mult`` + and ``decay_mult``. See Example 2 below. + - ``bias_lr_mult`` (float): It will be multiplied to the learning + rate for all bias parameters (except for those in normalization + layers and offset layers of DCN). + - ``bias_decay_mult`` (float): It will be multiplied to the weight + decay for all bias parameters (except for those in + normalization layers, depthwise conv layers, offset layers of DCN). + - ``norm_decay_mult`` (float): It will be multiplied to the weight + decay for all weight and bias parameters of normalization + layers. + - ``dwconv_decay_mult`` (float): It will be multiplied to the weight + decay for all weight and bias parameters of depthwise conv + layers. + - ``dcn_offset_lr_mult`` (float): It will be multiplied to the learning + rate for parameters of offset layer in the deformable convs + of a model. + - ``bypass_duplicate`` (bool): If true, the duplicate parameters + would not be added into optimizer. Default: False. + + Note: + 1. If the option ``dcn_offset_lr_mult`` is used, the constructor will + override the effect of ``bias_lr_mult`` in the bias of offset + layer. So be careful when using both ``bias_lr_mult`` and + ``dcn_offset_lr_mult``. If you wish to apply both of them to the + offset layer in deformable convs, set ``dcn_offset_lr_mult`` + to the original ``dcn_offset_lr_mult`` * ``bias_lr_mult``. + 2. If the option ``dcn_offset_lr_mult`` is used, the constructor will + apply it to all the DCN layers in the model. So be careful when + the model contains multiple DCN layers in places other than + backbone. + + Args: + model (:obj:`nn.Module`): The model with parameters to be optimized. + optimizer_cfg (dict): The config dict of the optimizer. + Positional fields are + + - `type`: class name of the optimizer. + + Optional fields are + + - any arguments of the corresponding optimizer type, e.g., + lr, weight_decay, momentum, etc. + paramwise_cfg (dict, optional): Parameter-wise options. + + Example 1: + >>> model = torch.nn.modules.Conv1d(1, 1, 1) + >>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9, + >>> weight_decay=0.0001) + >>> paramwise_cfg = dict(norm_decay_mult=0.) + >>> optim_builder = DefaultOptimizerConstructor( + >>> optimizer_cfg, paramwise_cfg) + >>> optimizer = optim_builder(model) + + Example 2: + >>> # assume model have attribute model.backbone and model.cls_head + >>> optimizer_cfg = dict(type='SGD', lr=0.01, weight_decay=0.95) + >>> paramwise_cfg = dict(custom_keys={ + '.backbone': dict(lr_mult=0.1, decay_mult=0.9)}) + >>> optim_builder = DefaultOptimizerConstructor( + >>> optimizer_cfg, paramwise_cfg) + >>> optimizer = optim_builder(model) + >>> # Then the `lr` and `weight_decay` for model.backbone is + >>> # (0.01 * 0.1, 0.95 * 0.9). `lr` and `weight_decay` for + >>> # model.cls_head is (0.01, 0.95). + """ + + def __init__(self, optimizer_cfg, paramwise_cfg=None): + if not isinstance(optimizer_cfg, dict): + raise TypeError('optimizer_cfg should be a dict', f'but got {type(optimizer_cfg)}') + self.optimizer_cfg = optimizer_cfg + self.paramwise_cfg = {} if paramwise_cfg is None else paramwise_cfg + self.base_lr = optimizer_cfg.get('lr', None) + self.base_wd = optimizer_cfg.get('weight_decay', None) + self._validate_cfg() + + def _validate_cfg(self): + if not isinstance(self.paramwise_cfg, dict): + raise TypeError('paramwise_cfg should be None or a dict, ' f'but got {type(self.paramwise_cfg)}') + + if 'custom_keys' in self.paramwise_cfg: + if not isinstance(self.paramwise_cfg['custom_keys'], dict): + raise TypeError( + 'If specified, custom_keys must be a dict, ' f'but got {type(self.paramwise_cfg["custom_keys"])}' + ) + if self.base_wd is None: + for key in self.paramwise_cfg['custom_keys']: + if 'decay_mult' in self.paramwise_cfg['custom_keys'][key]: + raise ValueError('base_wd should not be None') + + # get base lr and weight decay + # weight_decay must be explicitly specified if mult is specified + if ( + 'bias_decay_mult' in self.paramwise_cfg + or 'norm_decay_mult' in self.paramwise_cfg + or 'dwconv_decay_mult' in self.paramwise_cfg + ): + if self.base_wd is None: + raise ValueError('base_wd should not be None') + + def _is_in(self, param_group, param_group_list): + assert is_list_of(param_group_list, dict) + param = set(param_group['params']) + param_set = set() + for group in param_group_list: + param_set.update(set(group['params'])) + + return not param.isdisjoint(param_set) + + def add_params(self, params, module, prefix='', is_dcn_module=None): + """Add all parameters of module to the params list. + + The parameters of the given module will be added to the list of param + groups, with specific rules defined by paramwise_cfg. + + Args: + params (list[dict]): A list of param groups, it will be modified + in place. + module (nn.Module): The module to be added. + prefix (str): The prefix of the module + is_dcn_module (int|float|None): If the current module is a + submodule of DCN, `is_dcn_module` will be passed to + control conv_offset layer's learning rate. Defaults to None. + """ + # get param-wise options + custom_keys = self.paramwise_cfg.get('custom_keys', {}) + # first sort with alphabet order and then sort with reversed len of str + sorted_keys = sorted(sorted(custom_keys.keys()), key=len, reverse=True) + + bias_lr_mult = self.paramwise_cfg.get('bias_lr_mult', 1.0) + bias_decay_mult = self.paramwise_cfg.get('bias_decay_mult', 1.0) + norm_decay_mult = self.paramwise_cfg.get('norm_decay_mult', 1.0) + dwconv_decay_mult = self.paramwise_cfg.get('dwconv_decay_mult', 1.0) + bypass_duplicate = self.paramwise_cfg.get('bypass_duplicate', False) + dcn_offset_lr_mult = self.paramwise_cfg.get('dcn_offset_lr_mult', 1.0) + + # special rules for norm layers and depth-wise conv layers + is_norm = isinstance(module, (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm)) + is_dwconv = isinstance(module, torch.nn.Conv2d) and module.in_channels == module.groups + + for name, param in module.named_parameters(recurse=False): + param_group = {'params': [param]} + if not param.requires_grad: + params.append(param_group) + continue + if bypass_duplicate and self._is_in(param_group, params): + warnings.warn(f'{prefix} is duplicate. It is skipped since ' f'bypass_duplicate={bypass_duplicate}') + continue + # if the parameter match one of the custom keys, ignore other rules + is_custom = False + for key in sorted_keys: + if key in f'{prefix}.{name}': + is_custom = True + lr_mult = custom_keys[key].get('lr_mult', 1.0) + param_group['lr'] = self.base_lr * lr_mult + if self.base_wd is not None: + decay_mult = custom_keys[key].get('decay_mult', 1.0) + param_group['weight_decay'] = self.base_wd * decay_mult + break + + if not is_custom: + # bias_lr_mult affects all bias parameters + # except for norm.bias dcn.conv_offset.bias + if name == 'bias' and not (is_norm or is_dcn_module): + param_group['lr'] = self.base_lr * bias_lr_mult + + if prefix.find('conv_offset') != -1 and is_dcn_module and isinstance(module, torch.nn.Conv2d): + # deal with both dcn_offset's bias & weight + param_group['lr'] = self.base_lr * dcn_offset_lr_mult + + # apply weight decay policies + if self.base_wd is not None: + # norm decay + if is_norm: + param_group['weight_decay'] = self.base_wd * norm_decay_mult + # depth-wise conv + elif is_dwconv: + param_group['weight_decay'] = self.base_wd * dwconv_decay_mult + # bias lr and decay + elif name == 'bias' and not is_dcn_module: + # TODO: current bias_decay_mult will have affect on DCN + param_group['weight_decay'] = self.base_wd * bias_decay_mult + params.append(param_group) + + if check_ops_exist(): + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import ( + DeformConv2d, + ModulatedDeformConv2d, + ) + + is_dcn_module = isinstance(module, (DeformConv2d, ModulatedDeformConv2d)) + else: + is_dcn_module = False + for child_name, child_mod in module.named_children(): + child_prefix = f'{prefix}.{child_name}' if prefix else child_name + self.add_params(params, child_mod, prefix=child_prefix, is_dcn_module=is_dcn_module) + + def __call__(self, model): + if hasattr(model, 'module'): + model = model.module + + optimizer_cfg = self.optimizer_cfg.copy() + # if no paramwise option is specified, just use the global setting + if not self.paramwise_cfg: + optimizer_cfg['params'] = model.parameters() + return build_from_cfg(optimizer_cfg, OPTIMIZERS) + + # set param-wise lr and weight decay recursively + params = [] + self.add_params(params, model) + optimizer_cfg['params'] = params + + return build_from_cfg(optimizer_cfg, OPTIMIZERS) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/priority.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/priority.py new file mode 100644 index 000000000000..64cc4e3a05f8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/priority.py @@ -0,0 +1,60 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from enum import Enum + + +class Priority(Enum): + """Hook priority levels. + + +--------------+------------+ + | Level | Value | + +==============+============+ + | HIGHEST | 0 | + +--------------+------------+ + | VERY_HIGH | 10 | + +--------------+------------+ + | HIGH | 30 | + +--------------+------------+ + | ABOVE_NORMAL | 40 | + +--------------+------------+ + | NORMAL | 50 | + +--------------+------------+ + | BELOW_NORMAL | 60 | + +--------------+------------+ + | LOW | 70 | + +--------------+------------+ + | VERY_LOW | 90 | + +--------------+------------+ + | LOWEST | 100 | + +--------------+------------+ + """ + + HIGHEST = 0 + VERY_HIGH = 10 + HIGH = 30 + ABOVE_NORMAL = 40 + NORMAL = 50 + BELOW_NORMAL = 60 + LOW = 70 + VERY_LOW = 90 + LOWEST = 100 + + +def get_priority(priority): + """Get priority value. + + Args: + priority (int or str or :obj:`Priority`): Priority. + + Returns: + int: The priority value. + """ + if isinstance(priority, int): + if priority < 0 or priority > 100: + raise ValueError('priority must be between 0 and 100') + return priority + elif isinstance(priority, Priority): + return priority.value + elif isinstance(priority, str): + return Priority[priority.upper()].value + else: + raise TypeError('priority must be an integer or Priority enum value') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py new file mode 100644 index 000000000000..4ac2ec3e88ff --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/runner/utils.py @@ -0,0 +1,92 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import random +import sys +import time +import warnings +from getpass import getuser +from socket import gethostname + +import numpy as np +import torch + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + + +def get_host_info(): + """Get hostname and username. + + Return empty string if exception raised, e.g. ``getpass.getuser()`` will + lead to error in docker container + """ + host = '' + try: + host = f'{getuser()}@{gethostname()}' + except Exception as e: + warnings.warn(f'Host or user not found: {str(e)}') + finally: + return host + + +def get_time_str(): + return time.strftime('%Y%m%d_%H%M%S', time.localtime()) + + +def obj_from_dict(info, parent=None, default_args=None): + """Initialize an object from dict. + + The dict must contain the key "type", which indicates the object type, it + can be either a string or type, such as "list" or ``list``. Remaining + fields are treated as the arguments for constructing the object. + + Args: + info (dict): Object types and arguments. + parent (:class:`module`): Module which may containing expected object + classes. + default_args (dict, optional): Default arguments for initializing the + object. + + Returns: + any type: Object built from the dict. + """ + assert isinstance(info, dict) and 'type' in info + assert isinstance(default_args, dict) or default_args is None + args = info.copy() + obj_type = args.pop('type') + if mmcv.is_str(obj_type): + if parent is not None: + obj_type = getattr(parent, obj_type) + else: + obj_type = sys.modules[obj_type] + elif not isinstance(obj_type, type): + raise TypeError('type must be a str or valid type, but ' f'got {type(obj_type)}') + if default_args is not None: + for name, value in default_args.items(): + args.setdefault(name, value) + return obj_type(**args) + + +def set_random_seed(seed, deterministic=False, use_rank_shift=False): + """Set random seed. + + Args: + seed (int): Seed to be used. + deterministic (bool): Whether to set the deterministic option for + CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` + to True and `torch.backends.cudnn.benchmark` to False. + Default: False. + rank_shift (bool): Whether to add rank number to the random seed to + have different random seed in different threads. Default: False. + """ + if use_rank_shift: + rank, _ = mmcv.runner.get_dist_info() + seed += rank + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + os.environ['PYTHONHASHSEED'] = str(seed) + if deterministic: + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py new file mode 100644 index 000000000000..f0eb61d01a3d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/__init__.py @@ -0,0 +1,185 @@ +# flake8: noqa +# Copyright (c) OpenMMLab. All rights reserved. +from .config import Config, ConfigDict, DictAction +from .misc import ( + check_prerequisites, + concat_list, + deprecated_api_warning, + has_method, + import_modules_from_strings, + is_list_of, + is_method_overridden, + is_seq_of, + is_str, + is_tuple_of, + iter_cast, + list_cast, + requires_executable, + requires_package, + slice_list, + to_1tuple, + to_2tuple, + to_3tuple, + to_4tuple, + to_ntuple, + tuple_cast, +) +from .path import check_file_exist, fopen, is_filepath, mkdir_or_exist, scandir, symlink +from .progressbar import ProgressBar, track_iter_progress, track_parallel_progress, track_progress +from .testing import ( + assert_attrs_equal, + assert_dict_contains_subset, + assert_dict_has_keys, + assert_is_norm_layer, + assert_keys_equal, + assert_params_all_zeros, + check_python_script, +) +from .timer import Timer, TimerError, check_time +from .version_utils import digit_version, get_git_hash + +try: + import torch +except ImportError: + __all__ = [ + 'Config', + 'ConfigDict', + 'DictAction', + 'is_str', + 'iter_cast', + 'list_cast', + 'tuple_cast', + 'is_seq_of', + 'is_list_of', + 'is_tuple_of', + 'slice_list', + 'concat_list', + 'check_prerequisites', + 'requires_package', + 'requires_executable', + 'is_filepath', + 'fopen', + 'check_file_exist', + 'mkdir_or_exist', + 'symlink', + 'scandir', + 'ProgressBar', + 'track_progress', + 'track_iter_progress', + 'track_parallel_progress', + 'Timer', + 'TimerError', + 'check_time', + 'deprecated_api_warning', + 'digit_version', + 'get_git_hash', + 'import_modules_from_strings', + 'assert_dict_contains_subset', + 'assert_attrs_equal', + 'assert_dict_has_keys', + 'assert_keys_equal', + 'check_python_script', + 'to_1tuple', + 'to_2tuple', + 'to_3tuple', + 'to_4tuple', + 'to_ntuple', + 'is_method_overridden', + 'has_method', + ] +else: + from .env import collect_env + from .logging import get_logger, print_log + from .parrots_jit import jit, skip_no_elena + from .parrots_wrapper import ( + TORCH_VERSION, + BuildExtension, + CppExtension, + CUDAExtension, + DataLoader, + PoolDataLoader, + SyncBatchNorm, + _AdaptiveAvgPoolNd, + _AdaptiveMaxPoolNd, + _AvgPoolNd, + _BatchNorm, + _ConvNd, + _ConvTransposeMixin, + _get_cuda_home, + _InstanceNorm, + _MaxPoolNd, + get_build_config, + is_rocm_pytorch, + ) + from .registry import Registry, build_from_cfg + from .trace import is_jit_tracing + + __all__ = [ + 'Config', + 'ConfigDict', + 'DictAction', + 'collect_env', + 'get_logger', + 'print_log', + 'is_str', + 'iter_cast', + 'list_cast', + 'tuple_cast', + 'is_seq_of', + 'is_list_of', + 'is_tuple_of', + 'slice_list', + 'concat_list', + 'check_prerequisites', + 'requires_package', + 'requires_executable', + 'is_filepath', + 'fopen', + 'check_file_exist', + 'mkdir_or_exist', + 'symlink', + 'scandir', + 'ProgressBar', + 'track_progress', + 'track_iter_progress', + 'track_parallel_progress', + 'Registry', + 'build_from_cfg', + 'Timer', + 'TimerError', + 'check_time', + 'SyncBatchNorm', + '_AdaptiveAvgPoolNd', + '_AdaptiveMaxPoolNd', + '_AvgPoolNd', + '_BatchNorm', + '_ConvNd', + '_ConvTransposeMixin', + '_InstanceNorm', + '_MaxPoolNd', + 'get_build_config', + 'BuildExtension', + 'CppExtension', + 'CUDAExtension', + 'DataLoader', + 'PoolDataLoader', + 'TORCH_VERSION', + 'deprecated_api_warning', + 'digit_version', + 'get_git_hash', + 'import_modules_from_strings', + 'jit', + 'skip_no_elena', + 'assert_dict_contains_subset', + 'assert_attrs_equal', + 'assert_dict_has_keys', + 'assert_keys_equal', + 'assert_is_norm_layer', + 'assert_params_all_zeros', + 'check_python_script', + 'is_method_overridden', + 'is_jit_tracing', + 'is_rocm_pytorch', + '_get_cuda_home', + 'has_method', + ] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py new file mode 100644 index 000000000000..2d8eb6858bc4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/config.py @@ -0,0 +1,658 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import ast +import copy +import os +import os.path as osp +import platform +import shutil +import sys +import tempfile +import uuid +import warnings +from argparse import Action, ArgumentParser +from collections import abc +from importlib import import_module + +from addict import Dict +from yapf.yapflib.yapf_api import FormatCode + +from .misc import import_modules_from_strings +from .path import check_file_exist + +if platform.system() == 'Windows': + import regex as re +else: + import re + +BASE_KEY = '_base_' +DELETE_KEY = '_delete_' +DEPRECATION_KEY = '_deprecation_' +RESERVED_KEYS = ['filename', 'text', 'pretty_text'] + + +class ConfigDict(Dict): + def __missing__(self, name): + raise KeyError(name) + + def __getattr__(self, name): + try: + value = super(ConfigDict, self).__getattr__(name) + except KeyError: + ex = AttributeError(f"'{self.__class__.__name__}' object has no " f"attribute '{name}'") + except Exception as e: + ex = e + else: + return value + raise ex + + +def add_args(parser, cfg, prefix=''): + for k, v in cfg.items(): + if isinstance(v, str): + parser.add_argument('--' + prefix + k) + elif isinstance(v, int): + parser.add_argument('--' + prefix + k, type=int) + elif isinstance(v, float): + parser.add_argument('--' + prefix + k, type=float) + elif isinstance(v, bool): + parser.add_argument('--' + prefix + k, action='store_true') + elif isinstance(v, dict): + add_args(parser, v, prefix + k + '.') + elif isinstance(v, abc.Iterable): + parser.add_argument('--' + prefix + k, type=type(v[0]), nargs='+') + else: + print(f'cannot parse key {prefix + k} of type {type(v)}') + return parser + + +class Config: + """A facility for config and config files. + + It supports common file formats as configs: python/json/yaml. The interface + is the same as a dict object and also allows access config values as + attributes. + + Example: + >>> cfg = Config(dict(a=1, b=dict(b1=[0, 1]))) + >>> cfg.a + 1 + >>> cfg.b + {'b1': [0, 1]} + >>> cfg.b.b1 + [0, 1] + >>> cfg = Config.fromfile('tests/data/config/a.py') + >>> cfg.filename + "/home/kchen/projects/mmcv/tests/data/config/a.py" + >>> cfg.item4 + 'test' + >>> cfg + "Config [path: /home/kchen/projects/mmcv/tests/data/config/a.py]: " + "{'item1': [1, 2], 'item2': {'a': 0}, 'item3': True, 'item4': 'test'}" + """ + + @staticmethod + def _validate_py_syntax(filename): + with open(filename, 'r', encoding='utf-8') as f: + # Setting encoding explicitly to resolve coding issue on windows + content = f.read() + try: + ast.parse(content) + except SyntaxError as e: + raise SyntaxError('There are syntax errors in config ' f'file {filename}: {e}') + + @staticmethod + def _substitute_predefined_vars(filename, temp_config_name): + file_dirname = osp.dirname(filename) + file_basename = osp.basename(filename) + file_basename_no_extension = osp.splitext(file_basename)[0] + file_extname = osp.splitext(filename)[1] + support_templates = dict( + fileDirname=file_dirname, + fileBasename=file_basename, + fileBasenameNoExtension=file_basename_no_extension, + fileExtname=file_extname, + ) + with open(filename, 'r', encoding='utf-8') as f: + # Setting encoding explicitly to resolve coding issue on windows + config_file = f.read() + for key, value in support_templates.items(): + regexp = r'\{\{\s*' + str(key) + r'\s*\}\}' + value = value.replace('\\', '/') + config_file = re.sub(regexp, value, config_file) + with open(temp_config_name, 'w', encoding='utf-8') as tmp_config_file: + tmp_config_file.write(config_file) + + @staticmethod + def _pre_substitute_base_vars(filename, temp_config_name): + """Substitute base variable placehoders to string, so that parsing + would work.""" + with open(filename, 'r', encoding='utf-8') as f: + # Setting encoding explicitly to resolve coding issue on windows + config_file = f.read() + base_var_dict = {} + regexp = r'\{\{\s*' + BASE_KEY + r'\.([\w\.]+)\s*\}\}' + base_vars = set(re.findall(regexp, config_file)) + for base_var in base_vars: + randstr = f'_{base_var}_{uuid.uuid4().hex.lower()[:6]}' + base_var_dict[randstr] = base_var + regexp = r'\{\{\s*' + BASE_KEY + r'\.' + base_var + r'\s*\}\}' + config_file = re.sub(regexp, f'"{randstr}"', config_file) + with open(temp_config_name, 'w', encoding='utf-8') as tmp_config_file: + tmp_config_file.write(config_file) + return base_var_dict + + @staticmethod + def _substitute_base_vars(cfg, base_var_dict, base_cfg): + """Substitute variable strings to their actual values.""" + cfg = copy.deepcopy(cfg) + + if isinstance(cfg, dict): + for k, v in cfg.items(): + if isinstance(v, str) and v in base_var_dict: + new_v = base_cfg + for new_k in base_var_dict[v].split('.'): + new_v = new_v[new_k] + cfg[k] = new_v + elif isinstance(v, (list, tuple, dict)): + cfg[k] = Config._substitute_base_vars(v, base_var_dict, base_cfg) + elif isinstance(cfg, tuple): + cfg = tuple(Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg) + elif isinstance(cfg, list): + cfg = [Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg] + elif isinstance(cfg, str) and cfg in base_var_dict: + new_v = base_cfg + for new_k in base_var_dict[cfg].split('.'): + new_v = new_v[new_k] + cfg = new_v + + return cfg + + @staticmethod + def _file2dict(filename, use_predefined_variables=True): + filename = osp.abspath(osp.expanduser(filename)) + check_file_exist(filename) + fileExtname = osp.splitext(filename)[1] + if fileExtname not in ['.py', '.json', '.yaml', '.yml']: + raise IOError('Only py/yml/yaml/json type are supported now!') + + with tempfile.TemporaryDirectory() as temp_config_dir: + temp_config_file = tempfile.NamedTemporaryFile(dir=temp_config_dir, suffix=fileExtname) + if platform.system() == 'Windows': + temp_config_file.close() + temp_config_name = osp.basename(temp_config_file.name) + # Substitute predefined variables + if use_predefined_variables: + Config._substitute_predefined_vars(filename, temp_config_file.name) + else: + shutil.copyfile(filename, temp_config_file.name) + # Substitute base variables from placeholders to strings + base_var_dict = Config._pre_substitute_base_vars(temp_config_file.name, temp_config_file.name) + + if filename.endswith('.py'): + temp_module_name = osp.splitext(temp_config_name)[0] + sys.path.insert(0, temp_config_dir) + Config._validate_py_syntax(filename) + mod = import_module(temp_module_name) + sys.path.pop(0) + cfg_dict = {name: value for name, value in mod.__dict__.items() if not name.startswith('__')} + # delete imported module + del sys.modules[temp_module_name] + elif filename.endswith(('.yml', '.yaml', '.json')): + import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + + cfg_dict = mmcv.load(temp_config_file.name) + # close temp file + temp_config_file.close() + + # check deprecation information + if DEPRECATION_KEY in cfg_dict: + deprecation_info = cfg_dict.pop(DEPRECATION_KEY) + warning_msg = f'The config file {filename} will be deprecated ' 'in the future.' + if 'expected' in deprecation_info: + warning_msg += f' Please use {deprecation_info["expected"]} ' 'instead.' + if 'reference' in deprecation_info: + warning_msg += ' More information can be found at ' f'{deprecation_info["reference"]}' + warnings.warn(warning_msg) + + cfg_text = filename + '\n' + with open(filename, 'r', encoding='utf-8') as f: + # Setting encoding explicitly to resolve coding issue on windows + cfg_text += f.read() + + if BASE_KEY in cfg_dict: + cfg_dir = osp.dirname(filename) + base_filename = cfg_dict.pop(BASE_KEY) + base_filename = base_filename if isinstance(base_filename, list) else [base_filename] + + cfg_dict_list = list() + cfg_text_list = list() + for f in base_filename: + _cfg_dict, _cfg_text = Config._file2dict(osp.join(cfg_dir, f)) + cfg_dict_list.append(_cfg_dict) + cfg_text_list.append(_cfg_text) + + base_cfg_dict = dict() + for c in cfg_dict_list: + duplicate_keys = base_cfg_dict.keys() & c.keys() + if len(duplicate_keys) > 0: + raise KeyError('Duplicate key is not allowed among bases. ' f'Duplicate keys: {duplicate_keys}') + base_cfg_dict.update(c) + + # Substitute base variables from strings to their actual values + cfg_dict = Config._substitute_base_vars(cfg_dict, base_var_dict, base_cfg_dict) + + base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict) + cfg_dict = base_cfg_dict + + # merge cfg_text + cfg_text_list.append(cfg_text) + cfg_text = '\n'.join(cfg_text_list) + + return cfg_dict, cfg_text + + @staticmethod + def _merge_a_into_b(a, b, allow_list_keys=False): + """merge dict ``a`` into dict ``b`` (non-inplace). + + Values in ``a`` will overwrite ``b``. ``b`` is copied first to avoid + in-place modifications. + + Args: + a (dict): The source dict to be merged into ``b``. + b (dict): The origin dict to be fetch keys from ``a``. + allow_list_keys (bool): If True, int string keys (e.g. '0', '1') + are allowed in source ``a`` and will replace the element of the + corresponding index in b if b is a list. Default: False. + + Returns: + dict: The modified dict of ``b`` using ``a``. + + Examples: + # Normally merge a into b. + >>> Config._merge_a_into_b( + ... dict(obj=dict(a=2)), dict(obj=dict(a=1))) + {'obj': {'a': 2}} + + # Delete b first and merge a into b. + >>> Config._merge_a_into_b( + ... dict(obj=dict(_delete_=True, a=2)), dict(obj=dict(a=1))) + {'obj': {'a': 2}} + + # b is a list + >>> Config._merge_a_into_b( + ... {'0': dict(a=2)}, [dict(a=1), dict(b=2)], True) + [{'a': 2}, {'b': 2}] + """ + b = b.copy() + for k, v in a.items(): + if allow_list_keys and k.isdigit() and isinstance(b, list): + k = int(k) + if len(b) <= k: + raise KeyError(f'Index {k} exceeds the length of list {b}') + b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) + elif isinstance(v, dict) and k in b and not v.pop(DELETE_KEY, False): + allowed_types = (dict, list) if allow_list_keys else dict + if not isinstance(b[k], allowed_types): + raise TypeError( + f'{k}={v} in child config cannot inherit from base ' + f'because {k} is a dict in the child config but is of ' + f'type {type(b[k])} in base config. You may set ' + f'`{DELETE_KEY}=True` to ignore the base config' + ) + b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) + else: + b[k] = v + return b + + @staticmethod + def fromfile(filename, use_predefined_variables=True, import_custom_modules=True): + cfg_dict, cfg_text = Config._file2dict(filename, use_predefined_variables) + if import_custom_modules and cfg_dict.get('custom_imports', None): + import_modules_from_strings(**cfg_dict['custom_imports']) + return Config(cfg_dict, cfg_text=cfg_text, filename=filename) + + @staticmethod + def fromstring(cfg_str, file_format): + """Generate config from config str. + + Args: + cfg_str (str): Config str. + file_format (str): Config file format corresponding to the + config str. Only py/yml/yaml/json type are supported now! + + Returns: + obj:`Config`: Config obj. + """ + if file_format not in ['.py', '.json', '.yaml', '.yml']: + raise IOError('Only py/yml/yaml/json type are supported now!') + if file_format != '.py' and 'dict(' in cfg_str: + # check if users specify a wrong suffix for python + warnings.warn('Please check "file_format", the file format may be .py') + with tempfile.NamedTemporaryFile('w', encoding='utf-8', suffix=file_format, delete=False) as temp_file: + temp_file.write(cfg_str) + # on windows, previous implementation cause error + # see PR 1077 for details + cfg = Config.fromfile(temp_file.name) + os.remove(temp_file.name) + return cfg + + @staticmethod + def auto_argparser(description=None): + """Generate argparser from config file automatically (experimental)""" + partial_parser = ArgumentParser(description=description) + partial_parser.add_argument('config', help='config file path') + cfg_file = partial_parser.parse_known_args()[0].config + cfg = Config.fromfile(cfg_file) + parser = ArgumentParser(description=description) + parser.add_argument('config', help='config file path') + add_args(parser, cfg) + return parser, cfg + + def __init__(self, cfg_dict=None, cfg_text=None, filename=None): + if cfg_dict is None: + cfg_dict = dict() + elif not isinstance(cfg_dict, dict): + raise TypeError('cfg_dict must be a dict, but ' f'got {type(cfg_dict)}') + for key in cfg_dict: + if key in RESERVED_KEYS: + raise KeyError(f'{key} is reserved for config file') + + super(Config, self).__setattr__('_cfg_dict', ConfigDict(cfg_dict)) + super(Config, self).__setattr__('_filename', filename) + if cfg_text: + text = cfg_text + elif filename: + with open(filename, 'r') as f: + text = f.read() + else: + text = '' + super(Config, self).__setattr__('_text', text) + + @property + def filename(self): + return self._filename + + @property + def text(self): + return self._text + + @property + def pretty_text(self): + + indent = 4 + + def _indent(s_, num_spaces): + s = s_.split('\n') + if len(s) == 1: + return s_ + first = s.pop(0) + s = [(num_spaces * ' ') + line for line in s] + s = '\n'.join(s) + s = first + '\n' + s + return s + + def _format_basic_types(k, v, use_mapping=False): + if isinstance(v, str): + v_str = f"'{v}'" + else: + v_str = str(v) + + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: {v_str}' + else: + attr_str = f'{str(k)}={v_str}' + attr_str = _indent(attr_str, indent) + + return attr_str + + def _format_list(k, v, use_mapping=False): + # check if all items in the list are dict + if all(isinstance(_, dict) for _ in v): + v_str = '[\n' + v_str += '\n'.join(f'dict({_indent(_format_dict(v_), indent)}),' for v_ in v).rstrip(',') + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: {v_str}' + else: + attr_str = f'{str(k)}={v_str}' + attr_str = _indent(attr_str, indent) + ']' + else: + attr_str = _format_basic_types(k, v, use_mapping) + return attr_str + + def _contain_invalid_identifier(dict_str): + contain_invalid_identifier = False + for key_name in dict_str: + contain_invalid_identifier |= not str(key_name).isidentifier() + return contain_invalid_identifier + + def _format_dict(input_dict, outest_level=False): + r = '' + s = [] + + use_mapping = _contain_invalid_identifier(input_dict) + if use_mapping: + r += '{' + for idx, (k, v) in enumerate(input_dict.items()): + is_last = idx >= len(input_dict) - 1 + end = '' if outest_level or is_last else ',' + if isinstance(v, dict): + v_str = '\n' + _format_dict(v) + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: dict({v_str}' + else: + attr_str = f'{str(k)}=dict({v_str}' + attr_str = _indent(attr_str, indent) + ')' + end + elif isinstance(v, list): + attr_str = _format_list(k, v, use_mapping) + end + else: + attr_str = _format_basic_types(k, v, use_mapping) + end + + s.append(attr_str) + r += '\n'.join(s) + if use_mapping: + r += '}' + return r + + cfg_dict = self._cfg_dict.to_dict() + text = _format_dict(cfg_dict, outest_level=True) + # copied from setup.cfg + yapf_style = dict( + based_on_style='pep8', + blank_line_before_nested_class_or_def=True, + split_before_expression_after_opening_paren=True, + ) + text, _ = FormatCode(text, style_config=yapf_style, verify=True) + + return text + + def __repr__(self): + return f'Config (path: {self.filename}): {self._cfg_dict.__repr__()}' + + def __len__(self): + return len(self._cfg_dict) + + def __getattr__(self, name): + return getattr(self._cfg_dict, name) + + def __getitem__(self, name): + return self._cfg_dict.__getitem__(name) + + def __setattr__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setattr__(name, value) + + def __setitem__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setitem__(name, value) + + def __iter__(self): + return iter(self._cfg_dict) + + def __getstate__(self): + return (self._cfg_dict, self._filename, self._text) + + def __setstate__(self, state): + _cfg_dict, _filename, _text = state + super(Config, self).__setattr__('_cfg_dict', _cfg_dict) + super(Config, self).__setattr__('_filename', _filename) + super(Config, self).__setattr__('_text', _text) + + def dump(self, file=None): + cfg_dict = super(Config, self).__getattribute__('_cfg_dict').to_dict() + if self.filename.endswith('.py'): + if file is None: + return self.pretty_text + else: + with open(file, 'w', encoding='utf-8') as f: + f.write(self.pretty_text) + else: + import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + + if file is None: + file_format = self.filename.split('.')[-1] + return mmcv.dump(cfg_dict, file_format=file_format) + else: + mmcv.dump(cfg_dict, file) + + def merge_from_dict(self, options, allow_list_keys=True): + """Merge list into cfg_dict. + + Merge the dict parsed by MultipleKVAction into this cfg. + + Examples: + >>> options = {'model.backbone.depth': 50, + ... 'model.backbone.with_cp':True} + >>> cfg = Config(dict(model=dict(backbone=dict(type='ResNet')))) + >>> cfg.merge_from_dict(options) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict( + ... model=dict(backbone=dict(depth=50, with_cp=True))) + + # Merge list element + >>> cfg = Config(dict(pipeline=[ + ... dict(type='LoadImage'), dict(type='LoadAnnotations')])) + >>> options = dict(pipeline={'0': dict(type='SelfLoadImage')}) + >>> cfg.merge_from_dict(options, allow_list_keys=True) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict(pipeline=[ + ... dict(type='SelfLoadImage'), dict(type='LoadAnnotations')]) + + Args: + options (dict): dict of configs to merge from. + allow_list_keys (bool): If True, int string keys (e.g. '0', '1') + are allowed in ``options`` and will replace the element of the + corresponding index in the config if the config is a list. + Default: True. + """ + option_cfg_dict = {} + for full_key, v in options.items(): + d = option_cfg_dict + key_list = full_key.split('.') + for subkey in key_list[:-1]: + d.setdefault(subkey, ConfigDict()) + d = d[subkey] + subkey = key_list[-1] + d[subkey] = v + + cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + super(Config, self).__setattr__( + '_cfg_dict', Config._merge_a_into_b(option_cfg_dict, cfg_dict, allow_list_keys=allow_list_keys) + ) + + +class DictAction(Action): + """ + argparse action to split an argument into KEY=VALUE form + on the first = and append to a dictionary. List options can + be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit + brackets, i.e. 'KEY=[V1,V2,V3]'. It also support nested brackets to build + list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]' + """ + + @staticmethod + def _parse_int_float_bool(val): + try: + return int(val) + except ValueError: + pass + try: + return float(val) + except ValueError: + pass + if val.lower() in ['true', 'false']: + return True if val.lower() == 'true' else False + return val + + @staticmethod + def _parse_iterable(val): + """Parse iterable values in the string. + + All elements inside '()' or '[]' are treated as iterable values. + + Args: + val (str): Value string. + + Returns: + list | tuple: The expanded list or tuple from the string. + + Examples: + >>> DictAction._parse_iterable('1,2,3') + [1, 2, 3] + >>> DictAction._parse_iterable('[a, b, c]') + ['a', 'b', 'c'] + >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]') + [(1, 2, 3), ['a', 'b'], 'c'] + """ + + def find_next_comma(string): + """Find the position of next comma in the string. + + If no ',' is found in the string, return the string length. All + chars inside '()' and '[]' are treated as one element and thus ',' + inside these brackets are ignored. + """ + assert (string.count('(') == string.count(')')) and ( + string.count('[') == string.count(']') + ), f'Imbalanced brackets exist in {string}' + end = len(string) + for idx, char in enumerate(string): + pre = string[:idx] + # The string before this ',' is balanced + if (char == ',') and (pre.count('(') == pre.count(')')) and (pre.count('[') == pre.count(']')): + end = idx + break + return end + + # Strip ' and " characters and replace whitespace. + val = val.strip('\'\"').replace(' ', '') + is_tuple = False + if val.startswith('(') and val.endswith(')'): + is_tuple = True + val = val[1:-1] + elif val.startswith('[') and val.endswith(']'): + val = val[1:-1] + elif ',' not in val: + # val is a single value + return DictAction._parse_int_float_bool(val) + + values = [] + while len(val) > 0: + comma_idx = find_next_comma(val) + element = DictAction._parse_iterable(val[:comma_idx]) + values.append(element) + val = val[comma_idx + 1 :] + if is_tuple: + values = tuple(values) + return values + + def __call__(self, parser, namespace, values, option_string=None): + options = {} + for kv in values: + key, val = kv.split('=', maxsplit=1) + options[key] = self._parse_iterable(val) + setattr(namespace, self.dest, options) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py new file mode 100644 index 000000000000..484c17be1767 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/env.py @@ -0,0 +1,100 @@ +# Copyright (c) OpenMMLab. All rights reserved. +"""This file holding some environment constant for sharing by other files.""" + +import os.path as osp +import subprocess +import sys +from collections import defaultdict + +import cv2 +import torch + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + +from .parrots_wrapper import get_build_config + + +def collect_env(): + """Collect the information of the running environments. + + Returns: + dict: The environment information. The following fields are contained. + + - sys.platform: The variable of ``sys.platform``. + - Python: Python version. + - CUDA available: Bool, indicating if CUDA is available. + - GPU devices: Device type of each GPU. + - CUDA_HOME (optional): The env var ``CUDA_HOME``. + - NVCC (optional): NVCC version. + - GCC: GCC version, "n/a" if GCC is not installed. + - PyTorch: PyTorch version. + - PyTorch compiling details: The output of \ + ``torch.__config__.show()``. + - TorchVision (optional): TorchVision version. + - OpenCV: OpenCV version. + - MMCV: MMCV version. + - MMCV Compiler: The GCC version for compiling MMCV ops. + - MMCV CUDA Compiler: The CUDA version for compiling MMCV ops. + """ + env_info = {} + env_info['sys.platform'] = sys.platform + env_info['Python'] = sys.version.replace('\n', '') + + cuda_available = torch.cuda.is_available() + env_info['CUDA available'] = cuda_available + + if cuda_available: + devices = defaultdict(list) + for k in range(torch.cuda.device_count()): + devices[torch.cuda.get_device_name(k)].append(str(k)) + for name, device_ids in devices.items(): + env_info['GPU ' + ','.join(device_ids)] = name + + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _get_cuda_home + + CUDA_HOME = _get_cuda_home() + env_info['CUDA_HOME'] = CUDA_HOME + + if CUDA_HOME is not None and osp.isdir(CUDA_HOME): + try: + nvcc = osp.join(CUDA_HOME, 'bin/nvcc') + nvcc = subprocess.check_output(f'"{nvcc}" -V | tail -n1', shell=True) + nvcc = nvcc.decode('utf-8').strip() + except subprocess.SubprocessError: + nvcc = 'Not Available' + env_info['NVCC'] = nvcc + + try: + gcc = subprocess.check_output('gcc --version | head -n1', shell=True) + gcc = gcc.decode('utf-8').strip() + env_info['GCC'] = gcc + except subprocess.CalledProcessError: # gcc is unavailable + env_info['GCC'] = 'n/a' + + env_info['PyTorch'] = torch.__version__ + env_info['PyTorch compiling details'] = get_build_config() + + try: + import torchvision + + env_info['TorchVision'] = torchvision.__version__ + except ModuleNotFoundError: + pass + + env_info['OpenCV'] = cv2.__version__ + + env_info['MMCV'] = mmcv.__version__ + + try: + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import ( + get_compiler_version, + get_compiling_cuda_version, + ) + except ModuleNotFoundError: + env_info['MMCV Compiler'] = 'n/a' + env_info['MMCV CUDA Compiler'] = 'n/a' + else: + env_info['MMCV Compiler'] = get_compiler_version() + env_info['MMCV CUDA Compiler'] = get_compiling_cuda_version() + + return env_info diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py new file mode 100644 index 000000000000..6e2217c7e99d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/ext_loader.py @@ -0,0 +1,72 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import importlib +import os +import pkgutil +import warnings +from collections import namedtuple + +import torch + +if torch.__version__ != 'parrots': + + def load_ext(name, funcs): + ext = importlib.import_module('mmcv.' + name) + for fun in funcs: + assert hasattr(ext, fun), f'{fun} miss in module {name}' + return ext + + +else: + from parrots import extension + from parrots.base import ParrotsException + + has_return_value_ops = [ + 'nms', + 'softnms', + 'nms_match', + 'nms_rotated', + 'top_pool_forward', + 'top_pool_backward', + 'bottom_pool_forward', + 'bottom_pool_backward', + 'left_pool_forward', + 'left_pool_backward', + 'right_pool_forward', + 'right_pool_backward', + 'fused_bias_leakyrelu', + 'upfirdn2d', + 'ms_deform_attn_forward', + 'pixel_group', + 'contour_expand', + ] + + def get_fake_func(name, e): + def fake_func(*args, **kwargs): + warnings.warn(f'{name} is not supported in parrots now') + raise e + + return fake_func + + def load_ext(name, funcs): + ExtModule = namedtuple('ExtModule', funcs) + ext_list = [] + lib_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + for fun in funcs: + try: + ext_fun = extension.load(fun, name, lib_dir=lib_root) + except ParrotsException as e: + if 'No element registered' not in e.message: + warnings.warn(e.message) + ext_fun = get_fake_func(fun, e) + ext_list.append(ext_fun) + else: + if fun in has_return_value_ops: + ext_list.append(ext_fun.op) + else: + ext_list.append(ext_fun.op_) + return ExtModule(*ext_list) + + +def check_ops_exist(): + ext_loader = pkgutil.find_loader('mmcv._ext') + return ext_loader is not None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py new file mode 100644 index 000000000000..403a1ad7aa77 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/logging.py @@ -0,0 +1,109 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging + +import torch.distributed as dist + +logger_initialized = {} + + +def get_logger(name, log_file=None, log_level=logging.INFO, file_mode='w'): + """Initialize and get a logger by name. + + If the logger has not been initialized, this method will initialize the + logger by adding one or two handlers, otherwise the initialized logger will + be directly returned. During initialization, a StreamHandler will always be + added. If `log_file` is specified and the process rank is 0, a FileHandler + will also be added. + + Args: + name (str): Logger name. + log_file (str | None): The log filename. If specified, a FileHandler + will be added to the logger. + log_level (int): The logger level. Note that only the process of + rank 0 is affected, and other processes will set the level to + "Error" thus be silent most of the time. + file_mode (str): The file mode used in opening log file. + Defaults to 'w'. + + Returns: + logging.Logger: The expected logger. + """ + logger = logging.getLogger(name) + if name in logger_initialized: + return logger + # handle hierarchical names + # e.g., logger "a" is initialized, then logger "a.b" will skip the + # initialization since it is a child of "a". + for logger_name in logger_initialized: + if name.startswith(logger_name): + return logger + + # handle duplicate logs to the console + # Starting in 1.8.0, PyTorch DDP attaches a StreamHandler (NOTSET) + # to the root logger. As logger.propagate is True by default, this root + # level handler causes logging messages from rank>0 processes to + # unexpectedly show up on the console, creating much unwanted clutter. + # To fix this issue, we set the root logger's StreamHandler, if any, to log + # at the ERROR level. + for handler in logger.root.handlers: + if type(handler) is logging.StreamHandler: + handler.setLevel(logging.ERROR) + + stream_handler = logging.StreamHandler() + handlers = [stream_handler] + + if dist.is_available() and dist.is_initialized(): + rank = dist.get_rank() + else: + rank = 0 + + # only rank 0 will add a FileHandler + if rank == 0 and log_file is not None: + # Here, the default behaviour of the official logger is 'a'. Thus, we + # provide an interface to change the file mode to the default + # behaviour. + file_handler = logging.FileHandler(log_file, file_mode) + handlers.append(file_handler) + + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + for handler in handlers: + handler.setFormatter(formatter) + handler.setLevel(log_level) + logger.addHandler(handler) + + if rank == 0: + logger.setLevel(log_level) + else: + logger.setLevel(logging.ERROR) + + logger_initialized[name] = True + + return logger + + +def print_log(msg, logger=None, level=logging.INFO): + """Print a log message. + + Args: + msg (str): The message to be logged. + logger (logging.Logger | str | None): The logger to be used. + Some special loggers are: + - "silent": no message will be printed. + - other str: the logger obtained with `get_root_logger(logger)`. + - None: The `print()` method will be used to print log messages. + level (int): Logging level. Only available when `logger` is a Logger + object or "root". + """ + if logger is None: + print(msg) + elif isinstance(logger, logging.Logger): + logger.log(level, msg) + elif logger == 'silent': + pass + elif isinstance(logger, str): + _logger = get_logger(logger) + _logger.log(level, msg) + else: + raise TypeError( + 'logger should be either a logging.Logger object, str, ' f'"silent" or None, but got {type(logger)}' + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py new file mode 100644 index 000000000000..01204666f985 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/misc.py @@ -0,0 +1,371 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import collections.abc +import functools +import itertools +import subprocess +import warnings +from collections import abc +from importlib import import_module +from inspect import getfullargspec +from itertools import repeat + + +# From PyTorch internals +def _ntuple(n): + def parse(x): + if isinstance(x, collections.abc.Iterable): + return x + return tuple(repeat(x, n)) + + return parse + + +to_1tuple = _ntuple(1) +to_2tuple = _ntuple(2) +to_3tuple = _ntuple(3) +to_4tuple = _ntuple(4) +to_ntuple = _ntuple + + +def is_str(x): + """Whether the input is an string instance. + + Note: This method is deprecated since python 2 is no longer supported. + """ + return isinstance(x, str) + + +def import_modules_from_strings(imports, allow_failed_imports=False): + """Import modules from the given list of strings. + + Args: + imports (list | str | None): The given module names to be imported. + allow_failed_imports (bool): If True, the failed imports will return + None. Otherwise, an ImportError is raise. Default: False. + + Returns: + list[module] | module | None: The imported modules. + + Examples: + >>> osp, sys = import_modules_from_strings( + ... ['os.path', 'sys']) + >>> import os.path as osp_ + >>> import sys as sys_ + >>> assert osp == osp_ + >>> assert sys == sys_ + """ + if not imports: + return + single_import = False + if isinstance(imports, str): + single_import = True + imports = [imports] + if not isinstance(imports, list): + raise TypeError(f'custom_imports must be a list but got type {type(imports)}') + imported = [] + for imp in imports: + if not isinstance(imp, str): + raise TypeError(f'{imp} is of type {type(imp)} and cannot be imported.') + try: + imported_tmp = import_module(imp) + except ImportError: + if allow_failed_imports: + warnings.warn(f'{imp} failed to import and is ignored.', UserWarning) + imported_tmp = None + else: + raise ImportError + imported.append(imported_tmp) + if single_import: + imported = imported[0] + return imported + + +def iter_cast(inputs, dst_type, return_type=None): + """Cast elements of an iterable object into some type. + + Args: + inputs (Iterable): The input object. + dst_type (type): Destination type. + return_type (type, optional): If specified, the output object will be + converted to this type, otherwise an iterator. + + Returns: + iterator or specified type: The converted object. + """ + if not isinstance(inputs, abc.Iterable): + raise TypeError('inputs must be an iterable object') + if not isinstance(dst_type, type): + raise TypeError('"dst_type" must be a valid type') + + out_iterable = map(dst_type, inputs) + + if return_type is None: + return out_iterable + else: + return return_type(out_iterable) + + +def list_cast(inputs, dst_type): + """Cast elements of an iterable object into a list of some type. + + A partial method of :func:`iter_cast`. + """ + return iter_cast(inputs, dst_type, return_type=list) + + +def tuple_cast(inputs, dst_type): + """Cast elements of an iterable object into a tuple of some type. + + A partial method of :func:`iter_cast`. + """ + return iter_cast(inputs, dst_type, return_type=tuple) + + +def is_seq_of(seq, expected_type, seq_type=None): + """Check whether it is a sequence of some type. + + Args: + seq (Sequence): The sequence to be checked. + expected_type (type): Expected type of sequence items. + seq_type (type, optional): Expected sequence type. + + Returns: + bool: Whether the sequence is valid. + """ + if seq_type is None: + exp_seq_type = abc.Sequence + else: + assert isinstance(seq_type, type) + exp_seq_type = seq_type + if not isinstance(seq, exp_seq_type): + return False + for item in seq: + if not isinstance(item, expected_type): + return False + return True + + +def is_list_of(seq, expected_type): + """Check whether it is a list of some type. + + A partial method of :func:`is_seq_of`. + """ + return is_seq_of(seq, expected_type, seq_type=list) + + +def is_tuple_of(seq, expected_type): + """Check whether it is a tuple of some type. + + A partial method of :func:`is_seq_of`. + """ + return is_seq_of(seq, expected_type, seq_type=tuple) + + +def slice_list(in_list, lens): + """Slice a list into several sub lists by a list of given length. + + Args: + in_list (list): The list to be sliced. + lens(int or list): The expected length of each out list. + + Returns: + list: A list of sliced list. + """ + if isinstance(lens, int): + assert len(in_list) % lens == 0 + lens = [lens] * int(len(in_list) / lens) + if not isinstance(lens, list): + raise TypeError('"indices" must be an integer or a list of integers') + elif sum(lens) != len(in_list): + raise ValueError('sum of lens and list length does not ' f'match: {sum(lens)} != {len(in_list)}') + out_list = [] + idx = 0 + for i in range(len(lens)): + out_list.append(in_list[idx : idx + lens[i]]) + idx += lens[i] + return out_list + + +def concat_list(in_list): + """Concatenate a list of list into a single list. + + Args: + in_list (list): The list of list to be merged. + + Returns: + list: The concatenated flat list. + """ + return list(itertools.chain(*in_list)) + + +def check_prerequisites( + prerequisites, + checker, + msg_tmpl='Prerequisites "{}" are required in method "{}" but not ' 'found, please install them first.', +): # yapf: disable + """A decorator factory to check if prerequisites are satisfied. + + Args: + prerequisites (str of list[str]): Prerequisites to be checked. + checker (callable): The checker method that returns True if a + prerequisite is meet, False otherwise. + msg_tmpl (str): The message template with two variables. + + Returns: + decorator: A specific decorator. + """ + + def wrap(func): + @functools.wraps(func) + def wrapped_func(*args, **kwargs): + requirements = [prerequisites] if isinstance(prerequisites, str) else prerequisites + missing = [] + for item in requirements: + if not checker(item): + missing.append(item) + if missing: + print(msg_tmpl.format(', '.join(missing), func.__name__)) + raise RuntimeError('Prerequisites not meet.') + else: + return func(*args, **kwargs) + + return wrapped_func + + return wrap + + +def _check_py_package(package): + try: + import_module(package) + except ImportError: + return False + else: + return True + + +def _check_executable(cmd): + if subprocess.call(f'which {cmd}', shell=True) != 0: + return False + else: + return True + + +def requires_package(prerequisites): + """A decorator to check if some python packages are installed. + + Example: + >>> @requires_package('numpy') + >>> func(arg1, args): + >>> return numpy.zeros(1) + array([0.]) + >>> @requires_package(['numpy', 'non_package']) + >>> func(arg1, args): + >>> return numpy.zeros(1) + ImportError + """ + return check_prerequisites(prerequisites, checker=_check_py_package) + + +def requires_executable(prerequisites): + """A decorator to check if some executable files are installed. + + Example: + >>> @requires_executable('ffmpeg') + >>> func(arg1, args): + >>> print(1) + 1 + """ + return check_prerequisites(prerequisites, checker=_check_executable) + + +def deprecated_api_warning(name_dict, cls_name=None): + """A decorator to check if some arguments are deprecate and try to replace + deprecate src_arg_name to dst_arg_name. + + Args: + name_dict(dict): + key (str): Deprecate argument names. + val (str): Expected argument names. + + Returns: + func: New function. + """ + + def api_warning_wrapper(old_func): + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get name of the function + func_name = old_func.__name__ + if cls_name is not None: + func_name = f'{cls_name}.{func_name}' + if args: + arg_names = args_info.args[: len(args)] + for src_arg_name, dst_arg_name in name_dict.items(): + if src_arg_name in arg_names: + warnings.warn( + f'"{src_arg_name}" is deprecated in ' + f'`{func_name}`, please use "{dst_arg_name}" ' + 'instead' + ) + arg_names[arg_names.index(src_arg_name)] = dst_arg_name + if kwargs: + for src_arg_name, dst_arg_name in name_dict.items(): + if src_arg_name in kwargs: + + assert dst_arg_name not in kwargs, ( + f'The expected behavior is to replace ' + f'the deprecated key `{src_arg_name}` to ' + f'new key `{dst_arg_name}`, but got them ' + f'in the arguments at the same time, which ' + f'is confusing. `{src_arg_name} will be ' + f'deprecated in the future, please ' + f'use `{dst_arg_name}` instead.' + ) + + warnings.warn( + f'"{src_arg_name}" is deprecated in ' + f'`{func_name}`, please use "{dst_arg_name}" ' + 'instead' + ) + kwargs[dst_arg_name] = kwargs.pop(src_arg_name) + + # apply converted arguments to the decorated method + output = old_func(*args, **kwargs) + return output + + return new_func + + return api_warning_wrapper + + +def is_method_overridden(method, base_class, derived_class): + """Check if a method of base class is overridden in derived class. + + Args: + method (str): the method name to check. + base_class (type): the class of the base class. + derived_class (type | Any): the class or instance of the derived class. + """ + assert isinstance(base_class, type), "base_class doesn't accept instance, Please pass class instead." + + if not isinstance(derived_class, type): + derived_class = derived_class.__class__ + + base_method = getattr(base_class, method) + derived_method = getattr(derived_class, method) + return derived_method != base_method + + +def has_method(obj: object, method: str) -> bool: + """Check whether the object has a method. + + Args: + method (str): The method name to check. + obj (object): The object to check. + + Returns: + bool: True if the object has the method else False. + """ + return hasattr(obj, method) and callable(getattr(obj, method)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py new file mode 100644 index 000000000000..e68d315d4a23 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_jit.py @@ -0,0 +1,33 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os + +from .parrots_wrapper import TORCH_VERSION + +parrots_jit_option = os.getenv('PARROTS_JIT_OPTION') + +if TORCH_VERSION == 'parrots' and parrots_jit_option == 'ON': + from parrots.jit import pat as jit +else: + + def jit(func=None, check_input=None, full_shape=True, derivate=False, coderize=False, optimize=False): + def wrapper(func): + def wrapper_inner(*args, **kargs): + return func(*args, **kargs) + + return wrapper_inner + + if func is None: + return wrapper + else: + return func + + +if TORCH_VERSION == 'parrots': + from parrots.utils.tester import skip_no_elena +else: + + def skip_no_elena(func): + def wrapper(*args, **kargs): + return func(*args, **kargs) + + return wrapper diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py new file mode 100644 index 000000000000..5f57625d7971 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/parrots_wrapper.py @@ -0,0 +1,106 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from functools import partial + +import torch + +TORCH_VERSION = torch.__version__ + + +def is_rocm_pytorch() -> bool: + is_rocm = False + if TORCH_VERSION != 'parrots': + try: + from torch.utils.cpp_extension import ROCM_HOME + + is_rocm = True if ((torch.version.hip is not None) and (ROCM_HOME is not None)) else False + except ImportError: + pass + return is_rocm + + +def _get_cuda_home(): + if TORCH_VERSION == 'parrots': + from parrots.utils.build_extension import CUDA_HOME + else: + if is_rocm_pytorch(): + from torch.utils.cpp_extension import ROCM_HOME + + CUDA_HOME = ROCM_HOME + else: + from torch.utils.cpp_extension import CUDA_HOME + return CUDA_HOME + + +def get_build_config(): + if TORCH_VERSION == 'parrots': + from parrots.config import get_build_info + + return get_build_info() + else: + return torch.__config__.show() + + +def _get_conv(): + if TORCH_VERSION == 'parrots': + from parrots.nn.modules.conv import _ConvNd, _ConvTransposeMixin + else: + from torch.nn.modules.conv import _ConvNd, _ConvTransposeMixin + return _ConvNd, _ConvTransposeMixin + + +def _get_dataloader(): + if TORCH_VERSION == 'parrots': + from torch.utils.data import DataLoader, PoolDataLoader + else: + from torch.utils.data import DataLoader + + PoolDataLoader = DataLoader + return DataLoader, PoolDataLoader + + +def _get_extension(): + if TORCH_VERSION == 'parrots': + from parrots.utils.build_extension import BuildExtension, Extension + + CppExtension = partial(Extension, cuda=False) + CUDAExtension = partial(Extension, cuda=True) + else: + from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension + return BuildExtension, CppExtension, CUDAExtension + + +def _get_pool(): + if TORCH_VERSION == 'parrots': + from parrots.nn.modules.pool import _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd + else: + from torch.nn.modules.pooling import _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd + return _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd + + +def _get_norm(): + if TORCH_VERSION == 'parrots': + from parrots.nn.modules.batchnorm import _BatchNorm, _InstanceNorm + + SyncBatchNorm_ = torch.nn.SyncBatchNorm2d + else: + from torch.nn.modules.batchnorm import _BatchNorm + from torch.nn.modules.instancenorm import _InstanceNorm + + SyncBatchNorm_ = torch.nn.SyncBatchNorm + return _BatchNorm, _InstanceNorm, SyncBatchNorm_ + + +_ConvNd, _ConvTransposeMixin = _get_conv() +DataLoader, PoolDataLoader = _get_dataloader() +BuildExtension, CppExtension, CUDAExtension = _get_extension() +_BatchNorm, _InstanceNorm, SyncBatchNorm_ = _get_norm() +_AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd = _get_pool() + + +class SyncBatchNorm(SyncBatchNorm_): + def _check_input_dim(self, input): + if TORCH_VERSION == 'parrots': + if input.dim() < 2: + raise ValueError(f'expected at least 2D input (got {input.dim()}D input)') + else: + super()._check_input_dim(input) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py new file mode 100644 index 000000000000..56eb66140d73 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/path.py @@ -0,0 +1,99 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import os.path as osp +from pathlib import Path + +from .misc import is_str + + +def is_filepath(x): + return is_str(x) or isinstance(x, Path) + + +def fopen(filepath, *args, **kwargs): + if is_str(filepath): + return open(filepath, *args, **kwargs) + elif isinstance(filepath, Path): + return filepath.open(*args, **kwargs) + raise ValueError('`filepath` should be a string or a Path') + + +def check_file_exist(filename, msg_tmpl='file "{}" does not exist'): + if not osp.isfile(filename): + raise FileNotFoundError(msg_tmpl.format(filename)) + + +def mkdir_or_exist(dir_name, mode=0o777): + if dir_name == '': + return + dir_name = osp.expanduser(dir_name) + os.makedirs(dir_name, mode=mode, exist_ok=True) + + +def symlink(src, dst, overwrite=True, **kwargs): + if os.path.lexists(dst) and overwrite: + os.remove(dst) + os.symlink(src, dst, **kwargs) + + +def scandir(dir_path, suffix=None, recursive=False, case_sensitive=True): + """Scan a directory to find the interested files. + + Args: + dir_path (str | obj:`Path`): Path of the directory. + suffix (str | tuple(str), optional): File suffix that we are + interested in. Default: None. + recursive (bool, optional): If set to True, recursively scan the + directory. Default: False. + case_sensitive (bool, optional) : If set to False, ignore the case of + suffix. Default: True. + + Returns: + A generator for all the interested files with relative paths. + """ + if isinstance(dir_path, (str, Path)): + dir_path = str(dir_path) + else: + raise TypeError('"dir_path" must be a string or Path object') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('"suffix" must be a string or tuple of strings') + + if suffix is not None and not case_sensitive: + suffix = suffix.lower() if isinstance(suffix, str) else tuple(item.lower() for item in suffix) + + root = dir_path + + def _scandir(dir_path, suffix, recursive, case_sensitive): + for entry in os.scandir(dir_path): + if not entry.name.startswith('.') and entry.is_file(): + rel_path = osp.relpath(entry.path, root) + _rel_path = rel_path if case_sensitive else rel_path.lower() + if suffix is None or _rel_path.endswith(suffix): + yield rel_path + elif recursive and os.path.isdir(entry.path): + # scan recursively if entry.path is a directory + yield from _scandir(entry.path, suffix, recursive, case_sensitive) + + return _scandir(dir_path, suffix, recursive, case_sensitive) + + +def find_vcs_root(path, markers=('.git',)): + """Finds the root directory (including itself) of specified markers. + + Args: + path (str): Path of directory or file. + markers (list[str], optional): List of file or directory names. + + Returns: + The directory contained one of the markers or None if not found. + """ + if osp.isfile(path): + path = osp.dirname(path) + + prev, cur = None, osp.abspath(osp.expanduser(path)) + while cur != prev: + if any(osp.exists(osp.join(cur, marker)) for marker in markers): + return cur + prev, cur = cur, osp.split(cur)[0] + return None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py new file mode 100644 index 000000000000..68d2e2383dcf --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/progressbar.py @@ -0,0 +1,204 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import sys +from collections.abc import Iterable +from multiprocessing import Pool +from shutil import get_terminal_size + +from .timer import Timer + + +class ProgressBar: + """A progress bar which can print the progress.""" + + def __init__(self, task_num=0, bar_width=50, start=True, file=sys.stdout): + self.task_num = task_num + self.bar_width = bar_width + self.completed = 0 + self.file = file + if start: + self.start() + + @property + def terminal_width(self): + width, _ = get_terminal_size() + return width + + def start(self): + if self.task_num > 0: + self.file.write(f'[{" " * self.bar_width}] 0/{self.task_num}, ' 'elapsed: 0s, ETA:') + else: + self.file.write('completed: 0, elapsed: 0s') + self.file.flush() + self.timer = Timer() + + def update(self, num_tasks=1): + assert num_tasks > 0 + self.completed += num_tasks + elapsed = self.timer.since_start() + if elapsed > 0: + fps = self.completed / elapsed + else: + fps = float('inf') + if self.task_num > 0: + percentage = self.completed / float(self.task_num) + eta = int(elapsed * (1 - percentage) / percentage + 0.5) + msg = ( + f'\r[{{}}] {self.completed}/{self.task_num}, ' + f'{fps:.1f} task/s, elapsed: {int(elapsed + 0.5)}s, ' + f'ETA: {eta:5}s' + ) + + bar_width = min(self.bar_width, int(self.terminal_width - len(msg)) + 2, int(self.terminal_width * 0.6)) + bar_width = max(2, bar_width) + mark_width = int(bar_width * percentage) + bar_chars = '>' * mark_width + ' ' * (bar_width - mark_width) + self.file.write(msg.format(bar_chars)) + else: + self.file.write(f'completed: {self.completed}, elapsed: {int(elapsed + 0.5)}s,' f' {fps:.1f} tasks/s') + self.file.flush() + + +def track_progress(func, tasks, bar_width=50, file=sys.stdout, **kwargs): + """Track the progress of tasks execution with a progress bar. + + Tasks are done with a simple for-loop. + + Args: + func (callable): The function to be applied to each task. + tasks (list or tuple[Iterable, int]): A list of tasks or + (tasks, total num). + bar_width (int): Width of progress bar. + + Returns: + list: The task results. + """ + if isinstance(tasks, tuple): + assert len(tasks) == 2 + assert isinstance(tasks[0], Iterable) + assert isinstance(tasks[1], int) + task_num = tasks[1] + tasks = tasks[0] + elif isinstance(tasks, Iterable): + task_num = len(tasks) + else: + raise TypeError('"tasks" must be an iterable object or a (iterator, int) tuple') + prog_bar = ProgressBar(task_num, bar_width, file=file) + results = [] + for task in tasks: + results.append(func(task, **kwargs)) + prog_bar.update() + prog_bar.file.write('\n') + return results + + +def init_pool(process_num, initializer=None, initargs=None): + if initializer is None: + return Pool(process_num) + elif initargs is None: + return Pool(process_num, initializer) + else: + if not isinstance(initargs, tuple): + raise TypeError('"initargs" must be a tuple') + return Pool(process_num, initializer, initargs) + + +def track_parallel_progress( + func, + tasks, + nproc, + initializer=None, + initargs=None, + bar_width=50, + chunksize=1, + skip_first=False, + keep_order=True, + file=sys.stdout, +): + """Track the progress of parallel task execution with a progress bar. + + The built-in :mod:`multiprocessing` module is used for process pools and + tasks are done with :func:`Pool.map` or :func:`Pool.imap_unordered`. + + Args: + func (callable): The function to be applied to each task. + tasks (list or tuple[Iterable, int]): A list of tasks or + (tasks, total num). + nproc (int): Process (worker) number. + initializer (None or callable): Refer to :class:`multiprocessing.Pool` + for details. + initargs (None or tuple): Refer to :class:`multiprocessing.Pool` for + details. + chunksize (int): Refer to :class:`multiprocessing.Pool` for details. + bar_width (int): Width of progress bar. + skip_first (bool): Whether to skip the first sample for each worker + when estimating fps, since the initialization step may takes + longer. + keep_order (bool): If True, :func:`Pool.imap` is used, otherwise + :func:`Pool.imap_unordered` is used. + + Returns: + list: The task results. + """ + if isinstance(tasks, tuple): + assert len(tasks) == 2 + assert isinstance(tasks[0], Iterable) + assert isinstance(tasks[1], int) + task_num = tasks[1] + tasks = tasks[0] + elif isinstance(tasks, Iterable): + task_num = len(tasks) + else: + raise TypeError('"tasks" must be an iterable object or a (iterator, int) tuple') + pool = init_pool(nproc, initializer, initargs) + start = not skip_first + task_num -= nproc * chunksize * int(skip_first) + prog_bar = ProgressBar(task_num, bar_width, start, file=file) + results = [] + if keep_order: + gen = pool.imap(func, tasks, chunksize) + else: + gen = pool.imap_unordered(func, tasks, chunksize) + for result in gen: + results.append(result) + if skip_first: + if len(results) < nproc * chunksize: + continue + elif len(results) == nproc * chunksize: + prog_bar.start() + continue + prog_bar.update() + prog_bar.file.write('\n') + pool.close() + pool.join() + return results + + +def track_iter_progress(tasks, bar_width=50, file=sys.stdout): + """Track the progress of tasks iteration or enumeration with a progress + bar. + + Tasks are yielded with a simple for-loop. + + Args: + tasks (list or tuple[Iterable, int]): A list of tasks or + (tasks, total num). + bar_width (int): Width of progress bar. + + Yields: + list: The task results. + """ + if isinstance(tasks, tuple): + assert len(tasks) == 2 + assert isinstance(tasks[0], Iterable) + assert isinstance(tasks[1], int) + task_num = tasks[1] + tasks = tasks[0] + elif isinstance(tasks, Iterable): + task_num = len(tasks) + else: + raise TypeError('"tasks" must be an iterable object or a (iterator, int) tuple') + prog_bar = ProgressBar(task_num, bar_width, file=file) + for task in tasks: + yield task + prog_bar.update() + prog_bar.file.write('\n') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py new file mode 100644 index 000000000000..d5433ed2b063 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/registry.py @@ -0,0 +1,303 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import inspect +import warnings +from functools import partial + +from .misc import is_seq_of + + +def build_from_cfg(cfg, registry, default_args=None): + """Build a module from config dict. + + Args: + cfg (dict): Config dict. It should at least contain the key "type". + registry (:obj:`Registry`): The registry to search the type from. + default_args (dict, optional): Default initialization arguments. + + Returns: + object: The constructed object. + """ + if not isinstance(cfg, dict): + raise TypeError(f'cfg must be a dict, but got {type(cfg)}') + if 'type' not in cfg: + if default_args is None or 'type' not in default_args: + raise KeyError('`cfg` or `default_args` must contain the key "type", ' f'but got {cfg}\n{default_args}') + if not isinstance(registry, Registry): + raise TypeError('registry must be an mmcv.Registry object, ' f'but got {type(registry)}') + if not (isinstance(default_args, dict) or default_args is None): + raise TypeError('default_args must be a dict or None, ' f'but got {type(default_args)}') + + args = cfg.copy() + + if default_args is not None: + for name, value in default_args.items(): + args.setdefault(name, value) + + obj_type = args.pop('type') + if isinstance(obj_type, str): + obj_cls = registry.get(obj_type) + if obj_cls is None: + raise KeyError(f'{obj_type} is not in the {registry.name} registry') + elif inspect.isclass(obj_type): + obj_cls = obj_type + else: + raise TypeError(f'type must be a str or valid type, but got {type(obj_type)}') + try: + return obj_cls(**args) + except Exception as e: + # Normal TypeError does not print class name. + raise type(e)(f'{obj_cls.__name__}: {e}') + + +class Registry: + """A registry to map strings to classes. + + Registered object could be built from registry. + Example: + >>> MODELS = Registry('models') + >>> @MODELS.register_module() + >>> class ResNet: + >>> pass + >>> resnet = MODELS.build(dict(type='ResNet')) + + Please refer to + https://mmcv.readthedocs.io/en/latest/understand_mmcv/registry.html for + advanced usage. + + Args: + name (str): Registry name. + build_func(func, optional): Build function to construct instance from + Registry, func:`build_from_cfg` is used if neither ``parent`` or + ``build_func`` is specified. If ``parent`` is specified and + ``build_func`` is not given, ``build_func`` will be inherited + from ``parent``. Default: None. + parent (Registry, optional): Parent registry. The class registered in + children registry could be built from parent. Default: None. + scope (str, optional): The scope of registry. It is the key to search + for children registry. If not specified, scope will be the name of + the package where class is defined, e.g. mmdet, mmcls, mmseg. + Default: None. + """ + + def __init__(self, name, build_func=None, parent=None, scope=None): + self._name = name + self._module_dict = dict() + self._children = dict() + self._scope = self.infer_scope() if scope is None else scope + + # self.build_func will be set with the following priority: + # 1. build_func + # 2. parent.build_func + # 3. build_from_cfg + if build_func is None: + if parent is not None: + self.build_func = parent.build_func + else: + self.build_func = build_from_cfg + else: + self.build_func = build_func + if parent is not None: + assert isinstance(parent, Registry) + parent._add_children(self) + self.parent = parent + else: + self.parent = None + + def __len__(self): + return len(self._module_dict) + + def __contains__(self, key): + return self.get(key) is not None + + def __repr__(self): + format_str = self.__class__.__name__ + f'(name={self._name}, ' f'items={self._module_dict})' + return format_str + + @staticmethod + def infer_scope(): + """Infer the scope of registry. + + The name of the package where registry is defined will be returned. + + Example: + # in mmdet/models/backbone/resnet.py + >>> MODELS = Registry('models') + >>> @MODELS.register_module() + >>> class ResNet: + >>> pass + The scope of ``ResNet`` will be ``mmdet``. + + + Returns: + scope (str): The inferred scope name. + """ + # inspect.stack() trace where this function is called, the index-2 + # indicates the frame where `infer_scope()` is called + filename = inspect.getmodule(inspect.stack()[2][0]).__name__ + split_filename = filename.split('.') + return split_filename[0] + + @staticmethod + def split_scope_key(key): + """Split scope and key. + + The first scope will be split from key. + + Examples: + >>> Registry.split_scope_key('mmdet.ResNet') + 'mmdet', 'ResNet' + >>> Registry.split_scope_key('ResNet') + None, 'ResNet' + + Return: + scope (str, None): The first scope. + key (str): The remaining key. + """ + split_index = key.find('.') + if split_index != -1: + return key[:split_index], key[split_index + 1 :] + else: + return None, key + + @property + def name(self): + return self._name + + @property + def scope(self): + return self._scope + + @property + def module_dict(self): + return self._module_dict + + @property + def children(self): + return self._children + + def get(self, key): + """Get the registry record. + + Args: + key (str): The class name in string format. + + Returns: + class: The corresponding class. + """ + scope, real_key = self.split_scope_key(key) + if scope is None or scope == self._scope: + # get from self + if real_key in self._module_dict: + return self._module_dict[real_key] + else: + # get from self._children + if scope in self._children: + return self._children[scope].get(real_key) + else: + # goto root + parent = self.parent + while parent.parent is not None: + parent = parent.parent + return parent.get(key) + + def build(self, *args, **kwargs): + return self.build_func(*args, **kwargs, registry=self) + + def _add_children(self, registry): + """Add children for a registry. + + The ``registry`` will be added as children based on its scope. + The parent registry could build objects from children registry. + + Example: + >>> models = Registry('models') + >>> mmdet_models = Registry('models', parent=models) + >>> @mmdet_models.register_module() + >>> class ResNet: + >>> pass + >>> resnet = models.build(dict(type='mmdet.ResNet')) + """ + + assert isinstance(registry, Registry) + assert registry.scope is not None + assert registry.scope not in self.children, f'scope {registry.scope} exists in {self.name} registry' + self.children[registry.scope] = registry + + def _register_module(self, module_class, module_name=None, force=False): + if not inspect.isclass(module_class): + raise TypeError('module must be a class, ' f'but got {type(module_class)}') + + if module_name is None: + module_name = module_class.__name__ + if isinstance(module_name, str): + module_name = [module_name] + for name in module_name: + if not force and name in self._module_dict: + raise KeyError(f'{name} is already registered ' f'in {self.name}') + self._module_dict[name] = module_class + + def deprecated_register_module(self, cls=None, force=False): + warnings.warn( + 'The old API of register_module(module, force=False) ' + 'is deprecated and will be removed, please use the new API ' + 'register_module(name=None, force=False, module=None) instead.' + ) + if cls is None: + return partial(self.deprecated_register_module, force=force) + self._register_module(cls, force=force) + return cls + + def register_module(self, name=None, force=False, module=None): + """Register a module. + + A record will be added to `self._module_dict`, whose key is the class + name or the specified name, and value is the class itself. + It can be used as a decorator or a normal function. + + Example: + >>> backbones = Registry('backbone') + >>> @backbones.register_module() + >>> class ResNet: + >>> pass + + >>> backbones = Registry('backbone') + >>> @backbones.register_module(name='mnet') + >>> class MobileNet: + >>> pass + + >>> backbones = Registry('backbone') + >>> class ResNet: + >>> pass + >>> backbones.register_module(ResNet) + + Args: + name (str | None): The module name to be registered. If not + specified, the class name will be used. + force (bool, optional): Whether to override an existing class with + the same name. Default: False. + module (type): Module class to be registered. + """ + if not isinstance(force, bool): + raise TypeError(f'force must be a boolean, but got {type(force)}') + # NOTE: This is a walkaround to be compatible with the old api, + # while it may introduce unexpected bugs. + if isinstance(name, type): + return self.deprecated_register_module(name, force=force) + + # raise the error ahead of time + if not (name is None or isinstance(name, str) or is_seq_of(name, str)): + raise TypeError( + 'name must be either of None, an instance of str or a sequence' f' of str, but got {type(name)}' + ) + + # use it as a normal method: x.register_module(module=SomeClass) + if module is not None: + self._register_module(module_class=module, module_name=name, force=force) + return module + + # use it as a decorator: @x.register_module() + def _register(cls): + self._register_module(module_class=cls, module_name=name, force=force) + return cls + + return _register diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py new file mode 100644 index 000000000000..4ba7d184d326 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/testing.py @@ -0,0 +1,138 @@ +# Copyright (c) Open-MMLab. +import sys +from collections.abc import Iterable +from runpy import run_path +from shlex import split +from typing import Any, Dict, List +from unittest.mock import patch + + +def check_python_script(cmd): + """Run the python cmd script with `__main__`. The difference between + `os.system` is that, this function exectues code in the current process, so + that it can be tracked by coverage tools. Currently it supports two forms: + + - ./tests/data/scripts/hello.py zz + - python tests/data/scripts/hello.py zz + """ + args = split(cmd) + if args[0] == 'python': + args = args[1:] + with patch.object(sys, 'argv', args): + run_path(args[0], run_name='__main__') + + +def _any(judge_result): + """Since built-in ``any`` works only when the element of iterable is not + iterable, implement the function.""" + if not isinstance(judge_result, Iterable): + return judge_result + + try: + for element in judge_result: + if _any(element): + return True + except TypeError: + # Maybe encounter the case: torch.tensor(True) | torch.tensor(False) + if judge_result: + return True + return False + + +def assert_dict_contains_subset(dict_obj: Dict[Any, Any], expected_subset: Dict[Any, Any]) -> bool: + """Check if the dict_obj contains the expected_subset. + + Args: + dict_obj (Dict[Any, Any]): Dict object to be checked. + expected_subset (Dict[Any, Any]): Subset expected to be contained in + dict_obj. + + Returns: + bool: Whether the dict_obj contains the expected_subset. + """ + + for key, value in expected_subset.items(): + if key not in dict_obj.keys() or _any(dict_obj[key] != value): + return False + return True + + +def assert_attrs_equal(obj: Any, expected_attrs: Dict[str, Any]) -> bool: + """Check if attribute of class object is correct. + + Args: + obj (object): Class object to be checked. + expected_attrs (Dict[str, Any]): Dict of the expected attrs. + + Returns: + bool: Whether the attribute of class object is correct. + """ + for attr, value in expected_attrs.items(): + if not hasattr(obj, attr) or _any(getattr(obj, attr) != value): + return False + return True + + +def assert_dict_has_keys(obj: Dict[str, Any], expected_keys: List[str]) -> bool: + """Check if the obj has all the expected_keys. + + Args: + obj (Dict[str, Any]): Object to be checked. + expected_keys (List[str]): Keys expected to contained in the keys of + the obj. + + Returns: + bool: Whether the obj has the expected keys. + """ + return set(expected_keys).issubset(set(obj.keys())) + + +def assert_keys_equal(result_keys: List[str], target_keys: List[str]) -> bool: + """Check if target_keys is equal to result_keys. + + Args: + result_keys (List[str]): Result keys to be checked. + target_keys (List[str]): Target keys to be checked. + + Returns: + bool: Whether target_keys is equal to result_keys. + """ + return set(result_keys) == set(target_keys) + + +def assert_is_norm_layer(module) -> bool: + """Check if the module is a norm layer. + + Args: + module (nn.Module): The module to be checked. + + Returns: + bool: Whether the module is a norm layer. + """ + from torch.nn import GroupNorm, LayerNorm + + from .parrots_wrapper import _BatchNorm, _InstanceNorm + + norm_layer_candidates = (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm) + return isinstance(module, norm_layer_candidates) + + +def assert_params_all_zeros(module) -> bool: + """Check if the parameters of the module is all zeros. + + Args: + module (nn.Module): The module to be checked. + + Returns: + bool: Whether the parameters of the module is all zeros. + """ + weight_data = module.weight.data + is_weight_zero = weight_data.allclose(weight_data.new_zeros(weight_data.size())) + + if hasattr(module, 'bias') and module.bias is not None: + bias_data = module.bias.data + is_bias_zero = bias_data.allclose(bias_data.new_zeros(bias_data.size())) + else: + is_bias_zero = True + + return is_weight_zero and is_bias_zero diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py new file mode 100644 index 000000000000..c20892b088e0 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/timer.py @@ -0,0 +1,117 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from time import time + + +class TimerError(Exception): + def __init__(self, message): + self.message = message + super(TimerError, self).__init__(message) + + +class Timer: + """A flexible Timer class. + + :Example: + + >>> import time + >>> import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + >>> with mmcv.Timer(): + >>> # simulate a code block that will run for 1s + >>> time.sleep(1) + 1.000 + >>> with mmcv.Timer(print_tmpl='it takes {:.1f} seconds'): + >>> # simulate a code block that will run for 1s + >>> time.sleep(1) + it takes 1.0 seconds + >>> timer = mmcv.Timer() + >>> time.sleep(0.5) + >>> print(timer.since_start()) + 0.500 + >>> time.sleep(0.5) + >>> print(timer.since_last_check()) + 0.500 + >>> print(timer.since_start()) + 1.000 + """ + + def __init__(self, start=True, print_tmpl=None): + self._is_running = False + self.print_tmpl = print_tmpl if print_tmpl else '{:.3f}' + if start: + self.start() + + @property + def is_running(self): + """bool: indicate whether the timer is running""" + return self._is_running + + def __enter__(self): + self.start() + return self + + def __exit__(self, type, value, traceback): + print(self.print_tmpl.format(self.since_last_check())) + self._is_running = False + + def start(self): + """Start the timer.""" + if not self._is_running: + self._t_start = time() + self._is_running = True + self._t_last = time() + + def since_start(self): + """Total time since the timer is started. + + Returns (float): Time in seconds. + """ + if not self._is_running: + raise TimerError('timer is not running') + self._t_last = time() + return self._t_last - self._t_start + + def since_last_check(self): + """Time since the last checking. + + Either :func:`since_start` or :func:`since_last_check` is a checking + operation. + + Returns (float): Time in seconds. + """ + if not self._is_running: + raise TimerError('timer is not running') + dur = time() - self._t_last + self._t_last = time() + return dur + + +_g_timers = {} # global timers + + +def check_time(timer_id): + """Add check points in a single line. + + This method is suitable for running a task on a list of items. A timer will + be registered when the method is called for the first time. + + :Example: + + >>> import time + >>> import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + >>> for i in range(1, 6): + >>> # simulate a code block + >>> time.sleep(i) + >>> mmcv.check_time('task1') + 2.000 + 3.000 + 4.000 + 5.000 + + Args: + timer_id (str): Timer identifier. + """ + if timer_id not in _g_timers: + _g_timers[timer_id] = Timer() + return 0 + else: + return _g_timers[timer_id].since_last_check() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py new file mode 100644 index 000000000000..12f297ee2eaa --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/trace.py @@ -0,0 +1,24 @@ +import warnings + +import torch + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import digit_version + + +def is_jit_tracing() -> bool: + if torch.__version__ != 'parrots' and digit_version(torch.__version__) >= digit_version('1.6.0'): + on_trace = torch.jit.is_tracing() + # In PyTorch 1.6, torch.jit.is_tracing has a bug. + # Refers to https://github.com/pytorch/pytorch/issues/42448 + if isinstance(on_trace, bool): + return on_trace + else: + return torch._C._is_tracing() + else: + warnings.warn( + 'torch.jit.is_tracing is only supported after v1.6.0. ' + 'Therefore is_tracing returns False automatically. Please ' + 'set on_trace manually if you are using trace.', + UserWarning, + ) + return False diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py new file mode 100644 index 000000000000..a0abd9d4596e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/utils/version_utils.py @@ -0,0 +1,88 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import subprocess +import warnings + +from packaging.version import parse + + +def digit_version(version_str: str, length: int = 4): + """Convert a version string into a tuple of integers. + + This method is usually used for comparing two versions. For pre-release + versions: alpha < beta < rc. + + Args: + version_str (str): The version string. + length (int): The maximum number of version levels. Default: 4. + + Returns: + tuple[int]: The version info in digits (integers). + """ + assert 'parrots' not in version_str + version = parse(version_str) + assert version.release, f'failed to parse version {version_str}' + release = list(version.release) + release = release[:length] + if len(release) < length: + release = release + [0] * (length - len(release)) + if version.is_prerelease: + mapping = {'a': -3, 'b': -2, 'rc': -1} + val = -4 + # version.pre can be None + if version.pre: + if version.pre[0] not in mapping: + warnings.warn(f'unknown prerelease version {version.pre[0]}, ' 'version checking may go wrong') + else: + val = mapping[version.pre[0]] + release.extend([val, version.pre[-1]]) + else: + release.extend([val, 0]) + + elif version.is_postrelease: + release.extend([1, version.post]) + else: + release.extend([0, 0]) + return tuple(release) + + +def _minimal_ext_cmd(cmd): + # construct minimal environment + env = {} + for k in ['SYSTEMROOT', 'PATH', 'HOME']: + v = os.environ.get(k) + if v is not None: + env[k] = v + # LANGUAGE is used on win32 + env['LANGUAGE'] = 'C' + env['LANG'] = 'C' + env['LC_ALL'] = 'C' + out = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env).communicate()[0] + return out + + +def get_git_hash(fallback='unknown', digits=None): + """Get the git hash of the current repo. + + Args: + fallback (str, optional): The fallback string when git hash is + unavailable. Defaults to 'unknown'. + digits (int, optional): kept digits of the hash. Defaults to None, + meaning all digits are kept. + + Returns: + str: Git commit hash. + """ + + if digits is not None and not isinstance(digits, int): + raise TypeError('digits must be None or an integer') + + try: + out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) + sha = out.strip().decode('ascii') + if digits is not None: + sha = sha[:digits] + except OSError: + sha = fallback + + return sha diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py new file mode 100644 index 000000000000..50d390de11c6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/version.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. +__version__ = '1.3.17' + + +def parse_version_info(version_str: str, length: int = 4) -> tuple: + """Parse a version string into a tuple. + + Args: + version_str (str): The version string. + length (int): The maximum number of version levels. Default: 4. + + Returns: + tuple[int | str]: The version info, e.g., "1.3.0" is parsed into + (1, 3, 0, 0, 0, 0), and "2.0.0rc1" is parsed into + (2, 0, 0, 0, 'rc', 1) (when length is set to 4). + """ + from packaging.version import parse + + version = parse(version_str) + assert version.release, f'failed to parse version {version_str}' + release = list(version.release) + release = release[:length] + if len(release) < length: + release = release + [0] * (length - len(release)) + if version.is_prerelease: + release.extend(list(version.pre)) + elif version.is_postrelease: + release.extend(list(version.post)) + else: + release.extend([0, 0]) + return tuple(release) + + +version_info = tuple(int(x) for x in __version__.split('.')[:3]) + +__all__ = ['__version__', 'version_info', 'parse_version_info'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py new file mode 100644 index 000000000000..71e5ece71438 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/__init__.py @@ -0,0 +1,29 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .io import Cache, VideoReader, frames2video +from .optflow import ( + dequantize_flow, + flow_from_bytes, + flow_warp, + flowread, + flowwrite, + quantize_flow, + sparse_flow_from_bytes, +) +from .processing import concat_video, convert_video, cut_video, resize_video + +__all__ = [ + 'Cache', + 'VideoReader', + 'frames2video', + 'convert_video', + 'resize_video', + 'cut_video', + 'concat_video', + 'flowread', + 'flowwrite', + 'quantize_flow', + 'dequantize_flow', + 'flow_warp', + 'flow_from_bytes', + 'sparse_flow_from_bytes', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py new file mode 100644 index 000000000000..43363f2dd8ed --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/io.py @@ -0,0 +1,310 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +from collections import OrderedDict + +import cv2 +from cv2 import ( + CAP_PROP_FOURCC, + CAP_PROP_FPS, + CAP_PROP_FRAME_COUNT, + CAP_PROP_FRAME_HEIGHT, + CAP_PROP_FRAME_WIDTH, + CAP_PROP_POS_FRAMES, + VideoWriter_fourcc, +) + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import ( + check_file_exist, + mkdir_or_exist, + scandir, + track_progress, +) + + +class Cache: + def __init__(self, capacity): + self._cache = OrderedDict() + self._capacity = int(capacity) + if capacity <= 0: + raise ValueError('capacity must be a positive integer') + + @property + def capacity(self): + return self._capacity + + @property + def size(self): + return len(self._cache) + + def put(self, key, val): + if key in self._cache: + return + if len(self._cache) >= self.capacity: + self._cache.popitem(last=False) + self._cache[key] = val + + def get(self, key, default=None): + val = self._cache[key] if key in self._cache else default + return val + + +class VideoReader: + """Video class with similar usage to a list object. + + This video warpper class provides convenient apis to access frames. + There exists an issue of OpenCV's VideoCapture class that jumping to a + certain frame may be inaccurate. It is fixed in this class by checking + the position after jumping each time. + Cache is used when decoding videos. So if the same frame is visited for + the second time, there is no need to decode again if it is stored in the + cache. + + :Example: + + >>> import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + >>> v = mmcv.VideoReader('sample.mp4') + >>> len(v) # get the total frame number with `len()` + 120 + >>> for img in v: # v is iterable + >>> mmcv.imshow(img) + >>> v[5] # get the 6th frame + """ + + def __init__(self, filename, cache_capacity=10): + # Check whether the video path is a url + if not filename.startswith(('https://', 'http://')): + check_file_exist(filename, 'Video file not found: ' + filename) + self._vcap = cv2.VideoCapture(filename) + assert cache_capacity > 0 + self._cache = Cache(cache_capacity) + self._position = 0 + # get basic info + self._width = int(self._vcap.get(CAP_PROP_FRAME_WIDTH)) + self._height = int(self._vcap.get(CAP_PROP_FRAME_HEIGHT)) + self._fps = self._vcap.get(CAP_PROP_FPS) + self._frame_cnt = int(self._vcap.get(CAP_PROP_FRAME_COUNT)) + self._fourcc = self._vcap.get(CAP_PROP_FOURCC) + + @property + def vcap(self): + """:obj:`cv2.VideoCapture`: The raw VideoCapture object.""" + return self._vcap + + @property + def opened(self): + """bool: Indicate whether the video is opened.""" + return self._vcap.isOpened() + + @property + def width(self): + """int: Width of video frames.""" + return self._width + + @property + def height(self): + """int: Height of video frames.""" + return self._height + + @property + def resolution(self): + """tuple: Video resolution (width, height).""" + return (self._width, self._height) + + @property + def fps(self): + """float: FPS of the video.""" + return self._fps + + @property + def frame_cnt(self): + """int: Total frames of the video.""" + return self._frame_cnt + + @property + def fourcc(self): + """str: "Four character code" of the video.""" + return self._fourcc + + @property + def position(self): + """int: Current cursor position, indicating frame decoded.""" + return self._position + + def _get_real_position(self): + return int(round(self._vcap.get(CAP_PROP_POS_FRAMES))) + + def _set_real_position(self, frame_id): + self._vcap.set(CAP_PROP_POS_FRAMES, frame_id) + pos = self._get_real_position() + for _ in range(frame_id - pos): + self._vcap.read() + self._position = frame_id + + def read(self): + """Read the next frame. + + If the next frame have been decoded before and in the cache, then + return it directly, otherwise decode, cache and return it. + + Returns: + ndarray or None: Return the frame if successful, otherwise None. + """ + # pos = self._position + if self._cache: + img = self._cache.get(self._position) + if img is not None: + ret = True + else: + if self._position != self._get_real_position(): + self._set_real_position(self._position) + ret, img = self._vcap.read() + if ret: + self._cache.put(self._position, img) + else: + ret, img = self._vcap.read() + if ret: + self._position += 1 + return img + + def get_frame(self, frame_id): + """Get frame by index. + + Args: + frame_id (int): Index of the expected frame, 0-based. + + Returns: + ndarray or None: Return the frame if successful, otherwise None. + """ + if frame_id < 0 or frame_id >= self._frame_cnt: + raise IndexError(f'"frame_id" must be between 0 and {self._frame_cnt - 1}') + if frame_id == self._position: + return self.read() + if self._cache: + img = self._cache.get(frame_id) + if img is not None: + self._position = frame_id + 1 + return img + self._set_real_position(frame_id) + ret, img = self._vcap.read() + if ret: + if self._cache: + self._cache.put(self._position, img) + self._position += 1 + return img + + def current_frame(self): + """Get the current frame (frame that is just visited). + + Returns: + ndarray or None: If the video is fresh, return None, otherwise + return the frame. + """ + if self._position == 0: + return None + return self._cache.get(self._position - 1) + + def cvt2frames(self, frame_dir, file_start=0, filename_tmpl='{:06d}.jpg', start=0, max_num=0, show_progress=True): + """Convert a video to frame images. + + Args: + frame_dir (str): Output directory to store all the frame images. + file_start (int): Filenames will start from the specified number. + filename_tmpl (str): Filename template with the index as the + placeholder. + start (int): The starting frame index. + max_num (int): Maximum number of frames to be written. + show_progress (bool): Whether to show a progress bar. + """ + mkdir_or_exist(frame_dir) + if max_num == 0: + task_num = self.frame_cnt - start + else: + task_num = min(self.frame_cnt - start, max_num) + if task_num <= 0: + raise ValueError('start must be less than total frame number') + if start > 0: + self._set_real_position(start) + + def write_frame(file_idx): + img = self.read() + if img is None: + return + filename = osp.join(frame_dir, filename_tmpl.format(file_idx)) + cv2.imwrite(filename, img) + + if show_progress: + track_progress(write_frame, range(file_start, file_start + task_num)) + else: + for i in range(task_num): + write_frame(file_start + i) + + def __len__(self): + return self.frame_cnt + + def __getitem__(self, index): + if isinstance(index, slice): + return [self.get_frame(i) for i in range(*index.indices(self.frame_cnt))] + # support negative indexing + if index < 0: + index += self.frame_cnt + if index < 0: + raise IndexError('index out of range') + return self.get_frame(index) + + def __iter__(self): + self._set_real_position(0) + return self + + def __next__(self): + img = self.read() + if img is not None: + return img + else: + raise StopIteration + + next = __next__ + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._vcap.release() + + +def frames2video( + frame_dir, video_file, fps=30, fourcc='XVID', filename_tmpl='{:06d}.jpg', start=0, end=0, show_progress=True +): + """Read the frame images from a directory and join them as a video. + + Args: + frame_dir (str): The directory containing video frames. + video_file (str): Output filename. + fps (float): FPS of the output video. + fourcc (str): Fourcc of the output video, this should be compatible + with the output file type. + filename_tmpl (str): Filename template with the index as the variable. + start (int): Starting frame index. + end (int): Ending frame index. + show_progress (bool): Whether to show a progress bar. + """ + if end == 0: + ext = filename_tmpl.split('.')[-1] + end = len([name for name in scandir(frame_dir, ext)]) + first_file = osp.join(frame_dir, filename_tmpl.format(start)) + check_file_exist(first_file, 'The start frame not found: ' + first_file) + img = cv2.imread(first_file) + height, width = img.shape[:2] + resolution = (width, height) + vwriter = cv2.VideoWriter(video_file, VideoWriter_fourcc(*fourcc), fps, resolution) + + def write_frame(file_idx): + filename = osp.join(frame_dir, filename_tmpl.format(file_idx)) + img = cv2.imread(filename) + vwriter.write(img) + + if show_progress: + track_progress(write_frame, range(start, end)) + else: + for i in range(start, end): + write_frame(i) + vwriter.release() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py new file mode 100644 index 000000000000..90b81c714f05 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/optflow.py @@ -0,0 +1,248 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import cv2 +import numpy as np + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.arraymisc import dequantize, quantize +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.image import imread, imwrite +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_str + + +def flowread(flow_or_path, quantize=False, concat_axis=0, *args, **kwargs): + """Read an optical flow map. + + Args: + flow_or_path (ndarray or str): A flow map or filepath. + quantize (bool): whether to read quantized pair, if set to True, + remaining args will be passed to :func:`dequantize_flow`. + concat_axis (int): The axis that dx and dy are concatenated, + can be either 0 or 1. Ignored if quantize is False. + + Returns: + ndarray: Optical flow represented as a (h, w, 2) numpy array + """ + if isinstance(flow_or_path, np.ndarray): + if (flow_or_path.ndim != 3) or (flow_or_path.shape[-1] != 2): + raise ValueError(f'Invalid flow with shape {flow_or_path.shape}') + return flow_or_path + elif not is_str(flow_or_path): + raise TypeError(f'"flow_or_path" must be a filename or numpy array, ' f'not {type(flow_or_path)}') + + if not quantize: + with open(flow_or_path, 'rb') as f: + try: + header = f.read(4).decode('utf-8') + except Exception: + raise IOError(f'Invalid flow file: {flow_or_path}') + else: + if header != 'PIEH': + raise IOError(f'Invalid flow file: {flow_or_path}, ' 'header does not contain PIEH') + + w = np.fromfile(f, np.int32, 1).squeeze() + h = np.fromfile(f, np.int32, 1).squeeze() + flow = np.fromfile(f, np.float32, w * h * 2).reshape((h, w, 2)) + else: + assert concat_axis in [0, 1] + cat_flow = imread(flow_or_path, flag='unchanged') + if cat_flow.ndim != 2: + raise IOError(f'{flow_or_path} is not a valid quantized flow file, ' f'its dimension is {cat_flow.ndim}.') + assert cat_flow.shape[concat_axis] % 2 == 0 + dx, dy = np.split(cat_flow, 2, axis=concat_axis) + flow = dequantize_flow(dx, dy, *args, **kwargs) + + return flow.astype(np.float32) + + +def flowwrite(flow, filename, quantize=False, concat_axis=0, *args, **kwargs): + """Write optical flow to file. + + If the flow is not quantized, it will be saved as a .flo file losslessly, + otherwise a jpeg image which is lossy but of much smaller size. (dx and dy + will be concatenated horizontally into a single image if quantize is True.) + + Args: + flow (ndarray): (h, w, 2) array of optical flow. + filename (str): Output filepath. + quantize (bool): Whether to quantize the flow and save it to 2 jpeg + images. If set to True, remaining args will be passed to + :func:`quantize_flow`. + concat_axis (int): The axis that dx and dy are concatenated, + can be either 0 or 1. Ignored if quantize is False. + """ + if not quantize: + with open(filename, 'wb') as f: + f.write('PIEH'.encode('utf-8')) + np.array([flow.shape[1], flow.shape[0]], dtype=np.int32).tofile(f) + flow = flow.astype(np.float32) + flow.tofile(f) + f.flush() + else: + assert concat_axis in [0, 1] + dx, dy = quantize_flow(flow, *args, **kwargs) + dxdy = np.concatenate((dx, dy), axis=concat_axis) + imwrite(dxdy, filename) + + +def quantize_flow(flow, max_val=0.02, norm=True): + """Quantize flow to [0, 255]. + + After this step, the size of flow will be much smaller, and can be + dumped as jpeg images. + + Args: + flow (ndarray): (h, w, 2) array of optical flow. + max_val (float): Maximum value of flow, values beyond + [-max_val, max_val] will be truncated. + norm (bool): Whether to divide flow values by image width/height. + + Returns: + tuple[ndarray]: Quantized dx and dy. + """ + h, w, _ = flow.shape + dx = flow[..., 0] + dy = flow[..., 1] + if norm: + dx = dx / w # avoid inplace operations + dy = dy / h + # use 255 levels instead of 256 to make sure 0 is 0 after dequantization. + flow_comps = [quantize(d, -max_val, max_val, 255, np.uint8) for d in [dx, dy]] + return tuple(flow_comps) + + +def dequantize_flow(dx, dy, max_val=0.02, denorm=True): + """Recover from quantized flow. + + Args: + dx (ndarray): Quantized dx. + dy (ndarray): Quantized dy. + max_val (float): Maximum value used when quantizing. + denorm (bool): Whether to multiply flow values with width/height. + + Returns: + ndarray: Dequantized flow. + """ + assert dx.shape == dy.shape + assert dx.ndim == 2 or (dx.ndim == 3 and dx.shape[-1] == 1) + + dx, dy = [dequantize(d, -max_val, max_val, 255) for d in [dx, dy]] + + if denorm: + dx *= dx.shape[1] + dy *= dx.shape[0] + flow = np.dstack((dx, dy)) + return flow + + +def flow_warp(img, flow, filling_value=0, interpolate_mode='nearest'): + """Use flow to warp img. + + Args: + img (ndarray, float or uint8): Image to be warped. + flow (ndarray, float): Optical Flow. + filling_value (int): The missing pixels will be set with filling_value. + interpolate_mode (str): bilinear -> Bilinear Interpolation; + nearest -> Nearest Neighbor. + + Returns: + ndarray: Warped image with the same shape of img + """ + warnings.warn('This function is just for prototyping and cannot ' 'guarantee the computational efficiency.') + assert flow.ndim == 3, 'Flow must be in 3D arrays.' + height = flow.shape[0] + width = flow.shape[1] + channels = img.shape[2] + + output = np.ones((height, width, channels), dtype=img.dtype) * filling_value + + grid = np.indices((height, width)).swapaxes(0, 1).swapaxes(1, 2) + dx = grid[:, :, 0] + flow[:, :, 1] + dy = grid[:, :, 1] + flow[:, :, 0] + sx = np.floor(dx).astype(int) + sy = np.floor(dy).astype(int) + valid = (sx >= 0) & (sx < height - 1) & (sy >= 0) & (sy < width - 1) + + if interpolate_mode == 'nearest': + output[valid, :] = img[dx[valid].round().astype(int), dy[valid].round().astype(int), :] + elif interpolate_mode == 'bilinear': + # dirty walkround for integer positions + eps_ = 1e-6 + dx, dy = dx + eps_, dy + eps_ + left_top_ = ( + img[np.floor(dx[valid]).astype(int), np.floor(dy[valid]).astype(int), :] + * (np.ceil(dx[valid]) - dx[valid])[:, None] + * (np.ceil(dy[valid]) - dy[valid])[:, None] + ) + left_down_ = ( + img[np.ceil(dx[valid]).astype(int), np.floor(dy[valid]).astype(int), :] + * (dx[valid] - np.floor(dx[valid]))[:, None] + * (np.ceil(dy[valid]) - dy[valid])[:, None] + ) + right_top_ = ( + img[np.floor(dx[valid]).astype(int), np.ceil(dy[valid]).astype(int), :] + * (np.ceil(dx[valid]) - dx[valid])[:, None] + * (dy[valid] - np.floor(dy[valid]))[:, None] + ) + right_down_ = ( + img[np.ceil(dx[valid]).astype(int), np.ceil(dy[valid]).astype(int), :] + * (dx[valid] - np.floor(dx[valid]))[:, None] + * (dy[valid] - np.floor(dy[valid]))[:, None] + ) + output[valid, :] = left_top_ + left_down_ + right_top_ + right_down_ + else: + raise NotImplementedError( + 'We only support interpolation modes of nearest and bilinear, ' f'but got {interpolate_mode}.' + ) + return output.astype(img.dtype) + + +def flow_from_bytes(content): + """Read dense optical flow from bytes. + + .. note:: + This load optical flow function works for FlyingChairs, FlyingThings3D, + Sintel, FlyingChairsOcc datasets, but cannot load the data from + ChairsSDHom. + + Args: + content (bytes): Optical flow bytes got from files or other streams. + + Returns: + ndarray: Loaded optical flow with the shape (H, W, 2). + """ + + # header in first 4 bytes + header = content[:4] + if header.decode('utf-8') != 'PIEH': + raise Exception('Flow file header does not contain PIEH') + # width in second 4 bytes + width = np.frombuffer(content[4:], np.int32, 1).squeeze() + # height in third 4 bytes + height = np.frombuffer(content[8:], np.int32, 1).squeeze() + # after first 12 bytes, all bytes are flow + flow = np.frombuffer(content[12:], np.float32, width * height * 2).reshape((height, width, 2)) + + return flow + + +def sparse_flow_from_bytes(content): + """Read the optical flow in KITTI datasets from bytes. + + This function is modified from RAFT load the `KITTI datasets + `_. + + Args: + content (bytes): Optical flow bytes got from files or other streams. + + Returns: + Tuple(ndarray, ndarray): Loaded optical flow with the shape (H, W, 2) + and flow valid mask with the shape (H, W). + """ # nopa + + content = np.frombuffer(content, np.uint8) + flow = cv2.imdecode(content, cv2.IMREAD_ANYDEPTH | cv2.IMREAD_COLOR) + flow = flow[:, :, ::-1].astype(np.float32) + # flow shape (H, W, 2) valid shape (H, W) + flow, valid = flow[:, :, :2], flow[:, :, 2] + flow = (flow - 2 ** 15) / 64.0 + return flow, valid diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py new file mode 100644 index 000000000000..8933708ef744 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/video/processing.py @@ -0,0 +1,128 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import os.path as osp +import subprocess +import tempfile + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import requires_executable + + +@requires_executable('ffmpeg') +def convert_video(in_file, out_file, print_cmd=False, pre_options='', **kwargs): + """Convert a video with ffmpeg. + + This provides a general api to ffmpeg, the executed command is:: + + `ffmpeg -y -i ` + + Options(kwargs) are mapped to ffmpeg commands with the following rules: + + - key=val: "-key val" + - key=True: "-key" + - key=False: "" + + Args: + in_file (str): Input video filename. + out_file (str): Output video filename. + pre_options (str): Options appears before "-i ". + print_cmd (bool): Whether to print the final ffmpeg command. + """ + options = [] + for k, v in kwargs.items(): + if isinstance(v, bool): + if v: + options.append(f'-{k}') + elif k == 'log_level': + assert v in ['quiet', 'panic', 'fatal', 'error', 'warning', 'info', 'verbose', 'debug', 'trace'] + options.append(f'-loglevel {v}') + else: + options.append(f'-{k} {v}') + cmd = f'ffmpeg -y {pre_options} -i {in_file} {" ".join(options)} ' f'{out_file}' + if print_cmd: + print(cmd) + subprocess.call(cmd, shell=True) + + +@requires_executable('ffmpeg') +def resize_video(in_file, out_file, size=None, ratio=None, keep_ar=False, log_level='info', print_cmd=False): + """Resize a video. + + Args: + in_file (str): Input video filename. + out_file (str): Output video filename. + size (tuple): Expected size (w, h), eg, (320, 240) or (320, -1). + ratio (tuple or float): Expected resize ratio, (2, 0.5) means + (w*2, h*0.5). + keep_ar (bool): Whether to keep original aspect ratio. + log_level (str): Logging level of ffmpeg. + print_cmd (bool): Whether to print the final ffmpeg command. + """ + if size is None and ratio is None: + raise ValueError('expected size or ratio must be specified') + if size is not None and ratio is not None: + raise ValueError('size and ratio cannot be specified at the same time') + options = {'log_level': log_level} + if size: + if not keep_ar: + options['vf'] = f'scale={size[0]}:{size[1]}' + else: + options['vf'] = f'scale=w={size[0]}:h={size[1]}:' 'force_original_aspect_ratio=decrease' + else: + if not isinstance(ratio, tuple): + ratio = (ratio, ratio) + options['vf'] = f'scale="trunc(iw*{ratio[0]}):trunc(ih*{ratio[1]})"' + convert_video(in_file, out_file, print_cmd, **options) + + +@requires_executable('ffmpeg') +def cut_video(in_file, out_file, start=None, end=None, vcodec=None, acodec=None, log_level='info', print_cmd=False): + """Cut a clip from a video. + + Args: + in_file (str): Input video filename. + out_file (str): Output video filename. + start (None or float): Start time (in seconds). + end (None or float): End time (in seconds). + vcodec (None or str): Output video codec, None for unchanged. + acodec (None or str): Output audio codec, None for unchanged. + log_level (str): Logging level of ffmpeg. + print_cmd (bool): Whether to print the final ffmpeg command. + """ + options = {'log_level': log_level} + if vcodec is None: + options['vcodec'] = 'copy' + if acodec is None: + options['acodec'] = 'copy' + if start: + options['ss'] = start + else: + start = 0 + if end: + options['t'] = end - start + convert_video(in_file, out_file, print_cmd, **options) + + +@requires_executable('ffmpeg') +def concat_video(video_list, out_file, vcodec=None, acodec=None, log_level='info', print_cmd=False): + """Concatenate multiple videos into a single one. + + Args: + video_list (list): A list of video filenames + out_file (str): Output video filename + vcodec (None or str): Output video codec, None for unchanged + acodec (None or str): Output audio codec, None for unchanged + log_level (str): Logging level of ffmpeg. + print_cmd (bool): Whether to print the final ffmpeg command. + """ + tmp_filehandler, tmp_filename = tempfile.mkstemp(suffix='.txt', text=True) + with open(tmp_filename, 'w') as f: + for filename in video_list: + f.write(f'file {osp.abspath(filename)}\n') + options = {'log_level': log_level} + if vcodec is None: + options['vcodec'] = 'copy' + if acodec is None: + options['acodec'] = 'copy' + convert_video(tmp_filename, out_file, print_cmd, pre_options='-f concat -safe 0', **options) + os.close(tmp_filehandler) + os.remove(tmp_filename) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py new file mode 100644 index 000000000000..f336d6ce01b4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .color import Color, color_val +from .image import imshow, imshow_bboxes, imshow_det_bboxes +from .optflow import flow2rgb, flowshow, make_color_wheel + +__all__ = [ + 'Color', + 'color_val', + 'imshow', + 'imshow_bboxes', + 'imshow_det_bboxes', + 'flowshow', + 'flow2rgb', + 'make_color_wheel', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py new file mode 100644 index 000000000000..d2290a315f9e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/color.py @@ -0,0 +1,52 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from enum import Enum + +import numpy as np + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import is_str + + +class Color(Enum): + """An enum that defines common colors. + + Contains red, green, blue, cyan, yellow, magenta, white and black. + """ + + red = (0, 0, 255) + green = (0, 255, 0) + blue = (255, 0, 0) + cyan = (255, 255, 0) + yellow = (0, 255, 255) + magenta = (255, 0, 255) + white = (255, 255, 255) + black = (0, 0, 0) + + +def color_val(color): + """Convert various input to color tuples. + + Args: + color (:obj:`Color`/str/tuple/int/ndarray): Color inputs + + Returns: + tuple[int]: A tuple of 3 integers indicating BGR channels. + """ + if is_str(color): + return Color[color].value + elif isinstance(color, Color): + return color.value + elif isinstance(color, tuple): + assert len(color) == 3 + for channel in color: + assert 0 <= channel <= 255 + return color + elif isinstance(color, int): + assert 0 <= color <= 255 + return color, color, color + elif isinstance(color, np.ndarray): + assert color.ndim == 1 and color.size == 3 + assert np.all((color >= 0) & (color <= 255)) + color = color.astype(np.uint8) + return tuple(color) + else: + raise TypeError(f'Invalid type for color: {type(color)}') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py new file mode 100644 index 000000000000..feda6fa59520 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/image.py @@ -0,0 +1,144 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import cv2 +import numpy as np + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.image import imread, imwrite +from .color import color_val + + +def imshow(img, win_name='', wait_time=0): + """Show an image. + + Args: + img (str or ndarray): The image to be displayed. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + """ + cv2.imshow(win_name, imread(img)) + if wait_time == 0: # prevent from hanging if windows was closed + while True: + ret = cv2.waitKey(1) + + closed = cv2.getWindowProperty(win_name, cv2.WND_PROP_VISIBLE) < 1 + # if user closed window or if some key pressed + if closed or ret != -1: + break + else: + ret = cv2.waitKey(wait_time) + + +def imshow_bboxes( + img, bboxes, colors='green', top_k=-1, thickness=1, show=True, win_name='', wait_time=0, out_file=None +): + """Draw bboxes on an image. + + Args: + img (str or ndarray): The image to be displayed. + bboxes (list or ndarray): A list of ndarray of shape (k, 4). + colors (list[str or tuple or Color]): A list of colors. + top_k (int): Plot the first k bboxes only if set positive. + thickness (int): Thickness of lines. + show (bool): Whether to show the image. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + out_file (str, optional): The filename to write the image. + + Returns: + ndarray: The image with bboxes drawn on it. + """ + img = imread(img) + img = np.ascontiguousarray(img) + + if isinstance(bboxes, np.ndarray): + bboxes = [bboxes] + if not isinstance(colors, list): + colors = [colors for _ in range(len(bboxes))] + colors = [color_val(c) for c in colors] + assert len(bboxes) == len(colors) + + for i, _bboxes in enumerate(bboxes): + _bboxes = _bboxes.astype(np.int32) + if top_k <= 0: + _top_k = _bboxes.shape[0] + else: + _top_k = min(top_k, _bboxes.shape[0]) + for j in range(_top_k): + left_top = (_bboxes[j, 0], _bboxes[j, 1]) + right_bottom = (_bboxes[j, 2], _bboxes[j, 3]) + cv2.rectangle(img, left_top, right_bottom, colors[i], thickness=thickness) + + if show: + imshow(img, win_name, wait_time) + if out_file is not None: + imwrite(img, out_file) + return img + + +def imshow_det_bboxes( + img, + bboxes, + labels, + class_names=None, + score_thr=0, + bbox_color='green', + text_color='green', + thickness=1, + font_scale=0.5, + show=True, + win_name='', + wait_time=0, + out_file=None, +): + """Draw bboxes and class labels (with scores) on an image. + + Args: + img (str or ndarray): The image to be displayed. + bboxes (ndarray): Bounding boxes (with scores), shaped (n, 4) or + (n, 5). + labels (ndarray): Labels of bboxes. + class_names (list[str]): Names of each classes. + score_thr (float): Minimum score of bboxes to be shown. + bbox_color (str or tuple or :obj:`Color`): Color of bbox lines. + text_color (str or tuple or :obj:`Color`): Color of texts. + thickness (int): Thickness of lines. + font_scale (float): Font scales of texts. + show (bool): Whether to show the image. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + out_file (str or None): The filename to write the image. + + Returns: + ndarray: The image with bboxes drawn on it. + """ + assert bboxes.ndim == 2 + assert labels.ndim == 1 + assert bboxes.shape[0] == labels.shape[0] + assert bboxes.shape[1] == 4 or bboxes.shape[1] == 5 + img = imread(img) + img = np.ascontiguousarray(img) + + if score_thr > 0: + assert bboxes.shape[1] == 5 + scores = bboxes[:, -1] + inds = scores > score_thr + bboxes = bboxes[inds, :] + labels = labels[inds] + + bbox_color = color_val(bbox_color) + text_color = color_val(text_color) + + for bbox, label in zip(bboxes, labels): + bbox_int = bbox.astype(np.int32) + left_top = (bbox_int[0], bbox_int[1]) + right_bottom = (bbox_int[2], bbox_int[3]) + cv2.rectangle(img, left_top, right_bottom, bbox_color, thickness=thickness) + label_text = class_names[label] if class_names is not None else f'cls {label}' + if len(bbox) > 4: + label_text += f'|{bbox[-1]:.02f}' + cv2.putText(img, label_text, (bbox_int[0], bbox_int[1] - 2), cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color) + + if show: + imshow(img, win_name, wait_time) + if out_file is not None: + imwrite(img, out_file) + return img diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py new file mode 100644 index 000000000000..1954452dcda1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv/visualization/optflow.py @@ -0,0 +1,109 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from __future__ import division + +import numpy as np + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.image import rgb2bgr +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.video import flowread +from .image import imshow + + +def flowshow(flow, win_name='', wait_time=0): + """Show optical flow. + + Args: + flow (ndarray or str): The optical flow to be displayed. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + """ + flow = flowread(flow) + flow_img = flow2rgb(flow) + imshow(rgb2bgr(flow_img), win_name, wait_time) + + +def flow2rgb(flow, color_wheel=None, unknown_thr=1e6): + """Convert flow map to RGB image. + + Args: + flow (ndarray): Array of optical flow. + color_wheel (ndarray or None): Color wheel used to map flow field to + RGB colorspace. Default color wheel will be used if not specified. + unknown_thr (str): Values above this threshold will be marked as + unknown and thus ignored. + + Returns: + ndarray: RGB image that can be visualized. + """ + assert flow.ndim == 3 and flow.shape[-1] == 2 + if color_wheel is None: + color_wheel = make_color_wheel() + assert color_wheel.ndim == 2 and color_wheel.shape[1] == 3 + num_bins = color_wheel.shape[0] + + dx = flow[:, :, 0].copy() + dy = flow[:, :, 1].copy() + + ignore_inds = np.isnan(dx) | np.isnan(dy) | (np.abs(dx) > unknown_thr) | (np.abs(dy) > unknown_thr) + dx[ignore_inds] = 0 + dy[ignore_inds] = 0 + + rad = np.sqrt(dx ** 2 + dy ** 2) + if np.any(rad > np.finfo(float).eps): + max_rad = np.max(rad) + dx /= max_rad + dy /= max_rad + + rad = np.sqrt(dx ** 2 + dy ** 2) + angle = np.arctan2(-dy, -dx) / np.pi + + bin_real = (angle + 1) / 2 * (num_bins - 1) + bin_left = np.floor(bin_real).astype(int) + bin_right = (bin_left + 1) % num_bins + w = (bin_real - bin_left.astype(np.float32))[..., None] + flow_img = (1 - w) * color_wheel[bin_left, :] + w * color_wheel[bin_right, :] + small_ind = rad <= 1 + flow_img[small_ind] = 1 - rad[small_ind, None] * (1 - flow_img[small_ind]) + flow_img[np.logical_not(small_ind)] *= 0.75 + + flow_img[ignore_inds, :] = 0 + + return flow_img + + +def make_color_wheel(bins=None): + """Build a color wheel. + + Args: + bins(list or tuple, optional): Specify the number of bins for each + color range, corresponding to six ranges: red -> yellow, + yellow -> green, green -> cyan, cyan -> blue, blue -> magenta, + magenta -> red. [15, 6, 4, 11, 13, 6] is used for default + (see Middlebury). + + Returns: + ndarray: Color wheel of shape (total_bins, 3). + """ + if bins is None: + bins = [15, 6, 4, 11, 13, 6] + assert len(bins) == 6 + + RY, YG, GC, CB, BM, MR = tuple(bins) + + ry = [1, np.arange(RY) / RY, 0] + yg = [1 - np.arange(YG) / YG, 1, 0] + gc = [0, 1, np.arange(GC) / GC] + cb = [0, 1 - np.arange(CB) / CB, 1] + bm = [np.arange(BM) / BM, 0, 1] + mr = [1, 0, 1 - np.arange(MR) / MR] + + num_bins = RY + YG + GC + CB + BM + MR + + color_wheel = np.zeros((3, num_bins), dtype=np.float32) + + col = 0 + for i, color in enumerate([ry, yg, gc, cb, bm, mr]): + for j in range(3): + color_wheel[j, col : col + bins[i]] = color[j] + col += bins[i] + + return color_wheel.T diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py new file mode 100644 index 000000000000..7e0e39b03e2a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- + +from .checkpoint import load_checkpoint + +__all__ = ['load_checkpoint'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py new file mode 100644 index 000000000000..9f27d7fea454 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmcv_custom/checkpoint.py @@ -0,0 +1,477 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import io +import os +import os.path as osp +import pkgutil +import time +import warnings +from collections import OrderedDict +from importlib import import_module +from tempfile import TemporaryDirectory + +import torch +import torchvision +from torch.nn import functional as F +from torch.optim import Optimizer +from torch.utils import model_zoo + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio import FileClient +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.fileio import load as load_file +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import is_module_wrapper +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import get_dist_info +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import mkdir_or_exist + +ENV_MMCV_HOME = 'MMCV_HOME' +ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' +DEFAULT_CACHE_DIR = '~/.cache' + + +def _get_mmcv_home(): + mmcv_home = os.path.expanduser( + os.getenv(ENV_MMCV_HOME, os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv')) + ) + + mkdir_or_exist(mmcv_home) + return mmcv_home + + +def load_state_dict(module, state_dict, strict=False, logger=None): + """Load state_dict to a module. + + This method is modified from :meth:`torch.nn.Module.load_state_dict`. + Default value for ``strict`` is set to ``False`` and the message for + param mismatch will be shown even if strict is False. + + Args: + module (Module): Module that receives the state_dict. + state_dict (OrderedDict): Weights. + strict (bool): whether to strictly enforce that the keys + in :attr:`state_dict` match the keys returned by this module's + :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. + logger (:obj:`logging.Logger`, optional): Logger to log the error + message. If not specified, print function will be used. + """ + unexpected_keys = [] + all_missing_keys = [] + err_msg = [] + + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + # use _load_from_state_dict to enable checkpoint version control + def load(module, prefix=''): + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, all_missing_keys, unexpected_keys, err_msg + ) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(module) + load = None # break load->load reference cycle + + # ignore "num_batches_tracked" of BN layers + missing_keys = [key for key in all_missing_keys if 'num_batches_tracked' not in key] + + if unexpected_keys: + err_msg.append('unexpected key in source ' f'state_dict: {", ".join(unexpected_keys)}\n') + if missing_keys: + err_msg.append(f'missing keys in source state_dict: {", ".join(missing_keys)}\n') + + rank, _ = get_dist_info() + if len(err_msg) > 0 and rank == 0: + err_msg.insert(0, 'The model and loaded state dict do not match exactly\n') + err_msg = '\n'.join(err_msg) + if strict: + raise RuntimeError(err_msg) + elif logger is not None: + logger.warning(err_msg) + else: + print(err_msg) + + +def load_url_dist(url, model_dir=None): + """In distributed setting, this function only download checkpoint at local + rank 0.""" + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + if rank == 0: + checkpoint = model_zoo.load_url(url, model_dir=model_dir) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + checkpoint = model_zoo.load_url(url, model_dir=model_dir) + return checkpoint + + +def load_pavimodel_dist(model_path, map_location=None): + """In distributed setting, this function only download checkpoint at local + rank 0.""" + try: + from pavi import modelcloud + except ImportError: + raise ImportError('Please install pavi to load checkpoint from modelcloud.') + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + if rank == 0: + model = modelcloud.get(model_path) + with TemporaryDirectory() as tmp_dir: + downloaded_file = osp.join(tmp_dir, model.name) + model.download(downloaded_file) + checkpoint = torch.load(downloaded_file, map_location=map_location) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + model = modelcloud.get(model_path) + with TemporaryDirectory() as tmp_dir: + downloaded_file = osp.join(tmp_dir, model.name) + model.download(downloaded_file) + checkpoint = torch.load(downloaded_file, map_location=map_location) + return checkpoint + + +def load_fileclient_dist(filename, backend, map_location): + """In distributed setting, this function only download checkpoint at local + rank 0.""" + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + allowed_backends = ['ceph'] + if backend not in allowed_backends: + raise ValueError(f'Load from Backend {backend} is not supported.') + if rank == 0: + fileclient = FileClient(backend=backend) + buffer = io.BytesIO(fileclient.get(filename)) + checkpoint = torch.load(buffer, map_location=map_location) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + fileclient = FileClient(backend=backend) + buffer = io.BytesIO(fileclient.get(filename)) + checkpoint = torch.load(buffer, map_location=map_location) + return checkpoint + + +def get_torchvision_models(): + model_urls = dict() + for _, name, ispkg in pkgutil.walk_packages(torchvision.models.__path__): + if ispkg: + continue + _zoo = import_module(f'torchvision.models.{name}') + if hasattr(_zoo, 'model_urls'): + _urls = getattr(_zoo, 'model_urls') + model_urls.update(_urls) + return model_urls + + +def get_external_models(): + mmcv_home = _get_mmcv_home() + default_json_path = osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json') + default_urls = load_file(default_json_path) + assert isinstance(default_urls, dict) + external_json_path = osp.join(mmcv_home, 'open_mmlab.json') + if osp.exists(external_json_path): + external_urls = load_file(external_json_path) + assert isinstance(external_urls, dict) + default_urls.update(external_urls) + + return default_urls + + +def get_mmcls_models(): + mmcls_json_path = osp.join(mmcv.__path__[0], 'model_zoo/mmcls.json') + mmcls_urls = load_file(mmcls_json_path) + + return mmcls_urls + + +def get_deprecated_model_names(): + deprecate_json_path = osp.join(mmcv.__path__[0], 'model_zoo/deprecated.json') + deprecate_urls = load_file(deprecate_json_path) + assert isinstance(deprecate_urls, dict) + + return deprecate_urls + + +def _process_mmcls_checkpoint(checkpoint): + state_dict = checkpoint['state_dict'] + new_state_dict = OrderedDict() + for k, v in state_dict.items(): + if k.startswith('backbone.'): + new_state_dict[k[9:]] = v + new_checkpoint = dict(state_dict=new_state_dict) + + return new_checkpoint + + +def _load_checkpoint(filename, map_location=None): + """Load checkpoint from somewhere (modelzoo, file, url). + + Args: + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str | None): Same as :func:`torch.load`. Default: None. + + Returns: + dict | OrderedDict: The loaded checkpoint. It can be either an + OrderedDict storing model weights or a dict containing other + information, which depends on the checkpoint. + """ + if filename.startswith('modelzoo://'): + warnings.warn('The URL scheme of "modelzoo://" is deprecated, please ' 'use "torchvision://" instead') + model_urls = get_torchvision_models() + model_name = filename[11:] + checkpoint = load_url_dist(model_urls[model_name]) + elif filename.startswith('torchvision://'): + model_urls = get_torchvision_models() + model_name = filename[14:] + checkpoint = load_url_dist(model_urls[model_name]) + elif filename.startswith('open-mmlab://'): + model_urls = get_external_models() + model_name = filename[13:] + deprecated_urls = get_deprecated_model_names() + if model_name in deprecated_urls: + warnings.warn( + f'open-mmlab://{model_name} is deprecated in favor ' f'of open-mmlab://{deprecated_urls[model_name]}' + ) + model_name = deprecated_urls[model_name] + model_url = model_urls[model_name] + # check if is url + if model_url.startswith(('http://', 'https://')): + checkpoint = load_url_dist(model_url) + else: + filename = osp.join(_get_mmcv_home(), model_url) + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + elif filename.startswith('mmcls://'): + model_urls = get_mmcls_models() + model_name = filename[8:] + checkpoint = load_url_dist(model_urls[model_name]) + checkpoint = _process_mmcls_checkpoint(checkpoint) + elif filename.startswith(('http://', 'https://')): + checkpoint = load_url_dist(filename) + elif filename.startswith('pavi://'): + model_path = filename[7:] + checkpoint = load_pavimodel_dist(model_path, map_location=map_location) + elif filename.startswith('s3://'): + checkpoint = load_fileclient_dist(filename, backend='ceph', map_location=map_location) + else: + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + return checkpoint + + +def load_checkpoint(model, filename, map_location='cpu', strict=False, logger=None): + """Load checkpoint from a file or URI. + + Args: + model (Module): Module to load checkpoint. + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str): Same as :func:`torch.load`. + strict (bool): Whether to allow different params for the model and + checkpoint. + logger (:mod:`logging.Logger` or None): The logger for error message. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + checkpoint = _load_checkpoint(filename, map_location) + # OrderedDict is a subclass of dict + if not isinstance(checkpoint, dict): + raise RuntimeError(f'No state_dict found in checkpoint file {filename}') + # get state_dict from checkpoint + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + # strip prefix of state_dict + if list(state_dict.keys())[0].startswith('module.'): + state_dict = {k[7:]: v for k, v in state_dict.items()} + + # for MoBY, load model of online branch + if sorted(list(state_dict.keys()))[0].startswith('encoder'): + state_dict = {k.replace('encoder.', ''): v for k, v in state_dict.items() if k.startswith('encoder.')} + + # reshape absolute position embedding + if state_dict.get('absolute_pos_embed') is not None: + absolute_pos_embed = state_dict['absolute_pos_embed'] + N1, L, C1 = absolute_pos_embed.size() + N2, C2, H, W = model.absolute_pos_embed.size() + if N1 != N2 or C1 != C2 or L != H * W: + logger.warning("Error in loading absolute_pos_embed, pass") + else: + state_dict['absolute_pos_embed'] = absolute_pos_embed.view(N2, H, W, C2).permute(0, 3, 1, 2) + + # interpolate position bias table if needed + relative_position_bias_table_keys = [k for k in state_dict.keys() if "relative_position_bias_table" in k] + for table_key in relative_position_bias_table_keys: + table_pretrained = state_dict[table_key] + table_current = model.state_dict()[table_key] + L1, nH1 = table_pretrained.size() + L2, nH2 = table_current.size() + if nH1 != nH2: + logger.warning(f"Error in loading {table_key}, pass") + else: + if L1 != L2: + S1 = int(L1 ** 0.5) + S2 = int(L2 ** 0.5) + table_pretrained_resized = F.interpolate( + table_pretrained.permute(1, 0).view(1, nH1, S1, S1), size=(S2, S2), mode='bicubic' + ) + state_dict[table_key] = table_pretrained_resized.view(nH2, L2).permute(1, 0) + + # load state_dict + load_state_dict(model, state_dict, strict, logger) + return checkpoint + + +def weights_to_cpu(state_dict): + """Copy a model state_dict to cpu. + + Args: + state_dict (OrderedDict): Model weights on GPU. + + Returns: + OrderedDict: Model weights on GPU. + """ + state_dict_cpu = OrderedDict() + for key, val in state_dict.items(): + state_dict_cpu[key] = val.cpu() + return state_dict_cpu + + +def _save_to_state_dict(module, destination, prefix, keep_vars): + """Saves module state to `destination` dictionary. + + This method is modified from :meth:`torch.nn.Module._save_to_state_dict`. + + Args: + module (nn.Module): The module to generate state_dict. + destination (dict): A dict where state will be stored. + prefix (str): The prefix for parameters and buffers used in this + module. + """ + for name, param in module._parameters.items(): + if param is not None: + destination[prefix + name] = param if keep_vars else param.detach() + for name, buf in module._buffers.items(): + # remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d + if buf is not None: + destination[prefix + name] = buf if keep_vars else buf.detach() + + +def get_state_dict(module, destination=None, prefix='', keep_vars=False): + """Returns a dictionary containing a whole state of the module. + + Both parameters and persistent buffers (e.g. running averages) are + included. Keys are corresponding parameter and buffer names. + + This method is modified from :meth:`torch.nn.Module.state_dict` to + recursively check parallel module in case that the model has a complicated + structure, e.g., nn.Module(nn.Module(DDP)). + + Args: + module (nn.Module): The module to generate state_dict. + destination (OrderedDict): Returned dict for the state of the + module. + prefix (str): Prefix of the key. + keep_vars (bool): Whether to keep the variable property of the + parameters. Default: False. + + Returns: + dict: A dictionary containing a whole state of the module. + """ + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + + # below is the same as torch.nn.Module.state_dict() + if destination is None: + destination = OrderedDict() + destination._metadata = OrderedDict() + destination._metadata[prefix[:-1]] = local_metadata = dict(version=module._version) + _save_to_state_dict(module, destination, prefix, keep_vars) + for name, child in module._modules.items(): + if child is not None: + get_state_dict(child, destination, prefix + name + '.', keep_vars=keep_vars) + for hook in module._state_dict_hooks.values(): + hook_result = hook(module, destination, prefix, local_metadata) + if hook_result is not None: + destination = hook_result + return destination + + +def save_checkpoint(model, filename, optimizer=None, meta=None): + """Save checkpoint to file. + + The checkpoint will have 3 fields: ``meta``, ``state_dict`` and + ``optimizer``. By default ``meta`` will contain version and time info. + + Args: + model (Module): Module whose params are to be saved. + filename (str): Checkpoint filename. + optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. + meta (dict, optional): Metadata to be saved in checkpoint. + """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError(f'meta must be a dict or None, but got {type(meta)}') + meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) + + if is_module_wrapper(model): + model = model.module + + if hasattr(model, 'CLASSES') and model.CLASSES is not None: + # save class name to the meta + meta.update(CLASSES=model.CLASSES) + + checkpoint = {'meta': meta, 'state_dict': weights_to_cpu(get_state_dict(model))} + # save optimizer state dict in the checkpoint + if isinstance(optimizer, Optimizer): + checkpoint['optimizer'] = optimizer.state_dict() + elif isinstance(optimizer, dict): + checkpoint['optimizer'] = {} + for name, optim in optimizer.items(): + checkpoint['optimizer'][name] = optim.state_dict() + + if filename.startswith('pavi://'): + try: + from pavi import modelcloud + from pavi.exception import NodeNotFoundError + except ImportError: + raise ImportError('Please install pavi to load checkpoint from modelcloud.') + model_path = filename[7:] + root = modelcloud.Folder() + model_dir, model_name = osp.split(model_path) + try: + model = modelcloud.get(model_dir) + except NodeNotFoundError: + model = root.create_training_model(model_dir) + with TemporaryDirectory() as tmp_dir: + checkpoint_file = osp.join(tmp_dir, model_name) + with open(checkpoint_file, 'wb') as f: + torch.save(checkpoint, f) + f.flush() + model.create_file(checkpoint_file, name=model_name) + else: + mmcv.mkdir_or_exist(osp.dirname(filename)) + # immediately flush buffer + with open(filename, 'wb') as f: + torch.save(checkpoint, f) + f.flush() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py new file mode 100644 index 000000000000..1752e7fc7969 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/__init__.py @@ -0,0 +1,14 @@ +from .inference import inference_segmentor, init_segmentor, show_result_pyplot +from .test import multi_gpu_test, single_gpu_test +from .train import get_root_logger, set_random_seed, train_segmentor + +__all__ = [ + 'get_root_logger', + 'set_random_seed', + 'train_segmentor', + 'init_segmentor', + 'inference_segmentor', + 'multi_gpu_test', + 'single_gpu_test', + 'show_result_pyplot', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py new file mode 100644 index 000000000000..32c6db9f1ccb --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/inference.py @@ -0,0 +1,127 @@ +import matplotlib.pyplot as plt +import torch + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import collate, scatter +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.datasets.pipelines import Compose +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models import build_segmentor + + +def init_segmentor(config, checkpoint=None, device='cuda:0'): + """Initialize a segmentor from config file. + + Args: + config (str or :obj:`mmcv.Config`): Config file path or the config + object. + checkpoint (str, optional): Checkpoint path. If left as None, the model + will not load any weights. + device (str, optional) CPU/CUDA device option. Default 'cuda:0'. + Use 'cpu' for loading model on CPU. + Returns: + nn.Module: The constructed segmentor. + """ + if isinstance(config, str): + config = mmcv.Config.fromfile(config) + elif not isinstance(config, mmcv.Config): + raise TypeError('config must be a filename or Config object, ' 'but got {}'.format(type(config))) + config.model.pretrained = None + config.model.train_cfg = None + model = build_segmentor(config.model, test_cfg=config.get('test_cfg')) + if checkpoint is not None: + checkpoint = load_checkpoint(model, checkpoint, map_location='cpu') + model.CLASSES = checkpoint['meta']['CLASSES'] + model.PALETTE = checkpoint['meta']['PALETTE'] + model.cfg = config # save the config in the model for convenience + model.to(device) + model.eval() + return model + + +class LoadImage: + """A simple pipeline to load image.""" + + def __call__(self, results): + """Call function to load images into results. + + Args: + results (dict): A result dict contains the file name + of the image to be read. + + Returns: + dict: ``results`` will be returned containing loaded image. + """ + + if isinstance(results['img'], str): + results['filename'] = results['img'] + results['ori_filename'] = results['img'] + else: + results['filename'] = None + results['ori_filename'] = None + img = mmcv.imread(results['img']) + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + return results + + +def inference_segmentor(model, img): + """Inference image(s) with the segmentor. + + Args: + model (nn.Module): The loaded segmentor. + imgs (str/ndarray or list[str/ndarray]): Either image files or loaded + images. + + Returns: + (list[Tensor]): The segmentation result. + """ + cfg = model.cfg + device = next(model.parameters()).device # model device + # build the data pipeline + test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:] + test_pipeline = Compose(test_pipeline) + # prepare data + data = dict(img=img) + data = test_pipeline(data) + data = collate([data], samples_per_gpu=1) + if next(model.parameters()).is_cuda: + # scatter to specified GPU + data = scatter(data, [device])[0] + else: + data['img_metas'] = [i.data[0] for i in data['img_metas']] + + # forward the model + with torch.no_grad(): + result = model(return_loss=False, rescale=True, **data) + return result + + +def show_result_pyplot(model, img, result, palette=None, fig_size=(15, 10), opacity=0.5, title='', block=True): + """Visualize the segmentation results on the image. + + Args: + model (nn.Module): The loaded segmentor. + img (str or np.ndarray): Image filename or loaded image. + result (list): The segmentation result. + palette (list[list[int]]] | None): The palette of segmentation + map. If None is given, random palette will be generated. + Default: None + fig_size (tuple): Figure size of the pyplot figure. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. + title (str): The title of pyplot figure. + Default is ''. + block (bool): Whether to block the pyplot figure. + Default is True. + """ + if hasattr(model, 'module'): + model = model.module + img = model.show_result(img, result, palette=palette, show=False, opacity=opacity) + # plt.figure(figsize=fig_size) + # plt.imshow(mmcv.bgr2rgb(img)) + # plt.title(title) + # plt.tight_layout() + # plt.show(block=block) + return mmcv.bgr2rgb(img) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py new file mode 100644 index 000000000000..961b5e0a781b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/test.py @@ -0,0 +1,217 @@ +import os.path as osp +import pickle +import shutil +import tempfile + +import numpy as np +import torch +import torch.distributed as dist + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.image import tensor2imgs +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import get_dist_info + + +def np2tmp(array, temp_file_name=None): + """Save ndarray to local numpy file. + + Args: + array (ndarray): Ndarray to save. + temp_file_name (str): Numpy file name. If 'temp_file_name=None', this + function will generate a file name with tempfile.NamedTemporaryFile + to save ndarray. Default: None. + + Returns: + str: The numpy file name. + """ + + if temp_file_name is None: + temp_file_name = tempfile.NamedTemporaryFile(suffix='.npy', delete=False).name + np.save(temp_file_name, array) + return temp_file_name + + +def single_gpu_test(model, data_loader, show=False, out_dir=None, efficient_test=False, opacity=0.5): + """Test with single GPU. + + Args: + model (nn.Module): Model to be tested. + data_loader (utils.data.Dataloader): Pytorch data loader. + show (bool): Whether show results during inference. Default: False. + out_dir (str, optional): If specified, the results will be dumped into + the directory to save output results. + efficient_test (bool): Whether save the results as local numpy files to + save CPU memory during evaluation. Default: False. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. + Returns: + list: The prediction results. + """ + + model.eval() + results = [] + dataset = data_loader.dataset + prog_bar = mmcv.ProgressBar(len(dataset)) + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, **data) + + if show or out_dir: + img_tensor = data['img'][0] + img_metas = data['img_metas'][0].data[0] + imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) + assert len(imgs) == len(img_metas) + + for img, img_meta in zip(imgs, img_metas): + h, w, _ = img_meta['img_shape'] + img_show = img[:h, :w, :] + + ori_h, ori_w = img_meta['ori_shape'][:-1] + img_show = mmcv.imresize(img_show, (ori_w, ori_h)) + + if out_dir: + out_file = osp.join(out_dir, img_meta['ori_filename']) + else: + out_file = None + + model.module.show_result( + img_show, result, palette=dataset.PALETTE, show=show, out_file=out_file, opacity=opacity + ) + + if isinstance(result, list): + if efficient_test: + result = [np2tmp(_) for _ in result] + results.extend(result) + else: + if efficient_test: + result = np2tmp(result) + results.append(result) + + batch_size = len(result) + for _ in range(batch_size): + prog_bar.update() + return results + + +def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False, efficient_test=False): + """Test model with multiple gpus. + + This method tests model with multiple gpus and collects the results + under two different modes: gpu and cpu modes. By setting 'gpu_collect=True' + it encodes results to gpu tensors and use gpu communication for results + collection. On cpu mode it saves the results on different gpus to 'tmpdir' + and collects them by the rank 0 worker. + + Args: + model (nn.Module): Model to be tested. + data_loader (utils.data.Dataloader): Pytorch data loader. + tmpdir (str): Path of directory to save the temporary results from + different gpus under cpu mode. + gpu_collect (bool): Option to use either gpu or cpu to collect results. + efficient_test (bool): Whether save the results as local numpy files to + save CPU memory during evaluation. Default: False. + + Returns: + list: The prediction results. + """ + + model.eval() + results = [] + dataset = data_loader.dataset + rank, world_size = get_dist_info() + if rank == 0: + prog_bar = mmcv.ProgressBar(len(dataset)) + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, rescale=True, **data) + + if isinstance(result, list): + if efficient_test: + result = [np2tmp(_) for _ in result] + results.extend(result) + else: + if efficient_test: + result = np2tmp(result) + results.append(result) + + if rank == 0: + batch_size = data['img'][0].size(0) + for _ in range(batch_size * world_size): + prog_bar.update() + + # collect results from all ranks + if gpu_collect: + results = collect_results_gpu(results, len(dataset)) + else: + results = collect_results_cpu(results, len(dataset), tmpdir) + return results + + +def collect_results_cpu(result_part, size, tmpdir=None): + """Collect results with CPU.""" + rank, world_size = get_dist_info() + # create a tmp dir if it is not specified + if tmpdir is None: + MAX_LEN = 512 + # 32 is whitespace + dir_tensor = torch.full((MAX_LEN,), 32, dtype=torch.uint8, device='cuda') + if rank == 0: + tmpdir = tempfile.mkdtemp() + tmpdir = torch.tensor(bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') + dir_tensor[: len(tmpdir)] = tmpdir + dist.broadcast(dir_tensor, 0) + tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() + else: + mmcv.mkdir_or_exist(tmpdir) + # dump the part result to the dir + mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank))) + dist.barrier() + # collect all parts + if rank != 0: + return None + else: + # load results of all parts from tmp dir + part_list = [] + for i in range(world_size): + part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i)) + part_list.append(mmcv.load(part_file)) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + # remove tmp dir + shutil.rmtree(tmpdir) + return ordered_results + + +def collect_results_gpu(result_part, size): + """Collect results with GPU.""" + rank, world_size = get_dist_info() + # dump result part to tensor with pickle + part_tensor = torch.tensor(bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') + # gather all result part tensor shape + shape_tensor = torch.tensor(part_tensor.shape, device='cuda') + shape_list = [shape_tensor.clone() for _ in range(world_size)] + dist.all_gather(shape_list, shape_tensor) + # padding result part tensor to max length + shape_max = torch.tensor(shape_list).max() + part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') + part_send[: shape_tensor[0]] = part_tensor + part_recv_list = [part_tensor.new_zeros(shape_max) for _ in range(world_size)] + # gather all result part + dist.all_gather(part_recv_list, part_send) + + if rank == 0: + part_list = [] + for recv, shape in zip(part_recv_list, shape_list): + part_list.append(pickle.loads(recv[: shape[0]].cpu().numpy().tobytes())) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + return ordered_results diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py new file mode 100644 index 000000000000..1ed5228bcfb6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/apis/train.py @@ -0,0 +1,109 @@ +import random +import warnings + +import numpy as np +import torch + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import ( + MMDataParallel, + MMDistributedDataParallel, +) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import build_optimizer, build_runner +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import DistEvalHook, EvalHook +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.datasets import build_dataloader, build_dataset +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + + +def set_random_seed(seed, deterministic=False): + """Set random seed. + + Args: + seed (int): Seed to be used. + deterministic (bool): Whether to set the deterministic option for + CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` + to True and `torch.backends.cudnn.benchmark` to False. + Default: False. + """ + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + if deterministic: + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + +def train_segmentor(model, dataset, cfg, distributed=False, validate=False, timestamp=None, meta=None): + """Launch segmentor training.""" + logger = get_root_logger(cfg.log_level) + + # prepare data loaders + dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] + data_loaders = [ + build_dataloader( + ds, + cfg.data.samples_per_gpu, + cfg.data.workers_per_gpu, + # cfg.gpus will be ignored if distributed + len(cfg.gpu_ids), + dist=distributed, + seed=cfg.seed, + drop_last=True, + ) + for ds in dataset + ] + + # put model on gpus + if distributed: + find_unused_parameters = cfg.get('find_unused_parameters', False) + # Sets the `find_unused_parameters` parameter in + # torch.nn.parallel.DistributedDataParallel + model = MMDistributedDataParallel( + model.cuda(), + device_ids=[torch.cuda.current_device()], + broadcast_buffers=False, + find_unused_parameters=find_unused_parameters, + ) + else: + model = MMDataParallel(model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) + + # build runner + optimizer = build_optimizer(model, cfg.optimizer) + + if cfg.get('runner') is None: + cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters} + warnings.warn( + 'config is now expected to have a `runner` section, ' 'please set `runner` in your config.', UserWarning + ) + + runner = build_runner( + cfg.runner, + default_args=dict( + model=model, batch_processor=None, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta + ), + ) + + # register hooks + runner.register_training_hooks( + cfg.lr_config, cfg.optimizer_config, cfg.checkpoint_config, cfg.log_config, cfg.get('momentum_config', None) + ) + + # an ugly walkaround to make the .log and .log.json filenames the same + runner.timestamp = timestamp + + # register eval hooks + if validate: + val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) + val_dataloader = build_dataloader( + val_dataset, samples_per_gpu=1, workers_per_gpu=cfg.data.workers_per_gpu, dist=distributed, shuffle=False + ) + eval_cfg = cfg.get('evaluation', {}) + eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner' + eval_hook = DistEvalHook if distributed else EvalHook + runner.register_hook(eval_hook(val_dataloader, **eval_cfg), priority='LOW') + + if cfg.resume_from: + runner.resume(cfg.resume_from) + elif cfg.load_from: + runner.load_checkpoint(cfg.load_from) + runner.run(data_loaders, cfg.workflow) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/__init__.py new file mode 100644 index 000000000000..965605587211 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/__init__.py @@ -0,0 +1,3 @@ +from .evaluation import * # noqa: F401, F403 +from .seg import * # noqa: F401, F403 +from .utils import * # noqa: F401, F403 diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py new file mode 100644 index 000000000000..c77282a68a12 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/__init__.py @@ -0,0 +1,14 @@ +from .class_names import get_classes, get_palette +from .eval_hooks import DistEvalHook, EvalHook +from .metrics import eval_metrics, mean_dice, mean_fscore, mean_iou + +__all__ = [ + 'EvalHook', + 'DistEvalHook', + 'mean_dice', + 'mean_iou', + 'mean_fscore', + 'eval_metrics', + 'get_classes', + 'get_palette', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py new file mode 100644 index 000000000000..7ebbe83b1851 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/class_names.py @@ -0,0 +1,458 @@ +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + + +def cityscapes_classes(): + """Cityscapes class names for external use.""" + return [ + 'road', + 'sidewalk', + 'building', + 'wall', + 'fence', + 'pole', + 'traffic light', + 'traffic sign', + 'vegetation', + 'terrain', + 'sky', + 'person', + 'rider', + 'car', + 'truck', + 'bus', + 'train', + 'motorcycle', + 'bicycle', + ] + + +def ade_classes(): + """ADE20K class names for external use.""" + return [ + 'wall', + 'building', + 'sky', + 'floor', + 'tree', + 'ceiling', + 'road', + 'bed ', + 'windowpane', + 'grass', + 'cabinet', + 'sidewalk', + 'person', + 'earth', + 'door', + 'table', + 'mountain', + 'plant', + 'curtain', + 'chair', + 'car', + 'water', + 'painting', + 'sofa', + 'shelf', + 'house', + 'sea', + 'mirror', + 'rug', + 'field', + 'armchair', + 'seat', + 'fence', + 'desk', + 'rock', + 'wardrobe', + 'lamp', + 'bathtub', + 'railing', + 'cushion', + 'base', + 'box', + 'column', + 'signboard', + 'chest of drawers', + 'counter', + 'sand', + 'sink', + 'skyscraper', + 'fireplace', + 'refrigerator', + 'grandstand', + 'path', + 'stairs', + 'runway', + 'case', + 'pool table', + 'pillow', + 'screen door', + 'stairway', + 'river', + 'bridge', + 'bookcase', + 'blind', + 'coffee table', + 'toilet', + 'flower', + 'book', + 'hill', + 'bench', + 'countertop', + 'stove', + 'palm', + 'kitchen island', + 'computer', + 'swivel chair', + 'boat', + 'bar', + 'arcade machine', + 'hovel', + 'bus', + 'towel', + 'light', + 'truck', + 'tower', + 'chandelier', + 'awning', + 'streetlight', + 'booth', + 'television receiver', + 'airplane', + 'dirt track', + 'apparel', + 'pole', + 'land', + 'bannister', + 'escalator', + 'ottoman', + 'bottle', + 'buffet', + 'poster', + 'stage', + 'van', + 'ship', + 'fountain', + 'conveyer belt', + 'canopy', + 'washer', + 'plaything', + 'swimming pool', + 'stool', + 'barrel', + 'basket', + 'waterfall', + 'tent', + 'bag', + 'minibike', + 'cradle', + 'oven', + 'ball', + 'food', + 'step', + 'tank', + 'trade name', + 'microwave', + 'pot', + 'animal', + 'bicycle', + 'lake', + 'dishwasher', + 'screen', + 'blanket', + 'sculpture', + 'hood', + 'sconce', + 'vase', + 'traffic light', + 'tray', + 'ashcan', + 'fan', + 'pier', + 'crt screen', + 'plate', + 'monitor', + 'bulletin board', + 'shower', + 'radiator', + 'glass', + 'clock', + 'flag', + ] + + +def voc_classes(): + """Pascal VOC class names for external use.""" + return [ + 'background', + 'aeroplane', + 'bicycle', + 'bird', + 'boat', + 'bottle', + 'bus', + 'car', + 'cat', + 'chair', + 'cow', + 'diningtable', + 'dog', + 'horse', + 'motorbike', + 'person', + 'pottedplant', + 'sheep', + 'sofa', + 'train', + 'tvmonitor', + ] + + +def cityscapes_palette(): + """Cityscapes palette for external use.""" + return [ + [128, 64, 128], + [244, 35, 232], + [70, 70, 70], + [102, 102, 156], + [190, 153, 153], + [153, 153, 153], + [250, 170, 30], + [220, 220, 0], + [107, 142, 35], + [152, 251, 152], + [70, 130, 180], + [220, 20, 60], + [255, 0, 0], + [0, 0, 142], + [0, 0, 70], + [0, 60, 100], + [0, 80, 100], + [0, 0, 230], + [119, 11, 32], + ] + + +def ade_palette(): + """ADE20K palette for external use.""" + return [ + [120, 120, 120], + [180, 120, 120], + [6, 230, 230], + [80, 50, 50], + [4, 200, 3], + [120, 120, 80], + [140, 140, 140], + [204, 5, 255], + [230, 230, 230], + [4, 250, 7], + [224, 5, 255], + [235, 255, 7], + [150, 5, 61], + [120, 120, 70], + [8, 255, 51], + [255, 6, 82], + [143, 255, 140], + [204, 255, 4], + [255, 51, 7], + [204, 70, 3], + [0, 102, 200], + [61, 230, 250], + [255, 6, 51], + [11, 102, 255], + [255, 7, 71], + [255, 9, 224], + [9, 7, 230], + [220, 220, 220], + [255, 9, 92], + [112, 9, 255], + [8, 255, 214], + [7, 255, 224], + [255, 184, 6], + [10, 255, 71], + [255, 41, 10], + [7, 255, 255], + [224, 255, 8], + [102, 8, 255], + [255, 61, 6], + [255, 194, 7], + [255, 122, 8], + [0, 255, 20], + [255, 8, 41], + [255, 5, 153], + [6, 51, 255], + [235, 12, 255], + [160, 150, 20], + [0, 163, 255], + [140, 140, 140], + [250, 10, 15], + [20, 255, 0], + [31, 255, 0], + [255, 31, 0], + [255, 224, 0], + [153, 255, 0], + [0, 0, 255], + [255, 71, 0], + [0, 235, 255], + [0, 173, 255], + [31, 0, 255], + [11, 200, 200], + [255, 82, 0], + [0, 255, 245], + [0, 61, 255], + [0, 255, 112], + [0, 255, 133], + [255, 0, 0], + [255, 163, 0], + [255, 102, 0], + [194, 255, 0], + [0, 143, 255], + [51, 255, 0], + [0, 82, 255], + [0, 255, 41], + [0, 255, 173], + [10, 0, 255], + [173, 255, 0], + [0, 255, 153], + [255, 92, 0], + [255, 0, 255], + [255, 0, 245], + [255, 0, 102], + [255, 173, 0], + [255, 0, 20], + [255, 184, 184], + [0, 31, 255], + [0, 255, 61], + [0, 71, 255], + [255, 0, 204], + [0, 255, 194], + [0, 255, 82], + [0, 10, 255], + [0, 112, 255], + [51, 0, 255], + [0, 194, 255], + [0, 122, 255], + [0, 255, 163], + [255, 153, 0], + [0, 255, 10], + [255, 112, 0], + [143, 255, 0], + [82, 0, 255], + [163, 255, 0], + [255, 235, 0], + [8, 184, 170], + [133, 0, 255], + [0, 255, 92], + [184, 0, 255], + [255, 0, 31], + [0, 184, 255], + [0, 214, 255], + [255, 0, 112], + [92, 255, 0], + [0, 224, 255], + [112, 224, 255], + [70, 184, 160], + [163, 0, 255], + [153, 0, 255], + [71, 255, 0], + [255, 0, 163], + [255, 204, 0], + [255, 0, 143], + [0, 255, 235], + [133, 255, 0], + [255, 0, 235], + [245, 0, 255], + [255, 0, 122], + [255, 245, 0], + [10, 190, 212], + [214, 255, 0], + [0, 204, 255], + [20, 0, 255], + [255, 255, 0], + [0, 153, 255], + [0, 41, 255], + [0, 255, 204], + [41, 0, 255], + [41, 255, 0], + [173, 0, 255], + [0, 245, 255], + [71, 0, 255], + [122, 0, 255], + [0, 255, 184], + [0, 92, 255], + [184, 255, 0], + [0, 133, 255], + [255, 214, 0], + [25, 194, 194], + [102, 255, 0], + [92, 0, 255], + ] + + +def voc_palette(): + """Pascal VOC palette for external use.""" + return [ + [0, 0, 0], + [128, 0, 0], + [0, 128, 0], + [128, 128, 0], + [0, 0, 128], + [128, 0, 128], + [0, 128, 128], + [128, 128, 128], + [64, 0, 0], + [192, 0, 0], + [64, 128, 0], + [192, 128, 0], + [64, 0, 128], + [192, 0, 128], + [64, 128, 128], + [192, 128, 128], + [0, 64, 0], + [128, 64, 0], + [0, 192, 0], + [128, 192, 0], + [0, 64, 128], + ] + + +dataset_aliases = { + 'cityscapes': ['cityscapes'], + 'ade': ['ade', 'ade20k'], + 'voc': ['voc', 'pascal_voc', 'voc12', 'voc12aug'], +} + + +def get_classes(dataset): + """Get class names of a dataset.""" + alias2name = {} + for name, aliases in dataset_aliases.items(): + for alias in aliases: + alias2name[alias] = name + + if mmcv.is_str(dataset): + if dataset in alias2name: + labels = eval(alias2name[dataset] + '_classes()') + else: + raise ValueError(f'Unrecognized dataset: {dataset}') + else: + raise TypeError(f'dataset must a str, but got {type(dataset)}') + return labels + + +def get_palette(dataset): + """Get class palette (RGB) of a dataset.""" + alias2name = {} + for name, aliases in dataset_aliases.items(): + for alias in aliases: + alias2name[alias] = name + + if mmcv.is_str(dataset): + if dataset in alias2name: + labels = eval(alias2name[dataset] + '_palette()') + else: + raise ValueError(f'Unrecognized dataset: {dataset}') + else: + raise TypeError(f'dataset must a str, but got {type(dataset)}') + return labels diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py new file mode 100644 index 000000000000..34b01f515383 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/eval_hooks.py @@ -0,0 +1,108 @@ +import os.path as osp + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import DistEvalHook as _DistEvalHook +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import EvalHook as _EvalHook + + +class EvalHook(_EvalHook): + """Single GPU EvalHook, with efficient test support. + + Args: + by_epoch (bool): Determine perform evaluation by epoch or by iteration. + If set to True, it will perform by epoch. Otherwise, by iteration. + Default: False. + efficient_test (bool): Whether save the results as local numpy files to + save CPU memory during evaluation. Default: False. + Returns: + list: The prediction results. + """ + + greater_keys = ['mIoU', 'mAcc', 'aAcc'] + + def __init__(self, *args, by_epoch=False, efficient_test=False, **kwargs): + super().__init__(*args, by_epoch=by_epoch, **kwargs) + self.efficient_test = efficient_test + + def after_train_iter(self, runner): + """After train epoch hook. + + Override default ``single_gpu_test``. + """ + if self.by_epoch or not self.every_n_iters(runner, self.interval): + return + from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import single_gpu_test + + runner.log_buffer.clear() + results = single_gpu_test(runner.model, self.dataloader, show=False, efficient_test=self.efficient_test) + self.evaluate(runner, results) + + def after_train_epoch(self, runner): + """After train epoch hook. + + Override default ``single_gpu_test``. + """ + if not self.by_epoch or not self.every_n_epochs(runner, self.interval): + return + from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import single_gpu_test + + runner.log_buffer.clear() + results = single_gpu_test(runner.model, self.dataloader, show=False) + self.evaluate(runner, results) + + +class DistEvalHook(_DistEvalHook): + """Distributed EvalHook, with efficient test support. + + Args: + by_epoch (bool): Determine perform evaluation by epoch or by iteration. + If set to True, it will perform by epoch. Otherwise, by iteration. + Default: False. + efficient_test (bool): Whether save the results as local numpy files to + save CPU memory during evaluation. Default: False. + Returns: + list: The prediction results. + """ + + greater_keys = ['mIoU', 'mAcc', 'aAcc'] + + def __init__(self, *args, by_epoch=False, efficient_test=False, **kwargs): + super().__init__(*args, by_epoch=by_epoch, **kwargs) + self.efficient_test = efficient_test + + def after_train_iter(self, runner): + """After train epoch hook. + + Override default ``multi_gpu_test``. + """ + if self.by_epoch or not self.every_n_iters(runner, self.interval): + return + from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import multi_gpu_test + + runner.log_buffer.clear() + results = multi_gpu_test( + runner.model, + self.dataloader, + tmpdir=osp.join(runner.work_dir, '.eval_hook'), + gpu_collect=self.gpu_collect, + efficient_test=self.efficient_test, + ) + if runner.rank == 0: + print('\n') + self.evaluate(runner, results) + + def after_train_epoch(self, runner): + """After train epoch hook. + + Override default ``multi_gpu_test``. + """ + if not self.by_epoch or not self.every_n_epochs(runner, self.interval): + return + from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.apis import multi_gpu_test + + runner.log_buffer.clear() + results = multi_gpu_test( + runner.model, self.dataloader, tmpdir=osp.join(runner.work_dir, '.eval_hook'), gpu_collect=self.gpu_collect + ) + if runner.rank == 0: + print('\n') + self.evaluate(runner, results) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py new file mode 100644 index 000000000000..06b9755207e1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/evaluation/metrics.py @@ -0,0 +1,297 @@ +from collections import OrderedDict + +import numpy as np +import torch + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + + +def f_score(precision, recall, beta=1): + """calcuate the f-score value. + + Args: + precision (float | torch.Tensor): The precision value. + recall (float | torch.Tensor): The recall value. + beta (int): Determines the weight of recall in the combined score. + Default: False. + + Returns: + [torch.tensor]: The f-score value. + """ + score = (1 + beta ** 2) * (precision * recall) / ((beta ** 2 * precision) + recall) + return score + + +def intersect_and_union(pred_label, label, num_classes, ignore_index, label_map=dict(), reduce_zero_label=False): + """Calculate intersection and Union. + + Args: + pred_label (ndarray | str): Prediction segmentation map + or predict result filename. + label (ndarray | str): Ground truth segmentation map + or label filename. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + label_map (dict): Mapping old labels to new labels. The parameter will + work only when label is str. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. The parameter will + work only when label is str. Default: False. + + Returns: + torch.Tensor: The intersection of prediction and ground truth + histogram on all classes. + torch.Tensor: The union of prediction and ground truth histogram on + all classes. + torch.Tensor: The prediction histogram on all classes. + torch.Tensor: The ground truth histogram on all classes. + """ + + if isinstance(pred_label, str): + pred_label = torch.from_numpy(np.load(pred_label)) + else: + pred_label = torch.from_numpy((pred_label)) + + if isinstance(label, str): + label = torch.from_numpy(mmcv.imread(label, flag='unchanged', backend='pillow')) + else: + label = torch.from_numpy(label) + + if label_map is not None: + for old_id, new_id in label_map.items(): + label[label == old_id] = new_id + if reduce_zero_label: + label[label == 0] = 255 + label = label - 1 + label[label == 254] = 255 + + mask = label != ignore_index + pred_label = pred_label[mask] + label = label[mask] + + intersect = pred_label[pred_label == label] + area_intersect = torch.histc(intersect.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_pred_label = torch.histc(pred_label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_label = torch.histc(label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_union = area_pred_label + area_label - area_intersect + return area_intersect, area_union, area_pred_label, area_label + + +def total_intersect_and_union( + results, gt_seg_maps, num_classes, ignore_index, label_map=dict(), reduce_zero_label=False +): + """Calculate Total Intersection and Union. + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. Default: False. + + Returns: + ndarray: The intersection of prediction and ground truth histogram + on all classes. + ndarray: The union of prediction and ground truth histogram on all + classes. + ndarray: The prediction histogram on all classes. + ndarray: The ground truth histogram on all classes. + """ + num_imgs = len(results) + assert len(gt_seg_maps) == num_imgs + total_area_intersect = torch.zeros((num_classes,), dtype=torch.float64) + total_area_union = torch.zeros((num_classes,), dtype=torch.float64) + total_area_pred_label = torch.zeros((num_classes,), dtype=torch.float64) + total_area_label = torch.zeros((num_classes,), dtype=torch.float64) + for i in range(num_imgs): + area_intersect, area_union, area_pred_label, area_label = intersect_and_union( + results[i], gt_seg_maps[i], num_classes, ignore_index, label_map, reduce_zero_label + ) + total_area_intersect += area_intersect + total_area_union += area_union + total_area_pred_label += area_pred_label + total_area_label += area_label + return total_area_intersect, total_area_union, total_area_pred_label, total_area_label + + +def mean_iou( + results, gt_seg_maps, num_classes, ignore_index, nan_to_num=None, label_map=dict(), reduce_zero_label=False +): + """Calculate Mean Intersection and Union (mIoU) + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. Default: False. + + Returns: + dict[str, float | ndarray]: + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category IoU, shape (num_classes, ). + """ + iou_result = eval_metrics( + results=results, + gt_seg_maps=gt_seg_maps, + num_classes=num_classes, + ignore_index=ignore_index, + metrics=['mIoU'], + nan_to_num=nan_to_num, + label_map=label_map, + reduce_zero_label=reduce_zero_label, + ) + return iou_result + + +def mean_dice( + results, gt_seg_maps, num_classes, ignore_index, nan_to_num=None, label_map=dict(), reduce_zero_label=False +): + """Calculate Mean Dice (mDice) + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. Default: False. + + Returns: + dict[str, float | ndarray]: Default metrics. + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category dice, shape (num_classes, ). + """ + + dice_result = eval_metrics( + results=results, + gt_seg_maps=gt_seg_maps, + num_classes=num_classes, + ignore_index=ignore_index, + metrics=['mDice'], + nan_to_num=nan_to_num, + label_map=label_map, + reduce_zero_label=reduce_zero_label, + ) + return dice_result + + +def mean_fscore( + results, gt_seg_maps, num_classes, ignore_index, nan_to_num=None, label_map=dict(), reduce_zero_label=False, beta=1 +): + """Calculate Mean Intersection and Union (mIoU) + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. Default: False. + beta (int): Determines the weight of recall in the combined score. + Default: False. + + + Returns: + dict[str, float | ndarray]: Default metrics. + float: Overall accuracy on all images. + ndarray: Per category recall, shape (num_classes, ). + ndarray: Per category precision, shape (num_classes, ). + ndarray: Per category f-score, shape (num_classes, ). + """ + fscore_result = eval_metrics( + results=results, + gt_seg_maps=gt_seg_maps, + num_classes=num_classes, + ignore_index=ignore_index, + metrics=['mFscore'], + nan_to_num=nan_to_num, + label_map=label_map, + reduce_zero_label=reduce_zero_label, + beta=beta, + ) + return fscore_result + + +def eval_metrics( + results, + gt_seg_maps, + num_classes, + ignore_index, + metrics=['mIoU'], + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False, + beta=1, +): + """Calculate evaluation metrics + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + metrics (list[str] | str): Metrics to be evaluated, 'mIoU' and 'mDice'. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. Default: False. + Returns: + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category evaluation metrics, shape (num_classes, ). + """ + if isinstance(metrics, str): + metrics = [metrics] + allowed_metrics = ['mIoU', 'mDice', 'mFscore'] + if not set(metrics).issubset(set(allowed_metrics)): + raise KeyError('metrics {} is not supported'.format(metrics)) + + total_area_intersect, total_area_union, total_area_pred_label, total_area_label = total_intersect_and_union( + results, gt_seg_maps, num_classes, ignore_index, label_map, reduce_zero_label + ) + all_acc = total_area_intersect.sum() / total_area_label.sum() + ret_metrics = OrderedDict({'aAcc': all_acc}) + for metric in metrics: + if metric == 'mIoU': + iou = total_area_intersect / total_area_union + acc = total_area_intersect / total_area_label + ret_metrics['IoU'] = iou + ret_metrics['Acc'] = acc + elif metric == 'mDice': + dice = 2 * total_area_intersect / (total_area_pred_label + total_area_label) + acc = total_area_intersect / total_area_label + ret_metrics['Dice'] = dice + ret_metrics['Acc'] = acc + elif metric == 'mFscore': + precision = total_area_intersect / total_area_pred_label + recall = total_area_intersect / total_area_label + f_value = torch.tensor([f_score(x[0], x[1], beta) for x in zip(precision, recall)]) + ret_metrics['Fscore'] = f_value + ret_metrics['Precision'] = precision + ret_metrics['Recall'] = recall + + ret_metrics = {metric: value.numpy() for metric, value in ret_metrics.items()} + if nan_to_num is not None: + ret_metrics = OrderedDict( + {metric: np.nan_to_num(metric_value, nan=nan_to_num) for metric, metric_value in ret_metrics.items()} + ) + return ret_metrics diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/__init__.py new file mode 100644 index 000000000000..93bc129b685e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/__init__.py @@ -0,0 +1,4 @@ +from .builder import build_pixel_sampler +from .sampler import BasePixelSampler, OHEMPixelSampler + +__all__ = ['build_pixel_sampler', 'BasePixelSampler', 'OHEMPixelSampler'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/builder.py new file mode 100644 index 000000000000..908e885cb71d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/builder.py @@ -0,0 +1,8 @@ +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry, build_from_cfg + +PIXEL_SAMPLERS = Registry('pixel sampler') + + +def build_pixel_sampler(cfg, **default_args): + """Build pixel sampler for segmentation map.""" + return build_from_cfg(cfg, PIXEL_SAMPLERS, default_args) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/__init__.py new file mode 100644 index 000000000000..332b242c03d1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/__init__.py @@ -0,0 +1,4 @@ +from .base_pixel_sampler import BasePixelSampler +from .ohem_pixel_sampler import OHEMPixelSampler + +__all__ = ['BasePixelSampler', 'OHEMPixelSampler'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py new file mode 100644 index 000000000000..b75b1566c9f1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py @@ -0,0 +1,12 @@ +from abc import ABCMeta, abstractmethod + + +class BasePixelSampler(metaclass=ABCMeta): + """Base class of pixel sampler.""" + + def __init__(self, **kwargs): + pass + + @abstractmethod + def sample(self, seg_logit, seg_label): + """Placeholder for sample function.""" diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py new file mode 100644 index 000000000000..cfab50a07df6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py @@ -0,0 +1,76 @@ +import torch +import torch.nn.functional as F + +from ..builder import PIXEL_SAMPLERS +from .base_pixel_sampler import BasePixelSampler + + +@PIXEL_SAMPLERS.register_module() +class OHEMPixelSampler(BasePixelSampler): + """Online Hard Example Mining Sampler for segmentation. + + Args: + context (nn.Module): The context of sampler, subclass of + :obj:`BaseDecodeHead`. + thresh (float, optional): The threshold for hard example selection. + Below which, are prediction with low confidence. If not + specified, the hard examples will be pixels of top ``min_kept`` + loss. Default: None. + min_kept (int, optional): The minimum number of predictions to keep. + Default: 100000. + """ + + def __init__(self, context, thresh=None, min_kept=100000): + super(OHEMPixelSampler, self).__init__() + self.context = context + assert min_kept > 1 + self.thresh = thresh + self.min_kept = min_kept + + def sample(self, seg_logit, seg_label): + """Sample pixels that have high loss or with low prediction confidence. + + Args: + seg_logit (torch.Tensor): segmentation logits, shape (N, C, H, W) + seg_label (torch.Tensor): segmentation label, shape (N, 1, H, W) + + Returns: + torch.Tensor: segmentation weight, shape (N, H, W) + """ + with torch.no_grad(): + assert seg_logit.shape[2:] == seg_label.shape[2:] + assert seg_label.shape[1] == 1 + seg_label = seg_label.squeeze(1).long() + batch_kept = self.min_kept * seg_label.size(0) + valid_mask = seg_label != self.context.ignore_index + seg_weight = seg_logit.new_zeros(size=seg_label.size()) + valid_seg_weight = seg_weight[valid_mask] + if self.thresh is not None: + seg_prob = F.softmax(seg_logit, dim=1) + + tmp_seg_label = seg_label.clone().unsqueeze(1) + tmp_seg_label[tmp_seg_label == self.context.ignore_index] = 0 + seg_prob = seg_prob.gather(1, tmp_seg_label).squeeze(1) + sort_prob, sort_indices = seg_prob[valid_mask].sort() + + if sort_prob.numel() > 0: + min_threshold = sort_prob[min(batch_kept, sort_prob.numel() - 1)] + else: + min_threshold = 0.0 + threshold = max(min_threshold, self.thresh) + valid_seg_weight[seg_prob[valid_mask] < threshold] = 1.0 + else: + losses = self.context.loss_decode( + seg_logit, + seg_label, + weight=None, + ignore_index=self.context.ignore_index, + reduction_override='none', + ) + # faster than topk according to https://github.com/pytorch/pytorch/issues/22812 # noqa + _, sort_indices = losses[valid_mask].sort(descending=True) + valid_seg_weight[sort_indices[:batch_kept]] = 1.0 + + seg_weight[valid_mask] = valid_seg_weight + + return seg_weight diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/__init__.py new file mode 100644 index 000000000000..f2678b321c29 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/__init__.py @@ -0,0 +1,3 @@ +from .misc import add_prefix + +__all__ = ['add_prefix'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/misc.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/misc.py new file mode 100644 index 000000000000..eb862a82bd47 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/core/utils/misc.py @@ -0,0 +1,17 @@ +def add_prefix(inputs, prefix): + """Add prefix for dict. + + Args: + inputs (dict): The input dict with str keys. + prefix (str): The prefix to add. + + Returns: + + dict: The dict with keys updated with ``prefix``. + """ + + outputs = dict() + for name, value in inputs.items(): + outputs[f'{prefix}.{name}'] = value + + return outputs diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py new file mode 100644 index 000000000000..3612a6e86e94 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/__init__.py @@ -0,0 +1,30 @@ +from .ade import ADE20KDataset +from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset +from .chase_db1 import ChaseDB1Dataset +from .cityscapes import CityscapesDataset +from .custom import CustomDataset +from .dataset_wrappers import ConcatDataset, RepeatDataset +from .drive import DRIVEDataset +from .hrf import HRFDataset +from .pascal_context import PascalContextDataset, PascalContextDataset59 +from .stare import STAREDataset +from .voc import PascalVOCDataset + +__all__ = [ + 'CustomDataset', + 'build_dataloader', + 'ConcatDataset', + 'RepeatDataset', + 'DATASETS', + 'build_dataset', + 'PIPELINES', + 'CityscapesDataset', + 'PascalVOCDataset', + 'ADE20KDataset', + 'PascalContextDataset', + 'PascalContextDataset59', + 'ChaseDB1Dataset', + 'DRIVEDataset', + 'HRFDataset', + 'STAREDataset', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py new file mode 100644 index 000000000000..6a69943b1ce7 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/ade.py @@ -0,0 +1,322 @@ +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class ADE20KDataset(CustomDataset): + """ADE20K dataset. + + In segmentation map annotation for ADE20K, 0 stands for background, which + is not included in 150 categories. ``reduce_zero_label`` is fixed to True. + The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed to + '.png'. + """ + + CLASSES = ( + 'wall', + 'building', + 'sky', + 'floor', + 'tree', + 'ceiling', + 'road', + 'bed ', + 'windowpane', + 'grass', + 'cabinet', + 'sidewalk', + 'person', + 'earth', + 'door', + 'table', + 'mountain', + 'plant', + 'curtain', + 'chair', + 'car', + 'water', + 'painting', + 'sofa', + 'shelf', + 'house', + 'sea', + 'mirror', + 'rug', + 'field', + 'armchair', + 'seat', + 'fence', + 'desk', + 'rock', + 'wardrobe', + 'lamp', + 'bathtub', + 'railing', + 'cushion', + 'base', + 'box', + 'column', + 'signboard', + 'chest of drawers', + 'counter', + 'sand', + 'sink', + 'skyscraper', + 'fireplace', + 'refrigerator', + 'grandstand', + 'path', + 'stairs', + 'runway', + 'case', + 'pool table', + 'pillow', + 'screen door', + 'stairway', + 'river', + 'bridge', + 'bookcase', + 'blind', + 'coffee table', + 'toilet', + 'flower', + 'book', + 'hill', + 'bench', + 'countertop', + 'stove', + 'palm', + 'kitchen island', + 'computer', + 'swivel chair', + 'boat', + 'bar', + 'arcade machine', + 'hovel', + 'bus', + 'towel', + 'light', + 'truck', + 'tower', + 'chandelier', + 'awning', + 'streetlight', + 'booth', + 'television receiver', + 'airplane', + 'dirt track', + 'apparel', + 'pole', + 'land', + 'bannister', + 'escalator', + 'ottoman', + 'bottle', + 'buffet', + 'poster', + 'stage', + 'van', + 'ship', + 'fountain', + 'conveyer belt', + 'canopy', + 'washer', + 'plaything', + 'swimming pool', + 'stool', + 'barrel', + 'basket', + 'waterfall', + 'tent', + 'bag', + 'minibike', + 'cradle', + 'oven', + 'ball', + 'food', + 'step', + 'tank', + 'trade name', + 'microwave', + 'pot', + 'animal', + 'bicycle', + 'lake', + 'dishwasher', + 'screen', + 'blanket', + 'sculpture', + 'hood', + 'sconce', + 'vase', + 'traffic light', + 'tray', + 'ashcan', + 'fan', + 'pier', + 'crt screen', + 'plate', + 'monitor', + 'bulletin board', + 'shower', + 'radiator', + 'glass', + 'clock', + 'flag', + ) + + PALETTE = [ + [120, 120, 120], + [180, 120, 120], + [6, 230, 230], + [80, 50, 50], + [4, 200, 3], + [120, 120, 80], + [140, 140, 140], + [204, 5, 255], + [230, 230, 230], + [4, 250, 7], + [224, 5, 255], + [235, 255, 7], + [150, 5, 61], + [120, 120, 70], + [8, 255, 51], + [255, 6, 82], + [143, 255, 140], + [204, 255, 4], + [255, 51, 7], + [204, 70, 3], + [0, 102, 200], + [61, 230, 250], + [255, 6, 51], + [11, 102, 255], + [255, 7, 71], + [255, 9, 224], + [9, 7, 230], + [220, 220, 220], + [255, 9, 92], + [112, 9, 255], + [8, 255, 214], + [7, 255, 224], + [255, 184, 6], + [10, 255, 71], + [255, 41, 10], + [7, 255, 255], + [224, 255, 8], + [102, 8, 255], + [255, 61, 6], + [255, 194, 7], + [255, 122, 8], + [0, 255, 20], + [255, 8, 41], + [255, 5, 153], + [6, 51, 255], + [235, 12, 255], + [160, 150, 20], + [0, 163, 255], + [140, 140, 140], + [250, 10, 15], + [20, 255, 0], + [31, 255, 0], + [255, 31, 0], + [255, 224, 0], + [153, 255, 0], + [0, 0, 255], + [255, 71, 0], + [0, 235, 255], + [0, 173, 255], + [31, 0, 255], + [11, 200, 200], + [255, 82, 0], + [0, 255, 245], + [0, 61, 255], + [0, 255, 112], + [0, 255, 133], + [255, 0, 0], + [255, 163, 0], + [255, 102, 0], + [194, 255, 0], + [0, 143, 255], + [51, 255, 0], + [0, 82, 255], + [0, 255, 41], + [0, 255, 173], + [10, 0, 255], + [173, 255, 0], + [0, 255, 153], + [255, 92, 0], + [255, 0, 255], + [255, 0, 245], + [255, 0, 102], + [255, 173, 0], + [255, 0, 20], + [255, 184, 184], + [0, 31, 255], + [0, 255, 61], + [0, 71, 255], + [255, 0, 204], + [0, 255, 194], + [0, 255, 82], + [0, 10, 255], + [0, 112, 255], + [51, 0, 255], + [0, 194, 255], + [0, 122, 255], + [0, 255, 163], + [255, 153, 0], + [0, 255, 10], + [255, 112, 0], + [143, 255, 0], + [82, 0, 255], + [163, 255, 0], + [255, 235, 0], + [8, 184, 170], + [133, 0, 255], + [0, 255, 92], + [184, 0, 255], + [255, 0, 31], + [0, 184, 255], + [0, 214, 255], + [255, 0, 112], + [92, 255, 0], + [0, 224, 255], + [112, 224, 255], + [70, 184, 160], + [163, 0, 255], + [153, 0, 255], + [71, 255, 0], + [255, 0, 163], + [255, 204, 0], + [255, 0, 143], + [0, 255, 235], + [133, 255, 0], + [255, 0, 235], + [245, 0, 255], + [255, 0, 122], + [255, 245, 0], + [10, 190, 212], + [214, 255, 0], + [0, 204, 255], + [20, 0, 255], + [255, 255, 0], + [0, 153, 255], + [0, 41, 255], + [0, 255, 204], + [41, 0, 255], + [41, 255, 0], + [173, 0, 255], + [0, 245, 255], + [71, 0, 255], + [122, 0, 255], + [0, 255, 184], + [0, 92, 255], + [184, 255, 0], + [0, 133, 255], + [255, 214, 0], + [25, 194, 194], + [102, 255, 0], + [92, 0, 255], + ] + + def __init__(self, **kwargs): + super(ADE20KDataset, self).__init__(img_suffix='.jpg', seg_map_suffix='.png', reduce_zero_label=True, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py new file mode 100644 index 000000000000..c076a55fe358 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/builder.py @@ -0,0 +1,172 @@ +import copy +import platform +import random +from functools import partial + +import numpy as np +from torch.utils.data import DistributedSampler + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import collate +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import get_dist_info +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry, build_from_cfg +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import ( + DataLoader, + PoolDataLoader, +) + +if platform.system() != 'Windows': + # https://github.com/pytorch/pytorch/issues/973 + import resource + + rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) + hard_limit = rlimit[1] + soft_limit = min(4096, hard_limit) + resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit)) + +DATASETS = Registry('dataset') +PIPELINES = Registry('pipeline') + + +def _concat_dataset(cfg, default_args=None): + """Build :obj:`ConcatDataset by.""" + from .dataset_wrappers import ConcatDataset + + img_dir = cfg['img_dir'] + ann_dir = cfg.get('ann_dir', None) + split = cfg.get('split', None) + num_img_dir = len(img_dir) if isinstance(img_dir, (list, tuple)) else 1 + if ann_dir is not None: + num_ann_dir = len(ann_dir) if isinstance(ann_dir, (list, tuple)) else 1 + else: + num_ann_dir = 0 + if split is not None: + num_split = len(split) if isinstance(split, (list, tuple)) else 1 + else: + num_split = 0 + if num_img_dir > 1: + assert num_img_dir == num_ann_dir or num_ann_dir == 0 + assert num_img_dir == num_split or num_split == 0 + else: + assert num_split == num_ann_dir or num_ann_dir <= 1 + num_dset = max(num_split, num_img_dir) + + datasets = [] + for i in range(num_dset): + data_cfg = copy.deepcopy(cfg) + if isinstance(img_dir, (list, tuple)): + data_cfg['img_dir'] = img_dir[i] + if isinstance(ann_dir, (list, tuple)): + data_cfg['ann_dir'] = ann_dir[i] + if isinstance(split, (list, tuple)): + data_cfg['split'] = split[i] + datasets.append(build_dataset(data_cfg, default_args)) + + return ConcatDataset(datasets) + + +def build_dataset(cfg, default_args=None): + """Build datasets.""" + from .dataset_wrappers import ConcatDataset, RepeatDataset + + if isinstance(cfg, (list, tuple)): + dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg]) + elif cfg['type'] == 'RepeatDataset': + dataset = RepeatDataset(build_dataset(cfg['dataset'], default_args), cfg['times']) + elif isinstance(cfg.get('img_dir'), (list, tuple)) or isinstance(cfg.get('split', None), (list, tuple)): + dataset = _concat_dataset(cfg, default_args) + else: + dataset = build_from_cfg(cfg, DATASETS, default_args) + + return dataset + + +def build_dataloader( + dataset, + samples_per_gpu, + workers_per_gpu, + num_gpus=1, + dist=True, + shuffle=True, + seed=None, + drop_last=False, + pin_memory=True, + dataloader_type='PoolDataLoader', + **kwargs, +): + """Build PyTorch DataLoader. + + In distributed training, each GPU/process has a dataloader. + In non-distributed training, there is only one dataloader for all GPUs. + + Args: + dataset (Dataset): A PyTorch dataset. + samples_per_gpu (int): Number of training samples on each GPU, i.e., + batch size of each GPU. + workers_per_gpu (int): How many subprocesses to use for data loading + for each GPU. + num_gpus (int): Number of GPUs. Only used in non-distributed training. + dist (bool): Distributed training/test or not. Default: True. + shuffle (bool): Whether to shuffle the data at every epoch. + Default: True. + seed (int | None): Seed to be used. Default: None. + drop_last (bool): Whether to drop the last incomplete batch in epoch. + Default: False + pin_memory (bool): Whether to use pin_memory in DataLoader. + Default: True + dataloader_type (str): Type of dataloader. Default: 'PoolDataLoader' + kwargs: any keyword argument to be used to initialize DataLoader + + Returns: + DataLoader: A PyTorch dataloader. + """ + rank, world_size = get_dist_info() + if dist: + sampler = DistributedSampler(dataset, world_size, rank, shuffle=shuffle) + shuffle = False + batch_size = samples_per_gpu + num_workers = workers_per_gpu + else: + sampler = None + batch_size = num_gpus * samples_per_gpu + num_workers = num_gpus * workers_per_gpu + + init_fn = partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed) if seed is not None else None + + assert dataloader_type in ('DataLoader', 'PoolDataLoader'), f'unsupported dataloader {dataloader_type}' + + if dataloader_type == 'PoolDataLoader': + dataloader = PoolDataLoader + elif dataloader_type == 'DataLoader': + dataloader = DataLoader + + data_loader = dataloader( + dataset, + batch_size=batch_size, + sampler=sampler, + num_workers=num_workers, + collate_fn=partial(collate, samples_per_gpu=samples_per_gpu), + pin_memory=pin_memory, + shuffle=shuffle, + worker_init_fn=init_fn, + drop_last=drop_last, + **kwargs, + ) + + return data_loader + + +def worker_init_fn(worker_id, num_workers, rank, seed): + """Worker init func for dataloader. + + The seed of each worker equals to num_worker * rank + worker_id + user_seed + + Args: + worker_id (int): Worker id. + num_workers (int): Number of workers. + rank (int): The rank of current process. + seed (int): The random seed to use. + """ + + worker_seed = num_workers * rank + worker_id + seed + np.random.seed(worker_seed) + random.seed(worker_seed) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py new file mode 100644 index 000000000000..906e51485f72 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/chase_db1.py @@ -0,0 +1,25 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class ChaseDB1Dataset(CustomDataset): + """Chase_db1 dataset. + + In segmentation map annotation for Chase_db1, 0 stands for background, + which is included in 2 categories. ``reduce_zero_label`` is fixed to False. + The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '_1stHO.png'. + """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(ChaseDB1Dataset, self).__init__( + img_suffix='.png', seg_map_suffix='_1stHO.png', reduce_zero_label=False, **kwargs + ) + assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py new file mode 100644 index 000000000000..4a8a0ecd589b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/cityscapes.py @@ -0,0 +1,241 @@ +import os.path as osp +import tempfile + +import numpy as np +from PIL import Image + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import print_log + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class CityscapesDataset(CustomDataset): + """Cityscapes dataset. + + The ``img_suffix`` is fixed to '_leftImg8bit.png' and ``seg_map_suffix`` is + fixed to '_gtFine_labelTrainIds.png' for Cityscapes dataset. + """ + + CLASSES = ( + 'road', + 'sidewalk', + 'building', + 'wall', + 'fence', + 'pole', + 'traffic light', + 'traffic sign', + 'vegetation', + 'terrain', + 'sky', + 'person', + 'rider', + 'car', + 'truck', + 'bus', + 'train', + 'motorcycle', + 'bicycle', + ) + + PALETTE = [ + [128, 64, 128], + [244, 35, 232], + [70, 70, 70], + [102, 102, 156], + [190, 153, 153], + [153, 153, 153], + [250, 170, 30], + [220, 220, 0], + [107, 142, 35], + [152, 251, 152], + [70, 130, 180], + [220, 20, 60], + [255, 0, 0], + [0, 0, 142], + [0, 0, 70], + [0, 60, 100], + [0, 80, 100], + [0, 0, 230], + [119, 11, 32], + ] + + def __init__(self, **kwargs): + super(CityscapesDataset, self).__init__( + img_suffix='_leftImg8bit.png', seg_map_suffix='_gtFine_labelTrainIds.png', **kwargs + ) + + @staticmethod + def _convert_to_label_id(result): + """Convert trainId to id for cityscapes.""" + if isinstance(result, str): + result = np.load(result) + import cityscapesscripts.helpers.labels as CSLabels + + result_copy = result.copy() + for trainId, label in CSLabels.trainId2label.items(): + result_copy[result == trainId] = label.id + + return result_copy + + def results2img(self, results, imgfile_prefix, to_label_id): + """Write the segmentation results to images. + + Args: + results (list[list | tuple | ndarray]): Testing results of the + dataset. + imgfile_prefix (str): The filename prefix of the png files. + If the prefix is "somepath/xxx", + the png files will be named "somepath/xxx.png". + to_label_id (bool): whether convert output to label_id for + submission + + Returns: + list[str: str]: result txt files which contains corresponding + semantic segmentation images. + """ + mmcv.mkdir_or_exist(imgfile_prefix) + result_files = [] + prog_bar = mmcv.ProgressBar(len(self)) + for idx in range(len(self)): + result = results[idx] + if to_label_id: + result = self._convert_to_label_id(result) + filename = self.img_infos[idx]['filename'] + basename = osp.splitext(osp.basename(filename))[0] + + png_filename = osp.join(imgfile_prefix, f'{basename}.png') + + output = Image.fromarray(result.astype(np.uint8)).convert('P') + import cityscapesscripts.helpers.labels as CSLabels + + palette = np.zeros((len(CSLabels.id2label), 3), dtype=np.uint8) + for label_id, label in CSLabels.id2label.items(): + palette[label_id] = label.color + + output.putpalette(palette) + output.save(png_filename) + result_files.append(png_filename) + prog_bar.update() + + return result_files + + def format_results(self, results, imgfile_prefix=None, to_label_id=True): + """Format the results into dir (standard format for Cityscapes + evaluation). + + Args: + results (list): Testing results of the dataset. + imgfile_prefix (str | None): The prefix of images files. It + includes the file path and the prefix of filename, e.g., + "a/b/prefix". If not specified, a temp file will be created. + Default: None. + to_label_id (bool): whether convert output to label_id for + submission. Default: False + + Returns: + tuple: (result_files, tmp_dir), result_files is a list containing + the image paths, tmp_dir is the temporal directory created + for saving json/png files when img_prefix is not specified. + """ + + assert isinstance(results, list), 'results must be a list' + assert len(results) == len(self), ( + 'The length of results is not equal to the dataset len: ' f'{len(results)} != {len(self)}' + ) + + if imgfile_prefix is None: + tmp_dir = tempfile.TemporaryDirectory() + imgfile_prefix = tmp_dir.name + else: + tmp_dir = None + result_files = self.results2img(results, imgfile_prefix, to_label_id) + + return result_files, tmp_dir + + def evaluate(self, results, metric='mIoU', logger=None, imgfile_prefix=None, efficient_test=False): + """Evaluation in Cityscapes/default protocol. + + Args: + results (list): Testing results of the dataset. + metric (str | list[str]): Metrics to be evaluated. + logger (logging.Logger | None | str): Logger used for printing + related information during evaluation. Default: None. + imgfile_prefix (str | None): The prefix of output image file, + for cityscapes evaluation only. It includes the file path and + the prefix of filename, e.g., "a/b/prefix". + If results are evaluated with cityscapes protocol, it would be + the prefix of output png files. The output files would be + png images under folder "a/b/prefix/xxx.png", where "xxx" is + the image name of cityscapes. If not specified, a temp file + will be created for evaluation. + Default: None. + + Returns: + dict[str, float]: Cityscapes/default metrics. + """ + + eval_results = dict() + metrics = metric.copy() if isinstance(metric, list) else [metric] + if 'cityscapes' in metrics: + eval_results.update(self._evaluate_cityscapes(results, logger, imgfile_prefix)) + metrics.remove('cityscapes') + if len(metrics) > 0: + eval_results.update(super(CityscapesDataset, self).evaluate(results, metrics, logger, efficient_test)) + + return eval_results + + def _evaluate_cityscapes(self, results, logger, imgfile_prefix): + """Evaluation in Cityscapes protocol. + + Args: + results (list): Testing results of the dataset. + logger (logging.Logger | str | None): Logger used for printing + related information during evaluation. Default: None. + imgfile_prefix (str | None): The prefix of output image file + + Returns: + dict[str: float]: Cityscapes evaluation results. + """ + try: + import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as CSEval # noqa + except ImportError: + raise ImportError('Please run "pip install cityscapesscripts" to ' 'install cityscapesscripts first.') + msg = 'Evaluating in Cityscapes style' + if logger is None: + msg = '\n' + msg + print_log(msg, logger=logger) + + result_files, tmp_dir = self.format_results(results, imgfile_prefix) + + if tmp_dir is None: + result_dir = imgfile_prefix + else: + result_dir = tmp_dir.name + + eval_results = dict() + print_log(f'Evaluating results under {result_dir} ...', logger=logger) + + CSEval.args.evalInstLevelScore = True + CSEval.args.predictionPath = osp.abspath(result_dir) + CSEval.args.evalPixelAccuracy = True + CSEval.args.JSONOutput = False + + seg_map_list = [] + pred_list = [] + + # when evaluating with official cityscapesscripts, + # **_gtFine_labelIds.png is used + for seg_map in mmcv.scandir(self.ann_dir, 'gtFine_labelIds.png', recursive=True): + seg_map_list.append(osp.join(self.ann_dir, seg_map)) + pred_list.append(CSEval.getPrediction(CSEval.args, seg_map)) + + eval_results.update(CSEval.evaluateImgLists(pred_list, seg_map_list, CSEval.args)) + + if tmp_dir is not None: + tmp_dir.cleanup() + + return eval_results diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py new file mode 100644 index 000000000000..28680a832ca5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/custom.py @@ -0,0 +1,392 @@ +import os +import os.path as osp +from collections import OrderedDict +from functools import reduce + +import numpy as np +from prettytable import PrettyTable +from torch.utils.data import Dataset + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import print_log +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import eval_metrics +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + +from .builder import DATASETS +from .pipelines import Compose + + +@DATASETS.register_module() +class CustomDataset(Dataset): + """Custom dataset for semantic segmentation. An example of file structure + is as followed. + + .. code-block:: none + + ├── data + │ ├── my_dataset + │ │ ├── img_dir + │ │ │ ├── train + │ │ │ │ ├── xxx{img_suffix} + │ │ │ │ ├── yyy{img_suffix} + │ │ │ │ ├── zzz{img_suffix} + │ │ │ ├── val + │ │ ├── ann_dir + │ │ │ ├── train + │ │ │ │ ├── xxx{seg_map_suffix} + │ │ │ │ ├── yyy{seg_map_suffix} + │ │ │ │ ├── zzz{seg_map_suffix} + │ │ │ ├── val + + The img/gt_semantic_seg pair of CustomDataset should be of the same + except suffix. A valid img/gt_semantic_seg filename pair should be like + ``xxx{img_suffix}`` and ``xxx{seg_map_suffix}`` (extension is also included + in the suffix). If split is given, then ``xxx`` is specified in txt file. + Otherwise, all files in ``img_dir/``and ``ann_dir`` will be loaded. + Please refer to ``docs/tutorials/new_dataset.md`` for more details. + + + Args: + pipeline (list[dict]): Processing pipeline + img_dir (str): Path to image directory + img_suffix (str): Suffix of images. Default: '.jpg' + ann_dir (str, optional): Path to annotation directory. Default: None + seg_map_suffix (str): Suffix of segmentation maps. Default: '.png' + split (str, optional): Split txt file. If split is specified, only + file with suffix in the splits will be loaded. Otherwise, all + images in img_dir/ann_dir will be loaded. Default: None + data_root (str, optional): Data root for img_dir/ann_dir. Default: + None. + test_mode (bool): If test_mode=True, gt wouldn't be loaded. + ignore_index (int): The label index to be ignored. Default: 255 + reduce_zero_label (bool): Whether to mark label zero as ignored. + Default: False + classes (str | Sequence[str], optional): Specify classes to load. + If is None, ``cls.CLASSES`` will be used. Default: None. + palette (Sequence[Sequence[int]]] | np.ndarray | None): + The palette of segmentation map. If None is given, and + self.PALETTE is None, random palette will be generated. + Default: None + """ + + CLASSES = None + + PALETTE = None + + def __init__( + self, + pipeline, + img_dir, + img_suffix='.jpg', + ann_dir=None, + seg_map_suffix='.png', + split=None, + data_root=None, + test_mode=False, + ignore_index=255, + reduce_zero_label=False, + classes=None, + palette=None, + ): + self.pipeline = Compose(pipeline) + self.img_dir = img_dir + self.img_suffix = img_suffix + self.ann_dir = ann_dir + self.seg_map_suffix = seg_map_suffix + self.split = split + self.data_root = data_root + self.test_mode = test_mode + self.ignore_index = ignore_index + self.reduce_zero_label = reduce_zero_label + self.label_map = None + self.CLASSES, self.PALETTE = self.get_classes_and_palette(classes, palette) + + # join paths if data_root is specified + if self.data_root is not None: + if not osp.isabs(self.img_dir): + self.img_dir = osp.join(self.data_root, self.img_dir) + if not (self.ann_dir is None or osp.isabs(self.ann_dir)): + self.ann_dir = osp.join(self.data_root, self.ann_dir) + if not (self.split is None or osp.isabs(self.split)): + self.split = osp.join(self.data_root, self.split) + + # load annotations + self.img_infos = self.load_annotations( + self.img_dir, self.img_suffix, self.ann_dir, self.seg_map_suffix, self.split + ) + + def __len__(self): + """Total number of samples of data.""" + return len(self.img_infos) + + def load_annotations(self, img_dir, img_suffix, ann_dir, seg_map_suffix, split): + """Load annotation from directory. + + Args: + img_dir (str): Path to image directory + img_suffix (str): Suffix of images. + ann_dir (str|None): Path to annotation directory. + seg_map_suffix (str|None): Suffix of segmentation maps. + split (str|None): Split txt file. If split is specified, only file + with suffix in the splits will be loaded. Otherwise, all images + in img_dir/ann_dir will be loaded. Default: None + + Returns: + list[dict]: All image info of dataset. + """ + + img_infos = [] + if split is not None: + with open(split) as f: + for line in f: + img_name = line.strip() + img_info = dict(filename=img_name + img_suffix) + if ann_dir is not None: + seg_map = img_name + seg_map_suffix + img_info['ann'] = dict(seg_map=seg_map) + img_infos.append(img_info) + else: + for img in mmcv.scandir(img_dir, img_suffix, recursive=True): + img_info = dict(filename=img) + if ann_dir is not None: + seg_map = img.replace(img_suffix, seg_map_suffix) + img_info['ann'] = dict(seg_map=seg_map) + img_infos.append(img_info) + + print_log(f'Loaded {len(img_infos)} images', logger=get_root_logger()) + return img_infos + + def get_ann_info(self, idx): + """Get annotation by index. + + Args: + idx (int): Index of data. + + Returns: + dict: Annotation info of specified index. + """ + + return self.img_infos[idx]['ann'] + + def pre_pipeline(self, results): + """Prepare results dict for pipeline.""" + results['seg_fields'] = [] + results['img_prefix'] = self.img_dir + results['seg_prefix'] = self.ann_dir + if self.custom_classes: + results['label_map'] = self.label_map + + def __getitem__(self, idx): + """Get training/test data after pipeline. + + Args: + idx (int): Index of data. + + Returns: + dict: Training/test data (with annotation if `test_mode` is set + False). + """ + + if self.test_mode: + return self.prepare_test_img(idx) + else: + return self.prepare_train_img(idx) + + def prepare_train_img(self, idx): + """Get training data and annotations after pipeline. + + Args: + idx (int): Index of data. + + Returns: + dict: Training data and annotation after pipeline with new keys + introduced by pipeline. + """ + + img_info = self.img_infos[idx] + ann_info = self.get_ann_info(idx) + results = dict(img_info=img_info, ann_info=ann_info) + self.pre_pipeline(results) + return self.pipeline(results) + + def prepare_test_img(self, idx): + """Get testing data after pipeline. + + Args: + idx (int): Index of data. + + Returns: + dict: Testing data after pipeline with new keys introduced by + pipeline. + """ + + img_info = self.img_infos[idx] + results = dict(img_info=img_info) + self.pre_pipeline(results) + return self.pipeline(results) + + def format_results(self, results, **kwargs): + """Place holder to format result to dataset specific output.""" + + def get_gt_seg_maps(self, efficient_test=False): + """Get ground truth segmentation maps for evaluation.""" + gt_seg_maps = [] + for img_info in self.img_infos: + seg_map = osp.join(self.ann_dir, img_info['ann']['seg_map']) + if efficient_test: + gt_seg_map = seg_map + else: + gt_seg_map = mmcv.imread(seg_map, flag='unchanged', backend='pillow') + gt_seg_maps.append(gt_seg_map) + return gt_seg_maps + + def get_classes_and_palette(self, classes=None, palette=None): + """Get class names of current dataset. + + Args: + classes (Sequence[str] | str | None): If classes is None, use + default CLASSES defined by builtin dataset. If classes is a + string, take it as a file name. The file contains the name of + classes where each line contains one class name. If classes is + a tuple or list, override the CLASSES defined by the dataset. + palette (Sequence[Sequence[int]]] | np.ndarray | None): + The palette of segmentation map. If None is given, random + palette will be generated. Default: None + """ + if classes is None: + self.custom_classes = False + return self.CLASSES, self.PALETTE + + self.custom_classes = True + if isinstance(classes, str): + # take it as a file path + class_names = mmcv.list_from_file(classes) + elif isinstance(classes, (tuple, list)): + class_names = classes + else: + raise ValueError(f'Unsupported type {type(classes)} of classes.') + + if self.CLASSES: + if not set(classes).issubset(self.CLASSES): + raise ValueError('classes is not a subset of CLASSES.') + + # dictionary, its keys are the old label ids and its values + # are the new label ids. + # used for changing pixel labels in load_annotations. + self.label_map = {} + for i, c in enumerate(self.CLASSES): + if c not in class_names: + self.label_map[i] = -1 + else: + self.label_map[i] = classes.index(c) + + palette = self.get_palette_for_custom_classes(class_names, palette) + + return class_names, palette + + def get_palette_for_custom_classes(self, class_names, palette=None): + + if self.label_map is not None: + # return subset of palette + palette = [] + for old_id, new_id in sorted(self.label_map.items(), key=lambda x: x[1]): + if new_id != -1: + palette.append(self.PALETTE[old_id]) + palette = type(self.PALETTE)(palette) + + elif palette is None: + if self.PALETTE is None: + palette = np.random.randint(0, 255, size=(len(class_names), 3)) + else: + palette = self.PALETTE + + return palette + + def evaluate(self, results, metric='mIoU', logger=None, efficient_test=False, **kwargs): + """Evaluate the dataset. + + Args: + results (list): Testing results of the dataset. + metric (str | list[str]): Metrics to be evaluated. 'mIoU', + 'mDice' and 'mFscore' are supported. + logger (logging.Logger | None | str): Logger used for printing + related information during evaluation. Default: None. + + Returns: + dict[str, float]: Default metrics. + """ + + if isinstance(metric, str): + metric = [metric] + allowed_metrics = ['mIoU', 'mDice', 'mFscore'] + if not set(metric).issubset(set(allowed_metrics)): + raise KeyError('metric {} is not supported'.format(metric)) + eval_results = {} + gt_seg_maps = self.get_gt_seg_maps(efficient_test) + if self.CLASSES is None: + num_classes = len(reduce(np.union1d, [np.unique(_) for _ in gt_seg_maps])) + else: + num_classes = len(self.CLASSES) + ret_metrics = eval_metrics( + results, + gt_seg_maps, + num_classes, + self.ignore_index, + metric, + label_map=self.label_map, + reduce_zero_label=self.reduce_zero_label, + ) + + if self.CLASSES is None: + class_names = tuple(range(num_classes)) + else: + class_names = self.CLASSES + + # summary table + ret_metrics_summary = OrderedDict( + { + ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2) + for ret_metric, ret_metric_value in ret_metrics.items() + } + ) + + # each class table + ret_metrics.pop('aAcc', None) + ret_metrics_class = OrderedDict( + {ret_metric: np.round(ret_metric_value * 100, 2) for ret_metric, ret_metric_value in ret_metrics.items()} + ) + ret_metrics_class.update({'Class': class_names}) + ret_metrics_class.move_to_end('Class', last=False) + + # for logger + class_table_data = PrettyTable() + for key, val in ret_metrics_class.items(): + class_table_data.add_column(key, val) + + summary_table_data = PrettyTable() + for key, val in ret_metrics_summary.items(): + if key == 'aAcc': + summary_table_data.add_column(key, [val]) + else: + summary_table_data.add_column('m' + key, [val]) + + print_log('per class results:', logger) + print_log('\n' + class_table_data.get_string(), logger=logger) + print_log('Summary:', logger) + print_log('\n' + summary_table_data.get_string(), logger=logger) + + # each metric dict + for key, value in ret_metrics_summary.items(): + if key == 'aAcc': + eval_results[key] = value / 100.0 + else: + eval_results['m' + key] = value / 100.0 + + ret_metrics_class.pop('Class', None) + for key, value in ret_metrics_class.items(): + eval_results.update({key + '.' + str(name): value[idx] / 100.0 for idx, name in enumerate(class_names)}) + + if mmcv.is_list_of(results, str): + for file_name in results: + os.remove(file_name) + return eval_results diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/dataset_wrappers.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/dataset_wrappers.py new file mode 100644 index 000000000000..d6a5e957ec3b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/dataset_wrappers.py @@ -0,0 +1,50 @@ +from torch.utils.data.dataset import ConcatDataset as _ConcatDataset + +from .builder import DATASETS + + +@DATASETS.register_module() +class ConcatDataset(_ConcatDataset): + """A wrapper of concatenated dataset. + + Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but + concat the group flag for image aspect ratio. + + Args: + datasets (list[:obj:`Dataset`]): A list of datasets. + """ + + def __init__(self, datasets): + super(ConcatDataset, self).__init__(datasets) + self.CLASSES = datasets[0].CLASSES + self.PALETTE = datasets[0].PALETTE + + +@DATASETS.register_module() +class RepeatDataset(object): + """A wrapper of repeated dataset. + + The length of repeated dataset will be `times` larger than the original + dataset. This is useful when the data loading time is long but the dataset + is small. Using RepeatDataset can reduce the data loading time between + epochs. + + Args: + dataset (:obj:`Dataset`): The dataset to be repeated. + times (int): Repeat times. + """ + + def __init__(self, dataset, times): + self.dataset = dataset + self.times = times + self.CLASSES = dataset.CLASSES + self.PALETTE = dataset.PALETTE + self._ori_len = len(self.dataset) + + def __getitem__(self, idx): + """Get item from original dataset.""" + return self.dataset[idx % self._ori_len] + + def __len__(self): + """The length is multiplied by ``times``""" + return self.times * self._ori_len diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py new file mode 100644 index 000000000000..9cb073329ef0 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/drive.py @@ -0,0 +1,25 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class DRIVEDataset(CustomDataset): + """DRIVE dataset. + + In segmentation map annotation for DRIVE, 0 stands for background, which is + included in 2 categories. ``reduce_zero_label`` is fixed to False. The + ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '_manual1.png'. + """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(DRIVEDataset, self).__init__( + img_suffix='.png', seg_map_suffix='_manual1.png', reduce_zero_label=False, **kwargs + ) + assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py new file mode 100644 index 000000000000..b67616f5f58f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/hrf.py @@ -0,0 +1,23 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class HRFDataset(CustomDataset): + """HRF dataset. + + In segmentation map annotation for HRF, 0 stands for background, which is + included in 2 categories. ``reduce_zero_label`` is fixed to False. The + ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '.png'. + """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(HRFDataset, self).__init__(img_suffix='.png', seg_map_suffix='.png', reduce_zero_label=False, **kwargs) + assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py new file mode 100644 index 000000000000..35028ac9b15a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pascal_context.py @@ -0,0 +1,294 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class PascalContextDataset(CustomDataset): + """PascalContext dataset. + + In segmentation map annotation for PascalContext, 0 stands for background, + which is included in 60 categories. ``reduce_zero_label`` is fixed to + False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png'. + + Args: + split (str): Split txt file for PascalContext. + """ + + CLASSES = ( + 'background', + 'aeroplane', + 'bag', + 'bed', + 'bedclothes', + 'bench', + 'bicycle', + 'bird', + 'boat', + 'book', + 'bottle', + 'building', + 'bus', + 'cabinet', + 'car', + 'cat', + 'ceiling', + 'chair', + 'cloth', + 'computer', + 'cow', + 'cup', + 'curtain', + 'dog', + 'door', + 'fence', + 'floor', + 'flower', + 'food', + 'grass', + 'ground', + 'horse', + 'keyboard', + 'light', + 'motorbike', + 'mountain', + 'mouse', + 'person', + 'plate', + 'platform', + 'pottedplant', + 'road', + 'rock', + 'sheep', + 'shelves', + 'sidewalk', + 'sign', + 'sky', + 'snow', + 'sofa', + 'table', + 'track', + 'train', + 'tree', + 'truck', + 'tvmonitor', + 'wall', + 'water', + 'window', + 'wood', + ) + + PALETTE = [ + [120, 120, 120], + [180, 120, 120], + [6, 230, 230], + [80, 50, 50], + [4, 200, 3], + [120, 120, 80], + [140, 140, 140], + [204, 5, 255], + [230, 230, 230], + [4, 250, 7], + [224, 5, 255], + [235, 255, 7], + [150, 5, 61], + [120, 120, 70], + [8, 255, 51], + [255, 6, 82], + [143, 255, 140], + [204, 255, 4], + [255, 51, 7], + [204, 70, 3], + [0, 102, 200], + [61, 230, 250], + [255, 6, 51], + [11, 102, 255], + [255, 7, 71], + [255, 9, 224], + [9, 7, 230], + [220, 220, 220], + [255, 9, 92], + [112, 9, 255], + [8, 255, 214], + [7, 255, 224], + [255, 184, 6], + [10, 255, 71], + [255, 41, 10], + [7, 255, 255], + [224, 255, 8], + [102, 8, 255], + [255, 61, 6], + [255, 194, 7], + [255, 122, 8], + [0, 255, 20], + [255, 8, 41], + [255, 5, 153], + [6, 51, 255], + [235, 12, 255], + [160, 150, 20], + [0, 163, 255], + [140, 140, 140], + [250, 10, 15], + [20, 255, 0], + [31, 255, 0], + [255, 31, 0], + [255, 224, 0], + [153, 255, 0], + [0, 0, 255], + [255, 71, 0], + [0, 235, 255], + [0, 173, 255], + [31, 0, 255], + ] + + def __init__(self, split, **kwargs): + super(PascalContextDataset, self).__init__( + img_suffix='.jpg', seg_map_suffix='.png', split=split, reduce_zero_label=False, **kwargs + ) + assert osp.exists(self.img_dir) and self.split is not None + + +@DATASETS.register_module() +class PascalContextDataset59(CustomDataset): + """PascalContext dataset. + + In segmentation map annotation for PascalContext, 0 stands for background, + which is included in 60 categories. ``reduce_zero_label`` is fixed to + False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png'. + + Args: + split (str): Split txt file for PascalContext. + """ + + CLASSES = ( + 'aeroplane', + 'bag', + 'bed', + 'bedclothes', + 'bench', + 'bicycle', + 'bird', + 'boat', + 'book', + 'bottle', + 'building', + 'bus', + 'cabinet', + 'car', + 'cat', + 'ceiling', + 'chair', + 'cloth', + 'computer', + 'cow', + 'cup', + 'curtain', + 'dog', + 'door', + 'fence', + 'floor', + 'flower', + 'food', + 'grass', + 'ground', + 'horse', + 'keyboard', + 'light', + 'motorbike', + 'mountain', + 'mouse', + 'person', + 'plate', + 'platform', + 'pottedplant', + 'road', + 'rock', + 'sheep', + 'shelves', + 'sidewalk', + 'sign', + 'sky', + 'snow', + 'sofa', + 'table', + 'track', + 'train', + 'tree', + 'truck', + 'tvmonitor', + 'wall', + 'water', + 'window', + 'wood', + ) + + PALETTE = [ + [180, 120, 120], + [6, 230, 230], + [80, 50, 50], + [4, 200, 3], + [120, 120, 80], + [140, 140, 140], + [204, 5, 255], + [230, 230, 230], + [4, 250, 7], + [224, 5, 255], + [235, 255, 7], + [150, 5, 61], + [120, 120, 70], + [8, 255, 51], + [255, 6, 82], + [143, 255, 140], + [204, 255, 4], + [255, 51, 7], + [204, 70, 3], + [0, 102, 200], + [61, 230, 250], + [255, 6, 51], + [11, 102, 255], + [255, 7, 71], + [255, 9, 224], + [9, 7, 230], + [220, 220, 220], + [255, 9, 92], + [112, 9, 255], + [8, 255, 214], + [7, 255, 224], + [255, 184, 6], + [10, 255, 71], + [255, 41, 10], + [7, 255, 255], + [224, 255, 8], + [102, 8, 255], + [255, 61, 6], + [255, 194, 7], + [255, 122, 8], + [0, 255, 20], + [255, 8, 41], + [255, 5, 153], + [6, 51, 255], + [235, 12, 255], + [160, 150, 20], + [0, 163, 255], + [140, 140, 140], + [250, 10, 15], + [20, 255, 0], + [31, 255, 0], + [255, 31, 0], + [255, 224, 0], + [153, 255, 0], + [0, 0, 255], + [255, 71, 0], + [0, 235, 255], + [0, 173, 255], + [31, 0, 255], + ] + + def __init__(self, split, **kwargs): + super(PascalContextDataset59, self).__init__( + img_suffix='.jpg', seg_map_suffix='.png', split=split, reduce_zero_label=True, **kwargs + ) + assert osp.exists(self.img_dir) and self.split is not None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py new file mode 100644 index 000000000000..52eb533242b3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/__init__.py @@ -0,0 +1,43 @@ +from .compose import Compose +from .formating import Collect, ImageToTensor, ToDataContainer, ToTensor, Transpose, to_tensor +from .loading import LoadAnnotations, LoadImageFromFile +from .test_time_aug import MultiScaleFlipAug +from .transforms import ( + CLAHE, + AdjustGamma, + Normalize, + Pad, + PhotoMetricDistortion, + RandomCrop, + RandomFlip, + RandomRotate, + Rerange, + Resize, + RGB2Gray, + SegRescale, +) + +__all__ = [ + 'Compose', + 'to_tensor', + 'ToTensor', + 'ImageToTensor', + 'ToDataContainer', + 'Transpose', + 'Collect', + 'LoadAnnotations', + 'LoadImageFromFile', + 'MultiScaleFlipAug', + 'Resize', + 'RandomFlip', + 'Pad', + 'RandomCrop', + 'Normalize', + 'SegRescale', + 'PhotoMetricDistortion', + 'RandomRotate', + 'AdjustGamma', + 'CLAHE', + 'Rerange', + 'RGB2Gray', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/compose.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/compose.py new file mode 100644 index 000000000000..c3b11a9870a5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/compose.py @@ -0,0 +1,51 @@ +import collections + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import build_from_cfg + +from ..builder import PIPELINES + + +@PIPELINES.register_module() +class Compose(object): + """Compose multiple transforms sequentially. + + Args: + transforms (Sequence[dict | callable]): Sequence of transform object or + config dict to be composed. + """ + + def __init__(self, transforms): + assert isinstance(transforms, collections.abc.Sequence) + self.transforms = [] + for transform in transforms: + if isinstance(transform, dict): + transform = build_from_cfg(transform, PIPELINES) + self.transforms.append(transform) + elif callable(transform): + self.transforms.append(transform) + else: + raise TypeError('transform must be callable or a dict') + + def __call__(self, data): + """Call function to apply transforms sequentially. + + Args: + data (dict): A result dict contains the data to transform. + + Returns: + dict: Transformed data. + """ + + for t in self.transforms: + data = t(data) + if data is None: + return None + return data + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + for t in self.transforms: + format_string += '\n' + format_string += f' {t}' + format_string += '\n)' + return format_string diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py new file mode 100644 index 000000000000..e5222a69bec6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/formating.py @@ -0,0 +1,294 @@ +from collections.abc import Sequence + +import numpy as np +import torch + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.parallel import DataContainer as DC + +from ..builder import PIPELINES + + +def to_tensor(data): + """Convert objects of various python types to :obj:`torch.Tensor`. + + Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, + :class:`Sequence`, :class:`int` and :class:`float`. + + Args: + data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to + be converted. + """ + + if isinstance(data, torch.Tensor): + return data + elif isinstance(data, np.ndarray): + return torch.from_numpy(data) + elif isinstance(data, Sequence) and not mmcv.is_str(data): + return torch.tensor(data) + elif isinstance(data, int): + return torch.LongTensor([data]) + elif isinstance(data, float): + return torch.FloatTensor([data]) + else: + raise TypeError(f'type {type(data)} cannot be converted to tensor.') + + +@PIPELINES.register_module() +class ToTensor(object): + """Convert some results to :obj:`torch.Tensor` by given keys. + + Args: + keys (Sequence[str]): Keys that need to be converted to Tensor. + """ + + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + """Call function to convert data in results to :obj:`torch.Tensor`. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data converted + to :obj:`torch.Tensor`. + """ + + for key in self.keys: + results[key] = to_tensor(results[key]) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(keys={self.keys})' + + +@PIPELINES.register_module() +class ImageToTensor(object): + """Convert image to :obj:`torch.Tensor` by given keys. + + The dimension order of input image is (H, W, C). The pipeline will convert + it to (C, H, W). If only 2 dimension (H, W) is given, the output would be + (1, H, W). + + Args: + keys (Sequence[str]): Key of images to be converted to Tensor. + """ + + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + """Call function to convert image in results to :obj:`torch.Tensor` and + transpose the channel order. + + Args: + results (dict): Result dict contains the image data to convert. + + Returns: + dict: The result dict contains the image converted + to :obj:`torch.Tensor` and transposed to (C, H, W) order. + """ + + for key in self.keys: + img = results[key] + if len(img.shape) < 3: + img = np.expand_dims(img, -1) + results[key] = to_tensor(img.transpose(2, 0, 1)) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(keys={self.keys})' + + +@PIPELINES.register_module() +class Transpose(object): + """Transpose some results by given keys. + + Args: + keys (Sequence[str]): Keys of results to be transposed. + order (Sequence[int]): Order of transpose. + """ + + def __init__(self, keys, order): + self.keys = keys + self.order = order + + def __call__(self, results): + """Call function to convert image in results to :obj:`torch.Tensor` and + transpose the channel order. + + Args: + results (dict): Result dict contains the image data to convert. + + Returns: + dict: The result dict contains the image converted + to :obj:`torch.Tensor` and transposed to (C, H, W) order. + """ + + for key in self.keys: + results[key] = results[key].transpose(self.order) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(keys={self.keys}, order={self.order})' + + +@PIPELINES.register_module() +class ToDataContainer(object): + """Convert results to :obj:`mmcv.DataContainer` by given fields. + + Args: + fields (Sequence[dict]): Each field is a dict like + ``dict(key='xxx', **kwargs)``. The ``key`` in result will + be converted to :obj:`mmcv.DataContainer` with ``**kwargs``. + Default: ``(dict(key='img', stack=True), + dict(key='gt_semantic_seg'))``. + """ + + def __init__(self, fields=(dict(key='img', stack=True), dict(key='gt_semantic_seg'))): + self.fields = fields + + def __call__(self, results): + """Call function to convert data in results to + :obj:`mmcv.DataContainer`. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data converted to + :obj:`mmcv.DataContainer`. + """ + + for field in self.fields: + field = field.copy() + key = field.pop('key') + results[key] = DC(results[key], **field) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(fields={self.fields})' + + +@PIPELINES.register_module() +class DefaultFormatBundle(object): + """Default formatting bundle. + + It simplifies the pipeline of formatting common fields, including "img" + and "gt_semantic_seg". These fields are formatted as follows. + + - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) + - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor, + (3)to DataContainer (stack=True) + """ + + def __call__(self, results): + """Call function to transform and format common fields in results. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data that is formatted with + default bundle. + """ + + if 'img' in results: + img = results['img'] + if len(img.shape) < 3: + img = np.expand_dims(img, -1) + img = np.ascontiguousarray(img.transpose(2, 0, 1)) + results['img'] = DC(to_tensor(img), stack=True) + if 'gt_semantic_seg' in results: + # convert to long + results['gt_semantic_seg'] = DC( + to_tensor(results['gt_semantic_seg'][None, ...].astype(np.int64)), stack=True + ) + return results + + def __repr__(self): + return self.__class__.__name__ + + +@PIPELINES.register_module() +class Collect(object): + """Collect data from the loader relevant to the specific task. + + This is usually the last stage of the data loader pipeline. Typically keys + is set to some subset of "img", "gt_semantic_seg". + + The "img_meta" item is always populated. The contents of the "img_meta" + dictionary depends on "meta_keys". By default this includes: + + - "img_shape": shape of the image input to the network as a tuple + (h, w, c). Note that images may be zero padded on the bottom/right + if the batch tensor is larger than this shape. + + - "scale_factor": a float indicating the preprocessing scale + + - "flip": a boolean indicating if image flip transform was used + + - "filename": path to the image file + + - "ori_shape": original shape of the image as a tuple (h, w, c) + + - "pad_shape": image shape after padding + + - "img_norm_cfg": a dict of normalization information: + - mean - per channel mean subtraction + - std - per channel std divisor + - to_rgb - bool indicating if bgr was converted to rgb + + Args: + keys (Sequence[str]): Keys of results to be collected in ``data``. + meta_keys (Sequence[str], optional): Meta keys to be converted to + ``mmcv.DataContainer`` and collected in ``data[img_metas]``. + Default: ``('filename', 'ori_filename', 'ori_shape', 'img_shape', + 'pad_shape', 'scale_factor', 'flip', 'flip_direction', + 'img_norm_cfg')`` + """ + + def __init__( + self, + keys, + meta_keys=( + 'filename', + 'ori_filename', + 'ori_shape', + 'img_shape', + 'pad_shape', + 'scale_factor', + 'flip', + 'flip_direction', + 'img_norm_cfg', + ), + ): + self.keys = keys + self.meta_keys = meta_keys + + def __call__(self, results): + """Call function to collect keys in results. The keys in ``meta_keys`` + will be converted to :obj:mmcv.DataContainer. + + Args: + results (dict): Result dict contains the data to collect. + + Returns: + dict: The result dict contains the following keys + - keys in``self.keys`` + - ``img_metas`` + """ + + data = {} + img_meta = {} + for key in self.meta_keys: + img_meta[key] = results[key] + data['img_metas'] = DC(img_meta, cpu_only=True) + for key in self.keys: + data[key] = results[key] + return data + + def __repr__(self): + return self.__class__.__name__ + f'(keys={self.keys}, meta_keys={self.meta_keys})' diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py new file mode 100644 index 000000000000..5d2e2a51a1bf --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/loading.py @@ -0,0 +1,145 @@ +import os.path as osp + +import numpy as np + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + +from ..builder import PIPELINES + + +@PIPELINES.register_module() +class LoadImageFromFile(object): + """Load an image from file. + + Required keys are "img_prefix" and "img_info" (a dict that must contain the + key "filename"). Added or updated keys are "filename", "img", "img_shape", + "ori_shape" (same as `img_shape`), "pad_shape" (same as `img_shape`), + "scale_factor" (1.0) and "img_norm_cfg" (means=0 and stds=1). + + Args: + to_float32 (bool): Whether to convert the loaded image to a float32 + numpy array. If set to False, the loaded image is an uint8 array. + Defaults to False. + color_type (str): The flag argument for :func:`mmcv.imfrombytes`. + Defaults to 'color'. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. + Defaults to ``dict(backend='disk')``. + imdecode_backend (str): Backend for :func:`mmcv.imdecode`. Default: + 'cv2' + """ + + def __init__( + self, to_float32=False, color_type='color', file_client_args=dict(backend='disk'), imdecode_backend='cv2' + ): + self.to_float32 = to_float32 + self.color_type = color_type + self.file_client_args = file_client_args.copy() + self.file_client = None + self.imdecode_backend = imdecode_backend + + def __call__(self, results): + """Call functions to load image and get image meta information. + + Args: + results (dict): Result dict from :obj:`mmseg.CustomDataset`. + + Returns: + dict: The dict contains loaded image and meta information. + """ + + if self.file_client is None: + self.file_client = mmcv.FileClient(**self.file_client_args) + + if results.get('img_prefix') is not None: + filename = osp.join(results['img_prefix'], results['img_info']['filename']) + else: + filename = results['img_info']['filename'] + img_bytes = self.file_client.get(filename) + img = mmcv.imfrombytes(img_bytes, flag=self.color_type, backend=self.imdecode_backend) + if self.to_float32: + img = img.astype(np.float32) + + results['filename'] = filename + results['ori_filename'] = results['img_info']['filename'] + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + num_channels = 1 if len(img.shape) < 3 else img.shape[2] + results['img_norm_cfg'] = dict( + mean=np.zeros(num_channels, dtype=np.float32), std=np.ones(num_channels, dtype=np.float32), to_rgb=False + ) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(to_float32={self.to_float32},' + repr_str += f"color_type='{self.color_type}'," + repr_str += f"imdecode_backend='{self.imdecode_backend}')" + return repr_str + + +@PIPELINES.register_module() +class LoadAnnotations(object): + """Load annotations for semantic segmentation. + + Args: + reduce_zero_label (bool): Whether reduce all label value by 1. + Usually used for datasets where 0 is background label. + Default: False. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. + Defaults to ``dict(backend='disk')``. + imdecode_backend (str): Backend for :func:`mmcv.imdecode`. Default: + 'pillow' + """ + + def __init__(self, reduce_zero_label=False, file_client_args=dict(backend='disk'), imdecode_backend='pillow'): + self.reduce_zero_label = reduce_zero_label + self.file_client_args = file_client_args.copy() + self.file_client = None + self.imdecode_backend = imdecode_backend + + def __call__(self, results): + """Call function to load multiple types annotations. + + Args: + results (dict): Result dict from :obj:`mmseg.CustomDataset`. + + Returns: + dict: The dict contains loaded semantic segmentation annotations. + """ + + if self.file_client is None: + self.file_client = mmcv.FileClient(**self.file_client_args) + + if results.get('seg_prefix', None) is not None: + filename = osp.join(results['seg_prefix'], results['ann_info']['seg_map']) + else: + filename = results['ann_info']['seg_map'] + img_bytes = self.file_client.get(filename) + gt_semantic_seg = ( + mmcv.imfrombytes(img_bytes, flag='unchanged', backend=self.imdecode_backend).squeeze().astype(np.uint8) + ) + # modify if custom classes + if results.get('label_map', None) is not None: + for old_id, new_id in results['label_map'].items(): + gt_semantic_seg[gt_semantic_seg == old_id] = new_id + # reduce zero_label + if self.reduce_zero_label: + # avoid using underflow conversion + gt_semantic_seg[gt_semantic_seg == 0] = 255 + gt_semantic_seg = gt_semantic_seg - 1 + gt_semantic_seg[gt_semantic_seg == 254] = 255 + results['gt_semantic_seg'] = gt_semantic_seg + results['seg_fields'].append('gt_semantic_seg') + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(reduce_zero_label={self.reduce_zero_label},' + repr_str += f"imdecode_backend='{self.imdecode_backend}')" + return repr_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py new file mode 100644 index 000000000000..e8675fb4c872 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/test_time_aug.py @@ -0,0 +1,118 @@ +import warnings + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + +from ..builder import PIPELINES +from .compose import Compose + + +@PIPELINES.register_module() +class MultiScaleFlipAug(object): + """Test-time augmentation with multiple scales and flipping. + + An example configuration is as followed: + + .. code-block:: + + img_scale=(2048, 1024), + img_ratios=[0.5, 1.0], + flip=True, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ] + + After MultiScaleFLipAug with above configuration, the results are wrapped + into lists of the same length as followed: + + .. code-block:: + + dict( + img=[...], + img_shape=[...], + scale=[(1024, 512), (1024, 512), (2048, 1024), (2048, 1024)] + flip=[False, True, False, True] + ... + ) + + Args: + transforms (list[dict]): Transforms to apply in each augmentation. + img_scale (None | tuple | list[tuple]): Images scales for resizing. + img_ratios (float | list[float]): Image ratios for resizing + flip (bool): Whether apply flip augmentation. Default: False. + flip_direction (str | list[str]): Flip augmentation directions, + options are "horizontal" and "vertical". If flip_direction is list, + multiple flip augmentations will be applied. + It has no effect when flip == False. Default: "horizontal". + """ + + def __init__(self, transforms, img_scale, img_ratios=None, flip=False, flip_direction='horizontal'): + self.transforms = Compose(transforms) + if img_ratios is not None: + img_ratios = img_ratios if isinstance(img_ratios, list) else [img_ratios] + assert mmcv.is_list_of(img_ratios, float) + if img_scale is None: + # mode 1: given img_scale=None and a range of image ratio + self.img_scale = None + assert mmcv.is_list_of(img_ratios, float) + elif isinstance(img_scale, tuple) and mmcv.is_list_of(img_ratios, float): + assert len(img_scale) == 2 + # mode 2: given a scale and a range of image ratio + self.img_scale = [(int(img_scale[0] * ratio), int(img_scale[1] * ratio)) for ratio in img_ratios] + else: + # mode 3: given multiple scales + self.img_scale = img_scale if isinstance(img_scale, list) else [img_scale] + assert mmcv.is_list_of(self.img_scale, tuple) or self.img_scale is None + self.flip = flip + self.img_ratios = img_ratios + self.flip_direction = flip_direction if isinstance(flip_direction, list) else [flip_direction] + assert mmcv.is_list_of(self.flip_direction, str) + if not self.flip and self.flip_direction != ['horizontal']: + warnings.warn('flip_direction has no effect when flip is set to False') + if self.flip and not any([t['type'] == 'RandomFlip' for t in transforms]): + warnings.warn('flip has no effect when RandomFlip is not in transforms') + + def __call__(self, results): + """Call function to apply test time augment transforms on results. + + Args: + results (dict): Result dict contains the data to transform. + + Returns: + dict[str: list]: The augmented data, where each value is wrapped + into a list. + """ + + aug_data = [] + if self.img_scale is None and mmcv.is_list_of(self.img_ratios, float): + h, w = results['img'].shape[:2] + img_scale = [(int(w * ratio), int(h * ratio)) for ratio in self.img_ratios] + else: + img_scale = self.img_scale + flip_aug = [False, True] if self.flip else [False] + for scale in img_scale: + for flip in flip_aug: + for direction in self.flip_direction: + _results = results.copy() + _results['scale'] = scale + _results['flip'] = flip + _results['flip_direction'] = direction + data = self.transforms(_results) + aug_data.append(data) + # list of dict to dict of list + aug_data_dict = {key: [] for key in aug_data[0]} + for data in aug_data: + for key, val in data.items(): + aug_data_dict[key].append(val) + return aug_data_dict + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(transforms={self.transforms}, ' + repr_str += f'img_scale={self.img_scale}, flip={self.flip})' + repr_str += f'flip_direction={self.flip_direction}' + return repr_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py new file mode 100644 index 000000000000..12bf591cff32 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/pipelines/transforms.py @@ -0,0 +1,839 @@ +import numpy as np +from numpy import random + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import deprecated_api_warning, is_tuple_of + +from ..builder import PIPELINES + + +@PIPELINES.register_module() +class Resize(object): + """Resize images & seg. + + This transform resizes the input image to some scale. If the input dict + contains the key "scale", then the scale in the input dict is used, + otherwise the specified scale in the init method is used. + + ``img_scale`` can be None, a tuple (single-scale) or a list of tuple + (multi-scale). There are 4 multiscale modes: + + - ``ratio_range is not None``: + 1. When img_scale is None, img_scale is the shape of image in results + (img_scale = results['img'].shape[:2]) and the image is resized based + on the original size. (mode 1) + 2. When img_scale is a tuple (single-scale), randomly sample a ratio from + the ratio range and multiply it with the image scale. (mode 2) + + - ``ratio_range is None and multiscale_mode == "range"``: randomly sample a + scale from the a range. (mode 3) + + - ``ratio_range is None and multiscale_mode == "value"``: randomly sample a + scale from multiple scales. (mode 4) + + Args: + img_scale (tuple or list[tuple]): Images scales for resizing. + multiscale_mode (str): Either "range" or "value". + ratio_range (tuple[float]): (min_ratio, max_ratio) + keep_ratio (bool): Whether to keep the aspect ratio when resizing the + image. + """ + + def __init__(self, img_scale=None, multiscale_mode='range', ratio_range=None, keep_ratio=True): + if img_scale is None: + self.img_scale = None + else: + if isinstance(img_scale, list): + self.img_scale = img_scale + else: + self.img_scale = [img_scale] + assert mmcv.is_list_of(self.img_scale, tuple) + + if ratio_range is not None: + # mode 1: given img_scale=None and a range of image ratio + # mode 2: given a scale and a range of image ratio + assert self.img_scale is None or len(self.img_scale) == 1 + else: + # mode 3 and 4: given multiple scales or a range of scales + assert multiscale_mode in ['value', 'range'] + + self.multiscale_mode = multiscale_mode + self.ratio_range = ratio_range + self.keep_ratio = keep_ratio + + @staticmethod + def random_select(img_scales): + """Randomly select an img_scale from given candidates. + + Args: + img_scales (list[tuple]): Images scales for selection. + + Returns: + (tuple, int): Returns a tuple ``(img_scale, scale_dix)``, + where ``img_scale`` is the selected image scale and + ``scale_idx`` is the selected index in the given candidates. + """ + + assert mmcv.is_list_of(img_scales, tuple) + scale_idx = np.random.randint(len(img_scales)) + img_scale = img_scales[scale_idx] + return img_scale, scale_idx + + @staticmethod + def random_sample(img_scales): + """Randomly sample an img_scale when ``multiscale_mode=='range'``. + + Args: + img_scales (list[tuple]): Images scale range for sampling. + There must be two tuples in img_scales, which specify the lower + and upper bound of image scales. + + Returns: + (tuple, None): Returns a tuple ``(img_scale, None)``, where + ``img_scale`` is sampled scale and None is just a placeholder + to be consistent with :func:`random_select`. + """ + + assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2 + img_scale_long = [max(s) for s in img_scales] + img_scale_short = [min(s) for s in img_scales] + long_edge = np.random.randint(min(img_scale_long), max(img_scale_long) + 1) + short_edge = np.random.randint(min(img_scale_short), max(img_scale_short) + 1) + img_scale = (long_edge, short_edge) + return img_scale, None + + @staticmethod + def random_sample_ratio(img_scale, ratio_range): + """Randomly sample an img_scale when ``ratio_range`` is specified. + + A ratio will be randomly sampled from the range specified by + ``ratio_range``. Then it would be multiplied with ``img_scale`` to + generate sampled scale. + + Args: + img_scale (tuple): Images scale base to multiply with ratio. + ratio_range (tuple[float]): The minimum and maximum ratio to scale + the ``img_scale``. + + Returns: + (tuple, None): Returns a tuple ``(scale, None)``, where + ``scale`` is sampled ratio multiplied with ``img_scale`` and + None is just a placeholder to be consistent with + :func:`random_select`. + """ + + assert isinstance(img_scale, tuple) and len(img_scale) == 2 + min_ratio, max_ratio = ratio_range + assert min_ratio <= max_ratio + ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio + scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio) + return scale, None + + def _random_scale(self, results): + """Randomly sample an img_scale according to ``ratio_range`` and + ``multiscale_mode``. + + If ``ratio_range`` is specified, a ratio will be sampled and be + multiplied with ``img_scale``. + If multiple scales are specified by ``img_scale``, a scale will be + sampled according to ``multiscale_mode``. + Otherwise, single scale will be used. + + Args: + results (dict): Result dict from :obj:`dataset`. + + Returns: + dict: Two new keys 'scale` and 'scale_idx` are added into + ``results``, which would be used by subsequent pipelines. + """ + + if self.ratio_range is not None: + if self.img_scale is None: + h, w = results['img'].shape[:2] + scale, scale_idx = self.random_sample_ratio((w, h), self.ratio_range) + else: + scale, scale_idx = self.random_sample_ratio(self.img_scale[0], self.ratio_range) + elif len(self.img_scale) == 1: + scale, scale_idx = self.img_scale[0], 0 + elif self.multiscale_mode == 'range': + scale, scale_idx = self.random_sample(self.img_scale) + elif self.multiscale_mode == 'value': + scale, scale_idx = self.random_select(self.img_scale) + else: + raise NotImplementedError + + results['scale'] = scale + results['scale_idx'] = scale_idx + + def _resize_img(self, results): + """Resize images with ``results['scale']``.""" + if self.keep_ratio: + img, scale_factor = mmcv.imrescale(results['img'], results['scale'], return_scale=True) + # the w_scale and h_scale has minor difference + # a real fix should be done in the mmcv.imrescale in the future + new_h, new_w = img.shape[:2] + h, w = results['img'].shape[:2] + w_scale = new_w / w + h_scale = new_h / h + else: + img, w_scale, h_scale = mmcv.imresize(results['img'], results['scale'], return_scale=True) + scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], dtype=np.float32) + results['img'] = img + results['img_shape'] = img.shape + results['pad_shape'] = img.shape # in case that there is no padding + results['scale_factor'] = scale_factor + results['keep_ratio'] = self.keep_ratio + + def _resize_seg(self, results): + """Resize semantic segmentation map with ``results['scale']``.""" + for key in results.get('seg_fields', []): + if self.keep_ratio: + gt_seg = mmcv.imrescale(results[key], results['scale'], interpolation='nearest') + else: + gt_seg = mmcv.imresize(results[key], results['scale'], interpolation='nearest') + results[key] = gt_seg + + def __call__(self, results): + """Call function to resize images, bounding boxes, masks, semantic + segmentation map. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', + 'keep_ratio' keys are added into result dict. + """ + + if 'scale' not in results: + self._random_scale(results) + self._resize_img(results) + self._resize_seg(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += ( + f'(img_scale={self.img_scale}, ' + f'multiscale_mode={self.multiscale_mode}, ' + f'ratio_range={self.ratio_range}, ' + f'keep_ratio={self.keep_ratio})' + ) + return repr_str + + +@PIPELINES.register_module() +class RandomFlip(object): + """Flip the image & seg. + + If the input dict contains the key "flip", then the flag will be used, + otherwise it will be randomly decided by a ratio specified in the init + method. + + Args: + prob (float, optional): The flipping probability. Default: None. + direction(str, optional): The flipping direction. Options are + 'horizontal' and 'vertical'. Default: 'horizontal'. + """ + + @deprecated_api_warning({'flip_ratio': 'prob'}, cls_name='RandomFlip') + def __init__(self, prob=None, direction='horizontal'): + self.prob = prob + self.direction = direction + if prob is not None: + assert prob >= 0 and prob <= 1 + assert direction in ['horizontal', 'vertical'] + + def __call__(self, results): + """Call function to flip bounding boxes, masks, semantic segmentation + maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Flipped results, 'flip', 'flip_direction' keys are added into + result dict. + """ + + if 'flip' not in results: + flip = True if np.random.rand() < self.prob else False + results['flip'] = flip + if 'flip_direction' not in results: + results['flip_direction'] = self.direction + if results['flip']: + # flip image + results['img'] = mmcv.imflip(results['img'], direction=results['flip_direction']) + + # flip segs + for key in results.get('seg_fields', []): + # use copy() to make numpy stride positive + results[key] = mmcv.imflip(results[key], direction=results['flip_direction']).copy() + return results + + def __repr__(self): + return self.__class__.__name__ + f'(prob={self.prob})' + + +@PIPELINES.register_module() +class Pad(object): + """Pad the image & mask. + + There are two padding modes: (1) pad to a fixed size and (2) pad to the + minimum size that is divisible by some number. + Added keys are "pad_shape", "pad_fixed_size", "pad_size_divisor", + + Args: + size (tuple, optional): Fixed padding size. + size_divisor (int, optional): The divisor of padded size. + pad_val (float, optional): Padding value. Default: 0. + seg_pad_val (float, optional): Padding value of segmentation map. + Default: 255. + """ + + def __init__(self, size=None, size_divisor=None, pad_val=0, seg_pad_val=255): + self.size = size + self.size_divisor = size_divisor + self.pad_val = pad_val + self.seg_pad_val = seg_pad_val + # only one of size and size_divisor should be valid + assert size is not None or size_divisor is not None + assert size is None or size_divisor is None + + def _pad_img(self, results): + """Pad images according to ``self.size``.""" + if self.size is not None: + padded_img = mmcv.impad(results['img'], shape=self.size, pad_val=self.pad_val) + elif self.size_divisor is not None: + padded_img = mmcv.impad_to_multiple(results['img'], self.size_divisor, pad_val=self.pad_val) + results['img'] = padded_img + results['pad_shape'] = padded_img.shape + results['pad_fixed_size'] = self.size + results['pad_size_divisor'] = self.size_divisor + + def _pad_seg(self, results): + """Pad masks according to ``results['pad_shape']``.""" + for key in results.get('seg_fields', []): + results[key] = mmcv.impad(results[key], shape=results['pad_shape'][:2], pad_val=self.seg_pad_val) + + def __call__(self, results): + """Call function to pad images, masks, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Updated result dict. + """ + + self._pad_img(results) + self._pad_seg(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(size={self.size}, size_divisor={self.size_divisor}, ' f'pad_val={self.pad_val})' + return repr_str + + +@PIPELINES.register_module() +class Normalize(object): + """Normalize the image. + + Added key is "img_norm_cfg". + + Args: + mean (sequence): Mean values of 3 channels. + std (sequence): Std values of 3 channels. + to_rgb (bool): Whether to convert the image from BGR to RGB, + default is true. + """ + + def __init__(self, mean, std, to_rgb=True): + self.mean = np.array(mean, dtype=np.float32) + self.std = np.array(std, dtype=np.float32) + self.to_rgb = to_rgb + + def __call__(self, results): + """Call function to normalize images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Normalized results, 'img_norm_cfg' key is added into + result dict. + """ + + results['img'] = mmcv.imnormalize(results['img'], self.mean, self.std, self.to_rgb) + results['img_norm_cfg'] = dict(mean=self.mean, std=self.std, to_rgb=self.to_rgb) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(mean={self.mean}, std={self.std}, to_rgb=' f'{self.to_rgb})' + return repr_str + + +@PIPELINES.register_module() +class Rerange(object): + """Rerange the image pixel value. + + Args: + min_value (float or int): Minimum value of the reranged image. + Default: 0. + max_value (float or int): Maximum value of the reranged image. + Default: 255. + """ + + def __init__(self, min_value=0, max_value=255): + assert isinstance(min_value, float) or isinstance(min_value, int) + assert isinstance(max_value, float) or isinstance(max_value, int) + assert min_value < max_value + self.min_value = min_value + self.max_value = max_value + + def __call__(self, results): + """Call function to rerange images. + + Args: + results (dict): Result dict from loading pipeline. + Returns: + dict: Reranged results. + """ + + img = results['img'] + img_min_value = np.min(img) + img_max_value = np.max(img) + + assert img_min_value < img_max_value + # rerange to [0, 1] + img = (img - img_min_value) / (img_max_value - img_min_value) + # rerange to [min_value, max_value] + img = img * (self.max_value - self.min_value) + self.min_value + results['img'] = img + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(min_value={self.min_value}, max_value={self.max_value})' + return repr_str + + +@PIPELINES.register_module() +class CLAHE(object): + """Use CLAHE method to process the image. + + See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J]. + Graphics Gems, 1994:474-485.` for more information. + + Args: + clip_limit (float): Threshold for contrast limiting. Default: 40.0. + tile_grid_size (tuple[int]): Size of grid for histogram equalization. + Input image will be divided into equally sized rectangular tiles. + It defines the number of tiles in row and column. Default: (8, 8). + """ + + def __init__(self, clip_limit=40.0, tile_grid_size=(8, 8)): + assert isinstance(clip_limit, (float, int)) + self.clip_limit = clip_limit + assert is_tuple_of(tile_grid_size, int) + assert len(tile_grid_size) == 2 + self.tile_grid_size = tile_grid_size + + def __call__(self, results): + """Call function to Use CLAHE method process images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Processed results. + """ + + for i in range(results['img'].shape[2]): + results['img'][:, :, i] = mmcv.clahe( + np.array(results['img'][:, :, i], dtype=np.uint8), self.clip_limit, self.tile_grid_size + ) + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(clip_limit={self.clip_limit}, ' f'tile_grid_size={self.tile_grid_size})' + return repr_str + + +@PIPELINES.register_module() +class RandomCrop(object): + """Random crop the image & seg. + + Args: + crop_size (tuple): Expected size after cropping, (h, w). + cat_max_ratio (float): The maximum ratio that single category could + occupy. + """ + + def __init__(self, crop_size, cat_max_ratio=1.0, ignore_index=255): + assert crop_size[0] > 0 and crop_size[1] > 0 + self.crop_size = crop_size + self.cat_max_ratio = cat_max_ratio + self.ignore_index = ignore_index + + def get_crop_bbox(self, img): + """Randomly get a crop bounding box.""" + margin_h = max(img.shape[0] - self.crop_size[0], 0) + margin_w = max(img.shape[1] - self.crop_size[1], 0) + offset_h = np.random.randint(0, margin_h + 1) + offset_w = np.random.randint(0, margin_w + 1) + crop_y1, crop_y2 = offset_h, offset_h + self.crop_size[0] + crop_x1, crop_x2 = offset_w, offset_w + self.crop_size[1] + + return crop_y1, crop_y2, crop_x1, crop_x2 + + def crop(self, img, crop_bbox): + """Crop from ``img``""" + crop_y1, crop_y2, crop_x1, crop_x2 = crop_bbox + img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...] + return img + + def __call__(self, results): + """Call function to randomly crop images, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Randomly cropped results, 'img_shape' key in result dict is + updated according to crop size. + """ + + img = results['img'] + crop_bbox = self.get_crop_bbox(img) + if self.cat_max_ratio < 1.0: + # Repeat 10 times + for _ in range(10): + seg_temp = self.crop(results['gt_semantic_seg'], crop_bbox) + labels, cnt = np.unique(seg_temp, return_counts=True) + cnt = cnt[labels != self.ignore_index] + if len(cnt) > 1 and np.max(cnt) / np.sum(cnt) < self.cat_max_ratio: + break + crop_bbox = self.get_crop_bbox(img) + + # crop the image + img = self.crop(img, crop_bbox) + img_shape = img.shape + results['img'] = img + results['img_shape'] = img_shape + + # crop semantic seg + for key in results.get('seg_fields', []): + results[key] = self.crop(results[key], crop_bbox) + + return results + + def __repr__(self): + return self.__class__.__name__ + f'(crop_size={self.crop_size})' + + +@PIPELINES.register_module() +class RandomRotate(object): + """Rotate the image & seg. + + Args: + prob (float): The rotation probability. + degree (float, tuple[float]): Range of degrees to select from. If + degree is a number instead of tuple like (min, max), + the range of degree will be (``-degree``, ``+degree``) + pad_val (float, optional): Padding value of image. Default: 0. + seg_pad_val (float, optional): Padding value of segmentation map. + Default: 255. + center (tuple[float], optional): Center point (w, h) of the rotation in + the source image. If not specified, the center of the image will be + used. Default: None. + auto_bound (bool): Whether to adjust the image size to cover the whole + rotated image. Default: False + """ + + def __init__(self, prob, degree, pad_val=0, seg_pad_val=255, center=None, auto_bound=False): + self.prob = prob + assert prob >= 0 and prob <= 1 + if isinstance(degree, (float, int)): + assert degree > 0, f'degree {degree} should be positive' + self.degree = (-degree, degree) + else: + self.degree = degree + assert len(self.degree) == 2, f'degree {self.degree} should be a ' f'tuple of (min, max)' + self.pal_val = pad_val + self.seg_pad_val = seg_pad_val + self.center = center + self.auto_bound = auto_bound + + def __call__(self, results): + """Call function to rotate image, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Rotated results. + """ + + rotate = True if np.random.rand() < self.prob else False + degree = np.random.uniform(min(*self.degree), max(*self.degree)) + if rotate: + # rotate image + results['img'] = mmcv.imrotate( + results['img'], angle=degree, border_value=self.pal_val, center=self.center, auto_bound=self.auto_bound + ) + + # rotate segs + for key in results.get('seg_fields', []): + results[key] = mmcv.imrotate( + results[key], + angle=degree, + border_value=self.seg_pad_val, + center=self.center, + auto_bound=self.auto_bound, + interpolation='nearest', + ) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += ( + f'(prob={self.prob}, ' + f'degree={self.degree}, ' + f'pad_val={self.pal_val}, ' + f'seg_pad_val={self.seg_pad_val}, ' + f'center={self.center}, ' + f'auto_bound={self.auto_bound})' + ) + return repr_str + + +@PIPELINES.register_module() +class RGB2Gray(object): + """Convert RGB image to grayscale image. + + This transform calculate the weighted mean of input image channels with + ``weights`` and then expand the channels to ``out_channels``. When + ``out_channels`` is None, the number of output channels is the same as + input channels. + + Args: + out_channels (int): Expected number of output channels after + transforming. Default: None. + weights (tuple[float]): The weights to calculate the weighted mean. + Default: (0.299, 0.587, 0.114). + """ + + def __init__(self, out_channels=None, weights=(0.299, 0.587, 0.114)): + assert out_channels is None or out_channels > 0 + self.out_channels = out_channels + assert isinstance(weights, tuple) + for item in weights: + assert isinstance(item, (float, int)) + self.weights = weights + + def __call__(self, results): + """Call function to convert RGB image to grayscale image. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with grayscale image. + """ + img = results['img'] + assert len(img.shape) == 3 + assert img.shape[2] == len(self.weights) + weights = np.array(self.weights).reshape((1, 1, -1)) + img = (img * weights).sum(2, keepdims=True) + if self.out_channels is None: + img = img.repeat(weights.shape[2], axis=2) + else: + img = img.repeat(self.out_channels, axis=2) + + results['img'] = img + results['img_shape'] = img.shape + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(out_channels={self.out_channels}, ' f'weights={self.weights})' + return repr_str + + +@PIPELINES.register_module() +class AdjustGamma(object): + """Using gamma correction to process the image. + + Args: + gamma (float or int): Gamma value used in gamma correction. + Default: 1.0. + """ + + def __init__(self, gamma=1.0): + assert isinstance(gamma, float) or isinstance(gamma, int) + assert gamma > 0 + self.gamma = gamma + inv_gamma = 1.0 / gamma + self.table = np.array([(i / 255.0) ** inv_gamma * 255 for i in np.arange(256)]).astype('uint8') + + def __call__(self, results): + """Call function to process the image with gamma correction. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Processed results. + """ + + results['img'] = mmcv.lut_transform(np.array(results['img'], dtype=np.uint8), self.table) + + return results + + def __repr__(self): + return self.__class__.__name__ + f'(gamma={self.gamma})' + + +@PIPELINES.register_module() +class SegRescale(object): + """Rescale semantic segmentation maps. + + Args: + scale_factor (float): The scale factor of the final output. + """ + + def __init__(self, scale_factor=1): + self.scale_factor = scale_factor + + def __call__(self, results): + """Call function to scale the semantic segmentation map. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with semantic segmentation map scaled. + """ + for key in results.get('seg_fields', []): + if self.scale_factor != 1: + results[key] = mmcv.imrescale(results[key], self.scale_factor, interpolation='nearest') + return results + + def __repr__(self): + return self.__class__.__name__ + f'(scale_factor={self.scale_factor})' + + +@PIPELINES.register_module() +class PhotoMetricDistortion(object): + """Apply photometric distortion to image sequentially, every transformation + is applied with a probability of 0.5. The position of random contrast is in + second or second to last. + + 1. random brightness + 2. random contrast (mode 0) + 3. convert color from BGR to HSV + 4. random saturation + 5. random hue + 6. convert color from HSV to BGR + 7. random contrast (mode 1) + + Args: + brightness_delta (int): delta of brightness. + contrast_range (tuple): range of contrast. + saturation_range (tuple): range of saturation. + hue_delta (int): delta of hue. + """ + + def __init__(self, brightness_delta=32, contrast_range=(0.5, 1.5), saturation_range=(0.5, 1.5), hue_delta=18): + self.brightness_delta = brightness_delta + self.contrast_lower, self.contrast_upper = contrast_range + self.saturation_lower, self.saturation_upper = saturation_range + self.hue_delta = hue_delta + + def convert(self, img, alpha=1, beta=0): + """Multiple with alpha and add beat with clip.""" + img = img.astype(np.float32) * alpha + beta + img = np.clip(img, 0, 255) + return img.astype(np.uint8) + + def brightness(self, img): + """Brightness distortion.""" + if random.randint(2): + return self.convert(img, beta=random.uniform(-self.brightness_delta, self.brightness_delta)) + return img + + def contrast(self, img): + """Contrast distortion.""" + if random.randint(2): + return self.convert(img, alpha=random.uniform(self.contrast_lower, self.contrast_upper)) + return img + + def saturation(self, img): + """Saturation distortion.""" + if random.randint(2): + img = mmcv.bgr2hsv(img) + img[:, :, 1] = self.convert( + img[:, :, 1], alpha=random.uniform(self.saturation_lower, self.saturation_upper) + ) + img = mmcv.hsv2bgr(img) + return img + + def hue(self, img): + """Hue distortion.""" + if random.randint(2): + img = mmcv.bgr2hsv(img) + img[:, :, 0] = (img[:, :, 0].astype(int) + random.randint(-self.hue_delta, self.hue_delta)) % 180 + img = mmcv.hsv2bgr(img) + return img + + def __call__(self, results): + """Call function to perform photometric distortion on images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with images distorted. + """ + + img = results['img'] + # random brightness + img = self.brightness(img) + + # mode == 0 --> do random contrast first + # mode == 1 --> do random contrast last + mode = random.randint(2) + if mode == 1: + img = self.contrast(img) + + # random saturation + img = self.saturation(img) + + # random hue + img = self.hue(img) + + # random contrast + if mode == 0: + img = self.contrast(img) + + results['img'] = img + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += ( + f'(brightness_delta={self.brightness_delta}, ' + f'contrast_range=({self.contrast_lower}, ' + f'{self.contrast_upper}), ' + f'saturation_range=({self.saturation_lower}, ' + f'{self.saturation_upper}), ' + f'hue_delta={self.hue_delta})' + ) + return repr_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py new file mode 100644 index 000000000000..a94d01763980 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/stare.py @@ -0,0 +1,25 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class STAREDataset(CustomDataset): + """STARE dataset. + + In segmentation map annotation for STARE, 0 stands for background, which is + included in 2 categories. ``reduce_zero_label`` is fixed to False. The + ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '.ah.png'. + """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(STAREDataset, self).__init__( + img_suffix='.png', seg_map_suffix='.ah.png', reduce_zero_label=False, **kwargs + ) + assert osp.exists(self.img_dir) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py new file mode 100644 index 000000000000..5fd6641b33e1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/datasets/voc.py @@ -0,0 +1,65 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class PascalVOCDataset(CustomDataset): + """Pascal VOC dataset. + + Args: + split (str): Split txt file for Pascal VOC. + """ + + CLASSES = ( + 'background', + 'aeroplane', + 'bicycle', + 'bird', + 'boat', + 'bottle', + 'bus', + 'car', + 'cat', + 'chair', + 'cow', + 'diningtable', + 'dog', + 'horse', + 'motorbike', + 'person', + 'pottedplant', + 'sheep', + 'sofa', + 'train', + 'tvmonitor', + ) + + PALETTE = [ + [0, 0, 0], + [128, 0, 0], + [0, 128, 0], + [128, 128, 0], + [0, 0, 128], + [128, 0, 128], + [0, 128, 128], + [128, 128, 128], + [64, 0, 0], + [192, 0, 0], + [64, 128, 0], + [192, 128, 0], + [64, 0, 128], + [192, 0, 128], + [64, 128, 128], + [192, 128, 128], + [0, 64, 0], + [128, 64, 0], + [0, 192, 0], + [128, 192, 0], + [0, 64, 128], + ] + + def __init__(self, split, **kwargs): + super(PascalVOCDataset, self).__init__(img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs) + assert osp.exists(self.img_dir) and self.split is not None diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py new file mode 100644 index 000000000000..130f6c12914e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/__init__.py @@ -0,0 +1,17 @@ +from .backbones import * # noqa: F401,F403 +from .builder import BACKBONES, HEADS, LOSSES, SEGMENTORS, build_backbone, build_head, build_loss, build_segmentor +from .decode_heads import * # noqa: F401,F403 +from .losses import * # noqa: F401,F403 +from .necks import * # noqa: F401,F403 +from .segmentors import * # noqa: F401,F403 + +__all__ = [ + 'BACKBONES', + 'HEADS', + 'LOSSES', + 'SEGMENTORS', + 'build_backbone', + 'build_head', + 'build_loss', + 'build_segmentor', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py new file mode 100644 index 000000000000..ceb46fade97e --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/__init__.py @@ -0,0 +1,27 @@ +from .cgnet import CGNet + +# from .fast_scnn import FastSCNN +from .hrnet import HRNet +from .mobilenet_v2 import MobileNetV2 +from .mobilenet_v3 import MobileNetV3 +from .resnest import ResNeSt +from .resnet import ResNet, ResNetV1c, ResNetV1d +from .resnext import ResNeXt +from .unet import UNet +from .uniformer import UniFormer +from .vit import VisionTransformer + +__all__ = [ + 'ResNet', + 'ResNetV1c', + 'ResNetV1d', + 'ResNeXt', + 'HRNet', + 'ResNeSt', + 'MobileNetV2', + 'UNet', + 'CGNet', + 'MobileNetV3', + 'VisionTransformer', + 'UniFormer', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py new file mode 100644 index 000000000000..37a147de274d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/cgnet.py @@ -0,0 +1,362 @@ +import torch +import torch.nn as nn +import torch.utils.checkpoint as cp + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + ConvModule, + build_conv_layer, + build_norm_layer, + constant_init, + kaiming_init, +) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + +from ..builder import BACKBONES + + +class GlobalContextExtractor(nn.Module): + """Global Context Extractor for CGNet. + + This class is employed to refine the joint feature of both local feature + and surrounding context. + + Args: + channel (int): Number of input feature channels. + reduction (int): Reductions for global context extractor. Default: 16. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__(self, channel, reduction=16, with_cp=False): + super(GlobalContextExtractor, self).__init__() + self.channel = channel + self.reduction = reduction + assert reduction >= 1 and channel >= reduction + self.with_cp = with_cp + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction), + nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel), + nn.Sigmoid(), + ) + + def forward(self, x): + def _inner_forward(x): + num_batch, num_channel = x.size()[:2] + y = self.avg_pool(x).view(num_batch, num_channel) + y = self.fc(y).view(num_batch, num_channel, 1, 1) + return x * y + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class ContextGuidedBlock(nn.Module): + """Context Guided Block for CGNet. + + This class consists of four components: local feature extractor, + surrounding feature extractor, joint feature extractor and global + context extractor. + + Args: + in_channels (int): Number of input feature channels. + out_channels (int): Number of output feature channels. + dilation (int): Dilation rate for surrounding context extractor. + Default: 2. + reduction (int): Reduction for global context extractor. Default: 16. + skip_connect (bool): Add input to output or not. Default: True. + downsample (bool): Downsample the input to 1/2 or not. Default: False. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='PReLU'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__( + self, + in_channels, + out_channels, + dilation=2, + reduction=16, + skip_connect=True, + downsample=False, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'), + with_cp=False, + ): + super(ContextGuidedBlock, self).__init__() + self.with_cp = with_cp + self.downsample = downsample + + channels = out_channels if downsample else out_channels // 2 + if 'type' in act_cfg and act_cfg['type'] == 'PReLU': + act_cfg['num_parameters'] = channels + kernel_size = 3 if downsample else 1 + stride = 2 if downsample else 1 + padding = (kernel_size - 1) // 2 + + self.conv1x1 = ConvModule( + in_channels, channels, kernel_size, stride, padding, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg + ) + + self.f_loc = build_conv_layer( + conv_cfg, channels, channels, kernel_size=3, padding=1, groups=channels, bias=False + ) + self.f_sur = build_conv_layer( + conv_cfg, + channels, + channels, + kernel_size=3, + padding=dilation, + groups=channels, + dilation=dilation, + bias=False, + ) + + self.bn = build_norm_layer(norm_cfg, 2 * channels)[1] + self.activate = nn.PReLU(2 * channels) + + if downsample: + self.bottleneck = build_conv_layer(conv_cfg, 2 * channels, out_channels, kernel_size=1, bias=False) + + self.skip_connect = skip_connect and not downsample + self.f_glo = GlobalContextExtractor(out_channels, reduction, with_cp) + + def forward(self, x): + def _inner_forward(x): + out = self.conv1x1(x) + loc = self.f_loc(out) + sur = self.f_sur(out) + + joi_feat = torch.cat([loc, sur], 1) # the joint feature + joi_feat = self.bn(joi_feat) + joi_feat = self.activate(joi_feat) + if self.downsample: + joi_feat = self.bottleneck(joi_feat) # channel = out_channels + # f_glo is employed to refine the joint feature + out = self.f_glo(joi_feat) + + if self.skip_connect: + return x + out + else: + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class InputInjection(nn.Module): + """Downsampling module for CGNet.""" + + def __init__(self, num_downsampling): + super(InputInjection, self).__init__() + self.pool = nn.ModuleList() + for i in range(num_downsampling): + self.pool.append(nn.AvgPool2d(3, stride=2, padding=1)) + + def forward(self, x): + for pool in self.pool: + x = pool(x) + return x + + +@BACKBONES.register_module() +class CGNet(nn.Module): + """CGNet backbone. + + A Light-weight Context Guided Network for Semantic Segmentation + arXiv: https://arxiv.org/abs/1811.08201 + + Args: + in_channels (int): Number of input image channels. Normally 3. + num_channels (tuple[int]): Numbers of feature channels at each stages. + Default: (32, 64, 128). + num_blocks (tuple[int]): Numbers of CG blocks at stage 1 and stage 2. + Default: (3, 21). + dilations (tuple[int]): Dilation rate for surrounding context + extractors at stage 1 and stage 2. Default: (2, 4). + reductions (tuple[int]): Reductions for global context extractors at + stage 1 and stage 2. Default: (8, 16). + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='PReLU'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__( + self, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16), + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'), + norm_eval=False, + with_cp=False, + ): + + super(CGNet, self).__init__() + self.in_channels = in_channels + self.num_channels = num_channels + assert isinstance(self.num_channels, tuple) and len(self.num_channels) == 3 + self.num_blocks = num_blocks + assert isinstance(self.num_blocks, tuple) and len(self.num_blocks) == 2 + self.dilations = dilations + assert isinstance(self.dilations, tuple) and len(self.dilations) == 2 + self.reductions = reductions + assert isinstance(self.reductions, tuple) and len(self.reductions) == 2 + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + if 'type' in self.act_cfg and self.act_cfg['type'] == 'PReLU': + self.act_cfg['num_parameters'] = num_channels[0] + self.norm_eval = norm_eval + self.with_cp = with_cp + + cur_channels = in_channels + self.stem = nn.ModuleList() + for i in range(3): + self.stem.append( + ConvModule( + cur_channels, + num_channels[0], + 3, + 2 if i == 0 else 1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + ) + cur_channels = num_channels[0] + + self.inject_2x = InputInjection(1) # down-sample for Input, factor=2 + self.inject_4x = InputInjection(2) # down-sample for Input, factor=4 + + cur_channels += in_channels + self.norm_prelu_0 = nn.Sequential(build_norm_layer(norm_cfg, cur_channels)[1], nn.PReLU(cur_channels)) + + # stage 1 + self.level1 = nn.ModuleList() + for i in range(num_blocks[0]): + self.level1.append( + ContextGuidedBlock( + cur_channels if i == 0 else num_channels[1], + num_channels[1], + dilations[0], + reductions[0], + downsample=(i == 0), + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + with_cp=with_cp, + ) + ) # CG block + + cur_channels = 2 * num_channels[1] + in_channels + self.norm_prelu_1 = nn.Sequential(build_norm_layer(norm_cfg, cur_channels)[1], nn.PReLU(cur_channels)) + + # stage 2 + self.level2 = nn.ModuleList() + for i in range(num_blocks[1]): + self.level2.append( + ContextGuidedBlock( + cur_channels if i == 0 else num_channels[2], + num_channels[2], + dilations[1], + reductions[1], + downsample=(i == 0), + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + with_cp=with_cp, + ) + ) # CG block + + cur_channels = 2 * num_channels[2] + self.norm_prelu_2 = nn.Sequential(build_norm_layer(norm_cfg, cur_channels)[1], nn.PReLU(cur_channels)) + + def forward(self, x): + output = [] + + # stage 0 + inp_2x = self.inject_2x(x) + inp_4x = self.inject_4x(x) + for layer in self.stem: + x = layer(x) + x = self.norm_prelu_0(torch.cat([x, inp_2x], 1)) + output.append(x) + + # stage 1 + for i, layer in enumerate(self.level1): + x = layer(x) + if i == 0: + down1 = x + x = self.norm_prelu_1(torch.cat([x, down1, inp_4x], 1)) + output.append(x) + + # stage 2 + for i, layer in enumerate(self.level2): + x = layer(x) + if i == 0: + down2 = x + x = self.norm_prelu_2(torch.cat([down2, x], 1)) + output.append(x) + + return output + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, (nn.Conv2d, nn.Linear)): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + elif isinstance(m, nn.PReLU): + constant_init(m, 0) + else: + raise TypeError('pretrained must be a str or None') + + def train(self, mode=True): + """Convert the model into training mode will keeping the normalization + layer freezed.""" + super(CGNet, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py new file mode 100644 index 000000000000..532d781d7ffa --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/fast_scnn.py @@ -0,0 +1,355 @@ +import torch +import torch.nn as nn +from torch.nn.modules.batchnorm import _BatchNorm + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + ConvModule, + DepthwiseSeparableConvModule, + constant_init, + kaiming_init, +) +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models.decode_heads.psp_head import PPM +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize + +from ..builder import BACKBONES +from ..utils.inverted_residual import InvertedResidual + + +class LearningToDownsample(nn.Module): + """Learning to downsample module. + + Args: + in_channels (int): Number of input channels. + dw_channels (tuple[int]): Number of output channels of the first and + the second depthwise conv (dwconv) layers. + out_channels (int): Number of output channels of the whole + 'learning to downsample' module. + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + """ + + def __init__( + self, + in_channels, + dw_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + ): + super(LearningToDownsample, self).__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + dw_channels1 = dw_channels[0] + dw_channels2 = dw_channels[1] + + self.conv = ConvModule( + in_channels, + dw_channels1, + 3, + stride=2, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + self.dsconv1 = DepthwiseSeparableConvModule( + dw_channels1, dw_channels2, kernel_size=3, stride=2, padding=1, norm_cfg=self.norm_cfg + ) + self.dsconv2 = DepthwiseSeparableConvModule( + dw_channels2, out_channels, kernel_size=3, stride=2, padding=1, norm_cfg=self.norm_cfg + ) + + def forward(self, x): + x = self.conv(x) + x = self.dsconv1(x) + x = self.dsconv2(x) + return x + + +class GlobalFeatureExtractor(nn.Module): + """Global feature extractor module. + + Args: + in_channels (int): Number of input channels of the GFE module. + Default: 64 + block_channels (tuple[int]): Tuple of ints. Each int specifies the + number of output channels of each Inverted Residual module. + Default: (64, 96, 128) + out_channels(int): Number of output channels of the GFE module. + Default: 128 + expand_ratio (int): Adjusts number of channels of the hidden layer + in InvertedResidual by this amount. + Default: 6 + num_blocks (tuple[int]): Tuple of ints. Each int specifies the + number of times each Inverted Residual module is repeated. + The repeated Inverted Residual modules are called a 'group'. + Default: (3, 3, 3) + strides (tuple[int]): Tuple of ints. Each int specifies + the downsampling factor of each 'group'. + Default: (2, 2, 1) + pool_scales (tuple[int]): Tuple of ints. Each int specifies + the parameter required in 'global average pooling' within PPM. + Default: (1, 2, 3, 6) + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. + Default: False + """ + + def __init__( + self, + in_channels=64, + block_channels=(64, 96, 128), + out_channels=128, + expand_ratio=6, + num_blocks=(3, 3, 3), + strides=(2, 2, 1), + pool_scales=(1, 2, 3, 6), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False, + ): + super(GlobalFeatureExtractor, self).__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + assert len(block_channels) == len(num_blocks) == 3 + self.bottleneck1 = self._make_layer(in_channels, block_channels[0], num_blocks[0], strides[0], expand_ratio) + self.bottleneck2 = self._make_layer( + block_channels[0], block_channels[1], num_blocks[1], strides[1], expand_ratio + ) + self.bottleneck3 = self._make_layer( + block_channels[1], block_channels[2], num_blocks[2], strides[2], expand_ratio + ) + self.ppm = PPM( + pool_scales, + block_channels[2], + block_channels[2] // 4, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=align_corners, + ) + self.out = ConvModule( + block_channels[2] * 2, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + + def _make_layer(self, in_channels, out_channels, blocks, stride=1, expand_ratio=6): + layers = [InvertedResidual(in_channels, out_channels, stride, expand_ratio, norm_cfg=self.norm_cfg)] + for i in range(1, blocks): + layers.append(InvertedResidual(out_channels, out_channels, 1, expand_ratio, norm_cfg=self.norm_cfg)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.bottleneck1(x) + x = self.bottleneck2(x) + x = self.bottleneck3(x) + x = torch.cat([x, *self.ppm(x)], dim=1) + x = self.out(x) + return x + + +class FeatureFusionModule(nn.Module): + """Feature fusion module. + + Args: + higher_in_channels (int): Number of input channels of the + higher-resolution branch. + lower_in_channels (int): Number of input channels of the + lower-resolution branch. + out_channels (int): Number of output channels. + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. + Default: False + """ + + def __init__( + self, + higher_in_channels, + lower_in_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False, + ): + super(FeatureFusionModule, self).__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + self.dwconv = ConvModule( + lower_in_channels, out_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) + self.conv_lower_res = ConvModule( + out_channels, out_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=None + ) + self.conv_higher_res = ConvModule( + higher_in_channels, out_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=None + ) + self.relu = nn.ReLU(True) + + def forward(self, higher_res_feature, lower_res_feature): + lower_res_feature = resize( + lower_res_feature, size=higher_res_feature.size()[2:], mode='bilinear', align_corners=self.align_corners + ) + lower_res_feature = self.dwconv(lower_res_feature) + lower_res_feature = self.conv_lower_res(lower_res_feature) + + higher_res_feature = self.conv_higher_res(higher_res_feature) + out = higher_res_feature + lower_res_feature + return self.relu(out) + + +@BACKBONES.register_module() +class FastSCNN(nn.Module): + """Fast-SCNN Backbone. + + Args: + in_channels (int): Number of input image channels. Default: 3. + downsample_dw_channels (tuple[int]): Number of output channels after + the first conv layer & the second conv layer in + Learning-To-Downsample (LTD) module. + Default: (32, 48). + global_in_channels (int): Number of input channels of + Global Feature Extractor(GFE). + Equal to number of output channels of LTD. + Default: 64. + global_block_channels (tuple[int]): Tuple of integers that describe + the output channels for each of the MobileNet-v2 bottleneck + residual blocks in GFE. + Default: (64, 96, 128). + global_block_strides (tuple[int]): Tuple of integers + that describe the strides (downsampling factors) for each of the + MobileNet-v2 bottleneck residual blocks in GFE. + Default: (2, 2, 1). + global_out_channels (int): Number of output channels of GFE. + Default: 128. + higher_in_channels (int): Number of input channels of the higher + resolution branch in FFM. + Equal to global_in_channels. + Default: 64. + lower_in_channels (int): Number of input channels of the lower + resolution branch in FFM. + Equal to global_out_channels. + Default: 128. + fusion_out_channels (int): Number of output channels of FFM. + Default: 128. + out_indices (tuple): Tuple of indices of list + [higher_res_features, lower_res_features, fusion_output]. + Often set to (0,1,2) to enable aux. heads. + Default: (0, 1, 2). + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. + Default: False + """ + + def __init__( + self, + in_channels=3, + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False, + ): + + super(FastSCNN, self).__init__() + if global_in_channels != higher_in_channels: + raise AssertionError( + 'Global Input Channels must be the same \ + with Higher Input Channels!' + ) + elif global_out_channels != lower_in_channels: + raise AssertionError( + 'Global Output Channels must be the same \ + with Lower Input Channels!' + ) + + self.in_channels = in_channels + self.downsample_dw_channels1 = downsample_dw_channels[0] + self.downsample_dw_channels2 = downsample_dw_channels[1] + self.global_in_channels = global_in_channels + self.global_block_channels = global_block_channels + self.global_block_strides = global_block_strides + self.global_out_channels = global_out_channels + self.higher_in_channels = higher_in_channels + self.lower_in_channels = lower_in_channels + self.fusion_out_channels = fusion_out_channels + self.out_indices = out_indices + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + self.learning_to_downsample = LearningToDownsample( + in_channels, + downsample_dw_channels, + global_in_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + self.global_feature_extractor = GlobalFeatureExtractor( + global_in_channels, + global_block_channels, + global_out_channels, + strides=self.global_block_strides, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners, + ) + self.feature_fusion = FeatureFusionModule( + higher_in_channels, + lower_in_channels, + fusion_out_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners, + ) + + def init_weights(self, pretrained=None): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + + def forward(self, x): + higher_res_features = self.learning_to_downsample(x) + lower_res_features = self.global_feature_extractor(higher_res_features) + fusion_output = self.feature_fusion(higher_res_features, lower_res_features) + + outs = [higher_res_features, lower_res_features, fusion_output] + outs = [outs[i] for i in self.out_indices] + return tuple(outs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py new file mode 100644 index 000000000000..03dd604869ea --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/hrnet.py @@ -0,0 +1,536 @@ +import torch.nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + build_conv_layer, + build_norm_layer, + constant_init, + kaiming_init, +) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import Upsample, resize +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + +from ..builder import BACKBONES +from .resnet import BasicBlock, Bottleneck + + +class HRModule(nn.Module): + """High-Resolution Module for HRNet. + + In this module, every branch has 4 BasicBlocks/Bottlenecks. Fusion/Exchange + is in this module. + """ + + def __init__( + self, + num_branches, + blocks, + num_blocks, + in_channels, + num_channels, + multiscale_output=True, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + ): + super(HRModule, self).__init__() + self._check_branches(num_branches, num_blocks, in_channels, num_channels) + + self.in_channels = in_channels + self.num_branches = num_branches + + self.multiscale_output = multiscale_output + self.norm_cfg = norm_cfg + self.conv_cfg = conv_cfg + self.with_cp = with_cp + self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels) + self.fuse_layers = self._make_fuse_layers() + self.relu = nn.ReLU(inplace=False) + + def _check_branches(self, num_branches, num_blocks, in_channels, num_channels): + """Check branches configuration.""" + if num_branches != len(num_blocks): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_BLOCKS(' f'{len(num_blocks)})' + raise ValueError(error_msg) + + if num_branches != len(num_channels): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_CHANNELS(' f'{len(num_channels)})' + raise ValueError(error_msg) + + if num_branches != len(in_channels): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_INCHANNELS(' f'{len(in_channels)})' + raise ValueError(error_msg) + + def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1): + """Build one branch.""" + downsample = None + if stride != 1 or self.in_channels[branch_index] != num_channels[branch_index] * block.expansion: + downsample = nn.Sequential( + build_conv_layer( + self.conv_cfg, + self.in_channels[branch_index], + num_channels[branch_index] * block.expansion, + kernel_size=1, + stride=stride, + bias=False, + ), + build_norm_layer(self.norm_cfg, num_channels[branch_index] * block.expansion)[1], + ) + + layers = [] + layers.append( + block( + self.in_channels[branch_index], + num_channels[branch_index], + stride, + downsample=downsample, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg, + ) + ) + self.in_channels[branch_index] = num_channels[branch_index] * block.expansion + for i in range(1, num_blocks[branch_index]): + layers.append( + block( + self.in_channels[branch_index], + num_channels[branch_index], + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg, + ) + ) + + return nn.Sequential(*layers) + + def _make_branches(self, num_branches, block, num_blocks, num_channels): + """Build multiple branch.""" + branches = [] + + for i in range(num_branches): + branches.append(self._make_one_branch(i, block, num_blocks, num_channels)) + + return nn.ModuleList(branches) + + def _make_fuse_layers(self): + """Build fuse layer.""" + if self.num_branches == 1: + return None + + num_branches = self.num_branches + in_channels = self.in_channels + fuse_layers = [] + num_out_branches = num_branches if self.multiscale_output else 1 + for i in range(num_out_branches): + fuse_layer = [] + for j in range(num_branches): + if j > i: + fuse_layer.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[i], + kernel_size=1, + stride=1, + padding=0, + bias=False, + ), + build_norm_layer(self.norm_cfg, in_channels[i])[1], + # we set align_corners=False for HRNet + Upsample(scale_factor=2 ** (j - i), mode='bilinear', align_corners=False), + ) + ) + elif j == i: + fuse_layer.append(None) + else: + conv_downsamples = [] + for k in range(i - j): + if k == i - j - 1: + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[i], + kernel_size=3, + stride=2, + padding=1, + bias=False, + ), + build_norm_layer(self.norm_cfg, in_channels[i])[1], + ) + ) + else: + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[j], + kernel_size=3, + stride=2, + padding=1, + bias=False, + ), + build_norm_layer(self.norm_cfg, in_channels[j])[1], + nn.ReLU(inplace=False), + ) + ) + fuse_layer.append(nn.Sequential(*conv_downsamples)) + fuse_layers.append(nn.ModuleList(fuse_layer)) + + return nn.ModuleList(fuse_layers) + + def forward(self, x): + """Forward function.""" + if self.num_branches == 1: + return [self.branches[0](x[0])] + + for i in range(self.num_branches): + x[i] = self.branches[i](x[i]) + + x_fuse = [] + for i in range(len(self.fuse_layers)): + y = 0 + for j in range(self.num_branches): + if i == j: + y += x[j] + elif j > i: + y = y + resize( + self.fuse_layers[i][j](x[j]), size=x[i].shape[2:], mode='bilinear', align_corners=False + ) + else: + y += self.fuse_layers[i][j](x[j]) + x_fuse.append(self.relu(y)) + return x_fuse + + +@BACKBONES.register_module() +class HRNet(nn.Module): + """HRNet backbone. + + High-Resolution Representations for Labeling Pixels and Regions + arXiv: https://arxiv.org/abs/1904.04514 + + Args: + extra (dict): detailed configuration for each stage of HRNet. + in_channels (int): Number of input image channels. Normally 3. + conv_cfg (dict): dictionary to construct and config conv layer. + norm_cfg (dict): dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Example: + >>> from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models import HRNet + >>> import torch + >>> extra = dict( + >>> stage1=dict( + >>> num_modules=1, + >>> num_branches=1, + >>> block='BOTTLENECK', + >>> num_blocks=(4, ), + >>> num_channels=(64, )), + >>> stage2=dict( + >>> num_modules=1, + >>> num_branches=2, + >>> block='BASIC', + >>> num_blocks=(4, 4), + >>> num_channels=(32, 64)), + >>> stage3=dict( + >>> num_modules=4, + >>> num_branches=3, + >>> block='BASIC', + >>> num_blocks=(4, 4, 4), + >>> num_channels=(32, 64, 128)), + >>> stage4=dict( + >>> num_modules=3, + >>> num_branches=4, + >>> block='BASIC', + >>> num_blocks=(4, 4, 4, 4), + >>> num_channels=(32, 64, 128, 256))) + >>> self = HRNet(extra, in_channels=1) + >>> self.eval() + >>> inputs = torch.rand(1, 1, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 32, 8, 8) + (1, 64, 4, 4) + (1, 128, 2, 2) + (1, 256, 1, 1) + """ + + blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck} + + def __init__( + self, + extra, + in_channels=3, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + with_cp=False, + zero_init_residual=False, + ): + super(HRNet, self).__init__() + self.extra = extra + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.norm_eval = norm_eval + self.with_cp = with_cp + self.zero_init_residual = zero_init_residual + + # stem net + self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1) + self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2) + + self.conv1 = build_conv_layer(self.conv_cfg, in_channels, 64, kernel_size=3, stride=2, padding=1, bias=False) + + self.add_module(self.norm1_name, norm1) + self.conv2 = build_conv_layer(self.conv_cfg, 64, 64, kernel_size=3, stride=2, padding=1, bias=False) + + self.add_module(self.norm2_name, norm2) + self.relu = nn.ReLU(inplace=True) + + # stage 1 + self.stage1_cfg = self.extra['stage1'] + num_channels = self.stage1_cfg['num_channels'][0] + block_type = self.stage1_cfg['block'] + num_blocks = self.stage1_cfg['num_blocks'][0] + + block = self.blocks_dict[block_type] + stage1_out_channels = num_channels * block.expansion + self.layer1 = self._make_layer(block, 64, num_channels, num_blocks) + + # stage 2 + self.stage2_cfg = self.extra['stage2'] + num_channels = self.stage2_cfg['num_channels'] + block_type = self.stage2_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition1 = self._make_transition_layer([stage1_out_channels], num_channels) + self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels) + + # stage 3 + self.stage3_cfg = self.extra['stage3'] + num_channels = self.stage3_cfg['num_channels'] + block_type = self.stage3_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels) + self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels) + + # stage 4 + self.stage4_cfg = self.extra['stage4'] + num_channels = self.stage4_cfg['num_channels'] + block_type = self.stage4_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels) + self.stage4, pre_stage_channels = self._make_stage(self.stage4_cfg, num_channels) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: the normalization layer named "norm2" """ + return getattr(self, self.norm2_name) + + def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer): + """Make transition layer.""" + num_branches_cur = len(num_channels_cur_layer) + num_branches_pre = len(num_channels_pre_layer) + + transition_layers = [] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + transition_layers.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + num_channels_pre_layer[i], + num_channels_cur_layer[i], + kernel_size=3, + stride=1, + padding=1, + bias=False, + ), + build_norm_layer(self.norm_cfg, num_channels_cur_layer[i])[1], + nn.ReLU(inplace=True), + ) + ) + else: + transition_layers.append(None) + else: + conv_downsamples = [] + for j in range(i + 1 - num_branches_pre): + in_channels = num_channels_pre_layer[-1] + out_channels = num_channels_cur_layer[i] if j == i - num_branches_pre else in_channels + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels, + out_channels, + kernel_size=3, + stride=2, + padding=1, + bias=False, + ), + build_norm_layer(self.norm_cfg, out_channels)[1], + nn.ReLU(inplace=True), + ) + ) + transition_layers.append(nn.Sequential(*conv_downsamples)) + + return nn.ModuleList(transition_layers) + + def _make_layer(self, block, inplanes, planes, blocks, stride=1): + """Make each layer.""" + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + build_conv_layer( + self.conv_cfg, inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False + ), + build_norm_layer(self.norm_cfg, planes * block.expansion)[1], + ) + + layers = [] + layers.append( + block( + inplanes, + planes, + stride, + downsample=downsample, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg, + ) + ) + inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block(inplanes, planes, with_cp=self.with_cp, norm_cfg=self.norm_cfg, conv_cfg=self.conv_cfg) + ) + + return nn.Sequential(*layers) + + def _make_stage(self, layer_config, in_channels, multiscale_output=True): + """Make each stage.""" + num_modules = layer_config['num_modules'] + num_branches = layer_config['num_branches'] + num_blocks = layer_config['num_blocks'] + num_channels = layer_config['num_channels'] + block = self.blocks_dict[layer_config['block']] + + hr_modules = [] + for i in range(num_modules): + # multi_scale_output is only used for the last module + if not multiscale_output and i == num_modules - 1: + reset_multiscale_output = False + else: + reset_multiscale_output = True + + hr_modules.append( + HRModule( + num_branches, + block, + num_blocks, + in_channels, + num_channels, + reset_multiscale_output, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg, + ) + ) + + return nn.Sequential(*hr_modules), in_channels + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + + if self.zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + constant_init(m.norm3, 0) + elif isinstance(m, BasicBlock): + constant_init(m.norm2, 0) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + """Forward function.""" + + x = self.conv1(x) + x = self.norm1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.norm2(x) + x = self.relu(x) + x = self.layer1(x) + + x_list = [] + for i in range(self.stage2_cfg['num_branches']): + if self.transition1[i] is not None: + x_list.append(self.transition1[i](x)) + else: + x_list.append(x) + y_list = self.stage2(x_list) + + x_list = [] + for i in range(self.stage3_cfg['num_branches']): + if self.transition2[i] is not None: + x_list.append(self.transition2[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage3(x_list) + + x_list = [] + for i in range(self.stage4_cfg['num_branches']): + if self.transition3[i] is not None: + x_list.append(self.transition3[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage4(x_list) + + return y_list + + def train(self, mode=True): + """Convert the model into training mode will keeping the normalization + layer freezed.""" + super(HRNet, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py new file mode 100644 index 000000000000..cda42da943f5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v2.py @@ -0,0 +1,183 @@ +import logging + +import torch.nn as nn +from torch.nn.modules.batchnorm import _BatchNorm + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, constant_init, kaiming_init +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint + +from ..builder import BACKBONES +from ..utils import InvertedResidual, make_divisible + + +@BACKBONES.register_module() +class MobileNetV2(nn.Module): + """MobileNetV2 backbone. + + Args: + widen_factor (float): Width multiplier, multiply number of + channels in each layer by this amount. Default: 1.0. + strides (Sequence[int], optional): Strides of the first block of each + layer. If not specified, default config in ``arch_setting`` will + be used. + dilations (Sequence[int]): Dilation of each layer. + out_indices (None or Sequence[int]): Output from which stages. + Default: (7, ). + frozen_stages (int): Stages to be frozen (all param fixed). + Default: -1, which means not freezing any parameters. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU6'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + # Parameters to build layers. 3 parameters are needed to construct a + # layer, from left to right: expand_ratio, channel, num_blocks. + arch_settings = [[1, 16, 1], [6, 24, 2], [6, 32, 3], [6, 64, 4], [6, 96, 3], [6, 160, 3], [6, 320, 1]] + + def __init__( + self, + widen_factor=1.0, + strides=(1, 2, 2, 2, 1, 2, 1), + dilations=(1, 1, 1, 1, 1, 1, 1), + out_indices=(1, 2, 4, 6), + frozen_stages=-1, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU6'), + norm_eval=False, + with_cp=False, + ): + super(MobileNetV2, self).__init__() + self.widen_factor = widen_factor + self.strides = strides + self.dilations = dilations + assert len(strides) == len(dilations) == len(self.arch_settings) + self.out_indices = out_indices + for index in out_indices: + if index not in range(0, 7): + raise ValueError('the item in out_indices must in ' f'range(0, 8). But received {index}') + + if frozen_stages not in range(-1, 7): + raise ValueError('frozen_stages must be in range(-1, 7). ' f'But received {frozen_stages}') + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.norm_eval = norm_eval + self.with_cp = with_cp + + self.in_channels = make_divisible(32 * widen_factor, 8) + + self.conv1 = ConvModule( + in_channels=3, + out_channels=self.in_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + + self.layers = [] + + for i, layer_cfg in enumerate(self.arch_settings): + expand_ratio, channel, num_blocks = layer_cfg + stride = self.strides[i] + dilation = self.dilations[i] + out_channels = make_divisible(channel * widen_factor, 8) + inverted_res_layer = self.make_layer( + out_channels=out_channels, + num_blocks=num_blocks, + stride=stride, + dilation=dilation, + expand_ratio=expand_ratio, + ) + layer_name = f'layer{i + 1}' + self.add_module(layer_name, inverted_res_layer) + self.layers.append(layer_name) + + def make_layer(self, out_channels, num_blocks, stride, dilation, expand_ratio): + """Stack InvertedResidual blocks to build a layer for MobileNetV2. + + Args: + out_channels (int): out_channels of block. + num_blocks (int): Number of blocks. + stride (int): Stride of the first block. + dilation (int): Dilation of the first block. + expand_ratio (int): Expand the number of channels of the + hidden layer in InvertedResidual by this ratio. + """ + layers = [] + for i in range(num_blocks): + layers.append( + InvertedResidual( + self.in_channels, + out_channels, + stride if i == 0 else 1, + expand_ratio=expand_ratio, + dilation=dilation if i == 0 else 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + with_cp=self.with_cp, + ) + ) + self.in_channels = out_channels + + return nn.Sequential(*layers) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + x = self.conv1(x) + + outs = [] + for i, layer_name in enumerate(self.layers): + layer = getattr(self, layer_name) + x = layer(x) + if i in self.out_indices: + outs.append(x) + + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def _freeze_stages(self): + if self.frozen_stages >= 0: + for param in self.conv1.parameters(): + param.requires_grad = False + for i in range(1, self.frozen_stages + 1): + layer = getattr(self, f'layer{i}') + layer.eval() + for param in layer.parameters(): + param.requires_grad = False + + def train(self, mode=True): + super(MobileNetV2, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, _BatchNorm): + m.eval() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py new file mode 100644 index 000000000000..11c665237e1c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/mobilenet_v3.py @@ -0,0 +1,267 @@ +import logging + +import torch.nn as nn +from torch.nn.modules.batchnorm import _BatchNorm + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, constant_init, kaiming_init +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn.bricks import Conv2dAdaptivePadding +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint + +from ..builder import BACKBONES +from ..utils import InvertedResidualV3 as InvertedResidual + + +@BACKBONES.register_module() +class MobileNetV3(nn.Module): + """MobileNetV3 backbone. + + This backbone is the improved implementation of `Searching for MobileNetV3 + `_. + + Args: + arch (str): Architecture of mobilnetv3, from {'small', 'large'}. + Default: 'small'. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + out_indices (tuple[int]): Output from which layer. + Default: (0, 1, 12). + frozen_stages (int): Stages to be frozen (all param fixed). + Default: -1, which means not freezing any parameters. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. + Default: False. + """ + + # Parameters to build each block: + # [kernel size, mid channels, out channels, with_se, act type, stride] + arch_settings = { + 'small': [ + [3, 16, 16, True, 'ReLU', 2], # block0 layer1 os=4 + [3, 72, 24, False, 'ReLU', 2], # block1 layer2 os=8 + [3, 88, 24, False, 'ReLU', 1], + [5, 96, 40, True, 'HSwish', 2], # block2 layer4 os=16 + [5, 240, 40, True, 'HSwish', 1], + [5, 240, 40, True, 'HSwish', 1], + [5, 120, 48, True, 'HSwish', 1], # block3 layer7 os=16 + [5, 144, 48, True, 'HSwish', 1], + [5, 288, 96, True, 'HSwish', 2], # block4 layer9 os=32 + [5, 576, 96, True, 'HSwish', 1], + [5, 576, 96, True, 'HSwish', 1], + ], + 'large': [ + [3, 16, 16, False, 'ReLU', 1], # block0 layer1 os=2 + [3, 64, 24, False, 'ReLU', 2], # block1 layer2 os=4 + [3, 72, 24, False, 'ReLU', 1], + [5, 72, 40, True, 'ReLU', 2], # block2 layer4 os=8 + [5, 120, 40, True, 'ReLU', 1], + [5, 120, 40, True, 'ReLU', 1], + [3, 240, 80, False, 'HSwish', 2], # block3 layer7 os=16 + [3, 200, 80, False, 'HSwish', 1], + [3, 184, 80, False, 'HSwish', 1], + [3, 184, 80, False, 'HSwish', 1], + [3, 480, 112, True, 'HSwish', 1], # block4 layer11 os=16 + [3, 672, 112, True, 'HSwish', 1], + [5, 672, 160, True, 'HSwish', 2], # block5 layer13 os=32 + [5, 960, 160, True, 'HSwish', 1], + [5, 960, 160, True, 'HSwish', 1], + ], + } # yapf: disable + + def __init__( + self, + arch='small', + conv_cfg=None, + norm_cfg=dict(type='BN'), + out_indices=(0, 1, 12), + frozen_stages=-1, + reduction_factor=1, + norm_eval=False, + with_cp=False, + ): + super(MobileNetV3, self).__init__() + assert arch in self.arch_settings + assert isinstance(reduction_factor, int) and reduction_factor > 0 + assert mmcv.is_tuple_of(out_indices, int) + for index in out_indices: + if index not in range(0, len(self.arch_settings[arch]) + 2): + raise ValueError( + 'the item in out_indices must in ' + f'range(0, {len(self.arch_settings[arch])+2}). ' + f'But received {index}' + ) + + if frozen_stages not in range(-1, len(self.arch_settings[arch]) + 2): + raise ValueError( + 'frozen_stages must be in range(-1, ' + f'{len(self.arch_settings[arch])+2}). ' + f'But received {frozen_stages}' + ) + self.arch = arch + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.reduction_factor = reduction_factor + self.norm_eval = norm_eval + self.with_cp = with_cp + self.layers = self._make_layer() + + def _make_layer(self): + layers = [] + + # build the first layer (layer0) + in_channels = 16 + layer = ConvModule( + in_channels=3, + out_channels=in_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=dict(type='Conv2dAdaptivePadding'), + norm_cfg=self.norm_cfg, + act_cfg=dict(type='HSwish'), + ) + self.add_module('layer0', layer) + layers.append('layer0') + + layer_setting = self.arch_settings[self.arch] + for i, params in enumerate(layer_setting): + (kernel_size, mid_channels, out_channels, with_se, act, stride) = params + + if self.arch == 'large' and i >= 12 or self.arch == 'small' and i >= 8: + mid_channels = mid_channels // self.reduction_factor + out_channels = out_channels // self.reduction_factor + + if with_se: + se_cfg = dict( + channels=mid_channels, + ratio=4, + act_cfg=(dict(type='ReLU'), dict(type='HSigmoid', bias=3.0, divisor=6.0)), + ) + else: + se_cfg = None + + layer = InvertedResidual( + in_channels=in_channels, + out_channels=out_channels, + mid_channels=mid_channels, + kernel_size=kernel_size, + stride=stride, + se_cfg=se_cfg, + with_expand_conv=(in_channels != mid_channels), + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=dict(type=act), + with_cp=self.with_cp, + ) + in_channels = out_channels + layer_name = 'layer{}'.format(i + 1) + self.add_module(layer_name, layer) + layers.append(layer_name) + + # build the last layer + # block5 layer12 os=32 for small model + # block6 layer16 os=32 for large model + layer = ConvModule( + in_channels=in_channels, + out_channels=576 if self.arch == 'small' else 960, + kernel_size=1, + stride=1, + dilation=4, + padding=0, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=dict(type='HSwish'), + ) + layer_name = 'layer{}'.format(len(layer_setting) + 1) + self.add_module(layer_name, layer) + layers.append(layer_name) + + # next, convert backbone MobileNetV3 to a semantic segmentation version + if self.arch == 'small': + self.layer4.depthwise_conv.conv.stride = (1, 1) + self.layer9.depthwise_conv.conv.stride = (1, 1) + for i in range(4, len(layers)): + layer = getattr(self, layers[i]) + if isinstance(layer, InvertedResidual): + modified_module = layer.depthwise_conv.conv + else: + modified_module = layer.conv + + if i < 9: + modified_module.dilation = (2, 2) + pad = 2 + else: + modified_module.dilation = (4, 4) + pad = 4 + + if not isinstance(modified_module, Conv2dAdaptivePadding): + # Adjust padding + pad *= (modified_module.kernel_size[0] - 1) // 2 + modified_module.padding = (pad, pad) + else: + self.layer7.depthwise_conv.conv.stride = (1, 1) + self.layer13.depthwise_conv.conv.stride = (1, 1) + for i in range(7, len(layers)): + layer = getattr(self, layers[i]) + if isinstance(layer, InvertedResidual): + modified_module = layer.depthwise_conv.conv + else: + modified_module = layer.conv + + if i < 13: + modified_module.dilation = (2, 2) + pad = 2 + else: + modified_module.dilation = (4, 4) + pad = 4 + + if not isinstance(modified_module, Conv2dAdaptivePadding): + # Adjust padding + pad *= (modified_module.kernel_size[0] - 1) // 2 + modified_module.padding = (pad, pad) + + return layers + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + outs = [] + for i, layer_name in enumerate(self.layers): + layer = getattr(self, layer_name) + x = layer(x) + if i in self.out_indices: + outs.append(x) + return outs + + def _freeze_stages(self): + for i in range(self.frozen_stages + 1): + layer = getattr(self, f'layer{i}') + layer.eval() + for param in layer.parameters(): + param.requires_grad = False + + def train(self, mode=True): + super(MobileNetV3, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, _BatchNorm): + m.eval() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py new file mode 100644 index 000000000000..83915384db3a --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnest.py @@ -0,0 +1,299 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as cp +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import build_conv_layer, build_norm_layer + +from ..builder import BACKBONES +from ..utils import ResLayer +from .resnet import Bottleneck as _Bottleneck +from .resnet import ResNetV1d + + +class RSoftmax(nn.Module): + """Radix Softmax module in ``SplitAttentionConv2d``. + + Args: + radix (int): Radix of input. + groups (int): Groups of input. + """ + + def __init__(self, radix, groups): + super().__init__() + self.radix = radix + self.groups = groups + + def forward(self, x): + batch = x.size(0) + if self.radix > 1: + x = x.view(batch, self.groups, self.radix, -1).transpose(1, 2) + x = F.softmax(x, dim=1) + x = x.reshape(batch, -1) + else: + x = torch.sigmoid(x) + return x + + +class SplitAttentionConv2d(nn.Module): + """Split-Attention Conv2d in ResNeSt. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int | tuple[int]): Same as nn.Conv2d. + stride (int | tuple[int]): Same as nn.Conv2d. + padding (int | tuple[int]): Same as nn.Conv2d. + dilation (int | tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels. Default: 4. + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. Default: None. + dcn (dict): Config dict for DCN. Default: None. + """ + + def __init__( + self, + in_channels, + channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + radix=2, + reduction_factor=4, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + ): + super(SplitAttentionConv2d, self).__init__() + inter_channels = max(in_channels * radix // reduction_factor, 32) + self.radix = radix + self.groups = groups + self.channels = channels + self.with_dcn = dcn is not None + self.dcn = dcn + fallback_on_stride = False + if self.with_dcn: + fallback_on_stride = self.dcn.pop('fallback_on_stride', False) + if self.with_dcn and not fallback_on_stride: + assert conv_cfg is None, 'conv_cfg must be None for DCN' + conv_cfg = dcn + self.conv = build_conv_layer( + conv_cfg, + in_channels, + channels * radix, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups * radix, + bias=False, + ) + self.norm0_name, norm0 = build_norm_layer(norm_cfg, channels * radix, postfix=0) + self.add_module(self.norm0_name, norm0) + self.relu = nn.ReLU(inplace=True) + self.fc1 = build_conv_layer(None, channels, inter_channels, 1, groups=self.groups) + self.norm1_name, norm1 = build_norm_layer(norm_cfg, inter_channels, postfix=1) + self.add_module(self.norm1_name, norm1) + self.fc2 = build_conv_layer(None, inter_channels, channels * radix, 1, groups=self.groups) + self.rsoftmax = RSoftmax(radix, groups) + + @property + def norm0(self): + """nn.Module: the normalization layer named "norm0" """ + return getattr(self, self.norm0_name) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + def forward(self, x): + x = self.conv(x) + x = self.norm0(x) + x = self.relu(x) + + batch, rchannel = x.shape[:2] + batch = x.size(0) + if self.radix > 1: + splits = x.view(batch, self.radix, -1, *x.shape[2:]) + gap = splits.sum(dim=1) + else: + gap = x + gap = F.adaptive_avg_pool2d(gap, 1) + gap = self.fc1(gap) + + gap = self.norm1(gap) + gap = self.relu(gap) + + atten = self.fc2(gap) + atten = self.rsoftmax(atten).view(batch, -1, 1, 1) + + if self.radix > 1: + attens = atten.view(batch, self.radix, -1, *atten.shape[2:]) + out = torch.sum(attens * splits, dim=1) + else: + out = atten * x + return out.contiguous() + + +class Bottleneck(_Bottleneck): + """Bottleneck block for ResNeSt. + + Args: + inplane (int): Input planes of this block. + planes (int): Middle planes of this block. + groups (int): Groups of conv2. + width_per_group (int): Width per group of conv2. 64x4d indicates + ``groups=64, width_per_group=4`` and 32x8d indicates + ``groups=32, width_per_group=8``. + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels in + SplitAttentionConv2d. Default: 4. + avg_down_stride (bool): Whether to use average pool for stride in + Bottleneck. Default: True. + kwargs (dict): Key word arguments for base class. + """ + + expansion = 4 + + def __init__( + self, + inplanes, + planes, + groups=1, + base_width=4, + base_channels=64, + radix=2, + reduction_factor=4, + avg_down_stride=True, + **kwargs + ): + """Bottleneck block for ResNeSt.""" + super(Bottleneck, self).__init__(inplanes, planes, **kwargs) + + if groups == 1: + width = self.planes + else: + width = math.floor(self.planes * (base_width / base_channels)) * groups + + self.avg_down_stride = avg_down_stride and self.conv2_stride > 1 + + self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, width, postfix=1) + self.norm3_name, norm3 = build_norm_layer(self.norm_cfg, self.planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + self.conv_cfg, self.inplanes, width, kernel_size=1, stride=self.conv1_stride, bias=False + ) + self.add_module(self.norm1_name, norm1) + self.with_modulated_dcn = False + self.conv2 = SplitAttentionConv2d( + width, + width, + kernel_size=3, + stride=1 if self.avg_down_stride else self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + radix=radix, + reduction_factor=reduction_factor, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + dcn=self.dcn, + ) + delattr(self, self.norm2_name) + + if self.avg_down_stride: + self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1) + + self.conv3 = build_conv_layer(self.conv_cfg, width, self.planes * self.expansion, kernel_size=1, bias=False) + self.add_module(self.norm3_name, norm3) + + def forward(self, x): + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv1_plugin_names) + + out = self.conv2(out) + + if self.avg_down_stride: + out = self.avd_layer(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv2_plugin_names) + + out = self.conv3(out) + out = self.norm3(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv3_plugin_names) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +@BACKBONES.register_module() +class ResNeSt(ResNetV1d): + """ResNeSt backbone. + + Args: + groups (int): Number of groups of Bottleneck. Default: 1 + base_width (int): Base width of Bottleneck. Default: 4 + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels in + SplitAttentionConv2d. Default: 4. + avg_down_stride (bool): Whether to use average pool for stride in + Bottleneck. Default: True. + kwargs (dict): Keyword arguments for ResNet. + """ + + arch_settings = { + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)), + 200: (Bottleneck, (3, 24, 36, 3)), + } + + def __init__(self, groups=1, base_width=4, radix=2, reduction_factor=4, avg_down_stride=True, **kwargs): + self.groups = groups + self.base_width = base_width + self.radix = radix + self.reduction_factor = reduction_factor + self.avg_down_stride = avg_down_stride + super(ResNeSt, self).__init__(**kwargs) + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``.""" + return ResLayer( + groups=self.groups, + base_width=self.base_width, + base_channels=self.base_channels, + radix=self.radix, + reduction_factor=self.reduction_factor, + avg_down_stride=self.avg_down_stride, + **kwargs + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py new file mode 100644 index 000000000000..8b418aad171d --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnet.py @@ -0,0 +1,650 @@ +import torch.nn as nn +import torch.utils.checkpoint as cp + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + build_conv_layer, + build_norm_layer, + build_plugin_layer, + constant_init, + kaiming_init, +) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + +from ..builder import BACKBONES +from ..utils import ResLayer + + +class BasicBlock(nn.Module): + """Basic block for ResNet.""" + + expansion = 1 + + def __init__( + self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None, + ): + super(BasicBlock, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + + self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) + self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) + + self.conv1 = build_conv_layer( + conv_cfg, inplanes, planes, 3, stride=stride, padding=dilation, dilation=dilation, bias=False + ) + self.add_module(self.norm1_name, norm1) + self.conv2 = build_conv_layer(conv_cfg, planes, planes, 3, padding=1, bias=False) + self.add_module(self.norm2_name, norm2) + + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + @property + def norm1(self): + """nn.Module: normalization layer after the first convolution layer""" + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: normalization layer after the second convolution layer""" + return getattr(self, self.norm2_name) + + def forward(self, x): + """Forward function.""" + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.norm2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + """Bottleneck block for ResNet. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is + "caffe", the stride-two layer is the first 1x1 conv layer. + """ + + expansion = 4 + + def __init__( + self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None, + ): + super(Bottleneck, self).__init__() + assert style in ['pytorch', 'caffe'] + assert dcn is None or isinstance(dcn, dict) + assert plugins is None or isinstance(plugins, list) + if plugins is not None: + allowed_position = ['after_conv1', 'after_conv2', 'after_conv3'] + assert all(p['position'] in allowed_position for p in plugins) + + self.inplanes = inplanes + self.planes = planes + self.stride = stride + self.dilation = dilation + self.style = style + self.with_cp = with_cp + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.dcn = dcn + self.with_dcn = dcn is not None + self.plugins = plugins + self.with_plugins = plugins is not None + + if self.with_plugins: + # collect plugins for conv1/conv2/conv3 + self.after_conv1_plugins = [plugin['cfg'] for plugin in plugins if plugin['position'] == 'after_conv1'] + self.after_conv2_plugins = [plugin['cfg'] for plugin in plugins if plugin['position'] == 'after_conv2'] + self.after_conv3_plugins = [plugin['cfg'] for plugin in plugins if plugin['position'] == 'after_conv3'] + + if self.style == 'pytorch': + self.conv1_stride = 1 + self.conv2_stride = stride + else: + self.conv1_stride = stride + self.conv2_stride = 1 + + self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) + self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) + self.norm3_name, norm3 = build_norm_layer(norm_cfg, planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer(conv_cfg, inplanes, planes, kernel_size=1, stride=self.conv1_stride, bias=False) + self.add_module(self.norm1_name, norm1) + fallback_on_stride = False + if self.with_dcn: + fallback_on_stride = dcn.pop('fallback_on_stride', False) + if not self.with_dcn or fallback_on_stride: + self.conv2 = build_conv_layer( + conv_cfg, + planes, + planes, + kernel_size=3, + stride=self.conv2_stride, + padding=dilation, + dilation=dilation, + bias=False, + ) + else: + assert self.conv_cfg is None, 'conv_cfg must be None for DCN' + self.conv2 = build_conv_layer( + dcn, + planes, + planes, + kernel_size=3, + stride=self.conv2_stride, + padding=dilation, + dilation=dilation, + bias=False, + ) + + self.add_module(self.norm2_name, norm2) + self.conv3 = build_conv_layer(conv_cfg, planes, planes * self.expansion, kernel_size=1, bias=False) + self.add_module(self.norm3_name, norm3) + + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + + if self.with_plugins: + self.after_conv1_plugin_names = self.make_block_plugins(planes, self.after_conv1_plugins) + self.after_conv2_plugin_names = self.make_block_plugins(planes, self.after_conv2_plugins) + self.after_conv3_plugin_names = self.make_block_plugins(planes * self.expansion, self.after_conv3_plugins) + + def make_block_plugins(self, in_channels, plugins): + """make plugins for block. + + Args: + in_channels (int): Input channels of plugin. + plugins (list[dict]): List of plugins cfg to build. + + Returns: + list[str]: List of the names of plugin. + """ + assert isinstance(plugins, list) + plugin_names = [] + for plugin in plugins: + plugin = plugin.copy() + name, layer = build_plugin_layer(plugin, in_channels=in_channels, postfix=plugin.pop('postfix', '')) + assert not hasattr(self, name), f'duplicate plugin {name}' + self.add_module(name, layer) + plugin_names.append(name) + return plugin_names + + def forward_plugin(self, x, plugin_names): + """Forward function for plugins.""" + out = x + for name in plugin_names: + out = getattr(self, name)(x) + return out + + @property + def norm1(self): + """nn.Module: normalization layer after the first convolution layer""" + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: normalization layer after the second convolution layer""" + return getattr(self, self.norm2_name) + + @property + def norm3(self): + """nn.Module: normalization layer after the third convolution layer""" + return getattr(self, self.norm3_name) + + def forward(self, x): + """Forward function.""" + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv1_plugin_names) + + out = self.conv2(out) + out = self.norm2(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv2_plugin_names) + + out = self.conv3(out) + out = self.norm3(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv3_plugin_names) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +@BACKBONES.register_module() +class ResNet(nn.Module): + """ResNet backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + in_channels (int): Number of input image channels. Default" 3. + stem_channels (int): Number of stem channels. Default: 64. + base_channels (int): Number of base channels of res layer. Default: 64. + num_stages (int): Resnet stages, normally 4. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv + avg_down (bool): Use AvgPool instead of stride conv when + downsampling in the bottleneck. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. + norm_cfg (dict): Dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + plugins (list[dict]): List of plugins for stages, each dict contains: + + - cfg (dict, required): Cfg dict to build plugin. + + - position (str, required): Position inside block to insert plugin, + options: 'after_conv1', 'after_conv2', 'after_conv3'. + + - stages (tuple[bool], optional): Stages to apply plugin, length + should be same as 'num_stages' + multi_grid (Sequence[int]|None): Multi grid dilation rates of last + stage. Default: None + contract_dilation (bool): Whether contract first dilation of each layer + Default: False + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): Whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Example: + >>> from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models import ResNet + >>> import torch + >>> self = ResNet(depth=18) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 64, 8, 8) + (1, 128, 4, 4) + (1, 256, 2, 2) + (1, 512, 1, 1) + """ + + arch_settings = { + 18: (BasicBlock, (2, 2, 2, 2)), + 34: (BasicBlock, (3, 4, 6, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)), + } + + def __init__( + self, + depth, + in_channels=3, + stem_channels=64, + base_channels=64, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + deep_stem=False, + avg_down=False, + frozen_stages=-1, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + dcn=None, + stage_with_dcn=(False, False, False, False), + plugins=None, + multi_grid=None, + contract_dilation=False, + with_cp=False, + zero_init_residual=True, + ): + super(ResNet, self).__init__() + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for resnet') + self.depth = depth + self.stem_channels = stem_channels + self.base_channels = base_channels + self.num_stages = num_stages + assert num_stages >= 1 and num_stages <= 4 + self.strides = strides + self.dilations = dilations + assert len(strides) == len(dilations) == num_stages + self.out_indices = out_indices + assert max(out_indices) < num_stages + self.style = style + self.deep_stem = deep_stem + self.avg_down = avg_down + self.frozen_stages = frozen_stages + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.with_cp = with_cp + self.norm_eval = norm_eval + self.dcn = dcn + self.stage_with_dcn = stage_with_dcn + if dcn is not None: + assert len(stage_with_dcn) == num_stages + self.plugins = plugins + self.multi_grid = multi_grid + self.contract_dilation = contract_dilation + self.zero_init_residual = zero_init_residual + self.block, stage_blocks = self.arch_settings[depth] + self.stage_blocks = stage_blocks[:num_stages] + self.inplanes = stem_channels + + self._make_stem_layer(in_channels, stem_channels) + + self.res_layers = [] + for i, num_blocks in enumerate(self.stage_blocks): + stride = strides[i] + dilation = dilations[i] + dcn = self.dcn if self.stage_with_dcn[i] else None + if plugins is not None: + stage_plugins = self.make_stage_plugins(plugins, i) + else: + stage_plugins = None + # multi grid is applied to last layer only + stage_multi_grid = multi_grid if i == len(self.stage_blocks) - 1 else None + planes = base_channels * 2 ** i + res_layer = self.make_res_layer( + block=self.block, + inplanes=self.inplanes, + planes=planes, + num_blocks=num_blocks, + stride=stride, + dilation=dilation, + style=self.style, + avg_down=self.avg_down, + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + dcn=dcn, + plugins=stage_plugins, + multi_grid=stage_multi_grid, + contract_dilation=contract_dilation, + ) + self.inplanes = planes * self.block.expansion + layer_name = f'layer{i+1}' + self.add_module(layer_name, res_layer) + self.res_layers.append(layer_name) + + self._freeze_stages() + + self.feat_dim = self.block.expansion * base_channels * 2 ** (len(self.stage_blocks) - 1) + + def make_stage_plugins(self, plugins, stage_idx): + """make plugins for ResNet 'stage_idx'th stage . + + Currently we support to insert 'context_block', + 'empirical_attention_block', 'nonlocal_block' into the backbone like + ResNet/ResNeXt. They could be inserted after conv1/conv2/conv3 of + Bottleneck. + + An example of plugins format could be : + >>> plugins=[ + ... dict(cfg=dict(type='xxx', arg1='xxx'), + ... stages=(False, True, True, True), + ... position='after_conv2'), + ... dict(cfg=dict(type='yyy'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='1'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='2'), + ... stages=(True, True, True, True), + ... position='after_conv3') + ... ] + >>> self = ResNet(depth=18) + >>> stage_plugins = self.make_stage_plugins(plugins, 0) + >>> assert len(stage_plugins) == 3 + + Suppose 'stage_idx=0', the structure of blocks in the stage would be: + conv1-> conv2->conv3->yyy->zzz1->zzz2 + Suppose 'stage_idx=1', the structure of blocks in the stage would be: + conv1-> conv2->xxx->conv3->yyy->zzz1->zzz2 + + If stages is missing, the plugin would be applied to all stages. + + Args: + plugins (list[dict]): List of plugins cfg to build. The postfix is + required if multiple same type plugins are inserted. + stage_idx (int): Index of stage to build + + Returns: + list[dict]: Plugins for current stage + """ + stage_plugins = [] + for plugin in plugins: + plugin = plugin.copy() + stages = plugin.pop('stages', None) + assert stages is None or len(stages) == self.num_stages + # whether to insert plugin into current stage + if stages is None or stages[stage_idx]: + stage_plugins.append(plugin) + + return stage_plugins + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``.""" + return ResLayer(**kwargs) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + def _make_stem_layer(self, in_channels, stem_channels): + """Make stem layer for ResNet.""" + if self.deep_stem: + self.stem = nn.Sequential( + build_conv_layer( + self.conv_cfg, in_channels, stem_channels // 2, kernel_size=3, stride=2, padding=1, bias=False + ), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer( + self.conv_cfg, + stem_channels // 2, + stem_channels // 2, + kernel_size=3, + stride=1, + padding=1, + bias=False, + ), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer( + self.conv_cfg, stem_channels // 2, stem_channels, kernel_size=3, stride=1, padding=1, bias=False + ), + build_norm_layer(self.norm_cfg, stem_channels)[1], + nn.ReLU(inplace=True), + ) + else: + self.conv1 = build_conv_layer( + self.conv_cfg, in_channels, stem_channels, kernel_size=7, stride=2, padding=3, bias=False + ) + self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, stem_channels, postfix=1) + self.add_module(self.norm1_name, norm1) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + def _freeze_stages(self): + """Freeze stages param and norm stats.""" + if self.frozen_stages >= 0: + if self.deep_stem: + self.stem.eval() + for param in self.stem.parameters(): + param.requires_grad = False + else: + self.norm1.eval() + for m in [self.conv1, self.norm1]: + for param in m.parameters(): + param.requires_grad = False + + for i in range(1, self.frozen_stages + 1): + m = getattr(self, f'layer{i}') + m.eval() + for param in m.parameters(): + param.requires_grad = False + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + + if self.dcn is not None: + for m in self.modules(): + if isinstance(m, Bottleneck) and hasattr(m, 'conv2_offset'): + constant_init(m.conv2_offset, 0) + + if self.zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + constant_init(m.norm3, 0) + elif isinstance(m, BasicBlock): + constant_init(m.norm2, 0) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + """Forward function.""" + if self.deep_stem: + x = self.stem(x) + else: + x = self.conv1(x) + x = self.norm1(x) + x = self.relu(x) + x = self.maxpool(x) + outs = [] + for i, layer_name in enumerate(self.res_layers): + res_layer = getattr(self, layer_name) + x = res_layer(x) + if i in self.out_indices: + outs.append(x) + return tuple(outs) + + def train(self, mode=True): + """Convert the model into training mode while keep normalization layer + freezed.""" + super(ResNet, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() + + +@BACKBONES.register_module() +class ResNetV1c(ResNet): + """ResNetV1c variant described in [1]_. + + Compared with default ResNet(ResNetV1b), ResNetV1c replaces the 7x7 conv + in the input stem with three 3x3 convs. + + References: + .. [1] https://arxiv.org/pdf/1812.01187.pdf + """ + + def __init__(self, **kwargs): + super(ResNetV1c, self).__init__(deep_stem=True, avg_down=False, **kwargs) + + +@BACKBONES.register_module() +class ResNetV1d(ResNet): + """ResNetV1d variant described in [1]_. + + Compared with default ResNet(ResNetV1b), ResNetV1d replaces the 7x7 conv in + the input stem with three 3x3 convs. And in the downsampling block, a 2x2 + avg_pool with stride 2 is added before conv, whose stride is changed to 1. + """ + + def __init__(self, **kwargs): + super(ResNetV1d, self).__init__(deep_stem=True, avg_down=True, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py new file mode 100644 index 000000000000..5ee1d26d15a6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/resnext.py @@ -0,0 +1,124 @@ +import math + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import build_conv_layer, build_norm_layer + +from ..builder import BACKBONES +from ..utils import ResLayer +from .resnet import Bottleneck as _Bottleneck +from .resnet import ResNet + + +class Bottleneck(_Bottleneck): + """Bottleneck block for ResNeXt. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is + "caffe", the stride-two layer is the first 1x1 conv layer. + """ + + def __init__(self, inplanes, planes, groups=1, base_width=4, base_channels=64, **kwargs): + super(Bottleneck, self).__init__(inplanes, planes, **kwargs) + + if groups == 1: + width = self.planes + else: + width = math.floor(self.planes * (base_width / base_channels)) * groups + + self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, width, postfix=1) + self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, width, postfix=2) + self.norm3_name, norm3 = build_norm_layer(self.norm_cfg, self.planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + self.conv_cfg, self.inplanes, width, kernel_size=1, stride=self.conv1_stride, bias=False + ) + self.add_module(self.norm1_name, norm1) + fallback_on_stride = False + self.with_modulated_dcn = False + if self.with_dcn: + fallback_on_stride = self.dcn.pop('fallback_on_stride', False) + if not self.with_dcn or fallback_on_stride: + self.conv2 = build_conv_layer( + self.conv_cfg, + width, + width, + kernel_size=3, + stride=self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + bias=False, + ) + else: + assert self.conv_cfg is None, 'conv_cfg must be None for DCN' + self.conv2 = build_conv_layer( + self.dcn, + width, + width, + kernel_size=3, + stride=self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + bias=False, + ) + + self.add_module(self.norm2_name, norm2) + self.conv3 = build_conv_layer(self.conv_cfg, width, self.planes * self.expansion, kernel_size=1, bias=False) + self.add_module(self.norm3_name, norm3) + + +@BACKBONES.register_module() +class ResNeXt(ResNet): + """ResNeXt backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + in_channels (int): Number of input image channels. Normally 3. + num_stages (int): Resnet stages, normally 4. + groups (int): Group of resnext. + base_width (int): Base width of resnext. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + norm_cfg (dict): dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Example: + >>> from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models import ResNeXt + >>> import torch + >>> self = ResNeXt(depth=50) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 256, 8, 8) + (1, 512, 4, 4) + (1, 1024, 2, 2) + (1, 2048, 1, 1) + """ + + arch_settings = { + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)), + } + + def __init__(self, groups=1, base_width=4, **kwargs): + self.groups = groups + self.base_width = base_width + super(ResNeXt, self).__init__(**kwargs) + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``""" + return ResLayer(groups=self.groups, base_width=self.base_width, base_channels=self.base_channels, **kwargs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py new file mode 100644 index 000000000000..e3a5a76e39f3 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/unet.py @@ -0,0 +1,451 @@ +import torch.nn as nn +import torch.utils.checkpoint as cp + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + UPSAMPLE_LAYERS, + ConvModule, + build_activation_layer, + build_norm_layer, + constant_init, + kaiming_init, +) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import load_checkpoint +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + +from ..builder import BACKBONES +from ..utils import UpConvBlock + + +class BasicConvBlock(nn.Module): + """Basic convolutional block for UNet. + + This module consists of several plain convolutional layers. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + num_convs (int): Number of convolutional layers. Default: 2. + stride (int): Whether use stride convolution to downsample + the input feature map. If stride=2, it only uses stride convolution + in the first convolutional layer to downsample the input feature + map. Options are 1 or 2. Default: 1. + dilation (int): Whether use dilated convolution to expand the + receptive field. Set dilation rate of each convolutional layer and + the dilation rate of the first convolutional layer is always 1. + Default: 1. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + """ + + def __init__( + self, + in_channels, + out_channels, + num_convs=2, + stride=1, + dilation=1, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + dcn=None, + plugins=None, + ): + super(BasicConvBlock, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + + self.with_cp = with_cp + convs = [] + for i in range(num_convs): + convs.append( + ConvModule( + in_channels=in_channels if i == 0 else out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride if i == 0 else 1, + dilation=1 if i == 0 else dilation, + padding=1 if i == 0 else dilation, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + ) + + self.convs = nn.Sequential(*convs) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.convs, x) + else: + out = self.convs(x) + return out + + +@UPSAMPLE_LAYERS.register_module() +class DeconvModule(nn.Module): + """Deconvolution upsample module in decoder for UNet (2X upsample). + + This module uses deconvolution to upsample feature map in the decoder + of UNet. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + kernel_size (int): Kernel size of the convolutional layer. Default: 4. + """ + + def __init__( + self, + in_channels, + out_channels, + with_cp=False, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + *, + kernel_size=4, + scale_factor=2, + ): + super(DeconvModule, self).__init__() + + assert (kernel_size - scale_factor >= 0) and (kernel_size - scale_factor) % 2 == 0, ( + f'kernel_size should be greater than or equal to scale_factor ' + f'and (kernel_size - scale_factor) should be even numbers, ' + f'while the kernel size is {kernel_size} and scale_factor is ' + f'{scale_factor}.' + ) + + stride = scale_factor + padding = (kernel_size - scale_factor) // 2 + self.with_cp = with_cp + deconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding) + + norm_name, norm = build_norm_layer(norm_cfg, out_channels) + activate = build_activation_layer(act_cfg) + self.deconv_upsamping = nn.Sequential(deconv, norm, activate) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.deconv_upsamping, x) + else: + out = self.deconv_upsamping(x) + return out + + +@UPSAMPLE_LAYERS.register_module() +class InterpConv(nn.Module): + """Interpolation upsample module in decoder for UNet. + + This module uses interpolation to upsample feature map in the decoder + of UNet. It consists of one interpolation upsample layer and one + convolutional layer. It can be one interpolation upsample layer followed + by one convolutional layer (conv_first=False) or one convolutional layer + followed by one interpolation upsample layer (conv_first=True). + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + conv_first (bool): Whether convolutional layer or interpolation + upsample layer first. Default: False. It means interpolation + upsample layer followed by one convolutional layer. + kernel_size (int): Kernel size of the convolutional layer. Default: 1. + stride (int): Stride of the convolutional layer. Default: 1. + padding (int): Padding of the convolutional layer. Default: 1. + upsample_cfg (dict): Interpolation config of the upsample layer. + Default: dict( + scale_factor=2, mode='bilinear', align_corners=False). + """ + + def __init__( + self, + in_channels, + out_channels, + with_cp=False, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + *, + conv_cfg=None, + conv_first=False, + kernel_size=1, + stride=1, + padding=0, + upsample_cfg=dict(scale_factor=2, mode='bilinear', align_corners=False), + ): + super(InterpConv, self).__init__() + + self.with_cp = with_cp + conv = ConvModule( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + upsample = nn.Upsample(**upsample_cfg) + if conv_first: + self.interp_upsample = nn.Sequential(conv, upsample) + else: + self.interp_upsample = nn.Sequential(upsample, conv) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.interp_upsample, x) + else: + out = self.interp_upsample(x) + return out + + +@BACKBONES.register_module() +class UNet(nn.Module): + """UNet backbone. + U-Net: Convolutional Networks for Biomedical Image Segmentation. + https://arxiv.org/pdf/1505.04597.pdf + + Args: + in_channels (int): Number of input image channels. Default" 3. + base_channels (int): Number of base channels of each stage. + The output channels of the first stage. Default: 64. + num_stages (int): Number of stages in encoder, normally 5. Default: 5. + strides (Sequence[int 1 | 2]): Strides of each stage in encoder. + len(strides) is equal to num_stages. Normally the stride of the + first stage in encoder is 1. If strides[i]=2, it uses stride + convolution to downsample in the correspondence encoder stage. + Default: (1, 1, 1, 1, 1). + enc_num_convs (Sequence[int]): Number of convolutional layers in the + convolution block of the correspondence encoder stage. + Default: (2, 2, 2, 2, 2). + dec_num_convs (Sequence[int]): Number of convolutional layers in the + convolution block of the correspondence decoder stage. + Default: (2, 2, 2, 2). + downsamples (Sequence[int]): Whether use MaxPool to downsample the + feature map after the first stage of encoder + (stages: [1, num_stages)). If the correspondence encoder stage use + stride convolution (strides[i]=2), it will never use MaxPool to + downsample, even downsamples[i-1]=True. + Default: (True, True, True, True). + enc_dilations (Sequence[int]): Dilation rate of each stage in encoder. + Default: (1, 1, 1, 1, 1). + dec_dilations (Sequence[int]): Dilation rate of each stage in decoder. + Default: (1, 1, 1, 1). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + upsample_cfg (dict): The upsample config of the upsample module in + decoder. Default: dict(type='InterpConv'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + + Notice: + The input image size should be divisible by the whole downsample rate + of the encoder. More detail of the whole downsample rate can be found + in UNet._check_input_divisible. + + """ + + def __init__( + self, + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False, + dcn=None, + plugins=None, + ): + super(UNet, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + assert len(strides) == num_stages, ( + 'The length of strides should be equal to num_stages, ' + f'while the strides is {strides}, the length of ' + f'strides is {len(strides)}, and the num_stages is ' + f'{num_stages}.' + ) + assert len(enc_num_convs) == num_stages, ( + 'The length of enc_num_convs should be equal to num_stages, ' + f'while the enc_num_convs is {enc_num_convs}, the length of ' + f'enc_num_convs is {len(enc_num_convs)}, and the num_stages is ' + f'{num_stages}.' + ) + assert len(dec_num_convs) == (num_stages - 1), ( + 'The length of dec_num_convs should be equal to (num_stages-1), ' + f'while the dec_num_convs is {dec_num_convs}, the length of ' + f'dec_num_convs is {len(dec_num_convs)}, and the num_stages is ' + f'{num_stages}.' + ) + assert len(downsamples) == (num_stages - 1), ( + 'The length of downsamples should be equal to (num_stages-1), ' + f'while the downsamples is {downsamples}, the length of ' + f'downsamples is {len(downsamples)}, and the num_stages is ' + f'{num_stages}.' + ) + assert len(enc_dilations) == num_stages, ( + 'The length of enc_dilations should be equal to num_stages, ' + f'while the enc_dilations is {enc_dilations}, the length of ' + f'enc_dilations is {len(enc_dilations)}, and the num_stages is ' + f'{num_stages}.' + ) + assert len(dec_dilations) == (num_stages - 1), ( + 'The length of dec_dilations should be equal to (num_stages-1), ' + f'while the dec_dilations is {dec_dilations}, the length of ' + f'dec_dilations is {len(dec_dilations)}, and the num_stages is ' + f'{num_stages}.' + ) + self.num_stages = num_stages + self.strides = strides + self.downsamples = downsamples + self.norm_eval = norm_eval + self.base_channels = base_channels + + self.encoder = nn.ModuleList() + self.decoder = nn.ModuleList() + + for i in range(num_stages): + enc_conv_block = [] + if i != 0: + if strides[i] == 1 and downsamples[i - 1]: + enc_conv_block.append(nn.MaxPool2d(kernel_size=2)) + upsample = strides[i] != 1 or downsamples[i - 1] + self.decoder.append( + UpConvBlock( + conv_block=BasicConvBlock, + in_channels=base_channels * 2 ** i, + skip_channels=base_channels * 2 ** (i - 1), + out_channels=base_channels * 2 ** (i - 1), + num_convs=dec_num_convs[i - 1], + stride=1, + dilation=dec_dilations[i - 1], + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + upsample_cfg=upsample_cfg if upsample else None, + dcn=None, + plugins=None, + ) + ) + + enc_conv_block.append( + BasicConvBlock( + in_channels=in_channels, + out_channels=base_channels * 2 ** i, + num_convs=enc_num_convs[i], + stride=strides[i], + dilation=enc_dilations[i], + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + dcn=None, + plugins=None, + ) + ) + self.encoder.append((nn.Sequential(*enc_conv_block))) + in_channels = base_channels * 2 ** i + + def forward(self, x): + self._check_input_divisible(x) + enc_outs = [] + for enc in self.encoder: + x = enc(x) + enc_outs.append(x) + dec_outs = [x] + for i in reversed(range(len(self.decoder))): + x = self.decoder[i](enc_outs[i], x) + dec_outs.append(x) + + return dec_outs + + def train(self, mode=True): + """Convert the model into training mode while keep normalization layer + freezed.""" + super(UNet, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() + + def _check_input_divisible(self, x): + h, w = x.shape[-2:] + whole_downsample_rate = 1 + for i in range(1, self.num_stages): + if self.strides[i] == 2 or self.downsamples[i - 1]: + whole_downsample_rate *= 2 + assert (h % whole_downsample_rate == 0) and (w % whole_downsample_rate == 0), ( + f'The input image size {(h, w)} should be divisible by the whole ' + f'downsample rate {whole_downsample_rate}, when num_stages is ' + f'{self.num_stages}, strides is {self.strides}, and downsamples ' + f'is {self.downsamples}.' + ) + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py new file mode 100644 index 000000000000..7dcf93aa357b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/uniformer.py @@ -0,0 +1,554 @@ +# -------------------------------------------------------- +# UniFormer +# Copyright (c) 2022 SenseTime X-Lab +# Licensed under The MIT License [see LICENSE for details] +# Written by Kunchang Li +# -------------------------------------------------------- + +import math +from collections import OrderedDict +from functools import partial + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv_custom import load_checkpoint +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + +from ..builder import BACKBONES + + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.0): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class CMlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.0): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Conv2d(in_features, hidden_features, 1) + self.act = act_layer() + self.fc2 = nn.Conv2d(hidden_features, out_features, 1) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class CBlock(nn.Module): + def __init__( + self, + dim, + num_heads, + mlp_ratio=4.0, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + ): + super().__init__() + self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) + self.norm1 = nn.BatchNorm2d(dim) + self.conv1 = nn.Conv2d(dim, dim, 1) + self.conv2 = nn.Conv2d(dim, dim, 1) + self.attn = nn.Conv2d(dim, dim, 5, padding=2, groups=dim) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = nn.BatchNorm2d(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = CMlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def forward(self, x): + x = x + self.pos_embed(x) + x = x + self.drop_path(self.conv2(self.attn(self.conv1(self.norm1(x))))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + return x + + +class Attention(nn.Module): + def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.0, proj_drop=0.0): + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights + self.scale = qk_scale or head_dim ** -0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x): + B, N, C = x.shape + qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class SABlock(nn.Module): + def __init__( + self, + dim, + num_heads, + mlp_ratio=4.0, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + ): + super().__init__() + self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop + ) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def forward(self, x): + x = x + self.pos_embed(x) + B, N, H, W = x.shape + x = x.flatten(2).transpose(1, 2) + x = x + self.drop_path(self.attn(self.norm1(x))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + x = x.transpose(1, 2).reshape(B, N, H, W) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class SABlock_Windows(nn.Module): + def __init__( + self, + dim, + num_heads, + window_size=14, + mlp_ratio=4.0, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + ): + super().__init__() + self.window_size = window_size + self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop + ) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def forward(self, x): + x = x + self.pos_embed(x) + x = x.permute(0, 2, 3, 1) + B, H, W, C = x.shape + shortcut = x + x = self.norm1(x) + + pad_l = pad_t = 0 + pad_r = (self.window_size - W % self.window_size) % self.window_size + pad_b = (self.window_size - H % self.window_size) % self.window_size + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + _, Hp, Wp, _ = x.shape + + x_windows = window_partition(x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA + attn_windows = self.attn(x_windows) # nW*B, window_size*window_size, C + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + x = window_reverse(attn_windows, self.window_size, Hp, Wp) # B H' W' C + + # reverse cyclic shift + if pad_r > 0 or pad_b > 0: + x = x[:, :H, :W, :].contiguous() + + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + x = x.permute(0, 3, 1, 2).reshape(B, C, H, W) + return x + + +class PatchEmbed(nn.Module): + """ Image to Patch Embedding + """ + + def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) + self.img_size = img_size + self.patch_size = patch_size + self.num_patches = num_patches + self.norm = nn.LayerNorm(embed_dim) + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + + def forward(self, x): + B, _, H, W = x.shape + x = self.proj(x) + B, _, H, W = x.shape + x = x.flatten(2).transpose(1, 2) + x = self.norm(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + return x + + +@BACKBONES.register_module() +class UniFormer(nn.Module): + """ Vision Transformer + A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - + https://arxiv.org/abs/2010.11929 + """ + + def __init__( + self, + layers=[3, 4, 8, 3], + img_size=224, + in_chans=3, + num_classes=80, + embed_dim=[64, 128, 320, 512], + head_dim=64, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + representation_size=None, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.0, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + pretrained_path=None, + use_checkpoint=False, + checkpoint_num=[0, 0, 0, 0], + windows=False, + hybrid=False, + window_size=14, + ): + """ + Args: + layer (list): number of block in each layer + img_size (int, tuple): input image size + in_chans (int): number of input channels + num_classes (int): number of classes for classification head + embed_dim (int): embedding dimension + head_dim (int): dimension of attention heads + mlp_ratio (int): ratio of mlp hidden dim to embedding dim + qkv_bias (bool): enable bias for qkv if True + qk_scale (float): override default qk scale of head_dim ** -0.5 if set + representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set + drop_rate (float): dropout rate + attn_drop_rate (float): attention dropout rate + drop_path_rate (float): stochastic depth rate + norm_layer (nn.Module): normalization layer + pretrained_path (str): path of pretrained model + use_checkpoint (bool): whether use checkpoint + checkpoint_num (list): index for using checkpoint in every stage + windows (bool): whether use window MHRA + hybrid (bool): whether use hybrid MHRA + window_size (int): size of window (>14) + """ + super().__init__() + self.num_classes = num_classes + self.use_checkpoint = use_checkpoint + self.checkpoint_num = checkpoint_num + self.windows = windows + print(f'Use Checkpoint: {self.use_checkpoint}') + print(f'Checkpoint Number: {self.checkpoint_num}') + self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models + norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6) + + self.patch_embed1 = PatchEmbed(img_size=img_size, patch_size=4, in_chans=in_chans, embed_dim=embed_dim[0]) + self.patch_embed2 = PatchEmbed( + img_size=img_size // 4, patch_size=2, in_chans=embed_dim[0], embed_dim=embed_dim[1] + ) + self.patch_embed3 = PatchEmbed( + img_size=img_size // 8, patch_size=2, in_chans=embed_dim[1], embed_dim=embed_dim[2] + ) + self.patch_embed4 = PatchEmbed( + img_size=img_size // 16, patch_size=2, in_chans=embed_dim[2], embed_dim=embed_dim[3] + ) + + self.pos_drop = nn.Dropout(p=drop_rate) + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(layers))] # stochastic depth decay rule + num_heads = [dim // head_dim for dim in embed_dim] + self.blocks1 = nn.ModuleList( + [ + CBlock( + dim=embed_dim[0], + num_heads=num_heads[0], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i], + norm_layer=norm_layer, + ) + for i in range(layers[0]) + ] + ) + self.norm1 = norm_layer(embed_dim[0]) + self.blocks2 = nn.ModuleList( + [ + CBlock( + dim=embed_dim[1], + num_heads=num_heads[1], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i + layers[0]], + norm_layer=norm_layer, + ) + for i in range(layers[1]) + ] + ) + self.norm2 = norm_layer(embed_dim[1]) + if self.windows: + print('Use local window for all blocks in stage3') + self.blocks3 = nn.ModuleList( + [ + SABlock_Windows( + dim=embed_dim[2], + num_heads=num_heads[2], + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i + layers[0] + layers[1]], + norm_layer=norm_layer, + ) + for i in range(layers[2]) + ] + ) + elif hybrid: + print('Use hybrid window for blocks in stage3') + block3 = [] + for i in range(layers[2]): + if (i + 1) % 4 == 0: + block3.append( + SABlock( + dim=embed_dim[2], + num_heads=num_heads[2], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i + layers[0] + layers[1]], + norm_layer=norm_layer, + ) + ) + else: + block3.append( + SABlock_Windows( + dim=embed_dim[2], + num_heads=num_heads[2], + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i + layers[0] + layers[1]], + norm_layer=norm_layer, + ) + ) + self.blocks3 = nn.ModuleList(block3) + else: + print('Use global window for all blocks in stage3') + self.blocks3 = nn.ModuleList( + [ + SABlock( + dim=embed_dim[2], + num_heads=num_heads[2], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i + layers[0] + layers[1]], + norm_layer=norm_layer, + ) + for i in range(layers[2]) + ] + ) + self.norm3 = norm_layer(embed_dim[2]) + self.blocks4 = nn.ModuleList( + [ + SABlock( + dim=embed_dim[3], + num_heads=num_heads[3], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i + layers[0] + layers[1] + layers[2]], + norm_layer=norm_layer, + ) + for i in range(layers[3]) + ] + ) + self.norm4 = norm_layer(embed_dim[3]) + + # Representation layer + if representation_size: + self.num_features = representation_size + self.pre_logits = nn.Sequential( + OrderedDict([('fc', nn.Linear(embed_dim, representation_size)), ('act', nn.Tanh())]) + ) + else: + self.pre_logits = nn.Identity() + + self.apply(self._init_weights) + self.init_weights(pretrained=pretrained_path) + + def init_weights(self, pretrained): + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, map_location='cpu', strict=False, logger=logger) + print(f'Load pretrained model from {pretrained}') + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + @torch.jit.ignore + def no_weight_decay(self): + return {'pos_embed', 'cls_token'} + + def get_classifier(self): + return self.head + + def reset_classifier(self, num_classes, global_pool=''): + self.num_classes = num_classes + self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() + + def forward_features(self, x): + out = [] + x = self.patch_embed1(x) + x = self.pos_drop(x) + for i, blk in enumerate(self.blocks1): + if self.use_checkpoint and i < self.checkpoint_num[0]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm1(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + x = self.patch_embed2(x) + for i, blk in enumerate(self.blocks2): + if self.use_checkpoint and i < self.checkpoint_num[1]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm2(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + x = self.patch_embed3(x) + for i, blk in enumerate(self.blocks3): + if self.use_checkpoint and i < self.checkpoint_num[2]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm3(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + x = self.patch_embed4(x) + for i, blk in enumerate(self.blocks4): + if self.use_checkpoint and i < self.checkpoint_num[3]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm4(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + return tuple(out) + + def forward(self, x): + x = self.forward_features(x) + return x diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py new file mode 100644 index 000000000000..883d56fd5bc9 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/backbones/vit.py @@ -0,0 +1,443 @@ +"""Modified from https://github.com/rwightman/pytorch-image- +models/blob/master/timm/models/vision_transformer.py.""" + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as cp + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + Conv2d, + Linear, + build_activation_layer, + build_norm_layer, + constant_init, + kaiming_init, + normal_init, +) +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import _load_checkpoint +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils.parrots_wrapper import _BatchNorm +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.utils import get_root_logger + +from ..builder import BACKBONES +from ..utils import DropPath, trunc_normal_ + + +class Mlp(nn.Module): + """MLP layer for Encoder block. + + Args: + in_features(int): Input dimension for the first fully + connected layer. + hidden_features(int): Output dimension for the first fully + connected layer. + out_features(int): Output dementsion for the second fully + connected layer. + act_cfg(dict): Config dict for activation layer. + Default: dict(type='GELU'). + drop(float): Drop rate for the dropout layer. Dropout rate has + to be between 0 and 1. Default: 0. + """ + + def __init__(self, in_features, hidden_features=None, out_features=None, act_cfg=dict(type='GELU'), drop=0.0): + super(Mlp, self).__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = Linear(in_features, hidden_features) + self.act = build_activation_layer(act_cfg) + self.fc2 = Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Module): + """Attention layer for Encoder block. + + Args: + dim (int): Dimension for the input vector. + num_heads (int): Number of parallel attention heads. + qkv_bias (bool): Enable bias for qkv if True. Default: False. + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. + attn_drop (float): Drop rate for attention output weights. + Default: 0. + proj_drop (float): Drop rate for output weights. Default: 0. + """ + + def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.0, proj_drop=0.0): + super(Attention, self).__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x): + b, n, c = x.shape + qkv = self.qkv(x).reshape(b, n, 3, self.num_heads, c // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] + + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(b, n, c) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class Block(nn.Module): + """Implements encoder block with residual connection. + + Args: + dim (int): The feature dimension. + num_heads (int): Number of parallel attention heads. + mlp_ratio (int): Ratio of mlp hidden dim to embedding dim. + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. + drop (float): Drop rate for mlp output weights. Default: 0. + attn_drop (float): Drop rate for attention output weights. + Default: 0. + proj_drop (float): Drop rate for attn layer output weights. + Default: 0. + drop_path (float): Drop rate for paths of model. + Default: 0. + act_cfg (dict): Config dict for activation layer. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN', requires_grad=True). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__( + self, + dim, + num_heads, + mlp_ratio=4, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN', eps=1e-6), + with_cp=False, + ): + super(Block, self).__init__() + self.with_cp = with_cp + _, self.norm1 = build_norm_layer(norm_cfg, dim) + self.attn = Attention(dim, num_heads, qkv_bias, qk_scale, attn_drop, proj_drop) + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + _, self.norm2 = build_norm_layer(norm_cfg, dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_cfg=act_cfg, drop=drop) + + def forward(self, x): + def _inner_forward(x): + out = x + self.drop_path(self.attn(self.norm1(x))) + out = out + self.drop_path(self.mlp(self.norm2(out))) + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class PatchEmbed(nn.Module): + """Image to Patch Embedding. + + Args: + img_size (int | tuple): Input image size. + default: 224. + patch_size (int): Width and height for a patch. + default: 16. + in_channels (int): Input channels for images. Default: 3. + embed_dim (int): The embedding dimension. Default: 768. + """ + + def __init__(self, img_size=224, patch_size=16, in_channels=3, embed_dim=768): + super(PatchEmbed, self).__init__() + if isinstance(img_size, int): + self.img_size = (img_size, img_size) + elif isinstance(img_size, tuple): + self.img_size = img_size + else: + raise TypeError('img_size must be type of int or tuple') + h, w = self.img_size + self.patch_size = (patch_size, patch_size) + self.num_patches = (h // patch_size) * (w // patch_size) + self.proj = Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size) + + def forward(self, x): + return self.proj(x).flatten(2).transpose(1, 2) + + +@BACKBONES.register_module() +class VisionTransformer(nn.Module): + """Vision transformer backbone. + + A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for + Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 + + Args: + img_size (tuple): input image size. Default: (224, 224). + patch_size (int, tuple): patch size. Default: 16. + in_channels (int): number of input channels. Default: 3. + embed_dim (int): embedding dimension. Default: 768. + depth (int): depth of transformer. Default: 12. + num_heads (int): number of attention heads. Default: 12. + mlp_ratio (int): ratio of mlp hidden dim to embedding dim. + Default: 4. + out_indices (list | tuple | int): Output from which stages. + Default: -1. + qkv_bias (bool): enable bias for qkv if True. Default: True. + qk_scale (float): override default qk scale of head_dim ** -0.5 if set. + drop_rate (float): dropout rate. Default: 0. + attn_drop_rate (float): attention dropout rate. Default: 0. + drop_path_rate (float): Rate of DropPath. Default: 0. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN', eps=1e-6, requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='GELU'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + final_norm (bool): Whether to add a additional layer to normalize + final feature map. Default: False. + interpolate_mode (str): Select the interpolate mode for position + embeding vector resize. Default: bicubic. + with_cls_token (bool): If concatenating class token into image tokens + as transformer input. Default: True. + with_cp (bool): Use checkpoint or not. Using checkpoint + will save some memory while slowing down the training speed. + Default: False. + """ + + def __init__( + self, + img_size=(224, 224), + patch_size=16, + in_channels=3, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + out_indices=11, + qkv_bias=True, + qk_scale=None, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.0, + norm_cfg=dict(type='LN', eps=1e-6, requires_grad=True), + act_cfg=dict(type='GELU'), + norm_eval=False, + final_norm=False, + with_cls_token=True, + interpolate_mode='bicubic', + with_cp=False, + ): + super(VisionTransformer, self).__init__() + self.img_size = img_size + self.patch_size = patch_size + self.features = self.embed_dim = embed_dim + self.patch_embed = PatchEmbed( + img_size=img_size, patch_size=patch_size, in_channels=in_channels, embed_dim=embed_dim + ) + + self.with_cls_token = with_cls_token + self.cls_token = nn.Parameter(torch.zeros(1, 1, self.embed_dim)) + self.pos_embed = nn.Parameter(torch.zeros(1, self.patch_embed.num_patches + 1, embed_dim)) + self.pos_drop = nn.Dropout(p=drop_rate) + + if isinstance(out_indices, int): + self.out_indices = [out_indices] + elif isinstance(out_indices, list) or isinstance(out_indices, tuple): + self.out_indices = out_indices + else: + raise TypeError('out_indices must be type of int, list or tuple') + + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule + self.blocks = nn.ModuleList( + [ + Block( + dim=embed_dim, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=dpr[i], + attn_drop=attn_drop_rate, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + with_cp=with_cp, + ) + for i in range(depth) + ] + ) + + self.interpolate_mode = interpolate_mode + self.final_norm = final_norm + if final_norm: + _, self.norm = build_norm_layer(norm_cfg, embed_dim) + + self.norm_eval = norm_eval + self.with_cp = with_cp + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = get_root_logger() + checkpoint = _load_checkpoint(pretrained, logger=logger) + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + if 'pos_embed' in state_dict.keys(): + if self.pos_embed.shape != state_dict['pos_embed'].shape: + logger.info( + msg=f'Resize the pos_embed shape from \ +{state_dict["pos_embed"].shape} to {self.pos_embed.shape}' + ) + h, w = self.img_size + pos_size = int(math.sqrt(state_dict['pos_embed'].shape[1] - 1)) + state_dict['pos_embed'] = self.resize_pos_embed( + state_dict['pos_embed'], (h, w), (pos_size, pos_size), self.patch_size, self.interpolate_mode + ) + + self.load_state_dict(state_dict, False) + + elif pretrained is None: + # We only implement the 'jax_impl' initialization implemented at + # https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501 + trunc_normal_(self.pos_embed, std=0.02) + trunc_normal_(self.cls_token, std=0.02) + for n, m in self.named_modules(): + if isinstance(m, Linear): + trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + if 'mlp' in n: + normal_init(m.bias, std=1e-6) + else: + constant_init(m.bias, 0) + elif isinstance(m, Conv2d): + kaiming_init(m.weight, mode='fan_in') + if m.bias is not None: + constant_init(m.bias, 0) + elif isinstance(m, (_BatchNorm, nn.GroupNorm, nn.LayerNorm)): + constant_init(m.bias, 0) + constant_init(m.weight, 1.0) + else: + raise TypeError('pretrained must be a str or None') + + def _pos_embeding(self, img, patched_img, pos_embed): + """Positiong embeding method. + + Resize the pos_embed, if the input image size doesn't match + the training size. + Args: + img (torch.Tensor): The inference image tensor, the shape + must be [B, C, H, W]. + patched_img (torch.Tensor): The patched image, it should be + shape of [B, L1, C]. + pos_embed (torch.Tensor): The pos_embed weighs, it should be + shape of [B, L2, c]. + Return: + torch.Tensor: The pos encoded image feature. + """ + assert ( + patched_img.ndim == 3 and pos_embed.ndim == 3 + ), 'the shapes of patched_img and pos_embed must be [B, L, C]' + x_len, pos_len = patched_img.shape[1], pos_embed.shape[1] + if x_len != pos_len: + if pos_len == (self.img_size[0] // self.patch_size) * (self.img_size[1] // self.patch_size) + 1: + pos_h = self.img_size[0] // self.patch_size + pos_w = self.img_size[1] // self.patch_size + else: + raise ValueError('Unexpected shape of pos_embed, got {}.'.format(pos_embed.shape)) + pos_embed = self.resize_pos_embed( + pos_embed, img.shape[2:], (pos_h, pos_w), self.patch_size, self.interpolate_mode + ) + return self.pos_drop(patched_img + pos_embed) + + @staticmethod + def resize_pos_embed(pos_embed, input_shpae, pos_shape, patch_size, mode): + """Resize pos_embed weights. + + Resize pos_embed using bicubic interpolate method. + Args: + pos_embed (torch.Tensor): pos_embed weights. + input_shpae (tuple): Tuple for (input_h, intput_w). + pos_shape (tuple): Tuple for (pos_h, pos_w). + patch_size (int): Patch size. + Return: + torch.Tensor: The resized pos_embed of shape [B, L_new, C] + """ + assert pos_embed.ndim == 3, 'shape of pos_embed must be [B, L, C]' + input_h, input_w = input_shpae + pos_h, pos_w = pos_shape + cls_token_weight = pos_embed[:, 0] + pos_embed_weight = pos_embed[:, (-1 * pos_h * pos_w) :] + pos_embed_weight = pos_embed_weight.reshape(1, pos_h, pos_w, pos_embed.shape[2]).permute(0, 3, 1, 2) + pos_embed_weight = F.interpolate( + pos_embed_weight, size=[input_h // patch_size, input_w // patch_size], align_corners=False, mode=mode + ) + cls_token_weight = cls_token_weight.unsqueeze(1) + pos_embed_weight = torch.flatten(pos_embed_weight, 2).transpose(1, 2) + pos_embed = torch.cat((cls_token_weight, pos_embed_weight), dim=1) + return pos_embed + + def forward(self, inputs): + B = inputs.shape[0] + + x = self.patch_embed(inputs) + + cls_tokens = self.cls_token.expand(B, -1, -1) + x = torch.cat((cls_tokens, x), dim=1) + x = self._pos_embeding(inputs, x, self.pos_embed) + + if not self.with_cls_token: + # Remove class token for transformer input + x = x[:, 1:] + + outs = [] + for i, blk in enumerate(self.blocks): + x = blk(x) + if i == len(self.blocks) - 1: + if self.final_norm: + x = self.norm(x) + if i in self.out_indices: + if self.with_cls_token: + # Remove class token and reshape token for decoder head + out = x[:, 1:] + else: + out = x + B, _, C = out.shape + out = out.reshape( + B, inputs.shape[2] // self.patch_size, inputs.shape[3] // self.patch_size, C + ).permute(0, 3, 1, 2) + outs.append(out) + + return tuple(outs) + + def train(self, mode=True): + super(VisionTransformer, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, nn.LayerNorm): + m.eval() diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py new file mode 100644 index 000000000000..4cc391e48a34 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/builder.py @@ -0,0 +1,43 @@ +import warnings + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import MODELS as MMCV_MODELS +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import Registry + +MODELS = Registry('models', parent=MMCV_MODELS) + +BACKBONES = MODELS +NECKS = MODELS +HEADS = MODELS +LOSSES = MODELS +SEGMENTORS = MODELS + + +def build_backbone(cfg): + """Build backbone.""" + return BACKBONES.build(cfg) + + +def build_neck(cfg): + """Build neck.""" + return NECKS.build(cfg) + + +def build_head(cfg): + """Build head.""" + return HEADS.build(cfg) + + +def build_loss(cfg): + """Build loss.""" + return LOSSES.build(cfg) + + +def build_segmentor(cfg, train_cfg=None, test_cfg=None): + """Build segmentor.""" + if train_cfg is not None or test_cfg is not None: + warnings.warn('train_cfg and test_cfg is deprecated, ' 'please specify them in model', UserWarning) + assert ( + cfg.get('train_cfg') is None or train_cfg is None + ), 'train_cfg specified in both outer field and model field ' + assert cfg.get('test_cfg') is None or test_cfg is None, 'test_cfg specified in both outer field and model field ' + return SEGMENTORS.build(cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py new file mode 100644 index 000000000000..1c4ab285953c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/__init__.py @@ -0,0 +1,45 @@ +from .ann_head import ANNHead +from .apc_head import APCHead +from .aspp_head import ASPPHead +from .cc_head import CCHead +from .da_head import DAHead +from .dm_head import DMHead +from .dnl_head import DNLHead +from .ema_head import EMAHead +from .enc_head import EncHead +from .fcn_head import FCNHead +from .fpn_head import FPNHead +from .gc_head import GCHead +from .lraspp_head import LRASPPHead +from .nl_head import NLHead +from .ocr_head import OCRHead + +# from .point_head import PointHead +from .psa_head import PSAHead +from .psp_head import PSPHead +from .sep_aspp_head import DepthwiseSeparableASPPHead +from .sep_fcn_head import DepthwiseSeparableFCNHead +from .uper_head import UPerHead + +__all__ = [ + 'FCNHead', + 'PSPHead', + 'ASPPHead', + 'PSAHead', + 'NLHead', + 'GCHead', + 'CCHead', + 'UPerHead', + 'DepthwiseSeparableASPPHead', + 'ANNHead', + 'DAHead', + 'OCRHead', + 'EncHead', + 'DepthwiseSeparableFCNHead', + 'FPNHead', + 'EMAHead', + 'DNLHead', + 'APCHead', + 'DMHead', + 'LRASPPHead', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py new file mode 100644 index 000000000000..363c155b214b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ann_head.py @@ -0,0 +1,259 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .decode_head import BaseDecodeHead + + +class PPMConcat(nn.ModuleList): + """Pyramid Pooling Module that only concat the features of each layer. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. + """ + + def __init__(self, pool_scales=(1, 3, 6, 8)): + super(PPMConcat, self).__init__([nn.AdaptiveAvgPool2d(pool_scale) for pool_scale in pool_scales]) + + def forward(self, feats): + """Forward function.""" + ppm_outs = [] + for ppm in self: + ppm_out = ppm(feats) + ppm_outs.append(ppm_out.view(*feats.shape[:2], -1)) + concat_outs = torch.cat(ppm_outs, dim=2) + return concat_outs + + +class SelfAttentionBlock(_SelfAttentionBlock): + """Make a ANN used SelfAttentionBlock. + + Args: + low_in_channels (int): Input channels of lower level feature, + which is the key feature for self-attention. + high_in_channels (int): Input channels of higher level feature, + which is the query feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + share_key_query (bool): Whether share projection weight between key + and query projection. + query_scale (int): The scale of query feature map. + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__( + self, + low_in_channels, + high_in_channels, + channels, + out_channels, + share_key_query, + query_scale, + key_pool_scales, + conv_cfg, + norm_cfg, + act_cfg, + ): + key_psp = PPMConcat(key_pool_scales) + if query_scale > 1: + query_downsample = nn.MaxPool2d(kernel_size=query_scale) + else: + query_downsample = None + super(SelfAttentionBlock, self).__init__( + key_in_channels=low_in_channels, + query_in_channels=high_in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=share_key_query, + query_downsample=query_downsample, + key_downsample=key_psp, + key_query_num_convs=1, + key_query_norm=True, + value_out_num_convs=1, + value_out_norm=False, + matmul_norm=True, + with_out=True, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + + +class AFNB(nn.Module): + """Asymmetric Fusion Non-local Block(AFNB) + + Args: + low_in_channels (int): Input channels of lower level feature, + which is the key feature for self-attention. + high_in_channels (int): Input channels of higher level feature, + which is the query feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + and query projection. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__( + self, + low_in_channels, + high_in_channels, + channels, + out_channels, + query_scales, + key_pool_scales, + conv_cfg, + norm_cfg, + act_cfg, + ): + super(AFNB, self).__init__() + self.stages = nn.ModuleList() + for query_scale in query_scales: + self.stages.append( + SelfAttentionBlock( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=False, + query_scale=query_scale, + key_pool_scales=key_pool_scales, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + ) + self.bottleneck = ConvModule( + out_channels + high_in_channels, out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None + ) + + def forward(self, low_feats, high_feats): + """Forward function.""" + priors = [stage(high_feats, low_feats) for stage in self.stages] + context = torch.stack(priors, dim=0).sum(dim=0) + output = self.bottleneck(torch.cat([context, high_feats], 1)) + return output + + +class APNB(nn.Module): + """Asymmetric Pyramid Non-local Block (APNB) + + Args: + in_channels (int): Input channels of key/query feature, + which is the key feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__( + self, in_channels, channels, out_channels, query_scales, key_pool_scales, conv_cfg, norm_cfg, act_cfg + ): + super(APNB, self).__init__() + self.stages = nn.ModuleList() + for query_scale in query_scales: + self.stages.append( + SelfAttentionBlock( + low_in_channels=in_channels, + high_in_channels=in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=True, + query_scale=query_scale, + key_pool_scales=key_pool_scales, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + ) + self.bottleneck = ConvModule( + 2 * in_channels, out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg + ) + + def forward(self, feats): + """Forward function.""" + priors = [stage(feats, feats) for stage in self.stages] + context = torch.stack(priors, dim=0).sum(dim=0) + output = self.bottleneck(torch.cat([context, feats], 1)) + return output + + +@HEADS.register_module() +class ANNHead(BaseDecodeHead): + """Asymmetric Non-local Neural Networks for Semantic Segmentation. + + This head is the implementation of `ANNNet + `_. + + Args: + project_channels (int): Projection channels for Nonlocal. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): The pooling scales of key feature map. + Default: (1, 3, 6, 8). + """ + + def __init__(self, project_channels, query_scales=(1,), key_pool_scales=(1, 3, 6, 8), **kwargs): + super(ANNHead, self).__init__(input_transform='multiple_select', **kwargs) + assert len(self.in_channels) == 2 + low_in_channels, high_in_channels = self.in_channels + self.project_channels = project_channels + self.fusion = AFNB( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + out_channels=high_in_channels, + channels=project_channels, + query_scales=query_scales, + key_pool_scales=key_pool_scales, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + self.bottleneck = ConvModule( + high_in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + self.context = APNB( + in_channels=self.channels, + out_channels=self.channels, + channels=project_channels, + query_scales=query_scales, + key_pool_scales=key_pool_scales, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + + def forward(self, inputs): + """Forward function.""" + low_feats, high_feats = self._transform_inputs(inputs) + output = self.fusion(low_feats, high_feats) + output = self.dropout(output) + output = self.bottleneck(output) + output = self.context(output) + output = self.cls_seg(output) + + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py new file mode 100644 index 000000000000..04721c1d46f6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/apc_head.py @@ -0,0 +1,141 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class ACM(nn.Module): + """Adaptive Context Module used in APCNet. + + Args: + pool_scale (int): Pooling scale used in Adaptive Context + Module to extract region features. + fusion (bool): Add one conv to fuse residual feature. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict | None): Config of conv layers. + norm_cfg (dict | None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, pool_scale, fusion, in_channels, channels, conv_cfg, norm_cfg, act_cfg): + super(ACM, self).__init__() + self.pool_scale = pool_scale + self.fusion = fusion + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.pooled_redu_conv = ConvModule( + self.in_channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) + + self.input_redu_conv = ConvModule( + self.in_channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) + + self.global_info = ConvModule( + self.channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) + + self.gla = nn.Conv2d(self.channels, self.pool_scale ** 2, 1, 1, 0) + + self.residual_conv = ConvModule( + self.channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) + + if self.fusion: + self.fusion_conv = ConvModule( + self.channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) + + def forward(self, x): + """Forward function.""" + pooled_x = F.adaptive_avg_pool2d(x, self.pool_scale) + # [batch_size, channels, h, w] + x = self.input_redu_conv(x) + # [batch_size, channels, pool_scale, pool_scale] + pooled_x = self.pooled_redu_conv(pooled_x) + batch_size = x.size(0) + # [batch_size, pool_scale * pool_scale, channels] + pooled_x = pooled_x.view(batch_size, self.channels, -1).permute(0, 2, 1).contiguous() + # [batch_size, h * w, pool_scale * pool_scale] + affinity_matrix = ( + self.gla(x + resize(self.global_info(F.adaptive_avg_pool2d(x, 1)), size=x.shape[2:])) + .permute(0, 2, 3, 1) + .reshape(batch_size, -1, self.pool_scale ** 2) + ) + affinity_matrix = F.sigmoid(affinity_matrix) + # [batch_size, h * w, channels] + z_out = torch.matmul(affinity_matrix, pooled_x) + # [batch_size, channels, h * w] + z_out = z_out.permute(0, 2, 1).contiguous() + # [batch_size, channels, h, w] + z_out = z_out.view(batch_size, self.channels, x.size(2), x.size(3)) + z_out = self.residual_conv(z_out) + z_out = F.relu(z_out + x) + if self.fusion: + z_out = self.fusion_conv(z_out) + + return z_out + + +@HEADS.register_module() +class APCHead(BaseDecodeHead): + """Adaptive Pyramid Context Network for Semantic Segmentation. + + This head is the implementation of + `APCNet `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Adaptive Context + Module. Default: (1, 2, 3, 6). + fusion (bool): Add one conv to fuse residual feature. + """ + + def __init__(self, pool_scales=(1, 2, 3, 6), fusion=True, **kwargs): + super(APCHead, self).__init__(**kwargs) + assert isinstance(pool_scales, (list, tuple)) + self.pool_scales = pool_scales + self.fusion = fusion + acm_modules = [] + for pool_scale in self.pool_scales: + acm_modules.append( + ACM( + pool_scale, + self.fusion, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + ) + self.acm_modules = nn.ModuleList(acm_modules) + self.bottleneck = ConvModule( + self.in_channels + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + acm_outs = [x] + for acm_module in self.acm_modules: + acm_outs.append(acm_module(x)) + acm_outs = torch.cat(acm_outs, dim=1) + output = self.bottleneck(acm_outs) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py new file mode 100644 index 000000000000..8d121ca61222 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/aspp_head.py @@ -0,0 +1,106 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class ASPPModule(nn.ModuleList): + """Atrous Spatial Pyramid Pooling (ASPP) Module. + + Args: + dilations (tuple[int]): Dilation rate of each layer. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, dilations, in_channels, channels, conv_cfg, norm_cfg, act_cfg): + super(ASPPModule, self).__init__() + self.dilations = dilations + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + for dilation in dilations: + self.append( + ConvModule( + self.in_channels, + self.channels, + 1 if dilation == 1 else 3, + dilation=dilation, + padding=0 if dilation == 1 else dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + ) + + def forward(self, x): + """Forward function.""" + aspp_outs = [] + for aspp_module in self: + aspp_outs.append(aspp_module(x)) + + return aspp_outs + + +@HEADS.register_module() +class ASPPHead(BaseDecodeHead): + """Rethinking Atrous Convolution for Semantic Image Segmentation. + + This head is the implementation of `DeepLabV3 + `_. + + Args: + dilations (tuple[int]): Dilation rates for ASPP module. + Default: (1, 6, 12, 18). + """ + + def __init__(self, dilations=(1, 6, 12, 18), **kwargs): + super(ASPPHead, self).__init__(**kwargs) + assert isinstance(dilations, (list, tuple)) + self.dilations = dilations + self.image_pool = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ), + ) + self.aspp_modules = ASPPModule( + dilations, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + self.bottleneck = ConvModule( + (len(dilations) + 1) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + aspp_outs = [resize(self.image_pool(x), size=x.size()[2:], mode='bilinear', align_corners=self.align_corners)] + aspp_outs.extend(self.aspp_modules(x)) + aspp_outs = torch.cat(aspp_outs, dim=1) + output = self.bottleneck(aspp_outs) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py new file mode 100644 index 000000000000..40f498d3679c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cascade_decode_head.py @@ -0,0 +1,56 @@ +from abc import ABCMeta, abstractmethod + +from .decode_head import BaseDecodeHead + + +class BaseCascadeDecodeHead(BaseDecodeHead, metaclass=ABCMeta): + """Base class for cascade decode head used in + :class:`CascadeEncoderDecoder.""" + + def __init__(self, *args, **kwargs): + super(BaseCascadeDecodeHead, self).__init__(*args, **kwargs) + + @abstractmethod + def forward(self, inputs, prev_output): + """Placeholder of forward function.""" + pass + + def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, train_cfg): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + seg_logits = self.forward(inputs, prev_output) + losses = self.losses(seg_logits, gt_semantic_seg) + + return losses + + def forward_test(self, inputs, prev_output, img_metas, test_cfg): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. + """ + return self.forward(inputs, prev_output) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py new file mode 100644 index 000000000000..98e0340501d5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/cc_head.py @@ -0,0 +1,41 @@ +import torch + +from ..builder import HEADS +from .fcn_head import FCNHead + +try: + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import CrissCrossAttention +except ModuleNotFoundError: + CrissCrossAttention = None + + +@HEADS.register_module() +class CCHead(FCNHead): + """CCNet: Criss-Cross Attention for Semantic Segmentation. + + This head is the implementation of `CCNet + `_. + + Args: + recurrence (int): Number of recurrence of Criss Cross Attention + module. Default: 2. + """ + + def __init__(self, recurrence=2, **kwargs): + if CrissCrossAttention is None: + raise RuntimeError('Please install mmcv-full for ' 'CrissCrossAttention ops') + super(CCHead, self).__init__(num_convs=2, **kwargs) + self.recurrence = recurrence + self.cca = CrissCrossAttention(self.channels) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + for _ in range(self.recurrence): + output = self.cca(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py new file mode 100644 index 000000000000..d63ed0e84dd5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/da_head.py @@ -0,0 +1,174 @@ +import torch +import torch.nn.functional as F +from torch import nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, Scale +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import add_prefix + +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .decode_head import BaseDecodeHead + + +class PAM(_SelfAttentionBlock): + """Position Attention Module (PAM) + + Args: + in_channels (int): Input channels of key/query feature. + channels (int): Output channels of key/query transform. + """ + + def __init__(self, in_channels, channels): + super(PAM, self).__init__( + key_in_channels=in_channels, + query_in_channels=in_channels, + channels=channels, + out_channels=in_channels, + share_key_query=False, + query_downsample=None, + key_downsample=None, + key_query_num_convs=1, + key_query_norm=False, + value_out_num_convs=1, + value_out_norm=False, + matmul_norm=False, + with_out=False, + conv_cfg=None, + norm_cfg=None, + act_cfg=None, + ) + + self.gamma = Scale(0) + + def forward(self, x): + """Forward function.""" + out = super(PAM, self).forward(x, x) + + out = self.gamma(out) + x + return out + + +class CAM(nn.Module): + """Channel Attention Module (CAM)""" + + def __init__(self): + super(CAM, self).__init__() + self.gamma = Scale(0) + + def forward(self, x): + """Forward function.""" + batch_size, channels, height, width = x.size() + proj_query = x.view(batch_size, channels, -1) + proj_key = x.view(batch_size, channels, -1).permute(0, 2, 1) + energy = torch.bmm(proj_query, proj_key) + energy_new = torch.max(energy, -1, keepdim=True)[0].expand_as(energy) - energy + attention = F.softmax(energy_new, dim=-1) + proj_value = x.view(batch_size, channels, -1) + + out = torch.bmm(attention, proj_value) + out = out.view(batch_size, channels, height, width) + + out = self.gamma(out) + x + return out + + +@HEADS.register_module() +class DAHead(BaseDecodeHead): + """Dual Attention Network for Scene Segmentation. + + This head is the implementation of `DANet + `_. + + Args: + pam_channels (int): The channels of Position Attention Module(PAM). + """ + + def __init__(self, pam_channels, **kwargs): + super(DAHead, self).__init__(**kwargs) + self.pam_channels = pam_channels + self.pam_in_conv = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + self.pam = PAM(self.channels, pam_channels) + self.pam_out_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + self.pam_conv_seg = nn.Conv2d(self.channels, self.num_classes, kernel_size=1) + + self.cam_in_conv = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + self.cam = CAM() + self.cam_out_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + self.cam_conv_seg = nn.Conv2d(self.channels, self.num_classes, kernel_size=1) + + def pam_cls_seg(self, feat): + """PAM feature classification.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.pam_conv_seg(feat) + return output + + def cam_cls_seg(self, feat): + """CAM feature classification.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.cam_conv_seg(feat) + return output + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + pam_feat = self.pam_in_conv(x) + pam_feat = self.pam(pam_feat) + pam_feat = self.pam_out_conv(pam_feat) + pam_out = self.pam_cls_seg(pam_feat) + + cam_feat = self.cam_in_conv(x) + cam_feat = self.cam(cam_feat) + cam_feat = self.cam_out_conv(cam_feat) + cam_out = self.cam_cls_seg(cam_feat) + + feat_sum = pam_feat + cam_feat + pam_cam_out = self.cls_seg(feat_sum) + + return pam_cam_out, pam_out, cam_out + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing, only ``pam_cam`` is used.""" + return self.forward(inputs)[0] + + def losses(self, seg_logit, seg_label): + """Compute ``pam_cam``, ``pam``, ``cam`` loss.""" + pam_cam_seg_logit, pam_seg_logit, cam_seg_logit = seg_logit + loss = dict() + loss.update(add_prefix(super(DAHead, self).losses(pam_cam_seg_logit, seg_label), 'pam_cam')) + loss.update(add_prefix(super(DAHead, self).losses(pam_seg_logit, seg_label), 'pam')) + loss.update(add_prefix(super(DAHead, self).losses(cam_seg_logit, seg_label), 'cam')) + return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py new file mode 100644 index 000000000000..e1aa23944d86 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/decode_head.py @@ -0,0 +1,226 @@ +from abc import ABCMeta, abstractmethod + +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import normal_init +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import auto_fp16, force_fp32 + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import build_pixel_sampler +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import build_loss +from ..losses import accuracy + + +class BaseDecodeHead(nn.Module, metaclass=ABCMeta): + """Base class for BaseDecodeHead. + + Args: + in_channels (int|Sequence[int]): Input channels. + channels (int): Channels after modules, before conv_seg. + num_classes (int): Number of classes. + dropout_ratio (float): Ratio of dropout layer. Default: 0.1. + conv_cfg (dict|None): Config of conv layers. Default: None. + norm_cfg (dict|None): Config of norm layers. Default: None. + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU') + in_index (int|Sequence[int]): Input feature index. Default: -1 + input_transform (str|None): Transformation type of input features. + Options: 'resize_concat', 'multiple_select', None. + 'resize_concat': Multiple feature maps will be resize to the + same size as first one and than concat together. + Usually used in FCN head of HRNet. + 'multiple_select': Multiple feature maps will be bundle into + a list and passed into decode head. + None: Only one select feature map is allowed. + Default: None. + loss_decode (dict): Config of decode loss. + Default: dict(type='CrossEntropyLoss'). + ignore_index (int | None): The label index to be ignored. When using + masked BCE loss, ignore_index should be set to None. Default: 255 + sampler (dict|None): The config of segmentation map sampler. + Default: None. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + """ + + def __init__( + self, + in_channels, + channels, + *, + num_classes, + dropout_ratio=0.1, + conv_cfg=None, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + in_index=-1, + input_transform=None, + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ignore_index=255, + sampler=None, + align_corners=False, + ): + super(BaseDecodeHead, self).__init__() + self._init_inputs(in_channels, in_index, input_transform) + self.channels = channels + self.num_classes = num_classes + self.dropout_ratio = dropout_ratio + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.in_index = in_index + self.loss_decode = build_loss(loss_decode) + self.ignore_index = ignore_index + self.align_corners = align_corners + if sampler is not None: + self.sampler = build_pixel_sampler(sampler, context=self) + else: + self.sampler = None + + self.conv_seg = nn.Conv2d(channels, num_classes, kernel_size=1) + if dropout_ratio > 0: + self.dropout = nn.Dropout2d(dropout_ratio) + else: + self.dropout = None + self.fp16_enabled = False + + def extra_repr(self): + """Extra repr.""" + s = ( + f'input_transform={self.input_transform}, ' + f'ignore_index={self.ignore_index}, ' + f'align_corners={self.align_corners}' + ) + return s + + def _init_inputs(self, in_channels, in_index, input_transform): + """Check and initialize input transforms. + + The in_channels, in_index and input_transform must match. + Specifically, when input_transform is None, only single feature map + will be selected. So in_channels and in_index must be of type int. + When input_transform + + Args: + in_channels (int|Sequence[int]): Input channels. + in_index (int|Sequence[int]): Input feature index. + input_transform (str|None): Transformation type of input features. + Options: 'resize_concat', 'multiple_select', None. + 'resize_concat': Multiple feature maps will be resize to the + same size as first one and than concat together. + Usually used in FCN head of HRNet. + 'multiple_select': Multiple feature maps will be bundle into + a list and passed into decode head. + None: Only one select feature map is allowed. + """ + + if input_transform is not None: + assert input_transform in ['resize_concat', 'multiple_select'] + self.input_transform = input_transform + self.in_index = in_index + if input_transform is not None: + assert isinstance(in_channels, (list, tuple)) + assert isinstance(in_index, (list, tuple)) + assert len(in_channels) == len(in_index) + if input_transform == 'resize_concat': + self.in_channels = sum(in_channels) + else: + self.in_channels = in_channels + else: + assert isinstance(in_channels, int) + assert isinstance(in_index, int) + self.in_channels = in_channels + + def init_weights(self): + """Initialize weights of classification layer.""" + normal_init(self.conv_seg, mean=0, std=0.01) + + def _transform_inputs(self, inputs): + """Transform inputs for decoder. + + Args: + inputs (list[Tensor]): List of multi-level img features. + + Returns: + Tensor: The transformed inputs + """ + + if self.input_transform == 'resize_concat': + inputs = [inputs[i] for i in self.in_index] + upsampled_inputs = [ + resize(input=x, size=inputs[0].shape[2:], mode='bilinear', align_corners=self.align_corners) + for x in inputs + ] + inputs = torch.cat(upsampled_inputs, dim=1) + elif self.input_transform == 'multiple_select': + inputs = [inputs[i] for i in self.in_index] + else: + inputs = inputs[self.in_index] + + return inputs + + @auto_fp16() + @abstractmethod + def forward(self, inputs): + """Placeholder of forward function.""" + pass + + def forward_train(self, inputs, img_metas, gt_semantic_seg, train_cfg): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + seg_logits = self.forward(inputs) + losses = self.losses(seg_logits, gt_semantic_seg) + return losses + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. + """ + return self.forward(inputs) + + def cls_seg(self, feat): + """Classify each pixel.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.conv_seg(feat) + return output + + @force_fp32(apply_to=('seg_logit',)) + def losses(self, seg_logit, seg_label): + """Compute segmentation loss.""" + loss = dict() + seg_logit = resize( + input=seg_logit, size=seg_label.shape[2:], mode='bilinear', align_corners=self.align_corners + ) + if self.sampler is not None: + seg_weight = self.sampler.sample(seg_logit, seg_label) + else: + seg_weight = None + seg_label = seg_label.squeeze(1) + loss['loss_seg'] = self.loss_decode(seg_logit, seg_label, weight=seg_weight, ignore_index=self.ignore_index) + loss['acc_seg'] = accuracy(seg_logit, seg_label) + return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py new file mode 100644 index 000000000000..a5a58165b326 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dm_head.py @@ -0,0 +1,137 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ( + ConvModule, + build_activation_layer, + build_norm_layer, +) + +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class DCM(nn.Module): + """Dynamic Convolutional Module used in DMNet. + + Args: + filter_size (int): The filter size of generated convolution kernel + used in Dynamic Convolutional Module. + fusion (bool): Add one conv to fuse DCM output feature. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict | None): Config of conv layers. + norm_cfg (dict | None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, filter_size, fusion, in_channels, channels, conv_cfg, norm_cfg, act_cfg): + super(DCM, self).__init__() + self.filter_size = filter_size + self.fusion = fusion + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.filter_gen_conv = nn.Conv2d(self.in_channels, self.channels, 1, 1, 0) + + self.input_redu_conv = ConvModule( + self.in_channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) + + if self.norm_cfg is not None: + self.norm = build_norm_layer(self.norm_cfg, self.channels)[1] + else: + self.norm = None + self.activate = build_activation_layer(self.act_cfg) + + if self.fusion: + self.fusion_conv = ConvModule( + self.channels, self.channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) + + def forward(self, x): + """Forward function.""" + generated_filter = self.filter_gen_conv(F.adaptive_avg_pool2d(x, self.filter_size)) + x = self.input_redu_conv(x) + b, c, h, w = x.shape + # [1, b * c, h, w], c = self.channels + x = x.view(1, b * c, h, w) + # [b * c, 1, filter_size, filter_size] + generated_filter = generated_filter.view(b * c, 1, self.filter_size, self.filter_size) + pad = (self.filter_size - 1) // 2 + if (self.filter_size - 1) % 2 == 0: + p2d = (pad, pad, pad, pad) + else: + p2d = (pad + 1, pad, pad + 1, pad) + x = F.pad(input=x, pad=p2d, mode='constant', value=0) + # [1, b * c, h, w] + output = F.conv2d(input=x, weight=generated_filter, groups=b * c) + # [b, c, h, w] + output = output.view(b, c, h, w) + if self.norm is not None: + output = self.norm(output) + output = self.activate(output) + + if self.fusion: + output = self.fusion_conv(output) + + return output + + +@HEADS.register_module() +class DMHead(BaseDecodeHead): + """Dynamic Multi-scale Filters for Semantic Segmentation. + + This head is the implementation of + `DMNet `_. + + Args: + filter_sizes (tuple[int]): The size of generated convolutional filters + used in Dynamic Convolutional Module. Default: (1, 3, 5, 7). + fusion (bool): Add one conv to fuse DCM output feature. + """ + + def __init__(self, filter_sizes=(1, 3, 5, 7), fusion=False, **kwargs): + super(DMHead, self).__init__(**kwargs) + assert isinstance(filter_sizes, (list, tuple)) + self.filter_sizes = filter_sizes + self.fusion = fusion + dcm_modules = [] + for filter_size in self.filter_sizes: + dcm_modules.append( + DCM( + filter_size, + self.fusion, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + ) + self.dcm_modules = nn.ModuleList(dcm_modules) + self.bottleneck = ConvModule( + self.in_channels + len(filter_sizes) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + dcm_outs = [x] + for dcm_module in self.dcm_modules: + dcm_outs.append(dcm_module(x)) + dcm_outs = torch.cat(dcm_outs, dim=1) + output = self.bottleneck(dcm_outs) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py new file mode 100644 index 000000000000..2ecd75787808 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/dnl_head.py @@ -0,0 +1,126 @@ +import torch +from torch import nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import NonLocal2d + +from ..builder import HEADS +from .fcn_head import FCNHead + + +class DisentangledNonLocal2d(NonLocal2d): + """Disentangled Non-Local Blocks. + + Args: + temperature (float): Temperature to adjust attention. Default: 0.05 + """ + + def __init__(self, *arg, temperature, **kwargs): + super().__init__(*arg, **kwargs) + self.temperature = temperature + self.conv_mask = nn.Conv2d(self.in_channels, 1, kernel_size=1) + + def embedded_gaussian(self, theta_x, phi_x): + """Embedded gaussian with temperature.""" + + # NonLocal2d pairwise_weight: [N, HxW, HxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + if self.use_scale: + # theta_x.shape[-1] is `self.inter_channels` + pairwise_weight /= theta_x.shape[-1] ** 0.5 + pairwise_weight /= self.temperature + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def forward(self, x): + # x: [N, C, H, W] + n = x.size(0) + + # g_x: [N, HxW, C] + g_x = self.g(x).view(n, self.inter_channels, -1) + g_x = g_x.permute(0, 2, 1) + + # theta_x: [N, HxW, C], phi_x: [N, C, HxW] + if self.mode == 'gaussian': + theta_x = x.view(n, self.in_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + if self.sub_sample: + phi_x = self.phi(x).view(n, self.in_channels, -1) + else: + phi_x = x.view(n, self.in_channels, -1) + elif self.mode == 'concatenation': + theta_x = self.theta(x).view(n, self.inter_channels, -1, 1) + phi_x = self.phi(x).view(n, self.inter_channels, 1, -1) + else: + theta_x = self.theta(x).view(n, self.inter_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + phi_x = self.phi(x).view(n, self.inter_channels, -1) + + # subtract mean + theta_x -= theta_x.mean(dim=-2, keepdim=True) + phi_x -= phi_x.mean(dim=-1, keepdim=True) + + pairwise_func = getattr(self, self.mode) + # pairwise_weight: [N, HxW, HxW] + pairwise_weight = pairwise_func(theta_x, phi_x) + + # y: [N, HxW, C] + y = torch.matmul(pairwise_weight, g_x) + # y: [N, C, H, W] + y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, *x.size()[2:]) + + # unary_mask: [N, 1, HxW] + unary_mask = self.conv_mask(x) + unary_mask = unary_mask.view(n, 1, -1) + unary_mask = unary_mask.softmax(dim=-1) + # unary_x: [N, 1, C] + unary_x = torch.matmul(unary_mask, g_x) + # unary_x: [N, C, 1, 1] + unary_x = unary_x.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, 1, 1) + + output = x + self.conv_out(y + unary_x) + + return output + + +@HEADS.register_module() +class DNLHead(FCNHead): + """Disentangled Non-Local Neural Networks. + + This head is the implementation of `DNLNet + `_. + + Args: + reduction (int): Reduction factor of projection transform. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + sqrt(1/inter_channels). Default: False. + mode (str): The nonlocal mode. Options are 'embedded_gaussian', + 'dot_product'. Default: 'embedded_gaussian.'. + temperature (float): Temperature to adjust attention. Default: 0.05 + """ + + def __init__(self, reduction=2, use_scale=True, mode='embedded_gaussian', temperature=0.05, **kwargs): + super(DNLHead, self).__init__(num_convs=2, **kwargs) + self.reduction = reduction + self.use_scale = use_scale + self.mode = mode + self.temperature = temperature + self.dnl_block = DisentangledNonLocal2d( + in_channels=self.channels, + reduction=self.reduction, + use_scale=self.use_scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + mode=self.mode, + temperature=self.temperature, + ) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.dnl_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py new file mode 100644 index 000000000000..f66406d38ff8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ema_head.py @@ -0,0 +1,155 @@ +import math + +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +def reduce_mean(tensor): + """Reduce mean when distributed training.""" + if not (dist.is_available() and dist.is_initialized()): + return tensor + tensor = tensor.clone() + dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM) + return tensor + + +class EMAModule(nn.Module): + """Expectation Maximization Attention Module used in EMANet. + + Args: + channels (int): Channels of the whole module. + num_bases (int): Number of bases. + num_stages (int): Number of the EM iterations. + """ + + def __init__(self, channels, num_bases, num_stages, momentum): + super(EMAModule, self).__init__() + assert num_stages >= 1, 'num_stages must be at least 1!' + self.num_bases = num_bases + self.num_stages = num_stages + self.momentum = momentum + + bases = torch.zeros(1, channels, self.num_bases) + bases.normal_(0, math.sqrt(2.0 / self.num_bases)) + # [1, channels, num_bases] + bases = F.normalize(bases, dim=1, p=2) + self.register_buffer('bases', bases) + + def forward(self, feats): + """Forward function.""" + batch_size, channels, height, width = feats.size() + # [batch_size, channels, height*width] + feats = feats.view(batch_size, channels, height * width) + # [batch_size, channels, num_bases] + bases = self.bases.repeat(batch_size, 1, 1) + + with torch.no_grad(): + for i in range(self.num_stages): + # [batch_size, height*width, num_bases] + attention = torch.einsum('bcn,bck->bnk', feats, bases) + attention = F.softmax(attention, dim=2) + # l1 norm + attention_normed = F.normalize(attention, dim=1, p=1) + # [batch_size, channels, num_bases] + bases = torch.einsum('bcn,bnk->bck', feats, attention_normed) + # l2 norm + bases = F.normalize(bases, dim=1, p=2) + + feats_recon = torch.einsum('bck,bnk->bcn', bases, attention) + feats_recon = feats_recon.view(batch_size, channels, height, width) + + if self.training: + bases = bases.mean(dim=0, keepdim=True) + bases = reduce_mean(bases) + # l2 norm + bases = F.normalize(bases, dim=1, p=2) + self.bases = (1 - self.momentum) * self.bases + self.momentum * bases + + return feats_recon + + +@HEADS.register_module() +class EMAHead(BaseDecodeHead): + """Expectation Maximization Attention Networks for Semantic Segmentation. + + This head is the implementation of `EMANet + `_. + + Args: + ema_channels (int): EMA module channels + num_bases (int): Number of bases. + num_stages (int): Number of the EM iterations. + concat_input (bool): Whether concat the input and output of convs + before classification layer. Default: True + momentum (float): Momentum to update the base. Default: 0.1. + """ + + def __init__(self, ema_channels, num_bases, num_stages, concat_input=True, momentum=0.1, **kwargs): + super(EMAHead, self).__init__(**kwargs) + self.ema_channels = ema_channels + self.num_bases = num_bases + self.num_stages = num_stages + self.concat_input = concat_input + self.momentum = momentum + self.ema_module = EMAModule(self.ema_channels, self.num_bases, self.num_stages, self.momentum) + + self.ema_in_conv = ConvModule( + self.in_channels, + self.ema_channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + # project (0, inf) -> (-inf, inf) + self.ema_mid_conv = ConvModule( + self.ema_channels, self.ema_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=None, act_cfg=None + ) + for param in self.ema_mid_conv.parameters(): + param.requires_grad = False + + self.ema_out_conv = ConvModule( + self.ema_channels, self.ema_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=None + ) + self.bottleneck = ConvModule( + self.ema_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + if self.concat_input: + self.conv_cat = ConvModule( + self.in_channels + self.channels, + self.channels, + kernel_size=3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + feats = self.ema_in_conv(x) + identity = feats + feats = self.ema_mid_conv(feats) + recon = self.ema_module(feats) + recon = F.relu(recon, inplace=True) + recon = self.ema_out_conv(recon) + output = F.relu(identity + recon, inplace=True) + output = self.bottleneck(output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py new file mode 100644 index 000000000000..8e94db9cebb4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/enc_head.py @@ -0,0 +1,174 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, build_norm_layer + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import Encoding, resize +from ..builder import HEADS, build_loss +from .decode_head import BaseDecodeHead + + +class EncModule(nn.Module): + """Encoding Module used in EncNet. + + Args: + in_channels (int): Input channels. + num_codes (int): Number of code words. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, in_channels, num_codes, conv_cfg, norm_cfg, act_cfg): + super(EncModule, self).__init__() + self.encoding_project = ConvModule( + in_channels, in_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg + ) + # TODO: resolve this hack + # change to 1d + if norm_cfg is not None: + encoding_norm_cfg = norm_cfg.copy() + if encoding_norm_cfg['type'] in ['BN', 'IN']: + encoding_norm_cfg['type'] += '1d' + else: + encoding_norm_cfg['type'] = encoding_norm_cfg['type'].replace('2d', '1d') + else: + # fallback to BN1d + encoding_norm_cfg = dict(type='BN1d') + self.encoding = nn.Sequential( + Encoding(channels=in_channels, num_codes=num_codes), + build_norm_layer(encoding_norm_cfg, num_codes)[1], + nn.ReLU(inplace=True), + ) + self.fc = nn.Sequential(nn.Linear(in_channels, in_channels), nn.Sigmoid()) + + def forward(self, x): + """Forward function.""" + encoding_projection = self.encoding_project(x) + encoding_feat = self.encoding(encoding_projection).mean(dim=1) + batch_size, channels, _, _ = x.size() + gamma = self.fc(encoding_feat) + y = gamma.view(batch_size, channels, 1, 1) + output = F.relu_(x + x * y) + return encoding_feat, output + + +@HEADS.register_module() +class EncHead(BaseDecodeHead): + """Context Encoding for Semantic Segmentation. + + This head is the implementation of `EncNet + `_. + + Args: + num_codes (int): Number of code words. Default: 32. + use_se_loss (bool): Whether use Semantic Encoding Loss (SE-loss) to + regularize the training. Default: True. + add_lateral (bool): Whether use lateral connection to fuse features. + Default: False. + loss_se_decode (dict): Config of decode loss. + Default: dict(type='CrossEntropyLoss', use_sigmoid=True). + """ + + def __init__( + self, + num_codes=32, + use_se_loss=True, + add_lateral=False, + loss_se_decode=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2), + **kwargs + ): + super(EncHead, self).__init__(input_transform='multiple_select', **kwargs) + self.use_se_loss = use_se_loss + self.add_lateral = add_lateral + self.num_codes = num_codes + self.bottleneck = ConvModule( + self.in_channels[-1], + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + if add_lateral: + self.lateral_convs = nn.ModuleList() + for in_channels in self.in_channels[:-1]: # skip the last one + self.lateral_convs.append( + ConvModule( + in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + ) + self.fusion = ConvModule( + len(self.in_channels) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + self.enc_module = EncModule( + self.channels, num_codes=num_codes, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) + if self.use_se_loss: + self.loss_se_decode = build_loss(loss_se_decode) + self.se_layer = nn.Linear(self.channels, self.num_classes) + + def forward(self, inputs): + """Forward function.""" + inputs = self._transform_inputs(inputs) + feat = self.bottleneck(inputs[-1]) + if self.add_lateral: + laterals = [ + resize(lateral_conv(inputs[i]), size=feat.shape[2:], mode='bilinear', align_corners=self.align_corners) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + feat = self.fusion(torch.cat([feat, *laterals], 1)) + encode_feat, output = self.enc_module(feat) + output = self.cls_seg(output) + if self.use_se_loss: + se_output = self.se_layer(encode_feat) + return output, se_output + else: + return output + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing, ignore se_loss.""" + if self.use_se_loss: + return self.forward(inputs)[0] + else: + return self.forward(inputs) + + @staticmethod + def _convert_to_onehot_labels(seg_label, num_classes): + """Convert segmentation label to onehot. + + Args: + seg_label (Tensor): Segmentation label of shape (N, H, W). + num_classes (int): Number of classes. + + Returns: + Tensor: Onehot labels of shape (N, num_classes). + """ + + batch_size = seg_label.size(0) + onehot_labels = seg_label.new_zeros((batch_size, num_classes)) + for i in range(batch_size): + hist = seg_label[i].float().histc(bins=num_classes, min=0, max=num_classes - 1) + onehot_labels[i] = hist > 0 + return onehot_labels + + def losses(self, seg_logit, seg_label): + """Compute segmentation and semantic encoding loss.""" + seg_logit, se_seg_logit = seg_logit + loss = dict() + loss.update(super(EncHead, self).losses(seg_logit, seg_label)) + se_loss = self.loss_se_decode(se_seg_logit, self._convert_to_onehot_labels(seg_label, self.num_classes)) + loss['loss_se'] = se_loss + return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py new file mode 100644 index 000000000000..7e1a34a2a416 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fcn_head.py @@ -0,0 +1,81 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +@HEADS.register_module() +class FCNHead(BaseDecodeHead): + """Fully Convolution Networks for Semantic Segmentation. + + This head is implemented of `FCNNet `_. + + Args: + num_convs (int): Number of convs in the head. Default: 2. + kernel_size (int): The kernel size for convs in the head. Default: 3. + concat_input (bool): Whether concat the input and output of convs + before classification layer. + dilation (int): The dilation rate for convs in the head. Default: 1. + """ + + def __init__(self, num_convs=2, kernel_size=3, concat_input=True, dilation=1, **kwargs): + assert num_convs >= 0 and dilation > 0 and isinstance(dilation, int) + self.num_convs = num_convs + self.concat_input = concat_input + self.kernel_size = kernel_size + super(FCNHead, self).__init__(**kwargs) + if num_convs == 0: + assert self.in_channels == self.channels + + conv_padding = (kernel_size // 2) * dilation + convs = [] + convs.append( + ConvModule( + self.in_channels, + self.channels, + kernel_size=kernel_size, + padding=conv_padding, + dilation=dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + ) + for i in range(num_convs - 1): + convs.append( + ConvModule( + self.channels, + self.channels, + kernel_size=kernel_size, + padding=conv_padding, + dilation=dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + ) + if num_convs == 0: + self.convs = nn.Identity() + else: + self.convs = nn.Sequential(*convs) + if self.concat_input: + self.conv_cat = ConvModule( + self.in_channels + self.channels, + self.channels, + kernel_size=kernel_size, + padding=kernel_size // 2, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs(x) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py new file mode 100644 index 000000000000..28637489e7a7 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/fpn_head.py @@ -0,0 +1,61 @@ +import numpy as np +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +@HEADS.register_module() +class FPNHead(BaseDecodeHead): + """Panoptic Feature Pyramid Networks. + + This head is the implementation of `Semantic FPN + `_. + + Args: + feature_strides (tuple[int]): The strides for input feature maps. + stack_lateral. All strides suppose to be power of 2. The first + one is of largest resolution. + """ + + def __init__(self, feature_strides, **kwargs): + super(FPNHead, self).__init__(input_transform='multiple_select', **kwargs) + assert len(feature_strides) == len(self.in_channels) + assert min(feature_strides) == feature_strides[0] + self.feature_strides = feature_strides + + self.scale_heads = nn.ModuleList() + for i in range(len(feature_strides)): + head_length = max(1, int(np.log2(feature_strides[i]) - np.log2(feature_strides[0]))) + scale_head = [] + for k in range(head_length): + scale_head.append( + ConvModule( + self.in_channels[i] if k == 0 else self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + ) + if feature_strides[i] != feature_strides[0]: + scale_head.append(nn.Upsample(scale_factor=2, mode='bilinear', align_corners=self.align_corners)) + self.scale_heads.append(nn.Sequential(*scale_head)) + + def forward(self, inputs): + + x = self._transform_inputs(inputs) + + output = self.scale_heads[0](x[0]) + for i in range(1, len(self.feature_strides)): + # non inplace + output = output + resize( + self.scale_heads[i](x[i]), size=output.shape[2:], mode='bilinear', align_corners=self.align_corners + ) + + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py new file mode 100644 index 000000000000..8898bdffe5c6 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/gc_head.py @@ -0,0 +1,41 @@ +import torch +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ContextBlock + +from ..builder import HEADS +from .fcn_head import FCNHead + + +@HEADS.register_module() +class GCHead(FCNHead): + """GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond. + + This head is the implementation of `GCNet + `_. + + Args: + ratio (float): Multiplier of channels ratio. Default: 1/4. + pooling_type (str): The pooling type of context aggregation. + Options are 'att', 'avg'. Default: 'avg'. + fusion_types (tuple[str]): The fusion type for feature fusion. + Options are 'channel_add', 'channel_mul'. Default: ('channel_add',) + """ + + def __init__(self, ratio=1 / 4.0, pooling_type='att', fusion_types=('channel_add',), **kwargs): + super(GCHead, self).__init__(num_convs=2, **kwargs) + self.ratio = ratio + self.pooling_type = pooling_type + self.fusion_types = fusion_types + self.gc_block = ContextBlock( + in_channels=self.channels, ratio=self.ratio, pooling_type=self.pooling_type, fusion_types=self.fusion_types + ) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.gc_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py new file mode 100644 index 000000000000..75e2fa4b1e19 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/lraspp_head.py @@ -0,0 +1,77 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv import is_tuple_of +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +@HEADS.register_module() +class LRASPPHead(BaseDecodeHead): + """Lite R-ASPP (LRASPP) head is proposed in Searching for MobileNetV3. + + This head is the improved implementation of `Searching for MobileNetV3 + `_. + + Args: + branch_channels (tuple[int]): The number of output channels in every + each branch. Default: (32, 64). + """ + + def __init__(self, branch_channels=(32, 64), **kwargs): + super(LRASPPHead, self).__init__(**kwargs) + if self.input_transform != 'multiple_select': + raise ValueError( + 'in Lite R-ASPP (LRASPP) head, input_transform ' + f'must be \'multiple_select\'. But received ' + f'\'{self.input_transform}\'' + ) + assert is_tuple_of(branch_channels, int) + assert len(branch_channels) == len(self.in_channels) - 1 + self.branch_channels = branch_channels + + self.convs = nn.Sequential() + self.conv_ups = nn.Sequential() + for i in range(len(branch_channels)): + self.convs.add_module(f'conv{i}', nn.Conv2d(self.in_channels[i], branch_channels[i], 1, bias=False)) + self.conv_ups.add_module( + f'conv_up{i}', + ConvModule( + self.channels + branch_channels[i], + self.channels, + 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + bias=False, + ), + ) + + self.conv_up_input = nn.Conv2d(self.channels, self.channels, 1) + + self.aspp_conv = ConvModule( + self.in_channels[-1], self.channels, 1, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, bias=False + ) + self.image_pool = nn.Sequential( + nn.AvgPool2d(kernel_size=49, stride=(16, 20)), + ConvModule(self.in_channels[2], self.channels, 1, act_cfg=dict(type='Sigmoid'), bias=False), + ) + + def forward(self, inputs): + """Forward function.""" + inputs = self._transform_inputs(inputs) + + x = inputs[-1] + + x = self.aspp_conv(x) * resize( + self.image_pool(x), size=x.size()[2:], mode='bilinear', align_corners=self.align_corners + ) + x = self.conv_up_input(x) + + for i in range(len(self.branch_channels) - 1, -1, -1): + x = resize(x, size=inputs[i].size()[2:], mode='bilinear', align_corners=self.align_corners) + x = torch.cat([x, self.convs[i](inputs[i])], 1) + x = self.conv_ups[i](x) + + return self.cls_seg(x) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py new file mode 100644 index 000000000000..0f3def19ccc5 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/nl_head.py @@ -0,0 +1,46 @@ +import torch +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import NonLocal2d + +from ..builder import HEADS +from .fcn_head import FCNHead + + +@HEADS.register_module() +class NLHead(FCNHead): + """Non-local Neural Networks. + + This head is the implementation of `NLNet + `_. + + Args: + reduction (int): Reduction factor of projection transform. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + sqrt(1/inter_channels). Default: True. + mode (str): The nonlocal mode. Options are 'embedded_gaussian', + 'dot_product'. Default: 'embedded_gaussian.'. + """ + + def __init__(self, reduction=2, use_scale=True, mode='embedded_gaussian', **kwargs): + super(NLHead, self).__init__(num_convs=2, **kwargs) + self.reduction = reduction + self.use_scale = use_scale + self.mode = mode + self.nl_block = NonLocal2d( + in_channels=self.channels, + reduction=self.reduction, + use_scale=self.use_scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + mode=self.mode, + ) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.nl_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py new file mode 100644 index 000000000000..c5d9c3bfa89b --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/ocr_head.py @@ -0,0 +1,124 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .cascade_decode_head import BaseCascadeDecodeHead + + +class SpatialGatherModule(nn.Module): + """Aggregate the context features according to the initial predicted + probability distribution. + + Employ the soft-weighted method to aggregate the context. + """ + + def __init__(self, scale): + super(SpatialGatherModule, self).__init__() + self.scale = scale + + def forward(self, feats, probs): + """Forward function.""" + batch_size, num_classes, height, width = probs.size() + channels = feats.size(1) + probs = probs.view(batch_size, num_classes, -1) + feats = feats.view(batch_size, channels, -1) + # [batch_size, height*width, num_classes] + feats = feats.permute(0, 2, 1) + # [batch_size, channels, height*width] + probs = F.softmax(self.scale * probs, dim=2) + # [batch_size, channels, num_classes] + ocr_context = torch.matmul(probs, feats) + ocr_context = ocr_context.permute(0, 2, 1).contiguous().unsqueeze(3) + return ocr_context + + +class ObjectAttentionBlock(_SelfAttentionBlock): + """Make a OCR used SelfAttentionBlock.""" + + def __init__(self, in_channels, channels, scale, conv_cfg, norm_cfg, act_cfg): + if scale > 1: + query_downsample = nn.MaxPool2d(kernel_size=scale) + else: + query_downsample = None + super(ObjectAttentionBlock, self).__init__( + key_in_channels=in_channels, + query_in_channels=in_channels, + channels=channels, + out_channels=in_channels, + share_key_query=False, + query_downsample=query_downsample, + key_downsample=None, + key_query_num_convs=2, + key_query_norm=True, + value_out_num_convs=1, + value_out_norm=True, + matmul_norm=True, + with_out=True, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + self.bottleneck = ConvModule( + in_channels * 2, in_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) + + def forward(self, query_feats, key_feats): + """Forward function.""" + context = super(ObjectAttentionBlock, self).forward(query_feats, key_feats) + output = self.bottleneck(torch.cat([context, query_feats], dim=1)) + if self.query_downsample is not None: + output = resize(query_feats) + + return output + + +@HEADS.register_module() +class OCRHead(BaseCascadeDecodeHead): + """Object-Contextual Representations for Semantic Segmentation. + + This head is the implementation of `OCRNet + `_. + + Args: + ocr_channels (int): The intermediate channels of OCR block. + scale (int): The scale of probability map in SpatialGatherModule in + Default: 1. + """ + + def __init__(self, ocr_channels, scale=1, **kwargs): + super(OCRHead, self).__init__(**kwargs) + self.ocr_channels = ocr_channels + self.scale = scale + self.object_context_block = ObjectAttentionBlock( + self.channels, + self.ocr_channels, + self.scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + self.spatial_gather_module = SpatialGatherModule(self.scale) + + self.bottleneck = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + + def forward(self, inputs, prev_output): + """Forward function.""" + x = self._transform_inputs(inputs) + feats = self.bottleneck(x) + context = self.spatial_gather_module(feats, prev_output) + object_context = self.object_context_block(feats, context) + output = self.cls_seg(object_context) + + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py new file mode 100644 index 000000000000..7e4c0ef8e10f --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/point_head.py @@ -0,0 +1,312 @@ +# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend/point_head/point_head.py # noqa + +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, normal_init +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import point_sample + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.models.builder import HEADS +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..losses import accuracy +from .cascade_decode_head import BaseCascadeDecodeHead + + +def calculate_uncertainty(seg_logits): + """Estimate uncertainty based on seg logits. + + For each location of the prediction ``seg_logits`` we estimate + uncertainty as the difference between top first and top second + predicted logits. + + Args: + seg_logits (Tensor): Semantic segmentation logits, + shape (batch_size, num_classes, height, width). + + Returns: + scores (Tensor): T uncertainty scores with the most uncertain + locations having the highest uncertainty score, shape ( + batch_size, 1, height, width) + """ + top2_scores = torch.topk(seg_logits, k=2, dim=1)[0] + return (top2_scores[:, 1] - top2_scores[:, 0]).unsqueeze(1) + + +@HEADS.register_module() +class PointHead(BaseCascadeDecodeHead): + """A mask point head use in PointRend. + + ``PointHead`` use shared multi-layer perceptron (equivalent to + nn.Conv1d) to predict the logit of input points. The fine-grained feature + and coarse feature will be concatenate together for predication. + + Args: + num_fcs (int): Number of fc layers in the head. Default: 3. + in_channels (int): Number of input channels. Default: 256. + fc_channels (int): Number of fc channels. Default: 256. + num_classes (int): Number of classes for logits. Default: 80. + class_agnostic (bool): Whether use class agnostic classification. + If so, the output channels of logits will be 1. Default: False. + coarse_pred_each_layer (bool): Whether concatenate coarse feature with + the output of each fc layer. Default: True. + conv_cfg (dict|None): Dictionary to construct and config conv layer. + Default: dict(type='Conv1d')) + norm_cfg (dict|None): Dictionary to construct and config norm layer. + Default: None. + loss_point (dict): Dictionary to construct and config loss layer of + point head. Default: dict(type='CrossEntropyLoss', use_mask=True, + loss_weight=1.0). + """ + + def __init__( + self, + num_fcs=3, + coarse_pred_each_layer=True, + conv_cfg=dict(type='Conv1d'), + norm_cfg=None, + act_cfg=dict(type='ReLU', inplace=False), + **kwargs + ): + super(PointHead, self).__init__( + input_transform='multiple_select', conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, **kwargs + ) + + self.num_fcs = num_fcs + self.coarse_pred_each_layer = coarse_pred_each_layer + + fc_in_channels = sum(self.in_channels) + self.num_classes + fc_channels = self.channels + self.fcs = nn.ModuleList() + for k in range(num_fcs): + fc = ConvModule( + fc_in_channels, + fc_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + self.fcs.append(fc) + fc_in_channels = fc_channels + fc_in_channels += self.num_classes if self.coarse_pred_each_layer else 0 + self.fc_seg = nn.Conv1d(fc_in_channels, self.num_classes, kernel_size=1, stride=1, padding=0) + if self.dropout_ratio > 0: + self.dropout = nn.Dropout(self.dropout_ratio) + delattr(self, 'conv_seg') + + def init_weights(self): + """Initialize weights of classification layer.""" + normal_init(self.fc_seg, std=0.001) + + def cls_seg(self, feat): + """Classify each pixel with fc.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.fc_seg(feat) + return output + + def forward(self, fine_grained_point_feats, coarse_point_feats): + x = torch.cat([fine_grained_point_feats, coarse_point_feats], dim=1) + for fc in self.fcs: + x = fc(x) + if self.coarse_pred_each_layer: + x = torch.cat((x, coarse_point_feats), dim=1) + return self.cls_seg(x) + + def _get_fine_grained_point_feats(self, x, points): + """Sample from fine grained features. + + Args: + x (list[Tensor]): Feature pyramid from by neck or backbone. + points (Tensor): Point coordinates, shape (batch_size, + num_points, 2). + + Returns: + fine_grained_feats (Tensor): Sampled fine grained feature, + shape (batch_size, sum(channels of x), num_points). + """ + + fine_grained_feats_list = [point_sample(_, points, align_corners=self.align_corners) for _ in x] + if len(fine_grained_feats_list) > 1: + fine_grained_feats = torch.cat(fine_grained_feats_list, dim=1) + else: + fine_grained_feats = fine_grained_feats_list[0] + + return fine_grained_feats + + def _get_coarse_point_feats(self, prev_output, points): + """Sample from fine grained features. + + Args: + prev_output (list[Tensor]): Prediction of previous decode head. + points (Tensor): Point coordinates, shape (batch_size, + num_points, 2). + + Returns: + coarse_feats (Tensor): Sampled coarse feature, shape (batch_size, + num_classes, num_points). + """ + + coarse_feats = point_sample(prev_output, points, align_corners=self.align_corners) + + return coarse_feats + + def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, train_cfg): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + x = self._transform_inputs(inputs) + with torch.no_grad(): + points = self.get_points_train(prev_output, calculate_uncertainty, cfg=train_cfg) + fine_grained_point_feats = self._get_fine_grained_point_feats(x, points) + coarse_point_feats = self._get_coarse_point_feats(prev_output, points) + point_logits = self.forward(fine_grained_point_feats, coarse_point_feats) + point_label = point_sample(gt_semantic_seg.float(), points, mode='nearest', align_corners=self.align_corners) + point_label = point_label.squeeze(1).long() + + losses = self.losses(point_logits, point_label) + + return losses + + def forward_test(self, inputs, prev_output, img_metas, test_cfg): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. + """ + + x = self._transform_inputs(inputs) + refined_seg_logits = prev_output.clone() + for _ in range(test_cfg.subdivision_steps): + refined_seg_logits = resize( + refined_seg_logits, + scale_factor=test_cfg.scale_factor, + mode='bilinear', + align_corners=self.align_corners, + ) + batch_size, channels, height, width = refined_seg_logits.shape + point_indices, points = self.get_points_test(refined_seg_logits, calculate_uncertainty, cfg=test_cfg) + fine_grained_point_feats = self._get_fine_grained_point_feats(x, points) + coarse_point_feats = self._get_coarse_point_feats(prev_output, points) + point_logits = self.forward(fine_grained_point_feats, coarse_point_feats) + + point_indices = point_indices.unsqueeze(1).expand(-1, channels, -1) + refined_seg_logits = refined_seg_logits.reshape(batch_size, channels, height * width) + refined_seg_logits = refined_seg_logits.scatter_(2, point_indices, point_logits) + refined_seg_logits = refined_seg_logits.view(batch_size, channels, height, width) + + return refined_seg_logits + + def losses(self, point_logits, point_label): + """Compute segmentation loss.""" + loss = dict() + loss['loss_point'] = self.loss_decode(point_logits, point_label, ignore_index=self.ignore_index) + loss['acc_point'] = accuracy(point_logits, point_label) + return loss + + def get_points_train(self, seg_logits, uncertainty_func, cfg): + """Sample points for training. + + Sample points in [0, 1] x [0, 1] coordinate space based on their + uncertainty. The uncertainties are calculated for each point using + 'uncertainty_func' function that takes point's logit prediction as + input. + + Args: + seg_logits (Tensor): Semantic segmentation logits, shape ( + batch_size, num_classes, height, width). + uncertainty_func (func): uncertainty calculation function. + cfg (dict): Training config of point head. + + Returns: + point_coords (Tensor): A tensor of shape (batch_size, num_points, + 2) that contains the coordinates of ``num_points`` sampled + points. + """ + num_points = cfg.num_points + oversample_ratio = cfg.oversample_ratio + importance_sample_ratio = cfg.importance_sample_ratio + assert oversample_ratio >= 1 + assert 0 <= importance_sample_ratio <= 1 + batch_size = seg_logits.shape[0] + num_sampled = int(num_points * oversample_ratio) + point_coords = torch.rand(batch_size, num_sampled, 2, device=seg_logits.device) + point_logits = point_sample(seg_logits, point_coords) + # It is crucial to calculate uncertainty based on the sampled + # prediction value for the points. Calculating uncertainties of the + # coarse predictions first and sampling them for points leads to + # incorrect results. To illustrate this: assume uncertainty func( + # logits)=-abs(logits), a sampled point between two coarse + # predictions with -1 and 1 logits has 0 logits, and therefore 0 + # uncertainty value. However, if we calculate uncertainties for the + # coarse predictions first, both will have -1 uncertainty, + # and sampled point will get -1 uncertainty. + point_uncertainties = uncertainty_func(point_logits) + num_uncertain_points = int(importance_sample_ratio * num_points) + num_random_points = num_points - num_uncertain_points + idx = torch.topk(point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1] + shift = num_sampled * torch.arange(batch_size, dtype=torch.long, device=seg_logits.device) + idx += shift[:, None] + point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view(batch_size, num_uncertain_points, 2) + if num_random_points > 0: + rand_point_coords = torch.rand(batch_size, num_random_points, 2, device=seg_logits.device) + point_coords = torch.cat((point_coords, rand_point_coords), dim=1) + return point_coords + + def get_points_test(self, seg_logits, uncertainty_func, cfg): + """Sample points for testing. + + Find ``num_points`` most uncertain points from ``uncertainty_map``. + + Args: + seg_logits (Tensor): A tensor of shape (batch_size, num_classes, + height, width) for class-specific or class-agnostic prediction. + uncertainty_func (func): uncertainty calculation function. + cfg (dict): Testing config of point head. + + Returns: + point_indices (Tensor): A tensor of shape (batch_size, num_points) + that contains indices from [0, height x width) of the most + uncertain points. + point_coords (Tensor): A tensor of shape (batch_size, num_points, + 2) that contains [0, 1] x [0, 1] normalized coordinates of the + most uncertain points from the ``height x width`` grid . + """ + + num_points = cfg.subdivision_num_points + uncertainty_map = uncertainty_func(seg_logits) + batch_size, _, height, width = uncertainty_map.shape + h_step = 1.0 / height + w_step = 1.0 / width + + uncertainty_map = uncertainty_map.view(batch_size, height * width) + num_points = min(height * width, num_points) + point_indices = uncertainty_map.topk(num_points, dim=1)[1] + point_coords = torch.zeros(batch_size, num_points, 2, dtype=torch.float, device=seg_logits.device) + point_coords[:, :, 0] = w_step / 2.0 + (point_indices % width).float() * w_step + point_coords[:, :, 1] = h_step / 2.0 + (point_indices // width).float() * h_step + return point_indices, point_coords diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py new file mode 100644 index 000000000000..3ef4088a23e4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psa_head.py @@ -0,0 +1,186 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + +try: + from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.ops import PSAMask +except ModuleNotFoundError: + PSAMask = None + + +@HEADS.register_module() +class PSAHead(BaseDecodeHead): + """Point-wise Spatial Attention Network for Scene Parsing. + + This head is the implementation of `PSANet + `_. + + Args: + mask_size (tuple[int]): The PSA mask size. It usually equals input + size. + psa_type (str): The type of psa module. Options are 'collect', + 'distribute', 'bi-direction'. Default: 'bi-direction' + compact (bool): Whether use compact map for 'collect' mode. + Default: True. + shrink_factor (int): The downsample factors of psa mask. Default: 2. + normalization_factor (float): The normalize factor of attention. + psa_softmax (bool): Whether use softmax for attention. + """ + + def __init__( + self, + mask_size, + psa_type='bi-direction', + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + **kwargs + ): + if PSAMask is None: + raise RuntimeError('Please install mmcv-full for PSAMask ops') + super(PSAHead, self).__init__(**kwargs) + assert psa_type in ['collect', 'distribute', 'bi-direction'] + self.psa_type = psa_type + self.compact = compact + self.shrink_factor = shrink_factor + self.mask_size = mask_size + mask_h, mask_w = mask_size + self.psa_softmax = psa_softmax + if normalization_factor is None: + normalization_factor = mask_h * mask_w + self.normalization_factor = normalization_factor + + self.reduce = ConvModule( + self.in_channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + self.attention = nn.Sequential( + ConvModule( + self.channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ), + nn.Conv2d(self.channels, mask_h * mask_w, kernel_size=1, bias=False), + ) + if psa_type == 'bi-direction': + self.reduce_p = ConvModule( + self.in_channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + self.attention_p = nn.Sequential( + ConvModule( + self.channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ), + nn.Conv2d(self.channels, mask_h * mask_w, kernel_size=1, bias=False), + ) + self.psamask_collect = PSAMask('collect', mask_size) + self.psamask_distribute = PSAMask('distribute', mask_size) + else: + self.psamask = PSAMask(psa_type, mask_size) + self.proj = ConvModule( + self.channels * (2 if psa_type == 'bi-direction' else 1), + self.in_channels, + kernel_size=1, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + self.bottleneck = ConvModule( + self.in_channels * 2, + self.channels, + kernel_size=3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + identity = x + align_corners = self.align_corners + if self.psa_type in ['collect', 'distribute']: + out = self.reduce(x) + n, c, h, w = out.size() + if self.shrink_factor != 1: + if h % self.shrink_factor and w % self.shrink_factor: + h = (h - 1) // self.shrink_factor + 1 + w = (w - 1) // self.shrink_factor + 1 + align_corners = True + else: + h = h // self.shrink_factor + w = w // self.shrink_factor + align_corners = False + out = resize(out, size=(h, w), mode='bilinear', align_corners=align_corners) + y = self.attention(out) + if self.compact: + if self.psa_type == 'collect': + y = y.view(n, h * w, h * w).transpose(1, 2).view(n, h * w, h, w) + else: + y = self.psamask(y) + if self.psa_softmax: + y = F.softmax(y, dim=1) + out = torch.bmm(out.view(n, c, h * w), y.view(n, h * w, h * w)).view(n, c, h, w) * ( + 1.0 / self.normalization_factor + ) + else: + x_col = self.reduce(x) + x_dis = self.reduce_p(x) + n, c, h, w = x_col.size() + if self.shrink_factor != 1: + if h % self.shrink_factor and w % self.shrink_factor: + h = (h - 1) // self.shrink_factor + 1 + w = (w - 1) // self.shrink_factor + 1 + align_corners = True + else: + h = h // self.shrink_factor + w = w // self.shrink_factor + align_corners = False + x_col = resize(x_col, size=(h, w), mode='bilinear', align_corners=align_corners) + x_dis = resize(x_dis, size=(h, w), mode='bilinear', align_corners=align_corners) + y_col = self.attention(x_col) + y_dis = self.attention_p(x_dis) + if self.compact: + y_dis = y_dis.view(n, h * w, h * w).transpose(1, 2).view(n, h * w, h, w) + else: + y_col = self.psamask_collect(y_col) + y_dis = self.psamask_distribute(y_dis) + if self.psa_softmax: + y_col = F.softmax(y_col, dim=1) + y_dis = F.softmax(y_dis, dim=1) + x_col = torch.bmm(x_col.view(n, c, h * w), y_col.view(n, h * w, h * w)).view(n, c, h, w) * ( + 1.0 / self.normalization_factor + ) + x_dis = torch.bmm(x_dis.view(n, c, h * w), y_dis.view(n, h * w, h * w)).view(n, c, h, w) * ( + 1.0 / self.normalization_factor + ) + out = torch.cat([x_col, x_dis], 1) + out = self.proj(out) + out = resize(out, size=identity.shape[2:], mode='bilinear', align_corners=align_corners) + out = self.bottleneck(torch.cat((identity, out), dim=1)) + out = self.cls_seg(out) + return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py new file mode 100644 index 000000000000..ad87e1514885 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/psp_head.py @@ -0,0 +1,101 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class PPM(nn.ModuleList): + """Pooling Pyramid Module used in PSPNet. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + align_corners (bool): align_corners argument of F.interpolate. + """ + + def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg, act_cfg, align_corners): + super(PPM, self).__init__() + self.pool_scales = pool_scales + self.align_corners = align_corners + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + for pool_scale in pool_scales: + self.append( + nn.Sequential( + nn.AdaptiveAvgPool2d(pool_scale), + ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ), + ) + ) + + def forward(self, x): + """Forward function.""" + ppm_outs = [] + for ppm in self: + ppm_out = ppm(x) + upsampled_ppm_out = resize(ppm_out, size=x.size()[2:], mode='bilinear', align_corners=self.align_corners) + ppm_outs.append(upsampled_ppm_out) + return ppm_outs + + +@HEADS.register_module() +class PSPHead(BaseDecodeHead): + """Pyramid Scene Parsing Network. + + This head is the implementation of + `PSPNet `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. Default: (1, 2, 3, 6). + """ + + def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): + super(PSPHead, self).__init__(**kwargs) + assert isinstance(pool_scales, (list, tuple)) + self.pool_scales = pool_scales + self.psp_modules = PPM( + self.pool_scales, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners, + ) + self.bottleneck = ConvModule( + self.in_channels + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + psp_outs = [x] + psp_outs.extend(self.psp_modules(x)) + psp_outs = torch.cat(psp_outs, dim=1) + output = self.bottleneck(psp_outs) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py new file mode 100644 index 000000000000..6a6db6e93945 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_aspp_head.py @@ -0,0 +1,82 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, DepthwiseSeparableConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .aspp_head import ASPPHead, ASPPModule + + +class DepthwiseSeparableASPPModule(ASPPModule): + """Atrous Spatial Pyramid Pooling (ASPP) Module with depthwise separable + conv.""" + + def __init__(self, **kwargs): + super(DepthwiseSeparableASPPModule, self).__init__(**kwargs) + for i, dilation in enumerate(self.dilations): + if dilation > 1: + self[i] = DepthwiseSeparableConvModule( + self.in_channels, + self.channels, + 3, + dilation=dilation, + padding=dilation, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + + +@HEADS.register_module() +class DepthwiseSeparableASPPHead(ASPPHead): + """Encoder-Decoder with Atrous Separable Convolution for Semantic Image + Segmentation. + + This head is the implementation of `DeepLabV3+ + `_. + + Args: + c1_in_channels (int): The input channels of c1 decoder. If is 0, + the no decoder will be used. + c1_channels (int): The intermediate channels of c1 decoder. + """ + + def __init__(self, c1_in_channels, c1_channels, **kwargs): + super(DepthwiseSeparableASPPHead, self).__init__(**kwargs) + assert c1_in_channels >= 0 + self.aspp_modules = DepthwiseSeparableASPPModule( + dilations=self.dilations, + in_channels=self.in_channels, + channels=self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + if c1_in_channels > 0: + self.c1_bottleneck = ConvModule( + c1_in_channels, c1_channels, 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ) + else: + self.c1_bottleneck = None + self.sep_bottleneck = nn.Sequential( + DepthwiseSeparableConvModule( + self.channels + c1_channels, self.channels, 3, padding=1, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ), + DepthwiseSeparableConvModule( + self.channels, self.channels, 3, padding=1, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg + ), + ) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + aspp_outs = [resize(self.image_pool(x), size=x.size()[2:], mode='bilinear', align_corners=self.align_corners)] + aspp_outs.extend(self.aspp_modules(x)) + aspp_outs = torch.cat(aspp_outs, dim=1) + output = self.bottleneck(aspp_outs) + if self.c1_bottleneck is not None: + c1_output = self.c1_bottleneck(inputs[0]) + output = resize(input=output, size=c1_output.shape[2:], mode='bilinear', align_corners=self.align_corners) + output = torch.cat([output, c1_output], dim=1) + output = self.sep_bottleneck(output) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py new file mode 100644 index 000000000000..1df80ae7b4ef --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/sep_fcn_head.py @@ -0,0 +1,54 @@ +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import DepthwiseSeparableConvModule + +from ..builder import HEADS +from .fcn_head import FCNHead + + +@HEADS.register_module() +class DepthwiseSeparableFCNHead(FCNHead): + """Depthwise-Separable Fully Convolutional Network for Semantic + Segmentation. + + This head is implemented according to Fast-SCNN paper. + Args: + in_channels(int): Number of output channels of FFM. + channels(int): Number of middle-stage channels in the decode head. + concat_input(bool): Whether to concatenate original decode input into + the result of several consecutive convolution layers. + Default: True. + num_classes(int): Used to determine the dimension of + final prediction tensor. + in_index(int): Correspond with 'out_indices' in FastSCNN backbone. + norm_cfg (dict | None): Config of norm layers. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + loss_decode(dict): Config of loss type and some + relevant additional options. + """ + + def __init__(self, **kwargs): + super(DepthwiseSeparableFCNHead, self).__init__(**kwargs) + self.convs[0] = DepthwiseSeparableConvModule( + self.in_channels, + self.channels, + kernel_size=self.kernel_size, + padding=self.kernel_size // 2, + norm_cfg=self.norm_cfg, + ) + for i in range(1, self.num_convs): + self.convs[i] = DepthwiseSeparableConvModule( + self.channels, + self.channels, + kernel_size=self.kernel_size, + padding=self.kernel_size // 2, + norm_cfg=self.norm_cfg, + ) + + if self.concat_input: + self.conv_cat = DepthwiseSeparableConvModule( + self.in_channels + self.channels, + self.channels, + kernel_size=self.kernel_size, + padding=self.kernel_size // 2, + norm_cfg=self.norm_cfg, + ) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py new file mode 100644 index 000000000000..35148c150fa7 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/decode_heads/uper_head.py @@ -0,0 +1,118 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead +from .psp_head import PPM + + +@HEADS.register_module() +class UPerHead(BaseDecodeHead): + """Unified Perceptual Parsing for Scene Understanding. + + This head is the implementation of `UPerNet + `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module applied on the last feature. Default: (1, 2, 3, 6). + """ + + def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): + super(UPerHead, self).__init__(input_transform='multiple_select', **kwargs) + # PSP Module + self.psp_modules = PPM( + pool_scales, + self.in_channels[-1], + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners, + ) + self.bottleneck = ConvModule( + self.in_channels[-1] + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + # FPN Module + self.lateral_convs = nn.ModuleList() + self.fpn_convs = nn.ModuleList() + for in_channels in self.in_channels[:-1]: # skip the top layer + l_conv = ConvModule( + in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + inplace=False, + ) + fpn_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + inplace=False, + ) + self.lateral_convs.append(l_conv) + self.fpn_convs.append(fpn_conv) + + self.fpn_bottleneck = ConvModule( + len(self.in_channels) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + ) + + def psp_forward(self, inputs): + """Forward function of PSP module.""" + x = inputs[-1] + psp_outs = [x] + psp_outs.extend(self.psp_modules(x)) + psp_outs = torch.cat(psp_outs, dim=1) + output = self.bottleneck(psp_outs) + + return output + + def forward(self, inputs): + """Forward function.""" + + inputs = self._transform_inputs(inputs) + + # build laterals + laterals = [lateral_conv(inputs[i]) for i, lateral_conv in enumerate(self.lateral_convs)] + + laterals.append(self.psp_forward(inputs)) + + # build top-down path + used_backbone_levels = len(laterals) + for i in range(used_backbone_levels - 1, 0, -1): + prev_shape = laterals[i - 1].shape[2:] + laterals[i - 1] += resize(laterals[i], size=prev_shape, mode='bilinear', align_corners=self.align_corners) + + # build outputs + fpn_outs = [self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels - 1)] + # append psp feature + fpn_outs.append(laterals[-1]) + + for i in range(used_backbone_levels - 1, 0, -1): + fpn_outs[i] = resize( + fpn_outs[i], size=fpn_outs[0].shape[2:], mode='bilinear', align_corners=self.align_corners + ) + fpn_outs = torch.cat(fpn_outs, dim=1) + output = self.fpn_bottleneck(fpn_outs) + output = self.cls_seg(output) + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py new file mode 100644 index 000000000000..aaf307b3eaa1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/__init__.py @@ -0,0 +1,19 @@ +from .accuracy import Accuracy, accuracy +from .cross_entropy_loss import CrossEntropyLoss, binary_cross_entropy, cross_entropy, mask_cross_entropy +from .dice_loss import DiceLoss +from .lovasz_loss import LovaszLoss +from .utils import reduce_loss, weight_reduce_loss, weighted_loss + +__all__ = [ + 'accuracy', + 'Accuracy', + 'cross_entropy', + 'binary_cross_entropy', + 'mask_cross_entropy', + 'CrossEntropyLoss', + 'reduce_loss', + 'weight_reduce_loss', + 'weighted_loss', + 'LovaszLoss', + 'DiceLoss', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py new file mode 100644 index 000000000000..85b13399ee70 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/accuracy.py @@ -0,0 +1,77 @@ +import torch.nn as nn + + +def accuracy(pred, target, topk=1, thresh=None): + """Calculate accuracy according to the prediction and target. + + Args: + pred (torch.Tensor): The model prediction, shape (N, num_class, ...) + target (torch.Tensor): The target of each prediction, shape (N, , ...) + topk (int | tuple[int], optional): If the predictions in ``topk`` + matches the target, the predictions will be regarded as + correct ones. Defaults to 1. + thresh (float, optional): If not None, predictions with scores under + this threshold are considered incorrect. Default to None. + + Returns: + float | tuple[float]: If the input ``topk`` is a single integer, + the function will return a single float as accuracy. If + ``topk`` is a tuple containing multiple integers, the + function will return a tuple containing accuracies of + each ``topk`` number. + """ + assert isinstance(topk, (int, tuple)) + if isinstance(topk, int): + topk = (topk,) + return_single = True + else: + return_single = False + + maxk = max(topk) + if pred.size(0) == 0: + accu = [pred.new_tensor(0.0) for i in range(len(topk))] + return accu[0] if return_single else accu + assert pred.ndim == target.ndim + 1 + assert pred.size(0) == target.size(0) + assert maxk <= pred.size(1), f'maxk {maxk} exceeds pred dimension {pred.size(1)}' + pred_value, pred_label = pred.topk(maxk, dim=1) + # transpose to shape (maxk, N, ...) + pred_label = pred_label.transpose(0, 1) + correct = pred_label.eq(target.unsqueeze(0).expand_as(pred_label)) + if thresh is not None: + # Only prediction values larger than thresh are counted as correct + correct = correct & (pred_value > thresh).t() + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / target.numel())) + return res[0] if return_single else res + + +class Accuracy(nn.Module): + """Accuracy calculation module.""" + + def __init__(self, topk=(1,), thresh=None): + """Module to calculate the accuracy. + + Args: + topk (tuple, optional): The criterion used to calculate the + accuracy. Defaults to (1,). + thresh (float, optional): If not None, predictions with scores + under this threshold are considered incorrect. Default to None. + """ + super().__init__() + self.topk = topk + self.thresh = thresh + + def forward(self, pred, target): + """Forward function to calculate accuracy. + + Args: + pred (torch.Tensor): Prediction of models. + target (torch.Tensor): Target for each prediction. + + Returns: + tuple[float]: The accuracies under different topk criterions. + """ + return accuracy(pred, target, self.topk, self.thresh) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py new file mode 100644 index 000000000000..766812eb5221 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/cross_entropy_loss.py @@ -0,0 +1,155 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..builder import LOSSES +from .utils import get_class_weight, weight_reduce_loss + + +def cross_entropy(pred, label, weight=None, class_weight=None, reduction='mean', avg_factor=None, ignore_index=-100): + """The wrapper function for :func:`F.cross_entropy`""" + # class_weight is a manual rescaling weight given to each class. + # If given, has to be a Tensor of size C element-wise losses + loss = F.cross_entropy(pred, label, weight=class_weight, reduction='none', ignore_index=ignore_index) + + # apply weights and do the reduction + if weight is not None: + weight = weight.float() + loss = weight_reduce_loss(loss, weight=weight, reduction=reduction, avg_factor=avg_factor) + + return loss + + +def _expand_onehot_labels(labels, label_weights, target_shape, ignore_index): + """Expand onehot labels to match the size of prediction.""" + bin_labels = labels.new_zeros(target_shape) + valid_mask = (labels >= 0) & (labels != ignore_index) + inds = torch.nonzero(valid_mask, as_tuple=True) + + if inds[0].numel() > 0: + if labels.dim() == 3: + bin_labels[inds[0], labels[valid_mask], inds[1], inds[2]] = 1 + else: + bin_labels[inds[0], labels[valid_mask]] = 1 + + valid_mask = valid_mask.unsqueeze(1).expand(target_shape).float() + if label_weights is None: + bin_label_weights = valid_mask + else: + bin_label_weights = label_weights.unsqueeze(1).expand(target_shape) + bin_label_weights *= valid_mask + + return bin_labels, bin_label_weights + + +def binary_cross_entropy( + pred, label, weight=None, reduction='mean', avg_factor=None, class_weight=None, ignore_index=255 +): + """Calculate the binary CrossEntropy loss. + + Args: + pred (torch.Tensor): The prediction with shape (N, 1). + label (torch.Tensor): The learning label of the prediction. + weight (torch.Tensor, optional): Sample-wise loss weight. + reduction (str, optional): The method used to reduce the loss. + Options are "none", "mean" and "sum". + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + class_weight (list[float], optional): The weight for each class. + ignore_index (int | None): The label index to be ignored. Default: 255 + + Returns: + torch.Tensor: The calculated loss + """ + if pred.dim() != label.dim(): + assert (pred.dim() == 2 and label.dim() == 1) or (pred.dim() == 4 and label.dim() == 3), ( + 'Only pred shape [N, C], label shape [N] or pred shape [N, C, ' + 'H, W], label shape [N, H, W] are supported' + ) + label, weight = _expand_onehot_labels(label, weight, pred.shape, ignore_index) + + # weighted element-wise losses + if weight is not None: + weight = weight.float() + loss = F.binary_cross_entropy_with_logits(pred, label.float(), pos_weight=class_weight, reduction='none') + # do the reduction for the weighted loss + loss = weight_reduce_loss(loss, weight, reduction=reduction, avg_factor=avg_factor) + + return loss + + +def mask_cross_entropy(pred, target, label, reduction='mean', avg_factor=None, class_weight=None, ignore_index=None): + """Calculate the CrossEntropy loss for masks. + + Args: + pred (torch.Tensor): The prediction with shape (N, C), C is the number + of classes. + target (torch.Tensor): The learning label of the prediction. + label (torch.Tensor): ``label`` indicates the class label of the mask' + corresponding object. This will be used to select the mask in the + of the class which the object belongs to when the mask prediction + if not class-agnostic. + reduction (str, optional): The method used to reduce the loss. + Options are "none", "mean" and "sum". + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + class_weight (list[float], optional): The weight for each class. + ignore_index (None): Placeholder, to be consistent with other loss. + Default: None. + + Returns: + torch.Tensor: The calculated loss + """ + assert ignore_index is None, 'BCE loss does not support ignore_index' + # TODO: handle these two reserved arguments + assert reduction == 'mean' and avg_factor is None + num_rois = pred.size()[0] + inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) + pred_slice = pred[inds, label].squeeze(1) + return F.binary_cross_entropy_with_logits(pred_slice, target, weight=class_weight, reduction='mean')[None] + + +@LOSSES.register_module() +class CrossEntropyLoss(nn.Module): + """CrossEntropyLoss. + + Args: + use_sigmoid (bool, optional): Whether the prediction uses sigmoid + of softmax. Defaults to False. + use_mask (bool, optional): Whether to use mask cross entropy loss. + Defaults to False. + reduction (str, optional): . Defaults to 'mean'. + Options are "none", "mean" and "sum". + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Defaults to 1.0. + """ + + def __init__(self, use_sigmoid=False, use_mask=False, reduction='mean', class_weight=None, loss_weight=1.0): + super(CrossEntropyLoss, self).__init__() + assert (use_sigmoid is False) or (use_mask is False) + self.use_sigmoid = use_sigmoid + self.use_mask = use_mask + self.reduction = reduction + self.loss_weight = loss_weight + self.class_weight = get_class_weight(class_weight) + + if self.use_sigmoid: + self.cls_criterion = binary_cross_entropy + elif self.use_mask: + self.cls_criterion = mask_cross_entropy + else: + self.cls_criterion = cross_entropy + + def forward(self, cls_score, label, weight=None, avg_factor=None, reduction_override=None, **kwargs): + """Forward function.""" + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = reduction_override if reduction_override else self.reduction + if self.class_weight is not None: + class_weight = cls_score.new_tensor(self.class_weight) + else: + class_weight = None + loss_cls = self.loss_weight * self.cls_criterion( + cls_score, label, weight, class_weight=class_weight, reduction=reduction, avg_factor=avg_factor, **kwargs + ) + return loss_cls diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py new file mode 100644 index 000000000000..9384e60bd048 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/dice_loss.py @@ -0,0 +1,98 @@ +"""Modified from https://github.com/LikeLy-Journey/SegmenTron/blob/master/ +segmentron/solver/loss.py (Apache-2.0 License)""" +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..builder import LOSSES +from .utils import get_class_weight, weighted_loss + + +@weighted_loss +def dice_loss(pred, target, valid_mask, smooth=1, exponent=2, class_weight=None, ignore_index=255): + assert pred.shape[0] == target.shape[0] + total_loss = 0 + num_classes = pred.shape[1] + for i in range(num_classes): + if i != ignore_index: + dice_loss = binary_dice_loss( + pred[:, i], target[..., i], valid_mask=valid_mask, smooth=smooth, exponent=exponent + ) + if class_weight is not None: + dice_loss *= class_weight[i] + total_loss += dice_loss + return total_loss / num_classes + + +@weighted_loss +def binary_dice_loss(pred, target, valid_mask, smooth=1, exponent=2, **kwards): + assert pred.shape[0] == target.shape[0] + pred = pred.reshape(pred.shape[0], -1) + target = target.reshape(target.shape[0], -1) + valid_mask = valid_mask.reshape(valid_mask.shape[0], -1) + + num = torch.sum(torch.mul(pred, target) * valid_mask, dim=1) * 2 + smooth + den = torch.sum(pred.pow(exponent) + target.pow(exponent), dim=1) + smooth + + return 1 - num / den + + +@LOSSES.register_module() +class DiceLoss(nn.Module): + """DiceLoss. + + This loss is proposed in `V-Net: Fully Convolutional Neural Networks for + Volumetric Medical Image Segmentation `_. + + Args: + loss_type (str, optional): Binary or multi-class loss. + Default: 'multi_class'. Options are "binary" and "multi_class". + smooth (float): A float number to smooth loss, and avoid NaN error. + Default: 1 + exponent (float): An float number to calculate denominator + value: \\sum{x^exponent} + \\sum{y^exponent}. Default: 2. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Default to 1.0. + ignore_index (int | None): The label index to be ignored. Default: 255. + """ + + def __init__( + self, smooth=1, exponent=2, reduction='mean', class_weight=None, loss_weight=1.0, ignore_index=255, **kwards + ): + super(DiceLoss, self).__init__() + self.smooth = smooth + self.exponent = exponent + self.reduction = reduction + self.class_weight = get_class_weight(class_weight) + self.loss_weight = loss_weight + self.ignore_index = ignore_index + + def forward(self, pred, target, avg_factor=None, reduction_override=None, **kwards): + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = reduction_override if reduction_override else self.reduction + if self.class_weight is not None: + class_weight = pred.new_tensor(self.class_weight) + else: + class_weight = None + + pred = F.softmax(pred, dim=1) + num_classes = pred.shape[1] + one_hot_target = F.one_hot(torch.clamp(target.long(), 0, num_classes - 1), num_classes=num_classes) + valid_mask = (target != self.ignore_index).long() + + loss = self.loss_weight * dice_loss( + pred, + one_hot_target, + valid_mask=valid_mask, + reduction=reduction, + avg_factor=avg_factor, + smooth=self.smooth, + exponent=self.exponent, + class_weight=class_weight, + ignore_index=self.ignore_index, + ) + return loss diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py new file mode 100644 index 000000000000..e1c049874490 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/lovasz_loss.py @@ -0,0 +1,304 @@ +"""Modified from https://github.com/bermanmaxim/LovaszSoftmax/blob/master/pytor +ch/lovasz_losses.py Lovasz-Softmax and Jaccard hinge loss in PyTorch Maxim +Berman 2018 ESAT-PSI KU Leuven (MIT License)""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + +from ..builder import LOSSES +from .utils import get_class_weight, weight_reduce_loss + + +def lovasz_grad(gt_sorted): + """Computes gradient of the Lovasz extension w.r.t sorted errors. + + See Alg. 1 in paper. + """ + p = len(gt_sorted) + gts = gt_sorted.sum() + intersection = gts - gt_sorted.float().cumsum(0) + union = gts + (1 - gt_sorted).float().cumsum(0) + jaccard = 1.0 - intersection / union + if p > 1: # cover 1-pixel case + jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] + return jaccard + + +def flatten_binary_logits(logits, labels, ignore_index=None): + """Flattens predictions in the batch (binary case) Remove labels equal to + 'ignore_index'.""" + logits = logits.view(-1) + labels = labels.view(-1) + if ignore_index is None: + return logits, labels + valid = labels != ignore_index + vlogits = logits[valid] + vlabels = labels[valid] + return vlogits, vlabels + + +def flatten_probs(probs, labels, ignore_index=None): + """Flattens predictions in the batch.""" + if probs.dim() == 3: + # assumes output of a sigmoid layer + B, H, W = probs.size() + probs = probs.view(B, 1, H, W) + B, C, H, W = probs.size() + probs = probs.permute(0, 2, 3, 1).contiguous().view(-1, C) # B*H*W, C=P,C + labels = labels.view(-1) + if ignore_index is None: + return probs, labels + valid = labels != ignore_index + vprobs = probs[valid.nonzero().squeeze()] + vlabels = labels[valid] + return vprobs, vlabels + + +def lovasz_hinge_flat(logits, labels): + """Binary Lovasz hinge loss. + + Args: + logits (torch.Tensor): [P], logits at each prediction + (between -infty and +infty). + labels (torch.Tensor): [P], binary ground truth labels (0 or 1). + + Returns: + torch.Tensor: The calculated loss. + """ + if len(labels) == 0: + # only void pixels, the gradients should be 0 + return logits.sum() * 0.0 + signs = 2.0 * labels.float() - 1.0 + errors = 1.0 - logits * signs + errors_sorted, perm = torch.sort(errors, dim=0, descending=True) + perm = perm.data + gt_sorted = labels[perm] + grad = lovasz_grad(gt_sorted) + loss = torch.dot(F.relu(errors_sorted), grad) + return loss + + +def lovasz_hinge( + logits, + labels, + classes='present', + per_image=False, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=255, +): + """Binary Lovasz hinge loss. + + Args: + logits (torch.Tensor): [B, H, W], logits at each pixel + (between -infty and +infty). + labels (torch.Tensor): [B, H, W], binary ground truth masks (0 or 1). + classes (str | list[int], optional): Placeholder, to be consistent with + other loss. Default: None. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + class_weight (list[float], optional): Placeholder, to be consistent + with other loss. Default: None. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. This parameter only works when per_image is True. + Default: None. + ignore_index (int | None): The label index to be ignored. Default: 255. + + Returns: + torch.Tensor: The calculated loss. + """ + if per_image: + loss = [ + lovasz_hinge_flat(*flatten_binary_logits(logit.unsqueeze(0), label.unsqueeze(0), ignore_index)) + for logit, label in zip(logits, labels) + ] + loss = weight_reduce_loss(torch.stack(loss), None, reduction, avg_factor) + else: + loss = lovasz_hinge_flat(*flatten_binary_logits(logits, labels, ignore_index)) + return loss + + +def lovasz_softmax_flat(probs, labels, classes='present', class_weight=None): + """Multi-class Lovasz-Softmax loss. + + Args: + probs (torch.Tensor): [P, C], class probabilities at each prediction + (between 0 and 1). + labels (torch.Tensor): [P], ground truth labels (between 0 and C - 1). + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + class_weight (list[float], optional): The weight for each class. + Default: None. + + Returns: + torch.Tensor: The calculated loss. + """ + if probs.numel() == 0: + # only void pixels, the gradients should be 0 + return probs * 0.0 + C = probs.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes + for c in class_to_sum: + fg = (labels == c).float() # foreground for class c + if classes == 'present' and fg.sum() == 0: + continue + if C == 1: + if len(classes) > 1: + raise ValueError('Sigmoid output possible only with 1 class') + class_pred = probs[:, 0] + else: + class_pred = probs[:, c] + errors = (fg - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + loss = torch.dot(errors_sorted, lovasz_grad(fg_sorted)) + if class_weight is not None: + loss *= class_weight[c] + losses.append(loss) + return torch.stack(losses).mean() + + +def lovasz_softmax( + probs, + labels, + classes='present', + per_image=False, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=255, +): + """Multi-class Lovasz-Softmax loss. + + Args: + probs (torch.Tensor): [B, C, H, W], class probabilities at each + prediction (between 0 and 1). + labels (torch.Tensor): [B, H, W], ground truth labels (between 0 and + C - 1). + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + class_weight (list[float], optional): The weight for each class. + Default: None. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. This parameter only works when per_image is True. + Default: None. + ignore_index (int | None): The label index to be ignored. Default: 255. + + Returns: + torch.Tensor: The calculated loss. + """ + + if per_image: + loss = [ + lovasz_softmax_flat( + *flatten_probs(prob.unsqueeze(0), label.unsqueeze(0), ignore_index), + classes=classes, + class_weight=class_weight + ) + for prob, label in zip(probs, labels) + ] + loss = weight_reduce_loss(torch.stack(loss), None, reduction, avg_factor) + else: + loss = lovasz_softmax_flat( + *flatten_probs(probs, labels, ignore_index), classes=classes, class_weight=class_weight + ) + return loss + + +@LOSSES.register_module() +class LovaszLoss(nn.Module): + """LovaszLoss. + + This loss is proposed in `The Lovasz-Softmax loss: A tractable surrogate + for the optimization of the intersection-over-union measure in neural + networks `_. + + Args: + loss_type (str, optional): Binary or multi-class loss. + Default: 'multi_class'. Options are "binary" and "multi_class". + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Defaults to 1.0. + """ + + def __init__( + self, + loss_type='multi_class', + classes='present', + per_image=False, + reduction='mean', + class_weight=None, + loss_weight=1.0, + ): + super(LovaszLoss, self).__init__() + assert loss_type in ( + 'binary', + 'multi_class', + ), "loss_type should be \ + 'binary' or 'multi_class'." + + if loss_type == 'binary': + self.cls_criterion = lovasz_hinge + else: + self.cls_criterion = lovasz_softmax + assert classes in ('all', 'present') or mmcv.is_list_of(classes, int) + if not per_image: + assert ( + reduction == 'none' + ), "reduction should be 'none' when \ + per_image is False." + + self.classes = classes + self.per_image = per_image + self.reduction = reduction + self.loss_weight = loss_weight + self.class_weight = get_class_weight(class_weight) + + def forward(self, cls_score, label, weight=None, avg_factor=None, reduction_override=None, **kwargs): + """Forward function.""" + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = reduction_override if reduction_override else self.reduction + if self.class_weight is not None: + class_weight = cls_score.new_tensor(self.class_weight) + else: + class_weight = None + + # if multi-class loss, transform logits to probs + if self.cls_criterion == lovasz_softmax: + cls_score = F.softmax(cls_score, dim=1) + + loss_cls = self.loss_weight * self.cls_criterion( + cls_score, + label, + self.classes, + self.per_image, + class_weight=class_weight, + reduction=reduction, + avg_factor=avg_factor, + **kwargs + ) + return loss_cls diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py new file mode 100644 index 000000000000..e1719c276160 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/losses/utils.py @@ -0,0 +1,117 @@ +import functools + +import numpy as np +import torch.nn.functional as F + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv + + +def get_class_weight(class_weight): + """Get class weight for loss function. + + Args: + class_weight (list[float] | str | None): If class_weight is a str, + take it as a file name and read from it. + """ + if isinstance(class_weight, str): + # take it as a file path + if class_weight.endswith('.npy'): + class_weight = np.load(class_weight) + else: + # pkl, json or yaml + class_weight = mmcv.load(class_weight) + + return class_weight + + +def reduce_loss(loss, reduction): + """Reduce loss as specified. + + Args: + loss (Tensor): Elementwise loss tensor. + reduction (str): Options are "none", "mean" and "sum". + + Return: + Tensor: Reduced loss tensor. + """ + reduction_enum = F._Reduction.get_enum(reduction) + # none: 0, elementwise_mean:1, sum: 2 + if reduction_enum == 0: + return loss + elif reduction_enum == 1: + return loss.mean() + elif reduction_enum == 2: + return loss.sum() + + +def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): + """Apply element-wise weight and reduce loss. + + Args: + loss (Tensor): Element-wise loss. + weight (Tensor): Element-wise weights. + reduction (str): Same as built-in losses of PyTorch. + avg_factor (float): Avarage factor when computing the mean of losses. + + Returns: + Tensor: Processed loss values. + """ + # if weight is specified, apply element-wise weight + if weight is not None: + assert weight.dim() == loss.dim() + if weight.dim() > 1: + assert weight.size(1) == 1 or weight.size(1) == loss.size(1) + loss = loss * weight + + # if avg_factor is not specified, just reduce the loss + if avg_factor is None: + loss = reduce_loss(loss, reduction) + else: + # if reduction is mean, then average the loss by avg_factor + if reduction == 'mean': + loss = loss.sum() / avg_factor + # if reduction is 'none', then do nothing, otherwise raise an error + elif reduction != 'none': + raise ValueError('avg_factor can not be used with reduction="sum"') + return loss + + +def weighted_loss(loss_func): + """Create a weighted version of a given loss function. + + To use this decorator, the loss function must have the signature like + `loss_func(pred, target, **kwargs)`. The function only needs to compute + element-wise loss without any reduction. This decorator will add weight + and reduction arguments to the function. The decorated function will have + the signature like `loss_func(pred, target, weight=None, reduction='mean', + avg_factor=None, **kwargs)`. + + :Example: + + >>> import torch + >>> @weighted_loss + >>> def l1_loss(pred, target): + >>> return (pred - target).abs() + + >>> pred = torch.Tensor([0, 2, 3]) + >>> target = torch.Tensor([1, 1, 1]) + >>> weight = torch.Tensor([1, 0, 1]) + + >>> l1_loss(pred, target) + tensor(1.3333) + >>> l1_loss(pred, target, weight) + tensor(1.) + >>> l1_loss(pred, target, reduction='none') + tensor([1., 1., 2.]) + >>> l1_loss(pred, target, weight, avg_factor=2) + tensor(1.5000) + """ + + @functools.wraps(loss_func) + def wrapper(pred, target, weight=None, reduction='mean', avg_factor=None, **kwargs): + # get element-wise loss + loss = loss_func(pred, target, **kwargs) + loss = weight_reduce_loss(loss, weight, reduction, avg_factor) + return loss + + return wrapper diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/__init__.py new file mode 100644 index 000000000000..9b9d3d5b3fe8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/__init__.py @@ -0,0 +1,4 @@ +from .fpn import FPN +from .multilevel_neck import MultiLevelNeck + +__all__ = ['FPN', 'MultiLevelNeck'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py new file mode 100644 index 000000000000..fd5ca4c087b4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/fpn.py @@ -0,0 +1,210 @@ +import torch.nn as nn +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, xavier_init + +from ..builder import NECKS + + +@NECKS.register_module() +class FPN(nn.Module): + """Feature Pyramid Network. + + This is an implementation of - Feature Pyramid Networks for Object + Detection (https://arxiv.org/abs/1612.03144) + + Args: + in_channels (List[int]): Number of input channels per scale. + out_channels (int): Number of output channels (used at each scale) + num_outs (int): Number of output scales. + start_level (int): Index of the start input backbone level used to + build the feature pyramid. Default: 0. + end_level (int): Index of the end input backbone level (exclusive) to + build the feature pyramid. Default: -1, which means the last level. + add_extra_convs (bool | str): If bool, it decides whether to add conv + layers on top of the original feature maps. Default to False. + If True, its actual mode is specified by `extra_convs_on_inputs`. + If str, it specifies the source feature map of the extra convs. + Only the following options are allowed + + - 'on_input': Last feat map of neck inputs (i.e. backbone feature). + - 'on_lateral': Last feature map after lateral convs. + - 'on_output': The last output feature map after fpn convs. + extra_convs_on_inputs (bool, deprecated): Whether to apply extra convs + on the original feature from the backbone. If True, + it is equivalent to `add_extra_convs='on_input'`. If False, it is + equivalent to set `add_extra_convs='on_output'`. Default to True. + relu_before_extra_convs (bool): Whether to apply relu before the extra + conv. Default: False. + no_norm_on_lateral (bool): Whether to apply norm on lateral. + Default: False. + conv_cfg (dict): Config dict for convolution layer. Default: None. + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (str): Config dict for activation layer in ConvModule. + Default: None. + upsample_cfg (dict): Config dict for interpolate layer. + Default: `dict(mode='nearest')` + + Example: + >>> import torch + >>> in_channels = [2, 3, 5, 7] + >>> scales = [340, 170, 84, 43] + >>> inputs = [torch.rand(1, c, s, s) + ... for c, s in zip(in_channels, scales)] + >>> self = FPN(in_channels, 11, len(in_channels)).eval() + >>> outputs = self.forward(inputs) + >>> for i in range(len(outputs)): + ... print(f'outputs[{i}].shape = {outputs[i].shape}') + outputs[0].shape = torch.Size([1, 11, 340, 340]) + outputs[1].shape = torch.Size([1, 11, 170, 170]) + outputs[2].shape = torch.Size([1, 11, 84, 84]) + outputs[3].shape = torch.Size([1, 11, 43, 43]) + """ + + def __init__( + self, + in_channels, + out_channels, + num_outs, + start_level=0, + end_level=-1, + add_extra_convs=False, + extra_convs_on_inputs=False, + relu_before_extra_convs=False, + no_norm_on_lateral=False, + conv_cfg=None, + norm_cfg=None, + act_cfg=None, + upsample_cfg=dict(mode='nearest'), + ): + super(FPN, self).__init__() + assert isinstance(in_channels, list) + self.in_channels = in_channels + self.out_channels = out_channels + self.num_ins = len(in_channels) + self.num_outs = num_outs + self.relu_before_extra_convs = relu_before_extra_convs + self.no_norm_on_lateral = no_norm_on_lateral + self.fp16_enabled = False + self.upsample_cfg = upsample_cfg.copy() + + if end_level == -1: + self.backbone_end_level = self.num_ins + assert num_outs >= self.num_ins - start_level + else: + # if end_level < inputs, no extra level is allowed + self.backbone_end_level = end_level + assert end_level <= len(in_channels) + assert num_outs == end_level - start_level + self.start_level = start_level + self.end_level = end_level + self.add_extra_convs = add_extra_convs + assert isinstance(add_extra_convs, (str, bool)) + if isinstance(add_extra_convs, str): + # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' + assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') + elif add_extra_convs: # True + if extra_convs_on_inputs: + # For compatibility with previous release + # TODO: deprecate `extra_convs_on_inputs` + self.add_extra_convs = 'on_input' + else: + self.add_extra_convs = 'on_output' + + self.lateral_convs = nn.ModuleList() + self.fpn_convs = nn.ModuleList() + + for i in range(self.start_level, self.backbone_end_level): + l_conv = ConvModule( + in_channels[i], + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, + act_cfg=act_cfg, + inplace=False, + ) + fpn_conv = ConvModule( + out_channels, + out_channels, + 3, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + inplace=False, + ) + + self.lateral_convs.append(l_conv) + self.fpn_convs.append(fpn_conv) + + # add extra conv layers (e.g., RetinaNet) + extra_levels = num_outs - self.backbone_end_level + self.start_level + if self.add_extra_convs and extra_levels >= 1: + for i in range(extra_levels): + if i == 0 and self.add_extra_convs == 'on_input': + in_channels = self.in_channels[self.backbone_end_level - 1] + else: + in_channels = out_channels + extra_fpn_conv = ConvModule( + in_channels, + out_channels, + 3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + inplace=False, + ) + self.fpn_convs.append(extra_fpn_conv) + + # default init_weights for conv(msra) and norm in ConvModule + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + xavier_init(m, distribution='uniform') + + def forward(self, inputs): + assert len(inputs) == len(self.in_channels) + + # build laterals + laterals = [lateral_conv(inputs[i + self.start_level]) for i, lateral_conv in enumerate(self.lateral_convs)] + + # build top-down path + used_backbone_levels = len(laterals) + for i in range(used_backbone_levels - 1, 0, -1): + # In some cases, fixing `scale factor` (e.g. 2) is preferred, but + # it cannot co-exist with `size` in `F.interpolate`. + if 'scale_factor' in self.upsample_cfg: + laterals[i - 1] += F.interpolate(laterals[i], **self.upsample_cfg) + else: + prev_shape = laterals[i - 1].shape[2:] + laterals[i - 1] += F.interpolate(laterals[i], size=prev_shape, **self.upsample_cfg) + + # build outputs + # part 1: from original levels + outs = [self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)] + # part 2: add extra levels + if self.num_outs > len(outs): + # use max pool to get more levels on top of outputs + # (e.g., Faster R-CNN, Mask R-CNN) + if not self.add_extra_convs: + for i in range(self.num_outs - used_backbone_levels): + outs.append(F.max_pool2d(outs[-1], 1, stride=2)) + # add conv layers on top of original feature maps (RetinaNet) + else: + if self.add_extra_convs == 'on_input': + extra_source = inputs[self.backbone_end_level - 1] + elif self.add_extra_convs == 'on_lateral': + extra_source = laterals[-1] + elif self.add_extra_convs == 'on_output': + extra_source = outs[-1] + else: + raise NotImplementedError + outs.append(self.fpn_convs[used_backbone_levels](extra_source)) + for i in range(used_backbone_levels + 1, self.num_outs): + if self.relu_before_extra_convs: + outs.append(self.fpn_convs[i](F.relu(outs[-1]))) + else: + outs.append(self.fpn_convs[i](outs[-1])) + return tuple(outs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py new file mode 100644 index 000000000000..395bcd5ed655 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/necks/multilevel_neck.py @@ -0,0 +1,53 @@ +import torch.nn as nn +import torch.nn.functional as F +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from ..builder import NECKS + + +@NECKS.register_module() +class MultiLevelNeck(nn.Module): + """MultiLevelNeck. + + A neck structure connect vit backbone and decoder_heads. + Args: + in_channels (List[int]): Number of input channels per scale. + out_channels (int): Number of output channels (used at each scale). + scales (List[int]): Scale factors for each input feature map. + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (dict): Config dict for activation layer in ConvModule. + Default: None. + """ + + def __init__(self, in_channels, out_channels, scales=[0.5, 1, 2, 4], norm_cfg=None, act_cfg=None): + super(MultiLevelNeck, self).__init__() + assert isinstance(in_channels, list) + self.in_channels = in_channels + self.out_channels = out_channels + self.scales = scales + self.num_outs = len(scales) + self.lateral_convs = nn.ModuleList() + self.convs = nn.ModuleList() + for in_channel in in_channels: + self.lateral_convs.append( + ConvModule(in_channel, out_channels, kernel_size=1, norm_cfg=norm_cfg, act_cfg=act_cfg) + ) + for _ in range(self.num_outs): + self.convs.append( + ConvModule( + out_channels, out_channels, kernel_size=3, padding=1, stride=1, norm_cfg=norm_cfg, act_cfg=act_cfg + ) + ) + + def forward(self, inputs): + assert len(inputs) == len(self.in_channels) + print(inputs[0].shape) + inputs = [lateral_conv(inputs[i]) for i, lateral_conv in enumerate(self.lateral_convs)] + # for len(inputs) not equal to self.num_outs + if len(inputs) == 1: + inputs = [inputs[0] for _ in range(self.num_outs)] + outs = [] + for i in range(self.num_outs): + x_resize = F.interpolate(inputs[i], scale_factor=self.scales[i], mode='bilinear') + outs.append(self.convs[i](x_resize)) + return tuple(outs) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/__init__.py new file mode 100644 index 000000000000..dca2f0940533 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/__init__.py @@ -0,0 +1,5 @@ +from .base import BaseSegmentor +from .cascade_encoder_decoder import CascadeEncoderDecoder +from .encoder_decoder import EncoderDecoder + +__all__ = ['BaseSegmentor', 'EncoderDecoder', 'CascadeEncoderDecoder'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py new file mode 100644 index 000000000000..bb1eb40a38d7 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/base.py @@ -0,0 +1,256 @@ +import logging +import warnings +from abc import ABCMeta, abstractmethod +from collections import OrderedDict + +import numpy as np +import torch +import torch.distributed as dist +import torch.nn as nn + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.runner import auto_fp16 + + +class BaseSegmentor(nn.Module): + """Base class for segmentors.""" + + __metaclass__ = ABCMeta + + def __init__(self): + super(BaseSegmentor, self).__init__() + self.fp16_enabled = False + + @property + def with_neck(self): + """bool: whether the segmentor has neck""" + return hasattr(self, 'neck') and self.neck is not None + + @property + def with_auxiliary_head(self): + """bool: whether the segmentor has auxiliary head""" + return hasattr(self, 'auxiliary_head') and self.auxiliary_head is not None + + @property + def with_decode_head(self): + """bool: whether the segmentor has decode head""" + return hasattr(self, 'decode_head') and self.decode_head is not None + + @abstractmethod + def extract_feat(self, imgs): + """Placeholder for extract features from images.""" + pass + + @abstractmethod + def encode_decode(self, img, img_metas): + """Placeholder for encode images with backbone and decode into a + semantic segmentation map of the same size as input.""" + pass + + @abstractmethod + def forward_train(self, imgs, img_metas, **kwargs): + """Placeholder for Forward function for training.""" + pass + + @abstractmethod + def simple_test(self, img, img_meta, **kwargs): + """Placeholder for single image test.""" + pass + + @abstractmethod + def aug_test(self, imgs, img_metas, **kwargs): + """Placeholder for augmentation test.""" + pass + + def init_weights(self, pretrained=None): + """Initialize the weights in segmentor. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + if pretrained is not None: + logger = logging.getLogger() + logger.info(f'load model from: {pretrained}') + + def forward_test(self, imgs, img_metas, **kwargs): + """ + Args: + imgs (List[Tensor]): the outer list indicates test-time + augmentations and inner Tensor should have a shape NxCxHxW, + which contains all images in the batch. + img_metas (List[List[dict]]): the outer list indicates test-time + augs (multiscale, flip, etc.) and the inner list indicates + images in a batch. + """ + for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: + if not isinstance(var, list): + raise TypeError(f'{name} must be a list, but got ' f'{type(var)}') + + num_augs = len(imgs) + if num_augs != len(img_metas): + raise ValueError(f'num of augmentations ({len(imgs)}) != ' f'num of image meta ({len(img_metas)})') + # all images in the same aug batch all of the same ori_shape and pad + # shape + for img_meta in img_metas: + ori_shapes = [_['ori_shape'] for _ in img_meta] + assert all(shape == ori_shapes[0] for shape in ori_shapes) + img_shapes = [_['img_shape'] for _ in img_meta] + assert all(shape == img_shapes[0] for shape in img_shapes) + pad_shapes = [_['pad_shape'] for _ in img_meta] + assert all(shape == pad_shapes[0] for shape in pad_shapes) + + if num_augs == 1: + return self.simple_test(imgs[0], img_metas[0], **kwargs) + else: + return self.aug_test(imgs, img_metas, **kwargs) + + @auto_fp16(apply_to=('img',)) + def forward(self, img, img_metas, return_loss=True, **kwargs): + """Calls either :func:`forward_train` or :func:`forward_test` depending + on whether ``return_loss`` is ``True``. + + Note this setting will change the expected inputs. When + ``return_loss=True``, img and img_meta are single-nested (i.e. Tensor + and List[dict]), and when ``resturn_loss=False``, img and img_meta + should be double nested (i.e. List[Tensor], List[List[dict]]), with + the outer list indicating test time augmentations. + """ + if return_loss: + return self.forward_train(img, img_metas, **kwargs) + else: + return self.forward_test(img, img_metas, **kwargs) + + def train_step(self, data_batch, optimizer, **kwargs): + """The iteration step during training. + + This method defines an iteration step during training, except for the + back propagation and optimizer updating, which are done in an optimizer + hook. Note that in some complicated cases or models, the whole process + including back propagation and optimizer updating is also defined in + this method, such as GAN. + + Args: + data (dict): The output of dataloader. + optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of + runner is passed to ``train_step()``. This argument is unused + and reserved. + + Returns: + dict: It should contain at least 3 keys: ``loss``, ``log_vars``, + ``num_samples``. + ``loss`` is a tensor for back propagation, which can be a + weighted sum of multiple losses. + ``log_vars`` contains all the variables to be sent to the + logger. + ``num_samples`` indicates the batch size (when the model is + DDP, it means the batch size on each GPU), which is used for + averaging the logs. + """ + losses = self(**data_batch) + loss, log_vars = self._parse_losses(losses) + + outputs = dict(loss=loss, log_vars=log_vars, num_samples=len(data_batch['img_metas'])) + + return outputs + + def val_step(self, data_batch, **kwargs): + """The iteration step during validation. + + This method shares the same signature as :func:`train_step`, but used + during val epochs. Note that the evaluation after training epochs is + not implemented with this method, but an evaluation hook. + """ + output = self(**data_batch, **kwargs) + return output + + @staticmethod + def _parse_losses(losses): + """Parse the raw outputs (losses) of the network. + + Args: + losses (dict): Raw output of the network, which usually contain + losses and other necessary information. + + Returns: + tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor + which may be a weighted sum of all losses, log_vars contains + all the variables to be sent to the logger. + """ + log_vars = OrderedDict() + for loss_name, loss_value in losses.items(): + if isinstance(loss_value, torch.Tensor): + log_vars[loss_name] = loss_value.mean() + elif isinstance(loss_value, list): + log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) + else: + raise TypeError(f'{loss_name} is not a tensor or list of tensors') + + loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key) + + log_vars['loss'] = loss + for loss_name, loss_value in log_vars.items(): + # reduce loss when distributed training + if dist.is_available() and dist.is_initialized(): + loss_value = loss_value.data.clone() + dist.all_reduce(loss_value.div_(dist.get_world_size())) + log_vars[loss_name] = loss_value.item() + + return loss, log_vars + + def show_result(self, img, result, palette=None, win_name='', show=False, wait_time=0, out_file=None, opacity=0.5): + """Draw `result` over `img`. + + Args: + img (str or Tensor): The image to be displayed. + result (Tensor): The semantic segmentation results to draw over + `img`. + palette (list[list[int]]] | np.ndarray | None): The palette of + segmentation map. If None is given, random palette will be + generated. Default: None + win_name (str): The window name. + wait_time (int): Value of waitKey param. + Default: 0. + show (bool): Whether to show the image. + Default: False. + out_file (str or None): The filename to write the image. + Default: None. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. + Returns: + img (Tensor): Only if not `show` or `out_file` + """ + img = mmcv.imread(img) + img = img.copy() + seg = result[0] + if palette is None: + if self.PALETTE is None: + palette = np.random.randint(0, 255, size=(len(self.CLASSES), 3)) + else: + palette = self.PALETTE + palette = np.array(palette) + assert palette.shape[0] == len(self.CLASSES) + assert palette.shape[1] == 3 + assert len(palette.shape) == 2 + assert 0 < opacity <= 1.0 + color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) + for label, color in enumerate(palette): + color_seg[seg == label, :] = color + # convert to BGR + color_seg = color_seg[..., ::-1] + + img = img * (1 - opacity) + color_seg * opacity + img = img.astype(np.uint8) + # if out_file specified, do not show image in window + if out_file is not None: + show = False + + if show: + mmcv.imshow(img, win_name, wait_time) + if out_file is not None: + mmcv.imwrite(img, out_file) + + if not (show or out_file): + warnings.warn('show==False and out_file is not specified, only ' 'result image will be returned') + return img diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py new file mode 100644 index 000000000000..2f53cfb9e41c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py @@ -0,0 +1,95 @@ +from torch import nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import add_prefix +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from .. import builder +from ..builder import SEGMENTORS +from .encoder_decoder import EncoderDecoder + + +@SEGMENTORS.register_module() +class CascadeEncoderDecoder(EncoderDecoder): + """Cascade Encoder Decoder segmentors. + + CascadeEncoderDecoder almost the same as EncoderDecoder, while decoders of + CascadeEncoderDecoder are cascaded. The output of previous decoder_head + will be the input of next decoder_head. + """ + + def __init__( + self, + num_stages, + backbone, + decode_head, + neck=None, + auxiliary_head=None, + train_cfg=None, + test_cfg=None, + pretrained=None, + ): + self.num_stages = num_stages + super(CascadeEncoderDecoder, self).__init__( + backbone=backbone, + decode_head=decode_head, + neck=neck, + auxiliary_head=auxiliary_head, + train_cfg=train_cfg, + test_cfg=test_cfg, + pretrained=pretrained, + ) + + def _init_decode_head(self, decode_head): + """Initialize ``decode_head``""" + assert isinstance(decode_head, list) + assert len(decode_head) == self.num_stages + self.decode_head = nn.ModuleList() + for i in range(self.num_stages): + self.decode_head.append(builder.build_head(decode_head[i])) + self.align_corners = self.decode_head[-1].align_corners + self.num_classes = self.decode_head[-1].num_classes + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone and heads. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + self.backbone.init_weights(pretrained=pretrained) + for i in range(self.num_stages): + self.decode_head[i].init_weights() + if self.with_auxiliary_head: + if isinstance(self.auxiliary_head, nn.ModuleList): + for aux_head in self.auxiliary_head: + aux_head.init_weights() + else: + self.auxiliary_head.init_weights() + + def encode_decode(self, img, img_metas): + """Encode images with backbone and decode into a semantic segmentation + map of the same size as input.""" + x = self.extract_feat(img) + out = self.decode_head[0].forward_test(x, img_metas, self.test_cfg) + for i in range(1, self.num_stages): + out = self.decode_head[i].forward_test(x, out, img_metas, self.test_cfg) + out = resize(input=out, size=img.shape[2:], mode='bilinear', align_corners=self.align_corners) + return out + + def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): + """Run forward function and calculate loss for decode head in + training.""" + losses = dict() + + loss_decode = self.decode_head[0].forward_train(x, img_metas, gt_semantic_seg, self.train_cfg) + + losses.update(add_prefix(loss_decode, 'decode_0')) + + for i in range(1, self.num_stages): + # forward test again, maybe unnecessary for most methods. + prev_outputs = self.decode_head[i - 1].forward_test(x, img_metas, self.test_cfg) + loss_decode = self.decode_head[i].forward_train( + x, prev_outputs, img_metas, gt_semantic_seg, self.train_cfg + ) + losses.update(add_prefix(loss_decode, f'decode_{i}')) + + return losses diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py new file mode 100644 index 000000000000..198bf34de1e8 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/segmentors/encoder_decoder.py @@ -0,0 +1,275 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.core import add_prefix +from nemo.collections.multimodal.models.controlnet.uniformer.mmseg.ops import resize +from .. import builder +from ..builder import SEGMENTORS +from .base import BaseSegmentor + + +@SEGMENTORS.register_module() +class EncoderDecoder(BaseSegmentor): + """Encoder Decoder segmentors. + + EncoderDecoder typically consists of backbone, decode_head, auxiliary_head. + Note that auxiliary_head is only used for deep supervision during training, + which could be dumped during inference. + """ + + def __init__( + self, backbone, decode_head, neck=None, auxiliary_head=None, train_cfg=None, test_cfg=None, pretrained=None + ): + super(EncoderDecoder, self).__init__() + self.backbone = builder.build_backbone(backbone) + if neck is not None: + self.neck = builder.build_neck(neck) + self._init_decode_head(decode_head) + self._init_auxiliary_head(auxiliary_head) + + self.train_cfg = train_cfg + self.test_cfg = test_cfg + + self.init_weights(pretrained=pretrained) + + assert self.with_decode_head + + def _init_decode_head(self, decode_head): + """Initialize ``decode_head``""" + self.decode_head = builder.build_head(decode_head) + self.align_corners = self.decode_head.align_corners + self.num_classes = self.decode_head.num_classes + + def _init_auxiliary_head(self, auxiliary_head): + """Initialize ``auxiliary_head``""" + if auxiliary_head is not None: + if isinstance(auxiliary_head, list): + self.auxiliary_head = nn.ModuleList() + for head_cfg in auxiliary_head: + self.auxiliary_head.append(builder.build_head(head_cfg)) + else: + self.auxiliary_head = builder.build_head(auxiliary_head) + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone and heads. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + + super(EncoderDecoder, self).init_weights(pretrained) + self.backbone.init_weights(pretrained=pretrained) + self.decode_head.init_weights() + if self.with_auxiliary_head: + if isinstance(self.auxiliary_head, nn.ModuleList): + for aux_head in self.auxiliary_head: + aux_head.init_weights() + else: + self.auxiliary_head.init_weights() + + def extract_feat(self, img): + """Extract features from images.""" + x = self.backbone(img) + if self.with_neck: + x = self.neck(x) + return x + + def encode_decode(self, img, img_metas): + """Encode images with backbone and decode into a semantic segmentation + map of the same size as input.""" + x = self.extract_feat(img) + out = self._decode_head_forward_test(x, img_metas) + out = resize(input=out, size=img.shape[2:], mode='bilinear', align_corners=self.align_corners) + return out + + def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): + """Run forward function and calculate loss for decode head in + training.""" + losses = dict() + loss_decode = self.decode_head.forward_train(x, img_metas, gt_semantic_seg, self.train_cfg) + + losses.update(add_prefix(loss_decode, 'decode')) + return losses + + def _decode_head_forward_test(self, x, img_metas): + """Run forward function and calculate loss for decode head in + inference.""" + seg_logits = self.decode_head.forward_test(x, img_metas, self.test_cfg) + return seg_logits + + def _auxiliary_head_forward_train(self, x, img_metas, gt_semantic_seg): + """Run forward function and calculate loss for auxiliary head in + training.""" + losses = dict() + if isinstance(self.auxiliary_head, nn.ModuleList): + for idx, aux_head in enumerate(self.auxiliary_head): + loss_aux = aux_head.forward_train(x, img_metas, gt_semantic_seg, self.train_cfg) + losses.update(add_prefix(loss_aux, f'aux_{idx}')) + else: + loss_aux = self.auxiliary_head.forward_train(x, img_metas, gt_semantic_seg, self.train_cfg) + losses.update(add_prefix(loss_aux, 'aux')) + + return losses + + def forward_dummy(self, img): + """Dummy forward function.""" + seg_logit = self.encode_decode(img, None) + + return seg_logit + + def forward_train(self, img, img_metas, gt_semantic_seg): + """Forward function for training. + + Args: + img (Tensor): Input images. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + + x = self.extract_feat(img) + + losses = dict() + + loss_decode = self._decode_head_forward_train(x, img_metas, gt_semantic_seg) + losses.update(loss_decode) + + if self.with_auxiliary_head: + loss_aux = self._auxiliary_head_forward_train(x, img_metas, gt_semantic_seg) + losses.update(loss_aux) + + return losses + + # TODO refactor + def slide_inference(self, img, img_meta, rescale): + """Inference by sliding-window with overlap. + + If h_crop > h_img or w_crop > w_img, the small patch will be used to + decode without padding. + """ + + h_stride, w_stride = self.test_cfg.stride + h_crop, w_crop = self.test_cfg.crop_size + batch_size, _, h_img, w_img = img.size() + num_classes = self.num_classes + h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1 + w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1 + preds = img.new_zeros((batch_size, num_classes, h_img, w_img)) + count_mat = img.new_zeros((batch_size, 1, h_img, w_img)) + for h_idx in range(h_grids): + for w_idx in range(w_grids): + y1 = h_idx * h_stride + x1 = w_idx * w_stride + y2 = min(y1 + h_crop, h_img) + x2 = min(x1 + w_crop, w_img) + y1 = max(y2 - h_crop, 0) + x1 = max(x2 - w_crop, 0) + crop_img = img[:, :, y1:y2, x1:x2] + crop_seg_logit = self.encode_decode(crop_img, img_meta) + preds += F.pad(crop_seg_logit, (int(x1), int(preds.shape[3] - x2), int(y1), int(preds.shape[2] - y2))) + + count_mat[:, :, y1:y2, x1:x2] += 1 + assert (count_mat == 0).sum() == 0 + if torch.onnx.is_in_onnx_export(): + # cast count_mat to constant while exporting to ONNX + count_mat = torch.from_numpy(count_mat.cpu().detach().numpy()).to(device=img.device) + preds = preds / count_mat + if rescale: + preds = resize( + preds, + size=img_meta[0]['ori_shape'][:2], + mode='bilinear', + align_corners=self.align_corners, + warning=False, + ) + return preds + + def whole_inference(self, img, img_meta, rescale): + """Inference with full image.""" + + seg_logit = self.encode_decode(img, img_meta) + if rescale: + # support dynamic shape for onnx + if torch.onnx.is_in_onnx_export(): + size = img.shape[2:] + else: + size = img_meta[0]['ori_shape'][:2] + seg_logit = resize(seg_logit, size=size, mode='bilinear', align_corners=self.align_corners, warning=False) + + return seg_logit + + def inference(self, img, img_meta, rescale): + """Inference with slide/whole style. + + Args: + img (Tensor): The input image of shape (N, 3, H, W). + img_meta (dict): Image info dict where each dict has: 'img_shape', + 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + rescale (bool): Whether rescale back to original shape. + + Returns: + Tensor: The output segmentation map. + """ + + assert self.test_cfg.mode in ['slide', 'whole'] + ori_shape = img_meta[0]['ori_shape'] + assert all(_['ori_shape'] == ori_shape for _ in img_meta) + if self.test_cfg.mode == 'slide': + seg_logit = self.slide_inference(img, img_meta, rescale) + else: + seg_logit = self.whole_inference(img, img_meta, rescale) + output = F.softmax(seg_logit, dim=1) + flip = img_meta[0]['flip'] + if flip: + flip_direction = img_meta[0]['flip_direction'] + assert flip_direction in ['horizontal', 'vertical'] + if flip_direction == 'horizontal': + output = output.flip(dims=(3,)) + elif flip_direction == 'vertical': + output = output.flip(dims=(2,)) + + return output + + def simple_test(self, img, img_meta, rescale=True): + """Simple test with single image.""" + seg_logit = self.inference(img, img_meta, rescale) + seg_pred = seg_logit.argmax(dim=1) + if torch.onnx.is_in_onnx_export(): + # our inference backend only support 4D output + seg_pred = seg_pred.unsqueeze(0) + return seg_pred + seg_pred = seg_pred.cpu().numpy() + # unravel batch dim + seg_pred = list(seg_pred) + return seg_pred + + def aug_test(self, imgs, img_metas, rescale=True): + """Test with augmentations. + + Only rescale=True is supported. + """ + # aug_test rescale all imgs back to ori_shape for now + assert rescale + # to save memory, we get augmented seg logit inplace + seg_logit = self.inference(imgs[0], img_metas[0], rescale) + for i in range(1, len(imgs)): + cur_seg_logit = self.inference(imgs[i], img_metas[i], rescale) + seg_logit += cur_seg_logit + seg_logit /= len(imgs) + seg_pred = seg_logit.argmax(dim=1) + seg_pred = seg_pred.cpu().numpy() + # unravel batch dim + seg_pred = list(seg_pred) + return seg_pred diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py new file mode 100644 index 000000000000..be7a5bd7a676 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/__init__.py @@ -0,0 +1,20 @@ +from .drop import DropPath +from .inverted_residual import InvertedResidual, InvertedResidualV3 +from .make_divisible import make_divisible +from .res_layer import ResLayer +from .se_layer import SELayer +from .self_attention_block import SelfAttentionBlock +from .up_conv_block import UpConvBlock +from .weight_init import trunc_normal_ + +__all__ = [ + 'ResLayer', + 'SelfAttentionBlock', + 'make_divisible', + 'InvertedResidual', + 'UpConvBlock', + 'InvertedResidualV3', + 'SELayer', + 'DropPath', + 'trunc_normal_', +] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py new file mode 100644 index 000000000000..cf9492f1c324 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/drop.py @@ -0,0 +1,29 @@ +"""Modified from https://github.com/rwightman/pytorch-image- +models/blob/master/timm/models/layers/drop.py.""" + +import torch +from torch import nn + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of + residual blocks). + + Args: + drop_prob (float): Drop rate for paths of model. Dropout rate has + to be between 0 and 1. Default: 0. + """ + + def __init__(self, drop_prob=0.0): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + self.keep_prob = 1 - drop_prob + + def forward(self, x): + if self.drop_prob == 0.0 or not self.training: + return x + shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets + random_tensor = self.keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) + random_tensor.floor_() # binarize + output = x.div(self.keep_prob) * random_tensor + return output diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py new file mode 100644 index 000000000000..6c2262f7922c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/inverted_residual.py @@ -0,0 +1,218 @@ +from torch import nn +from torch.utils import checkpoint as cp + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from .se_layer import SELayer + + +class InvertedResidual(nn.Module): + """InvertedResidual block for MobileNetV2. + + Args: + in_channels (int): The input channels of the InvertedResidual block. + out_channels (int): The output channels of the InvertedResidual block. + stride (int): Stride of the middle (first) 3x3 convolution. + expand_ratio (int): Adjusts number of channels of the hidden layer + in InvertedResidual by this amount. + dilation (int): Dilation rate of depthwise conv. Default: 1 + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU6'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + + Returns: + Tensor: The output tensor. + """ + + def __init__( + self, + in_channels, + out_channels, + stride, + expand_ratio, + dilation=1, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU6'), + with_cp=False, + ): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2], f'stride must in [1, 2]. ' f'But received {stride}.' + self.with_cp = with_cp + self.use_res_connect = self.stride == 1 and in_channels == out_channels + hidden_dim = int(round(in_channels * expand_ratio)) + + layers = [] + if expand_ratio != 1: + layers.append( + ConvModule( + in_channels=in_channels, + out_channels=hidden_dim, + kernel_size=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + ) + layers.extend( + [ + ConvModule( + in_channels=hidden_dim, + out_channels=hidden_dim, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + groups=hidden_dim, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ), + ConvModule( + in_channels=hidden_dim, + out_channels=out_channels, + kernel_size=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None, + ), + ] + ) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + def _inner_forward(x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class InvertedResidualV3(nn.Module): + """Inverted Residual Block for MobileNetV3. + + Args: + in_channels (int): The input channels of this Module. + out_channels (int): The output channels of this Module. + mid_channels (int): The input channels of the depthwise convolution. + kernel_size (int): The kernel size of the depthwise convolution. + Default: 3. + stride (int): The stride of the depthwise convolution. Default: 1. + se_cfg (dict): Config dict for se layer. Default: None, which means no + se layer. + with_expand_conv (bool): Use expand conv or not. If set False, + mid_channels must be the same with in_channels. Default: True. + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + + Returns: + Tensor: The output tensor. + """ + + def __init__( + self, + in_channels, + out_channels, + mid_channels, + kernel_size=3, + stride=1, + se_cfg=None, + with_expand_conv=True, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + with_cp=False, + ): + super(InvertedResidualV3, self).__init__() + self.with_res_shortcut = stride == 1 and in_channels == out_channels + assert stride in [1, 2] + self.with_cp = with_cp + self.with_se = se_cfg is not None + self.with_expand_conv = with_expand_conv + + if self.with_se: + assert isinstance(se_cfg, dict) + if not self.with_expand_conv: + assert mid_channels == in_channels + + if self.with_expand_conv: + self.expand_conv = ConvModule( + in_channels=in_channels, + out_channels=mid_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + self.depthwise_conv = ConvModule( + in_channels=mid_channels, + out_channels=mid_channels, + kernel_size=kernel_size, + stride=stride, + padding=kernel_size // 2, + groups=mid_channels, + conv_cfg=dict(type='Conv2dAdaptivePadding') if stride == 2 else conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + + if self.with_se: + self.se = SELayer(**se_cfg) + + self.linear_conv = ConvModule( + in_channels=mid_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None, + ) + + def forward(self, x): + def _inner_forward(x): + out = x + + if self.with_expand_conv: + out = self.expand_conv(out) + + out = self.depthwise_conv(out) + + if self.with_se: + out = self.se(out) + + out = self.linear_conv(out) + + if self.with_res_shortcut: + return x + out + else: + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/make_divisible.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/make_divisible.py new file mode 100644 index 000000000000..75ad75605252 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/make_divisible.py @@ -0,0 +1,27 @@ +def make_divisible(value, divisor, min_value=None, min_ratio=0.9): + """Make divisible function. + + This function rounds the channel number to the nearest value that can be + divisible by the divisor. It is taken from the original tf repo. It ensures + that all layers have a channel number that is divisible by divisor. It can + be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py # noqa + + Args: + value (int): The original channel number. + divisor (int): The divisor to fully divide the channel number. + min_value (int): The minimum value of the output channel. + Default: None, means that the minimum value equal to the divisor. + min_ratio (float): The minimum ratio of the rounded channel number to + the original channel number. Default: 0.9. + + Returns: + int: The modified output channel number. + """ + + if min_value is None: + min_value = divisor + new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than (1-min_ratio). + if new_value < min_ratio * value: + new_value += divisor + return new_value diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py new file mode 100644 index 000000000000..370d078863f1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/res_layer.py @@ -0,0 +1,96 @@ +from torch import nn as nn + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import build_conv_layer, build_norm_layer + + +class ResLayer(nn.Sequential): + """ResLayer to build ResNet style backbone. + + Args: + block (nn.Module): block used to build ResLayer. + inplanes (int): inplanes of block. + planes (int): planes of block. + num_blocks (int): number of blocks. + stride (int): stride of the first block. Default: 1 + avg_down (bool): Use AvgPool instead of stride conv when + downsampling in the bottleneck. Default: False + conv_cfg (dict): dictionary to construct and config conv layer. + Default: None + norm_cfg (dict): dictionary to construct and config norm layer. + Default: dict(type='BN') + multi_grid (int | None): Multi grid dilation rates of last + stage. Default: None + contract_dilation (bool): Whether contract first dilation of each layer + Default: False + """ + + def __init__( + self, + block, + inplanes, + planes, + num_blocks, + stride=1, + dilation=1, + avg_down=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + multi_grid=None, + contract_dilation=False, + **kwargs + ): + self.block = block + + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = [] + conv_stride = stride + if avg_down: + conv_stride = 1 + downsample.append( + nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False) + ) + downsample.extend( + [ + build_conv_layer( + conv_cfg, inplanes, planes * block.expansion, kernel_size=1, stride=conv_stride, bias=False + ), + build_norm_layer(norm_cfg, planes * block.expansion)[1], + ] + ) + downsample = nn.Sequential(*downsample) + + layers = [] + if multi_grid is None: + if dilation > 1 and contract_dilation: + first_dilation = dilation // 2 + else: + first_dilation = dilation + else: + first_dilation = multi_grid[0] + layers.append( + block( + inplanes=inplanes, + planes=planes, + stride=stride, + dilation=first_dilation, + downsample=downsample, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + **kwargs + ) + ) + inplanes = planes * block.expansion + for i in range(1, num_blocks): + layers.append( + block( + inplanes=inplanes, + planes=planes, + stride=1, + dilation=dilation if multi_grid is None else multi_grid[i], + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + **kwargs + ) + ) + super(ResLayer, self).__init__(*layers) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py new file mode 100644 index 000000000000..b00aaeade295 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/se_layer.py @@ -0,0 +1,61 @@ +import torch.nn as nn + +import nemo.collections.multimodal.models.controlnet.uniformer.mmcv as mmcv +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule + +from .make_divisible import make_divisible + + +class SELayer(nn.Module): + """Squeeze-and-Excitation Module. + + Args: + channels (int): The input (and output) channels of the SE layer. + ratio (int): Squeeze ratio in SELayer, the intermediate channel will be + ``int(channels/ratio)``. Default: 16. + conv_cfg (None or dict): Config dict for convolution layer. + Default: None, which means using conv2d. + act_cfg (dict or Sequence[dict]): Config dict for activation layer. + If act_cfg is a dict, two activation layers will be configured + by this dict. If act_cfg is a sequence of dicts, the first + activation layer will be configured by the first dict and the + second activation layer will be configured by the second dict. + Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0, + divisor=6.0)). + """ + + def __init__( + self, + channels, + ratio=16, + conv_cfg=None, + act_cfg=(dict(type='ReLU'), dict(type='HSigmoid', bias=3.0, divisor=6.0)), + ): + super(SELayer, self).__init__() + if isinstance(act_cfg, dict): + act_cfg = (act_cfg, act_cfg) + assert len(act_cfg) == 2 + assert mmcv.is_tuple_of(act_cfg, dict) + self.global_avgpool = nn.AdaptiveAvgPool2d(1) + self.conv1 = ConvModule( + in_channels=channels, + out_channels=make_divisible(channels // ratio, 8), + kernel_size=1, + stride=1, + conv_cfg=conv_cfg, + act_cfg=act_cfg[0], + ) + self.conv2 = ConvModule( + in_channels=make_divisible(channels // ratio, 8), + out_channels=channels, + kernel_size=1, + stride=1, + conv_cfg=conv_cfg, + act_cfg=act_cfg[1], + ) + + def forward(self, x): + out = self.global_avgpool(x) + out = self.conv1(out) + out = self.conv2(out) + return x * out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py new file mode 100644 index 000000000000..52f37c728381 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/self_attention_block.py @@ -0,0 +1,162 @@ +import torch +from torch import nn as nn +from torch.nn import functional as F + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, constant_init + + +class SelfAttentionBlock(nn.Module): + """General self-attention block/non-local block. + + Please refer to https://arxiv.org/abs/1706.03762 for details about key, + query and value. + + Args: + key_in_channels (int): Input channels of key feature. + query_in_channels (int): Input channels of query feature. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + share_key_query (bool): Whether share projection weight between key + and query projection. + query_downsample (nn.Module): Query downsample module. + key_downsample (nn.Module): Key downsample module. + key_query_num_convs (int): Number of convs for key/query projection. + value_num_convs (int): Number of convs for value projection. + matmul_norm (bool): Whether normalize attention map with sqrt of + channels + with_out (bool): Whether use out projection. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__( + self, + key_in_channels, + query_in_channels, + channels, + out_channels, + share_key_query, + query_downsample, + key_downsample, + key_query_num_convs, + value_out_num_convs, + key_query_norm, + value_out_norm, + matmul_norm, + with_out, + conv_cfg, + norm_cfg, + act_cfg, + ): + super(SelfAttentionBlock, self).__init__() + if share_key_query: + assert key_in_channels == query_in_channels + self.key_in_channels = key_in_channels + self.query_in_channels = query_in_channels + self.out_channels = out_channels + self.channels = channels + self.share_key_query = share_key_query + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.key_project = self.build_project( + key_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + if share_key_query: + self.query_project = self.key_project + else: + self.query_project = self.build_project( + query_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + self.value_project = self.build_project( + key_in_channels, + channels if with_out else out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + if with_out: + self.out_project = self.build_project( + channels, + out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + else: + self.out_project = None + + self.query_downsample = query_downsample + self.key_downsample = key_downsample + self.matmul_norm = matmul_norm + + self.init_weights() + + def init_weights(self): + """Initialize weight of later layer.""" + if self.out_project is not None: + if not isinstance(self.out_project, ConvModule): + constant_init(self.out_project, 0) + + def build_project(self, in_channels, channels, num_convs, use_conv_module, conv_cfg, norm_cfg, act_cfg): + """Build projection layer for key/query/value/out.""" + if use_conv_module: + convs = [ConvModule(in_channels, channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)] + for _ in range(num_convs - 1): + convs.append(ConvModule(channels, channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)) + else: + convs = [nn.Conv2d(in_channels, channels, 1)] + for _ in range(num_convs - 1): + convs.append(nn.Conv2d(channels, channels, 1)) + if len(convs) > 1: + convs = nn.Sequential(*convs) + else: + convs = convs[0] + return convs + + def forward(self, query_feats, key_feats): + """Forward function.""" + batch_size = query_feats.size(0) + query = self.query_project(query_feats) + if self.query_downsample is not None: + query = self.query_downsample(query) + query = query.reshape(*query.shape[:2], -1) + query = query.permute(0, 2, 1).contiguous() + + key = self.key_project(key_feats) + value = self.value_project(key_feats) + if self.key_downsample is not None: + key = self.key_downsample(key) + value = self.key_downsample(value) + key = key.reshape(*key.shape[:2], -1) + value = value.reshape(*value.shape[:2], -1) + value = value.permute(0, 2, 1).contiguous() + + sim_map = torch.matmul(query, key) + if self.matmul_norm: + sim_map = (self.channels ** -0.5) * sim_map + sim_map = F.softmax(sim_map, dim=-1) + + context = torch.matmul(sim_map, value) + context = context.permute(0, 2, 1).contiguous() + context = context.reshape(batch_size, -1, *query_feats.shape[2:]) + if self.out_project is not None: + context = self.out_project(context) + return context diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py new file mode 100644 index 000000000000..8558925074e1 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/up_conv_block.py @@ -0,0 +1,106 @@ +import torch +import torch.nn as nn +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.cnn import ConvModule, build_upsample_layer + + +class UpConvBlock(nn.Module): + """Upsample convolution block in decoder for UNet. + + This upsample convolution block consists of one upsample module + followed by one convolution block. The upsample module expands the + high-level low-resolution feature map and the convolution block fuses + the upsampled high-level low-resolution feature map and the low-level + high-resolution feature map from encoder. + + Args: + conv_block (nn.Sequential): Sequential of convolutional layers. + in_channels (int): Number of input channels of the high-level + skip_channels (int): Number of input channels of the low-level + high-resolution feature map from encoder. + out_channels (int): Number of output channels. + num_convs (int): Number of convolutional layers in the conv_block. + Default: 2. + stride (int): Stride of convolutional layer in conv_block. Default: 1. + dilation (int): Dilation rate of convolutional layer in conv_block. + Default: 1. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + upsample_cfg (dict): The upsample config of the upsample module in + decoder. Default: dict(type='InterpConv'). If the size of + high-level feature map is the same as that of skip feature map + (low-level feature map from encoder), it does not need upsample the + high-level feature map and the upsample_cfg is None. + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + """ + + def __init__( + self, + conv_block, + in_channels, + skip_channels, + out_channels, + num_convs=2, + stride=1, + dilation=1, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + dcn=None, + plugins=None, + ): + super(UpConvBlock, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + + self.conv_block = conv_block( + in_channels=2 * skip_channels, + out_channels=out_channels, + num_convs=num_convs, + stride=stride, + dilation=dilation, + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + dcn=None, + plugins=None, + ) + if upsample_cfg is not None: + self.upsample = build_upsample_layer( + cfg=upsample_cfg, + in_channels=in_channels, + out_channels=skip_channels, + with_cp=with_cp, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + else: + self.upsample = ConvModule( + in_channels, + skip_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + + def forward(self, skip, x): + """Forward function.""" + + x = self.upsample(x) + out = torch.cat([skip, x], dim=1) + out = self.conv_block(out) + + return out diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py new file mode 100644 index 000000000000..fc3419e9a74c --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/models/utils/weight_init.py @@ -0,0 +1,63 @@ +"""Modified from https://github.com/rwightman/pytorch-image- +models/blob/master/timm/models/layers/drop.py.""" + +import math +import warnings + +import torch + + +def _no_grad_trunc_normal_(tensor, mean, std, a, b): + """Reference: https://people.sc.fsu.edu/~jburkardt/presentations + /truncated_normal.pdf""" + + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 + + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn( + 'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. ' + 'The distribution of values may be incorrect.', + stacklevel=2, + ) + + with torch.no_grad(): + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + lower_bound = norm_cdf((a - mean) / std) + upper_bound = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [l, u], then translate to + # [2l-1, 2u-1]. + tensor.uniform_(2 * lower_bound - 1, 2 * upper_bound - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + tensor.erfinv_() + + # Transform to proper mean, std + tensor.mul_(std * math.sqrt(2.0)) + tensor.add_(mean) + + # Clamp to ensure it's in the proper range + tensor.clamp_(min=a, max=b) + return tensor + + +def trunc_normal_(tensor, mean=0.0, std=1.0, a=-2.0, b=2.0): + r"""Fills the input Tensor with values drawn from a truncated + normal distribution. The values are effectively drawn from the + normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` + with values outside :math:`[a, b]` redrawn until they are within + the bounds. The method used for generating the random values works + best when :math:`a \leq \text{mean} \leq b`. + Args: + tensor (``torch.Tensor``): an n-dimensional `torch.Tensor` + mean (float): the mean of the normal distribution + std (float): the standard deviation of the normal distribution + a (float): the minimum cutoff value + b (float): the maximum cutoff value + """ + return _no_grad_trunc_normal_(tensor, mean, std, a, b) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/__init__.py new file mode 100644 index 000000000000..bec51c75b936 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/__init__.py @@ -0,0 +1,4 @@ +from .encoding import Encoding +from .wrappers import Upsample, resize + +__all__ = ['Upsample', 'resize', 'Encoding'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py new file mode 100644 index 000000000000..ea4a06ba9297 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/encoding.py @@ -0,0 +1,65 @@ +import torch +from torch import nn +from torch.nn import functional as F + + +class Encoding(nn.Module): + """Encoding Layer: a learnable residual encoder. + + Input is of shape (batch_size, channels, height, width). + Output is of shape (batch_size, num_codes, channels). + + Args: + channels: dimension of the features or feature channels + num_codes: number of code words + """ + + def __init__(self, channels, num_codes): + super(Encoding, self).__init__() + # init codewords and smoothing factor + self.channels, self.num_codes = channels, num_codes + std = 1.0 / ((num_codes * channels) ** 0.5) + # [num_codes, channels] + self.codewords = nn.Parameter( + torch.empty(num_codes, channels, dtype=torch.float).uniform_(-std, std), requires_grad=True + ) + # [num_codes] + self.scale = nn.Parameter(torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0), requires_grad=True) + + @staticmethod + def scaled_l2(x, codewords, scale): + num_codes, channels = codewords.size() + batch_size = x.size(0) + reshaped_scale = scale.view((1, 1, num_codes)) + expanded_x = x.unsqueeze(2).expand((batch_size, x.size(1), num_codes, channels)) + reshaped_codewords = codewords.view((1, 1, num_codes, channels)) + + scaled_l2_norm = reshaped_scale * (expanded_x - reshaped_codewords).pow(2).sum(dim=3) + return scaled_l2_norm + + @staticmethod + def aggregate(assignment_weights, x, codewords): + num_codes, channels = codewords.size() + reshaped_codewords = codewords.view((1, 1, num_codes, channels)) + batch_size = x.size(0) + + expanded_x = x.unsqueeze(2).expand((batch_size, x.size(1), num_codes, channels)) + encoded_feat = (assignment_weights.unsqueeze(3) * (expanded_x - reshaped_codewords)).sum(dim=1) + return encoded_feat + + def forward(self, x): + assert x.dim() == 4 and x.size(1) == self.channels + # [batch_size, channels, height, width] + batch_size = x.size(0) + # [batch_size, height x width, channels] + x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous() + # assignment_weights: [batch_size, channels, num_codes] + assignment_weights = F.softmax(self.scaled_l2(x, self.codewords, self.scale), dim=2) + # aggregate + encoded_feat = self.aggregate(assignment_weights, x, self.codewords) + return encoded_feat + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(Nx{self.channels}xHxW =>Nx{self.num_codes}' f'x{self.channels})' + return repr_str diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py new file mode 100644 index 000000000000..d366cf693b49 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/ops/wrappers.py @@ -0,0 +1,43 @@ +import warnings + +import torch.nn as nn +import torch.nn.functional as F + + +def resize(input, size=None, scale_factor=None, mode='nearest', align_corners=None, warning=True): + if warning: + if size is not None and align_corners: + input_h, input_w = tuple(int(x) for x in input.shape[2:]) + output_h, output_w = tuple(int(x) for x in size) + if output_h > input_h or output_w > output_h: + if ( + (output_h > 1 and output_w > 1 and input_h > 1 and input_w > 1) + and (output_h - 1) % (input_h - 1) + and (output_w - 1) % (input_w - 1) + ): + warnings.warn( + f'When align_corners={align_corners}, ' + 'the output would more aligned if ' + f'input size {(input_h, input_w)} is `x+1` and ' + f'out size {(output_h, output_w)} is `nx+1`' + ) + return F.interpolate(input, size, scale_factor, mode, align_corners) + + +class Upsample(nn.Module): + def __init__(self, size=None, scale_factor=None, mode='nearest', align_corners=None): + super(Upsample, self).__init__() + self.size = size + if isinstance(scale_factor, tuple): + self.scale_factor = tuple(float(factor) for factor in scale_factor) + else: + self.scale_factor = float(scale_factor) if scale_factor else None + self.mode = mode + self.align_corners = align_corners + + def forward(self, x): + if not self.size: + size = [int(t * self.scale_factor) for t in x.shape[-2:]] + else: + size = self.size + return resize(x, size, None, self.mode, self.align_corners) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/__init__.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/__init__.py new file mode 100644 index 000000000000..ac489e2dbbc0 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/__init__.py @@ -0,0 +1,4 @@ +from .collect_env import collect_env +from .logger import get_root_logger + +__all__ = ['get_root_logger', 'collect_env'] diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py new file mode 100644 index 000000000000..8ad826babee2 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/collect_env.py @@ -0,0 +1,16 @@ +import nemo.collections.multimodal.models.controlnet.uniformer.mmseg as mmseg +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import collect_env as collect_base_env +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import get_git_hash + + +def collect_env(): + """Collect the information of the running environments.""" + env_info = collect_base_env() + env_info['MMSegmentation'] = f'{mmseg.__version__}+{get_git_hash()[:7]}' + + return env_info + + +if __name__ == '__main__': + for name, val in collect_env().items(): + print('{}: {}'.format(name, val)) diff --git a/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/logger.py b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/logger.py new file mode 100644 index 000000000000..6167a6f88ea4 --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/uniformer/mmseg/utils/logger.py @@ -0,0 +1,27 @@ +import logging + +from nemo.collections.multimodal.models.controlnet.uniformer.mmcv.utils import get_logger + + +def get_root_logger(log_file=None, log_level=logging.INFO): + """Get the root logger. + + The logger will be initialized if it has not been initialized. By default a + StreamHandler will be added. If `log_file` is specified, a FileHandler will + also be added. The name of the root logger is the top-level package name, + e.g., "mmseg". + + Args: + log_file (str | None): The log filename. If specified, a FileHandler + will be added to the root logger. + log_level (int): The root logger level. Note that only the process of + rank 0 is affected, while other processes will set the level to + "Error" and be silent most of the time. + + Returns: + logging.Logger: The root logger. + """ + + logger = get_logger(name='mmseg', log_file=log_file, log_level=log_level) + + return logger diff --git a/nemo/collections/multimodal/models/controlnet/util.py b/nemo/collections/multimodal/models/controlnet/util.py new file mode 100644 index 000000000000..4ffd6d16e4ff --- /dev/null +++ b/nemo/collections/multimodal/models/controlnet/util.py @@ -0,0 +1,98 @@ +import os + +import numpy as np +import torch +import torchvision +from PIL import Image +from pytorch_lightning import Callback +from pytorch_lightning.utilities.rank_zero import rank_zero_only +from nemo.collections.multimodal.models.controlnet.uniformer import UniformerDetector + + +class ImageLogger(Callback): + def __init__( + self, + batch_frequency=2000, + max_images=4, + clamp=True, + increase_log_steps=True, + rescale=True, + disabled=False, + log_on_batch_idx=False, + log_first_step=False, + log_images_kwargs=None, + ): + super().__init__() + self.rescale = rescale + self.batch_freq = batch_frequency + self.max_images = max_images + if not increase_log_steps: + self.log_steps = [self.batch_freq] + self.clamp = clamp + self.disabled = disabled + self.log_on_batch_idx = log_on_batch_idx + self.log_images_kwargs = log_images_kwargs if log_images_kwargs else {} + self.log_first_step = log_first_step + + @rank_zero_only + def log_local(self, save_dir, split, images, global_step, current_epoch, batch_idx): + root = os.path.join(save_dir, "image_log", split) + for k in images: + grid = torchvision.utils.make_grid(images[k], nrow=4) + if self.rescale: + grid = (grid + 1.0) / 2.0 # -1,1 -> 0,1; c,h,w + grid = grid.transpose(0, 1).transpose(1, 2).squeeze(-1) + grid = grid.numpy() + grid = (grid * 255).astype(np.uint8) + filename = "{}_gs-{:06}_e-{:06}_b-{:06}.png".format(k, global_step, current_epoch, batch_idx) + path = os.path.join(root, filename) + os.makedirs(os.path.split(path)[0], exist_ok=True) + Image.fromarray(grid).save(path) + + def log_img(self, pl_module, batch, batch_idx, split="train"): + check_idx = batch_idx # if self.log_on_batch_idx else pl_module.global_step + if ( + self.check_frequency(check_idx) + and hasattr(pl_module, "log_images") # batch_idx % self.batch_freq == 0 + and callable(pl_module.log_images) + and self.max_images > 0 + ): + logger = type(pl_module.logger) + + is_train = pl_module.training + if is_train: + pl_module.eval() + + with torch.no_grad(): + images = pl_module.log_images(batch, split=split, **self.log_images_kwargs) + + for k in images: + N = min(images[k].shape[0], self.max_images) + images[k] = images[k][:N] + if isinstance(images[k], torch.Tensor): + images[k] = images[k].detach().cpu() + if self.clamp: + images[k] = torch.clamp(images[k], -1.0, 1.0) + + self.log_local( + pl_module.logger.save_dir, split, images, pl_module.global_step, pl_module.current_epoch, batch_idx + ) + + if is_train: + pl_module.train() + + def check_frequency(self, check_idx): + return check_idx % self.batch_freq == 0 + + def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx): + if not self.disabled: + self.log_img(pl_module, batch, batch_idx, split="train") + + +def get_preprocessing_function(name): + if name == 'seg2img': + apply_uniformer = UniformerDetector() + return apply_uniformer + else: + print("The application is not yet supported") + raise NotImplementedError diff --git a/nemo/collections/multimodal/models/dreambooth/__init__.py b/nemo/collections/multimodal/models/dreambooth/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/dreambooth/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/dreambooth/dreambooth.py b/nemo/collections/multimodal/models/dreambooth/dreambooth.py new file mode 100644 index 000000000000..5e4abd8522d7 --- /dev/null +++ b/nemo/collections/multimodal/models/dreambooth/dreambooth.py @@ -0,0 +1,654 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from abc import ABC +from functools import partial +from typing import Any, Dict, Optional, Union + +import numpy as np +import pytorch_lightning as pl +import torch +from omegaconf import DictConfig, OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.utilities import GradClipAlgorithmType +from torch._inductor import config as inductor_config +from torch.optim.lr_scheduler import LambdaLR + +from nemo.collections.multimodal.data.dreambooth.dreambooth_dataset import DreamBoothDataset +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + extract_into_tensor, + make_beta_schedule, + noise_like, +) +from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import ( + DiagonalGaussianDistribution, +) +from nemo.collections.multimodal.parts.stable_diffusion.utils import default, exists +from nemo.collections.multimodal.parts.utils import randn_like +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import MegatronPretrainingRandomSampler +from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel +from nemo.collections.nlp.modules.common.megatron.module import Float16Module +from nemo.collections.nlp.parts.utils_funcs import get_last_rank, is_last_rank +from nemo.core.classes import ModelPT +from nemo.core.classes.common import Serialization +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + +try: + from apex import amp + from apex.transformer.enums import AttnMaskType + from apex.transformer.pipeline_parallel.utils import get_num_microbatches + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + +try: + from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +def disabled_train(self, mode=True): + """Overwrite model.train with this function to make sure train/eval mode + does not change anymore.""" + return self + + +def _collate_fn(examples, with_prior_preservation=False): + if with_prior_preservation: + prompts = [[example["instance_prompt"], example["reg_prompt"]] for example in examples] + images = [example["instance_images"] for example in examples] + [example["reg_images"] for example in examples] + else: + prompts = [[example["instance_prompt"]] for example in examples] + images = [example["instance_images"] for example in examples] + + images = torch.stack(images) + images = images.to(memory_format=torch.contiguous_format).float() + + return prompts, images + + +class DreamBooth(torch.nn.Module, Serialization): + def __init__(self, cfg, model_parallel_config): + super().__init__() + self.cfg = cfg + self.config = model_parallel_config + self.with_prior_preservation = self.cfg.with_prior_preservation + self.num_reg_images = self.cfg.data.num_reg_images + self.prior_loss_weight = self.cfg.prior_loss_weight + self.num_images_per_prompt = self.cfg.data.num_images_per_prompt + + self.train_text_encoder = self.cfg.train_text_encoder + self.instantiate_text_encoder(self.cfg.cond_stage_config) + + self.inductor = self.cfg.inductor + self.inductor_cudagraphs = self.cfg.inductor_cudagraphs + + self.instantiate_vae(self.cfg.first_stage_config) + self.instantiate_unet(self.cfg.unet_config) + + self.scale_factor = self.cfg.scale_factor + self.num_timesteps = self.cfg.noise_scheduler.timesteps + self.parameterization = self.cfg.noise_scheduler.parameterization + self.get_noise_scheduler(self.cfg.noise_scheduler) + + self.model_type = None + self.rng = torch.Generator(device=torch.cuda.current_device(),) + + self.use_cached_latents = self.cfg.use_cached_latents + + if self.cfg.channels_last: + self.unet = self.unet.to(memory_format=torch.channels_last) + + def instantiate_unet(self, cfg): + self.unet = DreamBooth.from_config_dict(cfg) + self.unet.train() + if self.inductor: + # TorchInductor with CUDA graph can lead to OOM + inductor_config.triton.cudagraphs = self.inductor_cudagraphs + torch._dynamo.config.dynamic_shapes = False + torch._dynamo.config.automatic_dynamic_shapes = False + self.unet = torch.compile(self.unet) + + def instantiate_vae(self, cfg): + model = DreamBooth.from_config_dict(cfg) + self.vae = model.eval() + self.vae.train = disabled_train + for param in self.vae.parameters(): + param.requires_grad = False + + def instantiate_text_encoder(self, cfg): + model = DreamBooth.from_config_dict(cfg) + if self.train_text_encoder: + self.text_encoder = model.train() + for param in self.text_encoder.parameters(): + param.requires_grad = True + else: + self.text_encoder = model.eval() + self.text_encoder.train = disabled_train + for param in self.text_encoder.parameters(): + param.requires_grad = False + + def get_noise_scheduler(self, cfg): + model = DreamBooth.from_config_dict(cfg) + self.noise_scheduler = model.eval() + + def forward(self, batch): + + x, cond = batch + if self.use_cached_latents: + x = DiagonalGaussianDistribution(x) + latents = x.sample().detach() * self.scale_factor + else: + latents = self.vae.encode(x).sample().detach() + latents = latents * self.scale_factor + + noise = randn_like(latents, generator=self.rng) + t = torch.randint(0, self.num_timesteps, (latents.shape[0],), generator=self.rng, device=latents.device).long() + x_noisy = self.noise_scheduler(x_start=latents, t=t, noise=noise) + + # cond = self.text_encoder([t[0] for t in batch["prompts"]]) + # if self.with_prior_preservation: + # cond_prior = self.text_encoder([t[1] for t in batch["prompts"]]) + # cond = torch.cat([cond, cond_prior], dim=0) + + model_output = self.unet(x_noisy, t, cond) + + if self.parameterization == "x0": + target = latents + elif self.parameterization == "eps": + target = noise + else: + raise NotImplementedError() + + if self.with_prior_preservation: + model_pred, model_pred_prior = torch.chunk(model_output, 2, dim=0) + target, target_prior = torch.chunk(target, 2, dim=0) + loss = torch.nn.functional.mse_loss(model_pred.float(), target.float(), reduction="mean") + prior_loss = torch.nn.functional.mse_loss(model_pred_prior.float(), target_prior.float(), reduction="mean") + loss = loss + prior_loss * self.prior_loss_weight + + else: + loss = torch.nn.functional.mse_loss(target.float(), model_output.float(), reduction="mean") + return loss + + def parameters(self): + params = list(self.unet.parameters()) + if self.train_text_encoder: + # print(f"{self.__class__.__name__}: Also optimizing conditioner params!") + params = params + list(self.text_encoder.parameters()) + return params + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + pass + + +class MegatronDreamBooth(MegatronBaseModel): + def __init__(self, cfg: DictConfig, trainer: Trainer): + if not HAVE_APEX: + raise ImportError( + "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + if not HAVE_MEGATRON_CORE: + raise ImportError( + "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + + # this prevents base constructor from initializing tokenizer + self.tokenizer = None + super().__init__(cfg, trainer=trainer) + + self._validate_trainer() + + # megatron_amp_O2 is not yet supported in diffusion models + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) + self.model = self.model_provider_func() + + if self.trainer.precision in ['bf16', 'bf16-mixed']: + self.autocast_dtype = torch.bfloat16 + elif self.trainer.precision in [32, '32', '32-true']: + self.autocast_dtype = torch.float + elif self.trainer.precision in [16, '16', '16-mixed']: + self.autocast_dtype = torch.half + else: + raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') + + def get_module_list(self): + if isinstance(self.model, list): + return [model.module if isinstance(model, Float16Module) else model for model in self.model] + elif isinstance(self.model, Float16Module): + return [self.model.module] + else: + return [self.model] + + def model_provider_func(self, pre_process=True, post_process=True): + """Model depends on pipeline paralellism.""" + model = DreamBooth(cfg=self.cfg, model_parallel_config=self.model_parallel_config) + return model + + def forward(self, batch): + output_tensor = self.model(batch) + return output_tensor + + def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): + tensor_shape = None # Placeholder + + # handle asynchronous grad reduction + no_sync_func = None + if not forward_only and self.with_distributed_adam: + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) + + # pipeline schedules will get these from self.model.config + for module in self.get_module_list(): + module.config.no_sync_func = no_sync_func + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + fwd_bwd_function = get_forward_backward_func() + + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=self.model, + num_microbatches=get_num_microbatches(), + forward_only=forward_only, + seq_length=None, + micro_batch_size=self.cfg.micro_batch_size, + ) + + # losses_reduced_per_micro_batch is a list of dictionaries + # [{"loss": 0.1}, {"loss": 0.2}, ...] which are from gradient accumulation steps + # only the last stages of the pipeline return losses + loss_dict = {} + if losses_reduced_per_micro_batch: + if (not forward_only) or self.cfg.data.get('validation_drop_last', True): + # average loss across micro batches + prefix = 'train' + for key in losses_reduced_per_micro_batch[0]: + loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_dict[f'{prefix}/{key}'] = loss_tensor.mean() + loss_mean = loss_dict["train/loss"] + else: + raise NotImplementedError("Losses of micro batches sizes must be uniform!") + else: + if forward_only: + loss_mean = [] + else: + loss_mean = torch.tensor(0.0, device=torch.cuda.current_device()) + + return loss_mean, loss_dict + + def training_step(self, dataloader_iter, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + Batch should be a list of microbatches and those microbatches should on CPU. + Microbatches are then moved to GPU during the pipeline. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + + # we zero grads here because we also call backward in the apex fwd/bwd functions + self._optimizer.zero_grad() + + loss_mean, loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, False) + + torch.distributed.broadcast(loss_mean, get_last_rank()) + + # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced + if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): + self.allreduce_sequence_parallel_gradients() + + if self.with_distributed_adam: + # gradients are reduced internally in distributed optimizer + pass + elif self.megatron_amp_O2: + # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): + # # main grads are stored in the MainParamsOptimizer wrapper + # self._optimizer.allreduce_main_grads() + self._optimizer.allreduce_main_grads() + elif not self.cfg.get('ddp_overlap', True): + # async grad allreduce is not currently implemented for O1/autocasting mixed precision training + # so we all-reduce gradients after the pipeline + self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) + + if self.cfg.precision in [16, '16', '16-mixed']: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log('loss_scale', loss_scale, prog_bar=True, batch_size=1) + + self.log_dict(loss_dict, prog_bar=False, logger=True, on_step=True, rank_zero_only=True, batch_size=1) + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log( + 'consumed_samples', + self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), + prog_bar=True, + rank_zero_only=True, + batch_size=1, + ) + return loss_mean + + def validation_step(self, dataloader_iter, batch_idx): + loss, val_loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, True) + + self.log_dict(val_loss_dict, prog_bar=False, logger=True, on_step=False, on_epoch=True, batch_size=1) + + return loss + + def backward(self, *args, **kwargs): + """ LightningModule hook to do backward. + We want this to do nothing since we run backward in the fwd/bwd functions from apex. + No need to call it here. + """ + pass + + def optimizer_zero_grad(self, *args, **kwargs): + """ LightningModule hook to zero grad. + We want this to do nothing as we are zeroing grads during the training_step. + """ + pass + + def _append_sequence_parallel_module_grads(self, module, grads): + """ Helper method for allreduce_sequence_parallel_gradients""" + + for param in module.parameters(): + sequence_parallel_param = getattr(param, 'sequence_parallel', False) + if sequence_parallel_param and param.requires_grad: + if self.megatron_amp_O2: + grad = param.main_grad + else: + grad = param.grad + grads.append(grad.data) + + def get_forward_output_and_loss_func(self): + def process_batch(batch): + # noise_map, condition + prompts, images = batch + # DB has more dedicated structure for encoding, so we enable autocasting here as well + with torch.cuda.amp.autocast( + self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, + ): + images = images.cuda(non_blocking=True) + + cond = self.model.text_encoder([t[0] for t in prompts]) + if self.cfg.with_prior_preservation: + cond_prior = self.model.text_encoder([t[1] for t in prompts]) + cond = torch.cat([cond, cond_prior], dim=0) + + return images, cond + + def fwd_output_and_loss_func(dataloader_iter, model): + batch = next(dataloader_iter) + batch = process_batch(batch) + batch = [x.cuda(non_blocking=True) for x in batch] + loss = model(batch) + + def dummy(output_tensor): + return loss, {'loss': loss} + + return loss, dummy + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(batch, model): + raise NotImplementedError + + return fwd_output_only_func + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + self.model.rng.manual_seed(self.cfg.seed + 100 * parallel_state.get_data_parallel_rank()) + + # log number of parameters + if isinstance(self.model, list): + num_parameters_on_device = sum( + [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] + ) + else: + num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) + + # to be summed across data parallel group + total_num_parameters = torch.tensor(num_parameters_on_device).cuda(non_blocking=True) + + torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer.ckpt_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + # Batch size need to be provided for webdatset + self._num_micro_batches = get_num_microbatches() + self._micro_batch_size = self.cfg.micro_batch_size + + self.setup_training_data(self.cfg.data) + + def setup_training_data(self, cfg): + if self.cfg.with_prior_preservation: + if cfg.regularization_dir is None: + raise ValueError("Regularization images must be provided to train with prior preservation loss") + if cfg.regularization_prompt is None: + raise ValueError("Regularization prompts must be provided to train with prior preservation loss") + + self.train_dataset = DreamBoothDataset( + instance_data_root=cfg.instance_dir, + instance_prompt=cfg.instance_prompt, + with_prior_preservation=self.cfg.with_prior_preservation, + reg_data_root=cfg.regularization_dir if self.cfg.with_prior_preservation else None, + reg_prompt=cfg.regularization_prompt if self.cfg.with_prior_preservation else None, + size=cfg.resolution, + center_crop=cfg.center_crop, + load_cache_latents=self.model.use_cached_latents, + cached_instance_data_root=self.cfg.data.get("cached_instance_dir", None), + cached_reg_data_root=self.cfg.data.get("cached_reg_dir", None) + if self.cfg.with_prior_preservation + else None, + vae=self.model.vae, + text_encoder=self.model.text_encoder, + ) + + batch_sampler = MegatronPretrainingRandomSampler( + total_samples=len(self.train_dataset), + consumed_samples=self.compute_consumed_samples(0), + micro_batch_size=self.cfg.micro_batch_size, + global_batch_size=self.cfg.global_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=False, + ) + + self._train_dl = torch.utils.data.DataLoader( + self.train_dataset, + batch_sampler=batch_sampler, + collate_fn=partial(_collate_fn, with_prior_preservation=self.cfg.with_prior_preservation), + num_workers=cfg.num_workers, + pin_memory=True, + persistent_workers=True, + ) + + def setup_validation_data(self, cfg): + pass + + def setup_test_data(self, cfg): + pass + + def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: + """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device + When using pipeline parallelism, we need the global batch to remain on the CPU, + since the memory overhead will be too high when using a large number of microbatches. + Microbatches are transferred from CPU to GPU inside the pipeline. + """ + return batch + + def _validate_trainer(self): + """ Certain trainer configurations can break training. + Here we try to catch them and raise an error. + """ + if self.trainer.accumulate_grad_batches > 1: + raise ValueError( + f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' + ) + + @classmethod + def list_available_models(cls): + return None + + def parameters(self): + if isinstance(self.model, list): + return itertools.chain.from_iterable(module.parameters() for module in self.model) + else: + return self.model.parameters() + + @classmethod + def load_from_checkpoint( + cls, + checkpoint_path: str, + map_location: Any = None, + hparams_file: Optional[str] = None, + strict: bool = True, + **kwargs, + ): + """ + Loads ModelPT from checkpoint, with some maintenance of restoration. + For documentation, please refer to LightningModule.load_from_checkpoin() documentation. + """ + checkpoint = None + try: + cls._set_model_restore_state(is_being_restored=True) + # TODO: replace with proper PTL API + with pl_legacy_patch(): + if map_location is not None: + checkpoint = pl_load(checkpoint_path, map_location=map_location) + else: + checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage) + + if hparams_file is not None: + extension = hparams_file.split(".")[-1] + if extension.lower() == "csv": + hparams = load_hparams_from_tags_csv(hparams_file) + elif extension.lower() in ("yml", "yaml"): + hparams = load_hparams_from_yaml(hparams_file) + else: + raise ValueError(".csv, .yml or .yaml is required for `hparams_file`") + + hparams["on_gpu"] = False + + # overwrite hparams by the given file + checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = hparams + + # for past checkpoint need to add the new key + if cls.CHECKPOINT_HYPER_PARAMS_KEY not in checkpoint: + checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = {} + # override the hparams with values that were passed in + cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].get('cfg', checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY]) + # TODO: can we do this without overriding? + config_kwargs = kwargs.copy() + if 'trainer' in config_kwargs: + config_kwargs.pop('trainer') + cfg.update(config_kwargs) + + # Disable individual unet/vae weights loading otherwise the model will look for these partial ckpts and raise error + if cfg: + if cfg.get('unet_config') and cfg.get('unet_config').get('from_pretrained'): + cfg.unet_config.from_pretrained = None + if cfg.get('first_stage_config') and cfg.get('first_stage_config').get('from_pretrained'): + cfg.first_stage_config.from_pretrained = None + ## Now when we covert ckpt to nemo, let's always get rid of those _orig_mod + if cfg.get('inductor'): + cfg.inductor = False + ## Append some dummy configs that DB didn't support + if not cfg.get('channels_last'): + cfg.channels_last = True + if not cfg.get('capture_cudagraph_iters'): + cfg.capture_cudagraph_iters = -1 + + # compatibility for stable diffusion old checkpoint tweaks + first_key = list(checkpoint['state_dict'].keys())[0] + if first_key == "betas": + # insert "model." into for megatron wrapper + new_state_dict = {} + for key in checkpoint['state_dict'].keys(): + new_key = "model." + key + new_state_dict[new_key] = checkpoint['state_dict'][key] + checkpoint['state_dict'] = new_state_dict + elif ( + first_key == 'model.text_encoder.transformer.text_model.embeddings.position_ids' + or first_key == 'model.text_encoder.model.language_model.embedding.position_embeddings' + ): + # remap state keys from dreambooth when using HF clip + new_state_dict = {} + for key in checkpoint['state_dict'].keys(): + new_key = key.replace('._orig_mod', "") + new_key = new_key.replace('unet', 'model.diffusion_model') + new_key = new_key.replace('vae', 'first_stage_model') + new_key = new_key.replace('text_encoder', 'cond_stage_model') + new_key = new_key.replace('.noise_scheduler', '') + new_state_dict[new_key] = checkpoint['state_dict'][key] + checkpoint['state_dict'] = new_state_dict + + # compatibility for inductor in inference + if not cfg.get('inductor', False): + new_state_dict = {} + for key in checkpoint['state_dict'].keys(): + new_key = key.replace('._orig_mod', '', 1) + new_state_dict[new_key] = checkpoint['state_dict'][key] + checkpoint['state_dict'] = new_state_dict + + if cfg.get('megatron_amp_O2', False): + new_state_dict = {} + for key in checkpoint['state_dict'].keys(): + new_key = key.replace('model.', 'model.module.', 1) + new_state_dict[new_key] = checkpoint['state_dict'][key] + checkpoint['state_dict'] = new_state_dict + + if 'cfg' in kwargs: + model = ptl_load_state(cls, checkpoint, strict=strict, **kwargs) + else: + model = ptl_load_state(cls, checkpoint, strict=strict, cfg=cfg, **kwargs) + # cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].cfg + + checkpoint = model + + finally: + cls._set_model_restore_state(is_being_restored=False) + return checkpoint diff --git a/nemo/collections/multimodal/models/dreambooth/util.py b/nemo/collections/multimodal/models/dreambooth/util.py new file mode 100644 index 000000000000..8f8a142f99f3 --- /dev/null +++ b/nemo/collections/multimodal/models/dreambooth/util.py @@ -0,0 +1,169 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import partial + +import numpy as np +import pytorch_lightning as pl +import torch +import torch.nn as nn + +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + extract_into_tensor, + make_beta_schedule, + noise_like, +) +from nemo.collections.multimodal.parts.stable_diffusion.utils import default, exists +from nemo.core.classes.common import Serialization + + +class DiffusionWrapper(torch.nn.Module, Serialization): + def __init__(self, diff_model_config, conditioning_key): + super().__init__() + if isinstance(diff_model_config, nn.Module): + self.diffusion_model = diff_model_config + else: + self.diffusion_model = DiffusionWrapper.from_config_dict(diff_model_config) + self.conditioning_key = conditioning_key + assert self.conditioning_key in [None, 'concat', 'crossattn', 'hybrid', 'adm'] + + def forward(self, x_noisy, t, cond, return_ids=False): + if isinstance(cond, dict): + # hybrid case, cond is exptected to be a dict + pass + else: + if not isinstance(cond, list): + cond = [cond] + key = 'c_concat' if self.conditioning_key == 'concat' else 'c_crossattn' + cond = {key: cond} + x_recon = self.apply_step(x_noisy, t, **cond) + return x_recon + + def apply_step(self, x, t, c_concat: list = None, c_crossattn: list = None): + if self.conditioning_key is None: + out = self.diffusion_model(x, t) + elif self.conditioning_key == 'concat': + xc = torch.cat([x] + c_concat, dim=1) + out = self.diffusion_model(xc, t) + elif self.conditioning_key == 'crossattn': + cc = torch.cat(c_crossattn, 1) + out = self.diffusion_model(x, t, context=cc) + elif self.conditioning_key == 'hybrid': + xc = torch.cat([x] + c_concat, dim=1) + cc = torch.cat(c_crossattn, 1) + out = self.diffusion_model(xc, t, context=cc) + elif self.conditioning_key == 'adm': + cc = c_crossattn[0] + out = self.diffusion_model(x, t, y=cc) + else: + raise NotImplementedError() + + return out + + +class sd_noise_scheduler(nn.Module): + def __init__( + self, + parameterization='eps', + v_posterior=0, + given_betas=None, + beta_schedule='linear', + timesteps=1000, + linear_start=0.00085, + linear_end=0.012, + cosine_s=8e-3, + ): + super().__init__() + self.parameterization = parameterization + self.v_posterior = v_posterior + self.register_schedule( + given_betas=given_betas, + beta_schedule=beta_schedule, + timesteps=timesteps, + linear_start=linear_start, + linear_end=linear_end, + cosine_s=cosine_s, + ) + + def register_schedule( + self, + given_betas=None, + beta_schedule="linear", + timesteps=1000, + linear_start=1e-4, + linear_end=2e-2, + cosine_s=8e-3, + ): + if exists(given_betas): + betas = given_betas + else: + betas = make_beta_schedule( + beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s + ) + alphas = 1.0 - betas + alphas_cumprod = np.cumprod(alphas, axis=0) + alphas_cumprod_prev = np.append(1.0, alphas_cumprod[:-1]) + + (timesteps,) = betas.shape + self.num_timesteps = int(timesteps) + self.linear_start = linear_start + self.linear_end = linear_end + assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' + + to_torch = partial(torch.tensor, dtype=torch.float32) + + self.register_buffer('betas', to_torch(betas)) + self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) + self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1.0 - alphas_cumprod))) + self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1.0 - alphas_cumprod))) + self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod))) + self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod - 1))) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + posterior_variance = (1 - self.v_posterior) * betas * (1.0 - alphas_cumprod_prev) / ( + 1.0 - alphas_cumprod + ) + self.v_posterior * betas + # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) + self.register_buffer('posterior_variance', to_torch(posterior_variance)) + # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain + self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20)))) + self.register_buffer( + 'posterior_mean_coef1', to_torch(betas * np.sqrt(alphas_cumprod_prev) / (1.0 - alphas_cumprod)) + ) + self.register_buffer( + 'posterior_mean_coef2', to_torch((1.0 - alphas_cumprod_prev) * np.sqrt(alphas) / (1.0 - alphas_cumprod)) + ) + + if self.parameterization == "eps": + lvlb_weights = self.betas ** 2 / ( + 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod) + ) + elif self.parameterization == "x0": + lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2.0 * 1 - torch.Tensor(alphas_cumprod)) + else: + raise NotImplementedError("mu not supported") + # TODO how to choose this term + lvlb_weights[0] = lvlb_weights[1] + self.register_buffer('lvlb_weights', lvlb_weights, persistent=False) + assert not torch.isnan(self.lvlb_weights).all() + + def forward(self, x_start, t, noise=None): + noise = default(noise, lambda: torch.randn_like(x_start)) + return ( + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise + ) diff --git a/nemo/collections/multimodal/models/imagen/__init__.py b/nemo/collections/multimodal/models/imagen/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/imagen/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/imagen/imagen.py b/nemo/collections/multimodal/models/imagen/imagen.py new file mode 100644 index 000000000000..64c1382e2d54 --- /dev/null +++ b/nemo/collections/multimodal/models/imagen/imagen.py @@ -0,0 +1,602 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import itertools +from datetime import datetime +from functools import partial +from typing import Any + +import numpy as np +import torch +import torch.nn.functional as F +from einops import rearrange +from omegaconf import DictConfig, open_dict +from pytorch_lightning import Trainer +from tqdm import tqdm + +from nemo.collections.multimodal.data.imagen.imagen_dataset import build_train_valid_datasets +from nemo.collections.multimodal.models.imagen.precond import ContinousDDPMPrecond, EDMPrecond +from nemo.collections.multimodal.modules.imagen.diffusionmodules.nets import EfficientUNetModel, UNetModel +from nemo.collections.multimodal.modules.imagen.encoder.t5encoder import T5Encoder +from nemo.collections.multimodal.modules.imagen.sampler.sampler import DDPMSampler, EDMSampler +from nemo.collections.multimodal.parts.imagen.utils import random_dropout +from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel +from nemo.collections.nlp.modules.common.megatron.module import Float16Module +from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.core.classes.common import Serialization +from nemo.utils import logging + +try: + from apex import amp + from apex.transformer.pipeline_parallel.utils import get_num_microbatches + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + +try: + from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + HAVE_MEGATRON_CORE = False + +try: + from group_norm import GroupNormOpt + + OPT_GROUP_NORM = True +except Exception: + print('Fused optimized group norm has not been installed.') + OPT_GROUP_NORM = False + +DUMMY_TENSOR = torch.tensor([1.0]) + + +class Imagen(torch.nn.Module, Serialization): + def __init__(self, cfg, model_parallel_config): + super().__init__() + self.cfg = cfg + self.config = model_parallel_config + # Make sure the initialization on different GPUs are the same + self.unet_type = cfg.get('unet_type', 'base') + self.noise_cond_aug = cfg.get('noise_cond_aug', False) + if self.unet_type == 'base': + logging.info('Initializing UNet.') + unet = UNetModel(**cfg.unet, text_embed_dim=cfg.conditioning.embed_dim) + elif self.unet_type == 'sr': + logging.info('Initializing Efficient-UNet.') + unet = EfficientUNetModel( + **cfg.unet, text_embed_dim=cfg.conditioning.embed_dim, noise_cond_aug=self.noise_cond_aug + ) + elif self.unet_type == 'sr-unet': + logging.info('Initializing UNet for SR model.') + unet = UNetModel(**cfg.unet, text_embed_dim=cfg.conditioning.embed_dim, noise_cond_aug=self.noise_cond_aug) + else: + raise NotImplemented(f'{self.unet_type} UNet is not implemented.') + + self.channels_last = cfg.get('channels_last', False) + if self.channels_last: + assert OPT_GROUP_NORM, 'Training in channels last format requires optmized group norm implementation.' + logging.info('Training in torch channels last format.') + unet = unet.to(memory_format=torch.channels_last) + + # Preconditioning + self.preconditioning_type = cfg.get('preconditioning_type', 'DDPM') + if self.preconditioning_type == 'DDPM': + logging.info('Preconditioned with Continous DDPM') + self.model = ContinousDDPMPrecond(unet=unet, **cfg.preconditioning, noise_cond_aug=self.noise_cond_aug) + self.sampler = DDPMSampler(unet_type=self.unet_type, denoiser=self.model.scheduler) + elif self.preconditioning_type == 'EDM': + logging.info('Preconditioned with EDM') + self.model = EDMPrecond(unet=unet, **cfg.preconditioning, noise_cond_aug=self.noise_cond_aug) + self.sampler = EDMSampler(unet_type=self.unet_type) + else: + raise NotImplemented(f'{self.preconditioning_type} preconditioning is not implemented.') + + self.rng = None + self.conditioning = cfg.conditioning + self.text_drop_rate = cfg.conditioning.drop_rate + self.model_type = None + self.image_size = cfg.unet.image_size + + def setup_rng(self): + # We need to set different rng seed for different GPUs/ different runs; + # otherwise, the noise map and time will be exactly the same. + self.rng = torch.Generator(device=torch.cuda.current_device()) + self.rng_seed = int(datetime.now().timestamp()) + self.cfg.seed + parallel_state.get_data_parallel_rank() + logging.info(f'RNG seed set as {self.rng_seed} for rank {parallel_state.get_data_parallel_rank()}') + self.rng.manual_seed(self.rng_seed) + self.model.set_rng(self.rng) + + @property + def unet(self): + return self.model.unet + + def get_text_encoder(self, encoder_path=None): + # TODO Assume using T5 for all + return T5Encoder(max_seq_len=self.conditioning.token_length, encoder_path=encoder_path) + + def forward(self, x_start, text_embed, text_mask, x_lowres=None): + if self.unet_type == 'base': + assert x_lowres[0].item() == DUMMY_TENSOR.item(), 'Base model should have no low-resolution conditioning' + x_lowres = None + else: + assert x_lowres[0].dim() not in [0, 1], 'SR model should have low-resolution conditioning' + + if self.channels_last: + x_start = x_start.to(memory_format=torch.channels_last) + if x_lowres is not None: + x_lowres = x_lowres.to(memory_format=torch.channels_last) + + # Apply random dropout to text embedding + text_embed = random_dropout(text_embed, drop_rate=self.text_drop_rate) + # UNet Forward Pass + low_res_cond = {'x_low_res': x_lowres} if x_lowres is not None else {} + # UNet Forward Pass and compute loss + loss = self.model.compute_loss( + x0=x_start, + text_embed=text_embed, + text_mask=text_mask, + time=None, # Randomly Sample + noise=None, # Randomly Sample + **low_res_cond, + ) + return loss, {'train/loss': loss} + + @torch.no_grad() + def sample_image( + self, + noise_map, + text_encoding, + text_mask, + x_low_res=None, + cond_scale=1.0, + sampling_steps=None, + thresholding_method='dynamic', + ): + return self.sampler( + self.model, noise_map, text_encoding, text_mask, x_low_res, cond_scale, sampling_steps, thresholding_method + ) + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + # only required for pipeline parallelism + pass + + +class MegatronImagen(MegatronBaseModel): + def __init__(self, cfg: DictConfig, trainer: Trainer): + if not HAVE_APEX: + raise ImportError( + "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + + with open_dict(cfg): + cfg.hidden_size = cfg.unet.embed_dim + # this prevents base constructor from initializing tokenizer + self.tokenizer = None + super().__init__(cfg, trainer=trainer) + + self._validate_trainer() + # megatron_amp_O2 is not yet supported in diffusion models + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) + + self.model = self.model_provider_func() + + if self.trainer.precision in ['bf16', 'bf16-mixed']: + self.autocast_dtype = torch.bfloat16 + elif self.trainer.precision in [32, '32', '32-true']: + self.autocast_dtype = torch.float + elif self.trainer.precision in [16, '16', '16-mixed']: + self.autocast_dtype = torch.half + else: + raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') + + self.online_encoding = cfg.conditioning.get("online_encoding", False) + self.text_encoder_path = cfg.conditioning.get("encoder_path", None) + + def get_module_list(self): + if isinstance(self.model, list): + return [model.module if isinstance(model, Float16Module) else model for model in self.model] + elif isinstance(self.model, Float16Module): + return [self.model.module] + else: + return [self.model] + + def model_provider_func(self, pre_process=True, post_process=True): + """Model depends on pipeline paralellism.""" + model = Imagen(cfg=self.cfg, model_parallel_config=self.model_parallel_config) + return model + + def get_forward_output_and_loss_func(self): + def process_batch(batch): + """ Prepares the batch for megatron fwd/bwd functions. + Global batch is a list of micro batches. + """ + # Base model and SR models have slightly different batch input: + # Base model would only require images (64x64), + # while SR models (both SR256 and SR1024) require low-res image (64x64) and + # actual (cropped) image (256x256) + if self.cfg.unet_type == 'base': + x_start = batch['images'] + # Pass in DUMMY_TENSOR because megatron requires each input to be + # tensor (not None) with same batch size (first dim) + x_lowres = DUMMY_TENSOR.repeat(x_start.shape[0]) + elif self.cfg.unet_type == 'sr' or self.cfg.unet_type == 'sr-unet': + x_start = batch['images_256'] + x_lowres = batch['images_64'] + else: + raise NotImplemented(f'Unknown UNet type: {self.cfg.unet_type}') + + if self.cfg.conditioning.get("online_encoding", False): + input_text = batch["raw_text"] + # Encode the text embeddings using text encoder. + with torch.no_grad(): + text_embed, text_mask = self.text_encoder.encode(input_text) + else: + text_conditioning_key = self.cfg.conditioning.out_key + text_embed = batch[f'{text_conditioning_key}_embeddings'] + text_mask = batch[f'{text_conditioning_key}_mask'] + return [x_start, text_embed, text_mask, x_lowres] + + def fwd_output_and_loss_func(dataloader_iter, model): + batch = next(dataloader_iter) + batch = process_batch(batch) + batch = [x.cuda(non_blocking=True) for x in batch] + loss, loss_dict = model(*batch) + + def dummy(output_tensor): + return loss, loss_dict + + # output_tensor, and a function to convert output_tensor to loss + loss_dict + return loss, dummy + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(batch, model): + raise NotImplementedError + + return fwd_output_only_func + + def build_train_valid_test_datasets(self): + logging.info('Building datasets for Imagen...') + if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): + raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") + self._train_ds, self._validation_ds = build_train_valid_datasets( + model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0) + ) + # We do not have test dataset + self._test_ds = None + + if self._train_ds is not None: + logging.info(f'Length of train dataset: {len(self._train_ds)}') + if self._validation_ds is not None: + logging.info(f'Length of val dataset: {len(self._validation_ds)}') + if self._test_ds is not None: + logging.info(f'Length of test dataset: {len(self._test_ds)}') + logging.info(f'Finished building datasets for LatentDiffusion.') + return self._train_ds, self._validation_ds, self._test_ds + + def setup_training_data(self, cfg): + if hasattr(self, '_train_ds') and self._train_ds is not None: + consumed_samples = self.compute_consumed_samples(0) + logging.info( + f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' + ) + self._train_dl = torch.utils.data.DataLoader( + self._train_ds, + batch_size=self._micro_batch_size, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=True, + persistent_workers=True, + ) + + def setup_validation_data(self, cfg): + if hasattr(self, '_validation_ds') and self._validation_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' + ) + self._validation_dl = torch.utils.data.DataLoader( + self._validation_ds, + batch_size=self._micro_batch_size, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=False, + persistent_workers=True, + ) + + def setup_test_data(self, cfg): + if hasattr(self, '_test_ds') and self._test_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' + ) + self._test_dl = torch.utils.data.DataLoader( + self._test_ds, batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, + ) + + def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): + tensor_shape = None + + # handle asynchronous grad reduction + no_sync_func = None + if not forward_only and self.with_distributed_adam: + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) + + # pipeline schedules will get these from self.model.config + for module in self.get_module_list(): + module.config.no_sync_func = no_sync_func + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + fwd_bwd_function = get_forward_backward_func() + + # TODO @akhattar: add num_micro_batches_with_partial_activation_checkpoints when ready + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=self.model, + num_microbatches=get_num_microbatches(), + forward_only=forward_only, + seq_length=None, + micro_batch_size=self.cfg.micro_batch_size, + ) + + # losses_reduced_per_micro_batch is a list of dictionaries + # [{"loss": 0.1}, {"loss": 0.2}, ...] which are from gradient accumulation steps + # only the last stages of the pipeline return losses + loss_dict = {} + if losses_reduced_per_micro_batch: + if (not forward_only) or self.cfg.data.get('validation_drop_last', True): + # average loss across micro batches + for key in losses_reduced_per_micro_batch[0]: + loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_dict[key] = loss_tensor.mean() + loss_mean = loss_dict["train/loss"] + else: + # Get the total loss since micro batches sizes are not uniform + raise NotImplementedError("Losses of micro batches sizes must be uniform!") + else: + # we're not on the last pipeline stage so no losses + if forward_only: + loss_mean = [] + else: + loss_mean = torch.tensor(0.0).cuda() + + return loss_mean, loss_dict + + def training_step(self, dataloader_iter, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + Batch should be a list of microbatches and those microbatches should on CPU. + Microbatches are then moved to GPU during the pipeline. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + + # we zero grads here because we also call backward in the megatron-core fwd/bwd functions + self._optimizer.zero_grad() + + loss_mean, loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, False) + + torch.distributed.broadcast(loss_mean, get_last_rank()) + + # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced + if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): + self.allreduce_sequence_parallel_gradients() + + if self.with_distributed_adam: + # synchronize asynchronous grad reductions + # note: not necessary, but reduces performance degradation + # from multiple simultaneous NCCL calls + self._optimizer._finish_bucket_grad_sync() + elif self.megatron_amp_O2: + # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): + # # main grads are stored in the MainParamsOptimizer wrapper + # self._optimizer.allreduce_main_grads() + self._optimizer.allreduce_main_grads() + elif not self.cfg.get('ddp_overlap', True): + # async grad allreduce is not currently implemented for O1/autocasting mixed precision training + # so we all-reduce gradients after the pipeline + self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) + + if self.cfg.precision in [16, '16', '16-mixed']: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log('loss_scale', loss_scale, batch_size=1) + + self.log_dict(loss_dict, prog_bar=False, logger=True, on_step=True, rank_zero_only=True, batch_size=1) + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log( + 'consumed_samples', + self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), + prog_bar=True, + rank_zero_only=True, + batch_size=1, + ) + return loss_mean + + def backward(self, *args, **kwargs): + """ LightningModule hook to do backward. + We want this to do nothing since we run backward in the fwd/bwd functions from apex. + No need to call it here. + """ + pass + + def optimizer_zero_grad(self, *args, **kwargs): + """ LightningModule hook to zero grad. + We want this to do nothing as we are zeroing grads during the training_step. + """ + pass + + def _append_sequence_parallel_module_grads(self, module, grads): + """ Helper method for allreduce_sequence_parallel_gradients""" + + for param in module.parameters(): + sequence_parallel_param = getattr(param, 'sequence_parallel', False) + if sequence_parallel_param and param.requires_grad: + if self.megatron_amp_O2: + grad = param.main_grad + else: + grad = param.grad + grads.append(grad.data) + + def validation_step(self, dataloader_iter, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. """ + + loss, val_loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, True) + + self.log_dict(val_loss_dict, prog_bar=False, logger=True, on_step=False, on_epoch=True, batch_size=1) + return loss + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + + # log number of parameters + if isinstance(self.model, list): + num_parameters_on_device = sum( + [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] + ) + else: + num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) + + # to be summed across data parallel group + total_num_parameters = torch.tensor(num_parameters_on_device).cuda(non_blocking=True) + + torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer.ckpt_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + # allowing restored models to optionally setup datasets + self.build_train_valid_test_datasets() + + # Batch size need to be provided for webdatset + self._num_micro_batches = get_num_microbatches() + self._micro_batch_size = self.cfg.micro_batch_size + + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + # Setup RNG seed in model + self.model.setup_rng() + + def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: + """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device + When using pipeline parallelism, we need the global batch to remain on the CPU, + since the memory overhead will be too high when using a large number of microbatches. + Microbatches are transferred from CPU to GPU inside the pipeline. + """ + return batch + + def _validate_trainer(self): + """ Certain trainer configurations can break training. + Here we try to catch them and raise an error. + """ + if self.trainer.accumulate_grad_batches > 1: + raise ValueError( + f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' + ) + + @classmethod + def list_available_models(cls): + return None + + def parameters(self): + if isinstance(self.model, list): + return itertools.chain.from_iterable(module.parameters() for module in self.model) + else: + return self.model.parameters() + + def on_save_checkpoint(self, checkpoint) -> None: + if self.online_encoding: + # Removing the weights relating to Text encoder when saving the checkpoints + frozen_weights_keys = [k for k in checkpoint['state_dict'].keys() if k.startswith("text_encoder")] + for k in frozen_weights_keys: + del checkpoint['state_dict'][k] + + def on_load_checkpoint(self, checkpoint) -> None: + # make sure inductor naming is consistent with checkpoint's + inductor_enabled = self.cfg.get('inductor', False) + state_dict = checkpoint['state_dict'] + inductor_checkpoint = False + for k, v, in state_dict.items(): + if '_orig_mod' in k: + inductor_checkpoint = True + break + + if inductor_enabled and not inductor_checkpoint: + # ckpt needs to be converted to inductor-format weights (add .orig_mod) + logging.info('Add .orig_mod to all weight keys.') + new_state_dict = {} + for k, v in state_dict.items(): + idx = k.find('._orig_mod') + new_key = k[:idx] + k[idx + len('._orig_mod') :] + new_state_dict[new_key] = v + checkpoint['state_dict'] = new_state_dict + elif not inductor_enabled and inductor_checkpoint: + # ckpt needs to be converted to non-inductor-format weights (remove .orig_mod) + logging.info('Remove .orig_mod to all weight keys.') + new_state_dict = {} + for k, v in state_dict.items(): + new_key = k.replace("._orig_mod", "") + new_state_dict[new_key] = v + checkpoint['state_dict'] = new_state_dict + super().on_load_checkpoint(checkpoint) + + def on_fit_start(self) -> None: + if self.online_encoding: + # if encoding text online, set up text_encoder here (after loading checkpoints) instead of in __init__. + # This is because text encoder weights are not saved, so the encoder must be loaded after other weights + # are loaded. + logging.info( + f'Setting up pretrained text encoder: {self.text_encoder_path or "download or use cached t5-11b"}' + ) + self.text_encoder = self.model.get_text_encoder(encoder_path=self.text_encoder_path).to( + torch.cuda.current_device() + ) + self.text_encoder.eval() + for param in self.text_encoder.parameters(): + param.requires_grad = False diff --git a/nemo/collections/multimodal/models/imagen/imagen_pipeline.py b/nemo/collections/multimodal/models/imagen/imagen_pipeline.py new file mode 100644 index 000000000000..15916907dbfc --- /dev/null +++ b/nemo/collections/multimodal/models/imagen/imagen_pipeline.py @@ -0,0 +1,355 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import time +from dataclasses import dataclass, field +from typing import Callable, Dict, List, Literal, Optional, Union + +import torch +from omegaconf.omegaconf import OmegaConf +from pytorch_lightning import Trainer +from torch.cuda.amp import autocast + +from nemo.collections.multimodal.models.imagen.imagen import Imagen, MegatronImagen +from nemo.collections.multimodal.parts.utils import numpy_to_pil, setup_trainer_and_models_for_inference + + +@dataclass +class ImagenCustomizedModelConfig: + base_ckpt: Optional[str] = None + base_cfg: Optional[str] = None + sr256_ckpt: Optional[str] = None + sr256_cfg: Optional[str] = None + sr1024_ckpt: Optional[str] = None + sr1024_cfg: Optional[str] = None + + +@dataclass +class ImagenSamplingConfig: + step: Optional[int] = None + cfg: Optional[float] = 1 + + +@dataclass +class ImagenPipelineConfig: + model_name: Optional[str] = None + run_ema_model: Optional[bool] = True + customized_model: Optional[ImagenCustomizedModelConfig] = None + num_images_per_promt: Optional[int] = 8 + texts: Optional[List[str]] = field(default_factory=lambda: []) + output_path: Optional[str] = 'output/imagen_inference' + record_time: Optional[bool] = False + encoder_path: Optional[str] = None + target_resolution: Optional[int] = 256 + inference_precision: Optional[str] = '32' + thresholding_method: Optional[str] = 'dynamic' + samplings: Optional[List[ImagenSamplingConfig]] = field(default_factory=lambda: list()) + part: Optional[int] = 0 + + +class ImagenPipeline(Callable): + def __init__(self, models: List[Imagen], text_encoder, cfg, device): + self.models = [model.to(device) for model in models] + self.text_encoder = text_encoder.to(device) + self.cfg = cfg + self.device = device + + def _load_model(model_ckpt: str, model_cfg: str, eval_mode: bool = True, trainer: Trainer = None): + assert model_ckpt is not None, 'model ckpt cannot be None' + if model_ckpt.endswith('.nemo'): + model_cfg = MegatronImagen.restore_from(restore_path=model_ckpt, trainer=trainer, return_config=True) + model_cfg.unet.flash_attention = False + model_cfg.micro_batch_size = 1 + model_cfg.global_batch_size = 1 + model = MegatronImagen.restore_from( + restore_path=model_ckpt, override_config_path=model_cfg, trainer=trainer, + ) + elif model_ckpt.endswith('.ckpt'): + model_cfg = OmegaConf.load(model_cfg) + model_cfg.model.unet.flash_attention = False + model_cfg.model.micro_batch_size = 1 + model_cfg.model.global_batch_size = 1 + model = MegatronImagen(cfg=model_cfg.model, trainer=trainer) + checkpoint = torch.load(model_ckpt, map_location=lambda storage, loc: storage) + + # Change weight keys if training using TorchInductor + state_dict = checkpoint['state_dict'] + del_keys = [] + for k, v in state_dict.items(): + if '._orig_mod' in k: + del_keys.append(k) + if len(del_keys) != 0: + print('ckpt was saved with TorchInductor. Renaming weights..') + for k in del_keys: + new_k = k.replace("._orig_mod", "") + state_dict[new_k] = state_dict[k] + del state_dict[k] + model.load_state_dict(state_dict, strict=True) + else: + raise Exception('Invalid ckpt type. Should be either .nemo or .ckpt with cfg') + + model = model.model # We do not need Megatron Instance for inference + model.model.set_inference_mode(True) # Used for adding the least noise for EDM inference for SR model. + if eval_mode: + model.unet.cuda().eval() + return model + + def _load_customized_model(cfg: ImagenPipelineConfig, trainer=None, megatron_loading=False, megatron_cfg=None): + if megatron_loading: + assert megatron_cfg + + def model_cfg_modifier(model_cfg): + model_cfg.inductor = False + model_cfg.unet.flash_attention = False + model_cfg.micro_batch_size = megatron_cfg.fid.ncaptions_per_batch + model_cfg.global_batch_size = model_cfg.micro_batch_size * megatron_cfg.fid.ntasks_per_node + + trainer, megatron_models = setup_trainer_and_models_for_inference( + MegatronImagen, cfg=megatron_cfg, model_cfg_modifier=model_cfg_modifier + ) + models = [mm.model for mm in megatron_models] + for model in models: + model.cuda().eval() + model.model.set_inference_mode(True) + return models + customized_models = cfg.customized_model + models = [] + print('Load base model.') + model = ImagenPipeline._load_model( + model_ckpt=customized_models.base_ckpt, model_cfg=customized_models.base_cfg, trainer=trainer, + ) + models.append(model) + + if cfg.target_resolution >= 256: + print('Load SR256 model.') + model = ImagenPipeline._load_model( + model_ckpt=customized_models.sr256_ckpt, model_cfg=customized_models.sr256_cfg, trainer=trainer + ) + models.append(model) + + if cfg.target_resolution >= 1024: + print('Load SR1024 model.') + model = ImagenPipeline._load_model( + model_ckpt=customized_models.sr1024_ckpt, model_cfg=customized_models.sr1024_cfg, trainer=trainer + ) + models.append(model) + return models + + @classmethod + def from_pretrained( + cls, cfg: ImagenPipelineConfig, trainer=None, device='cuda', megatron_loading=False, megatron_cfg=None + ): + target_resolution = cfg.target_resolution + assert target_resolution in [64, 256, 1024] + + # Set encoder_path which will be used when inst the model + if cfg.encoder_path is not None: + os.environ['ENCODER_PATH'] = cfg.encoder_path + + assert cfg.model_name is None, 'No predefined model for now' + assert cfg.customized_model is not None, 'Need to provide customized models for inference' + models = ImagenPipeline._load_customized_model(cfg, trainer, megatron_loading, megatron_cfg) + assert len(models) >= 1, 'Need to load at least one model' + if cfg.inference_precision == '16': + print('Running Inference in FP16.') + print('Converting all difussion models to FP16..') + for model in models: + model.half() + + print('Loading text encoder') + text_encoder = models[0].get_text_encoder(encoder_path=cfg.encoder_path) + if cfg.inference_precision == '16': + print('Converting text encoders to FP16..') + text_encoder.half() + return ImagenPipeline(models=models, text_encoder=text_encoder, cfg=cfg, device=device) + + @torch.no_grad() + def get_text_encodings(self, input_text, repeat=1): + # Repeat the inputs so that we generate multiple samples per query + if isinstance(input_text, str): + inp_text_batch = [input_text] + else: + inp_text_batch = input_text + # Encode the text embeddings using text encoder. + text_encodings, text_mask = self.text_encoder.encode(inp_text_batch, device=self.device) + if repeat != 1: + assert len(inp_text_batch) == 1, 'Repeat should only be applied if we feed single text to encoder.' + text_encodings = text_encodings.repeat(repeat, 1, 1) + text_mask = text_mask.repeat(repeat, 1) + return text_encodings, text_mask + + @torch.no_grad() + def __call__( + self, + prompts: Union[str, List[str]] = None, + inference_steps: Union[int, List[int]] = None, + classifier_free_guidance: Union[float, List[float]] = None, + num_images_per_promt: Optional[int] = 0, + thresholding_method: bool = None, + output_type: Optional[str] = 'pil', + seed: Union[int, List[int]] = 2000, + single_batch_mode: bool = False, + output_res: Optional[int] = None, + low_res_input: Optional[torch.Tensor] = None, + ): + if prompts is None: + prompts = OmegaConf.to_object(self.cfg.texts) + if num_images_per_promt == 0: + num_images_per_promt = self.cfg.num_images_per_promt + if thresholding_method is None: + thresholding_method = self.cfg.thresholding_method + device = self.device + inference_precision = self.cfg.inference_precision + assert inference_precision in ['16', '32', 'AMP'], "Inference Precision should be one of ['16', '32', 'AMP']" + print(f'Running inference in {inference_precision} mode.') + amp_enabled = inference_precision == 'AMP' + + # Based on output_res and low_res_input, determine which models to run + if output_res is not None or low_res_input is not None: + models = [] + if output_res is not None: + for model in self.models: + models.append(model) + if model.image_size == output_res: + break + else: + models = self.models + if low_res_input is not None: + print(f'Low-res input shape: {low_res_input.shape}') + low_res_dim = low_res_input.shape[-1] + num_images_per_promt = low_res_input.shape[0] + for idx, model in enumerate(models): + if model.image_size == low_res_dim: + models = models[idx + 1 :] + break + print(f'Running inference on {len(models)} models.') + else: + models = self.models + + if classifier_free_guidance is None: + cfgs = [each.cfg for each in self.cfg.samplings] + cfgs = cfgs[: len(models)] + else: + cfgs = classifier_free_guidance + if isinstance(cfgs, int) or isinstance(cfgs, float): + cfgs = [cfgs] * len(models) + + if inference_steps is None: + steps = [each.step for each in self.cfg.samplings] + steps = steps[: len(models)] + else: + steps = inference_steps + if isinstance(steps, int): + steps = [steps] * len(models) + + assert len(steps) == len(cfgs) == len(models) + + output = [] + all_res_output = [[] for _ in range(len(models))] + if single_batch_mode: + num_images_per_promt = len(prompts) + + throughputs = {'text-encoding': []} + for idx in range(len(models)): + throughputs[f'stage-{idx+1}'] = [] + for prompt in prompts: + if single_batch_mode: + text_input = prompts + else: + text_input = prompt.strip('\n') + print('Input caption: {}'.format(text_input)) + tic = time.perf_counter() + text_encodings, text_mask = self.get_text_encodings( + text_input, repeat=num_images_per_promt if not single_batch_mode else 1 + ) + throughputs['text-encoding'].append(time.perf_counter() - tic) + + # Set seed + noise_maps = [] + if isinstance(seed, int): + # Single seed for the batch + torch.random.manual_seed(seed) + # Generate noise maps + for model in models: + noise_map = torch.randn( + (num_images_per_promt, 3, model.unet.image_size, model.unet.image_size), device=device + ) + noise_map = noise_map.half() if inference_precision == '16' else noise_map + noise_maps.append(noise_map) + elif isinstance(seed, list): + assert len(seed) == num_images_per_promt + for model in models: + noise_map_batch = [] + for single_seed in seed: + torch.random.manual_seed(single_seed) + noise_map_single = torch.randn( + (1, 3, model.unet.image_size, model.unet.image_size), device=device + ) + noise_map_batch.append(noise_map_single) + noise_map_batch = torch.cat(noise_map_batch, dim=0) + noise_map_batch = noise_map_batch.half() if inference_precision == '16' else noise_map_batch + noise_maps.append(noise_map_batch) + else: + raise RuntimeError('Seed type incorrect.') + + x_low_res = low_res_input + all_res = [] + for idx, (model, noise_map, cfg, step) in enumerate(zip(models, noise_maps, cfgs, steps)): + tic = time.perf_counter() + with autocast(enabled=amp_enabled): + generated_images = model.sample_image( + noise_map=noise_map, + text_encoding=text_encodings, + text_mask=text_mask, + x_low_res=x_low_res, + cond_scale=cfg, + sampling_steps=step, + thresholding_method=thresholding_method, + ) + x_low_res = generated_images + all_res.append(generated_images) + throughputs[f'stage-{idx+1}'].append(time.perf_counter() - tic) + # recenter from [-1, 1] to [0, 1] + assert generated_images is not None + generated_images = ((generated_images + 1) / 2).clamp_(0, 1) + all_res = [((each + 1) / 2).clamp_(0, 1) for each in all_res] + output.append(generated_images) + for idx, each in enumerate(all_res): + all_res_output[idx].append(each) + if single_batch_mode: + break + + if output_type == 'torch': + return torch.cat(output, dim=0), [torch.cat(each, dim=0) for each in all_res_output] + output_new = [] + for x_samples_image in output: + # Convert to numpy + x_samples_image = x_samples_image.cpu().permute(0, 2, 3, 1).numpy() + if output_type == 'pil': + x_samples_image = numpy_to_pil(x_samples_image) + output_new.append(x_samples_image) + + all_res_output_new = [[] for each in range(len(models))] + for idx, res_output in enumerate(all_res_output): + for x_samples_image in res_output: + # Convert to numpy + x_samples_image = x_samples_image.cpu().permute(0, 2, 3, 1).numpy() + if output_type == 'pil': + x_samples_image = numpy_to_pil(x_samples_image) + all_res_output_new[idx].append(x_samples_image) + + for item in throughputs: + throughputs[item] = sum(throughputs[item]) / len(throughputs[item]) + + return output_new, all_res_output_new, throughputs diff --git a/nemo/collections/multimodal/models/imagen/precond.py b/nemo/collections/multimodal/models/imagen/precond.py new file mode 100644 index 000000000000..fc3b3ed7d18d --- /dev/null +++ b/nemo/collections/multimodal/models/imagen/precond.py @@ -0,0 +1,174 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn.functional as F + +from nemo.collections.multimodal.modules.imagen.sampler.batch_ops import batch_mul +from nemo.collections.multimodal.modules.imagen.sampler.continuous_ddpm import GaussianDiffusionContinuousTimes +from nemo.collections.multimodal.parts.utils import randn_like + + +class PrecondModel(torch.nn.Module): + def __init__(self, unet, loss_type): + super().__init__() + self.unet = unet + self.rng = None + self.inference = False + if loss_type == 'l1': + self.loss_fn = F.l1_loss + elif loss_type == 'l2': + self.loss_fn = F.mse_loss + elif loss_type == 'huber': + self.loss_fn = F.smooth_l1_loss + else: + raise NotImplementedError(f'{loss_type} loss is not supported') + + def set_inference_mode(self, value): + self.inference = value + + def forward(self, **model_kwargs): + return self.unet(**model_kwargs) + + def forward_with_cond_scale(self, *args, text_embed=None, cond_scale=1.0, **kwargs): + logits = self.forward(*args, text_embed=text_embed, **kwargs) + if cond_scale == 1.0: + return logits + null_logits = self.forward(*args, text_embed=torch.zeros_like(text_embed), **kwargs) + return null_logits + (logits - null_logits) * cond_scale + + def set_rng(self, generator): + self.rng = generator + + +class ContinousDDPMPrecond(PrecondModel): + def __init__( + self, + unet, + loss_type='l2', + pred_objective='noise', + noise_schedule='cosine', + timesteps=1000, + noise_cond_aug=False, + ): + super().__init__(unet, loss_type) + self.scheduler = GaussianDiffusionContinuousTimes(noise_schedule=noise_schedule, timesteps=timesteps) + self.pred_objective = pred_objective + assert noise_cond_aug == False, 'noise cond aug currently not supported for DDPM' + + def sample_time(self, batch_size, device=None): + return self.scheduler.sample_random_times(batch_size=batch_size, device=device) + + def get_xt(self, x0, t=None, epsilon=None): + if epsilon is None: + epsilon = randn_like(x0, generator=self.rng) + if t is None: + t = self.sample_time(batch_size=x0.shape[0], device=x0.device) + x_noisy, log_snr, alpha, sigma = self.scheduler.q_sample(x_start=x0, t=t, noise=epsilon,) + return x_noisy, t, epsilon + + def forward(self, x, time, text_embed, text_mask, **model_kwargs): + # Convert time to FP32 for calculating time embedding due to FP16 overflow + time = time.float() + time = self.scheduler.get_condition(time) + time = time.type_as(x) + + return self.unet(x=x, time=time, text_embed=text_embed, text_mask=text_mask, **model_kwargs) + + def compute_loss(self, x0, text_embed, text_mask, time=None, noise=None, **model_kwargs): + x_noisy, time, noise = self.get_xt(x0=x0, t=time, epsilon=noise) + pred = self.forward(x_noisy, time, text_embed, text_mask, **model_kwargs) + # Determine target + if self.pred_objective == 'noise': + target = noise + elif self.pred_objective == 'x_start': + target = x0 + else: + raise ValueError(f'unknown objective {self.pred_objective}') + return self.loss_fn(pred, target) + + def set_rng(self, generator): + self.scheduler.rng = generator + self.rng = generator + + +class EDMPrecond(PrecondModel): + def __init__( + self, + unet, # Underlying model. + loss_type='l2', + sigma_data=0.5, # Expected standard deviation of the training data. + p_mean=-1.2, + p_std=1.2, + noise_cond_aug=False, + ): + super().__init__(unet, loss_type) + self.sigma_data = sigma_data + self.p_mean = p_mean + self.p_std = p_std + self.noise_cond_aug = noise_cond_aug + + def forward(self, x, time, text_embed, text_mask, **model_kwargs): + bs = x.shape[0] + assert time.ndim <= 1, 'time should be in shape of either [bs] or scalar' + sigma = time + c_skip = self.sigma_data ** 2 / (sigma ** 2 + self.sigma_data ** 2) + c_out = sigma * self.sigma_data / (sigma ** 2 + self.sigma_data ** 2).sqrt() + c_in = 1 / (self.sigma_data ** 2 + sigma ** 2).sqrt() + c_noise = sigma.log() / 4 + + if c_noise.ndim < 1: + c_noise = c_noise.repeat(bs,) + + if self.noise_cond_aug: + # Applying noise conditioning augmentation + assert 'x_low_res' in model_kwargs, 'x_low_res does not exist when attemping to apply noise augmentation' + x_low_res = model_kwargs['x_low_res'] + if self.inference: + batch_size = x_low_res.shape[0] + time_low_res = torch.ones(batch_size, device=x_low_res.device) * 0.002 + x_low_res_noisy, time_low_res = self.get_xt(x0=x_low_res, t=time_low_res, epsilon=None) + else: + x_low_res_noisy, time_low_res = self.get_xt(x0=x_low_res, t=None, epsilon=None) + c_in_noise = 1 / (self.sigma_data ** 2 + time_low_res ** 2).sqrt() + c_noise_noise = time_low_res.log() / 4 + model_kwargs['x_low_res'] = batch_mul(c_in_noise, x_low_res_noisy) + model_kwargs['time_low_res'] = c_noise_noise + + F_x = self.unet(batch_mul(c_in, x), c_noise, text_embed, text_mask, **model_kwargs) + D_x = batch_mul(c_skip, x) + batch_mul(c_out, F_x) + return D_x + + def sample_time(self, batch_size, device=None): + return (torch.randn(batch_size, device=device, generator=self.rng) * self.p_std + self.p_mean).exp() + + def get_xt(self, x0, t=None, epsilon=None): + if epsilon is None: + epsilon = randn_like(x0, generator=self.rng) + assert epsilon.shape == x0.shape + if t is None: + t = self.sample_time(batch_size=x0.shape[0], device=x0.device) + sigma = t + noise = batch_mul(epsilon, sigma) + return x0 + noise, sigma + + def compute_loss(self, x0, text_embed, text_mask, time=None, noise=None, **model_kwargs): + x_noisy, time = self.get_xt(x0=x0, t=None, epsilon=noise) + pred = self.forward(x_noisy, time, text_embed, text_mask, **model_kwargs) + sigma = time + weight = ((sigma ** 2 + self.sigma_data ** 2) / (sigma * self.sigma_data) ** 2).sqrt() + target = x0 + return self.loss_fn(batch_mul(weight, target), batch_mul(weight, pred),) + + def set_rng(self, generator): + self.rng = generator diff --git a/nemo/collections/multimodal/models/instruct_pix2pix/__init__.py b/nemo/collections/multimodal/models/instruct_pix2pix/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/instruct_pix2pix/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/__init__.py b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py new file mode 100644 index 000000000000..e25b0ecbe041 --- /dev/null +++ b/nemo/collections/multimodal/models/instruct_pix2pix/ldm/ddpm_edit.py @@ -0,0 +1,268 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +https://github.com/timothybrooks/instruct-pix2pix/blob/2afcb7e45bd350765f21a58a0c135871e9dc5a78/stable_diffusion/ldm/models/diffusion/ddpm_edit.py +""" + +from contextlib import contextmanager +from functools import partial + +import numpy as np +import pytorch_lightning as pl +import torch +import torch.nn as nn +from einops import rearrange, repeat +from torch.optim.lr_scheduler import LambdaLR +from torchvision.utils import make_grid +from tqdm import tqdm + +from nemo.collections.multimodal.data.instruct_pix2pix.edit_dataset import EditDataset +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion, MegatronLatentDiffusion +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( + MegatronPretrainingRandomSampler, + MegatronPretrainingSampler, +) +from nemo.utils import logging + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +class LatentDiffusionEdit(LatentDiffusion): + def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): + pl_sd = torch.load(path, map_location="cpu") + if "state_dict" in list(pl_sd.keys()): + pl_sd = pl_sd["state_dict"] + sd = {} + + first_key = list(pl_sd.keys())[0] + # State keys of model trained with TorchDynamo changed from + # "model.xxx" to "model._orig_mod.xxx" + for k, v in pl_sd.items(): + new_k = k.replace("._orig_mod", "") + # compatibility for stable diffusion old checkpoint + # remove megatron wrapper prefix + if first_key == "model.betas": + new_k = new_k.lstrip("model.") + sd[new_k] = v + keys = list(sd.keys()) + + # Our model adds additional channels to the first layer to condition on an input image. + # For the first layer, copy existing channel weights and initialize new channel weights to zero. + input_keys = [ + "model.diffusion_model.input_blocks.0.0.weight", + ] + + self_sd = self.state_dict() + for input_key in input_keys: + if input_key not in sd or input_key not in self_sd: + continue + + input_weight = self_sd[input_key] + if input_weight.size() != sd[input_key].size(): + print(f"Manual init: {input_key}") + input_weight.zero_() + input_weight[:, :4, :, :].copy_(sd[input_key]) + ignore_keys.append(input_key) + + for k in keys: + for ik in ignore_keys: + if k.startswith(ik): + print("Deleting key {} from state_dict.".format(k)) + del sd[k] + missing, unexpected = ( + self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict(sd, strict=False) + ) + print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") + if len(missing) > 0: + print(f"Missing Keys: {missing}") + if len(unexpected) > 0: + print(f"Unexpected Keys: {unexpected}") + + @torch.no_grad() + def get_input( + self, + batch, + k, + return_first_stage_outputs=False, + force_c_encode=False, + cond_key=None, + return_original_cond=False, + bs=None, + uncond=0.05, + ): + x = batch[k] + if bs is not None: + x = x[:bs] + + encoder_posterior = self.encode_first_stage(x) + z = self.get_first_stage_encoding(encoder_posterior).detach() + cond_key = cond_key or self.cond_stage_key + xc = batch[cond_key] + if bs is not None: + xc["c_crossattn"] = xc["c_crossattn"][:bs] + xc["c_concat"] = xc["c_concat"][:bs] + cond = {} + + # To support classifier-free guidance, randomly drop out only text conditioning 5%, only image conditioning 5%, and both 5%. + random = torch.rand(x.size(0), device=x.device) + prompt_mask = rearrange(random < 2 * uncond, "n -> n 1 1") + input_mask = 1 - rearrange((random >= uncond).float() * (random < 3 * uncond).float(), "n -> n 1 1 1") + + null_prompt = self.get_learned_conditioning([""]) + cond["c_crossattn"] = torch.where( + prompt_mask, null_prompt, self.get_learned_conditioning(xc["c_crossattn"]).detach() + ) + cond["c_concat"] = input_mask * self.encode_first_stage((xc["c_concat"].to(x.device))).mode().detach() + + out = [z, cond] + if return_first_stage_outputs: + xrec = self.decode_first_stage(z) + out.extend([x, xrec]) + if return_original_cond: + out.append(xc) + return out + + +class MegatronLatentDiffusionEdit(MegatronLatentDiffusion): + def model_provider_func(self, pre_process=True, post_process=True): + """Model depends on pipeline paralellism.""" + model = LatentDiffusionEdit(cfg=self.cfg, model_parallel_config=self.model_parallel_config) + return model + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + self.model.rng.manual_seed(self.cfg.seed + 100 * parallel_state.get_data_parallel_rank()) + + # log number of parameters + if isinstance(self.model, list): + num_parameters_on_device = sum( + [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] + ) + else: + num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) + + # to be summed across data parallel group + total_num_parameters = torch.tensor(num_parameters_on_device).cuda(non_blocking=True) + + torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer.ckpt_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + self.build_train_valid_test_datasets() + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + + def build_train_valid_test_datasets(self): + # TODO (yuya): set up splits ratio and other params + if self.cfg.data.data_path is not None: + self._train_ds = EditDataset(path=self.cfg.data.data_path, split="train", flip_prob=0.5) + self._validation_ds = EditDataset(path=self.cfg.data.data_path, split="val") + self._test_ds = EditDataset(path=self.cfg.data.data_path, split="test") + + def setup_training_data(self, cfg): + if hasattr(self, '_train_ds') and self._train_ds is not None: + consumed_samples = self.compute_consumed_samples(0) + logging.info( + f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' + ) + self._train_dl = self.build_pretraining_data_loader(self._train_ds, consumed_samples) + + def setup_validation_data(self, cfg): + if hasattr(self, '_validation_ds') and self._validation_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' + ) + drop_last = True + if not self.cfg.get('validation_drop_last', True): + logging.info(f'Drop last in validation dataset is set to False') + drop_last = False + self._validation_dl = self.build_pretraining_data_loader(self._validation_ds, consumed_samples, drop_last) + + def setup_test_data(self, cfg): + if hasattr(self, '_test_ds') and self._test_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' + ) + drop_last = True + if not self.cfg.get('validation_drop_last', True): + logging.info(f'Drop last in validation dataset is set to False') + drop_last = False + self._test_dl = self.build_pretraining_data_loader(self._test_ds, consumed_samples, drop_last) + + def build_pretraining_data_loader(self, dataset, consumed_samples, drop_last=True): + """Build dataloader given an input dataset.""" + + if dataset is None: + return None + logging.info(f'Building dataloader with consumed samples: {consumed_samples}') + # Megatron sampler + if hasattr(self._cfg.data, 'dataloader_type') and self._cfg.data.dataloader_type is not None: + # TODO (yuya): fix this + if self._cfg.data.dataloader_type == 'single': + batch_sampler = MegatronPretrainingSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=self._cfg.micro_batch_size, + global_batch_size=self._cfg.global_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=drop_last, + ) + elif self._cfg.data.dataloader_type == 'cyclic': + batch_sampler = MegatronPretrainingRandomSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=self._cfg.micro_batch_size, + global_batch_size=self._cfg.global_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=drop_last, + ) + else: + raise Exception(f'{self._cfg.dataloader_type} dataloader type is not supported.') + else: + raise ValueError('cfg.data.dataloader_type not found. Must be "single" or "cyclic"') + + # Torch dataloader. + return torch.utils.data.DataLoader( + dataset, batch_sampler=batch_sampler, num_workers=self._cfg.data.num_workers, pin_memory=True, + ) diff --git a/nemo/collections/multimodal/models/kosmos/__init__.py b/nemo/collections/multimodal/models/kosmos/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py b/nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py new file mode 100644 index 000000000000..e4aaddd3214a --- /dev/null +++ b/nemo/collections/multimodal/models/kosmos/megatron_kosmos_model.py @@ -0,0 +1,1154 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools +import math +import os +import random +import tempfile +from functools import partial +from typing import Any, List, Optional, Union + +import numpy as np +import pandas as pd +import torch +from einops import rearrange, repeat +from omegaconf.dictconfig import DictConfig +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning.accelerators import CPUAccelerator +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.multimodal.data.kosmos.kosmos_dataset import MAX_NUM_IMAGES, MergedKosmosDataLoader +from nemo.collections.multimodal.data.kosmos.kosmos_dataset import ( + build_train_valid_datasets as build_media_train_valid_datasets, +) +from nemo.collections.multimodal.models.clip.megatron_clip_models import CLIPVisionTransformer +from nemo.collections.multimodal.models.kosmos.perceiver_resampler import PerceiverResampler +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( + MegatronPretrainingRandomSampler, + MegatronPretrainingSampler, +) +from nemo.collections.nlp.data.language_modeling.megatron.gpt_dataset import ( + build_train_valid_test_datasets as build_text_train_valid_test_datasets, +) +from nemo.collections.nlp.models.language_modeling.megatron.gpt_model import GPTModel, post_language_model_processing +from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.modules.common.megatron.build_model import build_model +from nemo.collections.nlp.modules.common.megatron.language_model import get_language_model +from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule +from nemo.collections.nlp.modules.common.megatron.utils import ( + ApexGuardDefaults, + average_losses_across_data_parallel_group, + get_all_params_for_weight_decay_optimization, + get_params_for_weight_decay_optimization, + init_method_normal, + parallel_lm_logits, + scaled_init_method_normal, +) +from nemo.collections.nlp.modules.common.text_generation_utils import ( + generate, + get_computeprob_response, + get_default_length_params, + get_default_sampling_params, + megatron_gpt_generate, +) +from nemo.collections.nlp.modules.common.transformer.text_generation import ( + LengthParam, + OutputType, + SamplingParam, + TextGeneration, +) +from nemo.collections.nlp.parts.nlp_overrides import GradScaler, NLPSaveRestoreConnector +from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone +from nemo.core.classes.common import PretrainedModelInfo +from nemo.utils import logging + +try: + import apex.transformer.pipeline_parallel.utils + from apex.transformer.enums import AttnMaskType + from apex.transformer.pipeline_parallel.utils import get_num_microbatches + + HAVE_APEX = True + +except (ImportError, ModuleNotFoundError): + + HAVE_APEX = False + +try: + from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + +try: + import transformer_engine + + HAVE_TE = True + +except (ImportError, ModuleNotFoundError): + HAVE_TE = False + + +class FrozenCLIPVisionTransformer(CLIPVisionTransformer): + def __init__(self, model_cfg, pre_process=True, post_process=True): + super().__init__( + model_cfg, pre_process=pre_process, post_process=post_process, skip_head=True, + ) + self.frozen = False + + def train(self, mode): + if self.frozen: + return self + + super().train(mode) + return self + + def forward(self, input): + assert self.training == False + hidden_states = self.backbone(input) + # Do not add header after backbone + return hidden_states + + def freeze(self) -> None: + for param in self.parameters(): + param.requires_grad = False + + self.eval() + self.frozen = True + + +class KosmosModel(MegatronModule): + def __init__( + self, model_cfg, vocab_size, media_start_id=None, media_end_id=None, pre_process=True, post_process=True, + ): + super(KosmosModel, self).__init__() + + llm_cfg = model_cfg.llm + vision_cfg = model_cfg.vision + + self.parallel_output = True # TODO (yuya): Fix this hard-code + self.media_start_id = media_start_id + self.media_end_id = media_end_id + self.pre_process = pre_process + self.post_process = post_process + self.fp16_lm_cross_entropy = llm_cfg.get('fp16_lm_cross_entropy', False) + self.sequence_parallel = llm_cfg.sequence_parallel + self.gradient_accumulation_fusion = llm_cfg.gradient_accumulation_fusion + self.share_embeddings_and_output_weights = llm_cfg.share_embeddings_and_output_weights + self.position_embedding_type = llm_cfg.get('position_embedding_type', 'learned_absolute') + + use_scaled_init_method = llm_cfg.get('use_scaled_init_method', True) + kv_channels = llm_cfg.get('kv_channels', None) + hidden_size = llm_cfg.hidden_size + num_attention_heads = llm_cfg.num_attention_heads + num_layers = llm_cfg.num_layers + init_method_std = llm_cfg.init_method_std + + if kv_channels is None: + assert ( + hidden_size % num_attention_heads == 0 + ), 'hidden_size must be divisible by num_attention_heads if kv_channels is None' + kv_channels = hidden_size // num_attention_heads + + scaled_init_method = ( + scaled_init_method_normal(init_method_std, num_layers) + if use_scaled_init_method + else init_method_normal(init_method_std) + ) + self.language_model, self._language_model_key = get_language_model( + vocab_size=vocab_size, + hidden_size=llm_cfg.hidden_size, + max_position_embeddings=llm_cfg.max_position_embeddings, + num_layers=llm_cfg.num_layers, + num_attention_heads=llm_cfg.num_attention_heads, + apply_query_key_layer_scaling=llm_cfg.get('apply_query_key_layer_scaling', True), + kv_channels=kv_channels, + ffn_hidden_size=llm_cfg.ffn_hidden_size, + num_tokentypes=0, + add_pooler=False, + encoder_attn_mask_type=AttnMaskType.causal, + pre_process=pre_process, + post_process=post_process, + init_method_std=llm_cfg.get('init_method_std', 0.02), + scaled_init_method=scaled_init_method, + use_cpu_initialization=llm_cfg.get('use_cpu_initialization', False), + hidden_dropout=llm_cfg.get('hidden_dropout', 0.1), + attention_dropout=llm_cfg.get('attention_dropout', 0.1), + ffn_dropout=llm_cfg.get('ffn_dropout', 0.0), + precision=llm_cfg.get('precision', 16), + fp32_residual_connection=llm_cfg.get('fp32_residual_connection', False), + activations_checkpoint_granularity=llm_cfg.get('activations_checkpoint_granularity', None), + activations_checkpoint_method=llm_cfg.get('activations_checkpoint_method', None), + activations_checkpoint_num_layers=llm_cfg.get('activations_checkpoint_num_layers', 1), + activations_checkpoint_layers_per_pipeline=llm_cfg.get('activations_checkpoint_layers_per_pipeline', None), + normalization=llm_cfg.get('normalization', 'layernorm'), + layernorm_epsilon=llm_cfg.get('layernorm_epsilon', 1e-5), + onnx_safe=llm_cfg.get('onnx_safe', False), + bias=llm_cfg.get('bias', True), + bias_activation_fusion=llm_cfg.get('bias_activation_fusion', True), + bias_dropout_add_fusion=llm_cfg.get('bias_dropout_add_fusion', True), + activation=llm_cfg.get('activation', 'gelu'), + headscale=llm_cfg.get('headscale', False), + transformer_block_type=llm_cfg.get('transformer_block_type', 'pre_ln'), + openai_gelu=llm_cfg.get('openai_gelu', False), + normalize_attention_scores=llm_cfg.get('normalize_attention_scores', True), + position_embedding_type=llm_cfg.get('position_embedding_type', 'learned_absolute'), + rotary_percentage=llm_cfg.get('rotary_percentage', 1.0), + share_embeddings_and_output_weights=llm_cfg.get('share_embeddings_and_output_weights', True), + attention_type=llm_cfg.get('attention_type', 'multihead'), + masked_softmax_fusion=llm_cfg.get('masked_softmax_fusion', True), + gradient_accumulation_fusion=llm_cfg.get('gradient_accumulation_fusion', False), + persist_layer_norm=llm_cfg.get('persist_layer_norm', False), + sequence_parallel=llm_cfg.get('sequence_parallel', False), + transformer_engine=llm_cfg.get('transformer_engine', False), + fp8=llm_cfg.get('fp8', False), + fp8_e4m3=llm_cfg.get('fp8_e4m3', False), + fp8_hybrid=llm_cfg.get('fp8_hybrid', False), + fp8_margin=llm_cfg.get('fp8_margin', 0), + fp8_interval=llm_cfg.get('fp8_interval', 1), + fp8_amax_history_len=llm_cfg.get('fp8_amax_history_len', 1), + fp8_amax_compute_algo=llm_cfg.get('fp8_amax_compute_algo', 'most_recent'), + reduce_amax=llm_cfg.get('reduce_amax', True), + use_emha=llm_cfg.get('use_emha', False), + ) + + if self.share_embeddings_and_output_weights: + self.initialize_word_embeddings( + init_method=init_method_normal(init_method_std), vocab_size=vocab_size, hidden_size=hidden_size + ) + + # TODO (yuya): check when PP is added + self.vision_encoder = FrozenCLIPVisionTransformer( + vision_cfg, pre_process=vision_cfg.pre_process, post_process=vision_cfg.post_process, + ) + if vision_cfg.from_pretrained is not None: + logging.info(f"Loading CLIP vision encoder weights from checkpoint {vision_cfg.from_pretrained}") + self.load_vision_encoder_weights(vision_cfg.from_pretrained) + self.perceiver = PerceiverResampler(dim=vision_cfg.hidden_size, num_latents=model_cfg.num_media_latents) + self.vision_connector = torch.nn.Linear(vision_cfg.hidden_size, llm_cfg.hidden_size, bias=False,) + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + self.language_model.set_input_tensor(input_tensor) + + def encode_vision_x(self, vision_x: torch.Tensor): + """ + Compute media tokens from vision input by passing it through vision encoder and conditioning language model. + Args: + vision_x (torch.Tensor): Vision input + shape (B, T_img, F, C, H, W) + Images in the same chunk are collated along T_img, and frames are collated along F + Currently only F=1 is supported (single-frame videos) + + rearrange code based on https://github.com/dhansmair/flamingo-mini + """ + + assert vision_x.ndim == 6, "vision_x should be of shape (b, T_img, F, C, H, W)" + b, T, F = vision_x.shape[:3] + assert F == 1, "Only single frame supported" + + vision_x = rearrange(vision_x, "b T F c h w -> (b T F) c h w") + with torch.no_grad(): + vision_x = self.vision_encoder(vision_x) + vision_x = rearrange(vision_x, "(b T F) v d -> b T F v d", b=b, T=T, F=F) + vision_x = self.perceiver(vision_x) # reshapes to (b, T, n, d) + vision_x = self.vision_connector(vision_x) + return vision_x + + def replace_media_embeddings(self, input_ids, inputs_embeds, media=None): + if media is None: + return inputs_embeds + + batch_size, sequence_length, hidden_size = inputs_embeds.shape + + # calculate media features without gradients + with torch.no_grad(): + media_features = self.encode_vision_x(media) + num_images_per_sample = media_features.size(1) + num_patches = media_features.size(2) + + # flatten patches + media_features = media_features.view(batch_size, -1, hidden_size) + + # create an indices matrix used in torch.scatter + padded_media_indices = torch.ones( + (batch_size, num_images_per_sample), dtype=torch.long, device=input_ids.device + ) + padded_media_indices *= sequence_length + for idx, input_id in enumerate(input_ids): + media_end_positions = torch.where(input_id == self.media_end_id)[0] + # locate the first media token positions + padded_media_indices[idx, : len(media_end_positions)] = media_end_positions - num_patches + + # use indices to create a span + padded_media_indices = padded_media_indices.unsqueeze(-1) + torch.arange( + num_patches, device=padded_media_indices.device + ).repeat(*padded_media_indices.shape, 1) + padded_media_indices = padded_media_indices.reshape(batch_size, -1) + padded_media_indices = repeat(padded_media_indices, 'b s -> b s h', h=hidden_size) + + # concat placeholder + updated_input_embeds = torch.cat( + (inputs_embeds, torch.zeros((batch_size, num_patches, hidden_size), device=inputs_embeds.device)), dim=1 + ) + updated_input_embeds = updated_input_embeds.type(media_features.dtype) + # scatter media_features + updated_input_embeds.scatter_(1, padded_media_indices, media_features) + + # chop off placeholder + updated_input_embeds = updated_input_embeds[:, :sequence_length] + + return updated_input_embeds + + def forward( + self, + input_ids, + position_ids, + attention_mask, + labels=None, + media=None, + token_type_ids=None, + layer_past=None, + get_key_value=False, + forward_method_parallel_output=None, + encoder_input=None, + set_inference_key_value_memory=False, + inference_max_sequence_len=None, + checkpoint_activations_all_layers=None, + ): + # input_ids: [b, s] + # position_ids: [b, s] + # attention_mask: [1, 1, s, s] + + # Multimodal uses different forward pass. Vision tower must be inserted. + enc_input_ids, enc_position_ids, enc_attn_mask = input_ids, position_ids, attention_mask + + # Embeddings. + if self.pre_process and encoder_input is None: + embedding_module = self.language_model.embedding + + words_embeddings = embedding_module.word_embeddings(enc_input_ids) + words_embeddings = self.replace_media_embeddings(enc_input_ids, words_embeddings, media=media) + + if self.position_embedding_type == 'learned_absolute': + assert position_ids is not None + position_embeddings = embedding_module.position_embeddings(position_ids) + embeddings = words_embeddings + position_embeddings + elif self.position_embedding_type == 'learned_parameters': + embeddings = words_embeddings + embedding_module.position_embeddings + else: + embeddings = words_embeddings + + if token_type_ids is not None: + assert embedding_module.tokentype_embeddings is not None + embeddings = embeddings + embedding_module.tokentype_embeddings(token_type_ids) + else: + assert embedding_module.tokentype_embeddings is None + + # Data format change to avoid explicit tranposes : [b s h] --> [s b h]. + if embedding_module.transpose_batch_sequence: + embeddings = embeddings.transpose(0, 1).contiguous() + + # If the input flag for fp32 residual connection is set, convert for float. + if embedding_module.fp32_residual_connection: + embeddings = embeddings.float() + + # Dropout. + if self.sequence_parallel: + embeddings = tensor_parallel.mappings.scatter_to_sequence_parallel_region(embeddings) + with tensor_parallel.random.get_cuda_rng_tracker().fork(): + embeddings = embedding_module.embedding_dropout(embeddings) + else: + embeddings = embedding_module.embedding_dropout(embeddings) + + encoder_input = embeddings + else: + pass + + # enc_attn_mask: [1, 1, s, s] + + if self.position_embedding_type == 'rope': + if inference_max_sequence_len is not None: + rotary_pos_emb = self.language_model.rotary_pos_emb(inference_max_sequence_len) + elif self.language_model.encoder.input_tensor is not None: + if self.sequence_parallel: + rotary_pos_emb = self.language_model.rotary_pos_emb( + self.language_model.encoder.input_tensor.size(0) + * parallel_state.get_tensor_model_parallel_world_size() + ) + else: + rotary_pos_emb = self.language_model.rotary_pos_emb(self.encoder.input_tensor.size(0)) + else: + if self.sequence_parallel: + rotary_pos_emb = self.language_model.rotary_pos_emb( + encoder_input.size(0) * parallel_state.get_tensor_model_parallel_world_size() + ) + else: + rotary_pos_emb = self.language_model.rotary_pos_emb(encoder_input.size(0)) + else: + rotary_pos_emb = None + + # encoder but decoder for GPT + encoder_output = self.language_model.encoder( + encoder_input, + enc_attn_mask, + layer_past=layer_past, + get_key_value=get_key_value, + set_inference_key_value_memory=set_inference_key_value_memory, + inference_max_sequence_len=inference_max_sequence_len, + checkpoint_activations_all_layers=checkpoint_activations_all_layers, + rotary_pos_emb=(rotary_pos_emb, None, None) + if rotary_pos_emb is not None + else None, # This assumes that this being used as a GPT/BERT model only (no cross-attention) + ) + + lm_output = encoder_output + + if self.post_process: + return post_language_model_processing( + lm_output, + labels, + self.language_model.output_layer.weight + if not self.share_embeddings_and_output_weights + else self.word_embeddings_weight(), + get_key_value, + self.parallel_output, + forward_method_parallel_output, + self.fp16_lm_cross_entropy, + return_logits=False, + sequence_parallel=self.sequence_parallel, + gradient_accumulation_fusion=self.gradient_accumulation_fusion, + ) + else: + return lm_output + + def load_vision_encoder_weights(self, nemo_path): + if torch.cuda.is_available(): + map_location = torch.device('cuda') + else: + map_location = torch.device('cpu') + save_restore_connector = NLPSaveRestoreConnector() + cwd = os.getcwd() + + with tempfile.TemporaryDirectory() as tmpdir: + try: + save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) + + # Change current working directory to + os.chdir(tmpdir) + config_yaml = os.path.join(tmpdir, save_restore_connector.model_config_yaml) + cfg = OmegaConf.load(config_yaml) + + model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) + state_dict = save_restore_connector._load_state_dict_from_disk( + model_weights, map_location=map_location + ) + finally: + os.chdir(cwd) + + new_state_dict = {} + for k, v in state_dict.items(): + if k.startswith("model.vision_encoder."): + new_k = k.lstrip("model.vision_encoder.") + new_state_dict[new_k] = v + + missing, unexpected = self.vision_encoder.load_state_dict(new_state_dict, strict=False) + print(f"Restored from {nemo_path} with {len(missing)} missing and {len(unexpected)} unexpected keys") + if len(missing) > 0: + print(f"Missing Keys: {missing}") + if len(unexpected) > 0: + print(f"Unexpected Keys: {unexpected}") + + def state_dict_for_save_checkpoint(self, destination=None, prefix='', keep_vars=False): + + state_dict_ = {} + state_dict_[self._language_model_key] = self.language_model.state_dict_for_save_checkpoint( + destination, prefix, keep_vars + ) + # Save word_embeddings. + if self.post_process and not self.pre_process: + state_dict_[self._word_embeddings_for_head_key] = self.word_embeddings.state_dict( + destination, prefix, keep_vars + ) + return state_dict_ + + def load_state_dict(self, state_dict, strict=True): + """Customized load.""" + + # Load word_embeddings. + if self.post_process and not self.pre_process: + self.word_embeddings.load_state_dict(state_dict[self._word_embeddings_for_head_key], strict=strict) + if self._language_model_key in state_dict: + state_dict = state_dict[self._language_model_key] + self.language_model.load_state_dict(state_dict, strict=strict) + + +class MegatronKosmosModel(MegatronGPTModel): + """ + Megatron Kosmos pretraining + """ + + def __init__(self, cfg: DictConfig, trainer: Trainer): + super().__init__(cfg, trainer) + + self.image_size = (self.cfg.vision.img_h, self.cfg.vision.img_w) + self.megatron_amp_O2 = getattr(self, 'megatron_amp_O2', False) + self.enabled_data_types = self.cfg.get("enabled_data_types", []) + logging.info(f"Data types enabled in Kosmos training: {self.enabled_data_types}") + self.per_type_micro_batch_size = self.cfg.per_type_micro_batch_size + self.per_type_global_batch_size = {} + self.per_type_loss_weights = {} + for data_type in self.enabled_data_types: + self.per_type_global_batch_size[data_type] = ( + self.per_type_micro_batch_size[data_type] * self.cfg.global_batch_size // self.cfg.micro_batch_size + ) + self.per_type_loss_weights[data_type] = self.cfg.per_type_loss_weights[data_type] + + def get_gpt_module_list(self): + if isinstance(self.model, list): + return [model.module if isinstance(model, Float16Module) else model for model in self.model] + elif isinstance(self.model, Float16Module): + return [self.model.module] + else: + return [self.model] + + def set_inference_config(self, inference_config): + self._inference_config = inference_config + + def get_inference_config(self): + return self._inference_config + + def model_provider_func(self, pre_process, post_process): + """Model depends on pipeline paralellism.""" + media_start_id = self.tokenizer.token_to_id(self.cfg.media_start_token) + media_end_id = self.tokenizer.token_to_id(self.cfg.media_end_token) + + model = KosmosModel( + model_cfg=self.cfg, + vocab_size=self.padded_vocab_size, + media_start_id=media_start_id, + media_end_id=media_end_id, + pre_process=pre_process, + post_process=post_process, + ) + + # Freeze vit + model.vision_encoder.freeze() + + logging.info( + f"Kosmos model initialized with {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable parameters" + ) + + return model + + def forward(self, tokens, text_position_ids, attention_mask, labels, media=None): + output_tensor = self.model(tokens, text_position_ids, attention_mask, labels=labels, media=media) + return output_tensor + + def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): + + tensor_shape = [self.cfg.llm.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.llm.hidden_size] + + # handle asynchronous grad reduction + no_sync_func = None + grad_sync_func = None + param_sync_func = None + if not forward_only and self.with_distributed_adam: + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) + grad_sync_func = self.reduce_overlap_gradients + param_sync_func = self.sync_overlap_parameters + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + fwd_bwd_function = get_forward_backward_func() + + # TODO @akhattar: remove sync related stuff from config, add num_micro_batches_with_partial_activation_checkpoints when ready + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), + forward_only=forward_only, + tensor_shape=tensor_shape, + dtype=self.autocast_dtype, + grad_scaler=self.trainer.precision_plugin.scaler.scale + if self.cfg.precision in [16, '16', '16-mixed'] + else None, + sequence_parallel=self.cfg.get('sequence_parallel', False), + enable_autocast=self.enable_autocast, + no_sync_func=no_sync_func, + grad_sync_func=grad_sync_func, + param_sync_func=param_sync_func, + ) + + # only the last stages of the pipeline return losses + loss_dict = {} + if losses_reduced_per_micro_batch: + # average loss across micro batches + loss_tensors_list = [loss_reduced['avg'] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_mean = loss_tensor.mean() + for data_type in self.enabled_data_types: + loss_tensors_list = [loss_reduced[data_type] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_dict[data_type] = loss_tensor.mean() + else: + loss_mean = torch.tensor(0.0).cuda() + + return loss_mean, loss_dict + + def training_step(self, dataloader_iter, batch_idx): + """ + We pass the dataloader iterator function to the micro-batch scheduler. + The input batch to each micro-batch is fetched using the dataloader function + in the micro-batch fwd function. + """ + + # we zero grads here because we also call backward in the megatron-core fwd/bwd functions + self._optimizer.zero_grad() + + if self.with_distributed_adam: + # hack to enable overlapping param sync and forward compute + # note: the distributed optimizer monkey-patches each + # parameter's __getattribute__ function so that it can + # launch parameter all-gathers the first time the + # parameter is accessed after the optimizer step. However, + # PyTorch directly passes embedding parameters into a C++, + # bypassing this process. A quick-and-dirty hack is to + # manually interact with the parameter. + modules = self.model if isinstance(self.model, list) else [self.model] + for module in modules: + if isinstance(module, Float16Module): + module = module.module + module = module.language_model + if hasattr(module, 'embedding'): + for param in module.embedding.parameters(): + param.data_ptr() + + loss_mean, loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, False) + + # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced + if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): + self.allreduce_sequence_parallel_gradients() + + if self.with_distributed_adam: + # synchronize asynchronous grad reductions + # note: not necessary, but reduces performance degradation + # from multiple simultaneous NCCL calls + self._optimizer._finish_bucket_grad_sync() + elif self.megatron_amp_O2: + # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) + if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): + # main grads are stored in the MainParamsOptimizer wrapper + self._optimizer.allreduce_main_grads() + else: + # async grad allreduce is not currently implemented for O1/autocasting mixed precision training + # so we all-reduce gradients after the pipeline + self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) + + if self.cfg.get('pipeline_model_parallel_size', 1) > 1 and self.cfg.get( + 'share_embeddings_and_output_weights', True + ): + # when using pipeline parallelism the first and last stage must keep embeddings in sync + self.allreduce_first_last_embeddings() + + ## logging + # we can only log on one rank if it is rank zero so we broadcast from last rank + # we can avoid this broadcast by updating the PTL log function to accept specific ranks + torch.distributed.broadcast(loss_mean, get_last_rank()) + + if self.cfg.precision in [16, '16', '16-mixed']: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log('loss_scale', loss_scale, batch_size=1) + + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log_dict({'train/' + k: v for k, v in loss_dict.items()}, rank_zero_only=True, batch_size=1) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, rank_zero_only=True, batch_size=1) + self.log( + 'global_step', self.trainer.global_step, prog_bar=True, rank_zero_only=True, batch_size=1, + ) + + consumed_samples = self.compute_consumed_samples(self.trainer.global_step - self.init_global_step) + # TODO: make sure compute_consumed_samples works for pipeline parallelism + self.log( + 'consumed_samples', consumed_samples, prog_bar=True, rank_zero_only=True, batch_size=1, + ) + + if self.cfg.get('rampup_batch_size', None): + micro_batch_size = self.cfg.get('micro_batch_size', 1) + total_gpus_number = self.trainer.num_devices * self.trainer.num_nodes + current_global_batch_size = get_num_microbatches() * micro_batch_size * total_gpus_number + self.log('global_batch_size', current_global_batch_size, prog_bar=True, rank_zero_only=True, batch_size=1) + + num_microbatch_calculator = apex.transformer.pipeline_parallel.utils._GLOBAL_NUM_MICROBATCHES_CALCULATOR + num_microbatch_calculator.update( + consumed_samples=consumed_samples, consistency_check=True, + ) + + return loss_mean + + def get_forward_output_and_loss_func(self, validation_step=False): + def loss_func(output_tensors, loss_masks): + loss_list = [] + loss_for_ub = 0 + for data_type in self.enabled_data_types: + output_tensor = output_tensors[data_type] + loss_mask = loss_masks[data_type] + # Loss for a micro-batch (ub) + loss_list.append(self.loss_func(loss_mask, output_tensor)) + loss_for_ub += loss_list[-1] * self.per_type_loss_weights[data_type] + loss_for_ub /= sum(self.per_type_loss_weights.values()) + + if validation_step and not self.cfg.data.get('validation_drop_last', True): + raise NotImplementedError(f"`validation_drop_last=False` is not implemented in Kosmos!") + # num_valid_tokens_in_ub = loss_mask.sum() + # if loss_for_ub.isnan(): + # assert loss_mask.count_nonzero() == 0, 'Got NaN loss with non-empty input' + # loss_sum_for_ub = torch.zeros_like(num_valid_tokens_in_ub) + # else: + # loss_sum_for_ub = num_valid_tokens_in_ub * loss_for_ub + # + # loss_sum_and_ub_size_all_gpu = torch.cat( + # [ + # loss_sum_for_ub.clone().detach().view(1), + # torch.tensor([num_valid_tokens_in_ub]).cuda().clone().detach(), + # ] + # ) + # # Could potentially reduce num_valid_samples_in_microbatch and use that to aggregate instead of len(self._validation_ds) + # torch.distributed.all_reduce( + # loss_sum_and_ub_size_all_gpu, group=parallel_state.get_data_parallel_group() + # ) + # return loss_for_ub, {'loss_sum_and_ub_size': loss_sum_and_ub_size_all_gpu} + else: + reduced_loss = average_losses_across_data_parallel_group([loss_for_ub] + loss_list) + loss_dict = {data_type: reduced_loss[i + 1] for i, data_type in enumerate(self.enabled_data_types)} + loss_dict['avg'] = reduced_loss[0] + return loss_for_ub, loss_dict + + def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_layers=None): + output_tensors = {} + loss_masks = {} + combined_batch = next(dataloader_iter) + for data_type in self.enabled_data_types: + if parallel_state.get_pipeline_model_parallel_world_size() == 1: + batch = combined_batch[data_type] + for k in batch.keys(): + if self.get_attention_mask_from_fusion: + batch[k] = batch[k].cuda(non_blocking=True) if k not in ['attention_mask'] else None + else: + batch[k] = batch[k].cuda(non_blocking=True) + else: + if parallel_state.is_pipeline_first_stage(): + batch = combined_batch[data_type] + # First pipeline stage needs tokens, position_ids, and attention_mask + for k in batch.keys(): + if self.get_attention_mask_from_fusion: + batch[k] = ( + batch[k].cuda(non_blocking=True) + if k in ['tokens', 'position_ids', 'media'] + else None + ) + else: + batch[k] = ( + batch[k].cuda(non_blocking=True) + if k in ['tokens', 'position_ids', 'attention_mask', 'media'] + else None + ) + elif parallel_state.is_pipeline_last_stage(): + batch = combined_batch[data_type] + # Last pipeline stage needs the labels, loss_mask, and attention_mask + for k in batch.keys(): + if self.get_attention_mask_from_fusion: + batch[k] = batch[k].cuda(non_blocking=True) if k in ['labels', 'loss_mask'] else None + else: + batch[k] = ( + batch[k].cuda(non_blocking=True) + if k in ['labels', 'loss_mask', 'attention_mask'] + else None + ) + else: + # Intermediate pipeline stage doesn't need any inputs + batch = {k: None for k in ['tokens', 'position_ids', 'attention_mask', 'labels', 'media']} + + output_tensor = model( + batch['tokens'], + batch['position_ids'], + batch['attention_mask'], + batch['labels'], + batch.get('media'), + checkpoint_activations_all_layers=checkpoint_activations_all_layers, + ) + output_tensors[data_type] = output_tensor + loss_masks[data_type] = batch['loss_mask'] + + return output_tensors, partial(loss_func, loss_masks=loss_masks) + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(batch, model): + extra_arg = {} + if len(batch) == 3: + batch = [x.cuda() for x in batch] + tokens, attention_mask, position_ids = batch + attention_mask = attention_mask[0:1] + else: + ( + tokens, + attention_mask, + position_ids, + set_inference_key_value_memory, + inference_max_sequence_len, + ) = batch + tokens = tokens.cuda() + attention_mask = attention_mask.cuda() + position_ids = position_ids.cuda() + attention_mask = attention_mask[0:1] + extra_arg['set_inference_key_value_memory'] = set_inference_key_value_memory[0].item() + extra_arg['inference_max_sequence_len'] = inference_max_sequence_len[0].item() + output_tensor = model(tokens, position_ids, attention_mask, **extra_arg) + + def id_func(output_tensor): + return output_tensor, {'logits': output_tensor} + + return output_tensor, id_func + + return fwd_output_only_func + + def validation_step(self, dataloader_iter, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. + """ + loss_mean, loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, True) + loss_dict['avg'] = loss_mean + return loss_dict + + def validation_epoch_end(self, outputs): + loss_dict = {} + if parallel_state.is_pipeline_last_stage(): + # only the last pipeline parallel stages return loss with their batch size + if self.cfg.data.get('validation_drop_last', True): + averaged_loss = torch.stack([loss['avg'] for loss in outputs]).mean() + for data_type in self.enabled_data_types: + loss_dict[data_type] = torch.stack([loss[data_type] for loss in outputs]).mean() + else: + # Compute the avg loss by total_loss across all samples / total number of samples + # total_loss_and_total_samples = torch.vstack(outputs).sum(axis=0) + # avg_loss = total_loss_and_total_samples[0] / total_loss_and_total_samples[1] + # averaged_loss = avg_loss.type(torch.float32).cuda() + raise NotImplementedError("`validation_drop_last=False` is not supported!") + else: + averaged_loss = torch.tensor(0.0, dtype=torch.float32).cuda() + for data_type in self.enabled_data_types: + loss_dict[data_type] = torch.tensor(0.0, dtype=torch.float32).cuda() + + # we can only log on one rank if it is rank zero so we broadcast from last rank + torch.distributed.broadcast(averaged_loss, get_last_rank()) + for data_type in self.enabled_data_types: + torch.distributed.broadcast(loss_dict[data_type], get_last_rank()) + + self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log_dict({'val/' + k: v for k, v in loss_dict.items()}, rank_zero_only=True, batch_size=1) + + return averaged_loss + + def test_step(self, batch, batch_idx): + return self.validation_step(batch, batch_idx) + + def test_epoch_end(self, outputs): + averaged_loss = average_losses_across_data_parallel_group(outputs) + logging.info(f'test_loss: {averaged_loss[0]}') + + def loss_func(self, loss_mask, output_tensor): + losses = output_tensor.float() + loss_mask = loss_mask.view(-1).float() + # TODO: add nemo version here + loss = torch.sum(losses.view(-1) * loss_mask) / loss_mask.sum() # sequence level nll + return loss + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + num_parameters_on_device, total_num_parameters = self._get_total_params_across_model_parallel_groups_gpt_bert( + self.model + ) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer.ckpt_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + rampup_batch_size = self.cfg.get('rampup_batch_size', None) + if rampup_batch_size: + start_batch_size = rampup_batch_size[0] + batch_size_increment = rampup_batch_size[1] + total_gpus_number = self.trainer.num_devices * self.trainer.num_nodes + + assert start_batch_size % (total_gpus_number) == 0, ( + 'expected' + ' start batch size ({}) to be divisible by total number of GPUs' + ' ({})'.format(start_batch_size, total_gpus_number) + ) + + micro_batch_size = self.cfg.get('micro_batch_size', 1) + tensor_model_parallel_size = self.cfg.get('tensor_model_parallel_size', 1) + pipeline_model_parallel_size = self.cfg.get('pipeline_model_parallel_size', 1) + total_data_parallel_size = total_gpus_number // (tensor_model_parallel_size * pipeline_model_parallel_size) + + assert batch_size_increment % (micro_batch_size * total_data_parallel_size) == 0, ( + 'expected' + ' batch size increment ({}) to be divisible by micro_batch_size ({}) times total data parallel size' + ' ({})'.format(batch_size_increment, micro_batch_size, total_data_parallel_size) + ) + + if stage == 'predict': + return + else: + # TODO: consider adding a ModelPT guard to check if model is being restored. + # allowing restored models to optionally setup datasets + self.build_train_valid_test_datasets() + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + + # when using pipeline model parallel the final stage need to initialize word embeddings + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if isinstance(self.model, list): + for i, module in enumerate(self.model): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + if self.cfg.get('share_embeddings_and_output_weights', True): + module.sync_initial_word_embeddings() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + else: + if self.cfg.get('share_embeddings_and_output_weights', True): + self.model.sync_initial_word_embeddings() + + if self.cfg.get('transformer_engine', False): + self.setup_transformer_engine_tp_groups() + + def build_train_valid_test_datasets(self): + logging.info('Building Kosmos datasets.') + + if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): + raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") + + global_batch_size = self.cfg.global_batch_size + max_train_steps = self.trainer.max_steps + eval_iters = (max_train_steps // self.trainer.val_check_interval + 1) * self.trainer.limit_val_batches + test_iters = self.trainer.limit_test_batches + + train_valid_test_num_samples = [ + max_train_steps * global_batch_size, + eval_iters * global_batch_size, + test_iters * global_batch_size, + ] + + if self.trainer.limit_val_batches <= 1.0 and isinstance(self.trainer.limit_val_batches, float): + train_valid_test_num_samples[ + 1 + ] = 1 # This is to make sure we only have one epoch on every validation iteration + + self._train_ds, self._validation_ds, self._test_ds = {}, {}, {} + + for data_type in self.enabled_data_types: + if data_type == "text": + ( + self._train_ds[data_type], + self._validation_ds[data_type], + self._test_ds[data_type], + ) = build_text_train_valid_test_datasets( + cfg=self.cfg, + trainer=self.trainer, + data_prefix=self.cfg.data.data_prefix, + data_impl=self.cfg.data.data_impl, + splits_string=self.cfg.data.splits_string, + train_valid_test_num_samples=train_valid_test_num_samples, + seq_length=self.cfg.data.seq_length, + seed=self.cfg.seed, + skip_warmup=self.cfg.data.get('skip_warmup', True), + tokenizer=self.tokenizer, + ) + + if data_type in ["image_caption", "image_interleaved"]: + self._train_ds[data_type], self._validation_ds[data_type] = build_media_train_valid_datasets( + model_cfg=self.cfg, + consumed_samples=self.compute_consumed_samples(0) + * self.per_type_micro_batch_size[data_type] + // self.cfg.micro_batch_size, + tokenizer=self.tokenizer, + data_type=data_type, + ) + self._test_ds[data_type] = None + + data = [] + for ds_name, ds in [("Train", self._train_ds), ("Validation", self._validation_ds), ("Test", self._test_ds)]: + for key in self.enabled_data_types: + # Append the name of the dataset, the key, and the length of the data under that key to the list + if ds_name == "Train": + consumed_samples = ( + self.compute_consumed_samples(0) + * self.per_type_micro_batch_size[key] + // self.cfg.micro_batch_size + ) + else: + consumed_samples = 0 + data.append([ds_name, key, len(ds[key]) if ds[key] is not None else 0, consumed_samples]) + + df = pd.DataFrame(data, columns=["Dataset", "Type", "Length", "Consumed"]) + df['Length'] = df['Length'].apply(lambda x: "{:,}".format(x)) + df['Consumed'] = df['Consumed'].apply(lambda x: "{:,}".format(x)) + + logging.info(f"\nFinished Building Kosmos Dataset:\n{df}") + return self._train_ds, self._validation_ds, self._test_ds + + def build_pretraining_text_data_loader( + self, + dataset, + consumed_samples, + micro_batch_size, + global_batch_size, + drop_last=True, + pad_samples_to_global_batch_size=False, + ): + """Buld dataloader given an input dataset.""" + + logging.info(f'Building dataloader with consumed samples: {consumed_samples}') + # Megatron sampler + if hasattr(self.cfg.data, 'dataloader_type') and self.cfg.data.dataloader_type is not None: + if self.cfg.data.dataloader_type == 'single': + batch_sampler = MegatronPretrainingSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=micro_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=drop_last, + global_batch_size=global_batch_size, + pad_samples_to_global_batch_size=pad_samples_to_global_batch_size, + ) + elif self.cfg.data.dataloader_type == 'cyclic': + batch_sampler = MegatronPretrainingRandomSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=micro_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=self.cfg.get('drop_last', True), + global_batch_size=global_batch_size, + ) + else: + raise ValueError('cfg.data.dataloader_type must be "single" or "cyclic"') + else: + raise ValueError('cfg.data.dataloader_type not found. Must be "single" or "cyclic"') + + return torch.utils.data.DataLoader( + dataset, + batch_sampler=batch_sampler, + num_workers=self.cfg.data.num_workers, + pin_memory=True, + persistent_workers=True if self.cfg.data.num_workers > 0 else False, + ) + + @classmethod + def list_available_models(cls) -> Optional[PretrainedModelInfo]: + """ + This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud. + Returns: + List of available pre-trained models. + """ + return [] + + def setup_training_data(self, cfg): + consumed_samples = self.compute_consumed_samples(0) + + train_dls = {} + for data_type in self.enabled_data_types: + if hasattr(self, '_train_ds') and self._train_ds.get(data_type) is not None: + if data_type == "text": + train_dls[data_type] = self.build_pretraining_text_data_loader( + self._train_ds[data_type], + consumed_samples=consumed_samples + * self.per_type_micro_batch_size[data_type] + // self.cfg.micro_batch_size, + micro_batch_size=self.per_type_micro_batch_size[data_type], + global_batch_size=self.per_type_global_batch_size[data_type], + ) + elif data_type in ["image_caption", "image_interleaved"]: + train_dls[data_type] = torch.utils.data.DataLoader( + self._train_ds[data_type], + batch_size=self.per_type_micro_batch_size[data_type], + num_workers=cfg.get(data_type).num_workers, + pin_memory=True, + drop_last=True, + persistent_workers=True, + ) + else: + raise ValueError(f"Unrecognized dataset type {data_type}") + + self._train_dl = MergedKosmosDataLoader(train_dls) + + def setup_validation_data(self, cfg): + consumed_samples = 0 + + validation_dls = {} + for data_type in self.enabled_data_types: + if hasattr(self, '_validation_ds') and self._validation_ds.get(data_type) is not None: + if data_type == "text": + validation_dls[data_type] = self.build_pretraining_text_data_loader( + self._validation_ds[data_type], + consumed_samples=consumed_samples, + micro_batch_size=self.per_type_micro_batch_size[data_type], + global_batch_size=self.per_type_global_batch_size[data_type], + ) + elif data_type in ["image_caption", "image_interleaved"]: + validation_dls[data_type] = torch.utils.data.DataLoader( + self._validation_ds[data_type], + batch_size=self.per_type_micro_batch_size[data_type], + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=True, + persistent_workers=True, + ) + else: + raise ValueError(f"Unrecognized dataset type {data_type}") + + self._validation_dl = MergedKosmosDataLoader(validation_dls) + + def setup_test_data(self, cfg): + pass diff --git a/nemo/collections/multimodal/models/kosmos/perceiver_resampler.py b/nemo/collections/multimodal/models/kosmos/perceiver_resampler.py new file mode 100644 index 000000000000..14bdedc5324f --- /dev/null +++ b/nemo/collections/multimodal/models/kosmos/perceiver_resampler.py @@ -0,0 +1,131 @@ +""" +Taken from https://github.com/lucidrains/flamingo-pytorch +""" + +""" +# Usage: +perceive = PerceiverResampler( + dim = 1024, + depth = 2, + dim_head = 64, + heads = 8, + num_latents = 64, # the number of latents to shrink your media sequence to, perceiver style + num_time_embeds = 4 # say you have 4 images maximum in your dialogue +) + +medias = torch.randn(1, 2, 256, 1024) # (batch, time, sequence length, dimension) +perceived = perceive(medias) # (1, 2, 64, 1024) - (batch, time, num latents, dimension) +""" + +import torch +from einops import rearrange, repeat +from einops_exts import rearrange_many +from torch import einsum, nn + + +def exists(val): + return val is not None + + +def FeedForward(dim, mult=4): + inner_dim = int(dim * mult) + return nn.Sequential( + nn.LayerNorm(dim), nn.Linear(dim, inner_dim, bias=False), nn.GELU(), nn.Linear(inner_dim, dim, bias=False), + ) + + +class PerceiverAttention(nn.Module): + def __init__(self, *, dim, dim_head=64, heads=8): + super().__init__() + self.scale = dim_head ** -0.5 + self.heads = heads + inner_dim = dim_head * heads + + self.norm_media = nn.LayerNorm(dim) + self.norm_latents = nn.LayerNorm(dim) + + self.to_q = nn.Linear(dim, inner_dim, bias=False) + self.to_kv = nn.Linear(dim, inner_dim * 2, bias=False) + self.to_out = nn.Linear(inner_dim, dim, bias=False) + + def forward(self, x, latents): + """ + Args: + x (torch.Tensor): image features + shape (b, T, n1, D) + latent (torch.Tensor): latent features + shape (b, T, n2, D) + """ + x = self.norm_media(x) + latents = self.norm_latents(latents) + + h = self.heads + + q = self.to_q(latents) + kv_input = torch.cat((x, latents), dim=-2) + k, v = self.to_kv(kv_input).chunk(2, dim=-1) + q, k, v = rearrange_many((q, k, v), "b t n (h d) -> b h t n d", h=h) + q = q * self.scale + + # attention + sim = einsum("... i d, ... j d -> ... i j", q, k) + sim = sim - sim.amax(dim=-1, keepdim=True).detach() + attn = sim.softmax(dim=-1) + + out = einsum("... i j, ... j d -> ... i d", attn, v) + out = rearrange(out, "b h t n d -> b t n (h d)", h=h) + return self.to_out(out) + + +class PerceiverResampler(nn.Module): + def __init__( + self, + *, + dim, + depth=6, + dim_head=64, + heads=8, + num_latents=64, + max_num_media=None, + max_num_frames=None, + ff_mult=4, + ): + super().__init__() + self.latents = nn.Parameter(torch.randn(num_latents, dim)) + self.frame_embs = nn.Parameter(torch.randn(max_num_frames, dim)) if exists(max_num_frames) else None + self.media_time_embs = nn.Parameter(torch.randn(max_num_media, 1, dim)) if exists(max_num_media) else None + + self.layers = nn.ModuleList([]) + for _ in range(depth): + self.layers.append( + nn.ModuleList( + [PerceiverAttention(dim=dim, dim_head=dim_head, heads=heads), FeedForward(dim=dim, mult=ff_mult),] + ) + ) + + self.norm = nn.LayerNorm(dim) + + def forward(self, x): + """ + Args: + x (torch.Tensor): image features + shape (b, T, F, v, D) + Returns: + shape (b, T, n, D) where n is self.num_latents + """ + b, T, F, v = x.shape[:4] + + # frame and media time embeddings + if exists(self.frame_embs): + frame_embs = repeat(self.frame_embs[:F], "F d -> b T F v d", b=b, T=T, v=v) + x = x + frame_embs + x = rearrange(x, "b T F v d -> b T (F v) d") # flatten the frame and spatial dimensions + if exists(self.media_time_embs): + x = x + self.media_time_embs[:T] + + # blocks + latents = repeat(self.latents, "n d -> b T n d", b=b, T=T) + for attn, ff in self.layers: + latents = attn(x, latents) + latents + latents = ff(latents) + latents + return self.norm(latents) diff --git a/nemo/collections/multimodal/models/nerf/base.py b/nemo/collections/multimodal/models/nerf/base.py new file mode 100644 index 000000000000..d1908080e90c --- /dev/null +++ b/nemo/collections/multimodal/models/nerf/base.py @@ -0,0 +1,24 @@ +import pytorch_lightning as pl + +from nemo.core.classes.common import Serialization +from nemo.core.classes.modelPT import ModelPT + + +class NerfModelBase(ModelPT, Serialization): + def __init__(self, cfg): + super().__init__(cfg=cfg) + self.save_hyperparameters() + self._cfg = cfg + + @staticmethod + def is_module_updatable(module): + return hasattr(module, 'update_step') and callable(module.update_step) + + def list_available_models(self): + pass + + def setup_training_data(self): + pass + + def setup_validation_data(self): + pass diff --git a/nemo/collections/multimodal/models/nerf/dreamfusion.py b/nemo/collections/multimodal/models/nerf/dreamfusion.py new file mode 100644 index 000000000000..6b7784b002ff --- /dev/null +++ b/nemo/collections/multimodal/models/nerf/dreamfusion.py @@ -0,0 +1,313 @@ +import logging +import os +import random + +import cv2 +import imageio +import numpy as np +import torch + +from nemo.collections.multimodal.models.nerf.txt2nerf_base import Txt2NerfBase +from nemo.collections.multimodal.modules.nerf.loss.laplacian_smooth_loss import LaplacianSmoothLoss +from nemo.collections.multimodal.modules.nerf.loss.normal_consistency_loss import NormalConsistencyLoss +from nemo.collections.multimodal.modules.nerf.materials.materials_base import ShadingEnum +from nemo.core import optim + + +# TODO(ahmadki): split dmtet from dreamfusion +class DreamFusion(Txt2NerfBase): + def __init__(self, cfg): + super(DreamFusion, self).__init__(cfg) + + self.guidance_scale = cfg.guidance_scale + + self.iters = cfg.iters + self.latent_iter_ratio = cfg.latent_iter_ratio + self.albedo_iter_ratio = cfg.albedo_iter_ratio + self.min_ambient_ratio = cfg.min_ambient_ratio + self.textureless_ratio = cfg.textureless_ratio + + # Lambdas + self.lambda_sds = cfg.loss.lambda_sds + self.lambda_opacity = cfg.loss.lambda_opacity + self.lambda_entropy = cfg.loss.lambda_entropy + self.lambda_orientation = cfg.loss.lambda_orientation + self.lambda_2d_normal_smooth = cfg.loss.lambda_2d_normal_smooth + self.lambda_3d_normal_smooth = cfg.loss.lambda_3d_normal_smooth + self.lambda_mesh_normal = cfg.loss.lambda_mesh_normal + self.lambda_mesh_laplacian = cfg.loss.lambda_mesh_laplacian + + if self.lambda_mesh_normal > 0: + self.normal_consistency_loss_fn = NormalConsistencyLoss() + if self.lambda_mesh_laplacian > 0: + self.laplacian_smooth_loss_fn = LaplacianSmoothLoss() + + # Video + self.test_images = [] + self.test_depths = [] + + def training_step(self, batch, batch_idx): + # experiment iterations ratio + # i.e. what proportion of this experiment have we completed (in terms of iterations) so far? + exp_iter_ratio = self.global_step / self.iters + + # TODO(ahmadki): move to database + if exp_iter_ratio < self.latent_iter_ratio: + ambient_ratio = 1.0 + shading_type = ShadingEnum.NORMAL + as_latent = True + else: + if exp_iter_ratio <= self.albedo_iter_ratio: + ambient_ratio = 1.0 + shading_type = None + else: + # random shading + ambient_ratio = self.min_ambient_ratio + (1.0 - self.min_ambient_ratio) * random.random() + rand = random.random() + if rand >= (1.0 - self.textureless_ratio): + shading_type = ShadingEnum.TEXTURELESS + else: + shading_type = ShadingEnum.LAMBERTIAN + + as_latent = False + + return_normal_image = bool(self.lambda_2d_normal_smooth) + return_normal_perturb = bool(self.lambda_3d_normal_smooth) + return_vertices = bool(self.lambda_mesh_laplacian) + return_faces = bool(self.lambda_mesh_normal) or bool(self.lambda_mesh_laplacian) + return_faces_normals = bool(self.lambda_mesh_normal) + outputs = self( + rays_o=batch['rays_o'], # [B, H, W, 3] + rays_d=batch['rays_d'], # [B, H, W, 3] + mvp=batch['mvp'], # [B, 4, 4] + perturb=True, + ambient_ratio=ambient_ratio, + shading_type=shading_type, + binarize=False, + return_normal_image=return_normal_image, + return_normal_perturb=return_normal_perturb, + return_vertices=return_vertices, + return_faces=return_faces, + return_faces_normals=return_faces_normals, + ) + + if as_latent: + pred_rgb = ( + torch.cat([outputs['image'], outputs['opacity']], dim=-1).permute(0, 3, 1, 2).contiguous() + ) # [B, 4, H, W] + else: + pred_rgb = outputs['image'].permute(0, 3, 1, 2).contiguous() # [B, 3, H, W] + + # TODO(ahmadki): move into guidance + azimuth = batch['azimuth'] + text_z = [self.text_z['uncond']] * azimuth.shape[0] + for b in range(azimuth.shape[0]): + if azimuth[b] >= -90 and azimuth[b] < 90: + if azimuth[b] >= 0: + r = 1 - azimuth[b] / 90 + else: + r = 1 + azimuth[b] / 90 + start_z = self.text_z['front'] + end_z = self.text_z['side'] + else: + if azimuth[b] >= 0: + r = 1 - (azimuth[b] - 90) / 90 + else: + r = 1 + (azimuth[b] + 90) / 90 + start_z = self.text_z['side'] + end_z = self.text_z['back'] + pos_z = r * start_z + (1 - r) * end_z + text_z.append(pos_z) + text_z = torch.cat(text_z, dim=0) + + loss_dict = {} + + # SDS loss + guidance_loss = self.guidance.train_step( + text_z, pred_rgb, as_latent=as_latent, guidance_scale=self.guidance_scale + ) + loss_dict['lambda_sds'] = guidance_loss * self.lambda_sds + + # opacity loss + if self.lambda_opacity > 0 and 'opacity' in outputs: + loss_opacity = (outputs['opacity'] ** 2).mean() + loss_dict['loss_opacity'] = self.lambda_opacity * loss_opacity + + # entropy loss + if self.lambda_entropy > 0 and 'weights' in outputs: + alphas = outputs['weights'].clamp(1e-5, 1 - 1e-5) + loss_entropy = (-alphas * torch.log2(alphas) - (1 - alphas) * torch.log2(1 - alphas)).mean() + lambda_entropy = self.lambda_entropy * min(1, 2 * self.global_step / self.iters) + loss_dict['loss_entropy'] = lambda_entropy * loss_entropy + + if self.lambda_2d_normal_smooth > 0 and 'normal_image' in outputs: + pred_normal = outputs['normal_image'] + loss_smooth = (pred_normal[:, 1:, :, :] - pred_normal[:, :-1, :, :]).square().mean() + ( + pred_normal[:, :, 1:, :] - pred_normal[:, :, :-1, :] + ).square().mean() + loss_dict['loss_smooth'] = self.lambda_2d_normal_smooth * loss_smooth + + # orientation loss + if self.lambda_orientation > 0 and all(key in outputs for key in ['weights', 'normals', 'dirs']): + loss_orientation = ( + outputs['weights'].detach() * (outputs['normals'] * outputs['dirs']).sum(-1).clamp(min=0) ** 2 + ) + loss_orientation = loss_orientation.mean() + loss_dict['loss_orientation'] = self.lambda_orientation * loss_orientation + + if self.lambda_3d_normal_smooth > 0 and all(key in outputs for key in ['normals', 'normal_perturb']): + loss_normal_perturb = (outputs['normal_perturb'] - outputs['normals']).abs().mean() + loss_dict['loss_normal_smooth'] = self.lambda_3d_normal_smooth * loss_normal_perturb + + if self.lambda_mesh_normal > 0 and all(key in outputs for key in ['face_normals', 'faces']): + normal_consistency_loss = self.normal_consistency_loss_fn( + face_normals=outputs['face_normals'], t_pos_idx=outputs['faces'] + ) + loss_dict['normal_consistency_loss'] = self.lambda_mesh_normal * normal_consistency_loss + + if self.lambda_mesh_laplacian > 0 and all(key in outputs for key in ['verts', 'faces']): + laplacian_loss = self.laplacian_smooth_loss_fn(verts=outputs['verts'], faces=outputs['faces']) + loss_dict['laplacian_loss'] = self.lambda_mesh_laplacian * laplacian_loss + + loss = sum(loss_dict.values()) + + self.log_dict(loss_dict, prog_bar=False, rank_zero_only=True) + self.log('loss', loss, prog_bar=True, rank_zero_only=True) + + # TODO(ahmadki): LearningRateMonitor + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, prog_bar=True, rank_zero_only=True) + + self.log('global_step', self.global_step + 1, prog_bar=True, rank_zero_only=True) + + return loss + + def validation_step(self, batch, batch_idx): + # save image + images, depths = self._shared_predict(batch) + + save_path = os.path.join(self.trainer.log_dir, 'validation') + os.makedirs(save_path, exist_ok=True) + for i, (image, depth) in enumerate(zip(images, depths)): + # Save image + cv2.imwrite( + os.path.join( + save_path, + f'{self.current_epoch:04d}_{self.global_step:04d}_{self.global_rank:04d}_{batch_idx:04d}_{i:04d}_rgb.png', + ), + cv2.cvtColor(image, cv2.COLOR_RGB2BGR), + ) + # Save depth + cv2.imwrite( + os.path.join( + save_path, + f'{self.current_epoch:04d}_{self.global_step:04d}_{self.global_rank:04d}_{batch_idx:04d}_{i:04d}_depth.png', + ), + depth, + ) + + def test_step(self, batch, batch_idx): + # save image + images, depths = self._shared_predict(batch) + self.test_images.append(images) + self.test_depths.append(depths) + + def on_test_epoch_end(self): + save_path = os.path.join(self.trainer.log_dir, 'test') + os.makedirs(save_path, exist_ok=True) + + images = np.concatenate(self.test_images, axis=0) + imageio.mimwrite( + os.path.join(os.path.join(save_path, f'{self.current_epoch:04d}_{self.global_step:04d}_rgb.mp4')), + images, + fps=25, + quality=8, + macro_block_size=1, + ) + + depths = np.concatenate(self.test_depths, axis=0) + imageio.mimwrite( + os.path.join(os.path.join(save_path, f'{self.current_epoch:04d}_{self.global_step:04d}_depth.mp4')), + depths, + fps=25, + quality=8, + macro_block_size=1, + ) + + self.test_images.clear() + self.test_depths.clear() + + def predict_step(self, batch, batch_idx): + return self._shared_predict(self, batch) + + def forward( + self, + rays_o, + rays_d, + mvp, + perturb, + ambient_ratio, + shading_type, + binarize, + return_normal_image, + return_normal_perturb, + return_vertices, + return_faces, + return_faces_normals, + ): + outputs = self.renderer( + rays_o=rays_o, + rays_d=rays_d, + mvp=mvp, + perturb=perturb, + ambient_ratio=ambient_ratio, + shading_type=shading_type, + binarize=binarize, + return_normal_image=return_normal_image, + return_normal_perturb=return_normal_perturb, + return_vertices=return_vertices, + return_faces=return_faces, + return_faces_normals=return_faces_normals, + ) + return outputs + + def _shared_predict(self, data): + outputs = self( + rays_o=data['rays_o'], # [B, H, W, 3] + rays_d=data['rays_d'], # [B, H, W, 3] + mvp=data['mvp'], + perturb=False, + ambient_ratio=data['ambient_ratio'] if 'ambient_ratio' in data else 1.0, # TODO(ahmadki): move to dataset + shading_type=data['shading_type'] if 'shading_type' in data else None, # TODO(ahmadki): move to dataset + binarize=False, + return_normal_image=False, + return_normal_perturb=False, + return_vertices=False, + return_faces=False, + return_faces_normals=False, + ) + + images_np = outputs['image'].detach().cpu().numpy() + images_np = (images_np * 255).astype(np.uint8) + + depths_np = outputs['depth'].detach().cpu().numpy() + depths_np = (depths_np - depths_np.min()) / (np.ptp(depths_np) + 1e-6) + depths_np = (depths_np * 255).astype(np.uint8) + + return images_np, depths_np + + # TODO(ahmadki): rework + def setup_optimization(self): + cfg = self._cfg.optim + optimizer_args = dict(cfg) + optimizer_args.pop('name', None) + + optimizer = optim.get_optimizer(cfg.name) + + optimizer = optimizer(params=self.parameters(), **optimizer_args) + + self._optimizer = optimizer + + def configure_optimizers(self): + self.setup_optimization() + return self._optimizer diff --git a/nemo/collections/multimodal/models/nerf/txt2nerf_base.py b/nemo/collections/multimodal/models/nerf/txt2nerf_base.py new file mode 100644 index 000000000000..19a393aa4774 --- /dev/null +++ b/nemo/collections/multimodal/models/nerf/txt2nerf_base.py @@ -0,0 +1,81 @@ +import logging + +from nemo.collections.multimodal.models.nerf.base import NerfModelBase + + +class Txt2NerfBase(NerfModelBase): + def __init__(self, cfg): + super().__init__(cfg) + self.prompt = cfg.prompt + self.negative_prompt = cfg.negative_prompt + self.front_prompt = cfg.front_prompt + self.side_prompt = cfg.side_prompt + self.back_prompt = cfg.back_prompt + + self.nerf_cfg = cfg.nerf + self.renderer_cfg = cfg.renderer + self.guidance_cfg = cfg.guidance + + nerf = self.from_config_dict(cfg.nerf) + material = self.from_config_dict(cfg.material) + background = self.from_config_dict(cfg.background) + self.renderer = self.build_renderer(cfg.renderer, nerf, material, background) + self.guidance = None + + def build_renderer(self, cfg, nerf, material, background): + renderer = self.from_config_dict(cfg) + renderer.nerf = nerf + renderer.material = material + renderer.background = background + return renderer + + def build_guidance(self, cfg): + self.guidance = self.from_config_dict(cfg) + self.guidance.eval() + for p in self.guidance.parameters(): + p.requires_grad = False + + def prepare_embeddings(self): + # TODO(ahmadki): add top view ? + self.text_z = { + "default": self.guidance.get_text_embeds([self.prompt]), + "uncond": self.guidance.get_text_embeds([self.negative_prompt]), + "front": self.guidance.get_text_embeds([f"{self.prompt}{self.front_prompt}"]), + "side": self.guidance.get_text_embeds([f"{self.prompt}{self.side_prompt}"]), + "back": self.guidance.get_text_embeds([f"{self.prompt}{self.back_prompt}"]), + } + + def on_fit_start(self) -> None: + self.build_guidance(self.guidance_cfg) + self.prepare_embeddings() + + def on_train_batch_start(self, batch, batch_idx, unused=0): + if self.is_module_updatable(self.guidance): + self.guidance.update_step(epoch=self.current_epoch, global_step=self.global_step) + + if self.is_module_updatable(self.renderer.nerf): + self.renderer.nerf.update_step(epoch=self.current_epoch, global_step=self.global_step) + + if self.is_module_updatable(self.renderer.material): + self.renderer.material.update_step(epoch=self.current_epoch, global_step=self.global_step) + + if self.is_module_updatable(self.renderer.background): + self.renderer.background.update_step(epoch=self.current_epoch, global_step=self.global_step) + + if self.is_module_updatable(self.renderer): + self.renderer.update_step(epoch=self.current_epoch, global_step=self.global_step) + + dataset = self.trainer.train_dataloader.dataset + if self.is_module_updatable(dataset): + dataset.update_step(epoch=self.current_epoch, global_step=self.global_step) + + def mesh(self, resolution, batch_size=128, density_thresh=None): + return self.nerf.mesh(resolution=resolution, batch_size=batch_size, density_thresh=density_thresh) + + def on_save_checkpoint(self, checkpoint): + # remove guidance from checkpoint. + # We can still laod the model without guidance checkpoints because the module is not initalized + # at __init__ time. + keys_to_remove = [key for key in checkpoint['state_dict'].keys() if key.startswith('guidance.')] + for key in keys_to_remove: + del checkpoint['state_dict'][key] diff --git a/nemo/collections/multimodal/models/neva/__init__.py b/nemo/collections/multimodal/models/neva/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/neva/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/neva/neva_model.py b/nemo/collections/multimodal/models/neva/neva_model.py new file mode 100644 index 000000000000..d6b8e2336375 --- /dev/null +++ b/nemo/collections/multimodal/models/neva/neva_model.py @@ -0,0 +1,1076 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import os +import random +import re +import tempfile +from functools import partial +from itertools import chain +from typing import Any, List, Optional, Union + +import numpy as np +import pandas as pd +import torch +from einops import rearrange, repeat +from omegaconf.dictconfig import DictConfig +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning.accelerators import CPUAccelerator +from pytorch_lightning.trainer.trainer import Trainer +from transformers import CLIPVisionModel + +from nemo.collections.multimodal.data.neva.neva_dataset import ( + DEFAULT_BOS_TOKEN, + DEFAULT_EOS_TOKEN, + DEFAULT_IM_END_TOKEN, + DEFAULT_IM_START_TOKEN, + DataCollatorForSupervisedDataset, + make_supervised_data_module, +) +from nemo.collections.multimodal.models.clip.megatron_clip_models import CLIPVisionTransformer, MegatronCLIPModel +from nemo.collections.multimodal.models.kosmos.perceiver_resampler import PerceiverResampler +from nemo.collections.multimodal.parts.utils import extend_instance +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( + MegatronPretrainingRandomSampler, + MegatronPretrainingSampler, +) +from nemo.collections.nlp.data.language_modeling.megatron.gpt_dataset import ( + build_train_valid_test_datasets as build_text_train_valid_test_datasets, +) +from nemo.collections.nlp.models.language_modeling.megatron.gpt_model import GPTModel, post_language_model_processing +from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import MegatronGPTPEFTModel +from nemo.collections.nlp.models.nlp_model import NLPModel +from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import AdapterName, MMLinearAdapterConfig +from nemo.collections.nlp.modules.common.megatron.build_model import build_model +from nemo.collections.nlp.modules.common.megatron.language_model import Embedding, get_language_model +from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule +from nemo.collections.nlp.modules.common.megatron.utils import ( + ApexGuardDefaults, + average_losses_across_data_parallel_group, + get_all_params_for_weight_decay_optimization, + get_params_for_weight_decay_optimization, + init_method_normal, + parallel_lm_logits, + scaled_init_method_normal, +) +from nemo.collections.nlp.modules.common.text_generation_utils import ( + generate, + get_computeprob_response, + get_default_length_params, + get_default_sampling_params, + megatron_neva_generate, +) +from nemo.collections.nlp.modules.common.transformer.text_generation import ( + LengthParam, + OutputType, + SamplingParam, + TextGeneration, +) +from nemo.collections.nlp.parts.mixins.multimodal_adapter_mixins import MultimodalAdapterModelMixin +from nemo.collections.nlp.parts.nlp_overrides import GradScaler, NLPSaveRestoreConnector +from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone +from nemo.core import adapter_mixins +from nemo.core.classes.common import PretrainedModelInfo +from nemo.utils import AppState, logging, model_utils + +try: + import apex.transformer.pipeline_parallel.utils + from apex.transformer.enums import AttnMaskType + from apex.transformer.pipeline_parallel.utils import get_num_microbatches + + HAVE_APEX = True + +except (ImportError, ModuleNotFoundError): + + HAVE_APEX = False + +try: + from megatron.core import dist_checkpointing, parallel_state + from megatron.core.models.gpt import GPTModel as MCoreGPTModel + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + +try: + import transformer_engine + + HAVE_TE = True + +except (ImportError, ModuleNotFoundError): + HAVE_TE = False + + +class FrozenCLIPVisionTransformer(CLIPVisionTransformer): + def __init__(self, model_cfg, model_parallel_config, pre_process=True, post_process=True): + super().__init__( + model_cfg, model_parallel_config, pre_process=pre_process, post_process=post_process, skip_head=True, + ) + self.frozen = False + + def train(self, mode): + if self.frozen: + return self + + super().train(mode) + return self + + def forward(self, input): + assert self.training == False + hidden_states = self.backbone(input) + # Do not add header after backbone + return hidden_states + + def freeze(self) -> None: + for param in self.parameters(): + param.requires_grad = False + + self.eval() + self.frozen = True + + +class NevaWordEmbeddingMixin(torch.nn.Module, adapter_mixins.AdapterModuleMixin): + def init_vision( + self, + vision_encoder, + media_start_id, + media_end_id, + vision_select_layer=-1, + class_token_length=1, + use_im_start_end=False, + llama_tricks=False, + ): + self.vision_encoder = vision_encoder + self.from_hf = isinstance(vision_encoder, CLIPVisionModel) + self.media_start_id = media_start_id + self.media_end_id = media_end_id + self.class_token_length = class_token_length + self.use_im_start_end = use_im_start_end + self.vision_select_layer = vision_select_layer + self.media = None + self.set_accepted_adapter_types([MMLinearAdapterConfig._target_]) + self.llama_tricks = llama_tricks + + def set_media(self, media): + self.media = media + + def forward(self, input_ids, **kwargs): + media = self.media # avoid change the signature of embedding forward function + if self.llama_tricks and not self.use_im_start_end: + masked_input_ids = input_ids.detach().clone() + if self.num_embeddings < 32000: + raise ValueError("Not supported tokenizer with llama 2!") + else: + masked_input_ids[masked_input_ids >= 32000] = 0 + words_embeddings = super().forward(masked_input_ids, **kwargs) + + else: + words_embeddings = super().forward(input_ids, **kwargs) + + return self.replace_media_embeddings(input_ids, words_embeddings, media) + + def encode_vision_x(self, vision_x: torch.Tensor): + """ + Compute media tokens from vision input by passing it through vision encoder and conditioning language model. + Args: + vision_x (torch.Tensor): Vision input + shape (B, T_img, F, C, H, W) + Images in the same chunk are collated along T_img, and frames are collated along F + Currently only F=1 is supported (single-frame videos) + + rearrange code based on https://github.com/dhansmair/flamingo-mini + """ + + assert vision_x.ndim == 6, "vision_x should be of shape (b, T_img, F, C, H, W)" + b, T, F = vision_x.shape[:3] + assert F == 1, "Only single frame supported" + + vision_x = rearrange(vision_x, "b T F c h w -> (b T F) c h w") + with torch.no_grad(): + if self.from_hf: + vision_x = self.vision_encoder(vision_x, output_hidden_states=True) + vision_x = vision_x.hidden_states[self.vision_select_layer] + else: + self.vision_encoder.backbone.transformer.return_select_layer = self.vision_select_layer + vision_x = self.vision_encoder(vision_x) + vision_x = rearrange(vision_x, "(b T F) v d -> b T F v d", b=b, T=T, F=F) + vision_x = vision_x[:, :, :, self.class_token_length :] + assert self.is_adapter_available(), "Cannot find multimodal vision adapter!" + vision_connector = self.get_adapter_module(AdapterName.MM_LINEAR_ADAPTER) + vision_x = vision_connector(vision_x) + return vision_x + + def replace_media_embeddings(self, input_ids, inputs_embeds, media): + if media is None: + return inputs_embeds + + batch_size, sequence_length, hidden_size = inputs_embeds.shape + + # calculate media features without gradients + media_features = self.encode_vision_x(media) # b T F S(eq) H(idden) + num_images_per_sample = media_features.size(1) + num_patches = media_features.size(3) + # flatten patches + media_features = media_features.view(batch_size, -1, hidden_size) + + # create an indices matrix used in torch.scatter + padded_media_indices = torch.ones( + (batch_size, num_images_per_sample), dtype=torch.long, device=input_ids.device + ) + padded_media_indices *= sequence_length + for idx, input_id in enumerate(input_ids): + media_end_positions = torch.where(input_id == self.media_end_id)[0] + if self.use_im_start_end: + # locate the first media token positions + padded_media_indices[idx, : len(media_end_positions)] = media_end_positions - num_patches + assert ( + input_id[padded_media_indices[idx, : len(media_end_positions)] - 1] == self.media_start_id + ).all() + else: + padded_media_indices[idx, : len(media_end_positions)] = media_end_positions - num_patches + 1 + assert (input_id[padded_media_indices[idx, : len(media_end_positions)]] == self.media_start_id).all() + + # use indices to create a span + padded_media_indices = padded_media_indices.unsqueeze(-1) + torch.arange( + num_patches, device=padded_media_indices.device + ).repeat(*padded_media_indices.shape, 1) + padded_media_indices = padded_media_indices.reshape(batch_size, -1) + padded_media_indices = repeat(padded_media_indices, 'b s -> b s h', h=hidden_size) + + # concat placeholder + updated_input_embeds = torch.cat( + (inputs_embeds, torch.zeros((batch_size, num_patches, hidden_size), device=inputs_embeds.device)), dim=1 + ) + updated_input_embeds = updated_input_embeds.type(media_features.dtype) + # scatter media_features + updated_input_embeds.scatter_(1, padded_media_indices, media_features) + + # chop off placeholder + updated_input_embeds = updated_input_embeds[:, :sequence_length] + + return updated_input_embeds + + +class MCoreNevaModel(MCoreGPTModel): + def __init__( + self, mm_cfg, media_start_id, media_end_id, **kwargs, + ): + super(MCoreNevaModel, self).__init__(**kwargs,) + + self.mm_cfg = mm_cfg + self.media_start_id = media_start_id + self.media_end_id = media_end_id + self.dist_ckpt = False + + if mm_cfg.llm.from_pretrained is not None: + logging.info(f"Loading LLM weights from checkpoint {mm_cfg.llm.from_pretrained}") + self.load_llm_weights(mm_cfg.llm.from_pretrained) + + if mm_cfg.llm.freeze: + for param in chain( + self.embedding.parameters(), self.decoder.parameters(), self.output_layer.parameters(), + ): + param.requires_grad = False + self.embedding = self.embedding.eval() + self.decoder = self.decoder.eval() + self.output_layer = self.output_layer.eval() + + # Initialize vision encoder and freeze it + if mm_cfg.vision_encoder.from_hf: + vision_encoder = CLIPVisionModel.from_pretrained( + mm_cfg.vision_encoder.from_pretrained, torch_dtype=torch.bfloat16, + ).cuda() + vision_encoder = vision_encoder.to(torch.bfloat16) + if mm_cfg.vision_encoder.freeze: + for param in vision_encoder.parameters(): + param.requires_grad = False + vision_encoder = vision_encoder.eval() + else: + vision_cfg = MegatronCLIPModel.restore_from( + mm_cfg.vision_encoder.from_pretrained, return_config=True + ).vision + vision_encoder = FrozenCLIPVisionTransformer(vision_cfg, self.config) + self.load_vision_encoder_weights(vision_encoder, mm_cfg.vision_encoder.from_pretrained) + if mm_cfg.vision_encoder.freeze: + vision_encoder.freeze() + + model_type = self.mm_cfg.llm.get("model_type", "nvgpt") + # Monkey patch embedding + if kwargs.get("pre_process", True): + extend_instance(self.embedding.word_embeddings, NevaWordEmbeddingMixin) + self.embedding.word_embeddings.init_vision( + vision_encoder, + media_start_id, + media_end_id, + vision_select_layer=mm_cfg.vision_encoder.get("vision_select_layer", -2), + class_token_length=mm_cfg.vision_encoder.get("class_token_length", 1), + use_im_start_end=mm_cfg.get("use_im_start_end", False), + llama_tricks=(model_type == "llama_2"), + ) + + def forward( + self, *args, **kwargs, + ): + media = kwargs.pop('media', None) + self.embedding.word_embeddings.set_media(media) + return super().forward(*args, **kwargs) + + def _load_model_weights(self, nemo_path): + """ + Shared method to load model weights from a given nemo_path. + """ + if torch.cuda.is_available(): + map_location = torch.device('cuda') + else: + map_location = torch.device('cpu') + + save_restore_connector = NLPSaveRestoreConnector() + cwd = os.getcwd() + app_state = AppState() + + with tempfile.TemporaryDirectory() as tmpdir: + try: + if os.path.isfile(nemo_path): + save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) + else: + tmpdir = nemo_path + os.chdir(tmpdir) + if app_state.model_parallel_size is not None and app_state.model_parallel_size > 1: + model_weights = save_restore_connector._inject_model_parallel_rank_for_ckpt( + tmpdir, save_restore_connector.model_weights_ckpt + ) + else: + model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) + + state_dict = save_restore_connector._load_state_dict_from_disk( + model_weights, map_location=map_location + ) + + # distributed checkpointing + if state_dict is None: + self.dist_ckpt = True + sharded_state_dict = self.sharded_state_dict(prefix="model.") + checkpoint = dict(state_dict=sharded_state_dict) + tmp_model_weights_ckpt = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) + tmp_model_weights_dir = os.path.splitext(tmp_model_weights_ckpt)[0] + assert os.path.isdir(tmp_model_weights_dir), f'Expected {tmp_model_weights_dir} to be a directory.' + checkpoint = dist_checkpointing.load( + sharded_state_dict=checkpoint, checkpoint_dir=tmp_model_weights_dir, + ) + state_dict = checkpoint["state_dict"] + + finally: + os.chdir(cwd) + + return state_dict + + def load_vision_encoder_weights(self, vision_encoder, nemo_path): + state_dict = self._load_model_weights(nemo_path) + + new_state_dict = {} + for k, v in state_dict.items(): + if k.startswith("model.vision_encoder."): + new_k = k.replace("model.vision_encoder.", "") + new_state_dict[new_k] = v + + missing, unexpected = vision_encoder.load_state_dict(new_state_dict, strict=False) + print(f"Restored from {nemo_path} with {len(missing)} missing and {len(unexpected)} unexpected keys") + if len(missing) > 0: + print(f"Missing Keys: {missing}") + if len(unexpected) > 0: + print(f"Unexpected Keys: {unexpected}") + + def load_llm_weights(self, nemo_path): + state_dict = self._load_model_weights(nemo_path) + + new_state_dict = {} + if self.dist_ckpt: + for k, v in state_dict.items(): + new_k = k + if k.startswith("model."): + new_k = k.replace("model.", "", 1) + new_state_dict[new_k] = v + self.load_state_dict(new_state_dict, strict=True) + else: + for k, v in state_dict.items(): + if k.startswith("model.language_model."): + new_k = k.replace("model.language_model.", "", 1) + module_key, param_key = new_k.split(".", 1) + if module_key not in new_state_dict: + new_state_dict[module_key] = {} + new_state_dict[module_key][param_key] = v + self.language_model.load_state_dict(new_state_dict, strict=True) + print(f"Restored LLM weights from {nemo_path}.") + + +class NevaModel(GPTModel): + def __init__( + self, mm_cfg, media_start_id, media_end_id, **kwargs, + ): + super(NevaModel, self).__init__(**kwargs,) + + self.mm_cfg = mm_cfg + self.media_start_id = media_start_id + self.media_end_id = media_end_id + + if mm_cfg.llm.from_pretrained is not None: + logging.info(f"Loading LLM weights from checkpoint {mm_cfg.llm.from_pretrained}") + self.load_llm_weights(self.language_model, mm_cfg.llm.from_pretrained) + if mm_cfg.llm.freeze: + for param in self.language_model.parameters(): + param.requires_grad = False + self.language_model = self.language_model.eval() + + # Initialize vision encoder and freeze it + if mm_cfg.vision_encoder.from_hf: + vision_encoder = CLIPVisionModel.from_pretrained( + mm_cfg.vision_encoder.from_pretrained, torch_dtype=torch.bfloat16, + ).cuda() + vision_encoder = vision_encoder.to(torch.bfloat16) + if mm_cfg.vision_encoder.freeze: + for param in vision_encoder.parameters(): + param.requires_grad = False + vision_encoder = vision_encoder.eval() + else: + vision_cfg = MegatronCLIPModel.restore_from( + mm_cfg.vision_encoder.from_pretrained, return_config=True + ).vision + vision_encoder = FrozenCLIPVisionTransformer(vision_cfg, self.config) + self.load_vision_encoder_weights(vision_encoder, mm_cfg.vision_encoder.from_pretrained) + if mm_cfg.vision_encoder.freeze: + vision_encoder.freeze() + + model_type = self.mm_cfg.llm.get("model_type", "nvgpt") + # Monkey patch embedding + if kwargs.get("pre_process", True): + extend_instance(self.language_model.embedding.word_embeddings, NevaWordEmbeddingMixin) + self.language_model.embedding.word_embeddings.init_vision( + vision_encoder, + media_start_id, + media_end_id, + vision_select_layer=mm_cfg.vision_encoder.get("vision_select_layer", -2), + class_token_length=mm_cfg.vision_encoder.get("class_token_length", 1), + use_im_start_end=mm_cfg.get("use_im_start_end", False), + llama_tricks=(model_type == "llama_2"), + ) + + def forward( + self, *args, **kwargs, + ): + media = kwargs.pop('media', None) + self.language_model.embedding.word_embeddings.set_media(media) + return super().forward(*args, **kwargs) + + def _load_model_weights(self, nemo_path): + """ + Shared method to load model weights from a given nemo_path. + """ + if torch.cuda.is_available(): + map_location = torch.device('cuda') + else: + map_location = torch.device('cpu') + + save_restore_connector = NLPSaveRestoreConnector() + cwd = os.getcwd() + app_state = AppState() + + with tempfile.TemporaryDirectory() as tmpdir: + try: + if os.path.isfile(nemo_path): + save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) + else: + tmpdir = nemo_path + os.chdir(tmpdir) + if app_state.model_parallel_size is not None and app_state.model_parallel_size > 1: + model_weights = save_restore_connector._inject_model_parallel_rank_for_ckpt( + tmpdir, save_restore_connector.model_weights_ckpt + ) + else: + model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) + + state_dict = save_restore_connector._load_state_dict_from_disk( + model_weights, map_location=map_location + ) + finally: + os.chdir(cwd) + + return state_dict + + def load_vision_encoder_weights(self, vision_encoder, nemo_path): + state_dict = self._load_model_weights(nemo_path) + + new_state_dict = {} + for k, v in state_dict.items(): + if k.startswith("model.vision_encoder."): + new_k = k.replace("model.vision_encoder.", "") + new_state_dict[new_k] = v + + missing, unexpected = vision_encoder.load_state_dict(new_state_dict, strict=False) + print(f"Restored from {nemo_path} with {len(missing)} missing and {len(unexpected)} unexpected keys") + if len(missing) > 0: + print(f"Missing Keys: {missing}") + if len(unexpected) > 0: + print(f"Unexpected Keys: {unexpected}") + + def load_llm_weights(self, language_model, nemo_path): + state_dict = self._load_model_weights(nemo_path) + + new_state_dict = {} + for k, v in state_dict.items(): + if k.startswith("model.language_model."): + new_k = k.replace("model.language_model.", "", 1) + module_key, param_key = new_k.split(".", 1) + if module_key not in new_state_dict: + new_state_dict[module_key] = {} + new_state_dict[module_key][param_key] = v + + language_model.load_state_dict(new_state_dict, strict=True) + print(f"Restored LLM weights from {nemo_path}.") + + +class MegatronNevaModel(MultimodalAdapterModelMixin, MegatronGPTModel): + """ + Megatron Neva pretraining + """ + + def __init__(self, cfg: DictConfig, trainer: Trainer): + # MegatronGPTModel.__init__(self, cfg, trainer) + super().__init__(cfg, trainer) + self.init_neva_adapter() + + def init_neva_adapter(self): + self.base_keys = self._get_all_keys() + adapter_name = AdapterName.MM_LINEAR_ADAPTER + adapter_cfg = MMLinearAdapterConfig( + in_features=self.cfg.mm_cfg.vision_encoder.hidden_size, out_features=self.cfg.hidden_size, bias=True, + ) + for name, module in self.named_modules(): + self._check_and_add_adapter( + name, module, adapter_name, adapter_cfg, autocast_dtype=self.autocast_dtype, + ) + self.adapter_keys = self._get_all_keys() - self.base_keys + + def model_provider_func(self, pre_process, post_process): + """Model depends on pipeline paralellism.""" + media_start_id = self.tokenizer.token_to_id(DEFAULT_IM_START_TOKEN) + media_end_id = self.tokenizer.token_to_id(DEFAULT_IM_END_TOKEN) + + if self.mcore_gpt: + if parallel_state.is_unitialized(): + + def dummy(): + return + + if self.trainer.strategy.launcher is not None: + self.trainer.strategy.launcher.launch(dummy, trainer=self.trainer) + self.trainer.strategy.setup_environment() + + model = MCoreNevaModel( + mm_cfg=self.cfg.mm_cfg, + media_start_id=media_start_id, + media_end_id=media_end_id, + config=self.transformer_config, + vocab_size=self.cfg.get('override_vocab_size', self.padded_vocab_size), + max_sequence_length=self.cfg.get('encoder_seq_length', 512), + pre_process=pre_process, + post_process=post_process, + parallel_output=True, + share_embeddings_and_output_weights=self.cfg.get('share_embeddings_and_output_weights', True), + position_embedding_type=self.cfg.get('position_embedding_type', 'learned_absolute'), + rotary_percent=self.cfg.get('rotary_percentage', 1.0), + seq_len_interpolation_factor=self.cfg.get('seq_len_interpolation_factor', None), + ) + else: + model = NevaModel( + mm_cfg=self.cfg.mm_cfg, + media_start_id=media_start_id, + media_end_id=media_end_id, + config=self.model_parallel_config, + vocab_size=self.cfg.get('override_vocab_size', self.padded_vocab_size), + hidden_size=self.cfg.hidden_size, + max_position_embeddings=self.cfg.max_position_embeddings, + num_layers=self.cfg.num_layers, + num_attention_heads=self.cfg.num_attention_heads, + apply_query_key_layer_scaling=self.cfg.get('apply_query_key_layer_scaling', True), + kv_channels=self.cfg.get('kv_channels', None), + ffn_hidden_size=self.cfg.ffn_hidden_size, + num_tokentypes=0, + parallel_output=True, + pre_process=pre_process, + post_process=post_process, + init_method_std=self.cfg.get('init_method_std', 0.02), + use_scaled_init_method=self.cfg.get('use_scaled_init_method', True), + fp16_lm_cross_entropy=self.cfg.get('fp16_lm_cross_entropy', False), + megatron_amp_O2=self.cfg.get('megatron_amp_O2', False), + hidden_dropout=self.cfg.get('hidden_dropout', 0.1), + attention_dropout=self.cfg.get('attention_dropout', 0.1), + ffn_dropout=self.cfg.get('ffn_dropout', 0.0), + precision=self.cfg.get('precision', 16), + fp32_residual_connection=self.cfg.get('fp32_residual_connection', False), + activations_checkpoint_granularity=self.cfg.get('activations_checkpoint_granularity', None), + activations_checkpoint_method=self.cfg.get('activations_checkpoint_method', None), + activations_checkpoint_num_layers=self.cfg.get('activations_checkpoint_num_layers', 1), + activations_checkpoint_layers_per_pipeline=self.cfg.get( + 'activations_checkpoint_layers_per_pipeline', None + ), + normalization=self.cfg.get('normalization', 'layernorm'), + layernorm_epsilon=self.cfg.get('layernorm_epsilon', 1e-5), + onnx_safe=self.cfg.get('onnx_safe', False), + bias=self.cfg.get('bias', True), + bias_activation_fusion=self.cfg.get('bias_activation_fusion', True), + bias_dropout_add_fusion=self.cfg.get('bias_dropout_add_fusion', True), + activation=self.cfg.get('activation', 'gelu'), + headscale=self.cfg.get('headscale', False), + transformer_block_type=self.cfg.get('transformer_block_type', 'pre_ln'), + openai_gelu=self.cfg.get('openai_gelu', False), + normalize_attention_scores=self.cfg.get('normalize_attention_scores', True), + position_embedding_type=self.cfg.get('position_embedding_type', 'learned_absolute'), + rotary_percentage=self.cfg.get('rotary_percentage', 1.0), + share_embeddings_and_output_weights=self.cfg.get('share_embeddings_and_output_weights', True), + attention_type=self.cfg.get('attention_type', 'multihead'), + masked_softmax_fusion=self.cfg.get('masked_softmax_fusion', True), + persist_layer_norm=self.cfg.get('persist_layer_norm', False), + transformer_engine=self.cfg.get('transformer_engine', False), + fp8=self.cfg.get('fp8', False), + fp8_e4m3=self.cfg.get('fp8_e4m3', False), + fp8_hybrid=self.cfg.get('fp8_hybrid', False), + fp8_margin=self.cfg.get('fp8_margin', 0), + fp8_interval=self.cfg.get('fp8_interval', 1), + fp8_amax_history_len=self.cfg.get('fp8_amax_history_len', 1), + fp8_amax_compute_algo=self.cfg.get('fp8_amax_compute_algo', 'most_recent'), + reduce_amax=self.cfg.get('reduce_amax', True), + use_emha=self.cfg.get('use_emha', False), + ub_tp_comm_overlap=self.cfg.get('ub_tp_comm_overlap', False), + use_flash_attention=self.cfg.get('use_flash_attention', False), + megatron_legacy=self.cfg.get('megatron_legacy', False), + seq_len_interpolation_factor=self.cfg.get('seq_len_interpolation_factor', None), + ) + + logging.info( + f"Neva model initialized with {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable parameters" + ) + + return model + + def setup_optimizer_param_groups(self): + """ModelPT override. Optimizer will get self._optimizer_param_groups""" + if self.cfg.mm_cfg.llm.freeze: + super().setup_optimizer_param_groups() + else: + MegatronGPTModel.setup_optimizer_param_groups(self) + + # filter out params doesn't have grad + for param_group in self._optimizer_param_groups: + params_with_grad = [param for param in param_group['params'] if param.requires_grad] + param_group['params'] = params_with_grad + + def forward(self, tokens, text_position_ids, attention_mask, labels, media=None): + output_tensor = self.model(tokens, text_position_ids, attention_mask, labels, media) + return output_tensor + + def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): + return MegatronGPTModel.fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only) + + def training_step(self, dataloader_iter, batch_idx): + """ + We pass the dataloader iterator function to the micro-batch scheduler. + The input batch to each micro-batch is fetched using the dataloader function + in the micro-batch fwd function. + """ + return MegatronGPTModel.training_step(self, dataloader_iter, batch_idx) + + def get_forward_output_and_loss_func(self, validation_step=False): + def loss_func(output_tensor, loss_mask): + loss_for_ub = self.loss_func(loss_mask, output_tensor) + if validation_step and not self.cfg.data.get('validation_drop_last', True): + raise NotImplementedError(f"`validation_drop_last=False` is not implemented in Neva!") + else: + reduced_loss = average_losses_across_data_parallel_group([loss_for_ub]) + return loss_for_ub, dict(avg=reduced_loss[0].unsqueeze(0)) + + def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_layers=None): + batch = next(dataloader_iter) + if parallel_state.get_pipeline_model_parallel_world_size() == 1: + for k in batch.keys(): + if self.get_attention_mask_from_fusion: + batch[k] = batch[k].cuda(non_blocking=True) if k not in ['attention_mask'] else None + else: + batch[k] = batch[k].cuda(non_blocking=True) + else: + if parallel_state.is_pipeline_first_stage(): + # First pipeline stage needs tokens, position_ids, and attention_mask + for k in batch.keys(): + if self.get_attention_mask_from_fusion: + batch[k] = ( + batch[k].cuda(non_blocking=True) if k in ['tokens', 'position_ids', 'media'] else None + ) + else: + batch[k] = ( + batch[k].cuda(non_blocking=True) + if k in ['tokens', 'position_ids', 'attention_mask', 'media'] + else None + ) + elif parallel_state.is_pipeline_last_stage(): + # Last pipeline stage needs the labels, loss_mask, and attention_mask + for k in batch.keys(): + if self.get_attention_mask_from_fusion: + batch[k] = batch[k].cuda(non_blocking=True) if k in ['labels', 'loss_mask'] else None + else: + batch[k] = ( + batch[k].cuda(non_blocking=True) + if k in ['labels', 'loss_mask', 'attention_mask'] + else None + ) + else: + # Intermediate pipeline stage doesn't need any inputs + batch = {k: None for k in ['tokens', 'position_ids', 'attention_mask', 'labels', 'media']} + + forward_args = { + 'input_ids': batch['tokens'], + 'position_ids': batch['position_ids'], + 'attention_mask': batch['attention_mask'], + 'labels': batch['labels'], + 'media': batch.get('media', None), + } + if not self.mcore_gpt: + if self.use_loss_mask: + forward_args['loss_mask'] = batch['loss_mask'] + forward_args['checkpoint_activations_all_layers'] = checkpoint_activations_all_layers + + output_tensor = model(**forward_args) + + return output_tensor, partial(loss_func, loss_mask=batch['loss_mask']) + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(dataloader_iter, model): + batch = next(dataloader_iter) + extra_arg = {} + ( + tokens, + attention_mask, + position_ids, + media, + set_inference_key_value_memory, + inference_max_sequence_len, + ) = batch + tokens = tokens.cuda() + attention_mask = attention_mask.cuda() + position_ids = position_ids.cuda() + attention_mask = attention_mask[0:1] + if media is not None: + media = media.cuda() + labels = None + extra_arg['set_inference_key_value_memory'] = set_inference_key_value_memory[0].item() + extra_arg['inference_max_sequence_len'] = inference_max_sequence_len[0].item() + # TODO : Should I add labels ? + output_tensor = model(tokens, position_ids, attention_mask, labels, media, **extra_arg) + + def id_func(output_tensor): + return output_tensor, {'logits': output_tensor} + + return output_tensor, id_func + + return fwd_output_only_func + + def validation_step(self, dataloader_iter, batch_idx): + return MegatronGPTModel.validation_step(self, dataloader_iter, batch_idx) + + def on_validation_epoch_end(self): + if not self.validation_step_outputs: + return + + if parallel_state.is_pipeline_last_stage(): + # only the last pipeline parallel stages return loss with their batch size + if self.cfg.data.get('validation_drop_last', True): + averaged_loss = torch.stack(self.validation_step_outputs).mean() + else: + # Compute the avg loss by total_loss across all samples / total number of samples + # total_loss_and_total_samples = torch.vstack(outputs).sum(axis=0) + # avg_loss = total_loss_and_total_samples[0] / total_loss_and_total_samples[1] + # averaged_loss = avg_loss.type(torch.float32).cuda() + raise NotImplementedError("`validation_drop_last=False` is not supported!") + else: + averaged_loss = torch.tensor(0.0, dtype=torch.float32).cuda() + + # we can only log on one rank if it is rank zero so we broadcast from last rank + torch.distributed.broadcast(averaged_loss, get_last_rank()) + self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True, batch_size=1) + self.validation_step_outputs.clear() # free memory + + return averaged_loss + + def on_validation_epoch_start(self): + pass + + def test_step(self, batch, batch_idx): + return self.validation_step(batch, batch_idx) + + def test_epoch_end(self, outputs): + averaged_loss = average_losses_across_data_parallel_group(outputs) + logging.info(f'test_loss: {averaged_loss[0]}') + + def loss_func(self, loss_mask, output_tensor): + losses = output_tensor.float() + loss_mask = loss_mask.view(-1).float() + # TODO: add nemo version here + loss = torch.sum(losses.view(-1) * loss_mask) / loss_mask.sum() # sequence level nll + return loss + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + num_parameters_on_device, total_num_parameters = self._get_total_params_across_model_parallel_groups_gpt_bert( + self.model + ) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer.ckpt_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + rampup_batch_size = self.cfg.get('rampup_batch_size', None) + if rampup_batch_size: + start_batch_size = rampup_batch_size[0] + batch_size_increment = rampup_batch_size[1] + total_gpus_number = self.trainer.num_devices * self.trainer.num_nodes + + assert start_batch_size % (total_gpus_number) == 0, ( + 'expected' + ' start batch size ({}) to be divisible by total number of GPUs' + ' ({})'.format(start_batch_size, total_gpus_number) + ) + + micro_batch_size = self.cfg.get('micro_batch_size', 1) + tensor_model_parallel_size = self.cfg.get('tensor_model_parallel_size', 1) + pipeline_model_parallel_size = self.cfg.get('pipeline_model_parallel_size', 1) + total_data_parallel_size = total_gpus_number // (tensor_model_parallel_size * pipeline_model_parallel_size) + + assert batch_size_increment % (micro_batch_size * total_data_parallel_size) == 0, ( + 'expected' + ' batch size increment ({}) to be divisible by micro_batch_size ({}) times total data parallel size' + ' ({})'.format(batch_size_increment, micro_batch_size, total_data_parallel_size) + ) + + if stage == 'predict': + return + else: + # TODO: consider adding a ModelPT guard to check if model is being restored. + # allowing restored models to optionally setup datasets + self.build_train_valid_test_datasets() + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + + # when using pipeline model parallel the final stage need to initialize word embeddings + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if isinstance(self.model, list): + for i, module in enumerate(self.model): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + if self.cfg.get('share_embeddings_and_output_weights', True): + module.sync_initial_word_embeddings() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + else: + if self.cfg.get('share_embeddings_and_output_weights', True): + self.model.sync_initial_word_embeddings() + + if self.cfg.get('transformer_engine', False): + self.setup_transformer_engine_tp_groups() + + def build_train_valid_test_datasets(self): + logging.info('Building Neva datasets.') + ds_dict = make_supervised_data_module(tokenizer=self.tokenizer, model_cfg=self.cfg,) + self._train_ds = ds_dict["train_dataset"] + self._validation_ds = ds_dict["eval_dataset"] + + return self._train_ds, self._validation_ds + + def build_pretraining_data_loader( + self, dataset, consumed_samples, dataset_type=None, drop_last=True, pad_samples_to_global_batch_size=False + ): + """Buld dataloader given an input dataset.""" + + logging.info(f'Building dataloader with consumed samples: {consumed_samples}') + # Megatron sampler + if hasattr(self.cfg.data, 'dataloader_type') and self.cfg.data.dataloader_type is not None: + if self.cfg.data.dataloader_type == 'single': + batch_sampler = MegatronPretrainingSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=self.cfg.micro_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=drop_last, + global_batch_size=self.cfg.global_batch_size, + pad_samples_to_global_batch_size=pad_samples_to_global_batch_size, + ) + elif self.cfg.data.dataloader_type == 'cyclic': + batch_sampler = MegatronPretrainingRandomSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=self.cfg.micro_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=self.cfg.get('drop_last', True), + ) + else: + raise ValueError('cfg.data.dataloader_type must be "single" or "cyclic"') + else: + raise ValueError('cfg.data.dataloader_type not found. Must be "single" or "cyclic"') + + collate_func = DataCollatorForSupervisedDataset(self.cfg, self.tokenizer) + return torch.utils.data.DataLoader( + dataset, + batch_sampler=batch_sampler, + collate_fn=collate_func, + num_workers=self.cfg.data.num_workers, + pin_memory=True, + persistent_workers=True if self.cfg.data.num_workers > 0 else False, + ) + + @classmethod + def list_available_models(cls) -> Optional[PretrainedModelInfo]: + """ + This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud. + Returns: + List of available pre-trained models. + """ + return [] + + def setup_test_data(self, cfg): + pass + + def state_dict(self, destination=None, prefix='', keep_vars=False): + # Get the original state dictionary + original_state_dict = super().state_dict(destination=destination, prefix=prefix, keep_vars=keep_vars) + keys_to_keep = list(self.adapter_keys) + # TODO(yuya): maybe not hard-code vision_encoder keys here + if self.megatron_amp_O2: + vision_encoder_keys = [ + k.replace("model.module.", "model.", 1) for k in self.base_keys if "vision_encoder" in k + ] + llm_keys = [k.replace("model.module.", "model.", 1) for k in self.base_keys if "vision_encoder" not in k] + else: + vision_encoder_keys = [k for k in self.base_keys if "vision_encoder" in k] + llm_keys = [k for k in self.base_keys if "vision_encoder" not in k] + if not self.cfg.mm_cfg.llm.freeze: + keys_to_keep += llm_keys + if not self.cfg.mm_cfg.vision_encoder.freeze: + keys_to_keep += vision_encoder_keys + return {k: original_state_dict[k] for k in keys_to_keep if k in original_state_dict} + + def load_state_dict(self, state_dict, strict=False): + logging.warning('Loading state dict for MegatronNevaModel...') + missing_keys, unexpected_keys = NLPModel.load_state_dict(self, state_dict, strict=False) + + if len(missing_keys) > 0: + logging.warning('Missing keys were detected during the load. Please double check.') + logging.warning(f'Missing keys: \n{missing_keys}') + if len(unexpected_keys) > 0: + logging.critical('Unexpected keys were detected during the load. Please double check.') + logging.critical(f'Unexpected keys: \n{unexpected_keys}') + + def on_load_checkpoint(self, checkpoint) -> None: + if self.mcore_gpt: + state_dict = checkpoint["state_dict"] + self.load_state_dict(state_dict) + + def sharded_state_dict(self, prefix: str = ''): + return None + # sharded_state_dict = MegatronGPTModel.sharded_state_dict(self, prefix) + # return sharded_state_dict + + def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any: + inference_config = self.get_inference_config() + + if inference_config is None: + return None + else: + # need to overwrite some configuration, make it immutable + image = os.path.join(inference_config['images_base_path'], batch['image'][0]) + prompt = batch['prompt'][0] + inference_config = inference_config.copy() + compute_logprob = inference_config['compute_logprob'] + if compute_logprob: + inference_config['inputs'] = prompt + inference_config['tokens_to_generate'] = 1 + inference_config['all_probs'] = True + inference_config["add_BOS"] = False + inference_config['greedy'] = True + inference_config['image_list'] = image + response = generate(self, **inference_config) + compute_prob_response = get_computeprob_response(self.tokenizer, response, prompt) + return compute_prob_response + else: + inference_config['inputs'] = prompt + inference_config['image_list'] = image + return generate(self, **inference_config) + + def generate( + self, input_prompts, inference_config, length_params: LengthParam, sampling_params: SamplingParam = None, + ) -> OutputType: + + # check whether the DDP is initialized + if parallel_state.is_unitialized(): + + def dummy(): + return + + import os + + if self.trainer.strategy.launcher is not None: + self.trainer.strategy.launcher.launch(dummy, trainer=self.trainer) + self.trainer.strategy.setup_environment() + + # set the default sampling params if it is None. + # default do greedy sampling + if sampling_params is None: + sampling_params = get_default_sampling_params() + + # set the default length params if it is None. + # default do greedy sampling + if length_params is None: + length_params = get_default_length_params() + + import time + + start = time.time() + # Supports only one prompt at a time + result = megatron_neva_generate(self.cuda(), input_prompts, length_params, sampling_params, inference_config) + end = time.time() + # print(f'Time taken {end - start}') + + return result diff --git a/nemo/collections/multimodal/models/neva/neva_peft_models.py b/nemo/collections/multimodal/models/neva/neva_peft_models.py new file mode 100644 index 000000000000..ac03b5983430 --- /dev/null +++ b/nemo/collections/multimodal/models/neva/neva_peft_models.py @@ -0,0 +1,60 @@ +from omegaconf.dictconfig import DictConfig +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel +from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import ( + AdapterName, + InfusedAdapterConfig, + LoraKQVAdapterConfig, + MLPInfusedAdapterConfig, + ParallelLinearAdapterConfig, + PromptEncoderAdapterConfig, +) +from nemo.core.classes.mixins import adapter_mixins +from nemo.utils import logging, model_utils + + +class MegatronNevaLoRAModel(MegatronNevaModel): + """ + MegatronNevaLoRAModel is a model that combines a base model (MegatronNevaModel) with a low-rank adapters. + The lora adapters will be added in `nemo/collections/nlp/modules/common/megatron/attention.py` + The implementation is based on Hu et al. nemo/collections/nlp/modules/common/megatron/attention.py + + A single low-rank feedfowrad layer is used in parallel with the KQV projection layer. + TODO: Add support to also include an option to adda low-rank adapter in the output projection layer. + """ + + def __init__( + self, cfg: DictConfig, trainer: Trainer, + ): + self.peft_name_keys = [ + AdapterName.LORA_KQV_ADAPTER, + ] + lora_cfg = cfg.peft.lora_tuning + if cfg.get("kv_channels", None) is None: + assert ( + cfg.hidden_size % cfg.num_attention_heads == 0 + ), 'hidden_size must be divisible by num_attention_heads if kv_channels is None' + kv_channels = cfg.hidden_size // cfg.num_attention_heads + else: + kv_channels = cfg.kv_channels + projection_size = kv_channels * cfg.num_attention_heads + + adapter_cfg = LoraKQVAdapterConfig( + in_features=cfg.hidden_size, + out_features=3 * projection_size, + dim=lora_cfg.adapter_dim, + norm_position="none", + norm_type="none", + activation="identity", + column_init_method=lora_cfg.get("column_init_method", "normal"), + row_init_method=lora_cfg.get("row_init_method", "zero"), + gather_output=False, + dropout=lora_cfg.adapter_dropout, + ) + + self.name_key_to_cfg = {} + for k in self.peft_name_keys: + self.name_key_to_cfg[k] = adapter_cfg + + super().__init__(cfg, trainer) diff --git a/nemo/collections/multimodal/models/stable_diffusion/__init__.py b/nemo/collections/multimodal/models/stable_diffusion/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py b/nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py new file mode 100644 index 000000000000..c3ca34b35233 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/diffusion_model.py @@ -0,0 +1,81 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import re +from abc import ABC, abstractclassmethod +from typing import Any, Optional + +import torch + +from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.core.classes import ModelPT +from nemo.utils import logging + + +class DiffusionModel(ModelPT, ABC): + @abstractclassmethod + def get_conditioning(self, c: Any) -> Any: + """ + Encode conditioning c. + For txt2img use-case, the input conditioning would be the plain text, + and output would be the encoded embedding for the corresponding text; + For img2img use-case, the input conditioning would be the raw image, + and output would be the corresponding image embedding + + Args: + c: conditioning + + Returns: + encoded conditioning + """ + pass + + @abstractclassmethod + def apply_model(self, x_t: torch.Tensor, t: torch.Tensor, c: Optional[torch.Tensor]) -> torch.Tensor: + """ + Apply Diffusion model. + If c is not given, the model acts as an unconditional diffusion model. + For diffusion model that applies on the pixel space, x_t should be in the pixel space; + for diffusion model that applies on the latent space, x_t is in latent space. + + Args: + x_t: noisy input x at timestamp t + t: timestamp + c: conditioning + + Returns: + Predicted result that has the same shape as x_t + """ + + def on_train_start(self) -> None: + super().on_train_start() + self.init_global_step = self.trainer.global_step + + def _extract_consumed_samples_from_ckpt(self, ckpt_path): + try: + init_consumed_samples = int(float(re.findall(r"consumed_samples\=([0-9]+.[0-9]+)", ckpt_path)[0])) + except (ValueError, TypeError, IndexError): + logging.warning("Cannot parse the checkpoint file to get the consumed samples. assume it is zero.") + init_consumed_samples = 0 + + return init_consumed_samples + + def compute_consumed_samples(self, steps_since_resume=0): + consumed_samples = ( + self.init_consumed_samples + + steps_since_resume + * self.trainer.world_size + * self.cfg.micro_batch_size + * self.trainer.accumulate_grad_batches + ) + return int(consumed_samples) diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/__init__.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py new file mode 100644 index 000000000000..6f2dd37424d0 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/autoencoder.py @@ -0,0 +1,608 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from contextlib import contextmanager + +import pytorch_lightning as pl +import torch +import torch.nn.functional as F +from taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer + +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.model import Decoder, Encoder +from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import ( + DiagonalGaussianDistribution, +) +from nemo.collections.multimodal.parts.stable_diffusion.utils import instantiate_from_config + + +class VQModel(pl.LightningModule): + def __init__( + self, + ddconfig, + lossconfig, + n_embed, + embed_dim, + ckpt_path=None, + ignore_keys=[], + image_key="image", + colorize_nlabels=None, + monitor=None, + batch_resize_range=None, + scheduler_config=None, + lr_g_factor=1.0, + remap=None, + sane_index_shape=False, # tell vector quantizer to return indices as bhw + ): + super().__init__() + self.embed_dim = embed_dim + self.n_embed = n_embed + self.image_key = image_key + self.encoder = Encoder(**ddconfig) + self.decoder = Decoder(**ddconfig) + self.loss = instantiate_from_config(lossconfig) + self.quantize = VectorQuantizer(n_embed, embed_dim, beta=0.25, remap=remap, sane_index_shape=sane_index_shape) + self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1) + self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1) + if colorize_nlabels is not None: + assert type(colorize_nlabels) == int + self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1)) + if monitor is not None: + self.monitor = monitor + self.batch_resize_range = batch_resize_range + if self.batch_resize_range is not None: + print(f"{self.__class__.__name__}: Using per-batch resizing in range {batch_resize_range}.") + + if ckpt_path is not None: + self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys) + self.scheduler_config = scheduler_config + self.lr_g_factor = lr_g_factor + + @contextmanager + def ema_scope(self, context=None): + if self.use_ema: + self.model_ema.store(self.parameters()) + self.model_ema.copy_to(self) + if context is not None: + print(f"{context}: Switched to EMA weights") + try: + yield None + finally: + if self.use_ema: + self.model_ema.restore(self.parameters()) + if context is not None: + print(f"{context}: Restored training weights") + + def init_from_ckpt(self, path, ignore_keys=list()): + sd = torch.load(path, map_location="cpu")["state_dict"] + keys = list(sd.keys()) + for k in keys: + for ik in ignore_keys: + if k.startswith(ik): + print("Deleting key {} from state_dict.".format(k)) + del sd[k] + missing, unexpected = self.load_state_dict(sd, strict=False) + print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") + if len(missing) > 0: + print(f"Missing Keys: {missing}") + print(f"Unexpected Keys: {unexpected}") + + def on_train_batch_end(self, *args, **kwargs): + if self.use_ema: + self.model_ema(self) + + def encode(self, x): + h = self.encoder(x) + h = self.quant_conv(h) + quant, emb_loss, info = self.quantize(h) + return quant, emb_loss, info + + def encode_to_prequant(self, x): + h = self.encoder(x) + h = self.quant_conv(h) + return h + + def decode(self, quant): + quant = self.post_quant_conv(quant) + dec = self.decoder(quant) + return dec + + def decode_code(self, code_b): + quant_b = self.quantize.embed_code(code_b) + dec = self.decode(quant_b) + return dec + + def forward(self, input, return_pred_indices=False): + quant, diff, (_, _, ind) = self.encode(input) + dec = self.decode(quant) + if return_pred_indices: + return dec, diff, ind + return dec, diff + + def get_input(self, batch, k): + x = batch[k] + if len(x.shape) == 3: + x = x[..., None] + x = x.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format).float() + if self.batch_resize_range is not None: + lower_size = self.batch_resize_range[0] + upper_size = self.batch_resize_range[1] + if self.global_step <= 4: + # do the first few batches with max size to avoid later oom + new_resize = upper_size + else: + new_resize = np.random.choice(np.arange(lower_size, upper_size + 16, 16)) + if new_resize != x.shape[2]: + x = F.interpolate(x, size=new_resize, mode="bicubic") + x = x.detach() + return x + + def training_step(self, batch, batch_idx, optimizer_idx): + # https://github.com/pytorch/pytorch/issues/37142 + # try not to fool the heuristics + x = self.get_input(batch, self.image_key) + xrec, qloss, ind = self(x, return_pred_indices=True) + + if optimizer_idx == 0: + # autoencode + aeloss, log_dict_ae = self.loss( + qloss, + x, + xrec, + optimizer_idx, + self.global_step, + last_layer=self.get_last_layer(), + split="train", + predicted_indices=ind, + ) + + self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=True) + return aeloss + + if optimizer_idx == 1: + # discriminator + discloss, log_dict_disc = self.loss( + qloss, x, xrec, optimizer_idx, self.global_step, last_layer=self.get_last_layer(), split="train" + ) + self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=True) + return discloss + + def validation_step(self, batch, batch_idx): + log_dict = self._validation_step(batch, batch_idx) + with self.ema_scope(): + log_dict_ema = self._validation_step(batch, batch_idx, suffix="_ema") + return log_dict + + def _validation_step(self, batch, batch_idx, suffix=""): + x = self.get_input(batch, self.image_key) + xrec, qloss, ind = self(x, return_pred_indices=True) + aeloss, log_dict_ae = self.loss( + qloss, + x, + xrec, + 0, + self.global_step, + last_layer=self.get_last_layer(), + split="val" + suffix, + predicted_indices=ind, + ) + + discloss, log_dict_disc = self.loss( + qloss, + x, + xrec, + 1, + self.global_step, + last_layer=self.get_last_layer(), + split="val" + suffix, + predicted_indices=ind, + ) + rec_loss = log_dict_ae[f"val{suffix}/rec_loss"] + self.log( + f"val{suffix}/rec_loss", rec_loss, prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True + ) + self.log( + f"val{suffix}/aeloss", aeloss, prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True + ) + if version.parse(pl.__version__) >= version.parse('1.4.0'): + del log_dict_ae[f"val{suffix}/rec_loss"] + self.log_dict(log_dict_ae) + self.log_dict(log_dict_disc) + return self.log_dict + + def configure_optimizers(self): + lr_d = self.learning_rate + lr_g = self.lr_g_factor * self.learning_rate + print("lr_d", lr_d) + print("lr_g", lr_g) + opt_ae = torch.optim.Adam( + list(self.encoder.parameters()) + + list(self.decoder.parameters()) + + list(self.quantize.parameters()) + + list(self.quant_conv.parameters()) + + list(self.post_quant_conv.parameters()), + lr=lr_g, + betas=(0.5, 0.9), + ) + opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(), lr=lr_d, betas=(0.5, 0.9)) + + if self.scheduler_config is not None: + scheduler = instantiate_from_config(self.scheduler_config) + + print("Setting up LambdaLR scheduler...") + scheduler = [ + {'scheduler': LambdaLR(opt_ae, lr_lambda=scheduler.schedule), 'interval': 'step', 'frequency': 1}, + {'scheduler': LambdaLR(opt_disc, lr_lambda=scheduler.schedule), 'interval': 'step', 'frequency': 1}, + ] + return [opt_ae, opt_disc], scheduler + return [opt_ae, opt_disc], [] + + def get_last_layer(self): + return self.decoder.conv_out.weight + + def log_images(self, batch, only_inputs=False, plot_ema=False, **kwargs): + log = dict() + x = self.get_input(batch, self.image_key) + x = x.to(self.device) + if only_inputs: + log["inputs"] = x + return log + xrec, _ = self(x) + if x.shape[1] > 3: + # colorize with random projection + assert xrec.shape[1] > 3 + x = self.to_rgb(x) + xrec = self.to_rgb(xrec) + log["inputs"] = x + log["reconstructions"] = xrec + if plot_ema: + with self.ema_scope(): + xrec_ema, _ = self(x) + if x.shape[1] > 3: + xrec_ema = self.to_rgb(xrec_ema) + log["reconstructions_ema"] = xrec_ema + return log + + def to_rgb(self, x): + assert self.image_key == "segmentation" + if not hasattr(self, "colorize"): + self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x)) + x = F.conv2d(x, weight=self.colorize) + x = 2.0 * (x - x.min()) / (x.max() - x.min()) - 1.0 + return x + + +class VQModelInterface(VQModel): + def __init__(self, embed_dim, *args, **kwargs): + super().__init__(embed_dim=embed_dim, *args, **kwargs) + self.embed_dim = embed_dim + + def encode(self, x): + h = self.encoder(x) + h = self.quant_conv(h) + return h + + def decode(self, h, force_not_quantize=False): + # also go through quantization layer + if not force_not_quantize: + quant, emb_loss, info = self.quantize(h) + else: + quant = h + quant = self.post_quant_conv(quant) + dec = self.decoder(quant) + return dec + + +class AutoencoderKL(pl.LightningModule): + def __init__( + self, + ddconfig, + embed_dim, + lossconfig=None, # TODO make it configurable + ckpt_path=None, + ignore_keys=[], + image_key="image", + colorize_nlabels=None, + monitor=None, + from_pretrained: str = None, + capture_cudagraph_iters=-1, + ): + super().__init__() + self.image_key = image_key + self.encoder = Encoder(**ddconfig) + self.decoder = Decoder(**ddconfig) + self.loss = torch.nn.Identity() # instantiate_from_config(lossconfig) + assert ddconfig["double_z"] + self.quant_conv = torch.nn.Conv2d(2 * ddconfig["z_channels"], 2 * embed_dim, 1) + self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1) + self.embed_dim = embed_dim + if colorize_nlabels is not None: + assert type(colorize_nlabels) == int + self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1)) + if monitor is not None: + self.monitor = monitor + if ckpt_path is not None: + self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys) + + if from_pretrained is not None: + state_dict = torch.load(from_pretrained) + self._load_pretrained_model(state_dict) + + # CUDA graph captured sub-modules + self.capture_cudagraph_iters = capture_cudagraph_iters + self.stream = torch.cuda.Stream() + self.encoder_iterations = self.decoder_iterations = 0 + self.encoder_graph = torch.cuda.CUDAGraph() # eval + self.decoder_graph = torch.cuda.CUDAGraph() # eval + self.graphed_encoder = self.graphed_decoder = None # train + self.static_x = self.static_moments = None + self.static_z = self.static_dec = None + + def _state_key_mapping(self, state_dict: dict): + import re + + res_dict = {} + key_list = state_dict.keys() + key_str = " ".join(key_list) + up_block_pattern = re.compile('upsamplers') + p1 = re.compile('mid.block_[0-9]') + p2 = re.compile('decoder.up.[0-9]') + up_blocks_count = int(len(re.findall(up_block_pattern, key_str)) / 2 + 1) + for key_, val_ in state_dict.items(): + key_ = ( + key_.replace("up_blocks", "up") + .replace("down_blocks", "down") + .replace('resnets', 'block') + .replace('mid_block', 'mid') + .replace("mid.block.", "mid.block_") + .replace('mid.attentions.0.key', 'mid.attn_1.k') + .replace('mid.attentions.0.query', 'mid.attn_1.q') + .replace('mid.attentions.0.value', 'mid.attn_1.v') + .replace('mid.attentions.0.group_norm', 'mid.attn_1.norm') + .replace('mid.attentions.0.proj_attn', 'mid.attn_1.proj_out') + .replace('upsamplers.0', 'upsample') + .replace('downsamplers.0', 'downsample') + .replace('conv_shortcut', 'nin_shortcut') + .replace('conv_norm_out', 'norm_out') + ) + + mid_list = re.findall(p1, key_) + if len(mid_list) != 0: + mid_str = mid_list[0] + mid_id = int(mid_str[-1]) + 1 + key_ = key_.replace(mid_str, mid_str[:-1] + str(mid_id)) + + up_list = re.findall(p2, key_) + if len(up_list) != 0: + up_str = up_list[0] + up_id = up_blocks_count - 1 - int(up_str[-1]) + key_ = key_.replace(up_str, up_str[:-1] + str(up_id)) + res_dict[key_] = val_ + return res_dict + + def _load_pretrained_model(self, state_dict, ignore_mismatched_sizes=False): + state_dict = self._state_key_mapping(state_dict) + model_state_dict = self.state_dict() + loaded_keys = [k for k in state_dict.keys()] + expected_keys = list(model_state_dict.keys()) + original_loaded_keys = loaded_keys + missing_keys = list(set(expected_keys) - set(loaded_keys)) + unexpected_keys = list(set(loaded_keys) - set(expected_keys)) + + def _find_mismatched_keys( + state_dict, model_state_dict, loaded_keys, ignore_mismatched_sizes, + ): + mismatched_keys = [] + if ignore_mismatched_sizes: + for checkpoint_key in loaded_keys: + model_key = checkpoint_key + + if ( + model_key in model_state_dict + and state_dict[checkpoint_key].shape != model_state_dict[model_key].shape + ): + mismatched_keys.append( + (checkpoint_key, state_dict[checkpoint_key].shape, model_state_dict[model_key].shape) + ) + del state_dict[checkpoint_key] + return mismatched_keys + + if state_dict['encoder.mid.attn_1.q.weight'].shape == torch.Size([512, 512]): + for key in [ + 'encoder.mid.attn_1.q.weight', + 'decoder.mid.attn_1.q.weight', + 'encoder.mid.attn_1.v.weight', + 'decoder.mid.attn_1.v.weight', + 'encoder.mid.attn_1.k.weight', + 'decoder.mid.attn_1.k.weight', + 'encoder.mid.attn_1.proj_out.weight', + 'decoder.mid.attn_1.proj_out.weight', + ]: + state_dict[key] = state_dict[key].unsqueeze(2).unsqueeze(3) + + if state_dict is not None: + # Whole checkpoint + mismatched_keys = _find_mismatched_keys( + state_dict, model_state_dict, original_loaded_keys, ignore_mismatched_sizes, + ) + error_msgs = self._load_state_dict_into_model(state_dict) + return missing_keys, unexpected_keys, mismatched_keys, error_msgs + + def _load_state_dict_into_model(self, state_dict): + # Convert old format to new format if needed from a PyTorch state_dict + # copy state_dict so _load_from_state_dict can modify it + state_dict = state_dict.copy() + error_msgs = [] + + # PyTorch's `_load_from_state_dict` does not copy parameters in a module's descendants + # so we need to apply the function recursively. + def load(module: torch.nn.Module, prefix=""): + args = (state_dict, prefix, {}, True, [], [], error_msgs) + module._load_from_state_dict(*args) + + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + ".") + + load(self) + + return error_msgs + + def init_from_ckpt(self, path, ignore_keys=list()): + sd = torch.load(path, map_location="cpu")["state_dict"] + keys = list(sd.keys()) + for k in keys: + for ik in ignore_keys: + if k.startswith(ik): + print("Deleting key {} from state_dict.".format(k)) + del sd[k] + self.load_state_dict(sd, strict=False) + print(f"Restored from {path}") + + def encode(self, x): + h = self.encoder(x) + moments = self.quant_conv(h) + posterior = DiagonalGaussianDistribution(moments) + return posterior + + def decode(self, z): + z = self.post_quant_conv(z) + dec = self.decoder(z) + return dec + + def forward(self, input, sample_posterior=True): + posterior = self.encode(input) + if sample_posterior: + z = posterior.sample() + else: + z = posterior.mode() + dec = self.decode(z) + return dec, posterior + + def get_input(self, batch, k): + x = batch[k] + if len(x.shape) == 3: + x = x[..., None] + x = x.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format).float() + return x + + def training_step(self, batch, batch_idx, optimizer_idx): + inputs = self.get_input(batch, self.image_key) + reconstructions, posterior = self(inputs) + + if optimizer_idx == 0: + # train encoder+decoder+logvar + aeloss, log_dict_ae = self.loss( + inputs, + reconstructions, + posterior, + optimizer_idx, + self.global_step, + last_layer=self.get_last_layer(), + split="train", + ) + self.log("aeloss", aeloss, prog_bar=True, logger=True, on_step=True, on_epoch=True) + self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=False) + return aeloss + + if optimizer_idx == 1: + # train the discriminator + discloss, log_dict_disc = self.loss( + inputs, + reconstructions, + posterior, + optimizer_idx, + self.global_step, + last_layer=self.get_last_layer(), + split="train", + ) + + self.log("discloss", discloss, prog_bar=True, logger=True, on_step=True, on_epoch=True) + self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=False) + return discloss + + def validation_step(self, batch, batch_idx): + inputs = self.get_input(batch, self.image_key) + reconstructions, posterior = self(inputs) + aeloss, log_dict_ae = self.loss( + inputs, reconstructions, posterior, 0, self.global_step, last_layer=self.get_last_layer(), split="val" + ) + + discloss, log_dict_disc = self.loss( + inputs, reconstructions, posterior, 1, self.global_step, last_layer=self.get_last_layer(), split="val" + ) + + self.log("val/rec_loss", log_dict_ae["val/rec_loss"]) + self.log_dict(log_dict_ae) + self.log_dict(log_dict_disc) + return self.log_dict + + def configure_optimizers(self): + lr = self.learning_rate + opt_ae = torch.optim.Adam( + list(self.encoder.parameters()) + + list(self.decoder.parameters()) + + list(self.quant_conv.parameters()) + + list(self.post_quant_conv.parameters()), + lr=lr, + betas=(0.5, 0.9), + ) + opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(), lr=lr, betas=(0.5, 0.9)) + return [opt_ae, opt_disc], [] + + def get_last_layer(self): + return self.decoder.conv_out.weight + + @torch.no_grad() + def log_images(self, batch, only_inputs=False, **kwargs): + log = dict() + x = self.get_input(batch, self.image_key) + x = x.to(self.device) + if not only_inputs: + xrec, posterior = self(x) + if x.shape[1] > 3: + # colorize with random projection + assert xrec.shape[1] > 3 + x = self.to_rgb(x) + xrec = self.to_rgb(xrec) + log["samples"] = self.decode(torch.randn_like(posterior.sample())) + log["reconstructions"] = xrec + log["inputs"] = x + return log + + def to_rgb(self, x): + assert self.image_key == "segmentation" + if not hasattr(self, "colorize"): + self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x)) + x = F.conv2d(x, weight=self.colorize) + x = 2.0 * (x - x.min()) / (x.max() - x.min()) - 1.0 + return x + + +class IdentityFirstStage(torch.nn.Module): + def __init__(self, *args, vq_interface=False, **kwargs): + self.vq_interface = vq_interface # TODO: Should be true by default but check to not break older stuff + super().__init__() + + def encode(self, x, *args, **kwargs): + return x + + def decode(self, x, *args, **kwargs): + return x + + def quantize(self, x, *args, **kwargs): + if self.vq_interface: + return x, None, [None, None, None] + return x + + def forward(self, x, *args, **kwargs): + return x diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py new file mode 100644 index 000000000000..c88c7f932339 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm/ddpm.py @@ -0,0 +1,2170 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import itertools +from contextlib import contextmanager +from functools import partial +from typing import Any, Dict, Optional, Union + +import numpy as np +import pytorch_lightning as pl +import torch +import torch.nn as nn +from einops import rearrange, repeat +from lightning_fabric.utilities.cloud_io import _load as pl_load +from omegaconf import DictConfig, OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.accelerators import CPUAccelerator +from pytorch_lightning.core.saving import _load_state as ptl_load_state +from pytorch_lightning.core.saving import load_hparams_from_tags_csv, load_hparams_from_yaml +from pytorch_lightning.utilities.migration import pl_legacy_patch +from pytorch_lightning.utilities.rank_zero import rank_zero_only +from torch._dynamo import optimize +from torch._inductor import config as inductor_config +from torch.optim.lr_scheduler import LambdaLR +from torchvision.utils import make_grid +from tqdm import tqdm + +from nemo.collections.multimodal.data.stable_diffusion.stable_diffusion_dataset import ( + build_train_valid_datasets, + build_train_valid_precached_datasets, +) +from nemo.collections.multimodal.models.stable_diffusion.diffusion_model import DiffusionModel +from nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder import ( + AutoencoderKL, + IdentityFirstStage, + VQModelInterface, +) +from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + extract_into_tensor, + make_beta_schedule, + noise_like, +) +from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import ( + DiagonalGaussianDistribution, + normal_kl, +) +from nemo.collections.multimodal.parts.stable_diffusion.utils import ( + count_params, + default, + exists, + isimage, + ismap, + log_txt_as_img, + mean_flat, +) +from nemo.collections.multimodal.parts.utils import randn_like +from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel +from nemo.collections.nlp.modules.common.megatron.build_model import build_model +from nemo.collections.nlp.modules.common.megatron.module import Float16Module +from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.core.classes.common import Serialization +from nemo.utils import logging + +try: + from apex import amp + from apex.transformer.enums import AttnMaskType + from apex.transformer.pipeline_parallel.utils import get_num_microbatches + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + +try: + from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + +__conditioning_keys__ = {'concat': 'c_concat', 'crossattn': 'c_crossattn', 'adm': 'y'} + + +def random_dropout(embeddings, drop_rate): + r""" + Function to perform random dropout for embeddings. + When we drop embeddings, we zero them out. + Args: + embeddings (tensor): Input embeddings + drop_rate (float): Rate of dropping the embedding. + """ + nsamples = embeddings.shape[0] + zero_flag = torch.ones(nsamples, 1, 1, device=torch.cuda.current_device()).to(embeddings.dtype) * (1 - drop_rate) + zero_flag = torch.bernoulli(zero_flag).cuda(non_blocking=True) + embeddings = embeddings * zero_flag + return embeddings + + +def disabled_train(self, mode=True): + """Overwrite model.train with this function to make sure train/eval mode + does not change anymore.""" + return self + + +def uniform_on_device(r1, r2, shape, device): + return (r1 - r2) * torch.rand(*shape, device=device) + r2 + + +class DDPM(torch.nn.Module): + def __init__(self, cfg): + super().__init__() + assert cfg.parameterization in ["eps", "x0", "v"], 'currently only supporting "eps" and "x0" and "v"' + self.parameterization = cfg.parameterization + logging.info(f"{self.__class__.__name__}: Running in {self.parameterization}-prediction mode") + self.cond_stage_model = None + self.clip_denoised = cfg.clip_denoised + self.log_every_t = cfg.log_every_t + self.first_stage_key = cfg.first_stage_key + self.image_size = cfg.image_size # try conv? + self.channels = cfg.channels + self.channels_last = cfg.get("channels_last", False) + self.use_positional_encodings = cfg.use_positional_encodings + self.model = DiffusionWrapper( + cfg.unet_config, + cfg.conditioning_key, + cfg.inductor, + cfg.inductor_cudagraphs, + cfg.get("capture_cudagraph_iters", -1), + ) + self.model_type = None + count_params(self.model, verbose=True) + + self.v_posterior = cfg.v_posterior + self.original_elbo_weight = cfg.original_elbo_weight + self.l_simple_weight = cfg.l_simple_weight + + self.register_schedule( + given_betas=cfg.given_betas, + beta_schedule=cfg.beta_schedule, + timesteps=cfg.timesteps, + linear_start=cfg.linear_start, + linear_end=cfg.linear_end, + cosine_s=cfg.cosine_s, + ) + + self.loss_type = cfg.loss_type + + self.learn_logvar = cfg.learn_logvar + self.logvar = torch.full(fill_value=cfg.logvar_init, size=(self.num_timesteps,)) + if self.learn_logvar: + self.logvar = nn.Parameter(self.logvar, requires_grad=True) + + self.rng = torch.Generator(device=torch.cuda.current_device(),) + + def register_schedule( + self, + given_betas=None, + beta_schedule="linear", + timesteps=1000, + linear_start=1e-4, + linear_end=2e-2, + cosine_s=8e-3, + ): + if exists(given_betas): + betas = given_betas + else: + betas = make_beta_schedule( + beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s + ) + alphas = 1.0 - betas + alphas_cumprod = np.cumprod(alphas, axis=0) + alphas_cumprod_prev = np.append(1.0, alphas_cumprod[:-1]) + + (timesteps,) = betas.shape + self.num_timesteps = int(timesteps) + self.linear_start = linear_start + self.linear_end = linear_end + assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' + + to_torch = partial(torch.tensor, dtype=torch.float32) + + self.register_buffer('betas', to_torch(betas)) + self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) + self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1.0 - alphas_cumprod))) + self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1.0 - alphas_cumprod))) + self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod))) + self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1.0 / alphas_cumprod - 1))) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + posterior_variance = (1 - self.v_posterior) * betas * (1.0 - alphas_cumprod_prev) / ( + 1.0 - alphas_cumprod + ) + self.v_posterior * betas + # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) + self.register_buffer('posterior_variance', to_torch(posterior_variance)) + # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain + self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20)))) + self.register_buffer( + 'posterior_mean_coef1', to_torch(betas * np.sqrt(alphas_cumprod_prev) / (1.0 - alphas_cumprod)) + ) + self.register_buffer( + 'posterior_mean_coef2', to_torch((1.0 - alphas_cumprod_prev) * np.sqrt(alphas) / (1.0 - alphas_cumprod)) + ) + + if self.parameterization == "eps": + lvlb_weights = self.betas ** 2 / ( + 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod) + ) + elif self.parameterization == "x0": + lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2.0 * 1 - torch.Tensor(alphas_cumprod)) + elif self.parameterization == "v": + lvlb_weights = torch.ones_like( + self.betas ** 2 / (2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod)) + ) + else: + raise NotImplementedError("mu not supported") + # TODO how to choose this term + lvlb_weights[0] = lvlb_weights[1] + self.register_buffer('lvlb_weights', lvlb_weights, persistent=False) + assert not torch.isnan(self.lvlb_weights).all() + + def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): + pl_sd = torch.load(path, map_location="cpu") + if "state_dict" in list(pl_sd.keys()): + pl_sd = pl_sd["state_dict"] + + sd = {} + first_key = list(pl_sd.keys())[0] + # State keys of model trained with TorchDynamo changed from + # "model.xxx" to "model._orig_mod.xxx" + for k, v in pl_sd.items(): + new_k = k.replace("._orig_mod", "") + # compatibility for stable diffusion old checkpoint + # remove megatron wrapper prefix + if first_key == "model.betas": + new_k = new_k.lstrip("model.") + sd[new_k] = v + + keys = list(sd.keys()) + for k in keys: + for ik in ignore_keys: + if k.startswith(ik): + logging.info("Deleting key {} from state_dict.".format(k)) + del sd[k] + missing, unexpected = ( + self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict(sd, strict=False) + ) + logging.info(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") + if len(missing) > 0: + logging.info(f"Missing Keys: {missing}") + if len(unexpected) > 0: + logging.info(f"Unexpected Keys: {unexpected}") + + def q_mean_variance(self, x_start, t): + """ + Get the distribution q(x_t | x_0). + :param x_start: the [N x C x ...] tensor of noiseless inputs. + :param t: the number of diffusion steps (minus 1). Here, 0 means one step. + :return: A tuple (mean, variance, log_variance), all of x_start's shape. + """ + mean = extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + variance = extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape) + log_variance = extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape) + return mean, variance, log_variance + + def predict_start_from_noise(self, x_t, t, noise): + return ( + extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t + - extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise + ) + + def predict_start_from_z_and_v(self, x_t, t, v): + return ( + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * x_t + - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * v + ) + + def predict_eps_from_z_and_v(self, x_t, t, v): + return ( + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * v + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * x_t + ) + + def q_posterior(self, x_start, x_t, t): + posterior_mean = ( + extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start + + extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t + ) + posterior_variance = extract_into_tensor(self.posterior_variance, t, x_t.shape) + posterior_log_variance_clipped = extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape) + return posterior_mean, posterior_variance, posterior_log_variance_clipped + + def p_mean_variance(self, x, t, clip_denoised: bool): + model_out = self.model(x, t) + if self.parameterization == "eps": + x_recon = self.predict_start_from_noise(x, t=t, noise=model_out) + elif self.parameterization == "x0": + x_recon = model_out + if clip_denoised: + x_recon.clamp_(-1.0, 1.0) + + model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) + return model_mean, posterior_variance, posterior_log_variance + + @torch.no_grad() + def p_sample(self, x, t, clip_denoised=True, repeat_noise=False): + b, *_, device = *x.shape, x.device + model_mean, _, model_log_variance = self.p_mean_variance(x=x, t=t, clip_denoised=clip_denoised) + noise = noise_like(x.shape, device, repeat_noise) + # no noise when t == 0 + nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))) + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise + + @torch.no_grad() + def p_sample_loop(self, shape, return_intermediates=False): + device = self.betas.device + b = shape[0] + img = torch.randn(shape, generator=self.rng, device=device) + intermediates = [img] + for i in tqdm(reversed(range(0, self.num_timesteps)), desc='Sampling t', total=self.num_timesteps): + img = self.p_sample( + img, torch.full((b,), i, device=device, dtype=torch.long), clip_denoised=self.clip_denoised + ) + if i % self.log_every_t == 0 or i == self.num_timesteps - 1: + intermediates.append(img) + if return_intermediates: + return img, intermediates + return img + + @torch.no_grad() + def sample(self, batch_size=16, return_intermediates=False): + image_size = self.image_size + channels = self.channels + return self.p_sample_loop( + (batch_size, channels, image_size, image_size), return_intermediates=return_intermediates + ) + + def q_sample(self, x_start, t, noise=None): + noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) + return ( + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise + ) + + def get_v(self, x, noise, t): + return ( + extract_into_tensor(self.sqrt_alphas_cumprod, t, x.shape) * noise + - extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x.shape) * x + ) + + def get_loss(self, pred, target, mean=True): + if self.loss_type == 'l1': + loss = (target - pred).abs() + if mean: + loss = loss.mean() + elif self.loss_type == 'l2': + if mean: + loss = torch.nn.functional.mse_loss(target, pred) + else: + loss = torch.nn.functional.mse_loss(target, pred, reduction='none') + else: + raise NotImplementedError("unknown loss type '{loss_type}'") + + return loss + + def p_losses(self, x_start, t, noise=None): + noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) + x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) + model_out = self.model(x_noisy, t) + + loss_dict = {} + if self.parameterization == "eps": + target = noise + elif self.parameterization == "x0": + target = x_start + elif self.parameterization == "v": + target = self.get_v(x_start, noise, t) + else: + raise NotImplementedError(f"Paramterization {self.parameterization} not yet supported") + + loss = self.get_loss(model_out, target, mean=False).mean(dim=[1, 2, 3]) + + log_prefix = 'train' if self.training else 'val' + + loss_dict.update({f'{log_prefix}/loss_simple': loss.mean()}) + loss_simple = loss.mean() * self.l_simple_weight + + loss_vlb = (self.lvlb_weights[t] * loss).mean() + loss_dict.update({f'{log_prefix}/loss_vlb': loss_vlb}) + + loss = loss_simple + self.original_elbo_weight * loss_vlb + + loss_dict.update({f'{log_prefix}/loss': loss}) + + return loss, loss_dict + + def forward(self, x, *args, **kwargs): + # b, c, h, w, device, img_size, = *x.shape, x.device, self.image_size + # assert h == img_size and w == img_size, f'height and width of image must be {img_size}' + t = torch.randint(0, self.num_timesteps, (x.shape[0],), generator=self.rng, device=x.device).long() + return self.p_losses(x, t, *args, **kwargs) + + def get_input(self, batch, k): + x = batch[k] + if len(x.shape) == 3: + x = x[..., None] + if self.channels_last: + x = x.permute(0, 3, 1, 2).to(non_blocking=True) + else: + x = rearrange(x, "b h w c -> b c h w") + x = x.to(memory_format=torch.contiguous_format, non_blocking=True) + return x + + def shared_step(self, batch): + x = self.get_input(batch, self.first_stage_key) + loss, loss_dict = self(x) + return loss, loss_dict + + def _get_rows_from_list(self, samples): + n_imgs_per_row = len(samples) + denoise_grid = rearrange(samples, 'n b c h w -> b n c h w') + denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w') + denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row) + return denoise_grid + + @torch.no_grad() + def log_images(self, batch, N=8, n_row=2, sample=True, return_keys=None, **kwargs): + log = dict() + x = self.get_input(batch, self.first_stage_key) + N = min(x.shape[0], N) + n_row = min(x.shape[0], n_row) + x = x[:N] + log["inputs"] = x + + # get diffusion row + diffusion_row = list() + x_start = x[:n_row] + + for t in range(self.num_timesteps): + if t % self.log_every_t == 0 or t == self.num_timesteps - 1: + t = repeat(torch.tensor([t]), '1 -> b', b=n_row) + t = t.long() + noise = randn_like(x_start, generator=self.rng) + x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) + diffusion_row.append(x_noisy) + + log["diffusion_row"] = self._get_rows_from_list(diffusion_row) + + if sample: + # get denoise row + with self.ema_scope("Plotting"): + samples, denoise_row = self.sample(batch_size=N, return_intermediates=True) + + log["samples"] = samples + log["denoise_row"] = self._get_rows_from_list(denoise_row) + + if return_keys: + if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0: + return log + else: + return {key: log[key] for key in return_keys} + return log + + +class LatentDiffusion(DDPM, Serialization): + """main class""" + + def __init__(self, cfg, model_parallel_config): + self.config = model_parallel_config + self.num_timesteps_cond = default(cfg.num_timesteps_cond, 1) + self.scale_by_std = cfg.scale_by_std + assert self.num_timesteps_cond <= cfg.timesteps + # for backwards compatibility after implementation of DiffusionWrapper + if cfg.conditioning_key is None: + conditioning_key = 'concat' if cfg.concat_mode else 'crossattn' + else: + conditioning_key = cfg.conditioning_key + if cfg.cond_stage_config == '__is_unconditional__': + conditioning_key = None + ckpt_path = cfg.ckpt_path + ignore_keys = cfg.ignore_keys + cfg.conditioning_key = conditioning_key + super().__init__(cfg=cfg) + self.precision = cfg.precision + self.concat_mode = cfg.concat_mode + self.cond_stage_trainable = cfg.cond_stage_trainable + self.cond_stage_key = cfg.cond_stage_key + + self.num_downs = 0 + if "ddconfig" in cfg.first_stage_config and "ch_mult" in cfg.first_stage_config.ddconfig: + self.num_downs = len(cfg.first_stage_config.ddconfig.ch_mult) - 1 + if not cfg.scale_by_std: + self.scale_factor = cfg.scale_factor + else: + self.register_buffer('scale_factor', torch.tensor(cfg.scale_factor)) + self.instantiate_first_stage(cfg.first_stage_config) + self.instantiate_cond_stage(cfg.cond_stage_config) + self.cond_stage_forward = cfg.cond_stage_forward + self.clip_denoised = False + self.bbox_tokenizer = None + self.text_embedding_dropout_rate = cfg.text_embedding_dropout_rate + self.fused_opt = cfg.fused_opt + + self.restarted_from_ckpt = False + if ckpt_path is not None: + self.init_from_ckpt(ckpt_path, ignore_keys) + self.restarted_from_ckpt = True + + if self.channels_last: + self.first_stage_model = self.first_stage_model.to(memory_format=torch.channels_last) + self.model = self.model.to(memory_format=torch.channels_last) + + def make_cond_schedule(self,): + self.cond_ids = torch.full(size=(self.num_timesteps,), fill_value=self.num_timesteps - 1, dtype=torch.long) + ids = torch.round(torch.linspace(0, self.num_timesteps - 1, self.num_timesteps_cond)).long() + self.cond_ids[: self.num_timesteps_cond] = ids + + def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): + # only for very first batch + # set rescale weight to 1./std of encodings + logging.info("### USING STD-RESCALING ###") + x = super().get_input(batch, self.first_stage_key) + encoder_posterior = self.encode_first_stage(x) + z = self.get_first_stage_encoding(encoder_posterior).detach() + del self.scale_factor + self.register_buffer('scale_factor', 1.0 / z.flatten().std()) + logging.info(f"setting self.scale_factor to {self.scale_factor}") + logging.info("### USING STD-RESCALING ###") + + def register_schedule( + self, + given_betas=None, + beta_schedule="linear", + timesteps=1000, + linear_start=1e-4, + linear_end=2e-2, + cosine_s=8e-3, + ): + super().register_schedule(given_betas, beta_schedule, timesteps, linear_start, linear_end, cosine_s) + + self.shorten_cond_schedule = self.num_timesteps_cond > 1 + if self.shorten_cond_schedule: + self.make_cond_schedule() + + def instantiate_first_stage(self, config): + model = LatentDiffusion.from_config_dict(config) + self.first_stage_model = model.eval() + self.first_stage_model.train = disabled_train + for param in self.first_stage_model.parameters(): + param.requires_grad = False + + def instantiate_cond_stage(self, config): + if not self.cond_stage_trainable: + if config == "__is_first_stage__": + logging.info("Using first stage also as cond stage.") + self.cond_stage_model = self.first_stage_model + elif config == "__is_unconditional__": + logging.info(f"Training {self.__class__.__name__} as an unconditional model.") + self.cond_stage_model = None + # self.be_unconditional = True + else: + model = LatentDiffusion.from_config_dict(config) + self.cond_stage_model = model.eval() + self.cond_stage_model.train = disabled_train + for param in self.cond_stage_model.parameters(): + param.requires_grad = False + else: + assert config != '__is_first_stage__' + assert config != '__is_unconditional__' + model = LatentDiffusion.from_config_dict(config) + self.cond_stage_model = model + + def _get_denoise_row_from_list(self, samples, desc='', force_no_decoder_quantization=False): + denoise_row = [] + for zd in tqdm(samples, desc=desc): + denoise_row.append(self.decode_first_stage(zd, force_not_quantize=force_no_decoder_quantization)) + n_imgs_per_row = len(denoise_row) + denoise_row = torch.stack(denoise_row) # n_log_step, n_row, C, H, W + denoise_grid = rearrange(denoise_row, 'n b c h w -> b n c h w') + denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w') + denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row) + return denoise_grid + + def get_first_stage_encoding(self, encoder_posterior): + if isinstance(encoder_posterior, DiagonalGaussianDistribution): + z = encoder_posterior.sample() + elif isinstance(encoder_posterior, torch.Tensor): + z = encoder_posterior + else: + raise NotImplementedError(f"encoder_posterior of type '{type(encoder_posterior)}' not yet implemented") + return self.scale_factor * z + + def get_learned_conditioning(self, c): + if self.cond_stage_forward is None: + if hasattr(self.cond_stage_model, 'encode') and callable(self.cond_stage_model.encode): + c = self.cond_stage_model.encode(c) + if isinstance(c, DiagonalGaussianDistribution): + c = c.mode() + else: + c = self.cond_stage_model(c) + else: + assert hasattr(self.cond_stage_model, self.cond_stage_forward) + c = getattr(self.cond_stage_model, self.cond_stage_forward)(c) + return c + + def meshgrid(self, h, w): + y = torch.arange(0, h).view(h, 1, 1).repeat(1, w, 1) + x = torch.arange(0, w).view(1, w, 1).repeat(h, 1, 1) + + arr = torch.cat([y, x], dim=-1) + return arr + + def delta_border(self, h, w): + """ + :param h: height + :param w: width + :return: normalized distance to image border, + wtith min distance = 0 at border and max dist = 0.5 at image center + """ + lower_right_corner = torch.tensor([h - 1, w - 1]).view(1, 1, 2) + arr = self.meshgrid(h, w) / lower_right_corner + dist_left_up = torch.min(arr, dim=-1, keepdims=True)[0] + dist_right_down = torch.min(1 - arr, dim=-1, keepdims=True)[0] + edge_dist = torch.min(torch.cat([dist_left_up, dist_right_down], dim=-1), dim=-1)[0] + return edge_dist + + def get_weighting(self, h, w, Ly, Lx, device): + weighting = self.delta_border(h, w) + weighting = torch.clip( + weighting, self.split_input_params["clip_min_weight"], self.split_input_params["clip_max_weight"], + ) + weighting = weighting.view(1, h * w, 1).repeat(1, 1, Ly * Lx).to(device) + + if self.split_input_params["tie_braker"]: + L_weighting = self.delta_border(Ly, Lx) + L_weighting = torch.clip( + L_weighting, + self.split_input_params["clip_min_tie_weight"], + self.split_input_params["clip_max_tie_weight"], + ) + + L_weighting = L_weighting.view(1, 1, Ly * Lx).to(device) + weighting = weighting * L_weighting + return weighting + + def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo load once not every time, shorten code + """ + :param x: img of size (bs, c, h, w) + :return: n img crops of size (n, bs, c, kernel_size[0], kernel_size[1]) + """ + bs, nc, h, w = x.shape + + # number of crops in image + Ly = (h - kernel_size[0]) // stride[0] + 1 + Lx = (w - kernel_size[1]) // stride[1] + 1 + + if uf == 1 and df == 1: + fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) + unfold = torch.nn.Unfold(**fold_params) + + fold = torch.nn.Fold(output_size=x.shape[2:], **fold_params) + + weighting = self.get_weighting(kernel_size[0], kernel_size[1], Ly, Lx, x.device).to(x.dtype) + normalization = fold(weighting).view(1, 1, h, w) # normalizes the overlap + weighting = weighting.view((1, 1, kernel_size[0], kernel_size[1], Ly * Lx)) + + elif uf > 1 and df == 1: + fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) + unfold = torch.nn.Unfold(**fold_params) + + fold_params2 = dict( + kernel_size=(kernel_size[0] * uf, kernel_size[0] * uf), + dilation=1, + padding=0, + stride=(stride[0] * uf, stride[1] * uf), + ) + fold = torch.nn.Fold(output_size=(x.shape[2] * uf, x.shape[3] * uf), **fold_params2) + + weighting = self.get_weighting(kernel_size[0] * uf, kernel_size[1] * uf, Ly, Lx, x.device).to(x.dtype) + normalization = fold(weighting).view(1, 1, h * uf, w * uf) # normalizes the overlap + weighting = weighting.view((1, 1, kernel_size[0] * uf, kernel_size[1] * uf, Ly * Lx)) + + elif df > 1 and uf == 1: + fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) + unfold = torch.nn.Unfold(**fold_params) + + fold_params2 = dict( + kernel_size=(kernel_size[0] // df, kernel_size[0] // df), + dilation=1, + padding=0, + stride=(stride[0] // df, stride[1] // df), + ) + fold = torch.nn.Fold(output_size=(x.shape[2] // df, x.shape[3] // df), **fold_params2) + + weighting = self.get_weighting(kernel_size[0] // df, kernel_size[1] // df, Ly, Lx, x.device).to(x.dtype) + normalization = fold(weighting).view(1, 1, h // df, w // df) # normalizes the overlap + weighting = weighting.view((1, 1, kernel_size[0] // df, kernel_size[1] // df, Ly * Lx)) + + else: + raise NotImplementedError + + return fold, unfold, normalization, weighting + + @torch.no_grad() + def get_input( + self, + batch, + k, + return_first_stage_outputs=False, + force_c_encode=False, + cond_key=None, + return_original_cond=False, + bs=None, + ): + if self.first_stage_key.endswith('encoded'): + gaussian_parameters = batch[self.first_stage_key] + encoder_posterior = DiagonalGaussianDistribution(gaussian_parameters) + else: + x = super().get_input(batch, k) + if bs is not None: + x = x[:bs] + + encoder_posterior = self.encode_first_stage(x) + z = self.get_first_stage_encoding(encoder_posterior).detach() + + if self.model.conditioning_key is not None: + if cond_key is None: + cond_key = self.cond_stage_key + if cond_key != self.first_stage_key: + if cond_key in ['captions', 'coordinates_bbox', 'txt'] or cond_key.endswith("encoded"): + xc = batch[cond_key] + elif cond_key == 'class_label': + xc = batch + else: + xc = super().get_input(batch, cond_key) + else: + xc = x + if (not self.cond_stage_trainable or force_c_encode) and (not cond_key.endswith('encoded')): + if isinstance(xc, dict) or isinstance(xc, list): + # import pudb; pudb.set_trace() + c = self.get_learned_conditioning(xc) + else: + c = self.get_learned_conditioning(xc) + else: + c = xc + if bs is not None: + c = c[:bs] + + if self.use_positional_encodings: + pos_x, pos_y = self.compute_latent_shifts(batch) + ckey = __conditioning_keys__[self.model.conditioning_key] + c = {ckey: c, 'pos_x': pos_x, 'pos_y': pos_y} + + if self.text_embedding_dropout_rate > 0: + assert self.text_embedding_dropout_rate < 1.0 + c = random_dropout(c, drop_rate=self.text_embedding_dropout_rate) + + else: + c = None + xc = None + if self.use_positional_encodings: + pos_x, pos_y = self.compute_latent_shifts(batch) + c = {'pos_x': pos_x, 'pos_y': pos_y} + out = [z, c] + if return_first_stage_outputs: + xrec = self.decode_first_stage(z) + out.extend([x, xrec]) + if return_original_cond: + out.append(xc) + return out + + @torch.no_grad() + def decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): + if predict_cids: + if z.dim() == 4: + z = torch.argmax(z.exp(), dim=1).long() + z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) + z = rearrange(z, 'b h w c -> b c h w').contiguous() + + z = 1.0 / self.scale_factor * z + + if hasattr(self, "split_input_params"): + if self.split_input_params["patch_distributed_vq"]: + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. (64, 64) + uf = self.split_input_params["vqf"] + bs, nc, h, w = z.shape + if ks[0] > h or ks[1] > w: + ks = (min(ks[0], h), min(ks[1], w)) + logging.info("reducing Kernel") + + if stride[0] > h or stride[1] > w: + stride = (min(stride[0], h), min(stride[1], w)) + logging.info("reducing stride") + + fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf) + + z = unfold(z) # (bn, nc * prod(**ks), L) + # 1. Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + # 2. apply model loop over last dim + if isinstance(self.first_stage_model, VQModelInterface): + output_list = [ + self.first_stage_model.decode( + z[:, :, :, :, i], force_not_quantize=predict_cids or force_not_quantize + ) + for i in range(z.shape[-1]) + ] + else: + + output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) for i in range(z.shape[-1])] + + o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) + o = o * weighting + # Reverse 1. reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + decoded = fold(o) + decoded = decoded / normalization # norm is shape (1, 1, h, w) + return decoded + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + # same as above but without decorator + def differentiable_decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): + if predict_cids: + if z.dim() == 4: + z = torch.argmax(z.exp(), dim=1).long() + z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) + z = rearrange(z, 'b h w c -> b c h w').contiguous() + + z = 1.0 / self.scale_factor * z + + if hasattr(self, "split_input_params"): + if self.split_input_params["patch_distributed_vq"]: + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. (64, 64) + uf = self.split_input_params["vqf"] + bs, nc, h, w = z.shape + if ks[0] > h or ks[1] > w: + ks = (min(ks[0], h), min(ks[1], w)) + logging.info("reducing Kernel") + + if stride[0] > h or stride[1] > w: + stride = (min(stride[0], h), min(stride[1], w)) + logging.info("reducing stride") + + fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf) + + z = unfold(z) # (bn, nc * prod(**ks), L) + # 1. Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + # 2. apply model loop over last dim + if isinstance(self.first_stage_model, VQModelInterface): + output_list = [ + self.first_stage_model.decode( + z[:, :, :, :, i], force_not_quantize=predict_cids or force_not_quantize + ) + for i in range(z.shape[-1]) + ] + else: + + output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) for i in range(z.shape[-1])] + + o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) + o = o * weighting + # Reverse 1. reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + decoded = fold(o) + decoded = decoded / normalization # norm is shape (1, 1, h, w) + return decoded + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + @torch.no_grad() + def encode_first_stage(self, x): + if hasattr(self, "split_input_params"): + if self.split_input_params["patch_distributed_vq"]: + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. (64, 64) + df = self.split_input_params["vqf"] + self.split_input_params['original_image_size'] = x.shape[-2:] + bs, nc, h, w = x.shape + if ks[0] > h or ks[1] > w: + ks = (min(ks[0], h), min(ks[1], w)) + logging.info("reducing Kernel") + + if stride[0] > h or stride[1] > w: + stride = (min(stride[0], h), min(stride[1], w)) + logging.info("reducing stride") + + fold, unfold, normalization, weighting = self.get_fold_unfold(x, ks, stride, df=df) + z = unfold(x) # (bn, nc * prod(**ks), L) + # Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + output_list = [self.first_stage_model.encode(z[:, :, :, :, i]) for i in range(z.shape[-1])] + + o = torch.stack(output_list, axis=-1) + o = o * weighting + + # Reverse reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + decoded = fold(o) + decoded = decoded / normalization + return decoded + + else: + return self.first_stage_model.encode(x) + else: + return self.first_stage_model.encode(x) + + def shared_step(self, batch, **kwargs): + x, c = self.get_input(batch, self.first_stage_key) + loss = self(x, c) + return loss + + def forward(self, x, c, *args, **kwargs): + t = torch.randint(0, self.num_timesteps, (x.shape[0],), generator=self.rng, device=x.device).long() + if self.model.conditioning_key is not None: + assert c is not None + if self.cond_stage_trainable: + c = self.get_learned_conditioning(c) + if self.shorten_cond_schedule: # TODO: drop this option + tc = self.cond_ids[t] + c = self.q_sample(x_start=c, t=tc, noise=randn_like(c.float(), generator=self.rng)) + return self.p_losses(x, c, t, *args, **kwargs) + + def _rescale_annotations(self, bboxes, crop_coordinates): # TODO: move to dataset + def rescale_bbox(bbox): + x0 = clamp((bbox[0] - crop_coordinates[0]) / crop_coordinates[2]) + y0 = clamp((bbox[1] - crop_coordinates[1]) / crop_coordinates[3]) + w = min(bbox[2] / crop_coordinates[2], 1 - x0) + h = min(bbox[3] / crop_coordinates[3], 1 - y0) + return x0, y0, w, h + + return [rescale_bbox(b) for b in bboxes] + + def apply_model(self, x_noisy, t, cond, return_ids=False): + + if isinstance(cond, dict): + # hybrid case, cond is exptected to be a dict + for key in cond: + if not isinstance(cond[key], list): + cond[key] = [cond[key]] + else: + if not isinstance(cond, list): + cond = [cond] + key = 'c_concat' if self.model.conditioning_key == 'concat' else 'c_crossattn' + cond = {key: cond} + + if hasattr(self, "split_input_params"): + assert len(cond) == 1 # todo can only deal with one conditioning atm + assert not return_ids + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. (64, 64) + + h, w = x_noisy.shape[-2:] + + fold, unfold, normalization, weighting = self.get_fold_unfold(x_noisy, ks, stride) + + z = unfold(x_noisy) # (bn, nc * prod(**ks), L) + # Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + z_list = [z[:, :, :, :, i] for i in range(z.shape[-1])] + + if ( + self.cond_stage_key in ["image", "LR_image", "segmentation", 'bbox_img'] + and self.model.conditioning_key + ): # todo check for completeness + c_key = next(iter(cond.keys())) # get key + c = next(iter(cond.values())) # get value + assert len(c) == 1 # todo extend to list with more than one elem + c = c[0] # get element + + c = unfold(c) + c = c.view((c.shape[0], -1, ks[0], ks[1], c.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + cond_list = [{c_key: [c[:, :, :, :, i]]} for i in range(c.shape[-1])] + + elif self.cond_stage_key == 'coordinates_bbox': + assert ( + 'original_image_size' in self.split_input_params + ), 'BoudingBoxRescaling is missing original_image_size' + + # assuming padding of unfold is always 0 and its dilation is always 1 + n_patches_per_row = int((w - ks[0]) / stride[0] + 1) + full_img_h, full_img_w = self.split_input_params['original_image_size'] + # as we are operating on latents, we need the factor from the original image size to the + # spatial latent size to properly rescale the crops for regenerating the bbox annotations + num_downs = self.first_stage_model.encoder.num_resolutions - 1 + rescale_latent = 2 ** (num_downs) + + # get top left postions of patches as conforming for the bbbox tokenizer, therefore we + # need to rescale the tl patch coordinates to be in between (0,1) + tl_patch_coordinates = [ + ( + rescale_latent * stride[0] * (patch_nr % n_patches_per_row) / full_img_w, + rescale_latent * stride[1] * (patch_nr // n_patches_per_row) / full_img_h, + ) + for patch_nr in range(z.shape[-1]) + ] + + # patch_limits are tl_coord, width and height coordinates as (x_tl, y_tl, h, w) + patch_limits = [ + (x_tl, y_tl, rescale_latent * ks[0] / full_img_w, rescale_latent * ks[1] / full_img_h) + for x_tl, y_tl in tl_patch_coordinates + ] + # patch_values = [(np.arange(x_tl,min(x_tl+ks, 1.)),np.arange(y_tl,min(y_tl+ks, 1.))) for x_tl, y_tl in tl_patch_coordinates] + + # tokenize crop coordinates for the bounding boxes of the respective patches + patch_limits_tknzd = [ + torch.LongTensor(self.bbox_tokenizer._crop_encoder(bbox))[None] for bbox in patch_limits + ] # list of length l with tensors of shape (1, 2) + logging.info(patch_limits_tknzd[0].shape) + # cut tknzd crop position from conditioning + assert isinstance(cond, dict), 'cond must be dict to be fed into model' + cut_cond = cond['c_crossattn'][0][..., :-2] + logging.info(cut_cond.shape) + + adapted_cond = torch.stack([torch.cat([cut_cond, p], dim=1) for p in patch_limits_tknzd]) + adapted_cond = rearrange(adapted_cond, 'l b n -> (l b) n') + logging.info(adapted_cond.shape) + adapted_cond = self.get_learned_conditioning(adapted_cond) + logging.info(adapted_cond.shape) + adapted_cond = rearrange(adapted_cond, '(l b) n d -> l b n d', l=z.shape[-1]) + logging.info(adapted_cond.shape) + + cond_list = [{'c_crossattn': [e]} for e in adapted_cond] + + else: + cond_list = [cond for i in range(z.shape[-1])] # Todo make this more efficient + + # apply model by loop over crops + output_list = [self.model(z_list[i], t, **cond_list[i]) for i in range(z.shape[-1])] + assert not isinstance( + output_list[0], tuple + ) # todo cant deal with multiple model outputs check this never happens + + o = torch.stack(output_list, axis=-1) + o = o * weighting + # Reverse reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + x_recon = fold(o) / normalization + + else: + x_recon = self.model(x_noisy, t, **cond) + + if isinstance(x_recon, tuple) and not return_ids: + return x_recon[0] + else: + return x_recon + + def _predict_eps_from_xstart(self, x_t, t, pred_xstart): + return ( + extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart + ) / extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) + + def _prior_bpd(self, x_start): + """ + Get the prior KL term for the variational lower-bound, measured in + bits-per-dim. + This term can't be optimized, as it only depends on the encoder. + :param x_start: the [N x C x ...] tensor of inputs. + :return: a batch of [N] KL values (in bits), one per batch element. + """ + batch_size = x_start.shape[0] + t = torch.tensor([self.num_timesteps - 1] * batch_size, device=x_start.device) + qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t) + kl_prior = normal_kl(mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0) + return mean_flat(kl_prior) / np.log(2.0) + + def p_losses(self, x_start, cond, t, noise=None): + noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) + x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) + model_output = self.apply_model(x_noisy, t, cond) + + loss_dict = {} + prefix = 'train' if self.training else 'val' + + if self.parameterization == "x0": + target = x_start + elif self.parameterization == "eps": + target = noise + elif self.parameterization == "v": + target = self.get_v(x_start, noise, t) + else: + raise NotImplementedError() + + if (self.precision in ['bf16', 'bf16-mixed']) or (self.precision in [16, '16', '16-mixed']): + model_output = model_output.type(torch.float32) + loss_simple = self.get_loss(model_output, target, mean=False).mean([1, 2, 3]) + loss_dict.update({f'{prefix}/loss_simple': loss_simple.mean()}) + self.logvar = self.logvar.cuda(non_blocking=True) + logvar_t = self.logvar[t].cuda(non_blocking=True) + loss = loss_simple / torch.exp(logvar_t) + logvar_t + # loss = loss_simple / torch.exp(self.logvar) + self.logvar + if self.learn_logvar: + loss_dict.update({f'{prefix}/loss_gamma': loss.mean()}) + loss_dict.update({'logvar': self.logvar.data.mean()}) + + loss = self.l_simple_weight * loss.mean() + + loss_vlb = self.get_loss(model_output, target, mean=False).mean(dim=(1, 2, 3)) + loss_vlb = (self.lvlb_weights[t] * loss_vlb).mean() + loss_dict.update({f'{prefix}/loss_vlb': loss_vlb}) + loss += self.original_elbo_weight * loss_vlb + loss_dict.update({f'{prefix}/loss': loss}) + + return loss, loss_dict + + def p_mean_variance( + self, + x, + c, + t, + clip_denoised: bool, + return_codebook_ids=False, + quantize_denoised=False, + return_x0=False, + score_corrector=None, + corrector_kwargs=None, + ): + t_in = t + model_out = self.apply_model(x, t_in, c, return_ids=return_codebook_ids) + + if score_corrector is not None: + assert self.parameterization == "eps" + model_out = score_corrector.modify_score(self, model_out, x, t, c, **corrector_kwargs) + + if return_codebook_ids: + model_out, logits = model_out + + if self.parameterization == "eps": + x_recon = self.predict_start_from_noise(x, t=t, noise=model_out) + elif self.parameterization == "x0": + x_recon = model_out + else: + raise NotImplementedError() + + if clip_denoised: + x_recon.clamp_(-1.0, 1.0) + if quantize_denoised: + x_recon, _, [_, _, indices] = self.first_stage_model.quantize(x_recon) + model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) + if return_codebook_ids: + return model_mean, posterior_variance, posterior_log_variance, logits + elif return_x0: + return model_mean, posterior_variance, posterior_log_variance, x_recon + else: + return model_mean, posterior_variance, posterior_log_variance + + @torch.no_grad() + def p_sample( + self, + x, + c, + t, + clip_denoised=False, + repeat_noise=False, + return_codebook_ids=False, + quantize_denoised=False, + return_x0=False, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + ): + b, *_, device = *x.shape, x.device + outputs = self.p_mean_variance( + x=x, + c=c, + t=t, + clip_denoised=clip_denoised, + return_codebook_ids=return_codebook_ids, + quantize_denoised=quantize_denoised, + return_x0=return_x0, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + ) + if return_codebook_ids: + raise DeprecationWarning("Support dropped.") + model_mean, _, model_log_variance, logits = outputs + elif return_x0: + model_mean, _, model_log_variance, x0 = outputs + else: + model_mean, _, model_log_variance = outputs + + noise = noise_like(x.shape, device, repeat_noise) * temperature + if noise_dropout > 0.0: + noise = torch.nn.functional.dropout(noise, p=noise_dropout) + # no noise when t == 0 + nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))) + + if return_codebook_ids: + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, logits.argmax(dim=1) + if return_x0: + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, x0 + else: + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise + + @torch.no_grad() + def progressive_denoising( + self, + cond, + shape, + verbose=True, + callback=None, + quantize_denoised=False, + img_callback=None, + mask=None, + x0=None, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + batch_size=None, + x_T=None, + start_T=None, + log_every_t=None, + ): + if not log_every_t: + log_every_t = self.log_every_t + timesteps = self.num_timesteps + if batch_size is not None: + b = batch_size if batch_size is not None else shape[0] + shape = [batch_size] + list(shape) + else: + b = batch_size = shape[0] + if x_T is None: + img = torch.randn(shape, generator=self.rng, device=torch.cuda.current_device()) + else: + img = x_T + intermediates = [] + if cond is not None: + if isinstance(cond, dict): + cond = { + key: cond[key][:batch_size] + if not isinstance(cond[key], list) + else list(map(lambda x: x[:batch_size], cond[key])) + for key in cond + } + else: + cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] + + if start_T is not None: + timesteps = min(timesteps, start_T) + iterator = ( + tqdm(reversed(range(0, timesteps)), desc='Progressive Generation', total=timesteps) + if verbose + else reversed(range(0, timesteps)) + ) + if type(temperature) == float: + temperature = [temperature] * timesteps + + for i in iterator: + ts = torch.full((b,), i, device=torch.cuda.current_device(), dtype=torch.long) + if self.shorten_cond_schedule: + assert self.model.conditioning_key != 'hybrid' + tc = self.cond_ids[ts].to(cond.device) + cond = self.q_sample(x_start=cond, t=tc, noise=randn_like(cond, generator=self.rng)) + + img, x0_partial = self.p_sample( + img, + cond, + ts, + clip_denoised=self.clip_denoised, + quantize_denoised=quantize_denoised, + return_x0=True, + temperature=temperature[i], + noise_dropout=noise_dropout, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + ) + if mask is not None: + assert x0 is not None + img_orig = self.q_sample(x0, ts) + img = img_orig * mask + (1.0 - mask) * img + + if i % log_every_t == 0 or i == timesteps - 1: + intermediates.append(x0_partial) + if callback: + callback(i) + if img_callback: + img_callback(img, i) + return img, intermediates + + @torch.no_grad() + def p_sample_loop( + self, + cond, + shape, + return_intermediates=False, + x_T=None, + verbose=True, + callback=None, + timesteps=None, + quantize_denoised=False, + mask=None, + x0=None, + img_callback=None, + start_T=None, + log_every_t=None, + ): + + if not log_every_t: + log_every_t = self.log_every_t + device = self.betas.device + b = shape[0] + if x_T is None: + img = torch.randn(shape, generator=self.rng, device=device) + else: + img = x_T + + intermediates = [img] + if timesteps is None: + timesteps = self.num_timesteps + + if start_T is not None: + timesteps = min(timesteps, start_T) + iterator = ( + tqdm(reversed(range(0, timesteps)), desc='Sampling t', total=timesteps) + if verbose + else reversed(range(0, timesteps)) + ) + + if mask is not None: + assert x0 is not None + assert x0.shape[2:3] == mask.shape[2:3] # spatial size has to match + + for i in iterator: + ts = torch.full((b,), i, device=device, dtype=torch.long) + if self.shorten_cond_schedule: + assert self.model.conditioning_key != 'hybrid' + tc = self.cond_ids[ts].to(cond.device) + cond = self.q_sample(x_start=cond, t=tc, noise=randn_like(cond, generator=self.rng)) + + img = self.p_sample(img, cond, ts, clip_denoised=self.clip_denoised, quantize_denoised=quantize_denoised) + if mask is not None: + img_orig = self.q_sample(x0, ts) + img = img_orig * mask + (1.0 - mask) * img + + if i % log_every_t == 0 or i == timesteps - 1: + intermediates.append(img) + if callback: + callback(i) + if img_callback: + img_callback(img, i) + + if return_intermediates: + return img, intermediates + return img + + @torch.no_grad() + def sample( + self, + cond, + batch_size=16, + return_intermediates=False, + x_T=None, + verbose=True, + timesteps=None, + quantize_denoised=False, + mask=None, + x0=None, + shape=None, + **kwargs, + ): + if shape is None: + shape = (batch_size, self.channels, self.image_size, self.image_size) + if cond is not None: + if isinstance(cond, dict): + cond = { + key: cond[key][:batch_size] + if not isinstance(cond[key], list) + else list(map(lambda x: x[:batch_size], cond[key])) + for key in cond + } + else: + cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] + return self.p_sample_loop( + cond, + shape, + return_intermediates=return_intermediates, + x_T=x_T, + verbose=verbose, + timesteps=timesteps, + quantize_denoised=quantize_denoised, + mask=mask, + x0=x0, + ) + + @torch.no_grad() + def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): + + if ddim: + ddim_sampler = DDIMSampler(self) + shape = (self.channels, self.image_size, self.image_size) + samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, shape, cond, verbose=False, **kwargs) + + else: + samples, intermediates = self.sample(cond=cond, batch_size=batch_size, return_intermediates=True, **kwargs) + + return samples, intermediates + + @torch.no_grad() + def log_images( + self, + batch, + N=8, + n_row=4, + sample=True, + ddim_steps=200, + ddim_eta=1.0, + return_keys=None, + quantize_denoised=True, + inpaint=True, + plot_denoise_rows=False, + plot_progressive_rows=True, + plot_diffusion_rows=True, + **kwargs, + ): + + use_ddim = ddim_steps is not None + + log = dict() + z, c, x, xrec, xc = self.get_input( + batch, + self.first_stage_key, + return_first_stage_outputs=True, + force_c_encode=True, + return_original_cond=True, + bs=N, + ) + N = min(x.shape[0], N) + n_row = min(x.shape[0], n_row) + log["inputs"] = x + log["reconstruction"] = xrec + if self.model.conditioning_key is not None: + if hasattr(self.cond_stage_model, "decode"): + xc = self.cond_stage_model.decode(c) + log["conditioning"] = xc + elif self.cond_stage_key in ["caption"]: + xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["caption"]) + log["conditioning"] = xc + elif self.cond_stage_key == 'class_label': + xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["human_label"]) + log['conditioning'] = xc + elif isimage(xc): + log["conditioning"] = xc + if ismap(xc): + log["original_conditioning"] = self.to_rgb(xc) + + if plot_diffusion_rows: + # get diffusion row + diffusion_row = list() + z_start = z[:n_row] + for t in range(self.num_timesteps): + if t % self.log_every_t == 0 or t == self.num_timesteps - 1: + t = repeat(torch.tensor([t]), '1 -> b', b=n_row) + t = t.long() + noise = randn_like(z_start, generator=self.rng) + z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise) + diffusion_row.append(self.decode_first_stage(z_noisy)) + + diffusion_row = torch.stack(diffusion_row) # n_log_step, n_row, C, H, W + diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w') + diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w') + diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0]) + log["diffusion_row"] = diffusion_grid + + if sample: + # get denoise row + with self.ema_scope("Plotting"): + samples, z_denoise_row = self.sample_log( + cond=c, batch_size=N, ddim=use_ddim, ddim_steps=ddim_steps, eta=ddim_eta + ) + # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True) + x_samples = self.decode_first_stage(samples) + log["samples"] = x_samples + if plot_denoise_rows: + denoise_grid = self._get_denoise_row_from_list(z_denoise_row) + log["denoise_row"] = denoise_grid + + if ( + quantize_denoised + and not isinstance(self.first_stage_model, AutoencoderKL) + and not isinstance(self.first_stage_model, IdentityFirstStage) + ): + # also display when quantizing x0 while sampling + with self.ema_scope("Plotting Quantized Denoised"): + samples, z_denoise_row = self.sample_log( + cond=c, + batch_size=N, + ddim=use_ddim, + ddim_steps=ddim_steps, + eta=ddim_eta, + quantize_denoised=True, + ) + # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True, + # quantize_denoised=True) + x_samples = self.decode_first_stage(samples) + log["samples_x0_quantized"] = x_samples + + if inpaint: + # make a simple center square + b, h, w = z.shape[0], z.shape[2], z.shape[3] + mask = torch.ones(N, h, w) + # zeros will be filled in + mask[:, h // 4 : 3 * h // 4, w // 4 : 3 * w // 4] = 0.0 + mask = mask[:, None, ...] + with self.ema_scope("Plotting Inpaint"): + samples, _ = self.sample_log( + cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, ddim_steps=ddim_steps, x0=z[:N], mask=mask + ) + x_samples = self.decode_first_stage(samples) + log["samples_inpainting"] = x_samples + log["mask"] = mask + + # outpaint + with self.ema_scope("Plotting Outpaint"): + samples, _ = self.sample_log( + cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta, ddim_steps=ddim_steps, x0=z[:N], mask=mask + ) + x_samples = self.decode_first_stage(samples) + log["samples_outpainting"] = x_samples + + if plot_progressive_rows: + with self.ema_scope("Plotting Progressives"): + img, progressives = self.progressive_denoising( + c, shape=(self.channels, self.image_size, self.image_size), batch_size=N + ) + prog_row = self._get_denoise_row_from_list(progressives, desc="Progressive Generation") + log["progressive_row"] = prog_row + + if return_keys: + if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0: + return log + else: + return {key: log[key] for key in return_keys} + return log + + def parameters(self): + params = list(self.model.parameters()) + if self.cond_stage_trainable: + logging.info(f"{self.__class__.__name__}: Also optimizing conditioner params!") + params = params + list(self.cond_stage_model.parameters()) + if self.learn_logvar: + logging.info('Diffusion model optimizing logvar') + params.append(self.logvar) + return params + + @torch.no_grad() + def to_rgb(self, x): + x = x.float() + if not hasattr(self, "colorize"): + self.colorize = torch.randn(3, x.shape[1], 1, 1, generator=self.rng).to(x) + x = nn.functional.conv2d(x, weight=self.colorize) + x = 2.0 * (x - x.min()) / (x.max() - x.min()) - 1.0 + return x + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + # only required for pipeline parallelism + pass + + +class MegatronLatentDiffusion(MegatronBaseModel): + """Megatron LatentDiffusion Model.""" + + def __init__(self, cfg: DictConfig, trainer: Trainer): + if not HAVE_APEX: + raise ImportError( + "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + if not HAVE_MEGATRON_CORE: + raise ImportError( + "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + + # this prevents base constructor from initializing tokenizer + self.tokenizer = None + super().__init__(cfg, trainer=trainer) + + self._validate_trainer() + + # megatron_amp_O2 is not yet supported in diffusion models + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) + + self.model = self.model_provider_func() + + self.conditioning_keys = [] + + if self.trainer.precision in ['bf16', 'bf16-mixed']: + self.autocast_dtype = torch.bfloat16 + elif self.trainer.precision in [32, '32', '32-true']: + self.autocast_dtype = torch.float + elif self.trainer.precision in [16, '16', '16-mixed']: + self.autocast_dtype = torch.half + else: + raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') + + def get_module_list(self): + if isinstance(self.model, list): + return [model.module if isinstance(model, Float16Module) else model for model in self.model] + elif isinstance(self.model, Float16Module): + return [self.model.module] + else: + return [self.model] + + def model_provider_func(self, pre_process=True, post_process=True): + """Model depends on pipeline paralellism.""" + model = LatentDiffusion(cfg=self.cfg, model_parallel_config=self.model_parallel_config) + return model + + def forward(self, x, c, *args, **kwargs): + output_tensor = self.model(x, c, *args, **kwargs) + return output_tensor + + @rank_zero_only + @torch.no_grad() + def on_train_batch_start(self, batch, batch_idx, dataloader_idx=0): + if self.cfg.scale_by_std and self.current_epoch == 0 and self.global_step == 0 and batch_idx == 0: + assert self.cfg.scale_factor == 1.0, 'rather not use custom rescaling and std-rescaling simultaneously' + batch[self.cfg.first_stage_key] = batch[self.cfg.first_stage_key].cuda(non_blocking=True) + self.model.on_train_batch_start(batch, batch_idx) + + def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): + tensor_shape = None # Placeholder + + # handle asynchronous grad reduction + no_sync_func = None + if not forward_only and self.with_distributed_adam: + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) + + # pipeline schedules will get these from self.model.config + for module in self.get_module_list(): + module.config.no_sync_func = no_sync_func + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + fwd_bwd_function = get_forward_backward_func() + + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=self.model, + num_microbatches=get_num_microbatches(), + forward_only=forward_only, + seq_length=None, + micro_batch_size=self.cfg.micro_batch_size, + ) + + # losses_reduced_per_micro_batch is a list of dictionaries + # [{"loss": 0.1}, {"loss": 0.2}, ...] which are from gradient accumulation steps + # only the last stages of the pipeline return losses + loss_dict = {} + if losses_reduced_per_micro_batch: + if (not forward_only) or self.cfg.data.get('validation_drop_last', True): + # average loss across micro batches + for key in losses_reduced_per_micro_batch[0]: + loss_tensors_list = [loss_reduced[key] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_dict[key] = loss_tensor.mean() + loss_mean = loss_dict["val/loss"] if forward_only else loss_dict["train/loss"] + else: + raise NotImplementedError("Losses of micro batches sizes must be uniform!") + else: + if forward_only: + loss_mean = [] + else: + loss_mean = torch.tensor(0.0, device=torch.cuda.current_device()) + + return loss_mean, loss_dict + + def training_step(self, dataloader_iter, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + Batch should be a list of microbatches and those microbatches should on CPU. + Microbatches are then moved to GPU during the pipeline. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + + # we zero grads here because we also call backward in the megatron-core fwd/bwd functions + self._optimizer.zero_grad() + + loss_mean, loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, False) + + torch.distributed.broadcast(loss_mean, get_last_rank()) + + # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced + if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): + self.allreduce_sequence_parallel_gradients() + + if self.with_distributed_adam: + # gradients are reduced internally in distributed optimizer + pass + elif self.megatron_amp_O2: + # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): + # # main grads are stored in the MainParamsOptimizer wrapper + # self._optimizer.allreduce_main_grads() + self._optimizer.allreduce_main_grads() + elif not self.cfg.get('ddp_overlap', True): + # async grad allreduce is not currently implemented for O1/autocasting mixed precision training + # so we all-reduce gradients after the pipeline + self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) + + if self.cfg.precision in [16, '16', '16-mixed']: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log('loss_scale', loss_scale, batch_size=1) + + self.log_dict(loss_dict, prog_bar=False, logger=True, on_step=True, rank_zero_only=True, batch_size=1) + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log( + 'consumed_samples', + self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), + prog_bar=True, + rank_zero_only=True, + batch_size=1, + ) + return loss_mean + + def backward(self, *args, **kwargs): + """ LightningModule hook to do backward. + We want this to do nothing since we run backward in the fwd/bwd functions from apex. + No need to call it here. + """ + pass + + def optimizer_zero_grad(self, *args, **kwargs): + """ LightningModule hook to zero grad. + We want this to do nothing as we are zeroing grads during the training_step. + """ + pass + + def _append_sequence_parallel_module_grads(self, module, grads): + """ Helper method for allreduce_sequence_parallel_gradients""" + + for param in module.parameters(): + sequence_parallel_param = getattr(param, 'sequence_parallel', False) + if sequence_parallel_param and param.requires_grad: + if self.megatron_amp_O2: + grad = param.main_grad + else: + grad = param.grad + grads.append(grad.data) + + def get_forward_output_and_loss_func(self): + def process_batch(batch): + """ Prepares the global batch for apex fwd/bwd functions. + Global batch is a list of micro batches. + """ + # noise_map, condition + batch[self.cfg.first_stage_key] = batch[self.cfg.first_stage_key].cuda(non_blocking=True) + if isinstance(batch[self.cfg.cond_stage_key], torch.Tensor): + # in the case of precached text embeddings, cond_stage is also a tensor + batch[self.cfg.cond_stage_key] = batch[self.cfg.cond_stage_key].cuda(non_blocking=True) + + # SD has more dedicated structure for encoding, so we enable autocasting here as well + with torch.cuda.amp.autocast( + self.autocast_dtype in (torch.half, torch.bfloat16), dtype=self.autocast_dtype, + ): + x, c = self.model.get_input(batch, self.cfg.first_stage_key) + + if not isinstance(c, dict): + return [x, c] + + if len(self.conditioning_keys) == 0: + self.conditioning_keys = list(c.keys()) + c_list = [c[key] for key in self.conditioning_keys] + return [x, *c_list] + + def fwd_output_and_loss_func(dataloader_iter, model): + batch = next(dataloader_iter) + batch = process_batch(batch) + batch = [x.cuda(non_blocking=True) for x in batch] + if len(self.conditioning_keys) == 0: + x, c = batch + else: + x = batch[0] + c = {} + for idx, key in enumerate(self.conditioning_keys): + c[key] = batch[1 + idx] + loss, loss_dict = model(x, c) + + def dummy(output_tensor): + return loss, loss_dict + + # output_tensor, and a function to convert output_tensor to loss + loss_dict + return loss, dummy + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(batch, model): + raise NotImplementedError + + return fwd_output_only_func + + def validation_step(self, dataloader_iter, batch_idx): + loss, val_loss_dict = self.fwd_bwd_step(dataloader_iter, batch_idx, True) + + self.log_dict(val_loss_dict, prog_bar=False, logger=True, on_step=False, on_epoch=True, batch_size=1) + + return loss + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + self.model.rng.manual_seed(self.cfg.seed + 100 * parallel_state.get_data_parallel_rank()) + + # log number of parameters + if isinstance(self.model, list): + num_parameters_on_device = sum( + [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] + ) + else: + num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) + + # to be summed across data parallel group + total_num_parameters = torch.tensor(num_parameters_on_device).cuda(non_blocking=True) + + torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer.ckpt_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + # allowing restored models to optionally setup datasets + self.build_train_valid_test_datasets() + + # Batch size need to be provided for webdatset + self._num_micro_batches = get_num_microbatches() + self._micro_batch_size = self.cfg.micro_batch_size + + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + + def build_train_valid_test_datasets(self): + logging.info('Building datasets for Stable Diffusion...') + if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): + raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") + + if self.cfg.first_stage_key.endswith("encoded"): + self._train_ds, self._validation_ds = build_train_valid_precached_datasets( + model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0), + ) + else: + self._train_ds, self._validation_ds = build_train_valid_datasets( + model_cfg=self.cfg, consumed_samples=self.compute_consumed_samples(0) + ) + self._test_ds = None + + if self._train_ds is not None: + logging.info(f'Length of train dataset: {len(self._train_ds)}') + if self._validation_ds is not None: + logging.info(f'Length of val dataset: {len(self._validation_ds)}') + if self._test_ds is not None: + logging.info(f'Length of test dataset: {len(self._test_ds)}') + logging.info(f'Finished building datasets for LatentDiffusion.') + return self._train_ds, self._validation_ds, self._test_ds + + def setup_training_data(self, cfg): + if hasattr(self, '_train_ds') and self._train_ds is not None: + consumed_samples = self.compute_consumed_samples(0) + logging.info( + f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' + ) + self._train_dl = torch.utils.data.DataLoader( + self._train_ds, + batch_size=self._micro_batch_size, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=True, + persistent_workers=True, + ) + + def setup_validation_data(self, cfg): + if hasattr(self, '_validation_ds') and self._validation_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' + ) + self._validation_dl = torch.utils.data.DataLoader( + self._validation_ds, + batch_size=self._micro_batch_size, + num_workers=cfg.num_workers, + pin_memory=True, + drop_last=False, + persistent_workers=True, + ) + + def setup_test_data(self, cfg): + if hasattr(self, '_test_ds') and self._test_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' + ) + self._test_dl = torch.utils.data.DataLoader( + self._test_ds, batch_size=self._micro_batch_size, num_workers=cfg.num_workers, pin_memory=True, + ) + + def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: + """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device + When using pipeline parallelism, we need the global batch to remain on the CPU, + since the memory overhead will be too high when using a large number of microbatches. + Microbatches are transferred from CPU to GPU inside the pipeline. + """ + return batch + + def _validate_trainer(self): + """ Certain trainer configurations can break training. + Here we try to catch them and raise an error. + """ + if self.trainer.accumulate_grad_batches > 1: + raise ValueError( + f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' + ) + + @classmethod + def list_available_models(cls): + return None + + def parameters(self): + if isinstance(self.model, list): + return itertools.chain.from_iterable(module.parameters() for module in self.model) + else: + return self.model.parameters() + + def save_to(self, save_path: str): + # Replace .nemo path in config for NeMo CLIP + cfg = self._cfg + if cfg.get('cond_stage_config').get('restore_from_path'): + with open_dict(cfg): + cfg.cond_stage_config.restore_from_path = None + cfg.cond_stage_config.cfg = self.model.cond_stage_model.cfg + self._cfg = cfg + super().save_to(save_path) + + @classmethod + def load_from_checkpoint( + cls, + checkpoint_path: str, + map_location: Any = None, + hparams_file: Optional[str] = None, + strict: bool = True, + **kwargs, + ): + """ + Loads ModelPT from checkpoint, with some maintenance of restoration. + For documentation, please refer to LightningModule.load_from_checkpoin() documentation. + """ + checkpoint = None + try: + cls._set_model_restore_state(is_being_restored=True) + # TODO: replace with proper PTL API + with pl_legacy_patch(): + if map_location is not None: + checkpoint = pl_load(checkpoint_path, map_location=map_location) + else: + checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage) + + if hparams_file is not None: + extension = hparams_file.split(".")[-1] + if extension.lower() == "csv": + hparams = load_hparams_from_tags_csv(hparams_file) + elif extension.lower() in ("yml", "yaml"): + hparams = load_hparams_from_yaml(hparams_file) + else: + raise ValueError(".csv, .yml or .yaml is required for `hparams_file`") + + hparams["on_gpu"] = False + + # overwrite hparams by the given file + checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = hparams + + # for past checkpoint need to add the new key + if cls.CHECKPOINT_HYPER_PARAMS_KEY not in checkpoint: + checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = {} + # override the hparams with values that were passed in + cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].get('cfg', checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY]) + # TODO: can we do this without overriding? + config_kwargs = kwargs.copy() + if 'trainer' in config_kwargs: + config_kwargs.pop('trainer') + cfg.update(config_kwargs) + + # Disable individual unet/vae weights loading otherwise the model will look for these partial ckpts and raise error + if cfg: + if cfg.get('unet_config') and cfg.get('unet_config').get('from_pretrained'): + cfg.unet_config.from_pretrained = None + if cfg.get('first_stage_config') and cfg.get('first_stage_config').get('from_pretrained'): + cfg.first_stage_config.from_pretrained = None + ## Now when we covert ckpt to nemo, let's always get rid of those _orig_mod + if cfg.get('inductor'): + cfg.inductor = False + ## Append some dummy configs that DB didn't support + if not cfg.get('channels_last'): + cfg.channels_last = True + if not cfg.get('capture_cudagraph_iters'): + cfg.capture_cudagraph_iters = -1 + + # compatibility for stable diffusion old checkpoint tweaks + first_key = list(checkpoint['state_dict'].keys())[0] + if first_key == "betas": + # insert "model." into for megatron wrapper + new_state_dict = {} + for key in checkpoint['state_dict'].keys(): + new_key = "model." + key + new_state_dict[new_key] = checkpoint['state_dict'][key] + checkpoint['state_dict'] = new_state_dict + elif ( + first_key == 'model.text_encoder.transformer.text_model.embeddings.position_ids' + or first_key == 'model.text_encoder.model.language_model.embedding.position_embeddings' + ): + # remap state keys from dreambooth when using HF clip + new_state_dict = {} + for key in checkpoint['state_dict'].keys(): + new_key = key.replace('._orig_mod', "") + new_key = new_key.replace('unet', 'model.diffusion_model') + new_key = new_key.replace('vae', 'first_stage_model') + new_key = new_key.replace('text_encoder', 'cond_stage_model') + new_key = new_key.replace('.noise_scheduler', '') + new_state_dict[new_key] = checkpoint['state_dict'][key] + checkpoint['state_dict'] = new_state_dict + + # compatibility for inductor in inference + if not cfg.get('inductor', False): + new_state_dict = {} + for key in checkpoint['state_dict'].keys(): + new_key = key.replace('._orig_mod', '', 1) + new_state_dict[new_key] = checkpoint['state_dict'][key] + checkpoint['state_dict'] = new_state_dict + + if cfg.get('megatron_amp_O2', False): + new_state_dict = {} + for key in checkpoint['state_dict'].keys(): + new_key = key.replace('model.', 'model.module.', 1) + new_state_dict[new_key] = checkpoint['state_dict'][key] + checkpoint['state_dict'] = new_state_dict + + if 'cfg' in kwargs: + model = ptl_load_state(cls, checkpoint, strict=strict, **kwargs) + else: + model = ptl_load_state(cls, checkpoint, strict=strict, cfg=cfg, **kwargs) + # cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].cfg + + checkpoint = model + + finally: + cls._set_model_restore_state(is_being_restored=False) + return checkpoint + + +class DiffusionWrapper(pl.LightningModule, Serialization): + def __init__( + self, + diff_model_config, + conditioning_key, + inductor: bool = False, + inductor_cudagraphs: bool = False, + capture_cudagraph_iters: int = -1, + ): + super().__init__() + self.diffusion_model = DiffusionWrapper.from_config_dict(diff_model_config) + self.conditioning_key = conditioning_key + assert self.conditioning_key in [None, 'concat', 'crossattn', 'hybrid', 'adm'] + + # Fusing VAE and CLIP doesn't give benefit + if inductor: + # TorchInductor with CUDA graph can lead to OOM + torch._dynamo.config.dynamic_shapes = False + torch._dynamo.config.automatic_dynamic_shapes = False + inductor_config.triton.cudagraphs = inductor_cudagraphs + self.diffusion_model = torch.compile(self.diffusion_model) + # CUDA graph + self.capture_cudagraph_iters = capture_cudagraph_iters + self.iterations = 0 + self.graphed_diffusion_model = None + + def forward(self, x, t, c_concat: list = None, c_crossattn: list = None): + if self.conditioning_key is None: + out = self.diffusion_model(x, t) + elif self.conditioning_key == 'concat': + xc = torch.cat([x] + c_concat, dim=1) + out = self.diffusion_model(xc, t) + elif self.conditioning_key == 'crossattn': + cc = torch.cat(c_crossattn, 1) + if self.iterations == self.capture_cudagraph_iters: + logging.info("Capturing CUDA graph for module: %s", self.diffusion_model.__class__.__name__) + self.graphed_diffusion_model = torch.cuda.make_graphed_callables(self.diffusion_model, (x, t, cc)) + + if 0 <= self.capture_cudagraph_iters <= self.iterations: + out = self.graphed_diffusion_model(x, t, cc) + else: + out = self.diffusion_model(x, t, context=cc) + self.iterations += 1 + elif self.conditioning_key == 'hybrid': + xc = torch.cat([x] + c_concat, dim=1) + cc = torch.cat(c_crossattn, 1) + out = self.diffusion_model(xc, t, context=cc) + elif self.conditioning_key == 'adm': + cc = c_crossattn[0] + out = self.diffusion_model(x, t, y=cc) + else: + raise NotImplementedError() + + return out diff --git a/nemo/collections/multimodal/models/stable_diffusion/ldm_config.py b/nemo/collections/multimodal/models/stable_diffusion/ldm_config.py new file mode 100644 index 000000000000..2f2acb40ed43 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/ldm_config.py @@ -0,0 +1,144 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from dataclasses import dataclass, field +from typing import Any, List, Optional + +from nemo.core.config import modelPT as model_cfg + + +@dataclass +class LDMUnetConfig: + cls: Optional[str] = 'nemo.collections.multimodal.modules.diffusionmodules.openaimodel.UNetModel' + image_size: Optional[int] = 32 # unused + in_channels: Optional[int] = 4 + out_channels: Optional[int] = 4 + model_channels: Optional[int] = 320 + attention_resolutions: Optional[List[int]] = field(default_factory=lambda: [4, 2, 1]) + num_res_blocks: Optional[int] = 2 + channel_mult: Optional[List[int]] = field(default_factory=lambda: [1, 2, 4, 4]) + num_heads: Optional[int] = 8 + use_spatial_transformer: Optional[bool] = True + transformer_depth: Optional[int] = 1 + context_dim: Optional[int] = 768 + use_checkpoint: Optional[bool] = True + legacy: Optional[bool] = False + use_flash_attention: Optional[bool] = False + + +@dataclass +class SchedulerConfig: + cls: Optional[str] = 'nemo.collections.multimodal.parts.lr_scheduler.LambdaLinearScheduler' + warm_up_steps: Optional[List[int]] = field(default_factory=lambda: [10000]) + cycle_lengths: Optional[List[int]] = field( + default_factory=lambda: [10000000000000] + ) # incredibly large number to prevent corner cases + f_start: Optional[List[float]] = field(default_factory=lambda: [1.0e-6]) + f_max: Optional[List[float]] = field(default_factory=lambda: [1.0]) + f_min: Optional[List[float]] = field(default_factory=lambda: [1.0]) + + +@dataclass +class CLIPEmbedderConfig: + cls: Optional[str] = 'nemo.collections.multimodal.modules.encoders.modules.FrozenCLIPEmbedder' + version: Optional[str] = 'openai/clip-vit-large-patch14' + device: Optional[str] = 'cuda' + max_length: Optional[int] = 77 + + +@dataclass +class LDMEncoderConfig: + double_z: Optional[bool] = True + z_channels: Optional[int] = 4 + resolution: Optional[int] = 256 + in_channels: Optional[int] = 3 + out_ch: Optional[int] = 3 + ch: Optional[int] = 128 + ch_mult: Optional[List[int]] = field(default_factory=lambda: [1, 2, 4, 4]) + num_res_blocks: Optional[int] = 2 + attn_resolutions: Optional[List[int]] = field(default_factory=lambda: []) + dropout: Optional[float] = 0.0 + + +@dataclass +class LDMFirstStageConfig: # Autoencoder + cls: Optional[str] = 'nemo.collections.multimodal.models.ldm.autoencoder.AutoencoderKL' + embed_dim: Optional[int] = 4 + monitor: Optional[str] = 'val/rec_loss' + ddconfig: Optional[LDMEncoderConfig] = LDMEncoderConfig() + + +@dataclass +class DDPMDiffusionModelConfig(model_cfg.ModelConfig): + unet_config: Optional[LDMUnetConfig] = LDMUnetConfig() + timesteps: Optional[int] = 1000 + beta_schedule: Optional[str] = 'linear' + loss_type: Optional[str] = 'l2' + ckpt_path: Optional[str] = None + ignore_keys: Optional[List[str]] = field(default_factory=list) + load_only_unet: Optional[bool] = False + monitor: Optional[str] = 'val/loss' + use_ema: Optional[bool] = True + first_stage_key: Optional[str] = 'image' + image_size: Optional[int] = 256 + channels: Optional[int] = 3 + log_every_t: Optional[int] = 100 + clip_denoised: Optional[bool] = True + linear_start: Optional[float] = 1e-4 + linear_end: Optional[float] = 2e-2 + cosine_s: Optional[float] = 8e-3 + given_betas: Optional[float] = None + original_elbo_weight: Optional[float] = 0.0 + v_posterior: Optional[ + float + ] = 0.0 # weight for choosing posterior variance as sigma = (1-v) * beta_tilde + v * beta + l_simple_weight: Optional[float] = 1.0 + conditioning_key: Optional[str] = None + parameterization: Optional[str] = 'eps' # all assuming fixed variance schedules + scheduler_config: Optional[Any] = None + use_positional_encodings: Optional[bool] = False + learn_logvar: Optional[bool] = False + logvar_init: Optional[float] = 0.0 + learning_rate: Optional[float] = 1.0e-04 + + +@dataclass +class LatentDiffusionModelConfig(DDPMDiffusionModelConfig): + # Overrite Default values + linear_start: Optional[float] = 0.00085 + linear_end: Optional[float] = 0.0120 + num_timesteps_cond: Optional[int] = 1 + log_every_t: Optional[int] = 200 + timesteps: Optional[int] = 1000 + first_stage_key: Optional[str] = 'jpg' + cond_stage_key: Optional[str] = 'txt' + image_size: Optional[int] = 64 + channels: Optional[int] = 4 + cond_stage_trainable: Optional[bool] = False + conditioning_key: Optional[str] = 'crossattn' + monitor: Optional[str] = 'val/loss_simple_ema' + scale_factor: Optional[float] = 0.18215 + use_ema: Optional[bool] = False # TODO + unet_config: Optional[LDMUnetConfig] = LDMUnetConfig() + first_stage_config: Optional[LDMFirstStageConfig] = LDMFirstStageConfig() + scheduler_config: Optional[SchedulerConfig] = SchedulerConfig() + # New attributes in additon to DDPMDiffusionModel + concat_mode: Optional[bool] = True + trainable: Optional[bool] = False + cond_stage_config: Optional[CLIPEmbedderConfig] = CLIPEmbedderConfig() + cond_stage_forward: Optional[Any] = None + scale_by_std: Optional[bool] = False + text_embedding_dropout_rate: Optional[float] = 0 + fused_opt: Optional[bool] = False + inductor: Optional[bool] = False + inductor_cudagraphs: Optional[bool] = False diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py new file mode 100644 index 000000000000..70256058631d --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from enum import Enum + +Sampler = Enum('Sampler', ['PLMS', 'DDIM', 'DPM', 'PARA_DDIM']) diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py new file mode 100644 index 000000000000..1a4ebed123c9 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/base_sampler.py @@ -0,0 +1,339 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from abc import ABC, abstractmethod + +import numpy as np +import torch +from tqdm import tqdm + +from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + make_ddim_sampling_parameters, + make_ddim_timesteps, + noise_like, +) + + +class AbstractBaseSampler(ABC): + def __init__(self, model, sampler, schedule="linear", **kwargs): + super().__init__() + self.model = model + self.ddpm_num_timesteps = model.num_timesteps + self.schedule = schedule + assert isinstance(sampler, Sampler), "Sampler should be of ENUM type Sampler" + self.sampler = sampler + + def register_buffer(self, name, attr): + if type(attr) == torch.Tensor: + if attr.device != torch.device("cuda"): + attr = attr.to(torch.device("cuda")) + setattr(self, name, attr) + + def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, verbose=True): + self.ddim_timesteps = make_ddim_timesteps( + ddim_discr_method=ddim_discretize, + num_ddim_timesteps=ddim_num_steps, + num_ddpm_timesteps=self.ddpm_num_timesteps, + verbose=verbose, + ) + alphas_cumprod = self.model.alphas_cumprod + assert alphas_cumprod.shape[0] == self.ddpm_num_timesteps, "alphas have to be defined for each timestep" + to_torch = lambda x: x.clone().detach().to(torch.float32).to(torch.cuda.current_device()) + self.register_buffer("betas", to_torch(self.model.betas)) + self.register_buffer("alphas_cumprod", to_torch(alphas_cumprod)) + self.register_buffer("alphas_cumprod_prev", to_torch(self.model.alphas_cumprod_prev)) + # calculations for diffusion q(x_t | x_{t-1}) and others + self.register_buffer("sqrt_alphas_cumprod", to_torch(np.sqrt(alphas_cumprod.cpu()))) + self.register_buffer( + "sqrt_one_minus_alphas_cumprod", to_torch(np.sqrt(1.0 - alphas_cumprod.cpu())), + ) + self.register_buffer("log_one_minus_alphas_cumprod", to_torch(np.log(1.0 - alphas_cumprod.cpu()))) + self.register_buffer("sqrt_recip_alphas_cumprod", to_torch(np.sqrt(1.0 / alphas_cumprod.cpu()))) + self.register_buffer( + "sqrt_recipm1_alphas_cumprod", to_torch(np.sqrt(1.0 / alphas_cumprod.cpu() - 1)), + ) + # ddim sampling parameters + ddim_sigmas, ddim_alphas, ddim_alphas_prev, ddim_variance = make_ddim_sampling_parameters( + alphacums=alphas_cumprod.cpu(), ddim_timesteps=self.ddim_timesteps, eta=ddim_eta, verbose=verbose, + ) + self.register_buffer("ddim_sigmas", ddim_sigmas) + self.register_buffer("ddim_alphas", ddim_alphas) + self.register_buffer("ddim_alphas_prev", ddim_alphas_prev) + self.register_buffer("ddim_variance", ddim_variance) + self.register_buffer("ddim_sqrt_one_minus_alphas", np.sqrt(1.0 - ddim_alphas)) + sigmas_for_original_sampling_steps = ddim_eta * torch.sqrt( + (1 - self.alphas_cumprod_prev) + / (1 - self.alphas_cumprod) + * (1 - self.alphas_cumprod / self.alphas_cumprod_prev) + ) + self.register_buffer("ddim_sigmas_for_original_num_steps", sigmas_for_original_sampling_steps) + + @abstractmethod + def p_sampling_fn(self): + pass + + def dpm_sampling_fn(self): + pass + + def para_ddim_sampling_fn(self): + pass + + @torch.no_grad() + def sample( + self, + S, + batch_size, + shape, + conditioning=None, + callback=None, + normals_sequence=None, + img_callback=None, + quantize_x0=False, + eta=0.0, + mask=None, + x0=None, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + verbose=True, + x_T=None, + log_every_t=100, + unconditional_guidance_scale=1.0, + unconditional_conditioning=None, + parallelism=8, + tolerance=0.1, + # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ... + **kwargs, + ): + if conditioning is not None: + if isinstance(conditioning, dict): + ctmp = conditioning[list(conditioning.keys())[0]] + while isinstance(ctmp, list): + ctmp = ctmp[0] + cbs = ctmp.shape[0] + if cbs != batch_size: + print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}") + else: + if conditioning.shape[0] != batch_size: + print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}") + self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose) + # sampling + C, H, W = shape + size = (batch_size, C, H, W) + print(f"Data shape for sampling is {size}, eta {eta}") + + if self.sampler is Sampler.DPM: + return self.dpm_sampling_fn( + shape=shape, + steps=S, + conditioning=conditioning, + unconditional_conditioning=unconditional_conditioning, + unconditional_guidance_scale=unconditional_guidance_scale, + x_T=x_T, + ) + + if self.sampler is Sampler.PARA_DDIM: + return self.para_ddim_sampling_fn( + cond=conditioning, + batch_size=batch_size, + per_latent_shape=shape, + x_T=x_T, + steps=S, + parallelism=parallelism, + tolerance=tolerance, + temperature=temperature, + noise_dropout=noise_dropout, + quantize_denoised=quantize_x0, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + ) + + samples, intermediates = self.sampling_fn( + conditioning, + size, + callback=callback, + img_callback=img_callback, + quantize_denoised=quantize_x0, + mask=mask, + x0=x0, + ddim_use_original_steps=False, + noise_dropout=noise_dropout, + temperature=temperature, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + x_T=x_T, + log_every_t=log_every_t, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning, + ) + return samples, intermediates + + @torch.no_grad() + def sampling_fn( + self, + cond, + shape, + x_T=None, + ddim_use_original_steps=False, + callback=None, + timesteps=None, + quantize_denoised=False, + mask=None, + x0=None, + img_callback=None, + log_every_t=100, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + unconditional_guidance_scale=1.0, + unconditional_conditioning=None, + ): + device = self.model.betas.device + b = shape[0] + if x_T is None: + img = torch.randn(shape, generator=self.model.rng, device=device) + else: + img = x_T + + if timesteps is None: + timesteps = self.ddpm_num_timesteps if ddim_use_original_steps else self.ddim_timesteps + elif timesteps is not None and not ddim_use_original_steps: + subset_end = int(min(timesteps / self.ddim_timesteps.shape[0], 1) * self.ddim_timesteps.shape[0]) - 1 + timesteps = self.ddim_timesteps[:subset_end] + intermediates = {"x_inter": [img], "pred_x0": [img]} + + # TODO: Is this needed + if self.sampler is Sampler.PLMS: + time_range = list(reversed(range(0, timesteps))) if ddim_use_original_steps else np.flip(timesteps) + else: + time_range = reversed(range(0, timesteps)) if ddim_use_original_steps else np.flip(timesteps) + total_steps = timesteps if ddim_use_original_steps else timesteps.shape[0] + print(f"Running {self.sampler.name} Sampling with {total_steps} timesteps") + iterator = tqdm(time_range, desc=f"{self.sampler.name} Sampler", total=total_steps) + old_eps = [] + for i, step in enumerate(iterator): + index = total_steps - i - 1 + ts = torch.full((b,), step, device=device, dtype=torch.long) + if self.sampler is Sampler.PLMS: + ts_next = torch.full( + (b,), time_range[min(i + 1, len(time_range) - 1)], device=device, dtype=torch.long, + ) + else: + old_eps = None + ts_next = None + if mask is not None: + assert x0 is not None + img_orig = self.model.q_sample(x0, ts) # TODO: deterministic forward pass? + img = img_orig * mask + (1.0 - mask) * img + outs = self.p_sampling_fn( + img, + cond, + ts, + index=index, + use_original_steps=ddim_use_original_steps, + quantize_denoised=quantize_denoised, + temperature=temperature, + noise_dropout=noise_dropout, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning, + old_eps=old_eps, + t_next=ts_next, + ) + img, pred_x0 = outs[0], outs[1] + if self.sampler is Sampler.PLMS: + e_t = outs[2] + old_eps.append(e_t) + if len(old_eps) >= 4: + old_eps.pop(0) + if callback: + callback(i) + if img_callback: + img_callback(pred_x0, i) + if index % log_every_t == 0 or index == total_steps - 1: + intermediates["x_inter"].append(img) + intermediates["pred_x0"].append(pred_x0) + return img, intermediates + + def _get_model_output( + self, x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, corrector_kwargs, + ): + if unconditional_conditioning is None or unconditional_guidance_scale == 1.0: + model_output = self.model.apply_model(x, t, c) + elif isinstance(c, dict): + ### Contolnet conditioning is dict format + model_t = self.model.apply_model(x, t, c) + model_uncond = self.model.apply_model(x, t, unconditional_conditioning) + model_output = model_uncond + unconditional_guidance_scale * (model_t - model_uncond) + else: + x_in = torch.cat([x] * 2) + t_in = torch.cat([t] * 2) + c_in = torch.cat([unconditional_conditioning, c]) + e_t_uncond, model_t = self.model.apply_model(x_in, t_in, c_in).chunk(2) + model_output = e_t_uncond + unconditional_guidance_scale * (model_t - e_t_uncond) + if self.model.parameterization == "v": + e_t = self.model.predict_eps_from_z_and_v(x, t, model_output) + else: + e_t = model_output + if score_corrector is not None: + assert self.model.parameterization == "eps" + e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs) + return e_t, model_output + + def _get_x_prev_and_pred_x0( + self, + use_original_steps, + b, + index, + device, + x, + t, + model_output, + e_t, + quantize_denoised, + repeat_noise, + temperature, + noise_dropout, + ): + alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas + alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev + sqrt_one_minus_alphas = ( + self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas + ) + sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas + + # select parameters corresponding to the currently considered timestep + a_t = torch.full((b, 1, 1, 1), alphas[index], device=device) + a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device) + sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device) + sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index], device=device) + # current prediction for x_0 + if self.model.parameterization != "v": + pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt() + else: + pred_x0 = self.model.predict_start_from_z_and_v(x, t, model_output) + if quantize_denoised: + pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0) + # direction pointing to x_t + dir_xt = (1.0 - a_prev - sigma_t ** 2).sqrt() * e_t + noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature + if noise_dropout > 0.0: + noise = torch.nn.functional.dropout(noise, p=noise_dropout) + x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise + return x_prev, pred_x0 diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py new file mode 100644 index 000000000000..2d6b121dced4 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/ddim.py @@ -0,0 +1,119 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""SAMPLING ONLY.""" + +import numpy as np +import torch +from tqdm import tqdm + +from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler +from nemo.collections.multimodal.models.stable_diffusion.samplers.base_sampler import AbstractBaseSampler +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import extract_into_tensor +from nemo.collections.multimodal.parts.utils import randn_like + + +class DDIMSampler(AbstractBaseSampler): + def __init__(self, model, schedule="linear", **kwargs): + super().__init__(model, sampler=Sampler.DDIM, schedule="linear", **kwargs) + + @torch.no_grad() + def p_sampling_fn( + self, + x, + c, + t, + index, + repeat_noise=False, + use_original_steps=False, + quantize_denoised=False, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + unconditional_guidance_scale=1.0, + unconditional_conditioning=None, + old_eps=None, + t_next=None, + ): + b, *_, device = *x.shape, x.device + e_t, model_output = self._get_model_output( + x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, corrector_kwargs + ) + x_prev, pred_x0 = self._get_x_prev_and_pred_x0( + use_original_steps, + b, + index, + device, + x, + t, + model_output, + e_t, + quantize_denoised, + repeat_noise, + temperature, + noise_dropout, + ) + return x_prev, pred_x0 + + @torch.no_grad() + def stochastic_encode(self, x0, t, use_original_steps=False, noise=None): + # fast, but does not allow for exact reconstruction + # t serves as an index to gather the correct alphas + if use_original_steps: + sqrt_alphas_cumprod = self.sqrt_alphas_cumprod + sqrt_one_minus_alphas_cumprod = self.sqrt_one_minus_alphas_cumprod + else: + sqrt_alphas_cumprod = torch.sqrt(self.ddim_alphas) + sqrt_one_minus_alphas_cumprod = self.ddim_sqrt_one_minus_alphas + + if noise is None: + noise = randn_like(x0, generator=self.model.rng) + return ( + extract_into_tensor(sqrt_alphas_cumprod, t, x0.shape) * x0 + + extract_into_tensor(sqrt_one_minus_alphas_cumprod, t, x0.shape) * noise + ) + + @torch.no_grad() + def decode( + self, + x_latent, + cond, + t_start, + unconditional_guidance_scale=1.0, + unconditional_conditioning=None, + use_original_steps=False, + ): + + timesteps = np.arange(self.ddpm_num_timesteps) if use_original_steps else self.ddim_timesteps + timesteps = timesteps[:t_start] + + time_range = np.flip(timesteps) + total_steps = timesteps.shape[0] + print(f"Running DDIM Sampling with {total_steps} timesteps") + + iterator = tqdm(time_range, desc='Decoding image', total=total_steps) + x_dec = x_latent + for i, step in enumerate(iterator): + index = total_steps - i - 1 + ts = torch.full((x_latent.shape[0],), step, device=x_latent.device, dtype=torch.long) + x_dec, _ = self.p_sample_ddim( + x_dec, + cond, + ts, + index=index, + use_original_steps=use_original_steps, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning, + ) + return x_dec diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py new file mode 100644 index 000000000000..b1b046a2c5db --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/dpmsolver.py @@ -0,0 +1,493 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math + +import torch + +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import expand_dims, interpolate_fn + + +class NoiseScheduleVP: + def __init__( + self, schedule="discrete", betas=None, alphas_cumprod=None, continuous_beta_0=0.1, continuous_beta_1=20.0, + ): + """Create a wrapper class for the forward SDE.""" + + if schedule not in ["discrete", "linear", "cosine"]: + raise ValueError( + "Unsupported noise schedule {}. The schedule needs to be 'discrete' or 'linear' or 'cosine'".format( + schedule + ) + ) + + self.schedule = schedule + if schedule == "discrete": + if betas is not None: + log_alphas = 0.5 * torch.log(1 - betas).cumsum(dim=0) + else: + assert alphas_cumprod is not None + log_alphas = 0.5 * torch.log(alphas_cumprod) + self.total_N = len(log_alphas) + self.T = 1.0 + self.t_array = torch.linspace(0.0, 1.0, self.total_N + 1)[1:].reshape((1, -1)) + self.log_alpha_array = log_alphas.reshape((1, -1,)) + else: + self.total_N = 1000 + self.beta_0 = continuous_beta_0 + self.beta_1 = continuous_beta_1 + self.cosine_s = 0.008 + self.cosine_beta_max = 999.0 + self.cosine_t_max = ( + math.atan(self.cosine_beta_max * (1.0 + self.cosine_s) / math.pi) + * 2.0 + * (1.0 + self.cosine_s) + / math.pi + - self.cosine_s + ) + self.cosine_log_alpha_0 = math.log(math.cos(self.cosine_s / (1.0 + self.cosine_s) * math.pi / 2.0)) + self.schedule = schedule + if schedule == "cosine": + self.T = 0.9946 + else: + self.T = 1.0 + + def marginal_log_mean_coeff(self, t): + """ + Compute log(alpha_t) of a given continuous-time label t in [0, T]. + """ + if self.schedule == "discrete": + return interpolate_fn( + t.reshape((-1, 1)), self.t_array.to(t.device), self.log_alpha_array.to(t.device), + ).reshape((-1)) + elif self.schedule == "linear": + return -0.25 * t ** 2 * (self.beta_1 - self.beta_0) - 0.5 * t * self.beta_0 + elif self.schedule == "cosine": + + def log_alpha_fn(s): + return torch.log(torch.cos((s + self.cosine_s) / (1.0 + self.cosine_s) * math.pi / 2.0)) + + log_alpha_t = log_alpha_fn(t) - self.cosine_log_alpha_0 + return log_alpha_t + + def marginal_alpha(self, t): + """ + Compute alpha_t of a given continuous-time label t in [0, T]. + """ + return torch.exp(self.marginal_log_mean_coeff(t)) + + def marginal_std(self, t): + """ + Compute sigma_t of a given continuous-time label t in [0, T]. + """ + return torch.sqrt(1.0 - torch.exp(2.0 * self.marginal_log_mean_coeff(t))) + + def marginal_lambda(self, t): + """ + Compute lambda_t = log(alpha_t) - log(sigma_t) of a given continuous-time label t in [0, T]. + """ + log_mean_coeff = self.marginal_log_mean_coeff(t) + log_std = 0.5 * torch.log(1.0 - torch.exp(2.0 * log_mean_coeff)) + return log_mean_coeff - log_std + + def inverse_lambda(self, lamb): + """ + Compute the continuous-time label t in [0, T] of a given half-logSNR lambda_t. + """ + if self.schedule == "linear": + tmp = 2.0 * (self.beta_1 - self.beta_0) * torch.logaddexp(-2.0 * lamb, torch.zeros((1,)).to(lamb)) + Delta = self.beta_0 ** 2 + tmp + return tmp / (torch.sqrt(Delta) + self.beta_0) / (self.beta_1 - self.beta_0) + elif self.schedule == "discrete": + log_alpha = -0.5 * torch.logaddexp(torch.zeros((1,)).to(lamb.device), -2.0 * lamb) + t = interpolate_fn( + log_alpha.reshape((-1, 1)), + torch.flip(self.log_alpha_array.to(lamb.device), [1]), + torch.flip(self.t_array.to(lamb.device), [1]), + ) + return t.reshape((-1,)) + else: + log_alpha = -0.5 * torch.logaddexp(-2.0 * lamb, torch.zeros((1,)).to(lamb)) + + def t_fn(log_alpha_t): + return ( + torch.arccos(torch.exp(log_alpha_t + self.cosine_log_alpha_0)) + * 2.0 + * (1.0 + self.cosine_s) + / math.pi + - self.cosine_s + ) + + t = t_fn(log_alpha) + return t + + +def model_wrapper( + model, + noise_schedule, + model_type="noise", + model_kwargs={}, + guidance_type="uncond", + condition=None, + unconditional_condition=None, + guidance_scale=1.0, + classifier_fn=None, + classifier_kwargs={}, +): + """Create a wrapper function for the noise prediction model.""" + + def get_model_input_time(t_continuous): + if noise_schedule.schedule == "discrete": + return (t_continuous - 1.0 / noise_schedule.total_N) * 1000.0 + else: + return t_continuous + + def noise_pred_fn(x, t_continuous, cond=None): + if t_continuous.reshape((-1,)).shape[0] == 1: + t_continuous = t_continuous.expand((x.shape[0])) + t_input = get_model_input_time(t_continuous) + if cond is None: + output = model(x, t_input, **model_kwargs) + else: + output = model(x, t_input, cond, **model_kwargs) + if model_type == "noise": + return output + elif model_type == "x_start": + alpha_t, sigma_t = ( + noise_schedule.marginal_alpha(t_continuous), + noise_schedule.marginal_std(t_continuous), + ) + dims = x.dim() + return (x - expand_dims(alpha_t, dims) * output) / expand_dims(sigma_t, dims) + elif model_type == "v": + alpha_t, sigma_t = ( + noise_schedule.marginal_alpha(t_continuous), + noise_schedule.marginal_std(t_continuous), + ) + dims = x.dim() + return expand_dims(alpha_t, dims) * output + expand_dims(sigma_t, dims) * x + + def cond_grad_fn(x, t_input): + """ + Compute the gradient of the classifier, i.e. nabla_{x} log p_t(cond | x_t). + """ + with torch.enable_grad(): + x_in = x.detach().requires_grad_(True) + log_prob = classifier_fn(x_in, t_input, condition, **classifier_kwargs) + return torch.autograd.grad(log_prob.sum(), x_in)[0] + + def model_fn(x, t_continuous): + """ + The noise predicition model function that is used for DPM-Solver. + """ + if t_continuous.reshape((-1,)).shape[0] == 1: + t_continuous = t_continuous.expand((x.shape[0])) + if guidance_type == "uncond": + return noise_pred_fn(x, t_continuous) + elif guidance_type == "classifier": + assert classifier_fn is not None + t_input = get_model_input_time(t_continuous) + cond_grad = cond_grad_fn(x, t_input) + sigma_t = noise_schedule.marginal_std(t_continuous) + noise = noise_pred_fn(x, t_continuous) + return noise - guidance_scale * expand_dims(sigma_t, dims=cond_grad.dim()) * cond_grad + elif guidance_type == "classifier-free": + if guidance_scale == 1.0 or unconditional_condition is None: + return noise_pred_fn(x, t_continuous, cond=condition) + else: + x_in = torch.cat([x] * 2) + t_in = torch.cat([t_continuous] * 2) + c_in = torch.cat([unconditional_condition, condition]) + noise_uncond, noise = noise_pred_fn(x_in, t_in, cond=c_in).chunk(2) + return noise_uncond + guidance_scale * (noise - noise_uncond) + + assert model_type in ["noise", "x_start", "v"] + assert guidance_type in ["uncond", "classifier", "classifier-free"] + return model_fn + + +class DPMSolver: + def __init__( + self, model_fn, noise_schedule, predict_x0=False, thresholding=False, max_val=1.0, + ): + """Construct a DPM-Solver.""" + self.model = model_fn + self.noise_schedule = noise_schedule + self.predict_x0 = predict_x0 + self.thresholding = thresholding + self.max_val = max_val + + def noise_prediction_fn(self, x, t): + """ + Return the noise prediction model. + """ + return self.model(x, t) + + def data_prediction_fn(self, x, t): + """ + Return the data prediction model (with thresholding). + """ + noise = self.noise_prediction_fn(x, t) + dims = x.dim() + alpha_t, sigma_t = ( + self.noise_schedule.marginal_alpha(t), + self.noise_schedule.marginal_std(t), + ) + x0 = (x - expand_dims(sigma_t, dims) * noise) / expand_dims(alpha_t, dims) + if self.thresholding: + p = 0.995 # A hyperparameter in the paper of "Imagen" [1]. + s = torch.quantile(torch.abs(x0).reshape((x0.shape[0], -1)), p, dim=1) + s = expand_dims(torch.maximum(s, self.max_val * torch.ones_like(s).to(s.device)), dims) + x0 = torch.clamp(x0, -s, s) / s + return x0 + + def model_fn(self, x, t): + """ + Convert the model to the noise prediction model or the data prediction model. + """ + if self.predict_x0: + return self.data_prediction_fn(x, t) + else: + return self.noise_prediction_fn(x, t) + + def get_time_steps(self, skip_type, t_T, t_0, N, device): + """Compute the intermediate time steps for sampling.""" + if skip_type == "logSNR": + lambda_T = self.noise_schedule.marginal_lambda(torch.tensor(t_T).to(device)) + lambda_0 = self.noise_schedule.marginal_lambda(torch.tensor(t_0).to(device)) + logSNR_steps = torch.linspace(lambda_T.cpu().item(), lambda_0.cpu().item(), N + 1).to(device) + return self.noise_schedule.inverse_lambda(logSNR_steps) + elif skip_type == "time_uniform": + return torch.linspace(t_T, t_0, N + 1).to(device) + elif skip_type == "time_quadratic": + t_order = 2 + t = torch.linspace(t_T ** (1.0 / t_order), t_0 ** (1.0 / t_order), N + 1).pow(t_order).to(device) + return t + else: + raise ValueError( + "Unsupported skip_type {}, need to be 'logSNR' or 'time_uniform' or 'time_quadratic'".format(skip_type) + ) + + def denoise_to_zero_fn(self, x, s): + """ + Denoise at the final step, which is equivalent to solve the ODE from lambda_s to infty by first-order discretization. + """ + return self.data_prediction_fn(x, s) + + def dpm_solver_first_update(self, x, s, t, model_s=None, return_intermediate=False): + """ + DPM-Solver-1 (equivalent to DDIM) from time `s` to time `t`. + """ + ns = self.noise_schedule + dims = x.dim() + lambda_s, lambda_t = ns.marginal_lambda(s), ns.marginal_lambda(t) + h = lambda_t - lambda_s + log_alpha_s, log_alpha_t = ( + ns.marginal_log_mean_coeff(s), + ns.marginal_log_mean_coeff(t), + ) + sigma_s, sigma_t = ns.marginal_std(s), ns.marginal_std(t) + alpha_t = torch.exp(log_alpha_t) + + if self.predict_x0: + phi_1 = torch.expm1(-h) + if model_s is None: + model_s = self.model_fn(x, s) + x_t = expand_dims(sigma_t / sigma_s, dims) * x - expand_dims(alpha_t * phi_1, dims) * model_s + if return_intermediate: + return x_t, {"model_s": model_s} + else: + return x_t + else: + phi_1 = torch.expm1(h) + if model_s is None: + model_s = self.model_fn(x, s) + x_t = ( + expand_dims(torch.exp(log_alpha_t - log_alpha_s), dims) * x + - expand_dims(sigma_t * phi_1, dims) * model_s + ) + if return_intermediate: + return x_t, {"model_s": model_s} + else: + return x_t + + def multistep_dpm_solver_second_update(self, x, model_prev_list, t_prev_list, t, solver_type="dpm_solver"): + """ + Multistep solver DPM-Solver-2 from time `t_prev_list[-1]` to time `t`. + """ + if solver_type not in ["dpm_solver", "taylor"]: + raise ValueError("'solver_type' must be either 'dpm_solver' or 'taylor', got {}".format(solver_type)) + ns = self.noise_schedule + dims = x.dim() + model_prev_1, model_prev_0 = model_prev_list + t_prev_1, t_prev_0 = t_prev_list + lambda_prev_1, lambda_prev_0, lambda_t = ( + ns.marginal_lambda(t_prev_1), + ns.marginal_lambda(t_prev_0), + ns.marginal_lambda(t), + ) + log_alpha_prev_0, log_alpha_t = ( + ns.marginal_log_mean_coeff(t_prev_0), + ns.marginal_log_mean_coeff(t), + ) + sigma_prev_0, sigma_t = ns.marginal_std(t_prev_0), ns.marginal_std(t) + alpha_t = torch.exp(log_alpha_t) + + h_0 = lambda_prev_0 - lambda_prev_1 + h = lambda_t - lambda_prev_0 + r0 = h_0 / h + D1_0 = expand_dims(1.0 / r0, dims) * (model_prev_0 - model_prev_1) + if self.predict_x0: + if solver_type == "dpm_solver": + x_t = ( + expand_dims(sigma_t / sigma_prev_0, dims) * x + - expand_dims(alpha_t * (torch.exp(-h) - 1.0), dims) * model_prev_0 + - 0.5 * expand_dims(alpha_t * (torch.exp(-h) - 1.0), dims) * D1_0 + ) + elif solver_type == "taylor": + x_t = ( + expand_dims(sigma_t / sigma_prev_0, dims) * x + - expand_dims(alpha_t * (torch.exp(-h) - 1.0), dims) * model_prev_0 + + expand_dims(alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0), dims) * D1_0 + ) + else: + if solver_type == "dpm_solver": + x_t = ( + expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x + - expand_dims(sigma_t * (torch.exp(h) - 1.0), dims) * model_prev_0 + - 0.5 * expand_dims(sigma_t * (torch.exp(h) - 1.0), dims) * D1_0 + ) + elif solver_type == "taylor": + x_t = ( + expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x + - expand_dims(sigma_t * (torch.exp(h) - 1.0), dims) * model_prev_0 + - expand_dims(sigma_t * ((torch.exp(h) - 1.0) / h - 1.0), dims) * D1_0 + ) + return x_t + + def multistep_dpm_solver_third_update(self, x, model_prev_list, t_prev_list, t, solver_type="dpm_solver"): + """ + Multistep solver DPM-Solver-3 from time `t_prev_list[-1]` to time `t`. + """ + ns = self.noise_schedule + dims = x.dim() + model_prev_2, model_prev_1, model_prev_0 = model_prev_list + t_prev_2, t_prev_1, t_prev_0 = t_prev_list + lambda_prev_2, lambda_prev_1, lambda_prev_0, lambda_t = ( + ns.marginal_lambda(t_prev_2), + ns.marginal_lambda(t_prev_1), + ns.marginal_lambda(t_prev_0), + ns.marginal_lambda(t), + ) + log_alpha_prev_0, log_alpha_t = ( + ns.marginal_log_mean_coeff(t_prev_0), + ns.marginal_log_mean_coeff(t), + ) + sigma_prev_0, sigma_t = ns.marginal_std(t_prev_0), ns.marginal_std(t) + alpha_t = torch.exp(log_alpha_t) + + h_1 = lambda_prev_1 - lambda_prev_2 + h_0 = lambda_prev_0 - lambda_prev_1 + h = lambda_t - lambda_prev_0 + r0, r1 = h_0 / h, h_1 / h + D1_0 = expand_dims(1.0 / r0, dims) * (model_prev_0 - model_prev_1) + D1_1 = expand_dims(1.0 / r1, dims) * (model_prev_1 - model_prev_2) + D1 = D1_0 + expand_dims(r0 / (r0 + r1), dims) * (D1_0 - D1_1) + D2 = expand_dims(1.0 / (r0 + r1), dims) * (D1_0 - D1_1) + if self.predict_x0: + x_t = ( + expand_dims(sigma_t / sigma_prev_0, dims) * x + - expand_dims(alpha_t * (torch.exp(-h) - 1.0), dims) * model_prev_0 + + expand_dims(alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0), dims) * D1 + - expand_dims(alpha_t * ((torch.exp(-h) - 1.0 + h) / h ** 2 - 0.5), dims) * D2 + ) + else: + x_t = ( + expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x + - expand_dims(sigma_t * (torch.exp(h) - 1.0), dims) * model_prev_0 + - expand_dims(sigma_t * ((torch.exp(h) - 1.0) / h - 1.0), dims) * D1 + - expand_dims(sigma_t * ((torch.exp(h) - 1.0 - h) / h ** 2 - 0.5), dims) * D2 + ) + return x_t + + def multistep_dpm_solver_update(self, x, model_prev_list, t_prev_list, t, order, solver_type="dpm_solver"): + """ + Multistep DPM-Solver with the order `order` from time `t_prev_list[-1]` to time `t`. + """ + if order == 1: + return self.dpm_solver_first_update(x, t_prev_list[-1], t, model_s=model_prev_list[-1]) + elif order == 2: + return self.multistep_dpm_solver_second_update(x, model_prev_list, t_prev_list, t, solver_type=solver_type) + elif order == 3: + return self.multistep_dpm_solver_third_update(x, model_prev_list, t_prev_list, t, solver_type=solver_type) + else: + raise ValueError("Solver order must be 1 or 2 or 3, got {}".format(order)) + + def sample( + self, + x, + steps=20, + t_start=None, + t_end=None, + order=3, + skip_type="time_uniform", + method="singlestep", + lower_order_final=True, + denoise_to_zero=False, + solver_type="dpm_solver", + atol=0.0078, + rtol=0.05, + ): + """ + Compute the sample at time `t_end` by DPM-Solver, given the initial `x` at time `t_start`. + """ + t_0 = 1.0 / self.noise_schedule.total_N if t_end is None else t_end + t_T = self.noise_schedule.T if t_start is None else t_start + device = x.device + + if method == "multistep": + assert steps >= order + timesteps = self.get_time_steps(skip_type=skip_type, t_T=t_T, t_0=t_0, N=steps, device=device) + assert timesteps.shape[0] - 1 == steps + with torch.no_grad(): + vec_t = timesteps[0].expand((x.shape[0])) + model_prev_list = [self.model_fn(x, vec_t)] + t_prev_list = [vec_t] + # Init the first `order` values by lower order multistep DPM-Solver. + for init_order in range(1, order): + vec_t = timesteps[init_order].expand(x.shape[0]) + x = self.multistep_dpm_solver_update( + x, model_prev_list, t_prev_list, vec_t, init_order, solver_type=solver_type, + ) + model_prev_list.append(self.model_fn(x, vec_t)) + t_prev_list.append(vec_t) + # Compute the remaining values by `order`-th order multistep DPM-Solver. + for step in range(order, steps + 1): + vec_t = timesteps[step].expand(x.shape[0]) + if lower_order_final and steps < 15: + step_order = min(order, steps + 1 - step) + else: + step_order = order + x = self.multistep_dpm_solver_update( + x, model_prev_list, t_prev_list, vec_t, step_order, solver_type=solver_type, + ) + for i in range(order - 1): + t_prev_list[i] = t_prev_list[i + 1] + model_prev_list[i] = model_prev_list[i + 1] + t_prev_list[-1] = vec_t + # We do not need to evaluate the final model value. + if step < steps: + model_prev_list[-1] = self.model_fn(x, vec_t) + if denoise_to_zero: + x = self.denoise_to_zero_fn(x, torch.ones((x.shape[0],)).to(device) * t_0) + return x diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py new file mode 100644 index 000000000000..ac4f8f7ad73d --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/k_diffusion.py @@ -0,0 +1,838 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math + +import torch +import torchsde +from scipy import integrate +from torch import nn +from torchdiffeq import odeint +from tqdm.auto import tqdm, trange + + +def append_zero(x): + return torch.cat([x, x.new_zeros([1])]) + + +def append_dims(x, target_dims): + """Appends dimensions to the end of a tensor until it has target_dims dimensions.""" + dims_to_append = target_dims - x.ndim + if dims_to_append < 0: + raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less') + return x[(...,) + (None,) * dims_to_append] + + +def get_sigmas_karras(n, sigma_min, sigma_max, rho=7.0, device='cpu'): + """Constructs the noise schedule of Karras et al. (2022).""" + ramp = torch.linspace(0, 1, n) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return append_zero(sigmas).to(device) + + +def get_sigmas_exponential(n, sigma_min, sigma_max, device='cpu'): + """Constructs an exponential noise schedule.""" + sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), n, device=device).exp() + return append_zero(sigmas) + + +def get_sigmas_polyexponential(n, sigma_min, sigma_max, rho=1.0, device='cpu'): + """Constructs an polynomial in log sigma noise schedule.""" + ramp = torch.linspace(1, 0, n, device=device) ** rho + sigmas = torch.exp(ramp * (math.log(sigma_max) - math.log(sigma_min)) + math.log(sigma_min)) + return append_zero(sigmas) + + +def get_sigmas_vp(n, beta_d=19.9, beta_min=0.1, eps_s=1e-3, device='cpu'): + """Constructs a continuous VP noise schedule.""" + t = torch.linspace(1, eps_s, n, device=device) + sigmas = torch.sqrt(torch.exp(beta_d * t ** 2 / 2 + beta_min * t) - 1) + return append_zero(sigmas) + + +def to_d(x, sigma, denoised): + """Converts a denoiser output to a Karras ODE derivative.""" + return (x - denoised) / append_dims(sigma, x.ndim) + + +def get_ancestral_step(sigma_from, sigma_to, eta=1.0): + """Calculates the noise level (sigma_down) to step down to and the amount + of noise to add (sigma_up) when doing an ancestral sampling step.""" + if not eta: + return sigma_to, 0.0 + sigma_up = min(sigma_to, eta * (sigma_to ** 2 * (sigma_from ** 2 - sigma_to ** 2) / sigma_from ** 2) ** 0.5) + sigma_down = (sigma_to ** 2 - sigma_up ** 2) ** 0.5 + return sigma_down, sigma_up + + +def default_noise_sampler(x): + return lambda sigma, sigma_next: torch.randn_like(x) + + +class BatchedBrownianTree: + """A wrapper around torchsde.BrownianTree that enables batches of entropy.""" + + def __init__(self, x, t0, t1, seed=None, **kwargs): + t0, t1, self.sign = self.sort(t0, t1) + w0 = kwargs.get('w0', torch.zeros_like(x)) + if seed is None: + seed = torch.randint(0, 2 ** 63 - 1, []).item() + self.batched = True + try: + assert len(seed) == x.shape[0] + w0 = w0[0] + except TypeError: + seed = [seed] + self.batched = False + self.trees = [torchsde.BrownianTree(t0, w0, t1, entropy=s, **kwargs) for s in seed] + + @staticmethod + def sort(a, b): + return (a, b, 1) if a < b else (b, a, -1) + + def __call__(self, t0, t1): + t0, t1, sign = self.sort(t0, t1) + w = torch.stack([tree(t0, t1) for tree in self.trees]) * (self.sign * sign) + return w if self.batched else w[0] + + +class BrownianTreeNoiseSampler: + """A noise sampler backed by a torchsde.BrownianTree. + + Args: + x (Tensor): The tensor whose shape, device and dtype to use to generate + random samples. + sigma_min (float): The low end of the valid interval. + sigma_max (float): The high end of the valid interval. + seed (int or List[int]): The random seed. If a list of seeds is + supplied instead of a single integer, then the noise sampler will + use one BrownianTree per batch item, each with its own seed. + transform (callable): A function that maps sigma to the sampler's + internal timestep. + """ + + def __init__(self, x, sigma_min, sigma_max, seed=None, transform=lambda x: x): + self.transform = transform + t0, t1 = self.transform(torch.as_tensor(sigma_min)), self.transform(torch.as_tensor(sigma_max)) + self.tree = BatchedBrownianTree(x, t0, t1, seed) + + def __call__(self, sigma, sigma_next): + t0, t1 = self.transform(torch.as_tensor(sigma)), self.transform(torch.as_tensor(sigma_next)) + return self.tree(t0, t1) / (t1 - t0).abs().sqrt() + + +@torch.no_grad() +def sample_euler( + model, + x, + sigmas, + extra_args=None, + callback=None, + disable=None, + s_churn=0.0, + s_tmin=0.0, + s_tmax=float('inf'), + s_noise=1.0, +): + """Implements Algorithm 2 (Euler steps) from Karras et al. (2022).""" + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.0 + eps = torch.randn_like(x) * s_noise + sigma_hat = sigmas[i] * (gamma + 1) + if gamma > 0: + x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5 + denoised = model(x, sigma_hat * s_in, **extra_args) + d = to_d(x, sigma_hat, denoised) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised}) + dt = sigmas[i + 1] - sigma_hat + # Euler method + x = x + d * dt + return x + + +@torch.no_grad() +def sample_euler_ancestral( + model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1.0, s_noise=1.0, noise_sampler=None +): + """Ancestral sampling with Euler method steps.""" + extra_args = {} if extra_args is None else extra_args + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + d = to_d(x, sigmas[i], denoised) + # Euler method + dt = sigma_down - sigmas[i] + x = x + d * dt + if sigmas[i + 1] > 0: + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up + return x + + +@torch.no_grad() +def sample_heun( + model, + x, + sigmas, + extra_args=None, + callback=None, + disable=None, + s_churn=0.0, + s_tmin=0.0, + s_tmax=float('inf'), + s_noise=1.0, +): + """Implements Algorithm 2 (Heun steps) from Karras et al. (2022).""" + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.0 + eps = torch.randn_like(x) * s_noise + sigma_hat = sigmas[i] * (gamma + 1) + if gamma > 0: + x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5 + denoised = model(x, sigma_hat * s_in, **extra_args) + d = to_d(x, sigma_hat, denoised) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised}) + dt = sigmas[i + 1] - sigma_hat + if sigmas[i + 1] == 0: + # Euler method + x = x + d * dt + else: + # Heun's method + x_2 = x + d * dt + denoised_2 = model(x_2, sigmas[i + 1] * s_in, **extra_args) + d_2 = to_d(x_2, sigmas[i + 1], denoised_2) + d_prime = (d + d_2) / 2 + x = x + d_prime * dt + return x + + +@torch.no_grad() +def sample_dpm_2( + model, + x, + sigmas, + extra_args=None, + callback=None, + disable=None, + s_churn=0.0, + s_tmin=0.0, + s_tmax=float('inf'), + s_noise=1.0, +): + """A sampler inspired by DPM-Solver-2 and Algorithm 2 from Karras et al. (2022).""" + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.0 + eps = torch.randn_like(x) * s_noise + sigma_hat = sigmas[i] * (gamma + 1) + if gamma > 0: + x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5 + denoised = model(x, sigma_hat * s_in, **extra_args) + d = to_d(x, sigma_hat, denoised) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised}) + if sigmas[i + 1] == 0: + # Euler method + dt = sigmas[i + 1] - sigma_hat + x = x + d * dt + else: + # DPM-Solver-2 + sigma_mid = sigma_hat.log().lerp(sigmas[i + 1].log(), 0.5).exp() + dt_1 = sigma_mid - sigma_hat + dt_2 = sigmas[i + 1] - sigma_hat + x_2 = x + d * dt_1 + denoised_2 = model(x_2, sigma_mid * s_in, **extra_args) + d_2 = to_d(x_2, sigma_mid, denoised_2) + x = x + d_2 * dt_2 + return x + + +@torch.no_grad() +def sample_dpm_2_ancestral( + model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1.0, s_noise=1.0, noise_sampler=None +): + """Ancestral sampling with DPM-Solver second-order steps.""" + extra_args = {} if extra_args is None else extra_args + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + d = to_d(x, sigmas[i], denoised) + if sigma_down == 0: + # Euler method + dt = sigma_down - sigmas[i] + x = x + d * dt + else: + # DPM-Solver-2 + sigma_mid = sigmas[i].log().lerp(sigma_down.log(), 0.5).exp() + dt_1 = sigma_mid - sigmas[i] + dt_2 = sigma_down - sigmas[i] + x_2 = x + d * dt_1 + denoised_2 = model(x_2, sigma_mid * s_in, **extra_args) + d_2 = to_d(x_2, sigma_mid, denoised_2) + x = x + d_2 * dt_2 + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up + return x + + +def linear_multistep_coeff(order, t, i, j): + if order - 1 > i: + raise ValueError(f'Order {order} too high for step {i}') + + def fn(tau): + prod = 1.0 + for k in range(order): + if j == k: + continue + prod *= (tau - t[i - k]) / (t[i - j] - t[i - k]) + return prod + + return integrate.quad(fn, t[i], t[i + 1], epsrel=1e-4)[0] + + +@torch.no_grad() +def sample_lms(model, x, sigmas, extra_args=None, callback=None, disable=None, order=4): + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + sigmas_cpu = sigmas.detach().cpu().numpy() + ds = [] + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + d = to_d(x, sigmas[i], denoised) + ds.append(d) + if len(ds) > order: + ds.pop(0) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + cur_order = min(i + 1, order) + coeffs = [linear_multistep_coeff(cur_order, sigmas_cpu, i, j) for j in range(cur_order)] + x = x + sum(coeff * d for coeff, d in zip(coeffs, reversed(ds))) + return x + + +@torch.no_grad() +def log_likelihood(model, x, sigma_min, sigma_max, extra_args=None, atol=1e-4, rtol=1e-4): + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + v = torch.randint_like(x, 2) * 2 - 1 + fevals = 0 + + def ode_fn(sigma, x): + nonlocal fevals + with torch.enable_grad(): + x = x[0].detach().requires_grad_() + denoised = model(x, sigma * s_in, **extra_args) + d = to_d(x, sigma, denoised) + fevals += 1 + grad = torch.autograd.grad((d * v).sum(), x)[0] + d_ll = (v * grad).flatten(1).sum(1) + return d.detach(), d_ll + + x_min = x, x.new_zeros([x.shape[0]]) + t = x.new_tensor([sigma_min, sigma_max]) + sol = odeint(ode_fn, x_min, t, atol=atol, rtol=rtol, method='dopri5') + latent, delta_ll = sol[0][-1], sol[1][-1] + ll_prior = torch.distributions.Normal(0, sigma_max).log_prob(latent).flatten(1).sum(1) + return ll_prior + delta_ll, {'fevals': fevals} + + +class PIDStepSizeController: + """A PID controller for ODE adaptive step size control.""" + + def __init__(self, h, pcoeff, icoeff, dcoeff, order=1, accept_safety=0.81, eps=1e-8): + self.h = h + self.b1 = (pcoeff + icoeff + dcoeff) / order + self.b2 = -(pcoeff + 2 * dcoeff) / order + self.b3 = dcoeff / order + self.accept_safety = accept_safety + self.eps = eps + self.errs = [] + + def limiter(self, x): + return 1 + math.atan(x - 1) + + def propose_step(self, error): + inv_error = 1 / (float(error) + self.eps) + if not self.errs: + self.errs = [inv_error, inv_error, inv_error] + self.errs[0] = inv_error + factor = self.errs[0] ** self.b1 * self.errs[1] ** self.b2 * self.errs[2] ** self.b3 + factor = self.limiter(factor) + accept = factor >= self.accept_safety + if accept: + self.errs[2] = self.errs[1] + self.errs[1] = self.errs[0] + self.h *= factor + return accept + + +class DPMSolver(nn.Module): + """DPM-Solver. See https://arxiv.org/abs/2206.00927.""" + + def __init__(self, model, extra_args=None, eps_callback=None, info_callback=None): + super().__init__() + self.model = model + self.extra_args = {} if extra_args is None else extra_args + self.eps_callback = eps_callback + self.info_callback = info_callback + + def t(self, sigma): + return -sigma.log() + + def sigma(self, t): + return t.neg().exp() + + def eps(self, eps_cache, key, x, t, *args, **kwargs): + if key in eps_cache: + return eps_cache[key], eps_cache + sigma = self.sigma(t) * x.new_ones([x.shape[0]]) + eps = (x - self.model(x, sigma, *args, **self.extra_args, **kwargs)) / self.sigma(t) + if self.eps_callback is not None: + self.eps_callback() + return eps, {key: eps, **eps_cache} + + def dpm_solver_1_step(self, x, t, t_next, eps_cache=None): + eps_cache = {} if eps_cache is None else eps_cache + h = t_next - t + eps, eps_cache = self.eps(eps_cache, 'eps', x, t) + x_1 = x - self.sigma(t_next) * h.expm1() * eps + return x_1, eps_cache + + def dpm_solver_2_step(self, x, t, t_next, r1=1 / 2, eps_cache=None): + eps_cache = {} if eps_cache is None else eps_cache + h = t_next - t + eps, eps_cache = self.eps(eps_cache, 'eps', x, t) + s1 = t + r1 * h + u1 = x - self.sigma(s1) * (r1 * h).expm1() * eps + eps_r1, eps_cache = self.eps(eps_cache, 'eps_r1', u1, s1) + x_2 = x - self.sigma(t_next) * h.expm1() * eps - self.sigma(t_next) / (2 * r1) * h.expm1() * (eps_r1 - eps) + return x_2, eps_cache + + def dpm_solver_3_step(self, x, t, t_next, r1=1 / 3, r2=2 / 3, eps_cache=None): + eps_cache = {} if eps_cache is None else eps_cache + h = t_next - t + eps, eps_cache = self.eps(eps_cache, 'eps', x, t) + s1 = t + r1 * h + s2 = t + r2 * h + u1 = x - self.sigma(s1) * (r1 * h).expm1() * eps + eps_r1, eps_cache = self.eps(eps_cache, 'eps_r1', u1, s1) + u2 = ( + x + - self.sigma(s2) * (r2 * h).expm1() * eps + - self.sigma(s2) * (r2 / r1) * ((r2 * h).expm1() / (r2 * h) - 1) * (eps_r1 - eps) + ) + eps_r2, eps_cache = self.eps(eps_cache, 'eps_r2', u2, s2) + x_3 = x - self.sigma(t_next) * h.expm1() * eps - self.sigma(t_next) / r2 * (h.expm1() / h - 1) * (eps_r2 - eps) + return x_3, eps_cache + + def dpm_solver_fast(self, x, t_start, t_end, nfe, eta=0.0, s_noise=1.0, noise_sampler=None): + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + if not t_end > t_start and eta: + raise ValueError('eta must be 0 for reverse sampling') + + m = math.floor(nfe / 3) + 1 + ts = torch.linspace(t_start, t_end, m + 1, device=x.device) + + if nfe % 3 == 0: + orders = [3] * (m - 2) + [2, 1] + else: + orders = [3] * (m - 1) + [nfe % 3] + + for i in range(len(orders)): + eps_cache = {} + t, t_next = ts[i], ts[i + 1] + if eta: + sd, su = get_ancestral_step(self.sigma(t), self.sigma(t_next), eta) + t_next_ = torch.minimum(t_end, self.t(sd)) + su = (self.sigma(t_next) ** 2 - self.sigma(t_next_) ** 2) ** 0.5 + else: + t_next_, su = t_next, 0.0 + + eps, eps_cache = self.eps(eps_cache, 'eps', x, t) + denoised = x - self.sigma(t) * eps + if self.info_callback is not None: + self.info_callback({'x': x, 'i': i, 't': ts[i], 't_up': t, 'denoised': denoised}) + + if orders[i] == 1: + x, eps_cache = self.dpm_solver_1_step(x, t, t_next_, eps_cache=eps_cache) + elif orders[i] == 2: + x, eps_cache = self.dpm_solver_2_step(x, t, t_next_, eps_cache=eps_cache) + else: + x, eps_cache = self.dpm_solver_3_step(x, t, t_next_, eps_cache=eps_cache) + + x = x + su * s_noise * noise_sampler(self.sigma(t), self.sigma(t_next)) + + return x + + def dpm_solver_adaptive( + self, + x, + t_start, + t_end, + order=3, + rtol=0.05, + atol=0.0078, + h_init=0.05, + pcoeff=0.0, + icoeff=1.0, + dcoeff=0.0, + accept_safety=0.81, + eta=0.0, + s_noise=1.0, + noise_sampler=None, + ): + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + if order not in {2, 3}: + raise ValueError('order should be 2 or 3') + forward = t_end > t_start + if not forward and eta: + raise ValueError('eta must be 0 for reverse sampling') + h_init = abs(h_init) * (1 if forward else -1) + atol = torch.tensor(atol) + rtol = torch.tensor(rtol) + s = t_start + x_prev = x + accept = True + pid = PIDStepSizeController(h_init, pcoeff, icoeff, dcoeff, 1.5 if eta else order, accept_safety) + info = {'steps': 0, 'nfe': 0, 'n_accept': 0, 'n_reject': 0} + + while s < t_end - 1e-5 if forward else s > t_end + 1e-5: + eps_cache = {} + t = torch.minimum(t_end, s + pid.h) if forward else torch.maximum(t_end, s + pid.h) + if eta: + sd, su = get_ancestral_step(self.sigma(s), self.sigma(t), eta) + t_ = torch.minimum(t_end, self.t(sd)) + su = (self.sigma(t) ** 2 - self.sigma(t_) ** 2) ** 0.5 + else: + t_, su = t, 0.0 + + eps, eps_cache = self.eps(eps_cache, 'eps', x, s) + denoised = x - self.sigma(s) * eps + + if order == 2: + x_low, eps_cache = self.dpm_solver_1_step(x, s, t_, eps_cache=eps_cache) + x_high, eps_cache = self.dpm_solver_2_step(x, s, t_, eps_cache=eps_cache) + else: + x_low, eps_cache = self.dpm_solver_2_step(x, s, t_, r1=1 / 3, eps_cache=eps_cache) + x_high, eps_cache = self.dpm_solver_3_step(x, s, t_, eps_cache=eps_cache) + delta = torch.maximum(atol, rtol * torch.maximum(x_low.abs(), x_prev.abs())) + error = torch.linalg.norm((x_low - x_high) / delta) / x.numel() ** 0.5 + accept = pid.propose_step(error) + if accept: + x_prev = x_low + x = x_high + su * s_noise * noise_sampler(self.sigma(s), self.sigma(t)) + s = t + info['n_accept'] += 1 + else: + info['n_reject'] += 1 + info['nfe'] += order + info['steps'] += 1 + + if self.info_callback is not None: + self.info_callback( + { + 'x': x, + 'i': info['steps'] - 1, + 't': s, + 't_up': s, + 'denoised': denoised, + 'error': error, + 'h': pid.h, + **info, + } + ) + + return x, info + + +@torch.no_grad() +def sample_dpm_fast( + model, + x, + sigma_min, + sigma_max, + n, + extra_args=None, + callback=None, + disable=None, + eta=0.0, + s_noise=1.0, + noise_sampler=None, +): + """DPM-Solver-Fast (fixed step size). See https://arxiv.org/abs/2206.00927.""" + if sigma_min <= 0 or sigma_max <= 0: + raise ValueError('sigma_min and sigma_max must not be 0') + with tqdm(total=n, disable=disable) as pbar: + dpm_solver = DPMSolver(model, extra_args, eps_callback=pbar.update) + if callback is not None: + dpm_solver.info_callback = lambda info: callback( + {'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info} + ) + return dpm_solver.dpm_solver_fast( + x, + dpm_solver.t(torch.tensor(sigma_max)), + dpm_solver.t(torch.tensor(sigma_min)), + n, + eta, + s_noise, + noise_sampler, + ) + + +@torch.no_grad() +def sample_dpm_adaptive( + model, + x, + sigma_min, + sigma_max, + extra_args=None, + callback=None, + disable=None, + order=3, + rtol=0.05, + atol=0.0078, + h_init=0.05, + pcoeff=0.0, + icoeff=1.0, + dcoeff=0.0, + accept_safety=0.81, + eta=0.0, + s_noise=1.0, + noise_sampler=None, + return_info=False, +): + """DPM-Solver-12 and 23 (adaptive step size). See https://arxiv.org/abs/2206.00927.""" + if sigma_min <= 0 or sigma_max <= 0: + raise ValueError('sigma_min and sigma_max must not be 0') + with tqdm(disable=disable) as pbar: + dpm_solver = DPMSolver(model, extra_args, eps_callback=pbar.update) + if callback is not None: + dpm_solver.info_callback = lambda info: callback( + {'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info} + ) + x, info = dpm_solver.dpm_solver_adaptive( + x, + dpm_solver.t(torch.tensor(sigma_max)), + dpm_solver.t(torch.tensor(sigma_min)), + order, + rtol, + atol, + h_init, + pcoeff, + icoeff, + dcoeff, + accept_safety, + eta, + s_noise, + noise_sampler, + ) + if return_info: + return x, info + return x + + +@torch.no_grad() +def sample_dpmpp_2s_ancestral( + model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1.0, s_noise=1.0, noise_sampler=None +): + """Ancestral sampling with DPM-Solver++(2S) second-order steps.""" + extra_args = {} if extra_args is None else extra_args + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + sigma_fn = lambda t: t.neg().exp() + t_fn = lambda sigma: sigma.log().neg() + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + if sigma_down == 0: + # Euler method + d = to_d(x, sigmas[i], denoised) + dt = sigma_down - sigmas[i] + x = x + d * dt + else: + # DPM-Solver++(2S) + t, t_next = t_fn(sigmas[i]), t_fn(sigma_down) + r = 1 / 2 + h = t_next - t + s = t + r * h + x_2 = (sigma_fn(s) / sigma_fn(t)) * x - (-h * r).expm1() * denoised + denoised_2 = model(x_2, sigma_fn(s) * s_in, **extra_args) + x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised_2 + # Noise addition + if sigmas[i + 1] > 0: + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up + return x + + +@torch.no_grad() +def sample_dpmpp_sde( + model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1.0, s_noise=1.0, noise_sampler=None, r=1 / 2 +): + """DPM-Solver++ (stochastic).""" + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() + noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max) if noise_sampler is None else noise_sampler + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + sigma_fn = lambda t: t.neg().exp() + t_fn = lambda sigma: sigma.log().neg() + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + if sigmas[i + 1] == 0: + # Euler method + d = to_d(x, sigmas[i], denoised) + dt = sigmas[i + 1] - sigmas[i] + x = x + d * dt + else: + # DPM-Solver++ + t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1]) + h = t_next - t + s = t + h * r + fac = 1 / (2 * r) + + # Step 1 + sd, su = get_ancestral_step(sigma_fn(t), sigma_fn(s), eta) + s_ = t_fn(sd) + x_2 = (sigma_fn(s_) / sigma_fn(t)) * x - (t - s_).expm1() * denoised + x_2 = x_2 + noise_sampler(sigma_fn(t), sigma_fn(s)) * s_noise * su + denoised_2 = model(x_2, sigma_fn(s) * s_in, **extra_args) + + # Step 2 + sd, su = get_ancestral_step(sigma_fn(t), sigma_fn(t_next), eta) + t_next_ = t_fn(sd) + denoised_d = (1 - fac) * denoised + fac * denoised_2 + x = (sigma_fn(t_next_) / sigma_fn(t)) * x - (t - t_next_).expm1() * denoised_d + x = x + noise_sampler(sigma_fn(t), sigma_fn(t_next)) * s_noise * su + return x + + +@torch.no_grad() +def sample_dpmpp_2m(model, x, sigmas, extra_args=None, callback=None, disable=None): + """DPM-Solver++(2M).""" + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + sigma_fn = lambda t: t.neg().exp() + t_fn = lambda sigma: sigma.log().neg() + old_denoised = None + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1]) + h = t_next - t + if old_denoised is None or sigmas[i + 1] == 0: + x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised + else: + h_last = t - t_fn(sigmas[i - 1]) + r = h_last / h + denoised_d = (1 + 1 / (2 * r)) * denoised - (1 / (2 * r)) * old_denoised + x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised_d + old_denoised = denoised + return x + + +class DiscreteSchedule(nn.Module): + """A mapping between continuous noise levels (sigmas) and a list of discrete noise + levels.""" + + def __init__(self, sigmas, quantize): + super().__init__() + self.register_buffer('sigmas', sigmas) + self.register_buffer('log_sigmas', sigmas.log()) + self.quantize = quantize + + @property + def sigma_min(self): + return self.sigmas[0] + + @property + def sigma_max(self): + return self.sigmas[-1] + + def get_sigmas(self, n=None): + if n is None: + return append_zero(self.sigmas.flip(0)) + t_max = len(self.sigmas) - 1 + t = torch.linspace(t_max, 0, n, device=self.sigmas.device) + return append_zero(self.t_to_sigma(t)) + + def sigma_to_t(self, sigma, quantize=None): + quantize = self.quantize if quantize is None else quantize + log_sigma = sigma.log() + dists = log_sigma - self.log_sigmas[:, None] + if quantize: + return dists.abs().argmin(dim=0).view(sigma.shape) + low_idx = dists.ge(0).cumsum(dim=0).argmax(dim=0).clamp(max=self.log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + low, high = self.log_sigmas[low_idx], self.log_sigmas[high_idx] + w = (low - log_sigma) / (low - high) + w = w.clamp(0, 1) + t = (1 - w) * low_idx + w * high_idx + return t.view(sigma.shape) + + def t_to_sigma(self, t): + t = t.float() + low_idx, high_idx, w = t.floor().long(), t.ceil().long(), t.frac() + log_sigma = (1 - w) * self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx] + return log_sigma.exp() + + +class DiscreteEpsDDPMDenoiser(DiscreteSchedule): + """A wrapper for discrete schedule DDPM models that output eps (the predicted + noise).""" + + def __init__(self, model, quantize=False): + alphas_cumprod = model.alphas_cumprod + super().__init__(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5, quantize) + self.inner_model = model + self.sigma_data = 1.0 + + def get_scalings(self, sigma): + c_out = -sigma + c_in = 1 / (sigma ** 2 + self.sigma_data ** 2) ** 0.5 + return c_out, c_in + + def get_eps(self, *args, **kwargs): + return self.inner_model.apply_model(*args, **kwargs) + + def loss(self, input, noise, sigma, **kwargs): + c_out, c_in = [append_dims(x, input.ndim) for x in self.get_scalings(sigma)] + noised_input = input + noise * append_dims(sigma, input.ndim) + eps = self.get_eps(noised_input * c_in, self.sigma_to_t(sigma), **kwargs) + return (eps - noise).pow(2).flatten(1).mean(1) + + def forward(self, input, sigma, **kwargs): + c_out, c_in = [append_dims(x, input.ndim) for x in self.get_scalings(sigma)] + eps = self.get_eps(input * c_in, self.sigma_to_t(sigma), **kwargs) + return input + eps * c_out diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/para_ddim.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/para_ddim.py new file mode 100644 index 000000000000..af5988f79502 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/para_ddim.py @@ -0,0 +1,231 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Tuple + +import numpy as np +import torch +import torch.nn as nn +from tqdm import tqdm + +from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler +from nemo.collections.multimodal.models.stable_diffusion.samplers.base_sampler import AbstractBaseSampler +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import noise_like + + +class ParaDDIMSampler(AbstractBaseSampler): + """ Parallel version of DDIM sampler. Utilizes Parallel Sampling (https://arxiv.org/abs/2305.16317). + It reduces the latency of a model, but the total compute cost is increased. + + The main three parameters that affect the performance of the algorithm are: + Parallelism (int): Defines the maximal size of the window. That many diffusion steps can happen in + parallel. + Tolerance (float): Sets the maximal error tolerance defined as a ratio between drift of the trajectory + and noise. The larger the tolerance the faster the method is. The smaller the tolerance the better + quality output is achieved. + Number of GPUs (int): Number of GPUs utilizing DataParallel parallelism to compute diffusion steps in + parallel. + + Different combination of these parameters values can result in different latency-quality-compute trade-off. + For more details please refer to the Parallel Sampling paper (https://arxiv.org/abs/2305.16317). + """ + + def __init__(self, model, **kwargs): + super().__init__(model, sampler=Sampler.PARA_DDIM, **kwargs) + + @torch.no_grad() + def p_sampling_fn(self): + pass + + @torch.no_grad() + def para_ddim_sampling_fn( + self, + cond: torch.tensor, + batch_size: int, + per_latent_shape: Tuple[int, ...], + x_T: torch.tensor = None, + steps: int = 50, + parallelism: int = 8, + tolerance: float = 0.1, + temperature: float = 0.0, + noise_dropout: float = 0.0, + quantize_denoised: bool = False, + unconditional_guidance_scale: float = 1.0, + unconditional_conditioning: torch.tensor = None, + score_corrector=None, + corrector_kwargs=None, + ): + print( + f"Running {self.sampler.name} with {steps} timesteps, " + f"parallelism={parallelism}, " + f"and tolerance={tolerance}" + ) + + device = self.model.betas.device + size = (batch_size, *per_latent_shape) + x_T = torch.randn(size, generator=self.model.rng, device=device) if x_T is None else x_T + time_range = np.flip(self.ddim_timesteps).copy() # Make a copy to resolve issue with negative strides + + # Processing window of timesteps [window_start, window_end) in parallel + window_start = 0 + window_size = min(parallelism, steps) + window_end = window_size + + # Store the whole trajectory in memory; it will be iteratively improved + latents = torch.stack([x_T] * (steps + 1)) + + # Pre-computing noises to ensure noise is sampled once per diffusion step + noises = torch.zeros_like(latents) + for i in range(steps - 1, -1, -1): + gaussian_noise = torch.randn_like(x_T) + noise = (self.ddim_variance[i] ** 0.5) * gaussian_noise + noises[i] = noise.clone() + + # Store inverse of the variance to avoid division at every iteration + variance = [self.ddim_variance[i] for i in range(steps - 1, -1, -1)] + [0] + inverse_variance = 1.0 / torch.tensor(variance).to(noises.device) + latent_dim = noises[0, 0].numel() + inverse_variance_norm = inverse_variance[:, None] / latent_dim + + scaled_tolerance = tolerance ** 2 + + with tqdm(total=steps) as progress_bar: + while window_start < steps: + window_size = window_end - window_start + + # Prepare the input to the model. Model will perform window_size noise predictions in parallel + window_cond = torch.stack([cond] * window_size) + window_uncond_cond = torch.stack([unconditional_conditioning] * window_size) + window_latents = latents[window_start:window_end] + window_timesteps = torch.tensor(time_range[window_start:window_end], device=device).repeat( + 1, batch_size + ) + + # Reshape (w, b, ...) -> (w * b, ...) + latents_input = window_latents.flatten(0, 1) + timesteps_input = window_timesteps.flatten(0, 1) + cond_input = window_cond.flatten(0, 1) + uncond_cond_input = window_uncond_cond.flatten(0, 1) + + # Model call + e_t, _ = self._get_model_output( + latents_input, + timesteps_input, + uncond_cond_input, + unconditional_guidance_scale, + score_corrector, + cond_input, + corrector_kwargs, + ) + # Reshape back (w * b, ...) -> (w, b, ...) + e_t = e_t.reshape(window_size, batch_size, *per_latent_shape) + + # Perform Picard iteration + window_latents_picard_iteration = self._get_x_prev( + batch_size=batch_size, + steps=steps, + x=window_latents, + e_t=e_t, + temperature=temperature, + noise_dropout=noise_dropout, + quantize_denoised=quantize_denoised, + window_start=window_start, + window_end=window_end, + device=device, + ).reshape(window_latents.shape) + + # Calculate cumulative drift + delta = window_latents_picard_iteration - window_latents + delta_cum = torch.cumsum(delta, dim=0) + block_latents_new = latents[window_start][None,] + delta_cum + + # Calculate the error + error = torch.linalg.norm( + (block_latents_new - latents[window_start + 1 : window_end + 1]).reshape( + window_size, batch_size, -1 + ), + dim=-1, + ).pow(2) + + # Calculate error magnitude + error_magnitude = error * inverse_variance_norm[window_start + 1 : window_end + 1] + # Pad so at least one value exceeds tolerance + error_magnitude = nn.functional.pad(error_magnitude, (0, 0, 0, 1), value=1e9) + error_exceeding = torch.max(error_magnitude > scaled_tolerance, dim=1).values.int() + + # Find how many diffusion steps have error below given threshold tolerance and shift the window + ind = torch.argmax(error_exceeding).item() + new_window_start = window_start + min(1 + ind, window_size) + new_window_end = min(new_window_start + window_size, steps) + + # Update the trajectory + latents[window_start + 1 : window_end + 1] = block_latents_new + latents[window_end : new_window_end + 1] = latents[window_end][ + None, + ] + + progress_bar.update(new_window_start - window_start) + window_start = new_window_start + window_end = new_window_end + + intermediates = {"x_inter": [latents[i] for i in range(steps)]} + return latents[-1], intermediates + + def _get_x_prev( + self, + batch_size: int, + steps: int, + x: torch.tensor, + e_t: torch.tensor, + temperature: float, + noise_dropout: float, + quantize_denoised: bool, + window_start: int, + window_end: int, + device: Any, + ): + alphas = self.ddim_alphas + alphas_prev = self.ddim_alphas_prev + sqrt_one_minus_alphas = self.ddim_sqrt_one_minus_alphas + sigmas = self.ddim_sigmas + window_size = window_end - window_start + + def prepare_tensor(x): + x = torch.tensor(x, device=device).flip(dims=[0]) + x = x.unsqueeze(1).repeat(1, batch_size).reshape(window_size, batch_size, 1, 1, 1) + return x + + # Select parameters corresponding to the currently considered timesteps. Note that index_end < index_start, + # because during diffusion the time is reversed (we go from timestep step to 0) + index_start = steps - window_start + index_end = steps - window_end + a_t = prepare_tensor(alphas[index_end:index_start]) + a_prev = prepare_tensor(alphas_prev[index_end:index_start]) + sigma_t = prepare_tensor(sigmas[index_end:index_start]) + sqrt_one_minus_at = prepare_tensor(sqrt_one_minus_alphas[index_end:index_start]) + + # Current prediction for x_0 + pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt() + if quantize_denoised: + pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0) + + # Direction pointing to x_t + dir_xt = (1.0 - a_prev - sigma_t ** 2).sqrt() * e_t + + noise = sigma_t * noise_like(x.shape, device) * temperature + if noise_dropout > 0.0: + noise = torch.nn.functional.dropout(noise, p=noise_dropout) + + x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise + return x_prev diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py new file mode 100644 index 000000000000..1602ec6245d4 --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/plms.py @@ -0,0 +1,105 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""SAMPLING ONLY.""" + +import torch + +from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler +from nemo.collections.multimodal.models.stable_diffusion.samplers.base_sampler import AbstractBaseSampler + + +class PLMSSampler(AbstractBaseSampler): + def __init__(self, model, schedule="linear", **kwargs): + super().__init__(model, sampler=Sampler.PLMS, schedule="linear", **kwargs) + + def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, verbose=False): + if ddim_eta != 0: + raise ValueError('ddim_eta must be 0 for PLMS') + super().make_schedule(ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, verbose=False) + + @torch.no_grad() + def p_sampling_fn( + self, + x, + c, + t, + index, + repeat_noise=False, + use_original_steps=False, + quantize_denoised=False, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + unconditional_guidance_scale=1.0, + unconditional_conditioning=None, + old_eps=None, + t_next=None, + ): + b, *_, device = *x.shape, x.device + e_t, model_output = self._get_model_output( + x, t, unconditional_conditioning, unconditional_guidance_scale, score_corrector, c, corrector_kwargs + ) + if len(old_eps) == 0: + # Pseudo Improved Euler (2nd order) + x_prev, pred_x0 = self._get_x_prev_and_pred_x0( + use_original_steps, + b, + index, + device, + x, + t, + model_output, + e_t, + quantize_denoised, + repeat_noise, + temperature, + noise_dropout, + ) + e_t_next, model_output = self._get_model_output( + x_prev, + t_next, + unconditional_conditioning, + unconditional_guidance_scale, + score_corrector, + c, + corrector_kwargs, + ) + e_t_prime = (e_t + e_t_next) / 2 + elif len(old_eps) == 1: + # 2nd order Pseudo Linear Multistep (Adams-Bashforth) + e_t_prime = (3 * e_t - old_eps[-1]) / 2 + elif len(old_eps) == 2: + # 3nd order Pseudo Linear Multistep (Adams-Bashforth) + e_t_prime = (23 * e_t - 16 * old_eps[-1] + 5 * old_eps[-2]) / 12 + elif len(old_eps) >= 3: + # 4nd order Pseudo Linear Multistep (Adams-Bashforth) + e_t_prime = (55 * e_t - 59 * old_eps[-1] + 37 * old_eps[-2] - 9 * old_eps[-3]) / 24 + + x_prev, pred_x0 = self._get_x_prev_and_pred_x0( + use_original_steps, + b, + index, + device, + x, + t, + model_output, + e_t_prime, + quantize_denoised, + repeat_noise, + temperature, + noise_dropout, + ) + + return x_prev, pred_x0, e_t diff --git a/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py b/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py new file mode 100644 index 000000000000..1da34e16508b --- /dev/null +++ b/nemo/collections/multimodal/models/stable_diffusion/samplers/sampler_dpm.py @@ -0,0 +1,77 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""SAMPLING ONLY.""" + +import torch + +from nemo.collections.multimodal.models.stable_diffusion.samplers import Sampler +from nemo.collections.multimodal.models.stable_diffusion.samplers.base_sampler import AbstractBaseSampler + +from .dpmsolver import DPMSolver, NoiseScheduleVP, model_wrapper + +MODEL_TYPES = {"eps": "noise", "v": "v"} + + +class DPMSolverSampler(AbstractBaseSampler): + def __init__(self, model, **kwargs): + + super().__init__(model, sampler=Sampler.DPM, **kwargs) + + def to_torch(x, model): + x_copy = x.clone() + x_detached = x_copy.detach() + x_float32 = x_detached.to(torch.float32) + x_device = x_float32.to(model.betas.device) + return x_device + + self.register_buffer("alphas_cumprod", to_torch(model.alphas_cumprod, model)) + + @torch.no_grad() + def p_sampling_fn(self): + pass + + @torch.no_grad() + def dpm_sampling_fn( + self, + shape, + steps, + conditioning=None, + unconditional_conditioning=None, + unconditional_guidance_scale=1.0, + x_T=None, + ): + + device = self.model.betas.device + if x_T is None: + img = torch.randn(shape, generator=self.model.rng, device=device) + else: + img = x_T + + ns = NoiseScheduleVP("discrete", alphas_cumprod=self.alphas_cumprod) + + model_fn = model_wrapper( + lambda x, t, c: self.model.apply_model(x, t, c), + ns, + model_type=MODEL_TYPES[self.model.parameterization], + guidance_type="classifier-free", + condition=conditioning, + unconditional_condition=unconditional_conditioning, + guidance_scale=unconditional_guidance_scale, + ) + dpm_solver = DPMSolver(model_fn, ns, predict_x0=True, thresholding=False) + x = dpm_solver.sample( + img, steps=steps, skip_type="time_uniform", method="multistep", order=2, lower_order_final=True, + ) + + return x.to(device), None diff --git a/nemo/collections/multimodal/modules/__init__.py b/nemo/collections/multimodal/modules/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/modules/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention.py new file mode 100644 index 000000000000..de301e0bc038 --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention.py @@ -0,0 +1,317 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Adapted from: +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py +""" +import math + +import numpy as np +import torch +import torch.nn as nn +from torch.cuda.amp import custom_bwd, custom_fwd + + +def count_flops_attn(model, _x, y): + """ + A counter for the `thop` package to count the operations in an + attention operation. + Meant to be used like: + macs, params = thop.profile( + model, + inputs=(inputs, timestamps), + custom_ops={QKVAttention: QKVAttention.count_flops}, + ) + """ + b, c, *spatial = y[0].shape + num_spatial = int(np.prod(spatial)) + # We perform two matmuls with the same number of ops. + # The first computes the weight matrix, the second computes + # the combination of the value vectors. + matmul_ops = 2 * b * (num_spatial ** 2) * c + model.total_ops += torch.DoubleTensor([matmul_ops]) + + +# Stable attention +class StableAttentionOp(torch.autograd.Function): + # This function defines the attention weight computation in a stable way + # The idea is to scale the gradients of weight matrix by the maximum absolute value. + # In case of overflow, this will prevent weight gradients from exploding. + # In case of underflow, since we clipped the scale to 1e-4, this will prevent underflow. + + @staticmethod + def forward(ctx, q, k): + w = torch.einsum('ncq,nck->nqk', q, k / math.sqrt(k.shape[1])).softmax(dim=2) + ctx.save_for_backward(q, k, w) + return w + + @staticmethod + def backward(ctx, dw): + q, k, w = ctx.saved_tensors + + s = dw.detach().norm(float('inf'), dim=[1, 2], keepdim=True).clip(min=1e-4) + dw = dw / s + + # Due to softmax, w is fp32, making db fp32. + # Type casting is required for amp to work. + db = torch._softmax_backward_data(grad_output=dw, output=w, dim=2, input_dtype=dw.dtype).to(q.dtype) + s = s / math.sqrt(k.shape[1]) + + dq = torch.einsum('nck,nqk->ncq', k, db) * s + dk = torch.einsum('ncq,nqk->nck', q, db) * s + + return dq, dk + + +class QKVStableAttention(nn.Module): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, dim=1) + + # Reshaping q and k + # try: + # q = q.view(bs * self.n_heads, ch, length) + # k = k.view(bs * self.n_heads, ch, length) + # except Exception: + q = q.reshape(bs * self.n_heads, ch, length) + k = k.reshape(bs * self.n_heads, ch, length) + + weight = StableAttentionOp.apply(q, k) + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) + return a.reshape(bs, -1, length), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVAttention(nn.Module): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, dim=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = torch.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length), + (k * scale).view(bs * self.n_heads, ch, length), + ) # More stable with f16 than dividing afterwards + weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) + return a.reshape(bs, -1, length), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class StableMaskedAttentionOp(torch.autograd.Function): + # Robust attention operation in case of masked attention + @staticmethod + @custom_fwd + def forward(ctx, q, k, mask): + max_neg_value = -float('inf') + w = torch.einsum('ncq,nck->nqk', q, k / math.sqrt(k.shape[1])) + w = w.masked_fill(mask, max_neg_value) + w = w.softmax(dim=2) + + # When we use an arbitrary mask, there is a possibility that we get nans in softmax. + # In this case, use nan_to_num to make it a stable number. + w = w.nan_to_num_() + ctx.save_for_backward(q, k, w, mask) + return w + + @staticmethod + @custom_bwd + def backward(ctx, dw): + q, k, w, mask = ctx.saved_tensors + max_neg_value = -torch.finfo(q.dtype).max + s = dw.detach().norm(float('inf'), dim=[1, 2], keepdim=True).clip(min=1e-4) + dw = dw / s + db = torch._softmax_backward_data(grad_output=dw, output=w, dim=2, input_dtype=dw.dtype) + + # Masking db + db_in = db.clone().masked_fill_(mask, 0) + + s = s / math.sqrt(k.shape[1]) + dq = torch.einsum('nck,nqk->ncq', k, db_in) * s + dk = torch.einsum('ncq,nqk->nck', q, db_in) * s + + # These are dummy derivatives since mask is a constant + dmask = (max_neg_value - w) * db.clone() * s + + return dq, dk, dmask + + +class QKVMaskedAttention(nn.Module): + """ + A module which performs QKV attention. + Attention mask is accepted as input. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, q, k, v, mask): + r""" + Apply QKV attention with attention mask. + + Args: + q: an [N x d x n_seq1] of queries. + k: an [N x d x n_seq2] of keys. + v: an [N x d x n_seq2] of values. + mask: Attention mask of size N x n_seq1 x n_seq2 + + Returns: an [N x d x n_seq1] tensor after attention. + """ + + bs, width, length_q = q.shape + _, _, length_k = k.shape + + assert width % self.n_heads == 0 + ch = width // self.n_heads + + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = torch.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length_q), + (k * scale).view(bs * self.n_heads, ch, length_k), + ) # More stable with f16 than dividing afterwards + + # Duplicate mask n_heads times + mask = mask.repeat_interleave(self.n_heads, dim=0) + assert mask.shape == weight.shape + max_neg_value = -float('inf') + weight = weight.masked_fill(~mask, max_neg_value) + + weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) + + # When we use an arbitrary mask, there is a possibility that we get nans in softmax. + # In this case, use nan_to_num to make it a non-nan number. + weight = weight.nan_to_num_() + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length_k)) + # We also return weight here for attention visualization. + return a.reshape(bs, -1, length_q), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVStableMaskedAttention(nn.Module): + """ + A module which performs QKV attention. + Attention mask is accepted as input. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, q, k, v, mask): + r""" + Apply QKV attention with attention mask. + + Args: + q: an [N x d x n_seq1] of queries. + k: an [N x d x n_seq2] of keys. + v: an [N x d x n_seq2] of values. + mask: Attention mask of size N x n_seq1 x n_seq2 + + Returns: an [N x d x n_seq1] tensor after attention. + """ + + bs, width, length_q = q.shape + _, _, length_k = k.shape + + assert width % self.n_heads == 0 + ch = width // self.n_heads + + q = q.view(bs * self.n_heads, ch, length_q) + k = k.view(bs * self.n_heads, ch, length_k) + + # Forming attention mask + mask = mask.repeat_interleave(self.n_heads, dim=0) + + weight = StableMaskedAttentionOp.apply(q, k, ~mask) + + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length_k)) + # We also return weight here for attention visualization. + return a.reshape(bs, -1, length_q), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class SelfAttentionPooling(nn.Module): + """ + Implementation of SelfAttentionPooling + Original Paper: Self-Attention Encoding and Pooling for Speaker Recognition + https://arxiv.org/pdf/2008.01077v1.pdf + Taken from: https://gist.github.com/pohanchi/c77f6dbfbcbc21c5215acde4f62e4362 + """ + + def __init__(self, input_dim): + super(SelfAttentionPooling, self).__init__() + self.W = nn.Linear(input_dim, 1) + + def forward(self, batch_rep): + """ + input: + batch_rep : size (N, T, H), N: batch size, T: sequence length, H: Hidden dimension + + attention_weight: + att_w : size (N, T, 1) + + return: + utter_rep: size (N, H) + """ + softmax = nn.functional.softmax + att_w = softmax(self.W(batch_rep).squeeze(-1), dim=1).unsqueeze(-1) + utter_rep = torch.sum(batch_rep * att_w, dim=1) + + return utter_rep diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention_alt.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention_alt.py new file mode 100644 index 000000000000..8927226c818e --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention_alt.py @@ -0,0 +1,321 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Adapted from: +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py +""" +import math + +import numpy as np +import torch +import torch.nn as nn +from torch.cuda.amp import custom_bwd, custom_fwd + +USE_ALT = False + + +def count_flops_attn(model, _x, y): + """ + A counter for the `thop` package to count the operations in an + attention operation. + Meant to be used like: + macs, params = thop.profile( + model, + inputs=(inputs, timestamps), + custom_ops={QKVAttention: QKVAttention.count_flops}, + ) + """ + b, c, *spatial = y[0].shape + num_spatial = int(np.prod(spatial)) + # We perform two matmuls with the same number of ops. + # The first computes the weight matrix, the second computes + # the combination of the value vectors. + matmul_ops = 2 * b * (num_spatial ** 2) * c + model.total_ops += torch.DoubleTensor([matmul_ops]) + + +# Stable attention +class StableAttentionOp(torch.autograd.Function): + # This function defines the attention weight computation in a stable way + # The idea is to scale the gradients of weight matrix by the maximum absolute value. + # In case of overflow, this will prevent weight gradients from exploding. + # In case of underflow, since we clipped the scale to 1e-4, this will prevent underflow. + + @staticmethod + def forward(ctx, q, k): + w = torch.einsum('ncq,nck->nqk', q, k / math.sqrt(k.shape[1])).softmax(dim=2) + ctx.save_for_backward(q, k, w) + return w + + @staticmethod + def backward(ctx, dw): + q, k, w = ctx.saved_tensors + + s = dw.detach().norm(float('inf'), dim=[1, 2], keepdim=True).clip(min=1e-4) + dw = dw / s + + # Due to softmax, w is fp32, making db fp32. + # Type casting is required for amp to work. + db = torch._softmax_backward_data(grad_output=dw, output=w, dim=2, input_dtype=dw.dtype).to(q.dtype) + s = s / math.sqrt(k.shape[1]) + + dq = torch.einsum('nck,nqk->ncq', k, db) * s + dk = torch.einsum('ncq,nqk->nck', q, db) * s + + return dq, dk + + +class QKVStableAttention(nn.Module): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, dim=1) + + # Reshaping q and k + # try: + # q = q.view(bs * self.n_heads, ch, length) + # k = k.view(bs * self.n_heads, ch, length) + # except Exception: + q = q.reshape(bs * self.n_heads, ch, length) + k = k.reshape(bs * self.n_heads, ch, length) + + weight = StableAttentionOp.apply(q, k) + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) + return a.reshape(bs, -1, length), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVAttention(nn.Module): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, dim=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = torch.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length), + (k * scale).view(bs * self.n_heads, ch, length), + ) # More stable with f16 than dividing afterwards + weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) + return a.reshape(bs, -1, length), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class StableMaskedAttentionOp(torch.autograd.Function): + # Robust attention operation in case of masked attention + @staticmethod + @custom_fwd + def forward(ctx, q, k, mask): + max_neg_value = -float('inf') + w = torch.einsum('ncq,nck->nqk', q, k / math.sqrt(k.shape[1])) + w = w.masked_fill(mask, max_neg_value) + w = w.softmax(dim=2) + + # When we use an arbitrary mask, there is a possibility that we get nans in softmax. + # In this case, use nan_to_num to make it a stable number. + # w = w.nan_to_num_() + ctx.save_for_backward(q, k, w, mask) + return w + + @staticmethod + @custom_bwd + def backward(ctx, dw): + q, k, w, mask = ctx.saved_tensors + max_neg_value = -torch.finfo(q.dtype).max + s = dw.detach().norm(float('inf'), dim=[1, 2], keepdim=True).clip(min=1e-4) + dw = dw / s + db = torch._softmax_backward_data(grad_output=dw, output=w, dim=2, input_dtype=dw.dtype) + + # Masking db + db_in = db.clone().masked_fill_(mask, 0) + + s = s / math.sqrt(k.shape[1]) + dq = torch.einsum('nck,nqk->ncq', k, db_in) * s + dk = torch.einsum('ncq,nqk->nck', q, db_in) * s + + # These are dummy derivatives since mask is a constant + dmask = (max_neg_value - w) * db.clone() * s + + return dq, dk, dmask + + +class QKVMaskedAttention(nn.Module): + """ + A module which performs QKV attention. + Attention mask is accepted as input. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, q, k, v, mask): + r""" + Apply QKV attention with attention mask. + + Args: + q: an [N x d x n_seq1] of queries. + k: an [N x d x n_seq2] of keys. + v: an [N x d x n_seq2] of values. + mask: Attention mask of size N x n_seq1 x n_seq2 + + Returns: an [N x d x n_seq1] tensor after attention. + """ + + bs, width, length_q = q.shape + _, _, length_k = k.shape + + assert width % self.n_heads == 0 + ch = width // self.n_heads + + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = torch.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length_q), + (k * scale).view(bs * self.n_heads, ch, length_k), + ) # More stable with f16 than dividing afterwards + + # Duplicate mask n_heads times + # mask = mask.repeat_interleave(self.n_heads, dim=0) + mask = mask.unsqueeze(0).repeat(self.n_heads, 1, 1, 1).transpose(0, 1).flatten(0, 1) + assert mask.shape == weight.shape + max_neg_value = -float('inf') + weight = weight.masked_fill(~mask, max_neg_value) + + weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) + + # When we use an arbitrary mask, there is a possibility that we get nans in softmax. + # In this case, use nan_to_num to make it a non-nan number. + # weight = weight.nan_to_num_() + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length_k)) + # We also return weight here for attention visualization. + return a.reshape(bs, -1, length_q), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVStableMaskedAttention(nn.Module): + """ + A module which performs QKV attention. + Attention mask is accepted as input. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, q, k, v, mask): + r""" + Apply QKV attention with attention mask. + + Args: + q: an [N x d x n_seq1] of queries. + k: an [N x d x n_seq2] of keys. + v: an [N x d x n_seq2] of values. + mask: Attention mask of size N x n_seq1 x n_seq2 + + Returns: an [N x d x n_seq1] tensor after attention. + """ + + bs, width, length_q = q.shape + _, _, length_k = k.shape + + assert width % self.n_heads == 0 + ch = width // self.n_heads + + q = q.view(bs * self.n_heads, ch, length_q) + k = k.view(bs * self.n_heads, ch, length_k) + + # Forming attention mask + # mask = mask.repeat_interleave(self.n_heads, dim=0) + mask = mask.unsqueeze(0).repeat(self.n_heads, 1, 1, 1).transpose(0, 1).flatten(0, 1) + + weight = StableMaskedAttentionOp.apply(q, k, ~mask) + + a = torch.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length_k)) + # We also return weight here for attention visualization. + return a.reshape(bs, -1, length_q), weight + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class SelfAttentionPooling(nn.Module): + """ + Implementation of SelfAttentionPooling + Original Paper: Self-Attention Encoding and Pooling for Speaker Recognition + https://arxiv.org/pdf/2008.01077v1.pdf + Taken from: https://gist.github.com/pohanchi/c77f6dbfbcbc21c5215acde4f62e4362 + """ + + def __init__(self, input_dim): + super(SelfAttentionPooling, self).__init__() + self.W = nn.Linear(input_dim, 1) + + def forward(self, batch_rep): + """ + input: + batch_rep : size (N, T, H), N: batch size, T: sequence length, H: Hidden dimension + + attention_weight: + att_w : size (N, T, 1) + + return: + utter_rep: size (N, H) + """ + softmax = nn.functional.softmax + att_w = softmax(self.W(batch_rep).squeeze(-1), dim=1).unsqueeze(-1) + utter_rep = torch.sum(batch_rep * att_w, dim=1) + + return utter_rep diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py new file mode 100644 index 000000000000..1d6b8395a58f --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py @@ -0,0 +1,906 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Adapted from: +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py +""" +import math +from abc import abstractmethod + +import torch as th +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint +from einops import rearrange + +from nemo.collections.multimodal.modules.imagen.diffusionmodules import attention_alt + +if attention_alt.USE_ALT: + from nemo.collections.multimodal.modules.imagen.diffusionmodules.attention_alt import ( + QKVAttention, + QKVMaskedAttention, + QKVStableAttention, + QKVStableMaskedAttention, + ) +else: + from nemo.collections.multimodal.modules.imagen.diffusionmodules.attention import ( + QKVAttention, + QKVMaskedAttention, + QKVStableAttention, + QKVStableMaskedAttention, + ) +from nemo.collections.multimodal.modules.imagen.diffusionmodules.layers import ( + Downsample, + Upsample, + UpsampleLearnable, + conv_nd, + linear, + normalization, + zero_module, +) + + +def check_cuda(): + if not th.cuda.is_available(): + raise RuntimeError('CUDA is not available') + cur_device = th.cuda.current_device() + dprops = th.cuda.get_device_properties(cur_device) + + is_sm75 = dprops.major == 7 and dprops.minor == 5 + is_sm8x = dprops.major == 8 and dprops.minor >= 0 + is_sm90 = dprops.major == 9 and dprops.minor >= 0 + + return is_sm8x or is_sm75 or is_sm90 + + +try: + from flash_attn import flash_attn_varlen_func, flash_attn_varlen_kvpacked_func + + flash_attn_installed = check_cuda() +except ImportError: + flash_attn_installed = False + + +class TextConditionedBlock(nn.Module): + r""" + Any module where forward() takes text embeddings as arguments. + """ + + @abstractmethod + def forward(self, x, text_emb, text_mask): + """ + Apply the module to `x` given `text_emb` text embedding and 'text_mask' text valid mask. + """ + + +class TimestepBlock(nn.Module): + """ + Any module where forward() takes timestep embeddings as a second argument. + """ + + @abstractmethod + def forward(self, x, emb): + """ + Apply the module to `x` given `emb` timestep embeddings. + """ + + +class ConditionalSequential(nn.Sequential, TimestepBlock, TextConditionedBlock): + r""" + A sequential module that accepts timestep embeddings, text embedding and text mask in addition to the input x. + Depending on the type of block, we either pass timestep embedding or text embeddings as inputs. + """ + + def forward(self, x, emb, text_emb, text_mask): + for layer in self: + if isinstance(layer, TimestepBlock): + x = layer(x, emb) + elif isinstance(layer, TextConditionedBlock): + x = layer(x, text_emb, text_mask) + else: + x = layer(x) + return x + + +class ResBlock(TimestepBlock): + """ + A residual block that can optionally change the number of channels. + + :param channels: the number of input channels. + :param emb_channels: the number of timestep embedding channels. + :param dropout: the rate of dropout. + :param out_channels: if specified, the number of out channels. + :param use_conv: if True and out_channels is specified, use a spatial + convolution instead of a smaller 1x1 convolution to change the + channels in the skip connection. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param use_checkpoint: if True, use gradient checkpointing on this module. + :param up: if True, use this block for upsampling. + :param down: if True, use this block for downsampling. + """ + + def __init__( + self, + channels, + emb_channels, + dropout, + out_channels=None, + use_conv=False, + use_scale_shift_norm=False, + dims=2, + use_checkpoint=False, + up=False, + down=False, + learnable_upsampling=False, + ): + super().__init__() + self.channels = channels + self.emb_channels = emb_channels + self.dropout = dropout + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_checkpoint = use_checkpoint + self.use_scale_shift_norm = use_scale_shift_norm + + self.in_layers = nn.Sequential( + normalization(channels), nn.SiLU(), conv_nd(dims, channels, self.out_channels, 3, padding=1), + ) + + self.updown = up or down + if learnable_upsampling: + upsample_fn = UpsampleLearnable + else: + upsample_fn = Upsample + + if up: + self.h_upd = upsample_fn(channels, False, dims) + self.x_upd = upsample_fn(channels, False, dims) + elif down: + self.h_upd = Downsample(channels, False, dims) + self.x_upd = Downsample(channels, False, dims) + else: + self.h_upd = self.x_upd = nn.Identity() + + self.emb_layers = nn.Sequential( + nn.SiLU(), linear(emb_channels, 2 * self.out_channels if use_scale_shift_norm else self.out_channels,), + ) + self.out_layers = nn.Sequential( + normalization(self.out_channels), + nn.SiLU(), + nn.Dropout(p=dropout), + zero_module(conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1)), + ) + + if self.out_channels == channels: + self.skip_connection = nn.Identity() + elif use_conv: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 3, padding=1) + else: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 1) + + def forward(self, x, emb): + """ + Apply the block to a Tensor, conditioned on a timestep embedding. + + :param x: an [N x C x ...] Tensor of features. + :param emb: an [N x emb_channels] Tensor of timestep embeddings. + :return: an [N x C x ...] Tensor of outputs. + """ + if self.use_checkpoint: + return checkpoint.checkpoint(self._forward, x, emb) + else: + return self._forward(x, emb) + + def _forward(self, x, emb): + if self.updown: + in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1] + h = in_rest(x) + h = self.h_upd(h) + x = self.x_upd(x) + h = in_conv(h) + else: + h = self.in_layers(x) + emb_out = self.emb_layers(emb) + while len(emb_out.shape) < len(h.shape): + emb_out = emb_out[..., None] + if self.use_scale_shift_norm: + out_norm, out_rest = self.out_layers[0], self.out_layers[1:] + scale, shift = th.chunk(emb_out, 2, dim=1) + h = out_norm(h) * (1 + scale) + shift + h = out_rest(h) + else: + h = h + emb_out + h = self.out_layers(h) + return self.skip_connection(x) + h + + +class EfficientResBlock(TimestepBlock): + """ + A residual block that can optionally change the number of channels. + Follow Figure A.27 in Imagen Paper. + :param channels: the number of input channels. + :param emb_channels: the number of timestep embedding channels. + :param out_channels: if specified, the number of out channels. + :param use_conv: if True and out_channels is specified, use a spatial + convolution instead of a smaller 1x1 convolution to change the + channels in the skip connection. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param use_checkpoint: if True, use gradient checkpointing on this module. + :param up: if True, use this block for upsampling. + :param down: if True, use this block for downsampling. + """ + + def __init__( + self, + channels, + emb_channels, + out_channels=None, + use_scale_shift_norm=False, + dims=2, + use_checkpoint=False, + skip_connection_scaling=False, + ): + super().__init__() + + out_channels = out_channels or channels + + self.use_scale_shift_norm = use_scale_shift_norm + self.use_checkpoint = use_checkpoint + + self.in_layers = nn.Sequential( + normalization(channels), nn.SiLU(), conv_nd(dims, channels, out_channels, 3, padding=1) + ) + + self.emb_layers = nn.Sequential( + nn.SiLU(), nn.Linear(emb_channels, 2 * out_channels if use_scale_shift_norm else out_channels,), + ) + + self.out_layers = nn.Sequential( + normalization(out_channels), + nn.SiLU(), + zero_module(conv_nd(dims, out_channels, out_channels, 3, padding=1)), + ) + + self.shortcut = conv_nd(dims, channels, out_channels, 1) + self.shortcut_scale = 1 / math.sqrt(2) if skip_connection_scaling else 1 + + def forward(self, x, emb): + """ + Apply the block to a Tensor, conditioned on a timestep embedding. + + :param x: an [N x C x ...] Tensor of features. + :param emb: an [N x emb_channels] Tensor of timestep embeddings. + :return: an [N x C x ...] Tensor of outputs. + """ + if self.use_checkpoint: + return checkpoint.checkpoint(self._forward, x, emb) + else: + return self._forward(x, emb) + + def _forward(self, x, emb): + h = self.in_layers(x) + emb_out = self.emb_layers(emb) + while len(emb_out.shape) < len(h.shape): + emb_out = emb_out[..., None] + if self.use_scale_shift_norm: + out_norm, out_rest = self.out_layers[0], self.out_layers[1:] + scale, shift = th.chunk(emb_out, 2, dim=1) + h = out_norm(h) * (1 + scale) + shift + h = out_rest(h) + else: + h = h + emb_out + h = self.out_layers(h) + + return h + self.shortcut(x) * self.shortcut_scale + + +class Block(nn.Module): + def __init__( + self, + channels, + emb_channels, + out_channels=None, + use_scale_shift_norm=True, + num_resblocks=2, + attention_type=None, + text_embed_dim=0, + stable_attention=True, + flash_attention=False, + num_head_channels=-1, + num_heads=8, + dims=2, + use_checkpoint=False, + skip_connection_scaling=False, + ): + super().__init__() + + out_channels = out_channels or channels + + self.attention_type = attention_type + self.text_embed_dim = text_embed_dim + + blocks = [ + EfficientResBlock( + channels, + emb_channels, + out_channels=out_channels, + use_scale_shift_norm=use_scale_shift_norm, + dims=dims, + use_checkpoint=use_checkpoint, + skip_connection_scaling=skip_connection_scaling, + ) + ] + + blocks += [ + EfficientResBlock( + out_channels, + emb_channels, + out_channels=out_channels, + use_scale_shift_norm=use_scale_shift_norm, + dims=dims, + use_checkpoint=use_checkpoint, + skip_connection_scaling=skip_connection_scaling, + ) + for _ in range(num_resblocks - 1) + ] + + self.blocks = nn.ModuleList(blocks) + + # Attention blocks + # Self - Self-attention blocks + # fused - Single attention layer for fusing self and cross attention. + if self.attention_type is not None: + assert self.attention_type in ('self', 'cross', 'fused', 'stacked') + attention_kwargs = dict() + + if self.attention_type == 'self': + attention_fn = SelfAttentionBlock + elif self.attention_type == 'cross': + attention_fn = CrossAttentionBlock + attention_kwargs['context_dim'] = self.text_embed_dim + elif self.attention_type == 'stacked': + attention_fn = StackedCrossAttentionBlock + attention_kwargs['context_dim'] = self.text_embed_dim + else: + attention_fn = FusedCrossAttentionBlock + attention_kwargs['context_dim'] = self.text_embed_dim + + self.attention_layer = attention_fn( + out_channels, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_checkpoint=use_checkpoint, + stable_attention=stable_attention, + flash_attention=flash_attention, + **attention_kwargs, + ) + + @abstractmethod + def forward(self, x, emb, text_embed=None, text_mask=None): + pass + + +class DBlock(Block): + def __init__( + self, + channels, + emb_channels, + out_channels=None, + use_scale_shift_norm=True, + conv_down=True, + stride=2, + num_resblocks=2, + attention_type=None, + text_embed_dim=0, + stable_attention=True, + flash_attention=False, + num_head_channels=-1, + num_heads=8, + dims=2, + use_checkpoint=False, + skip_connection_scaling=False, + ): + super().__init__( + channels, + emb_channels, + out_channels=out_channels, + use_scale_shift_norm=use_scale_shift_norm, + num_resblocks=num_resblocks, + attention_type=attention_type, + text_embed_dim=text_embed_dim, + stable_attention=stable_attention, + flash_attention=flash_attention, + num_head_channels=num_head_channels, + num_heads=num_heads, + dims=dims, + use_checkpoint=use_checkpoint, + skip_connection_scaling=skip_connection_scaling, + ) + + self.conv_down = conv_down + if self.conv_down: + # self.conv = nn.Conv2d(channels, channels, 3, stride=stride, padding=1) + self.conv = nn.Conv2d(channels, channels, 4, stride=stride, padding=1) + + def forward(self, x, emb, text_embed=None, text_mask=None): + if self.conv_down: + x = self.conv(x) + + for block in self.blocks: + x = block(x, emb) + + if self.attention_type in ('cross', 'fused', 'stacked'): + x = self.attention_layer(x, text_embed, text_mask) + elif self.attention_type == 'self': + x = self.attention_layer(x) + + return x + + +class UBlock(Block): + def __init__( + self, + channels, + emb_channels, + out_channels=None, + use_scale_shift_norm=True, + conv_up=True, + stride=2, + num_resblocks=2, + attention_type=None, + text_embed_dim=0, + stable_attention=True, + flash_attention=False, + num_head_channels=-1, + num_heads=8, + dims=2, + use_checkpoint=False, + skip_connection_scaling=False, + ): + super().__init__( + channels, + emb_channels, + out_channels=out_channels, + use_scale_shift_norm=use_scale_shift_norm, + num_resblocks=num_resblocks, + attention_type=attention_type, + text_embed_dim=text_embed_dim, + stable_attention=stable_attention, + flash_attention=flash_attention, + num_head_channels=num_head_channels, + num_heads=num_heads, + dims=dims, + use_checkpoint=use_checkpoint, + skip_connection_scaling=skip_connection_scaling, + ) + + self.conv_up = conv_up + if self.conv_up: + self.conv = nn.ConvTranspose2d(out_channels, out_channels, 4, stride, 1) + + def forward(self, x, emb, text_embed=None, text_mask=None): + for block in self.blocks: + x = block(x, emb) + + if self.attention_type in ('cross', 'fused', 'stacked'): + x = self.attention_layer(x, text_embed, text_mask) + elif self.attention_type == 'self': + x = self.attention_layer(x) + + if self.conv_up: + x = self.conv(x) + + return x + + +class FusedCrossAttentionBlock(TextConditionedBlock): + """ + An attention block that fuses self-attention and cross-attention + in a single block. + """ + + def __init__( + self, + channels, + context_dim, + num_heads=1, + num_head_channels=-1, + use_checkpoint=False, + stable_attention=True, + flash_attention=False, + ): + super().__init__() + self.channels = channels + if num_head_channels == -1: + self.num_heads = num_heads + else: + assert ( + channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + self.use_checkpoint = use_checkpoint + self.flash_attention = flash_attention + self.norm = normalization(channels) + self.norm_context = normalization(context_dim) + self.norm_self = normalization(channels) + + # For image features + self.q = conv_nd(1, channels, channels, 1) + + # For context + self.kv_context = conv_nd(1, context_dim, channels * 2, 1) + + # For spatial + self.kv_self = conv_nd(1, channels, channels * 2, 1) + + if flash_attention: + assert flash_attn_installed, "FlashAttention is not installed." + assert not stable_attention, "FlashAttention doesn't support the stable form." + + elif stable_attention: + self.attention = QKVStableMaskedAttention(self.num_heads) + else: + self.attention = QKVMaskedAttention(self.num_heads) + + self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) + + def forward(self, x, context, mask): + if self.use_checkpoint: + return checkpoint.checkpoint(self._forward, x, context, mask) + else: + return self._forward(x, context, mask) + + def _forward(self, x, context, mask): + + b, c, *spatial = x.shape + x = x.reshape(b, c, -1) + + q = self.q(self.norm(x)) + + # Key-value pairs for self-attention + kv_self = self.kv_self(self.norm_self(x)) + k_self, v_self = kv_self.chunk(2, dim=1) + k_self = k_self.contiguous() + v_self = v_self.contiguous() + + # Key-value pairs for cross-attention + context = th.permute(context, (0, 2, 1)) + context_n = self.norm_context(context) + kv_context = self.kv_context(context_n) + k_context, v_context = kv_context.chunk(2, dim=1) + k_context = k_context.contiguous() + v_context = v_context.contiguous() + + # Appending key-value pairs + k_full = th.cat([k_self, k_context], dim=2) + v_full = th.cat([v_self, v_context], dim=2) + + if self.flash_attention: + # q: b (h d) s, k_context: b (h d) s + batch_size = q.shape[0] + max_seqlen_q, max_seqlen_k = q.shape[2], q.shape[2] + k_context.shape[2] + q = rearrange(q, 'b (h d) s -> (b s) h d', h=self.num_heads) + + mask_self = th.ones((batch_size, max_seqlen_q), device=q.device, dtype=th.bool) + mask_context = mask.bool() + mask_full = th.cat([mask_self, mask_context], dim=1) + + k_full_unpadded = k_full.transpose(1, 2)[mask_full] + total_k = k_full_unpadded.shape[0] + k_full_unpadded = k_full_unpadded.view(total_k, self.num_heads, -1) + + v_full_unpadded = v_full.transpose(1, 2)[mask_full] + v_full_unpadded = v_full_unpadded.view(total_k, self.num_heads, -1) + + # (b s) t h d + kv_full_unpadded = th.stack([k_full_unpadded, v_full_unpadded], dim=1) + + cu_seqlens_q = th.arange( + 0, (batch_size + 1) * max_seqlen_q, step=max_seqlen_q, dtype=th.int32, device=q.device + ) + cu_seqlens_k = th.zeros((batch_size + 1), dtype=th.int32, device=k_full.device) + cu_seqlens_k[1:] = th.cumsum(mask.sum(dim=1), dim=0) + cu_seqlens_k += cu_seqlens_q + + out = flash_attn_varlen_kvpacked_func( + q, kv_full_unpadded, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, 0.0 + ) + h = rearrange(out, '(b s) h d -> b (h d) s', b=batch_size, h=self.num_heads) + else: + # Computing mask for self attention + mask_self = th.ones(k_self.shape[0], q.shape[2], k_self.shape[2], device=mask.device) + + # Mask for cross attention + mask_context = mask.view(mask.shape[0], 1, mask.shape[1]) + mask_context = mask_context.repeat(1, q.shape[2], 1) + + # Fused mask + mask_full = th.cat([mask_self, mask_context], dim=2) + mask_full = mask_full.to(th.bool) + + h, _ = self.attention(q, k_full, v_full, mask_full) + + h = self.proj_out(h) + return (x + h).reshape(b, c, *spatial) + + +class SelfAttentionBlock(nn.Module): + """ + An attention block that allows spatial positions to attend to each other. + + Originally ported from here, but adapted to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. + """ + + def __init__( + self, + channels, + num_heads=1, + num_head_channels=-1, + use_checkpoint=False, + stable_attention=False, + flash_attention=False, + ): + super().__init__() + self.channels = channels + if num_head_channels == -1: + self.num_heads = num_heads + else: + assert ( + channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + self.use_checkpoint = use_checkpoint + self.norm = normalization(channels) + self.qkv = conv_nd(1, channels, channels * 3, 1) + self.flash_attention = flash_attention + if flash_attention: + assert flash_attn_installed, "FlashAttention is not installed." + assert not stable_attention, "FlashAttention doesn't support the stable form." + elif stable_attention: + self.attention = QKVStableAttention(self.num_heads) + else: + self.attention = QKVAttention(self.num_heads) + + self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) + + def forward(self, x): + if self.use_checkpoint: + return checkpoint.checkpoint(self._forward, x) + else: + return self._forward(x) + + def _forward(self, x): + + b, c, *spatial = x.shape + x = x.reshape(b, c, -1) + qkv = self.qkv(self.norm(x)) + + if self.flash_attention: + # qkv shape: (b, (3 h d) s), need to reshape to (b, s, h, d) for each q, k, v + b, _, _ = qkv.shape + h = self.num_heads + q, k, v = qkv.chunk(3, dim=1) + max_seqlen_q, max_seqlen_k = q.shape[2], k.shape[2] + q = rearrange(q, 'b (h d) s -> (b s) h d', h=self.num_heads) + k = rearrange(k, 'b (h d) s -> (b s) h d', h=self.num_heads) + v = rearrange(v, 'b (h d) s -> (b s) h d', h=self.num_heads) + cu_seqlens_q = th.arange(0, (b + 1) * max_seqlen_q, step=max_seqlen_q, dtype=th.int32, device=q.device) + cu_seqlens_k = th.arange(0, (b + 1) * max_seqlen_k, step=max_seqlen_k, dtype=th.int32, device=k.device) + h = flash_attn_varlen_func(q, k, v, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, 0.0) + h = rearrange(h, '(b s) h d -> b (h d) s', b=b, h=self.num_heads) + else: + h, _ = self.attention(qkv) + h = self.proj_out(h) + return (x + h).reshape(b, c, *spatial) + + +######################################################################### +# These are the attention blocks as implemented by Stable Diffusion +# https://github.com/CompVis/stable-diffusion/blob/69ae4b35e0a0f6ee1af8bb9a5d0016ccb27e36dc/ldm/modules/attention.py#L196 + + +class CrossAttentionBlock(TextConditionedBlock): + """ + An attention block that allows spatial positions to attend to context. + In our case, context is the token-wise text embeddings. + """ + + def __init__( + self, + channels, + context_dim, + num_heads=1, + num_head_channels=-1, + use_checkpoint=False, + stable_attention=True, + flash_attention=False, + ): + super().__init__() + self.channels = channels + if num_head_channels == -1: + self.num_heads = num_heads + else: + assert ( + channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + self.use_checkpoint = use_checkpoint + self.norm = normalization(channels) + self.norm_context = normalization(context_dim) + self.flash_attention = flash_attention + # For image features + self.q = conv_nd(1, channels, channels, 1) + + # For context + self.kv = conv_nd(1, context_dim, channels * 2, 1) + + if flash_attention: + assert flash_attn_installed, "FlashAttention is not installed." + assert not stable_attention, "FlashAttention doesn't support the stable form." + elif stable_attention: + self.attention = QKVStableMaskedAttention(self.num_heads) + else: + self.attention = QKVMaskedAttention(self.num_heads) + + self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) + + def forward(self, x, context, mask): + if self.use_checkpoint: + return checkpoint.checkpoint(self._forward, x, context, mask) + else: + return self._forward(x, context, mask) + + def _forward(self, x, context, mask): + + b, c, *spatial = x.shape + x = x.reshape(b, c, -1) + + q = self.q(self.norm(x)) + context = th.permute(context, (0, 2, 1)) + context_n = self.norm_context(context) + kv = self.kv(context_n) + k, v = kv.chunk(2, dim=1) + k = k.contiguous() + v = v.contiguous() + + if self.flash_attention: + batch_size = q.shape[0] + max_seqlen_q, max_seqlen_k = q.shape[2], k.shape[2] + q = rearrange(q, 'b (h d) s -> (b s) h d', h=self.num_heads) + mask = mask.to(th.bool) + k_unpadded = k.transpose(1, 2)[mask] + total_k = k_unpadded.shape[0] + k_unpadded = k_unpadded.view(total_k, self.num_heads, -1) + v_unpadded = v.transpose(1, 2)[mask] + v_unpadded = v_unpadded.view(total_k, self.num_heads, -1) + kv_unpadded = th.stack([k_unpadded, v_unpadded], dim=1) + cu_seqlens_q = th.arange( + 0, (batch_size + 1) * max_seqlen_q, step=max_seqlen_q, dtype=th.int32, device=q.device + ) + cu_seqlens_k = th.zeros((batch_size + 1), dtype=th.int32, device=q.device) + cu_seqlens_k[1:] = th.cumsum(mask.sum(dim=1), dim=0) + + out = flash_attn_varlen_kvpacked_func( + q, kv_unpadded, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, 0.0 + ) + h = rearrange(out, '(b s) h d -> b (h d) s', b=batch_size, h=self.num_heads) + else: + # Computing mask for cross attention + mask = mask.view(mask.shape[0], 1, mask.shape[1]) + mask = mask.repeat(1, q.shape[-1], 1) + mask = mask.to(th.bool) + + h, _ = self.attention(q, k, v, mask) + h = self.proj_out(h) + return (x + h).reshape(b, c, *spatial) + + +class GEGLU(nn.Module): + def __init__(self, dim_in, dim_out): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out * 2) + + def forward(self, x): + x, gate = self.proj(x).chunk(2, dim=-1) + return x * F.gelu(gate) + + +class FeedForward(nn.Module): + def __init__(self, dim, mult=4, glu=False, dropout=0.0): + super().__init__() + inner_dim = int(dim * mult) + project_in = nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU()) if not glu else GEGLU(dim, inner_dim) + + self.norm = normalization(dim) + self.net = nn.Sequential(project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim)) + + def forward(self, x): + b, c, *spatial = x.shape + x = x.reshape(b, c, -1) + + h = self.norm(x) + + # Reshape so that the channel dim moves to last + # Linear function operates on the last dimension + h = th.permute(h, (0, 2, 1)) + + h = self.net(h) + + # Permute it back + h = th.permute(h, (0, 2, 1)) + + return (x + h).reshape(b, c, *spatial) + + +class StackedCrossAttentionBlock(TextConditionedBlock): + """ + An attention block that stacks self-attention and cross-attention layers + in a single block. + """ + + def __init__( + self, + channels, + context_dim, + num_heads=1, + num_head_channels=-1, + use_checkpoint=False, + stable_attention=True, + flash_attention=False, + ): + super().__init__() + self.proj_in = conv_nd(2, channels, channels, 1) + self.norm = normalization(channels) + self.use_checkpoint = use_checkpoint + + self.self_attention_block = SelfAttentionBlock( + channels=channels, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_checkpoint=use_checkpoint, + stable_attention=stable_attention, + flash_attention=flash_attention, + ) + + self.cross_attention_block = CrossAttentionBlock( + channels=channels, + context_dim=context_dim, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_checkpoint=use_checkpoint, + stable_attention=stable_attention, + flash_attention=flash_attention, + ) + + self.ff = FeedForward(dim=channels, glu=True) + self.proj_out = zero_module(conv_nd(2, channels, channels, 1)) + + def forward(self, x, context, mask): + if self.use_checkpoint: + return checkpoint.checkpoint(self._forward, x, context, mask) + else: + return self._forward(x, context, mask) + + def _forward(self, x, context, mask): + + h = self.norm(x) + h = self.proj_in(h) + + h = self.self_attention_block(h) + h = self.cross_attention_block(h, context, mask) + h = self.ff(h) + + h = self.proj_out(h) + return h + x diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/embs.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/embs.py new file mode 100644 index 000000000000..6d5f50023166 --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/embs.py @@ -0,0 +1,69 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +import torch +import torch.nn as nn +from einops import rearrange + + +class LearnedSinusoidalPosEmb(nn.Module): + """ following @crowsonkb 's lead with learned sinusoidal pos emb """ + + """ https://github.com/crowsonkb/v-diffusion-jax/blob/master/diffusion/models/danbooru_128.py#L8 """ + + def __init__(self, dim): + super().__init__() + assert (dim % 2) == 0 + half_dim = dim // 2 + self.weights = nn.Parameter(torch.randn(half_dim)) + + def forward(self, x): + x = rearrange(x, 'b -> b 1') + freqs = x * rearrange(self.weights, 'd -> 1 d') * 2 * math.pi + fouriered = torch.cat((freqs.sin(), freqs.cos()), dim=-1) + fouriered = torch.cat((x, fouriered), dim=-1) + return fouriered + + +class UnLearnedSinusoidalPosEmb(nn.Module): + def __init__(self, dim, max_period=10000): + """ + Create sinusoidal timestep embeddings. + + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an [N x dim] Tensor of positional embeddings. + """ + super().__init__() + self.dim = dim + self.max_period = max_period + print(f'Unlearned Timestep Embedding Schedule: dim={dim}, max_period={max_period}') + + def forward(self, timesteps): + dim = self.dim + half = dim // 2 + max_period = self.max_period + dtype = timesteps.dtype + freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half).to( + device=timesteps.device + ) + args = timesteps[:, None].float() * freqs[None] + freqs = freqs.to(dtype=dtype) + args = args.to(dtype=dtype) + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + return embedding diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py new file mode 100644 index 000000000000..a5cb19444057 --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py @@ -0,0 +1,251 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Copyright (c) 2021 OpenAI +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +Brought from: +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/nn.py + +Various utilities for neural networks. +""" + +import math + +import torch as th +import torch.nn as nn +import torch.nn.functional as F + +try: + from group_norm import GroupNormOpt + + OPT_GROUP_NORM = True +except Exception: + print('Fused optimized group norm has not been installed.') + OPT_GROUP_NORM = False + + +def conv_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D convolution module. + """ + if dims == 1: + return nn.Conv1d(*args, **kwargs) + elif dims == 2: + return nn.Conv2d(*args, **kwargs) + elif dims == 3: + return nn.Conv3d(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def linear(*args, **kwargs): + """ + Create a linear module. + """ + return nn.Linear(*args, **kwargs) + + +def avg_pool_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D average pooling module. + """ + if dims == 1: + return nn.AvgPool1d(*args, **kwargs) + elif dims == 2: + return nn.AvgPool2d(*args, **kwargs) + elif dims == 3: + return nn.AvgPool3d(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def update_ema(target_params, source_params, rate=0.99): + """ + Update target parameters to be closer to those of source parameters using + an exponential moving average. + + :param target_params: the target parameter sequence. + :param source_params: the source parameter sequence. + :param rate: the EMA rate (closer to 1 means slower). + """ + for targ, src in zip(target_params, source_params): + targ.detach().mul_(rate).add_(src, alpha=1 - rate) + + +def zero_module(module): + """ + Zero out the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().zero_() + return module + + +def scale_module(module, scale): + """ + Scale the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().mul_(scale) + return module + + +def mean_flat(tensor): + """ + Take the mean over all non-batch dimensions. + """ + return tensor.mean(dim=list(range(1, len(tensor.shape)))) + + +def normalization(channels, act=""): + """ + Make a standard normalization layer. + + :param channels: number of input channels. + :return: an nn.Module for normalization. + """ + if OPT_GROUP_NORM: + return GroupNormOpt(32, channels, act=act) + + return nn.GroupNorm(32, channels) + + +def timestep_embedding(timesteps, dim, max_period=10000, dtype=th.float32): + """ + Create sinusoidal timestep embeddings. + + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an [N x dim] Tensor of positional embeddings. + """ + half = dim // 2 + freqs = th.exp(-math.log(max_period) * th.arange(start=0, end=half, dtype=th.float32) / half).to( + device=timesteps.device + ) + args = timesteps[:, None].float() * freqs[None] + freqs = freqs.to(dtype=dtype) + args = args.to(dtype=dtype) + embedding = th.cat([th.cos(args), th.sin(args)], dim=-1) + if dim % 2: + embedding = th.cat([embedding, th.zeros_like(embedding[:, :1])], dim=-1) + return embedding + + +# Native ADM nearest neighbor upsampling +class Upsample(nn.Module): + """ + An upsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + if use_conv: + self.conv = conv_nd(dims, self.channels, self.out_channels, 3, padding=1) + + def forward(self, x): + assert x.shape[1] == self.channels + if self.dims == 3: + x = F.interpolate(x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest") + else: + x = F.interpolate(x, scale_factor=2, mode="nearest") + if self.use_conv: + x = self.conv(x) + return x + + +class UpsampleLearnable(nn.Module): + """ + Upsampling based on ConvTranspose2d. This is needed for bfloat support. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + + if self.dims == 2: + self.conv = nn.ConvTranspose2d(self.channels, self.out_channels, 4, 2, 1) + elif self.dims == 3: + self.conv = nn.ConvTranspose3d( + self.channels, self.out_channels, kernel_size=(1, 4, 4), stride=(1, 2, 2), padding=(0, 1, 1) + ) + else: + raise ValueError('Upsampling support only for 2D and 3D') + + def forward(self, x): + assert x.shape[1] == self.channels + x = self.conv(x) + return x + + +class Downsample(nn.Module): + """ + A downsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + downsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + stride = 2 if dims != 3 else (1, 2, 2) + if use_conv: + self.op = conv_nd(dims, self.channels, self.out_channels, 3, stride=stride, padding=1) + else: + assert self.channels == self.out_channels + self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride) + + def forward(self, x): + assert x.shape[1] == self.channels + return self.op(x) diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py new file mode 100644 index 000000000000..0ce1a46a5884 --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/nets.py @@ -0,0 +1,642 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from nemo.collections.multimodal.modules.imagen.diffusionmodules.attention import SelfAttentionPooling +from nemo.collections.multimodal.modules.imagen.diffusionmodules.blocks import ( + ConditionalSequential, + DBlock, + FusedCrossAttentionBlock, + ResBlock, + StackedCrossAttentionBlock, + UBlock, +) +from nemo.collections.multimodal.modules.imagen.diffusionmodules.embs import ( + LearnedSinusoidalPosEmb, + UnLearnedSinusoidalPosEmb, +) +from nemo.collections.multimodal.modules.imagen.diffusionmodules.layers import Downsample +from nemo.collections.multimodal.modules.imagen.diffusionmodules.layers import UpsampleLearnable as Upsample +from nemo.collections.multimodal.modules.imagen.diffusionmodules.layers import linear, normalization, zero_module + + +class UNetModel(nn.Module): + def __init__( + self, + embed_dim, # Dimension of embeddings. Also used to calculate the number of channels in ResBlock + image_size, # Input image size. Used to calculate where to inject attention layers in UNet + channels=3, # Input channel number + text_embed_dim=512, # Dimension of conditioned text embedding. Different text encoders and different model versions have different values + num_res_blocks=3, # Number of ResBlock in each level of UNet + channel_mult=[1, 2, 3, 4], # Used with embed_dim to calculate the number of channels for each level of UNet + num_attn_heads=4, # The number of heads in the attention layer + per_head_channels=64, # The number of channels per attention head + cond_dim=512, # Dimension of Conditioning projections + attention_type='fused', # Type of attention layer + feature_pooling_type='attention', # Type of pooling + learned_sinu_pos_emb_dim=16, # Dimension of learned time positional embedding. 0 for unlearned timestep embeddings. + attention_resolutions=[8, 16, 32], # List of resolutions to inject attention layers + dropout=False, # The rate of dropout + use_null_token=False, # Whether to create a learned null token for attention + init_conv_kernel_size=3, # Initial Conv kernel size. imagen_pytorch uses 7 + gradient_checkpointing=False, # Whether to use gradient checkpointing + scale_shift_norm=True, # Whether to use scale shift norm + stable_attention=True, # Whether to use numerically-stable attention calculation + flash_attention=False, # Whether to use flash attention calculation + resblock_updown=False, # Whether to use ResBlock or Downsample/Upsample + resample_with_conv=True, # When resblock_updown=False, whether to use conv in addition to Pooling&ConvTranspose + low_res_cond=False, + noise_cond_aug=False, + ): + super().__init__() + + # Attention Class + if attention_type == 'stacked': + attention_fn = StackedCrossAttentionBlock + elif attention_type == 'fused': + attention_fn = FusedCrossAttentionBlock + else: + raise ValueError('Attention {} not defined'.format(attention_type)) + + # Time embedding for log(snr) noise from continous version + time_embed_dim = embed_dim * 4 + assert learned_sinu_pos_emb_dim >= 0 + if learned_sinu_pos_emb_dim > 0: + sinu_pos_emb = LearnedSinusoidalPosEmb(learned_sinu_pos_emb_dim) + sinu_pos_emb_input_dim = learned_sinu_pos_emb_dim + 1 + self.time_embed = nn.Sequential( + sinu_pos_emb, + nn.Linear(sinu_pos_emb_input_dim, time_embed_dim), + nn.SiLU(), + nn.Linear(time_embed_dim, time_embed_dim), + ) + else: + # Unlearned Time Embedding + sinu_pos_emb = UnLearnedSinusoidalPosEmb(embed_dim) + self.time_embed = nn.Sequential( + sinu_pos_emb, linear(embed_dim, time_embed_dim), nn.SiLU(), linear(time_embed_dim, time_embed_dim) + ) + + # Pooling + assert feature_pooling_type == 'attention' or feature_pooling_type == 'mean' + self.feature_pooling_type = feature_pooling_type + if feature_pooling_type == 'attention': + self.attention_pooling = nn.Sequential( + SelfAttentionPooling(input_dim=text_embed_dim), + nn.LayerNorm(text_embed_dim), + nn.Linear(text_embed_dim, cond_dim), + ) + + # Context Projections + self.text_to_cond = linear(text_embed_dim, cond_dim) + self.to_text_non_attn_cond = nn.Sequential( + nn.LayerNorm(cond_dim), + nn.Linear(cond_dim, time_embed_dim), + nn.SiLU(), + nn.Linear(time_embed_dim, time_embed_dim), + ) + + # Register for Null Token + if use_null_token: + self.null_text_embedding = nn.Parameter(torch.randn(1, 1, cond_dim, dtype=self.text_to_cond.weight.dtype)) + self.use_null_token = use_null_token + + # Converting attention resolutions to downsampling factor + attention_ds = [] + attention_resolutions = sorted(attention_resolutions) + self.image_size = image_size + for res in attention_resolutions: + attention_ds.append(image_size // int(res)) + + self.low_res_cond = low_res_cond + # Low res noise conditioning augmentation + self.noise_cond_aug = noise_cond_aug + if self.noise_cond_aug: + assert ( + self.low_res_cond + ), 'noise conditioning augmentation should only be enabled when training with low-res cond' + if learned_sinu_pos_emb_dim > 0: + lowres_sinu_pos_emb = LearnedSinusoidalPosEmb(learned_sinu_pos_emb_dim) + lowres_sinu_pos_emb_dim = learned_sinu_pos_emb_dim + 1 + else: + lowres_sinu_pos_emb = UnLearnedSinusoidalPosEmb(embed_dim) + lowres_sinu_pos_emb_dim = embed_dim + self.lowres_time_embed = nn.Sequential( + lowres_sinu_pos_emb, + nn.Linear(lowres_sinu_pos_emb_dim, time_embed_dim), + nn.SiLU(), + nn.Linear(time_embed_dim, time_embed_dim), + ) + + # Initial Convolution + in_channels = 2 * channels if low_res_cond else channels + init_dim = embed_dim * channel_mult[0] + self.init_conv = ConditionalSequential( + nn.Conv2d(in_channels, init_dim, init_conv_kernel_size, padding=init_conv_kernel_size // 2) + ) + + if isinstance(num_res_blocks, int): + res_blocks_list = [num_res_blocks] * len(channel_mult) + else: + res_blocks_list = num_res_blocks + # UNet Init + # Downsampling Layers + # We use Conv2D for UNet + CONV_DIM = 2 + ch = init_dim + ds = 1 + self.input_blocks = nn.ModuleList([self.init_conv]) + num_input_block_channels = [ch] + for level, mult in enumerate(channel_mult): + num_res_blocks = res_blocks_list[level] + for _ in range(num_res_blocks): + out_channels = mult * embed_dim + layers = [ + ResBlock( + channels=ch, + emb_channels=time_embed_dim, + dropout=dropout, + out_channels=out_channels, + dims=CONV_DIM, + use_checkpoint=gradient_checkpointing, + use_scale_shift_norm=scale_shift_norm, + learnable_upsampling=True, + ) + ] + ch = out_channels + if ds in attention_ds: + layers.append( + attention_fn( + channels=ch, + num_heads=num_attn_heads, + num_head_channels=per_head_channels, + use_checkpoint=gradient_checkpointing, + stable_attention=stable_attention, + flash_attention=flash_attention, + context_dim=cond_dim, + ) + ) + self.input_blocks.append(ConditionalSequential(*layers)) + num_input_block_channels.append(ch) + is_last_level = level == len(channel_mult) - 1 + if not is_last_level: + # DownSampling + self.input_blocks.append( + ConditionalSequential( + ResBlock( + channels=ch, + emb_channels=time_embed_dim, + dropout=dropout, + out_channels=ch, + dims=CONV_DIM, + use_checkpoint=gradient_checkpointing, + use_scale_shift_norm=scale_shift_norm, + down=True, + learnable_upsampling=True, + ) + if resblock_updown + else Downsample(channels=ch, use_conv=resample_with_conv, dims=CONV_DIM, out_channels=ch,) + ) + ) + num_input_block_channels.append(ch) + ds *= 2 + + # Middle Layers + self.middle_block = ConditionalSequential( + # Mid Block 1 + ResBlock( + channels=ch, + emb_channels=time_embed_dim, + dropout=dropout, + dims=CONV_DIM, + use_checkpoint=gradient_checkpointing, + use_scale_shift_norm=scale_shift_norm, + learnable_upsampling=True, + ), + # Attention Layer + attention_fn( + channels=ch, + num_heads=num_attn_heads, + num_head_channels=per_head_channels, + use_checkpoint=gradient_checkpointing, + stable_attention=stable_attention, + flash_attention=flash_attention, + context_dim=cond_dim, + ), + # Mid Block 2 + ResBlock( + channels=ch, + emb_channels=time_embed_dim, + dropout=dropout, + dims=CONV_DIM, + use_checkpoint=gradient_checkpointing, + use_scale_shift_norm=scale_shift_norm, + learnable_upsampling=True, + ), + ) + + # Upsampling Layers + self.output_blocks = nn.ModuleList([]) + for level, mult in list(enumerate(channel_mult))[::-1]: + num_res_blocks = res_blocks_list[level] + for i in range(num_res_blocks + 1): + ich = num_input_block_channels.pop() + out_channels = embed_dim * mult + layers = [ + ResBlock( + channels=ch + ich, + emb_channels=time_embed_dim, + dropout=dropout, + out_channels=out_channels, + dims=CONV_DIM, + use_checkpoint=gradient_checkpointing, + use_scale_shift_norm=scale_shift_norm, + learnable_upsampling=True, + ) + ] + ch = out_channels + + if ds in attention_ds: + layers.append( + attention_fn( + channels=ch, + num_heads=-1, # TODO + num_head_channels=per_head_channels, + use_checkpoint=gradient_checkpointing, + stable_attention=stable_attention, + flash_attention=flash_attention, + context_dim=cond_dim, + ) + ) + is_last_block = i == num_res_blocks + if level and is_last_block: + layers.append( + ResBlock( + channels=ch, + emb_channels=time_embed_dim, + dropout=dropout, + out_channels=ch, + dims=CONV_DIM, + use_checkpoint=gradient_checkpointing, + use_scale_shift_norm=scale_shift_norm, + up=True, + learnable_upsampling=True, + ) + if resblock_updown + else Upsample(channels=ch, use_conv=resample_with_conv, dims=CONV_DIM, out_channels=ch) + ) + ds //= 2 + self.output_blocks.append(ConditionalSequential(*layers)) + + self.out = nn.Sequential( + normalization(ch), + nn.SiLU(), + zero_module(nn.Conv2d(init_dim, channels, init_conv_kernel_size, padding=init_conv_kernel_size // 2)), + ) + + def forward( + self, x, time, text_embed=None, text_mask=None, x_low_res=None, time_low_res=None, + ): + if self.low_res_cond: + assert x_low_res is not None, 'x_low_res cannot be None' + else: + assert x_low_res is None, 'x_low_res cannot be presented' + if self.noise_cond_aug: + assert time_low_res is not None, 'time_low_res cannot be None when training with noise conditioning aug' + else: + assert time_low_res is None, 'time_low_res cannot be presented' + # Concatenating low resolution images + if x_low_res is not None: + if x_low_res.shape != x.shape: + # Upscale if not done in the trainer + _, _, new_height, new_width = x.shape + x_low_res = F.interpolate(x_low_res, (new_height, new_width), mode="bicubic") + x = torch.cat([x, x_low_res], dim=1) + batch_size, device = x.shape[0], x.device + + if x.dtype != time.dtype or time.dtype != text_embed.dtype: + dtype = text_embed.dtype + x = x.to(dtype=dtype) + time = time.to(dtype=dtype) + if x_low_res is not None: + x_low_res = x_low_res.to(dtype=dtype) + if time_low_res is not None: + time_low_res = time_low_res.to(dtype=dtype) + # Time Conditioning + t = self.time_embed(time) + # Add lowres time conditioning + if self.noise_cond_aug: + lowres_t = self.lowres_time_embed(time_low_res) + t += lowres_t + # Text Conditioning + text_cond = self.text_to_cond(text_embed) + + # Context Embedding + # TODO We may want to concat time token here + if self.use_null_token: + # Null Context (Helpful when text_embed is drop) + null_context = self.null_text_embedding.repeat(batch_size, 1, 1) + context_emb = torch.cat([text_cond, null_context], dim=1) + context_mask = torch.cat([text_mask, torch.ones(batch_size, 1).to(device)], dim=1) + else: + context_emb = text_cond + context_mask = text_mask + + # Add pooled text embeddings to the diffusion timestep + # TODO We may only want to calculated the pooled feature based on text token length + if self.feature_pooling_type == 'mean': + pooled_text_cond = text_cond.mean(dim=-2) + elif self.feature_pooling_type == 'attention': + pooled_text_cond = self.attention_pooling(text_embed) + text_hiddens = self.to_text_non_attn_cond(pooled_text_cond) + t += text_hiddens + + h = x + hs = [] + # UNet Forward + for module in self.input_blocks: + h = module(h, t, context_emb, context_mask) + hs.append(h) + h = self.middle_block(h, t, context_emb, context_mask) + for module in self.output_blocks: + h_prev = hs.pop() + h = torch.cat([h, h_prev], dim=1) + h = module(h, t, context_emb, context_mask) + return self.out(h) + + def forward_with_cond_scale(self, *args, text_embed=None, cond_scale=1.0, **kwargs): + logits = self.forward(*args, text_embed=text_embed, **kwargs) + if cond_scale == 1.0: + return logits + null_logits = self.forward(*args, text_embed=torch.zeros_like(text_embed), **kwargs) + return null_logits + (logits - null_logits) * cond_scale + + +class EfficientUNetModel(nn.Module): + def __init__( + self, + embed_dim, + image_size, + channels=3, + text_embed_dim=512, # Dimension of conditioned text embedding. Different text encoders and different model versions have different values + channel_mult=[ + 1, + 1, + 2, + 4, + 8, + ], # Used with embed_dim to calculate the number of channels for each level of Efficient-UNet + num_attn_heads=8, # The number of heads in the attention layer + per_head_channels=64, # The number of channels per attention head + attention_type='fused', # Type of attention layer + atnn_enabled_at=[0, 0, 0, 0, 1], # Whether to enable attention at each level + feature_pooling_type='attention', # Type of pooling + stride=2, # Stride in ResBlock + num_resblocks=[ + 1, + 2, + 4, + 8, + 8, + ], # Used with num_res_blocks to calculate the number of residual blocks at each level of Efficient-UNet + learned_sinu_pos_emb_dim=16, # Dimension of learned time positional embedding. 0 for unlearned timestep embeddings. + use_null_token=False, # Whether to create a learned null token for attention + init_conv_kernel_size=3, # Initial Conv kernel size. imagen_pytorch uses 7 + gradient_checkpointing=False, # Whether to use gradient checkpointing + scale_shift_norm=True, # Whether to use scale shift norm + stable_attention=True, # Whether to use numerically-stable attention calculation + flash_attention=False, # Whether to use flash attention calculation + skip_connection_scaling=False, # Whether to use 1/sqrt(2) scaling for ResBlock skip connection + noise_cond_aug=False, + ): + super().__init__() + + self.n_levels = len(channel_mult) + self.image_size = image_size + # Time embedding for log(snr) noise from continous version + time_embed_dim = embed_dim * 4 + assert learned_sinu_pos_emb_dim >= 0 + if learned_sinu_pos_emb_dim > 0: + sinu_pos_emb = LearnedSinusoidalPosEmb(learned_sinu_pos_emb_dim) + sinu_pos_emb_input_dim = learned_sinu_pos_emb_dim + 1 + self.time_embed = nn.Sequential( + sinu_pos_emb, + nn.Linear(sinu_pos_emb_input_dim, time_embed_dim), + nn.SiLU(), + nn.Linear(time_embed_dim, time_embed_dim), + ) + else: + # Unlearned Time Embedding + sinu_pos_emb = UnLearnedSinusoidalPosEmb(embed_dim) + self.time_embed = nn.Sequential( + sinu_pos_emb, linear(embed_dim, time_embed_dim), nn.SiLU(), linear(time_embed_dim, time_embed_dim) + ) + + self.noise_cond_aug = noise_cond_aug + if self.noise_cond_aug: + if learned_sinu_pos_emb_dim > 0: + lowres_sinu_pos_emb = LearnedSinusoidalPosEmb(learned_sinu_pos_emb_dim) + lowres_sinu_pos_emb_dim = learned_sinu_pos_emb_dim + 1 + else: + lowres_sinu_pos_emb = UnLearnedSinusoidalPosEmb(embed_dim) + lowres_sinu_pos_emb_dim = embed_dim + self.lowres_time_embed = nn.Sequential( + lowres_sinu_pos_emb, + nn.Linear(lowres_sinu_pos_emb_dim, time_embed_dim), + nn.SiLU(), + nn.Linear(time_embed_dim, time_embed_dim), + ) + cond_dim = text_embed_dim # time_embed_dim + # Pooling + assert feature_pooling_type == 'attention' or feature_pooling_type == 'mean' + self.feature_pooling_type = feature_pooling_type + if feature_pooling_type == 'attention': + self.attention_pooling = nn.Sequential( + SelfAttentionPooling(input_dim=text_embed_dim), + nn.LayerNorm(text_embed_dim), + nn.Linear(text_embed_dim, cond_dim), + ) + + # Context Projections + self.text_to_cond = linear(text_embed_dim, cond_dim) + self.to_text_non_attn_cond = nn.Sequential( + nn.LayerNorm(cond_dim), + nn.Linear(cond_dim, time_embed_dim), + nn.SiLU(), + nn.Linear(time_embed_dim, time_embed_dim), + ) + # Register for Null Token + if use_null_token: + self.null_text_embedding = nn.Parameter(torch.randn(1, 1, cond_dim, dtype=self.text_to_cond.weight.dtype)) + self.use_null_token = use_null_token + + # Initial Convolution + # Multiply in_channels by 2 because we concatenate with low res inputs. + in_channels = channels * 2 + init_dim = embed_dim * channel_mult[0] + self.init_conv = nn.Conv2d(in_channels, init_dim, init_conv_kernel_size, padding=init_conv_kernel_size // 2) + # Efficient-UNet Init + self.DBlocks = nn.ModuleDict() + self.UBlocks = nn.ModuleDict() + ch = init_dim + for level, mult in enumerate(channel_mult): + # Different level has different num of res blocks + num_resblock = num_resblocks[level] + # Only perform upsample/downsample if it is not the last (deepest) level + is_last_level = level == len(channel_mult) - 1 + level_attention_type = attention_type if atnn_enabled_at[level] else None + + level_key = str(level) # TODO Change to more meaningful naming + self.DBlocks[level_key] = DBlock( + channels=ch, + emb_channels=time_embed_dim, + out_channels=int(mult * embed_dim), + use_scale_shift_norm=scale_shift_norm, + conv_down=not is_last_level, + stride=stride, + num_resblocks=num_resblock, + attention_type=level_attention_type, + text_embed_dim=cond_dim, + num_heads=num_attn_heads, + num_head_channels=per_head_channels, + use_checkpoint=gradient_checkpointing, + stable_attention=stable_attention, + flash_attention=flash_attention, + skip_connection_scaling=skip_connection_scaling, + ) + self.UBlocks[level_key] = UBlock( + channels=int(mult * embed_dim), + emb_channels=time_embed_dim, + out_channels=ch, + use_scale_shift_norm=scale_shift_norm, + conv_up=not is_last_level, + stride=stride, + num_resblocks=num_resblock, + attention_type=level_attention_type, + text_embed_dim=cond_dim, + num_heads=num_attn_heads, + num_head_channels=per_head_channels, + use_checkpoint=gradient_checkpointing, + stable_attention=stable_attention, + flash_attention=flash_attention, + skip_connection_scaling=skip_connection_scaling, + ) + ch = int(mult * embed_dim) + self.out = nn.Conv2d(channel_mult[0] * embed_dim, channels, 1) + + def forward( + self, x, time, text_embed, text_mask, x_low_res, time_low_res=None, + ): + if self.noise_cond_aug: + assert time_low_res is not None, 'time_low_res cannot be None when training with noise conditioning aug' + else: + assert time_low_res is None, 'time_low_res cannot be presented' + + if x.dtype != time.dtype or time.dtype != text_embed.dtype: + dtype = text_embed.dtype + x = x.to(dtype=dtype) + time = time.to(dtype=dtype) + if x_low_res is not None: + x_low_res = x_low_res.to(dtype=dtype) + if time_low_res is not None: + time_low_res = time_low_res.to(dtype=dtype) + + batch_size, device = x.shape[0], x.device + # Time Conditioning + t = self.time_embed(time) + # Text Conditioning + text_cond = self.text_to_cond(text_embed) + # Concatenating low resolution images + if x_low_res.shape != x.shape: + # Upscale if not done in the trainer + _, _, new_height, new_width = x.shape + x_low_res = F.interpolate(x_low_res, (new_height, new_width), mode="bicubic") + x = torch.cat([x, x_low_res], dim=1) + + # Add lowres time conditioning + if self.noise_cond_aug: + lowres_t = self.lowres_time_embed(time_low_res) + t += lowres_t + # Context Embedding + # TODO We may want to concat time token here + if self.use_null_token: + # Null Context (Helpful when text_embed is drop) + null_context = self.null_text_embedding.repeat(batch_size, 1, 1) + context_emb = torch.cat([text_cond, null_context], dim=1) + context_mask = torch.cat([text_mask, torch.ones(batch_size, 1).to(device)], dim=1) + else: + context_emb = text_cond + context_mask = text_mask + + # Add pooled text embeddings to the diffusion timestep + # TODO We may only want to calculated the pooled feature based on text token length + if self.feature_pooling_type == 'mean': + pooled_text_cond = text_cond.mean(dim=-2) + elif self.feature_pooling_type == 'attention': + pooled_text_cond = self.attention_pooling(text_embed) + text_hiddens = self.to_text_non_attn_cond(pooled_text_cond) + t += text_hiddens + + # UNet forward + x = self.init_conv(x) + feats = dict() + for level in range(self.n_levels): + level_key = str(level) + x = self.DBlocks[level_key](x, t, context_emb, context_mask) + # Save feats for UBlocks + if level < self.n_levels - 1: + feats[level_key] = x + for level in range(self.n_levels - 1, -1, -1): + level_key = str(level) + if level < self.n_levels - 1: + x = x + feats[level_key] + x = self.UBlocks[level_key](x, t, context_emb, context_mask) + return self.out(x) + + def forward_with_cond_scale(self, *args, text_embed=None, cond_scale=1.0, **kwargs): + logits = self.forward(*args, text_embed=text_embed, **kwargs) + if cond_scale == 1.0: + return logits + null_logits = self.forward(*args, text_embed=torch.zeros_like(text_embed), **kwargs) + return null_logits + (logits - null_logits) * cond_scale + + +if __name__ == '__main__': + model = UNetModel(embed_dim=512, image_size=64,) + + pytorch_total_params = sum(p.numel() for p in model.parameters()) + print(pytorch_total_params) + + image_batch = torch.rand(4, 3, 64, 64) + text_cond = torch.rand(4, 88, 512) + text_mask = torch.ones(4, 88) + time = torch.ones(4) + + output = model(image_batch, time, text_cond, text_mask,) + + print(output.shape) + + model_sr = EfficientUNetModel(embed_dim=128, image_size=256) + pytorch_total_params = sum(p.numel() for p in model_sr.parameters()) + print(pytorch_total_params) + output = model_sr( + torch.randn(4, 3, 256, 256), + torch.randn(4, 3, 256, 256), + torch.ones(4), + torch.randn(4, 88, 512), + torch.ones(4, 88), + ) + print(output.shape) diff --git a/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.json b/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.json new file mode 100644 index 000000000000..3fb4ffdac7f1 --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.json @@ -0,0 +1,51 @@ +{ + "architectures": [ + "T5WithLMHeadModel" + ], + "d_ff": 65536, + "d_kv": 128, + "d_model": 1024, + "decoder_start_token_id": 0, + "dropout_rate": 0.1, + "eos_token_id": 1, + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "n_positions": 512, + "num_heads": 128, + "num_layers": 24, + "output_past": true, + "pad_token_id": 0, + "relative_attention_num_buckets": 32, + "task_specific_params": { + "summarization": { + "early_stopping": true, + "length_penalty": 2.0, + "max_length": 200, + "min_length": 30, + "no_repeat_ngram_size": 3, + "num_beams": 4, + "prefix": "summarize: " + }, + "translation_en_to_de": { + "early_stopping": true, + "max_length": 300, + "num_beams": 4, + "prefix": "translate English to German: " + }, + "translation_en_to_fr": { + "early_stopping": true, + "max_length": 300, + "num_beams": 4, + "prefix": "translate English to French: " + }, + "translation_en_to_ro": { + "early_stopping": true, + "max_length": 300, + "num_beams": 4, + "prefix": "translate English to Romanian: " + } + }, + "vocab_size": 32128 +} diff --git a/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py b/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py new file mode 100644 index 000000000000..56472db3f052 --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/encoder/t5encoder.py @@ -0,0 +1,59 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +import torch +from transformers import T5Config, T5EncoderModel, T5Tokenizer + + +class T5Encoder(torch.nn.Module): + def __init__(self, max_seq_len=512, encoder_path=None): + super().__init__() + self.max_seq_len = max_seq_len + + self.model_seq_len = 512 + # Initializing T5 model + self.tokenizer = T5Tokenizer.from_pretrained("t5-11b", model_max_length=self.model_seq_len) + + if encoder_path is None: + self.model = T5EncoderModel.from_pretrained("t5-11b", low_cpu_mem_usage=True) + else: + print(f'Load T5 encoder from {encoder_path}') + hard_coded_encoder_weight_location = os.path.join(encoder_path, "t5xxl-encoder.bin") + hard_coded_encoder_config_location = os.path.join(os.path.dirname(__file__), "t5encoder.json") + self.model = T5EncoderModel.from_pretrained( + hard_coded_encoder_weight_location, + config=T5Config.from_json_file(hard_coded_encoder_config_location), + low_cpu_mem_usage=True, + ) + + def encode(self, text_batch, device='cuda'): + encoded = self.tokenizer.batch_encode_plus( + text_batch, return_tensors="pt", padding="max_length", max_length=self.model_seq_len, truncation=True + ) + # We expect all the processing is done in GPU. + input_ids = encoded.input_ids.to(device=device) + attn_mask = encoded.attention_mask.to(device=device) + + with torch.no_grad(): + output = self.model(input_ids=input_ids, attention_mask=attn_mask) + encoded_text = output.last_hidden_state.detach() + + encoded_text = encoded_text[:, 0 : self.max_seq_len] + attn_mask = attn_mask[:, 0 : self.max_seq_len] + for bnum in range(encoded_text.shape[0]): + nvalid_elem = attn_mask[bnum].sum().item() + encoded_text[bnum][nvalid_elem:] = 0 + + return encoded_text, attn_mask diff --git a/nemo/collections/multimodal/modules/imagen/sampler/batch_ops.py b/nemo/collections/multimodal/modules/imagen/sampler/batch_ops.py new file mode 100644 index 000000000000..029bbf60ffbc --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/sampler/batch_ops.py @@ -0,0 +1,57 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Functions for performing operations with broadcasting to the right axis +# +# Example +# input1: tensor of size (N1, N2) +# input2: tensor of size (N1, N2, N3, N4) +# batch_mul(input1, input2) = input1[:, :, None, None] * input2 +# +# If the common dimensions don't match, we raise an assertion error. + + +def common_broadcast(x, y): + ndims1 = x.ndim + ndims2 = y.ndim + + common_ndims = min(ndims1, ndims2) + for axis in range(common_ndims): + assert x.shape[axis] == y.shape[axis], 'Dimensions not equal at axis {}'.format(axis) + + if ndims1 < ndims2: + x = x.reshape(x.shape + (1,) * (ndims2 - ndims1)) + elif ndims2 < ndims1: + y = y.reshape(y.shape + (1,) * (ndims1 - ndims2)) + + return x, y + + +def batch_add(x, y): + x, y = common_broadcast(x, y) + return x + y + + +def batch_mul(x, y): + x, y = common_broadcast(x, y) + return x * y + + +def batch_sub(x, y): + x, y = common_broadcast(x, y) + return x - y + + +def batch_div(x, y): + x, y = common_broadcast(x, y) + return x / y diff --git a/nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py b/nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py new file mode 100644 index 000000000000..2b48f28ce9c9 --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/sampler/continuous_ddpm.py @@ -0,0 +1,168 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +from functools import partial, wraps + +import torch +import torch.nn as nn +from einops import repeat +from torch.special import expm1 + +from nemo.collections.multimodal.parts.utils import randn_like + + +def exists(val): + return val is not None + + +def default(val, d): + if exists(val): + return val + return d() if callable(d) else d + + +def maybe(fn): + @wraps(fn) + def inner(x): + if not exists(x): + return x + return fn(x) + + return inner + + +def log(t, eps: float = 1e-12): + return torch.log(t.clamp(min=eps)) + + +def right_pad_dims_to(x, t): + padding_dims = x.ndim - t.ndim + if padding_dims <= 0: + return t + return t.view(*t.shape, *((1,) * padding_dims)) + + +@torch.jit.script +def beta_linear_log_snr(t): + return -torch.log(expm1(1e-4 + 10 * (t ** 2))) + + +@torch.jit.script +def alpha_cosine_log_snr(t, s: float = 0.008): + return -log( + (torch.cos((t + s) / (1 + s) * math.pi * 0.5) ** -2) - 1, eps=1e-5 + ) # not sure if this accounts for beta being clipped to 0.999 in discrete version + + +def log_snr_to_alpha_sigma(log_snr): + return torch.sqrt(torch.sigmoid(log_snr)), torch.sqrt(torch.sigmoid(-log_snr)) + + +class GaussianDiffusionContinuousTimes(nn.Module): + def __init__(self, *, noise_schedule, timesteps=1000, rng=None): + super().__init__() + + if noise_schedule == "linear": + self.log_snr = beta_linear_log_snr + elif noise_schedule == "cosine": + self.log_snr = alpha_cosine_log_snr + else: + raise ValueError(f'invalid noise schedule {noise_schedule}') + + self.num_timesteps = timesteps + self.rng = rng + + def get_times(self, batch_size, noise_level, *, device): + return torch.full((batch_size,), noise_level, device=device, dtype=torch.float32) + + def sample_random_times(self, batch_size, *, device): + return torch.rand((batch_size,), device=device, generator=self.rng, dtype=torch.float32) + + def get_condition(self, times): + return maybe(self.log_snr)(times) + + def get_sampling_timesteps(self, batch, *, device): + times = torch.linspace(1.0, 0.0, self.num_timesteps + 1, device=device) + times = repeat(times, 't -> b t', b=batch) + times = torch.stack((times[:, :-1], times[:, 1:]), dim=0) + times = times.unbind(dim=-1) + return times + + def q_posterior(self, x_start, x_t, t, *, t_next=None): + t_next = default(t_next, lambda: (t - 1.0 / self.num_timesteps).clamp(min=0.0)) + + """ https://openreview.net/attachment?id=2LdBqxc1Yv&name=supplementary_material """ + log_snr = self.log_snr(t) + log_snr_next = self.log_snr(t_next) + log_snr, log_snr_next = map(partial(right_pad_dims_to, x_t), (log_snr, log_snr_next)) + + alpha, sigma = log_snr_to_alpha_sigma(log_snr) + alpha_next, sigma_next = log_snr_to_alpha_sigma(log_snr_next) + + # c - as defined near eq 33 + c = -expm1(log_snr - log_snr_next) + posterior_mean = alpha_next * (x_t * (1 - c) / alpha + c * x_start) + + # following (eq. 33) + posterior_variance = (sigma_next ** 2) * c + posterior_log_variance_clipped = log(posterior_variance, eps=1e-20) + return posterior_mean, posterior_variance, posterior_log_variance_clipped + + def q_sample(self, x_start, t, noise=None): + dtype = x_start.dtype + + if isinstance(t, float): + batch = x_start.shape[0] + t = torch.full((batch,), t, device=x_start.device, dtype=dtype) + + noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) + log_snr = self.log_snr(t).type(dtype) + log_snr_padded_dim = right_pad_dims_to(x_start, log_snr) + alpha, sigma = log_snr_to_alpha_sigma(log_snr_padded_dim) + + return alpha * x_start + sigma * noise, log_snr, alpha, sigma + + def q_sample_from_to(self, x_from, from_t, to_t, noise=None): + shape, device, dtype = x_from.shape, x_from.device, x_from.dtype + batch = shape[0] + + if isinstance(from_t, float): + from_t = torch.full((batch,), from_t, device=device, dtype=dtype) + + if isinstance(to_t, float): + to_t = torch.full((batch,), to_t, device=device, dtype=dtype) + + noise = default(noise, lambda: randn_like(x_from, generator=self.rng)) + + log_snr = self.log_snr(from_t) + log_snr_padded_dim = right_pad_dims_to(x_from, log_snr) + alpha, sigma = log_snr_to_alpha_sigma(log_snr_padded_dim) + + log_snr_to = self.log_snr(to_t) + log_snr_padded_dim_to = right_pad_dims_to(x_from, log_snr_to) + alpha_to, sigma_to = log_snr_to_alpha_sigma(log_snr_padded_dim_to) + + return x_from * (alpha_to / alpha) + noise * (sigma_to * alpha - sigma * alpha_to) / alpha + + def predict_start_from_v(self, x_t, t, v): + log_snr = self.log_snr(t) + log_snr = right_pad_dims_to(x_t, log_snr) + alpha, sigma = log_snr_to_alpha_sigma(log_snr) + return alpha * x_t - sigma * v + + def predict_start_from_noise(self, x_t, t, noise): + log_snr = self.log_snr(t) + log_snr = right_pad_dims_to(x_t, log_snr) + alpha, sigma = log_snr_to_alpha_sigma(log_snr) + return (x_t - sigma * noise) / alpha.clamp(min=1e-8) diff --git a/nemo/collections/multimodal/modules/imagen/sampler/sampler.py b/nemo/collections/multimodal/modules/imagen/sampler/sampler.py new file mode 100644 index 000000000000..2fd05faf814d --- /dev/null +++ b/nemo/collections/multimodal/modules/imagen/sampler/sampler.py @@ -0,0 +1,250 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import torch +from einops import rearrange +from tqdm import tqdm + +from nemo.collections.multimodal.modules.imagen.sampler.batch_ops import batch_div, batch_mul +from nemo.collections.multimodal.modules.imagen.sampler.continuous_ddpm import GaussianDiffusionContinuousTimes + + +def right_pad_dims_to(x, t): + padding_dims = x.ndim - t.ndim + if padding_dims <= 0: + return t + return t.view(*t.shape, *((1,) * padding_dims)) + + +def thresholding_x0(x0, method='dynamic', th=0.995): + if method is None: + return x0 + elif method == 'static': + return x0.clamp(-1.0, 1.0) + elif method == 'dynamic': + # torch.quantile only suppoprt either float or double dtype + # we need to manual cast it if running in FP16/AMP mode + original_dtype = x0.dtype + if original_dtype not in [torch.float, torch.double]: + x0 = x0.float() + s = torch.quantile(rearrange(x0, 'b ... -> b (...)').abs(), th, dim=-1) # From Figure A.10 (b) + s.clamp_(min=1.0) + s = right_pad_dims_to(x0, s) + x0 = x0.clamp(-s, s) / s + return x0.type(original_dtype) + else: + raise RuntimeError(f'Thresholding method: {method} not supported.') + + +def thresholding_derivative(x, t, d, thresholding_method='dynamic'): + x0 = x - batch_mul(d, t) + corrected_x0 = thresholding_x0(x0, thresholding_method) + corrected_d = batch_div(x - corrected_x0, t) + return corrected_d + + +class Sampler(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, model, model_kwargs, shape, z=None): + pass + + +class DDPMSampler(Sampler): + def __init__(self, unet_type, denoiser): + super().__init__() + self.unet_type = unet_type + self.noise_scheduler = denoiser + self.pred_objective = 'noise' + + def p_mean_variance( + self, unet, x, t, t_next, text_embeds, text_mask, x_low_res=None, cond_scale=1.0, thresholding_method='dynamic' + ): + + if self.unet_type == 'base': + pred = unet.forward_with_cond_scale( + x=x, time=t, text_embed=text_embeds, text_mask=text_mask, cond_scale=cond_scale + ) + elif self.unet_type == 'sr': + pred = unet.forward_with_cond_scale( + x=x, x_low_res=x_low_res, time=t, text_embed=text_embeds, text_mask=text_mask, cond_scale=cond_scale + ) + + if self.pred_objective == 'noise': + x_start = self.noise_scheduler.predict_start_from_noise(x, t=t, noise=pred) + elif self.pred_objective == 'x_start': + x_start = pred + elif self.pred_objective == 'v': + x_start = self.noise_scheduler.predict_start_from_v(x, t=t, v=pred) + else: + raise ValueError(f'unknown objective {self.pred_objective}') + + x_start = thresholding_x0(x_start, method=thresholding_method) + mean_and_variance = self.noise_scheduler.q_posterior(x_start=x_start, x_t=x, t=t, t_next=t_next) + return mean_and_variance, x_start + + @torch.no_grad() + def p_sample( + self, unet, x, t, t_next, text_embeds, text_mask, x_low_res=None, cond_scale=1.0, thresholding_method='dynamic' + ): + (model_mean, _, model_log_variance), x_start = self.p_mean_variance( + unet=unet, + x=x, + t=t, + t_next=t_next, + text_embeds=text_embeds, + text_mask=text_mask, + cond_scale=cond_scale, + x_low_res=x_low_res, + thresholding_method=thresholding_method, + ) + noise = torch.randn_like(x) + # no noise when t == 0 + b = x.shape[0] + is_last_sampling_timestep = ( + (t_next == 0) if isinstance(self.noise_scheduler, GaussianDiffusionContinuousTimes) else (t == 0) + ) + nonzero_mask = (1 - is_last_sampling_timestep.type_as(x)).reshape(b, *((1,) * (len(x.shape) - 1))) + pred = model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise + return pred, x_start + + def forward( + self, + model, + noise_map, + text_encoding, + text_mask, + x_low_res=None, + cond_scale=1.0, + sampling_steps=None, + thresholding_method='dynamic', + ): + batch = noise_map.shape[0] + device = noise_map.device + dtype = noise_map.dtype + original_steps = self.noise_scheduler.num_timesteps + if sampling_steps: + self.noise_scheduler.num_timesteps = sampling_steps + timesteps = self.noise_scheduler.get_sampling_timesteps(batch, device=device) + img = noise_map + for times, times_next in tqdm(timesteps, total=len(timesteps)): + img, x_start = self.p_sample( + unet=model, + x=img.type(dtype), + t=times.type(dtype), + t_next=times_next.type(dtype), + text_embeds=text_encoding, + text_mask=text_mask, + cond_scale=cond_scale, + x_low_res=x_low_res.type(dtype) if x_low_res is not None else None, + thresholding_method=thresholding_method, + ) + self.noise_scheduler.num_timesteps = original_steps + return img + + +class EDMSampler(Sampler): + def __init__( + self, + unet_type, + num_steps=50, + sigma_min=0.002, + sigma_max=80, + rho=7, + S_churn=0, + S_min=0, + S_max=float('inf'), + S_noise=1, + ): + super().__init__() + self.unet_type = unet_type + self.sigma_min = sigma_min + self.sigma_max = sigma_max + self.rho = rho + self.S_churn = S_churn + self.S_min = S_min + self.S_max = S_max + self.S_noise = S_noise + self.num_steps = num_steps + + def forward( + self, + unet, + noise_map, + text_encoding, + text_mask, + x_low_res=None, + cond_scale=1.0, + sampling_steps=None, + thresholding_method='dynamic', + ): + if self.unet_type == 'base': + assert x_low_res is None + elif self.unet_type == 'sr': + assert x_low_res is not None + low_res_cond = {'x_low_res': x_low_res} if x_low_res is not None else {} + thresholding_method = 'dynamic' + sigma_min = self.sigma_min + sigma_max = self.sigma_max + print(f'Sampling with sigma in [{sigma_min}, {sigma_max}], cfg={cond_scale}') + # Time step discretization + num_steps = sampling_steps if sampling_steps else self.num_steps + step_indices = torch.arange(num_steps, device=noise_map.device) + # Table 1: Sampling - Time steps + t_steps = ( + sigma_max ** (1 / self.rho) + + step_indices / (num_steps - 1) * (sigma_min ** (1 / self.rho) - sigma_max ** (1 / self.rho)) + ) ** self.rho + t_steps = torch.cat([t_steps, torch.zeros_like(t_steps[:1])]) # t_N = 0 + + # Main sampling loop. + x_next = noise_map * t_steps[0] + for i, (t_cur, t_next) in tqdm( + enumerate(zip(t_steps[:-1], t_steps[1:])), total=len(t_steps[:-1]) + ): # 0, ..., N-1 + x_cur = x_next + + # Increase noise temporarily. + gamma = min(self.S_churn / num_steps, np.sqrt(2) - 1) if self.S_min <= t_cur <= self.S_max else 0 + t_hat = (t_cur + gamma * t_cur).to(x_cur.device) + x_hat = x_cur + (t_hat ** 2 - t_cur ** 2).sqrt() * self.S_noise * torch.randn_like(x_cur) + + # Euler step. + denoised = unet.forward_with_cond_scale( + x=x_hat.to(torch.float32), + time=t_hat.to(torch.float32), + text_embed=text_encoding, + text_mask=text_mask, + cond_scale=cond_scale, + **low_res_cond, + ) + d_cur = (x_hat - denoised) / t_hat + d_cur = thresholding_derivative(x_hat, t_hat, d_cur, thresholding_method=thresholding_method) + x_next = x_hat + (t_next - t_hat) * d_cur + + # Apply 2nd order correction. + if i < num_steps - 1: + denoised = unet.forward_with_cond_scale( + x=x_next.to(torch.float32), + time=t_next.to(torch.float32), + text_embed=text_encoding, + text_mask=text_mask, + cond_scale=cond_scale, + **low_res_cond, + ) + d_prime = (x_next - denoised) / t_next + d_prime = thresholding_derivative(x_next, t_next, d_prime, thresholding_method=thresholding_method) + x_next = x_hat + (t_next - t_hat) * (0.5 * d_cur + 0.5 * d_prime) + return x_next diff --git a/nemo/collections/multimodal/modules/nerf/__init__.py b/nemo/collections/multimodal/modules/nerf/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/modules/nerf/background/nerf_background_base.py b/nemo/collections/multimodal/modules/nerf/background/nerf_background_base.py new file mode 100644 index 000000000000..3d03d14694be --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/background/nerf_background_base.py @@ -0,0 +1,22 @@ +import torch +import torch.nn as nn + +# TODO(ahmadki): abstract class +class NeRFBackgroundBase(nn.Module): + def __init__(self): + super().__init__() + + def encode(self, rays_d: torch.Tensor) -> torch.Tensor: + """ + positions = [B*N, 3] + """ + raise NotImplementedError + + def forward_net(self, rays_d_encoding: torch.Tensor) -> torch.Tensor: + raise NotImplementedError + + def forward(self, rays_d: torch.Tensor) -> torch.Tensor: + rays_d_encoding = self.encode(rays_d) + features = self.forward_net(rays_d_encoding) + features = torch.sigmoid(features) + return features diff --git a/nemo/collections/multimodal/modules/nerf/background/random_background.py b/nemo/collections/multimodal/modules/nerf/background/random_background.py new file mode 100644 index 000000000000..5f7f77d99596 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/background/random_background.py @@ -0,0 +1,19 @@ +import random +from typing import Tuple + +import torch +import torch.nn as nn + + +class RandomBackground(nn.Module): + def __init__(self, base_background: Tuple, random_ratio: float) -> None: + super().__init__() + self.random_ratio = random_ratio + self.num_output_dims = len(base_background) + self.register_buffer("base_background", torch.tensor(base_background)) + + def forward(self, rays_d: torch.Tensor) -> torch.Tensor: + if random.random() < self.random_ratio: + return torch.rand(rays_d.shape[0], self.num_output_dims).to(rays_d) + else: + return self.base_background.to(rays_d).expand(rays_d.shape[0], -1) diff --git a/nemo/collections/multimodal/modules/nerf/background/static_background.py b/nemo/collections/multimodal/modules/nerf/background/static_background.py new file mode 100644 index 000000000000..955884161626 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/background/static_background.py @@ -0,0 +1,14 @@ +from typing import Tuple + +import torch +import torch.nn as nn + + +class StaticBackground(nn.Module): + def __init__(self, background: Tuple) -> None: + super().__init__() + self.register_buffer("background", torch.tensor(background)) + + def forward(self, rays_d: torch.Tensor) -> torch.Tensor: + background = self.background.to(rays_d) + return background.expand(rays_d.shape[0], -1) diff --git a/nemo/collections/multimodal/modules/nerf/background/tcnn_background.py b/nemo/collections/multimodal/modules/nerf/background/tcnn_background.py new file mode 100644 index 000000000000..3b45a60717a5 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/background/tcnn_background.py @@ -0,0 +1,32 @@ +from typing import Dict + +import numpy as np +import tinycudann as tcnn +import torch + +from nemo.collections.multimodal.modules.nerf.background.nerf_background_base import NeRFBackgroundBase + + +class TCNNBackground(NeRFBackgroundBase): + def __init__( + self, + bound: int, + encoder_num_input_dims: int, + encoder_cfg: Dict, + background_net_num_output_dims: int, + background_net_cfg: Dict, + ): + super().__init__() + self.bound = bound + if encoder_cfg.get('per_level_scale') is None: + encoder_cfg['per_level_scale'] = np.exp2(np.log2(2048 * self.bound / 16) / (16 - 1)) + self.encoder = tcnn.Encoding(n_input_dims=encoder_num_input_dims, encoding_config=dict(encoder_cfg)) + self.background_net = tcnn.Network( + self.encoder.n_output_dims, background_net_num_output_dims, network_config=dict(background_net_cfg) + ) + + def encode(self, rays_d: torch.Tensor) -> torch.Tensor: + return self.encoder(rays_d) + + def forward_net(self, rays_d_encoding: torch.Tensor) -> torch.Tensor: + return self.background_net(rays_d_encoding) diff --git a/nemo/collections/multimodal/modules/nerf/background/torchngp_background.py b/nemo/collections/multimodal/modules/nerf/background/torchngp_background.py new file mode 100644 index 000000000000..e792858cacce --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/background/torchngp_background.py @@ -0,0 +1,31 @@ +from typing import Dict + +import torch + +from nemo.collections.multimodal.modules.nerf.background.nerf_background_base import NeRFBackgroundBase +from nemo.collections.multimodal.modules.nerf.geometry.layers import MLP +from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.encoding import get_encoder + + +class TorchNGPBackground(NeRFBackgroundBase): + def __init__( + self, encoder_type: str, encoder_input_dims: int, encoder_multi_res: int, num_output_dims: int, net_cfg: Dict + ): + super().__init__() + + self.encoder, self.encoder_output_dims = get_encoder( + encoder_type, input_dim=encoder_input_dims, multires=encoder_multi_res + ) + self.background_net = MLP( + num_input_dims=self.encoder_output_dims, + num_output_dims=num_output_dims, + num_hidden_dims=net_cfg.num_hidden_dims, + num_layers=net_cfg.num_layers, + bias=net_cfg.bias, + ) + + def encode(self, rays_d: torch.Tensor) -> torch.Tensor: + return self.encoder(rays_d) + + def forward_net(self, rays_d_encoding: torch.Tensor) -> torch.Tensor: + return self.background_net(rays_d_encoding) diff --git a/nemo/collections/multimodal/modules/nerf/geometry/__init__.py b/nemo/collections/multimodal/modules/nerf/geometry/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/modules/nerf/geometry/dmtet.py b/nemo/collections/multimodal/modules/nerf/geometry/dmtet.py new file mode 100644 index 000000000000..c04ac342cec0 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/geometry/dmtet.py @@ -0,0 +1,150 @@ +import torch + + +class DeepMarchingTetrahedra: + """ + Class for Deep Marching Tetrahedra (DMTet). + + Attributes: + device (torch.device): Device to place the tensors. + triangle_table (Tensor): Lookup table for the triangles. + num_triangles_table (Tensor): Table for the number of triangles. + base_tet_edges (Tensor): The base edges for the tetrahedrons. + """ + + def __init__(self, device: torch.device) -> None: + """Initialize DMTet instance with the given device. + + Args: + device (torch.device): The device to place the tensors on. + """ + self.device = device + self.triangle_table = self._create_triangle_table() + self.num_triangles_table = self._create_num_triangles_table() + self.base_tet_edges = self._create_base_tet_edges() + + def _create_triangle_table(self) -> torch.Tensor: + """Create the lookup table for triangles. + + Returns: + Tensor: The triangle lookup table. + """ + return torch.tensor( + [ + [-1, -1, -1, -1, -1, -1], + [1, 0, 2, -1, -1, -1], + [4, 0, 3, -1, -1, -1], + [1, 4, 2, 1, 3, 4], + [3, 1, 5, -1, -1, -1], + [2, 3, 0, 2, 5, 3], + [1, 4, 0, 1, 5, 4], + [4, 2, 5, -1, -1, -1], + [4, 5, 2, -1, -1, -1], + [4, 1, 0, 4, 5, 1], + [3, 2, 0, 3, 5, 2], + [1, 3, 5, -1, -1, -1], + [4, 1, 2, 4, 3, 1], + [3, 0, 4, -1, -1, -1], + [2, 0, 1, -1, -1, -1], + [-1, -1, -1, -1, -1, -1], + ], + dtype=torch.long, + device=self.device, + ) + + def _create_num_triangles_table(self) -> torch.Tensor: + """Create the table for number of triangles. + + Returns: + Tensor: The number of triangles table. + """ + return torch.tensor([0, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 0], dtype=torch.long, device=self.device) + + def _create_base_tet_edges(self) -> torch.Tensor: + """Create the base edges for the tetrahedrons. + + Returns: + Tensor: The base edges for tetrahedrons. + """ + return torch.tensor([0, 1, 0, 2, 0, 3, 1, 2, 1, 3, 2, 3], dtype=torch.long, device=self.device) + + def _sort_edges(self, edges_ex2: torch.Tensor) -> torch.Tensor: + """Sort the given edges. + + Args: + edges_ex2 (Tensor): The edges to be sorted. + + Returns: + Tensor: The sorted edges. + """ + with torch.no_grad(): + order = (edges_ex2[:, 0] > edges_ex2[:, 1]).long() + order = order.unsqueeze(dim=1) + a = torch.gather(input=edges_ex2, index=order, dim=1) + b = torch.gather(input=edges_ex2, index=1 - order, dim=1) + return torch.stack([a, b], -1) + + # TODO(ahmadki): rename to forward ? return mesh ? + def __call__(self, positions: torch.Tensor, sdf_n: torch.Tensor, tet_fx4: torch.Tensor) -> tuple: + """ + Process the provided data to generate vertices and faces. + + Args: + positions (Tensor): Position tensor with shape [N, 3]. + sdf_n (Tensor): SDF tensor with shape [N]. + tet_fx4 (Tensor): Tetrahedron faces tensor with shape [F, 4]. + + Returns: + tuple: Vertices and faces tensors. + """ + with torch.no_grad(): + occ_n = sdf_n > 0 + occ_fx4 = occ_n[tet_fx4.reshape(-1)].reshape(-1, 4) + occ_sum = torch.sum(occ_fx4, -1) + valid_tets = (occ_sum > 0) & (occ_sum < 4) + occ_sum = occ_sum[valid_tets] + + # find all vertices + all_edges = tet_fx4[valid_tets][:, self.base_tet_edges].reshape(-1, 2) + all_edges = self._sort_edges(all_edges) + unique_edges, idx_map = torch.unique(all_edges, dim=0, return_inverse=True) + + unique_edges = unique_edges.long() + mask_edges = occ_n[unique_edges.reshape(-1)].reshape(-1, 2).sum(-1) == 1 + mapping = torch.ones((unique_edges.shape[0]), dtype=torch.long, device=self.device) * -1 + mapping[mask_edges] = torch.arange(mask_edges.sum(), dtype=torch.long, device=self.device) + idx_map = mapping[idx_map] # map edges to verts + + interp_v = unique_edges[mask_edges] + + edges_to_interp = positions[interp_v.reshape(-1)].reshape(-1, 2, 3) + edges_to_interp_sdf = sdf_n[interp_v.reshape(-1)].reshape(-1, 2, 1) + edges_to_interp_sdf[:, -1] *= -1 + + denominator = edges_to_interp_sdf.sum(1, keepdim=True) + edges_to_interp_sdf = torch.flip(edges_to_interp_sdf, [1]) / denominator + verts = (edges_to_interp * edges_to_interp_sdf).sum(1) + + idx_map = idx_map.reshape(-1, 6) + v_id = torch.pow(2, torch.arange(4, dtype=torch.long, device=self.device)) + tetindex = (occ_fx4[valid_tets] * v_id.unsqueeze(0)).sum(-1) + num_triangles = self.num_triangles_table[tetindex] + + # Generate triangle indices + faces = torch.cat( + ( + torch.gather( + input=idx_map[num_triangles == 1], + dim=1, + index=self.triangle_table[tetindex[num_triangles == 1]][:, :3], + ).reshape(-1, 3), + torch.gather( + input=idx_map[num_triangles == 2], + dim=1, + index=self.triangle_table[tetindex[num_triangles == 2]][:, :6], + ).reshape(-1, 3), + ), + dim=0, + ) + + return verts, faces diff --git a/nemo/collections/multimodal/modules/nerf/geometry/layers.py b/nemo/collections/multimodal/modules/nerf/geometry/layers.py new file mode 100644 index 000000000000..c80696bd170c --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/geometry/layers.py @@ -0,0 +1,129 @@ +from typing import Callable, List, Type, Union + +import torch +import torch.nn as nn + +BlockBuilder = Union[Callable[[int, int, bool], nn.Module], Type[nn.Module], None] + + +class MLP(nn.Module): + """ + A Multi-Layer Perceptron (MLP) module. + + Args: + num_input_dims (int): Number of input dimensions. + num_output_dims (int): Number of output dimensions. + num_hidden_dims (int): Number of hidden dimensions. + num_layers (int): Number of layers in the MLP. + bias (bool): If True, enables the bias in Linear layers. Default is True. + block (BlockBuilder): A callable or class for constructing a block. Default is None. + """ + + def __init__( + self, + num_input_dims: int, + num_output_dims: int, + num_hidden_dims: int, + num_layers: int, + bias: bool = True, + block: BlockBuilder = None, + ): + super().__init__() + + # Initialize the network as an empty list + network = [] + + # Add input layer + network.append(nn.Linear(num_input_dims, num_hidden_dims, bias=bias)) + network.append(nn.ReLU(inplace=True)) + + # Add hidden layers + for _ in range(1, num_layers - 1): + network.extend(self.build_layer(num_hidden_dims, num_hidden_dims, bias, block)) + + # Add output layer + network.append(nn.Linear(num_hidden_dims, num_output_dims, bias=bias)) + + # Wrap layers in ModuleList for proper registration + self.net = nn.ModuleList(network) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Forward pass through the MLP. + + Args: + x (torch.Tensor): Input tensor. + + Returns: + torch.Tensor: Output tensor. + """ + for module in self.net: + x = module(x) + return x + + @staticmethod + def build_layer( + num_input_dims: int, num_output_dims: int, bias: bool = True, block_builder: BlockBuilder = None + ) -> List[nn.Module]: + """ + Build a single layer for the MLP. + + Args: + num_input_dims (int): Number of input dimensions. + num_output_dims (int): Number of output dimensions. + bias (bool): If True, enables the bias in Linear layers. Default is True. + block_builder (BlockBuilder): A callable or class for constructing a block. Default is None. + + Returns: + List[nn.Module]: A list containing the layer's modules. + """ + if block_builder is None: + return [nn.Linear(num_input_dims, num_output_dims, bias=bias), nn.ReLU(inplace=True)] + else: + return [block_builder(num_input_dims, num_output_dims, bias=bias)] + + +class ResBlock(nn.Module): + """ + A residual block module. + + Args: + num_input_dims (int): Number of input dimensions. + num_output_dims (int): Number of output dimensions. + bias (bool): If True, enables the bias in Linear layers. Default is True. + """ + + def __init__(self, num_input_dims: int, num_output_dims: int, bias: bool = True): + super().__init__() + + self.dense = nn.Linear(num_input_dims, num_output_dims, bias=bias) + self.norm = nn.LayerNorm(num_output_dims) + self.activation = nn.SiLU(inplace=True) + + if num_input_dims != num_output_dims: + self.skip = nn.Linear(num_input_dims, num_output_dims, bias=False) + else: + self.skip = None + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Forward pass through the residual block. + + Args: + x (torch.Tensor): Input tensor. + + Returns: + torch.Tensor: Output tensor. + """ + identity = x + + out = self.dense(x) + out = self.norm(out) + + if self.skip is not None: + identity = self.skip(identity) + + out += identity + out = self.activation(out) + + return out diff --git a/nemo/collections/multimodal/modules/nerf/geometry/nerf_base.py b/nemo/collections/multimodal/modules/nerf/geometry/nerf_base.py new file mode 100644 index 000000000000..0b5eb6b6f260 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/geometry/nerf_base.py @@ -0,0 +1,360 @@ +from enum import Enum +from typing import Callable, Optional, Tuple, Union + +import mcubes +import numpy as np +import pymeshlab +import torch +import torch.nn as nn +import torch.nn.functional as F +import trimesh + +from nemo.collections.multimodal.modules.nerf.utils.activation import trunc_exp + + +class DensityActivationEnum(str, Enum): + EXP = "exp" + SOFTPLUS = "softplus" + + +class NormalTypeEnum(str, Enum): + AUTOGRAD = "autograd" + FORWARD_FINITE_DIFFERENCE = "forward_finite_difference" + BACKWARD_FINITE_DIFFERENCE = "backward_finite_difference" + CENTRAL_FINITE_DIFFERENCE = "central_finite_difference" + + +# TODO(ahmadki): make abstract +class NeRFBase(nn.Module): + """ + A base class for Neural Radiance Fields (NeRF) models. + + Args: + num_input_dims (int): Number of input dimensions. + bound (torch.Tensor): The bounding box tensor. + density_activation (DensityActivationEnum): Activation function for density. + blob_radius (float): Radius for the blob. + blob_density (float): Density for the blob. + normal_type (Optional[NormalTypeEnum]): Method to compute normals. + """ + + def __init__( + self, + num_input_dims: int, + bound: torch.Tensor, + density_activation: DensityActivationEnum, + blob_radius: float, + blob_density: float, + normal_type: Optional[NormalTypeEnum] = NormalTypeEnum.CENTRAL_FINITE_DIFFERENCE, + ) -> None: + super().__init__() + self.num_input_dims = num_input_dims + self.bound = bound + self.density_activation = density_activation + self.blob_radius = blob_radius + self.blob_density = blob_density + self.normal_type = normal_type + + def encode(self, positions: torch.Tensor) -> torch.Tensor: + """Encode 3D positions. To be implemented by subclasses.""" + raise NotImplementedError + + def sigma_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: + """Calculate sigma (density). To be implemented by subclasses.""" + raise NotImplementedError + + def features_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: + """Calculate features. To be implemented by subclasses.""" + raise NotImplementedError + + def forward( + self, positions: torch.Tensor, return_normal: bool = True + ) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: + """ + Forward pass for the NeRF model. + + Args: + positions (torch.Tensor): The positions. + return_normal (bool): Flag to indicate whether to return normals or not. + + Returns: + Tuple containing density, features, and possibly normals. + """ + + if return_normal: + if self.normal_type == NormalTypeEnum.AUTOGRAD: + with torch.enable_grad(): + positions.requires_grad_(True) + sigma, features = self.forward_density_features(positions) + normal = -torch.autograd.grad(torch.sum(sigma), positions, create_graph=True)[0] # [N, D] + elif self.normal_type in [ + NormalTypeEnum.CENTRAL_FINITE_DIFFERENCE, + NormalTypeEnum.FORWARD_FINITE_DIFFERENCE, + NormalTypeEnum.BACKWARD_FINITE_DIFFERENCE, + ]: + sigma, features = self.forward_density_features(positions) + normal = self.normal_finite_differences(positions) + else: + raise NotImplementedError("Invalid normal type.") + + normal = F.normalize(normal) + normal = torch.nan_to_num(normal) + else: + sigma, features = self.forward_density_features(positions) + normal = None + + return sigma, features, normal + + def forward_density_features(self, positions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Calculate both density and features based on the input positions. + + This function takes into account edge cases like empty input tensors and calculates + the density and features accordingly. See GitHub issues for details: + - https://github.com/KAIR-BAIR/nerfacc/issues/207#issuecomment-1653621720 + - https://github.com/ashawkey/torch-ngp/issues/176 + + Args: + positions (torch.Tensor): Input positions tensor with shape [B*N, D]. + + Returns: + Tuple[torch.Tensor, torch.Tensor]: Tuple containing density and features tensors. + """ + + # Handle empty positions + if positions.shape[0] == 0: + sigma = torch.zeros(0, device=positions.device) + features = torch.zeros(0, self.num_input_dims, device=positions.device) + return sigma, features + + # Encode positions + positions_encoding = self.encode(positions) + + # Compute density + density = self.forward_density(positions, positions_encoding) + + # Compute features + features = self.forward_features(positions, positions_encoding) + + return density, features + + def forward_density( + self, positions: torch.Tensor, positions_encoding: Optional[torch.Tensor] = None + ) -> torch.Tensor: + """ + Calculate the density based on the input positions and their encoding. + + Args: + positions (torch.Tensor): Input positions tensor with shape [B*N, D]. + positions_encoding (Optional[torch.Tensor]): Optional encoded positions. + Will be computed from `positions` if not provided. + + Returns: + torch.Tensor: Density tensor. + """ + + # Handle empty positions + if positions.shape[0] == 0: + sigma = torch.zeros(0, device=positions.device) + return sigma + + # Compute encoded positions if not provided + if positions_encoding is None: + positions_encoding = self.encode(positions) + + # Compute sigma using the neural network + sigma = self.sigma_net(positions_encoding) + + # Compute density using activation function + if self.density_activation == DensityActivationEnum.EXP: + density = trunc_exp(sigma + self.density_blob(positions)) + elif self.density_activation == DensityActivationEnum.SOFTPLUS: + density = F.softplus(sigma + self.density_blob(positions)) + else: + raise NotImplementedError("Invalid density activation.") + + return density + + def forward_features( + self, positions: torch.Tensor, positions_encoding: Optional[torch.Tensor] = None + ) -> torch.Tensor: + """ + Compute the features based on the input positions and their encoding. + + Args: + positions (torch.Tensor): Input positions tensor with shape [B*N, D]. + positions_encoding (Optional[torch.Tensor]): Optional encoded positions. + Will be computed from `positions` if not provided. + + Returns: + torch.Tensor: Features tensor with shape [B*N, num_features_dims]. + """ + + # Handle empty positions + if positions.shape[0] == 0: + features = torch.zeros(0, self.num_features_dims, device=positions.device) + return features + + # Compute encoded positions if not provided + if positions_encoding is None: + positions_encoding = self.encode(positions) + + # Compute features using the neural network + features = self.features_net(positions_encoding) + + # Apply the sigmoid activation function to the features + features = torch.sigmoid(features) + + return features + + @torch.no_grad() + def density_blob(self, positions: torch.Tensor) -> torch.Tensor: + """ + Compute the density blob for the given positions. + + This method computes a density blob for each position in the tensor. It is + used to add a density value based on the distance of each position from the origin. + + Args: + positions (torch.Tensor): Input positions tensor with shape [B*N, D]. + + Returns: + torch.Tensor: Density blob tensor with shape [B*N, 1]. + """ + + # Compute the squared distance for each position + d = (positions ** 2).sum(-1) + + # Compute the density blob based on the activation function + if self.density_activation == DensityActivationEnum.EXP: + g = self.blob_density * torch.exp(-d / (2 * self.blob_radius ** 2)) + elif self.density_activation == DensityActivationEnum.SOFTPLUS: + g = self.blob_density * (1 - torch.sqrt(d) / self.blob_radius) + else: + raise NotImplementedError("Invalid density activation.") + + return g + + def normal_finite_differences(self, positions: torch.Tensor, eps: float = 1e-2) -> torch.Tensor: + """ + Calculate normals using finite differences. + + Args: + positions (torch.Tensor): Input positions tensor with shape [B*N, D]. + eps (float): A small value for finite difference calculation. Default is 1e-2. + + Returns: + torch.Tensor: Calculated normals tensor [B*N, D] + """ + # Create perturbation tensor + perturb = torch.eye(self.num_input_dims).to(positions.device).float() * eps # Shape (D, D) + + # Expand dims for batched operation + positions_expanded = positions[:, None, :] # (B*N, 1, D) + perturb_expanded = perturb[None, :, :] # (1, D, D) + + # Compute perturbed points + if self.normal_type == NormalTypeEnum.FORWARD_FINITE_DIFFERENCE: + positions_perturbed = positions_expanded + perturb_expanded # (B*N, D, D) + elif self.normal_type == NormalTypeEnum.BACKWARD_FINITE_DIFFERENCE: + positions_perturbed = positions_expanded - perturb_expanded # (B*N, D, D) + elif self.normal_type == NormalTypeEnum.CENTRAL_FINITE_DIFFERENCE: + positions_perturbed_pos = positions_expanded + perturb_expanded # (B*N, D, D) + positions_perturbed_neg = positions_expanded - perturb_expanded # (B*N, D, D) + positions_perturbed = torch.cat([positions_perturbed_pos, positions_perturbed_neg], dim=1) # (B*N, 2*D, D) + + # Reshape perturbed points for batched function call + positions_perturbed_reshaped = positions_perturbed.view(-1, self.num_input_dims) # (B*N * {D or 2*D}, D) + + # Evaluate function at perturbed points + perturbed_sigma = self.forward_density(positions_perturbed_reshaped) # (B*N * {D or 2*D}, 1) + + # Reshape function values + if self.normal_type == NormalTypeEnum.CENTRAL_FINITE_DIFFERENCE: + perturbed_sigma = perturbed_sigma.view(-1, 2 * self.num_input_dims) # (B*N, 2*D) + sigma_pos, sigma_neg = torch.chunk(perturbed_sigma, 2, dim=1) # (B*N, D) each + normal = 0.5 * (sigma_pos - sigma_neg) / eps # (B*N, D) + else: + perturbed_sigma = perturbed_sigma.view(-1, self.num_input_dims) # (B*N, D) + sigma = self.forward_density(positions) # (B*N,) # TODO(ahmadki): use the value from forward ? + if self.normal_type == NormalTypeEnum.FORWARD_FINITE_DIFFERENCE: + normal = (perturbed_sigma - sigma[:, None]) / eps # (B*N, D) + else: # self.normal_type == BACKWARD_FINITE_DIFFERENCE + normal = (sigma[:, None] - perturbed_sigma) / eps # (B*N, D) + + return -normal + + # TODO(ahmadki): needs ar ework: + # 1. texture/vertices are off-axis, needs a fix. + # 2. device='cuda' is hardcoded + # 3. DMTet needs to go through a different code path ? create a base volume nerf, and a base dmtet nerf class ? + @torch.no_grad() + def mesh( + self, resolution: Optional[int] = 128, batch_size: int = 128, density_thresh: Optional[float] = None + ) -> pymeshlab.Mesh: + """ + Generate a mesh from the nerf. + + Args: + resolution (Optional[int]): Resolution of the mesh grid. Default is 128. + batch_size (int): Batch size for the mesh generation. Default is 128. + density_thresh (Optional[float]): Density threshold for the mesh generation. Default is None, will be calculated from mean density. + + Returns: + pymeshlab.Mesh: Mesh object. + """ + # Generate a grid of 3D points + x = np.linspace(-self.bound, self.bound, resolution) + y = np.linspace(-self.bound, self.bound, resolution) + z = np.linspace(-self.bound, self.bound, resolution) + xx, yy, zz = np.meshgrid(x, y, z) + + grid = np.stack((xx, yy, zz), axis=-1) # Shape (resolution, resolution, resolution, 3) + torch_grid = torch.tensor(grid, dtype=torch.float32).reshape(-1, 3).to(device="cuda") + + def batch_process(fn, input, batch_size): + num_points = input.shape[0] + batches = [input[i : i + batch_size] for i in range(0, num_points, batch_size)] + results = [fn(batch) for batch in batches] + results = [result.detach().cpu().numpy() for result in results] + return np.concatenate(results, axis=0) + + density = batch_process(fn=self.forward_density, input=torch_grid, batch_size=batch_size) + density = density.reshape(resolution, resolution, resolution) + + # If not provided set density_thresh based on mean density + if density_thresh is None: + density_thresh = density[density > 1e-3].mean().item() + + # Apply Marching Cubes + vertices, triangles = mcubes.marching_cubes(density, density_thresh) + + # Create a new Mesh + ms = pymeshlab.MeshSet() + + # Create Mesh using vertices and faces + m = pymeshlab.Mesh(vertices.copy(), triangles.copy()) + + # Add mesh to the MeshSet + ms.add_mesh(m, "generated_mesh") + + # Filters + ms.meshing_remove_unreferenced_vertices() + ms.meshing_remove_duplicate_faces() + ms.meshing_remove_null_faces() + ms.meshing_repair_non_manifold_edges(method=0) + ms.meshing_repair_non_manifold_vertices(vertdispratio=0) + + m = ms.current_mesh() + vertices = m.vertex_matrix() + faces = m.face_matrix() + + scaled_vertice = ( + -self.bound + (vertices / resolution) * 2 * self.bound + ) # scale vertices back to [-self.bound, self.bound] + scaled_vertices_torch = torch.tensor(scaled_vertice, dtype=torch.float32).to(device="cuda") + color = batch_process(fn=self.forward_features, input=scaled_vertices_torch, batch_size=batch_size) + + # Create the final mesh from cleaned vertices and faces and with color + mesh = trimesh.Trimesh(vertices=vertices, faces=faces, vertex_colors=color) + return mesh diff --git a/nemo/collections/multimodal/modules/nerf/geometry/tcnn_nerf.py b/nemo/collections/multimodal/modules/nerf/geometry/tcnn_nerf.py new file mode 100644 index 000000000000..2922df999d15 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/geometry/tcnn_nerf.py @@ -0,0 +1,108 @@ +from typing import Dict, Optional + +import numpy as np +import tinycudann as tcnn +import torch + +from nemo.collections.multimodal.modules.nerf.geometry.nerf_base import DensityActivationEnum, NeRFBase, NormalTypeEnum + + +# Don't fuse sigma_net with features_net: +# 1. performance benefit is questionable, especially that we sometimes require only density or features +# 2. we sacrifice generality +class TCNNNerf(NeRFBase): + """ + NeRF model with TCNN encoding and MLPs for sigma and features. + + Args: + num_input_dims (int): Number of input dimensions. + bound (torch.Tensor): The bounding box tensor. + density_activation (DensityActivationEnum): Activation function for density. + blob_radius (float): Radius for the blob. + blob_density (float): Density for the blob. + normal_type (Optional[NormalTypeEnum]): Method to compute normals. + encoder_cfg (Dict): Configuration for the TCNN encoder. + sigma_net_num_output_dims (int): Number of output dimensions for the sigma network. + sigma_net_cfg (Dict): Configuration for the sigma network. + features_net_num_output_dims (int): Number of output dimensions for the features network. + features_net_cfg (Optional[Dict]): Configuration for the features network. + """ + + def __init__( + self, + num_input_dims: int, + bound: torch.Tensor, + density_activation: DensityActivationEnum, + blob_radius: float, + blob_density: float, + normal_type: Optional[NormalTypeEnum], + encoder_cfg: Dict, + sigma_net_num_output_dims: int, + sigma_net_cfg: Dict, + features_net_num_output_dims: int, + features_net_cfg: Optional[Dict], + ) -> None: + super().__init__( + num_input_dims=num_input_dims, + bound=bound, + density_activation=density_activation, + blob_radius=blob_radius, + blob_density=blob_density, + normal_type=normal_type, + ) + + # Set per_level_scale if not set + if encoder_cfg.get('per_level_scale') is None: + encoder_cfg['per_level_scale'] = np.exp2(np.log2(2048 * self.bound / 16) / (16 - 1)) + # Build the TCNN encoder + self.encoder = tcnn.Encoding(n_input_dims=num_input_dims, encoding_config=dict(encoder_cfg)) + + # Build the sigma network + assert sigma_net_num_output_dims == 1, "sigma_net_num_output_dims!=1 is not supported" + self.sigma_tcnn = tcnn.Network( + self.encoder.n_output_dims, sigma_net_num_output_dims, network_config=dict(sigma_net_cfg) + ) + + # Build the features network + self.features_tcnn = None + if features_net_cfg is not None: + self.features_tcnn = tcnn.Network( + self.encoder.n_output_dims, features_net_num_output_dims, network_config=dict(features_net_cfg) + ) + + def encode(self, positions: torch.Tensor) -> torch.Tensor: + """ + Encode the positions using the TCNN encoder. + + Args: + positions (torch.Tensor): The positions tensor. + + Returns: + torch.Tensor: The encoded positions tensor. + """ + # TODO(ahmadki): is it safe to do with FP16 ? + return self.encoder((positions + self.bound) / (2 * self.bound)) + + def sigma_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: + """ + Compute the sigma using the TCNN network. + + Args: + positions_encoding (torch.Tensor): The encoded positions tensor. + + Returns: + torch.Tensor: The sigma tensor. + """ + return self.sigma_tcnn(positions_encoding).squeeze() + + def features_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: + """ + Compute the features using the TCNN network. + + Args: + positions_encoding (torch.Tensor): The encoded positions tensor. + + Returns: + torch.Tensor: The features tensor. + """ + return self.features_tcnn(positions_encoding) diff --git a/nemo/collections/multimodal/modules/nerf/geometry/torchngp_nerf.py b/nemo/collections/multimodal/modules/nerf/geometry/torchngp_nerf.py new file mode 100644 index 000000000000..b831b94ef84b --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/geometry/torchngp_nerf.py @@ -0,0 +1,114 @@ +from typing import Dict, Optional + +import torch + +from nemo.collections.multimodal.modules.nerf.geometry.layers import MLP +from nemo.collections.multimodal.modules.nerf.geometry.nerf_base import DensityActivationEnum, NeRFBase, NormalTypeEnum +from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.encoding import get_encoder + + +# Don't fuse sigma_net with features_net: +# 1. performance benefit is questionable, especially that we sometimes require only density or features +# 2. we sacrifice generality +class TorchNGPNerf(NeRFBase): + """ + NeRF model with Torch-NGP encoding and MLPs for sigma and features. + + Args: + num_input_dims (int): Number of input dimensions. + bound (torch.Tensor): The bounding box tensor. + density_activation (DensityActivationEnum): Activation function for density. + blob_radius (float): Radius for the blob. + blob_density (float): Density for the blob. + normal_type (Optional[NormalTypeEnum]): Method to compute normals. + encoder_type (str): Type of the encoder. + encoder_max_level (int): Maximum level of the encoder. + sigma_net_num_output_dims (int): Number of output dimensions for the sigma network. + sigma_net_cfg (Dict): Configuration for the sigma network. + features_net_num_output_dims (int): Number of output dimensions for the features network. + features_net_cfg (Optional[Dict]): Configuration for the features network. + """ + + def __init__( + self, + num_input_dims: int, + bound: torch.Tensor, + density_activation: DensityActivationEnum, + blob_radius: float, + blob_density: float, + normal_type: Optional[NormalTypeEnum], + encoder_cfg: Dict, + sigma_net_num_output_dims: int, + sigma_net_cfg: Dict, + features_net_num_output_dims: int, + features_net_cfg: Optional[Dict], + ): + super().__init__( + num_input_dims=num_input_dims, + bound=bound, + density_activation=density_activation, + blob_radius=blob_radius, + blob_density=blob_density, + normal_type=normal_type, + ) + + # Build the Torch-NGP encoder + self.encoder_max_level = encoder_cfg.get('encoder_max_level', None) + self.encoder, self.encoder_output_dims = get_encoder(input_dim=num_input_dims, **encoder_cfg) + + # Build the sigma network + assert sigma_net_num_output_dims == 1, "sigma_net_num_output_dims must be equal to 1" + self.sigma_mlp = MLP( + num_input_dims=self.encoder_output_dims, + num_output_dims=sigma_net_num_output_dims, + num_hidden_dims=sigma_net_cfg.num_hidden_dims, + num_layers=sigma_net_cfg.num_layers, + bias=sigma_net_cfg.bias, + ) + + # Build the features network + self.features_mlp = None + if features_net_cfg is not None: + self.features_mlp = MLP( + num_input_dims=self.encoder_output_dims, + num_output_dims=features_net_num_output_dims, + num_hidden_dims=features_net_cfg.num_hidden_dims, + num_layers=features_net_cfg.num_layers, + bias=features_net_cfg.bias, + ) + + def encode(self, positions: torch.Tensor) -> torch.Tensor: + """ + Encode the positions. + + Args: + positions (torch.Tensor): The positions tensor. + + Returns: + torch.Tensor: The encoded positions tensor. + """ + return self.encoder(positions, bound=self.bound, max_level=self.encoder_max_level) + + def sigma_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: + """ + Compute the sigma using the sigma network. + + Args: + positions_encoding (torch.Tensor): The encoded positions tensor. + + Returns: + torch.Tensor: The sigma tensor. + """ + return self.sigma_mlp(positions_encoding).squeeze() + + def features_net(self, positions_encoding: torch.Tensor) -> torch.Tensor: + """ + Compute the features using the features network. + + Args: + positions_encoding (torch.Tensor): The encoded positions tensor. + + Returns: + torch.Tensor: The features tensor. + """ + return self.features_mlp(positions_encoding) diff --git a/nemo/collections/multimodal/modules/nerf/guidance/__init__.py b/nemo/collections/multimodal/modules/nerf/guidance/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_huggingface_pipeline.py b/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_huggingface_pipeline.py new file mode 100644 index 000000000000..008a7b3d3627 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_huggingface_pipeline.py @@ -0,0 +1,142 @@ +from typing import List, Union + +import torch +import torch.nn.functional as F +from diffusers import DDIMScheduler, StableDiffusionPipeline + +from nemo.collections.multimodal.modules.nerf.guidance.txt2img_guidance_base import Txt2ImgGuidanceBase + + +class StableDiffusion(Txt2ImgGuidanceBase): + def __init__( + self, + model_key: str = "stabilityai/stable-diffusion-2-1-base", + t_range: List[float] = [0.02, 0.98], + precision: str = "16", + device: torch.device = torch.device('cuda'), + ): + """ + Initialize StableDiffusion with model_key, t_range, precision and device. + + Parameters: + model_key (str): Pre-trained model key. + t_range (List[float]): Range for timesteps. + precision (str): Model precision ("16", "bf16" or other for float32). + device (torch.device): Device for torch tensor. + """ + super().__init__() + + self.device = device + self.model_key = model_key + self.precision_t = self._get_precision_type(precision) + + # Create model + pipe = StableDiffusionPipeline.from_pretrained(model_key, torch_dtype=self.precision_t).to(self.device) + if self.precision_t in [torch.float16, torch.bfloat16]: + pipe.unet.to(memory_format=torch.channels_last) + + self.vae = pipe.vae + self.tokenizer = pipe.tokenizer + self.text_encoder = pipe.text_encoder + self.unet = pipe.unet + self.scheduler = DDIMScheduler.from_pretrained(model_key, subfolder="scheduler", torch_dtype=self.precision_t) + + del pipe + + self.num_train_timesteps = self.scheduler.config.num_train_timesteps + self.min_step = int(self.num_train_timesteps * t_range[0]) + self.max_step = int(self.num_train_timesteps * t_range[1]) + self.alphas = self.scheduler.alphas_cumprod.to(self.device) + + def _get_precision_type(self, precision: str) -> torch.dtype: + """ + Map string precision representation to torch dtype. + + Parameters: + precision (str): String representation of precision. + + Returns: + torch.dtype: Corresponding torch dtype. + """ + precision_map = {"16": torch.float16, "bf16": torch.bfloat16} + return precision_map.get(precision, torch.float32) + + @torch.no_grad() + def get_text_embeds(self, prompt: str) -> torch.Tensor: + """ + Get text embeddings from the given prompt. + + Parameters: + prompt (str): Input text. + + Returns: + torch.Tensor: Text embeddings tensor [B, 77, 1024]. + """ + inputs = self.tokenizer( + prompt, padding='max_length', max_length=self.tokenizer.model_max_length, return_tensors='pt' + ) + embeddings = self.text_encoder(inputs.input_ids.to(self.device))[0] + return embeddings + + # @torch.compile() # TODO(ahmadki) + def train_step( + self, + text_embeddings: torch.Tensor, + pred_rgb: torch.Tensor, + guidance_scale: float = 100.0, + as_latent: bool = False, + ) -> float: + """ + Train step function for StableDiffusion. + + Parameters: + text_embeddings (torch.Tensor): Embeddings tensor [B, 512]. + pred_rgb (torch.Tensor): Predicted RGB tensor [B, 3, 512, 512]. + guidance_scale (float): Guidance scaling factor. + as_latent (bool): If True, considers pred_rgb as latent. + + Returns: + float: Loss value. + """ + if as_latent: + latents = F.interpolate(pred_rgb, (64, 64), mode='bilinear', align_corners=False) * 2 - 1 + else: + pred_rgb_512 = F.interpolate(pred_rgb, (512, 512), mode='bilinear', align_corners=False) + latents = self.encode_imgs(pred_rgb_512) + + t = torch.randint(self.min_step, self.max_step + 1, (latents.shape[0],), dtype=torch.long, device=self.device) + + with torch.no_grad(): + # add noise + noise = torch.randn_like(latents) + latents_noisy = self.scheduler.add_noise(latents, noise, t) + # pred noise + latent_model_input = torch.cat([latents_noisy] * 2) + td = torch.cat([t] * 2) + noise_pred = self.unet(latent_model_input, td, encoder_hidden_states=text_embeddings).sample + + noise_pred_uncond, noise_pred_pos = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_pos - noise_pred_uncond) + + w = 1 - self.alphas[t] + grad = w[:, None, None, None] * (noise_pred - noise) + grad = torch.nan_to_num(grad) + + targets = (latents - grad).detach() + loss = 0.5 * F.mse_loss(latents.float(), targets, reduction='sum') / latents.shape[0] + return loss + + def encode_imgs(self, imgs: torch.Tensor) -> torch.Tensor: + """ + Encode images into latent representations. + + Parameters: + imgs (torch.Tensor): Image tensor [B, 3, H, W]. + + Returns: + torch.Tensor: Encoded latent tensor. + """ + imgs = 2 * imgs - 1 + posterior = self.vae.encode(imgs).latent_dist + latents = posterior.sample() * self.vae.config.scaling_factor + return latents diff --git a/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_nemo_pipeline.py b/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_nemo_pipeline.py new file mode 100644 index 000000000000..a605391b9c92 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_nemo_pipeline.py @@ -0,0 +1,129 @@ +import os +import tempfile + +import torch +import torch.nn as nn +import torch.nn.functional as F +from omegaconf import OmegaConf + +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion +from nemo.collections.multimodal.modules.nerf.guidance.txt2img_guidance_base import Txt2ImgGuidanceBase +from nemo.collections.multimodal.modules.stable_diffusion.distributions.distributions import ( + DiagonalGaussianDistribution, +) +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector + + +class StableDiffusion(Txt2ImgGuidanceBase): + def __init__( + self, checkpoint, sampler_type="DDIM", t_range=[0.02, 0.98], precision="16", device=torch.device('cuda') + ): + super().__init__() + + self.device = device + self.checkpoint = checkpoint + self.sampler_type = sampler_type + + cfg, state_dict = self.load_config_and_state_from_nemo(checkpoint) + + cfg.precision = precision + cfg.ckpt_path = None + cfg.unet_config.from_pretrained = None + cfg.first_stage_config.from_pretrained = None + + self.model = LatentDiffusion(cfg).to(device) + + sd_state_dict = {} + # Remove Megatron wrapper and inductor + for key, value in state_dict.items(): + key = key[6:] + sd_state_dict[key] = value + self.model.load_state_dict(sd_state_dict) + self.first_stage_model = self.model.first_stage_model + self.text_encoder = self.model.cond_stage_model.encode + + self.num_train_timesteps = self.model.num_timesteps + self.min_step = int(self.num_train_timesteps * t_range[0]) + self.max_step = int(self.num_train_timesteps * t_range[1]) + self.alphas = self.model.alphas_cumprod.to(self.device) + + @torch.no_grad() + def get_text_embeds(self, prompt): + return self.text_encoder(prompt) + + @torch.autocast(device_type="cuda") + def train_step(self, text_embeddings, pred_rgb, guidance_scale=100, as_latent=False): + + if as_latent: + latents = F.interpolate(pred_rgb, (64, 64), mode='bilinear', align_corners=False) * 2 - 1 + else: + # interp to 512x512 to be fed into vae. + pred_rgb_512 = F.interpolate(pred_rgb, (512, 512), mode='bilinear', align_corners=False) + # encode image into latents with vae, requires grad! + latents = self.encode_imgs(pred_rgb_512) + + # timestep ~ U(0.02, 0.98) to avoid very high/low noise level + t = torch.randint(self.min_step, self.max_step + 1, (latents.shape[0],), dtype=torch.long, device=self.device) + + with torch.no_grad(): + noise = torch.randn_like(latents) + latents_noisy = self.model.q_sample(x_start=latents, t=t, noise=noise) + latent_model_input = torch.cat([latents_noisy] * 2) + td = torch.cat([t] * 2) + noise_pred = self.model.apply_model(latent_model_input, td, text_embeddings) + + noise_pred_uncond, noise_pred_pos = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_pos - noise_pred_uncond) + + # w(t), sigma_t^2 + w = 1 - self.alphas[t] + grad = w[:, None, None, None] * (noise_pred - noise) + grad = torch.nan_to_num(grad) + + targets = (latents - grad).detach() + loss = 0.5 * F.mse_loss(latents.float(), targets, reduction='sum') / latents.shape[0] + return loss + + def image_encoder(self, x): + h = self.first_stage_model.encoder(x) + moments = self.first_stage_model.quant_conv(h) + posterior = DiagonalGaussianDistribution(moments) + return posterior + + def encode_imgs(self, imgs): + # imgs: [B, 3, H, W] + + imgs = 2 * imgs - 1 + + posterior = self.image_encoder(imgs) + latents = ( + posterior.sample() * self.image_encoder.config.scaling_factor + ) # self.vae.config.scaling_factor==0.18215 + + return latents + + def load_config_and_state_from_nemo(self, nemo_path): + if torch.cuda.is_available(): + map_location = torch.device('cuda') + else: + map_location = torch.device('cpu') + save_restore_connector = NLPSaveRestoreConnector() + cwd = os.getcwd() + + with tempfile.TemporaryDirectory() as tmpdir: + try: + save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) + + # Change current working directory to + os.chdir(tmpdir) + config_yaml = os.path.join(tmpdir, save_restore_connector.model_config_yaml) + cfg = OmegaConf.load(config_yaml) + + model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) + state_dict = save_restore_connector._load_state_dict_from_disk( + model_weights, map_location=map_location + ) + finally: + os.chdir(cwd) + + return cfg, state_dict diff --git a/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_trt_pipeline.py b/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_trt_pipeline.py new file mode 100644 index 000000000000..bf8c5e971002 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/guidance/stablediffusion_trt_pipeline.py @@ -0,0 +1,221 @@ +import logging +import os +import tempfile + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from omegaconf import OmegaConf +from polygraphy import cuda +from transformers import CLIPTokenizer + +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import LatentDiffusion +from nemo.collections.multimodal.modules.nerf.guidance.txt2img_guidance_base import Txt2ImgGuidanceBase +from nemo.collections.multimodal.modules.nerf.utils.trt_engine import Engine, device_view +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + extract_into_tensor, + make_beta_schedule, +) +from nemo.collections.multimodal.parts.stable_diffusion.utils import default +from nemo.collections.multimodal.parts.utils import randn_like +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector + + +class LatentDiffusionWrapper(Txt2ImgGuidanceBase): + def __init__(self, plan_dir, checkpoint): + super().__init__() + with open(os.path.join(plan_dir, "conf.yaml"), "rb") as fp: + config = OmegaConf.load(fp.name) + max_batch_size = config.batch_size + + self.tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14") + self.max_length = config.clip.max_length + self.rng = torch.Generator(device=torch.cuda.current_device(),) + + self.set_beta_schedule() + + stream = cuda.Stream() + + self.image_encoder = self.load_vae_from_checkpoint(checkpoint) + + self.text_encoder = Engine(os.path.join(plan_dir, "clip.plan")) + shape_dict = {'tokens': config.clip.tokens, 'logits': config.clip.logits} + self.text_encoder.set_engine(stream, shape_dict) + + self.unet = Engine(os.path.join(plan_dir, "unet.plan")) + shape_dict = { + 'x': config.unet.x, + 't': (max_batch_size * 2,), + 'context': config.unet.context, + 'logits': config.unet.logits, + } + self.unet.set_engine(stream, shape_dict) + + def set_beta_schedule(self): + betas = make_beta_schedule("linear", 1000, linear_start=0.00085, linear_end=0.0120, cosine_s=0.008) + alphas = 1.0 - betas + alphas_cumprod = np.cumprod(alphas, axis=0) + betas = torch.tensor(betas) + alphas = torch.tensor(alphas) + alphas_cumprod = torch.tensor(alphas_cumprod) + to_torch = lambda x: x.clone().detach().to(torch.float32).to(torch.cuda.current_device()) + self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) + self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod.cpu()))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1.0 - alphas_cumprod.cpu()))) + (timesteps,) = betas.shape + self.num_timesteps = int(timesteps) + + def q_sample(self, x_start, t, noise=None): + noise = default(noise, lambda: randn_like(x_start, generator=self.rng)) + return ( + extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise + ) + + def encode_imgs(self, imgs): + imgs = 2 * imgs - 1 + posterior = self.image_encoder(imgs) + latents = posterior.sample() * 0.18215 + return latents + + def clip_encode(self, text): + batch_encoding = self.tokenizer( + text, + truncation=True, + max_length=self.max_length, + return_length=True, + return_overflowing_tokens=False, + padding="max_length", + return_tensors="pt", + ) + tokens = batch_encoding["input_ids"].to("cuda", non_blocking=True) + z = self.text_encoder.infer({"tokens": device_view(tokens.type(torch.int32))})['logits'].clone() + seq_len = (z.shape[1] + 8 - 1) // 8 * 8 + z = torch.nn.functional.pad(z, (0, 0, 0, seq_len - z.shape[1]), value=0.0) + return z + + def apply_model(self, x, t, cond, return_ids=False): + self.conditioning_key = "crossattn" + if isinstance(cond, dict): + # hybrid case, cond is exptected to be a dict + pass + else: + if not isinstance(cond, list): + cond = [cond] + # key = 'c_concat' if self.model.conditioning_key == 'concat' else 'c_crossattn' + key = 'c_crossattn' + cond = {key: cond} + # UNET TRT + cc = torch.cat(cond['c_crossattn'], 1) # needs to be changed I think + out = self.unet.infer( + { + "x": device_view(x.contiguous()), + "t": device_view(t.type(torch.int32).contiguous()), + "context": device_view(cc.contiguous()), + } + )['logits'].clone() + if isinstance(out, tuple) and not return_ids: + return out[0] + else: + return out + + def load_vae_from_checkpoint(self, checkpoint): + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + cfg, state_dict = self.load_config_and_state_from_nemo(checkpoint) + + if cfg.get('unet_config') and cfg.get('unet_config').get('from_pretrained'): + cfg.unet_config.from_pretrained = None + if cfg.get('first_stage_config') and cfg.get('first_stage_config').get('from_pretrained'): + cfg.first_stage_config.from_pretrained = None + + model = LatentDiffusion(cfg).to(device) + + sd_state_dict = {} + for key, value in state_dict.items(): + key = key[6:] + sd_state_dict[key] = value + model.load_state_dict(sd_state_dict) + + return model.first_stage_model.encode + + def load_config_and_state_from_nemo(self, nemo_path): + if torch.cuda.is_available(): + map_location = torch.device('cuda') + else: + map_location = torch.device('cpu') + save_restore_connector = NLPSaveRestoreConnector() + cwd = os.getcwd() + + with tempfile.TemporaryDirectory() as tmpdir: + try: + save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) + + # Change current working directory to + os.chdir(tmpdir) + config_yaml = os.path.join(tmpdir, save_restore_connector.model_config_yaml) + cfg = OmegaConf.load(config_yaml) + + model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) + state_dict = save_restore_connector._load_state_dict_from_disk( + model_weights, map_location=map_location + ) + finally: + os.chdir(cwd) + + return cfg, state_dict + + +class StableDiffusion(nn.Module): + def __init__(self, plan_dir, checkpoint, sampler_type="DDIM", t_range=[0.02, 0.98], device=torch.device('cuda')): + super().__init__() + logging.info(f'loading stable diffusion...') + + self.device = device + self.sampler_type = sampler_type + self.model = LatentDiffusionWrapper(plan_dir, checkpoint) + + self.text_encoder = self.model.clip_encode + + self.num_train_timesteps = self.model.num_timesteps + self.min_step = int(self.num_train_timesteps * t_range[0]) + self.max_step = int(self.num_train_timesteps * t_range[1]) + self.alphas = self.model.alphas_cumprod.to(self.device) # for convenience + + logging.info(f'loaded stable diffusion!') + + @torch.no_grad() + def get_text_embeds(self, prompt): + return self.text_encoder(prompt) + + def train_step(self, text_embeddings, pred_rgb, guidance_scale=100, as_latent=False): + + if as_latent: + latents = F.interpolate(pred_rgb, (64, 64), mode='bilinear', align_corners=False) * 2 - 1 + else: + # interp to 512x512 to be fed into vae. + pred_rgb_512 = F.interpolate(pred_rgb, (512, 512), mode='bilinear', align_corners=False) + # encode image into latents with vae, requires grad! + latents = self.model.encode_imgs(pred_rgb_512) + + # timestep ~ U(0.02, 0.98) to avoid very high/low noise level + t = torch.randint(self.min_step, self.max_step + 1, (latents.shape[0],), dtype=torch.long, device=self.device) + + with torch.no_grad(): + noise = torch.randn_like(latents) + latents_noisy = self.model.q_sample(x_start=latents, t=t, noise=noise) + latent_model_input = torch.cat([latents_noisy] * 2) + td = torch.cat([t] * 2) + noise_pred = self.model.apply_model(latent_model_input, td, text_embeddings) + + noise_pred_uncond, noise_pred_pos = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_pos - noise_pred_uncond) + + # w(t), sigma_t^2 + w = 1 - self.alphas[t] + grad = w[:, None, None, None] * (noise_pred - noise) + grad = torch.nan_to_num(grad) + + targets = (latents - grad).detach() + loss = 0.5 * F.mse_loss(latents.float(), targets, reduction='sum') / latents.shape[0] + return loss diff --git a/nemo/collections/multimodal/modules/nerf/guidance/txt2img_guidance_base.py b/nemo/collections/multimodal/modules/nerf/guidance/txt2img_guidance_base.py new file mode 100644 index 000000000000..8e03ffb41d71 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/guidance/txt2img_guidance_base.py @@ -0,0 +1,6 @@ +import torch.nn as nn + + +class Txt2ImgGuidanceBase(nn.Module): + def __init__(self): + super().__init__() diff --git a/nemo/collections/multimodal/modules/nerf/loss/laplacian_smooth_loss.py b/nemo/collections/multimodal/modules/nerf/loss/laplacian_smooth_loss.py new file mode 100644 index 000000000000..2240f0aee8ce --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/loss/laplacian_smooth_loss.py @@ -0,0 +1,38 @@ +import torch +import torch.nn as nn + + +class LaplacianSmoothLoss(nn.Module): + def __init__(self): + super(LaplacianSmoothLoss, self).__init__() + + @torch.cuda.amp.autocast(enabled=False) + def forward(self, verts, faces): + with torch.no_grad(): + L = self.laplacian_uniform(verts, faces.long()) + loss = L.mm(verts) + loss = loss.norm(dim=1) + loss = loss.mean() + return loss + + # TODO(ahmadki): should be moved to a separate mesh class + def laplacian_uniform(self, verts, faces): + V = verts.shape[0] + F = faces.shape[0] + + # Neighbor indices + ii = faces[:, [1, 2, 0]].flatten() + jj = faces[:, [2, 0, 1]].flatten() + adj = torch.stack([torch.cat([ii, jj]), torch.cat([jj, ii])], dim=0).unique(dim=1) + adj_values = torch.ones(adj.shape[1], device=verts.device, dtype=torch.float) + + # Diagonal indices + diag_idx = adj[0] + + # Build the sparse matrix + idx = torch.cat((adj, torch.stack((diag_idx, diag_idx), dim=0)), dim=1) + values = torch.cat((-adj_values, adj_values)) + + # The coalesce operation sums the duplicate indices, resulting in the + # correct diagonal + return torch.sparse_coo_tensor(idx, values, (V, V)).coalesce() diff --git a/nemo/collections/multimodal/modules/nerf/loss/normal_consistency_loss.py b/nemo/collections/multimodal/modules/nerf/loss/normal_consistency_loss.py new file mode 100644 index 000000000000..4459c7003fd4 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/loss/normal_consistency_loss.py @@ -0,0 +1,56 @@ +import torch +import torch.nn as nn + + +class NormalConsistencyLoss(nn.Module): + def __init__(self): + super(NormalConsistencyLoss, self).__init__() + + # TODO(ahmadki): is this safe to do in FP16 ? + def forward(self, face_normals, t_pos_idx): + tris_per_edge = self.compute_edge_to_face_mapping(t_pos_idx) + + # Fetch normals for both faces sharind an edge + n0 = face_normals[tris_per_edge[:, 0], :] + n1 = face_normals[tris_per_edge[:, 1], :] + + # Compute error metric based on normal difference + term = torch.clamp(torch.sum(n0 * n1, -1, keepdim=True), min=-1.0, max=1.0) + term = 1.0 - term + + return torch.mean(torch.abs(term)) + + # TODO(ahmadki): should belog to mesh class + def compute_edge_to_face_mapping(self, attr_idx): + with torch.no_grad(): + # Get unique edges + # Create all edges, packed by triangle + all_edges = torch.cat( + ( + torch.stack((attr_idx[:, 0], attr_idx[:, 1]), dim=-1), + torch.stack((attr_idx[:, 1], attr_idx[:, 2]), dim=-1), + torch.stack((attr_idx[:, 2], attr_idx[:, 0]), dim=-1), + ), + dim=-1, + ).view(-1, 2) + + # Swap edge order so min index is always first + order = (all_edges[:, 0] > all_edges[:, 1]).long().unsqueeze(dim=1) + sorted_edges = torch.cat( + (torch.gather(all_edges, 1, order), torch.gather(all_edges, 1, 1 - order)), dim=-1 + ) + + # Elliminate duplicates and return inverse mapping + unique_edges, idx_map = torch.unique(sorted_edges, dim=0, return_inverse=True) + + tris = torch.arange(attr_idx.shape[0]).repeat_interleave(3).cuda() + + tris_per_edge = torch.zeros((unique_edges.shape[0], 2), dtype=torch.int64).cuda() + + # Compute edge to face table + mask0 = order[:, 0] == 0 + mask1 = order[:, 0] == 1 + tris_per_edge[idx_map[mask0], 0] = tris[mask0] + tris_per_edge[idx_map[mask1], 1] = tris[mask1] + + return tris_per_edge diff --git a/nemo/collections/multimodal/modules/nerf/materials/__init__.py b/nemo/collections/multimodal/modules/nerf/materials/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/modules/nerf/materials/basic_shading.py b/nemo/collections/multimodal/modules/nerf/materials/basic_shading.py new file mode 100644 index 000000000000..434f58552a05 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/materials/basic_shading.py @@ -0,0 +1,66 @@ +from typing import Optional + +import torch + +from nemo.collections.multimodal.modules.nerf.materials.materials_base import MaterialsBase, ShadingEnum + + +class BasicShading(MaterialsBase): + """ + Material model for handling various shading types. + """ + + def __init__(self): + super(BasicShading, self).__init__() + self.specular = torch.nn.Parameter(torch.rand(3)) + self.shininess = torch.nn.Parameter(torch.rand(1)) + + def forward( + self, + albedo: torch.Tensor, + normals: torch.Tensor, + light_d: torch.Tensor, + ambient_ratio: float, + shading_type: Optional[ShadingEnum] = None, + ) -> torch.Tensor: + """ + Apply material and shading to the input RGB tensor. + + Args: + albedo (Tensor): Base albedo values. + normals (Tensor): Normal vectors at each ray intersection. + light_d (Tensor): Light direction. + ambient_ratio (float): Ratio for ambient lighting. + shading_type (ShadingEnum): The type of shading to apply + + Returns: + Tensor: The output RGB tensor after applying material and shading. + """ + if shading_type is None: + return albedo + elif shading_type == ShadingEnum.TEXTURELESS: + return torch.ones_like(albedo) * ambient_ratio + elif shading_type == ShadingEnum.NORMAL: + return (normals + 1) / 2 # Map normals from [-1, 1] to [0, 1] + elif shading_type in [ShadingEnum.LAMBERTIAN, ShadingEnum.PHONG]: + # Ambient light + ambient_light = ambient_ratio * albedo + # Dot product between light direction and normals + dot_product = torch.sum(normals * light_d, dim=1, keepdim=True) + # Lambertian term + diffuse_term = albedo * torch.clamp(dot_product, min=0) + + if shading_type == ShadingEnum.LAMBERTIAN: + return ambient_light + diffuse_term + elif shading_type == ShadingEnum.PHONG: + # Phong specular term + specular_term = ( + self.specular + * (self.shininess + 2) + * torch.pow(torch.clamp(dot_product, min=0), self.shininess) + / (2 * 3.14159) + ) + + return ambient_light + diffuse_term + specular_term + else: + raise ValueError(f"Unknown shading_type: {shading_type}") diff --git a/nemo/collections/multimodal/modules/nerf/materials/materials_base.py b/nemo/collections/multimodal/modules/nerf/materials/materials_base.py new file mode 100644 index 000000000000..393a5ffcc4fb --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/materials/materials_base.py @@ -0,0 +1,29 @@ +from enum import Enum +from typing import Literal, Optional + +from torch import nn + + +class ShadingEnum(str, Enum): + TEXTURELESS = "textureless" + NORMAL = "normal" + LAMBERTIAN = "lambertian" + PHONG = "phong" + + # TODO(ahmadki): + # Oren–Nayar + # Minnaert + # Cook–Torrance + # Ward anisotropic + # Hanrahan–Krueger + # Cel shading + # Gooch shading + + +class MaterialsBase(nn.Module): + """ + Base class for materials. + """ + + def __init__(self): + super(MaterialsBase, self).__init__() diff --git a/nemo/collections/multimodal/modules/nerf/renderers/__init__.py b/nemo/collections/multimodal/modules/nerf/renderers/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/modules/nerf/renderers/base_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/base_renderer.py new file mode 100644 index 000000000000..36b78218a695 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/renderers/base_renderer.py @@ -0,0 +1,18 @@ +import torch +import torch.nn as nn + +# TODO(ahmadki): make abstract +class BaseRenderer(nn.Module): + def __init__(self, bound, update_interval): + super().__init__() + self.bound = bound + aabb = torch.FloatTensor([-bound, -bound, -bound, bound, bound, bound]) + self.register_buffer('aabb', aabb) + self.update_interval = update_interval + + @torch.no_grad() + def update_step(self, epoch: int, global_step: int, decay: float = 0.95, **kwargs): + raise NotImplementedError + + def forward(self, rays_o, rays_d, return_normal_image=False, return_normal_perturb=False, **kwargs): + raise NotImplementedError diff --git a/nemo/collections/multimodal/modules/nerf/renderers/base_sdf_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/base_sdf_renderer.py new file mode 100644 index 000000000000..511908e826be --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/renderers/base_sdf_renderer.py @@ -0,0 +1,20 @@ +import torch + +from nemo.collections.multimodal.modules.renderer.base_renderer import RendererBase + + +class BaseSDFRenderer(RendererBase): + def __init__(self, bound): + super().__init__(bound) + + # TODO(ahmadki): needs a rework + @torch.no_grad() + def get_vertices_and_triangles(self, resolution=None, S=128): + deform = torch.tanh(self.deform) / self.grid_size + + vertices, triangles = self.dmtet(self.verts + deform, self.sdf, self.indices) + + vertices = vertices.detach().cpu().numpy() + triangles = triangles.detach().cpu().numpy() + + return vertices, triangles diff --git a/nemo/collections/multimodal/modules/nerf/renderers/base_volume_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/base_volume_renderer.py new file mode 100644 index 000000000000..a49f37dd0741 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/renderers/base_volume_renderer.py @@ -0,0 +1,7 @@ +from nemo.collections.multimodal.modules.nerf.geometry.nerf_base import DensityActivationEnum +from nemo.collections.multimodal.modules.renderer.base_renderer import RendererBase + + +class BaseVolumeRenderer(RendererBase): + def __init__(self, bound, update_interval): + super().__init__(bound, update_interval) diff --git a/nemo/collections/multimodal/modules/nerf/renderers/nerfacc_volume_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/nerfacc_volume_renderer.py new file mode 100644 index 000000000000..103c7b82ad23 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/renderers/nerfacc_volume_renderer.py @@ -0,0 +1,364 @@ +import collections +import math +from typing import Optional + +import torch +from nerfacc.estimators.occ_grid import OccGridEstimator +from nerfacc.grid import ray_aabb_intersect, traverse_grids +from nerfacc.volrend import accumulate_along_rays_, render_weight_from_density, rendering + +from nemo.collections.multimodal.modules.renderer.base_renderer import BaseRenderer + +Rays = collections.namedtuple("Rays", ("origins", "viewdirs")) + + +def namedtuple_map(fn, tup): + """Apply `fn` to each element of `tup` and cast to `tup`'s namedtuple.""" + return type(tup)(*(None if x is None else fn(x) for x in tup)) + + +def render_image_with_occgrid( + # scene + nerf: torch.nn.Module, + estimator: OccGridEstimator, + rays: Rays, + # rendering options + near_plane: float = 0.0, + far_plane: float = 1e10, + render_step_size: float = 1e-3, + render_bkgd: Optional[torch.Tensor] = None, + cone_angle: float = 0.0, + alpha_thre: float = 0.0, + # test options + test_chunk_size: int = 8192, +): + """Render the pixels of an image.""" + rays_shape = rays.origins.shape + if len(rays_shape) == 3: + height, width, _ = rays_shape + num_rays = height * width + rays = namedtuple_map(lambda r: r.reshape([num_rays] + list(r.shape[2:])), rays) + else: + num_rays, _ = rays_shape + + # TODO(ahmadki): optimize, cache result between sigma_fn and rgb_sigma_fn + def sigma_fn(t_starts, t_ends, ray_indices): + t_origins = chunk_rays.origins[ray_indices] + t_dirs = chunk_rays.viewdirs[ray_indices] + positions = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0 + sigmas = nerf.density(positions)['sigma'] + return sigmas + + def rgb_sigma_fn(t_starts, t_ends, ray_indices): + t_origins = chunk_rays.origins[ray_indices] + t_dirs = chunk_rays.viewdirs[ray_indices] + positions = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0 + sigmas, rgbs, normal = nerf( + positions=positions, view_dirs=None, light_dirs=t_dirs + ) # TODO(ahmadki): t_dirs is incorrect + return rgbs, sigmas + + results = [] + chunk = torch.iinfo(torch.int32).max if nerf.training else test_chunk_size + + for i in range(0, num_rays, chunk): + chunk_rays = namedtuple_map(lambda r: r[i : i + chunk], rays) + ray_indices, t_starts, t_ends = estimator.sampling( + chunk_rays.origins, + chunk_rays.viewdirs, + sigma_fn=sigma_fn, + near_plane=near_plane, + far_plane=far_plane, + render_step_size=render_step_size, + stratified=nerf.training, + cone_angle=cone_angle, + alpha_thre=alpha_thre, + ) + rgb, opacity, depth, extras = rendering( + t_starts, + t_ends, + ray_indices, + n_rays=chunk_rays.origins.shape[0], + rgb_sigma_fn=rgb_sigma_fn, + render_bkgd=render_bkgd, + ) + + weight = extras["weights"] + alpha = extras["alphas"] + + chunk_results = [rgb, opacity, depth, weight, alpha, len(t_starts)] + results.append(chunk_results) + + colors, opacities, depths, weights, alphas, n_rendering_samples = [ + torch.cat(r, dim=0) if isinstance(r[0], torch.Tensor) else r for r in zip(*results) + ] + + return ( + colors.view((*rays_shape[:-1], -1)), + opacities.view((*rays_shape[:-1], -1)), + depths.view((*rays_shape[:-1], -1)), + weights, + alphas, + sum(n_rendering_samples), + ) + + +@torch.no_grad() +def render_image_with_occgrid_test( + max_samples: int, + # scene + nerf: torch.nn.Module, + estimator: OccGridEstimator, + rays: Rays, + # rendering options + near_plane: float = 0.0, + far_plane: float = 1e10, + render_step_size: float = 1e-3, + render_bkgd: Optional[torch.Tensor] = None, + cone_angle: float = 0.0, + alpha_thre: float = 0.0, + early_stop_eps: float = 1e-4, +): + """Render the pixels of an image.""" + rays_shape = rays.origins.shape + if len(rays_shape) == 3: + height, width, _ = rays_shape + num_rays = height * width + rays = namedtuple_map(lambda r: r.reshape([num_rays] + list(r.shape[2:])), rays) + else: + num_rays, _ = rays_shape + + def rgb_sigma_fn(t_starts, t_ends, ray_indices): + t_origins = rays.origins[ray_indices] + t_dirs = rays.viewdirs[ray_indices] + positions = t_origins + t_dirs * (t_starts[:, None] + t_ends[:, None]) / 2.0 + sigmas, rgbs, normal = nerf( + positions=positions, view_dirs=None, light_dirs=t_dirs + ) # TODO(ahmadki): t_dirs is incorrect ? + return rgbs, sigmas + + device = rays.origins.device + opacity = torch.zeros(num_rays, 1, device=device) + depth = torch.zeros(num_rays, 1, device=device) + rgb = torch.zeros(num_rays, 3, device=device) + + ray_mask = torch.ones(num_rays, device=device).bool() + + # 1 for synthetic scenes, 4 for real scenes + min_samples = 1 if cone_angle == 0 else 4 + + iter_samples = total_samples = 0 + + rays_o = rays.origins + rays_d = rays.viewdirs + + near_planes = torch.full_like(rays_o[..., 0], fill_value=near_plane) + far_planes = torch.full_like(rays_o[..., 0], fill_value=far_plane) + + t_mins, t_maxs, hits = ray_aabb_intersect(rays_o, rays_d, estimator.aabbs) + + n_grids = estimator.binaries.size(0) + + if n_grids > 1: + t_sorted, t_indices = torch.sort(torch.cat([t_mins, t_maxs], -1), -1) + else: + t_sorted = torch.cat([t_mins, t_maxs], -1) + t_indices = torch.arange(0, n_grids * 2, device=t_mins.device, dtype=torch.int64).expand(num_rays, n_grids * 2) + + opc_thre = 1 - early_stop_eps + + while iter_samples < max_samples: + + n_alive = ray_mask.sum().item() + if n_alive == 0: + break + + # the number of samples to add on each ray + n_samples = max(min(num_rays // n_alive, 64), min_samples) + iter_samples += n_samples + + # ray marching + (intervals, samples, termination_planes) = traverse_grids( + # rays + rays_o, # [n_rays, 3] + rays_d, # [n_rays, 3] + # grids + estimator.binaries, # [m, resx, resy, resz] + estimator.aabbs, # [m, 6] + # options + near_planes, # [n_rays] + far_planes, # [n_rays] + render_step_size, + cone_angle, + n_samples, + True, + ray_mask, + # pre-compute intersections + t_sorted, # [n_rays, m*2] + t_indices, # [n_rays, m*2] + hits, # [n_rays, m] + ) + t_starts = intervals.vals[intervals.is_left] + t_ends = intervals.vals[intervals.is_right] + ray_indices = samples.ray_indices[samples.is_valid] + packed_info = samples.packed_info + + # get rgb and sigma from radiance field + rgbs, sigmas = rgb_sigma_fn(t_starts, t_ends, ray_indices) + # volume rendering using native cuda scan + weights, _, alphas = render_weight_from_density( + t_starts, + t_ends, + sigmas, + ray_indices=ray_indices, + n_rays=num_rays, + prefix_trans=1 - opacity[ray_indices].squeeze(-1), + ) + if alpha_thre > 0: + vis_mask = alphas >= alpha_thre + ray_indices, rgbs, weights, t_starts, t_ends = ( + ray_indices[vis_mask], + rgbs[vis_mask], + weights[vis_mask], + t_starts[vis_mask], + t_ends[vis_mask], + ) + + accumulate_along_rays_( + weights, values=rgbs, ray_indices=ray_indices, outputs=rgb, + ) + accumulate_along_rays_( + weights, values=None, ray_indices=ray_indices, outputs=opacity, + ) + accumulate_along_rays_( + weights, values=(t_starts + t_ends)[..., None] / 2.0, ray_indices=ray_indices, outputs=depth, + ) + # update near_planes using termination planes + near_planes = termination_planes + # update rays status + ray_mask = torch.logical_and( + # early stopping + opacity.view(-1) <= opc_thre, + # remove rays that have reached the far plane + packed_info[:, 1] == n_samples, + ) + total_samples += ray_indices.shape[0] + + if render_bkgd is not None: + rgb = rgb + render_bkgd * (1.0 - opacity) + + depth = depth / opacity.clamp_min(torch.finfo(rgbs.dtype).eps) + + return ( + rgb.view((*rays_shape[:-1], -1)), + opacity.view((*rays_shape[:-1], -1)), + depth.view((*rays_shape[:-1], -1)), + weights, + alphas, + total_samples, + ) + + +class NerfaccVolumeBaseRenderer(BaseRenderer): + def __init__( + self, + bound, + grid_resolution, + grid_levels, + render_step_size=1e-3, + near_plane=0.2, + cone_angle=0.004, + alpha_thre=1e-2, + ): + + super().__init__(bound) + + self.grid_resolution = grid_resolution + self.grid_levels = grid_levels + self.render_step_size = render_step_size + self.near_plane = near_plane + self.cone_angle = cone_angle + self.alpha_thre = alpha_thre + self.nerf = None + + self.estimator = OccGridEstimator(roi_aabb=self.aabb, resolution=self.grid_resolution, levels=self.grid_levels) + + @torch.no_grad() # TODO(ahmadki) + def update_step( + self, + epoch: int, + global_step: int, + update_interval: int = 16, + decay: float = 0.95, + occ_thre: float = 0.01, + warmup_steps: int = 256, + **kwargs + ): + def occ_eval_fn(x): + density = self.nerf.forward_density(x) + return density * self.render_step_size + + self.estimator.update_every_n_steps( + step=global_step, + occ_eval_fn=occ_eval_fn, + occ_thre=occ_thre, + ema_decay=decay, + warmup_steps=warmup_steps, + n=update_interval, + ) + + def forward(self, rays_o, rays_d, mvp, h, w, staged=False, max_ray_batch=4096, step=None, **kwargs): + return self._render(rays_o=rays_o, rays_d=rays_d, step=step, **kwargs) + + def _render( + self, + rays_o, + rays_d, + light_d=None, + ambient_ratio=1.0, + shading='albedo', + bg_color=None, + perturb=False, + T_thresh=1e-4, + binarize=False, + step=None, + **kwargs + ): + rays_o = rays_o.contiguous().view(-1, 3) + rays_d = rays_d.contiguous().view(-1, 3) + + N = rays_o.shape[0] # N = B * N, in fact + + rays = Rays(origins=rays_o, viewdirs=rays_d) + + if self.training: + rgb, acc, depth, weights, alphas, n_rendering_samples = render_image_with_occgrid( + nerf=self.nerf, + estimator=self.estimator, + rays=rays, + near_plane=self.near_plane, + render_step_size=self.render_step_size, + render_bkgd=bg_color, + cone_angle=self.cone_angle, + alpha_thre=self.alpha_thre, + ) + else: + rgb, acc, depth, weights, alphas, n_rendering_samples = render_image_with_occgrid_test( + max_samples=1024, + nerf=self.nerf, + estimator=self.estimator, + rays=rays, + near_plane=self.near_plane, + render_step_size=self.render_step_size, + render_bkgd=bg_color, + cone_angle=self.cone_angle, + alpha_thre=self.alpha_thre, + ) + + results = {} + results['weights'] = weights + results['image'] = rgb.view(1, -1, 3) + results['depth'] = depth.view(1, -1) + results['weights_sum'] = acc.view(1, -1) + + return results diff --git a/nemo/collections/multimodal/modules/nerf/renderers/nvdiffrast_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/nvdiffrast_renderer.py new file mode 100644 index 000000000000..9b23e1db890c --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/renderers/nvdiffrast_renderer.py @@ -0,0 +1,222 @@ +import math + +import numpy as np +import nvdiffrast.torch as dr +import torch +import torch.nn.functional as F + +from nemo.collections.multimodal.modules.nerf.geometry.dmtet import DeepMarchingTetrahedra +from nemo.collections.multimodal.modules.nerf.geometry.nerf_base import DensityActivationEnum +from nemo.collections.multimodal.modules.nerf.renderers.base_renderer import BaseRenderer + + +# TODO: self.density_thresh, self.mean_density need a rework, they can be infered at run time +# and shouldn't be loaded from the checkpoint +class NVDiffRastRenderer(BaseRenderer): + def __init__(self, bound, update_interval, grid_resolution, density_thresh, quartet_file): + + super().__init__(bound, update_interval) + + self.grid_resolution = grid_resolution + self.density_thresh = density_thresh + self.quartet_file = quartet_file + + self.cascade = 1 + math.ceil(math.log2(bound)) + density_grid = torch.zeros([self.cascade, self.grid_resolution ** 3]) # [CAS, H * H * H] + density_bitfield = torch.zeros( + self.cascade * self.grid_resolution ** 3 // 8, dtype=torch.uint8 + ) # [CAS * H * H * H // 8] + self.register_buffer('density_grid', density_grid) + self.register_buffer('density_bitfield', density_bitfield) + self.mean_density = 0 + self.iter_density = 0 + + # load dmtet vertices + # TODO(ahmadki): hard coded devices + tets = np.load(quartet_file) + self.verts = -torch.tensor(tets['vertices'], dtype=torch.float32, device='cuda') * 2 # covers [-1, 1] + self.indices = torch.tensor(tets['indices'], dtype=torch.long, device='cuda') + self.tet_scale = torch.tensor([1, 1, 1], dtype=torch.float32, device='cuda') + self.dmtet = DeepMarchingTetrahedra(device='cuda') + + # vert sdf and deform + sdf = torch.nn.Parameter(torch.zeros_like(self.verts[..., 0]), requires_grad=True) + self.register_parameter('sdf', sdf) + deform = torch.nn.Parameter(torch.zeros_like(self.verts), requires_grad=True) + self.register_parameter('deform', deform) + + edges = torch.tensor( + [0, 1, 0, 2, 0, 3, 1, 2, 1, 3, 2, 3], dtype=torch.long, device="cuda" + ) # six edges for each tetrahedron. + all_edges = self.indices[:, edges].reshape(-1, 2) # [M * 6, 2] + all_edges_sorted = torch.sort(all_edges, dim=1)[0] + self.all_edges = torch.unique(all_edges_sorted, dim=0) + + self.initialized = False # TODO(ahmadki): not a good approach + + self.glctx = dr.RasterizeCudaContext() + + # TODO(ahmadki): not a good approach + self.nerf = None + self.material = None + self.background = None + + # TODO(ahmkadi): doesn't look good to me !! + @torch.no_grad() + def update_step(self, epoch: int, global_step: int, decay: float = 0.95, S: int = 128, **kwargs): + pass + + @torch.no_grad() + def init_tet(self): + # TODO(ahmadki): a better approach would be to have a global nerf representation (mesh) that + # we can init the tets from. this would work with checkpoints. + + # TODO(ahmadki): a placeholder, but it works for now + self.mean_density = 300 + density_thresh = min(self.mean_density, self.density_thresh) + + if self.nerf.density_activation == DensityActivationEnum.SOFTPLUS: + density_thresh = density_thresh * 25 + + # Get initial sigma + sigma = self.nerf.forward_density(positions=self.verts) + mask = sigma > density_thresh + valid_verts = self.verts[mask] + self.tet_scale = valid_verts.abs().amax(dim=0) + 1e-1 + + # Scale vertices + self.verts = self.verts * self.tet_scale + + # get sigma using the scaled vertices + sigma = self.nerf.forward_density(positions=self.verts) + self.sdf.data += (sigma - density_thresh).clamp(-1, 1) + + def forward( + self, + rays_o, + rays_d, + mvp, + light_d=None, + ambient_ratio=1.0, + shading_type=None, + return_normal_image=False, + return_vertices=False, + return_faces=False, + return_faces_normals=False, + **kwargs + ): + if not self.initialized: + self.init_tet() + self.initialized = True + return self._render( + rays_o=rays_o, + rays_d=rays_d, + mvp=mvp, + light_d=light_d, + ambient_ratio=ambient_ratio, + shading_type=shading_type, + return_normal_image=return_normal_image, + return_vertices=return_vertices, + return_faces=return_faces, + return_faces_normals=return_faces_normals, + **kwargs + ) + + def _render( + self, + rays_o, + rays_d, + mvp, + light_d=None, + ambient_ratio=1.0, + shading_type=None, + return_normal_image=False, + return_vertices=False, + return_faces=False, + return_faces_normals=False, + **kwargs + ): + # mvp: [B, 4, 4] + B, H, W, _ = rays_o.shape + + # TODO(ahmadki): move to dataset + # random sample light_d if not provided + if light_d is None: + # gaussian noise around the ray origin, so the light always face the view dir (avoid dark face) + light_d = rays_o + torch.randn(3, device=rays_o.device) + light_d = F.normalize(light_d) + + results = {} + + # get mesh + deform = torch.tanh(self.deform) / self.grid_resolution + + verts, faces = self.dmtet(self.verts + deform, self.sdf, self.indices) + + # get normals + i0, i1, i2 = faces[:, 0], faces[:, 1], faces[:, 2] + v0, v1, v2 = verts[i0, :], verts[i1, :], verts[i2, :] + + faces = faces.int() + + face_normals = torch.cross(v1 - v0, v2 - v0) + face_normals = F.normalize(face_normals) + + vn = torch.zeros_like(verts) + vn.scatter_add_(0, i0[:, None].repeat(1, 3), face_normals) + vn.scatter_add_(0, i1[:, None].repeat(1, 3), face_normals) + vn.scatter_add_(0, i2[:, None].repeat(1, 3), face_normals) + + vn = torch.where( + torch.sum(vn * vn, -1, keepdim=True) > 1e-20, + vn, + torch.tensor([0.0, 0.0, 1.0], dtype=torch.float32, device=vn.device), + ) + + # rasterization + verts_clip = torch.bmm( + F.pad(verts, pad=(0, 1), mode='constant', value=1.0).unsqueeze(0).repeat(mvp.shape[0], 1, 1), + mvp.permute(0, 2, 1), + ).float() # [B, N, 4] + rast, _ = dr.rasterize(self.glctx, verts_clip, faces, (H, W)) + + alpha = (rast[..., 3:] > 0).float() + xyzs, _ = dr.interpolate(verts.unsqueeze(0), rast, faces) # [B, H, W, 3] + normal, _ = dr.interpolate(vn.unsqueeze(0).contiguous(), rast, faces) + normal = F.normalize(normal) + + xyzs = xyzs.view(-1, 3) + mask = (rast[..., 3:] > 0).view(-1).detach() + + # do the lighting here since we have normal from mesh now. + albedo = torch.zeros_like(xyzs, dtype=torch.float32) + if mask.any(): + masked_albedo = self.nerf.forward_features(positions=xyzs[mask]) + albedo[mask] = masked_albedo.float() + albedo = albedo.view(B, H, W, 3) + fg_color = self.material( + albedo=albedo, normals=normal, light_d=light_d, ambient_ratio=ambient_ratio, shading_type=shading_type + ) + + fg_color = dr.antialias(fg_color, rast, verts_clip, faces).clamp(0, 1) # [B, H, W, 3] + alpha = dr.antialias(alpha, rast, verts_clip, faces).clamp(0, 1) # [B, H, W, 1] + + # mix background color + bg_color = self.background(rays_d=rays_d) # [N, 3] + + depth = rast[:, :, :, [2]] # [B, H, W] + color = fg_color + (1 - alpha) * bg_color + + results['depth'] = depth + results['image'] = color + if return_normal_image: + results['normal_image'] = dr.antialias((normal + 1) / 2, rast, verts_clip, faces).clamp( + 0, 1 + ) # [B, H, W, 3] + if return_vertices: + results['vertices'] = verts + if return_faces: + results['faces'] = faces + if return_faces_normals: + results['face_normals'] = face_normals + return results diff --git a/nemo/collections/multimodal/modules/nerf/renderers/torchngp_volume_renderer.py b/nemo/collections/multimodal/modules/nerf/renderers/torchngp_volume_renderer.py new file mode 100644 index 000000000000..46096857a773 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/renderers/torchngp_volume_renderer.py @@ -0,0 +1,275 @@ +import math + +import torch +import torch.nn.functional as F + +import nemo.collections.multimodal.modules.nerf.utils.torch_ngp.raymarching as raymarching +from nemo.collections.multimodal.modules.nerf.materials.materials_base import ShadingEnum +from nemo.collections.multimodal.modules.nerf.renderers.base_renderer import BaseRenderer + + +class TorchNGPVolumeRenderer(BaseRenderer): + def __init__(self, bound, update_interval, grid_resolution, density_thresh, max_steps, dt_gamma): + + super().__init__(bound, update_interval) + + self.cascade = 1 + math.ceil(math.log2(bound)) + self.grid_resolution = grid_resolution + self.density_thresh = density_thresh + self.dt_gamma = dt_gamma + self.max_steps = max_steps + + # density grid + # TODO(ahmadki): needs rework + density_grid = torch.zeros([self.cascade, self.grid_resolution ** 3]) # [CAS, H * H * H] + density_bitfield = torch.zeros( + self.cascade * self.grid_resolution ** 3 // 8, dtype=torch.uint8 + ) # [CAS * H * H * H // 8] + self.register_buffer('density_grid', density_grid) + self.register_buffer('density_bitfield', density_bitfield) + self.mean_density = 0 + self.iter_density = 0 + + # TODO(ahmadki): needs rework + self.nerf = None + self.material = None + self.background = None + + @torch.no_grad() + def update_step(self, epoch: int, global_step: int, decay: float = 0.95, S: int = 128, **kwargs): + if global_step % self.update_interval != 0: + return + + ### update density grid + tmp_grid = -torch.ones_like(self.density_grid) + + X = torch.arange(self.grid_resolution, dtype=torch.int32, device=self.aabb.device).split(S) + Y = torch.arange(self.grid_resolution, dtype=torch.int32, device=self.aabb.device).split(S) + Z = torch.arange(self.grid_resolution, dtype=torch.int32, device=self.aabb.device).split(S) + + for xs in X: + for ys in Y: + for zs in Z: + + # construct points + xx, yy, zz = torch.meshgrid(xs, ys, zs, indexing='ij') + coords = torch.cat( + [xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1 + ) # [N, 3], in [0, 128) + indices = raymarching.morton3D(coords).long() # [N] + xyzs = 2 * coords.float() / (self.grid_resolution - 1) - 1 # [N, 3] in [-1, 1] + + # cascading + for cas in range(self.cascade): + bound = min(2 ** cas, self.bound) + half_grid_resolution = bound / self.grid_resolution + # scale to current cascade's resolution + cas_xyzs = xyzs * (bound - half_grid_resolution) + # add noise in [-hgs, hgs] + cas_xyzs += (torch.rand_like(cas_xyzs) * 2 - 1) * half_grid_resolution + # query density + density = self.nerf.forward_density(cas_xyzs).reshape(-1).detach() + # assign + tmp_grid[cas, indices] = density + # ema update + valid_mask = self.density_grid >= 0 + self.density_grid[valid_mask] = torch.maximum(self.density_grid[valid_mask] * decay, tmp_grid[valid_mask]) + self.mean_density = torch.mean(self.density_grid[valid_mask]).item() + self.iter_density += 1 + + # convert to bitfield + density_thresh = min(self.mean_density, self.density_thresh) + self.density_bitfield = raymarching.packbits(self.density_grid, density_thresh, self.density_bitfield) + + def forward( + self, + rays_o, + rays_d, + light_d=None, + ambient_ratio=1.0, + shading_type=None, + return_normal_image=False, + return_normal_perturb=False, + **kwargs + ): + return self._render( + rays_o=rays_o, + rays_d=rays_d, + light_d=light_d, + ambient_ratio=ambient_ratio, + shading_type=shading_type, + return_normal_image=return_normal_image, + return_normal_perturb=return_normal_perturb, + **kwargs + ) + + # TODO(ahmadki): return_normal_image is always False ? + def _render( + self, + rays_o, + rays_d, + light_d=None, + ambient_ratio=1.0, + shading_type=None, + return_normal_image=False, + return_normal_perturb=False, + perturb=False, + T_thresh=1e-4, + binarize=False, + **kwargs + ): + # rays_o, rays_d: [B, H, W, 3] + B, H, W, _ = rays_o.shape + + # group all rays into a single batch + rays_o = rays_o.contiguous().view(-1, 3) + rays_d = rays_d.contiguous().view(-1, 3) + num_rays = rays_o.shape[0] # num_rays = B * H * W + + # pre-calculate near far + nears, fars = raymarching.near_far_from_aabb(rays_o, rays_d, self.aabb) + + # random sample light_d if not provided + # TODO(ahmadki): move to dataset + if light_d is None: + # gaussian noise around the ray origin, so the light always face the view dir (avoid dark face) + light_d = rays_o + torch.randn(3, device=rays_o.device) + light_d = F.normalize(light_d) + + normal_image = None + normals_perturb = None + weights = None + + if self.training: + positions, dirs, ts, rays = raymarching.march_rays_train( + rays_o, + rays_d, + self.bound, + self.density_bitfield, + self.cascade, + self.grid_resolution, + nears, + fars, + perturb, + self.dt_gamma, + self.max_steps, + ) + dirs = F.normalize(dirs) + + if light_d.shape[0] > 1: + flatten_rays = raymarching.flatten_rays(rays, positions.shape[0]).long() + light_d = light_d[flatten_rays] + + return_normal = (shading_type is not None) or return_normal_image + sigmas, albedo, normals = self.nerf(positions=positions, return_normal=return_normal) + + fg_color = self.material( + albedo=albedo, normals=normals, light_d=light_d, ambient_ratio=ambient_ratio, shading_type=shading_type + ) + + weights, opacity, depth, image = raymarching.composite_rays_train( + sigmas, fg_color, ts, rays, T_thresh, binarize + ) + + if return_normal_image and normals is not None: + _, _, _, normal_image = raymarching.composite_rays_train( + sigmas.detach(), (normals + 1) / 2, ts, rays, T_thresh, binarize + ) + + if return_normal_perturb: + perturb_positions = positions + torch.randn_like(positions) * 1e-2 + normals_perturb = self.normal(positions=perturb_positions) + + else: + # allocate tensors + image = torch.zeros(num_rays, 3, device=rays_o.device) + depth = torch.zeros(num_rays, device=rays_o.device) + opacity = torch.zeros(num_rays, device=rays_o.device) + + n_alive = num_rays + rays_alive = torch.arange(n_alive, dtype=torch.int32, device=rays_o.device) + rays_t = nears.clone() + + step = 0 + + while step < self.max_steps: # hard coded max step + # count alive rays + n_alive = rays_alive.shape[0] + + # exit loop + if n_alive <= 0: + break + + # decide compact_steps + n_step = max(min(num_rays // n_alive, 8), 1) + + positions, dirs, ts = raymarching.march_rays( + n_alive, + n_step, + rays_alive, + rays_t, + rays_o, + rays_d, + self.bound, + self.density_bitfield, + self.cascade, + self.grid_resolution, + nears, + fars, + perturb if step == 0 else False, + self.dt_gamma, + self.max_steps, + ) + dirs = F.normalize(dirs) + + return_normal = shading_type not in [None, ShadingEnum.TEXTURELESS] + sigmas, albedo, normals = self.nerf(positions=positions, return_normal=return_normal) + + fg_color = self.material( + albedo=albedo, + normals=normals, + light_d=light_d, + ambient_ratio=ambient_ratio, + shading_type=shading_type, + ) + raymarching.composite_rays( + n_alive, + n_step, + rays_alive, + rays_t, + sigmas, + fg_color, + ts, + opacity, + depth, + image, + T_thresh, + binarize, + ) + + # TODO(ahmadki): add optoin to return normal_image, like in training + + rays_alive = rays_alive[rays_alive >= 0] + + step += n_step + + # mix background color + bg_color = self.background(rays_d) # [N, 3] + image = image + (1 - opacity).unsqueeze(-1) * bg_color + + results = { + "image": image.view(B, H, W, 3), + "depth": depth.view(B, H, W, 1), + "opacity": opacity.view(B, H, W, 1), + "dirs": dirs, + } + if normals is not None: + results["normals"] = normals + if weights is not None: + results["weights"] = weights + if normal_image is not None: + results["normal_image"] = normal_image.view(B, H, W, 3) + if normals_perturb is not None: + results["normal_perturb"] = normals_perturb + + return results diff --git a/nemo/collections/multimodal/modules/nerf/utils/__init__.py b/nemo/collections/multimodal/modules/nerf/utils/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/modules/nerf/utils/activation.py b/nemo/collections/multimodal/modules/nerf/utils/activation.py new file mode 100644 index 000000000000..fa7f3c60829a --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/utils/activation.py @@ -0,0 +1,20 @@ +import torch +from torch.autograd import Function +from torch.cuda.amp import custom_bwd, custom_fwd + + +class _trunc_exp(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float) + def forward(ctx, x): + ctx.save_for_backward(x) + return torch.exp(x) + + @staticmethod + @custom_bwd + def backward(ctx, g): + x = ctx.saved_tensors[0] + return g * torch.exp(x.clamp(max=15)) + + +trunc_exp = _trunc_exp.apply diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/__init__.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/encoding.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/encoding.py new file mode 100644 index 000000000000..3d2c1e8c74e6 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/encoding.py @@ -0,0 +1,137 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class FreqEncoder_torch(nn.Module): + def __init__( + self, + input_dim, + max_freq_log2, + N_freqs, + log_sampling=True, + include_input=True, + periodic_fns=(torch.sin, torch.cos), + ): + + super().__init__() + + self.input_dim = input_dim + self.include_input = include_input + self.periodic_fns = periodic_fns + self.N_freqs = N_freqs + + self.output_dim = 0 + if self.include_input: + self.output_dim += self.input_dim + + self.output_dim += self.input_dim * N_freqs * len(self.periodic_fns) + + if log_sampling: + self.freq_bands = 2 ** torch.linspace(0, max_freq_log2, N_freqs) + else: + self.freq_bands = torch.linspace(2 ** 0, 2 ** max_freq_log2, N_freqs) + + self.freq_bands = self.freq_bands.numpy().tolist() + + def forward(self, input, max_level=None, **kwargs): + + if max_level is None: + max_level = self.N_freqs + else: + max_level = int(max_level * self.N_freqs) + + out = [] + if self.include_input: + out.append(input) + + for i in range(max_level): + freq = self.freq_bands[i] + for p_fn in self.periodic_fns: + out.append(p_fn(input * freq)) + + # append 0 + if self.N_freqs - max_level > 0: + out.append( + torch.zeros( + input.shape[0], + (self.N_freqs - max_level) * 2 * input.shape[1], + device=input.device, + dtype=input.dtype, + ) + ) + + out = torch.cat(out, dim=-1) + + return out + + +def get_encoder( + encoder_type, + input_dim=3, + multires=6, + degree=4, + num_levels=16, + level_dim=2, + base_resolution=16, + log2_hashmap_size=19, + desired_resolution=2048, + align_corners=False, + interpolation='linear', + **kwargs +): + + if encoder_type is None: + return lambda x, **kwargs: x, input_dim + + elif encoder_type == 'frequency_torch': + encoder = FreqEncoder_torch( + input_dim=input_dim, max_freq_log2=multires - 1, N_freqs=multires, log_sampling=True + ) + + elif encoder_type == 'frequency': # CUDA implementation, faster than torch. + from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.freqencoder import FreqEncoder + + encoder = FreqEncoder(input_dim=input_dim, degree=multires) + + elif encoder_type == 'sphere_harmonics': + from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.shencoder import SHEncoder + + encoder = SHEncoder(input_dim=input_dim, degree=degree) + + elif encoder_type == 'hashgrid': + from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.gridencoder import GridEncoder + + encoder = GridEncoder( + input_dim=input_dim, + num_levels=num_levels, + level_dim=level_dim, + base_resolution=base_resolution, + log2_hashmap_size=log2_hashmap_size, + desired_resolution=desired_resolution, + gridtype='hash', + align_corners=align_corners, + interpolation=interpolation, + ) + + elif encoder_type == 'tiledgrid': + from nemo.collections.multimodal.modules.nerf.utils.torch_ngp.gridencoder import GridEncoder + + encoder = GridEncoder( + input_dim=input_dim, + num_levels=num_levels, + level_dim=level_dim, + base_resolution=base_resolution, + log2_hashmap_size=log2_hashmap_size, + desired_resolution=desired_resolution, + gridtype='tiled', + align_corners=align_corners, + interpolation=interpolation, + ) + + else: + raise NotImplementedError( + 'Unknown encoder type, choose from [None, frequency, sphere_harmonics, hashgrid, tiledgrid]' + ) + + return encoder, encoder.output_dim diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/freqencoder.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/freqencoder.py new file mode 100644 index 000000000000..1c217f9c8b7d --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/freqencoder.py @@ -0,0 +1,73 @@ +import _freqencoder as _backend +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.cuda.amp import custom_bwd, custom_fwd + + +class _freq_encoder(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) # force float32 for better precision + def forward(ctx, inputs, degree, output_dim): + # inputs: [B, input_dim], float + # RETURN: [B, F], float + + if not inputs.is_cuda: + inputs = inputs.cuda() + inputs = inputs.contiguous() + + B, input_dim = inputs.shape # batch size, coord dim + + outputs = torch.empty(B, output_dim, dtype=inputs.dtype, device=inputs.device) + + _backend.freq_encode_forward(inputs, B, input_dim, degree, output_dim, outputs) + + ctx.save_for_backward(inputs, outputs) + ctx.dims = [B, input_dim, degree, output_dim] + + return outputs + + @staticmethod + # @once_differentiable + @custom_bwd + def backward(ctx, grad): + # grad: [B, C * C] + + grad = grad.contiguous() + inputs, outputs = ctx.saved_tensors + B, input_dim, degree, output_dim = ctx.dims + + grad_inputs = torch.zeros_like(inputs) + _backend.freq_encode_backward(grad, outputs, B, input_dim, degree, output_dim, grad_inputs) + + return grad_inputs, None, None + + +freq_encode = _freq_encoder.apply + + +class FreqEncoder(nn.Module): + def __init__(self, input_dim=3, degree=4): + super().__init__() + + self.input_dim = input_dim + self.degree = degree + self.output_dim = input_dim + input_dim * 2 * degree + + def __repr__(self): + return f"FreqEncoder: input_dim={self.input_dim} degree={self.degree} output_dim={self.output_dim}" + + def forward(self, inputs, **kwargs): + # inputs: [..., input_dim] + # return: [..., ] + + prefix_shape = list(inputs.shape[:-1]) + inputs = inputs.reshape(-1, self.input_dim) + + outputs = freq_encode(inputs, self.degree, self.output_dim) + + outputs = outputs.reshape(prefix_shape + [self.output_dim]) + + return outputs diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/gridencoder.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/gridencoder.py new file mode 100644 index 000000000000..1a7487aee6f3 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/gridencoder.py @@ -0,0 +1,287 @@ +import math + +import _gridencoder as _backend +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.cuda.amp import custom_bwd, custom_fwd + +_gridtype_to_id = { + 'hash': 0, + 'tiled': 1, +} + +_interp_to_id = { + 'linear': 0, + 'smoothstep': 1, +} + + +class _grid_encode(Function): + @staticmethod + @custom_fwd + def forward( + ctx, + inputs, + embeddings, + offsets, + per_level_scale, + base_resolution, + calc_grad_inputs=False, + gridtype=0, + align_corners=False, + interpolation=0, + max_level=None, + ): + # inputs: [B, D], float in [0, 1] + # embeddings: [sO, C], float + # offsets: [L + 1], int + # RETURN: [B, F], float + + inputs = inputs.contiguous() + + B, D = inputs.shape # batch size, coord dim + L = offsets.shape[0] - 1 # level + C = embeddings.shape[1] # embedding dim for each level + S = np.log2(per_level_scale) # resolution multiplier at each level, apply log2 for later CUDA exp2f + H = base_resolution # base resolution + + max_level = L if max_level is None else max(min(int(math.ceil(max_level * L)), L), 1) + + # manually handle autocast (only use half precision embeddings, inputs must be float for enough precision) + # if C % 2 != 0, force float, since half for atomicAdd is very slow. + if torch.is_autocast_enabled() and C % 2 == 0: + embeddings = embeddings.to(torch.half) + + # L first, optimize cache for cuda kernel, but needs an extra permute later + outputs = torch.empty(L, B, C, device=inputs.device, dtype=embeddings.dtype) + + # zero init if we only calculate partial levels + if max_level < L: + outputs.zero_() + + if calc_grad_inputs: + dy_dx = torch.empty(B, L * D * C, device=inputs.device, dtype=embeddings.dtype) + if max_level < L: + dy_dx.zero_() + else: + dy_dx = None + + _backend.grid_encode_forward( + inputs, + embeddings, + offsets, + outputs, + B, + D, + C, + L, + max_level, + S, + H, + dy_dx, + gridtype, + align_corners, + interpolation, + ) + + # permute back to [B, L * C] + outputs = outputs.permute(1, 0, 2).reshape(B, L * C) + + ctx.save_for_backward(inputs, embeddings, offsets, dy_dx) + ctx.dims = [B, D, C, L, S, H, gridtype, interpolation, max_level] + ctx.align_corners = align_corners + + return outputs + + @staticmethod + # @once_differentiable + @custom_bwd + def backward(ctx, grad): + + inputs, embeddings, offsets, dy_dx = ctx.saved_tensors + B, D, C, L, S, H, gridtype, interpolation, max_level = ctx.dims + align_corners = ctx.align_corners + + # grad: [B, L * C] --> [L, B, C] + grad = grad.view(B, L, C).permute(1, 0, 2).contiguous() + + grad_embeddings = torch.zeros_like(embeddings) + + if dy_dx is not None: + grad_inputs = torch.zeros_like(inputs, dtype=embeddings.dtype) + else: + grad_inputs = None + + _backend.grid_encode_backward( + grad, + inputs, + embeddings, + offsets, + grad_embeddings, + B, + D, + C, + L, + max_level, + S, + H, + dy_dx, + grad_inputs, + gridtype, + align_corners, + interpolation, + ) + + if dy_dx is not None: + grad_inputs = grad_inputs.to(inputs.dtype) + + return grad_inputs, grad_embeddings, None, None, None, None, None, None, None, None + + +grid_encode = _grid_encode.apply + + +class GridEncoder(nn.Module): + def __init__( + self, + input_dim=3, + num_levels=16, + level_dim=2, + per_level_scale=2, + base_resolution=16, + log2_hashmap_size=19, + desired_resolution=None, + gridtype='hash', + align_corners=False, + interpolation='linear', + ): + super().__init__() + + # the finest resolution desired at the last level, if provided, overridee per_level_scale + if desired_resolution is not None: + per_level_scale = np.exp2(np.log2(desired_resolution / base_resolution) / (num_levels - 1)) + + self.input_dim = input_dim # coord dims, 2 or 3 + self.num_levels = num_levels # num levels, each level multiply resolution by 2 + self.level_dim = level_dim # encode channels per level + self.per_level_scale = per_level_scale # multiply resolution by this scale at each level. + self.log2_hashmap_size = log2_hashmap_size + self.base_resolution = base_resolution + self.output_dim = num_levels * level_dim + self.gridtype = gridtype + self.gridtype_id = _gridtype_to_id[gridtype] # "tiled" or "hash" + self.interpolation = interpolation + self.interp_id = _interp_to_id[interpolation] # "linear" or "smoothstep" + self.align_corners = align_corners + + # allocate parameters + offsets = [] + offset = 0 + self.max_params = 2 ** log2_hashmap_size + for i in range(num_levels): + resolution = int(np.ceil(base_resolution * per_level_scale ** i)) + params_in_level = min(self.max_params, (resolution) ** input_dim) # limit max number + params_in_level = int(np.ceil(params_in_level / 8) * 8) # make divisible + offsets.append(offset) + offset += params_in_level + offsets.append(offset) + offsets = torch.from_numpy(np.array(offsets, dtype=np.int32)) + self.register_buffer('offsets', offsets) + + self.n_params = offsets[-1] * level_dim + + # parameters + self.embeddings = nn.Parameter(torch.empty(offset, level_dim)) + + self.reset_parameters() + + def reset_parameters(self): + std = 1e-4 + self.embeddings.data.uniform_(-std, std) + + def __repr__(self): + return f"GridEncoder: input_dim={self.input_dim} num_levels={self.num_levels} level_dim={self.level_dim} resolution={self.base_resolution} -> {int(round(self.base_resolution * self.per_level_scale ** (self.num_levels - 1)))} per_level_scale={self.per_level_scale:.4f} params={tuple(self.embeddings.shape)} gridtype={self.gridtype} align_corners={self.align_corners} interpolation={self.interpolation}" + + def forward(self, inputs, bound=1, max_level=None): + # inputs: [..., input_dim], normalized real world positions in [-bound, bound] + # max_level: only calculate first max_level levels (None will use all levels) + # return: [..., num_levels * level_dim] + + inputs = (inputs + bound) / (2 * bound) # map to [0, 1] + + # print('inputs', inputs.shape, inputs.dtype, inputs.min().item(), inputs.max().item()) + + prefix_shape = list(inputs.shape[:-1]) + inputs = inputs.view(-1, self.input_dim) + + outputs = grid_encode( + inputs, + self.embeddings, + self.offsets, + self.per_level_scale, + self.base_resolution, + inputs.requires_grad, + self.gridtype_id, + self.align_corners, + self.interp_id, + max_level, + ) + outputs = outputs.view(prefix_shape + [self.output_dim]) + + # print('outputs', outputs.shape, outputs.dtype, outputs.min().item(), outputs.max().item()) + + return outputs + + # always run in float precision! + @torch.cuda.amp.autocast(enabled=False) + def grad_total_variation(self, weight=1e-7, inputs=None, bound=1, B=1000000): + # inputs: [..., input_dim], float in [-b, b], location to calculate TV loss. + + D = self.input_dim + C = self.embeddings.shape[1] # embedding dim for each level + L = self.offsets.shape[0] - 1 # level + S = np.log2(self.per_level_scale) # resolution multiplier at each level, apply log2 for later CUDA exp2f + H = self.base_resolution # base resolution + + if inputs is None: + # randomized in [0, 1] + inputs = torch.rand(B, self.input_dim, device=self.embeddings.device) + else: + inputs = (inputs + bound) / (2 * bound) # map to [0, 1] + inputs = inputs.view(-1, self.input_dim) + B = inputs.shape[0] + + if self.embeddings.grad is None: + raise ValueError('grad is None, should be called after loss.backward() and before optimizer.step()!') + + _backend.grad_total_variation( + inputs, + self.embeddings, + self.embeddings.grad, + self.offsets, + weight, + B, + D, + C, + L, + S, + H, + self.gridtype_id, + self.align_corners, + ) + + @torch.cuda.amp.autocast(enabled=False) + def grad_weight_decay(self, weight=0.1): + # level-wise meaned weight decay (ref: zip-nerf) + + B = self.embeddings.shape[0] # size of embedding + C = self.embeddings.shape[1] # embedding dim for each level + L = self.offsets.shape[0] - 1 # level + + if self.embeddings.grad is None: + raise ValueError('grad is None, should be called after loss.backward() and before optimizer.step()!') + + _backend.grad_weight_decay(self.embeddings, self.embeddings.grad, self.offsets, weight, B, C, L) diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/raymarching.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/raymarching.py new file mode 100644 index 000000000000..c0f15641fa5a --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/raymarching.py @@ -0,0 +1,551 @@ +import time + +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.cuda.amp import custom_bwd, custom_fwd + +# lazy building: +# `import raymarching` will not immediately build the extension, only if you actually call any functions. + +BACKEND = None + + +def get_backend(): + global BACKEND + + if BACKEND is None: + try: + import _raymarching as _backend + except ImportError: + from .backend import _backend + + BACKEND = _backend + + return BACKEND + + +# ---------------------------------------- +# utils +# ---------------------------------------- + + +class _near_far_from_aabb(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, rays_o, rays_d, aabb, min_near=0.2): + ''' near_far_from_aabb, CUDA implementation + Calculate rays' intersection time (near and far) with aabb + Args: + rays_o: float, [N, 3] + rays_d: float, [N, 3] + aabb: float, [6], (xmin, ymin, zmin, xmax, ymax, zmax) + min_near: float, scalar + Returns: + nears: float, [N] + fars: float, [N] + ''' + if not rays_o.is_cuda: + rays_o = rays_o.cuda() + if not rays_d.is_cuda: + rays_d = rays_d.cuda() + + rays_o = rays_o.contiguous().view(-1, 3) + rays_d = rays_d.contiguous().view(-1, 3) + + N = rays_o.shape[0] # num rays + + nears = torch.empty(N, dtype=rays_o.dtype, device=rays_o.device) + fars = torch.empty(N, dtype=rays_o.dtype, device=rays_o.device) + + get_backend().near_far_from_aabb(rays_o, rays_d, aabb, N, min_near, nears, fars) + + return nears, fars + + +near_far_from_aabb = _near_far_from_aabb.apply + + +class _sph_from_ray(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, rays_o, rays_d, radius): + ''' sph_from_ray, CUDA implementation + get spherical coordinate on the background sphere from rays. + Assume rays_o are inside the Sphere(radius). + Args: + rays_o: [N, 3] + rays_d: [N, 3] + radius: scalar, float + Return: + coords: [N, 2], in [-1, 1], theta and phi on a sphere. (further-surface) + ''' + if not rays_o.is_cuda: + rays_o = rays_o.cuda() + if not rays_d.is_cuda: + rays_d = rays_d.cuda() + + rays_o = rays_o.contiguous().view(-1, 3) + rays_d = rays_d.contiguous().view(-1, 3) + + N = rays_o.shape[0] # num rays + + coords = torch.empty(N, 2, dtype=rays_o.dtype, device=rays_o.device) + + get_backend().sph_from_ray(rays_o, rays_d, radius, N, coords) + + return coords + + +sph_from_ray = _sph_from_ray.apply + + +class _morton3D(Function): + @staticmethod + def forward(ctx, coords): + ''' morton3D, CUDA implementation + Args: + coords: [N, 3], int32, in [0, 128) (for some reason there is no uint32 tensor in torch...) + TODO: check if the coord range is valid! (current 128 is safe) + Returns: + indices: [N], int32, in [0, 128^3) + + ''' + if not coords.is_cuda: + coords = coords.cuda() + + N = coords.shape[0] + + indices = torch.empty(N, dtype=torch.int32, device=coords.device) + + get_backend().morton3D(coords.int(), N, indices) + + return indices + + +morton3D = _morton3D.apply + + +class _morton3D_invert(Function): + @staticmethod + def forward(ctx, indices): + ''' morton3D_invert, CUDA implementation + Args: + indices: [N], int32, in [0, 128^3) + Returns: + coords: [N, 3], int32, in [0, 128) + + ''' + if not indices.is_cuda: + indices = indices.cuda() + + N = indices.shape[0] + + coords = torch.empty(N, 3, dtype=torch.int32, device=indices.device) + + get_backend().morton3D_invert(indices.int(), N, coords) + + return coords + + +morton3D_invert = _morton3D_invert.apply + + +class _packbits(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, grid, thresh, bitfield=None): + ''' packbits, CUDA implementation + Pack up the density grid into a bit field to accelerate ray marching. + Args: + grid: float, [C, H * H * H], assume H % 2 == 0 + thresh: float, threshold + Returns: + bitfield: uint8, [C, H * H * H / 8] + ''' + if not grid.is_cuda: + grid = grid.cuda() + grid = grid.contiguous() + + C = grid.shape[0] + H3 = grid.shape[1] + N = C * H3 // 8 + + if bitfield is None: + bitfield = torch.empty(N, dtype=torch.uint8, device=grid.device) + + get_backend().packbits(grid, N, thresh, bitfield) + + return bitfield + + +packbits = _packbits.apply + + +class _flatten_rays(Function): + @staticmethod + def forward(ctx, rays, M): + ''' flatten rays + Args: + rays: [N, 2], all rays' (point_offset, point_count), + M: scalar, int, count of points (we cannot get this info from rays unfortunately...) + Returns: + res: [M], flattened ray index. + ''' + if not rays.is_cuda: + rays = rays.cuda() + rays = rays.contiguous() + + N = rays.shape[0] + + res = torch.zeros(M, dtype=torch.int, device=rays.device) + + get_backend().flatten_rays(rays, N, M, res) + + return res + + +flatten_rays = _flatten_rays.apply + +# ---------------------------------------- +# train functions +# ---------------------------------------- + + +class _march_rays_train(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward( + ctx, + rays_o, + rays_d, + bound, + density_bitfield, + C, + H, + nears, + fars, + perturb=False, + dt_gamma=0, + max_steps=1024, + contract=False, + ): + ''' march rays to generate points (forward only) + Args: + rays_o/d: float, [N, 3] + bound: float, scalar + density_bitfield: uint8: [CHHH // 8] + C: int + H: int + nears/fars: float, [N] + step_counter: int32, (2), used to count the actual number of generated points. + mean_count: int32, estimated mean steps to accelerate training. (but will randomly drop rays if the actual point count exceeded this threshold.) + perturb: bool + align: int, pad output so its size is dividable by align, set to -1 to disable. + force_all_rays: bool, ignore step_counter and mean_count, always calculate all rays. Useful if rendering the whole image, instead of some rays. + dt_gamma: float, called cone_angle in instant-ngp, exponentially accelerate ray marching if > 0. (very significant effect, but generally lead to worse performance) + max_steps: int, max number of sampled points along each ray, also affect min_stepsize. + Returns: + xyzs: float, [M, 3], all generated points' coords. (all rays concated, need to use `rays` to extract points belonging to each ray) + dirs: float, [M, 3], all generated points' view dirs. + ts: float, [M, 2], all generated points' ts. + rays: int32, [N, 2], all rays' (point_offset, point_count), e.g., xyzs[rays[i, 0]:(rays[i, 0] + rays[i, 1])] --> points belonging to rays[i, 0] + ''' + + if not rays_o.is_cuda: + rays_o = rays_o.cuda() + if not rays_d.is_cuda: + rays_d = rays_d.cuda() + if not density_bitfield.is_cuda: + density_bitfield = density_bitfield.cuda() + + rays_o = rays_o.float().contiguous().view(-1, 3) + rays_d = rays_d.float().contiguous().view(-1, 3) + density_bitfield = density_bitfield.contiguous() + + N = rays_o.shape[0] # num rays + + step_counter = torch.zeros(1, dtype=torch.int32, device=rays_o.device) # point counter, ray counter + + if perturb: + noises = torch.rand(N, dtype=rays_o.dtype, device=rays_o.device) + else: + noises = torch.zeros(N, dtype=rays_o.dtype, device=rays_o.device) + + # first pass: write rays, get total number of points M to render + rays = torch.empty(N, 2, dtype=torch.int32, device=rays_o.device) # id, offset, num_steps + get_backend().march_rays_train( + rays_o, + rays_d, + density_bitfield, + bound, + contract, + dt_gamma, + max_steps, + N, + C, + H, + nears, + fars, + None, + None, + None, + rays, + step_counter, + noises, + ) + + # allocate based on M + M = step_counter.item() + # print(M, N) + # print(rays[:, 0].max()) + + xyzs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) + dirs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) + ts = torch.zeros(M, 2, dtype=rays_o.dtype, device=rays_o.device) + + # second pass: write outputs + get_backend().march_rays_train( + rays_o, + rays_d, + density_bitfield, + bound, + contract, + dt_gamma, + max_steps, + N, + C, + H, + nears, + fars, + xyzs, + dirs, + ts, + rays, + step_counter, + noises, + ) + + return xyzs, dirs, ts, rays + + +march_rays_train = _march_rays_train.apply + + +class _composite_rays_train(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, sigmas, rgbs, ts, rays, T_thresh=1e-4, binarize=False): + ''' composite rays' rgbs, according to the ray marching formula. + Args: + rgbs: float, [M, 3] + sigmas: float, [M,] + ts: float, [M, 2] + rays: int32, [N, 3] + Returns: + weights: float, [M] + weights_sum: float, [N,], the alpha channel + depth: float, [N, ], the Depth + image: float, [N, 3], the RGB channel (after multiplying alpha!) + ''' + + sigmas = sigmas.float().contiguous() + rgbs = rgbs.float().contiguous() + + M = sigmas.shape[0] + N = rays.shape[0] + + weights = torch.zeros(M, dtype=sigmas.dtype, device=sigmas.device) # may leave unmodified, so init with 0 + weights_sum = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + + depth = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + image = torch.empty(N, 3, dtype=sigmas.dtype, device=sigmas.device) + + get_backend().composite_rays_train_forward( + sigmas, rgbs, ts, rays, M, N, T_thresh, binarize, weights, weights_sum, depth, image + ) + + ctx.save_for_backward(sigmas, rgbs, ts, rays, weights_sum, depth, image) + ctx.dims = [M, N, T_thresh, binarize] + + return weights, weights_sum, depth, image + + @staticmethod + @custom_bwd + def backward(ctx, grad_weights, grad_weights_sum, grad_depth, grad_image): + + grad_weights = grad_weights.contiguous() + grad_weights_sum = grad_weights_sum.contiguous() + grad_depth = grad_depth.contiguous() + grad_image = grad_image.contiguous() + + sigmas, rgbs, ts, rays, weights_sum, depth, image = ctx.saved_tensors + M, N, T_thresh, binarize = ctx.dims + + grad_sigmas = torch.zeros_like(sigmas) + grad_rgbs = torch.zeros_like(rgbs) + + get_backend().composite_rays_train_backward( + grad_weights, + grad_weights_sum, + grad_depth, + grad_image, + sigmas, + rgbs, + ts, + rays, + weights_sum, + depth, + image, + M, + N, + T_thresh, + binarize, + grad_sigmas, + grad_rgbs, + ) + + return grad_sigmas, grad_rgbs, None, None, None, None + + +composite_rays_train = _composite_rays_train.apply + +# ---------------------------------------- +# infer functions +# ---------------------------------------- + + +class _march_rays(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward( + ctx, + n_alive, + n_step, + rays_alive, + rays_t, + rays_o, + rays_d, + bound, + density_bitfield, + C, + H, + near, + far, + perturb=False, + dt_gamma=0, + max_steps=1024, + contract=False, + ): + ''' march rays to generate points (forward only, for inference) + Args: + n_alive: int, number of alive rays + n_step: int, how many steps we march + rays_alive: int, [N], the alive rays' IDs in N (N >= n_alive, but we only use first n_alive) + rays_t: float, [N], the alive rays' time, we only use the first n_alive. + rays_o/d: float, [N, 3] + bound: float, scalar + density_bitfield: uint8: [CHHH // 8] + C: int + H: int + nears/fars: float, [N] + align: int, pad output so its size is dividable by align, set to -1 to disable. + perturb: bool/int, int > 0 is used as the random seed. + dt_gamma: float, called cone_angle in instant-ngp, exponentially accelerate ray marching if > 0. (very significant effect, but generally lead to worse performance) + max_steps: int, max number of sampled points along each ray, also affect min_stepsize. + Returns: + xyzs: float, [n_alive * n_step, 3], all generated points' coords + dirs: float, [n_alive * n_step, 3], all generated points' view dirs. + ts: float, [n_alive * n_step, 2], all generated points' ts + ''' + + if not rays_o.is_cuda: + rays_o = rays_o.cuda() + if not rays_d.is_cuda: + rays_d = rays_d.cuda() + + rays_o = rays_o.float().contiguous().view(-1, 3) + rays_d = rays_d.float().contiguous().view(-1, 3) + + M = n_alive * n_step + + xyzs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) + dirs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) + ts = torch.zeros(M, 2, dtype=rays_o.dtype, device=rays_o.device) # 2 vals, one for rgb, one for depth + + if perturb: + # torch.manual_seed(perturb) # test_gui uses spp index as seed + noises = torch.rand(n_alive, dtype=rays_o.dtype, device=rays_o.device) + else: + noises = torch.zeros(n_alive, dtype=rays_o.dtype, device=rays_o.device) + + get_backend().march_rays( + n_alive, + n_step, + rays_alive, + rays_t, + rays_o, + rays_d, + bound, + contract, + dt_gamma, + max_steps, + C, + H, + density_bitfield, + near, + far, + xyzs, + dirs, + ts, + noises, + ) + + return xyzs, dirs, ts + + +march_rays = _march_rays.apply + + +class _composite_rays(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) # need to cast sigmas & rgbs to float + def forward( + ctx, + n_alive, + n_step, + rays_alive, + rays_t, + sigmas, + rgbs, + ts, + weights_sum, + depth, + image, + T_thresh=1e-2, + binarize=False, + ): + ''' composite rays' rgbs, according to the ray marching formula. (for inference) + Args: + n_alive: int, number of alive rays + n_step: int, how many steps we march + rays_alive: int, [n_alive], the alive rays' IDs in N (N >= n_alive) + rays_t: float, [N], the alive rays' time + sigmas: float, [n_alive * n_step,] + rgbs: float, [n_alive * n_step, 3] + ts: float, [n_alive * n_step, 2] + In-place Outputs: + weights_sum: float, [N,], the alpha channel + depth: float, [N,], the depth value + image: float, [N, 3], the RGB channel (after multiplying alpha!) + ''' + sigmas = sigmas.float().contiguous() + rgbs = rgbs.float().contiguous() + get_backend().composite_rays( + n_alive, n_step, T_thresh, binarize, rays_alive, rays_t, sigmas, rgbs, ts, weights_sum, depth, image + ) + return tuple() + + +composite_rays = _composite_rays.apply diff --git a/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/shencoder.py b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/shencoder.py new file mode 100644 index 000000000000..a97332089e52 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/utils/torch_ngp/shencoder.py @@ -0,0 +1,82 @@ +import _shencoder as _backend +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.cuda.amp import custom_bwd, custom_fwd + + +class _sh_encoder(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) # force float32 for better precision + def forward(ctx, inputs, degree, calc_grad_inputs=False): + # inputs: [B, input_dim], float in [-1, 1] + # RETURN: [B, F], float + + inputs = inputs.contiguous() + B, input_dim = inputs.shape # batch size, coord dim + output_dim = degree ** 2 + + outputs = torch.empty(B, output_dim, dtype=inputs.dtype, device=inputs.device) + + if calc_grad_inputs: + dy_dx = torch.empty(B, input_dim * output_dim, dtype=inputs.dtype, device=inputs.device) + else: + dy_dx = None + + _backend.sh_encode_forward(inputs, outputs, B, input_dim, degree, dy_dx) + + ctx.save_for_backward(inputs, dy_dx) + ctx.dims = [B, input_dim, degree] + + return outputs + + @staticmethod + # @once_differentiable + @custom_bwd + def backward(ctx, grad): + # grad: [B, C * C] + + inputs, dy_dx = ctx.saved_tensors + + if dy_dx is not None: + grad = grad.contiguous() + B, input_dim, degree = ctx.dims + grad_inputs = torch.zeros_like(inputs) + _backend.sh_encode_backward(grad, inputs, B, input_dim, degree, dy_dx, grad_inputs) + return grad_inputs, None, None + else: + return None, None, None + + +sh_encode = _sh_encoder.apply + + +class SHEncoder(nn.Module): + def __init__(self, input_dim=3, degree=4): + super().__init__() + + self.input_dim = input_dim # coord dims, must be 3 + self.degree = degree # 0 ~ 4 + self.output_dim = degree ** 2 + + assert self.input_dim == 3, "SH encoder only support input dim == 3" + assert self.degree > 0 and self.degree <= 8, "SH encoder only supports degree in [1, 8]" + + def __repr__(self): + return f"SHEncoder: input_dim={self.input_dim} degree={self.degree}" + + def forward(self, inputs, size=1): + # inputs: [..., input_dim], normalized real world positions in [-size, size] + # return: [..., degree^2] + + inputs = inputs / size # [-1, 1] + + prefix_shape = list(inputs.shape[:-1]) + inputs = inputs.reshape(-1, self.input_dim) + + outputs = sh_encode(inputs, self.degree, inputs.requires_grad) + outputs = outputs.reshape(prefix_shape + [self.output_dim]) + + return outputs diff --git a/nemo/collections/multimodal/modules/nerf/utils/trt_engine.py b/nemo/collections/multimodal/modules/nerf/utils/trt_engine.py new file mode 100644 index 000000000000..ebf0a43da596 --- /dev/null +++ b/nemo/collections/multimodal/modules/nerf/utils/trt_engine.py @@ -0,0 +1,173 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +import os +from collections import OrderedDict +from copy import copy + +import numpy as np +import tensorrt as trt +import torch +from einops import repeat +from polygraphy import cuda +from polygraphy.backend.common import bytes_from_path +from polygraphy.backend.trt import engine_from_bytes +from polygraphy.backend.trt import util as trt_util + +TRT_LOGGER = trt.Logger(trt.Logger.ERROR) + +# Map of numpy dtype -> torch dtype +numpy_to_torch_dtype_dict = { + np.uint8: torch.uint8, + np.int8: torch.int8, + np.int16: torch.int16, + np.int32: torch.int32, + np.int64: torch.int64, + np.float16: torch.float16, + np.float32: torch.float32, + np.float64: torch.float64, + np.complex64: torch.complex64, + np.complex128: torch.complex128, +} +if np.version.full_version >= "1.24.0": + numpy_to_torch_dtype_dict[np.bool_] = torch.bool +else: + numpy_to_torch_dtype_dict[np.bool] = torch.bool + +# Map of torch dtype -> numpy dtype +torch_to_numpy_dtype_dict = {value: key for (key, value) in numpy_to_torch_dtype_dict.items()} + + +def device_view(t): + return cuda.DeviceView(ptr=t.data_ptr(), shape=t.shape, dtype=torch_to_numpy_dtype_dict[t.dtype]) + + +class Engine: + def __init__( + self, engine_path, + ): + self.engine_path = engine_path + self.engine = None + self.context = None + self.buffers = OrderedDict() + self.tensors = OrderedDict() + + def __del__(self): + [buf.free() for buf in self.buffers.values() if isinstance(buf, cuda.DeviceArray)] + del self.engine + del self.context + del self.buffers + del self.tensors + + def set_engine(self, stream, shape_dict): + self.load() + self.activate() + self.stream = stream + self.allocate_buffers(shape_dict, device='cuda') + + def load(self): + print(f"Loading TensorRT engine: {self.engine_path}") + self.engine = engine_from_bytes(bytes_from_path(self.engine_path)) + + def activate(self): + self.context = self.engine.create_execution_context() + + def allocate_buffers(self, shape_dict=None, device="cuda"): + for idx in range(trt_util.get_bindings_per_profile(self.engine)): + binding = self.engine[idx] + if shape_dict and binding in shape_dict: + shape = shape_dict[binding] + else: + shape = self.engine.get_binding_shape(binding) + dtype = trt.nptype(self.engine.get_binding_dtype(binding)) + if self.engine.binding_is_input(binding): + self.context.set_binding_shape(idx, shape) + tensor = torch.empty(tuple(shape), dtype=numpy_to_torch_dtype_dict[dtype]).to(device=device) + self.tensors[binding] = tensor + self.buffers[binding] = cuda.DeviceView(ptr=tensor.data_ptr(), shape=shape, dtype=dtype) + + def infer(self, feed_dict): + stream = self.stream + start_binding, end_binding = trt_util.get_active_profile_bindings(self.context) + # shallow copy of ordered dict + device_buffers = copy(self.buffers) + for name, buf in feed_dict.items(): + assert isinstance(buf, cuda.DeviceView) + device_buffers[name] = buf + bindings = [0] * start_binding + [buf.ptr for buf in device_buffers.values()] + noerror = self.context.execute_async_v2(bindings=bindings, stream_handle=stream.ptr) + if not noerror: + raise ValueError(f"ERROR: inference failed.") + + return self.tensors + + +def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + if schedule == "linear": + betas = torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2 + + elif schedule == "cosine": + timesteps = torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s + alphas = timesteps / (1 + cosine_s) * np.pi / 2 + alphas = torch.cos(alphas).pow(2) + alphas = alphas / alphas[0] + betas = 1 - alphas[1:] / alphas[:-1] + betas = np.clip(betas, a_min=0, a_max=0.999) + + elif schedule == "sqrt_linear": + betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) + elif schedule == "sqrt": + betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5 + else: + raise ValueError(f"schedule '{schedule}' unknown.") + return betas.numpy() + + +def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps, verbose=True): + if ddim_discr_method == 'uniform': + c = num_ddpm_timesteps // num_ddim_timesteps + ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c))) + elif ddim_discr_method == 'quad': + ddim_timesteps = ((np.linspace(0, np.sqrt(num_ddpm_timesteps * 0.8), num_ddim_timesteps)) ** 2).astype(int) + else: + raise NotImplementedError(f'There is no ddim discretization method called "{ddim_discr_method}"') + + # assert ddim_timesteps.shape[0] == num_ddim_timesteps + # add one to get the final alpha values right (the ones from first scale to data during sampling) + steps_out = ddim_timesteps + 1 + if verbose: + print(f'Selected timesteps for ddim sampler: {steps_out}') + return steps_out + + +def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbose=True): + # select alphas for computing the variance schedule + alphas = alphacums[ddim_timesteps] + alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_timesteps[:-1]].tolist()) + + # according the the formula provided in https://arxiv.org/abs/2010.02502 + sigmas = eta * np.sqrt((1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev)) + if verbose: + print(f'Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}') + print( + f'For the chosen value of eta, which is {eta}, ' + f'this results in the following sigma_t schedule for ddim sampler {sigmas}' + ) + return sigmas, alphas, alphas_prev + + +def noise_like(shape, device, repeat=False): + repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1))) + noise = lambda: torch.randn(shape, device=device) + return repeat_noise() if repeat else noise() diff --git a/nemo/collections/multimodal/modules/stable_diffusion/__init__.py b/nemo/collections/multimodal/modules/stable_diffusion/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/modules/stable_diffusion/attention.py b/nemo/collections/multimodal/modules/stable_diffusion/attention.py new file mode 100644 index 000000000000..07f37ece3d6f --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/attention.py @@ -0,0 +1,408 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +from inspect import isfunction + +import torch +import torch.nn.functional as F +from einops import rearrange, repeat +from group_norm import GroupNormOpt +from torch import einsum, nn +from torch._dynamo import disable + +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import checkpoint + + +def check_cuda(): + if not torch.cuda.is_available(): + raise RuntimeError('CUDA is not available') + cur_device = torch.cuda.current_device() + dprops = torch.cuda.get_device_properties(cur_device) + + is_sm75 = dprops.major == 7 and dprops.minor == 5 + is_sm8x = dprops.major == 8 and dprops.minor >= 0 + is_sm90 = dprops.major == 9 and dprops.minor >= 0 + + return is_sm8x or is_sm75 or is_sm90 + + +try: + import torch.nn as nn + from flash_attn.modules.mha import FlashCrossAttention, FlashSelfAttention + + flash_attn_installed = check_cuda() + print("FlashAttention Installed") + + # Disable TorchDynamo on FlashAttention + FlashSelfAttention.forward = disable(FlashSelfAttention.forward) + FlashCrossAttention.forward = disable(FlashCrossAttention.forward) +except ImportError: + flash_attn_installed = False + + +def exists(val): + return val is not None + + +def uniq(arr): + return {el: True for el in arr}.keys() + + +def default(val, d): + if exists(val): + return val + if isinstance(d, (torch.Tensor, float, int)): + return d + return d() if isfunction(d) else d + + +def max_neg_value(t): + return -torch.finfo(t.dtype).max + + +def init_(tensor): + dim = tensor.shape[-1] + std = 1 / math.sqrt(dim) + tensor.uniform_(-std, std) + return tensor + + +# feedforward +class GEGLU(nn.Module): + def __init__(self, dim_in, dim_out): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out * 2) + + def forward(self, x): + x, gate = self.proj(x).chunk(2, dim=-1) + return x * F.gelu(gate) + + +class FeedForward(nn.Module): + def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0): + super().__init__() + inner_dim = int(dim * mult) + dim_out = default(dim_out, dim) + project_in = nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU()) if not glu else GEGLU(dim, inner_dim) + + self.net = nn.Sequential(project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim_out)) + + def forward(self, x): + return self.net(x) + + +def zero_module(module): + """ + Zero out the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().zero_() + return module + + +def Normalize(in_channels): + return GroupNormOpt(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True) + + +class LinearAttention(nn.Module): + def __init__(self, dim, heads=4, dim_head=32): + super().__init__() + self.heads = heads + hidden_dim = dim_head * heads + self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias=False) + self.to_out = nn.Conv2d(hidden_dim, dim, 1) + + def forward(self, x): + b, c, h, w = x.shape + qkv = self.to_qkv(x) + q, k, v = rearrange(qkv, 'b (qkv heads c) h w -> qkv b heads c (h w)', heads=self.heads, qkv=3) + k = k.softmax(dim=-1) + context = torch.einsum('bhdn,bhen->bhde', k, v) + out = torch.einsum('bhde,bhdn->bhen', context, q) + out = rearrange(out, 'b heads c (h w) -> b (heads c) h w', heads=self.heads, h=h, w=w) + return self.to_out(out) + + +class SpatialSelfAttention(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.in_channels = in_channels + + self.norm = Normalize(in_channels) + self.q = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.k = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.v = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.proj_out = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, x): + h_ = x + h_ = self.norm(h_) + q = self.q(h_) + k = self.k(h_) + v = self.v(h_) + + # compute attention + b, c, h, w = q.shape + q = rearrange(q, 'b c h w -> b (h w) c') + k = rearrange(k, 'b c h w -> b c (h w)') + w_ = torch.einsum('bij,bjk->bik', q, k) + + w_ = w_ * (int(c) ** (-0.5)) + w_ = torch.nn.functional.softmax(w_, dim=2) + + # attend to values + v = rearrange(v, 'b c h w -> b c (h w)') + w_ = rearrange(w_, 'b i j -> b j i') + h_ = torch.einsum('bij,bjk->bik', v, w_) + h_ = rearrange(h_, 'b c (h w) -> b c h w', h=h) + h_ = self.proj_out(h_) + + return x + h_ + + +# b n (h d) -> (b h) n d +def rearrange_heads_outer(t: torch.Tensor, h: int) -> torch.Tensor: + b, n, ch = t.shape + return t.view(b, n, h, -1).transpose(1, 2).reshape(b * h, n, -1) + + +# (b h) n d -> b n (h d) +def rearrange_heads_inner(t: torch.Tensor, h: int) -> torch.Tensor: + b = t.shape[0] // h + n = t.shape[1] + return t.view(b, h, n, -1).transpose(1, 2).reshape(b, n, -1) + + +class CrossAttention(nn.Module): + def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.0, use_flash_attention=False): + super().__init__() + inner_dim = dim_head * heads + context_dim = default(context_dim, query_dim) + # make attention part be aware of self-attention/cross-attention + self.context_dim = context_dim + self.query_dim = query_dim + self.dim_head = dim_head + + self.scale = dim_head ** -0.5 + self.heads = heads + + self.to_q = nn.Linear(query_dim, inner_dim, bias=False) + self.to_k = nn.Linear(context_dim, inner_dim, bias=False) + self.to_v = nn.Linear(context_dim, inner_dim, bias=False) + + self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)) + self.use_flash_attention = use_flash_attention + + if dim_head <= 160 and (dim_head % 8) == 0 and flash_attn_installed: + if context_dim == query_dim: + self.flash_attn = FlashSelfAttention(softmax_scale=self.scale) + else: + self.flash_attn = FlashCrossAttention(softmax_scale=self.scale) + + def forward(self, x, context=None, mask=None): + h = self.heads + + q = self.to_q(x) + context = default(context, x) + k = self.to_k(context) + v = self.to_v(context) + + out = self._attention(q, k, v, mask) + + return self.to_out(out) + + def _attention(self, q, k, v, mask=None): + h = self.heads + + if ( + not flash_attn_installed + or not self.use_flash_attention + or q.dtype == torch.float32 + or (self.dim_head > 160 or (self.dim_head % 8) != 0) + or mask is not None + ): + # original implementation + # b n (h d) -> (b h) n d + q = rearrange_heads_outer(q, h) + k = rearrange_heads_outer(k, h) + v = rearrange_heads_outer(v, h) + + sim = einsum('b i d, b j d -> b i j', q, k) * self.scale + + if exists(mask): + # standard stable diffusion does not run into here + mask = mask.view(mask.shape[0], -1) + b, j = mask.shape + mask = mask.unsqueeze(1).expand(b, h, j).reshape(b * h, 1, j) # b j -> (b h) () j + sim.masked_fill_(~mask, self.max_neg[sim.dtype]) + + # attention, what we cannot get enough of + attn = sim.softmax(dim=-1) + + out = einsum('b i j, b j d -> b i d', attn, v) + + # (b h) n d -> b n (h d) + out = rearrange_heads_inner(out, h) + elif self.context_dim == self.query_dim: + # self-attention + qkv = torch.stack([q, k, v], dim=2) + b, s, t, hd = qkv.shape + d = hd // h + qkv = qkv.view(b, s, t, h, d) + + out = self.flash_attn(qkv) + out = out.view(b, s, hd) + else: + # cross-attention + kv = torch.stack([k, v], dim=2) + + s_q = q.shape[1] + b, s_kv, t, hd = kv.shape + d = hd // h + + q = q.view(b, s_q, h, d) + kv = kv.view(b, s_kv, t, h, d) + + out = self.flash_attn(q, kv) + out = out.view(b, s_q, hd) + + return out + + +class BasicTransformerBlock(nn.Module): + def __init__( + self, + dim, + n_heads, + d_head, + dropout=0.0, + context_dim=None, + gated_ff=True, + use_checkpoint=False, + use_flash_attention=False, + disable_self_attn=False, + ): + super().__init__() + self.disable_self_attn = disable_self_attn + self.attn1 = CrossAttention( + query_dim=dim, + heads=n_heads, + dim_head=d_head, + dropout=dropout, + use_flash_attention=use_flash_attention, + context_dim=context_dim if self.disable_self_attn else None, + ) # is a self-attention + self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff) + self.attn2 = CrossAttention( + query_dim=dim, + context_dim=context_dim, + heads=n_heads, + dim_head=d_head, + dropout=dropout, + use_flash_attention=use_flash_attention, + ) # is self-attn if context is none + self.norm1 = nn.LayerNorm(dim) + self.norm2 = nn.LayerNorm(dim) + self.norm3 = nn.LayerNorm(dim) + self.use_checkpoint = use_checkpoint + + def forward(self, x, context=None): + if self.use_checkpoint: + return checkpoint(self._forward, (x, context), self.parameters(), self.use_checkpoint) + else: + return self._forward(x, context) + + def _forward(self, x, context=None): + x = self.attn1(self.norm1(x), context=context if self.disable_self_attn else None) + x + x = self.attn2(self.norm2(x), context=context) + x + x = self.ff(self.norm3(x)) + x + return x + + +class SpatialTransformer(nn.Module): + """ + Transformer block for image-like data. + First, project the input (aka embedding) + and reshape to b, t, d. + Then apply standard transformer action. + Finally, reshape to image + """ + + def __init__( + self, + in_channels, + n_heads, + d_head, + depth=1, + dropout=0.0, + context_dim=None, + disable_self_attn=False, + use_linear=False, + use_checkpoint=False, + use_flash_attention=False, + ): + super().__init__() + if exists(context_dim) and not isinstance(context_dim, list): + context_dim = [context_dim] + self.in_channels = in_channels + inner_dim = n_heads * d_head + self.norm = Normalize(in_channels) + + if not use_linear: + self.proj_in = nn.Conv2d(in_channels, inner_dim, kernel_size=1, stride=1, padding=0) + else: + self.proj_in = nn.Linear(in_channels, inner_dim) + + self.transformer_blocks = nn.ModuleList( + [ + BasicTransformerBlock( + inner_dim, + n_heads, + d_head, + dropout=dropout, + context_dim=context_dim[d], + use_checkpoint=use_checkpoint, + use_flash_attention=use_flash_attention, + disable_self_attn=disable_self_attn, + ) + for d in range(depth) + ] + ) + + if not use_linear: + self.proj_out = zero_module(nn.Conv2d(inner_dim, in_channels, kernel_size=1, stride=1, padding=0)) + else: + self.proj_out = zero_module(nn.Linear(in_channels, inner_dim)) + self.use_linear = use_linear + + def forward(self, x, context=None): + # note: if no context is given, cross-attention defaults to self-attention + if not isinstance(context, list): + context = [context] + b, c, h, w = x.shape + x_in = x + x = self.norm(x) + if not self.use_linear: + x = self.proj_in(x) + x = x.view(b, c, -1).transpose(1, 2) # b c h w -> b (h w) c + if self.use_linear: + x = self.proj_in(x) + for i, block in enumerate(self.transformer_blocks): + x = block(x, context=context[i]) + if self.use_linear: + x = self.proj_out(x) + x = x.transpose(1, 2).view(b, c, h, w) # b (h w) c -> b c h w + if not self.use_linear: + x = self.proj_out(x) + return x + x_in diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/__init__.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py new file mode 100644 index 000000000000..dbfab3ab4b07 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py @@ -0,0 +1,878 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pytorch_diffusion + derived encoder decoder +import math +import numpy as np +import torch +import torch.nn as nn +from einops import rearrange +from group_norm import GroupNormOpt + +from nemo.collections.multimodal.modules.stable_diffusion.attention import LinearAttention +from nemo.collections.multimodal.parts.stable_diffusion.utils import instantiate_from_config + + +def get_timestep_embedding(timesteps, embedding_dim): + """ + This matches the implementation in Denoising Diffusion Probabilistic Models: + From Fairseq. + Build sinusoidal embeddings. + This matches the implementation in tensor2tensor, but differs slightly + from the description in Section 3.5 of "Attention Is All You Need". + """ + assert len(timesteps.shape) == 1 + + half_dim = embedding_dim // 2 + emb = math.log(10000) / (half_dim - 1) + emb = torch.exp(torch.arange(half_dim, dtype=torch.float32) * -emb) + emb = emb.to(device=timesteps.device) + emb = timesteps.float()[:, None] * emb[None, :] + emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1) + if embedding_dim % 2 == 1: # zero pad + emb = torch.nn.functional.pad(emb, (0, 1, 0, 0)) + return emb + + +def nonlinearity(x): + # swish + return torch.nn.functional.silu(x) + + +def Normalize(in_channels, num_groups=32, act=""): + return GroupNormOpt(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True, act=act) + + +class Upsample(nn.Module): + def __init__(self, in_channels, with_conv): + super().__init__() + self.with_conv = with_conv + if self.with_conv: + self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) + + def forward(self, x): + # Cast to float32 to as 'upsample_nearest2d_out_frame' op does not support bfloat16 + # TODO(yuya): Remove this cast once the issue is fixed in PyTorch + # https://github.com/pytorch/pytorch/issues/86679 + dtype = x.dtype + if dtype == torch.bfloat16: + x = x.to(torch.float32) + x = torch.nn.functional.interpolate(x, scale_factor=2.0, mode="nearest") + if dtype == torch.bfloat16: + x = x.to(dtype) + if self.with_conv: + x = self.conv(x) + return x + + +class Downsample(nn.Module): + def __init__(self, in_channels, with_conv): + super().__init__() + self.with_conv = with_conv + if self.with_conv: + # no asymmetric padding in torch conv, must do it ourselves + self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0) + + def forward(self, x): + if self.with_conv: + pad = (0, 1, 0, 1) + x = torch.nn.functional.pad(x, pad, mode="constant", value=0) + x = self.conv(x) + else: + x = torch.nn.functional.avg_pool2d(x, kernel_size=2, stride=2) + return x + + +class ResnetBlock(nn.Module): + def __init__(self, *, in_channels, out_channels=None, conv_shortcut=False, dropout, temb_channels=512): + super().__init__() + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + self.use_conv_shortcut = conv_shortcut + + self.norm1 = Normalize(in_channels, act="silu") + self.conv1 = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + if temb_channels > 0: + self.temb_proj = torch.nn.Linear(temb_channels, out_channels) + self.norm2 = Normalize(out_channels, act="silu") + self.dropout = torch.nn.Dropout(dropout) + self.conv2 = torch.nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1) + if self.in_channels != self.out_channels: + if self.use_conv_shortcut: + self.conv_shortcut = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + else: + self.nin_shortcut = torch.nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, x, temb): + h = x + h = self.norm1(h) + h = self.conv1(h) + + if temb is not None: + h = h + self.temb_proj(nonlinearity(temb))[:, :, None, None] + + h = self.norm2(h) + h = self.dropout(h) + h = self.conv2(h) + + if self.in_channels != self.out_channels: + if self.use_conv_shortcut: + x = self.conv_shortcut(x) + else: + x = self.nin_shortcut(x) + + return x + h + + +class LinAttnBlock(LinearAttention): + """to match AttnBlock usage""" + + def __init__(self, in_channels): + super().__init__(dim=in_channels, heads=1, dim_head=in_channels) + + +class AttnBlock(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.in_channels = in_channels + + self.norm = Normalize(in_channels) + self.q = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.k = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.v = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.proj_out = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, x): + h_ = x + h_ = self.norm(h_) + q = self.q(h_) + k = self.k(h_) + v = self.v(h_) + + # compute attention + b, c, h, w = q.shape + q = q.reshape(b, c, h * w) + q = q.permute(0, 2, 1) # b,hw,c + k = k.reshape(b, c, h * w) # b,c,hw + w_ = torch.bmm(q, k) # b,hw,hw w[b,i,j]=sum_c q[b,i,c]k[b,c,j] + w_ = w_ * (int(c) ** (-0.5)) + w_ = torch.nn.functional.softmax(w_, dim=2) + + # attend to values + v = v.reshape(b, c, h * w) + w_ = w_.permute(0, 2, 1) # b,hw,hw (first hw of k, second of q) + h_ = torch.bmm(v, w_) # b, c,hw (hw of q) h_[b,c,j] = sum_i v[b,c,i] w_[b,i,j] + h_ = h_.reshape(b, c, h, w) + + h_ = self.proj_out(h_) + + return x + h_ + + +def make_attn(in_channels, attn_type="vanilla"): + assert attn_type in ["vanilla", "linear", "none"], f'attn_type {attn_type} unknown' + print(f"making attention of type '{attn_type}' with {in_channels} in_channels") + if attn_type == "vanilla": + return AttnBlock(in_channels) + elif attn_type == "none": + return nn.Identity(in_channels) + else: + return LinAttnBlock(in_channels) + + +class Model(nn.Module): + def __init__( + self, + *, + ch, + out_ch, + ch_mult=(1, 2, 4, 8), + num_res_blocks, + attn_resolutions, + dropout=0.0, + resamp_with_conv=True, + in_channels, + resolution, + use_timestep=True, + use_linear_attn=False, + attn_type="vanilla", + ): + super().__init__() + if use_linear_attn: + attn_type = "linear" + self.ch = ch + self.temb_ch = self.ch * 4 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + + self.use_timestep = use_timestep + if self.use_timestep: + # timestep embedding + self.temb = nn.Module() + self.temb.dense = nn.ModuleList( + [torch.nn.Linear(self.ch, self.temb_ch), torch.nn.Linear(self.temb_ch, self.temb_ch),] + ) + + # downsampling + self.conv_in = torch.nn.Conv2d(in_channels, self.ch, kernel_size=3, stride=1, padding=1) + + curr_res = resolution + in_ch_mult = (1,) + tuple(ch_mult) + self.down = nn.ModuleList() + for i_level in range(self.num_resolutions): + block = nn.ModuleList() + attn = nn.ModuleList() + block_in = ch * in_ch_mult[i_level] + block_out = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks): + block.append( + ResnetBlock( + in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch, dropout=dropout + ) + ) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(make_attn(block_in, attn_type=attn_type)) + down = nn.Module() + down.block = block + down.attn = attn + if i_level != self.num_resolutions - 1: + down.downsample = Downsample(block_in, resamp_with_conv) + curr_res = curr_res // 2 + self.down.append(down) + + # middle + self.mid = nn.Module() + self.mid.block_1 = ResnetBlock( + in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout + ) + self.mid.attn_1 = make_attn(block_in, attn_type=attn_type) + self.mid.block_2 = ResnetBlock( + in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout + ) + + # upsampling + self.up = nn.ModuleList() + for i_level in reversed(range(self.num_resolutions)): + block = nn.ModuleList() + attn = nn.ModuleList() + block_out = ch * ch_mult[i_level] + skip_in = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks + 1): + if i_block == self.num_res_blocks: + skip_in = ch * in_ch_mult[i_level] + block.append( + ResnetBlock( + in_channels=block_in + skip_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout, + ) + ) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(make_attn(block_in, attn_type=attn_type)) + up = nn.Module() + up.block = block + up.attn = attn + if i_level != 0: + up.upsample = Upsample(block_in, resamp_with_conv) + curr_res = curr_res * 2 + self.up.insert(0, up) # prepend to get consistent order + + # end + self.norm_out = Normalize(block_in) + self.conv_out = torch.nn.Conv2d(block_in, out_ch, kernel_size=3, stride=1, padding=1) + + def forward(self, x, t=None, context=None): + # assert x.shape[2] == x.shape[3] == self.resolution + if context is not None: + # assume aligned context, cat along channel axis + x = torch.cat((x, context), dim=1) + if self.use_timestep: + # timestep embedding + assert t is not None + temb = get_timestep_embedding(t, self.ch) + temb = self.temb.dense[0](temb) + temb = nonlinearity(temb) + temb = self.temb.dense[1](temb) + else: + temb = None + + # downsampling + hs = [self.conv_in(x)] + for i_level in range(self.num_resolutions): + for i_block in range(self.num_res_blocks): + h = self.down[i_level].block[i_block](hs[-1], temb) + if len(self.down[i_level].attn) > 0: + h = self.down[i_level].attn[i_block](h) + hs.append(h) + if i_level != self.num_resolutions - 1: + hs.append(self.down[i_level].downsample(hs[-1])) + + # middle + h = hs[-1] + h = self.mid.block_1(h, temb) + h = self.mid.attn_1(h) + h = self.mid.block_2(h, temb) + + # upsampling + for i_level in reversed(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks + 1): + h = self.up[i_level].block[i_block](torch.cat([h, hs.pop()], dim=1), temb) + if len(self.up[i_level].attn) > 0: + h = self.up[i_level].attn[i_block](h) + if i_level != 0: + h = self.up[i_level].upsample(h) + + # end + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h) + return h + + def get_last_layer(self): + return self.conv_out.weight + + +class Encoder(nn.Module): + def __init__( + self, + *, + ch, + out_ch, + ch_mult=(1, 2, 4, 8), + num_res_blocks, + attn_resolutions, + dropout=0.0, + resamp_with_conv=True, + in_channels, + resolution, + z_channels, + double_z=True, + use_linear_attn=False, + attn_type="vanilla", + **ignore_kwargs, + ): + super().__init__() + if use_linear_attn: + attn_type = "linear" + self.ch = ch + self.temb_ch = 0 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + + # downsampling + self.conv_in = torch.nn.Conv2d(in_channels, self.ch, kernel_size=3, stride=1, padding=1) + + curr_res = resolution + in_ch_mult = (1,) + tuple(ch_mult) + self.in_ch_mult = in_ch_mult + self.down = nn.ModuleList() + for i_level in range(self.num_resolutions): + block = nn.ModuleList() + attn = nn.ModuleList() + block_in = ch * in_ch_mult[i_level] + block_out = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks): + block.append( + ResnetBlock( + in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch, dropout=dropout + ) + ) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(make_attn(block_in, attn_type=attn_type)) + down = nn.Module() + down.block = block + down.attn = attn + if i_level != self.num_resolutions - 1: + down.downsample = Downsample(block_in, resamp_with_conv) + curr_res = curr_res // 2 + self.down.append(down) + + # middle + self.mid = nn.Module() + self.mid.block_1 = ResnetBlock( + in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout + ) + self.mid.attn_1 = make_attn(block_in, attn_type=attn_type) + self.mid.block_2 = ResnetBlock( + in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout + ) + + # end + self.norm_out = Normalize(block_in) + self.conv_out = torch.nn.Conv2d( + block_in, 2 * z_channels if double_z else z_channels, kernel_size=3, stride=1, padding=1 + ) + + def forward(self, x): + # timestep embedding + temb = None + + # downsampling + hs = [self.conv_in(x)] + for i_level in range(self.num_resolutions): + for i_block in range(self.num_res_blocks): + h = self.down[i_level].block[i_block](hs[-1], temb) + if len(self.down[i_level].attn) > 0: + h = self.down[i_level].attn[i_block](h) + hs.append(h) + if i_level != self.num_resolutions - 1: + hs.append(self.down[i_level].downsample(hs[-1])) + + # middle + h = hs[-1] + h = self.mid.block_1(h, temb) + h = self.mid.attn_1(h) + h = self.mid.block_2(h, temb) + + # end + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h) + return h + + +class Decoder(nn.Module): + def __init__( + self, + *, + ch, + out_ch, + ch_mult=(1, 2, 4, 8), + num_res_blocks, + attn_resolutions, + dropout=0.0, + resamp_with_conv=True, + in_channels, + resolution, + z_channels, + give_pre_end=False, + tanh_out=False, + use_linear_attn=False, + attn_type="vanilla", + **ignorekwargs, + ): + super().__init__() + if use_linear_attn: + attn_type = "linear" + self.ch = ch + self.temb_ch = 0 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + self.give_pre_end = give_pre_end + self.tanh_out = tanh_out + + # compute in_ch_mult, block_in and curr_res at lowest res + in_ch_mult = (1,) + tuple(ch_mult) + block_in = ch * ch_mult[self.num_resolutions - 1] + curr_res = resolution // 2 ** (self.num_resolutions - 1) + self.z_shape = (1, z_channels, curr_res, curr_res) + print("Working with z of shape {} = {} dimensions.".format(self.z_shape, np.prod(self.z_shape))) + + # z to block_in + self.conv_in = torch.nn.Conv2d(z_channels, block_in, kernel_size=3, stride=1, padding=1) + + # middle + self.mid = nn.Module() + self.mid.block_1 = ResnetBlock( + in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout + ) + self.mid.attn_1 = make_attn(block_in, attn_type=attn_type) + self.mid.block_2 = ResnetBlock( + in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=dropout + ) + + # upsampling + self.up = nn.ModuleList() + for i_level in reversed(range(self.num_resolutions)): + block = nn.ModuleList() + attn = nn.ModuleList() + block_out = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks + 1): + block.append( + ResnetBlock( + in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch, dropout=dropout + ) + ) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(make_attn(block_in, attn_type=attn_type)) + up = nn.Module() + up.block = block + up.attn = attn + if i_level != 0: + up.upsample = Upsample(block_in, resamp_with_conv) + curr_res = curr_res * 2 + self.up.insert(0, up) # prepend to get consistent order + + # end + self.norm_out = Normalize(block_in) + self.conv_out = torch.nn.Conv2d(block_in, out_ch, kernel_size=3, stride=1, padding=1) + + def forward(self, z): + # assert z.shape[1:] == self.z_shape[1:] + self.last_z_shape = z.shape + + # timestep embedding + temb = None + + # z to block_in + h = self.conv_in(z) + + # middle + h = self.mid.block_1(h, temb) + h = self.mid.attn_1(h) + h = self.mid.block_2(h, temb) + + # upsampling + for i_level in reversed(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks + 1): + h = self.up[i_level].block[i_block](h, temb) + if len(self.up[i_level].attn) > 0: + h = self.up[i_level].attn[i_block](h) + if i_level != 0: + h = self.up[i_level].upsample(h) + + # end + if self.give_pre_end: + return h + + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h) + if self.tanh_out: + h = torch.tanh(h) + return h + + +class SimpleDecoder(nn.Module): + def __init__(self, in_channels, out_channels, *args, **kwargs): + super().__init__() + self.model = nn.ModuleList( + [ + nn.Conv2d(in_channels, in_channels, 1), + ResnetBlock(in_channels=in_channels, out_channels=2 * in_channels, temb_channels=0, dropout=0.0), + ResnetBlock(in_channels=2 * in_channels, out_channels=4 * in_channels, temb_channels=0, dropout=0.0), + ResnetBlock(in_channels=4 * in_channels, out_channels=2 * in_channels, temb_channels=0, dropout=0.0), + nn.Conv2d(2 * in_channels, in_channels, 1), + Upsample(in_channels, with_conv=True), + ] + ) + # end + self.norm_out = Normalize(in_channels) + self.conv_out = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + + def forward(self, x): + for i, layer in enumerate(self.model): + if i in [1, 2, 3]: + x = layer(x, None) + else: + x = layer(x) + + h = self.norm_out(x) + h = nonlinearity(h) + x = self.conv_out(h) + return x + + +class UpsampleDecoder(nn.Module): + def __init__(self, in_channels, out_channels, ch, num_res_blocks, resolution, ch_mult=(2, 2), dropout=0.0): + super().__init__() + # upsampling + self.temb_ch = 0 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + block_in = in_channels + curr_res = resolution // 2 ** (self.num_resolutions - 1) + self.res_blocks = nn.ModuleList() + self.upsample_blocks = nn.ModuleList() + for i_level in range(self.num_resolutions): + res_block = [] + block_out = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks + 1): + res_block.append( + ResnetBlock( + in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch, dropout=dropout + ) + ) + block_in = block_out + self.res_blocks.append(nn.ModuleList(res_block)) + if i_level != self.num_resolutions - 1: + self.upsample_blocks.append(Upsample(block_in, True)) + curr_res = curr_res * 2 + + # end + self.norm_out = Normalize(block_in) + self.conv_out = torch.nn.Conv2d(block_in, out_channels, kernel_size=3, stride=1, padding=1) + + def forward(self, x): + # upsampling + h = x + for k, i_level in enumerate(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks + 1): + h = self.res_blocks[i_level][i_block](h, None) + if i_level != self.num_resolutions - 1: + h = self.upsample_blocks[k](h) + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h) + return h + + +class LatentRescaler(nn.Module): + def __init__(self, factor, in_channels, mid_channels, out_channels, depth=2): + super().__init__() + # residual block, interpolate, residual block + self.factor = factor + self.conv_in = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1) + self.res_block1 = nn.ModuleList( + [ + ResnetBlock(in_channels=mid_channels, out_channels=mid_channels, temb_channels=0, dropout=0.0) + for _ in range(depth) + ] + ) + self.attn = AttnBlock(mid_channels) + self.res_block2 = nn.ModuleList( + [ + ResnetBlock(in_channels=mid_channels, out_channels=mid_channels, temb_channels=0, dropout=0.0) + for _ in range(depth) + ] + ) + + self.conv_out = nn.Conv2d(mid_channels, out_channels, kernel_size=1,) + + def forward(self, x): + x = self.conv_in(x) + for block in self.res_block1: + x = block(x, None) + x = torch.nn.functional.interpolate( + x, size=(int(round(x.shape[2] * self.factor)), int(round(x.shape[3] * self.factor))) + ) + x = self.attn(x) + for block in self.res_block2: + x = block(x, None) + x = self.conv_out(x) + return x + + +class MergedRescaleEncoder(nn.Module): + def __init__( + self, + in_channels, + ch, + resolution, + out_ch, + num_res_blocks, + attn_resolutions, + dropout=0.0, + resamp_with_conv=True, + ch_mult=(1, 2, 4, 8), + rescale_factor=1.0, + rescale_module_depth=1, + ): + super().__init__() + intermediate_chn = ch * ch_mult[-1] + self.encoder = Encoder( + in_channels=in_channels, + num_res_blocks=num_res_blocks, + ch=ch, + ch_mult=ch_mult, + z_channels=intermediate_chn, + double_z=False, + resolution=resolution, + attn_resolutions=attn_resolutions, + dropout=dropout, + resamp_with_conv=resamp_with_conv, + out_ch=None, + ) + self.rescaler = LatentRescaler( + factor=rescale_factor, + in_channels=intermediate_chn, + mid_channels=intermediate_chn, + out_channels=out_ch, + depth=rescale_module_depth, + ) + + def forward(self, x): + x = self.encoder(x) + x = self.rescaler(x) + return x + + +class MergedRescaleDecoder(nn.Module): + def __init__( + self, + z_channels, + out_ch, + resolution, + num_res_blocks, + attn_resolutions, + ch, + ch_mult=(1, 2, 4, 8), + dropout=0.0, + resamp_with_conv=True, + rescale_factor=1.0, + rescale_module_depth=1, + ): + super().__init__() + tmp_chn = z_channels * ch_mult[-1] + self.decoder = Decoder( + out_ch=out_ch, + z_channels=tmp_chn, + attn_resolutions=attn_resolutions, + dropout=dropout, + resamp_with_conv=resamp_with_conv, + in_channels=None, + num_res_blocks=num_res_blocks, + ch_mult=ch_mult, + resolution=resolution, + ch=ch, + ) + self.rescaler = LatentRescaler( + factor=rescale_factor, + in_channels=z_channels, + mid_channels=tmp_chn, + out_channels=tmp_chn, + depth=rescale_module_depth, + ) + + def forward(self, x): + x = self.rescaler(x) + x = self.decoder(x) + return x + + +class Upsampler(nn.Module): + def __init__(self, in_size, out_size, in_channels, out_channels, ch_mult=2): + super().__init__() + assert out_size >= in_size + num_blocks = int(np.log2(out_size // in_size)) + 1 + factor_up = 1.0 + (out_size % in_size) + print( + f"Building {self.__class__.__name__} with in_size: {in_size} --> out_size {out_size} and factor {factor_up}" + ) + self.rescaler = LatentRescaler( + factor=factor_up, in_channels=in_channels, mid_channels=2 * in_channels, out_channels=in_channels + ) + self.decoder = Decoder( + out_ch=out_channels, + resolution=out_size, + z_channels=in_channels, + num_res_blocks=2, + attn_resolutions=[], + in_channels=None, + ch=in_channels, + ch_mult=[ch_mult for _ in range(num_blocks)], + ) + + def forward(self, x): + x = self.rescaler(x) + x = self.decoder(x) + return x + + +class Resize(nn.Module): + def __init__(self, in_channels=None, learned=False, mode="bilinear"): + super().__init__() + self.with_conv = learned + self.mode = mode + if self.with_conv: + print(f"Note: {self.__class__.__name} uses learned downsampling and will ignore the fixed {mode} mode") + raise NotImplementedError() + assert in_channels is not None + # no asymmetric padding in torch conv, must do it ourselves + self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=4, stride=2, padding=1) + + def forward(self, x, scale_factor=1.0): + if scale_factor == 1.0: + return x + else: + x = torch.nn.functional.interpolate(x, mode=self.mode, align_corners=False, scale_factor=scale_factor) + return x + + +class FirstStagePostProcessor(nn.Module): + def __init__( + self, + ch_mult: list, + in_channels, + pretrained_model: nn.Module = None, + reshape=False, + n_channels=None, + dropout=0.0, + pretrained_config=None, + ): + super().__init__() + if pretrained_config is None: + assert pretrained_model is not None, 'Either "pretrained_model" or "pretrained_config" must not be None' + self.pretrained_model = pretrained_model + else: + assert pretrained_config is not None, 'Either "pretrained_model" or "pretrained_config" must not be None' + self.instantiate_pretrained(pretrained_config) + + self.do_reshape = reshape + + if n_channels is None: + n_channels = self.pretrained_model.encoder.ch + + self.proj_norm = Normalize(in_channels, num_groups=in_channels // 2) + self.proj = nn.Conv2d(in_channels, n_channels, kernel_size=3, stride=1, padding=1) + + blocks = [] + downs = [] + ch_in = n_channels + for m in ch_mult: + blocks.append(ResnetBlock(in_channels=ch_in, out_channels=m * n_channels, dropout=dropout)) + ch_in = m * n_channels + downs.append(Downsample(ch_in, with_conv=False)) + + self.model = nn.ModuleList(blocks) + self.downsampler = nn.ModuleList(downs) + + def instantiate_pretrained(self, config): + model = instantiate_from_config(config) + self.pretrained_model = model.eval() + # self.pretrained_model.train = False + for param in self.pretrained_model.parameters(): + param.requires_grad = False + + @torch.no_grad() + def encode_with_pretrained(self, x): + c = self.pretrained_model.encode(x) + if isinstance(c, DiagonalGaussianDistribution): + c = c.mode() + return c + + def forward(self, x): + z_fs = self.encode_with_pretrained(x) + z = self.proj_norm(z_fs) + z = self.proj(z) + z = nonlinearity(z) + + for submodel, downmodel in zip(self.model, self.downsampler): + z = submodel(z, temb=None) + z = downmodel(z) + + if self.do_reshape: + z = rearrange(z, 'b c h w -> b (h w) c') + return z diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py new file mode 100644 index 000000000000..9c52198f1566 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py @@ -0,0 +1,1191 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +from abc import abstractmethod +from functools import partial +from typing import Iterable + +import numpy as np +import torch +import torch as th +import torch.nn as nn +import torch.nn.functional as F + +from nemo.collections.multimodal.modules.stable_diffusion.attention import SpatialTransformer +from nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.util import ( + avg_pool_nd, + checkpoint, + conv_nd, + linear, + normalization, + timestep_embedding, + zero_module, +) + + +def convert_module_to_dtype(module, dtype): + # Convert module parameters to dtype + if isinstance(module, (torch.nn.Conv1d, torch.nn.Conv2d, torch.nn.Linear)): + module.weight.data = module.weight.data.to(dtype) + if module.bias is not None: + module.bias.data = module.bias.data.to(dtype) + + +def convert_module_to_fp16(module): + convert_module_to_dtype(module, torch.float16) + + +## go +class AttentionPool2d(nn.Module): + """ + Adapted from CLIP: https://github.com/openai/CLIP/blob/main/clip/model.py + """ + + def __init__( + self, spacial_dim: int, embed_dim: int, num_heads_channels: int, output_dim: int = None, + ): + super().__init__() + self.positional_embedding = nn.Parameter(th.randn(embed_dim, spacial_dim ** 2 + 1) / embed_dim ** 0.5) + self.qkv_proj = conv_nd(1, embed_dim, 3 * embed_dim, 1) + self.c_proj = conv_nd(1, embed_dim, output_dim or embed_dim, 1) + self.num_heads = embed_dim // num_heads_channels + self.attention = QKVAttention(self.num_heads) + + def forward(self, x): + b, c, *_spatial = x.shape + x = x.reshape(b, c, -1) # NC(HW) + x = th.cat([x.mean(dim=-1, keepdim=True), x], dim=-1) # NC(HW+1) + x = x + self.positional_embedding[None, :, :].to(x.dtype) # NC(HW+1) + x = self.qkv_proj(x) + x = self.attention(x) + x = self.c_proj(x) + return x[:, :, 0] + + +class TimestepBlock(nn.Module): + """ + Any module where forward() takes timestep embeddings as a second argument. + """ + + @abstractmethod + def forward(self, x, emb): + """ + Apply the module to `x` given `emb` timestep embeddings. + """ + + +class TimestepEmbedSequential(nn.Sequential, TimestepBlock): + """ + A sequential module that passes timestep embeddings to the children that + support it as an extra input. + """ + + def forward(self, x, emb, context=None): + for layer in self: + if isinstance(layer, TimestepBlock): + x = layer(x, emb) + elif isinstance(layer, SpatialTransformer): + x = layer(x, context) + else: + x = layer(x) + return x + + +class Upsample(nn.Module): + """ + An upsampling layer with an optional convolution. + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + if use_conv: + self.conv = conv_nd(dims, self.channels, self.out_channels, 3, padding=padding) + + def forward(self, x): + assert x.shape[1] == self.channels + + # Cast to float32 to as 'upsample_nearest2d_out_frame' op does not support bfloat16 + # TODO(yuya): Remove this cast once the issue is fixed in PyTorch + # https://github.com/pytorch/pytorch/issues/86679 + dtype = x.dtype + if dtype == torch.bfloat16: + x = x.to(torch.float32) + if self.dims == 3: + x = F.interpolate(x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest") + else: + x = F.interpolate(x, scale_factor=2, mode="nearest") + if dtype == torch.bfloat16: + x = x.to(dtype) + + if self.use_conv: + x = self.conv(x) + return x + + +class TransposedUpsample(nn.Module): + 'Learned 2x upsampling without padding' + + def __init__(self, channels, out_channels=None, ks=5): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + + self.up = nn.ConvTranspose2d(self.channels, self.out_channels, kernel_size=ks, stride=2) + + def forward(self, x): + return self.up(x) + + +class Downsample(nn.Module): + """ + A downsampling layer with an optional convolution. + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + downsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + stride = 2 if dims != 3 else (1, 2, 2) + if use_conv: + self.op = conv_nd(dims, self.channels, self.out_channels, 3, stride=stride, padding=padding) + else: + assert self.channels == self.out_channels + self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride) + + def forward(self, x): + assert x.shape[1] == self.channels + return self.op(x) + + +class ResBlock(TimestepBlock): + """ + A residual block that can optionally change the number of channels. + :param channels: the number of input channels. + :param emb_channels: the number of timestep embedding channels. + :param dropout: the rate of dropout. + :param out_channels: if specified, the number of out channels. + :param use_conv: if True and out_channels is specified, use a spatial + convolution instead of a smaller 1x1 convolution to change the + channels in the skip connection. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param use_checkpoint: if True, use gradient checkpointing on this module. + :param up: if True, use this block for upsampling. + :param down: if True, use this block for downsampling. + """ + + def __init__( + self, + channels, + emb_channels, + dropout, + out_channels=None, + use_conv=False, + use_scale_shift_norm=False, + dims=2, + use_checkpoint=False, + up=False, + down=False, + ): + super().__init__() + self.channels = channels + self.emb_channels = emb_channels + self.dropout = dropout + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_checkpoint = use_checkpoint + self.use_scale_shift_norm = use_scale_shift_norm + + self.in_layers = nn.Sequential( + normalization(channels, act="silu"), conv_nd(dims, channels, self.out_channels, 3, padding=1), + ) + + self.updown = up or down + + if up: + self.h_upd = Upsample(channels, False, dims) + self.x_upd = Upsample(channels, False, dims) + elif down: + self.h_upd = Downsample(channels, False, dims) + self.x_upd = Downsample(channels, False, dims) + else: + self.h_upd = self.x_upd = nn.Identity() + + self.emb_layers = nn.Sequential( + nn.SiLU(), linear(emb_channels, 2 * self.out_channels if use_scale_shift_norm else self.out_channels,), + ) + self.out_layers = nn.Sequential( + normalization(self.out_channels, act="silu"), + nn.Dropout(p=dropout), + zero_module(conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1)), + ) + + if self.out_channels == channels: + self.skip_connection = nn.Identity() + elif use_conv: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 3, padding=1) + else: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 1) + + def forward(self, x, emb): + """ + Apply the block to a Tensor, conditioned on a timestep embedding. + :param x: an [N x C x ...] Tensor of features. + :param emb: an [N x emb_channels] Tensor of timestep embeddings. + :return: an [N x C x ...] Tensor of outputs. + """ + if self.use_checkpoint: + return checkpoint(self._forward, (x, emb), self.parameters(), self.use_checkpoint) + else: + return self._forward(x, emb) + + def _forward(self, x, emb): + if self.updown: + in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1] + h = in_rest(x) + h = self.h_upd(h) + x = self.x_upd(x) + h = in_conv(h) + else: + h = self.in_layers(x) + emb_out = self.emb_layers(emb).type(h.dtype) + while len(emb_out.shape) < len(h.shape): + emb_out = emb_out[..., None] + if self.use_scale_shift_norm: + out_norm, out_rest = self.out_layers[0], self.out_layers[1:] + scale, shift = th.chunk(emb_out, 2, dim=1) + h = out_norm(h) * (1 + scale) + shift + h = out_rest(h) + else: + h = h + emb_out + h = self.out_layers(h) + return self.skip_connection(x) + h + + +class AttentionBlock(nn.Module): + """ + An attention block that allows spatial positions to attend to each other. + Originally ported from here, but adapted to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. + """ + + def __init__( + self, channels, num_heads=1, num_head_channels=-1, use_checkpoint=False, use_new_attention_order=False, + ): + super().__init__() + self.channels = channels + if num_head_channels == -1: + self.num_heads = num_heads + else: + assert ( + channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + self.use_checkpoint = use_checkpoint + self.norm = normalization(channels) + self.qkv = conv_nd(1, channels, channels * 3, 1) + if use_new_attention_order: + # split qkv before split heads + self.attention = QKVAttention(self.num_heads) + else: + # split heads before split qkv + self.attention = QKVAttentionLegacy(self.num_heads) + + self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) + + def forward(self, x): + return checkpoint( + self._forward, (x,), self.parameters(), True + ) # TODO: check checkpoint usage, is True # TODO: fix the .half call!!! + # return pt_checkpoint(self._forward, x) # pytorch + + def _forward(self, x): + b, c, *spatial = x.shape + x = x.reshape(b, c, -1) + qkv = self.qkv(self.norm(x)) + h = self.attention(qkv) + h = self.proj_out(h) + return (x + h).reshape(b, c, *spatial) + + +def count_flops_attn(model, _x, y): + """ + A counter for the `thop` package to count the operations in an + attention operation. + Meant to be used like: + macs, params = thop.profile( + model, + inputs=(inputs, timestamps), + custom_ops={QKVAttention: QKVAttention.count_flops}, + ) + """ + b, c, *spatial = y[0].shape + num_spatial = int(np.prod(spatial)) + # We perform two matmuls with the same number of ops. + # The first computes the weight matrix, the second computes + # the combination of the value vectors. + matmul_ops = 2 * b * (num_spatial ** 2) * c + model.total_ops += th.DoubleTensor([matmul_ops]) + + +class QKVAttentionLegacy(nn.Module): + """ + A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + :param qkv: an [N x (H * 3 * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split(ch, dim=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = th.einsum("bct,bcs->bts", q * scale, k * scale) # More stable with f16 than dividing afterwards + weight = th.softmax(weight.float(), dim=-1).type(weight.dtype) + a = th.einsum("bts,bcs->bct", weight, v) + return a.reshape(bs, -1, length) + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVAttention(nn.Module): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, dim=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = th.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length), + (k * scale).view(bs * self.n_heads, ch, length), + ) # More stable with f16 than dividing afterwards + weight = th.softmax(weight.float(), dim=-1).type(weight.dtype) + a = th.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) + return a.reshape(bs, -1, length) + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class UNetModel(nn.Module): + """ + The full UNet model with attention and timestep embedding. + :param in_channels: channels in the input Tensor. + :param model_channels: base channel count for the model. + :param out_channels: channels in the output Tensor. + :param num_res_blocks: number of residual blocks per downsample. + :param attention_resolutions: a collection of downsample rates at which + attention will take place. May be a set, list, or tuple. + For example, if this contains 4, then at 4x downsampling, attention + will be used. + :param dropout: the dropout probability. + :param channel_mult: channel multiplier for each level of the UNet. + :param conv_resample: if True, use learned convolutions for upsampling and + downsampling. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param num_classes: if specified (as an int), then this model will be + class-conditional with `num_classes` classes. + :param use_checkpoint: use gradient checkpointing to reduce memory usage. + :param num_heads: the number of attention heads in each attention layer. + :param num_heads_channels: if specified, ignore num_heads and instead use + a fixed channel width per attention head. + :param num_heads_upsample: works with num_heads to set a different number + of heads for upsampling. Deprecated. + :param use_scale_shift_norm: use a FiLM-like conditioning mechanism. + :param resblock_updown: use residual blocks for up/downsampling. + :param use_new_attention_order: use a different attention pattern for potentially + increased efficiency. + """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + num_classes=None, + use_checkpoint=False, + num_heads=-1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + use_spatial_transformer=False, # custom transformer support + transformer_depth=1, # custom transformer support + context_dim=None, # custom transformer support + n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model + legacy=True, + use_linear_in_transformer=False, + from_pretrained: str = None, + from_NeMo=False, + # It must be specified when from pretrained is not None. It indicates loading unet from NeMo trained ckpt or HF + use_flash_attention: bool = False, + enable_amp_o2_fp16: bool = False, + ): + super().__init__() + if use_spatial_transformer: + assert ( + context_dim is not None + ), 'Fool!! You forgot to include the dimension of your cross-attention conditioning...' + + if context_dim is not None: + assert ( + use_spatial_transformer + ), 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...' + from omegaconf.listconfig import ListConfig + + if type(context_dim) == ListConfig: + context_dim = list(context_dim) + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + if num_heads == -1: + assert num_head_channels != -1, 'Either num_heads or num_head_channels has to be set' + + if num_head_channels == -1: + assert num_heads != -1, 'Either num_heads or num_head_channels has to be set' + + self.image_size = image_size + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.num_classes = num_classes + self.use_checkpoint = use_checkpoint + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + self.predict_codebook_ids = n_embed is not None + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), nn.SiLU(), linear(time_embed_dim, time_embed_dim), + ) + + if self.num_classes is not None: + self.label_emb = nn.Embedding(num_classes, time_embed_dim) + + self.input_blocks = nn.ModuleList( + [TimestepEmbedSequential(conv_nd(dims, in_channels, model_channels, 3, padding=1))] + ) + self._feature_size = model_channels + input_block_chans = [model_channels] + ch = model_channels + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=mult * model_channels, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = mult * model_channels + if ds in attention_resolutions: + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + # num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) + if not use_spatial_transformer + else SpatialTransformer( + ch, + num_heads, + dim_head, + depth=transformer_depth, + context_dim=context_dim, + use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint, + use_flash_attention=use_flash_attention, + ) + ) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) + if resblock_updown + else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch) + ) + ) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + # num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) + if not use_spatial_transformer + else SpatialTransformer( + ch, + num_heads, + dim_head, + depth=transformer_depth, + context_dim=context_dim, + use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint, + use_flash_attention=use_flash_attention, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + + self.output_blocks = nn.ModuleList([]) + for level, mult in list(enumerate(channel_mult))[::-1]: + for i in range(num_res_blocks + 1): + ich = input_block_chans.pop() + layers = [ + ResBlock( + ch + ich, + time_embed_dim, + dropout, + out_channels=model_channels * mult, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = model_channels * mult + if ds in attention_resolutions: + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + # num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads_upsample, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) + if not use_spatial_transformer + else SpatialTransformer( + ch, + num_heads, + dim_head, + depth=transformer_depth, + context_dim=context_dim, + use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint, + use_flash_attention=use_flash_attention, + ) + ) + if level and i == num_res_blocks: + out_ch = ch + layers.append( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + up=True, + ) + if resblock_updown + else Upsample(ch, conv_resample, dims=dims, out_channels=out_ch) + ) + ds //= 2 + self.output_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + + self.out = nn.Sequential( + normalization(ch), nn.SiLU(), zero_module(conv_nd(dims, model_channels, out_channels, 3, padding=1)), + ) + if self.predict_codebook_ids: + self.id_predictor = nn.Sequential( + normalization(ch), + conv_nd(dims, model_channels, n_embed, 1), + # nn.LogSoftmax(dim=1) # change to cross_entropy and produce non-normalized logits + ) + + if from_pretrained is not None: + state_dict = torch.load(from_pretrained, map_location='cpu') + if 'state_dict' in state_dict.keys(): + state_dict = state_dict['state_dict'] + missing_key, unexpected_keys, _, _ = self._load_pretrained_model(state_dict, from_NeMo=from_NeMo) + if len(missing_key) > 0: + print( + 'Following keys are missing during loading unet weights, which may lead to compromised image quality for a resumed training. Please check the checkpoint you provided.' + ) + print(f"Missing keys: {missing_key}") + print(f"Unexpected keys: {unexpected_keys}") + + if enable_amp_o2_fp16: + self.convert_to_fp16() + + def _input_blocks_mapping(self, input_dict): + res_dict = {} + for key_, value_ in input_dict.items(): + id_0 = int(key_[13]) + if "resnets" in key_: + id_1 = int(key_[23]) + target_id = 3 * id_0 + 1 + id_1 + post_fix = ( + key_[25:] + .replace('time_emb_proj', 'emb_layers.1') + .replace('norm1', 'in_layers.0') + .replace('norm2', 'out_layers.0') + .replace('conv1', 'in_layers.2') + .replace('conv2', 'out_layers.3') + .replace('conv_shortcut', 'skip_connection') + ) + res_dict["input_blocks." + str(target_id) + '.0.' + post_fix] = value_ + elif "attentions" in key_: + id_1 = int(key_[26]) + target_id = 3 * id_0 + 1 + id_1 + post_fix = key_[28:] + res_dict["input_blocks." + str(target_id) + '.1.' + post_fix] = value_ + elif "downsamplers" in key_: + post_fix = key_[35:] + target_id = 3 * (id_0 + 1) + res_dict["input_blocks." + str(target_id) + '.0.op.' + post_fix] = value_ + return res_dict + + def _mid_blocks_mapping(self, mid_dict): + res_dict = {} + for key_, value_ in mid_dict.items(): + if "resnets" in key_: + temp_key_ = ( + key_.replace('time_emb_proj', 'emb_layers.1') + .replace('norm1', 'in_layers.0') + .replace('norm2', 'out_layers.0') + .replace('conv1', 'in_layers.2') + .replace('conv2', 'out_layers.3') + .replace('conv_shortcut', 'skip_connection') + .replace('middle_block.resnets.0', 'middle_block.0') + .replace('middle_block.resnets.1', 'middle_block.2') + ) + res_dict[temp_key_] = value_ + elif "attentions" in key_: + res_dict[key_.replace('attentions.0', '1')] = value_ + return res_dict + + def _other_blocks_mapping(self, other_dict): + res_dict = {} + for key_, value_ in other_dict.items(): + tmp_key = ( + key_.replace('conv_in', 'input_blocks.0.0') + .replace('time_embedding.linear_1', 'time_embed.0') + .replace('time_embedding.linear_2', 'time_embed.2') + .replace('conv_norm_out', 'out.0') + .replace('conv_out', 'out.2') + ) + res_dict[tmp_key] = value_ + return res_dict + + def _output_blocks_mapping(self, output_dict): + res_dict = {} + for key_, value_ in output_dict.items(): + id_0 = int(key_[14]) + if "resnets" in key_: + id_1 = int(key_[24]) + target_id = 3 * id_0 + id_1 + post_fix = ( + key_[26:] + .replace('time_emb_proj', 'emb_layers.1') + .replace('norm1', 'in_layers.0') + .replace('norm2', 'out_layers.0') + .replace('conv1', 'in_layers.2') + .replace('conv2', 'out_layers.3') + .replace('conv_shortcut', 'skip_connection') + ) + res_dict["output_blocks." + str(target_id) + '.0.' + post_fix] = value_ + elif "attentions" in key_: + id_1 = int(key_[27]) + target_id = 3 * id_0 + id_1 + post_fix = key_[29:] + res_dict["output_blocks." + str(target_id) + '.1.' + post_fix] = value_ + elif "upsamplers" in key_: + post_fix = key_[34:] + target_id = 3 * (id_0 + 1) - 1 + mid_str = '.2.conv.' if target_id != 2 else '.1.conv.' + res_dict["output_blocks." + str(target_id) + mid_str + post_fix] = value_ + return res_dict + + def _state_key_mapping(self, state_dict: dict): + import re + + res_dict = {} + input_dict = {} + mid_dict = {} + output_dict = {} + other_dict = {} + for key_, value_ in state_dict.items(): + if "down_blocks" in key_: + input_dict[key_.replace('down_blocks', 'input_blocks')] = value_ + elif "up_blocks" in key_: + output_dict[key_.replace('up_blocks', 'output_blocks')] = value_ + elif "mid_block" in key_: + mid_dict[key_.replace('mid_block', 'middle_block')] = value_ + else: + other_dict[key_] = value_ + + input_dict = self._input_blocks_mapping(input_dict) + output_dict = self._output_blocks_mapping(output_dict) + mid_dict = self._mid_blocks_mapping(mid_dict) + other_dict = self._other_blocks_mapping(other_dict) + # key_list = state_dict.keys() + # key_str = " ".join(key_list) + + # for key_, val_ in state_dict.items(): + # key_ = key_.replace("down_blocks", "input_blocks")\ + # .replace("up_blocks", 'output_blocks') + # res_dict[key_] = val_ + res_dict.update(input_dict) + res_dict.update(output_dict) + res_dict.update(mid_dict) + res_dict.update(other_dict) + + return res_dict + + def _load_pretrained_model(self, state_dict, ignore_mismatched_sizes=False, from_NeMo=False): + if from_NeMo: + state_dict = self._strip_unet_key_prefix(state_dict) + else: + state_dict = self._state_key_mapping(state_dict) + model_state_dict = self.state_dict() + loaded_keys = [k for k in state_dict.keys()] + expected_keys = list(model_state_dict.keys()) + original_loaded_keys = loaded_keys + missing_keys = list(set(expected_keys) - set(loaded_keys)) + unexpected_keys = list(set(loaded_keys) - set(expected_keys)) + + if ( + 'input_blocks.1.0.in_layers.2.weight' in loaded_keys + and 'input_blocks.1.0.in_layers.1.weight' in expected_keys + ): + # GroupNormOpt fuses activation function to one layer, thus the indexing of weights are shifted for following + for key_ in missing_keys: + s = key_.split('.') + idx = int(s[-2]) + new_key_ = ".".join(s[:-2] + [str(int(idx + 1))] + [s[-1]]) + state_dict[key_] = state_dict[new_key_] + + loaded_keys = list(state_dict.keys()) + missing_keys = list(set(expected_keys) - set(loaded_keys)) + unexpected_keys = list(set(loaded_keys) - set(expected_keys)) + + def _find_mismatched_keys( + state_dict, model_state_dict, loaded_keys, ignore_mismatched_sizes, + ): + mismatched_keys = [] + if ignore_mismatched_sizes: + for checkpoint_key in loaded_keys: + model_key = checkpoint_key + + if ( + model_key in model_state_dict + and state_dict[checkpoint_key].shape != model_state_dict[model_key].shape + ): + mismatched_keys.append( + (checkpoint_key, state_dict[checkpoint_key].shape, model_state_dict[model_key].shape) + ) + del state_dict[checkpoint_key] + return mismatched_keys + + if state_dict is not None: + # Whole checkpoint + mismatched_keys = _find_mismatched_keys( + state_dict, model_state_dict, original_loaded_keys, ignore_mismatched_sizes, + ) + error_msgs = self._load_state_dict_into_model(state_dict) + return missing_keys, unexpected_keys, mismatched_keys, error_msgs + + # TODO MMY maybe combine these cases of key prefix + def _strip_unet_key_prefix(self, state_dict): + re_state_dict = {} + for key_, value_ in state_dict.items(): + if key_.startswith('model.diffusion_model'): + re_state_dict[key_.replace('model.diffusion_model.', '')] = value_ + if key_.startswith('model.model.diffusion_model'): + re_state_dict[key_.replace('model.model.diffusion_model.', '')] = value_ + if key_.startswith('model._orig_mod.diffusion_model.'): + re_state_dict[key_.replace('model._orig_mod.diffusion_model.', '')] = value_ + if key_.startswith('model.model._orig_mod.diffusion_model.'): + re_state_dict[key_.replace('model.model._orig_mod.diffusion_model.', '')] = value_ + if key_.startswith('model.model.diffusion_model._orig_mod.'): + re_state_dict[key_.replace('model.model.diffusion_model._orig_mod.', '')] = value_ + return re_state_dict + + def _load_state_dict_into_model(self, state_dict): + # Convert old format to new format if needed from a PyTorch state_dict + # copy state_dict so _load_from_state_dict can modify it + state_dict = state_dict.copy() + error_msgs = [] + + # PyTorch's `_load_from_state_dict` does not copy parameters in a module's descendants + # so we need to apply the function recursively. + def load(module: torch.nn.Module, prefix=""): + args = (state_dict, prefix, {}, True, [], [], error_msgs) + module._load_from_state_dict(*args) + + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + ".") + + load(self) + + return error_msgs + + def convert_to_fp16(self): + """ + Convert the torso of the model to float16. + """ + self.apply(convert_module_to_fp16) + + def forward(self, x, timesteps=None, context=None, y=None, **kwargs): + """ + Apply the model to an input batch. + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :param context: conditioning plugged in via crossattn + :param y: an [N] Tensor of labels, if class-conditional. + :return: an [N x C x ...] Tensor of outputs. + """ + assert (y is not None) == ( + self.num_classes is not None + ), "must specify y if and only if the model is class-conditional" + hs = [] + t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False) + emb = self.time_embed(t_emb) + + if self.num_classes is not None: + assert y.shape == (x.shape[0],) + emb = emb + self.label_emb(y) + + h = x.type(emb.dtype) + for module in self.input_blocks: + h = module(h, emb, context) + hs.append(h) + h = self.middle_block(h, emb, context) + for module in self.output_blocks: + h = th.cat([h, hs.pop()], dim=1) + h = module(h, emb, context) + if self.predict_codebook_ids: + return self.id_predictor(h) + else: + return self.out(h) + + +class EncoderUNetModel(nn.Module): + """ + The half UNet model with attention and timestep embedding. + For usage, see UNet. + """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + use_checkpoint=False, + use_fp16=False, + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + pool="adaptive", + *args, + **kwargs, + ): + super().__init__() + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.use_checkpoint = use_checkpoint + self.dtype = th.float16 if use_fp16 else th.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), nn.SiLU(), linear(time_embed_dim, time_embed_dim), + ) + + self.input_blocks = nn.ModuleList( + [TimestepEmbedSequential(conv_nd(dims, in_channels, model_channels, 3, padding=1))] + ) + self._feature_size = model_channels + input_block_chans = [model_channels] + ch = model_channels + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=mult * model_channels, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = mult * model_channels + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + ) + ) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) + if resblock_updown + else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch) + ) + ) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + self.pool = pool + if pool == "adaptive": + self.out = nn.Sequential( + normalization(ch), + nn.SiLU(), + nn.AdaptiveAvgPool2d((1, 1)), + zero_module(conv_nd(dims, ch, out_channels, 1)), + nn.Flatten(), + ) + elif pool == "attention": + assert num_head_channels != -1 + self.out = nn.Sequential( + normalization(ch), nn.SiLU(), AttentionPool2d((image_size // ds), ch, num_head_channels, out_channels), + ) + elif pool == "spatial": + self.out = nn.Sequential( + nn.Linear(self._feature_size, 2048), nn.ReLU(), nn.Linear(2048, self.out_channels), + ) + elif pool == "spatial_v2": + self.out = nn.Sequential( + nn.Linear(self._feature_size, 2048), + normalization(2048), + nn.SiLU(), + nn.Linear(2048, self.out_channels), + ) + else: + raise NotImplementedError(f"Unexpected {pool} pooling") + + def convert_to_fp16(self): + """ + Convert the torso of the model to float16. + """ + self.input_blocks.apply(convert_module_to_fp16) + self.middle_block.apply(convert_module_to_fp16) + + def forward(self, x, timesteps): + """ + Apply the model to an input batch. + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :return: an [N x K] Tensor of outputs. + """ + emb = self.time_embed(timestep_embedding(timesteps, self.model_channels), use_fp16=self.use_fp16) + + # future support + if self.dtype == th.float32: + self.dtype == x.dtype + + results = [] + h = x.type(self.dtype) + for module in self.input_blocks: + h = module(h, emb) + if self.pool.startswith("spatial"): + results.append(h.type(x.dtype).mean(dim=(2, 3))) + h = self.middle_block(h, emb) + if self.pool.startswith("spatial"): + results.append(h.type(x.dtype).mean(dim=(2, 3))) + h = th.cat(results, axis=-1) + return self.out(h) + else: + h = h.type(x.dtype) + return self.out(h) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py new file mode 100644 index 000000000000..2225d45a3a78 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py @@ -0,0 +1,309 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# adopted from +# https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/gaussian_diffusion.py +# and +# https://github.com/lucidrains/denoising-diffusion-pytorch/blob/7706bdfc6f527f58d33f84b7b522e61e6e3164b3/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py +# and +# https://github.com/openai/guided-diffusion/blob/0ba878e517b276c45d1195eb29f6f5f72659a05b/guided_diffusion/nn.py +# +# thanks! + + +import math + +import numpy as np +import torch +import torch.nn as nn +from einops import repeat +from group_norm import GroupNormOpt +from torch._dynamo import disable +from torch.cuda.amp import custom_bwd, custom_fwd + + +def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + if schedule == "linear": + betas = torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2 + + elif schedule == "cosine": + timesteps = torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s + alphas = timesteps / (1 + cosine_s) * np.pi / 2 + alphas = torch.cos(alphas).pow(2) + alphas = alphas / alphas[0] + betas = 1 - alphas[1:] / alphas[:-1] + betas = np.clip(betas, a_min=0, a_max=0.999) + + elif schedule == "sqrt_linear": + betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) + elif schedule == "sqrt": + betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5 + else: + raise ValueError(f"schedule '{schedule}' unknown.") + return betas.numpy() + + +def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps, verbose=True): + if ddim_discr_method == "uniform": + c = num_ddpm_timesteps // num_ddim_timesteps + ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c))) + elif ddim_discr_method == "quad": + ddim_timesteps = ((np.linspace(0, np.sqrt(num_ddpm_timesteps * 0.8), num_ddim_timesteps)) ** 2).astype(int) + else: + raise NotImplementedError(f'There is no ddim discretization method called "{ddim_discr_method}"') + + # assert ddim_timesteps.shape[0] == num_ddim_timesteps + # add one to get the final alpha values right (the ones from first scale to data during sampling) + steps_out = ddim_timesteps + 1 + if verbose: + print(f"Selected timesteps for ddim sampler: {steps_out}") + return steps_out + + +def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbose=True): + # select alphas for computing the variance schedule + alphas = alphacums[ddim_timesteps] + alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_timesteps[:-1]].tolist()) + + # according the the formula provided in https://arxiv.org/abs/2010.02502 + variance = (1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev) + sigmas = eta * np.sqrt(variance) + if verbose: + print(f"Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}") + print( + f"For the chosen value of eta, which is {eta}, " + f"this results in the following sigma_t schedule for ddim sampler {sigmas}" + ) + return sigmas, alphas, alphas_prev, variance + + +def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, + which defines the cumulative product of (1-beta) over time from t = [0,1]. + :param num_diffusion_timesteps: the number of betas to produce. + :param alpha_bar: a lambda that takes an argument t from 0 to 1 and + produces the cumulative product of (1-beta) up to that + part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. + """ + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas) + + +def extract_into_tensor(a, t, x_shape): + b, *_ = t.shape + out = a.gather(-1, t) + return out.reshape(b, *((1,) * (len(x_shape) - 1))) + + +def checkpoint(func, inputs, params, flag): + """ + Evaluate a function without caching intermediate activations, allowing for + reduced memory at the expense of extra compute in the backward pass. + :param func: the function to evaluate. + :param inputs: the argument sequence to pass to `func`. + :param params: a sequence of parameters `func` depends on but does not + explicitly take as arguments. + :param flag: if False, disable gradient checkpointing. + """ + if flag: + args = tuple(inputs) + tuple(params) + return CheckpointFunction.apply(func, len(inputs), *args) + else: + return func(*inputs) + + +class CheckpointFunction(torch.autograd.Function): + @staticmethod + @custom_fwd + def forward(ctx, run_function, length, *args): + ctx.run_function = run_function + ctx.input_tensors = list(args[:length]) + ctx.input_params = list(args[length:]) + + with torch.no_grad(): + output_tensors = ctx.run_function(*ctx.input_tensors) + return output_tensors + + @staticmethod + @custom_bwd + def backward(ctx, *output_grads): + ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors] + with torch.enable_grad(): + # Fixes a bug where the first op in run_function modifies the + # Tensor storage in place, which is not allowed for detach()'d + # Tensors. + shallow_copies = [x.view_as(x) for x in ctx.input_tensors] + output_tensors = ctx.run_function(*shallow_copies) + input_grads = torch.autograd.grad( + output_tensors, ctx.input_tensors + ctx.input_params, output_grads, allow_unused=True, + ) + del ctx.input_tensors + del ctx.input_params + del output_tensors + return (None, None) + input_grads + + +# Temporary hack to get rid of TorchDynamo issue with DDP +# TODO: remove this if https://github.com/pytorch/pytorch/issues/94574 fixed +@disable +def get_idx(end, device): + return torch.arange(start=0, end=end, dtype=torch.float32, device=device) + + +def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False): + """ + Create sinusoidal timestep embeddings. + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an [N x dim] Tensor of positional embeddings. + """ + if not repeat_only: + half = dim // 2 + idx = get_idx(half, timesteps.device) + freqs = torch.exp(-math.log(max_period) / half * idx) + args = timesteps[:, None].float() * freqs[None] + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + else: + embedding = repeat(timesteps, "b -> b d", d=dim) + return embedding + + +def zero_module(module): + """ + Zero out the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().zero_() + return module + + +def scale_module(module, scale): + """ + Scale the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().mul_(scale) + return module + + +def mean_flat(tensor): + """ + Take the mean over all non-batch dimensions. + """ + return tensor.mean(dim=list(range(1, len(tensor.shape)))) + + +def normalization(channels, act=""): + """ + Make a standard normalization layer. + :param channels: number of input channels. + :return: an nn.Module for normalization. + """ + return GroupNormOpt(32, channels, act=act) + + +# PyTorch 1.7 has SiLU, but we support PyTorch 1.5. +class SiLU(nn.Module): + def forward(self, x): + return x * torch.sigmoid(x) + + +class GroupNorm32(nn.GroupNorm): + def forward(self, x): + return super().forward(x.float()).type(x.dtype) + + +def conv_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D convolution module. + """ + if dims == 1: + return nn.Conv1d(*args, **kwargs) + elif dims == 2: + return nn.Conv2d(*args, **kwargs) + elif dims == 3: + return nn.Conv3d(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def linear(*args, **kwargs): + """ + Create a linear module. + """ + return nn.Linear(*args, **kwargs) + + +def avg_pool_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D average pooling module. + """ + if dims == 1: + return nn.AvgPool1d(*args, **kwargs) + elif dims == 2: + return nn.AvgPool2d(*args, **kwargs) + elif dims == 3: + return nn.AvgPool3d(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def noise_like(shape, device, repeat=False): + repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1))) + noise = lambda: torch.randn(shape, device=device) + return repeat_noise() if repeat else noise() + + +def interpolate_fn(x, xp, yp): + """ + A piecewise linear function y = f(x), using xp and yp as keypoints. + """ + N, K = x.shape[0], xp.shape[1] + all_x = torch.cat([x.unsqueeze(2), xp.unsqueeze(0).repeat((N, 1, 1))], dim=2) + sorted_all_x, x_indices = torch.sort(all_x, dim=2) + x_idx = torch.argmin(x_indices, dim=2) + cand_start_idx = x_idx - 1 + start_idx = torch.where( + torch.eq(x_idx, 0), + torch.tensor(1, device=x.device), + torch.where(torch.eq(x_idx, K), torch.tensor(K - 2, device=x.device), cand_start_idx,), + ) + end_idx = torch.where(torch.eq(start_idx, cand_start_idx), start_idx + 2, start_idx + 1) + start_x = torch.gather(sorted_all_x, dim=2, index=start_idx.unsqueeze(2)).squeeze(2) + end_x = torch.gather(sorted_all_x, dim=2, index=end_idx.unsqueeze(2)).squeeze(2) + start_idx2 = torch.where( + torch.eq(x_idx, 0), + torch.tensor(0, device=x.device), + torch.where(torch.eq(x_idx, K), torch.tensor(K - 2, device=x.device), cand_start_idx,), + ) + y_positions_expanded = yp.unsqueeze(0).expand(N, -1, -1) + start_y = torch.gather(y_positions_expanded, dim=2, index=start_idx2.unsqueeze(2)).squeeze(2) + end_y = torch.gather(y_positions_expanded, dim=2, index=(start_idx2 + 1).unsqueeze(2)).squeeze(2) + cand = start_y + (x - start_x) * (end_y - start_y) / (end_x - start_x) + return cand + + +def expand_dims(v, dims): + """ + Expand the tensor `v` to the dim `dims`. + """ + return v[(...,) + (None,) * (dims - 1)] diff --git a/nemo/collections/multimodal/modules/stable_diffusion/distributions/__init__.py b/nemo/collections/multimodal/modules/stable_diffusion/distributions/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/distributions/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py b/nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py new file mode 100644 index 000000000000..81d79ac5801a --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/distributions/distributions.py @@ -0,0 +1,98 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import torch + + +class AbstractDistribution: + def sample(self): + raise NotImplementedError() + + def mode(self): + raise NotImplementedError() + + +class DiracDistribution(AbstractDistribution): + def __init__(self, value): + self.value = value + + def sample(self): + return self.value + + def mode(self): + return self.value + + +class DiagonalGaussianDistribution(object): + def __init__(self, parameters, deterministic=False): + self.parameters = parameters + self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) + self.logvar = torch.clamp(self.logvar, -30.0, 20.0) + self.deterministic = deterministic + self.std = torch.exp(0.5 * self.logvar) + self.var = torch.exp(self.logvar) + if self.deterministic: + self.var = self.std = torch.zeros_like(self.mean, device=self.parameters.device) + + def sample(self): + x = self.mean + self.std * torch.randn(self.mean.shape, device=self.parameters.device) + return x + + def kl(self, other=None): + if self.deterministic: + return torch.Tensor([0.0]) + else: + if other is None: + return 0.5 * torch.sum(torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar, dim=[1, 2, 3]) + else: + return 0.5 * torch.sum( + torch.pow(self.mean - other.mean, 2) / other.var + + self.var / other.var + - 1.0 + - self.logvar + + other.logvar, + dim=[1, 2, 3], + ) + + def nll(self, sample, dims=[1, 2, 3]): + if self.deterministic: + return torch.Tensor([0.0]) + logtwopi = np.log(2.0 * np.pi) + return 0.5 * torch.sum(logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, dim=dims) + + def mode(self): + return self.mean + + +def normal_kl(mean1, logvar1, mean2, logvar2): + """ + source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 + Compute the KL divergence between two gaussians. + Shapes are automatically broadcasted, so batches can be compared to + scalars, among other use cases. + """ + tensor = None + for obj in (mean1, logvar1, mean2, logvar2): + if isinstance(obj, torch.Tensor): + tensor = obj + break + assert tensor is not None, "at least one argument must be a Tensor" + + # Force variances to be Tensors. Broadcasting helps convert scalars to + # Tensors, but it does not work for torch.exp(). + logvar1, logvar2 = [x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) for x in (logvar1, logvar2)] + + return 0.5 * ( + -1.0 + logvar2 - logvar1 + torch.exp(logvar1 - logvar2) + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) + ) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/__init__.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py new file mode 100644 index 000000000000..7b8f3c38d53f --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py @@ -0,0 +1,471 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import tempfile +from functools import partial + +import kornia +import open_clip +import torch +import torch.nn as nn +from einops import rearrange, repeat +from omegaconf import OmegaConf +from torch.utils.checkpoint import checkpoint +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer +from transformers.models.clip.modeling_clip import CLIPTextTransformer + +from nemo.collections.multimodal.data.clip.clip_dataset import get_preprocess_fns +from nemo.collections.multimodal.models.clip.megatron_clip_models import CLIPModel +from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import ( + TransformerWrapper, # TODO: can we directly rely on lucidrains code and simply add this as a reuirement? --> test +) +from nemo.collections.multimodal.modules.stable_diffusion.encoders.x_transformer import Encoder +from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.utils import logging + +try: + from megatron.core import ModelParallelConfig, parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + ModelParallelConfig = ApexGuardDefaults + + HAVE_MEGATRON_CORE = False + + +class AbstractEncoder(nn.Module): + def __init__(self): + super().__init__() + + def encode(self, *args, **kwargs): + raise NotImplementedError + + +class ClassEmbedder(nn.Module): + def __init__(self, embed_dim, n_classes=1000, key='class'): + super().__init__() + self.key = key + self.embedding = nn.Embedding(n_classes, embed_dim) + + def forward(self, batch, key=None): + if key is None: + key = self.key + # this is for use in crossattn + c = batch[key][:, None] + c = self.embedding(c) + return c + + +class TransformerEmbedder(AbstractEncoder): + """Some transformer encoder layers""" + + def __init__(self, n_embed, n_layer, vocab_size, max_seq_len=77, device="cuda"): + super().__init__() + self.device = device + self.transformer = TransformerWrapper( + num_tokens=vocab_size, max_seq_len=max_seq_len, attn_layers=Encoder(dim=n_embed, depth=n_layer) + ) + + def forward(self, tokens): + tokens = tokens.to(self.device) # meh + z = self.transformer(tokens, return_embeddings=True) + return z + + def encode(self, x): + return self(x) + + +class BERTTokenizer(AbstractEncoder): + """ Uses a pretrained BERT tokenizer by huggingface. Vocab size: 30522 (?)""" + + def __init__(self, device="cuda", vq_interface=True, max_length=77): + super().__init__() + from transformers import BertTokenizerFast # TODO: add to reuquirements + + self.tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased") + self.device = device + self.vq_interface = vq_interface + self.max_length = max_length + + def forward(self, text): + batch_encoding = self.tokenizer( + text, + truncation=True, + max_length=self.max_length, + return_length=True, + return_overflowing_tokens=False, + padding="max_length", + return_tensors="pt", + ) + tokens = batch_encoding["input_ids"].to(self.device) + return tokens + + @torch.no_grad() + def encode(self, text): + tokens = self(text) + if not self.vq_interface: + return tokens + return None, None, [None, None, tokens] + + def decode(self, text): + return text + + +class BERTEmbedder(AbstractEncoder): + """Uses the BERT tokenizr model and add some transformer encoder layers""" + + def __init__( + self, + n_embed, + n_layer, + vocab_size=30522, + max_seq_len=77, + device="cuda", + use_tokenizer=True, + embedding_dropout=0.0, + ): + super().__init__() + self.use_tknz_fn = use_tokenizer + if self.use_tknz_fn: + self.tknz_fn = BERTTokenizer(vq_interface=False, max_length=max_seq_len) + self.device = device + self.transformer = TransformerWrapper( + num_tokens=vocab_size, + max_seq_len=max_seq_len, + attn_layers=Encoder(dim=n_embed, depth=n_layer), + emb_dropout=embedding_dropout, + ) + + def forward(self, text): + if self.use_tknz_fn: + tokens = self.tknz_fn(text) # .to(self.device) + else: + tokens = text + z = self.transformer(tokens, return_embeddings=True) + return z + + def encode(self, text): + # output of length 77 + return self(text) + + +class SpatialRescaler(nn.Module): + def __init__(self, n_stages=1, method='bilinear', multiplier=0.5, in_channels=3, out_channels=None, bias=False): + super().__init__() + self.n_stages = n_stages + assert self.n_stages >= 0 + assert method in ['nearest', 'linear', 'bilinear', 'trilinear', 'bicubic', 'area'] + self.multiplier = multiplier + self.interpolator = partial(torch.nn.functional.interpolate, mode=method) + self.remap_output = out_channels is not None + if self.remap_output: + print(f'Spatial Rescaler mapping from {in_channels} to {out_channels} channels after resizing.') + self.channel_mapper = nn.Conv2d(in_channels, out_channels, 1, bias=bias) + + def forward(self, x): + for stage in range(self.n_stages): + x = self.interpolator(x, scale_factor=self.multiplier) + + if self.remap_output: + x = self.channel_mapper(x) + return x + + def encode(self, x): + return self(x) + + +class FrozenCLIPEmbedder(AbstractEncoder): + """Uses the CLIP transformer encoder for text (from Hugging Face)""" + + def __init__( + self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77, capture_cudagraph_iters: int = -1 + ): + super().__init__() + self.tokenizer = CLIPTokenizer.from_pretrained(version) + self.transformer = CLIPTextModel.from_pretrained(version) + self.device = device + self.max_length = max_length + self.freeze() + + # CUDA graph captured sub-modules + self.capture_cudagraph_iters = capture_cudagraph_iters + self.iterations = 0 + self.stream = torch.cuda.Stream() + self.transformer_graph = torch.cuda.CUDAGraph() + self.static_tokens = None + self.static_outputs = None + + def freeze(self): + self.transformer = self.transformer.eval() + for param in self.parameters(): + param.requires_grad = False + + def forward(self, text): + batch_encoding = self.tokenizer( + text, + truncation=True, + max_length=self.max_length, + return_length=True, + return_overflowing_tokens=False, + padding="max_length", + return_tensors="pt", + ) + if self.capture_cudagraph_iters < 0: + tokens = batch_encoding["input_ids"].to(self.device, non_blocking=True) + outputs = self.transformer(input_ids=tokens) + z = outputs.last_hidden_state + + else: + if self.static_tokens is None: + self.static_tokens = batch_encoding["input_ids"].to(device=self.device, non_blocking=True) + self.static_tokens.copy_(batch_encoding["input_ids"], non_blocking=True) + + if self.iterations == self.capture_cudagraph_iters: + # cuda graph capture + logging.info("Capturing CUDA graph for module: %s", self.transformer.__class__.__name__) + with torch.cuda.graph(self.transformer_graph): + self.static_outputs = self.transformer(input_ids=self.static_tokens) + + if 0 <= self.capture_cudagraph_iters <= self.iterations: + # cuda graph replay + self.transformer_graph.replay() + else: + # warmup + self.stream.wait_stream(torch.cuda.current_stream()) + with torch.cuda.stream(self.stream): + self.static_outputs = self.transformer(input_ids=self.static_tokens) + torch.cuda.current_stream().wait_stream(self.stream) + self.iterations += 1 + z = self.static_outputs.last_hidden_state + + # # Pad the seq length to multiple of 8 + seq_len = (z.shape[1] + 8 - 1) // 8 * 8 + z = torch.nn.functional.pad(z, (0, 0, 0, seq_len - z.shape[1]), value=0.0) + return z + + def encode(self, text): + return self(text) + + +class FrozenOpenCLIPEmbedder(AbstractEncoder): + """ + Uses the OpenCLIP transformer encoder for text + """ + + LAYERS = [ + # "pooled", + "last", + "penultimate", + ] + + def __init__( + self, + arch="ViT-H-14", + version="laion2b_s32b_b79k", + device="cuda", + max_length=77, + freeze=True, + layer="last", + use_fp16=False, + ): + super().__init__() + assert layer in self.LAYERS + model, _, _ = open_clip.create_model_and_transforms(arch, device=torch.device('cpu'), pretrained=version) + del model.visual + self.model = model + + self.device = device + self.max_length = max_length + if freeze: + self.freeze() + self.layer = layer + if self.layer == "last": + self.layer_idx = 0 + elif self.layer == "penultimate": + self.layer_idx = 1 + else: + raise NotImplementedError() + + def freeze(self): + self.model = self.model.eval() + for param in self.parameters(): + param.requires_grad = False + + def forward(self, text): + tokens = open_clip.tokenize(text) + z = self.encode_with_transformer(tokens.to(self.device)) + return z + + def encode_with_transformer(self, text): + x = self.model.token_embedding(text) # [batch_size, n_ctx, d_model] + x = x + self.model.positional_embedding + x = x.permute(1, 0, 2) # NLD -> LND + x = self.text_transformer_forward(x, attn_mask=self.model.attn_mask) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.model.ln_final(x) + return x + + def text_transformer_forward(self, x: torch.Tensor, attn_mask=None): + for i, r in enumerate(self.model.transformer.resblocks): + if i == len(self.model.transformer.resblocks) - self.layer_idx: + break + if self.model.transformer.grad_checkpointing and not torch.jit.is_scripting(): + x = checkpoint(r, x, attn_mask) + else: + x = r(x, attn_mask=attn_mask) + return x + + def encode(self, text): + return self(text) + + +class FrozenMegatronCLIPEmbedder(AbstractEncoder): + def __init__(self, restore_from_path, device="cuda", layer="last", freeze=True, cfg=None, use_fp16=False): + super().__init__() + if restore_from_path is not None: + cfg, state_dict = self.load_config_and_state_from_nemo(restore_from_path) + elif cfg is not None: + state_dict = None + else: + raise ValueError("Either restore_from_path or cfg should not be None") + + self.cfg = cfg + self.build_tokenizer(cfg) + self.load_model(cfg, state_dict) + + self.device = device + if freeze: + self.freeze() + self.layer = layer + if self.layer == "last": + self.layer_idx = 0 + elif self.layer == "penultimate": + self.layer_idx = 1 + else: + raise NotImplementedError() + + def freeze(self): + self.model = self.model.eval() + for param in self.parameters(): + param.requires_grad = False + + def load_config_and_state_from_nemo(self, nemo_path): + if torch.cuda.is_available(): + map_location = torch.device('cuda') + else: + map_location = torch.device('cpu') + save_restore_connector = NLPSaveRestoreConnector() + cwd = os.getcwd() + + with tempfile.TemporaryDirectory() as tmpdir: + try: + save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) + + # Change current working directory to + os.chdir(tmpdir) + config_yaml = os.path.join(tmpdir, save_restore_connector.model_config_yaml) + cfg = OmegaConf.load(config_yaml) + + model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) + state_dict = save_restore_connector._load_state_dict_from_disk( + model_weights, map_location=map_location + ) + finally: + os.chdir(cwd) + + return cfg, state_dict + + def build_tokenizer(self, cfg): + legacy = cfg.tokenizer.sentencepiece_legacy + self.tokenizer = get_nmt_tokenizer( + library=cfg.tokenizer.library, + model_name=cfg.tokenizer.type, + tokenizer_model=cfg.tokenizer.model, + vocab_file=cfg.tokenizer.vocab_file, + merges_file=cfg.tokenizer.merge_file, + delimiter=cfg.tokenizer.get('delimiter', None), + legacy=legacy, + ) + + _, self.text_transform = get_preprocess_fns(cfg, self.tokenizer, is_train=False,) + self.max_length = cfg.text.get("max_position_embeddings") + + def load_model(self, cfg, state_dict): + padded_vocab_size = self._vocab_size_with_padding( + orig_vocab_size=self.tokenizer.vocab_size, + make_vocab_size_divisible_by=cfg.get('make_vocab_size_divisible_by', 128), + tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1), + ) + model = CLIPModel( + model_cfg=cfg, + model_parallel_config=ModelParallelConfig(), + padded_vocab_size=padded_vocab_size, + pre_process=cfg.text.pre_process, + post_process=cfg.text.post_process, + ) + + if state_dict is not None: + clip_state_dict = {} + for key, value in state_dict.items(): + key = key[6:] + clip_state_dict[key] = value + model.load_state_dict(clip_state_dict) + + del model.vision_encoder + self.model = model.text_encoder + + def _vocab_size_with_padding(self, orig_vocab_size, make_vocab_size_divisible_by, tensor_model_parallel_size): + after = orig_vocab_size + multiple = make_vocab_size_divisible_by * tensor_model_parallel_size + while (after % multiple) != 0: + after += 1 + return after + + def forward(self, text): + texts = self.text_transform(text) + z = self.encode_with_transformer(texts.to(self.device)) + # # Pad the seq length to multiple of 8 + seq_len = (z.shape[1] + 8 - 1) // 8 * 8 + z = torch.nn.functional.pad(z, (0, 0, 0, seq_len - z.shape[1]), value=0.0) + return z + + def encode_with_transformer(self, text): + x = self.model.language_model.embedding.word_embeddings(text) + x += self.model.language_model.embedding.position_embeddings + x = x.permute(1, 0, 2) # NLD -> LND + x = self.text_transformer_forward(x, attn_mask=self.model.attn_mask) + x = self.model.language_model.encoder.final_layernorm(x) + x = x.permute(1, 0, 2) # LND -> NLD + return x + + def text_transformer_forward(self, x: torch.Tensor, attn_mask=None): + for i, r in enumerate(self.model.language_model.encoder.layers): + if i == len(self.model.language_model.encoder.layers) - self.layer_idx: + break + x = r(x, attn_mask) + return x + + def encode(self, text): + return self(text) + + +if __name__ == "__main__": + from ldm.util import count_params + + model = FrozenCLIPEmbedder() + count_params(model, verbose=True) diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py new file mode 100644 index 000000000000..ff47596f9932 --- /dev/null +++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/x_transformer.py @@ -0,0 +1,629 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""shout-out to https://github.com/lucidrains/x-transformers/tree/main/x_transformers""" +from collections import namedtuple +from functools import partial +from inspect import isfunction + +import torch +import torch.nn.functional as F +from einops import rearrange, reduce, repeat +from torch import einsum, nn + +# constants + +DEFAULT_DIM_HEAD = 64 + +Intermediates = namedtuple('Intermediates', ['pre_softmax_attn', 'post_softmax_attn']) + +LayerIntermediates = namedtuple('Intermediates', ['hiddens', 'attn_intermediates']) + + +class AbsolutePositionalEmbedding(nn.Module): + def __init__(self, dim, max_seq_len): + super().__init__() + self.emb = nn.Embedding(max_seq_len, dim) + self.init_() + + def init_(self): + nn.init.normal_(self.emb.weight, std=0.02) + + def forward(self, x): + n = torch.arange(x.shape[1], device=x.device) + return self.emb(n)[None, :, :] + + +class FixedPositionalEmbedding(nn.Module): + def __init__(self, dim): + super().__init__() + inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim)) + self.register_buffer('inv_freq', inv_freq) + + def forward(self, x, seq_dim=1, offset=0): + t = torch.arange(x.shape[seq_dim], device=x.device).type_as(self.inv_freq) + offset + sinusoid_inp = torch.einsum('i , j -> i j', t, self.inv_freq) + emb = torch.cat((sinusoid_inp.sin(), sinusoid_inp.cos()), dim=-1) + return emb[None, :, :] + + +# helpers + + +def exists(val): + return val is not None + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +def always(val): + def inner(*args, **kwargs): + return val + + return inner + + +def not_equals(val): + def inner(x): + return x != val + + return inner + + +def equals(val): + def inner(x): + return x == val + + return inner + + +def max_neg_value(tensor): + return -torch.finfo(tensor.dtype).max + + +# keyword argument helpers + + +def pick_and_pop(keys, d): + values = list(map(lambda key: d.pop(key), keys)) + return dict(zip(keys, values)) + + +def group_dict_by_key(cond, d): + return_val = [dict(), dict()] + for key in d.keys(): + match = bool(cond(key)) + ind = int(not match) + return_val[ind][key] = d[key] + return (*return_val,) + + +def string_begins_with(prefix, str): + return str.startswith(prefix) + + +def group_by_key_prefix(prefix, d): + return group_dict_by_key(partial(string_begins_with, prefix), d) + + +def groupby_prefix_and_trim(prefix, d): + kwargs_with_prefix, kwargs = group_dict_by_key(partial(string_begins_with, prefix), d) + kwargs_without_prefix = dict(map(lambda x: (x[0][len(prefix) :], x[1]), tuple(kwargs_with_prefix.items()))) + return kwargs_without_prefix, kwargs + + +# classes +class Scale(nn.Module): + def __init__(self, value, fn): + super().__init__() + self.value = value + self.fn = fn + + def forward(self, x, **kwargs): + x, *rest = self.fn(x, **kwargs) + return (x * self.value, *rest) + + +class Rezero(nn.Module): + def __init__(self, fn): + super().__init__() + self.fn = fn + self.g = nn.Parameter(torch.zeros(1)) + + def forward(self, x, **kwargs): + x, *rest = self.fn(x, **kwargs) + return (x * self.g, *rest) + + +class ScaleNorm(nn.Module): + def __init__(self, dim, eps=1e-5): + super().__init__() + self.scale = dim ** -0.5 + self.eps = eps + self.g = nn.Parameter(torch.ones(1)) + + def forward(self, x): + norm = torch.norm(x, dim=-1, keepdim=True) * self.scale + return x / norm.clamp(min=self.eps) * self.g + + +class RMSNorm(nn.Module): + def __init__(self, dim, eps=1e-8): + super().__init__() + self.scale = dim ** -0.5 + self.eps = eps + self.g = nn.Parameter(torch.ones(dim)) + + def forward(self, x): + norm = torch.norm(x, dim=-1, keepdim=True) * self.scale + return x / norm.clamp(min=self.eps) * self.g + + +class Residual(nn.Module): + def forward(self, x, residual): + return x + residual + + +class GRUGating(nn.Module): + def __init__(self, dim): + super().__init__() + self.gru = nn.GRUCell(dim, dim) + + def forward(self, x, residual): + gated_output = self.gru(rearrange(x, 'b n d -> (b n) d'), rearrange(residual, 'b n d -> (b n) d')) + + return gated_output.reshape_as(x) + + +# feedforward + + +class GEGLU(nn.Module): + def __init__(self, dim_in, dim_out): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out * 2) + + def forward(self, x): + x, gate = self.proj(x).chunk(2, dim=-1) + return x * F.gelu(gate) + + +class FeedForward(nn.Module): + def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0): + super().__init__() + inner_dim = int(dim * mult) + dim_out = default(dim_out, dim) + project_in = nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU()) if not glu else GEGLU(dim, inner_dim) + + self.net = nn.Sequential(project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim_out)) + + def forward(self, x): + return self.net(x) + + +# attention. +class Attention(nn.Module): + def __init__( + self, + dim, + dim_head=DEFAULT_DIM_HEAD, + heads=8, + causal=False, + mask=None, + talking_heads=False, + sparse_topk=None, + use_entmax15=False, + num_mem_kv=0, + dropout=0.0, + on_attn=False, + ): + super().__init__() + if use_entmax15: + raise NotImplementedError("Check out entmax activation instead of softmax activation!") + self.scale = dim_head ** -0.5 + self.heads = heads + self.causal = causal + self.mask = mask + + inner_dim = dim_head * heads + + self.to_q = nn.Linear(dim, inner_dim, bias=False) + self.to_k = nn.Linear(dim, inner_dim, bias=False) + self.to_v = nn.Linear(dim, inner_dim, bias=False) + self.dropout = nn.Dropout(dropout) + + # talking heads + self.talking_heads = talking_heads + if talking_heads: + self.pre_softmax_proj = nn.Parameter(torch.randn(heads, heads)) + self.post_softmax_proj = nn.Parameter(torch.randn(heads, heads)) + + # explicit topk sparse attention + self.sparse_topk = sparse_topk + + # entmax + # self.attn_fn = entmax15 if use_entmax15 else F.softmax + self.attn_fn = F.softmax + + # add memory key / values + self.num_mem_kv = num_mem_kv + if num_mem_kv > 0: + self.mem_k = nn.Parameter(torch.randn(heads, num_mem_kv, dim_head)) + self.mem_v = nn.Parameter(torch.randn(heads, num_mem_kv, dim_head)) + + # attention on attention + self.attn_on_attn = on_attn + self.to_out = nn.Sequential(nn.Linear(inner_dim, dim * 2), nn.GLU()) if on_attn else nn.Linear(inner_dim, dim) + + def forward( + self, + x, + context=None, + mask=None, + context_mask=None, + rel_pos=None, + sinusoidal_emb=None, + prev_attn=None, + mem=None, + ): + b, n, _, h, talking_heads, device = *x.shape, self.heads, self.talking_heads, x.device + kv_input = default(context, x) + + q_input = x + k_input = kv_input + v_input = kv_input + + if exists(mem): + k_input = torch.cat((mem, k_input), dim=-2) + v_input = torch.cat((mem, v_input), dim=-2) + + if exists(sinusoidal_emb): + # in shortformer, the query would start at a position offset depending on the past cached memory + offset = k_input.shape[-2] - q_input.shape[-2] + q_input = q_input + sinusoidal_emb(q_input, offset=offset) + k_input = k_input + sinusoidal_emb(k_input) + + q = self.to_q(q_input) + k = self.to_k(k_input) + v = self.to_v(v_input) + + q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=h), (q, k, v)) + + input_mask = None + if any(map(exists, (mask, context_mask))): + q_mask = default(mask, lambda: torch.ones((b, n), device=device).bool()) + k_mask = q_mask if not exists(context) else context_mask + k_mask = default(k_mask, lambda: torch.ones((b, k.shape[-2]), device=device).bool()) + q_mask = rearrange(q_mask, 'b i -> b () i ()') + k_mask = rearrange(k_mask, 'b j -> b () () j') + input_mask = q_mask * k_mask + + if self.num_mem_kv > 0: + mem_k, mem_v = map(lambda t: repeat(t, 'h n d -> b h n d', b=b), (self.mem_k, self.mem_v)) + k = torch.cat((mem_k, k), dim=-2) + v = torch.cat((mem_v, v), dim=-2) + if exists(input_mask): + input_mask = F.pad(input_mask, (self.num_mem_kv, 0), value=True) + + dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale + mask_value = max_neg_value(dots) + + if exists(prev_attn): + dots = dots + prev_attn + + pre_softmax_attn = dots + + if talking_heads: + dots = einsum('b h i j, h k -> b k i j', dots, self.pre_softmax_proj).contiguous() + + if exists(rel_pos): + dots = rel_pos(dots) + + if exists(input_mask): + dots.masked_fill_(~input_mask, mask_value) + del input_mask + + if self.causal: + i, j = dots.shape[-2:] + r = torch.arange(i, device=device) + mask = rearrange(r, 'i -> () () i ()') < rearrange(r, 'j -> () () () j') + mask = F.pad(mask, (j - i, 0), value=False) + dots.masked_fill_(mask, mask_value) + del mask + + if exists(self.sparse_topk) and self.sparse_topk < dots.shape[-1]: + top, _ = dots.topk(self.sparse_topk, dim=-1) + vk = top[..., -1].unsqueeze(-1).expand_as(dots) + mask = dots < vk + dots.masked_fill_(mask, mask_value) + del mask + + attn = self.attn_fn(dots, dim=-1) + post_softmax_attn = attn + + attn = self.dropout(attn) + + if talking_heads: + attn = einsum('b h i j, h k -> b k i j', attn, self.post_softmax_proj).contiguous() + + out = einsum('b h i j, b h j d -> b h i d', attn, v) + out = rearrange(out, 'b h n d -> b n (h d)') + + intermediates = Intermediates(pre_softmax_attn=pre_softmax_attn, post_softmax_attn=post_softmax_attn) + + return self.to_out(out), intermediates + + +class AttentionLayers(nn.Module): + def __init__( + self, + dim, + depth, + heads=8, + causal=False, + cross_attend=False, + only_cross=False, + use_scalenorm=False, + use_rmsnorm=False, + use_rezero=False, + rel_pos_num_buckets=32, + rel_pos_max_distance=128, + position_infused_attn=False, + custom_layers=None, + sandwich_coef=None, + par_ratio=None, + residual_attn=False, + cross_residual_attn=False, + macaron=False, + pre_norm=True, + gate_residual=False, + **kwargs, + ): + super().__init__() + ff_kwargs, kwargs = groupby_prefix_and_trim('ff_', kwargs) + attn_kwargs, _ = groupby_prefix_and_trim('attn_', kwargs) + + dim_head = attn_kwargs.get('dim_head', DEFAULT_DIM_HEAD) + + self.dim = dim + self.depth = depth + self.layers = nn.ModuleList([]) + + self.has_pos_emb = position_infused_attn + self.pia_pos_emb = FixedPositionalEmbedding(dim) if position_infused_attn else None + self.rotary_pos_emb = always(None) + + assert ( + rel_pos_num_buckets <= rel_pos_max_distance + ), 'number of relative position buckets must be less than the relative position max distance' + self.rel_pos = None + + self.pre_norm = pre_norm + + self.residual_attn = residual_attn + self.cross_residual_attn = cross_residual_attn + + norm_class = ScaleNorm if use_scalenorm else nn.LayerNorm + norm_class = RMSNorm if use_rmsnorm else norm_class + norm_fn = partial(norm_class, dim) + + norm_fn = nn.Identity if use_rezero else norm_fn + branch_fn = Rezero if use_rezero else None + + if cross_attend and not only_cross: + default_block = ('a', 'c', 'f') + elif cross_attend and only_cross: + default_block = ('c', 'f') + else: + default_block = ('a', 'f') + + if macaron: + default_block = ('f',) + default_block + + if exists(custom_layers): + layer_types = custom_layers + elif exists(par_ratio): + par_depth = depth * len(default_block) + assert 1 < par_ratio <= par_depth, 'par ratio out of range' + default_block = tuple(filter(not_equals('f'), default_block)) + par_attn = par_depth // par_ratio + depth_cut = par_depth * 2 // 3 # 2 / 3 attention layer cutoff suggested by PAR paper + par_width = (depth_cut + depth_cut // par_attn) // par_attn + assert len(default_block) <= par_width, 'default block is too large for par_ratio' + par_block = default_block + ('f',) * (par_width - len(default_block)) + par_head = par_block * par_attn + layer_types = par_head + ('f',) * (par_depth - len(par_head)) + elif exists(sandwich_coef): + assert sandwich_coef > 0 and sandwich_coef <= depth, 'sandwich coefficient should be less than the depth' + layer_types = ('a',) * sandwich_coef + default_block * (depth - sandwich_coef) + ('f',) * sandwich_coef + else: + layer_types = default_block * depth + + self.layer_types = layer_types + self.num_attn_layers = len(list(filter(equals('a'), layer_types))) + + for layer_type in self.layer_types: + if layer_type == 'a': + layer = Attention(dim, heads=heads, causal=causal, **attn_kwargs) + elif layer_type == 'c': + layer = Attention(dim, heads=heads, **attn_kwargs) + elif layer_type == 'f': + layer = FeedForward(dim, **ff_kwargs) + layer = layer if not macaron else Scale(0.5, layer) + else: + raise Exception(f'invalid layer type {layer_type}') + + if isinstance(layer, Attention) and exists(branch_fn): + layer = branch_fn(layer) + + if gate_residual: + residual_fn = GRUGating(dim) + else: + residual_fn = Residual() + + self.layers.append(nn.ModuleList([norm_fn(), layer, residual_fn])) + + def forward(self, x, context=None, mask=None, context_mask=None, mems=None, return_hiddens=False): + hiddens = [] + intermediates = [] + prev_attn = None + prev_cross_attn = None + + mems = mems.copy() if exists(mems) else [None] * self.num_attn_layers + + for ind, (layer_type, (norm, block, residual_fn)) in enumerate(zip(self.layer_types, self.layers)): + is_last = ind == (len(self.layers) - 1) + + if layer_type == 'a': + hiddens.append(x) + layer_mem = mems.pop(0) + + residual = x + + if self.pre_norm: + x = norm(x) + + if layer_type == 'a': + out, inter = block( + x, + mask=mask, + sinusoidal_emb=self.pia_pos_emb, + rel_pos=self.rel_pos, + prev_attn=prev_attn, + mem=layer_mem, + ) + elif layer_type == 'c': + out, inter = block(x, context=context, mask=mask, context_mask=context_mask, prev_attn=prev_cross_attn) + elif layer_type == 'f': + out = block(x) + + x = residual_fn(out, residual) + + if layer_type in ('a', 'c'): + intermediates.append(inter) + + if layer_type == 'a' and self.residual_attn: + prev_attn = inter.pre_softmax_attn + elif layer_type == 'c' and self.cross_residual_attn: + prev_cross_attn = inter.pre_softmax_attn + + if not self.pre_norm and not is_last: + x = norm(x) + + if return_hiddens: + intermediates = LayerIntermediates(hiddens=hiddens, attn_intermediates=intermediates) + + return x, intermediates + + return x + + +class Encoder(AttentionLayers): + def __init__(self, **kwargs): + assert 'causal' not in kwargs, 'cannot set causality on encoder' + super().__init__(causal=False, **kwargs) + + +class TransformerWrapper(nn.Module): + def __init__( + self, + *, + num_tokens, + max_seq_len, + attn_layers, + emb_dim=None, + max_mem_len=0.0, + emb_dropout=0.0, + num_memory_tokens=None, + tie_embedding=False, + use_pos_emb=True, + ): + super().__init__() + assert isinstance(attn_layers, AttentionLayers), 'attention layers must be one of Encoder or Decoder' + + dim = attn_layers.dim + emb_dim = default(emb_dim, dim) + + self.max_seq_len = max_seq_len + self.max_mem_len = max_mem_len + self.num_tokens = num_tokens + + self.token_emb = nn.Embedding(num_tokens, emb_dim) + self.pos_emb = ( + AbsolutePositionalEmbedding(emb_dim, max_seq_len) + if (use_pos_emb and not attn_layers.has_pos_emb) + else always(0) + ) + self.emb_dropout = nn.Dropout(emb_dropout) + + self.project_emb = nn.Linear(emb_dim, dim) if emb_dim != dim else nn.Identity() + self.attn_layers = attn_layers + self.norm = nn.LayerNorm(dim) + + self.init_() + + self.to_logits = nn.Linear(dim, num_tokens) if not tie_embedding else lambda t: t @ self.token_emb.weight.t() + + # memory tokens (like [cls]) from Memory Transformers paper + num_memory_tokens = default(num_memory_tokens, 0) + self.num_memory_tokens = num_memory_tokens + if num_memory_tokens > 0: + self.memory_tokens = nn.Parameter(torch.randn(num_memory_tokens, dim)) + + # let funnel encoder know number of memory tokens, if specified + if hasattr(attn_layers, 'num_memory_tokens'): + attn_layers.num_memory_tokens = num_memory_tokens + + def init_(self): + nn.init.normal_(self.token_emb.weight, std=0.02) + + def forward( + self, x, return_embeddings=False, mask=None, return_mems=False, return_attn=False, mems=None, **kwargs + ): + b, n, device, num_mem = *x.shape, x.device, self.num_memory_tokens + x = self.token_emb(x) + x += self.pos_emb(x) + x = self.emb_dropout(x) + + x = self.project_emb(x) + + if num_mem > 0: + mem = repeat(self.memory_tokens, 'n d -> b n d', b=b) + x = torch.cat((mem, x), dim=1) + + # auto-handle masking after appending memory tokens + if exists(mask): + mask = F.pad(mask, (num_mem, 0), value=True) + + x, intermediates = self.attn_layers(x, mask=mask, mems=mems, return_hiddens=True, **kwargs) + x = self.norm(x) + + mem, x = x[:, :num_mem], x[:, num_mem:] + + out = self.to_logits(x) if not return_embeddings else x + + if return_mems: + hiddens = intermediates.hiddens + new_mems = list(map(lambda pair: torch.cat(pair, dim=-2), zip(mems, hiddens))) if exists(mems) else hiddens + new_mems = list(map(lambda t: t[..., -self.max_mem_len :, :].detach(), new_mems)) + return out, new_mems + + if return_attn: + attn_maps = list(map(lambda t: t.post_softmax_attn, intermediates.attn_intermediates)) + return out, attn_maps + + return out diff --git a/nemo/collections/multimodal/parts/__init__.py b/nemo/collections/multimodal/parts/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/parts/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/parts/imagen/__init__.py b/nemo/collections/multimodal/parts/imagen/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/parts/imagen/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/parts/imagen/utils.py b/nemo/collections/multimodal/parts/imagen/utils.py new file mode 100644 index 000000000000..565b1ed6a2b4 --- /dev/null +++ b/nemo/collections/multimodal/parts/imagen/utils.py @@ -0,0 +1,29 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + + +def random_dropout(embeddings, drop_rate): + r""" + Function to perform random dropout for embeddings. + When we drop embeddings, we zero them out. + Args: + embeddings (tensor): Input embeddings + drop_rate (float): Rate of dropping the embedding. + """ + nsamples = embeddings.shape[0] + zero_flag = torch.ones(nsamples, 1, 1).to(embeddings.dtype) * (1 - drop_rate) + zero_flag = torch.bernoulli(zero_flag).cuda() + embeddings = embeddings * zero_flag + return embeddings diff --git a/nemo/collections/multimodal/parts/stable_diffusion/__init__.py b/nemo/collections/multimodal/parts/stable_diffusion/__init__.py new file mode 100644 index 000000000000..4fc50543f1d2 --- /dev/null +++ b/nemo/collections/multimodal/parts/stable_diffusion/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py b/nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py new file mode 100644 index 000000000000..620d1dcad41a --- /dev/null +++ b/nemo/collections/multimodal/parts/stable_diffusion/lr_scheduler.py @@ -0,0 +1,112 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np + + +class LambdaWarmUpCosineScheduler: + """ + note: use with a base_lr of 1.0 + """ + + def __init__(self, warm_up_steps, lr_min, lr_max, lr_start, max_decay_steps, verbosity_interval=0): + self.lr_warm_up_steps = warm_up_steps + self.lr_start = lr_start + self.lr_min = lr_min + self.lr_max = lr_max + self.lr_max_decay_steps = max_decay_steps + self.last_lr = 0.0 + self.verbosity_interval = verbosity_interval + + def schedule(self, n, **kwargs): + if self.verbosity_interval > 0: + if n % self.verbosity_interval == 0: + print(f"current step: {n}, recent lr-multiplier: {self.last_lr}") + if n < self.lr_warm_up_steps: + lr = (self.lr_max - self.lr_start) / self.lr_warm_up_steps * n + self.lr_start + self.last_lr = lr + return lr + else: + t = (n - self.lr_warm_up_steps) / (self.lr_max_decay_steps - self.lr_warm_up_steps) + t = min(t, 1.0) + lr = self.lr_min + 0.5 * (self.lr_max - self.lr_min) * (1 + np.cos(t * np.pi)) + self.last_lr = lr + return lr + + def __call__(self, n, **kwargs): + return self.schedule(n, **kwargs) + + +class LambdaWarmUpCosineScheduler2: + """ + supports repeated iterations, configurable via lists + note: use with a base_lr of 1.0. + """ + + def __init__(self, warm_up_steps, f_min, f_max, f_start, cycle_lengths, verbosity_interval=0): + assert len(warm_up_steps) == len(f_min) == len(f_max) == len(f_start) == len(cycle_lengths) + self.lr_warm_up_steps = warm_up_steps + self.f_start = f_start + self.f_min = f_min + self.f_max = f_max + self.cycle_lengths = cycle_lengths + self.cum_cycles = np.cumsum([0] + list(self.cycle_lengths)) + self.last_f = 0.0 + self.verbosity_interval = verbosity_interval + + def find_in_interval(self, n): + interval = 0 + for cl in self.cum_cycles[1:]: + if n <= cl: + return interval + interval += 1 + + def schedule(self, n, **kwargs): + cycle = self.find_in_interval(n) + n = n - self.cum_cycles[cycle] + if self.verbosity_interval > 0: + if n % self.verbosity_interval == 0: + print(f"current step: {n}, recent lr-multiplier: {self.last_f}, " f"current cycle {cycle}") + if n < self.lr_warm_up_steps[cycle]: + f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle] + self.last_f = f + return f + else: + t = (n - self.lr_warm_up_steps[cycle]) / (self.cycle_lengths[cycle] - self.lr_warm_up_steps[cycle]) + t = min(t, 1.0) + f = self.f_min[cycle] + 0.5 * (self.f_max[cycle] - self.f_min[cycle]) * (1 + np.cos(t * np.pi)) + self.last_f = f + return f + + def __call__(self, n, **kwargs): + return self.schedule(n, **kwargs) + + +class LambdaLinearScheduler(LambdaWarmUpCosineScheduler2): + def schedule(self, n, **kwargs): + cycle = self.find_in_interval(n) + n = n - self.cum_cycles[cycle] + if self.verbosity_interval > 0: + if n % self.verbosity_interval == 0: + print(f"current step: {n}, recent lr-multiplier: {self.last_f}, " f"current cycle {cycle}") + + if n < self.lr_warm_up_steps[cycle]: + f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle] + self.last_f = f + return f + else: + f = self.f_min[cycle] + (self.f_max[cycle] - self.f_min[cycle]) * (self.cycle_lengths[cycle] - n) / ( + self.cycle_lengths[cycle] + ) + self.last_f = f + return f diff --git a/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py new file mode 100644 index 000000000000..cdfd3c37300e --- /dev/null +++ b/nemo/collections/multimodal/parts/stable_diffusion/pipeline.py @@ -0,0 +1,202 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import pickle +import time + +import torch +from PIL import Image + +from nemo.collections.multimodal.models.stable_diffusion.samplers.ddim import DDIMSampler +from nemo.collections.multimodal.models.stable_diffusion.samplers.para_ddim import ParaDDIMSampler +from nemo.collections.multimodal.models.stable_diffusion.samplers.plms import PLMSSampler +from nemo.collections.multimodal.models.stable_diffusion.samplers.sampler_dpm import DPMSolverSampler +from nemo.collections.multimodal.parts.stable_diffusion.utils import DataParallelWrapper + + +def encode_prompt(cond_stage_model, prompt, unconditional_guidance_scale, batch_size): + c = cond_stage_model.encode(batch_size * [prompt]) + if unconditional_guidance_scale != 1.0: + uc = cond_stage_model.encode(batch_size * [""]) + else: + uc = None + return c, uc + + +def initialize_sampler(model, sampler_type): + if sampler_type == 'DDIM': + sampler = DDIMSampler(model) + elif sampler_type == 'PLMS': + sampler = PLMSSampler(model) + elif sampler_type == 'DPM': + sampler = DPMSolverSampler(model) + elif sampler_type == 'PARA_DDIM': + sampler = ParaDDIMSampler(model) + else: + raise ValueError(f'Sampler {sampler_type} is not supported.') + return sampler + + +def decode_images(model, samples): + images = model.decode_first_stage(samples) + + images = torch.clamp((images + 1.0) / 2.0, min=0.0, max=1.0) + + return images + + +def numpy_to_pil(images): + """ + Convert a numpy image or a batch of images to a PIL image. + """ + if images.ndim == 3: + images = images[None, ...] + images = (images * 255).round().astype("uint8") + pil_images = [Image.fromarray(image) for image in images] + + return pil_images + + +def torch_to_numpy(images): + numpy_images = [x.float().cpu().permute(0, 2, 3, 1).numpy() for x in images] + return numpy_images + + +def pipeline(model, cfg, verbose=True, rng=None): + # setup default values for inference configs + unconditional_guidance_scale = cfg.infer.get("unconditional_guidance_scale", 7.5) + batch_size = cfg.infer.get('num_images_per_prompt', 1) + prompts = cfg.infer.get('prompts', []) + height = cfg.infer.get('height', 512) + width = cfg.infer.get('width', 512) + downsampling_factor = cfg.infer.get('down_factor', 8) + sampler_type = cfg.infer.get('sampler_type', 'DDIM') + sampler_parallelism = cfg.infer.get('sampler_parallelism', 1) + sampler_tolerance = cfg.infer.get('sampler_tolerance', 0.1) + inference_steps = cfg.infer.get('inference_steps', 50) + output_type = cfg.infer.get('output_type', 'pil') + save_to_file = cfg.infer.get('save_to_file', True) + out_path = cfg.infer.get('out_path', '') + eta = cfg.infer.get('eta', 0) + num_devices = cfg.infer.get('devices', 1) + + if sampler_parallelism > 1: + if not sampler_type.startswith('PARA'): + raise ValueError('Parallel sampler is required when parallelism > 1') + if not num_devices > 1: + print("It is recommended to run parallel sampler with multiple GPUs") + + if num_devices > 1: + print(f"Running DataParallel model with {num_devices} GPUs.") + model.model.diffusion_model = DataParallelWrapper( + model.model.diffusion_model, device_ids=list(range(num_devices)) + ) + + # get autocast_dtype + if cfg.trainer.precision in ['bf16', 'bf16-mixed']: + autocast_dtype = torch.bfloat16 + elif cfg.trainer.precision in [32, '32', '32-true']: + autocast_dtype = torch.float + elif cfg.trainer.precision in [16, '16', '16-mixed']: + autocast_dtype = torch.half + else: + raise ValueError('precision must be in [32, 16, "bf16"]') + + with torch.no_grad(), torch.cuda.amp.autocast( + enabled=autocast_dtype in (torch.half, torch.bfloat16), dtype=autocast_dtype, + ): + + in_channels = model.model.diffusion_model.in_channels + + sampler = initialize_sampler(model, sampler_type.upper()) + + output = [] + throughput = [] + + if isinstance(prompts, str): + prompts = [prompts] + + for prompt in prompts: + tic = time.perf_counter() + tic_total = tic + cond, u_cond = encode_prompt(model.cond_stage_model, prompt, unconditional_guidance_scale, batch_size) + toc = time.perf_counter() + conditioning_time = toc - tic + + latent_shape = [in_channels, height // downsampling_factor, width // downsampling_factor] + latents = torch.randn( + [batch_size, in_channels, height // downsampling_factor, width // downsampling_factor], generator=rng + ).to(torch.cuda.current_device()) + + tic = time.perf_counter() + samples, intermediates = sampler.sample( + S=inference_steps, + conditioning=cond, + batch_size=batch_size, + shape=latent_shape, + verbose=False, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=u_cond, + eta=eta, + x_T=latents, + parallelism=sampler_parallelism, + tolerance=sampler_tolerance, + ) + toc = time.perf_counter() + sampling_time = toc - tic + + tic = time.perf_counter() + images = decode_images(model, samples) + toc = time.perf_counter() + decode_time = toc - tic + + toc_total = time.perf_counter() + total_time = toc_total - tic_total + output.append(images) + + throughput.append( + { + 'text-conditioning-time': conditioning_time, + 'sampling-time': sampling_time, + 'decode-time': decode_time, + 'total-time': total_time, + 'sampling-steps': inference_steps, + } + ) + + # Convert output type and save to disk + if output_type == 'torch': + output = torch.cat(output, dim=0) + else: + output = torch_to_numpy(output) + if output_type == 'pil': + output = [numpy_to_pil(x) for x in output] + + if save_to_file: + os.makedirs(out_path, exist_ok=True) + if output_type == 'pil': + for text_prompt, pils in zip(prompts, output): + for idx, image in enumerate(pils): + image.save(os.path.join(out_path, f'{text_prompt[:50]}_{idx}.png')) + else: + with open(os.path.join(out_path, 'output.pkl'), 'wb') as f: + pickle.dump(output, f) + else: + return output + + ave_metrics = {} + for key in throughput[0].keys(): + ave_metrics[f'avg-{key}'] = sum([dicts[key] for dicts in throughput]) / len(throughput) + if verbose: + print(ave_metrics) diff --git a/nemo/collections/multimodal/parts/stable_diffusion/utils.py b/nemo/collections/multimodal/parts/stable_diffusion/utils.py new file mode 100644 index 000000000000..7126283cb1e8 --- /dev/null +++ b/nemo/collections/multimodal/parts/stable_diffusion/utils.py @@ -0,0 +1,213 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import importlib +import multiprocessing as mp +from collections import abc +from functools import partial +from inspect import isfunction +from queue import Queue +from threading import Thread + +import numpy as np +import torch +from einops import rearrange +from PIL import Image, ImageDraw, ImageFont + + +class DataParallelWrapper(torch.nn.DataParallel): + def __getattr__(self, name): + try: + return super().__getattr__(name) + except AttributeError: + return getattr(self.module, name) + + +def log_txt_as_img(wh, xc, size=10): + # wh a tuple of (width, height) + # xc a list of captions to plot + b = len(xc) + txts = list() + for bi in range(b): + txt = Image.new("RGB", wh, color="white") + draw = ImageDraw.Draw(txt) + nc = int(40 * (wh[0] / 256)) + lines = "\n".join(xc[bi][start : start + nc] for start in range(0, len(xc[bi]), nc)) + + try: + draw.text((0, 0), lines, fill="black") + except UnicodeEncodeError: + print("Cant encode string for logging. Skipping.") + + txt = np.array(txt).transpose(2, 0, 1) / 127.5 - 1.0 + txts.append(txt) + txts = np.stack(txts) + txts = torch.tensor(txts) + return txts + + +def ismap(x): + if not isinstance(x, torch.Tensor): + return False + return (len(x.shape) == 4) and (x.shape[1] > 3) + + +def isimage(x): + if not isinstance(x, torch.Tensor): + return False + return (len(x.shape) == 4) and (x.shape[1] == 3 or x.shape[1] == 1) + + +def exists(x): + return x is not None + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +def mean_flat(tensor): + """ + https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/nn.py#L86 + Take the mean over all non-batch dimensions. + """ + return tensor.mean(dim=list(range(1, len(tensor.shape)))) + + +def count_params(model, verbose=False): + total_params = sum(p.numel() for p in model.parameters()) + if verbose: + print(f"{model.__class__.__name__} has {total_params * 1.e-6:.2f} M params.") + return total_params + + +def instantiate_from_config(config): + if not "target" in config: + if config == '__is_first_stage__': + return None + elif config == "__is_unconditional__": + return None + raise KeyError("Expected key `target` to instantiate.") + return get_obj_from_str(config["target"])(**config.get("params", dict())) + + +def get_obj_from_str(string, reload=False): + module, cls = string.rsplit(".", 1) + print(f'Getting module=<{module}>, cls=<{cls}>') + if reload: + module_imp = importlib.import_module(module) + importlib.reload(module_imp) + return getattr(importlib.import_module(module, package=None), cls) + + +def _do_parallel_data_prefetch(func, Q, data, idx, idx_to_fn=False): + # create dummy dataset instance + + # run prefetching + if idx_to_fn: + res = func(data, worker_id=idx) + else: + res = func(data) + Q.put([idx, res]) + Q.put("Done") + + +def parallel_data_prefetch( + func: callable, data, n_proc, target_data_type="ndarray", cpu_intensive=True, use_worker_id=False +): + # if target_data_type not in ["ndarray", "list"]: + # raise ValueError( + # "Data, which is passed to parallel_data_prefetch has to be either of type list or ndarray." + # ) + if isinstance(data, np.ndarray) and target_data_type == "list": + raise ValueError("list expected but function got ndarray.") + elif isinstance(data, abc.Iterable): + if isinstance(data, dict): + print( + f'WARNING:"data" argument passed to parallel_data_prefetch is a dict: Using only its values and disregarding keys.' + ) + data = list(data.values()) + if target_data_type == "ndarray": + data = np.asarray(data) + else: + data = list(data) + else: + raise TypeError( + f"The data, that shall be processed parallel has to be either an np.ndarray or an Iterable, but is actually {type(data)}." + ) + + if cpu_intensive: + Q = mp.Queue(1000) + proc = mp.Process + else: + Q = Queue(1000) + proc = Thread + # spawn processes + if target_data_type == "ndarray": + arguments = [[func, Q, part, i, use_worker_id] for i, part in enumerate(np.array_split(data, n_proc))] + else: + step = int(len(data) / n_proc + 1) if len(data) % n_proc != 0 else int(len(data) / n_proc) + arguments = [ + [func, Q, part, i, use_worker_id] + for i, part in enumerate([data[i : i + step] for i in range(0, len(data), step)]) + ] + processes = [] + for i in range(n_proc): + p = proc(target=_do_parallel_data_prefetch, args=arguments[i]) + processes += [p] + + # start processes + print(f"Start prefetching...") + import time + + start = time.time() + gather_res = [[] for _ in range(n_proc)] + try: + for p in processes: + p.start() + + k = 0 + while k < n_proc: + # get result + res = Q.get() + if res == "Done": + k += 1 + else: + gather_res[res[0]] = res[1] + + except Exception as e: + print("Exception: ", e) + for p in processes: + p.terminate() + + raise e + finally: + for p in processes: + p.join() + print(f"Prefetching complete. [{time.time() - start} sec.]") + + if target_data_type == 'ndarray': + if not isinstance(gather_res[0], np.ndarray): + return np.concatenate([np.asarray(r) for r in gather_res], axis=0) + + # order outputs + return np.concatenate(gather_res, axis=0) + elif target_data_type == 'list': + out = [] + for r in gather_res: + out.extend(r) + return out + else: + return gather_res diff --git a/nemo/collections/multimodal/parts/utils.py b/nemo/collections/multimodal/parts/utils.py new file mode 100644 index 000000000000..89a7e98cef00 --- /dev/null +++ b/nemo/collections/multimodal/parts/utils.py @@ -0,0 +1,267 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from typing import Any, Callable, Dict, Tuple + +import torch +from omegaconf import DictConfig, OmegaConf, open_dict +from PIL import Image +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment + +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector +from nemo.utils import AppState, logging +from nemo.utils.distributed import initialize_distributed + +try: + from megatron.core import parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +def numpy_to_pil(images): + """ + Convert a numpy image or a batch of images to a PIL image. + """ + if images.ndim == 3: + images = images[None, ...] + images = (images * 255).round().astype("uint8") + pil_images = [Image.fromarray(image) for image in images] + + return pil_images + + +def randn_like(x, generator=None): + return torch.randn(x.shape, dtype=x.dtype, device=x.device, generator=generator) + + +def extend_instance(obj, mixin): + """Apply mixins to a class instance after creation""" + base_cls = obj.__class__ + base_cls_name = obj.__class__.__name__ + obj.__class__ = type( + base_cls_name, (mixin, base_cls), {} + ) # mixin needs to go first for our forward() logic to work + + +def getattr_recursive(obj, att): + """ + Return nested attribute of obj + Example: getattr_recursive(obj, 'a.b.c') is equivalent to obj.a.b.c + """ + if att == "": + return obj + i = att.find(".") + if i < 0: + return getattr(obj, att) + else: + return getattr_recursive(getattr(obj, att[:i]), att[i + 1 :]) + + +def setattr_recursive(obj, att, val): + """ + Set nested attribute of obj + Example: setattr_recursive(obj, 'a.b.c', val) is equivalent to obj.a.b.c = val + """ + if "." in att: + obj = getattr_recursive(obj, ".".join(att.split(".")[:-1])) + setattr(obj, att.split(".")[-1], val) + + +def apply_with_stopping_condition(module, apply_fn, apply_condition=None, stopping_condition=None, **other_args): + if stopping_condition(module): + return + if apply_condition(module): + apply_fn(module, **other_args) + for child in module.children(): + apply_with_stopping_condition( + child, apply_fn, apply_condition=apply_condition, stopping_condition=stopping_condition, **other_args + ) + + +def setup_trainer_and_models_for_inference( + model_provider: Any, cfg: DictConfig, model_cfg_modifier: Callable, +): + """ + Set up a trainer and NeMo model for inference. + + Args: + model_provider (Any): An object that provides the NeMo model. + cfg (DictConfig): The configuration dictionary, containing the + necessary settings for the trainer and the models. + model_cfg_modifier (Callable): A function that modifies the model + configuration for inference. + + Returns: + Tuple[Trainer, Any]: A tuple containing the trainer and the model. + """ + + # Check if we need to use the TorchElasticEnvironment plugin for the trainer. + plugins = [] + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + # Use the NLPDDPStrategy for the distributed data parallel strategy. + # We don't use DDP for async grad allreduce and don't find unused parameters. + strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=False,) + + # Set up the trainer with the specified plugins and strategy. + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + # Create the NLPSaveRestoreConnector object for model saving and restoring. + save_restore_connector = NLPSaveRestoreConnector() + + print(f'Loading {cfg.models} models') + models = [] + for single_model_cfg in cfg.models: + if not single_model_cfg.restore_from_path: + continue + if single_model_cfg.restore_from_path.endswith(".nemo"): + # Set the model_extracted_dir attribute if the restore path is a directory. + if os.path.isdir(single_model_cfg.restore_from_path): + save_restore_connector.model_extracted_dir = single_model_cfg.restore_from_path + + # Restore the model configuration from the specified path and modify it for inference. + model_cfg = model_provider.restore_from( + restore_path=single_model_cfg.restore_from_path, + trainer=trainer, + save_restore_connector=save_restore_connector, + return_config=True, + ) + with open_dict(model_cfg): + model_cfg_modifier(model_cfg) # modify the configuration for inference + + # Restore the model from the specified path and configuration, and set it up for inference. + model = model_provider.restore_from( + restore_path=single_model_cfg.restore_from_path, + trainer=trainer, + override_config_path=model_cfg, + save_restore_connector=save_restore_connector, + strict=True, + ) + models.append(model) + + elif single_model_cfg.restore_from_path.endswith(".ckpt"): + logging.warning( + "Loading from .ckpt checkpoint for inference is experimental! It doesn't support models with model parallelism!" + ) + + model = model_provider.load_from_checkpoint( + single_model_cfg.restore_from_path, hparams_file=cfg.model.get("hparams_file"), trainer=trainer, + ) + models.append(model) + + else: + raise ValueError(f"Unrecognized checkpoint type: {single_model_cfg.restore_from_path}") + + # initialize apex DDP strategy + def dummy(): + return + + if trainer.strategy.launcher is not None: + trainer.strategy.launcher.launch(dummy, trainer=trainer) + trainer.strategy.setup_environment() + + models = [model.cuda() for model in models] # move the model to the GPU + for model in models: + model.eval().requires_grad_(False) # set the model to evaluation mode and disable gradients + + # Return the trainer and model objects. + return trainer, models + + +def setup_trainer_and_model_for_inference( + model_provider: Any, cfg: DictConfig, model_cfg_modifier: Callable, +) -> Tuple[Trainer, Any]: + """ + Set up a trainer and NeMo model for inference. + + Args: + model_provider (Any): An object that provides the NeMo model. + cfg (DictConfig): The configuration dictionary, containing the + necessary settings for the trainer and the model. + model_cfg_modifier (Callable): A function that modifies the model + configuration for inference. + + Returns: + Tuple[Trainer, Any]: A tuple containing the trainer and the model. + """ + + # Check if we need to use the TorchElasticEnvironment plugin for the trainer. + plugins = [] + plugins.append(TorchElasticEnvironment()) + + # Use the NLPDDPStrategy for the distributed data parallel strategy. + # We don't use DDP for async grad allreduce and don't find unused parameters. + strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=False,) + + # Set up the trainer with the specified plugins and strategy. + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + # Create the NLPSaveRestoreConnector object for model saving and restoring. + save_restore_connector = NLPSaveRestoreConnector() + + if cfg.model.restore_from_path.endswith(".nemo") or os.path.isdir(cfg.model.restore_from_path): + # Set the model_extracted_dir attribute if the restore path is a directory. + if os.path.isdir(cfg.model.restore_from_path): + save_restore_connector.model_extracted_dir = cfg.model.restore_from_path + + # Restore the model configuration from the specified path and modify it for inference. + model_cfg = model_provider.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + save_restore_connector=save_restore_connector, + return_config=True, + ) + with open_dict(model_cfg): + model_cfg_modifier(model_cfg) # modify the configuration for inference + + # Restore the model from the specified path and configuration, and set it up for inference. + model = model_provider.restore_from( + restore_path=cfg.model.restore_from_path, + trainer=trainer, + override_config_path=model_cfg, + save_restore_connector=save_restore_connector, + strict=True, + ) + + elif cfg.model.restore_from_path.endswith(".ckpt"): + logging.warning( + "Loading from .ckpt checkpoint for inference is experimental! It doesn't support models with model parallelism!" + ) + + model = model_provider.load_from_checkpoint( + cfg.model.restore_from_path, hparams_file=cfg.model.get("hparams_file"), trainer=trainer, + ) + + else: + raise ValueError(f"Unrecognized checkpoint type: {cfg.model.restore_from_path}") + + # initialize apex DDP strategy + def dummy(): + return + + if trainer.strategy.launcher is not None: + trainer.strategy.launcher.launch(dummy, trainer=trainer) + trainer.strategy.setup_environment() + + model = model.cuda() # move the model to the GPU + model.eval().requires_grad_(False) # set the model to evaluation mode and disable gradients + + # Return the trainer and model objects. + return trainer, model diff --git a/nemo/collections/vision/__init__.py b/nemo/collections/vision/__init__.py new file mode 100644 index 000000000000..edf1849f216f --- /dev/null +++ b/nemo/collections/vision/__init__.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo.collections.vision import data, losses, models, modules +from nemo.package_info import __version__ + +# Set collection version equal to NeMo version. +__version = __version__ + +# Authorship. +__author__ = "NVIDIA Corporation" + +# Set collection name. +__description__ = "Computer Vision collection" +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/data/__init__.py b/nemo/collections/vision/data/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/data/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/data/imagenet_classnames.py b/nemo/collections/vision/data/imagenet_classnames.py new file mode 100644 index 000000000000..4dcd3e595923 --- /dev/null +++ b/nemo/collections/vision/data/imagenet_classnames.py @@ -0,0 +1,1016 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +imagenet_classnames = [ + "tench", + "goldfish", + "great white shark", + "tiger shark", + "hammerhead shark", + "electric ray", + "stingray", + "rooster", + "hen", + "ostrich", + "brambling", + "goldfinch", + "house finch", + "junco", + "indigo bunting", + "American robin", + "bulbul", + "jay", + "magpie", + "chickadee", + "American dipper", + "kite (bird of prey)", + "bald eagle", + "vulture", + "great grey owl", + "fire salamander", + "smooth newt", + "newt", + "spotted salamander", + "axolotl", + "American bullfrog", + "tree frog", + "tailed frog", + "loggerhead sea turtle", + "leatherback sea turtle", + "mud turtle", + "terrapin", + "box turtle", + "banded gecko", + "green iguana", + "Carolina anole", + "desert grassland whiptail lizard", + "agama", + "frilled-necked lizard", + "alligator lizard", + "Gila monster", + "European green lizard", + "chameleon", + "Komodo dragon", + "Nile crocodile", + "American alligator", + "triceratops", + "worm snake", + "ring-necked snake", + "eastern hog-nosed snake", + "smooth green snake", + "kingsnake", + "garter snake", + "water snake", + "vine snake", + "night snake", + "boa constrictor", + "African rock python", + "Indian cobra", + "green mamba", + "sea snake", + "Saharan horned viper", + "eastern diamondback rattlesnake", + "sidewinder rattlesnake", + "trilobite", + "harvestman", + "scorpion", + "yellow garden spider", + "barn spider", + "European garden spider", + "southern black widow", + "tarantula", + "wolf spider", + "tick", + "centipede", + "black grouse", + "ptarmigan", + "ruffed grouse", + "prairie grouse", + "peafowl", + "quail", + "partridge", + "african grey parrot", + "macaw", + "sulphur-crested cockatoo", + "lorikeet", + "coucal", + "bee eater", + "hornbill", + "hummingbird", + "jacamar", + "toucan", + "duck", + "red-breasted merganser", + "goose", + "black swan", + "tusker", + "echidna", + "platypus", + "wallaby", + "koala", + "wombat", + "jellyfish", + "sea anemone", + "brain coral", + "flatworm", + "nematode", + "conch", + "snail", + "slug", + "sea slug", + "chiton", + "chambered nautilus", + "Dungeness crab", + "rock crab", + "fiddler crab", + "red king crab", + "American lobster", + "spiny lobster", + "crayfish", + "hermit crab", + "isopod", + "white stork", + "black stork", + "spoonbill", + "flamingo", + "little blue heron", + "great egret", + "bittern bird", + "crane bird", + "limpkin", + "common gallinule", + "American coot", + "bustard", + "ruddy turnstone", + "dunlin", + "common redshank", + "dowitcher", + "oystercatcher", + "pelican", + "king penguin", + "albatross", + "grey whale", + "killer whale", + "dugong", + "sea lion", + "Chihuahua", + "Japanese Chin", + "Maltese", + "Pekingese", + "Shih Tzu", + "King Charles Spaniel", + "Papillon", + "toy terrier", + "Rhodesian Ridgeback", + "Afghan Hound", + "Basset Hound", + "Beagle", + "Bloodhound", + "Bluetick Coonhound", + "Black and Tan Coonhound", + "Treeing Walker Coonhound", + "English foxhound", + "Redbone Coonhound", + "borzoi", + "Irish Wolfhound", + "Italian Greyhound", + "Whippet", + "Ibizan Hound", + "Norwegian Elkhound", + "Otterhound", + "Saluki", + "Scottish Deerhound", + "Weimaraner", + "Staffordshire Bull Terrier", + "American Staffordshire Terrier", + "Bedlington Terrier", + "Border Terrier", + "Kerry Blue Terrier", + "Irish Terrier", + "Norfolk Terrier", + "Norwich Terrier", + "Yorkshire Terrier", + "Wire Fox Terrier", + "Lakeland Terrier", + "Sealyham Terrier", + "Airedale Terrier", + "Cairn Terrier", + "Australian Terrier", + "Dandie Dinmont Terrier", + "Boston Terrier", + "Miniature Schnauzer", + "Giant Schnauzer", + "Standard Schnauzer", + "Scottish Terrier", + "Tibetan Terrier", + "Australian Silky Terrier", + "Soft-coated Wheaten Terrier", + "West Highland White Terrier", + "Lhasa Apso", + "Flat-Coated Retriever", + "Curly-coated Retriever", + "Golden Retriever", + "Labrador Retriever", + "Chesapeake Bay Retriever", + "German Shorthaired Pointer", + "Vizsla", + "English Setter", + "Irish Setter", + "Gordon Setter", + "Brittany dog", + "Clumber Spaniel", + "English Springer Spaniel", + "Welsh Springer Spaniel", + "Cocker Spaniel", + "Sussex Spaniel", + "Irish Water Spaniel", + "Kuvasz", + "Schipperke", + "Groenendael dog", + "Malinois", + "Briard", + "Australian Kelpie", + "Komondor", + "Old English Sheepdog", + "Shetland Sheepdog", + "collie", + "Border Collie", + "Bouvier des Flandres dog", + "Rottweiler", + "German Shepherd Dog", + "Dobermann", + "Miniature Pinscher", + "Greater Swiss Mountain Dog", + "Bernese Mountain Dog", + "Appenzeller Sennenhund", + "Entlebucher Sennenhund", + "Boxer", + "Bullmastiff", + "Tibetan Mastiff", + "French Bulldog", + "Great Dane", + "St. Bernard", + "husky", + "Alaskan Malamute", + "Siberian Husky", + "Dalmatian", + "Affenpinscher", + "Basenji", + "pug", + "Leonberger", + "Newfoundland dog", + "Great Pyrenees dog", + "Samoyed", + "Pomeranian", + "Chow Chow", + "Keeshond", + "brussels griffon", + "Pembroke Welsh Corgi", + "Cardigan Welsh Corgi", + "Toy Poodle", + "Miniature Poodle", + "Standard Poodle", + "Mexican hairless dog (xoloitzcuintli)", + "grey wolf", + "Alaskan tundra wolf", + "red wolf or maned wolf", + "coyote", + "dingo", + "dhole", + "African wild dog", + "hyena", + "red fox", + "kit fox", + "Arctic fox", + "grey fox", + "tabby cat", + "tiger cat", + "Persian cat", + "Siamese cat", + "Egyptian Mau", + "cougar", + "lynx", + "leopard", + "snow leopard", + "jaguar", + "lion", + "tiger", + "cheetah", + "brown bear", + "American black bear", + "polar bear", + "sloth bear", + "mongoose", + "meerkat", + "tiger beetle", + "ladybug", + "ground beetle", + "longhorn beetle", + "leaf beetle", + "dung beetle", + "rhinoceros beetle", + "weevil", + "fly", + "bee", + "ant", + "grasshopper", + "cricket insect", + "stick insect", + "cockroach", + "praying mantis", + "cicada", + "leafhopper", + "lacewing", + "dragonfly", + "damselfly", + "red admiral butterfly", + "ringlet butterfly", + "monarch butterfly", + "small white butterfly", + "sulphur butterfly", + "gossamer-winged butterfly", + "starfish", + "sea urchin", + "sea cucumber", + "cottontail rabbit", + "hare", + "Angora rabbit", + "hamster", + "porcupine", + "fox squirrel", + "marmot", + "beaver", + "guinea pig", + "common sorrel horse", + "zebra", + "pig", + "wild boar", + "warthog", + "hippopotamus", + "ox", + "water buffalo", + "bison", + "ram (adult male sheep)", + "bighorn sheep", + "Alpine ibex", + "hartebeest", + "impala (antelope)", + "gazelle", + "arabian camel", + "llama", + "weasel", + "mink", + "European polecat", + "black-footed ferret", + "otter", + "skunk", + "badger", + "armadillo", + "three-toed sloth", + "orangutan", + "gorilla", + "chimpanzee", + "gibbon", + "siamang", + "guenon", + "patas monkey", + "baboon", + "macaque", + "langur", + "black-and-white colobus", + "proboscis monkey", + "marmoset", + "white-headed capuchin", + "howler monkey", + "titi monkey", + "Geoffroy's spider monkey", + "common squirrel monkey", + "ring-tailed lemur", + "indri", + "Asian elephant", + "African bush elephant", + "red panda", + "giant panda", + "snoek fish", + "eel", + "silver salmon", + "rock beauty fish", + "clownfish", + "sturgeon", + "gar fish", + "lionfish", + "pufferfish", + "abacus", + "abaya", + "academic gown", + "accordion", + "acoustic guitar", + "aircraft carrier", + "airliner", + "airship", + "altar", + "ambulance", + "amphibious vehicle", + "analog clock", + "apiary", + "apron", + "trash can", + "assault rifle", + "backpack", + "bakery", + "balance beam", + "balloon", + "ballpoint pen", + "Band-Aid", + "banjo", + "baluster / handrail", + "barbell", + "barber chair", + "barbershop", + "barn", + "barometer", + "barrel", + "wheelbarrow", + "baseball", + "basketball", + "bassinet", + "bassoon", + "swimming cap", + "bath towel", + "bathtub", + "station wagon", + "lighthouse", + "beaker", + "military hat (bearskin or shako)", + "beer bottle", + "beer glass", + "bell tower", + "baby bib", + "tandem bicycle", + "bikini", + "ring binder", + "binoculars", + "birdhouse", + "boathouse", + "bobsleigh", + "bolo tie", + "poke bonnet", + "bookcase", + "bookstore", + "bottle cap", + "hunting bow", + "bow tie", + "brass memorial plaque", + "bra", + "breakwater", + "breastplate", + "broom", + "bucket", + "buckle", + "bulletproof vest", + "high-speed train", + "butcher shop", + "taxicab", + "cauldron", + "candle", + "cannon", + "canoe", + "can opener", + "cardigan", + "car mirror", + "carousel", + "tool kit", + "cardboard box / carton", + "car wheel", + "automated teller machine", + "cassette", + "cassette player", + "castle", + "catamaran", + "CD player", + "cello", + "mobile phone", + "chain", + "chain-link fence", + "chain mail", + "chainsaw", + "storage chest", + "chiffonier", + "bell or wind chime", + "china cabinet", + "Christmas stocking", + "church", + "movie theater", + "cleaver", + "cliff dwelling", + "cloak", + "clogs", + "cocktail shaker", + "coffee mug", + "coffeemaker", + "spiral or coil", + "combination lock", + "computer keyboard", + "candy store", + "container ship", + "convertible", + "corkscrew", + "cornet", + "cowboy boot", + "cowboy hat", + "cradle", + "construction crane", + "crash helmet", + "crate", + "infant bed", + "Crock Pot", + "croquet ball", + "crutch", + "cuirass", + "dam", + "desk", + "desktop computer", + "rotary dial telephone", + "diaper", + "digital clock", + "digital watch", + "dining table", + "dishcloth", + "dishwasher", + "disc brake", + "dock", + "dog sled", + "dome", + "doormat", + "drilling rig", + "drum", + "drumstick", + "dumbbell", + "Dutch oven", + "electric fan", + "electric guitar", + "electric locomotive", + "entertainment center", + "envelope", + "espresso machine", + "face powder", + "feather boa", + "filing cabinet", + "fireboat", + "fire truck", + "fire screen", + "flagpole", + "flute", + "folding chair", + "football helmet", + "forklift", + "fountain", + "fountain pen", + "four-poster bed", + "freight car", + "French horn", + "frying pan", + "fur coat", + "garbage truck", + "gas mask or respirator", + "gas pump", + "goblet", + "go-kart", + "golf ball", + "golf cart", + "gondola", + "gong", + "gown", + "grand piano", + "greenhouse", + "radiator grille", + "grocery store", + "guillotine", + "hair clip", + "hair spray", + "half-track", + "hammer", + "hamper", + "hair dryer", + "hand-held computer", + "handkerchief", + "hard disk drive", + "harmonica", + "harp", + "combine harvester", + "hatchet", + "holster", + "home theater", + "honeycomb", + "hook", + "hoop skirt", + "gymnastic horizontal bar", + "horse-drawn vehicle", + "hourglass", + "iPod", + "clothes iron", + "carved pumpkin", + "jeans", + "jeep", + "T-shirt", + "jigsaw puzzle", + "rickshaw", + "joystick", + "kimono", + "knee pad", + "knot", + "lab coat", + "ladle", + "lampshade", + "laptop computer", + "lawn mower", + "lens cap", + "letter opener", + "library", + "lifeboat", + "lighter", + "limousine", + "ocean liner", + "lipstick", + "slip-on shoe", + "lotion", + "music speaker", + "loupe magnifying glass", + "sawmill", + "magnetic compass", + "messenger bag", + "mailbox", + "tights", + "one-piece bathing suit", + "manhole cover", + "maraca", + "marimba", + "mask", + "matchstick", + "maypole", + "maze", + "measuring cup", + "medicine cabinet", + "megalith", + "microphone", + "microwave oven", + "military uniform", + "milk can", + "minibus", + "miniskirt", + "minivan", + "missile", + "mitten", + "mixing bowl", + "mobile home", + "ford model t", + "modem", + "monastery", + "monitor", + "moped", + "mortar and pestle", + "graduation cap", + "mosque", + "mosquito net", + "vespa", + "mountain bike", + "tent", + "computer mouse", + "mousetrap", + "moving van", + "muzzle", + "metal nail", + "neck brace", + "necklace", + "baby pacifier", + "notebook computer", + "obelisk", + "oboe", + "ocarina", + "odometer", + "oil filter", + "pipe organ", + "oscilloscope", + "overskirt", + "bullock cart", + "oxygen mask", + "product packet / packaging", + "paddle", + "paddle wheel", + "padlock", + "paintbrush", + "pajamas", + "palace", + "pan flute", + "paper towel", + "parachute", + "parallel bars", + "park bench", + "parking meter", + "railroad car", + "patio", + "payphone", + "pedestal", + "pencil case", + "pencil sharpener", + "perfume", + "Petri dish", + "photocopier", + "plectrum", + "Pickelhaube", + "picket fence", + "pickup truck", + "pier", + "piggy bank", + "pill bottle", + "pillow", + "ping-pong ball", + "pinwheel", + "pirate ship", + "drink pitcher", + "block plane", + "planetarium", + "plastic bag", + "plate rack", + "farm plow", + "plunger", + "Polaroid camera", + "pole", + "police van", + "poncho", + "pool table", + "soda bottle", + "plant pot", + "potter's wheel", + "power drill", + "prayer rug", + "printer", + "prison", + "missile", + "projector", + "hockey puck", + "punching bag", + "purse", + "quill", + "quilt", + "race car", + "racket", + "radiator", + "radio", + "radio telescope", + "rain barrel", + "recreational vehicle", + "fishing casting reel", + "reflex camera", + "refrigerator", + "remote control", + "restaurant", + "revolver", + "rifle", + "rocking chair", + "rotisserie", + "eraser", + "rugby ball", + "ruler measuring stick", + "sneaker", + "safe", + "safety pin", + "salt shaker", + "sandal", + "sarong", + "saxophone", + "scabbard", + "weighing scale", + "school bus", + "schooner", + "scoreboard", + "CRT monitor", + "screw", + "screwdriver", + "seat belt", + "sewing machine", + "shield", + "shoe store", + "shoji screen / room divider", + "shopping basket", + "shopping cart", + "shovel", + "shower cap", + "shower curtain", + "ski", + "balaclava ski mask", + "sleeping bag", + "slide rule", + "sliding door", + "slot machine", + "snorkel", + "snowmobile", + "snowplow", + "soap dispenser", + "soccer ball", + "sock", + "solar thermal collector", + "sombrero", + "soup bowl", + "keyboard space bar", + "space heater", + "space shuttle", + "spatula", + "motorboat", + "spider web", + "spindle", + "sports car", + "spotlight", + "stage", + "steam locomotive", + "through arch bridge", + "steel drum", + "stethoscope", + "scarf", + "stone wall", + "stopwatch", + "stove", + "strainer", + "tram", + "stretcher", + "couch", + "stupa", + "submarine", + "suit", + "sundial", + "sunglasses", + "sunglasses", + "sunscreen", + "suspension bridge", + "mop", + "sweatshirt", + "swim trunks / shorts", + "swing", + "electrical switch", + "syringe", + "table lamp", + "tank", + "tape player", + "teapot", + "teddy bear", + "television", + "tennis ball", + "thatched roof", + "front curtain", + "thimble", + "threshing machine", + "throne", + "tile roof", + "toaster", + "tobacco shop", + "toilet seat", + "torch", + "totem pole", + "tow truck", + "toy store", + "tractor", + "semi-trailer truck", + "tray", + "trench coat", + "tricycle", + "trimaran", + "tripod", + "triumphal arch", + "trolleybus", + "trombone", + "hot tub", + "turnstile", + "typewriter keyboard", + "umbrella", + "unicycle", + "upright piano", + "vacuum cleaner", + "vase", + "vaulted or arched ceiling", + "velvet fabric", + "vending machine", + "vestment", + "viaduct", + "violin", + "volleyball", + "waffle iron", + "wall clock", + "wallet", + "wardrobe", + "military aircraft", + "sink", + "washing machine", + "water bottle", + "water jug", + "water tower", + "whiskey jug", + "whistle", + "hair wig", + "window screen", + "window shade", + "Windsor tie", + "wine bottle", + "airplane wing", + "wok", + "wooden spoon", + "wool", + "split-rail fence", + "shipwreck", + "sailboat", + "yurt", + "website", + "comic book", + "crossword", + "traffic or street sign", + "traffic light", + "dust jacket", + "menu", + "plate", + "guacamole", + "consomme", + "hot pot", + "trifle", + "ice cream", + "popsicle", + "baguette", + "bagel", + "pretzel", + "cheeseburger", + "hot dog", + "mashed potatoes", + "cabbage", + "broccoli", + "cauliflower", + "zucchini", + "spaghetti squash", + "acorn squash", + "butternut squash", + "cucumber", + "artichoke", + "bell pepper", + "cardoon", + "mushroom", + "Granny Smith apple", + "strawberry", + "orange", + "lemon", + "fig", + "pineapple", + "banana", + "jackfruit", + "cherimoya (custard apple)", + "pomegranate", + "hay", + "carbonara", + "chocolate syrup", + "dough", + "meatloaf", + "pizza", + "pot pie", + "burrito", + "red wine", + "espresso", + "tea cup", + "eggnog", + "mountain", + "bubble", + "cliff", + "coral reef", + "geyser", + "lakeshore", + "promontory", + "sandbar", + "beach", + "valley", + "volcano", + "baseball player", + "bridegroom", + "scuba diver", + "rapeseed", + "daisy", + "yellow lady's slipper", + "corn", + "acorn", + "rose hip", + "horse chestnut seed", + "coral fungus", + "agaric", + "gyromitra", + "stinkhorn mushroom", + "earth star fungus", + "hen of the woods mushroom", + "bolete", + "corn cob", + "toilet paper", +] diff --git a/nemo/collections/vision/data/megatron/__init__.py b/nemo/collections/vision/data/megatron/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/data/megatron/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/data/megatron/autoaugment.py b/nemo/collections/vision/data/megatron/autoaugment.py new file mode 100644 index 000000000000..b55f395ed430 --- /dev/null +++ b/nemo/collections/vision/data/megatron/autoaugment.py @@ -0,0 +1,270 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Code adapted from https://github.com/DeepVoltaire/AutoAugment. + +This module implements the fixed AutoAugment data augmentation policy for ImageNet provided in +Appendix A, Table 9 of reference [1]. It does not include any of the search code for augmentation +policies. + +Reference: +[1] https://arxiv.org/abs/1805.09501 +""" + +import random + +import numpy as np +from PIL import Image, ImageEnhance, ImageOps + +_MAX_LEVEL = 10 # Maximum integer strength of an augmentation, if applicable. + + +class ImageNetPolicy: + """Definition of an ImageNetPolicy. + + Implements a fixed AutoAugment data augmentation policy targeted at + ImageNet training by randomly applying at runtime one of the 25 pre-defined + data augmentation sub-policies provided in Reference [1]. + + Usage example as a Pytorch Transform: + >>> transform=transforms.Compose([transforms.Resize(256), + >>> ImageNetPolicy(), + >>> transforms.ToTensor()]) + """ + + def __init__(self, fillcolor=(128, 128, 128)): + """Initialize an ImageNetPolicy. + + Args: + fillcolor (tuple): RGB color components of the color to be used for + filling when needed (default: (128, 128, 128), which + corresponds to gray). + """ + # Instantiate a list of sub-policies. + # Each entry of the list is a SubPolicy which consists of + # two augmentation operations, + # each of those parametrized as operation, probability, magnitude. + # Those two operations are applied sequentially on the image upon call. + self.policies = [ + SubPolicy("posterize", 0.4, 8, "rotate", 0.6, 9, fillcolor), + SubPolicy("solarize", 0.6, 5, "autocontrast", 0.6, 5, fillcolor), + SubPolicy("equalize", 0.8, 8, "equalize", 0.6, 3, fillcolor), + SubPolicy("posterize", 0.6, 7, "posterize", 0.6, 6, fillcolor), + SubPolicy("equalize", 0.4, 7, "solarize", 0.2, 4, fillcolor), + SubPolicy("equalize", 0.4, 4, "rotate", 0.8, 8, fillcolor), + SubPolicy("solarize", 0.6, 3, "equalize", 0.6, 7, fillcolor), + SubPolicy("posterize", 0.8, 5, "equalize", 1.0, 2, fillcolor), + SubPolicy("rotate", 0.2, 3, "solarize", 0.6, 8, fillcolor), + SubPolicy("equalize", 0.6, 8, "posterize", 0.4, 6, fillcolor), + SubPolicy("rotate", 0.8, 8, "color", 0.4, 0, fillcolor), + SubPolicy("rotate", 0.4, 9, "equalize", 0.6, 2, fillcolor), + SubPolicy("equalize", 0.0, 7, "equalize", 0.8, 8, fillcolor), + SubPolicy("invert", 0.6, 4, "equalize", 1.0, 8, fillcolor), + SubPolicy("color", 0.6, 4, "contrast", 1.0, 8, fillcolor), + SubPolicy("rotate", 0.8, 8, "color", 1.0, 2, fillcolor), + SubPolicy("color", 0.8, 8, "solarize", 0.8, 7, fillcolor), + SubPolicy("sharpness", 0.4, 7, "invert", 0.6, 8, fillcolor), + SubPolicy("shearX", 0.6, 5, "equalize", 1.0, 9, fillcolor), + SubPolicy("color", 0.4, 0, "equalize", 0.6, 3, fillcolor), + SubPolicy("equalize", 0.4, 7, "solarize", 0.2, 4, fillcolor), + SubPolicy("solarize", 0.6, 5, "autocontrast", 0.6, 5, fillcolor), + SubPolicy("invert", 0.6, 4, "equalize", 1.0, 8, fillcolor), + SubPolicy("color", 0.6, 4, "contrast", 1.0, 8, fillcolor), + SubPolicy("equalize", 0.8, 8, "equalize", 0.6, 3, fillcolor), + ] + + def __call__(self, img): + """Define call method for ImageNetPolicy class.""" + policy_idx = random.randint(0, len(self.policies) - 1) + return self.policies[policy_idx](img) + + def __repr__(self): + """Define repr method for ImageNetPolicy class.""" + return "ImageNetPolicy" + + +class SubPolicy: + """Definition of a SubPolicy. + + A SubPolicy consists of two augmentation operations, + each of those parametrized as operation, probability, magnitude. + The two operations are applied sequentially on the image upon call. + """ + + def __init__( + self, operation1, probability1, magnitude_idx1, operation2, probability2, magnitude_idx2, fillcolor, + ): + """Initialize a SubPolicy. + + Args: + operation1 (str): Key specifying the first augmentation operation. + There are fourteen key values altogether (see supported_ops below + listing supported operations). probability1 (float): Probability + within [0., 1.] of applying the first augmentation operation. + magnitude_idx1 (int): Integer specifiying the strength of the first + operation as an index further used to derive the magnitude from a + range of possible values. + operation2 (str): Key specifying the second augmentation operation. + probability2 (float): Probability within [0., 1.] of applying the + second augmentation operation. + magnitude_idx2 (int): Integer specifiying the strength of the + second operation as an index further used to derive the magnitude + from a range of possible values. + fillcolor (tuple): RGB color components of the color to be used for + filling. + Returns: + """ + # List of supported operations for operation1 and operation2. + supported_ops = [ + "shearX", + "shearY", + "translateX", + "translateY", + "rotate", + "color", + "posterize", + "solarize", + "contrast", + "sharpness", + "brightness", + "autocontrast", + "equalize", + "invert", + ] + assert (operation1 in supported_ops) and ( + operation2 in supported_ops + ), "SubPolicy:one of oper1 or oper2 refers to an unsupported operation." + + assert ( + 0.0 <= probability1 <= 1.0 and 0.0 <= probability2 <= 1.0 + ), "SubPolicy: prob1 and prob2 should be within [0., 1.]." + + assert ( + isinstance(magnitude_idx1, int) and 0 <= magnitude_idx1 <= 10 + ), "SubPolicy: idx1 should be specified as an integer within [0, 10]." + + assert ( + isinstance(magnitude_idx2, int) and 0 <= magnitude_idx2 <= 10 + ), "SubPolicy: idx2 should be specified as an integer within [0, 10]." + + # Define a dictionary where each key refers to a specific type of + # augmentation and the corresponding value is a range of ten possible + # magnitude values for that augmentation. + num_levels = _MAX_LEVEL + 1 + ranges = { + "shearX": np.linspace(0, 0.3, num_levels), + "shearY": np.linspace(0, 0.3, num_levels), + "translateX": np.linspace(0, 150 / 331, num_levels), + "translateY": np.linspace(0, 150 / 331, num_levels), + "rotate": np.linspace(0, 30, num_levels), + "color": np.linspace(0.0, 0.9, num_levels), + "posterize": np.round(np.linspace(8, 4, num_levels), 0).astype(np.int), + "solarize": np.linspace(256, 0, num_levels), # range [0, 256] + "contrast": np.linspace(0.0, 0.9, num_levels), + "sharpness": np.linspace(0.0, 0.9, num_levels), + "brightness": np.linspace(0.0, 0.9, num_levels), + "autocontrast": [0] * num_levels, # This augmentation doesn't use magnitude parameter. + "equalize": [0] * num_levels, # This augmentation doesn't use magnitude parameter. + "invert": [0] * num_levels, # This augmentation doesn't use magnitude parameter. + } + + def rotate_with_fill(img, magnitude): + """Define rotation transformation with fill. + + The input image is first rotated, then it is blended together with + a gray mask of the same size. Note that fillcolor as defined + elsewhere in this module doesn't apply here. + + Args: + magnitude (float): rotation angle in degrees. + Returns: + rotated_filled (PIL Image): rotated image with gray filling for + disoccluded areas unveiled by the rotation. + """ + rotated = img.convert("RGBA").rotate(magnitude) + rotated_filled = Image.composite(rotated, Image.new("RGBA", rotated.size, (128,) * 4), rotated) + return rotated_filled.convert(img.mode) + + # Define a dictionary of augmentation functions where each key refers + # to a specific type of augmentation and the corresponding value defines + # the augmentation itself using a lambda function. + # pylint: disable=unnecessary-lambda + func_dict = { + "shearX": lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + (1, magnitude * random.choice([-1, 1]), 0, 0, 1, 0), + Image.BICUBIC, + fillcolor=fillcolor, + ), + "shearY": lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + (1, 0, 0, magnitude * random.choice([-1, 1]), 1, 0), + Image.BICUBIC, + fillcolor=fillcolor, + ), + "translateX": lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + (1, 0, magnitude * img.size[0] * random.choice([-1, 1]), 0, 1, 0,), + fillcolor=fillcolor, + ), + "translateY": lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + (1, 0, 0, 0, 1, magnitude * img.size[1] * random.choice([-1, 1]),), + fillcolor=fillcolor, + ), + "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude), + "color": lambda img, magnitude: ImageEnhance.Color(img).enhance(1 + magnitude * random.choice([-1, 1])), + "posterize": lambda img, magnitude: ImageOps.posterize(img, magnitude), + "solarize": lambda img, magnitude: ImageOps.solarize(img, magnitude), + "contrast": lambda img, magnitude: ImageEnhance.Contrast(img).enhance( + 1 + magnitude * random.choice([-1, 1]) + ), + "sharpness": lambda img, magnitude: ImageEnhance.Sharpness(img).enhance( + 1 + magnitude * random.choice([-1, 1]) + ), + "brightness": lambda img, magnitude: ImageEnhance.Brightness(img).enhance( + 1 + magnitude * random.choice([-1, 1]) + ), + "autocontrast": lambda img, magnitude: ImageOps.autocontrast(img), + "equalize": lambda img, magnitude: ImageOps.equalize(img), + "invert": lambda img, magnitude: ImageOps.invert(img), + } + + # Store probability, function and magnitude of the first augmentation + # for the sub-policy. + self.probability1 = probability1 + self.operation1 = func_dict[operation1] + self.magnitude1 = ranges[operation1][magnitude_idx1] + + # Store probability, function and magnitude of the second augmentation + # for the sub-policy. + self.probability2 = probability2 + self.operation2 = func_dict[operation2] + self.magnitude2 = ranges[operation2][magnitude_idx2] + + def __call__(self, img): + """Define call method for SubPolicy class.""" + # Randomly apply operation 1. + if random.random() < self.probability1: + img = self.operation1(img, self.magnitude1) + + # Randomly apply operation 2. + if random.random() < self.probability2: + img = self.operation2(img, self.magnitude2) + + return img diff --git a/nemo/collections/vision/data/megatron/data_samplers.py b/nemo/collections/vision/data/megatron/data_samplers.py new file mode 100644 index 000000000000..44cd8fb14149 --- /dev/null +++ b/nemo/collections/vision/data/megatron/data_samplers.py @@ -0,0 +1,89 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Dict, List, Optional + +import torch +from torch.utils.data import Dataset + +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import MegatronPretrainingRandomSampler +from nemo.collections.vision.data.megatron.vit_dataset import RandomSeedDataset + + +class MegatronVisionPretrainingRandomSampler(MegatronPretrainingRandomSampler): + def __init__( + self, + dataset: Dataset, + total_samples: int, + consumed_samples: int, + micro_batch_size: int, + data_parallel_rank: int, + data_parallel_size: int, + data_sharding: bool, + drop_last: bool = True, + global_batch_size: Optional[int] = None, + pad_samples_to_global_batch_size: Optional[bool] = False, + ) -> None: + super().__init__( + total_samples=total_samples, + consumed_samples=consumed_samples, + micro_batch_size=micro_batch_size, + data_parallel_rank=data_parallel_rank, + data_parallel_size=data_parallel_size, + drop_last=drop_last, + global_batch_size=global_batch_size, + pad_samples_to_global_batch_size=pad_samples_to_global_batch_size, + ) + self.dataset = dataset + self.data_sharding = data_sharding + + def __iter__(self): + active_total_samples = self.total_samples - self.last_batch_size + self.epoch = self.consumed_samples // active_total_samples + current_epoch_samples = self.consumed_samples % active_total_samples + assert current_epoch_samples % self.micro_batch_times_data_parallel_size == 0 + + if isinstance(self.dataset, RandomSeedDataset): + self.dataset.set_epoch(self.epoch) + + # data sharding and random sampling + if self.data_sharding: + bucket_size = (self.total_samples // self.micro_batch_times_data_parallel_size) * self.micro_batch_size + bucket_offset = current_epoch_samples // self.data_parallel_size + start_idx = self.data_parallel_rank * bucket_size + + g = torch.Generator() + g.manual_seed(self.epoch) + random_idx = torch.randperm(bucket_size, generator=g).tolist() + idx_range = [start_idx + x for x in random_idx[bucket_offset:]] + else: + full_bucket_size = (self.total_samples // self.micro_batch_size) * self.micro_batch_size + full_bucket_offset = current_epoch_samples + g = torch.Generator() + g.manual_seed(self.epoch) + idx_range_total = torch.randperm(full_bucket_size, generator=g).tolist() + idx_range_active = idx_range_total[full_bucket_offset:] + idx_range = idx_range_active[self.data_parallel_rank :: self.data_parallel_size] + + batch = [] + # Last batch if not complete will be dropped. + for idx in idx_range: + batch.append(idx) + if len(batch) == self.micro_batch_size: + self.consumed_samples += self.micro_batch_times_data_parallel_size + yield batch + batch = [] + + # Check the last partial batch and see drop_last is set + if len(batch) > 0 and not self.drop_last: + yield batch diff --git a/nemo/collections/vision/data/megatron/image_folder.py b/nemo/collections/vision/data/megatron/image_folder.py new file mode 100644 index 000000000000..44138dec3320 --- /dev/null +++ b/nemo/collections/vision/data/megatron/image_folder.py @@ -0,0 +1,286 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# code taken from +# https://github.com/pytorch/vision/blob/main/torchvision/datasets/folder.py +# added support for classes_fraction and data_per_class_fraction + +import os +import os.path +from typing import Any, Callable, Dict, List, Optional, Tuple, cast + +import numpy as np +from PIL import Image +from torchvision.datasets import VisionDataset + + +def has_file_allowed_extension(filename: str, extensions: Tuple[str, ...]) -> bool: + """Checks if a file is an allowed extension. + Args: + filename (string): path to a file + extensions (tuple of strings): extensions to consider (lowercase) + Returns: + bool: True if the filename ends with one of given extensions + """ + return filename.lower().endswith(extensions) + + +def is_image_file(filename: str) -> bool: + """Checks if a file is an allowed image extension. + Args: + filename (string): path to a file + Returns: + bool: True if the filename ends with a known image extension + """ + return has_file_allowed_extension(filename, IMG_EXTENSIONS) + + +def make_dataset( + directory: str, + class_to_idx: Dict[str, int], + data_per_class_fraction: float, + extensions: Optional[Tuple[str, ...]] = None, + is_valid_file: Optional[Callable[[str], bool]] = None, +) -> List[Tuple[str, int]]: + """Generates a list of samples of a form (path_to_sample, class). + Args: + directory (str): root dataset directory + class_to_idx (Dict[str, int]): dictionary mapping class name to class index + extensions (optional): A list of allowed extensions. + Either extensions or is_valid_file should be passed. Defaults to None. + is_valid_file (optional): A function that takes path of a file + and checks if the file is a valid file + (used to check of corrupt files) both extensions and + is_valid_file should not be passed. Defaults to None. + Raises: + ValueError: In case ``extensions`` and ``is_valid_file`` are None or both are not None. + Returns: + List[Tuple[str, int]]: samples of a form (path_to_sample, class) + """ + instances = [] + directory = os.path.expanduser(directory) + both_none = extensions is None and is_valid_file is None + both_something = extensions is not None and is_valid_file is not None + if both_none or both_something: + raise ValueError("Both extensions and is_valid_file cannot be None or not None at the same time") + if extensions is not None: + + def is_valid_file(x: str) -> bool: + return has_file_allowed_extension(x, cast(Tuple[str, ...], extensions)) + + is_valid_file = cast(Callable[[str], bool], is_valid_file) + for target_class in sorted(class_to_idx.keys()): + class_index = class_to_idx[target_class] + target_dir = os.path.join(directory, target_class) + if not os.path.isdir(target_dir): + continue + local_instances = [] + for root, _, fnames in sorted(os.walk(target_dir, followlinks=True)): + for fname in sorted(fnames): + path = os.path.join(root, fname) + if is_valid_file(path): + item = path, class_index + local_instances.append(item) + + instances.extend(local_instances[0 : int(len(local_instances) * data_per_class_fraction)]) + + return instances + + +class DatasetFolder(VisionDataset): + """A generic data loader where the samples are arranged in this way: :: + root/class_x/xxx.ext + root/class_x/xxy.ext + root/class_x/[...]/xxz.ext + root/class_y/123.ext + root/class_y/nsdf3.ext + root/class_y/[...]/asd932_.ext + Args: + root (string): Root directory path. + loader (callable): A function to load a sample given its path. + extensions (tuple[string]): A list of allowed extensions. + both extensions and is_valid_file should not be passed. + transform (callable, optional): A function/transform that takes in + a sample and returns a transformed version. + E.g, ``transforms.RandomCrop`` for images. + target_transform (callable, optional): A function/transform that takes + in the target and transforms it. + is_valid_file (callable, optional): A function that takes path of a file + and check if the file is a valid file (used to check of corrupt files) + both extensions and is_valid_file should not be passed. + Attributes: + classes (list): List of the class names sorted alphabetically. + class_to_idx (dict): Dict with items (class_name, class_index). + samples (list): List of (sample path, class_index) tuples + targets (list): The class_index value for each image in the dataset + """ + + def __init__( + self, + root: str, + loader: Callable[[str], Any], + extensions: Optional[Tuple[str, ...]] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + classes_fraction=1.0, + data_per_class_fraction=1.0, + is_valid_file: Optional[Callable[[str], bool]] = None, + ) -> None: + super(DatasetFolder, self).__init__(root, transform=transform, target_transform=target_transform) + self.classes_fraction = classes_fraction + self.data_per_class_fraction = data_per_class_fraction + classes, class_to_idx = self._find_classes(self.root) + samples = self.make_dataset(self.root, class_to_idx, self.data_per_class_fraction, extensions, is_valid_file) + if len(samples) == 0: + msg = "Found 0 files in subfolders of: {}\n".format(self.root) + if extensions is not None: + msg += "Supported extensions are: {}".format(",".join(extensions)) + raise RuntimeError(msg) + + self.loader = loader + self.extensions = extensions + self.total = len(samples) + self.classes = classes + self.class_to_idx = class_to_idx + self.samples = samples + self.targets = [s[1] for s in samples] + + @staticmethod + def make_dataset( + directory: str, + class_to_idx: Dict[str, int], + data_per_class_fraction: float, + extensions: Optional[Tuple[str, ...]] = None, + is_valid_file: Optional[Callable[[str], bool]] = None, + ) -> List[Tuple[str, int]]: + return make_dataset( + directory, class_to_idx, data_per_class_fraction, extensions=extensions, is_valid_file=is_valid_file + ) + + def _find_classes(self, dir: str) -> Tuple[List[str], Dict[str, int]]: + """ + Finds the class folders in a dataset. + Args: + dir (string): Root directory path. + Returns: + tuple: (classes, class_to_idx) where classes are relative to (dir), and class_to_idx is a dictionary. + Ensures: + No class is a subdirectory of another. + """ + all_classes = [d.name for d in os.scandir(dir) if d.is_dir()] + classes = all_classes[0 : int(len(all_classes) * self.classes_fraction)] + classes.sort() + class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)} + return classes, class_to_idx + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + """ + Args: + index (int): Index + Returns: + tuple: (sample, target) where target is class_index of the target class. + """ + curr_index = index + for x in range(self.total): + try: + path, target = self.samples[curr_index] + sample = self.loader(path) + break + except Exception as e: + curr_index = np.random.randint(0, self.total) + + if self.transform is not None: + sample = self.transform(sample) + if self.target_transform is not None: + target = self.target_transform(target) + + return sample, target + + def __len__(self) -> int: + return len(self.samples) + + +IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp') + + +def pil_loader(path: str) -> Image.Image: + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, 'rb') as f: + img = Image.open(f) + return img.convert('RGB') + + +# TODO: specify the return type +def accimage_loader(path: str) -> Any: + import accimage + + try: + return accimage.Image(path) + except IOError: + # Potentially a decoding problem, fall back to PIL.Image + return pil_loader(path) + + +def default_loader(path: str) -> Any: + from torchvision import get_image_backend + + if get_image_backend() == 'accimage': + return accimage_loader(path) + else: + return pil_loader(path) + + +class ImageFolder(DatasetFolder): + """A generic data loader where the images are arranged in this way: :: + root/dog/xxx.png + root/dog/xxy.png + root/dog/[...]/xxz.png + root/cat/123.png + root/cat/nsdf3.png + root/cat/[...]/asd932_.png + Args: + root (string): Root directory path. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + loader (callable, optional): A function to load an image given its path. + is_valid_file (callable, optional): A function that takes path of an Image file + and check if the file is a valid file (used to check of corrupt files) + Attributes: + classes (list): List of the class names sorted alphabetically. + class_to_idx (dict): Dict with items (class_name, class_index). + imgs (list): List of (image path, class_index) tuples + """ + + def __init__( + self, + root: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + classes_fraction=1.0, + data_per_class_fraction=1.0, + loader: Callable[[str], Any] = default_loader, + is_valid_file: Optional[Callable[[str], bool]] = None, + ): + super(ImageFolder, self).__init__( + root, + loader, + IMG_EXTENSIONS if is_valid_file is None else None, + transform=transform, + target_transform=target_transform, + classes_fraction=classes_fraction, + data_per_class_fraction=data_per_class_fraction, + is_valid_file=is_valid_file, + ) + self.imgs = self.samples diff --git a/nemo/collections/vision/data/megatron/vit_dataset.py b/nemo/collections/vision/data/megatron/vit_dataset.py new file mode 100644 index 000000000000..5ba711dd0b28 --- /dev/null +++ b/nemo/collections/vision/data/megatron/vit_dataset.py @@ -0,0 +1,284 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random + +import numpy as np +import torch +import torchvision.transforms as T +from PIL import Image, ImageFilter, ImageOps +from torch.utils.data import Dataset + +from nemo.collections.multimodal.data.common.data_samplers import SharedEpoch +from nemo.collections.vision.data.megatron.autoaugment import ImageNetPolicy +from nemo.collections.vision.data.megatron.image_folder import ImageFolder + + +def _to_torch_data_type(precision): + if precision in ['bf16', 'bf16-mixed']: + return torch.bfloat16 + elif precision in [16, '16', '16-mixed']: + return torch.float16 + elif precision in [32, '32', '32-true']: + return torch.float32 + else: + raise ValueError(f"Cannot recognize precision {precision}") + + +class RandomSeedDataset(Dataset): + def __init__(self, dataset, seed=1234): + self.base_seed = seed + self.dataset = dataset + self.epoch = SharedEpoch() + + def __len__(self): + return len(self.dataset) + + def set_epoch(self, epoch): + self.epoch.set_value(epoch) + + def __getitem__(self, idx): + seed = idx + self.base_seed + self.epoch.get_value() * 32768 + torch.manual_seed(seed) + random.seed(seed) + np.random.seed(seed) + return self.dataset[idx] + + +class GaussianBlur(object): + """ + Apply Gaussian Blur to the PIL image. + """ + + def __init__(self, p=0.5, radius_min=0.1, radius_max=2.0): + self.prob = p + self.radius_min = radius_min + self.radius_max = radius_max + + def __call__(self, img): + do_it = random.random() <= self.prob + if not do_it: + return img + + return img.filter(ImageFilter.GaussianBlur(radius=random.uniform(self.radius_min, self.radius_max))) + + +class Solarization(object): + """ + Apply Solarization to the PIL image. + """ + + def __init__(self, p): + self.p = p + + def __call__(self, img): + if random.random() < self.p: + return ImageOps.solarize(img) + else: + return img + + +class ClassificationTransform: + def __init__(self, model_cfg, image_size, train=True): + self.data_type = _to_torch_data_type(model_cfg.precision) + if train: + self.transform = T.Compose( + [ + T.RandomResizedCrop(image_size), + T.RandomHorizontalFlip(), + T.ColorJitter(0.4, 0.4, 0.4, 0.1), + ImageNetPolicy(), + T.ToTensor(), + T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + T.ConvertImageDtype(self.data_type), + ] + ) + else: + self.transform = T.Compose( + [ + T.Resize(image_size), + T.CenterCrop(image_size), + T.ToTensor(), + T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + T.ConvertImageDtype(self.data_type), + ] + ) + + def __call__(self, input): + output = self.transform(input) + return output + + +class InpaintingTransform: + def __init__(self, model_cfg, image_size, train=True): + self.mask_factor = model_cfg.mask_factor + self.mask_type = model_cfg.mask_type + self.image_size = image_size + self.patch_size = model_cfg.patch_dim + self.mask_size = int(self.mask_factor * (image_size[0] / self.patch_size) * (image_size[1] / self.patch_size)) + self.train = train + self.data_type = _to_torch_data_type(model_cfg.precision) + + if self.train: + self.transform = T.Compose( + [ + T.RandomResizedCrop(self.image_size), + T.RandomHorizontalFlip(), + T.ColorJitter(0.4, 0.4, 0.4, 0.1), + ImageNetPolicy(), + T.ToTensor(), + T.ConvertImageDtype(self.data_type), + ] + ) + else: + self.transform = T.Compose( + [ + T.Resize(self.image_size, interpolation=2), + T.CenterCrop(self.image_size), + T.ToTensor(), + T.ConvertImageDtype(self.data_type), + ] + ) + + def gen_mask(self, image_size, mask_size, mask_type, patch_size): + # output: mask as a list with indices for missing patches + action_list = [[0, 1], [0, -1], [1, 0], [-1, 0]] + assert image_size[0] == image_size[1] + img_size_patch = image_size[0] // patch_size + + # drop masked patches + mask = torch.zeros((image_size[0], image_size[1]), dtype=torch.float) + + if mask_type == 'random': + x = torch.randint(0, img_size_patch, ()) + y = torch.randint(0, img_size_patch, ()) + for i in range(mask_size): + r = torch.randint(0, len(action_list), ()) + x = torch.clamp(x + action_list[r][0], min=0, max=img_size_patch - 1) + y = torch.clamp(y + action_list[r][1], min=0, max=img_size_patch - 1) + x_offset = x * patch_size + y_offset = y * patch_size + mask[x_offset : x_offset + patch_size, y_offset : y_offset + patch_size] = 1 + else: + assert mask_type == 'row' + count = 0 + for x in reversed(range(img_size_patch)): + for y in reversed(range(img_size_patch)): + if count < mask_size: + count += 1 + x_offset = x * patch_size + y_offset = y * patch_size + mask[x_offset : x_offset + patch_size, y_offset : y_offset + patch_size] = 1 + return mask + + def __call__(self, input): + trans_input = self.transform(input) + mask = self.gen_mask(self.image_size, self.mask_size, self.mask_type, self.patch_size) + mask = mask.unsqueeze(dim=0) + return trans_input, mask + + +class DinoTransform(object): + def __init__(self, model_cfg, image_size, train=True): + self.data_type = _to_torch_data_type(model_cfg.precision) + flip_and_color_jitter = T.Compose( + [ + T.RandomHorizontalFlip(p=0.5), + T.RandomApply([T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.2, hue=0.1)], p=0.8), + T.RandomGrayscale(p=0.2), + ] + ) + + if model_cfg.precision in [16, "bf16"]: + normalize = T.Compose( + [ + T.ToTensor(), + T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + T.ConvertImageDtype(self.data_type), + ] + ) + else: + normalize = T.Compose([T.ToTensor(), T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),]) + + # first global crop + scale_const = 0.4 + self.global_transform1 = T.Compose( + [ + T.RandomResizedCrop(image_size, scale=(scale_const, 1), interpolation=Image.BICUBIC), + flip_and_color_jitter, + GaussianBlur(1.0), + normalize, + ] + ) + # second global crop + self.global_transform2 = T.Compose( + [ + T.RandomResizedCrop(image_size, scale=(scale_const, 1), interpolation=Image.BICUBIC), + flip_and_color_jitter, + GaussianBlur(0.1), + Solarization(0.2), + normalize, + ] + ) + # transformation for the local small crops + self.local_crops_number = model_cfg.dino_local_crops_number + self.local_transform = T.Compose( + [ + T.RandomResizedCrop( + model_cfg.dino_local_img_size, scale=(0.05, scale_const), interpolation=Image.BICUBIC + ), + flip_and_color_jitter, + GaussianBlur(p=0.5), + normalize, + ] + ) + + def __call__(self, image): + crops = [] + crops.append(self.global_transform1(image)) + crops.append(self.global_transform2(image)) + for _ in range(self.local_crops_number): + crops.append(self.local_transform(image)) + return crops + + +def build_train_valid_datasets(model_cfg, data_path, image_size=224): + if model_cfg.vision_pretraining_type == 'classify': + train_transform = ClassificationTransform(model_cfg, image_size) + val_transform = ClassificationTransform(model_cfg, image_size, train=False) + elif model_cfg.vision_pretraining_type == 'inpaint': + train_transform = InpaintingTransform(model_cfg, image_size, train=False) + val_transform = InpaintingTransform(model_cfg, image_size, train=False) + elif model_cfg.vision_pretraining_type == 'dino': + train_transform = DinoTransform(model_cfg, image_size, train=True) + val_transform = ClassificationTransform(model_cfg, image_size, train=False) + else: + raise Exception('{} vit pretraining type is not supported.'.format(model_cfg.vit_pretraining_type)) + + # training dataset + train_data_path = data_path[0] if len(data_path) <= 2 else data_path[2] + train_data = ImageFolder( + root=train_data_path, + transform=train_transform, + classes_fraction=model_cfg.classes_fraction, + data_per_class_fraction=model_cfg.data_per_class_fraction, + ) + train_data = RandomSeedDataset(train_data) + + # validation dataset + val_data_path = data_path[1] + val_data = ImageFolder(root=val_data_path, transform=val_transform) + val_data = RandomSeedDataset(val_data) + + return train_data, val_data diff --git a/nemo/collections/vision/losses/__init__.py b/nemo/collections/vision/losses/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/losses/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/metrics/__init__.py b/nemo/collections/vision/metrics/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/metrics/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/models/__init__.py b/nemo/collections/vision/models/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/models/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/models/megatron_vit_classification_models.py b/nemo/collections/vision/models/megatron_vit_classification_models.py new file mode 100644 index 000000000000..854e1d6b5a83 --- /dev/null +++ b/nemo/collections/vision/models/megatron_vit_classification_models.py @@ -0,0 +1,801 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools +from functools import partial +from typing import Any, Dict, List, Optional + +import numpy as np +import torch +from omegaconf.dictconfig import DictConfig +from pytorch_lightning.accelerators import CPUAccelerator +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import MegatronPretrainingSampler +from nemo.collections.nlp.models.language_modeling.megatron_base_model import MegatronBaseModel +from nemo.collections.nlp.modules.common.megatron.attention import HAVE_FLASH_ATTENTION +from nemo.collections.nlp.modules.common.megatron.build_model import build_model +from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule +from nemo.collections.nlp.modules.common.megatron.utils import ( + average_losses_across_data_parallel_group, + get_all_params_for_weight_decay_optimization, + get_linear_layer, + get_params_for_weight_decay_optimization, + init_method_normal, + scaled_init_method_normal, +) +from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.collections.vision.data.megatron.data_samplers import MegatronVisionPretrainingRandomSampler +from nemo.collections.vision.data.megatron.vit_dataset import build_train_valid_datasets +from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone, VitMlpHead +from nemo.core.classes.common import PretrainedModelInfo +from nemo.core.neural_types import ChannelType, NeuralType +from nemo.utils import logging + +try: + import apex.transformer.pipeline_parallel.utils + from apex.transformer.pipeline_parallel.utils import get_num_microbatches + + HAVE_APEX = True + +except (ImportError, ModuleNotFoundError): + + HAVE_APEX = False + +try: + from megatron.core import parallel_state + from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + + +class VitClassificationModel(MegatronModule): + """Vision Transformer Model.""" + + def __init__( + self, model_cfg, model_parallel_config, num_classes, finetune=False, pre_process=True, post_process=True + ): + super(VitClassificationModel, self).__init__() + + scaled_init_method = ( + scaled_init_method_normal(model_cfg.init_method_std, model_cfg.num_layers) + if model_cfg.use_scaled_init_method + else init_method_normal(model_cfg.init_method_std) + ) + + self.config = model_parallel_config + self.hidden_size = model_cfg.hidden_size + self.num_classes = num_classes + self.finetune = finetune + self.pre_process = pre_process + self.post_process = post_process + self.backbone = VitBackbone( + model_cfg, + model_parallel_config, + init_method=init_method_normal(model_cfg.init_method_std), + scaled_init_method=scaled_init_method, + pre_process=self.pre_process, + post_process=self.post_process, + single_token_output=True, + ) + + if self.post_process: + if not self.finetune: + self.head = VitMlpHead(self.hidden_size, self.num_classes) + else: + self.head = get_linear_layer(self.hidden_size, self.num_classes, torch.nn.init.zeros_) + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + self.backbone.set_input_tensor(input_tensor) + + def forward(self, input): + hidden_states = self.backbone(input) + + if self.post_process: + hidden_states = self.head(hidden_states) + hidden_states = hidden_states.contiguous() + return hidden_states + + +class MegatronVitClassificationModel(MegatronBaseModel): + """Megatron Vision Transformer Model.""" + + def __init__(self, cfg: DictConfig, trainer: Trainer): + if not HAVE_APEX: + raise ImportError( + "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + if not HAVE_MEGATRON_CORE: + raise ImportError( + "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." + ) + + super().__init__(cfg, trainer=trainer) + + self._validate_trainer() + + # TODO(yuya): clean up all default values + self.megatron_amp_O2 = cfg.get('megatron_amp_O2', False) + + if not self.megatron_amp_O2 and self.cfg.get('virtual_pipeline_model_parallel_size', None): + raise ValueError('Virtual pipeline model parallel is only supported when using megatron_amp_O2') + + # build_model returns a list of modules which are used for interleaved pipeline parallelism + if isinstance(self.trainer.accelerator, CPUAccelerator): + self.model = build_model( + model_provider_func=self.model_provider_func, + wrap_with_ddp=False, + on_cpu=True, + virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), + ) + else: + self.model = build_model( + model_provider_func=self.model_provider_func, + wrap_with_ddp=False, + virtual_pipeline_model_parallel_size=self.cfg.get('virtual_pipeline_model_parallel_size', None), + ) + + # if we're not using interleaved, then self.model is a module. + if self.cfg.get('virtual_pipeline_model_parallel_size', None) is None: + self.model = self.model[0] + + if self.megatron_amp_O2: + + if not self.with_distributed_adam: + # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type + if isinstance(self.model, list): + for module in self.model: + module.cuda(torch.cuda.current_device()) + else: + self.model.cuda(torch.cuda.current_device()) + + # Model wrapper to convert both model and inputs to half precision + if isinstance(self.model, list): + converted_model = [] + for module in self.model: + converted_model.append( + Float16Module(config=self.model_parallel_config, module=module, precision=cfg.precision) + ) + self.model = converted_model + else: + self.model = Float16Module( + config=self.model_parallel_config, module=self.model, precision=cfg.precision + ) + + if self.trainer.precision in ['bf16', 'bf16-mixed']: + self.autocast_dtype = torch.bfloat16 + elif self.trainer.precision in [32, '32', '32-true']: + self.autocast_dtype = torch.float + elif self.trainer.precision in [16, '16', '16-mixed']: + self.autocast_dtype = torch.half + else: + raise ValueError('precision must be in ["32-true", "16-mixed", "bf16-mixed"]') + + self.enable_autocast = ( + True if (not self.megatron_amp_O2) and (self.autocast_dtype in [torch.float16, torch.bfloat16]) else False + ) + + self.transformer_engine = cfg.get('transformer_engine', False) + + # Convert the global-batch-based profile index to micro-batch index + if hasattr(self, '_nsys_profile_enabled'): + mp_size = cfg.get('tensor_model_parallel_size', 1) * cfg.get('pipeline_model_parallel_size', 1) + data_parallel_world_size = trainer.world_size // mp_size + grad_accum_steps = cfg.get('global_batch_size') // (cfg.get('micro_batch_size') * data_parallel_world_size) + self._nsys_profile_start_step *= grad_accum_steps + self._nsys_profile_end_step *= grad_accum_steps + self.get_attention_mask_from_fusion = self.cfg.get('get_attention_mask_from_fusion', True) + self.initialize_ub = self.cfg.get('ub_tp_comm_overlap', False) + + def get_module_list(self): + if isinstance(self.model, list): + return [model.module if isinstance(model, Float16Module) else model for model in self.model] + elif isinstance(self.model, Float16Module): + return [self.model.module] + else: + return [self.model] + + def model_provider_func(self, pre_process, post_process): + """Model depends on pipeline paralellism.""" + model = VitClassificationModel( + model_cfg=self.cfg, + model_parallel_config=self.model_parallel_config, + num_classes=self.cfg.get("num_classes"), # TODO(yuya): clean this up + finetune=self.cfg.get("finetune", False), + pre_process=pre_process, + post_process=post_process, + ) + return model + + def setup_optimizer_param_groups(self): + """ModelPT override. Optimizer will get self._optimizer_param_groups""" + if self.cfg.get('do_layer_norm_weight_decay', False): + if isinstance(self.model, list): + self._optimizer_param_groups = get_all_params_for_weight_decay_optimization(self.model) + else: + self._optimizer_param_groups = get_all_params_for_weight_decay_optimization([self.model]) + + else: + self._optimizer_param_groups = get_params_for_weight_decay_optimization(self.model) + + def configure_optimizers(self): + + if self.with_distributed_adam: + + # Disable overlapped grad sync for embedding grad when + # pipeline parallelism is enabled + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if parallel_state.is_pipeline_first_stage(ignore_virtual=True): + if isinstance(self.model, list): + module = self.model[0] # only the first virtual rank has the embeddings + else: + module = self.model + + if parallel_state.is_pipeline_last_stage(ignore_virtual=True): + if isinstance(self.model, list): + module = self.model[-1] # only the last virtual rank has the embeddings + else: + module = self.model + + # Disable overlapped grad sync for layer norm grads when + # sequence parallelism is enabled + for param in self.parameters(): + if getattr(param, 'sequence_parallel_enabled', False): + param._disable_greedy_grad_copy = not self.megatron_amp_O2 + param._disable_overlap_grad_sync = True + + # KJJ - Copied this entire block, up to "return" here blindly from megatron_gpt_model.py + + # Initialize parameter buckets for overlapped grad and param syncs + # Note: Params with disabled overlapping are put in the + # last param bucket + buckets = [] + if self.cfg.get('virtual_pipeline_model_parallel_size', None) is not None: + # Initialize a bucket for each virtual pipeline stage + for module in self.model: + if isinstance(module, Float16Module): + module = module.module + stage_bucket = [] + # for layer in module.language_model.encoder.layers: + for layer in module.backbone.transformer.layers: + stage_bucket.extend( + p for p in layer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False) + ) + buckets.append(stage_bucket) + else: + # Initialize a bucket for each Transformer layer + modules = self.model if isinstance(self.model, list) else [self.model] + for module in modules: + if isinstance(module, Float16Module): + module = module.module + # for layer in module.language_model.encoder.layers: + for layer in module.backbone.transformer.layers: + + buckets.append( + [p for p in layer.parameters() if not getattr(p, '_disable_overlap_grad_sync', False)] + ) + buckets.reverse() + used_params = set() + for bucket in buckets: + used_params.update(bucket) + buckets[-1].extend(p for p in self.parameters() if p not in used_params) + self.distributed_adam_buckets = buckets + + return super().configure_optimizers() + + def forward(self, tokens): + output_tensor = self.model(tokens) + return output_tensor + + def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): + + # handle asynchronous grad reduction + no_sync_func = None + grad_sync_func = None + param_sync_func = None + if not forward_only and self.with_distributed_adam: + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_O2,) + grad_sync_func = self.reduce_overlap_gradients + param_sync_func = self.sync_overlap_parameters + + # pipeline schedules will get these from self.model.config + for module in self.get_module_list(): + module.config.no_sync_func = no_sync_func + module.config.grad_sync_func = grad_sync_func + module.config.param_sync_func = param_sync_func + + # run forward and backwards passes for an entire global batch + # we do this inside training_step to support pipeline parallelism + fwd_bwd_function = get_forward_backward_func() + + # TODO @akhattar: add num_micro_batches_with_partial_activation_checkpoints when ready + losses_reduced_per_micro_batch = fwd_bwd_function( + forward_step_func=self.get_forward_output_and_loss_func(), + data_iterator=dataloader_iter, + model=[self.model], + num_microbatches=get_num_microbatches(), + forward_only=forward_only, + seq_length=self.cfg.encoder_seq_length, + micro_batch_size=self.cfg.micro_batch_size, + ) + + # only the last stages of the pipeline return losses + if losses_reduced_per_micro_batch: + if (not forward_only) or self.cfg.data.get('validation_drop_last', True): + # average loss across micro batches + loss_tensors_list = [loss_reduced['loss'] for loss_reduced in losses_reduced_per_micro_batch] + loss_tensor = torch.stack(loss_tensors_list) + loss_mean = loss_tensor.mean() + acc_tensors_list = [loss_reduced['accuracy'] for loss_reduced in losses_reduced_per_micro_batch] + acc_tensor = torch.stack(acc_tensors_list) + accuracy_mean = acc_tensor.mean() + else: + # Get the total loss since micro batches sizes are not uniform + raise NotImplementedError("Losses of micro batches sizes must be uniform!") + else: + # we're not on the last pipeline stage so no losses + if forward_only: + loss_mean = [] + accuracy_mean = [] + else: + loss_mean = torch.tensor(0.0).cuda() + accuracy_mean = loss_mean.copy() + + return loss_mean, accuracy_mean + + def initialize_ub_func(self): + ub_cfgs = self.cfg.get('ub_tp_comm_overlap_cfg', None) + if ub_cfgs is None: + warnings.warn( + "Couldn't find TP config. Please check the path correctness. Initializing TP comm overlap with the default config." + ) + + input_shape = [ + self.cfg.get('encoder_seq_length') * self.cfg.get('micro_batch_size'), + self.cfg.get('hidden_size'), + ] + + te_module.base.initialize_ub( + shape=input_shape, + tp_size=self.cfg.get('tensor_model_parallel_size'), + use_fp8=self.cfg.get('fp8'), + ub_cfgs=ub_cfgs, + ) + self.initialize_ub = False + + def training_step(self, dataloader_iter, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + Batch should be a list of microbatches and those microbatches should on CPU. + Microbatches are then moved to GPU during the pipeline. + The list of microbatches is then piped through the pipeline using Apex fwd/bwd functions. + """ + # Initialize userbuffer communicators. + if self.initialize_ub: + self.initialize_ub_func() + + # we zero grads here because we also call backward in the megatron-core fwd/bwd functions + self._optimizer.zero_grad() + + loss_mean, _ = self.fwd_bwd_step(dataloader_iter, batch_idx, False) + + # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced + if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): + self.allreduce_sequence_parallel_gradients() + + if self.with_distributed_adam: + # KJJ - Added this block from megatron_gpt_model. It says it's not necessary + # and it's not clear if the remaining "if not" logic is still needed. + # keeping it for now, but might need to delete one or both of these. + + # synchronize asynchronous grad reductions + # note: not necessary, but reduces performance degradation + # from multiple simultaneous NCCL calls + self._optimizer._finish_bucket_grad_sync() + + # launch grad reductions + # Note: grads in first pipeline stage have already been + # reduced + if not parallel_state.is_pipeline_first_stage(): + self.reduce_overlap_gradients() + elif self.megatron_amp_O2: + # # when using pipeline parallelism grads must be all-reduced after the pipeline (not asynchronously) + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1 or self.cfg.get('sequence_parallel', False): + # # main grads are stored in the MainParamsOptimizer wrapper + # self._optimizer.allreduce_main_grads() + self._optimizer.allreduce_main_grads() + else: + # async grad allreduce is not currently implemented for O1/autocasting mixed precision training + # so we all-reduce gradients after the pipeline + self.allreduce_gradients() # @sangkug we think this is causing memory to blow up (hurts perf) + + # if self.cfg.get('pipeline_model_parallel_size', 1) > 1: + # # when using pipeline parallelism the first and last stage must keep embeddings in sync + # self.allreduce_first_last_embeddings() + + ## logging + # we can only log on one rank if it is rank zero so we broadcast from last rank + # we can avoid this broadcast by updating the PTL log function to accept specific ranks + torch.distributed.broadcast(loss_mean, get_last_rank()) + + if self.cfg.precision in [16, '16', '16-mixed']: + loss_scale = self.trainer.precision_plugin.scaler._scale + if loss_scale is not None: + self.log('loss_scale', loss_scale, batch_size=1) + + self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1) + lr = self._optimizer.param_groups[0]['lr'] + self.log('lr', lr, rank_zero_only=True, batch_size=1) + self.log('global_step', self.trainer.global_step + 1, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log( + 'consumed_samples', + self.compute_consumed_samples(self.trainer.global_step + 1 - self.init_global_step), + prog_bar=True, + rank_zero_only=True, + batch_size=1, + ) + + return loss_mean + + def backward(self, *args, **kwargs): + """ LightningModule hook to do backward. + We want this to do nothing since we run backward in the fwd/bwd functions from apex. + No need to call it here. + """ + pass + + def optimizer_zero_grad(self, *args, **kwargs): + """ LightningModule hook to zero grad. + We want this to do nothing as we are zeroing grads during the training_step. + """ + pass + + def _append_sequence_parallel_module_grads(self, module, grads): + """ Helper method for allreduce_sequence_parallel_gradients""" + + for param in module.parameters(): + sequence_parallel_param = getattr(param, 'sequence_parallel', False) + if sequence_parallel_param and param.requires_grad: + if self.megatron_amp_O2: + grad = param.main_grad + else: + grad = param.grad + grads.append(grad.data) + + def allreduce_sequence_parallel_gradients(self): + """ All-reduce layernorm parameters across model parallel nodes when sequence parallelism is used. + Modified from megatron-lm: + https://gitlab-master.nvidia.com/ADLR/megatron-lm/-/blob/3f91f09bb2ab32f9904b47f46f19d2fc3f518ed8/megatron/training.py#L425 + """ + + grads = [] + if isinstance(self.model, list): + for module in self.model: + self._append_sequence_parallel_module_grads(module, grads) + else: + self._append_sequence_parallel_module_grads(self.model, grads) + + coalesced = torch._utils._flatten_dense_tensors(grads) + torch.distributed.all_reduce(coalesced, group=parallel_state.get_tensor_model_parallel_group()) + for buf, synced in zip(grads, torch._utils._unflatten_dense_tensors(coalesced, grads)): + buf.copy_(synced) + + def get_forward_output_and_loss_func(self, validation_step=False): + def loss_func(labels, output_tensor): + logits = output_tensor.contiguous().float() + loss = torch.nn.functional.cross_entropy(logits, labels) + + outputs = torch.argmax(logits, -1) + correct = (outputs == labels).float() + accuracy = torch.mean(correct) + + averaged_loss = average_losses_across_data_parallel_group([loss, accuracy]) + + return loss, {"loss": averaged_loss[0], "accuracy": averaged_loss[1]} + + def fwd_output_and_loss_func(dataloader_iter, model): + batch = next(dataloader_iter) + if parallel_state.get_pipeline_model_parallel_world_size() == 1: + batch = [x.cuda(non_blocking=True) for x in batch] + tokens, labels = batch + else: + # Vision transformer doesn't need attention mask + if parallel_state.is_pipeline_first_stage(): + # Fist pipeline stage needs only the tokens and position_ids + tokens = batch[0].cuda(non_blocking=True) + labels = None + elif parallel_state.is_pipeline_last_stage(): + # Last pipeline stage needs only the labels and loss_mask + labels = batch[1].cuda(non_blocking=True) + tokens = None + else: + # Intermediate pipeline stage doesn't need any inputs + tokens, labels = None, None + + output_tensor = model(tokens) + return output_tensor, partial(loss_func, labels) + + return fwd_output_and_loss_func + + def get_forward_output_only_func(self): + def fwd_output_only_func(batch, model): + raise NotImplementedError + + return fwd_output_only_func + + def validation_step(self, dataloader_iter, batch_idx): + """ + Our dataloaders produce a micro-batch and then we fetch + a number of microbatches depending on the global batch size and model parallel size + from the dataloader to produce a list of microbatches. + The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. + """ + mode = 'test' if self.trainer.testing else 'val' + + # Initialize userbuffer communicators. + if self.initialize_ub: + self.initialize_ub_func() + + loss, accuracy = self.fwd_bwd_step(dataloader_iter, batch_idx, True) + + self.validation_step_outputs.append((loss, accuracy)) if mode == 'val' else self.test_step_outputs.append( + (loss, accuracy) + ) + return loss, accuracy + + def on_validation_epoch_end(self): + # TODO (yuya): need fix later, check with Sean + if not self.validation_step_outputs: + return + + if parallel_state.is_pipeline_last_stage(): + loss_outputs = [output[0] for output in self.validation_step_outputs] + acc_outputs = [output[1] for output in self.validation_step_outputs] + + averaged_metrics = torch.tensor( + [torch.stack(loss_outputs).mean(), torch.stack(acc_outputs).mean()], dtype=torch.float32, device='cuda' + ) + else: + averaged_metrics = torch.tensor([0.0, 0.0], dtype=torch.float32, device='cuda') + + # we can only log on one rank if it is rank zero so we broadcast from last rank + torch.distributed.broadcast(averaged_metrics, get_last_rank()) + + averaged_loss, averaged_acc = averaged_metrics + + self.log('global_step', self.trainer.global_step, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('val_loss', averaged_loss, prog_bar=True, rank_zero_only=True, batch_size=1) + self.log('val_accuracy', averaged_acc, prog_bar=True, rank_zero_only=True, batch_size=1) + self.validation_step_outputs.clear() # free memory + + return averaged_loss + + def test_step(self, batch, batch_idx): + return self.validation_step(batch, batch_idx) + + def on_test_epoch_end(self): + pass + + def build_train_valid_test_datasets(self): + logging.info('Building datasets for ViT...') + if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): + raise ValueError("limit_val_batches must be an integer or float less than or equal to 1.0.") + + self._train_ds, self._validation_ds = build_train_valid_datasets( + model_cfg=self.cfg, data_path=self.cfg.data.data_path, image_size=(self.cfg.img_h, self.cfg.img_w), + ) + self._test_ds = None + + if self._train_ds is not None: + logging.info(f'Length of train dataset: {len(self._train_ds)}') + if self._validation_ds is not None: + logging.info(f'Length of val dataset: {len(self._validation_ds)}') + if self._test_ds is not None: + logging.info(f'Length of test dataset: {len(self._test_ds)}') + logging.info(f'Finished building datasets for ViT.') + + return self._train_ds, self._validation_ds, self._test_ds + + def build_pretraining_data_loader(self, dataset, consumed_samples, drop_last=True): + """Buld dataloader given an input dataset.""" + + logging.info(f'Building dataloader with consumed samples: {consumed_samples}') + # Megatron sampler + if hasattr(self.cfg.data, 'dataloader_type') and self.cfg.data.dataloader_type is not None: + if self.cfg.data.dataloader_type == 'single': + batch_sampler = MegatronPretrainingSampler( + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=self.cfg.micro_batch_size, + global_batch_size=self.cfg.global_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=drop_last, + ) + elif self.cfg.data.dataloader_type == 'cyclic': + batch_sampler = MegatronVisionPretrainingRandomSampler( + dataset=dataset, + total_samples=len(dataset), + consumed_samples=consumed_samples, + micro_batch_size=self.cfg.micro_batch_size, + global_batch_size=self.cfg.global_batch_size, + data_parallel_rank=parallel_state.get_data_parallel_rank(), + data_parallel_size=parallel_state.get_data_parallel_world_size(), + drop_last=drop_last, + data_sharding=self.cfg.data.get("data_sharding", True), + ) + else: + raise ValueError('cfg.data.dataloader_type must be "single" or "cyclic"') + else: + raise ValueError('cfg.data.dataloader_type not found. Must be "single" or "cyclic"') + + return torch.utils.data.DataLoader( + dataset, + batch_sampler=batch_sampler, + num_workers=self.cfg.data.num_workers, + pin_memory=True, + persistent_workers=True if self.cfg.data.num_workers > 0 else False, + ) + + def setup(self, stage=None): + """ PTL hook that is executed after DDP spawns. + We setup datasets here as megatron datasets require DDP to instantiate. + See https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#setup for more information. + Args: + stage (str, optional): Can be 'fit', 'validate', 'test' or 'predict'. Defaults to None. + """ + + # log number of parameters + if isinstance(self.model, list): + num_parameters_on_device = sum( + [sum([p.nelement() for p in model_module.parameters()]) for model_module in self.model] + ) + # if parallel_state.get_pipeline_model_parallel_world_size() > 1 and parallel_state.is_pipeline_last_stage( + # ignore_virtual=True + # ): + # # substract the embedding weights on the last virtual stage + # num_word_embedding_parameters = sum([p.nelement() for p in self.model[-1].word_embeddings_weight()]) + # num_parameters_on_device -= num_word_embedding_parameters + else: + num_parameters_on_device = sum([p.nelement() for p in self.model.parameters()]) + + # if parallel_state.get_pipeline_model_parallel_world_size() > 1 and parallel_state.is_pipeline_last_stage( + # ignore_virtual=True + # ): + # # substract the embedding weights on the last stage + # num_word_embedding_parameters = sum([p.nelement() for p in self.model.word_embeddings_weight()]) + # + # num_parameters_on_device -= num_word_embedding_parameters + + # to be summed across data parallel group + total_num_parameters = torch.tensor(num_parameters_on_device).cuda() + + torch.distributed.all_reduce(total_num_parameters, group=parallel_state.get_model_parallel_group()) + + logging.info( + f'Pipeline model parallel rank: {parallel_state.get_pipeline_model_parallel_rank()}, ' + f'Tensor model parallel rank: {parallel_state.get_tensor_model_parallel_rank()}, ' + f'Number of model parameters on device: {num_parameters_on_device:.2e}. ' + f'Total number of model parameters: {total_num_parameters:.2e}.' + ) + + resume_checkpoint_path = self.trainer.ckpt_path + if resume_checkpoint_path: + init_consumed_samples = self._extract_consumed_samples_from_ckpt(resume_checkpoint_path) + else: + init_consumed_samples = 0 + self.init_consumed_samples = init_consumed_samples + self.init_global_step = self.trainer.global_step + + # allowing restored models to optionally setup datasets + self.build_train_valid_test_datasets() + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) + + # when using pipeline model parallel the final stage need to initialize word embeddings + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if isinstance(self.model, list): + for i, module in enumerate(self.model): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + # module.sync_initial_word_embeddings() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + else: + # self.model.sync_initial_word_embeddings() + pass + + def setup_training_data(self, cfg): + if hasattr(self, '_train_ds') and self._train_ds is not None: + consumed_samples = self.compute_consumed_samples(0) + logging.info( + f'Setting up train dataloader with len(len(self._train_ds)): {len(self._train_ds)} and consumed samples: {consumed_samples}' + ) + self._train_dl = self.build_pretraining_data_loader(self._train_ds, consumed_samples) + + def setup_validation_data(self, cfg): + if hasattr(self, '_validation_ds') and self._validation_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up validation dataloader with len(len(self._validation_ds)): {len(self._validation_ds)} and consumed samples: {consumed_samples}' + ) + drop_last = True + if not self.cfg.data.get('validation_drop_last', True): + logging.info(f'Drop last in validation dataset is set to False') + drop_last = False + self._validation_dl = self.build_pretraining_data_loader(self._validation_ds, consumed_samples,) + + def setup_test_data(self, cfg): + if hasattr(self, '_test_ds') and self._test_ds is not None: + consumed_samples = 0 + logging.info( + f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds)} and consumed samples: {consumed_samples}' + ) + self._test_dl = self.build_pretraining_data_loader(self._test_ds, consumed_samples) + + def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any: + raise NotImplementedError + + def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any: + """ PTL hook: https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#transfer-batch-to-device + When using pipeline parallelism, we need the global batch to remain on the CPU, + since the memory overhead will be too high when using a large number of microbatches. + Microbatches are transferred from CPU to GPU inside the pipeline. + """ + return batch + + def _validate_trainer(self): + """ Certain trainer configurations can break training. + Here we try to catch them and raise an error. + """ + if self.trainer.accumulate_grad_batches > 1: + raise ValueError( + f'Gradient accumulation is done within training_step. trainer.accumulate_grad_batches must equal 1' + ) + + @classmethod + def list_available_models(cls) -> Optional[PretrainedModelInfo]: + return None + + def on_save_checkpoint(self, checkpoint) -> None: + """LightningModule hook: + https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-save-checkpoint + """ + if isinstance(self.model, list): + for i in range(len(self.model)): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + checkpoint[f'model{i}'] = self.model[i].module.state_dict_for_save_checkpoint() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + + def on_load_checkpoint(self, checkpoint) -> None: + """LightningModule hook: + https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-load-checkpoint + """ + if isinstance(self.model, list): + for i in range(len(self.model)): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + self.model[i].module.load_state_dict(checkpoint[f'model{i}'], strict=True) + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + + def parameters(self): + if isinstance(self.model, list): + return itertools.chain.from_iterable(module.parameters() for module in self.model) + else: + return self.model.parameters() diff --git a/nemo/collections/vision/modules/__init__.py b/nemo/collections/vision/modules/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/modules/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/modules/common/__init__.py b/nemo/collections/vision/modules/common/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/modules/common/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/modules/common/megatron/__init__.py b/nemo/collections/vision/modules/common/megatron/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/modules/common/megatron/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/modules/common/megatron/vision_transformer.py b/nemo/collections/vision/modules/common/megatron/vision_transformer.py new file mode 100644 index 000000000000..792f0bdc4253 --- /dev/null +++ b/nemo/collections/vision/modules/common/megatron/vision_transformer.py @@ -0,0 +1,492 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# coding=utf-8 + + +"""Transformer.""" + +import torch + +from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType +from nemo.collections.nlp.modules.common.megatron.module import MegatronModule +from nemo.collections.nlp.modules.common.megatron.transformer import ParallelTransformer, ParallelTransformerLayer_ +from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults + +try: + from apex.normalization import MixedFusedRMSNorm + from apex.transformer.enums import AttnMaskType, AttnType, ModelType + from apex.transformer.utils import divide as safe_divide + + HAVE_APEX = True + +except (ImportError, ModuleNotFoundError): + + HAVE_APEX = False + + # fake missing classes with None attributes + ModelType = AttnMaskType = AttnType = LayerType = ApexGuardDefaults() + +try: + from megatron.core import parallel_state, tensor_parallel + from megatron.core.parallel_state import get_tensor_model_parallel_world_size + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + +""" We use the following notation throughout this file: + h: hidden size + n: number of attention heads + p: number of model parallel partitions + np: n/p + hp: h/p + hn: h/n + b: batch size + s: sequence length + l: number of layers + Transformer takes input of size [s, b, h] and returns a + tensor of the same size. We use the following arguments: + hyperparameters: transformer hyperparameters +""" + + +class DropPath(MegatronModule): + """Drop paths (Stochastic Depth) per sample + (when applied in main path of residual blocks). + """ + + def __init__(self, drop_prob=0.0): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, hidden_state): + if self.drop_prob == 0.0 or not self.training: + return hidden_state + keep_prob = 1 - self.drop_prob + # work with diff dim tensors, not just 2D ConvNets + # hidden_state: [s, b, h] + shape = (1,) + (hidden_state.shape[1],) + (1,) * (hidden_state.ndim - 2) + random_tensor = keep_prob + torch.rand(shape, dtype=hidden_state.dtype, device=hidden_state.device) + random_tensor.floor_() # binarize + output = hidden_state.div(keep_prob) * random_tensor + return output + + +class ParallelVisionTransformerLayer_(ParallelTransformerLayer_): + """A single transformer layer. + + Transformer layer takes input with size [s, b, h] and returns an + output of the same size. + """ + + def __init__( + self, + config, + init_method, + output_layer_init_method, + layer_number, + hidden_size, + ffn_hidden_size, + num_attention_heads, + layer_type=LayerType.encoder, + self_attn_mask_type=AttnMaskType.padding, + fp32_residual_connection=False, + precision=16, + apply_query_key_layer_scaling=True, + kv_channels=None, + layernorm_epsilon=1e-5, + hidden_dropout=0.1, + bias_dropout_add_fusion=True, + persist_layer_norm=False, + bias_activation_fusion=True, + openai_gelu=False, + onnx_safe=False, + masked_softmax_fusion=True, + attention_dropout=0.1, + ffn_dropout=0.0, + drop_path_rate=0.0, + activation='gelu', + megatron_legacy=False, + bias=True, + chunk_size=64, + normalization='layernorm', + transformer_block_type='pre_ln', + headscale=False, + activations_checkpoint_granularity=None, + normalize_attention_scores=True, + use_flash_attention=False, + ): + kwargs = locals() + for key in ["self", "__class__"]: + kwargs.pop(key) + drop_path_rate = kwargs.pop("drop_path_rate") + super(ParallelVisionTransformerLayer_, self).__init__(**kwargs) + + self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0.0 else None + + def forward( + self, + hidden_states, + attention_mask, + encoder_output=None, + enc_dec_attn_mask=None, + layer_past=None, + get_key_value=False, + set_inference_key_value_memory=False, + inference_max_sequence_len=None, + rotary_pos_emb=None, + # list of positional embedding tensors, first one self attention, second one and third one are for cross attention (q, k) + self_attention_relative_position_bias=None, + cross_attention_relative_position_bias=None, + checkpoint_core_attention=False, + ): + # Self attention. + if rotary_pos_emb is not None: + # self attention pos_emb is (q, q) + self_attention_pos_emb = (rotary_pos_emb[0], rotary_pos_emb[0]) + cross_attention_pos_emb = (rotary_pos_emb[1], rotary_pos_emb[2]) + else: + self_attention_pos_emb = None + cross_attention_pos_emb = None + + if self.layer_type != LayerType.retrieval_decoder_after_self_attn: + # hidden_states: [b, s, h] + + # Pre-LN: x -> LN -> MHA -> Residual -> LN -> MLP -> Residual + # Post-LN: x -> MHA -> Residual -> LN -> MLP -> Residual -> LN + # Normformer: x -> LN -> MHA -> LN -> Residual -> MLP (w/LN) -> Residual + + residual = hidden_states + # Layer norm at the beginning of the transformer layer. + if self.transformer_block_type in ['pre_ln', 'normformer']: + hidden_states = self.input_layernorm(hidden_states) + + attention_output, attention_bias = self.self_attention( + hidden_states, + attention_mask, + layer_past=layer_past, + get_key_value=get_key_value, + set_inference_key_value_memory=set_inference_key_value_memory, + inference_max_sequence_len=inference_max_sequence_len, + rotary_pos_emb=self_attention_pos_emb, + relative_position_bias=self_attention_relative_position_bias, + checkpoint_core_attention=checkpoint_core_attention, + ) + + if get_key_value: + attention_output, presents = attention_output + + # If normformer, apply norm on the output of the self attention. + if self.transformer_block_type == 'normformer': + # Normformer normalization + attention_output = ( + attention_output + attention_bias if attention_bias is not None else attention_output + ) + attention_output = self.post_attention_normformer_norm(attention_output) + attention_bias = None + + # jit scripting for a nn.module (with dropout) is not + # trigerring the fusion kernel. For now, we use two + # different nn.functional routines to account for varying + # dropout semantics during training and inference phases. + + if self.drop_path is None: + bias_dropout_add_func = self._get_bias_droput_add_func( + transformer_block_type=self.transformer_block_type, position_after='attention' + ) + if attention_bias is not None: + attention_bias = attention_bias.expand_as(residual) + + layernorm_input = bias_dropout_add_func( + attention_output, attention_bias, residual, self.hidden_dropout + ) + else: + assert self.transformer_block_type != 'normformer', "Normfomer doesn't support drop_path" + out = torch.nn.functional.dropout( + attention_output + attention_bias, p=self.hidden_dropout, training=self.training + ) + layernorm_input = residual + self.drop_path(out) + # print(f"Layer: {self.layer_number} Attention checksum {layernorm_input.sum()}") + + # Post-LN normalization after residual + if self.transformer_block_type == 'post_ln': + normalization_output = self.input_layernorm(layernorm_input) + layernorm_input = normalization_output + elif self.transformer_block_type in ['pre_ln', 'normformer']: + # Layer norm post the self attention. + normalization_output = self.post_attention_layernorm(layernorm_input) + else: + layernorm_input, normalization_output = hidden_states + + if self.layer_type == LayerType.decoder_pre_mlp: + return layernorm_input, normalization_output + + if ( + self.layer_type == LayerType.decoder + or self.layer_type == LayerType.retrieval_decoder + or self.layer_type == LayerType.retrieval_encoder + or self.layer_type == LayerType.retrieval_decoder_after_self_attn + ): + if ( + self.layer_type == LayerType.retrieval_decoder + or self.layer_type == LayerType.retrieval_decoder_after_self_attn + ): + attention_output, attention_bias = self.inter_attention( + normalization_output, + enc_dec_attn_mask, + encoder_output=encoder_output, + rotary_pos_emb=cross_attention_pos_emb, + set_inference_key_value_memory=set_inference_key_value_memory, + inference_max_sequence_len=inference_max_sequence_len, + checkpoint_core_attention=checkpoint_core_attention, + ) + else: + attention_output, attention_bias = self.inter_attention( + normalization_output, + enc_dec_attn_mask, + encoder_output=encoder_output, + rotary_pos_emb=cross_attention_pos_emb, + relative_position_bias=cross_attention_relative_position_bias, + checkpoint_core_attention=checkpoint_core_attention, + ) + + # If normformer, apply norm on the output of the self attention. + if self.transformer_block_type == 'normformer': + # Normformer normalization + attention_output = ( + attention_output + attention_bias if attention_bias is not None else attention_output + ) + attention_output = self.post_inter_attention_normformer_norm(attention_output) + attention_bias = None + + residual = layernorm_input + + bias_dropout_add_func = self._get_bias_droput_add_func( + transformer_block_type=self.transformer_block_type, position_after='attention' + ) + + layernorm_input = bias_dropout_add_func(attention_output, attention_bias, residual, self.hidden_dropout) + # print(f"Layer: {self.layer_number} Cross-Attention checksum {layernorm_input.sum()}") + normalization_output = self.post_inter_attention_layernorm(layernorm_input) + # Post-LN normalization after residual + if self.transformer_block_type == 'post_ln': + layernorm_input = normalization_output + # MLP. + mlp_output, mlp_bias = self.mlp(normalization_output) + + residual = layernorm_input + + if self.drop_path is None: + bias_dropout_add_func = self._get_bias_droput_add_func( + transformer_block_type=self.transformer_block_type, position_after='mlp' + ) + + output = bias_dropout_add_func(mlp_output, mlp_bias, residual, self.hidden_dropout) + + else: + out = torch.nn.functional.dropout(mlp_output + mlp_bias, p=self.hidden_dropout, training=self.training) + output = residual + self.drop_path(out) + # print(f"Layer: {self.layer_number} MLP + Dropout + Residual checksum {output.sum()}") + + if self.transformer_block_type == 'post_ln': + output = self.post_attention_layernorm(output) + + if get_key_value: + output = [output, presents] + + return output + + +class ParallelVisionTransformerLayer(ParallelVisionTransformerLayer_): + def __init__(self, **kwargs): + super(ParallelVisionTransformerLayer, self).__init__(**kwargs) + precision = kwargs['precision'] + if precision in ['bf16', 'bf16-mixed']: + self.dtype = torch.bfloat16 + elif precision in [16, '16', '16-mixed']: + self.dtype = torch.float16 + elif precision in [32, '32', '32-true']: + self.dtype = torch.float32 + else: + raise ValueError(f"Cannot recognize precision {precision}") + + def forward( + self, + hidden_states, + attention_mask, + encoder_output=None, + enc_dec_attn_mask=None, + rotary_pos_emb=None, + layer_past=None, + get_key_value=False, + set_inference_key_value_memory=False, + inference_max_sequence_len=None, + self_attention_relative_position_bias=None, + cross_attention_relative_position_bias=None, + checkpoint_core_attention=False, + ): + kwargs = locals() + for key in ["self", "__class__"]: + kwargs.pop(key) + if self.dtype == torch.float32: + return super().forward(**kwargs) + with torch.autocast(device_type="cuda", dtype=self.dtype): + return super().forward(**kwargs) + + +class ParallelVisionTransformer(ParallelTransformer): + """Transformer class.""" + + def __init__( + self, + config, + init_method, + output_layer_init_method, + num_layers, + hidden_size, + ffn_hidden_size, + num_attention_heads, + apply_query_key_layer_scaling=True, + kv_channels=None, + layer_type=LayerType.encoder, # it can be a list of types or single type + self_attn_mask_type=AttnMaskType.padding, + pre_process=True, + post_process=True, + precision=16, + fp32_residual_connection=False, + activations_checkpoint_method=None, + activations_checkpoint_num_layers=None, + layernorm_epsilon=1e-5, + hidden_dropout=0.1, + attention_dropout=0.1, + ffn_dropout=0.0, + drop_path_rate=0.0, + bias_activation_fusion=True, + bias_dropout_add_fusion=True, + masked_softmax_fusion=True, + persist_layer_norm=False, + openai_gelu=False, + onnx_safe=False, + activation='gelu', + model_type=ModelType.encoder_or_decoder, + megatron_legacy=False, + bias=True, + chunk_size=64, + normalization='layernorm', + transformer_block_type='pre_ln', + headscale=False, + layer_number_offset=0, # this is use only for attention norm_factor scaling + activations_checkpoint_granularity=None, + normalize_attention_scores=True, + ub_tp_comm_overlap=False, + use_flash_attention=False, + ): + kwargs = locals() + for key in ["self", "__class__"]: + kwargs.pop(key) + self.drop_path_rate = kwargs.pop("drop_path_rate") + super(ParallelVisionTransformer, self).__init__(**kwargs) + + self.num_layers = self.get_num_layers(num_layers) + + self.drop_path_rates = [ + rate.item() + for rate in torch.linspace( + 0, self.drop_path_rate, self.num_layers * parallel_state.get_pipeline_model_parallel_world_size() + ) + ] + + # Rebuild with vision transformer layers. + def build_layer(layer_number): + if isinstance(layer_type, list): + lt = layer_type[layer_number - 1] + else: + lt = layer_type + return ParallelVisionTransformerLayer( + config=config, + init_method=init_method, + output_layer_init_method=output_layer_init_method, + layer_number=layer_number + layer_number_offset, + hidden_size=hidden_size, + ffn_hidden_size=ffn_hidden_size, + num_attention_heads=num_attention_heads, + apply_query_key_layer_scaling=apply_query_key_layer_scaling, + kv_channels=kv_channels, + layer_type=lt, + self_attn_mask_type=self_attn_mask_type, + precision=precision, + fp32_residual_connection=fp32_residual_connection, + layernorm_epsilon=layernorm_epsilon, + hidden_dropout=hidden_dropout, + attention_dropout=attention_dropout, + ffn_dropout=ffn_dropout, + drop_path_rate=self.drop_path_rates[layer_number - 1], + bias_activation_fusion=bias_activation_fusion, + bias_dropout_add_fusion=bias_dropout_add_fusion, + masked_softmax_fusion=masked_softmax_fusion, + persist_layer_norm=persist_layer_norm, + openai_gelu=openai_gelu, + onnx_safe=onnx_safe, + activation=activation, + megatron_legacy=megatron_legacy, + bias=bias, + chunk_size=chunk_size, + normalization=normalization, + transformer_block_type=transformer_block_type, + headscale=headscale, + activations_checkpoint_granularity=activations_checkpoint_granularity, + normalize_attention_scores=normalize_attention_scores, + use_flash_attention=use_flash_attention, + ) + + if parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None: + assert num_layers % parallel_state.get_virtual_pipeline_model_parallel_world_size() == 0, ( + 'num_layers_per_stage must be divisible by ' 'virtual_pipeline_model_parallel_size' + ) + + # self.model_type != ModelType.encoder_and_decoder + assert self.model_type.value != 2, f'virtual pipeline parallel currently only supported for GPT' + + # Number of layers in each model chunk is the number of layers in the stage, + # divided by the number of model chunks in a stage. + self.num_layers = self.num_layers // parallel_state.get_virtual_pipeline_model_parallel_world_size() + # With 8 layers, 2 stages, and 4 model chunks, we want an assignment of + # layers to stages like (each list is a model chunk): + # Stage 0: [0] [2] [4] [6] + # Stage 1: [1] [3] [5] [7] + # With 8 layers, 2 stages, and 2 virtual stages, we want an assignment of + # layers to stages like (each list is a model chunk): + # Stage 0: [0, 1] [4, 5] + # Stage 1: [2, 3] [6, 7] + offset = parallel_state.get_virtual_pipeline_model_parallel_rank() * ( + num_layers // parallel_state.get_virtual_pipeline_model_parallel_world_size() + ) + (parallel_state.get_pipeline_model_parallel_rank() * self.num_layers) + else: + # Each stage gets a contiguous set of layers. + if ( + self.model_type == ModelType.encoder_and_decoder + and parallel_state.get_pipeline_model_parallel_world_size() > 1 + ): + pipeline_rank = parallel_state.get_pipeline_model_parallel_rank() + if layer_type == LayerType.encoder: + offset = pipeline_rank * self.num_layers + else: + num_ranks_in_enc = parallel_state.get_pipeline_model_parallel_split_rank() + offset = (pipeline_rank - num_ranks_in_enc) * self.num_layers + else: + offset = parallel_state.get_pipeline_model_parallel_rank() * self.num_layers + + self.layers = torch.nn.ModuleList([build_layer(i + 1 + offset) for i in range(self.num_layers)]) diff --git a/nemo/collections/vision/modules/vit/__init__.py b/nemo/collections/vision/modules/vit/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/modules/vit/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo/collections/vision/modules/vit/vit_backbone.py b/nemo/collections/vision/modules/vit/vit_backbone.py new file mode 100644 index 000000000000..5758a9ff7cdb --- /dev/null +++ b/nemo/collections/vision/modules/vit/vit_backbone.py @@ -0,0 +1,361 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Vision Transformer(VIT) model.""" + +import math +from functools import partial + +import einops +import torch +import torch.nn.functional as F + +from nemo.collections.nlp.modules.common.megatron.fused_layer_norm import get_layer_norm +from nemo.collections.nlp.modules.common.megatron.module import MegatronModule +from nemo.collections.nlp.modules.common.megatron.utils import ( + ApexGuardDefaults, + init_method_normal, + scaled_init_method_normal, +) +from nemo.collections.vision.modules.common.megatron.vision_transformer import ParallelVisionTransformer + +try: + import apex + from apex.transformer.enums import AttnMaskType + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + + # fake missing classes with None attributes + AttnMaskType = ApexGuardDefaults() + LayerType = ApexGuardDefaults() + + +class DropPatch(MegatronModule): + """ + https://arxiv.org/abs/2212.00794 + """ + + def __init__(self, prob, class_token_length=8, exclude_cls_tokens=True): + assert 0 <= prob < 1.0 + super(DropPatch, self).__init__() + self.prob = prob + self.class_token_length = class_token_length + self.exclude_cls_tokens = exclude_cls_tokens # exclude CLS token + + def __call__(self, x): + if self.prob == 0.0 or not self.training: + return x + + class_token_length = self.class_token_length + if self.exclude_cls_tokens: + cls_tokens, x = x[:, :class_token_length], x[:, class_token_length:] + + batch, num_tokens, _, device = *x.shape, x.device + + batch_indices = torch.arange(batch, device=device) + batch_indices = batch_indices[..., None] + + keep_prob = 1 - self.prob + num_patches_keep = max(1, int(num_tokens * keep_prob)) + + rand = torch.randn(batch, num_tokens, device=device) + patch_indices_keep = rand.topk(num_patches_keep, dim=-1).indices + + x = x[batch_indices, patch_indices_keep] + + if self.exclude_cls_tokens: + x = torch.cat((cls_tokens, x), dim=1) + + return x + + +class VitMlpHead(MegatronModule): + """Pooler layer. + + Pool hidden states of a specific token (for example start of the + sequence) and add a linear transformation followed by a tanh. + + Arguments: + hidden_size: hidden size + init_method: weight initialization method for the linear layer. + bias is set to zero. + """ + + def __init__(self, hidden_size, num_classes): + super(VitMlpHead, self).__init__() + self.dense_in = torch.nn.Linear(hidden_size, hidden_size) + self.relu = torch.nn.ReLU() + self.dense_out = torch.nn.Linear(hidden_size, num_classes) + torch.nn.init.constant_(self.dense_out.bias, -10) + + def forward(self, hidden_states): + # hidden_states: [b, 1, h] + # sequence_index: index of the token to pool. + dense_in_result = self.dense_in(hidden_states) + tanh_result = torch.tanh(dense_in_result) + dense_out_result = self.dense_out(tanh_result) + return dense_out_result + + +def isPerfectSquare(x): + if x >= 0: + sr = math.sqrt(x) + return int(sr) * int(sr) == x + return False + + +def twod_interpolate_position_embeddings_hook( + model_cfg, + class_token_present, + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, +): + num_patches_per_dim_h = model_cfg.img_h // model_cfg.patch_dim + num_patches_per_dim_w = model_cfg.img_w // model_cfg.patch_dim + num_patches = num_patches_per_dim_h * num_patches_per_dim_w + hidden_size = model_cfg.hidden_size + class_token_length = model_cfg.get("class_token_length", 8) + + key = prefix + "weight" + + assert key in state_dict, f"{key} not in {state_dict.keys()}" + if key in state_dict: + input_param = state_dict[key] + + input_seq_len = input_param.shape[0] + assert isPerfectSquare(input_seq_len) or isPerfectSquare(input_seq_len - class_token_length) + input_has_class_token = not isPerfectSquare(input_seq_len) + num_tok_input = input_seq_len - class_token_length if input_has_class_token else input_seq_len + num_tok_output = num_patches + output_has_class_token = class_token_present + + # update input_param and load it to state_dict[key] + if input_has_class_token: + input_param_tok = input_param[:class_token_length, :] + input_param_grid = input_param[class_token_length:, :] + else: + input_param_tok = torch.zeros(class_token_length, hidden_size, device=input_param.device) + input_param_grid = input_param + + assert input_param.shape[1] == hidden_size + + if num_tok_input != num_tok_output: + gs_input = int(math.sqrt(num_tok_input)) + gs_new = (num_patches_per_dim_h, num_patches_per_dim_w) + + input_param_grid = input_param_grid.transpose(0, 1).contiguous() + input_param_grid = input_param_grid.reshape((1, -1, gs_input, gs_input)) + input_param_grid = input_param_grid.float() + scale_factor = (gs_new[0] / gs_input, gs_new[1] / gs_input) + + input_param_grid = F.interpolate(input_param_grid, scale_factor=scale_factor, mode="bilinear") + + input_param_grid = input_param_grid.half() + input_param_grid = input_param_grid.reshape((-1, num_tok_output)) + input_param_grid = input_param_grid.transpose(0, 1).contiguous() + + assert input_param_grid.shape[1] == hidden_size + + input_param = input_param_grid + assert input_param.shape[0] == num_tok_output and input_param.shape[1] == hidden_size + + if output_has_class_token: + input_param = torch.cat((input_param_tok, input_param), dim=0) + + state_dict[key] = input_param + + +class VitBackbone(MegatronModule): + """Vision Transformer Model.""" + + def __init__( + self, + model_cfg, + model_parallel_config, + init_method=None, + scaled_init_method=None, + pre_process=True, + post_process=True, + class_token=True, + single_token_output=False, + ): + super(VitBackbone, self).__init__(share_token_embeddings=False) + + self.fp16_lm_cross_entropy = model_cfg.fp16_lm_cross_entropy + num_layers = model_cfg.num_layers + init_method_std = model_cfg.init_method_std + if init_method is None: + init_method = init_method_normal(init_method_std) + if scaled_init_method is None: + scaled_init_method = scaled_init_method_normal(init_method_std, num_layers) + + self.pre_process = pre_process + self.post_process = post_process + self.class_token = class_token + self.hidden_size = model_cfg.hidden_size + self.patch_dim = model_cfg.patch_dim + self.img_h = model_cfg.img_h + self.img_w = model_cfg.img_w + self.single_token_output = single_token_output + self.drop_patch_rate = model_cfg.get("drop_patch_rate", 0.0) + self.drop_path_rate = model_cfg.get("drop_path_rate", 0.0) + preprocess_layernorm = model_cfg.get("preprocess_layernorm", False) + + assert self.img_h % self.patch_dim == 0 + assert self.img_w % self.patch_dim == 0 + self.num_patches_per_dim_h = self.img_h // self.patch_dim + self.num_patches_per_dim_w = self.img_w // self.patch_dim + self.num_patches = self.num_patches_per_dim_h * self.num_patches_per_dim_w + class_token_length = model_cfg.get("class_token_length", 8) + self.seq_length = self.num_patches + (class_token_length if self.class_token else 0) + self.flatten_dim = self.patch_dim * self.patch_dim * model_cfg.num_channels + self.input_tensor = None + self.position_ids = None + self.preprocess_layernorm = None + + if self.pre_process: + # cls_token + if self.class_token: + self.cls_token = torch.nn.Parameter(torch.randn(1, class_token_length, self.hidden_size)) + torch.nn.init.zeros_(self.cls_token) + self.position_ids = torch.arange(self.seq_length).expand(1, -1).cuda() + + # Linear encoder + self.linear_encoder = torch.nn.Linear(self.flatten_dim, self.hidden_size) + + # embedding + self.position_embedding_type = model_cfg.get("position_embedding_type", "learned_absolute") + + if self.position_embedding_type == "learned_absolute": + self.position_embeddings = torch.nn.Embedding(self.seq_length, self.hidden_size) + init_method_normal(model_cfg.init_method_std)(self.position_embeddings.weight) + + class_token_present = self.class_token + self.position_embeddings._register_load_state_dict_pre_hook( + partial(twod_interpolate_position_embeddings_hook, model_cfg, class_token_present) + ) + elif self.position_embedding_type == "learned_parameters": + self.position_embeddings = torch.nn.Parameter(torch.empty(self.seq_length, self.hidden_size)) + init_method_normal(model_cfg.init_method_std)(self.position_embeddings) + else: + raise ValueError(f"Unrecognized positional embedding type {self.position_embedding_type}!") + + self.embedding_dropout = torch.nn.Dropout(model_cfg.hidden_dropout) + self.drop_patch = DropPatch( + self.drop_patch_rate, class_token_length=class_token_length, exclude_cls_tokens=self.class_token + ) + + if preprocess_layernorm: + self.preprocess_layernorm = get_layer_norm( + model_cfg.hidden_size, + model_cfg.layernorm_epsilon, + model_cfg.persist_layer_norm, + sequence_parallel=model_cfg.sequence_parallel, + ) + + self.transformer = ParallelVisionTransformer( + config=model_parallel_config, + init_method=init_method, + output_layer_init_method=scaled_init_method, + num_layers=model_cfg.num_layers, + hidden_size=model_cfg.hidden_size, + num_attention_heads=model_cfg.num_attention_heads, + apply_query_key_layer_scaling=model_cfg.apply_query_key_layer_scaling, + kv_channels=model_cfg.kv_channels, + ffn_hidden_size=model_cfg.ffn_hidden_size, + # self_attn_mask_type=self.encoder_attn_mask_type, # TODO (yuya) + pre_process=self.pre_process, + post_process=self.post_process, + precision=model_cfg.precision, + fp32_residual_connection=model_cfg.fp32_residual_connection, + activations_checkpoint_method=model_cfg.activations_checkpoint_method, + activations_checkpoint_num_layers=model_cfg.activations_checkpoint_num_layers, + normalization=model_cfg.normalization, + layernorm_epsilon=model_cfg.layernorm_epsilon, + hidden_dropout=model_cfg.hidden_dropout, + attention_dropout=model_cfg.attention_dropout, + drop_path_rate=model_cfg.drop_path_rate, + bias_activation_fusion=model_cfg.get("bias_activation_fusion", False), + persist_layer_norm=model_cfg.persist_layer_norm, + openai_gelu=model_cfg.openai_gelu, + onnx_safe=model_cfg.onnx_safe, + masked_softmax_fusion=model_cfg.masked_softmax_fusion, + megatron_legacy=model_cfg.megatron_legacy, + activations_checkpoint_granularity=model_cfg.activations_checkpoint_granularity, + activation=model_cfg.get('activation', 'gelu'), + ub_tp_comm_overlap=model_cfg.get('ub_tp_comm_overlap', False), + use_flash_attention=model_cfg.get('use_flash_attention', False), + ) + + def set_input_tensor(self, input_tensor): + """See megatron.model.transformer.set_input_tensor()""" + self.transformer.set_input_tensor(input_tensor) + + def forward(self, input): + + if self.pre_process: + rearranged_input = einops.rearrange( + input, "b c (h p1) (w p2) -> b (h w) (p1 p2 c)", p1=self.patch_dim, p2=self.patch_dim, + ) + + # [b num_patch patch_dim*patch_dim*c] -> [b, s, h]; s:=num_patch, h:=hidden + encoder_output = self.linear_encoder(rearranged_input) + + concatenated_tokens = encoder_output + if self.class_token: + cls_tokens = self.cls_token.expand(encoder_output.shape[0], -1, -1) + concatenated_tokens = torch.cat((cls_tokens, encoder_output), dim=1) + + if self.position_embedding_type == "learned_absolute": + token_embeddings = concatenated_tokens + self.position_embeddings( + self.position_ids[:, : concatenated_tokens.shape[1]] + ) + elif self.position_embedding_type == "learned_parameters": + token_embeddings = concatenated_tokens + self.position_embeddings + + # a patch_dropout of 0. would mean it is disabled and this function would do nothing but return what was passed in + token_embeddings = self.drop_patch(token_embeddings) + + if self.preprocess_layernorm is not None: + token_embeddings = self.preprocess_layernorm(token_embeddings) + + # [b s h] => [s b h] + token_embeddings = token_embeddings.transpose(0, 1).contiguous() + hidden_states = self.embedding_dropout(token_embeddings) + else: + hidden_states = input + + # 0 represents masking, 1 represents not masking + # attention_mask = torch.zeros( + # [1, 1, hidden_states.shape[0], hidden_states.shape[0]], + # device=hidden_states.device, + # dtype=torch.bool, + # ) + hidden_states = self.transformer(hidden_states, None) + + if self.post_process: + # [s b h] => [b s h] + if self.single_token_output: + hidden_states = hidden_states[0] + else: + hidden_states = hidden_states.transpose(0, 1).contiguous() + + return hidden_states diff --git a/nemo/collections/vision/parts/__init__.py b/nemo/collections/vision/parts/__init__.py new file mode 100644 index 000000000000..2db92b257416 --- /dev/null +++ b/nemo/collections/vision/parts/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/requirements/requirements_vision.txt b/requirements/requirements_vision.txt new file mode 100644 index 000000000000..d3d4b29db16f --- /dev/null +++ b/requirements/requirements_vision.txt @@ -0,0 +1,8 @@ +boto3 +einops +flask_restful +ftfy +gdown +matplotlib>=3.3.2 +nltk>=3.6.5 +numpy diff --git a/scripts/fid-eval-text2img/TFinception_V3.py b/scripts/fid-eval-text2img/TFinception_V3.py new file mode 100644 index 000000000000..6cb212f73ab0 --- /dev/null +++ b/scripts/fid-eval-text2img/TFinception_V3.py @@ -0,0 +1,231 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# share: outside-ok + +""" +Modified from +https://github.com/mseitzer/pytorch-fid + +Code adapted from https://github.com/bioinf-jku/TTUR to use PyTorch instead +of Tensorflow +Copyright 2018 Institute of Bioinformatics, JKU Linz +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import torch +import torch.nn.functional as F +from torch import nn + +# from imaginaire.utils.misc import apply_imagenet_normalization + + +try: + from torchvision.models.utils import load_state_dict_from_url +except ImportError: + from torch.utils.model_zoo import load_url as load_state_dict_from_url + +from torchvision.models import inception, inception_v3, vgg16 + +# Inception weights ported to Pytorch from +# http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz +FID_WEIGHTS_URL = ( + 'https://github.com/mseitzer/pytorch-fid/releases' '/download/fid_weights/pt_inception-2015-12-05-6726825d.pth' +) + + +class SwAV(nn.Module): + def __init__(self): + super().__init__() + self.model = torch.hub.load('facebookresearch/swav', 'resnet50', pretrained=True) + self.model.fc = torch.nn.Sequential() + + def forward(self, x, align_corners=True): + y = self.model(F.interpolate(x, size=(224, 224), mode='bicubic', align_corners=align_corners)) + return y + + +class Vgg16(nn.Module): + def __init__(self): + super().__init__() + self.model = vgg16(pretrained=True, init_weights=False) + self.model.classifier = torch.nn.Sequential(*[self.model.classifier[i] for i in range(4)]) + + def forward(self, x, align_corners=True): + y = self.model(F.interpolate(x, size=(224, 224), mode='bicubic', align_corners=align_corners)) + return y + + +class InceptionV3(nn.Module): + def __init__(self): + super().__init__() + self.model = inception_v3(transform_input=False, pretrained=True, init_weights=False) + self.model.fc = torch.nn.Sequential() + + def forward(self, x, align_corners=True): + y = self.model(F.interpolate(x, size=(299, 299), mode='bicubic', align_corners=align_corners)) + return y + + +class TFInceptionV3(nn.Module): + def __init__(self): + super().__init__() + self.model = inception_v3( + transform_input=False, num_classes=1008, aux_logits=False, pretrained=False, init_weights=False + ) + self.model.Mixed_5b = FIDInceptionA(192, pool_features=32) + self.model.Mixed_5c = FIDInceptionA(256, pool_features=64) + self.model.Mixed_5d = FIDInceptionA(288, pool_features=64) + self.model.Mixed_6b = FIDInceptionC(768, channels_7x7=128) + self.model.Mixed_6c = FIDInceptionC(768, channels_7x7=160) + self.model.Mixed_6d = FIDInceptionC(768, channels_7x7=160) + self.model.Mixed_6e = FIDInceptionC(768, channels_7x7=192) + self.model.Mixed_7b = FIDInceptionE_1(1280) + self.model.Mixed_7c = FIDInceptionE_2(2048) + + state_dict = load_state_dict_from_url(FID_WEIGHTS_URL, progress=True, map_location='cpu') + self.model.load_state_dict(state_dict) + self.model.fc = torch.nn.Sequential() + + def forward(self, x, align_corners=True): + # x = apply_imagenet_normalization(x) + y = self.model(F.interpolate(x, size=(299, 299), mode='bicubic', align_corners=align_corners)) + return y + + +class FIDInceptionA(inception.InceptionA): + """InceptionA block patched for FID computation""" + + def __init__(self, in_channels, pool_features): + super(FIDInceptionA, self).__init__(in_channels, pool_features) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch5x5 = self.branch5x5_1(x) + branch5x5 = self.branch5x5_2(branch5x5) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + # Patch: Tensorflow's average pool does not use the padded zero's in + # its average calculation + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, count_include_pad=False) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] + return torch.cat(outputs, 1) + + +class FIDInceptionC(inception.InceptionC): + """InceptionC block patched for FID computation""" + + def __init__(self, in_channels, channels_7x7): + super(FIDInceptionC, self).__init__(in_channels, channels_7x7) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch7x7 = self.branch7x7_1(x) + branch7x7 = self.branch7x7_2(branch7x7) + branch7x7 = self.branch7x7_3(branch7x7) + + branch7x7dbl = self.branch7x7dbl_1(x) + branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) + + # Patch: Tensorflow's average pool does not use the padded zero's in + # its average calculation + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, count_include_pad=False) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool] + return torch.cat(outputs, 1) + + +class FIDInceptionE_1(inception.InceptionE): + """First InceptionE block patched for FID computation""" + + def __init__(self, in_channels): + super(FIDInceptionE_1, self).__init__(in_channels) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch3x3 = self.branch3x3_1(x) + branch3x3 = [ + self.branch3x3_2a(branch3x3), + self.branch3x3_2b(branch3x3), + ] + branch3x3 = torch.cat(branch3x3, 1) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = [ + self.branch3x3dbl_3a(branch3x3dbl), + self.branch3x3dbl_3b(branch3x3dbl), + ] + branch3x3dbl = torch.cat(branch3x3dbl, 1) + + # Patch: Tensorflow's average pool does not use the padded zero's in + # its average calculation + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, count_include_pad=False) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] + return torch.cat(outputs, 1) + + +class FIDInceptionE_2(inception.InceptionE): + """Second InceptionE block patched for FID computation""" + + def __init__(self, in_channels): + super(FIDInceptionE_2, self).__init__(in_channels) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch3x3 = self.branch3x3_1(x) + branch3x3 = [ + self.branch3x3_2a(branch3x3), + self.branch3x3_2b(branch3x3), + ] + branch3x3 = torch.cat(branch3x3, 1) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = [ + self.branch3x3dbl_3a(branch3x3dbl), + self.branch3x3dbl_3b(branch3x3dbl), + ] + branch3x3dbl = torch.cat(branch3x3dbl, 1) + + # Patch: The FID Inception model uses max pooling instead of average + # pooling. This is likely an error in this specific Inception + # implementation, as other Inception models use average pooling here + # (which matches the description in the paper). + branch_pool = F.max_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] + return torch.cat(outputs, 1) diff --git a/scripts/fid-eval-text2img/compute_clip_score.py b/scripts/fid-eval-text2img/compute_clip_score.py new file mode 100644 index 000000000000..da587a9c3c32 --- /dev/null +++ b/scripts/fid-eval-text2img/compute_clip_score.py @@ -0,0 +1,120 @@ +""" +python clip_script.py --captions_path /path/to/coco2014_val/captions \ + --fid_images_path /path/to/synthetic_images \ + --output_path /path/to/output/clip_scores.csv + +1. `--captions_path`: The path to the real images captions directory. In this example, + it is set to `/path/to/coco2014_val/captions`. This path should point to the + directory containing the COCO 2014 validation dataset captions. + +2. `--fid_images_path`: The path to the directory containing subfolders with synthetic + images. In this example, it is set to `/path/to/synthetic_images`. Each subfolder + should contain a set of synthetic images for which you want to compute CLIP scores + against the captions from `--captions_path`. + +3. `--output_path`: The path to the output CSV file where the CLIP scores will be saved. + In this example, it is set to `/path/to/output/clip_scores.csv`. This file will + contain a table with two columns: `cfg` and `clip_score`. The `cfg` + column lists the names of the subfolders in `--fid_images_path`, and the + `clip_score` column lists the corresponding average CLIP scores between the synthetic + images in each subfolder and the captions from `--captions_path`. +""" + +import argparse +import csv +import os +from glob import glob + +import open_clip +import torch +import torch.nn as nn +from PIL import Image +from tqdm import tqdm + + +class CLIPEncoder(nn.Module): + def __init__(self, clip_version='ViT-B/32', pretrained='', cache_dir=None, device='cuda'): + super().__init__() + + self.clip_version = clip_version + if not pretrained: + if self.clip_version == 'ViT-H-14': + self.pretrained = 'laion2b_s32b_b79k' + elif self.clip_version == 'ViT-g-14': + self.pretrained = 'laion2b_s12b_b42k' + else: + self.pretrained = 'openai' + + self.model, _, self.preprocess = open_clip.create_model_and_transforms( + self.clip_version, pretrained=self.pretrained, cache_dir=cache_dir + ) + + self.model.eval() + self.model.to(device) + + self.device = device + + @torch.no_grad() + def get_clip_score(self, text, image): + if isinstance(image, str): # filenmae + image = Image.open(image) + if isinstance(image, Image.Image): # PIL Image + image = self.preprocess(image).unsqueeze(0).to(self.device) + image_features = self.model.encode_image(image).float() + image_features /= image_features.norm(dim=-1, keepdim=True) + + if not isinstance(text, (list, tuple)): + text = [text] + text = open_clip.tokenize(text).to(self.device) + text_features = self.model.encode_text(text).float() + text_features /= text_features.norm(dim=-1, keepdim=True) + similarity = image_features @ text_features.T + + return similarity + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--captions_path', default='/coco2014/coco2014_val_sampled_30k/captions/', type=str) + parser.add_argument('--fid_images_path', default=None, type=str) + parser.add_argument('--output_path', default='./clip_scores.csv', type=str) + args = parser.parse_args() + + captions_path = args.captions_path + print('Init CLIP Encoder..') + encoder = CLIPEncoder(clip_version='ViT-L-14') + + # Create output CSV file + with open(args.output_path, 'w', newline='') as csvfile: + fieldnames = ['cfg', 'clip_score'] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + + # Iterate through subfolders in fid_images_path + for subfolder in os.listdir(args.fid_images_path): + subfolder_path = os.path.join(args.fid_images_path, subfolder) + if os.path.isdir(subfolder_path): + images = sorted( + glob(f'{subfolder_path}/*.png'), key=lambda x: (int(x.split('/')[-1].strip('.png').split('_')[1])) + ) + texts = sorted(glob(f'{captions_path}/*.txt')) + print(images[:5], texts[:5]) + assert len(images) == len(texts) + print(f'Number of images text pairs: {len(images)}') + + ave_sim = 0.0 + count = 0 + for text, img in zip(tqdm(texts), images): + with open(text, 'r') as f: + text = f.read().strip() + sim = encoder.get_clip_score(text, img) + ave_sim += sim + count += 1 + if count % 2000 == 0: + print(ave_sim / count) + + ave_sim /= count + print(f'The CLIP similarity for CFG {subfolder}: {ave_sim}') + + # Write CLIP score to output CSV file + writer.writerow({'cfg': subfolder, 'clip_score': ave_sim}) diff --git a/scripts/fid-eval-text2img/compute_fid.py b/scripts/fid-eval-text2img/compute_fid.py new file mode 100644 index 000000000000..cbeb81e1e4a7 --- /dev/null +++ b/scripts/fid-eval-text2img/compute_fid.py @@ -0,0 +1,409 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import collections +import os + +import numpy as np +import torch +import torch.distributed as dist +import torch.nn.functional as F +from scipy import linalg +from TFinception_V3 import InceptionV3, SwAV, TFInceptionV3, Vgg16 +from torch import nn + + +def network_init(network='inception'): + # inception = inception_v3(pretrained=True, transform_input=False) + # inception = inception.to('cuda') + # inception.eval() + # inception.fc = torch.nn.Sequential() + + if dist.is_initialized() and not is_local_master(): + # Make sure only the first process in distributed training downloads + # the model, and the others will use the cache + # noinspection PyUnresolvedReferences + torch.distributed.barrier() + + if network == 'tf_inception': + model = TFInceptionV3() + elif network == 'inception': + model = InceptionV3() + elif network == 'vgg16': + model = Vgg16() + elif network == 'swav': + model = SwAV() + elif network == 'clean_inception': + model = CleanInceptionV3() + else: + raise NotImplementedError(f'Network "{network}" is not supported!') + + if dist.is_initialized() and is_local_master(): + # Make sure only the first process in distributed training downloads + # the model, and the others will use the cache + # noinspection PyUnresolvedReferences + dist.barrier() + + model = model.to('cuda').eval() + return model + + +def _calculate_frechet_distance(act_1, act_2, eps=1e-6): + mu1 = np.mean(act_1.cpu().numpy(), axis=0) + sigma1 = np.cov(act_1.cpu().numpy(), rowvar=False) + mu2 = np.mean(act_2.cpu().numpy(), axis=0) + sigma2 = np.cov(act_2.cpu().numpy(), rowvar=False) + mu1 = np.atleast_1d(mu1) + mu2 = np.atleast_1d(mu2) + sigma1 = np.atleast_2d(sigma1) + sigma2 = np.atleast_2d(sigma2) + assert mu1.shape == mu2.shape, 'Training and test mean vectors have different lengths' + assert sigma1.shape == sigma2.shape, 'Training and test covariances have different dimensions' + diff = mu1 - mu2 + # Product might be almost singular + covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False) + if not np.isfinite(covmean).all(): + msg = ('fid calculation produces singular product; ' 'adding %s to diagonal of cov estimates') % eps + print(msg) + offset = np.eye(sigma1.shape[0]) * eps + covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset)) + + # Numerical error might give slight imaginary component + if np.iscomplexobj(covmean): + if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3): + m = np.max(np.abs(covmean.imag)) + print('Imaginary component {}'.format(m)) + # raise ValueError('Imaginary component {}'.format(m)) + covmean = covmean.real + tr_covmean = np.trace(covmean) + return {"FID": (diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean)} + + +def is_master(): + r"""check if current process is the master""" + return get_rank() == 0 + + +def get_rank(): + r"""Get rank of the thread.""" + rank = 0 + if dist.is_available(): + if dist.is_initialized(): + rank = dist.get_rank() + return rank + + +def is_local_master(): + return torch.cuda.current_device() == 0 + + +def load_or_compute_activations( + act_path, + data_loader, + key_real, + key_fake, + generator=None, + sample_size=None, + preprocess=None, + is_video=False, + few_shot_video=False, + network='inception', + **kwargs, +): + r"""Load mean and covariance from saved npy file if exists. Otherwise, compute the mean and covariance. + + Args: + act_path (str or None): Location for the numpy file to store or to load the activations. + data_loader (obj): PyTorch dataloader object. + key_real (str): Dictionary key value for the real data. + key_fake (str): Dictionary key value for the fake data. + generator (obj): PyTorch trainer network. + sample_size (int): How many samples to be used for computing the KID. + preprocess (func): The preprocess function to be applied to the data. + is_video (bool): Whether we are handling video sequences. + few_shot_video (bool): If ``True``, uses few-shot video synthesis. + network (str): Which recognition backbone to use. + Returns: + (torch.Tensor) Feature activations. + """ + if act_path is not None and os.path.exists(act_path): + # Loading precomputed activations. + print('Load activations from {}'.format(act_path)) + act = torch.load(act_path, map_location='cpu').cuda() + else: + # Compute activations. + if is_video: + act = get_video_activations( + data_loader, key_real, key_fake, generator, sample_size, preprocess, few_shot_video, network, **kwargs + ) + else: + act = get_activations( + data_loader, key_real, key_fake, generator, sample_size, preprocess, True, network, **kwargs + ) + if act_path is not None and is_local_master(): + print('Save activations to {}'.format(act_path)) + if not os.path.exists(os.path.dirname(act_path)): + os.makedirs(os.path.dirname(act_path), exist_ok=True) + torch.save(act, act_path) + return act + + +@torch.no_grad() +def compute_fid( + fid_path, + data_loader, + net_G, + key_real='images', + key_fake='fake_images', + sample_size=None, + preprocess=None, + return_act=False, + is_video=False, + few_shot_video=False, + **kwargs, +): + r"""Compute the fid score. + + Args: + fid_path (str): Location for the numpy file to store or to load the statistics. + data_loader (obj): PyTorch dataloader object. + net_G (obj): For image generation modes, net_G is the generator network. + For video generation models, net_G is the trainer. + key_real (str): Dictionary key value for the real data. + key_fake (str): Dictionary key value for the fake data. + sample_size (int or tuple): How many samples to be used. + preprocess (func): The preprocess function to be applied to the data. + return_act (bool): If ``True``, also returns feature activations of + real and fake data. + is_video (bool): Whether we are handling video sequences. + few_shot_video (bool): If ``True``, uses few-shot video synthesis. + Returns: + (float): FID value. + """ + print('Computing FID.') + act_path = os.path.join(os.path.dirname(fid_path), 'activations_real.npy') + # Get the fake mean and covariance. + fake_act = load_or_compute_activations( + None, + data_loader, + key_real, + key_fake, + net_G, + sample_size, + preprocess, + is_video=is_video, + few_shot_video=few_shot_video, + **kwargs, + ) + + # Get the ground truth mean and covariance. + real_act = load_or_compute_activations( + act_path, + data_loader, + key_real, + key_fake, + None, + sample_size, + preprocess, + is_video=is_video, + few_shot_video=few_shot_video, + **kwargs, + ) + + if is_master(): + fid = _calculate_frechet_distance(fake_act, real_act)["FID"] + if return_act: + return fid, real_act, fake_act + else: + return fid + elif return_act: + return None, None, None + else: + return None + + +def get_world_size(): + r"""Get world size. How many GPUs are available in this job.""" + world_size = 1 + if dist.is_available(): + if dist.is_initialized(): + world_size = dist.get_world_size() + return world_size + + +def dist_all_gather_tensor(tensor): + r""" gather to all ranks """ + world_size = get_world_size() + if world_size < 2: + return [tensor] + tensor_list = [torch.ones_like(tensor) for _ in range(dist.get_world_size())] + with torch.no_grad(): + dist.all_gather(tensor_list, tensor) + return tensor_list + + +def to_device(data, device): + r"""Move all tensors inside data to device. + + Args: + data (dict, list, or tensor): Input data. + device (str): 'cpu' or 'cuda'. + """ + assert device in ['cpu', 'cuda'] + string_classes = (str, bytes) + if isinstance(data, torch.Tensor): + data = data.to(torch.device(device)) + return data + elif isinstance(data, collections.abc.Mapping): + return type(data)({key: to_device(data[key], device) for key in data}) + elif isinstance(data, collections.abc.Sequence) and not isinstance(data, string_classes): + return type(data)([to_device(d, device) for d in data]) + else: + return data + + +def to_cuda(data): + r"""Move all tensors inside data to gpu. + + Args: + data (dict, list, or tensor): Input data. + """ + return to_device(data, 'cuda') + + +@torch.no_grad() +def get_activations( + data_loader, + key_real, + key_fake, + generator=None, + sample_size=None, + preprocess=None, + align_corners=True, + network='inception', + **kwargs, +): + r"""Compute activation values and pack them in a list. + + Args: + data_loader (obj): PyTorch dataloader object. + key_real (str): Dictionary key value for the real data. + key_fake (str): Dictionary key value for the fake data. + generator (obj): PyTorch trainer network. + sample_size (int): How many samples to use for FID. + preprocess (func): Pre-processing function to use. + align_corners (bool): The ``'align_corners'`` parameter to be used for `torch.nn.functional.interpolate`. + network (str): Which recognition backbone to use. + Returns: + batch_y (tensor): Inception features of the current batch. Note that only the master gpu will get it. + """ + model = network_init(network) + batch_y = [] + world_size = get_world_size() + + # Iterate through the dataset to compute the activation. + for it, data in enumerate(data_loader): + data = to_cuda(data) + # Preprocess the data. + if preprocess is not None: + data = preprocess(data) + # Load real data if the generator is not specified. + if generator is None: + images = data[key_real] + if torch.max(images) > 1: + images = images / 255.0 # convert RGB to (0,1) + else: + # Compute the generated image. + text = data[1]['caption'] ### input is captions + net_G_output = generator(text, **kwargs) + images = net_G_output + # Clamp the image for models that do not set the output to between + # -1, 1. For models that employ tanh, this has no effect. + images.clamp_(-1, 1) + y = model(images, align_corners=align_corners) + # y = network_forward(model, images, align_corners=align_corners) + batch_y.append(y) + if sample_size is not None and data_loader.batch_size * world_size * (it + 1) >= sample_size: + # Reach the number of samples we need. + break + + batch_y = torch.cat(dist_all_gather_tensor(torch.cat(batch_y))) + if sample_size is not None: + batch_y = batch_y[:sample_size] + print(f"Computed feature activations of size {batch_y.shape}") + return batch_y + + +@torch.no_grad() +def compute_fid_data( + folder_to_store_real_act, + data_loader_a, + data_loader_b, + key_a='images', + key_b='images', + sample_size=None, + is_video=False, + few_shot_video=False, + network='inception', + **kwargs, +): + r"""Compute the fid score between two datasets. + + Args: + folder_to_store_real_act (str): Location to store the statistics or to load the statistics. + data_loader_a (obj): PyTorch dataloader object for dataset a. + data_loader_b (obj): PyTorch dataloader object for dataset b. + key_a (str): Dictionary key value for images in the dataset a. + key_b (str): Dictionary key value for images in the dataset b. + sample_size (int or None): How many samples to be used for computing the FID. + is_video (bool): Whether we are handling video sequences. + few_shot_video (bool): If ``True``, uses few-shot video synthesis. + network (str): Which recognition backbone to use. + Returns: + (float): FID value. + """ + print('Computing FID.') + if folder_to_store_real_act is None: + path_a = None + else: + path_a = os.path.join(os.path.dirname(folder_to_store_real_act), 'activations_a.npy') + # min_data_size = min(len(data_loader_a.dataset), len(data_loader_b.dataset)) + # sample_size = min_data_size if sample_size is None else min(sample_size, min_data_size) + + act_a = load_or_compute_activations( + path_a, + data_loader_a, + key_a, + key_b, + None, + sample_size=sample_size, + is_video=is_video, + few_shot_video=few_shot_video, + network=network, + **kwargs, + ) + act_b = load_or_compute_activations( + None, + data_loader_b, + key_a, + key_b, + None, + sample_size=sample_size, + is_video=is_video, + few_shot_video=few_shot_video, + network=network, + **kwargs, + ) + print(act_a.shape, act_b.shape) + if is_master(): + return _calculate_frechet_distance(act_a, act_b)["FID"] diff --git a/scripts/fid-eval-text2img/eval_fid.py b/scripts/fid-eval-text2img/eval_fid.py new file mode 100644 index 000000000000..d6312fad843a --- /dev/null +++ b/scripts/fid-eval-text2img/eval_fid.py @@ -0,0 +1,100 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Example usage: + python eval_fid.py \ + --coco_images_path /path/to/coco2014_val \ + --fid_images_path /path/to/synthetic_images \ + --output_path /path/to/output/fid_scores.csv + +1. `--coco_images_path`: The path to the real images directory. In this example, + it is set to `/path/to/coco2014_val`. This path should point to the + directory containing the COCO 2014 validation dataset images, resized + to 256x256 pixels. + +2. `--fid_images_path`: The path to the directory containing subfolders + with synthetic images. In this example, it is set to + `/path/to/synthetic_images`. Each subfolder should contain a + set of synthetic images for which you want to compute FID scores + against the real images from `--coco_images_path`. + +3. `--output_path`: The path to the output CSV file where the FID scores + will be saved. In this example, it is set to + `/path/to/output/fid_scores.csv`. This file will contain a table with + two columns: `cfg` and `fid`. The `cfg` column lists the + names of the subfolders in `--fid_images_path`, and the `fid` column + lists the corresponding FID scores between the synthetic images in + each subfolder and the real images from `--coco_images_path`. +""" + +import argparse +import csv +import os +import torch + +from compute_fid import compute_fid_data +from fid_dataset import CustomDataset + +if __name__ == '__main__': + # Argument parsing + parser = argparse.ArgumentParser() + parser.add_argument('--coco_images_path', default='/coco2014/coco2014_val/images_256', type=str) + parser.add_argument('--fid_images_path', default=None, type=str) + parser.add_argument('--output_path', default='./fid_scores.csv', type=str) + args = parser.parse_args() + + # Set paths for synthetic images and real images + fid_images_path = args.fid_images_path + real_path = args.coco_images_path + + # Create dataset and data loader for real images + real_dataset = CustomDataset(real_path) + loader_real = torch.utils.data.DataLoader( + real_dataset, batch_size=32, num_workers=0, pin_memory=True, drop_last=False + ) + + # Create output CSV file + with open(args.output_path, 'w', newline='') as csvfile: + fieldnames = ['cfg', 'fid'] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + + # Iterate through subfolders in fid_images_path + for subfolder in os.listdir(fid_images_path): + subfolder_path = os.path.join(fid_images_path, subfolder) + if os.path.isdir(subfolder_path): + # Create dataset and data loader for synthetic images in subfolder + synthetic_dataset = CustomDataset(subfolder_path, target_size=256) + loader_synthetic = torch.utils.data.DataLoader( + synthetic_dataset, batch_size=32, num_workers=0, pin_memory=True, drop_last=False + ) + + # Compute FID score between synthetic images in subfolder and real images + fid = compute_fid_data( + './', + loader_real, + loader_synthetic, + key_a=0, + key_b=0, + sample_size=None, + is_video=False, + few_shot_video=False, + network='tf_inception', + interpolation_mode='bilinear', + ) + + print(f"The FID score between {subfolder_path} and {real_path} is {fid}") + + # Write FID score to output CSV file + writer.writerow({'cfg': subfolder, 'fid': fid}) diff --git a/scripts/fid-eval-text2img/fid_dataset.py b/scripts/fid-eval-text2img/fid_dataset.py new file mode 100644 index 000000000000..6da1db7cd00c --- /dev/null +++ b/scripts/fid-eval-text2img/fid_dataset.py @@ -0,0 +1,128 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +import numpy as np +import torch +import torch.utils.data as data +import torchvision.transforms as transforms +from PIL import Image +from pycocotools.coco import COCO +from torchvision.io import ImageReadMode, read_image + + +def _pil_interp(method): + if method == 'bicubic': + return Image.BICUBIC + elif method == 'lanczos': + return Image.LANCZOS + elif method == 'hamming': + return Image.HAMMING + else: + # default bilinear, do we want to allow nearest? + return Image.BILINEAR + + +def _size_tuple(size): + if isinstance(size, int): + return size, size + else: + assert len(size) == 2 + return size + + +class CenterCropResize: + def __init__(self, target_size: int, interpolation: str = 'bilinear', fill_color: tuple = (0, 0, 0)): + self.target_size = _size_tuple(target_size) + self.interpolation = interpolation + self.fill_color = fill_color + + def __call__(self, img): + w, h = img.size + img = np.array(img).astype(np.uint8) + crop = min(w, h) + img = img[(h - crop) // 2 : (h + crop) // 2, (w - crop) // 2 : (w + crop) // 2] + image = Image.fromarray(img) + if self.target_size is not None: + interp_method = _pil_interp(self.interpolation) + new_img = image.resize(self.target_size, resample=interp_method) + return new_img + + +class CustomDataset(data.Dataset): + def __init__(self, root, target_size=None): + self.root = root + self.files = [f for f in os.listdir(self.root) if os.path.isfile(os.path.join(self.root, f))] + self.transform = transforms.ToTensor() + self.target_size = target_size + + def __len__(self): + return len(self.files) + + def __getitem__(self, index): + file = self.files[index] + # image = read_image(os.path.join(self.root, file), mode=ImageReadMode.RGB).type(torch.float32) / 255 + image = Image.open(os.path.join(self.root, file)).convert('RGB') + if self.target_size is not None: + image = image.resize((self.target_size, self.target_size), resample=Image.BICUBIC) + image = self.transform(image) + image = 2 * image - 1 + return image, file + + +class CocoDataset(data.Dataset): + def __init__(self, root, ann_file, captions, transform=None, target_size=None): + self.root = root + self.coco = None + self.captions = captions + self.img_ids = [x['image_id'] for x in self.captions] + self.has_annotations = 'image_info' not in ann_file + self.transforms = [transforms.ToTensor()] + if transform is not None: + self.transforms.append(transform) + self.target_size = target_size + self.img_ids_invalid = [] + self.img_infos = [] + self._load_annotations(ann_file) + + def _load_annotations(self, ann_file): + assert self.coco is None + self.coco = COCO(ann_file) + img_ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values()) + for img_id in self.img_ids: + info = self.coco.loadImgs([img_id])[0] + valid_annotation = not self.has_annotations or img_id in img_ids_with_ann + if valid_annotation and min(info['width'], info['height']) >= 32: + self.img_infos.append(info) + else: + self.img_ids_invalid.append(img_id) + + def __len__(self): + return len(self.img_infos) + + def _compose(self, image): + for t in self.transforms[::-1]: + image = t(image) + return image + + def __getitem__(self, index): + img_id = self.img_ids[index] + img_info = self.img_infos[index] + cap = self.captions[index] + path = img_info['file_name'] + image = Image.open(os.path.join(self.root, path)).convert('RGB') + if self.target_size is not None: + image = image.resize((512, 512)) + image = self._compose(image) + return image, cap diff --git a/scripts/fid-eval-text2img/plot.py b/scripts/fid-eval-text2img/plot.py new file mode 100644 index 000000000000..e9217f4d6e72 --- /dev/null +++ b/scripts/fid-eval-text2img/plot.py @@ -0,0 +1,40 @@ +""" +python plot_fid_vs_clip.py \ + --fid_scores_csv path/to/fid_scores.csv \ + --clip_scores_csv path/to/clip_scores.csv +Replace path/to/fid_scores.csv and path/to/clip_scores.csv with the paths +to the respective CSV files. The script will display the plot with FID +scores against CLIP scores, with cfg values annotated on each point. +""" + +import argparse + +import matplotlib.pyplot as plt +import pandas as pd + + +def plot_fid_vs_clip(fid_scores_csv, clip_scores_csv): + fid_scores = pd.read_csv(fid_scores_csv) + clip_scores = pd.read_csv(clip_scores_csv) + merged_data = pd.merge(fid_scores, clip_scores, on='cfg') + + fig, ax = plt.subplots() + ax.plot(merged_data['clip_score'], merged_data['fid'], marker='o', linestyle='-') # Connect points with a line + + for i, txt in enumerate(merged_data['cfg']): + ax.annotate(txt, (merged_data['clip_score'][i], merged_data['fid'][i])) + + ax.set_xlabel('CLIP Score') + ax.set_ylabel('FID') + ax.set_title('FID vs CLIP Score') + + plt.show() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--fid_scores_csv', required=True, type=str, help='Path to the FID scores CSV file') + parser.add_argument('--clip_scores_csv', required=True, type=str, help='Path to the CLIP scores CSV file') + args = parser.parse_args() + + plot_fid_vs_clip(args.fid_scores_csv, args.clip_scores_csv) diff --git a/tests/collections/multimodal/test_clip_model.py b/tests/collections/multimodal/test_clip_model.py new file mode 100644 index 000000000000..781757c5869f --- /dev/null +++ b/tests/collections/multimodal/test_clip_model.py @@ -0,0 +1,482 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest +import torch +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning import Trainer + +from nemo.collections.multimodal.data.clip.clip_dataset import build_train_valid_datasets +from nemo.collections.multimodal.models.clip.megatron_clip_models import ( + CLIPModel, + CLIPTextTransformer, + CLIPVisionTransformer, + MegatronCLIPModel, +) +from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy + +DEVICE_CAPABILITY = None +if torch.cuda.is_available(): + DEVICE_CAPABILITY = torch.cuda.get_device_capability() + + +@pytest.fixture() +def model_cfg(): + + model_cfg_string = """ + precision: 16 + micro_batch_size: 2 # limited by GPU memory + global_batch_size: 2 # will use more micro batches to reach global batch size + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_path: null # used in fine-tuning + # multimodal configs + output_dim: 64 + local_loss: False # calculate loss w/ local features @ global (instead of realizing full global @ global matrix) + gather_with_grad: True # enable full distributed gradient for feature gather, set this to False may cause convergence issue + + vision: + precision: 16 + # vision configs + patch_dim: 16 + img_h: 224 + img_w: 224 + image_mean: null + image_std: null + num_channels: 3 + drop_patch_rate: 0.0 + drop_path_rate: 0.0 + global_average_pool: False + output_dim: 64 + class_token_length: 8 + preprocess_layernorm: True # apply layer norm to embedded tokens + + # model architecture + encoder_seq_length: 196 + max_position_embeddings: 196 + position_embedding_type: learned_absolute + num_layers: 2 + hidden_size: 768 + ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 12 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + + text: + precision: 16 + # text configs + output_dim: 64 + + # model architecture + encoder_seq_length: 77 + max_position_embeddings: 77 + position_embedding_type: learned_absolute + num_layers: 2 + hidden_size: 512 + ffn_hidden_size: 2048 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 8 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0. # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + + # Megatron O2-style half-precision + megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'huggingface' + type: 'openai/clip-vit-large-patch14' + model: null + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + make_vocab_size_divisible_by: 128 # Pad the vocab size to be divisible by this value for computation efficiency. + + data: + num_workers: 1 + dataset_type: webdataset + + train: + data_path: # List of paths to pkl files or tar files + - /lustre/fsw/joc/multimodal/datasets/cc3m/00000-00008_{000000..000001}.tar + drop_last: True # drop_last = False is not implemented yet + validation: # List of paths to pkl files or tar files + data_path: + - /lustre/fsw/joc/multimodal/datasets/cc3m/00000-00008_000002.tar + drop_last: True # drop_last = False is not implemented yet + webdataset: + object_store: False + bucket: datasets + pbss_credentials_file: pbss_credential + local_root_path: / # tar files local root path + chunk_size: 1000 # if data path is list of tar files, chunk_size needs to be provided + + imagenet_val: null # Path to imagenet val set for conducting zero shot evaluation. + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 1e-3 + weight_decay: 0.2 + betas: + - 0.9 + - 0.98 + sched: + name: CosineAnnealing + warmup_steps: 2000 + constant_steps: 0 + min_lr: 1e-5 + """ + model_cfg = OmegaConf.create(model_cfg_string) + return model_cfg + + +@pytest.fixture() +def trainer_cfg(): + + trainer_cfg_string = """ + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: -1 + max_steps: 4 + log_every_n_steps: 1 + val_check_interval: 4 + limit_val_batches: 2 + limit_test_batches: 2 + accumulate_grad_batches: 1 + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False + """ + trainer_cfg = OmegaConf.create(trainer_cfg_string) + + return trainer_cfg + + +@pytest.fixture() +def exp_manager_cfg(): + + exp_manager_cfg_string = """ + explicit_log_dir: null + exp_dir: null + name: megatron_clip + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: False + resume_ignore_no_checkpoint: True + create_checkpoint_callback: False + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_vit_classify--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: 1 + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + """ + exp_manager_cfg = OmegaConf.create(exp_manager_cfg_string) + + return exp_manager_cfg + + +@pytest.fixture() +def precision(): + return 32 + + +@pytest.fixture() +def clip_trainer_and_model(model_cfg, trainer_cfg, precision): + model_cfg['vision']['precision'] = precision + model_cfg['text']['precision'] = precision + trainer_cfg['precision'] = precision + + strategy = NLPDDPStrategy() + + trainer = Trainer(strategy=strategy, **trainer_cfg) + + cfg = DictConfig(model_cfg) + + model = MegatronCLIPModel(cfg=cfg, trainer=trainer) + + def dummy(): + return + + if model.trainer.strategy.launcher is not None: + model.trainer.strategy.launcher.launch(dummy, trainer=model.trainer) + model.trainer.strategy.setup_environment() + + return trainer, model + + +def build_datasets(cfg, tokenizer): + return build_train_valid_datasets(model_cfg=cfg, consumed_samples=0, tokenizer=tokenizer,) + + +@pytest.mark.run_only_on('GPU') +class TestMegatronCLIPModel: + @pytest.mark.unit + def test_constructor(self, clip_trainer_and_model): + clip_model = clip_trainer_and_model[1] + assert isinstance(clip_model, MegatronCLIPModel) + + num_weights = clip_model.num_weights + assert num_weights == 46643969 + + @pytest.mark.unit + def test_build_dataset(self, clip_trainer_and_model, test_data_dir): + clip_model = clip_trainer_and_model[1] + train_ds, validation_ds = build_train_valid_datasets( + model_cfg=clip_model.cfg, consumed_samples=0, tokenizer=clip_model.tokenizer, + ) + assert len(train_ds) == 2000 + assert len(validation_ds) == 1000 + sample = next(iter(train_ds)) + assert "captions" in sample + assert "images" in sample + + @pytest.mark.parametrize( + "precision", + [ + 32, + 16, + pytest.param( + "bf16", + marks=pytest.mark.skipif( + not DEVICE_CAPABILITY or DEVICE_CAPABILITY[0] < 8, + reason='bfloat16 is not supported on this device', + ), + ), + ], + ) + @pytest.mark.unit + def test_forward(self, clip_trainer_and_model, test_data_dir, precision=None): + trainer, clip_model = clip_trainer_and_model + + dtype = None + if clip_model.cfg['precision'] in [32, '32', '32-true']: + dtype = torch.float + elif clip_model.cfg['precision'] in [16, '16', '16-mixed']: + dtype = torch.float16 + elif clip_model.cfg['precision'] in ['bf16', 'bf16-mixed']: + dtype = torch.bfloat16 + else: + raise ValueError(f"precision: {clip_model.cfg['precision']} is not supported.") + + clip_model.eval() + _, validation_ds = build_datasets(clip_model.cfg, clip_model.tokenizer) + + val_loader = torch.utils.data.DataLoader(validation_ds, batch_size=4) + batch = next(iter(val_loader)) + + tokens = batch["images"] + texts = batch["captions"] + with torch.no_grad(): + B, C, H, W = tokens.shape + assert H == W + with torch.autocast('cuda', dtype=dtype): + output_tensor = clip_model(image=tokens.cuda(), text=texts.cuda(),) + # output is (B, #classes) + # assert output_tensor.shape == torch.Size([B, clip_model.cfg['num_classes']]) + # assert output_tensor.dtype == dtype + + # @pytest.mark.unit + # def test_vit_backbone(self, model_cfg, trainer_cfg, precision): + # initialize_model_parallel_for_nemo( + # world_size=1, + # global_rank=0, + # local_rank=0, + # tensor_model_parallel_size=model_cfg.get('tensor_model_parallel_size', 1), + # pipeline_model_parallel_size=model_cfg.get('pipeline_model_parallel_size', 1), + # virtual_pipeline_model_parallel_size=model_cfg.get('virtual_pipeline_model_parallel_size', None), + # pipeline_model_parallel_split_rank=model_cfg.get('pipeline_model_parallel_split_rank', 0), + # micro_batch_size=model_cfg.get('micro_batch_size'), + # global_batch_size=model_cfg.get('global_batch_size'), + # seed=model_cfg.get('seed', 1234), + # apex_transformer_log_level=model_cfg.get('apex_transformer_log_level', 30), + # ) + # + # dtype = None + # if trainer_cfg['precision'] in [32, '32', '32-true']: + # dtype = torch.float + # elif trainer_cfg['precision'] in [16, '16', '16-mixed']: + # dtype = torch.float16 + # elif trainer_cfg['precision'] in ['bf16', 'bf16-mixed']: + # dtype = torch.bfloat16 + # else: + # raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") + # + # vit_backbone = VitBackbone( + # model_cfg, + # init_method=None, + # scaled_init_method=None, + # pre_process=True, + # post_process=True, + # single_token_output=True + # ).cuda() + # vit_backbone.eval() + # + # # shape: (B, C, H, W) + # tokens = torch.rand((6, 3, 224, 224)) + # + # with torch.no_grad(): + # B, C, H, W = tokens.shape + # assert H == W + # with torch.autocast('cuda', dtype=dtype): + # output_tensor = vit_backbone( + # tokens.cuda(), + # ) + # # output is (B, #classes) + # assert output_tensor.shape == torch.Size([B, model_cfg['hidden_size']]) + # assert output_tensor.dtype == dtype + # + # @pytest.mark.unit + # def test_vit_head(self, model_cfg, trainer_cfg, precision): + # dtype = None + # if trainer_cfg['precision'] in [32, '32', '32-true']: + # dtype = torch.float + # elif trainer_cfg['precision'] in [16, '16', '16-mixed']: + # dtype = torch.float16 + # elif trainer_cfg['precision'] in ['bf16', 'bf16-mixed']: + # dtype = torch.bfloat16 + # else: + # raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") + # + # vit_head = VitMlpHead( + # 24, 50, + # ).cuda() + # vit_head.eval() + # + # hidden = torch.rand((6, 24)) + # + # with torch.no_grad(): + # with torch.autocast('cuda', dtype=dtype): + # output_tensor = vit_head( + # hidden.cuda(), + # ) + # # output is (B, #classes) + # assert output_tensor.shape == torch.Size([6, 50]) + # assert output_tensor.dtype == dtype diff --git a/tests/collections/vision/test_vit_model.py b/tests/collections/vision/test_vit_model.py new file mode 100644 index 000000000000..e50106957679 --- /dev/null +++ b/tests/collections/vision/test_vit_model.py @@ -0,0 +1,379 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest +import torch +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning import Trainer + +from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy +from nemo.collections.vision.data.megatron.vit_dataset import build_train_valid_datasets +from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel +from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone, VitMlpHead + +DEVICE_CAPABILITY = None +if torch.cuda.is_available(): + DEVICE_CAPABILITY = torch.cuda.get_device_capability() + + +@pytest.fixture() +def model_cfg(): + model_cfg_string = """ + precision: 16 + micro_batch_size: 2 # limited by GPU memory + global_batch_size: 4 # will use more micro batches to reach global batch size + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_path: null # used in fine-tuning + + # vision configs + vision_pretraining_type: "classify" + num_classes: 1000 + patch_dim: 16 + img_h: 224 + img_w: 224 + classes_fraction: 1.0 + data_per_class_fraction: 1.0 + num_channels: 3 + drop_path_rate: 0.0 + + # model architecture + encoder_seq_length: 4 + max_position_embeddings: ${.encoder_seq_length} + num_layers: 12 + hidden_size: 768 + ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 12 + init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0.1 # Dropout probability for hidden state transformer. + attention_dropout: 0. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: layernorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # Megatron O2-style half-precision + megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + ## Activation Checkpointing + # NeMo Megatron supports 'selective' activation checkpointing where only the memory intensive part of attention is checkpointed. + # These memory intensive activations are also less compute intensive which makes activation checkpointing more efficient for LLMs (20B+). + # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. + # 'full' will checkpoint the entire transformer layer. + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + # 'uniform' divides the total number of transformer layers and checkpoints the input activation + # of each chunk at the specified granularity + # 'block' checkpoints the specified number of layers per pipeline stage at the specified granularity + activations_checkpoint_num_layers: null # not used with 'selective' + # when using 'uniform' this creates groups of transformer layers to checkpoint. Usually set to 1. Increase to save more memory. + # when using 'block' this this will checkpoint the first activations_checkpoint_num_layers per pipeline stage. + + ## Sequence Parallelism + # Makes tensor parallelism more memory efficient for LLMs (20B+) by parallelizing layer norms and dropout sequentially + # See Reducing Activation Recomputation in Large Transformer Models: https://arxiv.org/abs/2205.05198 for more details. + sequence_parallel: False + + data: + # Path to image dataset must be specified by the user. + # Supports List + # List: can override from the CLI: "model.data.data_prefix=[/path/to/train, /path/to/val]", + data_path: "dummy/path" + num_workers: 2 + dataloader_type: cyclic # cyclic + validation_drop_last: True # Set to false if the last partial validation samples is to be consumed + data_sharding: False + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [0] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.999 + sched: + name: CosineAnnealing + warmup_steps: 10000 + constant_steps: 0 + min_lr: 1e-5 + """ + model_cfg = OmegaConf.create(model_cfg_string) + return model_cfg + + +@pytest.fixture() +def trainer_cfg(): + trainer_cfg_string = """ + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: -1 + max_steps: 4 + log_every_n_steps: 1 + val_check_interval: 4 + limit_val_batches: 2 + limit_test_batches: 2 + accumulate_grad_batches: 1 + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False + """ + trainer_cfg = OmegaConf.create(trainer_cfg_string) + + return trainer_cfg + + +@pytest.fixture() +def exp_manager_cfg(): + exp_manager_cfg_string = """ + explicit_log_dir: null + exp_dir: null + name: megatron_vit_classify + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: False + resume_ignore_no_checkpoint: True + create_checkpoint_callback: False + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_vit_classify--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: 1 + """ + exp_manager_cfg = OmegaConf.create(exp_manager_cfg_string) + + return exp_manager_cfg + + +@pytest.fixture() +def precision(): + return 32 + + +@pytest.fixture() +def vit_classification_trainer_and_model(model_cfg, trainer_cfg, precision): + model_cfg['precision'] = precision + trainer_cfg['precision'] = precision + + strategy = NLPDDPStrategy() + + trainer = Trainer(strategy=strategy, **trainer_cfg) + + cfg = DictConfig(model_cfg) + + model = MegatronVitClassificationModel(cfg=cfg, trainer=trainer) + + return trainer, model + + +def build_datasets(cfg, test_data_dir): + data_path = [ + os.path.join(test_data_dir, "vision/tiny_imagenet/train"), + os.path.join(test_data_dir, "vision/tiny_imagenet/val"), + ] + return build_train_valid_datasets(model_cfg=cfg, data_path=data_path, image_size=(cfg.img_h, cfg.img_w),) + + +@pytest.mark.run_only_on('GPU') +class TestMegatronVitClassificationModel: + @pytest.mark.unit + def test_constructor(self, vit_classification_trainer_and_model): + vit_classification_model = vit_classification_trainer_and_model[1] + assert isinstance(vit_classification_model, MegatronVitClassificationModel) + + num_weights = vit_classification_model.num_weights + assert num_weights == 87169000 + + @pytest.mark.unit + def test_build_dataset(self, vit_classification_trainer_and_model, test_data_dir): + vit_classification_model = vit_classification_trainer_and_model[1] + data_path = [ + os.path.join(test_data_dir, "vision/tiny_imagenet/train"), + os.path.join(test_data_dir, "vision/tiny_imagenet/val"), + ] + train_ds, validation_ds = build_train_valid_datasets( + model_cfg=vit_classification_model.cfg, + data_path=data_path, + image_size=(vit_classification_model.cfg.img_h, vit_classification_model.cfg.img_w), + ) + assert len(train_ds) == 20 + assert len(validation_ds) == 20 + assert train_ds[0][0].shape == torch.Size([3, 224, 224]) + assert validation_ds[0][0].shape == torch.Size([3, 224, 224]) + + @pytest.mark.parametrize( + "precision", + [ + 32, + 16, + pytest.param( + "bf16", + marks=pytest.mark.skipif( + not DEVICE_CAPABILITY or DEVICE_CAPABILITY[0] < 8, + reason='bfloat16 is not supported on this device', + ), + ), + ], + ) + @pytest.mark.unit + def test_forward(self, vit_classification_trainer_and_model, test_data_dir): + trainer, vit_classification_model = vit_classification_trainer_and_model + + dtype = None + if vit_classification_model.cfg['precision'] in [32, '32', '32-true']: + dtype = torch.float + elif vit_classification_model.cfg['precision'] in [16, '16', '16-mixed']: + dtype = torch.float16 + elif vit_classification_model.cfg['precision'] in ['bf16', 'bf16-mixed']: + dtype = torch.bfloat16 + else: + raise ValueError(f"precision: {vit_classification_model.cfg['precision']} is not supported.") + + vit_classification_model.eval() + _, validation_ds = build_datasets(vit_classification_model.cfg, test_data_dir) + + # shape: (B, C, H, W) + images = [validation_ds[i][0] for i in range(4)] + tokens = torch.stack(images, dim=0) + + with torch.no_grad(): + B, C, H, W = tokens.shape + assert H == W + with torch.autocast('cuda', dtype=dtype): + output_tensor = vit_classification_model.forward(tokens=tokens.cuda(),) + # output is (B, #classes) + assert output_tensor.shape == torch.Size([B, vit_classification_model.cfg['num_classes']]) + assert output_tensor.dtype == dtype + + @pytest.mark.unit + def test_vit_backbone(self, model_cfg, trainer_cfg, precision): + initialize_model_parallel_for_nemo( + world_size=1, + global_rank=0, + local_rank=0, + tensor_model_parallel_size=model_cfg.get('tensor_model_parallel_size', 1), + pipeline_model_parallel_size=model_cfg.get('pipeline_model_parallel_size', 1), + virtual_pipeline_model_parallel_size=model_cfg.get('virtual_pipeline_model_parallel_size', None), + pipeline_model_parallel_split_rank=model_cfg.get('pipeline_model_parallel_split_rank', 0), + micro_batch_size=model_cfg.get('micro_batch_size'), + global_batch_size=model_cfg.get('global_batch_size'), + seed=model_cfg.get('seed', 1234), + apex_transformer_log_level=model_cfg.get('apex_transformer_log_level', 30), + ) + + dtype = None + if trainer_cfg['precision'] in [32, '32', '32-true']: + dtype = torch.float + elif trainer_cfg['precision'] in [16, '16', '16-mixed']: + dtype = torch.float16 + elif trainer_cfg['precision'] in ['bf16', 'bf16-mixed']: + dtype = torch.bfloat16 + else: + raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") + + vit_backbone = VitBackbone( + model_cfg, + init_method=None, + scaled_init_method=None, + pre_process=True, + post_process=True, + single_token_output=True, + ).cuda() + vit_backbone.eval() + + # shape: (B, C, H, W) + tokens = torch.rand((6, 3, 224, 224)) + + with torch.no_grad(): + B, C, H, W = tokens.shape + assert H == W + with torch.autocast('cuda', dtype=dtype): + output_tensor = vit_backbone(tokens.cuda(),) + # output is (B, #classes) + assert output_tensor.shape == torch.Size([B, model_cfg['hidden_size']]) + assert output_tensor.dtype == dtype + + @pytest.mark.unit + def test_vit_head(self, model_cfg, trainer_cfg, precision): + dtype = None + if trainer_cfg['precision'] in [32, '32', '32-true']: + dtype = torch.float + elif trainer_cfg['precision'] in [16, '16', '16-mixed']: + dtype = torch.float16 + elif trainer_cfg['precision'] in ['bf16', 'bf16-mixed']: + dtype = torch.bfloat16 + else: + raise ValueError(f"precision: {trainer_cfg['precision']} is not supported.") + + vit_head = VitMlpHead(24, 50,).cuda() + vit_head.eval() + + hidden = torch.rand((6, 24)) + + with torch.no_grad(): + with torch.autocast('cuda', dtype=dtype): + output_tensor = vit_head(hidden.cuda(),) + # output is (B, #classes) + assert output_tensor.shape == torch.Size([6, 50]) + assert output_tensor.dtype == dtype diff --git a/utils/flash-attention.patch b/utils/flash-attention.patch new file mode 100644 index 000000000000..3587ffd57257 --- /dev/null +++ b/utils/flash-attention.patch @@ -0,0 +1,87 @@ +diff --git a/csrc/flash_attn/fmha_api.cpp b/csrc/flash_attn/fmha_api.cpp +index 6602a6c..19d1551 100644 +--- a/csrc/flash_attn/fmha_api.cpp ++++ b/csrc/flash_attn/fmha_api.cpp +@@ -207,6 +207,11 @@ mha_fwd(const at::Tensor &q, // total_q x num_heads x head_size, total_q + bool is_sm75 = dprops->major == 7 && dprops->minor == 5; + bool is_sm80 = dprops->major == 8 && dprops->minor == 0; + bool is_sm8x = dprops->major == 8 && dprops->minor >= 0; ++ bool is_sm90 = dprops->major == 9 && dprops->minor == 0; ++ if (is_sm90){ ++ // use sm8x codes for h100 ++ is_sm8x = 1; ++ } + TORCH_CHECK(is_sm8x || is_sm75); + auto stream = at::cuda::getCurrentCUDAStream().stream(); + bool is_dropout = p_dropout > 0.0; +@@ -359,6 +364,11 @@ mha_bwd(const at::Tensor &dout, // total_q x num_heads, x head_size + bool is_sm75 = dprops->major == 7 && dprops->minor == 5; + bool is_sm80 = dprops->major == 8 && dprops->minor == 0; + bool is_sm8x = dprops->major == 8 && dprops->minor >= 0; ++ bool is_sm90 = dprops->major == 9 && dprops->minor == 0; ++ if (is_sm90){ ++ // use sm8x codes for h100 ++ is_sm8x = 1; ++ } + TORCH_CHECK(is_sm8x || is_sm75); + auto launch = &run_fmha_bwd; + +@@ -407,7 +417,7 @@ mha_bwd(const at::Tensor &dout, // total_q x num_heads, x head_size + TORCH_CHECK(batch_size > 0); + TORCH_CHECK((head_size % 8 == 0) && (head_size <= 128)); + if (head_size > 64) { // TODO: eventually we should support SM86 and SM70 with d=128 as well +- TORCH_CHECK(is_sm80); ++ TORCH_CHECK(is_sm80 || is_sm90); + } + + CHECK_SHAPE(q, total_q, num_heads, head_size); +@@ -650,7 +660,12 @@ mha_bwd_block(const at::Tensor &dout, // total x num_heads, x head_size + auto dprops = at::cuda::getCurrentDeviceProperties(); + bool is_sm80 = dprops->major == 8 && dprops->minor == 0; + bool is_sm8x = dprops->major == 8 && dprops->minor >= 0; +- TORCH_CHECK(dprops->major == 8 && dprops->minor >= 0); ++ bool is_sm90 = dprops->major == 9 && dprops->minor == 0; ++ if (is_sm90){ ++ // use sm8x codes for h100 ++ is_sm8x = 1; ++ } ++ TORCH_CHECK(is_sm8x); + auto launch = &run_fmha_block_dgrad_fp16_sm80; + + bool is_dropout = p_dropout > 0.0; +@@ -700,7 +715,7 @@ mha_bwd_block(const at::Tensor &dout, // total x num_heads, x head_size + TORCH_CHECK(batch_size > 0); + TORCH_CHECK(head_size == 16 || head_size == 32 || head_size == 64 || head_size == 128); + if (head_size == 128) { // TODO: eventually we should support SM86 and SM70 with d=128 as well +- TORCH_CHECK(is_sm80); ++ TORCH_CHECK(is_sm80 || is_sm90); + } + + CHECK_SHAPE(q, total_q, num_heads, head_size); +diff --git a/csrc/flash_attn/src/fmha_bwd_hdim64.cu b/csrc/flash_attn/src/fmha_bwd_hdim64.cu +index 7dd8650..d039726 100644 +--- a/csrc/flash_attn/src/fmha_bwd_hdim64.cu ++++ b/csrc/flash_attn/src/fmha_bwd_hdim64.cu +@@ -24,6 +24,9 @@ void run_fmha_bwd_hdim64(FMHA_dgrad_params ¶ms, cudaStream_t stream, const b + } else if (dprops->major == 7 && dprops->minor == 5) { + using Kernel_traits = FMHA_kernel_traits<128, 64, 16, 1, 8, 0x08u, elem_type>; + run_fmha_bwd_loop(params, stream, configure); ++ } else if (dprops->major == 9 && dprops->minor == 0) { ++ using Kernel_traits = FMHA_kernel_traits<256, 64, 16, 1, 8, 0x100u, elem_type>; ++ run_fmha_bwd_loop(params, stream, configure); + } + } + })); +diff --git a/setup.py b/setup.py +index 5516804..a21a903 100644 +--- a/setup.py ++++ b/setup.py +@@ -112,6 +112,8 @@ cc_flag.append("-gencode") + cc_flag.append("arch=compute_75,code=sm_75") + cc_flag.append("-gencode") + cc_flag.append("arch=compute_80,code=sm_80") ++cc_flag.append("-gencode") ++cc_flag.append("arch=compute_90,code=sm_90") + + subprocess.run(["git", "submodule", "update", "--init", "csrc/flash_attn/cutlass"]) + ext_modules.append( diff --git a/utils/triton.patch b/utils/triton.patch new file mode 100644 index 000000000000..66c2ac97609e --- /dev/null +++ b/utils/triton.patch @@ -0,0 +1,53 @@ +diff --git a/lib/driver/llvm.cc b/lib/driver/llvm.cc +index a73e6541d..01e38f825 100644 +--- a/lib/driver/llvm.cc ++++ b/lib/driver/llvm.cc +@@ -25,6 +25,7 @@ + #endif + #include + #include ++#include + #include "triton/driver/llvm.h" + #include "triton/driver/dispatch.h" + #include "triton/driver/error.h" +@@ -148,7 +149,9 @@ namespace triton + + int vptx(int version) + { +- if (version >= 11040) ++ if (version >= 12000) ++ return 80; ++ else if (version >= 11040) + return 74; + // if(version >= 11030) return 73; + // if(version >= 11020) return 72; +@@ -163,15 +166,15 @@ namespace triton + std::string llir_to_ptx(llvm::Module *module, int cc, int version) + { + // LLVM version in use may not officially support target hardware +- int max_nvvm_cc = 75; +- int max_nvvm_ptx = 74; ++ int max_nvvm_cc = 90; ++ int max_nvvm_ptx = 80; + // options + auto options = llvm::cl::getRegisteredOptions(); + auto *short_ptr = static_cast *>(options["nvptx-short-ptr"]); + assert(short_ptr); + short_ptr->setValue(true); + // compute capability +- std::string sm = "sm_" + std::to_string(cc); ++ std::string sm = cc == 90 ? "sm_90a" : "sm_" + std::to_string(cc); + // max PTX version + int ptx = vptx(version); + int ptx_major = ptx / 10; +@@ -244,7 +247,9 @@ namespace triton + ofs.close(); + std::string cmd; + int err; +- cmd = ptxas + " -v --gpu-name=sm_" + std::to_string(cc) + " " + fsrc + " -o " + fsrc + ".o 2> " + flog; ++ cmd = cc == 90 ? ++ ptxas + " -v --gpu-name=sm_90a " + fsrc + " -o " + fsrc + ".o 2> " + flog : ++ ptxas + " -v --gpu-name=sm_" + std::to_string(cc) + " " + fsrc + " -o " + fsrc + ".o 2> " + flog; + err = system(cmd.c_str()); + if (err != 0) + { From f40b56ed4a7fa337d998484d12f2232de5ae030b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 23 Oct 2023 23:50:21 +0000 Subject: [PATCH 349/512] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- examples/tts/g2p/g2p_inference.py | 2 +- examples/tts/g2p/g2p_train_and_evaluate.py | 2 +- tools/asr_evaluator/asr_evaluator.py | 2 +- tools/ctc_segmentation/scripts/run_ctc_segmentation.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/tts/g2p/g2p_inference.py b/examples/tts/g2p/g2p_inference.py index e7bffa888653..3bb241659e48 100644 --- a/examples/tts/g2p/g2p_inference.py +++ b/examples/tts/g2p/g2p_inference.py @@ -19,11 +19,11 @@ import pytorch_lightning as pl import torch from omegaconf import OmegaConf -from utils import get_metrics from nemo.collections.tts.models.base import G2PModel from nemo.core.config import hydra_runner from nemo.utils import logging +from utils import get_metrics """ python g2p_inference.py \ diff --git a/examples/tts/g2p/g2p_train_and_evaluate.py b/examples/tts/g2p/g2p_train_and_evaluate.py index ff7b2b0675ea..0f67aa11e09b 100644 --- a/examples/tts/g2p/g2p_train_and_evaluate.py +++ b/examples/tts/g2p/g2p_train_and_evaluate.py @@ -16,13 +16,13 @@ import pytorch_lightning as pl import torch -from utils import get_model from nemo.collections.common.callbacks import LogEpochTimeCallback from nemo.collections.tts.models.base import G2PModel from nemo.core.config import hydra_runner from nemo.utils import logging, model_utils from nemo.utils.exp_manager import exp_manager +from utils import get_model """ This script supports training of G2PModels diff --git a/tools/asr_evaluator/asr_evaluator.py b/tools/asr_evaluator/asr_evaluator.py index de05088a55b9..82b61290e66f 100644 --- a/tools/asr_evaluator/asr_evaluator.py +++ b/tools/asr_evaluator/asr_evaluator.py @@ -15,11 +15,11 @@ import git from omegaconf import OmegaConf, open_dict -from utils import cal_target_metadata_wer, run_asr_inference from nemo.collections.asr.parts.utils.eval_utils import cal_write_wer from nemo.core.config import hydra_runner from nemo.utils import logging +from utils import cal_target_metadata_wer, run_asr_inference """ This script serves as evaluator of ASR models diff --git a/tools/ctc_segmentation/scripts/run_ctc_segmentation.py b/tools/ctc_segmentation/scripts/run_ctc_segmentation.py index 90e67ab844c7..884b6186e249 100644 --- a/tools/ctc_segmentation/scripts/run_ctc_segmentation.py +++ b/tools/ctc_segmentation/scripts/run_ctc_segmentation.py @@ -24,9 +24,9 @@ import torch from joblib import Parallel, delayed from tqdm import tqdm -from utils import get_segments import nemo.collections.asr as nemo_asr +from utils import get_segments parser = argparse.ArgumentParser(description="CTC Segmentation") parser.add_argument("--output_dir", default="output", type=str, help="Path to output directory") From ec8256b71324e0d20d7a3789d6d7c2280299267d Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Tue, 24 Oct 2023 10:35:17 -0700 Subject: [PATCH 350/512] Fix configs --- examples/multimodal/mllm/neva/conf/neva_config.yaml | 11 ++++++----- examples/multimodal/mllm/neva/conf/neva_finetune.yaml | 2 +- examples/multimodal/mllm/neva/conf/neva_peft.yaml | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/examples/multimodal/mllm/neva/conf/neva_config.yaml b/examples/multimodal/mllm/neva/conf/neva_config.yaml index c2f44de2c1b9..78ec237719a7 100644 --- a/examples/multimodal/mllm/neva/conf/neva_config.yaml +++ b/examples/multimodal/mllm/neva/conf/neva_config.yaml @@ -84,7 +84,7 @@ model: # LLM configs # use GPTModel from megatron.core - mcore_gpt: True + mcore_gpt: False # model architecture encoder_seq_length: 4096 @@ -96,8 +96,9 @@ model: num_attention_heads: 40 init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') use_scaled_init_method: True # use scaled residuals initialization - hidden_dropout: 0. # Dropout probability for hidden state transformer. - attention_dropout: 0. + hidden_dropout: 0.0 # Dropout probability for hidden state transformer. + attention_dropout: 0.0 # Dropout probability for attention + ffn_dropout: 0.0 # Dropout probability in the feed-forward layer. kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. normalization: rmsnorm # Type of normalization layers @@ -111,7 +112,7 @@ model: headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. - rotary_percentage: 0.5 # If using position_embedding_type=rope, then the per head dim is multiplied by this. + rotary_percentage: 1.0 # If using position_embedding_type=rope, then the per head dim is multiplied by this. attention_type: 'multihead' # Attention type. Options ['multihead'] share_embeddings_and_output_weights: False # Share embedding and output layer weights. overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 @@ -186,7 +187,7 @@ model: is_multimodal: True sep_image_conv_front: False image_token_len: 256 - conv_template: ${model.mm_cfg.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` + conv_template: ${model.mm_cfg.llm.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` image_folder: null image_aspect_ratio: 'square' diff --git a/examples/multimodal/mllm/neva/conf/neva_finetune.yaml b/examples/multimodal/mllm/neva/conf/neva_finetune.yaml index bd902b9f5d15..61a6601624ca 100644 --- a/examples/multimodal/mllm/neva/conf/neva_finetune.yaml +++ b/examples/multimodal/mllm/neva/conf/neva_finetune.yaml @@ -183,7 +183,7 @@ model: is_multimodal: True sep_image_conv_front: False image_token_len: 256 - conv_template: ${model.mm_cfg.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` + conv_template: ${model.mm_cfg.llm.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` image_folder: null image_aspect_ratio: 'square' diff --git a/examples/multimodal/mllm/neva/conf/neva_peft.yaml b/examples/multimodal/mllm/neva/conf/neva_peft.yaml index 0099d1d8c4d4..2a3a0168d834 100644 --- a/examples/multimodal/mllm/neva/conf/neva_peft.yaml +++ b/examples/multimodal/mllm/neva/conf/neva_peft.yaml @@ -189,7 +189,7 @@ model: is_multimodal: True sep_image_conv_front: False image_token_len: 256 - conv_template: ${model.mm_cfg.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` + conv_template: ${model.mm_cfg.llm.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` image_folder: null image_aspect_ratio: 'square' From 5dad27717179fafa770a784ccbeb72bffb0438ff Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Tue, 24 Oct 2023 15:01:05 -0700 Subject: [PATCH 351/512] Fix neva model --- .../models/clip/megatron_clip_models.py | 4 +-- .../megatron_nsfw_clip_models.py | 4 +-- .../multimodal/models/neva/neva_model.py | 33 ++++++++++++------- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/nemo/collections/multimodal/models/clip/megatron_clip_models.py b/nemo/collections/multimodal/models/clip/megatron_clip_models.py index e24e95e68af8..e4ca6e1e8c79 100644 --- a/nemo/collections/multimodal/models/clip/megatron_clip_models.py +++ b/nemo/collections/multimodal/models/clip/megatron_clip_models.py @@ -938,7 +938,7 @@ def setup_training_data(self, cfg): num_workers=cfg.num_workers, pin_memory=True, drop_last=cfg.train.get("drop_last", True), - persistent_workers=True, + persistent_workers=True if cfg.num_workers > 0 else False, ) def setup_validation_data(self, cfg): @@ -953,7 +953,7 @@ def setup_validation_data(self, cfg): num_workers=cfg.num_workers, pin_memory=True, drop_last=cfg.train.get("drop_last", True), - persistent_workers=True, + persistent_workers=True if cfg.num_workers > 0 else False, ) def setup_test_data(self, cfg): diff --git a/nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py b/nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py index d49e2bfafe6b..c55ac6cff03a 100644 --- a/nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py +++ b/nemo/collections/multimodal/models/content_filter/megatron_nsfw_clip_models.py @@ -365,7 +365,7 @@ def setup_training_data(self, cfg: DictConfig) -> None: num_workers=cfg.data.num_workers, pin_memory=True, drop_last=cfg.data.train.get("drop_last", True), - persistent_workers=True, + persistent_workers=True if cfg.data.num_workers > 0 else False, ) def setup_validation_data(self, cfg: DictConfig) -> None: @@ -383,7 +383,7 @@ def setup_validation_data(self, cfg: DictConfig) -> None: num_workers=cfg.data.num_workers, pin_memory=True, drop_last=cfg.data.validation.get("drop_last", True), - persistent_workers=True, + persistent_workers=True if cfg.data.num_workers > 0 else False, ) def parameters(self): diff --git a/nemo/collections/multimodal/models/neva/neva_model.py b/nemo/collections/multimodal/models/neva/neva_model.py index d6b8e2336375..890c1c2924cb 100644 --- a/nemo/collections/multimodal/models/neva/neva_model.py +++ b/nemo/collections/multimodal/models/neva/neva_model.py @@ -17,8 +17,8 @@ import random import re import tempfile -from functools import partial from itertools import chain +from functools import partial from typing import Any, List, Optional, Union import numpy as np @@ -80,9 +80,9 @@ SamplingParam, TextGeneration, ) -from nemo.collections.nlp.parts.mixins.multimodal_adapter_mixins import MultimodalAdapterModelMixin from nemo.collections.nlp.parts.nlp_overrides import GradScaler, NLPSaveRestoreConnector from nemo.collections.nlp.parts.utils_funcs import get_last_rank +from nemo.collections.nlp.parts.mixins.multimodal_adapter_mixins import MultimodalAdapterModelMixin from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone from nemo.core import adapter_mixins from nemo.core.classes.common import PretrainedModelInfo @@ -101,8 +101,8 @@ try: from megatron.core import dist_checkpointing, parallel_state - from megatron.core.models.gpt import GPTModel as MCoreGPTModel from megatron.core.pipeline_parallel.schedules import get_forward_backward_func + from megatron.core.models.gpt import GPTModel as MCoreGPTModel HAVE_MEGATRON_CORE = True @@ -268,7 +268,6 @@ def replace_media_embeddings(self, input_ids, inputs_embeds, media): return updated_input_embeds - class MCoreNevaModel(MCoreGPTModel): def __init__( self, mm_cfg, media_start_id, media_end_id, **kwargs, @@ -286,7 +285,9 @@ def __init__( if mm_cfg.llm.freeze: for param in chain( - self.embedding.parameters(), self.decoder.parameters(), self.output_layer.parameters(), + self.embedding.parameters(), + self.decoder.parameters(), + self.output_layer.parameters(), ): param.requires_grad = False self.embedding = self.embedding.eval() @@ -420,7 +421,6 @@ def load_llm_weights(self, nemo_path): self.language_model.load_state_dict(new_state_dict, strict=True) print(f"Restored LLM weights from {nemo_path}.") - class NevaModel(GPTModel): def __init__( self, mm_cfg, media_start_id, media_end_id, **kwargs, @@ -560,14 +560,17 @@ def init_neva_adapter(self): self.base_keys = self._get_all_keys() adapter_name = AdapterName.MM_LINEAR_ADAPTER adapter_cfg = MMLinearAdapterConfig( - in_features=self.cfg.mm_cfg.vision_encoder.hidden_size, out_features=self.cfg.hidden_size, bias=True, + in_features=self.cfg.mm_cfg.vision_encoder.hidden_size, + out_features=self.cfg.hidden_size, bias=True, ) for name, module in self.named_modules(): self._check_and_add_adapter( - name, module, adapter_name, adapter_cfg, autocast_dtype=self.autocast_dtype, + name, module, adapter_name, adapter_cfg, + autocast_dtype=self.autocast_dtype if self.megatron_amp_O2 else None, ) self.adapter_keys = self._get_all_keys() - self.base_keys + def model_provider_func(self, pre_process, post_process): """Model depends on pipeline paralellism.""" media_start_id = self.tokenizer.token_to_id(DEFAULT_IM_START_TOKEN) @@ -575,10 +578,8 @@ def model_provider_func(self, pre_process, post_process): if self.mcore_gpt: if parallel_state.is_unitialized(): - def dummy(): return - if self.trainer.strategy.launcher is not None: self.trainer.strategy.launcher.launch(dummy, trainer=self.trainer) self.trainer.strategy.setup_environment() @@ -683,7 +684,17 @@ def setup_optimizer_param_groups(self): param_group['params'] = params_with_grad def forward(self, tokens, text_position_ids, attention_mask, labels, media=None): - output_tensor = self.model(tokens, text_position_ids, attention_mask, labels, media) + forward_args = { + 'input_ids': tokens, + 'position_ids': text_position_ids, + 'attention_mask': attention_mask, + 'labels': labels, + 'media': media, + } + if not self.mcore_gpt: + forward_args['checkpoint_activations_all_layers'] = None + + output_tensor = self.model(**forward_args) return output_tensor def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): From c1c59813d2c3f938f701f144fa3d6e94250f80eb Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Tue, 24 Oct 2023 15:13:56 -0700 Subject: [PATCH 352/512] Fix neva casting --- nemo/collections/multimodal/models/neva/neva_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nemo/collections/multimodal/models/neva/neva_model.py b/nemo/collections/multimodal/models/neva/neva_model.py index 890c1c2924cb..66cdd32cd1c8 100644 --- a/nemo/collections/multimodal/models/neva/neva_model.py +++ b/nemo/collections/multimodal/models/neva/neva_model.py @@ -204,6 +204,7 @@ def encode_vision_x(self, vision_x: torch.Tensor): assert F == 1, "Only single frame supported" vision_x = rearrange(vision_x, "b T F c h w -> (b T F) c h w") + vision_x = vision_x.to(self.vision_encoder.dtype) with torch.no_grad(): if self.from_hf: vision_x = self.vision_encoder(vision_x, output_hidden_states=True) From b0c5320001ce5e36678415808cc142d50f58f668 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Wed, 25 Oct 2023 09:52:30 -0700 Subject: [PATCH 353/512] Fix neva LoRA non MCore version --- .../multimodal/mllm/neva/conf/neva_peft.yaml | 5 ++++ examples/multimodal/mllm/neva/neva_peft.py | 29 ++++++++++++++----- .../multimodal/models/neva/neva_model.py | 10 +++++-- .../parts/mixins/multimodal_adapter_mixins.py | 7 +++-- 4 files changed, 39 insertions(+), 12 deletions(-) diff --git a/examples/multimodal/mllm/neva/conf/neva_peft.yaml b/examples/multimodal/mllm/neva/conf/neva_peft.yaml index 2a3a0168d834..b002259128a1 100644 --- a/examples/multimodal/mllm/neva/conf/neva_peft.yaml +++ b/examples/multimodal/mllm/neva/conf/neva_peft.yaml @@ -82,11 +82,16 @@ model: use_im_start_end: True # only support True now peft: + peft_scheme: "lora" + restore_from_path: null lora_tuning: adapter_dim: 32 adapter_dropout: 0.0 column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal + layer_selection: null # selects in which layers to add lora adapters. e.g. [1,12] will add lora to layer 1 (lowest) and 12. null will apply adapters to all layers + weight_tying: False + position_embedding_strategy: null # used only when weight_tying is True # LLM configs # use GPTModel from megatron.core diff --git a/examples/multimodal/mllm/neva/neva_peft.py b/examples/multimodal/mllm/neva/neva_peft.py index 1738c41c2e48..01c1676a157d 100644 --- a/examples/multimodal/mllm/neva/neva_peft.py +++ b/examples/multimodal/mllm/neva/neva_peft.py @@ -15,9 +15,10 @@ import torch.multiprocessing as mp from omegaconf.omegaconf import OmegaConf, open_dict -from nemo.collections.multimodal.models.neva.neva_peft_models import MegatronNevaLoRAModel -from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder +from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel +from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronLMPPTrainerBuilder from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.collections.nlp.parts.peft_config import PEFT_CONFIG_MAP from nemo.core.config import hydra_runner from nemo.utils import logging @@ -31,24 +32,38 @@ def main(cfg) -> None: logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') - trainer = MegatronTrainerBuilder(cfg).create_trainer() + trainer = MegatronLMPPTrainerBuilder(cfg).create_trainer() exp_manager(trainer, cfg.exp_manager) # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams with open_dict(cfg): cfg.model.precision = cfg.trainer.precision - if cfg.model.restore_from_path is None: - model = MegatronNevaLoRAModel(cfg.model, trainer) + model_cfg = cfg.model + model = MegatronNevaModel(cfg.model, trainer) else: - model = MegatronNevaLoRAModel.restore_from( + model_cfg = MegatronGPTSFTModel.merge_cfg_with(cfg.model.restore_from_path, cfg) + model = MegatronNevaModel.restore_from( restore_path=cfg.model.restore_from_path, trainer=trainer, - override_config_path=cfg.model, + override_config_path=model_cfg, save_restore_connector=NLPSaveRestoreConnector(), strict=False, ) + peft_cfg_cls = PEFT_CONFIG_MAP[cfg.model.peft.peft_scheme] + + if cfg.model.peft.restore_from_path is not None: + # initialize peft weights from a checkpoint instead of randomly + # This is not the same as resume training because optimizer states are not restored. + logging.info("PEFT Weights will be loaded from", cfg.model.peft.restore_from_path) + model.load_adapters(cfg.model.peft.restore_from_path, peft_cfg_cls(model_cfg)) + elif peft_cfg_cls is not None: + logging.info("Adding adapter weights to the model for PEFT") + model.add_adapter(peft_cfg_cls(model_cfg)) + else: + logging.info(f"Running full finetuning since no peft scheme is given.\n{model.summarize()}") + trainer.fit(model) diff --git a/nemo/collections/multimodal/models/neva/neva_model.py b/nemo/collections/multimodal/models/neva/neva_model.py index 66cdd32cd1c8..9912bd79e291 100644 --- a/nemo/collections/multimodal/models/neva/neva_model.py +++ b/nemo/collections/multimodal/models/neva/neva_model.py @@ -570,6 +570,8 @@ def init_neva_adapter(self): autocast_dtype=self.autocast_dtype if self.megatron_amp_O2 else None, ) self.adapter_keys = self._get_all_keys() - self.base_keys + if self.megatron_amp_O2: + self.adapter_keys = set(key.replace("model.module.", "model.", 1) for key in self.adapter_keys) def model_provider_func(self, pre_process, post_process): @@ -1014,11 +1016,13 @@ def load_state_dict(self, state_dict, strict=False): if len(unexpected_keys) > 0: logging.critical('Unexpected keys were detected during the load. Please double check.') logging.critical(f'Unexpected keys: \n{unexpected_keys}') + raise ValueError def on_load_checkpoint(self, checkpoint) -> None: - if self.mcore_gpt: - state_dict = checkpoint["state_dict"] - self.load_state_dict(state_dict) + pass + # if self.mcore_gpt: + # state_dict = checkpoint["state_dict"] + # self.load_state_dict(state_dict) def sharded_state_dict(self, prefix: str = ''): return None diff --git a/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py b/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py index 682a76aa939a..127c7385a7bb 100644 --- a/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py +++ b/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py @@ -57,8 +57,7 @@ def add_adapter(self, peft_cfgs: Union[PEFTConfig, List[PEFTConfig]]): if not isinstance(peft_cfgs, List): peft_cfgs = [peft_cfgs] - self.base_keys = self._get_all_keys() - self.freeze() + self.base_keys = getattr(self, "base_keys", self._get_all_keys()) logging.info(f"Before adding PEFT params:\n{self.summarize()}") self.use_ptuning_only = len(peft_cfgs) == 1 and isinstance(peft_cfgs[0], PtuningPEFTConfig) @@ -74,6 +73,8 @@ def add_adapter(self, peft_cfgs: Union[PEFTConfig, List[PEFTConfig]]): logging.info(f"After adding PEFT params:\n{self.summarize()}") self.adapter_keys = self._get_all_keys() - self.base_keys + if self.megatron_amp_O2: + self.adapter_keys = set(key.replace("model.module.", "model.", 1) for key in self.adapter_keys) for cfg in peft_cfgs: if cfg.weight_tying: @@ -83,6 +84,8 @@ def add_adapter(self, peft_cfgs: Union[PEFTConfig, List[PEFTConfig]]): def _check_and_add_adapter( self, name, module, peft_name, peft_cfg, name_key_to_mcore_mixins=None, autocast_dtype=None ): + name_key_to_mcore_mixins = getattr(peft_cfg, "name_key_to_mcore_mixins", None) if self.use_mcore_gpt else None + if name_key_to_mcore_mixins is not None: for mcore_target, mcore_mixin in name_key_to_mcore_mixins[peft_name]: if name in [ From 4e178e31308d030c4b087798553ab9fdb541873b Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Wed, 25 Oct 2023 16:14:12 -0700 Subject: [PATCH 354/512] Fix neva LoRA MCore --- nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py b/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py index 127c7385a7bb..120121c33f73 100644 --- a/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py +++ b/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py @@ -84,8 +84,6 @@ def add_adapter(self, peft_cfgs: Union[PEFTConfig, List[PEFTConfig]]): def _check_and_add_adapter( self, name, module, peft_name, peft_cfg, name_key_to_mcore_mixins=None, autocast_dtype=None ): - name_key_to_mcore_mixins = getattr(peft_cfg, "name_key_to_mcore_mixins", None) if self.use_mcore_gpt else None - if name_key_to_mcore_mixins is not None: for mcore_target, mcore_mixin in name_key_to_mcore_mixins[peft_name]: if name in [ From cacf9a8815d0d6203c3e7285429bb4f6bdfe1920 Mon Sep 17 00:00:00 2001 From: Szymon Mikler Date: Wed, 25 Oct 2023 16:52:51 +0200 Subject: [PATCH 355/512] [SD] group norm fixes --- .../stable_diffusion/conf/sd_train.yaml | 1 + .../modules/imagen/diffusionmodules/layers.py | 13 ++---------- .../modules/stable_diffusion/attention.py | 6 +++--- .../diffusionmodules/model.py | 5 +++-- .../diffusionmodules/openaimodel.py | 20 ++++++++++++++++--- .../stable_diffusion/diffusionmodules/util.py | 11 +++------- 6 files changed, 29 insertions(+), 27 deletions(-) diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml index 78a8dedccbc2..7c7a125f9722 100644 --- a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml @@ -120,6 +120,7 @@ model: legacy: False use_flash_attention: True enable_amp_o2_fp16: True + resblock_gn_groups: 32 first_stage_config: _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py index a5cb19444057..c706a534463a 100644 --- a/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py +++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/layers.py @@ -44,13 +44,7 @@ import torch.nn as nn import torch.nn.functional as F -try: - from group_norm import GroupNormOpt - - OPT_GROUP_NORM = True -except Exception: - print('Fused optimized group norm has not been installed.') - OPT_GROUP_NORM = False +from apex.contrib.group_norm import GroupNorm def conv_nd(dims, *args, **kwargs): @@ -131,10 +125,7 @@ def normalization(channels, act=""): :param channels: number of input channels. :return: an nn.Module for normalization. """ - if OPT_GROUP_NORM: - return GroupNormOpt(32, channels, act=act) - - return nn.GroupNorm(32, channels) + return GroupNorm(32, channels, act=act) def timestep_embedding(timesteps, dim, max_period=10000, dtype=th.float32): diff --git a/nemo/collections/multimodal/modules/stable_diffusion/attention.py b/nemo/collections/multimodal/modules/stable_diffusion/attention.py index 07f37ece3d6f..a57d82b984bd 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/attention.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/attention.py @@ -16,8 +16,8 @@ import torch import torch.nn.functional as F +from apex.contrib.group_norm import GroupNorm from einops import rearrange, repeat -from group_norm import GroupNormOpt from torch import einsum, nn from torch._dynamo import disable @@ -111,8 +111,8 @@ def zero_module(module): return module -def Normalize(in_channels): - return GroupNormOpt(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True) +def Normalize(in_channels, num_groups=32, act=""): + return GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True, act=act) class LinearAttention(nn.Module): diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py index dbfab3ab4b07..fc95653863cc 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/model.py @@ -13,11 +13,12 @@ # limitations under the License. # pytorch_diffusion + derived encoder decoder import math + import numpy as np import torch import torch.nn as nn +from apex.contrib.group_norm import GroupNorm from einops import rearrange -from group_norm import GroupNormOpt from nemo.collections.multimodal.modules.stable_diffusion.attention import LinearAttention from nemo.collections.multimodal.parts.stable_diffusion.utils import instantiate_from_config @@ -50,7 +51,7 @@ def nonlinearity(x): def Normalize(in_channels, num_groups=32, act=""): - return GroupNormOpt(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True, act=act) + return GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True, act=act) class Upsample(nn.Module): diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py index 9c52198f1566..9039aebb4675 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py @@ -210,6 +210,7 @@ def __init__( use_checkpoint=False, up=False, down=False, + resblock_gn_groups=32, ): super().__init__() self.channels = channels @@ -221,7 +222,8 @@ def __init__( self.use_scale_shift_norm = use_scale_shift_norm self.in_layers = nn.Sequential( - normalization(channels, act="silu"), conv_nd(dims, channels, self.out_channels, 3, padding=1), + normalization(channels, act="silu", gn_groups=resblock_gn_groups), + conv_nd(dims, channels, self.out_channels, 3, padding=1), ) self.updown = up or down @@ -239,7 +241,7 @@ def __init__( nn.SiLU(), linear(emb_channels, 2 * self.out_channels if use_scale_shift_norm else self.out_channels,), ) self.out_layers = nn.Sequential( - normalization(self.out_channels, act="silu"), + normalization(self.out_channels, act="silu", gn_groups=resblock_gn_groups), nn.Dropout(p=dropout), zero_module(conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1)), ) @@ -463,6 +465,7 @@ def __init__( num_heads=-1, num_head_channels=-1, num_heads_upsample=-1, + resblock_gn_groups=32, use_scale_shift_norm=False, resblock_updown=False, use_new_attention_order=False, @@ -543,6 +546,7 @@ def __init__( dims=dims, use_checkpoint=use_checkpoint, use_scale_shift_norm=use_scale_shift_norm, + resblock_gn_groups=resblock_gn_groups, ) ] ch = mult * model_channels @@ -591,6 +595,7 @@ def __init__( use_checkpoint=use_checkpoint, use_scale_shift_norm=use_scale_shift_norm, down=True, + resblock_gn_groups=resblock_gn_groups, ) if resblock_updown else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch) @@ -617,6 +622,7 @@ def __init__( dims=dims, use_checkpoint=use_checkpoint, use_scale_shift_norm=use_scale_shift_norm, + resblock_gn_groups=resblock_gn_groups, ), AttentionBlock( ch, @@ -643,6 +649,7 @@ def __init__( dims=dims, use_checkpoint=use_checkpoint, use_scale_shift_norm=use_scale_shift_norm, + resblock_gn_groups=resblock_gn_groups, ), ) self._feature_size += ch @@ -704,6 +711,7 @@ def __init__( use_checkpoint=use_checkpoint, use_scale_shift_norm=use_scale_shift_norm, up=True, + resblock_gn_groups=resblock_gn_groups, ) if resblock_updown else Upsample(ch, conv_resample, dims=dims, out_channels=out_ch) @@ -713,7 +721,8 @@ def __init__( self._feature_size += ch self.out = nn.Sequential( - normalization(ch), nn.SiLU(), zero_module(conv_nd(dims, model_channels, out_channels, 3, padding=1)), + normalization(ch, act="silu", gn_groups=resblock_gn_groups), + zero_module(conv_nd(dims, model_channels, out_channels, 3, padding=1)), ) if self.predict_codebook_ids: self.id_predictor = nn.Sequential( @@ -1018,6 +1027,7 @@ def __init__( resblock_updown=False, use_new_attention_order=False, pool="adaptive", + resblock_gn_groups=32, *args, **kwargs, ): @@ -1062,6 +1072,7 @@ def __init__( dims=dims, use_checkpoint=use_checkpoint, use_scale_shift_norm=use_scale_shift_norm, + resblock_gn_groups=resblock_gn_groups, ) ] ch = mult * model_channels @@ -1091,6 +1102,7 @@ def __init__( use_checkpoint=use_checkpoint, use_scale_shift_norm=use_scale_shift_norm, down=True, + resblock_gn_groups=resblock_gn_groups, ) if resblock_updown else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch) @@ -1109,6 +1121,7 @@ def __init__( dims=dims, use_checkpoint=use_checkpoint, use_scale_shift_norm=use_scale_shift_norm, + resblock_gn_groups=resblock_gn_groups, ), AttentionBlock( ch, @@ -1124,6 +1137,7 @@ def __init__( dims=dims, use_checkpoint=use_checkpoint, use_scale_shift_norm=use_scale_shift_norm, + resblock_gn_groups=resblock_gn_groups, ), ) self._feature_size += ch diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py index 2225d45a3a78..39fd195bb291 100644 --- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py +++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/util.py @@ -26,8 +26,8 @@ import numpy as np import torch import torch.nn as nn +from apex.contrib.group_norm import GroupNorm from einops import repeat -from group_norm import GroupNormOpt from torch._dynamo import disable from torch.cuda.amp import custom_bwd, custom_fwd @@ -214,13 +214,8 @@ def mean_flat(tensor): return tensor.mean(dim=list(range(1, len(tensor.shape)))) -def normalization(channels, act=""): - """ - Make a standard normalization layer. - :param channels: number of input channels. - :return: an nn.Module for normalization. - """ - return GroupNormOpt(32, channels, act=act) +def normalization(in_channels, act="", gn_groups=32): + return GroupNorm(num_groups=gn_groups, num_channels=in_channels, eps=1e-5, affine=True, act=act) # PyTorch 1.7 has SiLU, but we support PyTorch 1.5. From 2da64dbfebbb792e47df3a74aeb58ba6fded8597 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Thu, 26 Oct 2023 11:18:39 -0700 Subject: [PATCH 356/512] Fix neva cfg merge --- examples/multimodal/mllm/neva/neva_peft.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/multimodal/mllm/neva/neva_peft.py b/examples/multimodal/mllm/neva/neva_peft.py index 01c1676a157d..12c28f077c1c 100644 --- a/examples/multimodal/mllm/neva/neva_peft.py +++ b/examples/multimodal/mllm/neva/neva_peft.py @@ -42,7 +42,7 @@ def main(cfg) -> None: model_cfg = cfg.model model = MegatronNevaModel(cfg.model, trainer) else: - model_cfg = MegatronGPTSFTModel.merge_cfg_with(cfg.model.restore_from_path, cfg) + model_cfg = MegatronNevaModel.merge_cfg_with(cfg.model.restore_from_path, cfg) model = MegatronNevaModel.restore_from( restore_path=cfg.model.restore_from_path, trainer=trainer, From fba254802d0e5ac6ff283bfbb15cbc1c0e35a7d2 Mon Sep 17 00:00:00 2001 From: aot Date: Fri, 27 Oct 2023 06:50:42 -0700 Subject: [PATCH 357/512] remove groupnorm dependency --- examples/multimodal/generative/imagen/conf/base64-500m.yaml | 2 +- nemo/collections/multimodal/models/imagen/imagen.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/multimodal/generative/imagen/conf/base64-500m.yaml b/examples/multimodal/generative/imagen/conf/base64-500m.yaml index 4541110caf98..eb66b5b36feb 100644 --- a/examples/multimodal/generative/imagen/conf/base64-500m.yaml +++ b/examples/multimodal/generative/imagen/conf/base64-500m.yaml @@ -53,7 +53,7 @@ model: # gradient accumulation will be done automatically based on data_parallel_size micro_batch_size: 128 # limited by GPU memory global_batch_size: 128 # will use more micro batches to reach global batch size - inductor: True + inductor: False inductor_cudagraphs: False unet_type: base channels_last: True diff --git a/nemo/collections/multimodal/models/imagen/imagen.py b/nemo/collections/multimodal/models/imagen/imagen.py index 64c1382e2d54..9afa5a6d3f28 100644 --- a/nemo/collections/multimodal/models/imagen/imagen.py +++ b/nemo/collections/multimodal/models/imagen/imagen.py @@ -54,7 +54,7 @@ HAVE_MEGATRON_CORE = False try: - from group_norm import GroupNormOpt + from apex.contrib.group_norm import GroupNorm OPT_GROUP_NORM = True except Exception: From 41b1b51d67d8faf128ff9caf3a7684568e7a295d Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Mon, 30 Oct 2023 11:59:27 -0700 Subject: [PATCH 358/512] Fix copyright headers --- nemo/core/optim/megatron_fused_adam.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/nemo/core/optim/megatron_fused_adam.py b/nemo/core/optim/megatron_fused_adam.py index 9a7186250ad9..87439c1d80d7 100755 --- a/nemo/core/optim/megatron_fused_adam.py +++ b/nemo/core/optim/megatron_fused_adam.py @@ -1,3 +1,17 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import amp_C import torch from apex.multi_tensor_apply import multi_tensor_applier From 7422dbe75721597a07e8ae490622679ebeab1411 Mon Sep 17 00:00:00 2001 From: Yu Yao Date: Mon, 30 Oct 2023 13:33:54 -0700 Subject: [PATCH 359/512] LLaVA 1_5 and LORA update --- .../mllm/neva/conf/llava_config.yaml | 5 +- .../mllm/neva/conf/neva_config.yaml | 1 + .../mllm/neva/conf/neva_finetune.yaml | 3 +- .../mllm/neva/conf/neva_inference.yaml | 1 + .../multimodal/mllm/neva/conf/neva_peft.yaml | 3 +- .../mllm/neva/convert_hf_llava_to_neva.py | 54 +++++---- .../multimodal/mllm/neva/neva_evaluation.py | 25 ++-- .../multimodal/data/neva/conversation.py | 2 +- .../multimodal/data/neva/neva_dataset.py | 17 +-- .../multimodal/models/neva/neva_model.py | 109 +++++++++++------- .../megatron/adapters/parallel_adapters.py | 31 +++-- .../common/text_generation_strategy.py | 17 ++- .../parts/mixins/multimodal_adapter_mixins.py | 60 ++++++++++ nemo/collections/nlp/parts/nlp_overrides.py | 6 + 14 files changed, 237 insertions(+), 97 deletions(-) diff --git a/examples/multimodal/mllm/neva/conf/llava_config.yaml b/examples/multimodal/mllm/neva/conf/llava_config.yaml index 0b2cf826c606..06f8d0d4634d 100644 --- a/examples/multimodal/mllm/neva/conf/llava_config.yaml +++ b/examples/multimodal/mllm/neva/conf/llava_config.yaml @@ -69,7 +69,7 @@ model: llm: from_pretrained: null # path to nemo checkpoint freeze: False - model_type: llama_2 # Only support nvgpt or llama_2 + model_type: v1 # Only support nvgpt or v1 vision_encoder: from_pretrained: "openai/clip-vit-large-patch14" # path or name from_hf: True @@ -79,6 +79,7 @@ model: class_token_length: 1 freeze: True pretrain_mm_mlp_adapter: null # path to pretrained mm adapter + mm_mlp_adapter_type: mlp2x_gelu use_im_start_end: False @@ -187,7 +188,7 @@ model: is_multimodal: True sep_image_conv_front: False image_token_len: 256 - conv_template: llama_2 # check `nemo/collections/multimodal/data/neva/conversation.py` + conv_template: v1 # check `nemo/collections/multimodal/data/neva/conversation.py` image_folder: null image_aspect_ratio: 'square' diff --git a/examples/multimodal/mllm/neva/conf/neva_config.yaml b/examples/multimodal/mllm/neva/conf/neva_config.yaml index 78ec237719a7..b41f15c384a8 100644 --- a/examples/multimodal/mllm/neva/conf/neva_config.yaml +++ b/examples/multimodal/mllm/neva/conf/neva_config.yaml @@ -79,6 +79,7 @@ model: class_token_length: 1 freeze: True pretrain_mm_mlp_adapter: null # path to pretrained mm adapter + mm_mlp_adapter_type: linear use_im_start_end: False diff --git a/examples/multimodal/mllm/neva/conf/neva_finetune.yaml b/examples/multimodal/mllm/neva/conf/neva_finetune.yaml index 61a6601624ca..cafee118a8bd 100644 --- a/examples/multimodal/mllm/neva/conf/neva_finetune.yaml +++ b/examples/multimodal/mllm/neva/conf/neva_finetune.yaml @@ -79,7 +79,8 @@ model: class_token_length: 1 freeze: True pretrain_mm_mlp_adapter: null # path to pretrained mm adapter - use_im_start_end: True # only support True now + mm_mlp_adapter_type: linear + use_im_start_end: False # only support True now # LLM configs diff --git a/examples/multimodal/mllm/neva/conf/neva_inference.yaml b/examples/multimodal/mllm/neva/conf/neva_inference.yaml index 35ca1e179f98..6ff8e889aba1 100644 --- a/examples/multimodal/mllm/neva/conf/neva_inference.yaml +++ b/examples/multimodal/mllm/neva/conf/neva_inference.yaml @@ -24,6 +24,7 @@ tensor_model_parallel_size: 8 pipeline_model_parallel_size: 1 pipeline_model_parallel_split_rank: 0 # used for encoder and decoder model (0 for others) neva_model_file: /pwd/nemo_experiments/nemo_llava.nemo #neva_22b_tp8_finetuned_v1.nemo neva_8b_tp4_finetuned_v1.nemo +llm_model_file: null checkpoint_dir: null #/pwd/nemo_multimodal/nemo_experiments/nemo_llava_finetune/checkpoints # checkpoint file dir. This is used to load the PTL checkpoint generated during the Kosmos training checkpoint_name: null #megatron_clip--val_loss=0.41-step=13499-consumed_samples=431904.0.ckpt # PTL checkpoint file name, only used for PTL checkpoint loading hparams_file: null #/pwd/nemo_multimodal/nemo_experiments/nemo_llava_finetune/version_0/hparams.yaml # model configuration file, only used for PTL checkpoint loading diff --git a/examples/multimodal/mllm/neva/conf/neva_peft.yaml b/examples/multimodal/mllm/neva/conf/neva_peft.yaml index b002259128a1..36e706635b97 100644 --- a/examples/multimodal/mllm/neva/conf/neva_peft.yaml +++ b/examples/multimodal/mllm/neva/conf/neva_peft.yaml @@ -79,7 +79,8 @@ model: class_token_length: 1 freeze: True pretrain_mm_mlp_adapter: null # path to pretrained mm adapter - use_im_start_end: True # only support True now + mm_mlp_adapter_type: linear + use_im_start_end: False # only support True now peft: peft_scheme: "lora" diff --git a/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py b/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py index b70faf61a413..44cad58e435b 100644 --- a/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py +++ b/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py @@ -96,32 +96,36 @@ def load_model(cls, checkpoint, strict, **kwargs): return model -def load_config(args, llama_config): +def load_config(args, llava_config): nemo_config = OmegaConf.load(os.path.join(os.path.dirname(__file__), 'conf/llava_config.yaml')).model - nemo_config.encoder_seq_length = llama_config['max_position_embeddings'] - nemo_config.num_layers = int(llama_config['num_hidden_layers']) - nemo_config.hidden_size = llama_config['hidden_size'] - nemo_config.ffn_hidden_size = llama_config['intermediate_size'] - nemo_config.num_attention_heads = llama_config['num_attention_heads'] - nemo_config.max_position_embeddings = llama_config['max_position_embeddings'] - nemo_config.init_method_std = llama_config['initializer_range'] - nemo_config.layernorm_epsilon = llama_config['rms_norm_eps'] - if 'num_key_value_heads' in llama_config: - nemo_config.num_query_groups = llama_config['num_key_value_heads'] + nemo_config.mm_cfg.mm_mlp_adapter_type = llava_config.get('mm_projector_type', 'linear') + nemo_config.mm_cfg.vision_encoder.from_pretrained = llava_config.get('mm_vision_tower', 'openai/clip-vit-large-patch14') + if '336' in nemo_config.mm_cfg.vision_encoder.from_pretrained: + nemo_config.data.image_token_len = 576 + nemo_config.encoder_seq_length = llava_config['max_position_embeddings'] + nemo_config.num_layers = int(llava_config['num_hidden_layers']) + nemo_config.hidden_size = llava_config['hidden_size'] + nemo_config.ffn_hidden_size = llava_config['intermediate_size'] + nemo_config.num_attention_heads = llava_config['num_attention_heads'] + nemo_config.max_position_embeddings = llava_config['max_position_embeddings'] + nemo_config.init_method_std = llava_config['initializer_range'] + nemo_config.layernorm_epsilon = llava_config['rms_norm_eps'] + if 'num_key_value_heads' in llava_config: + nemo_config.num_query_groups = llava_config['num_key_value_heads'] nemo_config.use_cpu_initialization = True nemo_config.activation = 'fast-swiglu' if args.tokenizer_model is None: - nemo_config.tokenizer.model = llama_config['tokenizer_model'] + nemo_config.tokenizer.model = llava_config['tokenizer_model'] else: nemo_config.tokenizer.model = args.tokenizer_model - if llama_config['rope_scaling'] is not None: - if llama_config['rope_scaling']['type'] == 'linear': - nemo_config['seq_len_interpolation_factor'] = llama_config['rope_scaling']['factor'] + if llava_config['rope_scaling'] is not None: + if llava_config['rope_scaling']['type'] == 'linear': + nemo_config['seq_len_interpolation_factor'] = llava_config['rope_scaling']['factor'] else: raise ValueError("Only linear rope scaling type is supported now") base = 128 - while llama_config['vocab_size'] % base != 0: + while llava_config['vocab_size'] % base != 0: base //= 2 nemo_config.make_vocab_size_divisible_by = base @@ -204,17 +208,19 @@ def convert(args): # Multimodal projection if mcore_gpt: - raise NotImplementedError - else: mm_projection_layer_base_name = ( - f'model.language_model.embedding.word_embeddings.adapter_layer.mm_linear_adapter.linear' - ) - checkpoint['state_dict'][f'{mm_projection_layer_base_name}.weight'] = param_to_weights( - model.state_dict()[f'model.mm_projector.weight'] + f'model.embedding.word_embeddings.adapter_layer.mm_projector_adapter.mm_projector' ) - checkpoint['state_dict'][f'{mm_projection_layer_base_name}.bias'] = param_to_weights( - model.state_dict()[f'model.mm_projector.bias'] + else: + mm_projection_layer_base_name = ( + f'model.language_model.embedding.word_embeddings.adapter_layer.mm_projector_adapter.mm_projector' ) + for key in model.state_dict(): + if 'mm_projector' in key: + mm_projection_layer_suffix = key.split('mm_projector')[1] + checkpoint['state_dict'][f'{mm_projection_layer_base_name}{mm_projection_layer_suffix}'] = param_to_weights( + model.state_dict()[key] + ) embed_weight = model.state_dict()[f'model.embed_tokens.weight'] if mcore_gpt: diff --git a/examples/multimodal/mllm/neva/neva_evaluation.py b/examples/multimodal/mllm/neva/neva_evaluation.py index 256d58018c9b..f27c5aea373e 100644 --- a/examples/multimodal/mllm/neva/neva_evaluation.py +++ b/examples/multimodal/mllm/neva/neva_evaluation.py @@ -31,6 +31,7 @@ from nemo.collections.nlp.modules.common.text_generation_utils import generate from nemo.collections.nlp.modules.common.transformer.text_generation import LengthParam, SamplingParam from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector +from nemo.collections.nlp.parts.peft_config import PEFT_CONFIG_MAP from nemo.core.config import hydra_runner from nemo.utils.app_state import AppState from nemo.utils.model_utils import inject_model_parallel_rank @@ -201,27 +202,31 @@ def main(cfg) -> None: if os.path.isdir(cfg.neva_model_file): save_restore_connector.model_extracted_dir = cfg.neva_model_file - pretrained_cfg = MegatronNevaModel.restore_from( + neva_cfg = MegatronNevaModel.restore_from( restore_path=cfg.neva_model_file, trainer=trainer, return_config=True, save_restore_connector=save_restore_connector, ) - OmegaConf.set_struct(pretrained_cfg, True) - with open_dict(pretrained_cfg): - pretrained_cfg.sequence_parallel = False - pretrained_cfg.activations_checkpoint_granularity = None - pretrained_cfg.activations_checkpoint_method = None - pretrained_cfg.precision = trainer.precision - pretrained_cfg.mm_cfg.llm.from_pretrained = None - # pretrained_cfg.mm_cfg.vision_encoder.from_pretrained = None + OmegaConf.set_struct(neva_cfg, True) + with open_dict(neva_cfg): + neva_cfg.sequence_parallel = False + neva_cfg.activations_checkpoint_granularity = None + neva_cfg.activations_checkpoint_method = None + neva_cfg.precision = trainer.precision + neva_cfg.mm_cfg.llm.from_pretrained = cfg.get('llm_model_file', None) + # neva_cfg.mm_cfg.vision_encoder.from_pretrained = None model = MegatronNevaModel.restore_from( restore_path=cfg.neva_model_file, trainer=trainer, - override_config_path=pretrained_cfg, + override_config_path=neva_cfg, save_restore_connector=save_restore_connector, ) + if neva_cfg.get('peft') is not None: + peft_cfg_cls = PEFT_CONFIG_MAP[neva_cfg.peft.peft_scheme] + if peft_cfg_cls is not None: + model.load_adapters(cfg.neva_model_file, peft_cfg_cls(neva_cfg)) elif cfg.checkpoint_dir: app_state = AppState() diff --git a/nemo/collections/multimodal/data/neva/conversation.py b/nemo/collections/multimodal/data/neva/conversation.py index 4d46bf4decf2..e7dff7ef4aa2 100644 --- a/nemo/collections/multimodal/data/neva/conversation.py +++ b/nemo/collections/multimodal/data/neva/conversation.py @@ -383,7 +383,7 @@ def dict(self): version="v1_mmtag", ) -default_conversation = conv_vicuna_v0 +default_conversation = conv_vicuna_v1 conv_templates = { "default": conv_vicuna_v0, "v0": conv_vicuna_v0, diff --git a/nemo/collections/multimodal/data/neva/neva_dataset.py b/nemo/collections/multimodal/data/neva/neva_dataset.py index dc4a609f8d48..670822eb5eec 100644 --- a/nemo/collections/multimodal/data/neva/neva_dataset.py +++ b/nemo/collections/multimodal/data/neva/neva_dataset.py @@ -225,8 +225,6 @@ def preprocess_v1(sources: dict, tokenizer: transformers.PreTrainedTokenizer, cf conv.append_message(role, sentence["value"]) conversations.append(conv.get_prompt()) - # Tokenize conversations - add_extra_token = cfg.get("add_extra_token") # Tokenize conversations tokens = tokenize( @@ -236,6 +234,10 @@ def preprocess_v1(sources: dict, tokenizer: transformers.PreTrainedTokenizer, cf add_extra_token=add_extra_token, ) + # llama tricks + tokens[tokens == 32003] = 0 # DEFAULT_IMAGE_PATCH_TOKEN + tokens[tokens == 32006] = 1 # + tokens[tokens == 32007] = 2 # labels = tokens.clone().detach() # Mask labels @@ -243,8 +245,7 @@ def preprocess_v1(sources: dict, tokenizer: transformers.PreTrainedTokenizer, cf for conversation, target in zip(conversations, labels): rounds = conversation.split(conv.sep2) - cur_len = 1 - target[:cur_len] = IGNORE_INDEX + cur_len = 0 for i, rou in enumerate(rounds): if rou == "": break @@ -254,9 +255,11 @@ def preprocess_v1(sources: dict, tokenizer: transformers.PreTrainedTokenizer, cf break parts[0] += sep - round_len = len(tokenizer.text_to_ids(rou)) - instruction_len = len(tokenizer.text_to_ids(parts[0])) - 2 - + round_len = len(tokenizer.text_to_ids(rou + conv.sep2)) + instruction_len = len(tokenizer.text_to_ids(parts[0])) - 1 + if i > 0: + round_len -= 1 # Remove extra token added by sp tokenizer + instruction_len -= 1 target[cur_len : cur_len + instruction_len] = IGNORE_INDEX cur_len += round_len diff --git a/nemo/collections/multimodal/models/neva/neva_model.py b/nemo/collections/multimodal/models/neva/neva_model.py index 9912bd79e291..a5920101259d 100644 --- a/nemo/collections/multimodal/models/neva/neva_model.py +++ b/nemo/collections/multimodal/models/neva/neva_model.py @@ -54,7 +54,7 @@ from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel from nemo.collections.nlp.models.language_modeling.megatron_gpt_peft_models import MegatronGPTPEFTModel from nemo.collections.nlp.models.nlp_model import NLPModel -from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import AdapterName, MMLinearAdapterConfig +from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import AdapterName, MMProjectorAdapterConfig from nemo.collections.nlp.modules.common.megatron.build_model import build_model from nemo.collections.nlp.modules.common.megatron.language_model import Embedding, get_language_model from nemo.collections.nlp.modules.common.megatron.module import Float16Module, MegatronModule @@ -100,7 +100,7 @@ HAVE_APEX = False try: - from megatron.core import dist_checkpointing, parallel_state + from megatron.core import dist_checkpointing, parallel_state, InferenceParams from megatron.core.pipeline_parallel.schedules import get_forward_backward_func from megatron.core.models.gpt import GPTModel as MCoreGPTModel @@ -149,14 +149,14 @@ def freeze(self) -> None: class NevaWordEmbeddingMixin(torch.nn.Module, adapter_mixins.AdapterModuleMixin): def init_vision( - self, - vision_encoder, - media_start_id, - media_end_id, - vision_select_layer=-1, - class_token_length=1, - use_im_start_end=False, - llama_tricks=False, + self, + vision_encoder, + media_start_id, + media_end_id, + vision_select_layer=-1, + class_token_length=1, + use_im_start_end=False, + llama_tricks=False, ): self.vision_encoder = vision_encoder self.from_hf = isinstance(vision_encoder, CLIPVisionModel) @@ -166,7 +166,7 @@ def init_vision( self.use_im_start_end = use_im_start_end self.vision_select_layer = vision_select_layer self.media = None - self.set_accepted_adapter_types([MMLinearAdapterConfig._target_]) + self.set_accepted_adapter_types([MMProjectorAdapterConfig._target_]) self.llama_tricks = llama_tricks def set_media(self, media): @@ -213,9 +213,9 @@ def encode_vision_x(self, vision_x: torch.Tensor): self.vision_encoder.backbone.transformer.return_select_layer = self.vision_select_layer vision_x = self.vision_encoder(vision_x) vision_x = rearrange(vision_x, "(b T F) v d -> b T F v d", b=b, T=T, F=F) - vision_x = vision_x[:, :, :, self.class_token_length :] + vision_x = vision_x[:, :, :, self.class_token_length:] assert self.is_adapter_available(), "Cannot find multimodal vision adapter!" - vision_connector = self.get_adapter_module(AdapterName.MM_LINEAR_ADAPTER) + vision_connector = self.get_adapter_module(AdapterName.MM_PROJECTOR_ADAPTER) vision_x = vision_connector(vision_x) return vision_x @@ -243,7 +243,7 @@ def replace_media_embeddings(self, input_ids, inputs_embeds, media): # locate the first media token positions padded_media_indices[idx, : len(media_end_positions)] = media_end_positions - num_patches assert ( - input_id[padded_media_indices[idx, : len(media_end_positions)] - 1] == self.media_start_id + input_id[padded_media_indices[idx, : len(media_end_positions)] - 1] == self.media_start_id ).all() else: padded_media_indices[idx, : len(media_end_positions)] = media_end_positions - num_patches + 1 @@ -269,11 +269,12 @@ def replace_media_embeddings(self, input_ids, inputs_embeds, media): return updated_input_embeds + class MCoreNevaModel(MCoreGPTModel): def __init__( - self, mm_cfg, media_start_id, media_end_id, **kwargs, + self, mm_cfg, media_start_id, media_end_id, **kwargs, ): - super(MCoreNevaModel, self).__init__(**kwargs,) + super(MCoreNevaModel, self).__init__(**kwargs, ) self.mm_cfg = mm_cfg self.media_start_id = media_start_id @@ -325,11 +326,11 @@ def __init__( vision_select_layer=mm_cfg.vision_encoder.get("vision_select_layer", -2), class_token_length=mm_cfg.vision_encoder.get("class_token_length", 1), use_im_start_end=mm_cfg.get("use_im_start_end", False), - llama_tricks=(model_type == "llama_2"), + llama_tricks=(model_type == "llama_2" or model_type == "v1"), ) def forward( - self, *args, **kwargs, + self, *args, **kwargs, ): media = kwargs.pop('media', None) self.embedding.word_embeddings.set_media(media) @@ -422,11 +423,12 @@ def load_llm_weights(self, nemo_path): self.language_model.load_state_dict(new_state_dict, strict=True) print(f"Restored LLM weights from {nemo_path}.") + class NevaModel(GPTModel): def __init__( - self, mm_cfg, media_start_id, media_end_id, **kwargs, + self, mm_cfg, media_start_id, media_end_id, **kwargs, ): - super(NevaModel, self).__init__(**kwargs,) + super(NevaModel, self).__init__(**kwargs, ) self.mm_cfg = mm_cfg self.media_start_id = media_start_id @@ -470,11 +472,11 @@ def __init__( vision_select_layer=mm_cfg.vision_encoder.get("vision_select_layer", -2), class_token_length=mm_cfg.vision_encoder.get("class_token_length", 1), use_im_start_end=mm_cfg.get("use_im_start_end", False), - llama_tricks=(model_type == "llama_2"), + llama_tricks=(model_type == "llama_2" or model_type == "v1"), ) def forward( - self, *args, **kwargs, + self, *args, **kwargs, ): media = kwargs.pop('media', None) self.language_model.embedding.word_embeddings.set_media(media) @@ -553,14 +555,14 @@ class MegatronNevaModel(MultimodalAdapterModelMixin, MegatronGPTModel): """ def __init__(self, cfg: DictConfig, trainer: Trainer): - # MegatronGPTModel.__init__(self, cfg, trainer) super().__init__(cfg, trainer) self.init_neva_adapter() def init_neva_adapter(self): self.base_keys = self._get_all_keys() - adapter_name = AdapterName.MM_LINEAR_ADAPTER - adapter_cfg = MMLinearAdapterConfig( + adapter_name = AdapterName.MM_PROJECTOR_ADAPTER + adapter_cfg = MMProjectorAdapterConfig( + adapter_type=self.cfg.mm_cfg.get("mm_mlp_adapter_type", "linear"), in_features=self.cfg.mm_cfg.vision_encoder.hidden_size, out_features=self.cfg.hidden_size, bias=True, ) @@ -573,7 +575,6 @@ def init_neva_adapter(self): if self.megatron_amp_O2: self.adapter_keys = set(key.replace("model.module.", "model.", 1) for key in self.adapter_keys) - def model_provider_func(self, pre_process, post_process): """Model depends on pipeline paralellism.""" media_start_id = self.tokenizer.token_to_id(DEFAULT_IM_START_TOKEN) @@ -583,6 +584,7 @@ def model_provider_func(self, pre_process, post_process): if parallel_state.is_unitialized(): def dummy(): return + if self.trainer.strategy.launcher is not None: self.trainer.strategy.launcher.launch(dummy, trainer=self.trainer) self.trainer.strategy.setup_environment() @@ -794,10 +796,27 @@ def fwd_output_only_func(dataloader_iter, model): if media is not None: media = media.cuda() labels = None - extra_arg['set_inference_key_value_memory'] = set_inference_key_value_memory[0].item() - extra_arg['inference_max_sequence_len'] = inference_max_sequence_len[0].item() - # TODO : Should I add labels ? - output_tensor = model(tokens, position_ids, attention_mask, labels, media, **extra_arg) + if self.mcore_gpt: + # if first step, then clear KV cache, otherwise reuse inference_paarms + if set_inference_key_value_memory[0].item(): + self.inference_params = InferenceParams( + max_batch_size=tokens.size(0), max_sequence_length=inference_max_sequence_len[0].item() + ) + extra_arg['inference_params'] = self.inference_params + else: + extra_arg['set_inference_key_value_memory'] = set_inference_key_value_memory[0].item() + extra_arg['inference_max_sequence_len'] = inference_max_sequence_len[0].item() + + forward_args = { + 'input_ids': tokens, + 'position_ids': position_ids, + 'attention_mask': attention_mask, + 'labels': labels, + 'media': media, + } + if not self.mcore_gpt: + forward_args['checkpoint_activations_all_layers'] = None + output_tensor = model(**forward_args, **extra_arg) def id_func(output_tensor): return output_tensor, {'logits': output_tensor} @@ -926,14 +945,14 @@ def setup(self, stage=None): def build_train_valid_test_datasets(self): logging.info('Building Neva datasets.') - ds_dict = make_supervised_data_module(tokenizer=self.tokenizer, model_cfg=self.cfg,) + ds_dict = make_supervised_data_module(tokenizer=self.tokenizer, model_cfg=self.cfg, ) self._train_ds = ds_dict["train_dataset"] self._validation_ds = ds_dict["eval_dataset"] return self._train_ds, self._validation_ds def build_pretraining_data_loader( - self, dataset, consumed_samples, dataset_type=None, drop_last=True, pad_samples_to_global_batch_size=False + self, dataset, consumed_samples, dataset_type=None, drop_last=True, pad_samples_to_global_batch_size=False ): """Buld dataloader given an input dataset.""" @@ -990,21 +1009,22 @@ def setup_test_data(self, cfg): def state_dict(self, destination=None, prefix='', keep_vars=False): # Get the original state dictionary original_state_dict = super().state_dict(destination=destination, prefix=prefix, keep_vars=keep_vars) + keys_to_keep = list(self.adapter_keys) # TODO(yuya): maybe not hard-code vision_encoder keys here - if self.megatron_amp_O2: - vision_encoder_keys = [ - k.replace("model.module.", "model.", 1) for k in self.base_keys if "vision_encoder" in k - ] - llm_keys = [k.replace("model.module.", "model.", 1) for k in self.base_keys if "vision_encoder" not in k] - else: - vision_encoder_keys = [k for k in self.base_keys if "vision_encoder" in k] - llm_keys = [k for k in self.base_keys if "vision_encoder" not in k] + vision_encoder_keys = [k for k in self.base_keys if "vision_encoder" in k] + llm_keys = [k for k in self.base_keys if "vision_encoder" not in k] if not self.cfg.mm_cfg.llm.freeze: keys_to_keep += llm_keys if not self.cfg.mm_cfg.vision_encoder.freeze: keys_to_keep += vision_encoder_keys - return {k: original_state_dict[k] for k in keys_to_keep if k in original_state_dict} + if self.megatron_amp_O2: + new_state_dict = { + k: original_state_dict[k.replace("model.", "model.module.", 1)] for k in keys_to_keep + } + else: + new_state_dict = {k: original_state_dict[k] for k in keys_to_keep} + return new_state_dict def load_state_dict(self, state_dict, strict=False): logging.warning('Loading state dict for MegatronNevaModel...') @@ -1013,10 +1033,13 @@ def load_state_dict(self, state_dict, strict=False): if len(missing_keys) > 0: logging.warning('Missing keys were detected during the load. Please double check.') logging.warning(f'Missing keys: \n{missing_keys}') + if len(missing_keys) > 10: + logging.warning(f'Missing keys: {", ".join(missing_keys[:10])} and {len(missing_keys) - 10} more.') + else: + logging.warning(f'Missing keys: {", ".join(missing_keys)}') if len(unexpected_keys) > 0: logging.critical('Unexpected keys were detected during the load. Please double check.') logging.critical(f'Unexpected keys: \n{unexpected_keys}') - raise ValueError def on_load_checkpoint(self, checkpoint) -> None: pass @@ -1056,7 +1079,7 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] return generate(self, **inference_config) def generate( - self, input_prompts, inference_config, length_params: LengthParam, sampling_params: SamplingParam = None, + self, input_prompts, inference_config, length_params: LengthParam, sampling_params: SamplingParam = None, ) -> OutputType: # check whether the DDP is initialized diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py index 4f1556ef572b..4b853947d908 100644 --- a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py +++ b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py @@ -14,6 +14,7 @@ # limitations under the License. +import re import enum import logging from dataclasses import dataclass @@ -66,7 +67,7 @@ class AdapterName(str, enum.Enum): LORA_KQV_ADAPTER = "lora_kqv_adapter" LORA_KV_ADAPTER = "lora_kv_adapter" LORA_Q_ADAPTER = "lora_q_adapter" - MM_LINEAR_ADAPTER = "mm_linear_adapter" + MM_PROJECTOR_ADAPTER = "mm_projector_adapter" class InfusedAdapter(nn.Module, AdapterModuleUtil): @@ -571,18 +572,34 @@ class LoraKQVAdapterWeightTyingConfig(ParallelLinearAdapterWeightTyingConfig): _target_: str = "{0}.{1}".format(LoraKQVAdapterWeightTying.__module__, LoraKQVAdapterWeightTying.__name__) -class MMLinearAdapter(nn.Module, AdapterModuleUtil): - def __init__(self, in_features: int, out_features: int, bias: bool, **kwargs) -> None: +class MMProjectorAdapter(nn.Module, AdapterModuleUtil): + def __init__(self, adapter_type: str, in_features: int, out_features: int, bias: bool, **kwargs) -> None: super().__init__() - self.linear = torch.nn.Linear(in_features, out_features, bias,) + + if adapter_type == 'linear': + self.mm_projector = torch.nn.Linear(in_features, out_features, bias) + elif adapter_type == 'identity': + self.mm_projector = lambda x: x + else: + mlp_gelu_match = re.match(r'^mlp(\d+)x_gelu$', adapter_type) + if mlp_gelu_match: + mlp_depth = int(mlp_gelu_match.group(1)) + modules = [torch.nn.Linear(in_features, out_features, bias)] + for _ in range(1, mlp_depth): + modules.append(torch.nn.GELU()) + modules.append(torch.nn.Linear(out_features, out_features, bias)) + self.mm_projector = torch.nn.Sequential(*modules) + else: + raise ValueError(f'Unknown mm_mlp_adapter_type type: {adapter_type}') def forward(self, x): - return self.linear(x) + return self.mm_projector(x) @dataclass -class MMLinearAdapterConfig: +class MMProjectorAdapterConfig: + adapter_type: str in_features: int out_features: int bias: bool - _target_: str = "{0}.{1}".format(MMLinearAdapter.__module__, MMLinearAdapter.__name__) + _target_: str = "{0}.{1}".format(MMProjectorAdapter.__module__, MMProjectorAdapter.__name__) diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py index fa9a631d848c..da91beebc8e8 100644 --- a/nemo/collections/nlp/modules/common/text_generation_strategy.py +++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py @@ -382,6 +382,7 @@ def init_batch(self, context_tokens: torch.Tensor, context_length: int, compute_ def process_prompts(self, prompt): from nemo.collections.multimodal.data.neva.neva_dataset import ( DEFAULT_IMAGE_TOKEN, + preprocess_v1, preprocess_llama_2, preprocess_multimodal, preprocess_nvgpt, @@ -416,7 +417,7 @@ def process_prompts(self, prompt): 'conversations': [{'from': 'human', 'value': prompt,}, {'from': 'gpt', 'value': '',},], } - for turn in record['conversations']: # + for turn in record['conversations']: if turn.get('value') is not None: turn['value'] = re.sub('', f'{DEFAULT_IMAGE_TOKEN}\n', turn['value']) list_data_dict.append(record) @@ -425,6 +426,20 @@ def process_prompts(self, prompt): copy.deepcopy(list_data_dict), self.multimodal_cfg, self.num_media_latents ) # HARDCODED FOR NOW data_dict = preprocess_llama_2(sources, self.tokenizer, self.multimodal_cfg) + elif self.multimodal_cfg["conv_template"] == "v1": + record = { + 'conversations': [{'from': 'human', 'value': prompt,}, {'from': 'gpt', 'value': '',},], + } + + for turn in record['conversations']: + if turn.get('value') is not None: + turn['value'] = re.sub('', f'{DEFAULT_IMAGE_TOKEN}\n', turn['value']) + list_data_dict.append(record) + + sources = preprocess_multimodal( + copy.deepcopy(list_data_dict), self.multimodal_cfg, self.num_media_latents + ) # HARDCODED FOR NOW + data_dict = preprocess_v1(sources, self.tokenizer, self.multimodal_cfg) else: raise ValueError(f"Conversation template `{self.conv_template}` is not supported in Neva now.") return data_dict['tokens'].tolist() diff --git a/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py b/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py index 120121c33f73..a79ac4dfcbc9 100644 --- a/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py +++ b/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py @@ -19,6 +19,7 @@ import torch from omegaconf import DictConfig, OmegaConf, open_dict +from nemo.collections.nlp.models.nlp_model import NLPModel from nemo.collections.nlp.parts.mixins.nlp_adapter_mixins import NLPAdapterModelMixin from nemo.collections.nlp.parts.peft_config import ( PEFT_CONFIG_MAP, @@ -81,6 +82,65 @@ def add_adapter(self, peft_cfgs: Union[PEFTConfig, List[PEFTConfig]]): self.tie_weights(cfg) self.use_peft = True + def load_adapters( + self, filepath: str, peft_cfgs: Optional[Union[PEFTConfig, List[PEFTConfig]]] = None, map_location: str = None, + ): + """ + Utility method that restores only the adapter module(s), and not the entire model itself. + This allows the sharing of adapters which are often just a fraction of the size of the full model, + enabling easier deliver. + + .. note:: + + During restoration, assumes that the model does not currently already have one or more adapter modules. + + Args: + filepath: Filepath of the .ckpt or .nemo file. + peft_cfgs: One or more PEFTConfig objects that specify the PEFT method configuration. + If none, will infer from the .nemo checkpoint + map_location: Pytorch flag, where to place the adapter(s) state dict(s). + """ + + # Determine device + if map_location is None: + if torch.cuda.is_available(): + map_location = 'cuda' + else: + map_location = 'cpu' + + if filepath.endswith('.nemo'): + conf, state_dict = self._get_config_and_state_dict_from_nemo(filepath, map_location) + elif filepath.endswith('.ckpt'): + state_dict = torch.load(filepath, map_location)['state_dict'] + else: + raise RuntimeError(f"{filepath} is not nemo file or ckpt file") + if not peft_cfgs: + assert filepath.endswith( + '.nemo' + ), "Inferring peft scheme is only supported for .nemo checkpoints. Please supply the `peft_cfgs` argument." + peft_cfgs = [PEFT_CONFIG_MAP[conf.peft.peft_scheme](conf)] + self.add_adapter(peft_cfgs) + assert set(state_dict.keys()) == self.adapter_keys + + if self.megatron_amp_O2: + state_dict = { + k.replace("model.", "model.module.", 1): v for k, v in state_dict.items() + } + + missing_keys, unexpected_keys = NLPModel.load_state_dict(self, state_dict, strict=False) + + if len(missing_keys) > 0: + logging.warning('Missing keys were detected during the load. Please double check.') + logging.warning(f'Missing keys: \n{missing_keys}') + if len(missing_keys) > 10: + logging.warning(f'Missing keys: {", ".join(missing_keys[:10])} and {len(missing_keys) - 10} more.') + else: + logging.warning(f'Missing keys: {", ".join(missing_keys)}') + if len(unexpected_keys) > 0: + logging.critical('Unexpected keys were detected during the load. Please double check.') + logging.critical(f'Unexpected keys: \n{unexpected_keys}') + raise ValueError('Unexpected keys were detected during the load. Please double check.') + def _check_and_add_adapter( self, name, module, peft_name, peft_cfg, name_key_to_mcore_mixins=None, autocast_dtype=None ): diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index f61fb2cd654d..b8ae38fc5382 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -589,6 +589,12 @@ def modify_state_dict(self, conf, state_dict): new_state_dict[new_key] = state_dict[key] state_dict = new_state_dict + new_state_dict = {} + for key in state_dict.keys(): + new_key = key.replace('word_embeddings.adapter_layer.mm_linear_adapter.linear', 'word_embeddings.adapter_layer.mm_projector_adapter.mm_projector', 1) + new_state_dict[new_key] = state_dict[key] + state_dict = new_state_dict + # compatibility for inductor in inference if not conf.get('inductor', False): new_state_dict = {} From 5965a5f9001cde0993481983a7005910a589c722 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Mon, 30 Oct 2023 16:21:13 -0700 Subject: [PATCH 360/512] Fix logs --- nemo/collections/multimodal/models/neva/neva_model.py | 5 ++--- .../nlp/parts/mixins/multimodal_adapter_mixins.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/nemo/collections/multimodal/models/neva/neva_model.py b/nemo/collections/multimodal/models/neva/neva_model.py index a5920101259d..e6c1bf2815a9 100644 --- a/nemo/collections/multimodal/models/neva/neva_model.py +++ b/nemo/collections/multimodal/models/neva/neva_model.py @@ -1032,11 +1032,10 @@ def load_state_dict(self, state_dict, strict=False): if len(missing_keys) > 0: logging.warning('Missing keys were detected during the load. Please double check.') - logging.warning(f'Missing keys: \n{missing_keys}') if len(missing_keys) > 10: - logging.warning(f'Missing keys: {", ".join(missing_keys[:10])} and {len(missing_keys) - 10} more.') + logging.warning(f'Missing keys: {missing_keys[:10]} and {len(missing_keys) - 10} more.') else: - logging.warning(f'Missing keys: {", ".join(missing_keys)}') + logging.warning(f'Missing keys: {missing_keys}') if len(unexpected_keys) > 0: logging.critical('Unexpected keys were detected during the load. Please double check.') logging.critical(f'Unexpected keys: \n{unexpected_keys}') diff --git a/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py b/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py index a79ac4dfcbc9..9b14ed4e8212 100644 --- a/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py +++ b/nemo/collections/nlp/parts/mixins/multimodal_adapter_mixins.py @@ -131,11 +131,10 @@ def load_adapters( if len(missing_keys) > 0: logging.warning('Missing keys were detected during the load. Please double check.') - logging.warning(f'Missing keys: \n{missing_keys}') if len(missing_keys) > 10: - logging.warning(f'Missing keys: {", ".join(missing_keys[:10])} and {len(missing_keys) - 10} more.') + logging.warning(f'Missing keys: {missing_keys[:10]} and {len(missing_keys) - 10} more.') else: - logging.warning(f'Missing keys: {", ".join(missing_keys)}') + logging.warning(f'Missing keys: {missing_keys}') if len(unexpected_keys) > 0: logging.critical('Unexpected keys were detected during the load. Please double check.') logging.critical(f'Unexpected keys: \n{unexpected_keys}') From 26ee7dc897b557e054f18d3abcc5bd68c6a9c053 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Tue, 31 Oct 2023 10:42:58 -0700 Subject: [PATCH 361/512] Fix neva mcore infernece --- nemo/collections/multimodal/models/neva/neva_model.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nemo/collections/multimodal/models/neva/neva_model.py b/nemo/collections/multimodal/models/neva/neva_model.py index e6c1bf2815a9..824586390704 100644 --- a/nemo/collections/multimodal/models/neva/neva_model.py +++ b/nemo/collections/multimodal/models/neva/neva_model.py @@ -818,6 +818,14 @@ def fwd_output_only_func(dataloader_iter, model): forward_args['checkpoint_activations_all_layers'] = None output_tensor = model(**forward_args, **extra_arg) + # Advance inference sequence offset. + if self.inference_params: + # if last stage, then (final) output is [b, s, h], otherwise it's [s, b, h] + if parallel_state.is_pipeline_last_stage(): + self.inference_params.sequence_len_offset += output_tensor.size(1) + else: + self.inference_params.sequence_len_offset += output_tensor.size(0) + def id_func(output_tensor): return output_tensor, {'logits': output_tensor} From 7356b1c107cfdd02d31a3e1b12aa559359e7a293 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Tue, 31 Oct 2023 14:33:15 -0700 Subject: [PATCH 362/512] Fix ema --- nemo/collections/common/callbacks/ema.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nemo/collections/common/callbacks/ema.py b/nemo/collections/common/callbacks/ema.py index 55243c542866..2f295bf67354 100644 --- a/nemo/collections/common/callbacks/ema.py +++ b/nemo/collections/common/callbacks/ema.py @@ -244,7 +244,10 @@ def step(self, closure=None, grad_scaler=None, **kwargs): ) self.rebuild_ema_params = False - loss = self.optimizer.step(closure=closure, grad_scaler=grad_scaler) + if getattr(self.optimizer, "_step_supports_amp_scaling", False) and grad_scaler is not None: + loss = self.optimizer.step(closure=closure, grad_scaler=grad_scaler) + else: + loss = self.optimizer.step(closure) if self._should_update_at_step(): self.update() From 93e4f992265feba98b7f2e49890327f4d2a18f27 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Tue, 31 Oct 2023 14:33:15 -0700 Subject: [PATCH 363/512] Fix ema --- nemo/collections/common/callbacks/ema.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nemo/collections/common/callbacks/ema.py b/nemo/collections/common/callbacks/ema.py index 55243c542866..2f295bf67354 100644 --- a/nemo/collections/common/callbacks/ema.py +++ b/nemo/collections/common/callbacks/ema.py @@ -244,7 +244,10 @@ def step(self, closure=None, grad_scaler=None, **kwargs): ) self.rebuild_ema_params = False - loss = self.optimizer.step(closure=closure, grad_scaler=grad_scaler) + if getattr(self.optimizer, "_step_supports_amp_scaling", False) and grad_scaler is not None: + loss = self.optimizer.step(closure=closure, grad_scaler=grad_scaler) + else: + loss = self.optimizer.step(closure) if self._should_update_at_step(): self.update() From ca3d8f9e0a22409d7ca27c4548566bb5cad9322b Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Tue, 31 Oct 2023 17:08:22 -0700 Subject: [PATCH 364/512] Address Somshubra comments --- .../megatron_gpt_peft_models.py | 2 -- .../megatron/adapters/parallel_adapters.py | 6 ++-- nemo/core/optim/__init__.py | 1 + nemo/core/optim/megatron_fused_adam.py | 29 ++++++++++++++----- nemo/core/optim/optimizers.py | 2 +- 5 files changed, 27 insertions(+), 13 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py index 281c451a1e7a..f985f99218e8 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py @@ -63,8 +63,6 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self.freeze() self.init_peft_modules() self.adapter_keys = self.get_all_keys() - self.base_keys - if self.megatron_amp_O2: - self.adapter_keys = set(key.replace("model.module.", "model.", 1) for key in self.adapter_keys) def first_stage_of_pipeline(self): if hasattr(self, "model") and hasattr(self.model, "pre_process"): diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py index 4f1556ef572b..8c4fe405192d 100644 --- a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py +++ b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py @@ -571,7 +571,7 @@ class LoraKQVAdapterWeightTyingConfig(ParallelLinearAdapterWeightTyingConfig): _target_: str = "{0}.{1}".format(LoraKQVAdapterWeightTying.__module__, LoraKQVAdapterWeightTying.__name__) -class MMLinearAdapter(nn.Module, AdapterModuleUtil): +class MultiModalLinearAdapter(nn.Module, AdapterModuleUtil): def __init__(self, in_features: int, out_features: int, bias: bool, **kwargs) -> None: super().__init__() self.linear = torch.nn.Linear(in_features, out_features, bias,) @@ -581,8 +581,8 @@ def forward(self, x): @dataclass -class MMLinearAdapterConfig: +class MultiModalLinearAdapterConfig: in_features: int out_features: int bias: bool - _target_: str = "{0}.{1}".format(MMLinearAdapter.__module__, MMLinearAdapter.__name__) + _target_: str = "{0}.{1}".format(MultiModalLinearAdapter.__module__, MultiModalLinearAdapter.__name__) diff --git a/nemo/core/optim/__init__.py b/nemo/core/optim/__init__.py index 825aab446e94..79c4a8dc58ca 100644 --- a/nemo/core/optim/__init__.py +++ b/nemo/core/optim/__init__.py @@ -13,6 +13,7 @@ # limitations under the License. from nemo.core.optim.adafactor import Adafactor +from nemo.core.optim.adan import Adan from nemo.core.optim.lr_scheduler import ( CosineAnnealing, InverseSquareRootAnnealing, diff --git a/nemo/core/optim/megatron_fused_adam.py b/nemo/core/optim/megatron_fused_adam.py index 87439c1d80d7..909c87f9727d 100755 --- a/nemo/core/optim/megatron_fused_adam.py +++ b/nemo/core/optim/megatron_fused_adam.py @@ -14,8 +14,28 @@ import amp_C import torch -from apex.multi_tensor_apply import multi_tensor_applier -from apex.optimizers import FusedAdam +from nemo.collections.nlp.modules.common.megatron.module import param_is_not_shared + +try: + from megatron.core import parallel_state + from megatron.core.tensor_parallel.layers import param_is_not_tensor_parallel_duplicate + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + HAVE_MEGATRON_CORE = False + +try: + from apex.multi_tensor_apply import multi_tensor_applier + from apex.optimizers import FusedAdam + + HAVE_APEX = True + +except ModuleNotFoundError: + HAVE_APEX = False + + class MegatronFusedAdam(FusedAdam): @@ -34,11 +54,6 @@ def __init__(self, *args, max_norm=0, norm_type=2, **kwargs): self.norm_type = float(norm_type) def step(self, closure=None, grad_scaler=None): - from megatron.core import parallel_state - from megatron.core.tensor_parallel.layers import param_is_not_tensor_parallel_duplicate - - from nemo.collections.nlp.modules.common.megatron.module import param_is_not_shared - # Code path below assumes capturable=True and master_weights=True if not (self.capturable and self.master_weights): return super().step(closure=closure, grad_scaler=grad_scaler) diff --git a/nemo/core/optim/optimizers.py b/nemo/core/optim/optimizers.py index 69bcb46641a2..1d52a9bf10f8 100644 --- a/nemo/core/optim/optimizers.py +++ b/nemo/core/optim/optimizers.py @@ -72,7 +72,7 @@ AVAILABLE_OPTIMIZERS['megatron_fused_adam'] = MegatronFusedAdam except (ImportError, ModuleNotFoundError): - logging.warning("Could not import MegatronFusedAdam optimizer") + pass __all__ = ['get_optimizer', 'register_optimizer', 'parse_optimizer_args'] From 544e5ea378aa0e750f895511eb8ca416d4dd7f44 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 Nov 2023 00:09:23 +0000 Subject: [PATCH 365/512] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- nemo/core/optim/megatron_fused_adam.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/nemo/core/optim/megatron_fused_adam.py b/nemo/core/optim/megatron_fused_adam.py index 909c87f9727d..9278f0a134ef 100755 --- a/nemo/core/optim/megatron_fused_adam.py +++ b/nemo/core/optim/megatron_fused_adam.py @@ -36,8 +36,6 @@ HAVE_APEX = False - - class MegatronFusedAdam(FusedAdam): """Wrapper class that supports NeMo-Megatron optimizations From 8493a8aed10f364e0f4edb5e7cf9884923812780 Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Tue, 31 Oct 2023 17:18:29 -0700 Subject: [PATCH 366/512] Fix NeVA --- .../mllm/neva/conf/llava_config.yaml | 1 - .../mllm/neva/convert_hf_llava_to_neva.py | 6 +- .../multimodal/models/neva/neva_model.py | 201 ++++++------------ 3 files changed, 72 insertions(+), 136 deletions(-) diff --git a/examples/multimodal/mllm/neva/conf/llava_config.yaml b/examples/multimodal/mllm/neva/conf/llava_config.yaml index 06f8d0d4634d..6f41f357e09e 100644 --- a/examples/multimodal/mllm/neva/conf/llava_config.yaml +++ b/examples/multimodal/mllm/neva/conf/llava_config.yaml @@ -121,7 +121,6 @@ model: batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 seq_len_interpolation_factor: null # RoPE Interpolation factor for sequence length. This is used to build long-context models with RoPE ex: https://arxiv.org/abs/2306.15595. num_query_groups: null # Number of query groups for group query attention. If None, normal attention is used. - override_vocab_size: 32000 ## Activation Checkpointing activations_checkpoint_granularity: null # 'selective' or 'full' diff --git a/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py b/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py index 44cad58e435b..0ea8bef5e54b 100644 --- a/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py +++ b/examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py @@ -68,7 +68,11 @@ def load_model(cls, checkpoint, strict, **kwargs): model = cls(cfg=checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY], **kwargs) for name, module in model.named_parameters(): if name in checkpoint['state_dict']: - module.data = checkpoint['state_dict'][name] + if module.data.shape != checkpoint['state_dict'][name].shape: + print(f"WARNING: Auto padding {name} from {checkpoint['state_dict'][name].shape} to {module.data.shape}") + module.data[:checkpoint['state_dict'][name].size(0), :checkpoint['state_dict'][name].size(1)] = checkpoint['state_dict'][name] + else: + module.data = checkpoint['state_dict'][name] checkpoint['state_dict'].pop(name) else: print(f"Unexpected key: {name} not in checkpoint but in model.") diff --git a/nemo/collections/multimodal/models/neva/neva_model.py b/nemo/collections/multimodal/models/neva/neva_model.py index 824586390704..c33265d68fdb 100644 --- a/nemo/collections/multimodal/models/neva/neva_model.py +++ b/nemo/collections/multimodal/models/neva/neva_model.py @@ -24,6 +24,8 @@ import numpy as np import pandas as pd import torch +import torch.nn.functional as F + from einops import rearrange, repeat from omegaconf.dictconfig import DictConfig from omegaconf.omegaconf import OmegaConf, open_dict @@ -269,32 +271,22 @@ def replace_media_embeddings(self, input_ids, inputs_embeds, media): return updated_input_embeds - -class MCoreNevaModel(MCoreGPTModel): +class NevaBaseModel: def __init__( self, mm_cfg, media_start_id, media_end_id, **kwargs, ): - super(MCoreNevaModel, self).__init__(**kwargs, ) - self.mm_cfg = mm_cfg self.media_start_id = media_start_id self.media_end_id = media_end_id self.dist_ckpt = False + if getattr(self, 'language_model', None) is not None: + self.embedding = self.language_model.embedding if mm_cfg.llm.from_pretrained is not None: logging.info(f"Loading LLM weights from checkpoint {mm_cfg.llm.from_pretrained}") self.load_llm_weights(mm_cfg.llm.from_pretrained) - if mm_cfg.llm.freeze: - for param in chain( - self.embedding.parameters(), - self.decoder.parameters(), - self.output_layer.parameters(), - ): - param.requires_grad = False - self.embedding = self.embedding.eval() - self.decoder = self.decoder.eval() - self.output_layer = self.output_layer.eval() + self.freeze_llm(mm_cfg) # Initialize vision encoder and freeze it if mm_cfg.vision_encoder.from_hf: @@ -329,12 +321,8 @@ def __init__( llama_tricks=(model_type == "llama_2" or model_type == "v1"), ) - def forward( - self, *args, **kwargs, - ): - media = kwargs.pop('media', None) - self.embedding.word_embeddings.set_media(media) - return super().forward(*args, **kwargs) + def freeze_llm(self, mm_cfg): + raise NotImplementedError def _load_model_weights(self, nemo_path): """ @@ -406,6 +394,20 @@ def load_llm_weights(self, nemo_path): new_state_dict = {} if self.dist_ckpt: + if state_dict['model.embedding.word_embeddings.weight'].shape[0] < self.embedding.word_embeddings.num_embeddings: + assert state_dict['model.embedding.word_embeddings.weight'].shape == \ + state_dict['model.output_layer.weight'].shape + assert self.embedding.word_embeddings.num_embeddings == \ + self.embedding.word_embeddings.num_embeddings_per_partition, \ + "Word embedding doesn't match the word embedding shape from checkpoint!" + + pad_length = self.embedding.word_embeddings.num_embeddings - \ + state_dict['model.embedding.word_embeddings.weight'].shape[0] + state_dict['model.embedding.word_embeddings.weight'] = F.pad( + state_dict['model.embedding.word_embeddings.weight'], (0, 0, 0, pad_length)) + state_dict['model.output_layer.weight'] = F.pad( + state_dict['model.output_layer.weight'], (0, 0, 0, pad_length)) + for k, v in state_dict.items(): new_k = k if k.startswith("model."): @@ -413,6 +415,20 @@ def load_llm_weights(self, nemo_path): new_state_dict[new_k] = v self.load_state_dict(new_state_dict, strict=True) else: + if state_dict['model.language_model.embedding.word_embeddings.weight'].shape[0] < self.embedding.word_embeddings.num_embeddings: + assert state_dict['model.language_model.embedding.word_embeddings.weight'].shape == \ + state_dict['model.language_model.output_layer.weight'].shape + assert self.embedding.word_embeddings.num_embeddings == \ + self.embedding.word_embeddings.num_embeddings_per_partition, \ + "Word embedding doesn't match the word embedding shape from checkpoint!" + + pad_length = self.embedding.word_embeddings.num_embeddings - \ + state_dict['model.language_model.embedding.word_embeddings.weight'].shape[0] + state_dict['model.language_model.embedding.word_embeddings.weight'] = F.pad( + state_dict['model.language_model.embedding.word_embeddings.weight'], (0, 0, 0, pad_length)) + state_dict['model.language_model.output_layer.weight'] = F.pad( + state_dict['model.language_model.output_layer.weight'], (0, 0, 0, pad_length)) + for k, v in state_dict.items(): if k.startswith("model.language_model."): new_k = k.replace("model.language_model.", "", 1) @@ -423,131 +439,48 @@ def load_llm_weights(self, nemo_path): self.language_model.load_state_dict(new_state_dict, strict=True) print(f"Restored LLM weights from {nemo_path}.") - -class NevaModel(GPTModel): +class MCoreNevaModel(MCoreGPTModel, NevaBaseModel): def __init__( self, mm_cfg, media_start_id, media_end_id, **kwargs, ): - super(NevaModel, self).__init__(**kwargs, ) - - self.mm_cfg = mm_cfg - self.media_start_id = media_start_id - self.media_end_id = media_end_id - - if mm_cfg.llm.from_pretrained is not None: - logging.info(f"Loading LLM weights from checkpoint {mm_cfg.llm.from_pretrained}") - self.load_llm_weights(self.language_model, mm_cfg.llm.from_pretrained) - if mm_cfg.llm.freeze: - for param in self.language_model.parameters(): - param.requires_grad = False - self.language_model = self.language_model.eval() - - # Initialize vision encoder and freeze it - if mm_cfg.vision_encoder.from_hf: - vision_encoder = CLIPVisionModel.from_pretrained( - mm_cfg.vision_encoder.from_pretrained, torch_dtype=torch.bfloat16, - ).cuda() - vision_encoder = vision_encoder.to(torch.bfloat16) - if mm_cfg.vision_encoder.freeze: - for param in vision_encoder.parameters(): - param.requires_grad = False - vision_encoder = vision_encoder.eval() - else: - vision_cfg = MegatronCLIPModel.restore_from( - mm_cfg.vision_encoder.from_pretrained, return_config=True - ).vision - vision_encoder = FrozenCLIPVisionTransformer(vision_cfg, self.config) - self.load_vision_encoder_weights(vision_encoder, mm_cfg.vision_encoder.from_pretrained) - if mm_cfg.vision_encoder.freeze: - vision_encoder.freeze() - - model_type = self.mm_cfg.llm.get("model_type", "nvgpt") - # Monkey patch embedding - if kwargs.get("pre_process", True): - extend_instance(self.language_model.embedding.word_embeddings, NevaWordEmbeddingMixin) - self.language_model.embedding.word_embeddings.init_vision( - vision_encoder, - media_start_id, - media_end_id, - vision_select_layer=mm_cfg.vision_encoder.get("vision_select_layer", -2), - class_token_length=mm_cfg.vision_encoder.get("class_token_length", 1), - use_im_start_end=mm_cfg.get("use_im_start_end", False), - llama_tricks=(model_type == "llama_2" or model_type == "v1"), - ) + MCoreGPTModel.__init__(self, **kwargs) + NevaBaseModel.__init__(self, mm_cfg, media_start_id, media_end_id, **kwargs) + + def freeze_llm(self, mm_cfg): + for param in chain( + self.embedding.parameters(), + self.decoder.parameters(), + self.output_layer.parameters(), + ): + param.requires_grad = False + self.embedding = self.embedding.eval() + self.decoder = self.decoder.eval() + self.output_layer = self.output_layer.eval() def forward( self, *args, **kwargs, ): media = kwargs.pop('media', None) - self.language_model.embedding.word_embeddings.set_media(media) - return super().forward(*args, **kwargs) - - def _load_model_weights(self, nemo_path): - """ - Shared method to load model weights from a given nemo_path. - """ - if torch.cuda.is_available(): - map_location = torch.device('cuda') - else: - map_location = torch.device('cpu') - - save_restore_connector = NLPSaveRestoreConnector() - cwd = os.getcwd() - app_state = AppState() - - with tempfile.TemporaryDirectory() as tmpdir: - try: - if os.path.isfile(nemo_path): - save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir) - else: - tmpdir = nemo_path - os.chdir(tmpdir) - if app_state.model_parallel_size is not None and app_state.model_parallel_size > 1: - model_weights = save_restore_connector._inject_model_parallel_rank_for_ckpt( - tmpdir, save_restore_connector.model_weights_ckpt - ) - else: - model_weights = os.path.join(tmpdir, save_restore_connector.model_weights_ckpt) - - state_dict = save_restore_connector._load_state_dict_from_disk( - model_weights, map_location=map_location - ) - finally: - os.chdir(cwd) - - return state_dict - - def load_vision_encoder_weights(self, vision_encoder, nemo_path): - state_dict = self._load_model_weights(nemo_path) - - new_state_dict = {} - for k, v in state_dict.items(): - if k.startswith("model.vision_encoder."): - new_k = k.replace("model.vision_encoder.", "") - new_state_dict[new_k] = v - - missing, unexpected = vision_encoder.load_state_dict(new_state_dict, strict=False) - print(f"Restored from {nemo_path} with {len(missing)} missing and {len(unexpected)} unexpected keys") - if len(missing) > 0: - print(f"Missing Keys: {missing}") - if len(unexpected) > 0: - print(f"Unexpected Keys: {unexpected}") + self.embedding.word_embeddings.set_media(media) + return MCoreNevaModel.forward(self, *args, **kwargs) - def load_llm_weights(self, language_model, nemo_path): - state_dict = self._load_model_weights(nemo_path) +class NevaModel(GPTModel, NevaBaseModel): + def __init__( + self, mm_cfg, media_start_id, media_end_id, **kwargs, + ): + GPTModel.__init__(self, **kwargs) + NevaBaseModel.__init__(self, mm_cfg, media_start_id, media_end_id, **kwargs) - new_state_dict = {} - for k, v in state_dict.items(): - if k.startswith("model.language_model."): - new_k = k.replace("model.language_model.", "", 1) - module_key, param_key = new_k.split(".", 1) - if module_key not in new_state_dict: - new_state_dict[module_key] = {} - new_state_dict[module_key][param_key] = v - - language_model.load_state_dict(new_state_dict, strict=True) - print(f"Restored LLM weights from {nemo_path}.") + def freeze_llm(self, mm_cfg): + for param in self.language_model.parameters(): + param.requires_grad = False + def forward( + self, *args, **kwargs, + ): + media = kwargs.pop('media', None) + self.embedding.word_embeddings.set_media(media) + return GPTModel.forward(self, *args, **kwargs) class MegatronNevaModel(MultimodalAdapterModelMixin, MegatronGPTModel): """ From ea3d4fc4cec95c0c544b4889e65e9cb58d19a58e Mon Sep 17 00:00:00 2001 From: yaoyu-33 Date: Wed, 1 Nov 2023 09:45:41 -0700 Subject: [PATCH 367/512] Remove llama tricks since we are padding the embedding weights directly now --- .../multimodal/models/neva/neva_model.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/nemo/collections/multimodal/models/neva/neva_model.py b/nemo/collections/multimodal/models/neva/neva_model.py index c33265d68fdb..5b9cf50d6be7 100644 --- a/nemo/collections/multimodal/models/neva/neva_model.py +++ b/nemo/collections/multimodal/models/neva/neva_model.py @@ -158,7 +158,6 @@ def init_vision( vision_select_layer=-1, class_token_length=1, use_im_start_end=False, - llama_tricks=False, ): self.vision_encoder = vision_encoder self.from_hf = isinstance(vision_encoder, CLIPVisionModel) @@ -169,23 +168,13 @@ def init_vision( self.vision_select_layer = vision_select_layer self.media = None self.set_accepted_adapter_types([MMProjectorAdapterConfig._target_]) - self.llama_tricks = llama_tricks def set_media(self, media): self.media = media def forward(self, input_ids, **kwargs): media = self.media # avoid change the signature of embedding forward function - if self.llama_tricks and not self.use_im_start_end: - masked_input_ids = input_ids.detach().clone() - if self.num_embeddings < 32000: - raise ValueError("Not supported tokenizer with llama 2!") - else: - masked_input_ids[masked_input_ids >= 32000] = 0 - words_embeddings = super().forward(masked_input_ids, **kwargs) - - else: - words_embeddings = super().forward(input_ids, **kwargs) + words_embeddings = super().forward(input_ids, **kwargs) return self.replace_media_embeddings(input_ids, words_embeddings, media) @@ -307,7 +296,6 @@ def __init__( if mm_cfg.vision_encoder.freeze: vision_encoder.freeze() - model_type = self.mm_cfg.llm.get("model_type", "nvgpt") # Monkey patch embedding if kwargs.get("pre_process", True): extend_instance(self.embedding.word_embeddings, NevaWordEmbeddingMixin) @@ -318,7 +306,6 @@ def __init__( vision_select_layer=mm_cfg.vision_encoder.get("vision_select_layer", -2), class_token_length=mm_cfg.vision_encoder.get("class_token_length", 1), use_im_start_end=mm_cfg.get("use_im_start_end", False), - llama_tricks=(model_type == "llama_2" or model_type == "v1"), ) def freeze_llm(self, mm_cfg): From 6f5df3fb8f4ddbbcc4be2d770b1c574c92c512cb Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Wed, 1 Nov 2023 11:29:17 -0700 Subject: [PATCH 368/512] Update Dockerfile and mm requirements --- Dockerfile | 23 +++++++++++++++++---- requirements/requirements_multimodal.txt | 26 ++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 4 deletions(-) create mode 100644 requirements/requirements_multimodal.txt diff --git a/Dockerfile b/Dockerfile index 06f96a091a22..d2ae195124f8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -54,8 +54,8 @@ WORKDIR /tmp/ # Distributed Adam support for multiple dtypes RUN git clone https://github.com/NVIDIA/apex.git && \ cd apex && \ - git checkout 52e18c894223800cb611682dce27d88050edf1de && \ - pip3 install -v --no-build-isolation --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_layer_norm" --global-option="--distributed_adam" --global-option="--deprecated_fused_adam" ./ + git checkout 9fc94b7d6db1b178adf9a6e92750f070dd9f825d && \ + pip install install -v --no-build-isolation --disable-pip-version-check --no-cache-dir --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam --group_norm" ./ # uninstall stuff from base container RUN pip3 uninstall -y sacrebleu torchtext @@ -79,11 +79,26 @@ RUN for f in $(ls requirements*.txt); do pip3 install --disable-pip-version-chec # install flash attention dependencies RUN pip install flash-attn -# pinned triton version for flash-attention https://github.com/HazyResearch/flash-attention/blob/main/flash_attn/flash_attn_triton.py#L3 -RUN pip install triton==2.0.0.dev20221202 # install numba for latest containers RUN pip install numba>=0.57.1 +RUN pip install -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers --src /opt + +RUN cd /tmp && \ + git clone https://github.com/ashawkey/stable-dreamfusion.git && \ + cd stable-dreamfusion && \ + git checkout 5550b91862a3af7842bb04875b7f1211e5095a63 && \ + find . -type f -name 'setup.py' -exec sed -i 's/c++14/c++17/g' {} + && \ + pip install --no-cache-dir ./raymarching && \ + pip install --no-cache-dir ./shencoder && \ + pip install --no-cache-dir ./freqencoder && \ + TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 9.0+PTX" pip install --no-cache-dir ./gridencoder && \ + cd /opt && \ + rm -rf /tmp/stable-dreamfusion + +RUN TCNN_CUDA_ARCHITECTURES="52,60,61,70,75,80,86,89,90" pip install --no-cache-dir \ + git+https://github.com/NVlabs/tiny-cuda-nn@6f018a9cd1b369bcb247e1d539968db8e48b2b3f#subdirectory=bindings/torch + # install k2, skip if installation fails COPY scripts /tmp/nemo/scripts/ RUN INSTALL_MSG=$(/bin/bash /tmp/nemo/scripts/speech_recognition/k2/setup.sh); INSTALL_CODE=$?; \ diff --git a/requirements/requirements_multimodal.txt b/requirements/requirements_multimodal.txt new file mode 100644 index 000000000000..4e3d32337b67 --- /dev/null +++ b/requirements/requirements_multimodal.txt @@ -0,0 +1,26 @@ +Pillow==9.3.0 +kornia +jax[cpu] +accelerate +seaborn +prefetch_generator +diffusers==0.19.3 +torchdiffeq +torchsde +addict +yapf +basicsr +open_clip_torch +torch-ema +trimesh +PyMCubes +xatlas +imageio-ffmpeg +pymeshlab +nerfacc==0.5.3 +einops_exts +opencv-python==4.8.0.74 +git+https://github.com/openai/CLIP.git@main#egg=clip +git+https://github.com/NVlabs/nvdiffrast.git@c5caf7bdb8a2448acc491a9faa47753972edd380 +-e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers +# webdataset==0.2.48 conflict with ASR models, manually pip install when using multimodal From 4dff83fea6ab755d7dc0263f35aacbe12a37c5b7 Mon Sep 17 00:00:00 2001 From: Yu Yao Date: Wed, 1 Nov 2023 14:03:37 -0700 Subject: [PATCH 369/512] Multimodal unit and jenkins tests --- Jenkinsfile | 253 +++++++++++- .../controlnet/conf/controlnet_v1-5.yaml | 2 + .../stable_diffusion/conf/sd_train.yaml | 2 +- .../data/controlnet/controlnet_dataset.py | 54 ++- .../stable_diffusion_dataset.py | 1 + .../collections/multimodal/test_clip_model.py | 32 +- .../collections/multimodal/test_controlnet.py | 355 +++++++++++++++++ .../collections/multimodal/test_dreambooth.py | 326 ++++++++++++++++ .../multimodal/test_dreamfusion_model.py | 283 ++++++++++++++ .../multimodal/test_imagen_model.py | 255 ++++++++++++ .../multimodal/test_insp2p_model.py | 306 +++++++++++++++ .../collections/multimodal/test_neva_model.py | 366 ++++++++++++++++++ tests/collections/multimodal/test_sd_model.py | 319 +++++++++++++++ tests/collections/vision/test_vit_model.py | 21 +- 14 files changed, 2543 insertions(+), 32 deletions(-) create mode 100644 tests/collections/multimodal/test_controlnet.py create mode 100644 tests/collections/multimodal/test_dreambooth.py create mode 100644 tests/collections/multimodal/test_dreamfusion_model.py create mode 100644 tests/collections/multimodal/test_imagen_model.py create mode 100644 tests/collections/multimodal/test_insp2p_model.py create mode 100644 tests/collections/multimodal/test_neva_model.py create mode 100644 tests/collections/multimodal/test_sd_model.py diff --git a/Jenkinsfile b/Jenkinsfile index cff8c0f03924..db7ba305373d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -4688,6 +4688,237 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' } } } + stage('L2: Multimodal Imagen Train') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps { + sh "rm -rf /home/TestData/multimodal/imagen_train" + sh "python examples/multimodal/generative/imagen/imagen_training.py \ + trainer.precision=16 \ + trainer.num_nodes=1 \ + trainer.devices=1 \ + ++exp_manager.max_time_per_run=00:00:03:00 \ + trainer.max_steps=20 \ + model.micro_batch_size=4 \ + model.global_batch_size=4 \ + model.data.synthetic_data=True \ + exp_manager.exp_dir=/home/TestData/multimodal/imagen_train \ + model.inductor=False + " + sh "rm -rf /home/TestData/multimodal/imagen_train" + } + } + + stage('L2: Multimodal Stable Diffusion Train') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps { + sh "rm -rf /home/TestData/multimodal/stable_diffusion_train" + sh "python examples/multimodal/generative/stable_diffusion/sd_train.py \ + trainer.precision=16 \ + trainer.num_nodes=1 \ + trainer.devices=1 \ + ++exp_manager.max_time_per_run=00:00:03:00 \ + trainer.max_steps=20 \ + model.micro_batch_size=1 \ + model.global_batch_size=1 \ + model.data.synthetic_data=True \ + exp_manager.exp_dir=/home/TestData/multimodal/stable_diffusion_train \ + model.inductor=False \ + model.cond_stage_config._target_=nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder \ + ++model.cond_stage_config.version=openai/clip-vit-large-patch14 \ + ++model.cond_stage_config.max_length=77 \ + ~model.cond_stage_config.restore_from_path \ + ~model.cond_stage_config.freeze \ + ~model.cond_stage_config.layer \ + model.unet_config.from_pretrained=null \ + model.first_stage_config.from_pretrained=null + " + sh "rm -rf /home/TestData/multimodal/stable_diffusion_train" + } + } + stage('L2: Multimodal ControlNet Train') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps { + sh "rm -rf /home/TestData/multimodal/controlnet_train" + sh "python examples/multimodal/generative/controlnet/controlnet_train.py \ + trainer.precision=16 \ + trainer.num_nodes=1 \ + trainer.devices=1 \ + ++exp_manager.max_time_per_run=00:00:03:00 \ + trainer.max_steps=20 \ + model.micro_batch_size=1 \ + model.global_batch_size=1 \ + model.data.synthetic_data=True \ + exp_manager.exp_dir=/home/TestData/multimodal/controlnet_train \ + model.inductor=False \ + model.image_logger.max_images=0 \ + model.control_stage_config.params.from_pretrained_unet=null \ + model.unet_config.from_pretrained=null \ + model.first_stage_config.from_pretrained=null + " + sh "rm -rf /home/TestData/multimodal/controlnet_train" + } + } + stage('L2: Multimodal DreamBooth Train') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps { + sh "rm -rf /home/TestData/multimodal/dreambooth_train" + sh "python examples/multimodal/generative/dreambooth/dreambooth.py \ + trainer.precision=16 \ + trainer.num_nodes=1 \ + trainer.devices=1 \ + ++exp_manager.max_time_per_run=00:00:03:00 \ + trainer.max_steps=20 \ + model.micro_batch_size=1 \ + model.global_batch_size=1 \ + exp_manager.exp_dir=/home/TestData/multimodal/dreambooth_train \ + model.inductor=False \ + model.cond_stage_config._target_=nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder \ + ++model.cond_stage_config.version=openai/clip-vit-large-patch14 \ + ++model.cond_stage_config.max_length=77 \ + ~model.cond_stage_config.restore_from_path \ + ~model.cond_stage_config.freeze \ + ~model.cond_stage_config.layer \ + model.unet_config.from_pretrained=null \ + model.first_stage_config.from_pretrained=null \ + model.data.instance_dir=/home/TestData/multimodal/tiny-dreambooth + " + sh "rm -rf /home/TestData/multimodal/dreambooth_train" + } + } + stage('L2: Vision ViT Pretrain TP=1') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps { + sh "rm -rf /home/TestData/vision/vit_pretrain_tp1" + sh "python examples/vision/vision_transformer/megatron_vit_classification_pretrain.py \ + trainer.precision=16 \ + model.megatron_amp_O2=False \ + trainer.num_nodes=1 \ + trainer.devices=1 \ + trainer.val_check_interval=5 \ + ++exp_manager.max_time_per_run=00:00:03:00 \ + trainer.max_steps=20 \ + model.micro_batch_size=2 \ + model.global_batch_size=4 \ + model.tensor_model_parallel_size=1 \ + model.pipeline_model_parallel_size=1 \ + model.data.num_workers=0 \ + exp_manager.create_checkpoint_callback=False \ + model.data.data_path=[/home/TestData/multimodal/tiny-imagenet/train,/home/TestData/multimodal/tiny-imagenet/val] \ + exp_manager.exp_dir=/home/TestData/vision/vit_pretrain_tp1 " + sh "rm -rf /home/TestData/vision/vit_pretrain_tp1" + } + } + + stage('L2: Multimodal CLIP Pretrain TP=1') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps { + sh "rm -rf /home/TestData/multimodal/clip_pretrain_tp1" + sh "python examples/multimodal/foundation/clip/megatron_clip_pretrain.py \ + trainer.precision=16 \ + model.megatron_amp_O2=False \ + trainer.num_nodes=1 \ + trainer.devices=1 \ + trainer.val_check_interval=10 \ + ++exp_manager.max_time_per_run=00:00:03:00 \ + trainer.max_steps=20 \ + model.micro_batch_size=4 \ + model.global_batch_size=4 \ + model.tensor_model_parallel_size=1 \ + model.pipeline_model_parallel_size=1 \ + exp_manager.create_checkpoint_callback=False \ + model.data.num_workers=0 \ + model.vision.num_layers=2 \ + model.text.num_layers=2 \ + model.vision.patch_dim=32 \ + model.vision.encoder_seq_length=49 \ + model.vision.class_token_length=7 \ + model.data.train.dataset_path=[/home/TestData/multimodal/tiny-clip/00000.tar] \ + model.data.validation.dataset_path=[/home/TestData/multimodal/tiny-clip/00000.tar] \ + model.data.webdataset.local_root_path=/ \ + exp_manager.exp_dir=/home/TestData/multimodal/clip_pretrain_tp1 " + sh "rm -rf /home/TestData/multimodal/clip_pretrain_tp1" + } + } + + stage('L2: Multimodal NeVA Pretrain TP=1') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps { + sh "rm -rf /home/TestData/multimodal/neva_pretrain_tp1" + sh "python examples/multimodal/mllm/neva/neva_pretrain.py \ + trainer.precision=bf16 \ + model.megatron_amp_O2=False \ + trainer.num_nodes=1 \ + trainer.devices=1 \ + trainer.val_check_interval=10 \ + trainer.limit_val_batches=5 \ + trainer.log_every_n_steps=1 \ + ++exp_manager.max_time_per_run=00:00:03:00 \ + trainer.max_steps=20 \ + model.micro_batch_size=2 \ + model.global_batch_size=4 \ + model.tensor_model_parallel_size=1 \ + model.pipeline_model_parallel_size=1 \ + exp_manager.create_checkpoint_callback=False \ + model.data.data_path=/home/TestData/multimodal/tiny-neva/dummy.json \ + model.data.image_folder=/home/TestData/multimodal/tiny-neva/images \ + model.tokenizer.library=sentencepiece \ + model.tokenizer.model=/home/TestData/multimodal/tiny-neva/tokenizer_add_special.model \ + model.num_layers=2 \ + model.hidden_size=5120 \ + model.ffn_hidden_size=13824 \ + model.num_attention_heads=40 \ + model.normalization=rmsnorm \ + model.data.num_workers=0 \ + model.data.conv_template=llama_2 \ + model.mm_cfg.vision_encoder.from_pretrained='openai/clip-vit-large-patch14' \ + model.mm_cfg.llm.from_pretrained=null \ + exp_manager.exp_dir=/home/TestData/multimodal/neva_pretrain_tp1 " + sh "rm -rf /home/TestData/multimodal/neva_pretrain_tp1" + } + } + stage('L2: TTS Fast dev runs 1') { when { anyOf { @@ -4833,7 +5064,27 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' } } } - + stage('L2: NeRF') { + when { + anyOf { + branch 'r1.21.0' + changeRequest target: 'r1.21.0' + } + } + parallel { + stage('DreamFusion') { + steps { + sh 'python examples/multimodal/generative/nerf/main.py \ + trainer.num_nodes=1 \ + trainer.devices="[0]" \ + trainer.max_steps=1000 \ + model.prompt="a DSLR photo of a delicious hamburger" \ + exp_manager.exp_dir=examples/multimodal/generative/nerf/dreamfusion_results' + sh 'rm -rf examples/multimodal/generative/nerf/dreamfusion_results' + } + } + } + } stage('L??: Speech Checkpoints tests') { when { anyOf { diff --git a/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml b/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml index beb4fd80ee84..13ca53e835f2 100644 --- a/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml +++ b/examples/multimodal/generative/controlnet/conf/controlnet_v1-5.yaml @@ -175,6 +175,8 @@ model: data: num_workers: 16 + synthetic_data: False # dataset_path and local_root_path can be empty when using synthetic data + synthetic_data_length: 10000 train: dataset_path: #- /datasets/tarfiles/fill50k.pkl diff --git a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml index 7c7a125f9722..6c07d460670c 100644 --- a/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml +++ b/examples/multimodal/generative/stable_diffusion/conf/sd_train.yaml @@ -157,7 +157,7 @@ model: # version: openai/clip-vit-large-patch14 # device: cuda # max_length: 77 - # capture_cudagraph_iters: {$model.capture_cudagraph_iters} + # capture_cudagraph_iters: ${model.capture_cudagraph_iters} # miscellaneous diff --git a/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py b/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py index 301be555dad1..242589cc4a67 100644 --- a/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py +++ b/nemo/collections/multimodal/data/controlnet/controlnet_dataset.py @@ -18,6 +18,33 @@ construct_image_augmentations, identical_transform, ) +from nemo.core.classes import Dataset as NeMoDataset + + +class ControlNetSyntheticDataset(NeMoDataset): + def __init__( + self, image_H, image_W, fake_len=100000, image_key='images', txt_key='txt', control_key='hint', seq_len=80, context_dim=768 + ): + super().__init__() + self.fake_len = fake_len + self.H = image_H + self.W = image_W + self.image_key = image_key + self.txt_key = txt_key + self.control_key = control_key + self.seq_len = seq_len + self.context_dim = context_dim + + def __getitem__(self, index): + item = {} + item[self.image_key] = torch.randn(self.H, self.W, 3) + item[self.txt_key] = f'This is meaningless fake text No.{index}' + item[self.control_key] = torch.randn(self.H, self.W, 3) + return item + + def __len__(self): + return self.fake_len + def build_train_valid_datasets( @@ -42,13 +69,25 @@ def transform_fn(sample): text_transform = identical_transform return img_transform(image), text_transform(text), img_transform(hint) - train_data = WebDatasetCommon( - dataset_cfg=data_cfg, - consumed_samples=consumed_samples, - map_fn=transform_fn, - compose_fn=tuple_to_dict, - is_train=True, - ) + if data_cfg.get('synthetic_data', False): + H, W = data_cfg.train.augmentations.center_crop_h_w.split(',') + train_data = ControlNetSyntheticDataset( + int(H), + int(W), + image_key=model_cfg.first_stage_key, + txt_key=model_cfg.cond_stage_key, + control_key=model_cfg.control_key, + context_dim=model_cfg.unet_config.context_dim, + fake_len=data_cfg.synthetic_data_length + ) + else: + train_data = WebDatasetCommon( + dataset_cfg=data_cfg, + consumed_samples=consumed_samples, + map_fn=transform_fn, + compose_fn=tuple_to_dict, + is_train=True, + ) val_data = None if data_cfg.get("validation") is not None and data_cfg.validation.get("data_path"): @@ -59,7 +98,6 @@ def transform_fn(sample): compose_fn=tuple_to_dict, is_train=False, ) - return train_data, val_data diff --git a/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py b/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py index 963982e14cb6..998449f80922 100644 --- a/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py +++ b/nemo/collections/multimodal/data/stable_diffusion/stable_diffusion_dataset.py @@ -90,6 +90,7 @@ def transform_fn(sample): image_key=model_cfg.first_stage_key, txt_key=model_cfg.cond_stage_key, context_dim=model_cfg.unet_config.context_dim, + fake_len=data_cfg.synthetic_data_length ) else: diff --git a/tests/collections/multimodal/test_clip_model.py b/tests/collections/multimodal/test_clip_model.py index 781757c5869f..3f5f82d8dfea 100644 --- a/tests/collections/multimodal/test_clip_model.py +++ b/tests/collections/multimodal/test_clip_model.py @@ -198,22 +198,15 @@ def model_cfg(): data: num_workers: 1 - dataset_type: webdataset - train: - data_path: # List of paths to pkl files or tar files - - /lustre/fsw/joc/multimodal/datasets/cc3m/00000-00008_{000000..000001}.tar - drop_last: True # drop_last = False is not implemented yet + dataset_path: # List of paths to pkl files or tar files + - /lustre/fsw/joc/yuya/tiny-datasets/tiny-clip/00000.tar validation: # List of paths to pkl files or tar files - data_path: - - /lustre/fsw/joc/multimodal/datasets/cc3m/00000-00008_000002.tar - drop_last: True # drop_last = False is not implemented yet + dataset_path: + - /lustre/fsw/joc/yuya/tiny-datasets/tiny-clip/00000.tar webdataset: - object_store: False - bucket: datasets - pbss_credentials_file: pbss_credential - local_root_path: / # tar files local root path - chunk_size: 1000 # if data path is list of tar files, chunk_size needs to be provided + infinite_sampler: False + local_root_path: / imagenet_val: null # Path to imagenet val set for conducting zero shot evaluation. @@ -312,6 +305,7 @@ def precision(): def clip_trainer_and_model(model_cfg, trainer_cfg, precision): model_cfg['vision']['precision'] = precision model_cfg['text']['precision'] = precision + model_cfg['precision'] = precision trainer_cfg['precision'] = precision strategy = NLPDDPStrategy() @@ -332,7 +326,9 @@ def dummy(): return trainer, model -def build_datasets(cfg, tokenizer): +def build_datasets(cfg, tokenizer, test_data_dir): + cfg.data.train.dataset_path = [os.path.join(test_data_dir, "multimodal/tiny-clip/00000.tar")] + cfg.data.validation.dataset_path = [os.path.join(test_data_dir, "multimodal/tiny-clip/00000.tar")] return build_train_valid_datasets(model_cfg=cfg, consumed_samples=0, tokenizer=tokenizer,) @@ -344,16 +340,16 @@ def test_constructor(self, clip_trainer_and_model): assert isinstance(clip_model, MegatronCLIPModel) num_weights = clip_model.num_weights - assert num_weights == 46643969 + assert num_weights == 46656257 @pytest.mark.unit def test_build_dataset(self, clip_trainer_and_model, test_data_dir): clip_model = clip_trainer_and_model[1] + clip_model.cfg.data.train.dataset_path[0] = [os.path.join(test_data_dir, "multimodal/tiny-clip/00000.tar")] + clip_model.cfg.data.validation.dataset_path[0] = [os.path.join(test_data_dir, "multimodal/tiny-clip/00000.tar")] train_ds, validation_ds = build_train_valid_datasets( model_cfg=clip_model.cfg, consumed_samples=0, tokenizer=clip_model.tokenizer, ) - assert len(train_ds) == 2000 - assert len(validation_ds) == 1000 sample = next(iter(train_ds)) assert "captions" in sample assert "images" in sample @@ -387,7 +383,7 @@ def test_forward(self, clip_trainer_and_model, test_data_dir, precision=None): raise ValueError(f"precision: {clip_model.cfg['precision']} is not supported.") clip_model.eval() - _, validation_ds = build_datasets(clip_model.cfg, clip_model.tokenizer) + _, validation_ds = build_datasets(clip_model.cfg, clip_model.tokenizer, test_data_dir) val_loader = torch.utils.data.DataLoader(validation_ds, batch_size=4) batch = next(iter(val_loader)) diff --git a/tests/collections/multimodal/test_controlnet.py b/tests/collections/multimodal/test_controlnet.py new file mode 100644 index 000000000000..47452b5556ce --- /dev/null +++ b/tests/collections/multimodal/test_controlnet.py @@ -0,0 +1,355 @@ +import os + +import pytest +import torch +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning import Trainer + +from nemo.collections.multimodal.models.controlnet.controlnet import MegatronControlNet +from nemo.collections.multimodal.data.controlnet.controlnet_dataset import build_train_valid_datasets +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy + +DEVICE_CAPABILITY = None +if torch.cuda.is_available(): + DEVICE_CAPABILITY = torch.cuda.get_device_capability() + +@pytest.fixture() +def model_cfg(): + model_cfg_string=""" + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 4 # limited by GPU memory + global_batch_size: 8 + + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: images + cond_stage_key: captions + control_key: hint + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + scale_by_std: False + ckpt_path: + ignore_keys: [ ] + parameterization: eps + clip_denoised: True + load_only_unet: False + cosine_s: 8e-3 + given_betas: + original_elbo_weight: 0 + v_posterior: 0 + l_simple_weight: 1 + use_positional_encodings: False + learn_logvar: False + logvar_init: 0 + beta_schedule: linear + loss_type: l2 + learning_rate: 1.0e-04 + concat_mode: True + cond_stage_forward: + text_embedding_dropout_rate: 0.0 + fused_opt: True + inductor: False + inductor_cudagraphs: False + capture_cudagraph_iters: -1 # -1 to disable + channels_last: True + only_mid_control: False + sd_locked: True + + control_stage_config: + _target_: nemo.collections.multimodal.models.controlnet.controlnet.ControlNet + params: + from_pretrained_unet: /ckpts/v1-5-pruned.ckpt + from_NeMo: True + image_size: 32 # unused + in_channels: 4 + hint_channels: 3 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + use_linear_in_transformer: False + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: False + + unet_config: + _target_: nemo.collections.multimodal.models.controlnet.controlnet.ControlledUnetModel + from_pretrained: + from_NeMo: True + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: + - 4 + - 2 + - 1 + num_res_blocks: 2 + channel_mult: + - 1 + - 2 + - 4 + - 4 + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: False + + first_stage_config: + _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL + from_pretrained: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder + version: openai/clip-vit-large-patch14 + device: cuda + max_length: 77 + + data: + num_workers: 16 + synthetic_data: True # dataset_path and local_root_path can be empty when using synthetic data + synthetic_data_length: 10000 + train: + dataset_path: + #- /datasets/tarfiles/fill50k.pkl + - /datasets/coco-stuff/coco-stuff-tarfiles/wdinfo-coco-stuff.pkl + augmentations: + resize_smallest_side: 512 + center_crop_h_w: 512, 512 + horizontal_flip: False + webdataset: + infinite_sampler: False + local_root_path: /datasets/coco-stuff/coco-stuff-tarfiles + + optim: + name: fused_adam + lr: 2e-5 + weight_decay: 0. + betas: + - 0.9 + - 0.999 + sched: + name: WarmupHoldPolicy + warmup_steps: 0 + hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + image_logger: + batch_frequency: 1000 + max_images: 0 + + #miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + """ + model_cfg = OmegaConf.create(model_cfg_string) + return model_cfg + +@pytest.fixture() +def trainer_cfg(): + trainer_cfg_string = """ + devices: 2 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: True + max_epochs: 3 # PTL default. In practice, max_steps will be reached first. + max_steps: -1 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + limit_val_batches: 0 + """ + trainer_cfg = OmegaConf.create(trainer_cfg_string) + + return trainer_cfg + + +@pytest.fixture() +def exp_manager_cfg(): + exp_manager_cfg_string = """ + explicit_log_dir: null + exp_dir: null + name: controlnet + create_wandb_logger: False + wandb_logger_kwargs: + project: stable-diffusion + group: controlnet + name: controlnet-v1.5 + resume: True + create_checkpoint_callback: True + create_tensorboard_logger: True + checkpoint_callback_params: + save_top_k: -1 + every_n_train_steps: 5000 + every_n_epochs: 0 + monitor: reduced_train_loss + filename: 'controlnet--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + """ + + exp_manager_cfg = OmegaConf.create(exp_manager_cfg_string) + + return exp_manager_cfg + +@pytest.fixture() +def precision(): + return 32 + +@pytest.fixture() +def controlnet_trainer_and_model(model_cfg, trainer_cfg, precision): + model_cfg['precision'] = precision + trainer_cfg['precision'] = precision + + strategy = NLPDDPStrategy() + + trainer = Trainer(strategy=strategy, **trainer_cfg) + + cfg = DictConfig(model_cfg) + + model = MegatronControlNet(cfg=cfg, trainer=trainer) + + def dummy(): + return + + if model.trainer.strategy.launcher is not None: + model.trainer.strategy.launcher.launch(dummy, trainer=model.trainer) + + model.trainer.strategy.setup_environment() + + return trainer, model + +@pytest.mark.run_only_on('GPU') +class TestMegatronControlNet: + @pytest.mark.unit + def test_constructor(self, controlnet_trainer_and_model): + controlnet_model = controlnet_trainer_and_model[1] + assert isinstance(controlnet_model, MegatronControlNet) + + num_weights = controlnet_model.num_weights + assert num_weights == 361279120 + + @pytest.mark.unit + def test_build_dataset(self, controlnet_trainer_and_model): + controlnet_model = controlnet_trainer_and_model[1] + train_ds, valid_ds = build_train_valid_datasets(model_cfg=controlnet_model.cfg, consumed_samples=0) + + assert len(train_ds) == controlnet_model.cfg.data.synthetic_data_length + sample = next(iter(train_ds)) + assert "images" in sample + assert "captions" in sample + assert "hint" in sample + + @pytest.mark.parametrize( + "precision", + [ + 32, + 16, + pytest.param( + "bf16", + marks=pytest.mark.skipif( + not DEVICE_CAPABILITY or DEVICE_CAPABILITY[0] < 8, + reason='bfloat16 is not supported on this device', + ), + ), + ], + ) + def test_forward(self, controlnet_trainer_and_model, test_data_dir, precision=None): + trainer, controlnet_model = controlnet_trainer_and_model + + dtype = None + if controlnet_model.cfg['precision'] in [32, '32', '32-true']: + dtype = torch.float + elif controlnet_model.cfg['precision'] in [16, '16', '16-mixed']: + dtype = torch.float16 + elif controlnet_model.cfg['precision'] in ['bf16', 'bf16-mixed']: + dtype = torch.bfloat16 + else: + raise ValueError(f"precision: {controlnet_model.cfg['precision']} is not supported.") + + + controlnet_model = controlnet_model.cuda() + controlnet_model.eval() + + train_ds, _ = build_train_valid_datasets(controlnet_model.cfg, 0) + train_loader = torch.utils.data.DataLoader(train_ds, batch_size=controlnet_model.cfg.micro_batch_size) + batch = next(iter(train_loader)) + batch[controlnet_model.cfg.first_stage_key] = batch[controlnet_model.cfg.first_stage_key].cuda(non_blocking=True) + x, c = controlnet_model.model.get_input(batch, controlnet_model.cfg.first_stage_key) + + if not isinstance(c, dict): + batch = [x, c] + + elif len(controlnet_model.conditioning_keys) == 0: + controlnet_model.conditioning_keys = list(c.keys()) + c_list = [c[key] for key in controlnet_model.conditioning_keys] + batch = [x, *c_list] + batch = [x.cuda(non_blocking=True) for x in batch] + if len(controlnet_model.conditioning_keys) == 0: + x, c = batch + else: + x = batch[0] + c = {} + for idx, key in enumerate(controlnet_model.conditioning_keys): + c[key] = batch[1 + idx] + with torch.no_grad(): + loss, _ = controlnet_model.model(x, c) + assert loss.dtype == torch.float + diff --git a/tests/collections/multimodal/test_dreambooth.py b/tests/collections/multimodal/test_dreambooth.py new file mode 100644 index 000000000000..8106285983fd --- /dev/null +++ b/tests/collections/multimodal/test_dreambooth.py @@ -0,0 +1,326 @@ +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest +import torch +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning import Trainer + +from nemo.collections.multimodal.models.dreambooth.dreambooth import MegatronDreamBooth +from nemo.collections.multimodal.data.dreambooth.dreambooth_dataset import DreamBoothDataset +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy + +DEVICE_CAPABILITY = None +if torch.cuda.is_available(): + DEVICE_CAPABILITY = torch.cuda.get_device_capability() + + +@pytest.fixture() +def model_cfg(): + model_cfg_string = """ + precision: ${trainer.precision} + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 1 # limited by GPU memory + global_batch_size: 1 # will use more micro batches to reach global batch size + + with_prior_preservation: False + use_cached_latents: False + prior_loss_weight: 0.5 + train_text_encoder: False + restore_from_path: /ckpts/nemo-v1-5-188000-ema.nemo #This ckpt is only used to generate regularization images, thus .nemo ckpt is needed + + + + + linear_start: 0.00085 + linear_end: 0.012 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: images + cond_stage_key: captions + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn # check + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + scale_by_std: False + ckpt_path: + ignore_keys: [ ] + parameterization: eps + clip_denoised: True + load_only_unet: False + cosine_s: 8e-3 + given_betas: + original_elbo_weight: 0 + v_posterior: 0 + l_simple_weight: 1 + use_positional_encodings: False + learn_logvar: False + logvar_init: 0 + beta_schedule: linear + loss_type: l2 + + concat_mode: True + cond_stage_forward: + text_embedding_dropout_rate: 0.1 + fused_opt: True + inductor: False + inductor_cudagraphs: False + channels_last: False + + unet_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel + from_pretrained: #load unet weights for finetuning, can use .ckpt ckpts from various sources + from_NeMo: False #Must be specified when from pretrained is not None, False means loading unet from HF ckpt + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: + - 4 + - 2 + - 1 + num_res_blocks: 2 + channel_mult: + - 1 + - 2 + - 4 + - 4 + num_heads: 8 + use_spatial_transformer: true + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: False + + first_stage_config: + _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL + from_pretrained: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 #Never used + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [ ] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + # For compatibility of history version that uses HF clip model + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder + version: openai/clip-vit-large-patch14 + device: cuda + max_length: 77 + + noise_scheduler: + _target_: nemo.collections.multimodal.models.dreambooth.util.sd_noise_scheduler + parameterization: eps + v_posterior: 0 + given_betas: + beta_schedule: linear + timesteps: 1000 + linear_start: 0.00085 + linear_end: 0.012 + cosine_s: 8e-3 + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + optim: + name: fused_adam + lr: 1e-6 + weight_decay: 0. + betas: + - 0.9 + - 0.999 + sched: + name: WarmupHoldPolicy + warmup_steps: 1 + hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + data: + num_workers: 4 + instance_dir: /opt/NeMo/docs/source/tools/images + instance_prompt: a photo of a sks dog + regularization_dir: + regularization_prompt: + num_reg_images: 10 + num_images_per_prompt: 4 + resolution: 512 + center_crop: True + cached_instance_dir: #/datasets/instance_dir_cached + cached_reg_dir: #/datasets/nemo_dogs_cached + """ + model_cfg = OmegaConf.create(model_cfg_string) + return model_cfg + +@pytest.fixture() +def trainer_cfg(): + trainer_cfg_string = """ + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: bf16-mixed + logger: False # logger provided by exp_manager + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: -1 # PTL default. In practice, max_steps will be reached first. + max_steps: 400 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + log_every_n_steps: 10 + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + limit_val_batches: 0 + """ + trainer_cfg = OmegaConf.create(trainer_cfg_string) + + return trainer_cfg + +@pytest.fixture() +def exp_manager_cfg(): + + exp_manager_cfg_string = """ + exp_dir: null + name: ${name} + create_checkpoint_callback: True + create_tensorboard_logger: True + checkpoint_callback_params: + every_n_train_steps: 200 + every_n_epochs: 0 + monitor: reduced_train_loss + save_on_train_epoch_end: False + filename: '${name}-{step}' + save_top_k: -1 + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + """ + exp_manager_cfg = OmegaConf.create(exp_manager_cfg_string) + + return exp_manager_cfg + +@pytest.fixture() +def precision(): + return 32 + +@pytest.fixture() +def dreambooth_trainer_and_model(model_cfg, trainer_cfg, precision, test_data_dir): + model_cfg['precision'] = precision + trainer_cfg['precision'] = precision + model_cfg['data']['instance_dir'] = os.path.join(test_data_dir, "multimodal/tiny-dreambooth") + strategy = NLPDDPStrategy() + + trainer = Trainer(strategy=strategy, **trainer_cfg) + + cfg = DictConfig(model_cfg) + + model = MegatronDreamBooth(cfg=cfg, trainer=trainer) + + + + def dummy(): + return + + if model.trainer.strategy.launcher is not None: + model.trainer.strategy.launcher.launch(dummy, trainer=model.trainer) + + model.trainer.strategy.setup_environment() + + return trainer, model + + + +@pytest.mark.run_only_on('GPU') +class TestMegatronDreamBooth: + @pytest.mark.unit + def test_constructor(self, dreambooth_trainer_and_model): + dreambooth_model = dreambooth_trainer_and_model[1] + assert isinstance(dreambooth_model, MegatronDreamBooth) + + num_weights = dreambooth_model.num_weights + assert num_weights == 859520964 + + + @pytest.mark.parametrize( + "precision", + [ + 32, + 16, + pytest.param( + "bf16", + marks=pytest.mark.skipif( + not DEVICE_CAPABILITY or DEVICE_CAPABILITY[0] < 8, + reason='bfloat16 is not supported on this device', + ), + ), + ], + ) + + @pytest.mark.unit + def test_forward(self, dreambooth_trainer_and_model, test_data_dir, precision=None): + trainer, dreambooth_model = dreambooth_trainer_and_model + + dtype = None + if dreambooth_model.cfg['precision'] in [32, '32', '32-true']: + dtype = torch.float + elif dreambooth_model.cfg['precision'] in [16, '16', '16-mixed']: + dtype = torch.float16 + elif dreambooth_model.cfg['precision'] in ['bf16', 'bf16-mixed']: + dtype = torch.bfloat16 + else: + raise ValueError(f"precision: {dreambooth_model.cfg['precision']} is not supported.") + + dreambooth_model = dreambooth_model.cuda() + dreambooth_model.eval() + + images = torch.randn(1, 3, 512, 512).cuda() + caption = [f'This is meaningless fake text'] + batch = images, dreambooth_model.model.text_encoder(caption) + with torch.no_grad(): + loss = dreambooth_model(batch) + + assert loss.dtype == torch.float diff --git a/tests/collections/multimodal/test_dreamfusion_model.py b/tests/collections/multimodal/test_dreamfusion_model.py new file mode 100644 index 000000000000..2a6951406525 --- /dev/null +++ b/tests/collections/multimodal/test_dreamfusion_model.py @@ -0,0 +1,283 @@ +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest +import torch +from hydra.utils import get_class, instantiate +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning import Trainer + +from nemo.collections.multimodal.models.nerf.dreamfusion import DreamFusion + +DEVICE_CAPABILITY = None +if torch.cuda.is_available(): + DEVICE_CAPABILITY = torch.cuda.get_device_capability() + + +@pytest.fixture() +def model_cfg(): + model_cfg_string = """ + ### model options + resume_from_checkpoint: + prompt: 'a hamburger' + negative_prompt: '' + front_prompt: ', front view' + side_prompt: ', side view' + back_prompt: ', back view' + update_extra_interval: 16 + guidance_scale: 100 + export_video: False + + iters: 1 + latent_iter_ratio: 0.2 + albedo_iter_ratio: 0.0 + min_ambient_ratio: 0.1 + textureless_ratio: 0.2 + + data: + _target_: examples.multimodal.generative.nerf.data.AggregatorDataModule + + train_batch_size: 1 + train_shuffle: false + train_dataset: + _target_: nemo.collections.multimodal.data.nerf.random_poses.RandomPosesDataset + internal_batch_size: 100 + width: 64 + height: 64 + radius_range: [3.0, 3.5] + theta_range: [45, 105] + phi_range: [-180, 180] + fovx_range: [10, 30] + fovy_range: [10, 30] + jitter: False + jitter_center: 0.2 + jitter_target: 0.2 + jitter_up: 0.02 + uniform_sphere_rate: 0 + angle_overhead: 30 + angle_front: 60 + + val_batch_size: 1 + val_shuffle: false + val_dataset: + _target_: nemo.collections.multimodal.data.nerf.circle_poses.CirclePosesDataset + size: 5 + width: 800 + height: 800 + angle_overhead: 30 + angle_front: 60 + + test_batch_size: 1 + test_shuffle: false + test_dataset: + _target_: nemo.collections.multimodal.data.nerf.circle_poses.CirclePosesDataset + size: 100 + width: 800 + height: 800 + angle_overhead: 30 + angle_front: 60 + + + nerf: + _target_: nemo.collections.multimodal.modules.nerf.geometry.torchngp_nerf.TorchNGPNerf + num_input_dims: 3 # 3D space + bound: 1 + density_activation: exp # softplus, exp + blob_radius: 0.2 + blob_density: 5 + normal_type: central_finite_difference + + encoder_cfg: + encoder_type: 'hashgrid' + encoder_max_level: + log2_hashmap_size: 19 + desired_resolution: 2048 + interpolation: smoothstep + + sigma_net_num_output_dims: 1 # density + sigma_net_cfg: + num_hidden_dims: 64 + num_layers: 3 + bias: True + + features_net_num_output_dims: 3 # rgb + features_net_cfg: + num_hidden_dims: 64 + num_layers: 3 + bias: True + + background: + _target_: nemo.collections.multimodal.modules.nerf.background.static_background.StaticBackground + background: [0, 0, 1] # rgb + + material: + _target_: nemo.collections.multimodal.modules.nerf.materials.basic_shading.BasicShading + + renderer: + _target_: nemo.collections.multimodal.modules.nerf.renderers.torchngp_volume_renderer.TorchNGPVolumeRenderer + bound: 1 + update_interval: 16 + grid_resolution: 128 + density_thresh: 10 + max_steps: 1024 + dt_gamma: 0 + + guidance: + _target_: nemo.collections.multimodal.modules.nerf.guidance.stablediffusion_huggingface_pipeline.StableDiffusion + precision: 16 + model_key: stabilityai/stable-diffusion-2-1-base + t_range: [0.02, 0.98] + + optim: + name: adan + lr: 5e-3 + eps: 1e-8 + weight_decay: 2e-5 + max_grad_norm: 5.0 + foreach: False + + loss: + lambda_sds: 1.0 + lambda_opacity: 0.0 + lambda_entropy: 1e-3 + lambda_orientation: 1e-2 + lambda_2d_normal_smooth: 0.0 + lambda_3d_normal_smooth: 0.0 + lambda_mesh_normal: 0.0 + lambda_mesh_laplacian: 0.0 + + """ + model_cfg = OmegaConf.create(model_cfg_string) + return model_cfg + + +@pytest.fixture() +def trainer_cfg(): + trainer_cfg_string = """ + devices: 1 + num_nodes: 1 + precision: 16 + max_steps: 10000 # example configs: dreamfuions=10000, dmtet=5000 + accelerator: gpu + enable_checkpointing: False + logger: False + log_every_n_steps: 1 + val_check_interval: 100 + accumulate_grad_batches: 1 + benchmark: False + enable_model_summary: True + """ + trainer_cfg = OmegaConf.create(trainer_cfg_string) + return trainer_cfg + + +@pytest.fixture() +def exp_manager_cfg(): + + exp_manager_cfg_string = """ + name: dreamfusion-test + exp_dir: /results + create_tensorboard_logger: False + create_wandb_logger: False + wandb_logger_kwargs: + project: dreamfusion + group: nemo-df + name: ${name} + resume: True + create_checkpoint_callback: True + checkpoint_callback_params: + every_n_epochs: 0 + every_n_train_steps: 1000 + monitor: loss + filename: '${name}-{step}' + save_top_k: -1 + always_save_nemo: False + resume_if_exists: True + resume_ignore_no_checkpoint: True + + """ + exp_manager_cfg = OmegaConf.create(exp_manager_cfg_string) + return exp_manager_cfg + + +@pytest.fixture() +def precision(): + return 32 + + +@pytest.fixture() +def dreamfusion_trainer_and_model(model_cfg, trainer_cfg, precision): + # Trainer + trainer_cfg['precision'] = precision + trainer = Trainer(**trainer_cfg) + + # Model + model_cfg = DictConfig(model_cfg) + model_cfg['iters'] = trainer_cfg['max_steps'] + model_cfg['guidance']['precision'] = precision + model = DreamFusion(cfg=model_cfg) + + datamodule = instantiate(model_cfg.data['val_dataset']) + + return trainer, model, datamodule + + +@pytest.mark.run_only_on('GPU') +class TestDreamFusion: + @pytest.mark.unit + def test_constructor(self, dreamfusion_trainer_and_model): + trainer, model, datamodule = dreamfusion_trainer_and_model + + assert isinstance(model, DreamFusion) + assert model.num_weights == 12209048 + + @pytest.mark.unit + def test_build_dataset(self, dreamfusion_trainer_and_model): + trainer, model, datamodule = dreamfusion_trainer_and_model + assert len(datamodule) == 5 + + @pytest.mark.unit + def test_forward(self, dreamfusion_trainer_and_model, test_data_dir, precision=None): + trainer, model, datamodule = dreamfusion_trainer_and_model + + dtype = None + if trainer.precision in [32, '32', '32-true']: + dtype = torch.float + elif trainer.precision in [16, '16', '16-mixed']: + dtype = torch.float16 + elif trainer.precision in ['bf16', 'bf16-mixed']: + dtype = torch.bfloat16 + else: + raise ValueError(f"precision: {trainer.precision} is not supported.") + + model = model.cuda() + batch = next(iter(datamodule)) + with torch.no_grad(): + with torch.autocast('cuda', dtype=dtype): + outputs = model( + rays_o=batch['rays_o'].cuda(), + rays_d=batch['rays_d'].cuda(), + mvp=batch['mvp'].cuda(), + perturb=False, + ambient_ratio=batch['ambient_ratio'] if 'ambient_ratio' in batch else 1.0, + shading_type=batch['shading_type'] if 'shading_type' in batch else None, + binarize=False, + return_normal_image=False, + return_normal_perturb=False, + return_vertices=False, + return_faces=False, + return_faces_normals=False, + ) + + assert outputs['image'].dtype == dtype + assert outputs['image'].shape == torch.Size([1, 800, 800, 3]) diff --git a/tests/collections/multimodal/test_imagen_model.py b/tests/collections/multimodal/test_imagen_model.py new file mode 100644 index 000000000000..6fbc467752b0 --- /dev/null +++ b/tests/collections/multimodal/test_imagen_model.py @@ -0,0 +1,255 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import torch +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning import Trainer +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy +from nemo.collections.multimodal.models.imagen.imagen import DUMMY_TENSOR, MegatronImagen +from nemo.collections.multimodal.data.imagen.imagen_dataset import build_train_valid_datasets + +DEVICE_CAPABILITY = None +if torch.cuda.is_available(): + DEVICE_CAPABILITY = torch.cuda.get_device_capability() + +@pytest.fixture() +def model_cfg(): + + model_cfg_string = """ + precision: 16 + micro_batch_size: 2 # limited by GPU memory + global_batch_size: 2 # will use more micro batches to reach global batch size + inductor: False + inductor_cudagraphs: False + unet_type: base + channels_last: True + + unet: + embed_dim: 256 + image_size: 64 + channels: 3 + num_res_blocks: 3 + channel_mult: [ 1, 2, 3, 4 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 512 + attention_type: fused + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [ 8, 16, 32 ] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: True + resblock_updown: False + resample_with_conv: True + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + ddp_overlap: False # True for using PyTorch default DDP overlap. False for using Megatron's default configuration for async grad allreduce + + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + + data: + num_workers: 16 + synthetic_data: True + synthetic_data_length: 800000 + train: + augmentations: + resize_smallest_side: 64 + center_crop_h_w: 64, 64 + horizontal_flip: False + filterings: null + + webdataset: + use_webdataset: True + object_store: False + infinite_sampler: False + local_root_path: /datasets + verbose: False + + optim: + # We need weight decay for large-scale odel + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null + """ + model_cfg = OmegaConf.create(model_cfg_string) + return model_cfg + +@pytest.fixture() +def trainer_cfg(): + + trainer_cfg_string = """ + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 10 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: 16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + limit_val_batches: 0 + log_every_n_steps: 5 # Interval of logging. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + """ + trainer_cfg = OmegaConf.create(trainer_cfg_string) + + return trainer_cfg + +@pytest.fixture() +def exp_manager_cfg(): + + exp_manager_cfg_string = """ + explicit_log_dir: null + exp_dir: null + name: megatron_clip + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: False + resume_ignore_no_checkpoint: True + create_checkpoint_callback: False + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: 1 + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + """ + exp_manager_cfg = OmegaConf.create(exp_manager_cfg_string) + + return exp_manager_cfg + +@pytest.fixture() +def precision(): + return 16 + +@pytest.fixture() +def imagen_trainer_and_model(model_cfg, trainer_cfg, precision): + model_cfg['precision'] = precision + model_cfg['precision'] = precision + trainer_cfg['precision'] = precision + + strategy = NLPDDPStrategy() + + trainer = Trainer(strategy=strategy, **trainer_cfg) + + cfg = DictConfig(model_cfg) + + model = MegatronImagen(cfg=cfg, trainer=trainer) + + def dummy(): + return + + if model.trainer.strategy.launcher is not None: + model.trainer.strategy.launcher.launch(dummy, trainer=model.trainer) + model.trainer.strategy.setup_environment() + + return trainer, model + + +@pytest.mark.run_only_on('GPU') +class TestMegatronImagenModel: + @pytest.mark.unit + def test_constructor(self, imagen_trainer_and_model): + imagen_model = imagen_trainer_and_model[1] + assert isinstance(imagen_model, MegatronImagen) + + num_weights = imagen_model.num_weights + assert num_weights == 524897540 + + @pytest.mark.unit + def test_build_dataset(self, imagen_trainer_and_model, test_data_dir): + imagen_model = imagen_trainer_and_model[1] + train_ds, validation_ds = build_train_valid_datasets( + model_cfg=imagen_model.cfg, consumed_samples=0, + ) + assert len(train_ds) == 800000 + assert validation_ds is None + sample = next(iter(train_ds)) + assert "t5_text_embeddings" in sample + assert "t5_text_mask" in sample + assert "images" in sample + + + @pytest.mark.unit + def test_forward(self, imagen_trainer_and_model, test_data_dir, precision=None): + trainer, imagen_model = imagen_trainer_and_model + + dtype = None + if imagen_model.cfg['precision'] in [32, '32', '32-true']: + dtype = torch.float + elif imagen_model.cfg['precision'] in [16, '16', '16-mixed']: + dtype = torch.float16 + elif imagen_model.cfg['precision'] in ['bf16', 'bf16-mixed']: + dtype = torch.bfloat16 + else: + raise ValueError(f"precision: {imagen_model.cfg['precision']} is not supported.") + + trainer_ds, _ = build_train_valid_datasets(model_cfg=imagen_model.cfg, consumed_samples=0) + + train_loader = torch.utils.data.DataLoader(trainer_ds, batch_size=4) + batch = next(iter(train_loader)) + + + imagen_model = imagen_model.to('cuda') + input_args = { + 'x_start': batch['images'].cuda(), + 'text_embed': batch['t5_text_embeddings'].cuda(), + 'text_mask': batch['t5_text_mask'].cuda(), + 'x_lowres': DUMMY_TENSOR.repeat(batch['images'].shape[0]).cuda(), + } + with torch.no_grad(): + with torch.autocast('cuda', dtype=dtype): + loss = imagen_model.model(**input_args) + assert len(loss) == 2 diff --git a/tests/collections/multimodal/test_insp2p_model.py b/tests/collections/multimodal/test_insp2p_model.py new file mode 100644 index 000000000000..d7f8a251690c --- /dev/null +++ b/tests/collections/multimodal/test_insp2p_model.py @@ -0,0 +1,306 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import torch +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning import Trainer +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy +from nemo.collections.multimodal.models.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit +from nemo.collections.multimodal.data.stable_diffusion.stable_diffusion_dataset import build_train_valid_datasets + +DEVICE_CAPABILITY = None +if torch.cuda.is_available(): + DEVICE_CAPABILITY = torch.cuda.get_device_capability() + + +@pytest.fixture() +def model_cfg(): + model_cfg_string = """ + precision: 16 + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + ckpt_path: null # load checkpoint weights from previous stages for fine-tuning + micro_batch_size: 1 + global_batch_size: 1 # `= micro_batch_size * total_devices` fake global batch size for sampler + + linear_start: 0.00085 + linear_end: 0.012 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: edited + cond_stage_key: edit # txt for cifar, caption for pbss + image_size: 32 + channels: 4 + cond_stage_trainable: false + conditioning_key: hybrid + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + scale_by_std: False + + ignore_keys: [] + parameterization: eps + clip_denoised: True + load_only_unet: False + cosine_s: 8e-3 + given_betas: + original_elbo_weight: 0 + v_posterior: 0 + l_simple_weight: 1 + use_positional_encodings: False + learn_logvar: False + logvar_init: 0 + beta_schedule: linear + loss_type: l2 + + concat_mode: True + cond_stage_forward: + text_embedding_dropout_rate: 0 + fused_opt: True + inductor: False + inductor_cudagraphs: False + + unet_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel + from_pretrained: + image_size: 32 # unused + in_channels: 8 + out_channels: 4 + model_channels: 320 + attention_resolutions: + - 4 + - 2 + - 1 + num_res_blocks: 2 + channel_mult: + - 1 + - 2 + - 4 + - 4 + num_heads: 8 + use_spatial_transformer: true + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: False + + first_stage_config: + _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL + from_pretrained: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder + version: openai/clip-vit-large-patch14 + device: cuda + max_length: 77 + + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + optim: + name: fused_adam + lr: 1e-4 + weight_decay: 0. + betas: + - 0.9 + - 0.999 + sched: + name: WarmupHoldPolicy + warmup_steps: 10000 + hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant + capturable: True + master_weights: True + max_norm: 1 + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + data: + # Path to instruct-pix2pix dataset must be specified by the user. + # https://github.com/timothybrooks/instruct-pix2pix#generated-dataset + data_path: /lustre/fsw/joc/yuya/stable_diffusion/instruct-pix2pix/data/tiny-ip2p + num_workers: 2 + dataloader_type: cyclic # cyclic + validation_drop_last: True # Set to false if the last partial validation samples is to be consumed + + """ + model_cfg = OmegaConf.create(model_cfg_string) + return model_cfg + + +@pytest.fixture() +def trainer_cfg(): + trainer_cfg_string = """ + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 10 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: 16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + limit_val_batches: 0 + log_every_n_steps: 5 # Interval of logging. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + """ + trainer_cfg = OmegaConf.create(trainer_cfg_string) + + return trainer_cfg + + +@pytest.fixture() +def exp_manager_cfg(): + exp_manager_cfg_string = """ + explicit_log_dir: null + exp_dir: null + name: megatron_clip + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: False + resume_ignore_no_checkpoint: True + create_checkpoint_callback: False + checkpoint_callback_params: + every_n_train_steps: 1000 + every_n_epochs: 0 + monitor: reduced_train_loss + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + """ + exp_manager_cfg = OmegaConf.create(exp_manager_cfg_string) + + return exp_manager_cfg + + +@pytest.fixture() +def precision(): + return 16 + + +@pytest.fixture() +def sd_trainer_and_model(model_cfg, trainer_cfg, precision): + model_cfg['precision'] = precision + trainer_cfg['precision'] = precision + + strategy = NLPDDPStrategy() + + trainer = Trainer(strategy=strategy, **trainer_cfg) + + cfg = DictConfig(model_cfg) + + model = MegatronLatentDiffusionEdit(cfg=cfg, trainer=trainer) + + def dummy(): + return + + if model.trainer.strategy.launcher is not None: + model.trainer.strategy.launcher.launch(dummy, trainer=model.trainer) + model.trainer.strategy.setup_environment() + + return trainer, model + + +@pytest.mark.run_only_on('GPU') +class TestMegatronSDModel: + @pytest.mark.unit + def test_constructor(self, sd_trainer_and_model): + sd_model = sd_trainer_and_model[1] + assert isinstance(sd_model, MegatronLatentDiffusionEdit) + + num_weights = sd_model.num_weights + assert num_weights == 859532484 + + @pytest.mark.unit + def test_build_dataset(self, sd_trainer_and_model, test_data_dir): + sd_model = sd_trainer_and_model[1] + sd_model.cfg.data.data_path = os.path.join(test_data_dir, "multimodal/tiny-ip2p") + sd_model.build_train_valid_test_datasets() + train_ds, validation_ds = sd_model._train_ds, sd_model._validation_ds + print(len(train_ds), len(validation_ds)) + assert len(train_ds) == 205 + assert len(validation_ds) == 8 + sample = next(iter(train_ds)) + print(sample.keys(), sample['edit'].keys()) + assert "edit" in sample and "edited" in sample + assert "c_concat" in sample["edit"] and "c_crossattn" in sample["edit"] + + @pytest.mark.unit + def test_forward(self, sd_trainer_and_model, test_data_dir, precision=None): + trainer, sd_model = sd_trainer_and_model + + dtype = None + if sd_model.cfg['precision'] in [32, '32', '32-true']: + dtype = torch.float + elif sd_model.cfg['precision'] in [16, '16', '16-mixed']: + dtype = torch.float16 + elif sd_model.cfg['precision'] in ['bf16', 'bf16-mixed']: + dtype = torch.bfloat16 + else: + raise ValueError(f"precision: {sd_model.cfg['precision']} is not supported.") + + sd_model.cfg.data.data_path = os.path.join(test_data_dir, "multimodal/tiny-ip2p") + sd_model.build_train_valid_test_datasets() + train_ds, validation_ds = sd_model._train_ds, sd_model._validation_ds + + train_loader = torch.utils.data.DataLoader(train_ds, batch_size=2) + batch = next(iter(train_loader)) + + sd_model = sd_model.to('cuda') + batch['edited'] = batch['edited'].cuda() + x, c = sd_model.model.get_input(batch, 'edited') + with torch.no_grad(): + with torch.autocast('cuda', dtype=dtype): + loss = sd_model(x, c) + assert len(loss) == 2 diff --git a/tests/collections/multimodal/test_neva_model.py b/tests/collections/multimodal/test_neva_model.py new file mode 100644 index 000000000000..5958466b6b26 --- /dev/null +++ b/tests/collections/multimodal/test_neva_model.py @@ -0,0 +1,366 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest +import torch +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning import Trainer + +from nemo.collections.multimodal.data.neva.neva_dataset import make_supervised_data_module, DataCollatorForSupervisedDataset +from nemo.collections.multimodal.models.neva.neva_model import MegatronNevaModel, NevaModel +from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy + +DEVICE_CAPABILITY = None +if torch.cuda.is_available(): + DEVICE_CAPABILITY = torch.cuda.get_device_capability() + + +@pytest.fixture() +def model_cfg(): + + model_cfg_string = """ + precision: 16 + + # Batch size guideline for different types of dataset + micro_batch_size: 2 # limited by GPU memory + global_batch_size: 4 # will use more micro batches to reach global batch size + + tensor_model_parallel_size: 1 # intra-layer model parallelism + pipeline_model_parallel_size: 1 # inter-layer model parallelism + virtual_pipeline_model_parallel_size: null # interleaved pipeline + + restore_from_path: null # used in fine-tuning + + # Multimodal configs + mm_cfg: + llm: + from_pretrained: null # path to nemo checkpoint + freeze: True + model_type: llama_2 # `nvgpt` or `llama_2` supported + vision_encoder: + from_pretrained: "openai/clip-vit-large-patch14" # path or name + from_hf: True + patch_dim: 14 + hidden_size: 1024 # could be found from model but tricky in code + vision_select_layer: -2 # default to the last layer + class_token_length: 1 + freeze: True + pretrain_mm_mlp_adapter: null # path to pretrained mm adapter + use_im_start_end: False + + # LLM configs + # use GPTModel from megatron.core + mcore_gpt: False + + # model architecture + encoder_seq_length: 4096 + max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: rope + num_layers: 2 + hidden_size: 5120 + ffn_hidden_size: 13824 # Transformer FFN hidden size. Usually 4 * hidden_size. + num_attention_heads: 40 + init_method_std: 0.014 # Standard deviation of the zero mean normal distribution used for weight initialization.') + use_scaled_init_method: True # use scaled residuals initialization + hidden_dropout: 0.0 # Dropout probability for hidden state transformer. + attention_dropout: 0.0 # Dropout probability for attention + ffn_dropout: 0.0 # Dropout probability in the feed-forward layer. + kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null + apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + normalization: rmsnorm # Type of normalization layers + layernorm_epsilon: 1e-5 + do_layer_norm_weight_decay: False # True means weight decay on all params + pre_process: True # add embedding + post_process: True # add pooler + persist_layer_norm: True # Use of persistent fused layer norm kernel. + bias: False # Whether to use bias terms in all weight matrices. + activation: 'fast-swiglu' # Options ['gelu', 'geglu', 'swiglu', 'reglu', 'squared-relu', 'fast-geglu', 'fast-swiglu', 'fast-reglu'] + headscale: False # Whether to learn extra parameters that scale the output of the each self-attention head. + transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] + normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. + rotary_percentage: 1.0 # If using position_embedding_type=rope, then the per head dim is multiplied by this. + attention_type: 'multihead' # Attention type. Options ['multihead'] + share_embeddings_and_output_weights: False # Share embedding and output layer weights. + overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + seq_len_interpolation_factor: null # RoPE Interpolation factor for sequence length. This is used to build long-context models with RoPE ex: https://arxiv.org/abs/2306.15595. + num_query_groups: null # Number of query groups for group query attention. If None, normal attention is used. + use_flash_attention: False + + ## Activation Checkpointing + activations_checkpoint_granularity: null # 'selective' or 'full' + activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' + activations_checkpoint_num_layers: null # not used with 'selective' + num_micro_batches_with_partial_activation_checkpoints: null + activations_checkpoint_layers_per_pipeline: null + sequence_parallel: False + + # precision + native_amp_init_scale: 4294967296 # 2 ** 32 + native_amp_growth_interval: 1000 + hysteresis: 2 # Gradient scale hysteresis + fp32_residual_connection: False # Move residual connections to fp32 + fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16 + + # model fusions + masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask. + bias_dropout_add_fusion: False # Use a kernel that fuses the bias addition, dropout and residual connection addition. + + use_cpu_initialization: False # Init weights on the CPU (slow for large models) + onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter. + gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism. + openai_gelu: False + bias_activation_fusion: False + megatron_legacy: False + + transformer_engine: False + fp8: False # enables fp8 in TransformerLayer forward + fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3 + fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID + fp8_margin: 0 # scaling margin + fp8_interval: 1 # scaling update interval + fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor + fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history + use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + + # Megatron O2-style half-precision + megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters + async_grad_allreduce: False + grad_allreduce_chunk_size_mb: 125 + grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + + tokenizer: + library: 'sentencepiece' + type: null + model: /lustre/fsw/joc/multimodal/datasets/LLaVA-CC3M-Pretrain-595K/tiny_neva/tokenizer_add_special.model + vocab_file: null + merge_file: null + delimiter: null # only used for tabular tokenizer + sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers. + additional_special_tokens: null # ["", "", "", "", "", ""] + + data: + num_workers: 8 + dataloader_type: cyclic + data_path: /lustre/fsw/joc/multimodal/datasets/LLaVA-CC3M-Pretrain-595K/tiny_neva/dummy.json + lazy_preprocess: True + is_multimodal: True + sep_image_conv_front: False + image_token_len: 256 + conv_template: ${mm_cfg.llm.model_type} # check `nemo/collections/multimodal/data/neva/conversation.py` + image_folder: /lustre/fsw/joc/multimodal/datasets/LLaVA-CC3M-Pretrain-595K/tiny_neva/images + image_aspect_ratio: 'square' + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + optim: + name: fused_adam + lr: 2e-3 + weight_decay: 0. + betas: + - 0.9 + - 0.95 + sched: + name: CosineAnnealing + warmup_steps: 140 + constant_steps: 0 + min_lr: 2e-5 + """ + model_cfg = OmegaConf.create(model_cfg_string) + return model_cfg + + +@pytest.fixture() +def trainer_cfg(): + + trainer_cfg_string = """ + devices: 1 + num_nodes: 1 + accelerator: gpu + precision: 16 + logger: False + enable_checkpointing: False + use_distributed_sampler: False + max_epochs: -1 + max_steps: 4 + log_every_n_steps: 1 + val_check_interval: 4 + limit_val_batches: 2 + limit_test_batches: 2 + accumulate_grad_batches: 1 + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: False + """ + trainer_cfg = OmegaConf.create(trainer_cfg_string) + + return trainer_cfg + + +@pytest.fixture() +def exp_manager_cfg(): + + exp_manager_cfg_string = """ + explicit_log_dir: null + exp_dir: null + name: megatron_neva + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: False + resume_ignore_no_checkpoint: True + create_checkpoint_callback: False + checkpoint_callback_params: + monitor: val_loss + save_top_k: 10 + mode: min + always_save_nemo: False # saves nemo file during validation, not implemented for model parallel + save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits + filename: 'megatron_vit_classify--{val_loss:.2f}-{step}-{consumed_samples}' + model_parallel_size: 1 + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + """ + exp_manager_cfg = OmegaConf.create(exp_manager_cfg_string) + + return exp_manager_cfg + + +@pytest.fixture() +def precision(): + return 32 + + +@pytest.fixture() +def neva_trainer_and_model(model_cfg, trainer_cfg, precision, test_data_dir): + model_cfg['precision'] = precision + trainer_cfg['precision'] = precision + model_cfg['tokenizer']['model'] = os.path.join(test_data_dir, "multimodal/tiny-neva/tokenizer_add_special.model"), + + strategy = NLPDDPStrategy() + + trainer = Trainer(strategy=strategy, **trainer_cfg) + + cfg = DictConfig(model_cfg) + + model = MegatronNevaModel(cfg=cfg, trainer=trainer) + + def dummy(): + return + + if model.trainer.strategy.launcher is not None: + model.trainer.strategy.launcher.launch(dummy, trainer=model.trainer) + model.trainer.strategy.setup_environment() + + return trainer, model + + +def build_datasets(cfg, tokenizer, test_data_dir): + cfg.data.data_path = os.path.join(test_data_dir, "multimodal/tiny-neva/dummy.json"), + cfg.data.image_folder = os.path.join(test_data_dir, "multimodal/tiny-neva/images") + ds_dict = make_supervised_data_module(tokenizer=tokenizer, model_cfg=cfg,) + return ds_dict["train_dataset"], ds_dict["eval_dataset"] + + +@pytest.mark.run_only_on('GPU') +class TestMegatronNevaModel: + @pytest.mark.unit + def test_constructor(self, neva_trainer_and_model): + neva_model = neva_trainer_and_model[1] + assert isinstance(neva_model, MegatronNevaModel) + + num_weights = neva_model.num_weights + assert num_weights == 5248000 + + @pytest.mark.unit + def test_build_dataset(self, neva_trainer_and_model, test_data_dir): + neva_model = neva_trainer_and_model[1] + neva_model.cfg.data.data_path = os.path.join(test_data_dir, "multimodal/tiny-neva/dummy.json"), + neva_model.cfg.data.image_folder = os.path.join(test_data_dir, "multimodal/tiny-neva/images") + ds_dict = make_supervised_data_module(tokenizer=neva_model.tokenizer, model_cfg=neva_model.cfg,) + train_ds, validation_ds = ds_dict["train_dataset"], ds_dict["eval_dataset"] + assert len(train_ds) == 60 + assert len(validation_ds) == 60 + sample = next(iter(train_ds)) + assert "tokens" in sample + assert "labels" in sample + assert "image" in sample + + @pytest.mark.parametrize( + "precision", + [ + 32, + 16, + pytest.param( + "bf16", + marks=pytest.mark.skipif( + not DEVICE_CAPABILITY or DEVICE_CAPABILITY[0] < 8, + reason='bfloat16 is not supported on this device', + ), + ), + ], + ) + @pytest.mark.unit + def test_forward(self, neva_trainer_and_model, test_data_dir, precision=None): + trainer, neva_model = neva_trainer_and_model + + dtype = None + if neva_model.cfg['precision'] in [32, '32', '32-true']: + dtype = torch.float + elif neva_model.cfg['precision'] in [16, '16', '16-mixed']: + dtype = torch.float16 + elif neva_model.cfg['precision'] in ['bf16', 'bf16-mixed']: + dtype = torch.bfloat16 + else: + raise ValueError(f"precision: {neva_model.cfg['precision']} is not supported.") + + neva_model = neva_model.cuda() + neva_model.eval() + _, validation_ds = build_datasets(neva_model.cfg, neva_model.tokenizer, test_data_dir) + + collate_func = DataCollatorForSupervisedDataset(neva_model.cfg, neva_model.tokenizer) + val_loader = torch.utils.data.DataLoader(validation_ds, batch_size=4, collate_fn=collate_func,) + batch = next(iter(val_loader)) + + tokens = batch['tokens'].cuda() + position_ids = batch['position_ids'].cuda() + attention_mask = batch['attention_mask'].cuda() + labels = batch['labels'].cuda() + media = batch['media'].cuda() + with torch.no_grad(): + with torch.autocast('cuda', dtype=dtype): + output_tensor = neva_model(tokens, position_ids, attention_mask, labels, media) + print(tokens, output_tensor) + assert output_tensor.shape == tokens.shape + assert output_tensor.dtype == torch.float32 \ No newline at end of file diff --git a/tests/collections/multimodal/test_sd_model.py b/tests/collections/multimodal/test_sd_model.py new file mode 100644 index 000000000000..2b7b1401a31b --- /dev/null +++ b/tests/collections/multimodal/test_sd_model.py @@ -0,0 +1,319 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import torch +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning import Trainer +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy +from nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion +from nemo.collections.multimodal.data.stable_diffusion.stable_diffusion_dataset import build_train_valid_datasets + +DEVICE_CAPABILITY = None +if torch.cuda.is_available(): + DEVICE_CAPABILITY = torch.cuda.get_device_capability() + +@pytest.fixture() +def model_cfg(): + + model_cfg_string = """ + precision: 16 + # specify micro_batch_size, global_batch_size, and model parallelism + # gradient accumulation will be done automatically based on data_parallel_size + micro_batch_size: 1 # limited by GPU memory + global_batch_size: 1 # will use more micro batches to reach global batch size + native_amp_init_scale: 65536.0 # Init scale for grad scaler used at fp16 + + + linear_start: 0.00085 + linear_end: 0.012 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: images + cond_stage_key: captions # txt for cifar, caption for pbss + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn # check + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + scale_by_std: False + ckpt_path: + ignore_keys: [] + parameterization: eps + clip_denoised: True + load_only_unet: False + cosine_s: 8e-3 + given_betas: + original_elbo_weight: 0 + v_posterior: 0 + l_simple_weight: 1 + use_positional_encodings: False + learn_logvar: False + logvar_init: 0 + beta_schedule: linear + loss_type: l2 + + concat_mode: True + cond_stage_forward: + text_embedding_dropout_rate: 0.1 + fused_opt: True + inductor: False + inductor_cudagraphs: False + capture_cudagraph_iters: -1 # -1 to disable + channels_last: True + + unet_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel + from_pretrained: #/ckpts/nemo-v1-2.ckpt + from_NeMo: True #Must be specified when from pretrained is not None, False means loading unet from HF ckpt + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: + - 4 + - 2 + - 1 + num_res_blocks: 2 + channel_mult: + - 1 + - 2 + - 4 + - 4 + num_heads: 8 + use_spatial_transformer: true + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: True + enable_amp_o2_fp16: True + resblock_gn_groups: 32 + + first_stage_config: + _target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL + from_pretrained: /ckpts/vae.bin + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 #Never used + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + capture_cudagraph_iters: -1 + + cond_stage_config: + _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder + version: openai/clip-vit-large-patch14 + device: cuda + max_length: 77 + + + # miscellaneous + seed: 1234 + resume_from_checkpoint: null # manually set the checkpoint file to load from + apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this + gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) + ddp_overlap: True # True for using PyTorch DDP overlap. + + optim: + name: megatron_fused_adam + lr: null + weight_decay: 0. + betas: + - 0.9 + - 0.999 + sched: + name: WarmupHoldPolicy + warmup_steps: 10000 + hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant + capturable: True + master_weights: True + max_norm: 1 + + # Nsys profiling options + nsys_profile: + enabled: False + start_step: 10 # Global batch to start profiling + end_step: 10 # Global batch to end profiling + ranks: [ 0 ] # Global rank IDs to profile + gen_shape: False # Generate model and kernel details including input shapes + + data: + num_workers: 16 + synthetic_data: True # dataset_path and local_root_path can be empty when using synthetic data + synthetic_data_length: 10000 + train: + dataset_path: + - /datasets/coyo/wdinfo.pkl + augmentations: + resize_smallest_side: 512 + center_crop_h_w: 512, 512 + horizontal_flip: False + filterings: + + webdataset: + infinite_sampler: False + local_root_path: /datasets/coyo + """ + model_cfg = OmegaConf.create(model_cfg_string) + return model_cfg + +@pytest.fixture() +def trainer_cfg(): + + trainer_cfg_string = """ + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 10 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: 16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + limit_val_batches: 0 + log_every_n_steps: 5 # Interval of logging. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + """ + trainer_cfg = OmegaConf.create(trainer_cfg_string) + + return trainer_cfg + +@pytest.fixture() +def exp_manager_cfg(): + + exp_manager_cfg_string = """ + explicit_log_dir: null + exp_dir: null + name: megatron_clip + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: False + resume_ignore_no_checkpoint: True + create_checkpoint_callback: False + checkpoint_callback_params: + every_n_train_steps: 1000 + every_n_epochs: 0 + monitor: reduced_train_loss + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + ema: + enable: False + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + """ + exp_manager_cfg = OmegaConf.create(exp_manager_cfg_string) + + return exp_manager_cfg + +@pytest.fixture() +def precision(): + return 16 + +@pytest.fixture() +def sd_trainer_and_model(model_cfg, trainer_cfg, precision): + model_cfg['precision'] = precision + model_cfg['precision'] = precision + trainer_cfg['precision'] = precision + + strategy = NLPDDPStrategy() + + trainer = Trainer(strategy=strategy, **trainer_cfg) + + cfg = DictConfig(model_cfg) + + model = MegatronLatentDiffusion(cfg=cfg, trainer=trainer) + + def dummy(): + return + + if model.trainer.strategy.launcher is not None: + model.trainer.strategy.launcher.launch(dummy, trainer=model.trainer) + model.trainer.strategy.setup_environment() + + return trainer, model + + +@pytest.mark.run_only_on('GPU') +class TestMegatronSDModel: + @pytest.mark.unit + def test_constructor(self, sd_trainer_and_model): + sd_model = sd_trainer_and_model[1] + assert isinstance(sd_model, MegatronLatentDiffusion) + + num_weights = sd_model.num_weights + assert num_weights == 859520964 + + @pytest.mark.unit + def test_build_dataset(self, sd_trainer_and_model, test_data_dir): + sd_model = sd_trainer_and_model[1] + train_ds, validation_ds = build_train_valid_datasets( + model_cfg=sd_model.cfg, consumed_samples=0, + ) + assert len(train_ds) == 100000 + assert validation_ds is None + sample = next(iter(train_ds)) + assert "captions" in sample + assert "images" in sample + + + @pytest.mark.unit + def test_forward(self, sd_trainer_and_model, test_data_dir, precision=None): + trainer, sd_model = sd_trainer_and_model + + dtype = None + if sd_model.cfg['precision'] in [32, '32', '32-true']: + dtype = torch.float + elif sd_model.cfg['precision'] in [16, '16', '16-mixed']: + dtype = torch.float16 + elif sd_model.cfg['precision'] in ['bf16', 'bf16-mixed']: + dtype = torch.bfloat16 + else: + raise ValueError(f"precision: {sd_model.cfg['precision']} is not supported.") + + trainer_ds, _ = build_train_valid_datasets(model_cfg=sd_model.cfg, consumed_samples=0) + + train_loader = torch.utils.data.DataLoader(trainer_ds, batch_size=4) + batch = next(iter(train_loader)) + + + sd_model = sd_model.to('cuda') + batch['images'] = batch['images'].cuda() + x, c = sd_model.model.get_input(batch, 'images') + with torch.no_grad(): + with torch.autocast('cuda', dtype=dtype): + loss = sd_model(x, c) + assert len(loss) == 2 diff --git a/tests/collections/vision/test_vit_model.py b/tests/collections/vision/test_vit_model.py index e50106957679..6817c734323e 100644 --- a/tests/collections/vision/test_vit_model.py +++ b/tests/collections/vision/test_vit_model.py @@ -25,6 +25,17 @@ from nemo.collections.vision.models.megatron_vit_classification_models import MegatronVitClassificationModel from nemo.collections.vision.modules.vit.vit_backbone import VitBackbone, VitMlpHead +try: + from megatron.core import ModelParallelConfig, parallel_state + + HAVE_MEGATRON_CORE = True + +except (ImportError, ModuleNotFoundError): + + ModelParallelConfig = ApexGuardDefaults + + HAVE_MEGATRON_CORE = False + DEVICE_CAPABILITY = None if torch.cuda.is_available(): DEVICE_CAPABILITY = torch.cuda.get_device_capability() @@ -229,8 +240,8 @@ def vit_classification_trainer_and_model(model_cfg, trainer_cfg, precision): def build_datasets(cfg, test_data_dir): data_path = [ - os.path.join(test_data_dir, "vision/tiny_imagenet/train"), - os.path.join(test_data_dir, "vision/tiny_imagenet/val"), + os.path.join(test_data_dir, "multimodal/tiny-imagenet/train"), + os.path.join(test_data_dir, "multimodal/tiny-imagenet/val"), ] return build_train_valid_datasets(model_cfg=cfg, data_path=data_path, image_size=(cfg.img_h, cfg.img_w),) @@ -249,8 +260,8 @@ def test_constructor(self, vit_classification_trainer_and_model): def test_build_dataset(self, vit_classification_trainer_and_model, test_data_dir): vit_classification_model = vit_classification_trainer_and_model[1] data_path = [ - os.path.join(test_data_dir, "vision/tiny_imagenet/train"), - os.path.join(test_data_dir, "vision/tiny_imagenet/val"), + os.path.join(test_data_dir, "multimodal/tiny-imagenet/train"), + os.path.join(test_data_dir, "multimodal/tiny-imagenet/val"), ] train_ds, validation_ds = build_train_valid_datasets( model_cfg=vit_classification_model.cfg, @@ -321,6 +332,7 @@ def test_vit_backbone(self, model_cfg, trainer_cfg, precision): seed=model_cfg.get('seed', 1234), apex_transformer_log_level=model_cfg.get('apex_transformer_log_level', 30), ) + model_parallel_config = ModelParallelConfig() dtype = None if trainer_cfg['precision'] in [32, '32', '32-true']: @@ -334,6 +346,7 @@ def test_vit_backbone(self, model_cfg, trainer_cfg, precision): vit_backbone = VitBackbone( model_cfg, + model_parallel_config=model_parallel_config, init_method=None, scaled_init_method=None, pre_process=True, From 724c956792cfb5662ce8498556269dfc6425057c Mon Sep 17 00:00:00 2001 From: Yu Yao Date: Wed, 1 Nov 2023 14:27:50 -0700 Subject: [PATCH 370/512] Add Multimodal Docs --- docs/source/conf.py | 5 + docs/source/index.rst | 19 +- docs/source/multimodal/api.rst | 81 +++ docs/source/multimodal/mllm/checkpoint.rst | 113 +++ docs/source/multimodal/mllm/configs.rst | 143 ++++ docs/source/multimodal/mllm/datasets.rst | 99 +++ .../multimodal/mllm/images/llava_arch.jpg | Bin 0 -> 117431 bytes docs/source/multimodal/mllm/intro.rst | 111 +++ docs/source/multimodal/mllm/neva.rst | 160 +++++ docs/source/multimodal/mm_all.bib | 206 ++++++ docs/source/multimodal/nerf/configs.rst | 142 ++++ docs/source/multimodal/nerf/datasets.rst | 81 +++ docs/source/multimodal/nerf/dreamfusion.rst | 310 +++++++++ .../images/dreamfusion_model_overview.png | Bin 0 -> 1653618 bytes docs/source/multimodal/nerf/intro.rst | 55 ++ .../source/multimodal/text2img/checkpoint.rst | 80 +++ docs/source/multimodal/text2img/configs.rst | 166 +++++ .../source/multimodal/text2img/controlnet.rst | 106 +++ docs/source/multimodal/text2img/datasets.rst | 40 ++ .../source/multimodal/text2img/dreambooth.rst | 132 ++++ docs/source/multimodal/text2img/imagen.rst | 287 ++++++++ .../text2img/images/controlnet-structure.png | Bin 0 -> 59531 bytes .../text2img/images/imagen_arch.png | Bin 0 -> 492447 bytes docs/source/multimodal/text2img/insp2p.rst | 84 +++ docs/source/multimodal/text2img/intro.rst | 99 +++ docs/source/multimodal/text2img/sd.rst | 170 +++++ docs/source/multimodal/vlm/checkpoint.rst | 92 +++ docs/source/multimodal/vlm/clip.rst | 156 +++++ docs/source/multimodal/vlm/configs.rst | 160 +++++ docs/source/multimodal/vlm/datasets.rst | 35 + .../multimodal/vlm/images/clip_arch.png | Bin 0 -> 252444 bytes docs/source/multimodal/vlm/intro.rst | 82 +++ docs/source/vision/checkpoint.rst | 77 +++ docs/source/vision/configs.rst | 134 ++++ docs/source/vision/datasets.rst | 45 ++ docs/source/vision/images/vit_arch.png | Bin 0 -> 114139 bytes docs/source/vision/intro.rst | 45 ++ docs/source/vision/vision_all.bib | 17 + docs/source/vision/vit.rst | 136 ++++ .../clip/conf/megatron_clip_VIT-L-14.yaml | 203 ++++++ .../multimodal/data/common/webdataset.py | 3 + .../data/dreambooth/dreambooth_dataset.py | 10 + .../models/controlnet/controlnet.py | 10 + .../stable_diffusion/ldm/autoencoder.py | 6 + .../modules/imagen/diffusionmodules/nets.py | 54 ++ .../modules/imagen/encoder/t5encoder.py | 9 + .../diffusionmodules/openaimodel.py | 1 + .../stable_diffusion/encoders/modules.py | 3 + requirements/requirements_docs.txt | 1 + tutorials/multimodal/DreamBooth.ipynb | 273 ++++++++ .../Multimodal Data Preparation.ipynb | 646 ++++++++++++++++++ .../Stable Diffusion Tutorial.ipynb | 278 ++++++++ 52 files changed, 5164 insertions(+), 1 deletion(-) create mode 100644 docs/source/multimodal/api.rst create mode 100644 docs/source/multimodal/mllm/checkpoint.rst create mode 100644 docs/source/multimodal/mllm/configs.rst create mode 100644 docs/source/multimodal/mllm/datasets.rst create mode 100644 docs/source/multimodal/mllm/images/llava_arch.jpg create mode 100644 docs/source/multimodal/mllm/intro.rst create mode 100644 docs/source/multimodal/mllm/neva.rst create mode 100644 docs/source/multimodal/mm_all.bib create mode 100644 docs/source/multimodal/nerf/configs.rst create mode 100644 docs/source/multimodal/nerf/datasets.rst create mode 100644 docs/source/multimodal/nerf/dreamfusion.rst create mode 100644 docs/source/multimodal/nerf/images/dreamfusion_model_overview.png create mode 100644 docs/source/multimodal/nerf/intro.rst create mode 100644 docs/source/multimodal/text2img/checkpoint.rst create mode 100644 docs/source/multimodal/text2img/configs.rst create mode 100644 docs/source/multimodal/text2img/controlnet.rst create mode 100644 docs/source/multimodal/text2img/datasets.rst create mode 100644 docs/source/multimodal/text2img/dreambooth.rst create mode 100644 docs/source/multimodal/text2img/imagen.rst create mode 100644 docs/source/multimodal/text2img/images/controlnet-structure.png create mode 100644 docs/source/multimodal/text2img/images/imagen_arch.png create mode 100644 docs/source/multimodal/text2img/insp2p.rst create mode 100644 docs/source/multimodal/text2img/intro.rst create mode 100644 docs/source/multimodal/text2img/sd.rst create mode 100644 docs/source/multimodal/vlm/checkpoint.rst create mode 100644 docs/source/multimodal/vlm/clip.rst create mode 100644 docs/source/multimodal/vlm/configs.rst create mode 100644 docs/source/multimodal/vlm/datasets.rst create mode 100644 docs/source/multimodal/vlm/images/clip_arch.png create mode 100644 docs/source/multimodal/vlm/intro.rst create mode 100644 docs/source/vision/checkpoint.rst create mode 100644 docs/source/vision/configs.rst create mode 100644 docs/source/vision/datasets.rst create mode 100644 docs/source/vision/images/vit_arch.png create mode 100644 docs/source/vision/intro.rst create mode 100644 docs/source/vision/vision_all.bib create mode 100644 docs/source/vision/vit.rst create mode 100644 examples/multimodal/foundation/clip/conf/megatron_clip_VIT-L-14.yaml create mode 100644 tutorials/multimodal/DreamBooth.ipynb create mode 100644 tutorials/multimodal/Multimodal Data Preparation.ipynb create mode 100644 tutorials/multimodal/Stable Diffusion Tutorial.ipynb diff --git a/docs/source/conf.py b/docs/source/conf.py index 952e25332ca4..586f6cf47675 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -61,6 +61,9 @@ 'ipadic', 'psutil', 'regex', + 'PIL', + 'boto3', + 'taming', ] _skipped_autodoc_mock_imports = ['wrapt', 'numpy'] @@ -125,6 +128,8 @@ 'tts/tts_all.bib', 'text_processing/text_processing_all.bib', 'core/adapters/adapter_bib.bib', + 'multimodal/mm_all.bib', + 'vision/vision_all.bib', ] intersphinx_mapping = { diff --git a/docs/source/index.rst b/docs/source/index.rst index 86ad55d1709b..7407886eefc8 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -47,7 +47,7 @@ NVIDIA NeMo User Guide nlp/api nlp/megatron_onnx_export nlp/models - + .. toctree:: :maxdepth: 1 @@ -71,6 +71,23 @@ NVIDIA NeMo User Guide text_processing/g2p/g2p common/intro +.. toctree:: + :maxdepth: 3 + :caption: Multimodal (MM) + :name: Multimodal + + multimodal/mllm/intro + multimodal/vlm/intro + multimodal/text2img/intro + multimodal/nerf/intro + multimodal/api + +.. toctree:: + :maxdepth: 2 + :caption: Vision + :name: vision + + vision/intro .. toctree:: :maxdepth: 3 diff --git a/docs/source/multimodal/api.rst b/docs/source/multimodal/api.rst new file mode 100644 index 000000000000..ef517d6bdd5a --- /dev/null +++ b/docs/source/multimodal/api.rst @@ -0,0 +1,81 @@ +NeMo Megatron API +======================= + +Model Classes +------------- + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_base_model.MegatronBaseModel + :show-inheritance: + :no-members: + :members: __init__, configure_optimizers + + +.. autoclass:: nemo.collections.multimodal.models.stable_diffusion.ldm.ddpm.MegatronLatentDiffusion + :show-inheritance: + :no-members: + :members: __init__, training_step, validation_step, setup, build_train_valid_test_datasets + + +.. autoclass:: nemo.collections.multimodal.models.dreambooth.dreambooth.MegatronDreamBooth + :show-inheritance: + :no-members: + :members: __init__, training_step, validation_step, setup, build_train_valid_test_datasets + + +.. autoclass:: nemo.collections.multimodal.models.controlnet.controlnet.MegatronControlNet + :show-inheritance: + :no-members: + :members: __init__, training_step, validation_step, setup, build_train_valid_test_datasets + +.. autoclass:: nemo.collections.multimodal.models.imagen.imagen.MegatronImagen + :show-inheritance: + :no-members: + :members: __init__, training_step, validation_step, setup, build_train_valid_test_datasets + + + +Modules +------- + +.. autoclass:: nemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.openaimodel.UNetModel + :show-inheritance: + :no-members: + +.. autoclass:: nemo.collections.multimodal.modules.imagen.diffusionmodules.nets.UNetModel + :show-inheritance: + :no-members: + +.. autoclass:: nemo.collections.multimodal.modules.imagen.diffusionmodules.nets.EfficientUNetModel + :show-inheritance: + :no-members: + +.. autoclass:: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL + :show-inheritance: + :no-members: + :members: __init__, encode, decode + +.. autoclass:: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder + :show-inheritance: + :no-members: + :members: __init__, forward + +.. autoclass:: nemo.collections.multimodal.modules.imagen.encoder.t5encoder.T5Encoder + :show-inheritance: + :no-members: + :members: __init__, encode + + +.. autoclass:: nemo.collections.multimodal.models.controlnet.controlnet.ControlledUnetModel + :show-inheritance: + :no-members: + :members: forward + +Datasets +--------- + +.. autoclass:: nemo.collections.multimodal.data.common.webdataset.WebDatasetCommon + :show-inheritance: + +.. autoclass:: nemo.collections.multimodal.data.dreambooth.dreambooth_dataset.DreamBoothDataset + :show-inheritance: + diff --git a/docs/source/multimodal/mllm/checkpoint.rst b/docs/source/multimodal/mllm/checkpoint.rst new file mode 100644 index 000000000000..8c46a5449c73 --- /dev/null +++ b/docs/source/multimodal/mllm/checkpoint.rst @@ -0,0 +1,113 @@ +Checkpoints +=========== + +In this section, we present four key functionalities of NVIDIA NeMo related to checkpoint management: + +1. **Checkpoint Loading**: Load local ``.nemo`` checkpoint files with the :code:`restore_from()` method. +2. **Partial Checkpoint Conversion**: Convert partially-trained ``.ckpt`` checkpoints to the ``.nemo`` format. +3. **Community Checkpoint Conversion**: Transition checkpoints from community sources, like HuggingFace, into the ``.nemo`` format. +4. **Model Parallelism Adjustment**: Modify model parallelism to efficiently train models that exceed the memory of a single GPU. NeMo employs both tensor (intra-layer) and pipeline (inter-layer) model parallelisms. Dive deeper with "Efficient Large-Scale Language Model Training on GPU Clusters Using Megatron-LM" (`link `_). This tool aids in adjusting model parallelism, accommodating users who need to deploy on larger GPU arrays due to memory constraints. + +Understanding Checkpoint Formats +-------------------------------- + +A ``.nemo`` checkpoint is fundamentally a tar file that bundles the model configurations (given as a YAML file), model weights, and other pertinent artifacts like tokenizer models or vocabulary files. This consolidated design streamlines sharing, loading, tuning, evaluating, and inference. + +On the other hand, the ``.ckpt`` file is a product of PyTorch Lightning training. It stores model weights and optimizer states, and it's generally used for resuming training. + +Subsequent sections delve into each of the previously listed functionalities, emphasizing the loading of fully trained checkpoints for evaluation or additional fine-tuning. + + +Loading Local Checkpoints +------------------------- + +NeMo inherently saves any model's checkpoints in the ``.nemo`` format. To manually save a model at any stage: + +.. code-block:: python + + model.save_to(.nemo) + +To load a local ``.nemo`` checkpoint: + +.. code-block:: python + + import nemo.collections.multimodal as nemo_multimodal + model = nemo_multimodal.models..restore_from(restore_path="") + +Replace `` with the appropriate MM model class. + +Converting Local Checkpoints +---------------------------- + +The training script only auto-converts the final checkpoint into the ``.nemo`` format. To evaluate intermediate training checkpoints, conversion to ``.nemo`` might be needed. For this: + +.. code-block:: python + + python -m torch.distributed.launch --nproc_per_node= * \ + examples/multimodal/convert_ckpt_to_nemo.py \ + --checkpoint_folder \ + --checkpoint_name \ + --nemo_file_path \ + --tensor_model_parallel_size \ + --pipeline_model_parallel_size + +Converting Community Checkpoints +-------------------------------- + +NeVA Checkpoints +^^^^^^^^^^^^^^^^ + +Currently, the conversion mainly supports LLaVA checkpoints based on "llama-2 chat" checkpoints. As a reference, we'll consider the checkpoint `llava-llama-2-13b-chat-lightning-preview `_. + +After downloading this checkpoint and saving it at `/path/to/llava-llama-2-13b-chat-lightning-preview`, undertake the following procedures: + +Modifying the Tokenizer +""""""""""""""""""""""" + +NeMo mandates adding specific tokens to the tokenizer model for peak performance. To modify an existing tokenizer located in `/path/to/llava-llama-2-13b-chat-lightning-preview/tokenizer`, execute the following in the NeMo container: + +.. code-block:: bash + + cd /opt/sentencepiece/src/ + protoc --python_out=/opt/NeMo/scripts/tokenizers/ sentencepiece_model.proto + python /opt/NeMo/scripts/tokenizers/add_special_tokens_to_sentencepiece.py \ + --input_file /path/to/llava-llama-2-13b-chat-lightning-preview/tokenizer.model \ + --output_file /path/to/llava-llama-2-13b-chat-lightning-preview/tokenizer_neva.model \ + --is_userdefined \ + --tokens "" "" "" "" \ + "" "" "" "" + +Checkpoint Conversion +""""""""""""""""""""" + +For conversion: + +.. code-block:: python + + python examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py \ + --in-file /path/to/llava-llama-2-13b-chat-lightning-preview \ + --out-file /path/to/neva-llava-llama-2-13b-chat-lightning-preview.nemo \ + --tokenizer-model /path/to/llava-llama-2-13b-chat-lightning-preview/tokenizer_add_special.model + + +Model Parallelism Adjustment +------------------------- + +NeVA Checkpoints +^^^^^^^^^^^^^^^^ + +Adjust model parallelism with: + +.. code-block:: python + + python examples/nlp/language_modeling/megatron_change_num_partitions.py \ + --model_file=/path/to/source.nemo \ + --target_file=/path/to/target.nemo \ + --tensor_model_parallel_size=??? \ + --target_tensor_model_parallel_size=??? \ + --pipeline_model_parallel_size=??? \ + --target_pipeline_model_parallel_size=??? \ + --model_class="nemo.collections.multimodal.models.neva.neva_model.MegatronNevaModel" \ + --precision=32 \ + --tokenizer_model_path=/path/to/tokenizer.model \ + --tp_conversion_only diff --git a/docs/source/multimodal/mllm/configs.rst b/docs/source/multimodal/mllm/configs.rst new file mode 100644 index 000000000000..38ee65da9dd3 --- /dev/null +++ b/docs/source/multimodal/mllm/configs.rst @@ -0,0 +1,143 @@ +Common Configuration Files +========================== + +This section provides a detailed overview of the NeMo configuration file setup specific to models within the NeMo Multimodal Language Model collection. For foundational knowledge about setting up and executing experiments common to all NeMo models, such as the Experiment Manager and PyTorch Lightning trainer parameters, refer to the :doc:`../core/core` section. + +Within the configuration files of the NeMo Multimodal Language Model, details concerning dataset(s), augmentation, optimization parameters, and model architectural specifications are central. This page explores each of these aspects. + +Discover exemplary configuration files for all NeMo Multimodal Language Model scripts in the `config directory of the examples `_. + +Dataset Configuration +--------------------- + +The NeMo multimodal language model currently supports a conversation data format, inspired by and designed from https://github.com/haotian-liu/LLaVA/tree/main. To explore a sample dataset, visit https://github.com/haotian-liu/LLaVA/blob/main/docs/Data.md. + +The configuration file allows setting any initialization parameter accepted by the Dataset class used in the experiment. For a comprehensive list of Datasets and their parameters, visit the `Datasets <./api.html#Datasets>`__ section of the API. + +A typical training configuration is as follows: + +.. code-block:: yaml + + data: + num_workers: 8 + dataloader_type: cyclic + data_path: path/to/conversations.json + lazy_preprocess: True + is_multimodal: True + conv_template: llama_2 + image_token_len: 256 + image_folder: path/to/images + image_aspect_ratio: 'square' + +Key parameters include: + +- ``data_path``: The path to the dataset in JSON format. +- ``is_multimodal``: Indicates if the dataset has multiple modalities (e.g., text and images). +- ``conv_template``: The template used for conversation format. Supports values like 'nvgpt' and 'llama_2'. +- ``image_token_len``: Specifies how many tokens in the language model word embedding each image will occupy. +- ``image_folder``: The path to the folder containing images related to the dataset. +- ``image_aspect_ratio``: Specifies whether to pad or crop the image to maintain the aspect ratio, such as 'square'. + +Trainer Configuration +--------------------- + +This section outlines arguments for the Pytorch Lightning Trainer Object. + +.. code-block:: yaml + + trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + +For a detailed list of arguments, refer to the `Pytorch Lightning Trainer `__ API section. + +Experiment Manager Configurations +--------------------------------- + +The NeMo Experiment Manager provides a streamlined approach to manage various tasks such as logging, saving, and resuming. + +.. code-block:: yaml + + exp_manager: + exp_dir: null # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: True + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: training-session + project: text2img + group: nemo + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +Optimizer Configurations +------------------------- + +.. code-block:: yaml + + optim: + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null + +The default optimizer used is ``fused_adam``. For details on all supported optimizers, refer to the NeMo user guide. The learning rate scheduler can be specified in the ``optim.sched`` section. + +Model Configurations +-------------------- + +Each configuration file should detail the model architecture used for the experiment. + +The parameters commonly shared across most multimodal language models include: + ++---------------------------+--------------+---------------------------------------------------------------------------------------+ +| **Parameter** | **Datatype** | **Description** | ++===========================+==============+=======================================================================================+ +| :code:`micro_batch_size` | int | micro batch size that fits on each GPU | ++---------------------------+--------------+---------------------------------------------------------------------------------------+ +| :code:`global_batch_size` | int | global batch size that takes consideration of gradient accumulation, data parallelism | ++---------------------------+--------------+---------------------------------------------------------------------------------------+ +| :code:`tensor_model_parallel_size` | int | intra-layer model parallelism | ++---------------------------+--------------+---------------------------------------------------------------------------------------+ +| :code:`pipeline_model_parallel_size` | int | inter-layer model parallelism | ++---------------------------+--------------+---------------------------------------------------------------------------------------+ +| :code:`seed` | int | seed used in training | ++---------------------------+--------------+---------------------------------------------------------------------------------------+ + +NeVA +~~~~~~~~ + +For model-specific configurations, refer to `Neva <./neva.html#neva>`_. diff --git a/docs/source/multimodal/mllm/datasets.rst b/docs/source/multimodal/mllm/datasets.rst new file mode 100644 index 000000000000..1c64c4d317d2 --- /dev/null +++ b/docs/source/multimodal/mllm/datasets.rst @@ -0,0 +1,99 @@ +Multimodal Language Model Datasets +================================== + +The NeMo multimodal language model supports the conversation data format, drawing inspiration from and designed based on `LLaVA `_. Sample datasets can be explored at `LLaVA's data documentation `_. + +Preparing the Training Dataset +------------------------------ + +The NeVA model training encompasses two phases: pretraining and finetuning. Each phase mandates a unique dataset. + +For **pretraining**, utilize the *LAION/CC/SBU BLIP-Caption Concept-balanced 558K* dataset. Access this dataset via `LLaVA's GitHub `_. After procuring the dataset, extract it to: + +.. code-block:: bash + + /path/to/neva/datasets/LLaVA-Pretrain-LCS-558K/blip_laion_cc_sbu_558k.json + +Acquire the image data from `HuggingFace `_ and extract to: + +.. code-block:: bash + + /path/to/neva/datasets/LLaVA-Pretrain-LCS-558K/images + +For **fine-tuning**, deploy the *LLaVA-Instruct-150K* dataset. This is also available on `LLaVA's GitHub `_. You can download the prompts from `HuggingFace `_: + +.. code-block:: bash + + /path/to/neva/datasets/LLaVA-Instruct-150K/ + +Image data for this phase can be obtained from the `COCO Dataset `_. Once downloaded, extract the images to: + +.. code-block:: bash + + /path/to/neva/datasets/LLaVA-Instruct-150K/images + +Additional Preparation for NeVA Model +------------------------------------- + +The following instructions are specific to the NeVA model within the NeMo Multimodal Language Models. + +Setting Up LLaMA-2 Chat Checkpoints +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Support is available for both the 7B and 13B chat models. Both can be downloaded from `LLaVA's Model Zoo `_. After downloading the desired HuggingFace checkpoint, extract and store it on your local system to prep for pretraining. + +To convert the LLaMA-2 checkpoints to NeMo's format, follow these steps: + +1. Adjust the default yaml file at `megatron_llama_config.yaml `_. Ensure ``model.mcore_gpt`` and ``model.transformer_engine`` are set to `False` before the checkpoint conversion. + +2. For the 7B chat model, use this conversion command: + +.. code-block:: bash + + python /opt/NeMo/scripts/nlp_language_modeling/convert_hf_llama_to_nemo.py \ + --in-file \ + --out-file /path/to/neva/checkpoints/llama-2-7b-chat.nemo + +For the 13B model, adjust the paths in the `--in-file` and `--out-file` parameters accordingly. + +3. Execute the subsequent command to divide the checkpoint for tensor model parallel sizes of 4 or 8. It's advisable to use TP=4 for the 7B model and TP=8 for the 13B model to ensure both pretraining and finetuning operate without memory complications. + +.. code-block:: bash + + # Instructions for the 7B model partitioning provided here. + # Adjust parameters for the 13B model as needed. + python /opt/NeMo/examples/nlp/language_modeling/megatron_change_num_partitions.py \ + --model_file=/path/to/neva/checkpoints/llama-2-7b-chat.nemo \ + --target_file=/path/to/neva/checkpoints/llama-2-7b-chat-tp4.nemo \ + --tensor_model_parallel_size=1 \ + --target_tensor_model_parallel_size=4 \ + --pipeline_model_parallel_size=1 \ + --target_pipeline_model_parallel_size=1 \ + --tp_conversion_only \ + --model_class="nemo.collections.nlp.models.language_modeling.megatron_gpt_model.MegatronGPTModel" \ + --tokenizer_model_path=/tokenizer.model + +Tokenizer Configuration +^^^^^^^^^^^^^^^^^^^^^^^ + +For NeVA training, integrating special tokens into the tokenizer is vital. After obtaining the 7B/13B model from Huggingface, also procure the corresponding tokenizer model. Referring to the 7B-chat model: + +1. Download the `tokenizer.model `_ to: + +.. code-block:: bash + + /path/to/neva/tokenizers/tokenizer.model + +2. Executing the next script necessitates the NeMo dependency. It's more convenient to run the script within the NeMo container. + +3. Employ the command below to infuse special tokens into the tokenizer: + +.. code-block:: bash + + cd /opt/sentencepiece/src/; protoc --python_out=/opt/NeMo/scripts/tokenizers/ sentencepiece_model.proto + python /opt/NeMo/scripts/tokenizers/add_special_tokens_to_sentencepiece.py \ + --input_file /path/to/neva/tokenizers/tokenizer.model \ + --output_file /path/to/neva/tokenizers/tokenizer_neva.model \ + --is_userdefined \ + --tokens "" "" "" "" \ + "" "" "" "" diff --git a/docs/source/multimodal/mllm/images/llava_arch.jpg b/docs/source/multimodal/mllm/images/llava_arch.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7488935ff06c3fe85ea1a6574a2fc2e0a9921701 GIT binary patch literal 117431 zcmeFZcT`hfw~1LJE~{vg80=IG<| z@b|->UIBi$3@=GpS=&glE&&_>FCYkr0<4Zs{yus)Z(jR7=RdB0=KtphaqQ2#1H&hO z&xOAFGdyad>U?%dJ`$Mk?OkJAH3XAqtNVdZ=O?!N24gFPPx{{Fka z+waxlbn!C34PN=cE)HA+3;`*?2)GQ~0S|f_A|ff3_c6Cf1}^O-3u89*U}99((d2x-I)Y{ z+M57yIQ!r3DaZi;*IfYU(C~5ebNoHqpTAT^xt4kKL9vHqSLn}0D!d?0H}U+ zI{7u7P6cVm1kTy$OK${t*%)J3?3fsi1B|>3OuP*ARsaH0<`Bc5_V;N421X|4LoBRp z>>P)|0cak8k%5Vck(uexq2CP15C(n^F!LVbJEo?`!f)otdfZpw^wZ>gHt9>{O@g-u z@iJ%b`GvD{2nin*5tWsbS2&@lu5niL-1!Uom#-KY8W~@`eaGCw5=76*+2y|L12=d7 zfWV+&c*vuOXOU6SG0$UDQeUK{XS~eJDkyyO_Fd8Y55*OgRp`&vHD79*TUy)NJ370* z4GoWsj*U-DPGND2-+%mET3%TtY;EuC68A{^2fy)R0GR%U7Wnyhdi_VdctN}vnVFfG zS%2fjz!>}+abD&_$JAK(^vqZteff`{e#$0rDLKEqiCy~4Exh18zd;Tm8Fj2I;Wui3 zF#E3|7XJSVvwtG?Z+J}u`T*mf#>~jb%)-pf%)-V3HZ~5n-;INl<4@!Kdpq){@%(Ol z|7>({5^zD84ly5MV`gUK=Q_;A|NpqrXF-K^pFRa}GBJS6#Ka3g0V<^+Rss0G{r__` zV4@HF-?jNa^CeFA7{wSb-|T(3cMhXfCA~GqDxe+fIrUzki z@*C4+$xiK{u)bUn3n=`D-XKdYM!LVG33ku{jwNhg_vTTUoS=b-vRU-O>jPIu(u84eQi@-BpH%I8-bx8!q}~KA)#7EyQ;!-z-7Tl$Za}MfLzc*5;SlQlK7o0=hW4ys?D(C`Sa<&e%-SE z9(3OO<#s$xX3OI}Dx>FI<>Vcrb-+Db!${gpE>d>Vdo&knD{;%O-adI5gCChk&r|5Y zaxUq}JmtFWFy>+Zlu5Jb>xQ~Tc-1V-FtTH6W8=75?+-|QXDg@R>V+hS(}o#CJZw$h zEoF7pT0h9&dL_F0#WQeic%=8FRfKS_+tWGMYz_837{oD?xF1$@rLJq+?IyIv5zW= z@2DS28+dHF8R0x~`HNh7Y6AM4=`k(O-$|TFG~kAd`!>vFYPlVLLnw6-;|77rjnfIdx=R)6|4Pc#1g(!XLaa+^fVIt0po$$ zrvrmfyaa}n1^wTC-!4}Ajv424-O{H642G$@#h9M{>y^|Bs2hp{)>KCNtaHY4Xu-#; zwDg|Zw?QPYMPn1wUm;8Hr8N#d;OB9M&k)LRgYot`_F7>JOZ{a+4SBE=)4Vo=kd_KZ z3KwHq=s=95D)p7N1MNKiv{ZA~jQ|0&D@M(+3VDpNN{e)0Dvnt2iKKeBfd?P{t`JAPqUQNf z*QMHGMZM*dg!v6kCwUI`(i67=Aeu;bT4a;}ykKQl z!rRRMoFe)A~;CW z?KCxNPUyXPo(9PeHN$0WqVLj;dHauQS0o12eowo5x=3448BI3BV{p+C4X5B2roq+! zA}o}_#972>i8-8|%wg_!&rR8Z7ZjMdbvj@;vUlnw+S>ZpBS8T( zy$NucfPdAI^uhm;8ywRMf^Z^*8_Rw#qOGtA{jdZ;B$+F>Lekd<%DC34n*g4zumkZ(cl%TEbYBTA7ww+2{Ut_a*@ z_%C*5|La`!s=hwBbBl*EUoHUWWl|df%a_RJZIV14&`*q}0}+S4Y3FT`{Fv|A@Mv07 z2J%35ZC{QWg>+(rK1A`Z9SG2*y}7xLOo{B3=GC6ge4TOh%#~^}|M~M97a?OonS@}h zE;^_dk$_8(`xQ(sZ+Xm|h=?sSM(LxL&fr`ryjtxDNqp%T8b96sX|0+#s2=C!f6tS1 zT2G75Jjd~Cc_e3^YY9Vd5EO3+nX>JelOhhzQSML9qGW>k1OZ=9e zn3~MxZmGMk1wKqCv_FC7DJm2^;rOriZSPn*z=n_r^}z=;0Hn9?5-g3fjD9>0O0jwh zPyDWxAAsN{TkWEZ3!XqZklv;wIr*0B5G-ddFY^9L&XwAX=$09nm7)3`f$%(9+dMvB8-Y-z=%6RD+AQ#Qo*?J| zt>XX*iVJJw*E81CEah=ie0!PAzGrL2H^R)hJDD+x7#z->hepifZzV-x zDm9}TPOM;9pso;3W7Q3Xa@-VO@*aI%_Jv#+kP!7IYb1Z63=7uKL&tQt`55E3gM5kz5QYQIs zsIRrEwkb~a^rDGveN}mzZ^~YZ*HUi{XOQGz3Nq;jut^78=SecC^$jd6+QyIKKV;jA zkWW`TxkfGfdGvVl{;Af_TLvOdoK41l zww8HuBCRJe_*>tSa_d$HO58Gk@NZk}Kek!{%N{y_MzwA7Py|UD!MJ($_JsGn>Onrn z@=E;IbLTvl+b(M9EuNE?;t41;9?dF}nB>oTX7^)Ydw!~j_#p{vhs2j5#Mc`Lkz0;; z98=nkB12aL+%e(p~H zB%I5%NSkZAB4F7i4u0Nb{B7XeMu<_22(IhHxDEpH881p(2i~}vWd&84-(M4U36$Vo zK)`AbE-@U{j@|W(pDVMsoVq=xYJ6pVe4iM`@oYx)g+tMX<(~~#fRu|hF%^ZYYHF6>BO?yDe?I`&po9v!pqt&LpVLb=etQ(4p4L!a>S zPZ`}MPmS>NUOv_JApTsI4<%HES2Nu5N@$mbhl+yw7sDH54D4xQcj1oBcYv~zBQ}Xfrny0 ze#x$le~k91{$5r;*O8sregRe&gDt zRuu{`YVA&nxYcgnn6&S70999^%~KGo^pX)Y6FBGqH%$tTA^H#=la?Agy{bG4py+^) zSQ$M!a8h~=J)XiQ9`M7MiCBciv?j4ptndk_gnIq}m5*i7gNwd7r_ZAa{W zTbDCEyI(lsoJa}g-Gc?b|pOS=mw6CfQAh89PBkFy^ten3L)!U;4=yDmmPKjz(SEB9W9c83hbPPWcODnyYqGo*{y^6FFIgNr ziWC7nBq`K{tQ~r(l8_WG!+K-1>>hd~U|dH3l%G#`Qteuo1(Rafoix;3Emcv_&GId$ zg$Oxiv(_Vks@zajDeu?)_8959mt>3|IR5*_#yh@!>X0P7|A{zX~@ z9XKRHeTRIIBp9keGAx-P*)};q>TDmNE~DPrHY}TUx?yko`rP<+iSTK)U~xFN!0YcTX3+F@YG3^Yi(6Xrz_9Q^h{(|pn=p2X#g8xxtZIHN{m<> zUr*z1-;h0$>-%AO+^_a>>rs``F{`(1lQWFnVh?qEBgQbf+|!yw1rnO98+xLOltw7Q zDG=lek?%WPxuVE%3G0Jh(%nmTMy1c#BgkbRPaAmb=F-@g=zzj_HK;$XgL|Uk%m6E{ zzY#eVLHtQd)){Y9jnX|arV-}aRyiH>s|@un>*tt~OXgdi{xj8+$t}_8mlY@CfkzQ5 zFfoKAZ65PU_wAcdv+;Uw|Bxev{?!{6S&^eZD(xawzt3I?crRX2!uv(U>O@l~3S5D3 zDbl0nHJo80#auDM-|5|yO}p=)ic2u!u|MPZ6cTT3Zb41?*v*+n;L4Qj==>tUutV0 z)!gB+`$JW6ax6xRb?F@@<;Lt=i{47A%Fmd(ti=~nBUg6lK}Fa0`O+@&n`=VBm3G-7_k5)i4_8n6KeWGn=*Vcm3~=rt zqJ`GsaF&n}i(JUDfhAC`mpHhKj%3|HNzc#p%Ty~9?Cg~v-Vai-W_*A9)f1KrmVNVh z8*hna8`wHEkD`+jNE;d5efg^}g4EFO!C7F;oyV>0uH)xBFkXRZx!O+$n0p2XS&RpF zKLOw>mwW^ElW1&M0utBJCSi!WgxCf`&?;Gd`K)SbIfBrR9S02CM z`Zi1!6bxqq9Y~~!(RwTpQlyYl*s|sZ%BI;XVC}i5i&~ZDp}*>{l~$!O$1nV)qlBdc zq+@MAD&TQAorrV%CZ6w%KFk#Bbl%f>8rVN#IgpFDL5VC+d9RbP^Nd}Jbl|~i7S8^P zFyZJ)sZ1ASUPvJXuj&r$(t)S*LNw(`P=eJw5h^R=!55a|N_RZSFTJ&SKM4XV0;vRxk%ah&4OVxm%-233@JgVOJzcS1a(H~ z!@_%w9axL%l%MIO7LDC@0rmV*SgiYnwC?LgR{CnkAywCdgRpO>^aU;_OhihRGx(iJ z-pXiBiiG0LlBVZd^l|AZ0*gI&i{`&KyUVpf73N?zH zCKJCvD=eFKz|f6&u%zs`tjBf5^9IX|6fOLkC(RQIPIi%p%0~Jq|?~$8%uV1jse0e&) z@!&0+L1;zF>ipS@v)t4DS3KSTBt0UXD6yp&3sHsuDlOYn{@d z93Y$SY6s`TONgI_v^5L*D!Z^7ic@3T-n#a!m0joKbIrusP4myKash}w+8iDD%nYs2 zY`@vy@?kmqxfq-0;&vT7H>fvt7vyNRpXon70s9q%tk zCH0s^fl+Ea^dT$>#*BHeOHIPKi|uPzm8lwJ9~zx0_4V}(mV(@8_c3<6Tci9m_ZGWt{Few`M;&U~0+QOu$MfSymJA23CeUV}T_bSw044!A0;3$rdrc6op-ACIA2DO-CaelRh%k@u93J)K|V`Q&ri zw93~LILybS=kr_F>-E9Svl~O@+oddnq9;xh4LwgvUc`vmhF-?gvQZr#jt>Uj6}oW^ zwf8DTDacuH7%|BNwgJHyKIp=yc|MwwTLON5!7v-u->&wbWpzBbY4 z=l4y7fbB%3H3&)0a$RB(GFfrm5VzSjuQK0i^EI!c`cuM~X@%{Xk0~Or{oYwR-{6fg z7f-Q@+{Tbj6s=3(3;Gw@mO3C2qFiN9i%Eg$9&2G2)R={Z9yZz6KV#H@Kj;2(=?BxV z{_dokzqeS*BLcKzbD|E0H~%p_2QO;=oN>WMi*Kt-;FFeBXU7NVqhmAs*4nQISleOZ z;9j0I1ts>7=z#n>fh^~K`au@obs6?xZ#DGemsp%_U0mA!Vzv1CohBD4p1vkSUP8oz z_eK|)ZJCG>uTqT}@~}8%Dms7Iqx*S^X7UqQmFDoJ!8 zX@K=TX_LlbT56AI^g#^J*wNwBZ5dOsq#$Pw=kV^EYV|0^!X>|bMVSPvPlra%E*ynC zrLoa^=Btx7G8eFfzJ8op_}ovNbG7CadLg6d`L~ASpN)kMF^j7REHcXtu~P*}s$>pC zUq2YTs3Zn|AyiXjMZJn}Tm@Ua67}hi&{ga&H~LoijLlj5817p7{dH=B)NKxu*$y!U z1$MQPcoDr08YXB~#m|=hZoz&}2ecoCM(wm*GeiSnnPfJ&6LFPvn^0{|G9zD`pLeH; z*o%799iDtk4y>Db+gtC>M`+44P2ajFB65ReIfxino_o^6f#rIK1yU!uXf zQk47;)HLp*hz13X=uUOZQ3WYlWsC|v=ctk<9}_s;`7WAjjeby;NLWs)IYbBG8~{0+ z#$iTAlGd=MafQ^^x zba!20P?wbYFWSbS+hb;e`Em76_RzK>p?P-N`0Y*{zY(88V7NY4)9-X zanDgswBJs6U!wS-($DruNv(7Z9_YVHd%MhpZ=tcbf%X9aY6wUS65oTVdWiBb!DLIm zM*8L0cjnBTF?I_$GTV!`b>e~erFphRX~Sq78sr-kPm&nfb7BTA-TLU2s6(d5yZN&z z8P&WKb^`Z)rRs?lpZ+H7LFim_&|7rdN#X}J(Ou6nT~S`08bVZ-eg6K_$G)u7eTFw> zjN>FJGd zE6!hHRxNC-N6I|Q=H%>GJw5Zyn6)xq4*T(GImc7ez~#%Nq7c#)cl5X}7sU#bVKt(z z74Ws9p+>7ra-t^I=%@d-k=Z0iolDHUqpvuqugve<1S@0iG7NCgJfRDT#UIF4@L8U_ zTtdr$7I*dZ0BhD9C6J0jE)?We$u6}dMb624bFS3X+$F#mhMwq{p~56uEChP|h1TQqN11+dxh`%} z1UzW$ySRiiCZx77x$43>p{?U1??fZfLsy=x365POR74%U_02#}!BBvK+)K8m4NeKK zqw$tzqz@%6#JU$Q=;00jq$-E2-09DVHw9mJF5P?OA4`!Z*@77?;xCZhVouV5HXVE4*~{*Zt3*U2mtPkA42mSdK51-ZB+?9pN;OS@<}28X1>ck=wi(*B}vVwBvd2 zhpLFUbL(DEj(`~?;`sD?aNvlsj?zAcv_|0WfZ*>yrxH6B^rdd6$zRY_^aI0mqQKl^~g%s*2f>$ zYGoh>qBwZvoJ}$NUOUI_qQhWsEUhg7gTbUlAoFnAqzIW%N110GD9wFa}iRZ^O{eUhGcBU-g;KrZK8kk*c-~|zl zMRwIEkvVKYD*#njf`yP2iYVEgV6@;ZYU|NPH)hW5;1vmjH5nio#44cLOf&v{{Qi91 z<~}4;kPbMKAyiQ`6|mf<17Tq1ax)yW4t1gfg%+4R4k{FMs*+u}tr4#1w9wPw1a(PP zq-Yuk_@LY zQ|hOENi7Z9_Ba-Ypr$7h(>W@OMYr4{be9?7}M1mnDDimZj$2usLZ5-1) zzjY>m?Le&~0I64Hb|s9ki-n2O=3!eN2bp-g^Zl3iFp*FLgfGay9WS82RJTgZEjno0`E z%IG&8tG6|%T$bD2i<`TT45^VPF`1}kcg(d;%y(-O%55sQ>xn6(>IhgLuw!cXAFBlZ zRy9B${+}7KA~?}Fa>+8Z0az6>AyoDw<>&zPv*%8?Sd5qLhDAEfwm$Zon!oy$3Wgv4 zn=qBhP&tqtN8QGJ-H2!0%k>$ReG68nZV29SyhsO(mw`5e*5x=FZS8qDS`j*&in1J!f5y>B;AblxgC;*N>6yX21UuUP zY>2Ff|3gQ&h1ek3Qk9cG&tl@w+uxBQ2!g0HpkZ9Jwk${^B+=$-o0iqwDhSW>Bn=o~Dzmrl3V;O;~JBabI~yTNMcm%o0n}Ykps6-=*zDvOQw7{2i>A zjG~Rvfr^X`2K*-08CeC{u*Ml+2scn4Y**4hbNdp%Sg0zx*}r+g7t7zx;1s8*>HJ9> zG6yBeeSHF(aB3X0d@tfs{Z;wnIVIWM7C#*}4N?Nm4f7C2Qgm%QsUr4Tqzc1rrDpZU z$R+9er`TKCT@Kdgv+kRVGg_BNnv0-Ts>zZU=L?=WP73xV=knN>qA4^u=eCWXLOjbrEdk1?$=anL7u6c?` zosx<_X&M9l#JWKxUxa(@EtoZP=D{W)BncJxKm{rj>8 zy51{eAd{31VjMU5LvMh*5=aUr-X~=wal^BXw^gvpgc;edQ+6}j2*;g0{t{I-Rc5W|I6;0QH*Xd&((#LY5n_{ zcN`$!`;&4(Yomi&QO{aNLgHM*jR~+AudD7h39Xtu&r3B-&vlRe80lf{obx{KGYQS4e7owTGfnn`?XlR&p#8{~|TzJHO^Bc(C9YD ztFjqc4xB;zB=w04Zy(`Xd?ROC{8<()%XSD_bG;vh z=z|DqYb~e>d6<-uvaKtlZ;gD9uY40Mta&;wUW>9NF84DCDEE2^me~}6!5668Cd-O+ z6HOPdlhz;O1Uunc6Z>;;>dIok=R%Q()P*RuoTna8;&o~U@&Rm$OiYa+7ElXIRUYm< z_UJ^jrff-jdCOEQdTx~F-(`Dx_5J=RAJ_9X($T6(%Dr?Gju43NY-UC&^mKraMsQua}lukZjVJ3jrzVtk<%pi0u~I zA#sP=H=aE??I7dD@`!_${~(XM5jHtYJWfR;j@^GvLex*br_?~@%tH%OJHXccN!Ls}wBcOJHz z$^JR)_{zrKdY1p|o2s--p8SWO>{mN)2%Kuyj`9Y*#U>}jNjDh&W8jKLN!W#p_p(f$ ze_VeWk8iepaYFsQ-j85i?8Nv^%M}2jK#HKTmk|6-s4>b2M!0^fkfyFeIYEJ-Ws>QM zojd6;)5KYGvW8E@+2x`sv#QO0!_+42u=&Dc{)HInQLjR#Z^o1A(@TJFazL}ecO-wq zW4yAGYBnAo$Ips~vu4Qdi9PagSU5fkA+dEXz?H_nL{nE7k=i*A2 zY+uDYO#9#l8W%PxWn8>t(&E?l)`U` zky3xL_#ElF>8!+~&#A-%NH19Jl8L5kKdn5@LOv ztf7`Wh2WZiW_ptj=-Is*M^;9Cq60^1r)KmYW;e!wN=${YyNthDXVqI^@o1W zq_*Y8tN>t%gZ|#=E>ZJo0BvASiO}DM;-&-6D>mtwzMh`YsSWvg>Dd;=gukv!#}}w| zD~LOx1j)Wv0Hns0<$6D;#I8}3HoernpJr6LXNeg(!`4?yf0e$>p#x6!V#%H4X{YZ= z^*&sd*+dYX@HoU2O=H*_>Q<5&kZ`;$Mp*dgoEo#&oi5^IrHA(o0KaefpO~P6ROP{o zkh7us)O3VM=w<4w4C4Kfb`UD1ed$1qD#6F_Rc zftK&ZZMu*Sot)i>UxyCuxD~*?1i3r!SH4<;vlbdOI!E3+ZMu61e0|LZ2o=n{8P2g;#K-AXUr5?MT=EWzE#>WIe!97WaA)9V$yOH%mPiJGn%me2f z$-VoQR-o7i+0(r`7z+MkOg?Y_K}$_KEF~z`&xYEg_4&QZ+TPB4LIHuYEqu;7NtFZpT~$vgiZ%9sx~K0-NwaI%t)r#1VjCX zI)kJ;{4rBn8A*o)R*%a+jD)AY5N2XzQTf;X}7Ia7LShj ze4{t9r}I?L%iRn+r*R+)GOsjk3c?1xH_wh3@7A_l=H0hf_5qM?93qab=O47cp(adC(X4VP?j`?N+}?%~Pi=a}VSA^R4&} zj+a+Mo2;B1m7e}iCSAmMK_|0uxs9VFVY0T3cDKfludNw8lVP!A3VGe0mje?owZ$CW zYJQrU^MZk#Nme5b+mI5-T5#zOU76Irk-+Oh-*Bs@`zvQIHGmFXWA;hX_?PjY-W2{i z@b#q$+*r$J2nLe>?d1B~`O7y;d*6Jz8bK8?mpO3)cJhZuH)v2BFyF^_=MOeq=)haG@kw(6YGW7lfiY#^pZ8Fuo=B`a4Z68m z4kg>C+K?QGc*2V;6TFU5{U)=#cdt2lg*uW?vGa1=gkwGdtknFo%MT5OJyCS%pQ}g$y|1ry- zndl?`BmK?)bkyZtMac(CAYx(|)E55;dHns#l6Fg;8?B{DW0mlYh1DiH1dft@`*Z+&o9d zLeYph-1je~q2<39R_+TECrE+d!3j{%kXR8sSA@GJjyC5lYt6HMk(_A%!Z&%Yp*qC~ zr?31`@Q!ne53r~4!)B?2JMw@#E5*HhwwtZH!&!7CgSshm%9O;*h|aej{2*FB%X^E% z*C))h%an8*gLb8|fqvXmuv(g855X59-TM!CK=*-h1I!TClJ^{_C6lF+n@w5Fsf|Yl zgCDE=OfOB2)L#aDx}ieI)$Qw+9hj=%xT?J|74HT7Tyk>u>s+U2AC{R@qh4nZcUf{x zj0+RJ?CHS8axz%4(Itr_+UsuOp}O^s{)FJh3m77I97W}!7g4!D^Ysz4%BVxe%+9<` zt88VqG|>DKp#uY)$ayTxh+W+v^T)UK*H z-fXh^fPE>MM=YKH-^qdK3f0Ch;G>_aBRQS8oLo6of2~G@P;BMuM4^>_bl~7n(ma%A z1g%)vLp~*ifX6?Ai66m;6+0NBRGFs(afLL2%?)c%seqU)*kkB`FsvrPjGCQ+98gB= z(}68LYKCbYg}YS(vLL*nR!mxJ(X1VhaIUBtMthiicuT&~G?c!!3RQm~SFi_av%D4E+tkVi2&o0<-B{Pp0T=2*b6qkiaT5}B zzGMzj_FFG$+(Y#RlFpMY6rY0CxcmeVIV)INYtwMs{2WFu&l=v zSz}T>5P!2&CFCr^583>hS^{0Zgrpp<2CMPco)8*x2^j2`&oD9gwF$AWa~5kc^Fcjc ztvyG^yT@55MG`6dgwB;AM`Q4&z z$GufHe*}bha*b48m&rF+BA*sKGWdl3KCiOL$ki5L8+rf7{LW!W1Co*){fFmtK|TlT zaN3bOOpra2*KHi%6pjcz@fHqg9@FI;6!t_vGvPZZ7q5g6GoL&C zx(=EAf}}EbY=Y7x1DOmUV;U)-8x*NUwm=Sm??Dpw29pef|Cwu0GZz$Sz1^5)A!sNa z*q5Sp7tPXGu}hQ_^HY_yXY%0AU5BKp#dHKx)TZ#1AuD?57EB4Wse=sMwUrIoQXjidqj_Od}YabbTYZExZxA022x|BC(dh z4V%t-BHz!LxDPgeEVeSdpyl_&GLm`AWBIyAs|{<^24Wf~w)U6}iSa$&ipl0t>bp-G z#CE^Tf^k^Suf^u2%6>d_vg1q5tF|LLEjday8ulmJKL+3Uekc5u9pv_w4R-_-?R@GD z6#w+gM7}?<4$(qA$oS9lk#B?Q$Q52yI!*FBtY9H2;}Ld{-L91>qwno11fBk(Jst&* zZbS0cTRY0-hn39eM9+pa3`kY6k{cdEU7w^*nr*W0XtZvvf32(kn}`|a{!PXb>#tz@ z6C3!nv`w%v%CB}3?JtcwLoaV@LK-bIF-h7UVgg~Dj7PXPqur_O_@wnwoT(^gO4hW( z>YLx5FmIKKRR5fjns|yN&cN2Y@D2uobP);Q=60LHv}lFwisY+g>>qoZ8Z>`$OoQPf ztIsim5ay*+L!uY#pQ?}bZ(Jyhj~s8Cmt0-c!$(JE9c(%onaP2CKY_lO#s5!?c^{7twzbCHDsa6659^{SBC?<&-#VsCj~^~ZSbb+U-jD| zs}Iqe0!rIISF2f#hx;D!GJA;Cl8-_XcMGA>8x(fua#9IwxL)2{^RQFiw9M)5b=km_ zkQ>OQR}V%QCOvXRUCJvp$3$iD)5!iYl=35c40b8^9E~SyyK-?p4sqrvzFX_^EX1U8 z@`x54D00{0eX^T4y9U$8uV-jR+`_XyYxU3FkB29}3F#*ri$|HH=F~W`q4b~}sWy-Y zq>K`~wLCx1F0w{U2UV-}q7QVX#P}lQ`_#|?hKuejbK zC{2@Y+{VFprqxVd``=_(-}yYxN-0} zzymHe{@f1fBN<3BF412o)$PD;wqzUKzEiI+kycxFtok{E+L{-TsJ%2g6Dgi@ku_(I zWYK54hKi&Kru{-oToI(`e>8nAXC&?`i97q;MTFM?uo_koeitI)N<4$-C7g_osaH}@ zH}(wnR{xxMEm>PFZJXD)JL;jUOmd2WT6~z+LO$$zT3cJ4 zk1HXKoD7YEqXP_AoYzEYk=W_mVW_z#CpHk73_fL#{c^_oO`0$yI*`AzX)Kr~<@NNV z=X6)U_s;Xs8mPQF!`dCoNRNioqy&%q-n&Wb-fY%6+8b9FX1Ym+BC*N`+!|j{I@>61 zSR1+{rru7h{@mcs5Ogh%rw&CfRS8>WTE_5_wwn;VOf){vTVq{Gv&PP5(yrtWuY1Ct zXf(qp&edm=9%E-kXTmv6vs*WO+=e;zx(8j2QRctQsRY0{)G zJ(7H2%QRg4gtR8=RZqjC6ionM#`8?E>L zQ6fyy`-aC%Jt|I8A&&1W=2-wR&&eXCsk%4c2D!cm%S$`oqCVTzSqo;j4pAwun zwZa|ak=2%-0Uj9f*4m)wnAR}@rNCNXg$9X+Z1>GUayG^B*J6+jO2ZoSoXc4*FA1(4 zI(OW2Y*A@$Rc})|5v%&{Z?i(qH1Fp)Nj94A3{cDo8VrQXeh(0m?flmbCIkImH;gPJ z1D+wf1Rl5XYbBh{kT2l5S7}OG6v@@aX1a3#P^2 z(%p8q#`|(5+cn;qU@_09syv~w+9pD20CT;Kd2K$&g`c?Z-;g}dFG`jM2D>?VjRz0C zQIqP{`ee>DlXEB}t)m^uP}S?2XR7kWr-(&IMYwKhc9BH`bi&S|R8HV^0=d$I#vRU& z9@Bo+RV?ToBJ1SI3)z%Y!tjM!jC>?cHxR+YdqZ&=L~oLe$ew$SkIu=gqC{H-#j?q3vxsUIy9{kT$-R!a__u^i!QE63Y2 zI-M%FC3HQ~)Z614Yj-CThja{aS16v{SFu&3nZPdUY)X=grEg+XoxR~zf^0*qcf&D{ zu{puP{G(*K7tK5+lQzFDis_W|8HBOtaVvct{-}NOQR=Liez{=pNT&7jsQx9B*l;75eD~CMFjp2XDzUlH-DuK1QDh@JQG8?LeuWLEb zp*$j%ZLM9XaylIHNWTg(U;rcv?(i0qQytR&^|{!8AIJI!jHHkKU+d;yTSm?m9h{!( zJ-hkIEODC zVBH5%VfxL%*Jgdv{;7~YMEp&v0@K(U^!8{~sn9429rvDo;F@(RyJB$;P+3_w;MmcD(EA z%`gR~I`8;7-g+nQXN&a&<|m6Z+0UEm$UVx&6U6CTNRLpb@pHrK*%C*O$|!`##s zF1CKQW@pmta=cECW!@37R(ZW)uPaHd7;y*tSxv~~fiL-t=BMx;jX~Bq{Tq1VOJWKb zDB#pSER+LAWYi??e&pj{t70$9T_)?IryHvAsaQQxp$rN1+nI#$cnx{bzW8Bz(wTeNaKfK=T!^J%ePv~v+ zpjAz6(FdZT99q9JRL?Vw_rl)#)v}=R{g-Rtv?I%0WezFkze;(I?geiLviFVcwbVVf z^zg(-U#xqr!7XlQ2Txf!vJl9^#i&_qD`h1-v3_#Xp$I&_#n;66>@uMEz7hP-PDfra ze8=DuLrn>my|@H>brbOHO2%~=uMEkgV8x}iAMMeDCB^vumVQ?FC5yU&bu*PWbig|B zm|K}Mcu2fu%%~p zAQ#cG-^Pw@wQ)Q%TlLY(`M&jdJStR=L{@>6C<*Mqjd*;*v?q zNtWtE5cGP1N5RiS?XmNK)wJ7SvvW=Mp2da00w(3RhO>!kPl^jS)tVJnulQ$~K1C<- zyPqPJ;F1&F@*QR0Ws1ytJa~M@RWfvuynglrIrc&hqzMc>rA(NplwkY0=2kD1E-)@x zj>{x=Vth)D^7V?FP}DwNW&P4smg-PVdg@ka2ao8;k&n3i>GP$ncWGXlhjzAqg%0(Y zGCjO%+$f#}Y}SV?hMj4&IqV_4*qb(S#nbBzYtDW0`!{$U>m-i6@&(I8_?h-Ou7NkT zqbexd)&{u|BafChEwZ?G^IyEh3`F=8o;J>Or>1*d_d!Su>Kv5h zI5?H~L>bw9onUFV*g}(tyB4!0H(Dl)fc`J`-a9I)ZrdI#G6G6gaw(voBqd0u5G9L% zWD!b5k|fEw3Pf@g1e8=n$s$S?DUh6V&N*i&ic0q3`+oQK{q?(Jyf=Eh?jAkH`-`Fm zb=bAfK6|Y_*PL^02Un-K?hFaJWbmq2B+?%0wTE=5cTL&;mgQ_gJ+Z8XJH%XehaQyS z%*y--l7~XwSu0bUb|@C_E$Vk?qyP-|45_h&lQ_UOnbTRNHmAJ4It-crB)o~hZ<>lq zF0}>7Jyad`lNpCPqCFS9E7^mF^~NHV+9p?_kh2!|2jn&yi&pZ3E<4TpX*6Kv6W~33PdwDZPHJk-UnkERb!+R4 zPR~HmJd0zI9M|+rHtPJS-y6PuCqeQ(hGMld^U7@)$F_pdOZP)_G*mS$T6oyaP+*r& z!4g>dU*Gd#iN9{cq^bt@+5=%=<1abIk0Ov(k@VBgZeUBDxueU4r74L#2+VtOa037* z1&13&=lV$upK9;K)?tE68#=#=;qD*xS%fgluUH8~f+;SWjfYgwhClO2s*?54O;BQ>${gk+l9pTS1GbFnY$n^HzW~7;S=yDLq09G^Zrx z($1!MwMLQp?A6}B0p4pHfm`FO$Gv}zi0=~f^mR+6pqzKtdRNV4+Ry;sdfnzqE?+H(y2%Rty^-bB$$&KyCH!q@s zxI$m#TyifGNeuo2`eLX{2LS5#vN8HHhyXdh{3*$Bg}Yn(CA_3!_p`T|KU!B!TTass zdwHBmxDps+uc7A)OuHXWM!(rWQdjsos6gn@U%cJ% z&%{2~{%SIg#ov8T#KbdeicX zYygg*Fg*fqfQS%&y&g1n?XE?L?b*Q{wjlH1a7Y^}Hej@9nA#rP2z=<-)B$*(eZ2@T2uH(J&cjDV^ExGi0HDpe74FENIc0%MM zAnWr(o((qQZPt_Du32vX+^3&cUd2~TSr-QCSFdwdAAOa96M4NN=(;bdr5d}h%S3&D zo{3TAVF_Kxu9L4z`vdZhC)z!(UVBTiVyp7m{>gS!kN<0FaI*GLjJM|%>zPW+uT5V} zzap~RBH7-SPpc4%;E#iuZMHMGYo2D7*2oi%X`A;5U$PAh*!5pr`CX`4V5&lR$_T(G z?&Qm86gYGm-<<5-`=8PnSjb|6H5Pns?Kw?ScVYs6>B z6we!&bpo2i7}NTtaEa;2T@xF&0P$hBrMWZLI?O=l8_?)$n5QZ@(D-xhZP4i7!}9N% z;~YAPrJSxT3$ZWV`OpXLiV~1M^}zP{Pb8Ru|8W<$697(nxMZd+MNteyED$qdcfZuH zHk_;oyafM@BwL_u`+Kf7_vEzP-%zAp?{?FW)Q^ifH^Muj{Yr*>DM@b1^!?i`~C#>dMbJa-Kuy>XVYf%2bvxn|A5%Ts?}edRQkcmh3kHMB+2sJd!@q*r$Lg->v> zGabeF3x!YHX8EDh4-PEfZDcT4y&GZzc^eKMsqIaB^No~u?!AHUOsuT!&N*30L9r_@ z3^tZyZcZ`;lYC?OJCj?DMXObwKj1HB_yh8J*uNr%NjaW{{{DvE(@$+3m%N60TjNza ziL22RQN4Zj_gmQBl9sz@LP3{90$zhd4PFnglm7)5c~&dYTbUgz{SI?weM2TidUZzn z(>(1aMmtz28Uft05*~R&$|U)@-D*~a^^|#;wam;PQXpUW>GmhA@^1Up@^d@hvVh9i3vDWd3uMSVzX4JuAwRK~O=G|(5TmY@a%tQI$Ld$9)x%_?R^9h8D6!;Uf!tN7zT`@7Hx@SIa z;U7zQ6IpdiC;A))#rtH=38meOVU=6s%rTq)ya8b}A<+sZ*j3r~W35;JfErXNHjM@u zBq#&;IfMVcJz{n+?pmdyZhKml#WWa4+40x=*e0JRwNf>Q7MdjsE2>^Q2PA($&&em? z;-%*j?C!2i(x2N8@})2tL5JeKDbekiHad}PT<>`X#RBcv-^+eH)FAfJ4^2&7<`ViS zUgrX3l0p3!Fyg=^Ge=iK#3$%}$9;n42tld@E$C1>-J{%n=CO(tGvtkzit7|0(;dn9 z>qht2_HNBxt2`8(#+i!O71P@7ppiXcgxsG~iG10EgeW*f9NjA!v2?$g-sF zV93#68^wYQV!%ka;o7kE7-Ngfn!Dyi%tr1{{L*j#SFvHs3R_vfwA?=a9eq;fr+s42 z+JOlj&%rwB63{p#<}^Y1?+Go%R~P&N`CZ%%NYyELKNc8DnTh!S&p%nnJ-)we-@sug z#N?}f5w%V(Lk!;G9KZrGnPYk0ZcpnOF6BrGm?zju)mEz~ql4X_7WU@ce_vMnDuTZ> z@}D$eKnxA}H)h`?p@&3}DHcF*V94nJ$WWBP2lU2?>j0& z0Gqb|iy`EH$M1kn%8OaKQueNg%ACcO!FTN^y&!1zkA^|G2qx@4tqD6gvy-6jpL7I8 zqie4Fs1OtK`A(ULeAIcuJ-eDz3#FJiWH-(H9Q+f( zTEZtltiJaNw3h9W{PR6X{2^u##R^m!Vhd;S$t&P|NU3qg(b#nO3YG6goEe2zJ=gjJ z;`(+EJeRNnP8P!GhSmhV+j5od57>Y0i9q(f4wU94OuirK5^kW+k52p7fN{^=+-1b zDbcN4etK0;z^6kKuD}nHNzd^3{WyQZu*(zE-h?uNxtirUuK4P}D!!kss$KjXKcERQWcppQDo@XVk7PWWG9|p(rvFxd_uyCywKb=+%p+u?k)X2qM z8PFbd#te|M_t2Lx_9PXM6W{12SYv40@&dD?lFF+*Ek@^8`$wvKF@|aPtHzq?R0>a6 z@_Xg(Z>~LQGMWpxy)&|k-HVG=W{+Ka3(ZJ}hM)*gA)DFbpF#uAluy|E7YATpzD`d)a7ds# zk2RDl$0YPBVG@hVV?-^TardIz58Fu&#b+@9zU_RK>=ej|Z?tbKvPpb)E-^O6{lv9x zRw`H^o=EjRO<{_E=#=#HqdSc=8&F}O0EgbH5E4i4_NM@;ak^L(NS?`!I6!PQ$Y3Of zciM^eOj{5>*9H;9yBZ~E1_TJ3d}&vlck^d@0ZYu&&l?fShTm3?*WSB60|#rjGZ8t^ z^XP;T2TnLYPVsUW?n(bGQYckqJn~g$X%@bp4^Wd;xAh*rbf7-35#c+V*>mbS!SNG= ztF)b4KR$}x?>s64f^7F;&dz0s`*G$(k2dIkXjBWb>=tV z1hFv5g)^}7-q3gF3)EMbmgr~mc@6JZU4&m*KldF@&{kS9trC;k{J9sKeF82#%OuBt z_stMw6!@3OpU#z$+A?l$?L1KZ;KV%Wd@T4cWeDYeHF60298i~WSsxj{WHuKFoDo+s zDf6_4YyF98H1?lOgVN0Wj;Vh{ajQ9?F1jQ8WU4WlXAUhtNVD) z8OnbYn~em%)W&GI0`%cRi(eeNT_VjPoDVj3~`_; z zf_Ai%;EIh^=!20AX}Uy9%gVTrr@%lSa0(nv6DZfq?CRD-aXwSCJ7*!X(Q-E~F8g%( z;<7Q$hFM;drcs^DBIZl&c_ zOFVVL7HV;1fkFYcn^;BIEQ|%8!Sk^yG0)7{woO7>`~G=O(wbh(5}&d{szT-7eC0Zh zad=sKmjtWQx-@@z%kXLgbNtlM*bTv`()?!9mt0Z{`b}g(GGx~oCT{wSuh0L6#v%cU z)nda}qzxk6$i;Mn!eFFq(U~T`YLY>}f{b1hmlb!i>?DC_z)|4~6le%TBzllb61PSA zKCJo|s5^<1$>9l5%!rQbYW3q~4%FB1TBR|G#AQ3K%`e8vdA3!U=QKS@gS;BgSHl#U z7<@CNC42x+;g)}0>i_S&&P>^wny^|@)-G%XX)9a9ZM+eO+f4Dk%Qm=aaNa+$p6x&k z8)&e|Wqz`8nm5jou(3jTm=cM-FAZ7VQcazVp;)P5c44_*Z5GF!d7>LZu7|a3K^*Qn zw$BnCU{(kpy=9`1+m^63y?o> zbB?Rt{y!?=H`}*UYH8?9zjn}FKOteF%tu9TBA4elgRnzs@Z}u7<5uvi3@93#o{Il? zPO*6ms|57FM(>8ErtwN-{ZA?QwtPk-B1{$oJ^Qu*bVWdXYg2^HPy4OE{!93NPAkZ6 z)OKH}G|VK8T%O6LeS<WWc8CkmT3wk8uh|*w44xOIjrC z3i;O;$O5=R^p0Va(zuQSVth_=jA;&GViJr$a>hkFJg{|eAY@Buzm>*qwSuflo<$az z4K_A}Nz$X!Xor%qq7-YZ8BS-@u^m9j$kAU#Rs`2q0f3p&np20X;#HmqexuiS=zNGG z;(=t-UlQlr{0;7-7mIUyP4FtxOF(wke7-1Wck?&sq8Cy%pk*CQo4P9i-t^RwEY2oJ?6e<@r0 z7Xhj7S@m_F4kbbm-w89qQ+|q(u}^aOrUj8wtf5sO#g?y*nAO@AwOZ7VDhj8k{-6>_ zl`?EPzb8!QA&U^YmIKDxK9*fo9gOt)klLw@t4NTg7^w2XkUU%J@(vNXc@D1ycOema z7=M1^w7R@Tr(2Zos?XGC=FhsU!Qo7;6=P0U9P{tyXTJc})|#CQ%Z+MnVKT~wfLD2$ zG}{h$+9vB;wfgaA=c&h#p^}-Iyb^N@7xmu-Tk0ER>VoCXtVw#6`mYo8uj>KGpv|1$ zCop$4`?V2l1|glF*iO?ksvTDWIRI;?%E#32%TP%mJeYj_=GSyu%Br zu(jGWPp~xj@mpk`DWNyU+==rG$^t0m0%*f}O8tj+hWFu0ffF=Tpq#Zg03WSbIne+4 zo>PZ2W)Ppwkv1o*wbKqDLw-@T{SrC)drl1G^!%b4^X}KmMliIlij0h8C6Cg}vk4#r z&T*MgOno!$S)Hsuq)r}#zw=X0;I99J`pHlMhRaPrVlYEUwDZVuJyWcP>K#K?2VfOM zw#ubZEbOnM{k_1Y23WU*LkoImJnG!&bcH~i8>?Nd%Fy}wADAcT&MC1NtW_L0uVD;# zRsb+y#`DpQtO(MhMV$y)U=gSE6y(hWP&~C^*|Mus$(so#0j94XYhMi(&PZ*?oj5Wc z?7^?dZ|Z3qy%gGJ{p2EcraI^Y^A-mo9Fo?fzxdPyFxagZOnkJJ0wxiV>6bu{~2{hfz0)isaCmBj~ghPSL2&%)t0{eYp?E_iv? zwR!$WMlwDjYh!%;{;j1cetK|6Tzgy#a;uj49KL{@)<&J-^TZuQ7U7NPDSyY`bLtZQPgopy;#~myz|SlC z)z!+8ulRMo_LO*XEHy;nzkCKfxgiTn?R-0IlRTYgfDZS6`{R%hKSZ{$)z#aCdek@S57ej)8UJ|e4=S-e! zU#=YSEdT8&l;lGE{Gr_SOF9gfx@EqGBbg_*9) z9ZOF$-Oku+0TK?%ZZ4v-=6+tz2oFRveDwW_zCktdWNehtDthBv9g?GQ}nTM!c1pzTYm^CR)q z>gvj%x}2Grj zGB@@}ZiD@ejC=BY99qxl?g(W(YmzHoPH`Ku;-xG>(Cv$r`teuX?XA#1z`L7e60^H^%!)5G>Qr^y81 zQK2L7%w2wH{L8mBV?miiwng%38-iCO>}~`Fl~`1U-BSNFJ3-fb;SUH)f%XKfyq3#y zfGqmccOF8U!Zm~IiK#ji6Zk3%Tv*9U@=aW)-K+wjw0Rds7@Y?zkovhE^al}`w{sp3 zyI;P~9vP-pT7q4%E41Kq8;L z+^Cten0LOesF6i8(N9I$xetv-92jysX^+6)%wM*>??M4ori2*YQp$2^7Uojh-vsAXMYEbVEq@{`h%g7CYY+fb z1_2PRK`2!J)8w2t_ncAu1f#^S77gY{5k)K-eI90Yfv zeINW{FI|l?olxr1o{OsS{i3bXO)`;kUWU>dtcNbb(YLu0Grjh8%ru|>7(L~8r>Cl2nSoXJUgw=l(xn`FTC#qTmBY1|&saENP7?XU&J^eZV8J2Db*CY8m!V&3 zo->L@P389Bsvkqq3_trPIz`A8G_(zJV5BB)Ll7}(ErM^84M=3+=CgW4AJkqKUoZLtXIUf?m`L5NAq0$b{!U;UUpD9VqQ$}lvr)%nr zXa1OxeoM9G54`*yV#r~Zra4Urvo>>Ivay~W~L+E$N4V|4LDwW-Y;QW0C%&Xrr# zyr18l=3WWDsV4P;qC*P{g3VvaGhG&M;i8oJT1;Txgp@;e*bf;|&(RxmcMm1Q-6BWo zhQ2yX^^N9XAGQrCc?N%ATrtw0nOTUF9qCT|^0;IWa|jh6OwS+L!V~}#!pF!SqfzD~ z0r?YcU}y&gZYB0^#T_j_Gj`O=x`4prK3Dki0J?k=6{7Ylr0+C~j-VQ< zWMP{QqdIF7EMUaeq|zYhmAaIM1kTp4P2r+8du;t!B^Kq;-jVI*(hB>MNVYhYqY_j0 zKm%5V2g|hKfFCz_GFdBf3EEN|`PnUjJ3M%NqO5eJ93=RgxS#HoBB?0N!4)k{Kw#)L zgp>3?M6_VO<`&*6w#~c!WBy%N{Nt;hk53@Z>Kuz<8&BZ=Fm78e)^Qw3sZE0zJurru ziGI$-kHnFeqN~ei+q-I|ccTVxWu8A}d?P%j;De9(tB(XNY;^w0>qE}cn&H#Zdt;ZM zw%mKkUUQQ-I`-CW#hb1jH$JYj$F7T9SPziVqJJ)dBSmrK`1s{pcD@O($HxvzdwXZQ z(qv3?1n#uGr$^cfy=yEwf&m-)JN+#J_YmQR*ZGMOTKm%M^E>YpCFwP<#ldo1m$&Qy zYUn!!+bQy_!~GVSFGW|)o{XznSBSp&9)2s@%AGHL;>N|-+Os|4ufozM-kMZhl57Xk*;z>1SHpuc70{E;vY zs92Px1Da69nz9kdRIJ{xTkK?Ydo|_<_wPFLOe92U-*KA_Z_*XGrwlWPH(f)nn4tlj z3o7GD2z1s=O}RW_RBy5Ypfxxq_BJjMIsjt5Klyvq$xmxdm8^F?5fu`#^xVFxI+V=Z z(a8Bl*QM!-Di2)$vKE{`ReM0&9JDMF?JoUQeByqmH?>6ilv0Tz{)E18qJA+2@^d!1 z)X!uA68Z;3Pmmx?upxr$C6~w!)+0nD%$|7de@U@1bL8pZV7RMA+Edno*52GI$$_<-Y*z%1~I8<3MM zyFKUxBMNd@y_3HLWu8Xox3F0i;)|WE>!w!GBMRsh|Zmzph zoR?-r8o8)PaqeFkQSxkkzMH6J1+#hw9-=gwxO(4lT`1zhuqa$cAsq{u_q#XbE(^4v zW1cs$Ic%|dz-#L)S`o7e;0;2oCNZGd&Vq=a<+sfyu(XSkEtCxce&PcJ-swrLeC4ku zt=^pVE3EW4*^_U2Nk65OZ(`nX@AHB@9u(K6vaJJ$r1K8%09`1mY$im{buu3{ad0uY zeT_*V2jIdR|IgITE^2R289<#2I>dwe?0P>gf4_gs;t_Qfk|())4WQK)1id)@P>vCX zf6Bbf_By_uVL$O*?9;b*Vkbpz0;QnHcNDoV_;G#O+fP;VO;o0F-+J-u=do)pHpGx! zSzx1xeQ^x0sRe%q=tc8|-DmVSxJo?10F-ip{aE{lr~@FDC*1O!f*{+!ZFu8*MYZ%h z3BBzYxI(2>PLgq!+3{WFQ$FGV-s~U93d1LY%ig~lfByl^tCAe`vP(!eU`t2_$=7iG z;5-tMA9rqBUniOwWh^u;pSEXn{R4X3riwouKFxCLHfvfVx8hupMLulAkwHO3fr_l> zBkLOsMEl3pCnTpi*!$PkHd9;)lgos27I2=;=xqsh{^^%-VsLku#W_t1s4DD3fm)-E z>=iPZ9}VN4A~@gaIQiE%R98FkcWwuF<$v~d`BsJn(T@JW5Gk@nbw$>8fIKB0ZFzj2 zz9foJYjPTK#*Dc%)AZI&kpqmWe)Z02v|}eO7z?;*9&GrL$Yv*QgE)7pVi8x6 zL-hwV%jXK8zl|7)2s5=aO6!%33;osziQ5F~eQdKKln%q)fyA_m_nr=rYc)i^B=L@E zumfl^+MfW4fSsO6BGZpJ(B7NCkp+vY5%G0=;0NGyZ;_?M;x>F7DSdtUkk6?236(tH-SR2T_Mk=bhWL`kg#LnjF$pOOZJQa#SbI4-wj_q!Pr$V{2L6n zN)`{1;*L~-qvLMLa8K>qpn2qgXQr~#&L5CuYNt8Weiz#5j9E(?nzZfWR;JQ^^q><% zWD=_P^OvlB_6I#|lQ%?ZLie;2V-V?UI&t($|9!RLtJkila&R%lP88JK_4W}DVGEIl z*LG??&Ofj7NNdosjGdN4Y}rD7;QAc%;{Wk4N*~%e*xw_4gLn`DDn@Pc0;|rL6F*MDdP*zuaTd!}Gg{1;VW} z*YN|~1`z`atJgJAmT*m5qLvRoIDFV^Ts63}ORyq0>hpxiVKCOGrOG;)ccK6X{bwNl zBGEVZ!{(k<12=5;K@A}t!e;%spr24A5qtq2VON0q9gj+ZW}!)&2KiibQRa;5I46Xy z*FDDS-s(PwJL@zHW|`z*WrVwP{|i<;O72sCUwL#PVlP&2OM6qXgY8&nj8D&kj{e31 za+cRqYCERUdaQC>-Q+FiWAxj((Dz_v4w!pMQ9%~niz^_e3Q))2>?>UW$MR=3^qscz znB=kSSLP_kQHWDkYI&-S)W5$IlY1b*wS?K>cgQAlB;Wr$J#l^iRDx7S(@afRLbN|d z>0bYO41}ots|_pfRCMqBUd8(v8CGp7h0ogIVGJhRZ4`4r4F!_ZfOx;y}+Z=06fZ0ZeH#WNH`eK2G5IzfdaLzO*_o4+it{&!LK5x+l=X&eAJs zeYC|LZ=Gam2acwcaMqZh()jKnqI`fbgCt-f*9RKtkqy0A? z-v2ok)vLNoy8xaFYC=}ihBb)c_zw)L{cnEBN2RjYR1~Kc+lo3ULIKGgSN`d z`KeC#`_U3%CVUU5c9WD1geoV3`=wB26GgY{BD2{MA8_4h;%OAH(Y|xWX^jbrBdDt? zUltWTV&rQ({gW_w_4zz#e9~@dyltF%f2=JIEPd!xvjfd;vi~GDy!~We*{Es^H3Azhja^U7#}~qcVNGnv-?2<+ zcAZ!PeRzeY8SC|7;i0WPQtwA4y4BtdPjg>X*nlX6#*j}S3+Y@8heq!puV+b`GsK65 zPoh;=_oI@kzjZo^Qq(1WtZ*TFS*ymw6Xyzb*44d}Z1^p$EDNm*WFD&M2Zz>6wC%`C z+g!v44W*@1rklF0f~c!{Gwtk=g~rA8YF+uZ6r?d%>`pyVN$AS}CI_ZCg#=G6M|ul` zQu@{%mz~_NK7JZF<6;o{G^&|Ly3D5x!0H_$=Oa?AcP7iW5M){hhn&Te{q}CF)-}_d zGeW{6I+`L$>^=)2&)-q<>g3MGnWI#fssnI9=1O)QAL%Zy+Bi?^cKM;su%oq5)mUex zEGw9Wu|?Oo%|x-!gE4^9r3Ney8$(`3+U$y8!OJ4MvTpEdIn$fJO((mI`aberBfb<2;zv|=b0vz0r6fhn7@qMq^P zMZfm?ClFV0v`92ipddvL0LRssZ(Dk3Yt^J)L@-vXf=(kseS`Ooh0w%(U!xUBlT*}u zwbM>eKGcr|cwT}Wtu-T)y&U!{x>JLgdV@VHy-Xe!FK~T*kLw;;$CwX_`_j_djHQq# zWtf#l-JAZ6oQ=6jnR5U#Om{6xs|J!BglYv<>j4Z}t~3L3>;-xb3?Aap}2UfETrXFe;{y}Nyt)^|W-kIf9J-3(tS+h~ zh?%LBM7G3%JOO#yb&_Ui6@X$`G(A(o$^Sao9n0}&pyWx_8Qe2J_rPW1ev(ELM#5Gy zo;-0yjqhAK2tpM=BkJ~duVFPSIu)wFu(NO@{mi2JIdZOU-EU&pxqf5ff!r-%k4TCL zeY(}jus%uVH;sLYh~y|#^+p2*5#0;J`CQ`ss# zy;F4+fnx|=T#qGi;MfkiQV%H}j))6`^D!oPHAutSp%u`b&xhp%X+jrjoGm2omiO*Z8>B+}&3D(E6=MrmL4=*n43!GXbtaWie@e5WcVh#M~LB`e^I4T z;{|V^B9L~o%>A=qazYcb{2ZU805$7jptgh(|V?JmC}-~IXNr<;`& zD^)JRz|Rez2(dv5fK#r~>dPzEj7EqCXtWKR9fG(#t^#ZD>86U%W6=xtP312I|R)mP8U#;d@YCcoTmE z9nKi-3Ya`Ri)|61c{9cQt1an6t8k8`e39CcyR2@WGR<4Dzzl&i!w7`U>G8GuKwZu7 zH5u$Suy(BctQ`EVEtG|ps{@R->S~$yVUW!!iSSF9^ZnSYvMN7ytRK+yhXuV(3uOP^ z?ws-H8EB9_2zD+xnnllK~)-RGIED-xETnVgdEwf@QD0kfRKy+(Hf!@Pnn zFZL5hKP=W-qz!q-&OhQVpwsE=6V+;P^+|!%R4bhW5s|2qlm?;&`JHd))D{c%Q$Sx^ z6uI1jfu-ioZ%-XR*>@J2aU#Up^LewZcyGs(Q}i}+7y)_?(%^=Jd7M~Vd5*EnWy5*X z6wQqoxlvjsO-51tLhcRf3`qfttTF?9nxhcT8LR7Ogy4FQC9?7<_rX;mEWg)m@n?U( z{@+TbDLS5=Hl>+SD?F7j#VU4Q5Ntd7{?<+Dh^a*MO&4j6=3FSG0NUb1Rg}GU?{NKG z&jk3aqY=+fY~oJM&;C08^873ZqT}@l)V?-3(#}KS$S(+k82YMAdRKUYUdM-@7~mu9 z^XVqz?Ymh{qFvzk`;YS2%(7(j$t|{T2w}J&8Uxt@Y()h>bwCs^pJW&H``S5r^ejl8 z`5*vFNTj}@=hWWX#UYduJQueMPUbsd@{iwhbj-gF_qbO9^Fxg2kQMHg?uJ;qKgMKK z8to1NC@@#tPe710&rPT&DCNKF8k^Y*2087!Lq4zNC3(Ft8?MVdeMLrc`~2E5yzG+- zAR_w`uWi2zj$b}4zWdD7l*Y8J5Rk8%?(Xy9d>Z%n znqL6zk81J!lK^}(dxISj8ve#u zE+{T4uvv}e{qNIdf9oz*-q_d+Wx*xc=dpX9l50LNX2G)y`CMK87j0-BKzjHNj4}5L z(3nKB16c;}lEvRhTSbt~i;;4bIP2F(o+HP5c)+t!@SS!@CNu9d|1_;N`jpEY#@WOr`j1INN!9?>`{VFPSfgecuLc zUa%^*)Jo{2QZ}Q8t_0W7o*88lU?)T$c}B_zD}9A{oIM486nqa8FQ753{-Z*6)U$JT zJ_(0~G3NpHR0fwMSV&khd-@>3((NNaJJ&DXKEeH%F+kno2T%+AxhNJOY^0_cTXU11 zPZnK=<+TUgCG>VBt2v&vmcx-E;CMg;We_GW*-1DZ9rotm4agq1XD1^d>Xe0IqO&kP(5kOwIkKki8K-X(Orn# z$dCDl)aRO?c?nBL0NuAO3oAc6FX^1Z6sd%+rA=Q-wJ)caX^$R z?-hD~SpK)-Zx1=^HEAAFRVuTMYsK0hS*c1}jDErr^$%S6*x-(D$N9E#L-*Ck4|MTr znU}9xDgz{Q_gg>uvcIQSsaY%JTu_W+!AZ78Pr^URPqR+I;h7lzrP6Le!$a1aH<*1> zKzS&;?pQP?`PCngi!kK=_W7%9!wB;Gb^33h=v1^3et87pSON5nPk%VZ0_Srg@WPN9 z0{}=7rA!s9J3s|0bF%tYoqqfHru=h&{VPlRrdIjJ4V5)1;y`LOwF?Yp-=qznFDrg> z44;;}j`Jn3=9xiyxi!y@W6yl91Cja+=c?Euxhp-HpPx#YT%=f@KgcsB61XIi+9$7c z@&G_xdTjzgjoASpfbOiTO#H7png3g_{}%)3zd~64kDm7bg1=-Ra@=%$3hTM|eD|$B z`g8w<3a6yw-wjEwxn-UdMuQ&Kk*SW7yP5N*TDkgjnV`^m+_ycmfrRsMhfK6<^BnW= zxY|K$a6_t1J~z8@JWG?j+9MDw@rSZ(43J7a{RioZNLy#P0{^T_OVZ$-Y^ry%{RfS# z!STVwKvG|Nb12X|@ehdTB>nNW5AY`%zZ>Ph zdk?}4<0Sxr;41DmR(}pQ0O$k7d&OE&$th7OzkVfUxs8R4VbtXojsieQQmZ#d4Vt8g zLcx+_^WUL6;UzfU!zZqbPrp2!ALnR)A0l&$~X z+L*3d37)QPuWx;L-DvO5Y@AoWo5RdkmeweGmGjpSAMVZlGQ=b90XQ zU^g`Q#C_8C%e1l`F-Db&^Z}ZmIbT-O92Ie#W=gN=y%4P}%>_Tll|m)Bej`HZ?WfBq`#8s(h;}9n-x|KuKV=^`8=@ zPsA4zrR??d2!&3dxZwAkPM(KIb-fdBL%b8;O7&OK#aSO&u>eP-j8gHf%g0jSQgyC$ zOGYR95T|{~9=KS7kqv7)QZIRrTS?w{4-yAdT(BfSh6L98iM>4mIh=z(8_W)52G|XM z)mXMDAc$u_E*eRwsgc|&xSwxVlgr!FTw&q4Jpj8EAkJU1i%FYzzDl&U?h1PtI~KdM znvmMb<@D+;5=f?S$BLadN9>oJL||uxt;_94%wPmu5)<~w0y~re-%lju>WGL>LuZtX zc+fnokodLOe+inp=QN0+0NQVzA~^q^eL@#5661QFQqFWW$>y;}dCYp?c}f||_Z&Xn z|IJ6G25$-(N~^Q{?Df-*_6IxcR(Vgb_2Q(5dv77fLt4`m#h{1Nq$i58v$_cX;KB}fo_ag#O?kt zOv9AT{=G@t9Et2fmX{s?T4fIfWF&ZlOhHSj@77Ub^<4rJd+dtXPS%?H z&@r7v!8d!CfTv=Ju zLSN)he#ZyYcXTQZ!-dnG07VLi_DxlkD9O6@p!1fJbMjDK8HpC~+ z?F@f0uR(Y3FcdiE#d0@*k zL5KscwSem>d&k|PJEmUkhZ-)xo&N)3%jXPk%bc@3iCy)0c(jKJ9PoAV-_{UN^(N~* zyt2GkfGs>Z1sFVC75v0f|K#QdJ~i--ED#D1yMJkef)@{38HJu~BQc(_2g2}d!k56S z!XnqJZg8yjKQcEaFhjc=fb+z2zc3(^$`^)}4#2?a&#Eiljeu`^1M5wDBtwgF)^wVt zZ_#ytD<9Cslgm)s2qkm}D#bef0qM=<@e(DN%-jB|xj4sEiJYx2AL;h5N7zj&(e)Q} zSWI%Qa&{S)Jm zB=3p)r{ZX~cwwc(sPkS-;bIqU_SG*k3h|^vU68(gKh_N-mhTnnnW|x9GvgBDxbVh8 zPkot4o_ke-BcC%g;kSgD6OALaBhAB|hxW^zlLYlsknZ1}Dn$2;Iw{4^${l@Rn2fTt z*@dh(S<}9C?Lt6D*QVQ#|NZKJsqw$E@&8G6u!|-@gO&N*ucGR8P&N-5S5iKUmi-2K zf5GR@7OUm`Dcwk81pbpmp<`UNGIzI&8Fe5LnrDhZou6x{^GcE}g!UtjxvY}5HHsG(2oKtc(}EmQKP8+{SSHhw1DdSm19IGtig1+#9)O&hN_ zLFEPQ1qKtR37dL%>bUvPwuyHN3!Y$;LUn%`A24bT5_B}BoT0^&2S5aOdd-Cq)ptD) zt_=<|F%iAawnPDgP}4r;oKo9>Q0|;|Rj7 zg)d>FUm`6S`W*yC`VGOdo|@XcUL+hsLT;I~55H|BY9p}kp5#B?3NuG%RgN#UJ_@$@ z8kswL^74v~6ZaHS^aY*I39s8@FV+V8Vxz{g_q571S(s#;M_`WTMr!w;vWp+V{4A7sDO*XKJhp<}_td8$+ z@1Z%;OR!@uBPRL;5#p3#<>;*>B<6NqlF7pLU5D7f8~XS*o1*gkLm#%8>grFySxZ2B`d z`sht1_@v=)E~Ma>th9sVp!U|3hI-PWMsP4mg$@+8x3*juT`f|dIt$aw?mBwjYjRl$qTDryrkQlPJc}=1nW^DU0BSu zE{@GGHTglCyf^W;L9>$K6natcbgmbu>c1rG71*m9`oO9^ZK?C89+Z^P55LYSP^Z!U z;>J2O#dRgOjZX76)$5YITI*lwh>&&GurnU8XEI;s`GoMonHbVl`$E|9;e#uskaD?A zS+;G1bl4)sp{H#Dab#2C#@zg*ZkKaBY_?B3T*vX|&Jni2WQ&z>hMZrLo|tIwaOwos zvS!POrVH{!>qoz?I4|D3|M|}Bmq4p^C786CIXZ=uyUsRma%gk0SQ~f5T)5OnE$0(b zDb(7o$aj1-+~vxNI{6)AwUJSc!f<}2@*%WRd(Rc*(O(p<{@Bgyc2A|5S;9pS17_ObcU@V#H(T_pA3xq!7=sYBKgMJSO{roRh zTSYZEURf%9&W|9XY0c%xLXrL%kO6k!`FTjXfin3sRw)*pIC1)fw{E)QDj$5tBP*U` zZuBpl{G$-aeN7gqI6;<%??Mu_)rposLlk3wX+rzYuZq3(CRq2mpYM?@R~3KEb)!EC zQl2lnbw9{yWdzJNafy3-O*S0b3&jW9sFU8^(z&Kj)GHNxtd4z6YqtI+{heRRU;KD^ z-}o;Pu4!{Qh;Y%dYZkDofEk#xJdK&=y+XFq}NMNukU2ainpRlt}}2rS*-0o4I&?S?T>7rl$G>$J?)bJvW^72;r?p zfb#Ktm)J0Moqv+0y!~WB!oHsdKM&RY#e)a92VcI-5Y1s=ToT6KO?$;0CST`q*aG$= z^Z|_@W%9jC`FQgKvSCPJNQD7>d#21@qOtSRo;>JkS>OAiIZw6h`zP$2)p%ds|3%z^ z=@I0IpJx`;Ue#!DUd3(-pqB_zvQY(<1B18-S_#(Z&y?$;M~bfMo=1LHq~0IrkW_Om z%_luo>9XvV`54^WJFML#o+6sg+axo6F?MGcQaALntRVajbM3Pg1F`9mQIOOHaybgq7m=CzP5ej;Ij}H#3!@EbhWeU5)qU^Kbk)K3IRK%e#4u}EfcB3&Bs)}YmD!%z+O2y0SW(7Op8C2h zwIFh_L$LG0*9i5a2QuX$XHWkW+tx(==}}N?$bGU00NTkplIQR+I#M^@cyNf9fc@r~ zH;ta~!0P@vj~dh-qDk>OC18$Px{5{y6^@ngk&j8esnLymB943Ajkb?*du8-sEyh;J zHEpMW=7pYpcyEh6bmboU4qBt@z6#IdbWoFHdG;5dIvdYnGO-}`aNn9HIa5xE%#@bp zp10Y0&5_w#svvl0?bzCiV`_VU4(sD@Y3G|^vr&AkqYrZ%Yt5PS@XOJfnPyGt!(WD# zdK%~~x|h{zuK9jWhp?n1{zX40>kIkjSjj5Lq=Qnc{GtrmDBo`<4$7TJf zjKO#50BikXno<4TiI%*e0L1yJ7uo7Aff^(44Z_3l>jtGI+*q^=w5X-$(emwTE6yUi z^IGcJ;VQJbUmoFnG-5k!x<4(!tN1F^V3lMUVHDrCtU?(jG8}*b;)HpAo;R^Qebw(W zFXZUzZ(iqTD3ad>(Wf(fcE(Ap1NWE1Pr>i;z%SFX{x${(TZ09S2Tl32HQe)4BdDP}qR{{_p<< z%U%2@W=7ifmy|``58H42!|$e&rXmw0A$Y9?8#)3p_0Q{YNa@Wr^ks=V8W*7K_rXAI z>qiX;wrk)7J+Mq+1T0esDq*+nV>$udtQl&B`D(nPTXc!Zwi|>7MkVk#Ctnt*R!+5J69{sps!wG97*lD1gm>#?nuD~ za)}A?$?CM~Pq_Lb+s#ksK($FRn#SQn13>wGb)Eq2e#D<^A3(deFFG)Vst`0Z9&Znz z9l%GPcX!k(%8#JV-;x0k`|~V!(A^1(bARpN-4i@ku0~L;LUNYS+ItF5hmBq;^p*a> z)u)=q>)bRIxE;kvct5fD1+BF z?mGU>MZt%siT265H$7UGycK8e-M)g<+j!3syEX9<>+-12ob$WGq0gHYJ!UpV@A<;W z->n33hiNi1w@9m~}@= z8JQ+HqU@ud*v3iQv(NLv&3Z?++C4ATj_d0_V|c&knX>iwa;}Nvj<5pCxp6Uxi&N{= z4_LFp<@v2TOM`ZApAUB5VdLiRSw`FC_a@#9yxI)7q5mo5hl9@gE@aB~=ThV6-z$$% zl<~vuLj^gf=v?}i*?cd|E?!8Tf^GGz@W`*0e<25a0q>dP+xnZ3^h%e z!~L@2v909WC1&FiOs~E!9Qg<(2dg!z*9453c48oyjQ6au&SCIL zmi+srpd_gGem;4n(ZZ28Y9DN4WK)%P=3(M?uCEU1$VAw+WfwU6$e@~2d5|&mb}4kF z)%&`Jx_xZ3*h0c-`lkCNsj;Cb>_lu?-|!RkvRFXEIkd+9eunYmSnV$79I0!Crn~}! zVj7$&kCT#ElhXHKJN^jN$6-Juw&)Z@?Lu_p`CkZ&uW3~4-gW^sYu2ac`z9R256gUt zuDQnSrODFM<08>9bZ(>Q<$b%NjU&$j$QyLaJ6nOyFy?t|wtMi3n3xf5BkGfGVwF{f z(z7Wd_H1-Lp8}!a(FkiXt-2GhZJQA49v?^kZc=pM_>|(1x=*OsERLkJF}JxJhzM*U+aQrAt!|ClL94( zv3#8h!5cZuwMZzwKBB8!&SKw7dYbW2Lmsx%c%7osyXx}Zb12IvylZ}{>h>pMCH~9`)O;F`%ZgN4nY7H8jhHbHb zS~%if6P~?vC4DN=tT6~Fa7QQI%-yt#kz<%zgzR%WS0D4NLGxYXNCeAOEjPFO$q8$A z(Bsy8$NU$~@YzAxqd&akDusF;93vV-t9dXD!N{qL&4xYsO^}UrZ>c9JQPTRPMBRZR ztF{-7?#B#_=2lvti`UXTWt-Dh=(<6x3FFVPJ}WZXujpjX8{aHMmbx9<=>CK;HE@87 zwvCnOS}MeHk2Gd|#K3|bZmtBc8|d9jsq&Jk_;5(*_0sh5#7?Hod-z5@0(4bbcv0Hc zdiat3Q>zC`!Gc`E^t1#GQ1f=a$-vCfUQ316?U}tQ2y@Ssa2n-$upG?tq}TrPH60yr6SWLcUS!`>vtkyhfUMcjV6IAgarf;UeJ(g~_`{nSdxWI~A9qo=);5o9tD-L=fMbSq?4jESOO; zkTX4;=bdRW|crfW-e=}Q2-xY0)Pte#+L1Mhmbek#c(gOm!^9p6zU>pYE2Gu`iB?Gu0A z+}pFc9=^5Dv*O*mM2sD8IEGE-psKApx1T=!d#1CH9sL9d*3QPx;!&)tuF%=KJxKyT zzkxhv=*<4RknH!+?BDqp^(VVd{>y11fQ9pK{tYQ^a;tHbZN&W*5;T_NlQ@y$Wyj0Q zd1^miL9UlM?R^IIDdFK1Ei7bW%aJsqj9x9vmyh9)arG%wLp}usp-;0f{X!8#NO1yh z{Y=Mf3-3gdBBoB?M3$fZ`!SL3gs52}8_CK88tpLwWz)h8d-eAtuW`>gp9Qs~tzC#P z=$*D&7Ttx2O!%Z`g9c=`4|h;jBetJfz-Xv)??4HLM!mwj3n2_P{>A{G@Dot4OWWphyDZ2B|N=SvKG{vd&d^|%7g<$V*( zsKX*vsApe`Y3Fff@JrPJ*I!cSA39uq%yL<7C_+Ohvp8WM^J&Zf28P)^uD(*Rw3|!i zORQM^RS&p0p!!|L`B;(ezDJKELys=D$CAbF4B?`df6ThPi4MCoGK5Kf;K!v%XRa`T zLE{)H!m|BX7x&!Urop0KkCIo#TTl4L(x?++Q!Utj_3@YubDh;dZ}syimsN{<+sAb* z;pBa!6FOZpj{Du?%)79}=)E7OnZ$i#EN=%Lf95&0Lg)%|7hq zz^4UVm{9IhB9i*vTU9m?F(%vi%9NGMNRt08fhFw#`E%WmJyQoG&Xw(bcHkkRASo%Gz`ZQ2gA^7~*1*qE1ebyaD34g2G8or?zkaEUGea#PjAryz@D!#Es8cmvUS-nEX zo{kG^>))Y^(tsqStF)%q-^tU`5?$H|7F!nqa0di2BBnfU0^9J;P!Hb|*8DuhhWims zTh-x)YP0bP@yW#7=N4RZdw4I5ej0ryDR}mWdve`MShYcAsPE=}FmQJ)6e^)ry7?J5 z`ShJZhJ?i-C3rf;k*or?%1M42;ja9Wu%*A3?%_k`)-&SgA#?G{Z}QF7+!rSs!whMs zam=dkO=X8m`yQVB3gQvIPhktS=|3LZ%`!v##m4<#8yAcoCWZefMu@Hnwf7yH_ZP^Y%_`z$V zZW8%~5h} zSQNu@wO+BQ1T2pF>;Q6+e7U$QearQ!BfJ`n9RUYIcUqIttRM z3Jbt2^Z!%#n%E3>P?{LpQF*}n`+Vsm*kI-99;A+}&h=PVivmr4^Q(8M%$OLj7?M`) zm{d@?^=J3uiPfM@8<}ObZ+(nbP6GZC!-Ben0nL}TA15b;9!zX@*Kb<3(~eOpC__n( zSh8hqmz>F}Oj~09)%7DM)c1`~32d=HlVo1(U&%MrG!gb-O58vtB?w;j-o9~wZbR5^ z!W1)LSiKeT#;euKJoe%O);*hb?0&tP9a)4VStEE1lWSMB;HB;wEWP-g!!Qq|8Qc3n zizJv`VDZE?kwA}%@=IXm=!p@eGSAYqP{lCO9~b9hR}bcSUN8u$Q1xn!r<;Dju|(sc z##7Ecbarv%w3SWhS@LC5_}U>x-ydTkEk%|RlPdIX#j8$&mE8>Pv7=*gjdn#Posy=O zLAlL0hD)kmj0at6xO@=aR3-FbA4*$g?*oHNp$bX8#c-<*P*5u6)XGd)H%(?&!S@At zOId=LonM%4EjJgsTYW2Y_e9&{8{RrvY})SZv|P`RyV-52?dAS+fT>#dsFU`&F}eji zpy)ivj=n_2o(FiVnh>v+uX4shpA9(KH;)@KHH=8IJQ#eiao~XGHm(jqPZM)5{c4kV z3aorbm_Cq=5zW7*F{icNFXZ}lBK{%Xchb?<*)5<7c0|g4Dqt&ODDdT3Mc)V>u}I(V zQ8_DzWc-pgh%GB|+l)OKz(0+S;J)o(YcYEPx>RBVGn})@?!){|-VU1%fo~aYPWM`_ zw7;F6^@@UD(nDM&xsAho6_wohGQTS)r(cDbw#>+~pXS~Bu+r$+M%Ya+!!Tqkt{?_i zU*GISdS@itlgznyVYBH^d0S>Rg zU&e~XrVxXA#8f zyGedX)$l#ugBCio&dKl0V7Ld;%narCBp)`W6ex;njMm`G@;m~&g#Ij%;o zMy+{wTQM3!suVHxa9I@9Uc7}}SK3k`fW{e`kX@f=-1A4NCEZx{=qD(?y#}shQzk>& z{O3N(x12k;LP@#)!N_aX54o@L7={Sv9`;EZ?r%DUvA!db60zwOe>{HW$Ciz$4Q-4A z|DI$8u34LK8C^ZfR!AqDG&9%}-SRAE>LlI3l|+m)`lgys82KWt(*Vc0Y=uXK@^h<) z7CbMaRaYXU9rs(SD1I1=U>u5Ux|MDD@yKcUSC%QU#7YcGr6GT2`mO!bg%c-ENSyCf zICSx7@r#p%f?-z#o)6r9HB!^Xyyih9ZO@fa7@;-|1vhg&Q^$Z(W zcR6&$+_L2LnsHDQ6QhEm2$xV)U6eN)Nq8LlB9Yr(O8eBRZke*fhji_4ebRi|=j2YR zh}cd|3Pl-3`6L!&%|(@K0&06UqgUq#kRP4ri>Rrf2Eh;1DsYYoj|s=@*?@MA~CkiWRoNd1HVwWLPBlEFjw(wIg z@yrqDn=c89zrPowAlez`Ub?}NFzmW+j)?p4^3*)eN^fcxqCUHgdC;N8G5hZ1PccOt zqkTsda0W_HE@S9u^a7)#X6h5g$^Adcof=a@S0x9Tbx`4t%4}TTM^ z*oA~*``Fvg-s?IE^E5V%;GvITah+mg6wT@KW+CwcWBg(S8Z|h04c99*)Ye^i=JQY1 zXo?8&3oUc4XtuD~eNWZ8AduB(==g94C)NIa=I!vc`7TcvRMI-dcg(G7m6dV~Pl@gA zl%eGzM_f}gbfRUxHAZR5oum`7Zg;w26m34P#?feyo!Qjpu>7WmiSe4)Wx%ZS=p&?Y zJgjz<+gmZV_(m^xg|Ek;Yceal>23;_TXi<@!_dNyP& zFB%H(CB2X-HZeLtCO#(|965=4Hp-{_Nb%v_FE5_f$-fm`X<>g1ICr^Wi4Afx2HBY{ z_C1En2~MNG(~&A4oHP>%fRloNoAtoeI|h#&BFT&mp-;FEPBs+82iGda(7bAXNG59lf~EW+ zu&fO?y?sbzhG-ks*7tJ~(HsR=r!C`e$SU0ByJj>N0nvv(HuRqPl+pEZRkmz-3TL>c ze&PP2vIj+p6Lk?paUP8~^?9{*x;Eb_`-m7km$Y}#O^!Wt^t}ApY6FHB2Npy(kBZpe zgwm_Pf>HOSjuuWQv}aNzd#t|Y9c;Q7eIZ~~>XwBFZD$VY%}IrRFC2!}?I;xoD#D)f zPRC5yiA!G{itH%ad&JB<_2c|b8%8-g>vG)1;?o^1M}xU|kKfIruiopzflpy4r4a=P z+UCaDJJ_0K1Mil@-=b}#HIQqYr@#h?+<@6?6#TmFE+h@JV@QFvcTdq6NGK^1G;+sI z?ZKSM3VOnaZ>WwBS`xn2ye1RNm1O^Q?#GJ*IkxL2HUsZ$MsFqspZV(ufMF}5z-pN* z+52$Ehc4%SRH3(SCMCfkAzpl`(K_mKd0_PpCrorp6xo3(unfbiUJKCax<~6FT+3Rs zDYbo&pU<15xo1s~Va|+_^#l7AbFh-j?xIO+dp$H*76iSLJxGs_-WFKA1 z-RY+haab4bb1!`=d`dy;3a@~RlADzkS;}ziAp-S;{%cn#)m3|HkR#eTi`kqCoA0{a zCw>LYzl5>qH6$|Qx6l+Nn7_9UBw_GgU$P)ectF0C1&QZ{niIg#~AxJz#b(zi5ed+L?M76oy+YW3bx7iC!+aknhYD(Yu{aO&3j#C5;2)Wr82Ti2D6)lXe4I-=P znll71HPyVZrl#ydY-C7ApC~XA>OfDqUDlzZ*pPLJ(qc*x=4WiB(RTjv`-JVTal@LA zcndR9R=uH!0pVKq^|-Ay-fzYwr(doehJJi}2?9}=>j*oYx!ISrIzz!pCq8|OY{Rtv zVhvLzhpaF6Y`wW?iF|Re%>4&AT*S)#)~2f>5zna7&#NGGO9i@Sg;`qJZ)Y~;&ve~1 z%L50a&*B8fU<}iWqTv&}NyNTkUcq`36*i;MYpzj-7|AR0AEumG&by0hz_e-qQiS=n zFU0#c$?(ZUSqOrgBsn+FC)#-+s$oaEAgX-Z=|~T&V8oN?>RT!+$tE%a>4u)pKDaTY z>D$6HUvPHXGoa_D8m4QFeTA|d9n>0~7%rzV5whtu35gLGM~#Y`g_#@Qv3UL-XEE-S zmSpM1a9z@|wYia$@YCjbhmf_9odM%P0poqO>DRI*@7%U8Dl$7iB*wbgc{iSy1Af)pF{Mtqz%LJ8L}4C{}dG1sSn)PFc;mT!GXsukFsHXIzNvem)c}69(}sr zUW4^htrnQA8xby%#|9PNdWgflinX+q`wMH99)KA%l0H7&UC4~|rmk2-pKvkrdhym; z)VFVq!iN*D&~FTf0>A_>Vdvf)?k78yPzNlI!qPJHHBG09Y*Myy2RG(-RM$*I!!X!e zk7VpJ;gyBpA#nrxi39pI6Qzq?N4Kp+G85JLCxs5*j_}D1?hUiije5(Ma{J@Qk_WD^ ze8**BI|8y7#Xzc|OL1?}IN`W-VtV}|-tr}TGan=5T?tQ&XyZaD^%S834T&)}!Z{cEwVr=4z?M(M_}cZQB6Xl$e6sWIw1 zc=RfmJ>SU8Dtg1lsBf(=kUgnuabo!E z))T|V(ISkmvy%E4b+(Q_Y=7~1{-Fkp7xU4CQ_=zvJWs=)hJT|wE>KyGO+Y=8+P*k? z6s%`Ej@n2PU98eA+aLeW*Zz2&RptduB6=3Pj?K+7)Vs<5ZwM5Ii{b;4DMJhUi**IA-l?xdqslrKAcVo>P*7D>TxEnbK<7U!yGG zd*`^J)n%;QDqL}f1h&GF4<2WvF0?)H-ZT4_6@E*zNvN;4aJ_K}Gzp$s9Eb00z;@0p z&Up^;iimFVU<|h-12px5@4Nuwsdx9cG&%j$=#WS?_x>`x<4r*UyO z!df3g9Yr}PX{!;M>{V~zKgR4Ed1rO<+mTk0DD78sjgoLB4Kw_E!?|H^y7AF%m=xGi z)W5%L+q4h4?+e*jq8l+2?tKL(;Ij+)MrHvoNB>vd6Ak~*q8iU&_M&vwaDY*t#ihta zP@k}>@e!D?ct02C8f9dvHC+d_VJmpfILp8!zaE;XDYCJrl7$)%s_!$(AQj)|BMTr> z6DkS0OZ6j#qxw*)Fc0a@Nk`f3Paw2@Irn?^4aQG9Fi#-i8fa%p8540ns;1mMJ)v&w zybY%Xm} z*e>@>mqc!)WB$sYwLI_DI=%#RPqG$i0>0OFS+4UT;WIH z567%{;h!KrT;?!5Jx!xM0JqqIipSo~$?I~$%8K-aupZ5582%-?kAD~4nf5`k8hk&= zn{pRFwPuqjh@WZ7!SZjL<>FJJ(Mm&UZR$m@9jcT2z}mD(|#dwnlfvj zzfWn3whiABrHP$Xq;01R?*i9(17bsLnA7+=i``k z$>+z>3`*6_QLLSt;A~kCM~UP`(Mr^JA<0ptc=jh)W)~ap#F^u-)3gkikl_@i-c@g% zF<#cXgxJx71#X|hZ)N*Az4OG1816+q7Qd(i|EpfE0cDNb<~NrCz6jLiXn~&V@PTL=NQTrPh%6!7 zhN(0lTbIl2fI-*Zg-{P~&S3IIC~rW<8n?vDxt6=S4YJq8I8_V{;f>Q9woVEjm*4X{ z;9;c=^%HJt$XgFcCXmF=z^r-O?X;f|g3q=d9xF})#SS6u1{jgP z8@7?4wJHBk{wGb96uEgN1D{_D;IYAcn=FC})%Rf+(t$>(_O%1_H_98VA8heEFdL27 z4~y5W2n(LLZwb&^bP_OtC1F>7dl%M3fStX%EsNp+mJ>s3D_lnGflURFq-j+qZnS81 zb1||Zs;P{GrImwpw+q&mbObyatQtey(}mkekY7QqgizpX>hd({4JzE=FBXGcFKadwZ8fz5J616Rd z4ux%w0;YXGg)O%mTOGjAZdgpRQr)^x8;n`2+$-N;V3!RoD%@MDdL8u!yx5l;&wjT; zYuf*0v}aZa%4mw^2;f;{w${QcBXgHps1uf#;Nw?81UD}KJ5de*B$wNxsz5OIZUUa# z0`x(iR1n_*62-r^- z&w3Mxyc;%{MvrRZ!LL$1goS@%nAGp=p~<#vDmeg8FPQ%OGl~KEw2|5tkOxZUga)ZQ zVJ|;-)H2XXA$7oqTwUboxJL>CzvT6hEH~x8dM67%}yt z8>y8?+&%Pz*|AmhXR4udTRgt~!K2LTL$||DEkBj-QF_a>gbdO}Jg+=RS|+Peqz9mW z6!@uB8>qIeTs(=~9U_Rf6vsSxFlvityYsVLMZ z3zZJ4o_n+qzT|f}=6LEoby5(vWmSEi$}xdrg;}@1KpiEjjkO;!fj>EQ&R)!elWXIM zcHt0T=7BBjqg<_5(>iP221v_}gsAd6HDD>l1KUGx+eq(6Z;GbfEaV!l`KWp80mnMPJ~`^= zZ0g8DLQVW278zIz`U`ixda3*UVA~Cb-U=q=HfP>5t!g7Xqs38H7q*l263>+uM-p8) z)(k1qBFiGX5Vhc0fP)6AVvXR1e}cRMAkZx( zqj5zOK^Zuy{wYXIb?-tZyHU00k)5awqg)i;1ViyeXLjyFFh479422j98RfYPkzOF+0WKCep?*lSd+oA{y%30bp)X$0&UoUEXnYu`6&#{}q z3^CJmitAyU4N&Uo;7_pec$f`rN8Aubl7Q2cjQCDd!)fyFT`FJjjls&|?H>1>k7_f| zH*?f{ddxCVO|IfA%;gK?cDbk9YjS}Y2an{iy7lSz+9ig5)+MfeIk`Y^g_r#iYg9z-+!lefa^ zm%{kPDMc?;Lf}=mqVl|w`@_m^iGmZ_>8#Ed`V|4G1oW^C%M~AwiL!(EEQGcYs zoz1z9j9dB~53?L?P?4=So`&8w<4d#yw6n%W;r{qr^?%}gmegO_1?WW=$ny(`DHZ4u?%HH4ln8Xg~MY-3D15W=Wh8Dtk`!w5_PRX;*eT+t-UKVECo1hKvYbH;5SCUya} za|DLtcWwLQ5C8e5zqCz#C2kiYz&8l=jRu|Zu~YK~U>5)Qc(CyVoK5?VI6G>r2u4#- z|7jZLs6fr9REOX+A6zY^mI!1g`ZWo-!8;qH)IGN3hDt+fBFqZI29~}+gS_=8+SyPw z>OK>y4xP>R6#qjE@lDi8--xlt`_Cb6-=0k4>Zy=*c~ChOfhRoiOUcQi0E?L`{nx|R zpM*K>cc}rD!qz*9X)fbZL98E5Z4L9y3mDV<>1G}!UZ+GrxF#>n8{)>pE9HQZG5SgD zEMV&vd+PO)O;R#5TM%bZkxkd0rtM>3NK4y(m&#EOGTXe~z_2Cd$_9K!;{p5%oB-0T`T zm8PV}chomU% zeKeiSMj8$r+czFQNh8XUrm8YXc^6r!sz0B;4~RrQuDY1Sif>~Lh_V&DeYX9=hBxjQ7I zwv;sPynIK%M?oh2w|A4qHV}P!UFq0!;8#b;4yf;_nq5dtBluePpD&PInSz^L0^mk! zR0}eCXFKtong^P48a?!s=1b$GRuBNVs0>tfB$ zNX2-xo%@Fe2OKy851#NPI>>SC$l=#(w-VX#`vb=Bs70<^XjLBUGzg9I8xOL$pYlvA z>e5#RUcWo;4T-$B^mNu`(~<-ZwPj(%w1$0dND~7=Xv{E)Ak!^CcW!JF5LJ%bf{CCz z6Dx$RM2eCOZxY&Tr4Ey%G~FjldkTiKE}7rIYP(+dHMQCQq>ADgdw2s*$@?5x8aaSG z@?FAPCdF=q>sXPsV4&O=OD6d}$kw(qx}moBhGj2cWHvV#n`*EyG23C*M@i+4w4>_F zz_SBRGSdv9lrZ%_k~(ccn}%#$MNJ`kVSsYj`1x+b*Wln9NG*oV!NyU|lfW2)z~mqK zx(R}6n8g-ujWrwf1g!m${#V(b{QC`|Mzl##;+oW zFdI-jZW>(qYfmewjA%~HD|Z}k!7|$=Z29Y?_UlKS{99S3ml9pC*&l!C@-!*TCcMJD#;;**?DqP@l57lf?7s_VL5IMAZzNi1FGIN4l0LT%(Rh*<7_W3%Q$} z@fP!%g;gMBFA$!Ux z>)g=5H?PkKrDn0l$q5SJ?ph~gumRo+!$0gol%{^-=aG6uPtV3Zo-dP19&#k||V3+Cb%CPI`$K$ubE-MHta99kDQiJfxR~ReOk@4#HHf+#1qaKKy zPNUs>mvfP8F)ml%hHmn37qTwh%Oy?s>4M27du`Jm5b8zAj#AJFZsZWG08@RMn91( z*$bC=mvUC&+P&AWB)HFrYA`T_KA$(w-8b%2;FAzT8C;(8Ao-a_uFE*8)2wQpb#JfA zoH(SU*fJk=lGZ^FgPRa6I7O_JO9~1t4L*$DVbE(aGfjU=(OkoHaZ#R{qsQ8VL326s z_#<>xqF9NS5$TxQ?DE-VXid`PAqMhuc*Wz=Lk+k|yUA3FLI<{R=-FenY{}-zr7GU! zAJ3E)7o_$h>!1a)VHe>O=-fI#}dRBRU zZgtP7SQb|Icj2WOLM|%25?5NREoT4;)LjUPYutkktYUN_giEPS?BH#DF03?UW~Bh}w2Tsx%-uv@ zg;&rp85Q7In&1LD6#h80`EU8ZVSCzSZVZqoA>}>ZWxJ5i%0oEZpxeqCYPE4yIVM&9 zRt8e-Gwy4=&b7V;pMC$+rI-IsECl@%Xg&JYO^~%_1}AMgpfze1y$!fs$VnU27;NW2 zwkLkk+i6p&8MfDF4``igWu0NaLBf%pyzi10vEJuNu!#zjl{My;+2DTj6s8CjX{keE z0?(2m2WJN>+zd#d1ia4;pYt+dX)9s4#x%oKL*|5bTGedeEki5i=4^PXN7@pKAX|lC zL^(*Yf$0Y2v}4g91A4}#Tgx1_zHA##@0}V0E1PhfsA@IT0|b-xx^1L;uF@@|?pK9* z{x%O^5%rrz_xRj-<<*yJa4SUXFz)MmM#FiXHrH*;8B{e4<%r|;Mu5dGr&q%o6yBLz zr$ocM*<}2tt&f&Rv~us$8RCdH)<@3oWmCcSm(pxucb7%#V7xE~GCFj}KE!yQ*jE}} z>@gkM$S`!Z=Cs?`!TgqI=b|#}SXh)>NaNu3&JY@_+6OVFB*WZ!Zl69s=lgWl$*&P# z&6}*kuKHUX9#+VVT?=F!QtXvUe|n?4Pt`{5+Sh$^Vh;QcHcUk24;#%+)c1VH7bn-p2wX~Y_bj6Za1=So4RTCH?QBk{r5Nbk7Gx|Nn8@UEo0 zpUpIq#kF~s!p_T9vYdzPjSx*^RK6Xja^;CxYh_yZQNuBP>~bq$)nNh=EmgM)aX%$E z{-wCIfcw#UT}RpobyBlZ=ZivuBIIco#};Num+Uf!C{*O+JKDDttwFHFFn&oG5zwiq zJNY91uDJH0)MpRwYkWQV+KS2DL(S}DVCXyacTur*J0gh1U%Z@y=HCGM45sz!i|6bT z#m;3%3w={7O!(65F?7Jn^|-)U!|9VD2cIF2_|lj-$v*CGaCo4yF#eH~yXR%0b*oOv z@xry8X-Rqr9fQAk?c&w5T~RZP6DK*FksI#o^*e60tMrS?O1(#>3Od9cq-Hff-AOJA zN_|`wcp~uDehsb_u{xY3Y%sbSS_t=dfgUCCHSE+JIzMOfwYY`5Oz!eFLR54ASue0N z>>c>UvOQUq)Hk;OO_sMJ{-z_7dsnTrcJ?q>{f_(OffZ*;_;!ZOhWL)hm2nW5pD^}t< zAEWxIrhqgdV24XOUJd=yl=wItq?t=f-Xb(V4pKAWGS?B7VSO#pwsKO9>WX|-G=I_+ zAJ4@alRJIE$wn@k68v^w`;vYf?`}@+SH6sGg$|O51EfB(oVNnWW`bAhS{J7Q!2u)D zkY%4f^aGv$L+wyV`0?jKb@fojnY z*88kaq?4vDU^Cn5sJ5IikJH#hWhrmRkXl5(WgcO1JhZ4Z$#fnyc#-wA)r*^##mWT* zCljwt^Wz_@AvNb=cms6kh!Fk`E`TCCuE49_DBIVme$=&SOX^WkwcuKjX?FXC{IYoY zOZrU82CY|5YuV0~^oH`LAh$aZ&&`$~?=+{Zns-445XKts8q7KjZgu!2~ zONFDDFSLhY4gmk*SDM?SbH8IEY$%eacGe$Gwi3-ka!{-nJul{r`?#*w4t~?I_ep~b zmlKK)@#LK|)LY3ufL%F;OMsI^vDHWLi=b@6=KR{#rt;j&ALn%aO#3gUR_#Kbzlie6 zmrZpZ%QXHX9e3i!ybiP&nwQp4PueCcTJv2hjce^r3R#kmlY`k6(eb_+Q7JO>yo61U z*Y8da^{w8Idxs+l1vIRZ)RfOGp!T_V6b+CPr3#Gw6(yE4WBFbA9w_85Sm+`OF$JRy z3W#T;^xmq3iw1~-{-LA!*Baa1WLL(__>#ZfH9|SF4}QwgY)BJbaJ+_@4gWTFy3{Zp`4z#98RJ~02fine{i`0F-6vX?H|$+w(;30aZ$S{+8q{iJ%7|>P{D0Vc@3o$0SSQ=_kP~_{hoJr=9zb9c4l{WKcD4~ z1h{jTeCs*axz2Si4xJZ9xve%ghB1hLHn=t>7(e#FGX6E^#Pl9pUyhyg(`!XuQrfPp z&&^XKshG4@dz+uodu@Xwev?%7W=-!q;@q$en1X3R$vkY(*Xn-g-7wu_=JMYs#7z?IK&D6bW01gc++eD7Q)-xt@>|AC(?%}B*Qu?Ftx zGUk+9>2|QUDCQIreLeqH?w>j1_+xA;TCf*cyBTw!dI3)dANawq?-~KY)Gdbi%3~+o$FruN%fA`GY`>dE8Dhov)?3TvlIAxgwL(IYkaJ* zu0q0l*A&ai_i}SG4|%6%DK8`?49o+8H;dT^D<(P?q;04Qj2fyiqJlD(DE=K{y=sXW zl%9S$HlP1d=c>^|hYoqV>mZ-|#Z1wF2NTXm-O7QA={CgLb^7LVc3iZC(@7stN{TJJ z+5MtcyiOkd>&0%_`(i`yTQq4Z;JNgYt=6P4W$urr9tIh<9GY7Qvp>=bC_l07$|BJF ze7L+b^Az*aIRKfjg&ibp*kXCsU)j}1sX*@F0{t2Kev>c~FK;P|yzy>2v7R|;{Cr!z zX)cCh&8vZwLx^nX?%FdGz6}?4dwFC=X$>ADYXeAaNGRWV zx$s^hV$O}SO7x!Hj<9<{Q1CLtj0OgVvMx9%vy+UhFsB$U8vI!vNZe5G)2=BAa@lJZ z3$WXnPlGqN)&k`XPitht!;dDUQfB(M7N*PTjycQJSdUYknr*_=C4Q5zd7pyDBdTU`I>65tbRTI)dk*PPJ zQh@Uj@&?-XrIWIU?)HaBAd4kRgfJm<2xEmxLS(TwR%+3(UKJ}I4*g{Bdh4ieHmSzv zre|#NI%X=jd!bK=U3^srdrfE#j3o-(zy**@(a z2{E)E-|!7S43Q9L$K@VrWxo6!mTbQK2ja18&;_hlY>+#jAOfS$M#T@k(;BnoGqf4_ zv&KE7?(j8}EU&gyV1+97wo&G5gy{t1HTq!sM(NaXy28w(!qm-=6?b^8Kl=r^OsmL$ zV%S<+>F*gKgV>@nc@?qlMcO6KauNcXUQ|zFM-{ZmUr~lrc$e=qx<~NQDbt{0y4aV< zuSa#%wR;1U zKTFxV08`vt#e9YWO7h+M4j7o**}TFc=E|8h=&=7__v92ajg3Syz#QA2%&E=~ySSyh z$LBAs>&BlkMp2#iLT>@27$7)8)sJD|+wt8bv;+8jds79N-1KP?&XQ2*j`MLp%Zp*6 zG_#4xMU3PSgsg4mm#nn|oK|%T9&PP-a#`3j22mC5t|MpQtAY@90$dta*S@>R;hXM} z{b~M*b~e7tZt!F{$lxZejMnPPY4|0wrFThi>{rQD?0tzt z$*Up8+|1-}lP2qvsVF%IR8tdG74R&Af_;j7L|O?1cEjHQ$Y!I0j=*b2;`q^kf6c8o z=pz<*uiCu6+25p>yXA|hloV~;2-f`QHoCBD&LEh08(Y(9MO{6!bv{gptMpyy5(|uo zBbW24(=vEHP=kBYfJLGJH;zYfy7yeDfrGwEllcYv(80m0;$oq}=ORFfxpH`~syxa( zcz>k@Py({c>wQJEF$nVDX>JM5DzAHpDUKI?wmKmmo|5OazH~RPhGEfN(Sn9eyg=V^(BMzOPTzn6<=43Lu5xt4Ur!IDdtwv=9tlEU=s zI)rpNjs{50l!iyNbC~PP-K!1%I&1&zKp>cm|GCOTlEFh;pFhR&{x?hXfBOBeMEm}8 z&i_UXCJ;-xjGWWP^q@t$-FX%sTU&6QSeIcWuXr9td(}SIS+{}ogKh@xp4j~nz}1F+ zOOq1_ELnt}Xya=#4oC`#1nWLz8RRyA5aG1>P>;aOWb~#!nK)QJH8Do4g~iM;*)WJ+ zT;=*#i9sJ_Qe$~SLT#a0e_HdYcr<=K*%laNn z&FW`F9`>VFGwL)_BCoZoJ!-iz#FRE!Yt2QBe1g#f{9STSk^(`CSk9e>)3-`0$!0XX zUP|836%(#29zt)HYheZX&!-Eq2qHBOP2S5(<4%iZ=|nq2ueQdjOKOhW6f4mc0HQ)wIH<9&d_^mCH#;NGW8UkWU#!rA+1bQ6nn)cQ!s6I8IqzypD?2vm314*&oGMO;PGO?}{-P5x<_ zJ+`}kW@1BrC3w~&#JA)Mq^DjNunK4ItTF6c;>@ArijutuPUmAwf9`wn$M(KkKW2oI z=aui949u4cFLbb^U%%9$DP6EHP~e?>#a2NcP!&%`*bkHIV2^=&QnpV*uWL2D1;4P$ zX~-n2HlV4BqW&eubv5Kymt7$n;g&wF9AjNu7bT#)Vn!0eK*8wGA5HWIy)>A`tug%5 zuzP^#4zbHySgJ2%n`{l7`W~Qrr}I_Jn22}>Gntg^(E8;=0Ivw?hUiQ|KhJEB&Sn%GZN07y3B zFIUF*Tx*p&>DrpbJ+!FrFn;-TflFwa_Ky};1oSf??f)QXjgXZ1Ag!q@rn#;p_-6H0; z>OSd%XQ{H!sKAOMq7RebaPBEdv`eN{Y?xLmmiKB_Rv6oC#=| z*g6U4k(hK2^*ydDn7C6G%;^T~9{>kZ1F)x?qbBM1fRUNZC0 z+{FxC6Ok2lfEqJ6r6E!aE~| zrr_wpBbD%Wkw2UTJQDzTEaGH#Wvw8pZe=+P`!2M!Re?U&bo#x1s@vW@m=6-$k5(t; zlzA((I+4`6nxAQj*Y@Y=vqE-{aBV+ULM>#Q5|ez`7U-wp=x=Gi(g9HJdAl`H z;>_dQSTIf%A`JIwh-9BOZJF~6Fj_2Uav;Z+Jw1M`wzZUtQ29;b3J#G#v0$R6TX~#0 zQVSM!2|1t9VK|9F@#6MT*{${>a1g8$NOb~e{{C{+SCd+$2M=b1zDxO>HyS0>K$ zHH{bU2Z>DIum3)zwcNO<;<;ZpUhlESY%PTxHV zC9GuE;XU!XDBe00qW`?V8^LsQe(~~wwQdk+Uk@c1WApUgDaB>tY{?lAN(yPgGbvIg z{DQ^L-IK4M&SU4;%M12%Lo`2EtyY&!IA3>3r!*S#2zprRpwz4DebWV5zd4Us{+PDJ zVm*Q_KCr=B8Md6l9iqsUu{)+^_th;ceiGicQ^C4U2>H_bWJEr>`(qz*#)cFQ57>Jc`j?G%t_j_DT$Y^KL<#uVt&IZZi zgVJWxJO2`Kc%_q8>n?fEoO z1+dM%VWNi}hk~IzwYpom(Z3Aj|0TN~$U7sH1M%A(qA+Y`v;?+oUq@&*M|0qSnK(F! zzYm~9c>v}zHt0lt4p7BHJpDt}Y6#J-l&*e3?yNB`UQN6vW_;UT*=h^VG@hHVCdfjQEtfv_KmKvv83s8Du+7$obsaY4opT?WC1{#lbf!@R7$ ztQu~tJ`|g6L5qOt3Pi6HMxu}hK+}E4ya9cBef@t)k^Z;7f4S-Ax>uIp+TwA=@P?nS zJaw^@uIigdl^+iXbh}@X;)#g$v-yQ^tl^TJ%9OP$5nAHlwi6T{yt6nR04-5TZ=>{F=2n+?f z0x$5Jgj41)+|QsM39h%sJ$fC0Vres9OnMKuXFJD%ze7`3za@mXgHVUPG!Hhl+T(uG zOn&mJHw8N2ssr$1{>#|K%RSJQXCp64AXOu;Xu#oVqS`NkSVzQVA6yB@v~c4|&Owv$ zeg(jS>TFVh(SvG7@R<=kd^j#2cY=5tqH%|K1?CJtQ(%Y$VWf7D=av$$vM5jLa8$6t zY={zINl5cewF5Oa5tvs%cbWt2%gqi14gLYVKcnyc`C_zfU0Jh8DpO*zsb_cWG%(D% zGg`#}`l;Asz{3~(O|trh{J5V;a{-WT43i`Tm%=awP=Jiw2@IX}g;roF*$E-ewOzn5 zd<`OdXLh(d5KmaR6|~ByT&te_6ztqWqWhr%(nt|Y*J{2G!ZQK1^>2WsgOLIPsrkOO zq&VUK0VO2>X#2B_+F$)n`4A*It@DW%bI!2WbI-VhF;GKXEbe=Q^eHZ;Vu8>e{vd8xyxWLGb z8dO4GcMMw3mGQlx zz@c#8@=U*ePJE#GH0bZo4aglH?scPL*4kqzBZ%V8C8bt#0}f%f`B8bEvT5c~qv6^- zFY>~&2q!sGz3NMA6GTru`Pwf+A!4~u(Xj_kX??$5?sBQzZAu&Yy7979<-vSjtT>iL zo`k-u1%}nU3h)Q}kNDQK@dAl}MxWgo1G*GiVqDcQRdgz!X6hi?&5u$mRgvpj;u{Ka#Wfsp$hku~QgJmvL+V zDASCOa;D1^OuE8U>H-vk`9jvl_h)PUt07kWUms%s?ij_76B>S#JoAUSh^cTGj>^b; zySivO3$hF|B?;H3Q}oU2JTdIBR}N0&+!WjznO$41j{~eb{?T?er!kt~Lahg<+`e1~ zLeGu{wB^6C)H3Tly7*9RhvA>K_}_c5#9K`Mi%O*FR*^#4chOdEOd6IWA3BPl^Z|!u zly;5jRVL3ZvLZ2o?>~Bbx(*ENl#)`E+!~QN@%#HV^8ReDiBbuZgCqkM-4a>oD`F40 zDhsjqA;*QG6YQNZ>ltQni6My8u?hTO+=+J=j%% zLf3VJ0gnLzW9+h5_vFW)i8+x4Y~R>DOw57q`E$bm{RA11NzqJ2+4#Y~`3dPDqrxZW zi>jM!n!bpCFb083n4&oLWykPOvEwTYv)>1P4m(`_{KX_8O@=OPfz;lEDurwLgYnGt zcKYAHe#HoQR`4D;8hIHi2=PxdOPU1WG`XdNc#M7lrfkcT{gd zi$tj^eRFhpO0!@yblQLujF%DLGx$W3or)`PU!E7!VV;`AH|9NxjQk|}g_DG|gXGl( zC@HMd680hnw$FmQiC1nr7HGI}6Cv#wp7?aNhQjwDFBnr_alyj$$kcOINLxjiXC8Lv z_~J39hVVJ3C5_u4a*01f$oG=Sd^yMK&~8`Bq@?dk=7(@R%OiZi6qL>ui(7N62`ug{ zUl4D;kNkT4pnkN-EX6BX{eJ%jZ%pH>E(9ll*1OO0JtCB}-_|bNiA4KbYs~tTJ|yK% z5&2ec1d8{h+D*MXq(H*A3+?VWf1d6@|FQ5Hjf=NMZnQLYWTb3yaXbm|vn7gV&CL5o zp~YJq^D{I41=ZtKhmhN8cEOLUE>o>p^rPN@L`N#eTmdsLgHbP=hPSTJfKF}P;G^%u zYt3KA;s*eWcv}I8RQ|In8o=*}NW|PB@nOZPIU(+H12IhxJf5&`xLSd|u9}2qAM3`L zpr%rb{k`^>T2JN1tV`i9rF3{m(stPAsv-xv?1YY9GxQkn0XB#{$Q=>It2m{4)jy_7 zHt*52dm*Cf$cKh4R?kE5KF(x;nZxS}ZRR7&gF`%vJMlqE2l!;y>o-Y3%JhN^RuctQ z15Bbu2ptxd2N6LErj$lz z9rbG_A^Mn^)a5p(DNuDP>~?UPh50L2cz>oN$q&kvO*}i2sOKIE0Cg9E6ai!5*XDS} zaUw`n1l9!j4N7xI2>>$u2)Orq;$-%*0~;GANKN4{F#Axi;nKa?Lh^% zrtP_UAk!O2%ch3mgxl*RyWo`DZayn+H=(lu4nV!I(5d>mHkGj6JkY{KqiP0>ib@u%Z!sEf7t?vsL^r=%x zuz)qZLI$k<+J?lLBm7}ifcxf||92qq{JHNM;yy89_qYkMAw&agpa4B>z-2PWC|Y0TnqA0en+c7hK=h)&mQXk0zuubw3FdI2gxRxdXuU{xi|JIkgsXx7+j64PDRFU zU|DjuVT{tdWpbutYqCN*o>egG&fISdB-WhNUrwDC8eLo)Se#OnVu4$)MZP~>m)z`~ z!A^1%VGI2vDZxvPaU)yW7)Icp%2+>ONpk{~1v!JGkRTg5>7kw?7Y-@3I5FG7ayj48uWrd(=}(?zxN@}`FyJQ)WJO$;WTQ5 ze0p`_n=-jl2PLh6iee%u9uMn$f>=iZU{0k>0&$`OFq5zXfKWtw6fl$R(|?ob?7OVE z&v9S?06!!FIu2qq=sApBq90$I$e0r|s`r~R)UFoaKeO8Y4z){ZK~+2@e1tN>I!%6) zI7m{0{Y6$_A+-Ue6^ED44eA#yzY<-vlClci?MQw5TJ6dMJ~1HSY}lIa#}`nGn=yF1 zAo`C5+i0$GH^(PQeK8ZsFn-09H&XggQx_!45K~#U{T`3>zEc)^H>g;VHW7kUU%$s$#5tWHsbQuTzcfDZmSwyODKh(?2oHXQQC>p zF5;!jluzGLijYk2@pUtV^3kcX;&0btjhEY`mQ?7uVFEfC(oa3P$OeV*&&=O>ZUisF zA6_T_IWpUVA!0RgMi9$MC_7%J2GRGikWN?9&Co91gpSseUw?iw=_k!V_tQxEPLPlJ zvccYkD?M;b7+fW=BG|WZgHc%SV`LY(lV*2HiG_F~9VeGXh>x-u41Lx*{ zJ$`qy0m(`{Xc^I{ID zG*{<$+xc3y?>D8Y6QPCQ?Eb$uX^qxh>Rlg`@ zgObNPvPWRcwo*UiK~Z&Kmxia8o32X9l3kQ1A8WRGk`Wj0b3=N=>r}&F!u5m`GQ%KP zoSj=S{m$8M`04AE(4$RS6}8>R6*j3wM=JCgrV3LzK=y?e@bYh?U*Iqp%BbQkg1eOk zy{SvcLpYXQ#u+tDOS`nu7!$wOr)`X`hwZ27vNKw+hd&W?3tlmvCL!(^-sQUt?SPY4 zsFFaxW%HoinqIof#PkH|8O8PSxVgpe$15p&M6N#FNyXUygS-EuC;Laoz*#`N>jpm< z#{oc#8sJ=+vjyZ)G5}gu!GO?=%oYH)9-ZQ8fA$a(OrVi}m&X2I*QERgs;AuRT=`8h z*T1~c-yLM;VHZSO67YFxe{tPbtMpSPrv!J2l>9G+D_<&xVbs8+=zI!g#ww^@^!K|r z=JQcchd5|gp{r7x+(}0F#c;hb<|+rbGR_x5`(%v`m-W~dpwbZKP#j$ZyIv&SFG{pl zg9CG0c{=x*>#N>Li;JBg(PS*SL<4!sUA{0l>VW|%wtFRE|7;D=*oCOI=n*`-@3wGE zGqvbc8wl^m#?LQ^-4|QE<79utcb2A7>)@es0oM%*NcKp(_#T(M5tS`np zt%f&F0um)L_u@ewG@Bwl+HSp$x-?}04n zd;eE5fBvm=cx7-J+weIgxw5%Si`%f&>PtZOiB)P1Js-UZbD%TEfibU9y-SNN66>Kr zB}`|5k+m9`z3khly~mQM_>vWc{M*-x998npc^@Sa<01uhbNGvHN=cD#svFeb^egYf4VEiuXx8Opy7_h#-6I( z+J$HhMShVhD_SliB^<&|Stjebhk|ESgjBo)DzFE^A^seTF0G#WpmZo#O0(x%5z3WerJ=#EW7n z=>{T9(S&Lm08jlqo9=>+tp>k^#hz@!Oe+7Gjf?*N?>_J36pKIEQkLe{id3@rBfkULFi$GtO#9#=-T2|Py-+b>TMq=m4+ z4POFh|5cT@KF4i`y<%YOb@P2clwFssu$wfP&iR(h>xn1NrCX_M7JE8;Y|-76Rd!=d zyEo14UK@O{A*c}|f0H;K#(>E1bY7s|kt;8tHz#DaN)N22A4ye?J6OyN)4Gd~b9|FG zds@l!W}gi+%xZS+AMOhOXzl&49Q!Y>+kX|i`JetDbGV-9d7zfa=9CY$dy-RHiOLqs+VD{RoiqqkDg z50K~aROq`dj#JN)l+$<8&s`MkF{-|M1!k&!ajuQk*iOBBYj9o%H2?kP4& zR(n+lZ%dkMGq5H!s&Z_BYxfa*ZGMig=Q77M-KL8LWw*VQUBo^3J_cN{Ae&iN&8I8EqG!*3F5>x1@W;q|6v zBas`1XtNs<%khFdXbt7Ws|}u}bVVLC*Ucn?li=@Z?>XMM>2h+@Fxt04;}=ltmnaA^ zz`{@o-A4Y1)rV#7dE28|?XKN+UWq#Q4AcDhnb}C6k*-B!=a59|ARM1TQBd&7d}H4C z3*9p|XWA$#@bUfGL5bPqm_Wf_7oDFqL=}Dr5g;4t|3sdw5AncB5$D0Q0Dc8x*@d#9 z98&k+ohwHDSS%)0X>!eew)cEagDF)dvQs(uSHl~qpg+TEVT5rnoc_ec)$w6cbihB{SiId7~}VP#kN{C$b_3`kVd~1BT!v#@#o{L zqVGB*q#hDGrG07fTLhBaCU-6@vWt%%uLO#L_!L=hTe*H2O6R!Abn!K9cBuiWmaa01 zK15Fh&w7keO)w)4UEUwp&J4=8SO`$@fHxudP}GG&4bKeS?V-V=HtuiQuVVRD6T(-} zu3n7Y5YZgZNh&05{4b9uM zW13wP6J!LO(!D&6<@+IqK8>Xqm7{2^dmk8OSTQTu`S>b~?y-MTg=4q|>zKv7z6b3~ zMdFFMU4s9&=?NhHxc$06IWQx_o}w$dZOwSV58kJ;s6RA~F!6rEE5}7q^5qHDT9SJ5 z>uYlCbxwyaBH+mswI3BevyTSqAL%OGePy9l5J`z-E3yR+L=^6N zSX)}v-uPmz=~-2yv;2;^H23E?)E8~Po-{fu)`=}jujVn+QsX}y@9NI zr}`*N$!2AZMfwX`cc=Zlg>`y)BbPxWW`&SuymE+_U|F4kRmCl&UH?gwnx+?-B;*~x zn9HI%nM5O%W7udPJ*y<9`$5LfKX2z;Y^@@Uuc2_S8=cToiIwY`I}0^w4oX9THG~^7 zXFDtw`_n8DVR9GU-aK#b{hXiuB`ss&dPSV-btn>EJ>3=L=}x;#6yV4RYhlfxQx$1G znz_Q}%oy-Ac)6U=5z8osz}~@GLP)SU-#IWKA=K>$jkYcQtliKwr4MLK#aY=csvc|Q{BiV_x^C_pNc02ZSrj#wIytN7ic~5eeiueKyO-m2OPq=mJ z=u}OzFOAyA>&yV9NxoKB4)DjTU1540h}K(U=mv#kUjgK!b4u0nE!k({ZV0cxbbRUY z$uX(wJDYC5is;83p=RE6@Fn6mWCJK10brYvR){wMi)!>VPt0!|P+ZDYVNk4@Y@*5i*CK{RA6afsv`TS97ikEG0hrsJ8_#=vu8eau=K zVopbi+Wet@3}GGOMwx6ijP znr2{T^TiGMO|~Pg2%tfv2#IxVuHNANv%T(0lg6f+Z23znQuDfo2P=0EG_5nNWS{@h zB0+{Gt|ux$B|!b>-@UWPbb1D*AMo%>$z3$F38(O+`mTNd%u9{X1amN8#=4uMeN$Gk z^zfWW=&X4C=_6OU3qAK^q_2|u~gm_`-Ti!p|@dA>`f_{3u8AY%tuYe4NT}{@&H0v@C6793YZ!z7Q{&)#m4lN zy0we{CC*`G$n8GSV`Z82{LIip3U5N z=9N_v>jn3%cv|--t*psIg+3Ex1Q4W}=s*2WIA9f7BOS`bF>*{OacaAe5V6Orjgv=i z2EYr`vD2fhX76^sW}WfwAO##QhO5GWzi%^{$% zHp_3p<1g{7xIs#%eMmW9b`3H+N#F%B z!N5N%C)2mAA?I3pVYE=P#j4r-dl?cfA=Zqohe9by^Ise0@@esvq5C!|Zgjmqil3U2 zQ?{T#VV#og^ne*yNBH;&?VH0stlfLeqc+I-^d+LD-gwVMx@2D%B8WC80*;vDZ<433 zK-^RxGl@INb((XJ@OCj^*czo0-@6*WM3tVa$^2H6xAQI=mgZlbP@r?vCpHv$|0v}M zh`Dq=+lK&<3JqVEc>@`+zwAxxd5yf2glyhPxcgI5q(X>Z@$v*#4{+@$-eZ$i#K3ef z>W}>c&60(6g(u6)SKi;h3s^r+mUM-WKg_jZ=tF-QW5Ni4Tj!WSf!D1kWW$`SSzVQ^ z$DbjrKMt6!_Df2SQl!2bddpc|?aJ>cRDK6cN*o6=>+vja@zRsZKB(Q^*U-4)epm6S zN6s0^OJzMf`T1It+2*A4)V{2jGBZhDIPe7wTBdU3#x#Qf!m|Cy>}fZsCcXO`P=FjO zY3b^vGby9sW33cUW%)Fl)6PVa>&8vx(z9sRTOe;c_#g-?*pE;BO#IWE(H z+->}0L3F`E=o*L)tS8<~7n|le7a^2y&Kw1qOi+;%&3U58$iUrn#PBc9b<&r+*xLE| z-ME6myjO3#9H9vDa@SyX{BF~&?Ky`%E7Sa!FyER+V;|2Q#niKi@}6a~hcMke;D+gl z>L5fgY=y0H?4WSsffBd5OHtU!M<9}*PRU5i8?&+BPr)(_eM+2ow2Q0;)M?m3vFX0< z9-kL?Ztt3{)8+a*kh^(0fGJp`LBp5UPML zm0N|k7W2nPd>Pz^?qc@Zh{=(%kDQv0d{Cgm*D`pn2i&E z=m7shyqL=J*xXO}3&P7F`0^oE7~H`PdaJhr=j@h2MifvbX(@{=Em)A`Kg;|4jfk81xS-JzNY``UvAGD zK&kLqKA<-1VchGVHHCPJ>AqkuI|?J)rZzz3mucp$&3p-aC&e;K>;Q&+GJ?I{Kw$L& zCfPv&P=&4J>mU6N;v%x+xzQ0Z?!@aZzUSg{Q@*1erxd}0Lh%-~dm6>sxXq3m85vb;e^Z2aRm6fR5zBy@JtO`Gy4&*d&q}oIf^bK3#v!Ak z`j>3`o$^~>``vEcIdD0K5if1v1h!zc{ZVxiUaNNoh#aVGeN588Xx;G8+S`Rj(O?!S zwyOyPqz52CsLJUu7DlUjn+Q0KOn{ii<;nIscgN3ztcPy8RD0wko|J~e5CX6g1l;2z z!|U^tdrZG)L6S|;GE7W17Rr~uEs|j$i3Tm_0|Dx8_-q9j)ezH8uX!`orG$|4ct&3+ zglqr7gKBM|*fF}UhiY~qO`PAw4i4wdG4IgsA)se)>K!YYZ}YqDN>$a-)^FwgSJNh+ zH+w&gy<^YM|LRIo2x)~VFcgPi15L|(=VueJUo@}X_q$ODD>QtM12x@$Sx8?2)&BAG zH_u`zv$@r;(gCWGtIFsOArxtmOX~2wg5sE@P#iVTAxiC>cr~oGBRXO&lX)ku%q_X! zKsP_+`m&j5wa$-fo$u?+Bu{cY5_Q-1U{ug^g<9qtw4>ni9dpOi|58zBO%)u`E;@w^8uvf-F;YDBf6XvHe zX!y!W@A14ddqX&WN768#+b&uolkhb!h zy_ocJkBUWCC`!7(z=Jc2^c3PNb8Haa3b6I7wDGKzMw7~}?)>8tCt?FXMGL(wzz0r+ zYTx&Zx3FB_$J$=_tS88J234*_Y)KbXq;k5> za{vAIl$lZKk@65#!m#OJlxY*}jxz}(sXXN|1Ca^?EWA9e5BEGDz6FrjWfL~fjm?A` zw|}mu-#!-ES3`d@z;5GAmUj7A-6Rv<)|RR3ak}oQOOMXX515>Z35q|{jXrp8*Xg#R zJ`F?1tXAMzMtYI7Hz6{p{*JlLh?6aW;V`0rLaH{+E2|^*EduAhc+=OTTOM_swa6J9 zW*y-k{hHOPFN}`RUDwugNou%oDeblav+lc_(VY>X081%)58O=zH;~Rmw-UOFti|57 zzOcB#Ee5OeypvFU_81xzV&;(`bMP&tt~B8t@C{P<*ku+2SEgg3OuKhJl-0__A9dUo zPF6}180H+gXRi5#($C!r-%p4`I4U9L0$1^11~<2Yn$fac#qdJ$dv1(f>eufgCExG! zYYOq?e5ib-Mpz;wLzN+t*j>Lh9OCT~YYplkDX*-SQJ=2X?3Ck~NoLJZ6$x2L=(F{t zWZl6IEIE;C%Rq^vn_S0n7IVuQRrXcV8B$U%H7pF2xv=uzj|e zhb3E}6Oq!^)j8GlKlZwF2-#4@S%@R5`*Q(fC7S*{Bui^x<9;*xr$ECHQ-a-BS(B7E z!}F2RWqhUG#HFJk0#}&a0O8{qpE?jm>vH=eTbpg~PeqOd^U6OJIc#RKlExzzQ-II) z`TVQE3fqzPz8BgW6NNQi2@*6UWGP?CzJ+LMaXMJ>PFAB8eH-5MFhaOeN|$#5eWr%y zCQ^6fZ18;3#$CTjSfSj`8N4|{dWxOC4Td%Mdfx1Q2rx2<4V_?`ctrV2LKCLU3qUm? zwZthnd#k##AgX@_lTh*^j{9r<51RC(q}z>Q#}k>3Li8RZcU>?a_0c1}CGgow{Jl05 zHllCsZW<29;j*t7ZCz7Pzc`N?d(*hsLdi8EO?oJ8e!PfgAX1m$?D4$qq4%*>sJ?FX z#%3c_*}7ciB~u+*JIR}zWLmx{vB!^7fQaIJH<4OpO@f!suM2TOFys3UP(^fZ3@en> z__dimRZfgNQA_O#z}*|jt&!F%nI~Kx!x@L}P2bVr{>Fem_I+dtS{4zg-TGT1(tnp@ zSRse)Ts7eGS6cB!tX4!cJ7T7q3TO3HQp2a*Yahg<#)>iyS%0D#+)}8}$$bO5sO%1S zFp7klcJ{3zK++xo&>E}6?Ngsvf1Gi5U6ra7;&;9%$Cw|SiE0%Kd-(TyK|AW@XVT;^eo6_;5Ug2AP0vjB*39UJ64Xt?VN)Z*!x{`fJIkN z*S6Lpr`>LQ)IFVx=`X!SQ<0cdhqw3(lAVDreg&~G0%FaU?-8J{9jC?DtRMse#h2h38 z0rZTJPnjwEdZ_u;h&Rw%{$`TgcowaSRZ zstUirG++C`UHW|WKoOK7r5 z6LjId6Qa7En-cA|S8%OX4a|cL#Rvh_8bNdFl5-*Fze&t&p<*#ENp8s<;@{@F68Wz` zk$O^0hHP8kib)=bcA^XAT z{rt43Y%f&Bid;9a9B0!vp!m9N6{Hts$*i9x4-3V`jp^*^Db-)Jj9MnSMMB;I)RCP@ zSpjr{R`vCI`4|pQT)xE_Em+@gep{&%9o)S^@#vn~z<~i9Mwrc}M9oXZKoI*4b!R1_ zA4v_Tf;<{$c~o!om`lpY=?c1?%lxOtZan9!j(D+SKt4@yq*Hd1ILZ-qQc$%B=O<(i z#{N2nlmh7aGlZ>Efv87X8T}oP(K~(6OAu15B~ArvgEe1)zOOgDud(p3f2gt1qr=$& zbjMDFJj^ER)ALr?YWkigDsOeOZ(nW=8F_f(BoimG&wJgqAlUW&tnN?kw#SrSDc#T1 zlEx`BaF?SY#Zw%2+Pc~m32@LeP#9pkC&+*zp%)O7#A#0kz#U|WBYZ(0=t^6;#XV4mOTsU>NQ1QJq+kviyq5;pYSyS z>rJ^g32-7tfN~r9zyP{x_?v|O4Iu-urLgLbzf=Q+;zbGOxiuKWJ?DLg1#H90^xh3r z%u0F}A1j{yM;+Z`SFzr1j^qV>a+_-$PmU}lMmgf4_HHg?py$Zhj~%e;&9@b;+$iD! z@o4%{i(IvAj*yX#(CvJGW7bD;jXjR8Ib>GdrX7NPX6vZe&#s^T_`6x!s^t34lAaPnhq9Mz0wzc{9t@fU|ikyLgF_H zL#n&)T&d?1A>B@tBC#H}uB>OK2VyN2_&RJH;8A7};1F=RUK#fOZxTb)-1m*Ssl~jE zMNdyiU8>$E%+lRGlGU~~j29u4NJ$)ieTWw(vT`)1HEh)C1;TDUkGt}8N`BR1jkL|u zvh8yFSZ;xFw4Y9)k-lZk`pBFJMgq%&)6;3Lh%4QwnG!URkWsp5QBEBnb8W*rN!xWt z=gJMXBiLJDq9!6qe+4=CFtF^tuCH?Cc4IHd4d#UTYQ@PV?bapYrgwQ*6+PVc$%=$Z zKaBVrcR??sBqLmvg_m)h-ixw*x394OxUMs*txUR*nQS1GtSgiPRwV-L<7$S^k-67j z$=7zBT~CyxZ@3p(O7(k2KJ;b&`gJ=ctGtjRt3X$Xpsi`FTOwubyvUah2x}} zLbXzaoQrZQh(Xo#)qnIvvsC`E8?1L{qt`2vV!K@a$odhhHa$(|h+$>r&yVZ2?lKQR zh$?^m-=O~O@`NoccaJY2OG^zw?^7L8_-(JL;ntZwb3)BojwS&)P1VDD^hD*vxfG8( zIf^LE;46-)m!OPMFel!P=Gk!_+Y?sTPmrQF`l{?Lz@h4v7Q`r_DJSBu!gQrR?n%$*6%q5@@4S04CBz{n92*^P#z`;(9=p1 z2u`BO7F+z#isNq2i-|^z5 z@FU#-BquH|2I2SaQxk^yI`hpjtmc4#>YZy|<#d-jShMc&1fOcYx$05;!29(dZO3ll zbOph#YKCQn{0U>OB!%fyeo`!l5@ye9W}7(YLOp-X|eh5`w^v~vsZqo3Sxyj$T^ zEZ>WOoLW*6mwMo&nVD}HyDImc^b72x24rs&8)XX4v_r>l)pK`DcI?if1c~hts)j(^ zfk(IVw$+6Z9jJSPkM8C&Pp!8sCn1{7BwKu`%C-y@N%=ARDo{j-{Tj3mU4!m2;YW4q zp*^H^@E-{WQ~>v>qu)gD;dJco>Yn;js)#O%Nht2s+NN`){I=DJoTY_nyc``MtA$1` z@e{l-vN3|tZ(8ThpfJ3#Uqp%(>HG~DKjCEdRy`EjUhEGWYGyhX{5$4tC_l-O+}uu! zU?a4-VvYLtkV-AMM^5MLj?+=^+XwcEeQLk^Iq|wEc|ZblhvnD+5XGPtzX5fgH#3JG zAWDOtrSQ$_lp^$I4tGw?VMrzKCqKQd(?@vERVYY2=n#i&a!}2gIr6Sm&UpgzDG%rS zC4{v;+uH?a%T77ia5bf*^tMJ_Qq*STPKdkwiy^DpQIq4vyHZ+Mp1 zxHvbGaU<{a#@P-GL#nm%ne*<{&4ukxuzP^YQl8M*>#>r?9I|(#ZOYY;!&EnvRlZ4; zp55(HvWf0><8a1nw45t~8sv}Y2Ya!$&vRu{TYZW=j=nVvN;~c<=PMtEJOjzkGYp3) z2Hp? z4wj~9bX8ZNUsN_&(AjUwRVXVt6*JHa3Fp=g4XVd=uQbLUepRu>aVH%bFJg>cAh)Uy2YjxMs*QzMlM&BH-zRLGuyT^s6RCyfWZ~bkmx_od8$92r4MkGH(?biS>_cH`TR@c8yz?#|9dR$ zKYs>4O<7*9rW6qGl0-ue4KiTd?l+J4Zfr7cuqlX@Ra5AzZG>nJ^*(x4rF$<8Ro4)+W-|Zew{GfHj$h6@4;F)Hmz1*;PLXgO zG@eq?yHcI0Ya8+h%QbX6S66*zP1ZOIdYHw-`(A2E*LwzR! z5vYfgR?{vEiK`ap!j zunF|RT}wf-{Wgdu^(L3GhR|bqvIwP1J&uy9#6kiJfwzXh4WsIH93`a`y^;ajcRkT3 zM+dTy1>C6*`w}Jh*C-5B*uU?Cg_<51pIA6;pkW<~abeEBz24WGkI}gEoA9s*`Nz9b z4G~CWs4AoiLKaKwrLgB%sngiLFM_8wbcGBrAMFM0jW=JWIHBE52x8%B9R+bJ?JDD1 z;vPp&Mlz`#+G2iq6t@ns<=pq~Th)_aLBv~aYM!QpfHXK*hg9+h(41F^$#20PtV;jL z1!bjZmj>e1AKQ)|(%#W|YS zY^tGU=$f=ShGwFB_0pe9kEBvcq_v2@%^u#k<}WII=Ul2~ zmA;TwGwE+q4+DfHR=i`!eC>9{>USmCb6X7=51*vI{87y>VlGAW;k)CK&cUUYd|D{t z2^$qzb&^{}dD{2t)It)6S#oU?!<#1=A)Lpu;7yXqZ%D2<&y`P&D|_+W;}6*hS{pno zEua1O`Hh|jrJSD^YpvyvFvR$_aae7b^rj`C1Ys-1u(|~_gBG7ds~}ILjd4nCi`XkJ z-}4{vKZP`26GSe=%T!l>ru%cgh!CZX<7{fhl6Tq;ZwBQwrn{QzJDH~E&Qxyl_rtKa zU~^EsId*ZTZ#e>XPHVQ*y`je5p`XUM*!YKVcHYoMk2{u@w26j|zI2cVs>4dch1LwM z0-Qn^J8@BHKU{Ymf;<|7e{GC(&4R7e2YjrC{8o<;5?r3Gwc6v}0d>-g2Wz85?m!c= zXm-Ta{Eth^)>{pgVI{-a*gpAjgHBHU76B@F^fFc_CkT7g>XILJ_UXk z-}`Z}x|J|(x5N)JW~NH+vtatNRCX{pI!i(f(TZd{fsk}{$2g@@e)0TpBE^?ixMnLR zazI0IgZ*cF;~|vVx(a85nMX6GV=`}B8@*|&)Cb~zele)vqv(-LKK=qlnt~KtpG4FH zSi+Ow zotQQf2+c#CKc~1@K0=*ij@p%=uC3f~L>=Gy39@M^sKaMghhy+zK(4g^5=3dve1PIl z=nczOVU1GSId?6=7#Stv?=YSpULR-J%C%qMX$NSCRtJM1G1r`Ht#vWvtig*inY_o^ zf`zW-EU_SrxQ(8p#xG)fmg|c9MtNUL-oc&oe67V_=GGK?LG;)qP5FusHEKvq*Jtrr z=d}(Y==h^My_%kiOy>cI=WT7ew>0K}SEqc%vuuz{;+gB~0ZZ!1%#O-UZVJ5@4z`-p zxjdJ1mz>_}8YtR~NI^ZC_PUTE=}Cr#X+Cp{v9exJmV>T~Ka=c+dH&4{4{F?AUyWAc zdSto!F(ynNb5nik4}Dx*CU)4SUTYp*v$Qo#bkRs3`j}$@iD3Co~e-MVGug9&fn! z7aWyd2?u6qYdZU8C@)|N{M3$w1h0ax|49Cb)TE3#%Hy&NaB=W(r)A^1`s``t$fXOQ zO!TzsexkfT1giYRYUe6S5Lz|5eXpS5cH1g5E6HF*Pwr;=N9^r9QoDIrhYi=q;21;|7GLet2IqG4l3dw zRH?_D1=M45SEM0Lby+gnK0iw8TAA6PK#5pFlq6FmU|s#JL@Gs&psGiAn2E>`rjVGS)uQC$ugg)-DV)I;0w7D{+4fwW9q11=Ly|98@~L8XWI7bMXPIe@16 z4G}bH4MA^DnV-z!=C3n_}Oo(7yq;l&xL#p&)abHI6P-s$5h1 zGCg0fm!LcAz({{HTNxY6UF#Qasta;}MN7K1Ik9itZgzx?!_9fS=pvd2wF2~xO!^A| zp3YHzf+F{OuiS2{A-iU-@0(qZu!ra~Wu_~7x37V2)|;E+z^*(YxdFmONp}i_aIpIF zFvYXuB(_Xb-T*z9hrYs5bw7edKEdyUQ-P-f8d(iB0v-_Y+lL446qVp?~2bCn;b z8YCJ#&5W#J<8diHFJ~Mk-oX+bR6y0v91@0M&iei9(p%ej=AlpBl3X%7wbFH5&M^x? z$xN@T*5XJUfW(ue0N5dlI?$(!CP%8t39FCc4LryZ*Mq{z;=dqPK(ymPnSAidEqS#a zM@~hA79teDn}!mcS5*>SB<~lxTXEOEBJ~P|+CAJXQ&uk7HQ=B+s$X2sFL-p6v;Xc~ zpZur@t)<`(tfY!AP9>IP$Zw2Uygo-hhsm9@cuu`*`pRm51`{rh&jnvwN%EQmEOPNk$m6P8$wxtZX0k44I|z(cA7LT5!ZKy_-iD@YD5PxDl+gI1 z)@*A@E!z>lA-&~tHqI<*3T3~ zjeML$sWu-Ux4wBGRIKkG#oG%(x!U?9Ak@kijqksE$Ted=|H?=}zNi_hXDM2_W*10$ zLz#wG#r2@R%lBU+46clFx4&n1l&)&4k13b&lKG;TP$yNBQC{*FMDBj_y;MC^5qws5 zb-ENciuc2&^w26jW zZ=9K07jIt%Jy2r7f|n-dbRDi=cwOX4*5JeZ9(ym@E5K>_t}9WZf(iWdPxXNRg#XtD zrtll!&gRLaRoZrkd8nQ;5ZBF6f&B2kOCxZOMWn*R%vj@L1RfagmI)Gc+h0qqi+-tBjJ&{HyRtAl@^xaGHVb8YBBDDnNBN@-ZP95OZ7g21 zotx=ej&&4mQfa?{-N->H1ZHfE@LH3N-YR`{Y~Z?^yj2hU_Sf@@9@DmcMjL6Tz>R&I zoQ3-70m%LM_bB>B6#iPgB|wdXGbrkVJ{D|?qj11FmQUCfWl7w2C|olPqeEf_4=7v* zHZHIc+%`Xjt)R327CsjVv)XOnq`0M_{z(Q7Lz@?%T_^sFh}rG)fsmaPN=q!GCa7E{2biMg{*fhFVCFT`yQd4@kqCi3P{`z)Ako zxZu|jFpyIOi|EJ`*XKynN({>L9ZCPXKFD?Jk!9?xCC|M`HR#RF|NAUdg4SNx8v^`R z+70a2l=j!IT&vmmYSAk)t2PaVkRs6xUo~;16t}o_@)w{VM`nFIWkH6o< za#(?DB8h_qKQw`Im5tdL?H=!!O|JjGBrQW7yU)GbwvdR5gssrUA(%K>B3RPUD<9zX z!SStDmD)sBGT)qIwny%HOpR$^aaIkm3)+OrJ!%pKqr2yw&Ym2XPWf5-Sz!9 z(^1dI5y$4m&6%;h{LgBBP+x;40B9O&UZ296;wU@ovue4dxO4sb9BofUkBW@{baZ}# z0^8V$!OeqK=M&jVjKXSMw~qjViI7!_(7?TtbuRyz!_V_pM9g<=h~4+5`p)C~ljl57 zUIP2=PniWMmwLLcG0Yyfy)5Y>QXoZmlH(cQFKEm8)l_2p_GgUCX2dTE>cfYLxaG7b zm3=APn7hR#l+j$*{OZ-7{g|u@V)WDQn~`@6GcIH$*GkAs44g-9k?P4Ae?cBC&*!Z9 z?63x43;F~FzByH67bolp&-9qMxgM6|G6jDZ%d=hv5r{#X{Iz?NR74wHq%dUy){wTd ziOs}6Pg^O7(Nw}5JL71Vb1fiL=mni~8nS8%k`*m46SSN&tL4Wgv$`MFKd`P6ODw2nAGz zgLMEaXA~Tc>huCNlXwD*(hbTD@5ZeLVE>$)n_0%%p^k;MSU}cc4Yj5CoNE7Q8~hc` zSUtIfa!wbQw-y&)Z)8@Yi#O(yd~kis%Ux@f{mTPG1-|WB0i)V8Z(=)n2qGBn?t+xu zy*^`eye#%XrBY%@oQ0RHRf4~e--nyq`(L6ms^S;1EUNvQR)3_4%y!*gL?M!TD(zgc zPQ5=}*{2Oyg6PcZ^0p7t9q=A@2v^ZW`ZOz?#-&3o#8AON`ZWRQ4jGJU2kWz)gYCag z$ip7kp(oiqe@s+n5c*q2GStKj-|nX?i|@YRbn0RI>|K9yfSm)KE^#}Tz~9Z)@`=r> zB>veuJLGzP=*xY#&k6a!iNv*ZPqMH?=CL}ITim{3bxXPrC{sl4IE!!Z6t(o-5 zMnbU$uv5R`%rZYFK!s_muD>-d;a;p=(yT*~&&q@S$kPHTv-ZkoAiZ%E`X(YL;yf1H zKrYp~d`9!e@0W%3ZS{55-;>h?*!mUs?`>`hc|$weBv(QCp{1V-R?EFJU*ry_MF2ZX zFB|jc>fMQ(*}I}4MXj~%A!E_$%32}3`z*#=Su3mU1>`8$N!?seS2^qU5 zp`X0(uJ(=HIli<3dQCvMqaylqKGOf08 zs%fO(_$b$TH|w3{!r36Y^ZQz6bU_}l6mcd%yr0Tlu|%t;TF^fwDdA`&=^XrF(Ll{0-$cfm>QJ`95j9Ia zj@Cvy7tGDCRQ4>-u657UhD4c6C^7TjZmgNy-rW_6Gz#@9vYt&GusSkn^jAIh^?ThB zK}GtBoCW2kjTWLP$$Ff^H~LArv1p-3iTO&NzoOre%pm9Lpl;@4#B)_&j#tp<$VtF( z2D;Y@I7zGozxM!06&Qr)Lnmo&d~S(J9HM>BuXE=6y|UpEZeKNqif_;Ke}FhO37>$C z+uU<=@UV9%poEm4AS=v#kSVet{}BuXVnw?RjeYWVpZ3L z{@CitoPk(Qi}FpE73!3)D%;_>kx{%mxn`RrjxFd3AVhVjQ_4=i6lpf}&XIGd>F3zj zzFcZk@v_g(UEGuQ*cNRBb6$=tz}{w=gJEP+O6Nn|0B$@wgWno6^+>Gsv)%LN6i7O$ zR#jRcnp@x0A}W>VoH7Cq93J7KN|#Ua5*qsL5S8q-Ls$P~u+-@II-MDCHB(Wj z(N7m#Rgv53SLLAEY0iFM{PX1TNqdE6&~!s}-Gs3ujltSAo7XQ^Xie!wv9u^<6o)2! z4Gp;5cN{#hjhMrlS;STBkK~Ot4~FbtK|^7}`L)JJLOS1ApTR;r$J*sc>R@I15q?<9 znFvj<;(3{LR6P(bk$GGea5b{)Ia@@{K} z&!K4E&VFH3`L)!>B12hgs*Fm@Hcd99RxtGHHJCBVJ#Eee19(MNoYyK7RZ=+aG$l?? z(;mpke1J6Ou(_O#%62Mgb&b_SNrRA2th~;-S05t?;>nBHHjkQXoP2>hK%qqT3xehUE+x6grfMYbr5_ zE3nwS&Wx?;euJ<8_eZVoh_Gi?ZdvQj55Bf~^SAZDKA?t(`l|6>^*jWkBA~hWC`QqTin4^_M6h6H4iw3P zGv?I|k9LQZPu^YW3s_z^`-;-}$4Q_%P6v6d0KohuKMA?uCvzvo9U#6F)Rh0xSD= z#3}S5M4z8rI>z5x1K!Tsm5S<~E-vkh$TwtYxEHVtaAnWSjzLTx`3yM;Jx?+t-qI#Q z5t35{-P0Gn)JF`3vMTa_{Wh~+WSU`&HQ>1Q(en0Lo`&61`yki$t8#vsl6Wx_=zWEQ z0z|sxPua9(?9Bo$gc+~`#=+V%(oTegLJ#HUpib{D6f-Euz2w&bx4x+Nb& zBO`xJ(|lEh#1@5_=`@L?2ot(-mHkJ-IktBTl%|KQ?<}dw;M1A3T>9Kl@=hgZr=(20 z=ipeKv|}DEQI?Znuu%^+KF(oHNbD$l4`J}rR<6koYAs<`Q|w)pPxYrFr62n{Jg4No zIA4Fl%ag;{A(-*^3B}&31e7Oq{W}W1NI_h93_7+#Nz$hB>-+V^*4&t6_VN^(Dp5P@ znn)(^4b2P{)~}F|hJyFAzoA%Lt~#{C&-@G7zOebo!S^}@b!s^JZ^a@~-d~VZ^tK~d zxx)-w5_TMY50!)m8a55&@^_PULD;p23Xb8gjQ#M78Mca$P?Xz#Bi*Bdr8OV5QO*+& zoO#y_MU8NO)r4ltL?GC79;4*|{ zGbWTdn1Opt;L@IAN_Uj-=~k;O0NyoU9lO_6IhSdJmF+q0ApaM{wkq>hqYAV|c%OGH zrAa!aoX7(Sm3gZ#%0RXNEANNwgR=uj9kWL7bOBrCVaw*skD4&l_MqEyi3aw#1I?co z`VeEuZf{2jSV=H}d0gSk`)|J}_h}FOUj^KR`TnX&I1H-bMBY%FJTF!p_uq2 z_ocAs$%VWPGCj8%t})ebqp>=#wN_!wB)uscgGA5Q@?ze-g+|%;u6Cpc{NbQlX?84i z!=MP#{ENR+rPngyxty|nj=Q~u(E((9I^@ql<($ua@oG+-7CYrjyW+Ic@%W{{^D>)8 zHOcACc4gml&rET=0vOrvXXDGCFc2#7$6(kVAxZccLJa5-Yg}n4^0;4Vug?*TZ!E{@ zYg}s)$yYP8FMrB;uGI9MgvQemq(8QR#5Ms6@l7al5z&G$wOdj5+v9H6oY(a=(-*_$ ztfisUQgVB9_8&Hc?n#KsK|rL)vZe(3U#&odsS>v%`DV`U;l*=)8;o|IgAHzU{0(cW zl?7T%A+q>nFq%@t&6!W_LLn<#Pwqae8uEjj#a^$ijW4RNwz{Fp+OYXx^!o6y=O57a za!h0##q&WzMNBP;ypIdlT}Uh5dSAyp zzvPF1@8_T1%XGWuRHxP*j@>?5NXQY1HEGo+svh=~ud(Zew@8x1c)ekzYlA4gSmS4d zu3JTF_jqq!pnO?*0&JI#MwoM;z_4C)Vjl<|^Xp!D)tRk5^T zq!GCieLu-()@UWXAcb;~kVj!-5g^1_r7kJP6bz)!sk(+PD1Ods^)R48Iz$RQLOSxYqxT>5y}68GCzlmJts~a}ms? zHl%BlN~(za@9nFz$LD@Y=*(8ZW!%RKgM^gV{psiYUU}P7I)MGT+A3E3s{nA4BE(nA zben2G2!CgF$@Pm7?bKp7=e;LoMHHc#jmADGXff$cqYs%j;R5tnB5~Vxpo-|QVCWI% zS+HWbcBh8w+m$N~szUn)bl(bq1zLH5s#58!q;m*4SaobF-%@ud%ozWQIw$pARMZn6 zK5p>+*i75`yVVx(jv#|XQ0Ycflf>VVG*!f}8(*F}1n6ct&$QY>zCmcNK0m%{K!Cbq z70pPJ1>32RT!xe-WY3$Ei%C8Weaj3sdu~&*4S}=JJYpT)htpy5|6H5jmOn45e@tFfz80o`f)b~F0iGk#ad*uK~ z@)w|a3MdSxVGpM!4}A#wsCv*RH#(M@a_lop>e(yuw|BilRDwz%~0F zpi}6~L7k|1o-krB=D58jK=sBRuh06pIs4EU{Y>xqScE-=mPf4=f~$5;FQ5?a&F7n; z@ISQGeQvAt4u9~eSi0xdSJA{@=7WtdSR(>5V0>h=acR-2@rp+egj%nNKC=2?c};bz zAKdmal+l9ym*D}F?!Vci#=~-nzt{G*C5ckVIg0djtLsYIIcCY;+`x(N4-Y)bGbNfD zFJ818(i2MH_&_bG|CuRcfi(u(h^Hr@SJLK*X5^3c4o3Q!-7`;OOM|j;#=bt+;kuXg z7?hfOpSkc?UxK*N%)}zX!4|xS95cd|^+5U24*(N*uSdWt^Y!0m8Or%<#?3LGfUufk zE@brv(*oC}d7(8Bgo^jWW&`j5kRIt{Ym2ZTgsl~xC?1tQyjY!K6r>c8rPPa&t3w_hgYb3|FRRJBE7Zz%Uo*eO=k#cR+j)Aj3$KJBO+ z41*Icvh1|rhI+Wth=zo9RBgoz#F5nz-@Bj1vHQY4i;>rUyyTt%T-Rd&!7Cm~IZKio zr?3)~7e1uDx8rv)JiGDiinPdpFIDLIm#-qIqa~m z!U3nvt1Hysxa`UL72(<-va14-oj*`BNlr&?ou5Kvfvj%lWsn?_jfS&-3PjCz_nxz} z9v;{CD$zzM7X)RG_=|jIEfMG8Z{TLSrh1-kc9|b`Uv`ci=n&jrNsGvhi+?=7#}$II z$o>lgdhQupnuK54FRn=S(O(QY^%$y76{Crjr~2bVVxKUsA4fa0#Nnqs>*2Q*php|I zh5{6+KC~EWr4f4z&4!?V{N;wv2ZxW1PIp~JkyLS;@XxSX*>2|GmBtPj14$C6Ieo+z z8|RcgG5B!!TWzGF#7IeR=r0TCL+tW=3toN#yaz0^sO7V0PRXC4n>t}hCS8&34wH>%bL{3yJ$Y;ef{OI6o`#)snZs_Hw6lX( zufV^$ZNt{epuqQIEoR$=x9(?8*iR}E=Y6}GF68s)B(wgmN+!;{32~;-hY|ySbD09y zm6ME_KyY$Iw9=Q_FU-*{etz*N!s5Z`Fm-%isAyp2$G;#CU?6tsVVUj6H`f!;#SfcZ zN23lg!RPDrJm$^ZHyc7s2MXM*gS{U$h<2R6DnWe!QbE}*&R9tbL%-jeNkT{ehj~J( z`1|Gv)ju~^HjSZx}C zNL67p4DD_5oxKZHzaCQ=UpLhIjb!)X#{I@Nmt`jGkE&RV{?{0%o9g>7HRV*HIZvP@ zc25d}(g%+U?5HuW&Nets&79-O)GzBSwE-?%|0{ugrh9^iRAky;$V=FnBu&;&iy?5+ z#!098#NBDPhIChpd{JS8@1;WO?6Hn-u%1{Q${AFEH=0#5%zdQLYNCA69CpR>W%s$O zRIa0wPiD5ox5J4F1+!5r_OLTb#>Rnzf}y+K7g>3Vq6aEnvxK)S8lSorC(MDmr%LxJhyJ|N18 zk08_%Swb}Mp_8pT#qbcc&eTW|s^k~9)g2vqXv2!s2K~k0QyW(f^iFdebc`-SYnHnl zf>(;S4=`!bNV)8}T>zS@^9+ltm%^%foNw0=*6a2CzD_qD0TTrtCjoknBz8J&O*W>H zT~vMHNbZ+h--06kH}pC2D&hkq{pazY{w>ghO})wzg@hsp1(i`eMm&_62^qy93G0;c z#l=47c;P={JjZe8Iq7%c`OrGhtZCGnjXt-$TDhIDdZvzdF5q!C`;c^*?aiKyN4xj# zx;i2)?zQMs4Ri=vegHN=2KES@swGj|23(UbbIe=G4STrBM+YgF^=ul{;)m@(nJb-h zPfY26H=#+wF4cNq8P`~nyhU8A(6JiU0_5rJ>FM=#>Yug!1r^}V*vay^MXl8jM{OJU zwYM!L`pi;-VohyYPY*f^xkyl1v5IQvB(Z=dVkYV7FI|%KJNM-UYKP?IJU;l!kL=FB zjyQops4;s4ap*P-2ghqqA#B2NlBVN6nU0Ag8h#&)HQyVz+=34uRA4%^?)9kzk4R9f zP#7-(=uoByfN(GuP!c{@g6dPGJ8CeEz(V=#h}sJ5g_F^+2WSDx6pZ>l!3}$|+ciRw zzOzIaENQLoUhfqs>3(c>Yra;fe<-ze<~?!%PKPiCKvLk(b|t%ERbm1YF*Bx;Hf+@ugHHP*y?$3zIW*{@U$80$q`WatmPxD+iM=XC z<~XF-t>U9$!D({Qq-kw-#GwRA0!)5KITtv64cm_U6jwIF8IbNBYibba1q1|! zorGh}m-!KoGRY#>34YWbPXfY!uruRJEb^6`xXmw}r>9#&Kk~$ke&@&EnL@ejMICAJ z-~zwaAH@eIZOK;dN*lF?pVH-PO-$~z3}&-HjC>#XLGJ$tZXfy*T^C1Ue~WU5&FL7C z-=Hya>iT&1>eJms*p8sv_xJLsPsCec!JE)uBkBj^ZNd0?avB;_O=2w~s8HBs3Hq=a zpOn4Vc$0?EmkC9)jU!P$OC$c_SKtrFt_j*;buh)GOXz4v0fHrdc?UeceW99YmGk3x z>CVpb`fhmB;llSnl}Sh@ROeOwF4?&WJ8~&Y61oo6AXBfA{vkYSa35Xw&O@18w8PRe zw3vMC9Hh{HlPp#kp=Cs_MY*C0o|VzlApe4HL}3K?l_UaL>c}}eq5DPISM%X{XB9Tw zf_AS=1}Jc3-mH%_aQvy~>ZVXu zWu?R$md%X=%9(=IA>$`uvP>j>gU9?IsCu$9=!Ziw4s2!u(Pw{@h`(Z5Z8KRdCb0%% zmg5!M7?v^cgm>RQDgM@NXb&qx)ichj)aFGs7?7IIt|}*g`u4_|reI!Of*K>O0tzS{ zs0e6He;mqnd5+(ve(JHY-m39Mjl7?8(;9hYFN@ofT{Pm)`v$2!yZ`)t)Hz8^MKFly zR>CR}iy=P!M-5Nz`y)rMCI_?Ajxt=dDYSpucMBeajG!z6t(K#mP%Ls8LrE{|FM@l_ zY?ay>Dn2lH2e|w%2;>>K+-1vb(_u|(_S{!6A-_VcR28r&z6X@9%uaZ(3l^OM*(bfF zsS+I>tXF#y{MeozU8n882aZPCyuwOOmq{$Nt{@iX=JWK-kxB9Vtj$AqgH@YdCCRc_ z=MDLYme$62FH_DZ_XIXiN(rUd$__Lif*yrkUe%3!sD8WfscS+44X2uw9*@M&4$gc2 z%*b1mZZv>7pCd^UVn92e(;T)brkXMRoWvPNZsHB(w`G>U?=*34t8P6{AD^J+ zoa-!V*RgI*8OktdKfh{i*!9kIsh92IFT4+SoAs7eYGjp!eBBL46F;tj=J%bjhE43nDZqa7{LOgGae1nYC-DPkoN zN*T@t>atj;3CxE))wcEwz1GRfQ#X1u;>6q;`Cde z;D=c}OYGr0|5A8XxK_Qlu=MJ^5ba#WtB4v4ru~Ran-jO~9AYqX3Cbl!vLYJwhQ4vf ze>W6ICwL)8y>Ht}YRK@1Q60U!CVFbu00Own>P+cWR`%$BI-bJjzg$UU?2xWo6rPsU zaF4_4GSFz}Z$ey-G-nL|!xh#4Xyx^P`yAE3(2D#|C2RPn@ch5kcb6~1^~Olr_fGD% zc`)ykzgTAO@f7unO3%8d-MRVR!|a-%Zvh>WwXbucYfimNjmCODl7)OtQB zSBSy>=X@vZUr4_Ex0?9>&YR-Dcx?Yl6X1W==HK7p|Noi~>VH9g{$DW`YAiRP(p}k0 z1E?i*+#wpIBADBy3C)|grs_{u=Z+tl2D!V2EV*3KbhO^b_pN!@C!f6?0>WD-*_dSj zISI$2%?LT6xS5{ZQ#KDXJW5Y#9*GT`c3<)D+mLb_fiMY1Z?yjnXP-t5R)vI7`jULe z!fj=6tZv$|l2-twFAq%?u6e^eKl+^=UrN--W0^DRX?Le{&){vy{LUs@L)XsoZ6*cT zuRxh9V1jv*O;(^gjBCvLJ*K*EOhUaj7r$OD|2eL+Zo>D)|4jGWfq@HgP2>zh)S3e~ z2K}@i1tp|fCid&qghsm@%ie)`LRI`_D7#iyq-cA2Z4&3(CHG*vMS%JS=u z2ltiHFp=XF4`=3{mWk2J#ScuIj`is5%?j$ z$%_bs3`-9~!EY=0hh#qhBqzk-lgLdJD1shWpVq}peX9+Zma23yS*Oq=A~azmx%l-3 znhW<2XsPmihOgbBKW^y-N){D%E2D|hBqic4g!pu;;LN>2#_m97Y=)e#4ZSsM1Mair zE2IV0$+9jooCE`_kd6F`;`%f1Lqk#ki(-ZoED}K9pKxsm3(y?v*^j?9N_;W2-SHs& zP5y1ko>q_@q_v*_19H$hOEMt%mHo`^TI}@n<7iag=V{$zaI?zw^7s4Sj^41$ z@JaS+sgQ2r_PTaXoe%C?O`>zHV)hQhN~PVJY>IQ$*pGOm%0E)^(j1J6@a#fhw@NCL(C(=DJ-jzLw_c-w zk+5l*y$7$ia2qeb;PDBPPW@rS>)D5OR}n0v)UX1a-Bt^Dg0SsXP0J%D_^4 z>#Y-kV1olACVF=M()InWPF{JhzFGSFF^jR6sZNe{5t0D4yub(Dd0FH+qddpdE{MjG zp3$#(J`Ld4_ZQt?6l3iPc=1a9InNr+W~xaaC?l9Ip7!Bd4&laoW4$YJ=BDE{vLD8& zjCi1;G%6aROont>*NdT&Dyk4NS0P%Q0>;#7g@pbtVD0ZpOLOExgBg8^9O*kTQ9kn? z7U=dMm0ox;J@(3T-XS?h&p(k735;I<&1pvC-kie?`wEv8 zXMCGH{LQkS%U=+A&Pl#X@S8JX`L2;+Q?V+VZ>T|wQULE|J9?}RWkG$f0QlSYjrC+% zf+t1yD~!?xDu&-^Azo(o{VA<>??R|MbJ3}P_Pe@o)ORNi=iGDh;I)UdOi`#J_k`jHiF{SOep6zsTb7>nxzo2u}loh0!BNlS@y}67KM?i^X!6((zROhGGhyx)OX=e*2s;uK1NNtlT>~U;lD?b>Dc~ zj^*;Z`;nWB`&c;*tAoESpD(#!9%Hpb5A6*fB$9^0)U= zvxsOSGXe>p)4+RuLj;G(NNN)DzXFdcsM<{+ngMnWj#i!fWh8r?g%%68 zCswVpLU!He)9b}-n)m?+_9qw86Ikl*Id9M)t5BUcDC#T`z=QXp%f1`0Kb!*y7UwO3 z>V4_Cs$j>J{xj*F6s%y~C_&t$V+<>fzl)`0p;)p${w^cembl!HZhZJb@oHx(DZF#2BJJ$z=iwL7c-udn=4Q&kK*`FDY+H55GuV zg4t`9zgc@TxC0;F>y`aJ-_uY~MM)<*dOAL)2u~{+2A;PVu&$o?bK0UI>*?A>Q_+Rf z2A_YBiXhObrI?B>$BSc9B*VA28FCTIfp|g`at$D3HJw$OHn#3+Kj>~#`DV^8jmu#~$OIIX@ zS^sz@GFso=B6TlYSeyH{!EgKEUKBwpts5Zfe+TGb(VdM;l4BL`_u+Rl0-U{fS+oq~ zZdpI^`EK$-?G4N9M@f(uR=_tA0*MYGf@{5wjESE7i^{rjrW3noJQS&5QZcgyItZz|Rj-$HzGjyg&Ug%XoI<%IAIgo@o1&VR;Ga z#h;Y}mFH2Nx3wfl8WZ<%IY~%bLXt7IwfxO5m0BKN9rD+)Jg1L#RUUd_e8e)mC%L4Z z184@&`|u*hV z*Yh|LL?-VkV19X!Vj<1Tn7h>&vQd{7M@{->edDCC`-91RW%u5RsLptQVN(NMDML|O zatIkf6duA7kZMWYu(S0A(e0w&Do$tdr8w>=&$9b?Hl4-a;D2zL^X!wVH!NcelDOJF zG}dZS1T<i4*|(?H{V|S8N@S^TYI;Bbo>MAeR}c?rGn>qjTzv=sC|_R6Ianiua_& z!DuRwa*hb4DVbu63@;PI#9vdN9uq_s5Hk|HHIW~A;*aE`4rsd}RDWDKkGZjNfY4I5 zzml8f6hf5XQ0}Q-oBx2+hQ&HQ8zLx2((@ z^h-pjlCy+W&ANX0oY@{Cj~j5F=wg;eLg#n*ZTVv0P)wkA2N**eciOKM%a zv>xMyiEtH)q@ppUD>Q>=`~~5HtyDp4`=gb34rOB+g7f8DLhmX_h1S&ar?Tul&=Fy! z42zi@mE{ot;F~s)V24Goij6*PhlcpWx$U<0UwGHkwAxuSCHk%|ADQ!7n(&YGI%(ap zV!Ea|L}7UCN;C&e3GYzsihGGxqnNL7E3d1HNj%)Qm8(YuYH!4b1uy%)6?SpHNWZd{ zK{p4*8X1-40iwLF9IFh)n%0waH-DQ1VtBu3zA*yeQfXaqAF^;zJTDgBBTerhRln$V zYf^gJlsbO*F|o;1jwYf*j*jMFsXZ1<+dccGSxToCU+aWd%6gB!Wk|(T7SpH3MzDVO zEvna~J4Q@6s7_$INMjg@lg`p7i5pH`uEpVLMY&Y-x?-ccrlq0TRFn$#wd)?fEZ0+(j;jtU5OIvsd!{COU#s_{;FHt$6AI>xpPsBK^ zz7*7R?<9dFtMRv;%JvIim8OeIyop>dT|OQu(EcJkur%oGkm(8NiWN$PoO}W<2RF0x zo2ZSD!@9sYkrxnWChL~z(b=x8>TN9x*D}t1oUoO;Xi#GSZIzI}CdwwclQw&Rj|4W# zh@{d88g9lzMQhXN=-~HLK-uxX;`-Gfp2V%W_6TO z#>?AxW#swPh061~rUgM+OXwnXye%Q**ur)MpbFLxfdcdrv6wOzP_Qk{_iNfSub^Sz zjV(D>KhbO7_r2xXBNhJGvs9}8M|*D`4R!qgkB%jK*>@UQl3j=}nIv1%pdys1tPv`E zCSwWNw-Cyxh>|ARg|Y9lm3^5ZiA-k1G@7OR>i*8X=ljR^kMBMAe9k@R-us8cafbJ6 z=KXrVp3mpw`FK7aP~~}WnX@6INpp33=Z4xH&v$%$vL z=0zHSH3fjgGbdb`+4<`)P#Jn?K^H8g}5)Jx+~ULChUS5gK0w zKg2XZ*?gcnuLd5x8h-9C;AWxKP%+Wj> zLjFRa2N8A&D}JpiE=W~`MvU`?zWpNO7-eOkNKjCL6(i^O0mokrs<}ToafRO3C8q0Y zs-E}zdeNhA#oC~wvNI+Y@p$cAg|aH*v=#j?gzQ8Y=~}TUBGdz04|L3uVk_x>hB3|M zrdL&jnAQ1Oqgb0? z^gHqcnl6IC_Yv*iV%TTQ?h>|zA4Uk0OJmXR{eBmrWImW1BFEP=OW=GP3;-FbiYu{igh&yceM+qq0tb0WgYiCfzwEejH}i~#sB2*czbj?GCgY@Jtln3q2WYpcjr zv**issHPg!G6AQ*4%w5&k4MIuNcsGQaH4D~UAufWo_~4Hlc&qNU2m%XsN#_|q()!- zY4I^ON>nmE9SGu!2HBF7dxELPWCcPm_JtpB*nz5UhlTn6o4*E$cWc%{R68D5-bgfC z!kM=HPjL=N}0aVn9L$Y=xmWqY;~Xb$7jL!1NlXqPP>sCJ4K;<*Kk0=#B< z0vsI-`VVo*NkM29$@Dll39ua>Qg8=cR}#ufE)=!`za{tb?{^m!M%1+x&j*y}5Am{l z3^Yi4fHEjl0y}}M7AJN*XU-Ptw+8u_3=u%6w<#O7(GIgrXH0+vT*=H^-;$F7c+L* zrFi6RaFgPdYuQV4X1eUf$6Fu9AS64}ADB#oznE&yn9b;_&iMruam_pxpZg8{43sIc zY9BDO?$_)h4y!cxgONNnRR|dYq@=h*$oG)(e;OXCo2dUV(ZHN*i0qj>fi_&l^gFEC z&#j#Pa9y_G3^zXOX~q~o_2{lW0gdokmJG6q$$&NMweSc zL0Uekw^1foVK)(xZP^%;9{M7c#>1>o+rJ0a-Ux!o!OB;^CaorG{zA~Nu$%hP@O;cn zN*{j}y2CyYWeMol_Fn?`8pnng`tJPDpZQ$;#QeD2ZfANY?jjst1vYDNhImxpTz>M; z@u*ose*|0*54lS zoV_r6{NrKwexpCh^cnEZb5t{^$I70wNiuts|3YN28*qYU=Q0OMj2c-+*QTQVDYj}> ziG%UnL)D5k{lwn&tbwqTk~hO;)%M?lKQ9;+1;f8b=l<-qRJhw7wg|7ZAz z{mH|wH88m!zQK{~qWX3o*f73;4PLFE>0HMm=xv?(k9TB^`FaM-u>8;n5 zfrF`xUbX4x*k>SOr{mb#B>m8=bW1Xr=eDgcf9+fNcZK$Z0@xx&q}vhG$StKe#ZcpA~HDHTdUR z$+PO*yqW60>)xUuNBk#JcO)IpN)AQbGgiQ( zUV255O867t{?J$`z~P7?WT~zd^YV^J9N)G1nX26OYiH(u5T#`c4ZOM~Jo%h5XCZv4 zEXWr0ht~4$G-FCADIu49f6fLjID2c{WvNH^Z8k^T@t4L5+UosphVNZKz`nKG^tl-W z+f+C=GWU}JiJPQCSa~i7-)HikgkhPqhjV5ZoE($)5 z_sd2)CgrmcI?ite2RmSKK09-#xo3F!0L7;>+9Vou71{RxRV<(-Ge(VM_V@)dG+MD6 z=q}`rCdg{{h~ms>wSABn{I6G_|Ib&%{nu9% zgNl=dY?q2$2Jkt=!?(<7hRhMB29Dg<*(ym4QTsl}uOFEd0DIybr*~&RHT1Qr&V?&6 zX*2!UDz!{fUpJH`e9o^3&Ea=H3agZJyU|tc$vZyV?BjB2PhR)KWb8PR|Jb&p$htmy zA+{S!6Wv2I6!LJaU`HmScY^;_u+=W$NV2(I`Fn`cRK5UwsAOm9}0uW(Ql($Y_lWel zBE`s*9QC_BYxC`F2-OTVrEd4^dk4bBd-$oxMtiK_H?5hljpmU0v^UhsZUYy~#Phzx zHzrpOGIQlJn+J3xzGkGoZmjfgm)nJtY%*qlw=>nLe&R%TGGM}YUR=EvwX>E-vkwYV zdNDsdMsK@u`}`Vj0spY*V@-}QNYw2mI%0w;4<0+mV0e<-!P?e|w|jv?-+PZ?pX*O^ z7|PejFTPaFtU36p39fTeS+WQ1m`m@Z2RAh|f!#SUGZe773A;%6taQQRYwW<42LQa; zvQYbTzl1*p9=JWb8#Ruuv1lNq>GK=AOytupo@*bHF7mwT?)G&=GQ54GDdvvmMuII!&_j`taN)BV1kn}$_5WQbjO&zgD&g6$opD&}{!h9M_fJZY{}dPMVd`U&nA30~~N zJm)+bgF#!uCayuYB~xv0vZq*Oj4nucVA<<-^r`Euxy!W;VLADz`VQY=Q$ORxgBW&!`_jUC(Y4-N zTC0Ka&50ch%f7b58Roc2%=sOMpX%nkmTT|YFR-LuFMkf9U}Jz%TTGM?qaNl-=BIpM zJm0n2i@X+ekp`T`f`=UxA939Fm&+3IlCWEcs-@TcIRV7;|1eT<-!Zpv`{I8gDJA^h zmMKT5lhCai+O;c-ZoUEwK4Ibx-|y!RACg*fz2z~}z|$qyo2`0YBE;_Q6z=^WCdhj5 zmCz5CaZE_~=xOqD=dy3c*RS91@h^qe3wk=e`)m~#-^SWVm3&KAuAiv#-{qWm zry(?WEcZsbuC)722t+mNy5*6Ct@T1?82lFh{&_EOl)8Jz<96LJ$VuhYLolT`!hazq zIiT3D2P{x&CRhIJYv$zjo@OJ1pUPvfb);kRrszc~xAd<{s3cP(d~^jt7uDN5EJ|u= zgCAypM$}v6yQ=h|K>f&3XIP)*<>QO_`MHY3%cHCx;CRr%-?_VV6dZ#ANQQbH_9PkE z&5iQ_;uJvJnX}t|KsMD)zNzV>wDr+GN4`&+xSo2$-|viaS)<_>O_b^3jQ3HhD8p$J zX;2o_JeaMWR`>ppS^x1i4fa)4Mx|cTLs4f(`?Oqo-A+DTSOExufaJs*NV6*KArFVU zY+IAQRhA?C9;kn4cjs%&=1|S9R+kNZ4at=KD2&9-9c_3?&9?*#NqqOBt-3YItD!Nb zt4}>Z=WS{eyI{Y3n->|xQ?oK)V9pOx0k3TqXW$HDUTHPC*`IUURt=HHTl-7fk+FJr zt^1B^YVN|r=qRm!5DNTjZMXrm{R4W#W_nkYl)|*OOn`3*d;W#I5W-;ssT!Ra>eZ!# zx9HwP{6_>aY%hCW1-To5rMFR9orOznku!Ly(9 zX=?Vt5<=K~A7~wr@_gm{9c0$g4^>?kj-E$8K zJLctphvIL{y^R8SJeU-H4);b1qjb`>e^2YbdH{Rs8L06W63M~6(KPK2{aKCu2Bwh! z>Mn;ep&nQboOSBVn-<<&GHj`@otQF_k_^b0-M%OrW-F0?f=xoPxRyn0>mwPoR#|WX z{V;EQQlS>d;tvBCA00Z5tlCoWGO4uBCnnwDlmiaTN&;Ojajc0<)QCYbA8Ba=Jdd)& zg$EuU{|kA0BdDi??KOvbU!O&m-!I^*tU?pofV1N~M-z*cNKe(2`X0WNx=;giXGFBP z!;AZ%!Pg{3gZhQuP~P=X&XT8-ae;_A*LJsK!+67P-7oyCiqH}vm$ft= z&Z@OvXMn-km&*-W9S;;%FWb3jx%Cd|KCpaacE>|@x zy?R!*xNy#c_R&55N!(uV@#QkK$-hh_!DySWpndN>0Ei! zm6euOLx7{!h500-*Vz1?LR`>o|a zM!w9KK5XP4Ox`198*|XDIKra1WsHWuI^EKpU7-89!^zbTEtkyg+b`Qe*h;X`Calc4 zD7p8H;udb2Bue)43(}|7w~GsNn{M*0O7ib*hGw6MvFf0DEs+4l$`nc1pWHQO@>+v} z`1MQl%s5CfOT+5+GnBU+mVvmlr|&31&oPsZO4@8Zi-)dU~{>V<=wW|rkmxaxFRuCw(@7s6NUm3 zu$f3UKp>4Nj!00T3X&~;g{A2EpvMA~0x@`?*WM|*FTS=nUoc198+>5^Oj=@;eoBv z6Z~k9N^KU~sLFn6sC!IK^aT-?XgC}k3JIlXPm~RGcN*N7vA|<&!#I3>Ge=SxXe_qh z&nxm1ZJE{=&l)6`IQ$T)e#{B+z7^W{d3uw7Yn7(R9Bt0nLK8r)p3|l8QkGX$HtU<8 z26G?D+xaGv`|X#LkQ!^xrApQcR<>mNzfX1r-M>ou&1~7@XGk81T40Xgb^_X1z@r-k zfanRU3N~^OlV^JMk%=f3(Sfc8xucb^`Pk70gWvk0%kr?BH}l4e*HB;bEC-A+ell5w zj4tXa60QqhXN(Vi{1%Cn;;Wt#msi0;<{9bQH$|$JZ|je9aLy)*%a;9kc3BAow^6%99R}Z>Oj;1bX>E zk_AFlqu7zw(_qH0dN{BvCp{WJ2OckYUetl{b4iExDSZ(t6g z{cQq;sW(OH8b8<0%en=s`;QEcs9RisCq3k`{&^3fSjYe%NNi9hu!e!SGf`#W1{GA2 z&6vC}OZtg~v{3}#EU2$ow~w*?L4BSc@r?T+`h+6JCT){DIZJt=eMcH z!srEI-TX98EHfjy?I2jn5%hpId6W?kkTg?l3)s5P-?&mVsnH_dOr=e<$|^Pn{O*yi z#qzFGrl8XK^Wd+)HOM_Atv0fmO2dpMx-i;qHi8126%7g613^v|>z^|7|gD zGOuV*^iyL|zPgDo{oUE<#x$!NJ6}J^M z`RnN|>y1Fnn)A%qE}HLqeX$IQ@xgg!rZdyELtrkeM=IXjeg*;&-A}>;u#q#KDa)K% zCQZ1ACwZpW#{PL=F>jf^pk}Gjo~dWCI&2tk^ORppDas*y0e!md5FJU<>O&+phjn8U z^Q)I`{dwryt;R|kh<1}aP0O~pAc>fM&H*G(mWjs+83`C6(Dn7UuJe!VaZ_te7o8-2 z<)PH03b5XU`aU6!>s$vYPX$=y*nUBoym5aa@|@A=ts{{jGkl311Ed(Ba#~Q{wM;vJ zWv1Sz?}8nNR{=m28ZXwzX4fhSBtYz^zZ=knH+|L_sclf0LZXDgnu0bncpA!ByRULn6v`wdJ5Qf;4ib>juWI?laJ>3zut6tRi>Y#aIHH>b+(63 z^XBC{r-k(1dN4^m{EMPQOg%1lRy=@C^m|_eOe} zhXq(!z15OGfz^Pf?!c=}DCPJPdgD}8AiCo^O69={e3oU;_}HBJ_=zI5jnLT8UPsrP zeX=o6?Kp>U|GOFq9RGW3`Q>*sZyWnnYI^4%>uuzf45!#>;;7g*0*NK*vo+0 zSge)O+?$Gm->6u)+V)IlVmteqiGkX|?w7ug0AM$tCw-^m(G9n10^)GEbYI@YQn<8B z_0;nQ|LoSZZYQ2J#?*0L$b&RUNHC~jMgGHI=>Nu~@juf$iKDbKGSPvUx$P|@t>lnE z_KoJFIk23$xj96|>#*yWJqn5^Y(EL${X0{i9tyJD%A)BN@2X4m#r!9z|a;m3g-jhvQc6aAUilUQyW(O13T&)a!T!b4wQW=sWuZ);HqpJ zc=G$E^;*(*ZO*O04sn2mEc0Epq}jj|(-vP?$bTGLYf@2tfzh{#Z%RHQeTTpQ+)mVY zh*OKxCuH4pbV~>sPVpsYL`O*t)VO}${hd4$xIahDDr_0bGSd; z3!aLWhJ$A2qr)c{NigdWbeZ#}@VP&~=R77+13yc3T5B&><~@B5|6y|ZI-5*@?NTG$ z3rkfG$Xxy&B|r5u6aaMD7GNfH`3Gj*EsS=Ux#XtFh}R=p;=OxDITnUIDeE6S3~O=q z1z&_tj}LzIa1cBfYZr%k5({Cl#4&lL@d&YnzUL?JXks4Md&Gq*?QUvl( z0mdXsv&_~R;p^Mzi3%a)9R8uOC+znA5`Zmwp44<`UqVgRj6ytU% zAap{h%CBl@mwXG+-P;M`Q&VnA@9SwYBsVdufD%XV3?cbyYr7hj+#@lN-1*V+s84XN zCc0l>DAC+6u+&ebyuPR;JZd3P%D=R#>1)AG*i|cT&Zp7CT*zGEU886?Uz=!D zC$s^cUK^>E2(QIo+!mf`-!lHWE%s$GOK{2I(eoE4jZ>ST;oc``Yaj}^G3Velfy6Ap zMmdj?CM94Ak6IK2!G!6aWWeO5mb|4C3)8H;P7SBM7O0Q-2vOe$r;pIYm_wKb@vW>I zS~1zQj>3k769-MTG~FZ9%!t}~+%ls9o#P@k7I7f2SMij2cL9CXr{s`dubTjv1E(v$ z4p-=@ABuS8J2l~Q?abm~l}nX;>82g{bknav$WdrFj;hEHYU4zvK3X(tq*x*L+k_;2 zNLXVy&nUMMmfD}Jpkx&2GDMnMquZwbk9%LUejN;9LJ|mO-v{zZ~;@HKAG#P^2gdQ zUxq+VvSRH|AGmiImeW@q?(h?lpuO>13eQcsvS+Q7F!t;RZaCb*nnU>RwTgQ)oNS6A zyLOzv0ILyH1RW>g{s2cNe503zV80tZDAD8y)z>aLZ1VC& z^htD5n`j$(xhqhBs&M|JzPQn+f-rZ{iFNH$-C9EZ$3NM<4*=lsUkHK^R` zD)_UBBFd)qL&GGYy0G`;+wxm!gxt&o1z%!XXu$sa`FeWe0JPzM35Rfk9GRFzs-cWi zmtXinc})b7)Qi@nJZwOw?Wx^{%GXw@FF&R3-?8C&`4kJOc3VA)GiU?u^*ZQ4^z3>s z8j!9P$@%fYt+pkRPC1{N3*01mYAiN1iw8|TE>6lto3cCYnt}U<7$S`_dD#@*Tu^4D zOWw30Ht)SjE{57yZ1OV67Gx3zeIvfh7pdc9{xef~;`^dMx7qI`C`2T%zqhh~#N2#$ zN5}7vC>VFwr;_nnbEQ$`+!3Kq61!Rt*^hqv5J>riDh?N}9g$XN&+xnnn82(mhN>&S1Hw=Oa5`h z&Tx_Yq-4MW^qI$CGj@;x6YB)|V;5Rn4=WWl{O5Dn@w5K#(kQP(9VoIEVtl_f=cU6V zAdf`{*s~fS!ZdT50qO*`fP^Ew@mZEYU52O7EyM0Atr53MpJc7^z*0iw{A0Co$L311 zN9uxVBZ(jwtpj$wAH`w{?mPOhUaajvy6{RG2ifa`?#VPaNm9O`I1F;y*>GEettu)3 zjDZG{A_=cjNI`^v7*azQ{z3kk56RL5WzK7c_M|^(ks?VVssw&>*2+JXtOhq({Z+xwnPytSf{NsaQUSkM8Uqta{V1eS+ zK|`jrk1o}ytJAUhVHoA!f^AfDir!s4_mTNPMH^-{_54@&V!+yCl@{Z;GlFjW&+I>vi$gZ;+Ob>(_6PGl)`ZG+l8%e@ zTQ1kFTc}TYy|+=ZUP;$FyFVA2*;2)w=A|hqye=t?23_sTu#m_6o9cihE@lrXw{bVi0_+B^R45JJ@ zgYMvbwSwQ+7G_Qr9*$puCj54sV#)z7;WLS3V&Tbm{_At+_M`QuEzZAl;tcXgR|Cns zCw7KLX|Hc2)s+)XS8u}9@JaF*>#^YdmSJ0~yy&MmmOP z7n`=sNe6b&SLWoW)jb=WZe+=n1|(cXPa2+S{Oz*u$K;--sro@b;5sC01EtM{!i!v6 zB3sP{@5-4Y7CQt&2(cwv0L_kn0M_8xQd9fcUuklWD?41-BHik%UH|tkzPIIav4^-| zg2?2nXJ&cJ08{r>kO|KJVztWxH{=h-i!ittm-{ck$2NZkEk`?iGt>3cAL z2sS*R+$7=Haroz$y80Ms7d+KzNtz@c@0lfSFJ*=Z3JI+UYT(-{V_}Jnc%+*)U!6=_ zCOEY-I4MC?RTFXgp}Kg_>V^VhA!FWu1Ieu7^pDZ+2KZ7d{pOE`y{wGdlpS$dVYb$_o{yE?*|uJ~X<- zAL#ApeYfJk+0cdIVM+jC=nV8rut9<_IGo2QThuh17p7ZE*QKt)3R{)2pVj=BZGGu?7QphMm9iTbTHJgBkY00?fLyA)t#Iv@^ z?##JpQWtyWbHd}SOn_R9fg55MHMh(M^O}Zpp{zZ!^5@0MJS!08!wR3wtlm8T{O66{ zlT+N-jaSJKdJiKLU`-ft92d;T#4u{iO+Kf02{z-v{VmV+(d`|ceCq&sr25D3Lmg*g zPhX{fS%)@QZbi`Cqo)7tz03Ej2)DE)eM$hcck9E}oAy@TTd`b`8|gvCTvKzh!J(%-Mv0+fQ(?;_gf^t1!RhK?=0 zgm4f^-bB#%W5y+uNZy z5aW91Y#WH{uTYA06W)L{d3d{rWb?IkIA>FSgqWwqt$U#1PyksRZVi#K{kdT8dyo1Cn_e7XIYFl7N%VfU6JBgcSwGhs;q z)JTX_49f|v1f*o%;ONEa-5)7xt7Whn*Rwkbm$JnQZf4b^unG3RSPf+`J^V4tBm_v+ zyxRjMe4Dj8N5waaWhUw}-@9#U4<2>Cab{jtE?X?5z)ZiBRVPo*isdNo1rLytbO*L& z!a?m&F`V}Z8$r*gb&jzcITzvGP%w6#mlR#RoAqT;AQ8>D_V=E5ew4==T>2Xs{po zaIq+5Kb`QQzCX#L`4<~Cf83V2X&zIiq41G0RX`fX9lLxW?!F~4pC(0xQrby+Qkpe} z6LljR1-py3!o$bK`wC=|a+gJ(NJ_E`NF3&Zdw`M2;l6krJ|EsN-Gvky@FPw4h~3w7 z$d-nAc(M#tD*?Q>t=^uex{xm)xbdBCLP5|clW?%&;`-8jw*JVr)SMK{! z8ICho6RsQduLt4pyp&_?nL|*Yb22`R68;`9to(Mo&CX4^Hz) zmV~NzjkK#rnUAZf~Z)ij4qm#}VJ{4^+H6(@=2kH`KMu~p**tKG?O z9WSYOsdHUFtrYre1~^cO_if67Qp-6>#B+-m~9fDZW4ZA8o6gwHud_ z?Y)}OqFeZ92R6 z_U8R<;fS*XX?6ehOatXAqY>wk!j0m~U*h}Ha=gNqJ0eJW!54b#;d`rZk)dLb$_n)) zEcb1Wt=$XyQ?RPV6&~5g6#m>IxH9kAGI_%9mWJl@fDDm0mlG`I@FYvQ9m98d08R<~ z$ARsT14B=6at*bTa+@q8uAe#j?Mgv_NA>5bj;~&4?Il$v?>Fa6`+PsOPRO7O8&Don z3syAX;4nv_fxxeVZI)Mxr46leQbpF$+H3Cp>t2a&$tNBh7e}JLg4|e?yi5?C?gqW` zZuo%~ZkUVorqRU>%K4PFE}n-xS-GjGYSF{42ROMduxucoVMw-GGw!Q_TTQE2LBQCm z`cf+3D|V)nSIs)L(^2Rx#^)a7GLOg`w!HoMBazH*P|J$^=f9x;jVs-M0%-fM|C3Gp zDrq?pW$Q`_ | ✓ | ✓ | - | - | ✓ | ++-----------------------------------+----------+-------------+------+-------------------------+------------------+ +| Kosmos-1, Kosmos-2 | WIP | WIP | - | - | WIP | ++-----------------------------------+----------+-------------+------+-------------------------+------------------+ + +Spotlight Models +----------------- + +LLaVA: Visual Instruction Tuning +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +LLaVA :cite:`mm-models-llava` focuses on creating a dataset for visual instruction tuning to enhance LLMs' ability to comprehend diverse instructions and provide detailed responses. NeMo's implementation of LLaVA is called NeVA. + +- Model Structure: + - Visual Encoder: Utilizes CLIP’s ViT-L/14. + - Text Decoder: Employs LLaMA. + - Connection: A simple linear mapping layer connects the visual encoder's output to the text decoder's word embedding space (v1.0 version). + +- Training: + 1. Cross-modal Pre-training: Utilizes 595k image-text data from CC3M, training only the linear mapping layer while keeping the visual encoder and text decoder frozen. + 2. Instruction Fine-tuning: Custom-built 158k multimodal instruction dataset employed for fine-tuning targeting multimodal chatbot scenarios, with a variant targeting the Science QA dataset. + +Flamingo: A Visual Language Model for Few-Shot Learning +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Flamingo :cite:`mm-models-flamingo` addresses inconsistent visual feature map sizes by generating fixed-length feature sequences, enhancing visual relevance generation. + +- Model Structure: + - Resampler: Utilizes a Perceiver Resampler for generating fixed-length feature sequences. + - Attention: Adds cross-attention layers before each LLM layer to enhance visual relevance generation. + +- Training: + - Dataset: Utilizes data from various datasets like M3W, ALIGN, LTIP, and VTP emphasizing multimodal in-context learning. + +Kosmos-1: Language Is Not All You Need: Aligning Perception with Language Models +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Kosmos-1 :cite:`mm-models-kosmos1` by Microsoft is a Multimodal Large Language Model (MLLM) aimed at melding language, perception, action, and world modeling. + +- Model Structure: + - Core Backbone: Transformer-Based Causal Language Model. + - Architecture: Utilizes MAGNETO, a nuanced Transformer variant. + - Position Encoding: Employs XPOS relative position encoding for long-context modeling. + - Resampler: Employs Flamingo's Perceiver Resampler + +- Training: + - Dataset: Encompasses web-scale multimodal corpora including monomodal, cross-modal paired, and interleaved multimodal data. + - Objective: Focused on next-token prediction to maximize log-likelihood of tokens within examples. + +BLIP-2: Bootstrapping Language-Image Pre-training +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +BLIP-2 :cite:`mm-models-blip2` adopts a two-phase training strategy focusing on learning key visual information and adapting visual encoding structure to LLMs. + +- Model Structure: + - Visual Encoder: Combines a pre-trained image encoder with a Querying Transformer (Q-Former). + - Bridging: The Q-Former acts as the bridge between the image encoder and the Large Language Model (LLM). + +- Training: + 1. Phase 1: Focuses on tasks like Image-Text Contrastive Learning, Image-grounded Text Generation, and Image-Text Matching. + 2. Phase 2: Aims at adapting the visual encoding structure's output to LLMs with language modeling as the training task. + +Mini-GPT4: Enhancing Vision-Language Understanding +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Mini-GPT4 :cite:`mm-models-minigpt4` emphasizes the importance of multimodal instruction data for model performance in multimodal open-ended scenarios. + +- Model Structure: + - Visual Encoder: Employs BLIP2’s ViT and Q-Former. + - Text Decoder: Uses Vicuna (a fine-tuned version of LLaMA). + - Connection: A linear mapping layer projects visual features into text representation space. + +- Training: + 1. Cross-modal Learning: Focuses on learning the relationship between vision and language using data from CC+SBU+LAION datasets. + 2. Fine-tuning: Utilizes a multimodal fine-tuning dataset built using ChatGPT to enhance text descriptions generated in phase 1. + +.. note:: + NeMo Megatron has an Enterprise edition which proffers tools for data preprocessing, hyperparameter tuning, containers, scripts for various clouds, and more. With the Enterprise edition, you also garner deployment tools. Apply for `early access here `_ . + +For more information, see additional sections in the NeMo multimodal language model docs on the left-hand-side menu or in the list below: + +.. toctree:: + :maxdepth: 1 + + datasets + configs + checkpoint + neva + +References +---------- + +.. bibliography:: ../mm_all.bib + :style: plain + :filter: docname in docnames + :labelprefix: MM-MODELS + :keyprefix: mm-models- \ No newline at end of file diff --git a/docs/source/multimodal/mllm/neva.rst b/docs/source/multimodal/mllm/neva.rst new file mode 100644 index 000000000000..9419f15eab92 --- /dev/null +++ b/docs/source/multimodal/mllm/neva.rst @@ -0,0 +1,160 @@ +NeVA +==== + +Model Introduction +------------------ + +Emerging from the roots of LLaVA (Large Language and Vision Assistant) :cite:`mm-models-llava`, NeVA stands as a pioneering model in the NeMo Multimodal ecosystem. It adeptly fuses large language-centric models, such as NVGPT or LLaMA, with a vision encoder. The training utilizes machine-generated multimodal language-image instruction-following data. Remarkably, even with a limited dataset, NeVA exhibits profound capabilities in deciphering images and adeptly answering queries about them. Its prowess is especially evident in tasks requiring intricate visual comprehension and instruction-following. Intriguingly, NeVA mirrors the capabilities of advanced multimodal models like GPT-4, even when faced with novel images and instructions. + +Building upon LLaVA's foundational principles, NeVA amplifies its training efficiency by harnessing the NeMo LLM framework's features, including model parallelism, activation checkpointing, AMP O2, Flash Attention, and more. + + .. image:: images/llava_arch.jpg + :align: center + :alt: LLaVA model + :scale: 30% + + +Main Language Model +^^^^^^^^^^^^^^^ + +The original LLaVA model incorporates the LLaMA architecture, renowned for its prowess in open-source, language-only instruction-tuning endeavors. LLaMA refines textual input through a process of tokenization and embedding. To these token embeddings, positional embeddings are integrated, and the combined representation is channeled through multiple transformer layers. The output from the concluding transformer layer, associated with the primary token, is designated as the text representation. + +In NeMo, the text encoder is anchored in the :class:`~nemo.collections.nlp.models.language_modeling.megatron_gpt_model.MegatronGPTModel` class. This class is versatile, supporting not only NVGPT models but also LLaMA, LLaMA-2 and other community models, complete with a checkpoint conversion script. Concurrently, the vision model and projection layers enhance the primary language model's word embedding component. For a comprehensive understanding of the implementation, one can refer to the :class:`~nemo.collections.multimodal.models.neva.neva_model.MegatronNevaModel` class. + + +Vision Model +^^^^^^^^^^ + +For visual interpretation, NeVA harnesses the power of the pre-trained CLIP visual encoder, ViT-L/14, recognized for its visual comprehension acumen. Images are first partitioned into standardized patches, for instance, 16x16 pixels. These patches are linearly embedded, forming a flattened vector that subsequently feeds into the transformer. The culmination of the transformer's processing is a unified image representation. In the NeMo framework, the NeVA vision model, anchored on the CLIP visual encoder ViT-L/14, can either be instantiated via the :class:`~nemo.collections.multimodal.models.clip.megatron_clip_models.CLIPVisionTransformer` class or initiated through the `transformers` package from Hugging Face. + +Projection and Integration +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The encoder retrieves visual features from images and intertwines them with language embeddings using a modifiable projection matrix. This intricate projection translates visual cues into language embedding tokens, seamlessly merging text and imagery. LLaVA-1.5 :cite:`mm-models-liu2023improvedllava` introduces two pivotal enhancements. The integration of an MLP vision-language connector amplifies the system's prowess. Building on the triumphs of MLPs in self-supervised learning, LLaVA-1.5 undergoes a transformative design shift. Transitioning from a linear to a dual-layer MLP projection markedly bolsters LLaVA-1.5's multimodal faculties, empowering the model to adeptly navigate and synergize language and visual elements. + +Architecture Table +------------------ + ++------------------+---------------+------------+--------------------+-----------------+------------+----------------+--------------------------+ +| Base LLM | Vision Encoder| Projection | Encoder Seq Length | Number of Layers| Hidden Size| FFN Hidden Size| Number of Attention Heads| ++==================+===============+============+====================+=================+============+================+==========================+ +| LLaMA-2-13B-Chat | CLIP-L | Linear | 4096 | 40 | 5120 | 13824 | 40 | ++------------------+---------------+------------+--------------------+-----------------+------------+----------------+--------------------------+ +| LLaMA-2-7B-Chat | CLIP-L | Linear | 4096 | 32 | 4096 | 11008 | 32 | ++------------------+---------------+------------+--------------------+-----------------+------------+----------------+--------------------------+ + +Model Configuration +------------------ + +Multimodal Configuration +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + mm_cfg: + use_im_start_end: False + +- ``use_im_start_end``: If set to `True`, image start and end tokens will be used before and after image embeddings. + +Language Model Configuration within Multimodal +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + mm_cfg: + llm: + from_pretrained: ${data_dir}/neva/checkpoints/llama-2-13b-chat-tp8.nemo + freeze: False + model_type: llama_2 + +- ``from_pretrained``: Path to the pretrained NeMo language model checkpoint. +- ``freeze``: If set to `True`, the model parameters will not be updated during training. +- ``model_type``: Specifies the type of model, either `nvgpt` or `llama_2`. + +Vision Encoder Configuration within Multimodal +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + mm_cfg: + vision_encoder: + from_pretrained: "openai/clip-vit-large-patch14" + from_hf: True + patch_dim: 14 + hidden_size: 1024 + vision_select_layer: -2 + class_token_length: 1 + freeze: True + +- ``from_pretrained``: Path or name of the pretrained vision encoder. +- ``from_hf``: If set to `True`, the model will be loaded from the Hugging Face model hub. +- ``patch_dim``: Size of the patches the image is divided into. +- ``hidden_size``: Dimensionality of the hidden layers. +- ``vision_select_layer``: Specifies which layer to select from the vision model. +- ``class_token_length``: Length of the classification token. + +Main Language Model Configuration +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + mcore_gpt: False + encoder_seq_length: 4096 + position_embedding_type: rope + num_layers: 40 + hidden_size: 5120 + ffn_hidden_size: 13824 + num_attention_heads: 40 + hidden_dropout: 0.0 + attention_dropout: 0.0 + ffn_dropout: 0.0 + normalization: rmsnorm + bias: False + activation: 'fast-swiglu' + +- ``mcore_gpt``: If set to `True`, the GPTModel from `megatron.core` will be used. +- ``encoder_seq_length``: Sequence length for the main language model encoder. +- ``position_embedding_type``: Type of position embedding used. +- ``num_layers``, ``hidden_size``, ``ffn_hidden_size``, ``num_attention_heads``: Parameters defining the architecture of the main language model. The ``ffn_hidden_size`` is typically 4 times the ``hidden_size``. +- ``hidden_dropout``, ``attention_dropout``, ``ffn_dropout``: Dropout probabilities for the hidden state, attention, and feed-forward layers in the transformer respectively. +- ``normalization``: Type of normalization layers used. +- ``bias``: If set to `True`, bias terms will be used in all weight matrices. +- ``activation``: Activation function used in the model. + +Optimizations +^^^^^^^^^^^^^^ + ++------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Feature | Description | To Enable | ++====================================+=========================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================+==================================================================================================================================================================================================================+ +| Data parallelism | Dataset is read concurrently across multiple GPUs or nodes, allowing for faster data loading and processing. | Automatically when training on multi GPUs/nodes | ++------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Tensor parallelism | Each tensor is split up into multiple chunks, allowing for horizontal parallelism across GPUs. This technique, known as TensorParallel (TP), distributes the model's tensors across multiple GPUs. During processing, each shard gets processed separately and in parallel on different GPUs, and the results are synced at the end of the step. This approach is inspired by NVIDIA's Megatron implementation. [Reference](https://github.com/NVIDIA/Megatron-LM#distributed-pretraining) | ``model.tensor_model_parallel_size={parallel_size}`` | ++------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Activation Checkpointing | To reduce memory usage, activations of certain layers are cleared and recomputed during a backward pass. This technique is particularly useful for training large models that wouldn't fit in GPU memory using traditional methods. | ``model.activations_checkpoint_granularity=full``, ``model.activations_checkpoint_method=block``, ``model.activations_checkpoint_num_layers={num_layers_to_check}`` | ++------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Selective Activation Checkpointing | Selective granularity version of activation checkpointing. See our paper for details. [Reference](https://arxiv.org/pdf/2205.05198.pdf) | ``model.activations_checkpoint_granularity=selective``, ``model.activations_checkpoint_method=uniform`` | ++------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Bfloat16 Training | Training is conducted in Bfloat16 precision, which offers a balance between the higher precision of FP32 and the memory savings and speed of FP16. | ``trainer.precision=bf16`` | ++------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| BF16 O2 | Enables O2-level automatic mixed precision, optimizing Bfloat16 precision for better performance. | ``model.megatron_amp_O2=True`` | ++------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Flash Attention V2 | FlashAttention is a fast and memory-efficient algorithm to compute exact attention. It speeds up model training and reduces memory requirement by being IO-aware. This approach is particularly useful for large-scale models and is detailed further in the repository linked. [Reference](https://github.com/Dao-AILab/flash-attention) | ``model.use_flash_attention=True`` | ++----------------------------------- +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +NeVA Training +-------------- + +NeVA's training encompasses two essential stages that enhance its capacity to comprehend user instructions, understand both language and visual content, and generate accurate responses: + +1. Pre-training for Feature Alignment: In this initial stage, NeVA aligns visual and language features to ensure compatibility. +2. Fine-tuning End-to-End: The second training stage focuses on fine-tuning the entire model, end-to-end. While the visual encoder's weights remain unchanged, both the projection layer's pre-trained weights and the LLM's parameters become subjects of adaptation. This fine-tuning can be tailored to different application scenarios, yielding versatile capabilities. + +References +---------- + +.. bibliography:: ../mm_all.bib + :style: plain + :filter: docname in docnames + :labelprefix: MM-MODELS + :keyprefix: mm-models- \ No newline at end of file diff --git a/docs/source/multimodal/mm_all.bib b/docs/source/multimodal/mm_all.bib new file mode 100644 index 000000000000..3930484d71e5 --- /dev/null +++ b/docs/source/multimodal/mm_all.bib @@ -0,0 +1,206 @@ +# Imagen +@inproceedings{saharia2022photorealistic, + title={Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding}, + author={Chitwan Saharia and William Chan and Saurabh Saxena and Lala Li and Jay Whang and Emily Denton and Seyed Kamyar Seyed Ghasemipour and Burcu Karagol Ayan and S. Sara Mahdavi and Rapha Gontijo Lopes and Tim Salimans and Jonathan Ho and David J Fleet and Mohammad Norouzi}, + booktitle={Conference on Neural Information Processing Systems (NeurIPS)}, + year={2022}, + doi={10.48550/arXiv.2205.11487} +} + +# DDPM +@misc{ho2020denoising, + title={Denoising Diffusion Probabilistic Models}, + author={Jonathan Ho and Ajay Jain and Pieter Abbeel}, + year={2020}, + eprint={2006.11239}, + archivePrefix={arXiv}, + primaryClass={cs.LG} +} + +# EDM +@misc{karras2022elucidating, + title={Elucidating the Design Space of Diffusion-Based Generative Models}, + author={Tero Karras and Miika Aittala and Timo Aila and Samuli Laine}, + year={2022}, + eprint={2206.00364}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} + +# Make-A-Scene +@misc{gafni2022makeascene, + title={Make-A-Scene: Scene-Based Text-to-Image Generation with Human Priors}, + author={Oran Gafni and Adam Polyak and Oron Ashual and Shelly Sheynin and Devi Parikh and Yaniv Taigman}, + year={2022}, + eprint={2203.13131}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} + +# Stable Diffusion +@misc{rombach2022highresolution, + title={High-Resolution Image Synthesis with Latent Diffusion Models}, + author={Robin Rombach and Andreas Blattmann and Dominik Lorenz and Patrick Esser and Björn Ommer}, + year={2022}, + eprint={2112.10752}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} + +# Parti +@misc{yu2022scaling, + title={Scaling Autoregressive Models for Content-Rich Text-to-Image Generation}, + author={Jiahui Yu and Yuanzhong Xu and Jing Yu Koh and Thang Luong and Gunjan Baid and Zirui Wang and Vijay Vasudevan and Alexander Ku and Yinfei Yang and Burcu Karagol Ayan and Ben Hutchinson and Wei Han and Zarana Parekh and Xin Li and Han Zhang and Jason Baldridge and Yonghui Wu}, + year={2022}, + eprint={2206.10789}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} + +# MUSE +@misc{chang2023muse, + title={Muse: Text-To-Image Generation via Masked Generative Transformers}, + author={Huiwen Chang and Han Zhang and Jarred Barber and AJ Maschinot and Jose Lezama and Lu Jiang and Ming-Hsuan Yang and Kevin Murphy and William T. Freeman and Michael Rubinstein and Yuanzhen Li and Dilip Krishnan}, + year={2023}, + eprint={2301.00704}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} + +# Ins P2P +@misc{insp2p, + Author = {Tim Brooks and Aleksander Holynski and Alexei A. Efros}, + Title = {InstructPix2Pix: Learning to Follow Image Editing Instructions}, + Year = {2022}, + Eprint = {arXiv:2211.09800}, +} + +# Dream Booth +@misc{ruiz2023dreambooth, + title={DreamBooth: Fine Tuning Text-to-Image Diffusion Models for Subject-Driven Generation}, + author={Nataniel Ruiz and Yuanzhen Li and Varun Jampani and Yael Pritch and Michael Rubinstein and Kfir Aberman}, + year={2023}, + eprint={2208.12242}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} + +# Control Net +@misc{zhang2023adding, + title={Adding Conditional Control to Text-to-Image Diffusion Models}, + author={Lvmin Zhang and Anyi Rao and Maneesh Agrawala}, + year={2023}, + eprint={2302.05543}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} + +# LLAva +@misc{llava, + Author = {Haotian Liu and Chunyuan Li and Qingyang Wu and Yong Jae Lee}, + Title = {Visual Instruction Tuning}, + Year = {2023}, + Eprint = {arXiv:2304.08485}, +} + +@misc{liu2023improvedllava, + title={Improved Baselines with Visual Instruction Tuning}, + author={Liu, Haotian and Li, Chunyuan and Li, Yuheng and Lee, Yong Jae}, + publisher={arXiv:2310.03744}, + year={2023}, +} + +@misc{minigpt4, + Author = {Deyao Zhu and Jun Chen and Xiaoqian Shen and Xiang Li and Mohamed Elhoseiny}, + Title = {MiniGPT-4: Enhancing Vision-Language Understanding with Advanced Large Language Models}, + Year = {2023}, + Eprint = {arXiv:2304.10592}, +} + +@misc{flamingo, + Author = {Jean-Baptiste Alayrac and Jeff Donahue and Pauline Luc and Antoine Miech and Iain Barr and Yana Hasson and Karel Lenc and Arthur Mensch and Katie Millican and Malcolm Reynolds and Roman Ring and Eliza Rutherford and Serkan Cabi and Tengda Han and Zhitao Gong and Sina Samangooei and Marianne Monteiro and Jacob Menick and Sebastian Borgeaud and Andrew Brock and Aida Nematzadeh and Sahand Sharifzadeh and Mikolaj Binkowski and Ricardo Barreira and Oriol Vinyals and Andrew Zisserman and Karen Simonyan}, + Title = {Flamingo: a Visual Language Model for Few-Shot Learning}, + Year = {2022}, + Eprint = {arXiv:2204.14198}, +} + +@misc{blip2, + Author = {Junnan Li and Dongxu Li and Silvio Savarese and Steven Hoi}, + Title = {BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models}, + Year = {2023}, + Eprint = {arXiv:2301.12597}, +} + +@misc{kosmos1, + Author = {Shaohan Huang and Li Dong and Wenhui Wang and Yaru Hao and Saksham Singhal and Shuming Ma and Tengchao Lv and Lei Cui and Owais Khan Mohammed and Barun Patra and Qiang Liu and Kriti Aggarwal and Zewen Chi and Johan Bjorck and Vishrav Chaudhary and Subhojit Som and Xia Song and Furu Wei}, + Title = {Language Is Not All You Need: Aligning Perception with Language Models}, + Year = {2023}, + Eprint = {arXiv:2302.14045}, +} + +# DECLIP +@misc{li2021declip, + title={Supervision Exists Everywhere: A Data Efficient Contrastive Language-Image Pre-training Paradigm}, + author={Yangguang Li and Feng Liang and Lichen Zhao and Yufeng Cui and Wanli Ouyang and Jing Shao and Fengwei Yu and Junjie Yan}, + year={2021}, + eprint={2110.05208}, + archivePrefix={arXiv}, + primaryClass={cs.CV}, + url={https://ar5iv.org/abs/2110.05208} +} + +# CLIP +@misc{radford2021learning, + title={Learning Transferable Visual Models From Natural Language Supervision}, + author={Alec Radford and Jong Wook Kim and Chris Hallacy and Aditya Ramesh and Gabriel Goh and Sandhini Agarwal and Girish Sastry and Amanda Askell and Pamela Mishkin and Jack Clark and Gretchen Krueger and Ilya Sutskever}, + year={2021}, + eprint={2103.00020}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} + +# FLAVA +@inproceedings{singh2022flava, + title={FLAVA: A Foundational Language And Vision Alignment Model}, + author={Amanpreet Singh and Ronghang Hu and Vedanuj Goswami and Guillaume Couairon and Wojciech Galuba and Marcus Rohrbach and Douwe Kiela}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + pages={15638--15650}, + year={2022} +} + +# ControlNet GITHUB +@misc{controlnetgithub, + title={Lllyasviel/controlnet}, + url={https://github.com/lllyasviel/ControlNet}, + journal={GitHub}, + author={Lllyasviel, Zhang}, + year={2023} +} + +#DreamBooth Github +@misc{dreamboothdataset, + title={DreamBooth}, + url={https://github.com/google/dreambooth/tree/main/dataset}, + journal={GitHub}, + author={Google}, + year={2023} +} + +#DreamBooth Paper +@misc{dreamboothpaper, + title={DreamBooth: Fine Tuning Text-to-Image Diffusion Models +for Subject-Driven Generation}, + url={https://arxiv.org/abs/2208.12242}, + author={Nataniel Ruiz and Yuanzhen Li and Varun Jampani and Yael Pritch and Michael Rubinstein and Kfir Aberman}, + year={2022}, + archivePrefix={arXiv} +} + +# DreamFusion paper +@misc{poole2022dreamfusion, + title={DreamFusion: Text-to-3D using 2D Diffusion}, + url={https://arxiv.org/abs/2209.14988}, + author={Poole, Ben and Jain, Ajay and Barron, Jonathan T. and Mildenhall, Ben}, + year={2022}, + archivePrefix={arXiv}, +} \ No newline at end of file diff --git a/docs/source/multimodal/nerf/configs.rst b/docs/source/multimodal/nerf/configs.rst new file mode 100644 index 000000000000..96dac7694f6d --- /dev/null +++ b/docs/source/multimodal/nerf/configs.rst @@ -0,0 +1,142 @@ +Common Configuration Files +============================ + +This section describes the NeMo configuration file setup that is specific to models in the MM NeRF collection. For general information +about how to set up and run experiments that is common to all NeMo models (e.g. Experiment Manager and PyTorch Lightning trainer +parameters), see the `Core Documentation <../../core/core.html>`_ section. + +The model section of the NeMo Multimodal NeRF configuration files generally requires information about the dataset, +the background and/or foreground NeRF networks, renderer and the guidance model being used. The sections on +this page cover each of these in more detail. + +Example configuration files for all of the NeMo Multimodal NeRF scripts can be found in the +config directory of the examples ``{NEMO_ROOT/examples/multimodal/generative/nerf/conf}``. + + +Trainer Configuration +--------------------- + +Trainer configuration specifies the arguments for Pytorch Lightning Trainer Object. + +.. code-block:: yaml + + trainer: + devices: 1 # Number of GPUs for distributed, or the list of the GPUs to use e.g. [0, 1] + num_nodes: 1 # Number of nodes for distributed training + precision: 16 # Use 16 to enable or 32 for FP32 precision + max_steps: 10000 # Number of training steps to perform + accelerator: gpu # accelerator to use, only "gpu" is officially supported + enable_checkpointing: False # Provided by exp_manager + logger: False # Provided by exp_manager + log_every_n_steps: 1 # Interval of logging + val_check_interval: 100 # Interval of validation + accumulate_grad_batches: 1 # Accumulates gradients over k batches before stepping the optimizer. + benchmark: False # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use for your hardware. + enable_model_summary: True # Enable or disable the model summarization + + +Refer to the `Pytorch Lightning Trainer `__ API section +for all possible arguments + + +Experiment Manager Configurations +--------------------------------- + +NeMo Experiment Manager provides convenient way to configure logging, saving, resuming options and more. + +.. code-block:: yaml + + exp_manager: + name: ${name} # The name of the experiment. + exp_dir: /results # Directory of the experiment, if None, defaults to "./nemo_experiments" + create_tensorboard_logger: False # Whether you want exp_manger to create a TensorBoard logger + create_wandb_logger: False # Whether you want exp_manger to create a Wandb logger + wandb_logger_kwargs: # Wandb logger arguments + project: dreamfusion + group: nemo-df + name: ${name} + resume: True + create_checkpoint_callback: True # Whether you want Experiment manager to create a model checkpoint callback + checkpoint_callback_params: # Model checkpoint callback arguments + every_n_epochs: 0 + every_n_train_steps: + monitor: loss + filename: '${name}-{step}' + save_top_k: -1 + always_save_nemo: False + resume_if_exists: True # Whether this experiment is resuming from a previous run + resume_ignore_no_checkpoint: True # Experiment manager errors out if resume_if_exists is True and no checkpoint could be found. This behavior can be disabled, in which case exp_manager will print a message and continue without restoring, by setting resume_ignore_no_checkpoint to True + +Model Configuration +------------------- + +Dataset Configuration +^^^^^^^^^^^^^^^^^^^^^ + +Training, validation, and test parameters are specified using the ``data`` sections in the model +configuration file. Depending on the task, there may be arguments specifying the augmentations +for the dataset, the image resolution, camera parameters and so on. + +Any initialization parameter that is accepted for the Dataset class used in the experiment can be set in the config file. +Refer to the `Datasets <./datasets.html#Datasets>`__ section of the API for a list of Datasets and their respective parameters. + +An example NeRF dataset configuration should look similar to the following: + +.. code-block:: yaml + + model: + data: + train_batch_size: 1 + train_shuffle: false + train_dataset: + _target_: a pytorch Dataset or IterableDataset class + + val_batch_size: 1 + val_shuffle: false + val_dataset: + _target_: a pytorch Dataset or IterableDataset class + + test_batch_size: 1 + test_shuffle: false + test_dataset: + _target_: a pytorch Dataset or IterableDataset class + + +Model Architecture Configurations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Each configuration file should describe the model pipeline and architecture being used for the experiment. + +Here is a list of modules a nerf pipeline might use: + ++--------------------+-----------------------------------------------------+ +| **Module** | **Description** | ++====================+=====================================================+ +| :code:`guidance` | guidance model | ++--------------------+-----------------------------------------------------+ +| :code:`nerf` | the main network for foreground density and color | ++--------------------+-----------------------------------------------------+ +| :code:`background` | a complimentary layer for background color | ++--------------------+-----------------------------------------------------+ +| :code:`material` | materials network for lightning and shading effects | ++--------------------+-----------------------------------------------------+ +| :code:`renderer` | rendering layer | ++--------------------+-----------------------------------------------------+ + +Refer to `DreamFusion <./dreamfusion.html#dreamfusion>`_ for model specific configurations. + + +Optimizer Configurations +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + optim: + name: adan + lr: 5e-3 + eps: 1e-8 + weight_decay: 2e-5 + max_grad_norm: 5.0 + foreach: False + + +By default we use ``adan`` as the optimizer, refer to NeMo user guide for all supported optimizers. diff --git a/docs/source/multimodal/nerf/datasets.rst b/docs/source/multimodal/nerf/datasets.rst new file mode 100644 index 000000000000..3981cef34c9f --- /dev/null +++ b/docs/source/multimodal/nerf/datasets.rst @@ -0,0 +1,81 @@ +Datasets +======== + +.. note:: It is the responsibility of each user to check the content of the dataset, review the applicable licenses, and determine if it is suitable for their intended use. Users should review any applicable links associated with the dataset before placing the data on their machine. + + +Rays dataset +------------ +Ray datasets are specialized data structures designed for applications in computer graphics, notably in 3D reconstruction, neural rendering, and ray tracing. + +Ray datasets are characterized by their detailed representation of rays, each defined by an origin point (rays_o) and a direction vector (rays_d). +These datasets are closely tied to specific image dimensions, including height and width, which dictate the resolution and aspect ratio of the target images. +Alongside the core ray data, these datasets typically include additional metadata such as camera parameters, depth values, and color information. +The diversity and complexity of the dataset, encompassing a range of viewpoints and lighting conditions, play a crucial role in capturing the nuances of real-world light behavior. + + +Random Poses Dataset +^^^^^^^^^^^^^^^^^^^^ +The Random Poses Dataset randomly generates camera poses, each translating to a unique set of rays characterized by their origins and directions. +This randomization is key to covering a wide range of potential viewpoints and angles, mimicking a comprehensive exploration of a 3D scene. +This diverse sampling is essential for training robust NeRF models capable of accurately reconstructing and rendering 3D environments from previously unseen angles. + +The dataset inherently accounts for the necessary parameters of ray generation, such as the height and width of the target images, +ensuring that the rays are compatible with the specific requirements of the rendering or reconstruction algorithms. +In addition to the ray origins and directions, the dataset may also include other relevant metadata like camera intrinsic and extrinsic parameters, +contributing to a more detailed and versatile training process. + +An example of RandomPosesDataset usage as a training dataset is shown below: + +.. code-block:: yaml + + model: + data: + train_batch_size: 1 + train_shuffle: false + train_dataset: + _target_: nemo.collections.multimodal.data.nerf.random_poses.RandomPosesDataset + internal_batch_size: 100 + width: 512 + height: 512 + radius_range: [3.0, 3.5] + theta_range: [45, 105] + phi_range: [-180, 180] + fovx_range: [10, 30] + fovy_range: [10, 30] + jitter: False + jitter_center: 0.2 + jitter_target: 0.2 + jitter_up: 0.02 + uniform_sphere_rate: 0 + angle_overhead: 30 + angle_front: 60 + + +Circle Poses Dataset +^^^^^^^^^^^^^^^^^^^^ +Circle Poses Dataset is a specialized ray dataset designed for generating samples of rays in a circular pattern. +The key feature of this dataset is its ability to simulate camera positions arranged along a circular path, focusing on a central point. +This arrangement is particularly useful for capturing scenes from multiple, evenly spaced angles, ensuring a comprehensive view around a central axis. + +The defining parameter of the Circle Poses Dataset is its size, which dictates the number of samples or camera poses around the circle. +A larger size results in more camera positions being generated, offering finer granularity and coverage of the circle. +Each camera pose corresponds to a unique set of rays, with origins and directions calculated based on the position around the circle and the focus on the central point. + +The Circle Poses Dataset is particularly valuable during validation and testing to generate a holistic view of the reconstructed scene. + +An example of CirclePosesDataset usage as a validation dataset is shown below: + +.. code-block:: yaml + + model: + data: + val_batch_size: 1 + val_shuffle: false + val_dataset: + _target_: nemo.collections.multimodal.data.nerf.circle_poses.CirclePosesDataset + size: 5 + width: 512 + height: 512 + angle_overhead: 30 + angle_front: 60 diff --git a/docs/source/multimodal/nerf/dreamfusion.rst b/docs/source/multimodal/nerf/dreamfusion.rst new file mode 100644 index 000000000000..cf37873b15b9 --- /dev/null +++ b/docs/source/multimodal/nerf/dreamfusion.rst @@ -0,0 +1,310 @@ +DreamFusion +=========== + +Model Introduction +------------------- +DreamFsuion :cite:`mm-models-poole2022dreamfusion` uses a pretrained text-to-image diffusion model to perform +text-to-3D synthesis. The model uses a loss based on probability density distillation that enables the use of a 2D +diffusion model as a prior for optimization of a parametric image generator. + +Using this loss in a DeepDream-like procedure, the model optimizes a randomly-initialized 3D model +(a Neural Radiance Field, or NeRF) via gradient descent such that its 2D renderings from random angles achieve a low +loss. The resulting 3D model of the given text can be viewed from any angle, relit by arbitrary illumination, or composited +into any 3D environment. This approach requires no 3D training data and no modifications to the image diffusion model, +demonstrating the effectiveness of pretrained image diffusion models as priors. + +Dreamfusion models can be instantiated using the :class:`~nemo.collections.multimodal.models.nerf.dreamfusion.DreamFusion` class. + +.. image:: images/dreamfusion_model_overview.png + :align: center + :width: 800px + :alt: DreamFsuion, overview of the model + + +Image guidance +^^^^^^^^^^^^^^ +This section of DreamFusion pertains to the initial phase where the model interprets and translates text inputs into visual concepts. +Utilizing a diffusion based text-to-image model, DreamFusion processes the text input, extracts key visual elements, and translates these into initial 2D images. +The process ensures that the generated 3D models are not only accurate in terms of the text description but also visually coherent and detailed by conditioning +the 2D image based on the view angle. + + +NeRF (foreground) network +^^^^^^^^^^^^^^^^^^^^^^^^^ +The Neural Radiance Fields (NeRF) network is at the heart of DreamFusion's 3D rendering capabilities. +In DreamFusion, the NeRF network takes the 2D images generated from the textual description and constructs a 3D model. +This model is represented as a continuous volumetric scene function, which encodes the color and density of points in space, +allowing for highly detailed and photorealistic renderings. + +Background layer +^^^^^^^^^^^^^^^^ +DreamFusion can leverage a background layer dedicated to background modeling. + +In scenarios where a dynamic background is needed, DreamFusion can be configured to use a secondary NeRF network to generate a background. +This network functions in parallel to the primary NeRF network, focusing on creating a coherent and contextually appropriate backdrop for the main scene. +It dynamically adjusts to lighting and perspective changes, maintaining consistency with the foreground model. + +Alternatively, DreamFusion allows for the integration of a static background color, which is particularly useful in scenarios where the focus is predominantly on the object being generated, and a non-distracting backdrop is desirable. +Implementing a static color background involves setting a uniform chromatic value that encompasses the periphery of the 3D model. +This approach simplifies the rendering process and can be beneficial in reducing computational load while maintaining focus on the primary object. + +Materials network +^^^^^^^^^^^^^^^^^ +The material network in DreamFusion is responsible for adding realism to the 3D models by accurately simulating the physical properties of different materials. +This network takes into account various aspects like texture, reflectivity, and transparency. +By doing so, it adds another layer of detail, making the objects generated by DreamFusion not just structurally accurate but also visually and tactilely realistic. + + +Renderer layer +^^^^^^^^^^^^^^ +The renderer layer functions as the culminating stage in DreamFusion's processing pipeline. +It translates the synthesized volumetric data from the NeRF and material networks into perceptible imagery. +Employing ray-tracing algorithms, this layer computes the interaction of light with the 3D scene, +producing images that exhibit sophisticated attributes like accurate shadow casting, +dynamic lighting, and perspective-correct renderings. + + + +Model Configuration +------------------- + +DreamFusion models can be instantiated using the :class:`~nemo.collections.multimodal.models.nerf.dreamfusion.DreamFusion` class. +The model configuration file is organized into the following sections: + +.. code-block:: yaml + + _target_: nemo.collections.multimodal.models.nerf.dreamfusion.DreamFusion + defaults: + - nerf: torchngp + - background: static + - material: basic_shading + - renderer: torchngp_raymarching + - guidance: sd_huggingface + - optim: adan + - loss: dreamfusion + - data: data + - _self_ + + ### model options + resume_from_checkpoint: + prompt: 'a hamburger' + negative_prompt: '' + front_prompt: ', front view' + side_prompt: ', side view' + back_prompt: ', back view' + update_extra_interval: 16 + guidance_scale: 100 + export_video: False + + iters: ${trainer.max_steps} + latent_iter_ratio: 0.2 + albedo_iter_ratio: 0.0 + min_ambient_ratio: 0.1 + textureless_ratio: 0.2 + + data: + train_dataset: + width: 64 + height: 64 + val_dataset: + width: 800 + height: 800 + test_dataset: + width: 800 + height: 800 + +- ``defaults``: Defines default modules for different components like nerf, background, material, etc. +- ``resume_from_checkpoint``: Path to a checkpoint file to initialize the model with. +- ``prompt``: Main textual input for the model describing the object to generate. +- ``negative_prompt``: Textual input describing what to avoid in the generated object. +- ``front_prompt``, ``side_prompt``, ``back_prompt``: Textual inputs that are appended to the prompts for more detailed orientation guidance. +- ``update_extra_interval``: Interval for updating internal module parameters. +- ``guidance_scale``: The guidance scaled used with the diffusion model. +- ``export_video``: Boolean to determine whether to export a 360 video of the generated object. +- ``iters``, ``latent_iter_ratio``, ``albedo_iter_ratio``, ``min_ambient_ratio``, ``textureless_ratio``: Various ratios and parameters defining iteration behavior and visual characteristics of the output. +- ``data``: Defines dataset dimensions for training, validation, and testing. + +The behavior of the pipeline can be precisely adjusted by fine-tuning the parameters of various components in the default section. +Some components support different backends and implementations, the full components catalog can be viewed in the config directory ``{NEMO_ROOT/examples/multimodal/generative/nerf/conf/model}``. + +Image guidance +^^^^^^^^^^^^^^ + +.. code-block:: yaml + + _target_: nemo.collections.multimodal.modules.nerf.guidance.stablediffusion_huggingface_pipeline.StableDiffusion + precision: ${trainer.precision} + model_key: stabilityai/stable-diffusion-2-1-base + t_range: [0.02, 0.98] + +- ``precision``: Sets the precision of computations (e.g., FP32 or FP16). +- ``model_key``: Specifies the pre-trained model to use for image guidance. +- ``t_range``: Range of threshold values for guidance stability. + + +NeRF (foreground) network +^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + _target_: nemo.collections.multimodal.modules.nerf.geometry.torchngp_nerf.TorchNGPNerf + num_input_dims: 3 + bound: 1 + density_activation: exp + blob_radius: 0.2 + blob_density: 5 + normal_type: central_finite_difference + + encoder_cfg: + encoder_type: 'hashgrid' + encoder_max_level: + log2_hashmap_size: 19 + desired_resolution: 2048 + interpolation: smoothstep + + sigma_net_num_output_dims: 1 + sigma_net_cfg: + num_hidden_dims: 64 + num_layers: 3 + bias: True + + features_net_num_output_dims: 3 + features_net_cfg: + num_hidden_dims: 64 + num_layers: 3 + bias: True + +Describes the NeRF network's architecture, including the density activation function, network configuration, and the specification of the sigma and features networks. + +Background layer +^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + _target_: nemo.collections.multimodal.modules.nerf.background.static_background.StaticBackground + background: [0, 0, 1] + +Static background, where the background key is the RGB color. + +.. code-block:: yaml + + _target_: nemo.collections.multimodal.modules.nerf.background.torchngp_background.TorchNGPBackground + + encoder_type: "frequency" + encoder_input_dims: 3 + encoder_multi_res: 6 + + num_output_dims: 3 + net_cfg: + num_hidden_dims: 32 + num_layers: 2 + bias: True + +Dynamic background, where the background is generated by a NeRF network. + + +Materials network +^^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + _target_: nemo.collections.multimodal.modules.nerf.materials.basic_shading.BasicShading + +Defines the basic shading model for the material network. The basic shading model supports textureless, lambertian and phong shading. + + +Renderer layer +^^^^^^^^^^^^^^ + +.. code-block:: yaml + + _target_: nemo.collections.multimodal.modules.nerf.renderers.torchngp_volume_renderer.TorchNGPVolumeRenderer + bound: ${model.nerf.bound} + update_interval: 16 + grid_resolution: 128 + density_thresh: 10 + max_steps: 1024 + dt_gamma: 0 + +Configures the renderer, specifying parameters like update interval, grid resolution, and rendering thresholds. + + +DreamFusion-DMTet +----------------- +NeRF models integrate geometry and appearance through volume rendering. As a result, +using NeRF for 3D modeling can be less effective when it comes to capturing both the intricate details of a surface as well as +its material and texture. + +DMTet finetunning disentangles the learning of geometry and appearance models, such that both a fine surface and a rich +material/texture can be generated. To enable such a disentangled learning, a hybrid scene representation of +[DMTet](https://nv-tlabs.github.io/DMTet/) is used. + +The DMTet model maintains a deformable tetrahedral grid that encodes a discretized signed distance function and a +differentiable marching tetrahedra layer that converts the implicit signed distance representation to the explicit +surface mesh representation. + + +Model Configuration +^^^^^^^^^^^^^^^^^^^ + +DreamFusion models can be instantiated using the same class as DreamFusion :class:`~nemo.collections.multimodal.models.nerf.dreamfusion.DreamFusion`. +However, the following changes to the training pipeline are necessary: + +.. code-block:: yaml + + _target_: nemo.collections.multimodal.models.nerf.dreamfusion.DreamFusion + defaults: + - nerf: torchngp + - background: torchngp + - material: basic_shading + - renderer: nvdiffrast # (1) + - guidance: sd_huggingface + - optim: adan + - loss: dmtet # (2) + - data: data + - _self_ + + ### model options + resume_from_checkpoint: "/results/DreamFusion/checkpoints/DreamFusion-step\=10000-last.ckpt" # (3) + prompt: 'a hamburger' + negative_prompt: '' + front_prompt: ', front view' + side_prompt: ', side view' + back_prompt: ', back view' + update_extra_interval: 16 + guidance_scale: 100 + export_video: False + + iters: ${trainer.max_steps} + latent_iter_ratio: 0.0 + albedo_iter_ratio: 0 + min_ambient_ratio: 0.1 + textureless_ratio: 0.2 + + data: + train_dataset: + width: 512 # (4) + height: 512 # (4) + val_dataset: + width: 800 + height: 800 + test_dataset: + width: 800 + height: 800 + + +We note the following changes: +1. The rendering module was changed from a volumetric based one to a rasterization based one (nvdiffrast). +2. The model loss is changed to account for the changes in the geometry representation. +3. DreamFusion-DMTet finetunes a pretrained DreamFusion model, the pretrained checkpoint is provided using ``resume_from_checkpoint``. +4. The training shape is incrased to 512x512 + + +References +---------- + +.. bibliography:: ../mm_all.bib + :style: plain + :filter: docname in docnames + :labelprefix: MM-MODELS + :keyprefix: mm-models- diff --git a/docs/source/multimodal/nerf/images/dreamfusion_model_overview.png b/docs/source/multimodal/nerf/images/dreamfusion_model_overview.png new file mode 100644 index 0000000000000000000000000000000000000000..f70b84e932afaecbc520f10b5016bcff905b846c GIT binary patch literal 1653618 zcmZs@1z42Z`vr=EfJjJ4H!3JdOP91r2#R!fHw+B|QUW5+Nyo^y`leBXR~zp>VO*CzO_yc8A&DFzY}64q;JaU~=qOe`cM)JS!u^K5H(E zso!D04d|>itV(tyCv1Lf8Cw}rkeOA-95EZzZs{6Waev3H7?&d*E2}w~OG0Re3_#@zm|jgYty9|5zoSot-W7zTzQ&N*KB}`G2;$SNe7nBN=_H z;EVRW;pDtc{TWiv2ry9mUUe?9GsjwJwfYkM>ce2(EYh%Asv0rKEn5-IUlCAhtl94|CLo0^)+ zlau)0kde>0jmdVSd_~P~^4al8Nrv|JU(HKpTwJ*4Y$0NFSV_5g>ah5dX0DQ|p>i`i zD_ASIRR(j5^LUwDrKYi&S#UtWJxh4o!rntn)SO}SydDX0t{)$A3o9z5pGWJ1tL5Z8 zW9qs~jTiizpVL-IGw7r*%skM_)G5biP;w$A6kJ|X^QTo2Q>b}q=fD8o!NI|h`OCY$ zNS>aaeBA#VsgcG1#0mQ{5B8FMydz)F>MBE~QjVJK`@9c(XC4}aq^y_*Jx9M^de?Fd zc>mvR9dNn*e-(9`nWHj`vA;W=u?1_fBbUj<1DDfe9u;u@HT=9)9~WMxQ5_WYJ8^+$ z@xV1JJDY@pB2zt2UyZO-9SdHz=T=)WrQuNdbMLw=hC|T#DcM$ehjHy&= zhQEK_3NGDYSfthmhr{2fsEo!uItXcBf|gnBp9vTk7#OmXfNioP2bYlNBsGGAY{kLR zv7^H(q1%c*s>VAWzPuC|92^93$HvOqhk1^PiODI2jsJhXBEE9sP2y^P0?GN7ft8o{ zHCJNhTLz@rlZ)V1eMe{Kf${Oz3xU~$_2Ph>bL{;eSF25>I07?xaJLqh~h_9Q+c)FQW`Ok6OUJ5L+Q z1@C>+Hdv{yRcrC`~o&0}N4 zV8feP(NX;zot*kdN2Su=rhUjQb$%2X`h%@Wge%d=(9mB!Z$!Yl+zKxFG)m9Qi}EG_ zLxPf&>15;NZ=$cYY|EG-r@TBCJWr-lW6|%Cd|La+u`)TKR1-J1gpQ65<39R_=&S+) z-G_&q^zuojTTlPDkQY7u+bWpx!x!G>eIOwtql@a`lp@}}xO$9tAB0z{zM>1=BQp-L zI3SWvPkBJ3L+w^KHy_8#va_-_TwZ!u5aC8TnGsM%L_|b&7$(eRNIw>3kAF-V!NJcD zA9y3qT-Mqup_D^_kB{HOs%K(C-`v~`o_MX||11#JzYEk?@$PJK@$tB2jA*)#9`UJ% z(5FwIK)HkNoVW~!(Q}&sph9X% zEYgBX3Bt_S*tmV=ATH}B=7YsQ*3}7l2&ML}-Ii#>cS(;1I}zM<1rc%(bM4%=OUddU zzR~O3ZMC=lyku64IV)Q(s99+_8nUC3e`4A7<$=`S7W5`=f#;jdlcI5^jEszLT}Ek? z#H(9dMkm`-5CKZe=C{6)(ut;@S^Idb$y{);QSRO>S_Jey%H6yvaq*E%+^4eHgP+60 z_2FDuW_TC@;ORjpo!f(`R|mIC7{a;EJW@akWo2bC#CBC^GJkBO273(_6_f$Hq|#28 z+ZMAZGpK%8zh4j#eL$vT)u4V|x99DC=5BH3{-N7ucy&~@&6r2T>MZvb30T%j7G8Ji zUx%@)RqxgmUP|&b6kmE8c+oNRskU|cGrY-~%6g;qNxNYADcAEiS+R*?< z4@5v-fRdgZJGVmmM;L&Cm%=}e8yTiB` z#40Gg>2HG!*7X{j)YSpyVW8u9&WF6zomRgl2C(4^ zKXGr{1D|BML$y%9JD(cY0@@jc_XTFAU3*M2Eci!PY?u^Giu$ei$OcWdR@_GZbbjE{ z7R`oDahfDQU{~iFW*e!#$}7n0;fKRKr!-0~-E>yo3HO|hp#~o9oP28I8jbZXdzmyo zp3*GyYLI$%Vz#Z*s9=xJMwaVHMC1-bUflCA?7z+E?hxXWx&QAj#$~c0DVp6oUerh^ zv4|Ny^TufTe=3*%E%a%FLO~-2B@EDuaMnMb7tZ%k#V6i^obK;e{NJ1giTix{(d}x{ z)6;|QJm!3*?H4KS@9TT#Q(&N7r6Uejq;qjeNek$U00pVW1n3cq)8aaBO-OK(4p;i# zRG)UE?kq}lhWxnBAwT3W>CW?vXMex%{9CG=G-*NqS`p80cc_k#8at_9Ef8Xp9pnd&AZBf(v~M&4D1nrz_L#VF)_t66u}l=U0q?E2eoH` z{VP;8baea{A5Q`v=#w6&*y}=7Oi+P5#V3Lnc$k=&zQ|;@8?pnOgxQ=1TE94LKPKkt zU(T_(Kfs{-4C!pCKzZE-S`tHl@7`@diud+(;$m8@ds{^35Or4P1fqQu335xDmAT}U zOr;D3&sxF@FY8#c2bnamc4+47D^C0Bztgm}W)G62$|T83YzeSL5p#@DVM&QNU-DB> zg`P*-R%!xT^R?qKK9-*X02D^RT~IkevlU8(a>`XzR|i8E4v&uJ4$eU9xLxWe zo#~XArm|1g^?YfMB2m{3?){~L?E!u0HX1_+R1TgaZnfa^c4X}avDz=!G9=Yne?x-{ zpngy%0k*q^R5>KXL9P0hGj&?Gzy<6C13?1R3ZNs-oqnHKB_v%%p!LcsC`8F*%0D;<{jDDk51rYu`3E?~QoFnUvNwP4+xDX$+dOiPP+aYegb(lRt4azr5cu_09 zsNc0@58eVypwNn}nKek9c5|XM)RgnrU+NIq<&XR=hbWy%sou}X%C+w9KT^R~<9EvA ze540M$jG>9QRnC=u?Ihq|0)6{H1M)#(K4VGMfMmgkm>0HB5-(e!pKTJU<|%qDvSew zdcI_9RMhYN40{kwXkJhfepe9#VJKKeb`B0Pu)wUCe0+S;Pou!+7VNcpWB-UV%Q-kW z+$^Sib|KS)W>$bUoA|wBU(<>c$haSj>B#|nnNqhddV{vBp7kZhoFg8y;S@+!` zKiU=f;s%+uCx`#L-Ty-QH+;!is!UwS$e_D}c;q4fU>7M(W=3$3b<@)@&phhXy2<_z z`~tiE=FOWQgJyplYHXJTD>1;a0LP&Rh=S_4NDH9jKM>Bi82ks2GCtkEI6vP#V}^G1 z+~&SY`12$FcQHRBnBV>o8t^GCio6M5kUuXiT%W(g!TI(R4Xw#1)-C;NiG8H{uSR19 zxMWBxEr7Izg@uEXH}MHL*22LV0r5jKy4wf^uu@i5HpzJs8xyl{W~M-28+7n!y(UoB zn7W!c)H9egMz}LOM1^%Y{;LRc(?|;)s*&mW3 z?)!Q7Z_SD=!#lEt=$&1#9wGTgxY^Ds6^#7l%|GNoK?0Zw$Sd!kOXR$7x(!l(=go$N zJ%*e82j6|oF<}83@h*25zzka+S@w8CTU)?`;1d$!hEw4EckkZG%F6?Miv*Uk#{qe; zMswSO#{R7R|HLVJVDsj5ZQ{RiI!*w!bJTkK18eAK1j8R`Pe)&N@ZJ6m)>yPeH(T9s zH)#P(@fdpUSZ7<2e-d67&INcmSQ>zn8KPy7da5Ja7VK_1!RK3M{uESvK-U^X0DK0k z1~R8cy8xj2aRLyqAjUs`W(5Izi~IiuY|Db3CbJ4a9hL1s1p?gy#0Xe+KnvX5h>3|! zQBfVOZ*Y%Bj~%lYuLHlOUwftP&%k>zG1+);Gqq*^t}tyZk%{ZCo9tS6vfk*66l=YA zm02})^JXlN2Rc>`z-2*X-eNgG`v8#!TdnX<;1)9Fx}NOZmPoX;j0``3lK|2o#wA|T~o`8Gn2#15}{j*i-rdk zSb#iWVm<`qA+Y(DJhYZs0a|~X0&KSt-85(GK!9IAGXroM^Y>`q8}bbT3D6>4R%!P5 zAGf5o54<3ay=*4Uw0^c&{!)SoBk$Lr_G{`e3v0oG zyrU9cA`=a-1>WD5E@dlDbuUGQ+d`7v%C-o{f8+g;7G!;!rv3P42f<4vpY*66|ILt8 z`o`V6ntZLNv7OF0o2K~zC@z}J9DID8-QC@0F3+h*Nogq~{(ohg@W9#0>E+J#){)zd zsF9@_0}@>y2n_M@@yqVAfR%R`euW6+H~2`CB_t*qfJk%X3o}wf0wv~)?9*V$42VEV z3?(hC{j?Q3&!7J13&sa>d9wMCws;-;-hD@|4YOi3?4+aZD{S5TB3zR-=joTDqhm3} zn>vwUMLB^e@=8=%X;lIW;`y9UjCDLe`bAv%NO>zgz7C$5%|DrL=&2S(VyZoM5x5vr zaGkRvN+A<&W)XnAfaBxVym({g;%qETCwxXTbD4XU^!t@lkIpaDZ4rNIeImcnna-Cd zlRa4c)#zt1Z8u}06$V+V`8@j6#XRl^HZISvl^%~C2+yUwIzQHg^^)1lFY}j0dT$m$ zjzoex@~h7_cc#TsM+0J?w+_*?UGLTJP7y7~3#ipQyOA~r&V@QG6d;6Lrppp*Jx%Ij z)L&ESE}s?byUg%Pt{cWH$Dk9>-hUX5-B*92wOO--ZCz;@F+!RA^&1$&KK^k&fj5_pX3&`H|o{-09MmJrW#Y3iT8*jh`Y+K zQA4QLT^RC0Wou1xfhsg8Y5pyO-!8=sl9qV>6FIiYb#@#!d4oO{Yl^HAJ%kkg;;&n% zsmpo;BLJQR%ES$#!R;zaa6$AM7XwiQAjZF?#oJrBQTI=2A^;kIgVnrq1$vz-lYYX} ze>v8m`)BFILZ(m;As9f&w`vTM2q)X;Kp6RvucduXiK_ytS~HoYFM3 z;N?klkJe$u-VxaTkXW8c&PqW3<)GotbAUGk!r} z3Y3~z_B%|rzuxDfri7#PtKir!&T7bd&f-tncR80~`HwJ(6q zKf`RZ%?GcG}nd5#xx% z2jm_L=o|;izn1Wnp+DBQGIril>c9$cH|yhTDe}4=;#L-o4j?au#JdXy`rq+VwLEfKP)8DgAf= zlww0IOZd~+uE-8Uz-^j~m~JI5ZYj3C3nv_^Naq1`OaMxOf*K%t2Y@T;ht%=113=iM zw(amVX988ss@+axsdK}zKk9*fjs=0oqEF>g0jZYAqD*=Fd@xhaf|-nR9A&(*P;n4e z+!#~%XSP(CVpbD9N{_?<(MbCZTIM^FtgoQq@X9^EM{e2|xPcMb9V1oux4dvhtq*XI z6GgC~%wL4ry7akvHsQj{l|&~Fj@@aC9-l9^YeNjB-rP?xc;ytn7E8G_UVkA)>2``` zANi_EznWxvRL&%u%Ec+Nb(6_qZ9(>O^`5pTyN~y?z#i;#w?QV}UVEC9VE&;uhiw-X zh2Ll|C+;XoP0!ClC|x$qKb=?`T$E`GOrxk^Nv&Zg)xL*e9zC3PqTi`1j}n0_nH*jvQP-=%qu4yE#p9!>QQ+x+_KsPUrWYJ$?EEz=Q0e}3Po z|02J-a%5U^^^}cJYGyH$dxe3S`K@NBUVnZ-ICiB0%l^nUH{``s&v5G5qkO9HKB5}O zV=s-uH>v?_;T{N>>*CHyF`^=jn^$(e3Po`G$^JOov=EQ{`BFA7m%Xsc$|ErFQwx>2 z+JR2&4`H2}j?QHN6Cyc)umGhT5D!os&-j$t_B)}8OL7*8_c8rSdoDy zc`Fp&G)aJ3dFjIxWuHEM`g3^QPd8AM27r-j=dA%(UuiOP6jVt`$v?l(rAC1cBEV?SiTbzY$*?D{XN|jx5mIAbjU75>lC@G{^YK; zZ0ZAU)Y-;IyZUZgv-YoeK6fpd!{`5SuvMS1dF)VnU2L3A>LrKpcJ&$3*Z zc%8(48Y8jsK-Cv}(zEz(W;Vy>650DAZ1lA!BH~nU2)Um}rzZQPWJ&+gT+4#M5%9eMU`qH8E!#Fe0e$Cic1#u;Wnh;o5(ZqfXY;eY}Y zG(Vhcsk*~wf#XzHE-4r5-g7;B0?Fmxkr7^J3m(gUuNxX1D^+1nlod{x^>?mZF3+bP zghx+wTb4J3KKW$Db$;?yU)92~h3+7v>L3WN`DL=2y}p^}GmEuk8~P|Ir&Uxd|ZG*jQVM=X$h3e2F?anSzIq^| z{AIY#^0dfj)05QQVH--CgQ1KX!qC0*BOP5El*@d#LG?nB^ZuRv<4A*UdTZSkG_ORK zt9>oa(%e|v20x24qRdEyw($ir~KxFrHTHh_FU1_v>+`ak2br!k# z(D`D|9_vDA-{y3xmWtk_ZEfnMY{7OUK$IV_f#9lXLC&tK9^6bHuT2dVOWQhbOYKSD9gt!%XrOI0iubN?i(G=#s!uMX3lKIaZMJu6`4i_ zZ&=gnaMD`gaMH$8GNez1#z<*}m%~(A^`*`Q3qu<3oJsg!3!$g-F(VR8lqWU<&bW2f$qs* zT4Q}8_{@pd@m6?0g?;id*}(26^e4wfS264-o~za5$?kreVosf=G<7mF1*iZh`#i6a1cUVM9Q&zI*GERBPBun` zo$(myqj8?ENKDYN8~8$gk2dP%t(2WlIv1s^Y%?K>M_^8Z>Z1f=urVi7fUI@LII!C)J9c`Jn@UXjO3Z*efekcQy}3?G-b zH4whgy28zByNlvk;mIgT8U)!potm}RLuo#h8zqq>wQYE=$m9AkV4RoG;oh%k(wMDT zUHv%9NnfPvaAw-6DM#nPJyHwAbmiEjmY=wd+n1B6EA)5cv97L%4}~vQxX=2kZ8GQx zTEhGc;NN0GhJK;e+4_eY;!ws`wmD$TMm(-uzUG$}df6IzAGz5mpW|_r#OH_8B42Zl zj|Pq=A4?tZ5*8KnIOA2Yrr)Wq5qDiCjvBm&@#Fy+D<+XK&ubm;pg#qodd@MenRl0z zu)5e-CxtxHnfcD$)>cCv?&l${+rpL8s~kTD$i}p`3bZN(Bw#^2PMc~SPmcPA@f*wY zvn>>RdLo5+yE>vmzl98+7inq>T(hR95Nq|brhhp}c8BFFYLL@&yzJ_z#Cw^2t>JF? zxXZG!Eq}3tVhYxOl@Vv~0tU04!*=W|HK3}+<-P-p*AaJqB4uV5Grsv6;|y=%JFI6?}NuN3un|sBu*} z(TdNM9A|9rk&=L1hTKh+e%gk4rjG@UH+go%Wg|TJ>Ps6%p`2`agor)PUL7liZyx-P zylZ4;6n%VZGF>W*VxIa~IP$09x#PK`Ky*imU~&Gn^v|H4vFd$olhTK@RenN3{akV*K90(?R zVXV}&MFEajnSy!84r-;g9=w`=)GzXEvuvB)o(HguICJxaAB0=kuHz(?MUb^_gk@WB z!e4N!FC&@!YRN4qaLR9AYW^-%)j&`LKBM)4h?MDH{bs|U+qoI0L zyO51^-s)`zvTk`KG4J(+Z29JSIw zV3}_=VIJ7X?Ov+XHy*(1d*O6u%QD#F<%L%FBT6b>6R*JV~9eo)R_wYl(~cJjO{zSn8& z-#ab1>50g8-C+@Epm5QVV;`Du*cdzGbU;g8UMuhlUGcW}DTm7cd22rx)<#k@cgOl- z$Zzr>lHc{DZ@mCIvG}fLFT%T*>M9$`tLSTfm7&m}cAYr)S{so_f96*&IE+>UsVXZz z!KrJqxt_Z z9i9^aF!I}@`wsE~1-yFwTq89~amT$%**#@=*)5qYqp&)GvEEjrpWH0n$>lY?&m(Xx1+HBjLvJf zuUOz<{eBF_!`1cdYIs}z`=5&|al+>qXdD@`Tiz+LlxM8M2pjMCjWVbHseGz86@nE7 zb;*YN5}zPZYlp8SIgFg4>HJ9BKB?)c?l{3$4{Drt?_qmxA(k(KXXd8KP>DCymtI%W z_IXmBT;WhUBWT}W6fBt+1eTG&3wd69UB13v3cjC}rR6xy*ATu@K6k8H0n^u-rfxiC zMm@jfLBD z5#M7zluEY`XL<>XQ-84$$_}_j%r1VDf`t>b)dbVfBBo1;#qU|vw z1(_|cYnOY?O<^(FgAjdIcf<-4zSN?0U3HawY7z%aoPB_c4Ic|#kgeObk`J$I_s!m! zTV}b&rmB04y6zpy`)etrjRbMJnb2i_OwTV1o7F%0ed_8i!qI!&h zAr(wa{`DhqQ4pJ=0E-W8fPSRa{2?8UhU9WMUvNPa#X{at9NdT=_5utmdDA z7+B3?hi8H9(OGtrU}je|Zs{=e2Y6U+9UKZ7IKe^f5(YesGO12(F3ya(6*Gz)UgmB5 zlps-Dk|eEjUr%XqqU*nXlt_`r&pr2KqbCEh8d zacu)>WFjSu=j6$6or9I((-m3RiGaC38dBI$-zUp7Sxuw>~G`e0apX{_q+K*(d5q29-iwdZfD zeUaj(sprUqJHlLKIg-76RdmiNBM@rw-AC?Yr3OKDkfam{RdS&aAHS%-hxKj?am`Wf z;d@Lb8~S96?ZFrGW2({QmECMlwLL$LQeIHPC=!f>Aowa;p2*xfRE-Nl@19*(+BrR5 zty+t%oN@S?$bm-edggk)WC3HV=4?9DV|j2T)?$5%axzol2eCoBQ!Hv__jM>2w%d8# zOzu|Ftv@XB`1Ia(#jjY1;T9|`c|7CCO6{T;h2zZSGq3H5EzIM<`$gisSI?rC7={ z@#@}Fp<0R;#p)XDfeAyfI!uvRbSpd{pSI_g6|iutUBhKcKS@OAKoK#c-urlHF>b4f z1{=E*rzy+Od`LirCUHJ>V?IiAvlriGZoQlN8;->pk05CM5k<=79#4+>=Q$kV_Luzg zhfa{)v3oMbgP&^;3DCl8+TY-;JcD~p)}PblPYsKm5MuJJzmNT6;??GW;~5IAJHVeN zF1$azuee?3OfMFUEh_YY(dBD*qVQZ_F5PFKtt{^|-))Pn*chSMToqErGk4y4YPaF% zu|G2{-e2@n9lLiPC37KS?*w7{rKC2ZQDbM`07p9jht*Cm&RW11+s#`3%j(4UeWhyu zcO*ueVPc;tDNSf9{PNwsf+MEKrG7VuDZfzcOx5*T#KNxM<;!3ZZoF0tdRl$jeo-)= zK+}pU{zGv1C)x;gMuti7CQB@V%fQ0(3%F zVu9?Bfc*A^X6t}4=#2uYW0TN1R$TsOFbVQA z$C#}EAW&3PF!s9jX93Hn2|LNH)MpM799Z80`2qt!RM?h&*#Zx+H9U{TC{+&w7S>D8 z9s9~%h(N`ZHJHTS4WQl7RU-suK`?H98U?%s3-*lFhZs3y)aHKy zH)0YlD?+NPEMTU|7Brw>R1pYtlctse6SeP@v=4E4@pVdKw*^)DQ$H73QOmeMbNHiXh z4kLg)G@@2!Jy1t)e*E#2SoK()JCRs*IX)R>mN0Gx_k`S^+*ng@oJdNl=}=pe!F**k zzI|F7#3IVJA~z3V={z}oM16|}OrQtmb%;UY;L~D-W4ka_zPD|a|3DqGf^mdfwQMly zgvEM-S@3BYvbF(Z+u?~5@GW$#+s`!VlM{5g5J~2aj+!|9FXm6Gh<~ z85{9ooX8pXD8Vf&X>tAHc>lv~!4vxnsBn)|{VvUM>s|kvr}C~TO6OQ! z?6b={lwJuc(~u-7jAYB|Wt|20QEFrwqKcY|bDQNVoat_-qbJC&*i5wz1L>oXu8;wD z*h<9;o!DfJm7h^;=@X0`Zpc-lcN@NfEXP(=!qka(2+mb5>>wq#YAqDPmWL-#)04h) z^zdrr#C5$g*7cg@f#cSsJcA=$a&zMY%}uq;D$ZBu9R&w1!h)?93pAqKUSCeGCt^?g z1TV&LBW4|jrQS#*4&efbEt`Xu6R)-|9E7)kP)%%jFZv#Q9wo+Gz!(xV<|H<$G86AK zuF@{Z(S#pC{$ykGvDcu~>1zEsHSR1exm%ZPgVbXo=^fig9CnQE!U%qk`DB~r~9SuzD zX5g34Gi)9WAS}>KfPLHt%*vSv4PYsP6`J`D7??<7Wn!6{Mt0Aj!Q;FknHa+|SP#g& z`vO^J>6G%6iY)g_NhzBftcYjF!0`cKR#X4$1`fobYAf5IdUSr}1u!@jt0+}7s z2Lh(xb~936V9V=YE?hjh7%#OTs;#ZPwf&5zjTHBeAX0lOQ`u5~1<#mg=N${!w0!Da zQOP~gsWiaK5Kx9AI8ZNNi~JMKy0F0^MP^hLO%^aQh* zvGRM8^J(p7HMALeA%n6rBT-_J*XM&1>+;3rs?$b^oYcVPSz{T_lvvT|Ak!?@r1mA; zdZkyVbPrLJw9 zh>QAeG#AaDGK=~q;mg(xavrX>#ZYKGWUg@OfCNq9vHa*U*|_^C{b_@Va7!4sr9oQr z3KJg)dBkGCnEb%y0UBsK;mR;$nMOdq%-QeZP= zPiEDokyEgPYGZTrLmYO8W8;~!;{;+dN?^z@T+#}{FFyxUc_-S`zfa@w5Kg_EEZB?J zsZP~Oe`kfML*zTh?~0&0#^<``NU1+XqjX=`@Yw0U2yzl9sW`qn)57TeEuNcdflS9W z>X`+-isxCe$Nq_TD4KH~kC&F$=XU4(>q&&TNxPWJk;17r)f8-D=8}ycPf+uv$M8L7 z`BW2{1JsDiJuz}VloWn%U%1zW_p*!7NW|zA#Hh;W;U0fodte6cdB(l)s|zUaCC<$9 z@Rp#a(}L7^wXx_AxymZmrk#~(7FfUQ!QJ8%!O82wSLX=@R79V0qgJcEQ*cfhQ9KUF zFK1HcS{x$mF7YqojQROhm=%B-{F`so&`e|X++75-TH;?_dBykENeZ0Xl3?bol!J8C za5J+A7N4&{K~1a6x6kZsQu2+*KlsM@(!4Cpia0JBBVP$ur%>GX8Sq_0ZAAKvW?icH zAOe3h30}|w=Q0>_&_>Gu3vY6AGQhy7XlM##=Boe0L`At0TZ@>|t|_BrpMp~cD@!#< zK+OeCBQ;gkw6_d@&DlWx1a|eJ-^*t*bOO3?XNS6=p&r%u~?J}UH zH6b9)TA82}x^jPcw~(PRI`-ZreEiHoalU+_0`$b|(j6j;IO|J47yCh|&9XW?v;Z!b zrgm8^YKqpsD4#!Nuy>qJLP%)O#H7mVxVL@f$(6s(K0ZG-v-nCJ1U>2i7>i>)cQX_725 zZy0st7qnEgKNdrT9NjE23^7*ZOuta}&IC|A@@mUN$OJ|UcdBwLuiK_a&~l&@0*=%6 zEY%U=4hIH0OVNi1;`|7Mx8Kq4w$#A^6+711wH5c9$qZk1!UE$Z-2i} z810c)qjg$kuo3e1|32QBA8{=(I_ZJjq0C4tw~JZ0<5~VS0UR&T;q5$)@ak;Y^6S=d zf25Vy1IxG^!_Cr$#FTe&ODiha&d4n_TTWnbPBIMG6<*g=9Y3$ zvGc4b-E!r0NNV!U9uxksWO!^HjLO_TIeq{k` zGGAXX8k&+&73b=?wVr_~G4LJ%FMHy>%PhH(Vg#!Qn=5!l1jS=cN=Ws~j+k zf}>Jkw;Sgap9>emQZiD@q(hqT^d72c@FLKV@Rd$5G zY%{iNrmrKT1Ad9#3EKvzgqJVA4FBx(INM+9`YlSkanhA-MhcuRU|~wtw|4JEeD}D7 zq<{lhy2i$zJcMS5U;YMQS@x+Iu*rbqj&Ru8BMHTCxsMZt1O0q9Yj?*dDaog=-2z{J zC`b0BDXV+2^#ran6SR7jmv8^3^@ky2c{Nu4xbIk+wwldo+rgFRT7kvZHR4E(!jiR8 z&LL;n2~9^C4YtAct7Q5Z8{Z{<3{rZRD<#KclIJw|Zr4$&mrVV4hEBpYMbQohzhrB# ze5v3giD0WG=L69+3HP;-k1C9c8l(mum+WV20-*<8zX?N!o>`&YX>oa0B^S{m?Y({CdYM9ly)*dAts+|agNcygW->SL z!H5)-HYDX_`Wi)yj)YRs9$ECtAE`Js#Z&8W2fkXZ?c4AE*;lOG0jif=Y{JaJkxZYM z+t`OXj(vgMC&<&(Xzh0_h)$Z7pKd`NS&mkY)E;{vzblZ>?$(yG+5d^?t>nBb z{%z5Up7^rmH7ak{(m{5|l2B`n=SM8GtqN;GMh>;C8LEs`($ABw++tEV{bXjsnG6p9fCrkG#@Z4iR#jj`6FU`Gw@GV<`&Cc$t2QzG703$#@K)v#lkx~5B zE~cx3el=nEl74QKfP|v;UA#X&%9rn-A+xG^M;p;^4VXoLX9xj%n)h1fb8Jdj$Y~5A_D1;| z;0yxr_RGr2l}_$?o0lOtd4){cgRB7qS<%BcE; zpR!PX+0Mkf2hYZ2<{P5lY}VzKt^Mj8#z*5?w;De*U17oT`tn{QzBK;yqDU+r%klZ3 znL1-|y^Y@s+-uH{a)(hEd^RiU<2~71KNuauf7DqeK2j3G7Dx+8WqsSCK>e0LqVnps z55vD+0Jg_PWi9iEn3aX0hAefkGY*q@0pVB9#wh{Ov}MX|`MhJLq%o~>50N$^7L=YV zGZ4J=kD-poz{NmQoX1Og9vu=ogxR!*&c~N7E)0GI(X3ab-@bG8onL%5jZhTUnsJNa zp7Hi`sl#7rFrlP#Rl(WYp1N-{D5`i)d4>^j*+~yM+*KE;OZJSwNjXCeZsHO4*pQfZ zJ1hQ7&f6g-rN4QttbE5U=!@CI38o6|4VwOFV?%G^g=Xasget0P&ykbssR(l@X@i-S z9EH$^)^QM*op}h$`QE~!qVCD>cg|Vk=TLICd)%cqdLM33={4BKt0(9hxkfKN_c~k* z|EZ*|ZLH^Z@0r{=_K(+@?K#stK_E;1mBJKyCbnz{NC z{04t^tFGnDuc+7?Kb2eg#;I7iqO0|(o?=SFY;Ky1q#)yrd zzYCmuSg`)_s;QyS0NUr|sezA*1_Gcrg5S2`q9*|J(x4e~xf+!U2Y@bfdQ z=XWtk{riIXl)q~Sd1`-+BNLO!`9z^2baMzB@QnzebM#RsTu}hoz@$qL0)c4kT<0-# zLKRBYpCp31v2R2UeTCkSE+FJ_YD@RUI~$S`Xax_8MCFvTs3}`-mfWZU)@op1yLA&( z?0wOweU$rslJ>{9Fn`~@y}h;zF1%pd(_OveEqJyhl@fu(_~3EV5vH2=q_lfFm9+;8 z=K;*3V3MVgLaE@_R^n$K>OH?L{^3#k{3&*GNl&FgOB#QKdI|IB9;r5**W;~IHrMY# zCDXiZIj+KdSL48!PH`ok;?Qf2Y&-N#Q-g1U`KSawkA~2ZL%vT+;|YYSZ@-wn3@te5 z)YSB%iS}t68?q=$9eU094 zdUc*{vhb)ao`lgL(4CmiHOQY3C8c~o-}6VfcZgK@3{0d#c6GxW>cV{a(WKtN7Y%J& zx5WM6%UZHA_H@sd!eUT&eJ>+bP4DH{ljXMTpKLPU@{*jU(fP9Uz6Qmciq4LhSCp!| zr=w1>vN};DTTG^yKgu>YsN`$V~yRYNg;Apg?E+kLj2jp$if&3C5idmT*&oI zd155;D9df%MdJNvUazFu+fPOC3ZKGQ9E>Mk4LWhs^oin%c2HKKx&=!->0Q5juwuec zI-0RbqH@kuIp85NF0WF>Zku@q?`c&aSG`Aj`ph%AH`4i|3@IofAWADym1Hd8#^8`7 z@X+RYMLr=6WdoC=zP`Tj$CyZ_3Do55{Z3eeL(d34Jo|<*B=ErRdBUGR{w;PEV=-Yv zf}F^h3iO+*Ek~?;q_xpND{sUh#0(gw_!J!x0QfO5&VqyUkQDD7Ft7zmmmWCQ)knx& z^DN}MDfl}1x<`|?1Ac;%GojE8nHJRE6-L&{?vIt zw(`Uw!X7F}&s!lZ=KcJmp1Em4&n21gVzT#z_ioRi`{hZFMRR6(hrChzFQ@b!{p-4v(nI9HAJi@7djEzM+Bv8XL1RXg&MGdvQP-Lz*8L~75; zzsrP>dA>oTBYiSa9}$=Jsx(A7SFsW25N;ak{0YTUbCU6M+JZi>GDap|$_j3XIQrIN z7~5xtEK1r%HEBjEv(~H+_wcsvB@SVXMH^w1HAJ(C>P783arcYO&+9u&`vmE)>(OSR zjeR5P8p0sF_DtbcYAgDns##<^NnH3v9ul}Cm?a@foI{NM$~8yCwe)lI@~f0^t75+D zAPe5OIo7#$Y;JorNWlGHJKo8{7b-I=!=o&JW`B^l9p0z(n7<=G>H2vtl2gkLd8)6! zD^!2u08X4lMh>YvP^A=Zth?;fP289+BNc(Zb8%}9(ZiSedGGa%%Nkn9jQpe8rsi0DM3MdUtnO z*BsrXgbfn>uAQ{9m2$xGE;S(xZ& z*hL0;VHh}_UtpavdnTtP<^U&(yk5$$VpMU?bhZ{E65)LL-N|#SOQzCGon8fC4X=%o zQkCoCQs+{7krK)8_8dkdna*De??A>e4H(8FvzsT?DNv_D2f9?|XSy7K+k& zdLKFZ+yZJ2k=H>n_$@4Pk^PvSg3s{gW{v3{;r%~s zy=7FD?H4{MjdYhtBb_23-6`D-A|>7B;YdhImwqkuN*+M7*!5He;4>*1l8^s}Ps zUFr*qET``W@&nWEXJQ`NDc_!p(9OrvB!^~tpZ*Covzcmmc-A@*_AcEsEM3SsbSIka zbjg&iezg`Lp4nyWXlL4Aqh9^CdC}`uK8L!G$dH+kU9S4U6@S_i3z2PP$0|nvJ(aUr{w>EM{x{|-6wUv=m_~&}$Mv}13m+qdapQ$slD66J}yd-WT6om)y2Fn+! zTBad)O=XS;U#2|EJ_R5_?`0#%$@B6nvZ=e98n<8WNqyHfJLohcsB)fm$FLW$6QPy2 zTBaf&5}DZ96j2I&fV_CwHAnp?p5je1WvPL=>j~^>RqWb)Hrn=!R4al4Jd5iimo%q7 z>e0k3i=4Wa-((gl$>f`N?hk=#_6p_GuH->N_hxA~o%f`CUY`e*lMfEyqkAGyRu#`; z|A-#WGxVbyFChs^??z(zbc#f^kAJ7ZYSxVVOQj=^9Hmeuco*P|Dr}YE_gJ@PN0TOh zch0=p2Ib;bGM!^(wEA~A;;l1+-tWJfoP)#HOb3|~2=std3qYZ60wxp`V0WMc5-YN_ zn2Rf&EbAb)-uEv3(c@6EbzOX`xAl$P<6LN&CaB?|T4sf{{c$PcBz#`H0p=t`$$@!R z!RV}S>HCL%xf+0W_@4!de4)D7I2V|rP@7OB%~wMMQe6k0AbjAAXLsYZ=jos>3N0~iL~ z(3+{1i2&c-iLKk7{z;N`CEeZongHJ#vz(){2ic?Ba)u%bkBupMp6O`oyEvG>o86H8 zvNFZ;vsXn62wHQYwYCXUQ%LS`n2N(WpIQ}a9eGsd=3`E^Q_14xTi%oHvg3(G z$Tu1v3Nt25rN;Ww-o`~H?_e#1EP-=^GT}>Z(^a# z`W3@@Nn1&;u%%SamA3bb)^K?}4)w#5>HPKC;W3lZJ9i=u*eJ5GSYE?{bk2VCtYtc+ zZeN;*XVfNqdpwd2zVNBTO_MTrYrdR5SWsf&5DKDH<9v^+UEQCU8&kWHsW~x-|1}4t zeztRizyJt{>1uQ(kwpOwH&xMjZ@R*ujj1f}`TGj`Xqj)8Twn80(G0|=-e4`sMrP5g zR21=!No6g|7I>DgY!7-BZT%#o!s(oV4{-?4ruQq|U*M+iSBz0rEzwL$Nm=duf(p#q zu>3#`_3IZhH8u6eICM#s(xf;*yc1(hTpi1m4|2@E!9K3={AZ`zcor!;MfzTYlN6;R-T zn*)&X0`zuuehGYAAcQ{L`bF{7_n&X$p;F{nCb3TQS{i4+PLXmP^?W&1M8-2w`CuDK zi=nCdIEqZ$(23~!QMxw=HThPDiXT7boPT%}r%ag|lo&C+DeqZU*3~m1x9Xa6# z>NIb`FP`14h$i?-Qpb@8ybLzXZWpAR^HYD+>qc$qc8^?%&vb*o@ zt&g0shTs0ArmrkTQWig z7|$5Te8kN!spY2cU~y)c@d6qEBiTTF-DZ`6`uL4?>Cr&PlC@f##%JYoNZSEDzxP{F zTJ$IU3S1kYew(XpaM{Lb&@aVyAqV)AoJpqM>#bH(L+_kNV9&?5%I z#8AnkV;<&-63>{&f=!7u7l+qH$u2P8&-fD|GG6z~=z@<}V2EMcZF$!G9w;RNSs&n3 z%Z)x{9{)tRNy%yl~$>p>+eSa zTtranlJ>%66cvSm9}MVBzzPH`e}z0#o6qxCVQ&~_=xa%iHr

{D*dQ@=L}xB91vm$aJW`)PL*7YC)*Q(nKBYE?w)$|oS4~QD?cfA!18S+6f2q`_ow{3ew-TPVSU7tIeg6XX zz5>U|R_fW^mc~iG(b7Hn`N_S~UEQah2qCZcf#jG>1_gthr(uW3H<+!x&K9)mf{E8O zz8VC>E$wrQ_XLZNv59YmEwG_`sGn8}*-n44cXRhU(<@cnbhuxQLsiUHrqVWI(_7MG zdv7KWB=#o?hKxSkT|)=z+U`-xeByF;e|3G@E|QkDxcFrmWe&?CBq6r-Ef@a>Q`O+G zq4`b9$nixX*0P-N_6Q`})Nwle_LsV_N5mx*YhzdX_Uc?IJVy2ldo$-nKN0!;1a##V zdQ0u|5bx6PVYsvxX7420?ST1rM8pUavY&VpL$@V-P3yBcmA)AXWgl7#TU_>> z>$oC+z*bt|7s+Gt+t54;UH$%W3LRR6?&=r{(fw{ZT(4@oXE$83ZeWt&{#nD^=mu=vo;uHq+eC)(fr>Jp_$IQ3 zNPiv!3CmFJ_W`DtAA%LxULRKxde(Ty&D}u#`8i*OI2`dr1XUeT5*l5xmfi11UbXxE z;6rN5^b7D!yK$Cmzz;Zv+f}%0V`8}@%}f2T92u9tS_Y+4qP1%@!v42m>6?a51wh>h zb`}ssfWIu!p*>k3u(OLW6EtOZxQHRU7|@Z0nfw#~S^`fp(hfY{Il>; zW=%X6c*IFG(*dXlP>G=+faX&S8So8(Cjjt{7=9j?lD6rIhw`Q8Eu=;l0r)-rZ&q^^ zm^RJ?e(gCn=$V{ zOrD*ZF4NbDt(BBdkz7qLqS}oNua3Y_o|>MnX+0XbhMANiM=t*B?ZR}Yy1yG&X;rh& zWJ#^fw8A#-)m=A*?sKaLe_I2&8G3*JijhYIU-)-c=qzn$*4|665#+)kXi6`6a`s6T zvAN;>tHoR4-nZ_&;`Z*}%U>P;kn9h8yTO*ee$sIIE|CTU<2oG@l*ojO7wLi>g6?n@ z8|k=p<#SW_`eOsvP-iZt$2nbo($)6c9w&{xemrO4>&Q>`H9-qGFfvix6P?VciNZ@K zNs(8Uoub0mT#-KO@}D-wrMJ<`+9>OldVkXf@{FAAjvS4zKWBa1qww!)aiKL0;KD4c zl;aSVy_mQDe*cU1(`|mkBM&r8X(1tpE)S0E;$y`6W!ieGGt<|$dmK}8yeN_N`AAPt zfKKOgKMxsc8@g(W{iA(*zeHfJ-Fn7Y*2>p+y}7V%GyJn8zOU_yu({mh`+Csf+7%tS z8Q1eXM2Q>Zd?lw7hq!HLLSjJ9KknMuWo~`j_2HbiH{0>1vChK~VgOIXA?B2}a&6(c zjfq-(=Mm?kge*?LFrvVIKgo7=^d7-`i<0cFW^s8SU9dmX1rLrezw+0-p76%}7lcYe z#c2LF;)NS4!>vCX$Wb3EZ&0Z!y5^6!=VWx=CM-ZwFgFI)e4HXst1#T&jLe5Y2QfFj zc_B;iaMFh>nZ1{|Fx6WW+x$WUgpZyHo#~%ac-Wc;whsx)Ef$w+?jbpytZ{_eY0bYH zrhGbF8X_NVg?d-pZu#fki_;ghH>rQoZSgq@-yRAKbtFPN|9C@xRzc5T64PE}@n9Hw zMr`vKEzi$LHbbNvF6-L5qy33h_cchKuXMRPjT}C{ZneB8W^3l^$<=E8`MTyg>&uts zM(zPoT6vZ19ak^P93Py}-H#~Ku4WI9*YYEr*k8^p@h(brcE1Z}zgmNK5$3;#i^|xND#KE1; zg2yVA##DzRjiCvN^&P~CNkjh{d%O%1X8&Mxhz*No&?|!xL(~uyl*aqcU7#8jCJ!RdUI-5@7Uf^1{S%u2}Ou-r$}(P4-)8@rHh1DM1`$sd2R_l@km>`v54{{0&b z-C%t5OJL!)#5-IE_BWz~XdG>vBOgoNY0UolN@7+ercqVEODRBe9FIlWuIMs~uN0#U zAeO4YR#S;O{Dc4ahU<{L#o^DTrO%ymvWJMt( zr5u-{5Jitmg9b+BU|{i82>9=Y{H#5lb9)-ENf~f$H!3c@YT!~H-;xz)#UN%I5NU=5jPrSnJFlq-;A9P#E~T5&A_kAN0ZdEyEi1 z>6G?K>P=7U4!sMQS`Zi+M1>AbhhY!O5RGFY&t<7b2jB?Ocw;)Q#0D`aL{{-$aOU?K zZhp#2u^id|#L6LOa$YK-K>PnRlFN+nLevHQRizL6DklM;0vQyR1I=%xExh~&hoasmH%LK0eXZlbbq!=Do4CUawQ9=bHWDkCx5)h+w3`sLq&M@ zgV1m#^^Wao>x+WeZZXxwdzcsg4$O-5cwz*V#ouo*Mjp4Dz6%Qvf9&Y08W?Ol;b}hW zFd@)v4fQ-x;jHi)`#huYVF_2@ELS?oVR7wm1&_ySmajD(;Dm8R{PjkXzAdP8M|zGv zXd-pElUp|Me!0uy>Us{g5N_;erT*77id#j2UoGkPRFU@^TJ?_(e%dn6xE0^_o`pe& zP}vMu?&VL_H3(vx79?p%j9m~uGya?EWTwX;UU(?FPj?0GXCUT6jQQDG@80rsr~TX(d<7<8pqWxrq2% zUWPe7R;FPLT%Getm6F4u@Lk#Up#S{`FWsQKqXgC%$Z6Bv(c*oa*+8bbyg+`S1zVMJ z!LsLOlBadTq2UFO;*_h{b`y~Yd$+H5P{^U&*t2g2Ru_WD{GV?0u~{A%4?i7YTeO}N z`6KM=SJ~?M z4Th&AqAuPcSg_+9CceLi5|B)vdYB2C_8^O+s*Ll#!X%L=~`f(t5(wWa@yJ+1H;jt`bYhj!quMpo<})?zqFB) zS5kJTUeB^w>$~Gs7@2sNyA{>)m+%N0g(VxNER1z4?3P&Tif~PgXDtY~H^P6}sO4e3 z{gdxn9CqPjuNO@5>r=~y)qazqjjZPX;R3*N8ybZT@q5qp-I%`SoK6nEX$z1019xIl zMeFCq!zyZO?#=HHA>Ip#a7EU(cG)dJd{{Z}_LXXgsHr{*Lz2JM@NU;V5s_!o^ATxb z*zetHgu?AcQW3vHpPe;ye?4O|i>O-~;`Z@F*w6+;6D4wKLG{QJA7S}j_3vlW56f*C zhij1UpB|6|F1w5zOqhG$SNN@e3y8VT!6aL0PmOfg^sjTiRjI#oUt9{W8$KEB7GU~m z#vEkA_?;{yEZ$PFu@hfHH2h zu6CKb9iH;?fYml5h2h4Yzo*^950ZXG>$9IOEW6mS>3Nx7=Cm_3{?*_+O`Up{?(5&H zaA#lB*0vz&t09YCp-l%QX7r=_>0uUA1%oVV`ns9=vGtjFg@({Hmry*d z-%|@a+93iD-bSk-Mqa_u1`RGh{2l$8+AfL&ZX5!>v|Z~NdY*G7R+`CW1ueh@y>)n0 z6F8p8_gKoDBeEs_-0`zY9xs1GJ2}1I9_t;5HYChU~p-(hWq0cee zC`BabfkfwXZT)GZFsT0aqN8LHUC8ZQw$cM)5_J&Kb!^*`MOo@bm%L{JBbn49CC2={ zCz;0%Lz>5qiek=?0uC=)M#?DkDAM!hK)QZ&!oqVS1=O7lvX!U!jJh$UUDCWKM`{8W z*6Ev>y-J*oyGAT1?KF8B${#RccjxPoeTcWi9~+n22D zZ=1xdJc;Q9#51+Kom8SL_$8JiIip~8g?`|#6RbjUZu>ag?kmRi@7ke@5o|tYB)=c`5*C6aZZfZ(nU8c}MhCUyib9L zU!i7^3+Op-*_hX>KMzR^T zz5^o<&}io701O9IW58hq^p@Ty^X;w%poIieb7lfigna-B1;Cdabe0Kbz|0O*ZZb4} zc~<`bA<&2di&9`J{-lhPp}AP&X6%;&wc8WAtDs<_Ljbzmrly%TsPMMSO&%XllZdw1;70XDf*b#1sgYkB3=TZ3$_O4-mQ`$BdHDWwMD(#mL+mq%ykMo7-v@}` zdEV%}Nw}MVXg;K6qSc`7Ly@8_a^4#ydm{t(GRRE0%nV5els5II+#tL5qSjSA(vrVE zc|eBv%<wDAP#M z5K!VETAPJ9%>9UlpX^HpjM#I7-QBSx zNr-ee7vslV%L(Xo7(f1!i8*+n@hTC?)y`5aeLrDCI`uQQvux@sRs?N`J6(vB_`~1( zsgGS%{py=}I?U65Z0yI~++K-n5f!4fv^>Y5MD=z}Xt>_Fx#5a+s0{kk{;Us+i(AEO zNt2$LSy`JwrACn5l;-9lTBaxD3pYPpqoO?9=|NrF@Z%|xkp=U^z}-&U&Ea)F^qL-S zU?8p^pIL7uGc(CVY?S8s0Ix#(oASU^6Iq&C?)Oylh7Z^K_H-`4O*1EP zvI4RnzJN#RL2-s<&LzqmPZb@C%dHz*&mZgIhEP;3XVM^3^UvU(U^q85FkwRS=n6T%i^rsY@5A7LJxiq+=|;}q5daasgP1Bnjn8;8fA z*|Uf$$ZmofpQqeJ{M~P)-ZoA|jnld}?RC@F$D|?Y{LL|hoT>d?hD$=bj)A2_xn#0@ z?`AV3I*Wf?zLLr3=O|O0HTBV9Yb)lCta+%Db(OEc!{YZQjvuKJj}JQtZd4Wu26_K6 zYphm1$$&{GfK!1x3#b$_qCxA#UY?JOXuLnbY%ujaNDOd^yjcD3{dviFZ{{B%L7&>>38w{Q##gZ_ z?%0}m>&ak@3c^Q#vvNa2L(i5t@H7PB7+~nR>`)J>5Tqt@6#)eCE($ypY~cAC)CG_! zxm;ChD+=IAD@6dRuH0N&P~(VQQ8R)wz{Nm%yjl>#nnRzqS<%=C3>JZ@Y?|&+o0=x@ zDM@-42x!oq%jJ9#pc}R0WbtLxLvbrev_ugLtZiS}pi(g7L-^_9^2LZY)xU0vji!T~ zBSYMCs~OEkztz(_w>vWWV`?+)i-|>2D64<4v+R-5K>uXujW0joRC&+N%@i`xlsg)G z7lq$&{uUDUsM%IM@5)k}7&^c1bq^AAO^qyV?-_Vw{Z?vSx6OsmjTT1~BH@4G`7*C2 zDpilhQkPqM9nq|g%F;dm!e#c=dXM9=G%@|sBD(iInHr$9F~NIrNeU!WEi4Mzw`gaD z9bAO)iv*}j+^UgIi=~tX{zmV-5BkM5WqpPddO0@1Wn~SoORs2iHO-AUk`!?H(&U5D z?U@2H`d8jQU-0K5sY%|Fa!>;EZ%A4ZuOb&0wbc8wT9$>E&8Rns^|j*Sy}@qLU)`eL z)r~Y^B>NI5Mw7f7{0wYof}B2s;WS5{`f7j7X&>3m(!?s zKW<6Ev1kkM0BzbKq zW?+c`t|mi6sVP@r2)^t<)TNvlAJ@|NZ|V<9YCut|(3TkI0`A&i@qu^)0EOB2jgcfB)a@4%k)^WP+h4L@%H^)zS!} z&}HRs9$boyX;j2IlzBnH@0Fc9Dh>4Ys05Ir=A~0|Ov24-Y$SkJ0K+k-KmbgaJ!KUWX+S z>Z=FyOe;My0CV*rM&Yp3&qBn)~faPZ?O~37Bi;K&r}`@8WOu zCLf>g#5{W++qnDs!)n_;up&&}CB*uG;KJuZQ!J0X)x7v&{A*3882pQ!5o<#rJKim^ zOvbf@hPO4|s8QZ&xZ6A;j`NIp&RN=eWRw_{rcIYa9(-_KZZb5*vUi$^S^&X~p zIbcZ%V&KS^DfWP_kFK(wwJm4YH0tzt*tpb}5Oa9AE4GjJp zZxpP7xfkWu?rG4$xj^K^#6*upp$X9wq-L9;%?v6&Fvth^+Z(^IN(+1x1mH>lax=PX zZJz;qu-nV0InYpI?MK<(w;pcs#~}Yevof_S+Ph_nr|Mjy3KabyA8UVjtYl zR2+AU|21NAELO5Krgj@l>y7RlU9t5?I@QTA`KbN6ic7j|1PI}v-5eQI@VwRL0JB?4 zVL|NDY)(s=jKY(oOGgY8@uc7vPpGUZ)FvC>X8+vLUsbn-F&3g)mw zy{+u7)4B$pzryunyZylIq^t_tMD;u6Y@4)&1bQ6HYU$FKtUh9*yNzWhB`So~=g9;M z63)WhGb?Y7q9L=nb|T~{;1=C zX|vtMi09Ke3`lUB%CGw~aVPfK*x&eypfWck%P_xS*gRy|eNuk_M*d#;9 z)_=c=Z%(yOqv1UvBmXcW<5jxs|F$7>#{rTeQ@YdvJ=jlAw{~&QCB_2Om(+VOGg#kLK7#SMAPa)o?ILQ20R&HAOOi6KZ|2q#<2rVCZDCVyf2?o5N|~Jx%uryNm@q1P z+4P{aJTmKa`oe5(j*+K~nw|_zgyK1lm7i@)F}kU;QHJ=JeTd8J`sXR1)EtlC=zr|L ze%fE_1j`y)6(^g-FO0IHyn`YY6;uH?Ce?If??v+|^BOh7Sg&HtL3k{(J8#I7p1_>s z<>LpnIK5=!OI&k-yHP|r5xjUVFyvQ|_OlQ_7TCg;0a{N$Pc)m7WdK1qU~tO{I@_8Z zXbaehPaWp3XzFiyGsp5HEjjn{2TV0uxFp9i0W!SrXvlgii;7E!)ruSAa&j2kCp$Q4 z@yEL+1@!MI#Oj^yLn~wdU6G(HPh>UdthEim0tqDGek6W!4%g}n0TA5_2HJnYcfG)5 z3~bey=dY8{o{mY7yTj%tk0k~|X+SVZOckI)kz#-#p@>!B=a8ukWEoSbP(T)1@ie41 zd5YHpE>3w?AXeq#a8U^gZIdHu6)V0gG?~n>8W`ej4&kpVtiM0CiQC0$Y?lSIeomA(l+__U2q2GR2lzo zA%jWsInh6Jv#k+DbWzoLXMU?s9FDx3LUNL~cr@hC^#h8x)4d8GGiH(h_p zFP)_K+Y^uNb&q=n!$$p;_0@yTE{;DeJU3IS^gmsD(~S3C4+`HdUJZ&Py(PJjj?)02 zH3KZZL-8j8S-U}GcQTOk#EoTsUlVDaxzGjfSB$z)=1 z(=s$N)i>r)8ITGv5B(f7_)#j^pqodL7(7G*76gq*WrJ}%nDa-hX6$-V zXVCH1O>dEktJs08mv+z}0V(KoEjpNxt+o5Y0SYZJ`JcmidUOi%H^;rA#T9W6gcd$m z56P~g!Razc?VE<-59^n_QrsXJ( z*RLB6tTE$DJhte+F)bCyNgp+FnS_1Un`n@)K`e>66D z?Y5QhDKy1wxY`C?u=%{WuMf!>jPJMA(PM`=|9xR=fA8?o+8;Xj1_#bE-G~mM)l*al-2I7hMd3#t>S03pyOtLU!Q8B$N)Q_E__UpMcEdsXHL~JKo`-DyKhX zE8qw3M-i;_)ccA|@f(qj0ZOwgRVEgs&`#>Ytbl}@);0m5&d+Rax>S6~nLbXQc`_j+ z2t6i*rm@ssyDV!jZVAFKD5wCCHO(T*f5;k`5`Zi*AdXdPd%Dq^{P=f40N`sL9_fFC zP7v`4WQ{I-5H%WF1Q@9yU#X$=uon=iohRv)wEzI(o`#@A$ip0d(a6ao;_9%zPT4XH zIyyQg|0KAPPtgPC=b$VH(SP9FDO^>1I%RRcZ^#e}*rOXF3SW8rmEc1)- zu=Dt5)B9ZPVQu5Q^w5207fnv~j!?8APwfwZLIaYPnXSQ;wx~fi1dLiax6a(oA?w22 z^v;s0gfYdm7{xVxn12?7Z4Nvrcw>^Kl?s$BY2hiNRJn$1O5VlptL@0LEA;xuJ=`UT zw+Pw(oN`L_EB&~`Nqk3rFEoAO%D5|Y*TkG~ z9Q%(cyRK<=L6-CA{P_Cu=Wk9XNpFKJrcRMC7Y>I<gvyH#~V`v$57b66?Xh&s991 zDd~N=utHz?H`bCB4Z@?3tO+a?q>))_=6wdQh`Bl5#cDH8swQUbRa1Q9Ng3hIkG&?9F(MOdUNF5;zg0OlU1XPTC<#gVwi9R>U2<%1N= zLvkKqDg~NE$* zgNP2WrbuMl|DET3ZQQER1wPcH%@Y1M(IsYchn8gU_lB;|ynkaf9BUjH-!#^8zo~w^ z?JWM1=W9YJtq;cxUqgpccy}z&u*f4*zSwjU`!mNBV8mS@|L3;2YX{0%VfH`XOxx|mJpw^;36m`81s zkm_r{)Xn?Hy%x_n<|)YHWkZdNmVH%=j@QP}962{E4U8`ygB%TQ43$+(R)OhENRD=> zDFdB!1nyi#L=|JNoFZwjV%F-fTAP=8qbv00gW9OF99cA_6e6GDMfdm@IByrm^;}sl zHzM^k4E~OvvfIP?2HnEkxK0@OxEJoE?ff`Ae2C*)I*7`^5MGkId=NS4pftcmsbs); z?{R=}kWZRmS145#u3fp+IceAWAlYAAhZ$Z9YJ@{zZo zR||zBNwA9C$~3oKha>^5*j6xH;c7d(l~0K(wQ7vPMDDa6<<3lZf+Sn{mLvg-a1L>; zRS#KBMxZ*;rRV9NgEi!@!P z=|h7ef_jhNmxbG9_WLx3FJ%XTzPhTAo2|U~&kR>XE$+P~vKG~Tg7UrPb4Jfx(%H)o zikN+ZaayUdMIxEmf@s<}vgxIkh8Bk2gc!rt!g8O<^jfvDOIObYFcpn`6f5_cE*y_e zU&}mJ!9?K?)?|*H+VH%Hhs6j(&#>LW>3Tc6P=G;4H$aae+iH@d5av?cxcI(6p06xD zip{#-v0LOtOMg~_nl1Q*pB4}Me5Zu%-32R?o?w7kT{Sc@5P3eX(>#ml$xWa{J6nw=Rtae1BR0+9lG5MyXQ4*GGs0=?$y( zs3{{3DeW4rnDKMevNeuZc%~gR7gAq-vtb#pUWU4OOY#X?2EiB@4j1tQrc!5;{9OUq}sFvlAJn)8}rvHR-p0N{Ww@t^JmiErA9PHfKWi-g|ORzQ30;mEko3+ugSP395$G7XJU?0<_&A>#?NMdtSH_0dUo@ zIk6$~G4jq{*fq2+l^&CjgJ>bD+p_nXb=GRh%L5{O&rO78|7n)3W0)uNn5aM`zYy8) z<(~_<;A8Lb@z3C_+m>NChNJ`0ZdGfmV%;2|oC9AJN5^PDtq>R)AZu++lSxI$l948~ ztZS;CvjvAnYN=B1Ayuv+u_7{kBZdl|i5>x%Z}LW9;v>T%Q#!mP#5`)|BtTgCO8TiFYCOgjeJ~{W4~LGIU-w z&SJ~(IEJ#S-;xvFjMfMY8ZPBt3pBeJCH{d^x&ErpCl`s|-SNP526>gcC5iplF38+8 zz2|^6)h$T72ahySuAx)g_B#ll&sELa@Y(fe1WSL}HMU~i31Cbo10r(N2lWw@(jS$=U%B^ zEs79gAd)U>YBJxSdvS)|Jo4>BX1$JtYv=htc2%uQ=t$239C{p`Kj#H z+IZ0~2GTiELY+oQT9s?}>4XhzbX@iMod9@J~xiODn;;ubKfhT#arnScw0s z{gNumU5m72yJ>0_a06Yjlgj4{c}w9{!@p z9l-PM^z;+}_o|ErhqH`N58I<9yPn&3D?{0fD?4DnaM-gxjYydnwRpZJN^D@cD$Ye5 zCW|dzOGj5o^l&p374e5fq5n1AY3{LoS`JH$5t*ygMWzAxwy~vc{ru?(( zNtQ7_Ly&)=1{4;6+s38**8sIGbuOw6a{$vZ~G!sAs z?uS4g)FA!NGtZhkQiz_|l)PO!_+$<3w&N*Dd@F_a?)T9>1xuHv2s5n@TSmo{v8U&k_*`0WQPh?fXnZ715$FMa53j3U zgUr;|LT_YCG=DP1JIuo-6lwX7@X=NO(tAv-O|V0YkQ;MQCh54h-%@GKHFG>qT&QxkWTnc5t)*gVnO#HecI*J&wE@pjQ$1~~E{xG$yOu9IVjZhh?vZ78~0_Q>VC!f6b2M*nxXHD5b?ta?Y< zmdbBoLpI|ZZv0j5n)h-6^7S~>j1&>Aj;nB71gXGjf927-!~(2QEh#sXP0Bx$QyKST z4tDAH`7RG@{QoryjmmP^fs~1CM#EHS^tqMDaDYPNGenA*C4;)#$^HtB@+)2io&<7S z`7`;T5&{$x*|;c*vseM*-s(98-P_yhWrDnQ;+k0HOwjanHMse!xG-G33Nas7Me6evVL&s!W^dF+W)t1feU)oy;8zy%}GE;v%wc0jk z)KikYLV=W7Om9<^B=Mxo?j{aSZQJ*9=8f-{S5*yxviJ7(R>CBJ#YUj+7hsKkOG`8N zG4WNiP^@yeg-U0OlcP^i0~_OHGcTkcx8!0 zGjue6N`v~QSIAq7f`*;Qn~Hx!XZFZPRoBWQ?(Qx)+G3skz6$fKd5aIiuevP*w|#_k zOLe2jjHVRXulhx%m+yykwsgL_(`-1!Zm+|{i*6XB3>8w{jVQ$zr6gbe)$;$L*d&Jw1UP)x7BFgDET|MgV5`za5AJSBi8q{utScvc{TX`rSi+}O+v3I5y{|xTa zYwIy`;?BGT0duwc=7z8JkUAhkow1_6UZ%w`Uz?dsRtDPAU6C_)ZdJoqtt)h^AX`+j zB}E{(jP)Fefi$jMIYA6hEZ~J7?RnR08kI8HwL{#C*#J(eN>hp+RhU||3he35o?5&F zy>8c+D0VP)ODnvJwPHu{Sf6w8N@MWMzN#uqtT4!82W71|T!6L+9mA1a2OlD*9I-c>JkU1LY$q)k@vel;y$- zR8@R#Df2PyZr?JV~z7(=E)+H{LkHf`bc`ibyOxLK!^}jkrmA%wKM5 zj*jL^==c(i}%x_nw`s^Il#IRlqj~`+plP885%wy8;Th58QAn_ z>fO`tp8msY_wy$suQqaknXjAy=~DZrjX)mWu_9WdTYA{5?98pQxO97y^0MnmjKPC9 z(Gv>!ENx6@!c+=a6me;@#e)T2eIXel>9-7WdN00LePDyglT7xi(3!hB1*r#r%7MAJ zpVjoTdR6&dw33Y8K=Uj`?gVoZH@Mh$89N6VM~^3LejYQHHvcqjpy|fQB#;R^`z7Mn zr=tG9d-P~SD%ij6C3+wa9F8T-YI_Z9q1nIsE1d45*skaF1K-tdSA+iw%_yLe%%$hYz#qugXI|zzz_uYq(~>6b)d}!DKX(Q7 zm>4oRY(PZ_me`Mpo&Ouoa3mf7Sfj0{X9|I&+SP^qvw#DxjNejI0lOPbtI0}+;kn;u z!?K{mTbCvSuNFfBS_)SREU^z|cZ(+fg}a&)XGF7fJ=*4Zu2V?Ci$2U9iO1IXF6iy&k9$J<@pe zhn_x$Isrzy_KC`Q_3uSxHW*EpDN_?(w1SV(iE zqKkVSeo4rB=;3Vp-ld^($-C}sLw~;D%U!A<=a^-ZBIT>+al3wdJIa4R5F+suX8x(@+(s0^X0$hz6tcK%FI!+UbF?`j(v5!sNB*1bM*rs^A*l{=9NxjF@0bF!QZ1PD(AjR7fh3f=v8{ldZXj4@~+ z3DJX2{IGZ9OY#jXGL&%`$HRo@pz41E# zF3*d`1q_=_L*UJY(GK8|TS)T2;?`&RhogbQ3gBp9af1HfseluL+~k$2m*!BRQ2Vjx zY$8?93C@;~@iz(0HT_>yy=7PxT+p^nOUfpsk&tc$k#3~BySrPuHjRXobhmVuC?Fs$ zh;(;%*SEOe=Z$awz#krRD6TbY=9+8H(R25Jzr1{PXBXveyViymXcK7nh9trfvFEKw zuO?!IozAq#iYBTAGD%-oqpfXjb%B^T92ULqT<(5y$*b~cHS1`#K9HP&>C87g=mCDn zrL1(MIpBlK5-FYA-~6HXlN)-KX3x8q`k}<(iC^rS1~)fpKqqoQpb6OQs;ht8-?uvv zfa5SFyO?^Mq@hm{V*-T$`2*amzF0DUIcVPv9h@*^%(`KEZqCHRBL&!hi9RD`%7>97 zzjN|q*!Liz#o5|=Dq<`I!wxz(g0D`BV_bf=jdvgHZ*_|Kc|TXbLlSK@?zfJKcfCD9;_<~}Yqz9<~bF{ z-jLRjFwBJtakI#m4qcz4qZNyK<(gK_;k|mHTOMG-JKH+$eDuBD`~14j=ygOO;eZ8|)D1_H?B8woz@7|A{Wo7Q+1@!h zqUT4ENPB%#40bm)mWY4t@CUy;YMuWBsZ%ZUtA`2Yov>al;`LpcyMnnE=-*2Dfu}hI zg}s8go6^zPH*zCe7I+EuMsqvfOP4t(9Q-L9LTUbPovxLExq+K^g_1GimEjQ_jwUw zT_MmWo&}ZK+MuI5xn#^$ zK1YLW7c@;#kA~1JIrr0@gsCk+uR0e(foh0P6=PyDhuZgrt8pBtY9S$AFMsabG>fTr zF!T+m$g0y2R>i24|E2RRlpXVUuNhE?kgTkUt!vvm@ZOx({A(#`zAl47!`PT%Gd6g3 z8yep0``wszv5C=(7b>q@pR8qYIRzP?Xa|=)U(LrDEwGl|(=(MW66Tp^1@g$-b1U1s zO&+GLUzepnIv4CggzE+;9H__7rEEikc-_oU22$d9Jd*8PuiL^9A-NkRSdpT|6UY6c zpdcPmIV|veqt$*>_1L;#3bI46%0&~IH&nUTiW~Lon*mbyXJdc=?2QdGAaX`0YMZN- zs)F@jOyE*)&Vpi&U~%2l*blLEE^|_bMn|j#4aqs-6-&6cqxw;~> z#XmK+I0$g0R(7$fX(uNU$}vIE{H^YP?L~T7Pfvf&SYC;%UCDIOoCr54jXQ#e8hF5! zIABIYTf9v=g;gCHGn}?nv%v#WaMxMAE`7|>G+W+u+$ie(AklScQ*oyu?vF_f`lZl9 zTU=?uZ$r>K(KUrz?+qCbgJzwvm#eR=zW z$|er66A^ z5MruJTd8ASl_Mk@&$GN`iHyc)jMlLETlT)gd0y_TM0Yx7A#R0w6gWr!rl#1#0QS-e z#8*(~d^m6cDz?4vDp&87FDQ|IfU_$MA%_|QMk45m>|S`$G$KZztU+~N;OM^(O%VILe*=T1G16SHFh;xAawwkQ(ZBA8K7aPD>fgTI^ zel<03pEKl1YgLA%a%Z|BUbKRxKQRAU(7o8nRBcWUf&}*PQqL}l0>W|_aViGz3I==kq(xnlc3bp z%cGDHnO{#*76IQOgh|NFAuM6L_{5Ej%m*AWrY;4fNT!{F(pHFGlx0m!>BfJ+Uy=dR!;rmEpu1z88b;ju~)`&w}pV?>i;`)N?s>xqg^gYn~XR#H>OljnRjka-}8T zF2@eK|0nHzPP=m6VCe20t)<^V=MC9>j%NIcE? z(}r$tNg&CD1SyyHfP~M*#pSPcLplvhq={7VT3yqWu?M&VF{U;j%)KRQIu2^vs-|OK zF&q7$RC-dV70IK)v=K$}rUWxdQzIep|i#*X%42F)^_#59xo#1%O)n zvA@4B_YfoX>k^m<$#W*HYnqz+KoKeRq0Zu+mNX)g#Nr)58V zA-8zW+N{4xm=ZV{-(}W(e|7#KV4zeub*5oPP-kIYx;p276>g(r z)hfBaTSIZ~$D@IYW;NHNk7e`iqc}ZHVnMO72z7jx#4;Xore07ZRk-nGI&;szmzT;( z12TJ((mjD;;pJReAu6hxzY`?lIe4Ru9epH3fkyclhI#PW$Vg^4VoB)_mr7NyRPA?V}@pDf%RT&TZ!1pC0A`)w+dZrzq09 ze}sWm@Cp?_rD?|c#d53WJ2CCohtniy7DvmMYiqK}dbc%;)D@FkS}n(Lqm{SUMv`ND zSPYnDTc3_z9By7W2hZ!6Y!q62GFvE~Ktlw;k?p8sIsqg z@wr#u#DTXj7}0vn3X7<66#CnP+}cCf2`__T`c-u#d5jn11siTZ#GMSf5cbC-SB89l zp0UHMTsj+U99MVd`Sjztv@qexNrCzgp*=k{6G)`q=x$f(We^A%uhIZ zKJLwV;2;Ns*5}*-5dD9Lpy40;l2IW1uQ?+5OdgPve-Q643h|E2!5^Ep0Yf?6yV_9= z2b9FTENyZr@>7MR(zEtrZMc8VsA6nPULm-1_Ylo>mmaQp;Tiq;$bpzDpaBV{4h!veu=oButeU8d@B7U zhz#em)OYnT6hZFB0CT{>#>v?WjPPKh8~6$At{g^5liKSHtpz*&m4TPEd@}GIQg;KS z?=4Wm!8p^ufB%pwF=EIBM}V-rar#+b^EpdIcM!vcY(N*`HONb)HIOqCG!9-Kn8R5a zetP8?GBP@VeESkcDs9&xNb(J00ZIUjlpsH|pdu#o{y%y6StB7z#XMHH7ad>x+%@!a z?-Gs-L^A#=<=mAE86`-U)L9Ky9v~=NeF&MxhE|F;>59q8Dn^=^qap@bC+QbTtLhiG z%;rq(W6=wj6W6xP^!m9cWEoIfYGAG4{xS*bf`;g+KlE#gx>Rks&>i%n{~Lcp$mf@q zuQ}yQEFY9VdB|=YEp?g%fr-@p8{p^vPDh&7tm~Y_OT(f+ms{;dLs@~X%Zj|Abi9>E zm})eVNodUD$M4oSS2dw04p;J#OX6wEzT-XT>RCzTd3F5!JBS;}9AgWrKjT6E=;Bf5O82G8^ep=ER8TzaQ z(Qr=IfHckM`9|QszN5i>oYA=6{Dntq%hcCJQ-qICAowJ|=y#Oaz85{bse*YnWO5OTy%Z8|Md)=_qq7TE8HJHX9_~ za7)1_f>^$Lw=AC)(Yl4gxp@-@@34bUkuHMgC<4bTn5Q)!*%q@n)A_|0a^eAxpT9Ma z^GE(6D25GQQ7?AJ>pcuD^v4)3ULHTnY1iuM;Z>YC5|{0GQRWe#_4V%i|Mmj-W_A(2 z3hs=V^(06e4O9s}rR?HII&AjiHt`bo-w~aLd6n;#9{2tJ)%0W9;pk^aB1P<3)~-u< zU@-YR@_}L=c1AS!N4i9GryQ^1a~p_cr>qHpx&N1UN17f$ipD#6c z`4I5%F@1QL|DXD$roMe$>xYql-(~7l$fKFz2J_aCTuHKjL^-0ptfWUdVTl@X#U{8%cG1F)C+UDj@6aV$m{uP1P+TK0{ zc6TWiOlbh62FTMcm-{p2F+DEK2*N)=A`g;nTRXcNP`Dl*vVce>UmOgf#5qkFGy@6J zyS_lMZK6~a*i)8U?P<;x5br=vgTQmDxCD3+V(ju)O# z|4m0kg0LDp(BE8Tn}@!J5+}yOS0m_NcfE~Jrh_2#iA$1rfQzx;@h9`zifG zWx4vV#)eSCeRLr@-ayPwV5?VqSf1;nu+8l?EP22Bg2u!C{7n@GU3>Z>C=aIl=C$27jFzn|gQ~$iF zn6aU%Klz6~Hn!_;!|)eK#3NS+hBLwwMhhgTW)uUke)B7*Q;!+N**?jv$pRyzPsI*fF)gTSbH!85{lQOzODowvC)sCv z3jBjO6;ClwH?&*Fou^jcPq&FMH3a%cpBB8i*|iFoc|5@~yQkB&47Xg@X$4tA;Wb{L6N3kKzdQEWQ)V|-|BKFBGl7drmHGF2 zLLqnc*>|LmJ5|1EOE;aI*B32EYZ8yQtS9W9jxkLG5}!6k9|;97jy*ozcM)CLA>H>I zm(7J+>0c9JKCL($4|ZOXuHN~D#@_e%|5&;|ULdJLW`ZLc^^Xq|yz;&u5WL>=dD!!h z%6^O$_+{|0o@pswBxl= zzsF*H%@VxoAi2vFvegfdBmUiF_lN9!52x7aeSWD1&E00TpQfqWJ(0&oF0g$C*R9#) z;gPy4ygBFIrdE)b#BRytXN4ZsZgtvJ;6b0qFO&weK$2Y?*Qp{!qzp1DyvzZ4W_rsYLP`}<@66=HOSV5V^Ntacj?2L}fLKtV|s zP$wkdrD(m>crbKA5wjNf&^{5c*ZL_dMO?QP&jN+wyg?EW!O=zY=(^k%0{am~zS6;m z?wNRdrvqoevt(k?Ra#09R=9z=;_szX=l_*pYq{GJ0&u<^?}&Wm67dZ%zpZDL$|HKB zl))8@A6XGf+0)YJt3KVzT3~F7u5!c`ei45yIguz$f&i7C9kY`{qbwI>3B>b`Dsb-P zj&G=t+%3{Vxv}f0ic>DO34@J@rZgqMMuPPldxH*Viv6ohx0H>aH!7c`y`*2>D7|-4 zfK-q?+L(tlyP|>=>bULNCL7kr)r*@T%O)To|A#RTx4(dvQYpCW+}GQ{=O9dHws z8y2+krJlozosLt5Q8K>&Bu&#nJjtJA^Nu@4OTD#9sKhk7#XcPNmA^Azh{zugsz3Y+ z87O0RR$+lFBL!Pn=W~C3leg*MUy#5v<&H`*mL{}gT(J+2tto7ZP;Yt1xcWJh)#Yj( z7Nh=L)}>+{8-cqO_UtTz_}ZFG2s)dAy~EOMpl5e_}Bp$~J6!8{%>&aUJ6EJ&AXdwgk_XM5)L$b1ls8RN{Kg z!ap0I?CIswgGctp3g%;9yG`TJ`SIFs$E0D6*UKLM7j-@>NhkwHNz+YBzvuiMv3+Gq zM1FUqjK@CMJadHE!27yop-Bsi*f_UdJshZpn_><8)<+;YkG4Y1%I zwadOoeC*;Kbpg9!OvKTsWmFFzWqj62S!*9xx2~tE_=ZJeE6-}~MpRnX?pgqu;kvKy z!-;%nrb_WGGe_1ny1})-;IM=5CAGTz6ME-$ir~+_#Vb7d(P$-i7^+3mqpe5#A^*#v zwfi;NM`r(X&)Cx;4*#_mMX&@0&B1Nu6HN0>eRuNRo!19~YfqkBl&Hk*=OLrYUpv!J zF#W$O_$_J4lQJqfGeMb_wT_ni1YGfB?fH5&$ovyW{cm9f9-9`$3pF=qQ&c zXCU#$)U1r*aC}dPQX7Wc=*AJGAs!f+)AYyxhM1>TXb(|!57Dv{MhPtu?=`w&Mo1|c zLh-=l!Ya7`OE&+ficpU4B7w9W9zN_Jh{kCq)h%I|H0IyZFqcFpuC)vvyrm# z67jR*pX5j(NhTd~G*q?d>PY?!W1(DArE}X1W2ke1UkP!an2{`tSJQ!XEzzZ;~0Y)(!w1@SxwgA;wdMEs2hdw5~+2w~BClU}u zLqsTCa$HGX4Aw&O$UMnk=@-Ymn2NNU1BEFl6oD4T6~j&o^f!*>zt0^((EhdM)B>JP z7Z-LHxU7tf0$}Ci<+aJfd~Rv1z(@zGdWjl@p)>e~KysA1TfckeDdwE7qS0$}F!i(M zR;oHzQG15D3eq|#?C-EpF<#K3nu?FBYsY^7%QKu{rL%oW6{%^7^-VbkK{8d0sGmZ0 zTrKIm;z?oLfw@OL{2L}=9xLjUuf}ga2Gwt*NUJ##(`P#utx(|rRM9x>Cd~p)ftb(1 z$hJ^Aa=UcLV&QehAac{r8U+$*@G?}0MA_HKtoL%+!>(`{>h%03LCyUk?d2!R^5&JH zRoj#dkJJNn{gOPL9Y(aMhZi+&XQ{W&65&iO{z zj}s{E1XB_zRPF8mJRBUE#Uq`wk}r7vjPoTYv};Xm-#aPZ=Aqp4cx}h>*6YWmFlD}T zCk5RtBOTH657;{_Qj~|)C$%`Mxj&?k1`DqGSiU28!egoXYT+%X?k__&l8|PJ>3QRa zmSWIeGO@AAR{tO}%d&XQcQJRg`d7!^BG-J!X0ln><-#oMaTWesJ!NY=+)99+(TOkF z#(gRenu$hdT-*vOl=z##1=XnFDHWsL!#q-X_y^yhUspbfyj!Hb?V^63mP? zgSyj9hOFO%@0MDA{ME#wou{4mjPLt?Pq4QjWo>b^dPvrufHD{E<$&8&u5@CZksj|@ z_H)hPnt^@9!;@lmCDp-TeUYpq+oS!A)aQv*Wx7D{jESkKYe?s4)AT_m`bhBiM&D!N zsLx?V`v$y!{MCX#>rt~ECGFovCux(RMSn8dJ%gglW9;-%zjW|6&2zt;*!Y&yYhdAj zgYx};Sf;(*pOD)Lvu>$H?2ey3G2};Y*9lE3*Mt4y-Jh%DgB0~o+oNPF3W8^SKKs7@ zA*nm4XWjbn9eP2NVb>F^0zFOHc98D}R9Ed+dh_b6?CO7)UUWG-^^Uq+={T$%I(*L0 znqFTG#ylC9a4^}+YUB2^P9xL%iojPr5nfyCyQTZ^BQwxLp zmmAyIZQcrJ$7c>}9Uj?hcZ*IRu7=jLY_!EQ#+f`8Jz{QH1w@bH&Gfyt-?DfdnZd4c zz}&vh(=R*QWm{qe`C^P7+!2@TJm#E1uuzoRyF{oV{;U12#r);JiiP4O3xOqz5oH~S zWla>)v_D9vM&R@FAjoJLd)HWWObb)vtBxbWtDFdJJYGK*O~!nP(nj;NM@1~8p8m9A zcfnP~@Lay`i6_B*b_*dE;)0+AOy!^p6o+#BKh6O@&o9cQt}2MO+WoXvcW?LFC{j2n z|1}vEZ`AIg%_isB+=(^nOKIX#*N%6UaynTH{Q4qMv%ZY$MK>vo2rIme6Gq`gfF&8l z!kM>BDUFC~hHrL}1A(JZ?(GrlDHAf)f>u&Bw)9pX`a;JT5%Ezs$7dZ)BAzw+ZXmCN zybw52fT#w>#Xfv^mfb-1*FE`st0dPmQCNYckE9QbnineP>Q_B)L@9MWtGbyuYW-X4 zWM}tzVL{Kw;ED6(8NEuMBn_M=%Y0yOY27v)3@{WNxbL{~q^^NH5OgS*u!Vt*CX*vF zAn^-bWO!aUmlo-z7Z~CJZ498<6A%-N(uaU~p91KflO$VvixjDEa&mHQiBW07z{y%4 zkmd_@Uw@CX2U;5vd;s}KM3%T0JC(1BmxWNuH?hWR5t=NG-@dr-nf8y+uBDn1JieTN z)2+?Kzb`L%bK|>EIsOv1l=R#Wy@!giiqh&f+Re)Epf4K9+j;%KXPFXX&6U0vfyciH>%)1mK*hg zdrAo&PU0KY-$mM)KTa4*g2=IR@-dM4ou)i7W%55qACtD2-!2Rg;!>R@@0)*E{=$)a zI%BEiUFjoe6K6_P-*MiIVOnf~6J#3lpsVjSRM*;KlAR=V1{2uz(eQH`Q_=&xDSP=6 z=K955!tPGeZY=wrxjOZfel7R6ZiTst?HfKe2e>bo2SUn^p(Pm8BBo0ak@!q9IA^)- zp1;bsHIi7!9ofWd!u5Y1ZydQq71q(G!o{kxK5bLHeYUaL1VzGYE8GhhJfs+`qZxF_ zYDnk7^!lwkoULT1Z+z$NefdZVQb-M&!!iBNAEzEhjaR8^?*dY`1iw@_YW@j>#?H*#eu|f z^#dDWj{$;@5}m^z{`usQlNSw5>j@5F-gjKFtCX{Ce)coZGnHNY17_O&FAyKsBb*|S z9*JeM9_cn}D3dOU7A(M@zLn_wZ9;6Jb$>m2Tz9qjd#-HxtLUdSdUZ!ML{#@!F72%u znF*iuY09JT-n%*Bv$clC$)(n*R^>m@hBdBFC&-o`{H@~ej{14gsit4bva)fjo6lR- z9#lCQ$&Cvjq6;1(!<$wGJMc>K_Bu39>n(M^;5!`wP{6B&(bS2hyu}+N1;Izp*Lt^a z4|)7EZj3y|J)(|R;eF1GgA2_g)+VJu?_u>NFFwo~nhJ>y*T))<)J+_NwZbGav)`TR zzsmG~Fo|DYo*3LSWj}Sj@!5zVxXSXr?ulLM{(W;AF8M-{Alr21fo${Az(1On%r|YT z^W^<&YoBn;=%2FI>d5=XAweoRko`9vSt~bPqbCV<7M>Q1M&!))I;y*M(Het^JaYF* zQQx2xc8Cd^dGHbLM3VLQ9iiVEekaIy&uP+uM_zw%d ztf_lH%<%AyVh0YYN_T_4PTFYhKS505Co+!gkm`sS8ggM$qf9IrUfS@JreL$48l-OG z2)fAUNh&KVdY#YpU{nmmloppQN4={eUUXoBdm~j|u4$*KJ5OId<7|n9$BZ`nD^A}i zyP4ck7r!q7x6k8*#^cyp4plrQXm6P=0@-V9&&Z9pXW?}oKF$CtzfuoF@=33?W?6uq ztsx`X+-W>Er4?2`@>1jR0(NdiIHrH38Jmv+@Ee0__hdDSm9p@W_rUWt$RAaT#Q z&=yKl2VpJU(THIDz}gd=GJJo1=vcs&i!ob1~Cmg$nujgajwD6 zq?G-@am04zgv$vw&xNkWX3h_%bgU%##V!mgU?J#eq~32zH(rd6U8Ly8=}Amu3} zP-9`^?+|jbh1=meClO}Cm6l$hb;_@#O?XQ?Hx&a{FR4$~@Hd%K$63(Gu<#DuSNG$_ z`b4P10?HS9d6|tgM1F<*xPll|R9_kxsu)D+F%LXQ&UqF@Th;+1V!m!ujVIDIIzlaX z0pZj{wwNzHD!%XCdoI(A;9qB3LDIdxKa7*vz9yjDOHpDV$K%*jnMfK~O|E}s2+5L& zFDj&ZGgx>L>&?@2lNBw%sLZ1=z(GP7f+s?sPVxFS`K|G8pFM@m zZMCJY7fZK~rWCnwA+uCbTcdTF)`q~FGP`Kr)-cek4SX9gAEmu8rlQxB(zDijZt5~h z8m+2W|4iH9xN>S~+pN6sExlrsmQE(SV@WaaOPx&Oj>Ht`-}r~=6lsD>ti$p8iD9(r zsJ8W8B^l0-D$ccwAyFDV}A60NNO{z4i>VUd?L*vu3#(`mD<#qKEZxA zgn^HEZqNyrWuoLNQm{W}&-g_mB{(|M__5=Mh^K6r#$b&*Mk!=6aGAk zqcw7I5h(~-B?F#f7cUPI4fR&w8XeJBhV#2Pd-UQoTGVAWnDV?VR@n?6gFx>Np&$_m z^`JWn?7=^-e4!A)F$9w^z-nHV+AUNymH%uW+^$^+pRs)ev*aG_dXv7(cY1A^e%@1R z5+yJq$bYcNe{fDOV-pjn^u)+u)3RrKrlzJy3H)B~7^PDAGSuTPP}Aop?}F_S`?>A& z+eutl5$rOJ2e90uBeV6+%l}))DF4+iDpZ(Y?ICG|f9u=#fBN3p4O5S=GJlr|Gmq5- zrX8*p>6-FD6U9cQ>w`Y0rvDm{%O^$>nZU{tx}y5)Bi0i7?H{r=#dp~iw7{(QKc(|6 zRJM)Ow0>g?1RkwprMzU(s`;yx~FDj{p4U~TKJ|zDld604whUK5D@{sL^BJn zCLQkf;bBo{XEtC>2#?M|q0lK-BLd?&PT&=_@bMuTJ%h2dvnyRD{2ABZV<7{z zVTss!Q*Y7Xm#zS}pPssM4`5$~A%+CK?XCJKU-n$P7=Hr<9N=n4$He@bLb9{70|<*h zfBt~+-QO89lHkKdE8VXFrEa^myj^INc1a`sw0b_n;zw-pLQj>y%~|+E6kQ3Hj&^ z*xMYri30KQ_^^ZhC1e0%O53wR;?wtSd)o^}EXau(mS=4NEG0{z6;>#r+W5<>3S*!w&I0eNDek_~v zI_KgbG0#%ZCMLR4s%P@iW7ue&M!OF@4GK!9mn6C2dCgJXRUlc7H~M^`$7zwr((fc4 z^|4KixAzMQ#}(Nx*%xPJ=W5+$nUyss=PdO-**`p;ICJs{bjSKc=q)|M8O+$f5yhnU ze~mQ`#qZg+*!g_4Qn~TLC3wwcm(hP9SkN<)R9J{^!Sjj+4GWG*E(gu+Z()&9F4R2) z$xAwi0+K5M5oJ2Pc1X$(P zW)?sN-pX$pukj*2daQctxuDKz$UZbX*j@Ca=;~raXJSVf?ZptKX6uYoMXJb8;q0ag z4qoGxRi)6RvRmf;6d1+IQSj#@MRrm37%`mqK+@X%eyK{d+`pVL*`lUceYrUOuk|sA zc`1*LxOOe568lo2@*~UNhU=QYdZEg>)_h|Zm$9!Ik@yOi8;C4gu(#6kh1~EbOx{12^NLIBUY zu&@9u=xD6ER20YX0~^3jCEiP0COdqKYoqAo#^=f>w`volm|}@XKuF5?Lj9v*frx9j zxTS!yB~+Q*C&CrZXRfzu_LF-!E58rF*jG3>3`8LqX#NL_h$>1P07K{cdd?ot>SXTtL%xy`9$?M)~jamf~-Se<1?G z5Y89#LoHttBD9QNCuQkEbaE=QLn($ zdd2u_>IfYM6OFRz#^6Kk>sDcc^hPvlOY+r^-_idPX(&$pyXKCN;N{W2DM0=Azr6q$ zSYK{oMSr2KQ>sY`d3sw$T7)E#BIX33n5x z#QtBYhe8>Tns84fyi@k-U6jj$g(SV&L%hXy$VHd#qDxW@gg);r4jck9eRwQUNznkC z>Xxu>qpj@QP8Rz7v*r_msFM4VPS#MorsBZcx8aMyif`ggKY8|uKw!6ARKcOZ#%7@t z>9*_)r#`5LQmfL)*p3$0j2vz8QBWAY#x8lw?HI=nPfJI*aK3o^y0XQa_=D{(%~tf= z$yH<9uck$2@G@0;L8THNNR*$|w)2k$1a1QwBZ>h$6%Y3Fkspq}l30w-sP!p-xz@|| z?8Eh&cBH!FRYo;ohRj|z*)caUn&46^>7AJ|sSS3^Rv0GqH{vWWB1-x~B#<+Uir`Am z;kf(7u`JVsRa^oS-73$6S~3Zx=tSoo?PJYfcTrCM9W1l)(n5srG&g8{j6rATG0fYO@Z4O6Axg;!;c-aXE#o zmZHD`Og=@3(?OJ3gzs=r%Qg|g460=)c7}@_?LK#3-`6kXTgKm$!teMfL#F%UtEvI< z8V7u;eh{6l5T`+m6EQs#$+7pDZ8hVF82pi8d_kM?6&G zxSNZuYgmv0d}0RlEP##$7Ov%G@|b+SD8M}B;pNq8b)`?0-TW5DUd5rzFlhG-aZFCl z$S^mq9gv;LSta5M{8&;N)`?6ehsh=@%#a`d8(BFyCf5qNltM&Je_2WdzW^WXFm275~vFMP4#%;@b5Wwy(O8rD3ef?|Y z4p8YrCcw5`unV9*^1otvnr=tE_dw2zM|~t@~*(R8!|kZZQwT%#md!02CgKc zk1Qb^@2QVTQM_f|!;~Dfr#Ktn$HMj?c3|rSIbjXF^L=CCKZS<`d*S-$ZRFkiD!!KL zVunYbbXHT}AG?KRYwQ|fP|K`-Pq~zmdx~Anf1-ufzjU9}oXT{+*6l9k{G&8Wbd+!Q z%UuoTVa=;o@;0TS#`+*5#6c$Zw~6bjMM;$WuTONZNCS+}urI%tLv(BsrK-yoL*y0y zU&0wSyc2?dI=p0r7g}KWk28Abo@2L@N|Q=pr4S2En8{DR6VWl2*ZD)8F&>hAOGfMZ()$*ip+`B}Q(HruB`rLfh-Is<=`qu&S&?hWH@SsB(GX?`>*z9kZ z-%%^qTCB@b8&qCb*&;H5B`8qN$)0-8vi>W{)vp0rQU6AtA!1xeq^k<7Yk+rdC6j)E z|8cl)H=uG3>)}dd&|K1OIQFW@@K%e!AHw|zllU?)nk1OhR{2R6`u$X0kr%^6Q23=k z@oc52>mpKS6T=4VFnB-UZx>-p0%YWY^p{W$aMw(W@HL6z7(KA=}g)4#lpF9=nEUu-fQ+OSI~X$1Cd72?K!60I!sVqqSlH2836P_pi3;*2fQtqL?nT-a z|B_xpA(m;Om3tRg&HhA0<-9m#}YRSK-#&ixfh|@EGO3mpZ zs1+_KdJbZd;r%OU3@(rbQM-gfBm!L&SrnOS{0ikBvUGJEtQg^ihLLMU&>BeI^F#EC zq+S_O+N@q1Yl7YwK`jn~jKOyN79s*q5w{Jc6gRY%EY01LRc1yCVIkeQQ!Qqko_vR3 zWh&Q~?N&p}__v6m08Oi27d6)^$4ZjY?DOq)d*VeKALGsZtpRBhH<2JKd1M-+G)h#T z|CXj=b|Tc+2^&3sg)jndJJIJ&x$u+k&ttVzkFhQ-VhXbUA| zC=*oK;(3%wr&6MVA(2(tE@wn+Rlk(Du?eLP`+o1j-8%8&H+noy@i=9*4F1q#4{~cPeu^my6 zQP}>}Go`kN7%^!I(R|75yhmnBud^gXbHzEeD|io&Bg76-&#E7LdyX5LL}Su>#fDO4 zQ}E)Dz`OH_@cZfH(b>Wl4ZWLQUz_0Z>&5dJn;9P`GCl5q1)r-bYgPBdEiq3ciqD!~ zQ}rt!w;i1J6jN~wW`BS8f52VRy~_S=%3^L3D{L6+yjePW+(5?nE7(H4`mizfB-?*a zo0HXwM~9XE7kW!1n>UNmaJs-q_S2G1f3d-SSHWd7bPUoOm^HM87K+)720t2% z&kJe7s?gtkhWEn8P1P3hyMZG*kn$&HQw_Sk#}jkw?XDpgHg`eia|uJtU|!hV z{8fXuRQ>%sE^d%^Ez=WEf;O_-#Td&DqH$ja{q6A0L1CFU?7nQ*u?(@QL7Zn09?CY7 zA$5qw!#p(=ReNZ7$nmoU9N>Jxtq55GgzTMz179^G&`$(#dd71Z;+^L1crP8+VL58keNx19MoDX#I%nIY!+Zg!sezqD0^boKz5Uw zmSzl`^?+~-r0lFMio$P<$UPe%VqU$2cnj9r5%)PA^I^(O0P25zeLWyigZ|(%X{bV- z9v2nF4>2q975uLMk$0k_|4tzd4-dOx!k4OA3&((%3_dP6Fls2D^ICJi&ZWjIB?bMM zpJl^!*)jfk|54b#P!a|?kYAc30z|oNusEew$Z`_fG=aG|+eif*=NZC9 zzw$~4MOujhkCvOoP3hw0@jBO))9I8l87O$ts~5w z8~K`mIJM+t^sm#Nt=rWeX2lby(bXJF8V+21PcjH(6sHiG>R5mInkIuxuxg_R{OC2p z9B*OXwE6YpKV6ANC+cQE##|Je6F>NzV=&p2DSGjVuzvU4SZP|oe(|8p{_-K$V>~Yp^&Q|EUd6F@6?;!#J?<&g2%R|Fa(Ycuq zZ2i~upBa@4w7qxQEzyV_76xu*l4}b##3h_wI;5SBVIjTtN#uu@;BxayOie|N!E99` zpO}^3JAKcz(4J(|q%UG1k#+7#-zD|K{QA+~{4SPkIk4kb2nx(t`c&OxBX9FXhevf8 z46AYVbrQwbTrStRH?jObMl52TZ-=M~wI#pW=i1Hiu(WQPvvfGW*;9X?KXJ`V21gyU zcol2g`Gl1vpe=qNyJxDt^dmBTv6o*%*5!Q1f~$IuoK_*`txg$*3n>TdLjUgY2gkFy z%6#6c1V5CpZ8Hkr>M-uhp8S1p>lZI#GR&>Z#D+0YYS<>2*OSlxc+9AM>$GN1vC|bV z$EwQLH=CC#QaIciyQH1&yrQxQ?M+RkS8)+xV=qXbvpU$zt!qPhT4os-JTiayr_+3w zyjT)|nm;P=9datr0SdtRFxgIAKP8TeS2(vCW7di{n=X(q17Kiwcv=ka$zu=4t zbT1z85^(v5e0DL+dC#2dGK^>cX{v3yx;8aN_m%f8GVjyX((&|JcSc5xTu9px3HOib za8b2GqrRs*me4f0Qw*IOuR%P{&I@^a-_g(K#1>WAqg_`ene&6WE5Ww@+gkD&geI-n z5N)}2FZEjELC(t773F{We}6`$Img2ytYs{o(A{PGgz~q&Ak1c1i9|1Lq;{=niI!>n z9zV!=J~o-`w_?-sLB{h_0Q}X`=-FMYi`S^ofQJqt6ozfdIYqIZ7YcS{{ zQi~ffWn*Sk#_o^DOKZI(=2>?xaKq@9%$1Oak7(}OBXP*6c{6u&Su8m0(S+$ZrYAK@ zEZCpzx9-kjusHZ71`j+TMK29=Y*z0p{{$+0a%MdgpPRm3`EZ)M=@i3%7JJ{F@UFG$ z;)Ea5<}B5JXVjzfdg6A$Z+JWq7kZt18=jT5dxtr>*r%#5y75UJb^TlghVZg^6!~TR#jX`m6cHlcDJ_r z7%5iFun=Gyv_C&D(31db{ulykAHm;GPC)^)Mb}=q5(-U`C%BzK9bhZn)+<@?@ zUJkww05eQ*Fbz^BNrSyR0H<223ea(__IjGU6f2XI$Z)_N`P`3*jhz~eO-{xGN(E>a z$tR*bOR(7b@34>mKc3z)EUvC=!o-5RySux)I{^}00t9z=3+@u!IyeM(cMaA^aCZpq z4s)LOo0UEz3UPbQlf#Kfe+(eIDz5iXke^^*U z_C*SlW|Ry;Izqm)RhS@^Vdr{xf?gs1k>smv5Opl$PYrLONb|H4ccgu9a#<{LY3d1< zV!eoV3f1`mmc_EP@xQ={JS3}S05vjzp%RV(QLy5Uq%I%&v9U^BD_tlWnuyT9>9dg=8@i zjD(Aykrp*$aPfF(@*IDc{u}RnmE=|)=4XD#x>Nf6kVm0@qfs1_0(~$~rrp}QBW{o@ z=^xz#%GGQ7D!CRsj4#euoHe{f56e*K9JN~bKA{QAAJYmh8!)_3lp4BeCt^UI06eLf z0*YSVp=Hi~`Gu*#8!JW%235j3c18-|#nN+VR)4Mt=P#G?A}(gqEcCfWmqW|V$NlY| zJ+1P+Ic67{jz8e-%&z;%r@EsWgdKy#0yppcyEJb<^M%ZO@uv`f>jQqN8rU=&^Lu1T z@r1`h~T{*;z8g>3K`J8_-g7O?_ zH{X~I!5810PV`%!(rLrc z5f84nCE3e!HQq4-vgcp;zPxkbm?csec+Y_;mlVoTDV>>l(SVm7pM0d1!ggkY^D0Yvy|eq=@@^YFW)Jt+?H_33b%AKj%#s5bul!&Oi%~vHd3Q_ewxUR^ zsgK+)^3lXvuUIoRvEk|nvhB7~Pp8{y{Gs19FacrWjw_Sf{%`{IhqSy79l0kp=cPQB zJn)-gUYeE}g3TZ*-;%3S&f@xnC9(cG&D}^jAOOtXyS0U{DT7!3^qAMne7bwoCqet0 z*0bB3q#~Wyb$hZs6y_ee5G^1{uIG(iP1HMMu22R_1v8W)m<~aTMBOUievuUQY-7BR z$id`k;F@)IX7ScobkzL4>Wb5x1b%QzFB3i+Ivp6DBMPli6GL5i* z?8>v~kr9~4@puRdhC;$TZFY+yo(vzLx@lGkwr1iqT5R;QEZ*1p=%N;1|NBfz@cnVx zC_eV#K=+Rt9`5!{I1?};rr-8nK_9EP~#`9m+#@ zVL}U(ot@aK^G)EJ&i}fRG}tnXm)Jq&FG#U@w5L40Wt5gG$77>4d^`;(fAQ}G1Q}@S zCTFirh*C6r^YLb)R*MtbmC7L;&myi65s$=mu{Kbyn($EfdIY{OM_61qwRF*Sk>}~o z(MAYy+mni2n+aCB@hk~RdYBh>g6McYSzy>p_Y<}pY$^Q$B^#22_z^;_OOc?@k(UP@ z-M!uJzt!e?>l?!MsaYtbD+^UlpH~RE`}9jFGN^35wr~(xvAqY*qCM&MQaNP2aCgXC z2b#E8&rKgZyGQYQZqPfJ#&z6RQP$qx0XIgRtrZWipU~-^2zq9e!q&`VBQaipbnla_UK|)x3t*9nKVsRrMh|m!Vtpzak6r zM*GuDN*abgeHE9!X-=Y}M>quwU57mG@+q*usx{zEX~t5yc8O!#=Wl)#Q&* z{v9C>>{j|OKWPS}*jI??j?{1O6)H!;T!@96ato>yIUML0p%a|!NWTuz!%(I_NYM#n z1xK$j{*mHvV^yMBZ6F_?KbZrEd#6RB25@I2AR;nzazY<*h8VSIaC375y5Q7!vqer! zmYCv@Y4T3gnbXR@{QhE#I-e!iW~a$4u;dU)h6u$%%}uS54zE;DN-aA)GS-TJOxW)v zp*PAN93Y;3e+I!XTbuU|<?O zNnYW5pO{ZRHUw0eFJTaU&u42hNdyV}R0^|VNJ<&#VhD0|QkeeM*vR%v&A&#)we0C| zfHVnDX>Ab(AwfMsj8oQ$917E&VA;{SPk8Q@IXlSBwYxi^tV|j6wQNbAP1;70oGo^R zoEt@Dj>C=JU14k%Kss?BG9!=E{e_auUx;A@k;jVMAwtTB(oQ3jgkn;2A$@W=gZyI1 zSd2KVoFrb7Bst3;VeTH$tO{Ekq6!9JI7UVwGvI_6O_|klRdWuB0-l? zk>A%fVWYouL3WXb8EvY&%%81yQf^jI72zn)`wR9bCxbJFW&!QjtTy9Y0F`@juod$s z=bl0@^6$aF@*{7`KO=;y3*j!%4YsY;>Z}G^(I1jExRUm@f{+Gusr}!mW4k5ymV5KC%&`#=PP9EJG-a(+-d7$ zrN-fVN^tLgaq+k!CfpZQ5Vf%)by$oyH#--KqjhOPmqAEkwsTzL^zRZ~$_(|m+N0V) zo!pY=eHQW^UCNhiH!1LmR)|*I!}0rZI8LQ*7W8JRUe+&Rk*U*12;g+$iYLjjsE5sr z*Kg~jb5$J&=j#vWE{&f%zKMvv-h1Q;X-S>vO1{h%-!`=*Ix7C;l>Bjxp?WGXb$m!_ zaXM|%KfWOu==h+k2tCWRt4wUVCHFD$(0$1+gqn&Vls?@RKw?kApiv(#A~5{-C;-*Z zkt`%4f$IzJ=(;cT`0Csq?oVE1lJEARyJ9b}Vo$RVPsdUr*j!z1Zfu2tGDs?u-t`2~ zug=4$qgw|5V&$KFgR8GC8Q{bLn1yTAV#!FrsU4)RT}J|DLpVJ=iV zP;AyEH4$k|+ZcYcU73~g(ac%Ck|KROthw4!yXTb0TUTPW@YaTVsti9+X6NA?TXGfK zOM-@5_Us=T>&l%x={RQ2C`(n3PD>@yb=8bp?so3f6k2+EC76mmKm032_t<`B;`T7T z){-?(Bxr5VQ0E-sgnAYtK36DPY2JQalOb=wp~b!KA;C2)#jQQn-{tuEn`*g^zPb8^@8WVsjf_NRXJTMVCti0~lU z9~`9~DokkD_5G?U>K;@G7Xt&-;W4+RbWaJF@Tu+(ilU;C>>PC$<}7i_$QN;w+cOrz z3A|snJf43%6N8bhmyR9UwT${^H&CzjJ~?{i{K>~c_Hz|NMUBTZWwk#&6hQyB&!uCH zq`Am(Lx60kdG`;Oa`SfztcG=?@&R!)9zqRKzje5W*dMdaS4cJFHY?kj-GBPcS5tfj z%8LcbYl;Kaea^r7S+G)6IX~-4`nu+wTS#mz@;7_pJ2uH4UdJ5iyJGkgHM5R-cA~RlTh)dh9^nPzm}Z_7+3o)#lnfeB`It0!5?Vi6Y##KZpymG zO$L-bAjy2T(sb1R{sYb0hyj3g>NOA$Wq&g@&2kwUy6SbOp3jQdMWm+9sy4~MR65dO z2u__*D4xwoi*lgHxgiS@aA`{Zhs9cuoIY>QAZ^a z{DSABQmk=%e_szk9O{>)ooTlF$<5`f1VhZyt*py=f`En1PUMd&(7sn=O|lJWY{!yBMW_6PZY+;w1M zUs787U!F#z9H6+H;4}8+9`SY0jEkQZ_j0S?Y7!=DFrL(f2bWmi2vd0FMeOps?+5GT z1BO6JU>(5k{tvrUHJ1|pFBEU8Vx7%ABOo;x^T>ey+t`OLk|jUo!9qx;{Ces9#_?c? za3U2{zr|L}d4P_n-eSN~{6%smxgo4D>eJiqg^jOl!L@vOzvSOTeq-iaHmYqqgrwOY z;)ozS<07?*Gwcr246x5X%KTe{Uc8zZh_@O39;# zBeYll!&VH2g2{p~#42L`8IOv<;$4{|w^BcYLIeqO6bc2UqTRp7`9({B2gT&*7lBoK zJcbSaBvi|3VqTWaxGH8lZGO$}vgrOX=D{9vm<9pZnfwxC_Os+ZeB>}qGCuIhzcbx& z1#!hkz0t(NB8O2Ty^)9p`n4v8*qVp% z;D-FgJWmP`!=8x)?K*`LuTKR-zjbq>1bx0B{4MGp&W)bS9N#Vp8ttoDv0A& zZ3Q4-zIEuFZx48)1D>j9CawQ=o^YRN7F~{4MJFQO-_|P~7XA=Ho=(fs~_yT@zSRW5QLT*l!p_H#Y$3)a6Z z)V*b5DW5-)8h^y7U0!mbH9W{+3U!5>2Rh*#Pfhsc(+A>qXneQOs+tsRexvn#9rk_P zIx|%N!8lZ?=CTE6yc6@WJ5J31$kM#I7`o7yrX}FEziF@;TErb}wkgF4)Z;aAWW+LYeBnBn$X938z4_SI zI$@jS#rGXt+S6yImY2WM9GvR-#@G2GwYub|KxWwHwWoHqR{B9C_W1YkY3kP?>%yhI zr;+ZVECj8+_B$cpEt~n!HTY`r<$+6dRr108;j%>RRc)<-K~1z+xGL|&^9_e8V;B^W zLgs(%=)EHGFfLg`FP5LH4*;G}9Z8cxAA^Cr zM1c=9sO#FfAmyT7gP{_%SJ7w!~p7f}md zL(oevVgqD>b~)GYhXk+7H?MQMgtMm>ck37XX%8FYtIdY0zNFD=epzze_wL<~KNC~6 zjit5Kumo{^OlVF5cN28AYet}Dy?4|C6D|U*b4vGpSl`xrw%-<;>be{9;DdCIAN*?R zVvah)oum}LT+&`%ujs9dPT_E!&1H3!xFga6MiQ6g|NLYmwG#Z7rp;pWu}fl z{@m18vz^lU_5HhM1)w!mQqmk%Y>c7wycvnUCNcfHnIu^UU&(b3wY=|O3H#jY0`HT! z(g)`!045vHeT`z(1RQCZVV16*>Xxi))Udfmyo={@IQn4{F!F6mVKy!9*Kx z5*tt1J;VD-D#$25jv-aWoXLz|=X$oTs8G!xI*Cyrps2EkxLK431}B+!}PGHl-VkWyut`(roR*LC#Rk3X~-52tP5r z|J#db-F=Shv{b@n7*|$KDZlqY!u$}%rpHcmeyJLNJgB4eg&nQ~BRkb%*K#9;$*c;= z@z+QE97(ug+gSHI{VhC>clA5va_sVEjjLyL`#&$>dQu1dHT9)HVxAuH;UZ;PAU@(r zUu5Om9|(aCIMu*UvfXe^6!72&i%UtWzD#k{3XdrB9#j!WND(-At9E4uaTuO?4^PZn zX37ra#=rgj{|zS4Ktq*q0ub;5EQY+`0r9{U5h$Q!wfSQBwW)2QXc zTEpc~vR%kCo9q$W6_ln0YibZ&y{Z!qy70s)Vln?!!fI}e-pQio zK_+D^gg|oSJ15{`_|HatdRo-fl3$y zd6!Qy9qCRZ5B6_&0*v`Sh+Y{hRPtPtJ@l$UV+;~aD`{9YH!B{=*y)1)L^mLZq4 zSn%s0YdS`ANVPt`1>%5mel~7D`z1GuvGm!`Ai7>Wn0Qs^G07EDPGi||H4Ka28Q~IE zMOSx;7D3c|{+QY9(G1rqV`vdmko1uO&{n|fkb>J?~LZROfay+4F+dVYrwof&5E zS~%xlH(-?A{|3+akqfHMQXCy2G=e5JNthtaL#(c50jOX=Z4|) zODi)g{f+~ZpE%<+G0y}kU1?Y7Q+hv4xP$kiv- zp1isxfK*OC@rE{GOq&fC!$10cL4~T~K}IhuPy*o3*q|qS5dv@IDymT2w^OUg@Dn4O z0im|{4PURr7l-fe6$%UJR+0lYMzV)IWV>%49Mpg=y!#H9!1)-bR{xeJM!8xSjw29+ z{`&Lu<9I;$e*P>rY`yYzl&EyEY zZ%O+|O1Yn?6!X9=w8i96OR&(pw5;HiRgY0*(PmeQwWeg&Xv7 zs(C8XoX|%{wlf-`T~D0bFtPAAo}Kp+ee>~ERW(6}*?YIk;3F9Aa)D@ZdS0E}jMqzV z%y=NpQ)~3r7+j~vMzrb8aiY!a7tc9$><_-2whZWvs!J?c4G1B+Xp8V%ZwuZ?!%oZ3 zkHRiLz^QEA6?3XtmOjxA9DN`q-O2>NVr2_%D(Sz-xJ~_>WKt&5OLId0sF?I=T#+xXpf?XNEt-iP4# zw91kS|8q*}M%fEG!C(12jZ#)wS!Qg9#MT{I73bb$j_=c}Z3hKVlLRCxcvfw!^>|&u z9Up};nJ<>4kME+Y7msoQ*GD*el~CEwW>=n|I?!8+$Yu|SWKQ6gg>1h%p)+-LqczF= z6NwUBuky^$&QomA-mxv%-Je03Dhp|n>8R@;tLWt+??&JUN*pmh4GAa`Z*ukCfJrD`ntf*`(p}Y+yf~0+kYnqN?>@o+ z;8v|8iTIO=iN)2RtP1Ajxcg{$IFLvqj>z3e*Ig8P={`Javt;M$MP?@#hkz0P0Y{&w z=Wt|H({&wQRJ%x2S8rzSOA|4Wuj9CsR@Ky5f7(^F-lT;sqd+~Bz4Zxmu&)mVnH_&( z`Qhzckn2rOt1DoAP6Qgh7Xp0~uJicecYS?5=gH{U>l-IiC69lWB8DDsF!o^cFkK0Q zU*p$-Inn;QKl{SM;9(a#=J^C<-^uac&C@Hiy|M@j^punYKjnj`cw(=5oofxF_8Hu~ zaXJhJSm`SbP+drqoUWQ=ig5CgoHrlI<%+1XaRj*F41BhSQy4HN2GNtqu zbQR*KGHTi=vmO2d9l~B`;Ud37V>QESS_MqPIN29cfaR2=(QJv2PfFTNL6vGb+;8Xh z@qgGnfR+LT|NGhPh@d#3#d&uEfrj`mj6ecq*$$wG|Hs|0un<%=Gf+a8Y@CEzM*h@h z5U)`#?dZq`4GoQqf-*BV2b6#VJ!Td21@kt5Aw(e=*uXT~1NQ4Q^ZX`24~rQOZ@Fh7XhKgG&^V?K4-DhV+y%$p8w9hZOzQmj03^A#2=9pl9k zG|7cqli7O2-j`#bn*mp!t zd>D&=kV_0AXwd1<+m&NTW}2zquwwa)eko(&^`EuCoi1@)8+E(&@sCR3&Vk^vmGB+9 zuS|DWq{Y2>G#e9Gx5TY{1225PGQtUjqs?9B3mML7A+#Thso0%1=(7FECQA;oughCe zaa)jsY~)2ypyGWwMV~G0TTD>M&g30zZ~NuCdfG87tJdQ_zNaPE^yDe`Ec47{*vMMG zl7pAsyo{}KSm$UoPb)%-LNsvFXczhiJ$%uc zkbUmY#&J2+UFJ}fIB#T6_( z$%Q9z5fJ1ndJlQtdc!UR$A>CUeoE}H>EN;PPR<8lrr*lDi4dAn8J7*r9v3!v?BO1r_sEi)1vNo_Z6Tr8+9 z|09s&AJtacGfrj6$UQ9DclHpF_z<{7{UXa3b4;x>5WDF@7DhrRfSy7i#4gU!q9EV$ z%6Y@~o*@^|sXQb!^a4PqJOeLr@8dlY5ou;wKjRDa2c!Ar43!H^<-S9ef?g59Jx*l4 zsOM&0l~fxm=D!a~_d7Pbd+p5lu*ljYd_`T^ls>^ohsjR}=1K;fvc%I%fR@Pt;)b zwhdr=u1&3jIcB*ClOj{5g~atlC(xNFGvJ;5r=P-dbLf$({${+HU?j z(E2Rac$PZS4fqQ!bL;k^6uRvh#cw=`Zn*j-_o1}#;Z+Dv#p{dp=aP9Pr7G1pU0vaM z9dv+Q|83FBdB*QxO1UK6sELi=s7=f|JzV9f`R8ap+#sxK0^1^LBdt z#1K#R%9+mAJ@$I-J*7gkCPGc|pm#2LkolSaT667cJK;L`MXlbmao3LFRx|JPu;kC3 z%)uv*BqX(*gMB*I&rKssH&@1cA`hFBN&bZ?LacDx*rRc$mf)_y82z4Gk`gw9z0>D^ zXBbdE@h)5M!fzvH{A+4zK*(UP7SUJWzTEhmPQnhbK{Ey*~7!K`JeRUw<1C<^@vRGBkUlOrk@=i5>H&Qh_$P)yxSD2H@pH$}X3R5I$qRHVC z<3k!*Cc2u$13{B3%cVLjZ;6)i{)q!`tZnuditVJcOg}eoOh{jkT$wAsJk4g_03R7| zl}D*@EVRVT*ppgUA)(#$7n@JP_;2KKf`N#NhFas z5*84U*U`*C5Pbg2Gf^DK%zjuF?@)v_*YRA)ZkW}*v!2tlve;;nCbRg^YmkG}Z%0jZ z6xNas#LpKHCxqvnz;k}hsd0fjTLtKw1JL)g;RLXNIt^GELw`lv|wNMcay5;;Tlwij=n?e+l>KEdhuJTN9FzbGR>n$IYqAq}72 z8_Z9qFulbXN`$p938lZqB(tYGSV!HAWolZ$4;0uHc~ua3Z3M*Tf!(HTt7aHq;=M~b zE7TP=4ejeXzvQAp z#Rj-2Vy(p$!%`q;5tB4bmJwHB!9VwIaGyGrx)V!vMw7<9siQEwElT}Lt)^q+>saai zI^>f>EgJs(Urioxk{6`_L`NX!4%9{hU$dbaR6l>%vO%~)Q3VwX_ zT7_9F1~xo~?Vak}ze2q~7LNlnA2-V~COUlY-)kmYzU(<#Ah1_V5Yi~?a8ddYidla- zcGNNc)#L#FJl!@GS-A-M$c09)T*R&p0P*#E+wZW$%{!n$$Tw0VK>wKM(Q|dJG{`LA z~&>9{1mq^r_D*Qi0<=c+4<`BR)l zAu8S;EYi(A_djlZEtsv@u(U5MxfXTT_n$Ld8-TyT1uOHfgebEY{8g^{sz-IyT+>aU z98LHhr903&+7)bQ`hIVA1nnKF)i@>E_jg}OIqcJE@H%pt_wPe!BjFwaT6RxFkVVlb zj^9qP0Beq6v5(7-I87A!wQdC2XIgv-j24^5G&U~%a$vwk7>J~gRw!UpIGVf0(ZWvr zd-8=4J)J#bkv8As>T3zeFN>*rXqts=?~;Yjb?$3;Ffx+kH##clM3iqmnO@H{X-57@ z4Nb)Vj{J#x_96Jrmb*p%=C;O($f>#7r0?3*{eND73*+n4UGU9VCIKtR#@WWs|68fo zF(#8G>EWO9#Dl>qVt(%*LJmaVgi!LCk>@n=ODq=;cvkZje((@#s=2<82M#WN>~#nS zuqL>^I$muXzpJ&r9HP8C{k?pnXCV}5h>T@(kwqvNbMLipn? zWD=7P^do-i`^B=!FIIoEA5}o)hzbQ6)a4u^hBDZgp=HxnkBxfK6}CH$yr{FJreI%H z&Zn`KHkWgR?w5~u4!>J1R5E-suV0~nHEjIw3h|Rm7}V5z!d3pqE&a^2(K8+G(m7%W zhZpt;n#-1^*off>-O`K+z8BrJ?<4N8$C-uKcr^#YlgMeunT3Ps4ZN}cp7(*8*ud*Z zyoO~8B#zXXH*2R)lz0bL13R)CxyVsl*lub3)xQR{>(Q1&uj2@|I==__ ztGriO_1|BK^BphXTP{A~{I#Ax(8;Knscg9%2%?XbHw96be@>us^4}?&(xahOZSF$k zGcntTUB2s6b6q7F5ty~cUqU63^oze?Guod0Bop8zMkjUy;Y%CAix0b^^XW)rYGeH^ z7ELe5-X66U2dWY>JRa5QPMVZ=_T21t;eo5&`)fLjdVa!YS-s9;D1x{=F`tg^6~c`` z|G9G7~Csyw0XUwz0)>B8^Ig?dA1lBQ=yI7Sut z^LoWNu#bv`$r-;iDB#DaSMo*j)dJ{tf4;On9kGMO8U|OGz*qm43k`&41~vi~*Mi-Z z|5n!Wx(3gI7vL)?JJ9Z&uNXjMYf!coOqH|WJxXM(`PK4|pm@{f2dbqyoYX+u$BusS zV$1n$mU4LobD26^ToLB00Q*Xtd3KdgKF}Dgi^kE6N6u@DWG^00Y}lr72uQh;zP*#( zoMAzyAU#}n`%@vO8aBj5N4^rK7Bd)m?_n6-jnu`jkLQNeimFG#lI$-2Yy1(E2ve2) z#YQwB@t26L=5nSV%7RVfYh)r!vHx#i>8oMo*-`)gM{@4JX<>H1Y3sHy;#^KAj|1hF zSl<|r@7vwTQF;K;4}pyt;LRC`i`#?Zfkyo5PJhZX&VpBWC?ew6riVt|yTQyA(eH8^+0;}+I-oPp6r9Os4;MS`Iq0W;Vf}`pam#aiD=yIF_w?jFl_z35 zfeGX*fVcw}(BLFB*lE~qfK-gBl~y|1^85A{d2Wu&FuM2BA)eI(-})B*wy z(~S)yAeIro`zysJ8;^P12CTaE60o{8ZB9~*-F?x}-mVDXTeT`?thlWJa$&ZL-zFXM z0Lnpd?1a;Fn{p&7qz;@UzH8)3`&s?CMF8Lx0YE?ZbRh80@qH)axotP9sTV%U!kMkH zR3-q8(^R9s=)TkjwpOXprvM~0G zD}CV>zR0e*Z>t4!{z#+J$WZ9kf`S;|V99%lgUNH7#82!X?bVwVKRYWg+0E)06$tVS z?W8jOwK$CYjbe~V1sW%!P+^tRTy1P*KNz8wppSDiU}kU@{%=V7A~}tCB?F}{jm>S{ z{QJw!S7D}to}R_O+KAp?+D0;9*Zy*UX8gGqZDlQ0SzFW}_tV4wE24rVvSR+v#`sWK z#V9i(v+ovp6BdMtvu@{E2@aNx5Y)r`iEpqPknIeI6*8|-nXD&&-5n2X=ESqE^lM3n z6VF^vro-)s`kuR+rL0`Jb73Jv92V(#pj=(7e_HFxL+W~!t9U_vKMYJ0(k}i*wFwcr zk%2`>b?;b%H!PiKNGsT7k-}~fe*dJDMMyENN3}wRWg)_ITxSqc^=~$-;$$gXs#syw zG|uGaevx(^0PX)FGRqo`mN1JReC?E7Thy7~Z^~zfiu`YA%dQQg49k(5>R1@P7LfHw zsCzgdxturGVpUc*G@>v;R$@p48=2Ufq>{tWq`&8Y-_QcXP7FHYsEyhs-D#wkMJpPR zVB-91i z1_webR_5BvG2OH#%D_htk8{z{Jif0ixN}*)S=v3%_<@&Vf!W#?wxecbmYZ@PtQSI@ zqep3Dt1TJ(1g}groa-r}9ct{t%^^`Iv7TS?kIY8?nB5*u)zz|(ue@^G5?dy!7$wDO zu-!j8$TC+*p|{0{VeDuA6Q79lFR-3QpHDB(35+fcyo5ILxu%%{bxcgWLe0m;=%~kh)(0pdkmJr$-Yv3!oIBAQp^yZ#}$C+4fb+&`)#U!18+qeSi zx;vT7V!w~!U3<$93RpT+*N4)Cxz(2ieP$!J4RO+r#^S*q6%$jo0bfT`X_0}{wshSJ zwmMj3Y7T>dsQEA&twHwtC6ZH;$(e?E`^Jzy=njJAhM$s;h^aT0x>cZ}Kky>i(54$B zZ%^87m$zoZ4~2C6FSi2K6O(nFjnej2iVdWg(1_XZb(5>c37GMoiK}FtnYq4jE!_@$ zg_JrSqy46F#S7$Z!P)!)u6Y|9XNp=7s6(AfQ%zeunXv7h920NK#TR*f|B%OPM8TPD zp*|3`id6x@BV?d`XtY?a?fu!8P=e0h98UznE9;!V%*@PB7XD!_{?(ms;m6Agzu`P) z4uet(NQ}Zz{vGC-_}BzLwNtY4v;k!Mi47h}f0tO&RVzv5#|-K+^`1*iZQKfG*yyDi zj*qnl=B^fbf73`8+>MR#sE!LNJ?|vCz?+!P+rP$_XC!aPr>g^E&%N{lkd)?5b zui76uHE6GhEt{GL7Z->EZ+p9)&zEQduFNBQ8-#2kN$gKd=ELEWnxiiEs68K~4$plR zejg2iqhh{~Xv>ih2E-#Y#{Y^vzr%GTknVRqd|;!kdD1=PEXn9rw~Tz>yZ1{@c;Dgg z81a3Y+=yZd+&u_#G{4}5aG`GZxsS0oOL!_Dd+POfObghwwzdYf>rCdb^zJr$8<%_<%b z%v#4TFwy4+DTMTa6l?-}-#uapiVZ(JC3o&eaD86eOm`#^6zi4brtY&GdW;#V#E za-s~L;%cZJROBkcprM|Hw9cF|Kx!OR_GY8eSZbY>YX6i{QAb=`6rz>PuW%Uxk*edR zx62PzMGS+RMuQO_5+>Qs+V7rN9kLH!#4;HGLyxVd^-AI%4Yku}oMF!ui4CV{vbiV{Vo>KcUGZg0;`j;{fDI?n3##&|~82R#vn8Jl4 zw$5~N}+`-?}{(Ax$|MS}_iWzz$;b-`KXF~_4I(Eq7VSQo1@MXL-9)CGtfl}fP;aZz%Q+ercXPWTYrSH zV(XYdU`@tUAT;?R3Q&iAx;F9*#o>cb+U*;}E%=>?BLG(gkolC7lDfUU1yJd$2LSyN z2zkmD&g4}As5w#4h%+&Odtpc)2ePXkZw%Q@z0GI$bwKh1KYY-6P4#8dZKa@B1$0VK z(nK3g34}v{nlzwttO4jC16oyWZEXQ$U-J`HbO8!1WK~>Skt;3~MQp1TQ&OPTJCGeU ztpkd9fGB8F9A)Mc6%%-ZJ2_LVp1=N4=%YKgva%AW0UlUeGk9q~J3BKsHT?k?C>+** z{ojtD+=?9Y*p)aAmyqg!=fesjI5X~-)MJ6$%0_|EVrk9UH|bOBj7btZT^$H%!g#wL z>rj#&^dKEP6(VJR8fn6{&~!Z$ra0+*o=Ez>Xh~*Z41xNgh1Aq4@Adw*Aak5Q4(Is{ zzH~FcG*OOT1;28p^07?15C`5Q^Vz&y+BhKgJ)gJ>nsq;qGj$*KMk}}NTUb_)bVHM} z^e1BhX)Q8UFm+gdmsdz(*)>GkiRm)x?A@gcP2*1~q^NkTqDbe0L<7~GRz#C9_-$wm zNyH)+r<{x*R7Z98J}gAq@^kUUDtYk5%5Sho_?x7`82z5{c(`>dm5_L^(oaGO8Q43f`HVpwL& z(u2X|DjaxzN;~5npSNox*KDgf%v@H&9rF`Zl!7i8a0^k;3?*MUZc44j=*Nehw$MT= z=0R9aGWVS@J^DpeQzHv~BKfAus~zJ0oWOxmXXL21=($4Eyb22EE+^yy#*$^XH9o=q ztolFa@cL%qG|M{>?VYU6;O+kggjHuC*wP&fO-!>JD?XYgK6>Q-VcTt3V4wEJL$=@L zAby8qqcb_Of}W@f3?qmyd}Y#vcXuh$G$|)>Ng{;bYTESWFtL`pd3VdHsrTAmCTx%i z?D8CAd?2^^iUIBsRw>rY*LB>1LmwwD&NF#_Vfj)a>*}8LFAi4}3F$hbZ6EJLKU~c} z8ZOU2%M~m-{-j;QF4Upy{!U|*DKru`x4C;|nq+!`?*G&CIuGm@E_5N%HxpG+b4Da< zlGuOA@9PotGR*qWk{}6LVL*#EY9WoyC<#%Ozf2tqq$a40nZF@58Mm8Fy^o5 z(8x<-8N>bNn7m$=dwLIibPaet{ZwL+RyIGO3>`eReQ<*c#0|EmEdDbB=^Km2g237M4`s8y>T6{$n+x0G*E$2<9G);(G-;vPVYz`B0YCrXgypW zT)vL}iJOm;YISTEi9|v}ETC7E0LQ}@ucPdQ%ZYum%ePJc=+Y0T*bUN%oHKR$1Wu>x z#i!-fcxSI8{K*E9*PrQGe)q>Um-B(H;pq-txj)IC+(rGZQlR&_8$wmqe7OMq?BH<< zug$B^@vu+=+G6-jmWBF~=}Q``%T=bF_Ys?#(`gskq4vkNWvh3q_-4t!2rj3L5VBXE z_w+v<-nZK$-rJ`Ge%m0?K@c_TQ)kE_4g+#5m(Sehe)@YKNBqU+P`n2*$6?QVgx|@{ z2ieCv4d~l3rKW3zv^4s}Ui2K_hKS9}Z)jlefN+lhB_+D_d#+ zYu$~26b<0Q2PnmvAW-QHdzF2r2~a#=$)F_@7{(ji32Pp8Q;DPV?0Qj60^EsSO0h00nXAOKQ*Bke*oz6?E9RuWo3 znEc2I6Ic!Khmd@@W`=#!Pif-61@MwOO)19Qwj7Ktq>O!#DZ z{2TF|f|QU2Tn%}2OzSrIRT&*r_@wyvQVc8+|WzgXsgC!LcHK@#E(Gh;YNdG`51UJy*+DFTz)7MS6O4m`lRwi_iUtY4et!bI@bny&2D|Z=L0?PoP zQ5fhZ@4hH(a)^0rAb{e$5vP4`vCpPfSdo0Ak51G8>Z;PrVGNchVA9NG4JP+pfXn$` z;)OZf52j*Z<8RThOnN5@57oGErhe35=?BaYz_{=xMl`eGJb}#fzw$qzy3{_TKW<{K z1-e){mm-?7pMCJ*4Fxo{4}?#E<}9E!86fA{56BhHSb@1~y=5V9*7!iGH{yq`{4aU_ zfHNe=ggo1voD2Os$h0o<(EH3~VvL)M#BEA8F%$8B+6cgd%FHxzcE;G`iv$0k8==V( z->cz-8hGsj3oJS!7VbKBsCDI{3Z7qFkXP+6D}KruD729F-4&;N8J_<6Dh4D8tho*q z>|if;Yxb`{xKVO9SFPsKLZNc2fBq`Q5MwGvS2VE=E5#(XSBB<(W6|95E0O_rvE=QeRFhN-5YM>iS5R= z(b#s=I89?tqsF%Frm>nfHYc|2q_J(^^Zo9<|IJ!6Yt5c>&g}iZ&-)<0Atu?PSo*LD zWZi<}l1MHU9*z5z)J~hC3`C(1vWEShYP#uxNTWS{%zvXmOdLVZ^_f0+{79adW)7O_ zcO&l(!*MYb=VPPpMupd*=GTsD`VVUr1Lc*&!Zh{ePjWUFVn)x!JY3dJb5sRrML0On$7$HgKEIHN;nq;Mzss1aveHp>Aeu?&vfv1?u5_Q86d5-~=*0%3Be69w2|CJGhR^&1? zL@rs#{91_WAM-vKujowMY}^)^QrJZ>$A0F2F!ntPFhscp@Vg2du`w>G&jp$~PLSlL$#ZzMOs7KWvV=HoPp&bvDqEG1WvFARg#@Sv)-G6w-cP=wO zaW*Gy=0SNz!d=ne4`^wAeSos;c(U|L-nE=_XV5dhpHLc(sg1&h}IJ479Q@ zq$O@f8BXKfXDfXVQ(hibE;DW}*%?;r1Xn^}16gD?NZm^8;WEi9sUQahw)N`-}6})1fIzs$-w*)}+wt4ly?6{_?s&jWF zjSnEN2?>n?T|GTslznrP9D_6leAyQ!X68!v*0xRw^N=}0y32nDR$ObiN zPJ)gj61JRMK`Na3LyGID!pvnThJ`Mrx&#%oa(FjBA|%@SA8~+BAz`?WiA%Z6m>X+G z7nMAgDUua@-A^caGfde;>eO83I=o=xz>UZY{YX#Tqc8HchZxd{^jN=jfYB|lMl0x$ z+>CKG8E@JN*jcNcR^=E6HyIy)sxBk5PP$i9Yb3G<$kD(isIr#C)59V*z{OHs3L#H8 z1DRw4y7(~=LI8jVf_DILerZV;Ao%Hj$Rvb+daB?mu>}Sr*m|}A4Ny;)JXSJNUP@#m z=y(q!!=qtjMIn)T_Sgj=RRG3GKy2;*64Gi+2hafXCi~wE^0vzVGi?UUmvwY?DL~ML z$pFXzG~5FmF^5_C7fJ^?F$d^{_ZC!m=QtkvsUgQLfSTdzQ7C)AUKVWx1p%OhRxVic zW&%l;Tm0+axCdqOC$?rwt>~gwt^t}QRqVDa3G<4qg^7FPG(&|=RHs0yd4G)7-#OlZ zvWkkDh6b7cLHhOAyj+OfnPaX?eU3^v(q;wwZmB#~n}X@i%`-aW!H3+VBDy+7IK~7G zIVBk$g)?gLDPUexDc3Uqd|>D29|Q`LprD}txDV+m`CQ;EzYRNJInxIKw_rImK;y6; zZFlzn@dI4B;|q(Sr0!1e!IwP#^8`u$AJ_x!lv|i%dfFU=@Um!O5~PhKKv3yE8CA!} z^Sgst8_If|umWpQ5@G@$g5@l$xvxx1P3&+@2Qbxf@@!FOr-^KQ3Dy5n+3@sk33*zR zGAiykllpv03rJMXNh2phHLH@+%#tW_QC+zn^zymUCvEk@4U_%o8naJ8I!>Ve?+1 z#nCg~Npd>{$I1u^EXUtQ3PHwO7O$&g@zBu>(jR1& zzeAOlk@??QJ;xV!1&5sQPkTd~ZprysaWl5Fl8=?#30zl709O<|oeDo*p~#*!*Y98| zvIODW^vUEN?LW4CbysHOp%l(W&B)kQ{26+?J-Rhmq2erbx;4@Olcc(QBD&Jg%=fs_ zEJ_ot<17reUapHYYQ?prxB_LHM9yI>J;S zG3K4lSJyQ)w!tmRE$ImJp6I{RY&0n37V@Xsvl<`&tpByBXkgHum@XHWnu7u_HCZPs zqAl`cUp3W7?|)q0Tj%d2-&rApH;qDCY;T|9-sf_z9?rx8(~VCqxqRh~s~?cee`Sk2 z1?NLtBaW7lMpy_|TmcH0&fjX+OQd>wiQC&mrd#!`<vhH{K8=FfrMw^%=Qipr3iynXcw_k6^Y0Je+ zZ(G)myIm7h(87p+uy&{f*3*hpiB-Qg6IYf^8vH7ug#F_eF98w56q7q3=Ikh-(NnDC zG(xqOQd@6e5}!)XDXD2v8fmD0mSt%ABoM}oR4Ftbweg?`xw1reRzKODY)5z4dh_CU z+($$sTc2=<3$jEehd11hNE4m7VM_bjVJ)`&50?+p@fF=#+#}v(s-Tndc9|J@gu^#3 zlx3n4R;NG@JZ1Hr&Eo88kK*Z$<1JhMElOXxWBzt{;bzXNv%f+7X03lr(0+R_?mJpD zDvXQdlx^By#0a}fCS0t;?S{AYC*fP`iOoFQTpv4b>@(R(eI{()^dz^;mvf}cvO=>9CcUOLZCU{m zUo6l{0q#Bk?GCFvH3KkALis4-O~7;-?fq9?Q=(wb%)&RD>AH55jjeY1&t$&5z(L!g z{B$mMd_=coz0n&SL4tek6eX<+r5O%p7)1>xWgu*@)QFIR=Rbc}9Dx!s|9B*8S^n?H z){n(Wn#jS>n6g$`xmc(g6ooRFxxrE^#kbu*f^~C+rM4PKAP|2%L{ugzy-2C_geD1- z+2aaY$gSN%#(V_k3oRgzD_{O*UfMsb5tD0;UH6f4L!1FQSs6>A8zWy03+_|*B&`Z} zgn5z<9yw+MEOH$;SwiG3vmgn_aw>=k`!T~=osI}ACKfx9nwRb8w1h5iLqk=Om>y9n z6siP6a;*GTZ*q|)s9a5Fu^XCUa#-eAD7;J%s}3{;YlTS>tnlm%Cx9iTY!w=q@+Xca z6NF2NIM^I$(p9YKba==xwZc>d{<~(}n$a=7I|sDeIXU~Q&CcM>!vBrlYi$=;Bm1_B zfl;%E<+NQHCM@dJBWY-;o9W6rs>j}sW?slSm#FB(=G#ArqiuW^?CqTABd|45Ex6K3IY0UAX;pt76~w0{5b~T z3%T_&pea@|3Z*P^E@QKi8t?IW%fs;PruKKc)^X0EQBc;tQtBRn2kdL}_J}oXj8b!Q zbN2!TAu!B8T|!UR081)1OpF#*Egf7~kb8~{}Ti} z4$*q`gYn$mD1+I-93&m}^K#YdZEY0r#wMC1BYYyNF3;^N%=lJW#rcM8*m4w;5-A{#zDlxI{9`R6Pm!?-$sFf5D}k3Ba18H`&&Z%*WQqcc6D(N{ zxqxGSFpP=nQaUOELYM|uPO*ArExYK%=q%2yLP*n`#CD!Oi{`(AB!cOu_`xGg4RUsC zJZlI@OaM)VRW?rtn9cy1WRrauQ-N)x96|!a2aEKtK0Z56#c#GsFfu2sy)C6q&WDI% zCQ2}wX_>Q=K-V2A_&@BJe$akUovM?*&0NwM&v)j_oG0Y|sp*z|CLXj{_3Y}y z{=6Pr(sNRSyQju&BI(^v9e;a|OcB3aC-0&_S=&JzD>x?3zCw82-{L_o2%pqLa{5J1 zfQvGz52P{GrsNk-inaG7H+`#+`D%W6cv3y(e_%xnGbw=mY}rxUwvnt&@|d5jFoOzm zj;So^m{BpF&)l^rRp|iB1r0r-js!P(7DL_K_MPZw@fm&{~@X9 z&hVY_N0Wk!MQEwE?ajJtWZpWoH1TLwVRr~X$GaO~n>i5e1!C*`cF$i{9oL<6y_GLF z`w@mLhHqkGIUts>q|Kv!GZS3b>ymXUq|l&?&BzRM1#qqx5ss@e#gejfu^NQL{vKz$ z7)CeBT}E<@i=AvVO?F#}*L_Qg8hxAF_b_ge@!2nV{Kxjf`Tnm15(d@KaBf!?u|}`g zTdwWCb^j&{xj2Q>Qh8w}TlPQ@^^@aFXBaCAS>x)znBoYHTkngAd*Mc3e~Notd*9Z7 zx#%7dddeE{ep-aOc=J46(X$2OvHYao+$iR>E7UUdyB4T$8U&W=^L-D6oY``WW%Qbo zo(WHgzhQLN7{7dYeSJltwQOd8=4Q*bb#hnY;;s8)hV-NJ<{CfU*CTS0xbS>X1m08O zM*~LX>w(}ezb)?lBn~9EH!9ytKGFWQ5?Xq-nhRj2N(^kQ>NMh{E1F_lsSy$}UWFPv2wuG)f*~=&UUF0q3Jr(4dqiKC8huUtfrD zk65Xu2{2BKh=^za9hXxphMhmp9;43x!TnRu=zCzgl9=s;Nyp|O6k(1$qn11BY8BG| z!;VNb;E)-PE3%#}gxiixc5Wzwc8nUHBzN1NKSEwJ91BrHA)biEB>8U25kwT6Y{j64 zRciz#7f^+x5K664!M6&Em`AfB#!8Y?q&LX}L6OB!aZ4Z>Fj1r;+GLJT5(muxRQiZ7 zU3&19A$i)WNDZlulO&RUX6kzbOcA#+26iY%y-iZeF74kD<-lUuMXcTcDW(9eNNQko zO&vka=%_x-Q&8Z+tFR1GOmtDO&B!g{EO^$%n{H6k$Y;U8v$zpu7_$n2p1Ed(lbh7b z4OZS0!cd?rFWV-T8<8(i*UP+``vA=~;m?6)CfRp*qf=is3975Yz$(PCf~ua+B}X#3 zz?B%trIdqqVts%}0&!T2l=93ksS?En`{@abN}kM$WGTY&Ax!C{=8!3Xfkf#VC<(;NDBQq2=ac9(C%m}9=d{v%xCQelw(I1p^S z$cqLtwjcv|Ohp+*eZaB$$~O{54w~LCY6o8VPAms7 z4yLTCnwkayg2Vxk1qV1I0W6D!|CyU89A|T!QA^bS%KgRri zBC|QAGI_dIfM)D3*3?2U*`XVu^npdvZ;hhnI;pC*Km$FRMyV2B-CgjMoy^YmQ_`oj3&)a?+AV*xP`hQOdL zOH(@==&5!4~Yytjv2B`t?(W;~DDe{%d7XHna(Io-7Z?wb* z^d$)}5cqmpq%lu(66G*U81Qt6)|+y=eB(STNfEBGia1f1o+yGjk9nGS*#~i)&(6vv zPMvyfw@A?N%PKyq;q(PmZ=%-|_F0 zTmVVW;t$09Qh1?MvPbARpTiOFL-Ve|i8nftTT`zl3tOyf?$T)+_qv~Gu`@jfdml^9 zPNpYp%DZf*>=;)4y5fJF$7U(5J7Md;9e#9Xd*Nae;ust};}EI4-qL`6K#sc}ZiEbP zye(MjEZ!~b7dqBK2!op|H@8OU!gOI%!Qrf=y(y=Orj-RyqQdGk4 zb^vuItX8ZsQhhpXXxHrNBw0r0{g5r&t|e_N?QIpl{!87v>31B|#lVouf49k_P zM4qE{6Owj{&ZlQag!ePTv!`1oIdr{t+UxZPe7Y5n|=N57=2Rb|t*1RICj?%!!nVj41*Oj2N(}rXqBO*o#u> z%P6-Z6K|!}`s`b{p7IY?YpY(KYCg_xCR>99WDp!WARFhucQ9|2UKCiRbtzSa3eaznb zXS6ZmVoGNA!{JvpYIS=zR$s|`XqxJ{14KgGIyZ_8C3&y3QibbSu|YL@awZdUAbj6a z1%Dua%fXXdjc3@F8n^Fk4X$PNB{$v^UFTn)y`7TAH*7=5(^KpC5OptZi&*Dq`cffd ziyIkcd`JXriEaN*n?xGYO0fy? zEC@J%?)@01$tli_N0SkoASdNWsZzBiS(QNo75*=w9sFG07#07 zEDr~w@c@yK52@rKB~tabd~_#JystDDKg|+byaV{$HuWm)S_N}g7Y67Px0ePn`;5tS zlM4B50Dcm5)ddbX7B4u{D0kQV3%$Ec&lh=RVg;tC3pq#jM)yXK&fv0M{vz;gsb3&F z83d)LkbHxwLB)(XeN+`~f%l>o8NU~YA!lv61WmzG7jFftVC`L$pg_qDDxZR&;Yo!w zo77pumGP(8kILzJ~bpg;ta24od7Qk|q zdP^xnKICzbBI9Tx$}?nj!1%lqRWPd)u3t8w_lDyzGUXV5`mxp zAr>o6GT*gc&)nEJASDHtC&95Zgf(GY*ZWyS0)e&_UA1Y%z{6Bu9-!U$i_7 zy7g>ld47&?P~SItS|h+?v;dM1f$_7Bfq|KaN5%6tI-&P%4m;!p%< zZG$OBn>bf_vo}CRH~^3U%nSfE-?Bm{5Pk%Ny8;Q|Eng!T=}iC42xl>dYI;yOiJ<`wXygpmODqtnWMuCs9Y<^rbw<`uUJ#Bc>VXk zEecaS6=n%aLb{m+$?sj|6}u^0$hr_Q96NpQzOxZ+0U*>a7(jBs>HqPn>) zk(Xil$c&cUg=f$Em%=FQw;yWbaG-~`0IRt3}H>4^HXV`km-j>j31 z;%l8;_2b9|)(O4vCHuzK4DY3y!z(&s{o4)= zHD!HJR^xiVa><0l9hs5ETSUixmsx_$CTm6Or9%HRl@u82fxc1cvbWLWDD(b$5C{+2 zNH<|79Ec3ICC7`;b0b$&HNv?+LE0zt2>!f%Zb>gZu4Z%UQJyJn<1G8>e%9xgP{-gO z^vVI=Dg;*!&s~Iv!LaN-rnsE_QAwjM@fFFJMIa8wJ}@9VonqRS%xxETfV3&i{(WY_ z{jEsaEGO<*@0b}Xsu!og>(2Q(?B3xh=d1Zw&UwI58Vp5#4uVMEpv~=+WkY z`_}o{ZfAidgUmqs%LUz1t7&BQ&eMHd_|BkROX~gmU%{q}M|_XllxL44WIV)ks#S+; z6sddY>roK+BlOWnvX0dLCOqiObE8p#(mmq>mi^- zhcGQxH|LlQC~wYb)8}{vXp;(!-YDS;CRsQricUk^f6*ja=SRSD*k>#s!Tq@v;DI}I zoyD8GW^+C(efHSWjFK?KKE=5p^=BmAT{K0%I(T4jR$Y~2HOH)n^A%PFpV54MS>Cw1 zXhd_$pOu{e7C%tFvdEc=a%PA){{fqCeKfna^6I3SPSJ~TC0J_7)h~LP}M7GH%NXV)S*`|*+TKq_vk)KKo`ZQO1-zUvNryVGWGxNygKQvnS zE!&NiuQB~cgm|$cqt~){%~gN$8b1%Oz#&o-Lf%|mNkn`0OcD6qF0O$uBVTK6I{U+N z(7Pcl$bO|oe+N9G_HU5PA5JIU1QK5gtR{J^(oSwD$9FVd9&cEmn(biEO^H#8h0=vz`Xf)8rS zRtEklF!9eAQt)DJ>)Lj}wt49)ZVOnxnOa*%T<=dbvYgRkAp(0apd_Nkig1fIz`@!h zilHsHKn@_si%5v3lPl4J@C05+6->qAMvT)?28~NA7U5=}&sgyu80qEuc_cJ% zaU#!sNnL|Zg~L+rnE}ysF3?k4!hs?vZuE)33_`?0`nfFZ2Ce~G(B_JD<{%qrM7?~U zh)JwnWE_j?73M((Z;A`%5H_46jbhn#cqsUmgZLCY&g9qFb$R zQ+-8qYE8=KLa^?55H=V%=nJ2S^QDL3uKTWg79qA!97I4k$1yt42Aemf5wHt`6(F^U zx7;w=n~+Q%*hkAChEz98fDR2E2$}TnOg6F#na3)H0|fv=`($y!Owares^pX5-!3<_ zk!aW-;hc&lAJTUfV0FYxtZ)?me!Jv2VJzS#XNjN2z=XqMVNQ;x2e)Z7=@l4=DU7HNnp zn38x|S%m{<8)rroNloZ$rOV=d%Qoq<|3A+YY6c`@A&R!o6G5ykzZXW_zame7^XqAe zl)veQMz8g)AZ*+NVl-w}R-kvDX(-NCq6IRKY0^g83l!uj90h^5aF769I{;BLol@k9 zQvV9u@ddPFzqv8a!idX+w|yFP zpjR3LIxm_3o(~tZ8!}i`7-O%HU8I$US{7cc7r_}|fDS|-0Sq@_6*YBry;oP90EmAo zO|JRzpv?Za4rYUO37x-#DD`95@CV~;y~>1FER_FHd}KQjcc1& z{^AqkCE{(*lz{q|eA>`t1n0V(-z;p`Da5=E|stb zsxhp*i{cONxKYY@jOF@xJ>ZOqpUh*L$W9JW4L^`TWo221-=6k3>}v%49FLywZj$;6 zyU-Vkos~at%F>%`we3KNiaGPR9O&|@TSpEvn`sIY&93z(l3pxab{d8UZ z8_Rq$lteIRH#_e4<(H$XZxVHaineN2RJJ~N7NVe;Z*3J=-jX5vg1%jYiv)P2w%}n5 zM&Wdp(jVyFlXSY7TR}`cV&3NPuDmkA=g}skQ*c|hI=AE|)HgDF7j-L(r)&LH8 zA8MiveFPV@6qItx9WD->KR#L{%sqM_UE?ouEGY^cew*rMVPp5Yh!VOQWctO7p4fWF zTG3$>Y~UxipDY%-Dg3j8JGKS=_?hi8%icGr zz!IO~H+MewSjGSuO=c8l`gr2#9>EH)oTh(t>~!O)#o61*K!)}DzzkbkXP%!K``Il@l0=2{~xnzdp+XY{f;^6_IRiG)XVD`Z`nB0UO4U84!w%~eNvb;TU_j5A?5o8}zh zNko=*jFOCmfO8(dEP@O{%bU;1YMt2Qtl}?k>fwcFgVcMJBxUKUDXYd<{a@x!kIx=w z2_^hO2&|D;8QT2UN}_BySc@u=Z)2a$o;~G$*wnY7s-@^M%64>18pYS8j@SO^qlRh6 zq((Ze|Gi(LEA)n=>U{_|o&rzUVu8MJGJzRmuo3$r*nedb6daSkt&&?=0w#T{Xqb<(z*+|G*1M9O4egcYwx zg^GSgo#T~`X8n5zfZ2hP1qI`FMHUk-wq+oJ_)PIIG!ph}$@Ilq~Aj!NTx>x=aQl)#q zE_U4IApY6r;xD+&^PQh$@7#Qb*{0ms4MiU)&F3zGTZzoQ_5f-epusK)f6@7&r`1*jQ zy1G5dXfSl9?9)qv?2X^iBH{Q1vdn>m(V>rV=}$`ykCe4NfWK0xKt$y!XC^v}9e;H* zATiM(nd6_%sdHIo82#CLR(`qqG%n(XVaY400DEdhGZ?KJPcoPYNoGPOuiv(CUFR4$ zm7Q=pPf~R{IObT@NLW5eQ9=>WX;C>G6;~1QWi#q<`H%Zc7XEQhIT$H=(Wwal=5+<)_srbKhNEn4gN$2N~ zvOwAIqh0%+$Bt0v_jZ!IZ2zp_S=U~OXz(b{H@FHEs2{nsEYxepdH@jAZpE8v(Bu$M zYdHb1)-b#2QBcs)YXMJW-ow)@|9PSX7oZAAR-lR8Efa%5CnOUJHEqflz8LM?^Id!y&pQ16epLz#^`u+74kOBJUT$as-hE4^|zFFG?vD|1^b#ZXt{O_!Z&g&t$d;FH8;J zU&60x)l{`90dlDTi~wX3R~%!-4FI`Ow))I(UvumG@4bJH;f)nXEoeMs_g{C~Bevnq z{$Ww*nf8It>&^NQ4#kHp%n!MpQU*EHQLY z!4dxb3rMO=+~fdj0xJ)ZfG6$3kLqkJ~iO`aiMJrB@h76AFq_ypnl!nVa0qQ5c3Ev3WNDN&t!QYrg%#r0~4p zMZl$CrO}=)X^0)Db^+_?d!PI%`p=&~cllh*Q`ylKMN?Yp^aK13+J^-j9w5F8 z0E}b`s>db87WW|b%FoUzWII*{IWF6RNpQ-_Dv0LRq95T9an@!Roy z`FT_R1iE1N@$SWD50Lo4>Vp&pB=PpI~~mYIcBy(0Ot&c027 z1{+xWlPA}W`7^=zE|6Dh>h2y7QA)I^9X^~Xaeq1t`X$0K7BU043M5CzkZk6bqquU* z(h836?2Xy2wFZF~zK)yo838zPkE=w{m)aNlVD;5_JyY8dHKM>vjRJ!nTkh&$_Q#4S*)loyTYf-`Ud$CV&f%C!zm zD;@1{wch@V820**ak{uxuFWEdy}Wn^a+rueOu>QQPd>vi0RtqTq>upH

*}J8Wt^g~sL^YT*J9gt^9$7%dA5I8 zRXrsSy+o+O^xb6c1Hhunw|mju(zN0X1}}P4Em~Q7T9s(L@?G;Q9x@uv=+zs??+R^R3GxUf6W*>R+zjacjYDET@38^v3MCIPx zyqp|T8)}iGi^>MB$8-nvO!@Qopp1oGS$5g()27NTVQ}Yd66k{90kz!J0#_{&fzp-nM0F%A)yp?RU zWR**ncJ>5AGyZDdpYYf^zQ4Y39+|zK8D&YOZ&jykmyA}vNg!&)uR zV{eg>1y;)xx(lhM-=Nx6oSbGbgh}Ke%A|4kFiXPKiAR+ z?jy0ru8bj9OMYW~q=J2yi6LO%OY>gnr!+%H<;WaQw^Fs~(-d%sk*w(sIyN`10w%m8 zbKd)W3K17%1vv&T*)g^(CebYv=pBxyZ#*&R&VA53Y~FAEU@op$j;)mq)To{QIoD>6^)F?kOcQA0Gy( zdNK2Qv1ZMlG{8_n?2#UvcY@;6%-sH}zvMcz=KXKMFZ->;FAfGB(pejJ?Osk>k!fiz zqFP-zdCA-y!CV~pSV=NoWy(3+cXpwUCQ{=sg$?T{eS4Gc`x z_-7UT4Wr+~+#CzWRu`0ML8Z1+iL= z3?DvbHHX~y?>Sgw(!aC#lFH6$9$Y?GPP<`TD#stF5z!09-7$urxO}5I(9Hi2FWpdhd?U&FUu?d|gf8SCdd*&3 zZljI;jv;fIoB|18i4Ww{Q_^qd7D<63Ch@x6)Rtu!h>eiF%P-AfCY&5Y2oF3sbvmEE zYl&=7>#?LsK$Luoq}q?B%B+9#To9|b$s}t@PL@eq&3ak$z?G+V-og$qSQTB?6Iu~7 znL(F}F+|XP6fzEC$Ih}4|4bQdjwf9trGOkPg&2U9*x(cbYmQupt#hSqVXcNt793aC zSq%p%KrJ-_1>-zq$A!hsBW>CiR)X+Nc%Yd>5{Gf%qX(c!SOskg&4>L%9`#&dA(J3u z&}%FjK0O=~85!IIC@)?j6Yeu6^Dm^CxGpDFJ&-qrgZX}7p^PG?yV^ydlQlDU!{md+ zHlnyB@*f_0USj5b;r!J8gh(r?__4xp8L&d15I~;=Cz^6*06tV4JUr6O4dd&|#0m@m zgLT&=M6X`FV7*Y+?a~MQgT9VIFrCag$N{#2%JE~A4zmJdMs5j0A@_WI>%mr5;5^ZW zo33s~-umy!87?vA)T6Iq_8fJ*NHF{3S*8F}>@^yQ{)mo_1}wYp8ZhUKkc-b_(gO6# zW!k^W+OE+a3~W0B_87otz!EN*>@}PB8W%!wKtwuFs9s-RgO4Bb8Ux}nAnKpUsq@2j z?%2^sYL=fhh$^A;w#2xJcuWZfl%_(F(DJ^#dA7%z>&kTf?UC$WK`loOprh_~!@4-W z6#$Azz{L~5`gWa`=w0EpuA=%(^p_|BM3H(l3``eHLEA)#QJBD&h`dd6(2 z(-7ZDmikGFEJA8o0VZ!hunHX@AYq^N*y=MB5MVN*2RjO;;C8nOKJy4|5Yi>%?``PL zR|x?Hr$nB=x#5XCMVURYyuxmXNN8v%;NbGj%&gE*qzI=0+{PUIxPNV7z|O}PwoiB- z48*fuUOGB8uiD0<3LN8}bo%VbNx6)!knsXyVgC?(`vIGL)W8>LON_D&_IXO*l@bAmBSM z;Le>FEs_5L=V$6l{?hN6-wUPlf0K#6M9yK=E1*u?k1on6Obc7XBOnlxl3KVsgy(gM zKh@|a4&gMg=#Ev^)bS4tC6(~I*!&D>#{H85Z#9Y3volGw`^-(`lA4w($6|Ic5jSPU zT}8!GcaY8_YQ+D(rNg43{(?8OKSucO#BMGQDy)M)_Yz!!>H7xXfOs1w9lWfD5e7}4Fle>b>{jyi+1eH)rWpe(rs3< z&;|?3xdL8h zY7stZzkM}u9#J-M>?0SxR2=1zDOvQsw|v-H{LcE^ukCTwIr2-WUCh_ z;G*xXN^wPd#9D!LB;D7Yi-HM9PhM3=iwvitoQFk11y&Rp5Yl%FsmO%;S4PoKKzNt; zKOpvfzUSj7fu3%K0B=O#toNQ#X)yE^PTTMljdpz%!_lo5Gmczqp9ZdBJE4 zy@9iRFFvr5WfiFneojJjwV>0#GaAArdm&#~brMcHC!jcNgLHooGdT`xy4G%}z1<5E zdXBNbx{9Jp&o<1fGRUJW=zVVBYTi{~ZQbOpc>b4V0C9`X^Z!s@pI_}A88Rcw?_(GH z^Xfv6RD@<$Bj#|6k`ge#U{EdE+lrucrc-4Xu2q5@WgvLDgr?Loap!-QYQ!t`eBv40 zu*WP65q2bQd>#Ifw6H$xd@s`3m8QZeoLSPA`@65!%!Ng}x*-Vwo-RUq13+@lYH~blY zMTaKdr?e^{?@-xZC**9TpL(vknc^mdig!y=uM4RQk%O|?2$%< z(^MbfM!$DyfbT8Aj4ZnIl@44vj^49-MOdZSLYl6Cey0Q62c|$d9>NTUs**tCmJWC+ zo5U_#ks-FXOT3DwJwErl6@*XTJJ2IsGY4`%X9vI7wQj*zJP*V+1E8hV^Qr$P|5md}8RBF7 z$4lReCa{;g3R;e%3J=)L1tXZR-))xuI#ilkYAkRCJ@TvG9e%Ybb76rm$rklACjIz9Ztzco6)qYr9&8m9{ik`n=1c z?j)i*G1~x{e~QRPI#yO}N;ZIC13F&MgaT8e0F0t8YG9U{%Mw#FrcsJTn}vrIETJ%| zkyj)kKNF^bB*b!&OmT{gBSGi^4FzoqH8E|KCr-SiXT^;a++TwdV6>RNlp4;y55nuI z7qf{_gQmg#k6NP#!jJ)~6pn)`5lSm;pH|6%GafZF=Lu3_9IXmNLFahKw* zEl}Lu-Ccvbdx2sFikISEin|qeg1htG{{HiR^UP$1gvrcJ&OO;@?Y-7s8)OgZrEHvM z!1KCb&={^wFHfzUWl}ntOV2{nfT<4MwR_(Ug9S|pFKB}mw@8E21IG+X@Z}pPGM+;D zjY#~i1psE%fQk8n{ikYtwN4cwHR`P60q3ucj^93X+TM2pm9y>Ma8!U{ZSKIlxxM88 zoYOWoD5iK1H}|VUKoR|{0GO;ESUcnH0o;$<%3snStY>TpO= zSH^$GuZ*9^Jrx*u(J&JXRzY^pF(W2xwpG?a7>R{>MfHuL;=;&I7*=Ep(q`U=BlI`aptH& ziAAjMvbgm4L-7NN%}OIz)_ABt9a3BZ!oyp&l?yt%xBT*5>f%=}d#Mo8S|&R~S5npg zdB)EVeYz(^zJ;#d@ihhn6qI-($10ZoN-T{(OTW#goh&+Pmm z$v0%Yymp_^36$|22;YK{rj>S)mMR%BrcWOuzNXqq%ySF%I zJ2IM8;Jz6C1j3kU4h^%_Wsrl)A#Apj(`ek6bxj#^fCOh1hY#~!6#npo20c{xAbope zh!2=TStd(k=-bV5>o}ir`?-kS0zV%H0HQ3akF(J)*j=qG_TD4m(D~^LV$Z5TME8i6 z4|!%kKHnf)+`?I~#8`*k{w4j@0JfJDw~znO$?SIf4>w;pgg}vC7yB-{KrEUK+afDg z_83!-el8Cxo5^Cvv3I&+78m=Q@n(0fLK#cd%}}GA$tN9XR7C^D9AwzG8e%M8A@RV^}$z3-+QtUaX7C$qSY0OW+x>X7$B=9FL6z788q`%9S7 z^Q1tkXSCXrMjS;ZT#Doyc8l{U>r(^?n~m(7RiSvMj^TphfRx14 zZNWS?9%K3rL3l;$0=hm2r@w*U$7N+nxZ$@=k$CmtCKhG(=p-lG;N*@4ormHyF|1(b zE0$*1>~6uvyy4I6i@PuEd$a-+G<@20wW|YWjv?iXJsJfM==C2zeOp9Z&xXSHu!tU= zSGtz0UZ5cC(g1<2B96Z?gG&()Nms^hv!pH`n3TRyQz4sV8N29uvNe0>*7e*+Tq02n zoTHO(H(xsprW=MzkrP|EEqL%KnU}w9IVS1VsM%$^*D8Z$F_qsm?1;~h+tu6W5|Nmd zUF;B799ra5j*YMEufF3~a_M2xvV6D7wW5-6bjU4A5~;T?*c#W@WA760R0qf?Cl0$- z2b|(}g5-gy8x+(Mc27};``azwE}rNY8(8I>-4MS77zL;KMcFijpD7&6hFDcby1juy zby-dA+6xo~JmhL=5HRXq-^Z?>tDVZ)nUDO%u6kg)`6jN7QRfzs|Ciw9MHubZ0*Bib zN>wSHvC7iR7w4rl=XzMOahbrFg0+Q0^|qhntRZ*<@)&jAU($;dvc2Q6GCZ37L z?WHu=!HR$Y*Uak;qZzH!BMur{*U@ZZg64j1mD{BSSaAfC2mPIz&X_duxCrK?DXyqs zl2F|{Hg@jcMm)EVC2Gvkd1U50mvBitNYr1}#^+p_Qx(utca7*9N)QGKR+}=0r^tMg zYxmjvsisp*jqcECrmcNEy9ywCSE)?s+47Wm49e}1l4np$n{v`aCgsoEdd^q+dR^*; zQ|A{_$@v%6cBD#4?N^+BMpvuH1_7{@yuBf^5mUAvKlmFic6^YSrAs_xr^r#|g zcrAB#8uKS9i1&;<|ttC6$YaDPr$aly-kxcd`5gB!)ThuP2;US@1s!bh$ub{oKjz>tO!_P z=Qz@I(rg-(D>RKgyn|6kP!%&!6$L{|%axe1sPpBql?E@8F&ugaV-R+D;lve6>VKs6 z*Rg#4lvpnuv4!`TCm>f1ZslRbvoze}@VK+1-oLH`e7!5w0V)k(=YRnweDPP`mPKuq z8kPX+=5rOV)x6#jcZN%mlyRM$bE{Ox*35UCZ1>=G*{wMo2G(dKt(QLu8xSOUL0RNa zD5>!I>i3_F@i}RAkn|YOzdK=#v*0nV$Sjd*5v_Ot2CRocPPlxw5+?5(DerWpjiK!E zFSy8vFe!`d!xuQr#1)7_%Jv_uzEJ8AZH6Fe5{5%N1Dn^Zn^ns#mH$5I0;;3q|hdrJCn;$wN%6&tFGFw#Yuv(s4E=!S&@;hv9V|~3+s{g z+FBl76Hr7#>sk(0JlUx++?UMeOalTz)4ptWsic^CLfr%>6+@ zy`m3EyqTz;blLdFfkrfv8jH*3%EsoqeT*{0Op+yGGz=7Pj#-E}dje$cK-V{a;Hyxa zG%Qe&T8ji^kHcn>Uw(KJ)Ig@~cvg1y#@WSXe*%>k2-2Y=t`2c&rB8=)hrvIv5-0)V z51=Pi24ITrvgS>ni!9^H8bG-X5?;zXGhH z|I=-q>%KC;ECT2SQ2qnX??iDN+}tDj>7na2dDG%4~;-ubw7!(Onq75^UY}^0sb<2Z$ z0Br_XdNI_xhH`%mv~1=16Wa9Xpw6Z$U)}btM{l9oUdTEOg&|5Z#7#g6kkbKeh5(pi zV`CHL$&s>M?c^;n7fOG0K&T8>?6c{l_}lUFCQy# zcSMBVQ{d;rwJ2d7W9K}CVtVj5uxZ+kiV3~d|5RtHDW6J!*Z`ZvCaJLG(Yq$E-6 z&(MXwdV^Sn-ZveVB6+FrHh3fMQG8fbJUsGo$0JBO@Rp{N9Z?7`dKIQKSe4?qaE4QDftuqb-gY)DggxZt6R?no{^JS1*hi+78rj zHI}jhE1{nAZzLJ|9pTK5O9QjtfaohTY~ee#T$Ymjo=Yo8k|M-8ov`VkDFDj|dFHk- zE<+klToz>U*T&2m2^R(Qb@f$RuA2mv`s>S+^i(>$?P1tbhwNzS@6`;XBgr;9XfC^- zWFOE|-gNrea@?&22Zi_A<%}sq0uho6s|!~HbR?c0JK%aRL3T<){ zf2vsQ##Zo-#{@)n`h6HZph+$DeCz{VS0Q?Dy@wFJ0K2imX{_f$PN(i{u#}PacGN#v z3L>th_hIc@Snca+;AZDl%HP+>H&*hOsFhZ0Gt>6};bq`$tI-UT3C_z#eukca;26=b z5Gi-bYwHOus`VYOaeHn%mYXHxKamSG`XAU6@f#MOT`0YKL55B^ha%xtM(l0Sp<~%p z8qQ?qrf>xtRH47y1)iiS8K*t*D3pvV*a{;r5WnLd#PSiPVg%Ip5uuMfbe6}7gM#GX zS3IB#oj+YwjnR|iF4789mJFc(M+4^^ZuzSVDR|VC`C}TZC&v~)qCUXu4 zZiMtEO7*HubTM{yLR(!3Po93(MDF`o;U({w{XlLwD8u_?s<{R;qYGn~K(g3o6@&*ll35oZogC8EUB zAr2Alm7!j#O0!s9JO(IiQF*{Zt7HnLT8t5M{ch5#cfg`iyD*)KVsxsGk7<-jxc54$ zmYYdy4OK2MuPT_yuF(j3N{_B5#7bQM_M2c7?d9!Zv6LD-$KW2)Rm zU`BoS!>O*6im2`oGVwLFNEu_)Nq$S65U8|SdZkLA2Y|rxED)q7m3E55h!I|Ds_X%Ly zn6~UWp-=Crbuz4QC%J0B@-@QgOQ1#97V7nwsIeE+X8lznNRYq1MI<>+AUs=QSwDNM z7Qvk_>hlb@G3&^7t<(9@C9rjH0B`OHc75<%-t;f3WWh&OR#AC4ZkRyov6vu?lWzCA z2q}0uw!|GUv~LN!YyKnJ*@BPC))QX+DZBBZ$yc5M<@0Ic`n}G!#TW;e1d}8 zTU#MOHu@ehq#dQlrpX5Bl&Zc-T>OKsN32zWuj4rBnj?vrD ze&D5X92%HikujQ70kgbpjkf)gFVU z3%y&0&3|`w{Js4)V_h6`cS}qVC+&AVCDZQrkPPs4&R$-!UF(*PjzEQ9vTJ=GaAy(n zz6|RzGCEV1i;T6KIR?4<`>)PEm~j}_Y1u7tOY1683htvtN(^qZBO)TIYs|Gv-1&6) zp8U*nj0B`eZub|Ws?qZe!jW=MgcXU_eICh{k1b-2d)mz6KW0UfXZ-xv0|OU*5U8cy zCY(LU6)L^zvhLy8JJe9HX8HZp*a&VbVkCB^2+qPbjk*hY3-=#!uO|tR+zWNyYpt$2 zIeGk{j0B(X`q<7KB_&jpzaGHSfxMPWiCn zm6=HnpQvtOPfCxpSH7vk$eAYsNwVo*cEEv%5DA9Pk*9z7_9CG=$F<#=wC5^2tXQgX6oW!lzX*%#7alp;jH9dJra>u#qTuMuaY89wZBgO-qQNPR%Uz*gmD;*cOoXu9Vx}UTmcdI(V>XrQs6_@oU|~D$U=fTQWC3X;g9-&|)%!x0 zs_yp6OqTgr&EO6q{3gpT?D;fPF0^YOvh_upF_VMimL>(7hoyBUw zuo)sT5n-1?LcgDphpBE~8WE|9`TN#&oJ7^t|M=vDTNzW^VZQ6+XVUPqhDM!1ZB80q zJ&u?!cW`@C-^v1|@L7t#IxjP+IVtC%h#>k7muN9+rqVowo0pfDB|~AY(_aAybm!-Z zp`oz(BL07d$OA{0mIxs?znrOlD>IL`N>AMQjQNS|pPtq@TERsbyRGSI$Cx;f>x|_N@AJk9FxY_w_M&9=Eo*3DdpVMLE#{{EL=f6;|agv}h zv{OewTgAPznP9=jER+TFL8Y)s`$Z?R?4`LVz=?kT_bo6DhXaSCb@riK;@I3+2 zT=sz9xXCXw_qMOuwH04(F~t5(_KXQE8p`=oqExvLte%h#4V|aFQ_&v%NEZ=w6|ImW zWTpVNy9FDiaF)2bkZah6xUv?4k7udic#%6sN~7#V$D{=Lc@0g7J&)*X61dlMI`?AOwEox2#cF5j7>^XhYVWtRUx(g9EIZzO^RPJl%}f05kh0#^PGi>NTa z`3W(fnV7TDkWpSn)o1G2x5Of7bk`CGe)ON%dbOm; zz0=KeggUD|&&)fIhTWTveQSoH7mmFf=FCb!aOE(G1uz4!e@1}qa`iy6$cOgt=Rg_! zl9_}U1x;NCJ1hP@+(!d@&$RcLec2NUUL=;`2sYXW6Sj7$XNG=||UQ8@RcY@#!v zH9 zmrR7@{QUX31M8(=XCFL1Dr@b=zq^f2H_1KvQ1D8>S!3jrx!!TE0sQ*03 zisI(8{^*1&WHkwrl&Z%E7?XL^r ziWC!Ohnr5nQ|MKQLLLE?wz)R8xDihd_oKOqukT3M2@5CAd{6#nmfISgbxV}vI=psz zWxY-{QS$x61OL#_P;P#{i-*S`@M7k$kurAwJNt=1_%7{cLZA^smBA}0+PJ;5&7#szLa08bwzaqSZK{X?<@WB5UqHZwBTIFG z8;HCz31rSrPLtn76Td*Kau09CRilu&(*R4tNi-Ne1XT<`yGa0xFjPUfz zgCx70i}E zipsbb`$wp{&I|;L20M+O9a=Sfs>P!yrA1$=1r6rBZ|@N^&70k=LOK~65Z(TyzY0hZ zlYS~0^TsiYlneG#vTE5iD|5S^{BqRC#Kq6O;EFErUGT8TZMnbq56e<~g=2LZ!_-+Y z4TKmyiz0bPmMPQx>#(SH_PA{yJ zZ6CvjyFvz-?TB^|DTE1x5Em?~+C2S{jhp%3il|s7*(7AEP{A_&UE2{AdtP+{QjTx9 z{P)C8=Pesk>%lTzurFnkPe{*ByvC6jALaWAvL^O(^YShpZy*u4d}ImUuh0Cf2JQbg z1*{$^TBQxm^Fwl58+?PVmU4vpl?;V&^cqwd^&c-$LTTCZDJ`+@bxXI@R&hTD<-%1d zwm@pGB#A-AgB23ul8uBYzK&+~I!H-3l_ss_KRdEqpC2PqcyZ|VWci8b0L{*WP-N-_R59E#54a67iKxOHs-UR;YBEz z*hH>wNM#{Cu(9aTl}F6pA1o#>9>8ujrd*+5%0U3&2N2HUyu%T^wJt?1-h z(5b=fW=4pOSyT3B4O{W@NrFSn!Kfe0=&xr~Vcl*5y8tF@aa^z0`*5+w3EWr!^G)On zn+hq9x06FH-+>L^x#VX}iV&b3=U3-w?*ddRF_nk1=|uWivu}d!h=ZfO_(Ls>wx<39 zx|~pL{J&&)co-ST19A%rT)n-QMh({UPsprfZ#oQWmzLq)fZ&oVcW)mVtoUcb!|Kd6 zTQPNF(Y?@VoV#HUR*7t>r|5($arkN6UNh+Tr6!5ZqAmxBN;oABvN6<*a`r85YL zl1tZvZc#srVrb0V>vuCIKg#hk_d+p0IQw(AO%s*0?xsnd7S}kMh6XpyYdegX2hs1_ zTF5VL=dIIIhY@2VkpkW-g~pTon-QL~8z058nZb(C6K#1IdvPc z6|&gl6}OvfxB5N&{4b|^cA{W7G0^Xk9l!N{zo@_7In%OLp5(iN6HvAD3P+n#^qDop zp5NgVRkq9?Y}ZRmi%S!(8&ncwK;yq)s*IrwLNDxjw3j^gEePrDJjP;=j7)~{5sv6I zt37s|gw*YCUfc(zT(i$RAu{=HvbYWwQiDca^X^jf#*wjWXDbDCJ*)v zVVW(nVApl=6)_LsEX!YzE%jo>UAwl{C1!gt^Idkw^m6usmr-oS15szLODO1>&g?Bt zH}`R#sw7Wg{6JY&T4+p^R-N}nJb|-s#PdS*NU=+_ho>v|??h!^*&ED6fh1pw|0Y+NsH@$Kn@WT9v!ss!wPc5bbhdP6>qCVBsrG4 zJTot_e#dOaZhZ3ai)^8ozj2~WvRgG|8$%*NmY$Cv- zM3l!eE)E?+)!KfoQLU5=NGMn5QKydvk2NxVqQ;Q?iXKtyr6Jk1%4|ZHSgpydIl?_7 zxKi_BFSkA4<6m5Ir;mdYCgIiQ{e}64xc(xdUWR? z+&27`%ab+lZst!+sXR0_m6B0m$9x+Ud{Q0QeoFe%A>Xp{xv?dql+}n2rD*ukW#ba_ z5GW)k3uR?PPsTT$DagurWQxG0yo5Yf-x;hVi?Kt+Y5T1>Noq4}Ej%;y@;k*P;#@J- zAq{UkgbDd?VLe{56jUY;F;G}0Nd2l3;QNj99*OhvbSw2-f#|_tco3LF)Ev$M?0T4u zcrkS}E2U1w#I0rr4;DZWS3PR_WNn6LgI{m4_HHmHyQh2R%-g55Y=N0UL8aD`rq7^D zM$HT5%xQeZSxps}xZc83<>-8bDk6J$6>nr9cSt3m*UC*)`}TkYkw;YnKL*~?($eqo zuq+G&Vz=yl$7*msQ}CKv|93Qd**Nt1xVkd>!EfA-#}o(-aHonv4k?8iW5ZC3REjMV zuI-A`5oaFcqxDC>EJO-`Lf04&#>&dd{^2%f=N@3}C=eJ9-TwG=ASf0z$7j*x=OJ9$ zK--7BNLbfjiN+U<nyO>cRG#8PCizG}m$7^p3P$-(%4MhhM17ab=Bv$fslP z=qMThX8?Ev@@^pSWhcQPwoEu~hqj3M8(%iV*sv4}D4$a);dF<)`E|{|o@{_jJ#@Gu zqn7Lj0Yvxsn0R?i7o*7O++9;fNu0jHy2(KE3Droif>@a5l4tbT0iAH;1PP znuL&$Arb`1ywU+TSWD-D&ciWhimo~cE25dmd9tav!I-Wz;R$TGxqn} zAD>yhuXguyfsyo=N72^qGAHvJ_U4l6J?Mo5V(gN>!X-kI(FK&Ryuk^R?oyM;H168Z1bidl-tdlZ-gc_O~O;%U^s&r@i|b&9L*N6WD@#XXMITO0E>g@nbO@b0%_ zcx*S9?%L+?!Ia;;99y6^e&&_Y49{U>-a{BhIE$kTz@`#3|9wKJibXWwpaN4b@Q zbdGpNAH_dh0<=r`hEsO2c}`D(U8k4c+a*w1+9ks?<5x_u#NIix#Md(|GLeQD7VAJk z&Tn#3^!q%^J7)2Aa}nK7Qt&Wye!b7P5yOkyKGX=pk&eF39tV?~p`vYPe8mtI2!Q?C zj8q;@g**BZaLA8lE>>R21j~XkNJZS#aok?v_1@U^-afk$W%^?nTg1bi!Cq|K*{w?p z2V2pslXH3GA?QW@THAZhDTLlLEUH4~do$cPzS}&w+qhVlB6}kxwmbZ7LKp3|k1xK?Aukz2@8X2o$=T6zjmqX$Dy1b(O|EC4`6p1db`fY~W(O~`Juq>n9zrjdA zjqCQjv&1mHzrNcyGERNJ#}>5zgZ3}A5px;eSvRuxpf`2I5^5ECd<#PJI&9@1m)4!z zz4#mmaz+_*&G@{Yp}JxXY>KUnFe|tU8=Wi;RJ=^l=w-*E#&+p7Ed^x>y8-U=<$dX{7vhtl(rC{a!@Zp`?6|oxb4bS}LJ~>HwHpC<|>H!Q5-l3|2*fxl%1FHen5jU0U?JXal&!4HQjMR>40mLyk z>syIPugGGGpD@nyo8cT#VruACs$jBe)KM9LpQM?jmL+$m9rwhLnD7>n}bFkz6OI`>>1qAGV(t{T=JH&X4Ap;}a5yP0oSr_TN1O>ay0d z1Ei!S_Ayv$u-axzoVaqP^WFETs2sZ;p(=}G=b^bBHk;-H zxBJ{g0~_LB`EJZ2E+7lMPB(Uz{A*?Bl%!0F&M|{NgEgfuXJ1(Jw)vQ(6M~euy~|Ot ztoYLAG5)LZYW$+%ui|`dWo1QQWhId%=8wBry;Gr-S0gsr35J{sL&sP%l4!2`j{{MuvyWnOfVU0CoS~!+82OHV&(t>>9jyB_c5=k}WlT z$IAm9TEgm-V(ct+{P$A!%z%+B`x3YA>=Ji@2fFSRB<6Ie50f0QbV0_REc{Jne)^a4 z@sjccCzCt|B6WbvdFJkC*Xg#_*1h7FAt54CHnQW-_=e8k;2VZXA#)+B3-gSs*M%h1 z9k73$z{Dh*e+norLkRLgT3?$~zu;7AQ;bso0{@n0j1W&~pN5*BN{lr;#H=jXqqVs% zDeo1WCbr}^0cG{L+FVzPS@_HOld4RMfO+LntO$x?4824zWep-gQP}!47cN&fs>2RxSIg1MnoSOYre?tp{WX^>(--j7Uq6esBaQk z@{Q`EXOCyn-D#te8m_o{B@;*9ED1U5!{uBms{!lJU)Xe2A6GU88XZu$-L3XA2>pkIcnNdHY2H)lBKds`rb}-H0iH9ls-!FP(jjgk@{+qJg}!Q6g;y8+#%DT z?W-5PIuCR0Q=HF*GAyG-;-7@&H|>HE;X_2-K=2_Fp+m7aNl+Ug6emzj&@D_q z_PM4CMn5A5SoR~8xhM*?U3ri|WD1OXGq%+G!XspNxu*E<)IT-CJ-OF3ybQPX`}rAA z803ps3Kv|j=&v8Vg9L(NzsrTJnz|bhISzy7(F)7!s z-6J6h2)i@4`TS8N45Y<^?{_oz1xT42v{*n`08)Fn-%jv;yskj!aPy+CtOOF*cz`)E z=n5FSX?oe?d}v)iYSHMGr_7|8h`1X-NtT+kOh`(M1? zl#pFNf8Y;ETI^`r zJg68Th!gG(?jCd7{l~_X%m7D?_b3N9Nt>CkYh=3z>@|L|gA%rkZutk+X z?yiamCd+VRe=2c7(!%;qPFEY0ITG4v*Qd#GT-W7&8wkN1jAJCW+ie&w(I~^*zjHr> ztE>>2{k1n&`vKYAkq4{C_lvYE`&>%7ZutpkOCn`6h(|KX`qpVm za1h4EXYM&zqjA^=f`(`+3VHRmvYnRWTVFfk(_kMU$O8E>wwwR(@y^73;M?8DorW0X zmIio;In7Znr0bc}qBfvsxJp6!<16bu=Gz|D|B8^RUQZAE&llv+&~Ha7Z>d1$vi{Jq zzWwoD<_&y#aA3Q3{O_1Gm4;*LeD$pZExIp$dJN(8`MiAadV8};HF7nB?aRz*QZwmQ>UO5jwhKB~e!w4jf zAg6zNLHd}{7;(<1{BhWX$mpDqmY5=Q?Wkdo`zyr5uZjg z^?aN!cQd^Xo#(1#m`khEClP1)U8tC8-tW$mZ|r+NFDgqva7mZX{~5C%dnmo!CEkqL zCNtE(&0ToUU95#GwdV$i2%58ZQT0PDTk`lT z9O_mvKw}QKT;KJfpP{IDZ5TW|*bVvft{|%Uy+hVc$V7p*5xXB6tn+CLL!$ zEL2ty@CPu)0VrrG4M3lSPAGu0xw3P{lOzoHG^n-;+T~mlnz(-bKsB-;rcqHDJ#5yy zyGskyFWSAYB7l?Q!-o&qbMTr~8pRShR_h+PRyZe8JpFHa5sI0t(O)S80o_gnIR7SU z%A#uGa0LU_o3-AVs2uVu>##GAjAtiMgL(;8X_@3>WE6p1XP#qxP9F!QaHRKH&K$f+ zEbt}ha9o4_xN{Q45e-3~`F~!w?+vJ3u>F0w=uS=`lpN~0X zM3KRfxd<*=YxOfy@rXI=s{bon1c*v9^glv{UE68)l)3(3MUy#_>Dk(I%`gLlZ|M1; z;rCgZVEw3v!=iM&1nh6++7FZZt{N0s?3hVL9B@7q+QdJWaFjy!&VNKfXdy-&nd2*0 z^=`E0n}>HO2pt6%_eKR`f!#g;C;LUQOyS!VN(G#|Xe;K6#NTtU$yws-J>%L(x_Mu9 zwDM;ncQSNhnYmvzGxG4aqt_OTC4_}hX74SKt@W2vxzEl5X?1_M1j^Vuu~yuKLunJE zDxJ{~+tD{dbj8pWRp4in%?D`HAi-`_^*P~9ZE-HukmbeG&aq3eHm;oW>Sx`DCiTBz z$b8>fSN_cCEeX_XdQAM#{c(NLT!86iIlVWXK8>H=61g_QVkU7sG0gkxlwt$~8Ce@& zxSfy8Vzq-5W`9rF+T|i-3Sr`7*Og8dUcOlo-{(4CO@x#J1#a>WjepDDrn&|UpPt@= zWKyE%I>@Vw{hm27J`%E%+1+siYKF~PvW&8_=i!eJ^v|_#_v8&aEag?vU3Zvoo3LjS z|I#QU)lq8-75%@fi~`SEpdWJxB4+Jpj+Zjs_@|Sbz8GJ`-{{X%-q6V2$T%L}HjWB< zcFBu8-!Q+A$7PkRC`8zcnf~g$N_#bRCStfP68f7P$se|~_bi44<|V!<=;>LP%zu7z zx_$z!G7SqFOOX&5)5J;0L^=!;~x@q*qFnl+^^Mc;IH0XQu7h^>}5n&5gJm^83-=FyYvT3_v zTewpwaQxyJOYAPuME3ZuT)yZ7+{M74wJ6+vchs`ti82;>cXdxKQ045pzK+fo2{wqu zE&8g(dJ#i(@+GV%Ooy(2LyZ)cwvzeihM5Z0h(w}hDz9QJz9Q~Be zZ4mR_SH(Pnr$HBwDg0074ih4iO4~1~M>opkEcHb~cotsAu1|`^X&6i7? zsLy_pG%S*``cwH;Q&Wx^kw!ci6@wh5otdG%>>R$ha!6MGQ6sotNS3VPo$FqVZjFNh zjzrvb(ZLNt;C0xG<-}p!T{*2$2sM>WJd@Elu|bkJu&Llo-uM3@fucz>1eJt+jlBhz zQy0ZsY=f7y2Jo2=7@Y~GlCmYhRxY?ucrxG`O8#gRDUywCsCTwT;l+_Z;jN-flwm<2 zZR+pmHDAxUg}c5Q&d1Vm_XU`7fo>!6FUp6!W-a-CWJT~w{I$1>v=5kHCwXTE5iyEv zbLJpJ=_Zy|&mV7R~fL;r!;?_J(U zH1)aYs|McVvyT>U8{nw}sIvyaX-wkEKPGh>0O_Xk@g)^xCw+C{H5&7d^&Y6p{K7-% zM95OrU*HBx%`q{ko%dU)z~KP3xUyo(MFiA*bJ7sHksh*Qr|G&{%$wy5s^+5SY^3yc zzi0-e2jDEb0D39_<&%(;U+rcXUM^V{IQ#jHo}Aco<_KjT^W-)xU2$FrrVP6NZEqSr z^5(f$d?iCeK<^>>K6+BW?3x_)S^Lq zJCQBVfn;R)X3>Gyo<3U7(OjEn=h?2n!*`fxxydvgbA_3$5yG<|hq02Dm#|{(u@YZd zZ&0gUtqB|#-)0WeKZ~wdQwUv?mm5RYo{zE@sanFtYG_6jKA=SL(PrGT$t_hh%~~9 zB_$7Oi2r`GnVj7Y@@@Ndke65ob-HlGi_EpJFx?DZWP&nb`~)LHi)?Mg4M@G{4K9Lp za>|u}l&Uzv>5@d798@W?rO7}$_$}TF%?im14J<8W!%Swj<4IrlQG4BO>O<37t%nE# zG$=ctitN{Gb&&-O3)ae{gB^605Ms(n-5*sh!TRST(&%zVqW=&91E-$uB_X>g=@^CZgjyoHB1?`6}; zi%0ZoC)$uSE7S;OMh(AY-JQ=p(o4Ys2s=|Nd+rJEe0+(8)Hduu8;b&h0qODOA@qIAGomxBVuXX!KjCK0! zc%W;nPn2H?CejTqOc38Hm2a^gEIV2c#XH`2REm|JfC@PfkUR*F76$VXE3}4-5uQ=dK6AYR!_mPBA78;i!!D7Xl z1G~zfz#iF#RP_Xdsej2jP1O{YOLK+5MWW)xOD4O)v%}@W>Jj0>9|1MpG4zxMSKXyT zRi8dAUm5a?gMp#c7ihjw{$Qkv-0|S8?ihX$pW?eHxG-57D^Vw~dDLAHFBTESY@rxV zkEmKxvpCRg;S>D}Y|^YrB9`lIB?GZ@Iucb-C&fmE!`#Ap4{P2ZgJqC2U +l)k7kyRW*|@Ff`roa+p4KH^Fu;+%blL8YoHw;cuf1~Nq?KgCKK))o z-xeSp4N4Ub7X6;%Sf(YFO#hX!+q3*d!bH^;(O$C%Ufq!>e`vA%Y;E1VzD`}JHN2Yp zV(1Jg6K-y}H8eIm94&r3rqppxwXuvuAf4q3r>vA*+@?qEwlSkM)>>};yzBJVY#Ax4 z9BXDa>_@t@M~q|v%+9VmQ{`@!D8DNG1hfQ6X)_i%k%^j2HcOO{oRhcv3sXS9+Q1=d zP-@&^z#9+it%0mTy|XVc{>xmDZ=V1o((8=aGJ(Fc-P9CPy8o2leUa$3Ki_-D)tLDzblBG{?AL=U$HLm(F_-fP zi7y@mZLlBly(e7bSA`TIvww<$U7-FGBtN81g8i4%RG_uC5A)b_#Y3Yg^$`6{=!&5m92Yaa zBSG6)K&0%TZ|>K9AxK)AUK)5_cx{?YMOS5B_t$5G*BNz#cFC*SF~zaqwm9Q1l#6e& zoJ!>@Rl;6W6r!HT$QfM2<)ODNsuSQZ*yE*KIda6NS14JR3V2&iw`EHUd)B4N6Q>?f zJ|+A!yWYO99k@Z)oE(_3+Hm-`iW& zPN)}8^ZCt=$;FocE3PMp!zCUnFqL@yFG4~`SO2&u5Lx99{|V4aGXLG(bT=BL;}Uf-N;`Soi2zuaPLuFTUegXYFdyp%)WL zLtQDFc=z@M0WdEKqr+c;a7_7?smgrgW(C=D9~D1_2#X*NMI@kgsG*GdvBSjjyir0L z>4z3vjU&wd{-0sF9^kR-o*>15Y|}6hbG8{xHjdaYMZML+YmIY^dqIW*FjNycu30Vd zk^@rbYR;88V&nL@w9@7r$a?ykdb!>o)-Zkwi2&gXJm;36g&s^+LTQ8DBWMy2jnxx_ zHQJASgu8!bWEh=*4gUYAdJCW^19okgW23juMLMLr|Ht<|=b!U2;|w~32)OriUH28YT%Uy^Q#VFbhj(CMo(S0TxC-C&g91~OE{5OvYZ1+gG?Grq@2NQ03C?XrUfUsFiWO2^td z^o1e?n-wd>A&qYs&=f>8@i$Zkl}bQQq@!lYtL7&fcAAi;f4GG9S3;!-hz<;7v?{m6_Dru8Bh|aCDu0*q_!99->+aI z!x84jFp;Z`JGW!FJJt@D8Udc2!$)^hH6!g)a^*XL&->FmZIS6Uce)C3jJzy$^+A?V z_55hnQ`Nl@6Z*G#yQ8Bcfb{=!w{$$5{d06?H;p%kuNln?WIkx7S=JwlvPRlljZF03 zw-NPOr3PJboQyjt9-27tB+r{Kk9y{vPfVOXjE>){u1*M!c>TB>nYg^d#9aL0r73Xc z&4o@{ASp(=nF`iA{7gr1>Dn&obyi$<4_px~Z*M@^`Z4(lL25EiVCX8DikS^H=Zl$jWM+>W+O_C`bKgowOM) znv7&Qu}Ugqw|a6}$7=vRjw8{A_6J!3E{-L@%mL(;fJWVZU=Q~n6RN1MCpaHa*DKa^ZIv<~6@s-!lu}W>{A=kjQ;qk*s0jw#ct<NSUhzHIXM zqC6(AZ}{5=lqQ_L<3XD|CeDAO1FbN!8x9Qwcf`Ds&sBKd9EWOq=}(|1S}a^X*!hu; z`F-u-Mpe@P54Hxt;lF7oNy$>v1)BA1InKqA@*U-QmHBzEMT(`jp?#uJ`{VKddI2tU zKhH=5u<%tRvQ+p~Cq@rk`sx`?^bTA2rt#0!gvlAV>6J(*t2M&6f|j8DUzF$uaBsQhg!uxrD~hMKM}RM&nS&-Ev%zda6ef}xg0uIpoip3s75l+* zj;Y*;!oZ*OVG(q6VGG$gnxD|6>q(e}amSeq2E16r6$%He#$07izpoc*^d9P+!D3Cc z*=yf!cRRcN5fQn{p}KCC5ZOFhkF@odt{kxb`}ID*kTl46&gwaVlyx~O&~*)!b@nZS zB5Ujg^&65Xq&4YKX-IFD?tCffA9xOpc!z2|Q3SuNpTjsZZ*U~3i1AFm-yM?LZ-H#= zUoRLI-)J@woZx&FRH>fFMvNzovPHQnWcl#v<6K~WAW1@GDgn0@F)mvYhz0RguLymk zh_mPA&D!brSK_6lQ^tL0_wDuKD3Mi6k@V4i*6&?pWC_9cgVPw~^&Ijc!9PVkf4y5T zDjXr28F{g-Bh+iD_U3}+9$7)1hVN+?qx}(2nL&_Kb|?>zE;<@AzL?>th=a?j`Z5OU z((j>^T;h+@Db%F^S3VwZ9vPIIMC_&qV=)8pX?@r zar>o&{KV36UtfjMTdTz)enYOLA4iQYEM&>i{=$bO#|EtJ;27mew(W)Qs$tsYEoQ=$ zGsn0dMgC5BP!k{<3E1QT?^R46(y;_U_tLqI&o1^f`!{~IdPMjP%54Ly#LAZ^SBLt5 z5do$RI3e!u?s=LBx5p^BxVZsmvwjQ0)v_Jc`}Kg;m$%{s^{#hUX>PV+c$+tw%Xw6Uzc(*c zP0+KN^hneuytI9e8r()t`F^{Pd2NcD(pPvnDW2BF{-9R?)U+02Jh|A=y!TD$yO-XP z8JgX|;_@#FMV)Ue)}imx)##P&yu%MWy|l4%w8a8iF^0_shiog0TL-`2SL1?V4oltr zZx7E6?=9>6#i;Bp4!5&M?%cdA2ckI!eD2dGoLz3~lhJga7X5FVJ_r1l{MJB%zmcaO z%?10}k;_O)&}$kTkPHq4Yck%(3t7XD7#-9Cpps5Q6TI@%I<0IV;g!vm-ICjEe|@7> zpmZzFUg`PF7Dd@R0+Ieu#XlVpR+-SxB-+&0wsp?^`nHd|V?Bo3$koMV{Nr1K*+0=8 zejC@})&Zb)S`92PJYqNM%{SZ<95DUsVy^@shV-&C1gX>t6>^QiZ@SmkedHCKc}$U% zcW^75x6@uar$fwS=4Pfpa>UverxtfYefRMEw_PQfLFp6xT9Yq8PR{D>TQ}`X^Y5&NkWiE$WcA)NR7@p2${WU;Q_n37aabq`rVdE*Z5P$3+1-kEs&?wJXH#q0S-urD z^&342iLUYdmVxJ$w|I`~0s=7~T^l>gQ|oXq1|Yahl4ehH*%Z}Rd1*aj!o+6Yei_^F z_;Z>KL5XoYs@&}&)h(u62~JTuKkYWNS>^yJ3oo;VMUoO43ca((Q6PaPt zVJo5_(kJ&K^eA>uT0649C^TNYqbPg3tdh;0^-bAY%cC?cYG0O;)B>VMmkA@A&A2YY zUyg=!C1Pxa6#RfMX^ys^p`TgRd@{k(j#MP%z+?@~uR?uO4z7WOlD@R*657QU5`ih{ zg231>`ty^@%8iXgn`9v^7d18HZKj|Itwh4dP7PYc;tfN1N!uwO zrG*be5|nhLq`+xH(>fslsVX%&_Ywc0HlJs1si1H)@{72D31Cmp&Q7S?NJ>N`XK_+A zs+cO5erQI4@9<+J(iIB8ni=p7;@&JvcRkP!frWCxmr-ePDjz|*k@>-RlWeAV9Hr?m zal21yVMQ8~C?yb(e;u01>$V!*g1G!RlWu)=CM=~r&LjqeDCt5>o=}`roO(nu{7aLD z&(rS6YDCuhLb@5e8%`9BLg{~h*tN{K=8E5jl3t@6(6Eoj0y#y1EZ<~z-eymk-vIM7 z&+m#%>V9#;dg-cZA(sBaZ(?i=n}meK_wjPx_x`Z4&Hpj0>OfdRV(WBc;9st1W=0dx zY5+jG>>A>b$X+VwfNQztXz|+;>+#zpj^X}2-*QAj*GN3iM>+nLrPXD(lwk`-YEYA>XxBRpjtI45j#$?mV| z1Cy7PuM|hbe@5W{%tBw?-9WMYp+7e(L$PF1s4SBkuI*yV_mdEj2k@u@GB6xdAc(J@ zxX#We*6IV|4WDRE&H}~eMOEMT!cqlZS>q6A+_kh zKGRM-AxYjI{}~FvtdMdxM|4K&izN#f}U0`p>wpkMfLIQ;EXSkNLUJ2bVidNe0-?i#sK=;vHT@8c4Y z4oO0@cl+AWev0Wi=N3YC#M{8u-c`QD4@b=0@D^G_q2}#!c4a+4> z5{0BDAE0@&El1Ktc{Zw+noD!gl`v?|5)$;3hiW4d71ig)9hTbEfD@HV_d^?yHJIUA z#I#dM77;ID5m_a+IkS$+THQR}d!$%qlB@-v0#Rtugh+Xu$wSC9$dR|d(rz|0Ha`<% z6`C=`h8Eh}o3#3moLq&k#HcW^XzSM+NECXlng#g&^lP9oCOf*z{S~)Bjp(d)zQ4$`rv=%W? zaP{&dc=V)Mhe9h@SH9e&G_GUZCF9elY3!Qh!F(jGNESE6kF};C0VXOLBu?e8{fTGG zUK0uzz|Wmx_a3kkH`)tci~C%?;z(x>tzi~_UZ`2LSNP;ZOPrUNcMb?6|H*hToQ-=* zjmB0T{0ndQ#gJ^UBB!UP&$^8PmoB{?Zg_Wgc5-rZVik~qHgSfR>O+EI_7d2~eB>;0 ze_N_>6{lxfG?bU&@beW9`00W1bC&V*-TXfr7w~7uK)^DlFSzPpl{QTMfV;pZ%Oe(r zIHu1U1Ozt+Gzl?jI}%%7L>bOSU47|*GI0Z3w_WF%^-nt~mVKkX*5hm-KIWT_#D}k- zZexdtH3j6bkCKeZ>2-Z}em0HZ^B2wjq~WzRVKv6ubBe#5vcdrwNI7Z@NYt}>XAFxo z){(5J4_)^O=Q}JpQ}R3$0OTWO0!N!dP}!uE;&kyc+Uq6-k$4JhrtBFz6 zT<&@E?RX>?N-Ky_0?hVQD}7E=&gIEq!iXs#AZcO(F(@>jxXJh$$ZnpW%@IBCr;|L) z8Fy~Mefd75=1%A37X2ZNVV8OXQ7HBg51jz@xY{pGn)e<`EE^XdsO(lpy7aj$p5G|- z+4Lb*L?KpEs9AFZdy_F)5v8(;?v6T^>jQg<@ts3?$4Bmup1jAq6B(zPvex3WJBkKB_jeZR zI|C~^L$5SPr=KKj9!cLyMQlx&oR2l6<)JZ{?L33@eBDWn15hTO$EpqSWLIBo%aL8- zBbm13Q(^e2Tes{fZi8404!dv%muEi5$%m&p^UctmLoR2xjHNk}E?|pk!1HTqd!-u}CnInuVzqRN-zm0Yx9FKWB+OyI4bjqvy2+M0){-WX zMwZ(<&D$US8e9o)SS<7#_zp@X_i$S~9JAx9CU7zqZ1dy&;KN(fY@-X8Z~1OgHZV=j zv5CzO0pZi*2Rl(>UZ8zuMFxMP%X0EBWB9~^kCKFdh?YVa%W#Cs&HAxNdY~&XxQV2_ z9HYT6{{V?D2og+okuBFqHJPFgoPU)*Q@bevUwJnWG{o$3P4aU(=(pifSYcd08Efhg zy$LHt+V5>j!B_IZr(%fFo$ZkfYRL2;G9Yv+|nIRK)mwz!OZNhM#nWQCA98^G~ zc)zPq$LIzHy*0|Xtp8U9F+6TCx$OVm6V z`swIC7YLtsJl)y-XKi`AO#!g{-TT}U8!r}PPE3-=ibop50JwA;!;HB=RDFGwF1pbX=7 zNby!-K|@|TbWd?mmP0p2Cd9vWY!rj^rHdj$KR7*o@!fmRJIUhr#_jmI&!l@a4rO#X z5=hm%r+h0f4__@5?I?GDjvLM!HineLs@z7mtSoh8y=o9d6SI{oA z6N=|XV{tP~ICrq!eS;;ayfRaZY#gIH&^{XTQgGuP<3tE}nN&^=2Rh}vH^G1GoH3rh z;mox`yWao8(l@&Vu$09o(=jZJ-zVkRX zu6d~ZTtTVY)V^{$=HYak9dJ$+@K}-lupeXB;b-^p^3{hHx$;+Fb=n_CC_&69xp?8s zrI^K>EzR;r)su%Rm7PVlM!Y_cO4+4IrtbCD&M2?WZ{Rs5)NinG zV$b~Wd1d@75N-{JpQt%+NHH=@w~r|dj-`$4Qr?hW=2RPwh-}7Fg72?CpC6nNncJ6Q zL8 zdCdXxn%djX6Zz;{gWki^h+7>>FZcHPW{#YSp(dBc%wMy{%A8Y={8CFR=yIya?xar*a1yLJK0vIg6iGMr*#Ql zv^xlcK>0nDOXnB;W9uLKf&gcZS}9>3?i!i@c_1WYVAQZ|q_x}`)(}#__i?xTmkHi{ zrdI@18j66&5EwbBqhQP`t6^3O;+4i0Wth%?y^47xZ)`_D7ydd9jbo~e0{cecGah?^ z{)<*7uo)yY5`v{~xxlFE9P9%cOiYwCbu-)bx$`$!AnU=IervsiYPh7KL&Su+{j!gA zrJ}Q0$A_0^%!27w+acel7T6lnIWEhqh**72=W2YIZVUNAaxWKLh*N$gW|#2;O4 zMMAAviS>$tMQdrS;HK*(HU+cFngvtmh-u&hLs(^D-W~(TeCjBDu;mXXyWV$W@rO(7 z;g@Jpu+N+(dfhXqXTD*d-(D1Wn4_W$6-Pzp)5&3=5DRh9rV-P+o6o+2Y448C=5-MT!O z_r2Xp_dV?+0j#7}3=S2ddL7{Y5x3J{#*tH~Vf==J+l=;t6#p8Y0KFZEngVSCh2=#$ z&p;~_uomcd=LU8IZEc&tgf=iVtOanmdB~-=nzGOFBdXQwcWf4StYvNBwijLy(yj;}R;`m-8eyxr&OJ%Pfw87|H7y{A-cTuGM)P2Y8-uzGVmisF1( zv00qnG2@3RUTJEG31@a*RKy05g}^ zA=utMH6EGLh5XZ}ue;5pOn=?Qek>hky#3AA!jgwTyZL=IX-YcDSWe-y>PU*0_-B1f z$)JPQuF)M?kRu>qepxxw>$kS$uL0mZTwU28Uv6*WV{RI6&XwDLmVbZ>6|L3`XGgk~ z=)P&lI>)$4w(jUZAATxxkLBjgeVglpJ=l--vg}of+h$V{ELS~fIri4|ME9Bc{_48d zopoZ=91CUa82{v&O*U8!x0AM(TX>pJ*>_5@j~hJ)g2(P$c%}{>9_ga$#Eboh%|6cC~Q09c6c1W zI<&`Q43A64ofC-yb5P(m+0WFWXDD>?=_uD-@AzqxzP2Kp>8r>pUqv7Y5E31xL(f_o zAWJ-#484xz-DvFU$@={N?^J9}Eg_Ur z*#DiXwE3byqLq1|7GdyAy=KO^>)`oPcyAzXvZc1G=oy@~6JfLSJoV=icq}Slg*bc6 zJD+;Xo~FUwl(kXK2pt_gmY3b7*3=$lmuMrhJ>Ur*Bb}|^Kl(mOQf^U%-Oz5)e7e0K z$&`Up2sI9Sa1(GFp&GSSHF++5@S_0kDwSsiNgK=J2NghDq9-wI7V=wH^NP*3D!zNi z%!yS}PnJU~0^d@RA1gV_f?#1?CJM_RYxZ^p#mdLxOO7XBFD_qO@89n0Z>?)@9vwy> zU4=fmW0d4m>4xD!^2pb~ky9Sya?9h&CNO4$ubag+V0x^4rCOQ#?m!A`Sjew%RM;ht zE}fp)WT@>=tt1ny?~`-T*hL>s=swn=U(xr(895>)jEWr9g=^$T!KrTqEB&OzoU*O? zRTXrimgj z+TE~z{mD*VSMQnXX?kV1e?EB@h1^`Ng<+O36!;FRX(qkroN%}4A9&OM1)#D^RIL?0 zf^b7fjN;`5O1ZRW9fP=Gw~|_(H(LcnwBV+0KknDaS*#NqkhND5^DJUb^ydYZUDq&s~R}S(iDn-3=`G(RU*axdi_D&nrKF8X|1m-9N?OOUZuPO()q^H+41w9l zv!jUU4v*tqr`0wQh$1)fi_zGF13N%JPly>4hMqzK0swKPHpxk1Y53n7})I4WNX?w(JtJIEG<~ z4wK)VP$Qvpw^!PS?N0-{x!Ye4vzyT$_i{Mv($B)$j0QItRqwrG!9Nw(h0bS?-CI}0 zid{d11VMGVMHdd;i&L6fTZ_use*ci*kp_NW)Mh2P?`dm5OIH;8rJC;OuyYrcbVo+S zw9q$=_uxJk?2E0j&%OcMUaXaj?ijv)i91Z1;sw}GG%3^B>%MM=Bd^{0S?8uVm5C2* zXx(8OsVBX+g~9xFW<*2<$XU<`|D%j-^n|Si?3kX#CP?8@SC(>iq7SCiJ$ZH*^MW#J z0}kj@2v8YFxGY`mR&&!SpQ%jm8{hiVoWD0tSWZ8w;nr{aneGFZ^ciOjv}CJeMZ;2B zbRwdo1>D&x>9(hiVIl`duQgCx0Kl_V!IVAEjrnDa&oqutY%0^L=AC`QaK1)@qc{ZwV z==vpy=gJC;GnqacTF@E3=DMV6^oWJ0NdB*+^q)B>Wku>J{2^7!XN|J-bjK+(l#EZ3 zf8ovUVqUKEM8ZL6&~yY%@S?Rzf2`EU9H9JUBj6$A+4{j6{LVU;{%`>HEnf3IpiWVM zgFkp{5cFV#*XnBZ!qP7Is6$0wi82-~k!1@kucB5t(KTKtU2@gcE5;>fa9|Q+$AzQF zhRE=!Vg(cQ;-L;vy!|v&qOL8nDx`kS=vnwJg?-_Fy#OM#;;et&!TdZQgIZPo9N`NJ z1y|WM=p*1I`L;+eE_1}8H7G9h(=4-Yd%mR*reJe!zOxUQz zGxEE;Os8PYNAEn;QAn^BS7-Dmo=>bppAN)TW7z^tIu`#@|8?DJX6-F%Q%7S7PF_Yr z4i07srb7(YW8zBj_H`xi7UDn-g`hYAISTfD^@-A_T`YZ&;qPWup!+#UE*J$3qv(|a zfjI~Y3B2X~3$a!#<^e-r5%#GQK|GM42xHFp_!^84r3lBZ+TGveV3j!_lR)X8JD;3N zUNFcqCiW_>Z-qz;Q%4qa_ ze5nAC2I~Csk1OFM+#Q<_53C8)esW7(3H5#-z<7YE@~2s?v^D_W7M~qHpHdW*+cz$q zYJ~&D_ik09Yu*ju*iK9C;(Gbut`|dca{b4+NfnqFqF>Bljkt8j-oEOrwM&OhO-*Za z6*>k+MlK#61CEVF;P0{v6faV)H8rkM8jPS(6>Q9V>LHl~_wqR?Qk(=oS9$Ui zJ6{y&&tcrKHecU9b zFF#&l1EP&p09(Dz;1+ctmuhHysKShDpGA#z6X(apTx=gS#=?~gowMx&`u0i>y!nkX zt||sitpFYAhu&}Qms!oUJq`A(#&@p1>s?w;bRII9=rb)^e$?rwnfo_Q$MXGqZZ;M9 z$M;hwXv=}`f5S;PtSfmX!hlx- z25*}fkNMTh2u|KFJ$xCYgS)=S?3fmboZ!{GMGbfge^`z+MY9^NqG@VdJnL(01o-ZY zr^}t{n}_U1d-VyW9p>2`8ocrG$Prhl8%|v);iPFG7oAe=3bB6m)EKsXb>dNU{=D3q zw}-XZ=TMR1Qwm)< zI-tL~Z*hgYWT`TRy=M&*U)_lR&G9(+^7X5g+6it=!!0LE{$cP=(x7ma9XGqDRQ!Ak z>Ty)={{?BWd6d7paF`A9kl0Ov?nf%~VXv)5Zy@&(q($vba5B?v&kwAT zX+*P_KV0Fi?FFFCKjVilZBPQSXtcKh(Y{v`1zy8#n}F~M$DsvHB;2{=?A=(@yaYZH8hniZ^;W9~qxF#N#bczlZJBV`J|4%!hj8CsImI`V z>r|3X?j^tD5F^(qQe}4c5i7ham9=W=e&Lm&+F?87bQuah72_A9MIk0dh6WZky?^eV ze2@E6cu9Yd!Yw@c06j<&CV$Q5Y=c{QyTwK07YHxC{Gf9kQNqw<# zAFd@+8Uj;V#UYwZBsEjVJR%e6kBpku9e|4Ey+D$QwaqLgiSqu#&{<*wTTJ>H&LE~r z#Mj58#gY8`TEE&XJX6$0iU2H|3YrV-MG-c2;Q1o-rCtJYT&`xVNALo2-k%32(gESnv)k)9ZD?P;UoV1(pe54ujHmZO)T$C3}Qi?xgq8);QQ3?vd z2q6n0S0Z2yQjkHT-=z7Luhm@&%`c=YCNHh!Hz9uHN9PMz^SOxt9*4(RRtn^3j!xCU za&T@xw!Yrg?4gEc$~@NhYVrNQApN_s@a9hJVGQKP#>PnDMPDFC>EiP9pR{xJmkg-* z+yYMAi+f*00`wNyDdkDEgTl8+af zlJ|f4ByTqfe*f4yJNv}labtGj$h}T|VE*y=-0JmTZZ|@`{T6@blzUTNNQS@UW;Zpd z+nX~1HQA*f9K()JXZ6yfe}sAV9|4 zYujx~5#TE?2a4=~O1=F96uy?K4WPY_zXe!%qK6{^o6R2%Hu}cu=>*K--Rj0HehE|9 z^#5%(uADzOZHOS<&{dzZn^K>$p}ofKk)C(sR}0B>2HF7X-%oe~>K1ErLF8htiE+P| zvJ2bZmIcbmY2J9}r>~%kzwsxTigIa+4Bw;* zMwpUb5(iUsXWTB3tfax!$)4%X|GQs2fk>Ob%dNyae?PybUV7sICB^85ZaXbWR;7$h zd`H;YQs@0mvvE#Ry<*G~d3b*R)r-RF{^o%ZF5;T9Id;0|XT?1w^JQC!?xhj&E28Uz z_+u`|n~~1DwC5cir`s#G$$6UO*OKxRQs$lyysgQa4HEL}WaXSa26zPjz9jc|3Ts@s`7sg1>Y0^%1k#j^IDhAo}i)GX^Vs^@9PTmD?@ zd-oG%{Z|K|-g$%$t~HYt7>`l5DQSk)dmJzW5B{C&p)z%&cnjUr$`We{y~5SU#A;ON z$qezSr@*;~_}a1EpbQE@PygBdMEmT_{+Lw#1J?_J$HQuD1&zm!s=e>eXqW~SWIqxX z6GEPY9}0bMsbg1o6WcmQFe0u0qCHBbNS-bdJtLFWHEX(h3kO~jVEF&6LwI(sQo@P%? z`SAaFky>9_o8B^?oCYAb9ps7L)`59csbY@6d`v&5Hm{Z16>@w! zB^|82PzIWULjr3W0%A^Qy&kf`4eD{CcS7We#6O25Du0C9^TULoD}PIuIMrq}h5D>b z2H51%YD>!K-C{Wi#5)#U-0KUB<`&fA1jIIv@LK#r9jP@BbL)50wcNvd`JlpxNLE1% z>!F8i7RW&eGNkY51x!*2`a!|JsAO00%7b)udRyrDgMWlKg!9rC$R}b#iK1$m-U_@X z)qzX_RCh92>kYCdJtK3#Uj86BpTulVqdM(1gE(2~YcwcdVt8+2iua5rw@&~Kl)oxhN2}m4nILKe zn%28gb$qM+$~@?$_Bcc~x54CtPQ5#4fJ6nO;IWNG!;>8fm929OPU6^f(3M#a1lmyJ zOcs}1B^M!PnS;lE(KqJWJUx|(&nWegTOwAr){54YlfnSi#w4#vnM7b!(9rtCgr;Pz zYOMtZBYuyxk&BSlFjrK@H2D;#apuIIFBe3L)XgSmJ8C;Bs`VMO5VL%{`zrekz?~hF zZl4J<@2Tf{ZQT@R8tC<|WP5=cx_`zZAha&>I?Fi}YQ<%-51g<_TA(!F+}sQWit7IL zsQ*i>|C_dV7p(sk(t%aktBEt-JNTN-N*D^O-qD?wH;83)1vH4!9P%Z8+TC#e!cS`W z->VT|bq0$5g?$gVV0(T_aQB9*B;ErVx0LZt>&VUH&6E?zD}Q8xC%*RF6bdJ@Tz_eA zuIp;QQJ*CgCbwH1W)}9oQ3Ob4?V)#-)A8-qmi|+xv0qPhXUF=PBsh(R;B~I~d-bVg z=8F-cUsWKNVC?TwV)UW2Jyt4Bt+GeQrXJXPpTMiI?>ZbL%;KV&x#DHOlM{H*hL<2F z{gLiRnjd%m`D7%?+0Z%Qo%P=9nyhVkBQ-D|^74milkMD@81vkNcwQW>CUk5gnQ58_ zA-LBC&TWSXe(+NOIii38SD|7gt=KCNx@$S%MI#QQOn)s`^X!!?E(UNtx*rjJ!EXHA zQxfMFR9^kw%AiDPG2$0j^C-(sM*$w)?V%FyAgPayd4H`VUIJU4>K#Mc%-7U}3_`X& z^AxJ9JrI2a{OlO_2;z#p z>1ZJ9B*WeF`5))zf95NVAl9zSKR0JY#DB;%*`&Bk?lbDuRY1rQ_Q&)A5r6SSOp$w3 z@yDOSD9dL2(FW0!~j& z^+1!}evULoH>>DWhAFuZrMN0T9(E0zS2Uz`2z;CL zYdPkAjX?}M9EJXE5^NqO6bhmxr;Elyp2jUMW^kjjoj3POcn9T|23c3uE=adSW|0ey zqSV5)6kzfUL5#tF+ey&rWGH%Vd6*H*wh!l_l_+@3l=uYiu3;jJoV|f^AZ=O0?+mVH zhSP}aI2BmmklxPgY7)GY%|ao{unaE1VNfv%IT%A-(w~We#S)ug*9LLwN2_{c3zQRl=w(Q3Dw5Bq(r5WpAaU%?z(FI6W zB2HW3$V^qp{AOj)Z!X-ZX>n>u($XLN(D0Cs)Od8|N^7)CY0ZdXVJRsEI(M{qMH3-i zM$f>+luXnxYi~AhT;dTw^7?oc+F}#stPZ|QgaKBa{oK<>7P*T&do#U>pXJAlo^C*^hf;F_rlbxfAq zVWoevHHXllPg3)RdN0PGFbIw$*USZev~y571vVj}-dC64MtzGayX&V;T@E0nXkcud z5bE~f_I_0zT3#MqUteFV9u~`3Cj`;uN-xS8s7raUGzwiqd8G50dZp|b2)i=|oywCi zrZfa=yeD(jX8x1fP$n+V&D%el0> zo8eC)t&fag`=I!15x46}@^1kI<@D%NcG3ga>vNyb<^1x(o8S4N0D8Xnjfo@ZR=~}DROYF$_DBuMt;u{O73c0O%dqb^u z*!3^BeFzQzR+Gkfry6aq{61-syXuQS$eqppLbYK)hrElQ(pZ#3w=$sV9p(XXTm_NKPVrp~yJOVV`++O*cFW(b9N z!4^(dP!O0$3rFa|mrwbjvyFDIRj&g$w&${i&4m%%i{RFqMeacF=iR&gL@zj>KqiP? z5?qcLy(kUBgPzpUHSGvE&2+V|y{c&vB9G)v-VX`r-(6jZObn(Q+Iq>y&!`xkxjuly zz~(|ycx}osVD-`$6GMu6Wk|oIgH{}?zRN@q~~H&Fag`D;a@wBIhDPKCrwiT;($3+871;1`&pukO*w^m_tYD~CtVG|bCC#3ERB?_3DoOiKqvS_=2va+L8V3oLULf2OrY7F`KRLo+xiyY zG57lIsoPgMpCw8XbKXA)n{J100lLEf_lb0Lbb3ie082N}FEKMSb7~O;J8DJX{ppwN2dI~}HhK$-H;2Wo zO9F?@mq$8vR-H4V5Cpsen}?LJX`CBYI!mnKC^{=aavgrPe*!ecd797n%(&w3)AMy` zS^Gu17`{d+9Vz~*^J-fSNPNb|s+XPyIyOavu+6LzB0|0U4)mOifXacgDwQSqc#U~y z<6*Z3;e90l5q@9mcd%MKJjecymlO|+T7W> zGnPGX(B|0<ujRivmjL~YI5c3hCmZe#K`aig2~>a5Z@T{y@#jhSIsz9@_%VzEE9YTR z(&%%Sbm_J8e_}C>DDZ)o;fO8Nzpjbqg_0_=!|xqbgc^!r1+*pt*59l-bx{hT{MWtI zAJq5Ez@tOk>p??1q$UBhNGPgj3K=LKdT`Q){5t48!SoS|$|e<8A50FbkhVL~8c2<7 zj~*guU`&IiF*SD7n(>V>3VudCC#gzKWM_E=kYkO)G!-GTbfJF*gsyvLyQJkn_%#T; zZ+oqE`85E3q(Mqr{IzjS0mND`^w^A;As-4%Dj}>7X6>kC9vS?%B}HZoNN)`0@dwR= z^9U2$FZ2b?K;3(LyqM5l{zU#j27Ki_@5#`}b)VJDuBza~B@aA<$1f`{eGv-Hd3dE+ zUbV-7f)6kTDN3TttRB6B1AP|Kr6HRln~$)EO=KAm=nYaN{@5j>oRe(ui0V`fn4JfS zuKOOcN)<~h5qo;o z(1wNYl#q>LChTZYc$^<;gY|lgf11fohs)T8Yt4KKx)j7KqKUUgbGwpcp-a((l$-;` z1vsNt>Y;ncK85=dXTms5y>6v1&iKt#_b6~p^WUTa?j6m|%_#~3j^iztA>d81fg9xu zD;~y~gmez`!?|Apco3+Cq&uSOJofpq#0l8$(2edROobsb+S+c?`D~%(KO6=K zn1LoRYknR^Bch%N4P)BJbpNuJBq;5L`n}dBYp?rr_TSAD4INhV?cNj8jfHg9S}DeB zEhzyG78V7vupDZ0%FcAbm~mN_eowCV%28DZJg)1$Ab(}x{sVAj42#bk@Yg9I%lAXA zx$ihbA2d8L+bF?^bi)lB1)oxLP z-p8`?+3!+LF*i!Mnucm_BAmw4)R;3a1O3dXmX1Wo5%|ihWs)jVC6C;$HKcDp8Uh5f zz(LiK`*3NqsIg=44VhkGxIJoD~zHJ!_w)aE|s z^vx2B1L-f#CCHve|6afA0l%SB=mM*h%H*xNC*K^chfj2R_vmmtHXl>mHOcL3R{F$Ip!mDp-a)VPkCXo3ObL}9^0cD0BDN#9%SI5Db$(rPSf~`sYM~8{E{|R$ z3q9}(sCF-Vd~Wx@qN&Q{bwEPqr(iE78+y219o>xxa%ePfU#eL-^p&Vzs#|ni`1?b| zal{)iDlXO>xNp33A{7nycM?z3;_ad`Q+-WTdUp4tsx0P@v{BD5H#Nh#1~)bR+v}Dp z>-})c))QZCwa6zNW$48<4=qF{a(%ukuadoMC+DENt<~AkyDjqIr~2zq1V#6{nuuE; zycuVWt?4gGlsEwdmhaRDelEq|8*Ghy6#tTPX~);Yqbi}s5SV)D%4Lk{jqxn>V(pTA z?KUAmW%I5cyz#C|i|dnJ14qH8htwD0G`QZ@LDXN^-QQCmFI3h`HRnk{4WU|h@u!Io zIOp>hLs>1l|9@=M|HfD(eEYEHzd{6EL&Ov2(X#-DWXN1z2#W9{>e)kQzo2nX-!qcv z$tU3CGc`Acv6-6=6D|OtF|AhbG_*!utLW25#j;hx0er#Pi z4mmcOO)|>P@ytTuOlAa=?qu3;QRsuToqg`h&R zC?w?M&7r}`?Vq=B=*i$Bs~OEz8pGL9A_ut)%#k;Er$CP7sIsnScgh3{&wc&Xw7!N} zo{$WxA{uKTrWN8`UF~iiX_c5Jy}T4&0j>g;iA+stvGUaK573Sv77WI6d^XxpMG%-1 zW^*w%XmTM6NsGkFuwLyNyEM_=lY%slHXUdSa>)L)=<4MZQ|(gyfFD45O^e$0Qss7r zh(m$t5S@)KblK#CUvor&A{N&67*mYg3pqOFFr~0hN`4>>*6vYt|E@R;Bv?Ty>yiDq zCff_rAd~J^RAFZ+Z)5)C>);kzMN2)Q=%|9}BsnFKytG<^V~_E;=mHe{Ad7yJA$3C@ zX1&4ZiPda3{W{BQ!%h*MUdPu~xub1vPIKS-U)I$mSuPGPEV!V9coz7{tx0ZUBxCX`R=`cLtpaT3_603e&9%I4Cs29 zERwWvy+z$?fx4413u0cl6%@Zir?}hYN6Yl%yRo zew1}iJlKPv*NEL+U2)do*HsVLj!;>QquCZDL+_OfJZ{50AW9onA zNBGBgQ)Tk5v$(C=D#UnJ-SU2@K#RM|TvSHHt63oH1Uzlz0Nn~07t(9yvB=tJvY04bS_3Z?-lD$cpjLZ|(sj|y8gSWac=7pk z#o5X#E*7$Pk=@}HktW~9cDCaZ#4Hqo8FGxYz|4)Q@Kf@L;ST%q{c>HedpjuZw7j_#*fhF|d?S5@ zcIrv)aN^hw?%a9Ui2&}ywT_N$9i1DNu49(3ClGEsL0PZ_n1I?iXO$PIUdY(pyqaz< z?jV7#T#4?%li+x-smKl+&VQ|OuTgxdVb4bk7Sia@TPgTUOfT>JXhbk^-l{sC^`W*X z6`LJY8D1D;ZKHw^UWoI^Cu^oJtI1U9rnjdgI51Z#>@Ha|cu5k8m#kJG(9}?79Vrzz zIzXKgMU4uCK__Ib2BY}%smWBoQ|=RExBM zJUm`xx&@*)s%Ig)Xeu!UVj(+daP){`(`U%;hZH??N)IC~Wzvk83^J<4z*{dv|8>!AZ81OH>%>6T{UCX;+lRX$_f* zte=E^{$+x!?$kBI&ep99XKI3&dbuVGOIjutRi)0aYL4DD9G(h|0q2JDG29d?ETr#KnM(aC zN~TMQR!c#;v17K=#Bi2Ao?4S5^Oh+_K>&Mzwe?$e)uzJ+;%u<{2P%h5!8y4b3srY) zAHvrI7*aMWuM-ptlw8SrD#Thy(U1nBg-z(m!XbSi!*bgE(ek05;R%);xGYh3l7&-w zkIv*zMmlUlM6rFQ3kXv30$=)D4gw&xLd_l`FIPC{ z2zlNqs$C5a=~sLZ=kyRWti{N zKd7QTDkd1!XrbIUV$O0Zo}h5@d=1JCI}vdtNEvQ$o-;8JS!CGR1P57srXey<}2pluEaE)i);nlOCGSV z0IvCB`?rM+pO(&nE6*Wu_R0NHvVpeW?<2BCF+bMYYAIT5 zHVA`>0@1mVcNMYoBO?ESD7u8zOal;a@5J~#O$p2w<;CzzUeiK&PR;{H*C3ZET?CNZ zAmOM%U$z;sA;qm`Fty9_1wT8?W?4ae^&Ic**6+AIm6{0;D}BRLuiKyRyRIhQuX;O= z{S1uh@QPu}GldD(T2YJU54?(^{*(k~{t7wRS>~8>o6HJj8Qyc9{MQM|Er!Rf_VS9M ztH1l0O9NmFG@aHpLWzxad?`G-G)nXi-@-PMCYCp+)$W9g(uDGK8zxW>@>s_De8PXb z?8v9Tw&LRKI#{)Y@%Al8JGf)(;vpusleHG8(`y6hh)d3sLvqNm(O5mnd=&_k{t>oA zdvaf-Dv*1{v_6a6&~&t%e$I24zOGi9)ZN#u(-IL9hK>|Lz~0Ysm-4Ctb_vY}^RsD` zb0aY(VH#34yR_qf)*Ssay0)A59@-4iV<;BCwBlCDg;>&Xz6`7CE>%IH8tMLuar_3*SPJl58_`2>#L8qO}_prdJH&MU)y-P=A zc=HCXD1X7O$S87f6JQNyYrjNmRf@$+XWf$GwTh#&%4l)FI@1SO>EZnZS)DN(`f^k2 z?vcL8WH$5v=W71H|MHmx{7Mb!IG-mqL_xJ%Uo8XUfELX?qMg>DAXSV4IY!;Lgl);}hqh9kwRU|; z(R)({ctqX|B^-pJD0xBbbUviMo;UbbGJSE5C&pZMy6z}wq4@}f=qv*>he+&d^zqeX zL$cR83liD3Hj9|UOv5@R)>lt&65toZscC22H2aO&R^6-{T%=IkLUh7B`Qz|fMccv^ zbtQf?tK;S+-8#i7T|<>bKU#&k2Fdz5Q}m-kLfWv`C4(X02t%iAZ2GuLl|4NX%)-T+ z3bPTv&jD;jf8?6}M^$R~@0o^rzKNFDHuP1c?!Ib&hps zphiLy+?FqsRfVmFJEt=)fhpsbv7 zBtg$T+vurNd_3^d=$Ydwr>;iFZ>ROVVp62^rH|u+JXU;A^?5;jB=AZ@ zd^BY?Y7Mbv35#PjFj78o0>BE*s{dG>vt^x&#`%Hv*=W+NCu)7!t~aE4^WW`z&ft|nF-sd{ z-u77(c$m};UBAN;^mp8TbcmT<(@5{>|b)D+IO|MQL3++F1X)s>k&}j;go8IAFPne-#d~dRFjaR!p&w<|Iq=cfUCvdz! zx|x(mO<==HiOXFQ@QbA~Ts^^ztPECkWQSJ6-qN9kbX&y3blg!{I->|a$;CsYAKJ%X z0JMcyxdKPB>zuWO`>`^eLOa`2C%YQvZTs(%+q6{}#xFI%Z^!@db~BLde@dcLaQ7YQ z-=9G^0m$i&eGlT{Cuj=6+e^cY&+Kh4w9m~iAcsI0Z}=%_4_@qd(PO(J=kWhj>7=h@+k(`6 zl;$;*BHh9l(V#O8NW%j*LE&_wI>Z+t8YP_SoE?f4;OOsR?1}Kr>`x%F_OX6AYO5vm zrK}Atvlea?Nwtq1GaDAc@3TH=6C~pm^M(5Gpz3{HNHiD0BWo8oC;pz?S%bJ9P>qKt z5w6brgsQp7ee=c0Y-h89g+(#$kn>A%x2dFBE7aZ*Em_VD@{O!f6by&`Ba?h_l%5u1 zODtGi!BYMXzr@_T(6BiC(?>O?lT3oRsfuqV4!FJn6c#n(j9x*7ktH_QO?+QeZ zW-=s?a1HD3pM1H_+u(Sv3yjTsi4>j)cw)TIc-~%{?0#5k{kwa5$MO8Md%w=w9qZ{3 zkV^GTYBb9g?~DyVi0)^>smQRQNRCv&CAF+X@G76z3;ivg^DdCZcjw&)*1$4D?uo-i z-5c*8%CwL)Z0=g$JwEr%05`JCKgUh_-eRpXqnQ@NP6os(pbEyu$OI6sOR-AnG3)H8 z|E6Xl?!CnWt_s3F0pQ{0Q9gF<1Cua1+qeS62Eaeu6aMwrdf_Ne7})R3JiW~i)5%Yq zp55!gBBu59z)ySUe&o{dmCZa)&-u?;m7Fe0ZZK6M)z*7(Q4lq(6CtF%aVcJ3fBACt zaUOiP9uYexe8QL3q3Ls+JX2I6--N)d;3tZJ{^<z}^jDqogSVv$o}@vMKBdqrn=k#*gd*02E1Dt7j-iTKjC>hju zt&N-TMDDM{5WjRpOes{sq$zDuBY@r+QBleXQ}Q!he?>o@O_mR@aLvb|9RhWnFEY&R z4%brGxwh{kgx33mz9S_|vPe5WY`XbdH1M>HB@e3WvSW;WnzSXwj+tRm{3OL+mc|Et zMB(dsM(Tdz7avo7Y0iW023&uwf^;W}KSN$n#m^Z7a01t#kle4Aye~PPzufTGwShgo zE@~WbCj_uj3LA8{?O{=aF4sX`!cboXsDKNgqXSZE>I{Ew0$gf9uo7V+Ab_#O@=6h` zYI?;S{3y{!liRN`^M3OxTj7_05X1HEJ^lN#ONp<=D~Z?$8htmv?~NBMcp7X~{NEFxrnO~m`EwoqKU<(<-y>)YK{>$|Q2gIo!SZ1PY53Ph4VAklVs>L_kb%h-W-qeymYed}|{}F%I#Q z>j@viX`oZT_Ug~gQ0>nrEq#0Mr{1N$tF_bDqQ(fLERg<5tu<9S{OX7MU(zF1KCJq`~QD}>lhD3`YBP5g03i6+y_Vll2OZvPIymN;IK>;%v7fNNooPGVCl6`(d zpxzLOCE4_9Hl2Ie*$A2fY0>GhnOcNyoAwqOP2Cy@;54P72vHF4A@+TuSuY4<8j759 z3lp(_c}3~tAf9k8x;P3q^Q`=k(BKFgSzh8wv(b`z4adkL64>n9f%Gs%XA&|icDOaV zb@#4p>q=%*leDh)u<$?I!~az?{W3WJJ4g4!h}SoXjoaeZ+{3a0g97)vkNdHqxsh<4JA3T40R{|4)VGFff>KKs)h0fwFS%mz{tIl^Q2V?7`2Zsfd> zuEH>kAES6y*ZI3+j>LoJ7zG4K{&Sn`NXpc((|07=e$$TUuN z+Ek52-8$k!zpS;EB(A(u>R$wUt_N$2@82irrDDQ*n=lZvDo~Kake0YUB$M^a>-GQ5|0zxq=r>%h8Z!JT9zawK!Kg=Qr+?N6YrucOZYFfbN z>!idKy>y&YACWo#U_Bv=4!hQvq$vKQA*1ErSq0TsG0;t!S2mCjRunN*_vM3cObC!9 z*y84JsP-%aFX-j!)`$9hnz}_Q&6dWyS&r({kAH(gfOx5&77QH(|M05Dm#43J?n>q(XTyMc{3O zwQLOCATCP?lcE3&sjIvfvKgN3e$tc1k>^a#aDoGf;l zM9!?*7`cX~7hcHJJqFid=$0zGqto&-90*r}b&EkJpEtGJ^q%>0VXNXJs!xA$&gk;= z`+nwING8KoAv`v$Jgy2*hl-103|4uxV_O0AFL^sTkXMyBinogBs@y)Mu1ZajIwNi(hB4{Xl)rs3}rP67}(zwmPe+w9@d6yHK z@F2)AWz}BxW1dXr(IzW%XdJ9EI}Y7%w-GgeVpiGCbU5QYe#+UmFeK$aWJ}cG@Ww6# znfvd=5Pw_K%!87c5q|6TZA$Ap#c?h*0XpQc;G#>3q47t0kR>2?@$~eJd^GK_pw#X9 z??`&m9FnR`RLsHTS19|xIW?Ldv5hOPa+(jtF^mx%6)YBl-qhU zNFJq8Wx$;&L!ltb8p=6a;ySISYs7S0`_je9{M-8(ZNu{m7jq+Rt^$=?l6fBGc}?97 zHawqH$`Unjei5y(v?n%szb7G;{D5473PQ}RhX07QXurJu6q z+@+3ekF9TAXCp|y1v(7#fULkk+MccY%WD@0H{Zvy0C{))T;{Sc`T$>Cg=zI962y)F^2#}`csx;!f{sH+Zfx0=ZL=UkKvOW&D+i&t< zn~rF#BZ-&&D0s870$LR7llAvPjJllV)f0k;_%727#TKuKM|Q@v^%rv7$0xzY_fON$ zq`^DCxmx!%_U@GLal1~!|L?KLtQ+3gr^e1 zy-G;$Qg`U7^SNo&KO%jSF3u*&NEl)w?kzTE$+Q+=a_ftt24cAGPNMT2S&&LZh(_Z% zPt1loVwy?8azmT;8TCf53`jo9A{`PQ(r51C+k79ezZ)d{6@q#yiOKIiK;EtgJ!g|2 z;tvUH3WHn#Snd%;PqElR=XGa+T>E?mTHhYv@x6;)5CaQaM+@ zq#%iIa}bd{gL&d_vvj}KmV;mgVPbcznHh#!x`D6P`SEOd1E?kAQ>VDvwoR-oL#UX) zcM$g|J1Pw;G-c4`NNVUn1%*+pvINr7L23+bp{$i^Ka0F#*>NX02{n7j(X_CXkU)|Y z&Nx!t#Ah>@i*PjB;yC13U zmN~j7GfsRa0@ptTyyQJQi(UTFn0Q4cA90A&2EtGs4e6SlmFe2%tkO3?!h=2SyXK0& z{6z>ki93B8VY0rn0|>AU&;He1z$FKOLM%O~MOAZqL0itx69Oj*!_(39*Wa!) zzc3xrk0C%>bnFCdl->E#d>@p*kG`wv#0=D1Rd#LAp)eousPxglrUnTQIm>(k1dx$G zN^*Z-&iX-|&AZpAW|Va8;^&Z-`%R1+7bs5Vz>F?)!Htdo%-jY5_Byt=`R6h4e}RNn z6h*C@zi1a2A^*b#cqc%Odj936K;9W+4V&>ip_?u|Vx;qsVAfNMwhuH()$jE*dG*Gm z3y-4UPx7(DLW?EnF^M{~#Q z0A&{3(X}~vOvvE~j4SPcQCmdSlHFV2NqeV-_zCGy@J~!kgZNJfu^TxO+Vx^XCG%(1N{7&Sk>emNpB=P)0w)ZFq2V64G_Pbw7YP|+Ia@{l_?jR)-?-u)vtrAKC+E>#ky7$)%C3d~tGoSA#~F8O z@?Qu(bsag21Ipj+;{#_O8Flai-N|-M%QGeUd(Ff zGvO@ybVfGU3o^(X>moE{<}W%R>bL@2C70|X{~2e@GCBAKPQ3+eQkvxSVrG76`G@^X z({}(hVFf2>BFOK%MLqTrX25A6xEBjl$}y+aq8c!|<^0$aRzhP}{jk8(C2p*3v=urT zm@fW;!Erx27<8St^8^*|>%K7C^*d6GeD&DLZuI!lHGJ(b4*8SleY3}#=7_VL1FPS? zxF7`j+}%m>A(wWUcM3f`UA_uZ&wzs?pv(iDxq=YR?^68&m5jfcD(MJfK~Hd6!W3`^ zQmS(iADC&tsZNHRK(hI}L@M!}krYNi{e8JLl7b_MQLR-K+9>^X_fw(@oNffY7f`HK z#&#*gE=9x`4$Sq}sjF;#aavtsP&G^gCphz*xf4xD)UbZ|M`Yozr`+YbJy*kvY|L72}3P~aNU!Jc`pQ({B|VMWIM zzU5t;C4}R)Z)vo7VpgALh7!zzAG^ed-~DI*)^&|5DCQY_)9iJ`xGlWlo|zlC(nyi5 z->}}i@3rMY#-F@VHs9<&+kz2!S=Zl$iH!*zw(Q)vIEcAj4a}?aKNkJxJM?KCe*Gba7^s=xw)M~qz@_`(YsM|VzQgI?BM;2}9*5;dkmnIZGy%G4t-a^-#q8ri zrtYrJDOZ3x0rPqeO*m6kh+VuQb_`_ewLve$-@f@8u&^&Xp*_LG+9!YvTFg3dx}FDYN3! zgtY7_&JW_;i?Qi$JrXmg;)w}32nr`V^q8zzx9vey;Xopi)*(`?`=lw}8p7e=cSx0d zc3s}t(*kgt51hCxn-mE2n@EvQ)UEl$mNDMmz6HZQ)|D-c2Q3P0RlneTZda@; zD={*cd--ZLvPk*Dedd45xBqptcMit?U?6BwSm)Y+V*pmmX^tTrMT=3t(X&0KsmHF< za}gJ7pl<&IO1`lCD4F=71YmW zZ&Bb@m9XkleWm96TX<+X$+p2q;&jSe#B_GO8nMFAnp=_Lbp*#p8#kLly>Gv;oH+>^ zGC|{L$wl@dmFo(2{4y`tGjGCi1F2#IqjeH+waF*m7%j9*Fx`)d@}wxWI@7Y~l{Y8Ipl*r4n6 z{J1hLnB|Mj`(~`Qa4dY|OM*OzVnRV!htv$OssCPvII_|>M2o42hXu2!xrY0X(iate zGKwJ5HSNDH=i6)R+Rr?K39EfeZHz!ElSFuLhrB85_ zqP{7+RkLng{+v8FkDz8`lZT@tE`Yf-wtYdr6?aN+TZ2*-3@`*EOeAEmw=R~yg0m41 znH3ps6^*~+>y<(jvqzv{LnXna$COujGB$J2U&pk4K-1=Rv4!@}6XX7J7pR`D|LCMd z3c6+32;Uq=Gwv-PbZeO_MEi$)#f>c^FQ`^mQrKrl+Gg)gB!$+FJ%EplOEEvU$=(;U zP$5-!Sf>u0caH?i6n1knT$QP4@W&2rN<92~Vc{b$48YSbe*3s74A=ajqY>X$(&av_ z2-x_GT&mE{V-(d?%H1;^+$Xx2b>M6fx?E~8Z1+umNR-`*7JBO+I1K!*)LXfK?4J2B zW=ttnFMnN??)Oi()6IU5IPK73HuVt$Azrll;v$#%O!8`+$WW?|39`TZ7rWr=UqYr1 zIg8Khp0$D<(nJZI)CCR&3Z%bw-V`XLkj@#sxMskh=*MvA1H7&QE>TrR2H3}Yd~T3^ zDSl%GpPALvLJ7_Wiug7PEf<3&ejR}`2nRc~|5av4S&=geTM)yE?=Ou0DOXP%Vq)L7 zZ{=%iS$0)(YF%;7su3PxJ+J`CwFhMT`d!`Iw_f)L3>>va=bI0N92^|~yq7LcBC24b zx9*eH?}qS;)l1YL|KPCA$fyyd__gp5*EkoWZraQN*D*6~ zBR{?4#hj1-#VCp|%C{*XF|VmD*Fw1G^WU2~P-_yq0!7ZMw1iA6YL+&RKktT<7Opl9gO|5cUV#P|1abp^% z_c9P~_09|+f--4|G98*t!C0T>Ci{c~vAtjF23XJnvMny)zU4$d-8ymc^u@}Zy=Y;dV z`XOh!EnCY;BB7f%GnSt;nmF8I6DmQ57@J6>NEJ%%MQ#xuM2JWA<-zE0DaK~$hkrz) z6_N?YgpzEc6GiAN2x^>grYoh4oD+40P$&^^xuIM#*^$_PHk~u(wYMiYXL3UQJ-kFq z{XJ{IFv#YoGG8Rn1CiiMtFSBK$r_;UBPM5Bw2`?{%1PLCC>e0Vd^x>lX0U=Ahcw?^ zv=A_v3+#=H&bT|gc<$&wvxGT zEPe?#BO}gzG;ezroXDkzCuaY$Qz9mIAr=0JKGviz!RIy|U3ksQw*6sIO9TqJYN1bI z4%!RHIonek(j{6>DO;fRF#AYul5#$~RNi*uyA(B({^11u*1mDL+0p9vzZy7>!7?AD)6@cqPg zKZV|jt$Z~$R00?~pb2px8V87H%oXz=1)OsP#Kohp*!>qnz zL#lC;%5zIAsV>ft$XFqwxLJu~6{5QNtl9F5@p7W6wBxYjMDp8(my+ouuR2x5>-BXJ zC%vHx%2FGZF|LFLpT2pz;j)0zr6!TG&!t)1=JztD%@H$=JODDWfHaW2r zOEMD!dEQnFaK92%=+Elo&HsYM(sm!jEb)U#mg zT^Hd#(UwTuo#R6QCa&p~y5H*J9~&Zv@p2?SQ9H}5;}VN9Rm(}*;q2~qW4#*1LF zGOQxYS>JE(c`2Xz``Jc6=LBD74#J|z`laD>$uY#IwSr2l#>lg4rZ&5W7v;GcC5h0v&-U-n( zhAp3%KzL2(!ehddi8%3MQW28Z;-kve$?jt#K5ENGFKqTtkv9H>3+<60>CT$w$Bc-u z`Q;KzRLmGRonwNOq}Cd{LB?nF@oeUTNLmEd<_$c4#PWLCw(y%#d;Box{&GzM=>arI zG%D*!6gk(VKD7eUxECmW)Eb@=c`Wj@0~NO^4Cf_5PdGavC+x%TFoa++DM)~l8v;c1 zpj-_wFZKcFR-rT{)zp3lshYo%@>DNko5Ac;mypZPpV=Tg{`eag6zy9e2NKRqwH%%t zce34LAmX{j`w~((J(SXh6IA#{hp!JorxxFGGpw=@EmdFAeT(84 z6Lqczk31n3l}MiN>f;r2T(RRhZ74;1JY7a-o7#6Xj?c*Ax8cbLudMu@UQ_LQKD*}G z-r``MKD{}8=?=`A?9#cE7QgPi|Apfp6YYEGNwIV++FuWSeokmzd)CaF1{Q|j&W}TU zabtm{==1YGZXtkIEI71g4pv_Q(EY^+{8rW3U})om`<7kfzHo`?2IB^g%JT&Ht9=d+ zlri`bkf;A!7y|zH3%b>s$yKylV$iJvRE@}2sO{a!N8S!T&u-#r;%Im6l1VwG#SXVl zQvwWkR(~s$E9JjF9`A$Q2_Z_g}+gU z(3qlMkn36`P572nY;yaazJrlDkR&1$tRZSnhUO{Wq%lVgEvt2Pyh-{UsOMNQCX!*o zy+S`%bx@F9k*FM~hf*nIBr4k@jcK_yL0V@S- z_!n#R3Q%en3_tw3`hOOmxIo5+r?7 zHsd-j%O9`JtLh0!(Bnk!@Hr)dAU%{|h{0b6Uoo#wPAlg_SqPJBybsM&qUs(+mz&!l zqkvn{uB%n;yUnq>vxA+l<2Sj-0T2W45iJfF>?9{yj3V>LNDzrf5*o%CtuBCnqQjA_c<^wocSc} z0Ksep`Up3KS9xiiuRve}V`C3j7eWuOWf2o{Cu#V(j)_?!#bxV!BUFgW9inT|y=E5L zTb`;Ha%O}M(4ry*jpIy>btCHw5DxucEBiP2mkC z?H=W9;sue)kH|qB=Y=UU7=gm!8O{ZEyRm;N{8QlH_UJ{&DN;x%E8f)?rPXtE#edSW z)!ZoDzXFrHM=A+P$I+{TdNnS<SwNc zrdh=Eu?kBaEb0AKdE^?+u?vjy4fUS17%qGjq~`I2H-kyEnIT1L#8Wf8Rl~K^ol>Oe zCNhNja+L{jKZ;I;n-0$m&*u~jbOcm*1yMkxbDZvb5;4^N;SOOhc|ZAMo4>SATy}BU zPBi~t4+eFb63|h)!fWsk#J)H8NGuyE~;bfSc-VX8$LA?p6AdyCx^7zKOO3tI^#4ccz_5 zGG9_$u?i6l1_5G`5Nn%bYa$dfTfP57Nto{!DgN?Ad|#vr z9)^{PjYh8B@}Byc1-`4wgID6xs$y7!owN!Hwhx`3qctx0i7$p}lb%Rw0Ws!)juZ&z z1q4{;KMl<+e_1wuu~tD?i|xS@-h8-Q<$m)?+`o%C}28L+9oKzuV~*9c4ThiF&Ds|m-l)2gSSK(7Sr+8Lyx?fzt@8}+60W^% zdxpBKcuok{Jnhg=xvgarFE%use9q?jekoa1W-CDNS2uO>#YH8P)1)s}*xw?~ zwAg6YigUIOTUzN|BK9|?DBC_SUlaZ<(y~jHYV?XRSDZM5c)7zp>vQXAy?zZBVg-~c z+2fXi^_>nEMZZD5z;{U(3Wx|daj;yb2LrO0DLq`+SBwne_el(sqPG@*vapQqhqM(? z!u!5oj=7#>uWa888W`%W?r_%)GnkRAqPc+LZ= zqlN%X@MrA($(lIyPt6keEa&v1X7(_fg38s6gcV<3ynW(2#V(s756H6{p5DMixkMnk z6l@ghax@);TMz_H#%U_Tq=Tbm)^Hd$yL^otAAW}Th>BoBJsVNro&B5EK142oG>Pd;TVFetw2+-yvaWuiq&D=NkUc13R>o*`6@8K#*~U)AOgcC*-Y0SYu+4 zv~J(MQ3!EyASU(;GR1hv7EX{vARYW^&<`ny`Jhwd}hk1*E5Ig>ujK|<406*%2Jeorb+!t7_Vp=K~8>fC9Xwh+PFMv#5~>w6yJDWa-zPAHK+vJ7Gd z=h;y8U5Ty0!KsXae&4m(8KxmFI*V|Ku{94Jx4jiXYI?fuKzJpZb$9Jalo9yde}cS@`~yq$4X;(`0C0qM#1> zhMg9B!^OrUQrMrWlbPicjqq?!)joXk+;CH?z#cdt$D5RaK3ujbajQ&Y)|QX&y$`8Q z`dJ}nsYd}YZurYERaOc35WJY9$ZJi&976dv`NbiO2w0Jc^l}_Y47D7u(e7>60(#nD zKC-%8%Yo68uo0{KK5fIzCC@+AdjcH+DU7+^JdS}ko}Ke#BB4JAGP>_RbY09X zRUerAe74N>+GJjJfu{}c416y0+tCOdv2vigJc0y<8u=ZXiMZbBm76750*lGROBPoz zuYu`}kGP(1o@9)PWG(>Pm%%|KAVwJAsjjQK= zqsYJ667}y~?eFh^0OyDAdfL z*$nr=OGgv}(fbzPwsPU0sf=s$`E!$pjd!!lXYkG|&NtV)*#PH3eKhUq`_u8V&P~hO z#S-JzY056-VG+`QAXaNPRyQ(rK{k&?c_jW}!ap+DL&bdF5L#~}H!F_;^@+2ffcmDL z*Ec<$FWoo1mZ%CG>pt{I;HVj2iovUibIyexw&Xbatl-ofHGuzfj5ioVt8N#lj&oV@ zC&7Os4aBtbR3}b!AA64duh?GK4u$&*LhglXz&Fdi(z&KXgM$pMr^`Dx$HLWo5$|Kp zaTI9=qNW@yB|TIVV(CuQ5ZOcQ(ax~nqN0CwB93ByX_XApkK492W+LypX?DCEJ@%!@ zWt)R66PU>GE)&?ru)Ce_l7`xY~HiTd>JP^Q>kMgMl6laz3do(nd`j|xE( zK!#^#hADp=H?<6KV$(T`3M$VpELPcjlo7{){Ovk)S~He5AS@pcNUowI`@k^aG5DU< z&>htQo?)Y;B`=gZ3d11`8|CciFRqzJI+jxOhR|%3Sx_VfGHs?joI}Y;WUg!dpD8-MB(B_pn{;f?3WF36qzvNcW{9YVbvOSPK=zRK_cb<4_j{; z)OPr8ixzi>;_mM5?i4HT?(XjH?hd8J-CbK8iU$G&D-?IX>3{Ei?>YCJnM@{=Px zz3cZZd5D-$bLNgbzEdtYRb{BRVkjigfk&XT52#^h^1dAn? zWqVteJC&`^^3wWxJN(-W%IeGsC-jJ-qbKLfxVyaF#-3;<>Zkt)ivK@wUd5#O7K(uN zE@09b4SXIjx#93h!6!lZL&i)-9O*9RjR@!bsZbKU;QP~ztqC#$TnNaT@^4a@BU!Pe z$jA*ph48eK%=QM>=o=}dWvu`A13m|%&;W=7ieFDbo(96n#sLVr1IRZM<+J~e(WT@d zv0Lb}0-tE~l%i>yR+Knh;V886^5I6i>6%B_vE%abK8BFoLaO(7a%2+~Wew1;EFaR% z8k%adC_=245s)@;(!)8d$~6?su2QK41H?bRNkeC{DRl%&%eMu1myb+R#o_+O?nrf- zX8Yd4Fr@;Ci%{OyT4jgH3qdsDLgYOHU_+Ea5RMJ=(!t@Xi2aQ)k&ChN6v3Y(YqapQ zObKHGK01uaNV}RYBu!~SJ>~D3i8$sYBu$Kc>vw*m5^VEHRDQ}LavTK)3;Jq;GMO}Z zVHqz<|J&0}SiDG^sV0o)yV&XH?f8I)mg4>5kCrc&k@~qW`P^O)mi<8;pxc>?!|dQY z%GdVk$K%bnSRhTm#eZgKH1)-1UNar(?dF||4VL^nyt#iNfR z=Zd4?>TR0)c9FBE^HZyv9-zsA8CX^Pc5i3-_lV!=Ld)3O8ZW9YNH2(ayA$>0t!mlb z*cjvkx-pjsW@N1FB$-HJ1Gg&_+(13G^|P}C8;p0Nwa@=@^2`6ngAqh+g?t)_fx#1V z@vy0nMdmNW+esZBkfR;O4&GM){a!zZ)|ZrGG+z<}<|T}cKR)eZvyt&rINs*~(0L!& z7y?zmKZ$&duvCO;js>mO7j)4r$matwm4rPMk6wdFCFLh|n3K)$lgL!XXFFWqbPYa# zNVkr+*X;q71It5gJ~G`3;SbhyDbcuhP_|!p|Hz?zKvO8boI+C}zleLqYR;v2Z#@xN zMMbrO5{bAH*{77W`8$bK{==rdicE-y1i8{vF^#f(anbO8?d}Q>1`AFmT!%?jGY-Vm zh%!DBbLvB?a0~W(lS7lZjHIoS=uw@pNGLNBL`%9gf&lYIb|G9BxH)*kgyNV_1mb26 z;{fy8F(e!D+CD~l2F!)@JxB@?N4-K}moi7{YZqii7#FiW*zo7=CgKjx*@QdZ7)sh)mcSWy;C8oOJruj@|rauwMlX4u9OUrUrf+vbi% zVnda5RG7do4)YSag}T#w@QQNOb8T;yidBpL{MCZDoBOk_-ShtihzV$_{{CafY=Fq@ zcXS@_`#GVQbG)SOID=qO1AkHQJCQI_%LKgJ#|WJL0IkPo704FC^53EoA}HT3z*HH* z`3bx~#JBSSzw^QRaqG*-)|Z<=ZH-2Vthtdw(NNkT;_mb}y#K(k@Gfd4d`+fX7gv^| zX0}h}X?I*&ThgV*m1W$s~sws>e;n8c@y~W86d&^_L(}8v2Tj$9U}6 zyPAD3xqwhlM1E0gjzx9!&nv@%I@5LM;%u9esqApLp0UKeEAjWhj!NDJnLl|n8CYk4 z1YlS=f)Qd)C=TERdAhl#U9QeTnULIrWCEEHe~w`M)uEG~AvpIQMaMaXp;<9wFKtn0?gWb@=_`_N@2vMi;pdS(34QM_*)^bFJg^hfYD+4z;f`y>7%2l(>HdR_CS;w{YBe~oF^zWC_0 z#RHE>2X0b8JT1HS$dwkea~JNe zL9^*!gvWmV(0(j$hw1<^z1+EUA?xy?m(YAC1&^c52mpIqETEi z*>J@FwW=UJ>NC!M^LVjr11TqECEKO+ZE!X$muCvr8!xq;I7NnO&35*J4XWUD+^HuY zZd-46y*W?5VCJtu_$b;numjgt*<`7wN!;t%Ejx;5N4YrZ{`v zpyQn6qEANj3CTny=K)h9jy=3gd>l8inKuj#F&tDm;uz%;;+8OpnGIPxghEbvD<>yg zNK3j}lx8K@uvSKfL_3Gcp`l~(Y6AoQh_&IpW;%|h^%x)~5J?hHbk$ErU!IoQomXWe zzQF)9GYlU3QLkDBaJ;q4eD!!V(an_G&#hNuz2$`<9e)MY+CCP4--|{ihD{< zFRYY=Z8+Ikv@Fc15NtV^3KbuUh$V@${B`6z7Y@8>T_=vCekqymLS^IAK?(-?$*{!YLD@?s732OV`Ao>v5ZRWzocUr-D8 zSfxZX_U1Ye-pGkNIFt*n@M6M*Yxu>+!7221zWBTtU;IR_W_Ur+hKlLhC31l{VSw7U+MIAj#vPe9Kjesg z9NN8S&`bxO3iF=qXx+FZ2g2Q@Fd!$oCZmSutQN-+h$DAY7xSCaId*?n#Z9}A^K(i< zOF}^nH^&!JJVtDo>Y!DsTfq=Q$Qm5R#Ak$uZ`uo+U{4#B;FJ%uXn;iMC1^lWor(Dk zEB>1-luBA*I&>8|IcXW)y+hhgB3NbFG<L_6JA(r(^eSU&pDou` z(ngpnPWvk$fl6V~@L1iTA5~i%fTG>}jkx|2A6Wm-yl5|u~1ab0AX(4TzH^@5l-%0ns=FVNe0DEV9 zKEek^;DWB0dzFhoJ*!{jo*_Fm&I9vtKhD5q2Tu*C`1`iom!-gif!Di-t0G>}*>ql1 zeq9fbpy|YifEe$-Km7f-fOtUP+Ro|G(=F}5FMr@AtjM+fm+Qc7hTxMmrqzHW&fuXb z?>QaFtM!0iguq*>WYX>3C(#e8k2le`;3tV7-ix=jhHL+UJFO&Yc#;OoPC;)cpcQz( zlZVG}oBg*|M(^Qdv-1kN3c=De4h62l=T)A3)4Z;wRyEVVW|>;_z}dk=4d8 zjoWBHx~bp6B?^FWrX?*Ro2xt`^KYRv+KesBW7F-|9XlizS29IT&}THKI#p5a1FSbe zLNV8bLjicO;N~r_s%J21t$5WcX5aNssLI!xbImm`K(VtT?|A2QbeqPev!~OGa@FVH z+9R5ek&{)A`NhM64o4jY2#NPB>m{I1WJ1;4%68pZet@zy=-vxh_X?hOxBC%AnbvHs zeA6r5YG8~gmt1m;hw2dlu&q3e+=~2T(clM*(YMcgUPLdKmoJ=~$%}9ob3Oj|k56rz zB0h@Vy+M?)(54Ro0Af;8eUHD7T1k`Nf9k|>BG38>yL1`22Q}?qyUnoiofCVW1eMT^ zpW=uQ!LX0TM@fL;(K=Lur~`VNd>Z{TeC3K+xJ$@97kUjw=X6Pac?B`(uG)l#9^%5CRzT@9FAFD`W^##A=eGF*pn;9buW% zs)aCaBy5b;0`&BT*~bEMKOsg=dt_#z+P8P|6D%YJE8R;Uq@0B%0<&inc&XH#E@Hy$ zDA+0G1JS%ttdVhenytp-EOnb$@)@*gvN1sl^rhPi3#oEB4AQY`6d29nHl}pt53E>h zV;X_kk}hA(AQ8}A&Ds|5?gxiz(cj5E0gyne*t+27{I)b+%P zi(9oxQ)28BLM`+#ZQPRUE?xcm?d@Ep?$?*5C^gB=+J=mF4h31Kj&uy~{c*v5EZs?A-8m4A zIOsU}1<5OpoASA7E7=*}0mTv@6)s;N;@6=^kLG8)cCVIE z-)!kek~k9F%e9DZ+p!mKrl5vNCLBvQx6{#XRB7C9L{(t52_xtKv4E;oh$RJq(Tmj! z)xAOUeMK9Q#@Bvc!!-q2&&;Y_GleA-c{J@>P`yHas8DEtHp{U^L*OX$hnVX;nvAjYq3td}h#4&iWdO^5n&KM!wy5W( zOipgGWP3RhO?JIFBNo+Le8@8kP%$E3jsXba?%XX=uk|}|IZ}FXO{lEI)U_u%!f;aD z?E;Vn=o%Gtd9g|@gDID#BP%Mpgub6zAi5PR^g%2;u-%Y7VsB|dcsbPHE)uvHE1o{l zhil6w`1Qq2&aP8nh-X%W#f>6|wp|Q}r278pYPbgA_D^*jIUjTYKV-JAZrNKdu(ckt zT}eL$bei}!Ts}4QWumUNU7!f9Aoa-K3qIR_aeMQby+ZC}YoulkKBYGD zdm_me`pK@Hf7=#`a=Pr;|9S-6JztN5Bom$vO5)uDG(OK-KWZ00LVrB0qWYdX2j(Qd zpZNP;5GEfR_26`e%~(mMc!#_AySXU==Zuct9@;`>g!;jRd~Yvo+0HW(S(rwfSHf+R zloYOg4(EXkhdbtPQ40vjJRsvn5}grt*$+mb3wOY zBxSUhHwS~_tkv?_j%!7F{bpZx0-J-biSw*I_kh9QCJsDoct}oRwsS z$%U|aMhda`r(k*STj-(K;1|{T+m`SnO7#hzATqWF(>{ecz@gb$@uUqT z9gM;~JJrVGu|Z-=MDw_pdc@3<6Q?{wEe3=#`TZ)}K}s=7#a|ox1ee5$$f42trIBy& zvwKdR*A5bF&KEac*ni&>N&#J(gV^5hQ_wL_Im1H0G;BGwp6{wnnxPzgz zLV7crC%Vs-E+xM*@$yN5B@^7%JU?-sKDD)Rp>6F@Chp_Y+T{J?4 zS%)Z;>>9<{*>hJfpJ@H)2_jNrcMp$NPlC=Mx5D&vtfb^Gd;)xXHm;LHdnsU&Z6;#9 zJ(r^$JzsxnW|pM2TJ3Rrve1*&vsyD|yE>C;pLK7fm=LLcvtfiz(j>IEQp`PTWVHRu z=?~1+aVc6jyboO$NsOrS>qf@^gwZ0As?NoUw_jbJ0s zv^MW?_?ULNu`V^bS`%bRIUadcw=9@zHbyUP@@XBuUwtwJ!7lEWz`40^*KkO1OkhOd zn+IkM{O|P>gT9TCQ#*JDImip5KO?4Mn4LbYIPCg_A@&HEU8vPGN}TEl_hp||z%|+y z#%v6}s@Q5U-(mG*?rWbAL&BwIykVuX>k8Syp_;Fs(UiLgciB?JVTVo0RxM^s$qHbh zWdZ91N5bup;=CzjBjwA33_DVRHg9sD)PCFLcbYE8)HHH)DbAqTasMs{#)WBHV~Z)F ze~FO8NIGJx;asOrJhmoy><@D7h3yWjaoERkPAkgs8g_S0{eE3z4H&3~KK)%v`^C-Wg1`Uypy7J` zOMi&)3wzKRh;&DL`we;Z^zHR7tbp@iuVC3OvIk7Ld#O{tJTB~#NF9Q;!GnW?EX{4X zDRZ|zIduQ=%{PC!6eEgOU?MaZO7OtINKsO+c2lf@` zc9R8@ef)6&ywe8LULf1b?xPNU&sWItJ4efkR`6wj3YDm-#qiy!Kz6F*=f-0D z`<5p$*vpZxU1Lpum~I&mP&-8f;yVxeFUl3X>>}pE8}S^;EcU|VGI6@uy|X!@ zy-);`svVD=9Ic?WUA5`oZuC?1y>%i^F3~VpP()z$*Lvo15&uYvZUZlP+`e+^X^1gF zQKvBx+e<^yZz%i1g5fM^8tyyVaqj?i@~EDGOcgP@Vo*(7LGp|*pg%u@mLd&@R8GXV z!MGUCgq=T^;wx^gaDshEJWUb9Kn0hHN0~BjsUVSU*6vGd;m(+Yl0h`&7R6EGE2$tz zJKi4{OsHdP&KtGbgdex0Tb7=rLT&Zl0lycA8_g6$>u1HZ2oZfFiHaLN-2+4H{``B% zQ@GHhVlg#J3V+zuvrn(3SEot0ZoQV*;d;;{7bRDzP!n=u12G3vU@p*<&i9myf1f-k zTWK~@-K{lqw1VX9O-~g&whOoSj(+H?7|w!^ae78dhM4)mcO6}9gP}N(lbmZzs62G6 zD^Vt1Ikv3&@M%tAO4+8~a)Y4NtRE*Ea_+%F+SHBv`Rqk=HKRZwNs-&xF~PS9@VJ^k z(Ip_!e&SmJNwJ%U2UxQvmxYTaB4GmSh||ybH|xmV_=R4c*NI+%(68kFAj&$lT>Vj( zA?opOQhP<8?l7;OolY<%rT%W)ZoZUEE<0ymbw$-oRBpTPb&%aP7j2`^{_lnk@E}C_ zzrou4$#eg&`{Q4P{n+H8^k7`4An@e6pY&!Tyh5jR=P)|u#dy^DjXmh;?o9_snjDB- z-xvIGoD3`P^6C)5`%0p&_1GsWY4jAlllg%)@WB6iPn$Y6H}sSB@3*3V`iJZ=@w5qE z#*E$?vH}aDQKH>m>sQ!IJ1NXH73l1fAWnv5Axb?wr*JOilzQR69?Y#k6?d)@0-Wzj zP*G|>V@#n?)qN15Jmh6wj?Bb8yhpGLuwi+f-S|sPxwym?xcw}L>1ZiAawKaqVj?gw z*D%8qz)yi;79O)lrKro!7-MkO)}pz z9kt&-mqO{^!X=K?wv<3wO_&OiFExQT#OsL^=<&ldvF#p3C!*cpG(-R4N+RVL;A*-_ zPqwF7PR{f{UI3@yXBBPiM#waFw^vkq+9c^qg@_tAsX(Uv+Cn*-8*G_i&Qvv#1?_f< zn$()`n0zvciZr&-AJO8Q)z6VJn*!afc2QDh5lbPF;(S%anndeW7-FM8pv}kVC8GVJ z{Ut8*I`%%lQKDKXm3YA9TN*GYTpnQZ;L3?a_mX=P2c{iE(bqHJP|8=Tz{2!1q+ib%Ho!ZI|#}9%KKjFz0$OxNcqS=01ff5>(LBiav8n)v-x=T zad~kyto-g5>W>aE_WMJv zqLKyZbWI0=glT~YqS%1&Nx-BbzqOPX7`Ixt<`Jy3Ns!MBHqXQPfhU3@E5kX*+lv>g-XGqIoA>$k8&_IJ^qGy^6breL zl#-NLHRCOn%cYT7!TuLMI_q_SssA)<=FDbZ#k9qE`-PtzkUw?#w?p^6m;I;iz z>ziv&_k^I-6t0(_9V2q9$j0P5$j7`desYB!-0AH=3_Jcu-zMV2HP}|b+0(|8=(O%< zu6pfbV{q-k^7Dqy_U5byH1@#giKDYF@(gRZq6A*B`%uWo4v;Qb+umNF~FI^zE%)LqOT1 z2t6Aw`3Fdb_)73d2_LZqz}}LyE%-Ylsc80y$_hRVG`&jXtmZzPsqc=mJw>;2AxnQqtCqv_^CCih3D2Q z%e>0SvdWlMyOzPd4m*vQ#aR_r5fMjGPX*m@p;ERJn^P)^@B`rwMJIfvTrclRlYB*c z@ELBKb~t=}7ltCzImJsp70++2$>HV(selfhv3R zPmTU3B|cwy){`0@ZCr7yBz@r%5!riuOi2e(qN1Vk3-QG)!yiU7J&&CSDo{EAzim{~ zYqRpwZclCSu&i8t#c2Y)Io^V^Fa0mGHW!sBSN^;!Sn@>cM&HfpAvcBec(&jFUKjpH zC;ndvwGZ>oa>-XPX79EoEIkYfzH?e!Ay3y!1`~HLz3DWN)CYf>6+!EG_xYH9kP!$e zx8A)KX21EUL4_!!{B{u)&O59z5bJO*rZ@QaFlhhZ;d<SD02EF-D--jy+-S*g zgp;0Tsj!T-N@DS;^zzK=JK>*t#$aVu?<2K9(t#l!=Q{<~0i7xQH+*XhDMYzZ99zrP+#o}C zccD<~x|UP=JTg-c;$`wt1lgiIHFYkSU9zwIV?qTmE&|acIo}s!rK;a5mZXX?15b{- zbwW#KOdcjh9`vH5E54rb z&iS4kE3oK4S}DIjXZyTw2O2pcVSh>%zT)F`HUNFyUDKx#{Xhu%2!7#x9U6FtWpx?m zBz=;7EeHD0lD1LXrCh~n&4_t z=!?xmu`6DB?o0oGdR31xxplH-fDWFhcD~RVzqSH-<(ASdbCExK6Q~R00+l)8=bG}n ztD)X|BV|m7N21AqUR}Ei)L`bb1MsVdEHcnJx=9FZB6jcRr{pSZ4fBv@}bKRfL|Xnh=V&EKy4+5jergbh}jB`o>JQt2p9>yApaCBA3QRNXjlHo_syZ_^em>Uk2Bjax-7o3cYAWydL(FL7L5-zs()90;_-41oTj)7I&pb5Bn8% znBpO3p-iEMo908-BzWiQ_%=$iRp?mI?Ho=LtnpbSk0GL5qCUXPk|$xbaPmnbN#NRQ zpuLe#CpEpN09aE*)D9UPQcdeK@EhqKL-#npr?ca&SPD6!8%{iQImva)cqYC_9{z@(H6?0<~%9wc?Tq@a7j~Irva0JxfH?YClpupA2+5q*}Vx*^0Q#<(v zJC3S!rVE~`c&my58xk#q@)e5&?vPYUDLPgJAG>*Yc(#ruM0`KQC^Ti*Ey5pcDfA2Z zKk#=Gt`~b-%RJNFidMGL4Usuh=8`Kf7P`$=Fz^q&xE0HW0=ev>#x5QgDfTz^03Cy) z+Z_T~u(z(5$j!}f^iLZlc+q*MmtW#3?P`~=M|gVplFN3^OwJvrl`xfjX*@>7y{n0L zUt(h*oO|*n^qpacq3!u-1#!=#W6ehY6?OfaU%lx|zX9*dQNyd3>;_K1cJ%-0>?o%B zkHs!%#r2UUM-*eX;or2`egK;GA}pRY-0@pgAQ#opUwNNrZ;HNt29IVqaspJWEn7ZR zou~lRt)Za*xb}Rg0L1R4Iq7hdx|6z1WjSNhAEv?_<1Kv`ObXnpiSk#X32Ms)t%*8` zYNq>;4<{+A$+{>Kcm|1&hP4OG$JJuaQ4FbUs4L}aA)wE9Na1>;9cls#F-fY4@lvJg zOTJSBD=XesFNghPayZ{GNAny$UxgQ<=0v+k@KtSw@e&o@U}AD~+{zAZz5^6M`&56|*b?VH4E}paZDe z)y}F})sorB&s(ttVzuHWQ`46%aeHBfKv_+85vY}FQ!HYz5gU0@!4AzhWtJ%IoKLdC z7%6)7G3F#!as20DJ*k>5Ukxg&s4cil+P}-yf!aml%GY@Oh>|y?gQ3KTOWW1rk4nEk zlcKk>ppmDsUD;^k)MB-|d%%9D+eQEv29%=|Vg^>=dSo5TRYzn0EVLk*g@or+v!d&x zE2NXpQ1PCHl4sOawd;xftg_9KEjLj_S9wqFB&(4QVVAP2P=rW54SA zN<4R6m7Y{C%XV|)HZxWHfylZgd;}l|&*h7Jr_zP4Zrezc=Z$x%-CtPW6Xt?WG+s}? z^bH0I@7IiM`aaBw&h`Yo3nX9AK9cWbXFE6n_f*(woc*;SY8*_RaG^d?Lb>}neRs4>B1yO`*nID3aUv$zP z0p1gD<4gsFCab6*q0M_!&#mm>f>oWvLuhbftwRM$`GUDm&QqrrZ|bEnh^PMMKQfMw z=fe5jIqG&lZQ<3!-MuZFAIo1+w9SSZS#O(;>-I!5BHCYp9>4tb0ngRD|@CLiTp~ zBW^WXdn=YFo+rsWVPB%rJLQA6&A2Fdi_jh60r}IFTuVR|u1&$sTN5QSOmz^+D&m!Y z+a186&T$e+wX4H+nlZ-Y%7-Q!g-3Z^eeo$x96e3^BJ8uY`xk~^5EDcd=2p#Q-xp5{ zV4|6BV|)ipc#0#J2LFagi^_73J*s>+r}32WV^&y{I=Tt~*REg&l3oN^MV2Nn!^EK> z`;J((SZy`v9^pu?n-pQ-v?|&F#lImhukXBH7l3Xk&xwe@#q;CqPxue4IL5{4bvs=q z>KnreF-RwIigEXM$JVG358#!Rgcq*1rS}Nm#q9|o^^%$vcad|% zQgqJD-WB4ObZFmmx| z-r2a*s4q4;I{bq&t3q-#eua{hoWw6Aw12!4Gcq>hW!b6;Y|ymuP&SOTL-ue24nc3+ zop7rGIM?3Cy{{%Vqx&Wjox$d(w7>-57izEig5v*!(mvXn{kM|0B_A=saJ#y*U+8i( z()h|FnzAJ34M{7SEc7PG_90)(amfa6gVsX$Li_KQ-t`|QuH~aflKjjQo))iefS{2Q zj>iHcH7r*d!pD+n$@&?tV?`L=-f+taWm5f2A>Mg4Sh6I zr$Hzo7rs;AFv&U53Pu*5!jF1ZevM>g6c2V{jg+`XM)o(kZ=Gyi%%c{X%I^mJNobVJ z$@-*|u;p7bGMeh}d`TW5!Y;mbariJV1%wOu=uqFGZUXhv3ZhvkHGv`~M5yu~ zVtSO_|GL6p%25>16G_H=hzDtU zYPNa_z}ZI#4??q3>qhG#;`yx8Y4FZ@$~`x$Re=z^4cFA$=Bl__Prlslwy1)}O#JtRdlokg8hCAYoqTGj zz^BmqdVjDP?N1H-%CEPUywnbAY{>VT7S>v@X}RoJh8cegP#PpX4Hj*EC83dAvbJZ8 zd3NR@)NBw?O}L5fyw@AB2`n*$GKVXAyC*!_T<*P{;7_@F!&V)%5p^PCkbA=Y4Tt3Hyje5<5+7R zlaUZ4l1>w}9|m6HT-4!iIP_HnX_%WzY3uBql$3kYy@%WG_j)9?eHX$PR?t&r9GQu; z4`vg>Gfu19-!@P6{eA0}su%l%c~-0!y?PG74#ladsq0zu$|8x@DhraYpC0&}p~Ve) z3s~J1jDAUcl9m$3_9j34^y{IEMis)5qC&G5hDr)YJ#1eNb2!PE9HCljnX7-j<|>q~ zS_>a93$EE8PZ*g5_;L;(g~(ObN-%d>uk7JiCXdjf#4`blG>4!c$rOVWju3b*pM{Pq z;Hb-kq&0&PZ-p~n$zeX8mc;MKS?;K3wUr>vYrQ=yS#a2C&4iMHk0>_$BKC)6!d#`C z{zd^)21WuSTJB0|ZT|k3m8u(RdFb!9)|(_R_7R91@W$_m<)^&6b@q)T z(X+aV8Y|ni2^~4Ei5BpH2K# z>wIUtJQ0ly(4wgehWtz_?eq?7@Bw*|lMRli&?xy3?x^_SO{$2(zm?qbr`8QEkYO;E zdMp#gX;8OHo{&YQvSpOzZ7{m@F{DxrtH4o1{}hlr)`CAiB!CWvW*l|6k-BFyL02n~ zN`p;>My!wOGR338WXwX~!@o%o%n_tfVc9%axtGhXkw_$}={c3dH?-@wPnx*uOH+Y| z>>(>F&Ax~K+fM9Bi?73j$FYKj%+J|LPM4VV)nF5%P&|*(k=#CYP-EOvs#;@YlX=BK z60W`!ka-BX2$KhEfR)HqeQ&ctW~)DGCY}R)Q#8bpmOT}v;aHlyZvj9$NTV=|V@*d${**+U4 zA|_cFe=sP&qoU_#ulPQry4$RKd3*3 z2A<&O41#SyKyP#ma7<&h-tg%0@eztU2#C@Nw_8!KVKGml10#bpFGw1*bhI;$oHmQu|(qz##Dc$!+Ou?H({6FV4aI@!@1R4bNqx; z{@FRE`f-8*%-)%>N8$V#u(IO(z2{_mX|R+wF;9D@p$dOQ82-z$*ly#6-TRMSY48v3 zr#5mN={z&%U*>D;2Z8u6P+;x4^V-X@pwz}Y4MAKW4*T8_cN}iVvmQG(7gyJ&8y3}x zht4EW_wRcTLv}MJanCQ4VS$DA)qhT|n%E%?0=JW1HXWR~RByh6D%ytvzl_#`tP74h zh1>e?^a!S~?$0z5PtPt12v!~c`HpCM`|w10@8{si`gKlcgL(i<6Kj>5#Qg*lTO+Cl zREs^y*UXmL4Ux|Ve&}TdZ|-c1=d+j|zLY*&uEW#dzWjp1ujQdW12HsN&l&AW&2eOw zBML4GeSx^VCz$*8oKUPS=o_j>O>(Ae=rYg<@_G^yX}j|1%on_4pCH(w*MDkK9V(Xf z+7usz2I0j;L!)DHQ4@s9WNHy~Q;qp+mVG6U9S=})-mlx$k4}`;iy$9q*Vcn?4%%$W zVb+;j1!uJWSP>V_{s~Yq=S<_;{aJFB)4^$VhCKCoNQ~<+=tlxQ2`ZC5`lYK|5#n!}9Cv{G~ZPUqk@=jT3s^(1`Qg0GzJ;@aLnHb>Ng zbe+OJWg+xSJ}hS^Yra69f2oyOXI$Bp`G$qx{LqKRydG5zLP{*w_gjTcx=r^r%SZwh z@gh%-U&5QI$)vsk4_)k&flaHah;dpLGmELJUn&PKvccM*l{s>LQ9M}FXCs@hsF0CE z;bPuZGw~d4E5~wbaCX1h$8a(l_&Jl$>$5+u{5+HOUSVCh8Uw%0j4VHYG7a}y}|8TSS zEk`s?=$i16asa_O0D-50WPDAMQ0Emn=-StUYgh1}De^*E^FVdCWeIJVjc&c-I6R_| z&`V9KuiEPLV+XIPdB9vc_LQY`u(^ptZ;!TYXEg5h#Q&{RghWJ1x1!Y8G$ zl;mBVls6;{;mJElr<7hEjt$)tf>t+5C@l*UJem-{?E2dMqYFI-a<&}bAfCJThX9U6NToP z%Lr3QQl(^EVGEcU0`YWmKpbhZ-}O?ih)3n6)kqlcV6KdQ$H}NFOqm~a5gQCDew1{| z!gt|Oh^xwBf*9g4?EBCnHQ&II?f1IbuE8(Vfl2yDcFz%8*&9 z_~gVGZG-(PHB_StG2PkpZMN*FWx$%WW!IY>q~AVLqVe$JF(s|WG3@G#P&;B*eq`S# z`AX^ZetklB|4o)l@ww)SB1KcpQ!}w5Di370&jS)LLGHviOKJ`CP;jffk~*EwWtYu~ zXOqp5_aU(HB+aX!LUEHw5xI{!5iVf6eh-B@>b?J!-}`*MZ4u&;PPpoPo%^jMa7}x^ zkJSCWyy0d!8*ZCP@%`IaT)A~{YO?Xbcr>s|TX-SqX(_lEteT08v^MKQ*Ecbx-8k#R^lgxmrJGn zgbCVa4H5frsnMtRITnUDF-KKrd%|>Y3ntV zM>|1k5k?3EYhmOKtF0iR2}|wHi`ZjNrGzOQ1a$#}I{i$=MD0`cCDQhmL*1oh`Frm3 z@NqTeml8Ia%%Pw*P|Z_K6nV(xw!Q?hzGtujFt2~5uBc(NP1GKEKoms?Iz?Q=clh0T z&Mq_l#*aM0LK1NlHp27UOEY2OaqYOmY4ufogK&xHm)CzGW($1z`x|H5_Tp&+8M`PY z&-FYS(E%hXRDx8U?Yu^jLN^%J$@f`44>1gCWABBX<_oJpQ zo_SBUeDT#iLnLCK8Xw9glW>6;GFXct=0g&phe7H|n0l0YC@0($)*&9MIW9y)1&_28 zXoO1cN@C0zvTEu)k%fhMUrAs`h2xiguDp2sjSi|WRE{>4l{fgDg$tEQx}4@)MD7(6 zOm{K4Uaht=ii&BlnPh)!xTrAOQObd)c#Y(g;Y%6bArV#xA4Qyf0_PHk)^Z%~+~_=mfCd4~ z8385|lVCUTPV>+UOgj#b{y`9Mh-Lu=s>b}d8#XnI6ZYIw*tU(@)~Y7zyXjk29m?oK zSP29CY!xY=V9$Q7=+U0rNZ$?Txe++|cp!WZmf`x>T|n1i_PDr2#zxIL(>dP6V3u1U zwHRSX;oIBOWuG7qfeUfPc8ks$2)A?9;1FFJQc`c(uYduOSTkTy%ANQ(>C@;7lbC^j z`4q8tOjkSKxzXpn|I=pdgSHbs;`i@+e>j2gC(z`K@yNx;*cToGogn0{cNo4W7&8X{ zeOyy*BLRa)2AS%!WuJ^SGga$G>P^ufSs}&*jy-1GUzpqgEJ{%J_ z8nm7ih}M)r>7UW%_fd?GlMr44w_79BHR)t(;1XPM&Z*|T$ea;{J!Iv|Fw_P45W;1^eK>@Fwmg~JAFT9nb0Zlu{ zHZuz+F;$M<78?kn#DH*ULxjUk$J$oi*vrfiAygbRiW=iCTv4j6-77s6Y7SoI+5{s> zWsO~uh-XQT+wc>;8)0N~qAK6{rjZ+SvsXNdmm}p&#jT_x!<6Ao4zHsde#6xTu9?hN zdqT#m06@E*O5#z_vdf^uz^`C#a4j`7fH*ZpkvhKST>i%VcR8|nwCk4LtE-V` zz}IH~%n4J`-yo2Xod}36$#{w=dV>{Y4z3WLv1+;D_~ieTcvJ70H}kKYs8IY@r{h;t zkXHf<%1KV`8zY7DpYW?3oLI0)a6t?>@iMozQ+{Z?$<0GheMgf1Vc&JPX4-f1J~FAPifYg;wz|8;*+N9*K`q8=b++60#!}FeF_RL+md!0JbsaYQ}Aqw~?!W4*KW{`+< z5l4eO%~;8D3@OlypX5tzOO)rSk2lyTY{Up6&A387=XQaYRu8!lhM=$z&LgLVqU;g= z_LoXf!K#7%EN|B4kGrTPM|1osEXBaqU6OZNPI6?vqY=ky6Q^*-wynIhRC!n4WMwwD zjnitYz_P_;fl0T5-!N~XDysqkL%On8vdaWED=|oL-r0YW|1gs)RUL~2D(2pjS*FAi zRYnOO@-?U+c0#1Ab#X6G9C^w#;~W!-pjzODXFD|OyXn?6UzvH~VZpQ(5vz8ui0EFT z)li+)c;Su4xzV<@pu^9spIV3R&AVn6^lz(>%BkPw@X$3us6^V6>W77z=$_OLcs zD{b@t4^wX$)z;p;f8*}%1Z#13clY92ytoy2cPU<63&q{FNN|c5m*5na;QVvW`Q6WY zUhI{%k~dj9JM*2n=9H5(SJ0PvWTo zI19}k%F35R9O`d2HEK~W+T??#SB3=$Z44nap~Kr)7KkyiF!y zwJxeo9DJNi_q&&)Tq+5PqhcRQ3Ql%fCx5z9{1j2aohUXK=4Mv|ZpG9B1|>XA5qN&i zwohG+jf!Ckz6Td?ARO$t|91xe|E*>_J;7sNKL3-dqJ72S3&iiZLwtC=`JJRwNpA7# z?s9fh?|*$c`ERZB5xdlrjY=3Ji!fqN9(Ip{@nQ(Sb7U!00Wkc)>=8GQdVYjB!=K#D$+@jmB1HyRudh|ehrV$1y zY*qR{p7i)z7gYQ2#?7%K z_9IqM#4i1Gcn8$PnP`@i=tctwl``^<`>OX;%}ib%w;TV>r;|&16s7WL?JnJk{0n*p)+c23f zluGla+Ls?BBScicMF#3jU^&9cCtcvKl3Su_2x7hY(97D9&Lv3VT?3%2%&G0`y}XM?SiRh1dSBgjy?nWSS`vApU#56vPdb&F z6n+imGYL#^76sFsqm4lQuGZgppznK$z;wTPLYuP5?Y)9Ag$%bj=Lfy;dEc*;2MLyh zgf{CnnN;g%z6wUZySXqsuXI1+Y(18+T@AB|0WX5U4mR9?CV{D0WFV4QfXY@tG?L0Z zrztbUS95Umg*-e1Iml|^@aB)pwuCHfR0TMJ<9m0?PhjbV?*(}|sRbluW!G(VpE_jx z#&TZ9W5PpFQFI{iR=1aTRblJR@1(6m~Bo5)AyyU0y(S9a-FBD?r->OPx9F@_lfzQYAQgbDI4~yif&>< z-Y@-&hg#{U-hT0``M$jwh7-J17ovVXiO)T@BoNKw=Fwm7AgbaM&jV4(cp=U;6GIRr zTyHS~8ydt_44;YbBg^l6t1Y?@xb9cg_w`OS>+%fh5Q$M2*c1Kefw72V-v6bp=q|ts zFZX~=`1=cV>O{YQJ|ZzfDxD|HA<=e$53%OnK0mN1Z=qwRTz#H}fnYxfOUG+g_LBbe zB^_`ou(HbrK_5 ze+E`EF=sGFj3s&=ssd?Zr#-V?1c&(i99~ICYmC>gT2yD)rXLndo?lhK8DI%*Ivbn% z$z*cU_;nk08XhyfWbWBgb8Y~}dHV=il>QwQc0e&lusH4LP9?n}4SFq$h)6#ADOc-p z(L^cyS6W&g@3jbBc!jSjQuDlDRVIq_8gi$;=S|ClNv2t6Rvg+Ko>}(ti-@qY!uVz} z&EafChmBGd<5{rH?c|gy*2dU*gx3WmnM!{YSz#C+u_3k+lD<%z9kS#nbcaZ^J;2W< z>gczUFVVIlx7L6}gGpHl+LKz#$X&DK`_S(|m+RCQd}Gh^*6W z0zzVke_!H3CHSW$_;OWr5XZ@|!$5V;EDwtc*&gdiR!&x1Lbh5f4**jV9(_T8m;=kG~+h9*}1@kW~ zUxRMOZCX5y#81W=ge;%vHCZh5Lx(7tgC?WXSB!UI*G2Hu_LoKCVI}$gR-`#5G?rYd ztDzHgnjjfBZqvGx!H%`_7gge#E4grV)1uVDKL-oq$z%(`nQg_%GHWf*?yfqqgGWDw zCP&bRhS0O>=!DpmtHrGnw{rl9rp2uRsf(iwsdVv(rO+v1Boa`FR5UHBC#86tsIzG8 zCdqd=Du7CF&EM|7m5iR$^QqfOornq7+Qy*ccz2#NVo7(4a?w8?$gsM$I_=g*GnmKp zbXU#=mHzZtDXVjfIn#tbK?#68mwHLD5XQ%Z6Q{4M0Wzmk9ZIx*T>UJ=AZl4cX%}O0 zU9fG1EIDg1AC5Kd4}}Yy8VgSmrim7iC$T%8PWbhac{_>sBqv5B0IFtniOm;Kf&?A!DEc@XH% z^cBhXS|bp2toRs|x&8gDhw^5Z^3CEcD(FS~ZOgf5tYb15J+x%?H!pd@!YP=_g@zOk zdH>^xKOulyWe~!&gov@_r=5dHxPgib_Cd-Rly!_3Ch`XRqL%=3UzJbu`?+4>?;H}( zSW$cUi#-GV{7D8t4uQ@C!+GKAc-MfbP$+HC(gSE~P+j9xu=d8=kQT|&!;ltj!;a$4 zNUnBL*%?`mD0i)dr@y2LJxv;g*H&`TXJ0~q8eQ=&3)HvwgY~az$RcX9|ih_||p!lhIr}u>+SFfynL3+;-0lN~6)5c~~wt zhbAAZ_ya3U|4k+xI15YeQ*;N~)Y%kbGjYtR6FAb<*QKg=5OH(McKLX0)Q~U7yn(6T?&#g}4H>uO3ByTXP(PPP|CYWHS$eKD{&G1~B^=;#N$Xvam zLeBkDdtqN`MU{yd9<9g8*zT4%`tH(a?(g3$@Q;}KPpb-ZVDgd^%zZ1e_1Yu`QtN%O zdTS1EnxAKyxy58$Cuh0|f--&V*#af?-j*?4@Z!E5?2JD*5P#%zzCI1QxVWNxD+wC< zSP(#vH-(Y0eo)BsvMCexD^2 zCPYjGhX6kh=th)Sll*?VO+dNu4T~Mff-MV6@+`2!sNRj$&SZVX;WWZpA=Bs}$xEb+ zT4V~1URKXUEMtU~23Ok1*!xrN4GJkS;#yNA&F3fMD!Q;#m75&svEb!siI`qf1Slt77a+;D`2>V`z z2;wLkrA)Z5uN2hX+DJkoJ?N-j;$3g`+&Z6!K1b zwwC+Kz6BoJWD4Q3BvLLGrH-bTXq4wi#*QWQlTmz+Nvj2xtdfA{sZLICM)3K~Ba!4u zqRAw~jO(agaFl6mVq9aHsFCjikW^ z6H^FzF^NF`P8oC>wchJ7ju1U>Yb7lbiJA`yoqkqee2Qt{QiRbmh=8-?_V%;c@y-G z+an9(Hs8AmpUgsglt331c8+HL8*{5r@e9Vo*a~4Omu#V5R zfsoiP6uww+8l(@l8(W)k*KarlnfEYN3ZFJ`m<1_MQbP2JxMA}{=;9xB9u;>#0{}ST zql$Bsejh4cHBLoqr5tV2Av4A{5Hs_t&irDdU2v6K2$57vzBWb+i}CW=i97wF!G$1! z9Z_3H22aM>e}8DdZT=4&Y&&=8#&F-BVs!Xk#_xAnR=nP2M7wMv(+*)ISP8!Y7Mp@H zz4=Ej)UFH*@)`n?1C7cxUVh91#M{sr7!=GZ+_`y&{57Z36pFIwaRyT%B`?x0fC8uy zqrEp3BftQ?O#ca5B^jqUv=mZV;me28CV*h5k*Tn+(e(8vV|Q-1MHJN-WuFpI$KuoS ztg%QOk2HbQZIkZBTvkmh+nE4D$xa7(fp5~smLr;p{EGGj8!5y7V|IZ<3VvZ$?$SA( z-p)mn!{qtmcX6Z3!2UL3;`otpE3Oh_2vCh$!4|?; z*w3Da1k=qfV9$-;x)?-|y-rDRw4j(qYmAzWgr!(UHhE&#IKYI@B?7N}KAOP$6x-2s zfTvVP6&E?;N8p?28pCJ!*IZn<>5FgYqPT%&7PN8FWcWflJW;l^6UkGW##Fl?FPh#( zyMP1v(?d(3^X>O%GPBgNOxR39!gaB}nGHWL0ntrT^lAtPuug}JC5hjFqQQt6!UUXD z%9^tI;uAG*K1zjg9{JtE>cianIGL?ca07mjVu$SR(0TXYb0$p5vGO1P^DA`Qm`!pC1fugroS(LSft8->1pX zH#*$Ceg7-V)Hpa0{vlu{N-h7K&AxtDm7$p7|>ix)y`Rf9ej6!S}t&!|}S2eB=JG@>%u_{kM324^r@QPQyMF^9;< zQ{qaDY!_`UKk3$?lUHakLiun{46kM&MmFyIT;TaeX{MnSu*hY4r0aP28i!wLwkz;Q*G zGXtmr$b?eJDLL5yk`c5}7#u5wcor3`eAwd54JF6e-*9rVp&N>ESeKe=d~v=?#2qPE z2GY1!%bzjE2Kz@;Mkzpwmg1=Z+C~({n9oAA;)-O@YSC>KJQb)XCf#Vw%}c+%i#fgs z>!2y8XmDtu)=7HU@c+hxk)%rupM*|2)f{rAsl|?MJ{dY;z4jxA9dsE-o#~rC>mrhf8?oEVm$yy;b*X1a;1TM}65$&`-_wNJW!hV^q1r37m&s8_kWazewEeoRw$@Z{VP~z76Zr`4oD#Y{fGF^B5@abc2_e20Ib+aST&SPkHHhb87EDUK7_wA$t zj?2qNSts%c@ta!|tLsQ4OcVla0A-}B#crz~))3G~1gXo~Xgc@Q0bng~gj7}j&bjQgZ9bD^`wa`&cS`#I=EVqd2tsRzZOZ7t6x1j2*MiwZFa zq>D@1=gF9F7j1E_dI7#_A|FP{!h$-r7cA1{#n51_&HOd2=D5+%pItdwyMCNL=PUM7 zmi!=|z^%PE6R9J);A;`(dF5Ydh0;BF0qDm+-_+;vY_xCQQRu7*XY8~)fdakth%%A) z_1QTQ_;2BgNS4Ri|B~2|j_faqhJLxA8fpA@I21X^ho9mGj-UVPECv>4_j#s{scp@- z?w$m<;sO~%GanV>eBZ=TqE&~Ge#;AV2&4eLX8Q$TLVtUxISiU}FBDDftSxg4eW-;A zrP>71UcwQ`!g6{~vS7K3vL-J2kmj;~fO015PR>^ju{P~k2ySi23g=D6$&dDd>^mNW zc9D_aj?{ZSKMz5FAOPntv3+-yI3(oe{msv9SbmIK^TB$P6HMGxRI)3)X`^9vJs?Dn6)vHuH^Z+ZGP#&&_bVQrNN6MAt`D?$MHjW1XQrwsXSpvD?&(k zn44eVcDU)7xFknbqf1t!!>ns2?BdpJ9nfsuBe;7+|Lf$-RHJERc}#1OJ&6I!MVxBK zH2H~9#l*CWxJC+->aP=Joii;sd}uSiw`gN&@gc9}j=yhA&y5q7|0UvCm#hPm?RVo$ z-M23{*u7v8lfWH#lh++gpR=+|@3UdT6(f^f0@QlQ+&RGno#S_}XMhF7U_`Avx~^lTH*r{_Sgkxz~~Da-ukYUp7|uCZApvhja1`@yOrl;CWSU6S$%c z!MP5wT#xwI4UsTiD{h(>LGdDT0ky_|xfXrk!?Zx~LSgHpr}28kZ_)h;>&pH0vnG!1 zPq|n8B<-=_&u^Iz@C~}|uAWsr%N9cWdIHpbjfTn3Ro#--2d(p+F{Lih1);x9cI^0J z&m%k8`DLzFX>OmrZ!K3o8)ZDvN#n>S;kBK3;0(F0!-K1aI{4eRkRtj(PW(C}QloN6 z&bTrL+i>63sZFDAsX08QCCA8}5bY`yr1H0;`74uCKC==@7^zcP>OaZhNz(+D!m_V( zIl*Q`5%K-Vf<2EWqV~H%oFDz9jdlNu=QA9svm36Zk4K!wxZEJ4fMdb5`r#l8Z@9}3 zyOx@JR@FJyfxIPrE!g{)F$drJ;Gh0sjj?v5?*O*wwxNEy6IJRC>LsjvJ~9s~D%uc) zj|v~OEs%?|&Dfz7EF$_#)7cql8Dh=v9k&t?m))kMyGn){{n&qOre30V1chC^ABlRY zPV5!%9wn$03vJ z{wjJ;3+PPL!QExorsOl(>0{CeC7tRdCCpI1C0aH49T0pc8?Q0WVJ_q6IVH;07+XyJZShLixUzxYkCw>(vPbM(XQ%iD1Ivm92rt9RZBdF zRaI|Nqd&(Bm)b^ZDjpZuGc3sGd0xG%DHFpOGY8p4O*L*rxCts=btZ@=>>aq-DNuPX zNG9(udj3x*2~p};9sycDp2UiTJfwHR|81Rc&-rsFsH;(;aZ?k8E{t~0bPT@q;Sz1_ zTTB{fJRV7c276;N`CU2dY8NTQ=r8{jMz08!XU4#)0%fvab3#$9*&$h}!td%8Px25G z{YFYIR>zj7bA)M*!8A{>V6|c$7dg=rCWxd8p;--caNVNboNu&}oylcM5UN2wgVccN zF`ih~`rjQ5R#oEiY*tl%Ot~lSAyhd{X=X6z;6q+3MXWgt*)I;ho}oEr>#g@gCkZ4; zzk0G=peV>Q^nzsIg}d2M_Vc76N;yJ!AYT$5D&uW5ckpg8(RT+!>0kWN@CuPj+CH_# z|MLO_E^cjA@ix&9(Ut! zG^hl(wyxGS$mdtWK2KsNyxdvn+LWW}yvX!3D(f{}PB-0Q2$fBwgSTfl^y(bgs$yup zY}mg{us9s3cDynwVifU1I~$_$NZA?8F&2dtQ-3{Yj#NHPtc1xg{j9I*=sXz>eed)+I(b)Yod#rr;FPUg@93J?S{u@ zvq2?1z*CFFq-8S70+()4vw&s{HN6{wxRqQPz7l(b(%vl@Ft)K-w9;+fgOiEU`1;&= zBtUZ{`n;B{++;$PY+Z)6@872b5_Y{%|JA#_w)|7)4cokL?v@{Cy?O`1y>A}9?^Dj# zug-y~P4l0sCts(mxtun;$F6**#>SA<{P$?cC@JS#&S3kXMdiKET9zN~+xn(31219S zriJhSSooj27g%)U61=?t%$inQe0|5y*4jF}E+`HU53^QODX#zv9ls&xK}a9Qztbho zB>yMR460`Z!F9Y%JlMa(T+>0y(VV`-^%tHmY8bfx3ORlxMM8|FB{+;!qt(@$O3Lyb z0=ooPld&nSKhTHMa>93lQFbYYthO>;dD5s^=FW>a4Q12G@|=3rcYKTx;@z4B4#uLsTdqoW@OBrNmwQN&a@*>8Zi;9E@y$B}IUZ zw+53eGuv?ZC3-UbC+dWw3N*3m*rZ_xfbyxLY~a0+t%NaVi}PN7%B_2@VQGMHzW%}u4MWn54$8(E8>|blPDsNE z$4Y0PlP3Xc#f6&EJ^dVQz@eYqEz~KJ_N436EC36>q;v=5L1xN2f-nGIge)G%Y`m{W zcqpEcQ7678K@i_T2MnSCXKVgv)BR;U%?Hhd&X*%q@3HR?x+bCjv7dW(!87?=)7GoL zs80%p&RLMx?_A-5l8OH6q zyrhtD?Nfm$}b}4osQt*VEvl;JRz8?KOS&Ad|L~ zYp%1^_K-WM>uWJl!JQ8U2`&{@cJX!z1@!TaSKn2=aym(o&x0CtCNwuvFYE=uJq^%* z*Np07R%<{td<9Cy;P8>whty z4Vhu)?YhT!tRHkZNYo_-vw!1)SiW8r@a}ki{+o7Z6!0Q-Y@IUX2AzuWRHnE&bO77M0_HB7j17(^C)u@TS``3Eigs~<3kf`a%p;;kI6uZvT`+aw zvj7|w0fu_W(<>W)1CjX1xRJECJu^aHB>-UM)W6}1)Kp#Fm_q}!KqgLmPmDk34ogI9L~zgZ@b3*O~Crvn{PPFveO)tdzO*H^=ECl z&eOIt)?b?vAqk6JgJq9_Yqy|zkBPg#|K!zERw~GztS(8vMhEg0&9v$T%{%mdx3Dna zOIhA;k$L3HNn>p+(+LU>(PC5GD~V7Ygcs{7ks+;Q>_9!Y|9?@PK<5B-&0fFZ@bJdh zJ5cbpLn-@ko!Rj`LfZ8xIP1-4Z_Ty(W2fO`{`Pm9{GFzPSLOnr1LA!DYlYSOp27j| z!>hZxpuAizww_xY0h|25C)+@7D{xbtl*d^ren^Cgo# z2&Ogw`m5^qfPdi|V0<{qpA`Z|8C9^cev4!h^bGL7)k`Ct_y~w^qCcglJHdPi-)y5l z_ph7$is(h1kY1q^DwlsCiBPEeQh507qkaoR+gzwnQlS-_}K4uv&OJ~&17s`ulE0VT#-Y!5JlRlZU zlg$C)-25W25~q(9g}hNru8}~mt*$9hAq|VyJ^HJd+_iT!9N>M{v|3fXkyktopKbmC z?M)3wfil+fG9FCd5d+1!?cmy)6+Sn+GQ=^1lxT;wLQYlW9UC*HSEH&yqGg~Z2xWwm z!1yE7N&=dRXHl}a4m2U0x}hG%EU6=7luX+$OG?K*@C`$}*qC3&i{$!`)d})dxNn%9 zq&E9ZyR--Z7xXC_yE6Fmhb(v-=s3aM?FcBWtCDEW2hU~g^d_uLyh*CeC;md&<0QPs zV!1EsoQfZOhooeYsI_~`-tFddiQ-O*7Wl9l5yZ=5Fuojv~4f#FTXvt|LUtKDjmtx;I1x zqy>~rnRe@n6}7+-pZ;Eo78Yv1InSwOW*wI(y^iLk3kf$7VO})YwX^%!0dbGvb?~dI zz?{d%^k9Ehp(qA^)c!*2!dDgCO%&@U;-;1O+EaN3G$FeeGo(5wZCMbn)&g&5)b7W! zONwvq)$zPK2;U=S#nTiY6Abhqp7=lKHTvZ>tC{YW#0YrP7n2a$3u|w@Z8hB{JZxeSO!KENiroBVz3LHo zFFG@LC|Q$~bRt?Q$|$MmSI3);Qwi9;qB|doswEahvs54oJq<3W>a~`^#$;@Qs*v4Z zpb|Y*G85;Nm-S|0c4NHr@MK6XU^^+!V-F$<=OGaI$M*$_@g281$k@c(jT?3l^2R;Eex`H=J3wjVKsDrDRVgOQF7nU-xi zNBVuwsK+8gSqop~=j6wbj=^oAC}Mt&cO779SW{hB7uOzi2Oh1AiQ^jQg*6$=FHk=u zZMSP-l!g76OJ)j!%>SJ(*EVzn!m>Tq8_mTOnh}SkalqX5@DRjT#EA_t8{|f{^G38g zoj!K}0D!WZv>Pl7xBtm8%VEN3-4Cx9H#LX(AhzCnp0{0lQ_x+e>A~OIw76$*2fq-_ z@?jezAEi%tQqT*?|Kzyb=P9*dZ|q|MWCtc!!3<;xP*(&mq{rH~JH@vJ*(&r*0omV3 zHY4^f{Lh`)?D5OZdky;Aqu)deVS_xnCrz$SucDHOmq1@?LdmQ1R;`RWP>fiQ++rV? zZ5~veoW*j-fF*QsT&S>U+o1rR5`|qeI@-kNH`_bfJ}4@#9cl8>k7DF``rjLTrJZ(y zz(`UcgTY|y_|*ANy|08p147O{!-md15z&{J@0XY_Q(G_Skj-#Wv;U#;_=x6nrXf9~ zx3eNtyL6FjgpmTF3U1WNO9S*_d>}v?|H@yo99xBMWs=N{n=3P;Ph#I=3_fxyZr28oN*g zZ!{injl~J8V_^6m0q&T`_#R~;I5<2-NGay})7al|CuWde9N~D=NdwPkPEb>$txn2fU*lgm- z>tzgon%<6i*zlsHKS_&+9-Sk}LRWGJVp2?cg1N&#SN(=EM`H!Gwmpbo1BiZ#B=)9Z@%|d< z9foTgN#x2*_9SqkipDu$gjD17?8w2}FuJ)RfnrWR?1w@g5vt)zh`_~;E`dJR zmJbF|ji)p%Z|(*?oC>^eRc-B;7re%PR9iM$%wL%ms0%z5ySj&<@F3tOf%9FQr_=~M z%fRWui&y;j!xIY3-e(%omla)MLm?^r=G&M3M~FVTuWb@^qq6A_(%x+E6E4Jkc`iB? z%$UHJllw?UPOgRk8wA%5pX>EuJ?#>kKGe4 z5OZbs>*~#~rcH{i>80xwe*IX^0zzx9veQC5sZtnNb^uyC=4YN?ztP}3SU6~t+byO zdqn*lp%y%qjioRnN;^>o=$9Q4;db~9rg*ryK?9FKINMjU;774EqW&8n1yN3#Y?HniHt-D z&9dTJLZRp#a(5}SWAPIl+lMIS+HZ57@uR%@y9{W$oT5^B)g84O`xlN`*xRDtuEi) zHDLH|3f;@ORzOGy^rgjeY34{VX7Quaq|VPY-ca)&sEEMmxI%4)I;(QzT5>P(-8`T8 z4Ux|d_}u?zwRs5ZpM2PDnt2$OqrAFF65WODIAZgEx&ppj+lYa#*a8PYYN9&_Pb6*Z zL?Qwr`vYN!!u}7*QFXoJ_DK|1b4qNk5Vik&qh|sl3zap4yOD8wL%iQ!1PTJ~iN&5~ zlOFcjg2qxmsy)2i%slR;sy!^$1rAS5P1S(4v3HIhvUE)YJiLtmS?eh5{b&X85dmMH z!9Le}5Ec1$)~WaFzbD@daNxh{A;>RU(hC9n&er~jrsMqUL@Y#^(ggBEKO9g+OR6s% zcZT$ODJ~0Z{G`^z5;s<_{)rT`9ov2$k1&}Uy#KSzzO#WJ(waJEAo>+7?mDa9sLv0F zS^_OW+hk{H<+ZE+@>4p%#hJ1Fw;eLZTw-}cKdU$iRP=<4mO9DMfNlj#h0BBG(}*O^hAH4fMt>Q?9m$+bQp zrXr$EA68uPL2+?Ik#+RwH*CdZR0G4}Rw^YbkvZsM6%)y*#cLJPrxE zCss>?K|OJ_Z^@XABYQY|FbV=491j@-2&3M?vGWR8&*?~s#gpm;^h!o#u2Nsc|Cno5 zAT7c-QQ^H>OQ^HQ?Hr1?Dum~ykg2m}5mCcqDkNJV5wZ{4b5Xds+$0wfG~eP~93NF^ zp{Qi2v?d070YuIf9X;lj_Ed}CwMg{)2NK$OIYlClx&U$&u(Hif#!~I`+Lx8;_&@1F zThx_|KC-~FX(V=}!ijU)E{9Y7_}15YMT5+NxaaO=>Ge|eU}4k{?&{5;Qnt8F_A<6MUD%D56?v}9}~`RshnSu zFul(<-oN+xzJz}?HF0fWIl4|0suSKW)5v+#Q^}q=WX_cPM+f!y4;|(9c?Qa+!~lmt zu0*&wUNZ}=5wAVw{xd{CAz`sZKG6&_;UY27{5$NG)C;PUpy zL82(zrrh}QPOy>RpTufIoc+|a-W7*G4!lC!nHS_!mv5B|q* zRyIERs!N2S`~?WGAmMon{lo|kY>^~N5-76!L2qWO4$#AQxs)yKu(X1ae|pF(t?M%o zE05|5<-Jr!rmI$Z@5NdGBZS?h>B$)tV9XVGAi$fC_013g7Q%`4a#|nxn=@%|v^Y{MKSod2v}{$GLpEV)W~MSK$cuxv8A(H#&2FHPIa zP!tdZv-hg^@KodDFCKi=BPzLmfAd)1cQN>;@ABZVy1#QKQtSwVLQ-9B;37HlJBlq4 zgd+WAB`)zN6kR&d%7*wEzHS;$RS_geSqW zI!;UctZXNi$yK%gl#4=*KOVUF(MzIwY=Q9)@7~_hh*Y;dk!-aA5dyMInks7E5E7Yx zu-yau_8K=y^kb2Ugf_i#(N}U4ymf{a>NRM5vk#ICTu<=K)9F@XAWM(Qyb|}omWNQb zgTCFXdA7&h$A7|m{fg6{C&-lKPCvA>J26KPZsTv4PIdA+e8QL(B4aEbWyDArL!Q<$ zmy!q*$=c|89y7y;vu%f{adxfNoN*V&heas9Nar<=dE{Aa^f_O)#!h(P;Ar8LcG1@^pcjsd9xz4U!!R3U)g3_hOT>do z9veY!UjB%OtLI1NjD@&!|HBk&)i&y*CsZ&>}N$4Ir;)8~t+rB7KEMVD-=kS4{J)UHOtSioW>F|;)#v)#yWjP{38@j=0|d^{?ook2F2 zKzV9aFwc_#aLuoOo-i4sm10~)W+5-HieTZlk$ihM}wM2`7ry@ zM8W($(PaxklL%voL&Jq@o%w?v9Uk~x0|O>;yJn;2L-^9tRa)5Kw004)d2GhfP_gW6 ztly_zp>`&dmIV7G*9)k~FSkA~j+9OTY*G+p z$PAAQ`!9YIPD1CkwdiN?nMDauW^IcSYo%{KdUX28UpLS%GvvNIKJPV92%`|j*w7Y$ zhKOQ}SsXbU{7g`UNQp%n94jxQQsEIyA`7cYrctCA71#QsHOYa`^DfC2TOFu<598@P zuuIxUf(F~T(MrG5Eb&Tm$28Q2=T5K|@+r;Y;*UeFFV*P0K+_b1%|zEa6S%#rdvIlz zxO^h@-v8nf2G&rMT2rv}R(`qYj5v`|CO6x|fpW7g|M@`(5rzM^h@aTd}rS`9w&>T{Xr#vedp@n#C(ZH^A^#sOkq^#{>qNkAENZ&GV=-DiiQUE~!xu^VWAJcgYadtE*AGBMx*BTTORY*Xz&a*cLoH!QLlU=z)>NcD06IeHyC z<^hBIQ*<+R2V};kmw(-yxn2H=y;D41b#?qgB0IO1!)_g)MNQe;;BQW}xH@Z-(v`6N zFOM|ErRgC*N7n_mm)5QQ@)gy{#pM5F_$>+@CaD}uLsQe6c}&r5%C~#x$GWX|(f5A@ zoMD@Svx8l?H=Ct{J`-;mTsu z??UA5Cg^R(|6wP}|8YMP@?0C*UU!Z@9zV1740S=2oS?^^i>q_5zKOvo%#Wb&|0D(v zT*E`F|A^~-kTn@KywCpvOSXig#Tg{Gn3}$`pB7z&*M@UQcps&ZAM0=Y%1Mjh)P%NG zRQbb{dLKKb&dB)LDU0D?6AkW{P`19e1RttQiR}VY_ph)HK=^LuB1sv;$`{22&u0AI zPnTzShjU2xIOFq3p%v8D&(G}%c;+d`Mciy&ZRzHllC56lUiSOV^>JI4+tS|?zN*pq zX$UGVgRIonCD9n*ZHMYLQOad&6AKGFU zq3aAM4Ft}M@M_Ae7K&gN(X& zi9O8MwIWU&yGb?*7bGfvEPNqZ41+cMkcFfjG4NT4U#75vFGZFnFB>GI5rzE@TUto5 zT-CngxEKZzvy$n{EE~K2JIQAS#%75}Xb)XGmFGRk73?T1 zz`6FeCSRSDAMX{yx^ygzMsZi286(zH#g ztWEhy==V<%U^`6hWq3AaMu;YOpT}PLlJ5RXadWQjY%qhHSpY7lIrrOdUGE#cR$j=? zeW(cqO+Kviu|r;a59>A00e-S)4l8>qDiL>x5u`$Z(jP(w6ifgO?Fk_gm=EP4Ja{q2TnY$LusJ8tgq zo&&x;MU*=00-^^d?_yI+o<(<O8^N>h23dCeN`Gx&2nfq4Fzye9xw;xeG1Iti@nwguf>y= z98U&GXlQ7GFEVyx4ZDLNH_t(+zx7x0g(tv~SuUDGTe0YtyzJumDRC}>Dx%-%+?lsP z!nCkQYU;rKsf4a+o<_$s@ACHeQ~ri9Xyp=ZoyfnR85qoc)#@6*{o&#BpU+z66mko{ zspa|NbH7R(K=H!Z4=jHUGb07oG5=bvw?hiOkC3DiY(cMEBD~5+pvEvkmSapeApbN> zrvvL!K8E{?SQg%^Ri~rUFmejA8Eooqk!Lwm*GQ~u?@@Q=;d+8iKuioYy~1Eq{*jEb zj$csFxG z6xLieqF&Y_X*iaFUra1&7;x-|(N|YjiiUVl9nBe2R(a>Y3z$nRG&1hM_58;34UGnl z!&2zSj0ixZVdgn6kPL8SQ)^ITgmWrIN6S(;+v&yt##Muur%isw&c8(zs~Y&g6sdn$ zWmIu(jVp+4s*a1IPX0kgo_>5bv!47LhB`R*{8R_86JP}$KHE>ANMm|0V$jbKk7MpN zIMu)`(WvT&{3jZtK<>UV&T)y*wR>^7S~CjW;|p+IV=WXt9Ht4YZY%VN1VrLEbj@y| zS0XB%FUv&$Eyb?(OE{YT3w?oWNvdMG`=#VZ@;tmwvZ%$Yfr_<|PV)L9a~}LUS>F@H zm_T01)Xnua1sdA+mR=m>jt;ZlRM}ISe)0MrWV(gBwOI^K_-%@eGhLjpgx$Qwy@uY` zd~rntu*mGvOv6mFM0C-J`Y=)K!B-D98hozBo7&OZxMX$P)(Zo~mF!wJ6|x*9OL`%n z*=^K+lPI2sa>-*sdxC@&2d@YaliOK`(dD*{%((;( zC60c&hr6mpO}bMcQ=oQBNlmTD9Xem9NKtkRQWhpy*)kkC^qTn5%rZpajadp!QI^lcMIf1*?9>WF%uwG;`lw+>MA z6INiRmgcidUcQ^7(G6j_Ij8FB3#M_OS1gv)y6rDGS@5sM68vOg;!8-_|;|KNK* zT&*{x5-L%4Gk30!-lnf50ne=SP*6$F9rzN`R3Xh45#6G!0}PLPW6&|&b@tj66tCf@ z?=YnyOEpE4&Ky5)OQP|C$5)$-HCcqUNN@(Q47(He+R{8b`xC8E0mTOeOW1!OnVfvV zUvjiK{TmBx*_YpggQN;5%L5BpXi~!iw56RY@`x9EQ{*E9KD{0dBDy=}kAKNlgNviP-OEj3lJRIPv(OX-=;SLVvTf;A`fU6L zEIRsE308f)B(r+PLa3;l$;h~r%~Y{u#8M^i-)*0X!!cIhapDJJez%65ZHI3bL|wP) zn?sk1^xQ{Y9GU2GpjU-LmXuIo#f8iEUs179B7?8i{ECEt!N#d2i#M-0{i8^lEd6~@ zBUZ4DnKs7sA==UqNf|p^z1vMfd8_cYRv~?QCYhsySLgXTXCJV-C|CZgz|k!;ajx)@ zEhqaF<~pgGy1O|!;T)6$rx>Cr^94TFW_045Frw7(lKlVB-Rlb7`x!{z(^D75{e6s2 z_Xn}NZy$Uva715w*xfGn3x)5m)XLb|AtGq}B0`Y=z}-1=(XAS}7`L-^DA&`~?bVeg zfa~`maBmF?klDMgc_-;Im$x?RIkVfTwb`VX0Bmq3Xw zZ+!>{vWwGjf|GZJ0qD49&Hz#qg5eG{x8=p^rQT@3&MPtnb!{0BE^z8vw*uMH>1 z68Q7Q?h2~ugKX)<@!%l7ONJp<_2g_sJdjg{q?+(1_YYSg5}{4y-$X6>2vWqH6qHv9 zn^++En|S1k{t*$pl@!@=kr*M#uXamupsc>nvTld2(o#t5n7Rg7`mnjsc$7EqVX@?B zAE*;5CpC%oB&-0)ELB3Lr}Pb_Rx!DaR_RzL#ErN{)cGo##Skt9%`u zsqOH`LUuRELU3;J3RstRG0dR93 zUI{I_dtyz;gco(-nCdmG?GN5Kt2hL~{F43EJDYA|2fHocL-IP-j1H(=bW_PJcB1^v0g= zcqvG*KFj+noLH!=kicx+a}o(b$f#nDPgh!pn5W2kMV>^I)AqjyJ|9k6JY&SfTn?`h z_4f5HfuF!4Z=?HL#s0ki?V+@KQm1f}V*8GcVpoJN-okg5SV@B7#&A}KM>AIb!pp<{ z;87c?Uu2cw&$}uP79tX)&j5sL&71aVg%Xuiae;&)5>I-efTqD>;fr)!X5n&|8x_*I zslyZhmi8z(5#q#h`;Lz{Ufxb)q1D5d4xPQk$oG2yLit&M(R`;J8iNisJq}=7LkOt1MnbcC+(m){1B_Bw&4}5~|yD zd$bg788zCvcK#uFn$WhQJ-OgpWeOf zq7R`IZMWoN?yufH7cY;(&oku}yfM||YFQaONT<_`T;J%^El!%CM^6$fzWp)tf^Hv||Q-nV?Lt*Kmb3Jx7)3&k-ym zR&qs|X3GAi$`EB_hkoL(6Pt2FTr|)7!}U?@@PyjTb*oA(z%w0ZqTo0nV{=~h`!4K+ zuNWg1JWO`_+K*pYpGKrC-ZQx9<>#{SOa`PDQTBXvEvfks2%2Wj!b{5jf%(de7Z+aq z-Qc)}*>XRwOjS{C-z3b9f@w3j$rzt}r)#s*{{3XK>=F$stUlTI@J$epaLBKmo(ZbB zM8q2RV~Ve|s0iG+!!wL0;-NUmC&+N&E9=|DN!nqZ(pgNXaKE4p?77C#8AWtG)H`7e zCnOm~f?64kc1ye?9>q_&9)9ojF4)#iUk4j|ce!{=k@3!Ri1Rd`Ac5s?GN{->al4Uq zO}Rq^GYExNH;F?LII}dnhC6E4;b#1=L?lc?jitC!IXS0Et3gghc!J1uL4%1@>!|f| z&!k<|{-{NWdhMvy{??C0T*SRxC+opNHt90nrgs9y?WVv8w~iX18XP{Yjg~V#I2$!7 zkLXW%QWUUNLv0L-9XyUziYx{pzcucpz@?IS%3x1yzM3o^Hg}9~7uxs$vmh5Dy-kBm zKOl$(-`fkLkR|%`$;xvlvVYjzv&)ZjgMZAd8Qk*XX`k$)zgk~Xq83bGtj|7Qc<N&TaAZ*rkbiYIw0q?7|bvT^NspNGv8Q3Ikp1joU{N^QwjD>Yv@- zw@dIaI_*gC9c()A+4cvU$ ziiLeHu{tSSR=*8=<|a%YF&Db<8nhtnO!FNqI!xs>Lo)F-NncBT60<#Y6Fc-urYrA} z9;91JaTq@2UP^TiDpI<+>2YW5tvBK=Y5np#QbxK?MRAkLlASW&j9LF3DhY}2;#!o zC~`_ah&01kv?=nCiW%3D9~I0UWm;7t46f&g4rBnG{Dk26D>QuN2_VeRt#dQ z(7aN`WW1v*nEsp_>X)k0TnHSe+mk-^OdbP4XRRI&B?V0$6cF>}|qK|Kn*~CrAF3zyEtqAB51r zKNW}RGn4!V=t96UI@|IC!S70K?3HTwxwp$SxIs0%&{cfLaUs|ogg22|#P*KE!u1by zp^OE}JQBsZ$ccGA2$qWPp(vq=!`o+}s~I&ZabvUh@H?9tv)w)iNl~9uqj%c+?#kE{ zvP42xEsyIW*XK&BP4r{OxTXALDEgz8_;B($VV^LXk}X;5tCfWcG?h23jN-f0K&r+! z(O)}6@P$Ic(~LH!>0DhUX5x%O>Pr3kS@WIY@$8}AT&n0}I2#p7mU5s@(3=O>cmVow zPBclALaX1ODBl`EQ~BHOVg%m*)piBzA}pg0OI6RxM({^V|MaxVL$by6CfkiF*uk|IewZX>>8&QJM@yK@ zM$VTuY?R}0g~yOWpFqdz`qY6EuGX!>D*t)9w+0G^4I7>xZ&?CC0$%20HRH^s?)hRb zspPr0-U=xR6YscYpo&vbD4IMOVaZ!JGfcUjiLbcnPzRi*ghV<|5 zq)bH(9UL5b2lm~mPY;X}kbFPWfccR&<>cf_G?@YBFtF2T8hBMj?!wI6`cBwtP~HIi z$=H*BFZ>?(mQKmSJp}yrskQ$}+V=bScT}$4jZ?7qg{z5v9XO+pu#du2zmQ-%W$PMD z#$_Ad$+c?yP)Uk0a0~3DmWM5@x8hb`r!IZ4A$|AJ1KXxu{^!WhKn#Vxfq?@U+_Ak) z`*)-A=@Zz|v1?=m?eN;eg2P_>)N7XrXTh=AhL@ze#WioD11R5d5Tym2k|YsTK$!ZvQMuHe5r*hsBx^$#;oXl4Jt4N5;VY?1r(=RlZmF zFy}QWCm?8iM;OxOroLMTR|NXW@dE}M^X;tPk8dIDdRv`7#Q6-*CH79$TTf33mqPR` zS_BCv#0#kBW+x;StQApfCiSECB;x+U(fEsj>cximbAvN?`;x`2iQpCLba*Sk( z*%#(ckWIC!$Ry8%5?VP2`RK4wWp&?7?7?mYqiZo+tQqy6K)&!6m9s(`YcIoIb8| zlC+wb^TC#;gj@HBCjOe8mT%6e+`a^3ahcVoXUi#hIZJ=aP%SnMFrHK4^zz;@ldWy@YAa#tZ=H=65D2$-^6CcQ3b`u0;`{JzlL3>4&!TCZrydVCWPCpC zutmppE}nnp{vGat9jqYgKOD0bikDd5m&dlUu6H~o zUwfFXbK5e09vvM8Vk4SnSb^t^4gyrkt>@dd-P4zo)4MxJ%>8BryUS%K zK;NIeep9*lmx6G2(sX(;x@32GZ0DP%W#De+`u-iW&3`Q|+*I21{|6ip@%B(k5=RH( z9INfkMQ~~Rdp85A1aYfQ!i8wS453Teu1aW2C;BLHY&RpI{)PR04GxqQ=hFB0$K-=! zDn!8^s;Q}yL5m5^*!jl^9v^**VY2vT8Dt@$(zaB9+?-f-pDgx-kw6Vl1z6^8dwA;Fs~ z^AxIgDMnf?4Y12W0+Dejppp!Uul@&F=?v@{Z+T8(Oh0+v1IyG7^}7Ih3(`ye74hutsT5f_6_A^pM{5b~ek zzR%~rXs?U+ET=N}q%S@nS9<$-W4id?W7mE^%AlO;Y#lxlTGupyJeU)H@0oghb<2mU zx=AM+;0$=Qp?Y#@S|N%CswhAx%oOt-1xPS|3#p-@BXF~F5%+sloltq6s+wLrxoHF) zwXxV$Exhp@d-_rlZ+cKM0&$sZbmWk9OTShA%fs3o z6>uH>`A1iy(Kg`Vn}(H5S-r*?WouGCw(YFQXj+dQY4;bjl^^UM?cX_kv8^FOmtJmp zlC`O1So`W0ZdR3TJ2&qVfQE;a&n$Ae;D2u|F?WqS^XwJ*rgZJMyXr;Pqi^x10r@(X zz}9YNnA8pO)}Svw?4vGPT`11B!Z1R$>4f1%JxH@D@*ciQ+qhkLao1ctk`pUTAfgsc zpvDyzPo;zQV2AD-Rkv3t;>6th#C(QvYH@bO!bevo*FtnW(qXj9=UeI0AK9AHR{V09 zF3&`eaoq-+8EAH=eAC(6JChDZdi^A;vd1({i;5_Y;q>fsw@QX2OT&_OB1dt_P!S+X zZbUTf`w8F%G&>@}y;C!oW7?`aUaEOFf#vMn?IKj6gE3E+9x$(BT!gzINwg?jJb6pD z$k<98D`TdHTTJdKkw&iPdRO3IK|=n9N^wGRj?vpsnG6rfg8bNH6a32jbmPJaMy z>`~x@7SM!Px!UeVH6{Xv70G}!!=n-VbegF5^mqQd zwab6sl4BT}%!VTFontTChIjiR6c^{dk9J-c5Q@t?fK2Gt!D^62Pa z&^E@vGwLsfJCQ5beDyDedy*ma7sCyP#%Y~!yE!5fd*1(We+GcU04w(|NDxCIwlnoX zYCMDtJ_aUk=qUe|JR!ybGB&w zdrtx=?o%|TUn@>D{)nX!T!g@t$VgQKmd^rDf(v>_GyX7>DwN}J`+Lc3!jgxAy~~0z z8)SY*lN!I43GoA8(-)BZ9Ji`lkybwAO&$>nBAx^-Zonie(Vuk7BqMH&m-hqsx~-O% z${&;aE#uu{wxG%O$^F=qo4Zjt-)v9XWL+Pxe~@?k%WoO z*$X7fdj5L!Z>;O|mT!wuw-lk1l9GgdU(zkc zGLJP$>RVfPc6Ne~T_RE_sf7G}iIZsdU&k%q7*#ERup#VWw+$lPgleM@ zDnz~eYMQ1Er0}7l2a7`DJ82MOmb&Lhy~wdHn_1c=(K6S>?}T+jET~G$5D&$k%Z>G_bfhQrlow|Dk-;$CAe7mK;70)Nh{Qf7GWEA}wL&LarZ&VWlA_Lw$U&}DG*{ZiTORm9x=hBR zDSnKpk5cT@n?h0bLnxtRpJ@;ND^ejA49pHE-=AYV1tJ)A%!qU1@AT`ooMD^C{M7=& zCd;@c3%%%h1dRs!kN&RV_+jouVA4K>zS-b%vfP}Ej|6bv0g2^RzFauxA#+Vc>+dpa zyMz(Cop*KB?)=}AyADtfnVhon1w1}VgsVh|(@$r&OxZE>>DB8Sf-74$?;rdj#nhRf zNF6=EoeSFlb5?ZsGk`N(B+oFRB4`fL4)yrov+NuyB1uJ+*_|%tXX!Gm{3|E`7&nlv zk^08wt)Vfo-M^oK!k-rr6tBrYn_oqN$sdZM?Nt;gz<<(`bXby>0qTtUEe?Zv6}@8eSQ6-GK6of zrvd<<7pSl4KYuy``UwN?yI8(ew*Wo^-;5+fpOmSoss9#Yzz!_tU+4b+hNeJ7C%8lu zet|@D{&kiedl5oiX3xYL=mS*vs3-BUZtPug65Th_8*YkyHP&hX`@GZ{;s+mjFm8#TzD5Q)Kuh2xBL8a=dKpl;f7p`oX zuFg5XD_OW z)>oMb5P7#KIA(@;SmoBTdE&tjbkr1pptC@`wa+h4i#rA4-tsc zq%=}S@YB;X0bYrckpEPm9lUy%)dQfBp{J*3Nm5c|1c+I`W1mZ441SQ#^!>Q1(60PhzY0`Ju{rnSF~`)soRnhBf$Ox(W>e-KP>8i>^r6=Q zL-vJNt+W0~z4h&0`Eo&}craK<{dRS`s`=``(290ozd;d5E%@7{Iy=|snSrzjwae#( zr~_!K*^`D|f3~K-;74K-Zt+d3Do9Mp=(n-NvmM}`q%9^Jhq<=BF= zH=-e;v)iD=pd7G*9_Y8az=o!zhUvAJG$HJO3Lt`Cif0)g>2z(PUHFq{`)j7oalzvB z^igvr-91@j&ERqB3*|4?Y}?XTQRmsxvZ@0;4Xf3I!BJc_7=QeNRgmi}}Y)mog}{BU@gwWbuH z6S?w51pTo*->$6#A*ygTWa_ORnO<_i7vtVYA}(_;oAuRMSgn|2vX$|jwPeH2urBVX zP!QsX=@V__4Y8eE+|b*bvjNw&troHOYGaE7u%+MbJ_OzsEbl@}v_$yqGB5hO3iaF0 z;i6+ufQ|QgY2>;O`l_G$svldsY@U<6<$Azp{i{F1xYCFkPV~udu{ChTh5cnA&9h2~ zt?`WzTQ{&*;GaVy(lcunK}9C(rfzpW`@X=XeETk!^mutfNrpQ}hmy)_v@Q-<8Ka(Pv*BU`3XoAAabzEnY&rxJBsJZnYtIt2c&PdaD@MUd7qWW;^*gXB@ z0|HHfvHY{70dWZU{9w`c_1ZHL9nUyP;@13t!l++e~Z*9 zrHnENrUJ%+)C}f`)MTIF`iw(R1#X{alDQrinffb9o<8WFlriWt5xf7f;8SxKJF>Z? za@etL7V}^`@^NAb>vyk!K}3alw9aQ!2#}>0W!csxn59OTF?mebB$T)sKzvOR7C>X6jAN@!`kiP9W$~Ic>3XT!lbropv9o|1kfXr~p6;lINbH&@? zGbTL_+T+(EjI{VtV-DIC+AKF#C$`CWE??3pa8a|GP z+-zL_1jJy#c=TR$6L4k@TGuzn*Yl%}zZMZkP0}ULXOAAFT%dKDmD*W;Zfh zIDZ0DyAxMo7u)AS;=yB$WMpi`HAysChI10ORsxU|qCQ!NFzhcX{~~qc z!VTOrf2EFY&YT7|I4x@8jmIa+y|17R5e(D8Fh4pmFcnbW{4W_gS^BN?yl1QNZ z=!En#`eZ)yY-+=$b!`91A_)~t+PyS?`25+Xu8+tEA1!b_IDv%nP`q5+TrO=qvrno# z#E3GN+c%-xC&^`~kFAq0h-A?UCR9;Y2aM?pujZ)tTT!y%0wlLCY2UJ*-)Z!@ce zg!+goZH(&{Vnx)Z&(P(O_EDu(T)Vr9vXS;sC_`@^BICmXvz5hvd*|4^A=*@?dSI*l z6Z6PqOejvdsT<9yuZURoYY_ie_X+Z1oc4Gn&3539^kCRTwtT1KiaRB3{B7RbSj7)H zOu{*N(4XLI2qP4!baJZTbaC008AUdO)DKIsWpT6PL4?$B^i7oT1E7fS!ah=_Jaj4; zrv4_uxk)#o+(&Zfkwk|0(h1+ENYXR)EgA#`RdiF&J%Etd&lD6ChMdHfmf?LEZwW1X zp&4l(ep|oQP!UyE4^HkQ=T4_8BfR@K_JAjL=~iXfh;b*h+^2IvFk4 zk5lLF^DmlfF21_6)oqKh(DYPVIl+Pd>Beuya@nQG*D82S4&M$WxK zs4716!(Sxj%%E6$6f+n~I!xom$O@N7u|dNHFEt5|LafDZtLf2`D14``gWlMjH0w{n z(mCVHRLCCyRzzCbW!QXnVCbw~R z#zjBZ=weg!2J6^LxhbN4k#17f=tT5d=BJzht`0{!t4)(8(6(PtdR7L~D<>xz@4p+F5 zQI1UT4vOY&QpuQR$!V#;rwoTi(G5%IZij}?4ce8Yi7dfzQexxIBE{SOxijI{UkAR{ zm=nTl9zyn#fR{B;86jxFmrbts$A+)YFE49PX#^L5E9oWs;RxOaAz5&)K zWoz{vrGRbS-7Kr^a(&0c9^z2S2d_;WuLHH8jh&Y%oRI6_bS@wZLHO~gO86GAoPN3( z(sKE3BRmldShZ)mZ6dASkGfs})^!>5KfU7h>TEetyH@X<{^=Tq?{tkb&Hh)MxmqPd z#ghWJB?Uwk)5(Pj`+5iBz;o$rN%jn~IP2~jlL(}e?z-%*#0=^k*4`)Uwnl;Wv%;gS zuuETqQ`GZ)F^eTtT|`SF59n&ZF<<{=GT zilz?a^8n1O$lZyeDQ)DS_%O?55y(5$qdX<6yIPP6l=5)O!9{rNf&F!fN+UyLj^(ST z!hsuzs)yev^XDuGdS(#+2v8E1+2sAexotjYI&`#i#w(mtQJGV3xi3~>K}8h|{DERs z3~l~VkOGL*&|C`H8$`}$#3s#NL1MBJBqIFWUd=I>m|>lRR{%=UuT74OY(nWh7HB$# zTN)8)NH?~wG>L2?BkGEW&;eSZD@#>C9b#x@#m(Fs^n$U2fH|taA zX)bzFsNq;7(JNgjVnk2))D#K81V-)J45vDcoWoG>Qkw-RJB5XX>FMbe1|jkN>H;>z zsc#>qzZn5auk%))qV$!7=rm~3ZvLh0;lj0H|#pHU=s z8EsNI{X>s@VM;~KD}^Mvk}b{ZZg^q*yVm4UPK-Z~AB-Q&3|a=0lxC_)rD$pvTQIvS z9!e2cKNWhfF0N$02fCHNmDVccvd8guBi$)9If8BN&?Xae;*Ku8#1FL7%*!XUJk+ea z*d@qa{Rl2;PSCvxm?T|Ya#U9*49>jv=(#>D6n^MuUA@`^%-VfNT|7gMXTd0~@@`T^ z&G=luJ-meL{PngQfMUAH^;5S?nC;i~xyu;1`6But-5lLq1sOMju8}HU5~+64i-zEDbJ~)whXP0=b?WDahJ#+#LuZ5Ut9Br{|7mb&-wg9doENc z(`4%*tgbRz+^jucr?c3jEqAfAeOK5`3PJd1htlu08Sq~zcs;t^imkA#);qYV=8Jdc>s8_X{Nvi#vydx)lVivSz$oZx5<#?`nZ3E) zwh69H|MFes(s8}xE_%~(qLzV=`zfhOZzLb`au?Cy!UECl>Dro5{1;%16u#)E76x1$ zgwMNi{6Hy>Br+elwd# z7oFg3N$IpA3-^FAHk?Y4+z?wJ>=(EM0B(wwZNU02?69RBi<%}wdF1Fq9uXSi8|Egx zBK77lbebL;3TlEFbZ}hL-T(EcRgDN@7d#rJ9kUQ2o7UmvIBY%@<(6IeoA(kd37u?k zx2y(R8kFDMrlk*He$~OnBeE$fDjsCmz8R66csD9Z-#LiGd?=r~7e9F)uVGMiP&QFj z;NEm=i}odIFO?1LYSA=Rq8NmODVbgN*;K36nr*bf*QfXSl8g8DR4v$1zm|UkuPziq z7KnxL!-q_Myb>unw@}ZKtnT-l5Tkf7EDws=4jWb`cJ8Hg>Z)IN^7sC6*1+DcAoF^S z@*YIC;G({I0uI@RMLJ4!MMOIM5`{!u1$&K|JtiX) z=ZuAgd@3zhP9l(NgqeBezudv^-@ms^06N)uX2uUc3erL@3btB+jjQp2!iz-s#Q90s z{h@@ZxrZ-Pa&l&!ux@=0x5mWI#Fs{?Y8SEsk?w$t)Pj^*v+&u`E*D`2Qc@W&-qQOenshU|4t%T=hEkD5db97{Tfoy~T#Uu6nGvaHfD}I8} zQ02Juhn>ETAy9147XxKEW%&>KQKhS)5w$bcN7e5VBJ^guPW<_;kV9JBn}m3P=~e4Z z`=!+Ou(ZW51VOB~n*+J55?|?8Zjzsw?w>2#o+$q+M~k8+b(fR%ar*2-Cb0tkRT4r% z+11az6Cj~XN|J8d-#=ckODk0SN9?nfG!3+=XyrU(q4rS6h=!;W>$F`4>+`qZ%!coh z&nGa0rct~Pbc+5s(eK9TW1EYdc$b_jHXP}Ni89-MW=>$49??=zphTMWv&hBmTJEQX zE!ha1j_F)QXcuFk;6KM_8&+rsejIkFG-_%7%5DF=C?yf`x6QmW_qRCn z^x8-g?WPxN*milP@!k5>r(M8%KYFJacj_IjmFZ5=b}=+|an-ciqnGKGJXK!hb6&r6 z-sw1YUiG?8arR^(HgvP;-^AWFys*-w<$~vi=`gxg#dY#CgG@9PXk){S3F}>s7grh9 zH$o_O&iUF>>eO@nLdZQruRXkzt^aE?8;y90|GCxvITkXOzpNRdQH?)@8nURc@$k#u zi;`tD$OqQ3F!Vc3b2=j>A?kh(HXv62)98tKPP+UCBijgPwQ=sVc~Q`tO2)-B64g2j z4CmLPyt=m3S=w*{$vuMo-1jGs^OWrg{nU!(@Ms?aZa!isOyJq|USQT-1|}|YAV>#O zOI@nJ@csJNkHL*NLhyu|hrtZ-xPYVv#m}aJ1!tZwk37v@H1Ij;gNBvRZ;1fFcK;Y3 zJ4e>~+~*hf`U$1A#-^yWCk<>nt8JaiL;PNI?$$8B`6OZh9sKbgcfu;XCX4fk;@p*c zlzO>yJ_Um}MnFzW`~#-0Y}$|c##K({qh2xsmyU&&$K@u+-WDN?$R$Sx5*jo4QU0BI zv-ZP&m3$>KLiu@|Bj*x^%dmcH&eLdfkFWWSw;bHs+qd{OMrCurB?toq}mpY z@x>*1Ea0SzpwQ|TeaPBdP-E2&9-5V#+7IC1YMt0YsPpG_K>6K(?O2OJm2RmvTM}Sr z;GOcGlRT#^KM-O1*GEY%V#~p+DE!hA1V*h|m-jy#44KyIoYlJ$l&8R4-n{uyDNspQ zqLdD_#ftA1GEHxayZb+Ejzdr7UWoY16+gag>E z%~Fjm#0AGc|IYP9ZFQEXv-?eMjhSEO`_ect3czKpV7|un)3RA&SwV9+pqw0Oi zkt8t+d%Do4x3yHHN@ITlWSCf{2~mS5Icc)BBJ7-^$Z-%j`@%n#z{X@FM*9bMxMpNhU-d~J$#|`%7-Y6(ynJ}tfI??ah8qr2epn>i% zr(M?9={jDEtjMb(mqOib^We3*{Xan04Bfc%t8c%G#NjYs?>XHzB>zNV(66s)im7`H;*7b_=Z+&U#GYk3^4(PSzjVAMffMA7DlN2E>oEH!FqfAUeum+;t|l zhGehFFPlOe6~@~z3LYOta|rLy`AGh3KKH}lhLdPbpTpWUBx2V$F6Q~}wL{L(i_2@w za8rc*6hdT3Hf?D9I4K7N+`mGPz3jh+u3$8WxQaM;#(y`Y{#hYhyuFgT zb4`Rx)Ggn?J`@^jwcVh0*X!5Wi^0#va3Hkk<-;}4_4`{?|EaF!(3D2% z!5sny%zVlTdUHQ^PoLt9BmTz)=$dE%w+2j*g0o}g$uHV4=`_vSa@;1)yz-I1YRoc& z6XdrLhSkg68?@pIgn-;Dkjb~;IVX+fNVbZH9Aw0$1?O0T=RSF-*L4H zreG#4h&t68hwz)aes*C9N)q-0A{DD|J6T`oC!*9G!fV2C$Zy3pR~CF2GTDKkX){%N z7S+C3kH%>kBMC)0`GXGZta`-6_C~Bz(ud)BXYIEvQHajF-{KdxQFxos2$opWlK89* z7vHWDF)pYOR4ZjE4}K$_7084G%P17lp9Ru)fRc60L;XAuD`(L3E2xan^Vb#5RI>>3 z5GRB9Y75dgri;iW4^)xAs5>3W3rIj!V&2hxil0Q@%kfo_#s`s6k!^rK-9o$n*n1y% z#%&=x5N(F(8qPo^ORqu`)ISGzow?<|SriDCon+ALX3URnzz&Ckqn}$7#|q;u1}VgD zW!J}vt8wAiU#S);jB|667F}m>LNL230PI{&r!)rWOLej?dI3dU7t+7-`>y2Uqg;<-Y|HS>!pR9QJSEs~O0ALYY-kj!lnF9hbZYiq|es#y2QMoQ{^ zFxSt?a1y5!9`=Hx;EvC^57$77mOzI#XEYr zCBo2}$YI0nOFAi9dWm3}T79bm>2{{=Qsqb(%Ew09N=SP}R%EX^gLUt_xX9u-(r|`P z+nnwW4$#fr6)v6aMowN9^JVL%vWk(HHwGLb*6^^eHdpTkh178rgc@;!VIx{Ccv!Sq zP3kIoOgfI6>T+}cK}gf!w#jdOZgZPo%U`vgu7=iz>R!%%?uz1`X zwZ4jE0*+Vngt7DGa&Ny~v0|?w3~zcq1mol+gthTxi~Rpzg|Vr|e^nSD@+k+&*&>du zIUBU@>RU%elvXk-pG$)=vU5<_!jm^?aeobLtdR5c2lkcyz6%UYXNT43aI<&qnY6i2A$yg0Dd<%=~03~o-1J|*Od+-XE(W*xh zvB*e6p2_o9U5GtqR}FIJ%sEquqpN=E_`;j=&i4rHv8eH%IEiCFhvJ)9d}ENCMjAWA zM?(ph_Nj@UJWwRWf(3_}*K&8hi1HhSCNZRAs-TchNUI=~-+emR3yIDQbe1(lUM!ri z^{S&$kmSjZXXx@DQT)g*Vd0lUO-qz1-XvX!gt^=i&ShJxZ_QTE-!|5>Kty1n=i(Z9 z0q3`bbe*~S!$9i3ZA)_Y-3dy*tKi|jbygMBDbDO1e46n|Xhl_}FPi-SGs4 zTIzvl<+lpR*>vln@Kr0{sWWmj)J^^7H9)l4N{UHNZA;(6L25FRpT!c*d+!0OE`Gtr z-t%+k#A0gIgB|cWJb+&QNR5e(2F4&_KHKH>{3L^aJf?sCH2j!ui#!w(eteRocoL zF;Us$&f_JptqdWXy{=fUU21n}+TPss2libjYi(n;bzQ?{2}{l^248#t>;Pzq%jW>{ z#e?WhS%N>aw>E|={qC@B9RzTt17vUJ+~bo8*uik`7H5<9}|N%W*sw}<7FfTW8}F1wk4-z$EM1o9axm2 zkGH2=L#~%IP8~`1t~aiS12BfW(4E`M8{|*^jgn9_t(?@G+g08v*E=ukD(>h^6@=Jx zMC`Lfp>hBSAu%$UU8w?Ir65;^MS^ z@{wvt540NbG}s~vUGkFZoOsQ$C8fR6IY_@$bEd)c-~Jy@-xyt28+F^*P8!>2%*M8D zt8rtav27=f8a1|UH9E0KlT{=?;hiM_F8Msx#mQaEv{R_QA9`@z&y^J zI*E@C;$4%T;S(RTKnqYQQ=!5V;6WgXr&|}-4yrz=Noh6Vzy9F8>;6Rl(iG$XAxpTxIxwOG{2u=npJIOKhC-{$H!gTiOa)9-I+e#0ZF=}aHJHri@sDgU z&MZWB(^l=5)saa%sAu{nZxGdwlL>cB%X9V~kD&QAUe#aOWO$4k3K3owtMXjOO%UK{ zvgp*dYQTyN1vWhR!}mX}s5UBK>*)RW?Ck|d@8q%28xxD)uH;p|I9~wc_KSF6a;wk> zJP&R}WsgrR%O`ii+qCf^Pr#)v^VVaz*AqYK;dn&Epp7x4zT@XQ%k>}Om#63PZJ%h* z_hH~+0_Y70_O9Ih!;M$(GU&+D@QwIS35=myPbUAv_aIyW;OYU4VcorPQCYeh<5#rq zwDN#w;eFqp>%iN;+aPby?k5OuH1H+i_Q(z?{b%N$U4k@;cj$k+6-S%EfTs&l{}CQ8 zq1&W{>wdI=#O;sGZJ5GK8NbW*=3ki{R$U;|C!uj`;YWyoD_WtxHBu1Lds1u72^wX$ zktu%uCtdFihww0q;HOpoYdUa$-YH_3aFAlp4yk)2QFvg}T4-t;v{QEn{0IXc1GYdX zz|-&UBfxJx1HVuH1Cjr?kEho#ZAY&qW<=XoaoprK>afmrLOO#15WtkTh1SbL^S~@R zCmKW3qyCWLE=B4$8q39ILo50XWfYdOg$$d$paJiN(uV*+LVnGWN)_K}QmpxmM`4U3 z_F>4h04WAD*&ePPf14l&u2-=yKf)Til^3Wo8 z;2$OP_ToU|BPtGwR7a43*VO0ZLcx5Xw%O&{E`u)&4a(3{j971>+gIB=PbwHyoonj} zDNM47I7?nap>nV0P&>n4$R;NyFGU}Kq%mdLo4(8qvK?`Yu=5(g!r-K}SfK>O$Gjcq z=7sh5Jgb87IexhN~n%r6Sif4^~l2Wthug42MEVS;XHC zRZ&D9O#XKkxu0@gtu?AWG8)SRTNd3PGaLd|{QVa*Hl4%ceVC^GU#On14-2(G0gvQq z3sS$vX_q1CmJ`W(4n?D`=0@_qBZcVG4aB$_Ydr<~FgByAWaDTt}s+jgypC5IS z)wjNXX!iqd-pi$u#3!YN&9ze_j?+fSmj)>r)rY(xU{DVvIW^yG`jauQIKu<@T5rRdZ zEJIL!nL}=iYxH2-VnCizuioqbobnsZJ_yfTxH?4%GoCE&ZG#!UFCk-=9dM@EwZCCP zW_Oh)DQB3EA3S#^cUu1zu0Y!KBP!*R9&o z*8tu#_b0MKAoO6uyHl%+FC8_6W9ezW!hP%d7{u5B zthgQIY2Zt)wH5KD2)a7zdj!nrT>Wi!Rg+`-Zn=qs+x_*Rlzb)FcP}mX;u?Dcz0uA$V_U|Ww*?mtht3MmF6QD?h-JU zh38n}a6<6kG3{FIy=VlUam~gmq;m*akVRTcg%)dkHLzlalbe3+Z}zFST67z@G#NPi z{WY0Rw}f!FJdVaRpU;YwbWtE0e2V@=wQw1NsqB(L8`vE-g|jE#mK648qnlshOgH(Z zGU+-6c!QPrd0jf9AWPX!xE8PfT6+w7`H+SE?-zDs+2^tPg5-CVwY|4E`%Y5;dZvB8 zrxhAJF7S`XHTH}0Jywa&KMDgWzVDzBx!gVVdRRKJ@V6Aau=YOoJl)Jc1*95(=D9k) zk8V#cZZimYEzpKrYdU@DMED4%UV2X>$Lc8X*6A`Z4^iRU=hiNfA^*Y9Gy;pqN^FtGNOXK?qO zPw1|w;CN8@%~a^|g|g@TM!MZ)|6KUfZ+Zu2MraUM=;7RNkAw1+A~t1q1>YFbXKK78 z!TDTuI1og7Q$%R@^Mz#V1CI3gAddBYJIHJB9h{-_e}Q$7gNUXMQ|jXW5Hn$dfu~Uq z+%Z5!fZ$t;*{qobPxkdH@=)x@0Gq|3&iIZU_^ z{9VitF)D%hN-uaJYUtk~p{;=k!3FcsIpjnr@I_Mj%stdQe(cj>yU(Da}9)LDw*N0GaY_SPn1i z0@H`B@CgpgJh*;d2<7-5r#2Gj@*B`~T*|aVoywFfTK=9D97k3#lZh*oO)4!cLN^Jj zL^luSjh1>dV-1#BVl#N5^htYXixyE;Q%l4)IyX?oU4nh>B8!u^hl`~&`l+XaXNXTIXLs2y32j-H+(FBLU5t3lA0=ErpSpvA^UWVO=uh(WK2{)}X5Y$X|Jm#}j zLo}~H!s+g|M&0I7|7{(4e7Uvj-_*^A=&(n2%HW)|)4N9OYA$3NToCqW7}Yp!f=^Wd`kt(Gby zrmsw2kvf3?adrg+0pajkY=DU0#ZT@>iy;^+t87 zny)?E$J-+hP21&ej_&p&@HMfvpM!@^rStX#sHaP}|9iOWP^`k2o}S$tv7-3?Y!(P5 zJb~R9N6Ar-$4^zy0v!m@lYH)eUfDV4n)n#xaTKO2U(09XzG^vPK-8T^mH1wN3-8&t z=|0;1F938u;ch~H?qDN;j1u|n+Q;3D1%ClFF7SPYZfkbA5gglo(tM9gvJ5FoZ0-n`ZcE_^55tv;B@Y zf=xzsTQ73=S8cjtQhB4s{blCj@YPMs#vCExrWs8X;roE?pb3=tdRVa3=}B|R6Kb6~ zbx|e~*rX=*7nwUG!qymBh?^JznRQVtJ^-I)r(_g03DPDuYES9r3|0P%%RD<-w^wocee%_a+F@--mNA!uICPTtuUHN+K|V`2P+vCerl7=VW)! ze=$8#38c|F<6C+(`du{G)BF2|bh-#H9m#yg&31Hnn1&q;zT_ZSPCl9qGXJVo(n0C4 z=9zs~bGG-Kc}%nRzR5zn?f(Rl-t|9yu=lA;J+>+bCbyoAkX zIS(oyK{9il6K!oFogS0)7>pgWTE!l}BJGGaqr-Is`m90A=_4lIkS&8810Dx*O0z zMb);+&iwT3x!`6R0psxCfM%t3+YB%LZ5uos2zAu%gH&6HZ|B7%u-2L#V`z^>MEm52L zk2Ows+B&){@sb4bDZhK$I zt#ryl;bs`o3G6=p1Ey>_!$4&w`3t{qMC1^jGCN&o4v6^G4szG*xxouw&H({EJYsyD z+^k61%|B5At2V4*V~6jX?)~icoD+Od8Rs#!dP$Su;Pmg88B#H}!pZ=E2cAxi*1dlq zHE{Xr(qq!9vE9s#11v=)GgoM8X->?d$fPd*?xQ z31RdROFXhcrLc(JJ(Fk+Pk)I@m?bC~-Y9n-?Y@yHhSxN0zi9KmOC2Tc9lit*3O&8J zaszy0UqaZ7cRWJgvgNJjlXoyAy9Y=8Di3)R(J__81a|W&@?@JTVcCW!W$>si=-lC; z7$b4Tf_qe}LW_bK<5M(Of8fS5f#v^(B&%t`cbh>uNgF!Jd{!2fb5H!s{(DW@j>yiJ zrOrPYa;ve$a;(vyib-^2a$pCtDnV8wMFM3!@rW1Bkb2s?pFnqXWA zEk%`$x-L}l<>lmpXw#V1CIZ~u5K;a_#(^S#CM}L{qtu#I73K2Kj(pe_Nro}yMDL`a zK9%)pZ&cIw#Ov>5scm>UO z=$oMvLD}yon6*(&-$y;Ngb2f3>moWC+qMahkL-gDVF5DO8qf`)Y$9AltAr?YiO|tt zmweb3NXq8Pv&JZJy3mEhS^%(WK4Zv>b+23X3>XY4nLD3m81--tCSp0AfBDW{-#mR{ z_TF_lzvq+gcINB3HpBwo@r@C~rmaCZy${MD)WGw=S6xTp`v|Xh!@zLTmqux#1k(L( zVNgK+%QJ2pR>86T=#olt$sV`=;`_jsB<$mJvdeu z;)iDRY#RU~^$Ri%xRTv^9}LU_+uKu;yfz3j;$Cy)f3AII6})$T#y)|5jJ^tfA_(7Z ze;{lOeU^%T6nz2|+NlimgkKN;Uzf9{d)`+}}>ylTp@dROYF ziD!iGV&2aSK+=9Y1zj1&?_(T(kF|f#J=xfLzA|kMEPl?A?!JA#k3P=@ezv}3ka|Vi zDfeUn-##V>!FUw7``a_(CBq*+Pjr_4CHgJ z32x-S|KNoAe3%Ri7A&Wsqm1f`&0=n=6{>iKjLiRu=y7v`J6L5n17_j@UyfdU`5 z1Xh#xzu&#(P3c{AJ6(3qqi_<$5tG5Ae?dT}5S7J_$oZ;|*(_mWB|tJe_82;fftujJ zCx)RlhT*|GLa7)^oGuVU{ex3f(S}?)RYrB=i;BS)bVh`r)hE~vLQ4k+&Htwb=qB$q06>n6hlq_6>D zBAPdBg_!o%p%p_m%Wr1YQD;%A*GAV2Qg|-WHbWRu&>G1m!W7WC5)um6=R{@BJ9-V} zo{s>&TpqRHlyr2yOU6g!85fU!K4sN=9iuI|zscXOtZCeBRO?+0dxinlG5=BLwYByxqppg3!UQ{sGsY_w9}P0oe+F}GE( zrTp2~(_&vz?gfS3&XxXm+Kidb6|BEqgfxfU@_SeLvkb6qQG1Q2)TMqNcSz|8%zVPb zX;;%D+tg*WD~j?%V^7^iPI6`nm$^H+x;@(}pvLcthf3bET948t^QlW$-36Ez=9~)_ zs@S~8nF=(EZesB2!_chBeiH>%7&O-BrTRRlneXnCnd?k1vd5c3&^d3gItJtJz!>}c z=_?HGv5(5yoH9|(^3YGWIy%JY0VcxXHr;vhW_`8qC+X!m0aY1G2__{4WvrXdl}ww~ zUXR{a9ehWb=J9++gl2CbV2|r*Y*FITmqnKMQ?B}EY|Fd0m=8tMIy>70TcY|l9#KFb zg%svjdvEiq%&oFN(N#6X3K5VnO9Shb>v|zyifwql!LTGzMAYbi&AeC}S+8`@4+Kdr zuMmcps3S{{M&8fq1S!>rWeG8uy-g)yeg!lxPmf9l%8vcdhV`1TY5RV z&zVsnn+WV)k}vRUgU^757p=_(XS=1`$7zO7MFj^kzrt#WOsWWBjx%Wvdc8Kd`M3Yu zKJkpTIwdUG;#XC;$efnLCRwf;8XbLc5d??!8a?7ozTlla+DQdd&Phae<2^$(;LqGA zuVWY62ngW%-7+hb*b*lkR8(}#daXXZXlP_1LAk#?0%x8Oo;lT@w17jCmUrPxSD5z>Q@=YKu6;h`fp9xFC&?d zL4yQ?QSFiRaGW|v>QGe_43Eea|1^C?RGrDp>_k-Sn^l_2xr;fgDzbm3RH#IxZ}N{4 zS;H`xc||$-zk5D!9A6#eH{a=c{SfvIeE9L|i+enj3JklZ7&YP@h>7rYUfcd4<@=nN ze$Kg1GRj4S%=>zc0a;+?_@V4@@FhAiGTi}^h%ZtS`)F35fW92!12$S z_M4K8u1}t}^JSKwvsZ=&-u;RfQ}$yy;G1!jxgQa9b4L_#&F%Q2-8-4l%XRSVECCub zzLEZnp?zQJoqBNMPcV!L1Z*vE4O)}zRSJWS{cf(3LB8ALzCu570B^dJs#)go(4y}_ z_NQiBH?E{RFUo#tP8+Wge(z^c@9!vp)w^^++MW~7(YBTAlnXb7mI~>96m0}H;~)FY z?l4ew0bhWON=1V6^-knNhs6`f5IMKTbe3(D$Vn+Oh|GdyVxiG}P5mD}8-t?CSs^D~ zGwj*O*da#0WCUy&x(nvb9>$FXZN(iKp_n8=gcYO_dT!FmaHs{Q8VxiqOi2%t4W@+H zh2D}A!dwwf-`e?bsJ@3)?>EzCfj&2Kbyba2x+FTOhI}gxZ1Qlxa zAp@rw7Rf#IA(x1W@VY zJ`rJx148o}NZ(E;iwl;~n%s+%va@AMlM&;RDx$S#6W!(bf^id+5B}mM;?0>H^rAzq zIY;^V%PE-g+P7XuG7<2|G=th&A98o~0@FTX@ra%@Bb7hVse zltzXs5;HHv$$7nSehg6w>Wt6^vM9=`P=;7F4`C4s8iyT+^E=jk%YHT1n9PTmOEItY z3_rPRqOWv5-%sFJ7wu&7P5=j6{bd<(*Tn#llj?z_Y;UVfVEx$M9)qq&xW3oyO?rrH zyiwig?JYZ?#lak$GFM$w<3J%uI~*+0q=ddkHZhyLkn)K)?c>wH&Cnp)kneu#!voUI zn4=aizq?|45zZ?JU2C%1&y;)$;K{rC&>%g{G^k3rVrzHwJAFDmqyC*X;F$*7QF`N; zZTKVylC=22CQOf7ng?sf$=(@B|FyJ>ilVEE9yt^pT$vPTu%t>bO6K7;f73-*4v3sV z(W7cw@)A){EDDt6A*sEa_;Vg|{1a_8>X2FKKWosRSuFAey1BUDA?E;k^aE$`F*Js- zF*+O)ZD76UPoCZ^bfsdGs;bIn4K+0vSu3y9@}^siC)8Km?=lk4W{(teJPBl; zIvoGIk>>F$5SMs~I}$3^a$g8fdR%e7+aZ6!#BsmW3xcPJUxig&3Yi-C3io|3jtfc? zJ@~Pex}>lryReBbdkcW4US^s%Y;!uwo%*}z5)~l}_bV*5ek-%f3?E>%H;{I49wEGh zvbs*1W4%87j|dW#Z{?Q zT4LZrCqm~^Yd!JCJZkQ41B+fAi4m!az=lb|Nv5$A^-r~bGyDj)$oMc8*$WTEcJ0eUKyCRJAq8XT+uGf0JX!@ zs^H~?SX9B$#=tuq)O;eg*cf}^V?XB3mme*U@-w09ze|q!Zl1NDZ_6EzO&%ebdyn!A z0@o1RquP6D-9BuMe6C~sKu@2Uz3sH-*FEQLkHTP$fl)6ShiWQW;-Nw>)5gnT#l1 zQH3g`N2hS4yMJ-nA5S~m_cn0S=``rQ~A_;#kwBxR8#BG+SQnzbHT`hJ>WZEDl+1 zM`#XZO-l|YbJ$4tz(obv5bHtw4v~|fMmPM`i15OTP>lD_angSGqz736WQ;b>@@!;f!Ew<}t~yo}|= zZQQ8j#M3ElsJrqaLsvRI6wMM0+9h-tSYJzieEUOlaPP(La!_?ZE*3d7BIEy|ew;+S zrsU$s&#r2qxj(}9;w8doBtl5d2?2e}UV?Y9WEwdHVO~?|PFx<(&a4tH2|Z>J8AE&h z6VXf}q)_Q*iMP@vi;{)?mPtA{xq8QPi^*8p1=<}Nhf~j=4l+5IXQ0zTbgqiTHM+leB#tNpfvt@V zJZD+k9syv1yy3y)5l6OhhniZ1F3xnOoLTRi! zb3o+0p<0vZdfcpGC%0*{N-$b5&b8~GDKMOAQrE!1(%YM{d+SM$Gi$#F`FQHAGqT=t z@+)|CXV{t7f<23s2(_~6hn$sC`rINOoI=#JVX_+qXE_`5WP@TfR8&*00R_9~zT(Ql z$JIk^?>+yED%XJjit^JdBl#gu*!dAD_w6Dz==b?1r}~<*HmxD#r+s(-k&oi(w2`ul zCk})f(WU?`Mc|4t&*nY<@5AoPgZ=JuIz%5qfP1NJHhr+*A#K?C$R;OB%6RCF*OZrl z6(<3BDwI@JNvD>Vm%sPvwl$Xb9&3n2SJ*AI%|3O6t*q(zFl+hX9!nuyv3^sZ*k^nJ znzf+uS>MGJ)0Md|Uba>Wmv_i_vJT;u^CN0#aygW&wWv1}6Gs)c9wQy7g<_KQp+jn0TcbL94c`Uto!%AE}BR~ePx}FD51!+qH*)kdI?`owx zGGG!+%-`a&D;YdqYU=Nwhef;0>s?GGRaCjKF|h|B$(6`?$^{$Aot{Gu4<~Bd{1TT+ zVw-mD@BwQ(W!m}Dt?(Cuy2U;7Orn2Tp{%#sCl0zGi#jkAFRjB7Pnp4wOUk@(0rt0; zrI=tPeu7kNMERUgzXx-yywv;<)emqzw`IugplJ#x z`r=1}9ac{FbE2V?$(K{IkPaIor%{*OW<{EUT%rkKsEmB6g+pyqgN};=B11GEw@rZZ z4!?c!Z+rN^sK`;-Z0Q4wnN)ZQc6s&nd(P=1z)Q4*hQo0vIsu0qCeG4pfhX!snGEYC z3V{mKwiOAf$zb4-pk%)K4_Rd&bn`!%Dm`Vh{b*hpUchBz`Jz;R?9B~f``JvgxK7_U(rF#fH-ma|z`MIvcQckU%IhXZ z@5xku*XNBKz4ve+*XzFP*|2|`chTneDD_2fWWB;l1ELn~<+p*`uA@h<)^bs;%ll5F z-&+gN`DZuH!P{!U6?NBs_~!i#-C6iJ()Q5~kI?ur(17-Hjj;XstkqSN?;Mzhjwk%4 znEyUe{(N98JUX98dIZH|G>ZZRj512ko9}i_wx5^>CU|}v8WMe+T?)9JqY8b@kXmkU zuPHiFmvj8=WgCM_)qku%PG&TKK-(Leu-ECjI5eK8%18ts|M3%tSaxeueo zGrEy~-w1qQtL>~F67*rj)%c_hd>j@&v=gE=*4-7MV)vMh+O~#ZST1Ofi2&VuzO?oR3cu!m<|n)_%sQI@2PTE0lw%J6v_0po z+%4=s-Gyzx@NC?GTK_t%e63wm&)0qC3)lfyC~zTN3xA;X-m@z>G=4v#tq8c{d07j~ zvj->ZjiOG)6o>Y(LvNwmB05ju?NqPTiD+!&?}gWI&!F#IY0Q znEYJrt3XirVW~mCKN9Na9@#chs#b;ZZwfk)Ir|#C^dqN*W^Zb%ain7XaD3zEKHA3&JIC&F?4)dpj=? zh4DVP0O@TgRvT!+@g zvWfAKW1vyNVe9`>_0f{UVrE#-&OqK^Q6$PMZ>IARwqGW&zJx zM9vv(-}K9Cp?{vF=hkL1@gH^fIOfMVCV^TGW7ge)qSDuJ1viBRoZJJ8q1>wzdBs8!B`FW1kR%+OYn!Vxj(6B0^CKG_4$UPn5|PD%y%Nw! zj@j9Lo>wDIqA(01S;t*8Nwsn3GWEg}@(Zci9!1!p6Kts1A#h!cZgj(E5tp>B{6{5I zgI*)$eocf{;6MN_#kwe}+K@G%s<@J1Blf6@*$Vq#3q%+XJPJePVE5wM3TygEtS5-~ z#w4crq^|_Ro({5js1v8KQIk1t%XK~r_N#*p_ikIi zJzlCqK3TB;Wuuc)%KI*9xDq?MuV}jXabP7&Qzs#iMLiLPC)@eP-_ES&x9O#xdC(Dn z)vFlf`JrbnwmMOutt^0l?kOjXIq{# zWB$5U>A{^h)RLi&pP4lY{|_o$=;Z3Q%S$<>&FpqH7Anh;FFntgEs@zbdz(?RbsPO< zyFo$s9i>F%Vv5>o`?GOh{<~CMMF?VKHU)u0*TjfU4FfzXt=N&^M?iuDpe2NfHH(vo zKOk2;!iZvoBn2_ML)5uqEdZ`YRFGhJIjA_|M1`W|`G9rV4H{@$!ywYs6opt@_J?Xdc3o#o4ziI?ig~eEW6zS@kIyR#G zp>xFef83gs%Sa*7gOuJQO^0O2`CO3SqtdL8fK2^@2(a1rpJUyE=lP+R4Y0dY*}eRc9cw1v|~QBLQ> z%hpI_b8hq2u<96J#S6~ zkB>!u>3nO646LTopZ-)*zAX}T;)-h;8bxJgQpG{p1JdX*Ec7T6OiB>^wr}DLt_|vD z-FnyXGNqTuL-2}0RSojl^aQDRP>R(_Cy`Yebp5{Awkrzm>s;Rq<>9{fUUHPW7X4BR zghh9g+rjt?kc~3g&uVGN`OKmUvo$Q&0>fR65H6R{gK)r|9hK*GDzzPxCG0 zrEI@PFlqzp``vACj~CrQ1l;H0y6ys6W_@0nd52v+w+257KnyI=Dn>4pZ;!@fVcoic z9M1Y`q<-nvPS4(2+AY`NIj*{0LB!g5le0zyU{wIojBgiQ zYYSUX*H+>d#S6dC*f8$K1SQ(WY)S!$>It+*Z2*{D{cc>G|6jM|Wj}`+pMEMzN{AMjY-1CnFslD$Ic#a#Rr`6GA-CkiP8HlxitB=bZ ztyU~w>fGEPUypvj0K>I!ALLeg`MD)xI9OY5o`M&{d|$ZoKyOA~-a3gKNIrVGLP9~Y zY(0U{#zJsGKnLn`AQ@q`hhPD0qsvS_;SAj@VmVS>dAr9^Cc+z{Z7!b@saCK6!&+&u zpV+W$2> zD2j-4-qZJVO&F{#NAzoKYD`5LUP*bctEB{lR|+NK!pwo7;rXygvY8fFo|tzJ!KhIt zd(2e&DmR1Z$@rX5Ck8UBH70K?4Eyq5)82D7H+K3w9;$sHt3n%2jCO=~k=$&@AYOx< zI$HsjgfBLC3_2e+=ZYd+d&U3CrTDd>e+#zkqW@XMi-Xrz{IO9gN?;4~imEEA_8yIJ z@e4gC{h|<@T`Mn@pOI==V8r)@>@LwVX@C)ea+9k_iooV<0uh5eJhJq)q-L_Blw0<) zRCW3!TVgI~FbiF?#$V&I6$T5MayP~cUDO=9TESx*`kmq%zG^jawb&=a-dfD*m}mxR zuhb#1+3}AK??q%|m^ZuRkm(yhT7As+d>+b(sNLeg3o&Ey4hE{AG0~FgId@pVnw~|M zSw7uI@avoZ*H=NdVZqPDf4W(2W%iG|DS5@!wYudgR?NXLW>B}ZbP0yLp|U42Uc%_2i$L7r=H2Ntvl(#wD}@yEEm)*p_x;Jr zup@s@*0jv^*YfYC4qJ~8k9k35swZq^iZaj1WEl{`W>?vLE@0K4EKE7{Ljtev+=t-U z9%OwEbPGuCbtC{2UEiZ!%p-y&ZVRS8Z@1&h346{Z6jUkbVL9V;OKYlrbd>aY36xba zbUh)%LVd->#Yct(zktUSFt*ts!t;^(Mmqxl_Cx}<^)^IzFzVQK8@63`@Yp7F2%e_n+nogvllXmoqE+y%6L z4=s3dR)V-PThGC=M5X-Ewk)v><( z$>TTJLaM_r7&+NFqzNtO1PDqOgtc!0T3;~US~S8Q8FklLs5(muqeJjTRpdG1%GnrZ z)_j5@{Ta)Nlp|e#&vM0xEHgDo zaM`Q;F8t(AP&;7gKvN=%mO{e>k z{9xW1Kj4j?MV|2gX#wimK6cUmI#?p%PBZ6y7Lt=>`1&*o$T@H)@YL~?hQ1>*djntu zoPqlSuNypVmw%-Bfc`N0p3_$U_x!Jrmh;|RbZ^`RetvyIsWbhQ-kOXf?MOmuVVO&L zBqY}_-S|nb4Vn;^eDn+NylPKT204Akg94vSnE}0*TnUH5ymTdg*AO(J^q=g_y+_lh1xkSsJgc)UceTf(Ta5#x;7x}O z|52lvPAg#^p#P>$y=xb2?z}-D_ap+pV>VA2^!9D*~zukf?NCJgFr~IgRSg$xVUkf2E4A>6$*F< z`iJH)Z_nf(-V>;pjK!e&rSbRp5gG@W#m!rcy;@8;b6TnKwe$wEFzUAkc>WGP!;?l6 z8pTQ=J)mxBcp8AZ027A)cnriFXt<@l4&(zTw6A=l;n3>Xa*^?s7{cwu8w7znebRM1 z>;AuZ3KqV^_s7SyKd&Q@3ck}R`@JIh`O}y8){jYVwWQ^eUc3%ukwf*8Zp5z<)<+vYC1R?;YV}S zoR@jfFN6w6G8*2cFBqa-A#|!lGZF@oI{7M*7+53}k&Qx*#r60VW|lv~;-Jw*m!Maw zqA^7w(J@>iIml^?BZsn%*ddBBRZ$3+?=@6#I?;j<_D~Q*@|-NJp%%9hGwZcrilrnd z=@e>On_V3GCeRT)b?wUu>R5csKP47!WyEm!S2FS z6%-#6SK2y|`F&DKp@kERxVMT$6M}>)N@ihmFVQSOhdKnEB0=3m6@WhE-B3+7>x&&m zkMcZ~bSO3^06{6JH4Qzy$XQ5`Xk!7B6uGDWQZ_0Z-+_D{iyHMK{S>{ggjt$Xddyj9 z)HZmCh?9fTr7H4>cPgRTN@NKlo?DX$b?&qso+j1_fk+*-W~-U{m1ZMI={oN~tD zC{8t<@&xu5u;kR_7Eir{VG+-1I15P`L6|M4PR68<=9*B9(NDQ9`d)5h$SFjx!ow}My_HQn=t`$#$i}<$L#Vkq0i~2l ze`Eb6?TG1U^wzrbv8csrk2tj*jE#1B)~IM7Q*aJlW2f9?;*D0TS1T8-NGX>OhKnNSAhJ-9 zL?n0XTp7g>LK1@Nk0q`O^$;)T_J6@p|3u)qY7vm_EY-N+%}2~fqf!PYTyzZLnCzpl zKS;K$Bk!vJG?2)JH?tDJEJsYdcCZ>eAxq=$tA=Z#)c%wy;cSLX+F(TL7SZKA&{Z^2 zpo@+zu6NnyPqvXZOhkvc^usj}P|-+!D>5)3m{VTgUf{UKH&1DlPI-0Dh3EAdw- z&!_h;U76^4!=si8a$c8Ig_>KgB9zx<>1;f=)sX%t+Sb~-v0DqF9ZnP$wMHBZoTo109-nl^}oURL|7M24~%RX_;O9avz%Z(#M2G z!pY9CVk!;xh&QHw=6Z>oq#H<9@_=Z#KDl=l)8^J%lFOlH1O@qTyrk>-uj|t zN2|qfteLbcn^+(B1!TL6F`$~?{uI#T`X){k7vIO=%gopJl2PDGkm6E4S_v-RS#YE~ z<5qpQ{M&CxOSKaOln%arftw*?>A|a%# z@xR--6~X%g9^kCjYyAja{f=PL&mGMRgKtOXle!_VVWYf0*S4tff=SdK#`O`GzS_oy z1@7`pmt8@2LT*rj-rFAnuE(1j{?nb_Ph0-qTrJ{BX^skPzz}Um(a`}1+TECT?^wna z;rtTU{=*=Vphu`RY65_38|JHaYoj&LF?}+Z0$0yl_=_S(i-Xsna4U8*|0S{y;tSqu zRLA!|X|GR3ZI_2(Uta6;puO1be7R~R_JC85p6e)O*OkBWx*zie$E4d`5jgA3bMy>K zM1HxWSM7QHB)aCj0t36^qkKMQ5*Ky#z^=ePX{UhShS=CPI=+uWS8ofr&j;4PLl;!% zOYpvh@J)|hyUXVQ;rjr>kaILHX-5Lw)hntek-*GrrMQ5nbDSx$PJr*oYqR;tw43ge zVtDSbAg=4-?0>1(J;wXWJ>G-`$(IHlKr3dQ`}+-i&mdBCXp8ZU^H@@k@ficqW#ZQ3bV=KlVo(%#VRVqe^p>cNkDgn z#5`usBT}3r*HAvoU}F9vtGVXJC^F*|C78iTIeetb)Ebug%rs$RUnC1bXk3b?+AUIW z(%niinQke=wN-Jckj3k#KNBPTPF`FSv!I6xh0V5`ZGzC`@@MV{YAV}Lq8Uev7Ko{D zy(_U0X%zq-+ZFYu=&K={9xfv#{#7CyEd4x?nEt6x=ozA?QVqgs`WD8U!e91QECJyz zg^Ui3(GOPAs8p0|v#{zI2Nw+{FyJMODLM9|WF+1|Vj@uv*oh)jllx4H{87PV!xW1Y zQz0i1BuT#CB#o1neDYzeSdU%^0!04@>z z5VL5)`9Cy$V|1Kd+jeXxjm?Q|J88_uZ0t;IHcn%!vDw&alE${xaAM<|`}x+p*36&z zab0W8#<>ri$6f=cNMhiy+QQwb>&GNSsq$%{Tp=h^X!Ofsr&#fI@qNvoo&Jj3lr-Em zVrJBs@~Fe$%aj5J;3D0w!2wReMs+HyXKE=l7HVCw_)no?X^+=EQq)Nn!i`-jdHO{8 zN>}Wv%SEf>P5>$5Dep!mD|dIg&~Tcr&yQgsu&X&y&R#cd7EBx)8Cf1rt*o&!MHOb^YB>Lq(C%m29_K6l;8Oe^Kn8NKiYn$x+&gP=v6aN6>ZB_PUcfu+ z$kW?W+5hHhGW44Ah{1z{jAbhFe8J!|Q3`$wh;CQEcRRnM7yxF&%LGySjyzPR2f;cn zN2X$asw1hoh{k4_kDT{pW85ZY$A>yesaq0O7KN#yIRe$j*ezeph>4j!zP}PjA_)Ck0+gQj*hZM3BQN03Q|XlI0*k)=bcfu1hUdxtP&Wi_m{$Ez#!-h|Vb~iSBs7c@8QW{_@u~v0p+yjYxc`!lctYs8X_! zD(pdsp%60;m7Pmt7}BBUwyNcppgLb(@KY6}6kNbviaFnrW$U9^Z5(w^uSPW%!`J1- z@fGP9QKW2vSlpQ*?HDCQV+ldgjGQO2GMbA!(w%vYhT+V%!WV%y(E}-y-1bnCqH>rZ z1j{bcJd(>y0`j?1G zYGNJ=C-K(XYXO-Vf0aGKhJzt%8KL?zxV6V9Y}q@gOi7^CJ!-<_aTqX%CUQha@RU#a>@nlH*sL zDy#pie%I3kCo!Gm%HJjCaVDl5T@=2E?UwA}e##h4;@bGD1zxhFmCi>@mdp8HCS8XO zezU{p0`)&ruMLGBQ}w+skI|g>q8-lpvKdUl;xYugjB=h3|0bjtN2c@M5Mm>fxs6R! zBh$&_s|GY(({>(ss=G4P*S|8li>_eRAib2BlaU)7yI zN5s_25N{0{8hZzgptIC_?XS0AN0Y`_#VBv-q8~2It6a?ir=1Ou3Ptm~Up*Voa=uL~ z0b1-2bI3k1UIRgpXd-tGq}_+4MlYixEr)E&b)UO?afeq^VBZ#@Kg&~8;Ao)7?e<|h zkv@u$`*QEzeBhnaGq@{WoIxyodrC0iH2wL0`|Z`B86q^@znq@XHtXJ;*yl~;@@*jh zDs%cBHUD{_cmG5GGxqa|>XV=BHPv`0|Lx{JCp-J{=~3qU(}20(@nb-)Xcv(2*zaJ| z@7#R3K6NS!@Y`y=?WXLLh~;@kbnhEo8K2wo=Q!wXTy+2E$KHBZ+ZgBsBbaLAy<&TR zJMc~A3Cs^j5w{9qd%I}54hKH7XgA#N@xlq;Yz@B+eZGA<12tSup9Q_&(mrE8Pk5bn zFRfIjd;)+2%n8f}1MnZafdj1{&-u6Q)1MT*{+sL1_nvjGmwRvb!eEldG7_KO&Xf%T z6Pud;U@J@+t?4iBGL=$>sw~wkFGD0}KS0V4g%7HqcW9!B1v-wp>cXs0cZfd{(>=JH zG~L@-rpkkX+U^m;T8deamT`j%AHnd(gAofJCsiA>;GT7Xyk;*5dx5_Yx{0R@KcW|v zL`lb9yA3Ka2uTXl?ZMn6d4?rwl4|WMw{$&QVaDI!Jt=F2unOqk0l_DZsP;ou5Li(# zRg%r$YyOz|PKH&`KJo6h$6fwjrc@)!fmR!Cm-0g|hwH0Oq}b_BF#iq-DzFxc*{@+D z+lHs}iCv<7+QkU;!+RbR{$6y!Hz^KNhNh+RrMrYCgip1Bgvj=u>lE=+9B9Tg%O|!^ zxd&5+ISUt6S`GPCJbamgr54-V+9HD%@FOU)WIu;eM27>1NE7Ep?FeVtr}OCW}fdzU#nk{;z?>Lp%%-}(w`bjSxT z<0fbj2U&`;VH1`L7_Tl=;)UdL!1wh&WmZi_@NsX{UuGK*B|0fUn{8C#MS6^!_1`gU zcVWQBBVntLsy?#rbxs(DJo?irAGtsc(M`6K0N1k4 zI@^OY$B&Ma39Ngo{Zmp65%soJzK-kZBbsN0!+$TiadkxBkhbIMe3_pwj+(Lqc)K05 zI{@X{;SKSa4mR!}sT zJzDN=`>G(AuH~D>vQl+IXTKO5E{D~dCib3T(f6#gs{0s{qNDdAC^)?eng z0(pa!Ak95IgXh~7|?-g5k@z zEvbbchdwW*QcnJ3=0}vw+GSx4QB_L|saY6CY=gu3<%8Z3ilG!T)k?3;CIDm77ePwR zncLbS8X|}bRegD8p+%Y84CZkO0q#Vvs=h{9tsp9Rwp_gG$Ms9tKG=3or>xrL7FKP% z!}0T$fJ)bN2rJndcwBnT<^Hpj-kPxdjIe^}cWqP+Wr?krdEAevn$=&CK|su79MHU{ z=*m_^19KI1x#pN3z@46YT|4dUfd;x-}b?U9cmUKBnEMo|uUn28=m(H`()9%}W8Vpo_B zIJp1;Z8yWHxC?E1N^7)(j1YMk!URM%$eNTDDK`WGobMb<5X z_1O_86+wG~de|KGetmTQSkm1tlY|wTFh>`joc~GRy$EbSb&P4~=gqBUcr8h3`{&R0&jzZlw!47Z zt{d7~ouHe^>_dJdUVtj_-|R8}+kz>f@j#lwYO7V|roQES9^_xGgRV#C=kU)g+58ut z2-2=NC(;)i$F;GlwOrfyWisH>2{EwC5yN1sXu?+`2C*5ay1cMr=YG=n#o+3})Ajw* zBCT83$(w+yP(gu-sixj%>e;|~ti<5CV*3~zgGS8icl+~soD#z#_;c4Buc;>8-)T5l zA^}ORSrbjE~+LW28PO(F}3W;MJI;$qi?(OeYViYoBWbhckdRs zW##GF>S@E6-1F7#DUq3kn`N zU4frhT{<0#$ zSOLR!_CB@hu~}FS2*GSFbOfMfh#?n_LGUv92P2}uN|8hsGI*#Ahxytg&|^BoND(e@ zXDoI}Rr7N6*oC`B7SJx)8}{MA!$|W){)0%Oc88$F`B}5DAT@*-f+1e070Orq9;ahl z%%zc;Ope|!bBK@@%A?3g9)99ZqwL?BX$u=-3uO^9rm^l5BB#!C1+@oD7mq@*tld@} zjlLtx|8c-UtWU0AH~K?(qvFekGv@Q>s_(psLj2hj$djY$wALx`-DkK5I*w!mA`wDP z4TnY@hen@WJP3x04AsL+oJhM7ztXOg)g-lCAE5jRHPtckWM5? zTr=cdPtSXIr`F+hgod|qcMRMNY6v;xMxgA2KQANW?SU&})6{eRgc0`a5K5NLD|}y7 zf)~7z8XXK$h!2vGO!gxH@oTg#oOmBSmb!r{j9-;f{8vy=L8B)Tj*#tn!szrwY4>r%22|m#K7WU7whm;^5^SSy;fS(T>rp;h_#1<}hC$ zZj_E~)5IB0h$NFJFsegWp)YT-LeL+QJdP#T3XkMR8hSI&n#H({d>(R}a=4fyyH!XE z^XXRZHY(GuLd}IWdztO(+FpAWJ;Hu9b3@bK6sVy5i#ZNUgiguRQt7XqD)={t`bY;e z*S*qoL3hWXx4{%FRZ}Mx z6WZ||{@obI7+TrJW|{2Flvky1E%h|?S`H(9g@;W-SHi|wlwb!pr$lX4VQVm(0g9C9#5 zijmO{a2Oq`Cw;%MHkT`b`YyMKEXt(pYxft|y+!7v7?sTJ^IROx=wQrwkJh7p<3p+d^IrnJ=mGE|S zlvtDWoSZxs`N6-2Z#o9LmJLA_x0A<%LH4}!z|QvX#yg3}M!Pe1R{Sp2==0`0TlnVX z46^aWyC-G&M-^pxd#hIYhfml0_V)HOCvK4-=j`n4>e^b@^A&cZz<2k&0B{9uKg6x= zycAfwc#x|#k8L2qD}`@)3^O)@;lac6-q%M5+PWkkeFz);{^YPV10|UIt*}FE?L6(y z=LjuH+iE;jIhvb#WMz8ZPEopFr8}>tc7pR3*y{3q5+le&CY2tQIXTngOG)?rhdNKD z{##`vzr=o)Xei_ebi`)Ta2&CA#>UjOzg>_O?Y0lDz01pGI?pT8Dt#IjySm4%$$iamD&`Ubh0gcs zYR3zYh<i> zJSC`+N^HJ1%|d8(ahm7eqNVd8S;6%zPSIwELJ~>~nN zrDa}1)GI_t;uUa_$pBF%S*aMY_+Y8xb$f0SBUe8hDnC*-( z!*C*YsD&tV^NXf9w6dgbJcd$bnnry=|91wEWc9h}kp*~L_@VO(1BiQ>g@`sottW{# zgMpmTA)GL=dYACd4$)atM^S)&Fl=>2*U1cOj$sHE1E2;^McpT-{g7l+ZwEyAV4+$g zge4?D!lQJ+hTaENOE%Dl#Y=XOlJIfy7I~f2nD*=Xg1g%>$}HWOen#WB&^Y32Qs8J? z!B!FPXkI~O;z50xBRXw>0u>N);tC7}XX8_j>ZqiW+lCfHs;l zd@}7wgsn1gA^fd3zd*#C^xfW`V1HmYhaWZ4J~3@yfFh|VJuPizV?$~&Qdp}jxNY8N z(UcskLaTN3p1n?JdRxlA7AxyQxA#lsHD-~^vCr0UW0ll7K;f1YGO)YPx>{T)-cK?BZ3Dk}pdOyX7 z0}_I}N7rw@G`JA%S8{~jTS=Xdco{bO-{Egs=}-RjYoOYkqp?-{tL;u>ah;w0R3w;(0D z)~0c2wI>E6P$TlC*PD;POd-oBCDRHMqN+Tc%EuGy%cL(~Xco=TS);8rKRzZvq87l; z^;L{`bmQTKyF!C|jeDW7R8SBK_~qiQH)eiy!NbBLTKM9?s;-n(@*^J@B9t_R;OrpF zG3$#f1*$`A6GUD>$U3z{Y?-ei-wRUPkCoS zszgJzU^dH7?fig7fD|-ssz#2+Vt99aM2J3_Na-0=mgZ}hODpACpP!hzC^Z_Nu?((q zYR(^YGdwC+-7$!NUX+Wg7@i|;6g2K402t=H>EVNg_da1$m2x*TTLd6UV+Bhk zDmud+QDfm;Al`}jKpBo}#HeTOg5;dT$^H9LVd$%kxg9&y1yCYQw4LGiz!q+X!yiOR zy7}-0&TXoL;V7~p=XjsJ(L9Vsx#taQ`u!h- zLOa%x)4|@E*kRevtO!!?xIsEW0D%q9bcv_I;9ITDlgq+{a3P_^7j9vDo#1SoQ!G)frA=LB9}QG!}EcF@gu9#_SkY z(K^oeuY81^ET)ea`MaejPR`aY$-K(l6Nw|B<81hV{Bc@4riCrN#Nur5$>loYdF#>m z;I5>1u*|7D?A>?zRZ+Cy2T-QN*QyK>{e1sL1w1+`xw~3u4S#XI1?;kjxU8o##nJ}$ zs_CiPMkRESRb_5r<#NLFU)^xTWOsWy_Oymu1$>eiJgzC6e~y2Kf&yk;Tivq*GYCZKCnIOs z?0-CqiLn722$+km7e@cVR-!ovjhVPgnZIi5ak^>8U6w$tR<{qjQ3iM!FD5uzz(e(~ zal)~4=9RFH|4c7tDw&X3oO4{%WakhkkG$Z+l@N=hL$$xy$Sxm&zY+)BK$>Czc#wUF z_;ce#8dvE+a5p^?wJqz0Hlw8zgF25p!7Om&MN`wLn63RLQ~@Bfr+kk zCS@=nXJrhlRsprnWyDkBs5CTa7I7gyRvcvnv-fjN7eG`HP@+(EWF7~IZ~YwGQpKLQ zkg0h3J6Rx(i%LO#ZqA1M9x+NqE_rBg?m`fCxp6Iln(r$H@%|wEv5-yHthFMWU~9zn z36%L8Q{;4voF8t3;WAaC;GYVHO-DQ}s&sU?P+rWes%cWKO_{CQq&%u?eyoY^ZRJZ+ z=^S1em)hT$mHI;;aw&CKM^$vRnuw=~Ay!NrTV?83s68ymz;O-;YoFDTkYj?_}QTg=br zF`z>GP1>>p5rQq7H)d`mAy%a98F_pBJmqCdx~|$VI3`6I%2j z`u}k57&);#m0DquD}sXBMLMkCV^W0pGZQV67dT3ckh!u*1uN>|9perU z(Ygbs$fxOlLsLMGvB?<1C(`a=<@jh~Ymz)nu|~p$Jj}2a6jz1flJgft;z@PMmzxlo zl%F(GLxjU>0bBtus&356P+kc*RCh%fVn7P&BQ6X~z`fC-9;6V(qjowqq!Kx`_0>OB z<6Xrv%vWvz4kW6}h0a5J$e#FcjuJpn7WtLwmV*vzHCjT!J7Xvq;YnPzp3p< zq`8@G-ZM$ef-PcY-jLwB!4M+pu=&Lo68J7g=bi!~2FS8tDSIkR<`IMVxIqjtHBBW} zDOCwo38UKCnk=5YzYQ8S(cEgQo`+82Y73!}R5ZViIHIZ92#}(?#FB_zXigZ!)bW!= zAcV`xIlGC(t`gGW>ysFgLXd+5hN|vg#Z1ZDf@R^wdJ9H_4eZ;>5t;gTfhru%a(zo) zFngfE;oUQFjrkJ(GI9xVxHTypiC^V>)K2#NXfPBdDzr0Kur{aiSSqYR=#c5~aQ@Q3 zQAv}60vM#Qe>s$d&QQu583wne{YN$ebs`gkfw%5Pf}Qes5el{oR!5mRc?GN0i6vt^ zl&wd?121ae7*PvwbT@6}?8RA^!^G5LUr?V}{_T1|QZTI67M#{e{DT!8o`H7DeM&YbjVCC;eH7Xksx8I(#X!R zux(KiW+YjMkcgPN;W$&MvUh_|YgsB}XCEG#z=_l3i|BZJAK9TMiebIIYqGPyt2@dv zL^g1neQv{Xq;UK8FHd-)K|}cExkc-$Gl=}&hLBTmwX;lVuMqb*9pRTspnvN9u=Eno z>p|AenM2k*C%9@(D_Ao79K5q+XD2BA9xc>3$i@H&*tvDWwikb|L9|5#4gBJw19|+b zcAWd^gBkRJwHqhv{r-=#j3@j%gLwc5=T~R{&9z7i(icBql24Hgm0+*9p#Mec#O0R! z%?uJ9nRg1O-M6_}d0G~oOZF{BgV7{k@Nt*V31$@C_1^-$kw_jieepNvu`Tb~4bNav z!R4Xlqf8s`Mx1|j^C!f6*CXf+ZtloO+b@9Xx1Cn6^VtaR9i42(k72U0n zsA8s#>*?oryTBOM_iF;>`sDCcM9X!&mY^q96Z6deOsbso)_?EScH}`(RV%)$HE%}& z)4hRDc5eq$BH1ZH%Av)0TWWR%*hh&wnE_-V$p5;Kj;zfiQ( zDhvB>u)ijCN77eGO7N5?-V22~=FeOt)|LEo&Gn-htXV|xm%f34<3^~p7q=T^UO9mf zhk?I|O$O+eI(YrEvx6;%zg-44~ej-pMW8q5$V}OR&2U z0&ZolSCEw{<2Azw^U}T_vF+F?QghQb!D%$pV(TVbYL}&z7Td@WMulu8KZOnX`k4j3 z4kgpBN41)}?nIEq$;6C}d9PjQ9;8cX@cw}(Nz6$urJuBHqNXQ^62eN)1%l#VC(vM4Ri=pD4JESpSS=!0Zl+eG_XkkonV1MGA_sF^`fEg6;;{j@KUbHg zB=zVwi@3lMyTvX_%q~K+E%wh@ze=f|O20)R2`|nl(GQY&ZOnPiGj_tto7}U!SqxCs zUqhTDHn7(7JL1=Ub#ngFy4Of+>jaCLzHRHLC_r_9rb9%W-urzrZFWp)1b%g19{9 zwKV`i8cH3*AS6Sk-;Kc(A`2RFegx*+OHwT7M1AOhWPlJYWKEbtXQmLP#BmsL59V4z z)yc+H8mo%8l7>~ZC3^*qzzd4h{De!V-S% zHeB5bhCqYqkDTv_uh2o}QOQT+RW5bZt>aRx-k8hQ~w z=d~ru;me^eFJH_JO7WqkSJ`kol?$QEbFU}_@QdRNDSzohq((w)8_MAvDuJ=Z;TpXa zON0+Wnks2a*fWgwL<6NR&=p4&rLC^bP55LE3M?a>0afGf|-E zE}Hu#u;UkG6S;*rK*6{-j*YkPF$Rtv2V5j?xZZv_-{hAR-t?h77>C#O{U>_W`!>D3 zJ6on)-=0eJyYBnP-_EBwJP!_D?iZA28W{&PmJg2nPdr}Nvm2Si;^0A{!0qMdG5>AL zier=QviAL|N9V^ngMM7_^`Y5DCNF>OGIA?W%}2cK?($zrBl3Mij;94eTH5#KTY=9X zj+b`?z{C<9M_g%{VkK6`6(@wW`#@)$P zta%o5&@lhHrtG$R@*{elh4Z{l09EfilL}7zr_+9UoxOM7wd?6~)$dqB4e9ZX&wn6| z^ung5!10$2p(NNvQCp;&l0SjqF|hl1|9obUow~E{zOGGF?>&t%_|*T54t#VH8CIm? z|GfY68onj3EOKo~`myH!_U~kn>Cq~q>qyxV9P6;h*z1oCygwjOaYZ5;(Gj>~5cNl5 zX@B*2IM3Uij7Ws#x?AzOU^fb|h@x4Y>`~boF5y(|6=Ptiu6KKaHG12^CN?BXNbd-l zF*LnRef556#%4-ZKf}Em@bsc|a&}XXv}=hXx;#8RV?ADrnHsdvgz+%?yRaa1OB4Y< zWfc+YzH)qn%ka`58S(0M?w7RuuW+M>LB$J=Bqq~&)j&VNcw8J8w&xo71facqxkE6G zn^dkR(Nc{Tvbs1I@cyiMs6!V^KMuUZpB8RX47{=Ixm#7p%&+;}Fh&|K`+mW1d5X(8pQaOi9d2yV_KVER9wRn3HmZ;(}KD9y{kT zY=I(tu*6c8Q>MMu@PaxBW{4gOhu$8t4oV-!_orMmKWawedV3O+Ts*MkS~s_3Wa+l@ zkkiohw;LfwAwLd=^f)MF6|b2qjo^AC2aXzRVwggm@TCR}%(mQAE zm-JQaEF>Ng+{fAIF#_*+UfL#E-nA(ByNO!cg7ovHkosYV!v*36_G+ZRaK=}vzC?P3 zAds%>);Y%;khEjF9%HB>{cSUmorwwgCZ0>M5NCqn3((r{MP!g;KaJC@V(bpBt zG$p++FpDc#sx7Z5aFCUlA#cEElq9hLQA~yKqXwx!i4X*VlD#4xA&Y;?ktGs?p!QOS zko)=1VNl1VsDGtIONC2?D-yfQgX!RwX=^(7HQn4XpJebSN+w1dtOr_3i32aMOI%ju z0!6UU$G3lYI&CR=$D)%ZQCryh(8c|M(Otq9M2Wr51E*PZ8;$U} z;#H=1XvH1XeV`M>euRa$6|*Cx$T z?LE%AE&5hDsJ$CqOk+UJig61Hjt>k#-QM0Rs;JC6wnh~D*1z~T5o&7b_iDWYz*!D~ zS|zjAD7kQZ?CaMbI%Rxy!w<3M)bYtWmWo94n({a7Jz2&mMGi&|oeX>L(x%GLn~v$q zWs-Hvg2#N7Ew9$uLmpB7r2FRC+9YshL~(iKs_)f5PC->K>y;1xi5g&)MwAR;J^$Y6 ze#5=7WE`R2!;hD;Q4A#o)G+92SRY3qbHu!`W#fJ8Lo+41$Y0*!7zrdzDM|IY!5Gla zJFbPQZ~owbdLHd!KJ<-7ur^>^=`ah}N?yvMzNgPhZu(72>ri{ccp#nbC@LFT7@qi; z3bif-Q7w=2h3zzd&DReg=9-||VjQH(AT#yrvz13x)uMd40a+hXteQ*;0)!C+kpu-D z-nMTDA=usb@9lw1^%M?lQTPHD9Y~ALsWhr}Tv3svjoL}YLYpD`_W%|@b7Axk<0QUU zv6R5TbsA$xsJ^`St@-@KrkhSEC8f(=n9Kc5bc=x9CPTuC#%zy`NtLHBh*;VrDO@_w zJ`${x!YU+vh10)uM*+Y}*OlP6{L5|I{y`Y8_O)3_RTEiabXF)20J#@NPD4PN44zF# zoTpsY;URu(Qkxlb=F$H~|a&Ip)9&H{*lgSkR(xkDBJzo$%8*%ERJRTub+4Uk7Bsk^{* zkXV}K!WlQ#vD|R-7sPHOvXe*p7vX4evYQ-AfcV6-lq7jM)=(azx!)Bg9DnGf5?m#W z-g(z}DF`S8wL1td#i7$BF`ue?4XT+cVk6uST@LrBIGt9@)ZXS6R`XW77gqT^xG%e{_p@#w`ZgqIdxZcU4$1Cfest#xnZf44w ztid^9v?HbN)?rU9*#GB6Aa@cHNiJ!24an`w+8HVA>%k_-4VII^9Um`u-{Noopo5MN z%}LT)yw^*q_gt{&pKiK8)8T%>+p%6Jf8LurP%+^O^0`vtn){CuL6 zP5VIoJm9U}c~Q`Fx*%K??gJ-99{atj^o$ji?KCET2D1iSYxuo%T#V2jD%3SR%!}@R zzNm=qJ%3AEQG8@`IXr&q4t#~*@LOa#Vm!ygkE9py73e zI)gj1PJGkycK;zG#*o@?rn2*TVSN?gfPACj`ILb(nAzhIoMIfVT!B2tXeBc(~1s(dJ#qH^uO=zWysUPSO zAc?^neU1qlORNd^G?{`kW5RMYerF{FlToaX+HRA~68z%Rs^?XlAE1wp3yjWLjkiYbIK$(|&yb zBMX734QjM^2C!`Dq)`=Y?O_nH>IFbaBD33yFXX@x@AP{Qze4o%JqH1!|3Zw4JIG2# zY*Ous!vOhQJg|Z@=EWE!qB+YT^XJ?MYKmn9xyyp1^pZWgThOd=5FG$)<8h>#wx1lv6(a$4pTDp}E_#tt(w!T^_#Nf^) zfM@f&`Ntwp`41gO`(!U9$*RS|ReSKOfAs#!khPDP1qH1W(z5Pf0*86k@7%e#O{?j$M5tSU;ai|&@as_C?m*5ln zlEw1GcEC?qL|FAa5r}96KDD#tM*N69;wKX{Xz6-4au2yMhHwp6BI~7z!%3bcwnpDy zFo-ZwXqn`crI(nH*BCg*DN+)#q2CMS&?=+9f#a_$a~}}D?m~rYAXz={lY1f1fB!0Mqyp)BO*dg z&a5Qrx5x0wyufWsMkic-69Fvk``@0=zg-@cDiD1)YN11m$?>}sCw;H#-HZ51yn833 zmZ!WEl(H%q5W3!b1$+51^hGgnCf0Y4+-4@jQBxU1^=97cpF?#$-<8~&eBPp~70&yE$6Atsi8d2n8_`D3yL4?R#oekYsc4?})WJGLN{OqsJKq!4HxK1r&2 zEJ5TR%Pw*?dYdymEKTZnCH`O7o!W{FzfG3wICHBrsQdBeL?tjwfu;TX)A`!Q#=%|; zt0f!$)lMk-&C6LG`mKQ5KMdC1x0@9k6O;m3rxVopz(+fwn+dqHn(_A2rS-NGZ1De3 zD#O#3B&-3OOSUIUen~^mEr$N>d1-0p`V!I}=p86#Wf51106<-Pt+m|F zeY|Q|uiZ_G@njd}os{Yfuk+49FukTx7WEd5=w(fL6+_##d<32-wYnY~c2>|G%0Tzx zjE?cD0PjGb(Z^e(s|IaR*`Sv%jy-0 z>P^AQc>;w!k3BKzHcKVT=V70lnhJFk(6Ikr6pQ??`#;~=8wP^(Pjl9;sR|7RHw?d| zhS!&I-4*sh57By086CsI+doxZJ!qTDR8JA+0g75%7-j9#fW3TJsW3Q`cnrQoRv8yHty`}}$Pkdg`TcG#uFrL@@u$8yZ z{`|&jQ2tpd@qfU|_lI2k*ZCA$OWM(-Fuf$zX%52sSn6vq=a%S|#v`m?7$hPNlu#qr zBeYmF7p(Ce5730BRv(rFRz{l@d*%=<8)VA5QL4HJp@}NQ>|s>G zS}P-XSE$lUK7SPU&s8yqBTLk~GUv;oXmb9EqUwnT6r{jC-C&nn8j4qE6U0`P22weu zy7&E+`hpojiBB(T5@!;~NLyu1oe!H_rtTkyX~AM)|BQ*DM%-UpfpHV$0b{5%%S~aA zQ-4{YA^YO)w-uz?gF>^!>e1y#AQfRKDCAziq$Z7k71xSek+B~nPls_X$e;;6zQj0u zmxi{`PeC=jBnIp`vyfZpOIEiRFdPbFmK0<_uKiKV4B!i3>W;ie3L%Ib-|1S*aEk^? zP{0zpLRpgIX-Scg_U~U~slXt3!AD_p#3@7Pb*R|T5KMyWf7R^YQ?5orFhyE}a>1*N4VGX-UQE|d-8thwmdKMpwTm+Y0d!E+Bzo+*v2}6Dl=elWW zX)PBmMl|YRjhv!ESj5hJYwMNbTKI#x@errL7AD}hMeeO=TGTgfJ;#kjO5N`*_Se_0 zpJAVownpqAFX0bEnze6dWwvK2r`?zhs8u#q&)@2X?-p+)R+Ke=!rTKlUzKkqn>V8e za$wE2z;t@QNzh}SWz*Qi1PU1DPfSjN6?3JgRciSp-bvZTfQmHe^>xYzn-uZyc0xm- zt!(q?#{eQsQKPztKt1pBVAQtA8P#O z;Dlzq8X2%98eHgYo2{GmB;j-ZabX-*3BZ8~f5Ozj|3JQZY`!Sv^2QPKYhmiA<`-#p zvt9{)m2wFTU`CAKM`)qu5!3j=Bz{!OGaUww!J(NuX;m$SM16BS%zOmOICeeWeqy^+ z>XQ}(CVt+V&L1rb0D~T_wiJ|2rDBT^$2Mi6ch-H*$rzjlHh1azz{xyxS{jee`uWTx z?^tNW0GO8`5Vw=NlDpbb3M^1gC@*$45!^#QGEgO#W>fblh(%wKfq+%jp#vX)A#mK0 zlo3UO;50)@CS9eFoh`0l5v*%fQ{F52<%A~H$q^E$+#sDSDuS|v^imG-U7XoNl&K5A!D#J%VUV=uDQF*?Tv*NP^cmto$V?X{90)E3tJwlYVtvr*5I(Q zliR?@ov}t2q#?&$A``cRBqt=IP+{}4*2lJgKLjf}NJDVgY^Rzpc{reV3YHq#8g*Tj zlFy)!T3yvkdR|N}b7M7Xbc`vK$Qo62QCDK7=?HZwgXC- ze7a_`&=DAm&K2DsbomDkTB}h(bgYCoAm^vy?Dtp8ya&I3#QsUgJ-3f<`^$l&yZ0f5 zeh6}NLn?-YsK(F2BJPhPZM%aK3A24K3apJ@dwk;y7`@s4%jag6$KiA1Tg=s)zGcSy zqbg)Ov%T+x+ix)u{BA9>ynJ1ra`*{&+*x>rV3$`wT4&D-&1Kt(Ez2hFxTfI#rX8eU zHEBSa73fvN$o2yln-dg1AB4q|w&M3jMC|npkkwW{{dRts#kx5z-D&0hO$1ti{BFwm z4c_>8HX?7QVo+bukt$hn3bcE={xDlkH)yl#@QWncFRIB3decZo&u%88dR+?G1311e z{M}DZPS59Qx(X3Fk^`q|5`?>}6gK!1#ujyBf!F~gHy{wYMVn2cD){CHF7)d7C~#mI z4|ugB&cnk4u`X7&=?I(_esuotJ1dRn&&a5rsI9nA@(OubX+o1_Zv3pTMWqA{66XO9P>YeEXZ0PG+eK6egC4{?FbGkz$9A)f_YukZt@5`q1xjdu5tO?e)J{ zG(I}lfamkLgN@IbT8%6NH!6Hj3fJ%L8wvP-07OB%zU{Mo_RjfVDnB^wFSB{6)qLjmId;sR z+uuK5TVd(P-{Y0ji}dDqvhT{P*d=eZkI@+KHBLYL4IX>sL4LRhVdv=8p8;-~w7#F) zuA1ZrufE1>hhE^jFTu^%L2LZNKgJjzD2SNjfXLQPr;SFPcB@5}Wi)CT?OKiF%Nag# z=wO3>(V-V|0s%!RL@9Dl?#vK~IuK7$81YX^BQ>gMP&1Sbjn-xcH)X6E>8boK0h}xI zkdo{zp-jAmpaBVrU(PbMuztX(<_&06=ln} zyi6-n%^k$jS24U_t(~lOtAs<7OGA_|R@3egfMGk8*TqhK2qBP{BsaIYNxxIj>-8w| z1YZ<51QCan?H5X-er&{8FSI1-m87&vrAi1^(@4&*)KReh4h2pksmAQh*vHPCV2rPA!h{egLV@Z4DS*P`gTv!VoWnaI z@k*+Y;;7-wX~20JKB4I|nrTKotx-!;(!_{_qXfb@j)!Bd{2b%M59bo5rWcr)m?O)6 znN&_=eAppkWbAsqJ|rR6YBgG|7VUPMMx#NRrbEJXr_&*XK)2f+63?RpKsC=ua;}o? ztAw>UzmF22t9ewPg;iqq=sX(27U4erT@BHBKy~|k4qJ0@Ri>CnmB>CyW*>b%$NQ?E zRawtcQQXe z&&0&U@RWhrm(@Os?KE0OwZ1WZHEql*?u-vm6a`t94dv=~yTkERtyjGFT%`HYUG}Tj ztL-x?Gi;6jm}aysW1Wh^PT&}Il&Pkv);+GRqfiIRqhkONeZRh=ADtrNe-8N-9d~J+-t!voXia zRK`Rrqt*1Z8-YWO4z*;RWU0f-Mu$!}5Hz5LCuoL*Iua5jBv5$p3AhxzN5n&jB010k z4IT{&L2X|MF`3M1Ajfn|mC9A*R~L+U?hCX$nLv`li1HG^h4Ov?MG0sKw%-9oDMQ&# zIzaKD)^}E1Fu6k5`nx=_z~*s5sGO*XV4EG3Ck+e-g%10{zLs^V4jPd%zJju|f-2o! zpZ*{R3cN4K^8te*p$Lvp1j0;4J##oGI1Qv>KoSOcqXb}^DFmF_mJzB7>Mh{VG98j2 zVos2OfSsghI;hwU3gCo*f^(>|bvXz_Cl!b3X}m{5z?vxFg2M+-rYY*`Na9I!Kur_6 z!IK3cgn~gnKvgJIkzoEFar`<5&Vwe3W{M<&NAXD@!LDlpBtk7fy|8#x@deaX$N;Z^ zK-UyQgj5hQ=LB&=@IvkZQJmx?PD#D#Gd&K;>;zCv0t9Qt)D<**PCN0ml7vQ@GiZ1Q zZDEjlid^XzP|F=j;c7O1pzClE?kLlUVxZ&!Q+WBT5azcNDwGAcD z>(tW*?Pj`dZSsy#pkIDgkpLGsbP8;?HVm*Wh(3a-*KyO6G-`sEH3oxCI$f-dFQjkA z7~?~LJkQzO++=NSjXbv}g7==8nHjFX{(AQ9+c$E@7H+-uRt_9Ez!OhA!O^2f0a#vM z=IGI*oH})ig@pyO?0g@XV|>`qef1~2@uR=ve*aJT#QuU zfTG(h{dFLCp4`~kmr=R7r6>R75+e3a+Db3gz0e|nsgonQwC z0r)1&e1d!b*_XNZOSdq`FV}RYXI^CW$-m|wm+s~_ue*s)?B6;^VExPqRu)gu4FNXR z>34Gm1#ijHUV8NnmX4pI>l%<<$z@khu&{e{LFB1VPO-TGc)cp zJ>&7x-SX1fEZMSTky@yYGD$%c36dZP5CE|i3bo&=yJcp4^AH)gvMviGKtZHL-9Hd@ z@6C)oA~Pbs@Bc-hFD;V>HFW&|J9iZs8SeA`TCh4t_sp{#zYx=CLw#z5?T7YK2+cL9 z`#R-PoqDYZHfHJUMcNC?42%ZeI9#SX4luv~uNiiQw0o2zr{Y42e)2> z7x)z-?a*ztSXpV&YBgxs66N|3TPEsP@xrkr-Bng*&NH*zq%9#~d6dz~EsPW)>Q$rA zIQ=BgJ@ph%KYxZZtCrfK_i^v-*K^yhVXl@(FKI%uaGsg@MdsUz$ip3QUsM%iU_#v-!f%6f-8#x6k3_A9L}2+L21y+ z`4n6q(b^FfvYK2A11>(a`^ezK3d$uV(o3`F1Q6x&D7b41eA`UunLQtWF zAO#~?KXfAg)W%qwpi)7lf~3ar22uzFLZykcqF^+Un4ZxO5gOPkQrd{(HU-@@4bwsI^F>|pm>`NGip3(uVv$02+E5fly?eecym3W7&OGvY9$R|2-e0ArQEvQFO?x5R!|E3 zP_vfC+Gie0DaWld+7ZfCe*Ojlh7v`L(OA)hVTD?4l#$U<#>UTc_WUg8FD@~+&|qa{ zm2R9OX?E(L;0hLiD2*fTYs6aonqOIqRau#=g+y@w6j%!;MofYuoA(Muz8tQMYnLR~ zhP`&m`4Od1I2nfXYQg`uDr*b{SzXX`7Sc3pzIy|V%fqxb)u`1m|fvPwCJJ4F-aPMp+eOkfB?Lm*(ZB@GM}3q>FZU798l z3b7(glrtIt2!%8R8@LOp0?~?CYbS3L>GdU_XQKm55TI>@3Js(XB9uuenl6P%Qz%3f zq6h-*j2J=}MZsu_p`~m?Y#3kyLt-syH*+E`&Axf$0Kb?>Iti_%7GeX2H;q!CtoOZ* zA!$t0c>Mq7D_{E|KRmkZ-k}+E{d%_E{$W1-`|n`?u3<|3fv*5*9MfH0C8*Whx~t#A z8-fnP8BU&@VfIoB%2TMlx3G1j%<${Pr?kyEx<~$uuYBF^JD>N`K+@gJSxd4-kVlVA zb1A4&9yZA0A|1d^?WDeIANwlss;Y1g8zbE&NmsBo#DoDs*}Y<~7)n7HVU%9qmz`Z5 zS7kWhZTtWTG3cub{^o%}sfZATqD^W$#OY;cECc*p!2)g0ANfa4o@+4OFeuw)@|OG9 zyKS7!l`DNX2k>Y#8eF(=p(pDEC>D$C+qaKuwaP2W^|aQEjEpcjIoUHB0BE&ZoH=ub z;o;$d(ZDY&+Q)g}CoggMaG865>z(Y_F-~!V<8f?~(pp%ho49q_(AWfrK6p1{qg6r$ z1jCrId${{OchV@0aVp%)jt73758u6s-J|`nT`f{p&YWi9^cgyb?qO3UqIy+SvU!@# zC6<>{Afi|wA}n1k^R7dhG-(s!zu{Y-e}vO*OVS&( z%9JM#aPz)Bc93;Tv+mDy9LdG?X#slDSjnAo|MngTZ=S>V(&r#O4;5`=Zw zx}Tw1mD+}rlNeaw?1{6?URZ>}By78j%_DV2Hh9g16Z@F_=)dK6gKzTmGxIE&ZEU^g zy}bR#Eo{EpX4(MB!VIg&pXOo`(`Jmx@y%>IFoZGJW*jP(3Y2RlgqW43CED#a7AGzm z;PuBV1O$?X9;`$Cp z(UY9K)W!x)SUSwZ|Mkzf_{1dZ&zUGQw0RGQ9()f6Dp0$kx3NM81-9(o&gjT9bY7g} z#Usa9`sfgw2?j6p0S5R5gID_IiCD(C7keR!D3^*IW&?A#=(tKc5L@#?v0!$dwUYLpQGgC z^YFws5qViYJki?|=JOzd;zEL~98hl#$G=1LyXHb$PYo|1H zyIs=MjoH3jd7}Z}Eh`9NRJg?4 z+#HRSCasR8n+M3y*RFh)EtZn)QcPQPYyct{BN#1cO}3|pS`#CwOs1$^e^Q}FDF_2a5GqOn zWdlK1Km?r#QCATrpj|dd1L%g9ZZ?gRth`!fb(WnjA!2b-G(j6hpfv#+9ch#(0q&uHMmm~Fa%-6ez6T}%n;HHHt~!ep(&>%wuB zRa*1MIsDl#@#HZm-uON~_`&yc$J_7bz`jk44;2ZseAtzP?B0JJJFY*--di5w(T5-B z+&}&e-&}2wqzl~t!0qhZGDP(%i21EE&oguK6lc{=4o!sAuR)WRy%GdBN3m|WMggHSfcs?>^(qY} zBnUw3F4o4RxmVE}g1A9v`5Z@{ou<)Ah{egU)?nSZoQ>s9o}+-2G`3hPxO0X^*?Z>! zwoMH)^ctRnpKo+&Eu7=rW8dbn=`uk*BuX#x)Yrey%J~8hyz35zZ=U)oPDJ_TM61?Onem};LnF1M z)nWd{v&8K#qDQHX?c$buCaKl5n*-`Hrm~M4Kla;1x6UzdrWn8RE)Hy~QrmziepRqD z%p5zyGv7SP*#0{itz2z{E3`W;R#pYj6e>l6!c|O;07&R2U6xiAsou)w?GtRREw@cplShaW$8foAW~GL*L6z^(^=jSqa_F7|I9qH@KPl-CLIMdqgGIC^Y| z>pr%F$*GE)N2MKDev0EqFL3HihoD?$_|Ci7G&YtQ74*TjAwI#G)AP(+iZP`s{jQgYixmhbV4;E4Mv3#^&2rX{upH_w8r%kj`zs{90jIot@+SvoFwyQ3u(@>a`JmT{I@^i`1y;>EiN*1X^D;n+4o!g)5K+cD3Gwo2PM&*~A0K{-C!RaQ zVmQk9)F{J2i_Y{he)7la?MGil=^%a^bm3-(qfr%?IzH{EqjqtHkB&rVm zA!=4AHtQsv1iBsU+%S569JR0s)eSI-fK@0dVkN@wj;EBuD%(>Rbly+0_V@AxZ2wO5#367-4*uFkMgw{BeVVQr4dmnWB>a3{^UkN2&+A6$ zPg`V70qNt&=c_lCUnal(`S1n|K2ASY_|JU&`S|j2`0!yEQZAPn8yjPCa+2}!aYjZ) zsMqTh3WW`gHS(XntvspS`?J=yv9)&Hy?ncdp*z9A*T0B6l^_U$UcG@Um%pDck1vC- zzkK|ca~r|u8CaEG(|Dyxeg+rKbvOHP{Gjgt7q&4N4&iu$2$?~MCD1A}9LWBOlP45p*a> zfRir@*>3*|n+g^SX|@@43MNplt_)DLSsl4&TeKxHDcuNk(IUl!W)+%6ODC|Tsby85 zBLS&ZSRQZ^RW!CHi{9>apxtFc)_%t6n07KX9VJg21tZlM<`Y_Ku^m)Gg zrK6l}bh9mK6cAF`bvK9J_aF~^_^nJuucb@GEFb+AKl$tb$bbIq4CB9Z8>5@H{9G6f zNT)%1<}hb3uCmZH6t@kt^|l)ru9Uc1irQ;|v@WuI>IuH_l@l~4Kg{jF`Wt-mx8KRO zB5og|9CUSx$wO0&Uw1P%-*Z6t&Z;TsZ`3 z&2Zs~hj`+vU*$W;4AET=aO0i}1apV^N!C9{cWhv3T(`r{+?E(d|r4Ot57*1QOCtm-gy1jinYBrbl_# z#5jeqVTN9hm&@ykc%H?Ve#|%j@(a9h=)dQ_o3?XbcNW|uoL%k`~D#&hBAFwca_fb-{HiI z)0|z5C~TkP`Uh@f)6~eu@A0IC%pKvS=@l+5Yoe`nrf$E9(OTs)b2E)nC5Cq0%+Rj> zdp|9jEY8ky;+bhWNeZKT7@gYAp5Zla(N7Cg2ni}~!fe}Z(l~jUgl2%(4;#M11{h#~ z*N)b5gELPZLe*nH>!n~FuQRD5H*%Pc+n7yOsce1*?G^a9Jp392{!7XSQT z{|29W-~hW{>B(&_(4INNiFr%2JV||Yh)uPh?Q$>}zm6^M{2%yVwoP;Kh41sdhrZ5V zeD?di)XfC2uLj#->GWg#{U7`*{`c=+Vt&sjdCzbCkNmrTaggCa5dgA!p6-dy@o#?b zkNDF6`4@a)=Y8D%dw<3seq@S6n*wH!w0N}J#cgC{Nl1WKmI&B*mF1^C$KQWXGh2Hr zzy5FkfLlhOn2mTz;{q4|;eX=y|NS@l(&zt_<3}%0-2djUuJmD4RxM_?6nTAI8Au9gNh5 z7;l_o{^T((#2r?+a>IiG1{mPy4@#}=8f&yB3PTD(NGXh{6-(5LC5B2BMhX?`L6Mq? zm~9tWOjcP=+QgEOh^3oaRFXn$Nl{=FqBS-(h;b(lOt1MpS{DRvn%qK14l&5Hmwz&K$1={5c=qa?_=>!Q*DeUv^i{O;fP6;bGRik9l zxw5DPbT`7ZN+iuL(h_93fGn4gl^Vn$Iy(*?<#zl>fl`823aec10cT90l~dvxl_}3% zng}Qn#5$#?q>j+1l_N1Yr!d-JmAH1x6!fCCi%Wx5&S*fiqZ5d7H6pkca;>WXuL=fN z(e8>?3PF4|p>XLV&bP0xyim^jv2wV?pJJf&o#BNV6?~Uhu&c~a-=EHA9uJ`xzY5VuIzvpqeH1p$+wd?cg z@B8cC<;m&!QwH)z1OC&;@AKvTdXjtI;6N!wp-`YwsW3J+#^%kNnVOnnbaa$zwc0Zn z$QuIm$Dc0`to0Y~zu!>6E3te#dcVF-eH_M^b@AuZ->7c$`RGqCPr%Qokw^Y_f0^=S z^=;GNKk|LEpYY%RJD->S-y1>18L{N+@^XF$7+`<_26*Ej;*93k)~6YiLMb=ah=^Nr zL3{*-lr?#a_QkxQCXy}ZqPtz(3J?ShbWZ-YV``Eqje_6Ym5zt zy!2AQ*|RBWXN69;OPnMm79@>8ho~ULSOa3PTB9*&3@(2{`C?$ix!P&xqZ=7YWl zv|_EpDTT3u(oQaCwIL@81f7M;oM*6P6+4H8g}LMp|Ojx~@fQh~MsC*~8-;*18A zlM!hHq6HC)Rf&`1;p9Ck6vDDl%1#RjQi!@j&;=U{Nn$YuYzRpRX(}YCqMKT}u_o>c zHrpXXr7208AljmW1Vsl5)}7p{o$Ra=DOMtl6hk@%AuL#yOO};_rDfb4wUcUEwm>s6 zv~)}>Z9!mhVmtw2oQ5oQ1sUKM8sXS>CdRfi(SHM=eUcZx{SSQQ|N1OW z*!zj!;hqn?i?>YW!dx)~XmIZ63miZE0&P7^d3=Z{Bip>uu&c|oo_(C@W`ku)jMgXE zedl(Hl~>l!UMtv($7wwI2!}6i<1PR4pYpa3y@MSaGP2g7i=&kGewdH{Z+qFZ=l|g= z|M5Tb#lxTBGkPdf+lAQfIkAC?d_{x{h(X1Wh{{QY%>^bq*oI5g& zdufj|wQU=lcV6Z0gVsVE_lCPrL-A^xT!&EIM8Om?ZN3G%&Qk3vUz-*mL;+<=g^<$5 zOuIKd6pkIC0jI;VTsx}YYd~j_&c$c=@iWtGe&B=L^WL}dwmlQn%OP5&Tzve?eDA+J z!ox?G7=7>~Jn-NL`0YFES>P7SXMV)fk37sD|GzIVJGa7e8(tTSq}LM*$Cx?x1b^}L z7$5y__OSop4yG<8yzPPeC~rB$^}EJ-l{eA=0}Svw!EApAcZ?UgKH5n&@N4dVEYoUT z;NL;>vpa;>W%f3Eisg2TnTZ2zFPPVG@>5KlrgUNrLzCVCgvRqc z_YYs^u_u>N@BckMe&+~VMy~PT5o~4qT|+#$!?0&_g)rm%#Vf0v|K_(jJ3Y;Uo?v45 zChod_3!@Vox4yJjh)+Du`K2Wm(uh)Rgxz=Tqga0dc?y%rlSj!*4yr7r*`ko;=Y;uKy4p`oPEd^atdw5i7%c$%a8x^FL~;&-(+fXDQk!_HTw3%kI5gfABii%L6sq zghU|(S2M{{8ippQR7zB7a%tr(=NA%II=oJ^%mD@%;Aai5JT4RpD5VIah(fVQwOVDQ zKE!an&iLXmGmS-N+e^%~mRasJX(VkL(xFR?O;S=3M1v?r5*U)eAc5O$)fnS6fC^A< zFBS!vK2%Uvki58>(H?|y^PsGKowF}oHzZpTa!c|082K=A(=(VHD|62bQ0P*$Y+riaUJ}*6} zEH7xZRu}_XyZ)-RGbj+mEfTZ3QK+7shvN$fq!wi}wYz{uIpwzZ&sw>aOBTQ#OW$+Z z#R1;bP)ZR55w-dRmFfslRAgW0<{9M4lw}T*{TrN|sRv8)^VsvzrnVA_bUAn}j zOP83No1@Wa&}=qowOSZsC=?153Wc8GfIqdspIlHZ7JCK+`EhjZ{kUe(7>40G(z$=9 z_mjzcV^VGm@P2#aQ8tbzi91Qa6ZQQr`T23|a)fO^{`v3u@z9g|({#Ot*X8-kkpYieL+pWJ1`+s}n%bRbLya9(#-#-^x zU*1q4pBMk#r;~>-mrpZEl67G(m)`*f7+`<_-Z;2dvs=%()n0a8zkCepjRi_0dcFJr z06+dAxJ9}`AwH45rLV$y%0m!Y8^h zV;cDIYqjFnJz^y@nsD+CrJxXnsA?Il{mDE5g-BB_6)BfWL{WjIrDc|v8?3fEv{OrL zV@NC_8k7O8u{Z+)WeurzJH;c~W!M$KE!0IhaSPU6>y<|`=lv=`;da9pNDbl^_n92P zNn}vsf?2>y7N?8XMeXA3)lw!yTGxb%tnOW!YZqZH@?8ElME|{9Ln+oJ>3WxIACW9} z3r-f6x{W+yOOj|>ZAEi6rqd|WUMes)Qe~)KrB*Fc4uvofv~vATi$!PhECpE~SFX-b zC{mONShqO{nIC11+o++p^*^9A+W5JpcI}}xNEV$4SRqZV+sML_^>w$?GYCd20%M4x zkfJS6wnc23(zSvy2})U9w|BBZOC|%tB_{}03!l0urgIve0kiWF#Y{0pDq(Jy_CA6I3A1STnWnO&jSyon>ShJPt z&@OJ+8&Yz|wg1e~xH!w%;}_`M@G)*Uu$_J5*}Lotph~;g_mO|j$ES9&?N9%Rzj^FM zp84D#v2yk~j{WLy@W6fh*fw6K)K`?Wah}GpZ}4}2^v8Vb(Ph~89^Us)u4A|!J38e}@O|zMgycj8iLxXsu8f zqNqTvRz#^KOrd}-xJ^_Xid3d<=K4D$e)EMt<7Cm$Yy;k=;7x+g(h7~)C7PviMrsk& zLXqm^b=>@^f5z@;lxnSf&B&U+ObCL2QmK^dPwOEfwA*cFXJ?t6oh6DQ#>dB*n3%Y1 zm^4kD-Zv+!9pIM>W@I}fbxl26CT)n;C01uoapWavcAz>r$;8f`)VX%V>sP|csk5}= zgz(S-YC-TS+w5+H#I0ds1wdj&+yB=axs@S?Hjl1*b*JYa;hArKk-z%$ zXSvjf*ES*!DlmE%@A}R6^R5pZWSFa*|75uIjsMEwFMW+~oSb5Kbei`ZsKT{28ZdQg zV-*TDMG*KJTBhB6kte=?n%Rr1=wZ~77xzjA4JIk3_HxHfMw~fhNo31Ib+!*jS10)GH85j+`k+_y^JHP+~*g2LK z&+z2Y2F-Q~gxJ+NR*rq0hvSIR!ulc2T1y&ti94&TEY34CeU|BSXSq1Dz)bm8?)t<* zZoKUdZoBJt?zv%#dT3ttp^cjjRu*Ph6v0-9sg|k?_j|h5Pysp&sD>4&SJ}FG8+%3# zfxd!OeSDsi&;6K(zVC$w{r8H*R%hLBNU&RVd1I2 z<&h)rX4mFT+&4AG_zm}S?=C|SXu|N7rw{G9OI&#BM_jo7)4cc4joh?*>}9*-%JTpK zAOJ~3K~y_HsGvY$>JYoPjIwR0!&8k*JoTevOuuUz9jX~eUlSC{1O+JJ1R9k>NR%lz z&=I9_m0G=iZPu_#Lvff|wM4zxChg90elBID#lUD_fC1hl@Csy4(2JsoC<-YROH|7h zD#a4@N|oWp5F@LLjI1m&+gxV8-Jlq+5~?<#>d>|cX=+JuMgS_c*c42vFaao|um(`L zqr)=+FNp1(cRd35k1;c?B{~IVfq` z+bv%Nr|k9NQCT<(Xhf7#R{CVL^W${DP`yT$(^@%6My3>%te#YsgEeXaD7ALgx!w)( zArQeSz5D!S5OEH&px}ZT>rPQH7G+XIrzqtdWW+$5pi?18Kqm@fMVPo94`QYBmT5CkshVr|${pp5*TR2SGp-Mbztc#>U1NA0KCAWQ1C+);nRK zpVXdr`S$SbkoW7+UoXDi^5Ok;Sl`EBEiRvKKl;F(O>VEtA_yw7+`<_-gvAZYn1=L?j7&n{rNG#k2M>OrT$a7z-!-M z1_gcvFM zf-}}kt;JfGC#@Ax6oC%VVMJ6YQZARM)#}u0LsTkt&Y!=)h3RRSTOe)5B;6F7CI}jm z6fqj5G-3_b7(^*|Pc#^}v%Uzfo~&{*9BV<#%PZJbK4?(NptQndr%hU=L8Yj~y6~i~ z&a8Di61&O-Uv?*Q0&%htS!=d6_-j5a;N?j#&)j9;!{xpA;_Anmr=AaAH-OFBTauV0 zS*6)bXe_U?x?pI`hK!F^7#poII#Ok*5>P3H6bk`?F$5X{Wzk9?>s#LWJ|$R0Qf%tx zumGYmx_)LfptMFCSNB?FeNMYJ_WjlcBr_QB&XT4=DhYz1l_m%b5g|n>P?91^lF&*m zMhoEA51>SFPk;jD>PC8*7f0y#m83x_g$Wb^#>s+|IC+wW&_EbL7%0+IAca(ELJ3Gy zL5y{>Fab$~Bs*!;Dj*OrLS!^15K=6$Rp`_rsSqYYCxxy+f?$;;1rnQ*BtnwJ*tqMA zYGQ?o!6wc~VZefTvqDIlbF4i31-|>$hxy@;Ut*!Vc30JQGJ4~CdGL2Xz->2fX1toC zTU~mLXC8lsBPRr|doS<0dxD9P%P9D)g$vJdX?lk9?HYPwKRb6;86H}Dmftw6(rV1` z!r>X3D=|{vN_o?D>>JY*Urm{PP0(g#VS%~xEk^`A|aj(cLM4Q#==Q;KF{~wQh^J&iN z1MGO`ukz84-_8EQS2;01%lSnC9l+G}j8BX+b+rWhz@SP6!a$)^>I}o;R~P~}f)1cK zO05=BDgezCXcEUMnK-juKwNmF-1lmuuUV9b^G zoSy~K)R5X5yKcFGojWHOtL9%03T+HQkZn|=m79|Epa}{?jBMJ*?RQKtQwT`97B|s* z^T5UlNhcvS?wJa-CMZ@J+E!(#-}3-h3@^`3#_cH z5Cj3WT8)vB5$g3iCr_Sac6N4McoBEf>;S*KP+^(qGURU$yLf@-nHM;@4DGbU_?FFV zx^CO6^89{!2+(Er)C`?wnTcC=5E*k7^9K(vLjkZzdX?il01;7rl8J+p>rd&D1iI&V z{@Y*Udw=^ao;QzczyhRyhCE5ng)sBn#;%6ZH{P zkEMPMp+bTvB*@z$w5DmFf1IDZu*gCKDu=eS_3pdaTn^dTp0-1Kb(WJqKFP{*6I;2C z(&Y6VnADW7zVJUax))w#@%$;KI|j=TQ@b}YzJ2I5^r6c`B-fw3{A!(#0bWo144#Yu z2KXgL>jH~&CpmmBq1gq35-RQ>Fa408%!L&D%ng$yA?dd1HXAI>&oH+tNO6>@y>I9J zcizu^_uj&dJ14lzNi`dywc2EHex5d<%T-E+BCmD-ir0n^qazAofI9;BRnT~W6DOYM zJ5Mi?SQy#7jq$OG%bt!fvS%k#lM@UlCs$SRE@`~T%v|a$>jxNM zfHw`ig4YwBj4^~kNHK~igdvr3nW0*Zk<}r_S4Nm!S!8zAojNd|tgw`{SWVir<1TF* z)3%njwj_z5O^OM?8nA&N1_G@yTDxPwRRYR_0kLXLQKyufyqk?|p04LbX zN`af^3rai18;VRZu5cC!8mvO4DWq=KGbPG7lq2)|^fh3!ghYYTZnu1=F+}_}^#1sD z+)ojwJah&D3K3@$udrwlcf`5RskI;}Vx7`Zdil|4tH2u$h#^Xg6w@+BLR7}=Hw&Yv z*)fb&I1aqGNq_;JFyAkl(ibr3afEtR)T;tTFCP1;PNLT zSvhe^;LKTnF^kVB%`=z9X_Z2hTbZfMc__6;G~D1W;D`qZI6{ESUC+wlWfFv3cm-lC zHb}6cAdy1C6cxlM(?RJrqPn;v<2!`i1k(}dXy^)2S5b%+Q3{bLBBcmXL`oB(36&w# zn$Q>mV_e_X&WJ!~itb)k{DQRa+5o@!P)ZSm1xlql)#@mv@(@~I{eBn&4Dd$6&jpln zgf6ArF7ja*QYw|GR;$!%HR|;`b8~YnEG)3Jw8YBF3awVFM}BX&+dbm?Mur3Vo$mXI zWTP%H-7IYPXGrOZ-1V&CoDmaw4ZlApWbbB-+$)G z>D_LZPN%cZXdoZP=f~&C$K(AaN%FE22Es6;TCFlMF~Q{IBqJjuluD&_b?(cP&o4Ly zmoIOAUg)oT|M{@_YaaQ!^Pl;+eO&&2K72mTy#M??A7?-EdF#L4?>`@ph^vFVAxGZ0 zBwrWauw^5|fqtjdT7T|OzyJ3D0}L?006(wje}@ZhvFyh<=O;g|I0sIa&^HeFanAk9 z!t`$SzI(W6^S-4=_IC3HiwJIwFXH^F?4y@Q^15W<|)2pW@;acS9+cgZwzV z(HNJ-gcA?s=R+<&rN9_NK?fKU5`_iIr4p5Lg>tD%xm>4GDHBB@#ssMOWoR_%#4%P< zY$AwO^fq@e2GA%Bh(_4}gw>4rSC~GjWA(x9~>29!#%3KEqNDn%#O z#0C;^GO@HPr0o>jvPd(5W`Z;=wymJ8kk~rj%79z;ri64BSUGc;C;$3S`Q8tY^1@7K z?PgG8c;8#O>HVMNgYVzX=CWD0L#sXiJ$~|~&+*sa(nKFQ#LfGMUcqP}rSrnG%w3#j zKCBaLxsh!XMe08f2Mr|iG#Afs>}Z=-6N24asPDRtZ3Vc-d%0c)P1>ugtTq&z-@S+W zQ1vzJgd0&9+s*b5{&bHlK86O{m`O_>;pJw{( zMJ`MyRCe6TZSVO2Z+-9G?D?s8jk7?9i_e~5`ouY!Xdq&2_hyDSja&sqI;AuFB4-|Z zh=>2|b3D7Wg|WAPgtve6BRp`hihArgvon{Ni-E`x?YNGykugTEJOPKyXrNdCqX1Pw zs=hlUX-qfw2wV|V2xgMeu>#eqaz+EPLZ@wsuk_Rc5ok7u+a0TW2t^0kTc9`VkaDM)z(aTGFU)ic@<1 zyhFeybh`IeVh9UiruXXeB^F5&k|ZTb zQbZd}p-91`#H}{nL}5*VO39#=IKQzwJvCJkVHi>j71%EAmV3Ue4k?sM6ik+$O|fx{ z)mE2o3d%$jD`kp-ep!AT1W99>OY^L>y0nvQnjDm=4b`Yt3Pf4Bq`gYJ)uh!;NV-B$ z9j06gD99>{jW)3jh>B&ZLlq*maUb{c+B}-gCeJ+c3`dV1Wp#Cx<>lpdMgU4V+56bo z7)mLc&1Nq2+Wi3rcqMc%USRR~F=i6z>T$-lY-a1;k$!JK1Ejz_(`Q@2Fw7#aL5VQHK>#oXC5 zEUO3`?qTzeaV9ri+X+>gI=&JBVTcOM#%(#k>w?$k$rxaOUlz=rVfoDSJiQ7nfa*3T z_uS2|yz2qBN6>fBKn8<0MgD*G-aN>TfaMf;UKz z1VG$C-{|Aruc|V0|H!J>)r|%SP$Wgt@7qY!dzE))z5HeUt}G4?GCDEA*vKG5xwrn) z?9@Bd=4u#Y@Jl)Tod53s&>dKROu#j;c#@@s^PFy5lAq0G@%?KMp3*3}T$wV4izeXg zYZqCZUWPkL8&h5z{ver=+xf}8B_^v^BSR7>kae?UTo;srl?6IMha2U6ynPTro_xlo zkPR^16;@Xj-5+0r`skyNcMyr-SIV`M2VBpkP{>oQR2b>BX|!7`)mFG%Tjg?nh2>VA zm3E!gc7vL3Qdcb+szZ}59U>40BO{cni9C(+u!_~LM7Rb+1VUM5c_Bauw9zP|0V}X| z9Eo-`N-DIBmHYsb^r`O#z|ja}K!VT`iNs0(C0b!m8qnGj0wkDN*-Hqd5FicKv%bM# zmF%xVO(-Sg+g}(H_D-vL2HG-ForWbfm|HH4>U50FrkkKN=V}&gcaBt1Uhc3>0KpS zfRP5{*s(X#4k-Y}#AMaB%NP^45fIopkT!!BmNj57RwnOY%XIAAfI(s9r)dVH74Kt5 zBQDc6z1k95a9<1r-c*^ z3|)*dI8lV?hKQC%bU_3HFOXy+K{gU(gus`AjF4oI_|hTcICzeW>$rH*Ue^o<>~obU zQKYfiy*}Qr5J>4zsfFPuWwteJrl_63 z#563eXrERDPZYpC&$?1c(x&q!btL6l1--R)Ch3xU>FZ|qv|g{Ao)``!p6!!->3(di zGwFv?_H>6ZmTU;nVYzgpJ>m zdkNCdQWBE#lk%jr^nSCkP1=-xMw0t!q`zOSZAtk_I}!*XdcwMm#vI9M1L<}o&r1^0 zh69_8Thi{$LLYtf(MKQe4+vrD#YXy+1nZ3-(hBHS5qhnF-nwt%cUSw-=EI`#e^oM>0=rKffRDh(7)H@wfePQ>pLWU+)$RW zd9=3U(Q6Mey%(16BCzkup3KG$79<%zLq6+LE@r7#ij*$SFf+Hn%4&^PyG1t&i2{R( z1UNEw5)nIU&<@gd5i#G!8WsqIu>2%#6vok5Mu!2TFebvD{21RB274lbK?s9ELq^-H zR3H>+4WR*L>||+U*-CM-w*PT~$*`mq9#TVP2PH{td0@7HRGI|oF^=b^hmB>LwIs31 zdW^M%>zZaj#|@7{Fm326O+D;zxg)65BZgKx3{<+5OA)1#p_FqeXB`R|mrTaP_dR?h zuEak70Mc@Hj8vHTvCwWU>pbGcs2-uPzs@yc5k>wtqq7%&=?5_F89Z6qBdQ3@0W6$p$Lh)7swnZhdZcjK`ZKwt>tX*XJ8ltYw( zjubeK#8U!S3Bo`UB8UR3>=1T9w+ylgvSAP{V8tNn0?`H?f>8!i-sk)Hpui9=9Ao;4 zKjx2q|0ri_oprx%QQ6PokNp%s`-P7)mA7^*X>^z7SzTISwOeI0lY#g>jQF>&b&5U( z0gEpm=kn|Vozh+g_TI`C*QcNq$%>iKtalxaSz5DiaAMBN#7kR78QVQ|6{m0}UVE^{ zM;oKx5;`cYh(wOT(JFrSt-54&fFh$0{RW@ee>eN?{0si{|NbMMJwM07E0>smIlbl! zgj2#FILPh4^1t!{b}>qIq(Z5B1AI->Wbvu5@yr+h zKfZjV$-qbd1t0(U&+?g1O_B+k%pO0^!omV|KuVYD)NZQPL9Wvu-x{Qc)0#7yw z!Y1_=>&WAqBLhsbWunBupab&?Xs}vSw6E7up3%@+q!YHRUWVhs_T3B)4luN-cHRdZ zVF%q@rQJ0sokMyVyc}2VRo+EJktFE)REvVloBc!+I=vCD2gBpIiw4>1qP2rDc8>#q zSz9x&;PDuPVQv4shIoIzactJ5H-MG7NfEDM>9lcOR~BJFN0_L_@_=XMlp&C1}@Jy}ZiG$_h<)jG>_+ zwvTw2)*Q2EXPK*M!t4-ZJ9o0R%36` zs=dUSXCLL|GfP~m8C+oq%lo+H-dni!;3Q+d9na?MSuUJ8$;sJORu)1AZhny6y9U`7 zy~@)soMW+7VR+|0Zoc;r6W*0{7OeBucDrroBSsj8OixdzQmuzEhA<47ot@nn1%Tr? zw0<*83qXppYWOQVN?OQjddM6NoIm5-dBHi)`2X|Rpx*Lb(vAeDT z0wOf(iZ%4DpiOt>4W9phew!yBKf$ThRFue3J^Yj0{LB9%KXdB<<$UrRv-_jh{_+oa z`ne9ZJs;=i|L31(Th7~j@1mN}c#*Ty4Hg=M6e`;|e8?mJE_qpQPA_ojsiQ0gA%VAx z(eX)c+MmO{+B}4pX)T}OwNsi#3-X6X7}~poA&9jy-Ypm)Wd76)b7$s>oD2-y#N^g0 zTi#}S7p)X3u@3b-r0g3F{5bKh>k{|T#|IfJ({oIpzQ8h;@0gi5$nLv8#^)ctpMuzY z!)hUMJQvT;;Col27}p1-6k%wE?5^wLT$KX7M{tFw6&K5(xlV+kRp!4z$YO=uj{WTL zeBRUngd2w#5u3lWzI*WOvBOSGzQYKuH1GD-p^rZLcz2L6xO&P@u0zJpkj+Nqb2&SiNMWpJu*8r-02oA%+Zeyq8o(%sPZa36 zz0hEym_Zja8ifWTjfiafFb1?ibX;U7i_^_Rq|s4?S;=FTi?CclDuJgB#yAM=V@2-L zDhUWcSmC9Wl7c~l5Oyh-wyFdIn`|9RGW8LxaLCR{%MW7aFwCu6c1t=c#qNnAQT*tU3{hwHk%ek9|Qj8VG& zr0i?ekxrAom*8q~>3S2S>(bg9Uu-lMNS@vFm|V>$ARU)HPn#JPq{no#&_^GA^wGzA z7aQqVa?jE}%f~Rr@(%2hp6qMZ%IMLe)Fq)cNU%l+>yt}YBxtRPOoTQHq(opaD71;Nj2VL+2opP8AU2-RmVaa-jEFh(%2>X=0z2k*oNWJ- zAmV`Kxd1&jQyO;gZG;f^YRr}C(qsH)Fm`SLJEvC`w(gT*z1%?oQ3!2iSPdimPK#VE zV4$9%x*AfgXsQ*#V995w=us^|q2QA@f}AJFI^a4Wgag7L9EB6}*0k23G+HU7vHM=z zU5N=@jC9aaA{-at_~7~oA?-n;#ttD4NY}y9PCSpo8XXv0mLnv-BYH*yIqi^D65qrl zp}{C10uvdGV^G3?W317DBf$Q{NI^>&TxpO}TM?3D*@yPG0fYiV385qs8WY9S8rRWd z4~9Tj*yjh(0R$2>0&OH(IrjJADWofKw8T{c0)dVs3PTitZW}~hBkCYq8d})Tb%SWz zKB43K`uJd>ahwy6f0M8N?l-y63Ru%`_JBbS{M;{b|Id7!n+NH!ozqx4J4bC{72%Ii z9vj5*V*zC2BI@)beCbOsadx^+gvO{2^;e(b^!YkEI8FVj|H$wC>n~C&c!=ya@{F%+5EtybN~r^2uZ+-ZlK}t-*i^n1A&D<ADQ<*~8#K zmG{D79Pc{97P_@a9Zg7;j8~?Zz5W?_Cy*W4%GjMBqFBvQ*+er=??W)`2<;Z@kW_qi=Aryq#?$WvYHiYwlH!f9n;;xs$#C03ZNKL_t(u zIFV=auDf{XQ#Y}#bDGD#_8hOj^c1h3I!(};;ju4&k#pZ4V7R)4fm=Sw$M+Q&DGIvF z7nwi$ZN7WlK~_f@+ft_FHaPj@3Fg?&!0uc4=mXm+0un`Y;S6V={sxa9J;Bj4b>!r& z-1^DSv3KS)8Sfk?PoLw+(SU9L)ql@tw=JM&j`HXeM>+B8IqKU##RGTj;8Xhoj?9;t z7#pDMU!?iQ6a4P)H@W5W|Cn30YRappdAc#i&aK;-Dx78h`d zJdom}hV#t4{sQ0rv#)bu=ffPn^)5a?S;miAEWY$r9{biAUOIXkpZmF=WP8TG6X?!m z>gS)~+do)fB!t=SZS098dxn=e`p9E^_r(Jo)m!+@`zn+(SMqH8=;OzOc`h$p;=;Mh zfPjgEjErq%^4%axVFxb0z@j?<)g5fRib2hrBS*&f$>%f>(5oEyqgGtv&>vtU~Z<1@GCg`ZfC4K$neIbKO(|*K)2ojA;{*+_ zrs?C|$Gh^==%bH+7-+L_agGZoXJa0qLv?D3$-@Vk93DjS?!1)gP!V-PFyO{YAMa_D zhAEV)j5x5YAPBld(KTGJ1Z~17=z03Dj#tQ6Zk!b+GZ`|2LzEon&9^tb$ukf|S6Y(4 zs}O*P<4RmtV@yaG1So9wS`#|TmS@Gs7+Wa@iH-%^#>iON9gNgiaW)7v7>yOaYYd=` zKpSB_afn#aO@hQ&;{lN-vBwrFFGvosiE z5K0gQimAHfJ8`xM%ecBgyGniKoNgQ<~;to z8RZdi`s8Y}ttqyv)_r@B8up;f=l~meZViRYyiUu2zlq|ruPulW? z6v+7W1K~)7E0K=q%^}(#l);U_izJ?sI2v3d@Nmh<3^_7nq)*oM$+{kXY&hV%F0Sk0 zC5nMU;-Icp`>}FtqC}Zi8tmi!i)^k)E>|I!E0fEYk@7vh5ALIn8$_~3rPnOS zaqxYgLZLvVQek9dgq4*QmY0`%p;oI=uh(fdo3vUjf*>G@?2*%*PKRc**;A-a8&oCr zZMHVPSzJ17ya$$^q!r|n`VxiiD2mo8cc<$~l$_RR(i#CWW=*ktv$Q}@F+EXEkD`bm z2|m{r?K&Sx^L(biA+g%NX-)Wt&f0NUYX<-(d6Oru!YXPD1R~8x79b6=!vP7xdVBMHA4dwZ*$laSj!ZU3 zrCg?3DRbe%1!m^wSy)-26Lis$vZqc;jYfh|;6xbh#*QK!`|d9#+O?+&Xrt^n3os(l z=GL)6L)_2U*Z}~eZ5p95LSv+k`Sx;+EiU3>05&QUNNiC;(DRtE@p(w!NL9TKo8?RP z4Ccym?K6?wOlEXF*3EP3zQJ}wDTs_AP>OD-iIk?@)zn*>)kZ|E;88EQ3>1ATl>((= zo>JDOka5WQ@!rW5I1UJ@NltGN!t&7UDVUaDW|BRykQgD+GDJ8EB4x)K&`1mp!U&`Q zS4uqBMIq5L&TqiQ!NVmo4jIoQYdkW_C!-b0XnTmIu=ASm-|X{ke|;K2*xy7TKuIjm z4{MZQIfcY_A@(E%sX>Gq9coNy5P`u7zy!uB@vl8z6@1v-TKG%K|>wGLPc z5TKQ|evhsRO}KiAm6spk>wo$+o;sPuyXmL+xnKG`_uajn@qsK3O+quvg$paJtOP&? zDM#75qkLm08T63HaYq;&_sN`w|_Ub zD{ZbvIniiMYi^cCqd}Jpu0O`^J9aZTR=M^wrbCnhtvy_y9vOe%quoI*uTay5C^vv# zC{f68{ZsM|z{wXWcp0+3i@QlXgcJzh&Q_21Z-nr{pJ4m0%LFSIc=6~79y>ly?r@2H zyS9*bGnD3^lH5>)KQzXeqZwL0%UtIbzVo%eL90&^ z-FblhWm`b^;!B)9HNy+b1KhT6j4eY&imoEc3miSo3(qgmSgF$zF1Q1X@7RNCUZm5Q z#=M$iaQkj{?H*^MP$fU)k@Ymq=~p;;>?q%QuEp5?{cM|>V9Q{hywj$LM!kN9*%uf2 zi_VAmljj)SJ<6W^D#ddrsV&ZOxg8PsgN$z5 z%Ypq(>Mwquvlou?+E9+0CU!G5G0ONzg_01sjq?=ezR5F>7g*eXfZMi>vukRCZKD-R zfLysiW%jE)cjP`s2C5t$7>>8-9Tty2%ZrcykVnrJc<`ZJ?A^12so@-s>QGVpc=`1w zdGVzV-`IJGU%0(QX<&@;J$u-%FY)-J$C){Ck@DC94&O4#SaFHT(ODYp0t0z>Q{y~< zZnsOL(YWHh=|@4k-R9!Oi`;b6O&g>7=)#!UX>^XEWL7+ z<<$=QrU#iSLguPz(=Zs-rF(gXg|p|FyEMnb!ZeLD-{R;idAx-N1Ap3Lbjx-o_wHb9 zq(bSsjcbZzvw6xz2hg;dExO&1>!Hh6L$vr3XP){yzW%4*=j6o(jUfJAIvJem&D{0r zPw>zu?_l?^zg~$s3Si+h3v+YK)d$H6pB(W%Qn%^U=DGCB3069qKxx$KS=!GX;rx7! zt_%no7di8tzh@C}bHkLzCfL1yoD6F$gtrZPf%?)c=TCQtBFK$xXK-wSVfX5Fb>EoS8-)NwZKHe4>&H8Xefw3qZKENV8d| zA$){4#QsBD8Q*&2?jwZb;AC>R!YZU*7mR@@dJk}V9ULif90No|Cc=8Z>Z6Z7{vkmK zt5_$c#F3J0CPT4MAXL^fYrE5-(Q4YzZc^_wSZ&r>tSz(Jtka4*M6O2WTq2Ven`vtT!=X71@wh+8_iyL9k2=2e6b`0>Wr8#tKPd zPil!$nAQRv0+B6G2!T)zB5)CbXT6^p1Ij_SBvDgfItCRQ!U)_BsL(0_NMq;7)@(zf zD327_lMb*#c>#%_9OCZ~cg9wE1(LF*4MKoD8P`tZj%b z9}p&GevnRV;_^2X-AgUvcmYC!aO_n&0^t%H4_MEOMhJu!$jIPDl8lo0THqn@gky~Z z$dDBovW`#I%aCzB8yqh-7_g@;InqH2f$KPUp4U^<^kQYgv_j!VrPMy&ClJ;c$M^FT zOT**~1Ni=p8y)y? zFkGidon8+&Q>sq4<9dZJX$5U#)|C5``^h@&I1XVLUSYtH)M?DxNdXBqj*H{ixA4sr z>(a{U=~D(eoz6NX^>n_Zt?4q7`>6WBkTEX@&ZvUrGBnj!*iUq)nHb&Xc~EAf5keY0^m7ueCMi zNKP0?=A_&0qLe}i`@H+UZw&x_zxO%)Jspy`jfR2=?N5i~S=%`7ee}^sAAP*HV9c7~ z+eUi520_noAf}gBysQ$x#wK}<0fT)ALqo$s@FU=$#AoKc{W z80n#e1BRIIB5{Slcck6BY5OcFBpxol?~%bLs}wn-2$iy4)Fn7*>(s-5&<3LfZ{HOM@E?3=#JC`=*|LXAA62vZYF>DX0|$S zmx78UH@cPF=w_!I{3sA0Jc<+hDNgKX_r~aV0!l%>MyDIn)nE-&YSdSnv>GAy6pPIe z&{@2|?2F&$g}?kyJbuha?*14LeC~7n>}L*8A|7kqMt9C}X|~O3LxafScoU3IWGR;3 z$}^C`^#>T)Q6YQuGTnNU+RQvkkCHfox?cFZZfD!>r`R?00`m(IOQ$ZfI=@UnunJJMb6hxaj=A{`9a*6~wU?>w160eIcU7O` zoq%e#=qxPJ&;~UyLbftM(cy;X_%0hG5;y{U+sK4GUkL}Ovi0C?}&k_ z4=?h@$vMWR_ABCwb|)X`XBB=d%wVV&4{*YR1r-KFZeS zll)&_{d&enaQ|I<*y8{g=+5%`D^Kv`lP7sff0p06`z~(YR;GZBpPbsk zu$tj}kG;rW{1?Ue_-DC$_Yk9%aVBo9GO^$1?DJ=sd#1(Z=@tV&b06C>Ih>AWu02m> zvdG}rAloVvR4NtRBY(zg=ND*n4eh}L+<%~q?+PG?(_Z4X(PjSR)#vFHwsYXFoB7DW zLaem!6K$R4*3o62KXsOwO9u#U8zvj~;na)I@YFM>xH$HU+@Hj887_`o@U3H3rs~726p7gBs4UI zAPAVBpXc1Ub3GpDMmUaj#FLx|tCXVC>CkSsdyZi?qE@T%!V52O@4fd@tyXapqrX1> z;XnlhK}4toem;l0VXvl z?_k?l0atG15Nt$|e6d7%APcl;)*5u%9f((+8-mdgEiv=Lw|Mp&U+23&oTW~@)=M9+ zxRr?qe~u4*>LNLEqluBiaLRgw&+61*}Ed;+0 z%k&S9v#`{`8BjRUBIlld8V8V-J&YgBGqr#GD)aFpBD_qqw#58`Wr0-3$El8tkl&cD zhk)AbJTtFdpsNkKu#@4@t?U@PD#d)~5Cy1bFw(V2~3Rk>i==w#L zjz7t_zZ&r1o+&C5LmN_D3nAThlUBVA!lg7aK(3g7ljpIIcM^Rn)<++23k-y(xiCM+ ziJ1tHAhxi5e2l%@-U9LbQQ(L*-a!$tQ(7@jYwOV~2m(s^0-bJ`R;Nv?)1ua_Q}PQeXI5#4ZL|y7a-Il9SA}$SK$i}I z=@JkUnTW_(PrS-#>jg|26pBd10fo{EZ443CkRa5GK!pTRNT@>)8Z8W37#v}61Q==J z5O+ulq=*HQ$towY%tDbi8i>;i1JWR*w!%#z5XRCCA;6wzU>R~kD5S8y{UtV?G#a68 znO5muA|r_iJqTSu*m5Edx0Ap5*6KR|Gjg7ZO z3)WHyVli+G`$-Td0TSwLVu5ZF-^*icNnkJ{xsntNgn90gJ%jX`LG zqYS>1WVJ)qxMXnfgkw(^kUkkFOU})ZaeXqL4W8>-gMow@ol-ze8x6Q|eBWT;{SPS} zd_T|d$SyLO9M&UfAANjqk?a%F`v9=CBw5!I#p--MPq|!XU|@jZ;bD7x_Uh`oQwQ4Z zHjPGuFbsQ&*@+TcvZqKZny2ee+LMk;r%B?H^l63cbej{e`$?LOO57X!nr>4Pm*m~7 zos3=OT%_+W4i*Ncz1I={Zi1b9$VTG2htcbo(|2t=EhO zy4^1AcAI9i*)tkQ`jE@zC=?3h^LcW)TrXs^S+dzInM{UECet$N6?&im@@qHUMMRXlC+mZPTXm$@P1!xPV??N%|7kxkj^-VB?eW5$QDcZmP8w z#{QR#p(7oHbnsl4d^SU=Sfn~IK($gKm&@Y15`>`>b`e6ex}s?{8W^R~2+$Iw0#|`i z_B2M<1;;@vWep6Jvd#~ru#+J)NDxw5UWQN@q1N#ljMAXvF|i$s`5y6Sp?l+HwaJZ* z*R~21&q>TG!6hytX+~TR1~dkZRok>h8q0gJ#o5FL%vuXA;`tKjX_0*Akeg&kVd0L(}TJPt2viyqeZozXP}s&TFg=|ULufMAdSEX8FxMAzs37GjIw+=?8yTHfsqcz(HP-kgg^*8rmkbBUH}MC1{|zG zf$zFxjYrOjDCmelLa3~fN@#>-XhpXFTEugugyoUN<=L^egPZWzdgH9IUjADK(>CZp zL)$=8U>XKGAVY)bf(hgFaWDo<1cV0dDC|*}U|f6OBoi0^E>U1Y16>eppe^j_0?OEH z{1_r_i~DSf4+u;|xO|B-fBf5g`@1jj!lmRqbU;Y_Ew?fC@GtT&|HWRm=6%*ryQNdX zzYJ{2cVrRd@G;pZz<&`lYY(jpGpv-pwcfm;aTY`NtpTo+*FRvj92M+^;0!*^3DPV=!bF*BYt)ce~UGYp| zG%B29=E=X}@&Ej1eCh92*#4{k2S5AIKFY`M-MW69!zO0wG>h|HS}nlIATlFVhh4H) z*Jis0B(9%hV&69MtS!ygh2m}OJaCvhZ>@6VTbHT5ag6!dJGrcP zu;rTTXBQPLaOU|-EYCENgZnAo{u%DxQ)2uoz6N!7h1T)RnzO;b*;%7KqDO1h3 z;jA3Nj%3RBPCM!2zLj2wx6XnT-;y=B(d_u zWP=jJ7|g;Eo_XqJo;aT6z^^~Zwu+Zh+Z8~T@k0mMaqcwE-+q~|K79}6VujJ|zKwTt z_!6|t(#ULOch03cu%E$)_jCB+UO|9BNDnvXA=^Hse332tE6AQUp^xj77%Kb7^F;We-+cw7OlIK-L&oJj$yl&+x*6%h*GE8OZo}N}-ah3^O`VW;lC- z+1FoZw!51xOnM%8yEy!lyL!1d!{y7DId<$A&p!KXk4Kq?l#*hx z$k^BzQ&Urnj*il9x0#-v=Iq(CtgfyS1i=+~mzI`z{PD;6_{Tra@bEAL0|V>g`}iQk zfKf#Cc`nb?xZLy@+Pi~+f=jk{jEbRi@-^mXE-`}xqQurcTd0ne*OkZ#>hR)ez%s=&Qju!tL9`*`UHl(;VYP4ESYBhxr z1uElJaD08_#j%Pmu_LuM|eVIfPhKnDc!f zDJ7oolgsBQMeRYT>RM9Q1|<}(w2D~T6(~=j9FVcnz7_`MC?Xl! zHzFZGKqQns?Z9a4%Mc(P9H|g8*g#D2j6FqlMgX$-OGl6XKd0(8po!a^dDwo(JaBz`!s{ZO2q_d&D_mvpl|xQ@6ikLJ$!P*UStmo* zvmxX8_?}hta~&7gae77pGM-!~{w-uP864NeNuHVb_tMAv6WMH$LUE8naS+$d(l;3B z;~xU9wq_-J0MGM!*L*%twOS=ztyYVAy-vMer_pH8Y&NOYYAh};u2ar-9ET{1 zdS3Sv<$7b*E1M^K7NueXL#2qKsFx-klJ=z);S+;_M6tU^&*L;wZvLB2E3K!?-OShk zEIm)!o^EGiFpyl6&xwK{SRqnUM%t^f*1A`x5Y}M8^E`^hB11z%jE|2qI56QkWv^I1jLnG3x zgdzDxn{qCqoC_%zB8nv_l|ss8k5bXa&v^L0OU7}?2=Ikf-j~8fIvU4BAlCN2mhr%=?rQ9BWjNv{SYOPD>z~hOir>C(o@CGJxlm85#ud zD$huFrN-QIM_FmLiN?26oZ8C-SNA!;8BDi{I`-EbIr0k6pK02Me&q#@eC0cg4UKa6 z7jL7$`qk+A;Ndww8P@|5;g-u}bGZ%13=LLKy~rzn_@B9O;Gb~sZFh4^bzOo!61WZw z43N+JWL_KjU{_j+ENH;et9 z_wbb-blXkLg;QJ(tRgSMMfgREp2Xjr_41~0Gae)RcQQ~pjz7IhXXZTfT2X*&c-7qu zImYk0hns))r`h+|A9L=~4|wC%dwAjWy=>WYl^+K{YmV^zw|VmAB`z)xF?Qc=eCP`g zvvr_w6~mM^o!U8`{qi?>5tkw(>LnE(6V;Rkn*Q63-TCnnz^S$h-`Dn%D8p$}IH zRo8xh+Yr%OU1jn73|ecdJ1415jK9B>!6Ie%jn{;O#Ki-KwvJF7tbmk|pJML;a9kH3 z5JED1%TMuexR-6UDpPlt87#!}>v)KKiK1`N-6pM86T2%+L0}i#!68c3A-2a~EhIv? zc%C5a1O)A%7c;PZg7Qm`G5g*B$iH2>g+qJxvTxTecI}vC+xAhmK6o##=UTaBFINf? zb}!-B{mtCbm$-0liMn%$$#D;VGfm&ZAi3%Q!_;}>`D4r;8lt;>V%>V2E9EF{A0^{D zo7c-)mu0?0wostz+AX?;Bo;D4p+GL5rLykfO2tPCrFFcF89JBFvb?;=LJK(g1V4Oi zfK#RQlj3EdDzV@}RQ7V;q;DHBHCHL`x zMo@#f$N8K8@Fkx3;c;fih(^|I%K@_Wmcn^T^YD|M&krzwiGW{^^Hzuy@O~73_*56^3?< z;W0~VZjM@Ql{Pm@-&+LDlRWd=f521UJIc96In{@2@bvnUCObk^1aQ zeB&#}n7LHP-R6-k=j}@;fGm+8KET0GPZ1e|3669A=>?wuU4fK4*nZDLJoHPS=l*J( z-pSx)a_^j5Vb=vGUbOJ=vw8f?HOzb)CzzkU!0`)qdtTl>#n6^fu3=_i(Y=hC{~>?- zJ74CR7iU;49c23_f0dvA++Mbg#HYC2AXFH#aB`Z)Y7^}Z;g$EWeVa>R?uid}0T?;_sH^U_BjeQbcynw3+h zn4Mc-Ndt}~v*RFJM@NYb*WNXF8INos3j#Xb4&5kB-YwpvkYuVu4D8s)=-_^C$#%GW z@dte44-A=8_ponUk&;uVF?)j7pZOl&d^toO{26}k!=L77KYbJXMziFupAx-y5X`f< zc$p_pWVrkGtqfPP*SR1YAfz2=T3u^2kj;74yHg*1^znf}TKIfbA@enmWhGjrs{)W#pci_YZlnoaW8d$DK}ZE|WWCM{5J(W9MTCw| z8o*eAwvbLNP=YR@P|1Q5jsFMHn9`q*bt&wr^VKp0d9X zpoPGCX)_?~sRP!t9##%&01Q?jiWQt~~- zIYV+roZ(2CQShv6$=X?MuwAk?*sj{ztu0rrQXh%>)Q_v0^lT)dSBr5z4zpB-~FBN3noR}JmVWTN5$o8 zG&&v|Ej8LvXg5T=0_6rMH^j0+gj7hau$0D*z=5sZ&u=;Fg-htgHnoYHrr#gE))us%Vk=vR!@i^o6Sa{{&ha0Rp(OK1q0T?Jp-mGDblV^n_emtyb^7Oax&P zfq_Dy(7R8B29mMp7YOKoH~NJJ(!v4*fq?-A7+`>pKDHAYFxO4mr$mHAn)S1e*RSS3 zTF3h>+B_!^@mg50Nv~ite%ddYFDP{*ZA;UTB!x+zDleglIxdemOdBWUra)egex}m& zGER`H8zyS+A${MwY$0=ThT1UbBs&5E5epxf$hOpEJsVLjVO!w365FwGZ3$68r&XuX z@@aN_{4fHgK$&quDTow0RA6hcBf}UbB|-ucw581+iZoXdX|$E>841jgnY{-s;=z<4 zthl`~vo?~=!;o0sQo&o149;et5f<2VCPWiyP+Ajdfv=zwK{JSO zJ3jSBNW}{&WFvAlMX}&fEZLNbHrcF8Hsg?YZ3>PcYYSY*!LcNc6u4R;FuyAuU(w2J zTS*BzGGi-@MM;ROh6dt2IH8b)NF5)$SAwIAGLJ|LqyiNRbR-BQgrPvi zV?m_j@fD^Yr36bLr0DgdPy*Q&$c8{R1yNU^y8_wPSZ$4!NPcKjeH7@pFC$@`MjM4B z5$HgpkqD_xTm>QmB8*jn2t)w*AVMJ0=G_sd7ldW1DTF{;$(RoCu|?G4+KKo0_P>3G z6BjqAcA_mmU$0!oqDk%ai@bUXBoRtQM1B{)UZJ`?&(ei=x%l2YynAMyMySzv*mjQc z2-w?70d#5A*0}WU1zN2x#Y0n!@7qI;&9B7mK&S}WUZ+uS(rBBtJVA|(6K`_qz2jVJ z9p_LMdP;I{8y-Vb<4hiy#CrW2I`RosNL&I$b=FTj$9vDe!t-lKIr1wHa`@mR#oIdw z{hJ5rLGb{SV@1Y_aAm`I`&VD#ooDi7tQrp=-_Jxzqigf5y?=@e-+zlUmm28ZpXSkr zpX9-(ALGbuo?_-kjsV^55UwuJ2z?BDvyTqf*jQa<@$?eaW=PdJz|n(cvfHHJ4#Lgg z-gO`2!>>|qZQ##ex_G|m!l$KC9C^ym0nUwMZ#onv$yY_x^rOl;e1Y@3bS z*hypCw$s>7n#ML7dt%$RdFQ+9-dXd1)*8L%J$vtGe_c17P4wIo>1S+A?rc31&S_o0 zRuo&===y|X^n6KYw;xiV_=lMoJfj8+0AQIA;m)$H+R-6nD53ahr8It zJ$O-c;6o0`*(JxIan9l_rBhCa;Fp-VRAx}soEu^$W7cP$c4@%LdVh~{^i5FMArXki zsglWaS!WHXhxufl`~YO%P!Re#ghg6K=id@v=~be^6O3Du@!@D$x*kiQsWi5sq=!-U zPIBujsjC zWae(3L2vVsqKN!~IOp!}sXS3@PusGx<%m~(wC%NqKK>1apgcJEwdwa}%cS4AOfmsl z#wiKCQ1ZK97B5)cfT&aTAbNh;oamUs8DvpWk-+`5IEbyU@Bi`e%~L{UAQFRy2ekHs zMIkn|IUlYRbqmM?1iivAY`utYdfZ5EdY$pb+7O-Snip+SL3QRMr)cmxdZmP>paPvv z{xIg6o7l6DMrF!#F!`h%^^~ExU+g^>d5>)2ehVvXaEN-r`AVqy?C9A}_7ABG@9sVF z|7=)yw+JdUH{wjw{A;l9bcxCz-5+zLM|iTkKW0koA2o=u#B+Bm-}(7wu=l*aW$ZFc zNU???54Jwz7-sgw(x+p?_)WRSPI*T$1O_uF)Oo`HsKxVfg`3B@M2rmydgSU2Q(EXP z+UMq^BZJt@KL74-TW{YQ@a|RPE8u8T|5p?7?xBDa#o*)zwh@J1R$uGObHzatPt~tf zetiFDu)PlCT7Ns({+3UdcZY4HM7mU5JP56LKs!-T@9?=maX|p~9kal2hQ`*8s#;;w z__JOIZU$ZlG%&xg%PBOp=h;^O_3+8#%_Y>$EHE?{4VmN%iLHb%iG;MwLCqqfos53g zq9%m1F)6V!1s_N4s3fiD-wMK6<+*1j^M+ub$4obzLK~d6{5-c+<$`m>y1tp&ZL++3 zazB``sn7Kr7l^944uh%vAA~C7pah8zP<+7R^Nh+*P5tahP~yZ+{Gv)6n7?K+i^)KE^vS^8g7JWE<)_uS2G5IK&iX$Z&*z)GoigD& z-1te`8is9?z$u9FG^JZ52!7~r(pS;TKVYe!TFa$zGRV-*-L=$9f5aqy8# zMDORT7R^YRq)n4z$?3?*XEhi`@){2D#Il6<_pyv2RK=>1CUcz;7Ki{OpvaQ}TcQ&1 zg#bRT!V7X8Ez{$4aj{=UZ_wFxXb=<}q=kU;Z^dq?GEnGrP-vi06rD5_tsd1787*{0 zhTXg;NWuaP4BS!*h6M2}V7eMmbnE?N!&IXy1Udg+eS zFLGN1MYg@wI3P07FYapqcCC~aZAmErdKSjWIwW?cow^C_b=uG}unMZW-k~X$hN!-e z0s{fT1|HGFQ~+hkvDUW#9@J~4!1@kngbQUaJ$DC2q6g@0*1LT{lM_$!&?j!Qq;L-a z7pjwC|Fd!Wwg9Y5oqpaYNM`|o>?J_~$PGKtx+_)+a_cEkl}$jPGjvoKjZj91SmqG! z#F|_pK{2aXz7ii~=vuvS1O5$6zJ-y8gqw5wdg9C2qV$vPynhnBkzdUnP&(w5{g@3x zjx_KhPG&hOc~b-Q&)ctYRiA;(Zr&4Tp<;o?bu#5Yj6p$5)p7~yBr>CUrCI7Fm$%=q zrE6(%dGloDYVnRNpit%3RLL7>WKRw#+c_qmmNUhiLk*{tFi^*M7AEjES3F1UThlbN zoPv#i?dw0@k#+A`r+E&CM5zvg{x8sv0rJEhexf4@8(*e}U$zGYgtP)dyi^9MeOHQ& zvYbl>m{nOJJ5d8f0130})>Wceu1-F4!Kmr3j?7n+ZJMuEP*}4s0d-uhk(7QAWGrVp z%R1J!>4|EqHz`p?#j9maQp2EaMj#iSEuFjkr|&eMS=21+hGJ+&_iThmXwM2hl6Ela zaw=us`1UerN~_buA%im7p1Ww&)YT7vfd4zgCPzcwfI!ajpe{#1?NHLIO3JBijGNiW zwXP*sR01}hTwyX~-EagNu(bopX=OPL6{gpX{$e*3 zNUO9B{Glo31<@#F0ZqBc{rj(U7neoV-p!2T2M0vxbYfc}nnk}4O>ZK4v4l*d*dDw> z1hs0WOqL)YRyvcKToE~RAWO^(pfScDc=MLey2rdKWhl}j@`mq&R7VA7be}}I00|`% z=~+YOg1HJqHaVqP9MJpToJ`)Vnorg&lK?=80CV{xit#lhs%cnhW;0a)`@;RGXLXu3hOu^kGqp4y^tA@kkna+-G$B5SxUb42? z1Y&^`|KTi~SFd1hlvRj-bfP%oO<~pICf!)l>0Z8G=U;~e+ zaA8;Gjxp{niUlJP<(lcAiD>7_3PD3!n;%2$9Cu`c=TMi4IqOYITSCHtIiclm899U| zv%cZu6<5xFLi^VVW|bb{%*ouN*MY+xr@~)KS|@%vX+8d_cLrbL-EB1^;vfeZzgB6o zbFFKdooj3p0}ao^h5hwA`_YZN@BoMvo2zJOHXiTZ9(0vybcO~%;*XYM+*A~bCLmmh|F6~uWKHjGWbtsL|cdChDQfXK{NSX z=?E<8{j$BH%`k?MhL^B|aDVj6^)O8>=yW!By{S>tYMUxy2pY*SIogz^F0;$)x}$-n zgO(E6<{uwb$3w-|VHNkLxuBDXBAc_}@0k-6Tud;`jWPN=X5rzK7rq5Ws2pp?o>y1J z>@Tdt$M5XC-M(Kws3E@+^FTX|@oh~m@}xoP73QdjcC1qzgQKb=#d^$(UytQLCyRl9 zT`%*_I>$}c^IVHsOOlmDPxamc!`IUOe{Um)h>&+c=Z*sfM5Fe>?x?4F%@SB-f)=Q+ zGJa-shkvX01w3)oj1~!r4BF+)cJ*5xHi>OP;NyWBW7i$M`Q?}SyLnG%{BWtZ%|zZ& z4!5GSYn7sWOzqxfk-+uclhaiJoK$oMiFePY(7@!9j^%R&hsM;+W!Bt0{)WN@K}!hC zUzc{h)f~iB?tk+Mk~{sNm>B3FB$0imME;Eq^Ebpu&bla3pTQ(o-#V1lvw9l8!8t|5 z5P8paBBpH8m*#l(ve~jRYodm+wzhdh&Nd}hJtfu-DVkav0AUM-95h0cj279>Ojpc% zgq0sECL7u}58JpRi2*^jcS`HroSnYJ8omX=vicM{Z()l}AX>m7^Nn;fWKO%NZ&@;m zmSYjp1r-ej%?Q<4lC2yymCH6wEML+Gkj|KYf0)TBn5jMN8e1aL$ZpCK{cfj5M8AR5&g!=iRrPlc`0Lv4dy;BL z9o{M=Mk+BuNC%agEHmC$FQRSMuOt(h2#KaCXUq^-^^MSIPX=}=DraxEjVVLIuvZ}j z#>R%GnG2QuljYqi z^eT(RTC+mK3MHbr0MNwB3ckuUzD>y1mb6Z2`oitT*FU;f^RS_0{2MULF8$J2Lc)zaAWb}G5BGU_N8}M;i$7+KjBpEo z^x^)xl|4P;Q3U=KY16%I1Nr1YI_Qc`bJ>Q_#*(b}>srEmjeqB?ONDg*O7W340nED# zNy*(6kmehRYEiS#JIb$@cHfmO1xD3I?buS`9Q65~B=EMDTKrS)Aw+BS;t{(b@B)ex z-5PSm#w~gwyfHM;PaefE1cq=}lT>cAr&#~Q?f*t9Gzlx)tWs%9+F!UiO^Y&QCPRAB zrV6X_W&a%bXgt$XYEU6088N|h)H}Mmey68WC#aH!6i}@#OipJ`X3y@7WGD4pSwKQxq>` ziECX#zsu~rf$IsIJua?Z=O(SH$t9}*jCkq;6rcfys}`%V*y)o&lR~TKW9e@4fsxhU zTOxRbC9}CMnT+b8z6;9-X5G-HI>n^vQNu$N)vC|MVy4&)c+nKjTu$4#m$!(zP}W0U zh@gew4(_-IZI%x&PM7vSN{d%;AGV7WAEjx^FE_U3-4Nug< z`$yXe;s3n=C)@hcPll(oF@KThY8fqYZy70y^IbyXpkc*qEBR@m=^-Ex%bNQ-Ap=wh zjqNz|`5`V4OuOsrI204dBj|Z&a|sJEz}WTAZ@!owIPS zTWugKvd<-4oNo7+HI0EOZww$M?2vpxZT$Tq<0v6Dl)pZ{{4Mv8{cAVx+ByEh(CmKE z{187PIfK~(Y=g40Yh3C(wT9kW&5@JPT^en|!b`N`J_~_P)Z};QT);7xL!O!aVoSyMLrA^>QhunVA&r~G= zpuGPxP3Lo;C(|0r^@{)N)(wIZ(N6A)jw47m6zAvtOz8o>L`iZAOo}NR0%su3`ox-7 zLvVNgp+GkcthU+TOnhDIWIGXRs~XG0RYxO5Oz>dvtKBe|w_Z;s=>j0cM8e5tmK~$w zzc7%A@M!YfFaI!k?UARvSnKMK7t#=$+S#R_puDVo-iZA+(>nX=+yq0YRde|$jX18( z-A)mHWBS5M^>p{B%{B`T_{9tmH9SF7q z$J-Y|#pP1%a?~&;ZmUh0suY!J>?;Fm6B|fvf|W@fkOWz++a}hqa*kqbW)}8#*@3d; z`f};sGY;A^y%yys7p`60o;Lu0j~{p(7liZpz4O?2-V7m$0nX0O7{7rw`y|^KP;^vn zVrW3uTBm-*tDd~PkB;~TZ)xO;=H9-OtGlwVSE|CVXaD_3X+`6n`?c(sr?l4VO{&BA z%e~x0qvGk~KjL@q(b?~h?$5!($ez3YpS*E`EW^(VqovfX?|M^0Pi60K=1Z2uZ>~Zg z8A^B0zI!_9zWml$e{rf4d0W$V9U`kLO~g%|slvj=OwTAaY8Ev0htvH#@Z^&A%^saC z6KO)xqJx?l~L0@Nxvl;N1gaS zU5@~_#v87BrcTS%7_<2%Z*fX-TcA{}T{KJFZoYMcELk|8w`=y!PTffLh#CFon}P#z zSv>$&C^0$4rT%(tnSyDTfN|Zf`QrNO=YX`6(polR6O@hq{k&Y`V;`Iat)j(?>66vg zc3I~g$}+#FukFwUAj?w*@K#<)VSR=W8mtPgI>*GsH}XLY@9Hvq`R5q>y0kRx^Nlen z$A5fj+8>Y$f`i@~R=PE`r18Hlc8@nzsHOehwK3LJb9eV?Da2}ZbxJp?m0_jcDEv%O z>$~fRUf4C1+0EKX+53}09cHp6XA~hGZKmVj43rIFmp0a-&dHwGic}2X6U-bbiwEjWZ9ucMO(sa56)6mrLQn=2Cs=$-OpL7{wE2bnuW42@huqB> zszWFY&{Rh}J!;$(j_OC8jG@C&i<*eAN-m@bTQCD?#3p4)g{G7;(Xa2Iir~Do7mC(T zH%dPf9rh&_PIoB>M}389*$6CF{QR@IZ6hnS4uN{ucGTm^Je_6aIBhmm&SxKZ!@Y>f zG2{~U-7`fWPp(NIX-=jMEs8x^w5`?Yr zWQyC{Ev`f?#-nbTKc%^U!`}{v?BBl=z1sM1nq{X%H2P>H?ghC_NPFuU1%MQJK#GuK z)qWMegz?Ig$puf=zqJ2#E|6D7 zemc+K>u1CyE2goUA*rR>E;?C?#4#nF8i=HN@^QOD@d)1ADGk# zUFfp*Us*Cp%RB~E7pT&9jZRdr$kl))F9J$X(L=bxOUc~;fuusMi>tH{5g&VlcAf61 zK;hS8-LDU~<4|iPRw7IrCAB%19}KaJQVC22!-t!V?(8b-skpMj#Vts`1w&h1%h{5A zD9}Zt^uGol7Uj?^Lsdt!MS>G#jMBS8Q2}(K@KVxYD_zdmSYa05rqN_5Q{+O`I~4K@ zHJ8Q2mGGd=z)XD>0aZvF{ zAwcZcZG7@doQh)x-%n2C9@8Itxm&lDdq=IV#RScRN|0hjoTI4tQr~h|H0eyC)c@4^ zDu5JijcE&c{HlZp8wsH@+I@&9-Za%A>NCF9>R{_NkLKP6s^*v@N7eK$uD;gq(6gGD z3FO2k9IDa)EK$tI>?#U2S{vv%bWO4RJ6RP#_Acp%#8QXKq+0%rIMx&h4wF7=aPAbTGrQDMxozB+K4XEU9w-Zu>oq{7i%H z-=u$$XURNwQ<)WX$>2hl4$?fI@;y^MKG)9Ad>>iw;G}FGp+-ZxBX&=5jCv)mCx|$Z zvMr5iGo!53!pUqVaEPpL_x9HZu>PTCnPja@IQ~!m=49qyV1%<^&~n~FgZ*D0J--f@ z6dPJq3l4RbHyuF;FeWJ)8ayJB(undwu$D0IZJd|+ns~KLq22}q{}hHLE({O_qDz*U4PA z?-|q)v%vV>*J=EV6u$^UWEw+4o@bEKSG50xzvGpP*-rZy&?;H zYKIAR^<&9gr7l0vszBl;vTT{XJ2*acLIhsr+z$HRBjRJ$ap3}6&HK*RaEpPxT9$A2 z)-!Cplc{u|&wDW1z{IgoNKOLFU#An}krA2PL^Ql`c=m#hz)=(SF2ohp6OaHh9yBr6 z^;PEC3a0OvP4p(8Z2si*oCv%8!2cD-eGiky{Bx!QJ$6O?eAPx37q0KuaJN<%jXHop zp}IQA;84oXyGM4qTJ7nZQ-e1i34R>jFYhJ}j7-vPNIK2gI_{4@#kXtNYcVkinjP_V z=1XvgQ&Y~L8x8Y$jA+}Bi6nWayW|?1T_P_t;ke)SM=4GY(k7_v!MC2;ydLO5We~y< zD=S)uhlkHe!Y>%r20fc3K9HNkNn2%_6EU>(-Mle<-xaF9`#fQ<#{yJI(ALsONrhy7zRY zY+zUGF=YODR5p_K#*PR_k!QDEbQ8&k4@&g-W5g)aUhF(8#a_>HFKX)PADpHjpSPt1 z-n#k`% zHrl>3nwLc=i}2lM5O_HF^?nKJzv}S+r}6bbt6}Dwof19K)1zoI%zRsJ@WJs!m_-4E zo!QPZdD*90#O6xq56^g;4^D;;<&{{jhBu#zod-H+7f>I95UoXgOav*5@V zj4Z4!4YGIf$b}qYW>z$Farx?crCJrW#Q$r$*p~Vv+IfjYF(_oavXBiTG5Vq;aneM3*fCM-$i*?j7;9$> zdUR?}I%VuXn!Td(;1G@TQrDsu650n^*&UIltC@sTS!F!A-*3UbVaoivl|vpja-ON& zgZ50Wg+t9N7^WIrd3-lnx;uNJ!AsQM$s$yEckxjD4L7eESTa4DzCzabSeK1FyC6d& zrVU>XNk}4KuLN;xR`Q%B$5L8h=ec}QL0;`(jKWS&m_->{X7}!`VLY?%_b>o{D zWTc_UE;=PpR`3625)Tn;1>8`<%%VsQz{i%8?cyJNTMs)g@V-iWG=PhoA!$y$(e)Pc zM1of& zmQqXTbo!}efCM1JC4)}KdRm2$rgSHrS0p~SXhste3*q>Lu0B^DH6+9@1RhIO5>>MC zqZ*A+*%1j^DnP_I8$FDK9K9|ox`@ zG9VBFqQH{k&nvhhLVes|L=oMxR(7^L-Ib~X8ME&+w%_*jNuO7LnYPCKX_S|^nypw9 z(qwZf1Lp>lOF&#uHCNOLhF=41b?U%8|Bbv=4Xf>pqlZ5s1-+ryX6HZIH3XH`y)=cpQdN8^ncZ|6C*E zu~eoV7$2z23s(i8rcn?>(TAJNl4Bc^1SMm68VWT~Dr^RTdFB!4!yIx`#$B5sxup>I zwoH5&Pt(M~_Bt{e0+Dm-Li^czucR7Jl?fkfn>jVW4xt-w!pcBTJPHW72-ta{dP1KFmln`b%ygC>d!UkyJX!c8TkIpsOC;&yp$_DB#pqM zQwHAx7u}Z#o6+?~{heDBlH7?M0%V~ow& z=CAO+bPV86!}TNYp^^ru_jON+#C&*v8ig9;9wR_Kai{88LhfuanMDfvuoNAqhb z{dah;k^Wu!ljo5%zCfJ)fL+ z8A#k|Y5d{z zT?N=YE^4NDDE1aSSpU(k-#E{))BNW%kC`Sbh!H-8{rn}ncB@iRk& z!BaV9FjDNWh~iEowO48|RH`p(fgDxkOXVZ@59-lUE%Fq{+puV&f37Jc)o67K0HH83 zso1vCL!%1RJ*4+=5?LdvhHHn(%5eR<_JOF_0EEEUVN__rpfcTd(KNS?;EnX&it|35 zweXd^c~sfT>A^UG5UvneQq<)D1tm>VF1q}rkFMu@W%*uoDNWJoxz0Qb(X~DU=H1Sw z0(P-L#SigxKS?k#6DaEBHk`jl_8fI(x3MHmqlFrRXu>ps5Hsv+xgr*}vlTxiUC);% z_yc*16>m9hCFvWn$QW(igtGxVP%>q~E|`5P0C|&gFdC@gmTMnbYM0S%{6O$E08(o; z8oGlXP?NwRmTSjP;U71f(f`h9K0Y^lpQThWhHynX@CXJeUsYCsTDo7ho9zV#P7GX*|V=XHSUHV-g%7Pej!bcTzYPl0m$#^4Q zCZ7lObq>d}ASSt8w5V#{ zx014UWBD2zS*ulecn}rEB$?+JfF(&?g*wtY1!21jhq}t*Z7z_OxWl#*BVDyvxCR%| z2OvUlkI;@)FOx?NGqdmJW}?GEK#l-VP?E9&oAIQ{_FhMeeUc(oaucRxvS7;at5|`u z4N1FVNJ5$IB*vUrT2K_)Ned85w<9YMKxEtpF}7wY8QEe&d}zs{Kjqw*4}3NphFx^! zEgV(Nr5iIGkfQ9EjTE99bUr0FP1Mr?ivZj7>5f%Ry2aId?~rm0r_cPxm5P%litahS z&CX?K+&13Xbu~oFbDE~bCxd<6?Oi@g-LV!Gj09p%P|6^IB)IcgsrA$7# zs0N(NAGE-r-p(~XyP$*sbPLNUFlQ-KxY}TZ|LErwl_PRkQ z!ZyREPA?ZoxpBY;)<73PP+`L7xVY48C0(WjfTn(M)nRPZ-`A8gpbG|^&tHUSMg!e^ z1}bsTVQym_q)IOZDrqmNz*1yZyi4xAAMJCNO?>a%-4_p4p(i2UB>E@Wd zv&+-}0S>v&TG_|b?;d+Mtrvd}>+8WIc^>)Z@vL#RBV+1hbKu?f<4&zqd`vkNgPxd7U2=@R|ZWLOh7aWM?$+xh(QB zEi~TN!tCftCU5iqhT zQ9WkqMd1h}L4_gwC1#9GY=a*b=wppgxX*UJZwzk?<7lq(3|828^=99^61p2Wcn17)DQ5-i6YrM z!VCBExv^Wjol@`8rW*F8ITk?#M*pCeVQRFGO zeS5o?__LculQsxd4C=ye_BGL}Qa@`BL4kP=5#iZZWPjRcW^?w5eVzT8s!s3nfEqe# zIb`mxeCg|E+quJQ=I<3({#H}()g5em4!_-&)`#}f~{Y{l;Zg@UxmJAR`JxU_4jwJh-HFyB#!|@W>7QFjv^l((bQ3Z>8}KSLL4xBuMwKyo;$gt+OKq;c@lW?E}l$ z&_MW-3H7OOv^g|KIu*t7{s`F-^H|>}qeR#{Bbo}wS|>7UEf7o>-Z|dU<(BwS zxP74S0c%wy?7Qdhx^lWPXfJOm_n7>+>h95uhH*xB8oUhtq2O?5%*%D!K9HSvU1sCI zdmu`{O2Wb+injE;`XbNMES$R4H@<)goSR3YU$Zx~5@R--@Gs&kE^Xg<`c35<2s2VU z)eVfA+W(dOL>gSH5NWrix0*eHc?P#Axj9p4&tVe9G=c*cBZmU)LLybo{FQH6D_Dp` zAqq_H5)6KskE2J#w7voE=fEmup5z z#o0Med%O+PTu*ivU8AZC4)&jUg2rg)n5(WV%cZ?NizK7}7Wn((fSCragc~+NZ8S+V zXd0zaI+oMa6^Du+=lBHbIj*ObZwlIf<@o^?;b_`c38-*JfMC==v?7rKG9c8KA=l*( z8Ph;}X`VO<9&8A>W)a%JUMYA2X)lj<5p|SAG~z$lqNKHa(C6xS9jtLG=VhXS62>Q0 zb7o{AyfpLe?%TG=P=klyEd$x@luvL~s9B1XzyQ*M_(Q8WiAWSllTsMbNde&;CsD(5R zyu7@!xR%9@3NNXlwT$ypp??;n$^j7=#&)qo;x5jB8`^pR(J7Dp4|UxC=G z`$eLLv!tOaP?X=h-n{uO<+r6dQj94AN|2A;!rj#7s#ik9(F$={l3PX&Z3KXY$|JkK z*f8ZY6{Xo{n{jP*=5?koLmK(XT(?Sh)*2y>p96W709ZN71>_V7I}8@#)3H#As-TKv zdWC%G0xm?CLy0~PK8T26kplbmH1KA4| z?40m%`P%@eYRaTUQ$bnQqR}>pB`%1T)QUQy(3ln=CaRTDSjkf^UI7jrr8YbiK7uV2 z3P;yg{X9NGWOAK|n65edJxIn0Oum2IF6KBD)CJFlo~<7N$m)xdg=Awd&t}hQshYq! z5JnyV#57R7@Fk%{0c@0*K}=i)v$W7ExVDQFC*pWLR)(usSKwCQa>ZVHG~6J`k$+kw zw#izaF;_=?43S;>5OwdVNmJ(BaCo#m@mleU9a- zvdS@IH@B!d*j~ntqXJVnetT}>i`6FDvQw%0@yB-FBCSC)E<9-hs|s=7jh$a`V0w`6 z0puvQBQn;87x1yN=48sXQpuj$4I{!oWB6f(OwJA0OnE$Sxybd$6)MnT+O|^7vW1R^ zn2W4kt?XyH>eIJA^&CZ$;3l(hr$uoY(N?d}GaQ5MLD;8lB3gE}+8MuC!@X~tRWW-L z7l&oYd@?&~dbvllCD2pMKWpo454ZgDp5*bup1!kmn`hPwj&H;ZhMsv;eoZZTd{(iJ zEDJQ3h2S)m8~Eg=>G$-lTnY)@q4?zh&zaeNhd!RtGn91?TVHkkD<8zKc%Z(G?b+A4 z1a~9$24>MHu5Yx`Lz8%-ur- zcgPoqY8em^r_Zhx6|{q@P%dOBDFMgUwj zp{}UX_O?4xwb`!gVLu;6e}^VfYx3F&Oiba;Ei;~=Q;6CWm>{=*m;wcZrYH2+YN0?x z%bxRk?ZH|$?+0H4P43=72D8dV3Y3Q7-SruzkaNtab5XYY*obR~){IU`OHmxPR(_pf zgsaHbo9z#C4n8~dFgQ7c5CT+8-|YPG(Gmv4idx2GhNmUI_0OySrWauBVBLQc~Vm|Ii21UR#v#ykQt=@;Z<_>i+dHzSy}Sy z+u5se{tK-w^_|c+Sv>r0MW{2YsR^uxON7ed##LfI-Mo1wU&u8X2U!?sLSw8Q8Mvas z;|xm1-=E`OYO591)a1$4$+Ve31{?0fENc|V;!g}tT^RL^sOYPRdURn1KVEc@Q-qjaF}DATo9EN(uGEtQHaevB$4Xs zrOJO4^$ZtH-?*qRJAr_qokK(nao5vrOCIk#X)A4RkZ)Idwy@nVI6OW!+2TdPK95gn za|Ru^dB!QUn>?@hNf^$_Ft01^<_INEr);b=G)I4XaT$uI;+Dv@ItYgge$bNdHkTJo*Yod-K*KV}nAmGU63k~8_y(S_V z=8-WqO=wsAnAfQ0AgA&T!+W3jBbRiF@7%uo8>014xgg_9%Wd^J;M%^yE%P1M7$ z4VE28OPiFyjbf2!8A(-*y1~DfIc&m9H{@WHB&&mEFa@~5D>bmk)Y*<2Uw9KRLS%WL z^rf?KF;U1jRG+hhGh4o~gZpI;OEHt(d63SMmegXu#|1kQB~ys50yO{$fMRSO?d_dy z`6o6QNaEZpJ!2M!DnD~4(4!iwo^|4`^Rcd`CY}W`vM{2pjThuxSv9hg z-^rI5!RUJB#7w=fn6FwjS%7v6Mc}|Y5LePRIMAuBbHIYsn|LG!rv6zGMWR;oD*L~O zH8{VY%jb|hU1HxH9_=+_3=4^i7K!|OOLNQrD-_V zSUCE;&BDO}Z0XKXFP<_|MLW?#vJz9yIx{x)+Zt}g&u&w0NRuz?)idJs`&V+LjT)lB z8=KF27#Seig%2o)zn{XDZ;TXaNF9<7u0mm2P0ZRu8i9@2jsx@N;}x4f><9-DPwGGm z5Y-apb^8-W9E+-56=8;3hw8-`!W0Y8Hj-h8u?%CKR7M*Pq6}2Gu^0$4V^o$)pjD_u zOl+>HLIV$xLM2zGuBr)y@}5{Mb-|Wc9Jqg*o=T(!h%n-tC!|`2h%yADhf%5yh$*{6 zRj>6rL;r%6BTn`aEk9OflC`3eZ8}y(6nH1?Md?N8y#u-8Vo)i?nvr0!JA^p#Q6Kex zYeE77Me(f@$k71BRd#MQrjw|oAz+rVFfovQXaG6~N*V5rmOkg};$U;AT&`ysT{_3& zS}DlcZ65R!zHM1urM=(VlBoRnIDyubaWDl)nCR2JWAXC$1}KnBR*Z2aI&L2f7D)n%EmLyb07?cpFsw%|Jeg~W2kVv1eZFLh># zz++Hoe?|J`Aeg4r!y=-)FuLUt`|)yp=t8HWS5p7u1jzF~#u{gEzuf6f@DFrcp1T{E zxh&v={y8NdYcTI|z*Hojvz`Z*J`+y&fH(gU0sX(uIjmtAl0V=}Hh(d0eM$Pgy&6g7 z^8BkPL%ylXJKqhzbJxqIId1_Jd3fb-ih>h&UL1Mx5J{x@toS1;sQ{o>zqgeEYC zOUv!oC#lOMeO}9a_Uz;8Zl<=(e7Ec#KNsCn)3|EM_(n#{Fpw80Tna5Qtt;C;n7Cpp zO`DQP(+$sK=dz<;|EyoreI~_b8rMj(_{VV%BRMV5(<5X4rKse05dQMGaP9Pa<(v}w zLjC$=wapWaf~j7Bifv39+Q%ClakO5(8yPXzqT63JHBY$ z`k-(6m9^|?7!kX)CHY&1ut%n#Osc#`teo62h^^&E&EAlAPYgD)2a|r?_qv|%O|@~c zf{^z%tr@U>+*17LOGtnyK`22;+jtm7%YfW(Pq+O3P6(6gr?&d?a^6pSIC7wrHvRHe z%;(wVb~C5<%1c{1k2lT!YzML1K+e6X9Da8#Ak>PNsp1rDSc0;SY@2$)slI7}J`L;2yIo=^yFEHrW^(nK z#DcSBMeG?i2Z%m|lDqR_ut-R4$sN?_=4hD4<}_cr7n-t-BP2K9d=g7im6 zAeir!+`o@tlhptyE#&gY=VBOvxel0HoUYLZ3RIa|8tiG1(KEV17km+xBE&R}o( z=8R~rc*er<@keMu{M)=2bKCaMfI?Qs#5okfzULn)J4)w=y@LF-PMpFNpN9@jLd=Kz zx+&j@g-8Ak`;Qq2I--Y=MV4mZ3y+URgAbt`C&JHj+*ofN)_gscJ*zCx&{h?*IT0AV zNxRqn(gM~PNqeLjH>DAA>jCS_@{Zoy;HHQLm3%a8BgH;Sa1C?Wm3-rA{2yuKbE9R= zlpNllDP!eO1jT-T7aUgG%GvF64xazpRrxuQcfI0ky}axbniT)e>Hl$#h|!=HlPB0e zDwUECCH9@g?Ks?L&md=iRIayF=>yGuvpuh4(G3w?YL`zkOdUnp!vvz7BkoGYMNbAx zG?_IYlipI94`1(4F8aRaw0;2LBS4*DfQ_6%QbwqxK^1~F4>(qExSAQGL=A6>!=tbcD5lg_$ z4Q{@(_vbHW76mjx90aq>UEw!AU&m8(fkplx?rRABm#nVSHGvtQ*7&5TKXwPuujdm2 zNSlucG926+DY&~dw!a4+$YOF=_-V}*Ckp8_UY5WywUEF@G&Fg0lFlqgMwWDWj zbMhubfh^6;$!>%!TyAA29%*DjqTt|y-UUpw6p?3`=$MP=$870XI@o7Oy_9~lYEWuI zf+OQxnScx9jQVkcx~4D)3P4)r%9`2Sid6Ens^;eB=Qq@RfSM+(zw4jv$PC|_yA!KP zNDv^QO(l!of9{ompXU6lQpdP*+gOaE-t`UG`etY0 zY3E|+kaGb|ork9^r_Xm?YHga&d&i63>r>Z!yX`m5b6LW>RXhAAPQ3Mv6BfDi<03>O zVB<^go-@;2)q1GG36|a)+$d1Pmiq=~V1LU`MFrrQlE#ahG!K?J=pzoeaS~|g+nzzmG}^J_l;vj8V{?NXh+Sn<`$LE_$I zG*rdWs!$8N?6ML6BJ(O7B2*&;wT17*YU(}Tj6g^}hK_1L*)q#)zJ zlSrRyWnO-qjIf~9sTU2%ltXwh zsK#;=$<=27bG;A^!+D%WWQpNea7TtOjD(1li1CaD8Wf^`zrh7mV1I7ZTX$n1IVX|K zg(8X+m;yz?2xmKE?tFu^6{@F<_BZT8K%uG7n zvGoyO)e1N7k74b{zbW?)=Xu6JDFW3aV2_lu3f2fL zuZT6-;b0%h-d;Fn^s;)A3-PR;3-2c0<~=r6yjh-BVB^2F6Q~Ck{&cnkm`~wJ@d#rfrlBQkW{4bl-A&kEyGdQY__D~e_3QRdbola5v4$o z6TH1cz~#Mt?~m}q?ygVO(UwG_*gwwrMPi{<=2zuU1}lvd>5HU`0znpmoIlX#^I zSOCewLgp25T%D<;MTH6-@z8Lkoe?{w#yBvM7)}SXMX8t|)zU+<0cI_@JQAtA}nG3fwl`sSeRqWi_|vJW2k z`$YV$O|#WX|6-N4JFwZzM2D0M#HqElHDd2c0FQ5IXprYRAARB^*P*pC8%W0Bs^ae> zOpxY%k_x_6`J|#}4++=d&D#`{x(@L&Ek*vDaVza1@`0htty1M;L7l z7OEK&E2v*Gex#@VWng~0H7&kX_`!|R)iYc_>BylVI2QAs)3bZf;}yld4_~IkVE-up zz@qe7g^qEOM4xAL5J`CZD#bh%^u6vrI-~WhfxDrTiusDnUNYz)H79H9zf%{Nhm=ER z{on#$p0V2u_@@=c%|hO5v}H#F(ig7e>PwP|r`*vfH~{&8{Hzd~{#C0Ax0Pw`#N2L@ zO}(-WVF}g}TZ@{CiVA^BStj9=jUd(H5ygHDWY3?0G3b^Cn?ow;;bAbpN9N5Bq;X== z%jVr6DH>LL@SI$4m9^~gs5p`pbaiMq{3 z%)*o6jBJ1`wk#M^`iPfqdyrMn_lei%X+l1e;oNMEnEL+qzTT)bCHzDzJ~ygP3R+ey zzELAJ$JE6oJ-CgRDYAPwZ7F+ao86q(EAPLtJ^lS%?n<*lD~RB5ihf%1`)68|%?aHT z9UXGJ#^FMnfq1Gp!_ z4JAbQqeQlYsPZIZePSLw;bO~9Zxf;#k}V670#|m1>W3K%McxMr#4X&BgzR62ZNE)D z8@8Pj{)CbRvcw&CvTEajqJ&i`2^eKyhBCCWKw6k@hOqYEH;VZ3111)CrdlT5rgpD% zh%GtOSccyE+iLy%EuR*+@wbIEIg!6TjvL}Dc)JB%FVc=82(nDOb!^Kr%vJ|#gwoC% zF6s)e8&gW5WoaC2z_x}DX-SV_1TKXqjpz^Xar!Z6ea>bKF`n3hbJcmfH7^E zDgaA>45LUYDK>T<*LYD|Mm7=|d`mDt_E?6`m)A7U_vYB^*_a>r*a;3J$Nzg~t1v(K zNq*WeuvhKN%k*8n1j@*@Ez4wSMynu^B6_!Z7T6#%Iq&k`wDl}(&+HR%IlYCLeZt@S z3p_dzg}ZjoQ+^9>xCSgP8=p_TRAn+03!mnWTs`vpG;qvV?mLJ6Hc~-08;E<_{NXHS zjCQ@}gDCcvT)nZN3yzW-XJyDte&CL_!5;tS1 zl{hNsQm9+*bRq6bPSY!OjQmx7wdh9)8{Ava(-LmN*?q9pj+%Y4Ba7E)WM-4?L7$Ja z;_O=xrMA)1t{5R&Y(LDkVt~t^#-Ug;>-S}sD&j5vPSac;iLhwSM55RLEe=ulp$o;bC?x+v77l9&2FdSc2dv zkP#L&s347yS)2L4topqCh6S_q4-dfwHNVj!2O$!K^@jC@A=}I=U2?!qD}i8w^TmrG z1gT4Li4i5kNLyR0oB7X?UPM9UGAVbb()resRG8B8PEc%+WnZ)~Sjr@WT;}|`^n;r3 zBoTw?dvOLKsr<5}^%w6By6?I%AhBcjOy7Sl|@Ujm&xgasTd3G(2PD{AK4aFRKM>&1& z$6LO;)_1)8jU;fBkkV$qU;`(Q>T~eu&55qa?tTq3;%krSzV(xVR8wJ7T49l74$R-5 zM&modtne`RGPJhheS=cTxHeKyfm6fwpMhy?U(`BXPxxp0=2J6C*W0!2hdL``((#RJ ztqO3r4bDK8@`Cd%a+R^0r@09Yp}`*9qLXxsX4hlAIeXKPt1gcSS|Ft$vI~ta>NeRQ z#J7r;*-iVf7O{3nQR>$E^}%(&UKG%Viun*E-fPPr*tOAm9oGJ@+(;)1ZZUwC$Z!-P zuftyu78aKLoRqZmKktQ}JMEq!U{w~t3IU#((}eZh0UtRSAAh_3q#6LOV=lZeJSV27 zT@y|I!IO8#^p7k;Vqjy?K4SA&7 zH#3FQJ=<9=r~4%N8xr;*Ql@Z)MX-0}%AdfKAMtIh#Z#SQqX*erqrdb_+FfY?B!s4kn{G%f1_Vq|OIMqMR%?g_pb7erQd1}omGA39MuI_FwOb&Vb7 znY-<%l8%2olDPb7joS7?bXd4l7(UO;+;mGm>&#NT0M3V{>)1jt?ZRdA=J?&u?&-Xx zDK`6Schp7U$&t?iTa4!6P=}^#Z~Df-*^OyW*i3#qecAnY&L3OE1H*YfURxd)^?hP| zUv7dn6kiWs8-$;E9?rBzuUoQ)Xyf-e?q-bzu4eSFC*euAM|uPXR(d}4pVrpy**JK5 zFZVHRYO_1uk3OWf)gu2t7hq6#z2xJw&C6%~?z0N7Hvac4bOvLKwUMXeo<|=2Y@X4w zZza5}T#8wGi@H6(?&v-7J;q0u_E;zB`WvS051k^8y3bCRJ)k{3Yx$jljY za%VI|(hwadLAwf+`G{=Q6ixCw95Ml*mRx0m-r&MetA*GoM1}bw6hR5qfv<=>S_VzG z?3H9f+WhqaZdo4aGc&jjNR1p|Y32B?NNbp@4N}Id;SjOJmhuF_9H0?0 zDv+gaEhROd8VKF5Z&4eZMu5tIo3l3t)4js0i4*xYk86S`vt~83S{Q%ZcxK5TRCm;0 zI0ny!^jYvbfj?$0(bt_%TmDzr{V{#EH7&29MO~dm{{+FOa#lEI&Sg1_` zWNf5h`9wir0_9MxR()p0nS-`00$gPS+(=lS1@jhkf>8KnJjy%5y)u{q{+0N!! zOyA91e9YU{m;UP%H_-#g5EQbQg^AXb@&Ed6o?c*d@xt(T8vb9FUlM|{#Z) zmPvC{zxt-+^8^FWK(Puzbq$8|7+F!yF4^?xCj23;F&V{zNP2Q3r73Uladhv@(HU8A zf$PS5@jQ$yY{bT_%a|zjj{FKt#KzXT%)}2Vjp87fh$Z&^=C#IY#Uolz`&Ycs-+@pJ z`>sTyb*jD2FZM(o(Ud`rBzTcKe}ZL;&GysMY|%$p<>(S;3njH+?kT^bP<>G^n{pzz z_`cXz%_!5$N_OVn&$Pl|)G^guL(c>$R-3^-2&E`081ht{xBNTt7-1p6KojA492H1P zi7;$ifARAd3 zDrztfL%tuJzdRF;f&>_X%=`{fvfPiGATK2pt>lbb*Cf-2DwIAVM!m|^(}3P|f71UN{{7hOB5Rqn(1zQo=u>Qz{x zz_^f^S)MtK0m$!Ld{9ARAQTx)X#0EwV)1eC4k!_AnEPie9cb$@OJr$ zn8UAbv*ea*YP3xZW@bEH`+Qr(IDOZ6)9AMD;^wb?&iB6py9d-K-Pa|5JV#ur^&N+B zp;KF@CPu|GU!MjdZ9XV%HXF0=Y^x^I6KGt zP3j9+?|K~=@lzhes^Vz?y0N}E`PYe0U!S)4^gRJ;^9Sea zr)vwoYJ|u@-8}=K&@N}IL;r;4)7Z_EEBB(KqU;Z6$j>`(6zi~<$@%~g3l~m}DJ#0i`Jm=5FTKipXtwJxV|Mm-4LYS1X?XPwu{|VU7 z7D)4Y+|n8w8%rp|ESu}b)pWe4+aC#ELUatIj?RA&ZhpLPI@E3!sken$L&HgTO&r;e zS}7S?JEsp@MO+Rhblw*99Bh-4?A&eew|$_!le@0hC43FMP#1KIjmxkxych!Z0}9}u z_P&jP@im7+y2M*apSPGGbB?#j7P<-EGbN7U3=4(=QcZRi0tH?UHcfZ-efyHVUshOW zyx%)4%QZO zCtTa88o*pIy1rx`R_qS*-4xF4(JEffIvDt*Y4941$MgK(cqgwNTF(1ln`Rr*i5#_R zijA`xF#u9qh@|y*g>E5T{x2^f<|eQX&;4;Il$iSw`CgA2&EZPmi_k<%&$}^+*J4TS z!T9UL$LqIK?Uj+uo-qQAoR)?yrW?i=1yQdLt;@HU)Suw227`2{+2U7J%M2 zm24?9M_N|4xrb5q(G?&2f!2hgTTlaJ@kWHPmyQ5B$(a4@oTR*EaF~=IpjAk_AU60_ zLd8^J$vy!Qgje{>&g+uciAu)kYrKW3k#6USOl%|3pJX&c3}Am(SL6qpC6xi}0f?o) z{DC>H2T7z5%G0=33TEyY6Eve@%=y~4&lX`yfeyy)LI$gRT!<8Brsz#btq@~=yMPs2 z^?-DV8}5dgug29GwgBX%d~A0sFEMMcvTEj^jcWUszmR>l(p=H6ot#FTGgIE4U7k?S zJuxsRsfPScsT=G>SqQ=yCQB6gY=`s6^Efx)Dgd=qu=NB}#+G~FZXMif6CC=#nQYXT zt7GoH2PP~fpq&ExDgYqiAW%>H0-9APa@9?dE2@_W6ClI`TK?$W87nS(qF+4GYjU^Q z2TdrTdg7x5DqxU}9zt&5T6BCbGgB6t9p%__?CqlGVuPDfjPWT^vuC7y6dd0!@-i>m z(Q-uHL5WjzMZ_+-ONvtiPcvfOZ(}9?o>SH8d5Z-fXcKqXES&K~qvYSh8Ku154c^R{{JYnEM>NE){%vloZnv_gr zUYQ7WI12~IdeCyPyS@^t7Iq-)vv=wpN|*n-a}QWVQFQj33@tptbYL$|Yk%L3t^hYD z*%h)DLF1RCze%F>GhvN2oK#v{^f4mtgJYkYq{o!FqVG6zISk`jV#M)dU@1d)Qq?n+ z6mzR{%PQfI%<^aCFw&!&a#52dOyB;@Ze~3sEiM*7uI8$K!M5BfrgDNssxH*1{6;H- z*xmQQ*=>I5ExIL+E<)am%LIdH$(=bdy1^0r3*#f9oUCxvUoql*F>Xx&Iay0TYcDeHPJlH+? z0C_>>Z?6KE5X*$H;3dC~mt%DDwOs}0kMZZSMYz)BO_zfvc?*&cNM^w+OthTNsOh)w zp3AM>+$oIvozpxbq!~T0FMr~_vW{$phg&{g^+Mh+r94{hbcyAjK85^csPTI7B=R08 zdgS*VjH0B5+gMpTD$jkI+q@aW0@&=QF6VE|AF*%THYSIZ%h8#c4+&(TX_V0&+k9;6fgtslj8`qXI!mNlE>C!9UUIm(vC!0XMtR zCkQbRz^+(nw8T1DtU6J6mx-f70HRGMk9`0ZjRG)9vmYKF2Kt{M!`4AU29Y+cBAqse zz@f|s1ao|PG_P#MkC+UNx4;X#%TeJ2Ti@I7iQj7sCC?a`d-#4iOxIot6YZJXW*ST( zE9*K!1BEx~G7of^J&l!+?AG``6YJ^e8V~F%*1w5wvg$#Q z{3Lxu4XI#l#>rBbe3Vpy!KwaPJ9iSF{&=ks(R2t7a| zLSzCoSz*oQQfD6+G14l#U>nH5-NS z>+R`cx8^L^hNJgy%7a*hV+fx6(Uh8%ZvMk7y6no`Y&!TivqipU7^+|p6WL|#oaBF2 z^vrl{fsj<-C{g3cU-TT^zui`)f52PU1}wU;SXj@DeJ-K47sDP3ngp-NDTRrG;r)$a zz0zu#lVG%X^O9~gA#-u8v=H*v!=@2c=ChpmO@mT~3|K6=1~BRJSuF{_N%Cu_-8!UU z8mMQeE+fdpoZwp*N3gMxx_e<#zDHeg)F#2#gFpFs%#noc#JIN$;xeU_u-iVoOs&_D5h~)1ES}%A z-)f}$dClInXPj!;j?5jv{FGU>W|{fX6uJwpa$a*Qn4cFh zlZeGfHj#o!D+&Ae(KCONlEXxwkg1ltRdKFbB&xe)GgjvLv10NYHaUJv`&wCN9JlCK z=jh0yxftE*bSn3&#j3Adbb(f# z_NlBiIt)pXT(C;hFNdOEqoW9B#pbCs%rl7MJ5-|d!CWDy#+sE;LtqQvOifc$>6u{? zSdek0p(t+sYjb;YV`pUJ&lxFnLVp+|U?gUNAjyDy15ldjycAjX`U=etud3L*$o;ft zmFNk|VL^yW*VK}VDo#W?bQmfWQN3|+M+A|^FPJD|{<w>&3<{~vfGLjFR0){YflvmA2_OOQ7+sUvdp2U- zRA(WFRtH5TfLZAdZR_GsN5?xk|Wte(6t1 zd2tJ5j1Y;!CGKVlimJ%FdOEwMM4Nzq5CcQN1BvK6F%a780&Qyu` z!B~l0EvWf;bR-E1^f0|nGXUnxFtD&qjayFuesij#D#u^qKp>`WYxL2PWpQyapgffi^yM#=s%5o5*7-uXt&QB~KS^;}8|_9DS88+mwyKcH6ETOv1{iq(9ryQ9E7>Oy2adY zer9GRC03AhfA6hfDsjD0;HoTFqVs}iVyMY)d`CTRyW&M1jeT1A)bAK8mYdNqk;OlH zSTTLQLm6XtSj7ALUh!tML#VF`@bPF?!x`)AC{3`;8@TDb%%b5*sWiEa?r5vtEO=xJ zf9_>rR620q?jyZ9PQwf)^o;;yWB3-4ajvS1J@$3{ZEXI9=j80FmO9T5jQgHv(B}K- zrsLoDy-NM{f%1ypIUO2|*Zg9FLHY!=xSEGY+N0vjW!@#AoD4uJl3WFMqO* zD5KMbMi0YM9#`l?{GlHl4VJiEkoSbQ|4kZD>UBT*S@OGQ3=FHMZiiq$O>?IS;mx;# zs_=7KHAONY=%CTnL&GV1>-t+%Kik0Cs5eI8%*hjNlLb#&G@`5mP!ouVh^WwB1i{Hj zDesfb=Rd_#(olIifbhPVGM%1@m~f?A5o0bMR8SH&0U*X1M-p6x*B_3?j{J#{c2dDZ zVdTUG@FWz|$TL>AS%ham+~ddVI)ya|yUwYf%rDID^L+4j2>dA_G5qUUjjC_{;6YYprB1~tv|*gufM)NPEz#XjI~_5Dym^8cb} z3{wQ6dAfSV(Ce!JFgXDLSUaFc1+JflhK2}T@PT`x95ukv05)rWG|A;DY2-85ME2bX zB&sklFrZ5+^P6-e58MB_*D;8+^yC5!pc{W#Gw8L?yq~&TAG5W^W7`c>81SEJtgJQ| zIX<>nr~QIse94NGBx#13%22_S@5=zGAGgScsp-*>Yh5@^wxXeY(J@JvLQqP zW;O&-a6kjV)7;<=G}^ck^2l(2n$A zY_(JwgNU-)82Uer4}KqIwg~2N8b~~({8H36MsRDa+Yg;S| zHl8{u+Nc7KXfC&5rQi2@>SO4HUrUWCQ0z0&JrNj9L0F+`&z)J|<2VCy+`+I<%II__ zvgE3ZEEKcq?!v=-hBI)DwHH{p9m*vaSjITFXCuZeB9)a(emuj~@Vr}YTQ6I%P(8Gl zV*b0ph$Ipz%p3(8vnn((-;e`IgqAE0Zg1JcK*R!l3=U{D!zy5^DU}c#alxjx&_N=y zqO>q^ly-%UKSSXPp$bIl0>fb0;t!Q#C1Qg_#J3uWOiW_y&S1(!EaEnVkIcx9v|+%O zHT039sg?R7`{5Y87X)GGkilBLp6iM8N1OdJH3O@h+F<0i%2 zEkH#J$Qcd*Y6Q$e0IO-61GI2SxMb!MSAdjiIs*b|)dPze}DA z-0=zfymHX^UVqEp;OXs-68tS!oki%a0W+@I?HJlu#qBWgmj_AKPrq8ZOzleeKmU!3 zg4!RitjSxgWxP_J~c>jn*vh5sBzYY`58KNcUepj`;=djz=_m>{D?ny3kfu(F%9bZ|_v-+*|58BZ%@NkGOb7Y-AfMLf&lxPSj+|G^1TxIk>F%D{k5~Z6_(QrL0^NYQu zZLF1h*CLCO0MlD@xcR|9g0{C2f@wHm1)y|$zj{fG{|fn~d5IntsLI5KioPmK9q~!9 zck$tFSfH6~}P& zeiMbwerm6HWnzIc>`e{KMzvABWRbn#Q}+n9K5zw?eD3wktSCxLVA=o&2bpiuBB)PB zDO%JM5o@qiBo7JmXk=@UhpJU;mzsbXPEnLh0BU=izbu7hY2XIW)w6pQZ~Nb_fI~Hr zapjy3fcsBRv-9$f4n^Rt!9m}75La;qivi+qbpO+6XvS>2c(J~K$TG!6gazuIJCCfV zC-)$9Dp@gb2%s|6xX2SdAB{ACUW6UJyXCH@jUf2V?L^20zSj75Z^L|7{oqMckpISY z*m;iFwauYp{68@%^Q`*o!bFRzltJhHNelWz=foFvQ*)DHwDl*xO=ycM+4vy!3ZU-* zW)bsNT*`9fML;k8zs`dUNH_S%PC|rEOZ@-CdXb=Tf~}F3RRJLxX;-H+2ym98f#MSYbHDvi<^!P(F{-P8c^itF#LMSl>0>yEZM;wk8?8Lf-z+n zV-Yi#$>vY^G3Ba@Pen)Z9=pZc$jV^jvjcKfW3dsL7JBoh1Iyu~6M3M#%IH8*2Ot6I z9mGqMAR40+rrxhnaEWzeH=n9Jjz9I%Mmb=IRme1o79qYuC{tJr#Y-Kc524eft0Uhn zdys>SL;aHdaU(*Y5$?TdaZ%voITRC%PdIAi^iKw3X)}spw)_qPu)ro3v5aq)Im0FN9fu!9KN?oEPqwY4;m=CUMeypmbf}pd4Gw3YncCjzfeO- zYeGZd1j_XW!PSssd<{b+M?lG^wmZg5htq<>2nap9n>l);>;(lTA3f+IdfI|xiMchp z6O(SXDIiidzAnwKWhExuh$@rheLO2(C;4vde7xxU^p?>GkZ5*AZ}Y3~Ao0)n(8`dO zXK1+M@Cdqb9|m)$@m|wzemt}0dv*`MPj61sxTtO}ghT>!CN{u)`*gh{rLK;(`zGt6 zwzhVLi|9WKt_+~$0IKoi!5*CI6E-%se%p_G+Xn9!7hpEl-r#j;QKc-`#66Xkkzs6Q z6=^&YZGU^HtdK1*3YcW9J`s=HGO$0rz2U`aYJbp0}tFt#St*xH(u6zTPpbJe>IIO+ZUCh$^V?snaeoL8v? z)*26I2cQ?0d# zX_=Je=D{-H`&{$>ilgUyO73%=wElTje{3|_oE@U%C7sm!oX644N$B9YPQE$|;Q z@(-=Vp3f*Ytkam`K$+qNimUO(#l;K(WFGSk=<9d0>h>)95WA0EN)~3U@-WRtbrs&^{c2}=2mA^6K|p7`ZweJp_Gwmweq8Z)LZY&krWdv z+ou3fDWc0t_qHTa=@9vP2R_M2#_-e~EIMJs%ih#(D~|TpUs;(aN9|@C(N8j765tj#FcCy9aLSe{QF!0=;YTjH3`mzZ=wa5?V()i!e>_5)dQ<#^ptau3uzh|0v;=6CXj#`nbaD_FK-*>NvCG z`y6%o)p8_4xuNdm1LIYwi+#e9txsr!wzf`NBmVYCaNw-;)9vOHT!Q0A(B@;#2V~FF zAK!uN1EEQCeXkl$bl=28H-RE{n-nrOA=m| z%b?v6Mf<%y#hafWD|n9$A2$z=Q`|W_Io_wA^?mC$BA?H9Xj&bG2bSK?H_iHl_ei~8 zTAp&=JBU%c-#Op+>6z``cK9_=2VvV=M@ZlAYi_iAuGU_sTwPo5lk~TRpT-og(hgdD zB%sedqcA)0AACN14<5Q5e-U=nTt@XgaJxMn5jST?BBCNA3-tZPL0N6yeY^~Mla@YR z7u=bx!XjBucdX|&@?j}X%aVoq{;NIUkTF$hF{q(W-0l+$%nOerMJx_bG(tq7>V*9V zIv8m+`l1{O#-X0CFLsDlK?-l~XZ~p-BxYu;6KVmPNkaF!*i}rJxwu`*Ev~(mu~9Mp z?a#<)F4}j#)CgN&0;x7JliQ*~Dq)wqJl4~UOhHy08!J&jE4baNRdFj%i)-m@vtVNx z5l|@ixBjoj$&JAn#;4QohNiIwaENqFNBC6r7_bzfff^EGvl!*nDx#$@j47e)iqLA2 zs>HuVOPg|Cs0;p@<}x6%<6buJ+!V(T0JpcdSCK?BjWMX#OX%S8GPbL$E9sYiSC^n! z1AckC3c)4-3*sXu7P;pf(QiMDgj8+;47A*G`e4Vnrh0=7K6Cmtb7r`habxz|mCnVO z2K*LZCi?b0>yOucZ;GDK z;K^~3d5(z#N8n#Z1k=HKT;RuyltfAn?_Omf!(Dgu{qIx4Bd>!iYU@V$V;qYUy9sEV z1s8>VPCailk~Cx7Yf0v?*}-lT%Lslmr0HBEKSwuj+lY>O?s%!C<{oY>rv=446qPuG zebua7`sN@*l@Z9%7tGKk6bJ?O`7CR688lE&cAZ&_!V}D46;kl$*q2%AIVM8M6bVB? z!OY+&m`k^V<&}-?ouYApyG!y1Qh}|FF3&$bAFIZs|w4sC^&$&I?wOHEF? zxk!DhNa%}WfB$PwY(s1-ZIgQP`f%UlMMpB=rbYLE?KYlvOVWS-u8f`mkJ+Slp8;B> z+o0FQI@B>a3aSSscJGy9?+4ebfi@6|0#^qH#dXEh5DTE1spn)1zZ5;t&eAFO^8eI_ zcl&yW3wfZv_x%@Ld&HKP=#iXG9ya+7C6DuOWef4bD7nDe+iV5$(z&7kbdrk12r0=w zM}4M_POFZBOIOf@D>|akg|E`sL^LvuLX1DS(2`#ZhM3jTpbUklV+n-3=rv}nlCw$z zt1kqUTBy9zNNFJ~+Cr*Hj-ehC>BkuyNe$vo%8czPaq`%`)5tJ4VP>xr9b$DDa1-2 zxb3@xdv?bowod4FJ;okKy4&j@vUiBbZ%NSa(;WS;SEsM;n7yMFeU5pvSF3W+*C%Nb z`YeF2Eh{~2rRx=^__kAXedBxGb6vCXh}(0+zx4E*J!h{BQOa9)$Q6q&&nrFWdFQ)j zqu8gOabTemSVsI<%nZEnNsQ^fL{7=>?rp1LomZCB__+s*EId7J;ztOX&|GL=oSFs(HQ>csHI+rC#G>UF9rC}a#f%QD8jC@AgP}U3 zyw?P1(l{uez}Q!<`TZ%JlDzrlQjz5W_G&6_2@=B!VJH@q7Q1AKn6UZMcCj$trPZ)& zg(*H}iI}YN-l69wn?D)&EgkZAg*Vz=Jsq1sYS~8v; zpQBcwp*WB@!{G+1ii%rd4A0O0&`s}I^{vwIRKD3>=k0jg(E^@%zh^!!mGaJ!Ny zdLA~^%YzqMXGlpG$M}Bk`j?m6P1Q9u4Vkb(PQY?%xQFK(a1hirGz{F|a{(_ZFW*q6 z>k{VLd&_hM=tB8M(ero;f(lAa_~z_yU#TT(ovsBlEbnXfb|-H#4(g@-CL5pKZIknS z&+V7Ex{w)gBa-+phm1r){<9W?PJE`o6~3FOFY&j6QNcD-dTez8>PeuYgh2lyyIi)z z;jNF<+~1N4)l>?_RpTeawi_7hd0c zWV~tNzkLJJ0$#NvSEH;A30P(~_%}lVLtaw=f2B@mu9=ZlBL7qHl^(B}>{K*8$u18|1u+By$#W1(4riADx(8fdN*tdA{UDd5(i=Xz}bI66CTXn zHL-V+($?v2xCK9`tl!#Sg!J8Q2{(}h{D>eWk*ZMQ^*oy1Tp3l}8C+Kz4ILfhH`Bt?2f`ojKF1|K;VF6^Ve%;(Yxwci#`J z_t~EJgV+0CLc{uR6C>_NWj8uPEdwU1BK*6TvnVzIQ|0Qp$202UwF4fT**kmMXYZ%i z-9dM6ik@454#`l+P@wSM@c-WItKqYw@c4J1qw&uAE`0lcA>&OFcaH1p_~=z+!5bFI z2Jg-W;Rw-&ZY@tF$4K;yM`-cwzK57;!teb-20<2oE1{f%v@eS=`{Zz{*qH-Ku-y!_ z74h}urJtuu@3;8dn~Q+m-LKM1u;JhtizgX{wW102v|3EaH6b$D$MuLngL;>~>bu(f zbOS6{tlAP|hVJzrmg$$gIpZywYr|k=3f64g&=1qYlu@eIr?K|ewlDZU>G|WvgNoa0 z#&QDKw@cF*K|9!W@JOsX6p6FnEM=*2oHdn`I8BvzkiH=kV?)8i!NFxCUxODpP^6`$ z1w3v;0XzH4t_$xN=@^na4F(9>@%T}bdZ0F3UF9np%t@Ldk@oFGk4qAT=$)FH8uNBi z=VJ~JK!@7BN?l*qb#ZlFSzXPq{`s#ThbX;I+)!Dy#p4>?n_MF*@%6dsb{%7mi77vM z@PBu4^lg~rntq6JJS@qHc^US~u(YYyEXh;=`4?NJPMTHJ>M(_|s;Di{75JZ;8q9kR z^5Z-I%SXrpd8Kr$SQJOb*{Q>nqV@##mTqGOLFL{#QU`Mn)&#%io&Pgt4CTqdQnCXn zlUe8-nIRdZkN-%YhKR=%VmWh=A4don!eixh`nuVVOP``>?GGa&8HT7pNw8#GyD+W( z@;xe9p<7>cXm~kESv0cb8eQ!-FMD(E&i+%-okq5taa!f7f{89#L`E0tvaGWDS!www z59Puv0d}8LCF5pj@vu{>K=vvnTmB^rK3*n&!_a&Rvz?REL|-54FX>;ns4)O$u>;iK z0|VmG(b4fO`P9~H=wi}Krhy>(Z%QJL+NF%4v8((zXtf@0xv>`Ery>$Z*nFI}u^Xnr z?@;=m#3AfMHS`-fsz`ZpI(ZN;$|};8b=k_~hn+`(=qo_NO04$fB)+QMh=YKYL@n}L zXZdwva0x>YVv!293$b-Wq2+#NCQ*H6&J4poLkdcrcK#WZ*iRf4>JS4s9P$Swe=>BD zn@+rDWf3(cn8Ca$41X{sIbS=7{1X~f)*S4&EoN+qvvmbx8S+PvX&H8(4_4NyA~npo z9_f01QkRTggW#_nYVF&IDZqJ+_p{46uo%}#|fTXI~nR$txFu4Fnn(b zTK=MAGOoB8i5;;pnqDX)=-JbFbyIkTfmEmfgBx9SBvCtDiM}GjO5X2pR{yg0zh`Ux z;Q8)Er^_Qr(Q60mAAiBt`$YRsx3_1LpX{e;Plb|ifPKu=ztGqHy2sqm)YPOkxW7&n zRI8O+W35dh;MS}EI;4NO`m^Q=IQF~4DEAK!->)L-P3oH@)s_+qFs21Q1em?A6TDv8 zbG8d|o`Hp8J=6I8VR)Z3-Zxbos-S9S&OW`nd#zr(`{k?a<|C2qdh0}`zz52=cW;@`1d7bu zuHp3xZya5ZbKT?BUDpH}E)wT93hDAW0CR~SG4DZ3AAdi<$91-R6EA;%a0q-PGhHK1 zbu6J;{ad`s(Z0i~GmEvDy?zW7BEs5-?X=M5RNgj1E}wmb$b_0Z7^?z0McV*LG*SAOB`w}%tU5ohCZ?vQYAU9bVyj*2{-4QE#R8#7VXz}% zLqkK?+g)SZU9TL#EFuajsup7^gsK`il{4bbu2N6CqPyz%_<2#-rrxHZmQg|)*&(U4eQf;SKa$s{MM)g zf9CL;TkP5us09?u10O5U{iJIQZkjpl(E6P}7kqA}4=mUkJ8<~h@h`7qj_drSs@BMQ z|LC)7^xyZD5{m}{)pQ$dONr`AW1P!l4pQZb6T6C;hMbVkMiq$S$T9=5b34NWNbJkLn;Tg(7 z=SkavSa!~ASa!o~`c?XpOlz#$+aw0E=X>Fp(jhlfAExT<}yi0ym>uPO=`Kff`*!8E%} zik!@G5+k)<4c0k_b{7RXhN9?CF;Aw&v@h;J>BTt~q2w>Z7??-Fbxx|zLLAwOF;)?? zn@ZWxRO-14eYmIgyEKsrZA2_YOHsodiI$x{ON6m$^U2o3CPRCewyG*AIej>8REKip zqHRs4wYqur7}%kxCow4Lxdd#v6UQl3K?ex_x40jA^yvVbdjfn!Y&KP`edZ z5S;d$M-z`cJPc>ay~P-x}4o zKf3RKc)gt&fDekaueUizf@5+#AIiM*M62Ub3wUOh`BaYHghIE5V;tD1UsrP9@T$At zxvM+x@xjwR;lQAQjy&Lf{cBG#s&`my*DWIP%eeXDajC!r7z5XOA4cJ!6~Q`yeVXuINUwf{aX10b8PAcBmko4 z$&BBU8`%59lE`NA#eCA?ew;F=?UrDQn9$yd)qDm{7_&zysx*p9=cnyeji*hjr4h!O z0t(`W!L==T3g(W17YJ@=>;ty@7Abe9o+-=Zw^sozH~6DrW}OK^D*-E`QJpWtylsvH z{XA?$>n`Vf1ofQvEf&Vh$4y~fUrGda1IW2aIgJ|K()`TO8jYnd6Pn67$)q7Jrl$+K zL?Q(Vgt7qKe_Nym%cxgkDD(A5sEGJ7RK~?0ONN4J5#q40V!1=P>6HF_HS+u-_zluA z0Ttif;1*IV!IH;HrxzxN%ny2E^)=xh6!ATvQZ{Y!7cE5~WJ1q|Lp*&zLt&P97Zs@# z4Fr8vWD^{|bcFc}dgTCD&Rgk;e_T)@*Oe-YT!1l-otPiER{ABaQhVJZCMMP{5ru|U zG{sF%*+tPcCth84Jxq$lSamU~JVVJEF~C$A)4Tf10XQpzgY#h@z|~+(BIffLsMKk0 zaXdl+KT)vyD6f3pxlX&13eMrcXX%=nn>G4Kt`t1m(WDSHZ7Pj|I|Omb%>hjYxLnJL!S$A8uz zJ~46TP0uFL%gf8Z@{yC*_|LB<{F!qN{$E2U)sN0shlH2Vg?l{95EaxiSPEQY(DMgA zDJT|4{^%Kf3G>S#o|&Fh+Sq8||+%sH=@XinD_NQgo5LOYSc`JU_EHj9Sxbb;qrP=gq`5 zi3+pvb_)*ENn8l?tu5D5rW=AxLm724p;dHVd)w&^X6Cid-3Ki65m&2Bntt9yhETJm zSc~95$(4?t33@ci(*p)9HtCJ!6_k%JN(mfOeodv{cK%h)6i}J9<)4s&EG@4U9d(Mm3qm)n7lwKW1F3={@Flg%>UU)7n}o<=|B3 zXk+*II83M6=3jPYv)}f$s?HnZT}zISL4NP1J}b8vK1@2Dr6) zlTY0?Hno4dbUM272(BE=I>ypVzgBw-0^?@H7`s~fw_ek$Z;EkTIlmFT>!N}Y=EpHo zq6mN<2U#MNir|v)=6aF^)i&q)`aa<;R7E~!s9_>(R>REzJrb>!I&sDiVp1JP1Q4fA zB7^vB7;%CIlzM((3HBR8iDppJDTp1AHlkp@X|WeOkdWO?n@(vMljKo}Fpa*F)}@}N z7$4v=PB_+@s5_nl(n%_Rsxg@xRDkk48P-p{fLc6B-)2)=Q`Gy69Ok#zfCSpt>w#&v zXZQ&VgJCSpA5T%-ioFsj7?V&R4rM8TbTQ+cA<1MDQ7^wpmT?o3yumf-&W3Lnye%DD z&HZYDP{`F~W9JhdN!Vpg!(AraD@i9_Wvr??Zz`wuMFF}~EobXuwDrH;N}N;i6I<92 zoQt@*jTB16suRHxfnCkduOey+?zmuzp+r(mtyEXwVT#UUGj|&?z5?n zuJ)5|cgI^xY&+GEmcffDSPJr2r(A2#10~pKAVc>%uFb;{ z=UFXJxjUR-?;#eM;Zvp6?+k35krSGfagTFXvLAA$YQKFWI>W;>y}0;dL}(3GS2>C0 z(0CTQHHxtk`e+<@#6iq?ECs*x-#qf_Zh8G^IVC7_>dL6SddsR-HZx{}dmL)F!8Y9A z-=7RMu``24H9VY^HExlyD7=Q&)>Efdr#YTBWj21q26mRgp`$H^ytEU7yY@7K8)^`C(4M^h|0@-#9kx5Nu(xQa=z!A!soSQc`yTp>x# zD4(8Y4C-p#s`%q)&1vVLvpmqeQ&ts?7)`B%wm1gwED6Xhyfo{=ke`l`fmf?nP&VNh z9JB1wAlDDBRA{j~jfK=O$Lg|ZZ@C0C@;zx8AwH89kP5D$AIYpbOrlR!gcC3&i_Lv0 z`~;qBTWXQNEm$~B+aL&}(8(pX)%{;iwt)gAbwwqjG}oF3)gsYi5q3sYChdYSD5$DY>rLq4 zBy$(?M4}VQMc9=e>q@2=6*XZtDuXK!y%r>GSyh>*G*wbzD?pN7QG2ZF4TZwtE7%P3 zpoR=35j~+p2gIo&DeFkSODtZHlevtCFf%tEwP6i{dy7TM;$X^In(efTsf|SY(Lhxs zLwYoSJ|UqDEXRWaq!9|mq~LSfs@1w#X3`lR$=(Rpv|x9q;#y6Kkwj$3c)~0DG5IOq znEI9XV=+vM@j@!(9S5;`(9)E7OC8AkDafP$se=hV4D_Fe#gO%p0PjyQGj8|s?hQ^q zAHD?o-?(}GF(mQC9ie-JE*)M=auLY&A_%DbvgSIYi+Q%7Q~&w!e5jQ=VgXUvox@Wl zNatutLwRcHd8cynLF3As?s4<6I-Q*35 zGb|gyh_^Ag^6lj{?!{kv#+~IDHEdEccS2K*>dvv*R;BT_`!f3D|3IW3E){;9Oc6kl zJ(J$oz5D2gQ1*woM3QJ%vx=>6xYG-+)kAT@UkQMsHF5Vwd?AsBTlbnyVx+WWD?s~X zhVmP^-&aSwrVcDGCr6TGhQC`M4iN@Uw^{ZeCsq*$k7p z8wbb7^uwcu;ClYdR|e~~8D~f})7msgFNcxdUjCPtk|dSKkx7@qdv!UzrTOC0d;bW^ z0tObR@uQedZzEd)=5o5poJ_m-T@(fIIF$NSMz+n>x`*+$BBJyeL9JC?I5)-6a++)& zrpLZLM9=PtA^Q7IHBWeu;E~;YG!xbp771v8lY-C1htZF?(=Jj!v8OpoEUFJFNg56< z^K|G>GQQxcvO8Hzq<(^@&wk~%^rjtL`|Qu+FY!c=A7Y+*U48Sy^v`{ZLAh{g%WDd(wP z|B>!FsJ(YeOYpsPq~T0b&U0a&xTwk_YN+vKFR4Rv`b9)4crN&P8k_Cs^xY2)lByS9 zyND~2Fd$Nxk|bTs|ML=Fexpbm^rG6IKO>LmV`p^IHgdkFf@9@Wbivja{YwiHR6s<`c~F1_8mD z^Nrj1;v%+=jt*Tc2G@5~(aZ~h|C|tHKZ3Fw1tLE4fkklI5K-w@1HW$vPYZ68O`L)Q zQD@P-ISBISJUA0{nXXLw4X<>|>N;2MN=`?B8HExvG914(fivKuX6MrCBPCy4uDk{8COP?}6Mp^GL zDdjv+uuA0+JHp?lnMbPpEO`PQ+{}-aoYXv9?3308MY=T9#=EM8AqJC~iLOePu>na< zVufxEpCkyYDornx-58rB2&o*uPT+Ue15XY62dac<5}@1jT%oN&3JeF33YSKd)d1Ux zcmS0Jj*9RZv)IAOM#2N9=p>rohF$L;#0vk5OaGZgFXhk(*npq*1l~>l38UVRW8cHT z>sifPbJ%)5Zo#^%+a3R2 zoP#_xiT*TD;IfWG?Y`YdT41Ni{pFOqTl?OO^{`iW0T*^#iRc?H4edaxV$wF!d)(M<#hQMXAJ*_)!ZS!5J3O~3*0=NYQX%YFpb;Iqh?-h(R6|^T^2T= zGxEN$3^`tx9ir3%Vfp|ikl-Q6oLUizjeyV6R z(3|jCXLhG{q8-9$gu&d1H*V&nbgqYS$>{$fxJ{mClhw61xk+h@l{8y$pap0D;MHvP zd|*T+;@#ff_B(J0f5<5>2O|JF*qe@oR~`3^(Fhx^YB`5ZmJB76C1*^rT3Us*CWzk) zS#=~OwyJT-+bXzS$Qq_lA8{^@! zs$_gNV?DoQT9#KKbJwMU0l*L#J-XwSoq3oJ%skL5rcjDaLC@^v{8tL?fe*zcA8Qgz z45+6`lQ@p{uV`x0idH+(*$5Eh;n26z-JmH$@|ql#<@sXa%=z4B}+Yh>zQd@7V02a@(CfFMi=y8s!C&+Zce9e^MRPKBM|y1S^; zh$_OFCISSG}L4$jS-JS1bhOZuI3y`N>8(Nze-mI8};b z&6t6zmX$m@YmnKZ+|j@HAlP>SoNT-htBf`0P}WOYJ~OxH2~b|2{Kd&-$iOpSE5M@4 zFm@%2%27qE01s{~nJt?5fT}xo?Yg4xq_EabBgWw_{?swk&eVjb+VjD8Q4`F<;dyne z;xiWcOBX`53oo$7IgTL22t0%^>p_3F@j^d%L?J|2CcxQN5@(oxJ{x&rUEm)CwUac= znu8$1JjrimfSJs9Uqw?qaGxbeV?_%xcK71I_1D0gkl<`*~-r3<+#)hqIHYi^^H(B6-ir5 zw2+5Ff{u>VO~>i=l~VAKqq{nhiCWpvz`+!z@!>NW@$$9P4&zx>j(&dvc#Qs1a#a~z8rDkyM(_>PuT;ra z!B?nobt~e0#F&JAIf!0NRsynq#J+#Ge}(fLVcMhf?yXPbIoP5t^$spP{>ucw0@TZ` z?kVsZM-D=v8L_4_XJ;gUbsPrdjSJ_XQ(3Y@EfP<0Wi>FS zwO+eVx}L?IXVX5*y-MAyJlzuLb|=?kLq={`yW@0nPnpR3O zAASyvTEEHP<&OSW|6t3UNSiuhmlKJKFQ5i%zDB>_rVK#^!WaCbCLoy)PI*hJhl`6y z%{#_&RlO@hF5eE7)#wS)Q9Xwz8%dWPCqi;q#6^2{SWrWW)PBHA8Esq& z?=a-J#`L{Zyu}#8#hLEd=_Azc9AEoYp1}0ha}d@ya)9ee%yV{lB=tG`i>K)hN!HpjvqsA;*6aU1h}6AFzn{utL0nApE186uiF`fUat> zFjL7663TYp(Pg;xCvOQ*)IRfs5gwRQm~P4!nJMvafyorAf3U1IKalk1_4|JC^xm)G zV9q9zLNX1h7qLI+vtIKRzs3v>zRIp_3XU?MKgZh+&0dkS_+F7GznpikkL9K>C2*E2 z>mkh5f>acqR2*BchlZWj;5N<*n9{}Sena+a(V4F@ zmz$4{!b?@pg>+a3esq{wKpq7^3@$GVHyb+|C3j;bwW;bKfZ8fI7Fmp+0c?C&VoU%W z6N9{V0JV%geRd6|1iWNalAgqhGp3#d9$9cTwYSuc49KS-M-1~YoB1rr-LDV+w4jg? z%6UbpEi5vxw-So>Yw87-z2I}DJ4|eUshB@^={S<qGpmD76_)7N82WEq#rJ ziS>xD;nL#Z{~PGFLsuX@2=2n0yssSt(v?npM385ylg0l)H36|0EU(T~f zFbW@n6Q+O*g|qdpslPfdfG~)tJlgLKCsVR#A!8wf)TL_~(m5s&l^{qAK$ifbwP6dB z2Z@O%GIbzex1$#Rig_uprfh3iDN+ztnE+)rDN(rj#Pi=Mx;C>Jk#$)zj^An^S?sTtZ`?v9PM=O%o7-{s?KdCw`F%UMI%PBP?wX8C6Of(Ck{{CFFD9^JmVcz>IpzI^pDem8aZTJdpuef;x& zn9%(+9-Ht@AhLPHcJFTQ{ds&4&Bsd)RKxF~AA@QWh!p0K$}#3Xmdnj) zykBRSw}Z>%D(C3YFf_4Wk6oRUcbBi$E~rl^KDm=XNfxE-Yo=8IhipxWRe)KIh8tv- zsm+mUe1mRkNXjxtP!;KHFW~!de@YK#0~`X;mU;_ZxsB`w4Wl}M(Ujy1rfUS}pn6^X zy4pMpT0$3=GM+4|fvlC0l7{Kz{%G!bIs}?1CYFpV$a5?S@bh<_%~b^;Zcj54fFd;a z-I1}5x`lcatV&RC$~5c)khfiwG7K%?^VJ%SNEGRCGskP0##(`hg4 z^F)5e&L6x@23bX6p?p}i3hLs#9&ksmTWOW6Veg^K=g`%f!_`6f=Jpl`x@=xKd_R24w4O%gdOz&+cEGnyp42J6QJ=!v z%o;Y!d>DPYQ)=w0Ju%I*W#8^J|0kA-1&@?4WsXCJ{TlthJc7T)iPV8J6Vm;ByjO|y zgV!fY ze=R$w-cHXqiT-v~HD-ra@;pLUl{LbcDH1II`mu`?;W1$GBl2ePYvBoYS!wx?KXnDv zMzXBRTO7xw z3oq>yZqH*9*_WtRaH=llpsTCc7-r{vK!-=bHMV|rL^lgdtA+T*Qz@_WfllRSUy^A> zIL7J_@t`Q0e;reB_c*)P`L-EZP~#{DnN03M8H92IB^M;+#6A{tOfJ;pjB-M-x>7%T z=Q)-GckXI7Tr&MhXt6urKi4Z`FiYV1{;ED}$_^Hjyf{m|{FkykCJn(vL4nphdt^it zjb$M@X8bmEFMJzW-GX?Nwkc0AD<^lQ=13SN2NK>+QLzJ6)InS=&p?y1{mE)v5gKT69 z8KS)sdMYJ1lalk{Ord&_a#2u$T~txtP=MQ1h3aLr&Sf;uWh@WTJYk|v%*};!*&+FW zZNCK$;Z+ZKs1A4Os*h-+W30m&zmF3{TTwRB#dv^=Fr?Y5ir`7FOwkW=Ym*!!VVrO` zHc9Fq4Q2Ud0Ky=3gzVYI2&+htqnC43wY=Q{0@!Ug@(wTVlC6OvF(16~ z8|w_C{yD=tUpb8KV`rd z3*K44>M^a;%v>v4bmr_cLgi;P-Ec<+GD4so|F(DlvFGdj9u=(3?zEXeFm%HP6{Aj@ zn2=`FaSHkBQy3U(mB!qpFy|l^#%IJ!e!jN;VT5p?BxJ9tLZpsKMOj66!bo#LNuR}5 zm^>I?!)Zs4LKrC$%M?R>DiZM=SI0e|#-Qw8L_ofPOEWwme=M)vRtUQ>li)Us{&`R; zT5+%JT1-rjXGjHCA^8U86h9)pKiBPbe8)UzS`<6qvh>ot9teC)MiQ?X$~2%vJZUTaIpwrgn^`_WEIC z3V6&r&Od2H@062f6y+aFE#_&IV=)#uoFpSvW4e~OZGW|CiTt+iI9AB7=qkK{iTB_`C#nZapt$<35K@u z!MEe;_p2>{rD2YF*W)gd&pRj=bU2X-TKoLfi{0(-Db^f2VYlzoL8=${=k;RiogvQS zXmA0meN5&)HP!3Ay?c!K^{(`F#`iV$^=|4TOW^TD!TqW8ig3^OliQ9Ifg3!P$S{FW z2eE?C(%*BctYKa8G;>XOa=IKOP>Q5PR;U-_c<6TJ4O*PZ5h(I{y%E%6i`>2yhzhvX z=l%R}zU!XmTI?ghPjTslve+csDuLQNU-M>?t|E@lF zHcs+#*mQO5{bjOoe>=@#`Z(#C=w>JCDwB=nd45HYC)7q#hKUeD5Qa_EwR6(CSaFQ! zSpb>tynbpOXfafqHX%iB*;zcPMC0Vf4#D9$UGTfaB82s(55p?bXwZmIqo5VWhsu*? zOiL*8H!&;H>x_&11vB7)Ett|9R&=KoaYQnFoj!V-?$*`}Ge%C5VqS|PzB`)6gmcAq zJx4{tS`O1{*ot_EZE4tg{|VbNCU+{HOUMkSREi8Z7@m(|LgfPVBoBdezS$HN;d8|+ zQ^G5t4}ugk<&Yi;KARx1`**+Z7qzD_M;InkdOUy0d>tLZr?PL5^gBHUyWh{cZx1Iv zAG-UDF~5RG9Z2Hh4&N}CTsx9CsBleEov!29_D1&3(-@JEIS3u@2E6&irn5}3fq2Qu zB$)1h+kaRR{oZ+_T>{S}V%)N9nVb6IF_pd4LN|Gp`w&=qGkNx|nxl6A_xb4!>u767 z4^KTSRPW59@_}RAoipCc<*Z)IHP6k>ZTtN_Co~jZ`-kYhj`dLP)K{SSg4>@zr)*7EF5|PUSi;ScLLKvZRS2frwrN|%M(v4oGL-8l! z6?{)YB)#~P?Zbu)aqGH@9Md>$fHXc@ra3O#@H(54mYJ}s7W^KXf1)Z3gppR--GU*% z1rZ)0Q11zh(?i9Ay|T!p{bKw_bbx( zh)7a>@>50mi|%=Yw|6Aahhlqdy8CV~FVZg*&L{~p1I5Mp6`|RBJ69HOFJic5fAJv+ zZ+|AtT^`EsGXp^+72&VoXE5Q>yd+hIH9}ZPDkXh=K_3M`MWiT{Flv zPeha;92!PSo2BN9B=x4k7yNJk+nray_ov*C^naFlX#0L>OKocOXuW)mwtIh{0-mD- z8k6vP4kW%#%zcXzcpC#Bd-~XAdh7?&$^0+bTlpx^ruI2?|J&rRUu*yJe*V9ko|wiV zpPR2fd}OBV2gfxx$AF{jVe5_EN6*Fu^tW!e{fTa(mbbz2H>SXo=-oL8{=5tw;-&KA z8v0Mt+W?6v&oS-ow!92@Kl;ezzPh?h2{<3bv(~ugKj&r;QF}cLf3aR7`SxszlI~JM z67}y=2LIl9l~^ckk9PZoeNTB}_>j`aA(2e~j%LZBABmY0RysBOm_Ddnr;>HpX!t8s za|{Wn@HFzg<=(M{zDVVu+mKulx$xPDCYahkWst6rF~Gx*iyMI(wiwn*X2WeevRCw5 z;a_p<6TQb@i~=?di<-!6{}yoP8?w&xyb2*i-IiDL))iCk|JDBcNo|N;fAw$#M>OcRYG=wG|B}7) zFQ)=L>!c-7sZkD+Z^T?%@WXKQNYhQ$r>Wo{l`L`?sHmvx8yohnuETqKWGZrP0=i`nOS|+!&Ogw!5|7rnba&?20k;IUS zsj;X5XN)ib1+}=BjL;XPQ7lnHd5d19hxtj$j=Jr4GLWL2@r#cwpRebqs=aMRCwX8L z)o8?KsFNtSI&z=YPjzD?W1=}ye4I{>Hs&n6lDWJ(yYMHN*f7Ze2}b16v;8WIyF!o= z0Ogb#JWDCuxF{;3?G*7>#QcDl#GFQ=L-9)r2&0gSwzy+5M__y#Ia%bv^K%C#*;$;X z*Ol1x#_W&x0t1+Bw*=ypwi?L-QJpc{5Wsx!Cp02WP6l@53qEHtifoYu2=pq zlSUcgFA^bdlsG0*!`Rq%c5Y7R!1czy?&p|YPn51bIpTEUz%SA2iyKK%ze-&{!stJu zm|N+2Ae`D@`2tV|o;$)Q#OV)U6WB>*)}VltMd;jdsCMp#89bilR*N+8zVROIKvm>& z7*l%zJoF&V5+*1Bx;%1W8}TRuY~gI#E`_CF*$fT8{{w|X65fS>>osu2uVlAj^m?5Bi7gbU5hFx0hPbUm=v-`cMRjlqh^4sgC~$QJ4Lbti(^(RJTTa> zyn(C%u@g=`yY=f3WC+?R$}+4{J<&$sFB~F|ojhhPS;9ZK7S0LrNw8t#DV&HcBWoH3 zv4-9>VePuO@Itb60xL+^kgMT4 zaXeXRhTb7x$rJPXQFai+A-@=+6v4A6OgFu5I>;HFkn=I?0JI8#(8gFs5KK~nb%NtF z0H9d0;`buS(lVuGQd5Z}NV;V8q?ql~AeeG_ER&uY<_NrnPbcD(f`R-!q%gmNEj27m zdPDrF`**iX^GK6MzLKMH%5S%DTFJ-%p+;ViUx=X#YN}wf48t!M&R4X~Yse!)Ttv?0 zb5*qv#2El#sX>bdD1nw~&XLZg?@jfN?yr>_mfD4vbW`>u{3c}D{aE*p{ zf-DpS2oih0h%u)`_y9?Y@&?`qXl0){4(;sBz0xU{mKba5c2zZ*3{5^9tHyo5aK%% z^2~F*M@Co!D57|Z-4 z$epgkk7JyvI!w^RuyI{0CW~Z!*&N|3IrO`ICMG82?mxiUYLVtrxrW#C+ZCu#;1T-$ z+FW1{_5CsSY0Kvp!{_M1Csx7t8k+yD7q!LXDZz+9L3#O_An4%lOEM?w)B4LX?|3M= zoAk|D@ipO~9q|OH04_gCdq-T-;cA&zzeMz^ zA2sf#PaQs9h|U3kl;qgrEPHgj>~j5&k!S8<4+hqscH{Tfyf_b?nz;xxsr%oZCFz3z zQd2fq-#_6J;(hu>NF%;2ITg(69^`hsEK*2lX2w9!R*-^(Ka>4+N;Win!{$L>q@{6E z!)qbv&izpZ{8k#l2KF|XPo$6B_9UH2JJX^h5qkt?LCn=Of79~#stHKsUMEBeqlxNE4S_ww#hqp zs~-B!sVd}-enz-09Uc3(w+X|;5~_*YP3Uk|at=F|?z0dKAT3iaZ}Z$Op0_UJnH}$R6Lrb9gVLg%Ej+4O6%ZvAtm01qs(&s1q zk1Y2bcQ-dfTsTpf`t{=No0}UAj}307;g?fm@caGmPD5Syywo1CN^khtW76$?eHE^} z=a9h}$MuYnO|jU;g)tKm6SrhL@C#pE;gu|%-{a`U2vZ$(8K2&XnD9FheQ8OTXn>QQ+!NWW z;=f%EF2Q=p%VcZLDnulKGE*{3f*Y`EVT_3MD|$aeUp8jW2=iiY!~Kj9hAApR-TvCa zE76cArZnL~S2v)yS_mkE6U(NRhuNPyE5fpFnvt#nO>WC8X^nI8kPDNZ$st$)24M-- zS)sF$7UAZ_$HgPYWwk9FPj7l>yS~pfB1l7^7?)l+Cm&M#H0e;SA%8DsbaWw2$3|=* zX=YGd=S6^_1WYh@siyugMCV0Qs;i9J#N(wtoDL*$&}Ph&U=C(B8sBv?{k6v?8X|zj zv6!n5pPKhzzH9xsSdfP)J;|V>n5npcM@CIMBFZhBp0)akyrw*jMjy-7G%L^^#VI5W z3e4CRg-5uJE+D8QK~P2qj4*F?&#qJeRWr)FmvldML7Atb^*H) zAq7pi-tb{O=)xh?Whkl~<=n*pHSwbJ8`fE?L6H;4P8j8^f}yeyx8cqs8@fu7v#1@# zfTq9~AtS$jQ=?VXWlrJR^HNc2vex=|?gD?T{z$YzMLK{fDat3UkKLBZw8@sQ1x^hB zXy}D3ZmvKL?QhzkK|=zIJU--A(7}FGxo(-GR?t}axmBJ~i_`k&MVvrK;`oB&v|CBc zRccv4=oeYkZAb^epHG1YG2$@{-p+=XEn@iX?HGq)xI5Ab#KB<+Z?Cf$;q*LcL1W+x|4mtZ_9k0}X;eW>Iy@OK^Z%O9 zp6^twJ-SuTqed8w0x4f5aKOkPZ;dGAzYB)XB9yMbg-BpzgQ!>FC0)t5FC);|&Avo2 z{w7jmlmGr!&J-kwMh$s7GR?x##wkQ$TUp<5hF2>;5cJUVa|qTL_nI*1FiXgHihFFy zzwCDOp?m7V#%9vi`Z1+X+k~4SjfglLF!Yua`J3)fbz11!HDibCDz8eY|Hq!dhxz-f zz#G;3gTOPn&y~O{eAmO%(K!>-RN>D@2mtU1CwCCvVPQPC-`&ap>)qNbji8%InDet4 zFV!4~O91EgTd?b``%&MEz5dzMPU}8D#ilJh&8Q=57izcr3b_A22-qF-x`myUdZYK+ zR_x;&F=g=PVsduyf8ZDLXMQNr{k}`yDXw0r1URr14`*#D_~5Ta<&1V=vwUVW|K~aH zqszYoZ)fTKneqJ-loyv!e+ltRpzd-k!$3#27U$yhAhv^l zBau~MBvT@^4QZ6mG$SH@ICtS0GIC?G+%^#soz~u4_^ly3H!zmU#PzLe;~#@m9jW&1 z*qg&cW@8)tzu}kC=U#lX&Zd8S**`!7?&9hO?%+BT=_Shi37Sd2WBd6zBbCX(!P|Sn zr(2=0>%iN^>fFWSLUr;l1kio$_)WE|k=M*c))p7erJeBQE$7Y0cRpC%zFQg_wcBZ^3f3hcEwUNS@JNQF=^$-7rBHrQp%gS2XYum#61d`N&S{@DK1}Y|ZC_rZNCy6~%vip9__U_JA=oqq4x#@5BDkTy zy=%x(&e}hY%nOHQg6-px>EqT($Z+Gp#+w)$OPuE9!~hFRNYRN!T4%VG{7fFYD-Mpf zUon-R>Lpv54OwZ{aD0xL5qnnfeubHSM~%(dh!nZPBM7ktozzm)Mn0$>ok$w30n_QG zfsPmXU`fLZrp5Ff#&X$pUd1bZ3@7kUmJCgf%RHi_LTvM}NDNo;dtZ5fkx=8H@QCg# zX0quj8S=iPzPKQT41G;lN}c%^Ef=(a3Z`!gg)&pwj1z=s+3q$c9eoUY$ia1=Z^Uxb zCfu@kRO-ns{QyZMyx}mN5A1jMRI`GbRAf^4l@sHjpe>*9b?;tI-_gOp92)I?Sd&$* zUYEQCW|u^tV-uO30~c{i!rvczJ`=kC6gRj-NA2ZBeybyz9Z4yElR_)EnoMT!h%G~} z!>y|>f`nur*?_oxD#KQV6BSp*h87NDXB}^w_!`&zJ>49g6q$N~CgJa%(XcrX4Il)2 zpC2WNCPs-9J#2R;$q0x*psXaZ4=R%;MCu}rb1!NploqkX)H=i_uPN5smdAugDxy}$ zYED8z=<^3G;&E*MBIWY?4*NOCZ;uOYnQX4PXTgX>??>vqLF~meG{rG?USzyELdECG z;$HJlz~ro}h`=ZbR9GDl78{wzR&cKZmZ}|jG-#k~-Bw-7oMrJ!P z%K&4yAI#sr+e~w_^2`LJLE8$oOuGg*Rgbs-;l-v7@TmHn1duMWZG%4cNs?8M#rq0#~R%yloYDPxsQ~#@#$q{ zX>=+tFq*q5h=CeG(DGm>6qw9lvL=cd$lTV@8M5J zP?2msGdW4G{G44^i%L5QWuB`AvQ=TG@~U4|kw2dC;<`~!QZHg4j~cRb@Q9v&L6Lrg zlNL1;DUxN+OknU= z5xvs*5NSf*q?~soeJ9vyLKyj|&_i+7c*h5s=?XN(c7JaveVMd4f9{gI9@cQgV zHSwV9c65udsgYrT1;0tN+l}Xta@2~RwI}rlHTvf7%>q{i={v*OWxpNm4x6dR;cSEn zLx;+UD`Q9SJnxnq=MiPj<^E*Y)5XU0mW>zN>drs6_di7EyJS&Gh}@OXkqxxi8v`DDra>qUUn*_1MpPD^DHzP<5OnxNm zq`t<$nU8hq(!=yAISx47=%mzaWXCx6q$7@t9b;)*-#PUBiK*gQf3RS4c~HDW;zdac z#K{^OsO7sCH(T*O0J8%=a_nUVG2+q zS~@1p%rjmdc$@igkLvr_ADbgv3yy}ia3EfA3v(MiYMPqMcm0aP;x~~d)Ypj>nDl%f5}@B_HO3o=*oOq>K?P!$ zOQXYy0@-5^+MzhIAk9e>mHv#PK#Nq)=PO4jODDk1irB=p6XTFh}un8aWiq_rk#&+N!SW@kY7dF*5Yn^4v*V6XQ9T?udRO5DS|w)WFkDPeZ_$yO*oPi5pJe%tK!ZC zGYNjpI74DPK48cHnZ-= zJ@K(M*(9j3+Rt|3dH9XvYS;R}{R20lyqOQ$`9&x@`SvhM#CKtN*lZ!tp$D$~ z->6dYN)$LdhL2I)wxsb7jW2mo5geo{=r;6I4B}Ygv=xK{_LQn(gb~Q`$i;hv;3`POfZ-3sT|mWMd<>)~|!8NzlwVH7GR*HT;cCe_Y4<*aVssG3N0+ z&<3GI_H(}{faZWq_~}axR}NU64C*OIR+UYo!U_iaF2trkiM-Y$292}*i?VmSG^&di z-9LocKTL6|+HhW5+VEMP=bih9J1{NBc}bIb>rcKyy@uSta!rr+ZF5GA05Qa~W=!LF z);PyUjS$s}^vVq+t4q34*U(8c;21Lo8L$wv!5T!dMIV7dpf$F6aAJmO)qAR6?+nWQCvW+iF~flSrH zXy!r@Y1TOC8>#ItK$`IfFX0uUGqi=&@MWQdQ@9hgKuT}9I^Dy{UhT;|gyBA9Zj3qC zf+Dg@YXB%M6pgoQzKT1Of2EKrK`?r+yG5BpQc+d3h?V^73@fb4X z;eFZk6)*UqvB2qCxrdGlMvBn8Bc9_@To3PVp1JSug>f!i@P~sApWv+9sbAE;9~r0b zM*>9iQy-S)WVs^d7auUi69iQ}o_d8ufh&k`kA)n}L3wrM4^AWgqWQJIO>aws$cTcp zKVFPHyFa`y=e#@B%LSNF_7nq8YpxkgBXV*4V{W|yeA^q2g12}bOF{n!@-Fzl+U*`3 zfdBbC`v>-7x(<^=bx1lFoi(28pjdG7PXRCQl^? zp>?Rhwj|R1!E&h41~W1-sT4T5MZ*k--ee&l);7q8ZE7zWwQos4uN+58yCM{~@S}Ph zL*6CEWKx(W2FFBt;*Vkvb1@AU#vsezH?2<~3%}{V z6X^_;T`FeZnz|4%o;=HZ3U86+tPh%&u*%C{H%)R2yUxw0qglGh-kYyuM9KxwYZW3b zh}NEh)^VJBlR{yhiF)kZ-2Hsg0p*nHZOm#y#zPX#NljJ zFgEEL`{mN@1=acad22_<9>k%BP&f^T-~LSYLTqHbwvSzlk@ z&L`*4-Pm66*<|Tz6*)#~!Ckle4e{C_d;cyWSojYIfB*hHTj$H$ z85xinTT1!l5e|x?m-O^GK~$NpV!0OLp1xKGYg(TCa6B!lloBHPQG2!AS|xPazd{_8 zet5&rI-PUy?d8%-V0oHZrKi9|bja@gp0xYqO+e+7VNdPhp%p)RuwM|{*6ou5K0hf2 z#0&KB_aFcBhZVvKo0inHzkmPk^Z567eO~p*Erk8(>Kd7txVt5P?5`xr-O~aeTD^bo z-DvCT`kmSHLfhpn^0nCD-ko1XMcj?;hpBvrjL1&d4 z9wRL~+w9Cvt{(hQX=1Xkz=qLUA6$u0wop6{U_X{A+8Gs>BU}k$CT8Np>qM2|P?<+Z zBs0pZ+y0L8KEZW@X`JZx)%=!Ztw@=XDW{1s)-%~B!Wj(!BosI!x~|MUu*BZswW)tsaJIfMtMQ?U(i0IG<|r11>5R{T49M~<)wf01Gpj_+DBNal)UD~4JOTA?ar^OXZ+2U0Cz zl63!pqz*6sDyT_ToTCYEF)xz2h}(f$dX?RS))D$6q7(+BCe3hekk7REG|QT?y3R(^0Uf>U zxxL39=bu229By^6=DKJa$L{f*yI#sz@Fb%IZDN{PXyj9ps+Pb$&%z5KlZxyf$R;vy_mUnq&xPV=&oUqz^kC?%6;M1&q4kVUIXyi zL>EcA8H9ZsDC#Rh4TC370CR&q7%>PN5ow_iy}T#OAWKW8o>6b_JHws>D8=snwC*>gNR#u2<9uQio#Ew3rYZlHD#&g{YX{fx*`4W= z_O(WfyK5Q%3B8jTEsr>_kF>tf`dUh;!So(>7E_OaICG-m(2X!_Ka@^D*X(JMFenzO zOouH$yI}S5&(Pc76mvoiQy7Jsv)b>io!ws@r~L+8&|R1(|4yZsqbh2?%DCoe9XvYk zUX1pwUA?_-!^~er3`BoaK@M@z*+Knyp!)M6jl<3tR{5 z-~d#&?C($wHijR?WdP~qPm8au6$E`xh`Dk5n((HPEd77Yk+Dye?Dj*y%dk<6G>onr zTAtuU`VLbkY0RN+s4-~I!**MBTZ5JJxKQW9hy@sxS&xNnk{K0>SO>$j>7sMhOb){P zXs=Vt;>t@Rem@(6gQpSM$;(yr1Mit9x zVq#cZi}2}G@O{0GEk0TWZ5n^?#x5rT{NAyV08ukprgN5$ouM6w9bSHPT6@uGf4^E1 zQgvf`&)FnC(0aSBJ3XBDfU`f=lQ$Pe}~4oV5vX%U7DkGm1OtaH1+>2793>)%re zMv^a5kmb8upkQd@J5?lWu8HJ=*v-@A*oTv@p_pQmTS9=P>OREDwa4K?zuCvT3{Wif zpE{wa=3rPg$SL`6SrSl=|r8pxcDVJuJK4VU6i-!rNyz zFGK?QIjCgt5|-UWjor+}d7EOX;A=S042vg}P1R2>d|RyD53}xb50F*N9r}Zm+cznL zH|Y9>;CxuIUTviGMR^Vsv{D;Kd;xX?o6aukpVX-rt$EDkmxO``0}~qq3G7|2@4MW) z*A!UnB17!#m$Aa_RcML=nke+Iu7a^h5GKYDQlT+OL{%}XKL({B8(2KpDfB!vQ-G}O zAr3c}S}!|{CBF+2(&)5VLsK3IKE?fb4sEVQoSrb;a9=LyBg^7F7KKFfF=b(WlG^3O&!EEZe-oOs#i4z|51vdSoWM@ zCZZLQoYtGHE(C)xTzIUl6K*tf6nsG}RhS^rszFs@trgbiewc8g;(>^{4#UlX%s}f+ zRHu@bAn-_%)VZ~I#G~|)4a`0~L|H#kZv(4a2<8GeG)koZjGI*u|8d~oz_8gj!TC{U zfL2aKnFGu%!y%&vGJnjADYfy?Eh89G=#%0Yzi<0oA&k_R>aEpHKL~pp+AeMtT$X%+ z6aA7$0+LDjqUDGEtyKMIu52FTK%9mvOCDkoAljsj*W{H%Kd46=|0Wik`0zI27u3F#&So$Z9} zXBwf?F9G)$(1M|;W-fIO9HgUvz5CE#paQ+#WUnaQg>(&>1;iL#8U<_{C|wYx7_8i`ShyKV1$NO)I{&_B?Kbb_uFr8S zC@^Jz0ru*QSvwR!O*6&^(*&?4pc~iqcGzwNa;FKHVQ7&KURn^!s~{mGMVP3s?TBEb z`P5|TScF8SEq#5BYnlg9Qr87ze!w1 z@;8EXDsE^yf=|hu!a$C!uMN z+>vqea{;&u@SE~3?BC~@wH`JR;KpMmjn?k&14zAv2u+vG=F4&3 zhXC%)uprk1|HDg}EEC;>5AeaG(Vu6NHJH_z+X;^k2if-H{F!D)EY%bhLW+Y zw3^b=KFG0iGUExn)p?)M-XifZ0^8+WSz?B1_~Pdf%AX{2lLBEz9ji6@Ur z!=w1K8pUTCJ;UqzjeT162y`2|9m3Pi)RiaIi-XZ+pQ)le7=iH?_4^ZaeSlu`qojTx zA{t5tz3fqVczSv+Vf?4B4b+_=lv_z)6)pMQTy}3AT2So^9)FevMbV(P3PuiqIv~_b zMJ93p0$`8~;_2yW$<@6AA^(=J49&CV;>{7e;#T8_Wb5ZMzTJGHULF;%X~zHub$}Al6s4704r7RkVQI`Yt2R!L~0> z#{}%mXhW(6tpXig?=8tT*Vlnm|L7z{;)Y0y#zF7*!3B_@%oO*3dFBhIpZu<`kB9Vu z*x{!ZJU>j}l#_I4LJ-X9Lo)l?J58;JQ3 zqDqVQiRJX-7&1R+RFpH8skbuRF4Qr@2*pF|@5b^+ELu8Q@ZyJS2V>2Nv4|el+i zy{M&LWm_IPQ4aN}4cWn6c)x*jC9fp5;=+iKC@|fad5OLK1mr-ufg=JM#k@dfH+%*B)_(CtU6jrs*N3m!qL`-@e z39(cfbUkuXpmmDccB#^M6{Q(ge!1@ujy_+2Y@tM#nnc7&-%OEL`R9@waLRs;b6@Ec zu9==6>oU(zOb`P|mtF`d*EwO{EiTEbNKV@d%_{09#gr6JwtsZPtsxlO+aiGmc>-i7 z=1`%G2Gdc5!^MGXju@9ge!?sqje)%YLlvVvWZ&mt&K5&G6K}yWk)Q&;N%)>WznHo8 z9Y;D^Ts-sI3`pj84y6RgDSks!n{KeGy<@S}*4k;~txq%}bEP0^B^=H^yjG^>6$(sa z+K%v3C#p0vpMhKdwn;QeI6%a6s%#ImEPU1zdPlWD=`u?V#j8eLg1O*!23M7~b@G3Y z(FCB_$~VknXoY?JSjJC4xI=brmN0b|ziMM`t+4-G;{Dr$LA!0q&AC2 z_bd>3IGoovJuNi`gT~V$mW#RcYCdWn_YdJWcc#>YR4dYTiDse$z{2kwj50^lS!+i z9+EbanT10(o^q5%sc;!)_$$+z77-MCqDsVplXtvNb>i0QJd`ubGPD(tJ zJc@3@e?(}lft9vp|_vhaO+0lY`8l-VGZNt2{6Vg%hpr=!rNdA>%uV-gV|L}v$L)Xiw#kiXM z14uPAF7zvqG6t!_T(ZdNuBnpq@*+VQei%W3ZqUuftvqK=iTaH^b+2_Wp!9i%KPOZ4 zBv_p8n-Vce^L*&*Lm#x2de+;|d-~1u3XT<)&Nox-Por#G_ z!-g|v#|SAZ0DpdZD)iDuH5Oh+;}#<1Sr4zS5}5{E!4nV=U^XuY zosm;v+Sf#dhgdYOl6onr#cv?%h#O92=U)47?fP5n7#wbmJhz$%@&)HoB);M%hXwb* zk&j%BTylB=&xBRhm>w#AxI;25kYuem4}|Y|kDi(n@uCh5rHKz0a#}YTy z9X-zIwc;lx1Fnel7|BOHRg%ZAfuau-gBKMgWJRgc{(e*w&fd3*a8ZjT)G`i1AaA#U zUxyc6uddXgQB;+PoV~=;F|xzR}-rDeOHj zDY1>{np{>obrBnX&|#zP8X;{GDWBc?_~T0{)p2~JGFRlOqSC2Cq*SVS2koPzq(&{! zzN6tY;)0~iT}N$O`^IqT#5!esIVGj3-p;#}7*E98oa|BB>&F%$ zKnaiA_(^CW*3y zL5H>zYa;n_t4QTS<#k$+fF!azQ@k$w2$$*XJ(*PJ_4)42ku(iJt`}$N{_aN#2PL&& zrvJz9j7}|AkC=RS^7u~pyKv~SdETXAI1(BO054_e=ktI?{T{+=po5IMWkFn?q`ovP zwgfYQ82Z>N$#>)ZVf8j_L3MjGu1$^nt3?Nk%t-uGmCY43^#PDHr5zLf)Zuq+3*D9A5tXY=70$@ zMoN`YEg1`jS<`lyOPl4y+3fNX~)7xPU`9iQM1!5jZeg;=W{ya6|Y zQxr>F1IKY(X=stw7X}f?<{&NAC_BI_YmpO;d1>i>hQ)GZhMawcj#ddvEDv%2CXQh4 zkYx)@3vj#qV@h=qh6wsf>B_L}UZ@_`v`T)S8z*>IX?GvZis3}hy%I$*yoJK+CgG`l!N}4b^kSPF-ZBV-9k~@Ck3)mCn(q!8SYrF z#1b(Tg(W=>>Z&By6ryk8+v1Z~lsoyZlMh+nQW&2H8zSA883YMEK5+|tQD0FaQijzw z43Km$%K+n2rem?fN)oxo!U7BIzy5;}QS7ci*El|ZQSb8SEyHBlFfz>#-_BD-D3mB5 ztB^Rh&35dTRd>2ZgF@H*19HD$?hlLzyxH-qKHWm^1v9_3U2X;Qi6*F`{B4X!84p7%dErfsw!s4TDVAkrBaCYJ~RXmNuSe77W5fLdyF^KZB4ElZMnS;kX@Mk z@wsnseN|`;Y%~`wsIN6N_yzn(8MnfrK9c-V>e?r7!6D=p--D;BY`Ui=H985ffNqz7 zEBO@YIeM@^0M|A0lT%SLBPP9@%C)AmIh&+oCw`M_e}cp6=dkUnc(gnSj_ae4;bpD! z)ito;hjp5Au@WpRVs`JO?X^^1Uh{<&tha3;U8#bpKFb+`Ro?Y^CJe;J`}~9A3IC#& z2k_0LUS*|cYxSt@DCEW?>t=0(;=(#|)z2>>^2=>!@iZo^6me(4^7G9NYz5hwV8W-8 zxcJxRq-L2lq{mM67H+5ZpigtB$|T>WeQ>}Xtd{15Kt3*J8eGT#ozQF+i``QsIt{!=&QjYkJ*`i9U-GA-B-BeBu`|5VYS*ncujYSyB^33#kn}ledm+q^Iou<| z)e%)-#FT=Ch{F)7HWBAWn*BBUkJq?Yhj3c@$Zp&%|J407y!9}oMK1XLdCM2l7Y*w+ z75us!e095Q3i%X7IsybBM=%wBoV|Av8?!$x5>W^eFD?hIB$Ij8c~Z{e)-KL^7v+L# zUv45`Lq8v#XCY#(u7`#Hk-6@E2-!$|oh&KnY=$*Bl)NLxJL>XE^vMz2B%U;2u{QGp z3e^gIT5@Se4KkvY6|1i543qvPIP_V%b`evTDimOe8`+@(n4s=GSA7YqST4Jt*0yK~ z@eJ4((RQz+KJJd+JAb`<)sv5c|G4_fQ3t)VrY34*S^w1E}i2l4f8<`gD z;C8d#yitb_mnh1~C#69Y7VEW9jY##OZG^G>%5lY0juILb2e=@XLYT_F`Z(`8S#wqZ z)Nm0)ZFt^*Pi6?{6_PS7nNB7lwq=^L`7TLWK8^~HARhY~tV=V6uHY!F?2%oEeB{4cq-Vgq@B zLGFY4;x2y~IXP?^#iY$t>M=cdOM96V{4LPZ<$)gRjv9AZQE^)`BDyFI_hauq>=(W| zPDu~x#+r{T)fZf1Dhs)K8vUSsiSKpU@r}loEO=kR{sU#8k6)3ekyCN)fU`Q-L3Wfk zcKDX>-&<)|Z1_s*Hgxz7y_GV#_k3cPLDtj_N#Kt6X~A{Y`uSWh6QeMTFCw6 zpc$@7qQl%3hA)D`%ajPHE;d3^x3TUaKzBq(i1pKEb>s=B`k5OMh=1PTQJ$bcV0?F% zqG&mRYT0e6An5)}Q@X90+j+8WZtzDun)Yh*;McydQrSdWqw=HZV5a@`QP@(){WCef z%2404QSwmb^VE$rL!RAI6*O`|bhM5YiB`2SNvL^QHbqh1?*wpcm+9_~z>FGR%HccJ#G1U9mO%D={&q5xAj z@kz=3veOg?2NqGN`y#yT_5~_WsBUUd!cjn!@RR)tf|X?*=P#{G^Tb0zA>E|#7!IB+ z1q*xF5^Dr`8e@1^;=z#o0to=7$cIk<`-Fc%jF(3bNaX%}>hV#)H1HO0E8rUa@bC~5 zsp}t$L#9CWw#h#nP`Ged+xF27aU&I=E2oG>@$MjlWF=lM2QWK*uffZ;rq=_QqOK58 zQ}6cW?Ch)$0~T@*)pL%zl#_%VatWw=l?n~?{&2F|-e(Sz({sfC68_T=_Vz!i5sqnY zMq0ivljiWxY1n0H^c$5-H9sT+bgz2bf4wR=o9gzdG7^d=?)@0{<;6L+>d1fFv+N3X zydYHlJ)3AOI=uIsSR2enK61~wWaGy|+}hT*18J83q_3v^)$SvbBIM4L?-Z!;8VZ-}e+n_05NrYCjbt@+?qTyx`L zkV#?=PBCZ^8tVi7u(c76{J{Dpo7Pe31fD~O4Bn!D4|TDyim*vcGVSOkv%bh4jdx09 z0Sb*pgGL9%iOl!-dc3B{a(g)gHKz)%VgbBc>|JPRZAx`HEo|1^U%-mj_`nS}?k(VW zehtA?>NN|I%{ORNw&l11vP^>Ep(97OATvRM0ghx379X=S*OM+_Z`eX&t&=$7DHTj2 z4Ja!cmRIPzDatFP5Vai@+Kza&RUFq|>!w#QWRI{( zn(VWAQs2*c5biNzxZ-wbVm3Fud%AZgr-|e40k*b<^`sQk%7rK7Kmh3%%16N=MC>bI zm4Eh^5Spf9e#Ns-^eg?yLIx&{Xq_ExyxEQv#_waUt z7;elzxC3=;?MJb7f0lb$1y-Qg%Xnk^Hk3@9&w|0Ll*^>XNsN9i;Sqn2Ht-{vXVVwz zKF*(=KOZprmQQRCUNU*f*>NiRLG+=-0CIJ}a|jeNAj!#^oV6>IK}2iQ{Tm zC`~2BRVOIGv$paXxaTnOMkMvoc4Nt>g@Z2(1?F4Fs;Hh$-T&ZQeQP0LOmzf&aqEK5Rp-V zHo@n%7avc%kXFR#{7v*?U|t4EJd};F0};g_l3ifzO+<--Je8I7U?3l32am zhkfBR9{usgFWeDCH0}1Rsn@}RsZ?fBOvVW5Zn-N+}g;ACXC0G0KULn%1T@cpFIpJ_-n;&Ekj z@3a8^u&;G760+Ck2`Y3iQS-GH^<0#g{$LCu0R4Ll$i)Tf9C8@f#WER_hDeCXE*%@> zXE{V=xCg!m2Tx#cQKRegTjMj1%i{WXO7-suD~S)SeUYr+4O!4D=nQlW@C{%ZFS+Ry zpW(o;-6~vuq#+JUEVkU8E~+dLQBbuyfP3YmIfZkXz+9XGi4Jj%7K^t1cU&j5)q3?O z{GaYUO!@X`GA?O5*(rzv1hzw$V-=Q`vny?8$?%hsj+1eguclIPr6@dy9FJWoO>h`A z&7KWL)NgY&cgdE(zl`e<2Sc#XK53)y^@ z)AM#)tE71r<@WEXxfHM{zOv~NF%mRGQie;~492TG0r^~<6N*bE7{m1&OIK`k`R5L> z9IYOh+hhz%AR+4%2g6T1@zxQ1s*U#3D))*ZI`7KcQ-LS$n*4-M1s}Pzk%gpo`w6cVW z0sb~yD%Js%oZUx|GJ^6JRangP!TE%B?;O#QkB(&Z2+Yk($F%6B*Cx@n7l8WZdhqun z`ZQQjMwubTqIC;Kd;vEVfbJ8KpU?l~GneDC1TdV@0kpXw#-RGR7~Xl+-oH3NB5j!D zi~y6`gUUj=B%`jxS$%JR23+#ukORF_=hDQj+spxU1K6q=UO?7Rptl}Di5j#kcD$AO z2fkNat=v+HMK%oJAmu9ZYzWv)Y(t-izljNsNS8`jdH2;lwxR}r+4v@hX_M|+)y2YR zwh`SctPq^|5ty7w)vC~QyaV#bq+v^T*6lz}ZbAomA8}V&KG#s@a4iz$difr}>E@xPCNOeDbVv!ZR+DF))0{boG!S7hA6FQ7%w(Z*%>NMO@{3i*^Ku7wpqCVRx+q_E_*3paAWRPCAsRL4s{SfMr z#nHN)|LC)nNnv!Tj)8_zR}U!>7H#x!EF-W|4m$^HOXMDjK)N$nu+j|vLWnL*Q+bj)-TCRtt1vq#bA#ZX5;2Zb~#F=t0iB*gh8xc^38 zmh!Zzm6^qw15*;P*yJWp;3p&BBU5X77JikIkFkyFCk7i17>fT4T__4B*fTsKpDwvQ zB$L%*!;WVLltHnAVzD~lK=_pMRF7as97)eo-a+DTybVND95E@hMLCygu6Ema_VDWK zkUeSA-zr&{77(mekcdlOh<_GOnP3ab#4wi#!k`(f*bZEyeT2f<#hGK$fX1(b@7&c& zig6h77#IQD4`I=iXSp=kHp<6(>0+bK#u3sCrG_%GB}pNhM_5|$xK$$Im9?g@B(s=H zJVbC-MgYU&T8ovFU^Ga9zy1dw3OM&K?=?lEF#n9(e9=;NJ*4M2+p3CF1K&R}YWzHHyuvgGIwlku8sBV~cn9-gBK$u{>Mb4-R^<(I`>!M(pVJ6buMbnoW}tN9eZ*Phkl$ zwN`yGx9Td%z=N{1D=kd8p>)R#4GTiV<}c{5G8*vuB%Ec~qWi)@QW;W9N^{{c4d3yRHn!d-maY#& zG*I53d)~*d52yc`Rl9A$&wpC}eM#Lc?c?ArUY)>2=Bp)UG%?%m@LB-E>4B4cKJ zhtLS`i_NRo`>pH$l~{-9d3qTUjg8z>)6>2XN;#b`yayo=jg5`4&;hSrH9_BDT}~e# zzF^*ay4Bo@eKhr!`iPG~it`TPOxHpbqsNd8CgA-Cx@#wQ@5$ZydBW+b1B8HmnFPC$ zf&h+WsKBK>lwmiaD1?MHq8|uYPVry#lm0AO;xSj-;YZ)i*5~cHyx6nP!5xh(sKL)`TaZp|-se~HkX-Iy$oRuYh?TeM$d_o8_wV!)V149?anp_hjjZwT ztK~CR(&yj;6&i#kyiqbr&J?S`CmwxRE`@Z$Q>Y3Cd7QQ{HT$m!;VVEX*i^c&X%ep6 z>hB*A?~gS1%mypxWdg1@iu|hS)}L17ByCeFeywpx<<>kmey4Joi$L9Ar#bH!B9)_bZ-2cJ7YQ({$gk8)vqq+AA3HB4L*OHO;e&MrL!Cm#%3E_2tnG0!=&mFAmZD725=V|hY z9XI}ax0GApS>^Dx4!o?J^32+p2AV@2KXYhh81h#IMdWJ*Zu;z0B}*gZ#0E!l65sFY zZlQqnT1?ko_*ud#F8y)Srh$$`sQO+x4H{G$0VKSBiV@Gra=9jZiWV;Mlx+c>SNzxq zU(`8%z_jMGU}5OZA;AZ8!refTon=fHwOV4&?=*?&f0*lkb|kgy(Il(M5Hx?K38Rx4 zu-mjXDGn&ZjkHqYI)Rm_p4b%uhYoXFMDN4e*AK{h478Or48Y^_9%$LMQI5%_xVAiC z@K87Z1Qp8aHVkZEvAS~_a0PG>lB!GAq~xTcuME`b&Y8^SQFZ8HiR%2VO!sXpLEyCK`yzPZow)8~>l8bb zuc70Bt&gOOtfSyxL$I5<#-7d@KiODWimk%xiN6@E@R?Vm+hK35@tNf_+BX$}@IP0_ zZMxPd*-Y_&Oy#%48P<`j;nJh^rpUgP5;u$)I3Baq2kjpLrgXK`_DmfJ}2 zGIUfgObL*2qFfC%>}#6&DnTMalWYV6XB8dPKYMu~piP4PsORg{CD>`e>UQKXom~9a z!>a{?PNZgpuIf4H`^4!bduZhSpp`Dcp#FjIk+Z-lDZAR!L? z{3HDUD03IjtELNz$b!8)P1mFM7pV}6)ckw*V}?_aRaVhI2L(f#<~0vl9KjQ>!Xm@v ziD49Pr0=cwTcQU$0W>0PPO5AE>@fu)1)u+lqe7&>)Ca;$%s+#z>#vKN4`~-ewV3p? z|J?NdMW$*=A-%SqolFf@{YtV>EM*m^$-PBn}{xEtX?f6?J%oWz6$J!N_w>=b^5 zp;+;H!X}l3>eQE;vBjlJ9sPi!$NUIk|E_Q*> zUkF;cm7EOc2y+|F^{`F&oF z?AeFjV~teBZ6|h8=U!E&y3a13Dc@OD_e_G1}?St%Oy)sX7`a_zK}rBa=c;t zd_NcS_n}+1{`FIR)FprQ7S}`^#MXR!yYIP%z%>NisQP)7U+#n7KbQu;-ug=v{3i`h zezthlCVmiJ3+b+yh9fi=n+oj!Ny7BQMwU4vhI{PL0CY#9Tu; zWaza8tAJ36{CoGuZi(hG0M+nE^Ul5wheT~3Jk)|E`v-=X%T;xuzXnh|_^Tpug8U~k z=f%)?l1M4Rw~Z#PSq*j2Yv&0+524LxBU!d@*S9$>c+`I+`4zA3plZZr$DZy?(0Hq^9(y!0n@rwtFJ}HCYMXS=;cN2 zKn-!0VZ#7c_~hVoa!Z4*4I)%Eo;qN`=3z&;S%?o?a$jV17lrBG+r2$&5Zrn+C!4ox zpOIe{T-ye8N6U!c#)ietJ2BlL_h;w@OUGJ7wz}s>PyVVuGHdZ z-f1y&TAaw@;u@3=iHWjrXc9;+M_1>Ux6@${*nedCrKW^87wcB`HlkmPe0J0=mL{Ub zhTxQcwv&Xn7P3|Jwc~^v@FxUB-&HBZ<&6c>B@dUx;skAQly^BS1P%=)TiJrs@Z^oH z*s`UpZ_vK(AMoH;vh(2QuqTZi9oov!%oEyi6&v$}#LtLTQu3DY>gbQ4DBDy{WwA7A zeWPa4w@dA(DXnfI$|9=`B|zQ@A(W?#<%|jq`L;J^X&KfRvLp=~Z^Seq@hh?PHT;~b zfux8|h}Xb*!RXtVc*K78Ae2=&ZhtmS60M0wRdRmWV22iUkaY|zY-|D#nO;hRYJYBc zpjQqbHfKa#E4QBI%|u1?SXzCVkE%2yku%~UABlHc9Z`6r_zInQEJy3EvMrtg@5$VZ z2GKVf)`}@BW2li>%Ry%ZR?I?VMTL3FZqpnw`W&T+h8-2zq~Wvp>oPifl2+!U?jgc_k0%eHkp#O1Ge$f8$ zo34&Bp6)=C`j6sYYMcvCEx6FiK4I{-_HxcN+8V?}pDh;9OB{(l!Q274kH|mw;Trs0 z5Sno;eGvPOeiFI_Z^ylhH;WLq6E=@fQVIG{$@5c1#U+jJ?&IQHV#r6tHZTm{Cnk>R!r1+Bfk_7a4<%_n=a`rSeR(4IZ zcB^8{=kS(z=)Ikww(!w@_u^;`C#wpHW+{AnMQX@FdWoxyv4;N#k7M-ovU&c=e>RaQ z*+@YZg_BHh?$Dmsqp;3E;ya(n?@3K-iU{77;9p?{AyNR*->)MpzL*=Jfg*}Wo=w&L zzI{!Yz^yN;{ikBq7CrKOwS+!dnIJFm}s zTX$mlJVPhNJjK<%Zo>a#Rq3MqU#p5mF%If`CT~hgSIOsl)t6`x z&y0PkM)Fd>NRoo&A%19RcSFk2?8)^H@v0`Bl^eg`^rt{*BSB(yjf+gCKTP(-KDblVxA8S4W`On+as6 z-TBz3`<{(3n&xx+x~&ULst|3SOFa%($9D*K`xnw6h(cQXlQBN77zBw)P;{4^i(8q# z8t(mnRGnp1Rc+X!`y$QMe8r42qB;L0*Ee_QH81`K4tvXcDQ@0Q^%bE-Uf zh7aUvMQh9>aWCFUK*0OZ;qPNJWefx@AYxBRVt|lk-?%hJ&GAnQ4Md5lnZPpo;aqra z?k8e3$pn3so!b8NnJ7gZ&ODN5ee$m#)qo&Yz8Be?HV)g~AY!Lr_LK7ptGY;!`?cQD zO80)scMSGPOGjaWfaJY?oX8@+0)UFq=@s$GcNuF12-WUVJSYClw3TL3M7a3ubLE85Em zCH^e3KtJlb>6)RIr9o)0u;n1}Db*@Mm~$`=z^H`aZhor3Pm8!<4aQQb@L`l*NeH)r z@#;h8igg>yzJCyIp&!vX18{#IGj6xsycj6{T%Fv2$0E6LPTzm^$C1URSjmYHV_#=) z@{K#K+#pLBjq`HOir|joWX21ZpfQG{q2YD=OpnzYFm5(uNk$Ur{kA-;kwi!8)6{QY zXZ1jI>HFp-IdLqlJ!nuH4U43n#4@iMzBC@5a(SF~6*9wWb8qW3SuCOGfMmZ* z{2(KU8e9F-Q9C8WL(2xj(f7$?k{mK(^lXp=+{|=zA5C=9gxoaF0ttFa?Rc98nyaCQ=t+45&QyJ4Uttp3L!#vh=Ai8izIcS z6w`>72^K9&6G%x)B#&)1Wj3sV_B%Q&U(MhkH`4iR##o#W_o7KDf2=?=aVT*Cu{Bcx zA7@5BOmrM4WZvgj-o%CS^4mA@hXlw?t1iLd9?LNPZOuTRWOA`yzNC-5O? zT~^jpoX%ph%`b{Brio2J<4*Q~L;218)AFMd9`5LJ#&T2PNl=@MN->I|Lh;b2{fIB$ zEYUapKbXP$Rx0z;jDF(bH9b*ujJJ4mAYj0n$V)rmlJC$!L~5YO%2>yuq-j9tbqVHX z9{tv%lkK2ruR$NHPOO)a;Wvn$cc7d`><&Y40z)tVW}t}sh_sYuXaX(=SzU&k7Jod% zCf*l<$4bK%8fMe=(=0Sts}@BcH@-LFS z)%IZlxXFy4OV~+>GCrDV$P{teTGF7`DB2>I^=M=_)T>#EBcZ6i=-N+Hr#G5^J_&Q* z!&N-yJTsD&oLUz{707O{xmJVXvN1P$9fR_upF&oJ|6Gg{Mq<9dgFgkeJ)8J5e!=`L zmTF4r`UvfX2+vUl5%dH}3OpH<+; z7Wb|;$h@c{F_ElNF<@R~4)CQgA`#TR2(}UL;eh{vZkM)5;JAbiT|{pr$PX6rTSez- zBn*Ef#IQyea#G$M;AEzOMVxpws)C{`Rk5g1uQqeiO7Nc5n65NIYw&)g8aHBrE4Dy? zkMQG|7o4Hl8Sf^u$ZEh$PT*LuD1K)P+@}7kfU}dKKon=Er<;dY6h3U_Vk#G|IeQSY zJ82O|@!aXd>qiswcwuklD83re@gMRCl;~?CH@EMi5cr6{#_>kjm{j*@#rn?wLDtS#XF^NSDTWlPx^7%)F%^QAac zZ5^Nk14kp@SM)7@Koesbx<7dbH6@FjGY~qY27G6LN`=vi$U5xW6vRrTx5N$1)_#6| z)w{>`jqibT^!5=NGzJZN_8T1?rHDe%@w0o+v>TTRoM#(=Jl0-M2Jv|Zamh-6hXo@; zA!}#AQaS>1tn<7gqyR;-`=TR^e|6e!E`0Tu$?NZ>c`Hl8(T_hijY}@TQ({#eUf)Z7 zaB5r%HCfl=tw!OOmSn6R_;YjZgl{zfXo712*W_clV%D8&G}KwSx#xiC@O-ZGDb4mv z;FQn5WxxrOnyvj*Lg|-$M|aHRr^N-MAPVBkny9@xRJ#=%ylc+g4kiDKp?DxLgXL_G za2+VuX&g$X5Qh4c;o5nYN|~6Eee&EfMLF$j+fEK65)iSmj{ODq@iO#T!p^FO64NgtzWr91= zWpxSF)T)WiRH0Y29?)Xt#);3NHzn_XAM zq9ZiPB0wzQrO^_5;;H0FiDj$aw`qY2uj3N+?v3zH@ODSV6#v*+zk~?hRY|KKUNoB1 z@rHI(ERE5n)MdtIg31=8q6@}#dwUBFG~p!7IEs4>{{8{Z3rXhA`^vGm9n!w9uWkkd z5Ks*Fj@`Y!M<$oAN1-l+&1O$`eVzCYk_yHFzZ-Wbl!?|=fN#P5tRk|Ed+pHzYnB98 z_@ptKK&iygtsPCRhOeU62PDqPOGAj zP__v}9vvTqRxi{{+|2j+Cv|!4boes)evDpEdzfUqE#rV!EHt$5e*0h?y*2>}UomEG zP-qe5J1R5!+wcYxhrC8f8qL;m#S#H`4`0uGc)XTt(_q@v)mB*{cBEusyxCYyp2OI0 z`C%)OaRvj*-C@U^`z|tBaTEQ{`%z(*WN+x2lUpO;ziEwAi}w_bTfXMVp!>o|5)b3Z z$4A9o;nkNNS0~Z@ioH*-zgZ~`nl|920#}>JCk_CoXp>sMAuLK%Qngdf=3U#EljG4T z&<}qud_NeS6aEI(OU71H-*8JPtsm&^kO6*|&#j9TZD7OU*Hat&in`1wRCbT%t*hZxI)< zm0V_9@j9s*fj#1D_@6y|H7qYJNdYkd8&9pRrDJ46d|dEVG2Itwkwf{4=xu!S$oH~$ zAs6T$?dlh(qcF;qNMsHbs;TSgSYZnh9Owq~Zt{MEe0V=1GbUIUFcdIVz~JYVO(vTt zoa7V9rtB$=OBl-2o13#l2P%2^@)UZ7H11_w%aavQ{97MBv~C(dWcmv81yb~=#^1#D zi4Ca69t@wR;NTD4MN8y%V_KgvK(13@MbYoP7vUxhJb$NRZ4hpLi`}wdz+<#l^!3 znryu)8X}_CCd=NJd_eNu*~`r9SAcO0@!0!qyLnjTQeJbiR5aDEU{;9l&aj=6h+(U( zh_4vC0x+Zb&bE6b6vU-NK;KB}Fw9-BH=Z)Q5BYw{_#0xKQPAL#fM8D4cAUldeaOHC z9hh$;U9UB^4zT^3S}4)KlMzDtCbqeJ8M_7&-=UYoevKfMnpDj1Pf%T%=Z%JEPp^S! zQPUL_DvOGq@ki9Ir?@|c0Bm94lpF5udDVCRyaEk`yc{;bG;_VH&T-i-W6$>z}`QMxbzX)_kjsHSqK&Zj1I--_!I5-v9I48OV ze&*y{dYh!?FUEM8SyT15yQOPoWo3iEfr2v{CbDCsn)_v>Yn^Kq60lZeda?KeXQrn0B-&VbztunMo_&nK~DvJA9C13=Xo!8!p>BvcH&bfb_vz~pP=hxZ-S+`t> z0Dfi2z4tD3e^=lcQcE%XNF}^`EwFH&u`gJql)~u@FgY9vg=|%n^Z;ulFrI~a#3(Fg z^aXGEUKr{>C9hM^u0P-Xc;0M^i z(=*>6kWjM{z^F(bN$P+CIkF(*mdiWGgY`6tYb%+fQ7-aBbY`F`C8OQo)bPY4g-Em@ z$&autH+m#d{3(S>BqHn#u9hqzagmqwEK=RNQW~euWmK3Bp1HibQS*&(=?tmBc3h== zqKV4m#qpDr@%vRK9zWe|zS^a;%;J49B+acTJ1%j2IBiw&yZNu*=uyObH_xLGkKc!$ z@g6It9r^dNAC&rSL*QM4lPGzp z^cg>lw&0+Yyi)P#Mf(VD&L4m9XhDtbOW1WZuTmoVmGcduIH`EF-Ss=#!VZRKmK#F$^>MHZIF@$DOgep{(-;rFO+%CZ+cjs1KtN$gwH zzBtdOe!yJ}lVq^8A!;n5`8ZE~LAUWW#2$?=#(}^$p?&u~D&6UUEMd3wDeVn)HDfKd zd%2pp%Or(HB&{r82`}X9H#Vv&ycG>a*|F~SDZl#aP}k8kfK?tZmXCGgrlOP5l!3^;WfJ&0-HcT< zjNn11pPv{!a*#R8`(aIZ6{;lneX3CHO`W3Tn(!6!#nwR`X#a_T2p6i!ol`MNmwE6{R{wwRWyR~8@$NYD?$ zWe+s}n76rY_%Odm&e10@6`ub??TywbR=e~W{HApjhw@|zcwj)Yw{Wuq;t;<24RwJ# zmhpYKgDU~P7B2m0jCoas`_;i=N)5+A2MY^Eaq4G&V_eDU`$D@!QgTC`?`y8Ra~W%( zpA!c88hUQU4m=_5Hr{iiXef#18rr(JT=zccaz9Y@ZE^N|ube}(8tXYAuk25xHt zsF-gG*x9iqHGBD0HENUaZ)A+H@bJ-NaY%INTt~N)&2@M4cr)Crjt?G``XFG++oiRq zfwzEBUOe~=0D8az^3ogNoeq}#cggS<0Q4zJvB2%#NX!cg)L=k1`?j5XKDAHjg~=lQ z_AL-4d*;_8e94eeVj%#84vbm&1Y}r<(AikSNi&~)k1VN3+Ea-K0R9jHM!TOn#rPpU z%A4YLM<$jgjY~CaP!~+X~5MNG%{+0aMXi!vY7 z93fBOsoSiqX#LqK*S`8N4*+5u*eR#I;MfTKN-}v^$Y=5$7iDpuEU+=jf0EnXT<6*G ziqNcwj5J`4c>xIfU(V>NMv4CSN$SJ$YzRNbI5t0~qGz(gC1maomijul<;jg9a1%ci zjCfSIJVs1r_{aABNB&J$-UH0WEgiq8=;kn*xiaCvwH=u(9*k?E+_f<#<+%Iv$*B5` zq&Juh-WjWdKg=PPyIh!9oMub=nP!r!NGvc--wF;>w*n(!kgvT}uv0?inp=eQN?qIUasp888wWk5oWgMIk0dx9SX#4do7qflP2A9ZWGpUp+jyeUzAGO2UD;V0 zk8y!Jw*LG1DyCc#_VE)JdC$#+IsZCi$o-v&#h`cc)-8EXJEZPp0~$TDzir}(HDmT56h(R_ z%XU^OwG)xKUYzkxY(qSs^Z93?-7K6a&!?jUjWT7%3#!h<%Lkql+NOIhuqx*1VA zAl87aFC=N>6WD$b%N!$n-pD|ZS~l0>Rl?u8ICqx(5L)m`-frlDIE)i^`c|I!Ar}LI z<5s`$M*3^q#TkL4g3Sd5(%$!xi>623!K>Vr46{`gNNh|Wa9B}U-}}2rX8TD9(-0`0 zEW+8w=*SqvcwzO{P9>}NgGg)f=dCPSgW-i)MA)WN^+nXbPX6Ajawk(wDDO&6bdVjx z@%{WtjmO=((2@3SEi46Trv*w=qGldzv^#=)uh2Y#J$A(>I_Ec9%-TAJk}iwo z4x-~7_HsBm+{X?@%#Ems#yw;M3nGbr9thaypNVOS^j-4}%Ti z`a-&hUY1aW!NMClOSd>w?5%+%Ltcye6|O3wC68ivJA0B;sj{$T)1iMQ(NJM2VO-%0 z6Vl|>yg`{=uP&r@T?Y%b9KmN{@^^1-h<*j^B?v5k3_@36$wy8{KZQe?MPd_+9a>obZjH z>F=(+)x8c(ueVn@^EQ9D1*~gLlXOe1q#KZ27~dPB$ijq~+PzZ3PZ!xC_J#Q&?}Zai z9sLfcfjA~O-(8_XoUUeyK#uQ9fE6V}x*>JIYrF3&;(sr` z?BlU=%r}_Ot$L}}>3aXWjXH%*R0fXjOJFWi3JFId-SRGDBG5{Tq+~p!`*RDZIthE+ z!uYG}i-0+h@A|;Px~|s=T!H}yusH!{xhKBDy}c>BzGT}TmMv^|Gqr2 zZBVo(1wPy)0WN@95BOs*$84d$F@qAXt7b-j&yZ6-x%?LE=-gaseA^b(>Utiqb1Y@$ z)ml`DF5m=mqOz36rF{+0OW-pTZL5apB9*PTe7+~f;MFJ2HVWjj%*hKT>hsI;zSs07 zHS!md6O_R-Oei>Nl~)A(ne}Yjjlf*4{wU?jI=?_q@2!cWC1{2ZB`=TvK(5*qtY$)F z>PmzN#YusRR=rNq`{th1n_mDMC!;h|Grt=lAdj-3Y$>_X{YgG!3q=6UC2Yf~r<*u- z5q9Gql~a}(oG-GzPJt~(w1Cv-mW4qfN9*;FQQrO0n_HMP2cE+`m>D7&vQ5!BgDp5j zb|iTqNn({XMT;Jl>`oTS@2rMH7E^Nli?oyVWmIzNxQ%%0v zDy(Mn(qjkP*Iy>zuW76IXZ=cMebt_F;Xe6P{PhK)D*t`e72045@Rrhny5NT_z@0C( z=sKFwAM!JkeDSm_jE@an*|pz=grO$yIzc`W#%dX2`voqT$u9>CR{d$Ki6Fh2;F7iL z6ZmkrbL*lnJW#V#Vj&#K1Qo~}GrbZhp^F0SAXPtpG&eV!HZD#0G3<_5)olO~E@0kX zARN3MJR9fNhzUP3rXGpAhOWJceXAV-etT2KA3mG`r}*c!hn=^8vL@mU{%?id6zOM{ zP~svJJbRQ7mn<9?sR4GZod_SuN=6|n=-5xrh9)8UKHN7c z16EBZRrHq;R#{)pAUSO+d^Uao3=f8k73MRR=#OY1eO32 z|M%B0JIQPHu#9SrhhDts#y#e3Oa33ERaT;!yau&y?Gn_Sv4ZV-dMbY1hl?Z>DV$=vpynY=463n^2n1s@)}u$y(}dzNK=E9=Zz zxMlg~q)`$k?RwG~UGB`ZIPac!jBF{s^o=3D#kEH2{DGUwqvbkOdK|%ia-XG(Y04yqE@-{iH0NTLJ?yQ= zajW1j&}L9In!^_NKIBz;S2oq_X`bvM+EgWyP1r#CQ7_QH5&5;y8H75LJE+u=FNDXa z7s8nLmN#@CNODVgsLmz~W6TKo;`om0FDojeMlTClS52t+)Dy8Mlux{}r^{5g zFY?W&-G#CK%7@3DPZ&kuPDTx%6qo2Srw+j_#-9>e)@%!H$Ztb0k^on*=Rd|l!Ni=j!`WpANG%P22y=+SPD1Ewcx1n(*&cOWY-b1nagt3veZ(65T`@;d20$ z|Dj=ae(k8a6+r*q+pV^@Z>%hlQ+B?s>Qr40oUdMT(*FZ9hT6dDt-sg2u1^(Q28+R7 zSaV`&-mN5Lah-8PfBaiUa^zHk#Mv(6cyAa(SROWCktN=s2klX7)Xffwyk_)(Wro?S zb}MxGMBQ<3Xs|g^skp=0!X-xhEa3`arsk))&`IjJ#<*6OP{Ks3>1@qGA#+oINa4_< zq?N83xsL~@u&22qNvHuh+qWmIdF1CKBdmbN;(4Tb0nz3N-Q&I_>tMpLAAYd8^x!^v zLXYx@blkJ#4Kg7jYBTi~X!Z?+I*0u-MG6W`2?8ppSHxF zLSI6>zz%P-5bx!N@<6K$f2}|SfDT^)*|31n-&W_KI6p=x)U!1-JlLu8@yf(>{T|x6 z3us{40pg1Ow`VUIvzOxlgcb#-?!sI{34IXi{L3?H-qY1mg~^Ht)S`W?MC^Ixytmtg zh-^B*^{0&4+S(dX5?B+`6}j&rN~B^tgRIgtfHCN|l~g7Eb+rzN8sII?U4E-zcXd)Q z*>P*Rw-E+EQc#{$qviCri3N#ucqz;Bdqxh;G_c zbOflHmPWcGaj9myYv#i+knK;m_cb%Y#%A|6R{z_>@?v|eTROtR1_sKTgYq9YW6uw& zpDk}?b{_xPoQz>~tE>CC<>sj)6s;4=UrQEw*9S#fsX-yjqL4wz(M5pY?MZrklk8V! zt`k0mCM(21~D}gjjQP@>VE#9W;A3830%V#gBWq5Dy7){s1L|fgpBOXx;SMt4z>#XWVo_} zVM+xA%~&z#+s(K3qaX4GQe2B!!;35$lt!E^Y@_@V5d`^TT0ebSQY*~1G=*VTM#b5@ zzU)0!D+F%}jP!;<3hHn*h2&&}DKjNUhR}_~C{4*EB1|Bmr0W!Ju3L>6WBb{Ou!Y3A zy}>N5hG`>7p$#2!dbsX4(Yjpw>_8s%XI=1(#!dh*rqV?J$~$iC4FVAW76 zEayKYzd|S8N0h3aT)Wiy;VB}XGGZzK z_(L%mM+&zO0uM-_x8f#|k#*!YtO{=I77DWLLH ztC99iKK?&=`|t4-x(cVtq7Jb=GYCfrdtkVtSdLMTcbl@dd{uTZ)EP=d1cE=-z1-kmAE$466z-Tg^C~WjnH7hO*U5cM zYm9cR)Uy8lVZAZx0?Gn6J@LB`die#w&8I7U+~qm$mGQ&fftQ(0yN$PxFM}W7ayR@K zb%mKlr+329w7`u2$c?8y)sm9ng` zI<%_pWaA`nSFPhuNZ>i0rqy#6W&k0d?a=s0b0k#j#)*!jtPjej8iwDr!y@P0!|&@t z%PS}=D3a1JB$w#MRp~b3Qpk|vDe#l?g&6L`un~)RDY|~A{o+)eGoHwegyEOGJSkn) zqK508;H6Q=_iB#8BS3}9(j+(F4 zRavN5r4heQ$aiUCmb`s+V-@>IqL5e`eFEcC34iX3ME;P)*|MGM(gb11f;~Qc{nXg^ ztkEK>sUsE!%w%+dSY5fN_gSGqieJQTE;;AV$1bBgt(`e8jl<<|a--LYOXP^)uk`&D zjzwXgscwxEIIu6_Kjd_$r}lR7tmXjcn(AZ!a<^2vF`r6d@c$jfa$kR}e0ePa@m6xr z0d4J<6%|l}Mac-?BJ{V#6)^AL2N|9oLxnn?ml817+>ci|mW><$JLCLc)LFvx`T2b# z*Wz*vI-k7*q6AOIV~f~+b}xoZO|)MVpwX4RlvnF{{5GreY42zMAWRML{tft>P5O8F zYM`ZR+CvqJ{ujMYu`U~*Yoffg?`v?Ggi|1Trc;1Rx3uF){_d9R_V@n^6Z6l z6^yjt=mXJI{J$MFhf-xmbQ_!QuduZwaP=sM7bFuIx%;%BgHV?aOhuTyoNs*(?XJQZ zM8iYoTyvP$5n7a}2~q5jn0ewfk3zB(|D*lrfHbk{+h&HNM%*Tp^B!?FGWxQVb*r> zN<1?a{mSL}Sov`IBj^RH3?znOK-B(^mvjF>6#!ZJ|7;u$?An5!bASnlGId)!&G_*^ z_vIKwHZ=j{$Rj8PO343s0gl+9Yn{6<64RmQ6X22uk`(a&GSrs`_%DR2%fEAoJzZ9H zd8w|6Z}8U`f=#bpQ$Xw241^L9Wg&D9W0Bmo9U z{m*KkEK9B}LgYU|NM~JIAB;c41HRUfC^b6~y-@7vr(arDxhTt10mY^*$yigktw zQ>R(a(Zt6$cP;`d&6-#4ei*r~L2(?HURepq*M5?Nft|ZoS6AXBK>tj^&#P~&>6QYi zyTSbY2?`|q^n}8MyJN)15;nKc=O?F*$E)|i<`wJm%&YUBJm_vJciyoPEcq|SmUV)K zB6`msQc7c1dX>jWw)FwY(kJR_CYz|qb$SPdD}mzOZ<_^|ViXFp`Vizl%_pQT+&eJT z)6S76y9pFOkMboMAJ-tH&ty#uCYuQ+z!KEkJ1MDHs&0=H{}D6lVYGLjzxP)G;)H5j zux?-EW(I#1c}j%c1<uMUMwTlMrL zJqmt9`G$CgEpVxcVgth>^!1So8%-Pu?YX=rSeU}vvbcuD-l&wMFgTt+6RDM?f}D?? z7QLqnVV-!|V8p>gAgVug{sMN7qA*x+bjM_e7hb_+;u0b|Ur@0Q(~e7^h%XO1mG9-3 z{c;|X@Rg>Y%Bv7*Go*!DV8Do90&9=r@*DDO)+)Jdb4GCg$#$6WtG+D%-TCYkMgMd* z36@_i{ALo~W#YK8&6gGy9$HjoiEqN+=;Y@xxYg=2dc@-T4oT_;vw9QwVo*ztDXJn* zg~VbzHLT|hU{|I~jp&x+>R3SJ#Oas?h&k(p4C3qG&SQVpo~DA#4ZYdU46$po14Z@u zJIWTMxdZkK!w`$GPbiMP2f?&iaef1X?o7OFc(&g%3z4Per88xl#Xa-9^4^rZwIOd2 zFkyAfJEL=T4i&cPy>f{@BnW3=DXzi}n$R4ksr)Y9tIbJDZA)x4yI56`Q zU}}>QA{Sj|ogC_E7;XtP5!WPX7QPcIJZ6kAkYZcpyTn*^E^m_gS~)9E@s*Aa_6?*J zEkC%1-(I*v#{yd#UbBdv#eC!O(ayJ_^Im* zSFDt2%a>2i!P~+6!b~Bpb$>PEr&n|<%2dPtB405y`suV;XBo0 z_Fug2L>PnIYMwSP2OvcH6jMg-N{X<2Y>6)n7#;3}1BV4fAFAF8cs{4*8dLv&=Ub2) zU|j>fY6}Wd!+QTY`~{+PcllRs)r~V&X41o``Odk&6)Jji+zM`c_S@}!>wIpMziM%3 z)Xfh$6VPe6$fB~Q?Z=Ryh6&{a@ta}n*jy9AXCyf!Bk-dQjG$4*JnOL{36X0+(i4j2 zYX!5Q@l|DosD;ryf@C1l~XbsUHp*8VMhKpO;PcC>kL(=(jGe%zw`L(6`Yz)=R^|E_W*15?u4`$Xwml zTOolELfFf<9@|~qt1(e4_e<216-L8h`#6=J)rjdcBW-P=sVyi8!4y=ata2Tb(McQL z75x&?+6{V&1U8X7-|(7%2r}@dzWMDG*xTRVUx4-Rk7}F%5qwbPqnzmcZUQx`d{!>PyiufVgyv6knot-EmSYqzgYNx7S-eFGU zWD>VWUV^+jb;C%m=><$E{`Ikp0R@~m@IVGlBJ&{w^os0^AAj1KWY*))&JKch(a!exR#m#W6Bd1Z1m>3R8EOP>DuHdA=Yv zdOk+{A7}A)*!bq@9E6wnd*VfDOoo0vUo&69 z^EStO{zrzbvsRP>5uWq}@7ffsvY{xy`@Z(=TGPo`68)`4sGBxZ?lqr{XV~c7mfu57_h!%FUt2IH5%oFt|u z)y@rt>8!r4B(moyay&0`&wMHWgB`6V>vL+XO6EZ<@#!(5Zf77H#}Qm1;mQ1mY> zl2pwXeql0bI0}D=;T8XvhcGG`ZMGHBoZK>@~QQ=g9{TpzlN zga^uXHRCwC9R*yNieBl-{Z^PU0^HWJsD?czfkzKd1!+l6aIS=Q!FLH)S8gpcuj`^W ziVm!k-{?wXcwu9+r;LQkNp#{w2wK7A)gNI{hRBSvx4*qhY8Q8m@;<}@Cu1wR`f{^g zs@0F8LpU`ntn7`sa9j~<1#51bl9NprPA7pJZV|kGS1)tO44NJNrLqEyLK5w^dS@Ej zf^cflRKyOUR%hX@z`F)sCU?Rw?uK4^d@)d)j@X}J2%E&0dP!JsanOI(BqezXdl(AJ z%Mr*${@kuYc99V*{FyXMUc@(wAP}r2|79UB!=vzRbxeK4Btn^#i;)de2bT|9g^s*O zu)&G;ezD%v{6;5M>(JMNg(Np;l$^LXy_Nip2=5m{90yVD^}5mb$;%QhLK;w)(P=^* zosGnmutc;VXQ9CtHzy7n{vg8O^ElhQ=<(@N4Q%{m=6=vz8LLy1SDfs5!h1x)Wc0Q9 zn+D|OPnLPG&8XBURjf=dHA8sVEr|NTZ5rq3+GOrvR-yHTe6P}~Js*Z_)P4Io1&s!Qnvo{+Z?JT#~|}y{J7VzMR;9^LI$1h zJ8vcTH!aH(9^IcqoOr6#5tZ>3P!6J`VMX6|2>x3G3iLEX2gIN!;Wze8)*UI}EK%Z% z+@84Yyo)jVOS!J>`L8QxNG$-Qm6iXC; z>ontx$?75Z{G2S%qNUSW?T)S^P%V{5+lE3Y2mx|?+opOG!i(lVy0qoTRd6zYnI4a8 z`~M&l*YJ*2D$~G}VlOjVOVulTzOTKbEt=S)BR1_Wer~-pGw9IH=dNp~we#bzERL&= zO^<=ScP`y?@JFa$8cNyaM-w$F=8I~W8S9i-*F##l8jeUJAsc=nCr` zin9?se1==Vj<)=x_l=-J%1Ome_9$BgQmq$v85Wbs7vUg5AmQ2*8|NqPC>u*jz?tzIt<{TKZVa7u^vG_H@~T=>BZ>}+(gI*ad}3#@=TKvG!%hNCJ^=>t!2eb zT@btWV{RR5II0P`*NSP?O&|^UBOEZ1k`@78%uE`&c}_vZT4=kDt@ z`0=-|2hf!{GcyxB2z($eO$Z+$1aJw_u#oNGt=;=(;pb%&tCzVG5EM@U|0TuU1@JsC z^XZHG1F-BDdnbU2xs+{dh!v%Ot^>V-%9~?b^%{~oCR_wUr9>v6S$W!?B>Bd zp9I;ISx~z znF|pv?yYHY6mw)yj9#lRQ(cBlb74*kewUyR;JS+5qA zytVrbs2aq6Bnc^U7rzs#o!&NKH;3=AX40+~-1W(-24+XI#dS@gaF6&{tPHz~18=h)O~_X%z; z66R(&R^jW{x|HY9pJeqjL%(9)CS<_5m<+8(A5p_cEgs^&1P&hVt@TQXB~HDQsd z-d{cNEfP}f+hh&K2D@S^t)Ws&jo}-18d_gfy?65;@;=4Ys)kpqQN!bwGI}9VzAGa+ zNBp|2w-{O>;r%`0Dz3q81nx9M={1UqXgwi&gc4#Z=;J4QxKlX(o$!NStJyQ8P1Rfy zznRc$1xzl=zOmOlZSvA$mh%gAbQ^_#2xB+hkTB}Pr4#T8u4CoZz500QVX1A+k(R21 zB1EvDd6tkWxUW2Z9+rde{&lZx-2?VHz5T62@|MnVdr;=9?=6u$sR>(pqDp$$9Zyfj zLv}X;sf@|g*85d3i9W^_+NxvD@(C8V29ugWI8)k;pd@j&c~_VI_Nm7x{YZDvzO*9# zgbG1XlSUioO~R&M3{iN(I4744d0E~ARV9rkV}k;;l(<=UB&;E9G4Imb*u`j(or;m# zG(*u%4rr~(FW~*F^_Tap!CPP5FL3CZ{d80xOWLvVdZg54d})4S5PTrfN@edU#0mH; zsVb|)V?_Ec{+MRf(}sMAPaHP`@N|sK+J0vlrZ*XN^VkDi|55!Z@Rp2$J5Ef)4p%k@paTYTa*83E8EaTy7miyd7*Q@!==8j2~U;qxx_OXLaKfA zVP-{n`fGF!SOr2=SSnBx3hL8`_2cE0mD7V6mKXmU5XpK4V0-_8asjc81^|R~mJ0&? z&l}I=I#dtZrY2wLLBePiD6m`nIg>vEk}|XL5LVlKdV$sN?QtvgW+RePMKPL*?nR-x z`};%n7tBjz3m(Gk)38Wb|J=Ai0ni8ZJ`Hj9#wQ0r(F1jrzz_x-9*iX#Tl1R}4RE3Y zWJQIHwH4b+Dx6=iVEB|>8%4|+Q$N;KJURRTg@LUMNYV@ znbCkpSGN;kLd)_C@K&@naTwmM-*rMi9tGkZFXO0F&^;++3mba_m`h)1V*UVC46V01 z9{pdb+k$hJ%7}M*4f3!re8-3nc@kEJ*q^h(+iKmo8?2Mh^0=|+m^l0$%30=tM_FS9 za~Vy{!!?5~2-R14>VYQjI#Yk*2;H}r_=5K+bX!#9`EsYwHmcRaofB=oB3gn#IDh#| z7^81A^$I)E-HhoPEz5%WC!7gB=Zd6?ucW*E%~)f>=_5hzs_k&i|9OUg%{|LW(1BLZ zFX(cY z%iri?9nW2+Etb#ukzK6=82}86Ga`><3uF{$o;7FfP+62;V*FbA_yf0t+_CNLaLUprS*Ss_Am2{VLn+ zZ;qsvo2u*}RhKkQwaBvmc2_*58d7eNXj6$9F=k8?w820XqTt)f z8lGhjB5vkzD#9y`((fZXBSbSoKZ(gPB^+@+GjwR6o;g9ex&sq zZ%^^WHaPIs{iwS9b0m^tb{#U^Oe7ePVTQc|Z+k0x5vSKJWe0m5W}ZP(MP4!>7^uO^ z@`a3B9>$Kd;37gR5~b+%%n(oY_`y)dOz9$*gUx{rU)YV#&Y$~5>=xvqRMrh! z5Ca49NY_u&y0H;0$A_*&=6qbKFHFj4$der)LLgy2=m@sSi>0Q|@EBpSN-LD({avV~ zu*md%Ivbba>ok zhGfE2JPL%`#0V7vz@yrET^vGT{A~)g4HbZhi%lLy6-B)5cIa9}ZEJs}ZD=)* zZ^7uS|84m-?oHSN+A22m)z`@`pj9)_BayD_asFOOFJgWy>Ve7o!~PiarcYs@i+=}l zNZDA4PZ$^{SEj<_f$?|ADQ>bTYOcMN*Q~BTamCaqsMj=O_G1Di7%bbH~3~hbMArpqyGTCl$pc01Ne9Cv?4;#39d9? zIp=kA3s{G|1qKZdNGsHOwUf^=XmGk$JlBh#uuw~l?Ly-u>y$M^@vu+f&@B_vwLH@6 zn&~mZY*uq_>6oOFRqcevuF{*KVQFBP6N6#_VW;+XadysHC9{>~qlwPrO&03My)nzq zfrCjKJtQ}j|2$|^#y4Sal>sg!4PVl0n<E|?JhggAD*<>Nt$%dXHtm$^!8c*G=-Iju(P&NOU^hNwZr8jeQ1rRi7x zK=+_iq#;SjK+I)?1(&<-cEyoMPsI_%6jf7wX+cvDl@XJ;eSz4j^`A~eyWY^**B5!n z_vujNE$cmZ_Z_h$xct{+dNZfD_wscA{IHkMvK;g<@(yT#FfAb!JP0Q=*dd$j2Y{87 zGuZ}KW$%a%ND|hw+s|zW5ZKAd$-aqk?-roLP=eY_fE~80rA31PEY;^+#+o!b*l_Wq zd*kXskqy88@ZzF)q6OEfO{BaGFjSDdxRiEwb(w$vm6wV&y-*tsv&Rb{F87X}YqU9s z>ZSpQ?`GeB`I{rYH5xyek>7I(Qf~(sesBGwMgDSgdU{2mRb#W_cet|Bc5{ife5*Vs z(%Um{lt5|}n2ScS;gg`v3aaM#0W9ur1kEf{)qu8|^21{Tb~8h>a`q1aR@xDeBP*vE8+sjKoBJbw1RL52oc^&gi}8(@ne(9kI%h?uBG^{V&B-=oa)8NT!9vqfR`1#Z&$? zf*7G8LB76jti{(9OG*(}z` zIX_M~R}m#}s^=ZomK1lbCK!Br4eos!Bz@c?eI5JpLOotMW1?u9&G*IZWj}#bMe)_aw!l=J0-IDLKt+GbG}UNc4{zUKcVI zYX3afymk+6kCxslzCTKa60W$DT5f3(+wAc136Z|N#>FJxE)lVG!oSImdBow6pX7^Q z#J$Oou~3os4hHr~q&J1u#&y;x#EddMBYI#=A3>?IiLi4z$Y9S7!cAh;L`g3+>usCy z=I}D1-2mo8J3(VoWoPbZ=^{Me-jTELpO_z|1uyvGotm}3BCp<$^k};jjCXVRm>o{{ z5aMpMSxdW%z_O334XBy14A-MQ3{MEBMS~4m zw5j~^_xG>0%QYwd1LDu|T4GGG8^6mUJ>s!DeurT{M|Mxul$zADvoPO>_jj z9&PB$hM2U0bVvLoe=!sdXEOnOR$Ch77p_HtGJ!pmz0@%w6y&JTc~LnfdTBWfHD_}O z{M7t3InGZ5W?#O?FG>q5rT)s=k5WV0K>CEtWf2c26I;SJdN4`RLKF?3&ykLKE{`qM zS**F8Ih#%f+Gmm@>~^=y=jYcShT-ATeK%q zc<@;^FFk9#CoiG{LfZKEKMa?=G5%~H%Oa=8%xm`?+P&}aG_VhdYQ zb*j|VI35e_8_CT7Y5^dz7g0W=W(l~Q?ZAo=kq#_~DuaC45Nq7*E4;Ftk4t@`rt!PG zMIC!ba%T7AtCW`H72*}>P`$eQ%Igy~5(O!f=h#bFK^&Ir7YNXLLjT&`YrF`Y{yRhK zp%0CygDp~f^M@Y@+6ow1otCkBVe;=k`)PU4P~M^U8e8N`obcHQqgs7peBgWFo4Ud3 z84G?oE_y7Rc&5G*8ChUee4un8l@i3iL+>el-2E8y>#ytOx_!cekZ?r#Uckj_D%|_Q z^o}YC11xlEA$xuA&#BhAQJ?`}@_uHgs_E1|EbFy*3OU{Eq~8OPkWX*KUiVe5FA_Ts zd!kQ?nZK!94dx?+st`X$<%4Re^l(fuHy{N*%XZxuaUOX6A|6Qq+w=zynM?29QP5r^-%*3ZWq^ZEqr=#Q3)2vhhfrV;P_}aK7eK85 zksfD5?vTJVIV=rUKwWP6MuPm%r29)vpL@&9tTm4F&#q`^m(<@7l(3dwsS$lDVhjFYro+$xnsKo$j*R=9r*vs zJ%PM`z@I(3F-mbgc)tYgJGdAQN@KJU}ys2D!`>E=n}r&RMi9&CID&$jBtKxHllZ%zC#z| z3qe^xpvhqG>uK*bU@$oFmX@R|y9D%?Gw-+%W$=>s*L(T8{R0INZ7S_b^AX{nfN7>6 zRs|figDdBrk%#NUu8dBRfW`Z~0#@}GBzY@hftvQaTDN~qJv61N}XLECNXF$$sx zc?B7Pf%XbIro#wHwlUp)@EGt$KU~oh>?qnW{ks%WcTd|!5HM!L(40d!aW=K6gFkF$ zY*F8J^fflnQ5eT0689s4GQJq4Bf!USRpPmZ$PKzmuqTI5mpr0Kr#i+73$%J3{LCt! z26G*(9WBfL#FyVO?LGYc8{6np&%Wt8IhN_Z80DJeoY-}DyetUOw*L&A}6TObfA0G0S!kX zy|S&X7Me=z+CUYHRA?R{;R)xunEN#m#@<8{;_A}p1R*`NMd#B6)YlNtqw`xYDzgm* zh`7IoI{d9y$KD2OT0~$uJh7`b_|h;>SoTY$vl`?g!Nf1Z;x&VE`-mY|GV^LtsJ;Fb zDTa6)_%<5z(*t^)?k(JI6fhA}vN&6i2G+YoRO!hH6qzAWW~N2A#n@9(6rQtrg6@-; z^RUihDU`5_sIB2%GWyULvN{t{yvHWBpF-#0|9-vYPGmIjDq=IPQ*^jJuaLw00$FIb zX}wwbVJb9!HpQ&Ogq1)#jIZbm1@uwkER#9nZ}_{r;i&wHmiZ-X@2?hYyVPo~O|;i% z*6yX;UG6nJlW}##avy%~?xlz&Q&@vdHTlK4%pCIQhdn8IO2|@^$8hm~(fcDvMW*Lw z6Eq{ct7J2N!txwe7guS4BsPaC2-{4P9{`i4s?;<557(Su#mt z=)ah^n-{`HhC=s+I=rqle;0Z)!^V1(j~F@;_}03CmpT?&lA#yiw&3?;lz=jyL`3=x6 z_+Jk!JkXB^%CEhNnEwADF|BVS>+sRMvURy(Pb8iaSjuz(+IA|eHY8i(1k8&q|LvC? zyMX7NzJ&T*w5`FN4bg7m%e@bTMzbTxdiWSoQ>=hes&O1g;=lrW9_BQ+21ehp2)xbd z6^A(9RB1BgXpcXbo0~(t-ksPmOA$g<2o{X6@PQDcb4ayB8P8gxvyG(kP+N5l?kJ|% zp0LkaL)s;eG^A_9P$zuxq9Wz=$>7`1dip<-Mk+Gi3lc;{m%IVEP_> z&rt>jpq$MH*XH} zFfSW}KHahi`}I1Z$WpxaTjPUHf?v z8{5ncK(zu=6(A8zr`s3+i5K^1sc_Rmd^PgWhZE4fv|+)dfzL?*bOu18-GMjUHb8s= zeLaCYIgUL*gW7J2JnjF5iiuK4y#JaWzHZ*zpgsM#5q@&+^~^sEFnzHZOFrtE0hu23 zyb2F}>G?BGb>BtNV<3V_b10be<>`xC(h8(+sZ)a}OggtQF{NPLVbeEA)!v&Q=g3oK zxp(WhB-%a<(e?JVe(l?ItKG)!GUQ}pgr4i-+rOrcHZEDb&s*sCRef(GCyO?VroLj zJVq*v0TEZM3p#HtPwdMU9x8J9F2e39krcYy4x3;xO>-GBj&Mm^Yr9g{uj3gI{WgyF zlu#Gab{o1~8B%@-j)Fj5H-`#>$x16my4$Hh#sv4lQ@1GAEvPgYN5(UrMxwjKRRTLT zz9Vmw5VxpGMXF7|Hv^H1*?UQBkB@kru6D4 zWkcdfKC`>w5rs#k(jsBDAl@l)7Zt{4uUc5hVp|OOfBhCUj&dBw&-dBopnW0JM&j#% zBq64Mq@!ua=@D^hx?FRgeat|ZsuLVfE{c{4D;2Y>hX=RoPF@~?ME>!)$(W47I;X1B zEY3_)OZ;uTmyC6!wj_V#f<4fJp4E=aFw!4t>0BU_Iv0vzLBv&zJ3?d=%xNtxWudw~4kW=RG26ZCZp!PN|96ebE3m);&J( zsvr`B&JsO`sl~qodRM=N{)=SU!)yJ>WdFXG0L=vFivL1zFMxRCIUJK}MG3QA8l>_r53KDN2^HVNjY?cqaD0VPady(x+ z2Hzi$|3ef;kn4jC7r>GQ(D(s}Kym=-oB<%l{wRfbEr4BpU4Ps~=x@GU0-HIbo7cOc zse6I*%`>>JWpK>4dbI`0G1|whb9%I>$#6rfzN}Z3EBJ#B9 zRlXEEDvox$#U#q9Y5AMOBS9m+<8B2Y^+Hi2q7RQ1ZA7pYg+G<`Y(cC<{+V_bHQ-W@ z69)N_93c}YxNj>)rLYr7aQxx9|;CIzcsk(a+)Au^7vfP28=ojUZuLy9LD-0c+?8k{El8{ zM67v9v!x@*y*IKW%sc#&@brY8@YLG zuo-~a$`4)dIE*bzQ-NE6o46*}oe#F&_~X%%^;8AtqPM(wT*wd)iTEIMn^8OL-jHOb@i5AF{*VH6uagzw^vMb zmht#)3?-HwlERo~wrr9ZR(9GUiLy%TzshRG(9!y*Ys8(!#};%qQz|&^x^++5Lb)6? zDI%@>yt%J*MDDC96HZ24YkxGClF`UUbFq$tNDz`KL2#<`4G5Z!z{*anem6@Nv4xi& ze;lU_5& z7x(PqGerH9Cpp@(cY;986nh`L9IHXTNijH_Iiw>*5lJ|Xnjw8RvY$7eJ06;jr4W)p zynwV9o5=wYrbxbn#fFY%fp`}WMdRv4Aj?@h>4x2f8kw-qz1ylnLq3CAD{7ZH<(4OE z)ur)IN5Uq^q%o*qw`3O7%%6gnGEf?iKoyo7D#)PA*$rM(u_;?nD`Nn2Rnc)_Xuas` z)jVQOAagpo=?J2_f$~B5g$a2*r>m<;pD2G*Qhz-23*j&74s+rgs2DITvZj?A?pE`a z$Pv>#nX4RC5}MXv){tw7QQV+ji$Tw2O{aq|YYEvzjpVhBJIk9Bpd>f1=SXxb91Ehd z!plX@!*?x5!Gto$r%R^Kw$3j)E@m@!0=ICb;-^L1N|}c&+H6$PjYVQ;Jy_v(;Gc{f z#m!-s2vD-Mu|+284djT!)?YC@e5CtCCp0I}SiT*%zvC}vEgB^(p36eq!VV43;YmR> z#v6AQr^?t_#GWdjl?O)5l04t0P^2gV7s@k>a~2N!pVy`^{Uain#XQIVC;IW57x%1` zP2VgGHS$x9!9i&B>{OP%nO-WogiT>et*U)d!o1I@(A;F@Cl!TA`kw=ogNbSLsb}J< zw1~LUA70Up{WBK_ zfj1+si)SKSFVO`#7b)l@=r6;^Z_y3(sQ*hoaz7)f*a5Qfa6sFa&(u=&-&zic?*;yu zGVxovKX?M*hZ#d>AVUvar(xM^?F&My(+M)+9CQhot6*Pe_3dm! z4A*s)=Pj+hb&HCME|0IO zyEehn2E%*t_GUCIk_6^jU+{J)Gl47+`!i(iW$FpPGOlN!UQjzCn1*)l=3@Uq{Fs=W zobKTS&m~Ljl7WzK3JqieFvR?>l$=M{QMmdMMo@_%+W z;QINb+P!nT`3cJe@18YaB}a<}T{WQ%lvInFK_sL8@rYulZYVa);jFgQ&Am_o5{~*ZU6GdY?Lb1?WZU z2L441?v(mD@e+|NI{rD8BGFqT9ZkyR9ApC$B7spxYY~wOs^tndDQq19D&UkRJ}iLQ zXE@J$V(|x3`l^mp?8=t^1-+?@1e~Cmbv0hMntuHL_0wMdz+TVA)#Kr1=7CRulG&fb znKa^j`FnZ*=syq@ud5j50p@NYZ+D$ExMh!kn$<>zQb8>|UJ;SQr|;2s-v5C58qas~ zG=7v4JzDtx6}0{FNO1Z5-}6!dQW}h8FRs(M)nIrDa2QT5Zs0=x$i#29)c1KQBTZ;a zTlR5(!QM6ingqd4{CYDmqM;28PJ#YeaRtN7z31zU5KJqixgxLryccf>Xd|dU8f*L> zE4zI4q-7QBoyj#OfK}WnEKt7_hni%~lk~&|Z-he#Bcy7mhZcm#Lw^w6ZcGB0U$CnA zu;3>LGKZONe|`4G9l0i{<6>)Cdl64C&Y=X=;esbAlU=@{*Y} zox21~L$bxy&**M(PDMYuU5S%rN;%;sqT^^s>?h8fmgxCobl?t&L1N1i%&h;Ltf?ih zIkkkmkMP9jt^KvrH^2?Ep?STL+F;!iN^O%mf?a46>}SqA3Lt9Q3M zQRj;*dezt`ky^sEQSw6(Tk#Q@Rhh%X$M@;=2RnG_N%mSPJ8hAyGAViVJ2@iFz`m2y&WGPh zvHl(&uq@IMbE^D_NmkyVyeUrgIa z-o_hI}pslcUq>Zox#Dh>G(Q5Lgxi331!KY?MK1DK(UGT|kL~@rD$E9R&uJYVY?pe>9MH^OQO}>9SA_5+)=mIITEw|fx!`my zDB>v<7|%78hU=hSM4dds((eE5u2SgT489V*6MQ=RDxZ?Bi1X|5 z*qgA{xvHr6;hF@+cgMy3SR*Alel@QiGt}=^N`VpUw8;aZhN0!XfQpr(^gQ*SEQy z|27!!2hJOZ){nM}&29nR4&v}&01LdwN6FLIq<>0?i0b?WeX+H1eSR1GeM`CPZL{r> z+}SIm(o?`^tE7Xn3~~yV5r3wmjU%3!Up;+T`)EkoVf1(i$`ZFEW?|`=m!a_(Io?FW z*rO5+C=32^lHUVoD!P#e;Znc<#<_tpsGr&r<44&7(F;?hsC`8NXv0XKvqqg8ez`2@ zrWXy#Wc)8idm-rQ?~f}29PYi2?GqiK1JTW?iw-3m1)CW&*^E+rUb47^>wJ%GunuQL z3$H7*jGj3)mA+Etx;xQWe#d?LI}>`_E?{jgWdkM-1h6kse!EqE9u#?o2I$)VRiZcx zk^mMsXpz^4y)TXZD@K7+OH0^;Y!H%0`$~3ysK#CZdh%Uii6-*g2h1cOrBi4DOin>7 z_LRYDBz(!%p!a4}TTm~4c_@BV4KmeBy(IhBq=wY(IW2@*BfeMty%cDz8F7}>RPL(l zDp)ZqYiB^t1bF&wDwR5dO{nRxS@r18vua3tyiLFUWFBUvf~pIjCOqxpQs?F53hyu7 z8W^ndZ<^A54exJ+&nj=x-Mx?sqTdW&?}Te~E~dQmZ@{M)X4!{2H-XUw*mnG$B5_jM ztk9mGp0sGX07RqxjizaV7oOq?fKO!;%~c(2*y&%l&O(89Y@C3y^D!gv5%>M?6P$?* zU1@l`34BEZQa7iLyEp#Z`-+f;B;P*!o`@oRgKk4^W5qQ5ZJ?${ow|w<^&)bC7l%H3 zFB{H!b0O1gJNMf&>NdiL2wpd|1S2uc&_L*|ed|P|WQQc88b~|;cRzYXShHCkCA@gC zm);bn^8*40^7cc_rRnVz97(lUBrNhNQj!N7JfM#qOOEHo<$^(vf{01v`ti`OQ$<&U zBGqM@ZX+~ew%oS{;hxYM>sg{%PLg}Pt++5LsCh7Mow9JCWT$jG>jUi|r;G0H!VdIi zYWwg^#T*$05GUTbjNR1a)Ag;lFPRvfEC0`01gGk1(iwyp&coJqVJ}-|`1c5KL zA#ud*O=MT0h`+>G+RQi{JF=#lwJVJ%|{I)K2sIw%M7n=-XBc#U#!zJxU=BbyB*JLjQTY14hoNR>P zC_iZ;?a=e0u5D3?^Ob3sA(+L>gYwp~P2yA0Xg;X(Hh%M{Ruv?wh(?pn_|mc$p1ip& zG>wXk=pwLo2lI$4irNkO#B?G4B8;BjcQHsU}16r~cp zNxsazOcuog66Lb79IxtVbPqk)zcb|1<&SHPbsN(JEp(Y6Eg@PE#9V}Q?C~yz{!CrN zYz>cqQHZV{`oTb4o}^#SlW@Tk3YAGHJz8q2*T`Y=D68z5xw%7uAb(9fPNYZSS(2GZ ztbwwkv=S{FXY@%BONVCjsI2!w>24aRUYnpI7NZ07`v@rF>J>9PucK(%*&5W7w87t*Niz7|CK*Yvy5{f?h1C(K%J7FR&tS;N^&s9 zF?6FTQC2F*)5;5<^Bn+aR9R_Rp9tKD2WuNaUfrY`SxC97aeP4sWgO8wrs+g}>Iil^ zOtE7s9*~l4=zJ7Z%s{FeIb!L4g_{PW^>Eq3P?m}%=v=-yNl>tpibt9g4mp8ijoO;V z7!E(^r$kf@(i&(jZL$Aj(xxg9lMggLI}P~8?nMfuFr?7LXY*Qjq0z)2C{3hs$O`6v z+&+7;rnoHylEKuDLU`ERCyA=V-M@$>W?%A)eyIx=6TAnVVpT=ckXwkwB(5Ky%Oau4 zoZis$GQ?Mc1ykh9aVI~qba6eg|Es__*PmQ zp{f~mLB9{m^L`MIOtq|sZ{FznTP!o9Z&=?M1KJ}_`ErGOR%XUs6IgHB>y-UI%*bBX zfM&hNwje``P>;`y}T3@vhPb=t-f)a8#1R^pdIn-CGOiNO>8zS0Dw-bdGG!&tok$w^%vDN;N#ATFyFx9N0df4#cT2 z7H5aAV7bDq)oV8*GwCcFuv{tDfEx;;vix7kYoZ)XmRUx?Qy4Pjw!fR`?X0~@8tNBx zmD!L!9w2u>fd$KS6Zli0^SsS@|NN~AB9VDKKdXDpOSvgWDCgoWE#JW0mE*71Xnk^r z9F*pa6vmmB{x`199P`|TK;mdB6$m7V9Ee~a#8bL3F-#T$RI5fC9YWc-;d?P-a9e(# z^h|t&r4saqFfKL7P?2CV{q^)xFSb0Jl4Uc|PPzRnn z4ma`YTd~o1>j28Ul+Uydw1vXufS?#=donRVsw zwoJN`Oke5ziywV(z7VI9$=w3JdRMtidRd2AF9E&Y1iYFA6|D~bTis%AWvQ;z3F;QX zn|1H4Z|_l-3(6w@yNCn0hd{tne=tfKxDR;Ym)u!=-!Fwz|5ik^$!yfc#@;IK)*i+7 z5koI2g*KUNMwg)tPul`E0?;6}?i=2B%xHuhjtKgbc%IAggBY%dp3+XJRs0Kl_As>j_g)N6# zU;wfiE|`a8arx*fW8O`aYUa@GPqHKw-apgK!xlN#LhYrm2FNH&kV-V*+{0m{II~f} zBnd1PTN&{k5IuGN1TWOTTI*V@x^$x1!woRW8w99?@pd=!ad+NR*o@N|2}!5d-5qK&aKz z3YadgCq9vqjpKz;l)P6)yL!bAQ?SY*h#IG#N!;Clfd**+)2g}C*MDZ7rx>}FP=7BC zW`p>^;?<>dsff^_MONR7>MrPumA$+d@`h>6(ACQVCat7VR4~m~z$>iGGL`3$*+BO+ zAjWhSmkJ&0j9myLYB<(OQ&6I zD{0WAp>D-w51WESO5MO_d|Sy>63*A;$wAv^ifM`{YQz|EY#AI{Z7b{Tk7IDW8fe3z zVPW!vLrz&@wBV@FqW77jRX<6VhX_&iOA*>T(@Y^AyH(-U=TcfE9|GhnG`Bp0y5fAxOukE zI<{a}aTD4DM*M!wFr zuANOU&ejTYkMw;i_tBiuk5jv=Ubuv`N1Y21)bh0FUoH<%mx~;|O80!xGp$o*CNh3y zd%<^RuZ)j=(dF|ZY3B;OGkYt4Z`F^MEgwV@V&?n)J4F6_dxKifv(V)>sUF-3lGUO7 zBOVN>EZZXPwk-@-w@4o@|3ddZKj(jE1xyDT!mol2>p1pCiQfi2S5_V1+l{l)HEmV( zaMn6lj)E|9D!&Ml7?JhnBZR z)+3xBXzk2w?wm2$z^S_c*@SS=PI4Y`6|Yc@7lDldwT2)}4f5#uw7qd-og!Q`g8=Rt zO2DCkf0t#~CVMQjS!JwtP*7b?UzE6s`C(Q!vi8BGmd;xInn&n&m^f5Tb^{dWI0^A1 zh}{yCC%mgWVXfT~Q6pBq{0);@V5NhL;rw>29r+;d{QSNDnDEKY;q)st)cN)0(@ery z=1@KpLnH!K)bMrVjyCodU(XMxZsR=?QsHe2F&8+yeTxHIH4k`;+|-Ta_ND<~MMxbl z$d(!qYO%v|`B;kt*C7BPp?7snC6z2Y~QBz=ZJpd^7X@dS^igcSNVXZ`dw@WUs(tHWm3I zVT8e%e9f*gF93$%Iz>cpsd(cTNF{ZBT9j0l-wSnBnA z?R~G6^J3-4;%~r*%M)xUIZi&)_!vdY9ZRi@h`Gz<8L0MqONS%O7(Y+^2t49%8uHKC zbHf2`V*bHx&&}M^4tsVltWa;A0sBWxz%uyj*RRhczJY-=O-z|TX@S<0y*}V)?ur&* z`kmfDMgIK{I_m8?5(c;fEM+`KKk-m$8|IcT<)cFOX(-Ly_4h>=@-=+ zvTznQQ|1MuPfW5*eI3u})NvJss{f+%Ygh7kKKPErN|PY5urbq@i&GD3#9!+& zUKntxAo`2BU{a(co}-3!1R*ag3LFN@DBDkV0vrzcfAk1kc5jf55TsT8KF5L~9UUY=}MH$ha42r5zK$LwV% zkbT%$O3}I3&6!#M&W)pUy6L#P*lGM@#jLv;`QQBweJLhVksDdf%U$9MFX@j}q%(`! z&vEE6k&OH~R|Yj1r(qDqB;pAqy;4gmR4SOsxPZLC!P-_QybMV%u8iOZsQC34e}=&4 zZY)78s7v$-jWY7Kkh(u>{6@E1m5EX|cCgQq?S(gIA0qd}Z)-{G$?I&A+O-?bT5M;D znjijj4n3}B;`*IhQ*^Zx=yvUVw9d9>HZxaKu-4=Vj}FG8iK9`9RafdlAk<9_p44O% z)JK|;q-!p6mC`FywVBsxX{0ifi{tawQ6lC1GtoF)guox#)OA2SA!`e3gbib0#l*!# z7!xMtd)li2u$3)hwpWALJdy+a{(q(IwskLawa0_}vhJhO-`9O_v%OlrEN zNy(;beWjBrEV@n3zH%FxSzuRCQHw zl9{M%3ROO3jq8L|g1?fpCyaF29T7caxz<^lWfp2m37YZEsfFi~Tc~|3?Oa8}0Qq5M zqO8F*8JlX@D9&4JSUOs_qCXs>KjF8N55ffWaS1B4fz0xlw?ly5Gg%Kehz-7M!YWRF zup4@dc^B0p{G#hw3K?JqAt+%!JL|*tH0Nmw=F!0^mxEIkTtTpCNG|sfqt&Ywmd)R?Qub`(>%vI;>(IEjO|NArb9z6Wf|)%jtma z3aP4*Dh8uTZe=y5G#!^~7u&z7C53AYVv`v{*HvLp)~c~oV+PvuvN4EKqRXNau!$L~ z!nMRK6ZkUeKW|{2nce(1OdMeoy3=znjTQaNQ!KPAyJM05z#H=&(xfLdZ^72z zJ6E@d4+flmG!Ho=Qa0}+v+aN00)gThCqNVdP-NVU16336Py4_DIl~(-vr{uT^s;? zs)lR@NLMCWCb1OseP}jKMJ#n_O@lNl6AYS~EUHD$@z7o9pMzz-)u`2RZYuM_s(%p+ zIMs@ht^|$M#tg^u4q5s2 z@@*nYZsplG@V^+{s=FQdY!_)bM{~`ke+jC$$Q&YvI$Gn#H+S-L!Xw5HE)vtd1e5dc zWuv>gaE43$WS-DT`@|TVk59!e`?XYYPEjc+@P9kIoO^Q34VV{^a<=z7@awyy_j^Pa;GfPg z3iv6ASIGc8M+o}y}U`L?@XsDXXOTb|8_=h%=xP654)A?KM0H)I+slGQo8}_mo zBSW#*)MpUO??T+44IWPg?-~|7{;fZFkeml#T?R7rNI&U3o0&N6AA(;6&%kvn-OCf; zj{so&EdNecDlW9po4rrU0nv-jSEgJB^jl#;#}Y?K%bnjPGpHXTpn=s~@7v48<3hpf zki&NY1^54pzhL2;_tqp~_>6~CK%^ZL}_fB+BxJ4w1Ur3nsdXC9#^-CI?GmEG2x>Sz6(*NSzhqC{{wO6%< zrp9$)*{{(-C6Qd7{((3GsT|#jVTcl|M_3i2YlbIo(l{fqZbm|E!P=q)DZ>?Ok_to3 zNKIcxHIn=@4zSix^6*5N@UWlda)i$ZSr2QUE;|(UxnK>T847g1h)5bB{F6zE;t=xrvO1+H64f< z`@(4TOo<@|`z2YY#36`&9%0EeP8Y}HH9q}eEa1C&(5$Zn2PcSR!RUGH1 z+t%#)4j2?s4*``@IfdAZ+Wi(BKN*J^H;HDTbgyb;W5G z+RODw;fc6tzf0PV7{`g5&4#0E9bt?jGF~rXPK6DFTp5GRiVLGl;Rxi{n!%QzC+BsB zoYGdyjhP+4c_lE#Yxmn0`IstYNof6*;!SOs*u)8@K})T#!oxPRENZ6&BVm^%m#MWB zJG4nG4Z(mlKeV%>##w+*l-N=D@P=bI>P^q$d?F zsW3~4=p#dr%rTm1;TU5z$39}@h!G?+@y=cjB*i2)@1`dn zjHOA$U9D9OvQT!0#^DoVn#0umb4_ZAdJcxUTY9r_!l=NJpRy z?@MIkjkVmFn{ns=fHKkjom}5x{x8CV65_#o>n}_>`L9Ve2~ItVm!XQ&kAAVYxd*OZ zv2UYS-$w%mwlX65J`fIM$dShRf-jr<3x;Ve3N9CSe)VG0CZR1GzNQZZaup}m`V+kJWjkWymIeR6 zi5K!_0(h4IddvIiFY`w6IU5RaJm-=IJOryD`D41~;|llm{{F*y;SJPMIR^ggd(!qj z--=}-%^&pp_!onok`K*myd~B86R-Zt)Id*V#E>*zNR9YwUq?G3;c4Xs9u$HX6o);I z8muUL(h@~%4RYtU0~mLnq3f_JGQ5Gb2MJ%Vgi9Cp!%uE>?$^j?4eXE6F`=4%g^vI3 zA5u+Ilr<{2OG0ONnwZ{0J?^KQ^)OJYhYrF!fg*xd#mX$bAGRjEb7}*MC7(6KN?zsq zsoTD6|EzJek|gXLJcj>hq!WNrp7ZvCX;$JcN`gyub{B}GWD_=4?|7CkNd{FlbYL5e zT9_-|w{_o`}*kZ0g5d=2e_HmOWNoEQ$;Dks#g8^yYErab@xM_aLBa?e#jN z_Y!E&20GUu%CP>+UIe6zK)K9}8bGDN;tCQkfUn-IZr#53#txUj((Us3(KXy;)w&a1 z$1V*JQ6Zt)WfUg<`#T;Owwp)~2%zdP^FyUWoLkB62#0e*kSnVZXn zr|B(vZ+S)mk73^V7*Tu||9QoML{9kLU;p!+du4ZWT=WRK6EN6S=b9esmi24D zSKh)~@hdCB9Mx%eNTP-vML6EvGax&cD8%?jbCN+Fd{cNIlZ0KsuGD>OdJ;Lu*4icny9a>uJIKH)rocglr;6X z>@;eIQ0hF~;i|Dfps?$;M|(nan>rCJ2W*Pb2!jP>?|I%L<@w}$2Q@04mZ-I?kWyvTH;l4YM7~2oG>ANg>${ROtxjqQD4tw zMza4fOfH~-SQdOT9g@t(pyW=QRq~-o7!*McHmzkHAkwE=X2+JKt_t28WAN&JWiYt1 zm0jjNCp}I;?vVp9b#OO$z6OQ4Q6Y;sYaTR65gBUAPcRcL7qim9MJYvtOp2I$)k#@;ZcynT7Y=Tiisqy8cFB4ybelN0|F!f^ABdoJ=EdL9O;$!>) zXG_R8NE>K*X+iS**{?;H#^QiD5&L`e_h@b8vS12w)nwR@(Z+PxW#r?|d5t@ie@@ap z++}`k%vlbnhfeMUi`f-{&4%!XS8ZaozbdI)Lm@|hSJXGIoCbf%nM)R9mt>%lXbu|? zwJlO1hf8j7k6lHeD=3G)6 zO_mitPdAS@WQryg7S2)BlA7sa#uDefl3AbniBlu4Kd!pn**|gYv$l>J%>`06CuP>D zR$KebnH~+-Dyik)g@K|eqEfXo)g|^Q9<=i_+b``!>S66+^AMsK&jgCF3J`GIUux=Y zG1NTf(d%!_<1NKxWMss`Lk=aYpaG7dgjC%%I-)$7F{>@fH(MYE5oj;Mt`e}s&xoFL z`pza>CSMVI#)dCSa6~eWuPX#O*C(lCkj2DqS7b{^oT+!)%O7>=CIq&y%36de4@Sr* zu>O=r0jCx$7&;%+NzNP~8gGKyHV>p3{o3gy|C_Xq!I>;@5I-)I!t?j@?lvcj_}pZg zxyNI4=}bTB_Yi!I7;i|^wh@x?4#;L7b{BbDs!u9BZno?JtREt*;lWsBRNi!GE&qqB zvy6%|aJRiQLk``@&?${{cej*ucSuQhcSs2$AYB3qg2WI5(v5UUr*z*J|L5F$&h?AS z53`m_ocDR3XYc*na3&appU}!aYYXc*R$Y72v{BPqx`8?H{PkxSw~MhH-5R@u96c*X_tB^%Z&{{+A#l?1RJ%&wJZ$Onaj zC`XR{g7UU6f{%(vM|J}V-mf!UJ_+P&URqoPVkE%|xV)APZg7Q z4<8&eEI(dkcC@!`3|ESLuO;(#Nx#_;&-Esr1zG}(O#9XGQ>hfhX{$^@RQuNN7hmj0 z)`TYqV9OBbzdpK+G}m)Y0q2;UX-X0cD=k=FTJ-WO-t`c3J0HPwSm!ZbWPnKbs?dH#zsNW_7sC$xQ69r69f>)DS6?&CJ9H z*N~c;suli$wK1=*!Ur?%d+g=UfNvj1CKuX0nwwY_BJHEj6ehUIY1HiQkgx5E!MRC> z07(7YizEw?2*2%olk>u!5mrVBMGE;>NC~7n6kjfu+D@XR&^cP@H+0FBuYcbfkRVf> zIP_@#uNNT7+Fg(t`yCRz%E~wB^MH6AvIsO3wE{QE?tlH<|54l!x~BjAxJ|;k*Xo8b ze_Ry;)Mh|M^Lf0#u`cXDclxJf-QD>;hrUO}o`wJr*mG+S(Aqs^KoS`O46xzu&gTyR z7e65F5x;RAQ|Bc{iRUlY!DmQ-Fk4SoLi}1tbwj5c8dU0r-F2qAg7jC)mpvU_jR5Pt z2q4DsMTESH&i7cp7WmV_LYs$nSHLVdWnj@}+`W`!Om;hNH%fr28nG}sY9e*qp>@u; zJ4=x??&Z%^Q}i9S>qhCFKWg#k>f7$=9GP#kwH8b^R0Y7jGZ0Tx{@*R;l&%g%@2jn}Hp$5`Z4)FL*Sx-$_wdMGDU9wW;&Y z>Tlo0HhS?bUavNx2gWKSkW+T4jbu+ z_*4=4nuJ*Nl1}YeQ@p2S{*>dk45N_NMR^5v2NmX|jdic!W9D zdqqS|hm=AN7U1QV`r{I14Y>(LQM{J!(bBbiXx1@lc; zoH*fJ&;&P;eS0(?%1cx1b9tPt{?fK8U`8t|lAYV=3p+ufAf< z*ALduATi@Ci?cvU+E%5o2O3h#gDs8lc+nezS6P>{da^VvnEM0%wp33tzg%H zq;W9WdwMgvklYxuA;WImL!E5t$fOBhB3mHysDszY#OslGet6Ae!ZnplGl-etuM&09 zS^io_!>(CV!NbvnJOoDE<|Iw=Gt&+-9;tSEU)WVu35v}IaP$dwth={m4N0r{GH1lQ z;){r4=+{~b(FQ+H$&{h{@akby6WEt2$Jjak&_h--MdFkgQa4mbu)}jk2VzhkvpHC& zrR&pA(!np28@oxs?EBog>f;v#cH@Zy;^ZEM3X%BBRG&xjx$Cw#kD?0)`UTQ?aSIP3 z05n~$WZV^(b2LA*tfDMa;s<$)LXpVVC~n0P?e2dWHzni+ws4&jTOy&sLeAM+F}--} zr>#Uf?-ndHQ~B3T0m0x@!^9PZJ4T52#MUD%ve43dDB-~LOHby4fj5SRR29VFv~iU_ z8GjTrG7<}`h>CgK2Au;B;ms@|e*vY*zi)Bh>b^c@cp%A0N_(kT@0&Bh0aK-*{Bb*o zNf*;^U%n2)ActF+j}P@788D-Ycx*zF#`B`8i#FbL1Hrdn_XGi#I`=eZg57ER*9882 z1!~lky7ZsLAh)?EQ%3ocSNNpYf=h%?1khS(1ht|YLN^W4|2kSeD}FD7{q#DXu1{%R z0(H`^(JDupzv%J#6KOolht8m8L90iXz~%Dl{;&HeyBJ&BlfHm|vw6Et<{#2DGRHYH zfiy*EoF+@sP|BgahLFZz~v7%@4aaS)bkz|X|#Z}UDPZS~kpvRLiJh-Gmj$-H zy_qKi2p|=7a%{9B7^%#a>19)`6uzg@(5PI~`2=eD`kquiw=_hZIJ#(4dB99U@Xz#q z=WAwiAK>m-`KI=Ix3mCgkwK7TJbCs>Jok13>l9I8=kpA6kt`s4u15t1z!y7sXt($q zdUBXs(V^Ux-Z?-pdG?9`KA@XFA4~xF7eY0!`^bOw8#x&xt5&yrLmssx9<}n&vkHvk zgtc+iF$W(WMCN=CpqC%pO}ryQ1E&lCqNQ)Xer80?i)4>w#4#6OcwvLK(GVz(R2A2z zKb_4QEc`u!no&b}RC3*AYlF6}^0(QBTezg{10HX_-hyPl`AHEEV45$Cky#Jg6VL-@S6&0s=!;igJ?nO0AWP1WT?H$ zHUurEM-YV{Lf-#_5WFiD%0-BNJJ*)+*L2ak3qqy_ay$A0UxjGytsy+M?j z9w4FhX4g?vl~o;_F-aUBva`;rHU2a;W?mLK@q+Ozog`BEEv|L8+Z$pnKAi?MG#4l zdJtI=dHu+|97n0f$qgHrPn&lYTJ!Vha-8>{P#z%I&c0c0JuWd9n?DK?4=4)YOwix0 z*K3*@hPgLxw|qHtNYV?x`PGm&QJ48$w~vBDt`Q>JZ;alIRz~IEXN@GS9+DS6Z#tlA zFX=p{nt_~?Y`?ig{_anUU{*H}cL?tFU|{Y_e3{z^k_lz5bV)EJdhIH?V~AMwy)?Ad zf-Su4BUrh~ypguCo4L-n>ovC997!~2FPZaeD1CRFDSf}`&Z@jyBPy*G{#)1=4k$Mo zEk?tykh;gC@YN^kY<*nx7y|+!+C^^nEGgT0s8aCzz9y=ivh@QiGxH?%^7o&%NF@(G zU*8^D!LMUB#ePAiba>K-2%U-!sxDyF(fZ7cl`8cJp~)?RF;XH8OJ1qcjL(bbb~%K~ zD8zkM!BHv$F2CpE1d?W zn6+E{WjXSPRYB7um{*bT3!~Q+#%VXx!5&}9dNJS5GA**Ezv$ugjITR)w$CCSu@_mn zdk-(~fl)Xd>maFMsWs7++_F|Z(Wub?dz5C*vd#l4-xsrR98Np&^Ql$RuhI>fGL zMIrealJb$2lki~wcGai|KT&g!@!o_-hjQ6$wQvk)`yyw?eizY}nujZd2o&+HQXRYOZVh%1oyqBGRlpq|rxh zE0lXey<%Iv%{Fl+mVpM(qMe8)t_WG1Bdi28v+f-eq9<&an#YP;|hgBd$s0 zA@<^kR~?Ymm725G%0jme=aLy7LoUO_Ghbpxw{TM|r?qRI$|`a_-?L=Z_0;0@(NP?b1&r&6uUqNe$;c@rjlNgInd-eiLGSdlqIU5kjz}?kJ!1j)u;S z!=fpZ9*cWX!b=(95|bzF)drnA^PVTwZsC%~Prhh>@J=(T+Ho<-?eZn>s@FdAJWs}d zFS$l1D;QT`w6>o={~RuSXH{~1U$JKm=R+)OfZo+|cPH|4W##W>?Mt7lS%Cbcx%`yZ z;&+CTi(Z|m{ktAxc1k!><`b*Du00{}Nh(}CJ;T}!+Wz0^3jqYuh&>W2>O}aU%2%)S zJY4154GeKy^}j=h+>($y>mzy4A)=1T8Izr96R({kCi(XV;?{k>3PV+G^xb4J3^deC zdW@yH<-Pq@@P>N!6P)GYYlX;dB=*zA>(y&#Xst{AzMtg_B@*^yoSzJL(iI8Red>56 z%q?iW>5|s`r#$NJF%|^ip%zCu2Ed{}k88 zwvqo^TU%|a_zPG)#MFs*&fnHO02|CRiXXUPKJ%J^ZR<5VQc1ds*{AKvh(k4`?`lXn zYAdh;!>S;RhqsSee3Ijr zCtB^VZB6r#)p7Hdwj6w#iVKN$%tu^o<+41PBOs0h$j@I;6cK*blsZixtHGK)+EsTu z!aaT%M+J&v?$4fZED2yIMO-yc`j6Zk2TX7HwE}X~-_s2kV7u*DaWO`AAIXgh2?VMR zsyh-rLU)QLH}XnXxQ60=^8pOyBlLN94D3LGI?Tlpv0p!HYykI5zvAEILVrwgP4cd< z0RMu2Yqx6YDA5g;6pQO24ZCt12f=x)-{oHGB1_D$?+2>+XV>b%m{U{?^k`?aB)J6L zU8$83&HIf#lAUDM5Wi9uWQX~>u2Rn8yt}_V27}s#l8)MEB8^*CJqBw>>8KyJPcn8G z%>M+jz5bPWiG$;=T1HEZONSl5k*JBE5&KJeYSQg>1xpj1z2VGFUMF9WU9&!M6PRur z>P)op&Uia%1|aMe(0D~m7snD>$wQA6TiPjoy&5)du1>m-geU1*uf{KVbbblO%Q9pC{&<)MQ&Qf#CYldFRSA`)6>cOt{L?Xf6K`Wk zqxuWfuD6g`b>cB*JqOk;$n_q`t!ohn;dNn?4tyFiER4^a&jmYVuPuOUTD>&!Q03$il!otRxB~bT1;ImY8;cQA)8L| zX<5&IZ5f-@ISapnK8Lo#niR>9YX*%RS(}JfAiuk!#iq;svzO>VzsS#YNO8@yiZ zgLl-sp^DUQG8>UUO*R5mv}_a-nAhm2Vz{Xs#86+;w)F|Uum{Ti0RF8T(#Ei5*u22iJH&$CeR!gUx$!sD6dBVQeSRk1L$DPZc zO{~OI>`TaSA{%A_%QtOtR}ft(9aD}JsJJjr=H6W}Y80s=Q%{*nURI}-NW)lHK{WVJKN!fe3Epzqz1sY3E3TMr7H(#XVFlGH z8HgW{%;5Mo9lhN%#CbONfhl1M5H5WgVVG>(K85PUv?)5{v~HFVf^JOZkC~)Rq-+SS z{C^pD)YS=~N!_5WDCb=r&Z0=*(va&n@$s$yY@7thWFVga4P| zF^KyI!5Z*19b-H8!bT9gKS2<%v45MjZFTX=b+GmIOP{knz%H_Kc3S&p@h1!Uk?d40 z*h^iu3Zs$Jf;kHb~p;yn+R&o6`Lz&3qUX&O=VbTv}3Gtb9V4uhkXGtd9G` z^R6Gt;cZ^(s>AClk(>OspPm*an~H);W%X4vZB7l;f@w{^K2xvuI->-XW_9$lI`^BI482*r@oa>?;nH$gOmMO1+i6aG$ljky zu(&@t>Zj{(=w!JiBJ`4!>uZsRz6a$eopwf!fl=a}K$ZMpT=PlzDo!F~+59arBuDcb zrey}9?xJQGtGpMFkS1ZVvb+G*ZwZ2NkpYG_9bB_%hT-JOWHwfMD`>UHjIu&D#71#o zGo4Xj1h|;3pdB3_-y}UKG(AjR_OVR{+FaC_H@tfveIt#Z2OPj=m&0weC$Z01!61V z-&@Bn%ai$f^t@xiu5p+TlXsKXTH(REsD>B&XTm>%18e2p-~%kH1srHlAqPNS20Z>- zz^|_H{S?jlU%Qfe)YutB(vqM~k#z1%I>bTM?ok4=_wzNTt_B}G)-JUq?kK%tN0+WH zg_4f>6OOG4F1svtM}7IX_s{uZc3G`1H&^>Jq=3x?*aS0>e|$#ix#s`JAO)Dw^$Ul5 zB_T(4&tjTU%n9dmpo6RT^FK_`)A7^kAbW=+C9>)^a%t>;-rR`puR4daw)~`N@!_@A zRmvDEKpJ#0pC-ByQ^0MC=j=P4MjwXklZa+qYcD6_=l@(GZl*g zZ@qDgnKbb<3rX~}77-i3!cEN4HeVAr%{J|u&H8B+pm;ilg!94{p3sNjUy_OmMtKru z&7o@s#Lnq!q`#Wdki`=+NxzBGgie0ofA56pn!wP1ikfWE%J1;zs_)A4rwX;^jPr%6 zf1sD@M1GGO1|Vk^ABV`sFq+Z#OHWpqmTM0m{4*6IT~!tS*cD4C6fqS41JYtifJ8VlDQTwgqG_a zjW+koh6Yi6|Ch(MA^E(40^TQ_{fMnG#KodvjDEKKC@KHrx4@92$bSmDT=rCQUX))5 zpnk#CpvZi4w(RgjWbn|+1m^wZL2_nc{hPAsj;dTWTj2%Ho!pD;D(wt|CDHhUkG0{7 zxQOE9@-GnA6Kv}M<^|<)Q^7*tDIya?7XENd9@2QYJ2i$j?r-toGF=~z&#*TfZw!8A zIj@V9b5CJTvDp)(LS{P&Hx+G3F5y+;<**L!JK-1{=#(Hf42FqK{y_S8!DH|GFNs<*N)=(Kd-&4@ipL?rw)P*%$XX04@CDjTz*we=MJB9{-8&Ft0GR`L9pF4#kujF)#F zF>V-t&~*Hz0}@kYW~j4y)w_#2Vn`yjo<>EtX7^saL0i!tqFwJt>4PxaR~!N=g)x&; z0Bbyq#j{3pLy?ROrw3It%z+>MkDgt@fFHxLAjk>V9cN;(KR1GpK$%DxU57;`e%*B1 zZn}Oi%SVsqG`5dSTyf-!6BE5-KaYkrrn|aota9uqvbBV5z2f0?6$Enf#(HoI2{ooAS37l8ZSizBE zbhUjEIp)rXc>Bdp(B3t`f9`Z`iS0P3?N9fWxBvA~Nb517mp8X(?U=Q;jkJyCuj1lD z4FKGl^;AJM(LulHc7{VmKLW864TKL^W(B*wKr-5Fj&kY&f;9Xo@*2fa2s`L^p98`$ z;1FCAbkV=z&uiM2geu#*OCxE>Xn|2^nLgK_>>aHp>ajv$)*aG6Cb-Lcti`Yf+}swc zg}N3Qn_Ts9?e;>wk(+dKbTq%Geu8JN*RW0Tt$I@=J^s0hvV*F*t^$@HHToFK-zwT3 zj1(CR#d|7SYrc{*o2~hn$1kydVoQ`aR%Tnq!$VRAz5^x3qVlr z(9; zIa(}?;d?20zw@6`9R8-LB}pqyD?d7nVPucp?>qS^p&2w|)oi##f@6<#;f=CiK>b{yQmQL^)ysrgJf0j!kNP$1(Dq`Wbo zE?u7bfJM*B`QN$W1Up#Ys$-EF*CJ05j3^z(^Qoec*EpRv;6N8s1%_N-^Wt;tPWWE9 ztt{~dUC(#V&RqwLC6{=;&u)C7tx24Ve&^Q#T+kSrBejkjHBb+`qW^x`0J^{=9> zerSr&-3NV=F$ABL=Y7meu(K}{cS0{A|HK;^Y0O>s3X}1Lj1+M_TD4#`i7uD%cUt3y zY#U>zN9^22%<9#q)e=+Br0;SkDB50|b|A=t}sY%je`;{A3e936#WL75aAmg@~?iC+Ed=Ms?z*03Hk zIo60al@3D_c%2A`9&hW5BU$4f>-29n*;jQYq*hY}VXeu%cIG^u^eI%RdiYJG_@5+a z87XuA#7XJVMRS!4?X8j`xySwWh1`NXHFpY|S501^8NJC;5@wnH_!)BT*dP&_+0`Hp zuV7{;t`Q%u)W_K-%dY=ZWAW}T-_=^&EgP&=O!~v6KTy^lpuYK`s;H@cOjyS}gLW}r zY5Ju1$(qvC>5TC}7>x~4!KIOL8d|-@>jpUGDc|Vj@$%{I2`kV5p5{9<2M+H>l07!W zf}d+&04F-v+90xtwPuBKd0}~~7OFhA4k;n&?^0PAJFZD1TSEAA1*8GR{0Lt!O^;&G zW3n5cUI0=QrXWj;0ytFIjCQD&Gf3w24Qre1JCMX48-T=Sik5CcZDG{XFdzV0-b%iv zA!Gm;4CSBQU|NN?QFHM)x4MW>6yW@IyVdh0l;$%n{nsI7pD8g#nG{Q&5N0zSs0fZ9 zv7#sF5l0>A3*zKvFW#ppuJrlH=GR8#t*gxM z?UU~@TcP^hjT82(@YTD;=h%S{ zD>f}48Rgr5djae$o#aF5CA-SuGsQ`V)*U%(O1^r6z0sJUzUOxuA3$%ZDAKKwht=3B zB|ewi!Lji;edM8PT-S@xMeK%?(vs1X(1NtAsc;T93%v?0YDTk^PovW6Ch;hgZIV<; z{-!uTbWMIpr_fb0I*EapMezyy+rPu2377f(d(iSzRfpEh&d(*hHt>0x&#R9N*V25m z%cEb;lozv5YDBIXheo@i1ydjB6h7MQS^+)|rscgI)_S!+`$cz`myi8V3@0AlPcB^r z4*Mp_HzwdepvU>^0;uo&_BQ+4eTy$>6j_J{!=GGM^sfl^#;ETj`nu%*)Zc_UMR*pl z)+!zXvBL@1{)4{VrvWNn7S2vi`X3Hyj)R)Fm#(lE13NYzVZV-MnEXogW2e`O>(xa- zf+RV*3&Zf#^Z%qU0Ptop)U|K=5ZgsY@ zp3I%O^B!{@Dx{E(P58uvb}o4FDE_<;%MImc;-knQT~3Q(_BnX+9Qey>tW?uT)l zoSbw5i{bSt?B%}$;4^^oIsEPK-!qg5e+lSzfRB>A=`S|q(4+7G!qZMjjlxg#>LtuU zVrLtgcphmOh2WFO!@-J4jul0-Yie6zVn{?B9s*3(x^#|-msWb{$F=*;_cI1eC99P1 zN7QB+rp2!(s+vRvf@-Sv@2dCh!wp~7j?O-wGzJ2LE^vDmW#riH2&MD~$wdQ}$Fk`5 z6}httF)`sQN=nK^-Di(y3I2=7jF4gLvz}_u^OOr5-A3NFx=sZ;^nhb05IkRQ5Ae&I z>9g`6|t#j3nkNhhBIDmh- z=Be+>zq55RKb@Nuz!kboQ}%5Hihc&S9H789Qr;n+_Rn66$Qs{J4Av_W-(G-i$=xCn zA9;8mdFbC=%*_+$S>7G;?ZEB=#{rx6ZE`uM$_P*QJEzUcNN03D9W2%<4ONem$mX=* zDY6B)tH*~m>0f8IGb}da`ODUuv6ualFhIjGF9!TQ(shJ0gByttrT1IeP1D@ zxIfgps@-QxqJi_^t|`2zOj_6q3cjag3xffK}3nT>L*-94lKe^DtJ{?mmFkHs@6$i-PxRoPUlNi z(qJn37xv@|2u@Rmaqg1$do5|UFncx}S!IOnGNy%bl|v~o32tpvtb3T}CU$bqaqL3IX)H{JvO z7A%lA{lJG&{I4vsHib0DG!0xgxxaij!ci!17n`6lOr53dP3oIPGM{&R&ZafZ5%VwA zV%0zeyhij@Qpt5&dLIi%Y(EZIK{LbCV-D~J_@Yf@`U1iR;<>}rOz~7D`pazS2bu+S z@M>amGNiDoujxQ48x*cPqddW@%Y!9Y6Y6L&b@o-M$>~%q%AwITN{xm?=%lXz)|^VA zZM;g{DnXUNa9a{*tk)oBGX;# z^jgZD?oaHNVh(e$WGe2lCHni|RgKG-*$RNn`1~zqe)fcK4^Ex{l%W2kEBlFCewhcPcWXul;f5>mNu>es4K>%qT0mTlGpiqhHxsuG*89wboUy2hs&V!>qZ6fh2Vn}zg>JTbYd z33JF2H-4Y~t{bRx)Wc%q=ao=WFEJzT8+$)BhHns3ny3F4YfvZ$APZX;2p%%p$xWBI z&s3t>|mx=yR44`OZQ{9c} z(Sc5pP9uLr(bI>OOVs?@RLqplBryBbS}4Ge3Q~)J@6hh*gwWoUiao>tpicLoAZ*0( zZJ<4G%e&+5x#GnJsBQ!j9irG4&d*6TU@i@QCErGNS#A@+MW*6|n3phAZt4Vqn zCDgI)R&@k_53nX}@^ZLN(Wa-}S3Z77kun}K+C z1cZ>3l(e|A0u-HIhaVJH0DrfI!^~l84_Z)OYm;ZlIN|7`^xUrF+xTB9TcUb|o!~I{r=%e3W)kEwroKr|eJ#|(m`hLG>9M06 zwD}IRsFF~#1XF*{L8^5js`}f)&#RPX!q7gSnQaZ>DA6x>cI25jCY#L@;Py)V;ybZy z+@7E2P3!MOCj=uO7-=4`Y%GpXr+&f?Jz~}p;bu|Pwi&5$561|^$+sze!U?c7Wn)P% zq*15oCDM|AfIr$7R8GYz!PVQoPA?|mgiC2gQFkaJ|D;>QTgr-AshX26;F3LoO#bVu z>l}dZ^g6f_%Iwac!7AQVW~v{LIbM(!6eY4lUs)JA04Ra_9*@YQ0KdZ-_2x_{sTM7f_WsIS~G z$s2S$6VNJ*PT1kyvIxfzw25+9%wyAR(WWKTcO)k;`z0BT1|)O1m|RZQeUMFr8%t-y zzs2)GW@W!h6+5gFiwfjpDp67-!6_Y^#1+{q+dEEi@_wsu)cIY;r6j9T*ITC4SGhr% ziJWQldo-i5eCnV%yw5wMr;l46s=*u+#z;_@9mU~4SUjIqQKZAUAG*X0IjBl>Xmf~& z%-PTxld`5y!yIBTSW@37sCIN6+?>i{Yv8ro2%V_=l0KDOS%Xg5XjZZo6;FZNPRkgYlTuDAko*vA z0NJq+kVnq}E6}2vha8Vv(|(v^f!tE|aCiy}(MqNE&Y1;+8kdjkd4S}0EV~}FeEHnE z1Yt6$0M}D-E^l1Ld>(CRTyAT*s}+r!J3E^NR{;t|>+FdfBI8QvaX5)zaH#qTT|l;k z{}%oyJe&4SGV7%?JtJAN3LGLr1u;-F9z|Db3Q}#}q8bNmv(oxtgue&MWESn3bMVvs zy4}7~C1!R|wr=ASdW~b9WNM#3&EFAX2Cwf2~WpA*ItMPb=Um zW=WSA!f8~{#WF?d>&uh7lM98aMh>aY?`Ned#Q#PbsE)JwdI?=Cs^_eYJ%v?*4OeAmskH-^V+V z52M>B#iDNlPwJn<2>1J*_!(Z_`NJ2e?&IBV z`+nq9!0b`S6@vL6qsTuqWdbKF#yyan!Z>UZumSAb*VDCuGKx%ya4#uaS_4*(mPEXd zDO(;+I}ejamhOhu0zXrl<4k>FvCp^Fx3w=L6bA9)ks6Vcqu}6zE+(-GI_-+=$zRsS zgFGC$vV=l=BzdWTBCMQxDn3sdAET?%myUa??liq#RGbKf|9c%PzinZ(0CAbH~ zE0SYnZ;Vbf>#fOiuHaFlH#8-%>do+)6sHvN&#&3maZeL3e1%tciJIkPL_mH&yp&yZ zQs7fQxD81Bd<|_rN?a`?>Tv)gz2MxJd@S@uh5VWbl0h29EWmEO!O50zm$dFK_3QVR z4~)i)OIPrsnwG3kwf;@f|7p?XxfG?DKHa2LUfN<6@ zpp^YSgnIca#yqF7p9}DSi1r?9-@hOEHt?8t_4bCfdLQQxVFo_zn>>5@g-2m^)|cTN zCqNd00svIscLsJaZ2RNaJpn%#&qfXo#DmeMom!;cS?K1`h`}f4yxI-S4&Fql90ko{ zGpt4ZHiw5J8bRthsC9`;WpdmcT9?D|tKqjJaZj-44{8Z|@{PC(%80fPbn+2WJ3&2$ zX-LmkVJHy}@Y}ZpzhvP|D>D@!L>{F5rbtvSf5!Pbfd_UR{d!Sw(Rc2{iBXn@Tz3p& ziuqLUZT@J92o%!+3>+`jSTTe}MZd1V76+FKz{tpUrour*iK8F{hxN`MLnfq)A7L=L z$1?ydeZ8DubM)-=?6?_rnREWWJ^mjgz_pMRFHmbjZ9{2N`64zVZD`JQ!r&-JH04UKZX}Kc0_1&1zVLl zdCRPF@jPKYkZ9(2POu5#y-Cs%ZLGL)d(;mfN`iH^$aTMHYF5pU|Ju%)WQ`K3o~cXa zp{x1omVvjx=J-9^?pN82F*YUi;+(gcFm%Vd((~hTBz37Lk8n5$}vx^&f;$}~T%g<{uv3STYQ6g;UOegIOC97(qR6sm2(1lY2D;prDG zjM?}2Rgs1)UODnGms0<-#n_zUfPbZ^iwQC2PF7b=&DkjG;^B#8pg5%Mp`eMiul-@{ zLUey7Megoq8FQ)1xE_lm*(!s0t2H)M&Yy;s#(b8ZQk~3)7Pp21>*a7;MB=0)*__Dw z73yQ_RJ&K`$`HboeB1B?RhL48p`5$20h9O2QD&~JE=85pwS@2UaYe91T3;$zt4f$t z(FkEC4(ng8++Y?0D>d6jb}UR9L)oen2lH2nITOOH#97j!QmyUd#S#pd#ng=1i?{~3 zN8Hs;<(^6oZ<^^eork_S=5Pp=5$2$C>G(8te+m6QRZT#a&)Toblgdt08hsQxRuov( znr_NvMpH+yL7H&;F#_oi)h}}rLww)`;FkQ2tvWmbecJjW10z1agYc5g2S8K8p%(?5fuIt@mBnGzml0ANp+huQEKuZ+QuR8I3zh z)co+I;;zskWQkUx<>Mza?}!K{<1*r&uq;kU`%V}h#S%3mudARdud9*tIIC=0P#*w~ zYmTVj8Lg`O0>8hkhg@Z;&{vNR467KY)j%Rlm%-QW2YQC`Fft;e$ArUAl+anw2CcV} zBTDEpD?uOn(#d?i-c=Oe1RK&(W8bNv$IepO(Jlg9=_{a?v?HY}v>*2Vut5UjDIypzSm=ajqI5+vF z8Ggg7En_hgy%1-S%9+{odgZsJWYbJ4?rhvcJbI5Mtuu<4R8F> z#0a)ZYn~d`{`WSv4m?hQB>@m+nj)FYee<887Qm(;A+3jno|Giyyt3kUca+}>tYmj? zu%%XL_uz*dqJY;pyN8D{fgMeGGW(~(g2BL5eu*5(n&vt5fY_GbxI~3j z1w!Z+kt6mgPkelBX8jyM5PP~A^?!PJK8Q+(Kf|?ew#eFV_gH{WwD-6DCz4W0W4C30 z7W4vkzyq_b9v^Z=|GHQwy!sCCzY>!s5FcTVhuR+I4C0lKVn-%7o!@j)L2F@mX$W9# zL9%G^`M5-2cB5!bZsdAFk#1@pc4z|DPPi0S_}VmNw`Lp zSaNxXYi3K862+)CM5Ik|#i4)Fko7}#=;duM{Zw7FIpMH-vFn(~Y=ap6_5o1m{ zZiu$y)>M-TA6L|)uvBY_L z&PJ)HT+)4XaE}xRJOy_6qcc^sP;)PhQV$OHtVehb&DL8oceP4*{p9uf9{m>{inJiFAq*Y0G20zDG8|pxWAs{{Goug!u8=jLF0ls&pd>Gd8 z%tRhuUt&Z>fj|Ys!T5wptVy%Zq_qo3Pv_vMMK(;T)1_!2H>QFsgLoy|AbY9kS;|}( zA!;*0-|gAwLFDrx(xO>FTRqIq&e01KaSyTG%@H|w=k7&p?kaE9vA}7fjVB*&z!hs9 zj#*|8E2y7ea%Y2RG2;s`^SEB$&zCiDt}Q`Xm8Ws=GF4ICBYzrN>|?T~7?@Yc+Q(*( zC9At4eo9obZFANsse-0y_*IqkMAA8OM*1s;6O@(aRpTOXl@+6MM>X>=O2b3US>?CJ z{2n_UTP8#CLHR6(9?$95=I;TrenSQmi5x`vG3y?_W+$+MC5|-?F6G1b;G6gu^FSGo zB7tT)10^2NZNv^hecZEzONmJJ+Pq0=WZS8PuO7EANslO@KOxBoEhW5Tq4X%5BA~G1 zmQLW|{fjY)e1Nt&e=;Lz_UV(Z`s+5-DRM%=OrJm*+CIK|ANE^eR#wb08_R%K^*xJ& z4U<#l%&ZCmwon;)+)P1tK5a~mP@XYp20g0+bm9e6;s;lZQ5Hj3dFmGg){Fy5V?k z#|+@53AK>?Ht;qBY9RkJdfKJ0TTn*t9>`HUcrY2v3)W%!tHQ%)X9aNu@lP7TNy3T5 zH%3ulew-`O7_F8kzG$PiyoDsUUr*5&Zu!N2LB`p}o!3~N+q+RWY8%_>+d*V%e{Ul! z?>|^=oJ$K|DOa7k-ENqLMb^LyYiVvl1ypY}Gim}HX^Tl6x^H0?YX&>F z_Q}(TdYk^tB8lr)w88E9XiClX<3!%bsDDF*e3WNlAJeHY^ZuHKL$)LjuAAok@g*7{ zEf4`d3h*x34rWR;(fk>DD!MW{7l=#A7B;Im0j~=dF+Fg7eIi|lOumcW=d5VBE=s9i zLQZd=JzfC;Q_X)@K}>nXp{Ak?wAdFEzJr`hYhu%~n^V#3Nk?bijr zV-uoA$_SO0$)hn7w|GrMxRn-$o7>6!iC{W{7M(g-T45a(=_=N6_K}A$dw~k$-NcI) zJY?&w#U&nYH9V~mWr4Ia1;!I428?Im@ZonL0C8vts6=2K03~ws|8y|=l6RDc0N(M4 zI7dK6(snmANVHi9;FJMd+QY+x#%d5489?8A09#liaMKk6T!X#<-v8MH0(d{CCd?xG>5bz*<;;;9TIOkz>(qvmgI`QH);!>3Ly<4pTi`3*}r_cAk;~Ym$#Pz6| z`?DVuR(>_FMv`DR(cFQfgXfFKzS*<0`AXhX7vTJGDoKp*sxAp@ulL*7c z+f`HSJG3r|`Vykpt5_D(yZQ-!Q8)89=kUFyhD|b34zB@Amd+5gqbXMZjjbsT3KJFPcwJzzf&z2BZ1N9rKTScK@MaS9ATl2Ke>BgaCI3iBwd)mOCE&-P}Z<(ss~0b`PPdXzUS_OV?QHlsJKuc@X*K5nklpFK|tKg=^7hzn^dOR9MQ(>z%} zITAr$*<*-C&{(eYi?+D4v>)YYcxh zTxoT7VIMK1vINkILSKvvGtWEz@4~o6lxG6Gu^0jl15pA`)@D6sdZzs$TY}3 zILr?9>E)Cy>Xrs}iG&l*GAS!^66U3BGP1C)IQE1u)+sXw3j0lH5E8$w8{;wzH8h@v z%UG}~ttnoy$gJmwd#3Vad@1Jgotd1Q%c)Uo5#cK#ezOSiuNw_0zT${jv2VJt&DnY)7utD~)wz|pubNqOwnSg}}1Bt48wo1U4Qe*D`Pi1td4-T7m( zt&5Hfx7k}C`<-{&6W0l?KF)lg_H&qo_V&)}IF;{aGL z{^HKiZAJPezp7)}>oH%Rp2A76VPnb)hn5i!;retN#(7VWR!a^m4nY48S7#a2)*p3i z+=IKj7l%S|cbDR>rFe07cPZ{x+)63#!CgvmDP9Pr5S)AZf9Jh3_kJKV%rFp=le2$& z?X{mJ^>>-@>qtauPwGRLz6rAgP6-M*pNGa#N<OM2N;TH&VGihU5TOj{DD+ebuSGi z2TXP%%n~5M1e0Dg_Ivo=vm^+rdqCQ5i?FVJW8Mi*uCd`*T$w&UtYSer4@QLHZx{=iiv;MX5u$0{`Dl6?y@T z?xSFSPM?uEn{IPW;{kpG3!O?=3F&=Pqd1^}T^tm@YExbl7d=EFTF= zD3g2)2<7vS;-tih-q@qC7@95O^zB9YT|wfPtFw^3;h6)6g@B(6@zW;QG6-Oy3yM)S zvkfv047c#!wRvv>90LjTGz=U`o%tmNL+(Ujl!dQDeAv=_3h)X;OA@6Vw%oQ?`v`rV z8W?}vRyo|WHCjm1i9Zk!x+b@~!<+HwRq6|?qUBna1RaDg9*CdTtVWZ+4ktgd?4aS) zMPFANfvxaZM7oS9lG}y0=q8B2ier=EG<%p=vyV}OU|9KQ_)%b)ecX*D`2|#D$qZN% zW71_6BbpEoHE4EAzE6j7apmy(+cNS_4%gx3@wHC#=AFop3!))a&Tai=KwnCP-l_*b z-~sL1ua_4}lu2r{;Aj56$IZ)s_WpPPfA~tAy{5_b+@yESfVOH1;gO^I7fbrzRE_KK z-&AcE0B8L6A{%rPJ$L@LKNOAiZ?yT3Q1 znNtNhCNv!#9lg-KS)Owi+^O{)5TrD`08S=dn-|PuE`)G?dDDm0^+Bp(V?v{8sWa8J zH_xz9h~d%E$iUm391#itNhqm+H5*KDKmy;2^MPjuQ^l~zto*kIGap0nk>|UaO^Rjg zxZs92qgUUBp=L#mZb4#C8)f-Lv>=h_T{ESUjKN)3a{x9;g8{6p?RyB0!R#B5&+h+} z8df(T5p2xkgtZ+7der3LC%WyR$2VP@m^c=I*Yal7>_1xOZ?DIZUyuKNAYRoq6sNQU zstQJ@<~S}WuMu12>XARN6|m1$f7Lh*TUE<~#`zCagb@uRIAH4`2`0T~L1mGi*sWrv zkL4;2%atLA#bZ)Hq-kTJkLs|Q6}|GLI)YOmA}4Ujk8*%r@nA=Gi&)H;7)Ff*opGh= z)r|;V0L)~!s(M6rQa2AK@ zXWNm0lmd!07Wt&T`&1q89?q=PlX%ve&@a341InPS=Lb6cGaub|4lu5;>C~vq%?%6^ z{M9&i8E={TCGg4-l=+pK_f@qCcm@+%a60h>7|Eq(GFL>>1ndzU2C!=eX(ShYayR*b zRCx#0YJ%I59HOIUgp>YIS}r>B6zmRuyW6o@#8*<3TE?WX-A;5-{ z%`k=U7{-rMOB>}Q+l^hi&sAcbfjXi(0BmI(BsZ5xaip-Oh5aB>ibWc4*ge&s%wSx~ zZOT=w+)3I~ao$PduO`SES*yae(op^QS>{dL@>Xn235&53O@BeekD@gcz5%1klZcaq z@q~F-o(o9?!Nh1#Tj`h5FU1_pD~}e4MJXA;;ySRqpe)ND(UneYiFt@0aj51*8Nq;? zh`LB!oL%mGKZ@g_<(5XTVDUWYj`B4g+ew$y%if23D&tgJu(DVvX1>jG)0antO^hWA zPk^nRKR}HOHH|#;$7QIG=|;aHkba<(5at$h%MZ_5nn;fsOKqc@5>Ac#tI`K$agkb= zov7~j6-mF_WH#HUG^N&T_!xxyMTR<+(VwbJm`ifXYTR}>7K%*@`dBt{42pQ!-3wr?|NJZ-S(I>X9MTG1xU%ZFi*;G7 za@$|0q+Y>{cnOXSx?Q?%9QBaB+a8TKS+_qRf@rlB4RR(zS)P`z?{LI~cg&(^zfT`U z80;TKyyiDiMU%M9sP6+U-2Z1FVbfIF1$=n`Vz-x5PHHA+wbAVQ@V$#duKsO_=-uvZ z=sFa-(F-6&h6*3o2mjb-(CXmTH73UkmZS}*pz;alCS2Xn(h5q|2}U0l391e^7)roc zp*$pW7X3i*pr!Dj8~?D!=EgG;qo z!lTPqAZDDZFXKsCExT4xs%Rwb+jtlc!%a8<<K9OszFt9r(+tg~6G2GIt{zn;Ys8W>ZsdG{11~)Ok(S-aQER_>GhqPxX;PPLXh}E<<2Sj}q36T8!DT**O9kUgFqrm{U4_{y3 z%kt1IPVcTwub=(3_{d_azy|oG@MZT9upI%&wLcz?-}YXCdjD2%77!rK2<|8Fih)ud z_ldy+h_bNYO;_~;_H3IetB*^DB`YngxOGf8Tzltk6Ayujf3f!yU!0ukEhw!*R2`^8 zDFd%{^P5g7uf1_(iI2}%xEWCfd{aFm<@!f7oQw+BKS&h4f`I1OekRY+GN4q$!FxPQFHx)(+qzwP6HFZA2wT(L7DqmvqP}Gw!)cY<_M|u zeT|#nN!RN0eo$Ph^ie?B3*q1An%&r7KgY^!(E*};&A99tk2F4zFD9{oiDhJ5JwCiy zS9~u~7mktRmXKZ%MZ7h88@z$^$M%exC3KyJHUQl z{E3lJElyIo(WRX+#!2wy2$UQ$eljn!RPuGTYMG>`@#^DZOCo$ghdnSPo7AEqQ611? zrtA=Q_9Y?Z~P#Lygtvkxd2Rl&RQZ*i0gRFwC$~(>G6ncni zBzHask&XYvZ<%j$%SxV+bHX za>0nP)4`)r<~Wf2!O%4CU{z5e)hhLdz?1I%7(o`2ZP<4p(p*z6cF~&$RTs?Lmeqvj zL@?~aAoB#oAHQ8MTCEaCjvL7j0&LQ0QfJVc#QYEE6#7z8 z2_mT)2lnuXcyeXm=dxW%T%&w0+vG;MNE-;;nPKhS+faYvPs0z!)D{oLV9F_hwDOfw zdQw=b+A zOErCNLNx@CGJGzkRQc=jto^(1R)86;Kb<=v(Trx4vLa=>5- z3_OX56$IhBqR;RhY^MAf<9eiTK5LBSpjrBtWBO23PlkNPIhAOQpl49Dq*iOgMo#|B zyO)@jAZ0cFra=pcfWSCB#5lBvcO2|}_lWd}6peO>>5~A9fD6Z8A|OlUBFBdA>tOF# zqLRvvZ`NHbn<82wqDlC(WFbzIj2O*;BO*<1L+i0X{XaDwyLw4%9coxqpj_L1rkk3Y= z^|w8d!PWP>f_`&8s&M^oqL)=c!&O9o({1sly1EcA7))D_#k}1vAlciskj^V{)b`%r zuOffVh#Iw{_B)E&*5HilS05km%56FvpRN2F)%tE)@w$f}dE;9b`bq5a-A4a=c*UB! zo&tEk!0mhe(CQo7^C(&EV1Ku(zsF=e(KqA3PeV8I{-(xnx_vCpv6P(L6skc$Sktnb zQ*$#DCAvR=jl67MwWJ|VRuwV=PjEyvIlIqC_j5a(!=LtkJ*V7t*rZSBki?4>gr)I1 ze~|ezLnacoifGo{L>Ml#m5HQPkr?%TpS1jA&7C#ZS5I!LK~i;C78Esi`DNm$umjvY z+&%gPlgM}hYUJH<)!p&wK?^;?P@6D^36({OLL}pZnLk>*bwXC2Q9p@$(r~Tm*k64< zo`AV0VCA_w%)HyBwg774Yl{Zx=!0Y&re|dnE)c|Y;KchJbCXC$3TT|hE} zcjnG_aMKqUyxrIKJ+J{N$CrPBJTF&NuRsKN-?ebx(63*=zPkh=0coxOD35CKFxi!{ z|2U%OYptq2>r?I8F`Z$O;wC$%r*Y3g>W)sWr(5bI$8fh`LWmLiSts~%V-b3J<-fM7 z+emqI7Z{k|0QOJta!hRMAj@)9HzAs9{My1kS;@yW%ZojDWMWZl1fE2 zrxz3Ak3wG)$>qU}9ugB-wvBEv??OOLNWS z3||B>e2o6^m4g2fig>aoHOfQp0vObdQ(3y2rZAk$CU2EYed0L0#fX~#gK)AE(na5) zEYg}plV>&3Bt4>3mNWmMls(5r02A3xj5zwcn}4gTKOOm@Ty3HX?NLN!s@q0$9tZIv zgB9txP{h#`YyHepC-@^{MHCuF8IM}5o7N~df&+i(KMQIL7lu@!6tf8*>+z2XF?1tR zviP!~i#bvSkB#rz#B;~IWWn#^lXfM->z!5(`)M00zizYCE+{4J>c)yybhfYlB$RhF zYMMcexoYrl6Ea#K`k9T8%AbkmQs42|OI@e|EnHMN1J@?CMcLF_CM8rpb-?qm~jR$#YZ7 zxh4w@TpS#dPvCgwrW}+@-{2+2(^Z7VojYPvB0WhpI~1EH-2G@8Xto$Y9;z_cQ6aBY z%cBsN3CM<6NO3Uw2;gM>z;i~0%4pe)%?Jugdqu(4kj4uTzQNCPW5f4wC(w#q+S_!B z;4V$zV&TJSVdb&ji2Q)c+7_;Xb(RAIlEJ8^XTxx^_&Z2FQ5;!^uE;ZkAhs{I__30w z*cRVN0RVQ9)HC>)fQBUzh$`BrNjQ1XG32?w%#zx&6X(K)* zs9zY;Y=-5HJp$}-+t_0Ska9>oZByW55x>Q8Dw*KwRPa|Y4}>3Z3G&qKW4vdM`v)bh z8H?Nzj(*E3K=xJlr`DHfrMwcM?02g8%5;n*BDaWp6{j(zVkAy>uDi#Pn#ff);IweT zpHeJRqzVK8QUTuu&e@zlihx-mMee&cX|=ik0Fe|8!jLhbwqGgIf7Zy=+!NM7{wO|w zKpSM#YR-&Gqg?udu`CAc&A@t=v82Bs)tnPKQ5sp9`JH(P?wa-(_ws=(oCt}N+*vdT zUU4t4FpZZlGAo5AEiRQXMW9%qOb^>2Wn+&IiKsl}g5pisavTM#C$~~~RH=c2Rf4cJ z%&d7T;vd-agn*`t9Nu~m@d0%QEp)XMZn(<-X3%+i2`1il`z&x_iVI~h z_!OW5f$w&HS{*r?hpLa&CJ!hsBp$G#UF&O@5&CJQ?TsD?Kn#LpndooB&Dzs_WT z3ozD9ZQS3Sp@07-?|&Ty@K7Z|G;xXs54cV`w26KG9H0u>O=$g~!URe+KWBsU_a3pP zzAx;&?EJeZ2f8t=EcD>7mmvlp-BHgD3t`|e_pZ*HobN;9_s$}ItoqDsR+`?8cyx#* zCiN{4xh5}wqgHcw{+{A2QXoj%|4m1Nc!v8m=}$%J(hORB7_(O(PK1wXo2rgzGCOY; z65@Av9w2qg7+n;Ae4#${~Cz0kF9>id9Z#$!2ERf!TVE~pZfgJrQug`oIv;(y;xZo*9|nq zb)Yqy7^uxR`lwUBT%&rs%v0E}aaEE6MkEo*;9kP17mR{CZ7vr5-bCl2n9tR;$dZBd zC&?i@J1W#o6zHShpakbe!odQAP~Vp)p?^MAV9Q-n=HF{)b~i}m^Z5JQ_wMeE2>Dsy zu$G7=b{7JyT6_jZgzI@ylfZfCvilkna0R&pUFUV*ALiu~Ad3~aLVrhu5^Y{DW^WL5%-slC~?Zt~kM|WR% z^7aT#KHM30lEa{MCvD3Oc;Ef+qx03v5BI?O%%-i}Z=I@(GBY_G#lh@hFXn}fY`vRL)6_6Gc7Guz$U+k4s)1-jdrySDx=bB>VVV4QxT_MJu) zlhA%xXq16rJ*vORV+i-IDe5F7afKDw-yyM0)ZnNV^wxVjLLssui9v(V%BC>2TBe_* zkf`d1V@?3kla!U9pI}K+kX~MW*IaRVgWlQxP~W!6&8>eOuRL^O6T=fF(=JOwCse@n zEY^ZX(xeG|C@f?SAsPDXSRZY2gsPiY*iN@r=c{FmJ!JMx z=^HxHde9_Ye)mfJW#O}TRy_`p?PiwGkR^}_sPNw?+UtFr-}vbf!lss`f8rOh_{jHh zb*p^L#DG@Nd4<*G9-+Pqyv(-HVuRxh8VaBBB85$oB&Og-4-F7uS3xhPMjgJGpdThB z#QY){j#j-GhHWYLCn_%$iH?bhVN8*j&@QK-P0&yTKMosvV40+Khb5@BRLVgkm)G7( z-_gSHd)8?iVUVh5gvXQ{IZ0`dUBK?f-BHK98PE_Sv_7g&N^ihS{ z*cCa$p7P}|D$-sTSLUW3jMQhj7O}l3$te=t_&AWQ+~6Jg*v!%E3AoC182B0T!q^x|G-K0|Tyt3s z2vozqXt^<~Wu{C!zX^h0BX^_owP}i+CqyFPiDXB;iQwArHSeus0%2S=IA*S()f1-N zIg>p<tZ!|(`~a@$qj#1o`0W@YYk!IQ0d_NWgZJ#W|=+O z8};E_GDUQw?E-x%Vb;8LAuIHhe?kti(Ht~SxNYmXx9%7tG#+8C)DrO!G8y48$XCWv z_LKOgKf^gkXS|e;jlEE)En!mPEbF17^gFFp_!nB;)U?#xl-$(H<74WiWK4Bd0ltr4 zG6HQ1(QG2(>ZORu=8gS4}7RWoxVawW8v-jBv%f;izDJ_CdV zN?}7Ghbc|r-jKnPQTwUp;FoW zRgbLS?uJW7YLxY5!H z3RO`eV3XEM(%3Rx1wjO<2B_B|P0n~O7H3W($||IyxaKXRt~BT05L6%jHX(aUok=`9 zFG4uNl;B-D_=Za{6$=hD^>pRa2Vkt_7dFeM9jfsYY&Ib}Y45CfMh~h(G;oM2E6?Ln z2Ez2%ssc;J#l1v>)Qc+5{;FN7FH$^84hO0@6lb)c z7Jt@i@ATZgb|b%J!T3aqeR-i)W!5z)D{iwh5>EsKNBt}5?7`&QqJ-lfGkGxnM<$LX zpH|~qt&(w9@1TapbhyynpW(6 zdvYyIx!FntcY4v5ge_vB&K$|Jg2?Q{q&U><&u?Y+@|^Z!-j!I-^^Hdl7x82PA2~(G*USKZx3D2{rV^S&@3qOdyE!j>tEec^td?{zZ6FXEf?)o$Vzuek(z~KMxCBj#uiM z58K}9#%iNmlHlNMx5xy%iK4EtQ1x+w_gE3bVs#|OcJg0cxv0f1mnW8LtCv}OczAD| z^xA+erPeO~w?#68Va$AHVt}9>3Hv@ToTY>rMw2u7A+?lzZ_cUyFAozkj|Cqc`m{(; zrQSUMqP5yY3J-<>u`N@wTy6oM@K?0y{#1S8#7G6paXn^KG!0iIBLd$QNS#L9;;>+~ zRjphs{K40zf5q5*`iihXqa5HLG^f}wo~Wq$YyB+q#p*P7=EGbP(Egm$!tTmt-O$8W z&PUI(m&9bB-ft6zUd1qgU`(DX8OQH^Eyvu+08zn}<~gozn$*X$kK<32wn88_Bu*Y5 z<>Kq+uX!KG@BYQ7R>8yms^AUMo|}6klccEiF#)!&2AIc!TA}!jnzF{S8;037d|dj* z7$1)X2FiPkk?#48YP~P@8XqOFwH)(!`D;)KN;);lVswb&SVl7UP?tkX;nCq`fh&8# zmJoL&GR%UH&>OR`gTX`E>eCPx>lGzF;`A`OD!Ck!7&BZJgwaQ_154224h7?LT>QwS zB1ltuS?X@9(_?tQqJ%W;;h6Ypp|ha~&4AT1X$5DZ###bxB&u!4AZ30Y6#+C7nq`6d zqKWZAcEi;vg*A(f;eD2I4lm!VkHFYYIBgAmInz4$(IEUNW|Nb)orPY;aK!6dk#M6aj>cZ+JS+2kv{49$XdJy_K{u! z3+%KeZ_C%?zQk%r*dqR|`|pEI7fuW)6&lh;(iHd=U+=_5I+%CSa`A@L)=bbAD>Om` zhPH~QTm=QSGE8)}bSrBx2)D`SqpG4D;(4k^k_nQ7HpFLIyjIO5vPomq^sprE^gON~RJpem-07l?ENzz{7gs^&?ed%f6x_3wpbHg2E` z@7pEovDp|5PV??NgFm?qa8VlUJtkNMYHvb)V2;vz`pI`=A|BttJgn`{TNd^Mvgs`6 zki*V+tAED}c2&;E^24%7Z|rknb&!?`L9vrjp16Q5_A70R#MF0Fc+U7B!;0t_V(3Dn zYT)PF{VxbB5m--@V23e>39xJK;r@RTHD$&%bGm=y6%gGO=LzKDb7C* z9PhgQ$lzugRZmW(T?`I0JB8}^a}v)e!T(>WnxrvS3k|3s-QL!|G)Gha(tNI`-zu^= zt%SQYxcIo;?5SCpb1eOIwM_qDY-d3m(r`9=W(gSQ>(^7CPeWQcxEj-ilHD9tV(B;( zROnF!C9%TfFuds0-o<&i$o;r{Pg~_EABfj_lq5{bR@kh-kyxkZ;YyAti&tJ=K2SaG zaK;GiQN*8dG`rQ{;{D_v$R@|ZI^)~I=H&AjMwm`?tBOIBRy#+s$(O5ii>RZA=PuKc zU$(cz_?d|mL7&scv4aNMj6e?qp9xSad?anaBD;KxRR8VyWWczR7qXSW)Sw2zi|O5I zE=iDA1IOnzTXaQfkADZqzV06|zNdJ|!YH>Jf_Ugr6%kv~)4-YwmL=37cr`dA1B#U3 zd$owcetJ4>%J(**v2I_sqJP@;i&1U1 z%D;^UZw-Nis#f=3yx!WzoL%`pXV}U&Qpv1M4f+ML0C9gT6(76Dg|_in#R=yw39qcK z9@R997f<1SUyHa*z*XSyG;ll*hjKs~YREV}X2{X^p+2I)IKCQ{4ScRZvpm%ZcAXdk zyG~7-+I1A&W;aq)InC{VD)eT6g=(N22lc4A%I|j+s1b-Htvlv2>fD45{uc@L@R#ND z=g)bT$}a1l3-}-oAzgrUe|P(>Z`%K|2fXVlL7zd_`*PEG{fjgBzK+ZFKN;E~A{w>ydKbFQ2`3P3+YM(DFWE0T zz@*7*AKZvTINHp6uqDbPt3zNEq|0dpX_c4>hW(Thx+I3BC5s!?6zYi_ni*S;XGKb~h?izs+#+ogJ;s3{ zh3&LtIN&77e4wL9YBeN+GK8>?&^1oXWIMy+EOiiC(&0=6uh^N!PATdNKDas5mB?J!9A8Hc7l^%{ccpUeDYlj^ zpNk@LOB=`3a{Clo9%e>*TfVbTlQC7dJ4FqW)Fd0bNMYmAJ%~+d>7P-slC-88T#t@x zmubi;v8`j&p{|NMQc}!ZC$Bn=(XB-K4$7{$>&d3h!p57zOLW!6B^c33DM@W9k?yAJ zK@G~=Ntr9{Kvk0b6nX{YJ1mOvKpY!ZTA7ixwmRI4qRdUmc15X@1X8=c3wrBG$Pc(Q zBO~LCEI;N{W#zETuG?EEp(@6fqCRDfv#Ohn_YR42!|lbwL+~08Zk~_mjlFi@vm<^5 z;bg>BbK{#|Hn|)RjKwdGMkOazL?q*}FzKKxqA4bn6Eu^2Q~4^xJF@AT(=1~C&fGyF zy>%?qz!^x7ck8$B9^=A7T^A~qQi?N0@dJe9RIkJc+DadOLx&iabI{|MD~336h0rx9 z!_hwa2bL6#jr1sR9cr=Ci*vO(wwR>+IGOgQGGE%H}*U44>9<-=+cH7CeYRP<}9b7^xydBW)kE8USMYX3G_M} z{GeW*e?Ptb;uQ33w7oaEeTTF-GUD*p%rD^D?3wK)S^P=*WiR-d^W}x}g*IbN4A$%O z=cmzcy(5<|yHr4a#p{Cughbpb0J2YYz3CF1*Y|iWeze*5q#X3P{bCgKi1iZE_=7O#lGNM zFY67w2=iF&qI1cM!ovHVWu41?&%Ge6?wUI2AK`|bT9>!^3u1%>KOy>q1dS+u_;65M z(JTtcLqsx!`b5)Tcw_Rfq_O$qX2A}3-z?AnTg^j#3*XlEo1}cmWh@l&9c+A>XiU*6 z{W>bbxv_Qe{rj4Szp%#-&MD;zeqa?0a;s^4lmMX3lq*B2OME$E+9SaTT1LDXgXLS2f|JdRt6n4^f~C0~W1 zB9wtV8t`eb|AX(Hjz#hQBK1daAd?qh3rGk3Eu#W5h+sOpyARmR?$OMi_K1A{9E^)f zP5?#R_g_HZW#Fa%ziJQk$WDWk*nLG-7Q4U*SNv&690t*^&H(iw{)$*`rt2CxKuViqywGsFf+DnQ+pz zkXO~syB=}1?dPQR8aCp;Pk#QMVq(63tUXK&ii@J$>dybPG!HoRyfT>E{C;B6ULaxlgq?dwAzUsX{h5m*1o;QR&6 z61zK8^%DqD(wm{;W?gAi)Z?-o7CrQD*QHCol7`WMsM4^WT&U*|Wvb`z;zcj18WH-u zG7+`9#J2j6>tAA8R$!U%QGpBV(O8k003bYg#(@um5gKgxI~V#Xq?)+w8=p2$+mmQk z93j7%D+V?{4WpvA3%|Pv?8e#NULkcxbwRK1-{s<+P1HiF19d>Z#+n<|M#bw;X`WX_}rsO#9tVoc9q$33+5lwLMM>X>#{R>Ut`!gpMA~KSz z!)!*L_?>-4Q|6cRCJN0IwDI>@gbj!#$K1MLcd7GfH_{fc7Ey$=)+jN zC8p0)`@+v#>qvHe0(SH?Qf2ozbY#^j+`$1`wTUC4M^3ci-4=(oE@st;cM?4zfAFof z%0c$i2kmrk?F(Hgs>3a%EpCZ0v~54emN3Z0&s>x(dJ!O{EH8BXz_Lg57$$97<9=c~ z)*g_HhW*4|eV)3K;>);-!&zG^{S?#|Mo4JFY&a%J^5c672{!U|)7G{zGY8-M8kS}0 zot{qfy<-@Rs3Srqw7kLm6rmEE{x@ap3h$_SLKCu$x{C}4jRxOn=JCAW)rhR%izC?! zv#D0qCr2o$(R8;O&rC}wMdOd&@un#Lr9Z0|%O6BggQjm$o@V_0FU2AJ)w9QKe|Wp`>fMV2k%(^YU;_biPOdv^NYtGZe$qRxUjT$F=$qct>)v2}9A} zsOr!BNNE-cc{ibQD>{xzk8XtT3#l_##q9XZzj|1BSTy11>T{P?HOSYI78j*&)a-+jn^a_R`lnU5- zcvHE$gp8IlU55F&gK8^YuCx)W~3+qn)k9Isbgd2V-~tvBqFE& zc_c1!9TX6uGPqNMl6ifIWZ6d^8ebV%nsDpb_6GQ&4JnBh44qm1iQ|;L=q*;O+5|nH z4@vx7j>M72qZ1h9$NLlj>c<{vP3B|5% zM&P4|_*OVy6(!Rw^iY%X+Rey>cgT<#bLm4yXT#;^WfZvK?&R!mHA=*AC@5taJL zn$)rGjSFYSS4DV6ghuU+IY->BW}#y7K^Tqphv_+gq)Ml^W(oL5zO%}%Jjt=5(~k%rtqJ0R4*-3=zsMIn;luN((@S2J>FK^r6}b6>-`(rahp+5da+HY{357NezdYNW7L8ggr0E!>wICv2qTxd6 zah0-$sdE&O87bB(PPSO!^9Ch4Isf{ylixWE02pyVwmrJPK3YeGS1%OVMK`;H12A_2 zKwnv&p{7;fHad$8WD);!hIw!KMi)IDFtJZc5gDEe26=glJsi_*`5sXKDmE}BdFu;^ z8bWNCPm}$d-|&!3yM3AyL7jvBWG>wU9h4*=ZO@2z`+>qNPzdZlt<~+tM+{03&0Gww zg7tp-2fIR%FMn$HQGR!+HPP=uFZSN`*eP6)Q1@n^+bW{c zCOwXWGcni^EV2?$)rW{pSADJ!m(I|o$0tZSp;&#^xqHS>zN&hJo>}d>7J)g3SQ?YB zh_}1UynB+WPAIjBk56O={{%JqQ$fAFeSPnDB0}BB-xe0#ZZ1B(GVD-Z&Wi~Ef&rl2 zXMEf8iwD~0NxR6p|E{cA>4&uDJ?c2ElN_F)H7MTgnRt{_^Wl;9-B zhMS`s;s)z-g3dqy!GSA$qoAXRcfMdOb;NA}w(-YL9>>l*mN9(T0}K;;f}MowF=*&W zopCD48Tj)MA`LtlR8ry#&23enqvww?oish7bJ?&m0RQIT!>5y=x6d7@?8YW>N}Udz zT%U6FsN*-RZ=-E%XZ_q+x}cyqKHek=L9>>pr%S`8YQBeS#7mKe(a>5N!f?zqBpkNe zAR0!$Na9NOkOYL=x+07+E|o#9sLXp23DW%<_=vg)@E`r7%eZ4!INY!=G@Tq|X0A*$ z9Vj*PxAO{;W#Y~56mImS1hMkw({Q^lF$YOS(7lKL7vU>=rfZGA$9;$fo1(b# zloz#3fK_&k3H5o&WyG+qcZ1k7Gqe2`7?&)zmQ$gaoVHmm$B33_Y{a2;*oBi% znKqa=ARvOpE`YIQSP^~;L0Lm4TYiAESd5?p+nQ1_S|%6oJ8P2psYReMdLbbrl^fEq zJP#cQ+^G+%PaK}(u;iwqGfLb~m<9!{Pq=?ke^{rG7wl0mImsO&*hI4n>w9T1ZbT@R zj7z2FObn`JvgZ_F8Y|iAtKVQrXpTB&QKGa=Qy7--(&_-g-``)MZ{o|KQW>f`cZ^y(0C#b;i*P5g7$QRjrBrk zv&|CuqVW4+McM>PIM$ zhQ&$nkt*XPqcXX;ilnGCICiZ1@t6MERc&%MB^99pwm-9*xxwFl5|z)`=cO7B@a=&z zqv*nm4dyAz3(`4A9X{CCkCpo90g>LG=jKam_V48g%3>Gk)ubF~eQ^0g59LUY1bywW z<<@w=5+!&|&pFSv_m{RY&`riHr!qbkQ(i(l zKBw}RM0X=USAGg+x)?8;PE}ENJ{KYo(G(o#-A$juc+6hjlX9!{4kat$P_9hVfe*og z8kEFHgGJ(gF7$=oor23FC1I_GuNeM7W{^10;=`czj7GsoemFm&x9t}l0z#ZyT$?|z zZTyB#ZKpnXOSo}qz(F7rH8X(~$ZmOaJK92*UK(C{#)4tl{QS0K=g>Y`tzwwmh5ntXd^`!|K$RVy-cI$6e_p}jO*mh1Yc9ZbF6chTXJWjL+KK|A(NC0 zO;7qzn%Z!yfnYiT$OB|i4cKk_ak9?P@p2KK6oDHXMp8ghiIzdD(mK0@X3%U$>^@!Si3UHYnIaA{l<0bF z_`U=fRD@@+qjr-cz=3e2tv-G#5*sqr6|i&oUoSu{>$RBm^48h8*d_vUC|uj?rsr)< zOZrakP|-!b5NTRGIA}EpPc$^9u{z zUta>%)YJrpgm%x*fss3HNxo>(KB6qNV$f^W+aPLk5IVn4d24cE6F_`h)R#2b@VPr{ z;tZx{f(t`}7*w9Yk9rs4T&LmKGw1yK`qbv+o@0_} zAtRoUr(31)vjt0Sg~(_9D9(Eh2TtX0?7|V}w>GV2V4F^kZ-Mu5!`B;=3q`Ze%uON| z=3fm(T_^e%iXj%WTTCF{ec&nnI}rdRNe?ftsF)_xM&E~%s_(wnjAu6wMY0B1f;%Db zm_CBHeT$f5mKU^eAQvlJ99ZaYVn1Yu7Y1>fAIir2f^elV{t#n`ySFj&5^kmAlq2L5 zGTKFRZC~C#egX*;H^_Sf{Szs2H8rB0a{#q%9BWwQD#fsC-jyvqwk)O8>MaQG<7PQ-+Y zQlbaBtfQ8vnsUdi@`tl|%71f=N$?mW5)HxNST-oZA`<;z(dPRCwF@uC!J+5B_nrH= zzZn}m%~mp3V)AD}e@GyBV+h46?*kY!hKDbBia}2QJx<1`c1)GT?FQj=tNG?c?>_8r z@6X*+MXR|{;z|%lQ$)obpnV^O&P&wLX5mn2%ISccMoEzJl?W`_ zr*v@fa`xoDDt#$|*xGiWcpx@cf7{g|I6TNGv8+W^oKbq8@UdfJi~va{Q86J=Pe3bY zL_sXY#{4&(yqE^Xh5zpVq3WB1G7s3UH+!?&?AmPGw(ZTfZEm)9Gq>7o+cq}a_!fTrm&wc)Y*_wcbUaNBS)QUW3+8|8>ovR|1%4Rqhopb>+v1v^OFWdS*9?r&6C5}+oW{EH z8q1Pc2xF)>S)BAFS=GPIQ;1O}^;PFkB_S)En|em5?@a1}R3031Y4W$?m^CHVpLevsDJsbyqGucKvu$h%x7u8IsbB-s2yZ5uYUMxC5 zaN+>2*JZ+3UPftORhW{#rjd(L2X!lC)2LKw?a$#A?_ecyln5akCKV_floTl$p1;Y7 z>PV2HfKihW(})Z*EJ;8P6XNFJ5qu-??{oboYY!RaMxz>!6Keb0A1#hel}15fS%7PB zHaK#tNJCh-yMQsJo$Y28zC308iL?6SNc{E5?}`3(89>q7-U|tcP%?FITLvxbw<{O* zc4L71(aigm7+jv4etO576aB-{PZslOfcw3}kS)$%rc$!JBolHYYnapO z@E)5QUPlL3EVlW>G+#Mkeb}T9x+;(m4&q(En2-QObFDQOlfN*WKWQJoJ8^yAvZ<#9 z>6(xZ@r|OJOj8&7FX?;2QtEI0ikr#ND&tYZ?k%RO&dd550r__Zy_btyQ+o@SO}H2M zZ37njE)XcZ|2$nX13F)K;P}Wm9zwgg3zyxN0UoST?ZDD(Mdr-?E`ljlBOa86$`UED zsb8eWpey{Vnf~iF{)J6IWB>M+jrU?)Yg_^NhfS`_w06wxSGranF2M_gRy(gfX?!1{ z9=bDl?$gA^{<_N+ZrfN+z$<%MW>r8ri26Xby#<0mKva^q%Q)YTl#N~~s^7rLXq)Xv zbS$z+msn{&Mq4;V18l(QJ7Qs`Qhwr1?V2;qP0j2)f_;PA7?MY=48 zyt1~-9yp$t(|V|&05IQ5`?%~XeV;p3+x5SV2p1Pp>PxzEUoT#ysk|?|Hh1^-g83hV z_zkc^^+#Lp>CxYU1GwGJttn2GrFgFTP*-Qe2+vKaLI8`&qMZ{RH>*&F zMS0vdNH6x%2ZdWdxn)fFZCV&m1?AZ0PV#wr{7vZg&5*kpHK4i}t=apiY4d!%#;5JQ zzc7x|AMv2VooBeVk5k4Na4>1tb8>ce?lv&+7!lydE^MWn)oThHeoXRj1>BY0_fynA z|67_U;PP!)>L>ZU7_k1+ogb68cAEZ9=J&BSuqop4nCTR}ITTI1hIhHTJ;H^* zOPZiY9pvX=^B^I)bZK{Nt+-L>{^{lp`oi2xl{VQtnZ{`-W6;drE6F|vHvwK}$%Dtx z_oE!qKR`w2u-P>(BO;_TMbN$?ti>3(#ti&$9I4UECQPfZZ zm2@fR88fM7WwP<)2jk_Cvn3^`oyX!Y08Kno6>M)jmfAo~)t-ysYV2rYhBcbbGWD5Y zawgi1xC1EoS@M+V<`hgX$Uk}6mqXbRS$rf%8#jcDWr<9mYxf;mtVXYy=W8>f&Y$}9 zx!jB&kc@6i*Vf6atLrzG!$?HQJ|ym#wP71EXhaA17;X2p@SAd#$fhd?o3n%EVwHWb zs%?;pml%u4lN#lJf}_2Wk3@W1IB&MpX*7;hcF^1WF$Kd8s$*4#AbzokDey)^9>6-m z?1HTDX1=sL{i>3gPd7`O*lz3j%}sHjkx9VYxkByGRfK}xUL}ax0MQnQvbEg@I;n@8YQeK1VmBbBtG|SY9uan?6!@W77|Z~Xkh_WA%-b2 zXHEs9Lm%8Q+%okr^?dMg%I_v9cV)lW(iQD}onV}KV~n(K2PM&F+kzE=lkVS_6oiar zAsQpyMcecAB+N*m&_!JYibAs>Eq~yJwi@DJM6@WB88-{5hNsY@8a zt6{a*{7G+=zw%KruZxUej7mVTLSI8GV>7*n#S)uRP2ORd<>{UZcL|h(6*W`&aq7%} z|3YSgK}r>FRN}5mef9RsEZOD_cH?^?xK4Ir78UpG)|GcS(Y&yso>3(+8AVjSmV%j* zBTMR1w|>^%lswY0O{kakUz7UXOpb5e%E{^y)7gv>e+GEZvGb9tvq-G{vC|^nCG(fY zteG>pl#v~k-xrDZ5`VE8{$xlls7x42$wD86nS_zYmX9P8p0h_DV~!ng!xV0xNjaNG z9f2l|r0FjRE`}XOs7PE_WOOWxkIu+S40q)XLY&Fr6r$qiG&4|LvBcbvqmWQhErV7q zL`Z-wi)WQ9r-L~F7j_h5m9n~Vr^AZafk|CTmgLQ-Q9&Rh;v~{6oK1qbpdcrO+3nez zvz{|SiYKZ!a#Dv&4oLVJJyJhywh7fkR5 zD1Hb&fLp1H_v7m3mu{#pds&||IDOkCXrmfX<@!GrFvB~y0ZCpD?r(s82>mq@kf)$^W4CH9LIg6sRQC( zzmn-LW?=id@IK^TbKik+WFFzzn&#l=+|Fj-*h)#&KbQv|6!e{A{O`T|mmAu4iqbG7 z{=PdWIo(|h@X0gD9k_hn54z_*kJ}3$n*OMj*OS5ZrCTRc9X5~m$HdRw_T| z8gzlKol!$@+oLBKTV4*@)~M%GL{3XP&n*&@c_1-T?;{SkX$kt!;l*O7vpkI+2FBW+Z&wv3$6azHEuVRo5)h2V!23 z@e>}A75PzLz7A4KbM*R)=YH6`el1> z?=iqxu}LbRd_J;Vz1nei0Ksu0SqYFVTV^a-v#oZwLGRHvQy_^t5zL9|&xLnqHTkDO z-D+Go+FvBypRMH6Ti9883clR28Hj_~I3U{>L2wbS=Gr`C5d1Srupv0dCZJ`-(K&Zz*Ly6?vpT@G*(dkAxqpg1*EmOxH8@NN^4W} zeG%eK2efOX;9aEmyRvyvbm<1;FfxqGc|r;aY-DjV4EscA7?{EU78bC8?Cq|2s8F87 z7Z|Xof|=5AYd^2{pNqMq=j*LayoOt^uXz52g(NwC?||3!0d|#6%k>YcMGz<;ii|B2 zfxh#V>66d(VZTv;g?0Yu?cpx5+yU&a0AsC-w~dQhz3P8bOHi+~+pn*$LYrOdnw4rg zoxa&X%BdX!$C@$bLveQ((0*rGbQxHVLP6fR|9;4}!;fD2wtKVPU;W|;eB~`C8Mu2WX|*M+p&!keRH}MeI;?k=&WJTl zf4&<`m@b?$ZR|7kIG?4hEz-PReYm(eV>-J&uRDkQvfimx`9};)ItYNto2qavPDk5X z^)bhyy&lf5KBes4Pz1ns@6X|LG^+p3B+viGA}%i8xxj?~-`VZz%E0UB7CY|zLau0* zes^X5-f{8a-kj?kF!qt*`V;y!jyq;Mhda5`NtfQjZY|Uc93A9+P4(FHH6zX zV+5s$95(mR&s2~amtW5YF&65y(X6!X>>|?-tPi=0sCuN6IW$C8(o9=4I-55jL{`5d z-4zZq9OE~bh2CWs+OWH!Ix&D&+Lcl_Q%a(63QoY_LKWfX-@%?os+K0F1FSXaT1GUy z6bzZbkrQ_)60YPgA*pyM!$zr!g$a(YdnpNv@dJ4YQ=M?des^iDN=K!pW%pd-9lEi) z8;E>Kn6>Sg>wyOOvYU32hzW#UtJ(H$A^^11`Z6=Cdnqyl71&zP2}|Vl#akPlXveaX zH|~A4wV&8w-SR=Iwx6zJevvyae2LHV8>3C6#@@dT3oYkx+5_zTyYF7ngUpBcunp@r z`lukY?gtG6-C~g>Qkj{_lu%dJ>dAXO1}LQna|+f2f*B}{f4qYS7#ewWk>wp$ z!niXKjc+GmX}jAg@U%H|Ah;v5cgia+Tc}{=)2(5Wg2}qulw9(xio^Tcn&Q$B#z-et z>$4-fiiY5s;F^th&=_@SR0BJU-1S%~h(#FDX#*m*u#REL^9}3-b47*MRYzfeDxP!*`7;3j4#9*g7Q3}g<;aq7fafK->g?zPsiv0$zQWZJp z9cP|FUCm-46k(YmCrD{DbZ>uZj7s%DOPL}=fms}R&J`^lJ#x%7<^wP@LKh(#2Uvfh zk=kKoAo#v#Xg_%*ox^H?3)713NI}AMpq`{Kejs4piZCWJ4vZ;!D2)iNqHr-#Y*%Y*cxk zfYzHzJD;f8S!DpxYX+)=Yx~heHn;V}WPT-XHa>yV#l$z;&UY@q7yfssMg9j){Fk*u z_k$2j_xn-(gO*HokNX4JoK}yd@;Ki6bcGB|y--+ z9TZ^Kc8jw4xcHHAi~Z}R8s~iC;5F-E!#kv@E4v3N<>NUJ?OQG@27F#PUis+owGKjaDaw6TM%^!Fb;#SHEa`|3 z*nYl89r^zA!+GLc=c9}AGttEGCXj3rXbnQ#s-MnW`9BM^JmdoA69P-{#$AEaZ-A|ecT)fC${2BauR&FqXG zcS7W(N$MF_2+%mtbHNO2UtjB#$v<^JwnsZrUb+9Vm60X5w&SKrzhZ(Qhbm%aOm{Ir zmn&*i8WCO`r`56IAX&ooMQFgMkki0uk|tL~5Sb1bLhlC5Awc%H6p#7zkND9=20Wav z@7|xSVgd>Q&pT>)-aE8O{tq7YDqv|!0s2-o7zG8z+FzF__6_&l9yxKupm5rbPRuvr zo~tXy>b7fGUDf#(rXSq z%z)|1*LT9K-Y|n)imF+hKOYy1XYBipp58oZqpzCLwyJ7opl3ioKYm@F1-_Lg2Sk7Uv zKU-nV@qPYx1_M?Wc5(0@%%v1{V=ZU%8|i_RNKX{UsOad?4;^dzm%FM;-D|rWT8{Jl zb-dRQg;$gv{U~t~8)@6~{EUO?K}L3lUfoQP08}z(ydbftFi<^87!jSCQyX=f`9Bjj z*U;UWl*EGNanQu0T-?yD&K*gL?m+>Kx@4}UFt@jn5__9uCJWJ!V^;^R%W#sY1yt9p zAd*;Mnju8owqSEK5-mMu%*-}s6O(Y6vIP^MU&|u2vY8f_jRqA}BPq_^$AfM`{GW`nNW;NLXIrI12_}Ay`yL1-P2Zpw0>X)=E;8-XnOxE8-xJ zDW!?@rWHPGi zImI?9Og)!|ppu}ASTPdC=G2DQpbipd4NQKbxA3({olqIGr7IKm_?68BHnF@Skz)_01R%=gesLqmxtD%Hiwsup4nrO zm!`NT7*Y|Dj2dJP$)t+ngu+(POf&a*4@8u4Mhiu!3x({=5nD)VILN_fDR2&>mO{rP zxri|`3_10PAftQRA1g2-OW;%39fHWBoRvezqWIuIVD_OgiXVppJ3d7n{9e8d%XWGU z8ted7N^9`;$UK+u_ad#K;&{hE#-zdMD#%bq<6i&L=>2HZ?n?&H8isIAOw$1*m|i4g ziim`{c#|ch)^~ZyUo`QG*r6JQb;-zLMOBJw;nh3%#R16^5<=Q;bqqfx(50E_#IpFm ztUTw@O^8ubFgX#2D8mw`G~%j{Nz6<1Ph~hj#)Bk^W<#1!jA+Vbm7rUgt?zNEk!3P9 zcsF1pdS_fE0ZZz~TRs0S{l|&lzL(z>W;3=t{|l}D8$i{4(%6}k{F8^aYfBU3LSrzp zvwc9JyXbhK1N<=g+|Jjw0hkbI?7+4Ir1iLQ0(4#WuCeXMEj@_nmZe_t#;Hrt^`;AGsr_FrkK9Pu1Qb)gSK{PA6gf-oK{1#;g6Z zH=pmDo_bO{Zs~o`$JVKC#>6i=J6+s1yy6gTHxJg?H}}@rH=rL5GQA%sxU(7?2~h~c|ouTr8bJw+xWwP&~zQ+w;C%ETjbrRl>&b;v>eyk@oOE*qF zF!?gZ+Bv`l`nKenZFa_u?mUdv_A^~X3C4BxQbE#9e#}P7>tF2uZzJc}%EdZ2WUvS5 z)ANn?QL}nM(&S%Nx~&5MNvm1C2+*wi)tb0;zYCU!6Q|MCHV?YJngsBo27IpQ;lYg@ z=Ccp9Fx#eSM~}YB!CfTi=KBhE7DmLL#>$>7u<^D2SPperQKKBHobBWP@cb7j2 zHU%GFrWNF$;8v^KZ|GYrAW#YnLc+bRcAm1oOyC z%@k0mVbDca``A_;&mWs)=wMzok3vp`|K;%YG=W*W-o;(MMMZJiNs{eBk8n_P!a>6u zXR$C;2}w@&hie{7&rDug{=!~Qn)ehBG>$E6y zHEUGMgZh9rUtl?Td&Bq9^?Y{pPdsR`O82F2)TIkVT4uKI(*bkyZGF=wM(Zz%#K)P& zGXeVH`C6x6?x+6z7q$M&%3`zc+w->O2YN5>GbAu1tm2?XLQ@r2c6WKU546)rU>Bp6 zSg!otukCpmkbc>m!iP+FxPGURab@TC z?3vw8ugfcFr#y2)qt)6Di7CNDWX9O7s|9OuUsxde zDY1mq+olGJalhSi{c(hu=8s2n0^>L*i`;3KyQ1)}tU4zetc)&DOQ;HC-vtdzGl+e(oAqV$xfyri`&P644t2ma`f@U9BO)g@l zVy;wH1mCNGEoQFK+l(>LIADevLaWi<2AS}{8jzo|2qmut4sIDV(2s>>;D z>p;m(D}eq4VeiV$h%M-X@;+8XU3=mg!&OkPLyYb)s$uPI5c;0vJzz7Ce#N5PKYW~D z5tqmV7Mae}Y6RT~>k;TcG9)HZ%MY@Ema0px11kw3PeW?TG9Hk&i34M?Z(R^Z;70$VB)=^~8SlQYlj@RCD2N zYh9{BX7XYQQ0$?KAwGr4d}{-~u&#KIIsj%sARwgE1kZ zlsNfHHH%gZ1yL9zUDCSXj9GkKczOektq~hxYz$2VTo&w@ge0PoGPG_wxZH?zt*cy+ zF%6DSwG49&j*VLhJW}b$PG1Nrb*beJDbVq;^F13*T8frjI?qbiK+{Yq0|0| z&9Ti|4W^pAMFD~wvgx~f5=?Edg*0i2Weg3dkhjX7Gya*iocxCqpDOaV!;i6#w^F|m z{@1h3f-;`_r^%bv&3CrVr(XU&IC(yp#(@|;0o5ia&CiO7B(-F2Zr=9s{_=WT%D)eI zMQs7er~9|J$>V1AysGaTPz0CLz?a?qc{p|VwQNitRK7Y3bGE>)HpBFujpAK7G zcOlE~xYnkTd#xJJUS6F-emOedQFmUoc8wmCj-rO85BG=(A{_wq7-8`TH zj?PYK)bBaG?-ivDoqQ(_PNV<5X4NO5e(vqs0Hys&wO_pe7iJ6P)dT;~MLt-HIlAuv zmC=>%Q2L25HgW;bapAzRYV@zJ7&7^0?l-(> z$p6ChpIH6<|7XhSDIatJ1o^*IxtUmHt+s3>D+cU3@)@jkeqQ6c<~R{XXV*$kwWc#) zQz+QDd?DOQ)ce=L)g6BmbUBye@ggk5~gEdskm~w+~*JG(1L}=+RpuCt|Gzvw|9L^4iT;fU6+g zMTvo-M2jeEKgO7|M(DBN9EpKaC1s)*OhhF28067F*kbx(x}YRfz7zrKK|qnV5`qe; zl7gw{9$=p9bDt;T)5Q(Je)bc+gkzSdH|zAdimu@gb@rxfw-&**74D&&F3z!(y8sNvE*@>ciD z+VsW6A6cE?ix>LdN!+iPx&8Z2&9tjb?{ybe^tloZ^wNC1*2})PEUCKmy+7~N*3Uva z?HFpt%p-C(eOKp4m(VxpmAx<8=xEF*?f6fbXnP;3QyqOYUI)9CAf-R9^}6~GX^I=} z4zmd2o%k&cENQ27*|CnUS5jAfdHJZ)urtR))%83=03SGmA4f?}0Fmoaj|Kl9 z-GsLkkx`#O%Qys!$jyFh3usoF0ikc}48+BMcSBvip0$$8r+nY#9X^&;AaHmoYU=dP;B&F3M9H z?ko1)vEFGtYPYkdZEf@YzKb+9$IOll&p|UzjKEPdTwea`^sFbzktr-!SQ;N_!uqTB zuG{wFayy;lH0Mkjcz(0>W`r(5{b7CKiKUl`ZE(ASUuIM31 z3Ugx4wkPDUy-QgatrT$*Pe-L$-}xhCv2zdzSvJkCm;fGuSs50Ytb2w;frM01n-LaM zB9zEC@m~TUWg$vjKNXafyaq$@ZU!xbc|wUzmdD1F`@Kulj8T?IO_NX#g?oybiVzA8 zO*koPiCPqW`kLt2!O|n$1>57-B$(olC9ctznIs{x*!Q|G3lV(Jz>fu%P1tCr7`tmi zZ)k>=VJm+q@e)rCz;cWn;_WlXUU%i8iUvn27b_JhNSqac7!orMLyv`H_Dv#p$}jW~ zrl$}v7F3v!dc2UQ6Pc+*mxaWPWxDAUt zeGk>PCN+et3+xs^R?|e59wQA780iW{KV+&wF+}?!-Y=;n`Il%|WClUX5)v$OAs9nc zs^<{E1=E)Di1*OEmJyBcejDTnFs1aJMrd_Q1!!KLy3?girm32^dPCAk{kG58Vrfi* zY=|GOkQ%TO`$4|QFoy(!n^ngoMJ(s$5HqW#^SduX=wKU9w0*~wpb|A1;kO*T$v{!X z^$-IkS1^%WH)pYZD)tS68eEWSthJdj`Zu(1L@{6nr4Y&xxyZh zsxU@dL^Y7NQT|>ZG^$A$`8~Flgg`LdCjdfUL3?hOK|@VKhhd`!It<3vmAp-Beoi5X zHWy4o=dS9AJb-dm@(6~Ln+pp_#@3#@5g(HPFSMsj;8=UZ>*Tvi3c&1M`@OdM4K4yz zv86++KkU|+xVR6X_}IQ} zLFHpkv7=aXfqo%oy!dzoZsh8LD<*)1yj>93+Toxckobgp0jl+IkpG<#$VE!S?q#?< zt)IM}k*2=_j6<$d{hm_2$X-sn2%m2d5IeqqMcJ)8Z{Gus3aat7%SN$i?LCgZZoJd} zyRYURXR>{2B_P<%fS`MPAolt@Kd+j{jt&0{y{VTS6W$6R z^^eDpx`gKlUzo`Nd;+KiR8E(<4Jm74lCg7;zvZA6Rr=7SUE6ha>1hz zIblsz#>uK#BszyPRuJOTnI;MZ^_;0Iv;07$SK$1i6)C^H~rMG5(r8yc`) zMGku3tHs)9jRxVtsD3t5#rk^s=pNT=cyd8E(^bX%SNqoLn?{_3U240!7ZINj-EGu5 z8FAC?YTEi;f%tl_r^2P))VU!p?b&2OYCFF^xjzSRjHBq<_8rT!bjc8V4zp7+Xb#hW zEF48acwxTb`!TfyZT#SGaFp9h1;y$ugc|Lc*Jh1mLtHd?lIo}{=aSKB6wxakjF%KgpSfl(U#TAj1f$4r~z`#nF8AF1z|6)OcFdFh69qdyU zCm4Q7H2;)RAb+$0bpM+_g@c~$2T}U3#FK!y1Hk!PRL-IBE+xCyIr;bwfDp<9V9@R7 z#mIYZ`nL0S`X1KLOWUSz2Mh&(aBeIb2wvXgr|f%=1YmsMe7mjl3N@N4{muyhZE1qU zsinlDfOp0$MZyMX#@|OBZK+c$?frJ0dVj^I&vwoG^&#Ch>2zpwv(MaokbRfy8*^nOvS+GQzGdj#x%tYO2S+)>>deI%2ww`C)aw-P*Uv(Dj-+^UAoG z=hYOQFda4U`s&+C2SF{(m-9REj(4o7^v0W}S6g8rMDIQ-7zgY#w~xIT`UJ^?nhgM0 z-P1pLzY)OXb%>>3EgPUMGTH5}0ce1rBkn(#K&~3}?YwD%{cU%vfQ9bG>v{HuLmLE7 zczDpyZn`y+v!91-Mh;Zq`e4gd-rV^emi|qg5QD<3M&{062$A7H6OrIkqZ<$?+SL0X z9OB&7jl}QSC_+$iMp&yL?1d%LFbJRB!;!@>7gmn9E=2Stiyk;JY>N5_9OC)nX1)

oHhJ2@&Fho><!;Rzxkt{U&0bIA&7I@FmdQ7AAy#vF1x8svJrn7~KjK5;tW0}87<1Ly9R_Zu5N0P@c45c>&VDb-5_qz0R%0wJyh*U4 zXtzUysidKRuN-f z5FJWl@-&lSR41g`72y?yACHG(KxHATL2txyQY{h0zV0GNgr_1Y8zY_;!!S;xE*Y^H zQP?pFKNW&ZBtwy^qKcL++a*(sV#4e{P_W8Xq@^K_G!Y6Fr4&s&wCF8U(?LiHG!u6e z96-n+hebVxsE(aS3-!Jk(#e*jA;V@%jYmXxD%CP_G&LsaN2g#T)%qQuM92XHRjmSR z7MZz?amLTO-G8~0Mnx_q8^K9tLiTu+TuUr$sJycWSvjJ?JTEw;;xaMc9H%℘*oI zh%Ah)D6bJnrOIY$CdGWHm&VvOK=qX*G$TRj7Ag|9D7ZzOzY<4@Mk|0**pY>kN^mA{ z5rGSa4c9V55TTZNjD1Xx@@r1A6xJP9U@IZIQi{9&k4*6j<}qBQ4Tj^;hTli0&2UZ= zS(Q-1=s61-k(*RxZ0=ZW$?3F)_KCVkXaqZG;V27~?w&Pg?}xE&8}30D?@zfmn10Wt zuWui(x!$+$FFGxbA(`|YLj>=uexq=cyn~JU_TnFWem|AGQ3)3UfUKsG8-Nq|_m2v| z1lHQUQW5!I;~zdzdEou0^>k8FQl$}ZuwidIx7*|JL+9j&(=Oy9u!vt#9&t_J{R)u0G}OFSg!t{ zh z9vy`{;0<%y$u}QmLVZ89BVtQ^{;YkOb{#FOSj`T*kn|mo#D14heqBKSn|OP9pf7r= z$iF9lRJ`P)TWV(?C2c)fpT9XpL@~ zj2q;b%_<2(S!AB}D<@71bvi3Wf~IVCuTV{ioDbB_s*nZ6{B@4Ttfsy&UWpX|5|0KZ zm7tcRf6smJ?&Jrj{o@ZN>}XA1OY}w1Kf1!d!KXw=4>!cArq^fRwSz-COWPX2I~n-C zHo@)x=p>-f9rV?8ZRQa(z$KIJ)OpX*!*K21oYCt7)4bSdjNQLo{rWOSC)soe#X&q5 z%SXe3h45d|{HOZr%Q+(l7tr7yAJfLozyRn15H~Oal!-1;P3b?lVo6jJ!Ny!#q@=TR z029zwINgrjy+wsx<0U7}gnM7-oSLm~j@>*T^jXbsN0QsEJ^DC5`Wevb&E3z$1xi(I73ReA_#C*wt@?WE7!y!NnYWN;ZER#XZD4-1-DkjpVI0pk z6~o^p$47h93C>vJgVz5QQK`|pspG9Y$K%ns2g^9xhCZ)!(^64fiMdHnONy2xw{Ks{o7o7y1KhT*zGp< zic|G=wzdpxuH3@D`7SdsJ5IC#uadPEr?8uw8%DXkf}RVOHiS%`*_!mL$1CFvp9kH8 z8_xf64fJmg-lv6wZ;qt*|rQ5viIckGf$ zptif1{O9}ItyXP%$$f}y2bE2IoT_!9tb^ujGT%3flEu+u^*3dx)zM}f+KiG+>b^w> z!M>DzL*15+jq=|Wk>lQe$iMC~4|pqY{yawhYu4?ZM>2v{heuOM!P;G!ux!+|>deN0 zRZ&NP{L{E-YD@ir38A__T`2DTxE<#GdNt^*8y0Tw?mh$vY}~K}9fw)6|50+b5M)>ZsiGU6-0_joC6z-18+7SukRrWC!Jc2fhq9V9}0FaE2=zH@R;>?*OD63Jp6(Mhy11$x-brW z34iGtMi2!w?w-(F$gA28JR%2WXXSGd61c>5`xoCGpI(6|af@=m+ti<`Lvg5KJj@~s zqWUmW?_1B3K_F5P4N4TL5R1Op9OS5%iOc@JkVqw&wzPRdi0RL=te-=_#pI+(akIeS zB4~x|Da0t^zb9O|a3Tx#o}+cq!wD%?5o9M}nnMrEbHfP1B58|S$KLMHvB%@~;gHfo zcSH6D1cD0(Jqa=!A%P3wpw$To350{=jOh$(I$nO8%IwY~yhW;`A$*Y1LbJBCpa_4w zFvU0`MuR=X;2aHLhRec{U=%`R6xR9IQGn3F>d^ot#}ftY1@RINWkq87FZ9=3-LZ2?r-seCo!6JXk;AjZZz(k}2 zh$HYui6qpUBiu8I=wBj3QUVggh?10o&}4$?#$<`3(H-bNZ3zDaq=6ENaS6;rx6f$A zfm?iMniVK?V8B5t5KAP-Q4PdVi|#<&!#k>P-WGt?fYyKwX8RlH6>C9Zf{bB?B_$V0 zE?j_|RKO^FjY=zZM5U&H&LS=qo6I+5t}&8GD=p(0cpGjKEr3x-;z-BrZ34exhN8fj z=YI#CZp2#v;@DkQDlrl9n1;5J4F#W{ADT z0h`^>FlOl@H_O_GHKlS?G*w*8?O8`Jz?))K;Au{R_o$z_D9Mu;8m0 zm{2l)9dc_l9p=*YT<*V_!j-~|X`^H$MNq5rF>pnaGL$G{HHJ}(O_JI|1nK*p!1$OY z(J>TVD~@$(XeX5ei_0Hh17ZVzlI1h6D1~=h6d)BmMB#zH)!ZBE=S`BV8+B&`y7`Zn z>i5Wx!|IO=g4fB9;eXS}p9k+9?=g0lYC@1X-=z4 z6fV16?D_L}W<&jQ+PQi_G75diGqvAk^*j27)|mAQm9O-+NN0Dj=j zN3baMaDNXFzFc8pVPj^@N>XzdAm}a~)@IJSgTQ$3pR}u^!Z;2-KFF<4X!Vm1dGNh> zaNmt#2MRXkf@7!kFYgwy!y(M=5{s`P(pUi`KRL zZ4%UGak0{#yvvW1e_=1T@xle=cNwqK&V)kOu2%@r7G1irER%pT zsXv@I4gXuizqvlbGdKkWBqgDK{@EN`MEcN2%DTHYHa5(dUU%25nzR)+zvum(=Eyt$ zYMzr?X6TNm{7m|tO-5myB5(F~=@?>q7m{m&rwDJx>#vmlce% z)*pOQT_k)*3`}*Jp?`RydwXZ1JCzD5cInsTAp*1dK_vzJTz_MGJFo3V+b~UaTTcp& z`od^K@7GSyTc1gaT<@Mwu|T1C+!w#|W4p6Cegu87&0D+B@mH0gPq9GZ_2s#yyK9;* z3pv_Viy$jj+T`IQN2}w!g+XSZXLN$>E)e0#Li(%92uI(8+D9t$T4lv_iQF!dHh|gm ze_8;k)l<7hJ!NZY#MOV9yP_$b!`i)O8+==BnFM;&?RurxKUV&^Fn1Khdi-!|jj29cI7mc)lC;& zGbd2lbo-NzuWy7@IeB^a0Y&a-X4$Tj_95gaUveHC@ZatY10UZQ5}2Z&Hbz2j_bB(* z8=l=bT2KDNq!1+athKc&6w8GI7W9Myi36MgT;}yn7n~9I(UkHcQrm>GaLCHXAUw!0 zy3t=og`*rS6ph>eoI&WKE(d*2p)tecbD^e~{-yF_WcME#o?p>p@H$ zVCW5DYcyoIY62{Tqz&Ld4|ypDzYPwKTrtvO@N%XKR$VOOibzt;{OPf#8`I903@Cva zJ2+p_%u9zEmyrN6bcsaN>zc~^JE&8_D1uioGEX!c7Z@&OR&4T!vDnsX6c4zfl0r{Z zVz(Z&Vzv>D1d3qaG9^9Kpjmr51v& z7+g_n9hjsDPJ2uQ@=hR6u-NNuk&4T)II0ng8d?gGD?y`y!1=CJNOjQ?p%8+5pIm(F zfDgCQ;pY;blqVuBQcMG>6g-K14!};-p2GM?g2s+=ISUz#21|x5(p&Jv^#_`!7laVf zgaH=LCCnotSOePXohJ+*1`2}!r`Pzxp>u_n!%SjaFj)yhuvC~BCT41BBrF-wWJsQZu;JKD%+N??Q;_-D5RREc zHo}tMTZYphgrFSB6$GS1jUNLN#TIuo94Jk>XLHp&4Pu z;e}8tSbQDju;p+i#Igjf;U&f3nXnKjyP0f|jr=hR$)=GcFxKo1IQEHBM1~aVX3U8W zyO`h(d4t&qZ3tPAEs3WEp8X@Ia238NN)=jY$Nxdf03|Y>F`g;Bt=iJVk&@h7OMD6K zK;?=Sic>`TDZH-j$nqCU1AKFy08V6W4uY!6KU{H$Qu;!r<#8B8I;i7rQ=btnKR8&V z+bnRWyjM8*HKuUiwq8++Mv~kE zA3kiM1+6&67QcElAmLBG9p!&BtIYR!759|+>0dMYahv-%{PC>MBHMcdAb(JRn*Md0 z;0?LD{TkMzj%t!Tr=jD%Vrg^J+|Mr;kTg3KWdjGk7aP2vvNB-N`8@f0+Dig_qGPL0 zj$U#)E_?=`b6>%~AQSZs(EzvX)$LE@)xNv9ZTG-Lu##!#`1W*|es;mUxoXVT%#vmG zGTL*O$^LWzn9W_h59iuDI*V{;UM&D0YTI>=r<^RZFLyF9sQw%!8?9&ld3Z=WF{zo8 zdqdCvjC}EN8P@jEOK?5A?Rk4iL}uQpPBg`W0Cel!3@g^JpZiCP>lu!hm)EY=2U_NK zq!MgYztOAQ?Jz}djRvg*05^=F`#!k&rG6TT?g1nK6c#(%D-%hk;=n_|*`#$$$oy*f z|@2Y*MQ2{mOk> z13bMB6ym_IwYu4@nX(QH0aICv!|p&$RE+{foaz$*=GvobQq{D8V(bo9y99xd8{S0h zW10VIfyYfln4HnLa$oDapv8QI5~R_(i}s_jft@@o@Ds&-x06|HGE8CrQg^KfX~?y> znb;{yeuR5n^p30zb44F&Z0{l;Dgq7v;0S+n>nK;N<+Zkfe=;X&EVIRw#dfDIBW(OC zz@ltsQ(%Ki*kIFZy}*>&Xq@>NMv6H1|CoB~sHnmAbAA*7_cySqCSL^_5> z8tD}225AL`M!Fjbk!}HL`1YK0?!Dh1xL6=Mv-V~`?=PP&Du9K;<@|5o{+e-c6FAAr-{^fVq0wHU)5)~)-P zCqR9{roS2~*fZj&0-yg@v;vxUO)s@m<5)QHifQ{^#!j}x^{>mI{tFZTXmi#_zr1^a zhfUkeLHCD~Un$FRr9ewU8z@2+JCmKg4;-3acTk|Ml9gfe&7~I_FRnA1ubfc} zl6EzN49;|bf;W}mTkeSjj96X)4vly;DU8T|so9TXcD?JA*#9;yxEckbT4MNIt@y=l z#l*|d2_rFdKil#n*Yhin+ObN;vhy)2bJyt%+ zJ##A~I=xicTs(jB>3}S;#glPDOU-Rfm$~1IS)@86jp-5H(8<$4Wahd#zRYM8BP&Lu zDJj>Xa-1Z6)xfyQ7EE)_9nIy$RMt%>*(rSGLD%cMFZh`e$94h_v?s&Cg1H&q?HDe5 zxUVfg_>-;tj&l+pfsqP3Vq+^dp?oC1$Wu^MTD$m|gpb^}Hg&<{>&%&ciLgJM~R{FrWjf)^_l4WGv_X#v<_Q4*0 z9!*HYqnm*S=_gr3+#f7cTAFD7=yuv{1BMpS2GKId$(Z*s#R(}G#Nj1q%uF1yxSzoK znH5u&Q|Xa&lE?9L(mM1fWEBy+8V*)8;|Nb8Gn3@s4P(bJoZu%uQ7Cd?)rcc}i1t$0 zw*ZqXj49fp$l~JT6L2$0i8|8A#i`(mJ9g9A5b*tg&)wZ;J}8!Q;F%%hu=~n_JY|K% z-sEHMfY3xc^=Ue~fG_D21&FT`-*5k$rRX25SdLs<1@0+a2Q8Y3A`0+F_&0^@8ayL3P?4YG?bW}3u$aruNYG<sRvrDaF;tB8{+h>{i-G4|Jh&Vf^{^dYJS}t2p)+yr={ek0m5+EFTEvI$Jd@Lc*zkg1~qA1LsA7NNQ+lm81uLcKeB zSD&`%3hmWvOuD^Wg9Qtu?{hO{hysN5WU_nZ;}@u2ic=~q%35S4FzWN*M_57ZUBL@)nY?^7nm~SV{>zE@BkDS!ueW`Js#7iav&nGQCcn@% zK%tXWbzLI>%;Bj;$io?Q5{AJ5{h@@=D$eA`$N1n&_h%5y<~%8Sv-b04)I|G9 zw=jgF4PI>P;=VTDZiw&r3_geNN9MkkiOpK^(s|iT*APbgp`qb%YWP7p-5yhXfQ5au3vB!^H_z*XUF!aJ z)v@U}2zDgcxH!-S(_Fytd@3cKlfnfr#ilfAweTpaXFV-MGaNPkeWztHyj6~2{Fd$y zG0_#s5@HWpJE>f(=*LT=mBBiWDG~7Ova5J%gT@o*8Ey0IBcB4(YKbjPX9zHu zX7w}6w&=<-dJafN)Z^=}Yc3*|s|y_ij5hc|jV-ofp0;{;5?Dy#mfWL=(Gp23iK1>! z4E4Lz_Ybxcm)RT>E9w5{Lcecseb!{#*pMgoJk5gcY|R2LWdGgV1Q*_&)%TSRbONmn zK))Q&30IiX&k1Ng z)^3f-4q__<2Cr&g-1x2E#-W$?Mc^k_+oj0y50jx~CBO6*{ajbmOC1Nr4AjxfBRSvQ zJw@qK*FLq8qX%TqBJ=NILOIIW0NU$6zS*8IR2WST&KbY00j#lRK3ft0Z*E5xUU#lx zy*L&F&;FX_x@q`@cMqjUFkNm2Z$xU!``zyqDj>k>IB82X% z8F7V|0Mg{>?EV)tv8H(o%FRz}(pm7dG_b+n-JkRim%{j@C|L}4p3Iu$_4kkH>>PWRw0qf1y)(1qS8i&I{P}1->3e<*FD!SV~Y=PrGeHf$YYOYvDK+pVPMCc6`A~>V;y-;x!wuM4UzJG=X$BM zUF%R9&TlqkX@`thipA;3Qb-pgAz0ovMi5%}uxs~Y>CQM>Q9()4)7Nu*s;JP>j|((m zzw59Ls%x7-P6UXZrMx1Zl4y2j{e@=jk*07ya~M1Uxpg}vA|#IIlWVp8wmkB-2|RsR zI{2N(vk_IPo1Q)EySW~D*8cfo6nQN6LVvsSw*@B*2n{p@X)jog-;;;i9vc=XXye=8 zV(({a>Kug4M5&YxRm*?=uGNe|ZdzDcTo*uJktHM##_1 zw41z#hmdG2yaZkOP5QvTU(s3jT=edz(}+zu8@9GIwGhd6YE6z1CKeW2pN_k*=u8P> z4kU5gzWKrn?#av6`v(oW6oNu$Tt^239?XbT;TKa-1U?TuE>X1vSsY$Jf*szQLIawL zluzwj5)gjdYMXuxO$-KE@+3X9Mu|*jjWDywhHeR>S)Ruha=K4Q)YP)^Uuej?>FDrc z;X^gPN|5GXkVX#&Cm~8PB{6=*Z@`_-WQ|WE=+~)7b3$=Kx}K#1TjRn{uQnJ+_|bDo z3e#Rih*5M(l}*8$B4IWMvvS`pRWr6-C?b5NlSF4F$fj}>E;cO=I9x9 z2N@dcPqcGRrP zMB)e?2q~3N44NnWDZR~y50Rjqo=qw#%!s5knnNOz9vDh>K%S0sDs@0zp`F3gb)0hm zeMPrZqD_ew)8~F!D8neZMJErWcCgXt=j&6FERiT^sEx8kZ7<7^NO&zIod8jxL+z+# zv>=zX^^tpA#yv#a-HE}P)JJnra&W3&O45!slAa!{W0ZuHqbt!}#Vj#eg(e~ViFP`- zf9r$I)Hi30EQwGCqII%%~> zf^Xjz9SN@o1C6$i=Y7)_o9E|Ai{`<3u&vb{&-X_roqlOxkvb^rPn2IP+s?Pq9cs&0 zt4vSYFG&BL;`Y3Cjy|u0ep7GDL!+GT_%A(N@GP!7s|IV<^WaFYRb0;5E9EqcNN=4G zy2fY`;2GMe;H=7lQ-jCEpLKOqHlzLf`~j<&lIStx-u1W-ZBb*qvIsqKAruoeYFdO` z)aoxM0mLFXP2=fr_!+BjDv3(x>_~{++Buw-*Glvhxkzr@xM?Uz~k-=V{u()FrOJP z_e2isT#W(ySucRfv&QWxD&QW&GNZYC!bU3>D^`o)%gHqyv1$i#uGnU=aPIh;{@%-^ zYd+t5?G((&zZ9=IgCjfE&N9$9(ix62cX??Omjd1_^)JX2z;g~Q9y9r7zmOR@;bM85*UMTV`G`Q0K^)RX_7|J19`Dn{G#TRtgUF|wZH`kJBcGPR@9*9T`onMKW$^~aro7k1;yIi^#x!$)%2sDsE zmr)DCT>~N9{Gr-6cbqbaa=m_`<61+_r48Uq~dO2?9Q$a0momK; zA|eyKT7a*7RHW6!k_L}4Zkb4Dp-4xJ?D^Kzb|E(R?IEMT5FU6^Pp6lAvY^7H;!p3y zomN}=mVVfK?VHtrADv>_PQW-g_S|6~41HJ6bxe))4?Awzax#F6H;4UX%wZ*e?q7c? zbEJd0n$DM!Rf4;mYt@_bv)l#^BS{LY; zDxR$G-$?qUHqI~t#DIel9uuNerCzasL?_YRD(4QT|A!~5@K`$&ktdhu-A|siFZK!W zm^q#{lNVEF7KRw3CV~e9H&;Sb8A7oLqgsN~KdpCjf%HR{_O%p(a*Pe0%2-;pv?ODg z4H~jiPa+u1cMF>N6Bro9C6?N4y(+IcMpyaT$j-rybu^l|@QkP3c)s$IxGe*D$KAhDFj}zjw$%C7LPdHiWz}1n zbGqv>~!Z*Yn;19nlJW~uz{1=%PqJYC~sVajf{)-MR zng%P9HrqtVjNS;JROaGBK$bSPt;gitH{J**1esnwthn6P$^^fu=m2ADEgGSM*L(S~ z7(UKL)bOdyScxiV0>qwdvdhs-`vIbXU`ZBjuQX_pX-srUrbYdQWFO2E$vrl&A{o_H z8ODvfDLNkS^0irpU1n_cEsDny^K6QBFMg(^MxPG4WGAY2!?93P4j0h;QE z?P5vViIJp}Ix^U73x3cKmTfgS4}(6uU4MGMx%*R;2vFjCIRN$zK0k54gq4$1r?$8n zAU^pv2u(it;z?Nmd^zx3?A}=fal)ihfKp7P5+#2c8X5w6uwi9sfDnEIBj2+XZZ)s~ zMlc{p2JGDJEd#thf6jEv>&E3c;0gR5Fz&3cQ>o61c)oxyXVpn!2lgi>Ck;7>0nyW{ zVROCHA9(G!)_-6&fA63@@7rC#weJo@_Fe|vLKkuWQ>mRM@r1SYBGBkolzMP2nSY>#a&h#I!A1FrV@U-{hQdj!1|C!= zZS{x5zNy+`HMI~eRJn6{5bTSr8R6Zwx>(#12O&65K4!odOk!F6AMJZ-3lpg^t*3i? zDAABJBN~lc4jTuj-D3v>kQkgD9|Hr0^O~D8Zs+9(4{!KL4stBvpu&MWb(VYO7je8Z z+#(n13dY;NaCA?FL8Vs})n%4!FZ!{%LJJE9PP3{Cs||M;m&@lIm?B5jRBf4$bfz*I zuZuir0F{dm?G8i+|E~=>$r2q2Orx1D`sRMe{NXA0*~pd4ziH7+5F`^IjgVYE<6o@f~shhGs(%T zt)=S7)GSE(T_|aJi05CHz|7o;n)+hIWPf#mE*Y6TIo(x$2~B-V%3c09p3rQNtJSA7 zIn!}7?-XdwEgBvNJgDpD4QZEu`)1@i z6G%?EL8cB;J!lA-{Yshh?Ko9dUvRkBdYqETHaViJy8@%E+Yr))W=NmcF6ExQ6=;mb zAf=E*NFGtGz^TBx9hg-AK=@)S$(4+gLvd9hYjysthyTq5&tyxufN11z>RO&Aj{zP> z8)iB#@bk)3jIwXs5~Iim@b@hxg-gd-ywqjiU&1ffKqNJ}(1SQXBr#Is-iamNwup5QwuvKUy^$#{@7$Xaw=y;^5vn_-)| zow!TEz<-M-y2Ad%@j0iPM<$$)l#zwo-pV|>2aWCLJ10mc4J~dnZKOg9Pr}iKZ5NF;mh?da0rTX&l^pThSui68gx5RAH@Iydo<`2}M%)*+?xT z6$+^St{kBjoh5n$Z|_XfZ#jfO{O@F`q85{2I^@Ek`Ux;mu>=P$clIA*t1eUMdM}p9%4vS)4Pr}`K$sj{2bxO z!@s1KU!i|znt2n-53dzY_;?ldCCHnh(Fha0>eAp-2f$Fo*_&e5#9<3nWcOBy-Vafk`cmy86i+Fs1;>bMu!S=y?&a5OK|)R$J-# zMy^bh{==Vq+5HCtmKFMojak}S?!6*34`3c z_rZdin@`^BTd)svpm`nWdZo4R6X+v->YpohTmo{~r1Y0}xxjP!9ni>8yV2Iu^ZdL3 z(&O)2P4c8NWsLw(vXBrNNb0cp%wJ3CDljo?nhyjk5gP~EZzi9QCt;P=oG0EH(@>Rt zT?6bBQ{%wKYc9;h50~Dx1r#m$8KBzxp`-5Bkzivo0T%7uib*(%b=mPf*=<&xYvCAU zL!-B@ADqPG(n5FQhy-ZBo_$Jq^b53X=oNU`{7E?>#NgBvq7&Z8soXIKoPUmWxwHA4 z=$DM-2%ss25Swrdu&?>0QE*V;$J8sCPmiw4{lYyC3jwoUhy}IS*D@f!$vFi1XnI=(C6Fv z#H;^{m9e2Vz)=KfGXbt!8R3Q%eR%i({~7Rsp^ULhGWZveunp|-=n$I z;^hz}J_BM<>2fa=BepHY{93=2EPn>>&?+k0#9?gwQOq599qK`UKQ|MgNTIHAe*cf*#@9h3`rJDk z$NEGW5oOTk91->2#&bQF1-yKbbr<&4-D}12oA>0;96sqER!2Irp1+-^LT4(cB?1SJ zKks(roWp;!#H}crV=GwA-=oeb|c{!K^hE%X_Q)OeAM| zHQdoRR$pntC866^2UJ|t3Pv7)^~k@_bm{HbYCOsyc$!5{UhvH2(2_}uA)3(EY@S$! z9#AHT%GHMHhD> z)Ks{^LP;PSaPD8ErNOIjM)?zt)<>&wo#e*&6U8z=RYjI-319RwM0U z4ZTOt5ii5sR{bZXfvJZPT^`D}o*4^A={C~J-RpA-!9S>Uc<9r^aI(o_^zC1E zpG<$BYCmQm8zNAbQgGx5(YE1xuqE4(DYDgK2ecG(q&sYAD&df$*(T!tLq8^1Dr)@m zscU(E*aoAJu-ZlkFk?@nt>8A7Oh>|nBNU<@bf<)Tq9Fg`z_wqlfnbARh%Y36KQm=4 zttUAj%)915gEd(;or8;%1edKKl#-5jL_}{b%ROpiN|!E)k53-T&_Aax{!;>HhKwVS zIRIsqD*jrCda&mZOMjCYzlJu8&S}iE_j_cU3P(oL=dYL8t~}p2tmCCd434=E>TBh5 z!ucWih)Tr{HtF%cI(E~UsM2HJu_UT!=*co@5e&(4;c_&1+;QvVI-k$@#YqXSG4jX_ zpB_VfiHqa;WEIF5Si|EYsEA~(nIiKE%j6Iv6?^^q7_1g7+8Hw$DlxblS`rQY6i68-@!4Y#T5+Seql}jE|C@mXU#9;g=>9 zl8L41RTN#y$dB4zO8Z7h2{0Fy$sXx zF8To&Y=AnLF^IzQyi^@z?j95mS{^&<^@?qKU3?M;XBQ`TQ`8Xh z_0aN_14~txEeeX(2JhHL6CnTh2^Mhl@dAQ{R0wHAYB&=*E3y{BPs5+>8sAbHXY5Os zb7raXr}J}X&4idw#~TQzaqD$(Rk5PrvExN_?EO!~8BrvUG72RNOi$Sc8p~?jThs%E z0~_T2xt2^V4K7h32uTP7M`MAIYnpybT? z*0Lp~yMR{Z6^EHig6|h({L|dwSdZ=h$;K@IlZ~(T@q{_(np{V>wrGK~k`EK(h%46> z-R2L=n$q|*ch6+U?=%eu8zu%zCLK#y?lTKd^+NH#DVorCZrEUBc zFg3ok+>)<<%=@kQ{#~*dDABr?(Xn;Q&`02c!?E>OqN~@hR3TOGte3r>vb4q;R{)7d zva^}W3~D1E4%G{DE%A|OO4-;lfW5R=T5hvH@?xpi4o$}T^*^Ww-)mEe_(*KrMEm?Q z4}S83X}iq%Sl|8&R!KkoA9Ig|sy)roi`cz`tuht()M+wk#TY zmlfuHeml{78sKfIFIDxnD;x5qtuiB&1)&;FQGKYzl7XW7qy{1NZW*hwE zKLkj**dvL2n0y}%G989Ma=2!Y1|ToEK^Do<7iw8s7uE3ny7pZ7e6v}AU@G4qguY0eC03~Y!D3;) zQ|-%^FK7{b4kGm*lpuV64c8bmjksAEt0qusamE}(f|M4bBt=H35Mu#i@+Wl@w6Npo z6*FDhez89r&;HYv9I#RF7B|_Df+QKARmrKv$my46;z!-DDl*qKJ`TA|AM*E*T_*cTpscxVoRl_PrAG zUB~DXW*Y{yJ{~OhjwzH<-Vr~zc*f26+Ce-R-il!AE5uJLTQ0Li9oJu_AHSJgN5G6N zrn@elFws+4w9}%H>n5673Bh(4#>Jnx0fWE!-fn?6T+hdo(AN?d1!IhnD%8IjM!qJn z8;(goKP5dxClV!vR6{0I=DkRrT%A-Mqm*VFcnZEc2U2&PLZ9+Udk76=GdUBXhfblQ zY$+#MRI>vSpAJ-s;=}?;LRL(;iu{M&*`sn%Zcy}71@F~qj@B9G$SoDY+|ds@xM`$e z5N&hQC}5Vxk6}=9O8;}Bl24?<%ZXmpC#8;&+@~bl@v>WsJ27Fe07A6BXmb~A~c%v-n&2dcf*?x6T z8gD>zcZt>7@4b~Kj?c~-O*rp>g<=5h3uwz9y}f~bD7$2y+k^4f3|-t^C(YG(vv2s| zbW0q`Vc!IB@#szpKqxlaPu?C@)%gIUJds*_7jTLF2EcO4B@HEFA^@Ck&y%2!i^0G$ z^2C>(e`)M%ZCQ$rQz_s{R+DPYbeV7*0Sb91w>itT?mYT9sUg}CPg zEC=i3{@jRbVSv?61Mv(e`E6@m9hH-iJl!ScHnG-HRU4p2 zQQaGW>7zzR;xAfevbT2BMe;f1k|=lX;v36kpDknc)%(2TxJ8VNq)E%ppV~tawp~dE z7nF6>jQmoF6fX5WD6l`&5l*l9X8QW10Zv+f=E-&S8<1oJjLQzIm0kBaQb=-dR=9H3 z_%^gr1W3yYwxbS7kG~>Ed#%K-9r+}`#GZqSf|YOiF)mW_{!w_W{*-Iuvn|Ja~!tL`((9sd3o7z zJ~#~2<=*xd&^sP45`+KT-rkOR>dJ6_FqboLt^2X~Bv(CK=H|V(+-M*0cbFN-*TF0R zK=ZucR_C*Jh`;E#W1t&wDp*=wwRU$ui|`D)7Hf~=KM5PHEh1|;ZFPf=b-ESI$eGB5 z#U7AM=SS61Jj=GRbnaab3omTf{n!fdJ34j8G#B_4+iNZprhN~*lP(JRLWt(At!URz z*4dZQ?CTGlU*p@N|EXHo0ly=jK&pXCEIEh3h?nS95kvQnB71~~y7x{M->fN-kDY&^1HCOE9 zd)+g$%OT7(dA7fgNkK7eRWv>ZjBa=yvthpmO8CtbH8WGksk+fnxeQHsE3W%3idjiiOG1g zeXiqnDORVu*Kqf&oPv|4l5#CW*VAs^I@H>(c1xUAc3Nua;y+ziejU}Ok<*rreVAuu zR#(-quOzx<3^%}N!824`_)SMHS7?wZ7pPeqx+hoeRGJZro*B)RZl8!bP7%=>Gx$k7 zbnMYfyo|3*f?cyy;p%xTwHJeJNz0pHn}ZKPR*%BPbdL89Z@~l}*t(HSBqBIvH5n`A z+j(4)?`ymW--|GYQ$;6v+9oZ;MobCZ;z>n_6eL$CD6gT)anX~|r;k+cFrbM_$-yJAQ2&aYNO-4j*LyHl70t9q&N*{nL>!)l;j4H$_*+C4%4<<6SI`? z5crq!q&eF1eSF{hQ|F{i-z(aRkB~fUu&2aHvRs|K{I`wnyHdeFR@dv3Uk1?o-U0+_ zbQ(kqgSa?9+AZNbEn|@)E)PC_Kh>H4RGd9L!CYFjUQe#ZyEh^}Z}3E<9BEqBJ}Mv* z`1J0ND^zIca1)83X-QOss$NSvU7K!2D$-gtzigK)8ls)f}qorhc(ytiUt>6X7 zN_9~F_;LAuCEMkMm<);ImNQ>fy^R-Y5Tq5F3A2%unRcj}O=lM`+0MS0vbIt{w-7x= zFVg7$`J2HjMT)vxcVD*>9Lpk$fS~K3@d{5GiBOjD3*iS`+-@SzN?goP?T7i8%r}IN z@3;uhSk2x5$@EC8T5ud*iv9tj_Ti6ar(qVe2=3RIth(HyA-^9t3oHNAS^_kc)OFoE z*e1Kces|1DKQ-y{EFe1q zAQehV%JbvpM&8BuuOHr76pLp%FfOeuZyf>?EZ}lH&G!k|X|mql=L|d;vMSQwHN8$JeoKYoqJ*Er+)oHiuqFq2(x$bmh?|hlJ znotT&*TKd=CvVZ*UaVl24@4V2PxW$odWyODmrHsgYsaGIzC8$HTpTh zZW_-HR2Uy_!lJh)S$31f8>G`!Z_1R}Un{=X)(2H{;)>cUF?e$Nk+4--g6BkHCKz%~ zyBq$uW9%74gsXk@X@bOu{A2Z{kckAs#H{?yyKBsqt(|w-=YB2_&pR_xyU=T^-T}V0 zj)IBgPVCFk-WP+5aXZETJ2Fwk5!wncutfa-#nm3+RxslppPs^|=EA~)Uz2-y>;i+# zpEavq+43#5Szu=qr`jb?H5XI$kHrhR7f_e62|vGF+%v5X1d`0{)})@bzv%x@W#;*J zhuqxjm@{m1rs3PcHRGuT~49CE!Q`i zuOsIA!;xBr`}^X9+X~n0ku2ZM?0D$)(f7b#ou@Zm4%!_*gFo#>be4~Iu|B_@d2Fc{ zqcRKMi-7)gSE7hO9nx-+gFq?<;~agGM>YkT&E;|iAuAIkX{el>djb(ZfMuHfFu}z8 zY~4u>3cM58pa+tFFozlh^54-&>2b>5Dh21=Z7K2-9#x*y9CX@yNVKDa(tKg8UyZN7 zZbp#XF(Y^MiGfFk`_ROdkd!n|eU`5>62j_wB0M}Akozzns`L~h4g%YWZhi=h;IN9L;>`X_G(=?VEFDv!MfG4zx<^m#7u}#@rxEh+3rKlPRn)GaV1n=HsK%}}& z*|#0vE3uLy>uZ3*9r(BjtSb=^8@pXbmsgZsP8jXpMtpSRbCt`n6^&^9XeZifcqE+t zFeSI`Ri9V`(Yc9E^Q?KMlg>@%ghRX>3Yj)D#!FIM%^kktTXP_bVwd*M@5x*=5X{-t zb>+b=rMQxwnhhx4Z5(m#-lw|sDnDVm7G6Lko5?nroa*={s^x54$9enijdwp$G911n z65c>bAy4&$1cI)HMN}`5iV`{x=S+Y6cw2k&fDXqVp z1K-W4-FM#Uhe9DfzYh@&or^1b?WauI0iY^MEj1VSNOM?WTLBBnT_}*s?rXu+?oSnb z&jX-TBK8Zu1ifa3T)1B9K4g<;Z97sP(%y%a$xd}xu@i{F(P?QI7fvrGFG`0!&3pI- zflPkxJu+S$mOb`i!fzQ8avsps_f5B}mF>e!q)%fkG92vfEN0;*G~|4z{hMG9ikT=0eA*l_0j8 zx6-(v+Hr4k_WvFWn>L^eSIN^yFG`Iy4l^Jk$3-G~hnE|Yl_U~^IegTaR*lj9!31z_5mrH8`>JWKZ;#>nm?=ALyAsfJZ$=p6bU;Ll1&0o@h7R^-X^L5=L zVBp*Vv?7td`c7c-ARt?J;3CvPpYO?F3kE)Z{HTX@2dv#4uz99ydvJ0+crdWm(QRpI zSz1~74Fr57UJn6=74>|Pt*yQ&STD#>B6TB>NdmrTx;T9!3TU?Hmy_+{%LPj-zjm$x z#WThvOM+sv(M_V+52T7SOjTjWSxpz&aRQ{$B!TX+pO3K3<4Ma zhnx{3rNUIHiL6}DJONH(cF8zR1M3EZT%jFv-K@zy7k76U@L5 zOUu(poXhN<1iX$QM}5g=Eo4+BBFg%do+AM$Tetx-iWR}sm*@qQzmg%edn%`X_qOs- zgPAeo-yO82m9MAPG^y7;dFC$N+ejuYj(w7+FE&C)wY%2#RANedm~?BceQ0hkwI0Yb zu}id%ghVFK=Dhc}J+IIg6p!2lXK{lDMUC==c_x2VldST8Q;5{?1`QU4tmR7Ds)z=f_8tJeiUYAwtk4y?g4ihB|3I7S#G9eWDM z%}_zuEFo?(V;V!*5M6k9qKlJ4*hbi80oNN%+1zt`txT4w;IYr(V6!rc-Tff|u-i{S z8P+)R-kW%}9`xY)@87=#D*zeaB%c;awEa?k%cvi-IQUAf&GWB60#A`B7U=0>*ZKuE_$n5Ia?8ZD8 z)qJ<>SlH^9=AG5Z)*lYjaQK8pC!FXnb&vY-z{7&Sg>e>`1 zM>n^8@zY|RwR;U}o^_L1*Y93T-_?bR1wUUFz=B&!Q_O=ujomv3fMddC4JN$%f582@ zw9GFH^YQoZDEkJ;)SJ9A{0>iX5s>y`$Kk^BmKhL{ zmfPlJUmA3Z+j2mD2W8jsf{C$w69eogBMK%UGDt|FNZ}*J1_#bNcoIlN$Q8Dv&VqK{ z!ZkC6Ydpne(m4ea=!d?`kcz1gVp*LEER0W}BO<#rGTI##ZfTtuqeb5AQYRGG9HB-- z8~((xmiRlpudYYK(UllifCeN+y+aKK|ALR%VpEDK+mmdo`(fXh0#r{7#(Z4Zqd45 zXi?UR3O+v&I>)^Rp2s)H))TM<=}*fs8@((_$jE&Jytkn+-^(KgB8 zePw5m^`zS+u&bDgXdn?kbEc0g>xN1JQ>sfvY3!&|q4Vi)RM{6_+n zgC`em6ro1mj>_Q``Qh-1;-Q9I4 zA)X70FDC1c@)Np}o|+_A$vxerFX2esbgD8GvN_++EXbW7VzlAB5ict(zX%$2n>OHr zBW84%Zc`9S>f%n#DyKqDNNrbH8bezlAv0IJ*-a6}ulZU@Urpd&BLBwJ5e_8|71r!* z;ni)UC!#eZQkwe{j{XvM(zX1VwD^7L<}iapO@UTRD_a=Iuxc77C<>^zLK&f zFN4e0xNz{^^3CJQ|Hsr{Mn(0%-{Y`!4&5Q$-Hmj2cXy{W5<^Otba#s&-65R<(xr4t zr}vrn=lfsx@4*?COJIPRb*|TrYwx%!A|H$_Cj*r;Z|r|sW@ah37|@x&VQM!^TGpAe zg%Z^Lt`MFuY_%)QfB3yB8*Nk^#e_JB1mQ(m>DVte{k(54OkXsMJ`Uk1-;2xno-U6@ zNQkUrC~j3)qE&zQ)oZVXbBJm?h2w~!qi+z)UNDT#h9#4p2wGXwZY^IwCv(b76n~Ya zV&&uRB9#*n}Q4^vHuoi&#A=rPU#-;jAUSB5Q? z{V4+3*-spmaGm7OUrB135$@H%-}F#~SnhpAod5M>>qvT}=Sx~+nnYB(SH{&>Fq|YM zq3(e0-@PYLRr?4_WroEe@yCH;mlI({0{;9DWwMHo+`I*9Twjwk+w$n91uIUtt;mN?GI z2<_@#jV$joZUQlP*;wszQlBmydN}2nU$=70;#=Yha$D$U-}@6ufq{6^T$h?U7cmfX z`#X3njAliM$U7S^TOUSQk|V0D@Uv7Zm$RsOLJFc)S{Rl^<>VL^Vm#*ecv^`X45?#& zoI$EtBJQwe?MfOS1=F-tQMc$OH}ho@?-K6{3eBuPC$CPFX=-FebKMSz7?As6fLoKJw|WL`zdWSPLq~u z8pXUMFceU&a^q5Ao5Bsmrw~^Vs~oo7V3);^!YU=%^yd5(-o9>=0_3b*>6w%R_ID~N z>|7RVw%I0fFD_(7vzTvUR^mSs#7J8`G53*O6nIDV<@6b(8B473JTU#M;?m>Ab0TFf`mc4w<2u z!;>P%#6GAX2X7NBmyYmMl-sufu}u(U=?CojKR^l?7PHTRzMc`t1#o5KkdJ^Z30T5- z*Cu4Ve6`&V12O*qVT{0nDKG(n&f1oA+Atu?zFwv7AM0{^57N@|B_M6uuVp|2?tMPp z`}{BF2Ld~RRYc+7mrO_;l*XpBrz! z4YT<^1Nqa>ulIgH4t1OV9{d&D$)9r-`50B8ifZQHi>)YG!>2c{nVzTE)RGbhG<5y^ zTr~x8sb4@yqN{5nu+-=QpdA1J>ljdEmlru3TEqbZs{w|~@C-(2%HnI`#dCTF5N)a1 z?-jDKVeGQeF$_uBdYt(*JrjJ#wHbJ=3~6b&yW<5gC-B=gc6SHaOx7osRBgfAnuma# zgU1z2cG!z*pYtUu&1KsP%IbFW;0SsVe>mxEou9`M!B^jfh?j_s z|7M+I37BsxG4;Bc$p2}p!0Ex7#%)L1sI3wAcZ>0Mjoa>3G-kKCXtr_=mMRt#o+Zi~ zPYn*?QhY&7%>p~dnR-YJ(O_A|ZM`P)^SZ7w%tmpE@!fAxbq!o1_L`$yM|*d(ClP^R z{p0F_u90>U{EF`h_Q-5;97tU{;o7DWq&|E1S86RNU-P$qo&}30_997A+Ro?yn%VUz zz@`1ufpg+#f@dyw($r*(2U>b{kf;dyy64JUwtUwqSloV~q?LcHLB zI}G4t*YnQKoPla*5X1#LJw5G7NXGYT`*{HJz6(b{mpjthxqiC#IqsRf;C%XzW975R z6ZP&Cn#mX0OH~yGUQ^MBZ5Rk%RQO<7P7G&h3N)l$c)K)i%4#Z2G;MC-?=*QWU2*RG z2W~;ECl}t)TIr{nb&@9r(X4}?{{oWiC;Z7cV1o?OsS2#1>fE1APon|HArmC*p330= zaoAl%ZVhpodn$QU>n`BjK1nHzeI;blUtBmHfBc!D3=IU_k}3D zYq)ZNPr%l*$8vLMFB_@4$}`}7CvFrnZgejBI7MQd*JE=15sM~Q%_8yBwXyVbdc8i~c(iktbFNE&&blJ2EKMuo1w8d( zFPV8`|L$0reF-*@Y{#I4El$*_!hs=)XZ$pAtoTRU0v5Eu=>eF?Z)tsBCk2ut(MI#- z-yfCHU~+iIN{U>x)95a#k1WC3@xHeHL^8%ko^bM{pBzkfU);||nGd_7Fp^}DzQMcVOePzt zjloU~8AWpgS-*C2&6Pg`7!|CimzI#?vBOJ_+XSUa(G$4CxXI%+j7ISeB}*4Z!}0kjNIhAgUWgC`W7A3imI|ZLpAyxY7!#dqP!}( z;tR;)xMLEjU_W$aUIop=HKqJpuWzu*{fjZXXTbb<3R@n&14he3{JVZqgI&R5U7da6mi19Lu0TL)lN2Y z4}S3!Zq;)eXHJ4HRXbVwAx9if5vIA zp+8jR61#ExSH3#^Z(jB9uxug}-ctG&--i~EMxYO1Q{kqe(M3RE)$@gu(wKTkpfu`I z>YH-rDY<$ti#B>s5lG8td5iF!z^$ z@&VW)8g~e5SL9qoXDbsX^&qK$p7Peu0+}+oIIG7>kWbJvB@}m9VMvJs#V#_F+)G05 z-B`_bHW5)C#_@xvcPxLmAc_Dk9(!;uMiqh#Bo6@?A<*m&HAIBFvXImLdl7%H$U6XM z0iux3Q3M8rSRGGis%hE&PsvOnJOZ`>0boToY_OnLQFS+!h6TVd03JZL*@2jPK*t17 zML~&3&_fa(`uo~llgAJLqLzVL2uS+!Dv-Vm31S8y=Tn~0I%GGs)x~}9&e>*dO&ItJ zh(tLSjv(CQlPGH%*uMwZg&i#Ng#+b|1GLgtoJImg5AGT>_&Qnv#&>hm1WBwCEiBIz zrVK|zff)}4?=u|WH3GUK(gPekrc*B(I!5@BQY|yJdx6~hthL_hWVFS{ug^#`p$_A^C~V_?po?9!G^G^6bvk_w6f;)C1+{eeTl90Oy0fpAZ7 z?B`LcGl9fJGM4|>$-{(juNfK|9v;3Z|A#{cIJE#N`a<7A{%ZEwm|KLC2~NfV@yq91 zy)(cO2#Ae zNA;4OOE9E5vs5XarQjb{v;QW~TY}lc7(?Unx<@ zIZJ0|g$MPBj(H<D4zbVgUKOkJz_m>S#!zV!Mq6C&@9-gXgfbp3TO%u`g&+<52dy;!`Ms1K z4|~rCMj^h+Wx6J{DD#h_VnHV8Bj8;F!qXa;cS;u4OL#DZ+~77C_(Ar1`$qrZhI#_K zei2Kn*n5Joc5*ySM?{+9{pee07V3MjI3aTGO1%NE+p-*Q(^h2Q)cOjIZ6 z!fzI5hUu;O2|o}Nmmx>Ng|n|m9k-aI`m1$25GO1%+rcY`Q5QMDsjv4B0g6|5l0s4I zc>V#9fFd-s3OTN4lgIU6sc>jk#UW)cC`&<{gf>-qm>(L15tff=&gIIS_*e?&_uiH({{sAmzIP9 zd5cgH*d+3E)M7`67=?}R08gR>K`u2w{@NG^ukk=0P;j|<%WL1_!MgsBF$rU>)+#k>o z0UzKVQ05Cvzgt&P*dZr=q@Y=xBv0TOfXurYI~<62=M3sybS`pH+$!|Q>koh;t!+kz z$Z^Xi0cQalOb-lZfow_}x2>Y2y-2LVzaKqQ$@X%Y7KUT;LQ$`=e)N<249bRzGNl=f z8uxNmogCqlZn)zT=)Gq<#8=cX&AP4OU|x15k*(TVyR|IqS!0rivv`ND5|bcXB4t-R z%N@%tp(f6aQM_GT2-WP633%B zwRJ(anj9cceUgVr>ks&kY3#RudG6%`^o?-tC|7Gj{$J=bPI2&G?gZI~@-)VZ`f3RH z0c5QG>qmn)K!w|WmYL4#efh%%$;bspO5tX!YtJJsWE}<)Xutt{MpS6eMf<0JqChIZ z?_AxRacZqESmk%#_twE*=9bSi??-QhKzto+pShmygJybP*LssRcKLcAK^#2>6L9|% z(B9tOj!QkXA^R9xoqc`F{l6-lo}_;W4lz@_=pfp$PxXdF)+YCiN~De{SRy8NT84ZQgx3DZN; zl3w~(d$Yq%j`d5vVSYSTJKAk06V4Z<3HH3=x>a12J@gO3tVYt-GPOrPn@2)gN}0Z( z+bi4Ed>41al=8Bkq&fzjn>w@UV!mT#_DY@u)y?3>jVl5C(U0)jq#bY z-=@Nx(N-wFjN`PE|K)aZCucEs_s32Mn(mp@H<09|w>ib;PZNvUJ+}gHLd)Ywl8ad< z!FL5;@Q2W|?lilHtQLuKn{>x6b1?t>UUTJa=Lo}Bi&JVPhjbC3_c1LMb^@AVk3oGN zX`Xa<1GD|4F|E-&-Z7l)!{qjFxg)_c92)O*_ki=lIgYq2@lLQl+oV($eu*)16ZmI; zu^4_F2{Jeio`N^hE-2C-+E_3j7P~B3gIZ1Ml3Wg_88iCUZ%s9V_)GYDT|gQ;e5nv#pz)`!=f~ssCx0mr(rb8FwAkV zB%yqz-WLEVJ%$2nhR}?dbI?F`Ps)JD=mdy~MVgZa4>?RxJQmxnINVTjO?EY6{@u3` zyU&^fFW=yL(Id|ICm(5BcME+!aP&9@b%*f0UD;qH<_h z_Dv2=1j~Q*P!3hkE=y6-5yYX(8-PjEhU&~DMNF^&D4}6EKT#ya%VM1(jOfdHBADr| zby{fkP{I1-LxfyqoE-~=tK>yyQyf8S=6_0w47YW%9s(#ylOtCXc4M>DLa^fAo9mk$ zNn9jPTjnvr0@D4toExP=79mm5;=6gQ7Oi+RXnpb;3`2^^2zK3F9r89Tc5d7*`Ls;8 z3kD0EGz%6JXeeu$&kBnfn9(hykpXk-H8SYfO#&;NRX0}n%zS!iz7B36JB4U(SkAhS zL%&KW2D|zu>950(z?4q6P*b6PRYW-NR*%3txWfRqb&mg5?X+M8B$fV$aMU*G~*#MS%avb&Sr z;=d6XI?$Nx!v8ja$QuaB6ZrOc09i?dd<1|{?cEa89oYw)e^-`oy`P?2anJk!CXoq* z2Du>uf}L$Zz_a0+D*-yZKcHbjNW-IriW(T$0w9df_pD|4VLKch=%NiCZV0|*0&?sm zUDn&KN>+${wRdU?80xUVPv?0F$F4V0H4UX#2dIy^L5;Z+u1`v&Jwnynef0EZdeeGi zzSC)Gqo3=M;hBZzE&Lo*GJdDhSkIYPy+EqXQb~Te|9c**JLU7eO}nAFzaxibJ7^BaKOp=n=Y+mvvq}E{wh+A|#|2Qph~^PQbMA<8g|1 zp7+uD@p+e7+0GRPm`_$sYwtaCcHhB4wy&snAwwFh32Bq1zIn15ZU<{oJCzR-hwS9w zgt~nub!5?;EAmEl(sO89r+k)dFcBhApex@I{H@MSxl3cZMzDwljR`+{rd$wdZ?|nnf8b}0v%Io|K^Uzb7`}V=LR}$A~-E*oz8Nt-Iov*N5OIX=}Feg1s`F;ODh14TT-k zf48Rc_PXRJQ^SB~eLn)zf>jc_Im?4dEscrv+?x0 z2vI=-4c1H6%=g8T*TNS0Op1gH_7;axC9blmwTziV%gg@hw$sF9^6iBSoC0{d@dIAr z2RuxVFUix%WrcYq^75;4G4lFmI=qcc?=x4Um-f`qY&ax3C1|keI-O*>uFW_>TWBPi zk(O(s`=c&2#psP&-<2%;cuWqGH!K)6t!k1epeZRrIIthq=SU~z3u0}UQn|>UdxIB^ zew#|iw09^BCl(4KRoF?eSmq<4ys@vC!(%bVMw{^>H7R*d(Ii|X=ns<0Aj41*Z>UwP zXMd&itQs%VinNJSZX41Y3k%KPm++)tox|d`5a9*Ws-(hZW2^{F9PkL0Wr3|rTs3F6 zStgBzcFiAyECi0TqiB@5>~`cszV`9&4!+9)dGpNbP{jd%w!K)+e5ygXo|Ww~GXzzb z4@7N9HJ;YrpE?L@kD#0JqFTHFGXw_u=dO*QYOkkNulrtsZ+E^ z&4&nSW69WfS(B@eW;*CuUWiF-=I+ljRuU@m)OzAg$ql>XD=2EwfwLS3Y0IEgB^2Fv z*pYVZ+B+~p5jRO%i84H_r9!~TPLe60DpSi%HGiDh4lxIrm&~7W^~3EYEhDcpa6s&` zJp+!vl7zs3QPR>}``2cvgRBU;l*q#8%!qlEmd{t%RaQ*QU4<<>`Ete7XL1bSB@WOS z5HPW)&*2>>lPWVMvjAR`IO;0b%?|N!G9~ig)R-jU_ao zh?iiLU`$G#JCHdeYN@fp)xZ<je{Z&UW%#gxij|kJvPZ}$ynvpOapmBn6 zfdS^$znxKb|16^Di=tM_1&%ck2MrOeGJ^?{^?Vd7Xoo#0**J}-t&Ze|ghs1X6yr14 z8aEp`w|n~6LRz8hCETrHc@L?h8!`p7ErPVns0Pep;%Q_wm0;!FA5|S@bCm`J#XE;6 zy{+^$D&q{)>*P3vdFCZXQOXSI{j$__e}V`>WGF1?+%-5H)l9?wUE+Asheyq_9ThVBBlv*mY?=V*eoNo@(FmES7>w=#e!+YGo63BJzRa_U9{>ZO5P zwnv~|{qgCf;c-95wXLhms>Q%qlOFQSK#r&oFXW59D0b9Sl>z29uMKAK%i#9=aYuZ7 ze8Y==py=Gl#H4A(sosz1*B_+ajemf`1Z;9a^*#)qkubsh zHi`b44i`QM!LRxbhK{XIX!R`^t%ancAAA2%p5hB#&MTbzI(;@t_y5mydIY!g@_S|Z|xaoon68G$bz z3P^qwKXwrNek2fZf6o{kq~n@BEimJQ9niKpeSPiYInmYF{RQXU%$IXA*Us9Cu-Ct2 zvE9pt{9RrG1E9T@nTcRJ^878I9r50&Eqluku53?6>noUBTYD$g!cfb^o%!#w9#+O; zZ?5AX?`EX_2_p@>Vb*K#CG}qMufL-4Z->AD4?Izwt`S?k=+#&5`xS^jTL$>RJhCN` zC1)lAb~4-mQzG8yVbza;z^8f-1L1!_!^m%-IkR@zaXk-0=m9zpB-DOvY%Ed6?JZ1CAgonm}DWkSham(KK`+_!)CBq2?9< zZ!%QiO#=}R&&GKEn!5shuMOyvao@gi^*kLj0fwM15m!|e72kia_rOeg4-mqi=6;Mo zevuHE%|QN`TE$VIPlZ`a{$@tWO9a{iKV(Sg@sF)@-%-<@lKPIj>B=>`h9_M<8t2en{~3R=fR&%a)_{YamA`ZF@6Wgl)CbCV9Dsa z5a-SzdetAem+~p#Z8`%q^PdC+!=r6VI(_npF^qIt0>yqK8ysnZQ_ho78aL7EDhy3S z%wSqRk4Xa+0=;bl;*<7maK#z#V%+ic2)3%e2Se6eoJC{#frba7a;F)hqdY2gwbUn~ zM%-&3jQBUHElV>JB=emO9wu_=YAgjP%2c%)syb(uaDT^A3pGjoK1WGbgf^1P#Sqse zw|1FnzD6Q5#3J$$T-1XH-%w=*nBcef2rv9*@HN*%{?7CYct|IA$i^Q9Ii8?DLp`-5 z@x^M<{=qNe*Fq44sq@XQ|D=zR-uMLAe))hmG?%LJjE|J<&ol6Z{dN~SnzJ~D0Dt{2Vm@5i_d zI6)!}PK^Y4-mt_>BxLi^qrWPzY=f-X+9-m z6n~M)udj^W$xGHKNsZM|=Yk!SHN*IYD2gaR{Mr;@8(u%FL9cVmUxc6Jrs{645Lrnz zhD7lXZSH+D4Av^x#nLLyNo6^g@du7T`LVoFP3XR3^)2^|7Cs}4GO;#s3Q<1EMEX1^ z4#oDAI0Nfg<-5#s+&b16mPFKQDf=wvZrFDY%mv5B)z_8_3vgN;@*kQ>G-d-Jr6|?f<-XX%<_CtbHY+#onuL-GECA(a; z2Qxigd%r?%OB{1*VLpBZx~yrT?2pl2)T+Lo*#hUz8bp`;2-)g}McHa1=H4-m16D}6 zyP(oK`Cp|#EpBirGx6Cky9$tz-25Ca)!uJpLFCJYOKvx84FzDo+W1;a-|^4izbU5w zo|c9E{(=D%6iKI=e|b1Z1#uDr)XX4|feA4z>&OK|)|_SlzXRa=Ta5%*Hz&}-Wzt6*%!CE4! z!*L?9+mqXHqW|(G?#mZc{3}Bp$8j4;Qut#VNn&^d9;d(T7$)$r%7kZvONOSl!M}laB;~a`pv+@4tRR zE&u&FYWp~7%u^fgw>5%g>&6L~GlC5J?M*2Ce>e7E&R@840fg(X2y;EiZ#vGwWFI5W zd(aysV8mM+C_Vu%KQrFvg3=bFd!~rc6}Tc`hj#z>Z}3{Ht7!m(7tk2~axu8k-qrOR zvZ?{Fh7%Kt!8zu({JVov^395+epCF^Af!oIWr zxbUs-97yl^bomAhyRXl&8{I(d^5fEH`DJgzh)}*c4AV?Ypdqofnfl@)@f)YYbgvZa8%3pc5l$+|vx zE=*$J+qXL&yeZ>WkRT`V$;3erbD!6G-|PaCGrBcbX|1Z?<4)pD$Gl}HuN{nPt}?xq zg!95$vgx1;=!(Ma6M15zh^kVq{tmVSatnd008SGHl04E0ap z$}J>IHfm?-tvu#^`KENGHXF@be1@c1P+u-z+yKo{p=3#I4i`Y1k3l2ulTIo?$zd~B zZ~9zGrYI>ULH{9v4p?38(zJA!Likp5RdD#d+wF&XAYKB37{?44S`7J3IAXTqcCACE z{OeGrQbq}ILgyc$bysSglyTv56J$W<2U(kmr^m??`01TsehHp5$lxX#j!0*2OQRv? zv>Bm~NoQ;Ook<&sgoCM*O*q@g8XpB`KbC0}Qy5o`qfIoxY=>c+6GFec&`uLey+YV< zWR$gvNvn;BKQM?eo6D_3AX|%P5aUY(Pk^GsREIM^Ow?{B8 zXR_)F*@2D(@g}&vU1w#%Y`V2c62>=iqzKBBD$7K?MkSZ_SZc&;gWSQ-2h0jIwrx!DTzkVy>3HHqQl~>WB!bFC`@X1hnhq%9i zt5h`NO`8U<@mq2IOK}8u-denqN8f%oY7^|YfLX`nYs7k^7AdQz8 zQ~L$nUwt(epxPOJ(hQ=?a5}AcX1pJ~v*kA|%q{oyd15+p4EshYRL54{f)UJ^?3eJ) z;P0W|J?00-#*PGSk+|cZC1vSB(JHsX`;zMGyB!YggPQ2=vfNbhY6EuhJ-q(*)biBE zYPXry6*yoEQYFLpRSBsaENys=VV!eWTF%WbLm42TPaXjWC@yJBQg+u}ryiLl<&oQ` z4tJ9HdkJf zh%b?fgByiBlUSgl=;Go^8w_@LC6H#tE{;z&){FfBi}`sb@nR8%P=yM1Ws;VBb`rH- z)7YAL84ZPP*@VJ5xE_C~Pbg7DxjHkU&Chr4QmQ3xjv~cA#r;f-Q{ptLG7@_eGh1k% zVJRfsaL^iSx;8{EH<2}3h|yie+kU7cbV-1j#E_tE9=Bv+WS%HU(iQedL!bSuwQ;F@@LpxIDff(yUtkz)a+jB%-ejM-O*EO zWTM8#Mvsr5wiw$)Sh*)g%-d{^8WF0`zV!|hOT^g0%jE_-L5=)eXsXcPmsqV|3KMr- zsDoA0P?$;KSZ)Iv7b-r;tcm_45#gzXf%yyAHcV+=zCGx;pe>@f4`)|?N`05m3$4@h zYy7zOqu1guMbjO|puwSOU-7#M%(GW%ZP&i_Y0nzTd{fSuxtDj7-mT6`XT8nJS<5!m z_a~xJ8{#V!D@T=EX`5qfbsvZCwa;55XM~|WPiZ%Es~)>!u8(HE2TtdZ2OJ5w60fiY zBJKMJFu%~dsS*7jR4RKYLigvIjN3p@Vv}?{OSjvQ^nH6i+^(Lw7@Rzq*gl^G2qmA> z_U;Qu?64p0XT9($s;YhfrMs!O)AJ9>WksVyOgRC}B(u|VQnJ=)hHL>|J6>wpN@ z4=4uxa0!5I*SeI_v^GG$LVqv34fx&zS^#)+3pu+y0+^}(&#Nlj>;I8_F8=?9UtON0 zz4!lWj5D3tn&_r!_ySxnfWQnD|4`hNGp!cz9SJmFPjU%(ccSc)w3L)x}@fe5L7o2gnAYhRFYWT3d-EnzDm*6JneFBYNLj~n z6@O$)4bu0C#eR&wVlRxHm$D0_u z*z=7f>MI0Hi0$dw^&f7`#g-jAZfQ_q>Ts->ORrIGJmTw?9{6`pc^!dkhvmGc`Uf2><7?wn889`n<;$$87-GH<#OL;rlST91pBENN-N$h=6uLLh> z#!77H(IK8;4x`-S9sR+dm_&@}DtWnSx!^PjHDhHp{89!9#CZLzrPeU-FgoecX!;^y zE8lp>Xw`9n`h)Z)_= zH38TqZGE|G?fnA*NMF8uai98VO8^h}^@bqF(jT|xHqn{4y~QPPAqFn2EWey zGZq7p^@)!5JL82D`@%6lk^=exf7UNk86OQ`t}%eiS-h@^r7-8i6w z=6y2?5kOr`8bcBD93*Z7giu6EH3eBmygj|0B$y6gTixbaRBaP z?gMxrY}r3&BO@axmX=e4e{TSD`d>GpsObN6B{E2TM#J*EF1dG~fQA3`v-$4I>EJ-V z*Z;q+^a%*Dd}#gj%m;zA5Mcbr71^dn3Nm-B2OzRi@u%45G8i=9JaZf1i7vPPy>3A; zxdt-j4$mnc{SeXr=LG$z4R9h3*#7VZs0W#$p8Gin=WR1;otCvqoIk(xePIOf}y( zuH9$OFu3k|x_quWBa}egq@=K-DW;Y`WL~8R1Na|s-FXbzJD+TxY@TX9cz9kXg4(%B z2HILXCq%^^n`tiie1{#4j5Z-S^DYAPKHK0)G@nzz8Y0J0b#x3ZGdX?hUvji+DXwQT$KHev zB&qrW{HuSN*ny#uuyHD~V`Y0#j#!r7v9z`bb;r7J34>g4TNX zD=w}qGQ|7iYVxcLjmP+Q%sf_|O&3d&f#vPE*AxeWr&1g(pyz0oRH|vyeZGq*BdRuk z>{%X?(D0pPrXCRTki4Mf zD^9-BYIvp$dl$#Ye5JS6asaV$9O+Q%BcA*@ZPUs|BWKF4C-jb|7U!m@lJpY)``+gV zw|Uc$)R>7{gB24s8#S^C3U2~XfIxbsF-Kd;C*tcGUIJ zbGr$<)Uk;$%#^Ic_X=O^HF}izv*bC%QA>)7Z{}xVS%gYxmBKrc-E}X)qF04Lld zATdAQfBNUMbixCikyrdFA6~9!9w8TC^POZzT??|TONa1z5IqXAa@=!q^qdbdT0sJa z*4+M=Ss{^h&m-*^f9u~Jv@DvicNg?r)3>2trvr&fy1w>=1S0@$bq`BzbsgkNVuO2p41QtsQ4 zyM(&Znx{0)mw{K;x1{^FnRNqVr*K^=$S`{$i8&0l4ZGIJ-hGO&OfWj-_aqD7l(8sk z8b_zq*+|&1Wq%-f77>fpS}fY7azyWkbQMTEEpVjcPCii@9bdxfN1R!(L0xVw!~-Cf;CLIhVA8jyfxi2+ z3;YR{5l`!Yqo6=j0go0Fldc34Q0EQbu<261?D+`a_;!XMo;Fjw$yfS-fpTD8R7-3168X z?noh;4g#>B$9c&Jxj>86NHWh#H*by!2JM&_JFee{=JYQ128-PqkJRH z3zs^AzIZL<-+D&`bfI8nZ5av{g9b4D)CZqhwz&4%&RJYZk-$Xd;M;$xd2=-qZb|Dx2)wSS8d zMCD7<)_H6_WXZZeQt|zc{b1@q=4CnzBj5(DC$ASn#v7Xis^D)0qoGRu6EDj4fi`C_u1p{QZY4njSl;wd^< z`?iw>X1ZfJR~QPOb~rvPta|&bqq>478#NjYuFS=Fi37VMZ;?&w%z>f9YW6LZqZT~w z08;}(rP5)F+e_yQmH!&z4i=O^65oE*15~0yi(C@p`7xpfTtn9Z|5J92rF4tt1@kjroiF`6VRG<0!r5SIvUs`rpBl48l)M9t;(5xLLBxm+TYfGthNMD)RgiYdGkb_n=^<8XdV9*so4B zM(hBi14k@4-eKE4jVd`(YPqA(Fm6H7=vHXXIco*yk9HJR8pHbMGCC<^r$qu=pOY5J zGG_jezP}Zp6OMt{h)uE|>W5|oyr^IrER~*nEkPY`DR(VT2Q@QZOo{bAJzavPMIGT6 zl4)G#{eH(0#lf1n5j6{U9%@_-)64o4;bbsZ+@kHQ_rx|Nz z6Q|4Ef)*Lm71*fsWmDoRiHect;Kq?0=t}*u1RcEN7Znxa1r~_5l+U*9x7j3?y}gWm zrkL(*zU@HHp8%`&2Vl?EAf2-ijTr=_pz{F0XBqV`e)IYWKvw9* zlWAF()DJix93Fa z*Nuw$z;Ct}g1)2Q7X^wVrYEK!34yw!+pRbxjZgakFq^;g#YK_ z$Nd5uBV_8ll;0?4+HV44-P;b?Td3*V-!HDYJi+%EaEHMT(MpUXe z%|Kc9XpUL|O+fHlgH;s(laGN>6M`JhrEz+yh|nceg<_BdF^Jd!akK?0`I0*+gUSmRSNsQbIeR(Ah`M ziB)_rMb)<7P-mnSO_`F8+n861+gW9{dY&+RWr%=-O1+B1!Z4S{KTLrnMzIW+ul5C7H6I*e&wn5nY0)j$HPlPoB^WhLG}{N}EBH%zi7qltk60V{fI8GPyqtDz>nl~qk_O8O|{E_DgECs>Ts zlT(Is%FtmWVAEAHVU9(l|M+L3joQy!>a5Hgk87wFc24a=X>fd=vet}G@K=iq%9i~Y zf<2E|n1f!l!1oUJ0`}R73to09Er6JZ8i6sw$>JoJ9v7D~EYqkaY8rCK^5Y-%HKrB? z=ZtOZmZw}b#xE(m&HDJgj}rjr7l7+u1nN@UkNPjnBI@NIhjL|lzb zDv3!P97+P!Pty)H;Uy4#tFjtk}|NnyWA4yC^xp(zY1bwVDym$PL!;B`O*v zJSH6N+7)O-N|U^;y>Ck(+ntyPr4L?8rES)RDVk9SS}_f@>}6hl+Ue#KJtukm!)~nC z5O%;A=hVA)r2Ay36XlI>SiJTKpmmYQpYwMF_f9OF9r~Yq*Q)9M&C8+bc6N{D4GD;Q zy60Q()5hW4JxNl(>t(TN|2MgGd7}C!cevGe{`9vz-|4$N_mu5{w#w-IwTHhYE;Y?W z{b}nYUmUa_2?E1~m@%NW9oV->SFi%yG=Liru~DN+wk#oN)#G5_;DK{20?)AKg!!u=ha-O5|VuLrL?%l&u3 zU+U3)$Gq|V@$V5=2UoEd zw>yu2h`#nR2TAs2=iQ=od+`3RK@xNUMCL=qUV3)*=N)hvwNcT^S~?W|IlVVy*zEAr zb3gZ<+Qa-;Ae^E-WUx<$+I}B#`CAYPf389764C1^W#g>cX;)+HF_NU4aYmI^ zi5Kxz8fz9T7&azsH-@B>)G}2H=zd_k*w*Srk<2sWE20ZG{=>VHHWq5K_4qEoteZwl zt}Dbc$gZh{hi|rqE5h3L{##7L%jsypz5KGNv`Y$`+^)FX;45gu{H z@M)UZ;$LUlwmv0$k-PY5I}CKm;2+#d>PrY&e)L}8uh@v`k-nu4UlB9z1nAu-@wu4< ztyXwe{BbXo^)zcD*X|&=-ernkl(ArA5>X`lKA#b7Qj@$J>xnG*N*t;dDi2E;1J}Y} z9>YZ|P-I>4B|{j$)QK=IeR^Muh1r+hqkNXN10^kvHG&mRM_0R3b~l5b$)ii&G9Ij+ z%;!S_rZ-6pp|toDZ{fm5T(sc{mp4G3B5$V=!}H#PE#^xk76FC4qv4Jf$QNA6#7Tzc zLS8wq{f8mRH90K9Jf3*|*Or{Co$2{AkCjX}>E#-#4{vgQLdhkjIBXh9Z|*p=QF+N< zbR|9RQI>yv+iE}HRmhSD%HE+POeHp_h!!8qf0RsgLNEMQNK?7>t3r!K1jO1>$MuPz zPzFO*BpI#K(_7g)Y>B)c!4_O?%L}M+q{y2_YNsZ-bN1ETna%kKvPdyIQ0rLWUp=Li zNNJcBBCNm8VO+y(;XbjfM94}{FtfZb=7c1ieF{zgpy$atCYZ5~-L*Qhf65-q$*fZrMiwq`xq(gdBHN zepB^$U>J2bHuebqXcL1Th|`)0cL_(8kes62b13JYkqJ0 zV-o#$+@8RL-wxLQD5>rj^)tWb3LELHtu9VZ+7q+C#V$sOCvHTBM{bEDj4pqs+QfK) z%S%c_!v4{Bh6)abhebp|jQ`YTlVMio;9FH)Rn6Pnbn%v!=l9gV-h(sODRD6|x~eL_ zs&3xqlXN@h+nh(Ued~S7<*DhJPc$E@d1;K+u_7^*<2!+aVv-wqoXkOvB<~j^m_-w(R8mK)+17JRId#` z4TyInB=&*Hv8LGskRtigugr84_f(t7}dTfrvj{lSDNE_&F1-_ zz8gbBLndlV;a55HK!Wy6-z$y!7d7vTr*Io}6W6Zr(_-B`Y)jgRl}4%3XV^ZMSt*9&I_)QFX~TM9a67Fi2#<7!-Jd()-H@?hv3h+U;K%?YJtcVB9Nd{kpDqTRiVqA zwcHk)50Cp$_+@%ls1oNRp8f|K2cBVceUT%z@q|=#edMr4L&CI5vjGG%5y|GW*Sz_!~STV9-y=CAVQ6($^4C9^Bp|Szuk$-TN(Mi7JeUN z@*vtrZ*CgSO%hLJh(FOO0Rur@9xR_uF~d(PY7F8jO6W{p11D>G(z7nbMLKy@c!K}A z8X&rG#fQt=86w-;@~Bqmw&X)b&jmEYx@|rhzE>{d(3O!?)U&bt{M$PD zs-z@?i5^5k!8RLb&!|Uz6B8{>s0H(Z02OX2*Id^a{ls32LdG?X zQX|66ts{m%3!l{x`|$xjHvC734&C{tpDb>zm+ZR*jC7dBcjJ-el;7X>@NL3qf)FqY zvTz!Y4yiQ^St!Rj=$*ig49YUNR$MmVX;ZanX}W~HsPs3+`3f9sBt6D1=1ONrTw7(S zxb|08w8;ipfokqYwsv++Tr+-rO`ko`dL|p>25#U@kR^yR(3-HNIfu0+!?NOkSQc3- zecL5M`9mW+dv5r_O^E|M4aOyg)r+koryxf~mY`ON^bqXMDLJ2rQ7fe9_R`kD$gsE&n zD4PfB779cS-hFe=T04O0?s(k4e*{n-08$`3hy*Hvu<+L5p=~n&UCiE!9~Yz?ze(+$ zp<4=(f`Z%7bt%x6Q$VfI%o~4Nz5~Lo`gsH}@ob2Vo4-MbNRNys)BVeP`8Jhf(mOgv z5|*A-+WVH)+yw2@dfh_ohuJo1JC?|!5b=@R$9fXz0i$dtcBeQGCz)k_52x=v4tA?q zJ_9wgw{SAw*wU-BG(UD)wi-4X5F{f+;(`e^CUy(C(4+b2BX!L6Fbb3$VNKzqZ8f?C zl5N=2tTp~l&JB=rsv(Dr6j^XPWblV@AlIUj1?lPAkXMXZ1X#n)D**~{nHy;Igp~lsUkB2yf8RK;+z4 zL&>bu5Za5!CmmZw-kC6(lx#R%>Kt**pQd^3dHP7?y93i0G&PIu_mUUka;z80mqV2kZun&;HK$Z_m86zlNO# ztv3NV-$V7Um;I3bv#I47O{;C2u}SZnza&st@^VI!sWd!_D}M$!K`>o1lIW%b~I8$r19-TlYR7P7#JG70N!n~PO zKR0bRrm`sh2lJvZwP-TN1Olg(2eF22F!DMvB}Obstnx>hR;8ngIaNA6+!&Nh?f88> zJ<8e5)Wn8F8cTX&2-)_mv|XZ(EV&j(2G-BNsWl29SxeAE940~wjjTd;NSRw!8RN)-y{y8~_))piqn;j*u zP%@3zEcJ=TkGeCNKe>d)LHP$rQb;+Z6q5KAaVRD+y!K0A`4E1M+ zv&b!>g3@-Q*;*3{pQGPUB8LqWz`v6PwPM@aHfbRaz~4q{SlecXadO=hpmSXbk+8fy z3%ym+m-k=Ph$_woHQ_CAnk8pTpE+asqdLn%z^z*vm}9Y=w+;&$_l#3~!~6QDuKeq(qnymXKsGz}Ps0Syy0I1O+S z13dgYfV?E#I>Rl%c?|fX39;d6Y~bZy4q(y=6!5dVa|8ok_vX}I1b_T(CxPCVuknv}@NS*C|YP(SG`abp{dduVVt$c|RP zp^!M{U6a{JgI2TcXY344k$t0~n&=iOTbb)IbRH3mC^}?Xn{r#ffScy;?L4@S`D>SZ z@#c?wPVtRX%FjPf_G>;YKA$xQr2N|cdmWd)F-By_Yf=2G2sPWmJeJqTAzz^D(s11? z_WapUyYmw|F~gGPV}N5{?*d`#%!+F8C*3a+CM6_0!gfB79o zy1v9cw4&>{PE(2AQe*-lO~>ZFz3G)<#-AkCwSj&Z+BQaW%E{WUrU9#YF7%wdJm;bg zwWp6LG_!5&POE)$SRzYUo=Z5GOM*&SWh=jleJJSY)N{%^&ndlJdd4IlhziA?^6y;s z!l_%Jv zy-h^2UEitFr_%pn{VQj6_kXWCUx9ZQ>QN44K3uD2FT;2-mA1GM>&;*+O@{*p*ZsTT zJ(zej%(}JBUw;B5o~l3lU)WGxthna7NCi|fy z1YkHD7TZWOZ7a&Oa0%yMR`GD{RPt;@gXtLS-Ndq>t5*4s`Rw0Z0%^)@62#GX*o;wgq zq|;b2{IlQ(?WcV{>UABuAOsy-(y^(+pSDp(XUXUCXM{a-_8OEJy47mfTIu$+_-Dk0 z@p>q`Y=A>Liq^Lb78>GG)s7_PWJnG6 zrY`vTjRj7R_2=xn3p6&;QoI`G5q#c=0^}HQ{L2@l5_mp^hNvi7b9k&_*eTOQNK}+p z)YN(W{2MUo0B*@UuA$FuBIsAVB_`URG58IYjfP#}y&}k~c3^RCj{q>wjWbH92#)*8 zqd8As5S#ox3$;uMdXh1nnMMo(5V>t z6zR$&h=l0XBEGjM3yHr3x!}8u|Fa}diA?3GtS3Q?oUw4o%j%XeS%_r^lI{oPF z-7)WAzP)&Ge|hk`KPX5yU>f^1Htwo@@B3Bvp3!|%7xdK$Xq{b($a>W}bU%k2%;aA_ z-MdE?`ld%CLyv6W@>+3hn?EL+d@*u<+BbTcb9q@!e2II#?|eYu~bdhvQ%emyYx0oT^`{b~R9 z?(^%kR|nb?kC7N-;7iSXX8i$Tzy{nk%z=o`Jzse~&>VN8c zn>Cf1prLMwicf$m6~PGCK!yOQ2D4CP=DTmi6w9bBG3~ImAT)R+TrCuGW&2#921v#dh&y&cYq1J z1pZ+a#LKpj0D^A~(T|WVmuHeri6C(9kAj5{sy5HkzXz*3`1|)8)gQC z7!%eq0E>cP2UU&&T`9c`$j!9R-3^r_K zu3)gd-uB2@34bgo%j^%f*9hMh&6b2V5}sGd0vnR232fZh(3%^1D9uR>wwhhnJL^~i zS|##*Z#@k@F0OKxq-1g zUrI_!S}h7jIeQOx-W`A!@ib!cgb?&l9`p^68VT;y>$f{)MPkEN0j;rahuSG_v$65MLH~Ut`AY@+T|p;kFAgfq6uaT{nj#olSiv z2k`;IBIJ3)*s*)`M%bE^_#vcFeV1mZGk94Aj+?m!X9R_nPjx_P#gg<)3tO|R&Pu`| zqgBZ-WIk9){Oz@O-i*+joD?eu{kJ9}SUWW9$aI4%pw(wi>os+OJ%B-2}jl>-RPuYB#n43CG&sv193t+yBLj-QB~Wr>3{} z-O&`rHq7T{gmxlomkc^XN4jNa6PtCpJ|JVG-q`ANU)32-5=N+JbgMPnuvor0)7zb! z*SNf)OUI z#h}a|Ri$Z)79yJ%u8Hg{%W3HzO2$K)?1Jt&>)X$~P|?6Dk6rF&t8J%TLQtrwuu$Mu z;@-o}M)Z*dy)^M7#Fwc=+9E<86&Fn^4j8z&&H2QHIIt(7IToaNm45M8#-JE#%pb@d zo0_f!iRX9G)J5=r=6zMlB@>A zess)f*e=+G{lJyNkJ-kwUOfB`kDL&N7lTto%N@d~g4iUgW(`=`MsA~zOzh*ksi2o{ z)^)&Y7RENXMk!^A`s|lXlaMO#)AEmUnVaqh7tclU7GF!Vy`Q>*gRdRKTlQUtrA9PJ zT%x8=jnkKRuS9%sCXRF{vG!%Qb)`q8gkOGxTRF{>IC5S~V%yan@pLm~+G=&iKzb`}b}EL}^ZHSxLItI3q)cf9lh zs5|KSasSJ44>TM2-{}hnU|LkJo)4&5^PyaYi(;zhD5~c}1)#~u2mHEKH~LPYymFtH zCt=B_EiAjN$b0{3U~vP00>i|ep2viM0(J{pLWi&sW zg(7*E|GG~;h7H+JREzIDrL!+saRsGL?0s_ZldGOl zEc`Ae>0?zUoYwZppMi;qnECm_Qbfr4XL}8N3nO&WtlF)Q%E+$$& z*6@OG#vf6MI&RGm?d?L=2y(W+BJ1KO%?r?^i?K^Y#A|X9t9AyK&O1Soyt#2*YZe#> z^)W`4{=&;Hf2rdREIzweDc>B^+J9{jeog=Tb97*RjM2eo&9uD7+H1c1BEoRNR-ygn zFlV(dE+4jDw>!SeQ}n~>W7)})Q1?ZeGx>!|JG)6$eSSNqy~V-?!oTZ7(XS^GMBhGG z82&B`B&xY~m~PPtS~2Sbzl!kwzk(Qr4A>a}?Yr^c)}zS6?rJ#)B@Xg2G;J?Z%#-gTD4Z z-u4Yiq|6Uw$7P-a^gpQAIrJdyry`0)fL>JqW(@dL*r`c!b8|O_`Axxcp~D4i%-@Ow zmuwEN@In35>_)E$X3aqobdC?gldYNu>IaytW4D2IOXt0Nz;Hz-%R660Oj(3y~qu!2+8`XxzP+|)?#DG*FF0{Z`C*5;HkP=I&d$&8~ z9qZ-iVjPBtkqsyav#9wm`Y^HMN{N---d9~|Ve}W1&7rDJ@Y#X13JvIqnANz7qju_u zgCp(1y9g{Wb4T!V$g%{X%&d!GDmRttx%TRvBXpsFiK`)yB=ZuQsv5C4rC$a{27U>* z`HlyU^gFC%k#I=$d5-l0@3s%$)eJI>;AgiNtvFv(Rh-yxSZ7#{yI*#1q~K(+WYG>g z;6t##RumjX5o06>!u*QzOp5yvt`&c(<66*RTIF3MuPc{JpBS24V%8x1+@Ju)NWP7- zqPh+!YL_K4XBp0kX4%nJKvcnu9g7YKyl0_R+Xk-c-b&*qiSiF z6qI5*tP;azKkI|6I z-;ig~%1rE3_BR@}w)+cw(;#vOsX!oR464HzffSBz)%B_`j3+kUHzh?}Jp&n%xUb0N zgd{PEXc+|P<+9_n=$cV^ZX{938H!u8aew8RW)WzH&kah~@oro{)q8I*DwO=P_%FX; zL_P?pW>EFRZQrdp6y$_P-CZ5xK)oLUqU`UQd|=3iuAQHkGGV*wWOVKUnV^uE*l!`C ztAH<6JixjD8k+|QHpOPTfzO};fq>QZ?+&*xAVs#Zzh6=qMa7?eytBScGtp^{x#P=7 ztN(ldOwO0S>Wzfsi?)|-{v0o6JvvyS1xMIL{a|7@!EnODz_kw*Y8m@#0V+(k$R%xZ zA478rOAOvKh@y9HzAYyjuUsc0jU48pS4)Ru;zTsgxv6=WOpMDRnD64Oi^()+)H~0T zvSUuA*{P(q7%hlej#H3|4xSKdZ=7S9MsJTowZNVSuWFZvR5*MN;Vp;3AvR)5x;|e? zPD=mc+kY&U|zo(xc?*X^Un~}og*9C<>La11o!#7c~6Si zk7js#9FQ$V@z`y~zMFQ`s&NuY;&unecT;BVaF9VAiPP|*v zh-aXs{~v<$yEi~#g|01u^{17Ki;FL@==raUr~6BH!PRAd0=nneqqhIw+^und1<)Aw zo^Lk}fY!f0zOb%4MeElj#zP;{bPfs@7M95gcu$1Gh1ulK$w@%0XwW0;>)e#r{pKZ! zU;C-+@ifM|;q5XC@6P|jDxaqgJ^>k0&d1)-{37D%Uqf)F%J(uy%isss#^@J+A#elsT)B9 zY#yW<@*$q3zCs9jkxP{#B{bGCuEozijivcWx8^+pRY9W)&6ZnmA~rw0bW~{M(GxYR zZHNZ?2`~R(kgQ@9Qvrlu107&86~IH}G-JbQZBZ)tH3pmvQ5WrkK`r1j#A|5{TNJ)3 zJ|WU7AK`fE)y?JvDYztV^ZtYtc(E;9X3d5-rEZ9($RQzV5}b$Z^EIT>zlOd`e3Q^L zpq0PM)9@YC@M@4cEaLjWEQ~5WK$y@73%u)B4Uzz;^U$--iIn<_neN{+Q$ox8(hVIy6QAYL5Zk9o^k!K)Xrky>dJMB_Y8ZsE`2K4>jlq z+618a8;mCt^ahw-P&Yzo=qW($+Gh;j7#?1pDjjXdY^wAC9Ub2_ac|Y@i1s`gwL5Dq z@3^;Wwj1?%_hyF-j8SYkKGGJ%jB)*=D(3FprZH6wi$q4Ig~3RgZR7j?&i}BnaBRMV zGFQ!s`AP=%ZHdI(?U1$pb><$AdlzfX`v@B06?m^g>-oy*GHWFk=?QrfRG)D@RWn`@ z43OwQKmKV#F=mHy6+xu>e=gjJS&Xy8AiPv+=?Wv9svXethjt0I>+8Q_30<1&;!8Hj z*Ra90W7%X&7JqM6zjA6ho+__7aM<3;A;-z@uhz&>c?G;<=j5lrInw#~sfS}b_K3sk zprf|_rqyC#)HTS6b1`Y8Qc~By8pGi}r!K+j!>OmA@bbZ6R~M5&f=%2ig_YF_vn!*^ zD7Hqwg9WOTm#-?I%7KQ- z0-{kTOOjKbfXt*9qD80g>3_VwcVAd}rvJGG>LSqd1E*M^6Z#PJ?*~?Wz-(|odM2wQ zEGD*lc9sam7#+5C=qKwNfc^Sj`^H*rruRG-??c}K0I}=?RW=(b1@L$KU&8hO_ID9} z@pt{7@bx%ZO5NSN@{vkY^B)BO_ETW1_gA{R?qwAYAV8?@*x2ddGFtb`U;YQyV*W0C zdh3>VeDDSnlPn!1I2On;2xFjmbvz_wc+U48f@D z%%lkMFwwJiWs4yJ6hf&XnWPv6#&lIoq^Qzii)?TV^E{?9@}E`4BNEY8a%dL|rueoE zFlHgH${oyLe(YFg)J36R1H;U)jM7rfu)U(yIr^BRAB8t3>`Fn*;FP4qXc#_bZf0Cq z2N`=+>}0x5?IC+yR+6M}+d&yqt5A{-6k+fh37)hjpQtKK%%bhhz6Lo8Ha)gDZZCuB z@FpLt2ghWIh#5Y&Exrp`&7?*xCZd~%y{rB3JuMEJ63+M$`ag8S!&^RI941(jI4`wc1 z1b^G58NhC!NsNs85bsQ0OkF4?9qWrGAma}sP~4I15rM=MLBPP+L=5bd!n+C)3z_F= zlfXMpp3bfsD98m`8j2l6O~J9rr4Xyhwh)>h^HK53GW2%C(gpCPdBQO8Zc@v!I=LdY zI5o@x8?$smAv6Pa+7I#*bA=Q9eo}l|4p;G8TGR~Yk`0Hvsvl8}b7PCPC;&^6n%h8j zVmued%ljNC#txLDcxQl^0*Kn6O4q*E=hMsQ)1Z7TiMuxyGswT%py?L=x7{0iejAUJ zP;?JUMTL?No$`oOdR~y#o~s{twqcvnmYhlmt8YPzCcguQtdXbVUXOkK+uD-Gx@xEL z7IRzIsITP;re`!vpHAoua#>}6G* zC`TtFJ?N-%e;e?i|0hw>*I#{^`ta>}_hcNPpo3l>(488fR9mQMhUyi8ikKJI4Ad*eHZY4*k#RjQ+7m6WUmY)W zjjZBtVf=e^_^vB4*hU?v3+eM;8k?9ff<=7t&ks-j*v_8bu8= zk)R0~@--2GBAg4Eex!+-fm%OF>qPh4uK@*kq7j*Ra|b8cUjUtD5cD=PfW;7~WQb>= z^T?2duVgFXqo}GZEopY_W-z*rC+U$sQVJ#Lick2`wRy(C$-IQFzm! zGB28y2&G~(Yfj*N+K-kS&V5BEz)9E^S)tInQCw{ls2PC_4rlDfrGMh85A$ zSHYR9!O5_eSj{vTG#S+CarnLep11f>hZhaMP|uT?jj)2m6<-?%D`UAc+p2+kJ{W^U z9f4&v6A>hP6iSVHZ1Dwdhj+95Isl zTh`UCsjl_*U#`}Z{r`@U?+&r47dtHjP-lK=n|-LmMB4E#cJ}ipsIq_v<)PN(k_$}3 zeNe7lTQuUxr?8Z){;iSAD)2W4+eiQU*X6!GsT%Y~ev-ycil@01Ia(2+eHxxgzuxUV z?h>4~Gj+oE_V8%oMs-|GdwzaaMI9#H8*rOJFBgA4O0|-lO&{2r%KqD$Bo8C_F=%+A znNAJpL??xN0&|D-Wyk^_SaJ{YK6>O+NqP?#+m!c!rT^Y^+}w!vJjcH^+<0tq=l*v& zJD;ETgA*x|{Lp!Qw;(go;7|P%N>~;%# z?iFr6_p&2N5Ehr%zVjYop?K8ohit9K$u*N4l9vX3Yql99r?64_D+K&Y!sN#Mt%GZT zssPxD`|WUX$uVVm=du7A?vFqnCbOuDjXUS${qf2(^kQ_+^X%-S+gIcKAqdD|c3$t9 z0ET^9$&J(9`dEOt1ONd5fg-!p`ae1;5RVC-=c}Lc%k+L$3>XAF?bt&EPZ_tCL;iy$ z120nY0bbiqy!*Y##Y*jmcEFu>{x_g;*w&>47Bm3I6QC`#FlZ_DbJ>LOzbk`svg)(T z>;Os@_8=qkWVJj?1n@lpWJpLzC?il;4I1>~lS)1oaBzPwCu^;9y(ZK}b}NVd2J4Jb z>O5ICnoe^pg<&<>X~n?A;k2i9!`s?vX1eZVjiNs{p!-pb*Vtj{STK3CNkPE#0}+D~ zk7D$;YK)*%C_OSP^*_W}1&U0P^Hs^Bs;;gsvJ%0jNfEG>5l2<{){dr(uc z?QJUxA-H^-a?EiI?I;kbE!cIR&?%bWC@BqcRo8$JM#48ppuS@UIm9f9^${{3Zu}6dS=RQ)W ziWla{9B~UIq;g^;7IjYJqcpxB@m<*UW*O(#z@kRg$38;!#IR(o81*HfBGPsxl*PB8 z&Z0KJP4do<-WPEb}{fHflVWqp+JKVFTl_a!0)-k#bpb}-=P>q8x3Q@Pazox zW(tU3>?Tfe6tg*6gRnU%F^el>A~HOGN0IPqFtIsp*skv*EkgQ4%l_7N_ zOgbwE^XS`!h}o#IXn9R3+88i3L8&@yB_gYY#&oJI{M&OeVt7Ra%_4}2B66uWW*AP( z%JmPwk={1%vF#0oJ*;EWNJg_xPMA+L3y8sd(N)JmZ5m7_h{1|1NGQM@Db#{h#8CpF(idRz^_ggb z2#RHKXf^o~qfFz5Ght=1=^6uDd~!&S2ct|?@C7GJU7``QJgLv8*joarBl(g>j_cw* za$HJtycQ8?uh=Ch#E1Wy=y<5K33_J z#ou2vds@Z80)03KeaGJLh~)QLRx1A{2KU-&6&UF5GhO?^YctddL3^hS#ug={d4_10c|aFrH50?@~*sH?xLUk>@{!_CDtFguI=RS6;Z?;&r` z)ix!i_X~aeipcdZw$`rA6qLG$+3uCbABlMqgCV_-!Uj%>RwVkp^0n`+1k_1LS0^K< zvVS)DbTgIWtAoMbNL>nm=|zw2X3qipY5bL7Ppnn|oL!@nJhEIIxh zyg31A^Ru2{ciZ3x?wixKnUM4^ZcPTIPxW1SayBj5p7A?aG>S63155o2WM#g~jiAKIh?C-<(>rUO~QQdz-9i#H24cNFOt0 zh+OgAuK6%Y`rjZwV&$559kHYVdCB*pop(*KKYi921<*$k`y=Hmu#MgUveWrm;s~sPy2iZQSyjPa4#m|is6V{&9HQ?8}R^wh@e&Eme z8~skN(oFniPsT@zVXCGg#K(8_<;smtL((S$Z!E#>Q1+{-;a`y-cbby)Lp9_fMekjY zTC9e?e$Y=7F;<;uvDKI)-z$Yv5OV8MY7Gu9+@^O0Y4T3=168LO^hr#j`Yq<82agE0BLBr62D zq$dCGgpBWNEi>7ftrI@(&T=Q@XIYC};zN>ezBD+F5bx}~C1%OiT?{Fg`}%N&lJHk@ zxbY9^;%_{9Na|-2lh(BJz2~b;)_J6Sn~lb}KPx0hndPMNKiyU$YV^4WW^s!q1?gzu z)$%2#0EkN1|IYQX@8tB<#?djZ)nQqSBOM5R1n$h9O1r6_D_QylbGEa#e9ST1`Hn3? zIsqdB&d~eziCO3XJfBLhIDK@Apr6Lg%R8S(Qn;*k;${t7r6Zjox3Gn>s~1OaTEwm6 z{Pj1Xn>Ea7aZ)ob#+iHc=&UgfJU)ZoY{A_dADUg%P9klznh96Nq1l0C3ps4nBDZCl zGh|o~H)T6?XaQjr8-z>(f9*|xie(HJal46u2+=Rp#%)kC7mL2uW9N!ckdaR00h4^? zk$M-0bc}}XW*zcINGmRS)K#85lp(%ULDmfyInq{^-FiB46>-6L!DkqeD+U=ElU|D3 zn`qo9A%lFJMVkDpB+&?&T+SG-D~uL_l8{o?_JX;N?e1L1ge@r4{7re9cA+D>qB%Dv zpZCRVf!dpQbU#V_-68mvxbg7EB@2z-xNI=hm~TVqnF+sWs%v?(GHMWFMS$uw;kVsm z3W)iCc5Ox-fY9WcLF2GAm*IoOGzFRgp?VOZFy-(PM@8kd63e6oY1(jl4SED2Dp6eJ!B8(-u8Ma#LUEUR zJKTx`cnY(+2vRIu@QoFrw=tZk^s+GVC?sj5sQ3n(0g+YIeQ_}KsSR>5Z_(Q{9C5hT zspUcmBWdYPQK{jwL%t``Bc)iVs?j3Y!&aHrK(gTJF!@m`V5*}u(S_B};A0EJVb};A z;19&6cj*Pj6am<(My5JJL-kBZmRb>ZdwY`HTwD!em53i&oqZ?96^sEsDx?Qe=J{vJ z2@?0w$%E%~=Md9&7c+ohruso-x zJ1Vejozlu!P^#Mgj^s8hF*2H2-)fDh1NA9Xdjoh*3vXJ9?(J-Kt=4sfU&+SLlLm38 zS8t?JQ7*qWuy*tV@RMCkGw)+Fm%uFfpv=Y=E%&s8aaPv6X&f6ypa-eEu%i=lZo};ujwJPAg8LpL z?DFuc&OdYWEe>;*(eA(hWZ{J=&UD<#p(Am0wP-JHW00Wh3NRuVMEcyDbM~urD8l`6 z`{Di8|GzX`@v!@is+LrpNNs}p4lr#Ixj)^V>hvSc>Wl9oI6#~r#f z*J{(!^EzZ|ed4{#B-?Wi@#$!IW5WFukw|5qLP7uw+=4rP(UH{sGSBZVlx6-Ty9Ca) z_h_`H?R{b@jTISFj`NSHTYqE4+dWe@?baSuzX&4mAv~xV@NNJAM2U z{o78^u%0s@?giKVEm%*)cHpXG)?$@t<%#Q%GHm`Jd=?lCNwhurs7Goq377@dF?=Lo_!v`|n z5|NNtLfSSO28M^@LecPP66=_N^Ol6s^y0U0%q!7F6sM=OPaBsWc_ZCAUacK#-TI_5 zmM^^ai}e)O(*HeJ{{*u~RoSP;tF+Of@3sR6vi+)3F2&2eQEpe)4P<^Hasx$!;x zL=;OSPnRpaG}pn>o=_32zS(nsS;3Y=(~I^G#fvq+aBYS-SKn(|ZRsV~8T8Bj&DRIL zrf6%#&mNg)`(cmodKbGZuweos-iekBr5PC}le`cG@A_?RRr`4E*5eJP^X5N@>`e5< z``e#M6|zwp5`n`$7VojUCFeoGF(T#+x)U~>MuXBg@hRmgk#UpOe^c6F>aL(n&2$ux zujm9OS8BC?yvXdw$)_(gvQDJR2O$P2c~6jJ$yHCi>gYfY%H+Q{%iPs6z@ z`4F#gC*NqU^8^Q@kCG|5YY`g{k5t$8@vW^blES*qVRa1I84X$Y)O=)eMp{`JO&UV` zzieJc8v1dqf|6{JlAW?BQQu;NFraDkhjw$cyh^bC0*J|F+_XO7IL6OXrjUk7g46R| zf*;K5C$Rt7s7k7$$e_qHmR{-QOF>IgdV$4I5E=y(^T;j|^g;aRI5r3Ff82@bEKOAaE`a#*w_rjW4rS*!e_}cKAy|A(SGrj>hxVtb74o zHSmro70r@+o)IDJ69X})g`$ikj4q1S6|#DWk9=obR5<01h6^LIq!thz zPzznECee@ys}zW%!n5izvbi&}oM#7=g90+uQp$lRoN&Ka}N9eMUCgmMz#o@AC z^H-fUp6piS6IMG)%*X>J-6#TmO}*>TA?(CXxvOY`;`(@D4Mc@%H<+Oz%_nX2as-XI z@HrsRXm8iY6_sw~`j2u{KYBFBXsv4ym$y3`$ld z)CHcZCh&@ISCN)dH>=56Uj}C&xhFZ(@!0Ew$hG<>nx?VEyt$(D&=!ummfhB@&?GNs zsYKGJ-qam#S#&k{G{%Sdz;eoroUDvq!e+SD%b7+=78u1d#NSAj2+e}0a@7Wq z>2YS)p$~g_AnwPlyn?UAV7&FCMI2{_GA{>VfS|TxM(e<>WA~5Y;XLnm?+Z0tXNX^} z;I2282E0%ouY~R7y9IhCt!*aOb+U*eqY~YS>Qhe!A z@^Y%CqU3BFAKhvpv=-cwG1@E+I+i1MHd2JlwuJpbz$6jeYS^6dFrN+pEu81AhUaGe zs?2TecNtkDIJ;$9EimgxX?C}&@B#>Qs88*caLuabiJW#j#j@M?BjKqasw@y5-u~xd z7<AI+kK&rFZAeY@Rmbpf^uew=XrqpF*by@N7XT ztM6?ei-D74$^;`FM2q)RZX6(_=C^*}&8k#5OeQ~$=#0ew4BB23%{nI=!!w;xg-3o4 zugJ5R+uJeM*hRdN;0RoXRT_1{Lqby0uaKW!#=al|zxan*Z`8xL`CqZ(9;gui#+-Ez zC-gc&9Z$EzrM&)e{l(XH3a~b$Yn2xZAnzqzZl52%V*9_gK7ebcyko8VUxznW2LJRn z@2Ve@cKwWAy_mYb5`Uuh%vtXFTf4bGCGH_;ANE3|L>2*km*&=+;&fQ*(=|ZWx-pvf zF=-<5zUJQ}|FN>a-e#4bleY4xR{Opm^|%ugCV025_jFd~w14XFAFiF*e;oESiJOEr z3U~R^kJ(^(g6jL~GAufb>8jN|X4Cz0lilhNfKKAsaFg~C{qcNnN$UN2*>-Nfxl`A5 z3o{IL!g0OV$#HXJ$?@s2)_j$gr)Ez8#74Z=7`Ep=@^Tt3R6Mu1|b` zhr9dHv%>T{Df=m~#Ye`bUIV7yXS(-$qx|z)JMW0aoJRXg-5hOnm0Dq3DHs;`v&+-{9#Q7~^+WPr{Q1H6HIBdj zT--N3@J6=@>TjsKBA;!v6CE;^`P{zQ(!1mK_2=L^l3cmNLWQ_E9 zWFb24#xMF%`_S6{ck$id)7Z?60^*oz zTfQSWi7I}I{P0-)9XrqaOSbRW!mDXMd&&Q40oJ>_!XpOakpc>3(Q|yW>wHgeeJ_RO zy}upFRZ=y}59wzL4d=bHZ*KY!iB*tDx8<$0y6f4qF8IlYQ+ZfCW2 z$CSdEIVzY+4{V`jc5ac$=x z%7h{xpC;RnZJshV$t~};oxpE(?`)f$yU-VYS3GH^TY~-r9yV{KMOJVfHdEyE3=C`3 z=7+`|j?=hk|JCA&$jU@3q87s%HbUEpMepCYIXS9U#}LRsf_O*@89{nK=9*%#V49km zl9lMHTb$Cx9>~R^d9a$5kht^dPRcBD1y z0CQ9wkuD9_W+vkBp;I;Da6W|!?SyO{w8BVaV7YA2pt9t5*H280#XZVuD_HV{t?+as zoXiv0D=E}^kt$beSE-DI3h+)5+zehN+oJ9qgm5&eN;?oFainZKmoA^ zhH!>9fw5qeaCk-Z+NmuB^yB;zo}+rdP-?GF=nba6fs1bC3khKAs^aog31z~(EfnQUgW2(T{K%_7V6ZV6c^E;^DlxS; zV55yRX|acKk#UG|l98Cy&Jm!Lj>GI{3(n_7Tg|?rL~PM_7r@CLa7-H59VJrq$J48& zW~W&O!jypV(-_jt89O8fy_tXKTcGmrKN=&sN7WgD0y*=88|jKeInj!NaJ_rL33RPx z#!RTg)BKW>Mc*A&{0+WECBf)j_jH4&A5OM-|6-l} zqLdr7@5JTbit|tci5=ITR%`kSD~WAGynN4!t6{qia_`eR22=|%gI3Ky{@h`F(!46GzrY;be zPL>!g*1zuYEk<+L{K)V7`Yks!z&OIf z&GL_UaAcmzLz!0hHxuvoqu9zx6CdsEY`sYd`@WlG<|SMu21<%>?7um=mZu}#?B8a% z+(vz{cIfb?A(=85er$XSQJKZrR}z0C_&4yG;LJJ^cme{5i>++>0rt^-@me&?LqhZO=TkS=Ih*w@GP zKTMUrWDZqGlltwwh%|gydpHE_+i-flmTk>fX#dCKL#QULx+Tre%qb{Ux;1vF2pdRK zW#poiU1iqR)~gVIyveV{o7Wt88|Pw4{uSWwo{%Uhz9EeY9sSntWPW~r=gl@OcRzEE zkB>dpBtA2xC_y-dPdq#_iL|Jz2;$h+OxAH12i&b>dwX&>SuXDbhs zn}0rhe6LeIT{k$s+x(d50b-E)KU0?Fy^6H@zKG>An@oCFq4`zma_34YN(zgkZbc(<7z&PP2o`P{*iZrj!RuX#JH{YiDV zlt%j_p>MkFsJAiMb(pt57q{MP=J!tj+xJ#W*NMV}bE3hz3_RR5oc8?*C+}rLF&52i zu;TAz1h0WdLTH9#!wG)D=^=|D# zc1QI?7b=zW^X)eba*@A#dtZG*1C>hpAgVzK7gSVNcS&*Bb^agm*aWvAP-Ky6<=87x zG?UL5+4Mah{j*~f;r~o-6p6HojA>)Eic-0UHVaL4Nmx=w%L^x-mot`{#a232Sg;n* zxbRRLKkIY1c6M^)g9aLk4J*se5R5$2`71PjoL%`l5Pl1IT522c}o4w=Uc z8c17^R;VfrP+`;qg$khxp;BNzNBvX=H@M1>Qe&t{y$Um86=I7(BO%)YE2XnS9pxwq zrLmN;a8F62=wfp(;GzSBWF2}GcnC2Egzn|01%lZvc*x^4eKghl!c z3iXfXNQ6>QaWbt8kt!gWm>@=KeC(jeh{NE6YEso8wr?EdH25^|x~#gQsj2Z)lslR+ zJ22{jO4)c+SV#gIk|1dm0vq$cz4(z6qNN%@LNo*_Qw}>`;vam2gbINQr^JbEd@y!B zv~b!|jznMuaQbvlr6fQGuMD~@FxZy#ls0}@ASgf?bpY?Nxt%sQ{-=Y+sA7I1v)=?A z79ddZU!VPT!&qJlJeMm(PE-X3{VV@X-+lo`4aAk!vaiMj&-FLn9Kgw zGvGXDTpfY9Dn!xnhEVhCE!CE?Gy?g6!e@;8GMcYnXqKm{)I=Jmszi27@MD24D)h(P z+n-alx%10XyvSn+!A45|d*xR zY|(~FlMzCh9M=byufNC{=ZJ`p;;K(JzMZgT`(JrG5Ff8Hs~(FViqMD36J_|NT5^bv z><+uK8%n0xQd*w$?224HnC)|ogX)qHh;Tq|8x)SZa5e<$j=KeYXkkpy<|BhoWVV4`UkN=ONQW>lhiJT0dqMH^V3vS(roXH7lSjAcSgo| zm%S@Q@B0iI(v$9Bvkp66+%auC4qZW)M?k^dWl8zApSkrJLT&fc8|b03#(^x`OpPm4 zt38;T-a`xEemgH_n(N?PiIFW^7+6uk6iX`D2ifUX=oY;ec6aCDvFJ9VC)St@ptL|n zai#^k6kG*Zv(^pY>Orqu~%@Ax%Tg$sM_S%IbnaVIPf8n(xy{)r)G4gPE+UUVW-2Ka{j2j>n z29Zl-#-zEfv@Bgsu^Y(Q8RnGJo%JBF|2Y%+z41nC=5qpn@p904=5t9rX~5kBejWd- zPO<&izqylZ^Uz>6s>7Evl_Xzh=}W4F9DZ5`@?}Z;cMDzn);o9ReM@!G+0y5D=`UP` z@M16{D)q}HGowH5x^wD{7ZSK|DA4v=WBihkx8Hv$nir7>p$|uN360w`$fW(y=*qmW z*8F37o4$^j1&mQrC-0u+-)^}eJZhxQMk4|-La@#b2UTU;F^zaY?LtO{?iQMMAcx{uBv^l)kE=7wf z@%cqdZ+3*NefJNLFJ<3fx8E2-tlR8M=T3$?V}vEC-47+1JPfok-SWXax0&*bbKWx9 zb=dfwa;I0lZ+TjITW6qF@?!}1J+zFDP~gWA7Q|=r`(`Uda~T~I(_lRTNnstGb!`4m zLgNKnP60G7;V?GijR>!;rF#O<3A*SmDEEhomrYZW+jDWb51E;WAku!#=2-& z-9oF_dfO(CiN)1coJY$>=|ZR{mo4icp@M)g@RK%TsGm6!E-ai11pzSkDG^^1UZ*Et z0EP(#E;t}bVuUe5+X03TFFR`JBtp)O6K_z7r*SH}KOL?iQd(g^K2k5C zD+?h@y3RB}g_;mmEoGjuSzd=tqvFtAF4cUKYCD0e zGCqX^kFBeLD=NuGt~{+92b5;~0w%=6`J7ElyQsydq$u)DLyJq36RQV?8a_SSoCLsL zq|qxZn?uR>s|ra@6AlKhP66r?K1*22wC~U_JPeB&PEpeV#E0gEic|A~MZ+_o(XY{` zfukH*KqVDe7<_=#7pVkjDCd5h*Ycu5{1g}}@g2>sg(+Oa7~Cf1BB)@EoRPRBiS`IZ z3@B2X1<>f%>3>Fpo@R7KmY_->AGmQqBZswvUYq}(?zK%AMXQ;f5J^Y3*p5za35{Eq zh6Q&Sf>j75xqx+e9}R@Af=8CUsiiVSEi9kWr|B)Ulh_r-A0uAdtt77dLW=^&$eAhM z5MH|W2J#3Lg6ZJz2q(A-f>Gg#C3F<#6U;44lzS?6K)rYv zX=O-TK{S-*=;_50Kq^(dG$0cZtvjK;rls?wIb`3;G#W}BVJW9fj${{4TFkBhORH_$ z_4D7#i(c`6%O0&!7+LwWhGzfOst{~do7;(y7UQUz-NJ@{v)GH@3wZjCWWSZz%eg-p z)d~vr8|KwoywLyLaT)_;mIg>@a_lh*0;TJjlM_Ehu+o1R!0{sR1_&j` z8o&==mtoj-c}Db!Uz5xo zWPt%~jt(P(JT>1dbiHhR%k0@Z+UxOMvfA!&I!r!+lAc~J##^h_Yv5GzQ*FOw;=Ac7 z;u`SWMz8hHUj6%#>e%4=yZ`HjadTfYi%6*Z63z{?O@m+abgk6X+LXnGtcR>;JZrA{ z+9ugDid|}n@Xtx{)|+P#cuCx*Zt(vz?f<^ilt9X_$kZCm?X8&5d%s0Zi4JpTt+;Ws z*uePsxbyX%Vk4_GZR|8RM9>B)xG72{K23uz$b4J^!pgl|rp#uetNXaw-RX3H8aM@1 z{OJm)d9R<5?sRr4gwRE9}z6rcPR}99gqp;-fVmQ54VW zm2O%GzY=of{mrP9YHcYbmRCR|BtNpTtyi<78sD?k_u7oBG3!5Zw=e%1$MrTIbSCZ& zE5CfbDg%3m5R;KKFF1NpmIpEWT5?`w zW4W^Szt%`-qF9JOuSg&TicAT5fhi$%)Y`)XKYR#eX=zz))Ee8{+gn=$siJS(5F7*n zONSwBR>hkqXs0~Okj3?C>6#z?|8mwJ*ZX5DjaFj@Y)6`(A-hdGyGY2&I?3wln{AC1 z1UKOg1d9{-(iHvN0VKr;4XaPOo_vjQs8_Kd@Zej$a*TG$-g}K8U|MU}w8P&CoI4Y4 z0A|T8-NcTiCN+XZH^m!b6B71s^~|=8av8ka@4u@xZ1Jj?P6_%Q!5QoVlH|z`GKPOU zUv_PpFEOIEW=7J7%qXw?c0m``-RSKM%iDXwB-_s+8D18g6}$=FQNP>=Tbk|f`d`74 zXa1W^xdbI*zR#=amdsFofi6MTt?f+D^IK|v{g*qu%srQ5PQ}-7i~cZUgMNYM7^rnr%Mk2J?xdqrq@RD6R{>c#S1pt;ou2be ztf;B;Oz*}7!UPVTq22wID?%zqkU~-rl4$pjPf+qiy*`tamIT9R@V5 z#lV*m0D5Dzr1T-Rf?ES)g$#|5J*B%;roQnCl@;vd)_j5mlRYW+A`$M(k)u+*c}xk6 zjt(O%9y|%0OKLo^Z@qw%2F=XTIL?PL$xitZf|4$Bg~gy2AM%@FVRni_6kHWX9#dEN z&`x@QejM^|@9bDgWK%X%^pJoh0qUwp+a<>Va}`p!9=pIf=1`a*S=ku@cCkv?GYbCL zt>iwYc2og-u2s@~GdkiTA4ybYtf-wGG)y{Jxr70!N^`c{U~2tHC~u^Eh@hNj5Goc> z1i&Uwu>w6jBE#|laIxNl!;mnpI4Y6 z^*sSZdPQ&}hd#DVy_R*mYz?D*Dtd36$9w#BT ztEeifO*MwMlyFchR5J8R3>{t$AeP2w%`|9c_M$}6$5tYa>jfYb;alP=z*=a@Xw#}o zYD=YB4DyaGWpYuYP(;JKOst@5^$G?xZDlhsB@Y1fSs1rar8lr#oftvAKFS3Mbts1M z@s^y)^BRP4!`m^#YXJn)SW}Dvwt==F2I+u4CR+eWa6FZybMF?E(bu+WxgBB`IVj7& zY~>N|nM6o-30s^YvY z7{DthbYM>Z^w?Np8q2M(kS7bIx@y22GmySumBWoRkX*3532=lnE^QMr&$I|EzU!>~ zAFdf^bVjRlcUR%2L)k`SZZp>Yp5HqT-1Omjus8+m8^p{r`{Wofq=B}`QbY?Vl~@D$I~rL-zq1U(-IlRrVd)Q4 z-D(;-%?Hh6s2W1&4RGugqT|PL(U4r|YmQ=v#r2R;V~q}$`asXNWqDJedcB#4;va|H z{#k?%N>AhapVIWujP!@}Mo))}4T3{nqMG|!`nf$FlpT5Hg$F^71U9CzHOdY9m-%G$ zZr{(t^=;2MSpR}m;J?l0VE&j7`rVkBcND#Uo-bUzwyEf8T-Hs86EV>Eb>M!NB;>-W z@IR6Fe~hw}%+cv-5hDud;e3YP=e2(;n?|)G)W<(3;ZP7F^3E1;DHNT}b%WGnNjW(= z#uOzJBal!c?RIvKuzrK9U{sbYO8h{qLRyg_UDju**51{MVK0w|Ip2o_(#v6k?qAsr z4S}^Xs9Y(~vup$nwXdRb>Y@_~qMkSVVZTz`g>exO2);eR3<>EvtV}*0U9(H~EPrF4 z*iX@Kb*HJ~uNIZ=pZrqOPbUGpXj~r{ac9Zu`0KcSawiey6-|~G3%|j=-tw^idgvqe z<9>zeLQF_$bI@rQmcPcl*gt_=gFPu(`8KR){EU@5258r7Zx@pq7Dfn(yZw}7J;nAx zkSBC4fVlZ!y5Yae?oq8ksBpNnah>CISgHD%9swuyv#II?dkCW1C+U!`yR|m(sEO7kBHd|F@0h zeo8~R^zeLsd447&Binc2Tc2o*hK!SHIYJBMezSuAT|Gd^196YFtrjf#L4a27;f7K7 zAh<{A_vXEuu|5-ol$!C`fvQW!Z3mtQY@1{1f#X7fmO^A`pa9jF(ObR$#@H zpp7nO^RcH%vIAHq z30I6uST^2F>t-Gf3ygl;t9dVwck_q6Z6`d*k2%y(b1xBewr_TdMc*}S18L*pLbYX8 zu+y+8;sMU=@uJLdg$Ef2EFq^qV`u+Q3oyzvby6G3-Mfty*hd7A{T3IB)~$|W`LyYqun0N` zFTf%dDbe=nuS!C?2k%B8on%hFM9i03X=Wjrr2-5SX3`PlgQoEQY;$_i-hh4;2{L@t zaPkOMT6$XKLh^=yIT*z9G56BRFTLSN^ogO2%zHj>({k*d-38 zoaW!Z@MOT;Sko;r!FW(f1|*P3mX)=&%E#@6_8crw9Vy-J0G8fVzZA35aEg zs+5dL_-K5lMlO|hL zTOt^MZY z6AC$5F5C_e537tFR*#+=U2ZT)%ys4(w@3H|M(L|w&q!H^B{&n z4vM3Zk9FQlcv<0Y$!q^svK=zyLwtkU^F8;qWb@0gh2pwjz{>1yZPy6Ili%x(jBI{s z{6duU=NT7&kgS| zwX&d$8M~Hch!=(gCugpo<9Mfn$TRp8np7=m^VQG-JyNnHR^$(PTA2W50WS`ddoVm)*j{eiNGc+z{nHb#fc~ zU0@3Mo-u}D{2TT|#B|;4g*_8)OKKki2xE%($lE;3+2QloGfhox*K~MrwOWl0k$s{@ z0;UI+ide2YllM%WS&hm6O8VILe}e1Y$gnu*!6wfNiddKu;I{D05TeHcX3QGI4s7@K z{Ro}2G2M0Qb`SL^Q8^dwxi|4iziDr2z^3k;Jl_r$j)3yA{(Wvfws;NkA`ub@LWu_iECI{g7CotrqGoCvgYT5!$U#0gK-H?sA&g2LFX+fh%<2U zf}J1b<{FpfQ1p{zf>vSeX<$$D+BwDmjOT9ko3*#cDwpNOcQ{+lc1I2$j2xSXS-P&5 zbEm88lE{d5irwkH@D)xz)U@~1I{&Mwr#Erm)TK0%vzH40agMij?N?9nM$7VU@P)Jt z<5;3xk)+Aeyt`<&l{Vxr%^m;eb{n|0ZECx8?|&%U2tAYT85UmV;2=K>_lOjOSFI1T zIrxflAWRvWVbKeP2{Wm_WilGNv|$Oc(#GHpTLh+W>4514dIYgB58zou7O{%0G`_0C zqKn|t1E>L}8WiEZ2~xRlohiw>I);kh=;@}6DZ<$`IwYK-KQ}HiL(e$d3G@gfVF0;- zKczlv>ChF}ZB@ZUk*oI=AVmsDf~>bC&;*#AY0<(_QczejW0v6i6J%j0kJCI;dr3^&hzOs@iO7Uxc>hc<(}Ow^Oe{Z48uOKhI@vmz zks8HRvyRqj-n61QpClyZk2#%?#Vr8a5Sy0SDw4-S)r7>ItXwEU?Tb!`CjMSYFytNW zPn1Wdt+VZ?Ol|uU809Ugj3o}X{-Wj_LL(rr`7?4089mH1GMHE@COCKiX`~Sm16K>6 zLx!wgOlv?nE1`mCej+|@dd%!PD-J{N2t6E09iMMukd#Dc>OVy2P>(ShV5 z)BkQjQ9i!lAM{Q^Oy;;LNl!oTS&Y5*|BY_Cn2R?t|WdgMnsP)waie&GX zx5##SzRBkIRZB^$vXZ(>my>Alin>=xu3-5h&ju92XEqfEt~wQbE>6KfbA;W%xTTf_yG1R6x|oEo z)5uF7bvzcez8J+O0UDe<5oyTO1_E%*0@zgsop8DnxKQomM5tzlYiNcUbVKokL`>A4 zTEN=A9MgJQhDM?H3miwjWnKycz`%~labOn0eai()!_O?iT zm0M3{lo&IAv z!XN($Jz`Af92IiUs(7Hs?7B3FDBp07)p1X@BmKg6D5AFEGul|p$j6hi+MG_rCAWIc zckbb{mH3B)Fg+uqU!MM z+Ak|lVD;a-q z&tIwBo;-`6p*8Bh(XdV0;~S$^eiR1!`dbixxNLF%Ut3@P6UCmdwKh3zia}(P!%ej@ z?Ka0hgmgX+)>FO5MBj$>7ZFan3Gd(alMG_kU9L2RL4F(a+L3;|ks<8YTE3Ilx-Aca zV~{5Yz3-)h;NZZ;%hO=q6XTfKD@oVWsM$>!hhNM}r~I-%SVNu1Ill9OZ0GMxnAg!V zRV>+t`q+Nixh))6MMZ5xw?p1%j8kiJdijP{{C$NXP^M+w`u$ef_hVhy*i^p|7hWo; zF+BU%E~^c`o4xSVFA*J!LsEiG!Tu;G&v0bx-&0&2R@RoVZxoE}8-_K=hPWikpF)XD zoK%S?YDh)@#Ei7EJG*#?>{p@EuJIpPToy&4R{zk`M@jm77Id~b`*p15@;$ehgHXWp zf)tY7K;i2INsL@+cVmE9K$1=U6@t8zKKKyF37P1G@2$#Zb420rp&_N7B$Yz?LqIT> zCkKXYk7(@PW)b zeQKSi5c*OSVxOz?x#d_E_D$?;f8UBY24S*&?WgBjb#b!3v2ii0@@V7wJT9t(^#Jmq z4DLYBf_{O%re#+TxLhdvYa9%aDiKT3ivSr1xl_FjOo-{3~Iksk=d*rOzg z8un8%`!09pspd1=)L$oZq)^mb>xAG700YGd z4wK3oc;j^!Y#3i_?p}+S9e@`;aaOr#HSktziA1se;X_ylE}93(lJQnH75J;2FIwB9E<}_MejDb zPXEb?C>vA0cdmI9XP1tJXUA(7G0>aHpz!JX2{znH5=jv}^Ce}gkb$;9Lwy>U4{ZZ1 zfK#N+Q`lR1n7A8HoTo%)mOBSvOXpzADq4K4!v-#`QX^E)GKT|DW1>yMh@#E++vlAESWOJ)D&VX283<)y*f?hJysPc2b}JV%o@LFfI$0OZ=}@^*R|?nR(E4YS*d!@JAsnx zmA4BnB%zCPn0?lJn)5`=hY3HQ?lnz&v9TMYjMdAJQXKglviXf%-+#pa0s{!QOZJ1Z+dAiE?|r zuAk%Q(Be0mIw*#n(@po#kJ5NixM=uQ^ag8?drxZg%4)=8*W?jCbmux!MHlHdgWaE& zWY^QTpa1N;T{yON)rKjb4gV9dqbl4@&fk55j{+I4!OvQqIoY}i{m9?!xkFQp<-r($g_p5_3kfY{!h&Q zSFcR99xQ)8rPKy}mkKSD8JEbiduiHTd*twh4d6gc_Qy;BWi! z&Ps4h#5C&Raj`xY^x$M!WKzOX)_#AW?E6B>3bU1wy*ApP_sZE(#^A(hboUZfskOvw zGnu^&X#%WlY|u}hwh&T|RKzPj1JWyNhN9bBAw}n_w>Qz>?xj|X;%=e!OC6FFry|GD zB`3fJBYBUYye7b5r7;8|xRuPEkm`1veh`~?;{JJwNRA-p+>f?o;d;T@eXstDWs35_ zX6pKN5Kl-?-F2t03iNuXpcRKF>~%DkcSzuWm-g7l;Ts+1p+1K~f;9LOj>5T>(G@QI zHkYM{lg=IAwzbarq6QcAoO22G-1JFlhCN*Tj-I&*xaMS|x4sAO&8asY?mIcVzM~Lq`bJgVNjtbn zQN^FqH+bFiq}8(53v@=c5BrA>V~#g?x^>aNN@#r6)zyXcHppBS4jDK8YXy)kUFnJg z#;=$EUcLjtgfy`1EH=@NCht3PPwoW6S{dOz3l%!6CX-_8(!{!kcLwq z!tdAT>^AyK&;N96@p=XO&H%V0b3cH)ojynF|9M+YAOFxdH8s5c`#yb|uueKY{s!sNE|W297p)NjG%E6t0@o=V_JorX;b5!+Ym6=ijM@TF zVF8GVzv+j~t{x>Oq&BxA8MZqputwrZv7#|a5L1!^1EYY5I?1(r{3e&l<@Jftt7xC& zzuE%R^Rq}38=wB6R0>>FJ33bHEkv8z!s3vkgIWm)zA!eb3A4yNsul`R@OW`EG8 zE!NZR8N`X$(w^ikXj2%jf(VHw^_W_d{uz zDQW`?K0@QO#)2F_agJ?uxF)j9p~lrRb3>4Rt*0WeTzZ_!3xo_mjiuf%sDv$nb*OWkJ}$u^?+x4YNY6d>mKNd2#@pY_$z~23$;_ z4v`nl_SFWMYr6>Y0Srjc30sCs05o`2@D^A(x!_wO_x{M7QZE*G0C%XojrW;I;cyC{ z2(tUNYqb%1l@HkieZY8W#^FCw73JbIWB;Re5M`NGY;#`azo*>4KkPj3(Vxa^zDz$; zf=WRJ%?sP6d#jFhmzFx*g7PNngb7yXfwvn{HQoBT0eHMT%`v!}4t=uz`MLbsY1g7=C8nDB`+}%92iJF! z|9RGQVX5eZloUM3WH2^1HbCek1~r{sy6h4xe~2@@X?Jtq_p>RN0uAPDx4^MOz{6qK zd8f31F|xxIKmSkdf1P5sL>cxHcf$vw7*Eg2Dw^?XZ93}~pO5~;*?|WrKWZg@DX47i zqJnR=yXr<(ES?npe{8)~Se#uKEQq_iYvb+^8h1(X;1VEsaCdiy;O-D4xVyUsclX9! zroaEpnXBQl?|7fJ_pVj7R#n#1{AY~cV{SJ%DbX+D;6mwY#qWmgd19D+J3+{)VJwl; z{Vn}g@96_T%M*BHz4A32E*2MN44d1O%qG*2cD1H+JA~rU}nqR zef4|rtFu2KDUI|S{CqpnMw{3y2PHxBly16ba+OYVrEyic_zShWUXS=Lk6aq>T=2P{ zd2Jo=KmR6cs&yGx@jXp~Ps@JZwvGN)`k%!@tNZnCkHxCMaBmPiI68Q`gj<}9>+qWs zIL2=E;-QD&Q(F3^Ib)GL-jD5|3e65@%YvKVaUTJ!j0O4F(pFR$|M(FA_Rq&O`M5`> z{QGkpucWk;@9UbVcvOUm$T0rLb0P5l58Ks-oo_bSLM|56?HGt{XSm<@5BKzi^qcp+ z+jFyB=M-P==dX_?zw2>eQ`O)ET=%ORp42>6YVNDj+tKIK)>D5?*Ce7}IQi$g-PH;k zuvZ4n&b(wI^nvbk3-)8aSo3LBu>a@XF|&$Ws#eKTA|fT*BgH|A{`u#D-;jBiYr3_c zOIH2X3znbnhxv1>(BF2*)_d0kkoWUi$HOWX*Rb&5*5^^~UV=~k+uyMR_21xc1Ku^5 zIsjWQyS9UqK7z&8jF<@sz=x)D%qqX<=H{lS<)g&Mz>VT3cJ& z+p4TxH<0Z$G@RS@!C#PJ+>sftos=ZFoQR2$eH^<-S1jjuM}$T24D3nZAB=K`LxzHx z@PzN3;(zew5~*1c9cmv5Ng3Q5@{3ewu!o87G9YVX2Z~4JVFYP!3-U${Ch=N3t*)_ zkf)phLkX&fJ}fTc4N%sW6QAc@)-X`w&y1#p&!FbV~TQ-Tx| z#W_+n=rI0PHW-9cmi(+G6^?~j-uurY+?~rFN_fjUEzdx%c!xH6C|UgK2@pJn;L^8* zizTKuVH}|TxX81$W|bBQpvNJyN|sZh*@A}H2c@L_z_TweDkH|36Ripu3jZ#DDqRsT zi!}?AK~ipiTqMzvPwABxUJ-2Zlv6r8XJbRa`(Jk;^C@R(%u@HA!XKr$=B3;7 z7k^x3k8>lt$t*Tih@$ix#jPX#J zU68&smPSf9tV@quNkgM3LQ1hql8EO_j0qx;MfT0wz|iWzCRHVxQF{?1^zP_Y@A>N6 zsT-atIbUt-49?s`^(>~&t~D42ijp1a;qvKqM28bgfq~Sgn>)M!{<2zWl+QMg}$6V|8$Qvh732^9$ZLA92R;Imt zyw0lDDi+#v9oFpsM;&#TgRz^Ij*hX1NBXx?3Hg=Dy{`#y2Lr6aA8P0~>jxg+ygxa! z4OE?M;lXR46Fgf=g)13WKNGBvSZ9Ua9AY>8qKDl&a$mjd)?=!JUZn%Ta16Szr;a&2 zBg5F%HnxtLZ^M6lVo{FPJVO+}?8G^`f@q2diYpv9d$kN#oG@)ecwF)d90&RfJjslT zHD1~^Ozih;_q^2onCgAtZnXQMbeq|ItNZpgOtu@aQ9scL74)OjObqk#eAMa{>SB;) za~DCT`Pa|2Ub&KHklAa3jQe@{p9wGk{uuLM&GC!?=QQCz&$(?L4vbIV7H@s3`QAqu znwq&X)%S;VFEjbkYz(ja z#NC$n2b{nuv(?tAI#{-jR{ugBGj^79OMHIzn93F$76f%jlqUK8)~&(=Ga)~hs`Z0b zM!;P{Do4FoJKVOJ^h^I)|4z&tZzQ=vw~D*Ew|2_;S~J-1IYR8sEdT#16ZOoF$6f^V zk;YC9sH&1@Xt%s#*5f`TUf#&MkHCRFCH>v8Mp(SV8vBdvW`U2lrKhni;IP6`S3i?s z=Dvvc0mH|u(#1i9zT0B+6#bXfSf_^-zv=Ghjqcrae5U13MBlyUuAX@%;gNp$rh2C~ zxzAb$j{>n<>FJ$Cz)z<@h8r%XVr~qtO>0>TtR|AZ?Jo4|eQ35DdG{ER7tPN^hX(Ht%%vE(7e`|C+5kwyV}S+aFsCF+#HIf65sDlWT;1JShkuEO;~j>zT3z z-R7(sd50fe|Es%82(y*Hb!!)M7p~gc+W8fX#?j52TQ2y|AXdtl+wH2Z84#TFzb$ z!&JcLYFTa7;2JFWXSLyy6C*-DKf@KTIt%UJ%nls>%`QgR0enOTFb^ReU=gz*e~i?HOck(7 z_dSb>>9%qtwKdUTZ8!7q|X38p-=8Wyt$ZK(nNTF^0pN|1Y3;PqU-$y} z!hdEYH-i(eg^_5?pT*^kc^$IbnNP*M6yMu1N247gJQ<9sv#D6aE3ZSjF7EF{~8Iu0fwEJtJ?{s%$*4BZG*zx6ojH~#lm6EF$ z8P-dT)su9GYtmkrc&S3;*`m#XU(8Ump=gl`_J<7ZIHc@XoFH0BRowHLNn+-}lB1$x zasylr9I*PxQWPTLOd;2IV}l4T8VgOY)G%j-F`?(wVbVbPnArT0pkiDY>U{%kFfN>g zus$WTc#C8IeR=8FX*1qnQH(U!xNui`&`y0QhG5-Bf*9R4mK=j`!;r1kZ$mo!Q-5M( zqn+b_`hqa~L|x`)mx{X|#{ENd6tFAnV4E&ycL$VnaG#{<;iC;n-z3yCJ1jB~C(>~6 z&NWreX1^1&^mKzy%|-snk1k{%FEqKMNDid(aQ!V^=&*#Mcn80Nj|ZkL)}RZ=yNry? z%tFnau)p)N*V~D#xn7#4j5D)Od$*6zjj?bynZ!&Q{>aBZ0Zb1ppgaoM`;VIKy|;&n z$hP70Kc4bE?S8(4y*v2WYMt*ipPozo(*3R;MMi58El_fuFEtHhPOJZ3Esi%Fp>c7v zhg>vsv^x7d^OqWmOnug~4~o2xvMAcxDNk`P^VDbShdGi(DX1fp@67ttEAoEEf4($> zu7>zNLzim2GvFWW4r2LeiotEscC?WoV4$LFMOpv)^6rQc0^ z$8Nn9mkRJAPU^0-x|Rw4`lipiJi9uykdl3#9jb}%XtUTA!PmAPZII3lbS zB!hnQv>8}$$3k@xrPtsQc?rDTV0iF*<0APOQF~jC6YdRb-jEO3^tywEKAOtj>kB~+ z0$uFy1Hf{%DLB`lL5HPdtE2DizZ2G~@$#;>Ve7W7pI5Zf!Y-h=I#Q&2ItzXVe0IIk~AFB4TG1wBCj`pfTI9Ol| zz@Wx*Q)-P!Z*?Nh+yZX*)3@6pzpzg8BZq2IYdy$rV=MqMC0H`6iDT(8c!+WS5EQTy zGtu>y4`)J~Lz{5pLc-R3x#BibsioIaijSYjKjgq=765z!XyXwD=k%r$Oc9O_pL_Js zOvnt}SI9y@O7y_oR`UhUzCd5cQM8}Zk)n#@@Ys<8RB`Ub8AiSkXJ+ZLteR}}nd2ANe9`l2wU zu683-sSpapWh3L{!x5t}s0U03%EDS1zR|w>j z*&#s%F+@S&NTRAALWbSH-ZL+CDj%gkU`qyqTdF38OIf_Wc|0n?As0@QC+)uucY+94 z1hN#hxC5d#WWZvBTZY0sW}uY4`{-Kuz7CQ0XLEqjx{X4;!K?kTle(TAvUDeB^@;*p zyohvGNM$F<*=<$3UQo+Frg!@;xaR5hwNuv?kM}s!h0!hl9U(1|8{V-N2DiT*JA3+* z+c_;OAK!5u3vVu^dE;Pv|N94{Tdmo-PTR;PaN1j1+%t^>&SdE=NxUP6$-QIw48BZ_ zE~U!)^FG#s5_0$JyC3-L10H*i$I7-mQt-QUuFq3rlzew7$$%X(j{Ix1d3JA07TlMS zVuJZBh1{=wTnZ1+#@*YfJ}$OJz?xHvV)!gll3jn1duDMm{hu%?W?fx!)(9e*Ud~Wo|Q&TkJ|>5-we@dMAqLUO-I^v?w~U|Ybi z{C|j5YB_fnxc9rj(ylxBEbsHB<;R(e2)NyYy^i~h?n5`j_2Fru2bG6kxw=a&zuDzX z&Zyr;j$DnGrs{7V$Zxv4AMo%Gnedn{Wv!P_(Ow>qJ3MxXj#4PPIbmGM1V&;z1y_!f zEBLym&gcyH)-*jaIR|s#K}QKnz&=)#IVigBkFBRYzfbcwLrve1Qok3mhYqEJfDiv2 zFoD!+zXJspV?=omH)>7b;nB%VJv}|=32tKvZ+#0Yz&v@ps*XuCI!fPvdU6fgW4B1* z&98>ZtCyI*_i1o@m6KCKANh3;w`hy^*o*v>m@Etfjtq`BGmV|qa3?!y>)_Eq`C~_k z{NOmYwXwUqORDvN9sTX;M=VLUk;3djmESvyf$zaryu-Nsj_v};OuuwKL-)|F_n^J3 zh&7L0Y>_ECzXv)G8&7^Z(Hwzg=dz9%+_AB&H#K^1i!9TVwX=KOvi!0d1EnE5LsLscQNbdD)3i~_Ya5uA zR8UX|dKg1Tu6U__eE{RGH-Hg#J5dXhimKuOfeMrq=9O<%?I=4Xj84^J&sxvMO=gm6 z!GU6z^E zN{%@PMcrf0V5)cwq-L&**y?X!()@^;U9fd%)(Pk6zuN*jB zQZzYx*9|^nTHIB+2~b<3Pf(=HAF;nL=~xtn}JTKQJ6DCYCz@- z=BQHQn`Et?2)zmpJ4xgWlNy#WhnI6WB0jb-g8?>KKceh+JpV(dM=BULoDc~ToL+s z9up(FNSSFtuKb@01B_ZAtt4r+-$l47XqtzXXZxnJ1}%b2C5*2ZzV{EtMA9HvY#-uJ zkUV8z_%t4PpKa=IG6?glwsIz@`K|ppo4Q4hZ5Qx3_)sm92+N^tvXJ((R~o7SN#mSY z0tr@}BKNb$qTWp~9MKq77&pybSwa$W&_9?ugDzBb?i^KO!Jd`UAR!M!G%&1SpxB@$ zWqo71io?WrS(ahrw%~(8n}6o$Wwqj7BjL|{SU)dS^^*VbAmWl!jM+ugA81g@nF+12 zk(Gt2v3dm<*b3COq@~%(fXSi5wTOqr+6Bz-ZLzj20|5HgD%1E}R6Gt*W@JRl%y&V| z4pDosXsrDdV3GHa*@9bqdfvZZ0grJ1m)G#h>%Gx@~S473Yc_H&%K@1RZNs zBBBtGdX;+cEuC-Fo7wfdxHNOzJYB$RXOkoF5xxF5)(gJ+rT=Tp!K{#j16U0B{rEJu z8$b$t0?(uDPB6XQpRv_JFz3FZ(HVRh;HwszDs2`&i+wZ)Yz*l4fXbiwS4aQTxXEXSp zIvfP~`Uo-yZFVYt{fcT(JsjN%_SgYmWgKROiJ62a=)>qW=wV??ly2S|r@VYfQ3&8s zVG@3OBeh22mz?ry{dV3mO^}dolMSIe!Lm0&?Wz5n?zGckf@MmYNs*8+hO1C-fJxkS zA^b-yc6s3A0&f>j1=nz^^IOOp2`kzDNWweN`;&er1Tx`r4k>#OZKf?}p>t^NS@U_0 z@4@MAW)JAQrIPb;V)8b;SL1lHz%Z37GW;{PHYg@HBUD8t;fT6J@Nb0zcrqEE_0r7N zBOTX}Q=l<7DDLuSHxX^1)5@`B;ceFv$A2`;a2)v&c+V0V(;flrK#TU*ZmX--$Ov3g zj{quh6|nno+1ZQGgWHBaIP>Ozg45(lo%;dQk&@vqqo9DJGrX?3E+47$!zLFG&pj&; z&+qC^2yhnydPK_p7DhnL#Kbf)IT_O#y@`OU#rn6xf8T*Lu9Co+YLO?>!u@2o2w8oB z$RTA$OIg=Q3JCzyfKU`FLv0}sj?h5W3`7ptCZSq+T33u<5QD-Df(qoz2vNN|WV{*O zN$igrwgh4KM(qGHP5v-it!S;Qiz(D)vSHBP;>sMC(r7HM2AjyHPC{zMV1zGDahg}(90c{8~ zKx2=YkQ31IbI&r%XcHQ;2GUMEJQhPJL{z+t)ES0b^diJ|=cG;45q8t6<2Out&nq5Q zhSthR{S=>GPH!ioDv?Tzt)HB7X---{R-#qMA>kqH2o@Y9G=4ythj?*X_$Y)#c0yJH zN!|)X)*M2}3`YK4tTJMVCz5$fUs)C1supI&z5|n#urDeOBbcLSd?H9ln;|GS5l0;m zJAuWd5s=b{(ma~V$ytJvbvH*KD}vlbFjaz>LP2kVpwiBZ|4zc;p{iuG?N22M105Ay z6wHV$us{+{H!E2bA{VqDoWa^C3Qv;|Ug6A`AEiuFc)9PjAT zQ?l|3iziE~jZ#213&ZG60xPe;*uiC0&f#ET$8l6^ss)I0PW(}UV-^gM<)auULl)(> zp<#2rc^nt-6BidB+u9)le9=3wmzzBn6&p+lEPc=}qTJl+kg#`mdEUf@OjSO?@R9l% zVS&jNSC0sV>3~^IwM*R+&Q&ZcO}RqfU?JY^#sB2xv@Zgf#H~t`;4U@bG zP1b#3A1FH(`|xcJjn9<{TIU0S2sAErYzu+VIi*SG2#$Q`=0rBtu`rDWv zA@-U-{zLzSPUGOYI&E$60N-U+l!9ql;$Kkl$RqZrM zo&ctF6Y(a`1?cwF+T7#0k0ck&*)KQ$iaaFU=*{jIa9f%%TKNB2iAX1&bBiCSn!+{P z&*Ez}f5$@;c}DboaT1v}%z85U=JQEt;PZ?Cwgv@VZ!a7_FnNYV9U1O)zm48zwa@Kd zsfqL|c|TYRb2CuGvS|d``sr03mV|L59ekfAuAUAk6djDoY;4w1@^3a!k)Hd^?c_tR z-(v*5iI|sHwBL7?{o7cjv2L^(O?e~@T8TzY6GcEoqy(f-0FovE>01+V?Ak_k@UjSq z?1(T30|9>rjfmNn-J4BWQVygN8~?P^+I5F~s|bq-IsBwZ63tm5CaF;Dkj4-!7MR&6?BR2NDe0WP?;OU7U zZ2m_gf#?Jw)Axcp0LWUdcT(7UZ+ zesEgYH{YH$wXm>oPvf?`%B!Se9B$LAfWDx-+;Ww=Z=8j=L+?s1_dOxAf?B93we;$9yN$G*O294BjJ7x6pz zhk>?6mydA}!iM9o>ST!^2;dK3ROt|7)p^r4+S$;=8J+WSTOVJ647gi5YlE4Z6O_aj zeQM4-N<>*sU^>WtyC=Z1529!@43Ak$)XmK;hcZSr8WJbJ=Re50y7`8k4W}2Zm(!JvZ zl_6TxeO=4!c_~eEWEK%+z^ze5@*gNR?$-5dY#Y^LJw{IL017pV%<*`yTA;CAZwkNT z6NXG&EcFkdk7TYO^8Kp;oMR}KjVP_Z7930| zQ#3WA|6V6_|Erl#iZFN?dOW)S9Nm~-j&(iJ@bwmytbfLDtTll*VZ|Qsano~n^t^YF z@E)5I+x zusv)%IS4|(Ty)<1CADUEUBl^w#M?hChlx{}huB6gjyI~WO~p-{y%@|GBQ zfBDVX@jAZZ_Ffg4LR?%2z)Mc}UB|%aYx%$mI>o2`sys&7JR0Z=f%BBo|F(%~`g7>~d0v$(Aq)T#To(+r! zbbv$f5XLIOFOwo%o;rWYB2)?5A_FSpu&>NoVZ(a>{u7Qd>a4y$&U$5{lp*3U80xSA zNfzl30h~}Ks(t>9*1_zu`O5TAlnAvjQ`A|#YtHqH+r^^tVr8Ua+t;f*4gK;#YG`z* z2J-P_`9N{2;AB}$iYl2lXMV=isUZZ0YiZ+bigNi%IA?jAnbi_JP_`HyTotptI|`D5 zKLEy&KcN>nKumd%QZgE|{Kkj}Q@x5J7B&J$_Dp$7qKZt_KM|@}lp!vWhcg75fDPg| z7JQcCZ}h5R!VnA!>-VByI%T*s-0#5vs)5L4#og2I?;&45?FkxEEvQvAVAZg2-1n|< zqu6x4|LqnHA_k!|!{FsJs|Hpw2HP_P^p;%Ms~R^B$$q7RDk^(*n=$(N1+fGrhu{xZ zG~a-MQ72TXim

STct&Ll8Fw7EWvrV;zhs)YoHfozk1s@2HJ`;fVM%m043um#qEt zD6bGWOnj^tKb+IP)r?Bfi7I|B_`aVYXSrNPtei&FE9Lm~YG2)x+z085c-Tz#ixwNx z|3)25{83;^klPTA%TkCP8yZK|?ym|}45K4~LEk5e zDV3p!6CVRrpp0!1^1WQuhxEP+%U}VDYgK@hkJLf6rU%hnkwsu9g(5(P_avi-NxDEwwpQ)+{}_(JkWh-9StW&#zB6vW%4WrB`VRnE8`?6Bq=xc^?)-QcM1-qieeX&*_N~&GwLC2XRm~=C>|*x?#os*QBKB z{lwByr;KB+f#}EyGVXfz_OW?>fipsrhw{?)p;3)LgIUg?DO>KMemEkFyZ|^~iY%x3 zMYG7k-G=-0R3{D<+{5{9cU(<_<>4l;u(EGoIXsU&T!Bvr(*|>s8OrlFbsdesZ32%& z4p-C^!e83`EN5*e^c^^jRhnq3V<&^Bno&1f<=gKLsYCZ1P{WAOlarHdui`;&Xk1?g zBpo8CK^yOWiTK z8y>-a9#7giUi-FPt(B>6hV7Bn(G(f2?59HlH|zaC&qw{YGdu~``?J>5n5El8h15CW z*A$Pi%rAXfwF4F_J2rjC`H(}>UgdYVBUxoUemZH_Dzw8x1PZ{Mw1@;1+My@_w*%vE z4=#sfV~4Ekfjzl2Ag{Qkabj>E!!6yOgdz@I8ao>34-R~sGD&XHDbl=rP+ezkQ`can z)yhaxk_pMCQ@l`ntm@?%m4v>*Lrc-X{>SjLFo{Yup5yGg3DDUhnVOm=HU>}IbVz`G zMVdF-BKybNvSPF{^EdV!exRCS^fz`6ZCXk%@$62P9}2G?$FZQ=jz8&xOU&k*HYK5M z%9~*MMS>*F#%#q!)A5^3z#@4|j}L8`ayiLf`GrJujOM&u2zyvocWkkf&dK!jlwlaf zS_WNGT)eu2=nPkGY%HG67(9Q=yKvX_eav~ot(N=bS&OBQ;IGz|pB>K8adBMy{3GCC zk%q7UJysb_P5jQz&i&ixP?uUb-(B!sZ$OW~3(s_s%j)}ZeSUS`@WpJndq$wXUkoFD z(rom=k&Cw2m-}iHd-%G+hF$Htg_T%R<&L`M2)U1@C7}xVuuaXZ(-u#&#G22NmJd*u z0Nw#5f(lXK6k2a?q$Vz=via@Qv-5x&#?E7Vo%ujsv0|;hS!em@`Y*y$rXG=XeC}Lh zBNicZ$sr|sY^?u;Q&ToWm`8+0g}Fwycc@ADl>-O441sXOah!i0s0$gxE)5ljN7(pXPQqF*){g35QlhjMh~fozT$Ihc zKh=P@SC6fBnh~=M@K#x;PZ;o#DJdO*j13k3M1C`G_vpylWGpD)|W%JqVCe{*ux1q$HO>e@cFFkRlHFb&Mj?k z&jbTQ$5s;g>{B%E{~%G;28MWEvN%IXav}vnctN;oxn!fxAc50&hUPA<1(r@8`;@*R z_7@!D=F6W)0YBvbG0Kr(k9#m1)JI0KrH8GzhinAl#*&eia0~bSnD3oKP5o;oClY1G z3E{2i@`eB{m}B$`QDMRPe`f)@IHW7EFil5f5{ufh=cKPzSUta2oAlRx`*QPxG1Eha z^9n{ccXNFHRhoegk+)oRR^iyNDRjwDkUaL>Xu1m%P6oRJ#*l?mShax4ao-ES-uIo+ zGow)3@yKQ%IhJ($s@i<{W&$FKDk_%BF+uzc>!3F1eB2oeu;=`*^4=4f_1}gNk6!xR z73IUPje#L^G%jNRRT2$8lBr8DwGwz&@nI4;*PN(Oz1~xINxMn~In}dI>1ww%v8M#= z@9BZe5#gRNZ3s{>Am(zjoTw*mcCaxH?3HL2`0IkKq)ujXbk**$$Aj;vQIik6;$Gvs zFuhf4=Xbt>2pg(s!41qs6Z}Sj`c!TbeYw^L{U|ayvki%cPI6$e9(vNA9x`4xAR|NQ z>njpOBU8y~dJDzp8JurU_H*s{| zX$WZr;p29c8B0aq(l#2|Qa&KjQDJ3Ez@k~Q%n9PqzQat?OXjKQj88ohE6b|74jlWIusv0?3^2Q7a5fUk=j*Ma{YD{mek+mx@ znF{vWLFgp(0m?);^fa@q1MMhj2>FEz&_+hr0mJZczH$!f46w z+!Y1oW@^?K#Spa4@fKRR2S>WR7sD`xy@AvJcwX^CzCACxt$mVH7bkD{s6H-#%pHvM>gH{ zcE^Bc$SqMv-l9vi(f=l!t-_3BdbXrIH6QWK>DXA@?hH zjJ@d_>tEN^TmMJpS4bG7h#{r`%#0WHq)v2NdsHJfqo zp&-L|l)+gE6i`df1yBeXbnu+8k3q5R=Xlh+Q1j6aoHP^{r=zj&ipwTAz#in~lf z`C6~|(q@$~_JoOsHmrE*6=s1AsnnzGyy8pAa{Eo3xY$c0Y>L#TVRbP`5*kt z9H);wK0i18czfpN;c;kU^W>fBL5Btj{j+k>VX!GN{+QTH-}#Su${`g_+FRMdmmfd6 z1tUfBR&hIh-ZC%p3MMk^DoZOXPpOgM8iZgOvkDSlPh?$C?(pI>Wz?Dzci4&G&{~>R z>p<<-xa;*U2F9qGgDTBCj&3IC5=j@wbm;X`IFLT|fyz=fC#bG^9q>M5= z?6~))y@h@8TC=cLJ)UESk5g17^GP(%-I&~z3&G8~_Gw?P|B1bJ^nhvh<@m&kmW@$; zD3lrd1oWf$g?Ew^?3m;~<=Lb1D|IvR@(pNkE zC@3i4EC~#8Fu*@KJvE*MheDx4gYT>!C6tCmi>gtxdP$W5u#*mW33WcQKqPkTUZncs zB_iNYPwiKr=aIhm+2*fv@-G8j{YmPpw%bFGo8oJUY5NJFn`49ioz7{Fwp)&_R_fu< z&%3Y=P^pe~OB2m+Yi~u5JBZh2p@9vFBNeov0xLY>i+n%v>YY7>dNvkn&LVAzp1`)5cvKwhn_1yxi6&n{&!I zL5WKaJ5tu#>^13lLjny4KU`5{V>NJ&XhKIThKblUa;xui0Aqk;nTNk;v+YlZBEA7@ zVfgh=r^N2>#H>ec#Z@qS?H)JPH4bZ}&9{?Bx+~dlyr33WRQw0Nq+W@}zJ(_kY8f=n z7B^yI@{MtOVT$?rzT)ZV+X0_fZSR$z6PbLmUfbkP3vQqv);}TB7h&}{ntubT*#!i8 zYbDxOIz6c4E|{ptmK;}T)acZgS{WT0N1h}T65iK5UPd?02~LKx>tlV|d4@yLovtW? zQ@2Jk7awO*xfF$OeRzF81m9|=H+mmkzu9X!|9v|w-iM(}NKLx#JontT_CCSRJ@})R zdi!ALN7uaW&OrPH*NER6H2idP)HO2P<$~jN@;M#m_sG*>8DxICf}&KtF(wOcPVt>} z9Y|d4p#NFjSKb}|&#zbIu}G8e<<=^@T|~`2m+=m4qMZn!l*7`|)r}}XC|2JSgy=ym zqo-&3OP0>BevV))5l>)`NE=bhI5YwiMVUYZ)B=3B3`Ut{rMyCmk%*^&85t>r`mGa& zi^TVio8CNO zjvCUEhErw4%72Mrr+|?}ZB-3bEzn}06e(<|z{j6Td<-KW8UxY*xx>}{=k=UX2p}P_ z0!L9UOL%{+3Z+38X>H(JG>)isBA1efEEj;{LWVhVHkWWJ^#{3Y}}$N z1bk{q!lT|+gi6Uagm_tyy+sE+XigYCnC>RB5@mFh8M8cLv=U#%auikx8T6 z4o1dv9Kt7Qku?7#F?1tpk-`+EqE4*;$v;{ibUq@0L>Klc5Ou`!06!=Yp}}ma)h1k@ zXub=hX1LnJ^n~<;;yoQ;Qj)0p-BUKgraaK^l!bx@MJqQE0xJm%8Y^=l7TY)}J}np! zA!j2`#Xdg}xIiq{$#%Acp-hdm5CJ_L0i|P=tZtzZl~nG4L30)yk<4!&$QS6T$D%h) zZ5EGkYN?o;G=%IBi3)k&;A#GqeCeSPMg=*`TZBKS)Rn#cQ)!@59i1+EWA(~I;A$?? zW9NI1rDGQ7&7g$6nbhRvHJ8AVjYQD@fu}P)mOrYpA6VIWc%(>%>+3rzXD)6n1V7=b z4W1|plW20%)AG2RlG?lLvys<0nSY(|4+hRgNpiwbKi<4o!PVPf;v04)ldx(0^*G2& z_WY;IB%c^OIw}`SD#%-9RGwVW+Unu?W9LB9VZ6%wcKmbT^KH(z!;v7#2nm+6kKKDZ z?799G=(pm?Ds=SDO#ZsOmfpNqzy2`&ITqIKT9zU*go6}ZJu>U%n;W-&l<$z&}X#~DY?z7E>S@%m^P9Qa*X9^wpN*^W| z%)aCvN%-Bd8q`;Yv`*5!s5g6cO}-nx1IeHM{K}I%%R}DBPy{~633uLey%oYFSpU5C z5*Y=pccd)U@Xt#6cuWV`yf%yMXbN|RnG_T3&AFNf>>m2<=Khy}^r4Mk@Y(6TAGg)3 z5i)uHHa7h9Pf0{+g|PDLEx!M?%xCPh&5NLLW@cR0qk-^8ZF9YFSLWBx8wTHZ^2goZ zgR8+s$`t6)!Z!}Wt=D_r74~S8<9@MmK6e*-bwy#|(C&1@k6YF+AJ_BVw>hBOC;X<{ zu3t;HLrYoXbtP%0Mk4KQ=&E|YTy~v5gzz6Li&|_WL7+F?%g@u?TRykRy;v7~hn;pm zaO~gVY%9o>*YCZsstoE4xaLH;lxicXD?(sL;FQFGTH|=JP999mI#^JO$|3 zLw-KgzMiDxe_1Ylo_O%v zR`}}S{ey7nbJy=^gU@C=arVts#nm`UPMr$f6sFM0KF%*&o~ssY18cjVN`(S)U* z9;V4uRn+Oaz|!bcR3f|c5|KU6F^17VM=Q`NG^I|QB@7~AV8zUOeq~}lIJluQ`3J14JffY&}M&c2@uJ9+k&8KQg_oZ&{&7)uR zme;&EYep(88zth2kU6Cj{&~qy=V0$olAEF=(@n2keo8s{WV&I36~2+3qvq>zab--uwh4Vio$Jw#p+Hx#+f6ZSj~=&mPc9{YCGxwWbE`!zL$f^sC%1S2Jh1KB zi~Uz`es4k$7diqYL-C<*fGn)?DuqT)dA@!ip$#+z{`2K481fSMrXCOFqJS^W&TK*uYvf z=fTVsbqtTmeRUH4jH&Y8syZpFq*R6e_VqJX|;@!B1{g@thbKD-m zsSwdf6e1)e*gdiH%VGQ6s<}-Uy7kyHeB-t2cEmmVA}&soR~0kvR~^>0NwyFFndbLA z4we4|@e9fmpbth?!wa}Rr=kkNy@f@7RUX?&k_>|-6_e=v^7059 zUav)L_|r0Cgvh=8=j=>mBwdDIYZeULUCz=ujZ(kUZA*B+={!q z7AfxT?(XjH#oevIpFZ#Z9b~N>WaT7xu01o?>=~&Va)Dkg`@Wq$$glKRk(fm0NK+JC zrPsM%F!bCC?5R(#!zFoQzY*m_I>@&YnF7{wA>wkyG@}vt^gJ^H3YPdNAm_^W@kvyI zazs0j?eSnCba3{p@Zk%P>@^H+%A^?4V()_5U zHW4XEB7ga9c8$GqXexTH*hK>!8G(BB&}dbtrU9O^CDotc2N>b5u)BW0ISCC_9D^NU z(~97B=h^75{rh0R3;9XF(pKWq8dg>eAi@)}!~)QO*J`y%4r-kg4yE92cUY0V2GT*j zqmY5Qv}i_XWm_cdrbCjh-4n{^HX=us&_Dgq4cS;OOj5=j8pE5;w z;}r5b#S)Sa;<+wr7wA6DU}O7X#RG;&Yq$ucAJ1d_|ZLlOn` zN8YE4x9s1I{HH8dV`cm(dQo#=`np9N%cc8)&XNgF#gC;Jk?M~v17TLNxEMH9xO9^V zVN9^N!lKTBAsZ5IH1RCfH&Bqvc92@;Mq3M1fDHeIL-j)s+T>#Fq4oExi^_pJ2v(!I z1%06bK8%nl$>jYKEB}4l!BYif92liNuK)dID|_M8L(ao8_KCrJeJKo#mGex6C8x1; zBLNKz(IJ{V4h_m^kDLN=;L**EQmbddgy>2oL2T`@foS;G$9yLmyCOuL1VIVmR}xk3 zHU%JnRSH}r-UGWN`0QZuZls~xECEp^7V_b&$h5%ybQe7w4uo;J^O_c83*&%u+#~l} z1k(^eG5O@u;9QP6Mb3`1ZB)ygrs}~-_%3ad5_OkHpx59v@z30En0N*L|J#8pgmcA! z`Z47c6cpMGvr97`BZLk|+35G?mNSNzYN=M*jX`?V$o0@w-9=sNg#JeFZHh_F7BjnJ z();!Y2MqjCo%4y~l1n$@7J{CzMI~Iny*9O1BT0-gB_$;eG4b$P(gA)5$l2k)w6yfC z;Ij2#*=cc_@xZP+9h?N!u_)^NdTZAOA#F*V&Tcf}y9-(Md*R)&r!j7U9MoCjZL{jJ z&PV`t$?7JM{=50eQ^n!m<)+!zTlNlHa)|>3lX_fID1oj3D}CPJMR|<2SFM_s#HtGN zj6qxF&XoGgLxGQ<2b$WU=%*Qdp0P6|FTvxQtDY>gq^JC`rkuWUg50hNWtjD2Qi2m5 zuq4f4jYrP2m7HH!-JK225j;z;((JkE zSam-Y(7DNJx24zX#@+WBCl5M6YgiSmRBPVN%t;K99-R?!Oo&Q^RKe!xC2Q7~hpRo# z&r@(84>}SJTzEpK)cS(#U>lOv^I*E}m%zV+r)WdjMbWe{ld@WZ-O*UCNcdZ=slIJ> z(RUh+xwPVxRdgyE?(nUCLSSYTVpUcYDeqHMFM@*K zO}Ju4nkG$G+mK%oPNA|HSFO8FoV!=!RsDF4tE6N0@%}SF8gkQIN z%%IvfQ0*JH$33Vu0ZIzn>h0|f)S*g}J>7l&xehX$eBbHJ*7$h%NZ$ z_Vd}44VchbGD=m^<< zp&v}>3acs^8$63)(PB`o#EDvj%q-V@N_J{xv(9x=V7xozOk8esl{<6vV0in|{e;cQ zY)Yt503KE#kb5e86jv#$$=Mx!m{s1o2hr(uKLj6h?^E>tN9y4TQdpIWK63rUat=r? zoEuv8i}O0>(*rq$nCQ>3SQ=a%nTE_q=#7GEJyEr-7leb1{E0wii>tELQM>YlM&p`<%qMMf)kSmwY>=l+(#kV7c0KGeTeuAqI`;tu3OIkc>%i9&omee)XkG8?oUg}CmvaRZ@6`Qy-~JX9)yY9 zo{g}w>O&S&8vGH%>cEs26kHTgbmb!^Mr>n}4{tJBk~kk5XV(jEj`Ro9KhHjgY_jrB zI+h)0gYQ&cSV!WJfIqDA%R8f`V9PS{npE;dGpCv#rg95gg{pW$1pj{FWq4Cxn%13s z%_3;U^>`!wL3ye(HFp`e)Cmzqz}+)Hyz`fVT5ekv;(T}1MzP-Vjv4Kc)%V0}Z5@81 zp_!!Lr;YbbbJe`XX&sB>a%H&9<;dx#4LrCByIPJR2pB=?*t*fs>|cN3e%hJMQDj^> zh$G67LIykaVFxL-jR0DQv}L*&4aBb1@5Z9_d4i_^Z32Mm8Q5cm&~6=~ZU8rM5`Y34 zP#Q2WmIw*wpxGNtM1!P)lf+7$4$SNsRXdU4y$JAd(RT?tZ z^$V*99FdIfCBRaZ?84+ZWt7~vTlc^v)EOAxba~bvw zyB(0Qz*ee!z0w9kTr=`rz2W=4iaArAfmhF(}na3%w~+pcp`g z!X#K!o*<^4P%s8a?2s0wzF^!8g=-a(a@4t?5iv3&{~A*DnPs9RQ4<2`A>pC;a5@(I zQg*LhWxq#odLgbkXoDL8=%ZeEdn|o+&0mj!gZmY-zm_U`zbDdu(T>yD%b>i*B6myk zV{ua#-cdn#8X}1n-*J#J>i>cdwL;x^nT;7@Z8S0nr-cfj4M*f>k&y;ahxW3Fi)i5( zS=P|@QcG6Q`PW(4#Em7u%3{h67JD{O#aX69kSWy)xxs1pYwVEgoiwaURRG)=d))I_ z#15Pp%escT(Ych@ffwZ~AX5Sr5s9!iSopxYGooj%GB~NBQk@(*8rp6cjmiUsQ4y0s zCQ2Ys{S_UJgw6k(MRA-$0J>(e5M)b_F`pEP19*iJ4qr^reGJ@A%bc#vF_W8ljpj90 zBvq-qNmwU|0~}YVJv8IoXK6CDCEr+y^F$immOn&P2`LmT$V#Jz3R{UPvx7vFTlC|4 zYA;C~lIVoZUvWkn^H&hHvTzt(7$g;u^E2r=3#-w=%v)ywr)2o43k z3S0p^Tme}e9u1hG1L;sfU`9_HwodsEkZBL^c^3HV(_A2X>RIa{P=N5q(z7r1^XoLi z;vt5qXHHYdZs%|tt{!w5`&^Gn-*b>*g$1<&kBq5gA$+)WxaSgFMmak4sZHu^^2xY| z6t+-LF-r+Xj){Cz0$Sl|x{$Q8ss(nV2I03ZLCjN{tOhUr@vGcJ;hanU90jx*@}&$R zw6F_@03qu==j;JBc{yvNGU*I7C^-?MdA@Mz8U_kPkW4wX>@0Tr{{1ZN{$20etzt6| zv(RD(QbCYEUan995lqaHBIe`SQKlS@RKG~Udr<*71@6LF1uE*?EhZHJ{q**tqGPy| zh@kr0J0A^H9u}2}c5E8~8LASm0B-&MscSXX6&UlGUiRM7G{b>pH1=PZY>wT+Jfz8J)BJzP&T;|M9k8dlEK`|owQ!{!HvYC zH&{JQCe~Z!Br>zM$A0p$i#zViHk#!|NPXK|X8Y(@(vS0D*WRyW1Vw(xDbZmSSH>^> zXKt)c4GpSs!j0rfsXY!@r70FRgBm}qFjzf*W)+_i`RuuA>_j(qAH${QaU*VW(W72vxethyM5Z#aRRo^NVxS=~E5S zJ|s@xvFz_a-ARqHX@$t(}J!<*EY$Q0L;+9TRsnE+hZuf>oHC#wS{tplD$w;V4pw`9M&)J5Y$YnY%=GS{o zxm-_X1arymFF|7fwugFWZ$a7V%sh}`pHIW2?1!?lzlbaXy$uxUkE@ZA(92(&9cI#L z39~uiw`ucIOSF>n85C?TjC%yUgU{F0`-!Hs8fb^>$LAK5DR6N`@RQ%1yo(EEgeb=n z5XkbpVyKlY-~h}=8(&kMidaTB5L%2b&qwBSg}ORpw^nD1#rmFE*H<1deD`IySEaYV z+wF{<+3m>VW6j_$z8;fvC5%C$L$qrxr4-tqbvs>6(Qo)1C3f8XjZB+g>MMVC+Joui z9{%6e4@4v+`A@sM7rxES&HMZNxbcIyTekiG*vSqRq)6C;h?Y(H^rA&&_ZnkhA5vpDQte{GW;U+$J62>b$o6qND}#S z?|Rk&n$kb5z<&&Y{zmi%|I-5og%2vt>YLM`6{Crkm79zx01Eq6u~{;t^E9a4slOYLN5&UweV1}QP8Wu0XGw&bnqjiD-==~|n0FiJ*C zGMJN&WVJ9LDLf=CZht74W3TpWI{2+NnMX?f=_@kJSL0j$&W1+zOr~M_Mu|{E`l*C6 zvMgvrKhV_?W+pxIjP8u7Exk5#U-$?BZqxi$-pDB`<=8#}F2wwl12(i~O(-{;TU1~m z^lCZU8)}#tp5aS}X7Y+RFNex;}TuBfw1w zD*)18qgOD@2oWD=0DUkKMcWT<4OK zT<0k_kz@ohGj0+IqK4s!mRt$c!VAGbVKhj_FKu9KGb!`tSGvE_tX@dUA6Jj=xuc<_ zz&E4cU5-lsr8JCJ$!Z+DR;te!NL|F2{8X(121FWWF+(sy)$Q~@MDeb6O6V%}H~L{1 z0FX?W5bind{iS-1Lo~08a!eX9M8+&=VU42 zMvE0lX6NT+73ai|`Dw6J*j|TV<%I*{2U1y|ud(|VXAK}ZMVLhbc4VlLa>b0Sg)+}Y z>VDV3C1+Nq?X!xSE9kiKOXBA?h*&1g!KbSCKbtVQ0!YHXEbhcBy@g|>N*P5M3z60j zGYlc|n2}?(z>sPO6XroiLn1(`vcXVCaBCL83+M=uGC=j8*-2wQEPQRU4GjbC2@OH- z`Wc(JCYXDS-}fs`fGP{JP)Eh+gTM`|g4{(@ZFZn!m>S^zK$Dy4z@vfZJ#;+jDZ_8^ zn!zFPuaT3}$Xmj4qsazpM{@FGv2J0=Aw-Z4h84+;1x2GtiOvI4z)gi+dgGNQ*~-}F z=1S%*9#W&QA|8Rbes0(T#>*92PIjtSiw_E&*-w!sURWLYr%i)6lB=94pl#YMz@(KKdU7cL3Z5smI()31xif~K!3;$L)SoLi0Kd9dD@y zPv!y`9_Sn;vprNwL1~mf7W|Fe!vxd@Vbz(G)=3c8fnP`;(r5y5-iAH{YklHs2)^MW z-czUv`<){|KSF^;l;t7z=L#5Be1p@zQ0QYK8n%EYL2LA3jl#1E3N^$LDZnifEg2tR zN`mvRq2pW-TtF{aiz=aI=zCT)wK!=Z@b+MENLGf4;r;ckC7Se#yU)$6%3gKox9j+B zwSd)9g;xC`M1Cc^==Bp?xat~}Po|>J`S*_dzQ~T}KyC{Cz2Kvd zkZ$*L>8`)`#ax7;Q>m^&iB%tAvVtGTzSpP5UJv_3FvRv`M7X!#fpRiU6}uuAH31x08s)xWImmBP&-UyY;Jx8vAWxA zHc4BUl)feyigagZ+90SvoZ{>Dx8x-M7A1w759LRDuI2Bi!L#iZjuxMHp5^ZFc3XYDU`D%uCOe`7N+JHNQNIW4btyxAKxP4_W24s<%2 zx*0%MWJ;4;3h-wK{Z!myQH#$uENL@M^mi{AtJY2TN#4yKXPot>kfRTtyPbXjvg17- z2dBWwV!_@M982Z3P{Bb~6yFH*kN)>L4$oirb9Q&pcAs(G9W9fi`tPv%?+b8K=L8SY z=6#Q&Z*GT?pkVnk-(Fe4S7zV$%;nl?+biE{O#c;t*o>AUZ>HmiEb-AF4Zgka=Sk6{ z(B_J`yNkL%kHYa6hkFzL9!7S2eCwY4e6N)KTk*-K@BS>EzRya|Jt1{>8_ZDb~K34+e(n;V=>-+z75ehVY1~>qqaJ$?ivC9swZ>Rws;^#hD z9;X7Ag05=i4H|ew0YhS={CBu3r?vJlXVR^4GVCHa(UB-BNH}MPgfxT z@cEOZ%uGVZsSekeNT1pYpRdGM`JiWGaYn2hb$m0*(}gI>iQSBYwW+x|McGY2DvUSR zq`(9=2Eom_!hw&6RN*{m1%dgikqie6GacnETmJWe3YNOyKWGb+^crmi}d3fkZ7^tK%2@W~WJCp3@ zEWzgz;n3(x>;;n}VgMwml8|>ad~!*0UOg7GG##`~Ovm*-i)iv0u>b^aYBWp~nV5(O zRqyPtry|!vIl>|k@&iSv=>g41+-kc*$WZlP<^tH`!0Are4|_@QdfiN6|{5j>0}{sI&ggYsfWK(WV>(1giys64A9z5)DRg_(h+h z%rq>Hkina1lH@7M6U56ca&RypSjBqSh0tp#_vC>QGdpZqmLbNQ^QvC4!RNaIRPzw= z)JV!18o!pv0H`QZ%6tfy0ph)~(IbW`6tJXlB@(wXBw>LXik<9W7!wGWi&hSf$CYs* z)DWYjrxKz={3Ho*ffhgp@EZ@&4uRf!B_k0I@Veb+34a3%Yyez|d=84mh^91#E8YE! zbBXYTe=0<7q=hAoo;|aSGo4!}vK(BIoa4B}k6E2)R?$%demCeSpafZ8P)NyQe*o*Q;PB1 zV7PT{QyVT%s9#DoB-Nl9438JG`^*~y4t{#&t}2Fv`?-9x=w3b;vuoo5YuEoli>$vW zS>vI*9nkOjJpSjfeb}?{@F_IE>$y^^-fZ$W3Xq1vtRA<$y?x@y$+2n+X3G=YJ|>p` z4UzHvUfkysW_y>1>20_9dDoSAU$M$7qT7j>;0WWv4Zhm<3QF(!?&@xc>QgqW)joMR z_5`v`6$c8!b#3@5gBA3ZI$d7wCtxz_-0rvJ#R{1o+k6Dopu^Mcwd-gQTmR3)vOY-q zfOu&0e%=jrKh6ae(t(CHw2BNJ7J`Iv={ozG;Eyebm(byF#lvUzjT>Cv8!SrE%a`p} zT~Cutf`tQ(R?96O52q|Dfn|=b_m-e;T8C*jEgh$3r(c}a-J`KvknNv|e9pI!Pn$pM z)qw(i5atR_yK14`A9%}-Mh-12x&fZ-f8B2eu|W+Nw_v%`{V1Sl$yk9gXCB>Vr2h_j ztRTS3+L{sQY>AFWdVG9D4uiSm*eGX3W!=|fa(p*HFS*2om^ZE%|9wJP%*Z#IHtPCvd1iOJrl?19L!upXY9KIK zdhzx$5Q@iP`=Y%gq&OPfKV@J2_Z+k9Db#mD@pa&QgDk^ihYy=0hQ(U&w~7Hp4#Afu z&*+bJ-vQSxQ0PT>e`4Z1-^&*{I}LWVHKhA{vW!d)YrT$<^V9JTaJ~_0JWc+~CkLZ9 zx>){&YX!E=J|$6T+hLL#j+w08uKF9z+J3My-2;+z@Zdu_DpwBf#$Qb74lZWSoF`!L zrS}smeg3LPt}EQlE-f>>ltDd@j~sU|UMHg;wu75#(?{c1@cXQLdhWzOtC6K8hE=H) zvrLSQ_hZi5pH1^Mkn(j;3;)1lL8YsSRwyA#`|n85Mf@NMM;8^w7o`kQAh02{5z5aO z7furDl^s&e2`a(^$u$~eFQ+t{ekw=}o7WqArGqMg2hg%e;^=wtEI6ELP_STyIcQ;! zqyZ7?-`QMra#|8)b2Uk@i!nqOa@iLMQR7Nr(E%{1u;9J9CUjJCo)snpgN^>MbJTme zmr*=bA(2QS7T{H3>c(^=evnGRKut1D#3aCIE{T<>!!W-PYT2SI7^GEcx9tA&LfCI1 z4?LFdS(cmjK|zTQud|S&2$#5={)kyUZGMx#SP+tQ@)=edcIE@*5azHt)Qp?X!56k? z7E6_JJuD7Oo-9t`Y)BEGrU|I%v~#T>BAdkN6J3~&uUJ@QlMoBpt@kt?D=>6C4g6Vz7KgG6E!B0`#wdr#D~!dS2k{gJagSTFhYcoq<%Y4LLf?D)SyG=OG(^~7sW9(F}w z%{Plj_=}1e26!S+Xr*XQgA7M0Uh|ffObn?-{&NE!t@gYhPkew!UZWT}>T&8QfuX;& zMcFDZ#%q`{A*IERz? zy&wUA92_@kse$s1hJ&&k)UXg9{p(~@4H>Z;gr-Lz8Dt%HFN&y`PwoXU`pNMhU1LN{}zU3!%N0pOU4m{+kYv!ws z`KM;;8w)eeKE(LzMsEmDis>Y^9@AG0*4KoS(2_6S(xMA~S?=8>eSSX5P!O9LowieY zoW@I&W%dRD1iHCA2?A>V)~GyxIa+=W>u-d3haBf`r_3Q9O$+bqDJT_T;32-G`2Zl| ze13`04>&Dp?ND}^UM_*d*myALUO$Q7ILV~`Eu?4`BAD zNH+6;eINr)5M;hzjs!zVzj9I)?3+m3D+479D7z#c6omdnMtJxmVQMu&RgDYrub#YPK+$_2M^pCHgnC^#9-{!`C z1~F!Sg2)K;#9QRwKcUszqH)K6^f1@I!+ke zzc;Mowa+5hSv&ctO1vL`l9U)4xe1k|^6$%v1>b4xIv=pSw;>vBuAlc~oj%{6s-YnM zF*~PpP~8S32M0$OGfSpI+N2riRDA8~E+2rnjPblc{K&!e+e(j`zOmsbUeDmAC$KZH zyT9Z2dHfr@x}LdzLin8Y&mu)$TSBKD?d&h}s5u7GU~G9qWF(rUJUg^chKCaG^v?6jtqRTwZT3RX z{2;I4cc)s{j7{sY*mgz0g9XbAkW>0)98>uP2`M0fnPn8kwMlYETHqxO`?~ejc5%Pcs>jmaE{6>kNd*HVf=e8}2o94PM>lQV0(y@CbcU5`xdeg@Z!Ui=HzHh}`eJrZlcJ z+-pfbofN>IKw>vn$pC;@1xD%9k0zEatfZ(S!~_yda(oQ&3Ss5A04S1S@aP36cqj}K z#H^TLYlTr{jpNX50okHusT`$<_`3M|T{v=XZhVAk>hQd@k{B{KqN&|r(nG1A^y@&9#k--7#RD?kZ4Gz@EyinXQ;&!#JdDYnp;7g{Z^mdP1JD z*XE!s@@nEl%!9Y!I~T)%3<^s^a51IEq&RfZ@@MW!wJ1IO9w4;;1mC|t=AV)*$|$8^ z<4L$^}ToQ8A7DEVHfqpp*kN^I)0>rl%+*%%Z|WS zccDgeo9zD2w%GDgcfGrgQ|(&t!Uc0T8Hv;J8YJAW3e1HPLP~1A)@)OZsoaAI?f@8! z^8Hno(mw|^jOuhVDOmzf&Z29vz?oVp!n-cxtV6ojK_J%gF-WHh&3z!3HBrQvHenHW zeCME$_{EIF;1YU9QuU@E8|@be+{w2vcs>gR8gT20f9a#9#u=+Vl*Yt}3V=X8@cUv2 zKn^;vXF6UMT9AVEHwzO`3VkyQz_TA(aVh{Gini8(?1R z1ybTPm)h>v=#!2xMK8Q#mJXLU9V6?r(sh4r+2*?vauBpI5zNYhDWy1_c4-;dJ#igJ zU3ZUci_NZg)JEEGX7IMNH`gKs9kaJCu-5&@;5z%*NYV)FxrQ%}p^zaXM8$o);hH)8wm zg4UGn=-5#LFN%7h%{s3H8`Do%oo=5@&~l`3_l6pW3pC6VNB=;~a@UwmZvF@6olnqG z7<7#|(!pw4TSr@+j@&@30EEBWj4xTOmNww-y{EUfwsy}chH)>5iS#oC7(TC`cTf87 zB#@6{hkg_*X8TO@uWpo0>r96U@J4UCih7vez&ilg%*K5^#IQHEZ(d#7k#c}+!apOXU=(7hv$0VcaYcQQU^xYoBD&+ zz}mLuy=yeag9k5(N^y#iCYRu*1YQA0m`$-83jG(#Qy=xkn$Pci_p|11sb#2|6x5B0 zdcU}!iCXA`l#P@$=5}AfluF@|Tx>Vvb&)99q#m#?Loku)^LpSIye^mB9|wtVzfOOm zGWy=RKHn+Y-Yf(XAN3cLC7pG=f{fl5cKp+>Bmdqn#32_06cW>6txtc4Jkl}MLG=* z$8jV1Bd6mRN`&Ghca%EO%WFb%EHXJCQU6K3KM!#2NRUs@diE z&`rOLi4>b@SedTjxuKU5X%I6dK#Ec-ceeMf5<{n4xy4F?9bJT^Y(@&mLK2xMk{c`Y zqqo6h0?F2b#poxvLVENJPM6J}=%}5bBFXeDq1d9J7L+lbN-9;j5_7KM5GX+94{3o;FocoBqsUcojL1E>St_m>@*rxU4!qMe#)Z~^n3^c+z-jGraNeN> z7<~+uB>9(+@I9g%>mVE{QWMk!T4rhr=tKFONPoU?O^gD>Sr!r56YU62L`vJR1@km& zH__-*Khe^CB1DuJ_CP-%>=HE=L~D2nR|F@?9S zj*;NlN%317CbMue-S$D2HG5ePHP%RAOQ%$Cfl`Ctf_dZaf(2}UybITi`DxE}Mxrp= zPQOyg^sO=hu!^0egzh``I9dcS&`$_?qWWdbAS@PH6lADDg#OD?pezTB=ZcDrhsYbE zB@f%Lfrmqehp~;3m?0q2ry<@~dcw&RGTvk$Y_gLg?e9W)J5qV84t_ry%G_O#cbb1- zGehd`b#yEo5soQ=Xjw$N%IH6PemULJe{J!I8Smz8ZSR%?VRg{tcO86Yb`{8G1W7ns z!_V}Wo`0kT9u63<2eRLh>wShEzA`$!wEGTl`dnj9*|24Bx!})2^NIQ-Vt?GbGfZ6` zEl_p$m`}mZH|t)%i;Dfozd}YuiXtTBwB6VN(SU!d4#eOrwLJ(9UK{-zn8Xg%$|ejT zm7-8KC0SwyBqxI^03CPxpg@B0vV_0Y62YE*fq(pW+`H|m|Iq_>Z?%tImVMTj&L5mD z#2DxdSc~HtQSlcSHVRpMFI{5%n)QcEkP5!f^lP^@;pTsYm+wYXy8IVn>cjqsO z)>qaw+%}ehq+H27Sa-ObReF14qPnHXoEHC;l;p z#c%otdxMep-mzgdpt1GuT>5)6oF+$k*3|otTy@Bb1>umD$t=6L9=tDfMnErZ;17Hu zWrwQl&v(eclV1Q^V?F14hU>attaj{zlY+H$3<*R^ju&V`CR z;o})%L(c^#h=Fey44bDUFXPw3R12IZ8v_N$D-(S#RNtJSydyq#O+ID& z1_<(eeHNwvv;4BP{27F;r(o^-SZHrxXLl9jV10kL zqc>>mz0F8AV{K!@oT-pv#2Wvj!GY7rY{IPGGzeC`(DJvZwwsv>84`JTiI6iDRv2vu!WLk=kM0~S*Tru!$dkt#%=B^_ep@b57aiT*;efAUJg<=UX9tfIO1L6|AR1*l! zaIX1stBRG0B0V7wjXXHtFomHNA90Hx=O=G$QBEE$+W3V?8Z_V88Vcvd92KN@n00yRi05jt_4E^bN+;1*~2@j+h8 z#SwuWNOmK(RE&tjAAFYx0gPc?z@n}QGMT_5XhwG0;5O2PGS~MeN>OG61F%NFFvBX7 zKnC7Y&XG8oT$*5U8ZpiQY;ZksCcQ~4S>{6%ERtMjNP00;JoMjsmO8M=rwTd(k>Iv* z^eT>`$T1;!HU@IFpu&zc$XCjQVx%QnBP7#Vr2-O9(d%U3!^WI26A+w;umRqcIwss$ z-6;^N49q78Er8dsV>cYHt%NIeFLf}Q{qAvXh<-3g*ZUDJxL zV$$kiBU3fNGcXDoN*kCkOSH&O444hC#TKI26O*G1Q;2T?bB{Cl^J?F<=)F zRsz=GKtg0<@-V9Vp=ECHnC`6%F#D?nB?KivP=w-1gHhiy;B)??;CJJHgalW1ANN1_ zT$AfjlC*XI7De?x5Gw!Gbtp;xlF5iINjt*kK!=uBBu)MYWW#1VSEli5=JMu3- zG;%d%e+zkIu>fT?CNW^iETmC11CtmYjyObOlj&ST!(;*SI!{JJ;xBh&9%|~aT6hej zl3Cb*uX&)v3vDPsdGYP(+xiF11*fl~meyg|C>f4VwP-603v%`Tam85t_@>%hJVDDQ z!RR_x%TY`ED<^2F51Q+YVa#nk$IKtp{SCbSZrjEgFAJc8J9Pb{%1+#6*J`KzF_f3i z?GOAew@0lkK9}wFr_KVa>m|m&E_8AX6rZm zRq`wIbn<_6S{UXbMH5zFoUde71a z(y?60C%G>_kGGHX|4tJht$sdz{@MO}w!P>3e)U1|IUYO>sz(tRVpS9v2v$vRxZiNQ zFzROjV_Hs$BRuN9JU%mZ`@Xj`rE{? zZ`l0~VKrB%MPqLlbBGMGqeLNa08Gio>cTOCbj?yO|HR%7oC#V>Eb+&D~+_dsG z<~#j)Z&iK$SD$Y<`{Sy6-0lJ^XLlko`(VENW%}99XWz-|k6J(5Us9jvK)GVopFe+& zaL>DJ@{gs1`#)HMwN=weW?Eu1*25!QQac>)`55o*;Jmp}s? zE%;1WEld$15yrKxRfat$4jl(@u);;5z`Pg|)wW!678-AW2_@`2shm#QpI5pFE~Zp&nS#`rlo0Y@JpwpTyREWRG+HU@mqWah z-%?v)Nu@c3y#={SP{gebz(OKkb`iD$?bpQ;2siJow&lA<+^D1P*FXS9Jd@i?ta_ZP z31G|LjWQ|fQ0~|~s45V^3}vSsAt_g^&IS-p50ZnP9WIbu4`_-}AeD>4L7z`ujYyNG zyFWcIohu>b34`v#2>(7~i$rA-n1b)dT;Oa#*kZ6pt{&!}r_RRo1j5QNvwjVFS57Hg$N1O|}>$7p`L3 z_d(0!!dZiECVi8`JCeDc3S?~V8S~=QeZBQ)%+fao(ZR2!RmD|S1Z~`UJrTxGjz2+H zC<&k-bW>==!^COCXMkM6N$B1R_oWgTsZUOamddK#2FX|xcNy-#S;w8sET>2r%QOw7 zyicT(r3Att`QqtSM^fXZrmdS?{0KaR(VeK)4HZ+mJraf!8lzqI=; zqnoLJQ5r{hFe!u48G;am2lwL24>?GFXkYj_{DWW5is&wgmS~2#3@ktN6ZD2eJ@7*_ z6fZd*D!J4$jSZdhA}RW3p9za8Pg>n9a{tN4eL*IZ4V4^aU}_J%d^p>zeYJ=&?8CbV zp^^#U_4RtHcI@=+R404=KV@*IkMLn$9y=I)oEaDc?CE^*H8sb7L`x?S%Fp>EH$ zZ*CrE*COROkzZ3@R=YoJ^tPVK6}^s)y&f}&wtu#WDydT@!jTZnmp>4(_}uH$VLcer zgM2jW>+2ajE~i(zIJ@Pp2-?&t~c?$9_MEW`hGy5GI)U~MWE+< z4$MRT2rv6uw=QarGH9KPqDahX=fdSd^Vf9Vg%D&aqB`op&}^_74@ak3SzXQLUV}OT z&3f2?AexJ*e6PRb-J`3;pZp*1?zn%DQf_+CqefNp)^zCdN zPyHpiK6^m2lY|c(he|iwL=>nS+~MWcv`VK{L^|*7Lc!kTxMWh@9azOk&an}`~p4~Fw$jj&7XRML4I+xfZAyn{&4O`%ryUWdcij+5}VZE5!{SeVyu~81je6riZD9YN_ zcJH#|cOr1Z8Z{%-*SDj#-=m7x4N1Iepj*(C0*jIc6Ia5+Blwk^gCvX>zyVu}ciAH9EJo-=)q>jLkr&Hl z8$s6cy-_Qn%@*Dw7f&=jMfux~^N(l&8UZBPij}q%VqNsivm8;ltb?SYUJAucGIt~sa=`AB^ z6ADku3n!J?gn$`sF=N9RD3{!5i-y7`3quzV2S+7o0q_C%0hZx%;nX4~0W7^~aj?!U zuv&zXA?&!ZJ57FbB(;(;3VN!!SiqMf_FD;#)m>&t^-r zUQ&jq45X?=6Yy3^+v7`05nX_K)8?)L3rGHL@{Lc6G-xzyiG@^Fq2GJ00SP-Uow*)@ zmkEiiZf4gB)|#UxijY@5<;=SVdSNa5$d?A-$-NuQ!Dx_?GBoNVq$oTE5KPHbDFX(i)QVF`RN^p=!19-l@^T`q2Ke?! zNU=t-;?ZUaV7UQmvwW`tRd4BhiR6hQ>|M^IB^cxw1yJciw!u}XEHP=yA|zIMVf!lI zVW&2qZ0r7cLx>y=%C@pm%;5+aG|e|M-2FVBKExfwyB#>-h*V8@1QWdYw*e3Y3^+Mu(kUyNvz;2EV0iaMj%RZ`pNd*sTJex zo9{Cve)b!+BHud>%Elbj;#+JOK*WG0t;2jPAlB%dy36B%R(F}c6C#?_U)hhvgGvA2 zF1_EVmdl=S3#98;ok`I!XHPr0HZ?VIxn2^W2s{HpbA6!22MRRQsVC0;9F`45-}AY2 z%v@eu{zK=2eBN$bQuA!q*4BT$h)rx0lhD3Z>2Mo#%r?=N$I~*>c+c`m-5)qOaTkATPJ#s3AtUnW@NZ`QxiR#CVQS%oZdE`^?3($hs zS$7!Z=Shwi=HTSy1_|pZeAi$8!O=6%;eQ&-#@8ejO*$-))^PCtXlwiz%ULbmX>v2D z=7-GHg3tXv>hDT)zU!Cn!C>D9+G5{Z%KEQ@hcBENu^FNB!#=VuQjB+fPv$zAHn%6! zf->VPy5&L@W{8WAiPkRh&HW(_u{G~I`FxM()wctm5Ag2~DXoq#>^@!)MB96s%*DKa zf_I@0m5?jH7ZtiK4Nf0Rc~5b+~?A1;MPY#BqzdLvqH8wu2Z z0Y?c@HU=)dS49oKbNqA{LIwBl-PuVN)HY~k@O*n%n9FZHm@Vk!uhQ)pDD(>Nd<0o| zX2|#)Cu-kKNmTwHONcot;I(~N#ewI0V*au1>V3m5t&G4Y*xgE5S}2Ok&+QkQTgzH zmh$cJiIA>XN@MVQWTnti;k*ORJ%$m7^YmC{013xpx`O6#dIJhE$g2t(qfi)HsiHKo zuSoRf&iE9hF9WQT<;yAThEOG%?Cn?<1U^t=V`JvqEj)T4nZ z@(rMrz*A63^do|y={2DjIkwa^Jnr?tr4m1~#-Jg_07{sj0B{h^P74Dmt<5mA@ zXwEmKMk7sloe~rF{mO6J5qk~n4#2EjcPX8S^90(n>w-<`7ROME_O384Z9~lBm zIKUamKmpTDne6RvNeM!!IYk6=D$#Ii+oI#pz$Z~O!)kyqvwPKLmE^I3E4vNwrLkF* z2hH$U$~p+m-^1D7cH*blFeSd>R$7)Iz!-}biBM9(=qdGYJI?aJlv&tjZ0B}6{tt)J z=HISytoH-DhTKxur>I66pTL#5wIAaovB7|y(-I`yT_kS@zF(zS&>U6m3YG6u{r0ZgRZ zuCFCi&E*zb?;GxM%dcNruD`Gid71d8h`^@GkV3>XX(@SLZ-4*z`U<3qLwvnWTdcNU zU=v&tl>e$_Pb?LxPrM-&_Cc?H#4kyT4+G6Ek%^8#zF>)iI+c~$YGq}tI`YMCvY8+R znv-y#SulB5-ZCpxMmLS52yb|8Zdk_wE2|`&3{@P(jbE(=k)S0K5db?slpd;&!!yVF z9@|TO$=Vt>v~O>3_@QlfthmQC8&%Y?-0d;gJ_dGxiYoXI6YLnHFPU@O4uXfe`W^Hx zL^%!n*Q9T>EZ{iK>kN+drs!(+*DN%CF#oEHgjnf3ug5Bo}iB2ek6X z*R~z+sQ-MKkXkSJu3r6j+g5g7vt3@@xd{b-o{u7 zcByzx1s{~D86>t()JEzlld437X{88gnMskG$xum=ItyAnLT z=vjF@Hu~#w*@3rB_9gKP;Z~pS>t)MIZdlXfhK=9n6;?vYoP_|X%k`cpSet1ySI}o? z>2#vMUzdc#N1Nn~*6#P`VXZ@ogxBKti|ww*vX@tPR#;Oo6tHePD#zN>{M~pB3<{{^ z^RYRKs@665|BQJc2_d=TV7=YSAPI4>I=Oppw4ZY$p;gQ#yU#TRfWQ;{@A25k0o=%}Bj3vV5>iY)65NL~M)@JL`K zL53=#CAE5W0QI#y$wBCyI8OM1Y4>Fd&+EkR>+vlVoECptPb>98hQnR|`|kepC4Apg z!J{MK%wuPL=k7pe-f;VjwLQ=VqtYGK%ZkMrHL~rdv4tYqGbBS9Zou)I`3$OfMlO#w zWezXcQ{e+TuZz*;JhJDaPP48%kd9nKZ5q_l;`Z-?pn)s$RBQ8(R53i<%4*tB_1*vH z0<4bnvc}Br#xg};oZj(P%IdE!x#@R{|DymJmn&1C>(M2^|JQ9&Z?u|$Z5X4+4v!o3 zeuv|?WJ|h6>WEY**^qH+X_~B`8JFJa2?WbA>}(5?;f5P5YDIj@*TFF%_Mkc{yo?By z3bPE?y&=vc?u>SiZ%nK~9*+#{N7-VmXZI8lRP~amCv;X=K~(ytcxupnd6@2|W$$FY zq+95wX8k?zVN_o<8arBz-V0ksyM9F!>Jnn(OQ|PbfqlLKRK_0PfGMI=Bs}!DTA(Y= z6m}d64FMhLYcK&h5xFUY2bJedu8`9P1G+*23;L#3Zk*|sxu?by{f@7RIcc<&W2u5U z*MydJ!^{nf(KfLQbM9HmkBsrBYA=PYe$sa?Hry zX0?C`yLK$euX9rlCZt40F+ycGU~%Zr8Lbc}Hpzv;%-@xW^SIrEs>Aviv+d^9T#bly z=|mNBrYA>7o@qB+`%R#+=#zyLe3w$L0{4)uZ?i584hRbv#*%8~QQKHi6=GC%V8s8wsBR#*&j`Zj7l0qA^6b9|2vuFL(cI2RPqOLy`_V=DC^uX45sfGPu4p$x_!fJvzs5sd0J z8NNy(cY6XA#2{(5PO^vuWoUSkUIr0W^hzXr*c`?=pqv8$g`)>mBVwa6j6Ob>9mz8o zW7pJ)$?X2~=g+;Gp?`2wlh*BwyIczvZFv@xz~$j8$+ZW;=C%H+13ivQCKr#X?BwXy z>8X>7xbN{9ZIYWf^CF6h?rdeB_O8CmPwPfjdb@G59ZK#xX~Bv+pi?dYe`}?z+L(%2Q^-5Q7 zaX0qF96wx*De~6ww`$&s^{pAN4b;9`b_f*!SQ!2|f-v2_fK>35xaDP!f@~!O;=j)w zOF%<1!@k=^2%w8IDZ?8}sN-OW;4q$7mPYjO5PzJ9nafsH*2bKcWH6HVI2J{MTRHyVYLU7 ztlQ&btf3@+_+LyFg>K<@Unj)hu2XjpP8iW#9u$%C+o;H)&pS&iD_1oKUJ&YkUl@3f zad3#igK*tQev&MLU?NS(z9lw;`tffXMcJ+5~26#RctFujS^ z0(@9zhTeF}M<;;mMzYCREIVnOCLRGkIG)(G@cr%W>>3C=&c%j$@aJn*#Nk-v0r{1p zoHv!x!v}nN_v9Zhb-UAO2A+{&LZ4K5?`;7iS96huOJ~S(7}D%TiBupOcR_JpngfmtNPKZzG?tb|0599*e_I&a~`{ z7M?d}b=RM7>#71Cg_K{d&lWBnQUgJ5_Mg*zvtfBcK(T{AffyhY^e z)LIUM@!H-$WT&^)=bfmJc3xvZKTX5g1aU(^-wW$g+2_&c;l$*li>0BJE`u7#d&=~4 zgqYWxwyo!n{f-Nm_vQarqI%{bNd%w;{$E7Gg2}M%v$w4#`~4PNS>!4d-~}6WCJGEa z_|eBGcVLL)Pd~q$annYhRwPvN#ouz!+t79WQa=e2f7T=>#(NKlRFUMkLXWox>EutK z#KicZU z+8@ug;ITldx%7CpF!XDUpH-uUKtyYj1DB-}=q$W_NbKM@!cKNjsV%D_3N_avr_tTv9k3>P8a7QqKE513fSXy2zhl1RU;fFjGdu&9*6o%Udz6fKZ zluB%R*ljv0nA0f*gQ0Rtr9c&y{B4I|ii4DcG&(&;8iNghCCSq6F=biNLupN6DW=5m zEyh9;zlG?hX&Yw50Lo;>1%aFaDl-yLoJ|k2mIQZ`0>56G=HI_SpcRAUt+nT}5nzMZ zl9}yC&4-~z5=BS2V=>-gxdc;ChB3Xd%=MA< zU!~xsd;{`I&tND?=5-3GmvC`Fnzd2jEP&iSA0EuKNovsHAU9)H-Nkf!3(=jm z9aTNm*wfp4O&oP)x;mlJ$e;P6yPE^AFsOEV$~@ytwm7JjvvWqK!uOf5#L$_xib!jm zWo!XWRyh}TDkeJxG)JA906Sw#bKb;U3z>-F20+21)Smoq*RX8I{+BDmU4n`2@mq7N z%w#fE69smBDwT^o<_Z#w-mMDNm-jAWDD7~vmjb5?2$GR*{on*&)%&iF6^Wn)4Uc!D(C^HFoyEC$Vzh-j&l*t*!GChtY_`jNM9K=WI z<>T*0-Sa9Z8>iPXD#NQKmpk!Oh_X_^lPiVfKmt1}%Mab5fu;8N>XezKM`H>@<)mr1 zy|!v;XHmy^e<}-^bH-piAR&tJn)Ci}w+&XE*BRtC<#b=1Ix4zP-&dx3alQ>}xQep< zUzQJT=eOHz1d}S)n=Gt0Q}SPJQJkywhoF`<9>F#pxv$@bw3?Ak@_}xx(Ger zqj_FNe06-uq>LU5q61Q>`JlQSne>eb3-#nd<-O57`raDe3L8A^-WvM8dE58-c5QR_ z+>@dCevk^imKm0HoP8S{b=t7#8^SVtLwoz0C#&bN$ENueEc8L?19s=><$wO=@Xo@w zKJ@;h`Vj4IIPEhz;4_4h*#9%8`$`ZF;|H5hYFOVZ zbezB~bMNCX!<&jRZcqP>n**r4eYDTAo^H=O3{-vBbQ5laY5pBA<}mtJlrLPHKmXlB zZGA3r2)sP7UdPP(2jeBq`+d4>J<^VTV&)x?XzonB?)p)u#d7hyUSRp2>Hcs!AD_K0 zd-M&vDp82Ox;_(r<93bf4TjrwLeboM^1tl9prR#x<3ba>Rv`5{G5WaOITUdI<|NxY zVEBIZDH!1RNln~y!(!igMkC~XH)3YN(whXfIRWi@KUzG$zvdnJpdtJiTehJNhyAYc zy=e9>Eub+iSaWM|`s$sh@73_#&R%e>{?iQXA{Zwf5#PdAI?MYxQ{so#+xk?L$=!n4NX=v&1Hw=o!tPv+D0Pz!eaU`+DgZEnE zX~e9P)3YrT;t4BTDcXU&xg19zZMlWmm6Qs^tP~gbBk9XIcyc}+-UZ%VXgQ$UPZAMv zd+b$022n|f@ZwsNlTq`m^cjv6G3n-EuaV}cDb;>{`~*i#^b?;@;m{3McaPQ0e?K%2!5Ru zku;s%>C{O*uiN>we0kK~+=vK=rQvK7?_%?~M%%bX*MLTw%p`2hy$P30qeLn$k{v6X zozrwafo74`Uw-Kb`@@4@Vq|2=?3*S9th-LbsHg{K-E=yUvc*RbO6Y$$c76mW{$7-b zu6JgAcs9eUWG!VyYL#NeLQlRpXJM)vL2X@C2OxA!zApa@CSyW1;Y5UhKN!3e)+hYK zC>j|rqK-gVaUhEg)B->&2!j1OB-Mx73D`+v2;wB=GuaR4O|>;{OVBDRvtiA`9@Z+V))@#Mrxf%2B`zaFv=UqSFQ%b3hW09YRpxbZUib8Lk zqyhd(Sa`bwS6j}3;x_v@d?P!A;qdIEC1Kax!IhP3eF4H?mGGhM48<}qG@3OZSG@+l zb97fQb;Eb@hoO=ff6HJR5dxo~F8Wn5jO{>?+l-^X#RKE zzgYXs&|o;U!pg~Tar05rZ(gdDl>dDeA9zS#G#?J$SMz;h1Ftv$xt9KGLp-YzRKU9p z%$q-0Ot-?Mq41cv!WPkPLRfu+9fY6IXN5o69`i6xVoYWQ`td%m1E~F-&<;;jYC8@! zb{_UO+p~?0>t$VTVz%Cb`htFg9BvEKi^-T+YouBSbzE91Yvb|^sQ|6Imnt`5Z;$YWVfkkE3EOX>m=h5h&ad%fok&iT-N z;?`0hoqxw}dY&?Cy1b7lH1H@`@H`y22)u65D^5Q;9lr|C3~&|L;5BjEvI%1GW>CJe z3X4t7*jvt$G^x%yy{(_}^v2q3gg^!S4KG zO=b*FpoCqdZYlng1;<5liXk12oZK`Cb2;$fId$UtCpxiBj;-RqTX^p5IUWVfMtaFF z2+-pIobx?X&cQ+SnVjSt4dQA52%T6tQYMXo1xRlS<3YxIuJ}uY3&=xvMAl3w5~2YD z&`I9nI!4|Y&g9FkyZ;&aBOJ@GWJiY|@Q<@E3d|0NiT%%kPKzRUxccz*28V++zx(dyX zklQZ-AN8kDf+l}JiJk%xhn&naHlA{Qf=mOWnnCi@0E>94g!nZQqI3^H>_SfHdGR=o zBCwyR6mo%q#uukQi}piGZup(^u`*#m`kdEckLKvJ|@dJV{VIdt%^M_D#-nB*o~F{LCy%H(tM#E#&W?GHYZ0(MzyEK#O2<}RA%G=Gg z@*zDt4NCdgUQS2`(Cb{fts9mJ+nIfqu;M`{P++oPAWZO-V~SB(M+hl}NaHYOT8}m} zV8CTonyba(b5lwx5X5CU$;6FDXR#-l0~Vw49|nVzx0O~=6qcpFl`~XUmPRJv<6@ve zD&s8Ql2q#ayGiN2o8cc{?-hb_zz>F*2X{IBn;;3O3s*ifFt|YQ@$h$u5btYG?55B? zY3;`?t!3BLy6WupP*~rvp};*7apya_&!f~ZmMiNmSdaCfWBQ%A{jO)rUqpNJa^I7L z$8&N;j@IjJm|ihEV_~Dv`>6;XWF8Y0t3R~g^6Lm^r<)wqJNR%yu&1s*5(EX8nmdk{ zgM~DE{9nDa!<~)a=jLa--k^^W8u^v-c?ID1b>ODGONt}M?SE3;I*o$P##B);x4Y8<0UJ%3Q4<2J@TT3*Fo3g zU5U@vLsMK9?^8G)s-Dxs)319rvrspDrGMZ$E{EEj=to$LXmMU^z45O2Jd>*&+Cx8| zB4(XF4|flG@p4Z?Y+~zos1`f@IW zkK-rW)5;lM`z?0#qda~YbEo!XcN58nyPGR$(*A|6&uQ9j>uu!6-H1=$OdIJl!;eY6 zLnnz6%6}xisZ~1<${!oAbw>*ip|G-X9{5cbyW}i=o%?$a9=Ek<>Hzn=O+jLT`WcY^ z`yS!^I0Pzb#az#-^B*5;p`A98EP`>$=*Pyg2us`YRW!$=TSi{zWw)*S9qAt1SD7Q* z;TCaXJ(=bo_bTC&oHo*v-#D+l zFIcf$N~@f&k5^@X_(~VgX4Qs+w%hsjSLfZfFJ-=z<6b3286|iG+`$;}o|*+L8ylW? z_FiwWqS!JTXW_6w;EfdRtHbe$OAsVfV65~Ennfe3@s%Rt;4+kolG7(bpA|<>%MwZ@ z=Bv_4t^zZD){-Hr4~qx;#ApX=Q@JX+Wysh>cgou-4qC~$y1GI)fDszB@^4=@pbnCUxBJ5mZX1@^uOI*=ZCXcd~^v^m^`ILoYvR1m^of)5CT zHJv5{Z>!QxKnzJVppTo-#!+-cnxaq@b9c*9d7P|}ix6Sjxt^%ZSF#}mwMLrO&3Q@K zVZ;#_51Nb;4pqvJO6MeP$>b#IBh)NRK~*5B>5BkWBEBI+a>`OdnyVFvGtIY<=+a{; z71D_&qvI(5Wp2gM98q)R{{4+YvL2Z}*ffz3m%@b+G8nZaMWhQIwLLy5A9nqV%db!m zL>g%|SK{l@f>s0k1gD^*ATIg^4J{%kvMOaOb7p+EU-^>Qw6L_1MVX>H6rs|RR?2bh zNI47@m^yW$kR2*a(DEh$vvih$=n41}ew2YkiQP+R#LcRiTuQV1~0yHlx6>T zx&@o~NR)Dc)Ravle|n<&92KbzuD5)6tkMGd_V%t#C<4IRdko zfdWV5T=RD}>3?@&x&7q2Qtryj&b6Hmo-v{9B1;^Pv90m--+U%J=H_?uo0r(gT3&Yh z2krM6Bkk2zKFwRD_rYSUt{|-y2z&|M)GbSwpBoH(SOSAvBuh>ZOYHQepAu+{Qwo*} zXKLe>RCtX_g=FQ2I;pf;1XZfZIgBO+7u71a?>qKC{*=|lk^p?Wv|2S6Ez%BE3}lDR zcmwYj@jhSsB>Yl)?Hjd{WO=pU*Fu~{wYIw!3@@sz%D!Grv8KAPj_peaJVm`Ryn}~% zW18Qnd$fx7%=DA}z6_u*kzk2#^*1w}hWdJpaJbCQf{H&X8q42%m}N<9&MONe6GFm!0K&ifSkvYDU)+tHreEZuF2_u+;Pg6H-0f-J6W3RsdzRYu zfWY{f!3b8TSumr@XQZb4Q=Gd|N^bl9;V|CdsG{x-A@BL9?CGc{?)FVXsw+he%j0z?Fx!%yr-;*i) zUxB%NdoInX^Y!rNWK|*T_0J^vTd6}BoN4}8{NLICi#)5X95BYzrof-=gMm`L!VkxS zegF;DfA@cU^iSE{7W~Uuq~^o2|Ent?E8y*Nm&PG?Zz^i`_Wq96>W>_c;}a$eZ+BeF zwnCWB07^jI?sHGy*wsa*t9|Vr*m1&eNB^m^{Ga{+&Q&k)^KCo8=CL?O@8R{WX?IQE zleAfMEY0O^?1P*;;Nt6LuD3na;E|5K-)_rRr_V+eS@i09z~02kPfgz$14@5paHz?-okEX{LAR z&jr74bd|#^?`8K1qJ6~&!RK$ZPoRUq&7Uo&bDCJvot~|@u>&7ha^u~N4R2imhX6hI zHjFRb%SGGmc=JQ!Nwi?ik&Df?xQ7QffPs5ckDJp#D}!wlFy9l36lIhonha2I*13b; zM`0=okZ}{QwQ(j{tAv_@G4Da|$38>mgx9B$zByu*ZJ0uXh=V|*?hKR*mQhG0Yzl9d znmPz=a{p(@(K`+KMt}*+H0z@L0>c$$z6#Jq%6*Z6uIUfQmkz8C5 z`~SHB`s45n8&?)LVD-{mi*~#HoQDL=ehMv=oqI+^)R=vr$ZH7|N$(#mG?7-rg!MFu zCS-DC9jPx79_1dE0h?{g=~4wT>IBUcszz$RnN+4Bxn$R zS(uw5n+*CymYpJGN*L(?>hH2uJub{PPFeFM1mpbYTH@6SN?1U#i@G!h-2Py)b#l{W zROhfVHRRV|?Ad371pRT)@lQ+kpEfaD=FJ>bI7Mt%F_%;fI*hn53}cK?)5&yDL0)BR z0*R$42M}uEh&Hn9|Bw(&Q8y?yfHVLsOiWRJy<`z8<=~ZKEsnHs841*5f7cqv3Q=gh z5%GxFz+ms2~ele8u1!R`N#;;pGF_2IV_c=W zAOLz1EFlIU1Qg6ao}Eh3D$_whMctf$ol+@VOvvC?c!DW9=t96mc|r#Xy=Bu#Z;#RV zMFkm4A&LhGk6Dg=o<2^VR2&X#J|)uuT7cJCAz-4QM^8$X#&;>_lj1WmmHjK`$P62g zSgDAU%*VjXlZunLCnKF-`W<}5a}-s!CTC^z zwd*c@o^23xPx%OU$1Gd&*dAz1N?ddw?QX%e%4bY~RdIIyS6HE|X1g^|y|jwn@A%T} z(|7AH5@==yrfl=Gm^DJIM|06Havg9`Gfp+u;eVz*N^ATk=9Ben5FfY1$QG-Zt$2Ju zv(RPQ>(*o2Gh5YIynmKZ3Ho~kj$Gd6L9%nse{(R{v){8Ui6dsl^Addgmb-#$q0Uz!| zucbX822#32V4JuB?%~^qdW|y0E6Dj#c?@Gm5T;xG*8~{v8IEM~sY!?^tuCpHW3@>@ z7Io4q!<}P*XEitFaf)+sQbia=okm;HzgFUFXhx-7DMJI50MOFB+`#+QUSSHu@%z(! z`85`Q7tckHJrs*_5>zl+rMCyhDHO}V#QVmE22t5F=3SAumkc&-;ZZpM2Yc`)+`GWT zwM=KtV)pYnZO11b%VDC)Ylrn8svn0@g5D`eLhI9n0@(P-b#{Bbu+i{fn(ilfaxiLB}KMiRy}7(z~a${d?Z2R{lSD zJ{V&0bobfuX_0)L<3@Zqj(U4eUC%L?&ilXAECB!Cng1ucoxlg5HU}R(clFO*NT|<^ zq##>;WbD62f79U>Jd%+X8V%0i?e)!&rsnNK-@5@kUL5=VPPw|{7x?8WYq_`!e%C-S zTrfi?FnSh~+O)WR_XhUi0WSjHJ~xB)9S%32fc+d(d3(LWU2(LET*P3U>Buk4Xx9E_ zy6OnX|A8|VT~ouXA`7Pef~6pS7NWKzw5~_NG7JO{X6`4Uu5fI?7ipV+3mLC2w5Kc$ znl3eW(QD$3wT*$Rcs@C21Mo}dH&nx;0{V-&qGfr6@(3Ff#MzaZMk9y zyhMp=)lM&o^w{u@!j*br<1em?3o-td>l z?&HR}gB@1NmW|4X!8pM-=}oQyIK~^WkzwFaOa7vyMm%D`qQ)|z@uq0UXe>fBhD^3Z z(~_9synJ^k{95%$`fiesTV5r9J6zg4RcxkuGNl?uxy10@VBVBmhs+f?1!8mYwYNkv z37dc#pf3z52W>=%S{e7-P_~F&f&}munz?PNNZcqj2r#IZ#PBsiH0Y8EN>Wt+>_#7S zKMDB3$?uCT5qh2T%wV37*k|`UhTD}fg~5rFMBSX*!dwc;sek9mPNB{DzW9bO%8c@V|6+c2}hE+2}FK1b)RT zFb_i(8bF61kZDY$i3K^MrZUXtAb!C;EuSk(ljwEMiAwM_Bx0sE*y;1kC}jy}jHF;1 zjZ&klFkq{HD$3h&s3A^6YCqICr%5+afrg+7?J&7FhQOObcDh1&(5y`s@A%OgH>uqb z_F{td;?f!HuV^T*%hVx_>m(LH^Ww^jCb3sg4xgd+5+!k_!t0-+;((?C4J~R?@h3DS zKEE`Q+FC@l27qTd8l8!eG@ssb^B-Muqa8WPKik;=Lj~Vp#5uJj+eBToSh&`I*X+@+ zp63TEKP{94wD{uAf7d>k$czLP9Yk2%f;rYB*u$?1`I*Ff3y=QMn^!)m7XFTjQru;( ztp34ug4g((q_x_0UyoU@cn6+xxE=p0Xu7l;XjB-$xp2h_&m2C!Aw2pCFKk^y_}>gk z@iaQbD1T}Adi#DQG_3h~;6wa0`+ep5J*Zlfg82oF3 z=M^VL=Bylv-kg%Jb;Fll2GcJyn&1oRFV@iiy>WscxnVtMONPPwPx`Ap)u>+NNiGJF zR$+W6Ujjjq_zeG|-G30YUt@rN@$eOF=V#f~AJ=<4+MW(){IJO8?UATuHU8|)>`4M= zwj+3Qu}EW+a1J*3Y+!l~X_RkZEdw7$NyN&8o~OCI!f)%OeDFOICrc3?%-OX`^Qs*??y?XfMLJ%( zBq2YaA{;x|rpzJ1;g&(U0IUBg2$p+@DSP>PFz^JGmUY3hXD>~9r zk%0w)BSgMH!=n~l!0~uGgxdqD#}W?<(zhgn@IleIMb$^)rj4{ZHW3{Zr=UmUP~&^n z{7AU5qWl!fc{1~2+4yf{TG%S^%{mFql2n)iO0jwfm&1A#VwP|PSV{wb5Q_&*)ECH2 zGRdiwAh6I!1dQnuyAV~xsT1-ObSPW+$`?jZQJ66)c{P8mZ4HjD#2#KZei*9SBB?3C(HT`Vwe z81&I|NNr{SfZ!#f$JrOzH1A|d@}3X$$@u!hH*Wm+`lz*TY`8Yt)`KNhAsDK{Vz8q- zIfU1!TVX;?-X?YmG0(5+yjCR7GJd z3>s-G&3as&ueVLt#qdr~sBz#(>fJ&aYCClrDu74xeYMu<*n7I=tJ7;tGebl@Ce{fs zURxKwc;~(AtsT9MuF38j@e)@?=!qvor$QGmMpP%Tkx;iof{XP0*^CWet+2Y5DW4v@ zs+BdB-sZC5t|&{QFZ^?p08+@%3 zoND+=j?9BcAqKw>yea4S-<;%L>zu71A*S!@RRA<}WORs=NHT98=jg4Dv&b(Px;{8DWDZ_&cFw*oE z5uIx<^K8-ewSOCY*DFqzrcZD&2NQGFh|;@pFffd(<0}U#^LxnS!Q=qZoOsc0?n1;p z>!bKHH7kjX3fxU9Cp~O6W%hVbJi>_Mi+GJj%CEW0<$jTT_db(3s zIc6&Em}Y%FIKDKVC0ZKtpy(uJCTl0fEc#r>ghoNgR7Omk6CA|&PGhywk*A#z>6UoL zffook86XX{M$j5gFgF%>trQt3;!02IDlWoMu4Igk4K;bRl&~!FjUiOJ=JAm14u0xKwg4>5(p4Nw_wO@&z@$P^}B5f+j9S9Z!1 zok>GWJ)sIrc2<^JZV2)0@8?tzQ&$rS@fPXEpNo*;EiEr$RiVJNQIS9Hr8-QFY#=_? zI~@7_LxPNlfdGY>&|I2RDgxReGNAxYx?wAT7l-m0Ig^A_T<}5CCJ$7SAV4={P_yy^gmK?U(R>r=bX!@H6_ksa|NYJGsanz5O$$?Zo4Fja z=VGN;r~2dJ?Y6yAJE_y+QB(QyxjLq!a}H$+4NETh6MA}1JO3QSClR9b3lkft5~72% zl}V=(U5rrtJukB5O7c}7173VQJtd8hS*hS8xS=EvlWgK&OIvZ7V*ib9kGJoZQh1ZWSZ5?&Ua285I@ zE^SjLO+nRSRBV;QS3<)lynVzxWHm<9B*K9nD4o@2m{&)fwqg)VLxyK+fKmqeD08&C zfwB>5$$^w_+5Ah}LcBy9_Fw9}Jd8r^&IV#Wn;lZAM~%?(Hq%R7N3ITk-OOT(%C=Ko z*+~%LU3b$BUj{;DUIt>R1_hwXh?g;%D>@zV$~Jdc?XxCcCpUtNKm0_DXBzjHt8W=~ z&m(tQc{U-Oq+BuYOaB^&E7bd1)46x+cI0-w3BJ2+7zynrmLjSm&qhxDW==gnk14k; z?-p_6di8oWm+gwpN)=~SEiLs>bp{sZ%0?^sUmh6vj^lW|9e>=#{y}hb#T(&+T|8c_ z;>=$M8y0Fb@N;zc^IY@E@M9J3_ea-fa$dkWCYnR>85Tfs1O-6jnc?s` z`57GWg!W1B`IzUM&ian_$=r8)CgpXGB19A*2yQXKB4gq3wFW&C_HF!m$GKD#@sOS+ z52`2%^bJa~`=c>LWw5{~Jl2KBcfsduacm7Zl4T!hbSv-FW+bw?9IO_l?#3wD`HA#o zEQD5!3kKvO5xWEX_?6c-Ui%d3jPjIgOGFPSjCf0LFUDBq-+_cUV$$pLIuYoI5!IsM zKPm1Zi4gdZFhx&OSm1J`I1s{>_zKFEyv>LUq7lAo+69k<)0xsEE8`a-H2Xks|CXwbB3dtBzDWtpQGSfFPx$12{26lNR6%G5H}(BYt5} zcrX`-1dVd@SQm@9^u2eK?0^8r;%>?uSkWjJ$ zC+e0~$b7_J4#qsz`4dAq!%~W}cM(-lIR=Bm7oNX}5Uz~UBnt$tvWVmoSf8uq*iOmF zxN8&j(~(P}ZbiundLR|V4p>f+LIQQ~3eVGOF52qOtov<+muZx}_-iBp-6atfEp~k( zJG<>h?*RxqvY#%czMt77^~{dyyNOOe!!koK*W%!Bw#e@T27#XZQ?gA^rv3VJ#`l~~ zN-;;%`3L9i=6bN#zy@lk=w}2CxMsM9Xt2g1Xr4UbEGzA)KX`tNUczIo6`dJ{SmjnK?`!h0Tl z=NAQz<|XxbO}0`?vvZNu(nF@7{A1NUE~#**HnW3+x^nf+j<^QG9)cDIixCgF0z>H< z^kqeDo1@*u1h5pDRzG)nlw@;|p_nC_<3iX3eXJ@Z#DJ;$#1Lg6WImxm6}FHP#Tf`2 zIl6oB7-9fjv#EqE$IKVuz=cHYcw}dDVoQvQ^o`@BejhIQxmL!eh&@|JIrWwwGbTyO zWPw)Y-^n``q8nwS$mUEx71nAk{}AMXoVaX91D7_4%I?hnh`a_glCVXc7;OlB!{Y~3 zg=u)y6wCuYr|})FNw3GT_fm8im$Pm<2rMQIDzRCytsY^f1<>IYVW@mrVtX31EJPT8 z^Grhh@>QIQqEYR4Wy4w=T+|k84lY`5gW9{Zu#${`A1jj3y0sirmo=ZFKaz_Eiig%)B^XBiOS?O)N8=aAdG zXfYFa^<($HjCN|r%=CZJ?FQBx|LcCGJ@jSEx>R3F%QLQs(erxuQ1dg3GWR7UU~=_i z`sz02^SSEn5RUXP_=mti(huK2G-9f1)Gg0vnB32gyqlVHV30xu*qf5kBcfwK2*E|&gF$C=w6Dp_~GH4H|hf^IH5DfkDQ*xJ;Mk6Of zsN&37h`4%uTL3{plYR{I{Pgvwee5uGG3A1FMd_j07jd{B}}J5 zMFwhGEBIhV*)C)gqP!sS3_TGhQ3O!%L%PfoaV~PNxYk%j=A;<4`%a_Q;6{&}L8J7a zLY*U-7F|W~3QQx?m;GK9CRNDJ&#l% zsXw25X>UoTg5Pe;LaeqAzfYG8#m=XY9E-b8jzA4!5>uQ=$Ci=h=S@e3f}kgZlo4Dg zQC$q=wRCeV^A6;v!IAq}1tO`A*!i`7og;>7?S8xG`32%cloqv0KCksY>(Bj}*~_## zNG_c5iO@-+S)6zn7NAqtJFW`?qL*WuGJ|-H2wD) zL2`V}Ob#Rls)@UdF|^&;9g8K%&XcTvTZZkvtY0%~81W5D#n4A+oR~BF&8U^r>w!}M zY4^K@qD6!rPysU6`$kI1DQRu`J$Xhmy-s_TQ`x(iMul!%HyOPKRpOfcnhBsjSoT-D zlVTx*!>WGua_r)oAaYh##L#E>I1$%?9 z*dB!%3<{(bd=f-nHJ+s>35Ny7=F+m^)>uTTz*Sk65PkaO<`c)JgTeE^HbgAlx(G)P z@a~4HE35M-{w>vIn|zzUv)@;?)5|tE@y!ndRc+)-k*I*aGauA+gS--3g;6Q8l;XqsB+K~r~fhgG!og|Lm)tinc zj=PyMWi-$o5EQ6bh3>YZSfacw7IsW~ZVHp@JdA2%8u6i(dX+)fp!9L?tbFm@6cXj9 zEp<@`#{WN>t~#jA^=lRx^lP4BJ(flRi0rzE2IsRhR~qZmW7@E6-`FkN3G*o| zOjoFV^t`_}jkAqMZOF69=*-HX{oEX@oIao(C_r}CsSieK8DSoAGh6x4+C+TcHtD%v zSxOptPjSEa`|clT*Kd>f1o#Eo%6)@z9V+w(Ygi*H0;u2 zEeZG!#>KBj+~`-Bzq!t}?0Dxa)prX{=8n9->ihQtKrgE!f4x}_ynSc9LUq2_S)!t% zUcgD__vB&<-G0Vk5gAMgBik9X>zSBLjPEwy6AaCq@r2dg7)crNO_YyQpB)*I^2|)PejC)GK=9@tO)*ICOs0R zB8c>T7;-0~9xPclBp?clskkz$o~1SB6fG8^8K#30E#Dk4PZA|YchyBC8E9|O6;c5f zw&$@(^2Az@-SQG(rT8XZzLk&5UQYvpOUYY8(E$+Om@@?KiXpYVw_F0l1%U{mdxJm; zhvpmyi3!v3aDIn44y4`1ulmvNGhb9nw^3ME%d8zVg~ z8V1M6hHEHQjJuMWny3uf2S2vn9rd6`o29@BuK*$t53?F999VS zmNu|bBonc>zCa=@hUv*cNpxS@Ywc0|e zk$g6BKYACMg1|vR^;0Z`B!z?qvHeiiH&}yW0Z$W+8Q>uCHGZrXCeS=;xoItajIabwKL+1;K{D;Y2bX*h00l=4T>&fabU`w7vni($y5=Bn$_l6 zi|TlE%*`*=PUxQM^{=Irm6GVV8#>vMDRcdkZwBvzJ= zP`s5h7vbnoh1C7j?BSYKcuXQQwgVw*MfmOG)Jvb%V~l8LC{;Kk#OU3PgL^#FFJt}P zgPccd{lD`myc=yh{rxBVE^C5j9ld<~@B|RoUA6GH`9GR-1F& zAB}X>;AOJT#*Zuy5?0TOW-FZ zQ4(N^O+a{){9`m8Db(7We%lL4L|?RoapX%=Hk1rDpgjHLNN(%D$8yFabh3`Kl5s|~4PXUAAkQIR0j(8B?#7bca=DSc}K`W9mcHPV6|@D|zOhVK53MNG@W*^7cvh1zHpTIii~9e~jL2 z3tm>4JLdl0>>QL{G@dHnnB9&1!*%?7);~}Np1FFhq>pq-${jU++Rx&&fixN|#4rqJ5=)Frg=++biRKL;7F4hF!meFGMbE(Xf2=jAz39nm6~^_Uh+9 zO?ee6ncKnsWM=eE>tEL{q_jetR8=hC@MP%WGwr5tDKH!_Sbi=Dgp;pDQ!jyB9Z1(l z!YGa?$5*T=gwhIf8HDRovFP{W97;5#C^GLVEa$G%34Ybb2*4^X>;xPFPg7viuYr&*P_s3p^!+@NvaflP-!Bm5S)j_MgBAhtcVX*zLev{ zOwJMKg`ke5j7H*0C2}!;p{qK0LD>qS$Kp3|HbSEeHUhEJWMWZJ+jRZ>5Ngp6^}-`N zG76{xqI;>?oaPy^V}o(l2feC37lkvHaVD0~78qkMO3^a5qUHk%sJQGk)&gY^7Yj|{ zN!lAqmRMua5t77I$t%P{{hQ!i@$txE3S_t?u|Gs0Q?(`N%FcN1Dgmhhy2<4vv`k39FHBd6J;F*!!clX~quiC`*i>L(`X1%%rip zHbX{Z^=6DkrZ}-PFFG7UCYl~0DH8i^g?#qsS2dXieyU#bkM>S7co$4FO%Af_(VVb` zpu3a)I^T{!l5YZHbg90_)z$~p1i!EJ zS!P3#lP*@KG0vhP07{eNBsxq6qejlb_|ShPI7VE^Yu<>4oONJpAlq$dNnTOUll~Yk zLka;>U`ofzF;sme-4BPi1T8U?X&qZtv!MWUo?lh7aeTI=#=*h*ksVs+H>7duKwT*4!FP-iH-}`8y{@L~iI|yeK7+Tyyqs$A0ZRrz?uIuY3t}#`@!r0*_MP>_uv7kRS%I}CLS;)+w=-6cN2IB%egJNm^HHJq=A72DkZXGG+vtn;=H^1n=N8_!$89I?3oq6> zVUex6_^IY0dGE(xOvZ1f0CS@AD_9i$m#xubl`yCfSh>^ev~w9@6JB|EMo;H zHSL>8+-Jdf^iH~0ti&}RieM$hikbi`=EinUe{%q0YmD3Y!Jm0qCum{PDN&I_6U=Qk z@Wp%eEO6s<^aMSRj+__1TPV?$5h69-Ms9%N)RF@j7|yga91KXCyE4O^NE(lkkw>WR zz;Ll%#q)Aka#h7KsT1*^hPgOO0`jB(EY4Xh-AS#AL~|!unN|`yh<9Rh*f>?!%$3J# z@;d#N7yM$b;iY-#rQt2mCCagMk!APcw|)c^KX1w)bHfAje;+9gV`R)|md+Q)F4TVG zJ^fY9n{~S49-74hE+W0TIOpMnFsH4_gH-&I7I79ALeniWL}6?jpQP3Jtr-jbSTmP*F1LyWnqpp z&8gLPMBM#b8>ixObGzs21t&L@ZbVrBzs3IR9${|u1TE2_hI~B_BxcZiz}2?YL^QPj zt^aN7QK%xfad-f&r|W{1EF!%rQJD67S2yzK%+LVJ03_1hiB{LM4;{bHy}!3a-`pRf zii)gd|F#{8V(pf3+k(B_mfF3HcMsdh{p`BYB)%uo6%cQCWm6 zz^Q48i_j8@KeEQ2MJ-<1&&WWO%q?tnq=KgIpdv@FfOPlC?KojMmv0|gvRoSZwN zhB{u{5rH~DA-wuI0ah-M9oT`92;v1CEc7O0g|PMGoJU+r0=_YU%)cEn zE>*<~?ZD#=6&F(CYDwGOCjTy*<1d0QiPx=r7qOUBU7klfuQ&RYQj&gFF<3i*nBizt zhKb^{zr<%$8oEx>cX&jCMb7bF1(s30SXP#8V(T#v4tWk_s!Qe?MX~1V1wn->Gt`GA z1B?FP1j-T_yqsjVQCY%?(R~?M?)01)#r|#-j#>CHjhR`=(0p-rx<*xJ$tEM7ysgcfy;2?HHj(Q9iqAW)leV4iV$)BTZ&p)Kr`xhr8ssy^1xP9V~tKm&DDl;r1R=vFa5$rhd=qbyK~;*NI86lzx4KM zx}giZt3)3;yC2U|`O~jSgswb%kEzHGZuB4EG4+o5_*(m+iKK%*_KJmR{^LR~#XXZR zWL}}UzOP>1Z*i~uJ-4iH;RpKe=~kX(nV5cAX1?c9LN{e`mou2UTYZ?i``3NUOdiHU zfBAi{MIU5eOIa^Q34h!%PQCc`jA^~CZ|sqIe`)1`zg=;2zdCsff4j;3i-!4jT-tg) zgW9Px4;d)!88yp&h0T4LYFT@Ty10(#zT+@#=>tLN;y?msbpdzoLA03VU;qo0DFWpL7QW@eABPP-az#-?2|?i%<< z=a+-rGo@D%E?n;kTGZunoJKltU`xv#)91L}zRyd;=1Wmwi2-Ns9$(s1(;R;adN-pw zwtt3M-pE=LP_%NBLyj~`de?=1HVQFFLVcgnIS20?9MbiMx(>rN@a$0$Qk zOaC+u>48hwj#o#x*?LlPvy=vJE;ndy#k6gqYX?47NAS&(IoiXdW7T2nGJa=(O40R8 za|iQDcj<+f=I=<2jk)6BbVJL-`F(u;iqbFcDI3MEv@M)288aBBq&?_;Op%w1>CZQJ zT8?42U0*(f;r_vs_vGiyMdw#n?EeI`dychHg%Jz?N&0(T^A_j*uoC)uO(o>eCS2sh zbfxOPh1uPQ${PDHgyg<0SKt*y#Ld}W?L}nQ`ycSM9eI_AczOTkhI5unwvn4tb8$e{ z+MRxz4c!BKqObkmakru`wueSy4LfumtmvKGW)v1uvFNtGYs6r5?I2k_yK8LO-)q~3 zvI1T}VBbV5G}x>B$L+}yKR>_6xL@hiMkZ?~cdOvz0|6@n5#n(Os^k(hKZz6qzeF%< ztpZFGXO%O9S{Q)jf#x8%B4Ugw`C%N*h!X<$OMYkn6 zR+~TwKS+upoaa$dg+`!|S_(GO%rY964AKqf7-az6sh9vtfXG0nrHgB-?jNh*n%Ja^ zEp19*2oLEM$Tcz>C(4b0htw>n*x5ur8k$mLpFfAx#l~_+G;xb7BNh2qgHkEV3ykyl zFSmDB{>Z0;@k@Up%PONY;?>~YN19P62f|aEQV!OIi^NRv`yNXDaaOX!kxjE;YCDRN>e8lk)z}!XC!|;CQyI~+?wi!un2xN@x-Ifl9Ym| z?ZU(>&&S|meKp87-cA#kr?cf=@Iw8 zs#^)$trztA?%`Lv7Y;iiegnLU@Y**IQ@-h}uakbc`j3O5CV>`Efq|N@loOZS;c}Bo z$nl7R)O*`PZ%;KhEh4^TN^31fzg>zj+R4WE6xhEw z*-zuZI5>|!Tq1e=O_2Bg`{R?x>+*%eZJ6KKWTqdKQn>LuXd!#%X8HR1G1p_$Hk*pE zc55@A?lFSZk5CvT=Vejr*WX+Hy_%QV_m2~Wsq4UjcfHWmITXLk#LKE@zx21=(pbL3 z^@uL_d#BstR7f%OgT`g)|M||&TY8~s+|c=Q&aY1de8yzu;(1xI#Y2H!*hl)O&z*B8x-Gw6Ks8 zQ1Ebo$BT`(&M@@}hB8dLXPKQ<(p)%fQnH`i;sner7&gL{L~(XT)-v>+MX0}rUPfuD z46G9eyLyQ^Y&oP60?Rn5OK;ApBKK^VZ4EakxWFmGCPu?40F)!X{Dul!GjY54}uSe)3_u~rpL8=*6ZXM z=xyCGaqAzugN3T*uFyWaFd%3$BWNLX(o^D!`^1-<5UB(iK2doeb*nLHPVn-MgkP}Q z;|}~xy2i1ChureBTj9HfV9oBG)5gpH(*e=YQ1johNl7@dadGbldMk{I7HC*RUq15; zDW7QHAKd(HC8Xb@>3%%Ty&0&8xGlWD*+L3Ax4oZr>zz=PVE7NUb~;Uj!ffKf`fK5w zP0iIbpDJce-RLa3`T9ztCbj*ihSy#$LN)2#-C!bKTV*R@D7KZ-ShnV=TsoP9e1?|W zWbBvus3q5qZv>%B)})tZyq*7$v~F9 zIP>YGtg}WuZfDlBYARu7RDDdHL|msRU>C09GIsw>Dm}i3Jdmy6$^U z%pb@Hjm$Km_lLv16`RXfx>s3{g)+^uXcKfx6d8qG4G9R|8lGE-G9_9;7#kLa^CyB< z%|U6$Ug(+@y|#ql8QiMj55{{xe#E3`=@JVi_0P~l-oua3AQP)deBSx4tZ)3MH#9aj z#kul`&!8wu&Ry9_89~Ur80@CO?qZ(c?j}h4@16RzdFs#Wj4?N=3L1oCo$dAKM*F$i$qR8#q5wd9SsU$PmRN=X&6_Jy5QVx?d&=pXmz9hq_n7d*j zVh0fjDxfVUc!^_TyC&Dl1AOu%07W$rd`U}Ac9Xse_EwdOf_2{%UD%I@R1h&hzGoCz zrDALS9P>xFzx`H9IQa`qCMnN5+)an-HMhYbj1}J6gTrp5Ht#({mW`JCWkE{qp8hAi ziUF$DH;xk}(Xk}I_MNHMZGi8c8R^cId<2s3X==kFK1XIqYJ_jo*5;+i`P=2jCKXc8 zByg%}6|J{BWCLx#-Wdt9?X3~f;hpzA!$Wbt zX`jftCPEej-C{|4E=hbZ4~E^!)G=4OSvvJ{Ub;4RGJEd5h@t77r6TwIq+U-JEFzb^ zkJh55IzihDmNvF7EkbtP=)Uct9o5GRoLS5<4P|_pvF02n;V+U_5(vHXd5Js?RfalK zCIsf&F5-GBd~;|P#sQ5}?@FJk)SCITeGQ(JS9{kxJ-l^j&B_Sh)9wuaVK>wPC;eus zV$Pok4KT$uU-WlHS0E4;elfu~EGkU(jU=6(Q%ZnObY>3hWdg&h1;es~lTFdgWs3+A zXsyInMfo$^zY&)h=e{9z6~`Botf`|ry<{y|5aps%oZDzyabh-(!cy&+5c3f>SBBy! zR?;2e6UF4C$q)t{z1YI9FLt+(*=Q-cT|4tFk<TybWKk~`rVK&J75_vZJ zqTnLDeAb1;XXAp~^*?mF4uCSZ7EUp3Jzb;{5QDxIzZ_pO8+k|=XzMQP zu3-<;!r($43kd{i(|lBuVqjozRfIiTgtJkHvof;Pkg0|b<3yrGvrj*YIhGp13!eub#I~<)azhA(f)0Rq`L2;^Mq@HoR8QpUeJf1A@0#$0 zg17rO`!OiWrZcf+e}dsMRyn^E-8Pd|jl5#zGw$NHnoypFw}<>H5SbOkB4NPZt|A>w z&cHT4{-q;Ghcfw5<##@O8OUM8N({XbF^!@m*oC>+RYZ;;99W{@U$VJ!j9@4*ZTeAw z8$G}`G*j^xrX#T|pW~QyoZ>m~Mt)UOR`T z;U8D)cekD4_FLT3b7g0@uQij=H?)!0m#@!l54{7r?XKyVy4ad_sX9vV?Ql<+-m$qa zV*AHW-(Gh^M7P5RG`epte8Xyl-P7rEJ%e-oPUBN116Q>AG@&@-e3jT?BP4e6sXEs^ z3OUblXU|HvBZEMsac!WRFy}c*bW8v5u${g44m6q|MSrW*?;M&@i31(I8c%I=e&l%Z ze92hP?Q*rXDMUIez4_aDGk0*)N&2(qViJcrd);o3ISJVC+2A8SReMr%{4KTm*(xW`+6=EboP3iE-!b!b<=?<#i-CSB9e^fFS zoqB!QsiL8n8x%U|%KaP{@h0IH?nG$y>{&nZ^U-6)f1P{7`Omx^*8R6k%RqA4g)9ZMt}mnjRW zL$h}KqS$=)xWk6VeB95+7E2{TI#%51l~J~4SvB9?e1RNq9ZL+X^1j1vd=ae}6Lhdr z=pwZB_+BgeO1gDs8Tv2p*oguZX9zi2f(GXe$IQKVhqm9VC&^?&<1M~?_Dz0iak{?2 zOl7y~SDvyAbvjkH9(5i6SsZ`XzFivP?++|X1iT#i%)EA5i*B6#P zKSn6M;u8UAoN9Qvn?lEX(*B?3qw&BfBvYS5*ccmlHY z0dO*NXYYZL^pZFcE*L7S4m37<*lO4?=rSoKK~8;N%Vyvg3?nOM8z*Dqp$@`ne7-<= z95dMp3}lNBd`MSfG_|u61U$FzSeb${khue8>sTTojXB%j@Y@)UL)(>}q3imqYS!+S z1~($Z>zfA!`pVxgG7Zf`Q~)SVj6?E*EJw(i!Ie5nF9q%hH6ZoAg)(@Wz`iUBMiy8- zpw7pz-b{aA0ax@|^FSdN2`iA5!L~V6rxbfa5w1lsLa)R3W^NT{-bAnjSOV8Vu%z09 z()JbAn0c-#D8 z_OR=B2Ot$WmT~k&sFC!y{dr%4vNws<3t;W>4oa=5qEB|S)mo>rj*ELdZGw;SmgDyW=q{bJrRJ$K$4O&W)8eyihqi<5BPDKHx zAK2Kz=!H^GGin3xY@RvBhwtBVhhgumWJ`J)${PMU0V&L^u=(7q*nOUyOL~v)rkZit zx+B759I=ZT))oZ_3Wr*6Sa= zEd~r$ZXeY%8YbKun>7a8g3?EVkg?t)Q&5v3G$Z)ADnolUHXU_!VGln+nph<2vc})AWmrFrL%7Jx%%X= z{hYa@PUn9_pZnSArw{JJ1ur)@Q&5ZJ)Y9M8y^bBYn}`4R0{qYVICbJ=C!F%pCh~t) zp^y+V)_*(YBrlc^kaTDfeVxYmm#Dq4o);vT|RUPV%L>sUdVFF%tyYn~K z)?Tig@TeCjLrv-e(40!IuOwXAJ#TugBjIA90q1+E5oh23_QE}QqR-uAwP2%esS;$e zeGJw>KJjcO4(TNiIjrB6 z4N90cO7_u8P$Nf@!@^R8Q~MLHA;Kf3!mur!JKe2oq_lk}tG{b=nF8N~r}AJHmcE*4 z=`k{KR=8@Mm*0+mOEytg)66!4WpZ0QujD?QJQ6-!IQ%9R8?EesK21K~2Y=@Iv1WDR z@tB&B5B$?}?+@R1ctSi@afV@SL#;f^;idsKB?TtRV$<51Cc?Jo;MkClDes~OQS2>V z5~SG5dXY0!f=q05Q`Zh9$LK$x4C$mdw#!$C;|+fV%QEE8%a%1RdxSCYk&4k0L}G78 zW8+qV1=yYY=h5eVk-qgF%e)UsW=cR^*XXQKcR>-yqc;+ZPn=E9J4O!d;9LM04f;rS zDO0AgfihGKO64$$QE58j9Z(w4Mm9Mr|Tae#ctBCf3&;Z%jzo|TMaG1PL$xLD=L zN#AG<7`a%w2hx5rHR|SBsz4$7@~& zFt3*1_6ohPmPs<{$aduY{<1#2zj)4lK)Pt}kNfR{bFU7=F^{s*Fvmm-(cc@p5Sil5 z@rAS4Ui=7pua{^i`bf&Slk7am|-7>+2EaDE}b z9Nfj>n1+HY@ZS)&+Kb$if#++M65gV*r(aQcs~mBJFyis%9C8sam2VT<{8>ci6IBg;t%n-AX_X68Q zzf9?$hKcFC4U3!?3@XMuIXDr*I0U_>YEpVB!3I+>0M~J5ykpZD_`kKY&taysn6)P_ zrv%g~f?bUM0IEXdh?cnR5N&8*=U#G8|4fulZ{E|9giS%)`$Q6Ia zanjZi9ra$%$tr-M5HFsUoFhk&gkNP4SMFj{q#EF=C33ZPC5mkx(G?KdwRh?5^PKc- z+Baw_xmXGA`1)fvd5j@J^+q7SJoslp!lN!0lyrFYg_^SX=ZKb^zMjesx-f_;d)p0D z{nMPHo>7c()h$~q>ISE(vY38p9f%*`t@Rz5x~X`Z6olCMl3jRDz3fJ%+a^|@*d7m^ z|JyqE_ghW9iVrMbP7<7L(7$d!#q{$=_M{z{&RSNDPelCt!q7`g`U{_xovvT0!exT$ z_U5L4_l4c@x32KU^of53n6FFO$dsXm=A1lJp!uPHXM*kpy$l!j##4bnC|Vp~TT6B~ zXnXzr7%m9LQDveHBqqf^;a|NU+ZxC&)nD+>T{@3RygF72L5)&FCXj9@X_FQg zrzAN|Rhm*6QG;)--rPWh^}T>L{5wXo6S z4IzVF7oKp0K^;KL5KMn+77?@WA`V&s7a!Y5yJ$=YBd2lMA!@>3MG|UjQUgpVxX3fi z{gEM&U@9K)ZgvApzom=~kB7W^wwZfPWO@Uow){8d6vd#Tc$p#@f=M1Z4G4e|KrP-d z`>l|-a7m68&L=blK3n{_4~Yg50tXf(fJGr}!MO13)ZgL{-~+<30bM}G1vj8_<%+H9 zswqcHoJ6-`!eUei<5=|1WDy40%zzwEO1b=FcvUgFew5L{Gy@KPAi6tH4lh)EIvYEv zwnC1&_%3CIu$VKlZ$8eQFoDCUDO+$>(VO~xQ3Y;5#PSI|D9=1`|42MAem~oo5l2t@ z2Oqghfv55WvV0nifX&%)dAx=!B{i@l5t68rEi<2pJhcfZ12Cl{=j+fkDtAPTiicAz zTlt*xVjNvFQ-GX!`_A5IBtLT22$I zT7^M#N$zsfa_Vv#wmwxFH@G&UF%RlnDpQ;?d}~2Nl}nV{U-X|MJd+_K3crj|n1VDc zjh|tPjlhP)c;70;7)I6O%Ls|(Y$JNkQ08@}HUa{k8sy}Eb-+I{hf_ga6ESi4EwN1erIs8Z6? zDD8CgUUU5W7IFo`Hv*WPe^SGiNGnE& zxH(IdD3ulj*sj!NBD&7`f4_@XA}0RSUxqwb#{1EvN1lHJx@_9P)3ebtB&*CJ$`Z*H zA3>(^)<>4^1-B|rhi}ET!9K4l2uGc`081o_c;D~r<#yDz#hCZ}*LgFsO>A2;|MqMj zMcB!}oritzRv!ftbi~kisQu!o1J7q^#)I~AMy>_zL`L53$7hL_gVW+Y5cYKW>ulEJ z_X{ms_33+lR*#B*C1ZCRND~JdT;^ox&dyF!suQZ4h~p*VqUh`W!*eQ9`*K>v_-VgA~RW#MFp6NB`{D1L7 z<&A^3%L63*V`1UYG%zG2q_kxh<6o&%2Vl9f$@K*n>y}lVpcPe=rbR(T>N;pcZeNi` z_^*<}Zx$OHt61$+^KkFPr^i&RZhORdM1=;Y=2EV8+-m%bp~HFo*H*C#H8)j+vsLN~ z8JIFp^K^x~5w{aJoZOLCy{3h@4V|DrMP{CDu#a|RxEFDR_LERJ-k_X)kTON03bTq@ ziMDBdur$|zdz`DH3Lw)y^O6hO!iX`YFQ`t88B4B#SK4uA6DrBDdWEmWQ>Vm)676jZzjso(@zk#BtTtMUDF#(F? z#(|9R&=o^JK16EvOM}F@lbB~p2L(9fq}Up1A$Yt4bFtH)qQKGs*=*Uo;O#=?uUE1J z&2rY!^%|8QDskgL)I$o8Y&HcP5d~$PHyvCqyh7Ht+>AZ;|@8|X?|hWv-MGUfuR z9__I$$d!sc)}2pHF+Mw)L|*zrE-YSZtm+#$2VxZcbt+!-VLo52Upg~Y^4$vFH1Iq(MMx&;@)yU`+0h-V=6j7e- zRx<&;Bvpg1xzZoSr{MvT+wMQW>kjH15`apyT?+{<+DdR*%LaRmG}q?2_akSnq0`yQ zP?6T87F4>3t?T-|Ozbh&qOUrQg97SH{L`L$_HbyZh4S@`>N{c>>4# ziOTQij{Y0V)EXS-*9m8&?Q>!h5(j8FE0O2R>dm~Gl7qLmH}>mYlG619G^!AFO2mtH z>1?m8#wX?o(FNLa?w6IiLmv-MsXY@~q+Zc2!q>l!Kb>NJ~87gG?^TCMqvT zv=mIz*Jf*(PlQWy8iH240Wx3u0u-;BjAfk_!aPNQtC*cT0OxZ4o|c637^SxHfeBcO zlu_P)k>Q# zLaqCF&n+)8&>746!+z8i@6_v_?Z$mn>3^d=g768nA6gR(eZ91upOj6=dN6x_@w{J} z3f2DzMVmvHLycVr<==$ zzzbdr{OuoiIpOAh=RVH%$GZE4yZdT4V{6kCy#ju;NM5VeaS!|c>#<({$8AyOK@xru z>0uxt;_+{{oeDfdR@>tTIhD<7Wy(sz*zZt#ch!J$9}|l*&NLEa(<0|egd6U9Of&7m zYzMOdY7CS&tdB@+PWB$84p9v$LvEs1fd?Wa&`M+Fr{@6VDDs`z4HF2zP0IBD!8%0= z*mebL)E0D&1?$U8T`oxn{(^Ee)b&Z>uSL!d=kQG>r8R2UclHTMV0ojq1UP zMe-DS4G~iRirkrv9?z1T;2f+V2+xxZSDZoHl8Xl6(nP|VnL6^)J&|kPShuQ}6&AlM zJ7i0kKho7!m>lR`pA&b$Eg%~edxqKw5}VC8hZ2pG<0Q%IN8C5qB$^p(>1R4&u zP}LEIBX`CvVxE00R-*|?1WjX6N3)e?aO}lqSBI}9H3Jx=9jRiNr*`4Y;|}sYz{(Wm zn=JDJs4$f8MuckLIZ+5YGddUSA}1mVbSPZ{j}_BA4I%wJ!Q#QN3S4Z>gfHKC$&)Ck z4h_!QmGQK`%|iG66%dXE+=2dttX*ikjp!c7D!VlbsAfEvfAI2NWw$}wsHR_)C~*6z z^$UbPZ=lsV2$?Yl>^JVGpnz8;t8x)yh`hAn| zImh{X^oevor(Mpm9DFmumsjhPu7jEsnM!m-Lk{>r)bM0tP0g$Y;tsCZoxAe zFm@^8s*Ubpc*hT#N4+)jIIA`?iZ?Po%7=mG_^OL7pT_7KbggGsVzC`Tm$O+nLb`KQ zn%kY`-4C?^YC-Q4fVZJfBK{F`l)C1h`az-99;AjBUn{#t z#w-?2N?4#-Fl?0u1xJg!ZLtiKNl#WSD>3E1jj5sd^BGz&7SQIZR|2a>2Kj>=1Hw0v zbtCKM%;l)wNR6Itd!NynEgGH<=FH!*hQx%!0S+HBdFRbVP=#RQVlRC-;rg+%p?}VY zSE2UHwgcTcMLsgGuQ_~VtI+i*w?30bWvx&&dMN+|g69UIKOVGpTJvNky|+hwv#9g? zd?jZZ^yNP?)&|Q0`gK1|emJ`rV8!=yc(Hcx=^@(lsynzANxxUo8(yZp@5Jcj zGWR%8v*|0x+acKtN(ByFJihF9=I!;j*0CK=dB|iSJY6CVVFCPBiv36u zP&P9xz(jpq{RFb2!o^-@L`ogTB`^mMgas79lFCdpv@=dKK`l18Wpd>pM;fg@+5k#0 zV=IcF2AgrkPes>g&y;u`xON3WleN6L?_9CF>nq@M+R=3de>6#wO95<`L3x@;Xvmv4 zIi;EfB~pH#I<}%$P6>IaRpw{Wc|aLC9x!7j3kg33S1~fs6IMwX+Yr{gVFW#cz8|-A zTPMQ4K;6h0R5Urkf{+p^N8?yr$z<%}>Q7_LR%j|ASSZ1c#3bGvoDqZsM4X|j2i?AN zmXZleH;}EZl)S_L&I9L*O%t(+jq4%=U||ccJq!Ac@`GSrmI7&(e-|(ZBdKENbR8u) zc|0C(Qu`zP+o0JLQiYfrxyp>HiVH+s@D{DEl)1EF8qT63XhVCa;=(1sp`2`Mu?EnM|7p93!qlm?%nNsEjw7B3@%Cy8AoDKo6rFR@zJ5-24# z44RKfBaYlmSh>Q3WY~{Yp{cCpC;zC{ED%T<_ys7&0fm^n!LFcoz##Lp#Yz$3ZXjOHDQbZ z-Su{YNb>o%;Xq;LaibrSxHT}9RXnqxzOs4x=q97BG{Fx6(#pTfws0Ax7?T!1&%<>n zHyehdcCZbzp-4*s0Lm-e)chr}lSVKzUplPL(nA9-d{4f3Z?V25vcBEw>w|q4=2g>5 zjxfBdnYq;3zqG4taKV5(v4ftq!WnnH4L1#XGu@|HUtl+uiEkd>u2gb<^(~CNGUs}4 zvYh)p$6arTbQZL%da$C?E8WCQc}FVwZL>ZM>I?U|jemPeNhqiluC63?7mWLR8urlp zOuA1;#%D)PruU0PcnBICbMZJo@)|ArR0y4{-dJ{2&-c3{5_%+i4Hu2=`0RENx4IQ1 zIwoP~;U9K>@pH(JmA?3L(#|)j#^=n6mEnW^z0(wB_w%IP^{HF8(`pXPTDZ;+W{=O_ z#})c}Z5O_&Q{LM+-p5L>!JkAfVMsmRo;_B*)(5us#(9OiyIt;p#C2zrfK1swIfoU@KXy!za>9Q=;xZiezaVP8@R2$&SczcAa&h za^?4mtOdiSe@rimC?dp5??R>j@mKM#C~}g|XQPpG=6$<2D^>X8M?Q48QT{rFQlF+z z`)e~OBEm@-Gk*44jsV_hi^0y{>3?3>Ggjwfv!J>5%*FDA_bBj;Rb+Rh@4@5|^-OTa zgoG!f%E8+q*ovDrVCi?_L`XjofdpGuS%nD-cUNi(H0fuXJ%cM@7{QK@s(6S2EoiO;dtByKy z7RpO6JfEtmiucng(kBB^ZC+a5lJ~b@EL?=%7R&=qx>>U9p7`~+ww^Pkxc(9DF9nq4 z3<(f z4OxZ+ThF_>leX&;B=_eH&*a?h=hjIsANGN$?b;Wj?uWLT>&f>Rd=Fdq*Q1#+F5PJt z#dYI$2UT`noMp?RAG2tnw(OEJpKiopexM$Y>CreBueQ{@Pqah1I0HnOQf7|hz#yWlyneh zFb5c(3%H_EInv~>DY=4H7@UNRtr$uEsWE+uNg*u{G$=Io9hGHe@I8ICbc`HxBxcH` zTs=rJ#e^!K%2x3+)HfvnL6Qa(=J_WKte{;l)Mn&KpV(nzGa1Aj!dO~?aWde>1NQxi z)0#iKez6Fo#0`)wBy`SfKs8(f43v}TyCI_%Wh&!fefqkJRw=?w)y0NIqe|&Wt4TsZ zDA|irfKZ%Qd`qKpMUP9EZtahyOaUT?XAkozu}v{0W-^q?p61|)Nf8pz3H&I29=t|K zE4M4Rn_v(W_oY1URF2ZJK&GFWf}$0a7u>QQ&RO#pY|uo88+=VP_obl`F z87pz$TZt_fA=pMQp&hgJ!O<2akMfU(I9XLqs&E5KVM@Hj7!f3MW%EYS)UydSmSmks z#V_4?$s=lAEzkxLIRJ3W)7E71lI`S%nVv6SP^~|cA%&mZnqGdTw;r1Hpcm&Iu`=8} zoAflo!%}s$z;Gr3*xYeUEf@t(Y~^f>`;fozcHi!@Uv|GiFCBNk@ESc$bnj2UZbo=* z98O;(i9X}JZeN^Ty0uP_Y+S{3?7y<;3&>a>FLj-h zbv=G8*mDiISM0hDE7(RCy_0|LYd_sOO`|#{Y~I+u5PeV=c^de5-?x}=Rg!fajFh^w z@3l2LeU#0-B zw8)T&^|-@ur!q8O5xNiGJ3y&!OuKQ}IoZU&^Q!HrUs;zEAXp~JJqvl~W)S5%5T~xE z4HeIn39ydIj?`OIGH{@4-Rc@K7EvCsTdbDJhalAnV&`}cRSPsxK{li%A!fO589c7%ZEZ}}&XlETJ*gVyiyyuO~S z?R{NnXs!*c{e%L^QPrFdwM)dvDnV66(@m*d9Ry17Dhj%*lh7ncsEfpk%8T!yzBiPvV z$@=SP|Nqwl=zdRAqj%rwacJN5^qZLN1xiat=nM1zaq{cgdJMcdw*TCzTmSdtb*tdb zLLx}OVggsQUty*7i()9%90Crt>stZO`StxO=}E^u*cAAm6SN~7f>4GFT~7T+9h-fz zX$uK}VeeRoM6s0GxEmi|{GZ@J5CzzJy8f&t^~63jD8$Eg-JN-BdO@(r9xfTG%hJ!m zy7OO)`|shEo>zhOyuDGIH=#r5j_emy77xvIh?l{k>%h18C(-w1U+yj^3J+RyxQh-D z-ME_(^x81EyBxA=Ccro|4A0!m65YK^h+p#CaX5X;GTL8UUvWO`{Z7PODu3s?;djTz ze&-mVuXkk2!H-a&OvsaHtu-fGxLwF;QL&;SVxNpDJQ1 zMBpGJN*IJrl1`dV8oU-&jF5%W`!$G(>WZuS<7YXPdD>Lw0YW!2nUAv$lx`6@+}~3V zVz1@ zl^$i{wiG|YpZI>HHTfe>UVP-Yo{ta%V53UEvpb63T!Z9MdLmg+u|p)EbGYr$2Qb+! zIdY=$b)lA3HnRna8<&e1uYEop7wP`k`|qS)yVh>SVrO!m;-8AYqN+;ULF{|xmZq{A zY&XEa9z0QqMbIsSWpjza3}CvEh9@~#demr5dX`ulam8hsb`crXfmlUasGS^zE!dh8 zWQ|}sISwU$dIUao>-9|Vclc%%JnS0neLwTvAN=!i(xdwVv+KEa+W98Sij95^NE=2w zgMpC~^q1vmln9YZn6j-Xtn60<+}UE-^bbr7&N-VE(JYP&v@wKjkVyxqkFphaWP=9dGTzssNe;L z;yJB*Tv^z&r%Z_oX>5;)T+}yz^F(*$+3kqh;1}*hdf>4Qw>VhJ9lc`Z@5o$Q+PLkw)Z~QjH$zg@pnmM% zgMV=Tg*479i%z;u3nTX=2tRXRKOZEx9FC=-3m6Fl!9>zsm-joqJ5dY{&|G-WMFaiT zGm&-Z8+1Iqob3;^ zs^MT9OTv%#@TgSs^k3h`i1&e+fpYCQkah=Y03Qe|_w*dYC8R~zC8X&+{PXRy5GErY z9S15%T)pP+u3HHo1<-lX9DncpD7Msqvd%RdU6%w$bv3rJm!^ z$;o4bqxc9bfZyh{(zx*b^*2(}(~KCI+^F^H0*BhODjb^olo9-RW05rFJ2&x(3)i(M zFF&I#^p)c~#Nf)Q{P8v&T4P%K`rRDjWsm?}Tk2?t$%jSkL^!SjYA}HFZtMZ+?8+ZDxE@hpe|LqoAm{#-D(4edd!Xi*vwc*yOhe%(4z)>JF+Zqv)-Ns$Twt;k~Zfg;a2F@XAKsVA3y zaI^|sG=e5sf|$2tJJu+2A{J9+iEdfNwESS8T9^&)98zgunRvw+z&S!g3bpAksZI?}ksBD(UW5>r z7Ot#;=nw2?Sb}1-)9;fq)N05_yo8<<6v~J%*G@3R(Eh3o1t2c&QVTxa? z0<1-slL9>n^uuO1J61m~=~J1PCx1nhhSoJ=F)fGmBkWs#JLVGin_YRbrMfW(BuR+0sC&6a+_3oV0_Ocqp*e)l6JWj7LR@paj*ou&fiL zHnsZnh3`W2H6_4>^RVgMisF^yZAJ8T*!MK0VBk|{f5E4X?1Ul`)8MKUPrnmMTp5H^ zIDbEqN50Oy=I4p(tDJ(zPloqb)y_ASR-UIv0i$QH8x%Xkr!QXXp1a^vo4-+654kHW z21-U{tAS(ZYr|g8({GsFFWs*s`3LLc$ZuN#qmIve?B@k*0rWZ7rTr9pNdf12LQmK8 z`Y#z2yCvOk0hjC>kGyQ(H>QNKe;i_O3@N`1t=u!gbWL&_z41#2T%+cH_}xdcF_z$e z&Ub(I7K7aK>GS??qietEk^~W`Tvx=K^N=(__?D33?S5Dp0{`~l_)6b>sn$9TR9cvODq47i=XpIYqp=Nw0^ir!V2%8FROVIzO&LH0aD79CN3J)U-%CRrEj zvL^?^|KQ&0d|mC#&lz3dhk`}piL4x- zHGCZmpl3idP^70k31N_(GvoTgGW$7L68Ab!v!0Gik96ZA1rF`jx$SY_EN25ZGx-An&LxJb;{#4|7z<^k7->)l@Rk#lR=I!I_>*Z-tnh(=%0Ur_e2uB{2a@b z8lGIgejKmGK5MpKX>wYzQvy=nf&zy|4V7*Q6iJ~RGqh%u0`G_Tb|FuwWlXRNd>U~rqfZ};M^LOJQS2iSJ!xZ zYcSkqRBu?VUA;j14GZcL+MI)iKam-lx_UE|HBHSfu4 zYR&>?_G*hd)g9B;BBChG*-82BFLb8J7AOGt|6?^iK_JjYJ9LrYr-@xm;9mo<;qK3x z{Gs)K0ad4jE>qtYKQ*nKD54^>{V2ff)_xwmso#28+LU@Qw964LOLevQ|0yl3t3tq& zuocUC6VlYMftN6x$1dEfI?RNtArg$XS1V{y7k6dA~&k<&B@` z=)@a|sygsd5-CBUOrLz-^{$_>_3a#d#6F(~@BI8WFcw8_is;8)`2B~M+BY4} zEm++!UEBtVlK$HmH-zYz9bKr|KU-UDD>q38HIsS?Xouns3PmM^@HMyc|Bd&2(m<}- z&R12wXC)TnPU2d((zY51u0RW+M9^g!fWT-XAZs9&NgMzURhWy2(jYQY0~{bwytH`7 zO%6h7pi&N%S@Rq{F{Ai*B{Bx{Rp5MjDo#3D@%nE%xfWQWi`Zn9TFb-&oCP9Km_{nC z_$4JCgJMtxWjff0`g_t()#O673V=p5!YXhZlW|ogTyPR(cwnO?uJE_hm~`CO~$1 z5&E1eTbTk>596WYDzu3i;v5Q<>*c2%HgFT6TvwMX^0cT*lg+bT?_gyN1*4>*R3QV` zyX4i*6Za%b7`K8LV|yrrVsLZ;%+V!t5aw{>h7C-vX|;~?&M(MqF-aaUELf*rk=W1J zZ)632TOOXLVbfP>C-ru3mmc>4qOVd|)vJ@$T`zN4&(X%E^t&H$0+IqR?M0sU1I8#` zMJcw>CAR03-#}+xi-Px~K5Tno?u~wd5gRW%&yxhA{RPhf8{6s>uY{skG#Q@5A=u7m ztsP^gr=C6lU|ZMzoPme4)8VmmSAP3xCp0YXuHEYoJjJFT#WN)~w2!@84G?|tj_b5d z2)k0e4d2RW4>wr7rEfXq1LsD4C~$u{i$LE(2dm4-j_MpfH63?iW`3x`!*gO*iK?JX z`@*k*Qh5H)rS1)FwrWSuxN}=-}2`GM>e@Ynf5*JtX3YUuXS3e$^4D-i({6QZMTP#Qg9o=}7+x;JEt;+t% z72xMLZeDSoWqd2^V^9B&QE z0+czqwZ8s}GB)=5((sQ(zWk{RH9q{4ML;8_cog^Pk+ttdq(eDM3|qy8g=n={`V3dH z!X$%NE{KifOk_+8#gByq7AHXY59>`nLtg|NKCI`7w`uQ}4(8-V$ZJNlS znsV@uQKGs>G0Ks`rvRu^6|NjM(@4`a`ISs8a5@o|R}>ER;=RP_N-Sj<6z|1U$_pJ# z+)S9o*XRfZzujtUoI`$wex%!zZPG;4Sov0Kse+po_!pAbBOMBBN^1)7MUWCgGt&?% ztSCwX*x}PKZqLR3CTl>_%9UBSbZQpEF^A_ns7p>UF|Qy~WeHN%JfEuk8mv_V_cz$I z2FHyzyBRQ7s!glLh)N}0xIL*<9l|(gIp^`IzC-IAq|it$!bzooq6s1whv5oR3Ra3& zf_)Eg3_tpnr>iG`n@BxU8c7%{hY`$x;ylwMZSz8olpN_)#h(U4+Ba)0d(r1a?~rNR7JAI;MN8+W<2v-W^QU4Xq7P6Q{Rc++Uq-j zKS*3*{Ys)jDGs|omj#N=9!qQF4Q%bTJMl;H(DTcCm zdP0?orj4}52rrdM%>@Hb5AP5r1J<&lkq_W8`&yGEWvSfqSv7H@g~TMvdNaF{t>wLK zj&WDvmmyZ1dW|Yxg^+Y_GNF*KVknu3Uu*2eZPBP`okPQco-Vewp0W@nYgLU{D07r3 zlw?{OVdLjR*fxy|qF$@nnj+LYW6=UhVkAU!DIg{0EDb#i&Ud&u!QBVZ`!?(E`a z81(Q@+o;8D6_q{^8Sa&rs~I4#-pXzpd{$pOasG9g!Y6Tkcn9-IDc8_GR$Or+m^( z?{dHSee!#17;_QbV0(P~{6-B{QGB$|Ql%+p!Xanb;dk0F>}L)5J|~PQN&djw8c(zj zzKe;w>rT(F4|T%nCofBNr5*3UqUHU`Ept?m!gWL|qOwH7Kvk|1EIV`KlAUb}V`FHF zFh0^G?x>%XN9y1(Ou59zI#ST%lLG(u5K#>VD#~q#0< z(!N4ILj!X#2bCv3eo)v>jPYfWYXS@~@nB8nF)M%J$UBcNeVukjbX$9Z)8Bijb5599 zUZv;I2<^;=^xy;!0N!jtWs|EJ;G7_tWNy6K)wjBx&qpR8$bBasbYeN*i5W{WRMbsD z5fS29sG>%-zbY|ST1u-}@rCW@OlY(_=a0W!du`9tvn=4=A~FZ>h_{ppGsi)ee|3hP zBvSIcyi8>2j=>TD&4!X=f{Go!M)aipudp$ZmEj|vu6JUuDG0iwj=Yq15>{?ou8)o) zTH0OD)`cC`+8U#gwEnNw>40l`2PXJ`v2#=pIWVBEGH3@mobvD0^`L&o$KL*)hlhtp z!#yS%m!XKDCA%6yk8|fz<&UrZlC*@wL>Z``+;+-Kd6wPGSd@E^l!=&nX3jx?@HTbo z^Mmj+uRtO3-$A&DTuF%2sOAXhEw}@C%PdKF`ZegiKtg| zQAw*@OmLF3v?GNwpkkT3M3pdaDusHk#@G||37XM3QmIf=k?+|a0C2}Znq zFkvW!0E1R)08V!>@AL&6$IJigfE>sJMtuf_%QB`U-}P}wIM@I&5d_kS)(lLXOaaAA z>D;m)o}%|w4w%hjxLNo_F$$i4k&b?irX(>iOk>{lh~t}iwlNknRWg5Iu~CxY``S-O zNlKg;oEcmp?s-SLN!-#v?leKN9P&r%5)$9)LmVkL7nDm(j;Y!oV}cb2X$Xp*bFsvt znw&kc&jHQL{L~9YoNS&rwst`}jLr3sA#U5EjCvPkKr3E^y7nm?`4p_+*6+mxS>I%a z{ND76NyAK+vYb><%PQuS5@3*+*d)fXb$j}h@TKO3Ewj!y2D#*gfB1C zmP7)bTPJQE`2oC}c+^t1V=82es2_`L%q*4DvgoBE%@BP7)OU%1fzTqRGCKEnCbfi! z-rcw92qd9P%&bxlOvs}o%fS%(hj7xh%#T|2HFsif4kFiQ<6FS-F^};}&9|f&r1e7) zCd8qP`#$zpg6FY2qkX*%?>tdYzr9J(H^0~JjTZ!V|G(TF*DUNV>7nZNm)d%ZfLM)_&j@%1as4H`NVpdbws4jn8%t9en zQf)LWliI#h>|#D&cto$9-|q%4fgcBDX2No)N>#X`f+iw4di=Eu0i~ku?!$JslNO}X&go6 zd1!b)I{0cx?|L|qYZ{F78%tzn-kpT(H1h8W%{1S9xV8Uy$Tv$OgQ7ijuhA9(Tg+3K?Hs?~dkXURWQn^;bVMMSuM9hI4qGkd^M{K$i3X zbvWeRxCCd6F^oZz0eWq!*Lf->)~`U~P(e$ndN$(By{z#Ux||{(ssoeb3zL3+tl0jR z&`*ZQV`8IaG4TYj&b#<2#mhEe| zIXa*8_4S25^ChP-FqHkIPzv9=H?D=g{O^e^sr9q8a-UR79gSvxO7ym zQUzRNo??Dea2=)I^>TkoVoV}0YxrJtC?FY?$Qjlq$dmG^P+5o^f`T92Is>sO*%_2E zvGb)Lq|+-!o_*&}&S(+3NWkz4isswveT)0Wd7qh-f1BoyM-F3wCA2f(I@4H$FO z@ppYTb$ZqU=oK=np*LV=8mBw+<%ZMsN2^M-fz6yvLzhsz*)XX7$SzHAOhg^a6_ccY z)oXIkmI+v=3 zqt>n+oXE!PL}Lp;tr1f&0b)V{)^Mg`tHIjbL++C~&ICe3&b1&Ro{2lG z)vkguiC24h?lL@EieB}L)2-$4WY!vGF15H)ls-~4GRetF+HG{^_FN|o&(Wf(?QAEr zc?2!wG~7)VM3=hV2f+GWQKiuHNBljVOl%--ArB!GYi0;@!zhIKNm?@;|~f^tn>>Ki{Ks zdNPlg>J92E(h2za?x}%lE=&(9ww(4ol=t1u2*@gUYBbvU+Wq)d=&m+_Gj`|3U|kS4 zAzM*6S**J+Vdu+p!cKUHyqb~zivPD&n=jJiiSrt;KpAEXo#oiI@t)-$HEXoIB^-Y! z8Ki%qdQivqBw7!sGS(pIVD!VD8{W?bzMM}A+7}q+I841-8wl867gh^y`Z3KGD5|3f zv?v-yXIf(+&&(LA<7>t_GYj)U57>XthLS5O$krn5M*RX!}h2k z4bffSBu%!UAPtf8_E$z4o0CE#`c&koGU_5xaxcE%^x&OZw0GVL>`};kwFO9c9*A%q!W7W|5IT9V5PMZD?{dV>>c+TCW9vuk+ zR-1;)jEqF%#<^--)Vti*Jl zRQuObDvNzj^SL{9hBk+`-@h7f+K7iTBDT9Cw|_?8u=5<&Nf9-M1e@dn*5-L|6f2pM z2vs+zMclZF!w4yd!U+n&RbpZ;RCFVVE-12M*@1NDp+3PfYnRSkrNo9LEjS=;*`MIb zkU1oV(&8;UUsD|Hp4)*Anvu{rV?A2jU*V-hqj+<%uDdpyF8E&ntX!vv)G}0_gnt7) zLarCutYPN3LVTonJc7g#?Zb5`7bw}HmUe2TO_eL=T@JTH^)nk2i3rPT;vkvSn!u76 z(mvU(g9DjbhXWp@RkUsVd&y!0{Enb3&^w^W7JoYP_lav8e;iKy2{^XGNKi^5khi|3 zRl%Gf^`L(Ct1)nZ{yYw#*b530hyS!*EuE89tRhjv80we^a0};pAMRI^MWQa20zuIL z+REHb%z!HC%(Z^v@u8r1AZlSu$FvrrP!rKQIcM|Ghce|ZlY|5jr4oC>|AH*YjW0;z zn#ibqLwP50wQ99oE%!5aT5bZY(RSa@Tft;X6KNKk8Jj4jMouVaDbtr3dIGxqIr1cx z9U_FNmDkw|uPl@p3{{@CWdwB;rD49uoS-_+;qL!1h)HbV*fWL@0i$ou;uQc? zEgrLC^<#EYEK0HQ3{+}hxkO1K8X&n0p)vH!N7>?($qF<-H9uSzt*gPK+6zsrO}`jv z$X>!Q;He=Coy1CpX*Jo>KIRl0&e^Q%T?X(1x_W>2;6@1#(cy5X?S-q>W;zo`-u3YX z#}-Ny7_@|PW!Aj6FpKizF8$ER+j0A8mHHeL5KJ#4kTg=tn^>GEl=AIO=|^$Uos4du z_?hfkNN)P&`bVa-kf9xg=W0T)EvxP!_SclRe51GdfT8Y}DZ1*;3kKgO>^G0s<+lsX zS7;7C$msQ6_IAij$qM@-qvP=fxx;_@(eaYEchG>mqju{?{nPw{6Zc;#2gi#AJ#M?8 zb|)_ZS1(TI6zfeHM7G253YN1hE)3smXrdF}>xJmv*~^0C1-Iy(>+$KT$*P?U#XT)j zZK%h#>}3zlgTv~{)&NO84C%(NcjNpzqAqx6Uw-;A!O4atr#UMeH-*(3ShUN=hH&7H zj3(qG>0?*}qD~zSDT!qQZ{T)}Kd)hCJQb;nQaWp>6rhX>xc}8dMrcYONG27@c_P1MUNz+x$(Ry9X%NPvtunEB8-PUG$B( zSU;`F_i8@3h_1nqVZntMu3s1&tG&)IHH~zj)fyhjfdLOqJke#jJKHF4bQfM3+klfhlp73z-Tc>_K?@JoPm4a` zb&lq7j@o)%;&Zt-M>{wB_#RlxM8!g-tb!Fh%51J$H)r%ADp6x(P7-$UY=ntU?$9Bp zNOXA3y!1`!V%XkC|MBJ~XPd8#Vu2=jf%yCT)z0=W`v3j~9j`I|)vLk{_{Fyc777F? zTC%gZc621Bdmai=h)y6{z-w^nl>6AT;9WmO?fe z_*$`S3GP+mmM!K60P~nap9YZW&xkqGA4XcvY&=~u>)6Z$z?}VLX%JHOcOU-qpP2pB zDf&tL4K{G2xbJC0tcoyIBzh1uqDnKx3Mh;sB@)FXbB-v{9l2M~3ss{r^F>_|!|#Ius<>NEx@Ng)b}B@v*+PW1P6*UL_t zkKc;KDLH8jeMmBcEzTjHLy9Osjv4$BJDE?`Jjr}=JgX!iahoCQDH)EL;SYKU?}Np~ z_(4+mG-Xo6k9IZ1s9d2aKsir9)~T}?JP!)&T@R}@(l2lVIV6M88s+A=^KpK}8JsC$8#N|fA z{jI?4J>sF?H@bfHK|GaynyvI26(L;uAe|ZJKg^zKB0d3ULy_06Q2;xb`a*BxH2*cy zJ~6Ufu0k!0hAQPefvH{g4KW+TBcWr(nv*D!yz!jD8xY1c#b6 z7+FuU8i5LR4$bq0H;I-z$MLatNO}!`aLxGq+~>goh&wV~y{LG+@hsxl`O5da3F>je zCigDA5D{#Br`mY~H!b`ISMa*D(}~TJ2;Y%MI4*)uRnR#y9I$uvobq;x?YR^1f}I<1 z^p4#3P49W@?Y8?|ko`5`*rBt>zOA;1)#yt4T<&<|&*{t9F2X_R;@M^S+wBI2-w|&! z8E`YKA@N!WiZE(#{{+9R=yaVK%f zfver#qN|8KK+HM4yN}>bdy3f$vmF)^o(pt<`kAes!~xW)Dc1_f8XS_&H$! z+vei@(&-078XiLFA|>R})naTd1=J}mR^8l61`jI>()DTGg2jr*nJpOh{;YK5R#=Xh ztMTF#EN_54yZhT6Ykyl<^ml^a5^4D|wwE$L6N~mE)@%&g!OmO6w_xf_Mnzm8oecqW zfi*ZNspDKVcc0 zk0%EVnVLK`|At(eO%Cp*JZLT}JI$hw!>c}&5){;9DAfLksU)j;HrXxEr=f$j!u)mo z+xSO+E r_qTgYUf!0K6=dq%tc{_qe-}Oy^&5E2<07ybiL->z1h0TuUrQVVWKCvF z9@#2J7Vpj-zdailk}OdtwkK6p=wPi8ER`yU)2~rUw3nJydX@vd2tqWl1TcT6V3%ng zGAtGo>ciKD1vdb&SyLfmEFwfrQh-Rw$PkKu0m%eLsNmV+ufiSFuj@5)9Ho4u1!pXa zzPBhW;z?DYO5vfj>Y%k9D@_H?I{f?-WKlksC_&apWy?YMK`sC*8_{b9^LH`0VX#mJ z3^m?gN*17M-Srh-Q>q~FDiT^z>4}$-WV9n1Vt%8Zwc6zUnC)oYRn#`44|8Z5Ph9%< zzz)N(*NV0whDIft4~aMgiH>E-tQuSq5h&v|@*8 zr`S`>vtV;joHLa^>I>=-UIc);N4x@|o?kN#a3b|WEWeQ>yS?@`P%-s`nYv`Tob41L z7_~@-nt{%XAjz#L&A;_n-u}loc4z zOFLZ=Y!;edpa(E5PbXPc=2_kV>-v9Gu;cyhL6Ps{@%SybjSjuDADer~Kn) zEE>Cmycvhb+IQxSo~a9-kF6f=<=Kk_?wM@XYY}iTwbh=(5n4`oPNx#?x47B9r`=dm( z+)Wk%*o?&~Ls8dzSRiPu0_mj6O0*2wINJydcao24+nz07)8^Y)rFLh}4CF&!OBzBY zh+ho7e39E&t6z7>LgCTj21;z!)WMF_Ex+HaOZ_K&E1Y3omY)HSO(BF^*Bw~Wg8A2& z7xN7*RjU#@Kbj~+5CMm9BdH(>+)}X`6r^t^>yoh1toh`n1dWkMtQev~UtO)Ab`vA2 z>S^4o=ra^WV zyThpQgpYJM7R|aUe7~Me?5Mz9ePY$+!+CLnoUxima#I96&++t(-2QNOJNkFI_I&L7 zJ9c}%jv6vg_mNaBFJWA&T7F6f`9x|NH~822+0tR6YYHSyN3PR=RguL-<(&k1QG@Q5 z7US-X>f)gPD97^Te(kZZ)m>x5BDb9Rez!W%;~rJs9cMqjbwHu<#edqrd%vHT<}*{6 zUb6$))BS&)Zb^&al@N4S(^(P`lvWO}d4fdT?6}kHXtwgM9O<1X*6P;tk4>LmGCM42 zpuJM0AQOTkjsngPoH^o1>MQ)e_*u;(xizJpe+`jXW%;bRB=*B6q}D&CB2@w<`sD}Z zRo-)_WI9^{SGj`=LAarDxV#f6ewGhRTzk>d68$4QkE|1Ev~sfGQ0hvRVlkBHA`@#| z%R@8O^w3tuUp*7Rf~ZUmzEun`^!5f@B;8NsIzq`@R;4*(73qe}${962UToY+KcR8! z2HZ&gRJ1axR_6a`=$pY! zijB*{kDH0oV2^5nDQI7%a^Uvi@UpxWM+1|{e7-zR9i7RX1#l^q1m8Lju!3b(c!Iz` z1Mw?vYUcpbF)3;iYG0<^&tyh>Q}6uByRe*rEdVt;|eFKigH@3}Rk-p#tL$HUhjtQVR{goC+50saG zQ%<1zG&@o%ReV>jO1-LNI=4YO^?xFHp6!0zf4&m+_Z;6D5*lJ(r_X<(e(SF`+;9no zN%+0r{qDo6$<=)KsOY^`}?v~Kk#Ev-Z!0|`VfqK;%m zHG(4y>pP{_zXV3`b}DvBYhRLkc-8q~8^|N@pwhAQCvvbKH^tQ(7>aM(Wq8({>+XUDS=xk!)oXmb**!9fUGAzy+j&1`wz zvl8W#uo(aa`G;yl*8gBwi2fZ* z--Wg~>&@TKiSvHmIBZXD^)GC!{G}9JqzaFRyLtFEHJSUO`SbG#Jv!tAKeVIOfys=e z2y^4Nk~JiOC~H3Hw$-C1IUcoIu8$Aw!?6;}<(l!k5?EFXJLR=yu4K)5Z1G3H7Y*?q zpMVdywPozLyn=K;qJ;%W<<7K+k9#izK5yvPfuBn1xA+ycT*2L%~P$`aZMOX*Fkali#wZ|g%LZ1Lgi*y8#X`)_VF9;6YQ$Id}$U+cM-WfkxBzio6r{%W2s%azbo=b2wZObq%zyv+AG6}U_= z$8Sh0N9Eqx-VVOHy28#&I_w?qac?(xy7>o~D;(3@9%t*j>zzIg3qKqUA5bMBgyBK^ z1p)al?(**tMG)>JJhq1O6|X9ePM5os_bFLTwEn4)m|`F>SC zhGl9?n(c*$NoE2?GRqASB>~i~i8g=5=>)A}4%`(fuxWhq)4I}v3WKc9L-&B3;CD2< z#iPK_axPMs;-oxgYHjym5L-+TPLNH3V*VcvUURdCSxP|=sxEXmkc3Mm3|YDsi}qJ~ zPvt6ZDOpv+G-iq06jDQ0$VmH9OG@VDeZ#2 zq}Z&U=VXAX#TWvr8nUu&&idLE@A@@M7}QEJ|G@!fimwIcVE+6k%m5z%laFEwY454Inbz0W2|f&+#nClOK!dY;jU zOLQiQ6RL!#5=QtNhFHp(hpV8;(a$m<#*+(G{8~CAc|uM*^f&XKk!k-Kx9%Rsg>PQM zfj||`*UwHSe%?CwWrByG!Lr7|{G)xRbeAB)Qr%kIaTBVwm|2~0{bY|%E~S&Vm}|)% zrk>Y;0`ACh{sWWYOgnw_oug=^G5kC zI^c!RIN(a~`L$cntnD6TbXC|ld(zQ(7qgs?)A7k}ebl?AH9dqOXX{G7<0;dW`U&si#N2NnwL zeGYm`5cn~N@^RrRbk?v}!FUt{4kr|sI(;Pw*4;4-Y4PGMBPl;%b{yl!Zyz1(gGabh z(FJN5ly!uUZB`V{+1>_^iNy zV_yEK32`$NojIJUf~SgkZ6!64HmMW0 zay2BWR^|@ytKDxW$mpo8Zg67Daa||A+Z)SKFB{xae%=v?#S-}8N@rCYc3B?JSr6j-)J z@@Q9GSJpaT6-Xz9D=ViI1xBNh0!ld39$VX;0LL@aXrLF^v<{15M?-OWQvZN;g%JK6 zfI>Ry*;H@EZlWi-d5zr7b}K>!k}+h1XeC`NWe}5-k~!&-KP!N^v$7^2#blUOqy*M* zxY8bRO`i^?0GQ_Z+xo^@EebQKEx*RiIUoqmU@NLTmcyXrsk9T6m3MZ{y$(BnK#}e^ zOaz1Bp)s2#p^Bb@$lc146&OF4xTMR5VDLi`O~JO{%ESQ<)My@k=0r+N8RkfOTDk#v zO&NZ=;JtW@n(cd}<8%=cz18*MeUnUpXjaWeHsX zlGcc9dK;*<6F(&d^_85G2}CCXG*xe_0Cj$kR{ZBGz2 zQx)te6CEK^St?|dXtvKzZeB$H*sWxb7Ca{<%PnxG>RJU}L%-=Vb)@eQ$Cu_&RWr>2 zS*FQX^L*8BlKXsQnnqiyF_Y@;Gx>eGLrJ2Kh!odi=9O#>BPy;Zl1s{HPiBH#NJV6mWa? z(7)K#Z;<18G1u;S3gnISPgd@1*?F)^?e^>h5jdVNc$pb_O}E7yjewjYhZ;Z1!fLEP zkAHkPU7pBe@89Q(Ak*@H9sv2H=)V~`MT(3zc6Xh%Lm)EDsjPw2$;<- zLs165LOakx*_nY#QyzPs?+v-ujKq|{NaSSFYn8=yq)uD;5DNT=dCK^kwqzLnuLqpJ z3*@m>y%;z85h5fESUw57%7#bmFdRm7lfgx@hTjfPjuYiYqWzdUC^Z)N{?=DZ90C6! zXyye2%=ecdUh0;*lYO?J3~L4PFh_GF#+s7aiBI=agO>BB7j0mRAboiF^&5PIEzjI5&(9)D$mE1HE5Ur#W+HO{B{0b ztqJ*h=VzBb7VYfH*S8E@NNVXCx5R&7O1xWUzKK{UrP$FiupoLQ>i&ad(=Rdd+y4#F1$IKgodO>FV;=0ct6$TQ z8=cpobGz_~4}s5mF8N!;L-+;E=AVRDiR-OokMWzj_0~{}nQh%MCZtu#R-gYz(^&_# z@qS$#C%8j!cZvpgcXusrEyb;9aCdiihvM!OZJ`v`7Hx3|&YOOJ^FA~AXD2(mnMqFW z-sjx&>E_zO&@7Pc3T>rlLJ&ro8}15E6-2)>;kdL=i+^pL*#5N69C_nguzC)extE9mw^4EIfj+KvmKktkTeSthy1KaMdu9wB6smjdFo1dVj z^izTF{{$FA24NB8`u6&QRo;=64v|xewg7f|o-S(&WNPuxy)|J|t9V)$)RAr~2j39i z>XHvF07eO-rm0@9>d_KA0n``2%9C+%5T zFm&T_NH2_K^QZz>Coe{`^c}B#z(urMNKj~)R`!wFiihWIz$TB5P{^y z{~cn6B)3BJC+l!pOMBx6bM6I$KTQa-iXO=liqQ&lS@G1u(yrq}%+ltDmu1s&1DhHv zrae~ZJWC`vFGe+}h6u&;Zi(JjfoQbDE~gAGQ<4~39@l<&Rz{<^PX{KQ?$NHy5wW{_ z_^?E%5=;av{ch+VPT38IJ@azRJZ;0LSjAH#`G5 zmMBC^j2VEl4E*~!`Wl>fIM5&J^JtdGZxqdf2WSTU$~4C%&ejfv%iEWZ&Zjj~TQMcD zw`-zX(N(Daz=Z6&+o5*jMV&|T%m`Btk(n)Ic)Hg=VDU);Mt1Opd|Ou}WNYuC@D)}3 zjZ}Px(tn@hLHun$_xVTe_AWAHYsX*M<%4Ax*rbx`N>l8qmE&@+FevwVt)MDs_I=P> zN)Lp*z)uIkHNPpQ@0IbnGWIa}ul&Qtgt`r(6n~|tbwXeu*0&(z%IhbQsKlgotD5dB zk4vur5uYcHv-Q7EVE`&PW=~rGVAsiy{HAO!hkCo&!tg{-?JKdwnPXE^Fhix=15e}z z4MO#U-vpk3hi`W0n>p_U8{qq#>(h1p&fSlZa$pnyR4{NmQMi&yd$J{LhAj?L1=(56 zQoAUL9J}i>Gxz)g&v;~*;*Ucd?X;l4<_YS^-_g4YJbAlEQqj1~n)tG~aAT|rC9bDp zhks0Uvp+mg*WRrj-W&^P32%|&HU1!5zp0T5j(ZJ> zJJc34ziRYCam==2TW9Y%9IlcWH0aOyg}(^27FiCFe+N1O1=j7e5HD^Y1mq&0%n@6e z(bVuy689+S4(G;YTj+U{RrO%e<+K+&857QSL4WAPSF<)$TM08t@I27v49iS^cX#LF z?KXG)3Er7{G5CwqI?rEM#%z4qgT%gnZ{cxfR9YfPSnhI3L7Vp`-|qDqJ}mSD+o><- z{c;--83g%Wt*t^Cjd#~5eV;z1%&|7Ruj;1NRxW-AA)FUqSZVnr4VwulhseR9ZL95= zlwrG=5$||Ce>&LmB!`kr_WPgCta6revi~jLN-d>TRP;}Lq;7N2G4}AZ?{@(~wzipy zP~7LQuvuGo{n?4=C0RzjS^_7({qEF%w|DME6Jh~uM}>!G_+z<2N@Ajlwe%_TBblJE zyBPyELX&>~?3b699-l*|@<~{BBp-q_M!aN{d0nPFdWF!{n1v!4(=TGm(?v;NteR99 zxU6Zl5o;nF7QDkj03#ADgzn_-m@2e(6cr34NjC{*d`b=Xn2!d=k%sadq93?EYeJ_c zJ@rEwNZ9o3x#&8=K%H#FWfs^>zgU|7X5;_B4;!?nYgP{}VGAfCMe3$rgs6!NNE1G$ zOl|QjXqFWxsvL&n7d6o7#xT_kYf13RQG<%%g$QD#45+jBr=szTzznh}i41rR=-*TO ztbly@>q$zH9^|pA>L;kRs-^*38z{s~iqRFYr_9Y|qrnp^Ue;2GIfxCqRYCUgDX1lk zbe<}34&=SXldyui7VnZ_$S$c`q=59eXBcDJrT_t4dkJmXskTcNn6aunO?7Af0ZXy3BoJ0)G9fq(<-cP3eMciC1PK}p3l+%O0LGY7!K zM(pT05w3;m3!HqBU?=9V@f%PwT32hKwHZ`i$doGd>n6LAF76B!utbavQIgc8Bf$?v zk4Qq;C`Fw}onlTUB+d+xljP;}j0;ICk|}Oz-_W2*_QVgQb;(Pjl7DH>MI`Y|qZsDo z*ON8q+|>%71L7CcBaOwDzQA20>B^D-+}+?!@eEiO0Gln9N5PL0JyUJFWo&gREg$1# zOJxd=kV{#W7-Nj7Wt-si*#BTOp_fZ?Iu0;vUSLOLMdk0mvLSme;>>tQ@i`?@?1DPb|D5A~(R24#Nl@f=i;(>nnnfUOF`ZI3O z`FUXSx!D^HN5BhWgWcg#Rq?OuVn+OAg7dfI#H%Cy{^`E=Z!Ggq-ae0{^Oqxx+v;5{ zZ>YCK_b&NFZ#QrI-tS)#V&%nr)f@(~jR;w?BxI;9cgCGX!}qWOpHC6&wg~f_x07=P z-XFPw7g?$QnB9HF0eFgO=_*R3#{HmiZQ6zgDW{~WR-ERi_;s@j80ynYOycYN~9a&yrciw1X> zDqBts+d^3c1F@nL+jSKuXFPdXf}L)S7FgjOQfYp(0HAkSaO1e9+HsC2`r5fRfvdh6 zvpVF`cwNoR`k8;sKj(09sYK{+rz(3-J2^7(J8G-BaRupdXlR$+vtv7U^?r+j?uM`k zkdRer4xd8>72d&wn38IEN^5f8XQ-3~IGvRVJe4##S%H(J8w^*h-fxRId8j@$h}4>| z^LqvXNk(4P+uvwU*wK3` z(p=@^Q>LVC?U8c&&!jk+i8q|k=6lU-%fbH?t0O86HS`&qIZewNBhStJTO09dfQDQ6pn#V!ai25ae&4@QnUCNcS#56rdBi%qAq>K`;u{C8d9m`B&+5=LJ1kwImS zzR}Npwymo!zrPYd^#ZY{P!VAp^;CS?*p^{ zz4Dw{%Do`eaMRLy)9`18b)9WLKN1o8bS#1}o}Qlm4X<}}crP|@ZAuggJmGVl7~P7> zCCbb1gt6x2u+XLBt4sn6LnK4KAd&uMVoKOGyri+$Y{Poo`esJO-zE{86qEG1$*Qu7 ztNzt6JP4_63SZR1YC=ZHf|m5NVUmsHWC-26sP7BQ1e*ci1JM{JMEcYtsVWw@@KFgo z(P)b_93>ElDV#4Z-HuPNl^8YGWX0M}8?JXp3|o@``O&3L05u92ADmTG!KQ1Vun=9! zq*iRDL!wccg?Erd#+CHPUb{w73g0dXyhk2j$aC`{r?~Vl(h;{qs-aiHJNR1|aGnGS zyy=e3pnS29HxqsgGyOXz7M~D%++L$wOlt^9fb~j{pMq-{ep{NOH&X+?eha;u zrbIGOAEb(knnPAsgkP7|tt1wsyd$);SK6Xz!KBL^s<6PiXO7g5z_{X^rz`xId#9`- zqt#y5tZC>%#T49+Kra+Vp(&k<^ntX^LDCxknbPI>H0Db{1%WjEH@J8vkR*F{vA`;i zG)_xm$eG#Lz3%Db35D~M{sagb3{x!uFoJk^_QRh8&S^_>e7O1}^K z_C!&Ya)V>2q{!bAWpM-{vp#;@h$~K&(I?zp69$q}@48iV-T1Kc)X|di5yEoBcV_zb zK?NkI*#hHYO-?0Fev?N=^HxfXmNV9(iNJNIn$%0-3y*5*(N2weC4#ki8|da!;^U^7 zE_Ui?MS(FMd{~a1aEX%@CAhqt1g&{1wzXgL2E$RUkon8}Uul>AsHK&-#!yF zw_OhuVa52Lesbxgzq|w}RT~A3T+FS_9eiRaA9d@pc6LsL9$sr7n`t)+3)n8iq%lKY zEQ4%{tcw1t6M66kg@_tH3H4uI@%Fp>5C2e||CZ$Ysde{JL%jE6@}l>f{=ARP$G2X| zyzHTs=ED6iL{OaSl)&%y-%r4wI0Y4-#9t^-A@aKv)wQasirY#kJ6`z6J04f1=YCK~ z3RC&gJG?`#$^UM#CpfAoZP>PT_vA+{RBoXZDQLE|^%tXjEU|0$ic=wIlh%|U+0M33 zkM#}$u?zq2uLSL=JK-NW6-oeboAb6ldsUT2$1K=840=goRL6ayR53B@?b-gJoBqRb zY-k9!_D9T^<ZWii7m-93b38X@sqzVU1OIL7A{o=u=Q6DAlzd9md7iSOO{Cp`z?cxI1g|3dBcwewL&hJ(BQO98J72m zdxR7F=GS+gn{1g-EdqlKCN6Cu%Mq!doHv6VAitccF&dL$;i8xJmfdekOm}H)fFI#WdTmv)Jt(prn z7YR#%0#&{ti%w{|N|-e-UAT73Faa(Pk!dL(DMlR?K1o?4NGpL^<5w^^V^+RAe*b57~*U%7XlIg0%Av_BGYr;em!}6 zWvZ+fxqAMmAw>O;f^UB@2k~jb7?^8iM}`8We{ujLq_&Bc8N*9U6Moa!YgPBaA^NDa zYz(iicjVbx^7BZEu6knC?@!f#hr!k?B;d;VYZF4tq=aJQB8z&vno^rmhEFApo%p*Bf4)}dKDapionH!l}Az)H175Xq_DZ^z&0T`U=-s1gR9^? zLI}^2H9wF~Ml|(UoZGNHurlMbKxRx+{rb*Qpv6Qq431+EA>hP&+v%&;%ZH|B$FMLO zhLz49X;;dTxELS{!p_+*?uEm^++O)j(cxXtDeP|ZW_Iu*nG<|ohKt+gyUeB^Gg-qQ z2{66l{*cKFFZ#yOW++Ze*Lb3We!z15Zusn!T30-0RBA+1g$mI8t6dhd?HrT*`d=EYl4OC78Q-T{*f)D!k+??`?QJKbNsKo^<>+es|6!NH1dN#(aoG(pZ4N25Z>3+faB@04+3q(XzL2-=nSb z&*Yj6oK}9mb~)#Un>>4V>Dzsp_z>8(bs_chdghk4K-ADZK;Y@hPqTjWkD#yS3h7G! z&ifgsG1r7^JcNDFNbO$9bMKl#g}^dR-3T_7;sxB-Sf)%)LWE3a6I8pd)RWmX__=is zp-3T|C5Y(hEeTI<_?+#(wWX;-x%DZc2O1mY>K$QG1O)>0LCgqa$Ldrd?2I&%g6?i^ zP-P!FZxOt%a9CMi_sa#S{wwTzyHvhjLZkLPOue_BoPwpUVG>0=DH8{K9>q9l))DjB zO}j@OztEK5@KxhUei>t1-~_?ZT74_hT)++^v{qb6QNWPxMFl7`C{Gk2PK9tr5W&U` zo_uX6iK1gm8$Vsi7}A^yL(M5cov)0GlJ@ZN=yax9#{Phdl1ivOfj@InbIK4=f-UF6 zK(k-Ke3mvj2qT0oL_w>7V<6$CG2e-7#$7Hj7XC-Ksm^^!WWnVkE|K-@r>^6v(k07+ z&pRht4LW-mJmOQhB$)EK%@aYZI0gw+itIi3@B*#R!?iozfGU(?DNV)?#Ye%)F@hM= z!VXl>)$oJk?B&=>S?F|0>b2U!yPv=sQB`ui8I2egIJ&>A2Q`^}@R52C^x-wJ!^6zW zG$R6LI#^GoJNHgLnDMDdGaVQ4WCquK24~(CJ$q>LF zG(bQ$CMZAj>kSXCT363jz?K*=mKE#zf-x|C;DtX(<~de;cfR43KIoC-m3iQLXM2xG z>{rS7L$n6kv*G+`j)k%O51X{NWalGv~ z^k0o}Tt2-I0@wbAJnR+Lb$XvO`aFZD|Gdkn(|hM+wd7gr91i!?3t@bFC-z!Rd6|Fy zoV5L@B#+QY^|gJcKwfM{KCtWXXQR`--2lY1;N|P#{a@LU-&d{AqsV9bTQ3oMU>E;u zT>XJJ)q;mL{fD)mYMnR9>er)I=lesX%ORXOziqbOrqnN2ekS-naun=8b_~o=KHtta zf8GP<57nae0FIm_`^>;WWTRlxK!A#A>E`}eeUOJKDe)pa|HB*1faos ziV*0!q-Nod7S|5CMCHJu?Dwj}&xJE+bNmZ0+Tq)`^JRS_r$Mq@mZ-l){W6oCW zW+wv75Om)vPe$ye%NF-;ea&|qOSuoLZW{(G?iOL6^A!SAWJDX4V7Lyt)+(unSK8xl zMa;}bes)UN@U~wjx+V%1U}}F}bhrP^peiaXH&B&e)ww9pZbeUQY=SVQTW!-LJYKa? zVnjq0$jGIP)vMPo>Qs5SdE&EWro@}tlgR%&cJkpMuw!K1A$Bf1+nXAxPN}$a5ngnC zVrhn44$Ke&&x1*0a3jiWCm^);$U!5(6fRa1@%wbuTmhWM6T81(19GvI;Qd!yx5|dO z>h1K-|1CzfC3-JDCK})m>fx%-&m7hlQ9E#olV2b>6u|0TxqV(mkQ}&E15kS$9!q=Z5NLs^;u) zBcGzKVc1uGDfq$Uh_8!;M^}WdTg2jqP3nTCfy}H6i$EGaNj{x$FxVHky$OJ_rQm2{ zEPyDnVFY8?D%=bqC{;4E2KWHR4Q3n}Mb70OL?%?!5>5T&^qMGGsm>i)*s&3S`VLAo zh{a83*9Q-bP$3zc8k73?NC3J`NFVHFaqDD}OX^*aX^N6^1&S=DNom(;$pTBwv#3hG zoQD?`d+LK}c03P2C8*=!#{%gm3QJyT02)Mgu7(y$-6S#A96V-zHaFw_g(A-2>6He} zbh)aRb)6Gi3KnbK7#4;YdDGw9o34>ju678TY_hZ7Zd%6n42$?}_)FZjh+l?j;`BnS zzr@3bYJeQY-}_L(;uJ0Y$VEn|L_M|%8Or}7i5aC~S@~*&s9zkl2gH%Y^Q=r_-m3Az z_JGX=EW^rHK^Qz$;1{c2&?F}IXKE)f4NV^u18>jY0^C6N=l)>vpvR7T@wcgz?=NiP zZ^@B9J1&X3xty>iNs$S&Qz#sPPiW9dhF9vsw-e7b^%vT}*HX$IX-LPRc4zB7$^?h# z1f>5#)Xe7?+5bsBC{z4(xbU*e+3Dz>|M~3vruX?f*8OUt$XEDCtcwd|%H06S`@JjH z4;VV^F9PD1&4r~w$LszNJ@0=Ho`{}gv_Y;1x9@#lN8auN*M5i}bcny{`(I-P#%>>> z20=2#M)%J*51Vl4xF6OaJ_qvygM~pKX1&+dFA86vq?L=;fShYrxb}_1*TN@b|0~w{ z%VD(ld#oI9hyyQO9IomR;AVi*+eXLz)nUKv^<_{lviLnV@*!=O?>ZL>+yI~1{Fjcs zJs;VcL_ijcsudd%#*?4goM$Cevi%y!znJuFxxB1sz-vq7T~zI}Tjq}H1?OB^a7G72 z!h4+%>cK}Q$kikdgSLXZtyBrLk+CIQ+s;O5Q7skXViLG2v{40zlDJ3A+n5(~QfAs? zV2ov9>wB=u>aT)}8J6+^_`Jg8M5jz;?{t$TbWVKia96gP^+)bRrDUqwwWS=R+YgC& z7Za?UY8P4O2821TDFl-_(=79jl;BBsIVLpc`D(4DkHO{o9~C zA$dII&>u*s4th#6v^AX>(Kw4yP%>R1<2~;taz=>hu$maTku3&qHPBO$2GNRSFF^Xk59TxStk#rMqI0!<7xRA>F?F@1~&DxN4eK zd+k?eKGFu0u3=~&#gIMoFm$dL&Ba?yO$`{)?RK{LulExaRQSX3%S#mhKT+2{-wfEk zzMAG}UHy30{RG*Ji89wKf2;5f(fVp^6hz}03HCeh%M1;LUuzFo7roXAQFfB25CV~T zPQ;`rg?3}_-O${Sc&sg=+!#%w<8w1KF=C`3I5ArINw!j(O7W+1l?C^3!{i355LB2| z4l~&BRof}VfU@`o;QIz&j>DrzMC^oy{#25`p9SDVAyyA1os7nQql(v^b zA&^O92-A^>sNqa%jbk1DDY&7*WLX7_XDN|e7iia_VIENu6vV&MBGwx>mwS)fjKYTY zcies%{0WJUOfiNoR1T*t#seKp>jRVx#Jx9dwlP27e95{SoF;no0fQzK4X0uk3^sp(P@ECX0>M=DSoG~DV7saaD9m_vw) zJFzjfYG4ACw5Bt09l@w&8sFub;`iAVFuzlQEZET7bP2~Cl#7|O%fRu%{OBiXt{t#g z<$;F0G*wEhXZCX%A(Lt{eMW8r#Ol;FE8+871YmfovWWh`|0Ok}YxG)m1?-5xhNch>WNA9Z2m$gufm*^uPMO#l8K8vbp`W(LdKD>!Zxd8NBU>0KjbAhC3Kar`YCAIVbRVUXM-&4NI_Z0WRPJ0iS@r7m58 zD!O|ykyaoKkA8CO^jAtbFESMn@2OQ2o^WA<;<8E77V=b3gk}>R^zJXRTLOAf?^r_=Pqo z4t9=A-nglLdb1r|wQohRF%gseXnU<@sYlWorMNS2e(H+*TKGq7E0_`f%XMer-SD+7 zs(VH1EVS*Q6ILVO0vv)Nn~#C_Mu#QwzS~%(OeEASeBqY_zW`c@5s%DD&F%3d@iaS5 z|8rS%Jz-bdws>z==T25z4xV)>=u&Bj>e6M!5w|(%vH->Bw%(ziJ=^!~vU2o~^89SL z9=Xjq2BBy}1i1bn@tl-Vnjq)$8vVQT^`wl8*SX5;936LW?1e zzRx}PV&>C`RfphlNPhK$1(S@D(*5{5J+zU+xA9+(d}ckHR19ttX)&_;r`z8d;ZO^p zfSA|;6ac36)%UyupU0}pq{W2&P)rCa>8&F-X3#Npjs=LQ*nG2pX420NG=`z_rG2MJ0Z*GCglME<>U|HRBYJl_ z(TN=5wk2NU(gmtEOI`a34-pbA^i^OTfgl$haSFBnhDERtzYb`{z`_bX7eO}NI$SOs zKU&tX=?2LrdJ0bU9U(@T-tMPHsVyAhDkXRZQM6;t-iD7`zNFXFI5DX zlkqg9?j8I)dJ-wznD3o?hRKpBsMLsM*n9XTf!Fv$s>~IgsDk)pMv(3nwwNx9)ub%| zuVpZkl#FS$D84_wzhm23)k2iT6oCJcbJ+qt=g0|U6I{G-BUvZ;Q&OmU`0+ba3nv1i zrc1bFRNcLKT2i;U^XYO0h7`qDV~#frmH=G8;r zQ3=DD;3<6ZhdFcunbB#eHN$VE)0$if0_%O<_?5UK)XZrW8hN*6qeSk$A$?R=v^4l& z&(rW{oAo>5ny__Ku#7GV6N|0`6&9OEQ8E}xs4dpDV@^=UUfSfO(IQAKj)^iXMui;0 zNt=EV(2|;oN*k0L>L{j4PEMibE`~2>MpxwoCDp(3wJEpdj+T5-T#eq~>1<)gKBLkR zoGL4l%F?VW@5uPl9`z*)FoqoAg7g{bvk7V$k`sv*UQ%2*(i67}75r1qYNpE2l+I8$ zG#sZ2*{q0F$xGSbP@lNhu+UV?#%JdFgfP6j4{QCR+0+lS{HZ*<}h z`ax4a3mz!u`!53eUstz(5o4I(I=+kbO8i}3xTihvDv&7lN=NDcY;h)R&sja3>df?) z)_=z;C^G0`J1EZzN2=DPzD8@n>w&4gVlW0G_5i!pBSbpJO5HyHJ2Eij{XNGk^$R?f ztB)}%@)yj^C&!5F(-3^PQcM9lAl zkf!L;(4vpWLJX0leB`J<-yK_Txk+0Yt%RHQVtI&K@ye zf6Ndh@4&dewHHh@2$IoK?BRQeVQad>(cdU7a)+vzm$_AJ(oc8@0)+}4y-`_8b3Y|Y zxW?$o5ytvZLENln`(`91y=VkmfiAkKgaMps1N?|0G7}yaPM=x*mteNbse?_RfoyN)NN_g1f@e^BU7wI{BwOk+wuHGH6Yay$1j%9z15KsX=q`drMH>y3*kh`FUZSVfV^Zk zFB->3H5srO)a!v}d#`r?N&BeBxtNg9-qF!fgZBOBQk-gG#5{iHa-Oeno``eWZ@DOY zjXzIwAN!K4Se({djOw9|8M~mMHz!d-?L1{w`Ck~FAvzW;G&n%f;JRba9R*~wWeiy~ z2_^p4PlFr;el8DZr{N`QXB zC#XN@YcNXWitstCIb@ERw?ZLRm~`YCpVO#5%>Har7<~=N3(+E@4B@^>Ya4p!!WBYK zq1;;saMCEMzOpl*w8n@bG|4N`Jrb&dQymMTmNosm^dZPu#sLl>xk1H0xqxeVPnAa2 z`BMlG?K);yPMl~PQ66S&*%X1Nd_%u@DDxgS;8PK$!AI23TJ~zFv8g7A`Gi{Q-w~Ez zXqQp8nfm!eSPRJB6Z$7#+At6z&PLeQR{$-dsm;ytvH2%q$A(Oq43n^pB1KW8$m^0O z5wiEPO}Htt0SY+^xcfENl35z)G_vXNASTmLe6Z?{d{J)A1?SfiTEQmWg_wnQxzgw* zjqnw|7;h;u#BY9^h{ZTL<5AXh?u176EsG0_X&4F>RX+(HQ3jboJ_z*qSfPL=@^@zG z@7;DvqUaZ76cHWj=3M0@r-MIv8v!qLfEkhdX*@k8@%sLK zP^@zh1X;xImOg=>v|GWTb^dji;LPvMO62vv^m)Y(vumj!@d$%%b2>fp?6)1|tjpT0_-W%m-M+|ooZ0GHw4G^ivS7)m z>dTz0Bnah2b`lIyH zf}OUopB@icFt(<;&{7rbg5g>-?DDDOV}Ap?5PBti^gmJJ$3eW?IBm}x z9REXpx8m!LEkSD_>%z%`4m#5fz2w6&#{u{7M7fkSszpl^cK}6ssQ#Um?JLjOm#ub= zcZgHngTY4lgDvw18=+^3+s|HDnL$MgpX&3j?$wJ2O3{g*W`Ws<{F{6;wLWFKpB@C( zoX*v=HIP%5ewQu9VVU7F16s{_13w9&Z*L1D%_xDbCCndT6?(z zNlwM?-xjF>1%e$zHbSTe&&|cw82Kx}*Ws z_a#&GHdLQ*q6-xT^-By2_%zsb@FOp-RNILZNUrVOmcSTt3yNVrO}12%=}T`LCkrDT zh75cQ2xW>91%Yo+8(? zKKs>UB~n0OnIk?+DM@X*QuT0#rO?)}NFc0+rsQ)A$Y7f(9Dt@AlwcAfc{+L0q@ufn zZ9W9Y4|{)sUzTF6j75^yK;;B%p>!i1OGZev&`QS_+7uab^5bfHH%!LdNEwED6d@^w z$>J^*fuv%WE_8N>sY*}iT#c$-iB4i31>qnpGZCSLk$XOunnUHN^A>Qb1cQrzvV=V6 zy({fsFw*6w&?c2fpja>aY9~`TEgY?Cns}qC7d--!P}$*iq@NLFDA6)umJ~(%p9r*~ zCe7j7_SNu|M+lW@hf47E=v&`&Ba1B<7Dd+=V4vOWrQxEJpu_5g$->S`eKj|%5wOFF zpsSM=z+oy014PlV(3w~3io@Wgz{=XAg4jWZ4now%JIqX|4MRl>u%;43!$>YTGR4hQ zl~N998ROd>b1mxLxXlq`eTf&N|6cQXkbK`C8^>Ka!sOdC#^bpUHfMKTh7F%Qw#nOb zS=vJAC3P-b*vZ)q8i_nMQ!IcNxagsaPDpBMA)mAf21cnfA2+=7vhWe<2FaHMbtyP6 zbpG1Eh=ysF550gy%5#AG#i&W4JDy>33)oy2|M(N8DYR4h+w(J4|I|A;z?`eKN`>#&O~25d~rsr|bTJ6r9zJ zHn2vNjPqqmgnO-3mqmMX*y=}y{#ZLm#e;Bsi*elWQZ8x}49 z5J=i@SLx7m@YGm$q+wbr=`v%sH!LBxPq1;IL<_bymd*&8d+T%g?whJ%ec8#QLCTgXkj-Pz4;)PDQkRgR9x!6UWk@Lgy7BGb=Sd)z$-e$8Le-pm;KBckfG?_+)YFrGWNWaLFi z5TGL9FO;mVt^D}g$I8lT-*2ZS@`>W|%jeEFE?S9b5Sobl;Tk!#ZSwG=3g^$JFT*O2 zLelIhhty;hj-R^4_V~Y*7#9VigJ41+U3F<73=`9#TTjfV7w;>$)*fh9gZN(1+uwnU z*!coGKN=d!Jr18AeV1Rn>ofJtD35lJ^%`gQc>%p{r4zE5!a-v(M7=sH{r0hFSMR1f z^eZbz^=ngMkZ}SHNd_w1NaxO%^|=d>2~Z@On3`TPM4|Ehx5N@T zx-+8zU-SYW7YnampUzv?|6Y}j5q<2qymJ$w2ujjK(&PGldwXjOT!F4mDw@uLfy0OE zL$bt)$|-01ny-Nab_3hcvsTU_(8Mw+(gRh;X((8;yL+z>vgKDB=NfU{ytVtAq8IxM zIIl>hQ+0+Za+#P{BY!1)=KusxK8U>mM-IlU#4NojG9N{EMYog?Fz|@OlSX2Qzz$<6 zj!!}o0^{eP@?Mb|U>gK$5jH1;dghr=&Lm}b&CYP`Ta3Dxghhpfo9l7a`sg2pXjOgF zSl6~>gX2c_0>D`y@H7l%(aBot9`(RAgc0}?Ur4-T8d+FnLImOeMa*}{Mk@g{H^%kx zG-%QN-Q9t=&?pPd)5uNcikdV#uOB`#&p4MrPPi*Vmo#-SbAan1Nec)@vQiq_AOV#a zEU0zWRcSVmWur@JAZjisWw>&EFu$t$)+%8u|3f1UH^isX2+H@el9SHjY>>0#mo6>w z%shkz;(92yMn!YObysq#%hQ+q>Q|<2!?nVJtDzytLMKx0Gmm7Ftd_*GJCpMR)S%Jh zC&Zvj()%6#BxxhD_pHF}6f&Z+C$pYcOgG;V;*OD-LNhK0gXo}Ji4ZBDm_?*eY<2B8 zd?^-o;Uel1-@NG}kn|WQmmE4!PMBm5Oomn}j9e%P+loo?6(a=b z4UudKc`TFZhPy~IO)ia9im|tiO~27YTWwdQ^ScRiMHp*onV>nlH9iK`XUaS^p324K zeTNbCH|94I{)o}-5IvIG%72=5?PPhThY2||+MSZ(46zdz#JE0=+J_Xbx*G?%Ht8pQvlV<8N<{ z$$sA7?jNbKY6|-ji^C1NGzv}~B zdC6t(23VZLTcy@mHlAOufT{gIQU=(VyKoevdp1)y^zR0L+9dlP_!?jL0_SWD=IE2u zm6V4w%}u@QU1bg~+b_LD%7Z9y9MFAy6o%$5v-Zea$>#Kb{L>|@$oxZ$u2`4K_w|ua zlsxnyre()_<8h`4ze!)me*zk2vt)f5zB)Mk380XYW4^C=+WMymJ|lwS&FqV34h)?= zesMA`?x`c7q0G*S-UOI}d7ih2*7^H~NXo1CX9d^q6Xg?LS7^7DXQLZEav$YLgSa@X z3wch_!kyb1=QnSJ8y3+fAK&yF79VTQ1ix;lM{hgqs&Cx$^VttSkoe`h?m&kx_W1`M zFoN#vg6{66{`gtPfqM$yzdC*VxBT&}ACB4rH=+UKzz1;k@-=d9JSAl&($~e~8!6%A zNN8?%ZZ2ts!&<{4^h*{We%tYMc6NsRu>RsiL!+`d3e6TfIf>c)1k#Zv=Ya z?k>Y_zTlftY1&n&57DKQ`JR83H+lf3mISF5!$ovd1a#6ohM-u%iU_Zh$FtHSbhSw^ z9!@B4E!07;28P_yt3-IWd9q&ERLGQV`#3s8XMuc@qU@UtlVcQ+I$_^D#)r))O19V3 zwtMb|o98r76J02*%wbT~wRVLmjKJFZyjrUCo*nV0jud(^b@2`om(|`)^}+{P2qIUw z_KZEnJm|jZZhb%33Bo_a=)YQ<8CC; zN7#UfYP15fE_4Fbt>;6x8V#oQ#eWbhNOU|c-6BW@It48?uo}kL+0j zBrjGjkLC2B0Y=S4dFl9}UMq}9M5EBC!jHfw7W|B0kq%!7S#!qTMUwob0KM?FQI=aBF!VS4Py^c#BRCcBu7XAbDKCY{80fY(<+r~loaU| zu|dAgf|{C}FGKe=$DeQ|167{%I`3`0nr~u#o-;bGa$3wTx}RH_^B6{5m{?Ric`^Q6 zxafS3vZu%ryY}1P#lA>_$Rxt*z|jsD``M^;L-3Qb6$!tiQr*Nj#&`|!bJ(Hy*gX_^tgr$9KiBrDa?Bg8kD+& z;8x8dQ8{XZQp{hUYqQ>Cn|@v=nyP3840CfhSi?vnSc_W0QcT|-f#oIGN<3&z8H;jk z!A31)UIoafpU3v}Q*3-(@sOCt z#)z=P=hOsavL}S&6r4uQdSPJ^D>T|v+LTaSC5GM+?WU5 zWr_$?7%9WqL|1AazROv&-8V5pS z<^GSRuMBFdYuhazoZ#;6R@~j4qQ%`Eg1c*hBE_BJ1&X^Dm*T}CxD@x3KJRzdOnzj~ z&Lo+==3bZF^3Ih%k$d;{{E~FhBl}D`-q-+HS5^(Nho2M}AiZgvoSiwiNJ&Zgh|#2A zM|M*{dA2p5_v)|A&AQOgqF}o2%f~jEnm^q=3-j{wF6e^{2GfWjYZKPnI?oys{~(BP z`RQ$s#q8sB%c4z9E)_Ba#>s4=OYKE?t=!&z)X~vV)$!Zq$Vs~4Zxo^H+4*R3WJ+QS zi$yC4@0S+ySt(llDAz4}o7-GAEU#;83Gq(Z26`tEoh}l19?#y6qJ;uPK$XBQ2|34T zqxC4NR8x@s^?{_-Y5_V+JTk<3`b<*QBR9()&FV*VMtby@u7RX%Nwyg>Wf;tmaI4Rj z^>TH|y1q|3=B)E`<96awF@i%)>`JqmP`nET9>r-YEF`%&Ae+%#DsP+%=;p0~AJ+b! z%)~V^#hN=(DOl2p;knUmSh54)Q>eGTFu_ni&-3rswU9R3U5J_h9m)mxB6u?t7KV0F zXoT`9C}9m89)8z-7yD7_XoP*(efqRi^3MdU@zQ&Hq!=L>(S9;o^dYfomS^R7v0^ks zPYh?ZnRq4QTGkcwLs>Wr5tyOA+3<{N%`*WKtMj zVdnEJag{iV_Nsayg7V z98zI%SlC}2Lav*>z614x5FVH6g9<&7i<9i%sA3@vm4F-f##vc2i@B7#C+6{dZ<{gs(g`dsjN? z%;@}V$@+f*>+_jl(rS|V`gm>5Y;k*n*WS*VPE1HPDK`!PR1HIM-$qF(gj0bG5*0N* z{$dFd#)~=GYu_Gn0LpX0pi+!C6T&~4k=CjT!@?-(bq9eu5<*JL8R<<8 zdCKH(gXu$Po%5zr;H4u)ueJq#Jw<4G|H(NcwDtT5ZOGbxK#m3Ym^0@m(xHT9_<0|K z^Qg+LBslOn76dwU*ktD*1LQ^FJ7tkC-1i!u}5(*?G{9rK75{JFjqSu5N2?5X+_@y z^~$I407!I+ZgNd=kix#hq^P81QF5KbT&NKDs!A+v(f3P6OcVO^zzszZVfCjS{8CBN zfOf`l4x7b3ip4xs>ef67!Ja1Fr=xNrt|e&@y9hm2vn~t|MLz!#e?4jJQ*dq~L9d0H zEub7-fMWD3<)2qg&Yq))?MQO0S4Z${FL_(UA*{zby3RCd1)XJEN791!urb&!Y< zwX{%#!U$g^nmwma&Gb{B6bB1snwIh0VFJgqLS(_!sP^siV@%WhGbkcAGYv{Ne1E@_sS7K_d(e-=c_D zV~n#6D!DZ9*3iak5%P=n~v7zn}vgx4YW_GzCRSYDH z6kF*6sJ`}a;|Sq+liL!y1Y;~j96*JF!MRLSHwSrC`SD*>Kb>8IrjM#`D&ADh|}IkN~|-oE(+i%@jyhky7Z<>|XYscv@Gp&e5c4>x zwegk%jgPMl&AgHB3-w7$L<_j`k1#59zCZMIe!1(G|LuRFF3Sd%_nhEKJsjky?eGEQ zuMd0CdOZ>U*@Eh(LyVG&Ak(5GB=wky2%fNSDmTF5g6tpiAtEx4_7myJQF#<<+#EP# z>3n7t@`JB?2o_p@yd5BUA7=~f6a6C!23_b%bq7CuY01}VW^|h$Pn(`Tg%mDy+C^|> zWxjWJ=4WIWUS<#KoShrg&1)L1Zv8+Hl@;MlS|YDov8kD_Q{4B~_hE~FZ}Z;hv$^`= zy5%8oh74Gcq?_%`8P?mv9Y}fJK?J-hYS0s=VqGFgB{q`0u{V%n`4z5m`JHRokE?NseE=R;J8%s)D!?2tcJDgI2d1OL4I&V1au-y=O z59E`3_l{?P%giV%4!_D6PM}o*0 z>z=i+lSOTeBbF@hUQ>F;_-g>n&>rfApGvj&xxH1&r`84k2poKMOm~Sy^+ixGzcIo0 zaGw}jFa$u^lnxWrHFST@Yn$H<2A>c-ZPb!1@8YCUwxmy;nz>oy08x9}kIkIb2 zOj`TC5{suQ-Rp}Hs(qQ;|5|E_QZm|wJ>Gz{Pgx|UM}@o)cL+1?&!E7NR2Jes2qn)I z5>1XI@iRYy8%t`BSIK*1(lv=2C`I~wN#m=2VC%zuxO(7~i3G~$Z>8XM^-*GPwzQLnK#nWG2CJTGqsSIOW zm)kr218%N>ne{fH>4#$Mmr}vYq*UC#bit+b^>)qagz1Y!&wLVgd8(0lVcf6uFe#`N z|DIAp0^6i7nPBeUtOFN0$s34~&_M$A@~52anoRU9I_TjBEAaWUcNX3iwE3vdTQHuG zxA7s_JH)CD);VTyn^13@j#sx#H;+0 zLL}PU7i9ibSf6<*t%GZti!A7cCH^Aa8BW00P`LeFr`Sw`yECWs)*0Lqoc1NS8;-NdHQ#M{H5xg&Q{le?v$RgQK0y zlVb?Q$^5qUafY&hJR90P=>yEiLNp9T*9KUR_-F!6sA=cOVy(+x72@3TA+BeG8~COk zi0w#S*vw_vW~%9K+UiIvT!AKAd9D^xvvKSb5NWmYCn z%OqBa+=Qb+TLYO%ag2r}TI@*4{VXhzDj}@UZ-1sP14iz-%)hI_47tPuOW0MA)KT<{ zGv{Gzx#8~Zel4VXOSU2vwwh6RAjXD%!?M~UDWW=N4>5)+!U&Z&X*8XtNK7Pj!Sn}N z8epIS$}tjCc6FtK=4%TyEweRjoSi1lTZtG>7_pHU6@cuB2QE*H3XRz5<`T@`U9Ygu z5f})0;U!hVNnJq&hr+n`nNv9~Xr#opr1#P^#u#?4)rcF|bz91p@QdNIu$5RE zu;Q5*PKeCdsZ#BQ5&xVLK+P@1mL4!DVetffXBVUy!DQLg%rrp!Bt_jS?c=Dq^l zHjfjl%t$lgcycsyjS}>cn(Y(q6BU6?)m9&Fn64Hd=nODQMI?29{ zxf7Tx2XXo^@8V@Zh^uSEnK_(W_9*Q{I~u6-a!9GZ1ibfSr%lLSBxzK`Y4mraO2#ua zm%&npZqPnwQcDDHqo>W+gJ4kPv1HTzB7DG3NMOPE;TOD*dX&Q;l-!{{*{)x(Gj!wVOPMfW}AsV8MI!o|E`e4$l7@ z2hP9hpd*{_IlexQ^ZxEn_+|y7-jpF_GUlnvxc2dHfBylh?#x*N?_YgyHtxE)1$E}( zu|y}eiF{s#6T#f3j|c;xQ@$+3Gx4>^ujg*Qzuj9Gu9EGTzu|`ky-hklya(Rezump% z8DG@;(Bl|v9OO(ij82{X&syz9u~+S?`$s+@K4?;s=vyWSeAGaQs_1Ll{bta+EQQ6I zobLrO8Uh5r89f#rK1)#hWp{UXTeYmID}dU6{nIj&5~F)3{yDBbLS|N~I-p%OZGC%) z=m!aN5Xx(oUDh(pT)%h$u`T|^zS>%w8sX1pHByFI=e03@vT;S4h38B|Of-y5!w!O0 zRUTN8a6{0Pf?bTTsr5HvwYu6W$^_Y460K}Bxd-T${Qg<11E`_(3ZK>!Z6{qU4MMt*c zJ;yPK0UYJ+PvR;5+go0DYfjD+ZZ|SrRV;qCb?RASK*wh55MCzB?gx;^SHkh-8-<6SP$O zSh10HGgvzjCsuEI>at?0RB{Wop{2JxJnY>kG`$XyxyR`8FITMduP!ge7$P8+8RV$` zYj3>LNx6*^7hHG0eFkTgdsUTTbh&!3toqe>%18(qP{Pj@9!uH#J8(RLqSS12#P6`#Co% z0L?5X{SP$5u?{uTN>L5p&pp9lg)RCj)81>5cCEMds}oQ@NS8GMMvM>xI0(0-EA<}Y zYjS|1{cF_AhapTOP2VGiU5(zlxYx1?O6+f5EhSi!{EHv%=q^~53e_UYx(Y1P~|>E+UaF>Hq0*N>Re<4eRDY=YgHVSp%&2in#;0}s*p{!=fId@; zigDk!;O2kMMZ?u?R#^5=W>Yf$|Yy zv2SK|_Xd)D;>Ge0ATw91UqKv=SC)sxjvqkfXTp~psK)Ywv&4Zw}nhNiFFF1 zjz*sU^Kl(cBfXtTt4<3^n@M~iV8!_M{~k77U9gyhKpTJPlHCm6Dx?jQRxgsFm}Wr@ zTO*z{cPrd=5H6Od=v1?w280^UKaiU$_&K3SW`*Aqd|FPt9OjKrg^#8lE`n@M&jCxc zLRC{G?&-m4=k7r__*?0e=|^4KXG=NDc*M9!5LJP5>{U`!eU{{CKmD* zaKaQj!YmKk^8^5+6qIDhy+AO6V!SQO&+Au;(N1OqAd(~x(Bn`V|1 z-FmM|-!{JgL7yq`<41K`V75e?nr4Kc-3<0RY>R{sO=pOfMkE-8AR4wvHXK3fTn#ie zGc}u5tU_!<+_?>ulb2Megvl;*Ef6q5W&2td@B?b=z;* zt(#nEem3HRDK=k1hdApoJ<=$|Wg(4J#G2;$+;HyGFqoY(&1K7ht_eelduMHUjt>4L z>$&qsgXQ^wznp`f4FYegjIXMUUt@zFGlI4ud)q9VR^(v|=D$>_#{AcK(bsv=eMpau zEr=Sk?Z^@Mw)KFyIogeFe1^5^J&{qhpl=AIm%h$pt>5Go$|>sS#_`fD#SR%_-x5*t zj1{0X88{3W7VbrsqA2>nAI{4;Csrm*2%qMDae7{$!q}BTK{;}iZJ_;CneG<(dt!-cHk>+$Ny+2UKhHvG0>P%K3yYax5q!d-C#TreVd{H4( z`lt+D#ITMBy3$Co9F>EyC1gkMCq;$fzjH4M;YsI4YZ?~vUPt+Qar>)1NhrUR9*Fj( z3?mdTNOAA#0hSy*$1-xtv3+Y;CL3(Q3YmA%#3Hc=AOdW`(!6ExAu;=szU2VP^*mv2&$-jF7G^!**nY zWnynPId-!M+PS;C(P3MSrXmyx5w>R8>(~C1Um@NmCe%wY`diX~uqYXc0jc5bSi-@$ zD2}kxLnxeU*2~{V>bmyL&q8iJ^?RP3X(K3-m=W<@2bA-sq*Ej#B*eVABkHuhV zf3rTnfB~9j({A`(2t&M!T(bQxW-8S6Uh7bv!bFMUl7hCfiWdvG>KT&6g9^*0)93<# z5|TPBQ*3i+GZI!>t{R>3;C;kgE~Y{*vNq>juxFoaxBJn6nrBXE3zLJ^#67;?fp5VD z(<{=`o)o^D+Kmx|QL=d<^DrFCioG2W!F$jOAAPwMA@sW+J-M_xOElj4JU6=^dzGfA z%SX@STKDFYbrscUwTO#HdH}vN=6Ej2cW4q408uy?)DM zWDg90%4L;gdDBoETSC4MQNdVhIAz}lyNh_!kGh71kc(l_q4GV%IwU9me=a~PuS;ld zZ=wmE_-d#ZE3Xu?ltPh`z~KjBa$j`cs6*>`Q{o!8%v_kut#k|x z6Wzk|kTHTujx*h(R<9y@qfm8PdWkgdI4wjk&WauEqfGn}mqaL8*f@pweldQ^oKfEF zPzcD``izIfE=b^NigG~U;B~wWB}_aMg)k`SMK#eOlQc}9&|ts7sjgVe3`2i_5Q#8g zHE}VkS*})L-Y+v^HKfwVC*Z|>YQrAyinIfvb_+i;dHUzmEela^l=-- zj<@4`2VFQy__PJnuuhrWY7QMKoCv!$b3#dp`T3+#Jfl_+kuAJ##AlvE{~&s`Sg$KD z53n@AC{WbG_=^Fnk*LD6OlZ}@70BnZ_*^DQa7z6og#gZ(#*&6G!4A{?M50cR6S(IE za>SqXyxG4UIKM?2zwF>aWEpH&woT>5pKE0AS7g_CL05P$zl~pU{)Nsl$O)eGV{pKh1r}e{ESQxLopq&R}1tJ z8m`Nf{b~vRYt_H&um=xK4#u&pyliikj`;G|SG%;r#MI+tf`==K0C`}*H6BP)SLJCu z-e!=r)K;wsbc>EVB3|?arR$mGf*T$(kYUF>sqYyA8$`zhWP0NwL&lX%gpVk%Ea*=B zRg4x6nn&$8ed2t$^rNnGfN&zu5Y^u;kP8I@i(W=CG|~baV}E}f5biz$u(!FNt!SP% zKi}Ko>oWAFTi-+(AxVbg_F*42{2wl)<-1hJe>V{Qo8+k1%K>#73y})&HLMY{DAWh9vV;REH9n3j#37{>!+n^iKy=f;fL)^K}5L$bWwQn+y@WRJcA9L@8X~*y#4`x&5wSe`eSNNuT-j`=W{*y2H-qV-tYJ z^Pnl16FS4>_+;y*wu0XgC7WO}%qrHNJiEwo5P({Q|H(QuactD+Ra!j_6DJ0VqBYp2 z-g!o<0s3?NIae{+VL1dwL?;?-DFd*Dk#6bJ@<`G?dVdcoXyp^%g3nazs&5PhC_$t~ zxXSQPGZKi_crtw4ct{e(!M%pzQTlw!{maPPp-F2+`Nnu>5$n+6s-ujFi9VPQsUTJjfM*dMH6x#i;yxYKUaRvw2D55m63lGz$tF4 zZ!B08Q|PPWlmO;m?mtzpBI3*>R;Y8|s|NE$!|jA*78h!@#P^ay0ZW>U5pkiqVLxLt zg^ZBvXX~va^pksV)EF{ZqR)yQzfW&Gzsh|vybsKGr=)EB-eDcNqk*ZzWPr(XBoR!p z98QBAc_V?yZqkeXm(2EX8#?fdgQ2l5hG`h4nBLdXF3Jo{S9#WZgp7edS#Rpv1VHe^ zwuQFN{msPb7e!Yq9?^u;iTLRF_~?o6qp?Dpd=hyhSVO2s!n6&yi3F{dgEA%gMW5J> zsRYRRsb~r($5;w$l8^E8OJOzAjKUd^YyGB4NLonmGXN|MEaHkXs_7e=O_LkXVNA7& z?GdubS8cQgG1zyN$!0xKS-lp{Cq7F7aAlY4+--@@OLqynj)~q)rbC0pkt5uo+zA9YBUW*`a8_4z zZS%=7|4DEnahcflP6Po8U7gZ~z^~RUQ=MJL7cFhI)04huYn~$0Z=3#X#CJIBJ9-idoCI8zb`WnaWcVul7q7$BMd_(C&VrF|JLQa+tKiS z*VUxRRetr~6r$hxq8Ofy@BAAb&lMdKqIzUrYbZi@=M%dG5kU&<_0uP{#9-r>7K!tn z+T#47RL?I-86)Z>7mVZA*AF)*+=%`q66sQS4`wIpTvVS8FaOQ_WD7V_T<`F{(Dd8h z)ZhK%y}562(K*1&qtnTiXjsavl3M*H5LX!?;Bu4i+3gh$@M4 zr|Qp8)&}d9o8)KUi+SNMObt&Z@MKGdwi1JFM>IziiVX&A!`uAhuiLIx3rNHgg}M(z}wc_@$l z!^6Ej3)rj3$;Q5KL_4{s2~y0A+nyAbH0TnfY1{xo&2|PWi8I9VT|4!~yb!1>*w0sa z1xF4ZY;tfPU2sbH@~~qg6{!6t*e|x=k0lx~SgR-Kn+tWYSfM{seG7F$1{u&RyMB!2 ziH-1%IL)0)=rdt2)RjOL;P6`Hm|#L{#or<)jl8prOGAVYD~d)0DJ++$QT(R2aFbh< zCsDxrq)bmk!EgH2h9L|slb)6&6)_{xMq0DDymhk^8PeNA=1fgcUehNYgQ^NjLR<|y zgLngB(1yWk%z5sR@V=`AscqoU^t2iCMvJ~U*&UzStJjxq51ph1L%r3qvkpTLhH1-w z1~?5T6h^R$T*;5EHn~U5j^ud9td3(tQirNPpkH3488;U@<5rF{!nll`PjiAO)M-H7 zU(K~$fxL=iThKh*mi6D;pqT5myqLgcL?)ZQv0QfkMRXc*H+j zI`eUsh6*~OUjb7Z8)IF#6Q5hsK-!aX<>fOao3&_1{bgf48Qnv9&=cK%;Iikrqh}XF zu>r+P)@t_q+XGRChK85hkcxt;o~Nobjfct2m&rAGyg;iwe>z;DGeTjOoD+Kf4G_|F z%v;+I=`_Gks+R<)hVz9^NV*fWePdCwh_#+m(yzw3tEQWfNEV>;KJwdqudL}3ArFuIwjKY$*$;aJ* zyNkT?>-!)Q2me7a*3XB^h94I8`Lu?p#7}>?s)Zz zHpbIZXbCNE* zpswjlnc?T4zyOpVn|x`mJ%XZ6T^gO4RCwnUXv=3?v&9%F4m`|eA^*b3!;VN+1dKsU zUlaJIl8t68cXojOUHrnLr?ku;O76V+!$oclW3k}=A%!g@ibCRO>WnLC+ZBxcOTgw8 zdVs(nN#IPN!-4Uj=#rDI$jHb(7IoLy{FM83)N}dBnR`Ge*1E_|)_VE_0oYe&q45wp zN!q76i}BJo%!sMOdxPJHkWW~+D`1(f|Br3W)#bKi_w60}!!TrjyxwLf7@;F_*4_U; zTm*sthF0B~O~Q8m7nQib@veW-oZZ~&)T_gK>hDt#6=nRcPiDxwHfIGMQ0tZ)7xMj|t(M@Va$KwT^an6R znB)b=uvX2jo=mIaGh%+Hn-QVT2IGefJM8IEk)@z{gsWtzjFS+>Tji>Zz@m(N=HT3k z5a%pE#{Rf?jP1#cXkbh2?vdRuX1%&&+y5nmku_##cxy^bX=;CRAQMD)gV-s64^X(9 z5QF6C@D$lX;92KZ5w$9|=Hllq8Yof%|=a7?GlP2TL+hCEy0c|Uv! zOT239L#jDJtpYn{B)57UX|Dqd<>u~FL^nD~=psT8{SQ7up_2_I`Jj5=`CeVi%im3QCK(@2WK!Vkv|LMVibqF6qr9Z6GisvrZhwW-x&wtZ584_ zb}FExQaP2JmpGLi?CG(qMaHASU8iItvm(03xYrK=sXe))f(O2AeZI=wP93PF>X7+` z$dV=GQSG57O_C-hyDSf&5y+*Lxt5uZ1jCnZ7l-i278DoIh1Ob4qd}+g|BiQ(;Tox) zjPxgH@La#PxCCb47$MaXxJq`UwkKy7bx}#1Xsrs&My{wi+eJqbeYlrVq`ZeT&U?-_Kdpc~TyIkty9t?j13Q1e(UxQzzdOT@HLn$Os;d z4Q8X##Ax&>k6t?S2j;~tC}wUS8R|jhdd0Vq+vFs8YKXfp0ieV+3~-s6iE&QH*MKLg zyWaw}8>Y`OLIK^*G0$FPUZY&cexvRiZC5}cl`V?SbuzM@?9soi6W>P&Ite>7i$WIr zJ9=fwhJ4$3zUt{hR`AYLB|)>dh(b(Yag^U9Py+v|!2A3*aCdu4nySqmTIeu0J<2-# z|4>&XGFSIv%S%td2tuIuySEPhX;9-v;w@cRVk^_}?ycF9Ea^0TJ$NkH+FghLgVQg# z{FIZN)1ya!;>2T*=jfA$(4@)FmY~0c!lFw=kHlv}?{dpz81K8$?}nI3seezJuZcAG zJ2qcgKNvsFWuS#6i=uXUv$C>2j5zy;Io}N5FEP~~K&7pnwTUY7?jmI187_bp-2NAn zHTQ0A(){kvuMTI6!+f@0UUDFBSlB?u;zn9*FqHjv%{!1Ae%pBp{tpT()}&8b4S02Q zk=)KDyB|PiA5RCIyVs?8Lw(S;glyXgHvjE(Ov#t(ie6uvn}*23Bu06hQmW7Dn!jWS zSktZy8=-Q3;6cRk(`Hw8CjsWNq_+@QW7kp4>!L4ms2V zLu1?vjG5cNT?koGM?OGlNAAU6|4_*Nl-nM0g!DvnLsB6QER`!d3{FJi+_4zA~>;seP^#mup>(rF!3z-R1Bqy?`AOFiq2v1 zhPEuvoKPP(9*R4}wImUOX+K6vimcSnISI!>F+0|!>4kcRlTWxIV{yEP_^ZSNVVxdS zw4AuJhT)-WRSi?+XqL%YiTK(4(-LdCN~s^JdW@&9bL&T47-h6jbbu<1l0sInT{N&L z)~dv!M99zMkQ_o}=bwVdP7`_uoE!hs7UrX6DZ}B_f@YC6_&9_L zbf)wP#L&^)9Eu3+{c>b2t*U=qfhBT2arhBAI5==g1OZ({HACUP_!X9eOBHxCu=*|} zms*!;YB}o^yrm1_UL){@by%Lc_336v?O7O%+bRXK^HMAc0s@}~rK(}F?V?H$ z)L5lhp+HTB(};v+)m$$q;tM74;Y?1HleUaqu?UvtuD{f?==k%u#yKFaF6i~d|H5yx z1L%FRAsl$`4Qb>1m@2w`yJT$LxN@0$;U858acepu?LnffwYYXqjNb+R$&#Sm1m1_g z89n|~`4Os!-gmfNBhHm*&R_FvKX!o+g%=!xUt!5EhE@|SrvB*lY5ww^Ah}S@j~sPa zt|4cn=>3LTAmzG3|$UsmSi4yW%X%^D-Zg!V(+f}T% z>t3^*sS)buVB_ShfSrq@M-T5}(rUTxU@sOe;%#^g1?h%|)CY9&!MEIt?XDleTdt2W zlx-vY_uGA)+Zb&|Ehh=~TrH*x?|eIpIJdhWiKSrpPFv7BfRVw`g`7lBRciJ=m^L*W z$`tsSWeWDWi@r@A{9WGuq*YopLN1eW1}|k|JN~n@?rjIRg{&jLGrg&tg4hj_0p$AT ztb*Y-lGv`|F20OSLMD|vH;vNV*PPfRY z*33JeSM1VRiwe1)2u<^ysg7Kqc(7h}6$#DtXXu092BIL7;x3;~>NZuL8Y-Iq_92j6 z9vC=ZDd*frPBi6{z~p^GA+}!2Js4N4`X4byhoHchIks0!NS(r?eHqK4 zj0L#@$-$mvdo&`xYve{B=$)pVF@&4}KHAGf7CHI(Jx!q@X>)xYy;TQ&^ret7a3Vf< z^reu&)#ih<=^+ID4ij+HSPLXIDp~P`0_Gp9k>(so)fYf()mI)-*wU=qslIML?0S!%pb*p0%LibJP!E@fldUZC z!RmxAi>8;y)XOP}U6Q(pZH5&_8DB^b*Wgakh}=7LsQ&X&lZML96q_Y7nj*|}sB(xt zEENGk-J-N!pUlkecV)~98G(1G)^J3=DBUYD9(DgTBj=xSv!;QS! zz?vhf^p5qMz^PXEM{-7VlkOiANmh~v>oi+D!sXiOxF78)y-Zfa z3Z>Lekx!8~L9p9yrl6rJP4p6H2@AVOc=nU1XMbsl^`)6Vq>1P)q81uresV+zfbrUQ zftwrn>GE@JBqH-_Ns@3LwjCfdA`QOt?5$s^pQK5FiB^)S6t>-LSef*Y%0API6;y%iiCG!{JFA9XuFa3k%$-I5RG6NZ@t!?M*NhI} z+gi`-+RIV?8-wxVFP(>eyl$6E?#q3)pe)hnM$uiiw;i^Jr=^~$=ZhZn!RwQM65$dA zJVJKs_2BOF6=LED$p7tLs(o$Kb+$(cctb2A^Cdq-@E3O@x;D^|I* zi;$i_B{j01O`M@$EnGu_JGm*fkpsJR)$`ZSl!X=B@{ZZhW)$%cxrdD*RMRj5b#Mpy z(7cLdEI99;Mn&$Nm?<5YbNaFz(dB7;$2JiYCz89y;4ab4ZQNz!chLTlA8)3;d)f_O z7$B+o4)Fl{+&lbpITjRF%$h2-RKh`~r~cWtX+eZoFLR#hWc&tUS5&Zxs1v~wJM1U} ze-R|Z@w$0=bN88NHR>_Y+nDBnWCcNb_Z&S015hDVF~I=orxwG3=N~A$)}>j)+5D>( zH9nwdUcA@$f#ML_wdcW5Ch6b`fOiWb%A+OsDME{SnmlRC>D|r@qv(OUHeYu2-}4;h ze^ac!H^z&qOW^0c15bYo^Kl@X+6^bA%?1}VfYN<9q+rgkkbN3MMv}H)h-+&p9sZ-T zDhs(lzLQ+E)?>vpSCdN&l$^UMNO|gODQ7Pmo4OtzE@T;@ z)=?IaAxo6ydG!`*PtZz?h*fqVh8Qo+crP9K|wQ4&p4q6mQZ z3Uz?{sV<^*$?UUOuZMskX%nq26e3^#EOn1~A1k*z!Da2FHQiKQa-jRs7tdGK2jK$F zIm-x(lZLpoY6e5#YGZW}C?8$YijRp3cE6e+k;NFJp1euBBED5*L|M7&o5?%;eOXDR zi4Z3(Op)ixMu9{@F}V|n+xKPLZeiz?(;=0sat>#B0bhacTMG5Wk*`KtsE2e}Xj7=J z%x4S^H%@LM5wBwt2ibCKIy$P0i?&DWn5tlUN``RBA~{?O3=cJGBC}9kfB4@LX%go? zuAP8SMRl&8QUWoA2ih90uD%+yEFOxwL@r2HaQedr@z?ELF9E;%`|0{7_(Vm)lf0XI zkUjY!3Y;tYwkvwTWc-NKbMM#Vz5998zOHMS$hos`cO+@Wfk(*yA(=arNgS zFhie--`Xew+=GXC#|-EF|8oJn`JC3u6R-H(oq3!$`$pvc9=VPno&(zBI?hX8Uuggg z(uCR+bf{rBpaWmvs2&3iN7^7C^(XU#J&Ua!w~_tdgZoxHZklbYpX|KLtd#y#{5k4w zIl;Yr>?%X7n6i&)9yN>(KqOg5TcX>5pF*S#p~DzhS6}htyX4A?w)_NGKW@EB?N)vdKe@Zjl0y3ITHK#_40e+Rm_c9E6hi@v1pKPCnrwCWk+ z_5NvaYO-6XRJUiY`Dj;#LZj&xTWY~=>%)!_0~Wx6ZTmT$Kh2+i*A8d^*}os)inN>% zS5xvyWJkap5R6ptus5fCt~!0{U66GUX^WZVXy;d1cIz0tHfUPwbQ3?|>28>id2lsx z_|-Yy?GTk)d|Tsh5wu3K-&{wvK5>wI7WnK%J$ThJ2;I%L{zi;xTDQY=YW!l;EOed2Av4jMZ=yuUT>&vaV$Fwcx7mOpnypMn zsry+cHx`5Cqt|PWs`<3*@!TlUNC7ST+XkE4MDK4wFk5eRT~m-=>+iyKuXoN+GhTu& zbV-4^&f-d~M$D2Wb^ml{~@2v-76KhpU08&fB}S&c|8R zhd15j?}PX|XE1qy*0Su`iAPxd|B?KNq@j(6Gxz*wVu&BEipDh;1Sxawyj!)8<5*Wr z^lN?CaNu!hS@Y?;4z1^978Vo~#7}4ahkeB^a>vIN%WV6Ne#GoXnr{7%j=TE$`U*ak zN^3Ke5+C2m(_uX@Z!;>DN94gbAzNn1=^!>pWn;Eu{U9p!h{bmlCPGIKO^_hL?H)Q?!8B;{1YD9bQ||Jnw61Z}9(U`s$#z`tRxD!QCkm+_gw?2u^V+ z6nA%uySr1|y|@&2cP;Mj?)s+B_x*i_e=@_JN$w=)p0m4Wcfwlq47-gTB0&f{7cx%{ zvX%#!BPGR?5F)FIJM%TJE}>2_d>StQjDTZm{SvmFw;*~yT3S?7j4BHM*iVy<-%t5) zsWiXqlNij{&^V1U^@3a)ffrn%UuX}hT^V=Ib_teJz$&@LC)f;#!N0vH5TA{?8oxtU zu+CG!;2!o@=cz<3-LvXD=!Y2OmC#9H2AJ($C?Lh-N1+&`{M0=Deq6Ac<5bVBiBwCr zo$?(|QYvvg9&KI=0YL}rGn;@bLu;fB0*^5)#Mx&h(HWF)gesM2NWY?~qcqOGDGYz# z&=;6yM0)zcRs@_APJt~plkpvKg=ZyuMAED57PJh!RCt`^zL9N8zisveNYxO- z_!pce+yeF@!ZKhM;0)b$hF$OUX#oDGs3I-e$xXInJ zPD&s1onSB2Me}WQW4hRM?zDfqO|sVw4=$$08acJ^T1Cf}mP0CJi4ID*8(JC;f5BDH z$hPI_T($FV-g`9|?f;uRURje$kgjdW8|2!xdSI%Q7o$$bhup^w&&Eu5W#Hk>?8ZIv znJ794iV5BxvD=q7+>m>rG*TlE?G8)i6N~!uBJV`U=ZvS@i4674N8ysjPMn3_fS20- zKQB>|mpoR{KEQJ`UbTrW;KbhHnI^@iNeG9DxOJv^*=bSOn#$6*FZV7`J7D;_$>#U^ zFi)I=EAeHbB+U>*&EiKoB9=AAUW$w(%}IwgnSj{KS?|ZwCud1EL^zJNw5ZZ*x}Z{I zS+ATw;pmVD(TeT)X7xT=LmJ#HrYALgWJJ|&OU2DmLaV0epU>P954^2wl0|hS?$S_K zLrzzWC+-ykfe~adz9e;yy_R7F}$lDI}4H_en+hi`wAJY$58f(P#0KxdGfNh zUg`D?1~hxk;)(yTM`pL1ajqxvwX3(9>Qr3^T~yH8_}R5=JFAANRwTy1?ij}#1x(@X z>Ao9JOiqGj;pSr~lFw9kQKf!jSET+bR7k_Aq9nnZZV>ocTO73CK>11OzF4&`V{ zm1-4&#YZ!Bgv7hW3sK-n*yl0Vu`K<$0O_QZ%|L$gir9ugduOR88nC~9X2)-sh7BrkGR)HcTrYgB z!IHyLaQSu9sSF^c2|#LtVEgga&$3&&$aHX9Ne^H$rF|qYibN5|p-iEu5KbRJZxF2k zN1#o>$i?sloZv3OE_rXa=8LXoPV9ql?tj1%Nlja;yrs%TZZz>T1G?bXfIcc(7k_l6&Zucvqsgyns z=aR4xV-U!uN)k6!GERpjK&Tb*0xbiagHE@(s#e&}TP~Nj|EX|n+wXANA4C_yBa6=^ zoKF{S=cCvFEA$r^dvIduNmb{>KQ4v@1cfFNNbX=g>}|Q3W?4Cw`t;;c92!lYc`` zoT+<#c!hnUYQTrfRQa=j9*)4|bDcO`$_o<_iU@@Iel*{6zE{)vD3O<9C|_PdG^ArT zLpXY#I5{Xf7_&V;CuBZ!=b;B8Giu6q)7KY6{9U@h{!@0;{J|aBTN~~#&gh7@-hrsc zA8;G1f>x7*lVV&qJW$kp>a=S_btz8P%c?(D(2!)64b#M+X0aA5+X)W6`n>e&L4P6- zKm+JfL$Ad{o)c!g2m2K}pj$;&Q{XMPTqu2@kWy}rkhpI$VFRjcAzOo=CV|(x<@~Tt zDd5lmaP}m<4a%Y z^>HV{-c-lAQ|}@-HY7vxuXa{n;&1gtaaoUktW_;O-$f#D`B)KO@O8#uWhES^#$}&s zP41X?rlNu^+B_z=kKFhI2;;y(P@D!nCdBevmDeBvdGl6Y6mAqYcH)mq{*F7aTFd*k zqVchCg{o_`UEh`h~&>Bn< zvvX~Q)zK_>d+|p9p>6xx%H{nQmiqI4PV)(@`P$FkaF4K^akZ75!wPV-YrUNmZL&!E zt=X)O(>wm($CS@b#4t5AUG4M{)LE3p1WR@P!A2Q3?7CRYZB0$Q4Ql`G0Io&92eC(! zg9me@-fBF(e=X+N>C5KX&ljaISci!a(=heDW_`hg^Q9&H{|51s!p>*BCcvzDQIg0y z?J`C(!VJ6lNI_=of(R3)@n3U#J(yE5sQei<{0&Oa;%eml>o?i#<1nkB83aab5StE0 zG7ORDu@luFFPR&}dh0O70{>zNni2r5FqKTI`;trk+vRrv7x=~Jcer8y0)23VX2tPK}c+yg+c zU+^$WX3{n>L;dPmUYWXLGyRoow-7WKKv3(IAy||^GvROGvk#uC=;m0@yVu_O&UJFF0@*w; zrx0eksJp4E85d26iyeh1K`@^&V%H>PDY6f{R2_&*uM&3oMkW$7;0tHbQ^nYHXo|3K z!;MrV(u~TCe=^pQPirgU)(TC;s!Gx#z1LyEqGN=oPHU$im5Vz1F&G7A1S(0dhQXef=jVq~S#grR@p+ZFb;QmzMI zD^{vTJ8wRYqe5~e?vxkdN4QHkC!cyz&Gf>+eSJ;yf)vm3ChRgc0U-PWBc7c$iAEPv z+P{XJUUdHRbg(4Ml?Dr>oGWg%M(7r{ox~4vxBl@%1CB_kBy9pSBam82)l`Q_G5XTE z7&t9SjhUgX=XTw^BFOJTRyyqE;X+xON7{|+_uue@gaktU7b%*K+d}3wfbL_z<4fk; zEB1#Lk^8r%_)4%7Zy(Uao$O4XG=*j(zg-NSqy&(XmJ#SX zY9HVL@;`V`IveoxAopv$hKzKexD^l7BXuLqx{!)!10iEbLW=!x?d*L1wM@pse$$a2 zLYVsW7&yRrN?n#$*uMjSmUu|3zl`7Tw zaaqM1qUerw{TVmr+j(-Ybj4f-|8Xb8-6EHL@-Qk&JGH{uBa_eoVTL|9ocJvlR|X>Y%`(c6T# zZW0Fs<#93##E=Acf_99{X%>YVB_Z9OY|ah%S_ylp7B= zCfh@+DgmT6sj|rpNYO~+C6n1tEDaFP9YF>guG>YdOo4jqL^1gK@9GL}DV)H0C;*_p zS-ofvE7BEgd+;YwzTWSwvZoxT&UvojKXW)?4_%W>N+gOR+E zayV6BzeshS;IM<2Ov=Dvmwc!3DuVKKZu@#*Ph}3)8#~ahC@OSU&u+a2f#EXttDr?X_ zE&sIAvD3Vy;~<*U(-za&3AN>g{bE=&5~Tafi1SfS`cEJ>c`-dEr`l>k@cQLuTb8)zi**`FMFVQiD z_HRQR+yc)=f0|_(?z5fGLcPRn()V|IF`L4+e%8_g>v6jO{srQJf41Krv4+6lBI{jB zoB{Qf>wO-nkVA2kW0i~CINmr4s{j8=TooH88Q$X`>!%j&tIF{@!!^AR_QxYW#(TrN zCoUB4_htcfdB(1r$(uRRi_K=9wda0s_4Ub#9Vrwr?8r?V{^+Vja(>Hsbr^C{{d7rw zY%-cp(lPquW9!4<;W0z!WDD;kQ$o-)WUU`&<1PIo4Vw$rP>Y9D- zs2Z2YGbPZc!xQ8+$<52Nb?1TlJt<`??_r`h1oi2ci_egt&tq)kV4pY)FVP#{xlN%qr}`5^nIw%gxLCLkGIksOd%j&#`%$PRUXkI6RyP zS9s(<-1UBAtud-m#|RYo*P{RJ<$;HX$E}EUa!8;p@j~`$`qVuygT41vYxBYQuv- zaqDnVFp*3w!xEawKsCeSCEr*AYe!cKH5AlGw)YK8HH@oOjP+3LfyfAkQ!YjPx7x`g z$NP2FB>bBBYf+=W9T`7d>e8)#9)x7_vXk#5$0;T$rk*s4U~|wO=NOUB=50E5OV(4k zIv6OEA*zs@gc_1L!C`R-$%5Ujk`xMgR6?xQ&}i}%NW(FkMbcR4OfYTOZP;1Ww--0@ z4i!U1#Tn70Vv{7-T$=?~j)1{=5W_-2+rsdat+5Qb^Ha+(Xk*ACq{b9chb%d*acnS} zgc|h2^v!Cu)D8ezQjbl9{-kfvj690|iGfs5-(bh!%JE>;4z8or4ZzbV?x!PW0Mqy{WP?}LRLVX0~ep={WFgw{^0nZRv*)x>4hu=+PX~Gc8KxOnC0Kap%Ie`?lbH}wg-vNx z@w8gJJYWf-ET{PFy5KI+Jpn+t-1YYDG{1VCn8P|LE5@Cyl1eK$@-V}G?r34IZ@9IT zdHd@a$C7@FLRS7XkQ$P7SlHsY!D#XKOR%6sv@=L#2>IJDn*m$P0NeIX`RHcWkR2i9 zOPn!%Sqxb^Sur!ns4hE52|hGSv)``iah$nq`6yu^y0C(2L3M5_@&1%mGU0K2LN;Q2 zNsx->Ed~P;^F&5e5x+GqwG*pU!dN*>J*lP{?QHFo!=^syCxwP?y~l~_Nw#4F6Ep(E zq@QUu(RYVh5EnxfDSl``YP_t%#mg{4KkR@S-VcLv>B=`=fzElLjq%<|1lWJ-W+@c^ z`MabuYs(u^$#F5x=WtuDGT%?n=yxTr-RCPhABbicWzW@~cWi4?{_ z6e8|R!}B2kW8?Fd>(u=+5_5Oq#E%Log2`&|*z6+7$PT9@?87$PF z^#VffaNSSsx{iY1T##oQ1Bqp(y&H7`p(6NQ?c^LA1#0k$9>xWZgl)Zm?oTy+f+5pT zyY*h30uPDECT-2o?G#+z0_f3qSR4pJ-o^Akz$UKubk%LKn z&j5n*NMsrwsC1^EBoq_)_Ta}9wE7#N+d==A^XiYL#^jLEH@!{$7Ud4_uZxD6qtNH0 zz=y+ zpnjcms51Dwe-wStQ%O|YAB6G#GAU4(P^JIx=*mF45=VV3pu8e>?)hf>6x_FNy}jI- zwh1y#UtGK6wwJ$u{KcvYZp(BWzsT(I5=1SIZM@><2MwtD27Gvgf%N*JM8f42 zcs$$KcKH|<+#j~}OrIpjZH;zfytIv3@O-uX2=oDSKE4~C&jMQ$JLbNt}zXQI*=q4G*l6A5yHvEjH3SPOuvDw^5C zdJ1z`3_EI}$zhr-QuPp;Y@Qr=Q0Br!FdH!-I%q$S9E{EN1*fW~V}CqXC+7M*Uk*JH zU!0_4%yFu*XxGfJQZf&T`|xuaE|468C|Ph|;dz5{-;hCQy^Z5Hpcv5Y*x#>6l06S! zhj5-q^pXZdpobTC7Ama}os`Eesv?~(`6rZy56w|aAI(t4Nl^&F0U$w`;<;%Sw4cf% zDo(w(%zuDmXjy%y_ao|zBSEvfz)lt_B$uTy*P|3y7$Xpx3}w1n)4Tz_Vr#zou#T5Q zHO)v1a~KI(bWO?z2~Pl#P8wd!vL^+p;Ni3Y1NIgop?&NfNCN3z7l6SpX7R^@8nBA?MTJ1*LQn~AMl04_$dU@n)9Uei&9~kJXMNe*$)B9U?TBDo70Og# zGQOt%0tT+?lA3V6P{qVoPu#tVxJf6>+eSyuE^ow`mmzccPsvvV88d5~b{}uW3~-d(h%o+zXXh8p`UiBY zD=j61EZPr!9N6-7H93sJW+zKO1dh(~8G8H* zkZ7kJO#X~5vWZF~rKL2)h>5m7-mD@g+t_O+7ahVr@xQcAPS*OIU&{zUkX4lYpB4c1 zHY#RYK$2eQ_U0>Pv-+*&gZ!!>N=82f`X(^&)`l3}>&Ic8_Vmf4^Ug1Nf)eb?s;z!R zZX&hrt`YFTjr^3)iwmBd3Y|K#-^;ic%u5$IA-zU(ER6r1y^{gm!LiO-{*SLHV~+tO zrCu+8U$%df*>38X#Q0R*^XZU0QxW$?yYzQVYL9T?*K`Wp-tm#n1Q(W@0Em24Y@~&`i#b zNE#Vena{s09DRWo$U^1I_VGfJ_^&p4do7*)9o1lgu{A|rxb;~ zdCOa#RWP4Gar|ZAZP6+nu`DjQp7E*P4zCBtPXzEDp`A03XAnj&! z;vC%m-lQOiZ6|}6A=2F3uE#J#(88jK)5?Xkm@-)<=#%Y>EXn7Q#K6L`?=F%hVynPe z)-nS&5mt&v!s5by3tJIMgjf)X?aIs%nKNE10S8=tf7s7T%fkSNZ9!fIE1Gl8diHqs zSc+0fJopn~tx7g{6PMyn30*dyeUBN00B z=5QOQEFF#wmWd~jg=P4Y!KVw*LM!GV>=MnfnxS(bgUC1l&X7=+O_QN&)udk*>wC<=cDePN=S^de zQz?Jg!&zJOlp)6{Fv^0FkeTTCDp3=fep-a2nW2JoC1gvlXc|A+rw?pQ70HVV1KZ4B zmP`5uv3rP?#1cu=6u7G49rmDCHGRqB$%2%5tGn3JL%|3Hd_w?|QxPf@QWhKMCj}eE z(x(wh(bENs=P+&!qA#Hi=_Ar!o&`#5mTrmu;9L8t`0ZYx*H;FZ9?#(19pzlrh0AzT zK=(^^ks;5oHqE%gvv-K6Zq0@y(rqjf>|Ex389;LR&y3xCfRug>Q4gG7gA4W#}fcI3TyMO7sw2f4%yJO}+c{N|>VrnbeO(_OLnA!95Q!SO>d za{hitc%d-<53EHlP>+CfXee-RIak;M>69CV1}*bkIPm5Zv$oR?%f05> zKD!|-fQBN88nekj^y%44Np8Fj01GLShj=luYRxQ_yy%j9Kqc_j>C+UlI=PQEgS-K? z3WZLV!DN9H3K+|1Cb-E+u{2Dh8!WBn4}*=?=B{2t8Ed#pg$tIoPPL~&qD+thnrAzc z7S63`d*~i+xQO7w-ntK$-pOI}eHQ%t6|(2W7P`eOr^JBPu$kqD{!nVJNjOzFP1Kxe z7{&$Ul=7cq8~YYtVO&fGTsr^4Ht$odh}i)p@Y0@#g8a#-y)FjBphk2ja&C6Zr^i~K zubVPYEh=B+UfyopzBbNM5TXiH>Oy(*y5i`$=Xrirlz4NNwG&~9!dwM`rkM54 zyRp~1@$y;c^^kh(1xp$k{sS-C!|VjzVMEs@``Ag#?V{Clz> zpbJ|@ft`Q&CSy>2_PD*WFMjeeF*DN^G7R8$u9wiYuA%(3(Q3Wsu@{aB<2IsbQAQhN zK{hxX;A|2B5QLozXtVqd<;7T=6-0;+N_+-p0ocai$rZs)L{6gQtUy&7WD4}=%~HLV z-Y?|b=n`|Sz3}$E+U9G{yjwE(1T@HD!7X|!3?^eQ+ZDM9IWN!>V$oseYRav+E|`2R ztiOVD>pM`;5Vk}GU6}O1uw4_q8b9nH7m`(|_CeRqCDKN)m3ojkmS!~&iC9R|l zeqfYA*6Ewf<+HXbn-;tG)Xq3e_9~MF#c4~PQ=FVFi?GOplqo1vBDp!N%IY;s_wnFu zq4d4b7X&J?CH@Xukgn}`nQ=W<2|2?-5wWyzy^%?ZDv0Ajhx?o2*gl~da-}17*~be7 zy2Lz~qoWjrbC8J%W@$@Rdx{nM7+Q%nah)N~#Vkfh&VgpiQ8R{#F`SZLy}@#GOS}NQx?l zkNjC^RWUp4L>ff-H+Vcsg_Nrpx=AvdpS)$^iY_WPJQPz3ULJG=>oXr|B5wJ1bB+PZ z6C8>Zs+g?wrK!Z9_0uB-nX}E#l?>ZgnUztPlP2W|1LjDJ0?*-gf1)FQQlw+Ow}d4n z1NaBWio9&6?uNUIFeo#yw$wT{)$6tLnZEDg1yz)LI@?{Ww_P}b9r^xq@98CZj}6@f ze=TvDb?h7+MJG~!|EAC4bNgq?%&gn7DL7J@F+oM{zqnf7 zT!0nW*3VEU)(~U9TJ-@6G}vw0tGR@e%mciEKkha z&D(_z)t^r}2zwKbTSk5)vpcD~^7Ta|i}-u%Fd5L{#rX_-ZgC<*n0;@ok+}Zs&)^TC zdu}};#XV(6=CV*)taBaT;o}nw5x2hoAcF*{o3@}4&3pwF3lQ1laP01lm~Zs>c5&^= zFSDvw`dt_DaQ#(&Ln_+BP;O+E;6^KTf9wj3cbY`sbaBLtH7Hpct|M6u2;BIl>Z{Qt z5092ZEoCNRfbr#QZ4o2%gpB7zpC2(%V#RQ`guYI)D9A-n&;`$) zq5smELT}OA#B#MbgVAOy+f7}`>M!}uqxV-09}j`C;*Y$y{_Ddw{+Rdg2l_*%LQOB- zAEA?plRXC$2Rc0h2G1}P=3BxjGO!S*g5DGI9UmW8)7kG&JoBC&GE3HRds`DHBbb4F zNM^&UueS%UPb6=)2ZkFj5}lI?jbX5%3j*V=UOcF&_U{%S@tw$eSXBIXl@mNM9;y5j zJKE4_=o~I17J7|fFroIEGG57`apmgYE)JNE0Wa2oiy+tw5^NbkNAMVqi4Jzd1^Y|g z(cgQM74~sQn+A*}{X5aE?B6hFIrMeyO&uO!pf12W5)0DMRKOGNg9ssiNnr}OmS0z- zb}go=`mDs95@S;eKqUB(!*xym^Hc%&HFh(G0e%D^#7tBJuzcnF+j#ikU0EPzWy|eEr)F&zQBWP%B_>HeHyFkp(I3%6FqzI@L!p)(agF(IA7n{6R2=<$Qcuv|> zfwp8U)U=RMGs^jz*(%YODV!2NFvU`DN0kx4U1yYL1Y(n7^UvGjKrHtMPrH7X&_*Ek zut;fOSiG;31?ID(QXJE~9R%WFfxf1B^Uu#xN(wNreLu}cOSmP;>-Cl54uP?khma7M zb8WV~o6}+#R7RLen6*E5-0D3tqCgKtF;4j{Q|l7iWQhR3g3XrOUQk)8ESZORp57Nj zN8o2n??W$6y6=bPEr0cGr%b7&^)W+Jhyv`r`=uq9!cKSDfKfe%AZ zfZgC43jrfRoqDwOek|q}41JWpq4R=2NOO?q$Y}G#cj1*zvNf=$0wqNhYY}bU>Jp52 zn2)p@5$%!k(5FL7T#?J2ZCg|WLjwzhB)y70MXYB#U%meh&oIfbiv?g&&!O96QH#ZD zNdhdqsF18-%%MQH8psSgS)pQR`Ni)^SQ7AmZyv;Z&?5`{=_FBTA=;0X`XAr7@L26Gv<0H>5Tb zFOis_%#PVtdJ$nni!rnIg!!UvQ4k!iyy>{-EeIr6J!oxf_i5+e8 zAh&FBewyY=PGVr!VN^X?k%{vY0+Ga^gPg7`U1KocF4w><7$V$vi`5u-{&jQz3hs~A zo$mAYSG{3DA{R7!P+(DQy$3Gkgk#~Kc~f-@ucF_Y9*-P2`{P|>GL8Ly4=VL^REa z02LTj{;HQTD49;Q=bszjpVw#fZpUXIj6k@;^j-sg3|{koH?^Vg;q+89rj+@JaK!a~ zx16n=KG9RHn{Mz;C(p%i#<^)Z8#qw`c0f${8L<9$7A<%}m96P6nfY z7(~meFm*f3NFj^hv-nT){Hh*BbN~Hjs}Lz$LO#kyD(LRI*KS5d4~=!39QP*rpWFZ8 zinZ6s2ee^6q=cMwuMhX|3wX9(vL8G)OOQ@JaMauD_vGt#B?njLu8us}0x>}@StqnT zrXzPmZ?g*rhY1_Z|QG2}y7Q_D7c*dE`;y27P-FMsH zi4oji0UMbu45VK!#0f+-f~{&_Mj}3{AbVNt?Crb3?69ZVe-mv~D>zcwtio%rqm?AT z47?WumfwJ<-jfu)J!~M%SpfVB@ykw3e-5EzpAb?!fkq?)JESY82x0V2Zb}pd5|L<|!wSPGrK#2G_GyaHA9kpxEe5lJ5Q+|5r&p<`N!_8ZOU(MLEDWY=7X~y<& z@^sRAk3g5elAITzXla#_xY)#MGEp_FoyDT*fpB9MYi%0zDabOII%qWlpg4hlEP^>K z`0g7p%O=GBX09WHrkY?JnSMTn_{p&flO2?-CkH|E73LlOk}aVUh6g$n_*IB@?Xk^? z+$J~Mq?R;-m5Qtstv@wHnqGK3N)3sGaJr>NO**sk>X zOb+&^Q~>fI>5%ZGh@7cS206^|_Zf)l?B)Wc8Yh7*W9%%25GCjp3z+>4DnTHDA#T z{k0S(3pCJ>m2i|4IZQctNt>wopdG2ls?q#yB55f?g(!3Jj^V9S8B&3%KoK%YeGvPx z(cHeu1#EzY^pz|pCD%|{s4W_PY~r*q+1(O~7P~N68w+$nfveP5WhkPfKPO%sRFY8q zCMByiMk6jf>RxpyY=<(jyOyqo#?z@&%+?jR>s~~W&yu$|u`ccr*Q`e$#;e8V52*9$ znEthWOV_)uQi8W)v8t|N(f47rmIF7zip!QZovrnYB)FQwV%#HeEvDgFM1sO?)h0oe zfpDl6STwV2flcNTdGUE#=F?uEawx*ljc>a(l0;2ll9C8W#SzD_*7Gj-yqbAIuV z;j!3$b~C>=K?rd7a_hT4g3HOrxp9!4sR zfK@sM#&xL0)1^JTDvGsC;cVGZ`6syg-rOB8Oam?ur^Utyyy{(X&_b0I7!rfyMOe-b ze%egV!tBm*Our1S!T1QFo*zxBGoNo~{^|c^oh(po?N*uWXXj#b4X( zOb;fSkUakbz|=a9PC-wf#ox`{dpd6k-^`!;3SM1Np&qMS8rc&_?%*WlxInC?oH?y4S+`7n8Odw3EdmLpj18hg`f z`OxCdCwVu(Kn(t09b`9NNDlUUYm;9$PfOeXCc%-BUw&n^Yc_7+Hi(5C+%ADbKfyf| z84zqyGbp9j0a-ua^q+C;?6UgkCgs7{&sO8QB|d@rf5k59fP&ysH{hWcJcfcx8CbNy zp7&j&SpuAFXPNg+85dE-Pb0LsssW+D#a(EwW%_d4jUSp1t5N?-z%~Uh%tkT@pP(le zk|Yzlg!gW94OW+w@3K_?CQdvcZ%w(JXDMnrV>+OGz8}$|U>_V%G5|EI@L+Oj0WUd( z!ukR(#8)V%W{`&|c;b;2C3?`6^Q`qJVqpW(jf6BsG#`SQb>RIaAw)k#n~AVV`pyZV z`pM$krsAYZrn(`CB>u5nv>9MBDd*nzpY)UW`zwIh#Q?T*2v4vBn()z~BijnAq_pO{ z^+8eC`9V$W3RlREZD;O z)H%8ty0KuHDIyc}q5>i%uHE?W$M|m=SyS!j5qSQ>II9v?%bVtXg}$b{+&LV<&AD8{ z(sJ>UaXdL3K~N*zNnYbb(f(Y;e}d(s)aV<=t;X>3+47Kp+J{pFrO0NVrzr{w%>0M^ z4?v@-6!4DS7JBn`xWGUxVR%zOF~dNlpXhCaOy{4h@(9>0t4rx@ds5nCX#s?D;bjC` z`ZUb1CR%|kltinLnE4NEFfD@G_EUCPdTn8lnL?X;PB-`^RM_}K zaK|{mu}3?f;mr9DB{!GV6rm@4r%QQ^u}5s*Z-0m=o!E6m-Y_Y-mtxOzS65{8a{(nt z$&a>yQmCx2iwmL5@*HT>rg&i2R3`A)#6R}0Y(0S!W1}8_<0oUhFkgmDdj$%#c1>i; zu!~=ywsizQjQ{J&l9DFHM4GVnT$WfY6+|9SxCNr`QO-98NnUHcj300h7>(N_COK9g zk@nhlE?b|Fz%LuKcG5?e_}h1TH{PtHKaTEr%H=Q{Ndyn}O()G}`%c>ahSH@||I+e4 zf#Q^}loaJ5dAYxMomblaedJ9>oUxM*mU&#!L@d!S+{B>{So?f$_+QsWX@6aN2VTpp z*|?Ytj-1REG;Lg|0C$h1=M&%%iq%z5N`6&aPQpJcjJ+)2+I3BNISY2x_IU?$xt};- z%zgP_&5>r%LTM^+G{?Qxute>g`CuyLO6ttJ=k*M`i-a8t<4ToEu*$F!rQ^uOP?3lf zGE=@ZP4`(#ua?XJB`a!DO$N*j*ww{R2Ws`l2sw>KM+h~IbjC&^=-N_(e$`bInlcTf zjvBX11*dE~0xfif$ozJFk>bfp6(q5wuo4yaQZjM#uN+u(a7>veF14Po!`RvsW>jKp z18M{G;f!ULi6mDnukR_{UNFWzdZIWHIsL{VnJE0qoltuT0DcW!>4y6$qB%MEN)Y3L zCaW#oYvvH5GlBc?-y}HXkvQ4rW&Yg3r$5m)pOt7fF?HXettRSaXGr(9F^LBte>t$A z(~=ANU0Q7kKdslvTicn!eHy*0BTJ!NYpf)*o2-CohQ=6bd=|%oZ}!#T>y#0x3OW+H zXkTl$6}pq`VaZ{RB*ba5S@E6_NAG^*g8w*76;v}#FAWkQ9;XPc(@pPwsK!GwUjQ-Y zQS9)Gzg+3V0rjSQEBcU+i(k9H%Ia~eC!04Jy0Gq!yskh&E zU5nPjKbHIMqSlSj*x#flg0v8tk<2O%w|v*nr&stL_<#*@oa7(fJ};BakC(kKy5{tq zHVo{@bMApueFc32=pjh8t^+EfPU6nudFb#Wn8H2eJ6ZO&-+i~fgShsKnB$40#MxtE z1p=5h)Xz6(SnOq~Xl9}okZTe1yXTJ#{8WaQC1(J*WvF8)IANjv!+2}cDbuDYNXS5Z z8c7TVi9Q87y5PV7$~Qm9`ifu9t$2Qu(W#1c`b(r-@oGKvbNK0mYM;}gDoB%TUfV-qZQ!Zu z&fAH}pd)jpJ4seCpGpc&WZhGT7Q(bJdrJ~4Sb|wfVjYDvBX=Re#69h%D(=utb!B%#0PXO?LS)?FdUUr<#TK@O-TX4Od5R);9pr74AI*d13se0MQKQiv_((vD@76hDpVglBhj60E~&3} z5+uc0+jQ+Et#}X6P>>p=@M|<#30}*YgKpatIKH!yQ2So6^o|n-5H8goD`YN%kS)G? zOVRz;TFKQfeGWp9=dv>wfl$S;Y!B}YJ%ur6+UEOXj^y&v6Ve)e5QPjtzby(p8YEEz zqLKVmIE+FHhl$U>2=+dZIAF51gNk7rv=Crylu8KJ&6`4DgTd_DlcEn1;|t0ywt*k;2#vT-n0xR zS9QiP2`1}ehG%e|yMEbkZ63}0br-Q`KbU^wHp;^QNHzl|363-9v(J=~dL(6_FyUiXXnG5fvh1KXDa zdann+%71GHp9zwK_46Ou5ae0?S>z~D^FqbC>00z**RQkTKX}U$WE)zWtRvn;v27r5=%X!SEgIn|`Pgew=gST75mtan+FHtMfH9PtXnKZP z0H@2vRCi=D;kD~G0^RsvOQvINd6bQ4`@(LQf?E4=M{@ zwiWc>64zFq2PUJo45KfD{w4=ca0`4d^Is-A@8V5Kv=V-pq`(cBe~%==l-*3gRNj;* z?(bs29eaxB(Pk_DoP`0bAbgh=t&8InX<;b%E!!enfZX>Qnc^)sMzS;Oc&cC?68C3A z>EXw+_n=AoBEKS2764a$#DoyZprABvKUIbB31=jt=W;lB>q)W$AANXi1NnFZb^iip z_Qg(R?^0_?Q06q78f7x6)=L~eTbVOkfipXnlN}tpV&@l$J5=3ERotGDBFHnNLsXqE zJr+I^OhKsfCsAwhJaJ9c&K~=eNuL*R^p$a-E_@x}{1Zi>E~b=OK$%k=DZLo{7r9bg zaOSH!HWxv7yoY>t9ZX)GEn)M(Yl8|7h#(iB7fq+FV5?7^Hsw@WDonL5vr;lfGxe!6 z^r_pq|2e4N*)N%x$*QF~@H=&@Oa!2Ysm88`xy+Vt%+zLAQ+_o-R_TU5>4rbq9E~e` zlw7MiBS-UN$WVL_p>#w1r194s+-Y&PBlmk`?^Q8TQTCQS1!}5(o$;y_e&df3eIWlw z;O*65K}>d>P8%p%V&FIZd;7j7l3x&yS02-=-;XZfM$$0390( zjOd_=xc02oD3pO6{I`sPG9nZ6JY28~zX+QO-yE>^k7T)4?aRdub zZdhPOVe1M2;sXJ2j{OQ+7?sPDI~0|p!n>rkvtm%#QO4bVOzw7wo@zaqdEuL8cm&ab zez0W1WihF_J?ogme&;o_xR@}9C zfws6?aV>V!=l9+}awZcFGdVL1`@7d(`?Ko!8@!6d6TA0!-&SPW$`E27Qwk1R5;Lu! zWR6IuF@8)7wtBQgj-&`gECjCXxVQkxm!*}(EotIXy`wZ)iU*SAEW&3VbP*H2#(UF;?z+M@_ zGL;nKVFFGp_S8nGDOju7uRnQCa`n*zVvJ zB990g{%SS*kmv4=!IuiZlMfTE#T?8cO@WKFBVmKr*0zp5Jg#e~c3>zns0MQDFo<{v zEMOq|xix*w26q}U30ln&lip>Y1y+LMnc#`AzxSJ}#`7A#e6t(VltQK$*^?9L{9}Zs zZMNL@<;&r{No=3R>l4h7dJm|=69>)ysdT3-wVQU{=CwB22z5f_dnFLfO&Hyi-Cw17 zvoEfx#anI!I0Q;PveFu@^ejS6P1VJZFYM?KQ1!{1-T(BkFeySJUv=WQFru%78dHrm z&u{vvJ;gT04V|{3CB%e5FWiuDhgA1i(tZH6P11hpaUwTzHC~Fj7>U(n9W=JA?M1_v zMHt1w#fngI+{(i`3^X|~p$>Hv+bd#9^_I};4cPECI~so)UP#0iaaf}S@q z#hz}lT@>;4Fno2P_1y?3;Ce#qMH9=D1ZOf80cj6Ih#kTLxU;gh|KAOWiw}8z!8w`V zAV*mBZ=?3HVO*-$0|;1-_@Ew@(#=y^`wH!E+{n*c7lF6vhfLij%$xpOSCo+w#4D2d zvAdFe0~h)07tcpuZn*+4y;nCIsXF`LN&27*%E}3SD+A6)zj1NrFbhg;T0T|jI@5g` z>3aI;NF^o}`;}QzxNGPXsc3oa@$j<1HMgOL3bXoz^K7;`^xszx!<~ zCff`WXkjh*I8a4rDl^GM{Ptcd0vA;W6i{IIzyUhrHOm7J{Hewrd z$8&vX-odvnHfNvVUNKCAGFF9p_WFqnCLn#AKZ!DuAM>j!ms8ilmr*aIqTRnj$8$4@ zI^`!m&IMFnFU_m+#2R_FtG~w{tMYs%8d>}H^-#~yy^Z9@vhjkEJd#YE)22wSRd%zX zh_|$x=LdGn?{+3~^IsYTEKNIx&wjc3$E#P;iwhjOelq%5b!YOP3e)4Batv=fe@TSl z-HdBJgL8>Do62Bk>thxy+zuDjKIG|; z)?qbYS8pyc_jd$THzzz_{90CLOzz@0k9WOkW_-82Py9{UWCe9sU8f{EO6-K? zbJ8JS(QQPcJFDx1X}d9mqJIB%Bn(NJBq5-la;f+{7HrYuGGi&M{Dmu;hcKm#ZH)%`D>!^_|Leb!7Wy-lOL&BR#2u+e5>h)TUN0rD|v;~@+FIt9L_(HRiBp%BgR54mv3E`jR_6n*+s_ldwEQRaqG z7=>`sC?Yr#c5{S*EKNRyhT<*s%Me}xtpMCE6!Zle)}VZZ2Tibl61XTyr4vk{>z{h? z%JH5@XAJlYf&4QbZi%km;c#|6oab7-T7Ukx0rqG0$>~4DEob0aZ2M_1Cc+e98hB^Z z`TTftaaA3ZOZ9XbiNKStKybwTE~C2cK6R~sT76*PY(GWDrIMhF{>d0%9p(85`*h{g zE8f3+`oh_CZ{(6n^r|-4s72U|bKj`t<8JDP{NU(shNz??F500iLBcN2b1SD>(9+~u zVEeGt2;mEP&p)Iny3{m2E_o{t2qRw4II(Tq3P}e*y-n*nGMopsWt#SMn0sc>{Yz4Cz~_`YN;Ax4 znioB?^WKn|vEWtH&-!cJiNNLzA!h)j_Ul5pBiX!{2#3M<7SD13xVN4>ENKNib06&M z_l2d8SZ}XS|F8K$_&n&ME(Vy;i4CaALST{H zgWVAjTC!Jh0VkpyUN_lNP0?xX0&a(e1GSLzMq8&AqcPX~Pc@FYpVzM6{}nm1OGpo~^=JTqDy)=pmB`v zb(_v=^}h~f4&n9Y7+-AF-kkj*orn1Gzk4R}?3m0_D4t8Z&F(}W7U+(!W@w3fnDz3Q1 z&+~qrYS-_mNJ}|yo{s$k?t}h{tlr}Wg;x{3FTLQE3-1$63??9nVmW!b#qQ1ZHv5o! z@?kS*XeT`A>f|Z-A~Ih(w)6Ewv&)y^8x>rtLiHQK#^XdwyqFQ%wblBM$F>|fz<&?M z7?AUiw};3A-fmnl1-K#mAb`vd7nAvwgF(n($mPn^=cD2*JA?6h{>eL8O5_o3T80i3z4!s|5B$)hviicpapHC@IF* z!ovn#TCH}f;lY^jRY5oZwB$^P6Dm;%3ayxrTn%ajGq+$2!IpA2@O?-=rfg0bre?JZ z0aXP1n|_M|$22$3ye;L!8r)0@1LS=urLfVYE2SXgWF+bb_C#$B6}%iDS%vWRD3pV| zO`v9sK!tOeNZA0hYeX8mDT80t+GycOmwYJA1YJZmQ9UEsB5|5CUrt$1UHBK8DU5p) zn-zgdHMyS*kV#DXGz^`b95g|Vp#hCZ2Eys^3W`~jH?$rj@*-OCg~~;muZkwr(R(sr zo>puyPN~;!wpTfP)d!Bci_Awcbzkysl!>{zMW^iof(ODfLY!FMqR1Jrj8l_;t1+g3 zGmWe9&E1v@DTI1mk+@}(D6GoqJ*e*#$ytgCa`o2_nx3uM9Ln zW$p_7m3WJ_2n#mLC@-wf7&>VO@s=_^78ZRpWIJ^h<@lPjZ`;+CvoIRHa-?yKBBim6 zrIcfU%H>LIGSgf!oM|mGk@$uBp6$eXS}t%(`z;zuCX{4sn!C0`Jc68!&|aAW8sg6V zy}4;ydldGNg-e#TRPZM{pZNGfZ7aUl(=hMRIftgTTJWVck35u?3X7*s@+}e>GGiuP z63wu3CSLeV4vT_Qf-?X;oCh*XLTR^Xtg*_)M_d@EX5&a^dhOGuJR_J}r@w+-h(>6? zhCe`dkT8gZ4i<)MF>93>!dalI-NiKy@A9<(tFZUTUtqDa?$#d#^&QVi40IOfLgq&0 ztn{h<)>PJFKfj6Ch!SQ8k?ofW(X+tTMc{lHdKxBZf7rgQ{hB7#&>q*+Wggl^WJI#O z4TaicHCZ~ot2nPym8n@HwTD9Yeb`*GSOh1G$F^Kz)={- zoB=GMQ^{eW^>xfRy=vicEYxt1tKlw8AZVEjX7c!wsT-p^_Z|qS0IUa>6?n{v@(|7W zVW0@%k@KIJUmd-VvF;)yG`rU|!Vtk&PrZDd! zooDgy|HCU+2W+z7lYZio)_ekCLa|G zq<~>S8Vzto@z(}L4LL12D6~$YQ=LO;V_^f9-u|946LZHL(t1q>fuWF`_=r7Y*i)aj2g9do9g(H?#HgsBHM3z8kg6yJ$*U9a!W`WSv)(>STx>sQvIly zOQA&lI)Ka{4rpYuSytzk-bu6;-N~=lw=)1)5ep=&k|JZdfX7ZKE3Z<9<#ToIzj0(c zeW(l&LZAE~d_3e?1O~n;7&@E6-nyL~F|tz2_9Ux&O#Df8QhxLfBlFa(Q{1AZbxX z%UAz*4`a@0AQRe4CbMMrSP^YB&H*Q$Igy0FSspd-3KwJ>^M7Ri30z;L&A-3jT9a%lJYuKFw#q1f(>iNAmT{O0z)3*i#`aQ5-`Jm}F#>^@eqihnB~Q9aK(7tTg} zzpwbd`)T$01oeNVGtR!s1YxTa_v|> z5(i44O2CU8r1avm0H$`z6b%QLrS^P$7^0#8ajO{$THu*@x=fV}l6`2nyI+p`&&lZR z9O0(+%^K8pX;!!sjmKbn4VyBzyou}U>EPy&*bUMag_Lx0CZtbj0>t>iLjL$)i3-eg zi-EduF=PcW_7M^n2@xs?HZZ=INL&xO&(llWCh_3s zF|+CJ(~nishU210%cFUtz3OEqrb#cvSMzQ_t|B7cRTS;}mC?$tKwwK)3DSx;1Q}86 z?X`FZcv$W$dY5t)f+BO*+GJE+Jg0?@1meC2A2>K@z$<2yHQs@L%oKX@mjRZhHAo4I zVN4vOOuTxw5fgGGX@;4ZnSUdgpXjWnLzTFh{bfGaM770_F)i%>KNMDFy1k z(7#9-S@yZg1#IdDA$3w&C|Tie!v%6|C$`cnA{0_s*+Qo16#7L_HNnVAhf}ddB23e@ zEO6;GMb-n1uL*Fa4G($in*MP%#5w^%lEMqxls#%wh$a)%#s-m&Ly<}Mr2yCuJvq#@ zV7vQmLf)Lfy2!U~r_5*PABcw?Kng{gEwQTKzBq+065g7g0&1A#SrM>(Wqa5Q^rBn% zom9Y-$sU7ow#z;WssIgJ-r$%xTm#)9Q(c4XAbvxJIU-uKw_DVH$a)7#kt+RBJ@s1* zh~)>VMZ7#&tokKSt$h(r$T6eFZ)!|HWlRzZ?e0Pa?k& zUEK4n2EbD`0HzPlf|#7TRHAYyom&ct#O)}A6-aPl+e1^gHqDUB)ypZC*_bf#kZS=1 ziP#b^9am=A^Se8{03$r*kwrsw3ZlqujrI+L;uB8Ofg}B53|H%vg0x?sbzd)# z4-^hr`vM*;wCY^58uT2rwyho9sEMjc3yH4L9cX~*Rp=EGR7$b;FeSWwr74F|hXK-R z$0L~OlOZgM))p_LIFOzK?;wl6j}55Cg#EOj>R*ZeP`rR72(SJiH)Pev)#GUCDqX&( zqe;O$@sngGglixJ!xU0IMaWLOS-xDURBL?(8yuDxO+?nGPl#*BtHCgnn$kSgoHj*g z4YOONWt{Fte@3fe!<2kj$MxMW`RH^1r}>2ZQ)x6J#Rv0Yo$85N^64z`?%2C)%)9H} z#MF12aK~3cx3#5Z?Z+Fl?W*RL4NS9JHb`7@h_xDLOndgCIrQ}o$qTkZ z2P)bMl`%EyHGRTn#?dG(Uw8HaLe%}Jf(`3Vym>QhlxXcKh4nCBiOu^dXK>D6xk}bE zZYd&l{a{FTqN{YM)SWOh&T~Gz{(uIDAk;u)w-*Wo^Cu{$#Asb9|k8ak&$6j3I=waLt zh#1%AcS#) zIz#-L&-Hyt0gbBR&$&Sb)+&PRj31+VhT$TCm@Q5QYal@?^smpTK;bDtZw2Dz_<^_q z(Maw+2??;~W8d7qM1sTc-^Lz)RPP48(t38Yzi*yHI3E!gZ_^KthI|^Oqjrdhi-7g~ zXHu)6we9D_XN%7EUOzi>oUla4)SK#Ft6Qc+4>Br-c*sN#DrMjozW*zwIj8mE|2ZDl z!(SrCntkitVw3-*OGMe!;b){*zg={_cnp{&ZBo86Lut zPFYk7+;ybQbP?EbRE8rt5&fbjzA#@D5f){ZAY5W-pC=$9m%4#bb$_dms*mD6G$-o& zE#r_YfI^l=gxwg;+iKp2GC2$|27VN$&IWdZh%nu|;G8cnw{vWaYx*M$EI1N=z%!&g zksLC0cbJBiH@r04)9AXRxquNu;@#gN<`{M{aEKz4c-k0~2SK}r>{PlCWvbQW)|T^$ z|K9*}gD@LN1fLvZg(}~wCZoJ1cnoU{VHM}W6(UB^tH{lu;6Z0g;dkS9@iyA%gcZW{ zS^I0CjVMz29I#R^45SSyHFWTlwTYZ_Z+FW4906xbDMX5y9W`goRX{zN60K$1M=2nl z&Upk~>z-BF`c4SM#WO;5;tSQ;d#u%+9L}1X38-#om<5==rj^av+ix{rl#uI>=9N%# zQ4laYT8%Z4eHtj&xh}%DU5v8k#gdNHh^J$yaMufq-7wZ^$qZA&%;_2L6~l?U!!Xhx ziVW9e@#$;ec|EGjl!((?up4mylX~%c&rh@on0V+1V*l5%p|-F%$NMB0&uKxJbY&h0 zkC{}XoPYocd1w#nZ;ZqCR`H@;(kRr4%ho0@{SHo!GfFO_J5($y)!4%qwg>9yn~9z$ z;XAP0FR?(hlx%)~Y|QJ-&*2ABvef2lirnL3=xE99q2rA50D90bQ3-U_WW5%$I6SO3 zVsO!MPzZN1uO%1VsV<)jopXz<53CRl{-H!T9I^);DM((5q@Uu}@ZO7Pv+%5QulfI8 z0Khk9lQf%OJ7RI@Z5O<%uW9O^kJhEzJT7Zu%4|QAcV8z=f=!rhA~QfG5avH2nbTYc z%8oBtu~?5fPMeqkHmo7(EexOeFj&%=un$1-%<=$B0&cdrC>bFviWZ09@ zrD$#z3P2*23$>NLfM;u?M@3Wj7N!@FeG`IdlSDD8g`?98030Z?ONimYy3~f$rO+mr zl5_=LBUm@Oa4lj`)uPnCC@pIxR6=%v?k-BW#JeV8QHBEc5;gGwtUK(lg#4;u`B{d~n73t)4eAhq7OrzSAa@59!E&p8;K%r5w8BR7Wg^oz}rDiTG zu;623pdl4Ek22p-3bQJ>muU`Pc?rT)>BbwS3D-o1SWNd$Vel0Eg4_GyOg(%c5Wjwj zF|2H>0#O-osN)d&UT~E*wGS@tX49b87%`%;jqD5lg7lGA-qj;1nQ$RtsN0cd$pwE( znHgUVqYB6JU7A|A#u8@j9KkuZG976h#&S|Ns2^+ zBw$hZNK}H9tWry1O3o73D9pp^hZD*7_syqavaiv;DcA;S*K7*1>6Xm*+VUd$c%avB zY~}~tgiy|O@}O|C4$h;SH~KGKsCzZFjG++VyP+(SWg~!Vby~C6LVa~J^(nt957#pV zD=R$Pq<|$2Ehk4@Z)Dv#24t>np&h1V!D8%BIpsNGeeq#hTpS1c^?QuIa0}@!BFNUF z+al@EwxPbuQq!={jybiUWytV?y3v<)=>*vY5!95zZYvcz44j!wQz%E29|k{vczH!g zMIn8k@<>e)Vhg=EbFJwHrpq;D=*O6uJ!2SyGpZ?6^OZeL0`V#KQ#W7tvB2EeH z$x#49;hvI!`K@_AN%q@4IRM1PGd%=mM6vUKO=mti+CKeS$OE|kwC68Bk|9vkn^Mlj zS)uh9K|J}tsbE+>2r=?|=QcsK9TYFv(m%7k@hX84t0+;LirJa|Z`chAQVdZH8cR$} z&Gew1fL(ysP4KS3OR=_}uG~(-)nr~ah4zEm=a{gVO6KLSx;yEBzYp*&h>{`|=En~^ z%YkcEcQJxEiMMA@CM%}b;|NFzUP8E4UcTM&hL`NJsF&oyy;O|bbr$Nw{PXGRrtL%5 z^Vx5)dzG&4uBV%z?wyUeu6qRxP=b*EHPZ7|b6}_;=JW3Jt>nd^3g^Y$^LhUH>*w!5 zeFJt>H2sn72`8GLfBPiLQejbXQJovgZvof-*M0|(BS0U!dHxAh&$mNtW4(23?N5IX z|4k|o{vw0q=R5N_1eK2vF<;CV;sm^^9r0=t@2Gzuhtd#?tlGqzllEBmHZYOR7*>Ag z`kufJGbnC6a8hy#?VN4x~(#p3nbLNc*p;9jhvQ@cZS6Sw62H`kk&gxHf!mVI$ z?KR5)4#L!{I+@BgEcn_KOh)#*i;Fx8w1rbB3hqqmx&V#T6s#?Cq_hFsd zgH7hYBi>`L3h99Vq=}SC*gH2^=u^m@zhQulJ+K94{y>CDe#y@men%&rUSnk1n zEPltE;CcZ{SGq-a1~-H)5?BW;&dq5u=Y%9_RZZ}&}6uqzr* z9;4Z-oYS0n7gYjPP(eun?*m=QKfihr5(A|s2*q=-(QKs6xMuDEH9-_S#lqGn#6oDK zSwyu|*QNYO73(784G#p~kcZRAVOo+-iO7QpUvs0?oyTGu*w$p}ITzpNo z(^T+k^TE*@(eYwHajq~fUpVLG(c8ZI`_^}sAU)$IO)m@`Es6k>05AY~Gj8s+J&J)D zjzr*@4N9e#u7H;@6=E}E%Q4dlt1O=}g^6%r!pwzavGB{76iWJmj$BojS|nAqswm5# zTKp|z=9kJ}LavYO{7~tlC25qF7KrwMy5!Ub>HYop0m`BMuI| zYfzLS77&!tXkb@Ua7MhNTQ8F#h+>dK@r2Dp)y1$hyil)7S1wU5cw0b@%;E#1 zhP4zlK~0$YqPe3s!lZQ2#&*@r}Bb9z>KaIr`*tB3iHw>#~^$06rzECf~DioY! z`H`-FE>eL}5MIGd$J|#>J`H09Be^yZ6gx`gYK4?{7uRwm#8g@ict%UjMeq1v7TZYb z5ZN<*Pa_H+(Bc#Dt6@u+4ACnS&m3P|ggd-~s3L3I&A|%doTes`C2<2V;at^3_pfRU zPym*&mZH)yRBXF1gQcL~3~5Y4C^s=ieI*vfH__C|g-D=5_>lQo&(WY|sCr54EP4b; zEsSFnJW#yGM+XfE#-#&oz!acghrvP_XW2_2lro$jVc*cc1KE2u$}r>s(=U*-#5=gY z-{H`FkhX@F$nJMTjk(D(QTo!RBIcvJ;amP$&Y}8Y+%HuLX``c+HMn9p?1eR~)`a*& zh@~q-qy){aO$eyxw8wV996)5fZVVn4eOb2qywuMy{3mWiFZM6h1QBTB`3(LqRSyvz zuZ9THdkXpI?C}NyZq9Z-y+8jKCW@eI{ujkO6!b9UEe9A9s_y)IkXwTv80ZrqapG`G z64>V}136Yr{Q6tk>(LvqZF}#~VsgnShyB9$#Gb~00Rm3;#A0eN^eNF->O7U=_fR)eTzyu=4kQ{mD{E^9W#?1=dlhdL7MI)=wTDlSn^zJT zAikN3AV0+NK`4?z+f(&fQjBPb5a;z~J z{yW|Uet#xZJgYF`xfCZX?uK>n&Zj2fLp||r)whV`ZR44zh2)F`%5r)4Ra*e0&;tR2E#g4oi|(%H*YHE4GPzHyGoK-ScYU5*T`A=NjK@$Z4f~jq{+u3+Z~_dt7|p9 ze3k%@LL_gWOQvwZjgJ5Sl+wBy_o44x5Dw2q#Z$_~Z|>_8tt7vPKlc5vcNJPM>JuLQ zXgwD61@{i&)1%^EO+43GBH`=%{(`8*|MKzvWN2n1GzX=5a(q(4J2a7cUlV@DIvJ3` znIk#GnIqoE`zNjf@tJZ#*$RYUYn8jo^B!G@5&}zVn2{^?Y;r0WTIJvK@q0J+2sFJk;`)OQ z`5Ra>T3{NN3ug+~9ChzQ4Ob>15kFB@4=RP7eQ~-Kd)aHp4?|g!YfJg^ja;)nI-(~T zB=0b(!?$ZTS-jdCk9<59XRHbl;X-ySgM1{OI>nzG7rE1-kkz6KGl>cbzoEs_ulu^{ ztD?QeGq`HVG*V4M*#NtMFEysQHS_irvC9(jL&Ixt$HmNClUT1eF$NY#1*#tE`tIwQ z0%Ki3ZcM}JJ)-LEe{OBobW|sZXGtE9aymbu;aZc|*t|9YW=IKys6*+897=`{bU9{5 z^M&?^W_eWwhs6E%Q`fDlDj^*P<8 zvf%0!7t)7L)Y=@cLZ z^2!T&ekM5k{*%SYYL8C}WE*t9a*hMRvGA+TId`F+IrgJ?$GNG-wj#p|EbTG?uN=^tG}|5Ht9*#s}l-iSr3)) z+^I$AGOrTr;?7^ z#KF}qdD0LjBN@S;7Bs{?NLIsZ(A+|GB;~YEd?p3n4OlUxsUnonzNk(vzn(VBuTdgS zRykwl{GEkgKBzX#{M(te#PT0{v&SsSUh^){3v5+l+!zTq9DI@%FKGCr80vA>v< zs8PZ|c~@mlSXAd@aXiX{IWK))nM}4(L4I6W@Wm;xZsx zHV}2-gYq6_Hr;DCggDCJ8e6s-!}-&dFU4Gd6I^$Sl)S&SH;Y@09DVYp0PHa;p;NT& z{f<3tMuWe1B6%Cj#A~k(mI)Rh5cB{}V^^#DfU_S7MI$%vDyJH$+oiWo<{?Lm3Mkpd z>DK6{E)h~6&8hb?K4j`cX=gT*eDQ9BPlx02)o%%kZYIV!yT${%x`Lu2ajDKv*JV5J zzP~?51Trp3J`f{jt@Nfo>nQ&M$0C=Vy`Q<>cRdPJcRl@mT*D7UE*>~(n`CIMzwv0l z!T+Q5Dv^0rl!1zG=%%N`uOGkHnRY|Z1jTvO(|`VWn@RHLr z-6Ct1p4TV#m%DaflAuZ($Lc;YfTmB;V_(GI+yn0W#x02cD#BV*PiXsV0U<(77b3^V zb=|MaEcG^-=!MQ_EVt88alo)BbLE)b4d;Q}ZsR>7HWZ>z-(qufQ7yJ9zE$mPISiDA z_VAXxdYup!SQ;Enmu^Wj$-5(GKZ#ffNRsTdh!popCJIg}yw>VN>!uyJZQE zclt?c#MVY(Xwq9ka?su3`cov|?@4iX$$xf}Pi{@!0+-@it7VWoAXsTMcYz8MMYw_p zd3Aw4Zb1C-HJC^ry%6-g*&Kqh`u7_S9c~L)I(=k(+Bx_8 z+B}_mzvU--ZYD%>M!Z^$YqOY}DoJr`F9)zH)?b^Z)z4_W`p*Qaep@VKAdZvC9c=T6 z7DR^pZ|W+B*l|~nZJ79jD}`0C%-^G`Q6~-tV^5j`+*N#vw@ak^!d0U zS99I1N4&U#b_;Sf|6c?bg3|m;1Vv0^5vFEDP4(Hto|u?G@Xuv+Px9S*C&3-w*Z1Q0 zj}l6j`QRwDlC*a{-TR%lcT&2UUO%t-b8LI&a^EUYgwpIces?7bfrNyw6VPBl(&5Fr zM%W723YZaCAa}!edVMH`DZKV5a)4whdOaB*E|e(D`P59DkWZG#5~3EEEdC7`{6q}c zO%O-1#cOjns~9*npW3?_J|c0u1Kl8bWWvwpifA&y1XCCTQdqhw zg`b4$=&0cd1csSF%}_NmM*-eX%Y?JXCWiQPJs$zJY%S7BKqr=AhA|qQx1VMT>zO;H ze5U*ihn;cAr08Plwc%t_(Uo5?$Ca_DD=kzk#(OeFt%4B+>=e%jB9kVSs-X`or3-92 z$RR9>sU6ru8|>a{@lXardTU0HLU3U?E3^a1u>~f~D)U90FkD6*eXliwVu81W6~)~h zVFjJVx;D0SB6txI_{q)R32Lx~cAxD|mIED?65PAY^r^4%TNna+~HbeW07+ z?X#mG$b)kfffEywVc5a3X-f9(3xH|3D8iVOCW*r4s5GHd>YM;;*^EoF63@Vfj`ZS57v)M0FwW@u5?0V#7V+E0 z!1c{T&aJ@deS8Tn4y|xlO{V5sdaw7?04wt-vW#$L8w*39_iRgbT;-c@9vEmLJY-q9 zbm%0rG9A)-!>Te?5?vm&KspQ?rg<_Rf_;S^12i1GCB?NFVZT#9EYrmFg%2e$$@@aE zUAl%>!9LRMy_vab_Le`(vEGUdNEK7-C5xP#q}`Ec$XcqHM|Qu34=DYK+f&5)n|+CD zA@Ou-rn5C|?kpw8AEzq7N zr~$PwN6+HsN(eORpDjhHmTD=P112y)O{mjjFWaC1o^-5FA6n@RLXGh(Ut(t#ZIr3^ z$XO>DO*NaR_Nz$YTRSMS+Cx%eu%tKQFf=B^M63%b)4sbdM>*Lz<{F3Vzx>7ZOCtj^ zSF6d(mAm~>Ygo&LHLCHak}?v5MF71Jui~F5ER5a_fet{}^b+)*;0O180An<9vA0+n zrA$OH{hzfodT`F$o&xkCz49L#xpG`ifS%7&EGgdz4z$XvG^=RP%GF1q{YrzCp*fa= z4(n!2OuD5o;(8e_am<|Mmcvos+`@V-cv?;#M@-U>B(14mHQDANt&p$%nG3PLNz1-_ z$_YK?m};R~mXoiuDBs!Us~(5-z3`@1C}=}W^E3=K#F*Kqb9hoBiBrRz>O3ig^ z2Q&!f2&a8(Pp3$W2QJ^$cYsU5frQ!6V6EMp?xE6^R?iv*$L3}TXZi}cM z5$@o9P@BZ_gL~k^Ce;NZ=u`b{Ri6U9kPVGnMpB zIR8wsPQRl1c-_gXVCv{6x-3fFJJw*f!~L8_x4LNZf{;4c5Hqr`_w@JB9};2oy;NPb zFX1J66>6_hVBV9^Yo>4S%SmnbTe5T{z;IIKXfr=+)v$JueaY3|JVj|af(SkiQ&{vslJ_vu3R`Kw=%5pdlmFyfbfm6E8-F$ zuMxID@D#g*JTkoYsYhNRCGd#Ah4cPDX;PTMdS!fk!hp_$u>b$+D{$kJ7MBBjKpnR@H_zC?oA=ds$2Wn`wWWzC;vdTVqnnKY zon^V_wG^xUznO!0Ry@@u-%}>{*!_-XH+3)WOx{5}VH4*ctU8#P7`u*H@@#tc=P0TQB7DeieR<*h;^p&xeQc;wBQ{p@_@XC zyt$3JDsAf&%6i}@R9vDQ?Y)g+)kFZby9_@&Lo`>YhFx4YN$QIj+Cs%2N?z5lw9J5j zfM$Xtd-Yz4z7ec;#=a10>1%8<#%$0~Q;H5wDH~HHHTk4+vIKfwN-Ek{(D%I`#6R?7 z@o=)GNk-O>_WBwcI0qo^bL0>twK}HOdL~}rTU3yv`f=)QD@IG9R#}x>8(MQSnP9cV z?(L}lFMvl$K7%H|LNH7(yr__Ao`CB>t%R_rFPYs!NW~)?Hv@}Esd$U7f+Lz7pV=Jk zK*2fU6?MeA$?37rY83J8G!pj!U_g14A~@84m3nDHhdwwLZGz{RY?DQ03zHwKyo z_{|b0hzo5WEeh8VCSa6Ey{$9ALy71KMZ+x+YpKO9@*WTjCmhW#7HvpaNN=o)o2hjf zRaCA{`#wc2*Fk-V-9f3ZLQ7v;Mo_I&oi>f;8o^)%yaNx5bk!rW1VYNS;*>0}nGI_s z!hxn~Ld2RFoy4-N9ltM?XL388$UR)#VsmYL`+gSkZF8a4n#&SI;2=EHTL*GnCgHO8AIwXuu-}bwrI*wAttXl zrR;pw05JhK_on(9apFgv#pAIfPQ#?1(}?AEbef1+u=vKeDabr|<+`JU{%kTBK{xCt zjP9qY@leKM6X3JKYsGH@D`;wgHf@XU2ZssmJ{8lxTm#w;7qrm4x^LThHJeH=4Cf4GI;wL$oWWpcV1icLS%HlPY zbUlk9PHKNE91nXf2%QB>zD8H&X26JOoiSW1`;RG5tD^MeY+t9MlT9#$s)-cjl8!hL zIe+kDVd*o+>PH=*xaj63Ml%rranV5tE9n~l^!uq)psS_7O}AA}@qb-I>55Uz_2A*5q(bFlH~0k{iy9yPxco zI)*hi6B0Lrs0&(4FH@U$@<^qMrcOGGG~q5&C|TAd{5~@y)oSbVay>e}I1ZiwkIS@@ z@u6!saUBpOf%H5+id)5f{6twzWZ(M#y#Qx6*!Zd;9uwYG-sUC?&Hd6v{oN&`(=DY{(&)MBOPoRlSMs#ku=2Dgb9N7 zo~@wgpY&}VHS?yv`YF|mpx?ZHL-}SkT%}5k-L7}{3_W#?y}sESX_A>3Vb5gvQ9wF7 zR$v;2>{4?Vq*-7fF}NU3fn0j=xCFe!%LF4yL+wU*>{_DG7+UjQO?Rg%N|o9?CjW?c51znrL2#vPWnff%QQ;O&2PB4MJLYQV_3H;Y>=67j12m^j5)#uqH z{xXZsOW88+^v(S<{F5Qu|H08ISgC=yjT>h24 zvrhmNLxyUay$W|Qef_KnPiy>|E>Lj6?*4x@@j>}aBK%p?bQ=7Wo!Dw;IQFpda%T7a zyjkWRYlxWWl2``xf;O2H_1#0{!s>b=g5(`|b8XbV;9Ob~y$VH>(cBE(j=;?zdzwY< zlC?hH_nMSk{V;wL^ssstBzX_XoAtCxbeLTKylEi5)i%3Oa_%2=KJ>7X|8N$|{_)#W z9s2|Q*XZhEgTP+b&b1m&SK-fSyPJ`sO$)1m2&(e`s@}eau4z}`qy4%BZ);oIX8okZ z=FZLzCSy$X?OVhlEI=ZNc`T?)8PQl0Ah-+$87RE|h+j5?%)vXdZ>&f(n&Ds7ljD08 zd>F+R$9<>POvBa47xr!V<rbE=b7a;`8C;&LU#uziv6f^NuX0hTK zbf}?N&@F{18Jo8JR}ZBW%PV$rCh}M7Ybf;+Bg-ILWdhplhHWAV{c(5)lT!EjKR@AyDtpzJbbZZRL5xc1dCV23q-w_#dU)y@1Qy_T>E z`n>m0j*K&1N&6~^JnB4y?ZsKUI^-f6GaoSn-SjO+zN$4bHB(O`IIA!(Dq-G`^q z24|%I?vg9_$RDoVYtq}NuDmCKjmAdnuu+z0a5)|?4sG0Y$k`kgb{2}P@hkD^F=jGl z3S6v$u$GXPFnJOWfJ!N5;d9Wdjb(Gc24W^7WK*UN%|RqGBw}(_SK8ZYlCg*|tw|+O z#xc?ce&Mv@w<*Y<=sOrbq2JIfeRd-q+yi~0`2uHM^flC*m?!ZpSrvnN7KxCnqx{Bb zB06ZT>kvsAgSiUTtnewIkF(n7C|A#Ejv%e8V=*pB@o*8o<&Gg~#6T%fv^-AZujMaA z83;{rur8PT6314FJL-q}JGXv?UJ!M6Cv%BB{gYMr%%Vq<0I?_332s#4jJds}Uzc2z zj5twjfAK}{+E|Dt5M!al?pLzGi{$s>A1$86B5?%LQlKY^!HZIM$+n*O#7Yv~@QBUx|UWYqTQr)kW!Z|7%|%A#a(Xgg>aGCi_!5_Z_oIh4!PxX0Cp=dDtP z=n}Y1K5o6B;sUKxoT!^QMbvYv6Ot%jBRMFdk%oMeVwN)?-j0|9h=+eiT^9YFId(}$ zJK{1!aZ#*L=L6?vT1nQSLD6z0h)J)`zxaj(!XptK=dfLX`lw}K)if^H^HtxKHk zm{MQ;hIT}WJYN~3bO_$vUp@{M*1v~FBDQpYr?6w2lyWJ447V4M53`XyjQJTcC#@_6 z5(!yCR-$f_k$kEC<*bJNmgGPd?{j8v7uw~EJp!yS$`P&41AbV9pB4;2tUb{^9UcNd z?RoA|#h5y3Dx)u99pBOL;8x9Vx6|P0E4Y}+lIDi;x|Ee-arS^WUn2^L`-66g4;x`gAW*lgS z;ch&YJgILarPyyV6~wr7_H7X8l67H0efapE(pTB>?`!MFo!zw+o)O{C|X4z!@Iyas1SIm0l z#_p?<8xPM=Y1+)w(N~*sSPxT+yA`X_*Pahcl*j(S-YMcwLt#;wb8kvMh<)Noi8~OQ z&lKvX?3({hN*|197oZ=zZzRqh3K6S3qqtRqb()B$fAPG1&9U1}hY3!(X6}WHN;N$r zX61)Hn;uXUbgMh^-fW<;=bo4G+AqUmmk3~Bz59Eq@+|xpEZbfSF&XD(g}7rRzRvkY z-x$xBZyC;FHaZ`$JO32Pwcq_fgen%Z7f+Abhcnu(p`uEZ`7+6KXF}ZqP#R{{QdXJY zPm+ByvBZf9VY?GOkqb}Cc!K#iVECI*%|i{D8_0Pl8V%DVSJWeUu`f|wi__KpbC26C z2QIE}TNHTC$2dnM-nUJQr{6vK{k@~W4%#u8E&O-s30SqcS8glGzt6knd&>EqEFW>Pm@mUARt+^6 zs&c!ukD3)xlKsi8WQ7p0Dy`!t>18;WRrquF+Qf;hPM-w&)%mnoX~6(#h6A*Uk(_c@ZT0|13R2vX*PC0# z_+Qbi>l_{G`wC?ng9hfGMFU9t?M-hbjqV$xTnVJkkYqMFG+(Bw?UK#vBvW9#AX8R2 z^!1#eA6LXR<*LHS+U=k)r!c=!i#a9tO?mtmiUYKE_BM8JbuUl6PsI2M8`L|RqH6G3NjL^oJQ}sW}(#mmZ z$IUD&xu|kQETjI|l>LzWL5~^Dnik`2e;Wfn_7%aQ_U{EaxK(_>h?CTx;0I-bL#t#2 zmldfl;v0h1O=wFqwL|cvYqLnZz1~^QtZzYKkn(emj^Xe*RKoeDbn!K(&deVb$+diS zre;38_kX&E7ij*dt+6j~*nc_{-Takeay~P=mBF_6>^NT#`dTeHKf(Xr<6|L;KYB5^ z1Ti5;KfII@9q4x>to-ES78L4sA?xvW{P;@~-lI14Nsh}AFGjoJuO_Q*tD*uE!dgxv zEwl}w9oA3B_!GE@4M_CvIX}Tm0za43!)=K)X6K(_WXfVhYVlx* z0jRZoN>gzA+HKZmnn4H^K?rqS7%hnOPePkc5pltML*=v{ki0(yE~fyVXQp%@!^(?i zs#ER03OiJl(9og7he|yARG4E@n*HZHL%EoW-e4pz@SEyY!=RcrLzyr9(D*N;NnO`q zl*Ac01lajqg)x8hLAk_6_jRB;3zs{_M~+Nc1q9u$3Pb!aB|OE>mvgQ1tvg5YI7;EA z{Jc?AHA;@MU$seIqLZ*J5lpe-1F0Li%z)~>+jThF>qp~!=K&+6nSqgp>?@Q4 z5ghPi@sm5zbp((_fVDLH`&7HjV*Bh!22E3BJcQpNco&j~=1K+&G7WV{+1;5o2d3xc zWK7fiL{cOE@_?vBt`NTdS)d$(bUeTP{Kn)m|k_uzh*uiCWZ_~G<(~S(4XeR!N z?v>a>vN9&xtvRY8QHBJ29XWxlF4zbtlJB14tTq`yqb4h zRG*CrW)Sj@J>Gh~kQTbLZ?k;bZd}&~%(C$>3PX28!u14s(Gs8PC3{4FS!vxW=}A33 zwQ^7Q?;I?2^;>S<1~owhmTX7c;wkyTw!@NIcf*o7iPCrb4l5R$w*}p*H$wrL>swwQ zx7aEEHza)@i7HG8nc;7X*(8wtA}oIXzxE1Q9*v$WwhrAeCS>nQKAMG0NniBN$Nff@ zkiUjyPZHb5wpVqbouAuFLOoDtxx@+MkOB7FsVY&h@`<*0yYyZ zLeuGMuY;aOq<4!h4yqorcdvZbg1!AJkL;eTeydm9J7h}T!G9arMqcogY5AWW@0I?a zUvcxHcM_SR7j&}~9dv&_l4y7;eY;ECeUWo|-;q`KV8Fm|*yR}-I^E*p`m*Y=UFYo{ za;`c5W{b4P#!j=*Ic##tAukLllVWW1`(YCfXswC+S+K@ z5pu~26be8(K9<6v#VWU&h)X$Rw|aJ<6?p#KPwSTrzZOkBX#Db$;M**jv3eEsgbo$8 zFPe9%zg5o$n?Jl5jiV4vF=ktvj8%*!2wjD#kw6eI)JSLoX31Q5X24wW0&Q;g+ZN3z zRtxgs!u8dHZ3Sg5{Ff#kSdN}$jhJOF34oomjhmU)pf08~VwYox0cB9?V@}p(pE+~( zs%VVYv6`$?{H=|h^&%U?M`kkFeAF239AAZ#N&sdeq;@=li%?Q)I=r8f_j*I}k@^HS zma(?61)-NeamMn$`eGUB|BzfP!WQmDz%GPnplZSEM`V`?DhdL*B8g&4YziGUecbTC z!|%?;Qw`!9Sy>1R<*7xvj7OS&WixnTP9f!|*5fweP7UysIYL7_7PewJ)<_Iqiha+J z_OLX&j~~1S#V8ZTn-(N$31zFM-FG$t-;3N=M1i7d}^u);Nd$7KY8p{c1E;x%^#Hd`zRW{Bpw>>D~Ls1)?*bm+=cAv zFS&Mk2JI}rkmr+bWwOEFahavbcyeq?A%i2J}Ds)N4GxcHIcLNIAk?j4t zc0^6)lqPPjc968Fagu!)=}fG#Vim}6|Kc$T6jc=!A{4XaVR$(;^KFKYYnP{jGqcHy zodCiX8@E+Sq6c5%$;6F)XSXzSNFh6`8j0;ebRO+-C>LMcUiiA4tui)tl1`9Rm2vg( zO)&=_Vzs&|faFXm2F8TvNau)Znq3cI@s^M!W5k1%%e_h3h67{y1D%Hy5A`uLi?s-q zAd>Olu<@6Gi!7IY zTX(WInyGYLd!pwsr2dGzv>ZZ@QK+X8d()Y&+Z*nTX2&?Rb?D}{%bQdZ;R3-2I7m#{ zIUi;`zxV+03+7`YL8UP7mSK1dV=AS&Oc{0B|SFI&|5zJHs% zS<^ts-_YMaU02v3a)jhM)`}2G4ZHn|Q@#7}`V_47M zPd(f7TGGLpk^iv{k>5C(IIP8?54i#V3#OYmbR{8Lzqz^ys=6n+BX(Qb;yb^;AijRN zP~(Fk_Q(!&{%G=O>)&;teJ;?!4@sZYI*qE&s5h8PzF{uo|BQ|E}ggU`Ob{R5 zW1jPT>v~wsD}NkC+btL%M)xen)Vr8Rb9E7&oq5tBo>tpOWO9JH>xXNX?euC)wu?2@ zXZF&=7dWrm=!~{>zurn;{joa{*E(a9@}P^#G>|IIIVywOCpuW%VG3LXv8$cvFt}!~ z3#0ZdYK+`UprcvNWLo^&=ppBu(2rv-0CnbQ4ZExO|E>9u{ah_!>{)Do=sqb>fmzz7 ztCLuId+#s0^z_6}J&W}3n`J?EuMrc8PZKbb-@A;Aw;Tq3!cQW5zgUxsRpoaDl?;#4mSo-0~WWI!&X-LRNQjDYW6augKs+KP$$JA zygZ^RCje+V;dE?)jtMqDLL0_<09$)TLdT;#l&XsuiqWm-)j+m2;$o^-2*4P_XZ?4q z9uPaQr0Cw}ukm)w?(lcAE7=XW;|pwAvjMu%IwNvX+c^EGrumu1-VNw`&yaaNI8_-9g}}`_$O?^49`BF352iTqxdH( zo>4y1V#>Khw~9dQfhJzV1EM9weRN=b4e?Y-rdttpb`6!c&^RR6v9TDaYxU8=2$)p}rI>H`(q?TbPAMBc!o!fX;b2`MIKV*a%3^-=ZJGYnn!ITAB0L+YW(Nl>5$_RZU8v|rdV zh?fXnq1f|(!d76^O#u7y13D2s7HTc^X z&DtW?VZod(QE|!5h0WmRg6c1i<1)qhS+c@ziejnH2NuHp3-W)3;5kBIvhZYnuE{_h zrD_#T)WAcMx5c^1%Y?XJNt`}tmz21lL46e^kK|`eXOPiAWnsXWBupa59lHo`1$}Jy|?eHiO7p2GZT~89S_D1p>g()#;}wG)a!!`79LHtudfI_6|)H zjKLuJjW%rs>363OKHmQ3X@D^WAJ3)=Rgw~+A_HhzIWuCu@Lj1+=jkZ$Z8WgjoNm7x^mt4EeK*j0<)i_HtyE1*Wpj>=Rdx2B#i7JOCXuWSu#hn-;&x54d0j_yIKX1*Z+@cescnM12OYQPV^1f; zm@VJ$G!&?ftm77IGmqwKELIz~Mm>A)!#g2(>mw&O<%+YVCqbWVjIpmL!5h@=8K9}9GEXckh4oyi z_fm#8-RvYBnz6TzDt6b_dzsNog53!QjZ%`c0sOb-r+;+cZzT72>TzTGH5R5Q&ptyG z|Gup0_>754sj=O4dTu*yi(e=@5;sm$JK9)xz(+j3vyBVTf=vfYX0T-4ooB9ar?>8| z9*j|sYPgAZ9gO(>qzd%EcYPKTGAjd+kkHub{r*e-@qddvY4JDAf9A~oR~3<^(ULky zX0PXWEOY-;U(b2i&IRe8vK@8iEWg{~yCzS0|5}=xW223&(8uGae&2-CM&sbLs|}u5 zDtgY(_dGohBiSRvJ*%PpPk(AR4lL=V_G(P5u` z*m>_c4#})hd+Nf}`M;yq{Dtv<2y$imV1?71b@6{e7i~Q~WeaL=kN3EZ?!!klhACDz zHs%9Evs-W5H~l0$klFnWoIJv>&fm&h6+f*&B$*OaextovFpRc>__E+Hbn|`F=tF;?*5SV*B&3My9>|3@~9*rSUB3hTy}qn2sa(@ zkZsHtD|?*tK+u}m@Ffu*QjA(5{ zjC{indu|N}yoE(mW&gDLL_LB=^U+OE$cS&DIa@%8qSvZIrSFn|S>Y&|S*VbTbIAzD}-QDm{u)gphDL#`;pUL6= zO@aJMUDkUtvEF#Y)x)mjt&!A{Z89()legwkPow3QY71GJy09(i1WyJm;}4L z!DQ2(*V*bvh6ow-PjO^F>m@@rdEd}1Lf>QWY^>G=9S4VsFR#&|Fn3Q_*w}g%uPR+)vnwRQ0&J*=NuiTNb=))!_xw4}mjL zZqG;x2ub35y^0p|Q{QVS3zqdJV1aqD5YGkijJZjubOCsB+^Fmdq=vOsk|HVDw(D{A z#1(Uqta2@=(NT0%{u#F_fB^2nH-{$4E~Zws7OWO*?u57iu81_vF*ZotQ(o7#Ng9pl zlw{}$9SZ2;YsKr7)D!siZ;Sb!*?0}Xk7NkP<QLKW zJ~!Nt@+$}zShcK6Te>`=d%As4+u0cf$q7y}tsB%LJXEJwjF!nRAjUaZD*D>+;)o#hu7&CUV`(XRXVl@en< zWO64hB%^w(MM)cHki>>(5TI(#F)YrtMK;=lP??3QP_S8{Fe~m14yNb+Z*r|a!j6fES#P8^wx185aJa-tV$=85uylY_zO9YiB!;X5_r@5cXpd2BX zZXz@{f4}!k8Fl|Aef)_|V_B1f%{(0}O%`(9#_?ft!9zTp0sh2Z+Q?6>CSzl?RrZAy!KMCD<0q}FUXMrZp z2gLgVg0UoGE|K`eQ|aOI@IWn8QhvS{`%Z;vYHaenywe}-GetSEC;0W=${O$zYO*i3 z7jPuWMkew7eOYh$yMk|}%(hequTw|Lb>ZvJWAFgYj2#@E;yaOb=N)x z0k%emqZOw-1@GR^Kk;?>la-ueR}O@7q_H4oc*5gQm!pgdB*^bH4m2>PvKkcz3c9?O zKutXFzFvf(^)a5y%-O&JEnzRBv@e^}Be%fT6JWYS8D+;`sNn$ny!~+<%}jX-8B|fz zJ9&TUs;iBW=O+WibYEkxKkNRARwSVJepa+Pru;1fG^`?G=Zsynt#IYFQFS!pPQOrp zu=BWlP1=3Hc;QVZt8_Q6>`v#PW^t;{KP=@>FV*lzj5rVf-mee}2b1}o+f>#5H-`QH zU2ZD2C}EZd7$}b{8wc_hNEm7>-<$ps`k>w&;uEgtoiTt%t#`N5c$Qm6J(n99_?Eqs zxw$^*yk>i!K3nb^B~IV2A4T79&_DLvU*LBS>_6B}TKc2CXo;{@$ff`@GV`paPZ)fsQ z`M$4b0&cH?7~FCJ3{)9jm12!p+&CP2q&8|ESZ7fIMVCrzRC{(`=jB9cXXhic$$(6g z=W|PM%FIB8sr=xtU@BInk5=|UL8Rz%vtMSt<%yeEW&x)#A3FOPHG8h%HvY7Vl#di+ zObF3%wrnQ$Xq#f{g^9YwvVJkl+Batd6`n@a!Uk>CY63&ZpqQnC=2bChKPvH9vGLsgT-%U%=wNmJGA55r7GQ=*(l^o%C`{u|Vmu~uVh zWgg0V+$@F_m)ort1ge!d9A6EyOCX+A?NIZ>*Ht#z z9!Wa$0Nqi283N2nA`aQ=eqC(ixy(cN;=SZt0q(Ka9Ph}5I`ZdY)*Wj`$vD&BdxRHi z>Wm1=rql#d*C!ZLuTww6m6e+jf|u%8rwo1}bRyxaTsq6nSAN}Qokn?y;~}?;;{nEL z;h+66xi4U*sZk9+4tL?0!zlqTSo_#u1pr>5HRx$9*)i%Jin$vxTkKBa|CJcUJCDs2 z&t0Nj`F+~@5LR~$%41xC5lyRn0w&a3uYN92N_%fX_4XJbdm5gN)&*!`wpS0(Ldd)= zOVzASm{v^#6%#sn>6of6U@o9#za!2gsU_xCx`wsk;!_!9t42G50_OrsyscLd=CqdJ z*mG|FDbgNeILQe&@qs5lEP|yOUETLE0ddGV%2_942!0t87B9s)myY+R*)hxm+<=Sl zWUuC!#X=-}83+$djv|S!fX3g^;MOT<%sR<^vi>77`lC!qjj@yPCk9s%fd=X~(iCDF z6fuRD>;jpj>6){e%~lOj%Ao{1va$_fT5ul6ym~2}@>KQ?#b8;3naXUcY(-W!W8hnv z?}IOVXZ0E4`sx`*#1<5N8Y|GdC~nc@r@}0e5RBy$0|r@NTS79G}p0>Ux)%!jgQ&J zenFNd!h4Wh&FcHYxNKJt-OPQE%7Ds1xY8#U5MIy_hY=MNp}9Q!C#b9vg}3Y z97At?>6a^C+{|>GEnnq^7XVozHbuNmO14wwO%hu_XNNlJ>OioGHn|mQvJ2f4-W$!T z=4F~%A1O~ds7aMcz%jVx1$&k1WUC@wBk^bLy{$2a6bSXQ`{s|*^*9fOmozm)4EjW?|33`+rZf2KaIi#XVR!>yHHENf?2<~<@2 zi^VUPk<#N3QuvH)Z2hA$&pCR2bKK9;KM?-wNpFxEn?J+n9UbET4R-&V*Z$Y9-tPYzp?}Llw+Ha0gBYb`*kGuPcI zrMIkNb?5kU0#)LW>4ZV8ni_Zc;<9 zPalLf2VIR5bvtdv*hj+&S{&qA-?UWH3SF76qWau>JxDIU?VT6W}}IfuZrg z+%0iG5+uT}Gm{!NvI5WKvsAC-f*WW9ml`&D!c-D^xg#>yDdl7D99wFyyQ$-^u99lv zZSgT1TsIg;Jo2W$hokrr;7W|M`-eUyUW`}wT@xoI*J?l|#q&m-w&=A4H}T4d zt2pjyUhZ1{H}L&`mmslP6dE?6vg)SXVU0FiK_Ejs5z>*c)Ls3yt)GikI7GEL6dXAPMVkj!Zyjm5)kDm z`wXn}8Q1J~>h@Rusx2RYK9JaMZp+E)r=tby!=zW6)q$EQlTYq!3b+gwul7|lFI&jh zl$L&bEZka!YTRlnhD{1v7hRXn#gSqG@Wt+}+Rx#AVsdG>R+weRF1(H&%de z5slAN#G~C9SU4k@WIWAXvo`lgf&@kBzYPbPI%3pdu{Po(Y)@5PWj8>dHD@u}Y}#~1 zQ)CooI_8a(x$=P)Sk)TNdorJ4rPvNx6)je}NzPt9GhH(sv#*jg@Q#9@n?1!IjD05P z-!fmatiE$Z%J4I&gfI-gNqU;@t>SlR&v99b4JJV`D=(!PqHH{F~@#|kiX zi7JvX-iZOw+W9!a!VvVra3eQ#woTPErKtop`EMC6uUqRZd=oI4=mSMe2 z(aZEFea=8`XaW*)uGTP<)r!3&WuvAMb9eN^=)#*gd`VRz$L?I2?X|@uAbt3`io3}x zmnnN9#^5Q(;^{qHd{khWs>Rz%Nl@qbZo3t4Rz_T~#kTCNuHn#J{fjy&;5wnKTK)^( z>>C^u}u!IWLNQs28$On6xpf1yT2FG&6G^4N>~ zWe~Udc^s}PE48k$O7uE(>yHuuP@tbt2I5G?R7hd7!puP1kF@Y{WwbZWjh3B^(J)8n z!O?vpg3A^c;ws?il~8d~DTwF^VR4X=wi6x=E?^?}W9H%})cj1M)a{dl=NLmI zC5ycMzGLEkgv_E9Krx#VZv5dtmNE9f;#`<&!HK^$E$EofJPQEh{OTD4N*Y zH-C6R{np16qk?}B-M;W6R7g<#BkmaP`-ERS^s1ETobP8~aO*4&@2YmEXQ6=dn0Db`8x!{++qYq7y?Z>u+;5 zZfPGcwVqNp9*;M=sTTfXZbHkuuZu>NA?tU(y&Y#$TIzvTA(LBy;TOS`nG$tj0+!K< zVahAD*HVx1P(mU8vy6oM)U&6eIjC>6^vi>>lab9MRmM=Q^ViF_GL=!umZX%PIH-DD zoEPzTnvv5F#WdH}e72bmxofNLVmYEYJlRtYQ|Y9qn!Y1zM{6-Ttn~HQ(YBW{7a?0A zfz#jR@t*H)9lY3ee|37SLg7yxD$mB8fu;DtG``{_mpB&JHT+vfEI+9&`>=U@36-Zo z+8a)LOCx)>adTM7Apswee8wmNo(h1H*2d3@Y8@j8{@mvBXh4}>+{W(>6bfh7d|7f4-D%9pSv$t5|4qYltoZIw=D&t*__G>H47cr7^q-TX88(PXgK1iA zYX!n*r#+PdhDWOs#8lCPer-g*SNd$J<=1Mk(W;bisge|?LlEzN9ehK#hnq+ic#~x> zSX02e|L&?d@;v316+e$L$Y)k&|BUwUk8cz>(%g&xPrLI!ktjz9J8z^Jd(W5v0s`!w z*t3?o~I)Xxz@u@E?V?chq&J(jlOgq<8B@(jsM&-FF5o1 zz--+z*WHY~+w&SsU(F+Wjk_3kX3KGQ9PoEh&p%+FWyf30)nccy80BJc$%QICHMS!8{F5+xu+<}hAer6CVoNOXB$drc=2-IkS=_31@h9;n4- z$D^&TMl14zH1eiwHq4a_9F~~IhlF#4T#a&HCi)PLWFMw^;bZN{vr){xo{Rn*ra%Zt zj(vkfk}wprvtfykNfq%bGIh$;W*Cu?Gy8tc0a{PU=r3_Dy>ZAATJGFd2d8Cbq+|e$ zhF(Vh*}`FnjGvAA+RL6w$r`a*%6(Yr2w8{>Alx@NBdzKq8PIJ!A^W8pp9B_jWNT6p z#1NffsKXJ2^&*8Vqz$ZglyaD!GYW$QSP0@9oUX9Nx@QMs-QU#`@LNS$9dc!{%OW=m zcexh`_?kp?(JI3A<$ow>JLJFGa_ZjIb}`5!ql`q;G`!pRp9Wf zIX-=?;>}zF+>zu}H%m-Q)eVA;8GW3h24Yb~NA_}NV}d12TEIde{xjWtN1NR8yPHv3 zf|Ef_H-bc+9?}MmZPIhhZ~jdKlC{;qIJKzzRr}^7 z+-%N;@@Ivyl?eezh~uG%WD0W*H*LC-3Zw8y$2J`R$)hT5#flhn8{*+tAj;x#?1{fJ z71RBj!E>Py*3NFVKgU_1j3V8 zjg&U1WM9`aF2gIsd>FKDL=PpStZD#YT2V*{=iwK^u4+4~rD*#L$B5C?EeNJfq-Ob_ zb4~E|wYp7z>p@ep81C`=7Cp2n#l$16j zo29K#XTq07cVfa9wb_f{rqwyfrR2fTC62YVFUlbFVhDGH8N23l!7MTxNSrMNl_a8pEP1>F-7#-qnmKF1S7wbuZA7+M ziJL~>)l3K5G6cB?MJ?f9+y8; zvA{MunvSW+p+`nmJNZfnQ!cw|Ad7HT+cGm~Z^mIzc<#L^o`yw6JFnshP@k9-qd=SV ztG13lZc)-Uav~ZbNEX;3Oy3{Z#SIwlU`HXD>$NH;!f67r!Nqm4m$4UcRDt8aw_Fs@IK2oZKM2+MB3t7Kg2pT}vxU;_J z|6Z<^@@o(>$x| zF;fZwWjag9i`QD2)ZK;K-`_di2O58O1Tf z7F6e{3FW87$#~YtRVP89e%K@*9k~pV*VmWqfqo1O_)crz%YIRYmTz?ZJ%yqDdO(8z zikzjsoCFVqif{)e{j$Ek%d|_l$2A=cnVrUz>Tv80_M>R+h-lSx`+ru9|A~0+9k#JG z?oTDk7_y7qZCbY;XZ<8NL+*m)MXc`Ma0k8TIg@Usw1Ea*;vEQ>bMMH1IcfP5t2JCd zb-xg3xz!_mXYhD_M}O8M^;hW0=Px|YRos8~aq@F1m!GNDZ^KcRO=7M4-IlYIfe>0~ zV#n0{lh+UOsokR!Bg zKE{PQd_0dlgYrVc6Di*De(;QPR=VpuUd%*zxZ7ZRc(wwZVlCNM-Z0Uzn=O>Zx0%$;0u3dK+g8#kO^wx==NpGI6Jk|gSmwS&-1B<_RZ zL>niYgK3Vi41rN1@&e%J)_nnmE)TgN&q*!QYm_bch+I zp)SFP$^%u7QrV14rBrGWRW9*~o!*7$bE)Pm5Qi#_ul9x5VMN>=Dnn2iUsp6!qkKd# z&?yPi2K*xC()|IWGU1x^T0B5gP)Ep##;sp`4%bCs5VWB+Yf?ew z#&{{W3p+zSD(7J0Xrxe%kf)WSAuM{k$1tNkMKsW%umC5+V6v*G4sMP#w=Y9-r7=pUsQiaiq>eB?@-BKG9|p>Uvm4wN2!laPJ6m*5#t?&Q7I0!7!lyWtdy-*eHAIk zFne@?@xn>ZmIKd8<5h(PecB|U^c)I3{@p_`pFRNxnHpwM6rUnWgs~U{Z3V6!8m&tO z#-wD1sq!i-8NT{=9bsbG56F=EhCvrF)QwRHJP3a`_*x--YY0r6(t?dn6k%;(U9PsTpwCAZe(MMX7SZDU`;Yib&ke8R&TXUt?XCvOYekK1Ln*Mb34ia^!-pG zM{oVU2CHuE2bCKQC8ib17VIX0rUVmZjWZG<1c^h+l%k=bi{Q^l2)kVUc-637ls%%J zu#KpTpxd2?YUY#UW;*J%o$0l8g%|@}zZ71?v#Jcrzo68h`{PY=d|_eXd!#s(d!^<4 zI`rxKpT0hO+QIa#mezy5aT;vreAM+}!S$Ra^fyb;k-Fvfd;sN*&R$+yifyQBePSy3 zdA<19`D$oz{{v<1dCm5CbY5cLgExo}W5Q31HKk83BzTGiyk&cFb{6`~W8*I4Z>aBK zmS4cse9L9P)BN7UpY!i5-5c0HT<{RqgrKJcO>3~vz}k@@ z0PXtcMW)M|@#lKypTi5UB5QvAUo8Mt*Y%ct9X=S^F)!&xAO0|c993>l=`5VyNV{%! zbB&d6pLY_M7JhAyLn$3c3hUEg1N@$=-!CNizK@$@UFc`3Sy#%7@OBd5)XTm=I=t0h zl$3+ouHRF5KD)L-34(hK~lS*6f6Q2{m#OO*{_buoJl3xdJL|gUdZ1WA0wZ);I zq|?}^Ji9ez7WmzG3Q@q2B@!LxPqsbT`4i%SM1Om(3Tdru%5Ohw-%t}@dO``5K$*L? zEm{$dMEi*EohtT&?Zvei^+DXPU{(K|%MeGar)gJiM$g1X{vMB&Sgk(uoRh87{^c9>HAh`6%b~lH!lfJWYtCs6EHeMUKgA&edE;Ch zOy3?BCAOM#Bk!nxo!;GPPbgE$({n;XWMX-aru0?xT^@6()As&;l+(wM3hB)!35LLe z&`>yr7uNl@-OoDFDm?T~qZZa|c@xFMAuIMejZu#+?!_qSXMuOV zexh4@3huzX#ZI>o=m~MU26uh^9xPr+YDQTGV;iML8|0ldn-!0Lz%?8lV1i~s{BZ{D zBX$=@H=79w;ZPVQlwXSf?Yh|yJT4^}t0 zoeu$uWV5GNPP8}Zipefv7OdV{d=n2Q5;FgGj1stXRGOkI&!9M8bVQXT)(N7wPU!5Fn zr1bCJG%MTi4%j%HgI}I!+rSs~ado0sXMUvuOMjIE)|NqMMr9u~d2$T71x0856))0v zn+Uv_f5;w0cB{Wq2G|#u@r44`s-JN`Ykr)so@H7T zbR=qJR?IFEVeWSIrNVwo1DBd@L2IMZ%`T+YbK4J4v5$mo&Kw8>ty*^k8LqFr-uVeNQI{CJ`Q%4GsC}BliJPW09_45?SCPrNX0njr zZObLwTug;r(V-((uQ_j~AbgR;6$-WTf?;?KexGKXf#vKlmxj7a?kLJv6$tK`w;IPR>m)k z{cF9`^tpitx@|OMyCw~EA6kGbI4InL$!%NM<7P zBVIA#I>4|UpRg+Mxy-hh&x3tfhqRx1(Cu#~@nZjh(`e}IttAWI{-ibcvSO49i#%e7JO2Y2b!C5WQ2l(6`W4!Bxf8@2%ZW zd8-xV=VIX|;U?cE-%0UxuYodG&JxQ#JVoxGcpeo^-aF`?^vX8sEO(367!%#E!D|h(~IGD=s3a8X59#MOvPhd6Ie>dS7qKrLV z#`yHJ$n~P{)9I{DQ15N>O=LijFx~eZhlKkvzoMA`kEXK>imMH_HSX>VK7+dl*Fl2^ z5AKrS?(XjH1a}hL-8JOHHMqNTCgQuO9%MaeUNN7tULo`qX;Ct%>+U8-*vw!2SxnLnC%eu zjQ(3br)#UnO89RPdK2@62KOZem|Yg;u1;#tXuUGcUW1t0i$W*mj|ExK$~i zpIb3zRLJz`aA^BBTBVc&Ta?6G{1<`gLiONa@UKLC>Md#;O(A~>@f`N@tTp@`6+mn( zXUq@EF^5YyrbcphF0GlJw?Ni%34aVk6UXce{N<^wzBFAzHis)^IQO|Q9XX#?H$Q^= zP!8SFNisW$j57&K7JaS2LefOL%HUN(CR5&t$)OpPO8guAO`@jgMVCmkoa^2dP{>5J zyRr<=z8IV|UOnE>%YpNSYT!~miwONdDM|2bEp<*m4#OEGH!E9PKf0k&&v6((AjD7%D9u7xokCMSXC2nrWfv1yE z4K9j`m#T4#TR=|vaNCTVQ%nsR4Idu9AIoggGYrHjt6LbD-dk3HU#4T(1VPmeOCNiX z?|)+_$qbrJT-s_Z@)UoC@ucG4c$v?O^4&y@h#B9I}ML686eBP(baD>FgvCy@aTb^T5` zT9D5@Re^kr3rK%-)=?iHOKK5|B?C!bJR>0U7QPIJ=4izVeg&;-AImACph6ubsF?pp zFptb{sJ)Mwja88`N4(8 zOInQ*MPOBG=!61xkIbXmaiN3L@M>~2s32KikMoXI?f=jnyFewRn+?QxsgbB=>0D zdlQffjahHA?mb-VIv22Tli=cs04t|Ot>a2XN|J*8Kpamd-7n>U3^RZ{8OvUZlR;GL zr@F7sDPK6++bfG<_1PCa2dW(pN#Nx?fC6PL*E8?wc0oBG#r=G(t-jpQ);te#GH&f@ z#Y}SUrhH9Pqc3%Mdv@LMJjgBUKYi##@cUQVVK z4DoAt8-9nG@tyJAADpWrrWJ0kwvxuQ9bg{GI1 zG;cPKjEoGaA9)P;h-!{kwno_fkcC;hQ**|vVDYwt3TJ_!Zau%^b%A4w8K!pl^F|XVnGiM zJv}HCv>}CY9>ly8i1f?@G~#e;pl*0wf`4Ibt{d&|9(6g@+PPN33T)*oLq&N0RsWjs zPKpmE#O}lKXCqH|`xiHyvQv@b{A@ppfoZqLbsE471eg**e`d46zu z*Yw2Dds-_Iv%h5Lo_%YKz5jmnRpZ00j?ZpW&yef;TBk2QxqXX}kl?_i$J?>ShVL%A zUH7}di1|PbpN}k#fl^LNJsY{!{3NQ2N~W#Qx(M{x(jO8v^#7C zq)W^V-u*@%F_&5z6u|WHa*LO_;U{3r>qQjHW6LrWmcjPXi6Kl~JTUDhQZBpN6JSdo zCQo1RQlIhg!9srxH)>gRe7XPb2iakFbvOr7lP9I1Xj=Z(twrGr`S3j`z#v^F$ZRrn zDPFC9$M974r#8pk;lAf5e_)vL*2+)YoV+{}i^-eGqay|gQr5g_xiclS4OZYDE_@`O z;*0GY=9#2FNzCr3&CjBg>(fG519mXhR>&aY^-IvF$>aLn-hOeqQg z6oG)Gc30hyy#y8_G)-30P$#_cHbx#-7!NWdK^WUWMc{W7Pc9&TFp~)3BiUu`9bAmtqy6 zGR!qtSrLgJm;8m1P;o9g62l$o{6maSCnIGdvlEjURE{fGpM#9M82n8oE8%NPlpPF& zlW0Xy&bPxgWUP1W6gMfrDQTAC+T)$%pdmy9q)R|EKw<;qtv%!+!c1b>%ktSs^Nu1( z2}Y72Iv2L7VE=%#OyVmJ*Y_Zd(=KWTS4K8i=#eJ({$_U#B5sOYqP1C9UBxK+W2zWr zA^wFj%c=@|SasJexV1`IF5r$yx`8~Q7(%dK*%O)|0L0AJby%VYU8btV+sAq|nMi z>e04dyu+o@jxePz;dyC$Zwc8V>LWONMCO99*~7Y>ESQNv^gv68vx`y{PXTHw94yL} z%{V|@d$6HOTDz_q`d149WJ^+nq8(IKFfSxR=wQ= z$ZZT%=g^MOlVJX%CA%IXk7GVNjADu#=h2cSsxn$x4zs-6RIeUVkbmxIyjlf24tz)_4%OM6}exFROP%hQ2= zq+m}`ql^(Y6hD~6KDc}Dg?#qojIVC5CcX5P1VauQWo2cA+sgjWxDV&+yXWg2Yi-Wq zg6`)I59j(2s`^dY=p}=tQmw(?KMGcYg%jAGrelxGv$5JHS#Y2wcg?G zd3%VxdX^TUeXMEnQ`hkh@-Fwj5cYe&$-imwe`$F=wG`bQPh|>!%J;Vf-#T72hKAho zK#NBdB)hO}FMo%+mlM({?71Zn`YHGfZxo61lG^RZf6f&VEh>~Mbg546)%h7&-{63W z?FuiCdif8l{YBl?VqzJm2l40Xgy=g<4B@Vmc`nbSz2rMTA*^i^J1@geFe=#GcYZ4K zVEJ4X(Ug80(&bt=g_Co0qn58+inup=kPNKmu@KiEH!8p5{y=$K{3cfNLz8mpL5U2^ z;%LGLf}7y~5aG_d5jQahIMAFd5pps(ImX%m4ic8@>fc05^k*GYx6fL#c^8VK*4wF%hMd*ojM_vVuSeTLnH{tE#4 zV~@O2IOc-JzwGW10(T-G+NSm?0gqhlCH;o_FQ~QgT8|&yj{%qz8~fWn_jd+}-Phv2 z%YAGNsBHb8!M^XhY_8X+4c~DEv-7eN__t;t5j;dp^2<3HimN_E3icT;Iy$;?#urS` zrlrs3%}0LdfbaAEW9`Ln1NKWuv46h|7cHpleA>@O8wki0CV30^5ZlY)7ZKUt+Y8ZU zTHwBVyfP&h@sVZtdpY;`6*8df*75iCHK(r8<56EsL#IX)QrP>F2dj|pjQ(?rk7OVn zi;EX`rNl#|MXr1{2^7WRQIBEn)&{28(fon^Ntv14`87$)aWoLs1E+H|RLXzTAJ2la zzYyNY#mVX%`plUPrcj*vLNocNJ?uodhLkRl2lNN55O*3xltcfv>vWjpB1>l)WinsW z#SqyUA{H@fc0rKPWR5pNy+Dnt=71tzC_OBzJ|&AItttVJuX7%Kd(jBlPsA_aS^L}k zXam`!QCnb%eD3GRHL19bxJ~Y@Hi*MiAu*#LT6}N}km7*T?eJd;<${@!8(rk=3Q9Wu zHb`5XCeeb*oQe+X6T1`^HT+)QCnhY0IXqfbHAXxfcw-ehrxv9{h6sQxyf`JzYz(^u z+Ex&oVQcXFIXeC*iewl8TwMC1+@*Yci;uPzW)gg9P?#%6$frTTHhw$;cMysReXiJY zC^HOd4_6zPbiFh(vR+CKoZM_u(}eT{`V9IQSA_y~O$j6sXQZ(ATXr#P_N<@0&Nt01{@E94PGV2q8^f? z8i5+J8V9!q82#&mDWXpgcJJV195F0+h{^bD$cmSzhN?Oqwt1lkNMo+j3SSLx{NrNX&$aY`??9U3qA*H|Imyn`Q4xq_(+)mKNliTH3rq*KObOwRASD# zbNhUx4^C7-p5ca9MxFpEQq7CaOC(Vat;wzRG7pDv!14$%Fu|$AiK~#t51tP15@m4o zOXol@gb=oQwCJkQj(r2oMuf&}euN_ZOU5T6VUL&N1L^a|3i*baag)$ggHLn6nTK)9 z(j-IB>Q1QmWhW_2!1`-CQ7>yRZPHoT)mvi>}#@_ z*6cCSo*?LYSX@RdLn6 z%mWdkX}NPNj@4@xxxWmtBi^v8aeay)03(<(vE7S%q~MdmsWG6iT3(BuG;E(X!Z2viK49`8o0j zzRd+cy#FPgR>>7W)P$`QuL=Y$R!%BbusnEe>lf^rNff0|a?j6;?Lp7W&IgJkHujgt zipL~GMdQ-5h60rChDp&YVl!fh+O;tl^lvFcNs+Hu^{l2Y4^6Twl@w>y*;S2>evn^w ziP?Uqnsb|2g_Kp%wIBR{{fb`6mhQ(T?iwv6Z- zgXj}vYPBo>Bj@4$xlR?kyVW~=^8+GnZrAw@h4(z;8?JRtG_P`8xIs3hKeUWee5o_minwmjd!>!HFoqssh5z%)J z{|9&fk$?J>M<+<~41D~3SL&?>;kD8ZjFLBs-bKf4~D{8%su=m#zZ_6lBw2cGa z`=XpeykTP_gPIIBPomb#I42$YOGvHz@!NdaV)B^gNbeQm(W&;tDcs7VIw?By4Xeahs(BCaWs#jcNUB5g(gAQ%mUEL zWm_z|+XV&_bm+Sz;%tEx#vBGK6yq?^X}p1TvJ>d1T7TVjftV7;-f1 z1RiRzu!j1+(#-pvLcb9hd}Aty+bO7ccq=As7m0@_?1zTdPU}xHWYu?LC>ERf2#U7Y z=FIEdNB#PLS%B3!$D?KnG=o}i54R;@^waz`XhffxE2Ea(-GwjbqF#e;Eud%z^Pf9+ zQ$JcT5FwSEDSQdw?{yi#G%`{>l1OE_;kg^DWe4R2De=Iyn49&uP2XpRmW%b>)6dzOAFnKDq`gsQM5^9n z9=r{22lI!UtWO<18}}xzsDnDUbPc=R`%p1N+?SEo-YECq-3Pn8A0BrpK1L59M1Q-M z-R*&JFx)4*H4olnf1>-K&AR?SWd7I&`<>~LFW;{?|9E7!US40Z?V8W{W6ky1?|dG8$!Yy-iBDMgZ!}vs)brr|^768?<}SwLOkkzc+wrQC8Dz7Ed-510d7QhygoJ4><*e;XGb@joJjVnAUr2$Gntk~5oD)YS-S{0 zWU6RRs65!2F#G{sSbIL?2jVK?O-&)j1|BnvB;<%9ohy?;m8(J=$pP#^R1;Vce3N4W z^KfdgCN*jrDV7u8a;h1;Vk$r@!!uSVuA;GUJfMyJ^Uyr;@Z`#jRRhSTxC*a5)LLTJ zftqnSwl6qAdM~VO2{}m;a*W5%Lc)pCq+d|T3{ta_Ie1lsz`me8 z_DQJRf=zZIfd;A)2@W&-2n3WpXd(CL;5KBBvi>+4XLGXZrg_=*V2O=}iE&Ldxr0NG zxN&|{5t#)A5#D)K;CJ4=JCtPnLmR5?PchCyj9D#v&5{dfCVSN>N%U2LCbrU>^>ag@ zP@(?490G*zEg=Ysa`<6Ov+ba-Cwo)odViS9_dUe4fqGN~ebIo*Fj>$MMzdQ|1GmM| zAbfu8O0ze1W@;MV0@{3JZLSgAm}Qhkz5F$T9on`m5=+W=7sv2uHq8kVjaYv8Sn#QI zl0AJqekMG*#43Xzp^zZ$x@nlyux($nv8Uo^*k6DMJxUFUBOnM{uVeHFdd-bG#-qUi zGA$!67sj57#Ys@0ZtC$mD9TErxxfsMiYEbJ6}p&*gn+ZbQYRQelOa(EL?6d~zzLnNLODgh%-F+3orM?thUC0k#yH@3BP zL27T~Ru;hpXP43eJD?vm+5_@lv@ph|1fn~xBWxiyd97D0L%Gg_5t6C)v54JPy{$Ng!e(h-# zmukg|RqeWM7|kRiPJaS`HL4_vT3kbp7w6V5JcKja2sx|2x}RAhkE=~0kEsnIladaU zF3l>zgd^aR4?;7NHI59 zGnZG87g04BCFhgzD$c=@ZL?PbFp)+jkec{26R_^8F^F3w3Ah3eD&}2kC-j6Xl$R~c zYRE>*%*n8QkX@9f1o0;h<5Pv=MQLy0f3sWX61Q&dvxaR3YM-;^%WnFJ>;zm8{F4bF zd_9()w=;XTh!=>AZ=7d2+5Qk|IWk`~-l}`G@bSL+bj0S@r~EPUu~pXd2;buO?z|); zQ<5WBnm`DOxyZiqe_s%t(>2Yb6?u6y^?RuM7`OkpGkn_e|FHM_m@ssD!SrvH-CXy4 zedOqQ{mb@rSLd%dovDU$a+me5bLd3+UnGaJ-!(38_S8^Y9gaN1KZ)Ughlu|HkJ(S) zc7?%dzZhtorU@TTR!#CO$%Q;F7kMx|g5I>D#SSkd6Ip@(%@jr~!8 z_zm269}1>B!+Fv+{C(tlOwuQNfhF`j^k>}>%`UV2WteeJb4#5+=p(Uj_&)qa>-E|8 zG+s1yn&uSK@Y}^M%fz979yKmu;n(|%2MwtXpi^K(LXw(%;cs~|8Jn8s`@!tqA+Hky zILPDGu2Qg+xng0HYvo$Bglx-Y!PRZ3duD(%X=RaNO^68x2jTitCro+?*KfpBf3k)2 z6TC9Q)t^X~vO!BH=row?rV`^NDJqIxLX9ft>7KYS5=EP3@A>Cvy~r1Nn3E4hfn7$F zRu~pRne)u-6BAyWf23gHqtE-s2J81)2Y~=2Ay-wgPf6=9r`?1oA-gUdf#JBuE`?%} zMIKK~unCiY0vAxS znaGyAarhV|xrsKT6#q*6xFW6@Z#2hZQ{l%XHvye)WZ`qaZ21q;z1HJTame~()AvDt z19JUaTK(ImLDpa*G-J!36UDweWc)OcF& zZJ@@RPFvTe`I`qXP!ONsWIy*bgQQ$M4^Gj0-b^y85O$A>p62p&u~WNQ`6uq#y2Fe| z4IjQe{f%mq8P&y8d#E zaYAznn)s2-87Rm2#U?lA&-7KhR90eTf^Ha=0PzAg0EIr#O0p>szybFiPj?x&6`#}z z)vR*wX1q0cw}En3Y%WoQ;S}7_5@eOZqx-!8HB%SWAcS`Mn|zwYHRQe8z$7-jAEKD~ zA0r2XTIW|63K#}RF$?Dmy(aqG?W(zvfRl~mu@NNOqOK)=9PleBYlj00wl5+XnMWd6 zn;9ycelG-1T<#Q^Wj~ZZrA9DWYY_Gm)ladngqVEN5T-mw)q)k}=VeuyFcHlbYVD)a zPoHQiD5*91n{*LHkhsR%00)hxx@o2xjY(vtE1Vc#<$32FrQ=bL$_q{;%zMqi63BH# z=nu&A{QSzYwbVSEp&U3MkYqKn_7`>pMxdY&pa#%rE6dX?(D>URt|oB`Dcg!lJzc5S z#h7yl6vzZwyyl6m4oGq@nNMhh&t*>;i&>6Y&RHh?Qu?*v{4Yy$!Lbzzx0E}cF;|;L zv!>Jc2oiXiQt!O~Dmlw2)n=WFOC!@56?=VP7i3pzvA<$#7*#gw4|H)JM7>13$Tp+z zlK|mG>>@a~P6Gc!lik4K)&ty?RL%j8ltq8K1|j~p zXHM>MVEW?f7=Rvy#gx;eB0Ak-W|6*}Fl^KcuhCN8E&`#DYmdvs2P_+gU<^CQIUM0# zh?9ex6V4$6-~Om@i=8zy+Xy<~(a|!~%tf;&wkOW#G-s@5nrv$zX&(u4u24|5PDG~g z+8oiugNu%$Gz&~Q+7m=jL(`{ZAjU;Hdn^CWN$a@8wFgMK8Gr8N1Sdo%Yl`dgGX#kt zeQrf>jcqOkhw%t$*(h}7C&Dph?)Nc^tD8q9F3aqvWmslZafNe!W=EZb`6*C?@Cz>A z7*WV>d?3w3o2xLhFWA*g6B})Q6B-Lc#`&F)D9mk;jF@ z#lg=zwK&r8{yrI4%Q=o>1}^k|sfhh`;v4-#TB8hI{41&mb5Y>H?Hf~Mhx>&S#7-4~ zVCet%db|Asu_Q(y?r1!@@DAiKCggP+v2Hh#$BtIdFS4JT{~FR_zrK_EwZ-G0>%rrt z>!H=@j&Ve*&Z#kE_m6>O&o_*t`IgcMBWIt<*p|)xgO3~k>%-d=L_?px{PT^!3z(vN z_mIi#qi_l*$gY>i)7|)rpD?~?Z;*ZO(=#(qZx{YEq8~S+&JcIwpLy}szRQ*4;dNT@ z;&SP$y`z(pljGC{PUSdXT+Hs{X`#P+t6;}^+`___xcGSDkwm2+(Ocv~+^UB>1aO*z zH5qiI5}qGb_pQM=*XG3i!3fvM%vSc97dc_#^SJZO!}9lTopxyD+!ru+{b}^)6XE{k z-*n#jGCpH}1}DaPxvZr&PZ1*yJ&&F|fkcOg{Uk*#*o^A~Vhusr#vABN+$TF_!eg5R z4PvXw@3s#j^@d9VUryo#F3Mdegf6VU_a9@};x2L4C^i2gBq#bwT;qB1JU>!ISQI!l z>eXb@rPCy$+oUrHBa>a+-F(URb*JLyh9Hp6Zut(hkak5!7cMSi1fzc9Sc1tPOY= zoLE-Ua zgZO{D9SXOyT`|`M8O&O=Vv$9k^(8Y`;U(qpX}GR z_DSq7&&9d_*%xRwwva@^?(LPS{4}XOZ-(HB=l<~+aur+Vq5_f(p50KY`RK`7DqTo5 zdS-URcF&N8-Ei2{CsGVQ2Ai|p0qdZYXQcN4dvtL=R@?WTJzBXG%E#SeL3Mw>*SC^C zH)icSOc(D%F(bI0`(Oj?rVej!QI+wgn&{%=P-#GV_=w(lN;d@)f0Apew);|L_1ntz?ii!_k^?m|_AsxnlXKtM~HhFy|6KHAm>Y02o-zF1z-vLX4~M%)OO4xT`;h@aR+Czab=foH2r~hr$i`Vp$`91+ zAd!b=bQtxPMcI{p#d7;EJLe8k2)_Tfy-TcyN{jpr_Dj$W{xX6Ye5TPXlq)yq+h$Y@ z6%IWKm^Xxc&SeN(Bxg#O+C(!cB0gDSfhJfu+2*a5zzP^u8w2c}nz=gJmGoUfPmlET zGO^%i(Qk`4N1Zc1RdeR?@Bv0S8v(R&u150XW_>XnnH(BrH0^&4<+P_ga7t(_}NX8N@v;Wo&ni`b0r zYaRL@A{&@mh%pgRn`#79p=Q!eAu93a9S!;&Sb@BHob!h`7d_X=oDvtU<52S~d7+WR z+)^pb7Gy3NKD1vbCOt|8u?8eK%o7GPf(rwp8bJ~4zZy&r1eIWwuEFwxn&u{EBb7*A zozn4IqZYy6;(Afhq2ujBIRNK^PAf?_aXFUqoYLr;S=yz6s(;WL5kzH7!Uu^hzWV%; zRaC*t#OGJ0zDPr%K|?S0td9lXDcnphakAj-$X_d%P(}mgrSn%w#HN(3XWc z8wB3~;G)8}-t4GWWS7Ti2-t9=nlm|P2Gxy!v4$lE6g30CX<)kwA@Zt!%cx%)+svsm z2FM=NCAy7x;0sM#`zeK(S_Acvzhd7Y8yKE!h|D9nh%Lch>r#yq52js`4>E9s0uDKHyaAgYUMj@z z!5y8yV2hEBv5F}frAAf<6yfWP-e$!V4M#MfSVmA{=TiUjM|hJ-RG5&A&jg;?m;M;>(5M$;jV!htW3EQkO{?CyEI;s-{0smay_gb+x4)#`a$y zS_i3{GvIfTl3nH&{v0lriT@|LzQ4T?Zcpb5{tGlf`U(TcE9^#TSnproun!q3(1N_X zS4iv~?uO9~dOVY}A)_4W+dUO}jNpH1UGTMKK|ur6eIH+~k}{Bxg} zSAB7%zN4I6{BSrX>}%#98GU9xB4Rh#2zL)*Vup75J$c+VvEEZf&}Ttm!-;bzCK)p% zPt||Z2OH&HZps(_l(!wAxf1&^9jS}^(?VBKTcetx24$&WY3N1Ec2dJ?N{~Yo&v{4= zE|e4RT4i)Vj1@KuksDuvi_E=qW5o53U3 zokextTN|-983I2d%{HW7enij*H9bB{9%_CB_vUWcU*~Z{&7CN*!M>E{XNoEX;Ec}Q%{uOikBtxUr{5czU0uIZAaxUe zhRFWJ8MO800ZzTpn_s|iQ!(C{Nao;qnCeDVTA-RXswRp0781RvV1RZ){gG8vV4o_oNAr3-WzJFXB2pCg^UsvI@JB$uF zt^Zf8Nm}z~hSm|%S@(T zj`PO`IdSp6eqY;~`cEZInQIe}9tvTzrcdUOU4C~^UwYjeUnTvE|Jy=j^F4B`{VtEL z)u*nJ5POCv#lWJ!cG~9L=t}ibHvpc1TF& zEQaeh<$;lGALVqy^Dq1%HFoP30ezP3eQoq2UYESF1vp8Nr>T)eO(;LjFXUf1)X3C9 z{5Yv0pJAq2EwZR>=*RGDqGsB>zshEo!|ug+C1|bRyN=`Q6Ej$5pzt8Lecy6K`?i;( z@3h}oT=UDrIsy}{X~i@Onv?EF*(Yv_=`ey8rvmOKQaRBz;E8JnRpS%G@5ju@XBR5P zVQHfmcqbBBAi7e+5SINg>UTKOy#5lF(*E;ANrIsB4)Y#LGAt+6l_8~sei&p6u9{rp zYZ<2JqT-UHMab2`%xoq%E*)(R+77uKJPljQ98Ik!_6a{$P^!V@{8Fz}&lu^-TJ7|J zWBjYsvv?V!WyC19`a{(F;+}aVF~{?L4a8Z6;vNJi8-O>~UH>~PGi_Mmh0ntFCcbWJ zSHx=M8%?IPqT0Y`TqyB1wgYu@>DCt!56x{TGxw&JuQmz#n+<%9V-cg}lcSTPqZc>j z>InTZHcaKgCU}5UV*TmwgHd7irL$1z=9l9}_ze<{h;kC&i2u^opR> ztQh2^jQV1&A_+Y-b;WDEbO?yxPKb+-?LN+tcEE7}j9VKv@RTk`6szPm+Tf{{P;->K zTNVY&>srSu#p@v&rA6jWE0}O2ld3rO&RSU^d5lQ1j~%Gt)gMk$^KY^rOO(6mS0_g; zXw(;Zs1o5()r=e$SRn~~;uc7ul>aQzY?D;8*sy`@DnXVtP|-I%JC;*`&tyeMEpFRYylznr70@yxFaKUJiJ$f3=bPPZaGL-NUOF}l$+!`io(eyNdJMG{ zg_b820|SV{ODPf%8^MHc+N&+c+*8^>oO?H}?P}^$j5CVj1A3(OT}$*NhJq$S3T*#U z{Y4k#f>Ij^kE|o&PnGde`f=L%q6>QY z-_%Rkp(^6?GA_}wl6pun!;`!K0D8Pq#qwrUkh+y>h8FDirXms|coMBan?c(^q(OA5 zmr_mo;-s`7c-lbLLu;dG74X5X>KYw`<^f z?Jy?gci)EDwHUDJviP}tdEaqy{SNU&!h2q6C|SnoK2MN7HxxxTYEm`$`kP!?Of$Jb5mGld7u#5f=DCb z&r2sf_T5jkqEBPW80Y^o7LYjze(kr-4vK|we_fQSsW7#1ir4J5T#AlfBS9V$L8<`R zjeaRLJbJSo<5j0uGsn{#A3_8EC8yDxJ2FQ7Aj@B*4iPMY1NT~P&u0tIXU^-y|6TrH z7U13{O_Ib9^}4yI{Gjyl`qdD)x$(}@sXHU$YPk7%0|yC+@zauTUX;Y$aLwc>k48Cd z>VR>()sipGva}8NSM}qeT#>Tv>)wk4lF~F6UueSUgEiT#O^FMu80p;Tfe~^wqAnam zQW3vKGm#DR&RvjHEiYwhkft7rFp4vh36^mEvh}>PHk*X}tMluAqUq1B9*X2MDVVw+ z<{J>QUyo~KcGDuP+~6NPBrNT*TkZFI1Q*au@3!dPBec(Q*6x$fE#e^O13Xp|0@MBS z^y4Q@~U64M?9q8dGO!hh|J52L(p80*E;&E>4B@$tB>(+&TLWcBe zK>E|}c`)MZ{xli6h^GVtO(bqQ>(xTfGm{@AX{aaa|BgxCO6E9-{o%*X)&2Nv`(vrq z?JtV6ZL8pkjh{U^k(iK*6J|F}-(pcN72nU(Z`;icmJnNqIcJv7TKP>`A%UU#s7hR>&cr ztq$Gm0^-31KoUQXmvbKWPp`+nt040ajjLw{EN+AT?3z{h5c1((e<-Su_xo%Fs@(* zbDG2QLv4DVml`j>ka#3;&}^?uQMwX^(hgRxw zia3Ilyp-x73NI77v76;_Zmz&eZUddM9Vyd5Xzw)g_p{rEImBXobGOo(FL<*waz(1B zK}9!Coh?{RCYl-LE-coi?pP+=CP?Ee5suR13bB#%bSYi3g$?tOcYN%NEJa^)zF4d_ z4iu2wFi&z#CNC7tjf>TVc2{OFlE*2KWx{7lkHgL1|3W6jVKnmXt&vcV(lkS2trj@& zu^@{T2Nbg78JEkGp-7HA)ADAtfz1geBK*9LQM4RK7pO>wY4Wi6BL zPh<6$kJsabp(>q;yB21t)tHb^>Sov&$Mwg|V@Dy(ldTI_1o6mp%qk|9%Ticld}npR z6$q^0qC|rUMuLs1q#P=olnXgTKgu-`wAtkNFGhDn->Qh4LpLSVid`&6vPiA7nEvM$Z6W?#HG6U zNAZEs?Pks#G&K592_2*hIT4)RK0ywCR%tfr7zQFM$+Bv|%u4SK?`3CfPIF)``r}5> zK?2-P5Y@aRpeELXf*sr92#qM%1uUp&;l{}&Ra~_Ttwx$EjZD8^VBNs2?ton`@emiz z(V^OZ^MpVRJ%P<3fk#M#y#Ex9UAUFPK3+?ej13qBrZq9%>awFHhmC>RLF-0zqnbC} zD=7f@2Kk0@B78|Pv8aNba*`c{T6Mz3R4P(x_EOZKzB6c#WvHXSniTq@{ zkSIQ~lm<4wM2HTYmN5p6o>M43ViTnh)&t&d<^l@=j8@WAyPy)*PKBzPrNQyJ&7KWR zly>Mk087mSAB#&)^ox3sD$L;A{Iazr%18(MRFrqA66}e!*g>E%pyDRr(;-X!hBd-| zTwwu|4SsnXk`Y4)?inQKn^Y7iD;}zrP%FzOTPG_#S)UljZsX z1RO>ulaKNqh3+VYU>ReP{lsf(Q;OiD?S~_i-e8)nxb<74LTkp>0+fI^LbNU=BL7{N4JHk^H>9 z7K-}11KE6;|Cas{^bnVMbvp;@es1yK-Tb!&^EUq@Q~Bd5|LJ%^+0*Caz3`M<@t>Rl zDfnUk);Wu5EPvzeyj%ME`g$PxZ&&@!zw1EsgWtaA?LK~Zc4~i(->w@H$b?)6Yzt?~8nyfU~_AD={&w+f+Un;8R}f%mlIM7ei#0r-#UJFO2sIPtJFskDDF+ z>J`w5mu+-Djxutk?M&f_%Xs?q<=c+??%x+l=Jf-Gm5U?gHrFxdOM?{tuOBT{aYN>> z^;rRy=nZr|*$2GJ{+rV-;?v4v%nuzAgeHN(s3YrYE+_xULUMQe49(3d3Mj1jiwT^R z#!qYMVw0-q;80jwJ)RrD9-Im^8xg$xP|2(cH6>a#zhVxRz*l?93BE);G&3URQ8xs> zK$ek_U@5+Yk!4_biRWpWQ@8`Ua=2s{D|5;H+xxY>ze$jGyCSnMXN$QV|89PpCUEv| z=bn#b9;sva=uPC`Csa&5Q|yxc)}~}h96xY&J*fSxw*bch+D!QZp&u)W65yE_W0?0(#AagfHMO!_3ZjE>I7 z?SvpcU${71Rb8h}bFI~nOi8)zW2>)fD2{I@LKHL^GcuEZlU>)d|4v*uvkI}t{F;o< zbCWOR?(sv=Q+uMKtGgWH0+|}5KT?X@{e> z{;w1GHxXp>(9JI0Id7gkIP5wolxTG3=z zmH`o@%zI~J7Z6h|9FxLnyFawpYOAGpJ@ZFoax$*hOzD9k-6=fm$Wti=LP?OpScAEa z_%dH9QK`GjpZRO9T!3|?CXFb@IrzV*{W6hIZf3%i<7mgj%wv^x?HR7^GPqDI#rZwT zF&w&teL01br^x?Sfxuq&Gi@Ca9WR>pe(Dq*Z7Q_Ak>4x~HY)BeA5@?Iho-BHisJkG z#M0f}-Q7qm-5t{1D&5V3w4{WzG$`HODJ3c0ARW^248Q+*Ud&kz2j=XHnR~zYlf-Vk z^j^imBgkX}@R2G*mehl64xF=z1}O&2VJWn69!g*X!i~?3^$ztthj6{fGt3PH^B)G9 z!-R>r)uxDT41F{5c`cmo%CW+NV<}l^=cP$FSrnYJvR84BacM)R!z9DawaQqEK4mq! z{8>DlvzJLHOee}{n2LY~@YrY+>&qu_KtVrfHHnZ?icY#($IOvtKCbq3*jI_oP_%KX z5%*K9iGF^i(q=nz1-FJ(Z@|!sXBPiwn{knJufg^sUa|~<)W-69x71Gd4RFBUS^8NY z>EdYI5?_NJlW-nEX;k^x6ha~o2?uBMhF#G(mBY$Gwmj4#?v#*x+*aIim=E#1QknVl zi3f;0)M)u3y-tos3(DM3uqvTiIOAbkgY+pXxbr|4Bpk?q@rMM9G=W`+U-jSKmDJQ! zT1qe5yhQgXJnDEm6{Sr&PCjrf(k3sw>NaT`BZN0xk49X(n5uQr^tzuKs4zh87ip*k4uPloUF3rL8OhfS6 z^crDh%_WgXiBkS;==L~yI?G>JKIQ>|S@baGhAN^0ZrtzjBE69J+2yJ4`L)SvT+u5k zjO_bFqh`$Iy{uT&Q#D!mbhY&yP*V=<2p!k+UMSSgMYhyXkn(e4Q5k4-E?e1)3asU) zgC(w`4tUlWMCl(mEsK8FQyro2lT0@4`;d=b6^ouZ?uk(%>SUX9Iw+v*SfDj!w8TZ~ z*+2~Z#BQUh{klcy3UwYd>UW=1;k>9SRs2R~rgb{XjNDzko->JqiE( z7m5kl&|wiLei8F5x(wv+PP7!=5NSEt+}96&Gh%oAO;0vhbguLeBLk{$uwFoy3KJU(%*SHA;jJ)Gab^DKjkTs_0bU(v5)8z zR1IZFl~M;TuS>(&`ZdEQ^=GK`BQ( zXySiqr}s^3;h*QC9TxyOTOWqcZHf5qo?6i8FF=~`_ur9Gt8Xew8VAKnwLu5J8yYpZ zj8E^@;0Kic2Y#bw|BTzf8=jY!u2-)dai8ArfOdVaqN@4%et+fVnCD~A4Ut#ZLo@>h zIcelMu!I6jCZK}sep667Z}!5m8@Kj1I4-;~6>rP#n`z_idSfjrtb@d9r9o@Nm+w0F zJE9F(`Zx=l54)3|T{f)*>fSA}fewyMKN}uoG!Jhj5crIsahy7h!E>*RwZ2+0)pi91 zwjBqGbBwkxP9|;27~5X%74!T%PQG0p_C+_GoXzkcYxE~^`C3S7wc{lI&>6sNl90#K zsO?pJ6X3k0LaUXjKYBU<(+PQZx$3HRF0h{bTDKy$UJ*b1< zKm$Tb#bIea>gV6yM8M9OB_5Px5cKN)Xi9A0=+tD`xP0RHuf|Zi zw4rv%Nxwr@I+i9-pb+ds(^mv<9ZRcbP~wz&5MI!TNe-*iqmwb4TuY~@WvgS}fIM$m zO`MHonH6U)pi#osr|rzP2o6532vr!tfGk_b{(=6JGMmnw1(M8W5F`E9%%k>NYDRZjKxAYT2F+<0WzT z?l$BIdj=T|+_@A71k78hb4El!IX_N*-#D1139ac#KpL<@oAiL4u~Mh-qTmmRbrB}9`M&pb9Ugb!AdN`K&ik@yZv zg7k0YtZ{yMfs~|GurQ`|H(hm3D4so!h2c1ZzK4;pMx4G!&QkQgn=^QGR@T5&V>W+0 z#euVtws#!u40erpT5;keE&}!48Jd6iSjHxgWR&Iv)|vfvSF^Vd}^9p+4cB ztaLOyybJRL<(OMNqOfjZyxseJcDqld2;6+>p20X3z)PO=?N5OaXDbsN6f>m6UI#@=@(`kG zNXbAc0lk4mSbq=|rh`v{niyrm%fZmdm?nh2L+|C|=x0qdAdGN^Xm-2_vU5=dJ(Mw# z(#R$-3!(zlA<|OMxN$hVo#e5iYM>BLhs@F^WQ_y8$KQg7!dfpz~rVBs^p z%oX*yOxpbH(y(5Cf3CNWGyw3!L))3_3E-puu@LlC z2lPTXYre4soHE;8cN6%}JVgVZ(*2q~7PSR@`}Fl3m>#^{&&a<3w!G=*M^XRl$j7mA z5H$eD5_rGGE(Jad1E>Q3LY3+M!@r|9)ZtCkAh6~g2B^_pFScX`T||$s8XzKTBIdGd zD?#GaFg)9DgAee}RUcn8Hm&}OE>A#3?zs83yyLh~{*BsB%J4V_on)Q1v1My8NOxp( zQ#YCRI6jK;m_p!_mPE9+pEh1swVjaDLzhEhTG+AD8PG!a*yM7+u0%OOAE;+)aQd%m z*7Gj2Ib~8m>o7jUGg9uRCo;g7wnkJF_54GRL}%LR?}=!Te_0IT4GT4S<9*DXSUZ7? z@3{p7A~IF@R3_Sh!gO00u?F>!8%#bp5_85StXhUMFIw&=CO(h|h7b1AM2x4J z#?m4+9Q5V@!8YDapro;dU^;Vuil9nxn#49icL8%#NXWMzC_Qp!VIdw@#o#KH77|ka zz6#L7V5h^6=BS7w3MUnpXYCZ(if_Zpv_xL$rgcR{XCg$D?}2V{Sh(D#1<^{3P}JW( z%Pi8=LERgM(O%@@AxqDDMRF(r@{P@axyOfDHzmX+@wDwzkjy1{e(*v5AE!QS(ZZ9y zM3vwH48cmy>MsRlE#EW8r0^qxAtAm-(?@YWSI@RG9u)c!$8s1zR>E)fgDO105h;Pi%hDj-aYid zeq3+6O)}d!5m?^|BN4DWU4O>*?Z=XPdO!8c+t;-hkVv90d|(S#0^p37IFfdsL+m#3 zt$@nBD*ETc4fHm$*li8oa|TF;i>q(%^ALu&=VeNHXl};r-F5;FGsjRDY)}%yz}e#D zrzN)T3HlsmwUM$BnobM@$G10uqw6W@Kd&%qU-#}<+Wkft#@imBFeV9%1z16W(*2SL zR|5p5=huf?D{j^aV`&^$r@$KkI4$4M1Y#g`!=`TYa(4`fN`Cjwx^49mm^LEJ1*73j zLi~PmSFe7=z=ty>DE;e_8bU!0zY#}jc`CkFAv_S*%2Oa?0P|C_j8dkWS(#l_Yg4GV z3~^t{*&NXV%^EKjG$(R|$j{@X+FdbII&NuA%Vy{EQpUO|@7T!W!r|KS}i)@^Y&m<<1Y9YBq#4X}W z+;4B@X5hq++=2UGcimvQFYFmuQ0ZB@9g}s8r3|Qw&ejf-LP_l7aQE z`h%_5NqzF5J2zLA7AvQtwSnHl0CWSM=n^IFU(s%R8EVD?Z7kW4RQJ+wZd6ncD|VJb zLKe14mdXyEess$`MQbQ{rfV_EPJfj%=$M88Ygq(2ORyl_ti2d_8`rlPA{VS)5$U45 z*svnV*AC6e2t{=yhyTce7B|Bt%OMhdd)rVDM-tqrM7nfj(U0 zjz`j8)B+k47hHap-Cljh?vH<)1$>u7R3bfQ!?1fWC5;J*9b~<+DKk;*r~QHC>W63@ z9bGXVZl14E%xK`!N}x53Wlw*)eo9d$hI(MynL5@E<*-O8$)<>#vJxbl!pvC1W&~mdx z>xv|n3(8VP8R}cdh+Ell9DfzQLXA$6fy0R36=T9qenwwEYA-K zLNTlrA}!&Sj+ViTA&IBYeh#C14!{r4!rCX?pvHm(#wC5Jy_8Z~EUoDcME^yx$n)Rkx zVw6i_l~^E>Cu?RIEP&-b#Xw&)%(#j#y&iu6QpF*4}kK z-C6#3-TkY&zf4wZ_Il)JP|M~1&;+On+D>kqvu>OH@3{p&t?d9ctUF;CVjFLtd%wQD zZS((pe>pS=-|e#B*=PK}7J%1XnKW;kpwy{cHTp&2E8sOQx}6w5@TD@Ha_pq<@!@v|E4_D37i+EuSga0C9h0t}QX5ms zN`f}(oS7+f^=jETWu`}ay5kC_L1M(~m#A&x#4YS_hrR@-W_@)m<2hZEoRvQQrEfx~ zm7mm2L(V%c+uq)c7tBZ-Bf3QtgcO2{0Kseyv36SQ?hztfsY83pU|jWR0b2ZZ5pM=o zI+%DY^J|N1XJ0`!>Y)$Bc}hX*uKGngQ*0!tgz<}HF5opwP9An73?kfOD2H%!vgt4UeCgS3?=&n-{*2?SEL&Y*w1al+uV-6eQNeN zBWGAu_pCozPgy(H$)0zKzGf-<5Us1;uX;C-Md>-dkTv9#ZOpa8dA?@7lS1J*VA%F?cRx+qmFX(qFa$&R@ZAjcjr`qQ08^? zV^iNakRtl>KjIxxQm%-^}{V}ygGWoeP_-~}=x$Gx0tv)|iilXG;J$Gdi zavuKU>P=9^WQ{90vLLq9t5B-GcM;_~QaJ5+xMw?W`?xxNp>jHYuN)*omSKg9RjUI4Nm>R@%Sb26P&_xs3-B)!5{BXJ5iiM`QbkB`|`CC;lomW z)kT((jIf&E4`GT5h(!*ghLu-wWlmh5PJSXr{aXnz%&OBFy*YNrxS<6#?xCzPEHmK+ zi9Utjtq12WEOW)a9jvXcaB0sJieIFiAFED1lj-TA&OmEV$}E{EBZ?a>YhXls zdBQzX5BD!|iv)h1vzc2kc3MA5gHHyQ7#2H^QBg)F*@-s9?k0m)R>WMvLv`yzJ~yX2 zA(M8BhU|{~;?t%SnNU#Zo!eAsYDk%9yQhsY7bWW5mL9TWKSHIB6Z4%yPkr?_xyFD2 zR+U%aXO|C8at4)kl1?$Ks`;1oyidx=>M%OveXzJN3+x zxM36%H+vO$RmXcOq3qJymOwWeP4t-!FEaL_mqkdMwUMpai263lWt)(60Cj-`0~Wor z)le01e36=naKG~iEJpVc2;3mS%hG)%15mFLUq4=Q56k;`vP)n#!aj$)j( zu;X%YDL0I!Q^x4jRA-S#WyDL2#<_2^dfy>=C8isKONM+3j#O89uOxIG7Ki9@0Lh)P zlGO|02pyUhh5+&f#<&phDxRHLgVtYVGMk7Qq|g}=KAUot7Kad#kZm|4*i=;_Ha%C& zl1#0AiCHWcD`#*v6a=?>8f6!>NaQ=i$e`Y+Iu5=i(xQnr6Q1EkbZxIW+abPxLdLC7 zjEzRRGwYv9kersxlS<$C+8!8Jh*iGB46hg#UZ@Mw9M`H_MW%*5h=^2d#(}S~#c9R7 zat&OWYT1Szw?2ckSeX>DP8MR(M18XnGI1au&f1@W@0l!drkg;5;(rP>Sj38&CkFQU zjbt^kO`s~4?>`vFF(Y%HXwoicr_)FIdLGH`8_~nTBGtS!& zu4gwN8W|Wq*Q(`;?Y;>ZFGm;NamP%>t&t~gq zP0MU=1Ip{d>m9{Pi$@ILbCiY%qw1fDw+i*zqGP08d}iHFeb%`s@4~PZt!itBSVVOW zGr>GoX+Mq~tFPC7A@JCp4-gJMy~<+p^L#X?3$@|U*T&MYFuMKSh~rYTp&Yf^UOzKtL7JsId+{0x$hhLwid4A(cK z(7o9T!9^KX>a-OWN=yDe+`;1tSAuJfFDz}#QETOr^F1o8YZy}_mUN%hlGCGKbUEcS z!{c!el2AF0GAvRL&6iX*aUmF^S`K*$NJ4Z%Q9Rw`CT54AHo|e;>6eORYhFrGM69e> zrKrlW5dzv&p8Y-4n-f1Fj}ev@I+Nq8Ie&5YrGvyNx}v-W%XHpHFShucV+_`54=>U6{HF#s`t+<-1=Ba=pkUuz4@20|2Sh$Fg& z(++{Z2{^D36%pF3TW<2XzGmw1(oS`&z@u1cANeP+me(nZkN>75s3*O=yLSt``nRwm zUUxb7CL{pZfS)Fv%08CK@NanjTV682RAJmm zjQ|*f{9)wqbm`W)VDF@@|a zs6G!n0MCmMO~9QJfz3~&dy(W+q*(?+XObtx4(0D_&jWqu1(V22xwIp35x$3KgwrU3 zV)2?cdH?v83xBM?g01Gy%wnk1#T-+OFx;xeG+`~}8k^NOL5Da^QkvHM4r)e4^za0{ zwIo5DXd)~c-Vi59&Snw!bb2kfM#fx?B$CIl^f#^I#IHQyBzy1pY`-elI;J|hI-$mJ z7sYv%Va$kQ3SX9q@$oDLnGq*i9A=zyoUn;fczboN$AG+QvJ`u~amSL6S>_e`pbA8E zh9YJ4iv0Ixoc>y7@62@do1jf_r=UN;?zX5m#@|EaX%LxH*;WmllSBY{Tdc`awo9vfF(o>JP6>Pp;ACPIG!r`Y2T4SDleb)=j}r7L z`rB0Hf-N8IwgiEsyrw*DvKdv8MjkvDL#N6U?>PZF-XJeGnu-*0B=h_5Qn0t}KVGCv z!4C}NoObiMW!&)x<$1xe(0hSu!g+Od?Sgms#N~x4>^^sW}gkYD6|klUTu;J;a(qgK0!w zU4A@#CPg#6RtM5Q)Xg8KN!&LjuKE6L!mg5B1-i+N zk$y8Of3L+e9>j=ol4u-lsAjqJXSLIk5rhGINPy{@_Jhne)pE;k2tZ!zY1*VAr z69tLDCs*%(%m@oVUEKdMABDk=uGNuEJvwMvdkO4040Hg_gSc-3_iGeS(65tqw(Z_G zvD=TgKIfaC{U5qs1#ZRfg4M+Cg5@wMYK`wqpnmX8OiVNfy^00B><3M3Zc@D&B>_W2 z`xO9PJf8mV^Z8~OSVTK7ubco)*bamEJ(Bp##h3f7yL9`u-HlPa&gVXg%c0kkt_zBn zgD=guKRyNUEjz~TBYoLs`QCmtsnm9|PxAHRUeCGH4~exR=FC1|RSy1Lr$~g+v6$LA z(sH)tICkf-Uri8!Nz%Vv*M}f03MGAxlh?lwmjE+(La^@=h`zsTEId?2daIZJ``W8H zizaQ-KVa2ob!?H<-1hmkw@m`WE9q75@+|&#rsisw52C_WaX za;LzKZU`{+4c}=T+MKl-hsgchdA)MPf+oVYErA;rv4@6y{}ll+QZA1V9_aIqp3JxJ zZWg!K$s70p8^igCZLZ7HZkp46hQ%NWU@o|wXZvWgU3ucTAY}e)+G&9J)ns_v z)$tM!xoF_<=aSrW`P^!2>Zr6qv9|l9vAYLdQyacc_^x}d7Z)SaZB3!xk2%M=kg7@S zag+Ckd6y1qe_suOJhQiV1%R%U_mFVbh6#Mu*+wTp z6W^O()hztQNQTK!t~lDeNTEp4a-JPO{Rdat!5SKWxM%uZRwRk_hiU^$&&XZ)E9l9n z_ssT&?b`m%4)kn23W$=mc{%#9j+A4c-zP>WSQl=p4CmwAGp$nRKwE;O#$$h?3gzog zyDEOgF%Y6VMqN;Jkg2!WL|q%Xk`fN%jpnZxEMF2oD29q~%tzAJ`?C~WzJWR>FfaRu z>Eoy_u_19XFFjI30R+y9;U`+M);AjRLY7llo>6W1iRc=(YO*X%^LI$^agf4yzdy0| zjE_tAK_8r0jHi#I8fcvC^f-inpf!Qi^(_9(vfmy@m=K2j7M3c4ux!>4zJx{?nXmpC z+8C|WV^hIrZDA(Xzj$XpYLObItfw-raN0S-d7OlffwevvKbi)?a3hw~peY^0iWh?9 zf|(&=ZZ7YXIt*PZFi!7;w`d@e-@xb=tB1%m{^Eu09gM$%iPk}d`dHe$u zTcM6-cFIS4KUd3-EDnY^8uIj0L^YxEukyL71Qon}OygYDI{BHHC}mo7M3{q&l>s|u z@K@1RQVsbSvCKJ%Ob53I^Jd{C zPP6^6q2}V0uf;YmRCkrI2w$M__k--?8Hn_HDAj#1#jxegTFNf-*qoe&xM0T@QNJog|nO@XK16@{Lo(IUdE48y_pB9He*O?arz8A zG2Y2YC~|&9-m}Bf4vUP+bPIyl-@ungs*WJc!xa8q@28kC*P1>NTMVAFnwRA>e+M!N z#k8fJ#+9f0O&RATtE~FY?XSXjF)vruRd!$eIXb{MF?N@Hc84mG3)|1M;O)Ru+zqjn)?AEU@8JC{n~>gv>zqwtEaCvm#G= z%rt98!xv~!$<#v<>>z6ER8KSvPa?Q-9#Z9EiX}!_*N3~vtJPQ%`>!yj&WOnYS3p_U zE_i1#mx!A)kyk;JSO=m2NwLZ^Jxd6r2-YYB@*>nVlO^e+e-)}|x85S^sN*QZ@l|W( ziz0kWu028UP*YU+BdniFi5vMxNX~p?<_FG~nPpGhT9CzrVr!nu?uc-hF6lxZT6`55 zX}vW8dRAyV<|4l98Y_}i5($F<-V&(R zvlic#--6qMJrbcck~1I1?Nue5$D6a&QX?3Q_Z_GrIqFw(;bu8Z9fzNe#B8TmA*Hpa z87DCp!=%?Jwiu>P;*wR45ZlvVYjMw+X3Cc|lYWn7O>0GY=vFRvx{qM!=zqVP_C~tO zeIb9#vF`-B9^U#O(&pb%<^arK|9(97Uk~)B^|qln5wB*KM}C3RmCgAMzm-Pxt$x_fKv)V!r(VI_nLHqiB%j)Q-V&w*9{HO-t_ovx0kQjUk&h$-ADyrE zRDkRm7Ao4(hRcmjDImc5HM0xwL*4;RxlnH)_v-@13)Sm|+6zSdXxisIJe&!?b(*VYD!T+3Eod4Hxrv4hhiGOU)WG;H?W)FQLPC`{kx!B`DDB6F)zx zCam&}?Ebm&?$V7)P-5fg?T82nph8~!peFq_;6C*PWGq_4QZhu9fC>>~sKddOhsSj9 z>892nC&vgqt*L$Wn3kx$;>^Q#?!3M_R_d~akjp+8ffr70zS*Uv7zvL6=*hg#$Szcr+m3se36N3+MX@Yz~RUus|9n!<4nFgk#^>-^w-E+H6z z#r66)7NpzP{jWmL=k?S${%cy(1Dab0Fgmlj^MFpDqDv?6zKAN9OP`g+Gvs5rhT! zDgkLMsrbhC{oL~wa-I?Cgs!ErXYTxqF)iqQkr%vkJ|5=tF`YYGYNW;wTXurMyRZx7 z1i<44C=JA3?koXuj=RsUe|B3y*H@H#cv)Uz<^X;9eR)9K`J zElZQ-{BBR;O0b6zKx*|l=iN{`l9rI{vu! z5#P17iNmyIU6daiek|>GIA~1B+O~aEMwswFu=3Cjn~v*a($A%+AL;tuZ&7 z4N)Om6r~_kL)J9+U1NKhIMqf2XERGCoApkk&hx!*u+mB1S_&uRwK|aQ=B1+RAxpW?lpD~f?3#U*|2IJV}^F4GS_tbbh<=J zk|R73;TY8bI;G8wnzLRxLk>mlBAhZIJfRXN>e>Jgxt%d09^Q`f@Yhw8Fd>@+ z1UnzvYmX>Ml9lo@)qANZpyd7)dp%Vz* zis8D16K*_T`g$MCaCg0wtlEu**khc4Ye1`$RUY@pM$vOdJEHc=pN7hcIY&{6E`$ok z>rB_sFaf`rzg)MG*onW;gO)sU9aDyiEsrer+Tjgt-NgMgm-o z$xYR8Ta^!EaZSsjDVrxj$c^B+h0ARDO<%2qP!_3gh4GiVLJG8koD8Oxk%W%?3AtrC z`wXInut#M~9qyDiFN&qX*>7uuL2W}LwH{&vT#{JL6iw9R{^D-hYJBx%%!fPKJUlAx zk-wDaf<*DQG?;&=1EZ*%PabK>&`P8eu$l$mhmwXl1~kMzn4xmCvEZEh6jLyc>o%HE5iezW}v}%yW=b<_ZFzjUAsT^Y5_Lu#@xPpy`Igv z3R07fQIYlgiTlQ6+ez;`AQ0giD0Q3$Bn=eL$3T#74}$;w0hTR?Ni~r6bbOss62rN3 z_HueQakU0`B)!%H(mn+o8MeOyU|*SEGv2dD82_yu>?UE z1W$9OFLXmgL;3*0y!Y+tv-krIP(t;>^xxRvLi$L#LlO~XwJ%~JFuxr4$n9rFu*TMw z67U-D+zCkwJ7)cTt2~7?HrDg?M1)GR_E=nPp{CAmvi7wAp`-oTf-S9y0VN}-sp@q( z+{=Cv2>$hCT1|>m6@G1X_o*?#MtSxLbZI-z;cHtg8VA;aN>8(>aMr8`Z3 zp&{e;+(&Bc;Z>Kd0ybCsrj-Vb^`Ro(w&ucHXLzdwA}iXK{g{6`^VTlZAVkzLZOL8B)^NK z=1K6NH(^zQ)nh%JLke?PLF^4A7UcXIkn=ANW?f*P)8kcrdr>tOoUXXJ>_?LP6MdL=v8wfT3kLP219-c^= zd25OW>K{yylob4c@dpl&Is#+BTdg!8P}~9}LI?;5{%`L|oWL4DiYP}tT&eq zFo_q>kG{>wmpe#+CwTAd_-T8hVh{*-ychXf0RcoiJw4DB9dN@7*mX=8U-+S7i!1kg z^U+AH%}c{wU=oS2=yVWOLS+fipr#eNrEunT1+x~6e#8Fco~f<*7xh7z7r!T%k3MXP z*)1boyOg998c)&?JpJ+C^iOIRSZm5!&3{6+jk}eieV>v3@%`b??jMSjmh8ZJuEb}B z^5SO3!fQ{GN}!nnMRZ0^BV8vkq9w{MGJW_*_iuyfpZa;n%AR9&bO zf~yGTZ;7~GUSeCvZ7a0NavplCFijLo_pxz1b$g&>;6qEqghbO{ke0Ckw@tH{J-RYf8DPv~CnXJK~a-*`vx3dtWVJ890 zN#oVsskKb~|6E36}A zN7}cCrfh8rH36g*ZGY=-24)&j&#DoWVb;YH`GC0e4lRpm28K&ciAJeU5z~!8#45bf zcmq4ej#4j9uw0%nGv0}SI}SxNwN!EMY_ECa8%V)C3w@+c)(jV(nT1u3^r+v351uEY zKp!=MMn8nCC7TF;Y)2lhOuNju-&z4(!G_z48?lrwrP~mb_5ONRGFm}Pg=+VXm|-wi7#Rm;IJQohCgOZ4N>1U>6)C zi2PwC(0nF5Fe>AsvyxIQn>+&tl1T4O82-Z~>EJ0h8f+l%~DF*@IX(SVaJ($f@0o`8)vRGFb* zrhv=VdNf+T$a+RTQ{p$-QYQ+Bi_Eu#wcM66FfJmrG8djMQ*$5Yj8`)l*5&ZI6FKQm z6>JFQ{(?p{xJK-h1?&s9fA*21`JQ|>Y=VGct}20Tis(s&1f$4c!!+itgktXV;&cEL zA0NSuP>+ANA|K=CV*3kbNnJn@9(IH>sw|j3R-rOfpYM-iR6YgY2e0c-V$`8NIrsOI zK@&j9q*ma=@7&9}+~?T0_<{`}S9W}D(uN+3qiIYq{>{^N_S{2p?kE0$wf;K&CjaQcLEuKxE*7t4$&u;HtZo4Kig6^aO&%-FrZ7jlawGF=BmIRFk-JiW?uRq?tzL2~d631VxOakfWydpgd!Mw|H zPr6`79~1ZO$_*G_z2ev`GCi?(i&7H*d;b!cAiBQpmbI@;6_VrXWBXpSBi1;l)AVaA zbfnSF5?Ry2&gBijt6Cnbo@+k2`kH$kuU=fXS9YEiWRZQ9!%~91{-E^Ko&#fS;jHm; z`uE!Ak8#@C#It<%UB}q;QD+R>Ka;<|Uw37mL{ENFm~3V24;}6=MqOsxtyjCVwrM~w z54<0~9TP${CPZ=%^$up#XSK9EajB7>edGxuYKt1>&cVKHH}{M46FmGNQpScGZffLVF`I)Nn1{|abdihh8Tw~_D z<3@_zEEO%0`6)C@cVfSecD9$o$csCzGZM&HpE9rhw(jzL7jxViiNb0ga<%RbrWo&l zZ}w;ILhpKToE7zg8Pa2R-q|nINIM>S&e1#CUbV z1$K;5#B@|_c45h3+ftZPNiun~HJoEIZMy;a=Cm8ldIo5JXnEP{8+%J&>NM!3ilNK7 zt@8N8vZfB;VL5sE~_+f(wP8mNhA@a013Zra!|e1{AsiF>p%r}M6gKz6 z(8^zRTe=dgcS_OpjUM$kw{x=}*+eQ6ghSEuXoe-Y6Eq_EH-4#mN#l~`*>T%YVtXS| z!)AwFOV~0vevjapR_LIEbE}5eHJ2@`<5nBxWw~Ni&#vUrw5`|G#_l~Jhmq~>hHr&F zjh^SJoRDAM`cNS2{55*FJO)~kN=~ZItOY!VmH(NRgj?5+?BO00F;zWJTN6fr#u^S= zo2j1EnV^8KUSf_bt0$B&R8qu60%itpQrf7y$c>#x;c%WZ2CkyVe77EEbCuZLHyAE+ z)HwmQ;$v7hs&qIMb;@v|4An{`q3^0It+ak>Z#iRM0;-f5k^Ie(R^9qg21+V*-IXib zg*$Z*U8w_VS&DK5q5&-(9vTf>n}^eHRYoQS8ydGk4GUaX9doi__|ZR^5LjDtM)eO+s<-$I?koRg4wKeL3t_FhhgVoatM^9Lp;ot~_0xyH0yv};Lk25K@< zJrTY9dV9imwbsCNba-@G3GqZ4L$`WdKP)}xOfFmY6x2GeCeWdg@*eq~Qr1UQVTMln z0~m+G+tG`HFn2g>+fv=)t9^OfLt_Kn+4dMD4ti(QMQdj% z+rudt*8(mb#5xWgy`{qBF7YoOKb88GQ$BY5Kzn?z*cpQ~-t+!bmupKNc4EJ1C0Z5| zrMzecp}S?Cvvq1AJHNvAv{n%#p;Rko6_iL7fmI2eQ`kO(+PaqdL(CvyCbn#nt;Uby zZ43o`@pLL0F$%(h{7v-D*u^;)Aju7BGaMm&P<* zKzBmjdglC22BkHP5<`;Sdiwm=@6{$mHX6Ld60S@waZ4)vH=3dL5ue2Wty@g6bRINF zCZOY34exqWiKky%r4UT$AOvn?aE04WLxvY;6f&vC3i0q0m|_znc?R=i3^npTD^)Re zqPkQ-54ZPzI_|amTe|r8jQ*GTz_0Vwy`7bn)!qQOu(#4WjZ;iFDuK!E)oIYw^|0Fc zJn8B6x3Y<^*(JR1yEZ@YQ)nUO;M2o)mk<<;iSE;ds6+2}d=r?v4EPdWo^qeDX@MO3 zpGW?;0hegPcN_4+Pr&734oEO;1J7MDZS%vO&;`I|bOme-Pt}iI zFS}h%=gaHQgHyOhx;=5b^-~u2&a)5u2pqocpM0ZNd9bh!FIVeM&JR5rHgayh4>h-U zNV8NMMQ45L!t%Enx+AGuc0T;Fh={nMmH|S+za$YOyyxZky}O!NE8zWTX6&L%RU@!% zm>JL>({4WXqDCaTQ;?N5)>-x~YeB%bjd|$odrtMfzjvI$&vz!$RWG*pF|Cs8o&8gi z@PsIaYSb9mAOp4HyDITEz7wf`Jkocrf%Gm_Suf!jDQXFr)ZR3TEXw7MWRD&@f z;rDHSQ2G)-vdwleKk>Ewdhe@&z1wj1kYCBt}TP*jKWx zrgAPXMkIuVA0mhD(%7e#~BvRY6o$ zNMRU_fI91GDY?ieaH(1b>mpf_q_gXBKMugSi+;)|ASaN^Hz}&>mTT%~(O0SoFc%mY zT8gr>RY7u7gT6P_^)F$fLc-0*A;4-6N~1z5lh_Ic11*41*;!GyJChuxCYc2k$J?R? zVjEX1o`i`6hLn<&d61KB)bH4MS_M45V5?%a!^aCrE)ykPcdR@vf=sogphmdlkdR86 z!Su!9uy5*g)}*t4g68=8{{&U`dZaeMM)rfX%1uU1Mwf6ViZ)_rz@7z&XJYcLrIATR zG`bSQ zw9stIKU0JyK?~DVU|xZO_*P%$KICV&-f7Vddji%D0lHkZyf~VvZAdFQEMIm-el{_? zHF>dWgcPGJ^y=SO&}=0^)%=JLZbdYwH9R}q#fjEeeH0A|J+UKHJ28}42~*BqOE<0U z*aHO)Y&R@hb8t-szIZszd#=&Z+AuYsMX@K9Jr2{NpAEPFUL4aL2TZ~3ti5v74z^3!zTruUrt#NtRoKuC=Ch{YJhu5zU}=xFVqr zuEn5$szH`6u`Iz46*u$Z@Kg*h(d#0gb9oXmyjlrDWH%-qkVY2OH?GX_X0FEfHL1N+ zqfDq&8KW)QxnU+$gcMR?n9h<6Y_#muEvH=rmT~VHqCIABXPXu>8x}A5L8aJ#q>rcm zfS1Z{%lfBSt%!W}?H`g*(vqrTk!b=D#3kLBZ zp$Esk39XmgA10EfG*I1k(wVV5m8Y9XB`Wqct;wroa0jSrmyOiRgiDijMzSGVNSTC{ zIBM|8Mbpd+Y3MQpb-pG9U+e$nT`d!Hi%kGQiOOtqON@EWjL+2}@9|0?_Qh8@np$O* zx~Uj;#>x3bN_=8*-TKF7gOY428SJN|22`j?(M76kla<$nMFdrHoW*#>nHyK@DmJ|b zQ9pzogskci8>w8CLrPQ%RKU?_M2K;9DU0|M%p-(x5wg@4C2f{}Si&+e;`=3KH4~mc zueU|92r@0wWh98-JkI3)%}>bC$V*bl;5tmoMWHQ?m`6(DI_|5H6|?A8C*aOQ=z|<2 zut#1ikc1P5|L*wr`}_6Be^Ud%o9Wd}FjFY~+Uv9pm;vJBUpI;nD*!F@IdCYKGl}J#|wd_Ejg!LuPEJsKkF|7A}@DSceiUojbAZ_w5u*Y zz4m86S_KZi`~h5(0_zW?|1i?)o9jZ}4Xu);5eKm@>pY6pJa&;Yo*TIB9}ILJ7|#Sy z`d`&Ow$z11#@u@}1J?%&gSEhs`l$y*i__XEC(M8Nz5Wrmkh2hlwe9e+=hf%V4kvj& zpY4CnGhISPG>vEiSOVl7&%aaKrWT&2m*JjP%h5m~t1vYSEeYEG%g`Rw1&!~pwgZ`yqk+1wdlnu}S>R$-d!%-giF_~QFwjG3W)EbwO9=+R}`035z} zNS^Vd{38VCw*z2E2Bxef_ZclCx;Y!NO+Oh@O-J2|pAVA23z^BHT&*4Y z<6a&7_HJ%i-EmQ>~2D4fKX7l+< zOW%mk(LnSB=RQ!E?F%fMkd;L?)Stp+ zY5Xz>U^`9zY`XNv)I+-9ffde6{P8^&ioegrAB4#_%nb33x$F;*29LbB0zKWYCGA$Z z{!*2xS)8vX1HfGiT)>}|$Qb|A^uQh&`Q-o5gG}~%Q{D!IhgbXJYh3br!B_;GW(-@# z(DxsXqZ~cJXO8vGd0k2LvH5KWeBJl9tY2hPoQr9DmaWydc-`xn5_Hyn7i~u!hSkQO zO!gw@v(1y(I994#5K9oN6{}&Q($t6zcuNP6ir=WzFsz)@FTkL z4zv2MWFRj#DvmCPE~j6?G^LiYlLZN9Aout@x*{UJ4?7~Ren_U$0r|lW4tt*&+S3;P zn;%3~Y+*4fC7X@Dh%Q(gNe1VDdmSw$xpKMJP&r$II~j>xgfdPcZ30^$UbfAns-+aG zZ;SFPw6%TmM+wvgNj)<9XgVr1hHqM{$NMsabYGQ=A^Ioc-R7*VnGb+D|QH<&v>GA~IX4G)3}OWK-&y=m5up)l7K2pEfI^Qzi z%eb~%BA>b-Xfn>g%FRik9gQ#IfYFcK3pcv25oI31Sq%SEFf^e!u>!MYqYk^AtDKJ= zVLrtDW5uUIRR(ihst~+7gIqZvN*dw7X>*K@%@r(26@eL1%TdA8tf;ZN#i3rLX7dxQ zcf5slvzzk{&Mw<235hnGY=@2_w zC@*~(o&qWZZj5-Cx+b68XjV#rxOywM#$gz)XBpxVe?vVB!9~lQs{&Ri&HRXtm4p+K z7(S&&f4Di}k6^I{Kp`0jo-doQsZ^z&7OQoKRK}V*SeJ+45Q#-aOLsNVVA8PDgo%rX z1WYtNyRzVB1xMo6NPWPV0D=y@W*%m?4P`$~2MwlGTHT*Vcj8*OOBQVgd}+%HLJM@s zQ@-r%vtAB%R*7kJ*GxQi@gj^KZPws4$_EZL2f2Wg=t0( zPt~K>(;kAOfof;hUeaU7@5M}59nSuZ&D(e&f`*G7rDuX(!s7jsN00O()*N0OGu~5{ znbO!`R+eRUNUpPB2+EK_q}Gh}&QYu@8`CzZ0gW3PhZ2s`%Mi<1^>kB>BMZaJUpxczEI$83=bevhb3=}akD4Z7Z%pL3z3MU*g*p<1t2 z$Bq#_mn+X7q8Y9UD(i_5{pIXlE~-8)8Ohen9%;&4n3FhcjUM?fSmhhe;*}DT57N6*}*QG!%~pt z;}x60)GS9xYE|MXB}cL{;^#n0Y`ewUIBuTp=2EohdB^h=V}E}i_{D4I#1IipWfhs>h$(@;*S>x-`o_GQL@9}_V@-+e^(>r-JkdUviris;1k4Ytmi`vb7U)Ma;$JKY|H23yIo(Jo&va2D zZcsSc7$e|AV9YO#>tM;`TgMJ>mJRO>JA?xEJz3KHH@jAHA`Nps>v#y_8_oW^C9Orf znUIWFeKv#(_RklXz1#Jl0>N)VCUVjP_=!7qTX&A_wyuc6^WGkPabev(#yvY)OI;p$ z0J{}Di0_pqKl+}ZEzLbWaIg%Vj~-jx{P9`#<@cQlV$~*_LsKjp)C_!6@dX&$YibBF`O2L)S zJjjvvW(n_0b5L%=eq#`coL9}}r0N(JIyAdG6Am9IJcZY>D$o-o=_q~j!u;zr+sT>v zUlsP3J67F)&cO*)T@gklvs1qUUhr>O_XCByg=bhC5b$jRC>^enJLKz^1HQG@*l>vq8AX)i#W3A&!{`l~cz3T>WzIFyY{nF9gzc{EtpS&25cji@9_u~J6XW4z6g#bIFu~8eq9C3w9y92^|HlT=y%*aUo2$!Dq?cC*P^zV|ed8l0UWb{qZ2`~!7&9G$b#ud2E-Nf;oWSeOW z7M{NE8kn*S8-S~SZw!NmorA;(Lx9ZwAm^kvQBN3WQLm_lD1qPyT~K=$?=K|v55;kL z`5IWMOu@9FSvK^XyW{3j#ycx7+$_auFyej%MF8W%P&q*3ah z>o|>{TbUi5*tpaVOF4MGpi}I_U|&XYQwKZPr5{%SPRJ~QTQ**Y9g>rGZ5RBd>`n1| zEkLd2BAUrc%6gBg50D3YddjcCMZAu?!kbOUh!{2AwKTsz^gRj=2_1}8U-St5JQnxhoK z8T;N!HhqghdPSQ78*XbvabjI=E(QipIm)P-kJ25&NSQ>G(O(mi^K=tyu_m1D8no!o zypRMq+_DqcO~3yf$)gBElL3DdMX8KGS~A4>&Irty6Zx8DsdRDEgvMSA-0y9wC2QQRmQB>N zwMalcfGS@{qma^w!k8(JEvDHjDjH4`@-ADU%1~j?G)yBQs6nza=%b3=4$Y3D*%dSs zmsv^i9jbo0y){>PmGx0GVG(W;dJ-7V%LSkmWrwKgHJOR{g4vVJ#LXn4IgxS`kt!|p z2roFm_29%W-dvPa`6W6U8gPo7i!}2`#4#@RqfsZh(cOO^RpGdkMcUgl2R%5q^mmRw% zUaerAFHh%xp8!kbtAFt;ujAR@&d7Mc2>${=n|EGrt6$s$SJ&FzV*;-opD|t@0`FK? z?mE^#7*rY!U)ye*yXmlYsfx-3;@rD94o-p$rt8B^q)m)+yyaW*W+AQE3}kfn66egpPLg!!b?4=>-j z|JV5HU(BYVe&hip*rE$cK6||OU zJQJ<<{bbKYhh4JI)QxYuE^0{(%=5@*S%?L@&LO8&=B9^>!Vy~FQycLw|5yWprm2@1 z!>V&AL;$*3xBTY`uFH$d4{I;CA98YXDZAOw+pq7B+kqwg(0WbqFQ7L11fUgFwg;Y* z-T;LL^`{6e=7X<3lnH&Hj(S^+4QPxu4w@&Lnw+{@299sw{_t~NMe{Sj`U2dm%L`Q< zME%>qo*{hWOwi{(3Mp?0UL%-_lF=kIy{r-Y6SK2(|jbH8; z9(I!^VqZy~uZCG-GP1zJ;WD7+(p{83+!xriZGHkWyIQ~G>7B2SG&}zS!QkD|RN(f4 z8ycD5x(7fQ#{Z53A;@n(fQ1_Npz~{#fz{G`w+34>Cp(*ciZyqGb$=X?BP>5@-gTbU z1TEmrh}!iKPuhRpUy@xC+n1%K#EJx!MC6;dUhj9?XgcRL*Qd(eqO*%4tWN4mWfr(f z@xr^TX(QLns5)~Ywz~AcTTcHbTyMjvY;GyBEfY?esxhJHJ(_b=fosXe_c)1Z|qc?c+F^H5Rd4)P>>hBjts~~4J4%q$G93k~ z#?|zuUh}9Cj`}J#0khQ4mu|63>`|DNNSgU5%>}9ySxUdNovIB5x1&F)QL!_knfA_D zmkI2BmU>A)gUb(z&ZEQ3{>`UdGcPHG84||A_~t5pnvm9xnyCyW+(=nILC^TDX`Cz6 zf`puO+-zJFr1HrcZXkhhwlM|?Mf#U&rFCX-)E4K=dz&AToV@wC*2VfZ=TI2wh>#SJ zd^i>O7Ydm`K3va_mk4V-GaPe-_Kk^eMs!BjgtH7E3O7E4*fS~MUXb^S!7;GOSs4&t z2y9w-yR`A`ZxdSlkW}mYqtM8#*jSXV>c~e~MVY_-oUzS5V^$(rD3uUpAv_IMMeotW zZ}#Sv;B1ziz$X(SiGKLnGQRR|n ze^?{v5dS)2RK|BCUJm+BSP9C93(x1S5c8m}M>#LrTBoiO3e0gh5IYU{8XGxQ3`NRszqty2GCRk`Aut9?X*h{Ha>Ym>S%Af>0+# zWuyoR3~IqdPo=d|U@UUng*@BCqLZJF%U|9xBQw8k5WDBU#vKRopW``X;ACLzp>rq9 zS8m1-ltihf5@!S_?yL=H8AZ}GqxZ9c(Y+@9cY{m39Hwb-WE>z15qM$^FP&V$1-)8u znpXOum3=sDXd0@5SgM=~(GDDuTevI5$%j8+VCj~^h{fkb`(?SX#Ayf&2oUQm?k}ir zKA`kbUbCm+XLU>3ux~l{DttH|oDctoCnwnFJgL9^4POlh)h>y_s@8NKqd!b?z==UT zOif6Yl5!4E(1NHH{DOL%zU^=$rBNJ`fA3?qDHVa75g|5 zgW>eH?doXt94LXbOgo7e3HOf)I{k+v8QTgCe!NH&t<4J9qFUU1Yqkvaip`NkoP)aA z?uAWUI>>+6M$cQU&mnIC7{M!A8OVwQBmzDWk=O028meyr^=l>cn%_Q2=hIo6@6DjT zqx0?(_h%#D%C=y7oK0r4@6Btq%}(|6Q8nND_glbNzue^L3KZi2gI9bTyg&C~ z*CP;G{KXnL!n*G382I$O86PGSYg-X=~LQ2?<=o;h+=7>albK3Q*I0CzKg1n@{RaAXk?I3@V(kQ zoyN8XrroP2p`MrrU)eOs7njyR*_NHuY!Q|pbOl}YESo|-fwJ|q8`}q1XT3qBfx28Z zj5l@1Ldaor12C)Z1zoJyo>39~RcWj~Z3}%{>%Z!krv_^KXVu%bbsOu5^+b>i4Z2DZ z4dp-{j&Dij@3yW~P10`Z{c-b@BD$>CKZ|yX1W)BR6{by5znoQ^%`*A-7dM{Y7#1Z5 zk+c{eEGUcQ;@kKdBiG(OR2gPcLs*U$d2K-FbI%dE0UvI$I-A+jgwOP&08UMF8^}+y-tO53?Cl?KFOAq# z4QWfs@kAus&n3}%VwGl@Y>HI%N;MVJmIL26*D6)CuO12YfKuUbFecC^T7;J z3{p%*&xF+NiI|Z}|B?q7{4@r>ZZcCadtzmA8Q{%Yz(*I)tCh9&nTneL)I} zr@U9slw%sbGbCfO!yd6Lgh!?_2D{?D#5XF&bf%pZq}xAA6w#3dO86QLE!d~U`SQjZ z!7BN-ZoPT@I%4A8;nw1=;V3C`mHHJQYdI+1Q~nS`6HFn$?);%(9;!M6IOAlr*eI#y zAl(br?+EoQ4dXO$zh}%yyJRJ7;l$w{V1675tFl4g$S^6Ymf9qjvsMB6pfAMe`RR^u zPNBo)oVa2mK{FGZAm)PPw2FK^b~$533c@cK@8I7d%ybfoF{B!_rrK!_thRdX82Ab_ zMo)!k=X=w>g%Uv1Kqz?>)^h`>uHZ8nl+9HvFK*bwhMkCQwadtWQyxmSK7EKn6qVsZ zy@N)B(Jfa7#4 zj|-_2iC?wgy4J`h{F}o!!r9;CXr@T!_Y{avpL#ISBJ*?||H2wOHY0ugML5?WR+$tL zWw~#@xKTUM9$W=*BpTsJ$PsQb=1~V2e;_hkWss|Ckmt*A77#Uy5$F=L_sp3g8a;S1 zDo1mPL1pkNe)6mBr}r+Il1~%qfCk}~3%pQ9vWZi1u*r2aq;f|bwuY_XhQ5kVKnWwg z$pzVRYv=Pq+Bm`>L}5JTO3fNjY~5I~SoZ5m-s6z$`HS?{NLFMTrBN{=z**P{R*h_# zwk@6|$HVfB_!72?o+vlHfB}=xSpfGWq!6q^%XL2w7Zu%4gh4GygHVpL1)TUZ=bCA9 z$8U-CP(>ptHu*QbDXy2JFEC5O%EU3~LNp|7&!#F+a47Okp1xU=>cn(HL^)U9HE(fj z^aYi|R!e<_&&3lnNMV6||2!|n;7=^Ur)c^`6xV>r6$2Z8HD$+XXQKsJi`s z4j%z{?AmI(>nDA{Nw5u6&jFTkAXl|9`or$GY3Ef?*IP(YR*MVJfm3A&f6x8fc6T-K zdCJ@Cob>J4>h@-buiyQQwC_tFP?g~IA{;OBl!E-0^dttDjQsq@4-e^qMQFi~`L0u- zjOpb`J8+;6;5r`e=WUwH=e103)%)0$G0vCJK@+N(K8_WFx ztx>?|M)J{-FJA=P<1nq}eC|s1PB>e%U$UjPfi@5Ajrk?(GEDQ&&;Ew~L$1!O5p7-P z-YlyDFU7IGFSBH|DK%p?Qw_IKXX~Z?@M);sNxbwlq>wGyzTeh!$q&h(68_PUm!Q18dn4w zC7_IZt+K6{khq+Xp3)a?>I@>-9W%G_Y`ewmOl&;4XYl| zIUAc!_(>2axVNP2~g0oVv$8M{f9K z-dcSU(Ex?uF(GH&ZmpMw>KfMoZG-(v;$O(_UHTK_2%_DMs95q1>O zyeW!nJ-hoIGdoB#NUSY^93uilf>wDVxRiXa8`q4dqWGh%H%n0?B7QHt_Rvr_Xc!0( zVwl6v5dR?8Wz%yq|7=sk!-r*2IYWEG^^lx0<;+Gu9fZ_moQ5Di8{ zBWKuXS~VXDgH@P_mWWztTx?`Cds8DekPymdL3>*=Tos6_DGltg!L^{u$}$VZukpYS z&555fQfcR_Dhv`(^ick)BrHsJ6RBbgj}MoFB`C%PE;Sp+kmphDMH%vOE85I^OnT+A zKLi&Z3@(1akyC3tR`>?50Ny8DFH$O~oCW!)$=J)(is?%ZhnX8->4;OABJ73l?CaPS z9jpgLJ(CcFR)`Ww)FjktOXZR~D9QEuGU(XFzFO;bl6g1Qt%6EkEF0=tPqHxfwVwhlfMaF2vNJe$0Qr&_4 zNHpthzdb?(&NKvi>W}v1rHM5f8SUXBrkRc~`5>e%nJ$rTK=0)%4Z%h-Z1cxVfm#QLIMa)xKQblqew7k^bmKQK#5OJvES1J6~7Ns!UOhuWGMh$8n;kGL0?dSy z{FHMo&1B2Oq|?P^Ki&V07@ap(;6RTofi*}`ONt}7$&INP38i3T?7+SH{MEvhlOn+=$4aPmm!AjH8wqMTc`yIc14tmwyUuX38>$A2jqb?=V zA^e#48$s(Jh@s5Iy}sim@SJX!PDCgR-$%Y(_l~n-ZxOxokd!C>D{iS#JID#$Q5=h3qy##FWdtSjoXCg0F&-)@z>TAs!L%f)FG)Tbz zYBz(})VA}13vGKuhz}pkPLjx%C(&u?oj2gxc;lp}h?g|{kf9$MOF;V z;qLWC-9YfO!fI(-%6WkSOYiDf0I74A;bWWs!`4o(DT+VSD*u{)Y?H6|!P+igpXwqA zsZ|xWr`Ad1{_=dujlIA`5bLXH)}7x#lPFt!=q=5T-M#`R0f=WlJA9Mg6DQ9NhJ;EmOB#CDWWP$z2bFAJbWa_`+@YN~C z+y>LO89mU9TYP(VUF&i!z25`|5(;XMg)8@Cy;L4u`DxFGM~=@)Rmu8KPN!v-D~ zqQXI#7{wk8MiPH0pB+XLk{z~ILB&r3DoWGoJk7l8_}*Vc%PTdS;oGBpe5jF(N4>4| zO#QZE)&$h|)&Un`4N#Ur9nqLs1Ls!68#WaT@vLZUG)&fsDCnRqL@{GttHB3aIh4{G z!S;uZy|oUN03M3QEgB)$?jfb`;QNj{@}t(ryFsFK`?g|}MKh|**=%=q2@ccgHC}1H zYFOP+9@J#IcZ%aVrtmT$N!=J3V$>?ASns$d!MT|F*ffzAy&Mb~I|&5|?TJTUb|QG- zc_Aq@(`wX9=n`UPXIhc(47pSgohgcD{35g>)yn4ckSgi8u@rI1a7YkzL5*6omA0W+A5d0BU5CtU2uNYqNV&GW8p9gyGI~5@95xwS{R2x} zi^iVT1zoe7%U>VdTlNDR@PJJ*M_R@=1%<5x@K1A%*jWzxl%MU!fonC^G6?PzBE%Qh z5i#|;*|qdm={QINL)Y{aQ{VXe>MX z{TEGp-8;NuRlTi>${97Ad27X5BJ|=!Mjg#^MsEIketo+X^rVL)TitL?xr#wtO;m1% zO83&u*m>=A-S0IVJ@OW7T8&>vHBc)xpu|#6g+MqjJ8Mp~5Y1_vM6apQDEs*xv`~b} zavIKtrraclogq{pBp)MFEY5@mg@}doH_}N~4O~(}R^lc>1m++1;mGD5d;j8m6fiaP zw|Xv45<;O{&z1?8MI&|zX=&wxrdbyseJ5B4!;5c{--1QMq2YId1I@1$*)48LTd-KN zoQxYuFcjawt5~YocAzgxM&+R#`3N)>p)bz7{jK^4qMGnNUvqkEBQ7D2`hqHq-%M&= z;fum7_n?fnsKezEj^!Q?XCaPqip;oainT(gMfB)NIOK_>C!=g!;f2-re3oh?amVvn zV`5wg!l_z~paMg*%DFlxwBsbnM`?g+;i;U0n~!2q-F^~o9Gsi#YKyw4m;pOo#sH(4 zdOaMA!mIvEDBaiGTnl#Yw_!#VOTWlm-|G0!oM$%IrmphZUAg&K9H~_A}Zklt8n$e%K`LWxS42XJ(Gu4x+HNHno$wQVKS#svDRLg ztLHb^)=?K>J`l(zb);Cr>z$FNaJZkmmx2G-Ks6Xt99f734rx;X=#NcH+G{W>|`UT&_bwOSM5U-Ac5@6fXMr^dtAE;`$-leAP}oM?Y~A4*U&|B=+ zTMu=Q^Uv^i@6Px;7CY8iLpVdIdktm}q!$n#`0ZQP3Kr_x3XLP2PtU~Pu zkAI2g-rrf?+(2r$L~l)4^x}Lz4o37VT)e&a-C~P}XLp|TTLm)Cr}Y|nZMzX|j@dz7 zRD7iboSPz+T^8#Sj?HZbH_7~G7sPIkyVhVa7ZAH{UgYB?3 zvS0WoEXaf^d<_bH-KX@$9-2wu5RiCr;oi(Bd#_tIU5yn91kycrJATtByNZsw8YN+P zbHLPfvQGI}Gu!Q4989^!Ztp43+19*4MkciNOF@uSZWA8>PSfu1XOSXn1I0l~`GM%% z&zm!pIfDM9mdlF4yD7Je09mI#Uf9fCg8*n604k-tIuapHQ@bZnMnDqU_V@7@W2fJrFvnbx^@l(p=8@}vg$Z2aI@fO;pn-vcNst>SPb1^b zy_s!kKh?mga6jqOe`3KM%;AAVW74r?;e9e931h@p{<@E%uYFrTxFIRK@+TA=cL(qr=0)hbLnGtw<3GAe;|u%3tfdmc2DQ zvN&&mRne=+)BMnR^CG>{ZTsWDrti%&zz&xq1YA1r__z7aqQK{9E7_|bhb-$4leNyl z&@g~PIDdTY?4EOjV#{X#@M8}`b&B`d5KWO2dqdU6XoNK7z}#>l>`Snp+C~F-1SO-1 z%{oGh4lj0j^vJ!>RYf`ys{z&zAt3{?dK%~EBL%N4o9)IH7f1`W5S39DXM#LDQdo$& z06|>cikF~*M8Hmg5x|z0^^_w?AQCrHSi6M>+4}dNt|fdKVdk7afKDFZIVko}-S?OB zDWap%>Lg0n>F;g_nnCaxmko$)T55^z)1H56IG6BQ{Eyaa($DbW>Y19$1J8QO*hC zGUJ-TUuH&4x%>+dZ`#g_90Q@k$#0N9pNy<9E3v)WmB?7Cm@?X z(5mkXqlu0Zr%OUm&4;1qXAmg)6MafhG)LlM1)(WCWpI}}fdt1#YKaT<^o~#k19g3v zyj=X=4c^7DHJsC_Vh%g$GM$v(;V{eebtI7_w?!tPU){+!f@&}!31E7Z5ION)uxt9g z{~n=UNM@L!TB9x|ZXJOHsvHj#L(?)GHlHWo$4nbbfZkqmQBEo;AsTC7hC?+1BY;~U zcb#q#>l*_-?P$V?Rfda^4E*p%aU=R!a;Eo*o*X6f+C=H?n<&TV4q<$VXVa-jC335Z z_NC0rPE^R;)S2cbbU5nK??MR&-YsGnY7zbZ8YH+iB)WHM7*tkRs%>phz?BdM9#SdC zqoU`xki&EF;Ns1cP!9*Mq%4Jf!BR4gLePXBj%V6KoyXsutySjBOB5!qz?AZ({H(lA zb4nuETs?ZEg2mKvt>9iMQWui(y0VgcBdN+Er6o}qJikU(KIPz0#!H2@m`8w)v>0+y zgKeew;nOCO4BlW_EEf{@pm`Zld1`rT0ti!@iyV}AsI}aOsSA8^4vUDLSj2KsW-%gx z3=1{_COT4cd=<;kDp=V3IGNPp`raU|1dyq?nRsQvX0vY~sc*r!f2d-FHf1}kxRpLF zSP}{bX%Js432NhWb^pkfNQ0I^M59Gl+av?p8I>l!W0-&p#Lys=nNpBya^k7adl=^7 zKXVlm;TAGrXlIkb86ZU@E_60k1&+^H<~6Zl6;y+ z1pX_~f~I=W@ef%jL;c?nm_uw=mpY`CDJ`71BYuLplZf~UDU_0;N8jKcLRp9)a!FOT zX@%+p$Z<+vl)*A&U0$@;;mCL0_Sj)J!7rOLM%B!PF!9*A}rRh*4>CPg!r>>mz2lSC$Uilgey@d89e`RFq{bu zf2Z#-|KAP3g1r9R2L$K49_0frzph^_0!s8J;C%-~n2zf?=Sxc~rY48)K7DflUg1+w zK>6?{Yd}vn^4nYE=hdzcuHGZx_ouw?fmb15hWEQ`Tfcn!ist`y9yrwjGzq>uD!)X% zY>PZg2V7S_35yk5Y&%J6+ z#i#w4SinceEmilhZ=iS{jnia=*X+&Lxx>1J!$&qB454OW(BI1ff|{Xg+19o7rJea_ z6_z(o*K9`Pu%7}}?^r1JPUCxS;wx72e{_m$MS8+$FEcB%vfLhhb@cLf2fF=Qjs9K& z;6;~!$;FY88QWz{>2`@ol&VJk9DupJU8>yD-r7q^Ah((P*H!rGLIT)OCw|_EqWtUy zKM{*xZFG6EyA{`^71f)BZZb7;>x+SFrq!bX42I(&81_9~r z_+7v6`<~x9+u1)m+s?Un_qm_v^Tg7hYTyvx9T9&uh~m)@kS%rAg_ zUIAn^U|?V7z?;;&1fypIwO9bMing3Sb?y`mL9`A23)1_iJnxDm&k9N42P_;PFJ`U9 zQptQ{*DtaOo(x=g0*TaXYeBZ@K=}T@OkF_C*6jJ@(6fOY53?v$wFP$N2uCuWeoDO?mJ4jQ*MFUf%;5$+9 z7^b;C>4>TqjJe^wfz=nyKz0lQf#AsS1NA}z!8ye+12X5ouJ>Ncd8lNZY9pkX>Os#} z#7U}YCqd*4;Tl?OimY>FG-aR?0WFS%t#g{WnPgHcVvE!`UzYVyO>D7{%@wX2H*=w> zB%nGtUWJXyB8N2zPZk6&$g@!&&*72udX~-PmA%#a*`0u48!AI4U6@diP?%RG1ql(W zi@ZoCW{ltbdq59qs{47N==Ma)2;H*_Lk~L0uL9;G3?Utd#L@hwC$Z?0=?}3G!}od$ z*)3?V`H`lucYianP#OCn8T)li$@)7ARJ2@VFOG{`m_&C7fvB5uFT^Ho%Q<5`PZz3( z5`V<%=55NrU{Rv*PQe2k<^nrrk9*k`?ZDQ|SVCOb0Wl$o_n^m60HG7;%D@;ziY2tW z`si_bA#z&3nkD)5!t|1CAuNGhfeNqf@ywm1t*Z8iODH%cs3qo6Ndg6jjole>YBeqB4zXeRaFJv2SAG7!dzd|X;*3@&j-3~ES zC!R}mLPakf!+{ZK&^k-=K%{^3{t)cLm%p2p6wuAGmw0AfrIMq5q#!Vr6Bibh(y5k| zkY%MRJ*HM>`Gt+&Dh)Y1o^4F!Wm_-g?j(f6;>sd8XwM>b`G!LSR1$cBMfRq#YOVnO zTea1dyaV5p$xKR(FS6@ukQE~fhc~)KDq6{;L_Lv2L-Bhd2c-oZ8m6$J7G;8GO*Tvw zj18rF&(4u{QTjE9gn@*GEFNc)2j^=Lc1X}@@?rCI9HZ4 zgMvN06BGi37{cCn{V;u3FTx%sBmGgzE?ifx&|MarOIMExvsP(_d%b+c2nQ71auXAl zudXP>+`wn%KQ&Nx_I$L!1WyKg2lx9D0$7Q^KK1&A5xvB8PGE;Z$xGvwej6o28XC96 zYt>$D@)j{U-}UKc-22fK85fk;#!npjki=(#RoFh$gM|}H0||U_7-3j`zSWA&5LCLR z$N)1)L!Ob~!wM>SM)TCUil=&{7^69(p=t@MAHtI(2xE{CVfW!=Oq;_Z+;*^~o246r zEbO*C@JkRmGqa^l*x))sg`!-Tq<<=Sf5Wcw=H-*IdCwgELoCUp2fGn^t~jva0KFvo zWt&TP;>DYIP+SG6wOR3t$z(aEK@m!Sas^^H1)e7cJry}f6LE+;#st_N{H;i79Ay|Q z7#N*o_}^E4xY@v2epn^^c)3kP2N>2{?*t#ivH^K+nX0No;n_s!8g!0p`C+hOsTDt5 zW>X0AGRjpN&5T8tuHLc0b!ACuNO3t~&-~_B*~M_(WA{fV{>S%76thDkkV79`{e|tp z{Z0G`!@yzr++geU4o34Zmc!4B#GWZC6De-o9V{mH(%{k+5)S;nxBWXE`CHqlF7yZLRyqm$TV-S-%KZ z&u;$i8I5DpE57!56b=*WHCDU1eSU@}V~{=Za9-HsOXA9S8%7>dAp~*#jO&o(d7NF{ zwAyNS+BhP9N3Se%vFc2*1qsJLidpv<^T&47p+cZs?(QR)9)OMT7rhTRYh9nkaGa|2 zj>npP?tqTYVO7-_?fHLWq?C-)kNM;ME(~Kazro*j_RgH4w}6)}5+II&a+{CPm$c5E z)I6R>Eq+4)a$f(cTQ)1t{hC=mEhhGA%NpoXiaR|Hp8bcg#pTB*Ckd6U_bLA+%H(eh z0vR>gz_GtxY%KEwPf0U2j7xxgS+ZelxnYL?Ez&CPvvGVtuf#UAB<=`cVq%|8Ve zfNiwpP`dxHafajO> zXVCNRhZ#lK<)-2A;w)gGZ=aW_1+79wO8B-xt+qi!Q?vacEDW!a=P&qGBujB#W6p66 z!rUXM1Q--^(6^Bg)`QT|_|fMTWV-?yj4s&$1iNBbB|IF0#X4GW9ZZO%07-m&LPa@f z7iDg=i?Qmh&H{FkH_sRs+cYHILK&RG_nE&yc>@L2UUE*7QIgvWuJ8ovDGM4rDjBQA zFNujSvCxZ}q$p-pq5eWPF$?#InYHPKANWjw52KXxYb{MF3|YlF#V@Fx z%3dD3pc_4JA62%QWD^+XTGW9Z%(^(MjfiVz&x9Xi9^8oe#;SA*Ed`{-z&8gjDQ9&F za-{z0fjiEg)5k!Gl)HeJh$f(DcBK;yIitrq{Qg;H@=8gqLbPoP|(sf~jF=dof_Ey8!2lTQZjk~hM>gyG>&mA|@l zIgoiac$PJ!ydi0e?$Phx+_dK6B$Q=X|k|x%o*Fu)*PA&B`La^DTh9im@%^9^Rsri0DwP1W? zpTG@i$;hORHpI7+kQccmjJFhOWs*DVR#`nGhg9OXf1X|MeQIsm_w6L|Pf-wd^|<7= z_B=d^1o~+gXa$*Qx6YO$Do%W^>~@&Mk$<*WDtrmHW}YldUuXCcK{!DN%^(~4)TMKzO6Y$KfM$`A^j-bLQ(fW@U_~E zC!@B5NKD?G!9Byuv zf9PC20O-`S4I*Rv2iWUBgaWf@bw-na_OoRk*cR@z#|&R)>6hF~tU2B;9iy-D6PVL} zOnupQ`Q`Y<>R~9o!Svs%7K@1+uM=Nc87F7wc5>s7?dv@NWAD)fsPFwx1^s^k#O?RT ze^WEyY&L50YIpR%)dwn7jQ(krPfLyOBTi3G*L`nE+2 z6%Adh0wxDD+B_UHmJAh9Gg|d_yuD z2E|h9lbdn&Vuk9GGlHX~PEi>W1sG&|F~ZRFg0wSV2z*r)$UxXAKTQG`O1-eD07ZbK z(BAK#4T`ZRz_-9Jf)#BVLsXEpyZv~=neEn9HHQ@Wym0eTIa9`gm^j*R%xj=XS}s1* zSHYdYYlrRbJpwlP;A)O6_q17SaFJ>F^+lj zZa=$wnxVJD<*zZtmS6HQl-#PPwf+~tD4znER z(~~Bk#Jv3t0Qj(T^fCJIAj?i|==-E6GXn7*Wam?gr8{xd#G)2x&2APPR}SHh?Dj6{ z=cnw!JWvK$I|&c=hsBYJ-=ucNfxkmk&_95Ozl7)yKKY580*)un8&fnLCu0+HXDlY< z(^)~#MLuYK5TvKd%(Uj8SU8!utj6W~24Lm@2)Gacr*u{Y+{J3X^^(;hKjz6j}~7`ZFjy+;lX^LDo^x zhHn;GWf@Jp7L^C36jC?shpY4_CFy)sW~Wy*m5)bCvbbP}#&;;zRa8b`8QP-!>gZdA z@bK{FRhy+x6pYv8X%LkQMMHWz)2z(&VglDXW-#Fc1tJM=)T7FVb@8aFKniDMVw33j z`5SUi8-nEEVUe1PC7W3^_P(J(q-03idMW+_w!Tm^LsN#(#HPf%3{U2tCPwF-x8m~V zQQdk`Bvdf7C^UV1k32gzyRI~Y5{DWqfl`5a27%VciPy}I#vl40wzCK zq#0@$r5Q1qBTPcy{_M0h6R;JqHDMYD=fSXxieVc3#iBMOHMAwzYZyq=Y)=Hq(Dy0C zR0FY8fYQN}OIIB#o7EZ~_89LKVy&PZHT-v)@bV39Hv0kHIt%*^_Z&1fT-#(EB2IS= zKb1gRV`1~+4ir2Bvj}+iz(O61r9v8NFyzao0etJ(55?54YNg{Wvf449ss2v(7BWzd zhTP7SKh8_o$mtm)<=)BFj`z5ZA;XOth*en;LSJJPc1P$v-K=|u6?Ce{132Qb?3n8< zzNqPOTnV)OMi?+ue!Ta3*7`V`|LHQ_VX;~$>2FVXtCFyHzu;mW zl&~WR@&20G0mKPM893BPFXWs)NL^o=8+o8dd9VYRB<_2%S$YhgyP|2s<)p=uF(u&fEb*q@daO*s-i zxU?V6?LZOwp51EH&pi5yqwTW-UA5>C8ZYjpTg73j-$JrG&cz=^B11oqXY&hAXKnKi z3dFU<_zOS#d=TDF|Z_dg8Bdp zO%qX-kC%7CkAU>JlDpBpFM;3x7^uPl5cTgncaCl%0B?(AWJU+C!hyuFlQXsqdsy%P zUwVqL;{jlL*)~|eV0X2sV|;w~M`;j`Y!#3kKNG)C3MYI@bC?Ctvpp+BV*b|&l|ZX% z#@5K?1KlE!=moTj0Rv`1_P8(gaiV{$fi{KL9e|%IkHNUtVJxC4gi3YCwVy8Ql$N?z zry~R>-ijyPQ8XFeecQldzWprK@-q66tTi;lT~Ezd`g{?;^%DPICLiDo+63}NC14qT zCqR77!}S)B754P@@!zjdd;g~|5%%%8i`;JO&AH3)ziQNfHe%rQ(rfu?Ai`;Dm=*}t zAtxtC`A53~QzQ^)>Ztg5ZAy8%=h3Ko;TsNAqyrgbnVFeqI5|v993ZtzI6A7(+bDHf zGNx+tMB{ZA#i7dYlOhcBAya&{Z&zD7Dyf`*$EnB)PLUYFJ}O)(=4O>2sf(6;M0pmC zz^No-I@+9J`Ie4tNR)tdmJk<`eWknl+39r%3?ZXoN)sdoDrKNh`bK8Wf&U5FS68o6 zENS0i0q1L%#^F;mg_v?lLURkEVx)4KUwVwbkZL3z#1NETq-URHk(>V>ft2J3g)8b; z**VoJ*d5Q9eJ_*^#kU{v)3s(>@A;UD6QH~{d5Ef# z%AZj>O*OQYwW2aq@6y~|RWrWXXVu&MM)Zx^Gbv@j?r6gT7`>E6F`GV{B#zHX{HXXw zItq&_WCRvJ@i+Y#3BI-UWl$yPGn+D}p~VKrC!Dy79KjBh=Mm}^y;X%TV716T5_OWH zq_2SrkXci=8W0uE_mCH|%<<_RIdLIE&TD>!mie`E^V&O>`(mnZ4B7?E8yKJLBGls{ zpm4jZxo?`G6B4R9(J{7=l0@1B_uch5kZr^i=y@QjL&)oTQARikDjB1sCaQWj$;4je84n95Rw-y!vIZ%&2_v^W&H3Qf&~V#Y3|h2C>m1m{}9 z^%($v6z{wIT1IoNQFqpn&hznopXyK38m{@CU;xCk~fs(Z9gSHz^mq;MtW`(*M61&A61_5yzuNu+v1m8hkGp@Q-IIlHGq zZiqc{?uL)65nn^Xd{5@Gte8criGwzMb&BUX%|3@o@tSNr!|H*6qd+Bga-4}&NxbdS zZMRt90lQ_L<#WTT@%|MlwzMiL)_5G^H>Mhrak`~cCOq#$w?Hlpi5Ze`PWR0{vBXFXnwaYi*AjJvrJQwtIct*iaT2z2e zg=t9`+t)JxeP{bIYThH~dOM1h-1T0>?1ybn+9%&&<Yzywv90o5Q4xj}l`oUMC0h@;d55dS- z%!=^nxcq#hI>JHY1j#=2xB!D@#0?PF4Gl{nyQhnlxy-lBO}C3tB!=5LpR)%2oSH&p z9J73z86_H)iJxEm3s>)kT8zDI^a5c}hh zJQVdOGuHGiwd`K9_m;D6^efDNlTBc;OMvD^3l!-vFnyw^Cl4J zdbM$vS)G`e|GO4Ip>PP1fqk9Whl?l}kmP!^1CGtv7__GI>n#GFS=H4{AO0PkAgk`k z>B{S0zz6Mh;d=bY`&s=UEIC`oZf3NP;>KA*56t85=EH^$We)#<9qjWNdi2izP&t=`hIjq=!XSegR zXzcj@yJJt&1BDOz$+|dsQ(E_rk;~t<@kcyjR8N;)Jv7S$rJ=wm+46An{_`t;)%W(5 zJV$?vvRw|Bn%6}S|7TGPRDk_&@$Om|?qF2r|E)Ivb%Z31J1_Ww0FGhhcd1S)Wu;N` z+Qvr6KQnvg^26JQ5mc*ghq`ruUDo69Jvm!!7m5K4K5(`kpW~M@GJDCGj5Gxa&5TBD zMvAt<0~}w@z-*Py_K^2_wBKx1O08>bgWlQ(s7Tl1tIN7DXnk|fT{0%hH~FxW(vc{g zffyqAWouPaZvOmN4x(&~fytVq-Us|8m2D`=iGc$zI9}fRxsY{m;)2NcQTB0AkBQlS~0t#k>Ja!^bDAK9Q z$V64Hun3Bz1|*pp7$w|711CZDK~t0EFrEDkJ;9EsBe+danE;S-24$mHyL}#^xb@7Y zqiVcLBhiF;)FA$itPWXU*kn-n7)vU&`aOpWSdK^!u1h+Iy9#$(Bx?d|?iF*Te(Zs^ z7bNFYy+xtLbMM zx|)b(a>kIKHE6T(-b=c-TNef%Yf+AbxG)L?PLu3;I6ZIQWfLk#R)g7(!jp;jjyE0MgmmZfy`tEOdV zct|Ej8k{>?K@xSVvZVooXjIkVkbcdLc9_&IzfRmMWjWTswnLC zK*}5(6;x@W*Zv!uIuyU1J5w)OjtMzrL3aV=H1&It$~5I@W64<9!X7z+BAcHlPH)r( zCRQD%-!hSCBY9p>NlXeCe4i~z{JAZ>%|?M&nfeJ?DcSrRMyjgP-r+d5xG6<5Knc~v zmWz7&j`^LW3uI&kN03JlaQv?2g?woVg4@6-OQaIgpa+ZNjkvbcI**D}HfRYerxDN; zn%DT#w4Yzp!=mKQk$u#f)h#YTpb9sR6M#Pzpk^mK#{q=*D^gwzx~oL>SO*Y-RaJ$2 zAeqEm!yG0TFQ-Z(q70o^mtmXJ*=M6*reO;r%~8VV+xuVf$O6vH&th^sf;tl94(2ZU6#8m2@sh zoYL+K-7#d+e(`w)zQC4h{rHlNJErkWiAoK>d#^Lc!t(^B3T(G`nH}ByvJ<1+mn?p& zpi)Zfw#XeiYIau96&Q9K@-{zBQ-6i!{stR@BQP@cQPxS(HA8q6JbKvpk5EK-VoRN= z9^juHpIzqm1dG=mtbfjMCRm&%-Q4{)e;a&seLHooY;cH6I6>|(lGaSQwfSJ*bE><) zvOTx8Um>Ve)9I7(xUW^|dz&%2)~bVA&+t8pIaoEt_-6jkrAXoK0)tv!D>|3?*jtC$ zXZAH4W9yQ!lhU^{u1lHk{{&JtlRp9NR}IY0zyv?N+EnGM7#QulZGIU1L*xvSw{Z6S z>Gyfdm22SIaM*X@NPoxq*P^ULy-#!6K>GrydYO}oNbuKIz)$wYNBWPCDi=H-XE#kf zN3^#qaZV}Vf7h`woJ~9i_NJsS4Zyjv?gW*OelG$FiJkXh+z7c>u}Tem;k5mzPnzxj z$km$=E1bQ(h!f7({zE{pQ!fVB^3IUCj1(R!G~8%NolbjpfZp1CJ3{LE<3aJ`<6b9U zzt|eW-wFiCZTSmf<1%Q)n za?0z&hra1$zX70qHT7NL|7Ww*R{c0Vs#3Vh2H`gsDG6`0QKN6u@XuH?-<_i7# z5MP*_{lk9>e-D4}emS7|)>OXA8rYKksyq0qOwV|MUe%C66T@}=9jftFI*;TKDU-(+ z-zMC)oN=@r#V>AR!Lr+M03 z)9p{7_)UE8EgGK!ZOG5mC<94H|H1`?%)utei1d%X6kFLIhRaYWb)#H3zN|EaQK?e{gkjm3hma7H0`kJBLa56>@~dwM2Z z-yxF>(nkR@PDD$=S zp4~ z-bStwtC=I1cBX30-jlbO{WPhCkXWd9M73?>5wkGZurh=8r8(ouiR~HZ==-cd#NycD zdDvjRUNZzf-@dM|zA4vA6wc*bu3Iq)QW^(fA@gaa%@+1uOy1|cxRXYo&Uv4cYr&o- zSFo1mrOk*-Db)~%wwPQ<`Gf4^n-p#uW*ZQ9upRYtB$d|YprvY*DVNY2v*g? zR+QZ=Nih*pGR)qzYlQ;J9PaFD98aY7N4>{=fc1NEu~uqGi=!HY(zt!$HC(hJ#1ooX z1#N&%5xa+0bZXc<|Lfj_>(in8-6O835ch9HkC!l9#0I8?iJA%3awd~{?M|ftoeQHc z703PD;&7BT7$PLxKu8MH&#R6xq>bPcfg_1oi&``TD1|xh$0d?v5QZz75Mr;W z24d8V93!Y%#A?#dbka5bQlvNA=yW)G$jSz55pbnd346F)JG^>ljm@hs7A!*h3O@7m)t!1(;y z<_FASA5NO-{35DlDek>r^ddyfmR@b$ulwvJthXt^oOUns>knrVHCwN^_k5>a8IzYU z8>?Y)`kDIVc88IPPI2Y0pI_a09uWD^oO25%Ffc;s{|vuA;eIHe?Z^Be`UQn&$+QU1 zO_U{JKFy+2l9b3&Sp&S`0psgaki&xZT=Jk&=FI=B;|0w|5t}P3)Ls@x`L$7K&`dM& zjmP-7pS;FYHN9MPnoCZHRqY|dekL7pD<1A-N9eY7T=4gB~i4!OP`jjGtGx-9pfw=3i0(QLcFxT z-3;oM>tA$NH}j6D5TXh|&0`G2j%EWU(o2qnNQ!D|{~n&_etyT4kJAs2U%L9&9a7_0YpZ$2ZwY9uYdD!LGy0!J{QCLt=u>K$Z{R7X^($d%q zeMdu`ipomoOJ1DQKna=p`uY`M5VgITC!+d%{DL!-gYLeaWNgHmfg8BCDTh`4(kLE0 zQ`TS^zbH$Ls~s@%PQ@6p-l!qI^uA%7 z2s*~J#R+FBRPieH;*b&K4As!d6No8{|3Ngy>&b06b1QR0uR(|rAj$qYtY~aDiXzt} zG4Z!4Rft7;hr47fhPNai&)ZQKiT!}i7^tnSP&a&)B9xh3NxAIp(P%V1*f2UD=PmBr ztlyczZUjPDI4N$O9F5|+zIi_F3Kz9J^YjcaNCYZo!=;~ie2|p0ltWn;VW=`SPbW`B zkxlK1a5L)OIVxktf3hMHN8LjpX)n2lUKunEKSm4zG} z@vXWu+DVnWZiJybrvAZC!D;|kMmlBUj!8zUeKjLDArCYZdf^FY9~U9*dV_U+%7Qq| za)MQ6ol|f+CR~8m>XEFRqjRIzh~a5cGIl{N|E`Tn)?+HuLd9<{a+_bQp_m4M*~1hu ztn9LK_Yt-E<)V4BaxwNclp#F~c0>gp1?+ipa?ED0b(!rT!OXZ*A#R~oGa6%sx$^Q3 zH@eqeTr;IFPfKxcGHCq-z9bCJeh>MaRYh_ptrkaL^3=q#T`>NF(o=!L?uy6wwfN5g}*xl zE?Y-dEG?f!8pPSrluGc`+8EYyHHcpPD+c*^){}*5WS9cDSemu zRF9gXNgzTyB^BEWsVL76!7l)rp;BQhelar|{W9}a_w_rwMo#4{J&|1k0yh(L@eob&CeFVyl-e2%J6cn6wQJv0)2WkQU%i9 z@fkziiDiOQsatU8xO1Q>CgLETbD^D@N}OE^?N}xKvL`H~n-wf-TF<_&&5%@kHG4RC z-};Lsb8|(mwrKD0Zb|i(*x`v^qrbPy@~g|{bvDDL%V4@>f2QMQQvhIJHv2wk?lk#7 zaK$zr9eWxaKG89oIWelYb5QKvzvr+s_q#k5!ohHX3<+mB*1tCOw47PE9FM(IwS7IC zeP>bq`JHaPWeYBL5F&teJ*(M$$)pn*FsOb&k!BsU%M|o|@?QI?qLyk;{g3R0;Ck_X zcjDmz?ShGOJD6XO@3LdH!?Z{R&{i<~Xn5J|_A_)@TU~1NsMWQVx`i3bMGNPIar$dhrttA3Oi{hLypXy+~XHobk36UC+_eQ-^d%2o~OMPhK?D*!UIBi`M zlYBitebwR;zVl5!PW$#mET(s)rkg45h$&QX#K&3nfNoW5g^=_yYpuD}Gga8Tu&#Uu zfLA`D3XbJgul9dK@gCL zK-Ga%(tUga!EXl$cliqbSr|6NP*Gpk&2lUmCl$i2nU)%-C5AO-uVRVApy50%=|xx= z4x!pe(#1~|`HJJvt1)ISIYqk@^p}WB5HXxHafNv-Hvy>XU_z{}iFVR>dt5u9mQ z$k!)B#G01$Q~|TF5)xPuXoTfKGATXI`1iL>$(aVyuuwTmO3k9n35>fcSHFh3h&B?+ zN+Pb}Req%9=`{#<4Y5?4AFn{0xRV+|tBPYO!L z(TC|;-!w2yznsFD2(wiyGi4s#Q9v!pAyZ>EmyfkYur&p}R?r$G>loz(@oS)Gp{2P1 z^iM|y5G%dW&@mQ9Pfsnyq%b8=Gg!Re4WG19lZez{RkmkzY7jc>PbXPq*TaX7F4ZHF zaj`m%g^z8{NTq_T`oDvx(}FDw6OXzR+Nt8$Y@P^^najD#S%#PJYDc(5nT*rpCi&*7 zl5t?DOKP&F4dM`?(R!$)HsGnt&KC==DR7fEv+QG5tM2kaMAww1TOpW2oei<-aCr(J?bdHapMYI&#zV z9p{rYlZ1zr;d3Do1o4FMez{I?;O9{qWK1v79=7wZ8ExiXp7G>uIUC6@onrFcF=E; z-H!z&qgp*N=oDP|{0PNf#j&^Qnw-RMMatx%B4@|rQ59a5Zk1=L#n3c+?5AakbJ+X! z9_eoBBI@L+#HCrbdNZ=CF4`AwezNw<@@l`acrj;SqO)BH5kB~lt4o09P3jl~Z z@VY_zFsQP7GJ#61+BFyV6$i(Vh?0BEa~tNjGC~5YRzw823!I~G;smsSZjKsKEs{i; zLPA~)#j&=@9(A+#5|d7gmNpXK{o%ur$-DJ{|N?Cb-Au>sS_Wv_x{I7v-__4%wmPrgaJ z^A?TAu=A&SX3NmXjDoA$i8cnIiuIz&sg0R{oedz>Oq0Sa-mPc4pb~W#;?rwqp{^mh zDG@>MFbW<^?)|<#(bjXm_~GA`otEB}qC07XE7u#eRK0xKkVWlHsCZjjnRPkn6l?Lz z_oUf>c&xgW&M~#I{_?a@C(38b>rnmRC&q!Rljh0AE&c2;!-qqG%Qe3K7yTcXn|(bO zx?BApXqI0Jul+L2%6b4h!w6>jU2k9+lx)HEpPqKdh~doFz0Ug(;d6tfVE4)G4>1wT5tv%U zV?zY_sw>(80d|+j@^4MMEynWm$8M&V1fD$${mno7OzD>NQ+^$8h?nR1Z~2Kn{IyQj zH=q6LNP^xA!IoKi$E(U1=(%_pfqAxcT+`U5?~%Zr9}VV#(iYuN15tq<{*rDBDVlmi ziN5dA`PSH_ztMmAROdMDz56{mnpDFzdN zI-4)Qw?!$O=r|&@9xS<=H7zK3cMBld<676%V#LuEP*QYKJ>qqD#BhF?mE%SgErbry z0;%ntvHDv(u<#$bdAoU+)#&Qp#CJIY7)y*elZ-;VU{Str%Qbf0o96Tbt?er>hu1mu z-Cy=+pBgp0etsO9ej6FOdl44i_?nXB104)ljru=iI<{Pg{hK9K+)%vVE8%3^uAK-F z1ErC@)Y|)^k*9H+`o)d9^pDS5?3Ye`1gX44EbNFls)CAwa*zpXt%?y|4n?0jdTs@N!!JsIPV3b>MJ zinPNLa^5hSF-oi95+@In|ALuf{?Lds$9_gjMK*!1cd4)_nTA7bjO$zCrF^E1R0FEt zedPsm>M6@veO*%6gpxv~ontJSNiGIDTL$-1fG8t@kC1JFZ7KmpmL%eSncR&K`J=FI z4(+9lekE-@OIR{4`zPf+UV$t-+__i^{PW~0p+79ef|9RTUWU5AG6$3-Qp;rh73KnP z@blmfz7VF*!S#n(-GP8COXnDwJQi9+6UD5MYS;r-SW> zX(?l5f~>RpIEitVA8n1BNWM`6%Y99{JJDo<2KB^8i@A1#ELz8zE+vn72lUx2F$()E zAsSDajBc=hbzXIDW6&T;O`3t{RYTA8%T2Y&oWgvJoef>uMv#JH4XAjPd&Q>;*OVkr zv;a1a|2DiH>Wh&mDZ?4WDxKs6>BnF9B2D+y^PtL7?U7ZtPfAGo+AAPQG#F-+Q;T#8 zpxEc-SY%0!VjmWN;?X9a@Dqmh+F@Myn^TgUpsD(*97c)Mu*9JkU;&sVjSiA+cj=zV z-{a*<4J^^*QoseMQR)yQHogkMuvE(!8JagEb|>I_l`62F)@i*N|?!ceLw)u2t&-suV) z$Ni(rJMjHM4s6GmNE*nQqX-WWqBp;Sg}-sTd-}ufkl8w}l1V-2w+UCj9*6ytlv&-P z4nE5FWGM;3g^7VOIWNCGXJeQ#h%bgFlhPj2;3TzCu&0usZo7H%VK5TWg}9Q@U4K%y zR*)>g4#f*~P|#eUwPubW8F$k5=5{mQZdq#y+JZFxkP%JdPNgmh<^EMq1uDjV7n9tE z9kfy-<*-R#8VbsdV@B?%B_FHtbdoAkp4%W%cYgvrG%|2QEcxMK^xgF5`grOTF-SF1Hxf^_u(!E3 zxq7DQNuaU-tzK~M3PUQ`mhWJ~i;G8ul%PaGDrAKrxSR})jY3h~nmgL}>38Bz(z54r z4|wqsp1Sihti6fluP~4%cwbEsjPDSv>~Nx7FjUftepLoa**z0)BZzG&5`t-^rli3l z374!%AQi(+3q|mahe=}g)-MStB;bYfxcLn02Y8!N>3Rrg%fzmG>h;qbgEQxRpqtG(RW#PRbcdSx&P4Jtf+3@Yr7!eO`Kmw-_-0NIfg*6+r9pfVvY!^`( z^p(*)O4bH7&8%jNpn%@_ol}wU&8fjXFoYg6n4fXUIexhFedcnOwG@-MFr*Z7`Cxd} zC?9u>IkRVXQSQieJw;lCa>zBJZ#ff_ka51~+5)@ZzpnqZnM7#u`9`nk+ij1JN1$e4 zk9$_fLB=XQQe#Q`X)Wsa1JGGVl`7H704q?qdlX?TAww&mjjH-rQ`?Po*BthK|`+Ek@b70;kK zN$St43`ZcJmuc&&30miO8xF;V59b9J}moiF&pGj5Vu;ghc|I zbR>~J&Fl^BNv)`=Eoq0m4v*k%xh}ojV;{!NrZ%d(lcLxQA8guDWdY2;#w0YkTiiu)z)a+M8<;>+yk77;rom0YTI?cb$c`19e_Wh{Epz>WE_pZ0+L zF2K;X;>GOtSCc=x+PppdaB;$nLf%LVFt44@vc*<{{gP=4cn=JGt1K~ z=kD;}DbdP`%Ii%+|CPU|TV>_ygY^ydE95{&#MP?w#p#_xRqLzFR;EWNf>kytk8Gxu zTG?uC_V3sW$#(p3zae*@F5oflh#43Poj9f-Nq(uFPDs?TpR=%UKP#uX6rA1QdKi1} zxA5oq;=@Cd*h-J(+xy=0o1K>obc9pxe}Gtoy93YG_3P8Enkhha)CbExiZs3p0^0u6 z-}?&A{yW8Vq^znccFRY%fH%v4th#}r^FJ`XI~^I}NY6f&U!8PVYWnc2p`ud0I$t45YZ2$;j-Rbkil~#7GWT4-osp-yU=QQe398?T@Q^95wElyqgtk;sB zM0bzXUf$57p5k-he7rcV?k>C%_iIFn`g11?5=_*|U<8F(04X$i;mfn<-9kl^5|`%g z3)tP9eZ}~z0kFB!|iuw<`zVy;dhs3gU3QI^!EZq$P(xs%Jl+@Bl zmvnbYN_VG}2+|>)f`H`x<3IO3^SpRo_|Ex_qm0-0y3RSDgW1oLqOHtaue!^JBB{Qs zVW(kmBWo*4fdW@xFG%X<=16)Y{0{fE*fdkvhCF)kfVdvbwf;hibM(ei*vzcmhIV$T z^5M+94F$H24RREHw~{bhu#yA|u{B##TIN`X%Gh%5t_7Yq?drjYR6&%r4)3bQzy@0%eTg?Rjs^ z?_vxR?5b<>u%_tdskvo?vQJY6CP0K1_8lw&lBGhr1TUiJA_~b`MEy_Pg_jyE7wYKZh_EfXsZz9 zguII)CUB1*uRop4yny6Do&>5dAy{q`V>Uz`UsFg zK@!j@^zD39-Nf3(=qh~{QUtVGF9mz7s{pRkk`!fxMi7pwFQu>$siBTVG=Os`uK|{; zv}n*kIkn`RfvkrS3Q3|8BAU6w)d{5B3si%MVI-I`cp)^{-$>u{5261WbQ-1{Li9z1 zSV;Uu@TGWJ{TW$ePau0s&bpbJj^56Yn3EWiOwvG77qWjXR=frAMa$>HL`X^~CUy@r zN=oz!KxZ%QwW(Xd41A+>&u01EjLFWk@>3}+As%TV$nPRvuAS(Xkw%-aBUx(Lf?YuD}7-~+^7h7sK75*EecEVnW+FR{fLdX1>|W-l)-&0 zm=W|swl<~Mk|=vi5sI&KXl9*1QbVcZLKLO-hN!9A86@a@YfhI;Dm!r!Ac( zf*Hx6O$?!x4b3bEvL5#2Vo_m7_cEF|+d=J28o6@?woo?I2%@0K<+Kn%Idy6TQhE3U z62GkX)s)FJ{@(i2?wD~Vw+0vYdxh?qt{h@@?IOY~v=ZFAG`Kx6lO6Y6aAw$-Ol|8; z@=5RGMV-!CC9CN17qbhqCNf`WLH0xRMuwDY+xK%f%h69p2h@ZNjpte0VAt7BhT(fY z`s3Y%9>mptv`do7wf_a^6@`dv&RgsgNjV**EWYS;7t*gW}fKE7;z=Yf`s4_`* z<&Bf$gAlS=os)5`h~Gk*kfG1;-h;2v(D+@=^}1;X#&7r*~Uwd(4b?4lw`FQa2t0!Rp0lYx>4}%-s__ze!fB*6W(&x%;Yqy_H#z%@Lt&^kU zaZ!#`ziPne5qy&r>r`;RX2%llJ;m!~yXg8iv?RL{L>?8Rh_Gc*Y^}vb^4I zAm6a4WM&r=oGC~=tgzxxP_vIaUTKYRhtOt17CzZO=;^GBpo|EOH!Hvv1O?WEJeA~! zsAdkr<5`1nTqM_;+{T1;eJZXq0vYsV>=K|dNQ}~FRUTM03NblyYYWYcVi9>*`WuV} zO7A%qQC*n`aiFb~*24Q5C{fWhno6dHS{y74$+3G3&v7LY@{CmPRkl)gakuJ?(~(G~ z(G<)o9OrqE6G%40YL`|_piPUOj#g*E(J=R7zUn+>UAs4@OY-$Haj4~r+VqNa@2J^H z)8nnB`TPkrB#4pF73yQW6wM|>NlGdrGhd{!Q|IDafDIUQ`QsmJr}V;by;MJU~XV^lU^D)31#S4z%?Rjr0TPJ zUVyG7Y0N|0mu#?=9OOKZ9fH3<&SfgXO?!g<|dAVnZdY<}c4V9@`I`Yx8JGX!oU`LvR z=zwb^51mXY^p%*%iulS%6w%m2_VJR$wrId`>T-51$^?FpMGa=d)I%T~8tNNe!!W>Q zW~Fv6=8$SDe3)axGAaXaYfGOsg^eap~823Hj#{6XCO}DYc4| z>S@Xs%0pUYk|D7(&&5RSGE=ArUC8F#q6LF<2;Bl?LfY&VB~jv|e{-{?t4ja^XTDeDefM&NR>dv1JlU`R^8y@?` z_J4ag2z!zK=-;J#ym<51MZ(|oF8MfZVb5ARvNw+U7zYOcd-ZTHouPtZdbrH z^Lyk6<9`qR*c}hYBEbC=#nk%$CrkCyCaOe6_9c5HS=j$NWCl(u*(c1=TJ8 z%X|Kx2TE_gzH**p(}lC6o0)0yEGu_|>=n$B!S)C!d}K)7S6r9l~EVi-W@5 zv>lW_JsF8j+~!qqEGo2jQU*!;vSD#n%5PhAjobwl3M9wi6BQcV1fpHKl6ul)l4MBn zvjv9C9DB8K`vN^R$yns2y%!FQ9~C5BD}wAx6N&p0*#-)d6yg<#3nta8!X@R&c4VW- z%GzM-QCOeRk2Cq?8tA9#8h@O)4Dp<^x-rBA?tY1i58nSd+o4WL%=Q|YM2@+5Hi?z` zglO5O?d0^$&=Q?hsH%wamF;2d3TCMh0o<)tB#phbko(0*J^HU3eLK7dzsr?+h4|;q zzkHUc!-mGnn9|2H>FwUsy_>#&?3AOmJaO)`n)m)0PvGCWiXT={<`~aAJ+@oR@S9ay z#`H5<4PAii(q0~4ViFbUf_JaCjSC&UP(*@YoiOcZ^Snd(ONz1PJi)3c-DF8ZJ~_tH zD&;Q&OIQuhr5LvrzXosjy#H-VjVlKQtARp(dum0n%0#`!wzRSuNNuu$!nrZos1sDD za%^+eSvH8<3*`qnSUd|Z#KZPf&d~BiXaeD3Sg#Xdto-P?jbynbh2hm9v0Sz&#wZ4X zMwvrN$<%w8T=k9@I?emeGMi;I%@cIIlfO#$Z9!VkEyLeH+$9rLBPJDVRb^9|=Lmx) zEM)I5(v%tV3B`$MN>)u*G$J(<5#~h&WG#Rbl0D-&GVV_8%RK&qmx}ND7nNrRD-dl^VJ&Xk~$)^09^>2Q+(2lg&kSVS$3KOm!HpYDJ`L zBqcc{Z~Mp3SPjam6MJe7)jwD#3Pl-{^W!R+B~y%_zuf;CV86~IL2R#T&l{SWW{EzP zkjMZd&&z%XLp2Z18ahLtddJ{2^-U_&IroE^Bv>a|Fx)jLOUeuC&_-*CJDVVakMaX> zq>rg(HJm1nV=bt!|vg}1iRMyRARU!-1l60O)dW}yRD$dKC@%+Di< z19(<3Fy0`GO5p>pwY9zpjPRAJt`$13;02Ka+8p49j=&i`&r*Pc+t;VVQ1c>p7Lf}e zRV5(%6B^VOhIb#5DLEQu-sUT+xAJaZLaRM>tCN+>C%yA`6h9|TC{D7>vDRrZl(-UNz6)G@W{B#;eh3v* zcIILJ!h1-hOTd&wN6B57E~WRHl3iA(P@6hUB`{AqaUJajgHAt(CV*ZZl02W$moY13 z7O~zw@KnDmmvj@V|+C<9`_Nj6T^N`hNSzu!Fm!2Gj7Lf;=gEHe^f@sFcs?*BnL7?(_1YpA~5Jm9<LF0u=%(1s2SMXVmogOCFKOBg6s$d3(ofHX(3)FG zRt+89dn`hpeK35GI~&bInyw_H2s&TE6{rLYmDsbBGQckQM1yNla#TB-gP5{A>= z1$^e;OtErz zK-)mSy_DZ>%i~djDX1& zKo=cH2R#DpTdo%`7LGpBlKS!Ow!6Bz_SSOr(eEFGgBo=4LQ&5DU0pm<)>CBpiOuyB zH$)^wSj)ta5cQ`4h;D`H42VnmZaiO5771oA1?jH^`A;8$y@*Hh=BL1W@IAOo2`+1N zs(PVjfw^!8Mrj~;kIhZvw7@HXn=c7VAM}}0t5lgun;l#o;(Qcvi^y9l`%}f@q|ywd zPQLX9e0qBk3er8pt*IUX#PQKvM_`;jT%E}M%wNJfrXzOBDRcBzuTw`j+o=qEzW)kt z_zq`>AA-5KTXW{o`0*0wrug_M_t}3fR@=90TWYwen1ARAmT-v-sVv=vd(l`~1 z^GkLkn;@$!msJ_l^nMrK+uJVu=TfSYiY7=)R>}U#P9W z{B(ldFAgy^#mvak$aj7TL=(}$snIYV4s6xhIAk3bbK>}(xE`21x?6?Fc8*;iCEV3Z zFca=(WKvG#s?C(>I8ej#nXO>DHD^378Aq!>DlA0K=17uH(#y=D`$H(ZOVxfHyGoGM z57QUnXv&q7GT`70lvSZz#4qlPnGtx+ctj@)YalG{Ig&KWEYVS;KF4f@c4kUZxEV&9 zGiQ|SpXh;i!tngRD%_JI-cCc>Q=ZlCi!fY)y!oy+DQe!8fqug;C*j20gAR;V`lPf4 z5t8yQ3XX;6s+3M{zhx;EcNB!-+<$YbkYI6RIWyJOZ-~Bj3$yW$&F!4u zqA8u?Ci_R!>CewK4WDH+w=en(7eUIk%yyUz1bDTvm^664(%*s>(ZcC#X=)D8_*_(U zk@&iUbQ1fMNEO|K(JoS)Q5}iw5NT#)iZv>H+++nTxq&$(J=LgX&A|}2&6Xg%6SBBO zoFe|zR%dw&rzAx>rNX@|E{r+RSQcAposth8_pvyNV9L0pmlQ@L9vF?! z!p9LM%nK4N(B)X&+d9* zoO_N}=$vBO=~{*2Xw-Zubrs%qIz0R^ATm)~mA8W^F*iI`Hrn#fqho_501Ff` z43}FG;aOZSLALAjdzYPnT})S0Z-2IAYN5O?9T{fe=z4(`4bkhV!oC#}yx4*wP@J{R z<`8M|)*d|5M-wv=i*OVH$dc3n-iQkKRx}a?8 z^Y8IAOm`gTKA_D;F`<>8@$N3BkuSFbEf`ac$WFCCaLq?XdJWs?Wb7ut72Y(&VJQ?fk>xRh&d?4oI&7Y!_mug zpYE>V$(~afI71Y7$@{r~!eD@i8cIvj%#;=pg-?N2Yxd<8IDd)tW6Mk0tls3y>R;Xm zhDxIH-jii-E`4Hty=< z1<{MuHYC%|#j}N(;=9c+CV#e^1rwNoXAA)y@6$56_xD}L{W^wYj2twfSvL!4N9nx1 za}{rCE6B)R1U^Nvl=)Im@9y+YXjRNF`bow-zWVeJ;1=C4*c=w^DLX$sh=S%soW6ej zcU+u(Ju~`+<4ML8rMlie|Nif8PYW+5W&UrvStQk`6t4j%EJ+!4jU3p*gG~#EmFA+RALeHta`{ zI^0s#_UT`|-{Y0v4$!OiBxXKUwfr>k8wVhOlB}KLiC(~O!~~M2K&%xQtOqT%RqOx; z-pJTk?Dh2R@MzcgIBvT;jcU%roVpG$Pk~L>>82>_Y!%x9k=;RYKss_rg> zLutOJbEPv-Fa%+?=%isKNfp!Q!tu;oizh|;G(sB@`H)6`CLVsMNUq#8r3{X8wPxIF z0{%%!sgDGd$Z>E|`Ch3K)>v*Gi!!Oo0d1m|8Qq;Sp}}oShQ=ZzvfnL~Kl|nP^i5Kt zi4)T-dEd+ z!<2a?YOROVMCY-D!~&a5TXsFy&3*6|&TU=+v9gWzf|)qM%WxCMBVm1~Kz)&E;BhE6 zEdL(k8<`@Tw~jd*hjt>S&Gt~1Djj1f!w_UyMQG@NNt|PzNFN7QnCTJYSS`39BBo2} z6NWybtYW6No})-4JuZQjr6?>=61ZfB&o-XfeF?8BBC z5dv$j(UrkgC;A#qCW#tBUe3Zq#3pc@%E=HB!rRaJBPP+ezzfF9vxrtlQpRc){Kbv3 znL!e3@xH}QEqS2sg918%}Z zK_*+kGrCFpSZ%A~dHclSc^#B=h1CQvg&VX~vJ|ewvRT5}UsD@lA2j19#X+-%t*Hm5 z0!@>Qf{HttRc-OX?zQT+;E64~<}~)l5D_dK-W8sfFaRe-$@L88PI zO4C5Hcs>g1t<4cKYo^T$ATDxa%3!lV>6hRH?94~>@)qhfdWu|ORgh@;$V=h@S8@f% zJbafBs|V9hyr|@%=*Sv`U-}rGvAq_O3E?APB)(YmIkeYM+HYYID+8)BEZ(}M&$@598Og2Z!C-y538nbB_wC%p&XMFk;k%Dmr41|eG+m- z9Tz0efU#FmFo`m!oy<{TU{(jt;F?T0+!W&~lr$hH2TRE_zYja=!Jy!ti94LNpAAxR zksjgi5s42@gdH23#^PdNR$9Y~wP1D&NVNBI)T=pWae@Qq=6H0y&y#Sln`mI7RB+8D zo|7=B`l&eOByY(@LDjo9$@e0i9>ruoXIBl{eLJb@TXteHyC9SUhm>epUc{hMHvl!i z%Asys#N=<#w;0GL2}1NuKP~DI)nT|OY}F^akf`<`333kJ?14_cf)Tj*PU{ojx&AifK6~1a ziRx7Hy3wJ19t>)7J+YGr^~hnrkZ-|PLoIHjk>*w>^kU+a%N}Q_A-2ze_I~*(P1o(u z5NUAW5}8Nuu=HjV`rA&2yz}5&6Zc5efzPjvIma0C9@~Ul`iIf$->Frt><>fVuOlBn z4k<%{REWv>Ag$k4KNC5N^{2C8=KqG~#c%@tTfneG(aHb%?wy)PE_vt%wdK_9{v<%UKn9%Ba?7g`>Z_Egw z)G-75*n_^~4t}FQZqgbLuvfTV2wuKZ4dk$paKYiglk&D7JzBZ0YaF_UM$<2hpU2#1>MtsBM;kU_?F(*B2Fjc|5_$vA;T8@w+ z7LjURM9iLOZo~5cEa4bauQ!ht8}AM}AJ~8X{MjU8J<#8e)(O<-{X4At{!p}Y8Of_U zfAkSpOcMeA!KC-j1MAK?b0ZeSMj2-& zfdY(Q#qpmNF&#Q#FCDVJR7*)_)_>v1WZk_h(}AEEsj^|cvrL9g_s3L4P~0uiwbUH zAOK}6c|MKnfL?6a-B&2*f6u7YrR1C+rI`CSe3#@yp;vB79uv3pF|aiuh2j#(fqG7-*9_&!6e~(F`kkf( zH7#NbMiwB`3aKE|qE z)R7xR41R@z?S!5ofk`3yWT06ll zB6%~4uP3eUvfKuEE32w0Ex3&KP_Z))j68ee8a)fFEhZ)soXzR|i%{*PQeyjaNZWce zBh%yCw9Jq{R-p#o85yPcfD|V30y^6Bgv)m_-#x(G1P68_-43c_JVRm~g7)AZuXMpiZY) z$|Wf=A=ngeh+z0Z%2zbx2$!gDSEglPfi?~g?H;UNK066?smMmTk3-^@saTrT>csRP zJm|?C))NnjGx~<#2I&~N7f^vNQ9~GPnUKOTD23$JdOWh3hj3bm4xcq+EK>;U^?kH_ z6W<=7R-!kSTOT)ZCqIpO7ID-}ev4?o&^lYY?_7syWyFrflm7!>ou67yZX$Vo)}Y7ijGi6 z&Q(&28(BRb@}7tsZoaQvp}fb!4o1bM(-VBzM1^_dfk%iV(=*93#!$!P8USM~d{$PV zt%;(|@`b2Y%P8=NURX{X9M>{CF$VPr79`Q&qNQgfjNRLtWMxGb{JDgj2qQpR1J8Uc zS01-oB?tJ?ozj330f3P+@+&7LD_kU=Hf$>PLgF%QsbKwhLG*>p$R_%amxC8jz+F&2^S#hrF_xb{)uU?hhhMWkK#0>->D74M z@TF7a4_qm4Ys8jqd8HK7EBe;e_4+A{z0_gIn3JoQp=f}hj*x?|NVTQ?PJL+|z@*JwAk@94K4Mk$c7u#)je*H;@y@xRc@? zV^JZ@XR2s0ioQp!-&zY8e`&BVRDYWt;O*T1$@TA2R$cx*)%l<<2$qJd)u7Slz6!h2 z0y(Rx<^^aqeJ;k4CSt7@4F`n`HSl2H#K^vf{^*cH9a9ipD*r=Q<-K;zG!&q~-HFMjAfZHl7JP1ia9 zxwH)Fq1QmQ$m#uOG9%iUE-=tSvS4#J1JDo9P=I>?`zZ)mMIZhyI(Y&Y+T&>ZzD>WS z4sV-o;94Lm-sW|i{OaMF{#C~(&o7t5a> za(gZUgl8V0fJAyRqjt9ln1(V<*BCDT?f5;|h(BC!$|-D%Z+0?odu~4sTmX0g@PYvQ z?Q*O8)8W>MH0i!QPN?H0kTMCo=DvL?!x{__74SlP^WWC-J{<7y@0{!DJtHg1!_vq`&0~h zsDmn|g=h9Hm27ip3Pdso-h$sQHv6Rr^TOe76+YC9BiWCv?=~)J_w)MfM^<&?o9BSbx1YLxb|4My`o|~ zj!AjE=^~Xz){2Qw@a-R2M4=J)C}YA&Y7CKP_8pf&pIKqu`V)9JXkz zCW_Cv@88Kn2=osEON`j8iTdNq5c`VX7A2#NbU(ER)FM`6U3w{Tg_}q-%&e9PU?GL) z)7`>hB4Q#7q$6IIer^4~F z@hfU(&ZeMhClM~r!MSWi+%Y%jP{wWt?v#WZ&cMqO5yB`Isq&twb*7-jJhxPVDgO;k z1stv1Fm;>=5CShwk41@mL*eUy3mZpW^&xtB0H@pZ#(V!Va7;brAUz#W;JdmRTJtl1d$gE(V-0B;jBbcnyW%vo98amrWqx z2$OOp9tQ~nJ0%ZyPS@S?=z0nh8-{!m@R{hxaAuAcH!v=f6k@dP5`*GH`meVQSV+TL z@Y(-mfgvnCebv_@o0XC#!?qp*84NtSSdE$*sVsK0c;`H+F#(lp8ey=w2IvoU9S#x| zNv$}ljpQ`R;iOtleiEsnXYUOO<=y%S6~f$%$c!AF9bF7C389l?bl0a+XFbZ?@3{4f z6{lr#$PL&HP~XZFVbyIg>VKkZBVzSHY9?n8IEWQ27pVF@RK~6nFDp}58-pdJ0lH*6 z{9H3L5;UpejGcxm7J?DTkERwt%z-8pj*c8HbQ9XaRzxHQWU4SpmP9IFdO2MjHbt(w zIJ6D?XF8(6x4gZ-Eg!-sTImg)yL{F6!to~YT2f}czA<#~Hq01Z=Y_kV_&p}!k_tpBVGTSwo~Es9Woha3(X z8#*qo?q_OR84h#$fJz6&&4y6p>ByT**bo8aBd4`2=8+wtbT}GFsPW;zj{>=2tZD>3 zr93)Yw1H_dE$X+}lr1KzXFsL{nDJ5E5lGFQG{qlVI{i-0Y4{e!z6r!~E@k?yG|nuK z7$eiJ6wznlProw42tjHj=p6ld`~Jki!Z5m8OzosF>QtQ%jM@CRS9PzZvh|hz>IRMd z3PwnGvx$*!I80IS5}>IneU>o;}?1UjHTdJ3|d13H+FssAAeY&O@Al>FK`6aOj)$ zZv&vf-W>9B;bR;+F{nKeo%~)|GL5$U=>>ut*MAs+c;%B>=R{y*>-YNU%Y-c8-^LHx z0vuzjt}h2^pGN4D$a%Pv>i489UT+b7oVmz8ZhG2eA3y9Kx3zX`FFq~12R{S-u|>Ua z5}g2$!uM?P!YoV$KN^C`2`%uW79BJLecx9#lNu#aa;?T-Au?%Ojb6Tkbv zt5F_Q1WQXao@amfzngSy?B9eeZX&{*UFOfXHV`qd9>Sh}F@O1z!dj#VUpp3MQknu@ z2!_z7=;qTC;^~_J15l*93m^hshx%P(JRyr%gX>7nc^}?@34n16xS8B_?|%N5bWxtU zumznGbk}kKt7tXHXy`&G>wlpXfRFGh`w?5C^}K)7{aHFWggekY{0GD$lq-TO<~LtJ@j!il>)|TwqGdb`m1W?9y-6Y!`dhD0*)!$yS$#qayRzKka;O<2WqeD2Q@Nr z)LK}7U%FEE>1mnHuamb)jpq0p(oo@nIu}nZsj%CuGF?7dxt1&#G0n0P!gw^NfgK^q zUs6O&@LuIlQ|#coKl;1Zd?y}6EtGK~JNoemugT>a=UEIpk)KI#u`ya@wn!bg zD~Y&kRC)Eth*-Ui@PiI9muIvMY%N3MCvtl&VHG-L0SYd>X>2f(>50|yPW0N~L4<@0 zs$1QsGPHdU**ta^Y)>lWq)5q$m*vV0)r%x}w~F6YKg)XM4}xc!*fb(JWJ!Z<~84UwbcwAC>H z9`Hg_QKHu65P$$r*gb_PQdPwT=n<(_K#fLVQMJ15tjiniX-lvJEQ~Yk2J+TUW~$p2 zySI*zXWiiChQ}IgAVX7l2%KSuKY~mm<7EXin+|`A!E{vN<9#Uu2>rO4K&|VG$>2nn z2eHOY8F^$}Io?=Xc`i%gjUJE4Bt))oJ}htys7ydFl8eAf5U%5IqkNZX`wgQNwVj-* zKs4T$p*?W$`dcr#H!l*AW$`Z37HnOrx=dahb_HixP6~^L7h4uv2Rbih+Nx4zf|=D@ zM7UJ5g;}&iLcio;dyBwkxM^3__YfbKgKz?x@m(yG_(J5>EzKuyP>sn>4WZEW^5~#>jrZ4q)Q&gx|BIL zt5M`b@a54bzlK?=3_G|Tj)}qJc*sGEj&!0lX6t9>cEy&T- z?nkcnI{kdtl@3d1_=~lhV?QNNlcK7X!cZVGvpLf1i@Xom!MkhHW6#uO}WPMzRnMKO@ zs2{7c#4d%S!Pawu4G98TOy&uDWR5|p@gz7Um-0_YF$G}=oMdHbk)I<*Y>~ z!18mjXef$!QbO-|{#OYVebore9c*X!6(i%b-t!%RW`K4aUG~0t_562n_Macc*|pg= z6MZ2-;6>E7{rvJK?Zo#cJo`<%xq)5;1zBh>80XU;%+TfI-nTZN!p@zB9{a&?RB0Y| zQ;|#BMEUpnHxaU*gBxgtZvJk^t>F$nI6kkZ;2IFS)%{rb4*@l5d?_0BqDy?~#wq55 zvFFnX_imHc^Y1>%K?_WUC>!|p|H6dw=WfgzIGk-yjgX2byb4b zE~-#niewpy12^BS)2%=8y=H^`W`mAtELb*Ez;YeAlhg2#`Qqub%-0`r{Fsr(uHZMTo6Gtea{JDJ4FG`n!_o)zEA8Rl4$ApI@NC0|@myxwmWl3b;~| z+64T4?>(Ng2oKrDTOc~H<#XKzp#o|s(Yw|5y1D@e@Kvm4|0CpW)ohfns`<{X#E!4u zw|D)8Drb#lmEd>mg?qEH=HN^{gH@>V#+7mZTl=XbS&##(I3`dO$aU|wiv3}Tx-TSO zBo^X~z%)KXs1Dy^)I!Q4yIWUJHJ z)v(;DxCc$0f_mUSVdzq2KXri)Tz7J!HU_9Gs3tSNJ~+h}c2Br6l@{e~is1nw?CX5Y0*UTud zpLdMA$KBgUN6pfU7{*|JW_b=g^DoG_~A;pjtW*bh z3Nl)(EOnrvY7=q$_g_8UB;?AwS}I(CTF@yWX*n(7=#fhlM^bSjNVlplHZ9ut%!-_U zVlR^r(U-e$pk+r2vAJfav~yoYC~Po{&gxs4WmcUU$-BD+WHO{8ifc9X3u@oeDW4t^ z3u_53=vo?2#_I=|_gKot!K2wdL4pia@r9thK<(<5YJoyKx;!usfkrGbpKvleX>?)2 zf`ncK)xrVNrhkE^Asx@09n)W~IB$g5ME|#-A^Ik*+lXNGV$&5&E6w);H&qawxVKyX zv_`Ia9RBpi+%u(b&IN>Wlfxs3rRxzAO@yhve>|1?&~$ng3unWH$H!VJ&O)w8hGn-E z!9i?hvfA{;F8JLdumv!FJ)D%5p-^7mLZ1>pH@rH+8T{LS$}-PxT4wKcO@+mTY=VLW zXEz-&_d?kHa9MRqHCqK~+_6f5N|4FILMF>xMh z@9M;9g3}e+pK&=4e6*@}sp^BQ4VQ>gQL_%=)G9T}HzfoP&tWZCi^i`2e~~=|%g5=d znQqLl2?;NPsy2O>c=t%d9Hze{*wB8sT;O>bOU~@&57`u2A3jyD`_-9U30 zl1DBk;$wAVjjUu#9mxt~A%!QG~;*%C?jZ5a2M6CmD(}{H-+YwW|;-FlE&}R@y z9$f@|6bkl))QYHy2p#Ys;31wQ=13$Q*cyPt4AgZ>kYSjN%I^-*fPl?P$1^{yJ&~YguMKabs zB}0laLU*N!_YJ^bFT~dm;QP_syXc=kRE-O!RAY4+@Hsz)9nZ~Mqa3bpY)>D*Z+1B> z)iZw5(ptqF=G44SK5wwyUVQEwXxMi5i1Za3Q(BO_b|A$Jg!zqQn4q&kEnn?Vk?6}k zt$#~F==mHBmX1Hpb!i$eEr^huM?@UD8l{6??q_`aS6^zuYIw-Ep$CjWF#XK8e=$55 zkUWs^uh7aux+9CBV~GJlIgoN7Fq%KJOLLz#aEH9M5EAf;{665@l|D-J@6meIe`aNA z?cKfWyZfW#R^ww`(N)##jmG5SwzeNDNADNyoa_D+EhS&c-Wyt}?WKbQ;g8>J*Ag$u z&k}5bp4Y$dHd34>>4jv1bos7TqpUt;=I*Xj)!2G)1TQ=}Exn1DRNs(FYbTK6du!wd~ zJ}v_&r$u$0_#T+ zKlKzW^DsZ$GUcLKu88x0#wp)oDv&CR?Kj2S?TI~LOAS`0H^y8LFZ(RTCKYHa|859~ z8IOP|K_P`2w>=pc+W7NsU6zI6lXcF^?B9P%y&^5*pel5Z9g&P&z!k&t0w1>IXYHy^ zVdqEWCghO?ZyE_5GudZNm2MiVyVz6HZWrRhm};M4n`3JV7s;x0VY|u9Aq@H}O#4Ha zL0a00Y*M6>Wz``BDrb%_bJd~*_~%N^8geNGAq3hdMtu;^W9|8Jn4uM6ZU}d)#?PKM zCJw1@@RqeGI?5r-UqOVr*uTx^b#x;JmwDSM?a2vE`X* zo4$$u(4NDo@SUC_LblkM3VO8PhpK`#2;(i}&IepjXm<5R`XHE|mr+91B(i}7K7z|a z#0slKQ9qb+kq>F(}km~L+|rpI)*iD}c#rfV3#*FL|;_YeL$9L{+>T<-h6uIDvjKgmJM z7B{A(KhxPX4*k3YlCKe6ARgSu7elLCZc-Xr(63E_!|W3uT{JTgz=p__nKbimk2VaL zN6LhM&ZrqC#Y@HeFiHLd!=o)plSq|FRxTt^9>GbFYJ_8r%n(QCmDfTmA%aADuFf8a z`j~7rjO@Zsi{HRH%(^s<`r0*&m*4im%mtp4`pWFs%L)#|d$5r&EpZ@0Xr*Zz#kbr@ z0sxvm>%8?hvAQr@qk4Fa)?ie9rEZOyT{g0T3bFyK`^*t-B+ihmD@8=*^5|W-s$+^` z8&+|Mj8dZwK(#GyM^7w@|3NqM_nm{-D@``9BFVI*1*p4`ahoSbEXPN^Q*JV z`ap3)TF5=Gh_bVkm}uC*r?+T@iK^Wd-f1r6S_~4jo!gJbOqAJSrn= zIYWYFCOmZlN>eIvC|FCO7O*r)PjP9gAgO@JrtPMoa?&843>*^4@>qQudUN>M%U`x$ zZiE47jnK4a4m ztYp?fRq|5uNKiv1L*&Nz*vkhdJ9NeyDJz{L-s^+l@eF@c%!6S?wU-oQY*!`;J2p`- zwlN)144e^L*A2S*a1_;92@R;KL4*c6Y$3QXWbrT$s{O%{7Ha7+Dc&s50lNy%$=fJF zI8YcwCdGpfUv&h^R50*CNALnFm!_kIJ`flaB8_iHkd*xnAs#;DP#^fi1Slon<&+?^ zujnpv#nt9J=dCGE=T`cDV*9+g&M(w?pqowuy9rgfY$JnKn%1lRTE_=NBWLH`)6NDY z+93ZmIgt?|mezo=rQHegTi90t9{CREUt7AGX_K==E{27By1{#nYYpYs^OOmXMh#>? ztww8u$R#;woV^$=16)f)fUSRE0&66^7D?QAV+I-a*9PHOtAvnbNQHIL4u-l$-`GUR zdo0&uW)Ma)!WwyG!m~D2|52f~(BC`N*Sxvb$0G*6&y%%D-xN1$y1Pz(TWn@_w@h|s z{G@SCUmMSJ5h-ik9jkS7X6L2G^~p8#S3=Q;KIjU_6+BgX_t$->mUm3i z(dgZLu;Qu3m{M~`Lb<;*eDzUQmUDyPFlJUJ))E`deD zbnDlkI2<*-5$}=K;J@RA7X<~E>Hs82z2`AbQrx#c{xf$rCr!Z3%hU7KEbZC)w`<+R zU~(FS`|ZrSj*sAXrpB&f|55ymz5W(F+#^I2CMoILU-9WvXJB8CJO&^<^0do-B0Z%!AB7L_ zkDT3YWB!L5;t~PRS*8+x9(UE(OFLqj61&bJaWn#!A`PtGZGz%EvyphmRt(j03x9sw zONszH*>S*7H3|4>!J*Yx8^H6QzrLIUMP;w&5?*S(;`&R&p9lK&I8CnhuRnFRx7!#H zE-sD|FIpYG-_h#{n5>fwDBKKsfE&TyJlqHZ*b?)+D_04@nl~H1z(o3bYx?R*OUzX6 za=sz{auF@*^7<0^>-kx77o`_yU}@{_KCsW_4ZQnB0#uDkw%bX;xOnYr*(R>;PN)c~ z$DLX&z3<@f>d;Yy9V|bmZ=3gq$0Rj@!KL7??b_|nsh?#On?d|2vy`V1$=IhPiq&{L z(25@|+HL{w<~2sc=+XSxtc1-hODu@O(B4BI9@+|+X|bv0Y^6a5NXNYGla;Dw zOY@{$ocOr2YV?}kWuDLaMu4Ln!r^%QbCxp9bBRH78PhNXqcBC2YRJBM%vsvlat zhscd&B3czrY|fq~A`_9S_?C$wX6yk|F91#yEr|S3^ITfFc&OfKiEoxPJ#8L0hF%Fn zoq$0D@&VFP+G6f0TS2SBYWxVT>c%ln!CJ+1m4OnIfn>@oEXm@whAv8AZ~eI4PS^5@WfR_>fD8>B zOOD05)zB?Xi3y*_qHJj`Wdmazt3}=E+@aDJnmJJo8kgiF;lzhc-UCK`M6@9ZA&Vm| z^)-8qy{^llW1X{izz3t8kF*%TccwTRtDUz5t4svYb8itIBy zO1v|IWO%wE^da=Gx+e`hvS!yumKM)g7D6IlmUz2~hEpFeQ{jGZHGfXBgAm%GHZs_( z5wjq1*9)i8;PZP_z!V9|FNaB4Kh)glyO`3ey+kx9iekYaq z(`)Ik8?5(<(&rg_XJYVt98DxHO<9O--|;M8B5*{Nsf#2Z;T^&_u(Vjx@DAtVN0}6_ zLe<5^LouM%)z{-AS1#rlo=4YKQBgsZ!=qksH9pGmaBQn>6L`1OCo^qMAmCo!M$KKo z%OJB@nvD+}u}g$%k=N5oLGhsYUY62xc{5Riw9c-jqr|1R|)E#HUle+l_zkWQ1W(Xo~;v6B448N;mnXqPB z4XS6JvLW6v4o$PjS(7rNBR|{6_)@%REEIX7ii|R%V!cu$3M&?ilxFghP=(?nEQgUY zQ5Ezi{Z!3%mx6s2EwxpR|3%`4o^f=lW8H2oW3T0#wj^1LWU9 za)M+BRl(u78G9cPs3h@!UVxTahZwZ<-m=tk>K!|hooN2Zv;G~CPQ*PNn|Z|si#F+2 z=t%-W)}TTj2FMMB_p|pNRv6=S^ZQRBvSxxVQf^pQh5(Tv$p$X2j6OFDo{UfS@eH1) z^nj-KoZeu#2M}n+wX|)a0lLd$zC-ZRL=Zo10MKNiTRDvs;-b_nlmp;UAX+n}zc#p< zQ7|Cd|E_o*iIqYrtv&7A&c=mDdqS#N9u@R|GYCny#9fCct+ENxD460y&mWun_F88X z$%_LKI5oLXa!++j*jKN=uPPplCUVcqojKJnTj!k9gFC)6pHJOja+}KiT`C|N7E$XD z$$h8{UL)&MHMo*Xc;jpDJD{s(;tZEJzESq|yM*Haabe=k@6!6ehG>>&7XHpk6ux;d z_>6RJDr$?Ac1O9iR0WLC5GNDlnIr}IOYoqk=OjG;<{(0DiWK}p8yGDnp?1zOT5cn= z=l4g44#BzBOu^UIM(3L`R2i6S)7d^e8M!=;dc*i*gBJa95yvH2QVi|4+X@rAFN;#EzKQh+)=F@L{#_GA?OYGAhiLJv+^HQVxM3-5Z;^$Z3UQPItGW zJ8QE4>_UP&E@Ezfo`r*ht|C7Dv!y=y9{j!E*bSFYZu57&3^!!Krn34zu$^OVd|Z3E zzd*oQAbIF>i#B;w3qEt!13X~Cs8$Th9|HV*#{hZT+6VTTKyY+HF0T{#a&Ja);8Hpb zx1LV~L}31m;dKXGhL!W{MEnh!mIubiUGEKZLDk>|$Zx<(_ApY?aeZ|XCmDQ4@@cwY z)(>T?D;oRH#Vl>mC4jqiFn_2U_`MPhJg@*q4KMa_Y0Bvt1Y|UUsL4Nh1d7cR&ne#S z{{;qO;JEo5jgySTWYc-QCDHSDD%uAb7&QOJ5-ru~uZ9lXG9)-t@NjEC`%En~9g6w; zjU5(BY;Uil2mR@@400ZRN~Y@h`!5ySG&JV-_z&d(kZ*l%PZko*wt%DQ{!-5iKQ^UU zQF`NgG$8R*@xbo`c#(Vob_3w;B~O-P$rws(bl_mhDk_;ijbm&bQgnD&Ns!vX#fV{oF6R2W zi&Bh{RT>OT3ofXJ7_4*9Ft2KU`}gy!mD&UIu6kB@`Z0Pu4C$zVJCdNHzFjLxNtkea zHnpH8?cG~_FB}3ZrAj%KIE1cHAXDbFdsj4{l#z5&pHefc=&eKYWF_}0%u3c@wjzuj z%8ZI7i`I0e@C`jjU-Q7%qQJaB@8A!CwSa?e83yY=q_Xq*UpW>B=&_BfJ#`IyAy?F->J$Je2~rA?DS3mBBz9a{f`~kw1B?LrIg}T1iQ$ z>GF#;D;2+8);XF+4fzh(E1_t>ghkVL(`BlM9WB3&t|OTc`ez)ael6&Yzt#Rqhyr? zt@1|)khZ1qsjD$vmC@KrHw*;220WTO- zK+xHI=fpsc9Xb1hXRCIS@o4cZBPy9Hegu3WW-*ImGYWJTIPuUi96k$;1p%5)YiV8x zIfyR&rfgRzpF`#iJ)xbWm1jG>79x1mgO2n-9){mScN&+jwn+EpHe_!0#1303X3T*8 zo0rhnUa{C+24W$hed1=}V^9~9G3z?w^q{C<8G9C2F87-G*HV4!MXR~XXZ3ZkM~o}S zuA&!lKO;iR_BUyQz7+DqmN(%cOiW1VmBmGB4Rj7-O~#_t3AX$Y^6 zxtQyTA0{hMISH1aHqvf=ZXjedrM>pwA+)FZa(rna5oNeE5Qw2Pt3JIaO*U}>H!k`} zqb6TvjtcH1Pi0z8tgJ?R(bO@!fu5ytq$fLHm==(bGbEY>ihYh@$lLj6YHP$!zPgzI zEHIM%R!J^l0hZ~P?_QBRQ}bVC@|~+*F@+LokmYjMjBof2a=E|zB(nbx_2MJWO7J{)?$2r)_KfNT z$WAO=0y^?*9suEHMatn}otP>1)-pDUUz|Vy? z=p1JWv4mEa3Up#tVy5;gvK8^%T>|@3%MMSY0txKAor-m;KaIE3rvX;E(GP?mBih#Z zpJlC+MyHY5uSuP`H?@}32|=0Ur&g@~_E-NsG8fPWg|8iJKkIHdn)folG9)aW2+mc= zxW{eAfV5&psT~rz6GYhK!`EV-GQF=PF8H^lFgWdoNUl8!@S_RMBo5Nq5NT&a$h6^v zz3LB-qP0yQ7Ny6QO%xTZJ3}_G8rj{`U%zeT8gR8HUy;}bV3U7}oB*6Oz;Te%wDZlD zMeFWA1-soH`PtR>&{zAlU3B8IH~tf6TX)cqpV-y&W6r5w-BFzhb<5nB^ZIu^7AsSN zXGgG+vfFH?z~N~iacOSw0C<>Qu2yW{QDFHXy4QBzZ^2Ok^gppS|9g}kCjhKe2zU1V zTCdK>&3CNAG$0g6nEqTp|FhL~{{VadOom|eOD38a6`HR^{SlWY)Vl}~2^X!;{}v6b z#kb$%fwk_-vwK0%Ey0Lqd6w(_js*yL!#6s<>2~fs`dW04)Z)d)(KGy<*TrNp3j z$EWBeGHoH4B}V4MR`Q1-gRUupMlGEQh|Zo?7f-d`0*i zYmC7fXsA1{TUKM`{it(+UaTHRG$dR9ZuUq&S(?Q;&P%!1Y3=NjDL|gu0Y!RWO6oLXMGgRO5929Qhk{gTH(9N{S&_`J_JnV5 z$V^vC>2fGO(h|)so1zfOmq!)nU&{6XDvihJE?uhqlu94fEBA;Y@gtzW6!Jl)tGJ80 zyF_GkV8lS^d*p5n7a0#hGmDm4qtqPueGEq98&IkmF2+PAlC_PRwT;aImgh{c!AecQ zXU&=)<+HMvIDQ!LthfqH&Dla|8}L8KbI1jDeq*SN8Sq{Fb=|RhULI9g^|bAxq&s30 z_EEt)kN`ue`=wND$90+#Owt3uEr=#bm_Y<8WbnJ*I zYV++Q0KtSuDTCCX-D$-hRv{RzKfRe%Vi9Uyuw=a4=&W@@gXB z1?a7#8u3mjYv{D#^})_Df}=%cT;I54&68qd#pC&ObW^&03GHs?VpcDzc0FT|L2!Eg z8B$eFmAW2c*-;KsVa-W;j1|2f+%U{npc9vlR(Z^84Y>LdQ)Hoyptd~-j&%tgz36e6 zN3p}S#&24T+^>;JD+=-yWinq<@-ZspXBy4HH@^n1(H$K=BO6#hXQJ#R&W&8FviQ9B zKzCD{!xc+8u>+8dCJFsvvfC_Mezp4QLLN6W4Ie5f!wP`p74bUSI{rBs^@m6Yz$dhl zRP`%sykLxFAwtnc#T$~Cg0=081WK5Pj`~0gWAwr$Gjh3LGi>@y{45O8`i%36zsrR5 zF=$NS7)W;O3T_Jc)E5hy6Xz-06Ae6YXb|kMKC=cQS@{s>#Br;R>v&!Wy^$U^z=KQm zVX0;Piq$7Qj6?I-ruZ?FZ+4s7PS>hVgVlhcJ~lme+tEG`HCz@s8+g0Es8QyNabTDi zgrk{9GRajJiBJoOVOB2A7Lcvt-Kxcw5msiZAhk=q$x*x{x2yyTZ5L$?KJ~YQJ|oiW z{^wRyU*tun98Dadd7o$|*zSq*^lt~q8#JyUQ z{6ZSUhYu@xn=kagPzoL-cMn4+2Wi^^?h=5rB#rVA5&l8tC10;4|5=m3q1mp0c1k=R(Nc>0 zc>q+!%TlwR|H&`_K63R2MD^dpFLy_RuRHcO5n6QZYk`JUKvYjo;(+1x(ueN)hWd&M zn|F6-2Z-*(U$;DRCY7o^yFMblu~CxE?%<}#SS7ekGBw_f{$SC?;2+J$cBXwL!9WBd zB5#5)M~FH;#c#w))bRQv!ukSi;z$xH-lDajHlnp-Ow1Y}ib?0y!?W2Gifr_LbMb^CkgI}%jU%3C2Ee2BkVOwF=&F6tyHZ- z-+k0YtcA!Hfy`YdGO5zWQnZK}@ZRPbARi}7SWXOjh|lWwSqr&NuWprKE8mtomV^Q( zBPH7v)DcC4NjsWJKCg`xMGZWA?Pjtx0(1CU9bE=~8?X)B0P*uv>_uD~*ij$VMBG5k zAdr@yulOd+)7(p1iE7OXz4But&vE}d!h?3MX>{3MO)I3nlq`d_q!x9Lx|*&zKl&W% z5!V`Fw8m75tI%GgJ<%d;cq}M+5mnebct;!(1%|ibfAO=hK18Qc9INd;kghY98pwW- zb?omnQpNAM zrUhIs%?%&QHHh?X2Iq)imD1q6*bO^i_z^JqWFYW}o=LBa63G=&hT}NOaBbzlL{`b3 zJomS`eJe;l64CU9GDy=hi{q=|@-U-EmTNM5o zu7@3=SAJ2ACcye+|Le1xW}0F+F2!4Em_`~5HLV~v8rMK0AvRll3~6i;UHAs~dF!F&Bbt` zP+mr*83wzz`V_#y0wvb2L#9;tK`gD0WvqHe#jZMnUIF7WB;hdThIk{K5Lpw0sY=Ho zf-MU~lW&e5tP*HLw^Q0Pgbs}8ENw8OrfYr@BIUb5{p5=q7| zc*Rk(aBEFBmU>o&e4K2YY%l{K3#rT-3&ep_*w)4+!yxXBrTmZ7Gz|hP%TKYXTHvap zA4k*a(k7>|cS(L`s078NDhU#eDdw`RiD$p!8+*W*_;Ersea+!%n6n6WyRcr2R_ftc z*L$&e zp(DBdp=;2vllr@|M;|JEPk-A>YEEtvL_bVvx4hWcgrK$-XSy40`O~lY6%Svh(>fOi z9Xx~MaE9Nyw<~p{cLzrOJpchhd+LbaHr!elHLJ-oVofP^R^EkHkwBAj(H) zy%#mtrz6RK7IsG}SIEMz&cjFd>c4chweRfThVF_CB2*6t6|`kBo|(RLxwi-={I|;R z?ga+lhwFcY!mF!;^)tW-^?Y(^3HK*dNT-2-0!96s-J27d!%RI;3R zKXJU?Ilm4A@ZI40rOlWsxNR#13Ke{4R(QIU^e$#bXS>rKOjBn>6Our{2 zs5dLhv*D+) zL&yFj_X8)L%SxG%zw(`iUF+Vdj=$CXBHLDowLMQ+Nmq`3KwyC~8FN<2XT$G><{uii zO-B+*nK6WUuJob5p|1f+*_$V073~Iy>wPE4Cnq^UY{V)#NvsHMY|JH8L7I*u1|eDW zuVI|RoFjJ)g`1&9Xy1r^*ghp_F~Ti~qNn)p z?Yu^l+ICYR_5WGu&c2tk1oBA<#T6h#>kYE&p`>+L>s8N_pACM4<6({)l!SVOf{%-O z+tSC0jg;9*thepE|FIc@+#kDt9I9~*;x(`g@O6g}*@@2h+jWP}i!YdJ$yN?Av-u#> zlgY`r>yw$%zo$1ih4jkNd12g*Y;Aj9JS^Zmcqk|F3+ry{n-NsVS>DM_tH}sA?^$=K z-)A3S3qN)o6&G>h1_r@Quvi!CboULo3pUxsa?!5K#45vd%uS1w>IXEDG6v~Jb^Opi zfY^t>6;(|V`*WWunR8yfHr$`ByOGIfo^#UhgjS10?-eU(m#m&-pYpdZ1-z=epBszs z-Yu8dNeq@=SF%8OY0s!)ML4ybCLE2@js4m2uOj8Cc|#H1&OyCnDs=0XBE}5i=`C0GL1C$L91hv z(~pSpy68iq4TIs8Tm1Fy0Rrf>7DvHqJi>V(d0K_-bt6T*8qy79B@900`K~Ra6yewips-AM8krT6X3|#UUTDZZXr}`94C+ zh_-}UwXbi$#bCFchAqY}f^*N06yBo_cNO)DkWTI$GQ7+qRS`0Nm5SlA5w#Xv-JT1d z8Hk!TuT5jXwayCVatns`fqhSBwiptQM?!-%74aVqCajN-E0jEunk9UiPR|Wpi)kF1 zWVEcoUR2!XeOVw+)TaO{3AO)?SaTHCK!}N6ajzylsKMRqPb@7yvkG#|e~8qSg&<=3 ziPCYwk8nqG9HE!6Kw#khj^+N^){g>A-8`!znrgU+F!vHP6EYtSf@;(PQR&G{0gjEe zT0e-YSxd|T4D1QDm;8dVjd@R!ZCmzhR+p>puZ>24V!4k0 z0siK40W44MI(QhTrts>1ahMtYWG&fX{FCxt;2!7X!#7mVxy$wEr@cQf34erMs+v14 zJGP%jUVNy$C#ZJruOmASJt0b)J3%k&ulJ~8qerk{$m831pZZ=XvSdnsIbOevizM3& z)K#z36WfV$9X*BfcpEuK*oX9}v;_7kWB?N(9}Pc(gHVc8Y2fB%4haF}QtGA+u{!Ab z=h+Tiip1m*d?*TffW^9=;AQj!lX#}1L~2m(*ZJ`Z76zyLl{zHI1flo%1g-?#dgA7U zD1KX$<6u6I@lIb=A~C2}NvY(<_AFV?wS3m{Brx@%U7cu(a)}XpKYgo0Do{r?$orB7$KZCb#Ad#bKJDQ zNAQ%rcVy(paf0iBp@n@EAz{nqpingxAHc;x-jHEIGtvz`TLI%luR_k+00>kNJ z#}!!E$wE6+crE-KK$qL!)1^*P$%=Wj-litr?x2@E)BJpj+RPVzm+OVC7}}RhjSu@% zxq_X3w{h3k*JORYz~y&r;uEJcB0?}HPiag&DRQ^(A9{5v$sN-I$z5zyvHd?E2sU4o0P7DGBF@w+kcn~r`FQ+pt_)^JNDeEgC;O0fb0-H%)Pa5 zvFKO_ay;3G**zIcq2|Ow!oue6&vKrK5QefJXC!I^sNU58kJhJlJ&qGe5mQdyoe^HC znl6`QCG|A3OTDJN@rM~bJaH7a*`5`Kjg?*_`oXNC^V|&F@*A>at7j`8tLokeEip|U zyR=xYdFmFYziQfysZ~e|@Ws;wN+Gb;sLW&C?_7@W!Y-ED0oT@Tv-Ln~@JNYk8}5nr@T()?xiefTi) zrHYEuXN_YMI9Q+DC@Zxti2)f#T!qtA`+EeugCi5-YS9+tX2?&}P=&ZX4nG`CJ9up= zcPJ}AJUF@m5^@rAlHeyBPs`s+Hp{XNMA-%-G>MNLEn)yPz#oghMvrvQ)reKdTF0-S z(KZm@)!Xdt!L;fjS_40R+U|(UIE1nv5jsR-<2k;qq-UazAaShaqSHwvAI9O@vdV)E zAnM@LmZoy4!-ykrK`{^}ny82!rXgOX{`GA51CUKv*bl=Je%c%y)fB~8H@$x&^+iB|%xDU`-=5T4J2_!`M4do!fA(epe zQ&>DsccFGP9t(w>CW;bGSI~GT_IL04@HTkdF-9F+_KPq-Bqc@`6Y8!Fw|nq3V8SmJ zO+C~{BJ(HEC9;h}SQ{B?5WTz56fpCOs&?nPqEnER^|`gAs-gBu)kPaAIPdak zkUp`qBpOft3bb*G{}j4dNnofMKxjy4fn)V%T8T778ue{}?#afCLmN7%xmj+*P$4bo)MvbKa1cqE8E7c zMe<%`i#c`qw$s67TH;^q<~F3zkNeQZLp1$PbrS^H&!X#F)O-w^p^tWsKVKMbhj2JvW!U(8i%Glp#&7wJXywoo%vdh(RdOBofp z_iTuS^Y@i}d^8R-Bx^x!L2F^rV$npO50ibBWJls=`GR20)lcEh=uh-&2P2}7fKrVW z&yDyWNMQyWNMJF+G@M7!9?oDi$;=?CV72UyqkRM)q3D(KbnMyVTR~sk zdjY)d`lhwP+zZFm?aKRC?Vo!;hpE1tXC8mpF4o@;S-bk%9K`m5 zgOEzxF1R3bT~Ad4lkJ~E2i#80i_npD2;uq~yD#Y=4SV;G1O_jE=RjDLh!@Zzb$|AeGx5=PVMliUy9y88^)HUJ&X9n@6rF9#t>~Wd-^K%pDii4@i{GBBLJC~@6U zN!VE5J1&%42`4tV3<)eYDfA-~?##QIiPziYFE_SG#XY>Q>F&>btOx%M@0foFUR+$1 zzTVHR&VO8d6=C%TwLV^6!*!;eTOLh+R%)pjm1_hU#U%XWmd3Gb8#hg;Ya3bo8>bWA z&VHr6(;GdDTQ=43I&|?~0FBTuB6*(-3xWq~5(d+gAKN(biP#;CO|(dgR3)pn8L)nIl`BH~*b< zB?2CRDi{#*zo)q_jV`8@z{?^5!a+7oD_qmOExH^W9DrZhhB#_kNeDtymd?eirL>nv z616>iDpi}&^WP!dTk96X29r9;&!;Yf)6wZ$W4jV+smS>92iZJC)zbNe)oa_w(R!+ zA$r7X0>Y|~r%Z^_aO0tQ??dNMtqDVs%RoF_a;-dKh zDa638_VwICdRA^0)&fVYz)ovzmN1?Y!NxWygYrH?h?=(;ng~r4_ur)Tl!p~p@i3NS zWE(`TVie*ljzgfxWk~NUfeO}Ss4xiMwR|K>hVL*{U#q;&)B8oAk1`v zBFE;D>6ltn#U1i7jGaQ(1uG*eibyWR7!(P7OagQ?KZYn)L7wF2i^XKkEX_)cuTeNQ z4=Dh@(K}RmQfU%>okm!KFCc^%?lW;LKAF~9>VY^Z>V52ie+&itS0+&%mT;kPof)}e zqw~Ue9zAK&8V34|B7x;O#$8wHcd@ux3|el6x)vaNzBpm}JE0{AQKdDTX!}O!ulF+L z1%`}6P7&LKLW=8XkNRX4x(4OY|BCov_!d%P;kJW#J8`i%?JA$;`(?7k!EjGDKH4_Z zDrbSQy&p~pXX3%`GY`9ArdCYGj1|{|D`))d;=XXk|u*in*Vh1f_*!)-jj$9 zMyTHzQ52@Gw1#fY==1w8L&ych{zl>*nO4Uej@1ZXOPD@fDTp=XdxSxo-Pu4YbCDIrqdVYVSLa$8wgHSs2N5A4( zm&DImAkDTLurR$VP4xK@Lr;Aj@W`gk8WvHToqzbJ; zG+=c|Sf*Q5WFW2Q#0hN%go@3@^CvtBe6GKL%3m7NEO^;b_j|Qeii9Fr>nn(ohHZsv z=r_D6u=l*Exi&_Bf(R)+VpV8{Tn?G#`@I8lF)WyeG>Q;un21I(ftgn|el_7wwBiiA zxFvA43~GtTY-km)q0B-(-7j7~UYo^Vt0xD-pH=mJO|5+SCnTZhqB}9PK{jorPvU-o znAH!*%n}&SywkOw)n}%7_xIj1aWA}(xgKt+uRg>*BlX>V;k&LCVr<*dmmO_|o0~te zjsxEXu)_PaafaxR|4h8MU1FznxQ{)e-Vi^872e4b^0eVJ;8YvH;NZ67^&v;^H+snU z&Pzf$h>r@8a>%BhvbG?kApDYGTg~>~j|z}gbtaZDJk8KBjgUBnd!8b<3y!U<^*CfF zaZGX;Jj$#T#u_FA(Y)x*Hh#8u;rOP zVxu5Gi2Wz^-n9?aUu_%+`>7<6q=ZPqejFgFg}r_7S5MdOCSAS*YuD}}YGM+iySa7U zJHKX2QH=hIDGc-;@A9%)Yf&<9ET4Uuop8tfzKr%6q^nK+NnFIZxC8rWi0tcTFv=&> za6P`dnFc(rWV%V(pem-8tF!04&A@vKqNOFAz7$?{D)F7*fNRX$g%gDWp`94@URpqn zeQxjA>o&T}7vrkk8cnVjnrxp<0cgJI+EsB!)4F;}*U+;T}7khS->o(m2J%^k`cU8Z2yKDrs?!K-E$WQh! z>=(TLaqc;|v6B7%{*RO&xhVPNgbWN2VPJ**eAn8ue|3fH-9v5j`nc3{px5)baG8G} zGmTt5E|ip)pU_s}-QoYrost+e^~JI<8G` z#ax7j{km+(N1|8cpFh$t`rMVZ*6QXpa!#sDdQMMbWhs11X-fHlwlH5iK{R?jC}dpE zj2G!7+^MuhgONF-IU1fi45Q>_)z-DJ^2k3D|Q-W}o0?WRqrO@)qEBlU_={ z8M>3>p{LdlAsmq((fjVS`!Jl^kXFN{OmDf$wjF~>#(KnzlZmdYQlhdGNlbF_?36}^?`YqBv{Dzat%rPY4yI3^i@@#w z6(v}|(l8gv_i64_@>@!^MXmZIBk#dm#!vWp!bCP+d>p9!lX>a)mBKY-v!qi*=;w<1 zw$`KLig<3Gx>fa8PJfNc;o}BN;_PUfBT6DdVnbM@`#54sF4HJ zwBoU{t=F@vTVD$a*N2?S5!jA@=j%<6T*22{uAED%(+%c+G~POlmaEUsH#cFTLzuxI zHKVmyj8(#)M2JPJrIt-Km=(TIyyV`_i6wTW0J~4#m*HsfrA_KY{Dyux>koYq!>cP} zR#+sit#9mS5GF$xVAzxH@QDnI#+#f>u&Yr_ozb^!ke*8@6Q`5b8X!YV{O>#4cQrDO zB2ug01s+j6H&0nIZoQ?g!FkncMlsW>X7ZNmmPvBOw}<6+rhKy=u>K1+7hpZv2(L-) zH4_zNAP&co|GXoGlp3cybE40#aZgv5K%D@`+Wznb9l4!LJ=8w?0;W?$IuF*IfrGKK zgq8J{`^RTjs?_qlwLFjQV)0+&I@Ye1*jA(y>+R$jo>x4{tPVZC)h@S+b z>R}Co6Ao)YmU#~o!j%rT^8DP-AMs1gl~$hi!bI>;cNdxbkG+i6tpCA$RF=}i5M=$w$m++TEQ|G*dd~Sk+DNGz zuS$*xmMn0lT~4Kq9jevRsLobE&lKDjXhyI&MM z^;hlFk&$h}Y^ZIA(zb1Wv1!t|uH!Cb?Delk&W2M%IA!w4N6x8$edtOb1$(4NP?Cvp zC`m2<&apNP{fjo?6({h&$MJW`s3+gxm4&0Hwv`{aM^Eh?ugL?^rm2*^#65lT4!=A6 zh1k5^u`av2gE6u8UXmw%t{;vp{qCkK`sC}9L$0*0hTcQHjo0A5|H29h1*97sEn5H#d#UgE3I`y|2U#M+dUDu1h*sk9SJk^)B=@EF(DhF6Z zW3%4MyB;Dsk4r9JoS||WKf#EM%Gejy8 zp7-|p2qmc|F^nDS(_=%{okpBmt$R9b3ce@`!TLa2xr&a-86TGES8s(o?CM3z9SiEe z12CPIwmpe%D~ihrKwN(33I+hHTOjxZLmW#0I&AuaUmriHEuQv>9RC4U&ETywV9);g z_jl{Rjr1Z~@0j0|Ah2y-0t9Wd@*FgVyPHDdKKwW5^miuJ7hA8U`obsWTwnv8i2bP| z&s1W^uho(z(w8HTof2j7bK$2MpFyxBN)T|<#&+JgIQeaaHCWDAQSt0|+tH1OI|Ea6lXyJ>42e3#0 zLcE>9uLb`A9EXpOk8(%X50{oNzc*hJfqwR2!oPPhw3Itz9~u9#sa##*djI?puBfOe zEVtX@z?v?YvKjdQJMML-m2-ew^7m%AZ5QmG`>1vh2+FyR2E#h9Gvae~22jM0epU#Oi7k1-b)6AxP3 zmW|5L@)>|5XhPq~xB6sPQ(`tZ_F4kZ(#C=?D~q&^0T=FFVsMw;$0}^2#6&||pxf+P zwhwlORwWW9YLV(xSdXZTkuj@0ke8*7aevkz<=C(&VGAm!x70Pa9XFxoOZ6PWp_QQ+bu?qI*CG}>B21z#+i7RC zQa?&svAxq=_Om|BPz=wM`+qc@WmFptw6%)|cee!hV#O^GoZ?P#m*Q63-L1I06fa)f zic4{a;#w$BXyKdpe)s->tVI?X5@yYrbM`*Z=1n|ao714L;<}83rp*a=%Ka&W9Y3i%|>uIi!gifr!-5T0{+) z3OiNUL?Vy%dE_6aZG2BaqmEop=pI%L(5W`-86T9Cdi&Y&rY$v zC;bu`l87$^;$!+WW+-6tJ&s3~lXem%#l^mKO%AD)PqNjZQ$^uD>NcvFL0j5u=m3@9 zkIMbYI}0l#l_7>ydwB8T`{e^yw63sb+7&zp+^=C5ehlaIv zm_qu2ulk1)Zrb8LwC)h$J%596Wg_h|E>&KQZf;psA~VI0P!FQwScr5(PqD$^7sNay zdy}p4%CD%Z*4)+#{aScm%LxeYs}~D+{R4*#-JfP>u;H}lY_>ql$|2Bbv1Saj*x}#G zd~?o~m~VrLt_D)8laPlnKf^V?YZ9@SK{G!mEjIXsjbF|ZG%I<{a4=wY(Vg7FjiGEL zJV@q>7g39W7pG1N#4;Ant$!OK`|W6ItcmI|yCxheh0`wZo?MG)PU1BcrV$kumOSW^ z_;T?UT-o9r!)tX_94b1kR1&dpGB1PkPd6n9E(O9E(th`1BM~vy_F&W~AthABc$~4< zsLWO$ycn^lU{_}F68VM&jZ1g5LxJ!CrXX|NyR z%gtjECS6@+{*hm(Mq)N2^u;y>ESMjcjW^8Q9N#`VbvrzSYfE5DV7DDez(y7h8@zsl zO5agrL_8Nts~JP|H8v7{E~^}A9Bnmf5rheXbAgesvPiL5gwQFt4rez3Ie1*Kx>)2z z8;fiSXqMq>mt+tOyvu|cA2AzFg4bqGtQx=mt+;1fA?O`AEL3avJo>OB)?#Sf9h-QL z9Dh$ib2xV1YWQ7LX;e_woo6b>)SBoUOL`icYE89l5{^8Q%s7r>-4VnlH#zWLorvGvv^=e1&~Z9O3O;YRF|`t}=B;nkU^IQ72c z@Wy{xSy9OmCY_vAZ=<8o>h>ZG#XCN2c&DS;|-lWdSGdvd&g;!Z}Jt zCx}S1vkgvftLF!q6=rK_MR3F_Wq%N&T>=h&2%Hv*F4O!(>j#$JdM6}=$d<^Dzh<(~ zUfZMY?vdNHmNcjleXh$p*mJ_2oAcg4dVX#QPv`yH5+Cm(eMrSBGyBuw8{-m1&6l|D zD!R2k`rc!>=~!JGDZ1<})yOFv*#-y|YarP9WFS~7+v~I+u&3dowYVGes@kct9I(Vz z1cTE#h~&_g;I*E*!jR_yqkK=nK2)O6eOCe<=&{|cE1&iDoVTdkTB%1XtOK3wE_q%@ zW&hT$yvM%+dQ9dZU`TI~p+@J*5h4E}P733ughxv-O{U`R zkM%achZj+ub#^#99-#iC8;J!@pQv#<1C&FvU7d7ubP#0E-wC4SH``Uk_Ag|{u}cz1M~Kp3Ke(u=kAX}#@o%PPUxA*Y%Wx4)ws=gP{^{uh7-GKM`PJ#J9 zY$R}qO%Sr{w21G0oYS{I5kR?4ePP(Y2kw;W%_(G+Ra68;MSB`o0zEv20rMCz9uLs{ z+uU#HR0~MdBhSE1qw}@MwMJnz#-MX^@61m<-=P_7d;vp>3!Ea_|MLQ&Gp;UYRR+O-zars1*4i?g5<5Hm9$qOEfZk`>fumMscH z2$YOOS@^DG2_~N<&*RJO1|nymfsd6bPcHsF?iR)(Ij^ilUTP3*D9t%9H%Yghnk6R| z%+SOivwv+q|bqAU3lcO7>OVpc`W#w~6KX{Cj5)&w06$9T}B>MEK!OwZE{~j=gadT5UnC@88{JMo)pJDL}b7Yq%L4t$vM5~Vn$|7!YVHWRSu%_ z&un^imc~$`0qLPot3tjcA$tJSTG+`Q-asf#UkSJNL>G`xsmACU;^3{3jn1LW zSp;u|=`gTl@7@}mj#~nubD+jpH`x&EKs?2~K{E=JBx4`KKWK1j5sEsGWue%{!k$nr^F0CBW=BE&5K7Ze*OMRT%UePlKoe)= z3om}5-5L@xaDRi*d7!_avgQBta>j5@I^e z@f|b$obE!;miHSYT&FzVOTzQ0Fr3)x*nh#GBvHwI=7*c!e{;B{Yev&&|_VZl9S0Ym)N=qY} zR}1CC@06Bq$ZVW%KrQuX{J<{hHb{?eB^PJIt&SGKThck0>HIN`;GB{Ts>kmq3Sni8 z=V1V+OJl&PbrDBhN2K+6h7k0VD-Cz#vq%M3rpNJwr4Do1_gqxsq)5oX2!DjhJZzb{ z4VIYId2oHugt1olV3R6fL83#@91|ETeoAW59?xugz|EPkM=K>sjTIfp(RHGf(!bSEqJG4P;wt(4TPDhW#3R< z^kUT1;4m{I`Hw0eC07MI@xNQN&@jm?DP(*%2{}Zg9qQa=#LSku2#t_aka##$Bh16+ zL-`cpqRkx>O+&(=z?P9rD0hmX?>K^245hQtQwtGjxX=gVM!gMFbsNLlYe3UQZBia% zX|T$)P$xm9M{U1%fhrQVA=7|pW#}WD;kkiD`!j;tp!B|=HycSbz{oOc<_+#~K%${h z@#oKmoDI%AW!iUp@CX8;!#b=lpAOdp9MZhx0y6{oUn+hH9c)Sb;z)miuBu=NJm8Xq6xCwHGmiSVDj7P$^h(DS{?rz|HT?$7)NkoWr{+^`BW;?%qW}j4@ zWZ&Qh@lE=$!k>x25^H0k2M^W4s{7SEtYX>o_mOQ?S|3ULzH8`8aN%|6M&BwgzyX5r^Am1LNnT{~~bz{>}X#0ecw8LIN@t z{^L?|`sJI6;F^w+3JO9DBsD+%I>rUwL-HikyCiY#H5BvCFCUC9R+ZDw^ab87|J{xB z0_Fs%zsH(KD=Jy4+rnmnzk~u8H%;9e7%%)M4^WHwwKI}<12+V~dX@OMTdEs9DJ?-k z!QJ!oI3UYtBM*jZUz@~SE2YP8$wQiBS6jn#&dP_PhE_KP4yj7fBGsCkE}(@w z3~oK9>jf5Tqg646EE{*$mWR_j==2qXimK`GTR2iT=NS1Bo-l0ChoC))t3hHnEIW7t zn*%&`dju@dSVw{(|nDulT+D@@l-`Kb>ou z2urS}v7ex)q2O$OhNIHc8!s+h%45*e?ADU=VoSJ2x(pKDT8^7XvMtQ2JkmV{Tj>jw z0f-R>`*wElbegK+NF%Zha!PQVo;CNf|7SYPrnp!zQZ0izLY-LD8JZe$|FR)CjO!+^ z4S#-+yJfE`>q|RGdRSKeK>0;?rq*A zH+#tu8GmEBM zTXtq=AT+2ULUK;KmWjlbtsQn z^b-f`j>5dtL?j}xuHZ2J2J>=@@{a#z;lB^n8-+XdavHk^uqVe7zi9(f{{EcvP#J!> zw$C5J!>uK#C79+QTNoT23dPo2yzYKuJN828@;hY1c%X@K(8cL-%KmBMN|(T~n$uFY zn5RG#Rz6k#)UNV)G%`=N4fM`|0nh)<#Vr^XcyIbcQU0#$7`uF3&1EoE5!j`S0&zGw zKHfiH{W(0*(7yr}m)6+}fHmsg*FE%?t{%5awDnKLM1uqY53BfC6ZutS*!jJCs}3T8 z9^(@eKy>YnRokrjOihhweZ5!rx3!zcekm$(zad_yKgYYI>wkC=MRwcJ6L)?+Wjpo0 z=8KE^E5J;ubDi4$iUSl)M(TjELmh=X(cLio0LtT@TcBirdwH@4?AQT{BH!BEcOH6Q zMS-^WK{yvvRMz(N(?oH{Wq% zBLGTvWc~MYJksyaocY)FtSOPaEn?-zA2d9A8R7qB@%~;mzJ2;_PZOe!mPjSKR|7Z* zfC+g8pWlFQ@P+3@iEQHIgz<~yRURNl)Bu}lK$u}x`AN^?Jrm8MepM?FHu3x~KPQJA zpq3o>;B(21i-+&u%f3Z;-Mrdh>^8pU`0$Nl#?2|>?cmSH@IK8Er2{?_H-z<`ruXIx zVN=Snw&HOtMW)huJ1OV}x+YOd*Q%bABg7>Ct_tF;*0xgki;T;93pckQt zr0O%+X(>1`J@Y6Cmr#L;ljs}E@ck)1XDG<+sjU*xy~F~Xx+c!&W2VEZiKHPUYHzedRV!AC2}V$hqwHaghK-01Jx1i z!0&M9prtDG>hjB`KfUw9rm=QIcy_sW)kg&vC01x%5GJq^$%)>p(!*@I*{~lR&MPQ= zyDWMMv8s4CX@B=QL=z(9tc(p&;4ejH*+H4%!U{PSNlM)Z8_Y7vN+IqrA-rHVEK2w? zowARqq=>825|CHnK;Z*@1E{r#CjN>-|gzajDoL{9J5n` z;ByK`*ovhk^+Lm)F3Q!KoGY3V#x|XHdOVTlSBlaFI@@fv>?l-A+O#_LnK)M~U6RB7 zmA!`9iTv4c4ya|tLDm~*;rn#1(irz z!eelwV1mv)rev9k101o-XimD&-M=u$x_*WbIk4-SkDxB-u|mHZH{!Ty_&0cRc|MFQ zcu}FqEj_a^xp1XUH0BuYsOwEaCmCY`C$P0|3y2|_fU$zECdEoMjY+COhc{BT(8k8| zSotz!g%QwHVPr#11PE~iP}~TcKGm#Xl2`6AjQKo&WMMy0i^41`PE1(IH`xtlvHYr~ zlr?bnp z+szf#wfps8&NgtHCt@OWA2fHn<2c;S!N`=fLu2X5^ceNfjD)%6-wE?-es>7^s)ePh zKbX{=-|L@vU$8Amwn-c>rf8B=C2I=&Y9G_GtS}?tk+zCaBFJ(Po;^)4a=do!E5nCd z#aM&=x2fpIn- z#34AvjHJOa8gz0u>uzktFw*@_C26{3ihW`T4hm?9B8x0!P+NOW0{S&Zx=7b@{=0U3 zJgOpM!|ZLuBx^{}hAM~;H9BQ)ve-dsMbnh8sI~yQXjYr+Jb>Cu!JWM2B#Lpo>)boU zZqZ9ENe9*%*AMg^fD1NIyyO(h?V9NNB>ug-*8+nivAyfWzxL-=^^4dNw-e?zQUYQ9 z<~7-j3){o}^X8BGpMhz7)zsVHw+?!7-_+W+jXv%R(LpRN{%P$>aF4CT{Z!+`;o6gh z_ThlyZVo=Ku^>8&qamhUL&!*OAhVMnq!G@#?mmL(Nv~G2zLV^-I-lDjS%`w*N&Jsg zr+RO=IbjG=0^oTyQZqRGqj+5tpo9zVrAUc5vy#2(c}|V{9ZXU4i{a$uXvO@+;KpmO z!fk8kg8QLu%zW>kRRcrMxQ{{R^%p+p-b*iYc8?=|)7;RoIn{(eoOs(?$!(uU-<0Gb zTO`EO%X)NtNp%`R{DcAVT4O1x`vOm#2%(4~*|yz?H0ksACaMe|z#L;C(+x);EHAMs z?D;coZn)w*?+QZEAH%`or0Rcq$m=VhL{|FSv;2S=kj>@UJ6zxlzhAPta0E=yxBW*a zv^JS&{QexE8jE}yD7kgpiX5+@g{5u-U7QkI0@9Idk(2mA-$`ZYe|b44LP^ZNAjvxtcJQH_e& z&m8OR+tG@d#~G)WkTH#W#iQBWCE=+^brG6Jk;3I#} zdS6(n{jU3cr9_Bu6W~KGJO@Xq{l{MaQukK8RdxzI_;qTK-`iO(ThXe#j=HQ@=<)5( z6Y1;OIQ6IQI=`1yd@=uDxa$|sE*p%0Z;KwLZ-LMEKNp;%qocEz0yDr^2?CULY!Se` z-qYcvBG5m%;@AfG(88z#9)khS32=`BtbP6WgFm9T8CnXv0PO_Oysesd4-X^G&g?TR zJB;s$7I=YS|Mc+iwWr$vs@LxOb|0?%lN7Wsn#3&Q(wH%HImkKEt9+Jr3buvslNs)? zHu2vk*vnR{mebkGepKRHgIi2ZlROwo8R0;W)huS>4%SLSo#k>N%_qQ~W-DUI`T51h zM7R6mw4Ix6`V?%`$()HNWJN?5u1R|9R%|Y_DBL27m#~^Y4&Np!Mj3TMMO@^nY$}CW zvXQIy3o=J`7jY+JOR4gPR*~MWziig6T+LR`Rxr8I`q$1zcsy&4s%D=&7SUby<3T%; zd&S*!xv)l1lt#5$Wu_;L5*3W-18L;CnLwTadPU3DV~$`PUF0(KQ~ZS@?usT`&Kw?T zGt43Lq@Gv}_asDyh*pR31S$eykmnm=kR{sbfs&iKd7k6%7GkO=k2q79>h4AqoJS`7_IzW- znw009o(r$_X38~1QWI)#x|GF&Qq6n+wQy-{ejG1NuZrAqjl|M`sYnh*P(Tm+uBk%N zI{5%+WKV&{L0fIm0snhk6)tMZU}vZ7o>*;(S0cpb20=952EQtOmc8o8LXqyCWZJ@) zp(CPdu8mUM>+gWPPPL|8eePAokMxKpu8PAau<&j#h9wRpz(sc;66%=eAbFnKe){-r z&M8h8F$X^_MtZphxL;jWn%8g493{)jDIweYv{4o@{;dWE3|~2rWr%t zN=So7gk0tBfU~Iy#asxv31#Wz1W1ZFo)GnqA=!ys!iyS zOb0lbqma~HqmWPYwQ~i9HE^fjuU2!%V%*Rvs0=rQxlNmfEfka4!`Ua?r6=?FpasN4 zMEd-GPItK{{))e&Z_E0oGda+G>h>R9>`u};%-u?sWxnTsku<1h=100Eym@`d$a-K87j7T~S;Go-Q5vkk)~=K$nTRl~4^_kr@9rB`q~fD1NWe z5x?iW#u{rf0a%j~fsfAK6yAyL-md~nP`3eN>-+7QF8t4JB?zpze@RmPWV6~JSCx!# z7Jr9w!p!}h&?dZ(>l?eyWH(gEE$)T2-i-R}y#M1hnrab+B;)5T?%nm*pf__Pr40o& z#~?Up6bHV&H`pn9D}C)qJmVZv7Z4dR>|3h}taqazHB7i7iQTt7cVLSQ0ME`O}R?q(D18c`e1usvNkDBl$ z7gWo;+BnYLKOCKKJDLMF9P3y~QQUd?Qtdn9Zdi!oe$P3k;z@CGVmQX>&?Gi)C5FHO zF)3Fys)byzO8Tnzm3$EvOA-VvgLtZVY$L2a6@_PBUBV_!xWWKd;;9mNWi{huFYbNdBs82YJm5F{ zwjqsw;-8RK`}0oRIRV%xtYL~YpY!8Pm51bS>JCbv>3-$lPF)h-7V-byCjPhY#}sB3 z5MlL`u^{gGo{mIdH_U6#rt0sR_-octy>W93FjxP&di^{jezZTD$o#V+Qtg!z2iK0- z@9r96or6hieUw}9QE=O_N?R51YFfYb66xJ&zUy1)5M=Xv_#0vCi{v?8o@@Sf_xF^u z`ClID5#VHM>)-t3;Xg&)`8G9&BgxgAbgMJe7xPcRLJYsBlL|k;oGudZmxO2EAMZOhYwG}xn8=>-z)9ptGqtDR?I!y4 z3wIP()pWu|BZkF)Ym}3Sq>=fyT5*=SnB;RwiI~wVPw4zchsM~%o!-xA~Tb>lR#`m z@AiWJKGR+JoYU0p`_&_&)O)CB9%jDaaS zgLtAY_u3J6hSJ=FIg8K>xf#Sfk_e;_r%Ky3Xe*mkF-rOT0O3IN+h)mdCdKJ4m!;E6 zKrB0UmS5yB<_~hJJPvtQnVNK?uPvRXH(A4sSUs*bgl02(h;AUV0fw;RewsW-%u~36 zJPyogpKh0@M>lV$hrUNAaR``J4Z9k282(2!D~IbU zMLvWjS|NxI(&$5GqqHWduag0U1vcV`J_>aIpBLbl%ozFbS7kx_$)#hr!S_|_Omx+f znYeLW-*mBP$`}UO2VrXs+yw+J@wZ&Nn%D}FU$AX{1(jplx_J&J4* z&Z4kb_udm{$m@O`H(e-XA{t-J>tOtWK1^sTV!A$xs~cTtOPA^v@^RaWY?JWTZM%^{RHy zNf70yvS$0r_PJs7u?$_Rj;i;1t3TsPgX!#oEs{>!31SGVKW5g!p{G))ZbCcy&!`H> zEeSD{)^T~=y?s}Nx!yHh3LsLb-rJt{;1Ec%@T51O{)jep ze&^o)v*z!)>e$B=_|=rjzR$n2%K6wvD(2lpr0CP)etc&Mr7NR}qLXLT#qo5HBVa7U zPsv={IjXfN|Bf?ED4$W>GCmfcDjh|0V4=#OPFTsrefWit04k>*nx-6+X#IsLI$1DE zuy&|vl8%(<8O64mR>((f0!b=@nCRUs^a(9^c8J}@Z2+$k_P!4}*nmBv!S?qlO)Iv{ z42$D_Lthh49d^G4HZQUZ$M6W(8!%_(fjgpw`@5>0jJq+Hw+8btUDXVnjBsKTLk z&nhlCMJ6IiUa1}lZAo;9%rPjFhM|(a7_$~r@%L#M+A+?@sEfJYIN8 zdRojrjI1FckggGLGbnl6mJlNwT=sgez!sk^FgwQWhB!o;V06(O)GJ9}HMfFK=N@C z*3BwoJnrSq=bJ042X0v|8q$Bpan~agL3>}^#nDi-`z2P$6A|o1+7|3=}`<-`AaCHFjf)5eOk

>RzpVIa!1_wqU3K%)DBrZjKN>pI3K_p}`kC!V zKGQnVO}x~yZrH8Cfjf4?@unnU^7qkFA+v=BEqG)UdRt;P@55`HuznUCe!-{ zq!#ownIwiZ4hjk^u8}yR5R@pEVyC=hW6ZRhFMo}P#ha#3U&B@ag6Hcm`oMtuxX+(I z!x(^yM=csK`MJPvAhfypAtj8ymj5ehN_vx^`ST7J@vo%fkoV;GIc`|>`N{lmlLzUC zA9TAQ3(%B27R9X9){#(bvE99X^TR*trs87A*ZFk-440Lc zi2j?6UxSQ{f!xN2?c2Lybp5ML7Gi5_znhy;5uiqXy`6juT~x(Av;}sXs?5qADx#|h zrVq1FfKBRuo7gMO#NFSv7DS_zjJEsuw-e`nt#;#_w@;1~x!_^u0_{A&FFmt=xw~Wj zZ$f|R+&Ag%?KyG;gkJz2ux9_6+NN*I?f$;6e+p6=bFUNkoCW@qv%md0B>!(=x`_>l zj09NPfBxRi`~e=TTDP{(dxD}&tR@$RLT}vw9%x@=;@ijLP&ctBlTcZ6ug&qLagySJ z<(c}&%PwNS+ZzP2=W~1?YROvQv~30Rob=oRJKx{_-5VHnd%LibJRX-MKK}RWz@ze2 zH)Ho_)gK0m&l63~0u=2X9lHQR)HZv?f$xDIpu8RbDNlnk2~myy{{8OxAJv2D&FUoZ z+*O`zIDUYPh2)vJVI=xf^>kb_t~Gf-#W+K_AYlm(Uc07xxq;$AXi#|+b9u}u*ixRs zdrMhFhE9|O?wSIoWT~WGjU6n}SV#ge8Xw}I^q^!YgA%DlUD%zAN})njvQ?7IA=o35 z?h7<+s5=#4ZXT96I=fziF~7rWjXsGLqMoyEOh&O`2o=_AKZYzyE>mb1D;CK&91IOa zgiEO3IFG^P(NxP)SqR=wBJUdfNVcR;Lmu4g3LwLeOK3@(=z-{LANDr%D0Sl4x+* z-iXBFJ)O3?6bjj=2S@m3J~2&k?ewP6IS)uqo$+sF%^6BL%UDp@98YD zzs6BE%gm$9>EN9QO;R!|xR}E8K_(5EzrZAXl6G+@F^j@9+;EH)8G61qAlA*sCitT< za?Qtou=s$I!W0~fe|k({gKJfvd%>sF(?Iv=I+)VmPo|E2uosl*TOt)U&q?&|-pYn7 z8e=yIYrvWDBrA;G7L0|5nhe7lu%0O#1G|;F2cvfoo|J4))e~T6to>qIr?5i#7sm@YNxPObgtGgC>PaKB`OLG%%aa+~rg_mfge8ob^9HwD^v z(R-Y%kR2rV^*QG2>dYTdGad3J;U8*-eR-1ztJn=}2c{5TMq1K*d_2=ADnzInO*%n; za8h9nEpsV-^0x@(sC^a~+FaNMVKD;fkSZEnbWEJJB@rFkJmVOe_BeO@s!^Q6vB?{ZtrdD!Dlbpd#52TC!x@Le_jo=no}^+!;lYNDMnx!D z5oN`sa-L(NxH6SH_;)mq$Qi@BSV9?&FvLUXO{qH*Lui~_aTaJ_awFkn@#gmy{uEca z8s)_hK{;xbL5l-(r}P3-hH3~^KP1ml<|*tRQ$rlZYtq_IyyQ>BDNj)eWegp3z-9dH zGn$g_W zGz_32rkmv?m4vhG*TzPW?GMtc$mD?`+-Q4GbOj~2@noDC#Ti0%z0Rl(jMfb1o_}7A zzEb_Q$)Tq2_RmUO6})G^I-+jOz+bZemtgnG5p`W&M6qzWh>p=Hk>Ibt;B>qE&%z?{ zG5bYZ@bY{w?{Smc>r4r3KEy-17d+WI74k(9y#fAE)ePgc45Z(mZ9?hbLrXF%om8$% zJSj^OL1qy(-~s9jj8`3b{uM|Vn8$Fx6nYRAC}V*YOD)V+h(j}+_zPHomAxvaOmTpwo|r$`|Yv0#BFCtWAGu95mY`q1U%jg}&df>T%> zzN>q*Hy|^-$yj>a{i*nJyYAG{{mp3I>L-ws^gk>yzb?^aM&g4IRIt?* z0h0GZqS&4nkah77&1gs~Y>F4N9${h9GyBg@Khl47{?H*-IlN*9@S_Vm_nh(8cem$) zxXRa7e^WN;U1v|?dmq@Ycn$cXdmRA^vv}aED9}~M$eL7taA^BG0l-AZfp|y3f+LW75M5)B_1%z`~HF7R|QfDvR;p7q!!!xVNKn@ zmbd)&%SxFPXr8<)hjOOC_}AUD;+J~tc53T<;0EEdtW^^mdcAS);eW3QP{v@0WduO& z9r_Pr##rFz>T@Kz+cT_o{<|NA8Yr9s6j(6Cw?^IVt&6W>fo`>}>h}K`*7R9*bac)= zg?+97Q#oJGlrQTdFgT=Um|pc0U2>aA6ON(AF&;biM$F$y-NDA3X5HQQRSCn>lj9qF zav+3hhcqOv%FKXKB~ymu9N`gJ;#nkrm~fY6W8pQ&p(@75f(Ogcyc@$m_53DmhFhaJ7Y6 z6A6SX>I`lUZ<|pG$z`~SQ+|&@S2>N74bK!Zc!PBr?L|8!tR9P~b)|h_b??Vjg0?#!xYA&}<5VTVI9>frwYF;4KRtk!w zHn^BM1Y#z!6g?FEqQG163vI$a4(Xk2e0rg6NqjjJbRsDPLvX@8G4aPCRel_WV3_Yt z{EI6f1c0zC(KrhVUoP|VXBFOr_eWF(6*I3WlyiLRe`+YDVhQv75TXQ?1nIP(a-{De ztuk6((XzpDjV>RL1pjPJp-{XA|1@}3?L>mn4B`)Y zl5F9NBgFv+3W8MT1vD%(kRy<@b+D7Rn+QXfgm2Aa) zW@Z!3Zt2@f9ljn(`ALFxP?3BpxI4ApGE91mL+V9NKzj`5FlhT zJ*B;rQu>O2Odjg~^hZ*Bd+sa$qkA7a2vMbowuM4cY%fSX$WvR{0wDq6)61t`S|y5Z>krB zq<>TfDD8iaarIbx*?0T$ZfW9fd66+6uKB084UQhBo+=f#6of~RS_xpzmY z3@koud-z9TFFjrqJg;x`_k86v`_$zr^3rHL+T^t?>(_05U?9$DzP!ca6)5^i;}EdG zveA+ZOjyvHq3?z~vrx3Nl8tF`;z)76sTp%C}`Nce|fWu`-<=%YNSY>J%yJYLuMqr6A1 ztBM9k@WXd2NirN-e-SdI_(m!?QuA~3{RNcE9XNHJMhOJBA_^pg_jB^ImV}t@(uT<| zGQ42B%9jhCKZ<>i2Jk^%`-sqX#2lV3s00#* zz>)ZC8?y5{mPhNyU3gk`t>Ofw1C2Aa_tW2ZGe(=v3w-Ap3ch8T9-bd(?u#2g;S~0# zezfj*Ck1E@SHwnjRLBV-a6ys8#D2g3oBQ63G7jv#4c;Dm?niwUd3*tEWBb&vV?aiu z1Ps`02C$le&uxMGfb5&Lon%0Tt4hc*ZJqD;H|gI?-_J8jm0sUzLg-!;`tzCSTO*^J zf7qWt7+3+>WS*!|C|wu>FOGgl&8@$wqBjrd<_{qcja z-l%6_^S^~G06YR5@|b^)KNUMMY>0Ge^}p?QqTS* zyWk(%?K8E7g*c@M==gD@smr6)5q3|_PA4rTdvF`em%?$gmGa-bCiZw;q^$ET1qNy% zHFR0a5KpAwjlR-7_b#n&zwV#w9M$%uu4H^1r;_K{d=IbXVHCteX6PsH$?-KoISjbM3~tIv=wHlcG%Itrf??6Hlcv$fhb?n z4l%7HUy4skZ)jH@wM3n@Ok$k~JHZgET@#iU!by0ZY$SKXPnf`AKHyeM9B~gi*=doI z@&0nCK;pwU6J`=-LLxwp-^`k4 zo|6@-^Cry*Tkyw0x`BMPQN9ZaGXZHK@lY;3NjJLlVtH$<%?VJU--@g$W=Q($RuU*^ z#URPi{*+i=p7e=74aqLlR^2QVDk8N(tVa65qr{ROpMp+Lq=2Ou&BkMt&`#wnWSijG zf7o{z!U0y?tD|MV!4l9^E01^OWA2h)B^+LqAduA?IA&@IX^CMY&kUWGTtr?EKxTz|;}E=kPV!iPga~jb%T;L|6DeUyf*u5`rH~%0;g)p`@w-L$BtlHB+pJ zX+~NM@hEfi_I;B2e&%H#W^;b9b>k5 ztfcOYYA2)>#;bekV{BJsS!M}oN-t7(!F2~7VV?@)aG(SR2fk(b;8(x=eK~nj! zBrXw5T{Lo?YLzr92gFVfvb-eaSr^3O9e+tnNi!PZG^xHujfC%3CvqXmg*koY3B+<3 zze9$IZYZrkA&!P8p%OrycXzpvr$pV~F2Ic(?duY{CazQGr zwAM-ENu{eS@O>xqbNX{akk(=$2o1&ze+iFj{KPrmRsRn{=3oSUrdy)REd`%7l(KiEJme0=U57J!TiiVupBO2x7?{P^-G`|yR2 zb`)BjWb=?bZ)kQfS9cbOawyU!D7};@q+;Gsmq^#OcpR3hkQ%K})~V60?o)Ih^N9w3 zOlG`pw!Cx>Dq%pQ6Ws|GRpYAF1Ps{eLpL}fl0yk;%HIW)+2riQ1mv5Tym1c&A$cp4 z2c9Yp^-(8sOzkS(+CG{>NOT~#Zclv%!(1cEZFj$YZ_E$5;nOKR?BH5_gZ{74O3{v~ zNhCzp#Dfkvy)X(8cgUUPMlU@`(T}`^Ig3#sBU;qp-hXhH6H1n&M3=HBL0z29^_2p? zb+K`#eA>nJ-e8o!znqSQKx476TForhP3B-21mfUvj7e?xq(jRvU`uCaIgvDGSo0eh zAXPlKv1OarL(DC;#5wbmclJTfa~=?<%-TsXG6wi!O~0^Sv;i6Ttuc*6Z(zZS!=vTQ z@K3U>h$QpL-`eMq^Zn~LR~4U#ou)IAyyklYiNf2y3e|`5YQfnYu)Tjl^tQb}gtfNTC=H5UUC?oZx9E+lt0QPPj+P*RIZ^7Hhj)Z zcixjo27fpj83}Nj>2mh{kWl8OEylM3b;FR5bomSgBHku* zRr-CsAlPf7<0F5EBl&i=Nge(M8p!c>t2UACim2BN2H4OxQ=C$^(~>>HTYN(N(D;>Adv)R>rb;9?!?dO|y)pG`|?5w(8O7RB_S{B-iwdHFfRk$a6j zXqx}Y<6^A^Cqq zePvLTf7`Y+OD^58bazR!ba$6Ds5D4-cP!m0f~0_e(%p>+(%p@u$aDFB=brb&&VJb0 zovpdf-*Fyw|L0I!-0x4;_37M97{_X>DU#>oi?=iJ^9olx2sy<#l;@~EE z1hfI!qsS@=GOb>LgP-osFJA&4F!eZn{yyfN*P=%WJ?uSA21ZUi^e#OQ9)G(u&slmI zTxSQQTH^1#5jpE>@2>&XQ&;cF9}=>I~o zA9gcM%*@u%kOQxC*Du=EpU?D_MfJ3S-iwX9!k5TZ&!OXEgf~Igm`~3HV?ag1q<{O~ z(U1PB%sL^;%vAf0@Fe6K{(o+(f5|I>U5qq9ujIiD?ce($2{)h)>&Ey}$i~g;Q8<+}_kL=e{;M#1N|eXH zk`m)L$7*;zg5dp737(^Y`e@21kOZA8>Q1sfdKvvJ{99=G_QKnbyzlL5^Rq&;aI^E? z&dtm&#=7F*?Q^@PK-?`nH+A*5ipc9yeJl(L))2qAmW0OkHbD2-_sHf2yOn4FVGJZ>rerR+6u6$3;#`m62?8SB(EG6bMDAc0Dx{W$ZON8Sa-S9HCT+(8cR+l5mynX4;mPE z`HM5nQ5bj`mbsR&(pMq1Z5CSG*$X4acBzhtgynfS-xzchNK>9P_c?Z8J8V1f+(Y1R zY?y|^;o2&TayE?VaYjNdXnHOz?s}-yeHTvkI&N53sH@#^sb6W^NUA7|Nj*)cGm+ZZ z7Pp281+Y{74W_@BI8&%4Oz-E=0#$a;{IeHKaTU>-s@zV zd6v1Ov?J9c=fkgg`(?R8a*ZaUCIU8=A7c<}PpD-yG7rT>q|LTmua@S!jPw(#*C7>F z@CFUQQ9#NOZ0go)rTiG~16|<~TuG-?vnt;YA(WQXb}v5)HIn87E>pb2PYOZ7%_!5t z-`dMtGOY{Wc2EZllF1aTRsI#!FgMD4(W=056U5NEW4ORDiR@rTy)8c43d zS8t{nccbfn`zuCt14M#1i>U~0FHwytrEN6lou}8Na5NAIDL@eiB_CKGGz?_8Hx=@} z)xvxSr|n|y%>yPM&;m1;(G6d$CSz=U&>x~dL@VJk%}$)iJw)WD*N&smRefZ?ks zHDq7`b3MsXQ)L`b1SK1cyxSJ$tZYSh*B+MoDV3=-H%f=MeBxkL^4PzF#~-G1U_8KC zR7hVOLV@5Xrr>?kZ$u*;&XR+a9t%g0i(N`nM*KFP4ZTt=O^#E_U{=*$cc_s=L@QB2 zOma1KSF#K#8}YDs#CiRch|i4|9LC;zkeo-k@cevNdpPmC4Z*NhYZu>8evE(I46Pi&tJ9=C$ zc-82H4ZaX?HTo<*1I3zjE>ydLc`4mqc+JN@8hMBI)R|z=eX|ZY=DGBuxMtZEyfM4l zZCQ8o`5-If(!6?g53~sfU4_Rr@*dT7m;8u=G@zl3YbnvD5w(;{NkxCE^{KidL+sC20Bjv=Ft-JCRasNwPx10e zl$ysw&m4CPD96|vQ3bo@qaAU)&AIH@lq>to=VllCoE+ZIkMx`YTuKcjwe4b{nU}w}E@HuQLGh2IKq- z7Trlz_x^qKqsD#%P+UFTom<&<#oh*9Q{3K60CFmX#>wZ#O`LCYSxNF(fn^kg89(#c zBD@j5X#bg$^3j=T2w|nJOKXQ+2A#XOxJ0?kif|;zlA^pPe;4N1*PP$rIB#l|6Y7q3 z=jYbd7gnplq>oZ6Xc45b^|gL{1Jx{_N%0IvO;9lC$wMBhtDLplm|B4SDyr^^gK;}YaU z@B39qW0^&Vmv|_(hm%Ahz&l33qO-sgRL9i5hFjJVBF8AZF)NuWP7-;r{<9D<%L=L?>_;JA@Jh?})5X73hF zy-vwW+Q7^Y|4i0A7s)G-mx&ptMlQ>YkBX{)U_%fi$q`D#*vwlQqlWHatK?CwzIY6J zww1yz+K*6_^h7GvP~thR=cvVDE4aa@N$+;SYe3e7l>{Hzb0R678l0xREuW|TMamMu zmV`BX>A+`v@L{vn5Eyc{HkR_gDHa&s)mgV}>XxD`k@ZO%NtbF{(oZqWn}AkGWCrVr z^OIq0uJELwB%lxBLzxEu?;tZ@Z|%yvkBPKzH0ZZ5J}U_7h%*4GM;8|3?sf9Vn{ud2$>|@UmZq@=;3qRWMd z2L($(6ln6J)x2q+-~*t2%v;e&dN43`4S2M^xei&y-icb!9_zvAXAx7@>+(`{u5PV* zHYOxf20GR)q+SE6SJD?Q?iip9C=vrs^Knm9{elP=BFN6|lhUW^Kd&5PZis)@2}Os* z2QoN<6K11|s6S$-y!oJmz#N)I!?JB+EJa`)S{+ljELHR|epBiG0)HpeZ`zM)i}wd~ zM}APq{6;&c2re6`Ux6Z^Id|h-nGTE{H?b8b#hVFzG;{-F##r$aBK{P0Z#`BZooO_s z#}DdXX*0L72~{1_0xPobRT{Ng7T6WVNg?)NZ*wiHpuWWlH*{(H`1Gi z@yXYeBH>HN1~b1+pMI5>oVXD#RDWLT81MH8Y$AssnF~A0*T> z04{qxJq>(i+L~ZYO>2dL2=hxRXv@f|hr=$xKzHObbXI{<0>%)n)bIAoN zlHX)itTBa%pvX+PH`4M0`kLK1soN}sS$#&R%BvfjJ`m-6 zpMdyWr22OXzY|}`vb%p{VCueUd}~X~is{(z{jMC z^f}EgGV8i*4aG1_^71cpfuWPs^?WKHbiNY3%Ah;SLcZw85KBrU$8PNFVP@uK))_eA zs)$wKqJpLDW)f~!$R_%9*4?6hyCSjd@}&m<>4U_v`u%Q6+x6pP4kyrS9Ov9Mu*%H~ zbWr}Cat`bRu&odX`5(_qCHp>M3}|hB>UzmN2Rz4XKz}39CUM_K^|u=!WqbKl&uyP7 zlC#ev$f7Z-hy-8OBR)4QJGFXg`rL#6UOcT|WO5d|JU^lpatgHj8X)kN=*(EQnZ*W>MtGvHFalU9?Zo)jZcOjh}8KfNS876xm zU&_9bV8Q$oRvnU!8Q1?Iv{`l(YZ$tqGSqijZ$W_BjZ7spe~HymjXcF>4=O-EbOdwQ z^ccySKtiplcNGre4H%UqBlzG{@rOz@i4@3jq$j|tD)73DZ(sAG^I8O;`dQB>F)f|grjUTek2cl3sSta}7m8$yis&dNlt#!S%kXH@tqpn@qA1>JUFr;{ALPgK zC=??Ve}poTL|k+SP|t-#%tLBujgk>v!}O*NlMf7t=B*3>y_ea=x5ktFK3!abQMhu$ ztnWo|B|_DUbFPM{LRve+bSR->F0Yz{X_#apblB2Exh+ommtxGnC7X?uC&$=Ln`9fq z(7o~O6~{xUgrDn^#Y-{$Q?0444QxYWX|@y?aD`EiC>j`razKi-?U2f}o>_AmX7>6p z+By+87sOIFxxFmWOhpSNCL!B2cDaCbXzXOSJJ^{3ke(&ZzcQ_HUGCK(Vk5d4ybi{i zc{M9ZC}sD3!owYi{Eo>9wT1>rx6)v`9qy1L=9>sN5mTb-sl&k{AqH5zRU%CakEA9r z3~G60cr+R$?Na;!DGkLHgbB61`2B){Ivk7!$a@IGtf2S$?5~V+p|Dz-j4A#eiebbEnD ze=Qax?vOg>&e{;x5IkARo=LNlC!^ZHpcslc8@d_74+aUND|4ig?|AkeVwQn&ksS1l zC=bZ(ealoSWHeZHv}2LUmLWN)q2=*`LP%b4h}6MIWv*5+T{*=cxd<#2MA7K1J!xPU z5$4oxJpnzw^6ub{a*$R$>c`3ih@4grD=8i++5B;9rc%*DJ&bJtq{_!SctL2c!dGdJ zwfU+RgHRKN6qomgHr9gKq2h$?^%(R(!2PoT14)72ED>8$*3@Ey`E2sqNUewdO8Puh z?TVOM$|ZZot;ji1VN$`lELfLfQVBx_Nta8x2~o=iJW&d^kXiK3@L&w^z7P_CQ=g45 z+S2Jg^6d!CnM@N8C$+3#F|s~!SE`W8kUAo?6D4|rzs4Bqs7Lfr99?4`Qgh7Tf}&TP zu{|po3d`PUpjkw48B0q!R#1F#GiQ;HrfZ-5;LUiHHk0uf%AlY}mrx|>#OyGWmsN3K zZ&rbEnUJlG$qM50%HN-1GHVvmR>BY&@5$pTC6~<_Fwn98e4|STzk+-J?@|~9&y(UK zS67$vOnK(t^5O?gly_hk1kAq=AJI|PJhb6kXzey@T{ikmmsW2i5_eQdT7=GI0$csT z58XEYb}cj#nsL|^{iJt&@-BE!n>AkCqr``Za$PSAc{9;?#OF@O-_eRjF7T7fMjp%g z26Gf)*~d@$;A3~$VDorm{txXExbb&vD4K)AreJH+DDFs=WEnb*tpsz)0D|yZGbFt) z!H1zs37H{*@_c~&xJaNM{`0oMu4iVIf`yJ>-rwa9B#he(f1N@Ld-Lh8Zb#YAnp?V% zy{=cjg?d@~RJmAm%s2cE5 z_Y_2rwk`JHoc;BAvs9C!=BT6KJp2u>Xs9z_36cPIop*In1C%)b_QVf=4(Ec|CgAL|;wV8Qi9MCdk@}UI4!dRUEKzy#^YLbDh0E1gP3-jeGyu zfP0xb51M*#@=_)h{wH>W@MLF3ygVGS{==miKlnG6Jjc#~jWl-GtDf#Q@Q=+8lHs2U zMXh;NMZbF$_Fa;!2j0c+uLr2J0pgwG^aW40F_LxvwhM`=+ItCXv#Te@yG1p*FWOzM zy+bkjKY#w&U3PvL`}qd1O7ePbi6rR$)cIj`duXw4o@{{=4X9ei6L%90`kN>5@-X%9 zeq2V^=K^?_4}~uyfT)X!%0~j0ErWQb*8k4b=@eU+K$oJwNh{EWjkj8uby@gw=;c}j z2*&6}5BTfc`Qn5;^PzhF{x1;66>yD7bSL2N_9FiK?{m=dIXvT^_Zw%x=K1Q)%kMYj zSSTy`T4+ESRm<4|X;8w?``Wn^V!r$htk zveMVMR)LfB7qEU?Z~=dwYKW?)3`=A^x0N7LTVr;OnWkS<9MY2HuSit-*%=O%+AOkM zFgZ0FNSK2n^Hsj0t4Xn<;ZO#WP24@!2z>}6eV;j6NdV{BX^tH9M(`ZXh@gKi~z*AkX0s>Qp+7EnCNll z(5f2w@&B?A;Y(FTn_$n{f z*gn$`G$w&P2W|5;N-t|CpTE$?X^uUaNg5Xt0yo@onJ`BTFB|I}Uj|=}UdDHenNc>b zmK_DRn|>l2noIFb@#rBnHzaaAU$nBKs;{=#A0uEE3U$NnA5ZdCBN|S!5+fYDqL#!g zy`Y>TtKi<(Ot2~F%7$O;$sf>?)hY11)4B%l4(h@S`+xD_w1ag2{76YOBc6V zKFQeC6Vu@|S-2K8y+6`aJWs}JI2jI1970M!Z}4*SoVDdyXl}-C<>=qc6L;=OPm|Ih z_N>G_kDQ7vZ6zy?a4}UZg{vv;jGyOUoG-GTwk-aQa0tB~{a{jLn8j2^_6z+U8GAEJ z-fZly9Tstb&i{tzkYC$%w2YW+QEi1sSM5Hx2t#qQ%Wa3Ya()-X7&l4ANXp#+*AeQ) zB(U~14ionta<|9jz&Q$U&D+2_8w zKumzzQ_w@``w>w_ z`H)hSgfcU)qm{!B&0gjiLz?Npej{^I1--P`#qa~Z#2PkxF^!YpjAyWjJyN!7w!LB0 zbR+#%=*?R;<_ZUkggF_deO*k*^2GQQZybMB}Eb~slGH{@`GM04rVkG_4Y532)XHC+R^xiVepy< zo=MM7wQD-kf0GuMCuL#!_`OFLmD#7i&}5%M8gS8Sm%5WcSH5gi*gpRF2p7+F!r8a1oK#wrfN;F-}@>8}cFJ zeuq{uRnYM01|i9@Abx{NBo}6n(;)vhG!!01RVh3NZ=hx43yY!*%!if`JZNt!nI`!2 zj$@xLg!n_Ir1kKIj&nFdeu*^1{J6oOuUfOoVu5)tOZc-;I07KC>Kp6!&Ev_Ssosyl zY2k2QfUdPUu9?1YIpNkYZllznJq*GA9AKS0ePccrqjOHb?EBaeS6Fx@hU)zFOWPe> zuW>rT!tDrY(Ej}DMn_IN5u6w8`WXQtF%2B<@C;#83mmQ1A&@&0CxvbWvRdfXM3-kK zFLRk5g(id%V0|W^(3|N7XMSe6#EC^_Kz?`$`*gy1u~|A|vi>fxekA{bwcoYC=P%#@ z@*UE8*jF#OSO#d>7J%DU!2nk+E(MgMsE;%=S`U`ea`XLx$A2^3`Kej#=+px{{{mb7 zIKRvPng30Pch3*sbwXZop#JdR3f7@b>_4pdkATHS`TGRqI|uqG0(d^})br0JPabMe~5{VL;zlW`Z3jmjA`Oe~oSa2?lWgKQFuK0{I^= zyDjlI%Zuw~b0nU4V*& zCJ70{^A#a}1IZwaP15z0jrRA4Yw2c{7mbed(bL=f;K z6!SRp%g0#E!8&7E9(GcT)L#(RQ&hb5Oz@RT_P~mAeH}P6GY5R296D8mH!)78A~ocO zY?btpHqsE?VZ{rE@mg~C&8%JvECYPzo>~Flx7Mf8NTg&Ztse#Ge+iK12@ddt3(2o__8b?#ju{!DHm zGUG@jU&gCfrWc-NoHpPEKk90j4PtrhJRL|h@>A?UdPYR#O!cgTWlW4=_%rvnjGbS_ z{8lt_-z;P>+Hz4glBJTkeSqYSDq`yRy$~AxWOTMCC`=sxkFV67v2|^-6l$5|(XVlRH_6tNI2x5Nvkc zFu7B3b>JPIZ>ao3OtKSR7|Df-Lq<$f(<1ny4MEkGpe1d^E)H&z%&`(fO|pRNpqoQh z!j#%19^u|3mQs%3P(W=!6%ONwsYYZ+38h0IjKVm;O4zg**elbipx1;ob@QSQ@(iSy zXb8pojny#+SS-UPFy^?IC^rlc=q{O4RJ-M-M=Daee@N9JSKc6xaSv+KM1D$lMZz@z zlc!2)mM&iSnj9q#gwcmaKuS_)qO(95UtbKEU`)N%gXO3Ua$_;W6zC$uyd}=bYzl&G z@s;QqiRrYmX+}lrAw<;u3e9vxWulVqsHgouQ?o{1_hGt0A94CB@EHzI*jlH{U@D{z zincKNd`jNcq}C~KHgCQ>*EMPKObEUFdK%QWmnAV4*F&`eiX2Q$!W2s*%~!OYvp|}L zRvF{B_4Y9hmdMOpQJ-=L*qdJ z7H%qFliw%qDMn%s>vV4Vrfpp8+XvsDq)@0S^hzEL?}sOTGybZjY?OV`$;|ht21a@V zhl;V#m<+r){W4W9^J@qj*C3%p(_cJq!M8lI_av>Vx9(T|MMgB|vK>inqwH|Qn)B+`_zTr7hk-N*=Q^6+ zv~;a8X(@6fS{ej!6s}S*V8AZm!3MKZN5jdt;6^UvrP%6tN<^~SHG}vyLhg5Oz1VY z%J4_1HTA-7{{haQ>BX^)9DvD2-F~OH(h+~eTt88Fgth%{d+z%CO_e=a@K|DhKHy^U zjKjGfA+)HJqMS>HO{x4J(OY$@X|I{x=yo%zDgXL_g|vROC%%**5l8z|I-5K>w6>GEwd+4iSKx+7r3~3 z-p&{s|B>%I>Ob3}{(ER=AgoND-GQ@-FJ$>@X*jUx7J(F##Q6o_P`%`BJHI&t``F)s zN%HmC3EVl~mrnQ4tFwKcf)3{2pL{_l7(ZNqhHjcNdHSfL+e#m48`QK|$5WqsKq% z9)R+PZ{NRLn(B|o69GHzf5Fn1LsZ(wQ8ih9OoRqM(|rmIGqTsBWc9&H-0y0DO2l<^ z1j<+xN+}sY12uDP*NW2h@=R7B_WY*&Ee%jpG48&7G=a(4^*4ivN@BazZfZn@G7b#< zmz@*F`~T^hcRQwsRx7_=0T!Yai59QBdteNw6@Nat^ZSQ{8`uDtyyvrby4<2N*ALV} zY^ws#WsJMnxv-6<|H#@**rfj%zf8^kOsE+>16W+Y-@Q$kWh_<+QBBYi(IabXsINj7 zG0qwdW#L=>GE5Sum!oG**nsj)=c*Y;o^b&}W6;qk6H2Cx*up{1ZG;I=vLj28vDS!Q zv88)4a!Km#cth|DZA_R=v1PG(Zqz)+mQJc@%O0ddQKg1yN92++D64nNtfT0G=8-dF zf`h^7*M6dy8@(7tHjsamkE@n2Ni!0nHv6^#ch=>-hOi+bGm^%z*bVQ6`vq?}NRy;E zc(nd?{6iK<77Z3d)%k)vHWe>il?kY&M*;eWgyF#%sMPB=oC5wz^L)0ne%!SE%qw)t@sXT08 zNS|;j*}hr1LOUAf#7OqjOt?rziu$oI>H97XicmV3*CJo_=HfQc?qZCRG)RPfwifQ7 ze~-7vl`%0U`7_1|PxW@3Wo4!JYxf6KcU~3>=diRwZiZpMwFqu#EjRJ?+33;e$>>h{ zXQ>~(K4-7%dUk9~^u=f=CsAwqwTBtiOg#Ur#JCqRZy_m5>wL-W->1uvV?(j@kzR+c zv#t_gm$q5HD_b#q=9D3vWjr9b2H_x)k}4f4f~Xe0W+#i%Uc*l*=PCD#G^|Ly!Re3{ zjef|>o7(V(PO2+FCR(0Kd86Clo$5&ysF`J0&aC{?mzaMQQ+^R{6cK4y=0r8WmSGcF z@Y&L1xP9B?BqQj-54k)b|FBuZ(LRe*;+~DQHIJ*38p5K&IMe+m4hr(IQ@kvsCWRmw z;u~W8CSMrRFZKu#S7>kZU{LhjOA|)aK9gE5kE2CYl`b*wFh7>5#nJ?!C0fQiKazxg zk|Ji_fa7o8?@Bm-3CH1Ki;bdV4zZTElr@+Zl`_5{oW?ti-=9DnOK5gN8%Wmr!amOv z{#s^s0_TI42(w}=Pla{Jh5{O{qM`+wRcxp=t(7S4s>mU0cEYfl^`|Ipj0R$zX&zAq z!Mat4Ig`D+pNMg){DrtNy#WxW!DhPfLBc13&eZ9vyrGwQHGeGU@^3&Uj{1NRiB5zRb#jFajsi zbjOu#3+XVXI+jnGtD1#A9b^GZfM7@(U{Xqq z#txLo8)_SyAWA8((r>deE}X4Iy-^489@dRcqN-3;4T1(#oiIxoMPF`T93uj$A@}Nu zdcJY=R5E_z4A|V@9R|{fM)|a(mnP%Z3W!2~)?j(d41JqJ#im~n0n^(*(&f?Dj zi-AFDyQY=cV?n_ZN|Yd)i%%8bdKYWFmS18pUu`B{1$ikvI8lW=!llgwtvFHWd?Ugj z24!2Rd`w0&fC>2q7oYdTlO&_A_z@-7+{h0jZkY~mOtu9p4S8JinV*_6d4+~-nsz&& z=R{)^cXxaWsd+~|^Z}-MN*HkW#e3>1V771lugRY*N6M|4+g{J~Gont$fH?V^pJ)Ar zVJIRu1xJ);XKeui$=pLnZPKJ(<6(v8bLS;BUN?|u)1WKU=^1x*lI;KtJKo9Uh{rKyw4*RKev~;Y*$!5z#q4@t-Q1kTyZ*J@3lP*X<}DRr215%gbtp zeCV(+wVM^OQ^vSU^x(0LovH_+Z8-&d9cATT2=)Uh&z(MfFX9&%4TTKizK>}yPY?Sh zd4<+PJ%gQ<-UkTRH*R&q!eBJ zWe4`t87qr^`~5lEw|eq~s^fHhXVR8ymnh#}5Ea^Yk)JIs{trh4Z#sQt&nbAfuTJ7$ z?m?&lzyu2Kk}!I2VrqJM{N^R6YNAi0CLLg51AkY&xtx)w;#Qs8o)9NqNig<12Qr&n z3xp`K$gx_;ljK+Oqc_wLxm59RVve#rT2vM8iD^Q!f$*m(F`wHLl6^Znm9Gu8Q11q| z1VLw~hrL|ZzZXl!iPu-HSpd||rFHlI_OF=%;Fg`g{Sw8fu=r6cE7phlW%IDdnBTQiWL8n#csvP_?-+&@%GG{U` zal-@0w#Fm7NZ68l__icD%!Q$+{c^XN@n%byM73J zz~yx5Cv%Y6-xRZ+3TkPj1_0&zi`*IvH6^1&Pr~rSr*z`Q^9~dWR-V!?z;ifx{ zWx`}>{!~U|&P8BnH&XjL6{H$g>nAG0jthe0JaLn9Q%`YcPz>i$cr5UKqj>I(vM%an z!!WW0SqvJdijPEnHPcP?W!%OtRXtw;i+~MnDQS#S3EFTL6Jta? zyBhW487Cea2Iyncs9ESWazyA*qLav$L)1?x6GWea9kb{FSX+1i&3;j>0>*Jbj$(Db zJ-SaT5)lt(rqlr#R7SUx|3QMjG4cHb$tYzRWEMhkm;hsCR;;JBQ6DWx=GB@^32dA} z5QfPgC{~MD61XkloE{E3n~gMp8(xK(Fq1)z87CoewraZV| z=)QUjOytk;AIVfB(|bL_96TJW9rv0|+)Zmk63ZJhrK-YS<;xBS_;5P!g?EvAm4pN# z^T5RG?p+xW+NE#k$Tq@ic@T9<#p21``$x=;6j?}V4Rn<9EU2x8H56AbtaM@4ZL_Z6Q?gr)1t0r)SSyP#H1FEGF8C&{N zDfrbCVh=CvVVDdmwUVWE{B|nG0V#2Q4rY?vrCf|!Qsn(87}o(g4&I1l4euR^-VC#F z`^{@_#It-ZS`~#k*G?bzsNPGCA}kPsa@{g6l-m?>5XdN%yWFjz^ZcwbIM z9gxZHwF=3h5V$e-zq>cOAo$LhBedtw)Gc$w*B@eo@*%1mYxZ4LG@(j@+LmMzo#^nM zb^P-!hHzp9nERJZiWfYZ<@BRTJl_CFhRhQEb7X~<3C?AGIHNA8OTR8g_ z*vqgl?GzHlGXaW&cDW5sLAnB;T}2O-^`t@5E!tr=JYJKNB^p`Ujr%OyNs3j86N#qM zu%0Q4Na+{ELTSL2v?phpH+rR3o~L@_F>%i?=iI<5$)IQ{>J#Q+II-BL(++e)c*SYl ztc3SP<=abeH)88Zfa4>)iycr4-x79o$UaKwtubB4JDRlk-^N#$N?g{3`R85GLi~e- zu!Hvl&I`BFRI2iOm#Zq=XD-%Z5JdEI9@JM zKhaj${Bg+Vy!Od6+KcGM={INJcJE7ux4~_g)zah1a{gm}fLiz_#UaPq>@@qifm)=3 zdAOHU2!QTBe8f~w)XBj&($ixe*EF;yr9`eEuOxR5mnOv0%jULClMOda(^XnavDE^sIQRCDU*T+C6LH=T0O`YjuCD6dlS@G2lNycN zBhCWV>!aIlhRB>Wco{_Gbq?3+TMBzGGfsfAm#@a*kVe`73)93M9w0^52(A1vRVWik zA;iHhSZ)y%q|;E|8~}(}iLOV2r`0pQdx_2PI|#3#(RNVRV{9hiul+^(YX3>>4d7CJ z0fLtb$uAWU$J6#R?fz`pO?kV!kNpht?#rx$EpIC|WykmO^!@m-i~W;#=quiG@D%`s z5lzYp{zrO{T*$F_;1Q=t6Rdmun zsTW?YzdQp8RktT$PV#FY?d0#@AwTQte6DXZYz)skMG9Sd*D=o~0fhc? zE%DMPI@K!j5&xf7s_x~H6PTKR*ZdxV=;tHv#~WrrLBaNbC*XOlot=At28&U>pnmT(^876({EJ1CMK9tHhh$78-^=oIkCt^fF z5G~R0z{Qd|$Bv~8>+9)Y8VH#Mn=;n1myHdN#SBH2UHdBBP6q z!0!EdJ9>F|t*B~=e|F@W!xHOD`grn4=+S?$RwLyymgzPfKw56g z?jaj`1m&bBcO!pAT|X>TmVTsekRnC;^iBh_T!$eZOiPkF{B6~+Rx#t1;yiq|u%Z5n z9w|C0^e%mXZ0L;KuU=-B7lbJl>Xi5iqpIgebO1GcyYMNE!4%AfikuNL2AL_B3igBH z4)3I0PbN-Ix_g&tKy?(uRcLjKxsLLMg@h_uM`FFh_R7?keXz6=ngDk!u033=);KCV zaXKuW5}60}F@W|U3K6T7g(H15ynOQu<5qk;Ep7-8jvjOjwPDpAv}2%vZ4YgB4U*e| z)Ky?yeL866#Nk83jr9H>5bG6p z@M#g!C0ZqFz^jJwr0`(I%~hsd)4im78&h$yNEF~-{-l}9c3z*x7PLmg32mFTO8%n_ zi%(zEUuZ7*C0Qgp&{1So$&p3Gc1sL+gqf^FIR~bVnDi7RMN^z;WpL3fravTi^J09o zY$(mq1cG=MF$g2S#@68d>RyTQ$iS^8tEa+=ae_Tt zFS6lOrRc?YXpvfkWV))$k@coZQezG*E-gmA)fbZBwCA+1nkIdJzHJn)Ny9wD6!n%%Bs2qJ`iNiTHj6=khK*K8iZL}sl&tSk;Ih$Ltp}@i7^KYki(iq9C^kfW} z_(u47!bi*rp^&fIO1Q|CQb+dOFNUXkzyW<3VrlTFt3!50zVJ5{h9MyvJlwCjPdg~* zu8@`s@s*Dff1aPN0;$5!ovvM;ogOGGeDW%Xy4E+(L<3*MMc1}aT72>h?`iXUw`(Y! zuR3Mmt~!7Har$+_@@ zzcFv;Ex*+B+Ig=_Q#Ua#o;7C)!ZnKLH=%bY{n%Sm!<=_7lynYO7pE<%YnSFecu2!w z@S<7M$w?H;tjHRXT($ln$40Lz`w@?np7xxdj4{JVvu zJ~_3{Um^iV2gd0Wh9YiGHq=%hE1g@ph_0k`c7X_RhGl**&5_Kd67{>uJmDCwV<^*Y zTE7l3eZ6K@`qO!7nM8e(bNcP0d(I?)zo0j34-;dVfBpiYPhylIfX6inu$IITaW1OQ zDRKdO)bkK|7eCRfm%s=5bu-g1w~3$KTs;S09v-G1Znl$xO8RMgbRKq=cEO{v`@26_ zJO2m>vGE2X9OtmbUEk`peqVAoA)?uKg#xQ)YLvRM_Qx z2?S*PceWznN#*SN1FP$NjIFAn(9{)w20rp-+?$J=!#V8l6eB7?h;xAudD6M6E)T$PwgrlT z0h#Z=oZ?ku?K)t&D^GP<2#BKobql1*{>J-jN=;*8Vsde^#QU$sh$nS<*?6tp$NgX8 z<9V1$gL42ms9rLbb^+%&?c}7|eJzg>7u;isQPqoPDa|J&TP(mto%Un!ZhkRUbA*cr z(KgwbIunYf%3KDm*wj1B!H;-1YTav{Z;7Hs*0Qx?U($fnq+9QyC*6MUx^i|0@MD7fG$V+3umXx6$- z5XOvqeg|cDaZ*Z=!X9dy`$;Vvo#2q&fmf5d7L7lP0&SFqQAk2CE@S&`mpViEsK4V9 z%)?-Em!WbwM*kWc;XKpC9*76@PlFk%p7lNq#STseoV}V4h%FcSR*0v-km!bhtDrB1 zFv|rx5?>P2zbY{oDGkJU#MwzLE0k`2Yk=ykbQ?}=#yI+*v{YjvYFtTqV#-C7gDy|R z=0CO!LDNFSVGN+(M{**a$#1W@FP_FGC`>p>RFGy&9-}ZSEYf5C5ZWN^D33$$L0hg= zI;KP$^;II-UXZC6*^xliS_sB@OW_1+k^Nu(>RT+4R|z50BDM zGKwiG-sY$bSiJL41k2t<%+Hx0IfslwObxgUo-Gd3jySZ}|HUaPP(aa>w>HotjG1X31!F+LIN|${oNp<`8u6QOW#%QV4DJF8_8907(gvIK zAkB8-9Jx9vPPw1Lzt%i@5qxQ%;Mz;az(umk4fJwgZLsc0qm-y7T{uI8RoL+4iuqB3 zX~^3HQVmsMMHHh^22x_H*-M=yp(WQnbL>22o<9KBDO1mQg^?7)&jRp|qq77PfF^yb zb1#}-NBF&R2ERcL?_Eqk7%!HYCA~x`L(I@?rxVw1+nW!`n4l&yo2EcU(mAFzl@t>5 zi58;{-jLj-iSH;76~9V~ljw%#=6m0#n2S*SdNiX5qjVl-8v_?`>1~fRDZ4xDFZ9~hggMIQxp_XHzmQ#!mpLV1y ziVs4w@IZ+nXnKzae)v&a+Z6A%1z)Akd8GlDZ$GJ@1R~_g+bKbNC?1CtK+`Jx{Vx5N z^}eV_3Vy1B&t5m1cUEKf9ogY2`y7FJe>;EQv3Dtx@%d2h_^ki(y7rGM|1ICrQyQ?O zcdkvTd)N7E{p0Sfa;PZV&ZQF0>Bd&q(DiGN(xb70*7box1ynem);NE$^ zG}S69@yWk?C#DP0i_k|jbnS_F@x!OP$xUx-GeTcuMVyrYR5TvVjW70Flekv-w=4`Uk(U3<*l8XD$WY{aQnE-~0RaE;U(w{@n=&gkN^yXOFbA`MZA@_*Y5q{(1uk zApZ6NC(FOv+}PNEOzRp#(#Gio=9>=R0l}bqg!k_k8Gu;~xNhvI0{WMn?z|hZ(Y&k= z#~pjk!(I)ySsYV!wMgoEyLHOb8U7zxXBiab`@VhYrI&74y1To(Q(C&EySpTo2I)qn zMd|J?rKP1)kQ8`szW@2XdS-aV>&1=h=8XV1d*ZjnOU=JOYV$7C0ci1Yf7nK6;OG~Am+aT2>GE${U;y_{7l_-6t0waU z2_YP{mf^PA*K+&UF$^^AeXV8wJT@JC!!7pm&qM6_&eEYKng6lf4Ei$May>O}vimEh ztKH|3A{mbx=X~m~jqbzIDR13I$M4fg-r2W2#Hs!9M13}i8XR9*ZXcifcbwbPPJ;1^GVfdP6z1#l4es{N%tSRc}CbG3b0@#Xy zTl8y1uc0HbLS63)8keUClBxN74d^cO^04n`WaeQfdP#GH3`sPW41P-q7bLYce1sy* z5vFZ?{=xF2;>S-0ECrOiqy*$Og&1Q>0!ji}3w^dLDt#E%a6C%ZnDSs$wr_6cVPk!G zMJ4bES>a!_0LX#1d-;RZlp34j6pV*ituL+=volT(3MbufURc#it6?t0S^uw2WF@f- z#Zu{%BCZh&p0S95)&`QsnuoZKKR#)H3lnmfnkuSl7#~=aWI6dTpDf8D%>!PSim*AJc^(bvYld2}L80&>|fofCHG77@OeH7*)WtPh5(}^UD%QPFKp{Mr7c{)ij zO=lwe!l%&6SiQCLQ6>DX9T90*l6kt{|GWUD_!xO^PU?c5F*~%@K~fLe>6N&j2+2Oc zEl6f1x8Qk$FZdh%#)!xd@Q=gd`lWcmIEQ!+LkK<)~1N+=#9*XXn z`GzR_ClSJSLO(Pp(GGHA#e1sQ#4}WF3CRdK&y4xX!CMm=Vy8=1k#Q=3h=2!vJm8+I zk+2-eZ`i79oigB6PExJS<>>AxIT`(^a>z_jBORf^p{j(Oou}8UTW>;nvi0^CPB06d z*)T+&B{}#pfVwXS)5WgYML}~WE?x8>=|>zcttI7y$`OpdCQ!lHqS04hm1}1vRbC<# zh9ZdIw;uOZuE1x+j6oOPhVo;5WMkE=j-XaF!3Q0c$b59FTdRl@6X{Qm#iaBlsN+BE z_a=S{s*xotJtQ62T5Y5SQSnoSw9FYd%rWdMb-rQlpJSdg(4)5Y#a4V;LUr@6w&n)EV{TA8En=Q}@E07wulI6FU z{gsth!#XO%{i(+^RT-I0Hq0D(U9i++YAm?p*kmYA&Ail$kKC5`&O_-yIB6+$n%svrjseT8N0-75j z5b-x`soy3^uI~Uhq)A7Ezn>YyYgKfyO)NPZ;Uvs?Fefsss?u$GEbyxq+X$accSj*~tO~-o`Yg$a}vt2SdZJ>`A z_~bkit`!-{?2Enxk=L#$?JnDNXrElR8q3h~az%lhsLwdEA3MJ9-A;exZn6yAQI!)c zOb`RE>jdmi2^uU_9e2XWC3v`=r_-nWRpa&iDDk!@iaNNBw7bg7O1?*J;5y!Ue^&Ry zTa&`d-^VwEcHQVwG`QuX{9}a37|ZHt_c(v%m6*wByI1;Fr(*(^6w1_H#R8FK6q}Js z`EM{%wayA1Zgx%=y-w2VpN~UraR~TMguD`Nk8! z1;pg@u4lmB_j&C1y^H$Sl-%_ zR@2i3@BVz57Tde63mG~#@(mJj5AuYyJxuKH0A}^>(M{%uPPgy5^@l&c1C0T)KKlQf;-tUCqg%nqXuUF2P?~HOqA5;~MMUAyJL&`tCpvL3|7@*-@zOd77?TU*Caetb zO}m2uwfl+$n8qL4Ep1-IM(ka8;jaV?%w_Ho;IWnht6`w|;WZ<4b858U_51%?`vBII z9AoA56aBw|6lktpsOka3iaFCrm*w1&{f-Fpq1vn&CnZY^UNDXn!g17G>I(CML_3vf z5kVt~KubhBxK<`XQMqA&Uiu4F8YQoS$?{??n`BmlOgk{&QXBJPO=`RiW#9ChIcfBIKe9ovL4VLer`g`T-W~?5usa| zIo0NI^n;O)Zk?X&iu^EIes2B{^*XmF8Q>RNd>v6X$D=ecFgd8Lwgw~7vD>M~??XN{ zOFY(JkWS8~5+aPpS5Olzu`yPhNC{+>>2stEs9lQ9?&Nu~S_&=Vdg=$HIeD_SFq))Q zjB=)x5+nr=nR__{W$LLW0a8QSk@VU4@hti@D?)7(9FmIWpU2h{3WSbUXjPc)ZJg*(QXF)gIOmMpO=gkC zm`)s@twl4ebDwVq(VHX1@6VFTMICQb-6kkii%%yrK)>XUFN}B>`ff9a#b5fX+7F|J zwmDg7wVfvoEGok%l}--L91Hbd0yQ6_X;*_9=gPmLcqZu5X$b+ydkpu10c6IajCv*Z zD7{Dp1mHO64nkC7K#p#CyP@0Ha@A@x!8l%4~lxk-O~e=gD6o@z3}wMV)XMysPbkjl|J z=~@GcuP9wlDNt~kAt>XB)J9Ok(uxEjygg9zr2Qbt-eT0y6M|u@}n&{_956SE$2tblj^2`vqgA2XQv^gI>88lvyvj5&4296mcZn`4E zEal7x;RaKbFf^_2h$9zcW+k}GGIk6Bk+|iRv-99+d4x&HQ$=|BQBOlc(R9S>R69i0 zeDeGe$r_w=T$0-;qI8sRP}xx`Uqzl&7Uay>Fs8EbyP<$s8tzy-c+|e|eb{O{$9^d< z7qD~@LmuVI=*QhM8C-_A9}*52$#AqOuv+dSMPp0*EWQMR2rf|vI0G0NWHxfe+@mR- zV;3{n`7<=C2Xq>Q;nP%RLP-WV32xF%vB>s-KX47pKm$YJrlEnJCn6OBi^zy96nm2+ zmQ;h14(la#l%(YYT#=e`s~(j+Z5QP1Z1HH{h%9}BlyMqTQ|1i0aSu?1H*_w@JyF4P zD7ZmEaTZGl6y|oBl*A(*nH`&i`x9=|@7Yn=fZJ+cefjja z8`nqJ^=-71BIejXk1t@k?(^OU??ZCB_p*u2!vU97UG0p;?Ts=m`&mG1nEKci?E~t{RUSwzPm*(h(S?19ddtPJ-}Kc=xFR z_MB~S2kvQ#v)eYP-)HQ>@uJK9d99DawjBpmM5Q}d`Jd#!QMAC20-wXaC*9!4b`k3J zv)V^OKL!*+H)s1Icpq{Exex{oa(_J6Xsjf)V$DM%I|Bo2;sS)mMvev!&aaj>+bx5Z ztz7#9obnwOO-+w?Y8>D`FlrAN*$!3`TzZ~e>XPwitEm(*&*d_C&>nwl0}ZS9?d#Ap z?bh0eUH(TYKK4{S;nbSbiL7r=FXEPyR_!*9+>i0nYF}mf+8?ws zh+tYCgC(rI{zg$*ODnN<`Hg9WNkE{nsG+evf@d|X*XwRR1JOp?&Fu3>q0c+nWXKeB@vhVX7 z!zfqC^rV+S%|TuFQ|7CKyY%2sVDrvxVaOElgH%MZieFt=6utA6>aV8d`?L1**A23; z&)EgA&aGSP47fd)tC}DsKL?_@ErHLemD8pdckEgJe*eMCdYk`+w$(IsJ?$*QDU#6; z4a^mac7VEU+kw6%DYFyk5ZH9O_De{c)u{r#95jfMI~hG`WATy6rvK?a85Jps!j$YA z2Jishj5=EedhRE*69`v)>bgn_1aeL$O$)w&kt^h3zHs53H!SDXe?{?fCl5d|La@;Q z1|=)-x7r_r+1k}raXZz>@*0Bz=osCrgaEvpCazK zui3LK_I$pUCU~fWAI*>kF~TSOrHv=pW};L;Fjipv_7yYppYhHpZTSp8v2f^2T9o&T z&z3iR_4cYd#>gIBxo|(IU8-G15p%PU?Ip30<&eosN7|jT1|8yVUzt$vf(-@ zldizFpb2-}bhze{fm2PTjaZJ5R=y}oFZu}Z?LE(`kaZV|;#tAUGg1f2bEXJSC>=iG zN%L-pRT#`aT4qhM-_BKg7kf(@Ghoa{oOB_Al~qvWf<$?%CG*X4UFj5gZLts1W&=xp zW3bNG8z(|j1z;Ha`#g0Y;6EV5MD`cgpBvq%E>X}`+~;-0BkG)MaKsyHmYBa!X5I1l zh*zvCTrWJDKp!f}(G>#UM(-Y~KYVd?reh%XN)v}NcB^Cu`iE*VB?e(uD>XZ3j+j(wUrXdn zGF4kW`V10vI7(R9G*21QdAMSn5~De6b~)}rO+;O-lpp3q+Cw6RHSby4XEOU589bQ# z+k6ynY3!@??Ttza)s$aBW`5-@wbYx79? zWsp%FAYD#+Hq}&`-ZHQg(Ssst4Qq7&qVXab$&?gOXK+@T95Lm13w@L6R#RsFJyg&n zLUxll3Vu3iKdfM;F$`CU(MfmIVRH&sQIXnGSsg%mOgZ9v6EY#%{9w{Z(<7^p`v}WL z9Eh#Uml+EA@vdDPSB z9Hhp=k7f_U#%i>Y)G)87UtH!ZJ5(1G3lZm^C>o)7Y+#8UM7;k?9I-@r>k;&Pb*VyG zQiUpnIsW7jZtSx=1J#GRhfadg%NP!&$R*l~yeiV8v`7X?85LEOHgr?b8P0s}!-}Bu#mPiG3a62lp@YK4afm$Q zvM{Aa(M37OsiRUVrXyfg9^TPaP0o^6g^z&0$1*q!9nfMMjVF5@jHbDt0xlRLtCB}i ze*wr-|90f4h$R+iM(iOrk1jY?{(_P+jH2duvY1|S!=sm^cvJte=Y6lpxpeJT%t8p- zaD2_<$T360czQEmL`Mdn-J*+Cn9o?)VwQ*p>+M|{Z1b_Gw`%xeAIzwmsTKr}CoQ2l z#6sjP>4!01(&k?@c%26ZTg-g07w-iqZOBFw|ao{=_#|6KUt&`(NnQ0nP*4|+Dbq0ah8+z;KYbZk||Oor{1sVk$R@ad^&PqP0ZBS zETjUFfI<0vqp9!0XPugJz-84v7Q_Zzxy1`bwgBSRJ_Dhz?>Tv4$Un|xa*?7uPy{_N z?vgiO+k$|KgjY4X)a#TX7HU9E{3QQmES`WnevryNE=HCS5Nc_bAx*x!VZu! zyJpr74|%*+(s_{mPkBUVf}l6$p$4T66a#7v<}A%5XF!8{86@9))Q9%RZK^L0O`;+0 z_jupN93frdJ+d6ipRqsPNwf~gEDo=i=DkYP13KvKGZI(_BCUF5a^StC_ass{!wkQ0 z;T4xrfwahyiUqVtL(6K@28|weS@uqTVhPbA=6GkCe=|9P%o%}ldH8hroCFPbGHk2} zWU#4b+0va=1RHpZotdbl3Wctg!;axUvBym+2VSL`nu51U%iL*&YpYH^^{+|{fwpvz zn~=h_MVG!85%#xSP>w7V^ZV2ArmsZ0qgyDksF@18onAXuQjp$WPP$x}Bj9ynv8nuI z&<;!N#kU~1HtBCxTp)s7N?z5*y?)~l{?UsjL$(+t?P4Ft?CYd|B(Bc>bqp4tc`Kry(;bPIqhv0z@C6p~Nq|!ME7ydEl^uLk%n6e8-w@W2# zLo-i1BeBNdl6YJ9#v#6{A^i<{2|RY3eo3xGJOsWmevL-rjShe_E_O<5MEA_%FG2D! z-$6LV`H7m7h(^f`A^AKGW<5MYpvF_662?M;rT-&gH`vse5wA(<0jlta*_bnd9zdir zI1AK1x8RjA8Kmh6*MJP+t*`fmlEoA=tqBW9tcpW+}1OA`6s^ zPzp9}te9xZgc=PrICV&A5aX0^vfk*qk;3#|81E{1_U5M8n?cG;z19L!)mRjn@iMs$ zNeZ}4qI+Oq@J2{sn-gdPAcBTcql=P;5q^>uVQms@k*knk!N}yt>A)dA)Sp7%;0do2 zskDlmM!Kjg$$~kH+v8Pm^vh-I5BjKjrTVR5-lxxT!p_pb8-_`w)b?vj<2NFAfyTKo zRJh;HN~CfXC?hF4$1>3}n|S>t=EeCgfiT@;>C0+u5S0a6Vdy#IqFw4$Vg!m?h zi|X}%>NJwiwW~y3t(^EZjFi7T@PGbIu$!b)^BPB=xY@rZ+x9{z^KiFMrQGvMm*3Zo z$%jYCh4kvs^eAUvz!mM<*c;g6CIOCuC+D^^-lxPSyg%NvF6kc3eyRkmUV+WtYVHZF z!!3QAg$cg}SQYo$fA24~?7_J23Ofgp4LvyaO2CFG{V^1Q#wKl$ZMQ5chvxd#emgri zUR^!@vv0E9dt&e^TpS{t%d&nrva6d^Zwn^+}!P?Ze*5pAOaX9jz3rj)y*$E8er=>@W6#M# zFhYRq4!Vw2KijZvUhe2zMT#o*30n|WJ}1qLYGrL&IQY_{3a^skfZ!fzEaFeDF{JNB zLM6o-l{^KojDz>r^M#!%kmp6qW{liB&qV?_ud>GBC_ahFKF^D zKoq(DliJ@{^m(nlW%Kobk`{2gQ~(sz1D~sRz&s3mdAj8f4|^5s>Hk~0hfDzstYA&- z-@cgW{w|~#-0Y?%m)FZ41*Tn_qs`Ozd+o~4$L!Ph)59BA@6_vjzpI=2{boWr15=g} zYQcL3#Uw<6z6*lvWqVYX^orP>thYL7D1tn*UT^9by>Mq6?Zcgqw}Rs0l?##y_n*K+ zH0?PW@m#9FE(W+KQ(WHLCi`BGN(0d5=L#-hBNPJ_jBH4z5dZH?5%T;`j^pBDOSY(6^O(lFrpu2FIK$fHb zOY6*j@DOgp4pPQw+%aT&+Y=^bRSr&9Q9>k=JWTSpotsk2tAXb!B9nzforFUz-K}?E z&#`p)3AckBZ(3b$}zTi&2RyPEuWJO6RIvjEX4G^uzA+X|GWSf zSvVCq6#zoIvgdQ@PfZU82~m)>-e-)GigeQ38@*A~XxAbJt)UKcNjHLtM9EWut$hg? zK1TtL3dEY!iwNCcNBLyCv!n@NW7^Yx;*#O{ ziSu_rB&Mi@U4~C9pDoNKatYL`S(1dGMphE$*;5?FQOU_Z)F?qfvR#>)zK!cxHB@9^ zjTFth{ew~;m$VN_wlC@g*<{3~j3Hg-b7?!oF_gwF)COUZ3lv69)2opNuLMOV3f-?d zV7jSiQ!&XF0y-V3J;J!sUaKzSKVB`Rf!8 z@pJ#O5xp4Dnbd*x6c!0I1}W}O{@Eq81>X;|IT}N_-#P~;UEbYEw+^j8q?B^@G_-WG zx z{68VEGV9t=i;u~~U0yRTo2OpB)Teu37m6#^YWnBDF4b-4B z2w9YfgyisY9lxLtG5evGfxd`@>#)uup!#^2stI|R-Zm$D%MG3fVq#0dbv=0X@_n#2 zlC(q+8l#kr`#)(2w**iEkuK!tF+l;FblyJ{QYQzb^6N;EC&v4Hva&>;Z;T2$2`PRw z=b}@DTw`@Vo^YQP6ecIPP=HfO(Yc4*?^=*YNGS{!jnBQI&>WT;^oE|FU$9I?Z3Ry* z7a)0;sGFii{4{w9nqu|d!QwD|2VgH1g(8%L2WxZBB+GihU)7C%)p+p>NJrgYTAG@? z!^r{a?vZ@X|8xgFFail(3gdR5&;Sz#t9SehbC=&kdfoG%tSxhi*Y*anpo`EA5q}xh zD9jvRT)>v3e14J@_0-|2Z?nhul?9bt%ydqMgSXzbf*PBtX3aFy%s?uL@f`I?BGu)nowuL$Chr%gtW6igy~mp15dD3oUJ3d&|F3 zyZrw}zTExkzPnls*&3a9?CM;*y1oyyt;6V86Goz2Y3d!v&q-)B93f=`AWq9`qh~Bv zQZ8iZ@_2ixPQQEp+7x@e_BW*gtgnAxr|%BVOwQI@`@P$rNRs_jv@*4_E5@9gi`Al% z&wg#q%NGJAOToRB$>eUq&&QV+0e_ui56oK?GY{+sYe{zB>H44PRmiad@~BvT7T$1sCY9 ze7(MPwwnlV-B$rxmApg$G1XW`7eEf%dq*YD%tt z5>hJN`%3{=N>QCE|ANLJN(;rs?H`Lwk4x2fa%M(+s{Q9E5mbv`Rq9gpJ;$svH%GNg1lx1AR6X#yr|Q*;?(ll5$7{d*mY)2NokvBe2&H5!YKR z!i8fGB?gG@Oupt+hJQ21FGeRel#_I}(UK$udhQWxeeFGexmw#@f64cKN6`#826TC6K@2OzSRYP)?ZPD zPW1N*32}~u@IT1oEolg(muBcHtS#o#SE6qbF{w*1ToAEaa8uu`QqLzi)kz@rwvH*y zs7O~s+F(|(+NkFg1w%1PgACP{>#OB7U+vP8io#47gA#KTLNS~u4t*TycQM&$0lOK$ z$Pu*)@FzQbj+u`Nx&ut%R17vnydXs_OP+tSMacS;Bm;7ziX-T|@G=n4UiJNu9~DtF zuHl?EC~eC?Vo}h?^8Jr!cB~q8*yA&-525g|qN+4-3%^v3UgmnZLKk5ewgtxSw(6iADm2N~B6=uC~ zF3EiOvTfA^2T#p5Ck2y1QhXiNAE`Eq_;#7s*rY5YV4k9D3F?`tdhgRjvxII_>_XfP zsl4AUi(qxZL`nH>_+4d9JDK($uayz6c38`xf59E5S(kZ(4?&VvyIAI~^NV-K zBZfmH`-hV|{%|fesAw2>n46Op8a^2ZGx2NrKhd1@X0ho@*7bMr@q7=& za%pn>cSG2QzI|q)(lvqmDZDnk%#aoyCyC@q#G#1V-M)m(#V%0T`v)LTKOf<}0hZXo z*H}Q%_oYvV+28R(3@~pazAmw!uSRjmLmpX`&))z%%)sZ%;eX=*^{O~vKUSqpSo!$G zXve{{rNB1R+spu?X?-EC?UK;h!56o}&_x8iOT12>prK2r>peeEp{Ih5^V=@2w%M#( zpbjJdNra^kaDK5TW-TrTJ^?tA`^%l&rmNQt__N@vO8za+)Zle+cPH5bSQPQ+;g`)- zmu`e1;d5Zbz1+CG4E<{NVeB>&=YofuYUHWCMe6Df9NnScxO{mId3P1^wDB_aaz*j( z^7Lxx>zB>nly;PnI!gFtfq6OVAz`5&V}^o9*pD{#bPyO!45J%EBPPa33->Od&3_aJz@2c6VNM zJN0_J02bT&nNhJl69VGm`ZV!OOj<#x zv90@QubfRhdC|!ywAmgSJXH+>ngpyk`Xgx-%@D-OSc4y~$ppG2ttgipj>$jQ+Dh#i$6W-_w}N4LRF$q~!uDH5}x*2cXp zbwTOk4P+|wgeJ{Mk~TrOQgo!I6j~wQa2!05GxJiQUP82rs53oWRpw58OHpon{SP6@ zLyev%ZIm%GTn#mv%}PsP_K^x|?SB45zg5@HircKF-w5OX@+SY}fh;roB!w1XQKPK- z((ZuASWvxOR#uIIX`TYhA|jA}&eh!1#(a z8Jl@0c|qil+6XW)y~!48_K{PNwaaS@;U8qZYgM4!hoN%B_ZPI%4 zJv7!You0u|>ehSkDqhS84Vt~oPsOx~MQkhjkowi)LvgV_u9`=rI6GR)8&l3mxDbNa zqVJN*)OclYWJh`_>9s)GTEoU}70?E)&!VnG+f`lhL;lEZDEVc!Qt&dcf&*$^9bd-% zAG1kMeQqct70YLUTyKqbN9jsTR1k@&6Mlq2ByB3WWHj} zoggPjGqMp_u?cAo=QLt-5BVb(yt<&&XADeSJ=d?76A&^*`A+lHqBw`%CXH$?9U25o zT1#-j+#9b63-I5`(Tt)b1U5BlM#e}~fsq*yv=e8GM$v8?p@d6@RM#wAl7T?K`w~7r zVB6%7?Hb@I{=~y#psmVP<1D}K8A#!~BPN15=As(z*{h4-LBNT1BQF==5;O-p`y#nW zdCN*O2-?}EMn7zZS^;W1=ZYdr(lnHNH%~+8lcXA0RzX`T(wOj+kyH@~11jnua7H*N z;1^Npm=Q~bX(88Kv&0hM1Fe?Ezr14WZfgi@rIDyL7!Qd(C@IwuSv2a>7An|=MWKa<9_N5U{RkzZ<7^Z|m zXkt73a*>aN>u_X~nzw5{c9=X*1|J`_CHIVsgLe4k3#dCES`hudur)VJlJl=w={Ila z9R77#`-id9>QD{mu0ruQEjP%zW``Il$-3L)Y(q_dF?rM_KbrUQG=F{QSg5o0Joyyq zyl!{ZSGfO@rNcA?ry28TFX5L{XXlH*ji1ls>GjS7@w*Llxd-1&KURE>?d)i53;2x1 z0tCI3P|{A2if2**oL+HgH5E*>hS?}U3WkT%!BCNAfm-8X5GoTZu!*81@^}85L@oAC! z=YTH%KaoIr!W&%f;M--}z?%z70$YmOsGhU@<73BK(@v|GKdCp{0e>C@+d^dRk*e9V zZ~xLx6O~0Rn%E>;$z3S?$X7MuD01vQPO8e?Qqmkd*lUttwEL8*p{a0QXfX)220mqTi=Rz*J&7Vxpnc@DQeJWrR>jZ#U zKLIVm5B~%w$?&%Rog{38+z=It`Cjm+?LV)N0Kw;Dzr7=lD+lKehp8^!KH(OeS;g=K z(sMr|&?YB5oH*HzW&M$Ue8R9l7a_j+Q2!raGvH^xLrfX{4WzsG)93>q0TuiZQ~V+J zb+@ig@&B*00f#nh(Pt6=>)4Iwo90)$838T1EggWLHtE`Iugns;KLfy{t-IrZEdG0O zu0LYS8C9NTBTS4`eSX;u9Naj=qhd(TMR7 zJ#)^GOx@x9n2?|z)D%sc&l z%-z3XdV@^AHUud7SCjmKK+`dx<{L8Kp8W#y=l`gE&exwd3s-?@E!}s2f1fxE6AF;D zfdj#RHimCttR^(~h;)=e1vq|sphv{_@cO9bHbdc^2F!p)PP8PY7l*>6Bp0eJ#~Jpd z`RRI7@f?Sv{`>TmPjt9K(b{uvFf{tFAx$Q>zM(hhpS`}*(Bg4YewhgZ|0_v?rh%D9 z0Vqz|E^*kQL$P7HQn41J!J%LW+I2=Keolld5tj{mQ2t&M=W=cd4fPpw4!bb~Lj9H+ zw_mGX9-AoQ*j56Ac}TC9i}Tfj%c<~$%^>YcOw~`-bX4#AC61vCr7_h`I-v}zLSv*u zOhuMq4egNT!1|0Ik1U&qo?k<6TkF(b4>K^_isifQYJJBVLDS@cy9CB!>?oM#7Uj%WJ(kn< zH(|b%Z|ZxHJ-i0o*YJ&-s zk=13`{7nR9<4SE_Wy2s)M47o|V@T~9b>kKKWxg`>3Zk(6 zW<9bT`LR|-RRDT|{t=|*E?}SXt(=x2x~dkPw+EYDu?lSNi^Y8oO4{ zPEvbJT+M-c4eBGgE7%U0z;2*HFIi&p)|eS+O7oW(U@Re&I&8erls7k>VRaj&)Eg%y zu)uPSEoeb7ml}cKrj(|oDVou)#4y;yevSno(qb%4Y0)U@B?s6XJ{GB*V&CAA-IaFH z(u>r3b@Jlm{vxQFV}r3iJ70XmOxekc4fhO_9jTDfK2XY}fuAD`kFmeDJS6)k!4fxt z7dRv`W6J23bR6wG^JRv9ke>D{=42iwJ8EW}66L(zha~4DH#UDlQRar5(O}$<#WD<{ z`QR9_=oqS`d%{xMH}8&fN?$N^JJ7V+E&HW4wXqd64-y|9?g4e%srwaxF=28pRCRG}7K$imRdc3`%;Gm2NJBdZS(Iw`gLc zVCS9+hFp|%neinl6e1;nH43f3MXQk#RGu5Y`;;0xcJxc?fa{Fk7;K)YC>=l^C-|5tDd`xYBt*3{-cBDW>9tftxJAx$8I-1U7Ove zH27IuhFtz1j9csKB`_a=>Y;+G)650pT>I_I&Vbi}-K?(u!a-9Vp=)(3=a{I+{P;wh z<+AR2`9RzLb)SX35Nn^k;gsZ02koM$9_k8H_-)*eZ$xmex}VX_;%LI#N*Ss9k+CGR zskkhV!X$X8(4UsDKd*?pV|LEHlc*04{LuK(MB+-KK&}2TT|}*FC3*FXI;kZ()~uW5 z-^CH$2GB!s|C{z~3d&t59z5|X{ytt`D-;N9$KDpTUhQp%S56mR0l&GQ0f(*{Kglyt zMNAG<2l6EGvp+J#f<6lWj958vZjo+x{N=TYvKUa^AN91d!j!eTX<^iAOx%t?@!n*u z#OUaVKB6YVM?2=gGEnr|M0`G^^NAFslke;^pox}t#?(5k`tmz4Nnl(2gBCNvD(gG( zfFVGszZnnz?0$apUM%1`R{r*k6Qh4}9C%SIb*+ZdYw^;nF&jvrYbr^~`2N{NE)2SP z)u&%fM9@xdxWz?6MneMcSzjK0-~KDCo4UKLth+xOq_rgoFm)-0+I@^Omx#e4L7a}c8Lgw;cCEgee_bxJ={^v?q0`AQ zYF4|O-w2Xo4RCm$&DiX+vHfAXkVAEc{aH>7cz9coMn3>ird}xadoq{*ZFpD=CdHro z{X&ly;8=8@O%1$@Ec1_aXKSsmT(#`|oTuRF`+LFmUrBDH$yW;U|0X4Q(kkVj%q9+8 z!>y#XrJa!UAp93f8pwDTFl9z91PEBNWI zu8(kY?uyw4jgP|-jyDw)!H@Il5a}OiAfju7N(kj zHm+bZKs9RGCzR&{(#tKGGX9u)_C3N9>#ytoJ7v zIF5RT($jWTa4YuEWC<;4|87N2`L1XWi_+qwNRh!(q$?0PDE2=^^$V26n(@iA~{cgN=LuP zkGx2vU1X}CBEV555OLO%sI(TdTOFEa%D5Umr&f7uo zNcJQ@l$%@5kWsCH+Fec==+9BSKdB0jDKu(!mJD!bO4(`A;n7?wVa}1+2|~!uMA4D& zaa+str-LjGpbTPQBOxzym|tOMlD6{13E#}8uqeP|M$8`qf+=R-fR60((BVIOT6{%R z2o-Qn6!NCj{w6nB@07S3*F-ayXV>|lg}^b=+O3Xqh<`2(3agOPtwLe_`zVl)QmX0Z z)8yOyaRbpw%@^HU+GY`ZHM4a>E=7(fMi1IX{}YvkF%=!LpLZSu@j|OePtjiPlNG-^ zceNK52KK-%k5}d>BmTg4CuwGe^N(=5aQ=&xNAa>|=4=T)iHnR)oX?OI1pY}dzm=jL z=MOk#h;meK+uc-;GW&UeCc<~#i~(4`JYIt_bv4O9;}=E&2;H5k(u?J=V?6}9-|MV@`Y|SkAsyf=>zQa3EEQ#`Err0<> zzAtYY{@kjkz?Yv`XxB~bb&sqkQN-$JHALcQ0|k5RxNCy6_xc-dbfB~N{`807XZ-U5 zk3UcG?Cc~oOE6}M&F^uLUbG6BfE!<#mSVN8{)K(*;r=0t zC#fnn^+y4-jaXW-5qC)8`)8|vS)Px|VCk4G`xxtCJUh|tn;Y!4&LL|LrN)H7l)@8W zNVVXz^V{vlo0{4ySKlga(gW?lx=>&D?n*pS;LFQ>hM~ye z>28ENEDSiFQGJ~GP%i=0Ne`s2{c(_Cyy_DD=*ovVo zntg#0!Y4m*!dTxfF!4GqjRLdV+iTGO_%SfBOY#xPZVc_KUaCj$mB7W$2nNi((q;ss zo%-G!%xm(w4sVh#%0*4@!;RB%G4n8`u~-tdhwVmfRSM9s0zoCTC5)cQ@p63OOEPwG z?UJ7Uw@mpmwu;1!+zDv;2ol_bcyiX5e*o}@kbm}wKcP7hUo+ED1*jcMjELJb>;=AC z(Y>qjn(X_s#gHwn0ETBqN=IRbh7HFi=KYJZlStcZ0O@@SrO6j#!X`yUpGVul%XiuP z$-Dq21PmNOTjTtXC;PP&78JNY7}HP#5=k>72gxJlJhdjw6$I$;(flbXD>3kpRfQje z)glvK*Ua@hcw5BMEC#aMqoyu`bDV1Y$#SJYaxtx}$L|MEs`F~z?v zGR9bn-tK*l@quY7PEUre)GxG(+71!~*^jjaS%Wyxh)yw{ONyWjQXqY76@Y`ZnzUMq z7Jb{OIW%YM+ae8X)lVjN%^XdWDxiWVi)36R+NmDW56K2O#UvTV5hb&zEPdSS2&pFX zhJKK(WV2e#v?|cY(SSIMeW;r;H(kf>D<6pPm{Kfnk-hg*ky8%p2+0y58# z6dzd15PubGw>^@B*7n01sTLM3lOOe>LVCZPfdHC>=HjXSBnMqTL`14N$<IZUvop}w3}pI`h_6T^xF zW~^bdD#q1u`>t`hrhq31jY)&VGT1r_+Z6&o($Q$r>o+&VG}#(G9wCp!*snJRpd0JW zOD4^HXO>c+wvQyS`gQu9Rhr=g&m5+4IK2qB(Cs2MfD?>V6KWSeQaMuDq?M)S9alDP zKAwV0hO6C;HjQN5=@pdR;&g1%=V#t?zt3d^LhrpJQ*8+#(I~d?<+VJmlS1W@D*X|O zP*1(HD{?r&mmA$ z0wMk4P{Jth&@k4Q&Nit8_N?QT2NXU#4qUBz-^+VW4-w?E}w6V18YI}6p^SowclLv`7)CV5@Ej5}^?C-Fg)Xk)u6 zwF+w^8EbjDr>N8C?f8Ct^uBbi_`!=6HlW$eb*E^q+9a{*MIcb7H9E zt>J=O`RqKr$E{~RI0z`3lAU;ni@wKVMNwMwXOS4EqYN5(;%6PbJwf}uAxw3+F}~^) z@eXG3!1t?}(eNdOP3Xwqx-lR8pFJcZk};8xVdXuVr^^o?>p8Kwc)!GUk4Vc(sS*|BB8rdolTLyj%7t z!57IT;vEeX7jFOiQ86k_*dj?c&DRK0+ze6bZT;^m6+ZPp0#rDxdH~9pLl@OVRRN{; z5e%YOWun}U>jkkaIC}!Jg@qYqK7Bp`Bz5pZ#c9rMdRmy!^L=5K2u?xz;oJ_H-XI z%Y`ZLuqjV|NeKaXFbBSzkZitQAS&0w%&7QFj|`Nvp}OffjT&d zaJBp}viIZJ6V=1N-17&sxBc1Yx{$}Xke4H*%^uHpu59~kGe#dw6C zl2`XMgc-uDK~%y{7d}0NA{uxRDxXZ%G|72M2Bn*groHN zbL_zy6W@3XVptIe)|vHv$ryx*;b1fvSV-jygVYL8{uxLu%#&AB64o%NZ>|q!k8m33 zOn3ivKqq&8j9|$aBQOlejNrt|Pt4A&jN_9PBkokxo0C*cDO*mN!y1m2Qqrf3;!@Lq z<;9nlO{EoQYc@C7&5VXu38jZe)u#~CasOcNC)Dn{4WNVRDoFfo<>p9!?yMb%QYHqk#&ibbRG8xI5O3_+~6?2B<#nU0Zn#~ zrnIen1idtvZ%yKCl zU#ATPRWqMHY`5xY6@!>qb%5wtzbL$xH615;rr7k`79Bq@Uf9{F%l%aA_Be#5^3Z&g zJYv=&&!KV7-hbr?-H(zXqQsP%maZ&fqL$PANEO!t<{!)Xuv4VA+wU>w#Es%XwyfLk z7(4lO(OTt9g<2n=&1~iKb!ZLm`ebUgD9Ic-tISAC?=WJrC2xp_TL=$CO3Oyequ#Gj zxm{9?0w)`-R;Kz`e;Z(AukUQjVT;DsX$zLFl&u^}dmy(VLbr-fQ8;ad0U^j*(|S zi`Y=wNG_+CJ%z!Rm(E&C5OEMZD4Akp9-l&7pEJKAg`hMPm+oe7XuA@_{DlJ6zQ_#IP9$1pn70T{AutD{;1Y_9K;z77H@p4Kzzqgw01xinH?3Wbe+)> z9oB}@OF zPm)m^6s03OtT5eaYwmEc)8iNoD8&okl5w%w8!(8HQ_ZBqvP&V4Lf!yJ^E`~>4sMZH zTBVv-AeAlfRbp#qFvb1iNWd^#<-nk+%Y6@(Yex2vX@wWCAHR4`c8AwChd_@98f<`U79uT2#}7?#Qi(R;)_ITi=lO^ra>f?v600L zL8ypFwlBFyvWTeIK#|+miV!dcDG*`Nkus<)fDKa0G5%h$kShqy*|rnrk+=kOo2EgC ze~iyX%%eboUgb5jhggPlk>skK8be5K@@v%AG0M=6dnc?ChTnMO8DX5Krqjf@+gR4=ZHkb9_tU6A{*^B)4o_v=HN zxV}luO`|+3kG(Ua`&Q!o1@1id#CVEI_r+)CN%`+Z?}N{5h{VWWT)*q};Djy4D_^s< zr;Fa}jofX{k!c4PO0%)mW>XyJvp%eVH4BlvA8n>3N?-u`0e^9*D=x-E=KHY4u{}D{ z4l*$K2c5PeUla#nib`y_hWRZRY(iI`oVwoPnl;7IX>P7=P4$`H@s_-3JzcV!0Ey4) zmH{ldMa}oG8xm63Mm^^@c>bu^O@MI&!Y;qYwwz{%vJUiHTo3h{l~Z(Mqc)0Tnw|!#PXTLZ9T} zb>Upe@7ue&Rc_@hXn5K-I?CNIi|3767wYVj%%af3*gO47lI|duI$j(LCq<2LdoZmC zXOT^^j`S7G%v-QFsCLdJpx}W>u!u*~mj@=XdH8z&QY@)8g{+PKD?nE+9R~f z3q}gNj(Kx-^?8NO?CNISTb6Bz;EG`)nAdA0oe@)r3JLMQ%FgEld+xsATLMngzAw#D zajyREga2XD#bJ)-QVis^CmMUFnO@}8a<>kbKp)e`GxA=)(m=n02meogXNYy4TZx$X z6t=>l#0Uz2ZMDTHy}wpf2_q!7n2MikwB*M|BSojn5>un5_*C)|Kovit?6Sw1i+|VF zzTm|GS`Dof2?YnTw>RweCVGt4`dTc$j}`9#sjz_q)z==q!FLHZoMxsY65Rox{)?mx7ST`?`nC zw>yHjtNn`S$3wCi@7VaXDl0xT>-9l>Qjj0tD4UhnS57vsNn_3nzvT0KhyfTq*UsGk zIaL1s{OUgBUZ{da-#}1g-PP?i=QQ^dOsUZ5+Oevc1?}1loH9cKT9C_YDXn#Q?C1M7 zolPIWhH_q*5bnJ9DG7#Qzf$5#@z~qw33!HOP1E|EtqCpF!G1xBf-$aV8$Iz|FzNs5 zX%-9+Tl$si=hF2NT|{(TA6 znlodM4pwBL8aI;B>i;;0|-pyFcPH%EZT(@-mlsaj-NH61N4{R3vP_?U-iqPd} zUP$&ys8WV25%$4^r9@jc|0X(HRhs$q5i%LU0qRVMNd_-Xd}oP3pGeG0kV&4yZgICiUwoBsH>03^s#d{P9 zu_)rP4T&cY>nLU~^^jHtBE?e-P=2Bi$NxT;&G@m8gsCF-(UVTV95fE_L$876GY8fU z=R&%%Pk0)b2kpGnSRL0UOch&FgI5_>ff8Z2M6<(VL|EXIG#Zm_*+z4XO1$RG3_Kch zIa>y;d&q)-+&bX44DX3V47fn267HH7$&H4nmKL?Z9@WJ#%rjyB{iT2&ZCL3U?ex@1 zw~UmH#0<|#k&90!FC&jnZ*D`t4^WR)5rgeR2$|mTVA}> z!wQZ>Wz5^K&b4&b7lhBg@xuTfjsjF&3vHc>I*Sn?rE%IVT2{sO0|Yt*svQBdKw7y& zCtcm6a!r=}hd!J#ehA`p^)Cfde$oX}Q}VqiCyY`(QFMmP`7RWEJ84K;nW1!cm{_8m z?1y50>Yx2eBZDz|z&eOebf`mVc|XA#<~sutP#kT$0#PHJA_9<6k+J{2!sH%D8qjH* zJdWx)i?{9hPrnGh6V;!@q_*Cph}uKOXI?fW2fj2*?SiFzzNE1*^OR$mePAIk&Jq^! zqp<4ypp-6EHQtDRP9sA+7?B24EXr4@LpOlVL#?Hhm6WBGYwsiNJ&7n0Nofb+;B_&Y zn6olj;_PINQ)Q*{xQeHgYBh!onj5jUX(CKem9-5e9m1YiVt$-afXm{Q*TVZ_kvE{7 zarCy*Lsg#h03h)oF;tESOIMf1d{DunS;PCAf`_3;338QSZn^o6l}{sJg*@%XU+dpB zAJHHp>L6(WA{y@To72fM8im`iiyWXi>dgBf1jvb)iWlrI-Q|JuMmvKw(el#L;g51& zW+o!FbNaY2W`4dSXDuPV{em0pdB`aXchy8fPFYq+@m}-S#y;oH?^_gX1M=7LmfeES z{aEg|)BDFnH;e|HS@&WJAx%Rq?+zW#yGs64X#4+Oz42)h72UtR5+0^<^&J~(XK=P& zaJ#nHe%bHAdB6X%fU&jLabBH&zT@=IYl)S7)Gzu)CeUVzazRS++gJ|)hVdpgLKp)| zGQu$fN*%&_yku#ellOom5mpBOH@Qu-akhpit1RN|i4&4#d7MuN$h>SM0-OD zVWaBK-QgisT!gOD->WO=^)H)~*#d1dvC{5;VS-O#&qpW=e;pPA6 z=^5JKs~C&+@GtO2jLfjBuP_btrSalr@{?%LCG>hyc`cvXMm*#>Tm9GOX{xB(Fi=9y zV}BavOoz?(v$He*W13qvZL7l?g@@OmFUil_WI#TjyUD?k-m9I)ixbtqPagY+0@v$* z$D7t+f5Y)Smxd@aKl+z=rsW1ZZ9|LVkqv3>rQ({_ZKC|Z0n`|dQzT=NtBfLNqe;=Z z+Ro~rdvzB#pQi0Q>!At3R4rl5^bjEApH=mWeeG0&5hG&q`TE{D<>sHu^;O3&IDglU zV&2Y}THG7fuWp-c-uD>+w_o!`0`Crh$>*CkB2N>Sg7`PyxH0C1oYe`bdB@n09w^YN$IhpR*7f*!&B6U)hOAE8Q0iFxv) ziEjQVYA!=u{Dwg?%)C#J&_uc70cLWBTO8pj;e*HdCo6CU?u8xr#rXu7lji0lB#V41+%_-FDYI{vu0&EmQl7u*=~?`3)%2NIkf^R6wDFB$`@{_A6d*&YBM^Xuoq zGY?_k)#>NMkWA5R>HbDr`|Xzz5oILWB>}D`;*-V7cs1#|`_iF^4ahyt1P`0qe4T)GGX(R(l3~3n}SE4DAZ84!3Rz)iUY`T6_ z#byDpQxQ=fNK@aIty9CQ*!I)sN@Asy^0uD^897jq-V*5! zfw6yAXC0lvvM+TeH2VV{9RkF*)+o#@)F98G2%XqLM4!a~Z#z!z9ut2XoMg21TTvIN zlp>NEX>NljkW8Ksu*NJ;C9I2(8){8r7_dr8 zup_7icIcFJ4Y5R7*4y`u|4$1rlG%ZJjW;#&SCc1D~kN2OiuICSgZpfC`LQ%NpXcoWyg# z&vv`hTR}O=tu259=iKoy*d8tJqN>dyeWu(vmc}Z9zK>yPmxi1tbV}-uGogLdI{mkg_y$Uu*sIKjCDY=I36xb@Wdv|mu%+vQJD^X6i;q|$s(wLx|6Y% zFeq@T2ThX7q+y2W#vOA*yKd?7Oau*R!Uwn2`vt`pypf_mg0L|}6^j~3;nH>Ay*5B* z!Z!!@?~Zd_Z+|>TE06!1ToOhb9PuLhpI*%c7PN;;&e&bTHnHu7qVcn>R-i2eWnOn?2~nO;-Z zIX~}WA{-^~^?_SvhAPn)U&@TBdghzXhB2Y3@l4>Y`}U?8{_FF1KY`7^(L7gQ^W0Y$ zT6r30M;(<2;Nj=>MXC=FRnIXTDI-V7YK1-QDv1a=I*fdb8!L7Pi$;5<(QsVWcb2S& zYVBlrp1ho*LcgXoaVN%7lNe-<-8)}S40XYDiQ+hC8!23z-!z+iE(HY(k&(+{=@hwb|bvOfN|P5&?Ltf`4N>m&94)ah#5BP{U*CVht= zk|(63=(RC&IN6VUU&s0Iuj~Eu>igNpyGJrplc^J>fsrnA^U(Ak!?i{iyTun<&M7~p zsy}fGiG;-V1&na9*`$8@ej5|sFq3la4$T;Iqaq@U7@y7^EpP$ zBC;OomozHVXPxkQ4DBwfnppp{=If#Ykz47D6;!u+ISMdC3iljv7!co2h#M`Nk71_+ zU!oc61YsO$0R!wKgFqZ#wR$+uOf4N{RwB7XAeqkaedv0yvn6pThomi@Cl0DwGqRHu zEZZt&u9wmhJyk=~ok_+j%9hTK4yXmq05jSy3EP=;wy9-;qtk~P!o6Bh2StiG{45-O;zR4d_{9IeTsJhgqS@%&B?HYQA5@LK_+<{$4Z~!5zsH zx78uxp)5VAq@+fi9a@ds@+o;KE{R|?GKcWa?hxPsOh^Fhd*tZQp?9Ef&&6rR)w4+% zoh&ykl? z&!ZL0XXugNg;h8^NmQlBrQ3t)anJjB+teb=@L7UXXN_pPL&?tO0>2DX2Fed7(ZtD1 zE7ADT;1_)>`xRxlWgu)}PEW?iEe)mN{SG~lUQ|k#)F#SeK`)*nZ04TRCVq#TrJzdk z3H&MUCsLKvqI5Ho3x@ANkLLR4_4xy67&(`q6;70E8XR+{p}e-4Jf_U50tcvZGv#A= zGlG|e0gKnBBESh{BvTmmZ&n&z4E`{O?FL#CbEnqA;w4&w|B{tI#G9{F zwwPNCe~_>n#J5hxHGCn*Y(}34NL99=As@+Iz>h~Q#-*jE0H-D4r@^)ORjl?YU%|Rn zzX6imbbNFW5m8TlT_qC&C?9xlWo?HT1ahboR|@KAH|batudA3``yVu8KIK^_Fr}ut9f8+namVx zMGIzs!IMOU!sV#ts2zN%%nSe!lw>~VU3FzD zE*RPAHWDB}xMFDaA*>icCt{Mdmnus1PZYOHsd;an=hUPr3y|dYcNqImm*2&L&zK_X^_!D>`ofIv$}LRf#e@g3aW4C!WG*6Ijc8H3 z_6}*Es3vSKnU^=s{t!~dzRlP7A>X@Pa72t+uB}g0{0aKE3`mtoKNI;y)N%i+`m6U1 z7^x$wSf^GWC7iMDaIBRM(p^dj{`+ppi- z=}yZ}7v45Leah9ocz@QG1bI zd|kKy$gMk?~I*lkZ%Jm8C+%R}x@Uoaba1 z$)#q=ZGB+P04MUVfJX~QcL#HKw_9+?_VXP=ZwEeyO1ppseO# z7OUIb3LLJ6xNT9k$KK(1(P;&8CVm3?lqbigKf+g_L{B{x82oDSKB)_ZV@Aft#;d5{ zEkPX%nyo98*nM0^}1jFBfj>c$93`ai8E~Jy;$Dt<1I|aFCkL83d+7) z{*Wwz=(B%ymFCj>81zsqAJMbmR#nklT z&V9mCj4Mm&BmsZh)j6*KKq+?e@Xu}cgm|%a(PzKrR|uVPAH9j0WxN9fpiAu?+cD*P z5N6{xS+_STLC*Rvckofem(i%RQ8L=$=wBSx_e9~z5v=3exKpkaOUI4qJEsTA5P6s& z5f(DO`Fgs!?3~M(fQt|v9sNH>?#YQGCo$~7XG2#UM<*uGj&E~dAe<m79EnTGIi}j;vGZ?)@jf-)yb@aC}(?-oZMh1r-2#vING2F;1ITkGEavIMyrk&Lj z8;x!r(@CDzkQyseAA^*Yo0M9IPLPs?Oa26RIUYhO! z!miQHv6ZC#hMi`smrh&6H0|%!EP1oc<`NHcZYdY}6#G%3@?{A1+U6Y=jjs5mN_uY_)bEV(UPl%q9N_;!ixwSV28}sb)NBdH`*u zb~=|W;ILA7h~I)Hv7MEcQqG>437bW=B`g$!5-XR)-|A*8sZ4T8nmmyYf6zPBQh!>X zq(RucMFnDbA+VoJSX{@&YomwBTM;gB!3vFl#gc{3+lyB2*WnY^hp4KV3DUU6@Uk~z+w2w?o#-|z=*eRxk#4x97F?do$rNyUc zj*?ayq;MB~fVy4rXdvvU@`0I4o8UWDfTf03OkuibRiwD8bi!+(MTMryjbZ0%7r8*ICN)BDk8iS}+I}^DTVFol&*xVx(?!*|Vs%6%tMMfj7NklSS z5IIz%l+>kvzD8?62TETUjyV?jD3(c#`bHsVyUNAtpQTQ%LRB^a4{a%SLb@v$0;X>c zUz8y#(o4xS59i^a%?08PXv+!9rsI};Aa;(-q2QOPPW`Sx-UgwF71jtSzljw+uH-F` zlJq8aL*{O^$M>N@VBm^H)uaX#i6KA;aDW(bNCPdg;x#&K?*nTou>guApRLQRFMr~Nmn)wYG%KssKvWTsuDJJt{C{(7r=lth9n{Oy- z&R7{arfs4UyHkT9b3blG1CtCe4%KpS%CHN>N|#|^yCLHrriS!${9~d{JexzFC>F6* z@ub|;XTObw%!PkWeCmjVIYexOCU0DWJ%6xXQ!mz$ z-tbUGX3U>B+l;8jp8klRzR0$dUB3Qx;e4!8(2l^_3;tsEw#DH6*n+V-K`MS5etIzW z=Y(s-?&Ks&T{hqK?II*czU{CxK>fUKq4lidC1_c`E`-tbJG!^ZkK~=1Vw}bs!h4ys zzd7U09{V#cLxvAeAK2Gc&=5E3rX}z(RTeZ-6p6v<<7xeBEWaTDr#?S|nA$?{bhu=P ziM-q)u_9&~anFaM+-?9JG9Dt8KNc~gTmOtlzXAxEuR*e?eW5!?l?I+D$Eqb3K4Em* zt1q3vrQi!g+>c))jNmJ*>#uqTAK+2HPCZcy1xZ$&mMTMHxOtBM_01ezQ@*jATR(3<24b?prh#w_cBIVdmYzBe&s2Yht=q++n7JS z`z!O@Zx1KjeZGMzg?Cm%V!?uI`I95ibJM=P+mR8chthPAsi@x|Y^AUeT;!W9cK4uu zarBebv^Q`92D*WDZRw2a5C4wqgv7*NEQdevejt{E@!ua`pI~s{m#FwjvaDnFKl*M& zmMS+vy9W=U6|)5wKi`{O-dxJ;S;X(M>ctbpk3dH(C;WT9B$3?Tb+CK|=p0*zHZ?Wz$t~cfY5&^bxV6j1cFXwNns9`qhg)qN@+}f|YfaiR!BwCnE zaPPkF?WGomtA^YWILZkC9`FWR)qQU25I=dX+-Porg`p>~pLQ>5I%93c3;YJSZk!wv;HvN%F`VPBIvNhCyke+K%&X^2e?pAKGIxZ`8Nx?O8 z3rvvCK%QbaZ>jHA8Ku`pAAt^UOJ|cQiQsR}!3827AugTwQ7h23XT(Lv!^fu+%>I!9 z$M>4sf)9xGOha7&#SWu42zQWnERcV+gDnx}wy9eVSFIFWZ1-*7qnXBSdO|YfF`-B4_S7 zN!t4586Tp0Lj3Kr5=DUi5tK|~52+ezkuV}X3bYHvnag-m&obVsrOZ%~xoY&P@8Cpm1QfT`#MVuK_kvgJtA|Z)tB>vd}IVywj1$w zi)vd-9oi^`l-VA*s_sr)d1}gzqL)YnvFz8T_UQSt3lCI>N5>5Zb%UA~{Sope&!(A;}Hgrve)EoXeC; zK`kYC4`bV5c)MXJKDwJ8ntbL%J)SUqg3_{}h4vg5-;N{2r6Qk(QaL{3C&?KlIZYS9 zjAhcVxD2~9jcz}e`k408rS$I!{R#Ru5j-}vndHdsp_zoEf&y30t_S@%lIf{#Q9Hv< zO~~xdQ(Q+V&Cz1Pj*ud*%cV; zP>0cvbt);A#}3l?rhVy;Pt{+2NX!N`Pghj4dRcl;E?uzTVo8T_g$lfrXqQZe+mr?l?gGBd;+@WF_1D@HCn~(G3YBHh#jb)@zRfk?n*8T>J`;wb`zqi z38D@&wJ$8>Qq0Sd^LSX+YRDnbQXq;MDJ3N^v_tG|^>nLcienL*^x}-j&UrK({Huzz z4P_+>2%HLB5Pb33kl9r9_SWqvEdU9M9zOvQybX%c=2l)%L8Zw_TWL@FoFeOXByyJ* zYI2{jhAoFN7+z{7g`AB>aXx%bRG}z?#Zlg-;aiP-#g45z9|gsp~(JkXC|34bYh5o1_(CB9zTDyqhz z17ag%Cfl3&5?$V}%0&odSrC2S4ywI<^QJ)tRYb{!d9W}LTtf{8%KYF-zBW}U>Cv~B zmfzBr3j@o2wj~pX;CS(6NyO?eVt$Ud0?P0^Sv2zWS(?%mTX5GzmkYo0iZ%4tt&YiAzF8beo zWf*?;qDZC^-GYh!T-|*Yo~3tV-!R1qNLeG@nk*mB*eBcSh-b|#KDo^0G7zU&gl7P! zKFjq~l=s}7S1(pUv(d{G;)C0kQtZGDx|q%an)evxh_2fef{)k1yO z-+bi`s;=>~OOMDd-UQmZ&7MC$N08!gJ5w1gcdKE{p34XBF%*=5fqu$>K)}z05&IFU z&F<};e>taIxH>}F(+~evcqt_U$7_zy3hzn>)|vM(J6xM4y1Rd0b+wTNKg~RzdhOuE z505ao^P*QHw7mODI-I~Dt1bISa*jWh{fx&;5~b`LYA-#2`ZksWBAkPiK5f61&p|@? z%a*AfU5#!S@=3Te<8{xQp_1zu5woyd~z=_ z_r`h^%r!Uu_a^}?x_x^(x6Rnpq{-Nc*aIHci(_M3i~71wUGJ+u-om199)Iu(yyxr) zeoFf5K49Nr)neSZ?wpclr#%`Mhbs*pQRo9GS> zJSM*heh|n_)!KpC2F%!8U|S3nAIAMDn62OU_6XKp?C5d1!c2|t&4NZ?lz!BZ(gUym z`k2YNdog)=d2$Mh2Gcr{sIb4VJx0UU#f2;9@0CCM$y*2AB)(zeV8o9+EelZ{r z&q!N?T%wRe&5 z^j;Gl9WEqh0X0++j%BB1I#x2*iV(hh8gtZ}9mEAxL2%x}lLASJ=mabx7*a5o-hnuH_xC7m3|n(a`zT6V3I{E z@zu5Gw~sW;H_opevenQ9B4zHGNZL;5xropC0p zkwj3d_FLG_0)jGe>gUqUBW9A#CZHy&SQ4U+p8QSMQVca_6MZ$npmA$` z*Es3ytCAgs&@^xt?^iB>je;5mAG{P4uCrYPuFsF6-J0{OMsiUpq9hPm4XHc<7IiT?l3PZbBC8>B}sVGLQ-UZ+PIudcMDBo;( zp`7D!?%Xsn{i>q*-5DxqrVBNdD+G`wR+n{ zRtfGNcIcp#{(aN|e<{fMQ0BsxII%ppK{Sv26KZb7%#Heh2*C2MpTpRlRiHE>8 zD4#2Gf@58K$K~zI{m|zK(gk;j-A3Bfmyu%9X$SliyK{xEF6N#ewQZS0H&ZFC5@P&o z6!L~o{OrD~E}CDC{U>wet{og?HCzts4`0`f?qb`XwOuSD?^LjeoNiH04V1fDT2%v< z8jnS}WFMwd49Ob&TWHumO^e7nLAvUVcc{%Jyzs7J?`s6DoY2;v4iYBqEE`_Sm)zpH z^THewjL*&2_kGBQ7>~g(+5nqOa`fTg7JNQ^$kdL;k-`r+1})wP(~QYVUo7MtmxQ;8ZSQBYMcOvW(qk zyN6-n<=AMi{2lMGHSvF?5Ed@}>D@cHsm>(%ii*IOXQ2vtc?pt%e(W(=%iCmkH~B9(T(>83ZwAc4A72hbO5~yb6B{j zWlnte)jNjIa_m#1bH~^$(W=|-&QY91;G0#ExVvkBAjK(8aVuK9xI^(`rC5;S zTHNpMch0%_mxttGCmZHnYi8Cgg=$;aPoN@d?yc^^c?O~)>%o?a8W6cqHl@2i_ukGv zBw`*rY{*N-YqAstW&{))HV3wQS26{{|IY5_$;&ABXXt>rC)j;>30R%t>M0Tb=>H^v_!6)!t%8$)5%!(? zxM*OQEwoV{iHuy1|4eRioYc9N-GGBIegJWtwl4?2nF_S9U*yWEMn*m*nCVl(1WnIv zbzD^XJdTi~f=h-zK;R|0<_AOzJe|HiC8VZWnuvzT)qIJg04|X-x`QWS8%c`NhjDlm zpuqhAD~S&6mC{c};`R zwa>V+%*nY()@AT-jG%rg3c$Hil5t5Zh1Eih`vM}ST+w|vQ~BQ?C#3e`T8M3l_4yAX z*gKVk)Io{OafrcSSvVJPYe^V5sVeCkHJ>KimhNb+m0k^eco2zV>OfQNJKs>PNeI{>&L6tsC%CCeKg9I^?{g4Mi8j!U zY6c9Gb!2o4YvOt%FhqYru>|f8J{wef7ISYPs_|;OJ|a?4(s46o2<06P+a%xEz5V7*<(SO$#}wBA^a^ zi{Ul!r0CTWzqZWL%+a#LhGBXU$Ry53t-L5jQAyyaZwBYFzvmML)Q2yj({DFJ^OM_!idcCX)f8ZJHSygOvw>`TYYviUu0Af`go*1KJ^)s(e0^17&6L@P0> zuh?tWQl9-9RQd4zZglzZ3FUi4ORn_CZspII*L4w_@g zYYcJsRA9Rd)E_|mML5M`Rf+-xH^BGL*_m%4BBo z@MP*_QcqY~5#Tmv?rTc%x$Dvx!ZzagrQs9H?dDh}2w(~Q7X4$}NH>M%e>~byJQ7td zQ5!RG5}$e!Z!Y5COB@9eg3GooRO(jV_5=S+wEh0Sxn!=gGHiFJuI^pPsKz7Q9z|N7x{E49Bu zEplUM&0%AuXRW*EMM3|l`WSP)@7d$bboq^j_$iU1A;}XYMxom|d z@4lxB$b^R#=?hgg2b$^M5G7n>-f7qJe|d&c-O_Yf7t$rXW(!kRHj*_1TrU zb?S?=mbG#i8YBTu`Ocr6=tNsi=D&xlhdjQtlzX@gl>2ItVQ=oE{r!-(t8(RP7=roK z^dUw}JQU7)ILGJg?sPH-_+qR6-u1zC2fySt&dp(|J%|7?{rb8*_7oS_82T##d6`eOykE8zY&M~1)eYf77vUoCNvg%=>$(=EXKigy- z$_$;ZXGeqyfMcJwaQ?^2xKg*@b?ng}oKSIk3czaD6MUItr*8JntaF6ocJtIRD(qdi z-vH%C_s86~>6BB}fH=JktZH2#WELqU5jYCd%TNk_^&Si9WPZ9?y?^$az9ns&=!Zb7 zAS_&x`?Q4-1>v?Hl=&WIzX-;OS!pdk;E?X;1@rHXHI_eZ0gW};#D2v2=@X7eG3JLY zYsrfV2AjWDPDjxap=mVH(H_=~`He{Bq5)T-N@Xv z1Yd6<0Jg&Y8w$k#QOtR^|5q`;y!4m=q-mh51oCm7&=?sROE|O>kAa;5g5TC0o!OpT z7FmbgA|F$dbV0e@XTEo?!|$FR(oCNZQLrc{A?}lte-K3&tJt>b8G?>6f)NxDRAMTf5@VBau4A35v( zGqy~;^m-X`Dhn47=mu!#-@k8dygqL%JMvSet!x0oIgs}AKS~6`EI^6?!x&Fbxqy9w z)78zb51{lRTz`dRZ-HC3tdagbbFkP_f?v^y)Gg0^Zq%KHG>MG?uGE^dIBj&1R#L?N zuwGo#qky0y->rtzr-_hPHU)8>^)%8x-3Wdf!6K(vp)boPon0urh!3RrVV+h47KT~@ z7s9X9n0}aw+{da(&KA1^iFb1UnHq*SgMBI2LV^t{RismrC~40efy)c)C^U2<95%~D zjtFy-3ry|fVW6duckCY8g9}b-@EdwVt8UDt#(S0-1LX<7ZM)?~YmM@k4Whd%QyGkv zK%SA6p($jUjpT*@35t=>CBklEH zGEt%S{zY@ksm0u5DNaUxdi}z$#Mj{8p6jFBcV>+(2 z)H}43tilqm89zGNf{4DpZ%G+HGHE|*ATQ1u6#1CzeyY_4^6$s=+2h;Is^IY-%xo*> zL7X)p?%MTg^RJ-$JD888UAI-?%7(Q`=h7L64hSaAAjUasr zu*-0A^46GS=G?`Q)a7~vZPYNS%3*6z3-ZAJ&b|UBcvlmKS<01U%)qWF8akKTX-2`B zsIA7Uv@gd7*uaY6vHe;0KZBD?4Fs!kI6q}FRBIPmDYKZMGhHzZR)bYS5qx1O(RDFN z2bR*S(z8+5bxLxM$q{AUWm{r(2jaFFv_I{zbNfr!M)d*S%h-%J>;(w{eNu&Yu{Cj^d(E5!8Td<>kxk zZSZ+wxI!b<{rO9nShpY!CtUveyK|o5J*&jP1D}@in27w1f6qBik)Bq~qMTd7Ii!G8 zy7=o%Qc+SMhk<1OyPWlUNXF>}PuS;hbU1GGTde02C^fW^`1-0T#v^!3zw7Sx6zN~( z9zyNSw%<+vT!)5`V7JlwWgNV2!V5(Aawc?kw?S8y>I`a^Bxuc_{{lP~AbK6l&UWzx zlT=tcMu$!MkGXn8nwo+L2*KAF!nG(~biT5`bZniV+n@zmw}m=um_;+8f6l11iy;aO-;LuJH;jwCQ@ErU+#Gt zN1win1^hW`X4Z})$s579I0JDMQu1We3-jGzgyg;^GIVl7vjex$OSnBfq7>peud zF2qq6*{WotK^q}7NNl_`I|+bT`m!YbaI8V;XEw`j3o;JkOAIfvi);;UV-~WJ1V-aO zdW>SP=U3plRJfF0m=9B?sArmTdLN>=zs>*1L{njYX_YSZ`|+L`yO`(8w4i9Fhu{}# znn!=T#2(^|I3O7$c=9MNZlyk&k?{%W*i3Tsl7qKCv;D)uo_F1^F9H9=L14uNCy#bI zpc@8Xe>HV;cb9sCy&kQ(>(W(Zu3jP~h@%$}K>IvS57>2bTWyKM$5MiI*&~%sU!kLfZjV@qjZZr`ThlmFI4$d)_=jKnu z6mnxK>Ba#Iu%-a4Gix+S8e!ArVNqr4U$~~{Vo?LOZvz+&2#Jo13e2WOJxwDaE>yPVIESQUs8hz=jU9#$`ZWBqSsRZbIgL zVDM`iz#uO7el-(t`mP*XKuTX7Uq7|qVsPyg9>%5L#h1gpuK#5KMHt4lIQMuo`e8RgS77Ds~&BU(rhLqiuyCpRzm&|>Jus_(Qd z>-;JA*s>p~oFtPJ96l1iA_3#wHB1wuArejpn`{=wMD^<=P?5kK-iVA?-ij#MP2$I| zx22Q`zh)uigoheK_{y|8@Pr7khPWmoC0Dj5pgKr=1(76@(IW2$AM9DkU;-6+;h=T8 z0MgbkcsxrEbRKhjunu07NhQ}7*QO^OEBqIMSqah5(>HTMi_HAEp6qw44Ldtqj_3pn z`a1MEoUBu#dPSc+KhhaWC_sFiRA|uGW#FVEfwjS0Za;RIX)3~GI&u3&xjzW9$Hu0( zQ^6SHDNKHqWe;CmUU2nCMBj4Cn6OdEKw$%Me7b8 zhtY{gN5_+CfsgOR#jqlpXTm|Y9{df4v2ZftLV&Lm1iaySHkDY@JZS?7O{Q& zmR=#n5lNShBOl8cJ4JjHZv;nQ!cYNnzzrbMr}m@!6Y30$0|!469SbN=T#lCddR#Sa ziwtDQ_}KDG`1Di@1DLb;MYQtkIblP$>gX$bJ2kJU$h?1_VcvBEZ8*TKTP9*rHy7n#_y?^m zh799$B+zynZgOd=sg-*N11R#4WNy2myV#8oQgvQiOhI8`+{$4Z0JQfM?=0beC;j14 znR#kIzqeVrz0;`w?F>W_cH*O_86I>bFk(^g_VsR3Uoch>c>1`Hz12k{Sjup8VXyq^ zVAUb@m!LWPJMB6mdZb<8<0)U+QB&~b3M z04~Zx1igYKx{=rvh_nP&!S{E!FEUd)4=&Se;p^2yM zN}hM_=*3elqB9PdhaXyiS9|?opyYN$vVRVP!`13&a}dx;=VSw}zO`Wfheg+J$5NdT zaBZW+Fcf+Df443y^Th+(ZaM1#-si~x7^-}{VFQ-s?f___iCXnz?w_2qg#Z*#4;B=N z_BBB4AOLOSe^M6gC*7O34iB@(#~t`rVBmL=c&4UQ4<7Tkw=F61#0rkmuC4og#m;Xg z0sQURe7a`+97p;?aO?DwRVocRISvvDtbs^V$z;BVz)_?0P_%3Ap zs~(5nv@}(*Pg67O`qmit5XNZ zH>+!o;#AxQEbrG2S(9a_hCDEl;b9fzz;M{7yJrpzntf)C-V{`1v9OS4|F{UA)-CdCmY2ML23=ZgW97yi3S3m3E>%uf1|avQ-5q0>W*qmMh0R36qeE^#W)@!jYl$T} zeq$PMgcDXSVP+B)*RnoHN~JE-hX{p+aC@qllx&G?QsJ6;#0T4}!Wo3VA{u6pg^U)> zAoY=$*l5xq#R0tr&Y5c;qWy_^hyF^KTLF>0L13 zq$klCGky$5E*1zB649X#xFJa*!&rdRauFG0Qk?=g+QQ{0XxnIZ@!*){K(y)#g++!E zd5T5&r3+E~@Fiu1?y%BE?^i)AeQ??7GYm4g>}JM^PGk;8W+a&y)-X%q%BU$Uu|>=U znzQ!$Tt@J75-Ks`Dpbml(mscX3`G0h>S9giZ*pFr?r4X6mLn-c%Xb))rJwhUF1|)oaB?L%I@LIAY zUWKh~m-;NYv`I5ndtx2;XV^>P6bWOl{}L_<6%@U8zvB3^PH}m0h8bedv`O+db%j3Xk@YvMpZgm+ z)7YKJdIot?!S5!)}l4KM(gW;Hy^SAa6g^g z>l7E-oE$?*ExX@=jouWP7%iEM(wY}|K*vfvK#F(5TUz>yT>mmcZx|4JbbI0ymQ&?^TJ<8m#+;yWRHkOeA#@sam z;M3f1MjD@gSlYb(%)00#pTQ#u#}&UpPus)xAA?R=Ti5=*qxdAi0(ruQXFWI1&D(P@ zQ26=U?%#djxC)vc7=Sxl@7jKO$?rak;c|C%?F9n=goK0&@0FwaY^blc?E3ES5d;JT zsuxKYb3XsEpPZY+HQvCMG7E7@T@KaCl$jVvw$L@}Z4v&zK>7hULXB$0xTTkJC z=2Eg*b>m?(1)E8#Se>1n?{43{AjwCUXA)C`XL-*Bd~1JSt?u@M^zQ!L)U~|Cn_6?+ z@%4zxMpX=rKM+0!uYGyG-sT|oDL)GU-fa>ueySRu3}F3wr{BMTd1OK_yO>={i? zj}|1pwfz@Ie=;6>%5I2=?;Rk>9UUFV$H(=CHT#eG3-&|q?N&B6(PLI1F|vXeJw29`qv8q@&MyT?u5jcjMFSCQ ziV&GWh~9rys+8XCMxp~& z7|c|p{}Ern!>gRshOf&gDJ4|`i{ewfg~#R3_Zwte%sDQF-3Q}_eA=r{EmdNf3G2l< zFj=p8v8C;0L^cn@n3>{UYL{0J9!^h7(~i`S#KUt)(@zh@`G&(ER?j@BjiSQ9m!5_y ziKGPyDAYU{im?alCzpXp=v5_22Wa*2?7<{(CE#JsCJ$xENZ~^Zm-b@P=4CqUm6TJT za7DwG3UL*-uWZW+_D~Fqj7|zt3Sx>}3c3>ho5I*Rtv=Lc)F5no-D;+JQ_rA}cR|S& z5LOl#4D9Pr-#2=z2rKMuW&Y6vc8xQ(l9B^AEjzXVKKykVAYM_e}xVyNZNuJ;2G$wg-d6nh)A#eL^=PCy+@~s3xX#137!kc zdH?&bqkSb+V76T;ibMEr^s+|$%QPNwoL&dMOe=6+DL!yhVF{xwjDsjgwP6|kRr!6! zKircO$4E|OxP$+~8_QW$(qoou?tXcAnsBS9w<;zE19(-0M0cWrdPzv|?lQF-G=c5` zq&Y7&JL)R&kIOq=4>lR&Dn>7cw{dl95!ZU+uSQYTjgPEJ^Ln=7w{*0wNUPs&a~}ug zuiz(B&mdP|!OZCg*!L8%JNHr@leoDn+%)jsj=qc5L>0iodh!gO+IC0jrK3-{xxEj0 zS$efd!4bv`m6v;g^(j9Q7B@1zd^wC2w~+zo9i+J=B;K#JZUnakjI1#?f3p`k&2Xsw zvS`F6%ubk9uJX~CMJGNp{s5DV9AzUg)z^djPjmJ1w1Z;EbWx90gD4~WnRzo+?b3jY zmO6v*$0e}z@}?l3D(QApST;*aKoQ%;X@axg&_iV@J5D8b?HNUTXMmXxzqt$lM<0Ic z7_$2lPM^<5e0xhvPgk38oN1L`rmdgub}{M_6W=L)OiE5pt|R2x-rNlP68sR8laoW( z@HjPv3HVmegaifecJLANBe$*Pm?N+4#UR(y9Cxn4Q}$hD(^<9CS3+8fg?2R2c8P0L z(sHlQV!X%#txHCxrtWTT|CT=A`n1~er$}m34@ubLG{$^;heme7-|6?94Gi=Ay?H#l zdl{h=3i6U$3ua$xZ8b77at#iiT3F!w`o|t5lBLz9xz@66y7_XN{N?4+#@h$$*V@YV z@$o|4#v!tFz>?uRc57*LOVSZnpWMytU;fC!emAC7D#6Pbj|(U?;Xl5h&B=lbHtFBX zMCSgV7XYqjRQ>wE?@{S&NsJVcf`a0%HRMrX@rch_EG~tp|DF4f+BC zzzAeyWLyPdNhrpY+1Y0AehlEw>O6MH-ktu96xH%X^4rF0CqtC>?;D0RYef|;#q6yV6nYIY%~+ft z{BYt0*j9)#Odd&DJXwMPDApPZRm#ShTgox;DhkLE%rxi=1mS3f(QUd#@}eZOc{4%{ zKH)k=@G`Wu7&_%URPbbUnAV}{68y+fzOrjchSY+!;d~L;1^kMR;P;yLI#yyn zLtcoZq+=2PL9nF3SadJ;zMQORAe-)3%7nJ%Zyr8 zRdX)lEfTQ}VcbI6l9US(NwDByw+4>*BWO@iQv1lL3#OgTA3_UBQ7S-jt<4tw3!j%WWG2o`k>dGc#tzJ3>kg{RqM5r#gZtrx}t7Q;H)A z*zcn>U|xcCQQASG#VOcd+m_8rX26`<@3C}ZJ}Y=Bvd0)9)Zu>>3ojOu_O{S0QBn?{ zQJ|_S+G@j7#v{CsYv>;>r-z9StmfsMrywN;9{2LehA89on+GTcb+66?y*w6C;$mT= zGaO%(&ElKm17q5-mX0!$o+V|$QvS(I)u9r}GJ2Ie~r#AuD{xf)W2(@|)IA<+p@(JpUslDL#ke2&bKHHKMvj&? z&Y!Vd`~tpDKULEB+*Y?{kr9s5Kl{T8MJ$ipQisPDe3;dbPtJtZi9C#KaqtTa+`8HQ z9Q_zySC$0(&e-Vhp;`c>Ke28qQ;iXP5%6(GV9UG#y7q^ju(g%r5%c!O_wf+lMO9V7 zjm$YdKK^M50IT!O0VJS@(#OvsnJfE@z~6L(-ckgHF?_{k)jz>>hK z$ws}w+fly=0|xahzJ9Bs<0xi&<*`HL$Zzc*o9)`$zVH0q&CYogAvj5yLc__!flVzl z|K2k_^}=rR*wHEeYcprScuG|bC`mV=`^aBZe2;|*)YSf6#OWSZC$MhtgBh*itx)J5M!B#d^ZVWA{?1 z@uYjqN=os79_=MVc`6GSas-l{Um_;0BeIe;EkZ)RY`nhQ9-p0sECxS@L{|YNwZuvb za5BI}%=RoQuCzS;L_b%$ltbPek^os+ac6OUnO@%5*l2ki9bM`>J!MBiLRwv0+dO@n zp=8}iJTfw3`gA)9{GCXz+5VUr!6ws*DAUXlo?wPTq@}PEc@g}vznA?~5e{VV)tXUP z=PtCiUWQbQuiN*p>=J zz(VNx2CvX96JpMS=O)aeDsc7wfY}jYzH567xq+&ZRyhqXPlno{ph2N3%uMPUHNW!n zK~RMms2qxbM_-DF;5cG7-O!$FlK@soiz=9`FQbkMje4Wf`!@7p=Ij%Xd#;w$aHsqc zSq;W-*%GZLOed*r1X?Z=2z2E>RLmD(Vjb;pnf~;lf1-4Z)*3-CR-{o_Vh1 z3_PSh4BBNBAXv%llOwI)ztkfz7C=Q(EFe}KR%%C98ufEnKDi&f$>kV+BrzmeekF^c z)%Zpi1*3RYhogA*uD^%v7}K-SH2sUN6trgE_c0>4u<#L}rXQ;&&R~y?jeeRNk~+## zM_~SOUJDCy)w1VDmFwdYDA6uswQ2F}|B>{m7@-QaEIdXXF4LxSBil3pq`~yS)#XnM zwHwuB%#zcyva>r_K36zWv(mkCbj6djrK16+1bO*V-WKqMG7lkOVJx6~Nz!Y|R3OXt z?z^E<4XDfF>41n!Ru*_n(Ddm$X8a0Um2I-tAE?n9Y^aJgA%@6Zpa+HhGH@`GK)He} zU(q4ZVrpBlDcFN_9AOPvJU^sLP0Yg=ddr+C1W6y3!IyyyXULU#D9V@;z9-w8?edf% zI;k}2G-NY>j5M!sMlQsWrUEmofKudS$mgVHF-s)uqTE(TZKT75BA`W%84U>k@bZO= z&c?HW2p!J&_8mX^8)(93*slu(Xqx2TiW3_?Cv%{4Nekrk`5v3va!A1|*#YbcgX=$d zzPHCNFt6aOA1D_<@3kelvoL686S%#73iKOu?``{sQP|(>KXiDI-x=7us0Mb9+{I)guJIDo=#|YcUkH=DN~~Xa=h%uJ1w`Ne!lY;+ zZ#tRUth?jfWGz@h2{LDrJTyuAiPKfY_0vlPyo=01t{ZjC0F>7vo*#;*hfdhyu1P!? zherQ_{O(oS+I=7|I)GpJRm5=%u|7cQEGjzVO6#8`po+l|O zOM}00OA3nM@w1tf8Do|me|QR0mqH+B);mVFE1wta$L6+izsg3o;8^TDU-O0dzO%7e zbZz6=n;hG$+ZY}lWi0=X_ynLAsI#y9Mwh(YU3+upv`om!X9Kq;pg`+jDo~17TURFp zY%>Qs_HTUX`M|Q6-AU4f#Kai4JEx~KQ9Z(6O{5f*ly~Q+Vi}a|Z9N^9jq5XBq%7pN z3^J7PgQ-!RF`Ot?xg+I_r>B%{Jzd)mhp}-hA1DZvzIPWh(sHSnE3W2&OivmEyoV1C zP&V=@E>8}qqaUM>Pfxj>NEG>WfznWK>1l8J?Czla-Noip%HyLv|JTo&EY6(qf%wUs z$W8P!gnOHAlS6;VeQwpN1tRVoL6RW}pgEUB7e?27R{6Vgv;A$SjM}sI_O8vRt?-2mzkG2lvL}#?rX1x6PVJ0J6apKA!z@+=s=#$ z-rnBZjB&z&-ir&ajiB4LxY@l{|4$)tfW-9jA`Z;J)gGDDM!urzd~96q%nZ1_z0FcB z?-6&j+KFQ<9R%6~VBlR;OiV-3yxH*L?ryZNug{i@=>8BSph?a$LX`kNQmj#|u>c|) zlS)m0UzdsGA6bCUK>o2+)0#9*u_8T_wV@7d;>xw!hvthV7cmE=?xmB|v``qVpabiZ zRYE^Td5g@PNVZ0&M^(jCC!2@EY9p<+u+VW?AL3~k5~EkhmHC$LC|73v)Qc5X#>m6c zLG0I7;3=@Z1<{=Ua2&V;=tNAkID)Bi3(FWG-mq~b*f@6Q6-l*qg*x- z^_{wQc4)zV!FehlJ&1-3i{CfiEc;v)eoO*#`)a*T^woLVWY3_3aD zg&bLtrNE$GAmPm5c`39e`tt<267dHE)mI69Bq>rvn3Zm*{jb4Fm~Rp}tRJ)c{KH$o z?<3!b*$rj)m8cJnism39ZbyO@k-ge(CQxK!=u&v$Vd|vu8)`|6E3bx9J6^l+|XF1d`_q;S(X(fp_-JAw?g1fMtxe}Mf<4!mu9xq zetITurP|qg=>($x>fsJI`3_9uninph7Cu$j0LGTpOHWUL z!ln5gfD*{fG)H_-v`Y_+!_q{jUuJ!IxVgFi>J+0}E*T?U_U@VSVg!aceox4G3=m&k zJ+Ie)!|6nlfQ5PYa_w;A#@k+t&j-qu7NF z5!1J(83w!Uj*f1~uXwGBiwE;QWO6x?r~jk30O zv>8WxId9r8$i4q46}L>~av3ats#=-OZo?lmQgx*7&E{mFp-xSaU)|lFCY1p z+dPro60pQ|IfpHcrEUeCe+l0r*fQRl7>_6Q>kDuq}=B62@n(eEX z94Jl!-0R+a_#2z^)#0-mnmt=vhLg7@BVWHGft_`Xpi=>gXLG&pgl7(8K81^~@AvJC zgD{;HOyWa)US@73@H4SUc{EwK(DvN|u{@(ZAW)&7@yRTf)^~>NygYoqJrh5l15(G; zUhjGI^jhB%I=E-F{>-UCk$|Ov(U6s?TrhAkJf$R9T@!N;dF>q-*j8eAeC@HcmX2E2 z+xZHj}vt-I^bi<<+9KAW!*n?VbyVvY7!ZPE#(T~+1* z@vn%wrpj(8<_4H1dD9ctoX!@C1?KnuwWKLQG2Jm{5x`XctEl%xST_0_1_5|)# z|22@HTy>;5VHjm{HM7Zj0&tUt1;8d&sB#Ysj;g5TUhh-b)`F){NNE{p=gn#QgFvzY zx>Zc7{SjNg-^3KefQY^&5t7DI>@j=tbmmw-PnnT`KzuW?w)9c4UyElI3b}`DWswDG z5acT`hYrn5cxgL|cL(*c&rrr0hgZgSw>jswh zC)5+7&FTM&8FuDmIHZb=F~i}cIYosL6|DW3Il&ddP8Ad?4i(onu(ggnc$*}fOr(x2 z1II_yr6lsSnhm`srV+lu&Tq%9geQ~K}J5lH&cT~5t zhD8R^9Qd#P4gxOfI%Z9fovo*oXJ(YdHZ}wz&lF2I1gb{0R-`xQmMnpD!e)b9RcIR# z=W+|j8>o+TS*(<1&t=ViC~fUDhE0y;(k21bzzbM}HDL}$qZ|ZB5z3dbW+MH9$q1*D zj3^*M5g!VWm34FALRS(~j3X%#$MK+RL239*BZnb}fShm-FBwy&{1@z3p7wigfucst zBwzwn5*?qhP}x(878FhuR`^q*LYw$u&ayOYBMK$r&Z3PG>9Y41&VSr4+{8?`e1=lL zMVRx85W(9@{NDuleU@b0KtfQ3TsPGqy3-2BA}_S+m(|}{&wsEC;4)4qv2=Rp!Og{< z+0&o9aNjAgkC)HLYq+GhKB z52+*6?O1O2xiH>!p8j%&*=XN_~`H z5gJ%+d>+q9gandbt!y6Y;G@W|FqgAiO8b`QNpz29@{Rw6yX594=Bo!;%iilr-d~%L z2SRJmeWQF2bE)n>Eo?dtjUq$ooEq+6_C@^lVLMT|wLbx1RNsxPEjO_Nlxt_l_o5AJ zqI|XTK##zQGeDefToai(_m5pRBia{UG*-(&3!2Pygrp**=FwBb?_LtuP3=ByyyJbk z8;I{RmiH|%W!GPQ8oV9~arL<7$yvZ@<&!ilvL<0JNy}8hMe0aSO%0kd$pa)PrT8#I zo}i7n+C@iQ((hjKr`)=z7K%5$lLKs%GQJ+}qjUBumC|MWl0?lEv|mVgEZa>yf;|R8 zOpKtX6)cy_@s#VGo7Wc?Heb>I-4HKLhd=*I3a_kuu&-I!SQP4cTsspK26_b8TR;_i zUK68S@7g=uN_kxQ@Ou)mHTI5^LJhYx3uRInC>IAABvGvpCEb?1s+W!=(w{9^9gp(jxu)mkUp6idkK?zuX)T_N3d_{SH@KB=+RGh8 z_)nsuqmh6fz%S5$$E^WscQNSRWzlTXXvEeY;J@{95%$M_%zEyd^M~K74O8SnlMr;d z(7lFAp7=+1=6TI?!#<2KN%h)Tzu}tD6)MzzHy-z~>P1%PoG1o9xQ~B4XR;(4^<*AcoX2)2rlnrO?}ZaX(*QXky3{ z?biiV9@^p2^>vDOPv^kUYdN6RzKP<`RxKwZBRjvk0x!?VgcjhlhtDeJ?;W1Wp79SMV&bpmfv}3iWz?tcD`%^x^n!S4baY&{0vSdrag? z$PUe@CtE4N#&lNmA+yMx$1W5;gu(+_4vQ}`M8*iO7|IXHuT*!JJ|MNHWDcStM9`6m zegRi*m4T49ZIUErLWLnj>|YQa5P(g0u--#Sl?AgftJh5LM9`lo~RIgBges7cH>k#v< zcAYj$89r8&3~gDg8Mv~LCn9TGYovIah3JD@7gMQlvsLwP{$V}aw@?ZYkoVxqM68Gu zgcOA4nA_?4_nRg`G^ovqZdIWVa-d0h%4== zi%=n1jH(FEOu?6_m%Y(5cfrH#d@PGAp~ z6b?{Ma84E#*gnkljwFn$S82C65cNNEAFog_pa+pJBSx4eq*3oP&~W4xo@7mvPRoa5 zEr;~07vo>&t1p#2$;?g9d>j74ga4@$dOGj)x!597v2QRcWwK8KW4R^5h0x6R$tDCP zxP2t7hG%En5>B}Q)gnBbhP{F{EEES)1&;xbuN}M+Z+kiw4b)(Okk_UU1ij389X9(9V3PN66$^T7SUv zwbo{tP#Ecb@>r+&2C1qIZg!cui%+zRs?YtdXa6Q9gu~ZAaA$KDD1_g+g#?Vb-%Bt@ z77UkQd(&|)^#ZGyxu2r;%r#s^k?iGLOtz%S;wNQYky9@>UD~V z(Im;=KCF0NLr3Gl zX<)|t>`>_rJA)d%xB2;PUBFa9$Wsi~yPmy=t%Qyhd5Y5veHVQgUvSbhI*l#wJ?lA_x$ynTLk!UDu^=)Qyv zBupgdATB3YhXP)RK!N$_Zmry`vW~jjH^DrnPrPQT>@$m>7vrZEGts2vOXw9xWP}-h z;)+SvSo!eF?#E&oJmUVsdn>40s7pLJ|M?uASumg6n5d9M*i95Y>+Ayp-OO@QmYCv4 zlHMX98elfnz%Sarlr7x?{{Z!qZW_uKE{m41btdm14un`Lyp15-n~F6aKN5dIZ^mbo zrr-#YRMe>&QTq!yuNhO*o)=((o`IM)jsidnp*&xID!VFqs5JEOs+Wr`qq z?|`im#uVyXD8dn%iKi`ZA{}#yK5}aK1S)R=4IM157COCE;VpG>_ng+(Fjm8;?lN3; zta*})6ovM%-&D0==)BzWL|-v>xH&F)Eg^bkY-SiK&4AG)=cH(+tRklG3bfBJKbi~t zrdqo6Qx+L20h;R25#F?xy+G*>vjzhcMqOR92seB)Msns%9C_OfGLWnB3Ee|RqX6X> zojQZvD4h{ms(GN+B(u0S+sta}1HGXDJU*R{z2`SNergRVI!0Q4a<}hNv065yht=xV zQhBnTsCI`F3|R+OpBM@;#A+j}seVFPw9z1pG9(HZhB)GOW;n^}>ikf-!k=FoVpJW$ zUX#@h)|Qbix>ZC>X@b7C_O3GU@~(REn-3lZsyH`lr{#%H&+ulLxtw@%#42$A&^&}P zg$K}y>h-Da=WJ~U(T&-WwnLG<$i5^5qNIgiiT%8l4!K1e)aozSB6`gH_YPCGY0CvT z=w-#WI5*39H)zpm{Q6n!ovFz;qq<6KUgnF#yG()d^5Zk~)P7UcDXMQwr8Bhz)o0H7 zYsQV89cAk##cH?vyVxh673=_&vz`8$bRN+hIW#U5{pJmk)aNR0N94|utXmu0KK@@Q z9C?%(dlAv+D`phGPA&8wFFk(UIFi3`aL}FoYNcQB$y`RfmTVLLi|tX&kyY;9$5MCD zSqjc42^dTePyRnIKKCk_WawRgG7w0!A#cSic-i3X;nPb*fcP65R> z0bD&u@_wDu{D%xlS%Y~t2?JNM6kQ;!`QV217oLpP*4&NN^ClyU4+FhE0LlJvm6o^c z`2R`v>ueuF!haA|3Hz`LDejQr4EY$ty{~0 zWatu-R+$x`v2}=ukeyC1uVrbtk8cly{e2bzvRKU8jno}vbs&Z2jC7bJo37bF zO}=%vla{));c@fqliU3g74aM-h3vS^#!1 zUNTyL_xEg1pi{gZ3LHniL7nfi5#_{*OBXl0yE=DvH#dj&J^nxYo;>_F1Gv$z2+g{{ zxVa!ezHgdyF2*4Lv$OeKMgh)SO~o&zrlyL84j=Lf)K=Hl-9vnoR%$tq8h&fAxeO_x zGI(}v0gSc;GO+s$&M&WLf*!y$EsOJz6}tp1WN2-2Dw5WKUPWn6jJ6@xz7s1#0&!6v z2hb=KXZF!64PA2as6~V3tj)- zcG~7~EKfgE`w?G}d=MXpJ?k2!W7VUT$N3bilZ3nU_m`8%Iyj&5Ma7!*0ax1v*eJ2+ zgWh0a&Pm*MtKIBnETo%ga;!%rXBU_Dj*dUhjpmJO-HN z1R)oGWW*@CGM={Yv26ZbpNF|BzIu83Kt5fU@8)$K4;Y|7#~~PRxU~px2L??o`%hXl zShefpSU%-EC9Hms9lJf-xO{njoW@7gR5UU+zBp;!aQE|j|D^akL)Re(}@|+t-%d6oj-?%v1`sFoakRv;DGU%A&ZMR5+8B5tvZbuQ3W21 z3JF)_Ary(XavvF{fq~FbZTzYx^i!`;mR>qe*}%h!=5mGT5pY1i?V)K+vaW#ylf7~_ zvwWB<9&H6q^S~t2Q0CF76Q8fswOK`JozHt zf{ZmjM%tDBNKu7jNt!=^&m|UAf0o#lxPVJb=43M023EJQ9>(s3D#P$n@%;>-@$VPq zN@k+&m1;;=&qzR!Mjy)?*-<}U+Wa5OLWw!%4f9^luo8H(L%Mmj^u zM&ZK!zN!q@Rai@hm5Ff9?_+Ml%x6i#yiiYKSc;65$=V0253eMnBy$8x1F--xYlQB5 z@OH!4Kx z=xWx^arWW^+Wrroi?g51LS2++lO@OPo!vXjhV|`6^#JX$E;0w< z{%Zb(R&B-NZmG^OlR?dsv9DdCvYBU3;WU=HwnnAj#`H_$YKJ#jggl|YY3=i# zlI*h-r4&jqB&(YKU^-oHdhL!R{dYX){9Eg%njnef^t9xhhfpfs`>ZW!#L5cP^L;bC zCkg9R1YHjkP1?)w{MS4V+WLRwtky$hmprm;N&*rxI4b&O-Yd86cccVxXE5{G#?1Fu zU7!?$0)1};tRjpSxn`Ma${tQ*?-7;ETDxy1L77WGT@`nZVRtKKZQGp@r-8vT`sUncpDVj-m$rQE!bfY!RA;) zye<(m0(dz{LSFoPZaSq~x!rAC@*Sz7VauyYU4t^REObO@HdfHqohk;1#J+OGocFNz zkN-++e7A1SVE&%#7cu8wx1aLP8&@eUm>{GFT}kp;?FZl~1InpMgYJOm{C#c<==G7I z#M7SO{?zA#`vNm8e>~G?U`_Zh^~==DM%Q{6{AnT@us@?zTU%81@o21g+&*p&N*a>3 z4gK4;bf)3^$z-f={!6fAtD1BiPwCU;&r9lk-W9LVck;JWnt!C@beHt} zz#uk7?Tzh_zm4WDo64rQf+?z-n_e5I@Z{j`F57%22zoweR(I%S1b|g(06LkEz++U; zGsO=Gp8tv(pk0)2`~F+60dhdEwuZ6pzrI{$J(+v?yRe@4iHF`@6eLrJS^0!%a`Llc z6i_xrXv-46$>vYTK2npb{s1&d#&L-hV zCy}ju-(n<#FGrbWYX@CHt!TN84^2V=$(KnEudZVDcI^K&b{!y4?AG-QabA#LEb@^xSl~5zPD* z=u!bl+TxPwIYg(%{tDR!o4OKpV^JQ3;X+*rv zVGM7UJML-L+3xRG%T})>RuFXHOwX<|9~tJ=)LSrc?gQ&Jq+Mc*=|vC=M0^x&^*{yD zrxE9?%zh_~l|ZG)Safu#IX&m=m!P>)5>C$MYUHtQ)S)NxrAb@S_D_5N3$!+l)I>QN zitt()MTgjixCHAcA~QV6{pA~lmT7V8h zh9k7}wP4DLti->+SRE%SzF*n4hJA~0!K%Kd(a=(iC?1AFAv>CC$bzC&l&V@?e!U9C zLpP>BvQu8kfP`h)Xf)XCc}uPmllarv3F%ANIXH8a(;FCuBAL)v)edf*`0PaK#v(Yv z?IEO5v-+7;-UUoLZ*>Gv>m<-){o2vYNR5oNf4=%nScPZ{QW#RS8o|(miK1^I(tl|i z=*Rj*tRQU0zVyae#23T&#|Vb2_V?O!Q5}_`YAbU^)o{P9eTY*@B6IWC+@Fca%>OGd z%#y+MFVG6`i-=O)C~x2rH^FfyiDf}r{{Pja zm}$_%e)IF;efyc})tFAn3-chrA~aiHzq+~4U7kP%(wV2Yxc}bdc?S6XwXIsUBy1rW z87w$`;_N!cI|&OI`*SJj>!X^D`gRTcyFe>+e1+RUn;ktl;;pmkAxvXOBg3gsH*uTZtX)9Q6_B@rljiWK^b!( zLv{)3kI2c$B$@g+DFqw!Fx_on#08`)p93w8M;D5J=jZLCt%#SK<@GNoVt?$&fVXg1 z|0?-NDqf+~Hg9%4~ukUc!({=sJS?@>1Pkq}hp5Jp;MWAi#fJfACJ62KF zCcNk^uvAuZEIzMleHKY(iWZ!!aCRmQ96;`Q?@NL3`I`6BA-yu6d8B=>T_AMCJ7i>d zG1qb5)#78})yWaLW})Qv?<1743d5o6v??O18}T*Emp;%a+%b(j zarXsHL~kxPfBth11ukjq?svI=xw%g@Td(B?SIB>l(>gMZja%eYjW@IQu_>B|hrc%R z9XHw8Y|aMODRxB&FyVf+HU?e~Ki=VEFd!-0jJZ^#YS1eq_bBMZGJGh-N3WJPEz5sF zcNjVI+IXtae7qb)TcOmq?=iYq_WZgDQ;bfY1Mxn>f`gKE(0G4 zxTS~PSA+rUo<|9AB8-vF_Y% zF2MOb!I3gAVT?@0LV{5CQ*uy%XFp;${*-ydZ6yiFk|S|p)C6A%2tglsbl_?n>UZ07 zP@m@Oq8Gc@US32CxHE+3h7A($1LAz-KWLJ>4g2ArQ8`3c=_I=^PYrK>t#4@!34B~$ z@#1%JznxhAs<7m$!kYMVNF+J9{kbpi)-&*7P<(B*XMgn+Z{^G*knBc+!jIxN?2Qyp zIzbIeQ+yu1zz^yrl0%{r8tv4=T-53W8{eiYza!_gc##+W~|!3CKwaud~~r z=!mi+{)JNFx1QAho%? z&}-e)v){1pBhqb?&txz*$BPkBv?L|$L^Gn4*r+ck!u3Lu91R2R1feev>rLWJZCd(3 z>+}!2)~hmz(9BzM{cZxlkZ+4T54#}KsgE1E?(XgF1xSe)8X6Yf9>#$!>FEXl^VJ{^ z4>t1Iq+eIr`Gal?3~Y&`k0kC)4hDiaWIT+IkfVdFZ1(zKJuFS=dm33$dc}EB3`l9= zR9TGrlNZzUT%0jUKgLmTlXS~;hH{9jQ6=6g;WtDU&x+|D$<>)wyDLj+Nz%yC!Oq&- zQMy?=Kzr4R1bcSr>5T%L#SN;cMp&U-N9 zWc;#MM=f%5T8JWWh5|Jya7zbagiPLVYjssfY0&$TEo2jZ=6v#v7E5t2YD%@CKcfFA zMmZe^p2I+QiI|MnlVuuqabKKwRrH0YZE1`ttQ&R<6$kx}e>Ys#OvXmmirx@=M;8&H zN3VhUsd)YdrVHg1;Xjbmb1b7|kE`vrwU6tIRE87Lj#L5RaXthS z1RHR$`x=ZRjq)1&l&il zVhClB$92eU%l}Az)W!7)r$#3K@J@+#t^$XG4;oHu>SM9LHr z{O_#)d_Uog1Da{$H^#EYqDPP%nq&yBN&&I~QD^wmml%W>T^a$|=%1ZFIewgyKwlIG z4i_GjAw&JMi6}{75(VRq9p^V;Ct%6S3RoYkDtx!)D5K&GJ>(`b3Te#2*T$e3-1Nwg^BosW>Y_cGx(Uz>(W|)qbXMyVo_(+M0`j>}CyLrgg;VyAC;#}BD zQfv-y);P};#qS{xDzXqJc!$045#|>!3F7ZIRua9onUwNOW|)Lb4C*O-k@1xq)gm-H z%Tfk1OT6xHpzaMz$^UUx+Sl-Lhwt$27Vp`E+8VFB>lb7}7opFzJad(Wr_!uxBaz;~0oHHNi7pylDNu0_7| z=I$a&&0Ku5*=CJkU`LV5-dV$Dj3zaZ=P)5I)!n=I>@X&x)*k4JP2IHE`Np?gxqTJ1 zW})a2_DSupz||km)VeR^Xi{fFEBmkynrMXlW zZD$aGS^)rqnA`5&=h2wsWV5jQ(TTl+K?&Y@H7W%OQU`sWPnl{HYo)aWaY1x1-+5y%s;~CZ`)lTs3L5)qW*sOmOj!OBXtC@ZRw8k3e_4L>ku5 zbN1t)1!V9>eH1d(=s>&g{JI4!YvU*#b|ZYe^Kmh&$ayh1w|KA5VsQLpaJmlr(wWP} zbi23MBFQj#5hi$wW~$NYeh2dUp8V2uR|Jojo$haMQJhPE#uvpb$hA;0OGewjOEkN9 zS8Kl$-V9Cl8x3_kr=DUIr83f{ZHD13At)sXQZ$&b8sv_iIkofD6OB=nQ$7qPQA>qv zygU@XaFaS}wu^LG{oszJC6qJf$mGakpFid=Ff;Q8o>E{ZZ{Kmhc51%zu^x3K|6k+v zc-;HU9y5se&y4`orkBU5m+{@*T|;gT3$84{rw5#V8HH^}A-?TUPmTcsu<7g6h!>MYW znk}kPOxtkT589Yp(KMxm=peClvUd@Fa_W^MB;1uWKQ^JG<-r&!7PWZWpgF~rK zPFfNof(G#p@tT}Md}w@h8Th%4zj+Jzu~%Zxnq z;;IgM3Tj2M+7Z^g+x;|n_)O^Jab@~N4VbeeZMv5>My*Ww+_;PVAB{ff5m!aeeRoqB znBM*?uO%SXzYI;9SEHsymqh`_3_Jj&rGnSgbPcor_#F|3XA z2?E(u@Es9FGNjL`M-z`Qc?N}Mmc~ej<_TDAtLKTvFwfMu5WjP|66!Zll~T15w-KHX zGd9L-5y_`WwK)edK7BQcA<@e*E8lM4jimpez&jMojKJXd$BRZnf@W|!@vStn4twV! z{>tVSl4*T%V?S__O3~TC%5!+84_Mf|WB}}`K#i;apg7p4yMcWts<@?DH##+BP0{hS zYx{vZ)XHB(YvMp>3xL1(zI)CG81fe=L;t-Hte6_~n2+a)ECEEKtH(XO*+d)bb`TUlA>pWtU3@b1_F!i=r}I$0+-4cXhJYbaOL zphOcTs?cDc9~qgChr3#McBDL-IUXy<5_>DrUKwBHK=Uf^nH2Qp5cyoXd-Jr*^^DqY zoKd$jk}TCXM$#N?DQW+ALUod>(8up4{rV^$pT7LNY~*T#EfsU<-CgQ=Z`c@h=!@do z*N480S_M8cNfj&7pVe5x~->ND{>jS~RrJ1^njrDdH*t3qam< zbhlfzShW1wx$OF@Z4l~~OkGB$UIOx%5NQBPArL$H3~YI?hrCd}_%zDd?wna8KTr+| zSYIU?yQ+?3*ArJ0sNs7qPE24^tRR@o|MIJko5qNTgPsVDKc>z*au_&*%yr8dZ75qX zV~W3oWRq8@)P;q?zB}3uLlJpg=RT@@bEBqe@9NpTIZJx7hI{mGTi^CQ?G;S;4GuQg z3ZE@%qIrMY(E(;}f5+v@XCbUPI+*V@6MdOX%k^2js>wjKBbmzja$4?N)N^p5nh)GW z+;;WZ3-iL)LSHY)+yrrSkKxY&2)O~OPN%I@4>c5R3oh<)ctt&DSHDF-Pzl?W{zL;6 z%$>Wh0S{)vo)8ZHN|dTH_fO;w88-a3QVo(IkF$DZ-ZTiDj?1VN#r(ijABHa~-L>(1 z3uCH4YXACpA^72V{a=FA_p1n0#EFIKmIQPmOJYU+%KjLMjZdZY`yDEi*8b3ngqqiVg*tmDk^be81>hbpFd0n#{2JQ`zW~3dO~<> zUm2MA-JfU(md(pBTJ3>IyO~xD+GT&E#NoE!)=SERn?Ugxi!{`p3BEfsn={3HY27nX zGB{Nj-{+>$tB@^0drOUYu!AJDe)UV>o8JfqkZkZ8;Wh(rqj_lgz}c4!mA*V^!6#KZ zFeVbb0(|~i{*cO2^jDx{q_d$~juEPlk;AjL_6pxXeBTpOc&pNP($_|@D&8h#MC@X4 zqjiUWiygMrOUDrxNh@V!m}bHx#mNRaSXD+9tLaU}ihoO-WYcD&729%uELRstzbAW- z5Usn?NHM(vzKRd^E48!1sg12|{-Q!JS1~lsz<&m|0Jm?e1&jiVCIo6lHnS#`LfY(i z61G1R@yO48YGT6)To~t6_{t_HcHUh;W*}|KAHRUu~u!D2Gj2 zK^K6?O(Vbbh#DdHrj)LcuDJC(?lsGJJ!gTAZ-jwOwz8NGL+~1NlehzfQ)LpdkltFQ z@^vvt2UbjS{O-6z0Q0D^cXugH8f~uLTxhub;I}BA$FiQTEbEq84_ zdAj?WA>?2*?o|~l!VY7EF~nH}35XBV&(Y|zN@))!!qpO^>^V{rvx!u{mqz5!5d+w0 zB%y6Z+b=(T(#l`FwX9^pE@T%nHDdoKvKgm3LvDixJYtgQIYYb<}{!1>BNWTCm zCgPaMs~_bTpuU0MJS_k!*~tkh>wpVV0y18nxX{A+qD8NDdmy~~N$wjmEP_d%frgME zIh+>hbFn(YHs^on?lOj}v4a2omSN#{L7^DJD&C_mWS>DjEmN1t3rAO1Vvcx>%hkEq z-llUmAJe0Q!>QXw`%Ovmk_HC&EYPy4Dt{)KFm+c6d}X7OXBtlIP)oG8szx4GWSCGI zAN_&7A!h4ba-j5qewN@mj*M@KxVuKlKc7RHT$7%Pk1Jyv=CT_$j87rU8yS)EL#Xn! zd4Eg7;aRp1MUBXIMC*|Sw{m72H6xtb(vvFuW_8?VY)h|rK72}&Pca+iG?5l9`pZUt z*2+=ZRkJ;-r1>ej=nSw6N$A>XP~Cof2zgw0ct=$c2Mj%hsinWHp0l>hR175mz_zC? z6reTg75Ow32kr$`lP^C#H*GRyL9}+=I(G7a5I*4E{^fkx_xb#TFu`34*0Qq=zxv63 zPL%;$QT0&i+mAE1D!u4$dFjlH$`Sf=deSN+A5JOr9$ufFpZ}6@`-KN4jzK3RT$8t3 z{BP3W59$8feaE3)%tKN=|2-437})7e!9$+u+SkCT+pyrV<@2@6=T*Fu)}ZfZ)Z8r; zulYfu@uQ~7mxBy$Hy6%%e*>hwdg9DJljd`a3q0Eyl1Y^C{5;iHLuULYa z;LY*@+ps%o+^JE2YMb|KO}*DRMXwn5TXzOBYwH{{f(&foY)mA%2MlQp)2cub?uf;j z+)ndtvuZg=wZ*<>{8o11C311It@!!;jI2BkAIWiy5y%_fAO2ZUOOhZRw)nrPhC_iP za4dlUHqYPRUk!dd3ebiB72Byo>57-d@B3mUkcI1f%YWg1K<||QvCt>RY;mr&P93-u7iOgZ@2@TFOik-2OyiHVHG*{PL!>D|@mRJbH~;Zweab#MkL~); z$03)g+*vICncrtJG0-Mo5oo#!f$2w2k9AOhRacl**D?@D^vkX71Ze&K_E}4%xH2w` zpF|n+8ILt4uMeG)aWmOa zfo1v`8@{Cw(NL;+spaHyNGY97rO0V29K!(aTzC!_LGm}5w-pIRFG|L1IgdpoCLZKX zoUv1IFq%j&q44HcYZtYFS;1&vG^s_fgeD)Z$wIH|KcWk_g?G&zyLow@cYC{77E)pT z{}L088V`bO)K2_^{*_n2%a}}z3Y6A=r_XKCeV1Tk!~ZFLX7X?)k+OyA1Miu8m-F%* zn+E2qr0ArgA+kdq#oH*h+hHf&x2pUJ2h=oB0!t0g~68jG2w zlRJ(iwIwY^E?`tBhRA+Ngp&}1hzW`DiYVJrm@v{OO`oZfuGqm!1|qLidMvlUjJ!!6 zmMpk&dW(+I8J>>*Y3o4WCfS0~BBWNc>bFC1o#pQ9>5xo`j7vB0X>_8kxFpcDGDN!ZH$9Z9IXP2yf zB52xQldzR*%#r+;Wm4tiu`gS7XLiB6{RXCoJ{-F?blQWVs#~p|XkX&U6b&hriT&fi zv1(*e<5Ua}&=VO6BgRGp519&L$xm-oz9RIgK{8LWH6;l*P*$0XtFm$)Nkf8lV0NJU z5XZxahnoh7;@&`98M6tSz^=4B#U{&UxTgUfFvE*#cixMoYJj>(1eA|40x@TW6pa)+ zcc})zwA#-GGqCS_Z_BDfN-(v?79_7&*34i>oW|0qtE0EYW<_|Jl5Gp85RXp5PiD~V zWE*1RVBj&d)ymf855Wf^Gx0Lj5V2N*B1Ev#87Nf|p|;I3RVh7uzY@PEma3ibn_H&r zv`578BfimY4$oAnjIvUwskVyc7VR9+e1xF)<@!l_=R%9h)I|SP&=)yCNfc`8)5JmU z1sdIf&@c*dWK*uoULG!Az;i`d)xvp$mAERGYfZx_@K%yDMGM~HGmk{&XvqRy$ZegEI)9b@y~iHw@tuMGS3@A9`- zsBg=H*D6|wJcM39Uu+`ZE_34kVTJla`@xEuyQ z4hD}yyZs+6O7P`$3__b|pC)EN0qHeqTuc86e91riFQ z07Xl2F}zaIX|T~9l(sQ_D1NBsEX}hM%i@~UtZpim<_~L&QOrCa#?02MyRMaaG4=;|*P9CxPx%?Gd&4o@-t`6o#%00J)0>m)#xqlXmmtAzljCI80DK0}LFU~Nd2JZ- z@w(1D``&Qc0R}rDNG0%hQ4G7qKdgV92-x}29!iG0vbN^!?++ggg`YLp`u}R8^q0WK zbE9f?@1hoRtB}O|Spevy6BAB6%W_Qr3F^*PHGbPsbyA48-CnRMX2H+%su>Pc_hLAT zUE&ROGJk<{hPeqZ+eUB0U1T|fcwWIkEQARDLqxnNKS^Q&OwWm{1NkZ2Wm_^o8s_Z` zJ(3NJw7*}xZ0e~Cc+|jcF2+TqQon^O66)65IfYMS6k|ndAP$dL&Q&(?P7%92 zm_e*!Vi|zuyVUWxPCG~b)yG;CD6a~0w zh3qMhl3V$OX*ye5TM-iuJ}WU9>4chvQoAQ7iC@owI-Orwcm<3${(*K+{bo&oya#}C z&V$_@{1+e+%O_sepM3(1JVeRmnTp6$*VlrczyjiJ?~7#E-5_;)M-SuvN@&=a0l~fx zW5FN{_>Fs{41|BxS)Q?JS`#7m?Zz<)H*wh{%tqk`GCxq8W6W2AidxL1W8Tm#*pKW2 zD^lUX=j|5e(06mbtA^A~@X#nWHpzb05Mo@2-J3~UYNwm0m7C3yi`2*j9YT$@NFk~p>G$zR;-zP4~eM!{P;vCXCV4Xz}WYwqh!89^p5O)@``8?DS*UiYdl9ipm z{SH=P4^DlX#Ko7(bU<84ctUD)BufgK0x?Bsg7RW54v2MLUYc$_PE#r1fA_iR_-pPX z5qYx7y_Jlg$GX;EbWbeK&rYS1IpfkqC=lN3QXi${5Co;-q-1g~mnge5pZOQ$db~CC zH?-HMqT33gN9xpmIcZ!v!pYVM(E0v_$HVFtbHL5109{x1 zD%`HjzLJ#Vy}Fh=3`K@;hP^X30B6VT#Ah?qGKc>U<{CyLDG7mq$c7aJOA38!V~uZv z)K?(DlzKmj2kpKJu3DTmuHq;JbtnTH(YNB; z*J5uXlFN@0(V$U*>kxJ8u(gXBv%NPI`*^secErD0O@qNV;54Zh)rhH+|5;Bg?=S=+ zNpI25FAz0`QHT-AL?KdvfyUM#E+xGg3D&ryM)WTwdP|0`sj1F0O`%(e12BfVP{h3> zNwBn{3Kt#U4!pQ2u@+WXA$ZLh1a9-IL4pS`nDe~`dgT)y1tXqOF5;P}?kK!+l|yBB zO#XWL3{uzE2dTv^ELa6R9Gh`Ee8tBYyjvl7 zt4JSKe*`Y^%>f&j6vLniYw`lkAJ{4iBgk&&m4jd*N}A--KpJ}*aU1tl?xi6lxRDqg z=Dk|3>qoUHTdxjs0d+ToflJS?o|e*IxkL!)jcMmF*wLDWeuO`M!2Yza7Q9!t;@V;$ z+Ltth3NkBh#CpYxlk zZ`*JizsfuAkrZBnQs#jNErWjtU>)Vjq$qgsI2T^u2>AsEk2E>;IVHIf6n_Um7jP96 z%xD^xgG4I+dl4-OD_&cY)an4iYI%ozo20o!J4LwCMS&jHYqkfoUqRNS`8{mxylF8G z6tA7;GI2&}JFhNpGV{8`TjAenSQXZQ2no-x_eTt5BAb!D2>#4j1Hhhk=5d#}w{zgT z`7y@ccMqACA(zA4^ZqLzZ8EBea04Ai>HnKZr>Er}5(1cu?Fbz=b7R}R`eXlC z5kTUfE&iYV-ahuA3#aYJS46D1d9zRw;t#j)++_$}!uGKC z{j<%au8N>DT)dT2>HsmfMar9#5#%m!D?>Deyk6<{gK6@DK7Ak%H+j+g4LuR<%p(?S zk-5`NNsFx(uKDhsb}=^zB-()ojen(g-DKYi`e-O3sE0&6+xBN+T`mi zrUM>i*MWD{1oG&`oXg|bO|8jY+kVYuFT}al#~QB4SvRj{f+?jhiscCOmTz6rib=QV zHbiV9>?__`k4M>{a7P>33gZn8W)}I6`pVhZwHSFd0`RYsMBo_~v<$lxte;wMh*kak zd9A_Zvk?agfY^IJEPRf;y1L3b#_s-;OkJQ^zz=v7H2n*6y>>PWDo(-M0$=qg14!^n zvyI+Ljt!>)yO!^IHUU(%4yO6~@0v(_3%97kF$Z*7fEwN2-ECK38WjnotpJ;tmTVtl z(ajewpPzz&F$RFpUZGe}x{|m${K;HUP;j@E@ISHR`1*b-bT<+l8Ym$p^)(h0`mntI zPZjy6J`>?vhlEbne|=2je%V~ex9r(`d6s^`yYY9Q{0hKLnie0`_m6;g>{O8*$qJ$s zhgZFxZY_h0vYl(hpbpkBmO_8hmD`2VK9-m%0e-|7uVoU=91j}_`@Er>c`;cDah(E- zM*eHrfRYk}Vm%j(4HcS_&nzkoGmSKNNRG;xkqJD}17!TRqlrW%z6^zj9AG<$JJYD# zY${i)Q6C2oFgGI8n$)3S7jcvm$~6iCvKR@nsD2Vt3Afh~uqwSum6s#Il)rcr<8Bzx z*1z?tg&>tuJ25F$j!B-Gi>?#n4y6cD67M=SLK=HPS8x*_<6X$hue)pk}&o42<$=oOagmsmi(5l(=UWsb({{sNoaU zq}WPgzGW}NEWjyWsurxFo%2HTRl>!JQFbILo{mwe(7;Y&`*XxGXH=D18i(TG>bjuA zE+n4$!7sv_2#4a7)R%$e0Mi4p9m^@DGU?4yBjs;o=g1;1*)9)=UbM0#7X2J_ znR7s&+ITI`yYw+|U>~*E{vA_*&w-P`|acp>6Prng;;5>h2)6qI^})CKIc8yc%_ z4cD(;*x84GSD7X9H*(S6ucX8)ZKk$zz#2d~0ZRpetdzmnX45m7AzUJHsG+nJBdK{Y zXwMgy^j(1K;*)BeKcJ${Ew^=>)MFHG4xOFNnuw}_yEgkmMKay9l*EG6{r|Swmz#Y2 z!I?EQOS`1Bp+Nt)OJKl1VM^7<2x%)RLygK0`&M{7xv+9sJU9$522GAK!@$GP>GDe9 z($I`ZA5j}$vAebCh4<@P=!xZPp;N#;KN4O0 zoyqm_m*_v7y{wetqE8&I*&)EoL#58Psir3CxeJ|>Lmxl2gao;Dyh>i@I5myDG0r$2 zM)hLwb)w&u!or{ZNV7Kj8nG194|@_FlgS8C6>=CEh^+yW74uu#J z9g55GWyaq2U4Yf?ol*`{3ESA`Z9`y~x0@$in>kkap${l1VN*Q(xZ9(`>ml%p7_{=h zy^evtdH}4yx@$kS@q$&H%e$9TaoOiVMb1h8<6!8Al@TX8ehHY4Mnlk%OG9uFq`-}B z??SFy=-_6V`YjIdCFKkj>l8XfH|)!vP4`?@re?GAkpjyU)@_bKFQYg2NvEgu z%Q2GMegk{)hD$|f}D>)W)ujO2i1OJK18?Wt@yZgM&x4 zi&Uj<)@0dEUPi8}Kj)3SzWzfQ5{UH&asoxp{%A^uQIu7N`6|>9?K%BiG>;^oGP4$92m7M8>$~JN_tB?O zWP8qL!l98tJ|rjMMI4Z+o7b;mfN@hufKw^m#?fQ`t>@a~*3-?ue~yRuIhTM3`4V8_ z0#t1a5V9xWfO~qlv+ z#rVju_*lvXLg|UWl7p+{(VL9y*P*?(ZOhP*pus|^;4$E@4OnttJZqJpY?YWaE!d+R zl;E4-*UkBsS!L8@oqW27(S1Aenb6tK9^DB&fQJDH#N=XBzH(yCW}SNj#&KFMICY6_ zZ>tWRS}-ofK+;PpfmMFEU`CYKVllu06!gux;=3P7?n3p|&D%eO@Yp~6DvT(h<8~G0 zA55@r!BBFktLuab@-6bqAt3uBlRn;=$?-(7A&JDkLE#lOU@o%1fajfZ2*8!_<%s-Q z$jV3>;`eiM9VL=yFJ%gtJUpcujSK#*b)GK!1F+xUXQ$$!t1r2D)Y1*ho`+p1tc*G; zbx$ZO`8uXTSHPC$>Jue(y|i}_FajBr#M8vXwyhD;QYlYwMNOcc_FYwt0+Qnfhld$G z>({(ryrzKLR^D~=oVR>ft=z?6=!34TTr4lJ8Tu`>7d{?vd$!t3UQuwnOcA935Fr7Y z4sOhwno=!xz%{1!a|m!c0|{i(hwa|L9ge(pnG>k(+&&i*6Y4d0UzVX{sQA`b_wi$G zo&i_Sl7{DBHy)4T=k~A(Hea5?p1E)Q3wihfI?v*%9|uX@RH4_vK6<^?gAjAO^3~u_ zL_M>ST*T`HtrYYSB_pF0Ine|xLdlFF8Hl<)XEjYMuv|WF*U4u*RvNc(E_&X7NKXE|r-Wl)p)(J`Cc@u-Ul4{a8fqLHQ zTU=B4|GfZA43e9Xg=9LIzZ}V&Nc)p9LNsrm|JRJ~CxdF8=TIytU!LxWgU$ z``UzsB~Jaa#S=bnUjyySyW6CziM=K!YkhziHQu3B1BR4Xek9O^1G^?JJ|hlS@w?v2 zD(0rO6sMRk)+u@>UQaO%I%Lt5$;Vi@$5`cX!(ZF+?WTlqM(9#W;Lu42LyaIaH@=>X zOgCZ8Pdm)!g;Es1WgpL;v%B2baJFu{y=w=G4g$_MAC-I|Jr_QPubj3JTxCQ3&9LEK z!sG1!O`MGN37wPw{uN8C({Q1N$s>o;vP4CoheeAWc>$7=k^awxi5I*Tew65fNRgqE zvIjD70swRSC&6Kqsh{dtHntSug4qISxzug*(-Y8YxbWlM7eq*xpCTgy)9C<3J>)^t zX^{?qm4z$#LMh%%&wHGbSJI!&ewvnbUNGa0e@*oEwF;SzJeZPm#Il5NAE32v z%s&)hDKvFfmzaySKXw@j*S13Ce_-KS1mruMp^<%Q<#-qNMxyO$Z2K}Xf1g!`$GN_X z0xq3@lJSl2lX@^ma&SI%$LHjghJ6D+H(!T8;~P0|*eE*3X^EKF69sj9TG#o-F~_^z zQ5_&HOwj^9vkpW20j2SqI^p52LlBx{nM}n02t*wy!9h4I1sLCl^ zka#0ZCmkET9TpLEkQ%)ob_2u|@8tWa`M26tqv#>;17lY!#sP&$|5X&ro2>4S+#MYC z?SBmaG``u*Y527lDMPP^>YV37&+}Xsnx$$TByv%4?CNoLPF@~Y^c^4$JP3OR1O2V#}hLmxPSWm>hLa_F#x1gw9Ep*&&FgNNrS>6?T%a_F=hXWXru zmakrivBe4uTwZzJhD`15x|Ta{@nEk=OQh)7Oy&%V@J?n}k7r}KOw6+%>G=ZF0Gg`7 zE~@i)rJnk+0-%W3ua@$`J&RbUXh0oJr?s3=a z5of!6rf5FzhPc$KxQz)JnjxhM{N_d7>UhSAx6zEAH=m|Yde9@8?$Buk__E(MB@FZ8 zfpSBZBayEkT8MDh+}w5+MWMs9cuj7kfvXKz2!Y$W^AW=bE?g-`szBdXI_2c=NQP=fcPQmzjv4Pz!f3!O6^2!9ss4xJ=}H!>-bwo@nb;jL-0e=#9z^n_4`>b5qujk zTDv$Pkgaq*CX8g6Tw-_CUFmXQuOZw@BNKN~ zJBgq(PBpeBu_KwKAeXHH)|>n%!hUp*Bi_=Jn~SfIX5Z>rZZMWC9>ND4s%tA$zGP+u zGw^>KRbxmHpmhSB()en8*I?7ARpE$u<*u&7uS2&J&cKWsWy^;Imd7uV(V>-RoK-Ms z{Q?ge8j}>lqT+i?ZfO^+-GESvRD)q5>d-MvoGlB0@`shoVO+??ct+USZwQcccv;6w zig*-lb+TJ*=SRspnO?C_LWP^M9VGM**eIT^ND}iu7$=(L2MU;_x1)26$WS4hlWIp= zu;r08%G=LGwEs9v%*y$}$JA0c4B(%OXd_T#Kg?=OD}1geF2sb$x<>2p)+hF7b2MC` zpji9ar;YBYFg1V2CSIYa&v1%1{A5t^Zb)qSzb&kowFW&JyLfb6^@G}9v#IbTIt?6! z_z~ku4%>wNA*<5uPfoH#_==2Rdpe_1!fXhQKTMfUskA(NI94?#2v2T0HU^zHoV{co zMBE{%!|wcfXwldn&aU34T|`8?p9zz02ba$18fz(=m=5q0OR!K8-zw=H=pEV=B}~f7 z&NXV~wCF&)F*D2R780yvmzTuHXAyXt{hS2lmqS*DBrpqjOm%Qp==Dv%u*hrcO8BKu z)6zDe<3sux>Bq;_>=a5T1Afm7tyS}mQp5>PPen{EOB708$x@t>oG#^G{6C(~Dj>=>+}d>a&^a)4 zNDLsILpRb$cPZU1F@%KDASK;Mhjc03-3WrDC?WN~e0%TzfFs}lX6AkF=U!`F%TUH1 zCBaTb>c&*){Hu%K^S=Gu2TwAVt273vBc{b0;eWU_&EtCB+ph?>zFKtp86cLkzX%Gp zocm`OizbF5#w1p3$8!DOXn>hVpL;S2Oam0>Es04_#yN%hl8`my<~881-}SX?e|8sw zz$R}B2x7muyZ4Un@4IRC?rp$jXzjth`}ag+iVq(KmxU|mqyGB$e|h?aT-c=7{sT=$ z<49li7u4A!f%X*M4{}@U^UU8ZNvttxzf%z5Zlp8HzQ2 zSP@=!u8k^9dCT1j8t=gZEOI${t2EYc#(rct)KBPey>Bd|ux5txMIgTt;om8=149a$ z37JLg2j`_IrHRVn>L}}BV<;S4clfFK6sh4;q;2Dvs{^F5jaR~0=&y4GQ=0T#-Cm=J zexO`4-Wn(pm#`D?Bvh5E`idTW9~dy{Fo0%%mx-a<7_{+KxTHE+;b>acPDm?fOWosT z%UQ*abtW76ek?L7OAHT(4${6lW*9ZchIBb}FsD590G>{f_v-SI1cJh}kdS3pGK}si zQAhNnP_+N0xLNQ;R5W?{6h8aZ;R?GH7sX%9_YUk|aw? zD$;dL8|M$(OeqXQl8fReuvz3>`(%-Ib8`sV1fUo~YGT-EU9IgqAwds+x%G4p3{yTj z*<`Tb&5rOS0*@&0RIwBe;&g7(KdUFlqBo3hx7vUtNfy1+^9m;<-b^-LA|^uE^|`|^WkLqV?! zqLdeulbLfX$v@j&#UJ;NsQK;)O1MFc-^}J}tmnGqkT>xt+a9yanq?Mhl@ZtSm)d13 zgziImS0Z#!kEYoi@dooL4F-8*MNbhgWBu|KOCrJ*SYUE9T6dKNHo72mOrfYj6f`1S z5rq#@>E|`5?&9pEs9NY=b2y-W*SP|t;*_yB5>w;oqyi1`G?l@;mq|Q{F#}p9R zdzTcZ2tLiOJpKqs5aeNizuSQ$+Zx#SA#=xSDC@GQxD@tVr6PXYNP|mbP{L;xKCrHJm_T-&C*-iI^Fct~}G9KG^d>FDxPkJ~v&gNJhzjo*gvUa7P`?;DB zS|pEdYtuh_Hr79pf;ELuq>?LsT1Qw8lsAqYQXf1P`Nod3BoRg6eMHqIBQ^o+CfCoD zH(d0|v3VAs!%Fh03FNhGg|Ys9F8o9kz(ilf&iG1Os|cfsxRa4zA%ogR?Gx*O zAijKLz6?%m6c*F2&A?^E$9jW0_4-Ilr$I-MsWSV>cR~)u3{9tBjBiat95L*BD+y!ywi-e@ESVGb)d}h1#+_?@WS3zp% zF9?lC$*y~CG&oW=M$5LLC8k-VTgzU!RL@?ofx3_Ns(@4h!+?krtrMBS_rRA=j6}>j z7SjrMscI~Gz}Fxj1(rPmkxSYlu*E?B36DmU3Dca`RFvj@&zhW!T7&Jb98WK>X%z3P z*m@-dmG9A^V#{l`)v1MzMl8d1v?E_oneetBmq}yRao2InuvLq^LuOA^hdO=poMHF& z({v=R;5dyjjkLkwVUsH=|EGpQze6m)T?Mm^Mbk~EJG*=3IahO#bau!#9$&g6POe(Jf#tvpvl5w zZDo?Wk-D{V-@ra#@5x*DGoBJzf+H6-J5uCbWs9-{2c(*VUZ;;CsT~j*%AG9faAn}) zAJi|84icmwUO(Le5RAm*|J(6*hrlWL5+a6&^(^vJBsYIFM9;W*jv-3Fw8Fc>3=5%nh5!Wbp$g0ZK+Gk;9w!Snqc`9quZ75O{X&#SA*kv%1r~VH3a%#~%{u^wqm?=)U)F-PV z#Xh=a%I)qu2os@Y-3@bF;yu2O?@tpsG2f8cptoV%3k%)=MaS-^MH2GDqz3YY^M=)o z&R-*t#9zH8l!_5UpLNKLyg8dbIxrVNDw#17 zAU61%eBW^51Us@rMA(*koy>XW&{~TXcnZUHEa`ZBH?k$*{8l1GFqA$x@b|qU2xCj( zcL+8QrTOK_>7%1OCTvJ8G(g!u!F!*2@8&M?vm4S3f4AX2#g)yImt+pF=dKGi(DC)Z&S23gsSVYSsJ>$E#i%p4m(r!Jm+Iy>c)HWG zJ`dV_y>@<=P4(y+Fiv;&_V)ZwOte_tC}ehx9lH!8C0q@Z2DN#S#9>5JVKt0*w(nvt zE;x&Q!G_YQQfEM*$MNrP)(?N&FNLZqoiD!i1iuh_-)DfNFtNm)NJp?9UWwe?dhixv@L4;>AphCaT$j}S%3|ZJ3Z9I_m zI&jQqKV`(o65A6waI3t(NDSG#x*~LFS~DXpYwBbeB^oO z;=maG=$9o-jn>?e8SV1JB5!Fj9jD7uPjW>w{FR&A>w=+(NZg-`t=+Oms7&Z{p<-K? zc$q9ynFx#ynOsz|kF^Sw@e39B65`@+0Rf}qrN&(ywUz)b6EK_)y$Ag!|L#tmm{C4YgYuoKuI zz#$J>@o1z%)l8PZ_sd10sS@mX8i64}fkq5$IA#OIH111jM=2GUI8i4i(g$V{HAc(3 zgXD;r0!Ae11Y;eb<(<{fWW*2foZ$5XA-q+^o?=bqf)wVGDq>|_c9Py~zc*m-#^}nA zDv{JH!cE0WPH%mvrHf*Y{Fuf4E`!JwG@=EP!&>u^;F{HZ3(2dc#$W7ceG&sZ9(yYP za!lw^>5Y=a{Rr>+rBp6!I`apum4J(KQ4CfFFxHp4Q1DMSq25GtFEi$*DW+pd7?^r`I8tq)U@r4#EfE2TM`FN-g`tL;c6(Z{+C{R!{ynSVT? zDdc{yuf&}Fqtq;0a13IF5V;ZaQq1MEI7~1-4S#+CiAd|k)*;nCwDw4GO zK7)gLUiEyG(NT=Wi5iOMEAYZEwnRZQcCAZzcWq5{ss=-Hc_3CER5%YB#7j+b0@+uQ zm{ECdaSS5+HZspmh5KD{{7{;rCv3(h-Dtpx(S>tbF`!1ff}<7Od6dZk)G*}-sgI49 z(+NM33LsAA=csbXB{Ve@rCSRXRK$@)k%i%N%2iO4a?e4fCL;C$BSW&?D$1{-P=X50 zI9FC>@qe{4JpSLRtZkVn7Ozb83Q$ma(?|^hEKi2oQTZ~)mpa09D@2Q#i}<%geG{3B z`Ch#v8jP97aAd5E@Fm96A;28pw2fWFa~vcXi*yXvrEj09`~o|C*KmwwmBGfxJ30b_ zSmh8Rso-+bqzMpsFiynMf^mDytN(>#f0Ffd;q7M}D4u32W$q`F=cP}Sq0K+;akZYG zpyy<#C+(AgS1KBK4d}{{)yG82gjDdK`$P!1@MH9da~&*s;9LR{s?hcs$-mbYTZf02 zjIbyD?Swtmz(AOCL52lDl)!!S#;x=3)_%)XVBtWNk!Su|wSOhtwLT1zJ#_?UipfWwcxS%ZW4{O|fg=g#;D6_BN>|aEE$`+08`F2ihJ1s0t zz-)kuLi+LV6}6@hI#~jDXa{(l1S53k=V*r{Zmv?wyz!76$nHy6>A#VWAv<6VYYJBi z3=0ji>w!0n0~DB%`H>qeBHR(Ak5Ba71r&WT%iKyE zi?wJ494<+O#(f?RO5{U$oiCCh^!}Y&pjj63_}Vl``3m)dUFa;81>P(Ipq?r--8fR<-ZfSFp`8G;C!;3$iQCh zA6TF5=?~s>kZhpC-gV}>Xc~=k1;alUPO%hdeE))hT()%&| z_Zy&9@cw^u0>H=K75gt*@ObeMuwiQY;Sr5O5O=tfa2=K}X)C8jX~cXr)u6bhj~?IPxx`cQ``1h~KLP*>G)_c6#StDS^5VcJ=*fc%$P= z=?(Qk?h||rh${gvM!0bRTBYzCXbs}W_CM8Or~?ezps;!VOk5(?pJA~vQV`-li6M3u&yL+Dy-Fk-*qr>V zOHSh34&7a!`CXsV4jJD(v}~R3`T+xOLYa(;+9` zh9^sD$u){gs^Qcz%fw;spzU#-rz?If1Z#5S;@V|d|0jZyS_j^d;7L^J;Zh*lWmgv*FY~zlzm1wxgFyQB+XirqSBD`~XudgiY%?NFkdX|Ln+p;Z?0RMsbiF z-A{9Un6~R{O|4MkV7bu8nQ4d=sp{tN7$eFl5f8MX$mOe(tkAy*E;2_XkW1YUXw6qK zHga+-LP84qq_=hedwt$Za5UIYA30j``8X!{QIYpL_N$iKr&P6mfrFp-=;3{J@m#O-0MGM~z`*WV@o4pjg=H9&S7UVXV zoo;qs$sPPnzqWfPP9MOS?vU86LiH+1f)=NX6Kd{u2iNoRL__}lla+z}Y)=3rE`Vit zdYS+PN-xjPr2ts?HUNaeK?bH#=a$uv@S}()f~!06XIboQ{vohN-fe+lpw|JXjKlZ{WFcO3K}Nw(O!JeULk zJ$*{Bz&;I}aH3A#dVkt0j{F1^2%2K=;pc_~8N2V)BxG4#s5yAB$gH0RBCZSBAw>%m)1&hV2m#GP3wEI6Vz!8|~HG?IVts*ZA=J5IiH`2h+kG8GS^eO;fu1Vtc-un?!2U8|SmVBYElYtvwv_X_`G$Fqux*z9ZS!RPr3kGGZQXSC_+#Fj-s^?tw6@TpfXl<28AAZ z1c6sJ8Z;QHY;u}M?j@HfRn?fEfk|jH2w5Nk>f6=#Uty9T7J#H!F3EYSN`nDXJ9oW|;)(k^{{pHh%jq zLU)(Mb}k5&5o$5sQ?KAXhS2?Kocc3sEjn7r>5lCCcpWXrYn8(iCC0m~3OqX!aomU9 zf>hGbV-pDE8EGya31=dr!|hSa;p6WKx>cJixes95^xT>rJFQ~rwwVC@I^q8E&X>WR~zHd5JOqjd0Z=p&}Tl2734X+qnl#fCp1+b6HeN# zX65vPEa$z7BxV1IZAZ{i+79(!j65s7#_Tv{7^{D?Y5_#U{5(na{L$MGab*ZgCA*1p z#9;0lKU!9YZ>P=b;wHyg@9$d9?eIDhY1kOUS%1A5{NX)Hmac!lyN&D9A9nZbQT;81 zL!GubygRbX<4P@`pc>hG3`OJMJ_LiBZ0eXR1M!ipR_!!=UxF_+hSM{xv(<5m9UR_|$Zp^Okd&9W9LL6klc^4GF?=xpK ztzBnnk78MjVJsL@_a2sYknpC>fB{;7GC5sHk>uyyWw&5>9)(U4QKB-N#jnx^Im^RG}ImG zI3zSlU7B1!7S9A-R4s3A)G{9X1eU=FD@rCtVuR_GTg_fO>tWMhy?qFLp7$STGY&F4 zIz%^sCeVY)yv&140wf@Y_sl6z=Qf*pdJgLyjlRF?%BJum& zUe!h|m4uW*Ex)!^q^!lxHkRO}?^4(sSO2km|0qLq_K}?OtX>;JoO_$xFo4K?*zZq5sCt@*-%B(@_lK;%ksJ#v@bcI zJNmDW(^^4t#;9S$byI}eMQ3`ResVY}dUpg$ahhQJ%a_@Tq0JHUJb@G8XJMMgYY^{b z-D5>0wjQ3ETsh=d-&EUpek@!G+Z}x<=uVTVZqkMDAOn=y6HcVUt-=iMvro)5u7-+6 zqg!9dO{-nU-aO%ual+QnvQ+Zc6e zj0G)4X|h{x#wCo6q5(@vC6|8mFCuFH*f%|j-w{G$>d1GyVbQx`yB8zA&Wl^?kGUOp z0WZ7{oL^EcR0;rCM%~TXYYx4KSRlNpy7~7NMp&Z`$=!?9fB4ZX+fu;tW7i{(E>XdK zf*LQH86(pU$V&jN=61kN;_!|7&D#VE^5=}u`np?4$mIAq?it`>?tdn5t??uPy`kP@ z>GwxuPw4u!W^R9flcX)na0N_2$2Odp zeZg`9?-c39cu70K7^dxjkrF*f6>e(8fK2!rnbxSdn^hgXYAy*uT;@#1%sln$V2|#3 z(VlM4ED?x#`-==BJ_*_9nV}tRTwO@HG*2TG#>f1sa_lFMalQePhHrvv{Eci+g*LK| zTdd#{C!8qm#R1xTXthq9rU)lv5#@}iv^3L473W0H0Zp-91Tlj zFq{xlSf5`Ro=0Jn)9zj|Rbk8&Jz+dUTlzPVjYbAWAix-2gkb5b&FM8awO!!U{TY=X zLMeq@7Ah!7>!6<4Vb#R+@jHXPzyLO_6hFbM=wfA+_(sNTR<7hIfiqGPj2%TjnU{pS z$w{;^4}|r&j!xmIg-kH|tdGA{qlq{MEX(vGa9GR2{<*JDdh)#+V`PTes@i@TQ`H_J zDl44qYBVnkP+8lK^#vQ{*Csa{@9hj=HH6(V6rL}r7mP)`#_*tzP>v_40J&Kr$bp4S zso4_dv6=?&b4Wp)mZ4gC#$`w(#UIM5z9x;snafZGORD1P2w8OB7KUL45;4BpXBPaR zS+Dtxkh6J*u0iyb*6*C8nFuLfog(9Zl}@WhJQc(PsI@>fin@X) z9TJLSGvsn1b#HtmPjAi2J0ht3x1Dkk{h+V-7`ATUlSuRO;h ze9D@*9%mb_YF1#plCFXOv+aoJrd6imxawi5b~Nd(x+~v3aE+IwY87n*qE|ho9ij-@ z@y~Mu;NxrUYTY_W)6xwsRegnYpA;S@iB95zDC3}ux9|{aG?`D@IYCV~QD;=vkGMlq zSU|`cm>D``uPVEHw@r23jAp+leo?$v(Hz<-X|sK2v2`l+K9pxMv)|KPQsUeIy+lZ> zU7>0dQYp$$qm)JXZGdNbFa``|h4{7kd0AlOs6SccdhmUv~XD0S@f%HNu ztBq%IBVW|+x&kht-A~`?ynI&o{XVq>2^2nsU%lE4dX0B^3q?pkw%l4-5DQX6{2YTC z!HlC>gc@(??82fvUV*7GkJh7wCCzRUp{p-sEP9X_xf&f)X|nUL>cG8Y z$FifZCEQ2m^$#S+U^;?3wYCy96&3)5;2ciD!#i;Lq@sGfdY@wv)Q8D3=Df4 zEN%Gq+XLP)YH_YrSF!onilTp{6sYgP)&2^{^)1Nq6o`quA#bHhtiJct9NO^v$%k)Z zn>1GULX7W{m!|Cmk2+(dP1Dh0E}1ueOvJ*HOC^W2B!j{Vku?1CyY~EzT$`G$9XKh> zHlP2C#KKs2<6ySiXF|mI(ipi0y<#a98QGQ_A(Hgi8AzA(FUn-^uNX2(HebllUuG%t zf{xahUpUAsjZd{P@LR5VthVjGnKO#Z}AI}6o1O_Bz$HmKp1L^d+V; zCKEIK>e?NzOz&)Mx1PU#ACC}XEkycp@-<(y+t!uEb27psJaj&{lp< zii{{owaRk1#MrHC*}ZAK>5f%kI-zcf$?*8k^VI%PZsK{I=z9F;^gjp&E9-{kbNr`s z{MTk?4Mx?Ef1FqX@rB7Gg4+-BLLNjP&z}C)-j8;7{n($)-A!V91ILOLjFv(P2TLcB zC#57US+LEp?_*?G%%8XQzuS7(1V#TyDEcoS-2)^J@UJT3B9O)1xtcRmC;-nvlvIeEM1ho z6`d4(q(G{W!3d8c#dl>RC%jA>Yz0V=3GUu_#UICU}n z^@5zvPP|n%uO}lO>E7LQGE8Auf<`uJwGhqi zjdidGy^74ctwNYN(}dn9333|sipXGZ5$G3{(VPwJ;#U<4Ybat2&GO+Zd*@LR=t9-wwWMqcB&5eMK_rEZVP>wtGStWir=hgNfq1jW<5LF~;M|$8K!7<8y zt0i0Hh~NH(H=R0WArSQ>>m9v`{qkH>xHT*_1Is~I=}pnkrSD}0F*Gh!La9lg)KgY4 zy~q^l>rF9D%Com(rlyOpYBZzqo|EXYW5*U7vEN^rq zmQF?>GIrjq`ru;gTF?rzlo$B?DXCeUw!j|@#Da{!B$Q*#ORvS8{&Dm*GyDdrd`!@7 z|Gc?o;f?DZxwybg#Lyeh!-7;mBEX|io?&f^%HP5(n*q15ECy9TwV?L&PPS9Z$=|^f zs`*H#hyt#lv6Q1QZHN&=rD1(lgfNeuT}hG+04*DhSVZK4h)kkzl~K`SR&?D=)e&R# zJIYm?k7}z(JIOgBa%7OIV(Ib?LYJX$Wb`3+0w0u%_9Bc5S*ALslscj_QS*o=!^se% z!6cT&e@Im1G5gBG>d}VScz!6F#gn9Z*;R!%c^gsfey1peYSkg}Vb){u3DYkycfMa3 zqGP$Q0t2Q9YqnVQHN;Gt+}|B)fc+P+@clkdsXIwC!aBZI6FR33xufXEp+PGmX0$8P zJ6fiwQF;BBI6!z?63d72Ag{(A%y04hoCC%bNnBQ!oL1Bq_9SS*>Kcc6v-OXAT24RK zhZFA~SKqdU`1+bY?GIuHU;GnTJU9p;;k`I}FQ2jd_DC}hL)xgM>t|RnJ#GfYJeGh) zQJ2lMI|-~v%{xF;&rRCZND*D?{Gj;Ocfu#GZT9i*<4M}vr($0Q4850MZ1zQXQ;?(m zho{dd_Ij$IH46N+itAxX@&vNQM2xeeL{UxhoHz?#8c;r_eYFv8k&g{Lf`odzYy60~ zSfIVRZr4ovc54#}=F}NEc);j#zU(Zt@qgU#GT#|DTR3Y*a%B!U;hQWr#tX9I~Un*{M{u?uJ@^+hX7gOl{Xp{hjV9L(|12zZ{1}y z3!9`Oo2%rby=C%}@4M#IXud*dc<%Xf8X|%hd3P;pVA6L)bha?(88wGXtMBq5 zMKQGw11;EqAq606F)bpgxaGDMmJgo?nY?^;^}jIdH+sl)cMx>6 zqa3zkzR{5)HU2Hj3||g`npDc;!{OdYP5KOpRVI9}wtevobbLX$48kcmf%tL_cMz-bkZZ~#AKx^pVHzxatXh@&2%KGkLP3*ZjfIZ zz^NTCk>r~Ai2G5#2pe%)@V4gb4#2S_oDP-mzZ)A+EC)4Nf5-6P-h2JctK^?*%@F5= zeQFQ|d>5HYL~x^_l0zZ_`;lj@F6ekEUfiszc6mU6+ncOYWOM4R$WRvBJ3<%+B|%TI z33~I5WwhVkmD}X_c7jGfa!Y?MFjTO&+4Kc`k8(~0E7!&V)cT@am+pPXj$IxTzQwU+ z$8mNqfb_siBn$d|-*mnIJXY-P0#d_&AphOr#0&xVn>Q`Hc>rLbwX2Ky6QA&dq1bY= zLr&YR?{IC%CHmv!(=W4|hYld4K`WG?2~wj56Uwd#v9U7By^?x`ULb3!U{XQYs(bW0 zxqRyt2rp&AO@&`eVBGK4=Ui;RCiVHW4E{Gl_jr!}=vDs0_3k!p*VURF{R^exC*@U% zMb#@A%vAQM*uuv8f;g+Lq$dY-D2av*-HL84D{UN>JBNF&wp*tS6{%23GYdX z{BaP|VIS!iyxTW|$&hQuuaHJFrrkUYP>^l$SlPpfI}SLnI!<%qe0%*`oelq5!3d6Y z$~*?$i;aNZlU+@2c%y0|JxUejrA(JVL)wGH)gKcDI?&X8q!EV)7XZst2lWf_;S4uZ zqYo_+;r}daRn@WI3#s~h=H6BPp6VjRaG5%xLZxDH0q+GCQWkNH3p#r^(TY#nP{l$7 z*)o)TyoE69JmjMOuhMI__dU*yf?h2PZ)b)+RWrOR>~aW?w{C-GZB~T7ro1oR_LzIxiYGXE!?wQcX;F1K(`~8G_>yFRX4U zc;$gz1ORM$24DgCjo@x{Us6IKSM#h*0WMPY$?u0pfC0(u6@;1nkV-L@{k6JM$UxlF=D*}3A5G|?`n=gdcUh3gDO z;XZ;9Dw`E7(Dxh^581fsaWt#ZfkbRwh^~L^n@~Z8*|uaErgD1isWSTraFnKIWD2|6 zdb84}SF;S1b+KwHuZu9?QgmCY7?d$e@HqQ?(teD|$EZq89iY){$*MOi(Vj*^&zXJB zp~Tjy4`xJy(2iG2f{kjZ3kN$5R%ll4H0dTNHhplkFpj`}^&Rx@;d+dcO zWzrlwEAjl`wPSb~bB_H(1?3djo|sXlE;gIj*yXW+!sNiS_oqLTOMqgSAKo6;V7#Au ziD4NUIm$bATu0k^$(d2EARpQ8houVXxrimaa>8WVZ&4A1K=|SVSxR`^s{vOMRqPsw zw1PhkrU=A{u$q_`jaS~qt(#L*>wHk3zqfjDYz#@n=Z}X*4(L}oi@x25WFp!vvJcZiHg8?RJ3gRVHh^b++a&#b){iXtRJ+ zOvDE`E&K%xoh`v`n7NMA05e0>6ICN~ZPM3lq$0k_s!H4{28W4g9`5KYvMYo8zHdt3 zfTYBrT)TXJ8Z!{B7ehM+Mdm>9VM0Z6`ydh{XBq{CMoq{S<@@zZk!RP$O}Q*eVsiUx zf$*;Fd+>u9$_HqHo@bVMR$T+M>TDbKMPi$~2cOR67Lk622=Biw2apSiMxfftVJ2Pb z!5^FXcj5LsDHu#WoD498f}c)W#HbT9L;;Xcr*AKy+qngMUwyWQ-kpV(#_;$I+cVB< zLsMcj70zQ6={2>gP;-(=g@znjj=m(8etH2~e9ynmT%Ko*G+7u%dhrl<0jsFARkhjU zrCT}(knbq|&ehfRc~-%4Ofe_Mjd2+^LP7X83>MFr@EwGiD~PDol&TXX!WHd3#1z$F*NEQE0jG2uO_=< z5rcl!?EVWJ)}I{+Ccpi2-qU_Ys5M3n9VrgC9}TuxP%f$j^Ac!Bd(vz$i7_z>eo%Y| z(a~`XkcUs%E=3$fpW!3;2&qF8cSm$MNErmsh7e|(hrNL!2Fp}okzs4Gkum#B=g6>3 zX?bSmFZ+?s`>i6h)cQPO?BnYEVK&(je+GZTTaX>NPcpeJ%lPBYts5e}|7l!Jd_5Vq zzTId*y=dNGCuY%r_+_2*LO#=b$jQJgVNCg?7z>KwQwa)dS`LPId8lk?+BhvM5u|N4 zcr|}xRP85a8lcl$(Qor`*_b4}z|6nNgY!7+D zajXU+lqV-NA-;FXA&()Om)Rjfdo6E~Tk!3uN3)0^3np3}*kYcoQE4O`sPNe0&}g_L z{`QA0ras^D5xHUTSa$;0Fvgrt>hJe2 z!QIL+83v_I6ln?HO30~(rLqWqv0O+NRs;Syoms{c)1b6s6s2{+sace5M- zZ9_}|QR3LULvno(^Jx$$S$YnKJii-GR$BnA!xaEMyn_c{l zdwKk#*5GY()k~^>x*DpO^YBYgfY0jdFD5palbc`hUu!IoL*4h=2f*Nf7i069WxEGc zlEPkR!A_+Z(L;OXpF`WY>2noO&K(kCWavN--_a?gs25R7 z-}RQ6$T7GAFW8{l14-n`(e1NM_CNi3aUub`}RUNn$ ztq#SeCtCRv-(R7Y1gs!PXe}unt8YLp-b~>8EM(REhL0uR{q-iqTnTf?=7q`VmCaRS zq!~Ul%VWY>)hh|;Wgx45VNB(bj?#0+Rjl@-3WT_^+r(eS$CyBk+SRvJLFw>pJf)^V zWR}khePSg05aQ3bweyEf41BX^cvX7|T~AWoK?%i~$ADBhY1We|Zf(>LJ-5OMuEXJ5=%OSt22;HS_3NpZP>w_2ke> z#Wbn93z@AeRmV@F!}J2+l;n+UM`ysKL4L~M^5YuZC?$vle>F(fS2*?M@af{}iM9)b z-8k01Y#a4^q-)JhU*f|L5PcA%*R|&%Mgc()K{?}SgyZ`g1XjR4LTP=WG!NEc%8;z> zUriCmc=SANR0<|@a7uGoHCz2M_DwR#|8ajdZEH&fTkPRm4~&vx*J0F#h2i9S?+|VnVr6y+1?;gbt4;T&PIvIH<6#kjqOSmMK z5SZ+V9VkMFF!A!}p9=am4%rpCJ!}1acT+}Vgqy*>X%UJAXbRek@UOy}+BT|Aos2~& z05D3`*=E%Lr;;#1l}(-znVkTYTg}`IM$vqMzqvdGjQm}aiJO+I+{*IjuGiRTZ{+_f zvt_nfqh$)Kr$!T=q90rqxcX(Zt%#%>|8NH_)i4BUw-f)g1P@B3GX%v0r% za-_->p%&%xA-CK%u~oN#j9b^2ZVlX?iTRi*@s5M*E0^l_hEcB+eomAf;_OiNQ5KQN zlH!+%9;He#ZJ{8ho{8%`Ve!k5kC(J*jrp=duWj}WIOGc*t&dV2xZ;DXzYpnPU2C_+ zLG!#UE1=6*A1B4qVtB{K3Q)nHn2GAHwt5Ynkz%JtE`3EwaR#cB(N+<|g%u$ykY;Lt z<#6ismV@~1$J;md9wa?rVhqE&Z&STS?Y$75F&t&_vgQ~UlHymT8UzkO zG%`|EGMV*a(3>7jG-iGGPa|!XXqMwM#P(5usbJTG@Q&0mUdN@h+AoJ_BOc{v#}A`hAHeyJL#gc z<)nxgip$l=TBf`02=w|gdG#m;G_B*AO(ql|MZ9!kWSGl=v0>bL*1tmy)Dzyl2t zok4eLfRhiXDA%Bf+3fV|A5G^zykpw^h51Ca_Q?q7%}WL{t6Wd_za-C}?rW)nFULn# z@x?z^XJDkfdH*-vHzc8y)-s%&9!(6Wd6^DXVO+{GSg^<3(_;JFv-Q&eF5EDka4F&1 zcCzG|Oz?OQo&AbA7G<&34`}_s`S;Q+WPEWmu)hQUO!(hGeZrWvGLL&iFBZO+^?BQ} zyGvi_nrNGocmG#GK5b7x%tb}zw0hY8 zLb1O-juEu^vNza~gdMPC_Vb9vDXXwej(T@64c&aEnZyhcZGDNj09b9Fg)6|* znVn7b%r60q7NQ+$)P;y36NODnOr}7RNklVai8A79DnT(5ymMaW+u!Ms@ePhK^<6P zTi1drzdTQvp-_gXKz>A3Ko}~&X7JMRQn+a-YA~uW>Jw2Ir|5y&hO8=Z%eUgJR7xjd zGYBV~Muwiu@zKi7^UY9cx`#z^`iMN(#nz7w$VONcVfYZ%SPg$+g@qDDG64##wbzXk z<=uWO`#x$zlwnEslpAN5Qxxa)726I%xxTgwBd?H1F#rA1g=~O^{A4T>(+)Ex3o<>5 z6zZ$e;)$GXguT77LQbQyta4&XWhld8wD7ttI6sI4(}1CEP`pG&If-v+i{H)2-Kd`6 zdv0$lN#N%qwgSV$btNVkXBJ-tt#>!RbzEx{KTEZDS*_vYAo1bFvVSN%`V%u#Z_>20 zg)GbYaXs-cmI0x^p1(5vcg1i!@Vn2xY8aWvSmNyA6@!ztl1+;je-xsC~r+33Q*}??m-Szy<$FEWqM3+l zK|4{zgMT&OP2}jiL4c2Gr$5kBy9BT-fi=8Yb z_cqd(6TDTIgp0V3pYv;W2Ls<>bU#3n60~QZY+eR=EQFK@vx1&L^jAz!L=w>&MPDh- z@jK_AC71WjV3`pr0^;{ScmW{~(z{++#}`~0J65@~^6&i=$i&RNV=wo<&qvVTB2ME_ zhPSVtNM{8Wy>u6xbe8&&iL)xcC@hfT;YG4Le(38UElr!?>n$y~eIS-mxjFq^n|eEp zajzj$D57T3C!c!vGvx{vi5mMF2HC4(UJ;9obsSbAJDp7~ViUz7hOt8xs!bXDUF14i z$Aob?q853?xOMTeqB59@50*^^B3BeSsa5klfS4g7&>ac3UiKksMfgVcEXCvm&)S3o zu6=On3xuP`&(<`Yc_LXc>~JJRAT)?@rRhvk8Ly&BNOe+JQSf@CopZ^7;9=p6+Oxek;sWg^PzaSCRmlu@iJdh%Mz(0Q?U4X#G z!=i3#9VdLV^*(eBz{*1?>I8zQPAM$}xLh*07SnwYJ4SsrOZebYo%LW}Hbae+ z#)x-##5~|mA?~GDsgKiU5+%u1#zsnHZO4D+!|$n5QL|7Ojg93+W&y)R$KcJ{wptk< zciviy;Y~u$+wc*nbsbgW9;-*1tWS)ljJ^>8c9T$pmxZ&3;u}jTlGL6WzBQic17!6Q zl!}vN)G*jHSQ6g-_bF!S$1N$F2;@pKW&|-}F)wYP_6?q2bC4QOi8sDO&9=)w1L>rN zoj~PtJtBP0EvKFu9_xR6eyxGG*bV1T{=o;hcbvL1%^hHII%C<SCi*LA9j6+sN2*dP)8*(ba`?pe2)|$ZcO;P%M9rfIwL<@T-xF{Y91s%fkE}nx zeS3q51KoJrpU_mIOH5d32nE4u3G!70^F70?)90iGavk7fel4)91SAf5N>g=67Bh&E zQI2%YYJA0fd~gM_<*Ntp>G=xbJ>5<8-Q#8!3w4dlrW>I@;$GHofRvp<0f4gp!BXGf zbF^t+-g3;efPyjK$DMEh_{j^4ApdwGY!l-ZYfUr^O89qsU9rgJ`$z2J>|>0{0s?Zq zHGYE)U-`H9C2}vXJ|5Y=@xRCasH7d!PS`75j}d-VeJ_0YbXl4ULiKya@kBuY;-s@a z762m^$doQDEF8b{?pOp$A6B0RV*OAKJB^1_2J4L{Cgx6@xYDLNmz64~o=pl?cZM?% z!XT|=F*gE5B7{~)+nPa)rqSpnTT43xnRqgjwEI%}uFH>)DWUL%I_7y5KMR8-ANXoT zo1M_>#rpEh@vDbiqsRI?U1f1%P3FqWcj?NeZN1GHgAqqVXHkMJ#PJ4>$)HdQ$%imj zwm5QDJk4oCW6V`u$oP+>%cav(I;hOtO_`n((`-s*On!o^Wo2NH!P2DI!*x#GuEI{w zOLsgVUx|9UYHfWecgdnies4)Ghs##PUZxw800ojgonTpR|c`1Dm@Ow)9c$$^y z=xBRxfF$_9FzQr}YdLl3GJQCQ`}^kM*g@VTVhYbJG0|&>P!~$7LQaryr{_hO_Haj< z%BDym)E5!ir@+#df!z*DXb17N`$^EZH16cEpBpYG-6SMp)Qg9LLDaGtQxbXNQFIZE zlVUr6w|AN-AcBfT^$%t|-UR6`;ZY3FKNZ)`n({^&#%Nqr#uWVo^W zX5f)?pmF}*`Lg9i2}9~szZER-ZeTFjZRj%K$GhmQefX7lq%9%EKrF~{JI*JXueJZ| zB$UHI9g!m-dXKo_$#M7TgTK@PO7@Es2}e;ut7*Mi6s0VSkm_VB8aKG`5K@suNl`M3 zGSp>|7OX_0#UE0OCa9%G`)N;lEY-gKomRLcUTPQaBHR?P0ZjJwCeQ=0V@& zISRFQT6_BITpBp0hNjFN*+onT30#cn##pILq-4#$>wQ^s+P1|>8w&g^wCL3qOZiIQ z&llpH>;HiN?`X_B&FR>sj`e7e>nl7(*|7W2yJ6MIvNZI*{F^6w{XEynKSWOeG?J4+ zQX9oBKfFLvxx`4ck9Yk6@r1rhC_zD|AW2(PoY$bFY%!7Di!;d5M<{W?_)zw9p69FE zj(hs=>Xh*6%UU%=@>$mU{f*>Mj{XE2zWSmuTNa+!^*uh1eL-422pCuMYCN~esiylu z$vCC4J0~&_6RN$dBNKP1B~ATb;y94h?eR&(u*nBvtY_R=q>{+Qtoh!#VQkGn*Cc;K zCt-rlPOZy5j|3r9*v8KtM{#QK-|)622pSftx}%V59Y%8q^PF|F6J?=HX42Hj$^c5S z-x~x_z&ZFF9~}E9&kA&R+cS$k+Kt+_mYrnwd?jYP)dB+XmK%wFG`FYM#&DQmCsllbf$1QdD z+ZnL^8gs)a5PTffv69GPF|tB__KN-81*|`051tx1UBSf9gjP#7V_hnJU0O=$@||zEg%0ik7`u4Agw=GtXH4^d1bypzjXKH1^}Ya#KInwhYZfg# zm32TzER#CXlLN0ZtOr{;AtPqEAh0B&-~#eOYGSRcM#FNWx!xtO$^p#}7-X@f9eCEM z1-|os3FCAKBSlfiA5ZajVSK;V@D0R*%`y7GR4y&MiXx4$u)+eX?N`RIF`b3v)0359 zkSc_J*iH7SClK6=PLzRn(oc*}*osd@GQ)csuuMv;vS3N)w}frqZU(d>ej-CCP<y8DKz+qY8aQ1#!g7id1a`-ml1&6I$_>w zf7N`My!d|avs3r_NIUGb2ULUso)A#J%0&>jzP057m^uv>)9+Q?DZt=#I?t%%4hg}O zj0Pft+1$vLm?sZ51N1^h-@{cNJq%8aVifvItFZM=CVF;Cj zyQX)v+h|M{j}$FYny?-uI1BYqrM?yWJg~~cb1B^{njpHU`G=?6jUCIP6Evu)yr0H1 zOxH-)BV3=JAk9w7QD7|?C(21ffPGEQqG^5_DJP@221(!Ah5JE>iDs1Xa%%S4$Z3lJ%Rq zk3@QLVA)yK^>ShccQ9MnqWX!HY6eBsN*hmcZm8Ck3$@mfBVvy$bBMTrZu$-)x}M%0cYGtwlffkmXF zsJ{cb_Yi;gE{EisDJ$$9B-_g#xU#86Cv#gJ6_t}xn|#k(KqiE0G%upn+*M+o;cbZL zvOXva)nai2jd+Ru!nrQ!YY6iRo|W_xcgv4RLgwxRg9ZCaj8(Qvj3X)U(aVP->L%l< zFoPA8V08`OYLFT_qxa8WrhLsZvRTlyLFnw3(0(lUvx;Aoe0Vt8 z5!|u`e3#Qx_;35A3VjNX`~9cG7q4`VC~f~)ZGC+(Bs=iNJh_&MipX95?oWnzGf{bB z)Af4JbN=A7o}Go#{k#qC_Xddd?A+WSD*w-|Jm%TOYO(D)h#ocNO9nd}2BhsRN3Vi^ z(d!TidM%wOE#ZPLUt%bc1^dcDpG$-lB7ZXfg!U$2l;c&Scm!wUXmiu9IXwx~>oz&$ zz-t}a$5HrN4|$gwm(H&)8p)Aw-d;gE=E%g^~jtRe0pF zY6LF1etq-vgo?8nXg8+axqhMbYLw#j(ncrh`j9w@>Tpna+;Lf764&42fQ|bmj3frR zcV)A?9Kz`jE!iPYBD)AImBB@AjnE!&yS61MM)6h%Pqt z@_SrhgFzfTuEp#~$sScC(Um%Sa#U^nK>4}pG1-@0=nA+4LG~h7+pIEuB53LY^wL9F zxky&3O^9;oF?pq#-CL+mo=tqa051F3nulih{#=-v*i6v_Nv&KT&MF@bZ8eN>JRgfK z?yoKuEbb-7@YrFe(JNRDOes_5iY3H#_|Z3`X_-!o(UbAm(iEj^g*U6sl&@#K6Pyz) zx!-RP00cu}UjWVF{D1@hQ%IbC2d5d{tA`ym%M`f;NQc>$#)NL8Hn1TKKG`^Cd=20U&rRzC_Lr9bhbhF+W$&<>rm`$ zXuUGbhW2p(@)etaVxN(7A!Y^FVZXE;3kkG`-FmSMF$g}W-K%8fsjtS%2OfFCF4kpH_iRhXo#mc4j&!e%-A$l<`ade zG^WQQCd`|6;%3>YqGNKN()o>7aRn$0vXBb;P4bH>z&Zl0*vLMOtBK=YT?*m+wa3k8 z+xz$7`6JNT1#1Ilr%6YSeYMG~gi%0;RTn1W-8A@@ z;U)z4IQ++Z`u6||oQc-I5c*z%{YxI^LV%ZUtq_K_tJr|yc@Tv z)GY>~=0&m|p0Hh)Lv4)i6zrzwG3-68gEX+gCkl>VTl;N2OX4ap&nE0SVY1OEvpH)N z+u}GnKO|ldzXRuwe07R8Z8f`M-;ig98+YgAAG7=xY3ayC9BI`;I%?9-UR24OwHOQG z6LhiT_)<}~(Fe8k%CTRNX;4JphU+FpP8mxAby2LU6%FMq7i#;hFN!R2m3fcyh>T>? zYh3L6Vwu5Z`_&Vh-qhlY;>iY1J{y)*^@UN+3O2r(ut94c?)5$V(u#n@W6Y9h$8_Q* zsywr>-+niJ*de$vuH$59!BC1=gGgx&qo$=6(*aheS>;a;`G&bf!Og>>hMGpw^OKk5 z=i4@N4I;`afCS4hq5lM{95ChjClBiB{%HRLqm0-8i-cV5LiRJhuEZUOOPuzUj?7l2&u2#hlTgN1Wia*X%m5_{U9Yp>tndml9LjyHf{+5O99ad-$nFFFOO z3KO?L81~}b@WF$$OGzyMD*pkJd)JGA0SM;6ZW};F?IU#Lbo~B(a({o{>fFE2pNA&f z=DUBfqNswW%W^OpFIglB=DW?|&C4wVe1yD4*a zGLQSh-aWcet5M|gWOf#>{VET3xe+RJrQ%_T3PCN{M-_!XB_jp4F6Nm~&D`~~2}Qs~ znXoDtr-Tjiro|0#?4)iN%YQ3(&KJ!)OQ0)jv{6@6c*|MJ2o0TMACqbdSrW1QGF)}i z)08|_InZb3Aa~M5{Uj@@7E*4(ULuRae;J+7Nzy#vF455#NFE!AVDY_Cr zs$MQ5F-T(GC^{L;xQQYTHt9L4IrYFFmhwDP~1*?%r)F^?^1WQ9Hr^^H3BI_Xg znYesd_9;<~x&ZBmbZU7hC$WZ+ZnB(OJ?&u1Y-rS6!vIklBp}Ts0r;L>pgvhWs~Y5j zr76%LB*7e>ydtxs$?OJx+>!W;=1EM*0FjKt)v`=o73VK{wNke67PE^?85q_t;7$H0^O8Y^UO$~xHu1IKL&>ND=BzmcQgo!!B)3)_t z917kV?rGR9VKW#jVcR}f3X2QX7wOgqTPbjT2$L}X{hv{tb%_~NPf@!4-7GMWXyqf( zdl#U9#r3>XE8~xxL(N2SYu7;Y)R3;l`GW_ZeYc6< z1KayK?e$s9-V zn%Q&6t_Fm>?2k_IS4@&v5CL}$oOhm0=hx*iIs6e{2ZO0DTN7)~RRrVj1Q$0XJMK%T zjXNJtor>>%KhEFB#X+@peemnGsO`oHR+%6}Aj4{hf&a=Yse7YoyOc1d&f`2s4zc^; z<{izy)Jz#gK#^h?BFFhP5X8DnL)* z-$X_+9>(4wyGYm&Pm2=>rA9wQ(TQWHIE@r()t-B@SjMW0;mic1zZHn|#_#W6Wa-b?Ue9wSq9B9h%C;2*wj=XK1XJGUAB9cx#lc$9 zp$hmH{sh%*?!%-U)OBUGWiDuG!$DU0a@oJJppPfM@62GFpZ4JM*xMBy?ktS?e7(KYwWu|gABKKq6#q}e4iE}zjuh|Wt*Ev?H~7t#7X zPZ}|b#$y{A<(tGEV!Bk_Z*tZ^d=3vDy3smQ;a2E~(Xw8KIO6PPrZ`YTXiFv|TmumO zf>IZdc>gqt!mx?>4p5WD|MBVq@VkH<3-ZoNgN1F2j`P&%L&DBoh(Fu|k;0O;m*?`$ zw_Kog*8c(W<1xXfxl# z65%K0PoFf0_IY5(>e@`_#`w28$7b8&e6e9#a_j+UNC7%q-Ll@P>B#pTTYp1 z=1dFE%=}kW9viL7BSY}O$`tv-3yH%;D`5)!nue(a^v1BrA++zA^Ve5nMpE$XGa`~0hz5O))wbl1Mdh#A# zP5_?v_3_t_t>_OWmy^e)DuMFcuBYx^MtEI)ec*xs2}CKy99qh^Ny$&r=#x`A^ zO)NU$7{{2V?6M8f1?F7O^LE1hS+!g($J*d&RazfCFparT^Pq4+nL3hZE-jtfu`QhH&!zFz!D`xD4r$C;cL-i9YlZ0M!RYWHE&l{7e!ONBVip(Y!qG_E0KxJVj`X{P@{@HgNsgFvl#Xvh{>?r zP^7N}?JxyJEvB$%lZU5#f;-rC%GanerP@`Tsh-M6k*l8KBLQkP;%F*^DiJX21sO2qJF84iii70fkg1~c6H3hCS)61>1R>2%*XmR{LQ`m5 zsDkmZQ#<1JiTY~1PZ^-8t(+z-6Gr__vx6GPN^EC_U;eGn< z9TWEB_=; zgW|p#UHXJnw$sQ+~jWkvaSaS#MPbC+wjAuE4EM1@1h{@n#1kS z4D!zLY;)9~pqlL)=3o>xz2Jh7hs(N#$XwsN!@D?NY(N{BDgAo7aZal2({~k<@q#ZZqm%sXVIf+Q4RD%E9+jI47&I!m3Bs(@Va&qNQzu)xQ{wP?Jj8+_rs+B zJe+RdJ2h1-q#fIgw6H)xD_YyEpON&h!CoN0#mys}e~s~fI|%%ES;7B!_<^6>#k{%B zejhHK^kuFfPS0P5L|imm@DDP41LOs?uUF3{ku4z}Vi+FxB%viJM;#L}JmDq0kK!$- z*J!Ygjo{t~Kpvh?(f75S1J=2VJe^n_F{nRae?l+w_saD8+QgNosX<(ajF0_zY`AaF z`jI`JiWvdsPop7%ZhgEZ$azT0h#=4CQ3`@X8Wg*2A*zQ>PrRdbQl6|;X%e!bFOiMU zS28B~c_ zBV{x}M2MYk+AJM95ag49Ct!p}EfU$aN^E0$$@g|%fI^m z{e}l$GuriCb^Of-$0WqC_v$&x8KVar9Z>&mRsd*^DGnfG3hdjY9b&7Q;Yk3*h|zjF zAo_YgoSA*e$NL59J{+8bY;LC_+qE|B!xg<}Know#06c$T6hq94Fo^7njbROt7K4e}{J9 z!k!*)=aHYhUA|AAv^cbSwSu~)cpHhnd9YM?#4@}5k$&0aTo0f?YH;kAy*$6gxe-^= zV_D!BfUpWWxZ_J9LbpA9jtv;v; zSCx1YmMg597-p2E9Mk#R9&>!Eus#$WR|DSfR2{>n@lWJs<~Q}VK)>5Wk1yYSEu=XvE)Ut=3p;v8Vb0p-Os-$}A*CS2@Zs0q*gOWq zZ3Ioh24+0CHBi_Mi0>gBKlLUDghCR#SV%KyfYZSqx30&aY5te!f6o9PMl?kkYG`uc zN1VX!emLP(<<@JM!N-P!=haTQ?ez4hnc!Y0<+*?hK4|eHZt3Ma5_KD_S=~o#lVEc2B z$`S&SJV8tm-qIDrJH}#?wg~{)`{us{)fsk$04?M1D`b*Ts*c#-R0PDV{S=BT(&I%PVCRT`00+5Z91e0#` zFML<>gPLZodr8xI9;(k$LTFM!(#y)$2!s8l7hjB9xqt5Rts^(FZSF*HjS{sN3NoNX z;*^9mM_W_;PK>yb8nN&bXR>_ehMk)F_2 zD2K#$3GqcAzRTrPXIFWucaV+1CWN?CrYE!~=>b#zl4NqFSow7F zin^ache&>&^6_Yxy}FJ`wUa&(5h9YOX|Uo$#fBMHiDeakKI?mv9i>I{P5@MbRVvp+ z+`mo&KO_iuE32al6 zi~q;%y9AJu6aZ!0A>*rOeV=g5QmO<7N!VAazD z8xE%NC+kP&m5SJ(v+IXp*qh}#yHv}y(h3_qbb3VHd+~O{^T>gEyLH4B`1f6>7SUZ- zSP{=GECF#_AF6Q9sc$?2<>$p^%Uug{Ep27P_v6j2zDRHkrJsz6+VClH6AX0AizH@#ex)N+&~3rS?d;y~#5lE~af)&XlSgeZCTSukLmlp$ z__!aSuW%n17wYeagg6&dj@>=B;tKdO*>GMod$D4}OOV>1`FiJ&JE1#!+eI#x*R2j? z8B*Ll%~BV$GP+it~Z*PFm-selxkU@yy-Sp-r@nEgIt z74xka07#JsmA>5=Pxt}-^&;K4a=4*eOqRsFk5%{g!VCGCY6AI3kkxy-02(5v*LGci>(F|c$e?+9k$?~ZS zxsnnkSVdqqVx3f;R=gkcXCTQ@ITA34A`9MCe$nJo_x zD+u_!2Y{ZBoWqlc+v&WAsd~EgEke(`XYh}uk2m^{>2C&?nOlC_thhhOpCL-n6(}>f z^|*~`^`~3mM}lv1L7R?n;b0f|a z1^GU(W(s`czF&q5TwX88Yvw>?aRN`^tIK&Uf9geESnM6H@b__cjRr95CrqL;pl$LfgW|4O@#C z_=r#*obUuOUPbiEjpe}Xz&D49Lo}o)VS(xp=3#3w#Z9Z+rf@Ode+Taq`5_T$oJgY< z!~KsR^8v^?*BCfy;UfK48=s*H$#{ia!n5lzOw{0-#tMEh2+4O?ssjdYtldh;I$n$7 z27}4T#gUzH#52VqB<9ztz1siR$JpOG?g8 zA1bpii7?;i3yqC1k=L`%@li$8^XVctc%6LsozuQ%9iSdQP1G|qE(+JI>Dsq@#MYU8%5Q&$<6g1^heUR`KM*NKVG{1H33EYJBB_> z_sh$gj(slg&WVHHnE$iGM@sJd**DMUQ!cZ0F+yr-PPo_c?Ey-sr?;hT=bVo`;Sk27 z?_GTH%DYcDhhJmqIz9eLV}NQ?BM+|es->jsc}~8FK!JJ>#lr&SaV-`wi(KxR2oq)y z{WC=yYB8(FL_a?>iF{8|%~8wpFP-y2m17Z$kEtu$2@Jx#_!fqmq%m5cFf>i}fEJz< zsrnbW^CJ`!s}ph5!nTB>colIKVOFIm=T)|;_9_wbhaGv5#Gb)ss^O#7mhf!pZ)L*zp zR*;L`LUS(7CC`eQnCYfkDWq{ZQ~S6qe&WsUlx8bBx-YIo){PHIn%qt;r}SZQgOQeG z=isop(ZMmp7Xg3>sR?*jdufXd?m^rPe1niKnv;qwjr~S*ZH8OcnQ_E%9&c(-cU}%z zoLlkw6l8FXsIRJMsd(7rN%5?_C}P<~eeofQaKY%}a%c*c(=)rBjx7<30_vukT2ZxR z`5AK<-y?+m^|$?;IQuPeiMu(gUA6VgNwStg-83!xqgJA3W{i_}C7GzM2s|*`L@4ph z!bp-&%WxQC?6a+D8$%Z{8Z|IjXkd90q+|=Cp$hwF^67SnW|u^7WWNb-Q8!^!z`Nqc zyXE}>!_Gs9VOQ1Gs>5U8wwTWU+7`em!%WAa9LD7KC9)1q1dcLeGz33#G~boHDMVGI z(U5s&OmGe1kJ9nzvH7y~tw?E2EPihsHV@?~{CBksvo`b9coFESFvOU0R(BH-_mnD@ z-}HO^vgVrh6>N-R@4|KN z4hP8`1oAmbm|k;!8n%Od$f5n!x*OFE?>TjwiCe^W-R|Mf&NG^eGlx^oHun@AL;5|V z&&#m8+nnGrLHJ#H>wCh!`VG9#xS9w_6Kdxg4H&(p_ayZoP<7b zYjV)<*mmSs(IG7zSTgYK1!dwU(hT1`-Kf+&YV-s){lc5Z3=b5zyg#e4y6_uz^%T*z zsDgUZij1IIZIJ~hP#Fy(+KqSuAJq*ZGN}k280f^V;*<#WK|i3e4DG-P_ae`>SQRPT z6Z8P7(mG5*pP%SVy$AjDhdxG-LiFSbt|`(!#z;u09@Z5*=PMvg7o4zmKm=z}qU2;s z`Z|lU>)`uB+3L?+ry!figR}zn`D=En!8=@HAW1|+!Usp6xGoO=EDK2czK#gCPdq-D z0T(}2p~jz!eeo?&>(c3Ec#CQ}G=iTr_N1BqfcA)b3_6IXa;Euv>Pfnw+*Izd-L_X9mo|v*p{IWQvl!dSuQ_XdYI`4J1Er$=|Q)djrcr|gbZ1bNy%Bco~)is zclWobD>1Q_Af{W(|7QVUurOOi=g}63K3WqK=0G7=+$ zVQkv~IXy~-b?(++BBo)JTb$e5GkU}xvP=^Q9N7PGUwK|=eEWWMfR{Q{`2e@oRsC9n z*>|xC-@$vD)?T022(Qnr&I|LWfVT*?yS1TWR~c!mkbQ@3XYvZFKBQl;0;mNsr=(57 z!S0o{oy$F3H`};7;TqnO<$1^=kz1~+UOu7BrIZdU zK>;33wCwimVHV&HL0YD}R$_-t_RJdS`0P9Y-|UC&G4L%Niocn9BuJ@bReaf{ue=YE zqpx-)wDBN<^H6A8D#Mdz`l;TxA9EjWh?VK@$iT=E>L46)isQ)#!T|Z03u)Fz53;Uz z{MIL%nJSY7sSHUeAtEhd+YmL97%vymEV`)x=S*l>hqQTw8^>YZ$*PmSHg);Cf$w}y z-2_@|6UrLam8@rfjG2VK2t}X9enU5C_Q^o|4smQHk;CPexm}XvKE`NjA0@1J#N z{@Rro3ukqOR2Fa-Ej4(TxX>fiqh?uvY&63TEw#0m&i%8eL4pJiQVmQ4Z!l93WdwFH zsyc=Nfs~@Iou`t=#baudt0`rJTah(^jisb78LpkzS%t!W7~f+)hw@N@H#?4ZW#iJ+ zW4v%9XU%HbFAGb0#q{yt$8@@P0~y)99UlPdqMx)WOYa;UK&kBuXLanp%xZ&Nxvvf; zb-Lc}*?_WCQ2z@{sYP-d;ylexB_h(;GO=32vJn$vGccvD=)Ee!ZaQ1%p(d?j zFUZ6ARl_dQ88akBi9)D9)VUFshaxex2-r5?e{5lC+ar(>a9n8;Y!-7GSBB%pqFmVG z-PC@KL&e|CPWG(ki*;*gy}y6mE+EIrC;p6bA9rC$>r$z`95BI%=h2H2A;y#KsGBw3 z=atR%%|iYr9rQF{#m_X&a?&>Bb8Qh);q2ohGnM{NbpUhiw>Q2{4}dGOKc4!v$d1JX zZ+QAg>+V*7jC=Uj>jB5>eM|Q~07jb(?jHgapeuip^TBKqCGW>GrSDrq?)B?QO{>py z@>(X(eVl#zJ&!YX#P*l9mlyEMODjG78-=b0g{u{f%QwfTN4|HaSCbpwkLZ zOVdkFg4>|{>F@0s?AMZy{+X4d{*#q-G<|_Hy@9oqe3jG z*IIImzE9r|D6ZH_Q*|)3)o5VvWL3%0OKPGQ^h8 z*Uv|p433IXX{G&1Gc__cnqZW}CH8T(#aNZn)Jq4ljQPdF3Z(4v%l2`pvZimW0|>`( zU4@S`Dg%HZoI|NtIFD ziXH!`Rp0kT;iP>lLc3sevvu&SaGkuRqw625SNGPsB6x~*sne`aP%cfyoBhx`9(W|N zAP&01Wti#Mbt|Tab4xeUD?eyV7GxnetEZqqEkW%6B1V+os8-Hjd(g*>(5QN;U#lKp ztmq9pEN8c@c?^;!G@i)&iL9{A#9n6YX}z~6yV{$ggaRHpi>jhQ5!job7N1c->xu1% ze{7Z)3+{%dZJ(xLduF7e-4h#?jpC>LT-BJP37AI>B@ip4{e7IQLpu|UIn*FoY1-e_ zMWCoZVQQKJXn$V?fv4uuS4{VS5CD z+^haTKbPPO_hrU__shC>^ba4n)j=t03FH3j= z9owECIO$B#d6yv%iR|7OG0UG6eEK6yfJQMB5TG7pHlav}(xLdZ;Mhp5POOvg0+ zA`@_#@iF%qEHp8}E3qS)c8YTy80Nlt_MNWlE-vQSyCV1Ejt>J6e&8|RtCKg8Sl01m zB{7+wN~U2^P**7eY{v+knxKmvNYQJX0u@pgBII@-*?aw0o~aqk#w45%_BECzaqA*< z$2KXRT7rTuk-JlGT1YReqrU5sI%7Z`G$uZ18dhhm1fkoc^ zG~U|ROmf81Y;)ms-R!o?6UIs5rHSh%@Vs_2@<uX*`!=%ABoIjg4EEMSK`UNv>jNKrhH9Oq$iRx;?bQw$s+hs@UUW@hIfOyBQ3* zv*`Z&Bh*d}Yf8=sPr%J=^&&lN=cfPH^7gr_%fYHv=GFbDt*%f^Lh;Rsmh}gGJ(iy+ zXuT3~mwH-8E)2rF5TW+YKSG+p^q@6a@PqXe{KVT6UdH56TSRuR$kxxlaLkYRNpDrJ zsSCK2E=e{XsuLUpI#HAdN4#GZ3#_a6hK4VI2e0(4>xd68JFgvpIdi zl@pgWMLFeeJ_UEvs+cstPD)eD(+Q&v!6D%gaw|GR8N2>TCA^MQh4>=bC{;_tL@LTd zL>}QTN_~ca8+tS^oHW`LfvI>Kf|}(v!qY$`&Pf+<8bBUUB5h!hc}i#O4q5Ax*8g?& z4rj^3aTQ4?-b{Roa)h)f#8c!>P(EejJZxAY z2XF#XeeV`hdHsBcuNSiw-`z8P;(qwH`|89 zLM0l+#gj@3V6a?&tc&l$vW3pkxhPbVi^hmZJ=U+1zqQiGrIkc(G@2M1UIv;1LD_0I zWt1T%Cd7S{`%tq3S6*js0!z2?=CN|3{P7Iu}TGXNJndc}s#fDlg&bprAx-3-Y zm3^bb(sqW+)<6A-H;y5#oG8R7Z85`Q>?xmynOp|4md`jb@j0eYAwlUD=FT(y zyvkpOpJ7AfcLG+1Po5|*a)5R(<)0(3wOJg)fhtETI?Z4wnfebFqEqPro#fv_8emwy zy!iYyGs>~}w`}9!KCC>J+u8NOQ~1!u<<_|aFxCF;`v9EEtLuH4e`*Gpl%*YpGQ+QL zZd~0>`%g90bODQB-Mga=-@LA0vEvA~n9kfyC;yM9vkHo<>$-JtcXxMpw+0$_hXBFd zt?>YjyGw83R?7ZT5t^m@Fh)-3H74e5%_9DUnqvI#lX})n= zK2y>bz3Y}yvQ6hjSVkI16Fs9oBJYUwGGH+^pHjBBPwx0au>naH^Alzkja`uHJ&zW! zTFh=sV9B8*nQ0@Fb?jUW999`9>Q26+n_zXSDIj5OnCH9ToQCrm(jl<2rqNrvG`D7w>=ia+nxe^8WZ9gQL z=MczBaKJB5`ND$7149lUqAwmEbCgge)r?N!!X~!+f@6(Q)HD<5*a@=?+KD z!IvA8{jJYViE)oayD9gjofIA2L1bxBpku2Yy-|Goa&*AHUX2cHH$S~Ajm zyhooO2^Wtou1~-fVFYRsP7kcU zq{m*B+BqtgP|Oy+xBdC>m}{--e(d@UT$iWZw~}wAOFQ-p->4rVL(ISak@)2vp^^H{ zUy8pl0Yj&+_By1%@N}e0ban|?uTZ&0at8R!Hp*JTyytr|R{xkPgA4#NBC|wN*KE8) z`wB~7zus`upuhcZfx`B>2xZ4RghP^HX8eK1f6l9W%|?bWS4nHx=15!FkA?Rtc0~4B zcA)?S+V(w-B&31!|II1{Z@A1nuVd?OFo?7RKU)6&HPv5CM*449p_07*M8pNUdzEmL z3M&5O(Y5W>wSBkizT++$3?0kgP}{{e3%1D4o<^Roq4liU>ebCh7E*6o!K>z#wZP2bM>^k%}JGolY2b8CPbJOxR2*T_%%fGIdkZxaW>=CxQa zDbHghRwVjOK8%Nqrs6ehk2DflzFWa|_kB8~1)+ZOBxmP8NL4VL@Cw`LfS$=;rW3N; zdWgX$)Cr13mjtWIPlHYLSPFv;&e?yHl31M6v$jP%#DG!_fQ8+37j@NhxY=V3bN3xo7PLD?%39O7?J zBi^4<27$k$RZPlIZzNHYEQKkfK(v!QauoQG3axLr_$#2E!i-ZspiJ$&IIV_AU!jLX zF@fyKmJx>)6v=RDkGTv_FpxesIm1~_57HC&mm@Vuw(FLEkRw>el*rfJ*bjjW8g#Sl zT3QMd4hNvumhI^lr+b9aXL@@Oj!fz1|D9XG$}}C%!7|CED!L#D-5gVFUX3cGMM0tHUb6nZ*lsO`wtsmrtlV90U70gC#en6UjW0qgtg_(N!dEB z@AG4-YZA>2h>S`wqb#wJ{mG~pJBK0!!GU|cY-PF%{$5J%UG(?&S?m$=? zYmQGX*^=lIskf|7 z>4EB8wVkJ3uATZotWB=RRXb@1y)p@L6fP$6QpCZESe;+enZ#9=U$@A%d+6g2=r#A@ zq7Z9eg347aUjH-McNwWj#S7G}509{8cyt#Oyfm_DbgQYb4x0Fhqsx0SqItwVT zF;nR@@ewDXW00_tv@m@8M=@7S%TF&SG*yDR3?2c;_^i}%@gH!1?s|lFiEN4`PMv0U zFvT&=G5X;G2yGVdR}r&fvGxo11NPoev%8cSb+eVGYANX9?56)>K8s_kf2_JWo~3&| zbbE!Sgr86EObvn%{gQcHuDajcr*FDH;;T1%7^AA_KE8>`peRL{RiAo5uV~!fzB#xd5)XW3m@Wz( z-Q1jvU{7LxBA9oF*HqdqKBrk%<=Q9ehxuSPVy%gSd>cN4pR z10^#CJ2~$uJ7sV&IvQd#ctw~T>n6sVMqkw$aBM~*(^P~pCisqMmd=`})rCbc;VR_l z*VDACNDuA0eO7BT&5l%3Ho47yYw z(wBo?$N)jjqvtcB_$sKS7JpXOh+V_hOD@-?LrfsC=yA|>alGpe*7B{!*@FW@(3fJg zM_ZGd8KM2xK`}<$BfYxXZ%y@Vcb($GnE6M!{-)I&ymGG$DcAB~4|4z47mL6bPs^3A|W_EEmU*oPKa-r+6AW0~Sk4iFV2O3{~IW_3rTkd@o=zY3y zdxD0?S%`)mtV09G@1PN}-8tAjccviaoHY6EwSV7JLc?!R8Bc+V%g(r@L$6rZ2VLlrNHC3c#B%^>HGRx zWDyhxZ0qoDujd6Lh%$w(v1nQ0jQCeVh0ZyNdF-SL_lewe#?NddO!3@`34ZCwoWSg< z(t_q;=3sidCB>zY@6@DEDD*7G_!=V4Gd^heGkEF{os}F3k?+FV{UOw5T0$+l8)2(6 zO$dtA(AcwMZm`;d5y>JmF3zD}1lQUl!kyObLpu zOZ)>}wY4@Ye#9(68$4$_+a=B*#PH;^Aj%6flCPpo$RT6*Ru&Q@zEI<0xwkzv z(EH}}e!-z;-V1VPYpzm|ksy<(l#o^2F~{Ofpo-5$z3w%D?ndy1ponb(qe$$KhHbH+ zRvov@o5p8GU6rbRzIi@Q+sl@NP9m{fUsFkOx?CW2E&O-5rZifiWByU>V*g=ooQ>r{t zr9zWLN`GmpPtpr-`KjHxVP^&L(+ z8Z14@i{8Z&sh>hDWvcFSQvo5gT=PTBH zj4FBJU39UnbvfMF_PBI2{0W!Arz6RC+^!Caq|c7q0ET9=v-uN;#@)_YSlLnK4B*T$Fmj=RI80f zwh&cf<=x)Ddqm6ZaBuJv1rBG?clkVH zq?Ebg#`fF*S|1*RloS{!@Z+qa5zg$&gQ?rvRG%m`D2tvB$Dyh=$qSWK?|$#wOB6(C zo54Lt%&V?jl2R!eu4LWR>n>frlSZcV98L9Oo!U#;HW2OROtgq)v{BGJ^d+ZEk6L3|JXK4%G?k!^hs9q)R$0`c|k%8zjiChgTCei!|fIhSskE*-v`o90`P z(fZu)`_|g?(&RiXOHVF>k8*3PzM`opi)|=@vW31r_G5o51~19$5U0)ny~>KUT0}{l z9F6+Et!K1&>!i-2ZV&n+<3=-$j$V=bH81U1J~U3o43@V)2d&HI;OnKxa_%=S?`_IVcMJo9C-fg%qFSR=AQfSCDY*0lP<-}B zbmT}mn)p&gmy7&lKBz8q`1(o#C5Y`!t_$U7(KU0huNf-a{mPWrpPCc4v6>=GH&ePP zZ9#F?w$|*%XSZ|4b=D=uSCZkUL&KNB1AjzEN#D@o>8}tuh_RJPADl5EAYf3!-oOR; zVK9B)nkAIu?7&CcNeYBO4MLp2RC0Pwr5weSM|n-eGy-Kx;<+C^8`sR69#zR59Bt?} zA5`fWY!FdTX*9eDbZMC6-LpOwhI zOXZnTGGejnL%Gd}LGn;R&|w1hm%I%ga}9eE3D7gho8lP6A^&0FIh5P-u1>P@9!wm{ zUrYD~dAihk_7&?RznWs8fyXjnyzIipB>TZw-H7S(YbK(TW-{f{1C9MYLd_I5`E1an z(Nt~)H3?3B-^dI zZwFkc2sX~;<>h}(ywFfk$QsNyO^s5iLJ(!TaD$LaBWm515Vuk z7QAeNxCK@8{_%RCeF;=!U3+m-6!6e~(kvfwzwVDj@gCj6f`f)jo|QjU$Kf~P+_S48mE$KMs z-w)<(Sx(ri9sE)>)>sMXU(WQ1nLrT7~CjF#{SBy7FVf*YsTopzbTG=P_aOG~Fj8X5O1ssZCr-H5zKlU@h0 zWFrc$Ruy9owRLY?xoxTmbt#fjp-eaU|GWSI6^q^JJ?r(LIQbf8HQaB^bEo-BQ;aQDWb(S zX*CHxeHwhW&`jDM?ke4AYAzd^alS6#O%MLtSD`wUGA{MHu^r__Ig*TnF$?ZXou8CP zGy0!}Pct{jI%F^X(U!M9CLNK?tInqKUJ^qnx=M;8cnNIAl+`~<`(bsQmt)`M<}S-{7quCcXn>t~Qy zcS>QmGa{z&|JhUC++(0qy>pq$f&1f2H5JxGiVs5_8+mdHt|0mfzr{RQ15XlN4mip7 zN_tK7hketk1G4WJ;efr}80 zPT<|MpZr}KhiEb{&ipfhROW9qfj@q_{jmA-(~2O-3N@(Ey4ZMqH2^i-`XxK*6xsPLyxS(a`@>UlE+gwq%nL%FKkI(BFQD39i6!^AXPc& z4ZSgG$EvH7sY#bNJbwQa9oZ+IIFiJHE-Mc|k5CgLkdK%kcSrrw&bAd<8z&gr9@9Z5 zaTy)IcM5t)PPc=)14&=c$jB{&ZJ4#>G zJ|d6#cGSJ$FJJ_wU=ap?HqA#wPl*oAu7#gHk21cOfBo<-_D*+a&z^AEBm6xcWKFP) ziBS>hB+i@3$hIYw%(Y{lDTqgW|LHy-@(d>Hut>v^*-7^-pf1Suxz6F{hrjFLzz_#_ z+i$qd;=8236@B+}Njngn6QKHM^ETQvrSDSmuyyvGdiv@+;rsR$x=Pplt&t)wZN%36 zG>-)4N3fJI?U%f=RADdvGD+e>Zt4p%hz_A z@6Cc097*3WCx3`{-|fkKA($yi&!dQwmr_%27~TUuBz=ldA?`H-e%d2J*f$`x^kd#lXY`Y^%73#l(b?_l!JvbsSrfh@}@yP=6S{Ss{C-`;h!sd4r6 zctbHY2Kn^xavA;4hCd){F5+NU;&F?|WFoA*PyZFa&kNY}iL*NyL z3#tR4nYt8&Ksr)-z>vO7Ay%~dlfvLN9M0g}8gkCpVb_AVsfPjQ=|u;VGw(Dc8etIY zr|oImyB$XMU4XDnQxlSkX-aSra9A{+kxJMyIbpwbO?ku&<%Vaa)29iLL1VT^5VgCGQ_eyZd^(y z%$q;r&OXPR{W9_{>}O-z%PTHrZPT{dVN&1>Tqxp=wqg*HOa)Dae=s71_EJP6^}M|X zQc_Za?w}RF%gfmmNQg^5KcQ~cptrvdP}fTD?w_mPBd9ziE-s$$N2Vykcjept^xNch zx9An?^^)o>m23MYZeS7Z_aC@BR|U?*kaU)lF*M-SpxbzglJ+pSFa0>cyDN%h_; z+#nHN4p$)#3O$Z~<9M^%&9;Pc4t{2v-$IQm@1WJe|C);bIi5t!2hoULmizVWMTXqS z`G!Qj`BbeS?|gR=G8{Cxx-9rp>{+yQN^7(|mKZbeY85Ocd|=(eeQ7?WDAOK+FBk(j zrGZ2Y+h|W_qfB9X+&Y9>pPBXOS~l#Tk_waC8MPz2`GxhY9gb_XZOhtuD%1-}dC5$Z zcN7A|W>4eplUe~zOxhB(S<-lxXuAiM^NlMfinzye(|l=6;&tu%x~#Fvx9T2to>sS0 z2TAuy27MJf^`NoFaAypIp(^m~i6cMXnq#A2ej5sOX@sI&UTnGYKJN$W9Md%grv?8j@X%63X&j*%MGHCv-=q{$J( zYQexwFEtu2wUls-DUV}PTrfj$t9_!v%=%+14dQm-|V0dFL(vih5j1TzT8>KC*0lPkOBRWl?w(L>dr?|SN! z_9_a=XfeF?+sxpBEtX0n9F*?}V)4~?w?zeJyLwYIb ztm@iKRV#_&RtE(``G?_XuNcnx1@Djmvp)&?41D|DKfny@KBRM3`Vzf&??VU`1S#JW zak=AoIvsc7qDh4gxJPfz+Q>M{Q&ARYG~0U@&wfpk9Y$KTh!g{KP{q7gVvRjU zfeEH|!`qU0cqI5m9G410UVD2N)Drol1IBVQh##>maWAQP#vEpq(5{W4;$~yoCV6`P z72iau=mTyeXIVX6-F5!Ok#}%yRkW7$!ln)T$Y)6ZOZzl_-kM=;GT{lVN>o;>IGroO z(P7`?9!7@J)8424UA^Hn?9Wx$toT}<(e|M@Q(R|8-{nd#WH?=uZQe($ghNAWlg6aNH9ixC_U z@5g)736wQZxG*(s@cw)_XKG)wsrTh$z|wfT^ybSWgKUA$3lyBln|Ex6bu3qXBzp`E zmow)(-K`#8vL0Tx+=NHt7|LqAibRu$jP7VNJd%9#x~wRtq4rexMSIH-6iGGa-^$Q{JP&3FYzJ*&*<7xL~$-lH{cXzeIV#(vnhZJpO(#ihf9}ctHhwW&4 zD2;Mg7=r`NO(>^U=xs^g;?hQGcWI32GkV|4pt6c7yH2F1*Uevyj8W+LC7FsWA_Qbm z322TO(a25%)NlbBLgjv^d$pA61daFAiEl}02kWsvcFt3X z797e*IRHM7y=Q`2WU_z7W&axd@{a*RHdZm)SV(mI?;ExgN^v;VsV^DQy&p}ree;KE z>u$cw#ML!fd`*DE#(JydE{Xr@+5zYV;%0CF@#;}T&C6W%8>(Awtbu%)4hs(3Bl~^` zHh^Q!>h9b_5B_RUDJe-4c2aeOcJJ;(9>_Hm)XhX3j^wOd*cQpu3bOhi6WHx>m{H|F z;99HOvy|6RH<$5oqvYX=GRhW)waofp^N*y3<$^tm1m<$ccKeKSe84Nw(u1iM(oCqQ zBtv}ydHsp%ObNfsFpSR>ksN2Dj0e(#L~Z9pqi?r$b9^8@=c^SI&Fa=1)=Kotjk1&O zqA9q0V9y>C;#Hhq{SBkXlB`6=(`0y3RfZx<0S2rN>K2Qc6+1DAp4>%{ zxU8R-Y!o%Bw>>K@X)%EDr z?1fZERPxo!$vugi`1AAA>2I|cnEWy+j(w4UgS)$`w!4d(qU>MU)JGGGiv<5rx6s|q z#wB!F^3V37ExGyg5_&f9SD#9E0HZ!#w@~dO7nNED~495wrj+1BFHPTu^yGY9bobag3=!?ruPC(-T zoC8-#a7^B3$QbBmBv6~>c1M+j>oI<@SWp}eiuQ>$CD(P=QWYy#cQ*t2SeYgoW3=pf zMbxBP_o9l0u)FQzk)r@J&m7=Zn_h%5;CX%DEpN ziwLJeCs)qqj5?L@6QE6kX?>sEzH=a$HdWD7$y7^;y=x*(2W)yhbLUuYWo4{hKA$=V zw#GZfqirk$Oe1ihjFPCZ^w1^09x)A25tTz5k8^z!b2C{ustb?rEQI@xiHq|`d}%op zDt+ONG#N7-&R;W?=H*10QIaVH)2uF`06Zr<5udf$)X%DzI@)8HX#;8}8|7mPP8&;6 z21oMa@VvHA0!Ts=F3*fdHg|b_(2HO@0j9*2B|{d z+Gu8;#kX$k$ys=ZMJBGGdkTUk=@#=bX=rvD9XF5Dr1qtJ$?2ZM*w#MJ@nxD@6*(u5aB%=pE@8+Rg%PhH2?VhR@V}@b01o zW?6aNHI-#V`X5vhb7@kXD8C8Gy*JofF8=?TN-TG3(#VQmM+87RkKH6C1U zOvd{(Vc$(fikia^QC-OilQ}aJ@Bc)}-!{E%-u+134$zn(P%TrN0>Z79I`6Mm4%kcY zhs12VNqH+fbieR=fpXgeG8La{UYj!0?H@2c?fhonk|JShi94Z{VHwsAVNB*+FRze0z3O zGSuv`j$XqSq5TJa2Bull8p8g}#f%EHDo6H7fQa`-AoLOc({lt}sfa?l9bWF%;ok0| z;oh#g;I5Y7AT3pwOwZ%t!MVlE!LYtb7vx148VNVrng$C9LMUZ0Fu2UX(?!s>p2(to-v4D=|-}hZkz0 z?nSZBEyP7(>8SCjF6d~+7 z_)3YIX=<-$UuUT@ke#G=yo3W7m9o!C|Hi{$!x~!~t4D55#$@!Ghy8L}omla43*`A(gETgQdMnS{bQW)b~re$of)XnFj zb~h%<^;jztqri&9-=U}S|J@KI+aqUxL?`>byhi^k0t{PUM@mwuVEoF0zIdlQ!XSeh zNh=3vR7B8%A%P7_Zq&@*S>E&z0hVW@PlY+L6Um#a(>h_o})(;h21;Np4H(KCj-0$xQ#fm(dEOAf-- z3N~F8oDw6MuK) z`VC9U`x7iyk)|zmqn+92Tn6R4^4{T|D0J+Dez#oCVumr-iAD6rR^PBrx5Y% zwn6BmS`Xu{WaQ}Hw6^WxVnvg{trYkQtw(nI!aXR-JqyMatz*VS>f^EVO<$J;5|Fd- zl9Q9qD=ApV#HWv*LpiZ=pz)ibc%s@f<+@Dhr&f;jG4+QUPRDV9KmL^oKm2(Nkt`E^ z5wWm^sxr_J4is?WOB5<*VzId}GB&R7)8IsygGt17Joplq!1P#CI6XhJL|>8;W?g8s)}+dPZ~O>O|MA+U^Coz zreYW+BDx;NyzlWk$B(;ZkBAKYmAE1+3$l#G=?V^kPBZ$|3jJ_F@xM&um8`u9<eNsG4bSPjMmos9j6s6(F zXNwUm*A4JER>;R&(toZ2@D1wcaal{vDbbHs3tdM&a%V_usud%s@MZa}xKkF3i?olL zu`G?6jrsxl0+*vMMoG@HJ@u~sdzK5tfEnzk!A zh;H|!?pbpwddrgNxDur#B>-xc`c0BbQA&BXQB$&-x`4w2j~GfGVLu>ju@Ud}WlAeS z(k8&56AulA&`D#pAfe3kx)P)_foqGPfv4|#{}(g3F_{9u^I{=SDV?3PK;SdbJm+=J zzMF>)t@72il^f&ULZ;NINRilOEH@kFm^DaQ%!%_F=?^Iq>1HqAxIuI=au}C5d&W6Q zCjW_`gkexo0rscO)mE*}8ZoP8XjHaj8RKQEF4;WZ`e3SrrQn$?P{8fDDbrJWgaQoa zXZQ-dQ@>C)P59m3A8_z+r(CSmPOz`9?<(S9LNq9SHc@|hou??E#e2|E za+3O9veZjfMSdJ+l55s72UQO&Zy6ot7>6^wPPVA;p~~+&Fa_kIXjWFsO&{DCHX;5b z4%#~)y)8JX28PCG{y6p`VVag)YJuLLwy|h3KD=`9sU3~x>~v`L5w_kPDa0JL2DaW| z+z7m^Kk6)VweN)YVk)wX{PL-t-Pqcz!h0Rvo8A_lz~(Do^2(^Xw-)C1r7Fhn#;32S z@?lE+)nGW?x#RUwy4E8!O4D!QZ|ROd^sEcgkIBrHO5k|KqJTKQh&Z#|n~Y3fs_X}e zg%pv4m8wrg$k zG&yjPO$BlbZ)$2<=5Wz!8D1*orNpJA{8I{!BH_wNy)CO}Jg-g*rsRnx+C%GzIq5vP zuh>!X5r&}e7Wk$q9+ME|gk({9qiHYLWL_2WDePgdB4806xrmS~@;n_Sc`8kUb^QI^ zl~-T_crR{{HP!hcQK9fB`~Fr{^^KL2-7fpg_8gwSG7x@dq@RN_{u!o!=veGemlzrcgG zE79%0-a~#@STC=yXoONTl@dKVFYkP*#cv72Mz2>JN8J(Ru(bxmvq{qIPll&2k5 z_d}p7sN4l0f%Dlmk6skEKQ{;?F?+-vePyrF-2%)_L=W#zcvIpmG|X@oW5Uc5S8Pdt zLcc0UG$?$QdWlCsJtu;mI@Ukn4#zbcpCg#cWgnslzfY_Iy_j zoq7U$&tTz$W`a!i#-EY#k4aCh`_c(@my}iwk^N>@79-fYv_n3AKURGUV}(~TV0j|8 z=qPaPDO?KXKGxIcer?&8!F>O~#$H!V-0m^1OuME!h@AW#pionvW7J}WRT0h!I4dv{ zGo$E)`eTX1)9~d)Y1Iiw@^U<7L~CnO9dt<_ zY$E&{D2m8zhdns7_OX0@=gEp65Z?Q@)c9#p*UIC33S^sY?-NJE+`AfeDg4qXl$Tsn z;Qju4y@<-1VNUVSb_|~G-q$#w856#;Ito zkE|V9@C*GzXiqmZYHb4b6n~lid>*RKKHi@!^*YiJ`qwGs@0t1C^xMny%l*8@K3SY! zmbPW+ukg^vtG9x;ySLZgN4K~A&};Kz-SPkP0xSxJ5+MT0%!B8Q-pW8rgP6pWFMl4h z+Ccd`CJ@#vyrauO@`%~UwpX!2Z-Z8@pomahpyYlRHn~FUpk$;%$i7!XHP0_$yC^;(TVVL@D5P$Wxl9 ztd#hUuL^Z5o{$1xX}yVziHsw=5jU4IQs2+$DsrR-0p?ZnY`ukO1Y{_!V$r^Z=~)Q; zoQ)KVRyYyH_&IXmr+EqWk)DOsT;zV@@$(1n_eVmzxPylR1{?>72EKgQI?>Imr8km( zcF*`g?*+g!3InSQ3qTVvq5xUbJ}^BE9M0JX4EuHgZZuZbID4RSf>av3jErUQ`s7#F zdrZUr&ZGh)j4BHyBfo+G?xl!%TRYBv%xly;+}cvOOXUXTyM#0xwsxYgD4DzoaMqWMf{6 zK-rh{?sS!kW5Ae#wvkeL%3W%PewuG5u|jdj4m9U>=ukTuuB^GhiDDqHt2nAODxF{( zJA`l*SL2Ca5(hEP^TkIa!J)h(&InJdO4*0LmO>H1Qm(q!Dj(?XJ+>fE(?;@CC4a-5 z{=KoxY2>KwUgEigVIf;5-0s~39pOn$Y4%_MQPa)-Fh_eCVQ@2|6?V>Na5 zWWc}g;zKP|zc&k6J=~%Fca4&9j(Z73URPdZbYYneF~9cT*$(i7o?qs$58TA&y9rU2HR7pJf+m61{EdOWWj>>(a}Y(W@)Z9cI$B~{QW(h*kbXiA$5kKK4mxB|+UXCmcc#`LY zGIu)Xei`e0w_X}r!IGn=x8FbTUfz57FJ;Jw zawd2#ce<=3sKTDVhaJGB)uFiH6@Gml1MC=vEzQRSZTWW!C-lqBDNJJcCOig%e3wyi z`)RZi`U_$bHlhXUm1oLVv$p*SIdcoLCyNCec!g+Ld>tsRgj*o(VMrEa=?C5nf*Z=o zjs$+JyZd}Q-|GzPE_CgbSeEPc$METI zG~V0o{uq`}Lx;#z{u(8h4)Q>pB?-Mh`>QkR)0iyn(>%{&o?T+|NK}6PH^Y~C# z2r#Kt`l)886zV=~UzIrJ_=pbIazjRbR}43q$cf9(=0vP^s=rpuc37ghV!W^x+bxH$ zf#MO{8eTl9B*E3VFE-``& zWS10eNgXuAmJ~FMOOmH5%G{q@QyKiCEIy^tr>E(}NfTAWa`v^+?^APuOhbkYUNM%r zdUgUrC%-%qaJ?WH_{x`HXIM`dcuWKyI`BXf6HRiH+M`-|6)Mf@psQ zC6vXL+`X23s7n*!PF%;6=17&rLp=M4Pg*(lU)k>A1l-FkGF$f9{D%^GMo< zCiZ{F@m0^Cr2kL+fLbT)%3_V#?p&w;?Gv7!G0flowsk-KaqDdkr6F_4rJWSe<>=Fke`TF8ksNS3N`xlVB!68I)&$BTwZ%d?y}^ za6|@t5h=2RA;D{L;_>#h*bTtpQgX8ynnecu>9L`H2qtb}b4oFZlb4 zEbTPR8ud$c|KkdJp@+x^5xVsKt|$U*(%dPq72^a+CCM0!C!V!(3R4cnxNR^QCxQxwG!iA$JhV=>7*@f^Jx>Xi z3-xL}BX%tfyq@zJTadvu$z3cEsF@b|UN+jYNg0rpP%C{lV{l9zQofFJA9IG19=XVt zq$Mv>fzn9wE8JK{oCaACgkYMVFLsbbk5=g9c8;3@W?`xn*YZxj2a^y-)&tlQef+{@ z`^{OtN87!2!+oY!8<9v)P6;T@8B~9_q*>Q;3F56~Ug9l8Jtc5r`$h^`Qq&hIsip5E z>Sj9)=S(3~j8DxX4Qdf_{hBAOV~g?U+Cn_TiBR}KV1APlujSfFVez&V+@sO@rbil&s)=3)fMoniaWOU4HmJPEedI-pr3!FSn|6!n{htZQ4RN1Qe5&+&19>y_Q)p~ zXHwOlh+KG>u}y$e`6S)vQ?i$D2)ZAhEf|XA6hVhiD;s5ob<8y_NbR)xEN0eXI%BuK z)Sw^m*-)LEMU z0eRoyihNccxtAEs-=k)X8SEl_m)t5rW{Mtz7sm?(O(djb17}f17GvV{)A1GH2E%koHQ>HA)WF(nS0wHM#1FWKzu%-=R z!{?tdndl@kXluzn6>sa7JZK(zGx$JE-4UFqNtK6a%L*B9IBnRx7U28(?_x&}a90xPx67kO^zxfu=B;{2B8=)wNw@l9wOc zbMtHyB$!SPRl|`_>kH3R^XxUp_psE&N8<5~X?|+N8_f?YmF1KsX>R-TW)I}IVMI@H zvRiy-ayQ^k^pZvoLBKT!mYq)L1LfBJMyqH7EJBI(7cbD3z*lbKuO8mXJVwJ-CEg^< zbmqy=0T~rt(U8Adl}u$}1jN4xZRTPlE}_NbtH+}&qp_m~pP|87hsXnTT;bby*nM~) z%3r5Z{4Sr88mg^c<5!<75kkE^STO|W-s^N%FDKRgir9brH%ak$5_(8V>_ zBd6K2dGX!*>ny+h9JAhzz*H?-jp)MnqS~hKmj4`psnW2pB}>DW_xpdc-ZX6U^?9gh zL3?lD!QW^)w{4D~ZXWeB@HqSl>aKiL+J_l1m4(vG>2($ZXWwvqQT{i*c^Fb zug3WSyhJ!f+Qd_6J#U5E_>CASfdFNN2Z18~iOUmC#X(H36XT{Vp^bbV z18tA5BMe_D5&+3X&<-E|prqPp>58l1cIHWma9sq*-HGVhwR^&Gy1n%F0NXmE@bu3O z>sy5c){2DVC^uaAFHk7cmOj(V6enRYH)*<{!Q9fAbbV*ouc@4_k>An4JVjgr{7|}7 z(juqkBjP7ZkWpcALE4qVSF&#*ZQxY8GV1%Fx9&RSTBQI$c~7FiElLL;lZNkzV`Oo4 zy_K`D@fpU?ho|x)O~+uZOOdGsNO8eAl~rdE*|L|UVdOtwJVnP3MYmO$3intD_WX3B z?V5{p5zN7W33F;pp54YPF0XH%&#w4^K|43%TgP;YfUzS^4=5*g%O6I?HQ}mfLP{Kw z1{W4|`iwZ@ne=R_Q~7%avbb&8d#S%yF47xZxODk=^hj_^Iy!+Y$ar7) z+Lq|#EQoawlX_I+l(t8&e1mV>sx!qJ0{Yiwca7iao<+dJ!vjX*g2|~as^)FmJV6X)tD^+B3-+OJW`HWz)ImLdC?<5G6`C^8N*dcFM1P@|MA(s7jK>D5X? zH}TPv5+_(I9A&gAFa;s})sb?vit$Ze<%|{Qxq;4I@U{eZ#A7fu;UVATkM}dBHauP# z7W~RnbX<$i(oRUm!`N!&d?wSrj?!2S`OV33_QjeB^w|!pi3#m}sr=?<+n|0K4ZM^1 zfpALTLe84k$W^Wm=oI&;WZ}}Pp>48IQ!M>lA`i5KG!58XFZ9$G$PNA+qSz40YErR! z;?6Tk*iKZ|YAr6Gm0`kGKYtVFGt@j+u5DJwzr)-hXQCKUg7AcWDpW(DcmMb-d1fqj zu&?F*6F8644w&WQEIE@s6S@w?e#euz^1v6=7RL=k+(nRYNCh%tsaa+nvJuWs&JiVY zB`oRq4NS>W?CZ#~$E2th+fXG7b6XCIug`h}Ajo)&INYe7p(u47{4HR&-D+cEGbzsze17gaxF z$&BYj2*h9$$w4nEP{vbUuE4D>s5&{?La8|o-a>p>T`Im%jl>IlI6;eAT7J1x@R~WU zU-}(*|FQM(;cG5(E!R?-o!|Xulqv+Y3en0iO@EPifSKL`XT3%XxjtK5OWT|U)E+A!{3AwpzL3Q^D z9O^-Z`8ukcf*UD7cx|NvHg5cvIf;?ikAVIDsEUEn_Ic?pyLyb@VUBiS(dA zdmoJtviIhuTx9dq0gDbt_CcO9xmj>4N?e2$EDv?o0DS|;&7eYGGo^)CIjwDg`Md?w zJK--5yqrb}Uo!5j6_9yvY=g~|=Oo7{-$1T%`1QKgN`j1KsEfz_7HoA1zwpq3hg7b} znz^xlEPPNJnR&OQXhF#;7d(Yd&{1Jf8t+q5?{+KP*b&dZ0I!jwU|azkG|*h|2sy4^ z&`O7@VCB^!eB$N^y5+}@MEG`_j-S~#3wVKYG8|-n^!oC z6(v1?XHW$O0m?zbKJFEA2o|dXYgdOR*<85RrpA|xtJvE$EaPY#^V62CC<_Da-fDu_ zzTfPv+_c^lj0a-X`Qq{1Ns&8V2i#O#na9$SN{BAELjX7n^nn|SUC37(kA{Y{)#Vm= zW4T+fsnEqYEhtxXiH!pEVkFh`{{VwN?)8Oi(HLf`s@qjlR@HHeKErs0L(v! zm>aePn%}a-`9qD>bN7c(;@h;jl$c0yKnO2jQlQC(wKD&hTazjb<284}wWfmVlt@x| zC>gmok0k1PPHSc(W?a})sKo+9q=#;)jYg5WX9X^9d&xZ~HQgu$bDbw@r1#tXmaajI zQ@zMH*x{qXtOOA1y!}5!NN37DLdm(ToLg}fNl2h-$Bb4-%_K?-SD=OA&}D)>>4cj1zVH)@ zu}6_C6E;yJk@H@R8CL}(Vu#E!nf0x#R%lBLfn?<9|5Wl7f6Zw#1>pi44aKob&G+hS zyQTKyT0r}q6xodAPCnL%f5R6B=AT1(G*6i_q>+dmj{dvRqxxu!J^AwCv&QV&XuKrP9NUZ!KzS`FWoJr9wr>GWe5R;)Q12`}$FjBnwr;jQ}zt>)8- z7ZMKPa`&~6GGt}qOy1fYo)O#l4Aa~pxPV0rn9Q>o3juK71Qp+PunI+ru!Q-_=M>av z>=;CHF?uXXD~*L}75WIg18Sr6x+k5duV6-|qbCob9O}o6LIxz3lrCJO%wZim9E4;t zMH;&_gQV)zQ(^WT)seWKZnJPy2!Ih3+=*f6oT7;-7H$N%dG*oHy5xoneNuAU#{$VA zo<|oZn-v3c_Se58vA3iE{G!Q<_y!iDs~8k!JLk$1+@)gQnGB zq&aSDQ9`%hZFV=_^zIiXH5eq3Huxv#BVaEaFWdOmoxx^=^K)jBX~$JnTd@P5Jx@Ko z=LYZtJh=8SCnx6zbuBos0gh*!oPdoDU=GGVmU$+tQRCV<-9bgg#?mC%f4p}+y0CzC zxz+PuF>-6q8*|San1nGv`rbS8^3wFmHgo3aj{7bi)I- zTc#AlxYyLZ8uc%&|CpGT^14Pqj|((l>2Hi6B8}SjYBbT~>3YSrXu3SNi+;S#27lXJ zc)E4ir^7sQqgyu7y!bBG`&|eOjN1Vtzrfu6f~~cI0jP?FXz=CFgc*V3&!}HS^a$y6 zopY=$%9C_&q7gD<77^WgyD`)ejWb9laVwD)xR_Azv7rrcOe|j#P(pBJ8A*jkn36f? zaOIH7@XCmhAEe^33A5fXZg9@AR%v61|Kh8#H4$=nNFaX$BAMufS9~AKVZq6Y{!}Ik z{}{d&=Qq{vBu7z9S5+xGr#Xw$L}bgjj3|h44I06^3Xh$uNFluV+&nqcU}nWBP_X>a zzn=DokA;568XpiR8`cITBI-{tsB~3^8M|%QS6g9@1_9?wg(V}^&lTuCI)i6DcW?j}L{F<1k4(?&<4p)kH$J4c{ zTlBbD&a`->5IYL2Tds1j|B?|~Mko(c!Ds@^9aoYf2prl288C3pMA^(5t)jEB`jXR! z1nihbEAs`NSLEp~~lG}MbkDWV11CeV{-f`mo8a!V2X)#&*O*YhLJd7ut0;+%c>xanGv zQF@po+~n4NGOv4zhCxFN#XON4=09d655vnn6n<=X5 zY#L-zTns8)JUu}2(OaJQ3?G7meRhf9P-`+(a(;60Me?)@DzjuhdCvTdh;{}Boo+?f zH!xexC{()Mb8lcT+G(@k@to+Xri5y7@bNJg zn1TH5zA z|9zsMZB^1Iy2P%7QLm1x7N!-_%58=kLT&i)-TVLvTW^%xVzk%N#K{YoK6D$9JN`lP znWD_pU5R)axiPgFTgROVGV??7EvH`tHeuz?wvq(LuUNk&Mfk(ePL;%nr@)bmH*hsp zWX=Fe-QDN9R~%NKKBOL#Ezh&eXawq7L}aPI@crtdWMwo`zDv=Y&UPyfsXjvjm{Pil z?9fE4WY)OI3qdj@)*HmR3T|k$KdKGORp2j6W+>d7n49 zFZZhl$_<5xLi8u47dy#fZ3_yDSaD{6|p}~x2X0nr* zbsVoNNx<4t<%fb9q4MG^fY1q<#TpM1ww1-jf258Qc;M5+od3D zdhWwe;pvkSL_QBxz-)#4%;oKhSdtm(sGyUNfR$q@%pM&9>SzO)q@J*0TdUz!F3dT_ zsD9bBq&Cn9uS+IHgr6?7E)%;G#4<4&iF|qfjA_Q;8)zP$iPUOP0avhSLN5l|UzVkS z5dxymG#7yEKMH+BQf21h{=XN1>WxCQ(SaW#`O?P*Nx6bnSUAvJHJIPQGQ-s8P!nNeVO>fKJ#sGP}+!+#rk2b=QSCY_IR5fJ&%R77^6y<9V zUiCQmiouGoxeo678`G5wFCg&NX*cHMnQn-8y>vEsJgzpP3|nrU%oB+pLqhlW8qAph zdq@osBMH4)l)0Y^>2r8-C^xk)bqyvrhG8LJqr1D!V8^cb5AW~I*h}Jlwv-Z;o7ub2 z@1Fvwm000Lw$E{&Q3$~R>nd7veld^r2$FtRwFDL>I=VE-GKMk@QRlkwDet;lIt_Ki z80iob6WJUULTgUBRPInav~;PW*6d&L^^qf?RdW?mYs|rvILio)TArzDUtQax2>Yj0mxImYU|#$Dm1JmjD7{3i+X5CF%Jw{$%kg64GuTOO-U7is^-I)N_}WvDqLZ zyW45w8)7rcAr-MN=r=T*R>XJ>DJKv7(VPtw^^<(Xm785L`3fBM$X_`}^otM=$$@M| zTre}ch_8aV8>?@=*`E#*mD$mt(ameHp*bYTp|#J~7U%UP0zwgA4xddc&CmA+qB?R# zU|WWnlEtEvw~2e1eYR>y*m=bER`us8!J4GAjHnl_HG`M_a#7;EwT)f)eN>5evR`l@ z9^1Y;Yl^8t(Ymm4Z9r_>Gqf{If2C7Cbabm`)q@qRr)+v;RsqfgiW{Wc^mTi2n)P=}JW z$*XR%2`JxZ8KDiziR7wU?9(xO=go^yhY7|86cV>grjeAC91CBUmsGKG!L8+2D6Y`4 zczVLt>p}C#D2%y2{uuQh3j28vz zC;YfgAFb(?O)f0BP<^WOhmo61$(fO!EhqU{mjI<3zLQC-+?y8!mOVjA-fxluTQgWD z$nR!UpLbJ}gAh34ne&nlv*(N_1|+j;tqUxv*Jdh$&`bQq91|R!7-eehe3&0WQzJfu z+URIkGQ$On`MHybGY4zq7*q>DG6~xOWZHg=w8fw!_(FBR?=AFzS0dgF)l9Aj)-=A)c!)9(5cveC7 zAUYD~y*|e^(G@Pk7_u!+8}Li}A?F-b2IEA^-xGB7deJ#fx_RjhrUC55nlD+gjzXA| zjzX(JxwpAR;XJ*?FLXt=J-%DGt3Bm8vHTTCNO(x!Yy7+8vNQFDAOd~Poz;nwBTMoEi z+UN8VoOKT6dpMcRO8Jyhl%RKY*mqEcvwB86vUV(;UB`1k=FXOBds@GKy~UZsjC= zn%892=(r$gT?iPIBFNOkHaer&Ij9CWyQLn3wAr$6cd*}L*y#AGi_sc9E_C@qZt8MbxcH9OC< zMcPKAd#EojvP3 zI!I6x=q&AAEz~F*cR|s@sVmV}dDzDoUtryEH7rTh%6TZBcgv-0Vi-vXcmHg4f!&W! zj;LM9cbP^j1)TA2HTDD`o^Z@iKo%Ip${}*d?04eAg8%opwN-KCW*w}RO@*6pyI$z0Kt3eg}sO=S@ zxPKAqc^qQ>XI$9wzd->*F^vthCgc7&5-fDQ@BaLYK7yeOAJ#s?c}o5d8(#k*9>A?6 zC?9`WKl)p@0w+$~cfAE0nUHi5qjJFehXdAs<@Cfo&z(Jc2d8y>>+6j@?G{1H>Jn&` zta%9M`0!eBkoo;4qmY&RB!yV*;PQN>N9i4tOYvI83qbv+YW%z4B|>FoWpKnM18lkA zoVhvv0JDKa;4Gc}Mt0BXc?fIfw*ff1h zcZ=v4ax2O&GgGwJ1>gByp-4lu5;6UZ@?2sNLmz>$O*w1Ey?~7f5>t`RuJD^Uk1f}s zwRcS=9`u26UI4E^xDJv)T4X&xVBO{-jqPx?s7;~54}uDg6xRL5Ph5YEoEaaT?LGRX8BS!Q--E3Uw2#te%N_>L{MH2 zYnPe?A3Om45WV_9f8e*V&}UPOvwgtJxGj<)VI$}G+2Tr395I!f9PfOuriDQo7{vvZ z*`GE4KZU?$$VgBl9o%3HiLtZM>2FJ7 z;d6e;X&*>214PcW`G{TNahjAR7^ zhcZ^_TW_!-IQg%8B8jqse2J&uW-hKPB~viGxjpE>NlA3q|IL5h+2``cE`Exu;m0K- z&-v&&e?t=EHwj0Ls8a5M26(11z69M;?z9Fx(m@N$)-&o*^Wx=K2^J1++zRQo^RE~Z z`%lk*5vZ@0ZNn!AJ$=6?#ePKu#hRx!yzR2fdJ(|#YOVgLhYTizbjhwgUzuohI6`cE zi_iS(_YRTb5}mmHtgk$#kcuGtNy(Nx!JT6q8c){o34Er5*Z^u1R#G99ZKX)iw9aFa z_EKasqrdtC~;!pLX6$R*Rw$A+?K_3NF*@$ zliyV9N8{n>wqc9hv3zU86K@f?%-NPK^DMflvV#CgbK???uzxL*(-Dfq!WR%k(ebL` zNcF4?!+g)IPqLgc;P_wK!AP>|^mg1s-ML2_HYM=iF9=uA*yifq@bOJxC+8u;3yl-4 zBKyFx-%gT7l9ui>=xYHlA~Ni^vk22uK}`1VHzm{IOx`r%cw%EPp-b^26~k+Zj4z!y zPe4^IOC)<89YJqaQg2#PZ#+`4S!#%|VpZF=M6yEVZ)bdr^t~NLicacW1)_bL95f5w zrx|xqjy(*kVqRBYpUDMfTy)Bl0)I4=HEVH+U_XwBoMhY}f3^(|-^;WXs4zJTBeeDN zOK%tNJ~1>6X1bJsA{cU|xQY0=ne}?Mv&r5Eo6Uk7|BENiAU(s8pYc_wGP=$??)Iff zUxTjwS=T|={v3s3wntj{u|_koW-9PaqS-hI2N%v0wHvdgKfbzh3M>(NILKT|xYhXS z63+H|C7S_MV-Qo}Q>g~FpFC`IMUFR4=48s)WBfQoSB-s<<%iYIL-6TkM9G4~_g^SZ ziHVWx3l`Zc>2%W;wKI{j$lF2LVo-p_=1+P8x7dH!B9_9Mc`~~54La-jo%QiNJ%Axq zOY6Da|83kjjV;0Uc<{>F5A2E;A{F@#4nr@|lunZQNmz!;$;hIdkY~Y}BvYFmNAIW8nGrT*?&Fw5@gvM!&xdlp?)Nh#ykwG_zHc{xhbW*-jUxga?XN#uz#Hooz zGD6n&eeHKs9qs~>ijjoi*m77)`cN*1u;)|xCt zdKzRHgZOaaa7{5>vvifA)oc&rn-A;5%&o2y4nGeh-$M0zEG`9NuovAY4^nY`yg4p8 zhVd`FQE@*NVHcrlX;hUNml;DQDVXgj;aNKj1!%8}!9o${j`(4Oqgin?5ukiMR&tk* zl=kA1nfjo>#z1^p;zXsDxJnMXX{r>*Kb)O9UWyJjZ+xgR`;=Vc7E;}>k-njETyzVc zu}cuXtVU(zod6NAUtr$|3jRY!B8aBLN<#eQ4p6esKEk)B%fX>K3k*aed~%FMmzy?l zkY63+b59uKXiq){d4tf*56MpBdi%9s#;fwY1%)`$7iAnAYa9;19z}jZmmllSx=Ku^ z{qwj>Ev6gJiEe7(iVZ>GmxiMrJz;Jnc<}lF4ctrc_&D0}-T(D5d5I9L$b(@FE?!939NfY*WoG>QtYqtheLPV$!qho98q@#5bV9kvFZ zrn%klLAM*>DAVwt_~0fpeoA(4hF$qK|(5#(uXW2-7cfeL*sI`(!N^D-sY^ zxHJwfPXOmKx`ICP!CMcK#D5mc|B)n)68`~JVNabugnJ4$Ll2Id%CG_?#p%C$>s{?3 zJ}{aP&xJAxXrtC&m`rgKQsBeH>aW!uF8-P=Q_U3pNjYCwqiulTp<7iWsWU!^K;2c3 zCEacj<%V7+w>O$ki|StB!YTnxn5>BnW-?V`mqY%bgDc3&Qi+falg*Pw)+#$0Ean_V z))gafGU=Fa7URVYU5Zvi;{UP;T~@@!z{r^=OB~tqq)A(?*+hq3KXx)_1BnT{ig6Bd zU*>a>8UIrONs8#AG08V`M-R5=YgxQk)+64DUsPEqnGxZYr#fq=tuR#MY~e8>yHhVk z%;UnBj7Y@l>8{uN7WsVlu)%8*Gk|iCk`UjO5X&CGQ1#qgj&1l>r+MPsB`&3fA)WeZsu2roXYbs zj8<7^aD-`yp7)ga#%JkCJ#x{`LoVs8nx>@7#LTw|chxIcuR<769?^_l3||BUVHUg3 zt^250|H(ouQnH5Q!@%xGVW&F<3b73CLCTw1O)IcLqyupzkHevb0E5Sv?ylJ7DW|e$ zNUokSOH4#`7yS&O5ypfv2HEAJa^GdE6dyTU8l$Z_T_Hsf4-oEYvvNutsGrB@iE4S0 z5*Qz;z`}snarI(zeUtGY;^v6nQrZAuy}{d!DDex9w=E z^Wt;^e!#`(W|iMIchZU~vQ2^%dMw!A))dSN&Tyt;oBsSi6Tk=Je+lA?3)JA3mzN*Z z9gKk;kM2efPp=ih6GDqK4_|sNH_O=i4mMAE4nHpYXh38XBJKREOnzYMe&SD5pa1O) zjD+78g+jpQ1hH)lu2V26#Z#^*WaES+LHXFGs7C6ua-cZ5S2qiWnVo%H1SI)G7SwVJ zTI&mB$~AO5`;p)XEyOR#^Wp9~fTo2Zaso90m3}f1Ox`zWIYzzM4hbi_09jQ7%iV@8 zmiGD~WM1cjFDA{#G6D!D>c|>>97q4!niKb0J+$83u)~ja@cUUsiYRJyvog2r8v{%q zqxDNklYi^+CL`BCqUIb9C|Zj`^@sF9Cw+b5lO`j7dLD^9ZfEv3bkymZyzmj=9v!75 z5Q*%@Q(Fscfx=UB4z4-C#+BLmBpyOe)@wX2=FDp}f_o%``Zs_)M6R5<5dnGGSl=02 zR^){4D>5<#89kRMa;+GN=#c&_LpZc4Jy&6Q)2_Gba0VwKkX?-H^tUt)2)>yAb@#QF zOedLQCBJAJ-0HxH5u?p^myu&h2y*ZEoZ$#AGo8|3R!n~rbXit}W=jtRQE<$|HZp_P zG)BS{u3;B#y=#6450?yEfw_;~xjN6eUkBtQzZH)^I+6vkJb#<}l1B`Iu~IPj7%U_+ z^*r8g2Gdx9wVrP29E7~-?>=6u8ic!;Ph?xmM@x9QKL}^w)9Z}qt6>kTMJ_jXX&~Q` zay01HBQ|Epu09=s2{FCOA-&R==$VowNg`e6qpGs2zhpiOmCqRKC0;vpG8u4AjgE3N zXCOqNvrN{C<6wxKO{hUH43oHlOgCdj7oFG~vkCqSl4MHjk3kvCl=z^`Z1V>5zRC${w z=gip9rF{+ok8a48@;k4{boU%=M30Kb{Nu!iqg zn^qbRp!Lu*gzQg4;_OhIS0u9lQ#OtCQn;gcI{cZzERh3mvLmuF^t7AImb?cJ?tG7t6NB6f&II z@0OFc#R^=!!mz$3bUucz@zy7v*c$}HU&#=LrAka*iCx++=0&kfa&X9wDOMHxdNQPp zqYGw#=;>W;l0%Zy?FrF*TaGE7QdR~f6?u>Yl}ELdyb@4{y}4y1CxbFC&F z@EQBFL9#`*$Ismp>y-;ZeWL@-9AdlQkaWsU$eRF4bhn5rqKFWc%FX`f&%;NWuE0o! zZ-$TH>#j)@3N=Al92&JyTd^%F(9o3L{JF!+UON~OEtmI5gkoCvB?xg3E}5(39o+Q= zUMJ*R$Fo zz6Zv1CD8)Q|uKKFEt8f^EJ&KA+ajoPT?m1~-<0)dPiv+tf#z z?~irSo8b>mKek-4{5_uM7rwgiNM_pr?5^5l8jlWzM-^w-sr%hhu zbOe0$vlCF}(zA%Vm90{L{7Il@kU!&%pwLwR(bdZoq35Y!{*{?wq;ibzJa3n;n~V@x zay$rq)?BE7?-x_b_CYmW6O!IvH7&D&v4F~oqebaGcpSX5G^^5-p&wh#X~8gQIys-+ ztWM3wPUlxMNe@zCAcUy`N+6R}zoXR}s?+vD1X(m~b6<)d=c%O)v|) ziA- z#=ZA=e_ucV(=;{}tefXmG^?2S8>;L7pi?Bppscd|0eO~+V#67Txz;NCOZur~4EyWP zMMdVu@fmBSw_EEHs6e;?2Am;Fi(> zF)Ua&*-XijW)UTH!6$rfGg6ozIv={4Yv8tsXr~9p#F!a@2X{F*fGtGcO+daVy|q_N zU7BSjZYfpCGFK)xbU9l#hmWj0DSYw1w?;60jX*`&LVld5Qw>@qX|E&9R>*p<`=<>g zeza5TZy|hvp?CO?b2^!kUmG*+dH(zV2KE>j!BF%&whGt@X?@ACD0+C;83mv7tqrw& z?^U6+6f=GmGk%ryZ1wa@mCb<^qGS|7?(4g&yXs$9m^ojuw=aHQ{;Mhi?;uE#Ka&5O z5q_9lFBq0v|H!JeXZ1ma4ax8xuph!$*%nz9=?KBY_R6yjhq6E@px`+a`RJ z8SpR{r5>=e6bv4Pg{A<>wHTlb5^InPo*Li?!mA20u*j*+mFvi=Y}`FfkP7~@{X?=V^(R96tdkBCVdg>XT+D>%!`k7X8bthG5g}5FvDOE zD3Au-w^3y56!SR{E4Z2e!2JZ_ zB5S#rUx#~?>Xjw4(*h558D0nW;mbxUpvb{!(H_g-*Rvb(o3L-nFI`iI%SZ`x`toFK z^4+CZcw`!nkSg(R$)rS>+QlGJnuhZLD)99v_X{1L)Lg1~vDmQKDZ42t zL|rd}+4FIdfTPIopReSpx`p*4*cL}v zQGweT0*1>--5}Psc!7Js@{okLF?To=0(VhUlW^J(u|#a8D`yGFGIRzNF>0Qa@oXBT z@C7~j;j&Hj!amC102y66*9Kj-VbM8tN>}$8FB~yc6;sx0>i_Qrpa81?E5%>qJoNx; z%am?Ph{B%ph^Y%Mx>&18T`CatL~l5Ql+90(>R+Y4UA!MxA&c6jH9=E3lel0|Iwzw zB`F8qg7bd=DA(XNS}m)-f6s0+qE1)uow*i{I!j`$)ijfqRb^Z0>1lf=k8awgz?H3?%_~bCLy;ViWx|C_BYuyv;(SHBqvXD_hzFse$UUw4g*-`TGAXnb z@rBYea*NI-HO6NUao&sG;vmIrQ^*;fSNr?#%t#S}9Wf@LAOnYZo-xlr5pU;sz0&sb z_XW?8mV}l<@1iCG1J7BSNf{VXnFO!?#@H=b>7i&ymYGw{4fQJ%EJEIs$y5eq-N@4{ z#B}3|ygUYhh$hirWRTIqtH_JZB_m2cT(|g>#b<}(U5pxv@=yl+G{(F%#Ofn4KomKg z!>`D<3*Bs&j5mR3YJ8um5t&kMD;887(mEP*Naz<3POf@Z{MKk>UH7JMj!?-cepEl3 zknp;tRjzHU@z}k*^~76ql*p*k+&hJ+=R$@RjpG zv1mQ2hH!n&#NiP!kmJu>Wd&+1P@O@kl~k|4eJ7I0v27=^heg8lI@DpL9Sj5k+kb}J z?jniaZ+0%4a69Kd&|5tD6|wlhv;NT#H~3Q?OiLS2MaX|YNc4_-wXk5jam5p2nV0FY zGq98G>AnB!c~Aqb!JyKk#bj_cFMt&Q41uLIRVa1*Hd;{dlOzou9uKBK0A8sNooqyn z`F5%P&!cT{F+DOtZhg4hB_%|^%=C^a?x!3zbOI})Y*dakZ_*tiKL2jPIj-T13C+V4RP1lYNKbJHn+zO9ggF@kqpBhXPaDZ*-;1m&HpSaIs!Cc+7?Z5H zqfoAIhbQEVf|rtggX5u>yLG{hVu~1%1m~-9PaYXm^k&1S=0D zi*M5z9%2O50n9v(A``w!6SKPVo7{5NM?ANgQM+%tG`zlDVU%O!;?(5nPGVWO_AsZsZfT~F8cbXl`&%OSk(0L1H9SCJ zYt_xUR=Ar)N1VGNXxOe|!@kG{vTa;cDB0b-(o(w6wlGOG5Seh?GnK7&FwOK&OT$wV z#e|c|t;>tH2Wfu}1ubEjqwPF#PD2I&E_wvhfHai=w^t4H6vg(pBu$GyF)wUO_K+kl zZNAF7Vx5Ei-b6jFqo{~|Ve8R%Cre^}KpLZTEJL$V2kw&AAVRmK z46UR^er&7fyj~%28{+Qak2p(2rgmDpUp#&xB=Lvc?H(rQE8?T%GZ|~xH_^y|U64Ug zCQt)LQVenBO)w(AlX1v(&siCx@6m*Qhrm$5nKy0?E!?HzObPu~{MGq7Svk%@O)gC> zE5Op|etnD>Ece&I%PE?zPgmlh5m>Fg(f&nk@BKtNj`zTJX0uV%T!5pp!M|MzU1TZs zjrmWS>Ja6WUG7$_1FKSqHPRf(Zn~&o=NKjh9^6?H0Y%=#<)}RM*A?lNXFhPY<1|e6 z+pwJ8g_2egmrQL`&rzY(0L?Flq`6#daujLxt?_fR!@=)9S~HJjjxF1CN83A?w*_m{ z_kj|-JLd$Z_Wo6@d=q1Oj-?@}3M%6hJwH(<`k(FFgPJjeow?O?P6w z7VNt7{{u${-sEgufvdPOwm#k;Uau2_1TU@8*UD!mUxb{5KjP}%-!GqjeY{@!U;p~u z<$D*|`2u&ianbdxcbh4K+H$$KH8i4lw=e1P%@P3C`5VIfD*uWWpT~ zz(mmV_g9Ga0T2?9m-8`;2=InqQs)}qv@vi5Vw3#dYpb$R#oWK-!X)ACR*~JHZPga> z5q151XQx!L(C!cb-dt=rj(~ZSBGEnrC?cr3SoxcYzZ4=G7!#8%sH00XDree$Zl7|h z6e*9Y&00jUfP1Jl%Dhl+0#wmQznN9Qj=%^3`ta}eA znK%WaR~ESr;gJWJy1(!wvj5bx-=qq)r6@yI8_myJ&j!Q@d_l~&@?BB)^82IDHsvh< zRIl-fL)@6%Pr{kXw&(2BPu3W#V*eJxt;x!yz?7dYv|tcjmBI$=6>K1W$@r`y`#`Ke zhd1)AH)We|CqIOQa9BVTt@-kyqt9!{DU-bMEJ0R}#W%#Ck+K(oX`uAteYJ99l{k331_ceL6+c3n(i>RDdGC z1IKFK>?UGH+9pGXr#plLB!W5GIzjg(8Sy2q06On1who6ZKQ+=1_)exttft}`m@Zxq zj>RHSVY!UF40EY&F^V^6n`mqKFj*g<70n*%x4+S=VY5;N8%vuuiF8HRzrfo;C{QI> z3N1@7J8IS_?cRM39MaGR}~|;hpY|O>1YSghWkTC?FrUAf_!$_1Y<66v4-rl1uM#A-B7XYtM@+ z_&cJ^JtZ<#oIaw;ygs48wh896bn^st#cb1NI=a(zJ=WPOTR_V*LpDQaQfE?o-Nnv3 zs+rp`BjPfFwMzuBEHb%1kpL#w_!o>lVBXmLDk5kkbZ}W6JBeLT&g(|pq{47K^ zc|8}_@$p-so|O$!)Llfkpj!{gtS`lboLjGnp21AJ<_L=9rRP|>sqdg^rx_1jFi{$Tn zv;ZfiD~>?y^ECP)SnJ>IHmt(3``cxjXK$ocpkV&00<@0*!Ky_n21uWe9YnHF9_7sU-MbG2~h-7ff26sSSa;&MLQQE#!h9FA?k0B_5X_#bNB5{GvJRd-V8b4mQ4f09eRMzJH1A zJCT}E@g!@bD^?Xl%`#`eik&GnDP5Ve+0aRGlgpzQ=(Ih{95$(5uZ`}kau)HLq!A5e zF-`icsOxf#&5mop-iSm9T_A+{PuSh+WUlEM-&vP7Sky7mm0 zm|*z|KKxrExZ_PAFiAV)@#iZ;ao0WFJ4;x{#}MVGmtQ6)TC>$?b5)Lo#)Me^F#TzZ zf-qrBs73t|R$dQa#c8Fz!W)H^vo~VFxrOgeK)`{+=w*$ZPJh+FzK_ozDMnL4BY|Ec z2CJMC`QU{DR%kuJDIdwVA>Tz=fJv_p-z6`}!LQmL=yj5cN$vtd;R3mc3_0?+9Ea$A8n#>~M>6Gw&r2>>#&Zx{9q4FM00~I-Vr)d)w zkbG^;qj6rRazCHp&EhKdZm@r*LnY)$ablb);)42&0M|0X7&9RD9HEK4jv_Wn5l&z@ z_?f-XL=GS+%!bhu_IuTKfwTUd(BtltrPadVbwv+GfNHz9Z5sdGSrMINH}j%93=iAL0l z5WM^XBX1u*l^oLoUX-lSV|6&v&Br1XVRNNRc2`ih3!tO`t>ka~QCxS>=|>No(mJ&)KXu%*^cLT< z^22sd@5tVx%XdQ9V_8bkuSbo%xNgh$h|)eKXj(EWohplkAydja$&hj@{TIIMXR5l^>05h zY@XhawRtT9X+)TuXfg?d7IwnnXnv9z@1(%@^yJ|(XHWfKVCh5Em$q{j8aV8+EzKOw7t2|%ZqG17kt_kRZ_aT))Fy9w)6{cxZ&@NY zAa>nTexd(uTG(5{9~Z!f;*T3F#ATP@UZb6w7Eo8I*?XPp`!{3OTZ*05TrVTx zcxq*)$9o3$2lstK0gg!~4}*(2(BE~{xGA}~iv&)+AN8V~E_aAjJ{hNj>Wj1~6Zux* zqBOz!m1FivmX+Re4A2BxG`^ls>1Pg_&jbwfiDqj7mp4}xz&t4SS&mPSFPa;EY!}4q zG>uB2F@3bexw7F#Q{%#wN0*eLnj%&&A zH3m(##T6w?BT*Eno*+Lgkx#Ap&!)TNS=TXF>qs;g;t!%URT3{vt>xEDi{n~juJZlz zFNad-IU(^Izi}Z)T4^p}6N7$=Vtr#BVV1$ZNzg;LNEyjIJHUV=8tf9UO6oEANfX0~2xhj(Gf%OQuHtO=Q#nFDP<*MDqh&D>e z4i#F(NWJ#18Q%j_QMNs=6MUVc{aiJ?np8%=V|9NNS4I3;8v+@olcVy zD?F2z37}0DCX;ql5pq*Gm|WF6t4Dq%vHbjbPGz4$zP zzAXDa9LpsM&o~Sr?DSAGbTepEAHO5mzL$_Z+}N)|R7(__TrGienw|ft<-)(649l1I zMRNV~zcKl*tWow4&`WXTl?$?SZLhJ7)?9Z48lbQFo zM!12Dg!Sz;0#E?)l&cBliigo5=f{A9nMTEM`zl-LS4nKe|1*G_lptL*_8^B+ru>tg z@wab8eu0hRk;V~yaf{iKKOfvV_k@a_g;MsnEXgr~3%FufBv9+E9W#t*fM>PMU6As|nZU|_HBJ~t z+_vh=WlaWEjF@C#2EyfDyAK>B1L4x1f4K*D8K#}slvhkvqzTN!%&ebk{{g?cSSLPd zTyAaO;FKIo%Gbn=@?B<5fZK5lTAU|Zn{L)E7S5+5oF1k6sv;vdNVD~8Lkr{QWlomM z*yLJMhJ+G6`r2*~%lHZvWngRP*I#OeH@luY9b9jSS{2aj z(KyWyu+UUO5QG^G)*sf(Xvs*an~r7FwAMsC9rKNognlCkpe07@oC!{sfF+LDjEi-Q z-s4@b?cZSZn{3;j$!zo0#nxG(7QPiTGM^PF#TaA1IRNb)Wb|xDzrg=Qs}$X5PoyTF z&E&F;@^lF|iU3spwd(;YPWML&w^2U|?si?a8tmF3L3?szj!-M(2k~Y=8^BXIO4?T8 zU&tph8>*rzE6d22TjK0^#HZR0I`OZ<%X1s!Z1QCkQkwQJgAQr2&fddi+|nE0>*{ss zto9Lk!aB1B&WofM!f^e}WI!SjmKSYyHN&Q2Ki`ded&f`SnESfASG|97A6+KU_ zr*(#U(e;n^J^u|CZIAP@ffRTiGZ~qb9Fz1QysdUn?1owgO1*pX-@L!~&&|OrOhF@s zhc<8SFc!d)qlUf0Z7&fO@zQ0j>P20Ujw zo-3p9p~i(z1uxEoQwfu96c`8-NCzVm6UTvCoF#pMyGt|}=8xxYQ#s~8`R@|+oob(U z*fY$dFp{8aVW2P{vc&qQ{d7nS`o6>D)nvfpEPL`S3d9_1pK}^^1@g&oSD&bVti9ok z8{lva@vWxF#sxC(^H}UPlcIFM!4#38lVhp|=kpd2UJPuAkm`mq7yN$y@PrPH4GR>R zKg9SA*z^b-r4>&lXGudOF)DsbzIRQLp)i#~H1R?0^J!|38^Y6)Ab>rhU7dyZ-t)!# z6J_g$@)B1TXbH>)({@vR!kXA~jOYE3Dx}IG3p4p^Bot4u{v}3~c~QOU$cB3lGRUvl6>H5xsYIEf}Kbsg?$|cIUTv z7xG0LeK^AdPu4N^Ym#jIT=y`17Dp(q18h2LmwVoPya?c1^)>-|p!GIr1Rrq^b)pY! z#d=4afIA6yy@HR9=KIfBQ1K4J8GtNLK{<*uZ?31m|DhxI_2EReLh+U(Fdva>&U9_G zsCd9Wd~~r-65${&#XW)V=X6rT*Z^&)S_}ojFPa&C6`LY0QN%?PGaff(%#w)|H7O>R zC0D-n z;g5Dy4n_DOq9LhVW&vyfnnuv@uDleuB9#3RZA>ifkT3v5W4@^zKLS@&cq0oxb@{C7Pr|$E2yZ+smuRs>d#3RwXEm*~XERT@i2{8Ck4f z#B9{CUF0-?Gey?Sw9IA|ns;Jm@oXqWxRRTd>}Pb)Lb{Z-47FFyo0YaiHOr%sR5DAC z)GG6Cs(Z6(S8ED5Y4iQDc4ZUe&En0JMX__?^*EZwjJiJ`5zGkM-PaUsRKDM#YOG&1QHs$9q&w<7! z#O0@VTtY{eaqo=*K~)`_DzV@xn(gl|J<;|B6PX=b^i3xI>SeQF75~_KzEK zqm(eZ_8dpJzrQ(;{(t-iHI>&Bl|ef@WTg8!UZ4Z#$_p6}Dpc~RTX?!{-Z74z$ItnH z{ANIN{!hF|{8ai+6;Hg_sO>1U{Q)=)`W?qE*%78bPG)K5M+#R5^Jif2cRwqpVLbL8OW8DOO^GSpMKT|Qfm9Kif|Ckv&)ZD8jmI`Br0 zx4b*_DfiT-b*U%QvJt^P0GWQ1C*h`R9I^u$z}p&b)Z3Qj#=j?a@kZ<-#*@t5ztI=@ zgynEnv`&ylfclXmfirF^4_==w{8J@t?hB46*$g#11Q|w?g(O(YWt0gpk{BUmo$D;h zs2n|nxR{>xSd5=?CUWZn8DU@-9d#JOc<|~%yFcI?lXxb5^>f9^fEc;Tgg@RNb2CC{>b0x1&l zOjLib|MvFy^W989@7&ax{9?;v<)M?K=pOVue$VYdrwkeN#B3wT>qp zyDtVKUIsIP;UH7RZ$8p6M858kJ4-pno;Wr@W0sbq$w7&NFkG1%+gWW|LKPCGfM^X( zAJe*C2q6FF<_av1zY$51x*yc3-ve%WuQ|Q^03%Xp4-bqLe?9VcB?^0p)bdytTE9Ep z!8G`8a4WHOTYzi{3gqP_Z1M5r!TK@G*SM2k&A-j?q~v9zYT|SPq=TmTL^~OWH<&%v z1rUb#cOBkW$9DzLC2livwKTb@aT);GZDd(eu&Tk2DQHn*`eLtZ9YcsKmdCm3cwKH) z9>xbd%f+jIh|g(%4IT zgWiVs1CY;;&B4JzC^Au=r?2nMi`R~)^$zzL80&hKE(qQz@fva%^cv#z_0Kj9LcN$| zI-t{D3fgH?7*`6tYIJaw&3NEh3UUdd0a}0OyI4uY-r)>S^cO&t@Dwp%2JDw`e&N=R zJ2EE;CrKU({SbLY1f?#=?6ttM%+=0*=(w;%E73=1aCZQ85hgbzk1-5Wg7#1Tu3 zO%8alx>OMcdI>z|>CPmf=O1i-oHw+e|I-3oXXT!Q3?g-2Vt(I-K|hwHNmGfeU+dSK zlkk~}kdsLF8|wG!Mdaf1d;W3-nN)=Na38px-7lV08+8?PkLH4Xuei@S5xC5RY~)35 z9DZhmLuQdQDiAy!N!%w_yVY#|>=bVqSk@eFERoAo>P0CahjNIsP?0IABC((@xY-o% ze*tqcM&L1x-n1gc2!S;^3gTtQMnE<(jjYdM-b@x=Dv)g0Mk{`HWJpP0)4EO?c;yqF)FtLDKmwd3l1$c7#&L8lo^ln%ZI{(g66iDov zIX>z+j5T*sjoyIp=E@3X$KddV1dlB@7Fn zcS6rZpKm{IqHBMQer$mAnBfDiJlR8tV0E%%>PK*rbtQ4GOo1;GzOQjPE}d7A1Q!z~ zn^dL-q;}ih>xU?ISMS(crF<-ZaWB|!QSkd}?g%1tVqeWlb`s^u+>tdmfLgt|_MGY} zSXm=pRDof0N3#*hI(1dUb&meH4EI9q1Swfe}S8?vNqGAf2l#wy2F4a|R z%q>Gz@4b6GT~1Cbr;eLCBr};gkJkopfz-pT5!jU~jjC^VIgNXge~_^#jcV{D>nN_- z4%Nb!laV$GV1#1|0a?byh17Gd)Xm=*d)99sBqv8D%1Fu-lhHR!Uko%5{%TQ_r+YJ@ zAfUUezd5yB7ChAE=ivl!TC8hWB3gxI&CFY_XjRuaDm=hu_j;fU;(?%tor+0CFvo;+ zKHeOfe9Cz9P#f2m85G(Y)(Z1KsU!Dgl*P+JD5qx==Rv4AwQ)%7!GXFaW1Iz}<{@PQ z-5buGXc7Wvur=^euZ-A^mF0<{CYM`sO?mdwn{Jz-3X_S5wq*3Wkm6nTWDogG*uxut zpIVTnRa?W>>O7*YI#X;0VvxHzwNx#Qwo1*?FFPF>z!sV8n@ooJkZCI899thwXABm* zt*~<4MKU00YbOHkad1hRE8c}U$Is|w}8yyRdj z3!-kKtm>+(=^qm_AP~ST31w@rD2#5rL~x&Wk7eakqgA7wrETOGgR70YT_Nve$oy?q z#Ie^(uJ3(F9Q$*J{dz1n{E&b9{461oU;t>os3q^t%*dKMl`5G=#hJjMs+ae^P=UW)U!Ne)Ny2%0*A|)v4Kgy@*0o zx>HnsQ9Xz6`5i92xfw?{a&Acae=+42uiESS#@wXF1BT`13Q-Z3{#}1nEft36&R>h&wLu-93=Q%CD zVrx%mA}Kp)47U+?PUw?HEC@7mo0x zU(mL>q}r=-AffhOd*Bd9q7B#~gJuHW%!Ft-W*6is-|u2PDd(3n9lr!!=4{nUQFkW^ zr%7N@gN8<#&gQi8k3E})r=Yem(DC??pMY=6x4hG*mY)!9xOBQIq^}W<6t1S2Y{fqk zxfFeC(H#H4wo443&}k#0bx(_k$j{dtj9R|W?&p&q4sfTA`XhLjwTY}rYeZbrrSLE^ z&-t4DYd*pnpc|ZlhkjCfcrXAWIgmDwKogbMoGl9jgI|E84gE|#j`16>)R8fchukx0 zC}M9L=0b{lbmUoF?A(iS66GFgL14b+R9ov=oqyy0nZ||e@Z9howA5*COql~NqF7`S z*CF#~ddnf}SCRgQ{&ya3$6SLOjhJ2L#{>A|&Ax1C)u) zD9p&@>b-Nm$H^NZuaL9^UB#L8cQ4>O1=sI3mm?R8PAe^BpIgFA;K}N6OYe%ainB_i zEygyh$)ELikDvb;mtAoZy5OmTN50D|z3^Jl`EFVnWiIHB< zo!G&FTnhO^xR1$<{tHKQ@TAW`3`M$CZX2{)WNONd=-C5&1GWm0kb_2$KrvM50@y@? zUCgS%j1l2?LbLd5%GV=^5=kj|dxZiflk(#Tc7B3-+nHgwR^B{(*O)1MW(uVYeiFT7 zx#^1a2cOMXCeLDnx+Q_PrlVB;YnjV7lX$M-%3!=!qeU7D6=zt<=Du%1KX@n&tzB7S zCaenbU#zcoa+Z963^`xah@+XyAb1)zEdX9$2WVf4>Awv{x&-e_IS;@sMrTx7UL z?ejaf!OcVM-@Mw-D}x(Q1Htp-74hi_`TXi~l*gU+xxQ|KD8~Gu>}XFA<@R~uis#*T zciH)o84u{i01r_qv6r;_)GZFeYVdciLUDmdXp;PyW8xAe$H;xI)I68l=rXXN1c zh9&nx&Vtdy4vZo1{*&d~L@wnX59GHKR!iZcTJ%1S+vF14=!SP5P%9TiTnYn0pPhJf zCy}U`n3zDel-ZI;1)@m;T2K3(;mXV`3E*nToyUR0-uOg` zqL@u#zDnsaHGsvKW(4YAaCfLb_s3VQXmDz&hLtUH5t6|fgpB5Vvv0>pxhSE~eQdQ(FFAUVEyH_^SP3GVw<)|sltNt40mX1la zafncBXQGE$$(<))frQ`ed~)<&hdo!O&z^%o!u3;9fHEojIt4X|LM)fd4)8j)w2JuJ zpbWA%Bh*|?d}*CU4kP62D86xwfkdiMv$kHIzs?P+ju_C5FeHx5%Q2(HcQzlq9v$H$ zOP82TRZi&_TO`;Q@Y|eOjKQZn#jLIe6EMGrXoi;!rVx=2O@hmV6Nfhykp-$yyhJ@^ z+`)OR5?8mXR`ksTz_n0%%at&iCMN_w22J(@*W0rBbD$_C8>dw$ZN{;GHg7lYUWi>$ zwdV_akCY}SKK1_XDY``qHtCy$tyy%cS*z1$DazjmpG+UIomhiZz1g>6M*QoXPdBCgqXm_?F+}< zS#GGwZNM$X<+h-GJ2zxKvU>KIVGXRI!D985b32P6CSl)abM!91VMsDgIY&|4Kt3`@bLN#&e zz{iI`#TWK{IUZ*Jl*}$DQQj&z_!Npb!YvpSPc{i@!A9$tT<3>B zWLy&v4u%`HeH4TOHyn*ZjeYQxZ;pf0x9hJyJ*sxkjkxMNeY6|0vqwQlc3g$f1t+vu zTvvJD^0tH>9L9i0b-bGQqQZTtDv1INnF%sAi(9w`H1nAKL%>@VNCqj z3plI+w1Ld``9WmWg??mIEF!asr^AcwETw?ASQ z09#Yf>~nv<(|LGyf4dEdA_kapzE58Bg(uC>koW-4ITo=^!IDTvZ`{LwYcLZKmolT# zZEYtDpHK(Ul~*O;l2Q?4{JF8&#)4e;KOqj2l2lanT{f3|zq99qSvBGFxG?HHIAw6B^*84CoP}?i(B7s&a#wEz_8%)=qwTB2hhu8J97cq1S$VXp^ zo+R>*?WgJP!+fBPId-SB9Lxw_B0>_g0klyOMcltai&VTN$C*7;VUszr$8)rGuD8uS zn~e&v;=p!e4-x6r+2c}Yyt$F?m$f&1C?eU820r*3FCUvW`Xh(p6uX~n3$P~S`!$I- zb+32!6Zmtw8Um<}amN-G3La*}_RQj6Nn97;EO9<$Ij0j~M}kZxso<#srAHfy8~0Rj z>?_kocYAfz8Z_N+o#r?b)H8A}xH&z`Y1nMhU9;y%m- z{Vb%GYAkMAV!e;yCr~f>2en}gIJ}>Mvbx2c%PO0_gM;31WQ9(CA|3+l#nlr5Q$3N0 zg(b-8JJhfRQ3S@{-D1Y{$BFJCd?eU0TW5E%siHYB#ApPypM7&tK2){5;A zdSasziuA0PDZig7RVe8N_R+GT||ZqunrkMZvr12WfflBB!k19287w#|fL zEo=AzaO9ym5V{e$&=d%P44!olaZiL#VoJo7#yoT0;@JW5tez?GqS&cgLdxs%jST93#jzN=m32fHjM&lIB=EC^K9;FdD9ZRF7aWL9y zq_O+J8DpGoaW(D;Vp?*!5z1hM2uV-r&|l-@UOz2Kq;WWh8AC-<%>VJ84z9RE1VU{r13YL9t69BnW zClg-ztcOd@`i%?_HD@y*toNQf+YutQrhBq5T9K+!#e8lY6NW`E7*XIuO+E#N#gLec zi;KSoN#n4k|u`Q&d)7kq;|hlG#&n@k6-sFPRi?&I9aT{>UEE584(8gf&V2-SMhGW9AI5YUl zp42XR?6o7OY63z{>pn_K(zceUIR>x`@}gl^Uql$A35J&{gIB+Rs@S^B2(^bv>W`(I zB~w7yPBUc|*r1d$DwnbyIReEF(gx;823AVO@99ga;7yaG&_ScG@?ctQ6Y!y#HEdcC zrJ=ef1y~;-86;+h-pkmt7vj0bQNVu z6x@3uSVVl!Ihmiq(QdoRMfeKhDvfX(YB>t1nkLBCl5;eLi#)Vd)I;?NbYi>RUvXVR zHz20~3=&RKj2W=9R9H|aV6mEbq#{`)3fn8wepDBHR!cK_l1pKWV~6)%N0EFkm+f^_ zjZdOsj?e6aosH9@A$!8k;3p(&Z$k#L))H=o?S?-M(OA1-#6y)+ye98gGU$_+#N1)a zF0pO{C<;4f*=Gj7NG|N^5D(#boz6`DQ}h2*<~u-;f;>tO9aWXoK%=m;c7I%uq~E`# zH4praBY~w~w7`ch;7le+XzMn^nkI$nfo1q|Ha| zPbEt?JmT*=6vn}m65B9kX&ZPk8kmdu{3ecgY0|$b{H>6Z(a>LBCYyf(YYqM{5H}nN5*mpO*hUWDOFEnh~n}tK?TD^usF)iukYMudO%VLrL}y+x^ZY zRVbD6s{9>FGM-Ma4{4=161k@win@5{V0R);bqs!U5BMTq2<(z|@4b-Gq~?%ex8gc< zZt?MdM!MMV6+`5dNuDPYI=e|;$Px@o$RX(HHG1he(&OZX|6uu$6AcC^hvxJ4S8S<< zI+4vT5FA}uK$~x_d$M;~3@f3DvgJLw5-y;1@CaUJVFLC~cf3Pfb1x;v*|U`R=${|B z>UcHKXh}~-fRTyDQ#qAvSS+0Jx7{>Pe3#EQVGx_|WScRIM*OgAXkpe1RSWfc5r}^X)U(HVgpo0qJjJ^(6Z!bAp&muE zjHIhIt!gp8)|DSMmeNywh2F8jh1N8tT*r%Zr0};(3{^qjw41lKOjR0c{IMJUm+!Cr z3v~C{)=it=u#3+jNq(KHHCj(m)97>}H!E79^-|Y}n~6Na+D(G0bHy>&FA>ybFf(IZ zO{D3>xTL33T2^{0RFQtU46{;~m{mJNG+e#ulngMXAW6s#E_dt`QS~tJ;M1)Tqae&NL*XJoD>jdn%7cql zg}ofbn zuXbZ+slqB}IY=@nJU;9-&jma?KNB5~M;Oru`T(O0o(Tl5vdlTE$U9Gq ztJb|Z11i?RmW$5Ll$l--#zvPB+qkyx)#IXlwbmMVy503r-tx0Oc6R4%nfhYQyx4pF zhY%xIJTg9NCXl>C181v6bC}Hi9o3XPMtF_*>8nY>Oe6|is(jIzN-wIs-xdB+B^^fd z-FH1^#AEOV^-CtyP#PAQ9hS-~I{?1lM&c%h7&!~ruZYi0JUY7g$6k{k`yj{R@HucQ zd3x=WmD*&=%t%%eaT{@S&dS!fdz-PCDB>k;pC6l54T-c*5wqa<#Zx>0kFqCuJT6l! z5!yZn7@TN-;w|xfQkuM9l4(^>ZVU)A!Afbk3urEb`}1zcV4n12ofobedp1# zs`*|vZX04B*@N)OrdOtwbWX=+jr`-n>OH#od}H)~zEBhmDJvfs@hbIM5-x3N!EDV} zp91S=6OoF)DtS?OjOUn8h5sBHRj6|B=eT>KbvQV9IEm)1s%h_b$lA!0eYExBu%2)#MS|Tepi~{z?m?%aO;weZeJACI9#7 z^KblnI*3O6{J!zTpF`;Apcd8RNZ6Ua+(Rho_LhwD{-SvHsw7Eqs1bR(e2UvS>^4C4 z6RG;vzyKy8V%msUu_rKfvf5kDIZOb7Gn;$}BbXfR5rc17puEdO#v+X;P)7cw%Z?5F zNBUIxoe4I2I~SVx4ud>)*Z^VJ3~ zcQ_^-O3F<|7dY$S^EGp!(mNO6UfceTHt$pC4b0x@>i#B>)55W8R+!7TiEY^o=WWWo zuX_m3sqK8yXSTAE7VC#<&1bK)^{~T!Mz5_|HRofZRP1#@z8|3UTEy1g$#mJb0h%n2!9wQ7PaV%C6h)YQ4S5;M?G(#uEs#JmSsb^+sRK!*9%2M7v z*BBO96qr3%Pdb83f(wBEM%Ms&H)JW@YNT&Q8e9tSXMI+LIPJn)liYj%f{mSy@dnj` z=P&Eyv} zDVXL6S=Ij3RdTU}UV)roUda>B25}Xd;;>lY-P_we%U* z+w^NEz-aP#(lT~~FMH`Sp06=p1&|GAv|%}MV&kUR0`Z4eQ$E;?;Jy^+^)T=yAdMeJ z9en~(%cL#=meN9ck0ds20lJ+B7^LB{o-}Vn&y1+`WrASgb)KU-SbHLn8ZRo8Be%vo z8d&kUS#`Ra4KSFH;#yDM6niQlFwu;MX9NEGzVKbP?rDHGvf^_OnS)HaJ3H&_)|dq2 z9ES;dprnjr`daLAI0WLv>Ma!d_SS(o3bJFTIe|sxsVCi`gY4A(T8{<}}H@M_4>99sFbq}q2 z;Qbs)p8PDxs{V0`r`me&FI#69*(T%mf(fdZw91hD#ne9@Cqkb*k$usBguW5-EmuRZ z@h-$Zd49tDR^H0NqnOGM+{VkKU4?T%LdDiHqyTa)OJqFO0bUp4eG76$G~&S`+fFMLTMH@rcgc+sh=2bz!3z;wm|LNrJ;LRGQnY9 z1GXBg>Zb3-c7WLor-+jlS1E%WMXeQYvv`oL3Ie}*IlK7dYv8MA2AhzF8)iLJjFsM= zY&aUqyKc{`cf0ja4v6pPpeS_lEfjRJCTflP3={k~NDz}bPt1|LzNP$o4QHTl1Aie^ zxnN$4tT}jdjIHzh(m^A`@8Qe>M zrlF|2$Uo?B07JZ{Lmh5fzwWdedY@GWtPfuxGEDGh0iHGA?H6^7T;1Mx_)V>xTAUbK2FEZ}mU+-yXq*s(KFg zUK8Ul$dDg<`-6=36AdJXN@T}K8@P#&#t|1D0SH2hV_g)#@Z+thn&j%H5_}w;40*#~U^W zU6{7jk)P&a93yF(vc688(Tjf)dc8*(gRXn=t%s4AWv|iX(b6hJor_O1f8M~c`G{3S zWB{Sk{eW|dqTXnN%WIRGP>HZ$cM@z&q=j5*AkSWl3-~fKFDlDP*GLm?%L&l@yA@~+FzH&aIQ^-uAkF% zp=N<7Uw*#E_#<;v7xgyLxIH(NX4gG5q~?NrA*EH~vZ*@B39V$$pR z-TN`LS_x^z4rilR&FNhnOwg)RSxbMRmWdS|^2LCP&%TuxXps!mm(^^pGq)HGUBxVT z{UYa4Wo>@AZpxYE*HN+(&a;AFimozT$vkFA%vOHYPPUf2iDxGtkajHY=;30U)|P2x zFO|a5e~+CR!(ypF1Sge|N&;4N|0U{Nb%#ns8G<7LbSDLW5KQGpu(9eV%{SKhv12em z+{9mH4GUGFE0tPiIu@u;g8yj&zBzWBDz>XPN44fL>G3Gx0!T?mQQ(j`epLW{<=MEi zMV6hIbJ^X9Y0qCDYkm3nF_fn05Mc4!x#S-ekAl9XyFO`cAwrm;Xi5OO+`J8rs{^Bd zp{6;8XV*DQZaEx3Yk>Wo*!`!X2jw0AAc6lp0W^Z1Sblx}c;EWO{yh4;`@BW|Jm~4h z(|k{Aoe+GZJo{ARu9*If?Jy>N<%V>s_lc!8EcR<_*g`Yao`9&fA621Qvs5R_y4^gL zipKa;@dCf@j2XXNquu?YVgs!IlwS^EfRTlqGm_!#-7^=;SW_U`yaXI)m=B>b{-&bQ zUR|w?F;GDOi#h3#0>~7&h?_6;716v@FCp4R#aTTg$yai`25*HQN4XzI~*9&xM0}z)& z2RR9sEnAWixdHr)$3|uj3uvK1(fV2t@_Vm^cE}}I5{xLhg1E>&!{K`fy5MudoH!8^ zVm89zy4D;_M_USRRq;4Nte_n}~IWJ6&!kcVpc!KiZO`U0$X= z+ql!ihhFMkta9(V6UeO6Ctnvew~`)P8e28fO@%1R7NzlLkAapeRx_1|9#lF>7oy&= zfiF6SM*~ce7iKsOZb!D0rF`kluJzgpMMK@*+*C2icx9AvsD4%nEK-N|aX~nUGfoOv z4MX?Y1zH#VRuY%F^<0l#JJ}}1s5@e+yuP6Q;uc*Zg2daf~oIqs-=&5dan(p!cT zL!*3c{WL*t2v~Uc2 zDFtbf=Q-!0RvI}63JPU|W%rugUd;*-nR!J3^OpI0p7xe@b-X9J^*OCoRNahJ+I%zP z%N(A6`c@dvyOSv`%`_WZJ zx1#|}m*rHl*ku&l48f&{?Nl$*hPa3SzEJ;yaxG#t2Wayb1c_b575e*3W)g$@^nypE z06KU-V9g(+7oDWVAkzPYf_CoHO5Jxl{uA=!e$f33>hIwsYygv^hTIiKrsYSnvX7^# zJ*qU{Virf^$?l00Jsxpu&-{1gk9nqf{DK8j7IkzZ5b{cK&SXn;9->f-#(Fu2_}Uvx zsYFDM6$taaI>HihJPIGyP#yh8({0&L}svN;2vTK_Nlw4fSaoS>Rm_9>%Yo z(~FVZNEt~ZdEc``w1cgToh{=b5A*}B;o(~5s5*P*$hZdH(5HLR(yI!THIkbLq>REB zUuCqiR0A!!h_EFl_2geWSxYtni&g$~IdEshIQXL3y}ttW*ovuh#IF?w3hEn~&j$*~ zL=4Z_8C}B`PGcfjs_JbDIlDPSU~q#|Me@nLQC*?njdDwp)zJ~8zp*PO;#4TofmiCM zj~i-wSF2(o!4ig1;G|xe8zNmk{Q2JSm7BU&iy2sB5Q#gkAU8Wo|9$ll5vc} zUG7^_NKQiXEi0~~9ItqkiA5Y~%6HLd^NoLB0B0B+RNa=`rMsb}-?6n0d=5wGYt&#Y z6;CIYXXKbHJgT)S`wOu7P>OeE9Ne!A1yWWOj*9Z~YraYQLm{YdOqhttF z2yo}(maeOU(0hFd;X@lwJXctxorN&(QPo5YnwSvnt{?h;2C80P_XH%hNP8^C)i%M+ zIoy0GX8XP$@gZ#;%0KCs^tv5*eT}U7Sfpv5WSTCvTW6An;ph*V zLn))u`_!hc#x=;e^y3!x!Jy_bzZHhn-%omV8g|?u2X6sxc$u?uCn3RC^xKLW6%n_h&xjyPEZJjU|JBI@pM;xq%$mt6*@pv1Mrb(tB*p)f29n%mviP{Uww z5L>=F1Zn|1w6M>6nX8CC5(>|ykiM_PKPuAtk)f+fL^Ny}zQQ(hInp)6Ts_~a0Ae#O zBhYtwf+Z)Ufg%1U{N2_3-Tbp|eb(Ja60z1bBk-dg6buUU9M_}n1;w)w@)7cxJ!?SCg_%21Ssq!N*JKCK{aHH`M_0NxEx%93A{_Ow7fao#kGtk{j+muhbcR6|0RLD$Af4Lz z+vJHW7;WuP%iY`O)sO8UBL6!bLPDY&E6r={J?cdVg-`ytG0*vE-Z!X+jhhQKC-kMl zb5V|kyMj{;mAP7Nm0ICuT*(+7pnM2>zET&P5yv5WL(EtAzMs}I9!O+hs5D95OYVjq zCT03N2{>An#K^?!Qnij|#B9ub?a?p(Y6KqP=F?K^=3V()fYPYwv;`OLUDmmK*`2$i zsm`u0(DVKJd^1c00yDJ#tDK^ZZuYrs7MRd%HDk~2zI@9ONVLoG+oRg@v}(5Y{#~Ne z(0;e9-pNtLQ8l$?xoi{8gs6jWLqH@VUmS>%5xFs_gV*^O`2B*0IjL1n~ z+H>Gu>@X^{@q5vat;*^IwX@W`7|vyUN3!;MCKPVeuyAf8SGE}K)Skg0JAz*&aK17+H2_`lfOo=;&>B{7^RBoNVq6m&>}2DZ$o9{(x9>SVSzYv`hY07zBl4_hh$` zFbln8z#161C!~G=6V8u1nKvLDFXUsL$yr=8u{>gPRm<7`m+oo-fmCi?H`d9v#*v3^@uCJg^A;RpX)!jq6ful*hB_7h)&_xn*8 zU(0US8WfhW5P~;f!ZKfS<7R!SUf96PbmjKZ>Qm{y5vEvZ`L{Cim@LOjF%L;JJTj_~ z@^~jBrSj->43(Cmae|wcgo~P|JM{u;u48&47O=&U1}m3%G|IIz?s>>STYBYT&m#Uo zdA^6cKEPt;`Q+t;kTD^xkzkGHf@S*BjtJ^*U(iN6)PVEk{%em9;c95C!*4h<|Ll2k zv>B`3QwkwUVseN?9>)2*oqjiLI1L-Hk+YC`uDR>$bLr~`r)zi$0R0ax z)nwi69y#KX(=ohg#S?W9e7nDMy_3R9O{8i){5?mg)gCWiVDC-zZjNuih>e=2gtCNo zm_N@=B=j^;@|Mey2TLEV*%yPHHdnOD;N6GI)NC0w^$K~0x!H)pwnSW^GY&mWH4kYb zgZyb-JGLy1PlsUeW~>HpCE56MRsN$S71?|D*N<&bZFq7Mxm5*HBHu>Ff51A#Qr3=6 zRyn3>^|ZI>H~Pkn=VWG&nzkBg*&N}CsAMLWVxGLaiwh&|f$?dNzE=Nijzeh0h1$<) zYC6&A*VAOfO?lUcXf~exnsveIc3yoS_^ph^<9poKOL>_pS&Nh0!jj+G0>2+tFC^?O z^+q2C!ufCdGz4QN1#y4&qCKLtXyC)i#PI+sN`KC52y<_9BNFusj4Af3s;$Lt;+44- z=Hd&%>h!kHInL4|C(AfgW7(#9NGwnlZ&pUZ*%<00VT(_Orj!%lA^aLideAY;M+-Kz zQ1wzxh0sX+S+}Z2%=@;yVq7;oDG240QrH_|Xo{j0M#g(Or6%1DN^$+|5)5O2mfQU; zh@^wfD)}E1&0PuRM0E7%LUG0T4J%i;jU?5~1E9K`_lOxkA!gO9JO-)`(({WmXBOV_ zJh^KCB;MxJuiy;h+4mg<20PzNSs)a;^af{D+nV-5eCZhz7 z(+)G9f$m|4&1b!@KK!_H(!gUVWHchyXB@uFh4kO>{9T-R`h=c!k|2e+!sGbX8uEM1 zkEy%cwyeDp_0|UoE+zpn;?zE2b5)^j!-#p^R1W9?cHD?Svu)HS7>wWuc)?gGZH!EM zHfAMS5i`;+KXYfv8X(5X^Z3O93SXdD23n$*WN)33x_vi55;BRWp>|u}R*1qdrg(nW z;VJ|=Lu)9w?_UwlnQZAagYlp*oXo_X7hf4Y4xtMPtl)&~H4 zAL?(yJbgF@TCq9UVjVxXM!5NS+Pb^HS9KdjC~fWRy4m}D&)41De7+1P^89}w&qnjg#%?q?Ix~le)Ap_G!--(r(zaxm1EHHv)8R44I8y+z0t)a&J z`jaMT1I6elcDD+s5?QgzRLTdG4&oKH9mASmzX>Hae-eT=>hEs$B=z<6TXh+IKVDNo zwt?3>KxjgLbpud-$<$}uhwM`JRUABfD7g)7MH+d3_Z-{*7GCHj(>56S(6>%q0^Is} zb-U1{EUucMp)b04F#7pC&Yfy*S=CM=Ygw+ddAs@|=ZH}d(a4Iv?K&kFdB^iD;7PJG zm$`P~?rB*GiFDF5hlG=#k-mGUWhP)o-nI-=pe80hK8DkSA7A9sfqBxis><6`+G13a zH{UUrSd3_zQ`3V*7hwy4WF&%Sh%&L9UC$6l(}&Myk`?te?f~u$_Fb$3y#t+PjThZ{ zrgQF)5xklMwEw_VUah+-(-eBw=F*+gp^J6i3ACx}hU$d6a0{oyn^_*LWr zR%w@IJjOIog`ip#fd5C+J4VOhy_z>zB9?yDi4h-h4`k&@M)$M_*(Q9x}5}Rqn(w|OP8aUay8%zQ=Pv+@)B39slWn_yzI= zc*O0W_}h64Idkde5W{^}z}wi9Ul%L9$1v%~%GK?)xLlyy`Bu#MLEnK31trJ>x%2-g z0FeJj01TTb?SnzsFHC%f*3RVz(p1)3DVh_G!4JwIYMB$YIF*hUiX`$2zeEDVe6()= z-ar3olUJ1xoh0Pu`Vu0i(w|TvpPa0}MZNv3>Iy4VBD7m=8OVC9=q00PM@F_(PbV^7 zxmH+S?47nVcR?dfaF#qPC*<}H}I#26qSHfS&CVb!IMU#5vf3WA|2QJGw}^aw?+&7Gj3g&U(jpR0b(qr3CHAefb_6UJ?(seIFYO+4lw}@x>0am zKT!|Lx(yAl06|n5OKX^=Qr2MdP^90!?^};$*U{chQUW^p)9-tME?aW%<~iy*LYCOq z!^qwqYAL+H2b9 z)uSQ#!y^80DA^ozRm!lpu@p(tDRKAvt%0 zV6~$u6&5EG_EvX_0v$WtzGzbk!Q8^h3lYq@xr&N3+z-mYcX;y*U(@nvwFylqcADX-GfUR_2rbsb+R_F~fK& ze#r&b0k7g)Ssbe`o5Y06xLcmP?vZ16@@Wq5daT`30i4AUM9X{Hu4SD$lK}^H^ow`W z(NPlC43=CZuHK+T+`Sl$!cp@M^s1D%Q9taxH?MhX`)iN zqDn^{dD~S~>5@1fy<^Rh#-RL0>ROxD0`}8P-%hvmOw?53W8#v$(RGzGV}bIXDh}%J z@O33!#TSwe^C8?3^!)JS zrD^k8Yb$~*v2l5`5-P6T+Uxi)tHQEd(@va3VN{y@RK#VW+M)K0c~dU)ld1&yFd}Yc zr;pk!F$Z{9j)-$TSIcPt*L8lS;dzzMI05HCJLMMSdr1kaAZZETKGqgO zZ;l((<#4t}x4q!12W}lrGcm+7Ei>^I`TRlbCl0XMn~KFbn)8`cI^kJXAuQK^oiHhX zYg`%OM*JY^EWq)S4=vn+sUQ>@wNEaF>mQ>HkNlKwxu;D>{q*6W84zfg^KUm;1 zllS+}TRj+?^lL}BEm4cM9d!z|QJSL+W3rT%dUK-bXY=)fzSL&ar)Yd(=sJ-*b2r>` z1l65OlNTsNEpXPoEQqN2&wYR6?G{wqGu;QTI%n>m!6Sz4)?Vj=Y65^2w{0Zo9Af>- zE>xi;OZUV72btHenTAb3D$*6h(5QKkEl<~_XM>2tt(WUV~5^5g% zPXJEa?a6(fh!eTGLqph6B6a)`8;k&9CPZ4fMp*#vh40yf(@_J*+zpxMQx>$6$ncZq zy~O*|U*~56m&XDX=5*VL%HhwQJWE=X~@AnjMx%F$k!!YC+_-+`gFWO8Yi zSW;Tu79;;ElP|B3FTL-&dv+eSLe^YzRFid?DCi8A&^;d)+S?9!{#kNu{!X2Gpodg9MfT{~mSFtAQJ+?nP`^pkt(P8==>j0+uTO%#2nG^NkP5Mky-g*sAoyAE`r6ErB<5?D6{kUFHJv( zB#d-AX!VE}urmoQJ-citA3#VI#2qw}eY!ZuQ*~O)U*kc&Ye=1;ype7$yqpwv^%mOD znta(ibxq892h^)_%UGJ^7ApkNk}E?xe=4YoP~bv*sQs#s2aX;!>p0K|B>X#a^GlQgj)?f ze%f1ZA(+APbt8|HU;6k_Oa5TPQ_!Eq_U2?xBBp}({@)vr$-WKbjhD;lYe%$aZ>=P;d zdd}9bVr#$9+M*WihdSIB{{OWAR(uUq*^&gzA1+!SQjkD{eFz8s`u@;O$K~zse=tlQ zxF1O#&~xeYkMu*y4&{9`uonB&nCY0Q1;O+yU(U5VX`Gas$aN;ER5O~DgcUw!-$Utn zZ?l+@)c4>`$*y3J1ztXUczlmFYwF*K-c36aU6s+3eU4UM(Ox*Po6V$DsxO6r+4h<}}(L>)pU7 zcHD_CmPi7bisG9j&WwN)bOp@-+Bgj&I`hNo1{1=hH1FO_2pzogvEUGrg~3Gh;aQ#A z-%=05kX1#dT9eISxWC*iZY<23e{IdY=}Q2vR1egin7>C*cbyx5^?EBm$6m!Qup%~q zQTr!sk3w0MSdLk|uYmf-oFQoq_i;aPSCoF{*@d3L*oIo5w+hXAi~CT2SEn)#-Jtkd zD$ooDQTW2@JLe9DZibr63Bq_MQ@BNQrneHb5jw6JPiY$xoRM|>;JxoMi}i|vO9$5> z*CKoCMgQ@3{v+~sG!k-Lm_4&@tvckUnwh|qy&rfpPO#-33QgRTsC#Rbuj2=sM^d!;!? zRFiA*e>(Mf|Hda3KA&rx@(6d0|PFN`hMC&sPdbut1CyJi~sxP_WuSq` zC1Nq;9nLs8hiP}4I6kiX_g7xa16wr~zKg{zs<<4dT8GI3&%k0@bhwDGhVIi&0iV|2;46NJBBnmI*^!5&?T%)KD9XSud&!9H_bXRT?&+DS$Jtn z?hqH%-?qqMLQia7QWVniWKEGd%cm{+)MGg*Zebqsny&nb56O&aT3GpI{kZpUy)Vu& z&8#LCH)f-Cuj5q~oGIy*)+B1m!w|>yjR(j~wSV@wkWl_nNI}HYmN*YT9Irt&|4Efe za%v&c3MZ)#cr>PREOiJlIX>8bB#HXj@0<$*{Tq(}cj~V4>U?9*r|d(I*+Nt2jy#pO zArW-4#q%#~YPjX6>ib3|?m0@ffbg&Od^Mv2h>oD!L$>jJbX@;xZfXBu!An3OgG1Zf z^(3$9>D}*ki1f~nY)!zVZgcI4wi-a)ck;9Hp6mR%ruITs<-DH9MrsFF7ly|;d8Jjw z_N(3x-k<3rM1*}F#>a^jrKHTDur>8Mga0wa2`<0VhHiTgn)qJRfKCLyaCXUaOJnGtrO4JjpNIwj)244n=ExfS@rS`q``nNnt_^0X#&;=6F&8)XHzwKO2y!zv7-TkLfvo0f~2sEJRRd-9K*>o;@Ibf{iaH(`7=Omvc>Bz_$t@&c@vxMZ^GFrI;2 zIGn09YP4>MWMiG+?aAed{5SG0KKRa4rWoBdXS zQZwakA#h0vL29~s)bQ&EO_uq(Dm=8DW`(!W+}AnJ|MZ&K&IG7uFCIKZQAs1;SIsqL zG^C-5J>=y$p&d|CaCDJw|LND{zB5dGcDzr3%vhMndjGvK$?XkAmRIG%=k|WNb|PLK zSozR@&zg7~%utGLTu=qDE-;>nC)pj7=p3M%@Z%%dh?xm%(1uSPW4Sdq`<%{NhRF5C z(UjkIbabNl@u^kjo0eubsI=6#^bd(Y zhQDXs#aA1;^9!M)7^1HB8cwc0Qg$vD6tXPM4WWIN{V0hc5%gK1ZKnIyvQ;a+Uby(=Cl_slz^rl}bU^5I@{lqN0LoaedIhWNto3P{)vc&&|j6U*(f?=Kn z&IO!pw4*>GEar5K@3f;4Qa;>l86c3P&pj&3paIF(kYb8ut+n^pB+^;TN$xRm-d3TM zRe-U-@oPB$cZqgO<^M8nbTe-BmeAq%Izk@sLJS~S8+w4a$*0IgZZ_BgN0+t&#*bgG zo*BQ_4ZL1k?&>IHXDq#d>N(e>-_gKr3vRcBU*5Bxw!J3Tj$G5R=13nBPsNiMr>Jtl zjWECWCu|>#LDo{AmV=Zw(MO!OM?lGLh-$i?$#N+r;RvjJuJScd;+mC|8ZOIL$%~qd zDxlPCS5_6N%3^Lgnh+wDKC$L*+aUS$PRxe88~i>2#D^zdka{=xw+|ztx7bSJ5tJ32Sh(HjwX?FLK#EL&9VBt+0dn8GI+Ume<_d z%vWKqf@_Mtdm`!C7dn}b1k?O|{1ZhQt~-?Rs-6ey?DRUPv2b(@djbbSLn4^4u<56PFjTNyR{?R_*uqvy62k6Hx(7#X>xB zWOJtFvZ6InPpUd-Nk+6YX%+z=&3|o$%DCQ0rrM^?XPw^@O3k}qL)7h6maE!JntPW` z$R7{G;CxXHeXdM<6=1bqiertVlnPRRl{6`{o+fI!HJGbXx>O>M@Khe$-79^%xI&anH5Fe=TTP>t zX9(6RnHP-_Bh(>0E3#{LS&)9frU)%2mhzNenQ4}&mCX&pHnn6%+CVicQE05;&~UD6 zWd&IR(+pQN)T|aN^M4cDL^wz{8BjM9X`Rp2_JVlR6RR(yxz4^hC0!N2N@N*+Uzzllh`O===ub3gL_edTA* zT(V6&-17(N2@G|Zy2FiIjANiJZnKE&be*^0nSH)ZN;+f?(2ispW8Gf1BUX}}WE-Ga zt7kYWibAl}$kwjLcbn?hE5*MeIr#r&>tO9l#?$s$JxDpUaKP(Gg>peF6WaQ9LWr6F z(^mUW(9pI^TjQlb-2u_T|CDl&>#(Y|VS4NO5}qQFeZa%!a=>AptOac_WRLnb2b=GH zW)nzQzE|Avt3U`DQBB2ph>L3M9K=}{m8F41kJMMA8T{oRMhymM^%|tK2#Wg<-a;f*tb55>X1a9bMxy}rYqwvzseF z`P#Npv{lYg^pMV2Ueaq=AqeBCZ#yR%J z=ALo)Ze;oAU+3<@(_~DZb5~O58+eY3GjS zI$<{>2eKrPKYT~C(2ZfN!y!t8T<@BCxbn*8PR5_!wDRoO4PYokxomXuLTykfVN$9%9xpg8dg^ZL`fG)Fi1z zhQ}JwH7<(8mm}0<8X*`%Es8(NhT#n-$Yrk~m@h&MR@Q2BqD^v=oKYNxu=Lwc?B*cY zVsR_Db5R;ZTPi6N^e+eI;6Jc%T2XMOVrEkQVt!e#K|n**nkc+=9CD;$*_nwF07XiO zj9Bp}bPOY??oVrw8Z0HkOP!(UIVtmK6xLEuo}LI9H(Q{a&JP+;S81Zo88ga9U~ z$$CUA004IWY|EC}EXR6Ad-l5wH|D~7)giLe^SR5l&B4#7?dsV2$c5BygD^cQ6V!}8 zHV*#`>mWmNRh*Hr8Y_8{#Yr)%(yl>#LPTAH`bnJPDvCq_rmsAxAcgf4 zlxonvnMU|u@dR)h6I$ZEN}=?*US0 z_wqnKZ5_O`1)l|em34mX?5&$3ZK3@dMM50}bf~@k)A{5t`Sf(_NB0uKz8$|SIxVqG zjSI^^jp;M+XRL>8#Ry)8M2s%{n#BSg6eEpvUjsnWGN(Yy3wWroN!`3;1oszdH`?py zu}~}Ca zC}IUTPa2pZ?B=e?md?xKHyB|U#VG?@xl-{`p;DngU}NbuiFmYZn7NIX1snBRwZ!Ns zc$k+!@RefM;$_VvSy-10*Whq2MF9ok7WE37sSZ!eII%b}a-k(Llp)q-o}RG7F$N^M zsUJKHVl~ra#th(-p83Zjt^AT}wbUaFXtM?68bgq^Tn(p(_*>XBVLVqqGMRvwo{OJq zQH`k{hprPJwqW6`)yez`^G?f73jyWeS+~A zHm0Afk_?%ICZuKRB}ZU6RS>?{FRTi2nJZ;6@@*jkHna_IM7SGZIT`Mt3RKemLqBwm zJPg}fnsN>#tM5=yve=kW7V5j`gmu3YORI*Df6pmQNqvyZu`=_!pQMy8R_uHKHq zj;|NU4(`kaU|^5Y*GgQgsvzSvFz2TWw=5d8cQXAS(e6AvwF^qz@e?koZdrt=FYMNa z2=>OeA&fnOu?0jIdf*~wd+$?wyY+uMdGcZZ@ON;ah$PUV5`FA1yL9&Sd9aUDE&z~3 z?A2>e$DsSBsfLe3E#iV zqJK;URE$7NBJs&)6O&oxOJa{DCbJ^Zf+N{Wr%R00jv`txGz1wbP!q(rOMx=|X3v8f zL|5>efIv+mjG8FTM)|D1-{$ig80=CkaM;{9(IFAVk+Y(S;z=_8uuqPxR&*9UBU9;r zp6=OeU`Mrs_r^)cIBn%&HJS$)#VDq$s1uQ3T-Ei$^}@7Lt8ZH-_QJkAF&{wql$>yc zGZ@4At81xNyS5~AJTF8_i|*FWBvNLQZw|l>YRQoxna8v8>(wI*{6TbHfm0T;8^xJp zBuaB>ywWQpu}TaxTHjJq45iH8{4r=d0RsU;SjQL?8s?tRt1Q(}0w^j`7(VI+yF-{f zZqKoelcYKeM@!E+8u&zB+M2KlJE5u8sz_5ly-^gFej_yBjlPG~=L5T~*3Gl}D7RqXMv|Lwgal2Kz9&J-&iX zfzlXWUiq+s{0=a@R2qn<>W!OF5k*W3~5x1ed-M3-DVPV+j-IJzyB1rpk>Xf`^Pw2u*%WHEqb7D!s&9%>EBo)sW!HUQu&@W_D(EW6rF?);C>5TBz069gj5B7R zfGS2>Ohb;JIgI=Opf+={HbJL<^SN2|g*W#ZCc)o)0g`0n?I>{Rx!1pWO!`dLQgid8 z;IZ0&kS)F8QEBTM*D@QY8on=6Q2GWtEQH+~Es7w@bDm!qUKrjYRLpV!tjmbD&-aU% zs{Y?Hf$(49C|S(yYN}IaAa!`Ge|b(gu3*^3*kX9bA8+rKHbaCpJcEYGu1m-z;J(&o zN1S2bm>}4;APzMgij(@=&MqAZZM2&Y{fO&S&dU8pRP73QR5II4T8W_-t=9W?pBHMU zIE)T8AT%tb{Us#Nn5uQ4W=+;SWE2Y{TNNZL89GV# zt0=}uQ~oa3RpQEGXjGiK8{gZORudZJ%y}f-* z*Xlb2Md<9;J%={E{XW3vLPceMgny<)%U}9NVq5*+5) zMjmbhknU1USfFDi7=URAKKm2Kbgj_(qWFK4#-Q=k3Dci^hb&=#kxk=&xMx7qhsaM;vS5}DLV@P{V5Q{pb5l~M8A_xa=x zHT*0LD9;4?7UuN+=*J}p1{Zo%3hBs;?k|&b0M0ADJn*7PKBN#C0I1P$@z{Ox>eFgP zxV5+uZVt1D{z%~O-%lsC_@mdV zDDw#OK>6Swy?ZeQYO00*L>s(=L|fIwRgm34WyI}pE>PZh`z1YEiNeN&%hv23RC0s&m{j1DGWiJ+Qy>2X@SL6yxs-Xr@rlwx!{x98=R&&v(+mt$Rs zGvPG*wDX3s(|V;laYzb?dWf|>-|-4(%uGs*_j_?Pfz%kz8hB%wG2OXQ({i^6MKKg8 ze}vFVI0RW-=}}!P!uOH{2)&A=p+xSGHY6Cvd(T?ZnN*&WLvC9?36M`ol$*;xuY}1IBJ8j(PUoE-O7& zIJ#;Kl5p}x^i`R}((e62b!zme`*Gt`N<}hGKg2dXOt%tu{$Vr^R?JGtev-$)7!|=# zA;CMEJ}3{L?NE`HI)Sd1-KS9fzwJ9@(w8jCGlc6gjW=KYzcPl-2ZPZ6oNNwJ0H6PO zCWtAPK5!9=EKnFNJpbi+@_%?g@XLDD1L+4?7E0(E{45%&8d;VW+?Mh{`28cIpbha8 z00VdkIq%lr1KDM^|3XJR#-HCEd-PdO(luknrtD>gFLk(t23hT8{O2Wa6qP51KRGU#SeFu0MU zB=#C(K#~Zegm}3mRFNuGoYc-NKf%NX4>2oDxNV~Fhfvos=?0R;v|YP72fRe$=L-iE z;~sR>`rW!49|YgMja8>tyiNT@gW|RWni9aE4s7PXO?AazmH>7c|8i#v1T5sS0AF;RzIAM|1bj%J_!+poVgNE*qk z5Xa7#Y}%}izF4t>_IVk=2@{dbKt=WIuoZZos9jmj$`BMe28Jn)v@5$?XvrHf22A=& zyqNNBqofcUv6%H-Jh6&maj+_bN|M<^iw^|@u(DbP-8kU-zr`0hEP|rPl)A)=@bW`% z7qJDODzo{`OAv-!%^$Twn=jBKBu1w}{E69NTu**Sjl1sj9GDoa+wxy}|3-=D*XoPu zVcEfH0A{|(#B1BLQlU}imF;lR!ccgJtG9zFoQnryr29!;Gc)31)TSw0#gQE8{-A|u zfD~5!nL_>YK(NK{0%LdU2w&OzPAwfI zp*K_N9g6a((F=u)nnQ)WXiy6NmW*hzjyO4Gle5*@g~w?bgy`VEh{sdWBKCuUTA`WVR3golcr> zQP|y(up5AWH`j|Xsmsx%lEuhPa^>Mzd?}K?(N9Ej#~5Y9+&2*D&Opk9C7#6XWn>E=J)sD+=p)fI( z5;^}9z5krrNoV==3HV7)QcS~s58{Z2sE+^3scB~J@O`)#*M=NZIV$SUC!M+>c@@sFsl)Z z(RN$U!h&RhKMEDZw1ZPHmx3E$^!Ybkgea7)(E(8u`^~b9n12aIRH|`fc#&}P{NPLr z1>}TaG7N~(8z2hA(W(2GGq?>#a|Rry5jSrD*wR2M8op`r=>>0eQA-6|1>i23ERVop zO~7Z#CXX}C8Qmy-Q;t+dU&mO7^n+2c$cD@%1+T3~Q#5RPKko}HJ`9WOXefIOEGSH& z-rbAF>dC*Xs2Mn0c)`G9i0}wri#X3yMd&2f_xu^OVSL(Xs~6>hsK?`jSj%S^ssrB) zyBem3tWl=M5SaoEU}Tv~r#Y=Wvf3$?;zWldkHV{FinYi|OEjoBYty3Pc#%MNBQQ5w7I-`NJ0BbUH;_H3rAg;zrTDdH>BVcZH+-KaWj zGsMNvHZfpaQwX(}G9b)$j)&f}|D9xDol_z#g@S9ABz8f`H<)Is@YC11G#{et_&;<` z`oSnU>$1_zTzd+c^aL$k<=rd7ppmW7HpKda#b9uRLoPd^?qr%Mp5jWOcKA84e&N8EvO&bq@%>xW ze#p(W!@KbM(T)YJp%7J1-t<4@kgN6Au?%4OmDHhM`3)^NaC~3N>-Jr_!_${6No3^F zHQ41n|09rfW9rQ^Ztisa-`1i(MnfO8OeXoxZ~#ghFO%P#8x zivf~vIW$xoB4)5xX`QpN=Df z9B_`BahzY(OK>aFGKo3VO$tvs@bp;bi`$}w%u)+poFI%aZx}GVpg!lug~jc5T}L5C z9Bc-2XhEEm)2HJ=Ns*M(h+F!J5kqdRFH@X3X@lvT!%unng4S;aN*?yDcN>Y~@=y3T z6W@hzeZ`h#ce-J$4`9`Fl&JywvtfM;(faJ$6lrC3dx77e zLI9zh#;-_2GNu1;@M^W?-(c(+xYexcqFge>7}zdqgH@Lawo4d0At z-{HS1zmsnL)fRjfaD%{prNhw&Ia?~AGHyON^b0~11%H(6Y8s=l03&fIX>_m`5>Cuz z)w&K6jeN4)XuOT{xRVJ(0ig?a?bNkAAdUG~NGcBVGQWZEj!{7Owb94g!|Ne$x0s|( zM!3=2SnG*)?N+^CWrREuZA_3%p)dw%rNQ)Hl*{iEZPCQSY4G=8$EksUa1IRBu@Si9 z7BtLEHx18hJHdg;$ejVe31BQJ1Iku3W!;qxIupnmWf{78D9%NaQ}H zhL@c3(2xeVv{^IbsGq2;yE>1~AKQNJ1cm-q(Su@Pcr6RYAGW2i0cs|)G7w;_C{B-b zB@3k_ifT+}hkRID7asn=(Kz4=4^~Ul8$ajXDbq^Nt|%mnag;hJ2SJjWAV8mjvV1nE z?6gYI`127!Y<5%*X9Hzk-5N-9DL4K($?bK>s9Uc8==XIy1%~4$s;8Q`#jIe`F^)Xa zrTTWJ{ zxdr>7^`kCer!s@h^&HI~;=24S-_zwenZb0rs-s`@!*B1P*}2m}cT16h!0RReD>OBX z8!**?!O^WA6clBQ)gkMoYW6EQ3o6*ExgJTflnGifTr1q2Ay+9)4Y{5HB}!+~-~J{! z(OMA@0;nYGfLf6}kfrLkm_yA(0w9RX#>$Y5Do;J#&S-tt5q}7NL1qkxtdr_kzbC^X z|Gq{uMA0p*PcyflU#%=tQ?UarRWQX5RpoE1R+bg>D_IR@PlV zzLR#pWCUg{!t;NI?|!;#XzMa~eExm;c=Fq*RP?--JbEQE;lziyL39>bHa5;NL~?z# zD00FZ2R0d%%1^CO&Y4=y{O6M$ne@Gpgqn3w59KnTPG!|{ zDM)S04d3o!_B)gPA5?4ws>w!$-qhw>3O0=XzW7JdmDAGu!6j-f)F z4|h@hz)y`C#nsZYe;ok3AkDyX^aK~duZ_qLOyWF@f^@CsX=@N^;<+(4w0juT6R=^h z!7A$ray$>w=Eqo~tp#-gUXl@Lykhr{pyo%#`l69Gjk!&XkY;Rd9*T3N9+50Dhrg*b zV6WnKZtPWnEz!@XFzoo3n@=e2vDd~Oi7qN!9UM1I7QbZtRY zuouLuEQ87$pj#DyQ;@pbOw{xGS~I zE<+nhsA^!P!ue%7fWuckghXDrlJQ!3bDZ=GmB=HE1Jyby`<8N2K+F=Xme%Hrm~(h% zlYjEQgd?##Q^@}6A=dVL+zsPBum+j(v~YcuD0p$@p<2@Q*fAmW6ig$02;Y?Uqad2_ z|A+2h889#*j@Tbso1MPHkQF|pv(RGgCtE1wVX(#WzN}{^dC?>+B&2G4u(Pu>4h(e8 zBBuZixjo|hhJO{D*E0YFMFA^}@B+odW4nDeeMq(|;`Y`b`Y`>_QFM_vyLr;1`w z)VxFo_ZPM!W-!lM1&)ctg^q7qug}bn|33QA_hqtDs4j>1qH14$1qYysT!Tt7mMoJW zIKRpYcRfK3dxFT`a9|ThPLs@ofTviPyYa5y94N^j;8Hkw!7wsSypvgY@Wt%(aU(2? zfL?kp4dyXLV~#qcQuE*zN{n0Y-p}`+d6BcWDVSEM$L+z zazJEwu*m1p{}O3%C41OgFRom|JCQA_&ah>=L9h!{J^om*Q0m37ra?(&GJr@3dHqTtubPWrO>@vrRF1&#B%Cc(>3E6ZB=6EE)6i z8>}{GLLjSgk{pigK@@Zsr5aZO!7|mOz*R%ktoqI*{(zDdJsdp}V;`jxtyh$2pXzoV zlhMkU=UWo`DZ=J$e#1ZFuA~P+mqU^Y9Aored*giQJvr@A91`xEzs)ege?`lCa;0Ms zaQPvkbrL$1Jpofvzi&^)kozl?;3Psi2J!fsS56mJca60)YNv;4x&8Lq`>NmdsQcsD z_kAt!L#X@x&JnS2G5pBTI$`U2fvtW3CUx-kpMYASQdnu*Y%r>2p)-J3`X>)pBU)Fm zV-)H@s;RcT2A~%%3@l$UVw|*{M6w4M6z7v^gse(bh*r=BsB_T1=gVNEp96=tMMpZu z_M`h?zRN1WVybK>;$qXWz`2>A6k2*BBG(So1>!)<8}sun@(jChV=G9-qaY`GIT!G+dqH)lld*!-nIMnBh$GkYRh?*HGgH|yKrP}sS|apJXG*7!C@ zEeyNE@CnCZJ?w1*F=6da1t$PkS~;8WVw8&Hop;le5jf#_6n8|xjKLyHQTGXyk^pe> z@4q0oCUyHVTcx!6`HgC1ZikeYE&>}5m|Q!`v5rOoeFoxDho%N_+M!WT7JYG=U|CnM zDozNt5tCYxLAp#xT7-#O!roqLo}5eO{!ZY!F4L+@6xDOUa(SyqGV&ZH3nhwd*2*ER ze2}oY@}P8i3~q$}#c?G40*90=F2RxZAf4a*e&cd*e4GV&>)$|Te!aHYR-tFF(71g- z)EV-V5cqWB;*_q?bC2e82l~&`v~fM%r7`QeTqRuGoo3YfHBsnw*!Wya3k~=gL~>mt zna=Q(-36{x*bFQKh3Qd#d1P0<07&ynb$MS;72;`?6F-Yp9YMI06XpczoQ8&!p#p!3 zgr=kTB&L|vmvmkUsUVqv;ca_zC^~i$*C|iI9y!B>U|BNx4aCTCk0Pu5RD&`sSTbqM zqSWwJMtfbIDddXVELUIS8R-S-q>#cmlMX?n$D&u!8S$Ffd6HX*N=TfRAXy*s6>uzb1ZSvM{EPEcAd**vq2v}8d+jFxpBNYY987JHB{t=UM($1}drj73l zv0==RBy5E&*RSHn=~5Honq8&Z^h{3En8NG(bb4&SswbbMM+|b34J2;$n%ZaAT7s)8ciS3xy9aQs=$6?49o<;m}AfnRe@q1Y6A|6 z45#TqbnxgpxlUTSNG_CoVFK?ZyWQ`QZbWiOnB!gBQLnB4;a%BO0!5&pta$R=TULQc zLV^7D5KLFFU($yN&pYqPgC(;gsN!!g4@8@g2~Y-i2w~% z$J-Bcz7LYuYr)skN3NSI?1tB6rlUp2B$EJ=8@7mFw|#5&9c_WPEQdVgZ^uTvnIZ2a z0k4nB@lV5!@AEao?ZdWz$$vGlVSYygdpcrHjjKMhHGCY4yiL8{ihMK>{uQB@-1qSD zVF-sf0uNkvAO=$ZDG3n>VSxtuyFg5(dbTZ#U62~v;H)NlVr6Az$Un%$B3G$`E%a^l zaq8nGaEJF}$`OD-Y@B19v;Er`#)dTYbL9eko%$zKv9FFx^?g5jG{fscVac;^ zb$aWrn%O$e;*aW@e0yO_iDq5E@dN7$6c6gmPLX7jJp>@U$$r^`mj~aSui8q*kH3vS z82=g}yfF=rD?P%$B|B_IcbYCe#GMM9J?A%KQ-(LgmWk;?GxRz+Od5!WsZpaZ=0u)F z6F-Xj?em~$I=m=J!O@4YwAKyiG*!C&e*gkO{k|D&%O|$>#I=1Q^h$H%T^HVUcpS=k z2t^x3D@7}!mEMAvHh5mrJ*}N(Yn)#e&X<+*%Ss!$-Y|+`)v#h|QLYG5fGS7{RGjKN zC3cGMs2c*rl(oyld==lF*gUU$02TBE^#LhoC}*?RFL zD2kVYSHW9gC&SJII~m>#y+C<_@@ZbL(AJ=x)76uMG{RSGobhqO`kMQ3wiDI~+Yt%0 z3VN#O+9;=rG~tkUi5Pg~A!mJqzRSFVRnJ2Kmb}t}DupI>x&m(a*cs!BjScHNbV2)q z?h9pKD6TXOTI=a*r&PnVV5Q_$3Pz!aGW_*gTc<69w(PXlDeCwTY*TD6j6K)eL<3Pr z)Il9rXBVf7vy0&(R9RR#agq}&Ct9pn53<JW%TcDh;<_ z^gIY@^o|!L&L`sGM4ZpWX@!>3GymKF^3VL@{P(k-{br};jywK(NPz$cH`F^NdJZn( zXvCbM0+myq+l=V-_2c(MMC7|UB)mR25aeRZ@#we@asyf< z2Xe@49+CU&?Puu+h{T+DzGg)Daw_;HF9IC-ym`;Ge-6Pd#-BudUf#EJ?avUwEl+Qg zJiOmLzQs4|B(KNs>0mya@BKu$n9pLm+vmT9neSy}d{$TF&s)^TRdVw-ec+}fAj1F4 z-rGG%k{bzrKX(8TnOR-kJtXILDZ4w}$t0QS;Y<%Q{r`W19;DUn+O5n^d$;6pKDxT9 zGBW~zyB-{onO)t(9j+Z$vRY4(~D)0Yr(L1(KUZ_4%J(LE_i+`6Xp|+;^tRBo3WuZfF-;a=rdw@lchm=M%sk7 zf}4Ul+7n_K*^Ztau1W-n?e@K^AjJ@9ae$2xxV7Qt;%|K0-kccJPK=40=QtME;WmYV zEMa9}($1vDq!#9$laQ2)l0XPp66M-*yv)m2c!fW0aCdr#wZeIUbro8R16PxnC!gGK z?>-g;Kn=QFgqg&^$8D(>g@J9Y8`JHAk2VJ&ApjDBSAwYEl877ahhQKEBxJbQ=52}e z@jVi}2-aJN-a`?lWn*8{@@VoDi{Pw$N z{^9@qNB+xy{g!XO`9OC^Q>HUl1QK^*sOO&iqXww494adi$s z2Mmm$2~pyP+}s0)kEobL8MaM^JFzIosv6iv3DH!&1aW7eN*~r^9N*Fn1Ql)49g>ga z0(XX_c|2Vbu|J5lSpaJX>iU*d-a#u^%{=$Q$ExHSg59_v`W0t=$C3}6^PZ$LP!Lzt z0|!Q%1uhhKM8c9y;QbatectHaLs`Iwg@e9cEDNmbO06rcHNsj%w2O)m7Lf)n9e;(` zyLhjxt#Mu|&!?5A=at8&m8a**^RclkomPc57~AgA;y!Rwz~~pQjq2lL>(1FiF{L)) zT$O1_p){}-RE4!U=QZ{QS)sSYx;9j0bYP3UksIb?&w>U193?PymI9{TMlcu)oP=;y5vF}Hrs5VS~{CdxFCb0Ot|rp)Pdq_xJ=$48danOZCN z@7{5Dch7FOkG}TUZwl8jzri+FVr%D^Ic==F-JH#G6sj>k4ifcpe*Fpo9Q^ViWX-iBJ&85`MG5EW*r< z0)SkV0K&R}pa3Ey2?6I#`9N=-K+phO<1qXL)FTWY!&M^5c#g%q6XpJ6!ged<+qZ8y z9%EcQK0b0d9A1?MUg4hwS~I&%X<$1gOyTg&FR^3ls4_5O?bbfi%`p%Iv46B zwA?XoaMEyRg&MpOnG&DVL$JXh+(Mx9< zi%Hf{jIfmGQ=#oL)x+wZ^^Em#xlPZgG(4@y)JgkJ_J-aGcDKUa89gMHU7`9+m3J*3Kklg9+EEw4!btm5NwBV175ZxQ zqtjNyOhFPoO=uP*Wh^P@#!YFK{N&l7s+( zf~zB9I2C*WKZ7qgN>4j#E_ljV+L2O0bz!e7d#$)ux-=>_l-!g`rb}j##43T?RA5hB zDRPJM#ue|v+MTl*=U%Bz@Wn{SBk6b~pH2uByAyhu=y#c33brq3&dfO@DTn1p&WN88 zGsI4aJ)q4=H53M{?eRt7)R1$-&W-il5bKn+!_rB|N-2Q@bM54{F|Cc(oj8G*a4DDv zh7GP)>=F&9X~)!2X=JU;x}bX}zY+8<<8MkR4Y;7^hAf?Q?j$#;6{`z=66B=RW5FKp z@uz$2@qnH8*kp7$(r3rK(Gyf+|4soANf1K67k(_vk9dCy@_89<6|p5euD@^Lv-Cd; z@tKb>HkqIJy-Y|V+~v(L#e5m|WgWlN7GI*ZUi!Rz-zs-oiufy^_)C z(w*&`B&9!mpZoWJ`}h2(zx{jMpq+*KEYxSE9fh_CWw$WRE7NYJ%nL=FWTDKTdpyrc zc`Wdlg{MiV$q=dR{LIY4&KJam)$cglJ!ikC*@4^(U-cfYHSW!5^ThLE&+~q`Df7G@ zIWA8;FHanoC-%DX)ztY{d*iS6#xEwPwTTbMJwH6}`QfPyA07*naRG&lUrB01?s?^f3 z$)E&ZNG3tDleJR%iDE}eKau+yXIY+c&zL3L6gzhMbC2iAz-J`i!}Ja*?~v&oX*Tli zOxm4E^McDtU5(R|b9#>F(qt(9Cwd8`^}dICzf#|>)cZ=`RnkW%e+%VzP`(qUkFa~= z?A{sk+s^LZKzCSsLtF2u%UjxVhgE}m!s{8UCsaB*EBVb#z7Hp};;a}8?hD;kYVDk# z;d~TM&u~6Ln>&4P^g}$^IkOgaIiGm`A)gN9?s1yj$2E&g}LPH*2FWN#|VKXVztjzI=O!zkiFpe~aJ0!RHAT;eY?@fBmX7@CvW+Q(-&2H#5vS z)+=I-WWgX*cY;l@9eG9g_}Aia+8qQo^W_ot1q)$2Wdt@V%m;C_eQ>|hSQkMr2Yh2x zjHgEuKAxXNhA1k;ca7{}JR_-yI?9g3eQaXwv0M{pfZ!Xp$06jeYB6Txp5)>tpL8F!ugAd$Xr zWeb5t>ki!s1qUB%>t$8NN1Z}AD`vD{ppovDdenR11qk_kgALy%i5r8nWD23Z9D-ws zq9Ld~mziDJF;6pdnJAu7G}aLxWf?gN5I4~pW`>VF#}y2r!R3bC2)Bi0i#O7ci@%3c z=-M&smo^k+C_2PVXWfh#C&t?2w!hlq*5dC!Jn;|z_&xvmzkJVczxjc0zWIR<4^Q;& zxCp8f_xHcy?Yn#Oa>v2;?3vh=984DXp&W5jW6@|^iJo>3rm08>FsfLrt-=@ASQSB2 zB9TcYqd6l;UEVN>^NW?=wu6_p-}7_ zdl%=@AH8h~0+nW!)mEyltZiXk&#ddhvaYP_LT{l!s9ejR25uDf?@R)OQWQF81H1VcQ=Cq9%0aN;IihtYMOp(!i~cA?wB#Aym$oU#3bEa*@g z=*d|m6k>XdTWebF^cuq5?jw&9LlhMhL0pFe#+-`UI=4y+mzKHAI|Bo8M@3@WZ4S3X z8*n0D!$9D4K9NM(l^MZsHALb@qnrv^Q@E6wrir`#f!(wxOU9a^2w5{plyzD8?%VHJ zP7BLgBM%yC=^(=KUSt{qDCsk3+;|Sy+Da z8&1zhzWQb3t6zn+r%SWkrtJW+zm2YS%$?edr^ja=A0JtlmBaqPZhxTcW@IXm;|A;I zu^p@g@27bP{;&8&L0}cRSQWT@^I=thP%P}793POpj9*Mv2r6=Nx#t{u&lo-1f0Nu!9d=7}^-B%4Ugh|aKr)f8p* z2}Q8fVFnc=OGEDre*{)aZ{LYXT`_`-JQ=?1Y|_Rh_CHPXx(|%n3at zrZ<`WyUcTrt-`rGA6Izlu}1F1*(vPQgRhNI+$rXy4uaBqqFcti;GPjlu?EvYER-1a zgrp-qJ%c<`wX&`|stT(XDnnTyJ6?e7NH)|PVk=?`*a`Q9NeT;$s#u!HIx(frq?MA4 zq;U`Bky!iPhX{M~MEEfB%2}NDmyYklJ3}Njswa!#4>+0CKk)EH!9iKrOO_jCo zaAtY|+bJcLu+*4la!SNy5s6UJU2uQKmW2Bb+Jg6r)r_qjTMb){z8bA}dIet%$&H)> zpK9-<-k7Y@-D#R=Dm0Hv*oetvtjpI?n$;ZFzte;8e#&Dl@k zpZNS|37hi3ZnxubIB+~3FYbEI_j9cIE4;$*2Qylh|R zAI=#3X&gMxXM#WXsgvbp($Y;+@-Xvbp$;MZiw(F=@m?ns=wT!UVckehee{I)WF0$U?|4qK_VbF)jo zSd>HjI@BV3fU})bYSew3-*V$FVP8DQH-}N?5&W}qGtwIAOT-c4_ZEB`o8r+&dhub4 zFn(;wU4LJ|1uW8t=Gw*vj95|7@!a}S-LY}DcJ1Gv54ve;a~ozPk=xMkdO_?d!bq-W^q zy0hrv7ybg~h$-SJ93g}=8uaR1h#55Me(6`?({}WjYXXdU7vU^|i4&&k{St5M3c*o~ zq@AQj*03^AQfEqjbwhEWD4{$ca-AO#^}l(2g;)4fh7c}yS~Hf_Sr(&J=so=1I&n*V zhu#PwJ{yZo415vrI8_?AiRsvV6NH&w^NVRJ;4kO#_DfmakK6G=VhTv$Gx$D! zUsi=W+=jpsp?svVK$?WmUx$#~4krrQC3KS{lJm5!3lg|y%R&SsL*LdrE!J!QV0Y9d8bM_Pae3 z;HJ1MMjAqUV;u*CAlx;j5NH&}5DFruTUZ(}h_Pw023rz(PSl=QR^hya5Y;AV)kZs6 zcbWv7MAc(VWJpQKB~g>%q8J?vqziOHxXU_fkUFS1hM+EZ7kX3L>cb5oMt3NJiv@3? z17(ay9hklWRbTe7(Xeyu!~1t_M#+vu-!(Y==^~V;*Kep$2~LiuV=ziu5(mcUPnj zg@M#VKtIPCYO06|j*Rsy@J|$A{4jwofgi@EBK8*!$wMH?$2ae>u=hc*L_!4pupxNj z>55wpMS+cT8&W#33PM79MlDBu&7m;z7U+(=_k&qt;^l1lVw6OxaRK6y&T5BASN5235 zd)~i)|3|q6?-hQ&kkr{v!AD5N$q6PxXEL{#8}r(k*UoNfcvW&W=3be5XYS5qLQ_Mu zfp$oOt3$4ITQMqC+l3ZPo=_{Oh0w37le2TD&YTi0!%C(06YHuhC&m2)eZ{Kbli(!E zNy%p@)ydW=9rBR3=rDLtpiP)sSOQ2XF-;Td1=m7PQHJD=ohfv|8Wa_>Wje}HGF_c> zDlDb2cp+}14}N#GP^hQ|qL60DMM(z@q5HifZ=lRZO0bD*GuBB1OpGimNtW09~NHl4?P8M#NzV<%hzp!Xn5Sw1*PsW*LH{p^23YO`V)z&fo@9 zcZwK0tCZEyQ^TJY{FsnuMNf%zPUI>`HgYnucan6ncc$K%x-+2^3WCsG>1K3q7!Fcn z-A&PUk_^euP*y00Cq;TDt;)12`xRI{P-u5#@MTgrW?)aI_#Pz_e7YUxV@;Hbh|^QU zlHo}RZBLDKwmp7#)HB69jv?l#3lpK}+GS$FL15KHO-}PlC+HUOifBW!#isBLqytPd zq$HRK;uiFag#|64jlDny-+{k@c8A*@mkBGQAA3SuLF){~D5*u7MOm|QNJ`_2+;*xMP#EhJrjG3<4e&h2ww z`ONcWn2lbzzP|)vEGl0H|2&VE@bdG^clf*~2( z-EPO--Q7Ry_Wze)78VA6_~D0(^~jgsYpu-l{E7ch0mtaxaTmHsSe-9M>p~NdnIQIO z!RAC4qjgB#NX`t9|Y2@0_R-p$@^Jc^)F@kv_E}j`avV8H`-5Ceb$*x-ls!Luoz$RenF z%t1g$+o%{O7TcvRxC?Gd9%b3+0!xCK02y>fNNWB+o{#CzZ~ zt7u=)Zb}MLbbp2Tb4zOpq?4>>9$@Io1gENM2lU^B}Ia1pys-5a}}G_7>5v zK{}$2_%ph`qpW+%G*fhD)`>SA-q+4oEd;&X;9W1gU-!JXx4i53-1mFlv^yqAsC0z& zf(Mj_Sx2OSbRd39NRks)1stb?EjVW`N(1h=H;@{HZ)%7)=qtzyQXy&|Pme~ATl@u8 z#a-#HtgUlAH;%^>-+x&7?!(IW4-1b^&pbap@x#L--~8~%H$R*?o*JjKu(pg11(;$2oO76CxoGmP%i9v4~DDtjJF20;o9&V>n4f82#l0Alpo-ibtpav5|T=T^9tx# zXVuX}ticX64;-N*MF%#z;Fiva4GIvnrdv41eq>W76R{Ai8^Q&yj;KbtyN62>MMi*N zyhghic*3mU6uQjxv96*S(FQ5e$aE=8E%9{jd}wEChg~(!b3uHeaK>srIIz_M^U9XXwzS(Y=kR%)$?LP|nP3frNIfN1Q890w2j@Vq$BOXK6m zCmtRid3bo_>B%^rjAb!ux1bF@+W0zBd^leG{t`~vzFl9V1s(4(4!dv}7u()PfZn4` zBG;Sz9L$3uTa^o=)>S}>K2(W4W^^5r6;+S=6W@(c4s-|WhBw1TnQ%ZP6c2zz)J|aF zd&iv^Ph(v5ex2U_xHRRK=Xj-CXQ^i-L-DXG7{!Tw$mSxl zqMFD#EDy|OrsRT(QfuXWKGRyeSQ&VDcwku;9v&We_wF5U-@awP-&3YIYKfls^MvKrfA~rSHC2ciSl8`K{Nr!G;s5-bzv2J+yZ=N+`S1SC|H%LJzy1gQ=l|_L@UQ;+e~rvDJtZo` zngC%zVDUi>`B0_ySQbNlIF3~e@WC_`QIbHB5P;3MPZ>NBgo2Za_u!K#38zR{8Q9_M z2%*iQLUEX6iyJFrOgTZfmK2CN>nJ|HaasNaCoX!ph82W33~}@(+?L)3vD`#x7&csyP#P<;FCx4e1t=2dCn75*SlH4eKF1k$YJk|-KN->D^_Fz zV`FzruqdfHJ8#VNP%uD(Um=a4G~-r5&y8LhW{tRHpa34T)hEPfiU@P^aNR~49T%)o2oE1n5r?EhZ1~E(41O<=9md$iOCAdX0#;AVJ6EC&oezw z2txM8-V$>Yya}p_WQpeRB*Ial=1lWMl}wdHlT5WlZ9-jy)f^GO9E_)&(2~eIkeyL> zAjwGL2*Cqy?y;9}BeRkvge;Oak~n!%%Bm%Y!cwvO3RF^VSn5~?lN5XO>ag~3wUbjuawjKaN{$Fjt_Yci za}veHsL83~tXgTwAT>N!eCqf*TP4iq&ZwL)q}YEZh7vf{I{&I!+f1AVD*uDFNtKub=|$~r@xJL}{j?7xOph3*A$ zBXuXMlWi#4Swd?<7f7j%OD2?lgnop266_3CW8Y!P@!5cmu#ohKB*O6q5DNIAyLEBt)m%iVdivHFoc*2UXz zUp|LxI3WVR@c&U5AwDagaHS15AGS^67vcKl_k?-z>1CYT&z}Yr@SDBpD96j3F7LNt zKMQpWW3T)IzU22QeA*!|!wY;;hL_*3@Z(>89|%5*@Xw16eAefm*5(di>~=d&rxVMvP-`9h+gEsnzYK79 zT5CK$K9X~0x7(qrFN6JL=)H43pZWOl<3(ZM!aE&cW~}RqyYq*hnVJu!0pbk1W~a`= zDoUHcCZkV6o0U9gGM&^7ZHA_JwhZee_yjUR%E7xU?i5!hCd3ln3au--IVKIa&Z=jg z)0ysPp1p$3bnU^XUcviJ=0uVc`+UOliAgF4G4=wf6MSqFu!$s{k`2?aP$uGtSyDfl3&6IqnRgyu$@fu2GJz>j!c zKr=EWWKJ+8yrej@NgzoO4UC?X%7sVNTTUWYO`nZsw^-QxB z+8M1YQ|h>fpnMvaUo}gto&>c93;+Ni07*naR8h%P%}6R}$+ToN7qr5zJF*(nv9mii zrt^x{1kEs%?$FGs7J}YtO6mz|L0ei%tloi+(+SH38rB!Bka|Havs$I;N-dSD6_HF% zi76|Rg_Jyuaw~EE;z~7P>B_Mir=}dM)4HNLqf$tDMsp@*C*ycE{M3=PGo4}X&OXDu zi%WR>4*D#tS?KDBD@xFnStYC)8@L4x>?tEc9P=rNgphpfxkBc|f|=cFVcAw7{y5eLH|1)s%GNCg!~vLLEq^|w{@+>x$`2Lk3b+YY(>3g7MP7UD{Rei61PFG036Jo0u6;%cKIGzgA> zn?(Gk4C3*6DT?6E;N^@B()b=r^iAbnl<7-FyufGSKW(8e!i$aDXYewK+w`}3%dH#j zF`P&U@}Zj_hzy3oC*ekW4Civq_V)QnY32He-VHL^A<`XfIf7lny&MVdTiO!B{SM}# zch?XaabVmn-aCq+garYkyJEh*r$As1-8O&4Jo+3@jHBHcN3OUD-eqhU8RIK{$H@Z& zBaWv@2<2{#WP&(^$)Kp462CVyLR&oa3EVDt&m4=T}WP8>dqAe#V)Sp;@L#3N6M z#*4#hANxp;Lm)M>ZdL`HoI+usWT6bjfz73D9(l|&Gec5((NnMR3V%A_9*3dboz^XG zlCRFPI@TTBP&(!fv5J^Ncf45~ayBQ_kR{eXWe|Wd=R_q4vu)+tq&qz3e-Hu0I&tfq znCw2HCLILcn8RAY_P8YQIL-tk#95S!FuWv?Y52#1W>^^jsRC6mN&~TKM84u67InEW zE!jXp3c?+3zd)peICw$U1F{x$4Pr1!VQ!VXHSx9u5o_kWvBbU2+}j;*>zr4oTH$`D?4`2vjr*{5zvG0Ty9JTwv5zq`I>xe|F_bPslOP1U z8H;v~+Ig5dACmDQ#q&v>1-}SVbs59Vo$Rnr!ke8^rUa^(C?u!4P@4>4e!)F{ce4>c z#!ZJ9N6pa+tOVnwf{&3DV+*u_R=G-HX$+^hksz7^^SGg*OIXAhb4LiABOdY8h3%&3 ztcfWV=3IudKv)`>^TeDcrkp8hvofG;w_h{L?oqs$6R+?JukaTIw*tlS8$b}zaN{4y zSgW0j`Bd}3)4k)WSZ^WVo+@aKH9UwEJW0?>pE2|uL@sVS7(+GICy7N{G?q6}Pk_Iz-nkNps z1M@uN<}Ay?`FxH;=rxp1T5F$-vHSab?(Xi`?RS)1qD}vdVH;gO8h?Au0yDx+U{5FwWSpcU{Sh~g+CcCayEhPHkV}}$xOO9MlG6F&(|7!*z~`N4zb>9~*McynXwY!{Kn5Gao;GnrJ6zC+H`^ z77x1D6PbcCqq~f-L(2@ineHs!)q`I2{~z@iWVU+Q zmF;D^x~r=yGb7?05`eoN96S*jnOT<}ZO?R1cqW2}AP9m0=x}%V#n4lMN;-xqBb~%} zAQOl7NIcqb3KxppVRDbz1=B%YD$80~ZVF|sw7zg@3!^RUtTa!# zB{mi|7J_vcr@e5q7nWAg0Ye6ZPM5)8FeWWA+!+j<0XG9~I`lwJ1r4ZM!8WCAuT#2L5@tb(9dv1Q?eyr3(xEnd?f4k@AlIssVy}d9613zhv3anKPV>(0L3l@TGz!Ko z8j8yeLvL|i>7{@f#e>=^wF^r#N`O?wPsV;@w5ot3HibiDC%n;Y(5kbo&VJ+Ur^!b} z!MhSsP={57>QDntA(R2y725)n4(hb7Y!6C%Q1-LZHG|icC9il~{Ct@w5PbU3PvG$#e;GWj zF9+oL6FxeRkDKs?A^g)Jo;~1~!q2|)ukDwQ;rZP{@>BoA;MoH|deXIfeyX2->e{F9 zY3b*0{Evh`sk-=x@4nvu(ebh3Ag2%VSj2z*L;oT{M5wi1)4Hx~+s4Dg1KXBzzZc_y z7x-z=S6-I90mh-cX!;~-F?JW-H;pD;UgL3XH=SIC4wktHpDGV3J5L#Ef zTRM*D1#Jb2f)$21Lk3+2LH2naE6Z4^BaH`|Y;0=mi?SoA7FI18ZNNY*K!ahVV@5K9 z;>Cl9p*(cwp}}yadSQ`@+KSeSlpOzPWA0ORY7b5x+?aE71Ir5M3Vl(w3J;P>{#A@s za<8kZAgZ_-8ir#aj+ET*QyEX>$(_SBPO`d!V3b%8PL!y`FbLAIS=-aG(gMpVqvKWr zRiQ^3C%7uT4BQpEA*CR-V&)7>gC1YNEBNwgG|)_FI_NQ-hzTelq%lP5lBWDWnEhK6 z$8f8u8`2zUf(@Z`0Z~M9h!+E&vX1;GC=T>|j?AQhR37x=YsEQ;<9F%Sk4Y3Y>cbzDF4l!`k_(pO#4CnJZ;jo!r6i&|9U;2@>J~by#1ep zkDB=rWW`_JfX?ejlb>Jt_{VWBGtG;~b(zc^^ao_hT*UiP-r&e17LVVz$Q*Q^6F5 zfjq^aK1(|=K>EB*NYBKD$zo4@#WWlUXhD=9G4g?Xvp>WjRG(nv`HmvwqOc-zOFq*H zcDxnF2BRJfZa%}dptMN5>QGS@DkoW4Sy-a7cp(JBosd4)0wFuwNj1JBP1-{8H{pag z(nH|R)o1_!2l3n-j7Nn^G19_b1Q7@cG#9&kPqHohzVmPi-o1a|`|p3?yYIf^cfa5G zhu=4T|6Sw5-2?ac_dIMH53TcXRUU-vVi>UZfj?Z>+Tb75x$%bVJLhxbvN^wbU3t57 zUfYIhkj4waGl8oQpVJ z$jwhkML~k9aH_)X$#`>HsB6Kcu!XUY^h$X2TN<&*RO%A~a7lhFavC8cQn1qk?}3k; zTLHKVkZX|-$)(9iMTdx-2hLneNcwUzeTQM4ARr)`YfP?iLJ^~CVbQ{33+vTrU}3e& zQYwp87Aq{4%58@tDw6MV2)=S0e~SKkffsmzzg9>*kgH=xpLtv}kj%dx34*7z!btCv z-3aZ3CBBY`C+ogGuJ2hXNg$BtYV-Nb^*nN24oJg95S0{cMj&pWaXp74UlMOV;hRG8 zUE(Rui^S1Mj|8bu?SwiRZGen=Br-{l$m;SE`S-w;flPc4LQwE1>8;&`CX;W7q-|s< zmw*qwBV+JTI(6H*DFrPJ)5c~{N=AtB-tc`RdP77}mE`Ao8Vx+0A9%Qb;QryB^Gt2u zQ)zt+mbI|1m366D85B!T^VNc3bFGTtymj8a-}&Q@8{hx%o*#aE&%5_`JnV(bR%uNc zp6gWpdFveEFw(ybhi1Z_i3*@ z<jPHoBId%|3_#QTZq#M;nqZ8e=egD*ugfAz0eU%$bYl@Ocb6FATiVqN*W z|K)$>-+up&_aE;0fA+zTKb-lG|M9CA=ieZ&PHX~ag$T7UXpzu9FZVJgveVn|4GSN4Ih6wPmk9}{n{6$qywdWEXNYlYj+Vhqb#BcW;wTNqAco@% zTqd1IprW9Kn=&|+!A)^)inBFgI}77Orhb9qf{9Y4u*!<`LfDF*R-!L>R5SvM!pV)( zP;@A5zz45fqH-^lvlf)3QIkFdcVQHxtI!0tNZPS1P+lqJE2G>x#d6cQxO0lgwb6qX z>13cq5YZ`IF!_M$k0|$8>8ZSMT3K#Z>di{oPhf)~p57B%jz@PR#*&Tz_J9qzC~*P0 zfC{<@oCHe#jwRCDUyU?qw7^V~t}P$|hO>C1Y&$Y`#Gg>_d>XXUgNw8PLr7pKckr_rO)y&B1&{6?uEDu;y#dvfxaH->r_Tj%QenJ z=_sATMlht|>bP_Uoyb*^gP4l|Owk!FqOQtfI@g>0E}|t}?PE*_69wT0jfE&HUQY~L z8MPqAlD2C?X~wz~6o|#>w}p0F7^@NyXl!WTk-fs$X}Yu3oo#XUle3>XJ}R*bA!#&F zT`4XQ>F8obs6MbBm5bJ;%)j|1|#;sT$v_w&R5xx3+$n?F}9KHsOG%lUQiDHi%QH44v1Waj*2rJp|G zdGUm&y*Pgh5c0@#J`N3^Os1cNXCw1b1Ab~ee)<_d07*f%zJ2xc@c5RW?*(`)GpE7F znPk7`wWIHaAd(ch7Jd5nUltsO1Fv4a;&eLY9^H&~mrf&6q4xz|;I9=Tf_>k)ySuwO zXfU(q#eNbZ(qYOW<7}-xFLDKUr}yN~e=TIBf#gHFXc`TevJ*5>wC4VEUlgmFvasHf z=1A*^?nrTT1vzCZMRO0>g(V70EJ!c78eKG_lQC#Cx}529PnSCeXG-5#8mwJeyQVSt zDWeSBlm~8a&Xnk>Zm}y2C3a&(A#|WRD8XOXG&EQ-zj{wBG9-vx$5 zdZ{l?EuIG6I*4XzT)%_vf|*fImG!o=)=Di(D?1+VXl(5IK<^ho5W6ADiLetaD{(7u ztH`ZjD@ZuyZliqIsC{F-Yz!@Qt57Of1uZEP=t}aef_qQriau!GX#-pu(l1zVm~}Lq z8maglBO?tE8MigG5LSp%P#Kh_EDh=gb$4_d=-$z7#~SHy2xp8!SEH$UffdIu48#0wYQVTadD1Wl;6P z5`$&mS^5Q|BPuVE243I={!tKtMCYp1$3SHusrQhany2z!#Ib>v%uWCzd3?_1$n_7i zp2urH1qV}bSYQQ_NOdYt9G>3ABe4rvwPfLspLxJ>yUqen%01SQ1~Aago>TEDU#g7<;VjaiV$24R8l{BYYuZD%|Ev0ZT4uAybjUh+d&Dh?nf9 zX*6Jcg^>buLa<@fv0}Y|3*nBsQifq(P*(udg>p3{5F&!Z!?H#mXE&oUhbFQwB!D9? zdkqoIvGCwg*>>KqksBMNe=So-CCEiSe;<0HMPM;yDM~e?7EL9KIg9~Bq=H6<>P@>p zIp*5D&dhs(7xGv+srOYRayOq`MU zBtOhNKQyfKo{7(53IaX9BP8*i2owJbs3y+!B#$4}Mvg+c(Wl2vvx1PL9RXwD!-?V0 zjUj>yxH_Q&#BnUwX_?QJ;~C_7Fx!<)Pi3su8kfrjwKD)pu(b=9{leC^*-9bQ5rd2@ zkoYd^2r~4$C!R+jMsCnP5%dMQ*5&WK_rN)a@)B7Jlxtrc<-`hF1x2rY;_{^2th0~7 z!~Mqh?=JlDkMH>Q_kZBqZ@=Xaf9(AJ`{4T@oy(=M?>l=>CA4rSM=v0p0||RLeF`jp z_`&(F+lBW2jt}qN^X}b`{PydG-@X$5<`!yArASpsI;!QctKh*k?CQ4fXf*EbcGS`v zU=zVdw&k4+zF!94J#>Eb&cnXb+Z195Xn`7nNE(8z7Tm7Fo7aWcuTNO5G*?Cgm5-St znG?VaL=@w3YASGebjj!lk7E=;j*~+Sa7orSh7i_q@|%be`5i=LJwO_`U0X2!Oa7h{ zGwBAQN_jLK&}l4Sm1>o$8D-#lv_e(GWPUef``sPY{GMLm1zzA^4m|zlMFcaY)=Z^L z0hEADPx;BIBu$O)Fm{j)#3!;{zy@d_1Vjq?BAif9q$!#>O$8+Jj-=%wN(u&1@W}OC zGzbGfyqSa(p-?>u8zFfHVlG9JbjO4cAv6fdueMvblxAi4oa@q03%)_{4E-m>tr|~J7F86G@34KQn;w{P=j~H(;!)u1$fsld=I=5 z$1H`7+QB;Lm}`#;KAgTa`o432cjoT?f%E+X?cu`yu+jIOF_pcSA}mE&s#44_E0kI( zrBcd*dm0y*Q3+|Hr971I0%>;8o91 z%juTokKggfKm0zwz24d1zvIK*9dCd8x4ix4Z&_czr2!*i%4A?eS-$xj{{BDw2mXCs z;BS7*|NDRc7ry_)&VT+-|BZL|{}=!J@_+DeZvHo3-Tpm0!I-EozqsUU5|=_zp-OsD zS_Bq6Wg74pJwVb}U`5hUKr&LmLd@s_M??ZBl1jdXNFxHc8W`B@3L(${jX;SbbG9xL z$BX1zB9QcKJn8TJW{xWoiKDYjJfajKK_u6ed3ub;(^HwZ|2g3|zxfS!cXzyh|DK58 zhaZ06&6_v8efu`&(hI!6&wv<|SGlfgq;Z;sP+Zax!u0IEZqB+Z?25WlRWTG*1D)R+ zj(A7!26m_ZuS_|Hc-i*XsxG)LG)X$j#4~z|2I7&j z1OXI6AS4YVM-|4R^i}Dr;Tp(r$`IBe1i>I!!l?{ObezCsLv2IjjK)3G0n32JVa*f* zf#OjZI3tt@gR!7P&{ojRiAxY?h;wB0q9Q3D3IUTe7FdF)j+Y=T6}Qa5T|X@WHn4Uhq{=j4ipdjs!831v0=LdKhCmA#yQB-x+PA?~b>IdSiGxBEm_Uf0v9*aTd52 zH?+o{l|(LmXQvITX?xf+(^+%0J8?5c!l z@+&A3b58^%#F#Ii&thQi)G_b~dP{?%*n_bPeJGewbRbosDx!`qj+_z?=-3f+WZ98r zhjoWCFij7lsf3W|HHE({Dg9eiD#eXeOmAt z%rZY$41QUe=kU3EK9~17_<8zFNRIVs&ZobhAAcGAveF0L{Jiw@^Is014%#oT>z9`V z9_Q4rye;9^3b@a-oqy1}Y{FQe+Eqt9sKi64Lum4=xXYlFJe=dXPbN-jY zy$tic?moE7%$tNm!spWpQJPidbbJ z6a@1?-3e>#x^dAnXT4+RflB9g7*0V)W$j9-7j9(ZP1$+7Y-pq>_qH3&mDU4~LDYfB zz)JF)43Lq=0@59GXHi&0Sds-nEDZ)+@;DS&0$B!}99cR;JAQUz07KBTVro5<;@hZwE}w@d}Nabsyhy#%)pj_d<^ zCz__h_n7is5r!y3z)eW!yT9N&g|c8}rI^6zFdA%yXht<>SzujYt-)HA zdr2O*iEu{INCyO6HRU@28j!2LlKYL4`!e*pxuzKY0-$PF1qV@HN% zzlmT0W@#T%mY@rm7|gc0D8q$OzzReq`CL0W(qKV`qdU3Xb>Vdc*-H9{tN2!t@-W!|rH zy*I99BZa)z^X#9ag#p+1dE|Sawm*MMWWw`gt}%}l0UQEk43~-%3}8f|QD{{5NTY#0 zDtlBmsqC>3?(`@bWx!WP6x>s&AFdom1NjU{jfP6Z6cD@TW==22$OPmF!cFgr2w zn}z&7k4j{wW86)_R46JG6N)9#sw&qyBPJGzRAeHOyaBS3EcDrV`2sKSXNL&f{n2n> zcp412=l6!^$K+F~ZbULo!-+9q2qGiDk*-PEssIyLJ$=;iH%P|MAtQ)@445?k{Fa2n z7y%{gHL`#UxeI9dn;;7okUH;$B$smoFY(vJAr99_0~%cM@bNc5MkICb0x|ny;`)5A z#Djtn48j+BJF#!CPii=U5AZQ*kLd2U_Q{4gUD!&hNiF z^X>2N`S$mBeEaP^KfLSw@LqU-2faIEq$jp9@!dfOWnTul<|jV%2<{&S-)%envT<%Z zAGR~!|G4n|o5J6`GL~sHV3u=B7^xI#X-WO<_xC)UcUo(RH8=_v-B7^b%YIdQOS!x~(QDkXa0F3x1Pc1$u+lGIGzD^YeON0nxc(PcniEa}vwM z|KvunPsLWUOJd~rL%K}JRkQh*j< zO4M0jOgyR(E+Bb75l`@ca5WkrCVmz~bgq|z3`tEo6*LN|Dhm;Zcv6v2Qo$S;Bj0O+ z$18gL9TKnRTIdYxbZtm!nAOQkfa;EF%eM@$aK0PLH^aG&29o^7>TnvVr>2uPi3aVV z)9yF!?#_Jp@WA=*o_N@ZhYjD4C_RHk28#xZ24;qurP08$V#~^Cjn+Gtt@HlQd4K0z zwn1x7>xI#k-cyiDK%m**bdWyBDl#3+nlQ^uc)GB_oJU#T94xMM&8#Ds!v>U?p3sld zlPnAq)N<~2gDB2k9GAce4&#t;LFP=$buHKGEOpGQfMZ+_5Wplgq>F%oD9JZB$NLmy z6NrPm=RLxtImvgNuNd=%jyGmrCbGNannAXSnI{keDw6a-k(7KIP_iFF^7&~T(9>vO z&otV=+rYbDqbVfq?@6=FbDy}tR1wju2fMGn`ih5#2k!3f*k=?Acjw)^cU;aFe*E!A z-oAay+qZ8yolg8(PhbC(5VP-({(m}cgE@c5vG9=0Jq9*F5YR$Bz2fa}4P9<{|LzCw zXI}-*kAM80^QCcmduI9i8=5H>gbQ#9xD$9-7wR`(^Y{P$-?7InyL`*<|I2&6{ZId$ zZ~pV&aPx2f4*8${7O%$O;G5=nkqWqr1XWI;)KoZp2!DI-}%W%jc?_N4?Q4oJKgb5M@H z4?{Fz}{7T&yh!~Oj|A3l7bwZ?}JA9(fZ6>s0ZeO~+pUf>gm zBJ6J}05+7;6x$238D&$rDAj|d!g3pUO`1to@KxDQg|aGL8^dy4>?Jn}A(hYTP-u#d zibz2gBW_dmm+my#d5{b5W2cRV(>aH8X0VCVCBLsSzq=Js6_&EFu0p*L>_*d5JIv^< zft(#1f%t%!>*X@Q1{NLd9kz}sAj()O=Qk(t%DzCqF{nln;WUEUeWpQaj_**Kp>ALW zKfzMxQrSVyg191}n{&1Ij)Xs?{uw z^E)h5s;?}=kwf+tk(hgi5aOkvQLx6n|AEO2tYewj1=k zvXsKfBt65`MzIS;A299{C+kSX?^@82t{O({jM(V0)7fd-Y1P?P*vx3AXe~%J)CxKb z>56PZTolfNJdm_iQM4FZ6N{$;yfdN`W22*NzH{k2m)_XCLkwO!y!PO=2RE7<<~^WO zNW_01*ER<3x)sV%os+_za zBRCCbG)Fc^wt?&eX-?0TF!w)tCB%@PMjM^MZjI1jugXOW7b&#d6y(Mlx-|6EsWLEu zAx0l7eXD2>I9Ft^NC%1e9h6R68V@&(e$!dXU^QnIPve&m!WGmIEtph_JesHf3s$+;9o03#z`H?*HY1GhcBDvHs{$`}g$2I55Y6YLo!WXRk{dV#-a zxRO?%$&rswAK`O(6F#~W&oe%SpSk=Ae6IZG^1cKDNi#kYm5=#ffT!1v-_MU%5U%9t zC)a)ge(D(qJU{;uJTI5$bDjH>;A8qaIe>)Z-oF4(FMaXhKXL!h!Jm3-#IyE1Yg~M$ z;$JwIKfZJ32&8a|JO@Ocmi@Tm&piDT_$6gNf?v@7Ur^*@_(CHDKCSGNvd?-eK2zjc zMo9iQ*PDOLO#W*C5lQ*p+~Zl6g;%d$rL(Um=}+~N{sJ%X&lw^zTEN}i9ko`LW#Mqv z_azu(aJgJQiZF2Or+*2a?z#V37<O>S3pvb(E+Rh*UH2O5JuU*kJ{;0A3GdNr_YgOhFYiL)gF#z1=Nn8Z*JB}|D$aT7)r znu12)BiOsLZwve7hW+w})(uyOvSPK)eeNK9;2mNECje)(#t2W@$YKmtnkmhMrb0(> z7orO^AvVGL+=J49HP9tMJ5~c-1gjO(Y1~ihd6*xv4hMIl2ht!AS{EMN=wY0tV4@fW zlY&tQF#K4Xj2+npDL))Gd(cxGOz^5r>k!5weg9Bgb>{ zJ8?;afn8t|*bMds+O6Xexvv*axq;;dPDQv8r2-A1h|o;%5X=f8um`9MZ9pSX32r3# zjRX?GDxH(=s3|VFpXQ@6tl@pfV}MQSatP8C@c;s3Af$|%F%aocJKQv|g0zA5K)a&7 zqKuSvRijmcImiuOgbXU(=D;T26!K=m_egw>@%aJ-S;?2~kmnhnLc~O{A`rnNV|p#{w7**B8z$J_rucLH?DaJ;!sOKr(!f#5c$hXXA9T$Ec9{c7vf4p*36Onz2$91S43o2kgg{O`Zkhz#bqF6|ZIE_>SD{;kpd5;oN@+6*q z@CuB46iRvkF8M5%#1S3nlHAY4Wk(xug%Y5-fy~HvCnDcb0TYM{*mGqQmM67gfIQO0N`ONzd&hP#>_;25K ze)sL*yFb7mzEkcVjQewNZo(MgUho(U=Gr&sNp|XSm@8Zao5TA)Xg)X-yziYqekgo< zYkYO1X^dsl7)v~p=~P$~-hbF=dwR+x20#7~xCMI&_6lO@)$Vd~z7ywnH_ldrVL8-W zflGmA5DKb+nqtMUYUpajn(I+(!MQt^5$qmx&ncwEvwb=HNrN~+hY)zKzd`<)i`b!F zN^qTrA&1gNeiw19Ut^XNa?VN4KRE`_<|Pphs6ZsGERm&?K}Z}WqIs^FLLE$#MksL=9)<^~ zCvB211rw*`dY&oqTX5y$XVVT4hr^rxQ`+T-)`w3XLV;@?J{u0<3m&gXx`phkNC6z$ z(`ZmE-^Dg48xaHCk<{Ld2d8;Gdj24IeE*m62OJ$E1OIN#kP z4?B9^h`mjtflg(x;#BeLhN6{E}*Ws}a;gR!IJ}sBIMN*M@mE_C9$iQ|6qKc{z7Klo+r1IpsA# z^i>dKAlf=qqUY}JR=XZnR@TE-l)CNCLQ+C<0Qz)BbA$* z8*XlHxV^pQd_HrzT)12=S>K*?^kt;C{RC zpQ`_1`rizG{9)(E-@WJV4}ajt|NbrSzkA2)yk?gb3(J zx>ioO7`+62v6LB+iWnQlMj@nRKg=C#X_9iaix3Y&+$UY4Rgo1$HEETh5DsI9mP#g<=)Cs} z?|tXqJ1&E-BG@85IgU^k1Q#Ks5Lyv~At#2fjJ7hiiib^QcgaOb1y#eeVlpsw6v1Pp zv497CKvw}v$w{?JSqwcHhHw($gb*S4Hi-L%%Z?9lAMA7gLt25oGQkaxDBybwtf=vsX zo$x5YEi!J9c7t4QU>XfvWxy}+7YUz^-eZn@eELz|mnf}I>thmx&odLgko`sYT=~!C zeF?r)_S5{wH+*{S`2BJD&%B__>|Y30@2BuQ>+$^H zy&q*pN6x==5Ro*7EQMuR7-O*S>3r*Qxm*ngde2CtsZ;X;FYuQStu-DV9w?>o=FOX* zG#qgEtHHqg_wRp|G0k=V;8()flB9MCy1}SQcVpCwQ(y=-l+~2gg&LOj;{+PHPb(hK zbB|n5tScsphLQ0?gBVWi5Z#C_#1O(Qd097H@;7FPC}u)ggc5>S!OKbyVQ+=Hwtn|Usof6K#a1+|1_-c4Dd}ta4c;c{#wv_dI%lrE_=pHmRY^`*u zG)a%_4W)bTd(a^^$pCyK$1Tc8Lw|F`237(qhL#1BKnECUJYd6-=tvA04(*VK3VT;J zSC9f$SjHN`aPtPAI)&LV?SrgMh{#sXdy-wWcE z+7`4IJPM=0D1%X*QFdf(SifNVM(quAC)DWH>Dn3A5Xku=f~?B_$KJcPM{?tcnlC^y zGE*v--80=YXFu(;|Nm#5efAt*+ICe*%8VcZ><2J1B~_K%GdnZg;}s-kMDTtm2m)ZV zf(0biXex*Hz!gk0ZEJbvqXxPU*c>H`}cjmqL1%?f1{ zq8g(Pq7JMWS}gg#B=hh`g8JIx4c_2C8g?WhmmY8gL`X@%=S;=90TP)5whce;#gvFzZJ${^wz3)|)`c{{71D?tOpXt)9gd)6X8q6=dc{$h-@@ z&fm-a5D?im5ow=?rMvU&KLppS`K}-Ya_ftc{m~-d?U@9<1K=jT>i}Wh#-8r z1%T(l3xS8+7;1O-y$?K#Pa@-yfFQ@qEwMeV^uL!of=t&*Cd!$I3?t9p0l%fa=XK9d z?D1dIjB9gVAsdsdyL<+chePZp`&=4O$+4|+^8sWY$n-n{JQ;GF?CP1+>|by227hme zIfw!x4EKS1CpFv`2qWCHPwyTzh+Sx)NRAp2mY>z>t_lbD;wF$2f6V#yW^$V1{_`KQ$N))+203s7E*) z0)Gz=_rWhU7zdK0pn|oaFWINiKvYsXTMMR@wJMJ#pby-F-h*{B))9;d(0~f5A$8WR zOCFdbw6R7Ol?9v3A|=p+a&g~URx#RbHcU! z^Z9(i+bunA!FRV=H=jWO*;EL*99DsYD#uc2wO|UpI748rwi)HKyv6<#Wcuc~K**k# zAaDrCTL)$s0nv%`*2xH06)Aam0kdezWai87U7%31o8$q5&g$@N-@WUnhZ)bF=e>^v zglD*Y{UtKo)y)393xQ;u*KwQsE3%DS#x+GPq6CgLH0d*HQF z-RErq1<3hf*OtC*+hm0PmGk*b?;SJe)2B~-`t*sryF0%7?mNEw?mO=8?r5!1$_?-M zM}yfs*Zz||A;)YsPhz}I5E3NqAVKCh-{WuuYYWSFf97dw=r4?I2m%4L0|2h?{*zWvKLeD~*tr=LFX_|so`_~}=+%bELc zj%a}*p9>2d(0qz=UX9^=v?S-DIrm27er+EYpTUf8K4<3sOW|}n z@&5gLe)!>s>ml~Xk005#jb&N>VaLH6{9}MuVSV57{Zc4i@pFSwsh3J@&}wH{I!o)= zuKDaF*s-vk3bs_DS9~jYSE56Rpptt?wFT{sDhsMDg$#}V zO;ajJ;UG|b3Lc42t0K#S9PW_2d-&##(l+X{(T*E-w<4Dn){$E3*T62h&}l6cI<;@q ziHG$?=u2a33!{S9K+1q(EHY4Y4x4ej3=U77c1ewbC33cjnoRx$XaRMV6c|uJnj(il zmx7WrzLIo}9GwG1l6F#*C?~ue@d8#;F@Z^7PBKPFncCwjO&bSoEK*paa4NX-NLqYJ(nQ9DsD zP|h%g2GGDtr7sPqAgWkjvA*G+Hw|nB)??%HO=10}vYjezRq7_NDqIBZlA0SY5NGg5 zfg;E;60a>WX@-(FOUQ;|4B#AtkJ9;Tbbb}Xa8~w%X@O8q6htbj6)hF16_-Z$g>@`! zYr~=-kq7Nspp?8xt4O8kfhJH;hC6Ps8H@q00w$?op%m(()Ki%>lDsLRqQq(pJu?Pu z7o$6uDH9l9OS^#t47O#^=% z$XW2SuV>b;?OulX+aPnezl0sHONT3&;f23fmb+VgA-d=A1;@{#jc2rnulm0^?0DvL zuebMCbPPzx*k8Vw1i)>N$QR*^hQwLcV_;Ubo2&=00Y^>twDm%N{&^T+^FzelhAVz?b|@ zROoL*@S@Ls3BDNB-$>v^-8}d9Ic=$I947jJ#zWrv8 z-wU<|{VCUl(S+fOQ88(_G{lwa%Hqmh2ul=7kVhYY2H@aBi72?I61XDio>O8F?nD=& zGnw?kqu`<`Fc2lFe5y#16fS1+Ge#pq7*y82kV4nS%3v+Vrq1w+d)_|m7Rl^C0_jE> z&SFL-SOt0!E{n1ql~L>DN-z2Jaur}!GgJ(bNpnPzR*WE__=$5p^vNSq%8l@wQm_h?xV1u|wu650k#gN6& z#n9%IA`~sz7Z6I1)cS5t8G*PVILtaSpaa_wHZN=xwo*Bk%Dpx2rQx!05G55I4qEHf zYS^OmmVz6$2%`auupEtc4B8Q@3S0sY!@J|_$h{q@92gWrktigCLm@&5VRE6&n+7z^ zRsw55oE&whgtOorBsdY=0ZZg91GNaN3OzIh75u>F2iy(_g+*2ty>L=tQHSEV4onRz zouMg&V&;tDjN&8xE@R640FmR!XZgfEy{LkFGV8wV zq;K#B{}2%FcsL$GN*w2bQ1$jn6?q zNFg>-Yya8}9ZpfEX4GdP1yCVG^FUt&6@g)>dE!)&8V$z*))Vx5SWLJJfji+6@Tr0y z1UU$$3bBA4NojlyBo_-H7=XGX(UT~!2EGQp8{P6SRmwDy)4Qsm18_-VOw1wTKxFO6 zEYe;&;o9}UK^*eNp7|crlTQDd_9BAJHr>}ubwMKgM6RKkkVg8=FsWssXbKNhtyHZP zO%2z{V5phTxUFRpxi-3|@SBLtstR5c+}_{~-r$b`$;d2}QfRfZEP1W(vMkhEa$P2G zCyFrg<^d%IsURfh4tFQS!RLhNPKe|hPZDZ#!XhVAbI^TV%J4bQq@B!l@R~;gL~`z$ zxPm%x6&et3%YBAEL?FGAZ9v5a20Tv^C2Nb523C%=UCfFB|2& z5q%0BSPG4g2x?@i04TWy)5I+R@*rWs0)ix3x<}T>HP`U0P+x#q2hZI7u2$Uh|E6hBpNIvv$Y5WxcSWLEPn`uK}zO#^)-ez^ASGl7BD8af3bR5G5tdB_Q`Sx&MI-1UkZ91aJL$0L9F z%b)r1;RC$>vek3aI=ci-{P|NPIqfB&At;gE&-2ZQYAU=dmW zIT~Mp!#r?{E8Sr4Te}c}lW|zUIfSR69Et6GU%*4_9s^VgpNzj z4)s9j5fP!Pa*)*ab|-KWp%x{U1wY>7ci-agzNH>7)a^{$p72ePOJL`Xcv9jKMs+Z# z3^Y1RuQbmabNjNe9U71KjqRvVoW<8PVJyQqZN}-`IX-PHPXiwyHSk3lwXn6+nqD`k zo1k5ie!T>G7btxS1A*C$*aL4r`Qns>~YB*TEmngXe;7@e&s+o`a=SI&QG^lujYL5Pna zL(mY|z}LVoU{7FA0aX$|u0k{r%XOrNQ{AY(F~Yfs@v%5RsqvEhXc<&I(} zNF=cYvv6NZ_!)f7Id+HNtcx2&@S^X139eNIuM+r5z~`C!nz!d|akcx6PucgA`Mk#Y zw=(|*wAQG#a(8#f7~^_l()gT#6T|2$oX?HRQ(=8LG5SE4japY&HpDlSWYouu*fv}SK`^BWCg$9IJC*89Ks*WB9Q2}kU@0?8H9K22^rh0Kf0JiQKdWoBUd*+0GjdB8W{Z?B)l zNo4B#!5w_=CBMI%)-@Mjc76pf^DL0W>T7xPJ!aXHaWdUPxC#h_g6<3Ao>>8r$sF=5 z4v|b_C*X$wI*H^n4p%_q>K@GJwN3@3%>#R~7Bj30uEHqFC_*oSYvCe| zr)YGX4dIC+Pjb(h6I&Dp&MF7a@tzCs=qz{y5sG`oeZiwZKsbmiSW3C>8JAEbDiVbV zN#=M{@CC63QM0OD(pQl;R)OyYTr5tErBa!5y{T{>ntA~Me1UA{C{-{1}YK9D0J z`{v$fcgyV0yD!hDM-DG@upo@=-zs3!2)U7%@kQjuA_wjCFAkzP7WYh~W6r56kb4(Y z<;oCwEusv{8W4)|AoiD7%hU&^vHmpLemD(Cb9i3N! zWlEWc%u3KF17YtM`u0fQ9=Kf2tZNedsuZdywIoyBvMgv(w9Go4b#mT}j}OjIA2)vZ z@mK!$_uupV_kZPwpPXNRg~vx@T|2{4xFneEy4NW=<6fC}(gz9IB9b1`ZPsV_EYm(^ z%PmwuuF(=8f@;Pu7OAB!l2{^<-O*vL%L1BAq!f~QPbD=cR3(}q3q%bZpA1RPHs(g& zB@_q9?6FJ32R?aXHLC-Wy^cU;KUGP*;M!?pdLY{e zjT{HUWCoP1gX`ZOHtRhinY40ka-IwO%vk3}IkiU=QOY&WnT6omuDcMN0$0I-JW@R^7MXhB2)Bdmd4U>so* zhA9rAbKf=n?u7OTa3_TUa^jSIEyxLKuG2YXUIh^}_hM85nSR`Jt}^nOQ`&XjWDg57 z-CR&|f1QjpxMW-*NC@JRMBWi8pb)nyG4rA?wjWo5EHnQA-jmr#$8}GY`y_+VT=qi1 z0`tbo!jR4oVMt!O>f&q?SAlUONKGJ?SDfmuMJF>CC`n#7Jd)t(;d3gNEWVQM8EG3p zuIBstGV6b?E3+c!y8&Z9XF0Es-@9)Ra%Xh&DFnIu3icIA3Gyf-C^K83sm1tEh1MiB z^44TzGMwSY5GTYL61e30mubxOe44#_LfH50KyC|X@uXzfeMwe_ybjt46@si2BA`U} z*%F!AB8i7oQnQmu_A%1C=uAV%{lCq%+%x^~xaM=eeeYWJWY|!U3Q+}`rtpj^h-P0* zt~o*_(RAV}M9}-j);HF^Cd263^1x#nWa4x|wuyjR9ZNA3t)rTrkVqus(eF!1;XU?(U9nzx|ePzWIjZ@kric@Q(p|J^PDsK-M?Y4X!kR zZ2o*^lLdy*N%`bUz$Jnwg0m>=@t((j`9F9(-f{l6MTHc&EV4y=;b3G zhUpP`cjEA;Z@K@Mf2F+pC+w4=t8)_Jou;tBQQ!!iNMV5kX`jLZsR6tRRD>!?D6f!O znSdfG5Ks_Qa{LJ~nP;;ma-7O^n{#4z4X(B2Kv-5JkuIY7p8x4HVh=RGb7B`D2z(0v z#7sN=@A&WRk23M_*#`TbVUPBfFHg6>6D-TZpa1-4K7Rao)$aJ?k3aJM{d-z#Jg1Ys z!5h2)7vz!xRO>S5l!jj!q5W}f`%ZrAkt6@s4!eo3!IZ0%0=;2@fCb^Vjb8RcppR` zgmv&dJZF%n(OWY0Gvh?(d=UZSs5+$?WeG%swwnMxk`2tOdPv!Ufd>MUIr^ zK)E}jm!OScaid*~LpPSqDBkDV@40pdl#yr6MW3jTul@28zMN0hE8VspkNeNiWjPp zO#EGwxw|u;hL4fAB3Q-ki1~fuHhM>?hh$1nO&m4^Go=ruM_~y9ibwE+Ac~ zCNvj{fECad$RXeqM1?qlFTf&1OT5)Q^&AR@d*Wx!g<}Z!QR%=^ozfgX7`-~%0F{b_ z(=0f27>lzl9c@M|Jp~gMq2@&y91#`|7V>r-RYju#K}eRvDhy5L4^zi6!UFF-nI%f% zHn~rk7b`TKg8QE#xJaX23uGDt(`3*a`pzj8n0MaQBM(Xh4w9MGV#p} zm_5y_oP3b%BpTiOh#>>0uR`;wKz>H*G{;O(UrK|b+#nPJ!Bxj*DJr7iE`ecjc+ z6T6-D`WP~8xy$;N+og8^+1ufHT=SLSFk0nuw?^u}1$Pgg{E8zD+V$nKllF@t*7Wf( z&3u{Di;K|AF3`=L<+4cp$d<#wI^c(`B19Vq&lyw0?r0$BQ^BT96z%6pN%l`p=e!Ds zYy7gONwO94h8C5)W@Jq)va{Ef4gytk=}PN7`}A?`qAGBwTNWz@k=|eSt?XMVn+R4( zASb^p;5yJ58&t%7m0expanDr6YHI>UR~! zUJK>fikN4)jYuKSH2w-{2LZ-7tTE#vS?pQ_Z?xvHjBu$jyyJBjMDCQ~8m+p3Xr3yC zg>6KZuo3?yBKt(D_B(SoQ<}FyP{|N;=^}2lyF4Hix#{&{$+o5jR4A|g4(f|;V$o#L zH1=+d#3?8ZBdZ_mja4WHqhJV%;ui`-4+kptReF)EFi!*d6e;upP|QlqRQL}Pc{q0z z(+{!mI{9*Ar9JtoP)@i~)60^V6eQoIoy*gZxx9S~h~z^U6%*B4)7h?n`Zbb-@IoK=?tO8#is!{L4gFNhW}@tj z{j)U>k3d_;ni2cCGxoR%KB?VjFTE0#yia5TE-t|q<#%9SFn6m2((00Yx{3rYd~lzD z^{;sd_00|dl%$C%G;Of3^cU%BDd=juSWJP%t50*-pI5tC<%;AFIQArHd!Jsduta%J zw4G7yH>vTW*NOx}-G~oX2qrVLA8OUX4I`A>38TWfE3!wKqfU$Cejx|pM{m2Fq^?{a z{2>g`Ks*WV^0m}i3$@ziS;(FSGE|4csI5N8fCm-&@7Mnp+Z**s&2jd_<1+>mgor z(Mmk}$T47@;7N?$pc?}T7UH{dhnXGBw2xDMa_D{9?l)g?y!+0tu8n#A+hoJ&`5%z3 za%6cUEY>o$R&V3$!8xSiD;U0HDiCI(sUM0b%GQF|01`r#z96)Oc``eK9sW2go=CAs zqNBaTz)Vc|8%f1C5`s?vrxQYK@ibl5A|6Sm%u?L^WAs9CnHQ1JK&t4SSCZQ;%gnOrPQ3_&-e4(Os!?O zUEA4ialojS{f0|>$6uE2uHC%r?!l)PvwwzuQ=A9uJP6bp75UEU*q*We)jcc%i*0Fnb(ED zH@4$Q;QAVLf&zEn-*<=of{1=%P5CO3lJhETx#@W>B#2Q32E|D=CHxebv|0^P#{P0s z@~tx$A^YjZt)~s%49hzB@+s;!(z*9dtYce+L>)ROUqgTbe#wMRq$xgQD617?eJ$Pw zSsOW14DEFOH!P&Frq74Tb_mN|&d%MGEGGxC=2(+w&bL~7$?n=0N%E<=l`jHKKb3vA z{T=wLWen?TvxP=rjCx?V`zR7^L?5!&FxOWKbir7%+3 zBo%&PIv8t*31KFonIO|Qg3yAwGvb-A!?n~3ead|zjAsC)LBf$5j^m$+J{RPkfL=Po z@*4@zMw`V9e%`O;SMt*|9EF$;yuZ!-=RJh^jK`V#d2hGlwwtK)xfzfw2e5ZxfJ=ad zQvl5X$zfI^%u+sTH6bD`>jhs?lC?b}MDLjudJ`Y~ZaUKsJ`)I3>Gar)!7%XEMPo_U zp2eJSxo=Oy2?P`+;g0(17Qn&I;~X(v?HQ-X2lYxt%f!DgyP|!1GaMz8<%f&2Lao@7 zTQ&9Au-yeNV+2Z_3hibSak+F_ixQ?5L|filRUgbJ>#@xpw)Wxg1{GmnMd8WtPK4SR ziE*JuzQiM2Nhkxve{CCCUte9U)n+a5rvEpM>GZjZN8yFvK8(Ot0r^+fuyHVRRIa#r zvdBi#%Ls1^pmcB?356HPi&+da-B|?6Ga|X5)q;L7w{dnNxLmHecoSPiuJIUTEMiD? zHi_tXUgXJoI}%TqFj(G~3kWc#3g8GU1}Oy*J4cgL83hh^TFSaUdn=o2-K1%m(`i(Y zXi|N6kk}M+c-23Mvn8_rn<6)$KH<2_?N$gKqkSu`v6~Tt)iq8FJ;~ORmF~}&>8c^7}n zmJ5*SnB1}_0#e|nSUytfOE=0*abvxX(UA>pGhiNq0wLaOB6(_gGu&$T4!+?`{;BKM zje{_+KPQmUV#Wmge_VLgNWIhK6uO}b5Ki-#ULU3~Ko;pA!O6oi!zVx_+F5s#_ehFb0YdZX9>c>(A)3AkI? zsMwV&Ep>CsrW!yw=Wjv~Lv&y|d>@*{HbH&0vS8QVWzh`I&%vXJACl2|6*df56C5J3 z?}Zl7v+O66nQZ?b-UYMTm0+G3?!>k8>oWv@9Wj`7g;4uFzbiZ#H%sJ*ih(% zMFuMryKNNV8B-POD-tUw`2utFkkT-a|zHamB7f1Jrkc z@>Sn>_oa{JjwPV`{j^L1cdq%5mJ?Bq-LI&Hn{cEu2Y0r6#sE?lWrylmb8UaWtmkE6 zfOYyz9<@cK?VUg^Kq5|rS7S9!OQ3GMDoUcV> zx-XqD1C*HCJ_0Mm3iD!*N{QGUKLPetBCwcV2NEO;wli&Ny)n$Ojs<9!jzM)@q+Ltu z2ZLVYy#`@b)+xH;SFopSP-WEOaVA$RirFl`JMHaWYpQ54yBvCL3&2*`v1j7C&yk!8s{t*p)C*LFz4vTyUi{@l4$nbJX0YS zV4tr5J8Yop?AT*Jz2kaYLiBoX&9J)gfv(<~4|g@0XMv^!YNJw2nuK9KV-DqrjV5ZP zghubvxG5;hP}#&HP=oXkaY;gUvCWrgZ`hjgV#LYPF5tq7^-@#G!P>1_TDXX~8D)6! zXZe?jIg*dcCkZZR7k~gSEj?BCHm~$>2bPk8qR>N=>kOZC@o&s=z>>6qjJtx104DkE zqNbiw!ZyXm>GPz-?a(iNhx?OP7iVX$e{S z3H_`x>v@u{W94MLVMtDzysC0aX5a9B&G?C3Az7KzzUR5%)XfWP(#h*H$$>*EPN#kA z#SD)g?CIK7lFZ0g`tM|H+wB>n*0z)X_CgAkij1X&@WV8UmHHU>wD+|s=e8NRN?MK4 z)8)P7AO=E1i;H&}YeHIOD%PP|i!4isMl|$>r!jCg4=*gQrxnKVOUYbsZLPUx?bXFE z_fDemydOU&Ji3o9&95`*pAqJak=~>JmRDnhT*8&omBdaM!%|ewL9VUPfi)gq|EOxf- zw<4u$*)ZGkm^2A{$`gC#u#T^ZcOp&Tzc0 zO;G{sD9BFWU8yE;(*1vSg(vztLELm5)M6CHu6WG5>%dgwBvC0#wb(YLKum3rM+$}p zgMy(RvcuTc3Z1Z0fy#V&?Zli5`Ek>AOsPa2$j_W>rd^HEHlE!t%#1r9x!Yied7-A$ zNxu1R4vrH$+nw2~&(D|P%mOA;q*pN41V(iBLb%~UOcC^F zkyCYfzdLa)9RN%)&`xTk;%IPGnRXpSmCXz_fGJ_Cme7_Kp4W{a8+^vDUI>yH{ZMR` z{11laMmwf!+m&QASw7wQ{HyQ7(cFXpk+oFt#PwO`!|U2JKin#;7_1Td@?`aC&tB<% z9SgRcm~GOk7#(uB>V{>8BvUfFlUGf(&!+{XV->>Pf~tc`S_h52+O+r3SEfN5F- zcd{zyI`{->wFF{ejq5Lbf3Og~Vyws6_;yj7{5Xig1;p!rrXeubsy}buZ2hZf;<8#* zS0Q?nh4PEwId&8CcY|J)!XR>-zVnu#c74^#d7V!;TA$MNnhHMKL8b1ad8KfYOkyzS zjsL4hM$w6OQFW+<^fD*oygeru7w*!LYfg-1?c~V^}EF9GdaU-b5*r$ zdW>Lp=y4fVhawB0z5{BC#sh2)aZW68Lg2Qkat$%!2LsTq z0hvuX&NI3{8KsH?_I`ie?(q4%vjM#{F?8GXdop_=;$lw7xw9U+%YPTBAE#z{?A?2R zKKS=beSzsllNu*T89oR112#h-a(@)R z!=UQnI=*&(?JrX`kn%?%r+Q}14HM(>Y2U$ zp!b(S&>*xU`_zJt(_m-q4*bK+%uKTC9-z$-ut>AYlInr;R-{YubIYqy5y+64q|uQwE>*kjuz4ko~#o%zq{)dx61zJ zOi^>H(grp4En?}z$=LM7_2go-Z7k1;vuoX-^Op$t1`?>Q))S3xcK)LFgm&(Sbi7x( z>i;EjH2$rtdL^Ay6q|UEIo+OekRrBv%Vgxr`rDr(Aw6UMc70R)7@5fe0Ym;xZT{E~ z6bqo!%DLwv!@I+eGj?|tV4FpJb55Bxtru*sBlSM>a3wAMzLYuHIoy$IuS>AV;U;Cw zp=HNZsAQ;uu7ql-?>W@)z6fanm?am&Kg(eo;7B0x>EmQPisWeVoH}*7PNvA)**Jn~ zXnh1JJadmUEK;;l*@*Z1O)sqimVd2%__G|YZ%f-hS+jD~pKZoo9LxpdEX3m(hIFh5 z@THK@RQ!sM6?|tKJSCFz7qVE@w}&_g37-QU9vr5#%m^gT_F{UA6>ow6LS>|1&$1Wm zeAaeCC$8EN+n4b|nM4R!?9>Viqvn3}j)dQ7O#E{uDYxfywiYYJS0#ZA*;uAi&yczc zq|6uas$eEB@jFy}&`nm6y^WUA-$r7Y3=L71=hh!x+cT-+=`{~jAiNc9|2e#5_kzdvbfGUK7i$lkRfLrZdJm2wk zwY&6nv@^ZsQ1`LefXScf9tI-k|4Q@I*L%r-Py$CZ9>Xx^sC+k`-EMRV$aA15pZ{wf^$5(O%ul3K<^RXvvbCNxsVfuvM zF>fPbS32ticjupvy`@eSn)>GU+BB{dK)l4A@w+i%uaLX4YjoZ;fpB?KEF9GFdRgSs zuV+OjF?w{z|9%|c7iMr>JU|gLGBTnPpo8jczq!~au5L~@4QOPfm06phv(A*Q<$;1DH&QVDXMa1k<2*RzaWA&H7T6DDl?2+>7cG8l z%%}blCdEgKk(INT*va(flK@lCYa6!Lx!e35>sFMawIM6G;?!+U0(x z)hkpBYT)}O72p;=p#~En2f!(*pM4< zcTU(`NP$VnjNY-8JV=T=Ds6Bj|sUrV5)+c4g%sX>ba>wIs&XH=tu$kZ3{5x$>S!m{c=3LY^_d zkcKLPH=^ZSx{ct?NB__S({}<)H}nMt@6j4*&F9_ZrT!IG1seSuIp^e?Yiruoj`(Sw z@nXltt7p;X0wew7Hvt88&2+R=-Ljj5WCTVM(}yf;dzGLdq4#1Pi0?s_WghfnhK#{P z(GKPjsY9dw+l*4gbY7_EP886#?~=A;l%8u${WiGb^*hKijWww7TKT=LZ2Sitd3Y*h z8u%mW;xW{8>Lz%j|C(DMvrwgsICNX{llutl9l=(tjlex7(O_h(LrFwrCIKbcT;_Wg zUBUb$Hw6KEc8=%-8E{BiJ`3# z&DaF`Q&c#Ij(%~vDzJ##P67vf(!Go>%smM;7;pI0wBX_&#$hA*H&-EBTd&WhQWLz5!)Y=Me+CC zn6e&RfL)Tsl3fWae1zq3P3JO{DpocBj;@vQy*VTrH^GEja*Qrnmb^#Y9hsu6x*cTXP1xg+n(9aIa67#6(7KO`||+%%F=`LCqLgtUqLLZslQ1SS*8S{(A{-W`E9 z(lk4Uf?BRlX}`OyPk*an8MWT{yL9vD?rgMq2bZUEt#5|~4HA)sed>>s{XQSu5$&v& zHMH~lImQ694FYyB?G`|d1&rLGB9#|V#EXr5cbxf*KgN^&PnBq17SCUmoD|=pUytzN zh-BPOHqV_9yu9?_XV&IpCu|k=N9=OiA!1mM#vnv!k{4gn3w}ZN#)o*n>(Z4hv5?P2 z$x5g)>!+$O8GhfG!f?XNTUrMucdrT+9KGnsX#7st4uq?*@|Mn7R#vvFF#~SW1IC>E z4x^B<#7euRc&$==^&!CMc$GOT`^325;6V)$FJk696UD^kHB8Z*kq@E8mFzN0Co zlR#%eDUg(xb9^72_nqujujlT0ZNPZ#50R=!UjI{R z2ea0W4y!hArHAXxuj{01D_zL-he_U>1JSRY#K?GyICeG|g0xU^+fxjC?0x#Pskfha zT)4Rstzi6qqmSGr;a>vr9<~<)CX+C>eLV`>dckPvZRuYg)oQrYZ&=?v4AYD=5rT+! zea~=UMKHwZ;mAa%&v4=Abbi0y5BI#QkHT>I|1pd2Z~kkQvySaU!G}%$Bf4daI#o1x zgU>*Bf!q+v)6+pi{S^(YXz<7{t+N8E_S^G%ISx?0r zd#mCELBDr{Yl#uPGI+PY*sfT1!+!wzJ1Ihouoy!=hbSmcaq2RIUF3#w|8gz?z9;WX zXO+JZ*JxdZjN`U-^<8RkQ>(pOX%ZAoP<5G+ndyRNLs&NyE5C-PA?j7@v6ocoS@%R+ z$yadnJ=KMp<#|FR-PY#3*TY0@!}PCL+fTjC+Ioy`MwUrmftoqggcq_5Mn~G_WIT-B zFWrj&<|-DF+F4#SY>-6bkYS$pA&R|~_{lWmB0Xwi?=*ZJFQ(%j{B4gzj-r23zZOLd_q-CZ2lEa28?;C5Luee~$5suRO#LoN zTTUIT$42^xXuhBAkJ$Q_5jb!cBJ@OB=2TMC^GJ9{y4 zLi(r+b9qiweg~5Jdt4R~^+elxj=m0*L6#n@Ska!$-eCJqdcV%#eMd~NOkZT{KlxWJ zyLLbmX7)RUjp$a_C6@!(m#1A+P;iF%M!N^+MgrEN7PUQ+mlh$vpy%5z6^beL?tS;6!_l|6)SN8ALpU}E zb@-i<-Cjo#UtkW~r*J#EfY4c`4R!8gCQ|B|l-`q4zAAw<)trKSk>UN=dx-BU_!9}~mZDlkDTX($E9`YcxT&tC|M_FfO)TQJo@>$loSPl-&Pg%SaMngUKAj4RSFeGV0D*xNO z;_;Gmau!87#gP^EZmZ&##g^Um*TZ9?mx~p^7+o@nFFS30W#5cq2|4aHQsK5qQ&Gd* zV~ej8k9-Eg=t!>+)O86YREr9W=>;a1rlA0^r;?9_kK8ilDt%>hF>;=Q0`ni*zb^_h zktf#1^!;m?^wd}+;xB*4j!_m{(T!H5)YpAv;Zw&kF5nG22U&iT-RF^KcI$54Fy|BP zkK1m8nl|DG2c;A^B3u-V`=o)|?Z-`vzqazYr?A2zNjK%MqrD1aHTL6*B=x9_NmLo$ zDt+s(3-V#eVti3wmcjt})>~1$aUQ$(=G}_oR<)4hx8BOb2G`55y9epvt39vK){$}B zaNT+8&<&Ctgfd)7K~ce{Fc#tRqPT*ycVe=nlD;>y~n7M5YTOT^C{y0YhQ$bW>QQbEdrBs zRX@BGEr<-$ZT?(~2a@Q$!MjbTj2>aV86XZ!IL?m<^~H6$e^nG6$kpc_$wX*iAJ+Si zVVO&?@4RaA%2$eaV4SG0dG+*i#X#OUGckDATVkaLfC%r9mt0S1?as<6`D%-71xZeR zC^#59`*AtF2nj7g*6(FZ41F8iuQsd7_2vOR8{v=jVN$L2i1nD$Lzc=O6QLz zN;m#QhZp7TD+ad17)FCaCYnO+8*X|0_~P>iCOn|H5V&4)Jtj{Oh8RI#Ekln$sq6^^ zz&9#EyWMASUVv};vp~6ksKV=#Qn@T%Ei_tLz{H$_Ytx3%AEl_ag3&lCZ|XAYNen+X za)rE>_D-^wcMsJF4cf~g#5hF=picp-KY?$v4l+3w2aA|?c_Y6B;gg56&KjXbT^u25 z;sA@L)N%qKoSY_N?EKS@&ytelTw}y3b>GmH<%{DmINJpcS?#1}M%{qnZ*E6uQqh@2 zLFmqmUNKs>w|#ZAwn|MQRE*fE7L6L;S*rmos4MDYpdr8}&3E8yJLU7i@q>mPdcEhS z3lp6u$`LtwEOQ?}LP(DsP}&?&TH0c|Yni)@BhtVx2c-xIkK~aZHWeN8Q{*73=Dg6Q@2&TUx6d%bqnvd$Eh+GjHdasd76vX zfWA1Lg1;40jZ>z}EhMqKuqw_ya67ToIFbiAh13-LkU^d^mDN9SUt3d3Ni9!>m>vr} z=dfNA(lIQ>VjGKP6--DMnZBP@jvryfPJ(pYPh z9F1+r3?Ma>->#@gW`>K~gZNJqV~}2DcR8Iz^iT$2$3%Z-V8r;3+^M-7Nrs-h5lZy{ zhOsIM@&HdI#WG3>*cERyMnQbtfJ46tWPmPw%uX3hcj3_3@#RGNG29ZxO9Up6A9{0M5_xdLA^Y}T`y5*OtTKrev>8~Mw7#MYN^~18qM&G>2 z(QShw^@W^F&n&Ji1f9n|cJg%y9vBM9KzSVW%9yOV{cu&>yzH|f)_T`c6mYj=P*4{0 z6yL+PvA3Q%R9A;jc-!tvl}UoB!+Fa$RHGQ8@1=zWYcD2!lFvlOMW)5&?xCAeKqgu8 z-_Kc8n7EMTMyqRgg7-sb!lBPmV~=!ZaV&WbyxTk~f?n_U zez1!l?tC8Q<*JOf+Rr;{7_{@w4-QGd8SOf@u7L)e2&A-yTs(?ZomVHVwzJvRVacG1A1h z@odiK$DjJTU-#mYOF?%s8TcC2UX4Fy{ubMG$$xx`w0P6$eIEWF2W@%%F)gwc2)#c` zpRgmT|35E4kMI5isW}7TCjGggn(JjBwlQw!^`KR<<@!n@k38XRAc@2~EL%Rf>RWN% zH+&!&kQ5bD>_Xfc1EDF7rFVs9wY)g8W#(Uer%absP1NuWnrA0o7pDM;r=?hhI7j?P zIe`26p8wSDVx-87k^()LPL1j%g7jDw3}@t%qWoQKb-~QEJxmu)RE&6zMAh$HI}e8N zDDH+H;QqmHBt*QwC17+`x*kKSi7I29SiNg8HpNTLfWN__#MN~OTQ3U(;zeWffA zp6$leS3O`B@zrNz_1e@kT!?OA*vX07<#Uw-(vf{nM+>^yJ@>VbXQ~b7j?yEvo5DFuG_pRh#qO$%1Ga*L?>#a0qNOcF!atbb^%d@3vKq~yQzV8dv~6>LKxpH<_6 z<46s*^5SrQl&GFU*ZL4)(Z7({MiaZV*wc2&)Q-?@t;UmCT&bUAZ zcMcUk298Q~xN?ra`w4!!;3OWQxfey4o1QVr@R|#KsF=Bh5yo@;?Xs5|!SV6$s24T7 z#Qm8&ZdPbmtFRsa?OHhDk5Cn9_AXO=9Sf?}A>NZ>SYnVRXs;gz9P z;DsD|A-jb=;k&4`TsD07X>VpOVOB?MI*pSX0-;2K#1BfQXM-yEpS-P-IKsaVx`I{u zO0-JqqFq9qsr#~>9nlo(9XdBiADb8g<;(MW0`M6-ZZ(6-&E|wnm;JmEkHXjN{sdlU zcmINYxiw!cG1FYGyGHP?Zhsj-lEBd-&07SfxnN8iZcq~SCG z&3|QiI`R#ov#wLADc*mirBW^IQ&r985P5FYBBERK9+ z(G+Ez9MN8Dy*RlV)h)(hrX;@Ir)d>ReOlyHxsc5M?5 zb@fZHk=-|fj92Ggy>#+!T0r_%F>?&8Dl7$@7~!;rdN146kbg2x8z$M>B39aC)TF{r zr^`)DOir)Xtoa5DanvF@o^>7%SCY3t=YjK|{KbWm)ty&_w8G5sRyp{|h7I?1bs1AJ zNrz0?c}9UvqTdz`ZviMdJ4GS6>e=q}smHzVFf_A~y%p8}mQZxh6dvKaOK?=xZMnww z(x~Wb*jiAHH>RUUJqZK<)iOwmB<{R#vk3}o?vBS^wrc6 zbi$)|Yl~eQSe(?XBKp6c{ROCUZQv2%6^22aL(EfKcnVZoO&8S(;%;_QPcY0GLUpy4 z`aa4i!L;A>aE{@Lhwk(pNpc_ApGMK)(TT=7OJTf53Q?7JTQ+DbV?$C@zv^}()$%@# z>d#bLFs798CBNaPS;1x$yBRhUT3f{QY!Ec`p1E4N>z`fI9)4AL7C4Hp%~4F%Pss=r zU3m$6)t{L56+GXl-SI5Dj7?i7&|0nU&NU}KP8c<$l>e*MbK zNVJ#D`*`q(6yceiKtas5Uj0rEYVv9^lDapQ`CUomZ#SE1-?gIU-x1vq-=Hz(`r?jBC3DPt z!`W(S{Nh5P<&6ogqmukwPoz4adq7hWB50}%J6~nVGuZ!k=i3gP3~t0Vfri^$D+!8l zd;jKD99Xo17tLfCd;{K;%oJ<9+=GH^Lk%drxixhz!scgnfd(tmcOledc%iP9C0c_o z8;J`c8X){_BymmSI*`iSD;0rhKS)X)-X4Srt#D8+Z->cz;giKo5lX*pi;fu`DgB17 z?K~~JDL4bBDA|ow6GMy)vB5zmttA$UgvAkati`I7LAVtX94E~RFXbEJ{_VACvinay zad!68*az3S+8WoB8Zf|rivMu1vh9uVOWAhh>$(tr4>VlkGbyv z5)*i%?&q0gfk7}qvoWXa5TlfneWygXC8&n2{7<~uq#B&q2|MM)te==WZ%zbG2PbAe z=rg^TQi{k%kOcoW+QCHUKTDa`3WivkdYJLi97PmNiYqBy21V9=7gyU&W0nt<^atWg zy}Lj1L{(iy5K8%92l`%z`rc27IW?sj%S01?mc-_Jsf%p{YS1+V;WRF;ab9J+^{`i~ zz0IeGs|N*Hlbn4)+}+DO`{gc~;m$L6ASm`{`}Ph`6Uct8r`M`?t$`$0Lr%6g)Zq10 z=1z}WG*LqXiI!OWdB_e(%DSP!ue*E^I(T{X0VzJ0Z_E@XP zse80GN9=)ezFr3A8I!EQ77@;2Vb0u}|xP zpBuRFoYp##51z-U^Q_%Ppe3#_#fv=M*rs|^J_`z=I(j^~S6|vEazJ0yjZj37Kil`J2Mpfqo+dEr1+mbHoBx7Jk!!V0*c;NZJ$r!Kfk_|-51KBj+%Sw@MwR@qrvFEQd+IzQZn({?ftx?J| zk4g(@VVFYHB%jrr9I6?<%3UfeRJ5 z+IL6Qn!2D;*SpjCZ79RT<73{MFuoi zN%u7ji&yS#f;{@zAV(zmK03 zp4cic*2*cP9QTLkQyMWa{_o7Nm~t1K-zB)1e=wKvVJrSvuxKEpy_$3^(}s*dw_NE9XlC zImi%ERSBCRdLc!UucI-|LL76=J#UvNtZcyyWaI5o*au%19U&T5xzgb8!+U+*I9sjO z?xIcJv%8q`1%^f3JHeMZ9YfN0m7uQP8bzi(sJLyh9XMHei@E!tf|81}oi~z)4NoAp zfqOn8?B86lkuAeaBUEk=V|VBmnPsgaXV@D&xzw^KyfR7(`~=0B#6^CNW;x-~3?1XQ zJi6Kh3aZczEZXuL3#Z)1W)jACnESN$zn&(`0Sfe3l6^4uf$A;2$mVCX)g^5C1i4va zaJK>?g_Gp15NIgjYxjO?$D}v2`j};kFH{(M8Av$75OLPZ+E~Xyu`qIL)Q38mYFsE8<%3Z63^e_xzb}W=!pZoA3TE3rYx2 zKH!=U=XqZv`D#xh{u`PGqRwJK)9ex!=>YTW$m`8JQgb9B_EYl1!CuF6QH>7T6i-^u z6SKMzsgM_#I-X5J@}BY@p@9_}Zav~O^eSIJt$Z(Q@ufTz>HT=5F!4+G(|5Dj;9#Xc zSbK=~ioft$f{wyPX5FXVkjl4(e@ZI7w?%3O?BEwA>7U{Aqg|a6ejk>o-U%mS+?Bw6 zu)xY;7~b-hM7sFl2)0&g20$_jIUh;rj@1`f&r}nM1NGyPh?`Ml{wmI<5-@TahzxcJ zoRCi8C<|BQpxjujB^dBID()oo6=KGSnc`5vq5Pz1NjJ8#M*6)#e2hR4EAwtlmV(jU zZxauqVgtlvbvh#&1kYK=8klLDKuu)HXS@eQXKd7OyS6YGq9YlI?0{fQEKdd+fJz81 zi97^{Z5two>B2A|yFqjB!Vqd{|0}#M50&L~5job6hJeNmMxv zD3rV-0Hhi7hlPJG>?cwFY{*5CS`nVAY@S51M`7nFzk~K57(9Nu_n+DU$zw4>glEFE z9qq@tyVfmS5PnEU>d_-Z4Qz22AZ%McVO!Kby>VpjE6rm7mTTJ$5!P(d77J@ex+8SE z?9w@>{SMb}Q!ad7-ua0t&r7}J z$r0hCaIDw2;)g^oUf-77b6@>T+kYbB)pRkPO?4-6FY_r$KjQD<-H)0Gqt$VSaqcwi zXnX}ahn4l~r;JctjH%a=T74~u;TyEGJS-VB&t9jsiJUE+eO)+|wdBIM*A20P4+noF zYH(5LEGX_(9%QFs2FJ0ClOyESx2poF$>X*PBdJ?m_>e>vh3rDTRcO;E3-})45zFg1=yHeIpoFB+XDv{q7(Zo{6b{iKjUc+~ zJBmbaL&>u5G`3jEjv72$#}wJnHsm~wSGq$2FI}FD0@|cFTo)H#9tG58rpy>-*}U-T zbQn&$oCwmG!X0qyf{t~zvLb4ePzV((SKwH)`0Svs`Lmc!K+If%|Yxx$db*AG^kWM54LEwEN8YWoOA) z8=hj0n5Wf&FkzZ>nQp*aiC)`2CRk2+wAcpQg*Li4p=~3;;LFDSnVLJOySAPywz{@)7%C#WZ&l8PIoBI)f$hs922Gj>Gl5C?)UmpLfQF9omTBhB(4F7Tw1mpEuCGp=bYM`Gw)jrUMZn0^8og?3uYj%U7xlX(WKI4X)JC z9irBWBLiuaQLwr6Q*ghm{?%+s0O-{?_b!%oI@S-fncV!nGBE zASj-e>l8voWSMkRJyy^a=?JifmrVbYh33XuCAAJ?dm3cAu?BAEu-y1hnr0E-n!eas z%EE(E(r<@m@zOXL0kU1_6x|Ybwv7?;J4yl-G<_j$G>ZOgd;;;~RJ;J={4)#~rm|@i zY@7kJQ%;k+0L|5TywbL4qSaRBH=7bTlHhxWa%?GWu}r_ax1o5@EUN}jD~Tyo}5YkY1^n0Q9T zZl@IlYG;lor{E{KVh8df3@C`$A9l>@vi4F9+egfXnP8{Zp!y+ed5*Ta7qiK%AFNm*jrg$I~^BI{YvVnv{e#bsB%qy zHp__-V;?D=(|G>;#}W;A#8#!7OHbfpmogTh~jW5vP z?2wD|5U+uAmi9GI^o}zq5jKA+pq+z%=vI&rEozw%z1Z)sitoAQ!FNQptB3D~^{lWUr>C=y=d7&F35O9wO%@h0`1owtY)~VzhAxU#5LNE$mP1K|B%cEO&(Y zRNa59|1H9LqkziTAQW3$?z2VGV)I7}rV3=$hjNmW4|S_TBGiY?-_xhWK6G+2Vspnc z7;>htDbqKR`8uAk_3iZy*xFqMuW*igTR>6F(;_w%CN6MF$=wZwJKiN|SD{t(cOmd) zEcjVBjDH`G0)b982tK7A_J~O*?%*2a8y<*$yc~QXbWK8n|E-0u<(nMyz^#&?O#PWR z3Ed_2@Sgm^-k!2MLP9V3yPTDhxAlj}gPA@@;u^KU@Yx6vQ2@H}saF+_x`cQ%=WQiQZis zL8=QOp7=BdZ?+_vkW5Ww0XdM9eTa=u_54o{DlARIfH8TiZOeiD3<{DhwyRmSmUzMq%*Td`)i zC)bQEED-q5Qk!qIHt;hbx#!ZepqpS#Fb{|nAm7IbKLx zZ>P}uOxJ<%KX&y(8ArK!dHs7!BiV;(rSo%+f$X7CE3#Y>imc0soB4j&T_Mu@lYj_Z&`qob z){=LCr5Mc$E|K2wD=jLb(8A~>apDx5+@LvMo*PHu1>B$)a36*MB@xu-kuU^NP*e!Z zt_qMFd7?lP1VoU>Lzm3MEs5m-g<)Z&ckoDrn|VzBY@^6g_}W?ScHju{IgcdrSu{Vu~X(s&>Vqug-IjjrafdJ^zOM)6cM z2oItJ`R@sy;I9)lWe_uCGGjLxH+yk=fO%RewgMtJ5q`?)qA_=T^QJ%MDGAcpXjJvJ*rrhvWfL~_cw#eifIFbUnCXaMwM6E(Grm-s-w-%SYiOfTQ0hTQ1Wf2)= zcPF;`bu!GA`g|hSqdstRANA-O<&4Y&QUk1z8e5Jqka7iZAMGq7Jj+f`fmxv-XjD#A z6phr^cf1E{bEe+%w|C;S!o!M=3-3-V?@yg&>DVc+SaJh5y3@K-ykoH$Hzi)_25!Pk zSjly+`H|-L_~?f;7KrSti0q6AQd6z-;YZ3>7hIfoBbn6f$b>>3?^m+t0jLqo1UvHB zwEYAxcJR#sesu$1+`@}zu)h^Qp2RyB(M9iKnZ;_tEl?4%{P6CiqTMNS3=l-mJCS8e z0TL!rYT~OBh7cZLH-cIUvQ#AXZG1=I3-}Fa$?+Q#umtH^Hj4xp!vfjp84tXX zdJZE0WW}<}wla*-zQ8KU-IR zM>LETHiKjZQ9DcyiqYJpMMQ9BprV7X(DRsEqJcWH4yBh9&qWH z7<~X2!_M=YtEap~jGoUsD;El>@LWxGWRs*hDX6r)A3 zy0faY;2dKpV@D)^tHA+ETlWw0E0QmBk`xo*IgT=^XvxEkoBRO0Jd$53Sb zCtw!76EPJ!d0cOAh6X?0ELat+bk<03&&@wJ9N0WUr@_F*a3D9u8267yg3cko9ByxK zxw*OF%P+s=?(UBJ`+H8O)A{}T_wRXlc;L6c{ViX9`6XX|`6bVvKWCbzt6cs(5ai~_ z;yD$ZpaTuDkl{-N$AAZdd&GUf9pXXa$hL`)X?j?mFrYwf!GCpb4f#<(Zw(ZOTPTZo}#91)lqXyg$=8WLRE|CfY$p84vlulV(^ zf6eK1Vp$fx`|i8*X&O)P1RKQ8S-yS9A5Fw!r1pB=%L1Ve)=*n9dV2e_#EU&Dy;Zte zIu%f(D;GGmRNpR>Gs{^Cj@^m(PQQyJ5n9lfmA(ee7BpLWi}PUa&cU5Sb82Z+>r`tL zb8xY0qWR3)M6^mrxh^8sfR-x^R>@83UB^nG(y`hxb>xBC1nVtv+!5sfzDJr#X@^|#A|?eVo{1cmcVKB zsnh00n@{)zVI66q#ey$RUtl^x$*SNbDdamv8V#(0A01zNZd5B*S?@u)6Z*lhH--96 z*lLtrVcr*N6ly8dDZiOe+LE@>S4x8mpBEl(D(l=?cb&Fx^tHoUQJ<;qc{0RWP^(ak zwFFBIT#?`S4%YWVcSc>DYJ!@|EV?nuET;8DSx>N>@YV=*mM|WQxJSG%i9;+#-v^|$ z$&pn|Ggu=^!TX+eJmAxgZZpx0az9gi&-BnJyBwcY6Ryg=8K)h*tCe?5+;w%V z7tTI9irjSpC&CE@y$8*LelyW~#k&#J@KRAL=swcmL!wM2jRtlQ626t$BI!mGZUrno z0;+>Wr%)-;D7`WJ#DU6QVuw>nM?_D6;4LTIM&=}T5F^2R+v}n)KEa%8^>ECyzo$rvWoezr`3L5%bwuhC~RcYM+E*Q zAfGa#K8pAA@aZH!N3ZFVb&L31ejkETm@s~v|9lYklleS;FVIh?AX9v^ROX*~_lar(p9udby-053{PQP3nc;{YBk1SgI?7`< zAb);T%Rc}<{H8yb)448>>!J84?CR^CL$b8b^w&q}AHh}JkE4#hJ;S3IpP1O^7-KHw z->Yl6ItVLO>%nx+qANE<|?4D9-Zf-UU& zx(@r)d-qM2`PT>6^?icB4Ty;Ic>Sd^9J@JDp5G%EeX%?tb}Gq>91Sa-S~|Pp>`B?4 zT8CwY<%AXybV3V3!_9GnwSZU0C&#OUIfhX>)GlUEzBx{jO8>_Or-kSZ^M&e-na)(x z!9ZbL9w}2?8=M+Ebo|)xfKm&-6TKSU%9+-1I*klrbPefb6HA^BWugspVzbcP(mgXjhCJJxlkX;1UQ+JdR2;q+vUI(0OGCZU;B#3uK< zJ;okQ3!0~qKcu{5hf~$5YE)}XYN^nU5o;kHj2@M!#>7gaGe-wH&4RE_FO6n-0%SUO zguWPUsTh@*M3h350<$6CQT#yZd#oQ)ZlVRV6SH*=cEr`$TVb+FVUMVU7ch~XD`HJq zjCu0ngs@1Xfv6&85p(kBrH}^tH7M1QJFeV)x36BT*ddx~x!-}C|m?|M7S^+B2C!&eK$&)f0Z~`@; zP}C?Tuo&`(vZLsR8o}g-jlHpM&RSP`fJdb9iky~pcBPt zYAoR#dvNTjYT`JG#uUMXvx~~!EBkKjT1iJD(Fp}h>8$gb@_waZg|GQd%??cEmQtQE zm)rafHFrL&Ehtu0I+Eoa{TqTOqk$**SA!JaR-`><2TI*jrX8hDs8mvEU?@t+khYCE zkc9hKD7Y|2iEOtJo%yt6|37Ag^y%O;L$#&9FqzkpBa4ZA>+VRoAbtSK@>Ben?;-*m z#(v*9EIovcvaTByfJYYpxV{?sUH#76_HapXoivs9N8nYQEmA~eoGVZ^=G@N5{kMrP zHYJsOm0v02W;k$Z-y_IqqrXNSO%Tdz#^~?syh4CR|6GC0*v1YDQjuo7j1fQL(tqPQ z6%ZmfeEsV8V6un+kD;^@WKbXvoIxBSA~MhHl}o%d2FN^!%jn8DBmYd2@gqp1gssdG zW#oTO9U2?h*<6uw=hxqQj-Ct~}RcNsaDN1MsyqpC?PMUYL7 zL2%@=N8lsN*ed)4Pw-a?0!*S-ut4NyP|gGqVL4?G z^qv;lX8F2oV_$h~>y?kWrh)S;z#Ys|SgJ8ybF$?;va#v}P;y=dKpA1KoU+NEjkH`% z>ru7@3J7I9Mw~6(FbI%@TPGS9Q@SD@x^7kEjF=w`?UksY4v z1(7`6$n>Ngz21Nd-8ycaRTD>5iWXdH44|@L#jpa$sjGj_|7>Uop+0KH0NO#yjO6#`rbk0qP+2v zGD?&@-pz)9VT`q{|78Q?ThA`Az^fBeMDh3hIIP zQ7zm0Bd3Ns`0Dhf5f5?MRj*m^{1$2aAPst~c6<`-1SQ9mP%4Xg_IQ}+#pxPegxy)Y zr?6oVF%jiO6+JHgG$E-FcHxS8AyW{auRRnbvD4x z%8lo%uz=*BL=p#r$lz;wJaRR=r27E2ydLo|9)ujCB!1ZxP-B#*Op^%SlWrO5Hbx|C zkbgxGG5R2!jo=6x;8}GA41CV!ty5wo!FyksR3-&G?$otROpv8%?*0C`SK<6JY#mDKmSx%1C|gThD$V0;ruD! zNbp`teO<)KgHt%i2#$zFqA3+)bBqX6XLr2E{^hr<|LH$t|MmZ*+<(Wz@`mO3J>Puu zif{hmIS=>0=YRj-{#TZ_Z*Xq-{?~8#&42w3|KUIUFTDEZSNQ3UEE_x_Ii;{*4n6A!1A<@lcCa?fdb;Qn||1WdKE+t1wI9FU8{ZqJM7FL?Rt zIs2Ofhuwkww4-QZo+{H++08Sz`&;(Yj(OfOPkB04;*>F-m2)^{TZvm-o*RtX__Cau z3J4EcYj}9J`!5XhJoDw3U-H8bKhRoZSr)$k{(D}(evO%Z)=T3P{QZH?&a#~}uo!V) zi1(E!A}}gJsH1+u?1*B`6Ku*vFyTJY`*worLXW|-;dyo3Hp7o9Dy}u zp50b@S!oq)Cn7{~WA0+#jhi-3JJ3S4g`$RcVZJBaXszNdT&YCglq~vc17B88`A!XF z9aF<-6s^b|+8$qa^kTT}q2D4+Soh&^zhmAAMH=HNh$$!-a|w3t%%}7c&x7zb(_?~i zgYOIdV5~0}+Uq0j^`2fjeq7<56Q_Wa6FuNI&@NIewhJ()w~n`t){dHZScMuyJ;wVwArlj6^EyY$Sj^J6!HQFgoJ{5gtKblRzavyLs$f}cPu(5 zU>Aj%nJ`1MVO|I--Da>(U(2w=b>g(54pq+1oLf21EaLAej#3bBa3I<0taWthM!R%X;h>El$9=oD}4ys)Ek2ysJa zio}akG$waW1Lv18_Fmcb%G@R<%_#O@yEGcWl17~?)Cp{cXtV;&M4OBjqN&phwA!GY zDD_C-^e!IUILXi~)WmLopg)~7@V5s^L_Q8p_)PFM{DLTCAwL7xQGyRLe)RJrP>gkS zgV1N2^cc2i*U=?N!NoN^{tcI!#pU&A$ERZdR5$!|8$bOfxx38tswN-D-pc*x#UBPA z<^7rbQZDlojlE9zIO<1*d<;Jw<>PWc4*48>KG$owF8|W|T!nrLMx21LDU)!{=%>tBNPaZJBvDFX z(;ERkdj7fl_WaihkL$PnKI`XP!L{*3#6>whB2tFu7bA=9goY>h*N1goS=aTK8x0&T zCk>RPVpGNDislNlVc}GqIXw5x-Jx}2S%{-j>0gf5M$`s!yaa0vPE)W{XPG+S9D3oV z!>$>-)hLV6j=}oSSsxbsBt)m!!c-cwb#|qv3Zoeof!tAdSQ>F$@%sgTSn&$IR?4p8 zH(uQaBzP|Hf04V#U!v-C(GqDLWCBix`FJsQVY>9KNRVb;ox zHEv2{w#LC4r9ecbua!=vH781;sxgr#3xo=kG_vls(Q3oB(tGZmwM9&)f~kN~>7FtX z^N!MI$~x!0&pF=}sK8Vk2PNONw;fa2Q|y4Whp2=)s1xRdIdo7rN;AyUKmxOrCCtD+ zQK!`yCjdBBJ>?4>S_c~Gs0q={=qhxjfk>Y!zMF{sjOqj-oI>+TtTpA)Hc_UT-F`xc zLfL`5LqFmDp5ESr1yY4Fr!jjiXzXvK(ZGzaL9k+waxX2I78Da^duj=a45?7dj;dhB zSiJ&ATu0V&WT`7F9rr!WXXG;x2h5dYvaOVsit+&6jAos%lKp1F0J)Gui2^p$t+3ie zRj$u>827%gM8o|K?+y1A_eS`NN5kmI@|#H=iwR3b5>x_dPn1HJv*H{bj!m=zD4j7B zI)%a^;NU}U)N-G%xo8U3nnnY7fDFY9HX&7H2d4QMs?X5$3>70-fg`b2qNUSNN$AOF z;0d1Kj|7Q4f_qP?xAb@heoIU@L^*+-KuX?D0ZPGybVW2upx^`gc`%4u*axJwE&*Hk z2k$p}a|Fir_$eAXfQY=ZQbbngQur26c@M_s9ho+pkuQ}&S>A|0B0^?QzS0Mk-q^wI zS61RGh)3CN0q6V2@jnY<)IZZma5)iR3sZ*K$aH+S?G%Hp9(j+EKC2CR&u^Y>3uy>| z#}%2@ZCrvNH~YHNg8VR-Yg1wkYW|K&M8$h0TE%@LdJw&m9>EPZ<=?F` zF2XIxqi5Z6_zCi}4a8`e=Y~;M9^_!7a$EHj%F8)7)COXelL%{jj?tEL$7TF*W%#HA zO(STeuuarqC*rS3c1##MK0!mz&rAi zN=@g1ubklLb$0u0oF{F`xg9RT9BB}uk(b!;CM1UdQS#@uY~WvMW~M|I;r%3mcNM2JAFue;1Na>Xoo0SOpJQCr?*(MRK|&p(*Y5dg0ZZ| zM7JG*a5}lAY%B;-NH4V=bxRBI;w-h%=T4s+nifg&Z0f#SOE>;mGrjK#|wx*;E9$>4nIfCtT;);nu&EUhuw%Bd8p8P$xULMpc9 z_vmgE3nGypPw)g!@b?K)D)R1jyP?ec1z&&tHD7)GJ>Pxb_~C~W%PDXVx(}u2NV++f z*Id28!Gjn?zkJ}C-hqN65@Q4*C%wgQ_$1f%jqgG>*6d7^bj8(kVuA4-Ao_g~UH*L#f<76{O6u(Z-~&l-1d28Y%0w-LD`nJ@~U=N)m4} z*zb(}ZsKM)@v;OjOK@9)Y9IxsK%+n0JJGWp%{xmEPCYm+!LkI42aA%G-PlGfwrrcShEuq|kli3n4OY0ZGYrSHN=2 zdjzTEZJ@-%DyhIF7r4L!?^%WIJsCuv=B?AVd@chh~YJw;eeVvxnWV74R_ff-Ew10a;APF z-uw>!+dne@`v2kf_ycd>{|n3dmfKf*{>T6Me*iiE^pF3Kl0rVKk&QX|DOB1JB|+v zr{l`$wBYeT=)~GjyuW{sx4va zxH;_k;^sB4U%uwm%U3*q_Kf{JQ>|d*XlvC1V+?!vkUQlE%n>ib10&~n@2T8-mbw0g z!OYn2_q=%Vf^}Wbr)#`@`<8j0pNs~c-~z?rumBKdM6dWM#-Xn{=qGFRXt*_kmdIC| zoEOy7XmE8AT{IVyU@E~>gK6s28psU`LyMt1BWB=nr=`GbsJ<^L*9^g zR2;bxfFoKM%}bt^Yn8AHr6O~@3ZDY6flq!|8Z;e2K(~7Fak&qesUy z_;n&T$^!zU*hC+^eXKyAiRfu`F&BERw3~_bS!I1bvA(EutGGFF5@Ak62OrX5bF2oM z9n+w-nji}5mWGZFHgJCrSiI2J^kP?AcIl>sK|rjXI0_cSJkJZRR+tU;h?_7DF6I!y zgli(oj87}QI8l0f+Aj?+J$+OZ@>@A*0V61DdQ6=9iZ`RlaSzrGCmFtG{iY7pMIn$3 z%7p4L9M~JyR#*yNL^H=p1BoWlt>8)}^g){9p7@3c%sN$0EuN<=Rb#0lN_yx=p0th& z^FmcY9a^#UWErp+G(`7;_X%%v@($hUG108hraU3o1<{knvgnlPdE!EMCU@LK0IJ|) zZgdw*hqVRk;;g5jx{xK!x-a4;GVam>O2^BZa}!#DT>`HPJ2U!%_-`nbIeW31db* z9i6B+qGKhM7lbE0-35Y@;@-gIa z`APV>_X)pX%>05VpM&!W1HrjMzm$#HqFu-TblOh_UO?j9h^xmx4_wN=YT{4AFOBzy zz}BQ+p7)>75x=02Peu7uX`c%FG(67j)9`UWT$k{vT3x}2dQR~ZQ9lPCwdZGw{6pZg zS$$OKBM2_?n@4cnUwPl2K9hiq`$3VPkNO0E<*><={+zO)(%!3bu4l@aKot8m+a*!JmhHhua1&R8tOaNvU=mu*Mi@}2hM+J&JCE(DZtgu_4p5SzJ{Mcy6l{f+BsIHhAwK$Un+sVHQ zvYtIqi_|1vS7KdoiBQmIuv?`Ue*!I?cFMhP(xxHz<{@>$Ec498JBV-qgdqyjgsCEH z*ygyVfT`nJ5l0kvm@g=1R-cJ}L-W8R4ILU#6)YuB38+({%upv`HF?uEl;DYiT21+_ z8mklEdQ1QQLnf=^CkCA8A5v6eR4nlcG@VKQJM z*n#$h9jHE0so)j$O7w=5Cqqtjb~J)u5>zvc8Ej8Pfx2Y=jQu0PqNW~-!;p-ENHWv{ z;iv}kKqL<-1J>lZT|gL{bf#l;Rop6AP47?BgiRBK(H7y==n^5P*u+{Y>twW9Omh&e z6W$14pdF!PUDEIYFe8*k4(_5i;mKwLj?|G_@~rMC0+Z5cz_Op3W%Ll=~)vR)SU|=@CKcUJI`qh~U6l99VIR&O=YX@5XMb8cA?VdP z)#U9hgt?&*#V4%HlrmH6OszXm$_%|nR@1XzOybFC;0d1Kj|9mut4vc`>8yH5kN2$e zJzno}HL-wjLb=!kcHXc?fC3b`KyR?U<9tJ6An>afg2cF600dWYK70k2H)n|PM-f>_ zuA>kmpDjUfevSxomU~%fjLPyMvju$%6GtHl$`_& zK@zP&4uR46DO2C-ISAWeWHQQ*$mfhfmS#XAagx!J7J{0kagr8bXkYa?S^cKkM_HxXrzbY z(Xr#zhmdRL1uj}VWB)?v zMJHZ#e3#7oLm!*PAx;4&952B_ezxeN>YnY5PHJ%(pizgCG}dJ)j)7{Dad$k|@DfB1 zf_(Pc=~LpzuE=$t#O>w~J+J^aqmwRhumG9J(+UdJE7LmRC#PEnhiIS^DNqSz{YDq3 zF~Ojq3Q$E%mK*n_Af7-NgXLmokRDoi!s6a+i1H(GD3y-`b}^l_5H75++r z!xKEg6a2k`l-{1^dFF69@aokozWVxW{^9E%`0a1*`0nP8({ZJ>POL##Bpra0TwMnS zGTk(s9YQPgl@xy&MB>2|Jm&?;UtaJlB<@VSk~GHEg@J;L*31x!j_8Tj#>1AQlwnc^ zZ1*dOOX6{P;z^IZh7cBw-yYeKT4n*P>aFTh*D~&TppAn-iTi^98OCowt zI_%Mvd`i}DU_qqBc^Jqv(6?aVC54Ess$1hDMmj#Zjv?|C4+kmZNXSQ9&_!@>*&#k?%QGG~lBO1ff=;JL zr-!rs+v>69Z)TX%IG|Xil!}#-Mg)%x286P)*PZ<*Kq-aYZpXLZev6s$-n`+>n>Rdv{+w5@Uh(qfOZNLc(=<^^8bkgeke^vB7!TyUSO`ui4F;6P z0>=oJNFO|_au>&v_|5;z?*2RWcI4N0-*N0mzWLSHy#D$N z{@XwQhX3O~{uh4x&p(h}pk8u39(en^h5O&VWB&Kw^3@kF;pT>S)5O~!zTzC!3*XEQ()1s$gN`Gi9-xlbx!IOg5Dd|V8SS2coa^|oHTJN6Gu&W4Wb4% z1=AE*N$+i~biy1g?|EwC*e8)N3(WGUblM4g;5uuBb;433!7P#D<~XKc7&}nlf|Zi_ z)Q+Zxc}w2?(20HE2k3`Ub}ulCm>9c?f)Z(%R{|?RDDPH+Qi4*T44Idcr_}Tg(bJ{G zEOC(Gi7|711k8g;ojL_&-zl+BqEhw~rOp&)$~sYwqV-0Y(|6D((Q2XZk3=~Ej@puV zJEab|6)0(RRpgX7QsO;|g%V1VAvRi3)FP%T=2=u1wl`uns7k`l467n$=!alY(8Fna zU``{AUChvh4V{JFK;c-;aat72rLj5XStS+|Rst^~Y@x?Wi^gh>rog8lENRZ-c~yt0 z2XhO;hF=8}b2a8&VJc2t3)V`~M{R;u5d{k|MX#M6NFs3Ph&+) zCJvSg{#fAZbQwk+}AOP}#R(PPT}7jMZDlohj%=}7St))q_+ zlss9-XbQTVC;`Qx2P%3|>z-EYFR<}=P#4k(x|06v95`Qdwsu|Z|KV8PBI^w5OoZ-3wkFyxC z1%Ij(_;lDG3!lpCy8I7AAGa|+oxvXnza)##$N5|@eFz`d{-@JDe)IS@T*59a+vEPZ zK#c1Np5WgyT+7t1;SX_!5fSv>X{~*5-@M?r*7%gs!S?(Fe>`k`{Y#-wg>_fYbTiE; z3(OW2oE@SD6t`5=je@(;Lv+MS^0h7q6K+Ny{AGkvIJxn|EwC0?53D=Id-ChG#pd1SdG&NIPO3YG=mVh0?Y2DR468jgdqsAhS6Pw3v+O# zv2`!Pi?Nz86BA;_*eMP83DJF1qEDVJNVWRPn#SJ*tt@C-!PcR_5qJ&y6v)9y{g*(5 z8+yir{ zsdZ1W0=?pWqS-|BN^7F8L0cNJIHCbg5g?%=QZXZ)n|c%!1#bn{m@i%Uh(n)HZ@2bbTP*!OIj$qp7OTcn&YqZoa039 zeN|BcN{C{OYMmOLsW%A2J?JgxeeacU2;~C$2rgL+f@M4-GQCDQ+XaM1 zTxlSTj33IRmw1=w?Y)nCz6;!ja^bkl3^!P79vjumAk+n`8K+?!;N-OwxS!y-84ZvI z1B-Q<8jg|5rGQPqEyPU{U|xv!5~%53CW z2{E#vB?>hvB`SpFw|6B`*bVa(5(_YtL~f8}dlE*ar$Af*5pf|z9TdVnP-G`)duDWh zBr!LJ>!>FPL*eH-jW3gtVk9^R2`SOL08|LZw;(q@iW-~oK&K4F3dSY!i0$!>9eM<* zbh^#t%k{y0eu5|XD+H6+&xIGa6R)1_cy=?hn=4ZVcX0W6j7J0*=Lxt87@{O`jC*D( zk=oQbFFybeat-z|N4I=~%p3@z$n_)FiZNfwtkDS5<#*OA(-LrA3AQPi6Ee$DT-JD$ z;SofTYmg$B4tHolAo?gHJW?U>TpGDRi?`94`eM%3NUsFNjQMWmGfr|5oQ~xc-(~A8o&{+jD6!Lc|$c1t>!^ zd4}ivOm9j)?4}wo=ZUZPGq?Lahy9+{HTcF(d}~K;XXCK1)LY@tR{CM#XpMF0+}}CJ zBP#ZJkVxXp0y5CyXC!sGym9t2f&&fcQ2M|N0&hfLP&?(3p?x8Ifmn## zM#!|u;3ZDl@{N{lC}W+E5y!9lBC@{cj)OGT$e%_~NSt1j!uZ*OeSuwpslrsmT!ztr z!G4DQ&e`ue`x5NOrbzOpiJQ6yy~o%<2u_d=d@XQe@Vj6^de;#Q@+d#6U%GC5mTfqU zR*ZMbJP^nO6Sxh!?;L4dpLtdvKex}TOa0`Te{gw+CGWY7hhXsU$}|-6RH_<85n)O} z_?QDSP!gI$*t^X(k)D7nUS|T~BrhF3pgHP}C@2P1te~6Ir%tPlW<3?b45Xwx%jTK3 z6l$rIdi6Z$(8q=Ykm>*2aC>{p^XJd`=9_Q$?z`{!{qKLzyLa!-9=+bZd&m3t@A>U- zf6EtNe8E>=eZ}+V&;KZI?E(CM?7iu7B*~GW`Kg+jdqe<<%qmurY_expR(eK8c1331 z|1I{%9x~gr+XtIRlXYbx0Ytc)nd<&f^FSmLtSa{OaM&ai;_$Fr&D>5kJsya*H6(cS zGz0~X?mXZuZZv_;<229-8iJ2+l*X4x>xt=?zheH!f292GSLD-IEFZpNsS8b&2Xp?9 z|L4E(i~sTu{O%uq%jv#CeuW(Vnx=ov@xwLs>q@DiaXL*CH$V9?pI`6!=IgKc=IgIH zEe-3PckkYDyuai0<}==Y@gt^biaNQ2#)al_S-3mibNl{1A0F=b_WnJ;e)lE6`uaCK zobFkUNABN!;J06W#WxRkyz4bAD_F(c5mj=^^tRBc=mdG6pe35GRA1=5agvTrHWW|^ zsVi0$x?-(yvIlBi==Tl}iE_$laXcGI6F2gTpMUY!{Os*t@%Ho2Ib83U4?C`|uerVo z%Mw5R;w`UlUUN9?DN~`3snS}$e0w5yX+}v<`d&`FpA6S-! z+uK`iZf>}~zGk=EJx%fge}SOg*}abi$5z8047+dihlCvkS(F4Q3e2D}kEuJ^oMH{{ zl~XsC?wlH|J%J?l0tbUD6Rr%LOqGe23Q{JLWin3JxWLTbacj6X8kM6|&_+!bmzJ)S zo=HuyK5>$nlkPalj+4yjEa)txoifde=SroqS};JnwOB*5ld_SDlU2wlsfX2;WFOF8 zL4{h3Hni;bY^Vh@`KEncJR@SDLYG9>!L;p-yJ86r*6F_05IU6m`M7z#89Ta%bsP)?1oewjcfkR9Hq&?e^{0rKBq!W7F0R%LYeK5xT~XcfJd^ed{M&>l-*P(_4R0UrKl~Wre3JK5 z*rIG@{vdc7Sssfy7x_Kq4*0#XE>VA49G?~uXTZybySTS}u71z?eJ?!b{}J9F)j8tC zZNtY=J^}~X&i{{IKNv1+x_Nx}l<vw? zkX^{8h%4Q~T3?kE=f9k(Q^M6DE2OwPadFfd#S-eugp%CZn=^SwywQ`w6nqU z2pl$EsD@L!1m9LuQcFmSJ7y6>b4KTkNJl2ei_#N-VWy~o)Qr}Qv>d!&se!Ig8cdC@ zon}sGU3WT;Q|WRF+^m5pi4%o83J*9(5|t!Q@r>FIVWMdv%ZwH!khI|201U+?(?x=3 z!lN#94JNx4yg6cx;&HC+8Eh)lBJ`9|ucY3quWo+9_&u zR=PHiDn#NJ1ndmhh?| z5?4&_$P%?3$+=oHmWW(J+aVOP3b-(mPvr83DP{iAy{@=7CaZa3aS&T zPACXhG>u!FWP~Co11#ta7x03(AfAGMQWPyp(y;0ov^htugIDn2p>YJ0uy$EB zls>3tsw##fVoW&h;4WlKWKw)H6Vx3qm43ZY_XY{<;OOj3>d*zj2;~_;b5c{(P9&>n ztzeay7>GME+sf1EUw>xH#2Dm^(u(DIpIm;LVLUP zR{<9ol1OL_%%+n*gj0OaC%20Y5xCIdPD8>xJY6n>8o2{yvu8BYfUHgd$6 zW~1H_@B`rENHL)f7};$8o7dy-$1*lX@#o;2r;L9R89(}Oe%tTyW>jT>$4&}lQFQ&Yro7#d#UVtTV=-~tllXxt~vjNX*CX4*Pq z>mF+}?v9gi$;9-i#O_dH(=;SUV^s(wLTG^T-34($8JjH^Y^0a2L44l6zYB^4(~lE8 zF=JiT9{y2|AQt(HLooD=2G|~=xuZcxn&IaVZujVW9-ikf=l;n7$j8SKG~DjED~{r# zxC=gr2r+JXI-L7)U^w6rKLO56J}>YBf1;oYQ%+p(CO&($g@(}pgpFQ4%+cFCQ3H{Q4Rz?0@0QRRAi(hS(r5}f=$Zf3>1Whn1ubYEk+rS zet`MF^RfZ9XMi{{{|Z9H2V-L(%xraa63#)LYlWS{lwiujU(Le5+X+9}3D-N=?}eMm zd6R|DlCmpGDT42W`<-%|lnpVu<2NpNCb$(1+@e4EC`e67CHew5+Fpa-9+R zO+NY?NE4)uzU4p`e3#%bqq`Z&1<@2{_mbfitdI2yhR~wXiD&!k9t0s9Pq)X%EH3|} zzFVaM5rX0I1zz9<{?b51NGb8^)hph_njw5!gPcflY zU_v9G8RJt#}J7%m-9NR1gDK7YN#l2_<-2g zJE3uVo7cvoA+a1VRb?L z_0IL(8b~*%gE+}y0ty}sTndK*ucmnBS(qo-B?+Mc8G;1a$hjdJgim*)n?pA`u$r?v zbety6D$#>X_~<;H3}yvYf|TO1#P=TCP9N1UiLI%~@bmE=ZV?}!+T$!b{I~SmkFi}1 z=jfs`nrw^j0+xUjVMBH>zJw6#rz`5pwx;{&VlE!CG}U7*&l2Bf0xyDOjXV;8Yj#32 zFlbUVC1jWeHc?vd)Y{?_s7WySxuLGn%iS>Pw9;ug7%naOU;tAz>sd%3L4f1#YmEZp67d*WGhOfW&n;Pe8cH<pj&Mnl02;Ij+W{5|%)OEHI};YAupWLLiJ8tJ9c#LPe&f!9r@jF|Cy`n zE6TpG-|gA&4;-$qxOsKMPv8ESx1axr04PD%zMEIq%=64tcH~qjIWwige%dqTg2)h< zI4wLJw{_}I14M-D>uYXqZus)cF9Eo_yW{ru_C;vm1%TKl%=61XE?Nxm}m3R*dJWog2Pu6$^TF2a?2&XyuehJ6(qUYPFEl2ZDP$&_NPWWl%#tTg8t)}dOb2B;DETTe9Wt6LwXf(}@ukw& z*h6V?(r(=47NxkbJ9Xw|VRv$JJJFM{B;zC&^zve4b8@#~h92+|d;yYZI@8o~j7%f3 zFsqV9>AQtAugDb4&`FeD5e${{ z_tBp2079}*E~IT@U4`XnEDuI|Fj{kT2KP>qg}Izi35JH|LueqCSTp8In;U(u=+wyx ziaW)jxR9Ho&Cu3J4RUu}ok>GzU@x#onVK@yiL}m;_L$CeEtn=mmDHTq$GNQR-Brglp5-BPqOXx#9|Ab#VKmcZp_k|P1vn4 zABAK>O0W`I1BIY%u^5~T?hWoczBWQI`0;7p=smO80lFY5XhTXuOG5SuDGn{+z2Iwx zH6s;Ls-#rOsZz9(rBb|8T4SQ(beb#Om2M#%;2Qf>DUK9Dasm^yC44QUTCf^%HSHcZ zy8p%??uX)B_up%8`)>xGtG`1WZsWJdbQn&5{QW%mcwfdEe$SOPKt_~7t^K_)M$bn+ zpTK1~+fU&UHDVkFeB}9QZcmH5gpZ2(xPS=yq|hh%jmG@0GA_d|>h;(T7fAT_t%jd$ zwC~~E;$1*kN`?)W=i*$#bErP8(#P=BV_yWKK^v$UwB?df130Y_jA>`fOGs$!k>6N&G<=w_$c_?R{m+a@Z~b~^Yw{e z=emD_&eNDL@B%Ne!PC8qh%ilIrE$OC$GzX6pKUax%_2dq^$*gV|CbJ1TmDcu$rTU! z25}Ez_SPtCryM)w-f$U2WfD!|)VfMyNr|N>izE;y<&LHvG;s{IMCr=h6=`wqE80_b6f>{9Y{u zQ+S<2h`5}BpfPo1w_>SM8+eUdi|T`J@1dx`e8rPcWMSq3_XosJWN###iOgh~>_Vj= zd55P=&yJ-I0*2#+?te#dPq-`I<1JOl)k)3B-N|m45RBtfrtLDG4DTIkrLB$CYA7yJ zC6Q1a${Qul`&dJolSIfV1jT!5*wpa3leEV-n>tbruMKO^8zF38gAS5#CUFj8WHFK% zOY4-L(6*O?;}_E527#ZDEeaZb%tP(jm7ask_8ea5U{mN6IB>Qow)g_R=}XeeRBBdjL- zlyD8D3CxaV!IBb+IWr=VBpB|?coGI0b#iH#blkyP!7Slzf*y*}ijF4(|i_*QL29tSQ+nJJDCRri%OmZd3f|M1TPc&U1JCsD4 zGNOspGolsI70D+=7StSBcgWI^wZT#e(~(0kBt|{%imJpQ{WF3Wp@A3pqk#~X26hwd zd`IbflI#%KfzDW-u$*!01S3{37lH&6gMygF;Yx_bn~x7akAw?8IWRqYih2&dFUn;R z^61{Yzw0m-aX!UkbA5#WeDBR?4N*}|UuzH|e)BY2Flv*#trY~Tk(tAXMp&Zs|v0p@~@-+iEGE|>?pPJC05 z$W;VXg`|Rtq9TX{1Mg)&jk34;y}%3nj|UZDDv9g;%$w^SZ*B^2UJ2LNo!XA9O9D>B zwrWCr!mW%xEub0$q-${3q{k5#n$Cj3FD6SP80(3F1RxG7F`vY;7~tC|-sa@9_c_j1 z5)5>7WV=0=F)jz*c}#8@33#9)E{F*j_?Y7mpcD8hkmQkv(IEr|(qncG!ATVP#4#G^ z2G$8PdqlgaVacmMz(07*naRK`5PGN#q@lzwA_wy$ zJ}wtk2o1p={ZY5B5JZfgplPRS!;;|{EIYnqP)rD^h&sx*=zvlb-xau;k=+3;1($?1 z$8w`D9jT6Vp*Vx_(rD6Y#Ip$yboUSx@bQ>o)-g9?udw|+fpZ)ArQ-!&;06B7A*ICO zaNx~npYh|L{FtBo^eta~^%ckCJ*_P)%L3J@&7e0N0R(3ysHiK5f@hSBQ-{Am%)l{O z>b40Olc&TJ#Pbn^5Zd}J+@kTDvHn?vkKH-jxgs|29@E!0hdE>J-~1#0$2o3`=Z{V& z_A5dxjS~5~`tX-HIBcBdMEJP?HG;Z24oTZ@GtQQX;?x1%5Q+7LXr+idG~Tdj=N)OB zQRY^5EP5b@Z$cU3=ce7(sir+3tsM-KAqHZ$YTY$%N&ga6d|sO^4Pk`MvOv!9 zP8!c2OJLvuc`r?zq3dKWj_dp3~`c79hB~x?;E6@qK3A9XJxBuj5pK ziqi-=(gt*q(O(1=M@QdLqCX~}ND%fjMt!y8`s(Mr-tD;B9r#)A{P<`8GhhAk7ku;U zU-Ij}`~Tn}QI~Hy+AH)&|A}(=8`5b)cux|@#6?ANz%4GLb`vEhvI=v_+jnOF zL7OvuFaWGNb~5^d(H=T_0$L_zMc6@_aG$^paVHH;bHN3WS$pQL3E%d_igHq+Cx_&u z?i5dm6htS;GwO3NG;1eGBYUS=!f|R^gCe1oL}ii^mIiDh_XF6>LSao4ODU{VK@LiJ zrKDGh`PC3aTN3t=XeeuoX+v``n5P4zxl+v8d*@IWu6m_3r90f0IKPt|&oK4S_&?zk zzPbA_ow>5yX{??v&65E@vWu1YHfOJU01JjZ&kjVzrh zHFjp$k}xe88M6%4(YiAAh1?eA?szD18;b=ow@wYx3GyA}JH!-U9DlgN9y)zVn9ZzH zV$s5JKe4=;@gmIigsg?U3c2>kFDE<|$OqgeQtu=`p*}8(C6V+hm>M?0Lru)dnI~sw zPQgKqo(;=FccB}s*|1`?Vzg{HP%4yFnU+N96L|$c8TG!??>fD9+6<_nrBZe$a*7YW z(wL-DQ=u27?>p_PV^;AN=$~_kc@cIlq$-f1Z2M#d$6@KnvBE=z`--n&O$=Q@<821L?`gf$yWpvy zC6h_9KiDU33s7%(&3QdB7S=(mNi z0O10ypmoCPgtm;EqPmN)e}m9_XIbKGdv$g72MM44S;2RGKIbv~^7p6C3-BkupO5tv zE@O=o>4ncDJl^^}`g{r(eoupif#Sy{Zg@Y9{=?zZ8eF-JmiV{S$MuaPvQG=c?yrYet)W`7+kua zhW%^6b9sL3C-E^qi5PX+yg$Ly;oRpgBb__OV~S6*et{QwfoEY;`f&;KJkxtWYs=q+ z29{-EUBjC2ivYoQL+qP>_)N6ufqT6nnKRQUePLQx_Q#d|y;GA?HPf`vQfBqcNt9Dc z98pfHhy;DTBpFuu9PNo|O_ZuktC9^QgG`1@9m`1@P!@a*MRz)QFU*Zj zr?X-NBl^q=u(M4KH!;UXo#@ z!@eV>p{*mmBDLd7#vj0{&{w5C2(FH%&YU|}xlwS`6Avwu*NiMPc<>ciZuGREDYVC< z71fSt9ZPQ~lol0&yA7;%XoPj<<_MjX8dGw1IV=XACa4J{flVPup(v#*sVTFChI_Y! znc`LGKCTwTBKs+cHF z0#%AQNg9)OChZh)up!tYVRf^+VBN5)v~|MU9wn2Dk|t$N!afTsG1e4E#K1b%l|IZD zTXjqmWc4HD3!jur_c9DHbiGsb|l%OrXY@_ zisU1dJLny3hGT=Z;&&CV2ZR~d8PN$9#U+FWKDtNxGk_PNffx9rfryY)ky4oZ#HE+^bTm~)zK^fkD z%W!Y+$Z5md3HHxOAD7SJKmKpoi$)e>b{0tpWn`qdWy~hLO|+hAYo;!+9))^RtP0)) z>;loq@UcLLjX{>}?SO&!mNuea3I(`1cray(N7zOWkP&Vc;Ze>A^pW4i6WTwcEXU7z zZSbhsKVFf?MT8MYaNM^7qrQI8CqH66_Q){81E-5(a7L2=p`|qXSj3TYZX%!}s0c{} zRV8g&7)M_jB}B0ozhB@5{uDq&m~!T7Kk@o{$LBW_pT9}Gd1oAt3-=F>Y!*s<#@Qo2 zFg{s$ zox?VljNd|(i3I)w#*{GT9{~s7=0uTmbtK8)xtEZMa0f|*6_*R7Od}tW^L!bcWF4%5 z5{K{Y(C$PXWxPG=iWm zQdXFSzJs=xV1fgV2){%4uPgkn!d(js?Y~2KP^F3B33`qTZ0~knT8Ji(CNM`0w1qne zATgKY5=I`jwPYj~2|W5Lcm`PnPKq8AvJ-R$DFPaCEp@uK&}xul4WI~K9Ls&+<8)*OqqRUL^#0*-?3J;FtZoyd3(be@H$)0;czMdZ1u&bNAvXY*5oWFuYd69Ts$xI*9;voBGH z(LQElZ!qe$wVyif9{Y?@mYd<$2chpGuKNYf^5_pd&gl_nY=Pp33bCOY?Zoik93Jl2 z>%?0HN>cW_iC5P<-W-I__RgCbN`ai?@}l&{)ap3X#oofYQ_ZP81P0t4mxQLoR1!`w zq*o70i8|!kSiLcIhaM=Y#J16J(vvEjKZ_8=WeGPt3HQe;pW!0z8T|3tn(f?0l-M&Q z#y%Ew>}6!6b#p9M*zgs=c&vm+JJ21}>FS6X<^raOMW!K?7>ttUg4*au=iD{ca{pyLa!n zy}jjhI-ND9y?ghLckkYDb92L+H*dJPxnaNGpP9d&;=O(1(`YpVx`e>MW?5i6-Nd;+ z(kVV@0FWhU0W-=jCFXgCpZ$cJdFJ)u6|c&Hy*dB>CqHNXoBxg5FTdn(|L^~kw$8ji z{esi_8u{!$Bl0tJy{G60t`CKh1l{HZk4qnbl9kWiyk?#<(_Hxan{Rpl;g)YdyyNTl z-}39Pf5$(5`D^~}>wn;%Z~vM1>jzeCcoF*4@m+^QM68&hT*BB9U~=?U+N)-sD_7KzKys-32eNyp959e1M!{HzdoLeY?kUHpP;q>f~$` zGbR;gF?Kk`JI$cFvpBIg!NH?VLc5+LZom%rI_M`2vxX0)4uYeiAeE#Q)fO5XF;K%2 zNb(@1bU{&)C#)Bcj5;Jyltw~P?YLx+f|D`Nm^qk}dZ)AsZR~@x2lGaDtQejfl#1sS zpB4UX+B?;ZRh=~p>nwPV{edEA6VgdAjr~GO2~q}6q-vz6H$$?Ni6xkCWV?focajE}fn%njQ~kkq)^+t3-L#k?O&ikPPY3$5MBa zgtomTE1C=`4U!^zhb!<`z)o;i5RmT3YH05vc=U+LW+JDq#^SL3AjOHz124wIi(}eI@xCZAC>Sv<131%$?Si+6v|q+9rHR zpNeKYIbDS+N=0cdxIri??n6o-K5 zHy?pN#{C4(OZO)q(#P}YiaI^}1zz9Y198h8RY*G z;19Jlu=tLXUy&>P`nt1y?~NktC54gV{RfUpJr& z+7tAG^cgip{S@a^aa22!8ZOX9=m?Gsx~qnt#F`=2C~2klq1Xf=IN~ys7j`ny$gG-} za-vK^F3<$T6)etLy+cE2F4Qjc2HKotoykGPDB5XR(M%{%DFsVIXj(GOD{C)ISSUvF zI4i~%aI&|c)mlgCBtlrct6|xpbVM7H8qyo2hPcz0P^Q&er7Z?&6%57Ni@J4e|;WN?cTX&@%*-gh)qdh{oI9gWkG0eQ^?oCqs&%8B$S_2$Dn0 zpe1NYNkn~Bg=}F3(ml?J)`RBK9Y~HShzf`fUJa*pW9d#eWj{3hYf2PJIECs3_d=zx zSf(w8ui$Hgx{%s}nuRj%o{=7kF5C*;6U`FMgw>p-g=L1NSJqw;QB)ymCh3f_qnn}L zY0^L&o}zz;{m2}5Q{uP?2`Gf!bO?*sH5r~dHqP(ng7t*8kgAttBsV58cF7UxAmfHX zjG%4Ac^Vvy5L(D3`reeWA($^>2vCd|_c-E?m>^vc6FP>ahRq9h^?+m}nIonoE97pV z9g!7DC&>3v?{&q6xVK!v*T9eDkOo~*1<=c_*PjKv2o1cz9}UiztUyVbSqnRzv2?(v z1MB_@<3txnw32Hu*QE{}M-do;18$6i58yFgA3Z)Er?UO~$rEXCcKTq7Ra4XiF^%-jAUK2AXn7aJK}-lGn-LF-yray~*3lHm8JFUmRmu|Zd2ElsLuQj$Q@V79mv9N2Jp zffx8w168Hu#D1E1eKqs>&5j?vo_X`?#JBH_lvcDg+!L;j*$MX*ktxOu2`3TS?0RG> zLYx-HrgL2Q#dxzAoHpV?Y)XARa^W8n0WlxOz>Z@NLT2<~fhKIWi5VdjNNfaqlwbw}>A?O&z?&=36~>tJWi98-)vi;~orO z?lzu=Sp{2g9~#G`A<2W0y({91kOJ>cyv;w(+dRti4R=PnZU>hrKPow*sIY0}S077> z2(!So!eq@t zu;3|}H1dS+X4voWX@}NE=h0rzlUe(|ScYjZx` zkG;SPyue=;s3vy19U?++#?OELGw$y0c(}jk?!z6ow+};05wsR*V(A?dtUF)?S$Om7 zxTi5U%9yYtu9+bkOwV*-69kBKJjOMHEuQGPthmgOk!D-?8GZvT*pM9M${F3?3tM6i%i!;jFp3VOfgi&|-H0%tc zeME@;8i)#RTlr5wVujH0l%yQ?GjHBp^Vya1`N4U!vtX!JP&0I0NsHh^(|7Mi>&9A* zwfR_!AWce1nKYNdloP^%*(La>mW8D|$J+7cblY%=y1B%b>ih{ul`s9_@zZb3eQsh6 ze?~ZrPqoDv&(^HEy#-VSPoT*eyoiVpdyP?`o73IT`2k^7At{n$59($hV?B03`c@$U zQzVC(T6OT`2+@nY3*HjFbzBn=`%X2Y-wr{DdCZ;rPtEUcyCpE-SRv|U4{xEvC3tXEv)+)|rh*65bN z6m3Fpp*2g~2(8JA5RH&#q3jcu9P15QTw0Mp_8qQazB-WBnbs2J?!@j#4SDVGrUOo2 zVlUs_KtX3EO@bsscz4#&VzoF@!6jj7Wl5dN!hP+uWhJ{X>%?AXc4-%QdMI-^%zNJ6 z{FI;n^ncn~@J|NjdFG2RzTp1;{(MpY_19nX>eZ_kp@F|}a97NB02m!t#ft(JatD{V zkWMR}j<_12!NA;hs1?*-F`2Lfu1q*N?9{li#v5DttX1BuD}}I3VBPQz-3+%0wS;uV ztstUEHnLSxX(VrCRuVHQWzwv~PR#M0!`_g+p?jmu4J{RuN^NmLemZv6duREOS#AXv zp;RaLj+l@taZ$Zyx@Xp$=_IPaQH)bJPD{L7Z>+sidLwlMBP{OGaB47Icfk!>howtc zER>aHzp%6ui=XH+m}4rT?f)qFl8_9kfMy^YnuIh6-c>t-(OajR1dn)jd^VD-WL*$h z$-2^|VLdME*Y32%u&KdLVcH>c(2>3FV0DxulD|Rog6EFV!*ak{v3{g4cN|ZR^^~|j z?a4loT}j?4enROnZ@VBq;aPAgkoIWakx~H_OoxR+Z9GUtq{p3c&!}ZmPpAp0t(2wW z+7S(!g0xC&3$|v~nrIc8baZFT(wV(6&yDFy$kT-DHC-w-Icss29V{5lowU#V|LnbO zlO#Ek-}iF|5Rq9A-8~Pxx3oLb#25PhUqNPiA)Q3Y)eJ zfV*BeA}g!9dS-U-PMX`Pe_2#UAn*(X;BdG*<|&hM4%)+oEWv+j?i_jz0T2_sgLcOL zcI?HeDw>t<6DJC%mN_oUdX%u}CyGk&Ydk5E6PhP-*)e4UE##Idy|C+v-i5h2sT)h} zeA^n<;ybWR2;G#7C?td2h1MbWj&;GCQrm>LOzqAhuoO6TI8C7Ltj)Nmvkb0{Vx;CI zAnlbjC6bk4peUnSNHWvwEgWx<;}ywvxbKiQk!mNOI&v`R2F<`7O^)t5t_@8=KZs?h z6OeF^WlZy+4K=}(2E7AxF;?OPh7-=ooZc-?eyAPYPJuAU*?1sW{$;^E)}gglzWw%F zPNx&Mx3^qhU%x00Jb`oGoZ_#}Qzc^tW@H7@G)Nxm-d={G-&XTv`0 z_<--v&G*0C@I!HYTv;DEe2h;;=)!r^!QuIMo^yM^7WHKkPm}xPyDjVAQ-r5+0na;b zZhuTXPw*(8mkl9A_y0uPF#K=#54SIP!3+Ko5D{{Y)8MYJuc@_iI-Sl61k19Vd2BBT zY&4^pp{h?`e*`-z?LOg6=%nX(1EMtA5 znSp{8C3(eDjE`<$4O|3E36~H`_N>qnfiGZfy zlWOq2#S==RkjN6o1PhLKL2E)!VO?HordPvO#|CfTML-1?#R=L;1jIARl+-7Z1t%Mc zy4m*Itt6yKtmjfDp;$|VVz%NQJ1H4OI%*y59jT5sffh!8X>l@Pm5gf+#dd^7qFO=` z%nJC9#7s#$OdU;evU4d;$wHPy>YZFW$zt6vn(?A|aXdft1p}&&rJ)%j9rcb_ht@eI zQr&68ch^Yr%R7Z8$BuA4Nrz=AIdL^3KVgQc8XL|Qo1?~qjsmQ zLNgf(IiSgLF|vo@irg)DQvzZVTQddhO3)N*v1npPa@|a zk=FPhMZocB6T+Y21c73p3KIE8h`JiwR2L9+JUNz}o*dJtub!Z)QMC%i4-_Z_*_T9h2+rQz%-LE(-*VGEVgN$Cf6#xJr07*na zRP>o*uMyj09>bxF$7Yhm)0!!JI2#h0HMUw`Yoos8C;?pz2pIWcK$6UK1!xZG?Th(JP_DCVdk+=px*#l!u; z`E-!&y&H^g2f6%;Be_rGAwNI_@$q|+V2lj%koSvmaC5VZ@nlnsf`_rlEz0j2b4`5f zV}5fU%HbaKrx-X9uYmWC_s*DzqnzGH7>oKeV`E5coBDX9rvj=vtOR7l9hwj(&jjd^ z@2A-6gi+LwUIKXJUnR!n9#(y`3MmU$NqCjvivq8%cD%Z|;miHRU**bQq!V9cV?V(( z38l1Qg}9*_Ww%Mt9D-b7R8djFy6`3(m_i_KtF+OVqlO;p@dYnfuy{G#4$$u||nW%L%&VVr{9$SvkN2Nga~n zrQ%XSh9V&#G=z?LAM?85R)Yqxk;g5oCb*5XiQiDZR>V_`x zQAgF8b{@^CkdmWnsC3i}B-WJOh0+CUM(M`nQRgO+>m(uI)Pd%4??g2kgy7y*-U0W~ zgxj^bjcPV{sQ+;=|#v7Yx4TuZ`tiN4F>c|>}V6=J>z_m1?fKa7kO zkXho&f~XS`X^v$_QshfSaRp68n~I_+8D$)5i>e`5G`|K_k|a(M$SG5DrsTp@3R9WR zS1E;33b};8O_5+z7`Wu&{Qr%dGhcl11-G}i+~42x_U&8Vy?aNk6?doB$~WJ9!~6H| zxw*OF&6_v8dGls$dIX%X9wZWdQy6WnqpnBmC5cV2>_!5zKo(&V;YyTioNI)=CT5&j zg;|81I6F6H1iW*1I`KDu`x}1q50yK9h1~p)u>Nb>`@f^u2fqBG@T*_GVmGJAod?|v zgZ?Dw;Myv`|K=a~oB#5k_@DpX|H6O%&41-7PhK{bS2 zTLV7{c6999m9)(4bRucNCNNdJb!=VfrxmxLb4i3XFwdAePHZ~GH=#)GxE!$j_e{U) zT)ke{FDuu(hF=NC)Hy6Gc5KifO@;KTQ05(FRipP8f#t!qhUXdt}Mzn#j`FN#{yBRVqy@uUqF$ zU3rs_e3nnB6bz#Q4WT-818o!1GPDUNlaf=cF?nU8Q6%U*NAG8_>Wj*_U9y7*rEZT}VZ-R9V-RwJ)r8q-({@@FwHb6@g+1(~AYID=SG?R2GUZh&7rS{p73|i#R;< zC+T}Ug>r)SPF@>LD*be#o+^D&`f|m-7WP%yHzU`DLPwM0v*6Q=PcxPa=!~QtNi(84 zEa!FB)L3=Fq>~MH-kE!6*D|RJX{oq3@DOe{GrCopEtn~08_j9RY|N99lCdksJS#fQ zpoLDSN~7k=Iw@URId^a;NA14X}4FHBM2m9w!3a zwamLEaXbmugi@TT2vZTXD3nZ^CQ9j{7@~!#D!Z!msu+ScNX=Q6&fBGNe-cXSl;liV zDY>9Y$nG@rP_}TxyI?J0HB)OsOhb@nXUy(QVkC@CuCK3|=lPS0 zr+-#(Vod^17B7z<{ro(5=mgEYy^A*w__$w;7&cEn*4Gzs_$Uv7k0K-1wp{psCgA1z zMJ~$_@QdsOVqAN^+yjr+M9-wj-t|KqYe6USxIo_hZtI8U-iw8JwgJVLa@Q;*Mu zy2#AM>yO*|N8qE1{RF-*?(yz9FHfUB`#Haen@3@vR>2cIm(C|~-pl^eahcxJI{vYF zc^dB1_yLa&m$mb}@6nG2VkhDO+f)2rcJd#K$ML`51uyugLC%@0t1ISt=GCiLoK7bW zhXaSh;ml)u0c?t<(=`1cf7A|O?;Yu2$5@*fN}m$859Rm=tNVaMHm7*-ABr9MEsTo-_(gb_~R$Nvj8A**P!Bmua7UrIC5xNS^ow_u{ z6P)j4 zl~|v5fvU#h&eCHYRV`!8XquSvgb3)Qlu4N8M9zicm3-*rwUdtxt%gs6T`7Gs`n1yK zhKbNbS&=w>t{dhJlS-3VV=t|6zX(S!EWOZE@S$sOG;?b2h!|wWvPOSODK5`A+oh;? zN?J**m^YBdUY!z~5^;KlC@1Nh5EcVYPxDXjdjDb})`N+)CFv5jWX z``9l_B+E?1X_Yf8JAtf~mX+0&wF>p1)S|Qutr#80M8X(QZxljtAdBMyNrXNnbn@V* z7a?WGwc)9fx|4GwCnE`{$H{!vqD@cMY1R>eB#A^K>4c|*rA*VpDw!?`5yDu9^iV+1 zj;Br+!(^qY(9CIhp-H8g(CY-<$gVUuvMn@gsGiUkr#osFlmsL^3+%wVVXf1tG=ZiK z(~O~r3Z{LGV1FL?8LZ-8@KZz*Wlj_O=?d*Pn7rnsALx3bN@K>^^$IPl#Ne^%Ym>Ll6 zYYJV3u7C(GU_-Fu94~346G9+P!W~-Qv(_t4%bruc1}`k%{SsS##r^w##j@PeE&RG? zQkX;rbmX_kM7Eja0YC_$eFaA89X6$b%l@atQAEiFan5~IJ(LEb|BOv-8%d0?u=y3l z9Ct7aqW7VZqb~&^99I-(ODpa-H!g#2jWk{fJS<=wg3Wh+JpYGtst>=i#TL&|cM{hq zPza0&Dxn+@iUS&gv>FzoR0R=1hH%9TUhq?dNc7h!XZG{Vo7-!C@!2bW@uu^;+Y?vc zt<=`2t`jF(us#%B}ntPd$F$2Ek&U=W+)7vB|}2jML&l z!)-*djWNQwKf{TM>|q?(!T})LjCc{rfV*=(4aFf=AQD+O2Q%o3Yzd1*9>)9)#JBH? zpcuyaq!NpKxkUaQ$CH5?cpqg27|%nn5Hs95Si`L0-f-^-9n=ZQB{)w`O_4V8>O^_Y zbJ*~+6?qi((oF#&#yU~Z1UulW2zznn0;Ry~34U3GzbNqLHu3sZ;fw2uzf|~w#w+a< zg_Ix_B^O0g6yAZJVO2P~@S(wXC+FLHAX~Hu67$8FFk)N;hQO?QjQ^sFB*CbVDxN}VKvYm! zN#0Nz<~FRgv=GEKAMr=LZZV!CdUjWnu()H2c?v3LQ?AewxiZ5MT>6g#ABS3GRwl>*mptTlK}-dc-~6Un~)~W`6W; zyv7AoL5GmO5@k#RNrSfW0&RkxM!E(vC;Ih{dOgRy?M|3*1!;^-I&KEF!Ya@jVurhi zk)8>3icJ{=LSeu{IWZB!lp}ARU{lCYfviJWAQ>b_+>zw*HKb4&ESi{dVagLz%9I?+ z0`oL8PxFJ~K#mh2HidyprGcM{@_>jiP1E_B=b78vTkh`exVyUx1*c_USr*oHWmy(J zeE7iq{XOftMjY{x2hBS=v?{2K_GUB#9!dieB;h1+2-QxUD-m`g>?LUC>3kWJI1^3* z?v{nW|J_^uw}1Z?zdwDBz4{Ez{{wbE!`-)By?V!A{_1nS_;SxQU92|%u%PeEIW8xD z`-k80>;Li}`Ro7qfAa7C^Z&&+>w9YMq+Mc~6H^wZl*q}^sZi5|>4FPb&s^1v-wXcU z;h;=;&wjdMKkuRsXrk51{rw&9zI)5v;f}R0wBE7mn5>W;PoNp3C}F94p`SXZ-O4<5 zGz)f&n}=Z<*)$MQPyY z3|D#|N(0hy)le81d}GAuVpLWZF}ihhaqxmHJM#TC>9D7y6T7rMTcPxi3R$U?L&pwc)k|t9Vqja1tjKD3dc48^4TsD5I%h znu0%}3u`{GNMp$>9oWypG!<_06<4}LIxHMH`H|IE7F$sZ#fi8qXhoY**`r#JT(CB= zP`GP}w~O-O7;S&0!qp^DglSe}&ge8zCeV_|+F2P;?@0?C2;zYg1_4PIT z{hn!>o_hcDg>ylJK6&)qq(c zfiN3BPVomz^L*4l9PIgU&)_+KKO*|zOZ*s*7kHZ1<@3{bKL#I%`Pk)oJZ{D3Lp|Ra z9=QmAT15b#ivI;Kc)_0z5jpb!0dmfiQkdtN)9J*zuGCt8_NNc@-p@({KLkHG{ax^8 zdlhNbad(x#Go%{fG=CCQqP>sbZqIDB_tcg`Fm53B}_g zf_Bg)A-y1Z#*(oDl}5!7DkQ8X)_5!WX1JF%O`wl^x5fC!LmRe zN&`M4R**KKYe7~4O?U#=h7>1Z6c;8}CWM^{ZccAb>rSg3ZHm@Jt~ySj%VZC$@5MT$ z87VcibSR1!@JaAlkewhU`0;p0S|hEAbW+likW;3v6RpoIHX}Ne23)AX>Toh}3#9=c zO7T-EAy+Xc@-9(!g_0*qYm}u^S|cAi>D1}FPAwr;Z&QP7$5d&OsANtgq;{HBnypk{ zSo=!FIeOt}u+)re0gUQKHK%z+Qb$BWZlNeXB`g=r(zs^aCN!9(|@t zVi6lxXBAj6T5gbI4}&aZ1XU+zD9NZP_DZ@YOcGrZKIBU9R zp*A>F*-CDPXApHn4Asz@5)%aByzB0O3*r(x&=(6J5g``u!>>r( z>e=s8eDdvKi zvsO4R!r`bK7R6UZbHY!B)7_3*uZhkjxF~2wMKAKKu~DSac;|i*(l>wvbJsy!5KCau z){OQRljx7z^C+e`XZMiO@DQ?bMg+qT4mR$4xWg0H+DI?*K?qM=zPrTsJkoglD39C) zMpbO_T|_2Pe(~`giULYhM|=aD_bnb3Q01&7@bDW717~Z}qn;m?@dYpV2_cvP%2W!k zudn#>^$oxLeC6}kov+@mA!u=I^ad_hh)lQ-AwVhP`^bdU()RHZ5>zO-hftV~Ny-87 z7=!d-{E5MnxF3%(uf^BnBI6q#-|2>tlJ7z1&SF-k9< zg^)_qE$%HBkx|zzryw4}-hz(3{gLKnT;|{gYJ@2@yU1q;Y2Y>7trHE(1>><@H|pQYvq}aYZL7(bl;`@i@+X)Wr#S4}8^}w`*MAtZ-NydZeEeL=v~usb;opb7NYsQ1y_2++sxXkQ!G`8|T;Fv4wBwY`sNVYIb^BqBpwmj!GFZ(vRE zNs!{OlaOHSz$|D^xp^hA`Ti6j|CkSySiCm$zBB-~QW!K7#r5lkGV4U$kWECd#E z8x{>?fgI%$x>#&#kLIYP!3fNn$SF=5c$mvBgaf0VRM!0~k8{&3{|hY#Ez-g7z*fu?={DKX`V#@NQnkj4QoAc{zVt{@WQm?~oF ztaOb4s)%Wf#U5)ZA;s9_X{@z;%waL+yTs%nmkVSOkaXd;2{ZI@0T?2&ITG`v#Mmk$ zZg<2l=4%C~gBy{g#=1VQxBxXl%Dz=%=81_zyrI%a-oZL<9kDj%`G^}Cw1Pq?4FCvu zlwDm=S1>2mZDWGc6wkoOaMX(cD&PX@xXQ?mhCjFX-4<>dKciA3yhlx$#PqhBbF3YX zD+J&Jk0AUpwiKVDVwT;r8NgZdnd9=B2j=)BfY$q5QQG6Frkfw$lbS;rlXCav|sZ zNNM0q6KwteCx$KmZ{ED&-Me?Zd-sm_@85Gg9;vlbYvt|Rx4eD(mRc*vw=wG;6&3@Qy!x^?UyC?H_o1_m2D1fwiuzbtpy^@S>;-JsD2L(vjnIM>A+?V%3Q| zPpnJl^j&B9y0IQs($#BZ|0OQJq`!NG`FG@WpX9`kBE0=|Mzz{rWX`cXu3*N6d_O@80q1)hqV<{du$g1wSvC zE4A<9L0in@YGWfc3=kMjmClM$o0Bw?Ovtt1$7|&NmYfSyR`$j4yn;Em(z&5=EeqFj z#B(EOL8cISZYiT=#O9z)S^`g~7^Ux+d*#YIGl`~3Ps1`I8mA%b!1spl8-C@`8r~Yz zinSU-(x*Z@3hj{Sha5Cb&maj!QCD;)PzqLdc)7-OPj#W%O6!&0R(e~)#zOO$SG$oq z%v}+4ayPVB#KRgL-r^Uh*G{XARx7=(SX%Mwuqv_$ED1R|nv|S`ISD0$C#7bnS+E2( zfpw$1VFI2Z(Qy`#1(lT~m1GU^ppomLv#vr@Nb7{|CenHZb0W=+Hl1j?kkW#b6;s3M zNH?@PVv4PWbtxRz!u?XnC+F&5*u5jiK6oYxn+5lR@+b)*p+vuG$vI`?E;kmb zOb$sxNM+X@sv>Jg7YkZuCHN+4wA45y;XZ{0K@n$~oU(W3oiJ&RO@9i`j!+s9daiUm z(Q}N|S|F#yEQLKg#GUR9e5D&K-O0U^dqaCeTSL2|mV(#l0wlU;R#T2mxnG?R>!6pX z;3G{6c@lI|bjs)qX;RQZ%l97Nm>Lz~zQbCb`=xVtfLDd1{NfPs%*j+=~3oa6Lt_$4-w<|d9k!glJ z;aYIdK}T0$9ZRcHK^@5rkqW+odl1hieANFs@+m90dK0NRF+5B&*KUCa<+b8jH-UH940FR3MJ-(jeQH4G8`{QB({wS~i zoY+pU{@(0w?jHZf&l#6>{jf`(!9N~3XL8Q$_j``VBd62p{M&oShVhf18_bM#UAelt`ayco55NZ> zd}l)53&nDZz1^;KlQ4?cozx+Vv&XnXxpJo(9Gh@h1r}(Yup+EeEGL$oV9CfQqx8nq zJGpjJw_%9S5%JjHPQoU)5W;Fsa|_Gm=|s!NFc=_(;uDiklp*V2ns5ohZIwik#7>oc zf?YC72Q?2R<(feXrk$i6Re@xfhOwz6i7v564WO1ueMjn7M5CrlJgW3RqAX_qMbMA;WgZj`lAKJ;koQ%CPBjTPF8X+x)q zUsa4k_e}NJen30Dc6zVWR#|OcP=A zSilVh()eH8nBCZWt;VJxC{Rx?VD{H0VtCp=|bl_pR{*>(_TjWjd~$-txoCrlQa zuB@$5rBJ1?TA`7lL#T^%IxCr#Y(1{-q%N2WN`fp1APQ!k?nWa{6@$rJQS!M_#WE-4_ zUjbxnf?pD8&k-!nB|qj9a2y%=us#`8b`CfCfVr`@#&J1ve=mIa zU>uH}UJ{yaVgG@?oUpz|ZbcByArz7z1ad~1+zE!=qZa$9join-kq3tmqA-g->Cw*O zA!OkY8gv%%N4{YbXgSxMOZ4O6?hYGsQDlpg&G!?Gx(x55pN+&Xuqi&oy`1}_06Oa5 zadZoRQ5R89A^Z|`G8)CfiM)ktb7lr4>ML=l6K#nX|AF_2&puUM2_Ey|=qNRAD~0PBQ6;R?1O zEI2E%;t!>PRxIwN;#BA!#}Jx<53xG2QN|onZA5)X9h@r#A&z6a0`>y?gL#(S4Kfbe~6zBVTd2t#$|V)dk7nh z>e_;;j%EVTNKH-Y%e^-9n^afwRAt=sy@9^Ofm_q>)Jui@c&%aOTL zYeOXHAgMw-3PR+eb!Tmz+BzyiNkYlWyi;ypDX(A2`J{n)}ewTF^`MmA`i@Ci6S`Q?y*)K^%`A3qW^fj@7;nd6(AAwWC?-6nE!&V zNeNOG`UGhT8pj081ojhcH_>);cyvQ*^d0Ts-e?A94d3Xs?rh4QuCZ3_Zb5_Mc#6Iz z2vVYLw@n@;g@9#JR5FQ&c|FBjvLuRT<}@?siCx*TpZ4s_%)Fg2P-doax#x7&a5&I=Fuu1_2sXsgzj2a929g>DIV-RxV25*s zvm=xVCV4n|@i}A>iYv*4W$FC)-+aSg|N6i3H8=R@e@W6`(hn8B`W<@u zduCnP??-<$LnQL13a8rn!-wzqo4@;i_@Dld|CRs!Z+^{hKK!1$a>BltNuMb;FD&zc zezTIfCf!X;hlG-uPCMp4vCDg|Zm#+4)#qG)e#PGQT*;1{6sr}yGw%wYefb5`Tv(PP zUw`v8|NZOV^56d9xBTJTulVlWJMP|np!QHOUr&uT^*D74kOFqnIA&WH@D*#0edoxx zAoqgI!u)p;eD{gLXo(6fr&j9AiagCD( zL=u`nv!NcF)S}V9O`L|uG4su64Xi2N60}4j##VA(IzyP!Rr9=^OpzGK?i@rJ8mHmpY`z=h zRY*%hb4F7p_$fQad54hp2tu+ zjxB~A20!M`k)xoCAQf^GvN@9(vo(qVIRT2HR8dR?!=lU>NJF$EX+;K8K(s;ckiw7j z4pk5`A|Wu5q>v;b3YMUo(R>9?xDT4IS z!U#*o-9QqmiHuTQFq|YH?s&JDryKMp^sY23beqwxA@E)uQA1ZnCc!K88nk4qMz4nV z7HqKYfCi21k+&{}s9*3TgMS}%qk>q_qtRM47gl!`@2oB~0UNZCxPp)VC5A~4fgf|a zLnjn!oD@97Ml5>>dM#Oer%54am~&#zL9=aq8CMV@KHTvh3mdC;@t$#8L0 zR$PvVo`H>4JcR+m10rA4&>~WhJhR27f&hb7@ z;7M|ybacOPa$@~9?$0lNiN{A>+9Iw#}IZF+C zz9nTvIixKh%cHOC|b5)rn#iwTf6JTJkiSD{V70gb>9|2-Or)p8}hm zGAniNR5NN6s&+~=tTl8~Vhbc1qCo9%*odC{>E1ngALKXY1Y2}g?{p8gE^J|JQIJSO zhh3BgbO|~`js?j>nId>~I?&zdR3aucIz^0GVAf9cjV4N?(E|UBj3%YsUGOJFQ5vGI{+!RC*aYOe?r zmet6Ea1U@vOm*EY%k?Qh3Rfp$PGQOM`U;-}WXR*658x_H{lTjzp1mGV9-l-FGTx}# zsNG|ny(ymq8E*IN5)L1~-zz8?=h{0Tq#wUUK$2nmwD6Dn>q#!xbk}~kmiHKC4-bjV zJBzu7xq@q4(>j4my6epWPb!er3qk~!!X`o!Apoba^b_ybuekRU=S^9*mACKic=uYk ze6K8*8Lty&C#)Ty7u53*UsSUI8*Z3H2V}0$q6oiGSckf5bSHsoWEA=650`wuGle4_U*ma@*#gW#H0v*klc0k;ct_}nq+`6_l?~iF zL|QG5;z)_4gv)3fqfRba-`zz3xpq9o(@Dj^$&{C~@#3cP=U)W>+@0}Bv|JSaBkMew3@%_h6&wR`u#TNmB|_X4W{t{g~h>Hs`~Wu(yn zi6Psdkji0cbuEQcv(1rB87;~#z6Ol;ivv!sOQXw$9Gm@+TLl!!e?1=i4v_sYd&~0K z1fK4vIz%Hs?*KIG=M7MB%dn(vJU4She)kfYfk0~7WTPA%?My9j6)=-L$ge=?^?S?P z4KgScWM88!l@`oWNnpQWAO#M0rGiKIyna2yGdu%;h+t;S^UU4d3%>dJ&*|N{zrS!k zuWVZzjPt-<@Z{psd*ssUPV9&NqVg*-NY}7k)jn6A=gTt)c9gTS|+}s#1UKC!w6l%??Y!T=duylHZ);haz zfA^lvA=t-MMA)~ws>u`vrHi7GyLjYM9 z5gCkK0ab8CH5CRd4E1q)P#7f&B?>hP8cLDEq!V+QIMySFdf-rJ=IOv;Ixy9|U0^EH zXm7bH4D8lacY5FtjV7qI^0S})j90H-@$TI_-n@Ckn>TMhYDZArIYizfP$Unp<_HeN z^`?MX$ZNORl?C$MMA~AiFb;{F;y&0hf`uZz^UGg-&tLwRH~i&41m`dRJLK*g{9&eD z-ofQp==v)z_bYF|`<93I7mm{ernGP_OXIKp{=f1+|A+sbzxen6hQIvl|HyB)H@v@F zX)i%v3UUIGz$`c%3+sB%dwb8x4!r1JF!hO-_KH{a6}SBbFSnO`aeBq=d_z&iW23Dr z>vF-@jd>AX9*_8GqV(WUD&KtZ4d1_i%Xe?S!*>twSo+G=H=+lr zge`3MPFx&$5V#0&23Z7AK@U)h11r9*_`@yjq2r}f&YhRvD{D2T;!qd1Sc!8GEmHUF zK8Ux1Y>CQM1`g{zJ+%-ABZ}?={d)^{cXvEIJaB)152RAdyLa!Nl?MJup>^f^hg2Fc zF{(fnXBMYuq#}`}5*r2)My!^KIkD00j8`Sf13EoW>H||fgDlV+TL=$ZMcNU_+XA+5 znl5?eyhyHJd94a?$Qy8ujvXpxR*D{pb1wSPomd@KN7xW)hz{xKWK$5$2se7j*l!AE zPALoG8wYDcFrnrieXB^E&~`*^#&kw?Ol=X4Cafl`i?Fpa4oo+qL!qEr37cqCw9Hp0 zbwaA52PdQw4+VamYpOhey)EdaU@a{X&gi&JCMeQzNxG+-Qi{@x5p4nAfCc)B^^J*! zNk?j@8%YPQN|}VYf;XW@Dyx}75ywO@0u6z2sO`;h4MOwj2+a%PN9H&o9GSS`P#imS zR_nYyG%f;iQ}CM!pM5C+0bE^-l$Y+y?F-PhULsL+-T)NRWFdB=<0nZ^m{qkP$kv z`T&x1nZMxkaR|9)yyt(7Cr?7Jg=bx!JRaebfKP)y_U~a&;c1)X$#V~bCjlS9HSV4e z;gO%QzxWugk+1M!JwKP`5%!dy)bS~NHt-{OQp`2_kEY8V{paI+Ht-|(d@Dar`LX}A z@Fc6>DL#9zKdr-P&+iETl(<*;AknAgJ@tS5?!*3y{Qt?P5j^E^eI7pj-kw6f+24Os zz=trZ^uxJ5!!ta?9|EW<(=_q&<;$z#gLm)VeQ=BC9|I)+_;;gu@0P|_4+|>6R6Fz3 zFzdKAmeScI1++m^{we~&5P?7mFPr^1CZmINNLa9L-Z0}0#LdDRv zW9N?Er|d>NboynZ_lDDPO?d+gN4-$n4JOK@g^7xW0TAMZG+Z4ood{*akZ$0Hhq0<4 zy&=7kTK!HBSc|e~VbwVW#l7I$1lx=s_ZyN0ctr}fsq8s9s8ihj?gpx9hRT0aZCEYi?9f<7&Z9+E9 zy(^Nu5}9!Vj|NVP0IiaWBin?p1>c~rjc6YMi>wXA-dr{=WUZJhz3QS(2h!DicWDyhsP46kSFPiZ;FW1+qhkB@CSmxD^Nj-q7Aq>e%X+V!(}BF#T~W)_1yP3PLa+@1uccm z!Bm)|QM)7CiibfjXg5kTTooI_DjsS1p9p$~WrJ-~S}XJpy&7JGD6kuf42*~g2YM({ zdMfCYj+kehZb&cqR*a=?D?BR=Ji|W=2!IHt##Cl*4zK7D zxD@W0IM>27J9S$LR-y&;y!NIvK|CJ0GZBCRiD+_;Mow7blRepEoP9(0bXGt4{qgHy zV1UdpnK^^)u}cof@IQSANK!5t?MWT8jHl&4g=@r7F6n=Yg?9fQq}K%F5t;py;E+tR zcT486dmh8GTJf0w2ktQ}1Y*=_-$d_npP@MpD7Xe~FpBr|O_OPyq9G})5;E!*j8#ww zght__Gm8{lgx)94+ZX)i?Z4#h{a3s{S1!xK!~KD`zq)7r?t-r~GMzB{Giv+-;trS* zNkM|N>}}a^noSB#0&_o2mC;U+tarxBppYa2_A!ifL$(njMq^;N%D;aDG2-I++&>{k zG9pI4IsyeDJ7<)Z{STwC^h;zJ5<;@bpTLs>vK3^VBx7bzJ9{e?AATo=o}FD_Ge@kG zf=V3~$Hw7YQK%|ZONM?rquLXN0TEOlg(|LXdWL8CJqJ_eIM2Mi>HO@AjX(YRf$#6n zEbGFuZk#Wj^93S0-jYyH@$!ALL*f?a$?0&$@qJD^c+NMUMcEf15{XVfaFX3*N(yaSmSI3A z`(+?&2?0-B6$6JV8TDYs-SNoRU)=DEubn^tMd!c&+BwZIO)!ZwM`!j8@t#)mQo?Wa zXJ#Zt5R(*YQ1A{dpa*m`trS>5adb+H`ExVWh3-!8DL$$}GEO21eIc1DT?CCBvWUp$ z?c+C^o5MKPh#)H%tL!Y1GmDa0he5{Qv2OtdUZ zplgTD$V(9eO60i97;jO~29nkg5J51AKthSIEGs&A3J!#5Zjx=}ZITKCq9|i+(joAu zxi+Z|;X>V&2lBQYlU&EHYnbG>u{Zk}p5Yn(cwxs0X2#)g;N{C#^zOWS|G;J0a32U2 z;m!98=d-X}HpW6qC&16-4LvKl(iDh7AjnWA$nb$X_r)ZPhg^o?4*hh&ft3^(Q@u zp52*@T{si;0E@KxGs`ZdCFCtaNtZMTXra~%2=I=fW>rv=c80RR&Y@81hIvqhP5uWu3((+8JNR_-BSc5Cg?11N2Djgsd z7m=hHEEEmJcvKW9VeJ2kl(Z6CD$_JyU9FYVbmI1K!`mb^`1QkY03flA8M7IIf)5tIN6Py_`eg4Q~J^LM}Eul~!o{Np>NfAtcxV}9hz z0`1p$f6w`0;k(!0aXz03zo9w2xqsk4|NZ~XU;KxE!@vGd|Bk=><$vb2yrsPi{8o?` zLcIfI!DPkEF$tn=NH-=XZuKQ!AHL?Lz2a7Gn0@BZ4#-7t9GMGME5!sgXW2TkIku%m zvT|{5qH=TlB|m@pCF?i;g74qH;oI+i!#}+KmVfx}TYhu@8@}6KbFLR!1vHQa;tafZ zd=X?N=ib;CbU$P9o@uH~uR!aWm$%9nlW^B3er;)usvCF%f9J47D(Wsk%YH7FLYzta zN6wofNKO9O?>p?@<(oHeSeAwJ`OMq5Z+ZFhC5OWS)qymh;g15E3vY(fKt(x3a0;hJ z&PP#U+#Em1A0xUETj0JCa=~jL^&YMFl=?u_Gr=v!%($ODUDhi^hUML zq=Hd1Zkv#HMCCx?fa*jPVX8u(1fPVa!le}!ow-C>%4;5oDyjx%G&5|PiFt;@jLmbd z!P3zva4yJ$K{MWt zKo1}%L1xJ@5kXLrA1@V$g{Kv6fQR@uglj7=6+X}BBFDsh=%orvv-_6hAPx-F=z z2rCRFjzDPcNLy%IL(aybZ%tB3j5M?9k zMwvH`-8qOb8x)h|DrCwqJi z?E9{#VUO_m?n$PfgAwtAyq-qC{(h3v`0+{3pK0jFae@r^q()<`KaT%F*#06us_zrH zRzY@8#`w4nAEx^_myz^`3V9kd;_S!0@Y(2B_)HfieDvan<2}B4TILmgFyV*r*xLy| zlRuPa7hF z)*A6-o%TD<;Mz|uL=f#7Q2q>XOr?cW*bmP5kybot7 zf=`986j#?UiW_xzU8n)JD0UI-UXXx(Zm@3je#Tko+7K}c1@$y6U}}_*G^m1rLTD${ z^Hw}d+#8aH0fIsk!&Fi}pgYn!(R=QdRN1t$snbO9J`sL^e!%(x^#kS=i9&eA&1k0h z5TI{i^iaGj5t?Er5`bgAVG&pfOd3)Jkqrofg8Bhj6#oT)gzR{S#O-g{GUkPy&9=uC*v>CnM9Ar^2H*MhHwzJjlvXbT!EO*Xo8(x@^t zQMi;sZ&4W!kt2P!}Sgq0}63jKC*y}J{ zFoYwXz6X#!-x0RQ5xD02!TXGJ?ThQ*80GAcO!mw#2J#bfWfu}Fa}}I;tbgY1klJ`j zOAwlj`z9%Bp4Ztzm!L^r-Wq_44kAXfFGG+B!i+}cA}1bnrXy@I^ZxQR-@g8ryng*> zynA=za=EZRRNnvNx3uqHBW=PCFPNr3XVO0bok5IU7!W1fB63CWSvMoZ5ZT-QBSK8UZ*$&M}cd$-}k`Z={e?Dh@>C5|1=g8m671PBo_*RD;QcsVGx1YDp{2`_;jw zX*FvXx_pLb_(K4Z6e6B$I)MtJi33GPXpF!C8e zkMVe{&qpA{6`v)V8>eAChJWBGb+8UH>ObNJd2$xvq!gV$)@hWGigYY{ z6(ubUn(pdfnq(6Yo^|kWK(c!U1SiJY>k-*P-xFnp}YY+jQaTpDaz@r0!dn1(uMk8!@0hZ&Il7$UqBh)|@(SjhTTh`fu=17km zS0D3ktUGGMNeUOHVn8zVOmdyd^=c>#XvyD$>9;Gu3FF&%Udx{08J^(@?3RV@?(R?# zmSrU(EoO>zOoQo{Z+QLsOmAmI9B~jfIuPCX;bmviFU^pVv zJav*`B8>GXNN#V>GL+Ge*_Kp>V1Q)V2*IOZey*@tT8~r#VRS_iI4ays@ajhR*%!*s zzgE6{>D(QhqYXMSKqW8_3MGbkmf#i_MnMbMJ@iY1*u5w?S}oC@2-=$oKT%J=g` zOt*BMaL@fGE@^zVt*{lSnoL;+?GP<%6FEfc`<_x!#(GXN`fVKO6bwVDQkiUGt_KeD z%zQjD&j;q|z*H;8`N++D!|m~w+rurl$2;!E@6&u5N&_>sOq7xe15XtNcK3%4yYj%R zSFd>g{yoRz@%pY1%n}?mm_#@L2U2NZ5~!rYz)YmVfRM_ikOy&*dYwr_hd<@$2W%DC zKwVib3%xa3o0w{$DySrl9o=c?1>ZLMyE8?cI?o($PE3c1^^33g)1UnbfA;-%{QYl! z$>06vZ~5Erlz-&cygM%R33QXZVtR(;A?B>}Ip@;m_ox_$8{zn8&b+LAbyK)`A#@WS zZk&fTSQTUw-T%xK_v9Blcnby@gsUH#axDdi9ELzx|fh z8u$12ym|A6Z@&44XQhEZF36h(CIF^Tya%N_ht?5K;c}fpCSZoJqLL3Tl>%^VoH$gLJU4rcl?Hu%Tpo`PCU|k(s9gRRX z5D~0ENXIO#>}v><7g}@H4eoL7W8pqJ7YA=Mb$LNu57c&|_B*64h@{h4NJZR^dm~!I zw|?#CxfwHQu`kEVXi;dr;4OCq+Bawou?7OYPpsW|=o9BS(iGGdtU{eThZr=1D#arC zTmtSvb7$?HrT4Tlh*D%7^qNvUC`~AvQnpk&JeY7S!to>=@07!>vTie5yJg$Hpsla) z5ReB%-=YSh8$?nTQ5!A`PKVl%<2`bGK#vcIo#}0bbtU>j_eO6HTZNWR`$Pc6jDuHh zab8Mr@!;G$7jK+>rBUdVNUNk0V~2HE_$l8sutRHkyIyOJ)*83B zw^!xVpTc^*Q%y;=5Lx^=eN@i2Ng;fDB7bS8Pzs1RN0(pl9xmyP%J!n@dL zCTOi#si+i6pQv6bt`rw)FH~2`(kSQLb82_C)$z*;?Tp7gT^EX+C~-saLhUzLE99|s zN3G&{gSSG$|?cY>G68=3oxp35O<1rwj%B0)9ZY6SCeQ%aPI#s3>W4 zP;is9{%^u6%8Jsw#`dDB3zPWO(sMrj+`8hvZ5(O9O&q8nQ{Izv4vbux-ptSjnD z4W*!jilPO~ogxj>6_HC$CZ7mD=KLGDIfG6QquQDj&es_>dek!3^bnyvs5_-05qWG(o);6&mv0|-=e-m3mCROTNR zl#c_R;Tit_1VJz@9Oe@pND5HW8iPlQ*?WY7s^w1>1rM0cRYpNgmY7 zzM*mik{JUKV^1bo`lv((JmuE>Q(++9Pw>N&uw9`+p4Q>x_yYt$N1gVn?d}>W+Zua< zQU4T_+yh6&_V*vR^P2C7!;{}1yL|bWemvuo{2?TU8_`5?0oR~QkmDn`q`+v9aWl}KX5LIgO2Ze*F#azQ1E#R{UZt@13}G#49SVu=NYd_9FXK zM>RE@lLENo_h{Fy)NpOCjG=lO_5%$bE>&E4>@^D^xxNJng z)>gK*rodHfzzxC~kz=BFlt@Z-;JT6 z9NkBZ0%Wvi;OU5@K~fk`2s0=ZCWy&k9@n3VBE7SU5}~v%^yb9oRCT5j*X!1Gk}TK- zdJs+ME5r)6!MqK*%It#Nn1nEEuz9db8WVMfP!__?XhIz>dzf5refYCUM5ICA$&+W{ zG5U4y10IJ%CBM2VX-RfAs0F;Hj9*$b7TjU0hRqeJhJ+*9@RW?rh?|ht+y%0AI74XG zGzd_L5?~-EXvyzB1Y8?pmQEp0i5p6`Px@}vEw0}#kzbCeWPS*yM!7moB{wY;HL4nu z6{_X?R3sP*1H8!8QOwr%0z;Y@2C?SV=m z!f{_ov04ENsj#~dX?K(qrCAVZAmgARMz}YE4Z=W`kdjPn?=Tdsgk*+F7&{%2=sxkH zlPq}1b;t2sC>(N;6q3!?v;2spUBU?&tIbYhDEp!Uk=AttgIK|rw6G(&z7S_3F2F+G zGT;!6T%0o=ka`Z%*%dM)0^A92Ls?*?z1}j=@)=GxBF6KHyY5pXX@%&ZNuz{-sAd^i zh0J1iqhjfeY|n_+`INLcDBMnPdw?%r2;Y1u{NiWIm#4<uwQ{fn$LWUC;fB-k#PN7yo@S;xbDWQy4maE!Z#W)~ z9Oo0KL%RK@fq9x9Zy8X#(g8okwO>TIy}jjdIHaGvJ?o&sQH7%@r*Mws%>#v9K_C?d zW?&-7BbCH5N`9aD6m0J|3AXOM{{Dgg^KX8`-~77K4?hPx5M7DIiOWVR5mlDWd4KPG z|8|96-ShAN?(g|m|I7c%U;M{^%bWREtgklY%L8^N*iAw==J9UhvJ!FZjh5ulU)^6USdVb_uZ8oFy$A`6(zb1bwgg`^MIstp?2C z_5igpojNaWCtj4w@wRb)bLMTi@XG>kzw2yDBAg9qk`vYhwZdE{Zsx+V3Tr>}uIGVJ zsN?>-S%3R|fN7fe^2;yz?z``3t+6Z%uV26BtFOMIl;H{{Nk#bJKVG&v=tyTJ>m^DgUsIgIWMXkfs zKtVJ>9VsBy(D{hY6{|PccEH}Q7%SEm{G!O(Fa^_&>IJj+h$*Uvv8O!}Occ3Lb)}lK z)diQH%JMQ1RPYJ<5!nt{J5aWnDiiepr3xhkvm_qgFHlr5Byq3j{NF5S#c6X8r-Iy0 z=-mW&HTT}{EBa7~vmvhyeK4vkvlb?+OjemqnPg)YXJYV@#b_FA2Biq47Oa73!{UPV z_h>(ZRFDHxa6!P(w9tp+Y>G^saFMS!!TJ)KQKKRO|SsAJ;P5Txgg0+$ABztDW;y`OpK zoxUA8wF9S3uIoFGwtOztB6hZKJJeWwLf*?!pv`s?oJ@mdZm+bdNN6 z5svi@v>}>G10KN|g>%fj=ZLE?7tpCuN#4{!V6vcCAR2@Oy+^jWbrx;3fGR@KRKTnP zQ+LXxP&T7k9Q)U=JqLklGzjgj}F0S;gdKq&Zo!ohwA$I&?oSj{Y!E! z9({XlKaKe*d>rQk|EJxM5TB;oA;W(l;b($BjU{}L`$yg#J`)7M({rA=Y>)V zH#av-Q{GX%KsuGLzJex5?TEG@y0WI6*p>!%vw$M!WHl>pE)SP(@PAklt=J|txo3n`%0fO+TG*Bog zY5p+x+gm6tG(|N8%saDm=AKFe)9TcV5@JxDj!_~OvLzq5I6Z`hu~50l#6ui#nK2QnI;B9Z9o3fO6qet=3Fd)D z!(u@>CqWPgqHlP2+*3wG2n=UZXVyk>C_V)I9lUEQ4T!U9a1qZ76(r}GxT39~?zBx) zX<&7bo;;JPV3M*HO^Aylmkn7~SXN@|v?0^eRS;EY1LL3W$5C$;;Q_jU`N_0zwj=Ca65d*RRhrPFJ zlO)Ft^JDjIo$rR4bYYl1h4`WyV4 z!mccAs#G(Mwem0>So$sM?-}u#A&sg4>p&F0k{gf#+6DE<(N`b@E=rX=Z`ulEI`RDB z3KxM5e0@Qc3YV|Sy@H6yeET!;jBJbEDEJcON?7uhEYF+HTl)UI3pq4ZksJ1CzmhHL)GfRcb zY|0mX@&<44?;TWy!&Le9-5tXRtt~A5O!I{iPn_F{Wi(nFX|0VSObD`VBq$Lx9?85T zWg8EbkP7UI|3YeLmke5Rd=UjRc5l6xF*Jk}wpx~j91rAqNJE4`kO+j_$lEezJ_&OE zNPL1o=w&g!2h9l>7tL`i*ao@hv1`Lhln${%Y^y#I(1vK9z#ycOVR|eU(zMreWK#z- z#C8Tg4UTZdeQQ``)ewY$>!!%h78l#GkGd%qmVoeVrw*cZ!dlJ^O<)yr#Y48ouPWIT zA3f4b-^qiIYv*`c`Tdje&ribrQ(-EpSRStFNO@J3bFiES(Vd3_e3+cO1Kiwz9kYu# z3oJsc!8k!IL0O%fwWDn?_u$y!=!1i=tPXuRZRF;qd8Ew2gKs_QUm{pZ>+;xxLwLS7 z_l#?v1_<3@Y;oBKfNrl2D6$zMM0%4wFfa+$0-tALk)TD;I;^!~^8q~^5P;sd{uB@+ z2FM7A0luV#`)I_b92gQ{4hB{fD+VUGc`npTP|BwCSRAPxRfiD71C{OM0Lghy!r7Hb zMI0?qMAA5*6ecsKQkhHTP$uR&ZAJqbisi=k<)ndaE(6}?vNw2xH+TUeDG)2gxV^oh z_YaIQSl2VEg4zMm1FDtB$0wei7S87&LbkDa8!H2XAR<-2FLN}Vhz`+mfF=zALI=1L zD&QbGQrT7q;<63qv7t_$;EL*v|HwI5WSa{vlgD=N!Lz}ilM4Dy5}R>(e^t&Sqle%s zm$5j!Z}bOwnnEMICC&(PqVS}d$h<+;vrl4sUQb$b-Qz@D(RB$sS&@0S{6wymBx1w8 zx9=IUG$7eGd)Eb#iCf&I-!=%c=^}zC)LFQ{hrf9zeEXZqhYttt?>Zs zkd1?)LD8UYN@1lWeOTbdu_9Pev?_L}$Z^6ZW5wxRsGE1CH1CK z4jc{#4)ZZXJuuHRhxw331BVQ|m()@Vr4+7D8~8PY07ntt3EUCP;>;4H6K|mslr7{5 z1E~;b1Qwv%x0TyX`Pw1BTM5To&gY&-Nyp&4t_)w8YvE?DpuyS}Mq3aG zib^Wcs!|aMNrM&(e7W zTYAQ?4n5_Z(;>Jy2670F6R0Q$4-NwkE=(Tu8nhBL3%o?iXO7@9@G6WOU~=?k;6-2* zI2-Dfs+EHoVFJZD^v2YluqEfKF(&#`qxVjePOB?!XY@9(`-(o?V-E`45OD-I@PT{7 zqvH|`HHOU)0})BZe|MCQv0^fTHHLIhWlWBiGoojNGZ8DNVw{eJlNjryv>|jIxD8x8 z#DFfuqJ-rU^kQJ=hP4IJo`w)&x%@Um4Z;S^I2(GO@*)GJ0o)lyMHpsUbJ&}&Mn`y8~n;qxGBDuuWxs4z*TsNj;^ad^NGER3lK zQ(r0l3C2KLDg;;)>KVKaw655+!n6?cLWm;1dAxT}A2^Cx!>ps$7!0&KQxE2m^I+A$ zqu|Z(bHN`KREd+qlOQM1#jtfyd#6O8y3657CHb{5os_8s;+sL2_B;`ThMF*S;Wz}R z6E<_iIN}^+kHHxhiYZM!x`7J=oePSfLwTtXRlP7fP3;E^XzoLF>awT9!U8Cqv*H7GMkLt;gI3(+8) zDmU!P<$o;@5iHAsyI+h5ZfviAWqSz9D>iH74fspvFpSg~UZT)$H z*M0Z$c`x@In|ri*_Z<60og1Bd+kOtd%=4;*Z2Q7Mh#&;dvWII5U)^7$xq7^&b%)n9 ze$no)6}k}qWect$f|q^1y|9-kbN+b|^BV2z46oqNB!2;~DgM`%|M~o1NsZ?{X83B3 zKMQgB^D1t?`AUEN*VMDaPnP{D8h+yW1?-6%t=Hf04qrySep^2cuh9Ql__D4&?k~mg z25;~N|9^srP;2Gk;elzIn5KzOpFS~0?t%W=AR_mw#~2s6=}&=&Gs5w$Op5u17)tsO zQqn6v#HJ+N+*u+p2MJgRy5=5-s-}|}(qnxECU>ekML;XaK@f4G8&Nw$JK~+_>ABBF zFoz--lN72K=3z`tnN*UWAA(mUFkA}m0`XjLK z)0j+{6|5*_f})NUhuImEvrbM}&{gSPiGDza10XS>VX0Ojkp}OQT45t3WfcyABQ~~u z2PJyJd!c(_^Y1U9N}i;q!=VqHG>C5_Si*C^0qB}W2BL=PCdUuO!cb4e zcMU~c**rr5$?IC)qLTX$S_TxyCkROc2G?y5ER5Xy=@5O}PuljDWDwT3gU^A*lQDG9tqow?O3}}wi!Ow`F2^Pc}5tuVi6~K_pM-uE+8T_W z-?Sj81QLScU>%iCkwK9Gp>!Yg?(`n?!Ltg9bRr%2O<`_IHDDm8RHYhpU4sp8X!U*1H&ft+`dw|{JDD0Yv$O$<`k0zo8V<* zTx834;bX2)y+D?^uAcYs3?lL+J+I&y%ci)o(UN7ia=TP_2iZhCTi#24vR`(HOP=en z*RkagkgIxM=DU9WN*sB;7W*KHGw<`}Lee!J1<|}FokA$MlFB3kU5w6w7VkLsiO=UV zA3r-EKTVt-jrAmqRfu88Fo=EQ9z-Gsdktdizvo`u?*bvkR?mq^>@N0+EdXB?2*~Bn zi2U=y@$X&2;?6Q8inrY~WCWggiY#mP;32ONz- z8VC%Gf#DfsupMrNkxG`8l}Q>0?HqM*H0NN!Twu05WuO+N7Nyv(h?3r!UwCNdxm|DY z2EQaw5vF21+|0P&abC_mEhoq`8w z1#*nhD-=$qNo1M;$s+)S=i+m(HnvbQEpU*Lp+RcRNtV69rICp@g5fE%SloGBJ3l_2`Td^?|K*TM zLe=t}#&Em`qd{MVbrnp6F$u>i9FGu3fn&ZPStOM@yW+ixWH*DCp%(hsZ9$$s@cs{;1?(<=&-1e4?`1G5Hd z&Z#IefvK=~U>Yp5QV$d5cuQA>-jNRB3d2Z?@ImwyS%Gsy*1;yZAc*b#gA~cIEz+cb zlt4ER$2tW%<>>BbaBA5myfh8(Tpr}v~CN*X&%%w7w%Cz0jwKA7N-9jnpS@?2N zj{dyB>kZ!E4cwyhAbOt;E=-J*m#nyJv+`FJP679w;hFNIR3+Ej=D11XtcuMQn(ub9BY3p+ylBie+3a6NjmCm}hQ}H{9Po@NoBzclYmk z_wa#-y9aKLx6IR=Ck{+AQ=Rf!auaW=GxIcM`aCnwb9x6WHNR(l`K|m~L4boOw<_ET z9Dqvlf)y^O3>3(#Y^-~J7AS#8U~KQ2?YoNc{mj6Svfo& zxqp1m-^$-3ay)mS+w3YF=!VlZH z^$UP~qy58&51h|u)^(-#&iCJc&+(W>J#X;W0b-6F(`Z25vEI?%^5nXa%g5;ua2V7> z8sCU0^MI)6~TK!Pl~omwcHdI6U3a^RwjEw zH3(f9UKyv6Cx^&jodzxgy$$q!M&B9w?tu47w2H5Ww~^zbZ?m=*qReQd-$5fN?$lUO z1f+pA+&W&Am!2m z5p;r8@??PJ*jNsOQ3a7gVWP%FC5;Z!IRxO5iWni39+bXfV?aNn!?4w;VpP>U&G5LQ zH!GqGKA#B$38!?JTTogfLb1{@8)zAH>8KB;Hkdq5SST8}&y00qoMv1mLmWt>Bs)355BG@qMQkZpOG9hHpqtm4kr4hPfU6@gBP&680g@afc@#)H39x)x5 z_AMm;EZamJ0gU8bFQPI57vBt2(rjRaGe!_X8W13u80bXQz!k!jCuCrXavVDBZbjop;O1yAj+qVT#JIJNg`qVt6&URmOc~xq&tnPBs96vA{a8 z(`U3fRvH!yCJV*#GzEq*hR`}l1XxGvx*3WL5J5&o+Dz#abDJ1Vk+mUhMf!^QphzQF zX=0o-$DT)d;z`e(ePQ*^AW$_*F>0ByBB(cFEVwU3TZpj|F*c)tD`xwz1a^agU3qMb z!QpUVnkMXe@$n0ReW@oveC4`>M1E%%E-Rk%y@tJAiW?F!HJUNT+a_4A(Pd9D9U zgAiBQpKl|_z`Xq$%CEw6d!HMcRcAc{7rfVfv#0$0eYPWb5%o;t%jg#*zb^9y#Ge1> zGVx+r{~C4)Lf*fEtBAb)vIqWhc-cQ+#qzof|Eu7un1Q{me*xm3K=BjNTvEQs@bmRO z_v!Of!gEJF|NUt^JA6_76?~EQMMT1v(SQCI_BG00K}?aV0%#fyh=By|!5Gj5#042(U9c{c4$=c1o+=63c85zEAA|^jqE@g;Fd0}q zV?GcIqB^|s%F6c2l>r2}A`y5`y$%pmG=myy zM(FMhU#Jr37@HnQ&@~t(`?JWPPMvAapjGL1fKmyoxK6kWDrZ~+(FSFJ1Yt(l0WT9_ zg6M&2g@ZaG3xgFOgOGF<)_Q|DBjb#W6A~v>8yb!&7!}h4>Vi-Z7s?ojFIc2(KsBHU zUW|?qU2s>t3*MAr5G7?4X8A3~c4gc42v2!U8;19iCmlHCUQX^~Qea&qLsPmd!wuC$ zqXM?=A5l73BZ^T`+UP7TSenp!VR#ZH6jTf$$4m{$D}W2$mC=;376vc~nmApYhSOt{ zb2*zXO8!LfKtfS3>DXWdEQ&8mtU)Y2_YKu?3&I32q6wh_<7-^pFBjg72HxP`1?0xF zGMB>Le4xZVr&u}TG$}N@W3?X``bQ-Gz+iwm^c=t@5pPN z`y(VBJM1{PikDaM*LTm4FXNtZz7&q^0*?zlku*YN84*BUpnIl$6SYWWzCG*J^|kQl z@~m9S>@;M-UCi-}?-_Rrc@jq!+}pt(FL707zu!YXhmaG@bv_kPh)rVc3PXi%LNf({ z#(^{Mcx?X-AD3Hxcs%ptrcVOaxYGuRs1AA0$iUOmi7%xAb@(O!iMs zwo10q4wBK%jIoU$0ysok5laN&LExknH?YMeUE7%Bk#$PI@EqseA@-2jAe9YwRrcKj zTGE+gosZE6dij zh@`v?=V@87&rke|8pp1ze^QE)g(C3o#PD3q>EQO@ya)6kl%t>rfeNxpDxwDvk_vj= zp?PZl_dtEKWEYqu-}WF-3Fa*>1PqwTrWl!jpGSnmk7woxa1Xi*ZIGuTdSt&SU?2vf zf~rs?s1oU6w!l;cDS{Sdos=*TPo={1T$m0s)A5LFVf2A_wgsu-IPNX!Xe)67&O!9; zw?%*k7`afbo>tv8kQzu8v;c{AZTkIx0zmcR)m!olM;qdw|jw5a#pt--_>$)C3aZ@r9@gmln^6N_IVcE=REPu zM_kq);W@tph~&CyQ$-IM+dLm|wdR1tlFzwDAw1K3fUktFFj7>TbMXSckf#%E_3hF; z=}Fp!rVidR-Xm$)Jj%#+iorJF06Pz|%`4$s{>`{x=VN@QGm`8J#HHE`UE8a;7z{i| z!Ol}{#T0VhuEI1a@9&jwKfrh23cvXvJlrbBS(%2UA{l~5Y;Pjx7&XC(N8iP>N7W8Uc|1@eqb95!pIX#Y9pm zT+>6YZ23clVTrR&ip@&9srbziua)70E(;O|U?y}Xn6s}VEG7?R` z#&WG$i!s;AR3~go4@^>pI%V8po;l7l$K!$f+j}1F-|_z88$LXI;KRFbc({AV?af^( zE!FJ=fz99`PZFq`@j;>1O09X6cqz#f{2Dx@{w0uaKJ8m?fm?xD(pVr321sK}BzaEK zNK+zZEE36&DtQ(18{1C<@ew?pR{rEtAn%{x{-4mJW42OnaQ zb}UdPV$7H+$Aj~57ku+UdHSw!C_;S_^bY}_+BR_gGC)Ll_wF4ZKYrxt>4`B0A3uKN z{rmSEk4I{)uZz6FuMl3GG~nB&dmqqqF)Iqw!Knu)4jpQsD77hdwe(om0}6P7H7%ZS z3&yl5PjCNlMTZe?#3)2FM00-7V$>ptIaO9B2}~Nn6T_8$hTeiEL7QL{L2nGXo6v_N z@?IHBFrI=QFxntIkQg~`YC%n?o3}iR&g7kmGy)yAGOTB)N}n5}oEiGa;3GXw^fJ?@ znXWTkCWZ^Xti-b5%L3<>=qnKoZw+78^koq<=J!l`&wRMUD1-%W4d09ghINJxx_WNn zn^SaiDx<-{f`bGrg*FP&Cj4}RuL84UI+$!Q*FiXRlqQOrA`^5S^yA=kY<#*|=ryP^ zb2F6U6O&iwm=PQy5D#QiT9D?X!vasQabB@ew{isvL~9@m%nQLn2x$b?o$2h%%L=Yo z8Q5kF(X}IVrqQY1sWgfNW85&>4gDmHktc-8BIrpeC!?P&S=l~vK2%3yAW|r9)OBW# zBh?HqgRsW1M$|?`$vFw*7NbTZNaqKWIg>9;A*2yP1`{XLxjt!NW6zT|AT&AHp`9TM zBNn=^j2MI{PQ@uW6QYW%6BWV~Y5`U7f{cRpg1Uh_BOp8t1%fN%)%@g`n{!L!U2FVi zG#;8WMP(_$G8@ayOgql>8)Mv7+Ku5;8nzTVwV$bLqn5!S3qCqUZV}gf5r1V65wzAWMg#ka1Bb(bQhwQL3c>bGZPyp^ z0kM5^e-`8`I3mBuZKTF?f)b6e@VeV(_?=L3gG(P|GxvT zalL-t>idc6ej*z9`Lt{JseC^b|F42SgXDE`d?oG;-rx=XeSw*AJRVcltIFf!$O@)HG*6G^Ef_6ucTx{34F(pR1`IJwDo1gOC`6-p zr-WnPFhI*dOM}CTM+G-Vcce##=9K8vP%1M8qu}5NqD|X=eJ6&4Cx3$~Zt1OELJ1MN z2pvfo-3YWhwFh+sHavA2J@@)0z|{yTxEgGZb94%9N{6n(LMZ7G-e4#&9O(kRqD?4m z%K8k7ron(kAR~yT^e`Hhvc^Ma99Mig6VnP344GM(kul>kv&tElCrlnW$`ezpsgT|b z3(LL>VIh^Y&l0)LENKXUOrR5D1ygdJ>Ve=XFYb!Cq9M7DFDXM672K3C!D>Tf;MQn$ zW#IHGL@g+V($YCs83P*wp@F1Ph{<5W2?pj6;fMz@g66@9KxWWMk*T0l$^8cciJ-bO zg)>H;Owbfs1<|0|U^=W!hcokG!6s!j0X^VWa5>;^U_mO!2hm3I^jh=xqKc`ZTA0Kr z;vD2mi@`b?tFNq*Ay^@{y}?*f8fGJ%_G+c5Q9>v_VMDOqDY9TFXh(_@MHn?t#BdSR zh0&B~!iWG3v_Q2$jC2;T2Y?|Agm9zvO}1f??+!tS5G7c;aBj-djnz|;-`E^FYG4Xp z9G?b0uk;Cu8l!jCHE65S)(J*IeWpaEP%sQ7$4pg8-Hg}_9~RyAx-<=0tg&k9HMSTG z55{oDFuVo6Iw**PjzSrVb%-;>0xZs09gmbFga)V*zRBxVc{3V#gE#p92vn7+R#X)e zNTFikEO$IjXQcj-vmS63W?hIjphLBho?=@dnv4R@Hn2a7Cj!|v%JV+satWCs&T^1%;4EMGTQmh$FnlB5)B9y~e*QiARu`Hp*VmvXQXEMSLQeCYrQEY&w9~ zqKnLPp;4covoE%s8?L=wxuct#l>0RmQ(;%W3<=O|e{S;UGqUy5ReWsuLIAy*aP~NY zp=m6z7Go``AaITwK99fQ`{lpq51)U-A3m=9>Enr~PY2@Z?}_sVY`p;l38DFveWSk7 zvDLlRb%BjOE^(3nC2wCqzLKxN_U?W@X|jNsELI5p@a7#^%` zO+7VtJR0tn8?}*MDfR;*yp;$ zd|fv3IWs;N+Tp6bBEqih9-(O+9Jv_R2+}jI<>s;pVv3j%jIBxA*tMa&j9~y0ba&3@ z1?c?qSr|VE-&?+k$cZ-V3An!>yn8p8YhWfEkH*arrU^u!$H<)1pZS2{!RRR*8NQiW zC}4t0fTki*daKJWQ2~_{4*33_cl-#FODr~K0FN{p*q!{lZ#}G}Wq(N;RRt56#F@y2 zX)z(HF-^)m7kUBQu`)OuDu=_&`EVe#;$!5Ad`%dpsbnC6k4_*d92)~Rg$aaQlsSyy zgoF6@4kTNV9TIGlAYy5xW+s?vdPfvvk~~gwqH@rQ8$EENM~*skw3&m=%vPCfV$wpj z!emCZO1Ux^*iX#aX?UB@-rx=1;OAf;Bg~9xnz*~WgMhUS`f#SX^6ue|hx;4uZ~wr* z{PQQuA0K&qTv^U5Yx7*!L?LV4ZtK`W%4>!WYJLS7?l)wK?@ZEdo;ihr0C+cVvPwzn` zc0MbSs04x&+qza2#2hKf3LdL-ticGVWpFz?_cb?C2EmZz1w0@k1>=4D^{9Co4HWXo z;{s|R3aa1;PV%54$p@$e#vqTk4MYHiu-J?Sf>H-M8K=9#dUuQ0BcnS_&b0AB=L60+ z1V^%~2QdaAjXV*fB9uhz0UjIQmPYAQDa=#lFiji}2kJCYs!@vONdfcB@o?bgaOCFZ z$o<^|@9y98{@pjcdw9?LhYvj5r_sRicx0L;O3C?tHwdsRLmxA{7z?C;TA$zhUjqmn z3GN6UKL*JF03ZNKL_t(;2`XtQP)WgZ8mLN&gEBirzBAo_JCfg1L?HVQJfQW?pFTbE zyWjmYpPvSy6AU5F;7^ePd#{MxA@TvZLG%W_{VnnS2EK9P{Uh8g$UM-pVDdz?GY1v! z+;~_jcYnI$z5j-PIQ|Fz{_a2U;r2J&A8)vsCQ1<^25y~dIi{&E5FMl;(?}!I8nos_ z2qMnX7F=K|6;bff>1)uO&8dmfhttPqM6N2$x=?%JX3S`K&NuEz zI59dtI^I`CU-7=6ZNk=>(hk&ifC3{{VqS@sMgvG7-YLzQm(KAtAw%=`QvxeNw}D8b z%1ULWuwo2`585h76m*@?sOT#AV)(h>OCj93%#z}WrtzhXRA8UlkyL|o;+BztwhMqgL0TCD8 z)(lJw!bHd&@E&*|qTr+8W8mXVu1`7u9mln^=*n3eXI9QMR_Pm`V@x`8BS-G#h{T|A zrj3cE7g{TL?^L@Xjj#WD!QC%jb=P&}{{Ei(`}?ak<}VY1{H$Vo-dhro%Sd?{_X4ic zV3WgVG>|OwiwJm$aEDh>*Y#Y}21(PN@O+!CxU}V}z)!xLOSEJVK(26X&wKrea!ZYu z^(9e4j#OObd4|MvjprGj+xRm68eX>|zKD5-ucZXMplSb{f4aukXR9+8(hj>A&KipdA|Rmo;P@dH~2RP5xJZ=psMU%`FDM>UjszM zUv!GXNrg`)+n%_hu4q?uRV;+D?G<|;^z?SeAPojo2F4&H7$#{npm5k$Mv9}cq9c{h zDGjLwO2xI{p>TrOf;8cXa6mXRab)HY=`FpxrNcdGgd2f^3YZdA>BEqb?pk{kT7Y>l z^^T4~>7CgJHozP-Myk^Rp@!RbeybF)K#L=#qc#W&I=Tg84S^<@3)L&OPLwlgAW;UT z46O8RecwR)(P4~ zM+phM1YrRMRFsNQnJ9Xs>Xb$UbR;^`Qy-0?h;ModBB&^$DRU1s!Yub3Y{l!K%?pD@ z7a^>oQYgI;0mU8jfkp=zh@=sHB{z7C)GhGU8JiKxu1BY)h!$k76ooPdwRNU3D5Eom zWVTAngcO(~`1sgthgP^1f0)6s;AkS9$CUIniO8G<9( z_g$dDkbrPh(r7>n3#LKD9}JVSmf`}I)uT=g_A2MH_qV<<>HERy}eaD?>Gf z^H84J%%4sVeE<0a-+%6WdOXoi6VrM}8AsF)2m%tQJbV0;oi^pmSATVXE%>_5vOfo0 z=7iUY`}^3Fo_YGnzI*Xoc)tG{TyX5qm-mpD>vx~&JAINBU+@RJh*2U?aU?p>Q5r@X z3zSHPnrnjrNl!YHtW;TG05@AH_ zUxy+w1BJ`LwVU!f+aJxvs00^l)@@vl%_>YF(=YLvZA-X(h@qJ+$C%6)S7n9Bh%Mow zXcsxQ1#uBg_Y(R1gWmop1pcBn17x*?ZS6$&njP^8sK+(mgI&RjQKI)0*C%?LB8d z&Hw*LGwN$-1uC4QIG%)HfO71dHGMy;NJWDbXW_d&f zJuB@_kpeYf-#hDED9emYXOJ06M@*0-p>{+z=mp9S@t)`_bjjC+Ks1F$YJzy6Aqa{^ z3X!ZB7K(*Y;!x@}^BY5V?}7x`K!ioHI^50g8wM6YfuhFQAs;6oNBw0xE%kr!Z5P5qb8?oKu2;PTmd~X=E5!selHdKGGV3;Bh7T6^{*xAPo<$+ymG?2%5l@S|4=mJ{O(<@UaPR57(2izXnc4y^@t$juFU*LR)b0Pv% z8qqJgV4)q@QIbw3$mau?2t_4#8JG&EW#<0w%rY-bHFpJ=ri^{*?*2RPmTx%K ziAfC+KpZug=cFfNPvf1Pj`xPCQzm1%Ti9E$UYzyPpeRPL_MPuP>`W3Yr6NdvKxTA- zUB*I*DMf@+RKDs9|FAt!+DFR1(bmR?OGkIdkn}XxBfWs_9pAytksla$0{}n$^b_av`ORqH?*t(opDuasf-C7+OoiwI>SzgSg{eTTP&`Q^)ikU}f>nd6 zfk}>4hr<~+!Aov?@4ezO0VnVU?F+>V8ix5$YEZ~sj&dhS_f8L|NxEg}VsxwY$&p;? zL}W%d<&Hq!=sob(((8ZMoc|rBIq+6qfTh2o71MCEzVv#dugm?XOoRxHlh@m zDzP+TDJfUk8*)wBQpYy;#1kG*1W%yQDTHkx4Jo8?L>1hEZl1;i=Byg53ah4f#Q?T| z7M>pKqmjF2Dd7|D3(+UM?|6sa)9V|-t<7o$gQRmx0n2CHHE;q~KqKi7A;mZugpN6; zN2kXQtiTnGz(S}lbk|%I?iTfsdUgA+IF-lCBVB74OAVpVcHPQmGaO| zw1#@ecF($bpsf=-_#Q-e(p)o@T3pj8OvnX(?}Zixy0Hss1R}82V7DMbz>U%>A%cjf zH$jAgV-X;S^a{FUU-js;h#})KLX;qBNzV=g$y5?fD%Lm>8P6ldj@==8Al)ggGw&S& zW^v{SW`#)w7on3L{)k92UOHk8OKp8h2vZ3bb57DYOJhRPp@enT>c|9?bS$;1@g%}J zX!Y7Q=ItYL{SjF=hzTzXObfK!$*pZp-#W3j6mmI)+U>wjsGcx6VS0z^87Op~aGB^@ zaM7e~r4bRd-iWrco72p3bFk!Xj7o`#%FGlq;*La)RZV(|ZnYsYld|47^xpaS@#D>S z;Cwza&+}i^t^@JnT*kG$xcA9G^01M#|XBc42;epWt1E-(-Qki7WM;q!KX9&+G| zHHupcZh_ghPl7(fm%{!$e9{58keB#R=RNxCvkVWs=o)#|8DA{(i-7~TvOX{Di^)C< zd{u!j;`6@xS-gt&XQq0_NbrdR2_U)Q&+%z^p1*u|f#*D5UVk~ePw;bf|GDrtyx|Rh zS*W%0@bG}D^5c&`{!Q&Tkjm>pjt>Y1Zv*U>5 zp_cMuQ=`g|<+=kx$|utqQJCVh5+}o};gJf!9zyI2C{q+PCKf6WQn}O0?!h&jYXsqq z9*$^1iXoE_-LR-gRKhDg3av}>?QKW;M(I#JD4sG^3&n60Nx1_?NF|uSc4Wx)WKN}d z4~1T6URZl_fps%13~7b38Pz-H8^(%h2lbRQtgvs&wi``A=RkZ%>9S?cG2X4P3cvME$MR!paTuu0s>Sz)nv%cphZ4ZH9L0SW#pjy9Hnbg`ySh$BqLfcmEKus#q1Y zf+G2jL?QZ2^hy`SEYQQaKO)xvd%kylH`oKDpgJ5zX~CM%ubsXIY>n7j>g)iBAP97Y zB7_XN8Y|Kt@%A+>HdG9iC-4iguB2yu$?;Y| zA_W_Vcte2@LFL7gdyaKk_Bd{MaYICI*Do1C5gEeOabHH^uW`FSvKjH7lVzSqIY*su zc^>$J#3Nh%BK{Y+$=hP~t=v2y(|;20Wm|N_dr{xxUbY=XkNA0H-p6@f-vlR;wRYl~|y0Pfax$K;*Gnb&6Fd0lHObS&CwJ4LN(ZDnmIF?$6;Xr!d zGF9Y-vgI4z@V5dLVXl>j(+L7DTN<*dI-)yf!M_&x{SQxUZBLzeDY#rnPwE}Q$S>F+ zPQWl6Kma4$Newp358f4synP%9x5Xz3L_Fh)jQbFb>m&M+RP#(Y#^}g-G?bj91W;}R zdffi-$=sx)p5pnvA430Vq=)1_>fV8!kR3?RUD=)v(ew9)o=tY;zZ55g!Iv#!N>*>0iL-6^hgHEB9-u-JeWoi5pXK- zb%8oTy)WGVqVn!n#@)N1PASv~58OQ&4F`3>a3%>(8f*$pvn&VNC~Nb;0SG$5ePnNr z^;N!>ph)(%i(nd*N-6@1Bbv`HB9GpMy_35cICzjcn+jr}DygKXfm$HyxdmQCRC=r6hp&St84m>?c2VV)#-2!gP{A_v1PBNPU5v2RzG4meviUz&|d&UU| zPsOh^1{gF!Kh7IB0|ED>Ee-?3F%gCEd zNs^f=I61%XJ=h#-&xHUp!Kz?2a{FTg(MYn=7)q3Zsv?^8xz%+%rYuMB8c2as1d6bp z3bxFME(Ad@9cLlpgv1>xX95t}U=Ko80wopC8PDLkPj!4lr5IDK%+tg?FPxSW%QP|7 z%4t4vT27qKcbv~>?(goozklTM{v8kJNAAyQC~#U%EKA0n<}?}*B-=UW`q%UQ>+8Q4 z2uWYgq|<@e`WXsBE+E5H5i@A%=zEBpQcr#S`j zo8)F9O$e_LcZ9ry(qVc)mq%i8Vsa26yko3rtW4oN%Ealt@%a9c@AiM-|2qE*zd8Ri zkLPb#rkQGjXs5@9w@xXb(wPcG1io)6J5k3}v8qTJvJh2fGp-*xZQp3EvAeU2LMWvO zYKBb}+;MV@6Yq4*(fdJWep%l!Nn>$g3d7g$@uw&D=CnweP8hHVau#fsTnMtH#b*h= zDxKMaSm1J}sc4F`aiyqZC%E1RKQ4kw<@+^w`a{tFb%)<~i{=VBH88A}waXg|6+zkutCrnY6jmLlF&%IHBp~PgN+QRIkjOh)^s%>nAfI6Pqlc z1@BPS#;ISK)(>bg7F{{n%E@+4=6Dl&S6WkgS9)|@1sBJy;j*JzkTMaq6H~`qD%NzJ zXi;gq(pJ)FA`Y{Uv?H80XWpE?HTt^I)}4moQ?3*Fq-s}bnF;(aoQfpr08sv*$A63Efk+nFVF?|K$_Dd=qC8A zc(pX#SW3RwZ{ScQb9W+w)*7eNiPP!CJkPJA|HU96$yx@DJI)Tu78tB`^1w%&xc%W} z!cQPT^356HWwe+1zsApG_adjC4~r4-vYvuKjPqOMm#JPP7?+5QW1KViD1l^Jh38h} zZyY>BWUB!rV)Vf{{#e6*?%vCyUf^YwKfnDPlJWBI=kY$p%lkhU_vga@Jba$|^N?qZ zo_!ViC-5Tr%Ragx$mO8d*&dN!hyO7IpA_=Nq+bmD&*BRekIzE>Q#h8$Zr7h)-tasM zyhPl_kw1#pac*mL1F!GD;SF#2tAnaiYvum_p2Kk9a=H9X7!LfoI}Pjw)+)*SS@Lv& zpgkxkz2|OwP7orrfONpuhyV`<11%sPSb%B=2|~b&q*D%c82mB_L=LMV0SZwanNx=D zUFh9#FSr-Fce)47ofaK03e~Y5S@Zk2~>oJ z(=_M?K7_65$S;uMsdPSQD|N-xpc_nrFGBM`BzM0nb;s(4O)E&JH)WNSBWxkGkd)^) z#ny^lW^^|IDnZr0Vy(!Y^1&XSYZEA43eJ@4J6`Z!Q_iyIv)C%E2CFCEX&u->G^hk+ z51IvGflHtohy{Bw_Fm{-@G!!aPyx-=D!daK!$E}NT4@3;0uMt%umBuvtmoh<@5pBw zhz2T2ky})(%y^k`uej7S7Eo?71oJ?9gT3RM)2=?2l~Q*@LP)0pD_wV1ZRn8CZrWI7 zrOC&9vQnUYMErt{9k4Y5FkCFPVxptou)Y(`u?E(a(sLKT;+5i^2tn{{tbj0d6jTY* zd`H8Qw`yrT?`zN1Z!9`ea;(z^xPI zDC=lb7WEQ`7|6-AxkW7msYuvmqH$t#W!pDCTnisQ1|NRfxxC+5KSH|#EejD5#FC&q z>UKP{%Sbzk%NFN9e z5eOZL+}22G2#rF2J|qnXOw#gj(w$k;VBlnpvo-FkbFOeI!d#SULN%c%R27OTwb=Mo zCd*w0YAMuW>9tW6{o@_*hBy3GLL@y0pCpOQ-5nJuW7~4mK;=UBopry`n$w!n`U>6f z2vY4zh$wIn|1lv54E78HyyjRSIhS zk@Obl$+v7h7eSq@dj}p6ngms068Kt$dN9@+lp^V6vBb?-A4anrdUx9B%Iwn74$Zit zU?zwK7(@w54N4W5o#i3m9n>mZ9M&!PK@78nZujhQrb{QZ5G=rnC{E}GZ9?jbN$x1+ zI368!&&7KIbFff~6if<53WZ9ELX|?%LT!~rD~m3iSUAhVNtWC}z$Rv^EH-ntg>%WB z1x_~Sd09Bug{4l+rH*+mt<6=F5x+g`H@x8uZ}?9_B$b}$dA`vQX3DZG>0xm>ad&^u z!{YYKm@6jKju+_)H)Y(f^l-@8sR)EE#1a7 z8PnXD>qf?zPCSDILK$=TQT|A9qX{DOjT9>4xhN`;=N=H!ZU+%L`P}jxWyYAOY+2g- z001BWNklG$M?iM;Z=v!bcuHj^2r3G!O?(<3aeLu>Kr&EBEM^ zUR`)-=Q8qwww5!Y{v@`R>~Xe(|;Pun4DlMd^4CoS={bZ}H^OBZf+u8l{eU z!LSE$R09ZVgR%w^P%F%nu*~TxAJY`Rni7#T$L_)8PIVu~fSTL8)$ z>QLBMD4Mj2DmgzYcnQ1&S_CyEYNbwvQY$7EZH}m=pqPrvgvm^hMT)H;nvXY9031FP z3CVH}8Bw)DEtP4SnCiqd%`Ef6X`a(y;FJaf=kt9U4ct9&KHsGx(lj&GN+~6en!l#$ zh~VJA|CFrY?+F6T5}YI{IE&<7R0JiL4{qNKLGB=Wa3A@hyu8kT8Rz5Eg@66G-}C;% z&fW^)HDx^;sV%rexPjaex~EsA`5u-tGOu6?0BxuEg{5!YH|4<>9zLA;Rr^Q&Y5o`f z>G6N??))`%S+F9Qb{wa-mGGTtFl)zXxHP)25Kd7=O%V~IcZ9$S%%?zi=c(o5U+;j{ z^twcP+^*G_JQZ47>$o?%=N9B=ozUfhQ-d>wX|3!6`|h--YkE+_lQ-#_-_;51tZ1Zn z^v4RnXpU95o?*KeB!qtHw5>tx#GK>WWiP}B=ly%Q{x7Hh?*>1_U!}nSfSGYVpZVsS zZ@6BswAOffdg2d%_yecYDaXGz{LP{hTuGyWK&Y;)P1%~TZ%PxzL{TwRD;kD|Vw)hl zU`^0{@R>L!X*{3-;<-TSz2d##u|S+5PDq@vvZR-@2qs|a4&G?K;$2diy>~WXr_k#} z$c*TOG85}qj@vtYT(Nbf6_3g#jB5lEJGkR=rRzsp`GD6a zqHO4Hh~$z2d!6N-#+WYoo6O5o3-M5;GRg z?m-i!snMm-wa~SqT_}5_@|5$CDbv(g>drjv%u^?Np^w8`p%JvA?BeX&X|^LW;TH5s z*iVHu8r>_a-LdwG^=fG((44git2>(uYYDc^**B+cPFn+3*ag;ASgUcJYCgkmgcp)8 z0?|UVg(wSN7NX3kPGA$of?2mAJKG6yLKK%ui^l3ZTiapViN51^3)KzLicSkkrC>-m zVuQBtM09p(?8Vt9*j1D3nl+69-*7`j+`Q`U`_AE!w${pF$om(CBx`S{L!$mk7{Oq# zWl+4I-j_`HD)9F69Pm2A&)|>U{q)|Kb0^590|11G=le0*a{Rt7No4#Hfw5kETZlcv zj_?C<$o1oP{rS0PJQ>%w+mU4+jTqrD>N29<;=I6-t#Mm3_!+bU`*e^DPSpT98iIAG;RHGVn?Va`(djMHJAM-|$(_o& zr1E%iqBOdaJdIXj(3K*BHbweSH1B`{6*vpgmG0?`V0A@3mF=${tl_NQ32W$BgqNa- zDCULQ3iDbxuSRVthd0ugd&9}XQU)Oeg+Ntt?O;wT&OSL5Pi1t4q&0;g<>@AsNhe}T z`FWQhN@JfLWyi{uQm$CJ0-e5F@=+G77IZIEh1x3h={y!0I(7;4W&n=Gt-zz2TmPGeKvab zn~Tu3()Ew*L8M?*LXrp6UeH;Q0!l}_qY>EFVe7Q3 z6BqEFPF?bwbRrr(Hdc0=U=Kqgjpds>(fEN*!!bmjQ2mJ56)AhRFCyQESm{xb4z@#C z!8e0mkqAtLN@1d6F&t#|J^LmCVJPE^5~6fh`UdT)>{mrEU|ncZXky9JV?)oTVN*k7 z=-rgS3rHnZj|d3Q{%24A8cpMs8Zd)MKI(N5`9Q4DwnAGcpwcW29$FQ;1riDy;LT{@4S$hPk+d`t z$&bz#ir;aKuekb352MKm=MMKDp;wB2MB4_@vEGv&(v`#D}-9M$?FB#9ci)r(txA#hBWc`x&Nj(-ej z0uotHjL0MXb38!exttL$n(0F`eb)6ky-Q{plFE!Bf=dc{T>O?M1iA{{pbNMQn^Z2m zI=XK3 z)eT1`$K|*lX^!(7$g_wEgE_#1Orph-l7A&>n(hvkj13IxButZ1s-&`1$+QG`1icHr$L&a?0YC&%($GLbiV!96VK7i9;G~p? zLVv7i6>1l*Yp{65m_ZgiPI&JG6_Gnc?@$YZ71oZ_JJjY}e3gc)phJSy{q7Y@YBKM^2DS&{LAOnb|B zB$N?fA?LP1zl>@EH*$hL4EpqWF1#5xr~^7^3Lp26LV~0OoaedT#{`))!L#vz4*}wf z>j>}>){n5vm!9RsK^Gm1G$NyajyimLdf-M*jB|ixhOgeiuYM_f_ub65zqseCN8`RU zCRL1FTR5}e2Q6w1(?H;NCna4G6tU?`ObBdGPV+%J2U0BTpBnBhUre2OWA1*ae9~$cfx#K;U!_=NVZ7ij8q#<=mh6+B@%_ zD(9!dZ%+S>fBx$K<{$6>J71sQak@JpQyE^mI=wmFcYIyhy;G~B(g@#h-ywu!*QxN% zwqHS#9#(==S?T3M-yL*H1-DYzLU_M6sD(v^NfoQcwl#1;B4wVloH=VMz($9sb?5qi zC3>Li#t&`J9nj`n9MyBMEHEv``3&D);A@;VIsN4HQ{W#3--34Sw7t{9xFCoGecjkU zcKm-k@q0(Epf<*wze3FO%s1bB!@vLgztel?dcE@f_uuo&U;YxaGy;0V-zFUAau^MS zP(xX|vG2mZ89T$<`%s`a&pFihfX#CedmrBEw1aL1t|wT}_5&Vts6tU>5||X3H0M{D5Lr+Sc3nUwf=Vi@2intMVCqI~#%_XZ zB?`1T=v6@rB`U?8;ukbFS_l_{D=BwqVHk=senXIe9-WIj*Knje+S6b{&9E6z#}&F6 zF5s>dgxQrPj717X1Pfzs0^O)m*|p)?33XgMt28zp&rkzXFij^4RfH;soUVXK+M=qX zvV%EO-8t2rvo=oM@!nZ`@Fc;L!8$3OI=mKEWE0p6Axbk?Yq0g887wg~`+|B!Ln-Dk zZ3G_)`vB8Nh>7qs(eH?uai7_vu)4Cjv-wVsjfjt+@1gtz(+9Nf*fvva!epYvOznbh zMqdlIR$?{Uw6jjZW!|~+#Ht;Sig(5PjQ7eEfHq11bkaG3AwFRgsw-P4t0+OR8LXOy zaPtcDx-+j&%xz;5WjY1Z;!LNYo^#ROyV3iE_nAiSNO9GU*q-m8RKlRw!nVw8IK36F zdgkgg*Q>EMSoh%Cb61O%R7lykpsh|@oxVE01{Z<%Re3)d@3#V>)RqbfH7Z3E(}`|p zyx!648OlU4K{POPX4_b_rIAMo!VH(nzBlx;BWouvEB$(*xFR~EbHz9#I#IeH-P0rG zy5rK=reQQNLz{$dicN1u13!m--)XI}Z5#W(^YHM%vMhh0^!cZh?sG`~4FcjhES@i( z5!}M$xPJc0R4>{sw>(~Fl!)7(#B)-4S=0>?Lmqw$_$2v2%0MHI{65X)lQm-%5Fs7S zNzP9qVB=b{9&nVC1>O`12hW%51A{+5&R-%RNV)l~VXrFwI_OsGS8e>XHJ{?BWA?zS z@Rvc6)qKu5Z@#M14WFfd4*ieei%Gs1_-Eh?Z9Mw+`1z#ce!iTe-(KhW+4UE)_(ISt z#EWizo&PI*mf6q6e~$QB65waky$JqtI*1SuNZHlrc%AdD-w$N+PjS2dD&m*$Pu+a>)FY2X^H8o1=HZ%&{DMZr*-7-5}WoUL}KftGAmswXR{gr1J{ zgi4=;E>0_red>5?2rFh6id`_fBGGBPv6bZOHWT_DbPb|ccoJlta($r-avUK9X)GUM zc+3##07adYn{?O%c5_zGo#D1fLyyv63TKW+F(sCOXn8|BOhF(x1(%tS6VU|viujsq z1is?pbQKzvu0}Jbs}tHmQXdW%v>Vb2A%*CL2qo0P0xW6jG`J|BDYx}r&?${GjE1_W zg2(Q}8uV+>u7P(aq|s$3B-gPzJ06WKKtf4DLnFA56ZjQGQC(5pFx!wA%JSL#~n*P8FKD5l0_m02fxtcY)bf`i0(gXA~_52dZbz6N$N@I=_ z)y2|5lzOoDpzMNNAwCMIqQ$UMsAXb8VOMMz4HWI{Lv)mX-GtNkJgbQOJ&Slr&Wqv%@ z?+6n~3hEfV1#X3o>)TSTz@fb$n%6g>Q^bgSD~cDTA&kBm&`oHj>_uoIxC&m|K|S>zuO=9;loe-^rP|N`(Lts{~OAO zUvOG0Qw!*ZaR+KQWd?yPLp|%Fov@CZr-#I&-6O6(_tGBQY%O;fD4qKnJj`%ED|d6@ZYnHOWh!Y9pem@QqV%zofJiRNsK_T_Z+OGs z14M*nn&AE(L=aKtGGp>1$WN&L#P7fV!1q6HypOS+_zNAj924?;1}8$vK_Rc72Z+c7 z;e-c>kFh7fgB*XI&|y`58$iZ*G8pw7GLmV^0d#~Z5JH5K7Sxh~@{-9v&PQEF`Y{mw z3B0I3eFQli3jflcqaIA4cfi4Yl-o#U!45a0fkrAYjt;%mk8%B+Z6ez+V&&M%ZRX1u zB8WbBaS$AW-%sFK-qAg!6y|y2t9Qyj{5trTe|CQJ4<~;4%R3$(CKd@ap0deg-lCG8 zOiiKW^Y&TN%DM@?r2#7v5ayBJY;ZaWQ;A_T5R__6lTjz5RLLCzMmr*) zcc=HDwX~e?eKcBx+_fjieJpY-`4ZGwnPx*yGxBg^d$>yvhiymg%Bd|(K4EUCAB%kz zv5MM+Yk&uRnvhZ`Wiaif;3DX-IIjX#jcIrT)>4p?JK9X8GMCC!(jefjo_LtgJkIw# zp6+;ey61j5bE@+&6qs^H94k~UOjeo7^D&poTql+~<&HTvcFcL3&))EcH~b%jNYct8 z!Zb|)L}^kA&bCs&#kJ7|zGTSkDiG50#cF`tY$4ydG; zmo9`wW_*woK*SY^3urEiWS;xD-k;|mUeX6-I1XHZCn7fJooF3=hrS_wCyfFe`VPJ! zzLRtlS?BIKcYBuo+{AdnXJvbZ$g(?0cloHo=$_+Ofuc|a<_V?=e)SFfini*M6Jh-{$&HA>$~LLS!S~S(OYQZ|w8u5E*vh zwn%sj%J(*a);dp@D?k4Df%USpvlGh>OCzf1Hw=VULf#=(VG4Afktw}sLm;tYJaHGD z`>pWR)0uyif8>8Y{!f1W_-h_ck1XfAq%pPQ!w2}@H@xpe--!PIvG-<8lH|yN=BKKF znR^_Wb+Hd#Gn}Q!ypPEI|G%;iE4;d^x~eiV+}!}E?t{WSJddg-Ib^fDCsYLPW-#0a z1N25GG{B~CQj}1pvjlKYrwbuY_@MVjYa2zKFCHtUKG6J_%DfS5TjMvsJ+oRmUpQ1H z!ik=SO&)_7fxBQ{xVIzU+kD;6-X^1Ew zXG3}r&%k?V1YJ6E0rw!j2fhpBDN?~V?RNeoK~=AY1K)o8E$8!@%jLqi-+s$C-+aU2 zaQHaOEBu*&VWHzvQrL>p)Yu$`BvW3&sH#z!pLl z1{J&_?x-6!l-iZGD~Pd49_8K-g|;XWU?Z^ZSU;f%EtC_+nZgCb)mSJjVJJq4+{2p( zJ%Z-YN3e8b!B~p1EFc(Ad~)<6O?1%7sfg z(@UcS7=c+(3>HzM7gQ=$3#C^mp8HC1qz=@aJ83-9c#OegA9Myk#-NMyT&H3MgR&LO zow6x%E(DcUh4WCJtEBS9sE8MqQ4m+ObR;&gC(!Sq{*H*diD<8c6q+2^FwWt;^T9js zY;i{9J*Gcm^?Pjjky0Af?x}iDMe#^w0^JNdRs6IPr$RrRxOni)#(R0@Tn6q3Vywha zd@LL!6=jwfges~|5kuub;ef}1;spdY8MqF%GH49y2+FB*_tbcJhNB4OUa1d4aixe5 zA^0$SEQB9vBAk&l7$_H7R79L8##mC}=OUn6;XGC@W970L7lL!k-TI<)uEfhp1ML#@ zOAwdfy$auN%6FUc-BvJ+V~2Z(dk+KyIeLxUVWPZv^iwxWsM4nSe1t+}YC_^lR8t(I?fs4@2ASYq;JY{288qK&A z$_idPR6(jEmc}3+f?RR%Ai@cHqUPZ^Mwn(ku{69Hf;rR?EF(|y?4fiw+E9EFmKakQ49(Vv1Iy)XGv9RxxBKWjG}Ug-%tcY4YS$jIMd|K%VgM=Y@s6 z8V$U{A2o=iGO~z}KFuKQy^d^pVzZ|_Hg_mC7G%65`ZMe0i3M12ygB$tDr^r|*x5NR zA9l=Jnb&grSZ=P#brXTBvZdt4lW8TduOUE$5ON{W+h;Qm^I9|SwcOc~z2&n1%;(vT z$wZPMM6zdl!fRUL<&=SZ_Mv!pf`HFUCA02rzt%_Q3K2r$`fPuGy;t(NLVEnqw0WPW z40O#ffHMnE{+4gYulVQFSA6&Wd!F7wd-@uA|0R0*lBMNQv)O7RRIhCkvJNz6duE*) zgytBA2r}nE{(ZpKo8`uJoPs$9KM65IrvEVMewY3Jq?lQLYA$>XFR5?GHzQvD1qdQr z9x@fMF*>O4mP+egZmoXFT(wz+%L+*YTT_0ck9AZ zQaF{$PC2JOR;JAF_67djHqcjig^xi*lFTXP`aP&B#VUO)j94%&+^@=7-%|@bS@2#i z^e)q(Y9jafWe>n+a9-9L%@97n`CNZekzD~N@XY6e=uD3wqSxgr1pqfJ-H=?1GCH68 z+&A_GNN)2)wt<-OJNUI6QBYArIzl70?SKe^4Ppbf*@wt$F0d;A2w6vLNm_g8~oe64)>Alxsoj6kHz*Zq73w0CdE$WRe5aA+ z4tMgz0m;2q$T=XHRzz}2Rv{|XRajOF2x};d2&)-uU9eKHQV=WDT3PoK1eS%vvhc9p^XBlt7l#MF zxO?R7-2;z@JMQ*V1WKh?DiRhosut?J-xUl~32`bL-WUwL?q#p=3a{`V3H!QIN}1P6 zDTTYcBM%Svym|YEufP6^ufO?czW(N)`Nx0wNB-d-I{#wNynA=yeA>8dLMr8gM|ukN zsN^n^N@c<$5z}ZOYewRL$pO^`o9p}B&bVdhzpQ$Gy#h$9#||Xj^pY;~%8r9L#BdzX z!emc#hPaSM0&{QG!Sy7eea(%yGUKl_FG0jJ5hw6V8VpRM0U4b%9`L#UcVx7TgQNo; zBpeyp*YGU<`aJSKhhg?d_*sJiiMc%j#{!Rc@c0OS`3?NdFW|rZ!g+i+b9Z#sA}ln_ zJ4m3Kn464Qq(tISfEp0a75j>bc4#C=FMuV|a6m|-frFrj>`)%fDpwwZ?uwC;_2m1= zG@*f)01ZTg#ED3yLj~1=Y9IzzZ#~Hyhgpy7#1jlgQO@%ZdLBK%k%`F`B< ztY6dY4TBXt>2;O&5SGR!9q56yKzm@GI95eiB#i{t-Dq$c0xniqYGtuH_w(sd<~l$3 zm$tw6G?47QqnbwWW_C^U3Lkj2w+|0# zFuQ=$7``$5LeP;ogPc*l;4!FbSdvW;d7?t8jtUGs=d}+;G@wx{9F~P;IdVAM6LI9} zx$*S8aXL3z8*BpSqO8TxNCO!4T-Qb)j2O5GW-E8)p05rKc_@@v=zSyN#IrfgU=*;D z!n_N?lfnB1-XVO~g&$8rI}dnv${9F=o&rh`XVT01j{;ADbn+PP9l!q*gnj?~>Z`AK z|NcGa^O@c|-+uco_xJa#>-s0#w^#Ui5EsfN*Xz=S(v0FM$Wt{mDuSi}ybXw!bgegr zHu}<#(rC8PG-xPoEObwg?BW&0sFL0cH2@F12crj_ppAifXJZi3X*y^TBzZ3kNrQWl zV3!R%h46?F zQV3m$HhD+h66e|+J#W}$U=7MBxG8HC1{K$u#!?KbG?WfBB0w+gQsACP{aMxo*?Wn&@OrYFb6 zoYkCzW>`d$S5geBF;<03TnwmRTpei>^Wb%l)x565=DQ7pMZ#9jKS%2 zVvIp+jWGs?!{Lt>Pv5cJ9plLL@+N})W~bile>9}Vv35`CQH6<*;L{^ViDdEUKycQqLJeMuPq#30tG{c&rBpu&XNMi(WNPc`alf*Ix z#2_O7A&o6&Dg*`HX}lHefDk}qM6ltUqvK*!lrE{LEd$(w4QK)l!6o@8qZ{4|F%*eF zY{RroTF7(!CXL%gQZ^t;nT%mG!AYffG;p2Hdy(XajG#~62_^96cu&P|A0%&-Nco3J z>PNUF%?3y~giczv;!)=KmlD0rlfK@y09OgX<C<7BMIf=Eaz+F+0^R9h1VsSPevLT}P4K2f zt3-2RQ^1f&zLv$HO7<jAsbXxjslb*xiUxpjV_7#2nRul#WP6 z&57WukAmlSV+11{q2-*+7ll>>4b+6vo!Xs+V09=+^43IXl6(>qnI}PJC^EXC4T@Bw@Gx zlBJ7)5L^XRdzz0WtU{&{U>DZOWTMRHF|!jSA~Dm<*nGgv?S4J${y8C;i15L2lpmD4 z*S(iB@AvlZAVSDZ{M?jdrpf=CHtI~PxyUEOIfV+cmTrd2l zHbfhzv(uW>nj+E&??fyF3So%ck(wQYG$>P`Ky)GsAaj*b(#2#UHP0pUhY~3vo6BD= zK3D$3zF^DEqKJ?SXp*0ByH@}>$;|f|k!9{>h?ESfC})Us_KTPmzbAMPoaTK19rR2J z@%DY~Y<3eJT@XUI-XTK*@llK4!Zll28cG z`v}nND$2F3>M4q5MujKQb5M9#;TK;jzxt)|_kX4Qk6#CW`whIk@7yhC4$)Y~1#JWA z4kJ(p9YNv_AeKb46{yvbMJZ!98c6m?mmD>bG#a=A&J8TmXyD-<)|$k!D_9L`ampf; zC9qW|5uW>#hchOlV}vJT1_*IN6cNw0-XguPEl`(WS%IT)dR*b*EiNnV(%Hs)Hu;KX zU((0dxZmOKs8rOBh}|JF&{2qTLdzkQJofpdO8UVz4x$`N<*=;OQn8Y!4b-}%AwXR@ ztSiUE%HttV7kGPorL z5FdN(vrH0&@hmO|B*v3<<~Z#+1d&KwV5Z;WGjraV!m99a6ux|8eEpU2t8ao|el_^T z7lXsHQES5#%#&7Q7AJG-yA5J{o3er(UE0x;9d@)&6tsfYKo^Jz5Fs=W1yPv{QSwFW zUUJsr7WBynRRhy}pK~fDej*MAUNVS+&gZD4hZi$MjWDB^m6JX4G``?nDLj>hvmR;s z2G@InN(@JIBd8FXV>|-7v!60x5g1BUqwdCiwX#l+Ftsk!TB#+&ZX9@ZLLrb;YeJ4; z;O^J3d&Ga;?>`R$Bt^q@jQ0N&YQBu!F64~Z*Ms@3>~k4Fg3&wY^M$9UXGRauNTjv} zOrwD?63O-oHPSLimEnWfI?~RFJX7VF`%Uek z0^@?nz&Im%!K~pDEGDQz#6WDIT2h!ac}_lpG4g~05jY<2SeKP`Ij}Bws8o!BZ-W*a zee^sqV-rqj8W*dus8Gb|-WlS!2qKj#mHTo;j)DQt{T-*)xRk~wK^G8nMhRR4FK}6f zQx{GbXx|6=tyA6wdLacF5_lI{%dT^Zt5T)e4Sn|~3L?V8!vpvC_dGv8vuztsPft8O zJ#lw;$GWZ`XL*G`129+ap3}L|!Ii@>)+nrMEK8w=QA9C!tXcE4mr)q8KrzZ12ph$eQUQX|opx@BKzPG_9#d}{7tWle(baJSL{N+LTE~_$ z$S8DyVcCyR7%|wwkr)ipI~(drgKOa8j8ME3ViDAp66xiwRHZnS^Okm(WiV{y`e2qG z<@&(2C4MCXX#;uh=*h85PYBR+Ygbf$2azAc6%&LaE(AgHoMpLa7tS z@Dw<%gXCQ_C<6?^%rSu>!WN__ze{l5f}x7mz?O+ulwd(n18s^55C*1{5**5)md+>* zHAOTLn^fXf#0?)t8>tYX1q5doBwG!FJPqMIh3h?pXP5N;=MEl;KaM<+(Yq5pXplyh zAt;IeDFHU$7tqbo-3y;DupJRrWEi+3v0-weigB=f-{{~{Ha6{PN;04|!v`#v1-mS) zmxa5_LftAfg$BL#qyszv!}P#nj?_U9o1PgVq=D%~wJIYjV<;Mi^aa|2v;_%C*@43W z$EDM)aaRY2HN8ew98-mRXFLS$F^GqPG)FFx!68Ouf5$*uo`9@&CL+Bvo(w{gZ@P_O zs3Q^B2WkvlCwBswF9bF2wtQgleg50XbL2E{Jr*pGm{v z#wHujvVk@>?|G_K%{eRzy%yTK(C+T&L+LDVe@`rHzIY$V=x8{qK`BrUin^uJ%kzM< zprQ;3mbD=5-?#Dh-r2V4VCBC6wr!)eG{XKvK<^#*H16%a^YHM%@pz=x`bUea2Z=9V z?<4W87Z5VlggwJe#*fR$xR|)~zXLvP1fQ>Ehm4;n#_2$J^lRGLazz9W%!NjQpRzdcB!E4}VT`+ph}%(z+Zm8NILP57xE z{~UanXATG-zP8)Zd`Xe9AIHDkzoh&aKG#kmyZIjqH|2e<&Yyy7+S&1!y!&W;3ZIYt zx7GVoa4kFFr`qtjdOl2boA1NaAH2icmpwtak^0ZS4@wKJVmG*X@U!qUi9XYYTiEOV z{7AgQE4;!V6SzC)^ZClZ_(S4Ze@JMp(R=?%qXAJ#rSX6e!vlH<&6SG;N_vo2U(r~o zKIoh=4Xku@l{`W7kRc8Ns}Kc3BKgY~62~|ja#lDQ(xxn?0vfPJ9!@p{-4s0V-WV;7 zjbTIBDAAB&R|616e@DX>D3H#rD`6~u?$!-ibE5no2FO(NF$SVfwjU?Q*Wi9 zc~W6O@n8%`r%W)UF?1uAU=+nQ@tJT4S3E4W9knvd2`#7@Vv_PKO_}Cs&^!myFW{H_ zwpZZ31giyBoT!~vHdGCXL1<5XqcZ!gf+}bT=1H4~=X&+$1m)w1S9pa#R#1`j-mK~U zZ7GFgRZbQ>(P<%sELb^EFtj@ihX`mwZoL4xx(Gl>A_<`%)CH5wlU$SmL?n@m33-2$ zMlx<^o`}pk_OyU7BSk*VuYyd8yQr`;Uoy)L;kg(3GcdDdowF`Ph`lqn3At$(%ygM= zZ|6&$^M2+DiI={Ud3K|QYd#T>xb`)lG5aS3yTuy>N*{398qcRQ&(9~$r;WB5jG%g; zk#!1tSwVWio>k3uWHW``EAk$$X>Y={{d=*k~6v>@Zl^psUNpK*MrB4h8B#+;&;weOj4a4_d!!9ig$s3s}= z(HBK%6Jb}PgzVMFp6Z(ukU3h+O zI6{mIAs50|0wG)xc?5k#q-UHcNFIxRRRBcD1-z1;#SC%H`o)kiz;d+_K_V@NUv{<$ z?l0FOA1qy=^P5MQ+V(^^ND|nH5E@CH=p;HtfoCL6xVIDv!~mXwCxYjf3bcc4p!-P! z0^*3AkT?Mi!ZW`=BjY`Jo}Lq90B2Ors9aL{%N@pm-k|kNvkMhmz?E>xKJSDj4wh>u zOgutK^uYm1p@T|}rp#=^;ZWht0^dBq-~B@PAAhI({a*@y{|)@rbogY-arWMvW{k%GqxIs%_Y12LOFG!P|)nE?+WoVUOn)&g%HB#j1upC5=r6QLk}tVBXE|v!u+t&{Nz)3s zn0n$R5#XJOG3n_U;zF>^y&nW2bne07FXcoKK0x}+3lKzH2;T_ba<3l)Mj8srZZNPL z4Y(6N^UAFLwkiH{oe%bLnbnarXELB?Ix^WylY&~{ZiTmRjBma^^2=Y|@y*wrw~r?d z>xo)C`Kx;wb5GyfBINkVj!#97xMT&$Y}P~(^0_c^0m*Bb%uLJohXx`sg)mTxq6U&^ zA=!!C>xTwr0Sao#L{A(jmhjR%Sq%xo$X&iO5KocI)W+~*a3 z2Id_2kp6#}n|q>kOpzZ}%;(Ix9DpErq@n7zZCuV5!fSr3mw}%S=Y3F$idpug`G;V6 z7c3#vqrro7(%}6DZGZ$x$EaYMYyRiHhd&h%$ZlAvoe*J2L1aN?p-4dxiZ_aP%5xe*4GX*iH5kR{MHt!%E`*#Q zE=cR>dqYo#ZicmjmWp|ydgVw-LnhmChsmRGM+cPQEF<@6JW|o$99^@&Iz0o)Mk;k~ zAZZ1ec!vp1a*bp-<1*OZJHr$?I%a_$f>?|uLNO=`W`epP2DPL!1WHG&5n4cmNJbP9 zX)xfuTszyYf(eL02yBsF`c9#YA)E)C13H1Yr}DsXMjyoF$@Ho80D`JhjTGh^ zggRX_J{>|;#1H7^Kp~xa1i}R+sH)sWP&8NxPG#d{gZBZWE8A8WQi#ifT~>~l1NYm3 za;fw!osaY$w2_VRRCKoR0tpmwytS_D@?8H5OiVhYA&m}ww#LK)~h!NJGK zlR_9Y4eE$oKZde|VIqv8Tn;On8%NFOxG#)4lr4S|Q!k7V(WV=tAD*@Pqp>uOWJe&bc&yYpQ`h7u+I;O z&s6uhR_^2Qe+ailUte;56(834Y0L}!R6BlO8*iRnUu)L!(ux;&DQ&jnnrBaO12=JT zb8{{6bIrO|@VSRCRqandpXwD};T8TX!QHuBF8uJr4?I0R{lRgpKP2o1pg+Z%*?ChM zf(E!6ErPQGArubOI8Z$Q)f*=7u?i-REe&cQ#{!2kX#_x$j@3)Y&I9j)ZWY}Wxd?bc zte^l1LH53pG^Q+|N6^c_+CW=}QzuRXiH=D_B^|P+(PQ8pdWYzsO|WJZFPN@aHL571 z3ZsBq%GHfP2wc)J2rfhi#3?>lTSpH}Pw0Xzsif_7p)Uv8a?hxb3{`pzdTeyRF#LjZ zLgPKo0goeSBSK)TNUR74Oc%5&+6C*$i!7eb+E;N_2CH;V;hZ9sxx<4I!di^GwQw(m zJF7`EmqD+cR)tZVKrsxlLe#>D0>ud##73Tcx(!4&RL__wg+UtschY$#U?ZM4CzL=(Wz-XFL|PkcLL2NCiv2t840oT!OeLY(c$1-2~ed9fBY! zf3ldtp$x>F-9f3bfaYlnh#+L%a4-#Q1tpM05KkVBl$a+}3pxtsg0=#BK&+#sL2by= zkh)=-Mg-x5;i*$dfa;{JC6)eHjI|pph1K))&t}j~XezjZTRJJevpkvmSaSS?{Jygz zATn~k>KQjB>VhwYumY~o6-EJZs6iZsFo#mmL%|lsEHE7DPWR+lZ5~8}aRD80G-0^| zGskMft8iMJO$`f2Mo*^zJ_w(uV4Q+P}uPBOM>kUHUrESc1&3c5~Ykb~^Ovars->#5K0Wa~_Qm%{r zp7P%VH}hiNU5k}H{^xg<+o##bml#n9pYtZju~!JqaS0qIvwc5hz!;chC^w<1Q#FMY zX+@eslzrOdZ2EK*c81|AyuvGd1aiYj7vasr5mn*1Du*SwTVY)klfZ1_H@~~^{f~nm z-woP!rfrqcM>5kzK$P^{uYf=ZK_Mxmk#z>SFfd5ddIME(BdK1?xcNQjzLsS{UP2^M zBmq9-L}uYPAi!K>5C?R&Q!=kG>vJL?{7m>6Xk4}4IfI;md8&W_F)|HpmIb1YNJH`v zJJwPVe=2#`j6R56^ga(pXZY82|tv07*naRA9isA;R<7RItp> z&vBTFa@RdD@?Xf&Ovs4i0$)7_fBB8@H@_Q~!rFpWGR;!pa1ag)3<6hDh-q9s5X&OhSGUt= zLt>}X0Lgun=?i&}w`>oKVvAB2p|6S_DzV(-Jkm#}m!QW3!ybuvL->6PhD5XBQXyIr zt+pVP9CknvRE0&9)r@1U9P7&6a$v2g6j(}Stt-bR4F>MkGzxe;-0^t0=kajI!+OX4 zdgOjNayQc~(+E{0cec--Bp~p)eeV@s;T2xtKO1gO^D3oqJl=7C|G<}De#O^c{{`QC z^B4U3FaMf<`lsLUPyhHE{_&swh2Q+kZ~5(SzvFlMo)E`9jrzuj+}9#C3(tKbe-Z+6kBwB`C1S3riT@xnlV=$|1};7K+r5No$dJmqlPRi^ z<(9}gL`Yh^5z+RVulXYyc$;{DlP3#JBf+cDz#N0UYJDjJxNiBgsq;EMtO}Sdx~BbL zi-=_SFBKjh;L9(KZ@zipSHJp-Z{D7{zlT!N3(}r#e=UiJ0FgzCXrgn@9Md%M)DR#L zAyGBo(@XMe1ypp8%zK^Vr=SLEN-2i!h64p)?tdB#4Z3>b2Acb@82MflkZgYx@WS-? zp6O6aX1*3sLsUU4MGBTwJZ6^4{k~AOVp6f84EH(jvpo^8d;d=(`0S^eQA(MiQcIpN zuq+GfvR+RdD78{*#YzTM%kouod_|LnmXG%SSNK^FAaSGHF`Hk<{I++a3zeMU`Q6++ zyN>t#o`lKYQzQm4HrO`g(m{JpHjSKXII+Y;z-N9yN1)G+zwaE5U-3r&f``Kqq|@U9 zo_r8+w#$jwI)RE@+$uLa|0X3!&9C#u$XaYm1C`}WnrVZW=_?f?QsJ%6)M&6sa!ewpbbyKcOOWkl4Q}Cd|J;n zsuZFeuzH8-igw4I-&0NvJ8ek2fCXbM#9A0@p;yD>nGqWiPl!K3>&R(EPe<&0M0>$T zg;5|XK?Q*@CppT2%OJ);8b}nhr65*xB3C6~4RjT>L)pM2@`Ql~!@v#Pgkeeus3zXl z&z<&e&};I%bOjWAfo@7|P&FOeR8LQJ#h@0aT1TzpqRF=q!NcfY*ybq%TPtjvu~uhw z#h7@Lfoq;FFhWy7U6qc|2aIqe20DyB(m-o;r+3FS@QK6~lu*Yixjt866rmB=P-Ike z9EhGcb~-0;@`TJz6`^PCpZnIZxuq|yizVJhs%Nc%Szb_&uyc(L2r%o z=&2MSMzw;JJA832slkG4dh8Qw8Vy8cj60eikWsKrunlN}&@mGh4}?L{M%abJ5*#8} zC7nk^QT!3wTev(5=Qn~q=P3krICV?Dmis_@!^VZuPDFWz#p#mx{4v(8FS%vlRzLL$6M(2zKzyr=kkEE*Bc7D6M> zHnnQ3wX!aiLqL^+n{i$Xo4{6scF4J-CI4k4jv=J7lP$u6LMVC?inRbo0vUTBx91se4(2}CXJX6(Ep#k zcW;s;M-Kdc?hZs`W>r6CW_Ml@A9(=AK+_Az83cd#NN)wsUN?1#y@^TdC2rL z@Uw&S2p+%J{lhQRL(F3dZ@_JQgOf_`amunTL*So(FLpv+A7)=r6SXm zGJ-1Tq}Ty87PwoGw!+;)d`i4675Woj86)TeS`*eTw5FH~a~Kj6MJG%PqI&PC*Gmu@ zX-t6KTuDF(-UIQVv;hW28h0v_VklyjUT1tdBJ&k~ctKZVczQ7HqtSgO{LJ8h`b@-u z@RbOGs2~;539O(F+9hQqM^Ga;1RNtc#o!o?$vfs9MvmV=n3=hfBQLCST_+k!SZCFR zOMzyCzO`M35ngaF?8Y!zfgXrCS{ha^lOvWyE_wbiGL%4yr_w+PR20*) zoy*f3g+_{(42KBv+&o8m+dT>zkiI7Zlf23(h!@0XWMeStVZlyk%r4mMm;^BB;S_1u zZV1c-Q2`UG8&g!~WoD)_d!=|unUDf)3StVP1lJ1J3RfyzOMc@c(s)B3DSIO3Fg3hh z7?q^+*h~lnQ;t#B9FGDVl&FrHqI02CrK(^A-h#0PZ4IIYGIB^YpcW`5cyUH)jOwhl zfhtN+hf{`AJdhzocVy^h^a8{qaUthd1yKVOyVB&@B*6 ztB!fGJ2M70p zg|7LYF6`Ec!XvRP4?edxyZ7DR#_j*s6{;X2TwlTWzlFd2gYqB#F!RIr*L?e0xSE1m zF35n0U6?ZnwywM{`=Mw=P|G2q2#nmCBeq^LU}IFGNl+;@D4vLZsZeKSngSOmEt!Iv zpe6I7TYeExLANxqy^lyDaJ%9_zOUrms6k9%RnraMRI#a|wIW_gcX0V%b;6jim?45x z05k+kVS{{$hdzj`$5b&*rgyaz0-5W?VVarh%v2`kdxe2i2sq70uI3}B>A+g`?+*>lZJ0`RW^9fAcNh{pLIV z@gLvt$3Ona+qWP1`0>K0n+tb$7nWsVxir=m^tRB)N_blCjJS}JxHb3eu?bbP3022N zOk`W0?~%yuh`_f0N+4*islbk(Mo8`{1Vo1zNG|Y*1h5jZnfciE7&MY#>4{q_LSiBN zIcVFTaflU(O<^E%AJ4r$^E1d0*_Nb0ZgH~3lh@C~zR{8p58Kn@Yohgm8We?E;rnmk zFTRC8{1$%r&ESXEoo`Nq>)Dw!^Xz4A$Du4gKoC^GY{OQwlTnh$3jzM9iTgVUyUs_B zMG>S%T9Db(X$C2APjr?dNFm2UGB!?Lctdc}p1yu+YTqnAo2(2K- zqDRhLbHkAvj@;IX%Mccy)6x%Hn>|Q^cgK1%X1q7T2StRr7LKZ19cHdiN3M=1PMi0j z<9y^eA34ki=J~LB9oWN^9tTRnEENWKA(w5o-tYhWx##&j@J~QI`ZyxU@7u}q`W8=4 zr;N+9`~Ku{_lkh2GS$Liu9#ks)`_*@mj!gm$)zRF!Z=tE7AdG#6a;)7jJ44|b=ER* zK{)#v`Z;MGF;G&Ws4o|!kBv50P!ljA#AqSh+@86)z2$tl-~qwH-CgkR{h5FK<2(M_ z|ND3R_~TEkYvc8+Yp$=q<#;Si(@UlrL~NGF%AiagY)~XJ05?m}9f+Ve=i-5iF-^wx zJi{S{lNAVcuFK4;apF~9IWv&O@le`;wglrYz#X|jxdaCbzJHtZ_x3{-^{P>aAuV173D|pFgxCae5 zwq$EfVa!sgOca~YsUp#lvA`P8SHdq0z2MPtYxvyh$Kb97=cRDbiG?F;%v4z^)se}u zX+_qa`PPQFMy!RQfzAW56dp)gtTO=L);x(k@2Ct4P(2SI!wPDK7K38Abh>m}44OwS zbrMXHmIo)<=t#xTdBUaxYKOFbxGc%&UshtdzIRM_V%6V+0_%HW$HwYA{Jq1L}aLLKo;oXf=hj zZ4(ZsIer<02kjuN2xg>xfntPgO08m)ZOtr2nF`Dn6pPIc!)7uk42iU8*xVRyv@jH> zFc_+YDwr`8TBPhkLl_cywl#&J2%h_vfGadjK6UaTd>h|PQ%uv~L(M@8f>;A_gE2!Z zfP)5Mj%!DyD&X7RFI%2W6dQ z6LcAfc9f0<;2okPWZa03>tK}ZTQsc#78NEDih!X+q$iE3r}qw2C{a)qVk!)o(O3{J zRJl;3GfQVE3@Z$a)P)gH2I>noZn5})#)6MvL}7UP6VWKN7K{$jmElTgWi7^{###&Q zXlV2-Z@c>N-) za3vAgy|CUZ4(PT|s3hH21T{s>NTq=STZpZYjKZ2L#Psn zDy9Jf5|OkCj}-VB4!ZLdWJ6CJ1Og|C5dP@Bf_>%tI&6LM2$%U6cJu=XU-tP9kITQ0 z+pb$(@>#umyY~CX&wdGhsSywGwW7Wj_ci!3-B#x>RU5%)6Z{??$NgN}zZUn;gvVL$ z`}c93kK4V)e>M*L?UOt}j?Y5{!Oy(n*WgjR0wChce*I!W_#(w;@U-`SIrTF{JJZ3y0k(UhebmmU=IJXExxmb3{o!Asy9Ez>oyNIu30Xc0t!k3jpNke&!kjZ%ej z5DH1STk1@ok8r$V9A7ZB(j0mp>CIs@!Y^1HFhAgNK*s_ELP5oFEl9CEQ-xy@6dzRY z%nnz>x%R>JXwuGc; zBETYfKN@*PN3h|Po(3Jt5EzA64PQ&rC?e47JBljP98~EP>x2X?2CH;d8X=Bs^mr{$ zqcHi*WF@^+c%k}C*Fl?{b{fPXkO{5>Uh)tc!s^0e!eY<`qz+xK=Q*sYk4nqEa^el8w3n`en7ntqJyN7bKoULxz%Gs_W+y5PzlmOY|xAc4Q+uzUH zwLjb16$)&`nvfJC3?(%-B?a(PSU?yWxGJ*6b&FijEz2GP7oGTMuX!uK<-<6z_=S(_ zcf7m%Eq5Qkr+@qbx~K8UUSLxi5CPE?W=MsFBns^-^w6wJ2!>^yY7b+!LrAuJhZJ5B zwv-R?`!)Fd(S2F*D1J+{_wA!t$U6V!kh%BjWxLs%E)j&MKq!*^5t@v9!NDDkj?pQ* z(f|XKbeE)(N&qU6%!;jzk6$4f{@@V|cZNInf|d`-p5Ymu;VU3OQAKK{Xd2kM>4%1zpm$XUvmS2h}>#PezKNaJHL4ifA~T8tG^Kb@PqN)Hxn%kZ-e< z3lH5Zon_l93c*f98eArfg(sK z5`t)s|F(tY^aTu|pcFNVm3zg3sZ^#iaj2;{aGVYtrbBugs58fD<}jslK`kW}1XS5A zcR#k^y$c3D3kW{LGd#n;McCJ-U14BXAgHx+b#=|lm#_KFcfaBN??3S7%^TkQ_?EXn zzT@4ykGy~Xf%os<^YP;cK7PF6_O^3(x3Vmq(K~$%+!YTaHssa^h#R6wNEUK$3(fs! zTQS+U$9n;O0Ywp22+w^w#{K>Qfcutn+j9g`319{FOd}a|BoQ)q!L7y^d9Lszg!=vY zBgivI5aHk((ar%E!t>lDkx3KwKif6Py&c)gWsQ1EP_{ z(gH{zHPA_r2_hmZ*l_14K@`UeNFjAbwt40Pt|TKvKoss>;wniLHd3(7$ok5bT?E%k zx0yv>)AayqTsZK7SNs&$yye9Em}pVCcwvdch#+vV6r3x*GOZW9U5L>T2GxYqJaJuw z7pDU+uCKYey5{uRL8f$Vp-{(pvl9t7LG zB61S%W4M1{lHa#Lp0i_PT;wAn?x!3?&ZlWA9FG;V8`K-4uf)0_%^yF{Bc?NZ#VC_8cMx*GCX`md!yjYnqFK0(855uTb;L`R{s7U-t*&+ zAGp0a(?_tb#>Y>MpWfZ__NRBedGm(5+gsc_*Vi9;{muWy^_8Led%k;p#G;{NMPo&z zL99&TTo-}OHT1bDZ3w*woj^9TV^e{H3O*HxfsbUCzB*LCZIzV;Ys#{~-4cwmGcFJs z7+^)2zcEg;GG80KGA`d3cXM!g2mLO;TT$|xR!|da6->7=`7;I);dngq-FM${cXvl? zjq~};+qZAIzP_f``uwf`ia;fpN}B7grC>#%D7_S#IU1w~Js@1rAuxiob7{2w>=Og=L3mG+OekO|X_au$5gCXKTn6Y! zCVL6ogW-Zr0X5h?BdEfVfokNwFP3MP5UhhPj-Cf1l5X#3MUH9Z%^d2GY-d145b-2O z74Yg51gnvP;+FI>SIIMtI$;AZq;VxT)QvhS6{XmUl?kmAluB3wPiuJ=xta}ZZg8#~p!75I}S@Gm!7-)%9Xsm)vg*X(v6~-{SDMFx=qn32_;!bg;gyg%S zY$g&=7t|{{D###6Ll;9Df};e6QY1h$X(6r@uhgd0jZto+B1n#3Sz!j(6>NoRMf-{# zp%kHF)E1=ir@$o`>iFbHIMo9kf(#{wGTexWoO9-=ND2dMo_EuVVHz9KZLYWkQ6MG< zab!4pada8z($IB9+a;X_jg`=ji!&q@JHmk>L>CZA<>gR@fMzFvcBBln42q?~j~35) zG>kq3bw_AufZ`|(lVF%KEWKcbC=xvlD2b=1g6&*BM$p3L<1)D%Rc*3T6N>V%sS$cEbnNq=|RfF+!zH`di*bgHD{+@!Jn7wQ%!5wQH; zBm@}S7bPH)YfCtZaO7Y~eAz!=KtQI7havuj{)UG~UrY9L!?u_G^!{<&=kSnQw%h%9 z@ff~{`dXvD0#B1Xz2D(!abHdM_+@@B>N8~U=Ro*M&d=a!d%o7kU*06h7ny~9)W2E= zUu)486JL4ysQfRg`r=Q5Jgx9w15Ydc^7a{?;TirXfxB})pSiiY;pXP%--W^eJe=Qt z4nmV2P2`P)F}$L^GWtaA6QdV=6k;TCTC8AFsWvg}fZHHtAQaH-*P%#LbQQ`)Qfa{1 z#M}f-h#}xvr-Y#)gg3&C7>akGZ~S%J#_E3A7f9@lz;G!HEsSFHs>y>l0RuCr8iXb( zK1u45I81=LLMY+cezR1RFICZ5VK#g&w7H_wgiJ@!32_ihd`WvJhT+}m;V=e$Ky+s` z!B=H8kml*-P8CGZqMPzS#k8QViPtwM0oC(Nrr}KD?1~L4fgx0c1I87`z*$A8k;b2k z1nL73Pnt*wbVWw?Pg?dLh=OP!p8YDJ;I`q?f_UPJNgybQBz-?bk?l;F-l!aWfCuJ| zj>P?ZZ*T3Cv0|fP-tjRJ9eh<{iEYCSv_#_ibs)1KlcqI?su*eHa1vw2IJi-IWE-2L z1pE*@gir-7468U3G)Ee#T;Ps|Q$#S?a9{~AM~b6mV5L)P2Mg@}Os=GtO=iwng0&IeDkS#*U`}^ybj(n&6*=Kb>sb_yC!q)DsPxRq=2DyJH*SNn=rGj7x zG%+q5x#5cUW9D`VZq75G?v5;XSB&$EBYdv$bQ#thcu8?a-}_uKQyF zvOgZ$nt8LYwsIap=EyeuEHmZ!c>n+)07*naR06*zx`&5^vEO}>=ARFHX+eNwJHp8_ zBern~RJJ$)(PRdQj64`HFq|SI1KTFPZ{}P6J;2u2d)*Nk2s|DU(4OHLp5YfkL`Wv@ zrm?QdR0?$qr^C!~svPHu)BK6+xpO*ne)_QR>8^2m*I9ee`(TVS5Q!i$5pn?L{2oKd z!|Mz&gXf_(5#@qNvM;kuN^FaGikWOnlteO~C9=HziFhtL``UnzsMY}Xy-6Vw*+~ZY zhU5ykUkJYtv@9Y5{Ts$|Vi5AF+O(`H?NNTl@T;IfVjl&^@oOlSnbwPkcKC^%-FxNyf<{5tX9sK3* zg8%r3;D_%z-(4+S&%*2-ZK>R~w=4Ix{kT{e8aLV z+}+*r(@#I}?%g|n`spWrdi$2QKfdA3n?LgA&BWUuh4&vm@&3cY-R*^SU0Byf8^P{~ zC%xbVh@nyushH$)32a2jhj!(%tvEu2prKoujz_~~1=;ZDfOLiXa2`?>JUVzMJq-XN zxzB7NHy%ihW~7hGLkb3C3n9Z%_X;CSMAJaIf8nGf@(G;pBSiBjsu zYBzU&2M$FDaR$N1%i#ZeyYM&v^%H;lw{Q8!fB3+s4`=$?SeBK`a^|v}S(ZE2^@95V zfi^DuAOGk7&FLUSEd2Ff{f?^xoMdH=hW18V7Dn$d;Hnt)2=7Dr)S>t6yK@uPvofhL zm!OC<*PyG@M)0x<=SY{$(uI$U)9##h38D)zgUyN^40&OkUkN|H65>U$OpQ;AbN3+_ zLy*p{Z%$~g0n?O>r(Xli^UOEje8ZbJZx~~+EDP`7zvuh!zu&(7#CxCNR{++rmmdHS zEexAk$`LP}rZaUwWiTO3Qn5I}I5O5sKUc=5N{`BAnP_9eb%I%-bh-?N1Q!>yDRm9% z3iCRc)}X8c7OD8-&WN-Um`j@GQilL3^uLE5fCK}_W2ErJ7-;L%-mx~A`nJKx#t0U} zDR@z24Xichb>UDP@lJ0GBRah}9q1aw?8ww8DX~v0E!S?0h>0;Q`(ZWKRj|q7FyP?m zBv=tt1C>r%K~#i^(LKGDTm-)j#%Va{X@U-8QK713f1nrz zZ5168>W0QdXVB4mQx{IsajGjNl;TPcp@*Ts9B}P|2UHBoU^$$*)df2^s}I~e7Mc5c+$s(OX3LXfKPzc7>;Pig!ejv<~PDte57@FmtgAs$ic6?6n z7;9B7lW?husgS-o3WngmDd6;=d$4%G1w4#CDdR;dBY6|%s~IoPd(poZB7*bzOz)l6 z8rRp?TwPuL8&Dj`2eILBvd75&{BhbZBc8VW{(ei7pIopVtVwJ;;c?!l|L^a&bNa*m z!}Gn{Hg+yb;zp)|S<)g*1>JBdHAPEaQ&FHmsmPSd172!|I^m@Qle@L5{NYpA`JnJP+`5 z@vuGHy7(S`rjcKbel&--2)805)j290G?)~s2&pk&AX243uLOOe=nFw#2z~|Q0BsU#RVFpMTH^1o zEKbM*(&+>fgI5SMg^nNuLTBHQLY*j`2q9!5-0*`k4njXDIx8WHu8QjkdQjMKt84~7P-DF`LL1SD-1$AJ~dnwEsd(3V|3~m zL=TD>F2-e3^cV&%%Ag`~pt>;+I5nY*FhmG7<`5Jt z>t$uM74Mz>VV#;BfFjWpy<FB=OD5PE9PMV zAR-uJ(7dxOaC>{p?d{C@a@jO&xAc~Msv?hNDN)_t`@Vfyvar4Xo^Zdd2}Ogtq*BnqMh)%?Ck zD4mjsmpuBeC33&mZxMn)O5x?#R zhD1kVAd&qmd;7LB6L-lqJS{A5&s1=Qs34k55p-*o28bX+&WS0KkuM|@w+cQ5p^^K8 z#km^GG#JjHe)hP zAqCY$vUwQ${4RnNh#j=v5jJ_pXThOoc!p>AGXd4yPXR<^6TIIo&!iyq>2&1f%NKn6 z?Q6dO{yTp7>9_p!_ILdB(_7xX|Hy|ApZN6YhTGd4Zf28fdaF(WMmc}U{KxjO;_-}Z50eMCGH9;ny?7>SiNpc6g_BKKFxJSg7d*E8RKJMqJB8{dAj^748xRWR#VZ0+%U$Gus3CX|g-AbGTkhdt~b7bEgq;z@Ak zzOS}+8YwWLS%w-|QOcAFoa|F1P?J0d7C}qqB}>dM*%%pnBUA-6&jw;-rQ6v94a6iH zOMpPA5?XL6DJX@~LummmgpTI9BFG9N&oG4Jqchq{Usvc2BPg|Ssz+W-2VNd0UR_`D z;^iw&r)y59R2Dd0U2!~JaXcP59FO-N2GZha#VnaNwN$2QVxIF^on~sS>4jkXhVF6R zXZR`zAmK<1!gK6XDImYMq@6sP#E)QC97tLhcsOGW1m`dtFJB#*W<|6?oMGG%y&^UN zmSsX-jr^p;VK|H++=zPO?R21f@Z*QhtM~7yckd{dTk3M**jE0R?_cpB|L_X={v|HT zPtEw7Kf-_cf1LmQZ-RG!H&{M2#`%uXZW-QiZwyawXS>HYiJ*1ok8j@dpa09>^6BOS z|L_lg&tLxjTYmTI#Oq__U@%rst4QSctQx#Hbxb=qw}Vf2Libz^F9HZuYiLM?(UA>J8$2o zrcx;2h0(6)=PQPu=;O!|o8Yw$WF8QWKAi4B523BfVRf#SKv!p82ek!wB)@b7!=VR6 zC?XDMeq#s&O}fjlG=7FNVx^DH7%OgTTEFXo^*~3!$UGyI9_evpm_l`G>m2HZV`&JT zr7c{%Gc0JDOi!OWIyo|Rv;xuy35FOjc|}EF(^}{?Fn^Mpb%&n8j6pg zuTH-N3|<+{A$M6imn}58@67k?S{FZ?-d3rMva2$g!U6U3hIW$frU;F z=r|V!YbX+t1`|Z7C=rH-Vkn0&M3e46ONY`~W#MibSU5w15K`ztf^u={(kY8lZZ!Ae z;{X{jcWe&AK_bxR=;%oAL@2!oy$GWSQiQ1pTCz=LD76{)9x7&GOxwA3fwF)tsW_mQ zfn8cEj$8&lU|1vOLC0A2z|evKHVlSfA`sv`>13_s88Zru(}3fO90Z-5Sd>^57Go@m z^vy`pCOlALr9=uAln9CsGzx(*Oc*jVs3}~m1L)ub@sS7A2I0=-n zWg6_BED8k^V+53Ndh0A}r?yTs#l6s{iSe@Hr$V^Wj}xQphni>j*Fx`|+uK{#b-njG za5x-3!=3*$u;Ehow~?1ELh|;D^YJ~zle5@f*rP%pKTh7@!vlHxe2crk-@<4Aw+dC{ zQQU^^-G@{fD4L1`W{MTtLjKokNG-_bS%B^8r83Hdm%=EOrUj4OFVzD^Fz8%3bF(_{ zZ&yCNKhy3mNNg92=;RBHBjaxNXANV-QQlZvzX1OAt#c4WL5I6T?=tu1HzYDGeS*6x1sR zpO`(pn067G1Q%%_DR)&BW=li$vjxX0917c2sH&t~#|&xq0FEG6fm{V#1>+Qq0<8#Z zNnZFQn#KY*PjTjOA-G$dHU@W>&hlxc-z>yU&v4r*3)JH^r-S0h0~}_g!m!SwouQr8 zIztMb38zz{p$;la$GDThB4^T5b@J|CqP)ZiQaHOuqsPhsh6tm;5M`7URQIFe2Voq9 zHY*-rBd~C^cepgfCj^D4Id&px!$^IT9aJ|NbtS7Z!gG$>%DL(Mr?>&igWfx-}c6rx+oxeQXqwF9J3 zy=udBpt@q}s5+PvGH~g{q{OU5Q=;Vwp&vW_s==|rp%Vh$jld{GelertzT&;BfZ6;o&?D}WrIJobqfUjzK{&Ax8%e-+a7>vv(Na+|>QF4R zYfclv9NfUQkTfxijJK*|(s5Rt3cldhgTCkrgZHK;w58G3hBpfuUmQbd0;`6B&6=Pm z!ZAX2bo{}D`;KmVu*Bu`brmE7ox!8|*QPiBW#P?e;0=BjARBW)gq#v3XXaj*bD?M) zjl;4yFL82ZR^~<d;oGH8r*Ys_Z%QdHj5&<>Hilp%#B+?RPwRSJ_sdj| zHn*nblTtqcUzXTXK4oV7mGFuo<(GJ<=EO!72#fJReDAqA^Jy z!6<4Jb+SZA@mgNOHvi-Gy1Wv+`h9~pc!Pf$Y|0Fqvfoq^B`frU@ynpL{5s0*=MWv~iVpfIRLL<;zXctR0E1xZ+$Pjd{sA#}vYLg5ze_X<9CO&7dZTq}r0eFEaoV9y}uSWg^W zD)j_yGq&4V0sH0wE|9-Vfi3wr?7DK^6I!FUL(MNMO z<#q4T&G8TL7Y4D%P^S!6$$5X1_~mz*U;Q%kAAe)~?w5^UziaGw3njO4M7@In=CO>A z>5^zV_J=JbqAVkhLy9CFO8uLXU)(eHY-4D`6T;gxLMHa;6y^Q(o}Qv^k`vZrfX1~a zLUAySdlNwvC4mf{Q+Pbg2~wix%#!y!(aoba*E##Cs0P4gaU+WU~HlibK1812c4{=e3DT7RY8-GiZV^HC%PCW9n?@A zya^S^68k%`KZ3<$Q6FO$R2lezgM!G*zA4DdJPMchQZz74_jn9uF@7wRH{y{*IUTQ7 z%uyH))O97bP7^N|CD5Kx3*HEDdiTL7nb z$0fd9xA@pnQm(Fd{Nfk4+}y$sAD>y52WWRl-63hlCC3NiN?ZuwxLd3_=HP;nIje(& zvhT+_VfP9Bu#z7>vOhiY!=FC#{h#mn^)KHu-Q4lV)69SVLHI9!aQ^T|W4*8R=Y}ta zcQ7{~)~O&6Vl0kh?wr=ffB)k>r)A;$?;rRt-@WIz-|qRvyB&8|g)(IhMcGe@xnxuY z69bGncbJ^>3TuaIV4^hZAuN)H_e!dbok8&#F>o3jzS6TWBU4zrtj$Xp1xkw@-rntnipy| zW=TvGuuPHI(~BmuCc>amSh8}?;Wd7d@OubG@|Tl&MGB$`5k*CDso`yIx5wLzwHaFz za%$*PBR!v(>La`QK$aYQ3o^wsS@&p9NZX^Ph|icL&N$6GCr_NcgE9CX!T%6Tm}k@r zJ1dlwK#e3vC<|*j(dA4^JrozRhOuZ;@{dF~LV5-a#2lGG8Ac%nbjRsn9d9iZT(Yv{ z#JUqGf@vtCKp3n|qSTp?1I5_FadX5I$gqprzFQ`@au+O0S8C-jeg45Ib)}SofFbJ$vTAwRK(Xfy|No~>&&f@ ztzZ;Z$t*hIQs@R~1zV8504PD%z9M~v-oj|0I_90G(08z!u&T1miL-R#DQCLwNIug= zXx7lWAj_F_GTao?LZ4>(Zjb2%l4-lb+erg|J8*Y;@3=b;4-c1<2lo3trIdd;y#6Qf zra|UhuJKRfKh+z7FA7{Pw>n0fTYZDV9oMhQ3kWY9@z;5mYZ=$uFF~S<#QyaX2q!LC zq<6Xm}$7Xabp>hnC0q`&YzetyRVzEYoG{(YAA#k~4U;$MKTwH;|MHGLU-`Tcz6 z1ipAopT1rHc%L@-WZu69Um4;{^rc0;T)n{?yurUV^xiq2&pbRlTnq++NA~Xr%wG88 zU&GDL$-`&>C^*&}PL4h~JUTu%Y}e^?r{!=;m<+OMoaknSN@HQ=%t~X$@Szisk)9Br zLLQFLMRA~~2I6>kd;xnRl&`DcO9oRsO)&ScVwlcQQYewD1pUWM_L)SXx^wi#;*Do- zbc`a-wF*Tb>o69WmHn(7W@R@iQ&vK-KOyWvW_)L`@A$sq`%a%j7q3b<*{djB71W1; zCu9dVIEk~i&f}@^{Is%uJmL2%+@JCLmHF%(ZiL(G9sNd-Hlw>lpE|vC7U>`zk%~x1 zi4zDDhPlziXmq+Xis(2MFmn~Nf#96Iawfupv#KI8K}r|}lZ=m327D*j6!e>~;ds$& zL(VnmL@Dv2NGK?%CW^-M&nc5^CDle+!)PEB*#%ECR%R^Cpo*B1>PkYxc3cfVGlJsh z1X&Pu(v-jqbwd=yh~NMKAOJ~3K~%Glu9SQgj#o0_%|00UKasJ9&a`6gHw zga%rXF2PT+6{R7mBgydG!%)Tqh7OE(a6I0xz0-GbEeu>+(q+6(qnpyZ)9P^S*&T7i zwb7@A&Yl7?DES}64|7)vtFQdpBx6edw-Da<5_IXeqJJKfO=JtZV3QUc8$&kMt) z0%zP8+-ew3csM{)Q=}`l7*-8$76us!j4(=A6-Z7`P8<3$IyC0RLzl|g(fa|nj2MVG zqK+g7adab`en#+SH1GyL8`y-@3dC`V`m&&DV$GS>c38j0>K)b|Iq|@8`H{2SQ&QtF z35Q83Qpt8geL=k8-cdL52q!}sU^i62kD;^BGX`SFSK z^MSg4i`fKfV+;a>F!aKY5Jvxf^dH>gH_{v@m|Ldcx)J6!D%j#%h2wV{`<5m_nv1+7 zeF?7;w{l;=YsSC}e69G)5dBiX1!C|{aQMfwK@no`N)KZJJ2Bar$rcJLGYfk;vtwbF z%B+o9JF|8s4WzXsrznAFDI|%xi`PgXT6vj>%Y?ka8@$0k72?|^)_Qp{dYf|Mu$#Er z&)i;5++AgE4~h3TGk^YZ;rov(KR#5RPYcI$o)~6v;DPAl<1P^p1GmBSlvql|5NTj7_B9vW4Q$1S(Ew;LjC%uZK&_c&?)^Q398Eeb0t}Xw)Za zLFg!;9{tEfS|Zl}Ob}%mBuf}94DWNAgK*Q7kd&}gs5-Ogjx)kp>hL5t<2r#%1UjNf zGDHJ&x;z*2P|jpKRUmM<%~+u1!juaomAFoYk}_G-1@8;b%&!7t{~EKk@#Mb#WZ&Qo z-rx=XZNMf#_!={P169S{heC8_o@dMsm~~ofoKD|xJU;XM{LItSk>}@U?(gsU;fL?} z;~)RX>AYa(fY4gwd^&SFEu5Fi`CO^BUJL}hdl&|^&RQ#VjZ+Dld4yVrk-rI z-e|S6dKf1_?9q;bG?3SZ2C+wCPSCj$!5!a>2HXP&ad3>-J7Wy78Ad%(U|aA#$|14% z3$X3Qae4XKiir@$)LAI%OcEY}c01VZgqthjyLUwRM)>BN#5dm*zS*x_%_$iEZuGJ5 zziw)ay&~C1UxYXD{;y|CieT$=ahvs37P`y*yd6+>J=4y z7z;Wc4A9ZONYoR*5;#F-;0_FYL86nR^A5NuT_&1bv2w+VaJ0l?6U`DG#nIp=7>Ems zfpx4myjJ?U(3UgW;|SedE?n;mH#a-(t`FSZ+;DSy$JNzUoG`FIL^vFn=iNAEU?S&2 zDTOqQ0Yt)JcRPWgltRvhoO6^*=~C_uz6SB;#_1yw0wmrE;#F`D2EPDmN*HT8?uLlq zX0%ryoJc4+USA*h)oI?6 zdQOxMYt8txp+Bya#}glZc;fp%J#lyU0M|Ess8{@O+HwCpu^clj3tlVU8$tUtkO~Ys zW?O(H_&eR;=~!9T#{K=7fB1(d{-2w|``g0Z?T+i~1Md!n+kN4BKQYY{IuTAA%tENw z!Fs|xQC%?)0iBCeEnzj0dSdE{yb7ERJAqZO$q^4vN;JXGowIb*LU~huZ|Hw@_~7si zgl#o0PIn9C+Zt=i&jmz;!{Na7^)<)ik>l~m>2%`h>50SP@OIk3&jdsu4?eWH<7RXT z&tr{_kf@^QP#|}m5Wk|?HLc##xuyF}C?)HPijs53q=plr-s$30;v|r+j-?K{A>P6` zUxhA0b!9ccg7=>_=ujO*z9F=Yac$7Xz9|eLb63(hNhJ4#SSY_YFC;I7-#!iez$)4u z702ngR(fhQg*6FYgftma49$in3vX{pkrYbHI5?T^g=Ps%$T|4JO+)w}=a?k0P-4dw zuVB^bHP(%~8rm$py`PPAaY_ub^!@ohr#aJcpjdGp5hra0v8bz)MFk*97z+R z9xs^`NQ&r;l!BNd6C`zV@65Hb%PV_X$==C5lhw#Lss-_m;DKM1gp1Kt;(3X47RSYD z?i7KQ;KxgFM8MW24tzq$vWqdCR% zObHo#st-X=m3T%qf#+Z!tq#qhn`07evZg{SN}GWiCnQJ}(n%=~G8}Uln1fl!bhhuq zGwKO9iMebFEk(sRo*SfwB*nAA1iDMGSJE~3!I2?{>eA@yunwN2cf>m4Jx~PVLkqk* zzBtu5A)xDwdOY`>h2F+H(t!?U&^nknRh)G;*4l$MYGI_KndC}xNIkrrrW)^H z&q|vUeNObL;3?yC4kuf0@Hd6cgRj{zd~c1z;lMOa|H{UKfm6MvDSRyWGOk}`{Up3D z^2POMb;zr@Y-MmZUg%bIyOzLqCy&*wy*!eAgznLOt(8kh<` zO~Ytl!iLd6OBvOIrUH^^E~E}>4q|wRDv48v$EEY*v2uJ|u;U3iosnfh&I{|gBBG>{ znfEifB$5azLs3PNgpv^8Zn$+qAUtRm8y zUGmGk&%3ncZ)ul4ToPg$UnhTF=U7tN4oCl@A-PRy-scxKV7!I7DR`~dge>lR2BlFR?Jx!dK1GP0;ht?Bj1(TVc z_gL!W-k7vAX(zntR~nTnl~q<2sZ63w0@)J>;-mp}=UG-V9el;(j6OIQ-ls=J%(Li20+8lL>6R?Q64c2kTTgF{6 z6-*cg1A;G#t)a^zEd|eIIIl`YhQX|oiUxP4Ls~#rM;F80=qAwe@TpnoEz|Ts9igWk zB?;(CDrd?*Gf$Pif~nIn%=^&UsH9X;T~VzxbD(40Y0aTZ7!O#SIDiG;TzWWYQghTB zN}M|B8N3*j2JVPCVxi0GKrc$4m330qtR&AASEOeq%@kDvm+wfc!8B+VYMgY?h9|`_ z)GNiu=@hNgU8pK_6BbvV=*$RC7-38f*_>j|Y(vpsoR$SI;Kc#MGZ-DWid%)g;JxC_ zhcSa9eef7okP5?x)Y$p#0j<}&7BJ`w+q=Y07;erUh zwdApY;u7tN5Ya$vOoAB{>4J%XYA6N(!O(CR$Pj>Ognp@3;F8N|ChGGPjP>9pNB?_r zYVOq3gHa3gB%HHyEQyw)k4+QLdc%kOTON7Gxhv-mkLUON@&143!^3ZQe7<5ipQ$xr z^#HO%v}3A>@G`dO)@2$lMh^f!1h->c5JnNEVSx!wu!hD6~z?n3Mx!EE17b`d%v4QPQ-Be-1O*uBW2=Tv z7#Q@3hhQO!UzTi-rEaTA1Q19G4tse29)A6+!r%SwmjC(hf5m@z|G?diG0i6wa3kir z6Vg_XVQ%%HV@kF*I-nZ;y*wwl7$K^_2+=2x8wFyYjQF-hU+zhQWFZ9J!5~oyVXBBA z3Nl`i?Q{W268B~gWyDMv3M9d@qB)aFfmF~G1iNU$B~D`6f{O+|k`k&x&}X-c5Gv+< z2z5MgnWRc4JZG1o&{y(A$>X<7Ou2-hD!qg>j0ZO3%ukIpFF$Ya25;~N|EA%^Gk3sW zQe@upL+5TlWkoeH%@cyKEb9n0e%Eyv z4pdrg^wwy#QEPZ`G?$@N)2ZvqQdidX#9B`*>xqZ^k32lw^Z4|{^Ks$aD`t*&!A&qa zArS~HCH4RZiOo`kpmiOrd+gyp_R<*j*l5Rwhm8Df?nWF&y(4*KUuN}Xs!<6=JXPX| z*#i3su4cHN;rhzCzBcY|gm3SR@9vzto5s!ck?U*WCNIo+Axp#Q$gBEf5Ut^1Jq97f za79Mj1R9`>dj{U*Zt-l1dj(<-R|rQt9{apH6oUz521zl$f%pvENbI7(M;bZqxEpL! zIof`yKiY~8M?Og*1(Vc)CKHR^@|=Fdv)yoVc!f0R`Ck#v_i~&;4 zq!h}cz%Uw^nCF?C!%$!|7I-rl_(x!SWxL_t0YxNTKD%w|HZ(tOiFYat2I95Zj0oag z6a+_AxxU)*n_qv&FMoO9KmXtNJbe5}?+-}5C5%o}RFDgJ0us0&Zg_7YOd6++xhJY* zEGx7|lTLTXyK<_D=X2%bj}Mf8c*1rcS;{Y{dWYEosS4{e-jCo%#21h=p;|iDEBDJl z9WYnSjlLSoy72I{@Q3e(X;SvP!qvX;eka^r6|N5l=Bq1qhs2>6*V(zAGg&jPb9_S` zgb)&SaTeQM#%Q%-tFs&%>)PqVTas2mPKIlxwvJ5Be1hqF*!?$$KRdMwtH6@L%rOI+ z(^sXhBmOgk+uK`CrxT~s3HR_oyx;FR91d^a`_BNRWy<3_0E~>vgfJt_WX}lU^*yK9 z55y+$iBKBoI4f>PuoKD7h@HU~GAl?A0b+?YYv)c8Ly}>oQCr6Z)}ky)IeVNspc3BF zRKm-BAqDN+ja9=dplBcaXd%mO9U?PHCT1-Z1=WtHPD_;~HB39_ ziJDMsTg?MY!eJG|xzi9){Qil`@ z4Wt4hh-8vLp`+?dsj@G|VG^!)iGxij36UI%4W4l+SW37GUcgf4EXG2odKhPP8eKvm zf0f{a1m5W3^FbxhEfErs+!1rsjU*oFl1Q3K6x3E!I+8k+6;Do|3auzLE2~bxOx7Ja z?MZo$<~^BAPcc7=(>%OKuHHCVC~q|DG>;vzNum$@Nl7HnSkKTBX5)QGp#aj7(6gWd za~FISx(b*eT|%&Lfj5K`u!LHU=OYsM#&A+WR+bEs9VI0XJw7$W zNy&kbow){XUR+6{bPeYhmL0R9Ok~j2aYDgiacI3^i^H+U$pQmGcFClkDb|@WW^qae z6l^wp-{`xQHbYODJQsY((B9y04|N!_FUvx$mFw$k_WS+c9M}IxpP)M+<0T|+`0QF< zCBM%8DmX9r_7)$JOMF~!0fN6O1nD-4^O~d+;@eb-`;#zuweg!&C`p-;QnFAcrCbaK z$_T?~poFo2O%uJuDFaChq8ZhUqcjgi9qpi4;G!wfG_k010-n2J%Zfjr@TX^ZI-=H? zvoK|0E<*B-T<_3brtAwg6(lQQhz;W?H$sr@V}fJSF7(Q{mMAgiEa3HXc@4s6iQ7$| z7yJT7ZJ(8kc3!nCUoQuIHSIO+bxHd?;Wdo+@~dS&tLsumUamj6@2_opEBBST@>l8G z+4sMoyMMint$i4Mx}-&HOWX24z4!_EOMQaNy6|ba z;2M1bUnIQ28@$24G;9WqkB^T$Jv~ut{k+J(8*q1e4+GqsgD!myvVts*Hju@_D0mgD zIo2AzFEm?ewy^q&S|XX@J)yObs*t;qO~_s16aolk;38;ANYY>>VyF=ON+Hw+%<*-c zs=9)>BQ^}iJrT_DPMqLZ=&3@2q6s9N7tfNwp_r1pgrc%L2`J+1L^vp172&ER_Pfmf zDziJpiMXOWM0Tj|LH1bbv?)g@8M2anCijVKnbZ?m6;B=vfZXXQS`<>~2v!%C9*!sd zbjBYa@sAJKbK}@2p8Jlce#Ld4(SE?aVOg+U!ODRhjqDwR$#@+Vq}cYkU5|znNA5*tvlX=9+n!{bVo4~Mp6%_D+*Z^BoU_q2Hh$K+T;0` zmY7;2uSP;}Nw_BHp%}iM5ZNtZk}xv1Cafi_OFVxQ#1-`v?}3AggoCD8NW0Kk)aJ-& zUn)S9#S`JYQm|eyb~qDIKnp@ANv3FFPK7yFnk8x$tPCEvI9Y-QFWyPLqg}Ap>Ag{# zAuSv@xPx~c;;~me4i_MJGKS-{poJxzG$k>T8fiR1a-wSn3e96K#Dr`F|Hvd1@Ed3o zbQNeq$gdntM$#7bS|jyNHbdRw-sE_9T6Y$UK4ue&39S!4j$5STOwv)4@cHR7WRQws zSnGJ}&q+>SK$GElj?FnYnq8sx zh}(&7E7>ZwS6UAXDZyex7y)-esBZ`jl?A26B8_9O7`?Gp$6}hygBytl*Csir6RWyL z@X?A1h$5mO5_}JJ(2dx4V&Fn>eB#WQ`EBxJWA$AzviRe@>~<=#d$j2^5fHc9v^?f`E)~TVI>z-zKnVB zmoW^8mH>^pbRjmzn1o9jfNyb)5yk(JF@`dVUXm{@$(Hge?KNETznc3Mcq#juJ@aY% zqb`BXdTs3OV+=3Kyedi_`>98W=`xlvX_NP8au7*Xkjp*ojEyEGD>o$#PApr zMg#GVDme*LR!U0YMI?o1kxx81zv}lJyulm%^I$W=5eWfdd~=z@JaO1fT<>?hzq#g{ z+aurJKJ(qXC;s^1kw5?V%!m7xkB`Q~v$3p=rFPcZXmzDl!@6QFxCq9CQ*cX|JK|1| z0|_01t~ilEV~tVcviFFae+U2RxA1rWLHNa9xGscuK@)6rM1ad?6)i<0 z%B6om?CaZ{jWJyIH(~6(FV`@xw~%1iiG(M=VN6c}GNE&oAm(3Q?%(R&jPLV^rx7kj z0}Lrr!tzYdH;^VgWm>vr;hJaJ@gy_Hq@1%++0oMkIiZpebJT(`o#<3=q`qKnAzPz} zFsH&!m0g)QOcVQQH^QFXG&7f(se}%E9<(l zu5J9TtZNt?tV|71pl1x?B6RGehK1@4YvyP_QJ`yfIz&| zUT_P%-)qC$5!!-V z$2^1sS{Fn+eR;+>B|_HU69kC8uEFj`qQl1a=w9Z0xI%o6(zbZbmaW8XR3X5$uQ}!CTEJ zJBicsy7lmiZsU{|_u#*#nMr3#lHkb;m^k5jNby|oJd?{5A$78LOd5+WbY19TJgIO@ ziL)w;W{_Aj1_OlT9?IOF5zDA#=m|LqoWajvC+Ksdc?!jH6Y!3k)0@!FMqexK1Z@q0 zf1AJ#2{&l_6rSgk(#JXU=;m}!I1QHy(n6q3M>I8?7{{tk$sBF{vz6#8(=XqgLD%C*cDDO(aBu&!dt^BzjW1 zhA~N8gNu+v*mu~S!GP{zfY4p&00P>TEIAZIx}ZIgY`nXk!4qkmSlVT!>Waw%zQjkk z3cjY{uOm@CaS|nrE`*Eni8rzvj^ip=&KN?MLUmZ(sTPVZEsf9(UmD3gjB6B{hLgqQ znY}qlD*Mve36Pw1*RUPbeaGg7o`gP43{OyR@Gk`JPObH_|6gBUb9HsaG);e#z25_c zy1=J3MohMUe)&P*rTy3F z2R1{2tV-4)r^v!olrm+eDN&|GnqQ0t(p->IusmV8U^xs1nr1{3NN{FNgo=>GL;k3c zvXJ+Md^=OVxnf#xnW!{7)6b6eGj_bE%9*1qe5}vN^GDjZH>}^>(BI!8-`r646EbBI z5DH!vVetALBLox?dzr$4$$R7gxXa7+dCyDa%RJ{&=a%`Zu1Nc& zmM<&)EGAzjN9xFkw_?Dnaxoh($y+^fJ!bCfm46CfWXr1xKZ94p{v5XX{Q_Rqbt&S6 z?Bo-8N$|_RSMg7iK8yRO&zG0_FTbz){?&S3|i`6T$4NuQVaB=$M{jXdwa<(Ey8;oR_s+Vm?DzZ6v$?v5yFUqHpyy6HcakkghQ2WW zrB(m{AOJ~3K~#q}cQl0(H7%6>wRciqNvaRX>)rqj8KesQFg$4gm=Ju6K+kJ zWyhp5a~?eEd57wbl;T&@L`zW14lNEjj4ai!k#vLFE)3Nrj0VC0vrvJSCcI4OG*Nbi zd6)4Cx>hHQ#EQuz;he=*Yy)at_3%!*#oD$dE=_!m8WUZlao}gSMT*6qri(*Mw z5zHOTspeEOIvqC8_TGkZch*JF3QIGbMw68$m8LC>JlaIJ39A!bhXaZZ-W2JXWPkubQwaw@DuR6?ANU_1^5+R} z@CH985MW3mNt?h}f>olLIdIq#;s(h766w@Dl9NsSD-j(H{jR6b3>XKNfg`wd1O|Cs08c!tJCNW&XHv z+>hfDiWjc07ywAzCnHS+k!Vgds)+Vq?uarYW0a5Umm=GEL?cl|2FFQ3z}#tOtll|! z(S45h%=MgycsCjlX!4W+>ZmNG3}aZ2Cex<^yy zN#AiV-}1fxf9$<&ljOLOp!v81AQ=&vS=DN(TRkV4?d{pv&Dxsn{r{h_c4zl>?QC=- zZL7MvDyuSrBmnpO;ACV}b!%1{X?3GkdK!`h2oeB^fWN?BUi16%n(v0he3-(!upV#? zK}lG9J4HGNU&#GTuTS)PX6TipDyK4V5}3x3(>QWBj-1Ac<8WZgBU2t2%D|8Z#uDcx z7fQ-M@Eol2tT0Htu4{UM7kGge`1c6=n)T->HONLs0HmbkP4bgcqW8|}bfUG!@p$C* z>(?9(N8F7)TySd8UXys)mz)=sB4yFPPAcvn8@%5Q9ZF@YDOWXIm|f4dl0c6#8Z z;+DG>0h@A#1V1jqFv2*(G{NzZIh_(;JiyQQr~7A!8TqsmYG||lqk;bUp z!32$|&OQ+L`!{NZ;5Jm&zbYK{U%}}P0RMC`3 zX;c0KRF#}_L?KA&FY-J4dxM}IoroY1Wj}mvE?IX7bG`i z5?D$Y7l?x!&Is39C+=**rAo1!(xn#?t`icyl}nQ-UuG zd^Kf;Df3|v{<|5Mry921o4aW1ur3z-(N7FimBZn{t5>gh^X3g^#^rM1-Me>u_0?A| zzx}^)&^j=@`6ev_vK`YGzEzY_So(S}0=+f~1jQOUE=))mYT& zVk}~GAPt#374o4_rb0x6T`?N7EFfnp#zhCtB3v|4Nra;W2Xn+6H!ud=MuZ7vfR9MiC`+2 zC%Pq?mDt>;)1+dhf>v~B=;D+GhFKV9p;SoL5nbu}fmy(E7=10NP)nf?1)B=ZD&G1v z5(GHaGu1QA!s~pNg<>;BXNoT*8qx({Gd^d0PIw=H0g(aG0W{-D(9+1IBBh~PIe6og zocl5Jphr$+V4V~7l31J4TcYd0%D|G9_24Xbjm0Z-t<<&RYsIZ%BCJKIS*Ru9Y9uiV z#y}jq8M-=yL8`*ol!BtEpm{*kgr+Gh!+gez;ipiFsE5Qd!Z~N2bYipvS&yWA$5ie) z*db70nptp4uT*bzX)H9FRL-)Jypp`2o(OY+Od)UQib{CdY)Ppl(FSEL5osZtFf?aq zFjR1N7Mxk(B95u!3hJ3`%Gi~nO74YZh2#V31yJB4U7V^*{3dk4J-$B`SQ5>Jsv5?v zn@p$(OzuqH7!a~2KtKi08Iw#^HiH*uapywJRhKoi6XumPSJv#*EL^g1$&h5gfrA=a zj4^=}hpADgjvN+j5PF7QMs{z8FYrGP+=I@%TrQZ|mG7OViM%t$|3+|vGK8T1f`W>( z8#3{_hvyLVZwR@cX~`S7KF4{cPxgXa5RW=+kC7<4(%v3z?dJ*zq1YmTdhmM6b4puM z5t2aGEgJc@S6H$#ltd}|Y&0-r@|a0uAx#665)nJJ&~u?{K{bquf?*YmFviO&WSo?Q zl$CtQl>3QcIWqK>(q_1PfOiIc#-AQ(Re5S>dHxUc zpY|QN@~^jLKbF6(|9SbI_JV8On|J$T*CYGGao>EgKLY;PdOw7J`<*ZF0x$5VLhqe< zo_YWN{ncQ=Zx^7yQ!ukX7;WMB9$x>Rz{B0})^Kl-JMGxf44NdQ$$itE=#6AbcA;Uo zI3kTK9mGlM^z8Hj`p_c+mwIkva;W=G#bxffutiBr!6m%RIrr~NdP*Vjzj3>P&zk33`!4dw&I~fg0mtlSEB((TOs#Csfkn- zX~L||MH(WNSu4;{>8M!Hpr-_y=o?RG8uV=K(5lmFDDpQt$r}@uG%L8wXL27DfYyF1QKRHaUt0{hHjv4`&q;2~5b%DZPbmPj5ljxFc?X=-qLN z--2t#G|{C~tx>JgTBS=TduYo{Bp$ZoQ_8SrEG23RnGLnk*N(51x;CU$v~{F+q{Y6L z3Sxd=>5mTt%?ar;fTXc;e`plNSzzvdfHe)^Kdlvf|Q!73mu5N(Ze%S&c(&Om)Gn zF^gg9nD&sbh@+~w>>S}w1O7Y?`2}9!ZwN#r1SP8I=5_JDHSVS(LmsIuG3bH&bkAK@ zst;5@(wM_*iPTVp^hWqas0a##0KZ4NGTpl z2V2Nu-!|4AMO{MRBZ9=PA}F{hU?31EGl-U5#f zA?Qzl2rkf*Qd4G0iJs#As)<<#E;2HcXx6#3fyX}ap$&XkGaoL_`RrVtlx1@U6EySI zzHEXn*~_9+yKD_&L{Z7N<6gLze|9dLXWl+O@!k8x+ehKu2j|_#mB**XWo|6Bver&(5$3B~Fz9BExdIuH!mWb! zRfe(u##OQFpI;X(H^XU|cMuIUB#vmqwF1r+<9Wrsg5KaNT?1E2CAcb(-jKRLT|QZ} z-5o5Lu-F7QygRfg2YKGLdA{q4EMfyi!5ry^ciEmpthZ4OAvQ}g8?IHMBsdo5I5@8! zlrO$WeD!nXAAjBX-~WU0>t8lrz52lYab`55)Q0r!*&E|52(iEmp<)M!Bl?VUL z!aUEo-6%Dv#zw$KO@!iGwETk&m)opw`0bjHf9|m3df$#BgqOn<_~UqT?oQ6#op5(I za({o|%UAG=FB-piUAa3Yjz{G;sT zFY%rByPo?i1YNR&Z#=xGpd~bIq|{cfN+|kkJWhN&ZqGAeE8KWZ*|xBiXj7xEZqJh> z@vQwl#u?@Y2qM878oGHBhPA`moT^SEuG!Y!K5sAs>*1xQ)u032Nh%Db1RZcVay%S4 z98Vk%VJt8m4ort5)3h56OgBaY5k=tEfDj;P>nQ(oe_MZVuuY#&ayxx`dnM*^Ol03D z8?aTr&bh4++!2Xrho|Gn|MnmMfq(qRuldbCKl1Uz_pHkuS7Rb`u!hu(YfNnS3zSFD z3*oibp&P6#ZWnAlgD;ReG7-CIfQ!d2lehRWbcy?VB{cB&F)3hMzvJ;&xE&kMYfh|Q zLJ;`d?~C04IO>8&AAlrAl5Mkv&7p%-=3(J{ypT=@@^L_>jE|W*DrFF+lrfuGY=JGx z$j;Yfh2x|=oD#3@GV+kQWP!`(W&7>cD~s;;7C*(GB23f7&wlnZ9v>gKXgCph;Pva* z48w3;A70>Z52Ov`w_nHkVCb=vtfE@c+zFNS`iXsnvgUw zJ8HhC zmw`SMyc$+2y%Ej$Mt`Dsg|75E(3gT=2D|~j8y03nrjWUe zP99g1RdQK5Nav_<8VYwMbLzsy6>4~}^QLsotYqe#m?!6aS7Wn&sWieN6|G?+w5-fS z;xfdBbe1p}sNNY(kRG1<#346fSQCQ@q6M@dX#gE@zu;(i(#)NfI_qf6Bb;;MiGne5 z&{yxNNA?1L71$N`BBEXxz#orCN-6(lUcd>G z3!cfr9d7gcxj@_`+wu0|4eUkk@JZKh^PgRlEiW5QEqfVPwhjfkmNaK z5sDIuT1m(`B9RY883v`4L>YI3flQhTx*H8>p-Z7lrfESXf$Y4dUYvuWeG7a3wmja5Tnnysu4B1rO+J0b6>c*QpH6N*8~DF+?@ius z&ObZd@;`ju=iRu4Yu@90Prx=W0Fmc8u^+GXZoj~7`5(@I0{g_gdC#A~-smU!fY09M z6@L5~FYp2{@YjHOo_TzH6NjUgT30i?V8Vq2ggDTR4hgAwcVCnTTHatxz^oG2-g zb0U>Y(gC#KI?#1QB@C&P4}|%6#8GnzzPpODno@Jc%7BiAatNaV8-P?XKhxVg`uvu; zE^s+Br$;`F-*aj|cSV*?PAfx(aRI4H>xHprS}RzCkgbNn`QjDB zX(AhcSiprsX6%J&%}leB7Wn7}-srxvvO@02=%fkqB$O#Z3WEVx4FgaItFhNm&6r!K zd1v&7c}0C?@Ri~nvxuT-F4V5kCxq^aPDDa9iFs#9F!6A{m<3IVOs05--l!r}oY`9N z>mJT6#lzs=lVC~k6j27WXOb2)D=C8)LsRFd&Jp3*GbbCFYNAQZHLx%mFvGe*2klDg zvCov<=nlmU+;M_$S32SyfF^~m15FEE6IGqH&#c{9t6AkJjYh*M2iEfK~VDWVzzn2>rn5wHbpG5TtF z?RaY-Gvo^*Gw2!fMC*;#ptW#9VLgY4-ojx;_idq}6oy<#TId3!j@*?auf~CzM%K2{e8s$iM|6Qie>NQO2Hp@4t4E;&7J^@2 zabnX^2+O#eZF0JMl+(##sKyE_$w;Y^^n#=YDmZ}%)r{m8{Z~g+P;o?q6_$N7lT9UY zdlj;kgh4|rn7${KsIavY*Bpx`5pi4vQ&_Xpli=zg!kRNrL*|?lYf6Z~Dg$S|<53>C z^ukgbmzsIMyyngFOWx1-Jk2oAoppJj)f3(FCTx>P5@U$bMBa&h#kCkwjA0|rc>eZL zFY3Ay1e`|wPv8%%=h4>Zr9TFL-~qr7uSI`uAG?om&qX%+zF)Z>_er~77q&4vijllh zY-MESAS*|$oV0RC#+aN*ohgOEz*O{VFtA4$$SILDj0O-$sthHf4CI`{``uPYRT0%s z#_|F$@B)7s5E1aOR80zngu62hnbS1za2&Zm4t)7=4M-I12u1)2oV@3E9L&v-eEYYf0Z|Brey&NB7*8g=3j>k` z8MsCdV0X->L|Yo|MP)$}_-&G+=U~=@BQhFfseUnZkv_tYd_v z_l#=>QN#s75CxT>r*aScm8>!e+zli53imm4oDLkPX)_oYn1+#QoWfvWxG@;WB@6^q zNh*|-DP>^DnIUIN%AXhzTsPCMFaHHz;00db-w*6-(_eFq5($NBRmI(JJTL~%2e>MF ze+IkaaL^8W9cNiqmL2G|Bv6+PYE_EF5aJjnc-$Y2+a52t9$$k zZ)q;a!#Fl>-AmZzQza|k{PJh~=l}E{_P!&-}vsk-{MrTiuVfj1YU?I8qO2Q zBjgK6M+C$zybXJWwgMfp!RVm{>qZzMcc|Bi41U8T2t-G3ey}^-%pE6w?-1+BMq;zA zPRN$7*Y;btW`S)J5=MkUxJI0_R#`qa)^%p|d$JzTk|8P65C*&SgqzWh9UEdDVuewZ z;h52f%=|iWzK8G3c*Ix)XpT3>dVEulPxzCBVHkLLc;Iw8(R)P2`S9TbA3uKN)vH&$ z7!CXlL(llrD*&`LR2Fgx?`yetBpY3zCFm;jU1lH^xPW{Fxv*^)Q_Mm(4-6^M@VH+k zJjG{WPQsdn6=4;jErR8crK=0cU~pk{m~cjM7K}s(!YE+lLQ%WW*% z)gUV}C}|SPK|)Dc47X0R#_B7JJBt)n=`7-OhfoStk|nx;mznRyZ|ae)xP!h)LENxuATD==e5(CqbOMuIVpoiB-$#%s=};7MmB>1=n~Nr zx*DrwDv4&BlDT#~geQ<2k}iljGABd|+6%fWSf?}7(uKxMRi$d9X`#u9E`mwMrD3`e zKMA@zE6(Du04tE(8NB0GQCm=7p=T^Qi}j2(3sm3&7GAO?3u=NmBsv3)F*&0qhLk9h zC|Ox&K~%v7Gog86^~5U5oM0ZDW*+vE4QLXZ)EoWQlhcyZli?UbM?B>8doUPvC0k+W z&d?N>0ha-jf@{K34g;AiSc0_(OA+QQTvRxFjBP<_s-e_Sq7S>_YxeNeh&QS zbLVKzp+jnltO>P@SSZC5t3&Jb=NNy9kjihx%)=a4vO3Ne@#3TB*x}oN1ceHJWDk?0ZTqKlCI#9YXIhZ-Pp>Bv2 zBqdw~BcV73G+=R|_01ciJE=9w8iq#d3wXxKLE93;n{;Gkqu|R zz+VN-jAdD_MgzTf?(Xic{KB8g7dX#!+${u^e4CF-LePM*`V&0k6&rlknWW+X03ZNK zL_t&z+rI71^RM}x==8YnN>e*q!E^mdm!J&Yz36-!33ZZ*TjtHR}al;06Bj;2u$q zUJM3)2)oh1kHPp+IJ{F_jI}sEHrg=b`H42n?4I%5N8AMUZJXLXl>4QVkWXacngq#? zWv35@kA`U?S3_OMme5}CE?950CQuFU4ctINmLJBBiIzIr94etRhvVI#Inp}PCH9{g zVoxJyM$a5vxC8Dz($ov?6D~(w?;xF!JW+~lNlAl%OmsQWF^P%pW3C~JTLP?VtC28d<3qD|G8%FT6-X)QQARYHGj zcLwxRffu8J7xP=10uKve389!`KRNu#Q-zL||dVZD%Wk#cca#B^{9-giIE6?BwynIDAJ+XV4a7>W$PEl67<& zqY7kkvIMdS=6rh_@;MvpsBdGIU}s&Fu0n(h4z#@+447CfcoGCqO>h~6nzyAdN=6mXzXx-^;fc^z;$3QgK58|IK zwvG$2aRSjNxYl_#CJ3+;o_Smx(H`EQQs-7^0KliWeF8tI`zJ%Z^H1P21hO}AJ-P$W zf@4SU?WH&#*Y|JdP8e^15|*?Bbu$=PxubH@#vvP1hABm8?Wu&7>yTrUC!!35vT!I2 zM|&wLLn(}8qB(DkA!W)5=W@WW;9^?n5`qVh633wem4l@ zwxl zsz7pl+xZcVDykCU?i^Bvl*2O=X{#sO1GamzDz0096!vh2h;QF7?z?RV;yEHhO5sts zo6)S(iOprqxO%#hn7{jg#kVoAkI%^j9d;^(({$k83lF7moQ@o)k>fBh4FiX9WE#So z@KDOt_%n>#Hp4bia!L$kHz>&06b1x025@^vUf=~@;06Aj!C!lQ+Slv-{8R8x(Ci-z z!!U9@9vH_75#ew+^0S|P$v5A8!*}0($9HeOi{p21dHeP)Z@>SZ^JU(mB&}EvnxprQ z_K@?JlAaHc^xo`kz&;>$0cpMMEod|{kU zjni@F;RLTwl`jsBl#HYa6(@To?f7nMOAWB-5*xl#Jd%KDsM!aOjg1c2>I1Uzdi#0c zec5pEm5Klb3q5^AL9QAf5ClOa*X=cJu%EjE5_MoZC-OV~*CeQdsKj&kIu7m$XQVTM zL`PP==bY|&t9QJ~_q;F0TPFZ=F3a^e6 z52qvdrz5B1k;8NdLxEvp9OF2RBjc9#CAwIfaeG5hLTUKUoh)(DR9M|HhY`4QcyM7#N*)v`DZCo!=^VSwVo-3*+ z#&XYS2c$!yaHsbiP7{yE53J*Y(ec4pv+#CumP2KtGGCnY6I^}|{;lJWieC(`j_v%! zPxxn&QsT=mzx+gD@XebyOw&Xu<@4$<@HYZYJCDZ?0HjhW!_1J*400y>jGE&zVLD+l z(IwN}X|~X;(%fPhkdTEZ1rbLPq$QGNY)QBzQtptImXtLsE)`v2oD;bvj+!V*I3?#5 zl{@bo`bq(gwb7Rjt1`BNTjp4VVjZ`I#hy(0#t zD_FtZXexxa!%D|-5^e&T(NajNU?biX>4w;XNJFxbrihH3vZ14)CA>5H#pu;p%~`52 zi*g<^EjfwACh!XZ20_f}BiNzPhC-hTJs;@YVRgdK1N02@fG>_zgSw^xVUi6wT;VTyZr6g-=4`&S)OXV%7?8cjDt83B_{r&9?93~ykwVxpv` z=nTC(W(oR$^aI*D#TN!$8H+KDl|>ZGnVJR7i6%mGVR7fuJEb+ugd&NY8c8d%8NYfb zECxELIk`8AcSeD6$c$w~WT0r~D8j2Aw9P>}2Q@qiPYw{#8&>IK8uIh1DBmD4FAnEk1=fH_q2Ps*M@>Aq)n!_yJJ5!o^-T=~a8oVNRAzxO7;oeN+B zLbR!b9M`vNv9erm!rft`!LRpkbgl!Ih`oSoihLJhPe5ej3l&qPYYM(t3!H+<1;-M!R_kG!4l-{zAC-+;jNQfz1>R*FHV_ z`~_{-KPmqFX#oBQil1lM=kR%TyB*{Uyub_mPXqUG!ZQq^FV$MZ%s#92_X&Gs;~#@_ zEL|&a)QKZb zU%^+$wj`?uftUqkqq8J|WRyaY!bHJ|{wks1J8Z{9V0;&$YoZMUb(*NBJN)%Ro@eea z=gpft*xfzz3iB$=3se{SL#Lf8>sXmn<-*E^$^~N*xM)OZNTzrdSionaH)!Tmgz8F_ zh`0xWyWl3MMa02{@b->6C3V6rKx6err2-bbXIDyvd4=vq?TwyRTrc1WX@=Al=_~XO z76$H3U`@hoLQBdeITv*<7#A~E5jveN5lym-qsh_iXtqd60LQx19Hc|d(6ZCBM?^mX zCwTE{%3_J$6<-Ie70e1w4q1}=Ccp0FEF@RN72o*yYGLpmQ7s)LkctWK% zFvWeuEJxIf?x=Ud2pUv2-bX!*^AnIlpV2+jC{!)1K4K+PQ}8#mPM$mN3))w-FNiJV z*3hoF7plOL!uiOG(|V`7Vb$Y1sle{p;U@z5^MV(6fxkD9J&BM~qLjj69D!Y6u>a_v zkG>H%-8$VYj09RN;@Ugf8`2wMv4~usvq=^lN3P|eL}>+NP;{tR`LGH8KY%QV8Y&Be zc!YhSMSta)RZZgHnh$`^hZ^1uffTLavJ{HlC8aaq<3I=iA1bTv+g{6cL35n3Q z@5{({`N&h6Fmo>bp7-`M-mc&9INx)bE6WAe^NIHK4Sl)CdjSVGMXUfBVPm$V(?5XS znBi6#)N%Gm15vb%SUd#I5aUpf6n8$uZCm@rt?UPN|73`74cxS|z3;8)+Qv2GaTF(n z4^jX6-8_cx3fLy(6DNG(bOu&NDhF*GwR1|wA$u4M6k#ffsU*hDXkb^I%xNXyaZK*2ap{SEp8B5_{Mmrd-bzU8hobT>9FAE>% zm8Z+h)4cL=Uio-gm}_HM8kePUo>x9x7QTIa;?2hy>q61Qm@-*34B@)dy3@?4-RN#~ zjOIP?U+>gjS=-9mE48kXTBY`x+Lln9QHfmu8h8h>ZBcVJRuZIxY&gJrFt2qJ=I>xP z$+sgn?=K!1>!}d`WWNBo;~1e-=-ayl(V*`&_Pv=hoH85>oKC{so$$qj@YNT>*I&Rd zzZU-Co5U}_&U|@NP6wr=Xv!>x61xP%I_|Nq$LKb(^37*<2*w7YeBZ6vtLhpz}yp6jxqde^v4it$I=0?E<6-j?D%0To1IlgK2~o!Hnl0hjRF=I}f&9C}CA6tx`RGWXMg2XP*T!f85km_`o6z+o7f zhKM$>8x54S;i(i+26hF(oDxGWSEGTP(ly;i8+d^ic!3vqfj>BFUS{Tb#?}^sxaRDN zzHUY+8b=-rA+cB^(@1U`&V-aU!F_>Jpp?yM+CyM}n&9u6C3u7EE z3}fR^;8=`P?gXM1#2npt|Eto6NYDWrsUt!t-|xJ+2&&;xWygndRaD?g)5z8efQUzC z$Kw*$T?1cR-f(rW)YqF`38k~>?k3QVBiqsC`FXJ2yEm>9bU+d$39ef|qKmj^j3bSE zLE=c6Z-KHna@ntco-6| zr;*pkiC3os_s0XrEy}<+PFrMwiE(;17#PR#6N3TRzQ@01;=aHSV4oV|5(#j8d)Fy8 z0+1_xW$v)ayW$IMb6Y|tBW^Jt-9kCf4EIh+$~V9Kg8$>c{6{{1d}3MtPyXBg^N#Ny zpQv^LYlyu^tRN=PDq+!XS}Gaqa|;B21!;YzDSPLxzJ>tTFUZs#P?+8xNT^!$qAB zy&*m1jCdGjR64bvIiEXIS~+qgkC{{wNrYGaip6Hmdgh$Ybaz;sT7^NC+JyC^Q@=O* z_i@d|kpXlN(ovE8)BUZalz9F6HSgcQXIU0nYdk(a^6}%xtHIF={Ov(YjmN_y0I94D z!-Xk-r16o#E+lhECp;bTbi#F@yU_Z=+8S$fnh9tqeP;w0L0yrW5X(rPy-+X%Cal ze*e$1JA2G%%ru&ox~sbCU6~Oi;O_f}Ga@7NRhN2f)lzqFdK!`>2!H^=0Q`1}R-!b} zhSo-DmFVdSJ`KD&qMg=)roq&lwhs`;WpfbmV3<0cU(lIqgG1fXLEf5N^oABL^*v-Ulhu6)~ zQ{TZuL)My+W4aK%f*S%fDNIEXNpEt+p<1C@BYei(@hE95FmHwfU9nS;TTVKPD(+0h zkq)J#@ZUY?l1d06WYmY@oIH)bmJ+N{8BvJ=aw^n1Bht`lm`}LQWE5+ocraUf+Y13S zP&HJcXfTH|iKNj0drAQr5IwNrbaz%;7#a+3jMy`%X*3YQ(cr|1C5}WaR9_KaF&6H1 zPYGqn%3uZAyfeBJ4(7`06|*3gN<}#sygoTQb=-x4U^~U86vSJkl74r=O_AZ$041O} zDuY!!C+(Q7glzk@I3__Sl6BaHI4W*RSql0^84|b@LMN0R)r}U;EQ4Kh_OYfxOM#^n zq@+3VFj&JlnxoyYfJrlA#-zz7N}~Y*16>5|AQ)4zl&zG;z6euH9K5DtjDbf+$|{>8 z!ad;Pba7U3TpdxTXrQ|B1z+Ib1tNmpJ72zhVT?iVox|b4ZkI;xe+YQM>QCn|YYK42 zee>$cxujx?$m6}=pU(8{j zMK+N@0NYDA&tq)wgpDo-HnL&Me=Ku3p7XEH&Q0eGUf<11PxG!%9`ohd@d;dLD%YPs8skv*kaz{q%ddhJYup)BN%L1zz9<{&*lF z91aJjX~N8i2+SVk>Ax>U#FMq?FBN{@7(eW1etWkk!f=!{_6J|U7sZ%j~`c&{4>Xe!c z779@)9jYizoJpOjM5an=@=xFmOi9Cqh!kD)z&)_Vu#+*49iKbVN;)|x!e~%7p}&*3 z(nMle6=4GQz%B3v*lmwIiv%czXft-WVE__v2w^XU-TsE%n|J8<-}A%$13%m!`QiS7 z&ub?RO1urmp|jKz_v4<~2a|Q02a_w|mD&ZCK<q87*!gbH3AFDq68D}xAy4zd`!8f6$_fdi{3$5J^* z)_PFqL5=%>Qv_X%A;!>wX&URd>RSqGbqa!n;ltBl#f{#KF$F$FZm=qZ7i>T`fguzt zFeb1};^{mIvDEP_hSor+1T(`6OEe-1YZwE?!+}mjXM}O`V3>2X$nRV;B0xk<3_u2y zjh|a(Gj0(&o<=g>lV5*wbb)p%#8Q!_*bK+X_*}rmSOeBgrZpLOtPm^02`MY8hKSN! zY1U}#Oo<7s&_#$M^bl&0_^<)Pkl~aNq*bEicRos`m(EgPEwE_NsreCCMWzXv3~@<% zgN8;;O2Zscqu{7`;(IDkG=H<2{2!ObS{wJaMJTD74hwuBYp2~0;Dq`KjRo~1)g7^d z6D&oLDI*;`xU-lu)?CXYf)F8Ij0Rrd1^)D4qe-vnpg$ZoZp~S!eD=8hQ;eeXqTmr} zG~iCHn*qTkrNO}2GXp##eL*4#=RfciA~tgKW5?P%W37Z9(b6+y3h9|dg;e&#Gx^3! ziX;_<00EKUOfd>cvdh&P7NVraq#}}Ijxz&Gm5aB31&RqBhmF(ZC*@3~ffDv(y} z5kz&STG`JNcYW}t50>6J^^s{=>E^86So+|!bUq&!{{Fk~*?s>3x8SzTyxHwI)J7i} z5^R`f^4!ae+}pYt!;H1BoW{awUHI_%GoL;ipC63-q&rhm3Nyt;IulCnhnwP`%|xFda_r-gD_ z(6uw0bI`)8dC#jVyqafjc6;{oyhRwuXal=xPJ@Bk()g{UF@UO~q98)m!rUrTYt)h< z$FK3bpZcA>zze*<3%tO;4xHE8)+TOm3x~r2vp1Nfg2vmo@A&4MZ}{1_KjXJQ`-X3S z`&<6$+wa(a_Z|BE@A&-X3w-&4FC90TQEHRi z=Vz!DT9Z`T=_@iiw|nLN`^NkCJAUyq;a5L%-oCc`+5WrARcV|Nd!ss zW4p(;RZEchM5gn8zuh~RJ*V^5`(&5WNJjH^Lj**HsF4u{6tt2?1RH*EY3SUTW>eSb zk({ru_r$H!nVgLPdv-2P|&^MCn{ zZ-4lnWvuwvgQoB$0CYj3Lva|6^iHsVpP*Q_EkV@eqal)yvmjPTW?lA80wxi@*%f|u zsQmJ_QY)YtRdnrvTfKw>ZyFqQa3DAc+z{Le?6uM)y_Q7*E2QwpQ$XoM=@iydQ%QB# z_zbM;!o#NrKDu)(!o4aBsXVzd@b0Yl&JbhgJLZ^ZQknXMM&iI8!P-~4r@_HW;2p+8 z;9r7RKon#rNQ1Ih>Wv`j2=6ZvM1*;sdG+cQ$K#PtpFXiH3!guKX20L_`t|GQRbJrF z08EXKHyLT5>cZUa8RauWK4Jbq^FV4E5J@wl>@k$S8mkW$obEyqrJ>B*=BX6QsFbBp zmr7X+h|x=-o{a{yAmYp}>_XV9u-9Ox#$EPMO#5p~u+vGg6M)fgpZ2E_)oL+u3_6ELU+q*{9CuLdn}DwK+}9a<(-wl&!d z#83_Dlo3z%1UtZnF}kyE$=vA*OB0S&d1#VK4=4qthT@S57mEwSl@gnRMmUHQG6?NZ zjM*k;cI;fJ)<6W62Bl$9>6GM+>p;tbwi7xTA~Uk9uxrTv4(1!8?TMISoxvJNB0%8{ z%poHAWh&IFw35c6tR>6%1nV9%$4bT8giRBpO;~q^u8gr@y5PQ2%t6zmb}Oll;RU+p z`yAp_oOJ}N4^}Z&HAXld6EOriHJa?u5|~bi&X{zx461j+I*jcYX=o9}64xtE)5;`M z8V!^Q^$H^hSs1>svar}nFF}vW8kHWfFgW?b11C;#V(}Bk0}&@I7S=fNMfNnQ6m^O? zs!n0#xCfYLqyck*MQNtoEWzvI?3%GQVQIp$b4F7inv%}|03ZNKL_t)11h&HHLN~#N z5GEOo3&+$b7^XenwJiCa2)Hcv5u<@sh1!t1qlnSy%(}3z&W)^y z6c#NsbwnXT7#^$ZR zig5aXr3Q;T_oRz36DK5aOP1IWdMH<2)SkFo# zlK-x2oX@Y10vD1?lYP;+lUIeWW3lcch>UdW%;kw+tfB55=**Tw;1)x!R5 zFo`hlcI@81X8P4H`0(9t`R#YVA-?+!r*Dt^us+K4L?lip)u_mQ&ox< zyl={3qWIPqB9JS(J`Wq8;anz*Jzj6;MCPC2>hzITexASm|9$nYFRrKM$rNpcE>rji z)q$*c&fU(h3SCZrPkT?{=@>F+gRN3hXcx-wzZ0(edY$+6>F&Tt-&nekjc`(Z!xo4Y-@L)SD2Uw~DRQSkkO z%YxUHC@Z2Bi9n(dJ`=7CDbyj95UL4y8o{q!Sk+n8ut;U{62bsfjgX|9y2BV1>q++_ z5KXW>)IInBUzO-ky3)j$sxwt%nw+^gwFzF6MD+?LfkDy*mb^JL#};FpI{nxg)0##D zMHsD6W}$2}w}_(1MVWUZjRaDw0uCfizljDwl2>8VOb3=y(bAaP%)7%4Z{NM=?JxhD z>C*>({oy-){qc9mhwrfaPn@a{?_k^vmU7~Ld|~pS4WpSq8V!sd(usqVipXHQyEF;m(DVb#f)X7&IuBTdK!eq6E7dXboBoO3Ckop5N(Mjp)W`P9KhY z%63!*T(GskTCiTwu^G}v%cB&O03C>~sIF)kpgT+oogEsf*|%C}xZvi9B~QDDWn?qU zBwM=$MJ3HhP$R0sB*X+>jS<3#VA#Ms@Bwl($`RzK$WqZ=enY3?d?NWiX3lUUy3@K7 z^o=(&(9-E*K%;e~d84j7iqBw$UWDF+V<$S6H)hzoZhUx%uLKUQeHbjJ=V70Qu1U}PAM^L^5G(Vy& zXq+-}qTnbELs&{-l*SmraScwx=_kim%MVjE=jJa6&g;wzyuhC=hzJqMP&r@!u|Xu6 zA7>$HjUZ}Gq$eVeB$XDxnM?y99;CThLmG5>OSiqH3rZ zgyZ;yi9rAo6m?vjWHvoM>H6&if@?BGQ*tOEK?qy^R!${1`I?)(2@DSw7nY%%9F88W z-MC*?KAjdmACKIxGwbSDudo`$96isKfFMr@+Y3m(L&z@W%iiW%BI`+RG->4Ue9mR7 zc)rYQVS7D~cb=S|FP^`v=dJ%VJPzo>GL$X8V!_;IPe@%&u9F2pI_hwUf^E=w*3OmOTd5M z@9iuE2vz1L8Pa!hZafJX^HlgZb3BqkG0f@1__8cacN4Y;H|J+JxBT_p9dG9y>lpMg z81`rg7eGWX+~{Mlnz8hi)4FiK9QpNcEC2Mn!MEQZ`5^+Qzk&Vk#5{k`)IKrAV2Yl?PfsEyl?N3<@KxAOgFd0G|^Qk zt>P;9sUtcgIw4weO;=$uqx!(th2z6Lr-ug)GI&)6ch#AunR%X==fboLrX8dKpF?!w z^cg>W=4Pk7dwbyhyIbDAR_<;GH~UUT2k?wYaM_vzk=%0y(A?!NXvcZxF4yw7&E_8T zc#kb_+qIvT+w#tMK<=JNn`sh(X*jq^21>~!fNG$c{7aF%k4PTRARFERIY(PZWBW@% zbMje;?c2;QTBb0I1!u-7EceehFr!$1Vj*Sm<{^6GS&rOSIQVPVAmS^ z-Hw~Xfy3dzVV~YV(?DQmnzwnKri+*GmncGiT!<}s1a6)lbLGrC5YhofPI82ADK2>z z*yf)IB2w@P0cRAGpcdur+gtv_fA|~bdE)N%p1aroC;##P`bU2A?GK#BCv0R}VkCbI z$w*t>iP8~Ph)&?h7TQ_F(Hew6xI<6nS}rt4ATI0$etFaQ-+p=E?|!-C=HMLaLba8C zf_0s7Dk4FX&TYUeGj2WDh0>(ZnHVUBBVyo&4P&w3A!a`K%vvXWKO;8>WZKbt@Zr+~ z|8x|7u!SxIZ^l~Evy^+%NXxp?zjV53z zMJrGP*$MSlD0|S|`91vQ!rk2+4-XG~`SJyK=hLT891aKW?(R0@mlybR0xN_2n~XHj zWX1FgQ9cv;3Ao22Pzg#A$|O*o@P>KNM{ojGPbVr<6pTtpdP*){XuVKRmG)5R3~C8V z@oc9;QD-i2tME!;ugeQ~(UJ0O8qf~=x#AE~lWrv8Ob)vK#9vvMWQA1>ardRY|l_HIz2b78Ej$1HFr{kQg)2*^< zrE6s^h1ExX>v3ZEg3D*RD6rymR#|x9z)l-G751v^RHzYDHiZpQEE>ZV99mC9lN&L1 z6X+pjMr(8Y5cs+|9I-TB>*j78&4a7+Aybe1GFvHB1N7i0+9l_El& z(x_mbD*HpBESvF+3nhXP#upDhdj5`1kp_Q|xX>BZR(bqhUf|y!%#7plcrhAy_39P6 z@fd#!Ja}R>@O(b=I{A<1>kCbIJ_6f{^_b*G^3M6^biOZ;7;vWZxn8HOi~=E0ge)a+ zHU^In@|U-9YehCbex$*G7c4e~H-WJk44jPyWHTJ76dFp9w6T)%it9dA5*C)iU-Amqf@_PpU zSnNj%T(aW#)_?lG<3}Ht=W>;|&Bj&x=P|}PoipSy?+j1QuP*+DF8&03UGu7Iz_b11 z-1&2%bKkE%it{bI(7Z430x$4y4XTN2p84X>AtEkD17nP@j_33|BEj|XI{!}@+@0S0 zSNPV4{lvGodoV{&LNzEZlvOAvB{YcE35+Nwq*f#f_>M3qRIs5`5o!p&dA=RtteSQR zt2!2ru;Cy?>e$Jk(->YDRuE6Nj!$(n7zm0(Asri}nz+yq&TlKW1ZhC9i3JFfz1dY+ z+r+6T^E6TBJ$E;^{QT`({(4#Y)v~a^|IGfoe}M1)E64AG`;UX+EAkfXFj&fhjC&?i zrU8>>qyY?Ccl6}QgQ1@s0(=Kn6x#`=f@{U45kBFg5km<#V1;6YI#CB!2Q~(y=J$WW z&pfaN#tK>tDUNK){Q+Q#tOZ>Qr5A8;gjE!+m9g!XCwERR84WOe3r?tvnh_mhYjdiB z6sRT8A$WK?Ragu=4XkZWq9s5BDMFZHr;0C)I5pHNMQb|g)j}bjEgU`z)~}zMYvCWhKs+yb_S~>qMMtpGk*Jv8B7Q>s0f`xld%_pG52S1! znhDSiSrE1bJPP8nq(TUoN?MlX4vnjFsuQ25Jr7!00@g4- zjsxHJ_k8bf`8a0!+Id*_d^x@0v>fQ`4l@X==-8P0T-f+YxC(%0zb`M;w;uAOT&R;q z0|{}df6dNeV|+!nCpkv+vBdc(+xhiOUbmF@9?L&I;R-IDIcIGBmh1A{*)`MSJg}?F zxULfc0WOhDwupRh0v;e96r2{u>?^zI?4&c(*{N@g@)Q8gO_^JIU7lNGnl__>k|`N| z!gHg67kGge_|u2$->pJ=z26|Ve^1uONMnNgW#!xTfnT3K6A}FK)hqt?-8+79bHmy@ zeGD>O`&qFs+jDo;F&JhnqqFqR!*b%A+nH}(&HVG-#BXmK-+o^>E(=S~GQ$RDDTqnq zI=~&t4qmoTcMu1Vr;Lu^vSbR6<-daS683B@Q*td;6||&~<8~jsIs~t7h1ahduU)_ojyt{R(4$=>SP)1M&Db);;DtFmZ=0$Ax;=J!Z!etNB z?S@JgmhD>D1mJo8xUZ4&_)W?pm5dq?vgJiSzeHZwbNg3E&AKEV_1G~5+3t&GyP-mQ zIMz&nYh%#^U*rv+xZ_g{KDhB&4lGP`Dy9_!)IoFBwuj_*Gtf#~IuVHpf-o3Kz&aa3YM!?O<> z2D(5jMEVs^FV3V#DW>h45t3yz@&gYMw8E=B{4a0eo4dw5%?u_^rwRH(voGx8fjX2p zP4r$kxw6y=UpuTF8IA+TsC=Aud@nO!%8qCU=DXM2ym`ZNc*U<{;{RUYKYe)Md-<>U z_rIm~kCbwvw!-K^bYP^CZdh;UcY|L&CSir8uI4`)hKrvX&@sP zuDCdzipq|tFxN_{P@5sOg9*zjcqqdPOILOj48?ULjzx(A)*R+e#GuE(V&F26aLNd( z2GxUBBBK|ogE~Wkt`KZW^&{|M*cjLl94AZ(SHzV@p^77Fc%-s9Iv6Dw2pJ(@AW_gE zlp(YMb->vVS|gZ9Wo)gL7EYDyTdm;T@nXoPpzUH<5v)oY1~g%G#|UC5tPNcXWeVy} za(pwCmdc*R0yRM`P$Th-5`-r&Kg5XUL^Gl|q0SJ;uv8W>0~a(Djf(nA83%kgEVP)nm zLdTjT?Sxb%D#^bpLPlOzT$G`~5Xn7EoWO7a6UC%q(jY2g4OtZ(j;A~YL1v~`+7Pl83Lr4%g6t^l}lRcvCPx- zn5SFC#Iv_+vYqq&uaBv$Up9H70vTZ-QN1%C`pjoW0f$_>&orIj4(_>z7QjXDZH*{e zP%X%IR4Jqpc|O}nRx?}kXVx=Z84GMlfKtjOvfm!4^UUkJTi!(QCW6-66#ycTs{x%K zcdqngJkQUbbt2&EA=d+bT28+1Y?e3|JKz4g*cEJjeb)B*ao2r)-ppkliRkg;0wIK3;bz;h)`>#l)^Mk z8&56ae17)O-+O;zOkifL>v}mq%klqIAtG3o<>LJ13=fm?abHu(*c3gBU|>mqx(6oE5GE8{5Elk%OfZTt3Jd~K$7Cd4CcYVj3l;@4#jK{1zXVDE_8E7Kc~QZU<1^J5=z;y zr?O#7%I0l7o4(*tgxbrg8t;`Qi3~ zAHIL!w0uE!g3ZPfC+y)1Q#7Vgn5{66LK%v6Ll;Mnj@}O-2vK|=L@2HmQ^hHGG~6fL z3KkvLffq1wTsmO`YR)6(h`|_6m}8dS{YwdIF-jfOI&zP*0;3^)LWgY6g46c}@GGGoM`GH7&~uQcnlzH&BvrwIs1DPUv|H^c|kglZ`ZAcC_j%4tX^0hyj7yj&?dajBwu7D z+6l2heIyT|7*Yo4N_zBc$Y#C_NwXM&$@VJ|1y4SnjzQ@wb(|=2LNyHrR8L4rp1`OR zy$t$fbb+xrBMct~iy=!IQpklK{L6qBqk$KAfqx|s;5lLZQ}DcGs3H+G8i(lyLFj&q zu@bV<PK&Q5lFT zp)g8eEsY_$h#Qp$z2S%Y4WBr0--F{=`P}dLaQq+nc={C&>n&pp`e>~E4)1R%wnu{$ zhQx%#j#R2h;x@$7LNvGe%S#EIL^Q5msjv~dkBe|JxX$$-!PT;D0a3@d^rOqS$XE3v z9&@hmd{tlIELeO*@+=IO+}b$VXlR_R_0cjrpB(-BtkTe1yobf@y&w+Zyoy5po@Q zY$>lV|1O{+Os!^`8UXv<%&WtmcXzkg#)OF2R^w-kieV|FGtaPby$}BC?K^(>t6%Z$ z58w0a@4n-ofA<~VfBMJ|Uq18k^JhLk-1FuBp5t=LC@&R+o56sVR7!Z1ylic#Ae!g0-OKjQ<+emV`}{0)D}O0_?%!hyQsB}9Ru6gvX+t4| z3L$ctw)3#h@5DTrT+8uDTNi3LJ5lyk*ww<^(m-IICU&h|j0Seo#M~xYt&fIpXX7^& zF5i+!3jNOS^#xww1zzCK2>$=HZn>wz(Xp?5KAreDj1N-yem?N|m2)hOCBP0h$o>YM zrZgO=%I&!4?f3%sE4za-?Sxw|ywb{TDcs%edG+ds{h=|<&NMr{udM6Js-0?uYOtT3 z{odH`2fNv5HK>|k3ZEO9}1dw#>mJ#Q9e{3s)M}>~< zP7s8|BOXw;bCpbN*IPZ2$2P8&vk^fw^W=gnhoxPdvz-7#)@sjz>5iDeFQrPtTBT$IX7v&Hlj6{=j~> zXSdt2+wGaR^Ld`Bn~>>(L%qNs83G~@+4u({shAiklvLdm+9SVIBEMBUh)CK(wr>(h zd^(bsbD6_P{906~0*8I&uYUQ4dHSFE#W%P7uYdbj{6ByH5B&Xq`3L^tAAikvzx%}J z<$-?k6s(LuhEj&YfG7jkm0%%a1u?`e@px5eLfFPLQ*dC7;CEkE{{EjFJuUp^ExdWv zxY;*m>FoN6G(uQ$5r~Nx1sj67vb))Fcl!-(+OcZoBntC{?)KcxH#id?W#&7b`KS}$ zQ~4o;kMp2Rm2ct|YyE)TOZKx0aZ*Y+!(o^c7DxmZg9pAatg~we=CY${DsAoSfm$j% z-SZ$1SRKR^_%4uL3d3-akpjvm>%(6thzR@rp0{t`ay%Y!cOD)d_~C~iI2;Zy>&TxO z)N0(l{Q~6vBmwGsJoc=B6rs(9Iu&+JsU=u378+ueu~tr}#@^E8yc$S|ro)q@g&^xVt41g6fS!lI2u3q3rAwr+SFfg?pKvrOz{W~UQ#DIf)x zVz1khG}35E-5sZaKbt< zRv3{A)x9#E4w!=4gs(eny}{Q9+&V%1lYN6=YZN{Y)=6oQktpquUN&4zTRewW8hF4LFS8 z;LcoxTA$~sjvIyDwxMWj>Q zsiL$}nW^lpPhEZL52essTc!1(skR# zS>}1pKciP7pu`0amaDSoCuY6J`a+U-S$0d?xt{Av)<3tSB>fgZ5jYzHWX0?6?#w;C zTs|H}vf-kjij;!ainj@wW^|gN&PZvfq(a+y3}b7;fzD%}WV`nnxCf5F3CLd&sgOhN z$3?Ptjr5><6T#hK$D21dynA@f$NnDOE2qyLjsm9>>nG#*uyP;9g9`Vpv8SL@fm0AY zF2C!1Cm=-9rW*v0@8~I7e0`DUvHcUyGUUfHLgfFgae+&z^EL4}Pi)sf%Cetk0Z;R? z^=$PTa!gO*`StTcart+K?dH=L!_yHwWS^9(db7 zf;noP!itua(iY0DQzxO+9a;m?2BLUOjHnQX`;mwaQ7O>~>c%El3YEe{3rO9oT#^Ui z1Q`h}@RBJCAc6<@DEOk363FVP8!AR^PEkjyrDyVRh6OsZ4b)K#*&b#kSf=1Uf-XYb zD1JM^-5$NWMQ?AA{hWAK-GR)wq+>ovCkNMf)fu;XAaSZ?OM}F}x(F+YW0?rcIe$yW zOu5-JzrN#kyybPdr$02zCZZR{htB%_XZFktnFwZzD0MB=C6J-HM;8Hcq8O+V2oZyb zfkhg^XT+*Ph!NUB2U-l3;QwXs&7UO4u{+PtdjLqf$B~(pReeZ(NV97)&HVrW#H{TP zvXzbPjIDMxqn_^SuBtpE!re&#@9htd^zg{aTAOKG^`VY$B$5CK;-HhnI{`@^`W6Q@ z4I;Zb-WpaRcBlhi6*J-rE?5t_h1HDJSiznYE6{7|xHd#9YL${3YK<(No}rIMAC*2- zJS+PSy9UDo(m-nPjoO?fkO%0+!#E$pvBbf8?1X0|EGbGXq?63>=0xd==Z2S%L-a;C z-%}Azh(F+FEOn+Xon=v$I#Fk#EghPn9{o_6Adgrn)RM81&?Gpq8zOV;h^x)7I-_3N1qu z0TWJSR0c$ao}yoiDz1&BM<$lAYgNMr!}g#aaxqmw)zKbuw^EIvI@+SY)fJJ1OF}fq zeU@++a6FzfDqT)=z{KdB5HYeJ$#x=JqclQhp^8f)X$Bli0vO~49gZYTcr&b8ECk^W z8B~G9zbQOB4Lsu+e?kxuvL>{Ql$0^=7@h8&Q$69!4Rd>7=`%+$PPsGpMy1l!u&k@| zfa61lY`7AWsZ^9BN=3vFfq@f~!$hOdL(!{3O~NS)O$Ebg4BX2V-^y#=^&9S*bMGe( z^(7C7UvRwtCCmJh-dlM1*$914@&X%^iU0{jRxWk@gsb7$nQ?cVU}z&R=;QUIM2Ryb zh`Uc%WlVz5Lj+gH#4veEghJF5Mv$yR2#>DGdfc!M;b|*z?YZsCzOCIoAJ11lts%x5 zg#=ln4%YFt500P$k%-?70Tm-t8SKQ)j_f(Hm%Q*wqRj9D48o{fXYQW$eS8(XRf zNR0e*jL0*d@r-|6eEQb?q29yI*ynN^z)}j+IC8byorMm-_k;+(=50nkW{$b@qn9r^ z&NJ`t@A&%d8-Dxd4d1+f$G7j_@#g(IzI*?U@7}-X$4E6V~%x$ z-eK#*zFSgec`KDjjz5izIvVb^2?Dg zzr5nhSH_nwl-IYyIE8b;m>Z)kt8tCQO)w8F$VKoi9v|I#|;NNgZAD07TX) zeJz7WBxr$0LSZ|%kzVI2gy88FRxgE>$9G)7^y2(l{)rZ03)?veUUMzCIUd9+nT{Ya zh1)td0c17KM*yd@=cKQ>)1Nc@j*j!rZurP+?qtWk4jhuuU01_5#Q@xJ>qx7}d?XzX z=<$dz3xjobIdLkL-rmeDUI%AAj{FKl}Ml_?ut;gn#;{f99Y5 z>A&);uYbpz5AS)H5A>xWRgopZD8#nc6_*MUN-Si3HM-zPkoGIxi-c0`?cAwvjkoue zmv4ldU1vWub|MVkaA{a_Bqd4~uBVCHn_FJ~=r#MF{)FL+*9`kBG-q6eS(RA}_hi01 zb$&Bfepj8hCL9DL8##5ptU{5IL(j}fSWZp_mO`wK)t$M5t?#CLD4UMfu(8Lg-g%;= zfg*{lJ4zlXDRUf-)M=(qjWh~$@Y%5jy*l<6q8*r~i5D+k@b>LnPNx&|JoEA6M_#{v z%`{E_L&k#7_|rhjotu{r0JvwYPgvj4+YYk{(Zp`3?`tw$!n4q3J?P3pFh))mTnWJ2`TMe1cpZ?G6pBIkB>za+j5lS-DHX5y#ttTR1KW zjU96kc8oD&|aCHg?>6RBlVohVCTS7OfBWR%h9V{8qdrIM|Y z-N^!)jhu~?JDLoW9xM5|QxTenL8-WbHLTA_jrk;zDZaxfB-_#Y6|K+oexiFra8gfb z&uHspJu=|zY3#+=X{U&?$Tf>PizXH+bYzt~RH|1NTfzXLI&@-O6xMa&31!Ma@&vNr zX~c&C#sRV*V}e1kRUuAtns%BrnpT=uumW`i-2pqq^SV2wk$Qu^AWK4eK`Dro^;_Pd zHmp{>#R`nw6J0VbCv3>*m}5SWg-k^xbRP?y*&B<&Qk86#(ie(VvMr?CkX=Kr8ze`@ z0aKx;wC*a)P%{DsS9GEjBhDpDI*%oP>pIha5Uq64`Yg@8mB|&g5K>g$=lqmGjcmS4-f_`PIMzf>>e;=Zf|yc@#QV|`WoE{_uoFSe7msRotWQ!R?KbA^oD)2C#Gl@e zjThST&a4%L9^ta^igWfwS=PQjshdyQ`Xv3m%qR7&8IQ_>_5Ak=opCwlGd|nshVP%; z-i6P>{qa6N#p5;Kzk1^_AFsO{#t#kYoaNsy^S$dYj$7)}DnCs>R=nbK!+L`6*YLeg zeXr92VSVqPK$Ls zZ$sza!|0dP@S%Y;$|&SvBIl8$9&@lc%^kB&>krT~$p;JrafF~zl4J^kdjXFX!<%=S z7)?UwzUc@&8x#Xgm@B>r99MmfRn$63jXW$Q?MOCUoNi8SPQo!W(2gbuIeRz_oCe2} zQ&U2266tnCZ?8zVx9H6c>zQr0?!*h#W0E*C?(sA4>-j8^fkXO{1T05;KYMtmUW4%Ljl3DNoX+Tz| zfi`A(PN;%y+wfHKcaDh9ibGAskQ)m%I+rK|)^@3t2QgMoyJRm6_%VaVLAFSY_xPx}qdvx5}^~ z6$~$mtD$*828WzNz0n0Ta0MAaQiOj?_zO`BJmVRE`q&6Q5kXOM%4fpK+)D|6Bu(tJb47S5j;jF4QaBA9i%>PZ zDHj*+NWArdH$L&+ocrdS>W+E7rJlZ^&0kJ+)D^&ImUK<Lf7+Ul@I6@`XtscNrLS>@qN{I}D85s^VoewA81@ zmCty_GyXXEW4wnVvAWLXD(p#>oZ_4D`=IalwBC7nbItA56|Y~s5A^Y-Hh-h6l; zP6Y4X@b=>eK0Z9~@!_6B>l~YLYMtJl-i@|;K}%cb3wPWtoCvJO2XXx#rry^r$sE;~ zE>eP0oScoE6FDhEfvdr}8sTa$Twf<{t}{P+ZG82G@Z}fq>Q(3EtH#S~<96S<*>`dZ z1$m~E-Hy7e@*+77`-z)D6x_pE zLY&ZAX4Kx0xl-ntX+CmQXKvhhp~_88?91h@0_DP4;L&j) zZ_aP41Dt9w=FfP>GoJD1j6ViV@P|ei4Tli@cslXn<43-I^M+sl?sxq2*T3PPACCOD zrSq#?Sf+_(n(*B~*=NcwBV)nJfMsWzKk{mJ_E(i*sFc#+gOENzI%bl31aTBYJZK4T z3*Kjxg_0Y?&=`i9p*)cDiKLYz#@P@+)^ITnS(&s?e==22iz#=zK;Y~&a0%mL&fhF8 z9OxIE?K(FGUvafx9R^lMfoZqxI505`!?SVk9|56k3`L@9JRhM{G~DAM4|*&JURmWiLcAF6 zoWB7?sVCn(S<~^(h0xzcDVk&VscFZnFJAETzy2}*?r(n1FMsw^{?mW{FZ^Hs_doHg z-~5VqACL6r^hKd4nhndoA(qbl~2chX&uwuz!Gwg~CEsVquL0 zrDSe*BR{&m;^((N=GD*tmaqQ9-}6`h?r)i{u8@>ybkvNG4>P}cSoqb$!msUt-_Di0 z-teUCCgJ5YbCd7cw~wSg^YCCiG&s6*s>Yn0Q-MPVs?!>{vC4@Y6Q{}vw@$alu3S-c zAZcRoLXkotbLh&_4j`e#vKjguMpXX$v4DSe$T@R$b;XMpFX+89&od7X54?Z>p4YEm zGmhh@mptQ76ghWp*PRAh-?6lbx$dd;3tD>tWPH3QkN51#9Xn=nb3PzQFZ5+(KJBSm zX}Mw6;#-zFZCW_)4$RYm0Noy!rPMgt`xq@g*yf5 z9jxOXyZqjYhQojkWOc4n#&hPF3ddY|p9?2#Bv}}l896a=B9kb-r-+j!kz@4$MsarH zbew`C7!!?=${;gYoGJ?<3&?_NrFp~Em}L`eIHSSTN2Zp^bx*E4QWKO;&c;|8eQaQ1 z2_1e|hDxUl#xOR@*h#r#V$99aM#xZ_2AOfD!)AN|X^GS`xfd`Ag*Xi%94ycZTG3WX zt&v(KYhz?#uaz6A+-PG|+TpQvq)E$tgY~1F~D1s&7loLxyv>{Q)gt5{y zNyt?Y5;_!oEVQ9ODHK^4)MF=y{D9O(?ju82#y+s?O7@`V9Sg&dGDB*VmcW7zdd43X zA|X2*hJowrYp$-Y7{~DkjqiR4=E8EexaUkuh745VxaNyTL|9p~weXWNmvOnwkMi4f zmv!iR{xYpVlF)IwKth7vUFq@jMQak)^mg1D6d{;bg`{ZCSSjI1HB2z#w+$|A~#{M&R|aRxIe7=Zd-?W={OLG(QN$&LQm^kI({RMq7mB)ZrJiK zY7IOVbXZlv=T;t1Jf_ZPH?C#t+tbwd%SZZAt{|6v`Lw?qF8kx>Q36rlg_Pox{4KWP zlOa99$_IY$)S4f7Jp1YRE)i$8{(gU0KbXEK`?QQa?Z#!D7m**VcTIhI{)e!Y`*dti z)1RyD(*ZoDzyF-9wSBIdXFTH>|E3@!;&uef6x-1OB;?pg zB1^^zXKmaFGxkcOve0Sbh&m*P;?NCAiksp!;ioVfMhrzr+DXIe+@FHx>ISt5hJzcc zk7Cp-(30Z`8qP8pC@C^?LjNf?QW5_|gv5hpy-d_vGt_dMNk zV56%x$AP{cFC7PVD)=x^uCKUGuXs6BdP>OY$TA=4Z;u?lJ~9tZ8x?i~c?UyMas}@R zlOBfmN;q3+!>PpLGK!he2->bw5OIoDQflNBwij8PqC!?7iDKQDtKmyyNl?`>52tf6 zC0PQ`bQ`cX&}*UB0mpD@NOqKl=z^x1q%C%=(?&`i2EzyN(eVPVptGaRkpsvF@Ltg3 zFbFaTY0S7PO@d-?U0G22+Lv$|NF)(*cX|uH-o36oV8e@J6-bJ?(`%*IBfT2E&a~>Z zX0(=Qb&6-m9P#zn7spe<^MsXwKE!j9fYcJDCsIqOr&Y!g&z=FK&1v9(lmqw?%L$Va zvjMAuP%u;6GUkFQWD5rXSwj+~N~WfPLs9M~XHG_!zUti)ij%qm85f}^Sa7mdQaX_I zM3w_d4p0Vs9PweohuHCPI0?H4XL{&Nts{$3m(E-}b8~7J8iJ|fy4ujXAr7iSl8{e< z)2UQZWV{Yyz$|e+wJ_X9(J@o&Qlp`N#UBt(9H>fqKr_4&tG(c+x`= zTtjH##8^}sNsKI=;*CK%<9U|>Wk^a%x;hRd#+(_pm6@L%2A=VZXZ-&R6(Og@ew@%3 z%Ju$=AHRIXVLowxJn+rCw|w*NE#G~3&zp}Qc=Pc+-<=k|IW4?7E-cH!yi~k4dh6jN z;PE+i@35Sp9>I)ooDx!-8JbhfNGbFgaR&pbKx5OhuLgtwF~6z!+3+5X zbH0{!k35(4Ji(Yae&+log)@XJc!IvxC4i_S8B#cHN8>^WH&$ap5}+ZFAZ!XAr{T3v zzA}-aL;_1|dm8mh0;ACVn$AnGiKL06|Co33BYy2)@f*3}T{b?_cq=1|274hZgCJ6w^ZuK6Z~5lU8-DZcH~jZE-|*}AZ~3;vhdglFUtzl)vYSY|fwW8L zr06t|hJliuF^}wX=E^g*7Frvqv(fLsEascu8nrsT&(u0|I6QDT99ZT%20b$7&Nzm% zp=w80XMvc<0VD*q#8i96I`7L z^lUVguF$g5hm4Mi>alykVl!?FF3#+k#WG7T)Rq`$Bb^q8;{*HoBm2}C$qeJjcs(+B zp{p{ha7=K@&XPKaGqhM(vvi;*QUN5R1rHq@>8=Mx$2s*nRgmxdH-wD&}g(Ne& zJ2A>kE8 zE?6EQ7c6H6P4r|~o*^I5TuF;jmVv2E*jM_G*fdfx>8Q2^LjNm(!a2gl|u7}L6?wEHkXfF+!98L@EHy?Ot zC)&HW^pkRs!ePATuzSJ7a82<7+DT#{kW>JTcTmEZN{Ih_jSG)_fk5yrL`I||?=sDL zj5~3W`53|1U*dd?UtHtxC~P>_a)HM@M-bn75a$Fx_c?OF*XL=&*8XR4zIH9Kb?^xu zuiyH8+2hL|o$;A0_{<0|Z^*gkwbb_t2e$T}==rxQ;sj6c+tcG?{Bg{eV{qh2VPEh0 z*4$^u%jG3&iO=+3;AuxL3dxh?*26en)_p!~;WB@V%H^;0`TjoR8PE93fryY+ooXqY zi(Y)+F5{c?Kx^%LVLSKx?ntmX4Sag-%X-gO%R9`WdhFsiCubI+qNH&o4HKGfQN1DD zmp{X)Y{H$>yZ&q_mm&8WfRaidN1@=XiLGRlaeA1N=J3U^GwQ}u{6fq z8A>CSjt`EE1|?+E_R;Cx=?Ad)j`@I0Agbu3Fc!LIv@33jZc4LEBX$}<6<5U-x`TVi zdxch^S127D3~7Qlp?9awm3ngOX<=Cs)(X96ddu`Spu*}kEC>?`Xc%Un#L+OHhqDf}fL_rwR!jxtvZDe_1l6Oei!|oct%J>y@O-y%i z-Hq#0=el+JVjLIet~sqaCleM?OcSQ8e)xvstJAR)yGlUESll=f0ffObZUuTldd5g0 zixH;-DGAAgZ_sR{(Fo_GjC;lyuq?4ygz&u6z%!olj6Xa?Lcy6+3Zg?g^hWOkr@G_L zocC?yAkNaAzH}^y)4)3I0SlrdIXA{Jv9~>^aiNZhnvGF|iR>#8Hj9P_pc}In?rg_> z+i_TS9B1KF6SeIz8*qy+V<>$ks1-8|Vg0unB!)o7G2DrxhEqe3h!W?;2}g&|Lqu>q zDuyTVDBux@c0(U!Q4^8ixC`gL*2;UD7G+A8KpC+GbLfn;ViK2Fff*KHn4bh7 z&Pls*O(GWfta4ouU?e!t^5EF9%gnX%Lb z5X=v_Kk)Ey&;9*9t={wU`oQ&VV;q%IV$Ks;r$mYQCG4RE@XtC3u$JG_B7!<3Rso{u zlW8^-BN7H^YkS+(+x4rI8+<*VAhyH>Rl=}Z1y5ip;;IlkBb-|TU~LX$tNYyCMgC<(UVmOY7tU(b~yhHCst&Jv)mJB0|F9kq^v%^4ZP%B<5wp4npBr`I?kTbh! zWPf$de!pkh?HE>PfngXKhLN1hMf>0D^fUgIA;f);T?UMJ&O+|tzREP#IO6e}1Dm5K zfVr`)DkdWFob&i-!us42&!tBrp!P7r~ks& z-+s$+IpLL9K|jUO)I*E2cHJ|e9>b`Pb}k%z1zhMrPQo~5uCGU4e{s#vfA%x}_LqOd z-~P=n`Pnc2ikGimkjIg`?%aL%p5NVlId|6pf)2jQ&MNl@Zg;z!kr14p(jZ0G=t`=QI>;s2Q$P&33@ko=F`IU z{)%z8BXuRW#J=uHDRI;zb2?$8Q+A;M`%8hUa&vRT&CSi(iS6OxfsY?QQc7W(rcW<< z#-AX(C)&pw09xN;HRH3va)kO3_kmg#jzi*Oxu!~Csc_PPew|1(rmG`UX^cZ-D9#{| zo06A;&LeUPB`>o<7Z454odINYr~=u8CkG17l^L&Er+Z_U02~*=b0W*YZ4t_@&~}Nt zo$}pYc<;uY8>ianwc=hu4N*m80NoM7fC1@>R7YAvFeJxT^P)5H#Y$icZYR8rSQ}8B zoM0TmazT5cn$gT@?zCzo+aXf00oqk#DJOh_stdJM>T$trReYZs96LILdKgjknyB-D z4WV>x9lV2d$W4%*@m}a%Sj7fY z4KNqQ2jMsAJ?`tN3$-fLm>&y`D=~H%#&KrU1udOkol|n^-qGTu4srxTSbAbv zmF^8e++$Iq)XAkIsnc_3DV-&SF^%D1327s`XzZ%6G>U~Dg9MDRD}$B-X=J9dIA9}g zSGZr{wkLT3)mY(iFCk;=j_u=FuoSPHBPb5u;ywVLgfSRt{I z24TMw6k(*2cAY*Mhn-V*(05Q3Xiy%&XIN&a3oLUOCx}2AoIG^$Fq4ObwF7ObEY&$x zCACDeL`U%Ks2VEZqPP}Jc9uMbl{LPei}08yx`qUj z1d_xzIwg8ew3Hyls;U*?v^ro}Hq*R6z{7`ldq0*8s%4ZNBo-zMhM;L+F|>ulpn|5k1$(`ulz^{g}7mv4*Fy@8fgl zwze;EnfJN#kBAn(U*_}e|Gqfaf3DV_#?N<0&iMkIe=OG1m;Ks)fKRfwkNE4 zwzcHt4|+asd7oah;j*(&vo7)Jr$;Ud2|QjSpPqY+M;AV=eC5a2l7e64JmVS9ct+q; z`sZ`$O~$u52VBO@jBSU4EnMbpI|+R17y$4?P6N6u zrO*9af!`^O_Q%VlVQle<^fhMu|PDe3>)6zLE zmE+Plt*fo$tNj9wU{qC(IdRCDhn%_3nGZ?1Q{|9^S%c49M0&xsM!eVUzk81$zUm}2csmjq2O=(-(3CKO2Ok}V?u>2> zy_1WD>3srS?^6}+k|(k$xhti|PKUjLi8D5*SSMR2dk6Da%*qu!tz?gZcbrb_Wzi9F;xE22Lj1-_ z8ktI=fe3>P6vDxvAgJ`NWNAQ)vbb9LXi53uu zNmGpYs>kO$`e$4k^apLV3VJU-xaWBnaCsD7n7}$yoMD?If(WXPt3`mdGvCMKl=69e zdS)xT<)5pS#}PgXGS=gEFCKHYn}e+52xi{nH*+wDm$BCQ$p4+QyUS8i) z9uDZkk$gDup*23%%0oAf<{S-N6+{s0jP<~<92wNfrI7Q$b-m+e`N-a8%8(dJW+8aP60WAkF!Xi)YKR-sJ6H$6oo!-5j9|1jgCbi$ z^Pf%-7ue=Rh-w6hN`0;S(@(Mx^TDR55n|3$4?%>k#biCNam{6rExy+GB!9hr(==_@ z%O_B)Vs+XO5CNkvX|Bd#s4(9%H?{F>#wDlrF8Ky;Hg|VC)pGa^k9F z_N$}YqrAszZX{jr$mh8L#@5Z^Ph({WLH7;xYBGE2eA1}%Y zxGT;|14t+{x=zfx=b(G;WaI(mj>N$;jZlhg4anwn**aD$ewyKUg3}S1XHsiSB3usx z*Sj72{hr-w&@-+M1LGKugWIZD&osav9_ynL&jb!`m>ce4hIuZuMTMYsAD>3t9bc7) zDJ8-ncD-zUEkb!1vJer+&9{o8ta}Lc6KF|{!wu8)3#Q#>7&}n@&;QJ?zy6ws(+RJR zH-QSSaCRDi)gWr^UIS?aeUFfIQ;Y~VH+z2klh^$GZ-2%wfBs{B@$;Ya)1Us7SFc`^ zcLRrRyt{kgclQT=dpPj*aprfm^G*_{v7jT|jEz^rJvV&hy1wCc`XGgqmR~G3Y?a1>FmB`sedai3mAo zZfKA?o16v4@QTfBf1`>fJ`fhg(C_$`ri~HL9xM>binOfnb z;3;TH@syD?FpkQw&$PX9w-dhJ!~GiEEtPpK__Cs{pvjRzVVDS2LJVRgJauiMwbBQ# z8Ds`K0Tn_CAMl#-I>0(0GLX{1mCXFQo3+-DUf_bbihX_hZP@#Ha1pUSnI<2 zunZ6gVJld3hN$e!N!gKfLZ^ay zqeP!GJSi;;&7mCwZ_)+a!)THT6cHy$A*Gc>7ev7tEjdjaE)9=_w1iZ}))AQlEslA^ zv|(dE*2yW^S*=_3YoSORQQskLkNU1#Y83Bg3HO!I)_Fyf;iJ%!(M0I$fdAnx6yh;1AC-Na{(&-mT1PHq+p{v=@}C~r!xcS_I95D#9{k=4pHk<4D;h)-h40M4S5l<-K%|1n+?fVHV>-24;3N0#7s873fIk zS3|3nbzV5$9=RLuNbJxOq$^=Obb%&I{oXtjS4Tn!jZTAX0U!cG>Elw+_i4{{Vmr!{ zIKZ}-+kgG7AE3AFaoN&BZ%>@_{=Rt9Qh4M`^yE(-e+a$oe5Gf6|LV@ye6ICziyzB- z3OWDT=)51sN4-2gL^iI<*2QBV&sjFT#`$`C){kFVuNM%HZ65D&dk5n2ywC9@e~Ua` z)g!*&?s?uNzIF6i_Hl1d()}T9xm&(;9?#Qf@PZe-;01pdbZ??RjAkt8cO*mbV2D%8kfGmNP#$>P;XK#%ig-Hd~rLXZV5HpZ%WOYSCp9nWA9>TUl z!$oOoJtRZY9+L^fNG*`kC}pKuq8L;WRu>*lh5HXj?%yAI_;BQOKNGFcRld7IbsZDS zZerf;nGZWoyB+t_$nB7L$cbghkc3r)nMb98jBt{~Nk$g-RI7xozz4J`R)S$!5uYhM zaE!$JX6UXk4Ms8{B`(7LBAX{#F&xKGvQD^-G)54^&cfgoayTZfTm1K~;L(58VaWSo#;gft+QFi&I>Lm0)4a6?g=2u;AH`-MOkB%i>JNX)1e%2|hU5xWkY-3t(2Q_sa-uQN zEYr;B((xIQ0o8$E1dIrygiu! zY(fi8&>ksW2?VE8?q^0zUkZGzG~j)1^br9#7)H2sCb?IVFXT2e z#>&Xbq|Rh8C1uLSnDeGIkQsC4#lyf0Uht=bh`4-@PvbvFh{(nBKr$nzOiGzNPAvNq z^VLjUR&oRuK@J!Zh5+fixFmv``oMKLu}{J{PK@KoO}XXOddrnBU8ro#?5AKq6%JFq zSf$H0Gs)NNB(WPC(=;$mnQV=0Crnn(qq#&Mi$w+ZCnJ17gluy%n?l#6*-vBpeFX@O zjR1_E9lf9n!Ecqc&lw_s3TK6ZvtZz*OW7h%;v&86B|NM9yiK=U*YiZ^8x|i208u;) zmyJO~_a;!Hdn76mctjTh69I$sGXsc#pl>oobfpZb74t?0MpLGAu`S83%J>6Tx}JH`-_XLc?4GygrG@->6LA@ml}Td;wa_sB4@oA&IdJ!XxC z8C)4OF{VuDzHC~aXMhS%=I^I>+Ytht&OJ}1+kE?2 zgOFYh^mFlK9;P2fxBd-CT)YjSJX%kexX`^pbUIqLqi)^xJ&TQ)?s-2n=f}2&xOnOd z@DPGT$PSoLNua`Fuer0=d}E*T)pENZakmqw;EUKC?T1Dc|vn_=?xdm+Y4}?8-aF z`he5|vlFY6DvYhNL)m9#8U?G6HsYf>6j2N)0M8u$0z6V0nvul~UTg)jKy!%Jd)+m!~I zE3GMBDwKPqEx<@gdmi!*PY2SF@xgi6ov4Qswp-W_Gt*HoVLT~A?H2Qs1ty9_LZTwI z^`li}Y+Z1FfymL7_)1qAkmgLS@rpuGsN$3?)SU2Xgq<<$2kJg?cTko+Ecf262d)Lb zUy-t+gCl$EyTro)vdyb45hY+PNHj!Fh#rwR0V@FEnW&joGRT0WD{>YzPh=e_K9W_t zrTGx7^v!uyBSu&$h_4hYEIPB)6U)O4;tW-p)`TtzS^EZhm{F5KGB72Q5SzCENr71) z)rs0@tqY4l!>terJO!SdNcyNqkUDQemcSg+QOWm4t_e-xldTwZ9h$eVUTUhb-K+uqI#0ye4PXkg|w=n9A zWF?QzBrAu~jauvmXFmk{tSm#~ewV1bgdY-XGQulKtOpcS&;+W1N~21pMc^$cMex;N zg}lPPHUNNQjW zm9D^{&CoKSi;-p{FToI%Q47P+!*K_9gRm-`5LP!NMsk}jAsQenG0uR)Q2YFUc?S}X zz$WY?QPEgYDlLt)Y3PR~JA{cx;gyPUWZ+0}LMhBtInZQB=m4sC1nW{b-pwSvr+Y_KmJKK{q>^xV%NKXBe>c^h5*1kcl3T(*ll->7W&FTG9x zmluopXGrP2-~})EW8uut`zfBu+4I0ToIMbn6$(;%E}pv1Fh9`gwL!^aBI!tyfoub< zWxPOZ755X1I-VLO8JhM)*3QxkrP3{=7k8{yS`EB5TB)0zz=qMt5)ew-JPpVSx&~IA z)<%pb001BWNkl1 ze(O)Y)>~DpYM*`1d3FlS&upEbe_`v9St#2Zce~Hmz?)$`;;jvI_tG~rT{O+LfWn?0 zOaBkJbDBCGVeerV|DEQ%%x8*N`X2^U#$d!jdLnZUut~w;L`^&l2xt^%0Ed2ct;KZw zk!}RX7_}oJt%bgTiHp*qfQcx#PzBK~OA=8DmCF$Xbr^LhCMX?(MrLWa8^9)Si~Edv zynS9Vt~b^lgemru2)l(K{YM022sRZ_e04BDl%aW#k*a);ljEm3CW#XsDmowt zQv$vcU@cjEA4a)~Egm11TFs_{inN$ry;-EAdAc3wV(NrT7|}WmVOIp9DXFZA<>IYQ zTyGu4@NznEZxw`75)7(ClyI%$r{?iT|J-~w-+?6K6dUk0?S;D5%dF1ZHt&Y4miGIC z0gmWn1O};WxoS-#5G^qxhV^W$5jpKCH{22L(S*OMzM>WlmRXdlX$TAD@z(R)Ic$IWB!8X7TvY z1)00AW6Qu=KXaS;Fg1Rr^ zitBHm~j<-Mcln0n9wgedpu(slIe&TYS2gPMrgVCq2P=-dtS zB@zFAg}0YW28d7}|E=uPwomOQMh=S-b{{drbuaNi=10(xM;$uKMwqL>Ab8unNfpSS zCw+XKdyM*TqV-SJJ$CU7LC#Fj$)7C`uap40reW{NL{~phNy?QWH0`e5%G+!eCSqp1 z#PAcCmP8cha?QQ5KID%}lZK}~TwdA!P=+fK_N?bJyOytRXE^p(wl+Q5Sk)pU&R|;p zkMAhneP6rxUp$w*ztaO2Ai?;*bL(1fffS?A#@5|o+xE@+_E$ph`=0CPHTJs&tWlK; z3d|WNyFA4L532$|F*-ZWw(1?Qb(eR)a#5JCF;47vVvZJ)|-Yeq7XC5c!yyu=>{8ey@G z{i&_r&=l_h?2;O*3KW4)=SKIv?=xQ*D=RP#wRJ$FuePtyZU0*&lB;XlbJ=EiHQUY!5w2u3m(DeRl$T z4S<_#z_=SOV6|6dzJmg>lemBXv1(Y@9W4lY6`cIQ1r*@OI>(|oF$_pia z2`rum0aALwk3(g$_1|u@3ri_;|j;Vm6h>Qz%4_)Xv|cQHWK}F&_dl z!vqc(j@;rCVA@^Ic9|}H$ow^DfBSkkax8QPk8=!G|87H(rai*5rzi;Tnd-6Qw^3{+ zwIoF_F}PtzlfRO#l7`^cZfuIFFO|p&Yvi6FQ$rcK6s>@{*=v5giXXkkaWkCDA^nw9 z;QI>qZ)Abm-_`qHGxi1KDeUdHhKJjEBu6+VxD_-RFWQ4NxTCMP%(R1JkHO`$@&QnPfsmH?r4N06uwWs!Qu#i@lLWwQL% ze0388EF)vx90P<1TK)CObCV+;^#t*fctxU)+0RYJ4Z*$-G>60;2G|H4>O-`@cnc9S z;%fbn@}bp=sx(=tCCP*1LZt5GHT-w%tXQk(142v}t^4aa4ag(*vXi(dxhl}5eCX~H z>rrihv1Hs3X$Ca^IXsbk9stJr@`Z@{BF^GZ=5h`?Dy!uWYFzUg--`UXABXf|Dmk2d zkrx+`e%6G~>E0_|JP#V5Zt%s>`}l~@V9))Vga$S{kSN1sTE(^6)rcJ-OhCd#HB#4| zvao0|b6hO+u^zHYX|^PJiGfY)t#p;wy?3ZU#!S3@9v6`21WiBg^L&aPyOrpz2#Du~ zb140(b7nbg{5ZSQ-dgxry+2T`BYE?A)9-1BVmWLrLHa`rzpD9_jeQz}VBnk|xSj|P zq1WR7tXd0Yljv-Wd@aK?mPTV-ccn`<13%Ds6!x!fcR8?uD{vHWu_o=1C-09=%^R_KxljZI^QTchgKlBY&PEm|PTv{ZaCW^4y-^<-g@X zzwlg-swDy&{C0#5+XZ83l1U^{Zw`1H1qW%ch&v0Vrd#&2KLq!k7?8sc$q^cA~^EdKHf*VedX8&!_}S~vK=K7%l_ddt0wYrxO1 zL3ZtW<^h+BiJ18~6%IAxe;f@9$*SqPVI7(iYp4ikxYeCu2ZG6E(-dVYgB4!f#3B1j z(V8}~dy=Gkrd`Tn(a@@pOr*mN*o5Tm)6=&2b&LOwL7D4xa5QQgE+nqq*S8FIsxC1U zK#wk3#1n}nA|?f6{x~>i@nt|06xt!?w91b4h)SsEXn5Q9*4KCOXrlwGbuaptH@0$6%buor}L!oXCa@Vj4*|HThx!1 zy>qJgs-S~YG%d8zz1i-vX*F~Pwe@v2oUZ*cRB~iMNkypHfMcEz6xqMgmSXns3Y}%$ zs6)kHmdGi5BC18Brw)~^D z;231#$r%#0c!FTV+8#7N-6-vbM#0k2%q22OsRcY370S^TgZ_o~soXor58~5#KS$Uk z$&W)4D^qDB{XD3q>(DRbg~Qd))W_2!A5Opw<_gEnZ@%&`{60YN{Z;lKRVI{Jh-LH< zAUTeskj)r&AjB-p<3(#|t_fHn^>tUAVmcfaJ1^e2{7(TtlWPEcMo~WDnvkTgW*gK& zjU^{Lpq9sa`i{FfzO|k6z@ewtO^fuO=RM%$xva{|%y6v~?syGvL=x(F6wH133&mv+ zf_supMPN7QH$+gBd+^Y7NMX~jY7Qzfr|UISruO#e3nWniu3x-2CvnNk9V15eind#L z6jrrk9MWGbAFF73UJ|{N9DiQB6-D@pE>a+w5#y}VUMPeNt#fa?*YsLOk&X@bN7ugm zZ@YT$?>DFa92))*sK9@KLlY_7ZUH>(AlWjUR&#<1as1GJV=*p^F#{9Q94N-GzURmP z?0S-jGP6MvpN$ta35ojUi3-fE8ieh%0WIQp5YN!)66Jl$Va8u}L$7cit7uYd5s}R- zC4Hsk?)Pi1k*)=|hKJg|C6RZ_6%Xeyqe-cGjtn(ohpKGJGIULnKUGsw4X7VCp*|@h z+GQW3BLayIrf8KfoFx*%qXZCS5_FpOc~XN0CB}R69U)j-QKPQ&GQ0bHS(%6|t$O%> zE7HUIDO8*X^}(DAg&MWPc;eYJ9qY8aT&jh1klmX;{wH2`8#bkfEv%1zMc~Hv>IFeo zf0x(~qjyK|E6g*+JJFf*{C5jEW>YN>Dj3pGj(WBU^9?J7r)L}riv0Pc5WX+g6J5hh zw88X6gb4!10uOuf#&J_1^!CJmA`|CbA9YR;F-^5ySm@@_(-C;K?)b9q*t7S%-3@|v z>-7*&3nB6oml9pHvQpZyx+oTBOVhEH*C!6dlVq7n5}{8YJR>?&`T^GE)9?nfBG3f& zom+bM&YYkay@<5c$LR(ksrwx~O}Q;b*v4d}JEp4a?mYyu___5cz&bUUhm#g6j&A|wucC}E>J{o!hD9ndst`#kX5UC(Me$RbAed2_8J z`%rR2LzouBOP}0}jAX0F@ z39dv!M1BN=Q1Cnno1G%Ene>6Q=lLu^U$Oko?aPp#5+abB3z+_{O2oqWyDF@M-3Y+W zwo5^r4k<8Ejnk{@;MJ}EOaF(c_`sI#yoQQ>iJhyAd*25x@UD(OM<6-)6=TzTZ1i!& z{r*(<8L=JsLgl#Kv+wm)ufxNzsB7Iq3T{bo9X2r~3grGe`_&6WPrr$6aQRN`Etn0I&=T5BSousK2>jdAW5#$bb1ta) z^UA-7s~SostpBwv=U0+g61LZR*hEUwcia<7(O4nkgVi)>%JNdig2}6>(bj*91_>GY zdYZQJYnH^aMT`iYKE`$s>Y|#G)a5N8QYFjG+c7elN$>XkI8C0Dr>B&J6JL(k*Qa2e zk*CQ6&ig63ezg1A9@Z~UxgGcLt8a1(VXgx-(Q){Mkscf zqUJ|iVJVvD!k^l1YyPejx%ER&B|%e}Xk)lsf@fvc+bthvuxXEs2vFm)et#pkkcjO@{f3Rpt061d|eTSRr-TM#P7)hyFbfQ%759=p&_McQFioo z%7_>5sOx7lRU{fx%;M&Zke?W^>&!U@)B!iIP8b@PG_oG zKYh9L;WrI1NH_VqV|RY*y$ONw5{r3>ZOlD?)wT`=3qZtYxkSaf) zyjshvD&irHwB=-`pI{!|1mOhn zXCJ$HU@LN>{S|1ai-MZf#;JZMDL-zSoofy9o>m=`Wr9@JcKLWfvR7DUpz^n zow5r7RUJnEW_f$~YEx&9at!rkTC}^Dg-kjAszoB1!|WO=5EF6G>6Y24ld`P-`IKBv>19mou>T@g|^l15{*v#9mi1N`oO0hf}2?qtN^bR*vmsu8ua3V6{YBzptO-ey%W%Mz)kMUum*ef6pJ zVFo%pQ`~Eml@|AjE@_L=32Dmo2~;}sRj{2-@7p2;a*|KnSa8C@w4APIX5cvc+P29~ zR&Zudv4YFMkB)nfwTCn_f?P@TwiX^Ja;ZKQlM>PIRJInH?YJAJ_;3W2K6nDEx0DPY zU;6#CM_)wkPEV9Bd5H<2AG&|LIy`LO%$uIqmc4E7sC@e(VHhB#4uP$7Duy=Hs$Dvc$V>OEyj{p`wh+t_B?Jq4|yM)v>Li)I1x@9uwJ9X;ik z0KadjlVJz3m&RdgE%szRtLL_?=v0ohN2<%kH;bfMZbjY z&80)rLx4sql-*Gp#VI@^ayjWa(E0^$@<;q9Hf;8$1}Fa0eN#q(f7`xxU4`DCo|_MU zGQVz%`*|P*elqN}qko5;D%Ul7I}E#$q6`t!!sh|^{05sY0eYZC%P*pF=cEsy&1|i0 z)8^YV2C6|Tz7qB@li8c)VJNf|)LBhdV5yU&(cSpIaXGl5oMNH#=;z>vWZFjn2u&ni z_+`kbq-6<1jyI~NJeZ%Qgb?ImOK(DU})llQF6Y-6x(rI@U8$%5nSiK zj0(y0Nl@R(I#Zd9UEFS%)0q8H#5B;p%cse6ABngPdprU_G!G{F0)|QicEaQjL3U!5 z@UFPH`(Oh#4|md~WkFWWe_k!qB4(&2sJKIL3hGEPqHNl!sJmW8zWB6Ah>IcSiog0C zUUh|LM!eR$@rzT@W64@6N~0|??0Fav<7hGPF{a9Dmi5bX^o=fQaXaCdtl{J{F8m3b-I>w)*RU?6w=|Y3*!SO&1cPXO-Qq_Dkj0JpDQ) zwFEa?LvopD+r>5w8fAxZsN4L*!0oCc*@Nwwgss(4E}By9vl*m5>;^|cK*jUesTbmd ztr&Alo2{u{worF1jn?f$NC5c9dt&-b;?3It!SREKe_};K&AW`!7>1>k6=aBHQWyK; zz@nyyVP-MA3~ME8dCKLVNH7p#PWb=YYl)#zeH64UcgpZ2KeHU=b{RYa^3r-YuV2=c zgbzKr>#|%e74b?MY-y;4-6ez4ly%x`uYnpWpC#W*8`zN2wJc_)I*ye`6f7wpb z0(9`t&%EpptSRdOToz)OX?Q2$5p|#c8I3saHic;ew_XREZ_j?8k6Q0bIC=NO z|7pP|!RsT8=fNR91>kN_0tJ_XHkYcZ>hcvHEuIeFpikepIwY%OIZl+USIJM%6$oQb zz$kJ*p95u%ee0)q)!jRtq?X0GScT;7Kz;Hy+*(GP(L_?ZDW*Vs)fM-7E3*x8{rr>Y>Il*! z3wV0t9+&<(OQ)VxsImx_j*~47>SfK$iMVqsSYQNoWH%rvPn2p0!=8J%E~;lE{hQ6f zkKW@mC*$624h8-Q6UxIbw$sOdO+NjB_+w8hZo{hqZr8B}TGxLk;^CIIYfidqU^fU* zaV)SsrG`PMVjVQn4(M+IpQyoGF1D}3x z-zc7!1UPJ1u*B+Fi`-A)`0yj9l|Lz?FpkUxW2kE=yFpToD zuY0^_a6rW}@kWKJltr5>GN|Iv_R2}{!*}%3*ml|6YH%dUe9zS3=jbQ`6k&PmWp1GLpNR-s ztM1+Yy14$zW0w#~0ne{d;u9lPnQNUFfK_bfncvH$1(y>)_I+@ZxK`5UBP$E33qND1 zGXI6opbRd1$$qRTT8KKAWE};g+ZSVOdLTMpSS;FLSzMaQK-(NT_cZGvMi82Y)DmYr z8%CS3tgy_N#t2vruFewmp!JO+iXzo)&>u51K`a6L`|Ssu`9P8Vz|}6WffWXdtQg7@nieb>FAC@#t?feSeDTOmM)G6YQf+snSp6QJur5knZhKGwj5Z9cYwO zl5i42S{gSdSrQHl5E^|2CDMX!b4gsq*fCeIcAQQJ&YhI(@P`$q)p1o_#`{th5nCNj zKb@XJnjA!B5C}Pn*X`&>8tR-MRE%N_^B4~PQI9W>Ssw!qc$z$cLn8AXHkAYq5lh98 zR1B*r=iGP`V6I7t71hX3GM99taw|9kUkFm5NXg-Z#G_@<4t9q=QplqC?m^cs+Ye?k zJ>3n{t~{VUkfi*RRWmLmS)yT4Fps#p&PZ1?qB})OT($H}By*v<|Lv@UF6_yLC z)wKi1Myj%CjTJJUbl9@~04k6*3f3yNHS&KssXPhq4FYKuZe{2<(tL2F4Mc}BpV-33 z_mrC}Q|q_cGZzW^= z{3=N#>RgVj{YWyb-Z_|Z}PbCZc;&}iQj1SnD& za-8VUu70S$`24nXE1XuE2R+71yj6g+1HA-hMf{Nd`_o!xG24#%?G$-WQl1dImwOgwl{^0^poV~XyC&#kOn8Z z?h<#%cqsL~BXvf*t_43w*6^!oVzfrUj(%3KlmD>?KU{ADS!wKH?9r^_U!z-{?lLI^ zFFC>-->!(!yerNeQd~V+){>`tuAaZ>=dG=zvdR7}RoChnk`E9CWf4ybvR6ulSCZHxP=yl=PiCXb4^LzN z_nmO(lqYA(+YJ*}0k4z28MAGb^FheoA~t^3>MuRTk5~^*2T{)4RLVq0T=6LtwUp7O zHLpy{25b3bAU1;H2&yf$b!f}~p9L@u`MsklUbd69hv5`E7B&HvPBbC$3^rKKX~`YK zTNyz~QIh|24g|n2|9nwP(o7c-El}vDk?Rahs-}3iwxBh`Hq4TvZ=YN z=EB9o#p-54Wb#Zgt!8=~8B7fL-SR^8kEm;=t7k{Jx~@G2!&`*V&V|l~=8*;c7q`l; zz^ivJ%>XT!fIFhRc_gVnhrE}(rNOywRJFtuO+!OPut6{w#Sx{*Yg&6nQEu}2$l7h+;LF!dGTsnhiC(Vs{?I-i zLvK{+8$xcA4k^+)EpI4K)?%JD!WTK%|JC)>f5XzW`QrPiBG-e^niFrcS$(uW7pej$C)Kv>MGpZ6ZC96Sb1Ebm^%F4z6hY+ys&yBp8V=h4qZPtWFT8#*;>Of^=~I}urkqFmnQ=(k+& zq6y{f0#1iYm(IA0x6k=fAh;%~Qq+(MU9y4>Hs*xNUwq}Hx~K$*E8pa7gWG((W zeBG}0O9qaOqNusOS&ohinnV%XidcqV?6z|&#Iex5Im8T|^MDQ{Jw7r_MsGX-{7U-z zq>=P~(ut34+NyLbOa0&BP$tJdh?J5ZT!1MXO$2Sa)LM59#H#c9@A!X*EP?C3$1lLi zV-_Re>EO)!v+(Cop#L*Ppr7AP!L$Il>om}ToaE_1e@xsY!z3^6_XZuDEElpmHGO#a zh1@LGwJ{QR5qCM(5#9zyNnD!As6(3>x5DC@X-FR^4FMnhzaDp%^JLn5NNyb^CQMtt z$t%)J^50&biryiUnnCUq2Ao&=KFc^K6U16l)Q+nM7l}OcL!~`L7OgKCCR_gB^2`kW zC_cSMau0N8soiq(fbMf?RZ=GJ?ehB z$@h3V%S+4QAc9QWzDMAWB}fW&y5Nr;4(}YN;{e+tF|L<1KeMVl?HO3&8v^W-VR#y} zCH)L<**d)6gErV)H{zE~xkE1LpMg)3wLJ{cjvffV`IqCD@w>TQRAp7Ju8|3meR~4m zetx~@eNv1rM0cnw-e*||am-jL&Lo#ba1VAMBU^4e>V;}=ecPTlqf zBT*U_#;X?kSa9I(0*IJ??Yhe2mO9`9o@JjgfDjEGFL2o6xxJ)^V25})N zOIbr>RQFw89wC^(GHPE?li%OzDH#ZWBtjpJX&*W@>;ujfOWRWWTj%_bxwLXI}f zuWFKyw^-5fz#^a@Ca|zMkfNCCe_gc${u%s3-o$~JC1A0N*G=_<*$tbg>HgJ4un+=M7nAuEZDHf;|r=F_I zFhZiVbI@+0Ia2;sF;d3X$L7OI1r!y!Z4^8!R>t2lrqQ=g_2C`FjmMqFv8TvUlqmk% z>&#=w{>>_ka4NbQ>x!E_XuJ_O-5*lM0>??mM5h61Kk*OU3Z05-L^K)91CnSfdZBTU zv+rQcx_SPTi=y=#M}3kD&1H6%>)jR|KLnpcNH$y<2H%yeGnrS3*1N5Uh728B!v3&{ zAK|EDEa*)Ru`q#czo9bbf4gbNnEEZGcgRAalP@y!$k}keqvR|6zOps`Mnj;+=J$S3 zUnNBE1P;nE_&~H|XF*L7?p}o6pzmPyyid^+ptHMVv%6q(bKz=>_1fWNKk6u$3^@8x1-*ajkEP&^)8v?_Yh zP|P3E+x-oRac_|_2p`wPt`DqitiY?)f4k~h_z;#DKqOfdGcLHCHaZ^LRjuRTbSvZr z-$OP+2`HTxSDhwPLw?ym7$Y_rRm@uZ0qGv@NVczBR6Di>=X4>L9d$c>OZTfWQg=(k zTK!Bu2HTJBE4RY&!~S13uWb!=`+-6;EJQD^-~H36w|~Asv+V|TI%Pq;KnF4}tsKK_ z9|U5(bOb>F=|c@cwx5(QRn2w2Oc{ZnkFK4Nwc9g4?;i2;Pc#nx_reb_ zX&1!Z-VDE7W5wM+k%sqQX<&#QU>{I5-XduE20|rFn;fTsu@5X_6wv?}4rHXnurk-|P6-p}vF2v1RnXo!10WX&CSJQ+k` zU-&pwf$rDBJ*{g~u`4dl>0x5cV0I0jOQ=?6<|Stwk+f=O$cbQ1R^qiR?poTU#;R@E z#iEXb?QFU-$+0&u>zVdzmYvKd@{D+nj*z@~cU}}R9YEu8F}X*Xb}?X0Msi)!gh7GU zJGV&2UGPlP(~*!zt-o-~~okPMN|-r%}uk z!FE0{FlmQSqm|@zB~4yTi5X|W^`x_UILV9y%`rs3W~K#E(ufH0Yv2D@VmX$J+yPa8 zHEX2M59tK+GVJ}#!DKN_ojs5~RD=Kt2^PdF#mk&y@1yx6;pwV~@nCSjCk zfwSNdk%h^^2Fcj+LHb(wFm`cBBPo-)gi`2=6!c=q6uw%ql>u@y+6DnLE|UP*#GbW^ zSk+?)`>c8{F{5-x5H;7p%v6g@4iFC)TLWOHf_cS( z4qTenvQpTdr9F`1>RimNN@_un}|f@vkf z_kz{wmld&AWbpRNL?YF#1NZIcQ^ZUgOv1iMx^{!+lr%a9Zl&zpWE!*wxLOwOf^;&J zLJWo(J-*nU+#uW>Bqll}ksZgHR56M%6KxOy4nCErh=gAW!X3NEmlCNM?!ia6Dcff) zMVFlXRxRE7Sl4$LHpAce^rGv``_F3$wKk3JiB+wVdpc1t!cKq6T4qc7t!d`TEOT2a zm{F4vPzox`DKRDWZ>|>(wPNMYD!x^+k2;^8R>Q4_#r=yb;5w$7rBPARtU3HcF|K&Di{4auKdVO7S~%i(xC1kNHB+1~}Z z;t3PcC1Im0dua)1PNZ(9FRg;%i$*rrzX05dU8YC}Y zf>l!{UQ(il`7kji0K+-F3J%YObmt?fMG6F&Wt%1P5mgF|yiTXyR5!4*rNrIffF< z*?Gb9_94%iLF{gtnb_oX30!scbp6jw1NEDmW0z(kP4d$Qs^s4gf*^=r5$AT6$h>+G z64%YB6b))J)Ui5m!n(RfGbnu!D8?P)m)CTWQI%CB`Zi6WOgU6lAq##Np^t?0d%jNW z(>Lm3+c;i$LhNU@ck=HXHEZ_cdky-OwEqHoJW=f^{v#Bo7`FoikU`usGWmYxYYxX}5{4^;gXfj7*xL z?AO`0RPNT>EbWmCDRS^Qgdn``ppL@YS5warlxVYp2p{Bu| zk*#b4oaT`UFOIA$G5d%q@Z+$_EH`E^nSQIZh&M#`qC!M;hI6@XHO6e1r_u|9tf+8^ zqk^J0WNsqLSb6eccUBo z9CMBeJhK{YgkNU73o7~;vR1M*vY-Z!jF}bmbS!nZ!=$c1pk ze2Noe=~HRvVopXXOBsP=??$IJp5Mk(Ny|5DA)1~=&rk~KT;JVJq&=HG7y0Iy27a8# zo>QkfWf$!Q41K^I!RPuXi zZWHj9S>GeG1rZOboyh`(jfRVkGQ7gXkBM%lR7AYT1D$cbDwLq+u|36o5g_nwXX(3h zcl`#YY0h58{Df2;9S2QzK9Mdzk?yCM{M)2F`A0Sre%na-eh&D(Q;fa{dV41><`o1+ z*d_5-4I`yNS=VO|!qy0wwdHs)+^`N@=2k{e3G>FXT3Wm<;gSI`mzDm4GgVEm>QLZ4kf^W9P5mh~kq>JNSNgy1l$^_Zb-_;H?vzI9_9O=pWIxD@LtS zYflV;$qf|4yDm(&xIMWrfpmWkdxt``CFKoOSL!Ht^kce3dYBWX2$y^Ghq)!*sUADR z&l~mhR{T*{bH}JOlB|za86PP0wz=L*JbF4pF9FNPp6*tGA$2aF`VE&RcM7pu3NQFx zyhaYMEaHavv8F5^UHVT;kJ|?ZsJ(|Zx97(Yfwk|C_JDHzz5nM53|?*lKewX1ZKOD) z(JLdXd(2Jf>M6}+BCzst#o;1@{m~lf{tZ<0x@qDKi^7!!LypI(;HToIx&ioxeR&Yo z^MgHT;UW;J5Xj*;MCAvZ)#I}X$Yr@KNZIlcVfP}QV^*MS)0gpE++EHm1rJIpqXRrc zH@9k!`7i2+jzT&mvY?yIOJ`^z%B=2Qz%KKGm6CEzs_X0O^>&O+PS^Pb@^%IxYG+nw zO=N)qg-8$@>0DvN^)?sUnP{f)VyN?HoK6sJjuqQES%GE`?_e2;(ls~lQC=2oC8x+F zM@u&OD$T>S(bS08pgX)315cl+&6AtBE8Re8l9l#zuxAd-so%%+R-TF?Me`4mkB4rs zrr5Qdma3e!f-xNTu08?6taBHPm8HQ=?JQCUsU=U2#Au0$sTX!N>{Wkk^vIQ0bod3w zKdVI;i6`l;BAdun>ARsH0pmL%UW~1*ll%bB7##Qnpu~OHZj@FB_b)D8hfI$#qy-I} zqH{~%#?v(2r3hGJbauCg$JRf&vG=L)4YT0ib77L`s$}lsHtmycOx8-~ zGsU^If@DjYWnhCD33@bDNh1SDy6dz^7L z{tCD~MhO5zLwz)EU0jwi3|%tru9FvqeS$K}e@tFRZ*l*D^@HF<&`famXW`zIk7&QM z=@G{4O-SsggeomSOd82T>OhkAj(GwjxjnDy)?-R%MvG3??Jxzo*46-AS|IOl!Z&ag zpUc&};kHXy@_>?J9oK{T{i7&&WE0a*SXiyKehzO z3{5JL$H8cb-EG{lNO7K3nM_rU{;Ve zNolg9F$7E(xOu+w{e-w4%N@^c-wnS0$n&!Oid&2OvkP4juMG2A_`pjD!;btf$$Tg# zn&_It{blkNn2_di;5XxdPwOpOF`JPd0CrYhiyTKz+ktN!mfSDta-VKj`|PhEYqF10 zSREDVhakFM6j2qs+!1Ea(YpjcGzb(4^>DoV7!mhlxG!?vi#Zzfxzu_Pdc_*`5pEpO zfNQy4u8>IQPk1NE2BNquje7L~#Dg!AV@J}O?LJ?40M$$AA! z_VM^b09 zswpls>3m*Z-pe4#XW#uj$9CYCVC3L#ooU?jN<<<6^P(;6aVYx;m=l%2ows|`}R9Z>JU&SO;2H1aOT z+ju-Fv?ot0;M0n*_#U|#A4QB9%${+krm#kFkf~$Ht{2%yJ`-6&jE9A>bvU!`2bpYU zb&7K9hOPzEOKTKUtLoLz!ncnqLKJeCLPSg4365{EJ|s8LfM`Bfh%4c#1!1cG4xvG9i9l^BA=q)J58h-V>D!)>=!vU*YduZ zSETXCYs_KgWQ2G?bN)22bBd#^A4qF>dia`pxOyjH#qjHuQAZ7OL-Q=hIk+&{p@WNq z?R^!TxfP#TBQzb1Zs^ny2Jnhx-liAvs`1WO#isQSFpFiK8qD ziaCK@A!Ar-Bsr^Q^$B9TYv6Qpck1tC>{&urXIXK5T;*2DLc`l$Xf%^`x)Ud!`t57F zl4|6dueanNwpI1z1u8~zee>@4GAL6VaMAD#PJz9kT8p$Dr2Q^~eHFmotDKOER!5A{ zajc!Yos9{n8>oF;C7m6iE`yNc80Oy=B>~v5o+=R`#hZV(J^S5L~5?~-;qjspv z#Uq_?Nr^L?hg_qwBuJi%^l^s~0HGoAmje#(Rg1e5lWzi2~tX zevxhql-C5RK)$?Y8P2!uJeI5kJI=4v*4^{ju5n|1MJs&!Ht1K8FC(rq^!}rJ7dB^>#WV6q%yvLEjF0c{bVaJDvnqQG zkB*jsE|Or853-;xN~jTffh|nT!)h|)cjUEyau&s88cERVNz|}0?dTX6Vkn}yLKOFS(;VMEpO7A>2!1Z;oeAu_(kMj>ARTNb z6d=myfe`Cb&HiFQ*cVLj<*l94L3J{%6B@5q@Jom>4k0>^)!nw|=1rs;M9%C|vA@yC z=aa&irgI{4;ha~u>)g%Pe8)5;oe`mL_YiwXmt{`)NgRftzmz3oTh@*aV5KoBhn0ca zwJi(&6=#9@^QSelTi*&Ca+5@x{Jlh~1WvpscTgg7(Wn@T!knuZ8JOS?3x3V{F$fB#vNi9pD`C!eV1;f&;c|~o723mKCDeGUSbPeSTYJ(N+>WthQ1muF*P_2JRp(c|prPWR%npeXIp(S0! z8VjkC*)g*V`1kP|@q}0w14>;i=VBP-*;VXJFZf5e3jUB{QeshB3BVQF`&%i_Md*zE zD?ouS%LX%(vML)%kWEo^A2Yy{$LnM*da%@X%$&guXNJfk%Vv6&c7O*ZZ?B?Du_IU& zl+JRPQs`hnuu_^ClSQj^Cl^E4IbxNC_9dl}4=K)URWS~$)6BrxeyOi1SHRZ)u70}O|756 z_W%LDYsD;2xuZ}80jC{WeoLZ?j!(DY{dV+M30RB4%5d~{T#T{9F2m7EyIUT0F+5Y| zOt@}@8qsqxM`LV^#w*bi(Wr6LfqFE}8qAE>WJgJZ(K=P;!`g3c5QM z4gF`BD41KTweXBsuJJ&o^PnA;41=|=qW~`bu=|!e^kcbykHl^57%^#H5Y@Z1rB~%T zURJtRjT2i8zHxN@Y3XEiI#?Q_7XB=A^9+VQy~1q&e-;3wrQVl}EwgP8)ft3Y5UzFi zQFy+bcy6{G9K1YN@4J3`Z7DETwhwIKq|oZ5k$4?kf1tbz3bT8gJOuL_0ejy`UZ%gI z^#TDJDrw^g)Hx&D`%plEH<_s)h%-R zdqZUFFTa*~OlGCB|AdLlB-GEP7p9G2~EqgXTRvZ?;417?LI2*vxlMz%yiV@Tu7}|ga}#pSvVS- z&_B@4I-=k{mhw1;iFk=_`neV zCzR2j*Iqwr$%^g&kCEeD&V@-7%{EogZiHea<**&o$RGRjzgDh6%c~ z{s}ZpN~b<&Sojx#h!cs6X?gJra+?u}QRh!+gP*+7W=z;09D}F7M|I%NDeHVo;_1TD zvAHcq!zJiUx-A3h$ueMA)L3&Ggss?ek(hzI+Q{_1h%@MMklkERaYho~VH48*<6bD~ z6GF!%4s+Re>Q+(Ch9O}|@V$~I6BV#W92xY?&60`;7ZFNb@P3fQkDf|Y$I6CgIqkZ0 zJ2mXMtX`}sE)1y}^UsfCcJAYT`5HOI1aa*b7Dv0yd)dlEzo%f_OmJJ>1J|P2^2gu~ zhj8Y&vFzu2b74+Zl{LWsbC2W8?sY+pP7|%t)gM&|_QJ>ThRc6#lLFaF0=bskg2d17 zz(kK3X@4R9s<_EZ1|GLH*7uj*t;dvCZs8m1fL)(wK+6SW$3vpi^Q2Cmsa25uEOC27 z^$ge~GtdDPT)hiU*pfX3Jja;>#=;JA z6lmm2C)%T5miA1|xgZgW6A~25>uzYRJH?!$7*A9;v06L>Jp0w+JkH4W6ZjPQ1>M>S z!mIw0^mXhrI^2GGhBfktb2jpcgBnv$6m1w6ULqgAHtr_%^^0g~y?3-HOv;k1zm$;a zniijnCYX?=BqmYAtg*gi>_d(r%wszkn{u2nY>N;uVXuT1%PC{aBMOFi4Y-M#i@K2) z%PC1I)x9c6jppKavg=vhk=zA4GF?!%Be_!7rW|pMd<#Sy&xE4t(@4O8sh>yYq``N^ zg;7Q_uSYeP_k;!2eGBJ~SWZ++H^*Y+6J_N6pydNCFue_bUIPPXsl&WHm5XF)NF(#O9>}B$o;`obujj&)F>rcgni$G#EmI zeZA|q;`;utlMhKm;dKA!@aGv8h|1RwC+e!{>B%m)emSBL?_Zp88E4V-=yT{K&6{iJ zG3@-Y^5}8=)idDX8Q|^_LP~nQ06B@uvICePF5edIkv@;Rc~cMypj?SB^AqzTJG7cl zmVu?Zz--6t#7mvN0}o`|Vhiu=jc`8!AD@nC^axn@soh4+<6m94{z;v4upkw^2f2R2 zyqkzx4g462pYepr!>uGd!j4>08@Lt=yx&XT9jBj=*sA4f&Nb+zvi7BEjc!gwR_vEw z?Ez0Lejy8Wxz7|`eARrTiu@%}TD{O6eND5o}(t-2M#<#*2(eAysX2KxMZ>qh2d0A`W(965X zPjj=!e>>n*@V$-4@$_69*r%Pr{CJ<{xU`h^#JyKD-z@Z*5zU?Y{J(wZsC1aUTFl9m zf8(a(o7j&mC(V@P+}H!dO5$FLLo5(|=gX8npKkcDZZL)u}TA}8cN34gf=j$>=*Wxjt{Nl%s ze@r7L7V!{MyINKIVwB$@9`UbskTBmSviCC1tkvFQ^IGe=dv+sOr%<+}WH3p*8WNy7 z#-)BC^FR~Lgk`K^A_HMs79V+>@X1gJr}1^77LIGgU@a+33vL7#NzJH7n%sR{vz^`$ zO1+?>kP!z>hG#8%5o-p)T)>Mq{udcM78NIrKE*xC)G0@*0J}1KDmiinIlLJ8nnZnY zl8WMJGr%%rS8?A@4-`o^M&w4`p<K8(;^6te=+LJ&P969LQ@CAjXhI(Q&%p9+QW;0)R@ zI+r54jDk@6NdC`hdY$pVu8*t!EBTLZf98VMoQ*bO0z@~t=nUS9jpnj&E9K^zD0gk) z<7ZRrQeAc>Pg|Cna$8}gj-A*Cg0bFW^$`i0H!)qz5kNPyp8?xsVS~%D*q{e zxbHP8pJ&{~HD4I3LT=!p<2Xp{q~ej}0ppD>fzJy$2d=M9mg!EvsUg< z1YAIgoq~VCH(HTdL83tlzrr9H1*BR@s>~g!-KyoY8+{Yp1gsG*(RiNRAf%*FyYKJy z|7s-&aluZG3(Clb@U^16^r`VsbN39%=!S0w?qoXr$5>Z{gQ#%OPKT2oZR7UZDPwMk`r-`C!om&cuBEv&pFR#I$jHA!i@&T}OXo?L4mytIH+c@SH-Pv?$-8=3J^0z*7`WjHj}MHvmNj<+Vgf7xAT673GPS>g1UW= zfSp&K+^is!#w@~l|A=#Hj2tm5nSK@|1v8VH3`ocPKN!)AebeT7iT5fVlh~amTPMUn z`3G4q8uc=c+5h&RdYb3XENZd-(llr`e3B61EX(hx3~wg~g!@uM1sRfu7W7fsqyV+l zkjO5L4C|9t<)6exv@*wrlU_GV1I-CNqu5@ca%OH41J9~^yuX!X?t^|axmz?=xP++YCK49 zywQQZ)}2KQvQH0to_e^fs<~gASh2JmoS~$i2s8#Ei?FckRb|hxH+XbU?$KTWf$XHY zac>`<`=#s8F)AaF9v$4iMtQ85t^zj!JHoeHA1!bBMy)NvwIHMXDINWMri1STvR**H z9JjPUII~?Gz8nUzM3!ZR{W38eT8c`pglnIRv(-?*`q;lwGF9$y4zs`0CP6?70S{do zpy}ad1dSD%S-*}(uNee8wkYh-lvieK#6KcQ9;E~r+G0l^AMkVjZkwA6PDA%K*TtDW zX1^715g;jVx_P+JA9gx$u!Btif45O0AoGL+;tXBD7kYc|e|&t@Lk@iNmqk-hgK8L1 z$;VAVk2aH!&vDit_CcHc?Ay+=A4?*H&CJ99@~Fe+uv8WMSG8p{_w-|d9rVfM(k@x2 zPhs1o&xC%7()-gt-N*c(>Q5QKMh8oIofj?w((z}|6jE~;rJNQxxAY==%Q~NU1!kS) z)Fcr=rGTl$b_qWN(?(JJM;x$3f^<^|HPCAc2W>m9wrjAP9Y*&prC-c4Q39|aON#)8 zFw?>R=wsPoVAU^iFB=&3Qpon`_d33VS~LC`RghwA>}Z8iaJ0ntE_Ylybz-aiTT80J`?uCG;)&?_i&=u&2azap^n*w!_YDtA&>hvvtCiscTJF?|4yl1>R+69X(8)T+gG+OE^Ef|+PElVbAqsp#NRP}lRMdLBLPy_ ze9$Wp<@t2KRW^}Wq16YWSY@_lgc6C8n0M(O>=1_`KuU?@`NO-|_n1J<(K0r$TQN@y5n4E|lX+?hAnUl}v7IgjUW7_|vk2%Hwl75QNz-)2z1OEq z+vq&0P*d9owAVic-t||bwjv>Q1EqF#l0$kUDTeB>#3U}EZlgWg={#AKI6N}@&ig|t zQcpMwN`oo^mXKTdIY41hms!P^L7}07TUIO}@qLgf0z07jHy3rU} z#;9J!IE4yY#I?&ux&z zrCwLa?)z&A>9;b2HG58X9+Qx$m)$0kdrzaJAc*>Hb~52CYOxiT#H%WoOTH}_{TJKS zVg1g~&-FSmPeHZR>YB7P0H^t$q>>SVMtM){?*2&II188XlB|-r8M~Ite!^(w4!yrv zQU8XKnB*=UZe!mcrCDQUuYaL;$-~x=>D|vO?QAyhT{yQdbFa91hApl$H?8}FjQehEESeR>k~u_ zV<_gi?l6R|a4k zz23vn`xaCF?D91^#bI@0C6q|qQ||@dDx?a#|qt! z6H4;Wtdj{g zL4Nq{`1$zHeujbq|6yIWsALg&fvLX6xxwz>abc{!SaAysH z>W6qq_+z-wh<@p#1Snc}xDzV2A9pg@GJ&BI!aawTty(TAPOVQH8cIz?{(urIOqZ?w zvO|**OU}Fy8}-{)x`J{jVWV*lZF(^(ENyF4zUwbYc+jwXhDbBvsNuN3To{5|LcPTJ zT#d?j>xfdkpx8^xVEsf*SVT8PiDbhM@Zxm|LZppKnsc-TPG(`o!W7_8igMAWM#tmR zHR%4PQzuAp73=?T8}b(O*2Bhv!>6f0@4tiUrU2 z4LT^%UwI07hjPAJeQ`HEFJg~YZB(z#|K%g*CsE8^ZoY{&Z> zZa%;+Biks7t}2H1Tm$<(6z2Obw7)2ehc_xBWPH=@<>u4J&o>jfx9)h~GjLeP2px#0 zIFX8~eK=?&$CzVLXHylj_nIuw+WNzdXmD9vjlr=xE^2T8o2qyS8iVL}wi(wMS9x$a zEaH}{#k7w0HM06j6Oz#cZakZL)e2{%5!z~sMnn6>JA5`RJ;EA^@DwO&4Z~QncZFr- zx~gaSqwN4zAf>Bcs|Q<%o>8bP4%hK#)iFYvqG=VbE&p=6kV9!5x&?)+px$4!K{Dx0 z)tK|?SzDGhZy;HYc(9+=0~KrPqzVQv4ObNv__g@&&0spgnIDL6x+&4>Hp1~|?gy#w zgLSAZV3325*HBhg)>Chea)rxXn4r|$VA)lMYxhym&K)T}r9C`rAo|V=QG$j6f#XU9Zi-&+n1X{$d!oftrIDXBOgO+}`F<;J#IB;V zf$4DLv0cby!`=*4dtaQ~-iOOWb|CXH^Ju)yeot(Z(o2AO2cgVp{P#iRXghOIm6v#E z#8NoAZ@i4C3D6HQ{q#EdPt<>w;o^y6Zkgj2>}_Ps37yd3-Fm}YzFhJ9WzXt4&EC7U+tS&iK<-<1!=dZ)-3F&|`bzusnlqN*d53h+i=XgK!b@~J!l2?@r zm0CyKK_p0Wmg(p*gCC(r+Vm7+pqz#0L>oP(;Hz%?MG7$;n}yYIDvsau{$57b1tcLw&d4@*aI_(TQa54k|C5_fD~4JsFe+XOW-zs;WwakZ=21yg8Ls zT{{Y0KU~JO+-B@-w7A`WjcsHm}|Qz5n|fSGp@SeXfnh5eT}Ift|A zlT)#|MccB=YgzRk$IHh5`XgX|A%^eg;>lapOc#k|L4(_y66@c*B88q=M8Atn+VHQ& zWRxU7z1_|{ga2Ny58wdzM{pM!-sAY=Blq@sa$o?`DBzv5rKKf6(}n$b^5TLq&;Nx^ z_~QxCKa2~wjxh_nix`<7 zlyvib^bG6@#Eupeb=H3xn#V9KA=VuA9gUzV*ve37x$7b}0@uDaTssHl5@kXWw#q}= zSc3}M{DM_6786;w*sLuV1w(x0?D!`i@Bo_PAtcns&;sOKyMl9Fv%dNA1wIfU>#Tu4 zzouFlbwDy8O7v(;15v^1#SWy!WOPSv43T5GL42OOhf{OtNfCL8+ zS3bA23UGF(0nCWqi;=?kPO=Q+Gw4f$hw_3M$3o``sdDJF=-j0k$xA$OPgrLX!b>fR zC-AqvDN+-}a|q6v534RBagys(Joe8b_<+vU&5ZKP7Ls<&hNb&F;D_TbVSZ{@o-Q6D zr3S-We(6ESKp@89L*)Z_nK!Z|UH!5mA{Vt~|LAm;hdkL| zZ%}m)EE7hI0eaSX-^s%v{!E}ux34Dum@jp)uf3|Y65kiV#c8hI8K^*1{GKl@QMH@X#%!J3Gar@^{@7HmvcB+yZwPZMm9J*9gDX z0+4&ceheCSgd!cd$>3KFC>6z^xVtjLXYb1#Ix2?r}i+z zcv14<+l5zYpr7AgqZFWO#6?fsy1@Mbm$dPOS(byXF0KIkKUdXe8$$fbfETTXGiI*OUr)D)oxO-} zRN{sYlMBd^re5&}#Nzm4#kKO|^97uxN>Ufd*k+@G2D-GTF*37M#I*-~lQGo<^C_m| zyDaUj>G`>=xPPrO36Tri=A4{^)w0P6#o^^GdC|3YiRK}2RmEyrwKs8#BA{vfvJ*Pk zA54Nr*xv=_j-!OMWu~n$VSA4MJjw9KN?b(AS)MB0Ki=I0?kItYJ6VGlG%dc*L)v@Y zZwO<c4;ZNMU{cgM=}GLTy-dID5es99 z)UEvSbheQfvx4K{%cBosknq8&xUJ&DLEB-=G38n(apZSNwd7`IB~;eSWz8zOWp<|E zqk2iUAx~kpWO>rb_DPos=R!$L5t_nD_cl6ZL9r6{C<%^6vAzzVXo1SaMnbLxiw3t4 zNh&ptIrd}r1h0#~i#`W}Bo(`kcsRJjc==RI_h-SlJ9Q;T0Oc5j+6h*>Wjm5_STdr` zXu+5fd+YAdNeFbcp2$rm6Mtqfmw`U5uTExN%jk4W2bg#mLN5M|=FtWO6Xt zh5o+FOgI_Srd49GXIJr7!2Quezmql@4;+2>pYg&wZBO)o@2{ihwSw`GB3>=-&n4z3 z##5u7TRz+g7t@5jz@;HHh)ya!PW9v2>#uDY zGWu8hqP_Y?p~|>iWr~PM?f(@TT~zB|8|~BZ0kiGuOQ*_mi}OLWKmeWNy-{)7qI%2l zyTH%&eu`gBBB}=Y(X2jZ4>$y8*{h$Ee~3pLdZe?-Ry)K&Iu$xqWo{1;(%{I?2IeqU zT)DBLY;m34a=HI4DVeXV!tIz>Lt&~1xExWlEf+-m0=y3*xE-D@F9<*OrsCh?BYZxr zxFbB!%>T~|a5>RyMwczu=YT>q9D_wpQz`>&+P=m2*!*FZoY;l>hWt5d`xzecE~3-M z?i@h*h`G*>*LPws<{)R6QQWVVQY@ElKr9tEkh{#k;AP5}=SV+*CJ~?*K5G`j6{e|P z*NQ=49n)whb=DjoN}9IX(&A~zT-?(2ki@iI<1+hA$>y=H}{ltDk;Tvnf z6norv6(1+*DMwpRyV}W7UojGpY^l%PTOJ(w4V=@pcB8@ z7wPd!UJx^kX|k%}ZhDd6@fdSwXY0D-(c|smGI2*Sg;dc?2Jr9?8yvuVw$*y7dLeX|AA0{Pi+Ua;-eqMSPx-@|lpS%J+(nJ!xt=K!wn`ECDqXgKmCr!VDG{vt z4_q{hIJtd#eqpO7F$QdlU7PQrhkHM(3|6p%-d*HqOi=^OVC4|2exH)?+ zBkw{uacObrFs72XDc;epJV{% z?H$^sg7J+7_c;JLCq+ohcka(^y! z@*CcxT0p1ZN6ENMioycABBT$rtQeixbYO&K>pJEZ-&Ty5jDi`8JmjkziH8iW9=jig z2H){!x!!e7!BD=qFrvKnwB|`_I-Zy{IaN&ldzJ^%uTnxfd|v*)kWs;{##+Iw+ftd> zlQ|OBhG+Av<+RwG0L@Z`;5G^ibA=365Wc$AY#euS`>dpH(tLxc<|6w%wq3ApiMq9# zPUEa$AoNAbBGLu>9sGz0NBRI2FiEz3U?I1JUj}+nhLUYs2?qtDjgkXIMqGCX-WP|+ znMlR5N8_@Kvo$5`nP4h-c}Un<{gDT=A=D;8JUY8ljv_jSa1 z7@YFSuHnsk=-Z5Gh2Kl3NZ-+jI13sFXW2Twuj*PuZyqa+DG~O?A7th)~0AX5zY4~~Ul4pYXa#_+<&1`wY=`uN{h82J&s zS%p~WGWEL#Fo6NFNRVK>5Ab@_zD2yodZGu4cBaWaf_u74|LO>LABo0Uv_;D@fE;S6 zCZhs4*#Tes%^4jdRN=ZvF#k;rmnU!cl4QGcWVKRF068Gvy;x@IT&gSfq0z1!i+W^qZB$KpUXii?-KGYBY8SEr^o(lK?cfSHY zkn_6zr(~6UZ=V=d|=&r<%jL1UaUrDZ`zye+^Fa*h-h12 z5yn+u!oMeWJT`*jpIXJ|0~efR3k;M~mE&#Cgcl2mg-DePT#e zihtt^tO5}#5f$zcG<%?eGVMcJ=e|GYa+=HEsm=#hoVR%PggTHbbDsu97>dVaRcLF0 zP1X8^Z~_b>Yx6>}&nZ|=*dClHE36+sV=&&`@K?HvF(-lHo7%Xv<9y0mEzd2^H3LEq z(y_~rO=IZ(IY(A+q>mYhD}Jxnq%K}tY;B$PO@UTBM!rf#(XBG1AB>c5!m&LE{)V^# z|2odfaOENY=c|&21UBg?ORGp)P$Kis>&*H|4NG+3{)4*TE<;7W2FsEjh!zS5s`<3J zP!qv4XqwUA%#>pc@qJ2M$+^khX0@(z%Pkq!(~H?{3vH}4j?=T9R`4Gj@x&Z?xJL~~ z8+L;XXAdRPFpsQ?6i3+#<1t@MhPP*~T4!yGTcR0}iHR<(FyiDST?=dugC_lET)&am zo`dSHzXosu%Gvey7gk!{NPAug{co2($7%xZ35$3h!PA3E0lN`42P3D*q(_5(kf~u0 zd%6Dpuy%nagAQ@Cd?>E_0)>I_imtUgZfNidvuxCvvfhkyOA3|)#u6+1u?f^&6|0d-HQa><0ym`6nkjRP-v_s8;|eh z_*P(w43jLHLJt3(G8Dfu=}6A`*Vnnej$IrMM{Y6QKq|eNbQ{_tykw;k+y8p;=5n+! z))^X@KOn|LUSAq(lT~^fc6s+`Q`uA5axqe!Po!1i+m7G?CK{*9fN`eEUw@2vy#zORz4@>QA&6Gf< zpZF46Sk{_9*rkWFL0vN0pEC zlQI)|%`KA8y7826vPKiyz;yq#a@DqI8^fUYa6S_;@=am9>Ag0kX<44XI{b@?F8Ov| zw@=)6;J5XQKnY}KlU~mCkHy@wV@Vw|pC0e;r8`h`xoEz}4)Qt#GSfnIW?JFd;Ed~0UOc7`Xy6sN4h8>q5G=#n)P0V-nS6C{K8%M!YWtvw zL=Rx^yZy-PK(5y)Wp*8Rf{2uZ`4PL{0b<^2abg!vcLMD9X8^w+rS8C%N}j3_8l)-} z-rGJMq2@LN?R{6!l&A1smr2*2yO_mraj-{ioF@yp;6``KDy)`gvvQ@aE6bG+3#d}H z+g>!$9=Ss=ocT4aW=%%plct4)qA#yB@I^hn(*IERqYl&*0ZGhc${37 z_p8c2#q$?uNVY$e)Ah#^`{>7%)A()Hy8R47shr|zw98SS75wW2d^1HBmX#@lK^w_D zsr}Vh!zRX~ho&~W7N|Z!xSm0{Pq!f*nCmZj!oo@of?Hsba&YTiP`Eo_=JVx&@MGn1 z=XzrV_QT)ijS2UkXHhJq5X!;(=mpxkM3Q?np3wy1$MR>dmGjIlWQfK0S8iYXp2Lz) zz!uS2+(Vctsx_#U*OWxTiBa{ex1Q%VDFc@WDQw_!v;z1Ij40$$rb&H{nO(gw!OKY| zps*7Zt|Y97V+xKego`=}vHO)g)nMNdEs-x^m~W(noF@2iBRe?Ag&#!l{A3Ye^l0OU zzz!ddjyrOY^s4?*(SainI;u^TD|XQF;gaSO|EgERi24-MbLdpPv2|)9l&3!80 zol@hOmJGXf+jY^F2FMAnF_-KM*_$TZrte~U@|9AKKN#V^0TO$6j~qwU6XJo>v4~b< z2k1FY2G#8-FYj&BlO)o+I}q-{wVf( z>j-hM`ZUgU@#xnJ1VAnPt}(mxuJR;kJNpC!itmXw8LWltn;L-IPN?3~sk0Y0Zxq2l z48AummvyOjuzDO{bY1})ueJoPFMxoZY$acA?QIR9C|kQc(ZhI!8L+_**x);Jd=oVA zOzh|X>lq_6B^}L+OrNF+bDRjMz6^g@*$QvIhNt@{_-|ZM`uZ116NQ2>8RAV~2W7rj zF&ghr9JpnhCJugtADuXs9AlgCC{br`_Y!g^ zwLZmi#q@iTym7saFPave7$z^Hv;jWkt%X~ zo&q=jmcD^iQ|l@xzMR&M4&cK>YPM0e>W67Byq$Tkq#BJybJv(o*2R^XKx# z0k%RiwlLQ*>Eer+tZ~#ZPKY0k`^(zYi3h_D3ZhV$$vKSsXy?33eg*QB+s=Ad0+!L) zF~pq7e~Ta-pkEW`qNbyyyUQTT6-2Vpd#ZX*-!&FGeL>-2d|+{)bvftX&qrD7#hw9& zI&9!KbH9A=4KvSZKc7Y5q9Q1D6GW{@E!VEZ7a~;DqSk;viaApG{d>MWp1PFMtaNyr zzg*5B=57^z5!&$d+dR@=5p4;K{y;Smgn6u(RsF8HL7gHMI*l;~bNAdUlyW{VXC6eQ zKQzU-eXyo8x*hqRtF(Af?%S)ns1d3HiIoM-G;K+Nn8Q)fZwiMo>}Cv}kq5?gdqMD~ zaz8TE*o&x;0+r%jL~<30!3a)v;VP=JQoz+g&TQQZ3*u zt%zJ8xuwo1)!o^gXU9cjS%=zuoJJYPkI^>G2pLshCq4XK4s&05`~w_*{A^paEH?&l7e$zb0CbmVd5A;KHE#>K@8>}?p= zi@m7v^?o@ddYoidep)+h3#+lwN{+JvC!D?wG3&oAbf8VLrMU`xN9j z&ENg>;^O~))&u z_+z}x@Or+W_6x?U>jHCQ6^$|RWH3&D2Vw*-h`ptulnE~hzJ2qTpzLJ2W$SLEKh`z4 zJJw1&cDZQ-TJ=K(Ke*%^#d{o)qUs56-*_JN$PqQ&Rak`%_YgRq{;ELDJ9N_=S@a+exSF<0r9$G=D9m*6-lK0fbI*DpJW-RcZ< zx!8*1CmCHd52Jz?om&A2n}}HJ_wKc%XkCZwp@L0q!?u;vj-f>~#)yzt#c2 zK(78NbfMP+3JzltSo&}NnNT}aYU42-LBJ}2gVftXkiWXla41$-c4AUuL4F7N8uFc9 z#@VD-dG^L+%tG`WY-!x=@BMi=n??L}38QI&AN!)1kC~oW#fcC*|Drl6=POxIX63T; zmc_$PyuELso1oiw*>=wzVI&a6o!ot7xMQ!I8zxfR|LJh1j-Aql!Ym?=c~EwUUSLxo z9)+(dTv$=_C;t57Rp?phB`I*ikC`Fsa^+M%@(YlQ?o>pB5oQpoL53QdFF$r*LWT;^ zL!ggR|C`M6ld zw#*U+fqx=~k$pKdb3xadZ(%`f0vm<~iHr6C3QjO3<-58yohXI|G2Bb4=jLh(H4+85 z&c)!%Bx(v@k6DdDGi7y}=ayijfK57=@g{psoKN}MC!l1U1D%qQbcufhJAU<*4xZF# zPrN@IQj6_7j6^a+fMG-h?vF>XT-t-4OCEaZ>&xnkd`ZqQ>s{JB{J!bwQtj(;>B|ql zv)GT?4YcFCBI7UNsF&&A4AC_n3N?hZiIM*h`DC&#k{NW+5g@T)GVr}JM-FVgqm+?1 z*_LF{p2RqU_s~jP1w)F0BR2uceNLzWt+c>$;b!k@e{#0&=n(4#H`>5amp>!WgLy;- zf8Ehb0#~zJUVi2orqwNFl&-%oqS}Trn<#J9reM}6HJbSRYIpdYQ8&y3vtWQ#JCXj7 zXxvx;pTfIwYs5lL_a89?xWiOoC71OGNao06g8}wL-T%qKh`vZo9+$@%+@%X6s%eLp z1io7nsmnZ%(;HgPbsitARv#lKGdrXXo~yBwz)=nq242<&=&HWA6pd#>p)1>Npu?dc z0N_LCJJsSl_IZ>lOs27kTsOWtV}dh-iO@2#}%{%d_ZwPwncvAHy{zU zx{CIM70B&IH-}fi_itZxIJ-U42sTk&k57u()}IDw4qs z_Oe}X>YN~H&V`0xPgmuV)GyPn7M+zlNp`vhKcL=}dtH z$#kX-Cb9h<2!^?(J`+MLgCB!h(_mo?S4!~}V$UYQE5Vc$DK=it@Ht-OZ>7;qrRqdR|;{=T)zmPd0r>3DWve%`IxqFPWg{81BCU zqrHT50`;ZnIPels*KzY>Bsk%|87B+)yuyD=ydYN}6N}i!fd3288yPbuXa#BITPTE=gjA6HH;VF#3DKsW<)t_)2@qvwE z{{x(K+jGkF6rSqjWq#w)?#{veI2E{l^*Y+H*i#D4#RRqjQxNh{hR+NS8@8fWp5rzi z!Ut}?D#dT+P`e+7pNoN}|5z1qcXIMc7@BGsmKbA(tZ}wq@QT7kh zQcCb)En^Dyzr@1Hl9wl76i_jOM|{xx46dLX`O5J_0$YC*HXfOCpNJV{TSV6~Il;Fc zNHAusNnif!1taF?gbSO9g<t@Hm(;|0*`9ij&*UDU9j8Q~x$h|gztNq5e) zXs&>-Oht`69+D4>7BUy6lb;E>bgvq(o9*i`C-5k^TzF{6nxJmJ+$=cP5u?jycXZ7L zkLJpUzRN~A;vA1(?+r-`1$(QwK&^GY^8DUarQ6J0nxo7OJjcAkkx>4eL z{6`$5sqbj0wqHH_HJzZ!rcs3Ui8V3 zrS@X%t1eqsZ=(%jCrLs!H>g33)m2-D_F=W}{|)7{kAhwmg`vSx9DI=v7J_S%v5vx} z$^aS$pB@-oHOw{6FVr8=iQGjn5@fMul+121A$g#ve%hw2% zQUg?ACi6&liC46S4hwsWol?3vqnzQ>iK@}aZrVmdmh9q#qE`c3&{H8eMGZlPN%Nel zF2k^WC}P6qZwV%OiCq+vvzD9C4lr(8zO4Cxz=c9ad-O{(l+%A1#L+%Hd_&@hCT!wc zjyq6PU_m=0aZ9F)onur}mJ{HH^pcMhS>ncyI%K#tpw<`a(x1UO&K+XD&M1v3iY1q@l;~Tqk zqr33MW{8w|599uqgXzx-?m_x_9N*ll5$RT+-tC>e$9QuWq<9->(UBTtLn1>yKEgN{ zrd$jcIff}GJ};Sb2X{Egynq+Fo|hNIkA;92?4FN~n&+i7qwDHRPonJ`rAH$DKiT|i zIIesE57aCBh)2lx2K|3FmesMh%Qc})t705Kjo4rniKJS2dHgbE^14jD-5|N8?;J(W zkOl$|6&405g3&6%L}~xI=mqv(z>Zn#*ir{VRi zm~mHu;Atw&d7^l%42PLmf4eIFj`X%y4ph7{Sgy6jWmWE$!A;+|^|p7K*}|cb*Z|v< zy0?gBUH1t{uMS?}4dx{gp>!)e-Ef=4l4*XPjV&KdxV+wKwN5`mOnbZkGRQ`r2=$dj z_5NQyL_WkG&;@crXM>i_&`4(MZ{>ErMAFf5;)iCUr~;Uhg_prjXMsJIx3#?7rssk# z=feeV&g_*ajZ|~ELwYF95ZUz8rcUk-|2!{?`taR{|HGTRkHrFPGUM^TB!T(n?M3tr zOol$w=l+|YpXv{juAuVnB}R6tFfsJYX~ZuL?_cHtMH*UK4l@c?ZX@5wWdgqtLCNAx z8e8JID_KcpL(b)>^j4{uo7BO-L2VAOVU)z=;`ZD0DMRyeOVcs(;1tYA<>9_Cgj$%6 zN!3Ey`_vPsSC*c@`4#WLRuAO9&tCtkF}kfV>e_4Rc?th~s4xQRt)WHXi^JZMwWpNZSen+} zKl&P@IV|e)phw1F92`;mPYOl?vxKE6208ki+%?YZWH+JH&YDaq%TJn&oGTD!lIAZe z!+AQs3O7B+G5nM|1^%AI-g%HJ1-|12`7&2L3srQ|pIONQ`2>t2B^f`;`zc2e4%Vfx zTMVA^tKHd*5W=W+_bSa&7;!PlKfV8QNNW@y4pl10;HC-iJ4Rh)CRY*(C}N}(rLYS% z3N;EAHJi_k&pOGgK$6K9Q#!|lmsRnK$l{U7W}~;uBj)C7?Yzs zGQ&$`mK<7?LDY6PZDcZO30Ujz&?Dl%FUI&HoR)dbtd+d}obc5^VM`C9(TNPdMb*VM z-=LwL!kxoAFPRf*tsLvoslo9rgk@k!^YItbiT%rJh?yP6=Ln?TgKcQ*u@fLf769UZ zq8Q-Ft+ZC!80kr@Co}xTmuDg>je0zuY$?2sP<`T}*{(Y_YqdmztuM0Vr8*1#8(6lB z@Du6gchF6Jt%7aD&rQa_pLTkf2dtks5zA8S4!;_=v841oCtuRYC@R$Y&ECeA?mw6N zt9D3$z3#Tcy=gC4`Lg>$UCVgqsm=ET@u>k2=xqWEq>b;tfo6+|6E06J{XrA^P*qzO z+)z%^n{Z8N@&yf>!MI60s^oHjXw?+5d7Ofks~D^Q?jFY%Px|BILf`)ru%QqjRGrT6 zG95F~UyftnG5E@KeT>Wel(y9i3b+n023(5!T-M{gP+tvXP=tNC^Y-PR1CW$mK8pFc-}L`|e1sHX~0O!Iz=bj_krMo9Cn!`4H$85971;C)q1&GLOF3|+P0i$#$l$37$S2eC7 zi4`5G*T%IaSkrD>N$c zx|S~VcdEwHyoR*k6~7{5DD?{@3bN_$|4jT%XNAEYrC`k7il!RMCH;e6bdgIqi^iAW z+NM7ZpRxEo81g$E6?-h^BkCiW+;o^z=ITWz_gwXvzpK)f5g54b|JbpkYHQK66X5?G zfJju<*PaPtz)lAs=uJZV>)#T8Cp|SB~ z7M=yzKA>De0M1>kwIN_>xY;v`*TzZSFl z?sTB(CH;7v7dhf_5TW3^dOKBNgemAu0&2^{5fDeW;dk>^_eX*dEx?rFD8_MHKmK>2 zu4Y)eH*3x*BR{hC6!oyhhfx#@{WTbY4$e?TZFZPlV&G5H5H{v$uoT=H{7_vJV~+eS ziQTRyE%eI|I2dvj(|tstr?mg1?UQ_Njxz8IPP9(@3-&*&-bdWt^LO7a+>$eH0Hmd{6`QtcnOo&3XF-L_|E~zf)ubI zh5nhbi=Ao){r><*LAbs^f}zpS1R)6$15KeapdI)DbSFfOgdt!_Sj3O~P$g1Oat)|uy6Th?Ddj*Z33Vu4VD19)GEfbsQptINlIe0`w-m58ZWdb?W7V;4 zp>`9cn<)K^i~^Iuq{t{}RZ<5sLOek?2vJ~g=xbBn4#^3zfGkjEWSSux(IWlS(U-(v z15tX41ZEM?LJ39=MhQ*9y~KqYX50f%aL>3c#1hdOAw;4?0&VBWq!~$>+7*^$cyLCQ zbyoyw3YnMCrWX#33S(484NNJKs}Xv|i$HOt)-|UgQi)R9*lL*sL`ft$5G2t|13+BB zz}+z~SY5Czc&LO}&@f@4utaAJP{c7QZ3io#5EE)LOf!}YON}&X+TN#vX8{eA*fCN$ z=AOlsP>jQ=V5fqdDl!_m6LJ^GT_lG@2|~rF87w@o)q>-*b_r!uRXm6Vo10S zm=`GEOK3Z0lz>}k)J6jyl}cFUV$FCHp^fzj>WO4KdKWwxQI$?Z+wDLyF$lu|@v6y& z>JF+2OP5(iPeeKh_w=nuv=iiM5bk6 zTmt6CE-GDTgl?~UZP9I&?cmgtZG0_;mQ#zctxxr zR=`#|WfjbteAgR4yM-w`d32VekrNzC<#u0r-y6rFd5}%9<0d9<_6NkjVn~rUFJJK6 zU%uh>>w*4i!P7_bbjvb+Wd3ku{BC6Y^UU;qX1SfHw-bIpQPa#kW{yjtD~39}1YQ6; zU|5f;P-tuBh|mv_)78NF)ivWUUg6V7zbxphYwR#E$H2)UyHU-kbq!`L)mW;N&6?4G z8O6K>Z(%bOXvfuP0B{#JBa2U;inKnL$7i^vUe2G!g~lf|PHB7A@U&J3+Csz{UoScB zP26&}MskTu;Kzohu% za>jZ+uw4VCDgSS2LR0g5y#D3zNgLPW=ix72zkKUkoC^fldbM%Ia2XS%UE1=j(M$RD zDXdwa$dI1?fUkd^)br%RXV)H+pVaV(FD5?Y8P9me&jYDbx*;@!`A(2vm@E{lXca_9 zQa90GW#m;svO#nhq0=>DM%~a-5fjv#fnU&q)x3H(H#7^u6i!MQg)ldHo!2%8bJjTnPz>tq9FK6k zb&49}TzMA@?_=S8%zTWQ(j|7o3x3fnFNPPSE9bYbf63P`uerLaX#bwMOic6lobPXW zc>9r$@6J5DT{wT=v%D>och-y^J7FH-w9p(g0ynfdRoj2B$!bpsCkgD_>4Dhoi2G~g z<`ukpL-as@wL{|-wGK>Y1rwGWIi5R?^PW?VOl}kjWQ*jYSO6(PSOU=!L3_M|#RD^u z(F5liC?*7PVlvXsi4?*vp+V7-2;&aP5h(>wycf6uFHlKTjhG0SldF(wAX%dG<|M_n zP&DJ(4D6#8L^H<1!h$f9Da=w?M93PcGGGxTBs3%>bhuU)ntsGwVf1EfDh|Qh{5yzY zVi*;(Of8u}ZTcWxKt+i`2oaW8NLt&y)9P`A3`{gP0Ub_|ii)Dz9CBIAg^2qMj}Unqz- zLm`O6?7dBqSKTo14f{f_Pv!w2pU0qf(Aq@NoT4XLdCUUQZQLi3gSwt16{a5 zHR2pF@2FMVPLfhUg#TsttVgP^`ZEM~Naa~1> z3eg%jjNq;SP0-;=b){`Pzoyd!7Kq{c^!J=5OnUr%mqtKZ8&|CDZFtPv&i#_K$}m2K zv^)Jg4IH-qc!EZVt&Q0jP9x4=#%DD*YP?0(zrLP7$+_L<*8ZoEH2{vZI%(~E+E4BJ znq6A`Vzh~;JJFg@I(enDLg$5!N)#suZINCd7i8Fu&v?c&p79g-?s(*GnlAKe%$Zy( zAAVvC*%WsSm}=!@m6O1I7rE;b-ye3oySn1``i9(fh%`p8%V&ma>~WGSJ@5-Ae$5Ad zE8ns$Ltpt@uKimEe+!oJ>gyujE5rsw0$2o#cmZDuJ};{>kpUBM0=dVm;8t)SY3x-& zDBz7*ZyN8bApYqv{`s)(00JuOHAgmLWsl={qeok0#h2m={v;4+h66$~$W~~YN9(z; z($y#QLs;phw|0qu_*#~BE^>)qb|7jcRtO@O0zjh5o>^aU<|}6FDOS0YJ@=BhSLGzD ze=2o~+e*{61~vXn45ZM6H`9WQ6M7zz^BJE;vIeFuA~XyA*_(H_SPDV432mUY|GzRm zn@c)ydwa|6+qWFd7=HQ7FU~ig@r-9YXY?+4I4>)ntq|Q z+ciGTgKWv$QuSx8+A8qYwx{hAeXMd#LFBU34K!Itt(9W6c|W%HK0NZ^t&EByzV=Nm zkTXmZKF?TQ@KRA=*LCdro?SmMbOR}M#E?)8Y`p9=K84&&%jsE%yu`-xB`x zCwAutuJp*h|A>XRJe=>CPc!qpL-Ym1;aAl0o@H5ZbCyZKn|G*~grrUsCkk{bL~Y6{ z0ouo;fVXc`ZA}ty@l`IwnpYGwfke;%K_LWLgCo(<;Mi)!-f!Ye#7B# z;P&>GySqDTt>m0}_wF6zIP&Jr8(zJ7Mc?yE>UM^}N-=Ut)Ep>2KvHZMuzhGpG0L=1j)FWG-Lk;qSZ&|qQlOaN>R8Iubf$Jk zayp`OK*xwo5uGBThGrPlfp;);5EAqW`UojPNRGw|vOt|-To83SQ@Tay^J+A}ju}J6 zFey|uN+{%%sog?NnUWUV6>4b87B!M25OX4EB1$5VRylMX$qZV5T>9^1V)E~k(?$wa7V2mCP-C6EofD!5WMZClC-09C3=rq0V-k@sfJgh zYNh6-83t%>i_HCkX(opXJz$1=L3|Z{d5<@Ts?FceJ*kfMs zYHV(;y?|7udc-wQQ*67Q_+}i_6e$%0Z#zvy6FQHeMhZ?<$JD8k(dfjXc@iWFb%9(V z0}C)C6muM-mx_mq1VbFk0yPVk1uJdmhc33z$3Pzy(}+n23P}aLU=>OcauK9}*9fJ< zYEP*F+_B(f5kd@vA)-5hU8HtB7CI^&W)ZImt&wDbWC}%*xgg6HGUiKL8xo~zhig+e zHR&;Fh66zc%sbq)O+qZAKnP9@MyF0*=OxLFSga`@97p{s~$>M`%2K)pWX7YBa%^$jmyMZ)mF z%#ri_mebu^j^Ew!@UKT6{&i%27+D@B%EN?>6J<9uO__(KLT%n~oi>a+O~I`PIsj>` zMFe&!@UZWBcy-P3m#@f{8RnVr)eGv?jwuCBIFmJAqpU^)rCKu()}^d8k%^Zt-^oySZO(ai0uit^T+Ub9Uf)(T-tca zb9tfG^^rBHov-ci)-Gu6SGX)YJhuDG$L*dJ9t{Yjoj2wG$MY?}!s`F%ao2&Z{3rQD zX!{L)R+d)w(>6S5+u9D+vI4D5U&tftxvcrOdbYsjeC?kt?Ma`0UKU#ZC*l;gKDs=a zwfuJe$-9N~^m(;c$-%c)Z13cg60L$~JmVS9_*o+L!f;Sf(B6>Xm=~7nST+Vd5q1;( zi-lean%8+)P+SEMjaRBUQY}%Jgou*N>1!pG=AborMS>tnkNO%6(UfyPm_~}LI{)zK+&;9XR?%w{H+uy(E z-5)-3`_?&r7#Qy=KjCQ`XQm|hEjUY4?>xQxkirjz;TM4a%FZS7pKfl8Nt;M7gCLw_k;*YVlIKHwlJ2! zQXSPo>Kg*iYbQP`m|ps7$oMPk9Ft&wD_ETu4)O0mY9yFwC2q#{xf z%d3oO!Ae22qP6kjojNgAN~mP5EK*qjq0K1^&M0lBm!;Jk#Yj?{!hTq&olpm-SfL`+ zG*bOcT}JFQf@R7{NGBnlU^%ZmzdDmzYex|h+W70xk#++iDItSqL$f2f@hBDqOHJq1 z#PR6O0fV&lBCSgG14Sx1Ix{`d9Saj4Moi9_pAd~m?9jdg=@5_5JH9i#pYUW&1|geK zsiEdgk0w^Y@K7-;AO)he4FH1T8Ywlfl)zjA*_!T=#y}qwiG>(X1T{npu9XrBMGK07 zMM6E$*8$Z?^?_`Cb5JoC%mhSm`_DDEdNvw(#xtJaVVd~o+gonW=L^&I1IQ!Oz8MT` zOqW0tX05`Xr^7`ll__UVOXWU0A9COw9pCAJ@4|t*c*PucEZVbZAS+ZgTm)2_z(||8 z#y16cX>Hj430pp>p$*`T`)oq<8Z)E?Ne;;&md1gL0-*_qJT_wM0rQBtVGfQqg+n#O z>&kRGf+6eTaBI`rhOLu6Ij+p?rE^=EPnllYeLYE8b3G}yVy&|&ENr<2lZuPAHfU`I z6WSJ&F1Td>^f^7^@yFzo*dv+{Zxa%J*8WQYS%0Io)7PX=Ym^psL0nNF7-%)56l|VZ zrZeMsqx*^dbkeo9dL2yi2tI+(%zVl6iuqh2Trs%B7F-0D_#A!e9 zaDCu(ePG%REM20+xPIb}c%+laPMkg@hLE_Hz!k=oEbGFu&~YaEh}hT^&dhO3G|xwl z=r$rtTY%3Qo9Cu*WQH&!IwN7hYFnVgGfrDrhE<{I3EjS&cxx+9Ny_7({n^;2x16&kyG>B`H>k{fM%$R(rWgw8X%ENIP$8KR0iZGpwx zlhHN-!Q7jd!R#j&?PfR^AB~sVZ4+BfxQJ0=<5s1{P z)FKp>m9EifIG~~Nwt|6xc;o#WH@A(+E9dQxWAz5#f`dMxeO6uHLa3W!t^qYoU391Tt=XHg~% z&HK90RzL)BLuw`D1*RE2pRv;kIiHbfB9_8lm6yZ7tHXg8hik45R~!yk><8AD=_rZ6QC5vn<* zf)~Ydr1%Y!e#QIxntz&J@;~HP{ByeD-Ow`;h~DGjyS3$i8LG-K47_^v>SCaEcX!7$ zO)Seot(98q#lYooI9zDV-x5TG7cX9LKA$5J$+0Ouj3!J~$lkVy7kN~9G@P>vqbj4q zq(W80aD-;i5NSK7$Si~fm<2T|Sk=4qeDH$|b7$i(1 zqKQI8S(PQlY2KDAxS-aNYQk-39tcCmL&i1Zk}nRVBmz5WcAmw_i?L*9UK&jn*0rUT z)`&OSGB3iMm1S=7q+XOhDMPOeeISUeik(U%BGoV#a#6w(&{^@KcvY$?V%q3`XCMKI z3VlF!5rz(lJrbIlu+)fJL@l6I(JD}Y+3_*MnDO(3Yr#XMCZ%*8mJ&k5=rDR*dLjw) zh*yPbXe|UcLTU@LX*_;q>bL@=x*Q!UJxsY7u+k(YBaz)-~23`3%nnl@7?%*5$<9Bx)|Mkf6 zA1BJRg&8X|^?G5NGM+2B8fSMp+HM2Tb{V)9IMC+V0q7KlP8kk8-K#6Y*RPo{;ye>x z9D>_^%FyZ?Yz2EpB#-Qvo>G*$shaPUD){U$CseVGq#jVa2o%A zNn7O}G^(lcmv4--^0Y)>`|pzG&eNJ6Yvjp8f5tPO@r<7z zxC1s4vOCdrb($;$+gYzUeV~>74U|( zm>9i4 zFGeQ@Vo0b|S(uVB^@WLv;y|3hCuE)wThLr7Mcvj*l&55J5-Swh7sVJ#S#Hgg$ zllnce22zGN8%d0SA=(@S8S7DMbKVdORhur36roTl>f|WQ+IVYWMq~m;@FOBSBt}F6 z#2rFJx{M4H($7d5+joW_77!Oiw8_?Eb8KL#vU*qmHADqh#YxoKv6RF(_bf{yx{y+( zrh;^ZlnP6%NGf<}1|cOl0YxklYEM@pDq-F6FjBJOTB+uU2wyfL|J%jSocn&pGyc~i z*UCBP3)6HrP0a7#lYU}hdRv@r0u|Q9&&x%GCOBZb?vSf7=E5mE50<#sYd&@Zcl&`4 z`y0;P0SgI}fQdH2gE-zys2Yb#ZG+!tG_cm~v>>#+CQc1aY2I-V!$ZMRMph3x9vxDp zapOv3uK^VI)w_@f%oW2y6c-0=!leeoS%pp;8!FO-SF6xs<6K*pJrPf==_@bGrTluf z3BWG=tauA8aw*eNyoFT)=TdR0AYif@4ajFk18e;N+jH6=$kyO?nJxRHz-G(;NWOWj zQ=Xh(;!oRuNnB4`UrBpl1cYc#C>54WohO#@%<=Sqjz>-`+^VsQ!jK{Z9V>DG03ZNK zL_t)27fC4*L%1kaJXIk0DJ6v;;K%Ymsq^29OPkj3<)?5_C;;YZ;-7#2d;Z(a4PPG) zpJsfeF$7jWrxvGXavgVRb9L3D3rBL#W)#iy*bSA`1khb`-7Hs$7-s0=|Hm+YFYxHq|zYK>h@0wxj zBWZswJtjO}mv(IQ#BmXPqao79=udXZ`K;T2EH2W{wAf%goM=s~CAX z^xX74hrVau5A27Yq3`MXo)jap-Oq|I(XSu8qd$SqbAPGMr}#J5FY=RhKE+>K|5m4& zahfK|{XGX&4%gRweK_#d%?&q)!?V%AGoJB`pG2*dX`1->@#97LaZ~*4`<|2%(8it3 zV4&8@JkQMY49m=13RA7z%@g0Aj{M=_BY!mK&t1=lVUKqmHgx#miZBe^h!ey~YTW3| z-}wiAqu+C_MlXp$lyCSWFa28%{H8aE+94seT@F%*h6IwRmMA4$lmN2|MU=MdP8**M$D|p}E8vZ~Jwmt;;nu<@ za8~}iW$~3%UJL}nrD&^1LBe`2k7c&w#t-xKoVAWmLYrZ1T&ek|ZF3&m^+*(j=7U*V zq673)xn`C(6nTje$sYK~OFl60p2!D{%%NlS$gH7xt`$QBq^^p1(}bQ*_`?Hse@{3* zkj`iJweX_v`NiSDFK%vl^YR6+UcThTix#e~ui5Xf*zb4jcFlO;=VUYh{8jY+OLYD( zmHSeDXgds?PAAUiGt08@`t@tBuCDm{>#zCkZ+}bQ_diz&E57IN?9Y5ve^0RQne4=U=(%U4l!aQ9v)^!k znmCO!`@;=4S6?HNI38!-z8!H_d=X+5b}GbFNx|u~84Yv_DiCDl2OM11`M%Tc)j|u{IP0(BP;#njuOv zhF~vktO$nWiWWnP5o(hqM}&wHTnQ48b-Xq*#U~bKij=m%tIe~Wso-MGIY6jDATEwa z&>%#Cpo%HfVASX=5tbw@6>^~|SzZjlRt3yz1Q9Aw)TzO#5vn$$D^tNlFj);N#83>a z-Zo2iB@s!Uh!O|{)Ri_Wf*`mT)F4y`D=3apvA$AqN>HqeB$Tc~@`{!PM5$Fl3=7tF zaES)hh+0w2XsCoNXboTiE0MAQ6O?(O&KX+@b;)i1>`qaogxG#pXlu!NeYTQFG0~UE zzC`*In*o{zf_9(cyj(FmFl2?T*aZ3Y)BJ@(L)?lmY$m8ca|*ID91 zbEsj|05u3zgjxm5MwLpnN~KcFhzO>_g)q@*`(iLwVJv_u3xOO1rH_gk2K%pEjY3LUKMo7JxH zb&keg2>wJiyq4i<9B*k~xZcnbT9!*9PJ6X3b4Q29%X(Y8^|p@dzRug-Xgk`22f;$X zW5B(Akwz2uj=;wP-;MCEi*Q$js?v5Wxq?)M7&@+oYrcN*hX3Oizvfq8z2@dRVBH72 z`#{x?oTm3Y9KYk?!#f_n|H$c&C(i#eQ|p441*=*f48@^ zRdC7g@X6Z56I_0C!;`iME6=yKReZaLFPvY>Y}BN^{}+P+f21;(u-5y7blNjl1m)4l zprt*{)4cXS{$>kXT>J~yg-_Zd-s+Xl>T356?LMB+j5SYtj-TK6ns(`r)|roa+Hcs? z@|*m?*ML7BS-)$**E%l`i0$2MsgKyMKjRtCc*f5UsT2C0f;2v}hDz}TD+{o&XF~g# zJ{7JGm6U-6Y*uWJ)HzW~M=2e-By#D%gy5OvrFm$lpvm!GkyOxKK@Slf6qP_yp=iKF zRt4=sBp_vVJWw3BihG7oKr)!M?b@b*oH}GQWT~hXR5B7O8r%53jlz?_QsDj!-!Jf| zQF%8hi^9?y%XOhrxC)WJzvk89YyR%$4ZnQ3Cv+dE@z0pvGS+XoAK!9!_l~=_@A>e@ zTR!~L10TQdsmns0D|Ht9TJe)Ik4j!(UUxvqG&*;Xw$s23FraieNtEbLNCUdRhMQOL z)k2n@p;kf)R1b{jHWQsaa4L~g4V+Aw&8Rj}5Q-VD5R+g-zy|OXkmM{ivMhncB6C$H zD-aw_xyezZQ8Tb8Iw#_^Bd|k;ign<bHiqB*l$yv#ZQB07y@}D@OhKe9wp%%Pm zoDA8JDg=jw(}~fEkdS7q;X+wuQ^gy-uQ*lK`KNp1CA^ZwIR@j@WsWG>4Gjyy7UD7! zav{!@aEiopAWTA>6~&0b2x2NF))0jaUUp1lPjX{lW~^55LhzA5K|-tJ|6}jXpCrezJI~L1 z07$xfL}cVu)m5t2NHZg|8Qb6Y|Nj$acWrD;()4JnkGe84<8*hD0N&dlKzc-GRcj=z znYz0wzF`Ou2LTXt;&^z6X{Cx{-a+Y+uqA{Ifu?XfSDLj2tkDof!G2zAt7oBsXZ&K} z!0kBl&D|ZgY{KSRnQQ$x<39&qHen0l;4!*|-P@R9?##6jRSzCHXyVXqd5GuSbrDLy3T-l5nEMm`qE;wDb5>E&3r>9s`7k|lrN*6fote@P| zp0I^mk&bI~EzWU_lXZrlz{hQnCv~iiUfTSlwzfWgiZ&^&f##ivb*J{jWBF@+w3Q=< zxT0ogDTJKSX~O3tbv`oBBam@5stMUElUI@%Aq4bkpZ^SDW;{GR@Z0acqn{=Q5nf-e zx<;PyjA#5YIB;`3@()*6{9oUFhglmFcPa5--@fJV-oE9$?;nS#CvbPhQW$gQK4;$N z%)46nf4h!{&A_xB7&kqKP0ut8gf4RCGwz0p(o5wncl?!J^I9@P7}$iw=l%ni{+^BB z;BH_}XSalUY#Z+CU{2%x58xf{J&?dVn#N(nEVskWT2Lm`Gi_^PZSJIkEJq&hj}BVL zKpThkN!V$JyKAJYry@T} zm&bBhA92mSKGYHfKym3X*&>nv#pH~c3m&-ShD&ZtxVOMH1AB`Ulq!ML?m7iLVAfG zdA7~Bjd8m5*=d%2x~34?0lLxz0~Y_ZE|35E(7Yu?3|#UbF@9MI=M)GV+@0k;#Q?-tdkcKS;-o21V4jxU_i z;swbQ-F}bV-{ZG;*zGOx?wYxQTq=h-^Wix1-Qmc0$0Og5Bfr19=lwWy zUyOq*W9TT`4f5iGczHp;?K$h5DBsbSBc5mGFw8u{d9gd=?d2;h8^=TD{3I~UoV8Jswd6+XJ==8|NZy8 zfBzmcV;skmu-X@2e8HPHZy1K*(<1(?p{ne5J1#FTIUEiE91aI=Z*QN42L247<_!BL zG@wa{9!N2gx_=uBnQbEbOu?xJwFXMn zCV-@EpXovHptwbRiqui5Nt*Q%r7@a&Q{v#&pco-nVrgT4pb1u`taPCXZgj~hoiB`+ zjbS~?BHSPWAwe;uR)ma3BX~ssqmr#17-w70Dk7$-@qvp&&8W<1DF_)+$AeO|BL}6# zNLRO*_Bc&Tzu;sllT?nu*r$qwf>V|;Ug$&t<=lm-8l@CA#SwGPnAz$~iZjv|=)h7F zipW8zp=rVsb%Hiw+Kx=;b325$Kq{3O6vYE_#Cwgs; z=DUHr0E$XPP;?Qh&|-vKs9q_mWNGVQGhLMP8Nnd9;by1{;sMW%?qjwvBqPNeq-Ok!^!hAvmf=Fh^<={7_4%x4AF5G@V@8sgsfCMuN<46xz3E zI9>xaN6MU#sUl;CX9y;AB5Y$}js^i-0;Bijx?}RnR25%@Cw4WEOgOWFCiwDpRXjV<1HBHM zrE@O&3(h(?aLaKXIm{opzWa{%?|I3)h?%02S zS(~rSU=?yc^{uzDVd>3A69DkGA8J42_HNhiZmu!bo!+;AczaLl`)|LAMrUA&d+M)f zB`sL$|Ku&7!fQ*NwOpswv|O+4|KDQC<*j~Q+A99^85{n1_!9=LW zZ6~=31?b{8a?8hYPhBh5ACFvC|7kvXyk5%_Z`Y^$mptn?E;x^Z25TEz{Npn{d1n8q zbXcvCJWX5UaZbOZ_5xS8yj5SXTAoi*S@F>@@Qi0X;~9TuNWIc;+6Y+08=ct;wai!@ zu{a|!6GNd3MxOO!}3sw=f5N>Q{5K@5+M_l~$isUOWR0u+b6!*zxqEbvZy z%26q!QN|gjBd<%rs*-GA#|7uUrPe)%S~*U49Qc;2_uupW-8CP+zvJfHTkigFi10(}X@;#e&tOC;JmS8X)df-zfRw3*ogC%!*X zTpLZ;M5zd3O7%i9Z33odSZ&Pm5<*)Kh@AHSDvsb_Z4F9gQMx2(1uae}N~}t#N-Wwe zV!GiNMcTg9X?s_xG=&5L6VBcW58~Vhf(i#Te_dNTjfI9#twT)&v;@0s^L#5Xh3q-jQpM zm{FxPx>ubc|YZ5QSXfuT54F(!{3!iGaSXG||B!zDIc zK(~XC5YaZu1Bf)n1cZ|o;h%s&n<#W`vw%1&5uy#A(#k{H=2MotJ}Gk*dT4&zXd>;N zQ<<{FEd%qXSPzSriax5NT{^AR|8lJDT>^{Wyrq@P)2?}+yu(kb`t+HhJ?~G&|69=d z8o*C=pUM^okyD#jJgwVl9V;{hjJ zKkdPv`h7gbr}_USsI_u`IB;K5-^?z__U$biWi*}&1JER(z zqXtyPtjB$a`#J6jEFsb(LC{#Jo-rwiOrRsG6RK@a7SHX#`6LCfkWH6?%GV*g(k|8k z?3CWd=60gET%WMcm+cy%G5Bc9rrOe1rXQrurS0&=A1TC2+l;Oh1f`{~_ljden+BJA z)_7zM`?7X`R@>8ud5mp-zBy+|sU-AvV=6C!x?-B0&p5L?MTF#jyQkQua z7u?&y@v>|{UFb^Y?06&{?(v6P?EZ%Sc+VzJY;gwBMtqll{hI5S26yNB?vCGm|2^+- zZpft|B5a3&|M0~ZynXqSP2Ycd+cTc=jAxu+W*iO&K79DVIF6V#A>&~fP7IvuUc|i- zI%}=W^UO3&Jlx-Ndw0wA-7SB(x#ruu8-9O#%@4;tw^rH5z_{6v&(D#5APhYkm0pdV zIx$817&r@o3$MKJk#i1&RildC5$gkL2O5WheRL?l!Yf!YYOc()Gfgl}a6DEHhnZRj zw!0l?XFGOh7o^m)PL2*(b#gJLsZi@OcNIk?qf)V8q!dW0-J=R2LWn{PG)=!x=H=7G z^~p55&b&yQuSIG5^ksgRC)>1z2W@TUGQWRZ;72p~lJBFs7yKySPvL1j01obg$g*HT z5F%)Y>6YpnvIVB<93?QSFai_JIzwHYi!Svy;U zIU6=?7NLNYI$})37*8&QaQbcj(lAXEA3l8GpZ@8exVvlG-&K{@uV3@kS6}h=?b}c8 z`1gaj6L69=rciYQlO_mDYiFZ)71BJ0YGW`Vznj9-M0(F+Aw{A@V(mzCA{Hl=NLM1M1VS~0Kow(_ zi6RfU%X4YDGHyDwwm#{sDOpOz(JV*_P^GPByb^0056aY*u2P-RGc$!Og^b}f;B$vh zJvR2#QK;?+Mv9e9EQAPi2PGs%-%)a+h*PRUQ3g|@32ws9dtzg`_{AcGz;?T3x7)GV zZ0Ne~FKXSd0!a;6)@vuf(V7Jw$-02j=)h0&Hsx^3^^W{Zs(7j1%S8nB?GQt4}DD3wh%260Ls z@Q|2~1-rWEeNnmtaGROl?fK#Ig3Sg--Z62-#1;4ZE57~xHQ)XAo|~(Y@z@0a9(yZ* zN3bT8z(L_o;F`wZ)8^wT6JSOr3!ex(9!oeqsQ-?9R2U5+pQf~P;{_8!)E*21e>-}#Tymg}{IwY(?A zeiS;jEFAmFG! zhCgb*v}bH>czll5*2g|F!5x=^YsR(W!SPOzEoko$jX=d~M%^$9 z>aA`!W3tNV3f3W}3>MhBurX)w3JygBbAox}WI$r38#Z+36Wz-h%^7N?0u`syXrr|X zUIj4-E@+JSFiz^fs}YER(S`X9`(h78pT}G{$j-6rc~VS#A*?*H&E;;!e?*0jY_7pOB*wr-Dz; zG!?8UV-=<YQ`zHQKoqoD%_1o{R5wa4%zI}HL8nvz03ZNKL_t(44wF$PWiCCz zcZdcKY#41|@M)?|5}PhDq)3+{NfeQY zXomzxUGX5)s^p_EHR4t&Mx!~zhzp%P*yFcCC1+5ycXWN<}25eP&eQVCWFC@R(jMx;Y@L!^=nQVDcL zNah50Bp9Lw+I&@CXer+KanzYp#Woq0iN15X>ZH&tNxDLbk%~YG=+Oz;Famf_vBczo z2Ni;VA>^ufZBHIiP(keHwYYi~8hFMp4pS-o_U4BFd3D9RMRQ^o0)KsZ$;-`#KK{!; ziIdsW~@zP!B~v_Y~0SqwI$|whR1VqI%iI24Y}eU5g;1YI+x(dU)s9nS;D@y=P~8+|3VOIIo6t6 z+~Yk@+A1yl$Ls&;xV7g_SlYSXqCjw`cV~xlF7PrcuZND8a>k2Y&o;m|Dcdfx={trl z(WgYWFj)L4@S|75Prq&W>Ae3OoF*{v{rmS^O%t25Grk-KzPh~RuRi;XFWpCw)o29O zk4Gk7hH-b=;PlDB1 zT)kW!}`M&G7*7*z0ntRT5?R)RfR*1|%r-S!(g62{^KP_grD&i@v zR(uH3DozXf7x7R2ri`kEPw{?||5BGrwlW2;tWpOrgW8f1A$&L@jL2Zc)$?D}eY30T z3a3T;BceLTT~6j$Bw;~{Y%DxG*^eFwR8w_{98`R9Knx&4cH!IhvJ9;KRnW4G&O6p* z%krkn0LzeCSsZrz%WFovLcP+jXu0&9E1lcJ*A_od^pCR^`n%N;wRxAlzQtZfMG5dK zG)83u+R+KqAGMgRWzJ~YTuMcg;*8fbwY_??98(f90zLHf&a?&KCb;|x3-8%F}vyxK-zzO-)j_Tx?W)FzZQY3f!jO3>y5H~ z`v5*6Uxo2yh?jo&9p~V=S}>-D)4Ap&te72dne4wEOuTj86yoxXcFcx;59tva4ntmZ zIGX`oAw12Qz^tv zB?3wp0U%kD!iucwpp<;+l<1eri_d|Nr-P55A>WSXX^!3QQscRxO@Y@tWyq241}}Ot zs(`g7R1s?0W1rc}S-o$$(lbOk8vqI}$ckYDdp&JojQ2ib!K{g5Qf~WV=jP{!-e==y z)|ZLVP5c%?Up!04Ba`c80jy1&Jjb31x&99M0^-K>UD4NL1Y6UVLnPb$Pa@Fr!8Id;Lubq;R>z)A>_>^spzy^D!{9WT2M_zxueO!A)v*hji3sVHaV{El(5VFAZPQM zJ%#ABL(Or6v95D_P8HT{!<<2!scKq^a99sG6+Y;bu3`0-CPx!WTtyQqVxw@DItVGj zV^GMTt!AratJ+i9&?Zk*_wK4JhpA9t$kkM?h6OCiroyQePb3Co#r+sdMbjl%@PkXl z=eLoW@Fb1c4JPj;wdS)EhCeC*(%FDB{Azw6b6#X@!W38 zesANua#71%CME&9Bmt7i1(gN4q?M-ZYObX+RC=W;BY%eoXEKKP_<8_8c1=GAI-%9+ z=PWp}86{m}V#u6e!EA$Bi(*ZP166W^HJ|uh%Q5J98{tDE&`$+{ndnH|ScWNo^ket& z7ATfuwQ(zji#H&}!%OSnkYKIcIPvMQUsKL%<~^ytlX(G{itv>&H6(}0GtP!&b_YXz zBd4z1R8LFhy$kPU3)x0i*7d96u&EYXv=OyKpyf~DA3EoiGIa}d+`qY(dQmP*8W*VZ zt1ZT8L*+tQ*r!2M?W&NJ6K29xTA8VtNdGBBz{!`Ko0+ii$0v@&2urXG{R@AwJ|6;w>J zU7P^QQ<#p9f#^LpayE3+Y@BZW)AA{cKX&C)CV70igG?OpnYX;AC>*~y*m}E9p7w=s zx|s}MGWFQqF|T;#pY@G|ZmzP0c(f=M8ky~l@&&=+Y(nS;$OdYMV3dP90$U7lH^33+ zGuEMv5gx?gqJaUcX_&dgYy&yj%5Oce$FSpDddJm# z@Eu+bG0p8a7Iu@51&=>ET9!GgclZ^pFw>siQnUIq+m4xIm>kPhV*a$aCv03V-0gt`K&CMe4o%L0R1i(1R#qH?=YoY(ba ziXk;7#dUm^M|t+? z;4A7DINri)??#vj51Az5Aj$YAdMwoLj^gvO2Wo9r!@$oVtZE_KS;3PS)b*7gK;sRd zjIc&S1zNOzx2|pkTb+aCb1RR45|r5IRwagSs&8<=3r@5&qo2|DZQRG4*!Ezf;VFZ+ z0AL!|$xIYrScLMsE7K3gP@-+3RZl0GLgl8$@)1nhng7UsD&bY@MzmI15^5Ol-KW$S zkWu*4au(xsZg&{WF&k$oJuXV?n=K+fKIbiUk#R;#wLb`pe6EN|VY9K31!pl`f5kf; zB#{*04yS&mCY_J&i*pJfwXy2S1QUceMFSL5olwX(o8aCDt6dF}Sb-B+#B$FswdKJE zp;fs4AjVh{sDiXXWyDX*Ng#w9v%xErEj3k!qAH{#69)(-C8@m0uJC{%qI_%AIjq4O zg9s|*#=u4Z8)To$I*R@w^xG>4W`GS`s(E?Ora>DGCsT~PpaLh$xUN7V35Gu$xj0## zQ*6WuDQn%4h*b|}aEckxa(iMK;?L*)08$J&2G}h7{5^L2{Xx3o zlXsaep6TIzGsowJW&UmTy=$0%C~!b6Fc9Q;Wd;?y9St?AUp3te4|5^_l;ZS|ttZ7d z*{x?WG0YGbPcxQj^xBoU4+9Q5aAh_~^BNpp(+mm^I+iu_S|nvKFllK{Y&wGEN{Q5i zNOutuAnx`uClHxViTEz2j5L$9nMTok{b@ zcRdob@5x8`;De#+v4!|J$2~)y1Szv&aXfT4vU@!UIqr$g!i_{;zkJ7mKi%lizJ_3H zzXUo2Ix*|6uYXr1ptG>BoS7arYt_HpUFsPc=-;H5Bp!pj2+u?M3@gzC10n`St|AH& z8Z&QsUk|Tb{vZ2L>BB&7rPgV&>V|-9wv|8H?BS{}%Q245eUX78h(tT~Jo@%Ln|9or zCsbWQ8)Cv;D`?!Mk zuM69_1;~Lli{X>Wp7xFRjm)n9`C#4~b#edegV@^()Mcpp^Q-#^GzQ+^-!H6tVqw;_ za$8JT@0;|W9ABkXU!e4O`;YQ~=I7VFxcX-l{gpqM5UxAJzr;5m5PcW4lZu+6$0TWc z5JPcug=um4o9H-xBdA)`VAohwS<=Umz5qP990h%kn_7@;u#bQLE@q3J;e|h-j45V| z%dF$*TD;%iPr5vg!4xRiGOAW_G0$QTAC93fv0AKv%>Aw_!~CF<*hh3X282Ceqi3Vv zt@Sk&*;3@5Xfo73K3K?M41kRt6c6AYufGV;u(~-+6U_M(DXXOt9HpC$qS}!*7w)z0 z#YTmE78M{?Ru*e14-3?Rks?_=5Y#>BL}3QgODJ*lOSJA z^Q`cJQgz|h2Fg~=TvSXM(@w69aur!A9G_+|8_v}j1<8}QFrjo^`P5^^KUSP=rA&9% zW8X@`61FJin7T&m6`gor8^iKS3<5T?Z=M6D(F77`jncTT~K9c+F^O*-rr_x?2!7 zByOCd6w_r!-@qU@gz|GfgUFv5_BcmDK_#GCwCyNBkaXTpE6~TZOlwxz#!MmyoFAG| zItCemh+0qZK_;Li64FgmhM!$_H!mWQ*Gu}Tj)?I%h?umpF*4ME^B3GqQqu+aI#=v% z=q6{HT1F4*dyKJO&~?Nw=v&y^0Es{=k7-^-6*KrT6$a{cPB2;0)Hq!jC$cPKjR8?1 z3R8vQPKzcBi$I~&IR*}?F?_WyMwsb!6*p3=f$%ogM8!lg{MsL=vM|em*kX2b>H0@4 z=_op|oq)VV7fKxy*3}Ny4#pv*0CBSv441zR^J37GXm`-x@wlPj3g={0M}Hr(awKNj zS+wLOA{9f9b(qQ4i_Wq-t8)Ysud0VuXh)^2a*W2A{kw(rCPnYfj>10& zhbU16!%DR@p`2QW-+iuKi^(u0A7f%VPeCCql{q*4t=E`NxphtnIF^tEu#m}AhyM-# zn_~`QFbe~&0+JFkL8ftnvf+8e0BH)kR}=bY70wig)L;!8kv3P4Xbtpo2I>aBIxMKK z#NgMfvAVr$d#AFHDkIu-q#@4bdsGMa>w1k7rk%}{KpR^R4kbMq|1twRBrdtIgk3gk zfyJ3oP%7PP2%Ns&zjflCAT5*AGyYVf=Q>yOn8)ai`T22Tro4nmHjjLM=GqdfAW zg4oE?G8yDZI;tP+K>3y_R`a%2R9B(LRGsYLN>Q|P#GfuUmGUMQaTI(+zsbarw4LoV z_X!*LT8*45N-9Q##c?vtio)WG-8YYrVG)skXAS(`P1s!XkhXF>#R@|GkeDi#fVU;K2!?A16>#mYWZu@`~8K7ETYBv^~@Vu=bfvMidjX@)3zcsfBCB= zR_WkdLfQ({Ce%&*-O%_mcT1Nr{XxnT?lYawKN`8uR_XQL?fJAGaM(B|sCK8xUtJcF zH&3}aLxR)Ym<=^@#2@43FJ|WL?hAfq7tiH45>C6fAGI5e4Tl=qxL=_SBQVx6UkV-t zr7(`pvGxw%Ezt-QG*_hej=XByLrL-rN9#GrBH9J_0|Y+cMDDnY{pxBoi5C*X6o$Z9Q(4!f#1o^WaNDTX#6>fF^XBK9n`od zEuE9Uml}0$bg~|Yf5RQbkX+M^>m7qA%^vZi1Y^8T$5W3gPKu#_rn1@|bFL-Z12bqCj8LJosz zYF6w&rDMLXIl#$z+Z6$6B~1 znTUx5Wram6ZLPv;87(s0W5sD@+d|zjpHdl8rl)gh7xKugC8*wlmr5`$)FfDt{a__W$0${j_Z=|R+p!K+PGy~| z!O@gf_iWy-j{_&j);RB%S(69UIdXLVn`40jv13u&9LtQ-A zJ-?~Vsmy1tn_zdJa)LfNJwMzwpHrBp#$ifjjS8C|&VHP2FY&?5ac1`J9#NrEKWr13 z!>k86%~Ur;=@gr<`N>SA7FoIU5vJF_HK0mqhT~F4I|cI9Va-tsMcQ_crauACd5Qi; zH#mv4H7TLo--`3ie2*TEihnJ%ckzvMe@}i0onT;EPmiYltLzL`!n^_{p}To-IXUdN zUPq&yH)wKpf>mocPQtTe`%&IPW8l>FXzeRY@e0S7tj9`)RXUq`sn46wVaeCbR*X}Q z0DuR;uTR-m=e>SdU-&W8{^$F+K{4{59^Mn>q8Od1263J*j6R|!Y2=muJ9NZ*SDS?K z?*~g*U(fD4Cfj4D&2a|&bvT~xN47X!IFx+0P~PjV z|NX-C=-+bjZ98IpI4szWlclg5D=1-}3(YeG52kKAf0(a9I0fWz^K7n{ITb>``~U6L zNv7mWDtN90XXS8@pO|Q_>aI>ZoSIOcpbAxb|MCw&!6cKE`sF2ER@HqmGZ7O5D>8)` zKu=zYObi)#q$u`Szz0)t^jf5>?492W8z0QgINOxE;X)Uhd@5E`I?3z%9IiSB5H5~c4?q)FPV$z2#(;o za=|dfm4TwWR%ignM;D|@A>1iK(RRwc`PuMn%Hy~%vps=*_ft3I9@sO>KdJ00p+C}T zS81;bCI!QtqAfa|k2gtdJ^Kb8(*pi8#3$P{b)7%1h1MvjP1bawAjPt{p$r+4EDzlP z*Vhf*0`FJIanh=_e_!J!x8guPCKKG#S$GH{w$fu8t!#HNUx}U1R?*s25IgxkT;x6Q z;==ZuL`ZctL*L6jLf`w{q>}FYTfo8(m+1Bn28TmLGiPMiLD|B^KRF0*p8Ff!-k)bz zdG$}yeFU++V%Vb^H>49lG#2JlmV7kO-pQla=DRET=Uw*sy|(Yb^{Vp@BA6&L2eq4G z73sT!%qQ4U2y38{Hc{nchz5jh5tc@be$E*sp(6#4190KQr&cDCwcmN~k^*Ym zMgU+7&~#zsGABCYy4AKSnKOW&B!NkJuGRj-Jg*}6@pt`q{)boHJWevO3)ZAsByNo^dnka;xP zV*bYpQM_WT@gN%1Pi04#lJohR9xO2g8EybwN!CW89aa&SXDlq`Pa7vy%*|5I+$_XQ zsBCc1AOS86OX~5hUTSVg;`k7Q1|v?F59$chd+Z8)pTldb83{s%qLf~u@R#gaN)EZ} zU*IaFs%ZcvDKE!Z3$1GoquGFZsD7?0B*XNOIlWYcR0UT1kc2@#;-u1Af_dD=21_H@+wR? z55>ERr{JST%0yTWk|G51yQTCEFBU=$mAKIcGt9{r^VGn_WC*nKF3X_XaLdw7Q6<|` zze#`^aYP#+^U$i589*TgGTwDD1-K<7>PWl9Vn^k8w}37VUj6-t0V(w9*&Hdej=>echo{TraOV z=Y;v5%@J$1Hf@m5K9^z_4FD)Av=(KiKUWZy16Zm48xDAYV1FnXmS`7Y`>SMk6>sNo zm9ZUmJnHx>jtAz??FdXxfh-XC(z_q9hkM1exSrx_<7^WcMwHTai`3wSp8=7Bc?IPl zy4>0wUMCob1NrcOUI1F87BqZ2bs%%lgA9#TeTgDpglk16yUp7bsQ=XF(yoXO9D;ZM z(htyWaCMREa0}7icl$@@JSTthF6Kw4;T7-DogGQ(hU+IU5>w+x-O!3HG)0W(xY$pR9^m?b`zco>i^Spj`7q-ytsT9sYnT zOt8R0y6>xEr+e)=f4FYnQM~}<=3kfM_aD7-sS=9cA@Ec9c*6sg7nv>6rUi1cB%#7|blg2; zeLz9Z(m;NCVR&}1{7acBP@Y?jMVO{m08dh4_7LW1BheU z@r5;TYj4wo6Ke1TVA+ga2aYE^A~ttuj?w365!1tTi}y4LN3w?k_;o_g!QEu!MFFNm z>*K*=QDgABm9Eq$@_Fo@S(P#BLVLFnp%myA`~<6M6WopyI-u4Rr-Y81mI1(lk-7aC zNfW{ngjDJ6Jl6BXGK3xub5Bw)@cV%)4ptO(6m$|!8&~rE=Ju^FtwSscI^km*4S3dS zmFIm{0We&aBnOuv7rvLy89aADs?ftq3N4Df7+uQ#{x5l|Ol;uyjp{`*H!L?zH+6us zvgA37Z8Da~v_}xlOX-kfef_`Lm6aYLLRR-K)R$|Jj3(mugvZjRmvmmP@JU%CQI)A? zbI;CtJmmamQz0|E(pF*C$>ZerBbQmm0Oaj{rt_#ch?m=4=!hsu2Df~!- zztLGdX%$D#oJj(RLGGXcR!}zrgw?l3Kb`8SS zXWFB~&HAXO(AkD0G~RjLOsp{6*4kBX;|6t7*L~XP&Zm+>&O?w!`{eDrC2T^4UB(go z0eGR=kMTUFXNfDxHQQZG^kk{1kn#39XQZ5`@vTA8Do}7YmSXs9!hvw@IMY&dt%}u( zG*kwDa`3LnNE?gcbA52Dv$OvMMdW?p@sf$pQX)iQ=U(Lfn5W0{HV9bxA;-O&)s#HryPT% zEq&8apVgslv9SD!s2lm$+?o^R_i<|MKHxJbofz2vd|kjPxbZKM;VTbG1f4h0>d%=+ zVOpGiiYk1mdJXplgJx_3>Ip*$^k*!m64J_yw%p>3&=ZL6F20RCPi# zTWsn2FN>$$N+J-5UEqi)*Xs zTkuq3I3jE#R}#>uw};U8eBz^Q#zG;&>O_-1HLphXjdTXkja{N?Q#;`XczzzqeqNpS zz6(cL{UBn4%rmg_^n8q|w#2au%j=VSh)xgexI(}-5A5gG7DDuST+#wraKH+x{u6as z`rng4X+@v;{T&qKg^^2Mbj&6jwXsb2=kdhuH=*ZnArjf6T0RlXgYulUZWYRc>|+Dd z_OtKVc@1Zg;^Sr49k}3Hx%XMmJ$rHKe!KHZb{COxiMueSy%eu733?JJ74flg1;$eu z3K^C_>YHGg9{PL6oGUNnYXJv;_A)g|z|p>Ep*M9wqUZS;0K+av3C8%xDi(J=yt(Uc z35QLo4tx4jEDse}gbJ(*>|~zdHw{=c&bI5uU3FWzA{KiR27(Sodx~gjFl&;`Z{(M6 z+~b?SVbRg438Xv+d;Na9@A)6NfAaXGik#$^fpC~L@5kXHjtO2abO@zMgeZ%;{sIxzHqdr`Vk%unxTdUBsv zSfO_v+9^^9i)C1DTFU;EQX2tZf@p?8CRXNoV*VS5M)p?Zw2SqOag|sW!OK+0IloS> zj8{SF{A(E!Vv}Bb)K-V_CXfO%MFl-<*FheuiLbeRmkqK7*g>0Vy{5iZ*(c4CuYY}&=h0ebJ+OJPNvxQ z#GK$?;NFpuQb!_=q7;I3i_*Xtz)ll+QNuuIrbn$LJMxU!v~)ozPpYiW$xU3%d{bdD zI_65zjerUbmTF)VIk?wrm|!iT7**Vx*c=;K`rtUlgfmX(Z2GikK9a1E`yv{@a=snH z8&gM81SQfmswS#B=I}%eZYpCYs0D_H5=M65B#+;|!C9O!5_!VFRFSALze4p! zxv@^@b)A@j7=k$nyHJ3BKhjF5QZ&bM-orRA=ijNkvq11@VYKNO*)k|KVpNzBXYf-R}cx4Ea!vH(3%q9jXb_pVMl&t0iV)^63{ zwq8%9m%|KoL4m8VN)v-o)lu#^d+@%qTLlRlN-rhpqT*X2TYByEN;Wk?HAdq;stI|e zJwOa8kpNa(7cB~%+kQ#mfsfxK0*r=`HxB4GT5{1P;;sMn1zj{Cl|J%Mg&9X~Q4khH z6?NxEw;bfM)m_)DUotXHa}hnckSM(_NHIH$1an1jMl672Pd0W509A?ww*Z# z#OHN4%@3ZyRy*Hl;ps2Va<2D({Ljv>a}Tp$Bwv*E z=1e~~UQqs#%$_)(7blhOfI!=bg||i_Ps}x*7x(CnvQYHOfui1NSlFR~KeZ^;b+Eo= zrH_evHzP2@LsVxOPom84yOP`i{RdR6A6IV8K_AFv&$=`2udzvVu$D(C<0eC)|AuXh z_kUs}{B6z)Gi-M;Q5ZewxyNkNA=}^v3YLyg6|X7r#S znkt$pbdM2v)=Bm7bHzd4E_U?ajCpWp8NDtA1)N9XaEgf&gkEJp9;!#wv*h_vQJhrF z(2B`dX4FMIq%qPd`X6XfS~X8rncwr)tO}8rpET}6W;1i71ubiE4Xq4;?9;pT3r^kj z9{!^_Qp1{1HKKH^SiI)ESQ1d^Rx}X^`Se@@)mNdw%=t{92Gx=%V^kKoMxsqDj|xNK zd<=W0a<2(Bjj}| zY1^X%t_5dEYOc_;TNeL&k%USR0_OUC)h?C|SmuEAC`~9EIfsn3yKGRgxOaMMav1|l zP!=j<661U)JL4N1)+#9?EI~>DM`a;>1b>l~7%NK$LYeWgx-Kp59MP?L^$(DtE2b^c zX;cv0*E?-~4}HB;jaD5^pS=zWB@SjVNLiq~Bxtnqx24TM?HuzGxh6&PPYI2{(dyRsMiL-N_vsnfB>7&wV;tKU`E~XGocn3A z=RyZ|KyKKbUa!#;9Gov36o@|39h+2r_;PCtk?GA?4j!|;E?5!ZeX?E0GSl^Nn!qI= zdPd;Bg0_B$)Mf2l7dyfveD6TlRn5<@(4R6P9OA+ExJa3i^NfRxE46=5a)pTTJr-xh zAIC@9bNqETKZj%|{MY?$-LYYTuFE7~|0;G|DTik9g@`S3FI-q=@4eM6u?nmtYX@#c z*ms~WJ$+T5;O-CfKj#J*UeK3qJt}}18VWZ>=(U}Dmo@C5qteTpE4x8kiK?LoyC*hb zdUnY|1S*ZYJ9vBc@%Np+^i!%$1&w^KqX>N-_hFNlX*4`HAVpOq+-~e`IB*A4n077V zv&;LRv)cK8uK@%Z{atsV&7=X|AyBGWmcy4i)Teo6b5uL@Y_R{vdGmPB;P>?xYNZ>& zWreLgBE(|r+LXBj7q959bMx-`)BAI~-@;O=JQ0b*=qBIe%*c7qkzmV4(b64Kq>+Aq zu#1=hmDlwsav=1-gLXdS~q72H- zneI6!;mvJS|3KGY1f=J4bO~ZpFem+uzyUrbPi0(Gt3UP^kMq~Bu48K`x1Cq>M_GpB zPJv?*IGSnf+sGs_fjWjxyF8ekL!}2J14#897L$5sop6bj7*Dj#5}Pd_v*yWUZHNw{ zKRLDLtxdak?(O_%m(B(xi9Pp15dz6tg8yPdRF@MP@U7+Vuk^D!xo>Z6-wi)a^C%7IMt?W-9hz9nss-I*Yn;6?Y9^<)S z*HCwLm@gOs%bDm9Dy0%+U)`nwQEm^5!JQ1SX1LB?Q zJtxs|j6?Z+^6u#}PyE%ZEc2^HG&9OEiHO8w6H#6U4CcLbrEFTqc5-deFgXQ!5Tr_w# zd~abT(VvI_+7Qx_#h@@M5^KTsQKwKb0bH;uUMiY&D31`3O92#f77`FSXuLjQWo zc_cZ+scTcuuA$%P^IiLU@Q3uL+DB0Rxoh7+kh}Gy3QQEWM2I#AVcg9p#OH_7gqA-k zs2y>u`+6;=UGRdU8<&-%%zFR$7@FhY^D{3x8sS%Tc!HAigg=}_2&l%8%CO&0K{gVF zL^x;XapBBOQ`h_7-O-xBU_JxLLjno}fS?~KsvwJ#87PIeaVb-&ln84hhi-t_Y(8JQQM5VGfj3gI&}amt>rQv$&DHAhOkT zs+&oRR^k{9O%_$>?2~bsOS2XFe!?rgt&MA?kD>7Z{hD z?L)R~Q^+xLruX^g9^@2KK9UgN0aVp)$t#Z~#OJAr+zRLCII>D#vYP?hJaSYZNN#$0 zS_+H-OJt)jv?#F{VNXyABn_km&?FWl;^*MAQ(=!WCrdi9AP$~_>sJL+Vu~A}qEtW? z(f$@Fa-+PAaMRAnzLTFwVFib}?SlPjr+g7fg|0+ovXlG-VG`%IKh-U<{!8MF&Y&zT z=x1sbRu}w|vK*vb2q7FFnky+*X?Cc?REfAbeV{Y)1mR8PFrjG`A{%?cT164uxp4|n zN;Lop&waEFELves(PFf^C^=zlg7uMgi8_Sra;v^b+r~q6>pP|wyn%B|5JC#=l$9Fi zKsNTs;)}T^2n14STxqP^ICvYozd^hcpiB8jRC+vYpx{qU>UZfXVp=~+t9?j`JP)eVDWxw># z-F*VtT}2z?k*zj;XK*t;9HY1FKw8f^6E}{0$01 z-UiIEKiTrqyIy!j)Q!9`esQQ=pl@rNd>kIZ|MFG%Gdn-|nAMIc*ayA-c3W{XhdINd z60Q09<<*T-t0{x_PYBR|$Ugz)Mcc)@o<(F82?*hsVkgA8hDx zEUCyJqf|)nf|p>`pq3UvdbKonKe$@oHxhc3D2VEG%eT;){{MZKCPT;`{_bPfr}EK$ zr~jU^o!ukOdCHj3W4pb+3A=Cqg-&$2p<7#co3w?+7QsxJQv#GsK&{|Xv5~%wGfgyA z_AO&olblfH3mZ92lm!T)omDsxR$_)jM0J~uSYQcJ+SvVR%(YSDkpz_%>DIQoY#E;ofC1e$=7x-k51Os({Vb zy67OQk0FlZ6_NXXQ`4kc!NTT@#I-_m3(E-gP#+S5rJ;%mWR>2xTh+gvkAb)5Zh=3K zlp&{6w|}uJpd%@vhiti3Yz)JKl4$V^9y^mbg?*rpMh_&*BABU8bqLZW7eYJC(6Iuy!`p z@B+uiyv)HzQ~>Po`G$x=Y0yB^KvRSO)XjcXXwtQEUc6#Eq1lwu$ugoC_K@#WVyP;} zNN7~xSIqCJgHqt9>v`fQN1aS#0!1pre=J=DEhzcfFh?k;BhS&=7en5dR0P@dBn;@l zJ51DLB zqCdxb$$7UMNGB*vHMLpqAjRvRQD zM$s?Dk!=^tW^bvQw@OoWV`XO@J9Ss<>_wqDC8>lZq-k&pld;V2l1Bx7Ir7KgeUrbt`0$$!busXrJ zc#ma^4M7MI3@ zh0#*`n~kcby0^)trp1@xBZvuc?Kw|5xr5a|^Jn-E&IDkOp~V*`w_3BpmfEk6;~zDi zC;W0|_H&)YwTi>5mw8dN_Vug@o%^)?rb##%G;t4`H%abIN4n_kWQlN`!HVloj0_Ni zp%$YW1B7%^84T8h+4BP?v-h_X_emgTt)%iRDu^8hiX&+2YuaKI_+;D}5vg-Lfgo!} zM)#oT>jEbk`Co1VVgG+gJ3W5WPZ$@w|NEDMj5m2?bpmyJ_^GsMXtZrG&E^Rm+}+(- z+D3G<3}s=A9|WS1Y~O>5!FT$+XRmj5ZXUx4Kp;Y~koD)q>9DS?y$zyuDo!f?pU_-w zbgXt>zr=TkAU4)k57C~FmlQK1gekaHCMD=na~KxBXG&4cy3ABd(ymo>CPk}6vCR$% za>av{@|LRGXqd|U{gp1*-DBFkmSh?pp&Fem2Wnf%Z1oLq7l+{CgJ}%Q3g*7xGLS7% zmLWqIH@MOk|&;zKe*q*Mopy-K?^ApB#lKjLVabQny)aq%nuYCd$W)E znNJSd=#Ur>B1q-(QOF?~XceuVM^kk}fA3W|RSY%KQv@=CC4ZZLLK97H`L>DZGB%FJ zN8C7^=p{JBNmiO5v05_X&}`Da^&dc>J1)ai4O=^9M~ej_zTW?=!3+20s{6X5uMe7- zvH9zmr+*LB8spCm577riFzWpZIE)m}dtt>vUg~hQWcfpg(XWE`#`FPfR=N` zlV5vnpBnrWjU_RH;@!Qs+cmyW}E* zNfRY+hQvfQW@Nuv$|;^Wu2;@J3+_NxiQ&?u1s1ymSM(ud{w!0SE=pL?dQXA#!T z-jhrVVek8#ot_tzcMww0@BZHjXb4u3-5U-^0dZ%Z-ahc+?YIfL>ge9zZB4LhuK%N_ zKR-W%3O+d%rtaluGF9Pc^F>4$Xx}?)L0vo`<#`Yk2>@*;&V*~%&-#XjAb`lD^&`zo zd)SPL3Vs8A1D{%XCj0baj#IRiWh_(z`8*#s0v|u$^wh0rBn6p=D+7xx>!1+%Uc>{1 zu)KDDWwTjj0olUukOo4Jw1C;f{Y2A0>e)l0HvwLvQ;EZMLa2UkEgsAWHp~OB8kG?-l$JDwPVj z8(AwA6sg6Uag9!Y=}RGkPc1xnq-~!N|ZZZFhCcH+MAh zl(M?*K7tbI7KNIl&hed{&fF!dkyq_!SxM$OhXYUnE*IKu6L=J-nBvF$pjIYg@k-{} zL~H=*SNjVSiLw$zbck$%vv<@D8*&3PWJqJr*v+nkBg`OG8<3M27sVfx1`WoR1h5(d zO$W;<6zEX266sGu(W(F|S%?5*8MYSd)484(b$yO)b3y#RxzUAyA#Tz<*rcra0Esp+ zR2EEuJwK%xZsgTRNfXA{gQ&_A*S+(-o zd5QXSz83DbCpr|cA0~1voW=CyU_aOv4~R@7^& z?zp_(2#sft+?J8@&~)ThxGO`I1RaKdJ%0mWxGRFp2!&AB1yU93pH{a=@r*ndCq#if z!FGc=$_$N~^Z3}KflZ5*O`BS1_#~fuos0%xL-a6C#7de`U4xy(0l^fL9FDoQlS}K5 z<7aKyc?ju2t$z<5C{T&x3etxj(^}Trrta-^o0c_Ba=u`fA38tzY&tAMMb@t@F?bJp0HCwZPLJRWZ*ir=}AfO*-$a@n)R-y_=(@s~{#!!x8C7hXTt z=$Yfk%6hAySMThhjcULbQoyqIzg2KHBPyKw*NUrQCZw7*8;b9mg$BBQl*YnBCTbdC zcVSjAkevMgyVF@zFQU~@BE?d*!Ug%+7TmpBNJseX>0KL%QPfIQM1JuWZb34HUYn?8 zO%X8+Q7ux)DtF8aUe8&p#NSpAJ+wNZgz#$^nIWF!9wql7ovtpt9s`Uf=k}p#0pY7w zsv78^%kY}fTqyKFMT%T#_8~f6noyM-9fs2Dxm@;;JYjv)*Q9@+S!Y)?$9g7xxfza1 zQ|$@Y_=@_e(8Q?wO}jjdkO&5l(et8tIHiGbimnS*GuuykEy)l%y7htZQ=~LvO7ob| zSE)|Q`*f(|GPwxC0;&jwF(IlEk}}-DgC80;u&@XQ7qT^AWSneLTnY11>XOB%Me~v- zu(II^h(UP!VYngZlv>*PC_+LwQ!ay3aYNW>=E04LQw54m@TzpMquK3ae*tqNAtn;+ z01ask8T<&cq$M(XY{bZEZ6q{KShg_gavTUSDon{}RKgNoZURCEznaxX)$u3`qnQ%$ zL2$G{VN-*Vu8>DG%teNCyhiv|)GCe$)FR+>SaDELxRFjjZBOA*zmAiO^BYOV~|7gbS-EJPuidD@IiW;6iBPc!?5?XMxA? z7X{VkZ?ZssKIVLahRJ@^3X69q|Jkp9Tkh`-J8ILXW`~czCci$9(t9=?zWP0JwE=ah zbU070#~+;c4t4rC|BtA146gHmwtgGiwr#Vqt;RU9*{Dfl+qUh-w%yoCV<(MqpZ|U5 z&O4J&naPJF=R9ZcwSQ|Z5Bm7rA3Gy8N57U`@X@3YqeMpg%1#xORfO^WK`|hNVIElU zlGuHHY27lp-_&Wvse$|f#YoaLFcCMWJgbcQTarcA1pjxj{idu3`Br&Yk|asxf_E8y z>kX<|_v9|fV0xYx=0a!9S0E4&tuLu312DZ$_Vg$OWEoAE^b(fVok@MBr8?g~UGQ{> z?ZAKJ1XnF+)p&AHJ57I{Aan;&>gwb&zB>wi@9%W&u0!`WbEZGjTSC<4u%JoTXF8X) zhnBFv$P>o4Q`g6U6h*I4_Ixw^XaLyK_uXCxpdbdA5X~i{;!}bA=}>^b#`YCIKKbvx z)(ga4@ZIDBG)#~0TfB8hgyWp+8*}cj;q5M;sm5+-OOMGaTKL6rK)x4f%(eKo*smn= z%mmR5^p!h6`*^tZtw+M!y8*{P#xH%@`n30#0)=&?^%9;Ehc1^-oI(x%P+dvaCYOAT z>f9+h>>%DEe(WR`yK3WW(i>eeDnRgMgaXgDc1odU&hK9%dEnfJ(TtFOqZ&t>*tGfw zyXEb*%ZuooR|z3sOQmMJ?qUkBkw!9~H1^Ko{g^@*Pvu)fq9&S+WWEW&CHjqR)gqbV z2*t}SvJ}K1|6H603UgDE$RNo{EMsGtV5jnxycz5xe`IsKiWl@3^skV0D&?8DUxS#| z8WEX{w+_;Sxs&J1K31Z3yPIU^W|CNiDvpiEAZHzhb^wp8%gdrH{b=vq;Eg8#e7k7r ze7*K}%0ad2uQUX(p&p<2jLqEop+CS&kNW8mn%yMFVynm3_ko3k3}K-6?r5g1wRLBq ztFc4Ph7xe1oeXZi6aI%FJ-l8$0f8;vzkGMj2K*DqmL7|yV(QlO*AbnLXU?9uD4OS1 zC`}rHXzXyii(`ho3y36*-KhDX7`gH&6Y~tVeH9#~egP3$j$l6oPqf?BS3m9E=^$HxWUi#6rFN!0U`uq~p`ysC^~_2+>n0JyN}{}$8iIvC)i6U% zfor(7C-av;?bE>e&K?v8bqo#(?k$~=)ici79s{Y(gvKEqggz{(Wl0{p#sp$8qWNRw z2BbdGu&ZU4E3mkDpBNRJDE?}PR^Xg*<1yj`oZPusaZye@r>FUx}bYr!t-{pUwpFD+lPfE389e?r%f zZ;YQ7ub7{!J%=r~LBCEd>5Kxb^U33NB_1EI-;s|Ue8oPeh&_J(nmW4k$Hoq3X3D+B zHvGske81KF53m6y@Od46+}uApo}V9rfLp+#W&P%Sl|4t$Yj|jAXzZ-F=K%krJqBop zyavAA{d&m{^so;B5ijO$s{K7(U4JcvUU9Lpu@AC7B`!W|{Q&A@p;W1@t7~Wz81MlX zYfNQjrBGYF&*@I~zF|`sF}<5+xn0R6^{M`_1vTPM*Wj=XIKDZRD^)tqgrJMN)c5d( z&oH&!YlNSJ9prZWb4=#mYz%?p(pR$0^3{Y3t@$PX+M*}h0{#{oHck73wB`!4C1H%) zZr0KnB6tBRCuIogSLItOdMI>(@Tok^8@+gsOEgPy-}f;j5hyyTHB<~DP)X$)eZ`byX^ zir*iX&*o*;Fm9XN#r>eVS~~u#W^qf|$=T+BD{;z-b2la5d=Tyu5-UB3I=BjCn}&W6 zbdc^iw4=4PFdt8=A%H)RTTDzlKv>*E{^-?YA8!>>1A<%=*kSCWIHoW~`yJe8osAz_ zo?vaFoQ9$kF$I`e=fx@Kq2)qM+Kf&Z_IML54xJ@F59x zW_Fp%*(%vM4n0Sg8cHwS{f{jxhlJHAcXmdhVV^ib%I*fyPypFUs1-st0paawboB2V z#Mi;7#VB?Bw(rjRQP8&kI8Y9L2Ci5bf=#dsIZTpnd+ovxcX>=Zm%N zFt~vBV@zlho(9Aam-3TNg!$V>?lN+r|L^MGljYsgdYm^U;z2pM@ zgJSE#sGxy%MY1ONldf7=EZcTYlbU)^8Q;n8=-u=8Nry)-mcU>;t_G5J2gxqz=LoK< zF8SFXSOkthB_sC5AQHY;kx&nXDqdv&;QOu`}751N7X4bn= zEXOYeZDmo($`MjUypr8L9?$5Dah=9k24XZIj)iZ)7i+$JTi~SDlvWd~AV-|9`k^rf z#P=7lv7E)w8nPV0>#POz_YUSgA{?4lw{n{YuNuwcOtA8jL2Bk#kb%hr$s~!IW6c&k z%^F!+BI{%o$zm19jZ!$v(cnR=^C}Y(6F!7&<6ShXJR)*_g)MhT5TAy<`ssXGe<9p) zEJ9Gt&K5JuL25^)OBudEJ@7hvv&c|nAT~{q@pf>2Fh5$Hupn+d404n2*9xF&9obb@ zz)TjGkYOr=(Z}MGfvHsSSJhxeRmYQ{ikPHGN6D_=sM{!`?38IPGKICLSdhL{rq_Ub zg3{?bgJSqVcjeV9jL63!!!#4+F{mj- z3o4+c^hFe|rs){lr;m{p86R{fyvm+LpDV74)hJh}UUDEANyoTiDb7X&3mE2SclRiP z-;Th>Uc=DRlnG!QP(q6iKc0qy%&=++C#HGMgqXB|=VSmw%607~JH! zdEmJn;JMSs?3N=GDneW~+tJr-rJep&5e4TWtL`dKs0QQy7tF)x88by<_=Y`*_{1j# zS=15-C;#n=Rg!fUS81Ot32HH3FWy^xKB~;0ZkK5@g$vVtM7{tq9o0!(uze0g^e00Y zbIJ|i%;M`KdyBfsVgFQ-j7wC8G#t+s3XDBcT;whGyl;hrS2Tb~VXLuA4`g<+%X9k{ zgf4L3j%2DJ*pX`1P5cM(dNii|OaD(%NO8iigV}Z7kw>`m(eha#jB}yF!>wu5IZ@O%U4@BO1@9vuLED%3$OSL1U&n`9gHmly(?z&z;#2Eq+D6GBr%4`6+s85sMLcr5!Dc5rwY=C%`w&wy)}3N z%?oNG!*Y<1sZTj4X+#l}KfNsQ1)9BZDZ)!jFOoAsW025kWQv77gJ+4rMcGxo&?Y$a z0r0Z2$9d$iW|<+#+ICwON$|#swsSq+oWAcX28Fs2ZxHADi))QM?PD!*L8zl>%xbVY zR1^H($aYkrRxh8lR_{m6TF3R#U+4E)A_>Ev$5XZVkpt3E#2#Kf7%U0u6IR@{A}Ot# zpRv*7%;Ej)?%nkC^_p<#d5t-9ze+pAMFkyueY{LCKLdjbS7iRoe@`lbex<%skP#@$06Gj z04>^Q#ScF}4k-hz+_cG58dl7T$!PX;+ypKLwZciHm7$E~$H8~i0|eY!?@l=Xz(ip* z@xmx((m2EG1jXwxI6w{GncIUkeV_9n7}eIgOKC;I zhzw`%jCPWfE5h`Bi>`+K60z=Yr6blqYCSs315k*U-d7Y-0rL>At4KMV5Lr6t63>3B z;-&u0*TI|1gn9i04kwRK!PQ6GmvE3xQXeh;XjdYil&YvYwgJ`f{tDTrlO9o#F_RG( zEqn&#R35JB^X2l@J;*hjW5KeeZO0&OmF;h?Q|d>A=5y*>;0NNTmhZH~;moszSAWYn zPhKk=FQz(rIo|uGZu4^O%KPr4p7))uTdmgwRL;B$q%%|@Dj!T*B1leCl~82fRKK3f1(Bi0G@neHdR#QI4+(o*8->x zrui@sWCBdi0J9Aw0FF(hGOu2^5o>;O2ZL}tZVzCAgh0Sx00f(Gb8~NOZ#S;mev|n- zDx@vFGqF982&$foN&ZfCm}Pl5*j!yuVicfT&^|BVKVs#$7cvp$R8&uc`xNy|5HVI$ zhTZ_-(!klkD+56bdxG9AXM{@|gB)>0v>at*8a{O$B}qsihqxSJ)ha6`dSejgz2Uf5 z^bx}9CH5r|cNumHT_(n~R}BtNlKm2FtbW5%+fx7di={;onKD~~7YBX@yD9`NA|t!U z_01xxP|TB)=wSrcK;^DE4{WlQ#ky6KQ$HtND_nx;rJ9<_bo6PoOq6!^YI2+TAuVfU zyv~4b^m9bKxEOO2Wf^3ZAC`Bzp&YDSgsJqZOmV)aWCI9~2r2`P3NoLZHLj{)Y1Z8G zEIuP%+lbB7S}QO|xeq;PwGVG%Ym}|_pou{q7YTPXF1Z&R>Qvd0FY8e9u2*I%+BSrT z1Dy&rfqsl_jO~;#9SvIGr=;E(6OSU3{|2xG>8Ea{P?jHD!uj8(y8 zLK+8rZv_Wn~WxgwrVYkWhGti2y`2 zNd%3w3Kq@$W)>0{&z!_X>C>{1`L7%soESl@dzP}X-v)^-pCL>|I>hn#oLl^xNXLXz zqA0G(7NOJvGca<|E7A)Hr|_(U4|ju$9>tyW?^cdrS1pxUK$d&;9>2&)$f&QtDy4G% zz&xo1QLtDldn=A6O+hAx@k4A=A}^keZ)b0lA3ZYs9Z)8zK@I_9XMh4@;a3k`HJReW zMaxsyMubXf5MPCZXQpno#M!l`kqZwsR`y7m*n#Za$?0QLlO^C}FNMgUm5>4dfq%@U z*y4X;DHNp@Q|NE?PN_}jXlv}_WdF>! z!C#DR9E~N_js_pQA74{=Yc&oQcVPpRK3SeYQOLXd_b%6eM@(dqt;Btao8RYfgoxNm zOPzA+n)c;94J{t_`?3buhzy6Jh+`ebEXEAcmK#uPuAxoz=*L6H8iF`Yx>A2{Xs^N9 zsJW%I#*BQS9G>@kTFcwb-{{%M^}^+${m8u}uX=WJ-OD-*?L^@Zz7$S)I!x^$i_L)o zKEsK2m2iJf@3qCHnth2czO1e{VOX6u{iRV)kZ$=&8H$8^&{Ap1SR>Vgy1NNY-mq zTVxj~HSM@;G!IYE_vI-t=na%g=}H#O9~m+xlv8NX_)X#@V}8(YnBIcyqp0^p_tP?7 z(vJ;UgEa>-ajP=bdv=5CU)dd%cp#UF(1#cfvJPb%uN!Dig1STq;xXRLp*Q1jn$zvM z#sQssZY?0xHm(H1B&5D@ETql0?7g=NU*{CniBATB>3)yGA#G}ss2Oc6u;3i+fI@mH zj$e9qd+MLKT4{ka^YM2eHvGsm`I-!LGp&X>NWq{)utAV5LKPa=rytmX^(mzmHk*^QFNzsZnaZ}knVVUP1 z4B7MZmiYG)dC5-D+lbh=JYx#0u*tBPR{D~B-O3DMesKbm0!9-yghug9c()a!NB$x= z52s_q;Rr;`e5a~(u?Yg5UCVT`WG4Jy%=fy-2rsx1!pnJt!EEr=5T8QW)D{wr6_8{hgWa*U-iRr zr;Ij!#ZG*Sn$qDG#O~xlWuaBNjEVyxwTxCxoHxOe8KE>7gO8H^DAwFdjIHpZczxOB5R(U5@N}w<_lJTm08)TBCCb21=CTuekFo6 zhBl0%MgrZgHc6U~2#?dJ@36A&{_Aq>^WjUzW7vbtyB zP3H?yfxG-F==*qz+4BqdZ1sfN0oYHUXTTWw{>HmrBlg7|InaM8O?O|}!u~Kk)AKN! zbF1Q7f9%btaraR(wR6?Jl@RKZ3!ITQo4ZH>1~hoD7t^_j-#sgcPq?DuF|Et=H^m?p z10GWH2DP3yW$BOgrpbl=+)x7oIBb>QV^H$f9dOaudGf(krrn^_5Lfoj`u|*y@b>Qkg|OTTt?~U|K?y7B&Z519we)y~$_^~f_Oo~f z)TVfIC(=1mFI=o1CWYywle2F#h=}qyF2(Dwt9}%&(0G2zV!Zeo0tLG zz5#W6_5Pi@W9$V6SlK@Jjm&|1YI}QoMJVw9&G$Q5C zhU<0UKLRj7&leYrLTPu++m7oFHy~jyaL?n_`*8gVQl5q3xTKy@ho{VYXdfyE8AcVq z$cKp1hnf3Z@F~1%+`IG1L^dL?VOCZm>S5NAvC_pj4_moqX3YMIp z6zTFpdP1=tP>W~(U;6ML6QTZf(o3Bo(pPZ3c+8d+2GzJ-7y%Geun_AF#T*igT%$RQI%L1qh*-Ui_zK7aj%m zm*-pwi<1&Z{Qm@QS#w;IOYl1X9}7^9_aAhz)#*8K<@3l7{0qu{tIF<6| zo%i(|_gb!v&J&sk-UMbRo2R#wi^H7FKcj~Qsz0eQN%$$G8U|ZBd}74~4y-?}e=%)W z&>a+5l5!3FsK(yTALCo6)P_tAif8S}dx1xKp>#i?_t~NMQSml%!wm#%#XoGWk5^Z2qIKxwL;GI6A8&?0I@Q@_v59H3YmUccn_659he4si_D9 zbqGIkQ~7xH>M&w3H#B zoy`t+;8lI-S@L@rXtKEzxC1700G_8Qev_$!{V#3sKSw2y6VDn$KC&PGUv1_Pw&9zM zPfspG*v_wO*~nPEtXv=ELKRCQpdBH~s0ngc2;H9Dkb3WYn+9paB1Lf(4}!?*`Ier z)c%YLgtyL7pD*B)hye$)^N?v-y$GRJLei z&J6mFrNR;sM21A7qJwq{b!d>LfOCaDVqz{pHJovMn6!GFz=*^V@PVa$9Qb#_eypr* zPPOna)T;E@NrSQ~nFTdwu#fO~3AK~wG8l&@HB>n)IZ4#>w<8FnvE#reJ6OsNG8Z(X>D;t#dmr3v0jXh4C zE)4Wj?7Jc!H)NAY=e#SJD_;_@eAZKxY55vak169NRKEYbral&R0lVQ$qi54FwYTJK z!nN`=l)c_Xmb@YM2M6oc6Pnc7V&8!Md9Flx55zmVR`L1(;h}N*a7+?Y z_Uc?MuB#sAKZZIY9g*`ed$seGeoRmJz}%vn{EU;y8hEy)K$|}OD2rbxTl#s#m@`AY zHQ=njK)6(&eVYBxeQO;r)@CvOpHFyhX?E&od0m+GY~vGcmZpJ@bFaj76!P9D?unB| zZ`fw`ljwkO9;$N%MZa|#%3;~pe;8gtQvZ!o3*e39)fYGBOl4Ut7+9D)~1|XT`s>ha?X=V zJ0|}IJC_l& zNU0|7N#{t~FH9rNtcpp(lF+Ccfp`<~guq40MaqbTv}h$-8qP6oS!sl*{ezWYq{KtKRO)<)6QGYYu$eyoi3?=Fojr5au1e1Ta1F809#IHrD)2{7d%aV zZ*Pg2q5a03sM@#(*~Hy0Vg#SgP*qX;vt=!6m)INB}$h@ zM@H%ohpR4tIDr?6evjD*TRyFhC*>Lxq%C4?Trdkt@2eKi*S3W66azNpqBI`9J^GeeW!rHChnk9f}b1%@>lA96cvJF~}7Xyn`r_ITd}^!$8?vdBb4*BP-J&F9Isqzole&* zK3dN|a!=LtcJUm0o$9T>;&mg0-ED}b$dut-sJzC$jWIkWexmmYLl2?6$+F40^MdN3 zdKEl``j+S&g$To3-_*{$Ovzv}$iD%#!IdSR=@ae@=^X6nuOmf}x$I9Z>P$DA7d?Le zjlKI!NtSVMx*45I5Cc;^nF4ni>f6ZApWTC1n?6a0;%)za>U_=wR1UBe{mxeFW}c zwv@Th#WAo)20?vx4Kz&kA;llCpOG#hzrXr8zhkH3OlDA8H1(}5@n+q0$KytL&~F=< z4U{RhO$eDZA^zrd=sIH5+&J#dzTb#c=2U?+*VseBiFNW2JCo0edAjl2y1IEMYTGUh z_2Of9v)ZXJW$K7g*q(DI)TjMXtgJ|y=t5d5FALIpD(?r*1fF*x4&BcoK;Rz=+Y#{^yN{jHQ@&kmWyBxy z4yytog4N3_PiOa$qZW7Xww}L@e0QtO`$xdFGGI)ojcNqyQ}(@Q5f$qOu-MMq)F!c6FHnZKK_iEM8>~D-ehiG89t5qaj#E%^DP8Dd8Mw9*6xr#Y$dHqD^Y1ceHZ+ zy^r!M;U3|MLnU$h2Y}xmx4RoJ_8ez6DMTiw(*slxbjZ z{t>TcSx`GkJ4ru2?j+~DUW>OLDRHw5QD-a1)(KF-zf-?y@q_pIlCDP6D@#=>Iz}h~ zpUrKpwzgr?J!BT&lOKopL)<-t+roK;ULG2o{IDh@JZ_4qOZFQrnwOkaG+-bv3#i~`troK@KT%np_7=WxRr*uLmF*0bjnxEX(}E&zr^^vzAL`-^g=XsQD)eq$I- zaT*m$>gb9Fdqipa6u~!s5hOM0YDgL@Gj+uHBdEh;ygTRqg8g&B)7fKDtL7mkrlEq4 zSB_s|cuEwxq$(37$t{pg#F3}l^2=zNBDCSNqpbQ9oRcIILk@@I5I1`p6?PlH-8Eq3 zSnPK5v0ybB_0z-jEW!cDLxXjxz5ox*PM*oeHH*#x)+I4S&3 z#plU0lCBjQc+g`&$G1Ib8+cUYIfObT_VgypD43IQr;?eylCyI(Qex%O<71kgd>eLX9m>=y|G$5PcSt8Gg}(`BxZI zsodIjPD5ASi-{3)KS>E+7kB**$Q=?2z@e*~gocqvK5{)^@Xa1kqBRsgcqOZjX6svI zZYy1h-LcIP`qMPw8E?tHwUGf8mUl_P?re9zKgptSUpIg2f{g ze=~z>ZL@Ly`1a!7v+ve3GHHzda2iqD^AupAsp;G70&J~Mxv+=R|IOAt-G`qqA6^x> zbo~;`H$A^Tczb@{1XMOAk}JUlREt%Lwu2YiHCQbZ>OU4cpkn)Jo3E>sN?+xJX^E;e;9N*0JUw2?m z8xD8MQ1-8kjIWflIHtr{YDXx}XM-|GMyZKpC_4<&#fm$of@8FTtBUxv!F82}uQ42e z10$2$BBM?D6K51mxw-tI^%_lroN!RQ(N0?zHYzjeIpAS|-m^e>wc>a`8728iebCWK9(o|jWnvSpBW2qyRpK@NV@5;pc9E(^eRby^ zOd`2z*KDV6_F@Lx{oMu#Ac-OG0h+FXS$UsFAqH>{^_g==>K8H zjy$tEViZXNz&!Xu9{BZ(yF~^SaFYiZ+WnUo%x#07y%sn}Ly_5b&2o(eGlQ<0QkLlWHl)T@MMkT7a>1X~KVsjO1^N+5 zgY-yz8S|yvg56eGMclk0z;ftjCCW0|sz%yc8gb*2rr&-H7`dab#0$98csqrMM2@XrV6}%sOf&8$8_Z0NY@>hC zZ^#{{C8L6PY@;LY3T6Ee{zqn7?T(a44+(MtMZrd4C0++uXTo ziTiNGb9Tb;cfCo+u?qe7*Nq<7NT77(X-l|<(;(0G} z8!mHZ>5ROZsiCKH`c~ONzrfbKU`!MIL|I1!7r74QL?f=$IG12G`X>EH6Vo-zF4ZGF zGtoj|z%E5?WAD=(Xl!Y3qv+p5axakIxV!%nPo%!7Jx5)gp)+-gJb1MON`7w7(fNQ^ zj?Yu?*dNRpr;p9MqqleBkKS$HS4KnMch3JncVEmD8XOA?i;azq|7@1e&%6L0!Vqr* zq#w7owiEmlR!=}~pjPd2lidVBo{+O`RYmy%KZy=Q)pE6_t(T!ot}oYe?WFmR zzPEc}$hfeS#2|hz z@+=febqq`6F1LZfGN67Phg)8!k4_tYm2Q=8jA6-ZVpJqfEYebP4XoOznuIWDPB_pb za!LA-(4KLKpGqf!r$lrfHCJ%1m?M5f+>J67;}N^K6$q1yeC zni1m&lTN5Mlwc&|_$Q1f4m7TekAsK9iNMjIQI`K@+Y6FPyuUKPq7de_LY^urv5uJ3 zYl4EXuo+=~PDV~QznW0r%BmF0PNZSSW{8XBM@%XjQm9-0Y@DP76}6SvI3prsbhCQx zB3&AH{&fwybTs0Fo)a!CIFE%YO>{wEK-80U?7xYEj zDzXN*6S`vN#cowEll$=?-G){}dyWXPIxFFN0WJYbiMQGZbTJ7*nDdV_kJTBY?%zV}aqf zDh7hoX#vjNdN8EB0~fxcq9X9x^wsEms|=)JyJ97#)r>vji!5P9F5Z$Q7`gI}nHdq8 z%b2t~{D^An4l1}ZCQ#N!fC-|D$2RM$)bOWsB7oz#TS*fUHqUQF`B5br)`QrK>Mm}* z_{gIKfrO$E7qu0w=AJ_)KzS3j^M~2lc%5?Tj0NYoC88$(ez*p+NhJ?l5b_u73)lg! zJT*&>pSv_>k}P?!0o4*;{aidc<`P`>=;X{9TP43^ufS5As!_&uxp28O=OThDd^f1( z5<)Nre1iTQY%l1xO{!Zevct=>HkQ|g_HMs)XE5ga-Q|*Qi=OX+KUYxTNNd-};W2i9 zbxR~_wP(`4t+*Z(vVGfVsO2Vr=9G|n;JJ$FU%IZ=^L0RW( z(fDjf)S-ifs8hq*Glq<>zy^|csE^bWj5X3_MTSw4!ZVWds~2k$)<)2&$$$nqt+wnl!Oh9ZVATGR~-D&%kae&EeicH zOUw5;EDSipskK)t=B;<){UG-@REnIF@DWI$gTPgxix5Ux@R6Rx3$f;&)KOs#uQi}5 zOR0& z@E*?logvF!!6;D$(ORQwnEQa2l&%O*61>ex$F)J8g0be_6u4GbGfLA zsBUnW7OrIL9%3A8=rE-RekwWg0i6e$9%YuZmfUShoG*X1h+*~?nFNMHX$h~WQAHk+ zmpv^(V7MXNf2A zu?L^yz{LSXA@t{dg{}xT+qb9Fiy?+*fAP{3X=f5yfx^UCNE$~xM{4Q8w@YRbUfXi`g}ikp#ke;3Jmzn^z_riA9}9J zKMu|Yz&6|i)d@tP5Cc;t@m5!%PL;#Vvdu>=P@NOYY&g$7RbS&Jj)NlyN~5wLj-LSS zbm0nB-u_y+zWdTfv;TiAfcex}BKnqni?>#F+o@Mb^N_P-!u(E_s%OaDzU|PnDw6jU zw87XTgXq|jgVw7qUX8LSE;7N83WgJ`NR}5vIeVb#W#4t0@$au^vm1VfG_7Uk0ah8-O z(YEa$`ql)j!&{IGca8VXJU>!SIocLVflcJm(A&OKZ83%_?UN(PmHWXvoxez|Y{ z|I`O57Ir|D1O{^IBQR8>KHmOE|(>B?}j#aM+HrAcAV z917_BIFiT%EFhEq;UFc}@2Kfr9NbNIf$1RgMYw$!GdRfP601>K6`KuYn%(fS@2X%5 zh?bFIajFGYc%Dy`-3AYZCiH!KOL56|dIf_*p58U!zM~{(p9YoaAEJ>CBg9ONx0yIaD5}#@ zg=D@t&$GAU|Hdd$En5hRCYMK7wV8#%Q@PDZ>esy_kP%kPh`ow}&S);a;9F~^JPHfD zzxnmVI8gBPHuL#51Hj5(Z}f77e5d~lZUn~0Z`L<0un~<3g!oiH@f~Y-^3X!^^LxRW z?#x#iLQ#A5+Y)COSR5ohN9{$Y=x#-+_DLWI9di>m$LdIq=i(}Pr9D`k%kz(F)3X9J zR}3|owxZtN#13DtqB0{f$8u(mX}@O~b2q*CQ!6of-Wtz5RL$Fyy27opw5==iMPO2X zW7dd+ND#qL7ygms&y%Qiy!3NXN)#+ijE0=HfbbX=J=ckL`#SN1`Xdv_2dw-g{8U%; z^p!q$KosJ1-?(@%9)g*XaKFoZtXN-y_$47#jxH|1S^mqp8$W=HyuD>DEG!h>^fp|6*cf#g-2F{8bar;`&*XLh(qqgC zq$Z<)J;a%Zzysh?fB=+ItBjsgK2kD)UoT1TKEO%l%MOsBQuVzOcwidBi<8W(>mV%! zR7af-QzX;kyGpO5)t8IF?L=rxRu`Ao(ZNDQD27F+UC~}DFNbkUW18B^fZun(u8OVk zUL4Qr*oxN@@K$qFlb9C;7KlmVH6JwO%KM3GrRxKOj`|T?+3aT7xZ3twBH1FiN1} zp?*f&C+w^1u}KI1q|z3vVDyO8iq(}X2AoBzCgeiA8B`659AMS7b}F|kUl0qap_FjK z_i#h-?;WErNw27HK|$xOXmC^|_3nuf4j7pWNx{FI-B#1laG(zF(e@R&=GHL0*o~cL zP&SffFj~TRSP;gg<>pdoesBL8GT2YKOvy^?IWq;s>4WT`Tgs z;w{Jjt2^zaMan+!m@UDMBC1vtO2x)+);4y6ux?SwDDZ~!H@N=JJ6fks+vjUe)32n3eVYQ%CQ-i%uiloYEJTrED#QRL~HL4QMjhd`G#klVxeA1 zQ&)ZUPb*!1uQ#eHm&o0q)Q-F3JA>7motmNd=3IeKp8Y-)ZboO|Qs#*K}}XxP|hW2dog+iYyxZfx5&8a0j4=l1)1 z{^O4G>Yg{}jI+mHd#yP?b9=b+nqgX?!sW+tU?&f0ZTn-egYd0wIgb9>*7JLjlNaT~ z3E9vWl=@J=MD@hRtjhlxgLp6>UYlt1Gin=U_4JF0U{y`O7F{9b)m?M{1E^5`CH>t0d zH+(bA#R#Jaj-N5_n%8pos~wy39+5~)u83qA^6jKq2DC6}sLbh@t53TY@@dyTRe`!~ zLHZ|yHqa-8)}DsV>KQTp;^{4Q8@MZ0h7`u#!}q|FT=L(rU-==`4d~O<%WTa}CZt!g z%x4y|JQ7kGDXJC-EFd%v>*Q(Xf*ja!XVIO*2O+Kf9Rx$s3B_4M81Z~U#f@rB67e0O zTs94u@E&99TQ*ZC)y57F*|`fID7o;Fo0b<=u;|Z3Qg=ZOrvc$%bAoz_%YB5qV zN+Km@V}Z2_vF4NnWP>iqLplSBLWBw&NO0eeE6jr|WD-K>Slb2JGqH4qy+z~B@ z>cVvMm8a6mau1{c8Sd5IC!5eUKBgmT=0S;{v;G%DbLs1UL%@9qg{i)A_=Ib zt=zAIKl!D|&5yafbZV~^K?7Zlz&$0EQY7uZg97ua9jr`W3x4{MR=f6&pF@_#|Fc0> z&U`iQ)Jjl(XfL#?VK(#cZPkBBM)dRZX(KNT z+H%>uaNQUZcx1B)JXrazR&npxwy#&M-4lvzFdv0h0EHy6;beWLtA;d-Q>!VK6Ez|h zme*UyV zFYA)ja#kX`fqZ5ptfhW-^>LF$x<&}!Zh^b_SQ2}E%}Wic-43w1hUXxsm_DJ5`s+&j z&g1(95S@kOYRcyZPk`w%F7K}0{u}sa|92K1t_M;mb6JBHOM8qY0B;td{f$9j6uk^! z#Mtjnej&R)>_GiitKLBr#q0M~>cd0mM_s8~se*LofJUC?P3j1<8Ep1 z*0a6Gm;%^ZYRicW!KGRIygw8L?sItg+HI#0|1+fc5vSp`w*GUZ)$d%hM8&+`drl=$ zE1bT5Kwfs?B7$jdANIRbL)>;>y?0Mq?VfK3yt;e34m%AGn*lgJ3O#FJV06(`s>{kO zwIsft6X$-H$u8SYBZs+V3CN81(qb4$OdsWzx(~IiWHZ66KAhhmK=IH@cu>`)QQKZPJq?8ryG0ZR{4owtztW2X?6*>pW)y%U1aWJb~aznFu&#+aIh@~2m9Gnb3 zLYF9QVkJZ;O2zo|U>QFI1htVKQ#^XXts~0+9C%g24j~J^-HRRB)Hs9Q#tnePl1Tp*Lr1$K|2CmLuf0Z+_C)2az`8$erHfmOfG#OK}W< zCb7t=a_SMk#Qmf3tNNx$fszu6sYqY{vceH>E-@-`=lZ(U-2TIJ9SfDf1eUET7d^E;#;uNc#Ft zUS1Q-NU{r})hWsdom*F@0Z)gA!7Vm_KWA4~IP1FU5U0|>f&0m_3(jw$uAPKnUZrv8 z)`wb8uBq?D+mF}VANVrhb&elSonZl@hP6cQ$jdN>iwG`NRCk57$%fM?W;RbsEx&7St&Um4_ick#fj^DO#CG zRs0D=U8p$0vGkxySZ)LC0aj4hkN^`|Gf2tbNx?w#3U-D+a#S37 ziBbUcduGfsE5dqKB5vnp6`L0B~a`5)|GLXsko$Rkhl{(6VHmIiWiSRbPLr7Qt&p#whkxv!;0T4 zelT4rB7F56&53kMi>9TF&VZpO9RB%miR<~qO) zg%eS?bos4lJ}j+*AjeV#P^UU?DwJi8B~PMBLP(-1K~SD^COP3xeQ=|46% zr2>t@ST5?|Y8RFr0JPXxLnQ{5qo=dFu|2W8*Y{L{aEepWk?43Uu4#Ua5=gSrYnKhN zAlEaU;L5uc8zh^;q7_QV6s3fGf2j_rXwa)2<0kg|OJMt#KQMxM`VPg|pAt&&jVR?k zfEnuMpxsB+7AD(*HP@@jL&MHO;1j0gVy7L&uA^O;`eCdM)+`<|;Z8^TO61?orRF?0 zvCl0k5${vOS=i<<88F&d)1ur?drds0LuCs&7WikS!n{I^=_U+xp;QuD5-%uHrSbB& ze_;O5-cV4fWZ7%;6j0!wX;_SK-e(;pBqeKD6)Z_7rZbKJUCDf>kZMCy0#hPq;Ov!= z>cd&{+|h2oE*xLE@!fd`UAdbVT;1w~OONR4r#Z!UzINi05nQy1KEHqb5DtL9=;Nyy z?G%XjUu{f<7tx9(|MEb76?dI<@P4?q)l0OjY0%!E8>S!2w`*JV`1?bx>u+ww-I>>Y zc@_Cv0+kzD$Vff^Te{s+kmg&#v_$a;bi3P!J%BCuIRc~2D-@;glheu%$tw7M*%64y z!Iq`Xl&MSNJN9px{6waehPXhX3OdIXht|tBVoqm!7WSic@0YuwTSO@4vE*FP zu7{)kHW*`#?!)*)x?wE=J^$5zIysqsX+TKd@Hl8{RBLY8{M+ytox^-=*7qaKlSQgB zUKE$Sa*)<}SNzQDId^__VY7ry%sKWSOfG1FkVTBW3w#(#a_6h6E=Qvj6?$vgQ@ODm zMGT=bTt#Noisf_v6~m8g11W6PDr2_lauZetVF_WHGVDlf3arQ(aYz-myO{jfgdqz| zEteOV%H*@+e?H8TR_2A!-4S$TtT3c`E_Rd{p@;O?&-tHd;r82T+Q_iss4e$4S%Kyl zMFG>W4!{8j9B~{JK#;$s!OAabWBD5Cnh2!)M-pjz;mX}}qVmLx2TJbp|mus>7VF$ zB{F3}Ir-_(W_kU*fiphePpI%vXbX+c+K3%nz#7SfHWd~LtVP(Q(~Yiuy@=Bumf>^m zZZoGrg61a-5IC*!TZoil{B9{eGqh77jXSn9=WwV!eojhazj-e<*@YWaoH%scO=c)q zoAGiS`uF!SvYo9hu2u0&I+KFJ^XtjX^ZB1-+grXrIR2o(&~jDRgVCdp-VMXUFy-gz z5MuVbjth?!R~^oXjbBJ(>B`86oalowV$U;bEHPr!i^A@^4#gVEsXJV;lv~FMXfUB% zZhDL1z7DYoa7E%JxCM`Bg`qbh?3@Cre5BZd@m#SoAqM>s7ltfQ4roF}!!BY*1T~W~ zPLl>#biB%4y4)v~tiN&u&ZdtL zHD4K|KS8lfb{g;2EHP;j3&neE${RF*;|%BFr3hGn{r$ndftP?%kt36;Kb)VCByo}0 zr7Tp(D)y>1Qwq%{c5s!ol7H;z>-|31Xzl9h8k{uWNX1dexjBl$;p63fotnA;muM^g z_3G3AfZ|)cA4mIOWpY5#xaQ8?C3q103bsFWXc1m?@*c}Kk9T9%lq5`8?CUag^*!aw zy|roH9VAiz`o`a?JF?2@zN`t*S3)H(0cE?d%ktqADn!Ik$R7%hNd8U$99V=ag!2mP zDMIv4PM*r1GKE6MejA7~Y*v;T`YBW-a^|0N)3pX#uTC#q}V5*T|xgv>m15qi&EkE0rhsg z;SZq_Ih$<)-Ve~o;|P)yFVNb%M^V&%O!Yd=1on?V>udwHgG{hK9|XB@3ZBBmPupD- zwHNQdll=rA_9T}Nr}xN}ncB-?0V%w?(oNh8$FlNg=D(=O$^_jDu|)4}n7I^DXs1D0GoNJWq>~#L+PiPQ6)FXdZ4`&QB1W$!D13k?+0+J;daj*S=GR8cI=t zylX-lq#hy1a7tog<&7)3${}L9!s+&GmUy)4xS^0r=8X#+)1F%@)DnF8JaZ4&<1VzA z0JVVy*>Q2`q9Teh_G~g_Qk`j{99x%stTe3dU0s57bO$bD`C|WYRVHh+rn_v+ ze?gwAGS5;L=7@&7LV_3GGtUS4GJ&fKaQppagxK#8j6;L~3l7%S@cs!GnDo1gY&PPw z-eZF3rQOT1L(tZ};t$9%hOv(lxT0ktgrMQ+|LI0oK^W7s}@xsoZu<5 ztw*u#$7S!~AhM0OlXf$iVn%znKL*tA@$UPljsACmh0EYk^;-rB*&iQ;oyV#btL2M- zVhQlhShsTx)fGptJ1fDB7VzyY8<%tD;LIAZ1pVS)%NTMIW6K^vkPaniz?>PMhr1Xc zh2J!04$920sMoAUU>qljPzI@z$_X2jT&bpGaZnyUnFbpAvjZH_O7MSUz}*B5YzGMp zmnHRC3zDd^!oz(U>s9cTp5;{jm$t<;5VF($PwE{lsAy51Wx~ZsiUnH{}ehw7}pOieuw2Yd8KcAq0 zfhkEV2j_TWo~B5E5NUZ*Ljt}8zM*nmg~rrkB3@_I)aq9bU35=g9O&=hR2ivjriLRwxQ^?JXsm| z^A8hX5)mpQvZ6Vs5QgB&1GV6r2P`G3b-Bprq{qlPVpQ?U)$4x_J+eV19SoJjN}~_c zvS=KW^k0!|UhQ!rdi4&&l}E^DQQT1gJ2Nkwzq!CpFZ|>1 z$t&+lyI(H3Uw8e;!I4MsFW)o5t4HXGkJg#LMyRHYXvBi0{mLXapTERS%CxYlSz#GM!YeE=GL}xN~x-xpn8-Z$`pZH?P z4y6Mc9>_evQY8skBUHW{7fC@Z$$I}n6BkG5cAOWreP;&ibzvr6E#5rn)sHMgx2em^ zoQcecJF64N(oJkr5~W7r#(<7K&451!gnN*B0HI6VKY1YyUfQhhX`0B=U%2Ja6HN!B z?Y$N4iCJ1RG*;g{lb2z|00$a<;rzHW7z>lTF$X}|bKL^CJ{uqlyi<`h zOk;Ip7bkRqxssz)IHlthdkIA_1?L1j-FZYPGxluy&2JAo$wV@4(lN=gQ7vc$vus5x8IAz3rbYgM~A!S%(Bx2T+@Ddt1G5mO_c9)7k z)8g-a4g0jiX#v*xm^qaN7;}Nf#8d%1(!!s~ge#gTg2khg5!N9rcOxtJG4wuIDDljz zgOz;5Fq;N_f6!25kwtbka-G~*J7e*H=3#gS-L5U;{V61+N8h4G?u1VGDV6Xk4ymC^ z?pkHDzQJ4aUAUtLCb7~ctCGabBBfDovRXVXnK)+ z_V;8Fr11;;rkA!b^#x-(v69x~)1Qh+7tw?zm4px&8}AD=K5vigw57^~=cRt=7DN@! ze{^*)Y{%suv6uS4`b_B5 zZB}+VqC?>^KxG}dfBk#3bUh5Xv(e}fQ1^9@(f1dbl;7tuy1W1JGG~Dn>OW&rz$+Kh z!lMfo?r3VdlURz5c2H1K7fVUYM`VFK)Z4-xJ{;BkDitk{&L2|!UOryKnGM9{BTh<7 zyI$U$aWFxoa%S(;=pZDeQ^!r zttaM~F8YvG+b&x+0jq((o3_s%CKH*Lsr6Rr8M0E+{j2Mcw;Dm}W6#ozPi~&mIIOeM zBjYJQ-g+Z?iucH1wrx#vzb`hDVZIRRpiP8xuwmIHP>oQJM9%2>(=pPs^GhtSE%FX( z71^>_vC#L1mjqK#7VIphF~OD&zf{(_H!LNiE!7by5a}H_%OfVXw;nS+lR36x1N*7~ zPJ6l=u=<3CW7>(+K1_L!zpwTK2e`<{*tzeP{^v{FkO=q#ZUd0N0htkJ zUQSLTrPWii4}`@YvW`5C=QwbhZ}#>wC^!6r!G=`;4F8U520gW9oGcqleGx z-E#DV%gp7O%J>-Gfn~@8;m=+vLJy?}=bTfite3%^lt*tHyH$)(1v2q(+w(sNl?lp2 ze~h!6jvG#~vx9LD4V_y~T)3Ro_4DNH`CK^|v~Gtb5>7!C5Yn{6yI7S`@vZxC>Zr-e zcyZfRh&G{A+#8qpx1yQ69o_!Cv|$n?N)i-&I~CX&!}-9Eip%6hx^?_@z&r+uiYgkE z0*B@tQikviUWT$ijPFCbEkFm4>TyD!4{cwg_W-`&nT3G_ic}`;HHfQi$9(Lgt=9_IVS9aMPfFD>>n2!?M_m*Xf zE7i(7f5hSH5Tc`3*xK53Fsfyc1llMuTxhP!NTvF6$o>iCR2EdkOrU`T^xr||H~5~F zn|rsQAaD!DX+o%`EG($}1Mz`L&G$-CJ5T_3MCWkK&C=2A9U;;~^QX^2P^u<+N!C}! z9{>WAgcwQdcM^O#N+h^J5SiQ!t^|)C8J{Iia-^EE5+-X@T(2b!Vq781FX{)cUW|*c z_4C2R7-0yVT4kof>sISmkspVlSJou=pqv31Lp(=ZQlaTcX+C`?GZx&Nvd}d68Ayjt z(fv*w$f>p}8ei#ZCcKLif;T5ykMWI64uS58w?7B4-vqV|aL7wVOc$!3jx+6^9#H*u zuY2B8!QAeQOV>{aFQ5EdY%DfXOBHe*eSD^dhG4;;*5^IX4=bTESYrifqepN|u)z5oMlJiw8Tb8Wi{_m6gvQMcP* zt+b42-o%6q7<=uivqH4D12^~^x)2f%36G9db>2K}=*xylu>}d`09GU~Y<`Wi_rO?1=@JqlUZ{k+sYSOqU z5h-!7E4&5MugnBpHp5F%3&dhr8@6_-B5@0?A^sX&DjIT3XeW`b-b8ye!8 zsra(U!X`)fl};AO1X7XuRjD|M&iYrt%WmDYJ*LRw>vssr?F((|MCEd>g={q+`NwoN zHZYNX(7WdRY#}=nGqHc*u`?cG@)&ybVnsJWX6cV=aq@@wtdtS;C?YOW3);8v?A-=& zU7)OU_sAM_pj2>~oJBMIC7~2#9KRZ9?VBXa@Z4J1_g%v;+{LQ%A|)GUj?+c!DAgfp z68#zgWkq7NvpKH3d&j9(f<+EB%mmp8-bmYxSBH^@U9SP^fkq*R&mv>% z$#4~oNxiPBoV-V*G}^)JEgd`o$T6~-Mj$P^t= zl@J#i8kEL_tn>`36RI0Z@3H&!HbGNOlfq7a<|O+7wlr|dd)PFb@zvlEy@1d1=vG8y zI70!77ODd?sD2jPA6`P@ZMpjS+>MU@iYO}vGMl#^-7SSyZgwZ)$i_{z2l3y#?Uwkr zG}R2(9{8o5k|laiUQp6=bX#=|gzQDR>Z&%@6qVG^vqZVhc8kJX?6L++ki=?a)mJz5 z9lV?dN)@!b|JL8^jr5{^qpu<2;cO-^C6Zs3^g#B7tm-4zi>6{|f?gwIEodkOnHCe4 z8V-p~W`*=?WzMRlJ?d=rS+fwSmUF0_I~Q>&-ijIaelLbx&-u(Ze?7}Hr~R#|_ei!= zuVWJOc*j57f6}=JAx85iobq*V{X{D_+eiL%`fWND&YQ^b$;o^?Y0)Vv7K)hoaJEE1 zvumNdVP@|s!oBK^1;Rb!Tq^P)lo+0HTHzpP=>g}ai@^^$=S1^GH}x;u@iM;x3LotO z%_|Ga^@06g6~5YwnZ5XZ&H01clKX69zU3ZoGrwzdZ)kAyhr<^PyYlVv%lQS%4qzzh3OHhh%8C2)3i=!UC$? zz71f#Dtk@n$SmWvnub%fj9hHqpSy+H&!d7tN|6QdN-LkjvV(${-FG*!2;dx)nJ!QB zE|S%dvLYJ9HReo?g2fv-DzAjGY^>fXG=eMS=StGF{R0){Hoh*MKADf2NMeHrt-C^X zMYHtLK+1f|ger1~$b2Ih2AFCRIa*=k4J(!q{OF84s^SvOUft}nbRohYKu|G`&_OOz%_n2L?|P>CP^1>^cH_GbYf0hy@WhGHutCB{d>xga6UIZel3qz zOUI1lI!vc?gf7p+LIqLAY6v8nii!v)VtAEKL}CEZ1M%~54KLb&mVq5;L5c+iLTJIW zmQO>w`ix5NfjDn5yM8*lw+aFu?uy_@%$tw>z`u9jdiJ-*fM@#cLI+oe%B#O5Y~8$S z9zZ`jCm%7MW>LL0vTy2%S1n8sq>&X1Ka0;opxq77U4mJB(I0=KsgZaf57R7Grpdyn z-Yod67VA@j4mKU%%yQC*jbXa5mezMAZb#5!(@_{M?;5b*np!_h*6uEoV4sv9lJZjb zJ;nqq&ay7YjENk5Yl7i!pZ&>6V_o+~m3B9=Aswh!_xmeYGBYc8sG=;p)W)r^pe za8FeV6w2k6@0{KV>fCXkuEBt6)C~r8dfk=Vhy6f5@4q<8EramJaZT`)C)&5DMo8ti0_KiiXn^*UC&v|NO3HWUw zBhygs3WE{`D&)?4#$tO;BLAAKROo5v{_#!!w{~}e6(%Kq)~z<$3G@CUPaUZKjbsIMrqVgD8W@q+e4{`;Cp1=4ilUaYIaDC1@U7-oc zMQ$R@Y=m?6P2^L&nqqm8uY$C&WGodmjKpGob?G{p@mKPBI0G%*efXmuQ(JS9j)i0h z;*$`ns+D;rkBuTnNk_~FcDtxTQ@+Cl^`0i%qj=YKkH5(|#0_uaWNB7HzTS{?4VVH@JEI3a;Eo zCgR>cjzf0@HgC7s9pSBCW4rVWOG{7Jxc~4Mg}al+q^s39vsZHSdv%-zB-hS0^ldRS z9liitwHJwKiWQ?+Vh9h2hO-l$C{7?NFy4Ym=!3)EjoYN;(x$Z^*QutpU|ZAEuR^Ds zv;Yy^7k$|+bKhtwjoZ;gS5dS%-y@gK!rRdUNf6{VYiTiuSI3prl!JXG|A$cN%1aBPQE}x`-CW_3&0s$2PeDG6k&@vA`8 zA0ZbP@V|X$EF9G@2&r+>mpmw|2`-DOiOmISOy308T|k4DE)fW!tvi-U<0R+5FHqWG zqXF~(8ra<2`ph=qjofQ^g#PGtf>6MB)4|JBqAL%Tc_rZBD|AHLB4Dcq_Qj2}CLA$p5CGzg>&d=$Z&}YZV*LNCRVE-=_a$o=& ztPcU7RzUqsHlNPy;4K3T{on-4W2X_IdDM(-_tSj>zj6iRSq(PE)%?4~R;-fI3 zkgYr+ou-+V@35hCF~SH0l3Qb!A(gRl!>W4m#*N)osuRkP6+@pd$=WUUNTuNXi^9UM+Gn7f>(B;cFmYe4wMZjKmFz0T4URX@f%BxgNS8)^gHfgdQ>gP4E~mx>_U05x50$Hz>D4=^6X0eK*NZ;@Ix*Jvd>{GFkfvz1ly24!OcFpw7lc@;6VQrKOQdZlC3Fqh{vLLySGh+*Zs z>ig{beomcTLF$}MO3ndPRgoGY8-t%kZ~IN;EDU~gUJ=kExtEP-i!wdok@50rctv1k zDEB(fYLqG@LmztyAn4clbnC@ZUOjV2oE+Vun|p6?+eV;DJTktS5PJGQb$xj9(O-7z zyEjKRDsx5H=j?i;E&rSw6K!Bfbdx=3p+(lwcX+m#cob0WekZ*-xQq;Q>@TUIeFp}! zHDw*R#;#!Lk?o>U%`%Mg*9mDRu-7Bd%OB9z&JSu%R*6X9AYtV`?|h|4!mLn}wh4vJ zm=_$fDwcx@ZOgZX5#-V;Jct?AhcdXD#r{~;zf0+DxIsrpOMe8xc$Bb1#9bHZwsJtT z`-<}t?ZnOat@FHT9^*m9-4k=zTbvksZOGAE>>)O-iquayF&+^toha<%@X@}PrVtwH z-8c8lG!T-I_#GLnYTfc(8gJx`crzYSYrNo@n-VrZuc>t=od_YzklxvqQT`qn@MD|+ zO-jXiFd_OTy474L9+4L8dARWqZ0GYwJii8a5-(e#qM}A0NZu)a1mbk z;S=qK@D@ipib!VN8-948SqQ;w!IUs>JJ5@|&nt{37(kEO2Ata!&0+@Bhpp1re+w32tXX70C{Z$1P%a)=c{j`vazU$&|rk@AMn@-9~=giD0vfX*{_KS zmoemvW8?j6tbAy!ki!_FnFNz8g-ArUo<;*n^E+MP?(CaB-PaLWa9vzwwyWLCbM+t zGx=^U2->r#zNYra^%4jD*2Q_|qNsTl6{A@XM~I3n5Bk*h1w%LuHM1BoO=6JSHlUR` z*Y4BvTuBc%MB0IA4!)^8gN>lI1frGt!%6hSsaKhu1?FcWxY$fGz*P*IXNBO6#=~7* zv5jM!XS8i!0?!ZCw%m@J^gLQ>6|! zv(M)XbcNhY?%?4%t?eCS{m%-ok_J%Wh0?tXnrc-1rnCva?oW7l56L<8q$&oxD#KE$sC(( zwy8tbdB8iwDS=hm!6YPd z#_dbG=r+QEx;S{?9smv+Z4o1eG6~GApz+tw#_!&r*T)o&JVh7^(iJ5+GHHXgOgJ`N&C+M%96Ju(@$x-v) zbk2ZZ=7V7$3)1kDf{z)m+P)SiQ1E=qzw{Y$DCqGsaOuhST6Jvr#Ck`y$giyvg0< zfB0_?liTzTDzR9KsYZ3pQ`NWnkSoPoA{=^5TfKTqv`Z`sk`oSBC~AP)$#COU>~%Y_ zvacL0;zri}9|0gRd`U`$xhzFFMyr&wR+4u!#4<+J-s=6I-atZHSUMq@?A0evR(o7y zl)G`~(!+h#o-;OW&CpvHCRHpL?Gvwp+F9cKBmclG8&X1zjB~k&v6wN{ptwqKo@8CR zxJF5}#}!XTk~S{j?)-N+J68waAh3z{H5^8xCfn7A>yrW&e5!y92xY;%)7~wW&=_4v z6^PMGhGq_h9UsMJBs(-ZYGFAfU_y}k zYg6b?CA#s2s>PdRys&lTfpMd`d1oifYmb9rUzY8p+@};*QIP5en1Ie6WR|g35$Od& zaLI(#mnqI5d>y6NNiRrhMunpGs1%P-U8{lbgU*JWm`6Zvd# zj)Ww#b50c|HCObYk-M93Ux;}iJy$$qh}s1fQ4k{T*LcRpC3mi^o|B8m^wxdGYiH~` z?&&wX=i~byZ^S*%iEeh?yZ_Dwp7)VaQBgh8Wgm916W8uaFj}=Me0+TNetv$6MCN)% zbj6rp7#_Hc&dSbq@br{kamgiwPi;56HIG*|Y6gSKod&pKWf$1#BiOHbz(JP#zp8+V zf3@?b6!^j18N=2MFogPm>-X6L_438N!toi-8&hz=_3;G)E0BkdRAiEbj)E>*Rk>`? zpp3^DYY1v!C*4D7NAS5H}6 zsN2(1nrKrnASb=P`6Wlz&SM$tF4z@=$B4cHGoKIsfligsD{$$s<`A?5u|PNJ)O)rt zPDMQRg8CV1ytPx%YP6tY`Iv_W^u`Fw5NmO%K$l&icnb5Z4HL*l!D`4m|BzrSLG}VV zQKXgNFbJgt00#nAF$Nm2jaoIEB!x!4$Us>T0e@W93y(}rN1KKVkD44x>HCF?5hiPr zLUi%&TG?71f?9L|_(Q^`!>SPCXi{&&KSxgq9}G9QzOJFNlro8HlPE%{S6AjRna0_i z+ay&Hdw?rEz_Wgi?Yb@A%)IvnT2bWqhkQ>Fz~sqR+?shOQ(Y&B__mqB->tQ=pp6#! z#VR7DCBklqHi}XbM;2x&vd->LI?kYYmcJyKIaw%@Qv7Te4?8T#s)2qRa9NUP)yAzX zB>l<|@|j|Z3LeP-U>)nM1{!)kb>q#ovAxWlt`pD&QLCSTLA4rv_i;DZc%8T zo) za&0nM;P4&89{+TY%278(To$c>QE+ zXWRDDv=>HC#BmEu^7i8MD6%yc+M!^EkDqcbc6Q z58Sg(3l%DZ>n;B$sbCMSEqP8nOXBTM<;$z^bh-lpSS$!$;PFnwlkRlT zcmLJHh?#2({$-@PLZN}<;lsd)6vdUnM4+Bh&`&r{ghyIOUK%%Qpgh(5Sk4t@$AWPG zshfBYhf%gv%+Gi=65$xdKm(|9fCxHV9`t6j2bmzv{3$OMwbYPrsUd$F7`6KuT7URjHedr1(&+eiaYHFbu#s&2&*_9;W9Fn2xy!=^9KwDgJ zxmEv}Z5!hEbS(+#t;9P*(@muZ-gDa`TecQ(f)HFXK4t_YF*zA}vm~>asb$yFAu}cg zHw!+!iV>E2bF~wAWD_#Wp)jkf3Wu)VJDY=GmY>mgA+k9|heND! zk9Xi2sh!2d*<=X}m^xadc#;yd5-k-?7BwTZO%bFl12v(!P*~+lzUpDmKv8K08W@q z9tt2v=8u59ay)gEcq2;6!lXzVSuQ15)Da1RucO7DnRUo^Et?d(pTH1a~qFYmqb)L91gJm&d2seI~nkBh|UbB7bzWM%?`{)YE`JKsoLG8~NI@dwel zuQCo)O&e3xFlXDGV@o4qH)dSx-|4pzt#N4hBL334I$rIU(DrPC5n|(-Z!qSvSa}SK z#|=MJkDE#FLaz_@b5h0Aa*L&6Hjps&&#){XXyVSl=6M<+T!|+p?{0E+0nyk z!0pEm@BeQa{8umktKg2`-pp=4|C{P?lxVeYUq0FW`0;ji912D&=Z#ky?h@~G?Fn>y z3|uUo$8QImPp7eUTI~C7I`Ln%Pm&Xds`$Ef9JiWLZ{}p%do_GtK>j5%Q$#rRGn1Ur zS%^!C3vpZvx>mI<&we_TEyGlz3>@@ft+cZc?1L$}+ayNkK*R~H5x6cS_e6%QfmMsJ z46_gjptDj{P~x_T{;NYn*_carpbFF#QVmx^mqVlL?>e2OLR-C;(?DLwg<1Gq#i@^#`p3(&d{0^I$FNKg?q$vIfp0h4~m(_h2jeY?8%Unhv4Z68=~ zMl!9s5d$XBJWhG{E#>$$7qYYZE%PqONNKYBrRl6bX|BjMq8x;#BDefB>+#6@b%j@& zz!yV76#fgaos4hE@p}-RBl3Hw?= z<9&QyFxr`yF+oiC7uU>$pFxT=rZcwSOc2|0m;E^uIgquLDxMi;*{P0*+7Z@tLf_H?H0<$ zRoMo^uZLjTq)A-%x^wbM!y!v9W&H36w1ZzZd9+BMM^0cu{9@CV?i5^a(EyKM+cp}e?#NL4sWn+(oW=tClWP=`8r7L2U=4Zkg7|l&jTmvhX{5zZe>M=>z-P+se9L z?_nf3{b$>m^()YRN_^0yL|9ft=-R%Q51WI7Nz16u7;D!D>J%QywhKw7R zu3&$uZ;x5ePUFVqs%Za51OX2R9g75ERZRZwwW6BDFfiA7uN zmvl=xJPL-9v~_gwH0?b+q=JDPEiSV?i4yOD0;rWK4tKLP$kN!-u2qs71cPN9D%Qdb z#_F_!%@7;~9B7popD}F2*a10$bT$0JkaH+=5Gcy9)sblhMDgZnJW0H3RX~hagaj0& zkX;y-CN3?D%s*n1NhIx;LWdBY(|AGg9AFVh^FS#|*9cfk9Lw|2#;b>*l_f|CD^j2$ zS2xcYTMijM3L{OqDw+Y+E-yMEYGq~}xQ5|W9;?~-3r8%17K2i`fR#*kX_QjMTLEi! zSL6JqqD>;pztn%k-wGyziwCt3i;uj8JXy9XU6qf9zKR$wTVyOAJ^ zbp3*!1fHN+#%U!QM+&2;=0H(k_+3Rftoag&V~T=k0G}+SL1}V5xgq64JN` zR_Em?u?kzfU)Wq>Fa)|`}ykS*Vb;aRSYvzJ$?FItxD`}pMf48yc;-u zv}b$dtT~psbQCSW@~vwb{54Zxm?fUqpIkCdcJ7F0>ZOqgj(3XDzDgcZJ%mgN4aN3Y zG)pu_l#wxXisQ=_ehcA4#@7c_ZQst$z4^tIinXyx@qPam6MkxeOw%1mV#9=8d_5mK zB*nHVL@gxOA7?m!Ar|??=>FbByAkkSG1h1gOI~GC9Ycla-W0?CAzVagvaq4oq4%uv z{LG}~jw%&p72ew!7trY8v(T1v<`H!dHWhsY@z_?*+i zwrNM=n9{&+u~e+o@<7Zps>*L}9mAPDu=BuX5$SLDs2W0gpqu^p6qFq}GK)P4@F08$16ep}Yd$-+|*rTz0XBU7_Rq?K7{qemKBd7aqdincneeV(X z&2wXb5i7^{DdnZCAGDo-OAHghkB96IqY`fg!kz^WM;9toM(QAzX@CiSBB-N%4NF1E@wqZEJWvJK+A)_N327DFKVI5G_-&%W^8SA)(nz-kT2YT zo7GkM>Qps(t?~&T)&@FcU+Ft}+l=%#>`=`>Hz(MtzgUIQ3NOeW9X^-M-1?h#t=*(; zYj~&~z5Z%ZH8Y;zVs&sf`YpU05vW53twe7)YR?(Y`A7I?dLDz!V^iZ~i|JnLAv6wI z#Hb*Ct-K_;sZi_m)Z0;>7zHo0^sbjLu~E6uiDE2bjWo)0`%91%V||(p%}N-dXhf>C zR(CWJ&3@^4$*GWaKMI*=PtJLrh1S=QY(a-|@zD``7AY-nZHY6lPBqAP-Z}KmCNON;_*vA-Gfb zi2htaQUDkhemu5p-R=!Q4Mht{ZDwU&8v%hp4_ev}hENE&>pug%JkCvPH{S@Jt2EzB zugPJU)pzTa^^yAxjQ$j@!&+nOcd`)_velwP9YxN4BH9u*s{RrN@%azY6AT(eE}zgCvq12$j9`AI1CmWApmC7{u*2xshTaTsKZ>}8;CbudNFUW%e| z+LB(d#^2^G_1+OsE}6urFL)2lgw!SFx8@90nUr!EE6VDphRgYaTTXb`=hgWzv!oHA z^4mFY9SKm9^xD?!o^rPQ$1f~`)6h-#Cx>9JdR`N`adA88kbrASSn|uBcgtQ6ut2Cx zD$Je&j&fRcd;P`I4M^MWgI$@@K?s);9di!F^-K5lXdhU2#UMPmUJn!wFk{PrV}}TO6L4`k7Dse;V@07`EN_Zp=Ci29Ub6(?e2mb8X5*x{%~c+{4cva=`ap_f&a%z z;(v>qz{~Eni>GF-?khNlCEDmS%zr<>Y9Zx@ zCw6v;w7>klmz~a`h480OfNV3U^(X}6f!2Yehh_DSladTH@`-eQPGV1{?*ohPUojU#rRXf3PWq# zkhp$a(3WBY%{ZF+4S1V>-#=K$E+XS|`xa#zD-RM|EeY&W1@?eV?$zpxGCjKZ9ap@X z*^S6T61!f`yS_E}z{3+r^|k=#II8e#UOeZgj%dW{9Q-Wvj>?y6dFxDaqS2`U=-nZ(xdd^JQA~VyjG}Is#MXFw9PIN z(uwV5k=b{le-bUDHZDjc#aLS#+?dQJ3*yF|;_gX*I?W))y<&eW%PX^Y=iJnIG2kbY zYlttJJGEc@H0NJS!9(1HvTtqNakjGgxUOH**2#BpJ5T6v#jnsOvX^U&xKVlj|1RK$1FFjgV0RlBI0K=G|6p+dn7PYP z7@sf}xUd1329RO~XySu_&K+`DAVRLPCRM6ENa%NjEvH$E-V_)D?Kc9snE6No#vu-!yx4^DNn2%gyogFM z<#Rr*3JG4tQLF^4o1~Sa;*dVj^AYV%jJy7vP}iqd&&7L?^LE45@4KXt$2e-@U(ap8#BTps zC5R49Ga>;1U@7I24b_&h9wcm7j9lEOWSo6&;r{C1nf~O2%$*S%8Hl8W%je4zzJo=o z<*Behp;4nzv0$Y!nYy{RLxM=iX(ubIi=5WbboWof(}TpzkARmjDGxIryxmIZfL(!I z5vs?|b;q)9gE__y<+6aBSM`zA+$=(}g*~-?GF>~92Z@1&oPY*J07X75UG$_YYAF6S zRG|Crfo5WjZ{X-m?e5%qz5S z(Z=WH*NB`Kyx42p{11V{{+!3%yO-&Ld#pyTS2r&qxT6MYroIW5H?*fC=LvJKRet3= zp{g66OBLsicZFo(vK?9o=_iGED1VH{m6A{OteNjhGf7dI*6FQ1J;Ifkwpr#9{|y^o z75m!3RW$m(221C`nx_ zyG*y1r9NB^oxVkHZuxx~vv+Mbgw}xRpRQmMO=fwOgpxaC%}n*-;4k z%8|Y40`G2gF>TvB7U|cqQzR0u=f++**dTag;r%N?wxe@bUz_=_o%71?&R@RY`%X8o z5KQ~;W(v9t3l=ih=W*$1r!E^C{=542C3aS4Zj6sP71%5?9ZyQ(sQ^k$^2})*sme0g zKcZYd8b`|PunXj2jsJRHnTce)y* z6j<1DJk-&D2!9jWRy|!n5|yF1YEyAniZCh-vC2Q`y?9wWY0Y+uL04$u80aQ8Pz}{% zNahui4Dr!37^7DzMxGX`TqBVfpbo1`zhxhz7c)TXxGqD5ZIg!trxw`k&(eLP;>JKB z@T9XR7GY1poWw>v@xih-(=wLgrQno9ijL+1OXX~J5Gkm$%CVw|&MOT_r#LXi=TF~# z@*o!zQSN-78n{CFlo@}?O140D|K^El9x0|%D(U4w%BVv;>;R1NMzz;(FK1Ec96zuMR`6QpT1ipTu$Ai>SH@*{{Na zl*JFdRSwT}*!A7kpg*||LtTcr4|#9L;$2Aalg-{!lBzj-EThgt7P45B2xN`^@W1-D z8M8wM`G_s~ae*p0_E|<+!`Wl;N;;skd@ zFJYy7xXexbzl61JHsj*L+TkWsIx%$*lRIWp+e)(}-^gThOiN0kcuy>{&TB z{a!}{UVa7aSg3mwga_dQ=JMe$@CS#PJcgEuEBUP= zFL_7mQfgD+$0MlzHM>&fZ^Kscoz3YShPstijw6sX<{66zrJ`B)&MfQ9D%BoH5O7Tg zrI4xN{<9RtB;34&_FtZwe}Ot~_5Rv;vf&Ng52Zg-qdYU>&cEe<(D#g5{NBsa%TMiEqOnqEKz^LP3y!Wcjg_x%+eWtREO83HreV*?uN@Q&l9S-xmsxB5gieTZFG2zG z=tt7k{_82?D5MtK5`ET>6~k5(JD{POeCd1=w9`v(qa>G=aBJERf!CF&6UwH`;+p`ZUnt@p^Z5_4FUc?$=(Bqrj8Hs*BHa=H4R5>n`NlMd|z~2YB3R z%2bp{j!#v>@qOpo$JfBB$z)uNZ=GU^>E1o+W2lEjhN~lU_~(OpS>ctr)f{q*JC5 zDSO5$-X?$Js^!Xi5wv}ZBsSHk#HigRY|uI*dN&)p&!k*QUw!5Txp{#a3~t{~cX7!@ zI3DjcmSZU+SmGT2Vpp>7al^_U%_XGNA#*n`DPEM6S96zg*bpI?%0=qGyDCE@L<@2g zR54Rt!Klyjn`1S+5<4pv?`XKb100(tv9=iUJc?Zv*ioB{l<1}W=4HnhNXu}4_mKbH zgzd~DcbpO4b_U}6Bs)0qU9``R^01)*d}=v-I&tz}VpDH2^e*fKZ$6u-jZ070J5W7u z7mi*Z?{CB7fk)FhcNIo&Q918_OL7En9ud)FeJB^ukgqXoU?hUk3J8%ZJsYP;F{Pd2 zqekUaZmcBmYDJhNDl;HA6Rz=1XmsF_DA1WxB_H_~goTtk84^S!Wiz%#CD>fZLY>m%~x*62MU;3<6T`rud3KJe_JhA_Yy@ABS} z{SpA5Nde#K`xR_-1v&mSSmp@7YX<=b=RZE)#l>gzzL&=&?Bfj4mAR*(*P4<@<#Y8AM}@ky9z1opz41a&|Takud;f zm5=FBe;T4C%2eP`$PoVLsWPD*t?Lw>^6xqrp`oRWUZLzQm-Hm2y1`yu-bZX)BDob6#o<`I7YX=r}r1ZsAQk2tft2RJ6&${bMMEH2>|`hmj$RkzksilMvj+{}!h)XH_=P%|l*TvXUQOMqiD!;}07x#e4BJ zHbOZ!r73}xwKgwIMrVZLLSi-Z9UYBN5H(KZhd~HqHf~`xt2Qf>u^l9`!l`$V!zmI$ z0Y}j{o`W{5dK=JIDRo51$7QA{JDdYIWHR00=xpgL2YTzZPFEwoj8-I6vQ@+B6MdEn zQKfF6p_aIoDV({y;BZ^|RWgwdsaj-G2un|fz8&>P#P(>xd$>NcZb9$~^1Fnq)ZYm2 z0dx^rJ^Uzi(yK|)|H}gW&f^?KmJKTnb~lgd(FZ?62gsuZdTm{exO=)XJ8$7XcA5UQ zKPd1;t$DF%4z{!Q({8;$ZdnuTGFrbPAWe47`L|tv^d_*Qirlq+{9P(pDZw%{5uDh* zBce(xNd{X7Tk(0_m+$&vGQqKj|MtQIZqpZNK0p&<4yC5S<1iA_TJ=Aw@{yrn^zrS00ks@QSE0-c)UOJv^)zJ9UIR^&>ufK>wkt{9dyt>4A$LCxf zJVC_6rM;U42a(d*W~jv3o?n+DZ}s?X1;ag(+@IcAUgD{4gM=gW|B)SVF+4L5e{!W%E(udCw6eBA!7qt)RphiWqvWL7Yy4`iD2J zxjHhJZA8xt9~fuq<0%!nvG`;2-%_{Sd72RL6fi;d(%g8pPU+$WXM(jijhdK(2no41 zG7ecS%1GQf!^Cl?vMs7c6_||hCgy#ZPoY+K<6AP*i)KnWoKcV z2n07EngzsWX6>CtY?+Sm5_nO*~8m7rNxut{_McOR0-2JtI7f^tWC!2$!Z4z^NM zBZy%JqhO%Tg8BQ7Zz^4fVEr0q2ZxCVYzK!J!5}@tMU*`C-o~h=@l0;W&w@ld$*!xq z)612uvM-%MWwV22L!}gr#e?}eoF=7Ky0Vs(QMJ2Fct3a==n~rdN(+^y%U#7?YQ|QR zR>xO`UZ_grG}+q&d^8IrvvWeM(O2San@6wOF#v$(O#)E z2!C;x-FT9uGlNEf@EPN`d;qSh?5fPSx1UJ=Jm|skRv4ZrCaf#GiIBfnDOHrBqT!Jx zZNw@lH1p^a$fG>uebHsWR=MBywSI5n<=DzHw~gK2%>$;fO9$e@Oq5>csQJ-3=T2^sv3lW z%HE1eqai%&R-UJ7W|gr zo6YN&J-IgPdMnkXa2>jZi}YC7qPJI<5bis9h3j|}51C5AnTeb;+!Rz@m=CO)2!J9- z@io-u#@lw*eoyqcE^QT}sgK!1O{T;cEoc@Z8F?o#lkW-7elJ;TMiv#`x?T5)@PATJ zetEaDUv~C(oJ*D!v<-PLwMRbQRn!A&^ zoNRf9tnF4@$}V%AnD(*Jv$uSc0$|4(&lz6_dk)qexyBC+TZTz)60TXAQ$SB7*3;-d zO05{^A&7=>`8fIX4zV5IKJTTFBV?LXW`ev?S9dEL>ZZb~kZoYlP15RkDbH2xK?d<# zzYHOeDfkmaVgIduh5lS03WFq;2||;fLD;Y&6a6k0eR=V>x}pnh8^!W*D3e*lGtdKC zta9VT()3nQfMNFELeC@!u@K|_w!xM8{Mvum%QuMK$vHP$&+v{# z*RrUcqFzPzH;#Fo=3)O3 z!8k#}{BynhTZK&Uf#KCOC}MkKB+mB@05@_4Lj>NvWgB&R4=HC0TXHMLb+7&JVFw(( zeF;D~{>!Eww)qSz(ATY9c)q{#c5KA{Y}t?KozPiM9=HMO%0Jg^bsnyH_+4GE#gRT= z-1K@lbWVq^sijU&TmRA?e1_qRHJBkz_WRoUUAig!w_ifmulj4479AoTyCPj`rx}U; zDLm9(y*#$Y+y{h>nuI97;JSv}_Dy%feefi(f*%7iu`_@U(tw_M-WF-1uEW zI}uf-Sv*8Vl&=L#kvPC@FQ~gD9+^d0V*D2EWE|?BzxW~QBgkV~u+=|;Ii#B#?GJOa zj{&_TS;(gEj)U+X{!MhLiNZvt3#GQc{NZ*eYTX{6RBYZvVyP?}yg)NMsg?t*t>hto zn_&Os=kGousM2$t@wRbXjpJz>JoQ^wEB}i}@WgtI&GK5#anX94MRt=u`GlG4{^vCJ zylh-OJ>5`Nefd5m;PVh|^meKZh}C++saHBOowJF0ywj@b#N&y360tuoRM@n~J|`qZ zt&p}Kmu&qnElbFd77-3IXlw0cC4xMHOy5fY{+EG^6lb$vyIWAEtGFgQTIA&y8N~jO z6@0gBomeqj4$Gru{cfJ?T}uw!8kGHw=>23BxaKPjrJPi>SyYwyYAicN-Csd#Ymtwn zK!Dq$uj3Kd_sMliAjie)nbH5bg2n6p6c>m{s~dKD4lOQX1LZ2fHsDA2K~N@Ah??Af zCeYm2p}q^Ws1z^BRbg`!Gz|kX8SuXUt6%|4b5^9SuPjO=@9J{EBOaig+km>2?4ynd zVBHlc-vlVqFBMY6Xy^b@8u%RmwuKT7btm8gOc(rt3dQsDJL&B04_cjybq6UZD{@!J zjbMU=V&=dmd`CFDu23D7q`o|zM=c!!tMS5A6&lW%2nMzSe@kTxT$Mc%LIrw5I7(S{O!Q%I$$@Uj%jh;v6!ATGONp%OcFI$H)yLXc z&v6NLJ@rC*uaNHv?LL9ON85Js_)tS|1Q0Bun8Q}UtLrf|opS}uK}X$+BGB9?kVnOp z#hzm;)y9UUiL&LKb=72Kj9;`QVAX6=Eh?jR&PCcSpVHYi;TRY%$tDxrM3=B}L~%zE zYe8jVj9~f|x)rbCY)iACFTE&JY&Z@K=LT&GQAgG1S>k0Nc~ApwYyyuCtSlMA=A%tS zykaWS6md00EnaC|!7%<8W}emTMRu)FFKh=bYTghcdIVD&v$RO7i9@1E9-RX`%5<72 zCFoEpOlrl(`hgJ=>)wZrfou)(xUYkZiXr$_Fc`3#*kY6n!{9-1ZSH*3VRB=QSq+j& zDhg4eiI*}~UNX9@97&+)3>#Z?r~t@ZV~j2-?@GG>Mxt+3A}hEV$9IF=Ud(=4B>%WS zjR5x`1u>1@e^7%bC6c#>XgmZNI}AN6kFhVhOgaiL84r`q9FL79PzXR!I`C|- z&mTTsRm6z~*uE*reCULJ!lq*6qngveD^6fnN^&;C^GLEb!=qK3a=m|jPlf}8h2gP6$uTRj@)__MfP8-`tB~>x4hdAC$oz4NT*d-sdBS@K1mnGy-DNvTMOW zut!@3xoND%*NU={M$ly~6XU~6bIQ!&nUXhA8Wu2`6tsduFd8r`+zKE#IHAc45`Fgb zss8GSMDgojnqZ^FpM#{1tN~^{t;SWjEf~;lqu@jhnYP3q@vx&C%z4t14gG~s>PiYW z3Y)2`37tebZQtHq2VOkj-k-b=&srGe$5Sntq=p3&M&OyJ0J1albbN(NzQwuecJQ}0 zlQ~ut4I$Mx%ql;V38!vUX6Bk!Z??PhAH$2vSq*c^qhB63l&dmkdm+Q|9paoyyCn%q z1?NO2%y5U(|43RGA?gv{1%;Anz?F?m5_ za&2-U^1Rf1RO$Uyb|e;l9J-~l3H5mNF?c*0UP7U3YK1(pPNkerUIWF=A~v)f8GGn; zq^akrPNpV%TN+_R?-}rM0=(bS9UUEXCh=Sj$151$=YgKy-P5QW7PS6stfP+1xwNl) z^X<3)oc24KZew7K3Vb!$x0FI#IsnVt1pAvJn6_ae3q^XA zM%90}5gXup1OB)eLd?E!?`_VP-0qef*VW#fl6c19pKEqdVD=UkoSKd!adJH zg3bQV|DNs@4-Nv702Aj*__u{t-mCn;CwjJk021#e`j-0)^LNH%iFUj)zHj{(2a#3^;*kB@eDCY8(Y(E~OYXRL`(6BR`?{Dj2bj{X>^ zDlQxQR5w(@{C*ygI8HqSP>)2~KQz{?! z4NdLxuL`gsTpx|(MQurKtr8{1Q>Cc^rzV$__`8jWf19G~ zh}-pcXEk>ejF^78Z#NUOIEN}Y7h_%-|L!e(J&j~7U7)|k)-V#`o_kWENbj;@-icLW zvz1VpkA=SR)Ms4Az=lnpnZz;GUcivxA-xI`qBD>8cL3y?e`hsbFAtwD0pSTiE^fz- z-V?QJ4B2A$j^f_3#Lyt_d?zndYnvSIRzwI!-Ciq=jlD#6n?TRO!E~&C zHy>-JBP`V1WV-Q7Y}*HjGGrCQmY8>nvSoz0B|KqmjhIyU^7;A%FJl*VNCCb>wnCt4Kg#HKqn3}>1+Y$ZoJ|a5xoG=@hCWw#x zicRjMXV)1SNQP+>A2Yg1IUv6+r9EmZhLQ(O72X#-3!!e}VrCpohQJU)kU&5M0r{JZ zH-7G>GMy|R1aIJI?~N`!4|{(V%EV%Ku>nM8t-OYg)R$P60sq}@k^6#7oZL{yFkyC(}ung zg-4~3_V`OCrQL|7fGr+bPsKqba zAzqZ=*+GuOj!VPgpvPI)D&#V&s^vKhdcnaifo6lq&NVZ`i#IW-qZTim!sNIZm=m+6 zX)JG)tmQ-FqKGbBN3O3Ypr)3S-!C@9RgwA4kNUY~GE!@6bn6~fnxYrVde+>+!m1J^ zt>K7f-#cTZC|`@ki8L8zCGH}wCJrkuWvr13^rih#+_yGMl^V9Dic8alCU=T%zfcd? zv-awnPne`5LnBpD=PV2;fc*{L_w%I)Nx^cFfzT1FWTc}~)=|L2l**Ismnh9E zrM61bfvv|cM_GLol9BA60jYuLO&Y4srB;4a+b?H_}z(wMI$=U{A3u| z_`4dJT!ksqLe!R!8Jn`GefCQ-Xvly~nNu5yy6_AeZNrK+lEX?Y(H$C^$m_%5AcH$*tH)U)i5xY0AlsU!gFddw@}* z+>^u3;M%LWihIFb(H@w9rkC|T>mKgzjt(u39U$(VK3lwWT@Xj~Dq6u-iQ0=*wM%M( z6k+JBYxADHKc``Q_Vb&; zWTz$~2suHdko* zM5DI5CWbAngEh80?=jyM`Vpp7k)KmeF^X)t97uuPjgR90RuO^<4ULT2NgDSCS5~$+ zE8b3U`Apkb221!tE?mb0w2S}JUg48F86DAUuVZhi_6@XqSF3w?XB;pqTUOe&QCyzF zfp+UY{p~yH!7p?f^WjBZa|?K{ReoH&e|3i+_LWeW^0vPfPfZWk^=2Y?59SSGtg$E@Huq?Zv&qauaHq62fN-qG$XSe zd+#^@SJLo+Y2yz!@_fXf)j*iz3IJ=Dd}f!KO2gIG_I>`DbA5Yy>R5mAUpS^Buk8-d z!8yQEVUA&sg+ol?*fZ;O4iGQi5@5v(eDC_gaj3_;H1=YC=vL*6*+=S**~8Djch(?y z^~f)D`8Y8j+l1t2dTzj)4mDMI(>aBtx?+A{&eYlX)$ot>lsR=;;v#+<4TlFL(m$!r z3syq-j%}<%4!aOOj4IcHAW%O!1$qKJ?gSP`2_gJn5Y&bf*}NXL6`3>&w=_zNbnFP! z6GC15wc_X>ag>}+I7#SB;t@C{IvL1Gup9;MMc!Uw@SOHdE+R8`B{^M$hppqr(rqlU z$UhF$lN7%JgdluNsulETydx&(;ihv> zT*@mteW>MKBD%(^tJ$2(AqR06z7eQr4`2FDIEDdq|KPI_5pRa@V>d7dgi>`quj;Emt?M&e(hyP_h;Dy4~2Ln+_ zh1Tiuqd@bV|24P(Kg5?Oty9)jY%GYRH8szd7hk)*-adcaPHMlt-+R0|2;Gi2?@sdU z42Y~={mIJ0d=XTjWJ3}p5K(ilKG$3cXGaP(0L!QGWF-=X3UeV=w1XXN1+$fI%Hhc0H<1Xs=C4I(zN{6V zq1sSjl+JC1&pcw*&nB+rL68%-Y>NN=uH=~lxAMY9(9^S7-YWl*cPH(XPKh=PLt-G4 z=!7AqEvC~_5H{cZ-fs8VTj!`5Jp!~&&|LZCI~XUu>qK@)+0Eo>OV<~#7RSzdv2x{p zUQE2l$sTRWO+Xi#i72?*yXCQvH}CPZGXNM)r;OdthlCX1Bn-f!eci?F*&YXIcOBM8R74&_m%pO~PgYxwY@#YY2v!eDnQ>rf;^0TJrEt}GcGYMrnzvp@doDys z#AVuwo=H_&Mn-}M>6F*qWfb>Sb!(f~RK5PTzZ41c^JGoHKuy4#t^eDZ(H$VnZ-2{d zvjjR50evg~rB8qv#3m3_8;K+3C5cPLAFpSf=D6DI+B-ds`Y(S0oGhG*ge0v-;%agp zs$Rv`yBUt%-*i*>hu+T4YysQY$oB^3e|~aT$<0_*X#qdM`Npc==a7ivKh+ruzmU5fccx+t&Y-f> z#cMTX-6a2C79fuaf$r2VfC{!Uh!hI5PlHvy6*+|7ATbM`jbgHufgYoth;AHRm$8ix zPsYmVMLeDgs6=f-3>$)?4^s-V7=@^xv!7v7IYz(aCh>t`?0PYfs;K3aK$3Y_^KRW-;kO}H2=zY9r?4Ym^<1*|zJi=trk zuVAnx%NJqHT}X4@JNtrrOpiN{M(9hn{M`fI{Ln}WM>cwVBC?O**uTrY-+ggy z%`q38F3`pO`^HU78)4Tf8d9n9-vy1Wl4%vw#LSt~)-80sGcXYzm+V0r-9oz4&QQlv z=uA-}=*2^M;a*rtC~Sz1Bp+)f=_<94vlB)I_dUHXkHRGL0^E@0)T};LJn@H<6&Yr6 z3Q7fgy+w*LbH9k;5oP)BV4M<|d{F9+Kxv@vTIt2H2dAzoGbl6|l+eg1P7U!J3G~yf zoKGEvDHxah5@krPgjR$sDYxd%V6y7P%AT32nn_YeA@v6<;RGX*i<81|rPO>_g`3y7 zMPM;VHZwYbDuIoC!;`GQ0~WeO#!i+?pQ*j;s2YFfq~4yy_nolr4bra1!(dEO{q4fF@U}=#OdZOqB z&BPjIAjBx4rkR)1*3-#DvJ1|21@mR>bdk|`(qZ7!WpNvIpaeY^Mh?hp3CYziIZ5_M8 z^;+wsW8tvLCDiG**&WP%#j(fgRJX=f@ZV9WC1egJ7(+7i1G`Kese%|+T1fHJkrmV&wHi+L z-GdL)tH~Uyk-FEZX`fFr?LI_an%atPZWTUy#f#M!V+tQ1q`IkAG(7oIiN3zQ6`X!47}8? z*f$L#*`cyoH^%$B`*TPV>zT5~NKP^{j~8#|>chLya~fNiQKe{wH;x*sFAjcWnXb|a z(gKcbU<77G?a*y9Y#bn#(dSJNM*{)R3Hf`r(tblHaMe?>YF>Q4%=H?q`_tUqg$8#W zTV)U`0hEYFp&En6kVq64o8w(Y?hvy@Emgbv*MX{lXA)}=#Oc6~DCo5w`?hHvhMXL- zeL?69}mp(m_hB!wJGh4LepvAlJ~o9zJre z;ejvZ9FdHHg@p^4L3d*VUBKs(ohs&VH{NGeWIaJil%%S6s&yuIsdcK1(v3N9Jhp&) z^!NVZPd|O&_jGh0_V|AYI^quPVL=)h>H`|`H4$jnH2{$s-qP*_da2t8O>jJhM?T&S zwou=Ft$8J<-+D+O4IAtr|K0-j(@qQC$+QCLTQOEkuI$~`esZx2{<1AVmq3b3#~kSW zQe<${jkTQk$uYQ_Xz}TFR+o3vUDoDk4SFH$aE~5hu**7Ilci#e%WK&LnD0&|(CBKeRKumZ`ZQ{@Hn@ zjroBTi~XkKFr*mLkG?>z;$_u8w? zsDt@-!(!6o*O7kJ^0~T+TqFThH8=tdk`4?jDNYt56H({2ODh7?K;zoHDt25x1^pXT z1gzjL@Trd}R4I}dXbpTHwy3phTtws4auq9t0t`D^N+_e%y>2>UC4lRHk$v&d?Ul8D zwaT+8@l>f*G5&muO%>L=YD+jy?c+^>gm|c+3Gq%T_X+{O>>lNJJq(Eo`TQ#^5z{=y$o=jaYJ&4 z$&zF6gc0H1m9C!g;Y_0cBGk2|2As4|) zav?>onN)j^{_{2`D$hvrWgI=UZt)*X)SUtNo11peG#0;vg&eCeTC2v5SxWQdSc;>G z@563eT1-!6lL}&8XiV#hlcl8QYPYY)g;7(O-YW%bpWOq(*S88f zi1~;k+=}CZ8kn7h{$^JFZ!L1BxtsJjycloYjhJot;S$GtTODZ421Ka;%i7j|btehS zcee-f|C8hZ&RegsCG zn_UU*PljJh8#B_Rx(zy@94Ci42#oaco@X^*=VA~KzrS$Z-}U( zQN++eZNc_tb+FOFgqA8Di+fuj7W(GtMT$jZ&nX&qbt4=jhhW_zw@U0uRPekA3L2lv zCHmP}2^71rld*!QesXj$cgVK?p3P$ZB3^yDmW3QG`!_&6?=b*BSQF@EN>Ct|ZUHg} z@o~bZL0>?la&)kS;P!zWAAR$XkCihJ7iY<#9m#Mw)fQy_yuS<0TKv9 zsA@aL-Cr*zv(3MK8)i}E+*6K(e$$Io|IV544Z!|Fwb(Ixc^*l?2n^j7NiAgwftKsr zkGWXQ)xAmQb#W`-F0@jL(uwMe>iZs4Du=|gZVCbrKrbB$3LRidy?1}$Wy;~`F!Ov9 zx&-VBp9A9Kk4F9tDt-o{jvssWl+h~24OyYz#YM~ey`exzH*zU7w z4}t`)+JA33`3XApa1QgTXdjSM7>PyN7!ySPP?Up5uI?g>^OWc^1jFCaufF)_Bf4aj zi39w0hqU*7Q?;ImX`d`lm9NmFBB9Be_+-h37YVNIZ0VZ)vVL$jaDFYzwm>SY%eIpM% z?j7&H$NPn<4v*0G-XREI<=FqxD!&5+oGZlld`CC*iYVg{Qn`%BhIs5db>9i!1P%bMXC5?NhTzZ*)l6U13rs{h_PZ*@SHL!>Q(|X(PRnZhqCvF zPbHDVP9-Z7oXL)JQ;2V{o+G~(hc6;E#S2KmL@AU{tXg6>>`ki0FRGMpa}FG0Dd*HT zjVzFShohQ?R)oHgUJh~#R2|d}<#vkO-bv<7(b8t&|hlRZtfq>FyoIkWgeyZ zAh3HUw%-LqYnU9`Oh=z`%7|g&2-vplC8v8tU&>v4D)u6ihuY$pUBX_tT(#9gcb8bp zWYIiGKi|4QHBOpRyJjGbVSJs6+V`ykcmj z@hoj;b5se83&Kbx%Gp=QV9$l(Lv#wZ4?WGN7uBbGgg}AdReK~09orz#OJxXxN;D)s zkJ1CrHOcX^rHW7^Kvqg7+_T_HiFS%m?as|vYgut*V=J)-f4`VmKCR76*%TuVT^GBG zbJcRm&A*P#bdf;g2u6yt`s%(6MtPcGs5@id<~BV39^_3#CbppvoN^U9*v^*1rZ=rN ztynFLuGEi8Lsy5)_SEl-IaMIb-!}5s(`|aH%LwK^PY#=5zNS*L`#x^7DJCBZ748lSe1cNcO=hH3hbEx zw|)rR0%_g*rvf6)G z5#@MVSpUb}&d%?HcPUt?9{<;*fY0NpF~ePE>|x@twxyVMEEJbx|4dJA`L3yq+KRs} zYTaj0JGB%y5rbQ3Jg(D3IhE0DM)X%r+tva&5LORjy2i{WTcn}yC{wk8%#^|c`hpW@ z>n3vwszM$K!@a(~5%DdZ*bRE0O2-uf27e^15VZ})oo-v^1AOf|b#=ex;VDtbZgT-z zT$=O{L-ryMbY2HCg=)O%1KP6~ka_l2kIOvUwn?wJ!)8qUprwlK08O)h>};;=R>V3* z6nd_92l(>HZj1FHt1%gFhBi9+?zc&}zgv=o1HqT$9qkeXc%|zU$mw&hMKn+p;#SJA z3M6beL!kKX&%7@()m?sI6h^>H(3O9DM*X>uevfA#v+!qj<5T8{(P7?Qj0HzJ|BgfJ z-_~FB{vLi)U3f^o{_evL;AYc}!EfUn44_FUubohq_0czr?um@n5=ZRRR@(a~?;&mr zi@%Qk(^D);97@53bV&NjHn3{MshUD}!_QsyK77%S>g}d8$R0)5 z54rtZ{n2gQ-9=T!J<8HMTOZdiLrrALqXxoTehQleE zG7znUqv6~myIP7XYtmd5k4vGao!ag?GLb8fbftJ7Ew+yC(E`TEe%!B_)uvgECS0?R zL77oSCLhOWd|C~`51WshE@6m;wGzi#$=DU>7|c~mA2|!m>z_N=X`r%gf}0*#?`Xam z(?hXa|C9E4$KW*Xkx5Tl)=YFoGO-7@LJ5(Kt!==5PWZ<;4&=-Tp4ssqzB$6a5i=z9 zu7TeS$L1BMn?e&0`Xb-ECs~oI!_F7$+Ue7Og}k}RvehNk+FEQivl}K4&_jyC%?gU1 z0)d%y)|@OzsIXArk?jbLI2Yum`WtsAF`Mjl#Q??8t4-(Y@MqscHtU;TE zz6dymXE#h+hljljQw|6|RraDOj3)o0CMUJ8y~kj-FJb0VFgSf@($gwJ;)C*z{k(zQ z+DdQl0C#<}V(URT{F*coZE#QeIaB-c_aX0L2-m>T%WDh_1p$Yi!BlC#q<=4Ek(4tR zmH_ZZi#rGn%$`=9&QL)B<3dyrz)LLP$aP*z3Wk)wwhX-b5!lZLQ%D{@{`r8@Agqn5 zSRqbC;F$J*#1W){TO+rRGIB6zdSH?mYJ6s z6veQfZEup6VY0m+47=p{$!O7S0?24bndDP?4@%-|pw|v71gf*Z6&JE%E}aFVGZ?l= zR&kkm=wjW#0+0lsft#i{od6f$IpbGLeGFw8;J;h4l7;LQg#uCVf~6VM2Quo zlFi0jI;B(qYX^&O7Z?V(?w_;l?FU?2GLC8`b_QP(u}WxysvrsY0O;|U5=5y8)`(Z7 z#b3q>vdGQ9u3<>f0qQZuiM>bQB2taM_vR!(R$R<_NsP*RvJU}qV(OR z0)}Tta#QlD{)B=&m~>GKJ;^T8I}Rh4(ipNvWhtp5bl!x0aqh`SoqC$VCF9$M^FW9T zxXY@d8qD>t0sx78nSycYqg*;&ne+@=IdXRQlHWEdBBrC*E|g{kgkdZYMI4l{B16iY zN(1V3r&?$Xu#rJ@aU;tlroi-a)%>_bKDcwN%mr!A4olautiE6|AHv`$rs0*~@ZJ!t zYy3Q_Ozl_Pgu zvs&AtIptHgCDHu}q5l@K>fJXIul~hdWC`BXP+zwU8^vL1w7FZZqjg}{>(03e*2|VN z6E7U4Aoij2f2gVsj zMp|;N{~Ypvu)DpCo*mU^-|p4jQofv#l6tAc--_l6>)w`y6^W&Sw^H3Z%`(3^>r*HQ z{&yrDDsd)=u|socmIr4BBU+1Cc6f89lwR5>+yoy1;HYCDZeWrF!$e{eK}DsA)igmX}3KHcd_{bWZ)-h<*0#^kaqU!}t6rNH|K`qm4o5%xB*^s7iljRD(xa+HL zR10q~f05T+r*0bsP#zLr*6e)@dwIaEJO{nojkJaNM&_S{?qyFiX^(xi(N^6wWLgv( zHVe0k-*kZA;NSmA;YcWe%heojkW_S{loc0R zuOh8kr@j(DsXO@U^J4z#t(u)E85-pLV0Gthq^Il@XsE?xr%Xf9BB(eEbc4CUx_HCE zFJLa-He~Phj;*rjqPGP~n8FJ0s*~}n@KcSnq%NjR7bRgHD$x8^nyTmBGZ8_R$t1%D z0QhKJL3y;7*zM0@9*l;&bXbr@QgzT&GO?3i*?#uwrGPj(RAb7q(NY58B;EtXhmgC} zs&J~*1~VytDd-3mHZghxgX_sDd;ph1Ud7SwTc~XF+X_o8*L-|RvPFp*WydP6fcLeO zOTm2^SFn6$YRQps7GTWtqPqU4}jf`kwFIE2zY%gk^0|F(5~b;yF3h9)KS?bXOP&{}`Sc z){2fKmbgehU-Z@3r8k*5TuM-aUEQKsoCcj1q|s+-^ACeAIgSWef{1b+n&0+^EZCiM+t~foR+R!{U;26GZ?5o5l7V)36y$s!dF?m=hyboLM6T;xBMDX{mGL=cv-^pvt zLBHpCA{y~6ASmJJG;wZgtfVf_!hR(2`CxKG?{zsxc(9k-Z}$qV{Tg$FiAACWdSKDf z3epuqk0>-EY_sTY7amjkZ;G?Zn{W+Hd{0@I9hyH|69ZiQQT%v9~Zov0{-x1f{;%~eL7x+29zN21vSZlmL8yy+8Q#ZA| zC)v@8eev@AMiIZUI_?tS-QD+7!w)+Cp6G(EDLB52WT%XkX#L&8s)TMzhdnd7Va}q6 zIklTFM=YMZFA|`96ja}6OD!>xhqswyJ&d}t;_Kr!^uD26iSH-V<$o8<433W+4Gk4} z{E4v;IEZZ5ut$OW*gFU97#O(l$CU$Wmu)^imgxP#XEZa8;*q&zb7?PUTEE}4ZytLO z_d%Y??v?fK71-DY`)MvtV^7C1ek*$bCdcbx?F_BGashjG z)W5b-@a=Ezy`k(2Vt7akh}d1#YS7});sE@klC8qsm;Hw}@Tw+8>8HnTf{((1Diz>b`p*HUG?)UQzi+Zg{p3t*hUqiIW*fasQ;p?sfR(Z0Vdkl6+|30cCUS|f-5X07HB zv+@nSRZ=q4Y+(rPN2-L(c5aB4k79$|)_m@O$*A)O4+MyLgfxHZ3CJ$vIl4O!O{n;a zJVGcGzB{k_lvL%A{saTL`2h0pAJgSM`Aq|--U9HE$FiDqusn(ua~!BP=2Shw$45Q2 zmu>5z+}tVL@Y(lQ)MTS~npW90K5f&gUaf-F@KfADb^W2Z^8_Bb+3}Dq4mytgr^-P9 zmY>fz;3ozKgMxV+lQA_HV2a-EFYvX+)5XeX<@ks&BY>j6vd6HIM5`b;HW zvHifRu#@9W{aY@61tON}vDl2!<*MDv=1z{2N9VDBr+JSsyeF{VBDt`=3@3@>wTjs>Y|#b)_}6Kq%y?DY!F{9iBe;J<$7+RZaqu!G;~ z6wpb50PwFlNU1=;rF-YX(*lRGj8U!yxG34@{rf}Hr-6b! zWATupv1MVBgDbcTIxGsMLZwgYTB6kP@p)Jl1%HgH!Tc0~Cw2)QBZnHw;s$n6V5!lO zV;R$kr<}(XOg$7(b4YRoeG>(;@2kP1iT>t#m8kM`r$U$nMFe8kiQba<=6|oAvP6f) zGMD%Up^g+rw~90bg9r)>wD}SntC$a)<}~3s+NN3TjcHGsT?f}3Xl*-;$P@`J131i| zEV%3Xj)p7oUc>kA(sk_&G@CISg%PqlN1K!ALla^m_4S=Bb|#;3-Y8)-h5}`XKt6Il zhMkJ-ySU}}%|48!%+K$Lth^PNDz&>&L@9Enf27gTY?P~1nsBD*2wWUxTq6_2j?k)T z;|M+7iZz{yQ$nW$R#_bhl%0PC8p98xJZYw zm>QRd>SWDH{&=Iuel}|aby8p#15~h%zYvQ^7)wONR3Tgk&vA{3!iHJv}?cKU+Go{ano_8^gGOkczAlcLx(JZm@N_l0?N-~1= zw^RH8Of-TspTB<6Hd4;(y3>pt=Ldgx<55WvY(IrYeFj;NMl47UtQ0X1SEYWS8SMcU zPF>N}$j%FyNedMFBXeS4w|h43um1%-x2C@5uuLcJ!^cBma9Qf0cNTb+$)kU5Rg5v; z4aF{;0c#7cq9BDF{VbzpG?QqKx@#>k;`AJTy=dKYC!Y-Tr4mv)nL6}1hbL=}ar|=* zv5(Cdh8c*HUVdqPEEK;v#eY2lwei|pVE!!&-^RRYGba2Er9#kCF(F8uZ5rgP4|S5J zwbH9Zek3FH)$1F02Wa)D$JOq(DZPAq9X@zVjfN}PrgU*q4>1{6k}g^88)pN71U(MI z5F!o&p8VMk_ECBNlD{G>bYb+Ji0U>U7&(mT)tFBKy|;&pb5wUwo4Q^dxaGH^XH~TpFz^o%<-!hSje!>7;e$XGuL;nYPDq|t%sG)kQ<-xzc%bRy}}LsF!Puy=wj`qV2}mH10454Ka%C5WTs zcNB|(oVWNg1$>J|*FglFU#a4zEXnK=?IXg`%cyniY>c5A&1rV+D*8+V`Iq{XGv`F2 zWx$T+FL~2O<)l6%`bqXp1OA)tFw*UTC#<$SzF}G==lBu!CWb`V!=%`nblcKn!3Cq? ziJ184x!&ts`v6%2hdDN5YpqHxhpK7YC=N2)q`EmAE0H-#RK7_5I5aRZW;CBzUDH+0 z)vIs^_0f!MI+bl|)8e38V_W^8h2YuDoS%4Ap8t>6$YfCFHwJkvZlh+7-2Q|+_3#I| z){4gf`95K@zH6(i!=%5D$n}hQFdAp7~%f7Z@s?u-IYG zm_m_q-p2TtG2Y*a){)O5LV{%gBU5`w^|C$p8`Irxs=6?=ZPt5!J6=OfMxuedo%gm1 z1;1ogxr;KvQ5(|_xq*T$0c{g;h&+o`w$IHesUVgbtquoX1v$~2W!5whUVxm$i6XoS zm%y%|Ky(ndJp{)y4quEoiTD zC1NA4Id=oIB&{1OLEYkZgk>`N2l5{CO1D2)(8RK?jBi|}ecnH<{H@zav@wfJT**TI zd_Kv00F#ab68n15j}X+g;>7ggU z*dCX8{UXtv9}%Zs8EkH1D7~1a!1A zOeIIO5N|BTkzUW$@T5H-{P~itg(llxmoih|^>r%8q-|z7Vtqq3!Z4w7E>*@Lb~<}I zYqJ!S9EQBYIfl&vtfV`8WL!pjMsaVsxT0k- zz4`2F_y~{~iheg)=e8`=_aBG`;9u*`v(_>UF?pG;2_GjO@d@c^jwbm}mc{>x*H*xl zxW8f^f?aeyV@KSu!^oAY1gA;%H~-Hq5528T z$MkRFq3Syi9uC{LM;3YYZ&~Vi;Aa5VPC+{KU~+?xj}IFj+J6l0`}-vDrt|{O+4Z7M zmgdn_8h95%V>j+oU5~bY0!WLpaa=}Rq6QVhCGGr z3h`d0tKv+Q=ERZckb`8!#jZKC9PoH2VJhpIzGSi)44i$ zW8UBk5;mH08Y))VRVTfE*f}IZZgv}6mrEtW0#DrrjJ?A$7=wNP=gP)?V`b4>GR6ix zFaEkz>+iPie-~}=*Wp$_po511<-aw>|C=KRj|=fjAcEDu6A>8taSZ-C{A4z81|vbh z6XbY%m#0qwzroXtZbRU($-7_H1K6|&2f>|)UjNIFyyxN_82p=7Pp!G|1he=a1o9}p zf=4IwC~5=_Y9k^s#B@Eh^J7_+Z)vnrzWt3iOmweO8lpvM?6b5($>p<9P*5&Hh)(JW z(bmPp6;(N)Rl{wdfgV8%Zn}M>%h-)en$yV?@o)zyB`X!ONbHe~i-wM6Tf@|{8)Fv2 zEUmXFDJ$!ltC>S$it+~&kkgQhIEva>K7tUMq4;cM4jN4e0=Z!MWsrffprKjQo^OoK zqAI5_8MxgpmZ5R&`<%NZ1>aHtnX~(mQ@eKA?h{&59Gy%KU6h!?{QDqJ%thFg&^dUd z!Zuh0pliXeDOo=Yj*y`5XP|lQ=xxJ&PnwiNPY12Sxtwe1A1@*t`oc^!L1a;t3QI#1 zhYc`L%`z<1T2`*fbQCZp5-qNf6cF*~vISlpuF?e*I+67h1uD%H#ysKe%D;@%?^H6m zJ0*CoO7I6WcsPMkS3ih$;KrOZC$Rv#GMq!!$--XP0lrLl&<3J#=?o~t867vTBUOshGH z#Kzenkq?iYWfW^eC=8N*4@X~2QNf-9b&%1uiQ8%$3m&M32c+LZ2XpiNELy}&Shh=x zdk(}z{>O9mh$@Z}cTxmH|^3UbCq;rdVDQ`nuMofdD4-PB!Ae(qo08%NXwO0Vg z>Uh|BQLbustrH}rF_XRliQt{|jvFO1)+zdcx|Hq4g;<(}t(>qVXbu;N#fjaBNFfdD zfOmGywuC=NqGwsydg-tKBWSfm6R^w1EKa({r-vs z9@-)uIiRYu7mWf69n1$3mILjwipY3Yda?oa3nK85*(owE6>dfD zCetQSGrgFHb2MAr%r#LiPly{yS!~chCQtszRRh1Ai7wCnfPUjyD3K%0LiZ||*Zzh> zKBbTZnhuX4DSo+DUrZr5NDwzq3F;wr4Ric3CW5;PSb&4iBK+Xis6fz2^}0F zPKW+1-V&)>m?AwE#0Dp0KmOhb$l=qD#maoBMT3bA!sFM2;SM@aSD&O~0!f4whn5d2 zl!uUs1*-Rkiy(ux`Ek;wle3Tzluh@XqUPsOQypof!S`<7(QrK9O;g14i!3C}UOrcS z8kr_yDQ6xm%&)&($A-z7t1JT5WiNC2%4WbxihHBQx}%Av0rJ`B&0j@(&r z1F?~EDwQe~HlURKyr?g`jxX-S-Kz+!#%c(&dhqZBITG_U@d?m~=!$$PuJfuQ1Hi~~ zaS=~)J*V18`g#6&Vy;(mpwg6E2^|)8C4zcvWmqA$V@h!7(KE&_!Q=Ybd#ZbTZyVYL zXtgxG5y5!(>#50vzv;#Ck`3^S9jr3EvNxmaS=0QT&b-b3zBJDd{t9w;r6m}?yPI|6vwU6 z_A7#D5|Mm}l5!NtsMMbZHig8g(8FWAnWYnIlQ+hEd=M=}@q@ek{MGkOGEKgbSh zT))2~ZKMh5Vw>vvmDC;4{=2F*CG?kWeuZtr(pJPLAr3c zLO?eQkgaDx@%*^cUhmJeGxP&5!_+; zg9dq+GhJh~x^FQX-iQhh4Hg|%9agn+iyq~#5+l2Z$99*`$mbI{)aoNJ?_HMGKZ1+v zd-rjtuBs$N)YfRY8 zep=q^MyK<4sy=PtchS5B^D;EV$M!}Ni?kt@vq%_Yt0zr$QI}0Or))BTQ|N61yDN^F z4uoJt-j&LH+lNAE9RjXNBnBR0&a#Uehx>R@Y4i| zCVisAcE~YgyX*#&Sae^k?^R%ut+OItj5p8|(4P!Y(AeJ;r?+NF=D*Xsrno&DtX zejI3WU)rNy>R32{8y;mWO0or~VneN@d5*iJ643ucAd-IcMsqu)uVAAWr1j(G1x%Nb z229!$3Hx54$9oP^UhiH{K1}{Mx^m}P{2W)w7DuU=CuG*MvlE+K_WDVM_xko^)BSc# z2>!`ZBk+6zF~fHg!)?b>x)I^K^N-ulG-PQ-pjSg_AT|?(@Dx!U1XP6;@RZ;-If%b@ zdNJ3k28uY7V3G?y(e!hbLeYiQkq@+bEb{4Sh--S^Jd)_<1aA{wbg<1JJ_sxbVt&OPx22&EOM%7h;=*oFZ#{in@T=ozjhC0!av2{MA?r>{yaL@h2a-M;!>*%Zg zsNm2ik4MQf76W?-pX1Ncx_|ZwJ|lM5Q~oKFOOiz%16xs7Yg-*BDS+>ram$frGEnkc(XRv_DIip7^?k51os#7UY}}vb~St6)aknHM|S#e-b?}`|4WLzzP{!p z^@0iMyhJEXY4(LX;BiJUj1TNjQ!D1)KAtV`A3>-efd^^8s}x;bhp!JO9@{`LUvJ0% zoe>PZ_WiSFA>ef%x>eft`BOyQ>;4e_|C*Hu9w$q`H2)9Pm-{eE`9UEKSVme6-AKW@ zHE7Q=^s%Z^rDj^d2_R8rvDsHa*% zjz{oytK%8;7;L3Px4LM^_C_XxW`khLaK{FZ(!?ghcL^uA+Ds`50;wc9UScYr%&w6sENjvP8k`#uVhYKn<5ob16?nv#SBA@Xv6YzA42d%+@pZf zmGSZX?DY$!PSBcjhgjrtR2r}tB@#LW!JhV#jN+2+GTe|GdI3+(%FDYE~o0aSP*%UPxcvYu%?3Y(AD8<=ElA zESj!vsCUa6$+vq%lo*avfD|q-sU2&rXqhmC}hLH@Q~P z%szNnBj`s8KHo%i1x#l} zcz6*fxTvB0L?v48b)RvxWxH;Q&LOh1r>H6H@k86mA zw3_DCB30(tf)&ayX)bB}2A4t8X+S;VLg<1xn-O(1r&~KD!K-E{=g(8rej*B(3=SeU z8u?puarR>DD*ma0=WN|B>O3*Z06}VmPhqK`D;$Dlu7Y&UW1Mr^X-0GW&bPH~R-*w1E ztOa?()`Kf1DpbowLba*@Qjb)2cOaZswiX3yPhID)=kC3$kGP1W=l-*ee;)!pA*e;a zJwE+VNe`cE`v)Yw15uMd(2akr@ZZ=`d;jl*?mFb-F^&zwk)%FRC1<%ijG(mCv`uYt7s5L#UnIvno-|k~HerN>V~;DDRcF;4uE~yQ zAf3h=M_CBUpZyVKZ;2;{<*py8FxCiaVZ_9I2VUaX>GzthP^C$Cz@!DMUP~zh3x8TjQdD3D=KbD zXE|48-~ul6(GY73zH5(`de)|U$UI+~{4Pll4q64hfYM-+!%mT#L@y5E&}LyaTjLe) z1yScG@~QmB$Z-te(-&TF0M>Cy!_0CN$jBKOX1#8@Hf~~_D&A#p!H?nIhBfJBD!-qA zRx(!kHX~WHx^*08f5V?h)pCF#Ux6BXgcAZLFY#=I??s|=q%@R=*?6>U`hWa}N{P;O6JB~=mC zIp3QZY*_4es|D@qW|rNQ9clPKp z^91=RetjV$DciSu>I#rTQzM`trm9sZsHES$N&U7Xyr=d)dD`jbl@Rg8)-6tzE>XCM z24ief#F;InW>G!E+{^=s$$ZZkeY&$Q-Vnn`#fZ&}&4j@W^OzQE0ZSoKc)9zN)$;el z6c{DA_v!+Ud2Hr5;d#EzN&PL$`$&oYN5cMo+bkup??6J*_i$nJaRdJ3t}a#>2!k#f zA6&X$4lzF-7RoLoAh5q1i7H0xa8LAS9RzVd^iMc;D<5509fDPH_uDDu_LV!)xk&P& zEu1LDJarMX*8k4}6w0|)0!&!lS(U1tzxCC`Tw0q|(uy2k^X^6@Ves&lnmfxx+8-nU z5A}92coLVbIHG31d&W%}D?~VUcq}hl3XZ^_L!?saL?SQ3ci6#Z1{zs3r3EHAVJ1>Bx#oM76-zjQQakcTS6=p^G0eDUcAl5jMn< z-IyWbINPu8Z%TNvI0|=72R`rchHaI+->ya6(E8EsrbFFWF*F*Q?kgLTqE*OK5#wO4MtAW>WQ?;{Up{o8l&vl+Np@pa?5RlgA+rE~D$k2|qA zwO3rbspy3NHC~)U5i7c@$Cr1GoOi}ZYGu*88u5T$o4$`cv}6B%qOgaPnvnqO;iCYf z@UYr40+Em_&JJ&y6)q05C}JFa(nfe;&Ctj?Gu(l-T=E6TNW7G4gf%R>DxYJuf>%1s zGKG}P8WXgLT_L)gr9znwkHL8&82II;@2<_+FS)&nmzsFnpYy&7 z+kBE;sq2?S=Bq;}&J_ktIz6+&ktv$gSH_mJ@5>`0;OQDa=`Tz5a3#CgvQRQc%qRUQmQYBQ-qeBWyW^|T z2~B?eZj$y65}wK74jh5$*T=OJ-@A*M*ULtO+s3?q+0oImOw6hY?|=XP?XHP`5#1~H zbX9@=FLVM%iGT};PT-1WM@`c*II3ULJPd{gf+cg8Di;rJFjxq0PW=WqX1k*Uc!>UE z34--o8k34H|YG9GIL3B%Aqtt9`iC!M1xZZ+@Lp*o2VaIkEJ5FaA z(g9ln(*$b8@_&+amRGw|0sIpG4)NvLRkbIaqI({A`z1eSmoJlK> zFqIOwSB=(K(t?0C=XkRlOQz&hdCbSb@cyLMYJwC#M!Q#6BTZC5nsAh`9XKBPBy&cI z4kcSlq|=wRZ?j8a)!s-yCtLGrI^-9mg@z-L?f7+FgaJDbu_{a>xVcv+XXuN3Q1nu; zc8F2nYUqOr-mQu5gp3sgiJ37k154)VEE zDM`-O9LZz6ts{3L%? z-uJib-0BYa+-f!Q`O6X;(Ylj-ASNL9;d4XzZVMeMaxd5FkDnCbVXP`-urkc%aV&P? z>AE}wfNqvqf*1}ms!CB|p)c1_L8BNEzw7Nhd2XUb-Sish~je?`%yvs($<3bo0#*MbPgjiZ@KxJADglqN%q*(`_D zVq$1~;#2Bo%%fIGXodrV`Iab|iU!?7jqPd5?;vdYCqtlYS9xSWSws*GcMfGlHO=fg<1~J=SKME|tsgE7kgfkpx%lh|}%P}P{--(s`> zFYce9=gUi{;A!Vggh0$s)Ps+E5>&{r?1vuAvjGnhW1m0%DYg&7PwPFzpF$NMK9m$A z`3HBS;M*rKNTlkbV-CYV+s%6re2V;tahllPK~MY(i|a`Fob{uQ%?KxkVizPyN4QL| z$^^(}iC~3^SkgTGZj)je>J9`G%R`Gq5ws&jp^t&F3h}*X?YK%N3B;-Jn2|J9%D3eV z`dWpvtV%=Bu-Sc~1TtsgOyT9n-Nk?*K6R+r@G9gHZF+{}hHUkI#1h;;`gJtf@)k&L zz@~XIZM{<_%VN#$S{7ayU9Yb+{?+NV$-nz zR7539kYuonYK9$E)7N`z^i?KEeqkyjZeg*P{3_5;f%nILyL@Sw|M}&{5}$@LwjU&EK`{nj+e08WR`v z+;8hx?6c>qt8>jJ^5rP?v;>xz=M73Y1DWKE&Vo7?P1EUzd+Slh?cR5GinDDSMWCQR zwyN4+kx}Zxs)MSDbqPs`sWz_FG$d%wrImftEzPI^zVaRZchiTl$Mn_wew=S?`zi@u z;wQn=`7R?C1EQ80L)wrWuQS^#~ECHOj_e2P(>o+wO>RrQ9Ll> zS1--I&j|T?6%VGGHuYv6uj@jpOK~cViHUP0DZ}U;eH&=I5geqA`Fo37&uz`kD z1~#F{MQR&!ca*55`pG>nYa+sI;!&j7p$K~HaI61%e*9NNGgL!j3mmy=@@C1) zh?7BS#EZhYu2$NGpsm;VnK%C!oKqtwA2sd}g#sNq2N`wAds|uYwM&l;{qFC3doO{i zibzw+(8DvVZte2EMzbnWZKG@>;ej2U0^`g;J#vo0}Pas7`nl9P(;T73Bo zfPReJtr*49tqG4DRKEKw#KW6sYvWr=S#}DvoJ+Eq1kDm{GdmK!o9*HpEy^{>gS`DU z+^PVlWHHW?x^Qj-1WF)UBC`mF+!KG?q{&=J3zv|pB&&o@GD)>>OizFek__J>K766a zr;d+J=tOq=+>d-_>4#hC4|zQ>BH4I&e?c|i2nJ1{;5pKA;A^F8neIk9Asxx%*6tr)foXAB!|9Q;aaMWQz|4Ael z7qOeO;YTVS&lax}KGwlBw`hL|D1ENS6uL6*_daZnePPc61VDHyOW)md8_RYd3eNzhn{9O ziY)lbJDLzeW;|PO%W|Y#UU>gR2{&4?MyqG;8Q5et$uvFY2<#^IYr%E)5pNvKIy4w2 zcJOzJcOUC`zkS0(#A_oN)6iS&J?S+-Y2M!DjB}IrtXwvbb{BjB}i})rm$y{ z`N?I=fpl4o<_Ul}$PEIU4RR-5xCqFbhaOwuQNRu%P#7@wfB3gWSa9dh0QdX=#J&C; zYnC{!zcvRa5lUmWP7YG>slH42CKeo_5Y(qY561NBP0q^kp|Oz&Q3!>hzF1sV=^`UgSeorZ*8tGcVM~a%-HemllR}Rdw(;@_`}^wj)@2e)Bw${cE1OFJUuR3 zKkqS!Z!?HLAOTptv2pV2!XCw~fMy@Q<#SIU`20^sZ#$dl;{%kv@V+3Q96&4oBY}8& z4gz<(gdm`BSoVVpwJ$q7IEivpai-&La(S6(SK#*-ZBz~I_w4Kd<`2kp;9z-eV3Pkt zd)|-7taMa@mE^iYwLrsUKyBlFXn2k7jD5t zQE7qeNa!XR36E)9sEg{;8uZNT1|87aFqL}_r9L4Ohb?|^*3h)=R#xlrGLCHRnSg24 zKrQ~LQPv05$gAMSP(2eGD>i58M{a|&L>fJ7(KTRaMX(esn&uBF8R+Grw4~I!qHla( zM4+nI?p*)QMwF49FnDxODiOEI8Ly3L0-~LNx-j`OJ~2b^I5ex{0=FB>1r&oDjKN3` z!5hR1<0K4|D8va5-IDl2V%C~(K?vI;~izZ+;|qDN)f@6fVn{GS@gZwqf*S< zjaGkJi=-wqBOU0`u+<=-QHMxI_> zY>hV`sHA>FoPs20XB+gkT@D4k*P)<;!UmqJ0vjeRl6c#^*vwKvZg-p_MHp5EcGyyy zWQ3(j21}Mm+&pa5eg(4*C%U)Pu5>M8#_yDjqdyqG(0Kmg_!e<9gNE&^1`L>8LEXpZ1FMm1Lu}I=%x{DzuNFe=W1(dC)@jc zByRMprM1Xs*wy2CmZm%e-MbfAa?2_Yf19DYzheh^>DuD?P@w6WD&KOaKA<)!}k>& zi0P6MHO)*zr$Lr-?mcE&MPy2ZJbq8Iz4m50q4vgH;itsHM z71;E6L7H+<7p#(!v)JTsKTRsF^~mx|ng=?W;!9l8l#D~0=)0^x>qdl@XWM>sV1)K? zM)UAd);WHG$B@MsoR**bCV$KU8dp#I$q`;x-b41eP4de$SPv?|=x^?<%?K9)Kk)8{ z!b75BEk`-7TvZvfY<`($c`3}$Y#w8NuK{?VJ35e=S`MwRW9!6wT_=?8$$WNmTZX-M z<2&BT$0(?p#Y%Ui>cAqa4=lC}QoCejUCARVz)LHVh(de30t z1_vAA98+}q!25K5!t17)Yi0{q|5*~LF$t;C%w`gcAm zUm)DWYjbm3%&hj?p1cR3A1uF6G~q6e^(CH^?y(%@9mIBu9hF8tZN|bBnNVqA3?9J6 zWD^SW%Bjmu{I#k4h~I-ROE83})e~i2Z0@N+j>$vrbmaehzo=|3zJv9aRQG4{pM4r9 z{v7mg!o}CRJh}8q^zGC-_j>t3a2Lp4#E*}d9A~e3>`#c?nB_d!eSXmR=Q{r~^6}Sc zSwK>onO6<=qiVXj)orRI z-%Gkv)p&NJOg!H_J|Ry4MUnZyo(j3Did3!1U6C7ZTcf1_LF*l5_B?7! z4R|{vG6w8VHBVv6E?zI|t6%#n?{Dt-#q0uJB1TW z4pF4xw1b1TRI-eZgkM!Resc?&7^L<3SQ)$N7@O>V(s*gZ5sh%#l9EXbykwjqF5?}? zxBHB_AUt7P0jtAOfEVX}C|CKXX}V~R$R#&|qiG%1jKwnTyQVpzTv$Gr%4OLco<|$i zQMwo6pF{{e>8}*2Hy27Xs;W9V*u{1Rc}~z;$cE64lCJYLuEMOu2j`&aED|%4!(#8R zdlh*~*yFsXIXE(FaYF%iHm|9@6k@4QtD1tU+g|*MH`h!ejP8Qg$!adkR2peahBusF zmTnRwVQCt&uB*;kN@n@%{bOemZ=eot>r9&w7Ovj6Jy)^gD4uS8&VOUq&v|!++=wPa zyS0!t6$gS$HAw}s$E=n=Xua9EJx|Z93r=#sJx}oaJw5V<`^d>G#nAjawd4joGPQSc z2K4~v7OVjBua~T2y};__B`!!S**4>%k}?+;v^Lk*J;Hq5Tx0YbP~gvT02q7X7v7t< z?^eY^3o)oVJZN_x{$?n(e?YKGXF($kchYQoS8rL?oFX*t_?fG7UwnY#eyIkN05a+0 zH{s)_7?X;a_fATBFJcU$P~V6o$zSsFqzO%Wnq7Ja-%E+?&5ZWvedT)E;pjF87lrBP z$*YYz&a}Zv0Wu;4Yr^c8*Y1n6Zg>ANcRucO3@Fl?P63Yye?bJ}N${Q2rLE6YX2ay< zcCD+MaMGuGh2{ZiS);Y<9K;RVPW+rfnsl!p9o@`6x9XO1 z8LoEXaP^^2@XASSYJd8Lm=b)TbQ!wK_T?J+tmeQWNC5K}+q3B;zNP$4#B9Ng038G_4f2bVMDZSu2^9Ff=yjUqo%@U%Cb@ z5M81a;(5vnl7wp!5{CWZ;)q4v?dDjc_)+w0q*3Id?~cn^W5Wkn{5Jmf7tc80g%?Yb zDRgWM*x&)^8$d={Bp{?qHtUfu5H3hsU>FvrP~aQUh{%|3urot}bY%n&+|(Dw;FhO& zGa7g#Y!H>VzboeSCrd|;snNexE^gZ1cTPJKt7J8s=lUt!YhF;-!8vP+>w%hpfg1n6 zEUy=rud!3&Z_m&Xnpb2Fz%b{z9s!Q7fIDu~<^QepziHntO>%EQ+fx81U$q>O?VX*S zi=Va`OP6=^2LCkw|Jc|651XeZ{sN5?e@%PwS9{>LnZx+UnEAgEkIK_gfN=6J$N}t> zL*k4^U#SsIoc^=jUu-T6@%V_;;a*VPGJY>``0R!Gtc1iQ!>-a(f{Ibx%Y!IPK#xQ} zJ3Us3tfQ|ypcLAsUyvGGQPlmzJ%whRldBSEs1HoWj1XMdExDbSUCmKV_E~3?i)IX~ z5pyAuxrHFF0I?)iPd-}n>*fj5lHF0(yY!hP&Q2&#@Ws0NX_`AU_G@5V(RVqjjzAqL zzG4^%W?x?f2cmRG-ug6$QU$cwBs_89cp{If4!I7|5p3yxFeA92S`)*Zy%Jh?D8$ib z!l^++*hg514H;j-g589h`pGhb`z6Y~k8?InZklHKLA3iPRuztY$np#grLY!jsIZw# zg{y_^4@t-VX_KdHh*D48MGN6ima&4hjbId}Grif_wUJCKFB*|#o-5VUD1I6~(6{5F z_)0_t+Lz&_g(#-09XwK+CJ!#QP;M@Nsz&UUctnUQUa8GTHb#0>PzYQITqy4EJ3Z&z zYtR1GpcT236{08(J%e6D0@(%1g}ka*k|pM&a$7-XypPJJA)R?H%CpO$BamYtZ-i_F z=F-7e+N0U)S=%ORY3VSzCAea29i&xtCYxgw4|-L<#MuPJ2+8ALe1dp%a6CdgF?s}* zUQ&5DtODFU%PUL`4aUiFi=&ufpK8Mz16F|ui>YwBf|Ljh7#nI#hAwuM&_ z6ilw?k5m^@JFgep#x0w7Fb{0|xp&uFuYK~7ml*I|A?Me3LthD*eZYy3&8JJ@ru}a% zbDv79L`Bg^g>N`RSIlpY)Q4cQOVseqF^6lihn%d<|z#X z6^UkbYId>QenYXEJ{FrGVKorI-`0t-*WQpl{LA&*^}R^VXtTC}))nX{(odFeg~n?W zLF$ZLA3ZE)$$YbyIO|SflXM4vaFwZJq1HwS!^ogfCDXau1Ptd+y-&J(__B(yEikQU zI=ohaLbiU;qna8#=Qah`ni?lShqn4f4z710C4(m`E0s1G<^l>Dc4_{P4l(*XLm2+u ze-6riyV(&%+wZ6n>3d3KFp0EjjDKtexdxh{<14ug7&2aW{_Z{=7NoYt#Y#+a-CJTM zf3V-gGW^{iBK9W6wZRv>x&5T=7N;B+W#L-hFNCA4>V_^4F@ z3GeG)tyyvnL_&NSbQ{u$PRtcG94@CI1&KXonq<(dfuA?R<*FbDUKAwljAI!VM@~XO zG zJN=o?l9zznu7iMirEn#B=Bxy>%2&emIpnvMtYSeo`|&E8dM^_*ponV#`; zKFUhSitwyZ6R47sWeGbCu}_18v!53ac|pQm&O^?z$8Q;*w-wkw6S4U8)NVnfO_!|o z)dXefU1MI$?oWmfa>Te(9((j{#5S{HQ9B;};YtdW==FS2?h)xnXvX<9bjcv z^t3iRAHr&?tFI72++xDwhXHQ`?A~pgnr~<-=@EIpUkSZk-1_*3XpC!EjNcifCx7uA zt@!EsQx_jc2nNOr9}Y$`afJOHqRig@@#-eu%AJ6yUx4%d`x^$n9f_UShmM;snwsbaeWNoJoWE=?Rv8{#%Oq!zwv`p+m+GlJH$`zBB*dpldfKo zwAXb1(b;3eTm&Y8ExnXN+V}?HOYjC42!C?Ra8PC@N|4hBkG?!q=U_x%Q-CifSApTj zsenfYr#N|o%Xy9aqvx!DrT;iSmhb;UwVlza!1_2~&%drN8qcl4+}n^o)Bwu*pLBkr zBB+kvJ+gi83nT>oUJt)%vb+gg+^;*mz17{7e5&77$l07i{qOjqZmo(-n7iH1PRN=^ zz7S#hsVitQLF@=(K%8th(xLAaIYIC~FkOuB7>obL(@VQq>(h`pdN6=jr&I`M;|RHa zJ&B6b)m=OHJmV+RWQtbLnESMEDSBN`z8u*w0lcxEbvq9OpqKY@PTiEB_TevE-gn70 zZw7_-Zm8tFPtqqZq`J^ccd4gG72&r&p*R{va zyA-)~b?#hj_VU*K+f>OVV`D=*XW9~dGp5yHDB<1KExj_T+T+l2)ihg_Wn>QJo!zk!&`A~Kr#>KT0_zyy6 zDU?q5ao9<e#6n<*SvPeTUo1MNxHRAqz?N%M!hxFEi z=V3n0ebqJ7V<@8o=pTqaDD^HijpD^*_v&P`I2yX`tX#D2ps%SD|J+RQmUzM3Z48_a z92JWEIV#S%+@=~`os=Iv-LMCeglPt6P7^nRejvNy9VpuJ(l`t=6`(ls$E=4=d!%=& zoxM+vMzLHA{*b+lKN8+ z+-40G=OWN)NY#pVBVC0O!V*M2>=|Hje>Ba+&eT}C70s+hIh>je-QB`Ar7S?9)j((H z1U8prN4NP&oVlyZ>-$0qzChThaamE+@8r8l#lr-9o6ifJd$ z!7g|z{(Y!ZsRMtnWvN5-$MNBD#^Ba1!2)Ja_@>WNLdEnwpIy{U(zQZ)tS#OzrP)d& zDZe^O7pp53URodUP?_=Q4lZz?!pnN%F3;BG>tR%d=yPz6qkbnx5k`e4^L4+-PuZGR zjTIN5Mq@%745MK7D36#>;2K71L%+m*Of%GlM!?4_V1urNScFPB!YFXK6u>$BG{=x4 zq8S{Hpm21#%yy; zVGoivFdyw8iRok%MldPa_uw+Bfxfa?mWGKfrA#<7tG1VZRqHi+p&EzE#0+W8aq!6~ z94b#NHKAsu#3E^CcA}LC(yST0@ZTuqLfb;iAz9}SX-qv#L;U85QH49WoQZ_8E4`NX zw4YmTU~rpJjZ#cJNcN{J+!Y!S)BtM#AyiOsyucZdp-YK|bW%6p!qqgrASc(Zua zH3^a}PAW~bib=z4oow4{0Zx7^UE;774MVJONG(==@s{FOu4V3D+z6We5G#=OQF+1v z@r~e#JCPE`0?SGL5oNL!(IVKohc8yKFN%tO7=IGfI(1(G?$Z~$FWt^%+RgX<+>o0i zc3IV|DAxl;xx$=9xp#zGQj^xSpv6X@Nimue4y7$l zojIu0^e&>l7hM$wg#w#aWTT*A&g%S;B==V;P$#mAQ7?I zkkOK7BP51TQl}@b;mgC&u)Qp(jNBga#Iun~Cl2bBzYea*# zUj44Wyk6(8H(HZ(+{!s)b8BVShJNdU*8NZ4TnT+%=Q@TU#kW%swA?<1yEod)<8+Et z%8duL-vNHlsnH9>z@1ijI08_j_^~hrDg7T+Dka{rq=9MBG_5Kl$g2XB%d)h(Q9{1s$@2*m2BLPTPBWf2P{w<2B{5@xysy~e!yp1pU; zVak=&8)ZDi?_+5hish8RKXq^NOh0tfrG<1XQ4T{+;mij<_4V88UyFGsOn!}WJn~b% zz;0Eu!8e51#kR1C`76m6_`sanXi(`IqTz(@Dbzb#DG1AWrWsB@2amibT!HW8nTj{* z>E|@l8CFOWyxUo0Xu)Zjm~ol>&f z-ZKNW3UUc^kqGg+Qj?m7r9*KPkoi{8ZivbvSdplx{g}*5d_ZA%lR7)ql`A^YeA@*dRR^L7|L5_q9Jcq{SO^h;@VRHlXI=CY~q z=xbaP*bUZeU^_hSkOiqjI#>g>Jb9V8x+^{87lnIE+Hv@6DJSl>;iqg`At?2BKh=ks znURnrH7UVRBL)wY=<(jmjKa3x_W{E9yH)i7jQI`YY7_8kD8&7??w&*|`nm)B{zkq1 zZpghp*aNmz^v&Ym5L|S;XTypeca%4)_>EQq{D<9As0F0{gtzbF zZ$|U?WlFgHws6-h&J`s^Feq#uVuao1$y3un-f1P)e#Ytwf9v}AX*WEM;$a;_+*u%- z!|Snncj7N^*Yg2MwWsjw57SlP`RcN45f(qYdv3nFd;sk4hJmF&Ua6`7+}F3`36iGI z;ZY{!VVCCiSqJHvwmYAz{4OWIC=I>2`HZ}M^O!i_$HM^Fx{o;-c*@As(bG#U&_R3p z>VajY1LAw7J3qF|hrxijRw|w4>x2)Ff$m;MT%M6gHgXz-&)b~G)neFkVt5`yj_TOT zD#-L7BIqk8Iq400XCcK{x_v*ecfUfWIHBkf8%d*R#j775b7m<RUR3*Jg^I@OZnrhFC|Gq$u8aeu;T+ING-T(&f_2+9r1>qjEr zmo}Ifx|S3POfq2&W(4>X-wySzKX?aB{afaEz2%dlc$eBv_O=aJnCwQK7IPkB!ao1- z9+2GLaRK0he|?=371l~lDH5n~pDxxcJ2jWt4P*RkCLX)*Tx{oT0uz216dTuo%6`7s zMcCNbys6uCUOTHjPpMx`V1J+HC44XuW`N9O8HJop`0Zp#2{Mz(>b_zo>+|V|pK6XM z%!iRy8y;&~dqaP?PsxiE$p9|SHLvFBu(##Om12lhC%kH+>{r>3ZA2!5$BD4>rrLIn z_wRcX%o>iD&WYQj5j=(SHyMj9XxT^Y>tw}7%kWjcLOxuv?8-RCtPocbS*a>*A89Lj zsRG(Lj!|J0lpLni3n`v3jVoe1Lzaqy(Bk=m+Ph|44u)Wj_&^m=X`#C$q~bN#HLB{~G1g5PHYY)R z5uK0|n3&>X#?*N+s78f)ftWpRx3&`~j2NY$vb2rVpi(i&L)5o*hH-U8xwg*K>^0ga zZAK`>1LT4hH?RKc0*}RjDP7}$iC_D4KyC0}FVMxMj^}_V;8brYg_pTInt;8ftLyOf z6?hudsyM&93y=aX76AGH=p8=5LJSbJ2bkwG9%RkU&A>QY4h`)2oi8wJA14nu-!C;$ zLem~(Up6|nfxpLqrH|tZKv)B~yz3;FYoY)Fnf%j`yeQ(7l*Z~ucKo(^tvrDgoF7De zpc}>#8zB z4dw!FAhZNStJCk5R2-Yu{lf|uD+-^7%?~+r;j7gtkkQa_Aub$8wY8W{V2Y{R$?0Di zm8FcP5#%iYs5bgB87q`%wnfHxIQR>yeT)H%hIM67T(@aa1JXgl1rOU-7KM`*I1Ij4 z4xy}m0owu6#PFmHHEvaU21(JC;th!y`e3Z2w*#V^w2m)q_r~1vw2+VWgw$xoTqr8o zQ&dh`JV$xq+kxJanRyc*160h=5w3c!+FkUSxC_t_DH9Xw^9`)BOvoY2^d%>dByN>K z4&p+hM#!Y-d3;%UkzfK2ZiNMRlw>`YWDxocz{BQ!L`@_c3AQe_I@AJd5>$9mD36OW zMbDHW4L~=@5}bJmg$A^k7%g?$%&9+^P@^_*FN{*sNV|fjAdI{9wUK<1>w#hR#fpSy zY0XO+vKeG!{hsvR8^4@~;5durkLiM#Ss%wE+OTFc8CW1}l2v;`NyP3|2`j|QcuKbe zwv;;9yjcsl(dZ~4(R<#DQXseK80oKB;l6{$G=7S2xcKyU43-PeY7PF7t=!%I-Q)Pa zDFKu{6fwaEIOW&4Y9lTJtItvxl+j|{jxP0?S?VUlBpywcemFY>)!T%5NF&=K@ril$ z#L|i)!8m7KSX@uPpKvt$@eHp^8uil>lM)-m1-;}Ceg=)t$8ZE%7dbBKHM1*M7{Hc`e(3 z1p@=)?Cz-n_C9yiw0nFKedB+k+3`LL{XSRvFIj`}cDKs#pH7)IW#j?yEL0U*ZK@4>8LCj z_S3bppfRj+^eq@#~unXxQCE2T)1tRU>koaWaUVdU@2wxwz9gilrLR9%%O@ z6POYXmALasP?A-+2Uq~Avr8VX%-3$r5AocymPeXbx+B!TWutDAgMXlE#$FE6;x~|n zdTQ9E+UQf!C6+krDWpSbn0qpKioKn}!FlLyS3%Tar}ck>o6+rKtYQ|pMo=Xc731Gi z3m%_&H`Gci#qdakvy( zOtWgX+6>I;*Whi@vKcIx7}yGAD9KXZ2a9LL@Cn3(Vq{AoP{qYA5b>MiM}N^KOH3%A zog20mRixS8el`b~JSa)>MZM{x8z64D`&%J?pDz_u{bmB zt`HmzOcz?z2($m5NO~wrkN=0gBedIpdh&g}e(~M7xPw&96!-tT7ZvaF=>u}r?g9O@ zmA;IKz(n^1_mj_#XU2Vg?DGnP|Dv}4aPJV>nnUOlS=QG=t3y^l|E+0R-56> zi!mQPh6OTjt)?uN2e{vR_v-UE3hF(C>+>Kc_(ChaE=<`oD9cj&~*tHB`G(JH7N9DOr@)jG=Fq10yY4>MR)?T8Sp%O2k5rz z*BmQ3V`OpP>);w-x*T|B0-A0CvQEHAn!)a^`_7y9TaNMOhO0Pg8R8dD;m&U_83Eoe z82~vMGwQn)uvc}r+kY%%N!ToRDXvOuzg`J=cZQy! zT49?fo_o>GFv_K;KZWK1G>#ifn;xvM!;qB&B{Ziyn_7$fbwr{N+VDsqlL9UzHm_DF zR7LB@^pHVQBuC!2Tx*KAvE?)M?|mlRtqwjQn&AH4k6cuM+&Rs7mu8_iXm%6nLr$p1 z@GlGtS__3LRlduE|3zevWXQqohsAnbJcS(}QW7y(#~gQ#7Z(mLe#xco|1K|tl`bhN z7Ckk8tvPJo${mw9EdL+8;uhI3{vF#d=y#$JI@R}Cm>_kT6z8J?NzQLo+$+t@MzSel zdXM@BvO`Fsr}1M197aK?J&m+d;rG z-r@QL+Aa?jwV^Q(=l~joe-F^U5rqBEBgnZHtPG0;COfSA`k>f=9y&cebpr=2^bY@= z*@{nXmu3vsqX zd5lWZQ&k+akF{H%2{G9UjuN8u-C&RymYfS`9I5mVN<<|C8*sxcIT~}_-hele<)los z^Zb||3zn%#xk?hphnUt00jc*&qoS*yExl%zJ|ky}H7w&cwU2wI9`mPbaAja0Nhu?S z1hmPs0CdjPD7%2mK`r&dz8_{o7g?xw0_sNPM3L_?1zs@&Rd-u;n0Jul|?P2O)79=zD%iCS}eKv_|rG{4I^k(NXpIe6}!2%Qgnv@*U!f(`s z`=2dL+l}>p(bV($gdocR1rZTK3||4#tQR8zQk=O)^9G-ZQ~nwNNyiJ&I_Q80cjxnO zWxx{tXCeP3cyJWmRSW+m_@#XvSl$Hw`M@HBf7hWqfDFbdojXo?0U{_s@OjyRufyji z3;1Xr2d)7wFoeE#RPpui@sB84wiDb;(OKWJo?Lc!WzhdTQ+g)|0#vT2frrP)?lQX&*E2yX;On)-(K%=psS4zQ|Mgk$|wYZQ@g9uN+%>TQa5 zi{qi=r{c}8i1IDamK_3<;|T|khv~@@CWuh`MByJpj_O~tk(RMdDe}Bs6Ro5Exa7`n ze=p??V%Ed4L!-G=XOJT!@-Ufo+2({mBk|UFov7Rja^ zKsF%JK&wk|B0f^35kb{Uy?Vd>c~GgKK!qN)J1@rEE{zwN+sGOdUfCNvF>I10G6XpV zo1TmwX)ZXS&Th$}amD5re&KKg$Hy{U=E82u9@}4C$j3A`V~VLT)Zj*`vkjf&&arlg@S(*h8jKEw3PxeoOS>XYvtN6?Qh^H3glcfrCF z_jj_;Ne0|#z9^}8BlwqeD0I^AN42t^3f2g&b|BeF<4mA|zHSEguv}`a{M|G#Q9c7I zOIajqzO6`~fl78eT@Yx?BdY|EZatUG$v9K&2VmMsBklVD6&K(T)rHOEY7G z{T`M zeFoJ#z-}SO*!fbBdEvwy4mrSXmR6B|<-WXP+Vveg{Qb%#l9GIrz%D=bx*Iuk7Vu7u zc7!3KLtE?<8R)<9Y=gf4)YgfHMny=ff-|XUweVa0(!1tqt;uSmz|7g9Q^tuITl5j( zeSu$lMerZK-^?m`p$|C|FOg=s|F;?9;fyT+1Iw|aVx?7~Y=T zT(`{cst|9Pom?TgPZs&m|6&tr=1^&t;gbEpl(QUvOxHkFN2N2&-oKP`&2ixxu)|nO zoAt|ZFVL1Wv$pab>Bx4Z>1OL_%f_Nt=Nd_*MQx`A z0n#PPcBJ%w-q<3-v`B|Af1QaHwwS%6L^L4g1um>gjmH07>#-4B{cQ@|DXV$UkdTOp z4vUC14DR_PHC&z0ZUwdx@tL9xUM$!Sl{Io zfU5ua;-*&k(tLMy1r+WZZv_w@N&(8e zHNp+B9zpcR=fIcQpn8BI`4>gbH!^x|Rwaq(HyE0p$kK`|3)SEr;6k&c-+N=`RufhV z{{LBk!trq~Q!evan+g5Du{@G?4rH=!v`mtVc3g>8iQuyoI2p;JVD!A$k?u-7Wo|R> z<&r%6cd}qsY#}$~Y(6eL(-K81rC#-{4Ty;F%9en@`TX$E=q-a$#Eycoliy-OdR&V5wbW0TIM0*kJ#Pbm6`?T)luH2qS>SuvLwL|<-UFvh_)S)0_W zSXa=8-(2%%L$A=9xavpiyH}s-T|8a?%q*uPg&C4ch|=lG^m6)rjik^GP~_ zzE~iYu4ZKw1@rZgT5%fGA?C!xvk;Am>IkfMf;s)1QG;o$tjTp4X!eP3xkmcJY~JOf z+4WiX8D14}@-rdf41A8{ppwgwH$hj=Bff}Ij4PF**^BuaRkj_GU{5m!l~RdD|FGh4A{L9nL}P}nTX4BI zHwUADexEH(L2AiXsH_BBYG}|aNpHhkH}|Urq5VE^nem!>*}1#E-R&?)Z@qEm`Mk*9 zwRc%vz4VO5!k0}3GT0S}S&N~e7z&~&N!T12ukk%>ue$QD5OIJ?V+_S`TBF3#HOP+8 zQ0XCOg(^^inp<$FW7}z{du%GX7IB^}T)CJLp4DY=0-#fq1L8lO$g&DWXYgU-oe2>w zL%wOeb|eq|R(BF)aAD=q@)KkT#KFfHJjwfS#_a9-#ssYK$)DDo`PAd|mvz?aD^CDk z0Y#j6(0xJIJ&*Xx8%-rJIeG~9@jE!_KUT>AMU7N!wMWYjYRpDYj-rR1uOZIFdU2uC zC0Av6HYT7Z#;9Vc42VB?g4V%miO+Pmma2F2mf4*dS_G|xc2xJ5(f*RBi7x&YpwtpB zJqe6F)NFR{-);w4$*D%_0{bq&M4KPLQ&a1D`S+F_w_3|lecpY#d)^JlAT5n78ns-! z76;M;%|HXruGyToCE7+XW>@6KpYYz4^r0tv^Kg81BFrmQ*RP}P0El_YUj5SXOx!K96kb^&Fi|7N~*6MnFQa=cs}@QGmoR64RYZ zgA(o?7QAAFfFcT$ATu)I1s(uAv+*eU5oyJZMO{TXi=S_Z`MErKe>0=*cG-|9JYjcx zcpDQ3Mi2l$R3CY+axkmF8-Tzb1N_FRJ6I_rXz=h@pP?IZJf5xFak04)Ci`T8c{i-< zH|$$D@O4An4W4Y>vy1it!kmIMj31qkoiA;0t-|Ywk_5Ts25bZrbDI2HcOR$7T%sQd z2IVI3_oYKkyYGB|Un}IuMQMLE$?WAUR*Os=+u?}!-{GLF2%>y8CHqYfXf?1>#)+S75otlklT9UnUxGlmR+ouqIY2 zW6V0hb$pzcmtRZ!K9KTwegx2i=XV{MR$@`Et6u~hz2>8pTUsQVK51;uFy!B>PV}(m#qUkgJz zGAX5=OoecTD;oN!Po_ES>4Pk)3<4J<;VSR0&UH~Q_Pq!<}I@+G#`iaWG)ZMt@C za;d`GOIDI~lX?K73<(!H^d;wY8-|>`_9drjGbuc%llQCoqsEcjfnEDP@Jeo8uUmIm z{zLNEiE$Y8Oz1&L3IrxXxQ%ZJ;JiwI3<7QW&44%2FPohM>m!10Km(9})30w4aE!Y) z5n&Ao~8(Doz0jjA1so`BS#;cKv;Pw zTzc@Ew32A%L@fK;1#ta?@fKwK`a2caYV7*oK+JOF4bdB}&S4N>D!cr9QNR&~MQC`5 zkOS30plW!vGw@G$5A5&L0nCn)Pt;Ll=;3befyqXT20ky3yv3C~`?(ZzGGHh+@N+9t z`4ZdSaX@nmsujH{dx-3oVtTZ_r~RCqX5ns&7(*KqHc(_cS(JFKMOf535CNQ=!Z@WC z37SZD&0rTr{~#Btq{TtRxRwKpltz?vzqUEidwi?h;Abgh7&I`Gt!FCK46EDHj19F| zWBN|D4!%&Go=~}%&k&pleL>AZ))(dj`%RtjOhZ?o-;qYRcS2>TG%>Ng`{YrE#O-OV z%E3;B#e-Q^c0^QD-k07r!zCuIG}?4$Z=pL;g*KY*T7S}N3%4hb%pKAl&5`!wes(kC|sO5e};F7Vs64n5FgJc#!3p^Grl{u%{ zFj#hsWtDi}IWXH6>d6bBdG}f8`w*^|>M-3%5P_m{j%K?%*i_ncY#EmLG?TalRR}`} zOD!r&P*vg;rR6tp1=+e;mmFX+a~N1sD5NZ-Rgtzf zFYpdAQ@WMm!&ElO=Z*ecv&+-kc#0*O9jWxJ) z7M(x>MFt|nBiJLooRJE!^isS%89Uku8NFLFS(+~7uDeZy2#!2crUEPsGl{8DOgCco0L<%Z}Jl{;r7N5#u5{kR9WgUBL32a%;m%lLi4?rP}yTC!00g~&GE zz-%$ECT5IJUBul{Q2HY8Z}-{^eOUp|R2pJR%mPtsg20fFQX6$oy-g0~X1f_Rsh{Hbm9RQb#!Zxl=KLn9Ff1W{syjkSV^f8V#+hssLgR7P z?Z!fS5M&ejtDqi^U2jF`sw^^ZcK$wEa{h!PFf`<|taFH!s+iDv&j`X0hN`H*E~;;k z=+%FuWP_IEmW}D(Csg>j9wH-gk}$^kIHq9Fd8SpleK2R2@{@6(MM#egGoWyh3AS4PCNhdfH23q>l-@+e~4)`w{doTn|qlj1u-amR4IzWDI3v8Y6 zP{n$+^TY zhpbPh+`bNl3Vyk#%TpIE;v-L};C=EjUtbNsZ%Jx(_-}^E>W?>zS9{;Y_^z*2Sx5s! zpnnwu{I=CDuMY>yY3!?91V|@UHv--lzWC(4Io$no6Bqt{Vkj{M)XsIE5kOwCIkK=8 zacC6=;%9qJ|10(`XoTdI$;bqk<0PSui_I08&T^2-2j7)NsYF3cr&(p=#r8JorDpU7 z#7kvnRa9dcTeL6{(ru~7#mxC=#1g*sy42VBNfopt7Sq_$jdU35rt!EWIu}`CH})P| z#Hqp#5q+WjESs25B3+Hx=ac)9LV+*O5&bJa9U=C4nltGX9uw@!x#HTc6VH3Xk9p=2 zM5?2?(=CO}SnlM{ojr`uW(=l^&xhx(`Uslvm^ncHfdGwM!kSRBrxW`)vN zY#sKNgT<9V8tJFQei>!qNZG`?$X~whTYuZ}NNri@x9MF1*CA`-jFV~0|H2PPqI=j` z`eGV**#CshpOr4xE6Vw+U&hSc@^>wgBJZ}QoSkHqM&-=7ZG(9s)pBqtrmUN!Fj5X3 z(jMHm;fkc7XzrShbVhAtE2La(u5cq~Ugj_5T6h`s!$=>}pvZlF(=2xB z7U}cUKCX;tZA5KR9^7fW4f2g+>J1s-c^I4>qXt|c#n*~N{N*XlZ z3$b6ao%_+&kjOWBk!YngqmVA*Z<)@VwOu081^1xc zq+6R7&;oe3QUQ+QDXqj|}y+=ger)?gIuoP@t*$ovybB|J*OiDZu-HRnv2)Yrt44#mdVK&`#p4wvehMns;a#+;;Xhc#X(DT#rbP`bR8(J{+wd4=~9< zg9_?|?lUxyL&i$O@YFJ17^Jy(FPygm-Y(y=cR4D^Vxi656jQ7~w&6I_N)KKe2Ri?(!FOOhjU= zvR!%-Jiq>qYF~%%v%(?ova;Y;vacJ}*5G6Am`$(w)4$npSj;a3&DJ%#XCvvNd4j#@ z0b8qJx!WB``av-cFHG(ciM zF~J7uPbN4s&+Z26pHB!m=?NevBh-~ClfTae#`KDhNOCiyD)?_|v-7b01nE~-r^v{* zLjl#zT3KI1zRTV50^oSrhz8r+-y9^fxfVJfUx&1 zqCuDMlSl4dR5%A1YYxRn(l(zoiDO`)eBo;16Zur4$S?Kc{_OOj!6X zy9qT3UVv*=N*c*;msZv#u?kYLjZ+OV4-U*T#BGWEv4f>lQxHC$A)Gwl2@?a1(|4h7 zpM`qiFcHNG64&H9xN)n~P`<9ofajLV_Wh(d2}A|}5uHg@gN)RyNw-R+K) z{)oWm+8~A>_YB(>(bGgZWU~lOW|d`CgMH0H*~qenI+NSF!m+cze(@T6i*@NPB7~?_cT(Nckxe99AY*@9$;Wpv`Rxo|Ga|IEz`qDZth|#l>-9B!JKN66Mg?3_& z{eX~$@bhfkzwH=z6$NCJJ;2gVAcy!;2a%~C9*(C0krxMG44pRQ?98U7rUs}!7M-;-R|@pne+F<6v;I4X$9%2% zY85=Psw3Ksd$??3xSB`R^C^8~=z6Z9`YX;6UrM9NTCu6VU3(T4fzy8WAUO`Hue{4v zmZu?&qF+#;jjhdSGN*z4hHf2GFc;}c&&QzEO(q}qH=}s`T(x9=LW(T}sRbb?pUPHt zfwR6Q(gTSS>f)q{Tvn`VM?Ro9Z#+^*ENKsX#d4v(t_1Rm&N=oW5&_n6h{IBlbA;1U zxH_^XQUR8qYptlVl_{QTCSyeb%IZ&Yl1dp)+2<09D_OPzt0?ER$GTn@unZ8LV)5D- znLI=`E#JHgER-mBM(3z=c|#~rd&7eb5Aalr+1z+w8 z$aG)2#6?1v^+L;WL_U(gvZr9E3etw-TSdLH?f@z}3gV;`MnZI@@#?kWb(X^g=(AIt zU?rI$hz$>?$Wc^MvP?yLAf4mh^7=8gftn9}0n9CvAG3eMnAjMqoBP#sFphl--!d*% zt|lVO^HVArLF~|lof$WOP0E6%BXNB?nOTrp7$w(}Q(E0@Ka2#LUZ!lh=ukFh0#=BW{3~c6-s81&V|BBz^vG6V`N>~S9PtVuN5MgO*EUfBY0GBw?nHo zyZVM{dp-yScR9-A`2-b6JWQD5DRRBlI}LmK*X>fmk{NQw23hJ9$-b5I*zsW6SqTG; zwz|Bw;cY8PZ6$@C)3a#CDb=)UC)Q@);75PVia4quH>+0Q`@q>F!pl*M$;Nw8e~06% za%ZwkTVogK&1K?2dA+iS5dN^5c^tk99P;xXqHO*#^j+xSt(|L!AOBH(>nxc_le9LD zgvUFc9Rf5DXmHS_k-Q}favA$F&y zsj<3w-}-yX=@lztm$Y6~|9%w`8zBk>%N|}UnKND^;xApb{o|6wg2VGm$^Pn|0z3RJ zX;uWnel+e64EX1?@Fc#;h(8vmH4wy7McH0_&BT)}z7V?CSmGVS`#Lb>pE2WmYEuc6 zVkt9s{r6Y%f>?-9h!9{3&S_m8W>Tp|RK=VHgpeZUv(!lz^wgAe&ii^A3yyj_@&n(O)>fwRYv~a6@B$$Q;6;s zN6{nU{p&lXp6jFVTv{p>K2UhdWCnyFv5%g;r{@&xg`AgwnME zqDHrAJvIcnn}lRMfq?UGN`x_i*+rguLTojSS5uDoSO$F-C+p^!W2g9`{jse0phSiJ z`>h=BV8bjL8qtv6q^yNV+4MnXP**36`IF?iMdmcjg!F_- zW&>#ExY@frysjL7SOpM+*Ng+yZcqu5jc>C#`3J#F>rH`%S=WalZnCT%^iZ0_P3Mb2KO zh@)sK?i90=V(T#1l^(EAa@(>ef;v3H;9^)q?zCE$%p^eXIb^d_Z??s|DbYh({mK_VxCn9TvHVkz_ zQYof4q2qM2QMHITIm!EjjS*Ul<1VySbLBE+Qk zhq1KwV#95s-LavpOejW!nnkPkz~P~=7Z4LwBcYvum2l|0Zi!S;EwGkOiS7ucj#ZJF zprD`awrAnfg|4tIt8COT(T-6;uqEXW$7sMQta4s;C||$`eau8KK;K8}bagLuT*%ro z!Z6#9j8E77%2|Pl#ZH&+M~fv@P9D+h!CobAo`co}m(9Z~nw$NFO@_2vO9h(Ga4x9C zy`-e}E*CN<8Y*W88O-nRyP)684f)lB7cg+=_x^{{@9F3Z4no;`B@VWeg*JSc%dTst zyD*`Lrsg~3@%P*(7C1L@f)}2_0ZuIu7uX?S`+|vtiB!PBnAJ8qB8P%nqFrX8KGhG) zAEk38!^1|I#j08a{Iih78PU%80t(xwIV>wMCCT2UmQO?qb}vfNRmIJ{5}cynGT6m8 z*n+fu1p>oVwg*ezJuSZv^bqTKZ*+--lONo+S%Zikus4N>X7?AEmF`gls>VC@z<;CW zA;zeB4S3npk}+;8*_R6rmq(?#VYz0HVa2a;Q6UuJRE5^WANSR91bq(LNE^^V|>pemP(IwGXVz&tr62|JNRNtwH0d7sS*M*s#l?o%UmEO{debeMdm|y8a*L z{*lT1z#o&TacODip#h0aHw;rlaMR1=(>_M$Z0k%nQrPeKSC72*ZN^l*clz%8e#0pr z?1jIglq(W^$m*#T;UMEhICMlY5e5(^MCi#NQK_-y^JSBIG2HCOoN*r*7qIJLwXqHk zYM3|kvr+Aq=-WOd@M=f<5zB=WpOH@K3Z%1e6(65sKSH$-RR(v|C!*N!NFpfvh+t7ufF*42p#a4$_tsSD^gm6OA58EM zg$;qj$v?~A1PoZY)citaq+}R6cVmhFq~Ic(`hfynvs*2@bFG|9la|z8xs8)~x`QgA$UzDBkgMrd@tv{W`zu zzQeA&)JII;%AX79?!%V{@77;2NoEqQ8+K2PSAMy*KmyyAI##F`U7#1;8#^8M|D&0#TZLRMmzWKIdR;}-kn`Xo27A`9c@Q%yFI1W8{`UlN~d0? zI5IyF-&hRw!b&i}n8N65I~!7%;0x)Qy$?f(+l+_5?n}%oVn4T2uSYk0f)E-8q5ifI zt6_)6Y_ZtF4jy98j^qj`GpWw{Dqq4NX!e_zNE0b6r9Zy`3mVgL3P5j|cNlFb-2IT? z)^KDlLgIk69D*fhbQ-o9>KwK$N>^PPYwx)sLgKFLz`VRdfAN{-2;0GuZ8eSO!CKN~ zg@3)g489{JZxH>ehDHaA!-@|O_0*L|TQ$l^Xv@H)VT?cu{ov-HKEpgxC*sh3mE-(rN2Gh#9EJN_{SuJi*xZzR*N`J<)bHO+_4Jmybxo!bX!1aB=Z= z0XY9jog#>t7@4?%($_lmI!4+fVS;aOgbl)9j{wT`=>`r525lG<`<D_j^VRwafWHHx&rdJiA6~$s{B-v^uyOcj zpZFsk_}knTr4x~$+;t8==*B#L@%Vlw=G`L7%uj8CyWzk+5xrmU=+)xrqu16J@Zxn0 zVnLF&&`$31o63EA0{o}H+(12&+xr7rc<1*jI}jRaYi%_o7~URtyN3995~V;0U}j=E zX^;KaX>@ZN20$P{+(c#UDS6yQ@Bii&C(ZkT(|M=gD&$PMH08>gYgMu^_Hbtp!4bx{%zd>d+sGHhgg*Uy)~k-Ah#tT>&hcu z)i1O`(Wiy}bK2KB8+0e6<1M9&Y%vNUh#MLw#gABrm-5+EZ1ksIHv0Q`~nH=u#(|zVpaKr5O9{P20_%_Gezxp2-8~ z*q1LZm#eGT)hSeUQ%0bL6nXlYzA`}Ky%0?4ni{G55;I2#_`>Php29;QTdrKQ`P_d9 z{wmk!*0UdXb@vc;10z;f^L@JYEw5kPez23|CyP>u%gr^1f1tmN7$+%Asklk7J5xY> zORk?g7*nuj!@%i;Z*hOou;4DI5zHcF2Aya3&N6hW+rxP%KGItcVqbMnHOvNWJNy4< z0r+bPyp8@uv`r}pnYc>u#=nXD5nQz>su`&|kTmggReqKnp$CrSs^c~LEPEIbpeDqT ziC}8MV(e2{FnBI%rOz)irs<`Agw6l*tM?t~3zQwXq0hU-VI2Y>|N6q#G=_e%MMTwp|2p27~9Ye{f1-8zqYbgi@Pd78KyC#z<6DAOG^ZCsXO!# zO5nvT=adWD=;q#Ugp|QHb;Ku%yb=knC8gK5et*mA74CSk1E$Edm3g_}Xx_$uuDw~f zbTiDa?T{V^|4;<~@b2^z03Jby_Y>ZH{q7I$OOHOWegr$pJis`bR|Nii{a% zn6Znj%ot&XwHQ(8Z%I|Aef|QwI7IGL0j44PV)g52bEu&XJV(116tca=pCGG@X_ITo zdHx2cg%c(bQR14a@&%hJ<_QL=?Md7#mU;5A%%QO1l;I|+n^z|P){1FnzHZsoao|*g zSPr&Arv3d=5YN|17=MCg9_qNp z<;xPEgftQcKvAxRJ5i*j-|-gY6)7$psLmFRlvSd|JIT2&1{ zJy0}@T+{I*3mW4c-2*fniZqHen$(Iyva;K8G_2jbnXpV~BU7U>Nb|$#mtc!MX@+cy z3)IvZU~_^>;E7RUj?hk;jCDl&K_X2%Ofha68;CCOnXPcEByFFtw##6cBGaNfP@zg; z74mXuko(J9(}pZ9B={ug$S~Dq(8vpMgDKqhP$u0q72~tT(U=$!}al!?f?- z{9w2?<+z6E)=Zb%O>=X5og9vP9e)#as=lMWNdiVA27$<@j|(RLR5y4lG#EE~NrR)7 z#${?o+<1chctVT5$1?5J(8OdJ0hGn$&L}GE14VivXVu}9u$ldNPKymRXsXYa?9Y(t zFdr#Nb&kK6+A;eOReYMe&CbG#_~jGx?3{RP5Q@wvYzY!YW?;ge1Oj<>sN8qpi_GqK zgzEDMV&R8%tevtumQ)_)r`?Pxr9-04?27#Xh!gQWTw?^}Y1c6~MFhivQpe+SE-T~< zLEoRyLl2}EH=XeH3Drle_LmggDyoT=wU_xy_D+zZ@$D^5mh4B5v+-3i-Kd1EVQ8|D z<*{nLn$jBNj5CZwtuQ92L#}g*!~LVM+J1|MJ5?m4)B|^6L0>cTB-SZw@9CZ z2tF0m+hdp0W`HGB1#d5Z_@RDy)k5g!<}lh3C4V{0*kROfM3Al;nNp5g9fZs5bLe$A zqT$8lctoH37_ohg4oJx>V!ZTAe9+v1>BwLN z6j-WChHy0LA)qt}F0o(H=HEqa#Z0Bgq~kd&OzwVwzquFTKNJ~>z66^?>jjtxFjV`X z#zz+#Wqr-B`5HUqS+-DU!u}ZF<7v241d>`Kap^gA;I1k;>ic5sOLD!%E0I?JD8G?Q z)b?c_w~PM50KByLZk$s66PPe=#BXqV?BM?Xi7}`9&OP$&{iN=Lt(b538=sTXTr>Rx zsW&i7XHkf*(5dx{WwT05Q*`P29rpHEW-nYB9v(*C1lRj>F52r{Oz0zGhGhH29LAN{mNHWQh<()0)s-gAQOHu zf8lF|63p-T0aR7v^;H%n8rWMN+7g{CIr7B)1BqJ{Jn&ed>rmeU@V#0YavUCqB;_6F zwRZkH!jYee&Xkp)EWRV&#$*gI`3PL;9zwCA?0j5ATdtH1Sf&UvauxKs$#gk^+E9|@ zpoH=w?BFP$NZM4Hq4FBS@>)Yb!)d6z)*4E9ABrR$>es~DH z-kvFeQpDb*roqjx+OAk3F7pS$t{t2o2^ zH_?U2Vc_EpSuyIsSo>fUHc-+7Lde=!a)^r?quorEA8korY%kBJi;l!!`DhF@~zeOlk6Icp98&X}`p>IWw3H^W9C* zvi@*l7*dPb$7`~1M1CIL_wTKsJIqY8<%5Ep3`xhd*{J(CW2gtkaFlCkifTJimXHIQEvJyS=f(N5VCnAnAd`Yu0p zke|o)=@JagoLjO{@Yid1fBs4GE%9!*jYE@5&nO8#TWtVdKTxp#3)X;1&9m{H&H1XD0}>$t8v&n%Mz0N7ZAND> zc3cewR5d~yOOaeITtwvZ%0LU=7^V$cDg#4yowGxvr5MfxE<>^g)Sef9M{NCQvjV&L z#p4*8aAk&`x~X(?ey>3|Ut*>k^Eq&O8-m3;1@G!mD&00G4=0x3Z zDs{~)lPPAL)|gqrFlAH{9ALc$xzWD|2~ho3#J@j^Ssqg6uNkcs&JEkSu@`WMc~m@Z z4m{uFh`&o%*z>xA^P^r$u0(+1$0&dnKZ61T+dkXulY+5S`bgKDW}yLlefPC~(GuizYOKb>RC9a8zP*C(PtOl z{1>WsRGDBg(?2su&q##*&T^5qf(#^#WT3p7dY=X}zgU3T%L! zit*Mo**4@f)CsS(fKGw{EmrP0=#t{^x z(yQ52bJAYWZj38Ot)TtGqB&Yl<`{iUgi5v$T_K^uU7dmZsTj{0QnTz5BLFhfI)q); z49f(|B7w(fdo$wK3mg%3hs|&0FP2+*I!2JOB1juMIoET$Z2sR3fAy5NBDPX)WRZn< z^6Z6qy}qLh>DSrqK4&;Om7nL(4$EsniJM7f$i~zdLz#zZ$K#r6%q`3-3GQ#OPl1z1 zu+s07ME{y8{yH}8zvr*a+Q5rpqL*#u+5ltorIoUQA~b6R3o6OMKY72|A2&UJ@oQrq z^-TP2JdBcNMr^cof#z@#8QERcoz4jj1+5H!vk+}jk(R4zU7o>4{U8{1EnEnRg6{P~ zr@k}2UW>|NZ*a5eJ?EBag&e0@wHWIl!xk+Cm^l2_{qfxI16V%hgW?=PJu8({{yY-&|}wq!qlaCyrzjss;U5fjmic1^4^ z@I;m}gWIt4C6YE>RipH4neTHZrg}v4zA);mxz#;zeAWH=NX5Nl{YJR^Efv6D$X7?x z-aPLGeq4BM@4rmvu)e1vrl4nGWXM-56xV$-w_DCo8lQM!yOGcENQ;T{+4J;&(a&fY z?n4Z>Y5a3*sF*n`au7&@+_@@l97UW?pQaklf%+Mi0PNbMGJy=`d-gi-;P(LPKSY1g zNHzR|4OJvF`@&WVsiw$6omTIKFQ=EQg^(pQDfj}>lgA37)51E!EP}prU?^g_CQ$nO zXuM^SXANV(`dY(0481;EWtECuJb9F1O=FQ9$FBXFf%3pBOR}cS_?*WXJiXgh7E>NW zx_3ey$Wc%=318aq{bY!j7`y^qEtd>RMWBipo0>6R^90+dgXf2FbV+p2r#rM9s0O(5 zL0;MlPa)b}Po{v&(wGX|i`lMg&QlXpXLX7_s&59`#~gAe zaf>%W3&Bf*$#y-}U@%E&0L4@uh;|Czg=wG-bX=^i|2P2eDsaocfq2t5bal!T&yzD; zr4nGsBLvPVLZ-czNi*f)gGJ4vXdP9WbyLt`oLaYLq;6czA+A2UD%)0oC*jf^;GlXs zEVj%~TWW~Kah)Y|Moi780vc2(TP&|s#Tv$<-iUOeIE6{&ApgBfI{~%eRb;-QT8XR$ zlcA(^h4zRQzkY~u@v!+^eunJ{_}3*AH?P%_gB(&y>inEI{jT_IXMxjWiUgoCV6S_CLXV8Dp_%Ui;Jln#BE)m|=;0|y{*s_Zx`vE;%) zNs`$2FXj(Y#SY&aIUL;lrvF|bUJ3i` zZI&wiJAi$z+qcU}ClIjs!;%>K`IPU(b17L%@EJXTs1*eNs&tJN1XBp}YXGWD3&eq0 zs3zTc)!_W2 z`(4LZ#Do_n3SDJIa(NpRe4X+jA|>K7)g`)*)f#1HG332LxoTck^T%roF`;Re^1 ze$g@YzU3UisK6b?tMmO6YdPsqI^G=3*)<6-KNh}eJKn+}lGcS89JB8V~7f|->SgqTP zj@}P-z&QyEs6pQUgxb4FTf9dU*&LVh^zqRryZTY2^|E5qd*$fYE<~HZy|wk{a+80n z%g2kUd(+P2{goG}%%$Uo_3sKC`PDuuVG_`!(lnFX*dD;t%Nh;f9h;dmrg*T%IvMd* zZERr(`RYDBIvj%348^zObaidrZx;b68|OEnRrDm5=1~ zG)ibYp@uwcdo{C_nlNG8z?C%#i0}~&n?#}Fh+NnT2rC3@z|C1*~>1tiY5zH~F z2$8%b&^{=T5vcQ2G?djJgd|#6XI=}CTahZC6tR58W#z&g!sM!~(@t>+(IqeXZN&1G zv<+PvDOglQcX&%4OpQ!QU~V7O?Ou_GK%P2JAfZB!{;?;=TRilDVn2+jSfMUPBiy#) z;WzMY-=V=K>8i<5YVa^WHrK>{xm3^{Odt?t%=Pyd*MrEs)>7AO6ztAXO|c@>?O6I? z7D+Mz%Uf~PB?(BSB@Avvfc2F)(3K=1sEj-~dEl;kZXJlHE3Q&J`+bB}$JJ3r#@R1Z z(I@>QeU;{AWnRQl5c=;tkrV;Rtj@u%Pjl>z>jxxEYfzT{y?vl%fu4W_NG@(*KG8vr zPxA%wBDYAn)Df9naQPh>n!RvoOkf&#R=R%?9B&A&I{#nE6BL``tlWdB4ZvHq*%-+6 z+2nth%hs#0W;<}3>czVRG}mb}#=q>i+m9IT{t_XSmm?%->@m|C8LE$T4{0bc6YY0n z*4B_KU0`KL9%G_3mqjq;GImBlz{!{=;{VzM=PyFsD&8Q9B)K0LoEa2AeO#6zn=QLr zmOsJh-%6^{D~E$ngtYiIG)+I=Me?40j!{PECh>Z0x2tzqUKgQL`Zo@|4PV^> zeyW}6DcgK&omc9r;JAWjhG+T6S7jvU^HE2VMhj5!g9KHQK$4sicii1e!la#P2RoOu z?a+XGesr1~w@5LWngX3VeWe-%O*0#FgdRJBKLnaok;$?VlUkHGDw)=L5^EV2bw{a` z(qBP2{2$Z1o+`nk?hY?v*yK7LnXjpmnfn9Rm z1$H)Q80kZDoR|`3F`;rC(ACknC?6F(S|?j$d@DB_w5}pAfvdd9h0URu1uiE+eq9~= zAMf|yd_CO4+U83K{vFrE0j^Qn`bGCxzd!h3bbq^}F!2)y;ZxmIXJbZ7`=5)ij~sn^ z2D}GjS@|+1Q4GE(vY0N$z4ldVEYn~xOh&g*0aSwBWg$*)vAu_Py`Dv7DFOJ5*{-@NqaHtGEnNdWeodSQJz0eok)F; zW2m)pngdU1_Sh;{FmxdIR!dO~uJbMKv5^TsJa@?6g3PjrLdXkjNN9R~hy4OV-@ zT(u2J;rz>u>jeo6*%wfSqGrP!Ls;&>Y0U_vDKL6A*sbep z0u3*d@uFqedLVC-){;JjM&&ptQgBnC$m|MGDh`yMCe1hXx>cny76vpugRI4QJf0(L zP$0$e`Uy@^u9nL%u$I^wnL-YElv5#@ZCWcTF{|lS&~8#=P(t1N0|L|YiVJK=r{NuW z_Bsg9uVeww)7z8=9$Ya^%z>pcku&*z3JN?^2G7a+PdYljVl2mDrZ5-C?S7ct?=SL4 z&)+w}@pdT2yfLNjr_d{)ZgUTKKL2SUL_>3c`}Oj0H4PU`*2jX->G?PE!WF-DJK9Vy zqaQ_+P~6;yL9YzX9A50+DwQ}rIdF>C96A7tGJ`)thh z>*K5EG|kSSp?=8xBnj>z)R@SYqsfKlQOr8Q{W1tzg`E4*`bgVRl`z zV_(OVVWyrss2!zBmkzTQX3=a!en)jpoA+sLKOdG1C`52^z z1ifFUdE{fxN0q9rojTEu*KDpUmRC~|5QLXY%pp!`NkW~mLqt}e$B8!Rxde=D4N&eT z$}Y}i)@81hG=_?X`d5NetcB1*q;wR zcio9)f?rd-MGYgefcVMlFspddwPM${Vz+A7Z<%*SEC83)2?wO_!M|MF1Es7bx$3`J z_qA3JYWD}Y2X8MA0Ik+3aa#bGCh59r>*&}A@pzj%$K%!dwhPUs$IW*mY^OZb;$9Jx z^MIG6sbN-F+BSLg{vKnSKDU3+_2c`qyWdx;ME~%RvR~i8fJ9^8cJEf0-e|@rPzjvp9Oa-%) zfB$O#`}Rm4cq?>iPpL=j>aDb1v5nYGb-PR*D0ouNsoCBxBDqQ(Ijs)Ry0@}tLK{*-qUkUE%^2g0gN`;EJZ_Z{2UUrF#C3Mzv#lNgS~Ld;1%73A$HODPUPkTOS{?IejOqSZIiVx`T9UjJN1fFVu# zYX6!?cSA*H=tqWabGSb+pR_AN;~$c!fkW!vfy6VIFHoB! zQ^OOnpbJ&bYYfYIowU&p!9A#C+xoNg``BAqaDET(3<4C4hb-7azkePsM(|O$V8y;W zn}Xn2g=!M{o@G)S$)u$}%Ms5n=0zMR32#qgk(1C*f$<757a2}bNfr%55woQazFXKy zE6ivR1MBCK<77?5yZZ}axV{LN)dKI2m;$Ckp{i}e*LpF3D!&N!SShgD`Oknq#Ra+O zO>b$(4exshU;Sq^d=eQJYu6Ib|154EQZDTZ8=GP_LI7=qM>o%sZ;o&_cGSwI7&2*V zX+c^z5MQ%buz(baCz)OF^^2i{clo1kyqVdy*-)fVi>}J zFsVSPo_>-FjY2H=4Ja9~Nz5~oO>Ag;KJ*$EtHbnY6S%?vV^QW%A{j|*(OduD;rHK~ z)(cV-Jj8|t*Dhu|!zhFPe*M{+veH>DPa~s|$8!Wv?}g+zu88KvinN*Y^wi_H*r@w= z#Y`$x>?kLepYQUd&FsYU*)H!)+L1f{yEwN!t}Ax*^3WhBP<7Ka2N1|ivu638GsNoj zlJu662-fN)&Bi%emGfrbeQZzKQQfqbVasEeQ`#|TLNUZE$OlOv3FQ!pH*&a&s+`sM zD({-(85a&>{t{GZbcQu4&q>Y31th?%uvOv+&(eK+c>Zio0&l&@J%;)V6fs&QWogVrORNDZ57@@Oon6V z(s9yO;D+MkM=+^Fa}FThaGtQ2 zhcm@nJ^1tJXNwgt;1Qw9ktN`Cn&erCPa6sb)@3$+)k;)tHZE839G24Z8stkK(85$O z{*n*(Q*Dfnp|~I_{+R_Q9`$*?EuqmLrWht&1-q1qv(I+LTFXTZ((e!8%qm^4@78X0 zAKMH09&Gyhap64P0z|}dZhQB(&AHQvn3qYzFQ1L7Q})|I+)CbjM31dgot5xcriVcq zHT?Tb{KFJg)(b;TZ+_>$zWq38bvS;D`XqCd-(LF6!j30Pu1#M{Y|xpz^Y-r6<+04| zyL8qZo3nIR-hLLJLE#_%KsHiUr4r}bbe7QE@pNUL0Pz_GA_GQ(1gU&-K#KxL;0bvAWv{!p41uXH{G+<>D>ys8%BzJ$pWGZ>9HVa4Vs!C*tdzF|pL-774Q?d!NZ5%l z{K+rFCmCHyJLz$ZZ`)uwq9W1*)Ml&?M*9u(dMe=7p}wJWA=v~p(^V$ps1!^`FokFa z!IYLsXbsFCzw9c1+y4s`;V;+iS0|&=0LS+g7z@4GC%!f9knBFbx{qdmx9=ymvwJ*l zv42{AxQc@UH_+9+9zULbI*h_b(35s4{Ta>z;*a&tqS{*O-uZ&!g}!g zHhXa}Y&ds=r~&?H_5w*|1KPe5?o*$eH!c5CjyAdXQH$*>_^L38FX28RW`ZCS=8s`{ zYO1|GQbR$czVW&Nci(U{?trLs=uMN}#NhLS+Dn|()_ zwTS-p{GE?kT*aGc*UaR{wzUlFB zL*5O{`d0qHm#YqkS^t}v>-QHkSJ-Q);))oXe?OXiipTgy?0vUCtZ>gjE@IIdu=|*` zCZ(k9q$*Q@oZ#f-WJGTd;?+HLjm~}{&3s)!q{!t+C1Zn*_|ynNMAd_oCjBbJq} zoXL>d+v_wvGMHNodnVdE+#fc@S5M{)^#Zb;PZ}!u5KE-bJLV};gT9%oM0S-}o ziE^5axQdi@COHmmfm4#pskDW%kgyb`uH`G30nhG^W7=sio5cy#;S(^i!0qZ5u}M0g zxwB=0z+V92F`?>DRm}Htq177QjZQF`I=;k007!&xp&Um&MxoC}`^ij^q6 zyPz#@;=EOOQ*oZiz|Y$^({=5^5AM?Uj_CdV;u~Pdnxi7xbZi2i>!|0ik|nw9V@Ub+ zw0)&wpY44!yQcHBF5YbJY&-%6Go@+al!vA+Vyv+o?2Tvkk|j`XncMP)Z|nZ=P0p*W z4M!)Rox0lSBxQO5564mFk9+CY$4cVcq}d#tr=O zTDRr${=29NkXQo&0Y^{>vxuiC3;a;~JbRbJ+h*06|CPqNe8zV{|D57~+Op;w!rw~( z1^h2O2#8L=Y`*!i{Tlj@Qmd=4PgI-jhi#GNFobK7b+a6$lz2KE!T#e#`cZ8ycc70$ zNFhNQ)2Cu>*dzA5T*B#!TT~4M&R(i!LO>tpv&({~>M-#sc_*RYQUvAtYa%vu7O0g9 ziO2>C$L3LNa3mjBgw6WPE;zh~3-FNyP{V$CKOoi5h*LHv z2aC8JbSHro>W)A31g4#MmBE|aR4rCRgHc5@x4$Gly-n&>+`3V_V<;QekmXAU1(?@Q zTenK$?|Wqv>O8$lN$xuk8v9-*ASoMO&Lx2tY{)izit02jH*VuOjv9}8y+Bn&SF-c) zj5@S08hjJhUN*p~<^)c&05UHC=`2*f2mUk;L$uD8BPC*=Smb(d6F=maFKR zd+|yCC|sP;SJunlM0b8fFcW!jW3j}MECQDSJG*WOoGbYF4beQbM7KrPe6ji>Z8vH1 zMzT3;xAZuVd2tHI#OG{DAr$`>nM1GPJ6NYZ`T*@gS$f= z2E`hLE-PIT>;>|{U&1O^4nkUu6>T#MNP9wYeC>?-nz0#A8bL0~bTpYT(ki<8ca`jX zjiv8o9`Gip_GOd;u$d$lJtLL@#Zxv(K1f&xoZEv^iaMkBe-40HbTNRl{4t zimkPgQ}2X4NEV@&-1f(rOVJH zWQ}aDXVr2in!O7@Oy}w7In&SMAy#-}gQZz27frak^B|2`|9Wx0)K4S$GrLJe$|jGa zDv+56w=V^^|Gq~U6KA|p!A_^}B6g zl5Ym;`x!D~wo52JYCkG`C1@>BeY*m4c}3~uNips{2n+eaTk0={fN&Ky?FQhG&hkGpkc!^7ZDfOGr_b(7@Y!KZ845#K3Y&@8xLc2V)S47Ua;()`8OpS6f>ixF=eFx;0iCYUK=KzRzYkOD8 z&!L%z&ewDHuIE=Lrd$D07p(Yy*H7u>rAH9Y^|bx=pC8D?t+>*kgtqOT4sVZN_g2|+ z?gS^Z>oxA@2lO)Bg}-!GrU>u6)|@?-jO!wTFZV~D8BOXfE32dQq5Jl&EJZM)<6AlN zHY#3|Z{EN^^(;G1Mgo(;hpF=KF}((}Pn@yj9Kpz~=T;IYO*l>jNrCV$uBL?#Fcq z2M>NBl04@~3&G1owKh7{P1eai=2>{fgqA}12UeoDM5EkAHSy1@@Za6SQJNBA7Jq6N z*uw7m*6dUd56gRhBwE!*11s@wIabfnd-yuPwJ`)wH1crm#oXnf^84QchuAkXwa^n` z^x+gbU~ZQ%bV4_JYy>{ou;2+o7T}dZbx#P!`VPS?ar?KhAO^R&Vb!OyNq?RbDiNDx}_{NMA{TPx^TKnM1iXI#j6;o z1%ztgQ{Fh*7jCxpLJe(QWSg6NVf;+L{zzoGC?e>^GFm(XsfO`X6+Wpk-q4@#^ub;@ z2;RWSg?(*JEGkrvzV3A)plD}pPn=4;{#{7jdKm;#wgGd<%}v@SCK(NgTa_y10+n18 zR2>_D+DW%l27mI#?WcsHH5@oM zqB4f*RiL+H{h5i|OsO-@p{dJ9fh&vV0aN@aAo805;`nzuO;n4YGzfhHt(vwOOYVDg zVZ(Oq%7vzaNntW=C*c~NNlc;iTDk1LZ|10;^q3xvD;k(O1VP+=0gRwnleh${2FLcy z1Prnlp&h0btg>WQ9x|5m#^mvH0rDPifnTiiH4i3Cd-vT=;0RSXTs9m&#v+R=Pjp28 ziiu0iB#@h<>=ZhT&jAfXhNZ4FbDH2=QZ$J=nuKGccUDS!1zRfS6yE$b3sf<#AHZ8> zG^MSOnX;6#?T9U{%qjiBsq_|P=>(e7Xk+7-vcFk z{Z#Pf0YCtO#(NBHrg--HAo7~C|M2(m z@$Pl{3o!dqI@fi!w(peiH8j>Du1s^8m|DXA#?f$X8`Ro%bidEIOzgU`6@AcobUKgy zdQdw3&vnf8yMfH@e$4_BbJu?k<$>-pkvH-`)e}moGX`k6Gd>`E2i$Z49Dc+41x+yl zB20UY7t8&EPMmZ1k{oRapvVZ$bn<{QKG1RoKu0%c1|OOBKuiLN1ioKvzsKahom>3V zRJXbR=_%lypWtIr(QQ%$GAYE%>(%Go9D0K(;TZI`+tH^)zg9sCc;rJTCRB;QX_t$cg?+0K&zQ+%)6n(_IVZ?^@CAMf zv-Cqs3Pei#@;r1V^b?8gEJe0oblEh&V1B{tVLE=}qV%ZZ3FoH^+$)2t*C#6Q!;BD& z1(TCOC;M{DAvP;(93zUVmdt|j6~ai75&^;(efC3e$ z;NZy>+0N$i=5RnzmjII` z#*JbCPqXT3+U9j!|HzGAY)+z4O0`(Kz-U%;)+T+pv6AL@WP7&v85qj;pN;G|R9^3{ z+r*=rbp!q0LITC68&$HfB;)N2EI#B?Sl{g>%G2Ie$>@cV=+zg&Ayb4gmtZcyIT5bS_K{G5j||ybUK)h`ra9@x!MwB?ojlZ!TqZ%p zD1xgWv}E>4MoZo{GM|&2M~vjcC2*sVKT(j{15?Ho5%U!zQCUbL<^qv#z%@(lO|M_nXXOGBY|>LltXuPS2fKWx5pj3BHM=fpu*#%G-5 zY5qP?rmciVsK&SjwuAnc;=_A^00D3 zg~v*>ko{wrV=p)iGS{PK06h!>by0s?fw(BuXklhb$x?bS7oqFI#=A=-D^bl!RR-?F z*qz`AcwiSdsb$<9$YBW~cn)MsV08sWOMVsNH<=4>DMbHTEOVq86%0}0@7$x*+5@65 z@FqE9<0NLJD3Kxrs0#6f7?co~_>slPvTsa1P14Zu8VSx4)EmWAR8(-(he7JLT=jGL zxL54F;)WQQ642TM?c8OCSBn>KbUCqb)At^Og*rXm-M@Ofyoa+p1QIB2+)Wh;)3wfGdkU9t^5>nd$W2`Ap)ax-Y=*F_wv)L(Y;bNVq?lNZ-Tjj~DR< zuRcPpE1P_u$SUfbU(cE;Jy7v3c+*u(D$XnTA&&S?SHmjo-1Du2_!}3UU9J-(oi#t^7n;$uz^e>I%F+fto_)JjC-(h$S&Qh5Z2^PkEGd`pv_Q#1sR^5zks82Z1R zwsrOPlw~Hm9Xu?Q``$bp`aV4KQwBsX0hRU5h_2S9W&3J-tLIm5uMgyuD^Y9pD@nl9 z9&E4YYmW8t4YdC4?zh81zQ1MKJ?@9w-zS~I@|<|=9iQ?fm)(v86fpZnbzJb0`15}V ziZ}_f_!XV|wB}w|e*cB{m)Uqi&^wfAn%u+i{v7{4Y!ZdtOz@sGSy-*#n&At_$|w{0 zhX`@shfT;UA4kU|m-t1}rEOo?D&z4YUHBvyLFt)`Y6M>q&hJ~B0L4FOHtC5u6zAd+ zi?8kjR--4(<+X#PcANtCg`8D!IBnEC|)uy0K1l&PyYTGwj*k=>m8Em=H*52F4 zM-JB_V1zpQCAuWcde}9L1;6wNn2U>%;_^NRi@?ZV9AO1f)n?4Z?;p54=S)l;B6SPW z!^5)>q?20cFkzVPaejS;<^9Y3!)GIuari;wMxdYUVw|}ArxhOC(;QX(j`9&Z0kd_89qtE_fP5betR>Y_u z0AXZDcXw$^SXHA@z1QP4JzxML{d35~gn0GzZewHf@8xaUai`$ftq-bYQ5HqkXm^sv zh+FW6X~&@EfHpcp{{`$fSU3a<+Yg32H5*6F;EZ|XM8a!Bu&&4rIrJA8)ph55oI+;s zqUTR>7;!V?05*SCt(gP!cTmf0^G)r44l?McI1doiDSm{ZRJX;-RMy2xEyXJf^6%vPOI?+m9Vo}APf6O|+#()P#OB2EUBgzhQ?dG1n) z`ifLQkB$B%bSz3nalhY ze*nKxt7ZQq5V{1UQUYUDw^6dj-dHm2Ke^{2=ZufOs{LvIL*->(`E9NV-w|^5n5Iv(r!Y;TnrC=1oNoO0 zMm1o3#{ZV{Kbsa-*7)&xN_Lj4)l0$qx)%cLXKtXy`nKu-xU~EPj9vaEWCC69h)YR| z1pTjmr{0zej+WyG1kLpB{Dz!-GDdG7!3tFs3X)0*wnvwL&A zY+W4m??eH|>Yj0g!(2GG{{!TXZx{5-XU46U%ov^HIo#d<4=Mz3Y-K~&wPvqzbPmDR=GzSLtE!5u07=2MeCol7y~-w`P%!Bt!t&b9N2O5LS?2e$u$ zAJgOlB$GxcAT;((uVX`i+~DFTe6tiSo182wN$wYm-*Q`t`m%PJiyHVe?dW;GsbjEU zQoK-194o(L*-7+^R)2`E@voX;0S%anV^Z7GSo8sm&q zx+PYKPeLE7D2wQd2s0eQzWlWbxLAKIkp94e_=-V6vGxlxom{;8E>f*$oKW9FUeerl z8gCgE?HJj&PIicbzwUBFmbI|~I=%(X*S~E~4v`AO4I*VzS9l?dkCK5eL$x56CpXlo z)YGMCs$&_=`b#iiCei=x>H5P*=sVj)dl69`g2Rc6%N>>cn)%}GTpJ=<_ty^vEhD?m zCcWEkfd1IO7cxm|{l8FiQCzAmFA<7Xq)Qv#3Sv{56Q934LX*^XOsy@So$|me0zYFO z2l|&-8XHBMtCB0Pak=JQq6SGRxY@u(k(D7; z5@#vk0_meWi7b7(Ofo`uu0Q=1K9Dl7ei3aUz4^RpWB)pCKF__sdhm zIIfS%6=y9(L3~D0auEf#oaV6`9Ca$l%p4PuM7VSbL=c&&?WQx}DKnWq@y#%dw^ygU z#0?((6yP|@5~(HlBH8wbJ)13?u9mH3TsOCIMybati=|>8)+%lkYEDoKDao#R74rvY zDw}R0^_TH4W4hFA?BCNcCUhsXCMxJ9YEo;Z#KDX7eyrEbWyWjB@(9`#$V^Z!Q98G4 z=(X6kg(2(2wZW|vG2WI&*~u}~p{bK@Y|u^-+Czku2+FAu90?a*wJO{}R^Py$5|RA6 zd581LI|bwjBa7tPGENx71BDekx#S^|hjMWbrq#h2N%mog8j?;;c;z!Hy*&CKD~pu0 zD}4HeV@$u#$)gCUwg+F6E^NTGV%893aUmPOp$*(E?1=2goCi78)jzNTa}kgbc%YI$ zV6yY}@-Wk3SpQZ|iA+2`bjtPDWy6j8)N5O&&UdevP%v_qF^sBq&I`<&>7}g%$|w!x zPtCMUy5FTDO@7aXuXKR$zMoH)^ne+cha@r+JQGTt-)T}Az`wTwr?)27>|%PL zLIFe7a*>bmi5wXY-0>NSGqr4)})F0TW5~)3odTq*p8)RR^RVidya4~Fw72sg)m}p3(LIN6 z4}TS3uO`xgX_{xD(U0CRp6VQp>bxv+8GpO#LLc15f7x@qa;T&nNwcG?>y#ZNHsYy) zCR8tIv^KPp8cW+F=h2lC-!*$@;i44-^7!&fni}7?xuHMUJw2dy2AS~c;|?Z>$oKi? zcm>aIZFm$83GiU+R~#8Z&G#;ouPoJ&Da9?^D(gw@L?sWlgUPb#g_hr z@!L6*K<&e1!W)Nz~Qw`a#1cYCD=f=W1l2}^ka8@*nUSqJVd0i zG|IQ*O`e&g|3PF1>Ou__8y0pkI3(+d>Xoc=m;J$i5*zn@3#eoJ@Q(T%dX2ID3pun3 ziPmJNW!=ycushyFJh6`>~1GW8y;!*80CM0AQKP{e(Udp*IrRM)|1q!d_{dE*|0;wI8VwolHMsfYX<&1e1oe3t znBc3kv(3R!+P|E79-~x_vKd9d%FSDIrRlW$(9 zrb1){Oj{dZ;<4);$L36jHHEBgT1ud09>hh>RC?jfv#p8na32<-&qHT3Zs9JF4I7je zl8zxh8gWey-Cx01h_4o@y)kSvJVB9VS;Jvv zGZ7Qc(=TiviHGGQc(~9abUn4y(Wm(Q@R752lwBs;If+Q%Yo`<8vNY`R(fJsqbRxLW zQG$xTw1cElH)y#$K(Vk-ObX+sY&(SW(m$}zNA~n2q)81CsA92=(5Th|3!Bor)y&Y@ zq!W+GLN0zT$nz{a*@?FdH0zUGjb^2nuR3Nhlzur6_mpAeV40+HT23EVH1o^s0eO|O2XhPQ^- ze(9LI`X>m(0aIGS8^fUAENB>2ORPD7yW#t2huU$UWEY?Ho)oyAfR`wMqNAQ zqA&EKFTYjEQt!ENL+>=rgBO}rOYIdyd;OhX{yqSJ?c7IS;k*4`zbt*x_yL~x`Bt|%y9^&;kY5|Ys z0egU+Q+A&s@E<(%=Lh5K+4<{P#{Zrs;7C2En&!!{HvQgdKU+477upT^eY@YXCW>_y z2r#oCQfb*tWBEO@KRbJGA-@vjxZU*K>|DlwnsS+#&;(){ElV1aAYHn-qh^E%WRUY+ zz3rn&Vb8I&3MJSL3Ufh1!E&%OU`z}Pc5mip7-o8Af;6l&*crG#;AKj0(LN-Au@~|9a=EdyV(-hy zAeZH1Ow5)OA@+MR>~d*fy_SH_qS8Xbt3jSxG2D*IcOgtOboS_@g^mQ)vMyCT(eMFS z1ui%`(DMWfoUPY?sB(jBN?u?JpL8Q)45unWgaluPxt&gk(bD$~-FXu2{4N{fgJP{i zfu#xMRv4LJXj5d$g)S_Sfx^X+c@p^(0djCtH4HDhRKKUUdtaUOYbAbp$DicLW!%;O z5y{JQ(zYxRLYDq8UwkskF~SsZzW%qpiVWEE#*703y3DfQE@}UDv!W9G*dv_?#3cv} zQOKBFpPAc{Ryjjx7rn!*9F)v!LHv@3B`l#7S$qd#ST*z-ICJ15A(^%U40zeKd5xl? ziW0Uz$1ti<@Fo0z+|*R*SWbisNxgBjQ|A%VP`LI8%B#M7pxF1cj|q>8#j?zyD<0%4 za3=|iTib6eYb;$&6Zp)&nZUge-IzG8|Mi-rouuNo*e6wX){O28+B;pMiUetAc`h78 z1zOyN_fE)5D~~}EW=g3I_Az7hTi?<~b$%l^S_&8>5&-_i?l|7q^ZT0d@xo`qX@Xf+ zQFK#gTNOToi^IH z;FT!0s+bP)jv}XcJU%-2fhAd$Vyv2cVBX_blz%z zc8sBeaF}zUy=+Bu{9b77h_?Xe%ZJLnA2xZmj$9N%zdwpoFVNRWxfC?Tw4ye}*%35= zk-ogQr@oGOWb+)27s>;bZtckQ*GWq~WYsRTiFqkW;lg7Ua`usaTBFz8*m zrO7*w;SDo~@qo(JAI5P12Bp_e7slINyS?2$knGOR;{LvweBu3b(YKt5o2)ZITxT+K z^$Nz?239QI{XAd_`?!!-?Y%=q)uz8%tqwZ!wi_9bRf%P@^H7ouY-B69xloG^-ySY{ zlRKc+rIRu%a&}N;`b!qd?)Ffcx6>*#PiZM4A{^~zJ>8Qcyw_Z(SMVO4Ilq1if*Erb zVy~DcD&W5ow!p=oH)BgX<1*x%PgN9hhL)~~Su66n@A836>!D(EA}C}B(xte5@DYQ6 zbUUGAvQf0BK#mA9qGeX-OE;nx^J5WR$O2vrer=CEoeOFR_P}?}B7cEvoWO<(Oo5sa zo-ib)d_;H+tD~$-}SnoRp#4p1U!*O|9VJpoAlt!*ETm-qQ5l3Dla|&2rpzxTPIIQvPM4ow ziM<4syXdi;YX2IA+H2p+R|IAy8i0>DkkqL;o;uVE@vJu)7~OPOu9fi_zTjecVwIbT zAyL%5qhrX#uP}R#nxjs6ATc4ej;EA{lERSq)=fU5l-VXdhTa8oP*^ERp-hplNLH&h zQC3qvQm1N13|h{gQ2QPO#{s47T-XKha1HtZGvgYjf>X!K2r_!MJpW^1>QqqqA%d>j-uPx z+bhpUs%`oO&&#@zqZ~=xVQ(^sjc}B2w;!TT!vR~0sm`dtDWjPcSMZ@xa+#Q9aKAJ) zI4w%SlsoRyXtVze1uluemYt9eM}gw>4mlj!D{FL$GRr`r=6js>a?_s=I({>=;lU=N zvP67@sFPeL^%!T8)U`JZpW-&NqL1Q?$D2Oq(60=3V@-F=uZ zE`xxcTBjNZsMRLjMr2nlOBrR60Ab0o_wZ{Q07RLH;zSY9L>(y6zPG*IC_`ju2haB? zs=-l%EA3sDz_Q4tNgKa6N5zHSIj3j(dt}rn?Ek67N(^I|R)WMoc}2f5H6vS;eVlNI zq(7#0v9z;3^lc!Zff0-3TpI<7RQ}|Ai!D?|93v;w4DF!?jYjYeer^n~LDEF?3bgQ4;ED}( zg|~!j>rjD$6r)+5m6``Yaok1AZapz79&X-c`z%I6-hb_y;YSf``*MuL@;A(JKLM-s zv&}qI+zQhdk3#67+!jHiriHdh?@1UO*{OlRS!AIePIYlN${XR;+!}>^&g>sir{2Tf z-B#WDO-4@mVK{!GviPH!EFM2|=JXz(8g}~vu*+=s0CeX0&&OT?hTdw1nS9a1UU((M;&#-jki{e(?sf?(xeY4v*>XP17^S+Q+3I zP_wb-i5>!24iFX%#u&2;jUX0AVDa;QL=k$WAuVTxL{s$tzI1)%!yG98`x0Nrd?&CU zBl=;&qiNGIUIs?-I%v}oSeX8>_Rre=dq0Z#^~;n0I@#ODSI|8Sz`ebHB{e<>ix0{^h7*Vps6Jir^Rz&mX- z_y4WUj1easTr=q>#x4i1Ae8~ABHSO1m+1K`r-OE`aG&RLVfx4>L#*kqFMavB^JC$v z)T7jq`m<+*80dDrJ{2>gnh`JSnpX-h>G-Lj?S`Raz+ff`aG~j1gAfzY=hQH-cW9dc zAIcCOyWlh|ZhB^-G;GDmxOan{pIfmL;f{Wn&??H_##xZqA#h87I&{|+A67@^e?=1U z@*kL0PTwh%)D}U;;%IOO2^gO(&nc=Zd=fGk2M|k676pvr2z3|4*gsf~RJ9l!{$M}1 zuUjiE19fpqJnEkjL!i(GgVX}?v@-L>(P6dGF{Jhl4HLe+;Y9JVM5}xGLN5Wa=Pla$ z)$K%jn7LN^RbS>!=SSC|;C{CE@=3hUe;VhnYuf?ug@$lWL>=fvpa<7V_oae_nyuoQ zq4+UI-UsKOq}xCjVb0NLksHv3S4n@uv}ZvRk;-8shR;Pzy_l?iFK{l1)&zCsTxXhh zc%?+Mbc9*>Cyc-82;snqhV>nj6yvK+T{xflECGn(LkUBO<+WQz;Rh*Xt=ZakQK;m+!-z>Ke z)K$)lK!QTCt5t&d$^CK32DYqnZ$C)jj6OM$(c+j&U!Bx>o2|jr&cOG3pPwYIHM)6m zDiTWQ+X0pN8fPtSS)DisrkfvD2?mFRqMBwIsJxPb3y!rNZ-WtKWnsnq1V$!iNnnw1 zEN3yr0($I&E6Hqw?B77noh9>HNDV|oR+)r_^gC%W6rA0z$#`eN@Ijazm7c?D>~u~) zVaP{sRt$Gw)pYZvvb@%3O&Tu|&f5g7$VA$vOsTGfo?ROjgN7ywB%^-&ei^^M`!XwX zx#w{FobDtDV>j?#%FGP5#zhB}k=!R{@_m|XsCLNQpffo&aepIuB-iDF2O7HHgLWE?sb7CH=hGc|&r&e;u zRo{yPv_}*XppE@bfSm?lNVHn9DmkEO1r)nj>+E#+9NY)1!4=SK(p>ZW_e9BF+3%4` zW)=2fYYWRkIm>c{--LGj!qE_+k4P=7ZR{(wn_E{sJ@NuFi$2Nzf&1_L=-4 z3Iv@QM44LVQ?+GUxm#)4VPa2Y5SP)XNzHSd{CRM#hu=77Z9SU0 z)>qBO)6T3`&6y&_5UH*CcQV-%GYUS!B+16CJ&y_n*djs4wETSnDo;9ao&3{x+XIZc zUcbv=GGVX3-BidcT@kC3`1ZSwgdL=4zB#vsZHWe~ClxU%_u>K-7~_}+>}pZ_99LzO zQq53MP?EtOpXz4rP=-Y!&4awwoM&X@Qw(-6ZOdbHh^G!ICu=s@e>XpqQLT9h304Z| zzi;{i>nwGW5kP-STm9jn941Xbl|V0_u{|KKPl@>z{{8eNL1-qF@M*CZD_YeTT+@LN zI8ptGxGKtgcp<}zw2qgJWmxcs;Rk-=$FU=Y=3drmHiBwfT~z|5eMq^?sRQgT@jY*6sV6S%d?M)L{eN2sDPJ+99V* z-rA@p1IfsEsQ6t)3_fgm(8m}Aj6rGn=)GRP3HT4*#De?!CzP@vDg4bZU2rh+&bu~T zfs-=O!K!S>xtc`HxC2Wum=v~9@_YnWq=qh7H8<=LS$#sU9oAhpy2otUhp^O~TZrM= zBkgV@4eW8KKQ5d-zzTyPa7w9G!UZKO=I1oHd8<&-y8;gG{n9}VvfzN3KvLK#S;@Kt z8Y~%T^zN3EcPhEad%e@bNk3*Xhne>jaC*rVi4Wb%7HC#QCJCm z3Ji$`y^>~bEF2=k09!1GVk;A1te=p=LAN9tNR}a#VQil3k=-J z3LHR=>ki&AV=Lffek%OEl8g|Go8(J)6tM zd3JUdmp5);Uy`!oz$4`Mm@;LBB>_VF&aI*e-rG8DUq1(cKz#es@8($vDx>@%@bcWq zC)73+Y*&nhqqUgF<@SWop^jzQ!E;^|KKL@*SX5Bw!Ee_E28Ad2hi*vYzxs)oojs(i z#;xADf;7c{$An8E3*qn6?Q_OLXM~hpLQ> zg_KPC2XM71?M)Py*%@YM9DJApjl85AuLKt?6o$AZJATAD6gfx=sS5T=EyKfUohKcp z1RIALI)xki;%seY5nUd9U(?$pTiveflw?y=im;JBvr44hhq2;BOZh2*lvc_z_%&Q2 z!o)SWR1=h&;Mz`@=R17Dh%Bvyo&qX7NC=|xQ0cl_M1N-JUqCFdG8MClyR{A{7fpsM zk5F^9;zlN}U&e@w(OrMJovgbf+!0%*+ zvlRPmqhnC;ylW6x+v(Nn@H+WWr+GXIZGTojGai^aP9Hucg6VSH$~G1CeRkNh@Ja@PZz5aD~Bn@Vbj<>xFWSBs%cF%cOwrd zhS3qe6JKtujKX&&rfhM;piL)PVb1I2-l8Xc`iU1Y?&`olce;%vHmIU(WHkFVLy5aY zFyWwS6fRomW8fAm?V3T0$lN~o=pczm#7OS99!B3nwNZ<)66x-32?Pb@NKuKcJ@Tqt zS=_x-?0z{h35oPi5>Tw2NpdMht`y1iAc9ej1PgL`aZm`h)F9xkiTYCv! z6N3^Py9}>G(9Q8UX~EetWy)s4IW=HIPEBn)ny7}583f_|A^i}vwe=!8zufv1+}4RD z$mWBOx)`>w_XI^L{LQ0w?m*%iW3I;$J06-g=0ldO-{T=#eTwjc%3=d2YK2%@y9&At z3}zn4ZpeU_kS#TVmU^&SEa^gPR$8KBzvzVyoG2-zKg8~`_!Neqg8$C*`ZF&Cc9PhQ z>J`(GqEjL1d(D`$d<9D{c>3*OerY{`7yUEqwd$hR=il_fcf?O#-b1*{=d1DC0cJ5D zTyC3yz*z?#qW?|O*l=ug{qM=FWsnds!>hPQuiQG$E;ogzv}Vi!jS#g8#CGD-LWV$ub+cZ7I( z-{zxhv?OO2jW4e67Y8l}%?7X46<9HGB+6=dfVI>I2Lf_?Ty2pp%~EBoG6Asn4{~WR zcLwq%zO8x59D8U^sv$ThRGfLV=0`!$rrg;8W5|?6etYA9gE;r1I9I4^8(vcAAx$MT z1N0v3O^kPqBq;K!hJfepL5f2PhN3uW_9y7Gg*~Vm+a4S6E{Q_uAQ&6?)<t&wY`#(coWiIuG6U}2F=!(zFYlj%k?-R-&?i&Q_!Lvf1Opg6>$z8%P5 znowz>-S_BAlQP}#UNf4FFh(qljRqZrz)#P^-5%&|w3-vIbck~)H8xTOXcMHA5jZU`8MdTcraT$&84($WJ`SO2??LEM+QM`~Rxf^f zx)YbXlZs2CAj%iefP?Dhg-rAM5cgmW+43vkwPP4i7NTeee_S-D>?fdbAxo#u2Fwv$ zE69J$beLvqW_z5vto>n@XE9aT#6AwqUo~4 zCa=9BZ5ak_lvd$FuT&YYSi& zWk-L+>@}Zg{fz`N6Hr}gI>({$k@k6OllpRcJ-qNvYTmy^gyd<`99}Z*s?b?W9ew>k>eIl$EcS0Kc^7fzQNZ3&mWH^)S&A2b_JZRZKFSFZvLPq#k)NsFm)Dxz zv&q`a=aZQ8w~KHguh$$anX$~y@^atbuYOzKeeT@6ZlOLrn?ri~`?iI>23Gl!wD{>m z>$f{0AGbY2c;n)1&SI`6e+aqp#0h27h~&|dc3$a%I>^Y#EyxaJ~lU!Jnv zvZn6a_abgcje7aef$#7ECFdvl1&bK-_Hb=Z%+HF&$|(xN7+PFgp-TkanDu*cK7)ri zBK4PC=6P%!Y9V@mO>pbI2xq7HolIZan@1%&Zr2^dr5_k@^#%5Z)3I`>CgWX}#NK1@ z?*K{ZP?(Fgx@Al7%^LO6=-E$ln1b|7_(4Y_QHZD z`N;iJsujc~2DYi1k%gCN#H*}hKWldW*UC_qP2!F!p6b^^g;M_u!#zu9EMpY%U0;;h@D@fzoxv_ zNeMP=eDGXc&3&bSVG6P!|5~fA9)nf2VVZ`;gk&}SGjasc5Cbel_YwY^1RtDAkrepI zX^(vwOCoMwnR8a9Q#(})Q6Au3{7}=aT_Ubsu32^w63`Ey>tjn$q?3z zE`QORMmg{TDADD<_q*Xh=Ya09O^Dp}a=YGjvt*E>;kX54pBF8@dYNw1^)caA8R4c~ z8?-q#7}Y^jW^*aAs^XtB`{iQj5STd%UcRy`kHLBFOih+K(PJH5K z>caMNqMLQ1?fT|ryHAzKgYzXYpH<4jUZ}~FORozxW?zmzbYenrdS(Vtx9$2J{fR;o zxex4p19Io)ie-7DVTuAo{hu>>@1uKn(*1v@qhpYY_>Shktma=%Ir%47S681p?Og6d zfd9bxSo#;E@dF}oJW5g=Q~n(u-!$j!%6(XCYdk|kNBr`rWPbk(RHqF>K9+Eo$3W;; zTdJdGhnd@kp)R|e`=2DN;Y}$GjJ5&YZTr;mdSywJQs2J`UeXPdOn#*pg2ya8gzgic z#BjTKL67W&AW?wSxTu9Jg*>)$5@@sGtAZ&jz%{E{qhq%8F|Y^3CtjNU@YH40pmT+1 z>=dwfRHM+q>oG$=Gz4Q{Li}R1-vB97&19VP5P>>b9>jr=s^sZ8%>@UJ@f!H#;${=p zp*fXVp){#PN<``Cuo24B*npx*R4HPrW!6oBozF(q#oB74>1I|7J;a^zq=bIM$8ruX zzIG}t0E7Cxv*p%d33o6NA3kod@C{vf1Kx^#;~pWqJkqbQhVIF zznvm`r0!U5fWi6@&$J2Y&3IgS0!606sP7_$JTF&*ny+G*&0uoib#ZG#LnV1P0XpOY z9WeMy>j>gThll;X6Oygx*>`=vHHIZG@xZRgD}^CVeN#L%(MgVZPUf3EcpD&g@yYF7 z;rS)ziQ~3tN1QaPr4LD41GDDsYd|X-(!=3@;s88@yu(0fRK6X^@OVmbnt2seJ7QgM z<_@Mg50?}}5htj>)bxN0qsmflsP1h@<=V}S=5vf$3>VL&maV6K1b2$eJU*EuSrMac z=bnao_&oA~Olv}ZIKLUCS5S(}`L%8z&LWWs7baGi9=?>0s;;K5P6kZ6R4Q1WGUg&> zHS~{!z2BMkZz39<3xW)nOtwthNv#3rzVIki6g`3(l9@OQ3YtKS_1o#;ksE5l9U2hCV+sx^7w_|A-tASMhX zTOIxyN5>#TisH9*ftUJ~hx5~PV51)ONK2!hV6|x(U&-Pe?I6=MGk;p>(u9s;NgBaH za4XK3&|``9q?HtwBZNz|XpOKfNQyGDz|N-5#-&8d!bZ-$1WKy+_K&B1JCA;)u7)8_ zvMz!HT$A|cxF5J~{F^(UhYUQ&F1F-LkB@CQG#ZZ#xEi$^1vxCQ$7hkKK1ube9)oq^ z2%KTFikK6b5~^{|SEEOZSiDjf*#4AM9(~ncK*;`(;MRzixP0M1*44>O40O&v-G9Jv?BO zo59u|{$BDTf8JLAx$hGw1zjsb+CRQIR5R)ln;f11wejcX@RMsxw^KSP=lgS8(qE^k zL|$LZx31ubded!LXSRN$3Tu$(b;O>9td#EoYx17ct2S6BFUQ4N$Y*mQ{Il*7Rq`zJ zk0UosuVZ_W%TZ5XA)nuWv*%pR^@;b8g;Bt9jb79UEo7*!qUh2cr;dAa$By@bLFie- z+f{(^5T9ed#7WN(@;a)uz=OkDVj(Uhw%?O8FF$af_Om|NteZ3_4)QnrbK_N*>EiD- z)*Ep52`IESRzFkty4|CV{hvOc<>k5Rf<<-$)Av`c2DgR$C-VdDchekmk)eBm0`NTk z@|2Sz;a1){@`LNL%Lm_+W5#zQ22$ml!=dVCrem4*?U}iSVrnq!BN#MY%j0sw$%x}R zR+DtqH{+R%w{gj+pyL)@Bi@{pS?hQ;m zt*(0(C<{A*{*MLWfK6h?bG4-1dKO#`Ij+lEcD-cLlptk~v8=Yo@A=BakcZEcs_ddr zV-a-ee`#3TXRtaTmRM!_{Je)6OB6!sGOn^^k%qJm?8Iu8+_4ssuoy11nTb<$wGDYR zOZ#vmiLI-BKXBS0jAx$vDmsq8Cg;@n^iXn;p^r_6?eOU=kBs0D`G*}*cQN&fyZ37S z&zwE#MiS3`&wYf11=qdt)wEw%(|d;S&bNf|k^jx%`H26t+#;{{=D-YSQ%gZvqr6`W55a>9Uhc4v1DRm?B)GHbD7n7l@B`^b2yWKwdHeuc{Er2 zIXgcu@9!_xfXsT|UyQtG_vO%uYF`6B33*-w*8AU7^A?NZL|=;8$1d&hwv%~q`bGNa zZDl{CU*(TDn-JJfiV)4fvkr2!GKFzXobPKxvLB4Ub_cvRJqwdu09mEbPCXAaXrw|r z4_Ajm-q#WFqOTdr$yk70J7CEUs0;sgpZmrCgK2>8x0v<_^F;i{J$kN4{Lk zZaxMKlfBH5HLN;_%-!V!KW*!|VeWZ*Illq|i=KQ>?Ty!`m4PxDIP<~%D#u#%zU}#U z{wvP*>#@hfYxqpYFVx-#(dYQ*wzuEw&yOL?*;&!}xo9_uC!LbmV!ESA;lS z!4*5o=n_2+S3lxxE4c(;+G!IsuhBQ6xh zPo^>PO@!j0Gi|XHN`;RkvebnZQUk|@g<6_27&&Ttc7K}xXt`L~D)F#;f#<9bsM&_+ ztgNz~^VJEdr8n)4$Bn@jQslSXYI5(Zq+W*il{X`%PWN(Q&@4F+#*(pIp=7_&cxnP- zqF_BLYH@k+d~e_g{ayx`A}9hK4jN%Xra7?CLMg(U$b=LQN@CPTXj9#PP5cS^$!9OF(-hY( zc#IK;x?`xWvpc|Y3}_L^ZD}GCxiGdqaDD$yq<(}7c)Y&E7Jt62+h3Q8`~3QX{Icf9 z+6_6~S@s&Xx6LP+l+bwu6pho<(;0^ZoSm7c|8+er&Xav4OMJ7$2ZS;nEfU&6e>|(X zR>?~>v@;DHp7R#XPmL|Cxpu}aMs#Cid1dlD8d1s$)pDljF+0OpjeI1b*~5(j!;`wT zsM^Bl!FVMSY2148&Pd;v7i}Fg1m%fc@sY_D#tG7AEp4)?7IQXY${4Mfn^SDw)5IC@ zVm2dAYE@4fWtOnpg)C;2eip`z71ucSeMC*XvH1U*kC(qepVv%@vRv2;{O>xIg9eB69-&w$b-Y|pPM%-MA#=rK`u3|gI%BV;!=F8`*$KfQ8Y%n zbLq`V*$}>1<`KE)xnOfb9n1296)+AXMgo)I(LP|*_-w+57&6Ynm2(dZTlIzp4G}U1 z$1<+1d{Z8T8%)L|`h=5J(q4{GDj->ltTTq6N8@YL9u#d7f~jmhS*t;$WH+gwIX@)B zF^s6vzZN*d=s?WI4USBa9i_^$$4{t$JgXp|j=T@yj^ag3HAJc7Fr2)g?AMdfmxzPO zNBV)?1|=y6XCJ-*(n^WpAdg_w79<0)PuDEsH@lnvScy5tL-Wq|!%3=ice4en0}*m0l@X}^ zEKCQxXI+wh>0sLdA76YlJ`>8FPWyPpV4UczlecE7&+?I4;U^FoA1A8)vC0j2sDmIMSXw(zVS!R%?}~;~Tu< z5Os=Hx1zFx`ZqPiaq>(mOj#h>~}zurFa?P)udn!x}g7l_Q_b&R)p-G3DIvo+3RP zbh`Z-fYFLCR3?ccSnY2p&;){O1wa4fNo|q19cSn#a^SmR zbaV;Q5nmj2jU2}*Gslx7L{HnG$=sn6(8h*P$2r)BLps+M`HKd7Hl2Jp z$-gawgQ=B)bnlc^NM@pjS+|iJ(Dy8D9@(giGxW-d05brm16#Jd^VoG;cpZQ#b zsL-muz&k+x{8XXa`hYWLG$1x9_}N1el$Nl`Xdq(+T-vpN4kf6Bj+mE&UVisR3^RWr zY{TxjC|JMtlakSPzMO?+Lwkb47#L=(5Quh>xDe^~UJt)621&W1vFD4CQx8X*!FkU5 ziklRQyAWRgz#+~G?IXmt6e}I-f*_BU^c>|RC1`xyVU`$=Dl1GUa+hZM(^PHi)TzoP z7r$)}SZWZvUw88u{vBd}#XH(|?+huZk;8nV$&zEDE2xo@RG=I$Wnhea7|{pslLo3a zl0WLuedYU8FqW8ua5vN-loYW#o4_f_OSlpSbd2oyqoFw*-^)k4=y;sgPY(kvA?F_3 zejc4qtyQjfXP>sb4oOIbJe{x7phe7?{-S7?rqm(BFpWP>JWTBQYd!jJ^aS;r$vH1v zn22sqoOx-ly}AWpo}}}86#w|e>vHvcaH)Ox>6TJ@+-SG&x^LPLx@t=XeGdFwn-}^5gZmUA|m1O(5_Z8>uj?6#V$?xz<$jfEy z;a8YT@b2`-_UHP#``m|v*!;N1`?3;&{9)tnmux#^}lpYUP@6kQWFLw>;9)-o#ry)Ak{-e)Iw&2i0opxaQ zW=pQ3Bh^hC4!g3HymcySDIjvEituuluAsWAlmc*hpgDvjW0-V-Xku)sIOQNZxBsK*{dp@XqI z}0XgfgJAgV4D|MBYW}R(q)P|}_x$rx6Y+Y-p z{6{&epID5cOGQSCxmkh{$slC7q+o?ny}%M3T|T_{V#E|b(I;r-%Y0;@O;{=v zn<*NZ=Si6O*OpTO9!;Q64Hk7YH$fyxOt-Xo_@x||>q+qX@q2)o2lgHJW;J3sc?Cp5l80#mvwR#G=pOKHq< zs8_*9&-HDX40Ze+8KL2^4LP3F7x2?I%^l#YDddvZ8n91jPsmO*kQ;}Uqfed}Q-IE- zN|16Z_bTp~t7wPY%uMDehv}fm+p&A+k_7c5<0?xXiAX*MAqxy;WVdT7@T7F5P|6{1 z3Ah0#bMe$RUPd+$5lstdx>45?t`gdb>3{~+U_gb+@=XQFNlMPkY`UYF0Eu3rvNv;K zk}bqaeR~o6bEXBfC8@6b-pY;zkK=M>y)-^IK~oFc?S)jSnsg-x`c}5v$z;45dK6M* zl&5qJK(gL5oa4L7JUkF`inWuu%HYi8sZv?&GEGygGUS+`UgC*}ouwI)Ezt%@gS2tN zRLJetGQ4%qNsKd}j3ESz1l|Uz+5YmY!Y+-%Q$1>2#g|*`LKVzwgsNzwHsA6$Q z7&!=ZasVR=bwg~4KKu)0A3Pc7*eXN75j4E(TBhM@d!q!Mn*=izcd?r| z1R0Ss&PuXrcbJ@LJ^*c2%@&)GvA%{08E@QSVs{XTry|zEVjD`>v?$T*c8Gf!XiQ$iC!_j2p!b>2DX+FAdKXaOyo5q9_Xp{&E+DE z{r6FN{O?$dyZoM7gpuFvG$inR4GEgC{b@(qkwsg^v(N<<3+8+mrM40IlEPhvh#I#i z0nQLHfl;);l(m+Rr#ksN!f$Y9E6OrW^4Q(~`IF+R*~himW8pf7^uaczjU->@&u?0{ zQ7fntf?XJSP13NjJ+X1-JLft-X6FC%&b9aR?tQPd4CW~BD-R_udl)XRzAOyhjWtbA z@}VQ3Yjkzyx(FAh;t5yIDT#0X@fzRSL*fkDi1uUXY+LuIi=|;jw_FG!)~dmm#pcq{ zIZ7xA9L9W%Z2x4!|KxN@l-oin04YXf61_b=r5)p!SY3d#M7{4jU|vq!)I``3D7-ru znfm*uOIX_h@0m{^-EMc+ssSQ&ukRBxs8{dN9ouEMXHpy8y9jI7*Rr9NWVViyZvLoC z9GObG_-Hd8?l_Fch-+CC%IuGkev~H1T$V&!iI@;qFv~^%Mi;<%^O=hp(aD&GCMh_| zlM9kAT^Wu_3`7e8u=YuM{~8W1iZXdCX2MxDMWM<&MPx1LC7^96Fw)BY%jm#G+F|=_ z4{ZahfyzN>!LOt9z}B0|q#6_e`@qYpSBvhD0kdl7*?3Af6*nQ~qzd`A1iz*AhrZtQ zfw{gta)+QOaVjj#Na7p*5|AYrm?tyEq>Y9JcowfJK>`Cs1Y|73&1h$$;6fy!q0vWE zhsTV0!&%esR zIiC1oqFsm|?xF>#$WIv-Ro~YKsa@Snj^^|O3U*}>JOvG%S)dogYZ-s^oQ7^PYJ&|$ zCk{FcHB4T48y_hu2}+)>cCx<_Uk8aH!ZhlS5Gc2$!3WnH%ed=ntAGcj6#sAY^!2uV ztXuI_hN+~i47wY5wR-`HYnINA9AW&CpSwYVh&cNx-1mKhAiej*n69i(+ZCS0jP{S` z0!*GDbDkhSk*f#L1t<4hS>f$>yRjrF8iwzeQ3G8WD}7k!YPSSu z0X#iN3*ILRu21GEh(!Y9bgzq8LbLt@*581A%JR`56Ue~xw!m0t5GbQhkqm+STmhb= z-`;GDQkseXd0%P$eXeh+@3Y{O>g(<6W$gW??*--kvIHRmkMD)KLdJL})){F-xP!0F z4`f7R@_I7&9Q1ju{Bk}8L-@74q8iP&2G;NcbG(h$_dW8xkEDBSTkw_Wv+L_syQ3ya zBo!r~AFLsE2hQu~Td3l7%!-xI8;&c-Uh2V{W@J#57!{j9OZg{dG zWALz*U`KLZ!%^h+PysU`*nqF%JVuJM>f}OrxdHY)L*XLy+fd5(5xp=^2yksAi4aQ2 zIp*J`8t5&SVTi&%u!lJ&1e{|b8x*=d!i-)HeX~kH8>|o&+EW(Ld{PQtBDV;A3n~K% zc`$R^{8-ER`xb7~>c`LId5Z7c?5zsrt)>pz)2#y|QF%b3`6PwP2=IOM@8XK8hGyF( zTe(#{rYT8Hu=7@;U&K>gQutF9kKh%$n zsH)w3vOpa2-AY|ZNuAH)7=XcRM2-xP+dv#@qPsb!>rcab50+CI&BVZDp?rO@=Ex>d z?hVYjn4Dl4^A6g3B)Y+VF;PO`C`x*gAYJ@&r^B!rhJpu8R+3Pv&| zUmS|(dVjK83HKS?K3kwfz|J&IqlZm5)WO?Hr*-u(C#*c=FILp(dZ6~B6S)SC6^C?W zJU2JHnu7nK;;JQq-d06CXUKy-f`z0?|ksdlhC%!+7&3)#6{9*pR-+ z$RaudH@WGN#tapPG&4*jlm*T)_mT1b4%(me(`SfIh)ih(wj4{I5mDwm2|3+t@|(T{ ztOY7-9QcL6y#g|8GD|W!@+EQd60JXaE_1)Z9e;51YJKylCYTAcLxx_bL`1fu2C=>H zp)|`Fx!(J~xtDpVM|1Pm;?_*Xaf5Y!(Pq}pW4cu_?$%Y1QvNIC7AF7@WQ}yre-msO z-Pg*hQ|nocMPo9KI=ctwxt(U#~UZm<^WivX-E)zK2M3WFuSQy7Ss{=TS<_K0` zYv$*J>!9^fsb(OZhTdXy*Ravy!N0CbE-f-2nM2}pE_Dl4q-Z(m`klGo>M&i!w%{A& zs;1B8GAy5{Yn`c1evYMv@B5pqg`+uq*KYayykrWNBRcm_(xsa!Q}dD;I9oWN{L5b9 z1l@!#4e#lx*C>kf`xfjKea6)X96fod9qq_a1E-R?)=@?a1v~TjmNIWp_~s9x;$AiFVm3?7k2T7IO`zZfa}1`;xo-hJLm6lvCeRmVo7!*FNxn(&r&D3HDD+ENx%CJoPvEbw6>AyY>S& zs!x|~OqMtWku-vhLJ!xvGJ8sZdr;!M!Kjl2@XVVwq5~Lv;gZFLh{Fkq_A_Xmx1&j& zO`@}y@te}A^m5{~R0{XgoTyKfHEQv;9Y zgm!)yLxM=2_svwA+;|GPbp4@5F5GrM_pq)-JIh>a=CR~Ez=3}@Z$tXja;D34URWb{ ztl^GS=MPuB{yn`PjI(g@7;Mm?2jXlr<~s>uh#G&UU`G)cC($V=?~5EF1xz+=`lFk? zwMTm^f-hHBl{ZZBOw6s#$vm%-;E%b?xlEbD3s;b2qj>2PT2#E1ncJlX$m@P0Y>laU z#aHlpGnjI!aMa<|g$r!4;;Kku%Hc_&DM!(4lzjNV+?j-hCGmLu3Slm}EcsZF7XGoc z8eVSM8nU^CPyV4n_*>gQ)@CkhbJGp!rtx!D!IXs$D8;j^!$bH3)j?YgHq#+eH8O zT4t$}1~dZYTfMyS)p2R0my}R|Iy0Y;5R{4PH^(|v`?4i@$9O9-KdC{JZ)h4^mCb&} z5SPCv9c8}w713ZuRJ=mGc?-otz9-E>!OU=N5UBX}Qi-A(wE}Kxd+smUW;ac5ARCpx z_IJdZLFsmIV%y|F%G*TW_#LB~E$JdjasI79WQ;)Kj4(m<0Ao)Ti4@TVb{!E;(lkm_ z^P>{k&FA@ahqBSS$RpGS*p*QQ-LW6zpfxbEF;dX0i&p7me=9V6JudljJsLe==^PPt z*}J%$#y8@`@8!cPOH_?1xMVextD@p)Fr7(}%Yld8&3f(4@>3(}%@KduJB>vZ6Am!6ADDTYW zc$R1jv{T*kJ!g`q<8c<)A)$MG)0Y2eyTvm%p7^rdEl2C0xhF|)?TdO%%>|BK{`BIQ zQw}K$___V?34FIgO|CP0o%yZ3`PB^ATbzB`cQOiEf0@Imo@7OA7x>S12c#SaW|p63 zwg>6`KMSDV7wb1oMCy6{{@B*n^U>M>;dQ*}wSD;hp8M+;hdkXzmsetHYUb@?|9Ub0WO<)A4jh(oqbAur zT5c>(8-CqtxPA<}oqJ&kI^=fV{+yq?^PL?j>D&Dm^xSvb@Cg}V{005B$J09qh~fLk z0>17&;Qbk5O)#WsPQ3ngK4r8()IBB8)SebntJD$1&uS6wFW&pX75HX9+xNB-G|D5= zZsP3{8T4uY)ZR8bHU07fA`n@}yKA#-xajL&=&u#0NGAWVMW+Fh0J&l0B0NB1xq2cM zB8CAYaT_i&B`VfnXi>2o>{`G$(s%Tt+n?XY0En^lrkqbINrNi*-q&-cRj8KH4CIB3 zw{J;f0`l8$ID+*hJyA%d8@N?*8r(*B0cI9MHU!+grnkHWXH@E4W@pk6^a<4BFCt>% zDvFhqu9JvzYzmaNrJvem!I1`%SQ`5raAZ?pueqv8!WEcBL7|u0qz81SgsP`T1d?y% z8eJ&Zu5?ri?` zjs@s67MkcQJ$*@p6FWyH@K&-~W~uZP%X!5lT@0wN)KR;*3HJ}!v1dD({KdPL><6mx zl;RJo(>1y!YP6dIlJ_lt+~kP2S#8A)Rihu*Z)iTAq<~Kf`eO>lZ`&BfMEn`|f&Y;I zxn`ViX-y?>{tDCsc_?Vv)nziVkNwB#{3t*!lFU~HWb5OC`68HeLI|F8DGdFF4v?|Q z&f;2DnJyHk1w#B1z?ij|ja!WJ_!d)0ZlS!ZDa&q!mpYQF0_SSebQk2b>CH&Pj$eZS z&zzteR3CY>3CoFQ6`mbNEx#a{0%jn`R5FvS!8$d~?68s9L$*9_G(bOrzK0~}*-=16 zZOEZSMVpzm{{!wfCQz!PEgKRvm~|*hleJ;C(J!(Lmm{-AFpn6}!5aG8dg&-eFNUQ( zqG7}5ndw|-S`z&Dy|oL4BJSxs0O@u0`p*Prv@>-~^n&0K5|Z_8gx}!;{T#hu*6t0h z;N;*1)-Pg<@-n1&w``G$7q@CbnKf3~3x~w8-muA-^jbc#b_pw{@aX6Pgteic8p&}7 zVODSw&3l@~YfART4l?tgV=@uY8$OJP5M&iZZ z7?%;*hM0~R96cSN3m|iXb(5@4AbqfJ2?Gp|u-J*j_y+G{4Df{wR3jPOQ5pG)kvym% zpej%UmNab)k#~mOYDU_KH>=dm^YJb1rb|>^pu^G`uF&uJCnyWCx+gT zfIYGLyPOGX?=+)6R(9!Yp-HewknkZ0nG9@nq_}Lpe2}JL!d|4fhr;3gZ8u$5ik-b?wWbQF8xxIEn$+wCN1*ya9STTN+gUcDw>9W`?_?3ZL}Mj5?N-O6Huhp!)R z+W$P;(fW2g_Kgx||6H}qRInT|y?Cv7_f?EVOWteg?2@7C=CJF)`bCu2GBvfH6g1yS=30Uqx3 zRvoNz4UKN|`o8>TSy*gA5Kg<>IdABtjPa=kT19C~~w7QfDlW z)upOECvN-38g~R0=nwtKnx6)p2xu!c%W=FoELG<)o#~=iVU1_)DD67-74*2px!iit z34E*5Yny9uq^bVVA`Y!+ zX<(WI^%C)Wl@3j$_PJdl@7DT#scjEkyBL4LU)KO&f8YG)RJMP zlYYd3nUO4pW=)#7t7frGMo?Cxyb0a6lMIB!#A^0ZlDHAU2sq3xN7HzuqXb@K(^>>p zcj+6y{us}~X@G=5i5ijOR&S%D>&Yxz3ICcf!+S?B1vok@OcLU|Vgs#Ev)@#{)X3_yt!~xHW z3F6_-J)Qhty4DxiG$HMwnnN4p%F3d855G`zpw=axBlEat}f19kpuM zjD6T>$8$E+wgsz0`$a+on3Jsa>j#{wte-{@cSq=)(&f=cHKvAs4(H|NuFhojFgt0% zR%2NNR$N7ml*f42B9+b@R|Rw#uFQJ@x; z-_x?Cwt%Y*K4PB(G-AJtCJegP)x8!n^!IEpaXQm{5$?;JxtZ(K8=n2#?|%NdK^_jH z9>#M3XB_S~FE>WMQ=WfrvkiF1r62J-b;K3ZaiHk4JN>=~eZiYy3_>EJyf{}BrRAb! z-{}wEo7*XtyI~Avm$yep;hSjX>mrnZD3A}(cIp?lzW-)_!p;@WGl;|aitGF83(VNw z8S?+yel2+M67k7*?!8|Md`j0>78yDIyb2m~_J5()t{857`i3I-4z_i7J!H`9o-o#X zis64@{7lz-KIy~Uee`GluRzc=Lc!Pev+`H$*G~nJ58#ct$cu#koCM8&k8rN=?3AzV z3)kShg7Hz-_7`K{=e_Ik5bHvN0bzj7fO+viAbO8U|tQ0 zw_k#<6kpsTfMb#4&tWM&9a34O$Q#M>8R98$sI_=Fw7eLjC*jGg7#LF2XYXlfD`;}z zl4TeY^aDY%;_^wU5etFx?tI93=We4tYBj7>MVLieBkRI*fM2Z>@6=MN@m_W4zhC`b z47-itUf^pq5__12%(rP<=|J0Eszb>uwb~K&?jRPWrI;G2Z2T4%`X!kVst)IdnO8T~}I^sN!NuxysfaCYu7M zRqNkdo7a2^ASM4%L73=09QZKvUwtIa!}pgvko#1gfLFWU)71p6zC+UpHrw*VIY}sT zfXtHODV>S7WOL3LP$d0!ey!-lAWB-j^T(D z?oN^^q}`lLY3wKA!y|EZNDal&2_hWgBRsVs}qQk70O>1Io5obqBu!&AgbS2@Fkw8IozJ=enY94CW@+4%Vmz&8sD zDvFxql*Ne@C1f_f;R{p-85^QggA>kqFCvMn6AMvw6gpK54UcYkJDUtKP0LPtcoXsb2jS1H|@jTV{9CspV%Wp+um ztWinq7UA`7uE(W_?s$vCXV}*b6b|Xot=@*-XX{+~-JbD3ueJ>75WA1xeT4lF;c>jY zPOo+!^AdD%`=>MIP3bXY7Kb$?6#TaS&K9>*{|!Hjzz`2Ig-GRVNYA9)jfu|c$kkls z@7*^c_+)Zau}cv6c6rM1885)mgJt#T98@f+w>2X?RzN~Cn%JVgY-@z!`0Go@s-jG2 z1~hg^CcFIIUh%RSTN&HG(X`tOECz6VMidNDv-OyXim^g`BlMkfryP!2{1R$1XWM|Q zz2(g?f)ooOn}1&pNm}^M`a6vpS4zDo%`bhHjUG@`gaiQ5TVR&iVnE33e@q4Ds; z_aMIb@8lGZ)AhY}@8IXI>AO=`0>bg^O|T5{?#0$t%{{2fzpsrh33&Z?B)!j*3=HAH zFi(%qJ7o&~Lx)q0*{E6`$1d5tgk$Z7CfU4|MRWpS0KXqeWckF|O8|D^ux00{h$?#e zq!HcmabyZS2+_@x!=&jb))uk}upuhbVYs3597uYyRZt46>9AJ_96s!_eGU##Y?|ww z%$w`Bb$9iKo?*B9`Y(5s&-)WLHEQ-iv=6u^%u}2nxPHO=dBzRDD4zJA25En?&;?jZ z*{w6v9c;%aK`0swgR4J!+D(6qzlqzZiV|sn%q}8Zp7-!$5!E;?mmKu#h_trcRyVfR ze9)N({_0pqkm_KV*p3*h3?A5Xx; z%g;YJElUUDXhGQwR!us!drt6vPU2Fsd}Hs~Guj48M69@B;_>%@btedA-8Dk?> zD#4isP44Dwx$!STu|WNJFjX#L#5SIE{Rmce^vEObcQeT8AUUps0-1Sd0XL1Dkvk1K z0+fKx*qNZKDjNKf)~TB$;j;G_T-ha0t9+1q;@YKJNC&^6Y-+M7>j4zuCaS9{~&JPC+z2DuKr&wcQ>1@1K5Y4@G~ zd(M}MOl>kE@CX|^@4nv9H0t!o5imC`HuriW|5@K13q#V~AKtL*1Lu|hBG~s9^zQgs zqr7dV`)?KT2YHjbPy)C0$hdWTsL=~GbKrK@b1X?O=y867)VbaQzTmvX%1q*X&CcC= zeO63;7x@GP?oi;g5^U;sOSstF-Ny)T?zA+xB#ylfB_51Xs8Z(0#z zBdLG+)dM4J0|B88sNl!ZY3y{C8n*18b-AO76^;TsGyQf9;AG>v+pzZ?rK1$iF%Xox zK)m))@9T@y1}qp2)TMGkRDVC3?_Br|81JJ{m2xiRH|g5$UIE& zt&~9}aR55j1G@MVEkMr+qw}`CqtF}HLJ<=_ogOPC7{4A`4-GCHY*;l`|3ia}%Q84E zoSDvW(tWdh;m0V_tZto5`2^B@R>QNK(h;H^i== zGxqLEU~$-jtV82Sr|&flIIy5LhU%=}zgYWqA3(_dU-95gVCZy;Wdi*@1)Rm%`apc=_?k)`%idUL3+usyIoQmCW5>Zu`iv_bv@ha)AYWX-#uA-)h8fZP zMd?Sm@r7Jez3&*V8#lxsJZRS(c$@xpkX`Z~{nAtRl8%#lh2){MoaDtnLSUwWr>Gpug*4)u-7 zVZ96uIqb=AzB==I$%4U3QA#@bmCIzB$qPkgNsJWm%LrQx3l+SCqzbf1rIK<)Hl`Qw z=J}bljFx6uVRSflp0%k-O4=|r)eb_&c6M2o`Sdimc3pr z@4|=F{;Fn*r-ARQ^=S~iTS4~1nXE}CLXg6K%7_-8>n=D7BeGqsQifVNv6p3Y2%!8q z90N?T%rEFg+Dy3~>esY?9O2UD^6qrWniB*$u>g5bbzEN4UOip_`I-Xg<#=$4#qDmE zNWKm+V6F^?cqbW#5--- zU@#@J{)}2cud840n1DMH4~9;x$;aGy5<5@`M)X_@2t&-yMY(1-h)&0D9qA%q6rFn` ztG?SAC`X+r z!$d+N84KsVm;=;lQTGmV;3=;OFO#h@wNa6p*S7lU-6`}$i;c$AAcx4#vVK$=TP66|H z>Xc}V+zn|FYn^ax5S@^$CSN6wDtS=vRy%`%xEp!ePMdoSEPc~0IXrj@U^^RI#3;s{ z!|Aa&Yo~rjitW~*D)#&fiNic7s6fE$%EiUyM^;faBR0ZpfykU$llKgpWD8ti3~6;H z-dbqHL*~5;zGBXDptpnI?C<6-j2K9=IJ}Thv^l_r`9jXezTB_5xfxcO(nAzgtDMTC z=cmDcUo(x1mu@#>)<5>?to!@`{>wUCOHhKC$6A-xC5X0 zk#_g-tzOu3@C)_DFw5b*OcgELO`elYzO^pRYh&bc9Huzy>tQ-Lq#<1;!xQf5VGU}( zA960;aSgqvdC%M(@ zg*2v53?b+si z^-KBsU0Hb4k$7it0@C^0)NybBopE5sT;9v^OHU^Uvb*lTW)Ur z4Xb>L=x5F1ax>ZGv$9Wepx=9$>VJO&gVC$5Blt{A{56bnR}w1`1BSbJS+nuF z88qJT{!8EgQ0dyi>K-L1ZLIG`|0UTuAW5=T^WoiZ^=({2u+gviDtaW8WN&Eu5i9UL zw&1Yulcuk~!ARF-cK~>hiqf0kyI~gFKzzq|#nIHJSKqz4{NMp{E{LQ4EzA z2UsAoD9MG*+N3dy{Xq+u5|vhuq?RZfaZj|%vziXQxj<%a!fHCh;ngbVEP<~-cc+cN z5#ne{hFqX$-R31FKHVG&+xroU15YtXHYi)1g2tbi1IeFOn>EYt8s(ijq1fI+NwdZ^@X%RjFKZ1~oQ?0#MpjuT(JgAY|<63f*IX66vp;KJ33 zdj1%ZvX;7#yxNX1f_hRqaHx0We?LVx-IOPLv5_>s}X486X zN*9{pMp1>DveXv*(3hvB6+iQh1UmmN9wXl+F!R~un$#6X@y{2>zhVGgF4_KBdlg=G zbG-qksI%Dgx8)ZkD3SBCZ3}WaA|oR~^GY_w#fg*u9SSu>2hj7-wZn2Dl&QzQ&jD%D_*28ye@%k(WB5xVXQrYrS$)zy$exe%aGNNZP z(#VZs?s@2R)>`Ag&(|lf8EJfB%(^OS?%j!{9>S^d0rc6S4 zRncSv)to3EuspsgwNo03M69ehfmDE>yHn!mCMZUDvPdgIZC8aMQjgb7de>llt1NH0 zF#SR%iiFHF?7Kn=U$*)N@e)$0=}$q#6QxKdc1y8Z2tcas#6|SBY&C}_75m(v=g~c%G zoP(lsgI+q%A>qJ3x^pK=Y}V}NoMW}58foIig=(re;yz?#RT7;^6^eZOhLOi$>EDgv zZL6O(=*mpWF>}RfK_M=5Vvq7XO76lb<#6W+Ei5`Bs_J6H*C|Q1;`?YLMV&C>E zG3&Q_rhvvhlL{aZwlO?=ZT-#s{iMbL3+!e5IG>5r)H}DjAyv={E;oy`{1T1rbvv20hcdaEA z!yUg*LB)ZF3lAo8ElrDpm|rn{lBG-bL*4w@ zCGWl-#z;U`w;raZl-YoNrBszp(B^#iuE26LS*NeT(V5%feMJcD^v`0zVd`8&%c7qXOp{&@v6k8$|`Z=2z!>g9Lzrs>P zGq|1F&Ho(b`y{|q7UC3@z`62K0{Brd%#%asLX1Le?1qXkLd@XNX&xdQq*kZj9isO1 z1JNIGPnX?nOcnzdDh}D_OqKr5taK11(%7@a%|tH->zei}4(Rihn-EMSC;q^@F$qLAaG5Nd?WfX_R!;n*y21x)%Qpdom+O9zuXv9{P;* zh*rt9f~L36C`@>yd^pK1S1VWGm6@B-Ov1E|m37XP=D0~S?`o098OE+|gO7LT<)`t{ zA!y=*FC!Vo(bXmBeu|!G&@pI+yEV@bI#TSQP7^E8X3Q0|iMiV9QK-XOQo-f*x*`NQ z@ky0By5@r?>ufh%5Sw3of9n*;+^s!bQ^YX$6?F9MYPl)!=;ns0`&b!FJMZ+d7?b|w zC1)zYreSp3IPyYS_Q|^!`OF0XdHtMUS7i5hWF_xT?lkkjj`)Ho)+Tm^AYn+I2&3X( zWSMHQ&5;0aDt^v)2FvvoI8Ek0&dViB;F;$u{=mx+^+r6~R)l2XlNl9ihkyOZ5yNuq zkRs-rD9vX-q`L56O~#1?_%OeLrD=r-kc+>EF8`bN6y?M;)@nP*V_hFqriXJ_CwslS z9OYEpXa|Ji7owL5Fx>kEu;8lh)nhKd7gjo7p)<(=Wv* zxv#}R;BlMn5xa6H&xi78*2i?hTn)`)pQ=Mb_%DE0y6|O%rrW18N#|d(IjA z#QQh@;y*qD@_ml*KxaDj`6*2D^*rVS^xK~5a68^gz6D)=E@dq7QG9l@w0U`>Bt$N* zk>7%z@$!9N{9Yz+g$%qGUqdlg3Vc(J^LuA^$G$WJpJ7A>#=~)_1z>sgU+8z_+yRb+Ha0#>*JY1@cs`K z@hZW|s`b|^$sNpGK=?UKh~Oie@Ph=)EyVU4k^ftp{|}Vjw`qCOuglT3r$CNv|I;49 z$I!d84%h3AW1m-GP{Q`tM&f_Weed~nLdkrW!J zg^EZxT#{%l_L+;pM+aCmrbV3QL55W#Oe@U{2d&4`0FWc7>ce&du+b+&kMtNy*yHp7 zR(m^nc*6t&D7SoLZF^(N#0P+%Cb9iynj0-LXIZcvm(rBX5%J(AtDE2m672GFxA-+d zHAKD&pc0&x|BkzOy&Znx0)+s@o-BkZ02^L?%EXz`h}8Z zOPBJurc0u^XL|Vm@$eVv0#9=l?cD@!{ET4o?eKFr%D{;L-H z-Nb|{9THGri(!@nFe4FR4>9mfh9Z1wVhU2R$<1r!Ik|EQl;Q%|WEe-~!vGTV^_V5=$?oRH^n(SoIfsCJ3 zMhw{suGL&vbht|SQ$Tr$@(?{@@L;YDc;DyAOI7Df!U#Q{RYw!pQ}JY~nnmI^9ztv_ zzejkbuM;e4oSt4cE6}Obs4-E1gjAWSo8qFI^oDc?O-VnYGSUg8+J(bA*apN}+IuGU zkt_BvCfH*)29%Urr%7i?71Gd}qBZs&NoGjtiUJa^E=ao|I%^sk(hSG=DVWGAzS9T{ zHw{JG4K6V3P3=hd4xmXgM2Fdt=@-|e&lc7Y39vWHLiib4d5vE@68ZcHR9(}>^z?#P z*REP36^N1m{D$xGL}X`WWqlUCMgBFp5Pz0Vm3~I?%>D>)5XY0#oVc<$o!~2imn(z2 zrm>K%q+CF^h_h71s+6KV%kX|6MSpQl-{WiXO7vsk|`DcTvlGrR`+xOTd$wWg0HkcA0M|?ksu`Vt)#13(2&!879}-gnG4~lxTlbuCGG$5+8r#& zuNHqGJbh>8@`@!0tV3o`^^3n=fJG_5Rz7-dMTK*BP3;Mb>=YqVg$tBB+w5ngJM|GlH z`?CXJ%EEVuUXM>zAVkS~XBuzot=)ri3Ej_Yf-(ID%7!9e3SR zzfXZ8G|XerJH6KHEE&p?(N1U0rx;hJ!ju_FFTN6JPA~p2>W#Jg+)cTY5VH`Sq-Fj+ zPHrvz?>IJ$3uqBi-Tkl;txqt#9M?`;HLCMZ9wCCpzV9esccjQgK)) zYROQOyqb#j5{==1zS3zAwj7GKh3OrEe)Iir$YoELy|Sd_l00;P2UlrXRb|mK2?JFW z-vvfrGZ~^4ErbGXlJnf~$_HeX9y+9NUHu6XR8%;(@RqWB*6p|JGfu*{Eo)@`8_lB)ZQRsIUbDXj1>TRMnbT zffhUr4c&vgyJRg|l3}K}?Jm3E*0Z_MfH%yju)>`$b30BU__%6LllmXW;EW*|cF*rKmstj!g#b*`7dq=G=lNF;>-8HoiP8rUQP^ez}e@$-HT=U z@8h~Ziq!ko)t8ePaAEFl_{3GS2xe_0scE&xB+ z`<_KUQQkufzAOV@-1^4nJ{}wHl!f0G-w*p97vCelUdMPF!~;1tZ=j2YbUh<~Ic(WC z8#lPQH&p0ae+bi^QmG_;a5VFYbc+VpOaqi5TDD|pL7K;A|$&k0V~Qk z@d&2gFzf9j1s)Cy5e%Tx+345w0>~#-(&kP69Defet9s)MS}P@yf;}$xE8o{M%d;Jn z%$46xc9Y_AB7hBcSRM^7XC;-EU4L13(31Nxx@jy@MrA|r8@wI9GQ6>kpE8y^Dnb=b zPp=;^&Co($hCMBlOA!}v4PinqM~g^lRTi9(ADl04+0XgRBNcxD(5F;oh%5gqZ+RO% zFD-4%{wjL|=T>t0PKpahCf8OSBu~UQ64*#S0K`F(3Qkj^MWO7}jTQ~vR#V&uOwjX7ZLxXe+NSi3ONrQ%y<(@MDWyZ& z0Xgy*4e5*^mf|yGrN*EMFn~w#?g2`QJIRUtjG}%6W%jhbkq^9YPQN`y6(UoI@D|ZF z%^TTXaM=5h{h-erTh5xlsADYPU=@_}pMu+f(q3=F(w$+a;=(%;R~+{93dNVF@ZLBx zpv}~*)`&a#Nv~K{iXWvuVS%oT`4pyLc2r?w@BhwEN#A@G|7hbUzU8mycFcQcs>=!_ zs*~7ez7J5I%`(Lu=W_$81j=nYBeB;(=wE9)2{lhb_i$nCFm66%GdUl|WNbob(Ggju zNWqCQf?*yiQdw~6r3nn!z(h5f&82_^iR@Bxl{ot}QZo<2|P?+0ClKY zLT1AeE1L>N3SAG~6qzR(9xs+f0BcGmMIPBiBY0(D_V`{Vf*yJtoXA-v zjF42ucOQ$31m3em+Bom3%8}O!$`S=3Gij^=!TtO{dMCAi{z+P680M{CBB-n5S2ose z)JNw~0eUFw_4ocDHbB{dF+?fKZYMwH8kjUD1>tV2muZNTFKgLE9w1-x4&|#zxhpDhe=hSXMP5*#vp-@xI430IU zOT|bcdq55?2?d#$HAy$wsYvV&r;^Lb!snsG6MB&2P_5{&bmDUHEePlw>@MBVdqc5+ zGgpG#nlZ#ws^w@qv?{XA>oM8nTa$8JVF8I zJ_SiST*G6#y4=^J+E>5|;FLpPxw?_iNzp_8?yzNbnyy1K9|a+|C@wRQw;rPDougGK z9@jL|3QUH3bnGGrSttB?&m)E06w~S*MhQ|{+nX~NQ~1@qy~uUWu5E(}To+;I74CK@ zU#H#C{NM(NeC>NPMErg`2FMd#Vfb7!5hTcxNV-l<<@iw)z#8-6QZCBDMn0yj&F2%v ziwPD>``gAvtoc!okzwoQc)&yJ!OGgWF~o3(2iN)fF9CtYuA5EcvAT_Q4ZLX+%3%5IMzb|(KAsv=B#3pRTOGk z=1n`bSTN)PoEDqLs%uAr!83up2@mh+L0E$OkS)L9AM#g6VXeYQ-D#EF|7>jczZ%L4 zGBkdp9}Ejy_yLPR=7j$vGgJ@~(oN0x^hD~^tysB;u~7|wJS+EB{^fEB5*o}K<`~!O zcr;Hy@NO@5qdg#}X;{Dj5ds)=fW~ax-rp!>>J6$s-dLFXICItR$og%#1D z*FS5-tciGANmQdKzATCq|NdN1sbcLa-pKKB4VUn4$Xrc#zMuHcFD&v)@TPVdqsIEL z?V?Zv+!3egziWLznQ{B}M3J0x@6w3HSNmDu?`IdgY|WhIDy^pV+_u5!gL$OptDl;F z%5*T9Jcj#*3hg5uZM`{Hd=1xK+g`4OXx1E8wY;S4JoIO%FgUOS7T2ww4yH_wlEw`Q z5xo`X{4UnxS$Xy56+rD>rZJDsjK|Dd;}Tuf(>YTzO zYlAHu+qP}nwmB2q=ESybYm!WCtK(#1+sVWe>-7BRIXCAfovYqY_fB@zw`$dT4S+a3 ze}#o%Ab#`ZpMeJW>YU)dRVJU|1vJofxExK4Q~XoU3tXm56Mh0(R{Cu|?H+M+{Hccw zFVC(-SNdexD?B|tJUn;;J_X+1-eM0TvwHR#oQO8IZG3NI3=eN^{yAmts+9d~-1$y{ zj%R+RdY@Y2jn+udBKnY3P7>>Aw4omvP5GtKRzaZ(%tB&-A{>*#7ASFNXPt@+(NV zv%ZN>0e8|b_`>%qFN+aGhqsemW*A99F*`Sc?_~kD36s~}*q_9}X64|KR6F49|pNBE3>J$(md-L^s5w5`kzjwmna+-^tP?PHo%G@59#XqxN};K>I_!fAyO`mrq_zBd`DP6Ql>aEZ}! zl>EB`9SNOnewabIp?STgr~DqB@kQ{IfvX>p;6)hJWghK%K2(^=Jo}0yJNspoaM`I8 zut^F9X-Y-Pa&;4#(l(45)jIlhvYOfTrTkZ%clQul18Oiu9i~`t=uJ~$GBDfLUG^sK z;6+r8)9qS%2o)y98bz=V_*dwdLp^HxsVtBdB)BK^|H-7j0}q*d`MS{>G)=2JGa_F=s2QgH2Cc+``yN`@YL=TCZW z3B@nvBsIf_NPI?$5=xapdrZ6DfYvm}fl`wLN{0gjpujm%tm!5g@MZ#srZzHK<1NAP zY8Y7}YOzD#Fw@c5lz^5AbF!62V=5O;l_K>DH!(t!XAkhKz%tYGRJ?mXLV9zCNIZ%X z1f{b)zCd@@1ca+UndC%wSUt2omuxy@YfRg;W!Lq_g$UpXf)2q;(H+Jq1ErCOl~-g# zOg3Gm45@rI5ab7-;B%DRh*w zGG9oXRA8BbUiR7L-l&~>ge#7Yq(ex1vDWIHMR#xRC1(y z#TcnoS^z#59qFs(Bn^74w5ltXb)*!;Ov_5x`%oV-J!jfy4pBCWAPP!sV;0K{JZ@47 zOJ+=u((ui6e1`^KkJ67`Oj{q*hJm+-klOivtWlPw381Kdx7cFa?e)Z@Uh77C`y2-E zyE{)w#OISPbiyrV7Rf<3Gjz$~_*#lpLq}l?mQIAvHS-R^J!*x|%81?sgRG4{7Ws!2 zLEw+bA^VAJb+kmM441HFv)AQXw^FD>dqS9~imh#e`!wERJG;Qu^wYzES10#h|81?@ z?fee^r$yItGi@g>({Wa9_`HSY;?VZ(ii}Vh8bpNbHQCG3k*s1Q>BF`BXTH8H66`1e z(Qof4p9vMo6iPc|c<>$FMI43mI3i8fFAi{)1R-|Gk9EXlS_Yl|dDsU2S>HXc)ojd9 zeV7DqK>p1qI@m48Hsbn$Jv{VCtTqyo*B#J1-Qk_`0T}yV8;nyB&<4|QuyYWon=7MZ zqtB{w;w>Fi3U~P=NDp75Y-@d=Hp`AflBDEEWcn?VJ2-AwfC`{T-JGm5i!~##^DJ=P z4z%fL^3q32Kii;`uAOY|(>5u~Z%9d6UGhdnb|Ga&v}$%D)|?|wA$(b34;;FKYVQLA z-StG3EW%cBN;E%(AGhCilPNl`-`b@mw0c&aSXUkqp0)>w1Og#t0~1xr%sH}U-xb+D zfA-1u$(IvTu*;?@q~ek!fq|m2=*8G#`56D&&d~;{0cDy*a=6|)xEnfM`jC?UkrL8%KQI*0?fE4{EZ{3+ zXevE0iOqC#H=Wb%b%}SshOktr&Htx<2+7jef`FLV6=(=C;b_&=e0+Rt|M76%A?HTc zNaQ1j0wP0e4=A4{#RQ*6#now>4wr6hSDIw(8*NkLINy z^ag<0{qsQ4`JWc!u-QIfTh}AC^Td!LAD@uGDjlgt2{&#|6MjR~U_KyKfgU@qITq1(Ip+4Un z^dDF6ONRH$IbVTqUw^5%dA0&xW!mTZ9x674w21WI;J$9y!#-|74o)QBZWyluE_Uwv zJ_iWn`#yJWMht=cnHRC#x|bxczuNC3#P0#`%wNX|UI#toP!W@L0U>oC*k94Wd60jQ z_A!egvTkZ)KpeKvm!khA_w{Yv*W_2+^}Fzs_x;e5*VAl3!Okb~C$;|t{1@TZpK9jY ztOOsk!C6d*aUZ?Ub>S0y!)MpeyDz_xowuEU2yXumL)Jd0Ec)4p0ngPvZ|5tTw8Db2 zJiq}tAK;M7RGmlA0fHA3>f|qG>pNNFW5j(8acC4)21`6_#n!ca(Nu0Xh8h$mwFmRV zoJ_yzjirD;>@77%HuvlaIiaYEp2^=BlnXpl-4$vC3fM9S3~Y0@a!>Ff>usxP96E%d zWG(Dw4~Zu5O{AdwpHZb*6j{k|26Tf~EQMk&BRzW@n{?$-nUXs{1OqFJs#GH{6G)cx zE94Y?+~uaYRy5n1RcJd0<=Ka)(v~Dj0YO&1sHS8<1nL& zM(tF>6g|`2dKmczTwEjF#^i5LcH(_W6#aD5kBCf^J{q>}HpQ|n{_ns3M3hkpu^(bi z;#_cDP`jDZcS&5h-zj*7vI?oxqV%}*7=wK{Ej@Q#bOU@g7`GlwyQ0d}FQm7~Px2L% z0ikc5(kg=>CuBA^Hy6qBY6hW%M@GH*lH8uydRMn0OU)9lDRdU1PK3B|rJ7V+Y$LYx zt<<4_;Z27c;Ul`&F!_x#gu5G&hcHg;VQPZoujgLa5ia2<9v%21*&6+^b>6x(_}@T2A-khCgsf~aij^w`I(isf(C zGPch2oPWVmy_prg%|%sh#S5HkoI~A%4tu!9=%=2TYlH`ht`3)J10D}M4tiamm|xnJU#%wkE)CoI$FPXaS;qI3i*dCNHkhl4){PS^UInxTQESw=s^w zSxk(p2*>Tp?SNZ0n-=$u;h-thlYL#znyERvGT*v(cL?qxLq^U_Rf4acDHf`emvdUQ z5Ob)}XwB-IHOEg!Ttp6P&&2HHn!=KO|Ei+8dH&S%KMzqK%MAsOt2?%(db%w^VRa}m zPePeSoTe=$tzaRIjT7_nvi2k^az7ERgWDii3EFTMEb<&+?_<3NoA+7X_{7YF` z>eA%Oi>oOU3UrV$w=-5OMogI_I;GZsJ=>H~zc_ZYjw?K#kNU@f34G6)+}i;KU8AE6 zw&v;V%s@kDhT|qMNu8OU*wg)o!!ySixxY*?3*Aiz{4wkbV+iOD7ZJ#O>y9%rDRCDZ zI!CbQhVUnJ`Sad(YPfs$v&G-$a>dOHH^2=eH^dk1mnp%?0$2rpT9dUPzS8K6w_|y^ z=%18H+*$OYO~lYjc68qjVKz7x`K;u@_A>b}CQJ)jk8iopTO|Zy2(1+oRA0mNFbbYr z?{tks%@KD;039QH8j>P2)Na#G%U_(WTeC1vs}Ju(A>jUA*F^t{DSOG`)Wc+kx4A~X zK-w%4{~wUhq|J~N^j=`GbrkaIA8?UXJG!$2U;||Y*Ogz+egOgGDN|Ng9K=Vkb5fOb zkSfUHK~x8aM;MNbrn#BV93M`^+J|3(o&h>lax z@)(cph871%S)sXu?BkLFsFr{oSg1jmXjm4_+s{8$04T*P7$WpZ5iA-W%CI7l8QyqN zB19;$e0yAkQz`2k)-0~{1L`;nz}_vZ=he>1b_g`X8xiW>{-+T0R{KCLwtknrIib(-S>qpgGv8~2|gLwMME}(v0_A`WUOCkqAPwD_ejnw zwC$%l2XRSUV}M)1@m8qv>hV@@1nMT$3_x)2tod@KK5u7%HEpjVqzcFu2nMly}KcJ$f2+Z>0>Ns%`RuvnQmmG!UrxKrip)?(8BsFm&ovTgEQQHMj@cXnad$2pBA* z>rd}4nE$>r8rsGZ=BIZ0mjAEE074R+ftkXae)cRk=hbyC_s7$D1O33ef78I);#mLz zL{5yX^geQu#OB4v$9w%fMFsMf2nYzAyu8MM5J|7M%eD@`H!f&sXiK%{agz?c|Li+E zeFNzE^|j`Lu=y0D8m2g0GmB zm+P<7NKat_Gw(0L?T@D~_OFwGhXU6N0H%Sry}$paA%|7a#aE!C@5BB31o2lO5U%hA z-S@N{0HD`qb~!4}KiUR<7?9l2fT)wX8Izb3@`&sW>+w^;oKHaCRskyR$M=J_D6XWo zE@8HrkMi7c$WA0VXEipB?}z(#$HWZp6;};^P*+M7nhatR3%$)4m>L|4@QzS$FFVNho7vkrWygocrx^z=dg6dgo}z3{!R@OiMN#816`(SSfTeq$`Dq z_wc=aE39a&WBqb^$S1LaR3^&TT-;ZM0-smXxr=@hm5aZAxjX|dzDMc(__6#ICWy9i zTJ$a9pth@C6dX<43N&Ti22WT~*JjxOdL?`-HS6R`v9fM@RO7HV1k%KKclhdj$mi!_OMt67np-e_HoIBk-|0o;hm=9fsQH~DnG6Qs&%|8jYET3d* zZyHAs`e&g)fgqwvLc`~XLbOOsz<%f5MqUN>3?;X2pkw6+n6-wAbB@-oH%LEGa}a+X zuuVj2hG>SBfEsGbV5--9M?z+aGR-K&#_(h~J%Uub+T^h!0Dmk*&mSx0;Myw-H= z5V}kMOtBGLCs^`xX_H>LJWxDU*22(DvI}bsZwY;il`Ts<9f_DKTIeBjnaC%O?*(MjsRb7UY3n29 zdJa*OiXFwk^~gubHE!cRx-F1of(oOx2D2iFZB%V%3S6Zy|0w2+)p0cI@Tn)642#3$ zvtysBC+7KdcAXj7Gcew~`g!zMf6>|CZ7c)|sA_~`4gAX1;&Bu{EIx4;alRl50_2%n zm)%pV@o|?Z-O+7=$79!HA8lig6kRZ7dJSz1ttq>4kz#Bh3}D~Np67rTd=k_U0pWbH zC!UmBYppU*^e06u1JG=k$^LYDLWAZ9!>2ZLIwkvtn>Bq(*-#TzW|0kCyb8)CvF6Zm zBf(C@r6R1Ix^W1n9$g4D2d+df@;unyoIW8mAXTuJag^OFfZj+|4H_A<{H~4lyKc8~ zVui&%5zFSp$}MEDB4|P@LHeylcxjfI~i#Q9jpR+7YO_9x|)$G{m?0aH8*>5m8fV@yIq6=?5J z1&~j$RYLyxcZSg4%}a*UG#!om@#*Eb_WS1Qj^!gO0@QLJHtLOvZwzR<(J!8&$TK3u zagM!u+*Q8tp+n0Y$BSZ* z1Vk}kc07b9BF=tb)v)^pq#+NOh}y>G zqG>bh?_YPnvvJoM=w}2(0Lx$hmUp|}-3PMJZXK{YD&mv~g)h5V!ZtjV=XP*17a~5A zzvnr~@xejcElG8u{*i?eAP^clSq?e`3%|6AX&y`D@43lH z#S!%$EW97L*gl?lLW`$vVhYYz#@b1J=h4)Fi4>R{?~0djpYL3QnhQo&Q5-gv3)fa8 zti=SPxe3vBIy(G@YVwxJ!3JBzf$NEi5*Ip{C?fhT62nlSR^r*(0hH^~ zR|aH2mV~h31~sWMbU-}S@CL(tr#o8lGdNI20R6XKmaPe8bTpfC%AAPU?P~70si>Ib z7#kka;VLhU2THcmRFtfB-uWxd4pEA^%eQ>{rDA0Q3xG?tN@0Y~=p(l7IPkaAAg?itW9Axfw#%jrFrcH5MvxeK=pLz(Y`2MZ#8^lt{A5Xd7Ok z^8LOY&vv2vZh37v)MkCsK^^6m9x!-6KRa}P<3FF-3oxZzB@@8%fI11dONf5>6Upc5 zKxN4S_xIjwme#Bus)s%-nf*#>yhy`IE6V?8SIWCZ>fXkkz-Gd|gS^j<9DTiWf%+6N z{?!byvbO`P`gPxz(A6Av0RoiJeb#JlT;tHc#=tdb%?@D4bpVLNx6STyb#WOa{@f#O z)MMfm5O}YeoLj~NlB?gZu0NSPueXm@0zOt=j=B3}`t6;o z6IXP3WnuT;d3sx4Z|LnNN_$8IymP-lem$JLFU$sn@AzJF3SmYd7WTXMzPB*n#TmZn zzt?@D_kBb>JwId^?msY3=fB1S+q;yIdoOTTqDzL{d#rfklGxBp@NKgGtO4!V1Duy~ z)~z3#t&$chE0-&maKb5D41b4{pF0J%Nb}IEUCDGL zsz;>Z(uD@M9;vk7$I1`=RspzD7SRkF!XALhF1)uF(jHHygtI-l&B_kxkd(2U&qmb* zN0)$+-9i`*zlwJY6vRSyqq0?sqJ=6b4c3@FS`fA`RI!y#C9?Y|)w~YV(dvd`DkY4b zfm&oH*vcM7zPlQz%FcfsS)bYiK6J z&=n5Tq8x|o4o#tAWCT~E%B96(!Z@K|qa?~!8AgIws$0TlgEyd>YvAKAI3Yta3)sYT zNPMtKNktuTBe&&<&%@-zLM4l^ImNLMz)@diL@niB6DckbR5+sdm@9JDKgLP%jgyMG(~q+FeO)@6ST2?CtjArv)2_c zn&v_U3+{wv7)Z#-x(rwdLpSQgPyYl%;jc_3yhzOZ+q+d>LCq1Z_y=%%o%wUrD#X%q zGkUYh(sa?(T$7A@0ciLJSTMywqe9JrN-x9P6437m?OMLb?`-Wb>jR%tXN&DL0L~zJ zm}*g8+(+JV?fN}9PS|D6bGs3i?`mB{b{t+#)SADuQF2Pm6Oy0KCWtcYlPT9S4vl5; zpiFjN=dAkC9inlHPPgg!=%nev(pe`tBB-+HqJZhbJ;d zQm#x#QWIF3lobC^3J|rdzCK~JRPqtbMgW3rxv^3a~-mwk0!pW*Uh>*!U6^=3;|2x?P&oi2| ztXMz613a6~0NPh)ZFsq^LPp!BLLkv&kQ|z&5&<52fIXYtU8nJVtEdikJ`G=D zq$Vv~J>4jkMcQ$csD-*pcY6+_wOh5SZ5^r-W@j;pyC6I8aXNi=7E!?@{j;%1E|+}P zi(H5NF&{$sNv{IkvRH5F9XL8UqR`*utfz3bZaVpM6AYWpw&_(R?CfQp&lxm-N$ zM&hD2#X1&Oa%`3?6{9pbf&fWHJ*BM44kuP~H5)U*avPH>sb!+Gfp|-e$FmDay0CNk zY{vR>ooboy^H~j!)PLS!_t{ffn&URv=HaN~+weY`21G;u{O$lnltsJ+?Z7s_N` zuDnr8SdwsP+I@76pW7`P`I$=%}=7I-dqhIK1^xdzc22%l00l)VwWkn%r*l;{Zr2=SMlLIcnP;0lh! zDx4jhGT81lGEXv7cMjV+V(i|e@B(3eg7qF{WXi;VdM{qIpawZy*+RnOLZ4c`C*{UI zaAFYWL$`2V3Hy%ieBMM5Fk$J~x|KOS%v1LMJ+Id5blU4!`1hE{>JD%legd+6uebY@ z?Hh6?;y(O(eGi}vfs>OSHK~_@SESpVyx1KOjnHEur9Te02`k}Tk4$UH^jFWFE4&X( zeut3(v1bamRO^`&nSk=1JS%lR)LtZA*I(bE*5aFvwr$Wun#T{UGbh^eKB3et)P=|| z8T{fFE?2y3*Ua`}q;LfrsbQ??lbg7pP*4CKhM1+Oqbu=KY-#)Nmj3x3?lza)w6HQ` z2~O0p5WwLbNjeQ$m<~b~3bho-fxgtIeaC#Dw_3U^syd(Mql-n~p-#Zo_quro%P$Qh9LMMFJAP;ZejR<{ z=2PDa#sfRER``TH0QGOQg7y^ z&cE_JH)TH0b|rA7CCmk>`(;O6cHjBV1LNkT6Nqk)a~$t>2f#N?H{a*wnVo^iABXMt zRU6;`6jWh!;MxA><2s-~_$5VnZ@b5bm^3D!QHKGm`xbx?q%J3hgn;+n#yS2`a#-8L zhN~Wu_VxPm6Y||_((8DkfA5JQR_pW63b-dm!Z{ao_`1w_e<6NHdHYjc`Zx9ii@&h; zGuY8@bdR3-CTzCPDg9vW_Oxr$|Be6sstfY%)$ukASZiA>U%X$y&JAxz10KYC-AJrjdE%VvcRqW<+RDM2>~NG)%YW&U}leA(d%}|&D&o+y#=lmk#yK=_9$+WN}Sapb|L4iVJ3%B144#r{P zyElB6*uCXw7MqdjvBy`oK0wcss}&BldGpw$hNy*v~^w}e}bIvEMns#Pwj2KH3$ zpEw;{XA~7;OoFgVe6ZLZUpK(D`MUQwsF~ix+D+^u^d2@Kfu^t|KrUd@n-7PPt{xPQ zXfn)njZh|Z9B$a5Ieu3aBkqA4cg01ILW$HW`Ug&NB)vm4=&`dLRP*$K%wf!{Rid%k zcqUG{wS9BZSi1+u2Ra%a9~YXtn&xO+qlg3w0Q-(T@MmM+KTyN33!eXGSP)IhS>;>* zZlY-sVwAt83hE{#Ds`E$-5~ul+B!&!DDpQ<)&asPSjg979dKfRJi#VeK0S?$TB+^x zcAz2N&AgNK7?BJ#Owk|ViMaAC(bP1GAIu0eiinfo>7nW0aK<_7MH8_E>r)G-a)B6m z+7X$KqK-W(D^9iRQm`uGH*7RM1f?Mg4+(5YRLI$)Q|4$H(dJ#VM1^?;>IP6_m(iPB zvoJ3^Q55V!9VP4X-RO?r1W>M(GcHS~7b518>OuRZof77S?Z%ozI-+4(XpHcRX=Y+8 z12s4u92D9D@J!)VrgXc(h^E3b2sNYzvLl)9!S1+>gpzzr8s>elZ%eDyzo%0L&`Z^8 z)XmCe(tn%F$Ynsti31~I5BSSj_Ah??7M0PwshK*KTY;q%SCUGH;!Uz(=YxA&*EzzS z{58yZq4UU&H3{kM9h{qMX6y>?DHH4=$CRbUH-;;J@RWYqhQoM{9bA-I+8}J5 z`bQ_G*N{4HvtNsaqKIqA4Wzab2?JNfbt<%%QiPi#_-WQ|F{uF-rdS0z6x;PvgNVKMu)>(HoUW!L;T%9_(E)1|=z-sqC%r?x z!>M}c*(5R$+ToADGF&d-F%z5$K=#G9nWGY=-;i^XLm-n8q~ z39kH20|13sw-|ITkF|*IEDAJ!k_*kWvVC7=&@d9z1@!j5A6jT_a{*LBE*CAV!X3zf zWY922?d6pjMhefq-H(Xx>Ib&CgE*~>mUJxgQA(H`@$~s8Qv#Hs&Pv~dwJEU}8{urh zLF3e2m#L2hM1R%Rxyy3ZU9TBT-4ATN?CyzQZT8JVBNNXfgM(x+>+PoHaC`5qa~?P0 z{`^6841AQ-i}z3cd%EoI8uf^j{R9?b=ZEuuZULDBr=6FEWhXnDtrF!1Qpe2=V|?C# z^_~HWFXaWW*=#_{+0Ebj``hk!VJ0G<$pR36wb`4$jSedzO-}g)LUXQiR&u*a6TH7_ z`Fc${in6J*sjcQ#;yXD zNsnjm5g<*-vG0Q`;L$PwaJkW@%g~w@6!4mxX8E zMv;-)-?k%GPre|zfq^612Ht5$!k=gv0bk!WehB%V0Kvi)0RwpfelZzQ`f$Q%lhUh~ z!Ur43jxJ~GG4B(#2|Fu1{uhYbF9U5a@$TI>PyYrS-7_PWS*@oDdq9ih2S4%M&=BKD zr&|)dkL#~XN12mijKg(ArYW{OG`Ym>%IkofwyVt}Xym<^03F75(B-w;b=t^!qJ|NWD~;7hzwD;%SmN1D zNiJ;Dms}lr>V0c8H7z;n(&XPY7w*3!4@=lO#KD#0b=pz)ufS5o9^E97LbdwP@qCfU zB5{~Z2{89dK}q7QNmP{x9HD@-?b2wggmNO;qC=wl+pN$O`JzKHCc-keDLJ6Ww<$R@ zO+s4?*M~1oM-nxOB|r1(AZ=OB#*%9Tx2`l%{dAWaQjJfnE|y$8I?`)V1@Vt$8cq5up_vDGF|+c0Gstl{ zET>St)MS#Xlx|Y2BO8hKE_KFKxPzYv?uAQ4w)XFCVp@^{bBoSx5A6F=_tD1vf%34D zzf{jAD3>?N-C;Du=A3`>4{-4n3)!7E9J4pPJtH7hE&iVu;8Mo^=HWO1heQ%!E_#8pi$Rc(y7NskFP+}Kh8 zczosuVL=F6atWuK+omM;;3 z>{uxycV9{gaVfeK)RFBfdPio&B;(^abWViEGY2M_sw5~>UQ|L9jf}9E@ElTf9EMJ1 z)7m1F>n9E)FJllT=v<;;vK5U5tXA}2IQoI~ne$;CB*75gbUL>0Nf)ea2y=`jB&`w1 zh~Ht$gF8sHRM0wTs=(J=yIu97d^x!|QZ;3mb5(`7T!PjVl5~qMEHG&G>}LGkk6YBB zsHf2zwQoAqPGAct)#g}S6vUL!_&+?OXZtOUU~5DhWtA+H`3g;@Eaa9#tJyXj zr!1(a?!+yAPJNTS0Q*kZ^o{VK+BAMWd8qZU9+)g5|7t9W|6>zB9rBTlk+ua!(=;N$ zM%;SL3G4xaI6agVKrpah&opl3k|=c#>PR21rPIM)sVYD8h*yf`w~PiD)Mpyg@dw|G zvq)RUQ;emnQO*l)$hSZy{K1{bLe*k!dem>Ek}jhwdL`R2x^*^sI|xED_!DMw4dDVJ zEq*_~TBPDP+V0`LjG|Re(F0hOxt#ozUqV*co(L;Ww(9p1u1czQ9zUYOzl#F(4B*aa znTZsyb?Xpyau-ENVg$pQIBLbwrr#Pi(V~)__4#_&3V3hxuLDoQ9tgF!yN99ITYa zCIJxb5D;F0wc32HJjz$co|-5eT~fktPy`;2HaeXje_ijeV_vLuC&?uR-jv!Ot1CAp zJroob;BzO4#Svodd_jq#)L(Ua>{-vU?00%o#e0;m>m(UZDfYI7hBuvKR zllK__QRsdU{?D`pe@7ee0U>~D0RY;~fL8jFxm6Fg{~L?HR<)9nHLU(1nu#X{*_@3p zTA86PZ`xujIO4t70W1vW$0y)lkB%sxci9Wirj91O#m?$3t1b)Ct~9x8NsmApkuS{R z4CXQptZ~fjmkb6Z15dgCf3Cf5TjvZ^tGAR^{}bB@wgbNWK93zg!>@f$`2WoV0qV#B z%RIcid(+%IW;js)s}1^I+8UY)71WteA{9^|Txi!QDe)NmI?XXYw#rIk);n4^C@(F@ zL--sSNR}oR+U?0r)E@-sx&8?B?EPXww#;~r>wEM5W!Uws5Yy1h zSJe9eEH?@>g zcf0f*sE1#*JrCqPy=+9xWEehU2p%8-Ihy*{`CR2b2q*D zqe*i>_eNnCyJA*^zqXoMl*>u73~lX-5FuGVVXVr8ZD`{o)i`Uzm@w0hN#a;!VT>0I zWYti6lLHO{%%Lpl=qUDCC{=2hNB(1>;7{;mym3)|+2nDUGB-DGDOz^9EOvTAU;=!a zSO$9;jWZfBvJBf0H-<@pzg3xkyG}E{N9e4 zOp{-1P>MlKLBW~|XVK0-O~77~1$+nRXwe-y@#@9pn_G?G&|tIQXF0rak&X`S(%BBX z>tp)^^kM)}G`a&R`QrXFPJ9F|X2&<8%yVBWf520WI=#rf6;C1rt~FUWIVv(Fg6F_! z3ML>;q?lkqkd!Z7Uf?GuSh5JZ7ganR3q;hNZ56@6%Y@?_a;gr=Eu2{(V zZ{zBETv9MJ=dcSL*`vcg7->A15}2~8J4U|S1*95jzb?iOjt_$%oq=>PWH8f zf9mHKWs=BY0ITAux@Y0~6Q)t(r*93-XIaLsFp#`p7H0hi;7FDUD+Hq<8OwqF%lyb?3)Uo2A`fqC2-DLQ*1DR zuZRa8Cxj~=L>gi~z>z=BxtCrKKJas2(~7#D&mp%acbh(4YfM4*A(ZWJDg(xum~c6# zP7wcZhzcW0Q*4Y2h_2n${N100xXm%hY9MOe_ZE@3W#D;~6WvBlxOo;#ixUQfr#5AW zpjp_f=<5c6CyX3(3izMDDOc5hldU40D^#)<9kEzN#w&$y7KO~YXRY-Pk0Y}s+DRt! zcDNm~NoT2kH|3ntT@+Lb_fraoKXeHl13e~mmjSK?(C90=O-fPIbHI;4AD^%4HFyW5 zj@`@UGmjmS>O#vQmISr(S-ATu1T^l}KN27aLbsf#>Nng;jj*tO17*fdCc~9Lv=iBy zTP_&_JHwU@sF zzDYkIX;zFqYvN3Q@ z_Mt#@qjSs7v@Twdu!&|&qnh}gx{V>gZdFXztk=*utR^Pu+_HeU#>|s%o^KPOOqz}B znGpZH0L(y`+aq0y)tvzjs!z0q?{OUr+Haz85QbH(fM1&Eego+Q4@!c{*6%H4TIOfn zFByJ(|8z@!U;Hap6n^>dU2UhLJd=dY>f2F|tK#_pzU8dvzSPL12kD;1ejZfUbMe?) z>ciCWgJH#SZw`Xh#gisDltB*GddZEu$#h|=IIef~)yE@pzd^oQ*WQOU--7UUJn0z7 zoj$*ukOR|iwm`^=%Pm>J$x%j!_rtks8L;B?F|c?NF=hCEg&;p)Jnd9CB4X}|(fU4w zVL#7L=jdh4&Pu_@_R*##8k04qL#q4qG5VAnz8tpQdR~(wDoxUQ5-;3ak@=Df2OsZ# zzv2XRx8B7z&$_xQf%At+zv7>#+wyCuSSRRV-;7= zK8Iesu<}Q5o%em`z1I1kd3fFw)L$*nh@oe-wWwbUh#^sRuRVAkG^vL`0~ZJf`xh=| zX0a>)z-^;{fZLfoAo1kq#8S1k_rsF5_feYmjMUoR2^P^PIlonWO5MLnX=s=6G)Y+S z+^i3w4-oh2br8q>4{)$nSirlP2PE2OyggmJetr5e8FY^UJ?nt&?d=(--IPFlhdARF z6VBZF_JD*p&+UWSwNeTb+(h2FEh}hRFVdEOM;LXzT z_%1Cb)wbXWnDWf}byN10#6KA{G(>!Nn%8%@<7c^By(6>PeWzL*+9bfUo7d+XGNkPv zbHTi|zdZVifkbfonB(d7RBPIKHR*dKztZmVvW7tPeaH3v%3er^iFX}oEK388xW|lI zq=>{W+Oa=Ez{{)u;|=`A&irMhU-OsVk0|B|FWXl)cs{c4D^zl+cHN;Z^UilNwtn*>cqes! zhaUX`zp$KD6A+~;{<=?@b^@*>Qmx5&gz%6;=Riu5+d_+tf~$Nl9xyod@Jr$NNUEe4 zL>i@>Y4T~(&t!lFSKGo(e^Z`2W&v8RJXL}y0V;9<>L1Gwg7+a>J$YIdoLA{32XKBd ziarMSb1(mw%C?CBr^u+=sE&|D@h(++NRE1AJT;<7@@f!I)$cR~+As$JXGa{H-!4=q z60*jJMW&4)Et<)|SfRZ@$v$31Te#czhN|D01>95kKu%(dbK7$J>pB+{xCI9{0SbYn zw0?l}@V+_9Y#5G3Qh6K{tg1A?9VRF=WSBqN&Fual%7*20?T3iewbzK!TdX_HW-`R2 zY8xXu6oj>}N4XgzniHrx!KflG{Oq^lcwJ>-N>ovepm3S!6QWx1K^fC~ko}qfZXp9i zaA$%M*4S`VC~#wx!dnByf#Rw2+n(H5`6yugy{3qGgNphT{0Kj8%nkc6hok%L{z&DJ z{wPpwi5W3bC-wl!FNSi9MZcfeH3($|J2az_+y5s1fPn8%BYcfrwpw8jRWxm~cb*r~ z=cS5A#QK0~v8prI7-WW3E)pP)D@mLlyE1dVgG!ZMWHLL=;2P{zBb zEI@wGg1728vki0~@-p9cEF5QXd9LT1=#!aCE}ku}DJx9?T~o8779^0diPN-}%{<@o zn9PAul=EY1suyQ!+z#D5CGyzTOf}zT4Zb&4V(TEtQ(Vr~sMwmcU;s)O2Ca5A{b4s-$wMf6Lg< z;#8G58>TUktt}ox(bUa<0(_I&d%r!tXMjXqfvK5I4?rz%zBu!Hc-vzXx?z z(6Q4$O-xO^ezBjGsdSex2MNLFiDQ#+$4AKdYIBMOVt^^46vHNCjt81|^Q`64Dxsht z;v|ts;m2yh+gBC`T^y&{*>$jIVaZK#Y|qI^M*H219q_`I;fVefvh@P1;dNQA`;OO(SyRb+U>fca1T5_&aGRmF=C$+Kjf8k7!Y3 zLJl^`B*-7H+VIz3t1W#}i*8l{xucsgF^>BF4G?9+-l;YwgTujBK68@vJRz^d(d(X! zgUQJ>ClCDkol$F4W2xWOtzBDFYpfwZ}pL$)`#L8 z9!)Zq;$-toEjO`N0sapM7C2t{Z)Daa)*z_GP->Ov)LqoetA5j;d$WpWnG#9HXr>Xm zf?KV9x{HtZUs|ji#=HEMvyHtSQE6f+Xc5Q+yL;@GJoVl0;qQh@Z7E)c}*D=cRp{$zP1>3>Z4HsZBTNBG+;&&+OWG}&__ zSh#ZYUl|`SL$@{gSU7m(UjE7O$h*EYedIvyy0kR@!?KP#Z}9w%{AXqVlEd4tcD{B; z^FVKFRvKpL3cSmA&6(9zU-?{+e{1H$Thnf*rR!yj`BN?7wSW!>XLH=?N~i$7;BA-v zEo=LT_W;1!gSldUR(5&)YaLH4+r{}y5U;@n8^z=?q$|bs7zi!O<@W@xhvvl~Q0IY2Hn)fjXFnzWT}#>jzo1r(74m(#J5_e)}afaNdCeBppNTie;2D&c3U zKChh;CoA(hW}o_zmX;XbOLqDT;TI(4e@ULR zhQZWWVhfNC+#bC#m)zpLh{jL8FH?3a4@4oCo;`#+5O;`t=wGm429K1h(W$T8ZhCzM;uDvDd7kxGiGiJ?hc1TDYZ>MkCu zrs7=|vcZ60EE!+Zte|){EgX?n)}!uty(_^dLzeg$6Yjh|tG#C51XlYj%NZ^L!q(51RC9Uwr&saZ(_{ zc22u}v6Ib}Q9M0b~I0ytUv^Y5QJPu(H(Rsk{W+AV$t0e?p4HJPg z&;0pkiBJH=xrsceX7!n-baOervtBqv`yLlR!Bm5{AdZJ|f2I4j69V<(7NpG{2~{M$ zsFGRw8!}3nBGvHVesX)2dDBl@EF8j6Ju40nFU3>=;7v=ZuoGiahZ`)Dd(xn z3BUt6MGB<^?l0zW>+CoJb+y$2wg~D7{i+u+ZqPF93R)1_F!J9o++a|pWh*XmBTWBj z_+VnjA>#@n2+K-W)c?Db2s!+vK_ELGrtwO96FO(gjcun!0BA0TOHn+V%~mT1r=g%| z5NH@P#qz}DAQe&^K_dL!gx!KcEsO}wz|64d_;N+ZVms6IK6QIr1xj4Hm=fzM!60o~ zTnAD7f_dEEy3xUWC4_UkRrn=b14Fa1STA9;wFk3s1M~oi;&Bi*`CU{5sRL96!(GCH z41!urE7}oe*^`nhS^H*jU3<)l3;eXc^5&s= zjER`!$~)X_Mbn=r@ug-p$%5)`n9|Kad1*|#FUs~2?3m2}Axk00-I7sZ(?4qea4WW` zRa%c*TCx?94UDX7CmfZz(xgvDW7oOUv9I08t0`$EZ*w2D5y1B%_;>6aXAn^-6}Pn{ z5+*TvqTIK~!s&E7Z-2l~gz!HViS5>VhILTOy7n6x=3Z}BUc_;k`bSFaT9lv^xRLsJ zV1h*!a-KFh-cKw43E4Hk_rY%X6OQP5gazenM?`CRc3bZGAr6WJL?!V-7?MK4g_m4* zN)B~1>XfJsi5Pz|j3I0;I^_Z($O3_R0_U9491-kq6bp8s;)rc-MqP|9biNc$rF87^ z+&-MIXsBajb7~aSs%q3--6PBmE}IUZR_31sGvHc0joMm6T@$)2AQH%jlz%{CG|_G5 zHhEuHnxp9t(NY zc8TvUpPM7K2Y(^j^)^xODWW=7!yzbO%&P`K-Oeru&PD#c3+37kJ3aju2|nj857NmB zI|mb4{2@1#I=nnJrp0!A-p3d=%7y#e|2S`!vIz7|L0A0PT-v<6&v{E)g$4!|wO5dQ z?LP-P6G;zBEJk&mb6@STcLJ+=?$uclXEe7`pP>3K|LT?$!sd1kry9U1?;E4NeU zk!x~&)3@i{C-`9SxuuCe3V$Q?oo@Hx#*}Pw&lTC(h1WSVp5K{yGp;|d^**?;8_K?7 zg)YVki;UH$Gq*sY&ZO^w?_2KH_R(~(Ly<=XW4{GLy51Jv*|eYs2gKr5_mLo4HjTnz z&TE{TU)s*4p}xW1+bJWM#m=M7T!0R&lu1OVW5wBjY(^>m9LKb)Jgp5en=mk>Ad$=FPIKx_LTae zJu+<5?rs&1T;}ud#zzv34N-RDUkTRR_2|}U4qI@POQDICyH|$I$Fa)O16|_nn4k{d zgZxrkNKP%-NHw8SuwC%E4fD9c6&w_lg|i|ws*qLhVb-B2U7W8 zJ`v!^$Z)sh5EDjAjnt`w=iO58)aYmBQMt1QF<@&KyZ5*uK~!{jkfqc9_Kzn16>i`H z3c0HtJS23UA-~F2*g*d4?Zc(RJN?6XX6BEF_d4z1vnhLCQNhfS^V${n7W(DVYP3i3 zCKI_lO$j`s1RZ_VZVg{pyXj^6uBJLSG=ou}{o4j4tXegU9Z8G~TE^cC&9G)o^zs+gL^H)K5PW!iXPyaVoqtT~hU1ZUf31#Rqc=3VM^o4wURn>+%i`o5C;y?C zk`zxZ<E57dkMC{iUW1i#Tg#yMzF{+0;I0dth1sp$8$7 z8j_U1K+Zf@$)lKe*$o4#Myh9wbwuc%E~KGbQZ|1(wm(OQdLKzfSdNA9W6C(%!q4&M zt@5iD`UMt8rtlV&0&^vo^3&OX1{v-lq;Jv<(rSb2eJCuEy79JXQ0g!&plm9R@Yuzt zjF^KMeK7@T98bep=twb@Z0vz@y5!m*tl5fCTQM2XEJF$3X_Ws4)@UXLGjjEpRBL^% za!Exh=>Z^kpB%?R6oXRsd_Mav!4>v^K#)eNQM-p$H>?ZpX$+zBsl2Qz!#)G}M}$@FK&BGPA)$In1q;n0gv$Wr zlr?4Hs$p_VVc?f=n`@t|2)qU@B;-=qk3leHf?en zlHWsd8fNqvRP!m7Zx|DmcKIShu@w8Uc!c0(RetOa{P;gFK#VZML;Mhe9D)K;T`JKq z{LME7oTL_dXW(SAfUJQu73@z7)bes}rJ-hVuN9+E-G#4a_`@ZM6#YXd*zW%rCp9U2 zi{)aI(`lvLi8&}aA7uAZjscA@3>0N>Q;0Vavg8R7KhC))!V-co^yw)!jVx+;<8+EF z^tYYD>w1n35)z)#pwsg0EcSb#;eDT0IF!kRWM6S9NheU@?kj%h00e#XF1uF9E}!bM zr>^C_k=NbM-w^PCWvTHhQEHVuUlFw z)7V`pkyz(VrrVMyBgfO-RxV+RtW85ysjL5Yp21{Tpol<8Oqk%iJXa>);Zy+hxwY<(W;Ov%JjT+wpstNhjNzgmM5> zL31hW9j`&B%o-=8h(nV3|j0}((szLWjo@cgdZeWj>+rl3xg zpe>Js-#EsBH&U#IZQ!Nh+^AsWJN$UPI$CM8;8tpSY`OCDJvn1cYFR$iR*NN}fZ6P7 zzhoe)Rk>YH3mLl#I(5+vxkFJQ7U|0t6#Qnm9ROYJq{~0c>36yiSdT?pANU51?#`_l zytV>%NZF$Pv*{@@A6#Aqt zdfD&YSq^!;4SWzBDWMCFQ+ZpQxxOI_VN((E4U_1stl0|m1qCJ`gB$TuT6 z>Y>Hk+X?i#&%PIKvQ=AP;>0pf+a%`BwJ@;*beX{)?-6}}nRMEMpE)&5hs@s^`!2#ElnYcgd}#fu0@ogR z|BMjz^8R*3(cye;+Vi=*dB}JElF@J9P=v!9CG{S)onfglzRT%*)Fx1cQVQ80zAMhr zAc9kS*;f^RNSHyM@%;-H3;&{rM)C_uIp;T6V($=xS;8R5A^RtZt|OQ$TU+LP+Zx-x zy5!P@bTNxrf6MQ`>A%El4D!*erk?^m#NopHN~Y5UDIs$jTW{pA%SM*g`;ncazAj}v z20adZrTmiV<@J&qEp_T+*Cd80j|3|H5~PKodRu?^%B)h;UcGJX%G72xi4=x~ZLVsr zDg+b9Utu|ke941NFQGSvRvJ9j+y0KRj{@b`a3{J5S?$#~#WXq{xpUU9>RgN_i-T&5 zQU#}4k*(ULDx}Jc=|NkgUDAiaMp3-@7Oi+7Ah4T=_N%mhR&>ZFvvrX@MSfidBEw8- zqhu4c6jG<*UZ=LR;|wyR>PoeDg1|fxtz5Z4?Y;tYPV##yYF!*}jj1UH8vXW?C$~80z8l_y9!IRh}Q~X{6k#;< zh!<4)Q2qJ)v%V$6u_f$QNSI>?SV$xqiky_fv4!jo%|;CBa>s`$yV7jJ3yv}OclsEB zcp!yE;rOAaUt0ZEd7k5}i!!}|S0fW5_^*jYL`;;lXiwg^YJ35PQ1ysus^c34;>TwER3$RfB*DTK%Ur|$z^pl1iV~?z`<`|!GnnB% zB;9@k8SVU8!UTLv3eUt8b|;QZ03!7T!HEDBL%gMYYGJ9cUV!{D0|%8jgV>Fj3u=K* zSO7!v$@rn|B|U>!88*6SDK*L!sz)XMVgW%P`y`iibil|4d+mS%m02o*pGvH73-_>B zT%`I$3V0QvK?VDiD!2nT`Gv8E9#_@hkc(e2B^wbW5S(|vOxj2JA#6$79NWrjb>2gGd<^tK%QX{L?doTw(1PoG$WzDHJ(9%To4n>nvIXZ^bh zAgjD3@CSbr?uC|9De4XqHANbl5mE|~c4#_g9!rrtJS!d&SA9b+Hq=-Adtyox-a4{W z4gF}j>iStCDHIJT}7%0p?`jkvBJ^K52!Tn}-(^Uu)N`n@3%Ga}v7Q{VL{e8+^-;R_b z1fC6MTx@j$1LOl)saI}0v3PUOU0Vkmf*Rn@kFHp|qV_kK;|0TbxlI3_{AGqVHh|#$ z^R;`qGiV)WKe(;GmuPV1-OQZNhYl^R&g&Ox0Wv&HPJjVjeNJ@K!coCT2YTB2>(G&R z-{eZJqVM5<1J7;&JQQvQx0ijnOTpm$iaFaWCuH!mLV08pfE}@5Xyf7KWvEb|ZgHB} z8H~mY1Ht09A)8qncKc`C9?f`sy!ow@XdPVd4CeEDM7>^ix?Hg!Z*L2qWwE+iT@WsJ z=nxrh@@0O%f{mR`v>T<$Toa2!t#-0olf3P~UslfP8A3LB#|l0x`a~-7JMP^rdOMbR z>bw$c%nmf(ZYk#f$6guoM0QhT6p;A&Ve$^>ZR!{Y`FxB9J{4wW_|n;28-o#*)Ndc4 zt)ru7WK?n1A*dYXC+vGOjCazJn9o)8G(^YeeZGQvF%HE$a7%l3)H|*7S!8k;@{zT* zKR)qzvmO0`_IX$I&il4_G9B23 z@KI!l>gtsv_ysRGR^@GlO33?=9eyO%pwj1ji44>;0U$j=c)_6ku+Qf__X zD1cEM#nVT<6)b32YZTk%oghnPba*#Ltq|F~)8t7`YA*96H3iy(FGo^KU9!QT=ITAwx$h-8T&iTcS~$K7@83=MaajPc8l;T z)4T=M!9tQ>%y5E?ay>}A-uN1On!RN7jBvX!_aq5bq2^yiNN3u3u}y#tfPJ6xlJbW1$ijGfMKrY$++eod$dQJVq!jB&(YX zky24QIy%mJZFm>-K-=abC;p>h#m-FV%!(V)hJG9hSb@5B)9=?fB;Pf5DO{6o4Y@j zLL(}Zzmu!Ph7on(N5_lfBs=#hBa&pkm|F>3SoTgT;(R!;M3q{jyfQoO7A~k&ywF*_ zh4zWoS2n^_7Qt50mYw@S)}7i)wNi(ko&;o^9cBx)|<8lba-zQ3HJ z1T}#y&oN03(mZlM4056auZK$xFi_kc)TxtRuJa;#Fj^FITh$1nt z$?_|BkPQ|;p!Usm>ye3$<}EVS+z8z;mDNv$H38D&DLv>~cXKd6s%9o-Lq^X!4%A4y z_`CjW`*Oq-ZD={@Iygea2&j3yMVJe5q);t!TP!zE4A^wS21EMH@y!JVS_c)}vB4*# z3<27*M2)-(*-m1+EXpZ$$#02{i~mv_t88)3ZNDZGF6x`hmsg1CBbFbC5i{jti^rT? z+*j=VFErY)n$!!Tfb{}ws7iFw{qD!jiP!_H+~rxnt&8MU>~-(;997&WXH%cQ!9MaG zGdT(l4nkfJ%y|X|>1NR&2_`0?z|VJLU1RH;5@!(X!{{&=8ug714Di_gIBkcP?QtX( z^7WUECN&`(_GmWDZ-)_*lko(6O6%Y`G6;fR|V&$>C`~WHPl!AVng5H~iF7)HhUSd($)%2@J(T9@A z{kZ6n$$`m!@6#yR{$LWB4M&*4wl`1)p8sPfL-tzk}?sl;RQLTmE5cq{Ze95T`SJ{!`<_BL2_K=yvVx2}U%5j3S6aARfs^oPr^ z&89$v3~t_-&IOb6$8WBgo}v4|bRVG))>ZtfCQ{Yq%#irU&+YKQ2M3cQ6;@-&`s;hP zzyg=vE43DIb#^A0uJ6qwv$=n))9k-~l0GJC^R+GavkWLum~@TZkK-DY1@OBav&$HV z+xCG9f7b;CsWKV`3+l9}Rrx*fNDxy^(D8o8U4^<02Wf?cUi{YWOpT$92WRjrxo1k`)gRZ09%SK!?sjD-oOYv7j{nhM~`H|f+7*X6}E zD9Sq~#2o!2fZP=nvlw^Jg#f7p|HcVbY(px`junX-4( zLt{$D#4Mxb<#RPQN!C%_#u$e%eqhk+M?&8x=CbX6xnLapLcW`gP(e>aFO{Kws*J$F zsHEwRl8oZ>LtV$}5p*rWq^hM_$28wLo}386XOnmiV8L|D2GrU=APclY45(5YD_}!w zs`^M;v05W=lv1G7cKtCTj3a0qdg%t*zi1LOCFWRYBfd1f!xE2{N5K~_{?@%UaJ}&( z0o2`O$HH1iN9z|U!6hxKRafZUjylYLkAt)hgXp?s4dnxMn6p?f6$tuP6CB-(6l<%9 zIr8Dz8w((a*qTIDdj80>>X1#C-d=E3*8u-~(FBsjd#nlwy0DMM5pUc33mmR7cFDY)Aa(u_I?Ib~vFwRMt z9-|MWOKGDi&$nAG{FXBdhtN2yw)SzJQ!T?21EVHg+6JeI4Qq%^5ekzpd!y*`t6}^u zM$yeJPFgYAEtG)9MZE*n@h8YhIT@8^2tY^@8}}^=!(&5@fu90L5m~BFXg0uf0a}fi z8w*HL16YbhD#YO~g(~5Tm1k@vW~jKV98B$7nFCjVqMdxhK4Vx=G`u>gXbAefl1$T| zMOC=k!JuB$XqYUr>;@ys(IWNW}oz9>qFk49M-;vDF zuNq+r!XVeJ5D_iCP-S>2l*z45Paha0PF*iz4%0<2 zJED>U3_0K1=*Uj#D>@oY)1gSF!L42AUX>uT$EZb_v6S!f{o}a|W?|^1kQ&2h+XRiW z7b~bSn*AkFhMrPO@(=lR%oQ!Px{=b@53RDKG>=d&GCFSDz z`h@vVO4O~0WKu6Fcdb?aybJ1q6QbHJ_05zlY)poNApXy;3slp#fRYg(L7X*MRynm5`11~ z1%sUnOEv)!5z&8mS65fpZfYt->ajlt7}>jV(BHBCkNv$LcO?F|iecSj=0AS;(nD7< zltLxL7xcm{lSoX#y6N{AEs5}W`!pH?diX%UzUe7bG}#{C`k*d)AK-mD)Pl|QTUPlH z4t@()xo(|~KeCXlvS8yHyrs=sW_7!H^AP&1>)p*O5}ZNadLuS@x+U9r7rl9WelBV& z@+mOvpqcMAu)a0u9VdJ5$9tMYhJWq@hdnkwuZ7o}u-ji5kdV7FC~ zLyBBJK0c>DpT}RH&_y5UMFT&Pd^E1MxKS&W3wC8f?qu|gjSrbjo{_*?PQe7STltDH zT^;OM*#37gp-7t+X}i5ND?G^C^JEviw=6QXY`kje|Ig(n{OXfj^s#?|*zR&x(5Zcl zE9ANI?W5t1?Cs%by2vZeW?vdyOm&!-)+gSph z-$_N$4qnI(o!_19{E0;sqgFLGMuH(m=TA9azegX)t)$ zzI0TsZ^{UGNuHhKtQ&K%Phw2COW3ydnu^|Yib_s?fT)83w#wx=&l5BaSA%EyEn+Ld zx!#2{s1LpP@ejF6X9XcPmAsQMUp0~8I!Qc+?>h=FMyAiYy#uWO!e2$oYyKgRD374U z$d#w3M9w538$0!?gp^p~ASva0jJMzJDMOqmSyo3pXAhjhKfZ19MEi*4D~-|>myp&_ zkriI`M2PYwd_!xao|TkM$);42hei*R*YqiCTXxl|cJ#)mvwZv;cPlNuZcof{t*%pt zBW`JHYMDo@7yG)cZQuszk29yh;8xW<8m1D#Lel)##x5y=?@1`n;I*`>j@@4^h>TpC zEDz0k*FI}Q6wK9b8E5rdmB$MPogh z#fsh6oto-oJ7FKauSJgwDz`ikkXQ*8NXN2m7CkOxhH3@f9P!C;A~DI~Dd2z5 zV^GAXtCliV!llh2aK`mSMo#q_80PV|n-EHuSRgTE{lL+{eCb)nTsS^a-w4wt4C1lw!|97)iv&5)+m( zxnb>o!&)xjV<=xw%PY+HcTba}z*33{N_rq^RAj*XyBgPwRZ$?e%K4J7TQu-O``u-! ztVof5BO=v7C?ei;(TzcCh&MiOlH8_}D_m6-wdxN?fo+wbt?DR5)sDDqD*nPE2VG8f zM1|U5f6IZGy)JlTh~#Mx1Sm{Z@dv$cHPibZ0X#DUDLAZ+qiybm0@}q_hCpJ39gDfs zVh=42Z7Xgvqg?bI@xM^}as2DUp5zE$8JBKC>>iH-jzWrpM#?~nsG?QOM%p69QkgOt zSZ+tDgvO1KE+EZxn9WZKOwP5gxQyK%{+8#SfmG>%~*X8Kd@rRZAeo&aCg`ejwLPPKRP6O~Dp4Twkedg1o#u!`|S6 z`+F~Wh6oJ&q)}oa-{g#pj8@yCRxk{j+y%{mEh=n1Pn6)&hVpda0ynWpVD{@(Z;{1l z;#}9bV0d>ohS<#YC3?Zm1{qWC;Z`;7rQd|tb1 z)9Y4Wr4mOiNPV{0{0X!!Vh^R@X{mj;I`=Cu_!PF zH9Q<$q`!0X83LWn$?anGkMU74ixZLELS9zu0$%TL-jCg_8={avT(EiO`B)|-x9IIU z>3Z)fC^P`=7tEIV-aP;G`;?B~+w1hTP0 zk0o@^3$?SoPg#LSYbrMtO=Nq*=ocgwe4aM^kMmxvh6X#`qMmF_6DM9XrESY3vmL2! zs1e+hT?51$s2B)l2Is8_AnJcLu8aqVBhOMeK>?V%+~jtWNWg0}Hj;ML%7F*#J+P4` z7ULmc-g{S@(bTKy_G;2FnMC@hgK^vHg`rNo0GK?4fGTYbR#17 zp6r6w<8aA-M`)UwYqcdkONMF!AG9!O_LLI(2a^_janApF0lq9mmDN`%->;m_5KE*U zhCy;rTXI`d-`SocRF`4)!rn@HQz(!}M|?q459f(ZugU%!r298mxvC+;DNFvT6a_|< z0|r_?L5G1a?kmc(4i(+4gDH8D4rcHdZ>{f(Bp@x%>^)M1{l)WaKMGBCUsZ7_1V&3W zbG2(wI%}eaEC-}Kt29p~cEbRuhbk)=11&xG4@HxM@t@Fj5|2U0%+UVx_*RhLs|h)Enc%CO(e_xC2K^9~{%=(`CetT7;bkr+tB?l@RX z@Tiaw^Q>WdEbO#u72{FQkfdq`xWqU4uP2HPCA&b=B9NVs6|z!!{@bs^N&QOic%CIiBVPBp^t!jSt3OOfS}8KhFtoh-2Al&HS|(}+``^ipUBCr_Y# zqh=8(5IGnyshs2FCH_UFd=WE;8AQ4W|2|c~oI`13WMqt*5;j|6Ntqb83snO-2Z!i& z@H}trm_I%t{EiKNwPdg5k(1Es5X&fS5z8?Vh7n0)D5>32@ntU@kfyO+@d!0i#ThXa za7H6bZ5wPWWh=cLQ!BhnLTe>Fx%sQT@Wpj81Le&0#+g7LV$|HR->9WE>I6}$F+8N5 z!k)lGS0FmaZgoG+B8Tirin=QFuTb6azX%g?N&$+0IKa&(p`mT$#<72aCg@jvm7zOQ z^g%;(fLk$k8pZTBST{!SonPTw6GQb~xnWGdoHO;r5GPHJI=1=Mu{G#S*pHiLbnKUL zdk2l-8s;Bf3KwtozXP|p)3~fyhLy!$b3dzCDAmuTy*oC=`=HCE?eb)iVzUX^>4)jZ ze~kPvo?YRLOQf}0lvGz8DI1$_Gi$fQkBm~r?56%kDe1*+IfPUmroqmE6eR_(HsoOB zv&_hR9SS(pRfrcGsi?J3VXKN1C$Fhm764+LvJxU{VX*kRzv!M7La{3U0V3w)3m9UX zM@NLk0U+HE&nH;bZC=_%_yv)Pn$^}Fq7I6*bqElgofktL_#lmiCH{H$y*ThiU zXhu|g+iAq2^F2ORL))qd0}+8M4BY^}zuKF4aR)X+d6{5cGDUsh8vcAtxSn(SW*GF; zf-?!Q^JKInoDot)C-QQ2a})5`0rteM#k;2Z1aIxVP15xwg}hJnPTTZ8 z;s0-xNX~up5ga9Is+n$g*jft;$T9wAb4GxYNq_=}fooPGhFMOZnrS;%N*_tjKM5~P z{~bYaz~v!)(7(y#jhqW8&SIt>CdN6?(uRdi4nZw$4l&tN7BIWPwQ$zG@wn-stabSK zOmVhuY~9b?XWHTR0q|l}(bBfowq`NWdjH(=d0J6JTi3p24BRDS1R|WTwI-lzw%iA3wsl=>GqTo8pr=iF|+VI~Fd$rb9p5xxEUyz9N5K;Ik) z(5ZjO6CHiq?Br{CL`!_XFWSdKe^U_o4Ef~g{V4hfHF+%}+j|myC3~tUdPNZpnlZ*b zVl>V(zuab78|@t^n`yXR-^U0!s;58>Z|$aPKqPW$y18EVYZUqYuVl?{hSm6;*d&Op zb;A!n&z9b zk8D{4jf_^c`TaLXBAklqyu$U6r7!(hmoA#zv-h_S0E?UTfCX`!)3_R3O-}2aMJzMx z?45!Urs>Cm-etDlmrw2>tH0^%K{P?{56iel^9!i;`WyvHt-bZxOvef<)mYqO-_!ju z2qv{W3rwwjz17b={{m*(V5kWQyL^q7f!UZy&)thN>02$gy~$^=->)(1hnq{3Z&K{r ztQWZm1%T(tHBz;1JM6mTopU){(ujokBe{mW?6&?YL*)383^t|&9%GkM5IcQnDVu8F z&x;5yP2cxjtrW{uRCYA%{q>yGU-~357$m|BvNRk}W zwf}5*@7G-=B`$9U^-Pl}5(ygZTe*w9vmE2Am@*3J~PXluuT~{S^aUzzSALE1i7Y zN8Fih#8UAK#$@vttAZMZ|=n1)MQL zNx<>I^H?SmF{&(j#?bxrY7>Y}{BBw@`!Jk99xLw=%Si{xom@_9AfwRdum-S5o|f8asxc!J^rs zMnZ)|3XcIB7-m3OnVF9|0ZNB2KQHG=?jG(?c=AO~f<%Wzgj|N8z!;N?pg=5G4Mp1w z%bE37mrO_#!-sE1jl?fZI3}4B0J@g~S(KGz5@uskB;c!ur7c$PSy~8^fy367hPm`v zbiWT`O$|smNvZ7OpAed*fTb~Tl;TGigw%BImp{_uvbm6e#nld{_G6|i$!_`+`}wl` zX#(O7X0wt}gOCEfV%Q9l&mE=JR2-6%8LEn`nwX_<6g!)#6*LYClo=v;Aegq@((L8z zrEDfv01lPC4DKjR2@EhX!VnMxlhg*(q=}G4kw*i4Cs>}8Ok3ZjIXmu7o-L*(1+8SQmB?#Mktq{O3^QL zqzt{#+W<8oQ8mS@nk9u}7>87+5ox0;;+?G2mCMsl)@>c=w4{>7*r}~&BknAZ?O-B! zY@mkQCD4q|+wx#sBEEmuvHcs$;p*~y=KLGa2!qDV>i)^n`h!PQNzft8+kr3_JUY=+6d}==JWFkw)Qw>`%b0ra-4Qcw0eqCtk|>cv^n1XKc$E z2H>=2cX}{d&p^*?b~12bntc$Z0kvtj{}!Awh#$3ZpRQt5TKHizLmD->a~{k$h;qeQ zp8j7za5RbR2pr85Bnx;v|2tSB5v^-rkXO*^0=of|7?Or%Gjp@SF}9w3x&poaGl18@ zWq*kbN|X3$rLz5zt?+Q=N_4cfBNHp+-{t}MvgNM)0;(w=P z04E}sq}%Av!`7$M+uHKSplp>ZEaRq*R#c0wU*l9J!4b`GW)B)a>rEv zPZ0TK=nFyphsJ#5L*T(@#=&wuECXtLjUj{R@=k`Apn`~lBbkpV{Y?_Jzg?Awx@ zHHdQ&Lzx(KvR_X8R#VcshB-PhC8XkTw0UPy5S5f@it**8H~Kto7K_{|T&xDaCogCb zm&CyRpd@C01p1#9Sx8|wW85^vgG{^})4lw1;UxE$Dhs?SdgJy+jD?EL=-kDACbl@l za!?Y!Tw<=n-tK0zdd0HRrCJFYP#8a0QB~X)VA>TEcPW#HhSp$6h?V1^!t_tNVXZ5W zcCjIsP&ZFXu@`xVLSnp zsRI%KGz|_e#HRwE#`gC1TQuMQpo+gZ9>H;!URfzK%KcN5e4F2F(=78xK8D7%81=lb z7_Km1h+0); zr&L2S)HB&OsrYi7{EF-^uPwj|V4bQRM78W{GC}&gdPxDaN-`=fA%RX}5*&M68**w~ z0l*O7x6dQex&l*P6B-hD^RtkHa~A4S{tS(P5f&wk$dHBb7<=@(To>wWZAbm`Q=ds7 zf*n0?aLiIr_nknVxg0L+e!S*;s%@H>+N?1@9np_mJDpg6g zF)@7ntt*y_g%JEHWwVQbn_NaTe3Yp2Sr3;w8UvZE5(3HG7BhBGzhsOy3|$SRmXxHN zGmJ_V-4K>+bTn=n8Zrw*3pMBVu(zynsT_vCpdtrlJy5lBK3?w9sT@C${J-)f2JKRd$ED_8VnBeHWx93=P6;aTYg$FpIdySO6;cL}B+j;2{e&u&Z=T;d zCtom?u+EycKYLMmZV{|W$is~nhapmXbU11hN?&Y+yfux(D)hqM<#G+Ha5i z4%gc;&HCa6V4RKgcIr1eePE!sZY9n~EZ&xQdxt~b=DG6aa(L)E(r2GH*8f$9Op3O< z@Lm}dkccGXyEwTX?jsuGPc?P9kVQXSnp-`fT1D}KTF!i*2Vj+pgeU&kM&zjJ;M6K8 ztov_6e0`U=LE)NoWtl8!GB6MddSD)9Gq%jfgm}fpZ42_s5Y*PmX*m05{<0C}8GC>K zRVd^irNKpv!WtEi42vG4v)|{a?@S^8!xvR!n|sqi6m^YivHfBhoq@9X^6RbUoHu!-~kgYspY-i7`p`g(e1?D&(iPF$1MtP*Q2 zU^AQR?hqolaYlC8OH*t3^pp`yNzJXStR5y`am|cIN19Y_c3%YWh|M)Tj=LX!*er8; zxUA%3)?{=uH=LV$A$TeWTyi9meX=GMg(Yoz1$~h2?d1nw&OEQb1&N5>F4v&u7@BbL&*|b2}51ITQS1A>_IojK*Tp-s3}5$@jikbXyd1 zh5Yu1EZ=%>GwEOKjK~-Z&RgFlly}NS-PN^C5(BcRUksJcMLs`6nyn$a1k+qcb@g~93Ni}nlRQiw)FFQUXA31UMKgO%}cs zgo;3D)IJqAoz0MiHad%!i8S5F*&&eRq`+q&qA;LXzms&w>6b$C>-f?9wN>SQ9%Rg# z(m;bxA8}2Ik1>USh-#Emlc}oW+NYlIH3f@L9wmk6&wdTaIus2KCQ_C*6%pS&hta>n$yMCAsNwc#WrwT%j(-cHeLA;@s5e z4~;9Xp+*Le$9&RiPGjv$8Vtm-;nlZB(z!A#VZ141|K?TWLp%aYeW2$k$@>5}Kut~T zQI0byXk*rz)mDQFU;W77H~Ir>CaX?7NNFAfC~=A^3?O-j1esmvY!gm_dJ_JhEIoPH zo1Pg%*OC~hQ}e#E&s1br1MlskS<3Ti5{jOLHwxZxQBVUP>SC zevyfQ*-uhA94PsdgFoTx*c2R0QCZ_y*Rm~+7(;2z@5a^Z-&Hj*UeP1w61v!{-^Sn# zewXE1%cPa^7gq!AX1Qgsa0!0EOs4!DFdQo!7okWQv;2uqFc==LEZn5SO`+}p!vJ2D z2hBMMTqT7dz#O2KY+)KS5BD1q0L7Mx?G1D>I0L@G`Y`roVBSc_a~$}&oCDn}&S0S> zJjRSKQ}mL*CzmVAlKm2wFE2%roQ3dW(Uho=pGCvYL2vc|O_@uK|7c)iT=^e@2(@2dhtgy#M<VLwze11BM@dE(H8 zY-%5$%m_c{Z;%_3to#r+MR_JY7NZxnCQO0EuaZ;>a$U6j>Fju}KLpwg8$hwHRRF4I z#+*O0@etv-DSHe`$Cx^vt|OI}_E=bsabuagLNbRq=P;==!0u+&Y%}#lwve3Q|1tHA z(Q*AzyRk8`ZD(S0V%s(vH@0otHX7Sb+9Yk7G)7~q(VhQ$*L&BUwPwxNIWy<{_TG$ROG2H_VI*|do}4@t;Zl=TlTcwLLB2)0B_x#)l4Bq3H`=$<m^eMLe#^?eWMG9vu6S_G<4fm01v zSBdstGt&ucP!zB$u;OxIHBvtAJ006~^Dd)i`Iv0ZPPZ%TZGG+C(|k0!`21b%V+D0} z#D@B%KBHu21&%r2h=2qI$J14Vqc?3tirR^8R|P>yMT4(L0vKD2-zVghh3;4ak41y> zj%AR~2VTWR9*5rF8U(ZA)ZE|Uz=aZLb5`i_$nTZ7e|A=%G+B+nG)mFg)ad5%HN$AM6GP<Vc z?2kd1zLw_se=m66_j%iUDG6Vd2R#bgvUt`>FRhX`2GheVlBi7gggb$KBms@KNyxSH8g4Sn&0v`A#BSacO3Nlhj?W`k&mu@7A6k z-+)X4;}96Tu(m9LH5susgiim^#kYBr9UDvspNL`5V;N=Vy=PSZm%p4{ zPN!$wZ1{3c!F1i)Dlv zI$k*1+05>r8(T+-{3|?PP+%#;4q~CJzgXO6DUGm|omG%>} zVa5w>2toV1NEBg%eJzA&Yf=PfpTbQ)+SJ*a!VBYduEF$KW5AgU8(k7#t;9JMDhgKl zf*qZ7#}```S>`L>HooyLruKGk!{b)ue&vXp$TwQavx(whuofy z0LehDiL<@6=$*n0Zp?V z&?dn~_=`Yb(hv|&OrCc>rv&Rh9+r87Dt{wk_?v={F`XU_$~>Z+TbOppg<(>>8T5R{N0(>3e$Ct zK0vPfDXXX786qI;pHVO2`)44p=aJQq)>g+KS7C&e>HBgDiF4^YnTY)4P=C-*eD+=u zwJI9VC(nz8K8K@>&0Pr~sF(%I2lmGpT>r(JrS%TVhv9^-<}{OvTI;{LP8R zB8GuvJ8+YXt(G6EeDVWoVp60##K@O>V|l3}bsIvbS^(AhBx(RL(b9%c0Cv1oae3v( zz1_ZO1V2G3`X@at1dMixiV+roAx6|px<2JsTU+}o0%?I+52GQv*aU@xQ0;;kKCGFQ z`=5Me=gt6m7&4|#b~lW~oTaP56i~E8jAEO60T(&NR>@ZB8b+UFAJf>GZTN2lXqmI{ zsPN_G!^{PNo^(0dYa2UC&m@j=?jj8eH1@rxm29 zYN;s;1W4r9kbWYea%r)7fIFVs!*55bC9BC~_Of|`dHJmVUvtLiygGoB3w4bZ&QwSY4s1vn00?LJ+Q*5pZ?iV@_G$aWInHx=QzQIij-&%JHYXCHcCF(eLasHG z(FgFIxxQ;GA!Ypg_ug|HTWd>qOdf`&p0V!E&Q|}qfe{Ovv30gtq(kz&Ck(Voq3~?= z83N_pDz{ftgX`-lBh!`jFof7ojxEFE6$Lu}uSY(lz8_qRMHk5x1_XhPhi!sax5ToJ zU{!uMo{Ib1z1+aV)DPnA{3TNTosa$vk8UCME$_eC@vN>Na|-zae!1Y1S$MqQ=;CvN z(0RFSZed9J1&r1t&*Q7m=6j#eEHpK7Ucy2v8M&J-lA<*>0D18Z2iL)UDf=keoMV)K zUq^@3LbstzZsg&i6@fmM=(A&cH%=x0Sdebf_2W&7)r!XdwCHa8^DT^X zc?bn8)Lk&_@re0RrQ&|N!mwEY7)dlW{rq?r-8;V6kWvsF7_vy}8;U^ABk#41l=0s7 zThm>Cpj~{r5!lGfi#SITbEQpHuk7m{Bqcd6^iQYzyu~Q|PW84odAT>WyxJ+%o^KY} ziTov!Iq>f!IQVDr`CnGiF;URRwaqZ+s zy$NC4eDcd-*jq=?b<%UrVqm-q*|Yc-Wd7q}nxX$eo=}@Nndny+-bwAoMR$@tLMFfM z*~Pc#i?^-yD*iy9w1hjh>%;9AHsa4675CRINCkn3JH$f%mXHghqp6lK=f*{2*wT^B zkzg~NrOjpKZ?^qQa$B1A4l(jLw@Tq3@doBcx2;R*Af5Ui9(J z1L=0_XzWT)FpsI-%-b~`DsdXoYwvqM!|*7-A&t?mB;Y1hPum&T2z}ej`I>I3K6Obu zj#YIOub^@ss$N8IpUxvYI7iIti6} zyhfBZglN9nA6(dG;-X4-4k>up!(uVi>`E$h5X74#oddHV`V>S6-pwHhoMFQy5zHZ+ zTaO&U#O|yweSjiOu~#07Vr&YDZhz<0~AX&dxM-l=MCXgt|^8J6oYu=FT}6~-E$~&NyeHSU;ZNirDh~&2 zB_vHi8}A9uE5)K9mreB6Nl~94i_~lxDZ^l*;X>770a0dfX|`*wNvsZ&m*gTU(Xv|R zev>qyYY(X8(@=TJ$(2rw(kk#}W&{gczz4r5V|R7{&%J4}>p?@hq(uILlNS$m0`0Lyk&iTu4-U zZXD$=v4;poV#3AzONW}_VMw^hYFonOJ`mlZtmKztrs3JsTHxc%n|+RIFNR(khrv2C z*k;rK9y$Y*AsD%qa#GDrvBxD&Fg8Vwpaig=)+}63PjMtymS8_Q9PB5FNA$q%<)dRn zG%?e*m zgN6SnM?EvXJ9eQ_@MMNI7Y~+zcDo;gRjvp5Ig!;{Z8qrOQEV=HGDMYyz0)lk8s5Oz z`}VBwdTL*&rGKOm`V2hf*vLh3&Lz8BTUdmFlTN`_|CfTKF2Bd~-T$V?hKBitjZS2P zDiYy&3l(Q~*FVL2X3fXSqL$9AO8IA|B^i@zWA zj6;T8o{YEdkVR)(*ggyjMFysG;@DbV=st7V-osw`Jd;E#d_q<)nA`E$>E9Q*eZ}&};+q&zn>2%bz)DPA2!rE8aK4y~~B~4xh?J z&YO@7<#{4{nI;*5O5QgJj5HU*4&MLHxo~LGS80q2JLCt{uQFoCNNnW!uG|fOMxF%5 zU`udZ9T<83PIXC;{8ZH5c34bDY6hP=MB=}d$ z-90wXa{~jT%L>fxk3gp2rr@IX+YT9^Onu+`o@yC-3|t6i!KAP$cNf4rvM?1H>XT5n z9dk{>CrFEri1&k1p^p)RIw7YU!z9E?<+L6z@)IRAjKSRrFU)V;tM(OT`;3c*h}DIF z0f(5vX6$vF&~H717m19z-JorvyAzP{&LMpc!C(*j-5RKw--m6IHD4zCwGu%wVxy1$ zwvl9yKhLp@VP&W+aZXBiO)tE?O4Rtxo`HHVqqsew9>~mUoPz`#>t9?>0SIPR#-xb8 zjE0kM4>1(t%iCeKygGZRQ7ul_1)(P*M+7(RAR!cEs|5qwoYonGf26 zrPA0{7*&`6JUD)K*@%4OW5t=2{D^}bZ8}Bj6f4jpVy1rTSMs$DUP#pU`A8&W)M*EO z`%P#d)9(;POaZxYWqe&RAyvJ3)o?X-p8e=qV96>>Ee2r$aYPJ~0i`7r4h}{%X|? zIbDNqf>JWdS&Y&^4#X5z7=akY`e-=DxM~@DVwwt!gO`^+c&8G_4Vh$u9d7bGU}++?p|b`eHO)ps7H}ffSe6WH7F+ zTxv=&7f^LPRzQ1&0wAoWC!`yEz_L?UuYqS-u1N(-*v42wENVk8by zDfW9DD)DqqcEk+ogRz(-h`N+;7`nkqA9I?MWL%nKNb*F5kpj_@1x|Z88G)O9h`@Xf zdKk5=^TlwQl%4Idg>Kh+q%PnkXBRHC_ipaxPRQ?)*oKE!&U%Hytd^)3g6p- z%zSp9b5-F!-5b1Rt8!}b-R1I zr&WQ0P(eI4juYX_3nSf~#8s_JT_dZ3#^p|rMZc&LoQGobuj~+lBMnS(=Vafbz!vQg zUp^J#e4ApHw9#4SIW6}0XRlF8)m6z_Uj%%aN0v6fn#dg6oyyQ&_dtuVAK>IP1ULxX zqa(g{FE&4IZG5hIgLW+&@N-25mdb*)ZeV(noiKqNtkgw;g9fL{fTgCKEi4cr!;R3y zfje!?o9`Hj&r-P$SVH#?h@(6f+9J$gWsqRVw=e5p%OE(Q&9KiqrSYt~HgD~#Ko9sU`JbHC zn)W=B=D#Jdl)WHzXbVx(-R0i-rZdP15$@GQ6zTR%9BOqF$uDth@!FZS z&E-kuR-OrW zGM@DCxEx-2g!Ga_T}G&ql&Z!Rw`i_^n|Smu-qNn+r|C6o*q=UWO<~a>&O?sIg~~%p z^$Yd8JZo2n*PhxrkB0fzO{5!HIF{sv5Ks{d zK%=EqbSsWipVEX>j2OW@qXQbc$Iy$hmWCuZMZ>vCGn-|;UJ$d#;t#2Dhlbi?pjQis zHyY8VQ(!_4#}$x=MnBqBcyI8!&6GPH7;YVR33}%UqQNnS#u;%s9sbHeYu*nC!j-7N zlNikG0by6gi+@v`38ki}EvXeWhtpyi^Pk}rz+gYG-AB9rj&64RU?`_Z#e#ieXlH;+ zXFy$zGaUR-3OOHfU75^jiPfxb<5OjIpJBAFQbKG%*@aN5o)3g4|F00q@keh}&1)o)M&1S#ptb$4KnmIsLZeTFVs<=|_{2LUCK1`QoXD`^#iD1Sx+4s77tXvC+Qo7A-jsz{wzg%ro?M%bgOTXfTo6W$P( zftaJe$Pin3tY8S>PbKR+99fE$nq%h+ShxS4s70*VqPC^jSCP1J+LA9GCMZVfLx3Mk z&nA(}S<3LEErp4-HKioS?wu$f^$4|#3kPEIs{N5zha)o+DWO`nqm59+!}Qp2%wY3oVN zuLi9I;NsI_|DAj?W{cAU!5`Z7qf3=&g;w*ZzW~WrtbT&pd~I2P+I^xBa&ip(P=OkM zX~30){RKak`TY4l^}l@Ab1aA3QdxG^?i!uF1J^pT14?>BfXXphezyl;7K>9kh86~m zh0Xo=lo38F({_N3%q&j*ERx)8U=;tdJHdkNpKD-+(c$_9%_GSxIa!|#Z_`y8ZQ30! zTan199*Nrf&fp7J8xzu-=5_=o0rN$?5|N3BvI(@}>seV`!7%8W=Y_;Q{96f_yL1xs z;BONl{jXMoCtn*2A!#XH60~lx5U8xK?wk;5I2a9IJEc4JCjiwjB$hqf86jiK zQo2U{XG*OvRZ)OVm)g#yVc-8F9sjdh5Y|t+ZoR%2UKaXfwOv7=@j#JFCBxvUmV;u- z>2pghPxa$iHspQcF61jN;pxC(1)7X6$t(RCJ%{Bp(+y{NzgVvn8m%r;8xmUS)mJ8z z42Mt+AT1@9}Sy%n>q!Ik@|XTsAbaJ-Q5ujdd9D>>-9Yy_$*vsv*+hF0m5C*&^QtkF^`SkK9;^bb}y@ zQyxPm`P|MC=#0|)ZJU!0BFg0$6sUj zD*(y)H}05HLMC(I;xyG{Hox%Xrdw17rTJL^7knf>DT#%nd2Gx_JlJO=9>G(Pfpdhb zU0oYmT1_7Zbp^e;qy5bp-`t-ZK4AwhN0@)MW*rt+A9mQ9>va;JfXu?tG5Ui!z>gnq zfP3<7lR>$l(8je}!YIsln>~z*O3?|w#0*pP&>Zmlj=3;r_j!3gU*GWuBNSwm0~bE= z##p5iQ8xZb)hCX|L=7n}o_@PN6SkC`nwDk(PX)85R;lXtZfi)hk-YIta)*j(I3Q7VykI2GUq#PWF%dx@%H-%V2 zfE1n#!);}AK?jIhOv8;?yh_iv7PDEzDwF}UkA?9`=+SloCHn)aO_UvvfKQE>s zUM)}w`+1nZ@r>G+Y@Y6uZdlk2ln80-3BFWW$N1BzW$owwp zv)bK^9XGOy;N;ALI^W#s1BO#%NgKZ_Sy|R?>Ran^65gQ&!fhHDqOQm9w{5+an@370h z33)5}mAT~~UOGQ>LVz14BOUpdh`{%~G>6ppxkXn6q=kw4mZ3|ARWW;hF(GPm!@TxI zlLlzEuP_!O7R(0lKmsjA}TwNC)^8|AizB0Z#-8#Os88Hx0A`1W))5m7Cmj;kj zydo4))grxu9*&}>j)p{yd29Mzap5oo3YLvdIjAT;t08A3OoKh*-VNro6OG6l;bM3PIe{{e)P z8!>gKf?$n@K*Jz8C@6IeIi>dA;L688UktJ@8G3fDw&2#*a9T`RjKpR1J#jhcEYaMM zyQ%buc*>DEw24_Oj1gl8!0LhKw_T}m&pze=hRdRjI4n9{G7 zZ~7X4<(bR6HDu1G6nkt_!U>o&Ob};n#~jINOKYoG7+Kn&dKUAo)LPzNbA`UfK}x9? z_rigo!&ln4sRThvt4xFEZ@SV?k3TE4^M9&idgBVr#r8nyY z*OaQ1oH?=~dCx&NWo~e8G+sr!Duh%tc7;qBMICZ?h!CEm5dwDwN{BMEs}^;7$X^tz zDf%+Z(&8K%kR%SO6j!$GwOR`j?zrb<@iJ%yX+n?VI2j+v$VW5{n&wKDsa{cyDdC7T z#ajLolUlO_^@~y679@2etyTuA1C`kwkQ{-M~jF~fBgUw56#cfp$5p-<;DB9th5^(LXp>l^84V1M9;w@rI z5&AGpxu-0Q2rwa($$iCOgSXuIiL8Zt6(3#-gtvI9DRfu%LQYJo0yVrM7l3j=y&MWP z^^y%Ex=FPka=adRQ8*8=B09fo^LP@;k+5(u%+-$vHL@01huKt4Rk^!a@t*Uq4Dkrzoqr z{2fEIK0ZG2fQ5e0Pe`z^P*hR=UO$Loa^MFxsdO@#$74$kzr6`HXvYC0PGwHNMn*>B zgIfSFG$X25Xy$+?TYXv*#fv}z1IB>)JCO>fOk({%1alC~$5eqLF6yi?HE zhUC~3?f#l3hz+r0?RLYPm%jL#jFh4Km0sT_J5P8@16_Dx|F!UNy^KyzxBj}ZNP3%o zKaFVJ|An-%*OyMP^NXdqZE&O?w|oRLKA%AbAg3p2{2!*{S=$;S*3 z_j5|jDPz0d3pw__+@VNZY)AYizQsE|cF*d$?M>6m6Au(ot@_BtAzap7E)*S|Qvj#b zYb4z*l`Xp2HjeMfk6BE^{li~(;ny(=64o*CA;|hdY}Mtos1Kb$s*QFtGI0Fs^9>08 zPjj3*Yzsu@zYhuG4?MDdRG~Xp*cX`{ExGqy--|p7-=IX{!rK|Uq3Zm(cXI0W%}wdq zJzTJdwsJ_R$SETZxEYlK{rYgayH#bdR_=k=;Rkf+^eu{~fmI-sXdv(^fX8 z&#(p}F8%?S>`ihY3kbQ`7M&UJEPuG?ecna);t+D7=@fc{3cNigAS$wSd*5w>pBUcZ z+8+&w@0HBg=NnJaF30H_YHESAZ9Wse+C7~tI!+}k+^MljYMrXtkMh~g8D$2Q8aepNnj?0L|6 zK8SMO6Yk|@Pi+w0X^1H8A34FAw0#kC_s=eg*+phL1-H^^OBFW*jz-2`%)Y!E)^AkL zFj5)x&bB(`Ep;d=d-<8uwb813>%Dc1e`T_JuScLXiog(O#t&!4vgelUH~V|m=79tc zLW0hq&tL?CB*KG->r6vE*tFQ8w7)ljq(W&q^5g~ywdD*TQ6VKD&cuaFNtcmQoY+Yf z#ApDZ7ESyU((ETWld5^1UK`Q^0A!Ut0nyhl=MiNm9_h(OFkRS&M1aIxH2bT5`N(t)yGdg6+nXkzJ0MQJ+GeQZu^TB+bXMY-EkeaI^}(Z?UA7 z!YX0zC;8Eeqw@Spbq*n}!+c6x#3#9Zcn@kp4r@5EEfBra0`?9Fc?bJsmFj`$GU^cme)E?wnuQ;3tTL4-_?Px)#wSS)D0d%gtSRW1~+at@GM z3@8nznUeNGN3Mygyv0W#r8L{))1KPiauq8ZWB1}{xtiDO7Ato2?oP<(mU=QPGa8tx zH>(*W`qO!9&bv)+3k#+1>nOONDIeZOTie^oGc$lrMg1$CZ{+D$)}s7_)*IJE zE?qZa{jO?t`ClkiK6VF>3j^SP4XWFizG@jh34Ugp^d53J<$HH{K-JC?d!OQ0CHWCr zlsT^ROBnkJek)vIfhZT2zSwmi?(P-w5ajKMdKqL(9$d|lm=GI-bv%nsJI$C zI9^9{WYIkt_$ODl7uuusnU#Ou&aTl2^VVr+O@oDC@IqQz`d6nj88{8ve^i!z0{>R% zxVEm*e6dhiQAMRG>HDaZ>5q9Ym-&78uA`4$pPdLl@7upo-JUO)wD3PC&vy?Im`VI1 zp_4WjKTeYF7L3x)@#o1sO%K&B`@KY8ZD_ksv97E&3i~6oj&+77HoA{;>7LhxpY9Hz zS<=&l?)sa)i@fo1t`eM*{hE7T;cHs{VT?}=-C1yw0PRRO_6cA`;5@2o-po}&1<$siOg zemHsh-<#aT2&bKkEWUKu?~;9D7roEfywo*`&icFjB0I0L_#_E@9`*M)`$x5RB21;mr%yDN6e5_RSbgX``YSC12R^AwPpvd-|hJHDdJzeHFAO z^3>(N-{hp?pMQUqZ}GGFn;i=!Il?g@1yZ)0j2a`T6@~DtLeLKODgVAWGW zE(9T0R$6(8fCic}HQIM!$1T~gIme@6UDCQY^CH0%)GWXMBiUk3zdH7s(wqA#XYr6*X6#-}5Xo9$1Z~d( zBs6d4bBJEst(USun)n6!cY35jo${%(vB*fWg&&3=mI8XjgI+GbtAo{23V-WxmjOtH zb93aBFk!Px;G!%}zdGj&bOEMWop2n3OaDBKvO_-^DfuC>4D zWx4F?v!o-*<*~1&84|wp#KuU_6s97vC7QBO1jouj?RH`qKW+M$T)Y(z$Nd-w|>!kwRWj;)O5B~0DM>ZO2w0x73(jGcbXXbiVOF1O`xATU%RKSGV|& z6PCr8zCypTjfY1%81tqgeOekdK(W$m1rzk(opNCN@u**tyOR<-yMRFKMgQGE3j$rX za6tIvQzrvc#BbV(2_DxM>seSkxDkTHi{2tK{};};2b-e9lWxe$Z9k&UfOEW;j-+$5 zMR0{{%2f_IRYM~+F!vltI=Y7uO(Wn@!8qQ#zA=BFvxv(iMRXe>~rdP z;D`CWjmUyhr}1ouATI~-SMep~fL@uWPT0{B%)IC))xdSw;Esy+5kLPc@Z*rpXdKX# zPv4p6v~!A{bmz>LSEB1*_$uG?Z9@HFr3uWa5G%ht%^$9LRsj``;P zmdPJIAq&zI9t4wr-o`sGk7qxhuG$(tOqK|9s^0%Tb=jP=Zn=+IdpZ@GoBJmZTTB1q zScrzXecZn7H5rUr{FqjT11s2C9C89z|3T~#R}}^XU%k(h-RO8-?N3elbv@#Vj5KX` zWOK9`nBBOcb15LOH^f@ z{eD*UymdS;mpvmI#AVLgm@ZYB%&PtL5L z|E?iF4KE4Y;=>m{vu^u-3%jzDf-bO7xxW7sv~bn2SU8I2Sp50j z;CX6`ffmkWR&@?4E{@LUqnk$l&&s=|Iwt8J7pm@(3SV-bzH1D}dK_RIc!siTC{O8~`sEM&;Ss1G`Trt_DoiOfCl zG4g@BSbzs3bN;Y`xgqUcORZNV0d`-pHb5;yRN}D)-8D>C_dB{%6&an3itqQa) zCK`UspFRd<${bB^CYUS)Bh5(FwVU76;j|HJsc7O<#u=C;VXDyfP+!&T>nzc9ZxU_Mujd#L>}>O1(D_b^ioxDcti z812;oN}(YK+>#hEFlqWl7!>BJRP=BOa5O#w2x`@RreUhE2x{s=a;va)>v9j2OZ`7F zdl<~inaEYiEA?0KV@jt}T~w2!qa}3s0KMU@)Pw+eZcW0P73%&tF-@9|nKsHW`akz2 zy=+#RI*Z#Z<2r^=4q4)i4ebMj3la3y_L$wtiQILQ6JJxdCv>^>R~>mY1U(WxdzJKP znZExwts0<*wM=^x!(ii}m%!AB$rn>66)L7>6(qtmOSs?`+1Oqt8zz&F&FyhYrZ5e> zIwsGRo1)+rq>$$jTbi*oQVDYW-1NQ^J&%_gy2>sz>V9|7TuM zxn?^-*s&8$v80oMttGBUvpi%D7gq_wkZ(Y+ilWqHZi30w5t>cy6S9pT11x7xu;5OC zTo4F$>ZzB539ln{b^V*4ITVcZ8OPyl^-DxSZYFB88(eiL8H6c>wcJn-jWTsmx()AJ~GPLBNP)!&OfMVW^2R-%634N(*szftx5m9K%*yhgIuA;OxagN-qZd5MH zSgFT`9m21_z&Z<8F--_bY#D&KR&xes9q6}?mZB1c4s}wOAq7?`YIPble)Auo(h~`d zM2}9vxE24}>NH1^Wuhr`Moc>9O*t@ImXORhZz9WXN5kCwR{n@9b)B$#x@IHamLoJo zQT8hi4pGM|lG*oQd2C>4?B17hE(`}Mdhh)jYE^MI{Kh0W|^?Q^m z4BVp-aldLK)@A&4^LVP_s|&x%CFg1HlxWe`Hpjw>X+b^9Aq@||&IgtZT!SMQCMOfY zeN|o=Qpk&P6u755s_5p&6(EU6D4VARcVFF>9_hRPR1T|FNVjfPHPa%L9*K@tEorpS_p+_Sd-Vlb_&2 zAOA2PynZ{I&V9{m7$_Z!$Br|1@*vN@aEt@R6c2v#Rt7g@G;*N(x$WfR_wmsE-{ISj z|H*a9H>CVnsGQrsGxAD?} z1AOD_f6u-je35(Z?&tKT35FI2*mBox+26;7{x|vI z4ZGR|6Tshk0 zt{0x-%z-cR7vKIOf4B2`whrFR#^Fi&Z4M(>aP@nx;Of|B@yKHgKKfannpx(kKSS&E ze(t@026xNtTz&ms-a9%=*%_g~mf?&An__TJPKi zwOZFxL{4L53p=mtqb>#Ili%WZzxqSI^Y>FU1~&4pEAM1$mdIQe1v^AvW00wSk3DvR z`Na;49nx2#pHK`846t?UR_gUS&1RE*`}XnDOD{1$KfgZe1!G`ffbHA2bHfccuyyNJ z`ufg_J$iVZ$S2sc=N9e^1*cnI6LSHAa7K7Q>F_`9bUIJ7j!iWd$Kzl%?99b(hyMLfuUECi^rVaIm1Trx%d zkuw~4`Y;POOcJMuFXFqEqq*^NE`6WlSC=cA-+qAaKKcyL9ebMZeD*J~3x|33%57{L zuTj%BXI^@YhwlF&kL)|l((q1pUA~9wZvGeCamVd^=;~1h`7!OwW|*nV*|qgYTsAq& zbMv0scpocs^UNQeB{F%z`EDJ5p6BlUEPs2?0;QVt?pCc(oLOP9vgGTZvkNbA53Lah}IIFMKwa#lCc_th{1@EJPxe3l+^;EJ74Fp zCbc7D3nrHykL~0jBj1m!|3Mz(V#?^EW?1~0LYKN7i;;%?#eU+5-Sz!)c zGFi%-C?q*Y3P!r?-Ti11gNQ_eqyx0OuF@3(NQ&bTyegF@;egkegcc%1iD!QmqDjZ8 z=@R4Cf}pEqM!Ju?&1ntY-D`1I!bJ3Ll6JV7}RF+G=QQSP_2Lp)e_YbWCdgem4ZT6Qh%CdXs&2oFg1s$ z5G%pwP*zYzL&PA)A~sp#cVd%RKrLt@9E*lDzR&{IHb&dYLVL)FAwxojQHSIPH6|(G ztih3CH7ASNSx*B-Fd-ruF$9bWh^^8X#(JU)5F#pNS5OuRQcxL3aHMoyoto5$;;0G* zDnhCF6`w=}#$udBGJ~^%!61?$#$qIcOp$tkz5s(3*GOX$(-B-p$%$k>P_8|14I$bj zOD@iH001BWNklWTy#~QK>oEOZB!7Ny0(L!4(>XxEmDeI21VL=3kk{L^;N~Vh16yj>g z1YfiGT1HiOWVMVugUkeS<8e}vOQfMf)7R+_b^3jsx_96M-dA{EQu%^T*~Xa?r9~OR z_=*ra6rGAz;h8T2b7jC+5%r3$lK%y2xzemdqYkwOI42k}h#JHjw6s)}BUF*#!B>KK z!CNH@CH-Egd7&?}cxg~lr;-K%PcV@v9inLy%}Q#UCZiscsG?4wfx${7iYJ(oVB2`J zLIHFXI#KDUP>4_%2Q^2`lL*FPXo{te27NTBQKK?K$3RD*P(@=vqQ`~`6RPBG06}7g zVu_OB#Ux>HRbe_6vE$Jga8Xbzhyj}mQZv*Hhyi01Ye?fL5j=(P}fTOc~hEpdF^W3=~L?t83`Sw=k9HO~u~^df5k$HZKMGPKuQ!<( zZQ@uh_kRi^!54H^S}d%T6h07?SH8HO7lN%dX*LI#>dV$H?4`%*)%c1hOx08ak7UVUIjkJBp8^TW&i9ft+K=#Lo>^`bbOp0qoa&A&b2A| zpBWk`ixpPd9hS1R^rH)T8#NCNT6-T|<&p;!mkzI&TPa#xviViECf|V)|6%dQdYBU-R z2I|>GJU7llLrGb*S!%Uuhcs!w_PjGW^+tofMxDOwwR8Epp@Ha2TFc8Ubqd;5NvEo4 zbv(}2$n%_9twz1xWT?MEGk3g}X7VQyAp|;Q$)S@cnVp-XQvaYP2N@a~>VANH3qvEZD(Dm?rH?3raT!@&BhT!G zKa15KU0m|NAv`$3q&4ArNCk z#UjR$yEF+GgU9=TkA^JIx|a0Tr}9sS#(*z76h%o{Ra8C@q9S6k&f)SJwR)X;X0dNZ z27ZbVLSSKGfn rEd>R2ApknqXjRYN}_o`b|QZImqeb2if0AWC=S^M`FS9{-@Xv zueeuzdx@2W)0{c}5(l67A>V)ODPA}o2}8Si_eVd@Cw}pEZrr_r4b622u174PeVWsY z6|KIEMD#+NyD!!Piz+kc7wZSN!luVQqhF*(h| zt}EGYzRAMkQ@nKaB_2B#xpRDd>I&5v4t)2UeDgD3<_iY`gYWtfH{WzUx1TE*Sc3(( znQPv)mlvOXj88xC2#rzrGEIZLKFQAQ!wmLkFn@%_lTUC@Z+|x*xM>^X!>g6p0bXR^V~2Qg-;*3zI>sMAd79y? zb}%wE%GkNpVn&(TaycLV#dq_!58Tbs#~^wUqTd1hKA2VDrVEMxQL&0KTMHC%DU z71Vq0YHt|a$mLw}?kWC<>1}NJPoL)QhwtYrfAcIaT(yhs8;3C~r#XG(Acqd0q?~>i zH-6}yy!Wyy$J(sh0>(FaHqs7&!U-3b1zkQZ=tH#|gwHUf|fVnawYqjts&DDiu zJ$fY+kq{!m7@P<;@1wclO5U+yeeCs6AmV+XQ-sxN$R7(`Jj&@4hj{!<9kch{Y~8q# z&Hs#3#y$LWVYP{_Z>q0cSJsI5t2@uVq0T+vW!qOLy3eysyJ-uo9oYRwk>*YmR6w-5 zn8@&u(n$F_8*P<5vj&kaxqEFD=aNNP=OE8PjNpS(RF-xTD5^xL)>2W@yg-ZwqD7)2 zK@*A37{o-hOZ+!Rkr2_YXh1@M7*hx#t-Am*fQc9qnLBaI6_3YBAOk}5cq5pwO!R$J zrjJq@$s&A!sxlPb(g~JAgh~`|Ac&?uO9UYi=mkMlk|lYSu7E(ggdqV0L@-v8U_dQI zgQ&$|$;INWBREUUEaVoc38rRnHA9vQ&X7a{23%J(V6qa)N{lPP6qLc@I*K@tnu?$W zr8lf}l%LlGqrFd=<&f^PcV4PCarL`^|8>0me~9}yrD(?Q*PBPFIl>S`1e_@-ZICl5AasC>|mSf+R=~1V92HK-@RFfxi2_SCyH+ z{UcxFA_xJZ4od1TQ2pMk%F4>B%F6s@*7y7SEf;Kb z0YqZXLE=*@#7A`cctG5sra2+g`(508;p=59RZQDBKh?NBmnQjHg4UW+sf4wbBuTK= zUYKi+bC*A(7iI=C_<*6TQd(3A5Ta-xL41yQix-R6j@(pn#^H30U<5HCdeu)8C^Xtt zqwq%K4CKM!#h@@KEJ`dvG)QcmqhKJ~7$P1aCs0RIBNs*R9&(Rz0TTkElBn!UG)gR% zgft{1At7~^#2KsxD)_{rL`_mu<1?bq1iH2aoTkxqNNfVMnlmGrh@y}eDda_pd6AM!&@K>sj`tbPrId0@=Ej-I6Oy_@RRdl->fSL`b&OAV zMkYLYJs^32JU}K`EtE@wZY2sEQi=Vqk75C@40uCOfjn1aPLt&t@nDRj$p?Z9Xa`BI z5S5{Hj;MfBg3}&v>Ucd#&{f3Nu)2T?211GnoAc*JMmr%)loXoVF+q6-A$@y4K)U=WN5#)pP&sE8J*pp*h7cnz7R5&cS{ zh(mk4&hbVe5)c%k13yjpUh_pW_<-Q}=qPWVKFz7IF@gw*F?2UIaqFBpbe7AcH!2ta zLO#ye*}ZJ~<|a11b()jDOn2WL`dSoYL+2UIo2c}!;+TfXqp9-cW~ zXUaw6U1r_IgAYE+AN}K7nLYF-PrvXQ&+R?SkW}dFDl&Cy9|um3a!>}i@9|IZn|CZ? zLvQnYR`UALTfF$vYkd8+(@e}=%UuilSlH1@r>QeJae^&RJ;l+pr#U{Hv*YbHe)r$> z@_2hIZ6-(LJTHFf|Kz{D{3hQ$U1c(l12HrIAs&9!@j|BMl#2B>$OdRw#9dF+g*?HzuwnJF}fQ2iHGx^;juCH z96n67UdM-kHHM~2g@yh7w70a(2nJ>_gZIGssVVjk4YBLY8Jzb3v=j=g=N!_x%oaGT)&*V7Ie~j<-R{8OrCw4Z98}J z%+A9cuhytLLsIFbcVHFwY`B@*=alIxznhS9bqM%Tj_rGm&0Dwe#IDn1r4lCOgrLci zZr0rSARk%3m<|1{e9w1r!NbHU_PzB6TXyef+p+UZj^UAgNI4%sA=XWrq3=bq-vfZM zZX7vzeV7w(?PCAY)101g_(BJ>=PYC8$~p9ORIWLG3?ZXlonUfgn8}GrYPOSN>ulzC zC4i$ka*Dyh5l&Z|=wHxJXLEt;Hcze_9+!9dmvzpa&I%q5Zly%BKLQD$=jvP6{(9qC@m#&?ioil=g9|1;>o#gb<6ATjPy7ieY zyzANl`=Nh9DDh-0orEt#Sy#pL@ zp5gQ3OpIr|%e}m9Mz!C>s@WA@sZ6o${Apg>a)8MV%b@MOR1*qFFgC>+i{u%TJKmx2 z@Nd#_>zbWD_Nzt@banW*uLiz+;kL{FTOmT zQi?Q*QNsMt5WaGPXd_ zoDAGZP5q$47(+`-3+vafr>(7x=H_O$Z{N=N_&Cm8|ERjRx0h9`RriQZ7)qt_Jw07loD% zT3S14;{YQ>LLPvrbkOz4@AIh#H}IiF@7HEClzJAh=)e0{{M_4rOmlxATm3Mf{q=vu zT>~w&Uzz;3MS;0cI-5M+6?*8HZLmh(&9MQjP1cB55+Q(QC1_I+j5RAqd_JS_KrI z5DZ#RM;eHp^&=YfML|oc`XZd_Z(AhEixz1QE0dA_@sy$U6v#G)_r`_{?1b)mUOdF04Wmi>w++rXSwv zozf!gP;|VKqv}X@YEY#?Ts0yU=zHuC`pmL zgsW%N@)+&O=YpRKIOB13NS%DNo39B+m(Xj<}ql9GZYJ0YUHtikZR`QcS8asYP1@+Ta5Oaj|Ynj(9~D z6q6oCoFW*FS01MwPS6_6-g3&bf1(nV7(fkQ&24TN_Je z&0&P%5; ztt)xp(R-LxOehx(zPg${ufNQuZ*Sp+uS}xdXBofeW*+XnVsg<}Vf=0W;hWF1eY{NP zeV^bn3%clUDbrlA80B%UkNG|GIK1OIp4_eql3T+IfVd zgA{2oFwQA6Oun-geqjm_|~_!@=i6u z&byc2T;EIrhy=2!A;wQ`=NqqliT|==J}<1hlh1zmb{3bR{KI(ULzU|A0k(egMGpEl zLi1d1duR=hrhuz6I&_SK2eXzC{QZju zI8&TQ*T8B%^k^47MFUxl>f{i^N1o$9?dqj#aFD_4Qk;39%7K@jv` z`0QpniWWI_Tdy5>d3j$;e?tg`sUVW8syz(}? zPYqISUC6w3>v(igAM@HugyF-ywQDC^-g=eqo<77o2lw#d2k++-_ub68<_q(_KhY2o z&W(<8a&VCIV`DhyP(V|;%)Fi+I$K*Qq-lJ4GJ_e+UFDU7R4RSo#kTdq-R$1_0#7~l3U8cmrg!mT=FRD%t76E8_VV(sXL(Zh(6RV7e*PnC z=qzf|E1ShI#rUcH?ETJHd17~sYHJ_;3+FL^&K%sx3C?W$d$xbO#^+=X_kH|h+_9pc zf%f+|qKgoq#^CN3cb$Gao$?i-m{U9tm~m@ zA_XDt?|Azwf5H=+-{84JbzD8ec}HdC$GGjm$N1+D)OhNvTR1d4!TE_%&h6U4$);Pl z`{R%E$b+}A&vh#+Da&^10X9b##bx6AIhZMn_L4Ek%vV&vX31Ha0&o zPQ5UTdFwvOBahw7qxUUj)|FY#;F`#?jIpsX4j(?u@bECsIjpr81OuI&oil=g9~$B_ zd-~7`P98kdm_#qK^x!MnrR|(7_J9l_$cE$zQI#_hd4aysjQsE{0(dADW?>#Y!?A&rloTYpZLAs zA%A`!hw@(Lu78A&52Vb#pkh)$!qR&`%-v@^N0YM*HLc+jcP?bz+^f(Xl|`90np!Qz zl8|S0Mh4GO&mDnlpNj&bS!)ryfPwy5%hPns!~=CXEOD{XByu2vdMkxFYynMkd<{J3RcB^w{TkCVB@A1zT@ z@E9Lj(aplEsq1ZuZtbF}XesE7srn>?LzCpbg{zG=uZ;wqSkj81RGoS~r=EFkJf9CH zriGOQ0|O*ULQ_){d-m+%)TvWUOiWyv{4!W;X=!O;)v8r2UAmNxj*gkJ+7AOtqilgv zQ-NZkNK^YP+U8A>1&uN(X|YVXsY1~bU&jB8qp=FTVN)dykJYQR=5> z-2;5=)Aw`ls#!Gu075n@Q^B^b;gNs(zp!=^q^*a61#NT`uWY}50MI?mTd;x$ADGLx z{_b_&Jb0QtCq2vNeLwAxLMKH^Ns(f?iMG}@nrAO$X)Q)K3&vngkz%<_xm=|5eMQ?e zB&8;j5|A61sFIXAP~{Sq>z}{UNs(ewyr$6j2Lk~nIJD<&_U=B1w5?_1-F?jKyLKNq zgBiRp2p5UC*ZWXhGHRaweR0?6ZQm<1Y4Cy7<;QI>kA-O&KqC#HL>lDc=^cb=w($iK z0Dw|l^ti2v%@-Mk-Ty#zC1#Oj6(*f`Onkgh&VqqVPeHDn$ZV6(gm@XpxHPl8OkTbmO?17;&I+ zi5lO%Y7nC&e&!<7gHUyWVjZRmP%J1hG&$LiODt z(LlKHefnQqLlh2NSQOGIyK0;ZDjr)8@jx!aY8+A$( z6U7T1j|Gv`dSQRLkk54UG`$+Q`n21x8O&e?KQ6d( zjl6jh5y9u4kU5-ocom7-)?`@iFeYG)!fQ>YGyzR6vF&$3q9=D3NOM8wn&1WT(OwX* zPzsaaR;=wQ(G6k{|-nP?*rFv?+~frHOH zLB$BnSV5t&PGg-y$B;&NaS%K~JYEHYZip1M!Wf0p3MC1m%eXYbl}g|}+9+(Q(KhzK z(Rwl{QHXeu4Dmw90-*{%wP;@;4?>s{d{q&q45_nFm}qo5ImW8kcX~k_gbXBu%*Vbi zw@@!YhEO$*U~8CUf>J?)wsSD72LZ z`99itg`g+|O&Xve3hxB(1J1`!trf*WK-Yw%5+L`e0*DO+?Qq6ZHv%XK5CY@@3ITMG zc%F^L<$+13s5wjS3OHZE=ZcUyq#mFuQ1wVwBe*KT3&BUb3gtk1oCGrGNv8}+Ax70s zQcaRtjIp2uQBkz1HHb1~UNi0tq75~zP}+lWprcY&8Gk`Aplyb>IYtz@WK2qpoP@+! zEP0Hum4Gz{q(EjZSz_a-w&X$KCj(OTkOfpd*3I14nF=7{sD%u#%jBwz(?ygjgLouV z@j>u-l<@?ka9R;mtbe*e>JLeR3q)aU@PvZKSxt_i7NW2tM9VIY0T8^BXe+A_?IjJt zgY$~qSxjyb3|``W)FM~`Q30r ziU&kt2sF$@K;hBk!Q!#x!4js|ibI7Ag#!XgG||HU>A{V>7Mj727 zk5l)a#2Dtbwz8L8;&p_A;+D2n^(3x%U7O%i)W5kxb^W{xcAQ0+%><0_Dix0 z^eJ7&d2BZAYj5M{AH0iO+AVFDup;L7 zR0yMk3?6-fFFf;2dgk|2NekT5S3>IoJxkWpJZA;V=616$ImT-z_c0cx7#=#qjvX~7 z%5zz}co}O4x|w~UO#Sr8IBxG|o_z6jp5C^Oxqb7vzqjYIiA`6VV08C$e0SGr#s+TV z=hm;~&Q8MxM_2%~GkaDyv(-7OLucvUUdP6Ei8xv-(YN##+UG1{VNW}~quY3Ue3Yxb zmPtx2w9j3~9kU_GN!}>bIQaC7>{Xh!{-vzB-<|(rBAfE?5!_wrf7CZkxq| z+AzJGr8a(sT}Ou*r<2b4E4lUNRovU#Oi^Dl*mWx?407m%V`Rlf9=dx2 z_bne_MR)s^4!v#=0iRED=FR7M>ZNTwf24-$S;NMa%~TSNRUocLerg9>hC4}{XLHwr z_c)?1&@r%z)$^Jg6EHuy5D}dB3=R)-YH*O@v9Sw+fsU3IdS}g|v$YkirzcZpFoPLP zBln)sT8($ko#V*xFr&2^N+~+aWfpdHu%M%Z*5VKUmD9D5PmXZ@&F}EU=Di$iU(2lv z=CNVzTna>^($z+o?6ZsLEZ^PsG-oF!nQ(XV$y<7;DD`e5FdJs*_*-m# z`kVZ>=TA`YU&rlttmmQI*089eDH(;Uo@2OIGxW||d}Z$zzFIdl_jU8hfimr>Ze%2Y zXXNNkw!OBQuf4RBZIko3Y5gK@UbUJ#m(Fg81|}Fgf1JZd_Az|^5C={Tad7B+P1FVI z6N8-D^8#Of>Ls?0w^CSq2ODl$&+WYxW)%$@0dY(XHc_k(bAIbSp4~KtvLEHK^@~~G z*L-br=6fRG>SGM;-^!*Jp5<#Vy~UOzRn+_qe0akhJbcSaZe7$%e@hV=JN+nuaTBuYiA85gVxElG;4z_%I6Hk737h8`&?=5Q? zSh|8$3%Z$A1~)cDT0hR#9b4J8=Qw#&2OnSG&HT>7#XcYmbLy>EdHuW3^UW9b;riFG zuzvvymn>rGtTJx6h4zZYguRUIevYTVT;9pB-}r?#;D zXq~CHrK}xT#Ns7A^i%>e-ofcoLSB87eS4nciIZm-ANvFwZ|Y%gNAbeMTH4e}`>bxd z&Ys}RJ6n16&^W3(A3b{+dpXSUaSdgI9XiIYC!b&&`xt4TOXreRELr}3dS=1b$2kA? zvplUAqvI4mVUZAn&`Tw@JVBoBGsT2=#E}mJG$si zuW`*9Bp5@S%ku zE*Ih)W1Xxz$;f0zh9F9C-Z5EoNVx91atRd18j^wmm67K;&i!<~)B&heDhvz^kftf+ za+$q*_j2UO5k^Kvu1XNnTGQOz%(7+6ShZ>ueSLj1?e&iUDk)M(igdMfUb$%o4VbnC z%)hyv&fWzqT(h1-rzV&x&Zcw8I_};uhgpU9>LYFt%7SWT?z%gfd*!wtILdU)Tf*9h z?qtQc|DF?X?Pt%P(>yw_p8{8FGp`Ry8&t8GmSXb{&7*#BIpYTp;fmd)T^;Y;U;Q{C z4yN|8d+$Ns9xO9^-G{kIE5wtWum7q0f(^y>mG)56$&_~gL3JTG&jIlsm z5Ey7f^h9q1jtozgpe<;HQ!0uBoW*CBTqOva5EQ{{oY$l%QWS}92ne)7q2mFNi==GT zpp+{iD1{P500NODmy2qU8HKtRYA%pwP)LPhAufefQ%E7Tio_~XD`-FNU(m*{k)ar&>rU(wa7eWXGaVW`gE*3fjMea0FFc8oxUMnRa!Gai+ z7OailTSE{$8o?8S#u$y(hM?njrBtJCNvs(nXr-bEfG9Kw!N&LD(`i+#k9zw4DMgGV zp+E$$A&Lj0fB=z5xK~2F=>~&BHv|Jg=|(+@ghwBU9+MRfLcd%P4q&9Q&>(X{WPQlx zixS|%V}JlbUGd$3C=jg>(@3usN&|B7c?zLXXH+E1DT()2g-H6J78gWB5rUfTbYlVI z7bQ_p0P#D-!q5;8a4{Q^#-_{u&OmET+Hh=TttCkk#>dCWvh2#FGnl~)euQwP8d>us z5(Jldym90>yg0n%6m=bKatcN$8ig~_+j*`O0Yx6b2M{07E|9o@$rT|Nq|uKCr(+jV zi2b&=fuI8+@t_5*HCjh!AWDJu9-TWR%TNf$8EhzEf?%hk?pV-PP>Dw+o)84GfcJu{ z2Sh83(r6Np1S1JbG%jQiG9=XTID(Fz^;4rrwZbX|A;If}JQ4B|1QF1Jv0yBq1wlXr zkpRIXxF|TxR3z*hgUX_p{ZIv8HTY^m(OZlNR%cjS#dsh@QoeUW$idf%whYAKGmvXx z$~t^f!=#!@7Emf7^A%iu3Rlg@>K+}Ut>;AWbs-oCMxl&C8-+<@A1(^C60C}z-c3-X zKF*yML{Sh;-39VmAgc$eHID*((UaCarQD%hhAt|w;I)8}=>1-S7a<2~fEB@rfP}bs zPjH^eTt%BIFW~bs-f2Sa33Y**Ks6wljrLH;;yqT55Hfu5)HBai)sv={G_}#TD;Ux= zAx$jCI*iRRHpiL(+E8~1qEa%Yn9xF^1J*dK(O3cOC1wZlm^iS&3{%We0;*a- zhXN{7C~vSLSOtcFRRW^Ou;h~9q(F@#QEpJfio;OHP(za=XqXWV%-~0X%zMsGOtAgr zNd_k;$(*BLEuG~uD`(H9r@5Kp4GIRpx_jh0A<$G~} zwJVl$`q)msxP2Sn-25iXdpfzfcMi=;VTw((6q{(#19XfHF#FqwIXrQWGec*2`CvD{ z{7W~nrn`lX%gS7#tD~EnR`yWbx`(3&4|DMFNyhH(p`DB4tGqhN(bu-{_W8NYTHeOO zcJuD?0g%u>Zz0QWImb=^@G8yvk}(=kC{`#GD-`FQBU#?bU%WAS$<~WdIw2_}v=sm* zXf2m7yiiSx6`ESw>3Gi>UWG|ZRMHX^x_IEeI~X1wxQRgqpZQb%@aXMyF1~|5e6;_{Y<{XBBIM37GCt1H)2A63A18=ljG?uuiN3Bb zdOABl(4?ms%;3ib7Xl--8vBQa*grJHM3!N+rko@!>Fi`-M+Y6{@|C;&h#^dkoMZ3P zn|SRYobH}Y*YUHQ4|C~Kae~zr+i0D4AD?>UAcxL$|6kdw?p?GJ82UH+QhStH!y%*u=M980E-R!o2wpb8~Bf zw#&S@@7&I&%3OZq&R)6`^=?noKOykmQ?1uIe&!5=BO}!6 zb(B(+OC@?|&7!}ni(>kNmdeav1~d3+LI{Cct;Xo+=mo)m)|z6mNJ~o#t*xzmpgrG) zjC|ra2cQ2-{`&Jz@#6V~%zf;)`Mv+?L#*g3(rf_)aLliq<@~r5STo@gIB-tU-{x!ckq1AFY$-J{8=8kX)e9f&FK93EWi0q?l`-PQ!o5E|MHK% zz~3D`$Ei?Z-s7uiOTm60U0bM+FnaWLp8d-|=L_4LDc}0@{N_LV4L&+I;o=Z;6)SIj zi2Lq*ns5EhU-G%X{XBnstPH>XFdtq&m!6UVEDIj_r!07=$|n!J%$uJ-!tegqcQ{rb z;{5UbeD~B@`hWGeSTiup=!<9AA_<8*!`Z=e96g)C`w$ICKEcGPH+lI>|BnCrOFMXR z-vrV!hq?EDhF|+1f04)TT+F=b>`$j~cn43`<9!)F^j+R~eJu;_dW?_sCzlWSXK+<` z?-?B(<>bkeoH%g;5y4t}K`=0T_H1S%4E&H0a;Uz|;gdrgI_p6fp=mh_=5#X9{e3h9 zHwp=mIeZWT8m-I?cpwgppBv`D8~Yin=XhGEl-uc_-G;qJwNXilV((h+=v{l|rpsVD zSF!fNRjhq5{`T1<)$y~Oo|t5+T8mzmT#n1enH)RLk#kkXazF>b5!>wRgi_GlLaCTi zx<=WrgekXBE>>vbdaoG}0=$dxUPt6O2M-N0I8+1G1iFBqJi*`z!+~p-sm|$hRa~h+ zT7X);PJL?X%0!o=&_vQUo8FSXq4Nc3UUEMhm)y_BMxU2WO)xb$%-P8)s#i@kulwa-q2mS3|eavYf%b(@Pu&XHT>y7nx+g43{WfjDEC<@*4>AU^I zJiOv*zOsKOZ*O1C@%#IkpGN-c2NRyWKFNXY$Ix9HX&dOcGQkG|;_6fnZf55@gB&j| z<+jH^!url8I&ZL#n!ya-7lexj-KEd8T)E=%-&eD3u1tLy8au>nFOr0TXp@S7YS8;- znp`}cV-yZt1|iP12$8IFadpg#2Jt)ozXa3U;$cyMRftj;t4UKuu>{4EkQSm~fW#x# zLrsy_hyLL`JMH!T>DC>F@U7%)a5R$-h1A4z9QG?AD+U5I!-%7C##&|oEy zkl|I8U=(5iQRLnrS&FkMUMGk*gkbPq<8nie!V!skK`BHllrlu`_$D4MUb?ubNKQjy zX}kzpKnEfBka@v6!RCP^5zbWM%G$INKLTC`t=(70HYi6Qc$oXf0SP4NhGo>k}e* zUzA2fW3)%C0U~Ma!jzrFni3j+qhP?`y~7xT(Hd zM$`9PMPh#xArMKZu>wT0zg98J071p|@sW~vaU>ui{#0T5NR5j){T!Rd^CgXEeR_zm z8u?st%@qOy5{QMI-h7pBGl&rfrWa!jV`F0&V`7^)gBi@=M~=K$r`|LbMFVw@Q;sY+ zT=0b8FgmATJlY7UiJ~=UG^){0=fw7(AZQ;jE+9FCOc63oa0Z_Rf`&kdf&l{PK(HRI z1!FYESiDjg3FtgSPkVe<4(n5r+>vC0%{1C5NEMUau0i`@~wk!vggK~gt2qO)7 zJ$jm#I^^CGsv2KQ@U@iOTZ$^DXsVdRqJ6Yk#DRApxx!_Qh(i$1FNNF+)x_bGIyM!` zc?P7Ytc6gW!WZjgH3!Z`l6@`UB@nbAsYaI!resJ;hEPz57x3|(=^z-dNVF#LkZMh0 zgglS=)F(ZYlhH1af}-R+PV(3Z=aE3}wIXPR(=if6uAr_2qoYVQ#-<1;@u=Vt=b6Z% znp-ko!1*%4StO73hfGklfXWq+BT^-ZA|T*BnRn>ALm7v)La`9YQYaNP#nd1LLt1d? zg2$v8Hp#(Qa=@XfVlfH|CQvkJ+0}u*P6biJ+nl=v5u3 zElwLSg3=8Us8UQvn+n$AlqNHVTm)yLXuzT|9!!X~jB404x?u#V3pS0Q8qbQ?hFn?D z2?T@WLdZQT_ZTmTj*$#hQA0r^0zL(djZp_|z!V(1IF2fMRH=$CR?#7jGgMupuqX!x z!79OOMG6QSF93#|A~mVt@tF8D0~v}bppI4=gh1kB(C5h9e9B5XPAr-^*Wp?L}VLKZIVihC3g*gJrEH-qQ({Ekf67R?lf? zSvwqdS0=cKeCPm2-g=(Tzd6Zqznn+s&FB8rtrrK4SD>kH9&;A0WO>-bn>$}+|LyZR z;ODUJGTxs8n~*4l9y?AJ7BI2=UglTIv|pvnX(6Glt(k(>gpqTMj1F@OXkmI>6>u)& z%(*dYBc~V{8|K&~tZfE!wQ{B{lv=x4*#WId90y(rtyIH{?o|r(gGT2X77Xm;fjQ6f z_TeME`qoiyUAUBw%${{+%80R34C@Zu+~xe%$}+t_pxJ0Qa)iO1U*q$K=5gmo9^mIT zu4ZY`l9=g90#7{vGeOM08)y4lM{pf?(b{#jxZ^)vaNaXAKF;n#hp5);_z#{KE$cT<}_vH?Aa`xHH)SX1o1edmXCAb^bn)t zo}+_ru(i;^E03*bV-X6|9fVSpmaJh>|8ADJ*LZdR3v4}k4@-Jy(e)kySH5pF&7)({ z{rvVl{j6v$T(yrfrFJSSKf%XV@8Q(o4!*Pf>-^!V0pU$h0kH{^AY%%DPSL+4dygd}TH*Jp=r!o4{&CA9vq(g6#MX ze&fIVZ~W(hR!mbHzq;-c&(x+!Q+pTdZktE@H;!@a#9Iva&1dJwf1iiu&Sqd%h0_bx z^RY))F?nV=t2eCUt|i}ppA6szcX49#f8x%wI?UtXcg3?7eBQCC7E=_dA(cb(gpHd;NM}umc1L5(GgK1Q&2D zQnGfBWNF48jTPgOBOEi~aD*eqp>Tvhc_RG95fkHY6ShBS?6GGwW67cxk`hIVAO(R0 zN$d@vfyUDN{`PloRc4;!4|(51quJfqfd+N|=tR9+m6f$+Ri4cJ|IgX;6W@*Z018`- z?l4Mk8ZCp3RnDGRA|(SlR8^OW{)iayE@Sn?(>(FW_jvew-{(h9J97GKZmh7rdVtwoP#>6Qe*O(^Le;_bXPA%@0paZ-gROJ+*@yW4U;j1V`@xeuefT7c z%rdim7rS@%nC+e6)pP3%--Y=XPlqmcMxTQ_WtE9`Zp zPU__uM?~YT@MH~Sb0~st-Y<1{ zQN$^m#QJjHzMQXsiIhK3vI$~}jL29{9X=a=bRAVR>Qg<&{fRK2fd4OFvnPZ=Fc03Ba~m->gi;k$ z455e(OcRFJy5M=O|0%Cs zt1)_}l&dM_l&vt@S4QHMU=T+4N@OG&0CAgT8Hdmm7d?tGX9x@(D0UOpN zacEK)dURM3hZU_lwAz5WgHM5wJ(3-o0y);CXeD_giIJR<1d@j=ppppA=!J-+b*#Te z9BjbGfIJ+cxrV5e2dc&7^Nv782u7&Ebp&D|@RvM_LvSOJCJxAgB_+2;J2ZwHhGrvV zr45->K@NhN1LslkAffCl;JhJT5ub4mJPAj}S>8JCT6Sz`#n8I6D480gL_QETyp{+B z#t@Ya)D=4`6VPl}HZ&^U$(G=PR*;V15_mNb!v!OZ8t6>1ce4RfgbrecN2w)ak}=Dm zi6Uc77!YX~ienwfR0I*SQ>-%Zo>Vza!3`~*H@y(2Ej% zseTJBr;i=wsUJSg$u`sMKFE&kJGuE9WlMLK$!$BidCoYzevG3R7kOa`?zx=*zS(oL zDP|{k@X=eg(eD&R`Q;Gj@pZ+7Kxi6L+t9o|2i{lA&rZ^jvpn_V@9=;8o@4lhPjTN3 zJJ>tjrMKBB1CS}^7H;PkeBPaflJe-}IU-ptJ% z=w9~mmCiP%C;QCzTJqXCUR_Eo4uE%AG~lW(-HC0?SDxBk4RbTxGO@yQ zOV9Jjv4%V6Veia>lq*h^=~+wSGgPu#;T6J4gSdX5h9eWvO_|C;QXWnOym zFc1CpAx^ZyzMBrRXV1dbg^vJIF*7^My&pfo^ixMT^7K@de7urYuB1rdEx0t`Tc+QB*zx_@ZcRc@XRxX7-~KJHeC->2fBz0XeaDx$wYz!Eg7cnUzl-yZ^+iV<^tk(hT};nZcsI@bO%L$1 z|LlL^&fyf(yZ5ns?wzz3i<~_CBoF=mw|Md5I;+{R`Tb1X{skVqaf&??mqiuaeIIw< zcZ6Si;8p(Mu~Q7Tf0}#m`6OTd*yQW;jqwwRF|xe8%qy?F!s6m0IcL20Y~Q|}y?ggE zKaQ&RzCmMR>A4p;eQ}XR17Fq5-g+myX6KlBKkBhKsLBL|&l$~$2!*roh8U9afc3^u z11Mg%`ZZfafA19W;!7NV`mg!ZKl%ob96rqjnPYa}9o+w0zrhW=r`a|$Nv~Vc@$3A} zxkp$!b(Ujqd#qnT-r|VhTgrSbC`LouYVoF7%@7to!_WWD-{-S8?_}Zf?3cl`JiDae z=MJ!Y_pZycT!z=K^{rn*V&&+Mc<%du$yfjM2Rw7Cp`AUzo?CyOU;pJ>*gM-}W}-*8 zyNbjg)BN>QoH@VD%H_qpMQDbU00Zx9yt`&+^Ft5k91{~096We%D`L}=Pd>@<h5E4nn`{0EY`kw^Pydjkb9$#kLUbvgT1zvP8_as1u@r#S)Fu zi^sbPFEvg&I1)lGM1H53^O#ez9}&15h1e|)=ZxSpT1B+epxpr)H?UM8DmY)^Tp`@6 z77>9nDP(mM%!D8wX8}jXYoJ6ND9I#XqUE=F#AnPrSRqYe4l_Yj&@5PXC3--1q$I>j zh^^2DM;nAH2*QX+rzKfVw8NHWkQffN+`9~gkQk_{4!vFv(+n!8de92f4yr9?#qL)f z5z(xJrVg>KX}o6`9ChvJbsd^LLWOgV;Dq1>@4-7M?|R~F^BnH4RW3 ziE)&2CXtDfiR7(&CuRgI4mw;&Xs03d)`?TgWb2XYh>t)LG(s{*8yvn<1dWmd)&!(A zTr^w+mjDDY#mu8931uTg14Qr9jxRP|ZKNvGdSxghf`2XoN!219F>!@VUqvhwvbmFt zvxv7;JkMvxKm$WpGpv-h>&X2GO~yA$r&W3}<8znfCW#VBPDxgiY)Hz9tR*=S@HOTt z6eDYks>j5N)0nzTr(c5)sMIJOOlnL5-73(nJe?{~)gF(4C6Wq5HCoLiGg=i=Qjo;R zk3S_+lf$&wmKF_bGCGJQQb238OGD!7khn1*HUlITaexQBfVhnKh=d`oTE~TzkpVIQ zYrqmN8Cewbnd}qEH)Ku48qW%&2pTc_H8W8%(5^$&#r~-?4FxudTKj& zfA${y>?GX}{Qa+(-L;=v@41KDzjczQFFel+uinWcr|xC{p70vM!y@Q;o;)(7o!Z0Q zADN>w`mnuo2=(L+cHZ_G4t(rZ_RjQ~xU66sTx8?)^ZfY26gN&xuxsvW5A4GYGb829 z*~LXpojb?U%F32tU~YPv-3trM&&*K5Esin97?&dE%<^!^3l}c%>f$0R!yy0*6BFDt zH^+^0bIf$#z}w;b3P*Q(C%1m~*Z2p$Gn}-&?7#U=4u(q|QyQSe=l~&v47SelKpDiX zOz#@R6=i)TT$xx1lrMk9!roo%+CD>XaDv6dKj6p5AK=dYyVyNq7Hyn(o)=CY=b2TH zgar=noMe9bYSWhjewO*^K078`(&`yrI=9Z!Muy%p&H3l})-$Jg>0+jT=Nz}(vY*-N zjXZEVyp&GLRWUrtOUIA%H;0!QSdSe$=a`?F;My&W(CPKqv40mc^~*G;U*eS`&+_Qf z1I$fb5+UMEV)fWFJpJ?&eDlaMYYDWwivtI4;EQ+fU^-mc>t;I~u(GnmGDfKe5!E4s z#>j7*p!C7O%(%F+!pZaJIk&h-6C+N9>B&iU&Cj!Qetr~v=~@qRj4{Ue+l)=Yz{<)B z>+9=V#N%GC$Mp0x6B8334&t${aQf(rJpAp)IJVZ(`SfkLt z!DeFgmrpOV8a~c}{qyYGacwu{VRnJp8*b&+ z$;`=@9_Hz%@8QvtpX25mtF1P?cti6DLiF3HruOo&gB>PnDE8ED_I+YE`z|f`?P748 z6E7d;yFWO^QVSUf)7zOjcqcbc1|~0Wm?v-I=1>0uznQnO=ehGV+dj%&_ut2-_STn7 z#Ex+}+O}nBX^E33PjcbH1!9a_UfK5U-OJqE9G%WvlB|sJwxOiB!O^E);{5q#T0EV4 zf}8HTiS2V!T>A}vm*740rsz0N$PtYVE%4^d+CuKAf~SPj&M2Aq+MwM#!p6}@c>KHH z;u~N4E57la$5_7M^L+HvpW(sJe}?-%`EhRC*=MTbaU^oQ$eFnw(}5B>;X?<9NSC^H z#anf*@bz|Xy62O8@?!_te|h!~1PTl}{perv@OQq!*Z=AfzWswm<{tP8_ucnJ9{kKF zx$o|q*)!$o1_916SUu0eMEx!}JyEhIV$2wx%2!nWEi{rJQh4u~n3!N{ly1%R( zX0sd=!C@3iwil>^(8J-qqtkJ;eMd|}jE)#(D>X6F#6-Kf%q>wJ*&vw`qu|69Pxw;d zsd031C{7IT3~>fNU@oIsF|Vi^I1n7pffuDBVReIb2au}bsT4z82c!n8QSFeECu;Fz zD(Wy5G663ckH;iaJmGqZa3GJC9?WBS#6}+e@t6tD3YklN5e;Y-auRZMq##6J?pfo7 z;GsksP~;NPKx$jsL8NUmZL38Xi^uzlPNzfaWgMV6pt;6!2hDvnx8$72646wI1ELBenh*SAlcx!!Zs$#uz`P zXrpJ?RG1a7?J+wNj_f^FC1N+w_A+fx8FrQIg+@FLj%Y$MD^Z4v>qX4gaxKXkEVYQW2$_s$D1k;4=M{Ak$T(~1sn9bAIB){a6;gIB_)bOW zR)j7{4Xrs=hmOvmEH@GinnW9gCPEv*Y$O^;of3Vi?NYayTtkXNj%67o5-k#~rL`6% z;YTFv%4I4^c&Aw1koxOHT_U*(>9)A!k>s)D$vF@y!aos@#Xyc8A7v{FlTQv6A)6;* zwBiUT!|Z9Kc+od4ol>op)QPkqGVq3>poS(#POjKN&XcpEswgdn;V`@@UKOW?%Z_Lj z(HXIF#9l>C5n>|5L?>k?a-z~ibQ2`6MCVBKh}sZSBBo4=2~>+vL0 z7)qj|u|%?jm{NqDf;mCOlchq*W6{7E?;V^iZf%VwXFA?9-|utB!UDUe zr|G^UFF={)l{F5%aGH%Kqf=A#Cui8@FDEs;P2}^OU0LLXvl%n+(y+FCo};hU*QjKx zoV+;1rnWKLR!jtsyB4X^)l_wdsR>Hw^i4n+dG);np+Cv4Pkn_iJaLgzi;wX)r=Q{* zN1x#9zQg3~c6RUH&ux2e=C0dr=3}?r$Xy5ba$tU*esI^gmiG_Uw%zQw?UQ_Q&$l^q zNci#?Bu>(zH~s}71Ye_+Rxmst4%=~?a~D< zzId1y^8z26?X&A$_O5+Bz|4p#v9!9%@iS*wTv>nC!lB!4fK}>1|B=KzDeSix-ab=t<=ZH@UYk`DsAQ z@#hY4=-I=(G$I!HZ8xxg&jCKQ(_h6yoUyjP#+eJ}*)XG!sRVHN%}JXN33AR1nug;v0 zXgoW`Gn_nqjE4^`(?sFG{+(>!v;7Kv>(XZJ{lahZ>!12Hez{Sq=^jA@M`CT|0?&Tu zhn!wqp`D#&a$zq!k!urwMew`!GCeiJ)Y>bYdH#Dme&QeS(VJ{b4EScS@yatCfAJ8{ z4FYNVPG+b3%)F6av!0-yUSQv>&^va7SC1ay(dSqB%^SAiukh?-dVz@@2e`R=smStO z#L}yrJ^3n+ohi;l06G(s%r5L=%K0nw-zV9A+voWFZJ*=wmv0*5ibyH3yu8e*Q>Qp_ z;sjrR4QywAu4bYyu$-`Fl|jrzr8GYuFf6VUm!IML z|NdX`ufO^?{P~Fq`nUcZ|M@@uC;a;7?%-o{*KF-BLm|MZg1@HHd=rtJTZT=-tV5@+ zsjKVYro4~Xzkfgb_g`)7c8oE`c>mylS#JH}&++;5>%8=TKFlBe`4`#S|0Hv_&olX! z8mRXYiM10?a`?zeemJ;?-}=%G+_H1xdT35Aff1Kq<;dedqpi`#Op1|VbT~$l1}LHdFBym8#qe%q7y1f(LNyhVy((oCUhrbz6p;kEg**T) z^W~0=N*ASzTDN!>l>o_61OUZre=f2C0v;scB_NcD1L88sDEa^;l}G~~EMrbEb%o@Y z9qJ27E_)$*!TaLLKe)ns@E%-Ye6tm8AZK!lq$ZNuM2aQSnBfS%qOQAG4}$Y(s>!K? z<{mkltkf44Z(Np{itcWIWkrs61^~Y5PhusZ9 zO2N4RQh^JIuLyM@^aIsYpq}*jexON0XIZE+_#sGCavN#eA{baplxt2k8f&cWkYkrl zG_(z9HA*QIv49R~b4f-+?HlThoDpUE4)(vfcjYbw}D}i>1r4E)VEY;*xLkdWA z&^k1Fa`c4csgfroM@SxZ9%n{GaX17|RElUI1XL zV<{rJ0c}ui$d*tATn|^(RNXF}i4LLm;0p;IFF5b0yMek}(dkyy-2e&lMj)*T(TvoN z1ecnN&;*CGK+qbOyGRnO3YY^G!z7g`11h98(Jl?xSl(cLeS_g}K*tGP?~&jMJ|JF; z*Z*pW>#X7G3TGZt#A$D zz*7w!)zBebke(tMi@ny#B*!SqKq4Y!b0b@3M9?>fL#Zs|G!s;Dsn~c~9N8QhAv<7L zqk|rF4eR(~)};zPhN)l37Y$ ze3qwjj>f#Zed93CCIyZ57&P!mJX3`yB^ydF-ZEclm7&P=l}p907*naRCUc=J9l#1_U%l57>LJljn%U+ z^P?YspYI%cfu~Qdak1?%Ilqk^Q_A9zV;sGDsGav8{b}m%1bs5F!r6-}EUz|{AaZcw z!ZK@X10bXMB;Wnh|G=?FCay`UUtc-H1@>{v-fbM5>MV_XBOO53)aKYyOVU_j0p5uvVY=H}*@oSZC^*dG?Go?-R; zSzcKZlGRL3^qH8r)^{*abpxSemt5y68-tS^I&^`x^+>(Dra#@ImN(Jg?e*!`HB+o| zQlI7dm)1CUm%_}OX&4JGymW$dFP|iegNX?`A-oA?_-@ z*bw?&kKWW2bG%1`PZ!RxcJVAHDbCygs^IAND?Uus>3W5#va+(m(W6H>d-m+sWb?Ld z+t{~nAG>z#8hd1WzoE;-!C{_wVV!eJ3T~E8|5iSJTc7#a_vu+w6|!xP=@95{Y|yT* za*>~O*l}fan3$Sj?+pu7e=`KuX*O0_IvdejE8fJ{@5*Q|!0~_2@BN#<;19p|DywsM zu=5xGM}Ft$Kg}0znR(0KAI+IK7(l;I@c!KyH+|R9#mNC~+CRbkHU}3XwzkNHrOf&e zxDGVjx|NUz};ICe2nfk~BJovl+oZtA&Ha@cLt@imdDP@L3VPc}fxi_jS z4a2y>@5&&t}6mGvQhZ3ycXdEF6R#x#;z zPZI?(KcedgJRQ6g!9ehZ#2$hmAwZ5;PLLD0gh?)>Ysq-0h*t`e+am@gT0tffNtV!@ z(PWs0lB9So&g!Wwpd&$mP@)ZZ$%q7;c|>xVe|Vi>VDz_2VH}AA=MWLH6U2{*;Uahk z&RL0`FuI?ZBKn$eKyyM<#8M_lBWJ**L>j2Uo1+dDIVN(7OQ7z;6wv?(C|l727>NgpfME1aT$%*HO8s^QzU*7-k>L!SS|Q7&vK*1r(ve$Iu%R}%|_W&mSc=D#u(QlVss3K z0VrZGwTz;u_aH{>DD8yNOetYnQD-!2#8^^_<@;Vfq5h`O?PLBGd*&@~2di zfk;EkKm|*VCOX!V!xKmnP#2&Ih=fsFRN8RwGMa@p!7w?RG$RA&H@6TBr1#;v~_tOivSi1y2oh5LYAKQPmaoM4&U% zrL(O=HQ8afpUYD@UqRi8SDhG_oUdgf#S|HV_3`Q(9?h-H77gw=p zXgfr$MNJXc!B>IM>(iN-r0O^%Xb~TJMQW&eRZ0D_%#D&NM^s_hDnn7wNKnHY1XFzW zc-O^ckQ5+xFcC~Q?SNv_=C-9>-(avdV0~kQ^=5-!@EBFGA1OAw3kes7NVSgdIK28| zZ%7e?mZ%NJ&>Pu7BSPbVq|_T3H^}&b5)#l2MKF>BqzaD`)+*`^PqiMXHv&v3k`iJ9 z&QQsy2PBaZ8WA!cHHWE#kr+Clcq{u=IT26{A*w(wdDTF>HQF(>Bo!5HTA$0naeB`tDbL-<99D42;XJ7mQUq5p{e|Kl#U}wnk$`TKKZ^(f!Y~z*# zQ&-||yob2Jsl`)#|D_Gwj@@jZ=<~ij*7p%@jGS9sMAc>xWJK%7mN26V8`SnH_gp)>-;?Zs^a=34Q612mmm8UfBDE` zeEaYsr`#?+arYPbyZ7(p;I=-Ux}p=7c>L=PUp?_M&o5nG#s>ruJev{$qfLpVBYK^I zr35*H>*6L3@-x5qOML0(UF^HYqsXZxX`HE{i=kRvR;z>(8Syu56b@;rNY?_&4P?bkSs3!GlO z$f2W=t;4w)>UuYOriIRjz!EnzF0QO_{Onm4S67KK;zXF}_qk#BZnn?P(y6W-XJL#n z#ux)YRoU3sU}^=_eouP9@a(M>U8AL5!`Co^Y^M{|{@&E7# zto|qe4IjU054TKSp67Z%K-?5}-g_4Ze{_`Yix z&T~A*$N5LU`&-<9!*;e;?z%e9G5!`~t3$fkH@AB<{9dc{CZIxB}`nFlb)|$<~%Ta_)mbAFC&RNEh zagq?{F_#H$L^v0yO%Mr$(4o_DO!Na&lYzB?XKhfiHt4W6=#tnV%Vrv2II=n{J2VM7 zx{>$Y2xLSONW#eA6Zni-0x?7a&I!&NzDihbv5te}uvE%~LJ&j(O7V1?bHX!PKum$Ab+pmbsXb;6aV2s@ zT~Y3%FZDw$6Jtx;6mdb*2(cAX%4i;u%yJs1dj4`e=$k1>b#Xb;cn9HbFvR5o9 zDU|5KZ5Ha*6));pu~u=d5?aNgCq%<18ATIth!fO-IwL!y(2+AeiF$vxvRgfzqSBBuQ>__d70)~|ZDionx z5HeP_=Y(roya;Y+&}1y;(mooPNU<}#$Eu7~8RwPCDI(=4wTFq0BTsoGK?p{e2>29m zvBJfgHS_d3k!~ju>Wq0IS%am3wG}p~Flv&O`%E?@3tj|Iv0dZA3q8KfioKtyrDyS73LRAQ(HkWL;1T+d!!K_?Mr`TAoS3l1nOr%f&G?jXF;SV&=|H#Jr(gG(^nIqJOXWQh454Ix}U z6~utHkv1lJ+31_mcSZ_IO+^q!_OcNKDumiQDuhZMngx=h&O)6X9aVx_i6|C5Nj$0^ zH9;cyR0#iuftGr0Nuc#)0TmAxa0F`eq>*SKBbW&$g82+tAv-Jy<^eS%84?vqASOud zkUAhWSnUuG4i7dG{{o7jI7|Xc1sGx;r{GO-8cUt=ZAl?wK4KCv=eVR}^izTNYf5R1 zE1*e ztZ``M(YWQ2S}S{HQ)H9P1>69F*mqPFYG3co%yZ7?hjR-kfB*=pi6+>(zlw}oH}l++ z`&qyL_XY2+oLv8TNULPk3iS#|4x!S=(1tPg-LVyOb=psv;2bNnQ@GxKdN%FhbE8|i zXV0CSpPb~}^b8ZTb4<=obAJ3Z9~?c#!80d#>-AN>d*3~5?CWRy_FL<&OIN2^nLE#$ zVK;k+YHYg3M4zoR=-cxI50Aaa!4ro$bnyt!?;q!hr#7**ImN=1BR! zJZC!H!55$T9A9|gZXVh;%vfKg)7+e7@y#X!zu@BchJ@(-#UX&9l4d0Kax<`2uaH(! z5Q9jS(QP}p`_Ap`{+NYXX@oRQNDzcL9-8}4rmZ1~#aP?9#t!eHqdU1Vs^AMe(f^7O zLz-GXj)AJWg7*2toSB18g0wU6pzr%V(LKn&g9-IPeUR^lhYP^IkSr zq4MD>{0#s05TipwWJ%IB8tl@g%XL{33@k1#va+&r z$?&*VtI^ljN2AgBY?dL3L9FTYbv26RRN-Uw0wmT+21H_=<~w@6kHq^punK^Qo&1lp zYXBp00U*RdyN|@9IGiUAbPI8?_<9)W3_#o(BuTI~At9c=0vZCobiZ_icfUxAqF{c0 zo(mT)FgrU-Q50lZMt^@lTefUraBz@HH^R`TLb*7{{L%M0y_&NWAZ-pXuzMd{YZ;AO z_o`iqRFHIxje{9|)j4KcvrNoFxdFHdCmU)17KZQt3eVp0D$`4=oVqy9>DOQ5%x`a` zx0%xTh_Y9B|OiI)OD|!1~{><-N9DFYQb(=c~$^@$}ldPPQN+rVj=ckBURRTr#Q_ z=ork$@e4JAL8zuF&3Z!NDzu9VD|v;Lyv}O7&2rvirC4UQTwqa)I=knUvy(1P-iH3t>fHmQO2qKqSiCwkx|3bE7*suIiH0IB zK92^8m!7>S9tcShjkz>nG>Rw^sUlS4oiLUZAp{|K!AbN|jA%4U127N}kj^uH=`+4H zbT0YYB6Oa6jlWfmqh}fqi-2N+g3%ahz!0~31rO?pMwgv?izvn@Dh8}Zp+Rza5w94p z7$;FgkVK(C6bwkr-)KDGc*OBgJ<+qU4|u1nMR*Tg(Lk3jU2bD++moy~R1}v6q~g&e z5K@n?cuH$1gQ3g~rN;J2C>1FbTLiKq){Qmg}P(}lVQY=VW=Ra2uUy?f(KNgR0tYy&g05JnJX?2Xc_&JI~80o(6*4KF=B#I z#DEEcdckAB8Z@y4Ks8`XM@CLkda?p+ThT%(gCZD^Kro$iNdlSmcyUN5NlJq)z>rZ* z6Qr3?O%obTp)Hsu>0)RlyoVVgzF$ zl;CW@S&tLJNr_Kd1U!Ti<4eRB;7deua8)!-!Dgt*2*eqtpmB)~Mh(%bL_iHVLA3;5 zfNvu%1!D+VpsYFC^%AX>SW>j`XiEug2t_nBcL-h~cttBwa1iJS_lR{lML>#x?TAk( zOQa}~(jjLH_+apW5D;>r zNHn(l5+LA6!YZa*#S|@QwQ&fmHm67uR%}9+rPRxkp5W;bPmlFfZAKMC#Y&7kkO~7Fu8|Qo_4ZQzM7_2q^wcWs*jOc-$f4b)Xt!8Y7?G>f{sTz+X*Qc|95BdapuM`v zN}kh#n+p%BA$HP`%XSO51f^&%bN;|<%dxuwE z{t^HAM}NO_WDOoy!||crE%s?yv%pY4SYMzt|=P8R-tbnpWC~c zw~rrX|M^K?d+9yS?5(q>JkRXJX^w6F9KYM_W8;%p;1hiE}vB{2?nO8IW{xHQl4Xt zp`NDf9vWij;2=ZwTPw)=dC_u#sk86$-9Ph6(_ZVZSR;x6dO`6RnA4kA-TT$<&+S|)uqMY+3tF2X5^8!AWT)IM*G}$tg zQaR`FrKikYXRz-hYPWWa(v{?-h%_1OuTZakJck24%^rGtdk`(ra+$)H6dzTsYDw@d z^0wlGP;1ob8|?ek?xX1$px)chpg>DwUF%vo?U^N9h=ahN&XIj~an7-@u)z5EIOorw zM^#DFl)k<`wrttLh7B7?ZX&8ims>$#WpBnuh{~S~h z+053_1_RA=oX@8@J?<$6%*|Qu)kdjpe};eZrTaNNx5D`o=a_u!hrE2^^9(hr?COmk zGOtOwz=gwyc>RSpIQz(N(Z&$P#BCNATR7)ID%hk!Zw>5qn(11u;%1I;ZoxBOSV-ce zUr^`o0N`5`^JjVE`BykLx|QuauJc}BvefyQXzcqgE`#$x zRwJqP(&T!R5sF({V)6ZB%oRE9&cz8Dh2I2^hCnbDBXKJ6hU9ZBFD`IlDZ}^gV4&GU z|1a((>C&Z3m+NAyeGENZy=7EeU)1eg2<~3ogS)%CdvSLur4)B}DemrC+^x6Tym|ups4T}6JaC{2huT6$SV-Zq1d6Qq11!HFg8~Vau{5?TFgAU zMcIG;b=)S|iU|}!377{d{tjA(81S+=1Vd2_)#qN4(-oZ#0~DI0qtDF{p-@SK6HXDy z=g8+MD-dLu?2BFUTjs_@`C>d+4m-ZeW3xp86V>=8?G45_BunipUvL&s*y-)E61Br> z%qz5DEuaUE_Rmv_a@Crdy(VWN5zQ-s0~F%)3xy07ot`o*NS}7i_XZ1~nqip9aVpYa zF0Uw}(TYSQ5O{_|F?!Cu6RhK-?@Dy!Ybdzs&0#N5H>JvlRbY&0Q4q>*m|UL*KK~Qy zC&5`{lG5JeB+Yk@i$GFmBX=>lipG}~*wV&vD^Q`Z*A)X$BEi+z`Ytl5(j~U|(I(hvKb~Fwulle_YNaw@LDVTc0Yi)v{X3&a z&FGD8D~y}5V*$)K;{f@cxpgQwi^ZaUwm61%V&Ie!bsADx@mVZpVL9?U45Scc4FAn>PUnrhDtarZO8K3{WvvfsDT%;Z$J@N1SeVB)ZEU|-RRK9+=2xZdDwhlgQKe0 z@OCb+Vt6ey(M!u*{wuPqzGp1bB0&u{UD*8SeZ=SCZ2X{s^1Ns_Pn6w({peJ&G`dtS z!e&JinL>I$B;n0DA|d5V-5+if>)UI1=>`*&!%w&pbl|YTRJ0`)cA998+MvHu<;r%l zTm?L8DR~----+nrMpt+?aVqerdxxH(PZdL<=dtp2z)`!k7_o7fRBA#{)uig!+`bIk zQM(E@iN7hs#J|Uoha$C;h7tQ#;sa7hSCKjjN(+V@@SwMG>CJ^1NxS{&tx-NoT;0y&Z2Qj1BaEuxt-+7>ZHUZ{P!bz$;efy zyGJ$hl9H&>Bz!EC#i3To-$+p=!+6Xk3%_B(RuKxZOGh6NTP;F%C2*&rx5Htn!S(PFB~ zkwfNr&{(qg!{I7mBBiV&nByb`;uPQxrz6P>$TmbCB6U!fnqmbF6-0?+ zsZ~d!=Dv`9JkY09mMU>@X=~w}YGT>QPl)N`nG*NTkG$|GWRp7;M51HN?H;9vIVAk? z=s1XFWd`3r@_oj`yQ(T78R*AeYh~B1*L)hHHDiAB>sj|%vSx^w8JM6vABJTSSk7Bt z@7|1C8C&kBL;qTRof}h~($vJn#LPaa91UlvQiwQE@>ld&vkw|*l645NYY0szCEX5^ zcxIC`zBFGLO{LlScOVh(Y*HdG_1D5LTL4P{G zB`$de8c`{`KbRPOtV^Vo*|p&5QcDvY)HN8lqSU8?euq0{FLQA97qD;`LD0+*_O{=n zc&Hh9{`PAZma5NiN49eeUo!OF9}~ajGP>QwI@;{@)A^ir8UCoAs%zH$8qnNG(H?AW z7Y=)An@!Qm%L^Wh4}De^byv`9=J)o@$$Y{k7E6Ng!?CEhjmL1%oaXd*+# z+4}PY-EXF_eJY~YTd{vvDKLWgeV@5pV`BEL2;dcKtVR!~W6{tw-cY*Rj5>cj-6Bw^ z;lM&%PMYfl>3tJ*vOEn(+Lr%A%JcP%qPls2HMxG~n>E(; zv|*F(J@RKg|F`;fjL_lcxymL5BvjAuIg@%u%9T1{lkbK&bl^HjI#HHb2U4M7`c2C> z7$IQpyVx;;h=1Q#R+_-he{{#*#le_RBc8X6eG)!)!C$uC(TTnHK#z0PT?9T?Ebj-s zLZ3ADCo$bdw+!^IkF7jFpOow@NAsGN`c~x%5`%rgZWfm zcN$lk`1nCS0WVP1t*CL!X>>7^Y0^(fb&gG0JQXh3yQfbCX-C%h=}2ayJt%jmS21V) z9uV0wDLdzJwDk0B{D_O2w7=J~_c8YqRdeOh>b?3y91~mG&!}_V-*Cg;;FPzv*5u(0 zNijg$Dwq6PgXhbUN9H8|iS{^5m~BQ4SP%oXY!7a>U1v^ZOGP~{d-w7#=^J@jur0IS z0l4M^PdR;QMr<}-?$e&SGs2>UHaph&Aa>{w!a{VRMuQfEHiKR&xdbWn`L>;x>LfqS z;YrCK#7(40mS0$0-kRv_yMbTCxreb(8{=xC;Dscj3Z+Uu5!;Fh9uG0lpi*yQ){enq z0waqcrILxPM_xiD0Fs@+xp>UK=Do{A(W57yewTrkrN$=&j#*YB@dO_1;ZtT5n1@oUQCFgz zo1G-k?@+TtzRN0wB*k#XRwk9$;b3twwR8gI0Vl4}ItnYTILwlHh?DaD3W%h^$S{`( zRiQIx5e4`bn8~KG!=`&3b~s)Iu!PrigjZ-sfK7NUSmz`<1Vl72TDK$zP##r=g9BSG zb{q04#QB!{68(v0Q>Ns`5n#LiwyGAvfWc)QhZu-IJiD?fPjrsP zy-J$)=`d-4u&9FD6cXo`uS207iyn!NPrF@=nWvAiy9c0jiE?&0YJdq|aw~$0yZMDw z*#4sovN;@oU;EkYi-SrXy-Uw9XDV2N_64<_ayV5jO+t^MRjpY~iB!%tVVnd%#*$PX zki<}6t0qM1OuQ*DfgugxOL+WLN?B>jh^SpjE}yG_HK>`j%{E{W9I{Q1DWpb@gK=8Y z7+Mt9YA|L`ggKB(VAU1e%+U;8WY(5o(GuLsyF0POV0hSLFiJ&j)mZZ`_u5P_xsjSPZl8flg&$w)m%AN+9$`!DfQT z#EP$j$Ha(?$f!tQlZb9a0s&Jp8DRn^q8_Z1zLKF zCy`o1B%_?K_@QD=TIiHI4HV?~ilxOQ4lOQhjOvNKCMj+qC?~ciMr4Uwf?K_##+}kJ zoQj9dl!PCPtOlcmwu@?tAZ}igpM=j$uC6p+>6)C;X9s)MILLxqG6@S4p^Kg=+Z3n_ z&{r=h%OFi81w={B*<;0B_uxukqHc$jdDfaC{DtxU6Kb6`OI->ox1^Ak8?a>eb_W*f zqiUTh)M8fL+v0?m;ri+i*k~Ja+i)}L{Q|}q7u3SIw0|1#K{820VZdqTh2KSl zuz-<&rM8L`+_n^<8VIFDJmgkl2GOl3^U#`7W)ThHrl{u1jyJTV$$UyDY>RsgNT7t9TbcyT&!valyzbA5{}4@$R$*Sv;C~tOSuup-%t4jgr_0xhqAPEaFyj^ zV{Ko)psWmj&C>W;ZRT(VdhD;g0owM5mw@Trd1_ANy>3p}2j^`nlsgN^aoOtC6Ns4I zngHG|erR<1Aj-)X8F<|1i@+l@uNg1h2}UorrQ>_w`$+ew_Mr3-nsNTxAuvrV{|PDB zWOrx|8O1SU*omy;FoxVbag|PTu*>sn`M1^n)siPsVnuW2IKm7dgnkdEofxO0fKfOy zZ!c;wKHC@k^}XWJgSeEBFOSWR(wJ}WZcRIj7mPX$lgCSe@iSB3 z9IX$NOifGXw&M#a?$OXSRsRtE#cV^r(R9t>cM0dOZ=dUlbz1%{EPN<1+WT8Wk1gM% zgQwYAPxyt9wE^4U)an65a3hF82Tq6QbM7BHMr2W%Y1B9^ZrI}|T@f0qb315q^bQql zBRP!Pp?W=7hmi#-xq@e$*uJ4MX1g_)Cvdtv()zYtI|#M-WynT6SE6oCra#=fIJ$-H z-9RjQ1;jgSu(j&6*P!XPUH&Zy#4uJm+B~$@`o7l*hzc+7z8=RYsWR_g;P+Y!_l}%s z#9))^SP8^|VxmG1NM#ke#^~cAq=oq^tChOaQX!zIm@;Dq>0pp5SRk{^y+1O_%dcFlV(+tI)S=2l8<1H zzt$Eqr+kTt#{BKI2YWx^GAt;YDN+fS^Omdc-DJk}6!-m~s>$Jy$vfkFV!*&(Xa7`Z ze=|VXr)mlhsw=Dm+eIeNyK?I-@$?o>gy))6?u=4O?+BbW1xo@G3%3TM# z5c|c#!BH;<=p1-nufV3!Evt{?xU=Khqy>8|4n+p2CHuMWPC7F^pr|)z7ojiuNas!E zd<`Gmn|`k@Jb$ZhdF-{S?2bf`kH;j}A5#1Ko=s3bp0{2TsYD;K-?dzmL?4fQZ)S|) z9KOVJ*5~GO|%`?wG;1Cg4&&LY3&2i&;5ztGAo z3+w7VWaBlFb&qAgZ0)gn2jp)(-NCIqJlquj`IGYmp3HffSh6z~8d$&Oig^$kmMfJ3 zbqX8By#BU390jqPOfNsr=EkD%JOT0Z{_BCVVw#_I5)`uV%7}%ku7mU;m0Xx5!i>+l zeH2-LPglcdTHX8@<3{>;x3;3a>bKlpt!#Sg4i@AaTD#h@wQw=@Z0JZ(3&UpIcjer3 z(3WQI?~a--ZY0Z#*i{i9F;Ms8ykqI`F&Oc2H8Ab%6jV{Y3F(F77?Mg-u)>;PXLW!M zC7uB0`l&WzMiw=OIYnm-1$2rONCq}w3(`{4Y^Yj+pDM8?vSdrJZIs)`zHB;G8Wctg9Eh zOw=5#Sj~tnz*rCy4n`j4I3u9~N=h9&AN1&t4DKEE5b#v151qlHLY3NEh3ttxI*b}h zsm2w%zbGxT|4qkgLCc5nMVvvl`buYj=M-Ycn8esh^2;lbT1>mJNKPA(CoM>vWQNaB zgIu3-(=2xsM*5m=orl^|UO^(vw5nkf(h)Dn!-%nlZs2nUP{45U#ZLbGj;mRN^%tXi zG}RPI{2IpKE>g1u?-VW>5=$4to5m666yA)mJfGY`ypT5dbDMg=;n04RbmQ6 zaFP3}q(ki|#EGmCRjr@yVmd;d#EDkuP|a!ET#`UQHica@7+7|dF>jUwKuIB0NT4B= z_Z2E&5(COGU|Fzr4A!JkPr{B%pz$?hAz-mZ?K<_Bm@-7y2G%0dq|(6|N)!9t`ATW`DEg|MA01ZsdxZcZOEN3wm(EMcGOpP_vm3AtoTHQAop=QsA>6Y2w( zs%Ht0bHmdw9ECm==Va*buXETcyKI`Ui(z1%P<1W4ZvA;(+!%fO1F^v3_e$!6*kF9l z!!M+qJr*>IGN551-5U;ZW!l_nf3ddyR@dCnN0X=P6@U-?3RcSJ8;WA?;(J;~@X)Gr z^`#jo(Y9ZlKf5fipLhgskQdsJ#3*DHL3)jqKcQC61Zz_xAsu3t;sF4U0KCLDtd zD_l%q=e>vB(j&MGaXtlI*j=5x^P~0NYvQ#Ek3g(Q#{tTUm$g1QBC!Ix!qYooJ2Y0y zl5C!aCcEP|lr`L{k)yZWGp4;if2LTOcO=ObHQ}DSW?0#aYL{S z#e2C1f1KDEHPz`>7k@=_Voe6cxw4?6EEHVo<7VPPD9}TA8-A{fO6TQi&c18d3!-zI zZ*SnGRu|C5i|550y5|$*d)F&rk#^UVsB@2BV1c73ll|Qxeq--9u1QsXIT)o}1q((K z&wC2vOb6HZU1=2(&ul6X+I$5qSG`v(aQ}#pziFsL8W#4mz}<1Gl@cGcQCJ zhQW@MmuF_>m(w9hg<7kkL)vr8Yjg|N)7re;E-B{| zLGZpVda&}M`rY3>DYMD>rdV<^_M-BSD|5nzK1Q_srvD8>`ON3ruRZDI0jJgL!mEl~ zcPFQ(rttR-M|mJ{9H?(7>NCjYcH1tVH1A-NDrD*SD?A=3|KQf(@|%$AvDZF7sqZ5N zDM7=RXt)$UPj5G2f5u+`$8}dx)qoqtb-!0HG^M)6W`UXCnfcf0qEC1Rz0Ju3$}Q-3 zcP)mLQ#}L70S_L&AM1FYug5c2x1uWl2S&O%0&)H?jAUX$_J3id37d+I_Z8V79-FO4 z5$C>pIMFV(?S(&i{4knT>`PL!s+KsOJBQ}D`XF7oh?Vz#*Vkw*Iig~SnT`kl+hP8V z4e}`e&AZ6`R|2zNX6(aQlTFD=O4Dp}n?uVX=r?0W4;=cCO%o?+giSKx6|L{BOO41k z>P!60w-e`g)By9mgGWh^@1rBeS?-xD<9628pED;p!%05~6NL5I3ShrJ`@A0butK`e zCwo&f0&l4_M7{6Y?x=V38Q#eN=bnN5z8`6n`P)~gviz-vg;{!6HlCO8TRWmT0U=-D zUn)0Vl>PUjNX};Wa{S*eA7ebyGV=w88vyY9+B)(KrfCV+1BwA7eNH5%nrRJB$XkbC zc#kGlqN6dBH+EJ-TbK9Wh$9KarVO=W&;qV0d#vxGua{4%$F(SpFO@c4kCmg3>taXW zb>IkApAb|7Cg;sVKO1}HQ<3>5T)ysb9bI&Vj+k zbmw=q-j9-F*1;yTUcX(En0~+?WfeJObJz{J!x%_}%m!Y$QRQI;|6B| zZI@)~Bw@|W?V#thq<3*?)nYU|qZo^OsDqUjlX5V>{T+cqwx>vi-RhRX8H|9Z7_AUp zC>0Bc+l3mU7?DLUi5jSY0ut81N=uO2enKUYe4FuNpbH8#WrKkh@x&1ihC-=;Y6+xx zo{XD=6H`;_7IBc-`mV=?fjU>5%z(Kln3`~^7CQp6z1rljm}#_jZ*za!yA;?`YOdAz znL&=PC2#~wJ}0;7BdtaMy>tPsrp6PGQ#FfkC?g&-Z9rc-#8f#r=_hq`CDP(DA2tp2 zqA8Z*;tU;91S15wDq%7=rVPh-6K@coD+q;=V1RK>bPIsFBHVSG#R{hI3~_~X<_xZu zNTQ+`&5D%@Qwn2%3pj=zv6&m4#H?4M-33Uqby)t4eSUMYS*CS!)xd(L7iat z7cjt}!kk)=Toiwrrf#_4B`m<8V>&^lhLeZMhINc29nv;eF4QlI8jYZnX@;x8t$~`y zNZ{5~Vvw_D8z7gz^<`ZQZpn@0AeRsPTTDMxAxg1`YFiT!xQ)6=U}y%0%hAqx#AhFc zQ8;y3+^ZP`qS-L1gf(JBlke-U7AotbQ(NIISd|TxyJ=x*B+s)EXmR)&ODaZm>b6B; z>gWj9;CULA(upr%_y)6LD<~sWRUm94iRgBh@KtIwMu%(9f5%01f&sb_WGc$%4)06Q znQt)QDM??s$<-ufi7_ZJm@2S|vq4G0V#BbaE!j1B(OWDiK@YVOKn=BFwgvOW;3zoa zBMsDGMQaD^Nhc>!bE+Zgk|iqi(~#~V#im83%!!ey{(_akLuxMSZDuzdp6EafkzylC`g!-hb&aOyU7S=&G@L~S59eP#7ePdUlxp9~_E zf9e+Ly$NyZf>d?PTH@%5HTcFzC}67W3ev>1Q=&&&6{qPI{%TZP{{6Bc&Ji9EJ(emJ9~M;)DQu zBh;{dRnSDz;I}L%n)(LcA=2o^w&6R^7U)7%2WbgW@S&htK+E(;#_djG(J2{z4_YsX zDnP7&Ave`(*i5cSf*DA|Q|tZ_I)e9?L4zR)UWqi(T`V~%5vN^2evN;1Q{6j)JuTby z=y_#l#Ny0Ig#G)`e}->CQHV5<8T6iVb7rbII1e=RrjC^oZ)8}&C+Z$8_y_PEtMCZ; zApH?EI_kAYTmLKIvgxloN@JmS=}uE8&(ki<=oX}|g4`5)guwxtcqN{Ef{Rw8;7tFM z{Y)d{Au@6qaPFt_wjucH_=>>V+nitT(2x?pqE z3;YsL47@p~@1!0o)~}8S8#76|Wl>K-jKFuKt2b>|xhpBIzR%PG-D_`t^wsEO@lkwJ zv@!Ry%DL4biQg^tbs>|HPf*wsJsRs>(^C+;d+fc@@w5WK>NvxI>07Rvl+~v7itOGr zI%sb3*y*;(!$367^@eF0Vvxu!n=ODVCK2;g%2Q6mY~f+U<#1ZR%*__!xX~FxOSU<% z?E~NLdCL`&O=k9OOC@|-IrQ`$M*WM13i0mPUt-#3aW_k3gQ|NA&+olGYT61%8IZ}g z*S|PT$jC6-SPsXR=T?8N*dWsBm(k!ji;;-bvGC0~k8F84KRf53`G1Jl&$4tFfaG2F zt%O&0qINZxw@)Q>^*JnLKCFFNC%pO;$@{LtWaoF(%~TuGlo){X*seu~J~~GNpg1|( zG^ZiIG=Ua2TXs&ue5uZC+lHKjOt%zc`6*E&Img>8Xi3%3(i242VjXri5uIah6@9E3 z074gSA0z0?^tlWO4gi;SE%V#fab#VZt!=(M{^m>;ay|1h7QUl#CRrY16pVOUT@6r?t=QV%t-35QI&okR-{KfWg zvH`&=r_>%~T&`I!MRvS@b#|I4B+nf_8tXW=FP9}>;Q<>&vOMB}~WzjPm+UG7fb zFnQ+b1Mgz4vgYoeb%`Eyv5Jn*uwGc!WU4is&Tu#DX7NpoOJ0KYd(#7SF0CX{L;aH) z>cI>3y(3}AM^C2^tnW*}9UMBO8~N;U|EH^WFs|INV{B?J|LS$phTkOKG4_Bg3+KK`RTKWnlYf4`??fgC$D;I-5?-sWKVE~Yq{X39s+Rqgfa#@g70R8 z?1tAFqK5DJKqAe_N_A|YN7)e9_%O&>Vsx|vGKNy6kX3ejcgN1dGcq=YVD}%eFE;+V zLgjn)tk~^2A)aLpq^LdL53+GLR>6Z70JFf;Dkyw0vn+*3sxMydlgzy~l*`KK4Hl;f z%elQ1$Pt@6`>&d3ZXu5E_>$RWHnd%2MAnltT^|AWn?m}@drUX4J(xc;x4u94R9|Sg zIc#Z{&aqoy(r+-n~oJvr^n=OaGwK4&)p(1Fw~A6A+{G`shH^BpfO4r#;@A zXAd05jE#x11mAF0zb8b}F2?fiZ}FU6JdF*y`%?{?Qw?!Zei2@$YtEL*H%g<(?NyV( zzG0l`enEY?Yr{e5TLVN}{+vxz@vl)3ef{eE^w%aY6vfde?VQ)yq89g6=;)#^ru@cC z|Kgx~w~D}emq11I(M#lQMOvqOpo9Lt?|bxHkEVp~ZlzCOzb5zD@-0K_#p=k#2(jwc zhuwu&&EZ(m@VE|IpZ%#l{g7v#?4?&$7JFLxJ)0ZzEH>(=j1l~hHIre3k7FM9%QuJj z!@>OR!3V!!)m)$GQEQq^0_XWwU<_@7Yu>bD=ZVhd?^Z z$2ol!)1XsVP<2~95eZ0SVy-EVHl$RTJE7y4gn0GS7mmHlRU&-;N3`At;pC~WFZg8d z`@-ul%Qo#Vu0{ejZ4Dk((wsA{w`2S#$EVA#e`B~|2KcKNB^g`E3>FWs}D-hI_rN{KKK6(0wJ(AuMSk(#|oBRb1M8%R8(|U%e zX;cR@kU$k9QDB@zchB|Xp(+`hMhCC!!ZpQ$-IKk4AgJ4NbL1IM@pbL9DXN7-vVlkB zZlkm0o^9lLjsZH~Qvx%jP_X3}&oC1RldRqmcEyT@rGn^$8v>zES7ERyjA2sqe91F| zWQ(YG#JD1n(XW9x%{PPNfqCUj)L4uI3=$IOf?=)RvMPHstu~n7ETpAOh{Qv zp36K~t6zQq=}kVxRn`VeXv^vd5!t0qBD28E~6?H`HgdpRIAJH-arSADO)X!76dd#1?$3Q3G4o-UB6Gb9L3|w zp^F%dTxqs+bF)EeftJg*<9{_OBpYga&(-JO7FJ&rvFBqL0J?fj{}Q0Mp@W)%n*?<7 zOo@+`)v4=Lo2!RF@mey0nk|B4eto;|@7&Cvesl!TF}VNB!aHN5*qzeppYaIaV-tkuvL zsdosI#HS)#sH!j=q-#~vE{FF@Mmh-@^QIVC2$eFa+z5|=@L9N6ltmj{Zbb8*)ksL0 zL8cU|IG+)1aS+g=`0>BlyR-(mqSiQ>UK88#*@`>L*e(n)6}ZKDCQwihvgVqZa;K|1v{Zoh|PY=T}*Zaid2M#R*CV_H%wlR zY{4yQHDl~vP^klI#S~b1@f<6W;HoEBf+;00b3gW|s%>UXHFV zc}0@mq-XW;bW>AooBsZm?ZBr`a0Vz12a9Ce8;YjsB2%m%=a*N=WZ5)*HyHI>B0uCK zA}7oFo~~@3!qA1r!B4pQPFGgxM-Jq6#S}eA9Ifhmx6&?&me>@>Rn#tW^9PM21?vdZoAw%||%iq25 zxs-r!a~(wA8p&(*fgA99+7A)BPwje~y@tf8g`cng@_C$xX|S|&$%D`1lP$jueHL=P zDfjw?9FfiTO+M%o!5$G!$NK?%X6Fmbh2NPxssFx6B=AKx@edjb*MhBNEeN0X00-+NnP-ZYyWTfgR_ zho$5BA}}$r-fSnUHm;vzSqu>ligiu59@~w`$~*@LL3Rc_!+(fy7m(672GyrproHXI zI6gEq#u}t>I_x4nK}IiaoIr@T<+zX6KQO@ed7qnjxi>x$`%5Ha!qot12?vn8?lp<^ z@Vc`{EciXk)it}FO%1%S`Rh2)!tocczisFRy$TD9i!TsE_i$+Z#!z=tuHcRsxdpS; zk?`Q;E$9RM;g=M5=?NVC0&27qCfZ=-@VI4s1=qhrY%LxU26_9^JbK@lyS-L_<&Cx@ z33^AqEwwy)iGo0{qWK5T5FhAn0<)zLU%{@dSwo*K0<$Ls55Kcy-;)wx*Ug3@?_B4< zpZ$1j;P0EhN5oj~u^+dxCfmFLHt5Mu4jG+h_ZlGBJJ=br52TOF7tvwA@9g@7XuS{UA2q+Xu~ ze5Za7m8&r$vO_??v-sviSTfUNaNNt+{dnH)xPZ5wrC%#6pAWh#s(wr+MI;cLeKiYe ztV<@`)NfT*<>VJpp$gdSC3uGH@y?P%_?4$wO*WwU;~xkVkSK4jndN1C006Kh5gbGs zEidD-K9KNUjm`A7X0rE||4bL~s2FmV`9#or)6ZuBzqH}}K!@&s!s)wDh0YGX_!w$s z4v7UEr%utqZ#_}@CbazsI6!|o_;_`FJx&V%&5qrzb9MM*ov}Ua3;UjY{y=}qCr@(ZXv_OCB>Ez1CXf2OftP4>w-|z(=o}Zx&~-y#a#>y>~KSHy)eCt+2Mg8#-N1vHorPwpE`>&%p8x19;5O@4g);N`reN6kLp}iUC%)8Bz^6-ETke} zBi}8e`kkywsAku2O?3Zgkl#PR>vjD*V)^|$pL4*u$5n`jlJN=i&KgYB1SAtiHi7iu zZ>469cM35HiPg>F^wiXpp}PzjD+&z$?(CX>QpnaD=xKbiI3Jl3;Ww{;JNK+me;$cJ zhuNg;6J$;I^2#@d2Xfh(;>X^*x8dwmOh(T|e=xUw>>*Ey`sI}8@%^6DRD2x4sEy0c zT<;EhO9oi-{It$P{W{3KetS5$ep~hdTdQ)j3{Z7>9U{H~j`edJ-ksz+<`SAUo_o9P z+Y&#jY&JiTfx`oMM&~qV2}vz~)F{*RctrYa{uW(*GwQw=Wnn&z)@?zVsP5S7Cs}`I zWmWwKTZ0wap0@OsPqy!NoW1WVnO7P&+&_zk)(ehkKbi5k*_`1g_1R{R&ldm{eLkCo zE3-!b<>#FU7GFDMJxxsL*B1T#agQuI_3^CDN-3Uq)06Ruz~aStl<1*1ZK=;D)Bb`k zLBuWVkImun-!$`AJmdR#5~YM@0%IR%!I=xefXMT<0J6(W(eXa>b@m2KeH-&F;k*3`r=^CWiK3MQ{(lQ^x%hNNQEn=IdoXE1|rlLADj#$))0Uq zq-1!hW4bGqQ%EIUnBN%eNUY+>^|XUhQ-6pKFlrP0OY>J|+g2<+v2I=qx1KbQrJ96I zmmNcLEEb>pI$yJ@wShY^gS2psbT9#Va}>(5#js;Z4824Sn-1+#Q*lO>WXf|8J4jc? zVTG7WH(-f5aAk211cnFWBWZCcO^us}R}Gl@v?R(IbDPTV`=C_Z4neO9@+95kT@?gL zZK9XBr{R3oZ2jC7LF4t?K4H$-C`RGLpvOB6VZ1oBtgbTnRtQ6mD+52-)2hV~z_7Z8 z5~LMqPhNF=Opt<4K4e+S@6^L{fBgL4wk2X5i@>ZZoKiD3io&IO*_FdPi|eQ)QBD3+ z01zCsO2IQIr^Jlt zC*6S0gelo9KUOA#iL4Hd3EM&ks~e*o(F8lg^Q*Prl7Ne&*=klt%D@ea%Sow@;9{UA(zf z-U!DK8kcX0o-Co;kTg)L(2BDYRhEFWyQzH!X&P6trRXmWhNS%0)NmQX!ZOI{q&>a8 zc1SsFy3VGx5?|TR%yRn0qnnv|?ir?_3T`V)2 zpQ*58xbYg&Y)KeM<4n1>n`P+cwi}=vF&xj?WD>Op`cKLNkBep`)r;ehEFg}D%d0!V zgW71$fKucPly6Wi(TmYKm<;HtvMnc3a6*$JjTk9kwPH`&a&9!Y@5dO^TJNLY`$_NC-jb?syz1HN zIO-ZN$3KNb6_SIYl_ca?@Pvmyt7^c08>FiY0{`2EO(T1?@IU@6I`bmi+T?e1*Zm-S zxo=8k0rLr-V}{$=^RsBH71`2;A1hhs>Bp`p>Bc}jv%yN6U5avukqVjMIGA5`VSS9` z<1xbXX;azJj);7_TDtc!$Ycl9>l@1|()0WJx1GvTnohO#``FQAe}I3mDlc>Lm!7<( zbzfo8Uu~1+0Ux}NO|l@DtrclJ$Yh1Wj9B$xCA{cw=eO4_@MqM)!|cBF%l0S23u1*0 zf6FBYufeq1^@ejZ|D%t0j$(t>Cc8cSWYnPVLD;gSyK~qvIow@dm#SMM+4rDdJ)d-C z?&LQ=7z3tgNkwLgV`7-{YC8&G0Xy`0?WZ13+6Ahz7)t_Fh5xGY}uNI>lcPY39TfJGS4bprN+lp9?#R&|b^S(4v9U0DvhM z5}($y!z>wZ9lsq6aPR)u+#VB?I{_hy*;<}nR;Eu-{oZLvRUJPk@90m>@_#8ncAB<~ z!H|%%<>siJprE|(4UkiCJ8^k>n&woS1h&m{OnWb z$N6zE%HjW!{NTFvjLXH8S0119vim<7G^buPVK6X_OlXp9 zUxcz_am6>hJzdlaNpePu2-J#_xO`J^ee@_M|OW_Us|90^)a8V z7yWzdH1U2Ms{N+=o&2{ysD^K)K2K+o`P)Yy)52%#O)FKvnd){PlUuFLBGUnSfpa z5^tE1ZD8H8X1TVr^zF|b?@hGiONYxSO4E#*knBc4Q$f`fGt;-*Tf0NN&y_u+k+b$Z zD^&xqw?M93?W|b3#+Aq%?U`Njc3`3X#Mr%B>H;|4V6vs+e&xl^sf)p z%bk43)zKOBH|_`Dt8~vTn>LOvA9VP(_#|fHy+;dl@D+AB|Ed>3{m1d;e%cp95ULN{ zmp)Z!#+=>{PTvdOfG|;k#;KpQ@!w8D%Z&oPUf*6GP_v!4ZQ3N%lREJ#N!p9t@&SH~fGYyRh^Jip(@5d?1OwwThbYc{F=$jNi5O z{LFuyS!t<$_ZDm9c{&t)%nM*2-&_-&wh;ldi@b-y^Ic!TKQ);=Q|G^3hM?@@nGfwN zXYFyGy$yU!_?X;%|0>X&3%Gl7K&z)n zwk0*jkoh_s9lIj=!vEt`ghnJl)MLvvbnU_MaAeLVIw}uc3Z@YXj@QxRXd5T{0roGS zl1}1hR1Swl1WgbuCX@mBFy!OdMl*-FguvW?+wfRH`X}0c0Ms{5Bt$S>bd=o;E?A2`@e;H zO9ypd<-EiHkvRWzsf!$XQPwp||JS+o1&zGncqzh&V%mNr{h&`IbvDLW*%dMG^f!){ z7>l6#vC&dzqqO&u5BuCO#~09&TVMTE)0e*t$|QmK2vtM)Y!Jyxh?@70(RZbk3HhW` zM%!g%ehm40iu+-rdyGsPr_iiRo}XRTb?hF(YXK2pn+iF3iZ7r!uN}2LtGZHW5eRH)rztNu?(#i`@2K zQK6XRXJ2^gK^UN>_&=`*Jc>jZKq<|~X0-qs6GY7Z!G^Ao?^{Uf3|I9*tjsgFuln2F z!^4&Z1E_X>@$Ly<=!6sJdV-!o$XK1j7Z1L#VU~^MtYp0#f>vMvC9ZzajzaqR z*G3Yjrc8tYpsT!O`kV*%-`aj=mzdmx_bCe66texr=bT7KZKov$q*evY1aU^<){F>8^v*AS{P0=O(e#{#<5`}R?!m9BE?DqWtHO<;^o4d3Ut_Y z8gvk8?#q5C;)HZEMZqZrwZJao$Xyh$4W(fj{9KL}z78NPQ6>?a=Zhx{CbZ|SDASYj zNsd>D%F6op$pO=WZHCe<^k0;fa*vi~9gJ@(w!viR0Py!bbX|wip9Pc{vt>Lk`~o|e zags#@X{zG6i7`SRIRci2|W%{cAzpkV3Pm*DeK<6 zppMd08d#)Ks0L?1Fvm4+o?X_e?zGFHs$NqNhpS|fX~?RUa%9V7{R6L1Q|tdD>Mf(% z?82pCB)BHH7J|D=vEuGtthiI$p*RG0in|1NiWhfxEACRPMceY_Ip;m=+yAq2ua!I3 zo;@>rCcG@WTT9D>3I|vnlPC#0Xq6~f<%G*hB-;Q`<^f;=gOxKiMTD$+*Rr`A$@zPb zX`Ga?grnRLYMout804dBgXF1S_fmM_6SE|;E@p=-bQ(1>pNLLII~5l$mc|$0E*Q5# zI}eN{R&pVfG#FKi6RhPj0CVkz4*Es$@JB~>Uq!2slRCZlgA#>ODnhJLk|C)GWSH!z zZ4=O%{`V5x5v=8{9f|5FP*Kfg>Do&FeB=~cswU8a-*QkaTkQ*Gp)*w#uAtW*LSnw~ z==iV<^YwU*)#Mne@aSk3B`MhZ=RVI53}iWfN1iSmK4J=p~{9cP|k38fRV29tJ4#;H2L@&^#- zwt1)9=8@@}9k;#^tnz-%Iu-MO*{j8Kq?{TvEOG2O6ZY{xFrY6V5#;2Y4MaSF#>sR} zPI<$FKU?AnM;efmuTK>@cs2Jp`RAjO!~Oz!uN}R7!z-Cf|9TKK;=^sJa;#2?iw%*vlFCo1NZRQzPht#PSAt$6~qo;}hI{E=(S- zGR>w+Gko@_)@sHhe|)>Cdcj4B#Bo_%w!l0-JDN~Js*1r0shBM|Ib3fx(l??VlFzq_ z!@}a~F+(f~3lr?J_iTn?OZ|w%wur5aCFA~0!a=ePa^iQUJNaymB;M`)^w&QAGvzeyx!z$d z6>ZH9uR52MPMrFQocz-ryKn#J1?Wg6!y#go@yQ~NLq$fVE)2F)l+NJ?WPYW*1exu%8l|E&873Tb%%rrEau2_H#o0aNkLgz+wPBqGmyDrjPc!j5=UZy0 zyb8c6N7^}Ap7fI0wvJ}z`-Z-%vQgL4V_`j!I$EUFc15jm2~JOC>Ycz{zu)M??3 z5@;?n<`n75anhqOeT4XzkG`|x=IzP(i#Qk;AII~0bRg1$x#99;kHm+y!Q;O!;F(!H z;_Y&!vfAu#9UguBa=SiHoNsx2{?+}!V>sWze|FW*Re~3Je3T?>ZNg{IZ}uj1d;`-- zbjmZHaF9nh^X~*ub(syBRPk5-1}10s5vPD-o~ijOe4S7L0gCm8rL}1PBB!7~fq$Ko z*5|wM8-s0AjucH9zn@AB-VGMz9bPM|Sms>Q@s47geREIv&B71vH!HASB)|NQ2rJ!* z7-sz@+1UPhESL~K^MJVOK3#{#k4>=S#W7Mo9vKz2-eQMm zVPS#KAu($j*gc1f+(vGG(kkHoT0ZN(YW!(`o~+I7XWy5Rw_YVV*Y*J}^_J%-l1Kql zQeG5N-U2mQb8$D)_(>X*0o<^)&=vLDxW|+oUgFqs!+qx|;a^DJpdr+bLp zhi8&}!5vGV6KWIhS3+5I|65Vl|DPX>MW}X6_2fWdrzxx%J{?9pqo^B|akzI;*jKt? z-K#PJR1~&$;i>CDLSsT@%^{H6!p0N3!jV-1sCjL_Rf5PQ;~jzp zW=Al72KXCe@=?{{lw?l7b&l@5btSqy6uNv6sDc?n96=kdk^_VYV)8K)0#W!amJfo7 z6E&r@T?(0rs%gvFMzmFa$*XZOgLr|d=IAsC2-dA2@OV=Ipcq{3TWRk9TuGebqrYJ7 z2rj6NLG`=VmBut*FwtAA?#Ca20^$ztjfl)&RTK$2}SHP|%F z4Z4VNOYGv>g3&>2q>Jr!$Z^*CIc9W&*_h+3hX}sX&8Yl^{111)C1UeL@^|sHP2F6g zi3p_^-+(Y4DIN%NHNk}T+kwpXStN}$^+Gm0D!V9mDxrEpYezAr=^~cAffB#iXs-iN zxI$Y)9rUH$J6YaO#)s-|7oKICQYa>@l7A9r9%porA_(V3r$a8`<04$sR}T->{XttV zyw)il5WPZYD;_uaUQU;|iLhP795pjPNxPe&wg8ivYdIEJjNT|vVTTI|GjGu?8>y=XGIa^4+-e{lRTC zMHa4v2KL12a&$;-$wlDIfjvjnpN`Bl+P19JUT6TfN=E3IMMQK+aM=XuQlkt65tnqx zXpR;9QJe?oN6Q1Q!y@U7xa?i&E?ihJQ5;^W?o>8J>b6*$F&ULqLuDHfN55xYj$9pD zh3JXvPAi+jD(_Jm34W(z6qIiOF)T?c0Y>>1a{l!W!8gw(fKvj0o?I1b>}-yPe+k*+|?Z%8+`6^$+uuQ(MLrpNnLa#r!Z&z{wMcD{8V$9j^ThG`)YmaA6wW?c%oS-3-X%R zLC(|9JVjRIwa&_0zJRsYV=0}=o|8WV2&1#Pdw(J@hBCKI6ELy4`MCO*o&?*}axY5~ zZ3?kNoCPH(mde44K2|1|A%#679xPW~Y% zB*FXe3OFJwn@M6I*O0w7{M-BI;mriXB);v6sikLtv!l()^5*kxH3xl=7^iPeQK0vo z@dY9Sj@zWAsg+&6yUpzFuh3CmZ%z@RiJ>vnFm_Sz4?(EUee~EyJ<*7I=|gU?i0x0i z0eaDG!*e%`IU^e_xqH>|kQ5wl&W?gjhDUdY6PQ$gpWE4>JSw~~c%U4I*;tfk8>@uk zY-W4mNsc`{+$uX?m?t`EP(p0xx9X{UEi&cshg+@SJ?t78JB!5Fh71AfE9Sdn}xsiN)FJnq8SpSi7(2O%-f)OyF#DOmI^^S@KCflTd2$RZy@eG3*;+ zZ|Nm_bN5!!0Z+^W+?B2b&-aDE0RKM!*$50GLCGU8cekiI7A)jx5Vy>RF0sG=EV>01mP=Ifz5_Qtk=q%$o z3H)rZJ@}mNt|b&b2AqwmXG=RG9tUrMwbIc-5(Y7CinC?Yk8MuaLRYb|BO(clM;@&8 zYJxNPsQiQO1+p59tODTpNVjj?%|#8oi>FPQClxjV5XdNl$;}l?eJX~OS0Qg)2oltc zr1&C3P<<6kFBbG-;#7zhI3`&eC;F_kQc_w&#gl{CL8a85ivEsjzPLY$*tlnUO**5L zfl_>-_Z!m!jcmD>;a^@bqk?|Yxg@~SbH`yqdINzD_*)XrB7BzyYFeh~blUG<_R~p+ zf~k6J!Y#6u@DgwHc1RJQa(o z=T?kyDW;O(goa)gfA|rAE#_6}F#MIEbH9>nDKA%OveHO355quE`R5pVls+oRj%+g6 zuEBAmX3Z=mbAI2oFoRFSpkbG7gsg_x2)~K(kQra!%_4dRxC~r2Un-GlQg_}o-GFza z8_7NuHSl$zY^C=t1*0K{5-{^=Z|^%`q?nl}I+_@|ap9w~%P#3by&z4>Efh8P5J6Uw z4j~be2>h0^LME0&{IyIe=bi}KSygIu>i?{Hq~Or#P^d()X^AP{Zw)|kX>SodRk0mU zjDDDP2qRs820$;gwo-=CDcX{xXbiekilk3r$~5o{F%BtjWUVZVpObcVq6VnV$#Wu4 zyU)=VBNFq3k1N@RF0yB(9hMJS5346LP91}5Nq07UM& z*6o>Q#e@3|_AWqW3KyM-h2d3k#_01~kbmqrWC-$f_?v}IR>8BDkk>bud(QBcmIDAS zW=vjMEkR=pDNL3JF%c)Hsk|VLCh4|F@RkWAGXj;<`I*CGAfZB37Smx-$jT7QR?V~+ zJ}Q$jme}h47n4ub;oT0l9;|FR)cqFm{Uh7^w}ybftH(Q3B0VibJCq-0qVvn{rVK{1 zRX%6uSrspXzk^?|a^T^~uZrSZqWGJh?`+}72Nw;AdqtjnsgHJu^USbrOl4NbojL}{i6WwD)Iq`_BSo>W1F2$u(-Me|Hx$kY^{TUuG`!_y@it_IZ z)^D6hVmW^9$?fDx3lq{C>W232DJi*xhOIN)|L2ifIkV%Bi~GgK_BcPgn)^3k~t088hy_^(zfw6E*(~rtA2xf4}<%n2u-j z_mj(hWs|U$l+avmHYSFCQ)NXKc zQpn@yuojKzsVHP^G*P)Eb5)(fcq+xN1+W^-1ATn{C|MtJr`I7&VL8R4# zlaqngHa1yV&r!cO#Cj>X`bMr$o)g~rGXw=ZI^Ulne%dR+=}s|p4vg+4K0knYOGfnr zZq%N_W+?h2oa2 zBU(Ho+a|#}W+7u$afL*x^#;5j#oNyK8R;6D_9dv7lAF*yI{7A92ry!ekx$$(rHA}gHDxOPZb>1PwZmj`R%~T_;ijlFyUVz| zDu7~m7We~6HQF_@E-Zb89C>ho1Dmb#foBciw_QS_Z0hh+jpRLZ@{UEs2B~AbHP928 zk3IfuTb#}mQxgNUKu(>7ONPt_UV}u_)zvgiQfgsTVJDVJH?)ml6J~}W@MPAk*-+0x zizwV8Js{}A@^I0Bv<+!{EJ37D)9(KCxfn?;tPUaW)C2BelCfM`SBVC}%4pwkc!`ae zHs*l3rwzPLzHQ&`=y6z3*G1bYl@K*RsU;z}c-ypEJTeJI=_i(ql?#BDk;`w%GkTgJ zrlwfK5V@PqC$?(vn8Y_fwJUe)P@lV)h&PiKgc?~$qQSjn4H1kR=Q3}g|=x-6qh1VqDl9D^tm!O@?aOnr`-+6A72Q;*ovl66+{C zMyI^2>WY`G++3XmiO5rA>B$s2%38fq6( ziCI1|*C}%*`U96KY~dDo;a=lxYJqW*BRDN4pZOw;ySd;t#(T8th7 z_k`=fk4->sING3imOs~^uJvBy& zN~UqXVap5&gjj92Ybv~FR~kDd*rs}K0EkV*c~{@#EBmyOCmis5hGJV(boj^Fw9na{ z)t9?tmAdPz`E{6>-*h8Y?ntb5ec$R_(B${ix9*eZ;W-|~o2btLzjMHEzJ>Zfo^{4u z6MF|AG2W#x+58mo=@W3?v2Omc|I>rX^?5f~&mV-I0G{1sz}VW+^EezyH$iya68nx)Sics1Q$~cg}zA zzt0BUj=m)Y-F*K`8T3afC|(b*=Br%)Kw_NM_ET!mz<-72#=U>XcaC;mjyUdSdtPoY zx=qdgXYGx~PAP5)e*L%@{#hKaKq|+d{V1^WQg!V$57&ov?@K5HQR--QHv~@QBmY-i zTZ|7KL&Il}G*ewX^^g0!M#L3pIUG&GCUAgepAS@hgE{J`48?6%giwt!U(06EGQ-+4 zU55pG6}?q_y(N|~rmriaa#62W0!;O3cIH}0^c8+(=$VpGP}}ASeU)=Ea=;88s*HMQ z%wilOJAn?oySxe&J6bWK$c_6VoH{kQA@~N)d4Oq911n`#bSrkzuIygXjloW4!0;|6 zQVVD=?rO)B7OE+qCXWx&iDO0^f-q?EV7C<6@?gWAT%Tr}i5mdm?8-yk@B5wuTFNXZ+YIGMO!{#1U7YduDE0SuY$_jPo zwCz?PUgX(#y%dQC=6FQF@==J@ zxVi*wbi|6Av&9RT)bqUm=8CbPZDx>Q9*m6*_3k_e-~Fvu)&*~rc>dbvk8p{;dtPvc zJFC+oH)~XLhi~-P1%&4RHrYh|i6o;xIx2-J@Cjx=|T{vc~3b$jl#B$4A$CNs{eBD+yA}CIwr0o)}Ap zXV%?<@cuMa9td6vwNtbQ`d`BFP-~ecu-P2lsgOKc3ypkIM5m6qqf`Y$Cm$x80#0PI z$SDRU%`c=hYhK4>XO%%lWjR0oGk^RkDk&R=)rqbC%Bd z;fw}B=iP0}dQkVn@u+fL`szEe!`3cu-`A(iN_-(6lU}z7z4}il&q`lUwYn-fSOWyx zx*xAN?s5Lj?Cb|UrD8oy)ZgcPe`@|y{UI<6JL!&n$L|FGQ*X~`{hx<>ksvF5_VW)Y z5_oFe{@0YwKRO0WQt_&UVXV*Z1-j2a+WVIWrBc@2ez3C|6}GzvHUH1(t$4h4=T?E+ z+S)luzgYD>+Tg=}{r4y0a9}_#al-%G&hf+5xA&V?S3A3>8!BJ!s^$U#JKtmh-mpin z74~kw--XwjL3KIX4L(UsX!pG$m0B&D^z3SAxS??IR#j7bFD7<;d3m|T*3HYwa?V!g5*sKY;vL!nSy1BJW8y!tp!?RzKl`8letljp zc*f_r=4f^Pe^WS03hz!)i*oio>XqCmM()?ZcSIQ1mk31nZ>$@d*M@^yB(7WsVae~W z=p)<TsBdN^V}h8T$~v4^#WXkk_&40${fa5;{|vh1!C}m5ZQ>R$16WnaC3U zKE+me$wO5d{<_b*96su%&z|b1t)M)a*FNTXO*glX+1$i#Iy?F~((jnL$mOt0PMtA! zWp-t=!&73KA`eTUBtQy;0c2k&J5|{Mx1k^^fDH|vn?eve!8m>9m2YY0Q)1QVb0{OU z>0--7&qI@{sPVHpjn}Eatd09CYT?Y$_`N`Ijpt!y!XD|OU%wv95Un`9RF#|I<8Ubn zYj$BTU42#x51rbyND@Q&)mYn3(+@Al#SCt%rF8Z*diWX@d_ttuI0cDPTj=H?+wHDW#bbR<%&jY7P7#kv~0D;Ts!sgRPf{dMoCq+PEtm$p!D!m z7oa?e3e5t_Io<#XyUNgIRV~UF9r3vhOhnD1$Toh2Q12K9>P~Sd$w27}9!-K8%YFQb z%4SiPU7E)CLw~r%RCH^2PnuYljAg0B~gr+#y2P`)<>)6 zP;D)pE~#^+&}(Log>hdY_09}23(yRFbm14Y8DF#YFqQ*BW+#&5j6P5i$krP@c z$uZ}^iTX>S5aSfXjK-sa*yfkQDITT*Cy_mOIXf4f1-Au=CPrOAF&Fgyh) zU-D}bXWUGf7)mv;6I0OAX&Tmr4HQYUKF-<9TA%+MZbo|)J%d3fp*qs8OCJuJX~5Fm zG+h&FL>swuta8`P)6S(Le$ZQcNaCe1U8HtI4)`=y9n!6swoH2QfM#6M30@bUjoi*_ zG3IjAJG_b!!6v4lrzl6HHx!Yj{7f%M#$-dYxfs1h%8#|y0#@N1Upwd5zE&?Pq zaJuoy>M0XM>(({=pBF$OQd`bGmV5Uj0J0shJ_Vl;W0%;I6xzT)_SU zovz*cEsP)&{_fWiy{FBf`=I^rUut(hwJUr7micm0k$CE&y8VLJ`^9_kTjcD$bCBEc zbkEV`P81z@tjy(lPwMmcM@rbp9QvWa?Y z@@;$X-`hWef4kq`K0FNov6h?+xC-n1Z)k*VJrd@ z8DustO&^`v5vKvSv|W`r8x<7yonUcGLhZ#!1Q>P_XS3ANM$#x-l8he>wObGmPucpI zxFlX22<*JmAHED>QFN7xy)oB2iV(Q>4g%U|9O+}(P zy;eKb8;be}!BUl`;P(rK7lRRj5T3;;IMu@YqJ$p>fL$lXX>qEBl!?WCExULACLB3T zMbR477{_d&aCk~CjGz16-R$Kd5fKqK#N%k|UZx??;vTBsxAzs{W@b>b7-~v+z*RYU zVFEbwJ(Ole`n61wUD0P(#s!ajG!@SwDm@Ioq#r7uhZiFM${JuoIVj_*GmxepW!h_l zQr;>WR17w;tCNfX+X8FairV*!p|nN|th^OhU(pvrc5TIDWh~l>Dbag*FvtG98V*Zo zlxD$)G%QeRwhpWhAmx$U#iFcw`?i#pg#OTt8Iid6C@sug5qoK=F zmj*L4OD+(qFnQrfdzb10wK7wO##p>xOjrEG(t{$Y z-U8NjnM}jwML7;p4atB1qpfK=LYpU>XPITtD}I@ZK>j_oVLuL@)w_uh&F)cM$yI{L zI&OtB0Zv#?sI9?#89^oX^ind2$&>cNRaQ};Xu~GH*^|1;4!AiW`k8uuzV2*Ex5GV#Vn`<;_@Q|A8#Cpb@-r!*>as|w(sC~=@c^q$ z|J~Xf1}^)Te;;L&fAWwzTo@*v>$7=8Bj^3V6P zNly%WB4B(FF5K6A8uzHg3!@s5l6hJUBieM;bZ z>Ajj@z+S4z;~yoFH_Q1%IAFhELcrhOs~xYWzyn@tum{ua!u{<1z&x;8hO-@pTa8`3 z-DbAByc3(7dF*Q;sDqp?M-v3tI5;On8ULLHL8GIi-OtB4Cz0^Ly@_;|?q3T^0sD9b z?fL;`d;ZeYfia){UVV=d3OM@kJp1Gv^i!(_8~dqtmYnnLOKZaoB0a=SG4Su>)2Dy1 zOqHl-oiFzvo*tiK>&aa_+yDO=i86kG{4-VMN37cmMeUS#_A70W7h32uVu8uMR!P^# zuFLyJJ~o=rXA0GpTjQ)XCySO_-{;JV9f9xDpBLZt`Jw@{!#ipZ0_OPl&Q$XK$#n|T z26z)7_OC6x+>x5GR7joZVTm>1=8TDaB!6^ODwE{#N(~5e5*AbQO=TOUj7;8Kj4dyp zmiVH(5}s-!UH}GEVDB4$QIUSjwo^~~*UZ^F*6w;{l@|SoynGcnj5W`=iv_<)W0?kY z8ww<9R2+Lw2I{C(G;Y-d#iHm^BM`x0+2FbiWdr{Zsv{vKGcc6raV|zPnV+3)2Ux`` zVUgN4Ey)s|2Pf)v0{J+1780x8%&9|#u#VQ-#1kJey`o=0THV@Y!gA3%UshP4wJOn6 z*i2=ol*gw*%=K;ux0|$;*6rt*iED1sGxx-qnh4IM5)E+R7%JjSk#QonF61*E;` z_$Y3CgKrv|Nu_}MwOqS8SgDH=0gMpc=DTdToc!zu%)U`+jaKk3)=W2jT(#$^6^rJj zroH01_$iLE+wwwm#p5mI4Q2NMEJp~f63N}*k`E%nnb&>&UzWg%?hw&wb~(M!KeP^u zM5xreikTfQ!n!ULcXYY3-134eyj0P-P!}x}Ar89)Pyz^9l%w;Nupf^7=5Om186E?+ z$Bl`ei)yGsN2OzmD3mWZ7;c$Uu)ayAmVFU2(@6%qTZR=b#mLCXfg_SRc^B{=Z~BaH zq3sxyqSOxEIW%nT;Lq}W8$9}G>Cefmg6X7L6XA2Oi{c-T2nqUrb%WU;+mb^_W{EP z|7HV|h`{Y?j3nTas#b#Aty5+)($cWgQzp|Cfr*6g!l{nddy3~A zk1ihi-ac}Ta-3NK@0_Mb?eclZ0Nu8sUQyQuL>;S{qOPm1g!fO4cyW%$lT7xESgF|C zj4j>sq{ejtH(Xpn_Gem38dIFp*;*XMW+hFRq+$&watM6Uz2XE#hA=3aJ?%2)Ga?xU zlLQidM0_YdwIrjC0?k-74L)z;5YrIN@FVl@zFX1Jm8{0irmJtczwuCXKcby3Z3qdG zWXErZ%5TqnvhR6$g)V7SeqSV7tjT&6;2#>*2dl8}pZ(d27ET{0G~2)5Mbff?`0 z(V5XOBpedKw!feru+oZi8?A+{26P6=+p4sMjh`~PZP}rQMFvIo^g}>kcHZ;BV6o*J z{LRLXuKyuo-@h-Ki3}m)_;g&6@1Jnt@BhpCfB4BHqvJBQ-iZ&dL_zuAZ$Bgk+}eEF zBl?VX-TfdJ@b~&Je31V=Q~ukf+WmFF=K`~5f(I#{qp3U;oTuZf&0Ht`c41>hBQe3< zbfQm(6aD>vK=v@42)@qG{f~@&I?ofvcaATh=|R^QubhyS)8X5r`^n?IWT^}lq0@tO>A zyZ1F^PRCVE73N^y{5%lG1;m<}n(dL4tW2DpG41?07U=N4HhI-B49^j8cDq<%OGrq7 z^-nk4{B?iw{c^YZCFo8Z}lp}mzi0d%$r6+G?WkgGKCpE5H+>3EVok z1m0?Y)Jouv6&9C@W+&p*=hJ_`d5XPFBy20vhgcQjZvCX zIgHq5=vIvRNm3=7G zF8YayNPz2fE^@4mbcU~jP>!p}!vdFXEp0oY11{Eiv#oycQs4NMn@0bnhqSsLu+Zjp zqPDK%DJ^c<+a)~*W&&{Qi4<_id2jgM;>gPjJGXQmT$M!Y_mZ)(vWj%2wQ^ICGL9)W zwiZ>3>xN)T4Bxthh7K)dV}jdJWwPf!0aN--8#LvHnEFP{w8Bc&hZ`9)Z5XrWB(dZi zVlCCVq%r07J$|`gpXtW$OY}1tFr7oKg}8gvFu4|dBa5E^VnUu(>KL-rH8iZ$m5A`O zyJc7oI*NkB;f@qCkBszCsfo~tlygUWDT6~=C+iQict!F6P=qv3JOLjwMTDX>?_!_j z6CgQ{G0U&5j2vTe*CTFhPK%-BGwgm6FBAXgxm!$5 zcabF)+k#^4i8`A6u$~RQFVS)F2#~HyVqL6dVMYRu+RG@7%*bZ0?6Pq7>xq8E&odxl z!b`&%mYrfA2UYnOO4>%9p@sFE;A8*b^4uaXxJZaq7cRoyrCtKiwd6%DX~-X%(}tU1 z1nQA!_M?K)TGi6}p?O@gv*V}a&S|MC`>ZTF+0kWMML2ZJh1xXB33c?k!(%c~qyS37 z2o1X{5`edr{TCE4`D~Wp4^VYXeMpbw6jLovPOl5UrE28V7q=B=4}uqF5BFa%`v#a> zFoV2w_v39PiCd0cVHP5Kt<4*D#ja)HM`ddDkzgmCHWQL+Ze1%wGrVbDPP<@1b4gGU z5pD}#Qp$|N#$$%PdfXTq2ULslp+F+GMTuSr-ncF_krqL&x@~* zzE?kux5dQhR~2nWCiUuW&VOHs1o_ApXgVCis8%&vZ09ct1%H2kp9yOBi}iz+qB!5D zTQJlGg%or>yoWV|$e3&kWYh<~OE5ye-tHPTgy~i{hXC)2YoR)m&Y@bEeB|dN&uv(@ z84-bzhPo~MeoqaiP#MDbRUsI^3gZ$M_3G~?S{=wA{(?!3o-$4QRKC2N1Vwz;#C{$! zuJHef@n@tyFyvF<9?gQ26#&@(BMQg&Vus>B5iN}e!AMAOH==~*vr)%D)%E;Qr!h=G z!)_P0v9Wsa-exIf=nJv=-#4AXvZxKaLp$NGOS7hB(h3S7z0Iuls*>Re!R z6{gY2ewQXTcrr`EhQVWor+w$Y>~9-SuYXdTy8`~c3yLiW+|$9fWxIs&tC%xqUCy^J zA6~exyZ;b9J#+j;`f{dJf4ASSIZ61x>&p{w|I~qNe&+UV`_!U)zR?5M_3ZlXq2cDW z5&HHwx?AaO*3Z!llYsCkIs}9IEJb&Gk$-XS&2U|}J|f?b#Cj$$F7KrC85yYN{q%Q8 zP-j*Tc)1e9nIcSM{mTx~SogABxTHZ86S#QQV>P8s75xS|>OocMml33`Xg`|FOe*L3 z;LT~lT;6}xcyLguLaC^bc`Je3kH{R{Ed7CAaDu>v2NpO3=U)lxA>ajUhG}Kf7F5Jk z@edfy%Ax5(QRhaf6N)T(vKLIt8|O*%#5D`W$byowv~w>3SaGOf^3oo-XyCnk#?NV- zRb<2QPK-I~{M3$;>ycc=ac0Vb7PQ=|5@Xc(-DnFZOAoctcr<;Y2F~?B{8AF+w)C`c zcC_*EB^F}_HoCIwvMi+I?tfvL_CS>EXmJHaJl}}L&I1%}Ah`&tlQkiZL0L*L)W?#( z-crv#PX$iK&eM341lving;btF9_6YAO%o1fU53%HIH6jdD;nbwBK z+)&U&w5dWmr*H_Ka~$tnsdGV{_$^^KW%$Gbu&t*2s2nD@oUi^;)CsA1*S z2gehujnD&*1mQ9I2JJRO{5mO+$`J4-TIxeaYuIY#+9tJhMK?TJyDtl~4vV)=eqQp>>Z z*si{VqB_qK+Bxx(V_NW4VVu;gEvHv>9n3nPIHyOg17(znaFo=*LW^ zCgw9CBvK_(c=85-M1=D(teUNayfi0hujl&bisdz0cuPW6@PwR_?O^v3Oz4@B)R5HD z?Fkx(+Q?1=RLpTKWe9U<=m44sMlNaq(ewmJd7h5A1-Ohy$X%JChH!yC6=r25_HCU4keFJC&ZBh9Z zb?(n=oEfx4RzU#@=qpF`rfRk3%Q|3PHiUr@7Mz*z*UE$ZU*ygaTp(kRa$2}KpNqgm zCULnmMSN~L&<3B_PJ>j^E;16giNzQVjA2!k)-0I}yhi5|Y|2mS$>`MCb97PF>;H#) z`PiUJaS_ZnAQUGGqMbV|ojL1_fdX4eknNMLZR1z+ndXZzTeLNfCvi|)WbRz7SSR#r zWJpo7R1o_u6Cf$t#}MjJTvOycNoP*|*c%PdAs_=%iMUg&tz8=!R5-Wq)Cz$UW6}_> zG0)&)(r4D!OU`aEhcYbTfhudIa>nJCaP5$dhtO_UT6mqxbAw5@!EClp^T;i`8EBr; zR-fks1N4;>eXf6{)px(LI~U|OutiYQUQ%?R2sYsTZ^Lpk28IQai%*RgFjGkT)Z_EjoynK z!e(E7mlu{H7?YK?JMny`;ea&`{cj6U{2q3?_zTf(NvDN7s zX2$ix%E}smVo?9F->d8B=Z~)3ygUsDQ31?M66+DKuAcVOfq{X2{=F zH74utw1Un)?GuUph4B~0Kb`(NN5a+zq43e-9h~ehLEE1%{ey^JB`$oF#ZwPN8Y4%w zoH`gzki&5ohURgl1w-@e8T0kmEe89Z7f#*^DE+4_G^Qg0x1D4+!x`cbyRyWk@t_e} z_7XZko;m3$(THr%U-=}AHOuQfYif%8@HpMNV|aY_c*rqYbS|Oj%|7U*6I%}Kh3@qg z=#b)*%~BQ9rL@ncp|tSX<5cFV?DuA%^O;~=Txwdo@{nz6L?i>83PO+bnwBQ#8Pi0h z`|*9FT|#c>oR;4+WQ%QVnB6oY5-oc(yh+~2A=!dhM^ouqM5=>=?p9>cPQ*F#j)Np6 zGVjKC55%#H!ZoF`gJ(C}X9qkV`$V%(t9bhjldThQk(6TS`Z1Tn>o&7cHyW-Zdi7pR zNRDrdh#~<}M3{MV4Ahmy=DcW>wy0N*et2Jjyi8wl_oTJA%-pQp=(>1AdN4H7lB~OJ zHIOg|HlG4?rAo7qx{s!K42*xU(ZgtgDNlNX4%-*rZk_cGWWGejsox=RX8fpA7x)Nh zPk+|teJQx~uqs_!FP8K(XquAQVhmk#PF?kQBbImgYi zj?RX&do*oaaSzlq%AT(iu-Rb_1Z2SxRiUj%K&VPV8fddV<4cT;vSq(9>o<746^pAw zI#PxhHROSo==3y-BjCklNj-Es=Cs;A`kQ8|f>x;T=jjza8x6nH05y&fHT8eG3dI=qyd%oSA?u83DK-@oT3kYuQ`d{KptyIgU4 z#xSzeoHA8O4v-8PmR1BGHp?4?pTc1j6G72E9i1w(`)pi&C*-r|K1+56r-Q2V^}n^a zt_fH`u&YIlwfXZoVdYs;2EyM^Jv9Rmd(kEibLmbzwj0cADG2EC38Y)l)-aTpNgS;2 z)*sh&97WCjX*vZKYSFE5%O%$8!kfh!zs3b(VFO~sq2i#HRIiyX-4l#j6}=u3N$-(W zR*4K#kR1L1qVsYbstwBkahXh=n}wU%z<9Bx4tESxUAfXaoXM7|T$+K6cWf@iQyoM- ztqn>>#Yt1)nX95nzRiSgfGvD=xDy;li=YBCE=<-xaKH-%y${nwbzibMjB-0}Z15y1(+`??O#J_PCY=1ojVisqcK`Wv zzt!#6`r{DRWStKVUd-+2`f=w6fP%$$9s^)9lSzkP`eZdh*-C6IJ8Qt9Le%X2-QIoo zpY>7SS$pI9*8O|K!$LIF@3+S}jw8vg{FuLBoal5;x61+Y|8b%aR7IxT+*}u!hUC|S zY)Xt=XVihvH$Fa|8Vj40e=>cZZm7FVNWN{dIrsb~{F%^212*an$Rhd_kgNtHM~O0w z^q;7UqJM3DeVX3s5VSDcLy2%bgPEmxID(Q3Is_g5PaBd8pZoH`*ZxJMdD<>jEyK@^ z{7b_`s_i?D%YX0^xhK=_;Z=OL0Ta|zU2Iqt(YqXJ6`t(#+%s>Y`2qF<`P&Dho4+lK z7{nY1FThPocC?`) zuE%S}^L(==cF9X^j31&0k%7nr$~ntzL+h zJ!9u`(zlbcZ zkIOU%@L+S+`)0w;0#B-j#j;M{kw*5tv~7=5(k6%HnZpR-;W5(45vSqf&BNnOXb6iDk=c|-c zE&PlH?j|Vga$~$`3=t^eXe7I&i4V?0Gl>Q;Bu5nc%R}Rk%F)R}YIhC$zrj+ygz1Wd z8O;mr_;GE!>Q_y_lncENWeOK@c9n5S?qH+7;`tRVO?7y0vP3{Llw?e1y*+HoFu=dn zD*8@ow*#!jgR4LGM?R;(6IETk(nFJ^rC~u)o_aL=3Dqi`NrS;GTZs4ZX=$FJtX|Qr zEY?~!NgW!AFI6Q$tRN(G;c|v{g~sAR`U~vA>-v3SYeo~^Nmhvo7cq?q5>0=nYbUy% zL(LdS%WvjRCa1srz)FE%(-x?VP~6OsixjsaMQ*-(?)lHTcjo?gCNpm) zZ!(j~lV?3^uf6u(^E$#123~et z8HVc>`4tm%Vye2A_m_SbnZrl>-e$?0EqA#hkCcb8vigM|YDW{zVH|}*QDG#U24ImV z(pJJSa~^=lO`7N~pb3A|U%l7u(+7}6_~u!XHTr(9N|;w>#7ZVLm7!~9`W=VC84(W@ zh^(3}^4e|ieHFL24z~{jAOb42`X9ejOvG_f;?PS2sBmOammK#mZ3c^7Ug+r(u<|az-!lRn+DL9Ci5tB4jY72DGrD`VtI8a4a^G31a7YAR$K`?%%|iH{?5t0Z1;khqvmBpP$1Ll<{`E+;l`{r&7` zYN)vi8x9~+Z(A7J={VC72&aSSs;YiVUi7HM?CLeIFg5p+$IU#xRu0F`{ly9pK>Lt7 zEZe3l-Y15z>?>!tjmvmvpcG@Zo%c%EqxP-UbTs>&)yabT%+51IUy%@^EXddw>2Qrz zVtn&}>zGW{*^nh^pT>OV=`+rQM5fKA^!>1W8uTv6EFl8qp)cxP0ez&>ljIf5_=GFf zaYdtuxgGUWaB6s=#LV&WgOu?J$phKSK;!y4uJ*9)cdlz+vM~7lv7Ezo7=u(<_88R@ z4rbA;7-!J}1sKcQn9AaBUiFbCb?DS+i7`@GB8BI5Dy z17Ck{M~FnV9#R)_>$wffGZAJ+=csS&;XBgAH1A^5yWV>Zgz=sYzy6Uk&iyY?)l}N* zRrqe;$i0=VKoVx3kX)qMrqfwMBU>soE_Y_!O{kW|XhEr9{C;ge*62%XvMu7{A}K>g zmKDTL<)a;uB>98?eoh&y+A(DTlXY^qDXA2ztWrV*CwM>s4Vw11IJn@>t!@zQ) zcc230d--J?uMXxwwpESNO#p}S;U~gnk@{67ZHeCo$<&THg6&P8_`2o|$4Ml{;`nt( zNZo5ZN&(-S>6=@D-4FYn z-A#-0vE5!VxKRA_Efm+SL}p6#-tFa~-XAUW`8qz%@+fcP{avs0P3sNT=%j}iS6h;^68I=>f}4gBH$Fn;{X;^^zYP8I+!}}_cpQ*^ zM3sI#M_nD`Z8m&O{?9k-M#SZ%hme>Z`Fx5n#7ZGV_oWFhWT+SR}Xeo#{SN3UMFH&HmUDAp0Y#6fNp~OA7H{X_2Xm%H-hy^8mA}_S})Fx?`Nn zAB$qH=_=W1SiSLB#L8>|&MKxfoD|vE>Lh9H#M+rurD%tv34OV;IkGv>92MVh)RoxI zVaimPNS|$&ZOsh!IQF2N+m+Z(D!JR|q)KIKL@M$Bkea5xM+2uxa_;o1vWMo6Nq3wd zk8!2ufDE+Xp$V(6(**P0O;>lmJ#X&&f@%_foPc)aoXJuQ`A|}kX=a)X=f{=D>fzQl zLc>Vpc=8eBlfWB}`*g1)$xK%84ZT1X_uObNTy*QSUvl%*oj^o3lSxgPRj|Ne@TVGM z*E?t8J10kXgT2=Sy;4z&g@RhINMf7{jbjMWIn$dLgF#fiQP&_k``!^-AT1|u>R3pj zvI-UEm{1|2HthyUG}_5HIf4kn?gglDObR4B%GI7zQz|K*HU7@#Hi2Tk`T>V1J}7;O z`RIM>RR-h7Ao)eUtu56`HGGf!=bAkLqaCfx?@q*mUA$GWv-U)aumx(oKd^uAC5VPI z;5i@*5O=3jqOoO|aNTC2H8S#&QXDAulHXwihd5J&T)qm5D4(r-{_K@Rl<=W@owbrg zH`AI{0RL0ewOX8V64K}HU$KOKePeJ{9+L+m@hSCsD3ZSnpImgkf`F%mNOhFsdz$wQp=#sS~?>vR%Eu zTDk~w^Ks_Bl&KI;d_oyj>(l$11yldpcAmv=-wEhMihUI#P|g`7qd}P=(>ewOsW-AV zWcfu?l*r4!G7-c2T+_4^m7bfBFCrsLLS4AoL;<`m~Nn3eLQ3)WiW z33Pi85%JRIjeg7XTT3oq`^>itJHdF0X=WP~I#3=8lTgYV#7!EH-U+l?<33(`#h2&; z#dH5H8cM!Dg(m&=YA%aLsmLP zisTANVhZ=q#EA1+D_28pm6trIv!gzLFoU8oPx`T@kP{B{((vQn#yC23p!uSI<@69_ z>g0E6<7I2Z+)~kCEQi&VzQ=gNeTwevgpUsTg)RA6;47FyXgHwdL~}3!B>7Z5MXYI= z1??(RHRfn$cn~{}cFc0RJ6;>g&)j*&*(LI53yR>QOm`@P_-z& z(GxVh@p8wU;VRAyRI0M>B-n zg-btf5sdRT{5P;l0+dlbRBt=H>i)4!k^X|0(qjz2{HNlDYJ;9v8Etg8)&r@q|74N8 z9y3@FFn?WZv8nx(Cc4FQ*+S<)6XLW`@>|`^{!Mg{IvDmTu3b}|@9?_ZrMwcP^d#<) zvNmif&tCx^@Q65ozSGb^P~|8AWRg$tJBPvBD{G5YZFT)pc{gZ$Q+AjmR)?yW2B#e- zVI~MnC)=k={_H^Upw}XIDEBqcUG-ObAIW*bc^6CQ!TDeOl1MigzHK~eD~3{-eLPZ- zjIKOdqL;oF(LHJ)fM}D^pR=xFQ%kHav6P4s!rYW>q;1m!f0G=dIVXeLhl$Xy+RvT( zD>#BZ{js)WR+N)SeXkQvc_uyq{_;gmG;wkBlJP-e3-RXBzyXP*ud?fti}lBNEs5=D zMHn>l`yUfP#b_|yu{ORO!rgn*6gbjMdr{WSbn%ZccEc_jYiy>QHQhM4!I$p43@_pv zjkvt2jo%GK5&0OA-i!{!=Eb3+3`Ix;17X0M$$xU0#(&%{@B+WodyyCM<0E-IXTG#= z>>^H*$6;WFB|YgV20n2_sSI|u>^M~Mz{x*$hRrE8;7M&=YNr@?qir~Pj{=k+8C6hx zq3iM}`i_>dS*s=NJIUxEh;vRxbs1wBm}oH9n?IH;gRxE0cO)(e@q`mNX^5wcYjummH$D!XDelCs+?i26 zjjoi>&8^G_taJX2G*3G*8u63KphnFMxAUn+T@Gj7a(5BQIRi{CMpQ-274;}SU^>Pd zB5CZ=?c%qjN@XoMZ2w8vNC~KE)E5Hi>m(5Vvgv2Axg6&EN-rMEU`Qk3HrY{g^|*V$ za_v) z4(bfU>|wYm$hfZ~LcDG3U1+4lN1zB)SzO;ix!ou^;@p-zb(UoLqbEkYmW zT00OF=0%1dF~*|z@4kbSgtSd{XK2V?&*RC)B?6_wxCeTzFDQtJ>Q;vsq=Gk}PKKl( zpKl+J_NN}V?gOYW5T8c7Uc8fjJ}hW^U9^cHObolj457OSRwk2J#eg{JtuG29k8Z|p z0-ss}|D{U*$(S?%aQLIX(~JE#lgpdXjW^E<&L=c{PX?=@K@k>AJyLRGUssKCaw9T+ zXiWZ!Ro=Y$G1)^>J(|HBl4iB3efuEYEx@-i!R(ekm36gip&&Qa67@amec88f#suvYYj%kK>5dBk>tdnwdCz(7kJlk`9 zRpP8b$t)5Zl7*_%?7e?kRvPB8S)^uA&KWL?EsBmG-Qk%v#+6b*m@n(Ec!?M@M3;o; z8QF=F1+-+n&NpPOgjd!gn`zJm%fcB4`5Sd%`nLqc=!ceh^S;$L)J9uATLO55cxyms zsS17hWx2Q#HPj%IXNl;7nDc!sY4xoVoLfC}ov$UGww^$;8wM-b1d?Jrz%(sW`!+-c z>-MCRoNSO-TVp2-_=TE8l6fS532GJuUF1TN{mhMwBwApmMIg*B9>zpzr!+&HXC!M- zYCyBecvv4G%PID!BsW$KU9=)Tj7^kLqpXL01PNK<=u3LkXNde4@R3*{E}92-d3xpC zuh+R}R9fa3MZWci|Kbs~HeF`ogL?psaisYQ)q#ClOROJ#;5%~Pstk9*Xod9FEag@F zU$m?ke^1R{HbPQ+Lu_>kS}jWOnwGyqLtl2$mE+x0tTM19GE zEe&ss;*B}^QZpD`VlCy8OT-GYhW-)WBRo~K`x9^~yGF`4&_3(KB$OPW=)(4MbVqhD zEe=PNXV7sWgHwqDRQM%QbtB-nc$@L_?inxzl8tO$C573{ zdQFS89YV5~46D^0;aghhwh_ThHzdszBgn1FQY>rmPS;(%emLPPFP0%qqBVO@QP%g; zBHk5W`u?h(KWBILpa&{HXDTzufTViQU`-RB`Phi?Vu-G2m#iuQQ(OU|WVj_cz&%>D zxl{@qDJT`~U%eLglD8lED=3o=!$)z*Nxu4BTgflr35->`zdfIi-?yy>v(kcW)b@%t z<6(}A0>7fCr`ozUSCcFVqDqE>-bC9s;C%K|VvJ4picGhhCum(bQ=^3Y_yKX5#1*~G zSsw+y;C@1(Akm4ohrY#YJ4^W#0X480i_I5+TgN|ct6z8cP;Kni)e9t?eY z4(jeOcT)B5G<^I`SVi#AQNh--epq|)e4q95#PE`Qw$Wu06OIV&3bQUW5d+HQ=Pfi8 z(DIda6+&$yi@t5Q#>F+@*u|P4naq{#ht?r;IHP#-IN&{J8qmBl^75 zRG%CB*SW`_@ulS9#Sjj)*{CKnjIipL!*(Kf^Vp9Aj9+T5gW)^&? z6YeG6pp13O7YipgVu8cfIp)IjrD`l z95!xv)kzsVZ#fs0Fjhqhp@FPMR%DD~l60$XMGnMDwozt^l!sSkc-Vqk*xDlz{f37r z1z3mV1+{$yF#GAYu2%NNfALZkRt#tnz^1}vF#4dsp@zdj1KfF{xqer`Nrzy0>zwmP zjeuLYgaz6P&{Tgx^m@HvP+SOQcu%aC3gGX?D;|d2CfTD5V^B^~~n;Lc%mA;VzXf3uNdI z!s6u^2xGC1XqGX~!s1*F*O`gwfs?Z6_LixkRuO7Lk&8YvYSqJnok9db$Q8J#D3`oA zN^f{~t|V0}6=lD)YNQszkr{ty5->#eAGIo_)BcO*AHMqRKl#Ae!CNw-T1;e`jC8XQ zraltaBwsD@Z!aO})Q1NB^$7tAp3J3;GOe47_!77`g;<6{gFFl06~fqXmGnI^8gW8I zLe{dphPhEH0@Q^d6H21$J~JaOH#1-4Uqc8I9wtZ#@%d<+$4CTjfglIRy zqCW9S>VAgD#wCj|9%@Y=7dh=%+w>(#m5mSWTcsMIur{k_ zxqIQtlaaZ-0`qU#iy0k}JRF8_vUnzf!zmMpc1hbgr1c2Ca(&h8w7DU@%nM2v(qA0nbg!Fiv(}X{2wdkL_F2cl}-cb}nqV!^7qH6keBj025Fs_*`%Z+j+|HGUSZ%XpBP`E5Bf|A6}a^khPOrk#t zk;0d`gXUvzbOnpy35~_&FC29detnUF&m!R@!=Rx`H4>>_{5JXqmQqF+5Zx$PbzpD0 zNv&@NrZq_O8_z!WK*?OI%hI>Zqv(rKpsGx4uF` zaE)DP^=9_&fKrZ~2)ow+0vULQ5pdA-QDCAY-xZhue{z;p-5uxbrx%#+z<_ z-2ZLY{}(&>rzcxC^6!FY_aqL-y*?d0Ot{D8`bOo>KC+#H`0E(>3JI=2`84TO)TD*?zzokjQTej@zT_ky3Es1TYlbn$jh!iLVj279~ ztvq-L{7`|WkIX0J3d>7RR29nScc%u|x&Vx{V?V-|;)cXW|6%7rDNyhyT1JoI9Eg&$ zbVm2#eu>aaZi-L2>BhF$c!?Uxwzs9in7khrzvaa{=j^|2C*KjkXSYVFjzeQY;k0zn zu7YL(jjmE60PGg!{;q*SjVfKAOKk)2Quu{Om{)sOpm(0G`@U2#j9hEuG_Ra1^Vf5| z2(X^&b@f`eU}NmZkK<$j}&g!f!B1&8#YXsM79kPq>LY_oW3-bkcqnhly5@y#I9vhC-UzL>^%{m>K$6qa8p zKA}t-DS^LDxw#3bPVH2?-kV0*pxt2g@-k&hmH2CGmpeBh!!%7rrYv2g$}uUQ^V=K!&jzynlu6C4Ss*7bkJNWCMHWGddIBO|H{QR2ku3$iY<5;FKH z32gx;*yIr#krx4;$CK_`gf`9i>xEQQD(x`sp|~lkUK>+B4^}U+5Zu>M>` zzp+=v$_H7De~)?_S%YN|nbQj%)y2@10h&_VY9u@fyqTnymcT&r?Do3xl%WMjC6(>Brj{NG_=gPme?_`g1#TM+NB~% zuqv`E0u(_=oU$Z{FmPu1v)4t^5cCeCIL!%pggPD)288w0_AR&_IX|;qY~5!OOidv@ zJ_#TFr_}Jj6o>zQc@zq;r4}h;hR;9q@qo%u17Yw62~Bjj?@QSR^1z*Mjd|T+$*L5bX1;r445tc#5zwIxIUJ5ZE!yYKk)z`T{?yz-=@|W zxrz7%nn@VspYT|v!Y$3y{$bZFR*PcZ9I+)>Mm%>$@x{zRCv4_4mQ zc9<75f^#U)&46nn^otgJmO3ins6ZsiGi=Aj_r#KCjw*TFN3@is+B40}SX@mqO563) z$2=QZQ4g-HEaq=N?y&`$EB;LVVUvZBnZDI$RY(PdQndl>Wivf88J(l313(_O=#J7z zDB4=JBfHz!Ggv*_SJ)v6D==Nd9;fM5pvR?_Y{JBw@BxRK`nTAlPAHTG@fQ&c;$CYP zYgSGC)Q-&DWDse)0fZ8hY~U%zJL)}xYQ?X6p;zuV~THn?z2O3C*pc&mBvnnP|8x^H2firai)jEt}Ua5$2wvYOUxLIALWTUIWyPbG7Ikg$} z232+KA|gA==}pJceGXHdLxM;L&Pjxm(`_8&Rg|GxkKiwhKJZ*LB-ag4st3o;{K|HM`F z8NhP=-X*H@=gHc|>IY1>qaL&B4{k@@#E;S%fA?S(t_2mq`^IO}drsX+ujlUp-OkPn zkNvcrUD7>mWEGPdER@J#ODWmsmszj#yXm~*62$9K(m=RUnQ~H}-*c~i$Q7%BdIx~N4e`QLh zw*IQI>ME4WvM#||BmT+5l?PHn#$WJ}e@ar~SXn(k+%qyn)Uh;4{Q_GV#6Rl~Q)co6--X8}iM80I{A37d(oG6&g&4O#N{i*o zj~KK;8C#T?wn(|QqB-+nqWDyPEO7u`l=>QLT`GWC539KQCO=B^T;AS}VI&OqTRD^5 zbdfvgGuv^mnMPmhV&XgGC1oj1`-uMIbraeQOm<>j(NQdShn}#0tK8<(cv*y!B z4sE!4krNo+{5+1-TuljlPL;2i5~pdZG%Yo4J76oy;7cn)Er^=)J*|j?m=ui{wH*8> znc%P4gvErnVD4Eh>EUcnwJ3nyhb`+U%D9EI%?sL4xlcvgh@kMlw(bA1rFlF)6c$Cb z-`hX9KDE-s$>gJKEZq=%X4=hoec~z^+_aTplmAao+{o18)A6!Qh|lq`)%XF@KBETF zRSD}9Yw*e@-p9G;ehdr!F4vbOb${K9<(=`%DWOok1JNAqBVCxu@J1j044>9y&#%{Q7UFLptQWKXy+0FHA9MGTH+ zmSS$nijbxG^yd;fh;{*XnS-TRWI;4PN}{Z?D*SQO&~yj%_;j<`&37zUG8m-P?bTEhVhBGFv7*^u>Tw0e!@}s@uc)R1_8MjZ&|tuINYPuB9Gyz zD!|^@&kViVM8Cn2jFa4>?;2E@>YA(KUj4>4N?suQ#?*)$-&QQf4+)dV4m zFEb{KGCsUnhd3T6XuqY536S$-CQ$ezWA3}n(ubNHIi|uKr>7U7Fb$Y@$iig8x)i|W zgv6(SYo7LWwtpSKC{$#A9@1YkH+Xwb5|@`K34w5n(dNiFrLAg`7(0l5XfpSL{sd)# zB&vCEA+k+h=fbP5ECdhtb0`P9D`nQ*S+&tfS>jWMSnXPl z*fyill0fveNo=uWS?iRYi;Z^_t_lOJ^RKBoQ+JiOsv9_5a;EEJBn6MZGEU{K!8zRP z$Jx-!5y=FTjBFu=h01Fgyg?*UykrJt5>fuVi@QDiYct<->uOU zth9Nt{KEgd}{CoNLzAaa*-o7au}0*VrvV=d*)WC zY4fSNMobcQ$JkDQ?1y>Mb3JQHPj5Bh2lsZfgBCG@fzkk|f4p8``^-qNxuS zVuP)nKMlHTE1TXcq>t=r=X`7zrU`yad1nNk{7|rP^~?X`PF%20d1_N)y)farG5YU6 z3lG&*iS(__#dQwxHS$^)v-PO#mFbn{h}k?YwItSPDurZD%QXpy6Nj`rmK(QCw6uTw zD8;?-ha@O>dfyZBdH6ALf{{g)vtV}pag>9k!8*k)M*TtzM&;qk(dz_v#pDKqXe1>$ z)B4M5hvhEzF10fzHC0q*Y23!ERTs8ne(HC19B0{{t|}cIp5X2BW8q)jSn4FnpOhYm z2{j&^2s2Z!v20SjU?rphqO59c4lpn~336~eL$DNMQ8DuMpfiW@!33f)WqRKPA(!9RFFU!c28^&v&tG}uDZm= z4o{Dfs0FA>@!ZgFQm%G#E*Pk_hfg|q)RoT9IDdBE_~JxcnL6)O+L9bNKN2;jq2;PP z1^~MK==(ag%9CvGdh1NPsT!2wWQjPlv*ULn&V*?hCr0cMvAz|(%4x*eSO_4wkI;%T zAstu$HL0l=PCqSH_9o7jS#ONCSVM2GulH> zq&bgjoLM`(RhGthy6V-_0?UgVGMA;fcfN{v^Cm2+VQ}G9Q?HZ z`QkiY`*v_`FZ|)*!HS(>+e(BvqCHtiiSyOKd}Ik7FTu2>pIs;)zVHyTO~%W|F2B{! zG>V_ODzcD)x4s!npDz=aagMe4Ww8T!`+22Ohim@&0qVgpIi5-Ml2PtsqTfm zI{ph?5&lG3q_NPKBch(CYe`}yw~YQljbuExXr0izyT%48>#jLCaurk~J`+YSpHrSx z74Ij2A&V*4Z!aIm3s9{*28uI@=lg1Kbmm;X8ykQRpzY9Qs9By*;EdA4L`=hizLK=>Tg-apJ8mg_Me`~Z(wy~P&yjf8adouRAvT>#YM7!dNzLz zEdxj7r~Id)9MlrDAO#;$)0{w#Su@A{tG-OMrTmTpRUleWCFC&w zWDDu%`Lcbf&5vvkpRO=f2lw%}9xis)3%KWf=w6n83mCabWqhB1&ofV^8c|d{SK4l`!o2pf+LDl@f#7w2NF#KxwrDQbL?7EUxVr8 zC9!JJ662gW0R^(cOhoL1q!Y}2TJE~0WJ~gMvZ;yH;#{t|0Xrknx48kDS&Cz{E0so7 zMwLc~STW|-nsmTOQLZx?C|fXaooB0(gxiaC2bn*Y4~-64u3UsPuI6Ki*`Iw^k-txm zl4?<+&v8zHS#^?18?e|}$9L{6pFBt_f9ux2V7Qnf#&aHq5=0b#vDhfNm zDXCBVxVYg`3X6L&MP96r4fn5G!Yjg;!}t^5clia2bYbEi7ok1Si|{vF?CD>D4dVPI#EF&($SEEPq(Sb2X?qJ;Uo?;5YL4kfuz z2`~CQ@1&Ln9Gt)iV_F_0dJmhWAw}gu6W6f1iz!dkBmSGdd=~wFe2vdM#e-+eRd*Ka z#nJ7tr0eSApGoOeac#{`^W;3|GGiN`JJqJx=;1h_HtpiS|8il@Q4R} zZTdx%j@JrNPqe3-n-uSJ?OlvsC2Tp$j5@apx&gLt{iMtXX!_@rPgJ62lYCERWVh6u z3&Wj;RPK|7yamS~-4&B(RMs7kzwbtII*U}5`PwyBB%GrMr*wshxKNL{^t0+m5Azfn|twMq$HIteH5DhvUahOQIWiDQ*a(IIn}>Z_P24uCoTD?9eYmXT>D!d zN4+QM)+LEra?2PgQgx9sN05ETPmaGhzy3s&qhtg4P;3e%l2Y9^HtzpC<(;{KySlnA zo}VZG?%=%-mAiqwWvk6o((BihuczAeI1eYV``6+~e@%Wp&P$kK1^nSdoMA{D??gQk z-%QaGwB`G%a}As4@6h{=fE6M0mc-RJBOJ*K_V2(befcCo{C`}IIg(p(O|0Q+KQD95 zC={tG{os6Z!1+{>SM!k~n%n-H!>GsMN0D)uTWoyPlge$x?`7DD0y&m#G#qnnvHK_7 z!PO4bH2QAM-WBP2C8YFa?z{aZF7wv&OwCNqpRQ@!&^hUvnP-y^Hu0c}(3~dLTg|3k zHNFxN?{j2uAd3IQGtp(hTgl;UQ+xPFSFhkv{R1j1%hRtz+kzu|4W05Y{j_-xE{c0E zoZ#QR`ujFrQ|*Jnll}nCraaM*dRBWd!%1J~+QUTjp;H*y|0E@6LGbTj9)ieZx+zpe0L84<7@_qDsga+8iEKxaWH{RA$TiGT|EE58N@1c zR=(2On}Q$9M=mF&I$4qFR`vr;mci#WLbs6sU`0O%3~dEG2t9!ACMt<9<9o{478K;~ zajUf+N}<^5=sm>>9F*s;qJcl^@uN zCX9wRzCI4xzG=A#+_;ZUw744D7#muu)zbJCTEDS;bT%DQu_#-6*6fvySYQpX#3KPz zq-aaxz#5HrFy68GOogn(w?w}vPvtvJg-Au86NZR$A@W`XBm5-ERlI7Aebp68JQB(C z^&*?vLdS+@vITS1w=89ORzsPGG^ni9K!7TrMzG-86zi=aZc?nfcyu7Z#O{HPc~4jo zEupV6o(StKiw}E7vs1rVsPj!F9aV(F1UsUKz064OcMk+qsbKv>Q*OcgjQv zUl3Ljm&g?`xsOZ&7Kd0tc}*H-SMOCo^pS>)jU`_q4%5{TTnhu*G*Lp@eGq@Z-$^Gvy3b3AkBTUJI|uTuC*IJKp1C9Yr?vX(U3odKgBwDf1=Hkv{Jw1OF99<6lD7dp4HqE_xK`0MW7-57<4^8V1FH%C!MR|9x zgTH_c?7uL)lD@xcVOafpe7H|FDR}P~^bVH^XdOcX@L(cMc+}P_RFOJ29X#yF}(X5RC)`|!*393W6G^Ky6+KAu3_lq;c*GKCyk zMae9aTXoU1#!*a24Ice+W~8}~EWdZ=t0B)-Scs8=js;=bB($PG@gW+PXQYlPXE=7?5oXO2Jo>AW5eB5x$Yw>9phWHg$beOgD)kstC{{rqFx#Sfe zV0VLwN*N6F!zTvC<=Jv7BFJomdWzhLQrSA8$dUzyY%-`EZY z(@Z(yA)&@&_T$CV5c@&v2kEcPU}kY#y_nev@O@zdC(b|mD5<>yBb;l0@-j{)-}iWJ zSom}GzxC+eoqT&5%#oMNXg6Y{g#MW&g)HS1v@#kP3G#P_;d=IeDYB8tV;$pzo1;@{ z@W#0_VbCY8lXN*i&#sf*39LsR)1BfSZIaMO!B*~xFG(}Zb6*G%@IfwKEp9Cw*nTx~ zZoXOD+lbBzg^q&wsZt}~%^^+KPQPYGgR<#$Kw(n5wAu)WGjd8j>r%x^;bB}()K53Vl2gd_)9}i(9a-G z8)Yz|`I>VNFI5G*?*mL$x!O_X!0uflK*BzgC*#8$z^Toj z$M~L9W`JSrsns~6Kv=`*B^3xD%Fj!v;hEwOeJvguoJ%nE%Sd`}mTgK3y=iBvOPcI) zO;F({Zx9u)f_orY!X06_s_D{#Lu@%rEu(p7XIeY1I0*GynZi%MreDV>5tY5#~6hi4aJ z>FAi73qAk2iMHrjzrQ)cv9LJnTt8j=LxX@A*Pg~-t{(`#Mui-ZvPgMlZC`!zDrnm_ zb21|&I2TQQjjtW89qoY~!nvb0WR<|oM<4G&`WW}|$vkm}WSvg&Vw{(kSAsupRn608 zB$2aT;Cj?|6CDt+ zBbk^I>g!$^afad$k1Ttnf*rX>>WIGKC(fm|EI-)^AEX|PGMSN2|M1u6P1+Qq1b+GM z^1AJfcall6(Hk|`#If#(g!zFBtrfZL-jq;C8{gh6?P{MxXn4TCm~g?PV&#xw5QFA8 zJyA8vFLN z82K0uxtlm>VA+NGFl2witu}&(g9FMD%8_#psU@dC8r~8C_Bb?R)Wnz=OZw$XL%$Rp z=+Fu;f4q16Fwa2mjRtn%_|3aH;M<0pR8log7V1&)*{6pg%1l2`fd~>DI|f)FaDnQ# zfR`~^1t1nniI5zgdne{wj1#mWV!SiMsrZ(*b_SJOoS8X_D36jYfrocJ{zBs|+gYnd zC+-l1qKTqzi6K-lwVC%=URQCh_Y%m$w!fKSvUgN_KHhyb?z-axHpgC5Jx|FC!kXB7 zeRDO16f^cHr3b&St&GE9R2c#Y_oF&Nr4k=R#InfD?5!=B0^UuBbcqDN~UY&PeO4l z{TPk8*cz)QpAXXPjJAF{I1{9EqDuXc32izMbrvvIw^{ena@Dfql?i1|U-^~qN;o@| zCmv3c4NeEMMi|K{@N(O@$s{>BCvQziFYAG*0~1^0Fq!ES9IxGTsnV>VN|lDWAR-lI z%NUoKZua%ancNx7F5$R7Sdt0q%^{r*M3C1b;T3fTCR6{_lwT-W`oA3Y|EgYWv!M-y z@Z(9m1aXJ%@<(^*>hia0UOed9UVeR%yMF!>oNoM|UI0A`>7$YTXNSCdFn98K>i(aC z%=Xt23`xjKF<<}eY=zh_w(X8Epx3J#lraSTQJcck4Srg4ca8iwRE}(Q`ml2^aqm@; z@4zjiBr~FD7S3U zQ*)c%G%O?>skkY+VbH4L@>)WKckJ68yAKjGe@jh}l~daS*i|u;GB5-(pFdlt`6{c0 z2Clvv*)lz8uAUKjg={&axh7S$DCUo-w47)e`FKJ6f!d+Vu@;_DVFInRU=l5+8`;Jf zyAQPLHH<8pMN6wZl)j{!J#!H_I*ahM&Kv9hQ*P3MdjF3Qx1cp;D{2f|?O|=6sI**H zqRAUW9b#(|u47&}Yyh?Dp@Bpn_%oWCM2H(EIzq!txgj5bhQp95C}~*$)lC-AMNlfe zt5TVK-E_uaD!z+ZMEaWURt7wygj#fPeD${mG1m+ zZ_yWJ=2fM0C5<1KF$b_Vv7y8JMDx|7tC8g+Xy-Dx#(dK_rJd89w5duJ)b`ZY-j>2M zb6IP`lSWBd<%MmQx?#q$)u)0^w)L-&`?Mqv^$!U=!n7k1q@#j>PsF1<#5jcCrS0+r z#(a)V(a=|YwG8o&_NALo4*e^ZBVjvmtPxxvRot&iB`{U!9KIWEZ9B;Vddo@9kd+<@ z+YdFUSR8&fanl56qX9wOfOIEA89^NuTTZC3VorA6qW6}czdOtR;2h%@HX9S0=hWE~ zabBBF!Whi&bSXq}n>0$E6$;R`Lhu9uG&p2iFYvg>cf=__zAvgSyNee_a+pP{%IozK zO!brzN&vh?#d*%PXBE~XC`_~ZNN8gSyH%{eZ;54(9PM?I4GaHE)A>)5`X7LDLe71Q zGu1`Gs6#rIb9YI0ZgCfDorUGt66->7{cP6hgbO1t3)y0ocPICQ%jd~yU`gfrV%VGO z#fL2s7FkEc#(U8urUxWJ06(@TG^Hj#zx{#t>b)ZP<5IkKN?YyW?|d?IOF^AN;`&Ls`y`|63m;eSK8c|bvj<{&%vyfMQJ3N zNx&YQB^+6RtfUfJDp!?)zGPrX}`-JWN$$`s%T zbF@xnPu&%KRXB>GS_9*XFX2q!EYZr|js$<2RgsZuOvudFmWuLItW=kRErHwd+doMd z?0L50L@AjRf0$rHQ5rZsKUPZ_bx!W{$b=S_)|{U!4vY2q6)*eUzF~F8oyYzoFWRjR zpR%4BEse6IeW$*2lyaF{79*TnFPzQvBvx`8zzu0^W3FK@J)bF&PaJF+WBfohYgLlcuU$fzwLGQ#3X z5gIh(FL3jWdDCar?xxkt_J6ohodVg5%{wSFxFkp#P9wptXqihy9up;1G=Vgo&Un2% zSej!|h&eC|@De~Vk&q(zV|7E}SpGJ$A*zhHEuVnV*Pw!XCdFN4JLKqtBLbi-k2{U? zTS`|!isC)*z)v8>tmag1mZAiE8r!;_+FfmSQ zo5}!-o_@QT$})#t`&&Gw`ir))SZuy=2Bml&$wasVrdmowPb!8XUB4HuMNYRcK`ga$ zYEcZxrh^lQ;DvU#g({7`7dW?sgKTu0UXCzbi54f(o8{w5;JDkDPYMV{mNRbbWUN90 z{}|#JsXw{ovTh8C5t5J~-`ik2)8eUsN}R=|?HUidzfz?hCH+y;I{I?8VYWFXe|ujM zP3|d#kv7eChc5~}hUwhjX>5l75vsXU;mQp83CZ{FTTqen>+>pRlxpvxqP+6(eeAOy z)zAOU^gg)24aE|J->7dzjpLM!1Z_J(Ud4 z(0k_HR^L*a*uBXSEZ0mAv0CQz3q3L=Ad4b{wc~6f+0$L*Au{-Pj{9nJGxMIJ#6g0t zg5HAqqN*w<-5>Ct(XHoc1|3a0A zPBMk&vG6M6zkx^p`uNF*PBe&(K3WE1@mhcgs1U2u%|H5aH&?a_`PrXM|QaHJ6T<132Nr*uhi3R8fLzPl5=-b-GIy#M(mS~Tb ziI+N;1NWgNB-^o{<&nCu=bH5%?xcjPO;=tj#sXa2W&#JSWLxZ`ggh0R$1kePj3ZU`%`lm#ziJng$1)5Tc2btmH8FVYuD5Ag%&P6%?kav@$CG5 zh||BXY0P&aDZB8SHSHXG3Ev!IszIoVTryVN22_HV)m2F@3V+?_uni4ohS6%LnjCHm}LwDs5S@#odn7 z44u%`FprE%({>U)nL3sixLfNHCKz$jl};SqN4ck%-dF0|Z z2lv5#4wP}6s~EXAJzcnO1(-rrn!LDt{j~iU>Lyr7N9NCA8Y%Rc_|Knn%E!s$#(bv& z8<^ORIJK_Hzp(h?X>D;t?D@`ST*Mimg=q)xfeOMU$Df?dsi_v0G(-Z;B-KLL=~czO zsBy9gvcyXjn-az zDxS4v)w--IAK7OL?T$U0xM2uJJL+0UWt0&=qr-h8-bf@YD0Oj%|7p=>?cMJv z$t4$3=bUMDFu;M}a%G=koqIcbGEAc(-0`;@-ibLoP^G$|hz@%x`OJMZ`{%tSJ*s{q zlcKFz4^Y60$sv%THkL39C_bj8?Y!&rs5=|gPVd$2P9`sBE%(vr&Pz2PqF8Xr_FT$6 z)9?&?dmEZ18;8RNLA5?F*zJ;DwV)9|G?o;d((+ZwJGvrZB@41!gPWY40_iaRS?#YRaV{4DizVv%7kGuWv$0e)1 zy)UGjHOXheKED=Q{uWEb?g0fKhd`_Kgv~=htg+nu>~LO++tr}t^Y3SoH}X|UlS~dA=?dJrIkX?$+#Kw9UiQ!^$IBHX zuz&o|n#>E*V~M4E!DAof3#Xi1Kflp>FNQapPQ_nUYG7U)?QcWVI#Y;1*V%sf?Q17J_JYa@`F(50;xM~Ki3X5`f{TVScx|waB4+suZMVhOquoN-X zDuC>%xZ`4(X;ib0RGXCC-X#gKva#6WJ+O|i%mJmN6?*v4sWrE%E+%#+t35c&m>aA+ z)-VwYiPR^~&IKd>OU~e*wEQVm&QF?nRr8i3dg2||DO-U*Y4iYZi5bK=-sIN3W!R)+ z&q*tLIX6v`2N>V+lftg(f#2XV(60*LqlIHV5P5OFlnONTlL)9CO2d)rNvQo$TbM#y z@-5j4u&{@`WV^m|zh5Z5&q6-MJtQ3f7NJ~K#E>$_Voq%m2X_Uskb1w}?dgfKVg-+L z!Q+){Pj&22;F`HW2hJM(eFh*g*@f(Sb5zWW=-mtb1be!2Ca$$QQoUGCaaxv0r(<3v zy+x*hXxbGL*;vCB%GI~XXRrX5xpswwn{R^MhEx@?xHd?a(k{|W#lK1xrHBNV7Y6UA zNhQw-_~86N$lC#CH}q#jnbrn{8G6D=Sen=enDq25vQ!Sl!o-GBPbgL3Dy)H>MP4IE z?6i?;n%y0vs;mJ_g-Bd-K8|wrKH!}=A`bA+5&1bPo zgo=#7_LV=jPGnjaeXqPBR6^+y{6F%bf0sLtacV*w5TJqELEiksW zWLGNqGUOPDgo;kQQm>A=3OkAxoCc#EmgyTmF!S$NAgD z1G$C!{eH&X;hf@7FbtC&vQ2TJ6NQ!msEFL5P=$)0L44&&b2rhoC>t>wVJSiq4&MCh zh62MT4d~B}^hA_Wm}6Kq=R3z$EQBtJcjwC+1rxf(G2}`~z{vuG6vO*$Lc(^OYRu(u zt@)9;@2ZrmZ#>mE4njQHQPxp;&24LW+fJVtVPU?v^Dd1(zj(am4;p?^g$jfS^mPUI z|JiDlV!$N+)H3Fbi*EF8DkWxIb7$U}Asz@I0b@t|q=hMKsrhZFounMQ862n4jnOX? z+dd6pync1^S1OFBmv4oQoGX&60{r8ABonp%r+Z#_j+b+i%Uge)X)ns{$UYbirw6K} z+{QcsEw9RM>F@u`ELaQ}5P~xGwu#w~R^8KCcIW)fv{vEFi1K2$P5TBY0;qECdO;U4 za1|7pD0*tPF}*~vI7i+`fR!uGuH~LEqrE@QxQTEoLC+6r)ld@WA?V!w$NZ?1Nk(#p zHmYF7_ecmY;cX8<5OO?XD6y>jbW)&)95t=!)%G;msvi9y@l8TPo`v;q3}ei&_BrMr z$GnarZCtWL7SUzWcG6tmsFs*}tDf(75Ncw44vVFI&fP1I>8mN&-dcdrYtdWp)A9qT zAmww9ShYbkE(8c-1VPGh`zGx=q^*r9nCUybdRw5EkX~rsXP&SQ>>k$^v5!F=BQ=vE z@DW->8qL%GH0$XtCxHk=Jv~NG&qaygmg@YlLDokrl0xC@-b@Jz?1rd9AqTa=WSQa* zp&bpaXQ9Za%gj;F7<6n-Om*e6?rNB&pmXcl=Lk}F>nk@-9&0+;Si|h|)(mX*Ep(x={sP3i8JdOF_BI1lK+8c1L5L&~e7(G=iR5ZDU z6lb7I6k|Agw^r+wqwof&@ll1gQ}2#ekf=^d0{O(EI(GGtzU-y!ng z_4>YvSS87&j2AC3DlgW@Xxfc>=QN=QYWCFEL6v?W7gGo8a@P8PO6h-L#=}u;#02%@HO|A`QLk5o++(WezX9ckRT|*rfQuo4 zcr8NiUAqOu4_#Mrm5jtH-0Ar2^!;-rxBq9nG0|!*x&2`N5Ayd&fsBp3{_N<8*pW@S z-`8%}#bqNM;pb6krIX^oSUL3zx#(LtUY$^Hq~MRqvCL7qshmO|yy|>tVv`eX{^#%p ze?A>nNcjGT*UN6Uzt&tYxsHu9H__WiFxd-6=06yfavp3QYOcUZy-5)AU{~Dw_?PVS zjn&jM(JqR7%m|nFTBQJ(Hna4|N0Cyaf~mUq=$T|BamRj80y(3j(N zO;MORA>0f~(Bz`*{kV^xJ4*S3KIYER7QG`gH5z0tBw+r3yxQ~EK0C<(jyGbx1mx0|A-u9x|xY{e^ zv(A}3$(dvR+5y?2&iUGt?8dGAW|l~CpLxt`AeI4|)T6fE>Xd+Rwn-`JPV4B{lbQ~d zUI&7V1>k1Yz}ou*lg`^0k0|(9wMu*vwHulR2pLfo_m!3mQ^o-PJfr9F`4#$_T88{a zxQ*TYS0Eowjrqna?MsTXsH^yVXm`ReQ(2iIm@-vc^poAW>Uk0sYZ9BmxjI74_FV9% zA1P)!5djDzM0N|tR)mQLlmcVy_GqQm=xx0JS6I6v=KIOm(bdQN-0!Xk}_wT9|yN`6GYm#DMy!@7m?s2f^SB@<7>^ z+4!o7MlIRA9^BZpsE!)mK(@Yh|9SHxbr{Rb>AH(!L{Oryo2pMRzJDTsuFuY6k=RZS z5z0Up<}{*CiqrHvlC2@)4Pf?@v3GZGU{h}XU*zm?w_J6y)iNDF#GSsyZ9o}5Kj73K z{GL64xRpCps0l^L|0u>?hX7?8M!ze59#XIF0r(o@PWz{GYE3LCpiAP%{8%OE4EJQ~jI#(pdgu^oz!tLsphp)x~;8 z5ki|%lnTm_^@J9+;M;Jv#ii!poT}eGd%)B8qTJ(Oa!>Vs1ObP=C*K zPebXd)u~%){#9wD#5^MoxhEstbBR)KO*=W{4c-1&w-4j~-8xk=wMf*LZ$jS#GtKJw z^@|?aPIaP$J@sOMI71{eFOF+*&mF(eliU1 z$w;CGkkoknp>lMbJrf90`ir+ozkaTXoeCOb_j6*iOlKfBKKESV7K9R)f_V~o>*evt z%u3%E+{RU-wqj|t{cis9QnJdgy;YM62op=+HNZ6 z+U{c8V)lN&i@yiISRy=xeSKIz$UeE`yJ452MMwWG#TtJcgKtkeC;CNpKvWI(`zb6| zCAyGG3&t+;x;2)RdVt2Rd0NU+2wv9aFy3Hy5ZA<0VSrqo9IT5>qNCO!HQ-mKi99e2SDB@Sl{wi*(nq4~~%W@+;!9QEvkTOeFCZN3IhV z3f?75dCsWYmf+Olc=@l&WC&&AP9f%*MJ9kF#Vl+A0{j=RmPVT0RXpMxzzp3dtP){_ zm;LL4HnX|!xxe8_<#u@)_p*>B(f*A7741H*HOF65V!;n7>YboEf_c?g^sggEjUd@T zlkz(k!k6TzMwoK*QqJC+HNW|0_o&y57N3kuB0u>^Gv$w-=;jfx+>kKnY{()HQ5((g zqCauG*GZ7b@bMf`D5=rIi;ZBmV7bXA4$JI$Mr87qq3NuH{yvEM{!H{{%Y3{BcFM}@mqR&CDPHafy;{5n?u?7pN~|R)Pmn1a5Qjmt6XUh(>$-Ff-vVbB)L&XuIw!*jS?jiRG%*|8%-1(|r(S^{!K6%VFT*!b zz+uuDc3`FU+9YoJmix?$-*WHvZTOpDzRsbr90N|hhJmBeXH`BO34v5G<6;wksTgufn%sg0YG2<8pXX`5cpQmRQOx(kMV99rxr$P z7ib6EvV-&Wts?ep<%kP#T2QD;C=p0M*5G3B}FNo0i_sQ!|zvF5Uxpz-(UjaA6eFX zE_|KqCrRcS)2_>}|GZ>*a;#1RQeY2|4^S3UrO?+CF(gQiDJp~5r*&zPO`yrB(wESe z5ccFWfP^d2Rg1Wz@hTL9L=fqlk271ZLN|)TYnq}BvpF0@jvGgIO>|kl&UES;r$icw zBF<-CummbkFx8Eaaf?XRq}ebP@t+L|F@=1VnyG;}FqGZTgzJRY@1{E=K|Mr;0#OA0iPm&I z;?@k1qpodJH&cr*a}j;bpfd!4Rw8k;wD z$@NKyNHF8)@!SJ>6W))nHtIZWXlV*p6_sm=rWQ7ufuM>~FX9 zZcA!0yl0Jm>1fmraCM&8@Lq@|Q)tLdvUsT;A!4j6P~McG*_%=gsqk3@^#7b+M*P*9 zTPCQc|K{e|Vm*sbquxYQV^7>X{bIyX^=PiC_9FV#TG?m%5x@Ssj$1s>q_{&6{VMdj z{kQQV+-1N97}rXs)m(a-*29--&iw@QT+NM`g|URLOr+tBs?TbH}Ne+fH@1vY>!2jpfYL8A~-m2V{z=#Oe@*Q;vQiA%E3mNzLvIpxr-mDPX4Lf zZk026Wr%bLn3302LheA5p&&)~lyzEuj{%bzV=-!H@VE;bggqlK2Crm#b{3}`%5|?S zu7%0*Ux7wiBGn={n16(F?IiMfsGUaLxPM0{?o{%X6I6+eNlpvX9`^(0H3?%4NaGDV zf^G3rZ*QeDCHz?uuwiKSvQ;HlBMoULJNmcQ0w=`nG1n_Xryl(!H489Y01QtNWHQrH z9K71I`dqVx4JR98W22}jKK-CqpcIjByOoY!fb}Tkn*W}cH3B(Zn}-DEvGtvfkAcmS z(1%Q!Lb3p>{(b0$O5rvAC_oy)gdmfe(_;spQze55N7hZ*A@Y^4K&mLuX1<9h8OqS* zpL#x;CUt5CA2r5>cWB&pV-JL-ngxKF@PwC>1fmAjpFbyBp+~vdi%v?}`Kbs`ifsxf z6tXqlt9COCCR?Xl&sx7EYm)t1HKtChis@39hTWP5EkL&j7SBWMO~Cc7&Q`f=1eQhcRYe8slryF%S5H>FUBbo&t(`=-}+lxWcTTlG<1P z43w5|W2{%|I3VmG#9(%Tj)pa(rx>rbk*VC5N;7|dvr`(a%%nfxr74#WY2rbyl_+j} zxtw(=i?O^@U(8>}O03#QAMa$F=d6!VG^Kx1sq=SK`Z($IyZo`FjSC2s4dmy_XV>lV0@o`Enz*uJxZz8rGZ)? zs$8*ES4Ez6f`*))jV}E=eC>=64fj-i?=HL@UwGEP3V%l>CdtuWs0e>Tfmf~knSJ0# zPMZ!!y|JCa=?g?*^si=w5Y!U-X(hNzp}nxT%u`(5nT*|@d~(Qn^SO8!_nimq?1p7<0QPhxx$XnOOD*kVPOFj3CjPJngH?~@f;Gx)M}g5Kzcs0%^HjEoN}YBMTPsM z=gcy&jW+RWbnVkI)iM9*H*tS}zx8z#D>tk;0`x1jiS6_Ay<^do1?D#Tp#|lZvZ5&@ zsy2vfpO{q0cMi5bP?+K{Cz2djcE-!@!rpk&_r|Z<&Mvt(ag>TaAGVVlS{;j#CoLVA z-_1&t=Y2e$`(Cq;CaMTt9cz}UDy+8gJ!iW@)4D=PiE8(n4@o;GRviA#GjPvsq3_pM zaP}OwUU_XPw%1#4rO?BQg19mk9@peI3FncQx%TOj+?VblVI|X2Y^@rc+Qq$yc*We97^_y` zPJtiw*0e}LXRIvKx1|vUO@S=+t&A-2K`ZE#uqP@n{dXWxbq5LxwC^uzuWyGJ{tX4F zb0^;m(4%CVvG>QPH}ADhTMX9ObRJj?(uT|yUg6mnj$oSM7(-1WXS-itupu6bZgD!? z!udKAlEF*^zuEO!w#HUFlZ_q^tv)|`B(`1t3M5y5ni~?AQ@&`ok?DXjK&Cl1D~hrV zoPJQ-sgiMh9B~jINY}Hr^w^+=mxIu82R|_@qLsvxSM0S=lI&X4ve3cVkRPsdNX09O zH0-a*+}cM|GXUHPgS}XqWK$(OC6!1rx2zHX%V-LY!J%d#QW`g}gkZcX^EUXovKmdd z%BISC-#M&2S3)etH-_px=eO=9>t$2+_ua=)g2dA5!q32@>?}l@Rgu-ykBt}{DMYmn znYdMs$g#w2jJV1$;HH(DutMn0oVq|+mIK!KwSaC)#0aUEFg^YY@BpwU$`A)v5yHfi z-o@sX6z9vN(d7{*9Odq8GCkB_&7E=V0+e7Uc##0r^|n0RaCWTm?{GDk0OW$?j%Jr+mUI?dFS{L6!Bt3y^C^XS z(5V{BnU+QkTxhCY2ld&@JiP`B{!>`qQ{UVy^mRT2vI==JHskxSuTqAv3*%BpO9L_J z&OP0TUNT%fz@Yn4v4heo?aRGiuzs#1k_Fb#h!CtcBOpWPZ`L~fe#*oOwy@y3j*+OU zqcE+>x+Uy#CC-)9G0u~7H@kKXdO>;iK}>IsK(O5wl}?`BmSE2JWZb#Kxx2`}x4ryv ziA+~r{x1@{h!iIE=1BvYI1Q4^ai$oW>6;}{`a%AE|240t36{rZJ+5l+IXPk&p*qLH zEYYfLekihP;X0{iPEvPHk971G&b97nK`dHDi{$elCt^rGt6ucl*FGv0uB-cJA|KFCyV6z7z&U zV&%vbCSF{^UI_mncDIsh4j0qYIsU$@J;Kf|KhhBzOIkkKH^RE=A1LPCXW@Fl&Y14sUW4JaYz5*l!T0ay8O@g@HQ^fo;!;HF1 z;@Jx$aBbv`8K?7&o|+aEZJWvwKPX?HofSrZ&_CC0zsEm2(pATO&UT(&%EqM6RZC(d zTeelH3OupVJ(i@6O+jB18Lk{68^$S0;1br#0%&n)!OBmWuhq4b*x#qwSnChYt9kX- zIqn@1&Zu}Yw~$$!w1Mw?48;8A1Js?;`wEAatU+|>tPo_E4@n?NAR6WjbVyxN>4J z39TWONz;w(CmZ~!{xEOS%P}jiO0A9&fhsVtF$~5-y(DUW-N=w*6!a zjbUL$aQgbu3`30WYGCRGHTUE>YfohPxaJqO+euGf$=%f6WXj$MRpakQstT^I)~b~o z`WZGA_ITxNyp+Yigeq;?cabyUX!;f1TeGq)c)h=6UMF)#<9dHlDmAP4JUcQ0bD%?Y z2Woa^Wa|kqlhhpHdh8jRvSGP5fsFs8hT*bH;opvH!rSz$5K?5v-u@yva1fOeAP=Xv zw-56Shwmt3aRSYc6(zAjH6duVAl1q2x!XfB8WD6ryEJA##x8?#oB^U$TElBi(k|Oa1FB-Ac`i9Okrc2T+=8 zbT4@Z+!rXzogC9csyGJ$%2J}z15}ET$w2q*%hAwUx%7L1wk7kVS7Y{y+T~Z7oS7=t z>npwL5{}9?S=w|EIc85)fC2=RNy0Be1Y99mdD4Z2?+mhJ*UBrCmd`7pi^f{V{+Y0$a;Cy) zCNvTJoCsYb;hy&BMl5XYX;?$2S7!t9_pWxFL=Ay;j{3ht4U7Q>N`9g_d?|b|b1F-; zw~IN#IJp_(w3PI$p6s}e=Sw$BlD}w{tMo9dMV@$Z$=PX~(`@s_^3~#=Muc>Qh_8}E z6{^dO%E^7JsqZjz(TSRc7MsUL$L5=Z?GivSq%Yt?1BUB>OPPryGrNVuIRZCv$sG1U zK9pjym198`IX;tsG20uwih!$>GEaTc%_vAzVvLiOI@L|7Iw{0re@na)^K42!o>0f8 zl)PNrxVV>qKw-)xpUj>+fK@R`pf?4@ZFQ}3#WUN)ApBlBX+iq-i(2&<@5{+7_)mGl zH;Se#)B|Bmc1&OrT&0HH>u0^;Z)n6(Ip@FQrV@dBxW1+>>wg-?5q<}YpMCaD=Rcd6 z+SXTNs+forbR;px$uvamW>1GA-Xf+|F0-W7<)_nC?tGtm^ z4!qaWmuHLib*>-ktlJLVp#e(rW?aLdac;G#mrX_-SsGYYIxoE&$s9P$dKweYaCyH+ z0L!pg$dd~+#tmI6+$x$CT_doZizG9HoOq`q>Wc_fb7wW3|Bx(1(2j~B#B#(E0yns5 zj|_uU*911;6<-xrv9{4<G{qWnk7WCuo# z=xw7;U#0X+X|Fu5&CehW@@>VYm(|XgZ(r6JkQ?&+jqgaB?rm2qtquuQWSo^ny~>-j zlZUgOn#X2z1@ez-lV+qLzP7`{$oN~?Ed^RR=QEr%Ipec$@Wamp9%n zeU0?%KVl8@T$E_+IB_703P)Gx$2y4mUeV*}7+*YkhQI>@}ve9hcT;!VYulZL}r_@kFHM$#Sg ztHO3KW}iTwK?`Hw_jfW~Q<>rkVe8&FI+`(9BEfN z&t`!#fAe7^jb15QB0dm0oiocjhCLT<6x`>CgE8;o>HI?LKg4eN59a6MGYM z#c^`gnVg6-axt7&f0Os0Nkxbanw;FPAz%R(qJTQZg$d5-a%_^^^}5rM<+T}j;<}Xx zA%A}q95&kWvamQ|DO=tEdxYirDtRrP(ivM_MZ!Uc<;OF_!QO${DXp%01 z9u1=~uPu70`;(WoBea7ju8U`=YnTp=^&PVV1=Z;f;bTeBk7W5{=W5k)cZiz#(^_8i zHih!`j}Hc)alXFAv&jVRW-5^d%e1hYeiT+;|IDP(R;_W2YNz%ElV>TWm%OINYZcd8 za=A@rmlF)^xBpZ=`FSGPZh@kTxsKGrbBU>w)X#3>_7hszD_Bq|^|dHRV!$zjTx3&y}6GTmKZ2`AK)eVev7# zM}r?k72`k9u&Bq7%c}WLah_JVL!ofUn5GF7*`Eo)#Pu zM)EhCU`2eVB(^!j%FlT2!|{G~>e!}Nz`G9eOuSdVM^{U;I6mU%PS&|$>+|e5iP)&B zuZja*4y&1Ss;=Ha&si$-X6LxeuL#fsSYFxvp&56{^{F=d!xsAqoafL1)M$84RNgwF z?W;b$M0fxZA;FhSPQ#JO6ZT-LFXAx8-FlI@@sXUuHD1$pI%BtDR4m6M$g?r`gUt4u z{@zPsM4(h2Ex}~+;v_D-yPk<@e}i;#kO`v8MK{W1hIs& zyzvxd7SWAohvCD-(0$>4@Fs5xG?v($*ZU==e!Qp)lzh?$M-B|G%#21=mFdt9&>o9f z=l2R8-zoJ%fl8Hcs*}(Z$ECMf;Z9wwCxrwEj=|?hCwW=2CGs0Scs_=~P}O*DAmRPb z3DzAzb{E?U7#lp7fULurWv!)lPBF7qpWb4J++YtAUf@fz*ZH5i9P6(BLALh|*v4Hd z{Uk9Ofl1KH(vGOuD}z*x6e(XYf4q&r=`HBXG?pX#2rq}L`=Vo&^lisY>nE|8p|4 zfZZR_>aKnSLD{ayM%cth!RTlRwv|*~<{FR^yCUzIJlTn4>E&yTJ?T!T&z+Z_J8jbS zRy5|T&VQ%WT#S*yr~kI=r;$_|iZwp^zq7h|nKIibcbWQ1v7mQGgV$0hf41&yGC%dW zlK@L3DkKv}*5_%4{)VG*31r}HLe*JU>rA@g+IL~y;Ucr1u%9ByQmW)7!$Xu@A~d|@ z^3bnQZ$G7eFi669#ihYp{Z=y*N8ZS}XqZ>G{KL4Ks9KYzXbvs$Qgh8YQp6cIr74b9Ol%4YGrB_47zV|eHZ*0ThGY|_W85Wp z%#heLB=26P)f;AqMiQ?*^v}V%dKH-X$Xs=1eEIWTMzI_uuA)WIJMgnqll6$x7$oOo7aW<-#$xz z7%mC*@inh$^+iOHc>eKP@WNkANm2Qxv;UCn$Jo&C5wT*@?NJ4a1zn!(YVdcN_gs;L z;jRP&#pvDMd}_q`ljj!dIUoI%&^>+K54SqhHt=0DlNZ}x$QEpZU)p@ledrs=AJsP@+N@VdkqOz~nWLR~Dc^ z6D=M8{!}9qyXjzx^LU2$cr|g4Bb*+*S3h=vX~b?(v-g4DdK645&2YSiBc0!P@Xtm3 z-xyP452Rc7kQ+cvU2j1BXjewa`E-VX#Ja>5N*lev@R~9mC=Ar+G zNhoeCyQ{{g4GM8E4J7~Zkn1So*vte=76zW25bTfT2${$JsMusKq zu(4E(Ab|P4+l~DP9P>kqx8Cg*Etc*UH6DxZkcB>Ga`Jvo(#|paBTwp=eUZA-Z)aZr zs_Me1h$}DL&3MTMrBr4jaVfwU2EwU2MleL=LiG=5!0+H=IuiQo+tu|3)3v`1nQq$aFBrI7w-*e04R@bGzUGe+rsuIwOKu~`cySd4Wa}R&{{;4L(L`$ zvbx7hKF1RG>=hHLTL4x&;xQrBZ$mQyo)j;pTtzofE)D|GV4~w8lRqB}%{9$p(97sw zYn9zYmT!U-IH#_}*85GA|rVQ{rn=a;nwSvErd8J)}V+ P zU7uirYdw9neZ@B;o8j4TOV_?z!jzaFsZU~VV4IJTBZQlTeUyE)Kc&)z8L}2ZEw~wY z??o#a(8}pRSxHbKS*ckir1i+FxPDPI(j+N=S<;ar)q#V{MJgA8y*S{6 zRKQNRND69XD8)dpvGFs?W2Nn8oe!e=qc$<6oJxC4ZCGs#_leRN*uVlYAN?ow&S&jm ze~}RuN@GxBI3Z)Baf%T{xn40~WJ6xswe~YG#E0SASpa}CuYaP>$2PNpA}D41+xy9H z<28!mQ4fFvKoDt>)WkbC^#;%zk~;ET5JutL81%1SLFeC?c1&C^qx;fc^`dE01D@Xs zIyU%go4J)@?o0E%E~o^Ce2@60+;+BSJNs;C+;T7$Bi@C-Vl4kP7 zw_t*{)IrFmD(DT(LfaP@&n!iCmalOy#v2p?Q@-79egaiYJ2{O*7gckfd+cB4ehPC+ zKcWk=>$Ps`9*JUB&mfy0C{8}g&4G0=j^z8_PYgUyA|;S$Ug$%)nYlEJF2#8HtnsD5 zVg}x^b|!@sZ-`ZUncMO7hptXRcY0Mrh&K%~$C!l1WxUF3ZJa%wBe37I$??sBb`TA! zq6h{kcC-|AaUXn;hH8c?o}_nW=w0@!!DidQAI?lM87yg>+QUjRI}8@9A^`i$#8i^+ zwYTWS2q)#+V;kuo8Jy@1=yY-PxtF)E$dpfO z*yXJ?8?8LgOO)yZu((gAiW1G#dd`(LoPI5FW!)X>?99E8yOS5v=Yqifk zqui121P#jT?`75ejVPV|8!BO3Uu{+{Ja)U$Vbwn?k{kp{r}}H;!+Y}hI&kop1D^)M ztS=HYSk)Q?UDq(T+|U48iLqc5L8&v?E3DH@*H}^807x~I$g9%i2r+87c1-eK2ST@% zY%OaNo|0lID3h9<6_i@@r;MuW>WtuQ27jay#%&rFYpSfbe0Sg)@a@ZR?=jh3GQs;j z3Wn!%rFGvX^!IK|+GFjhpBf69^`X0P>g7-Eo#`B_9@m6H$}!{6-Z=IU{OH&`B_gSc&@e199BQVO0T{ApztoNNqsq zZYq9vis=Lew+PCGg}u=YidPA%X`u#{haQ$C_$FlCMzEYqwv`!mL8#t7vi=UdXTl zGGR)7JQK;VgNY4|+FPuPNY!*^m%=ArP4%ne5F2}13OyuVd?ypXRUUT=-U z2^3uYHDJ!VOykT#77Jj3ezfkG*h-bHZt&t`v|LDfP`bnXL-ZT__UF8N@_}USVF1lt zn4Mg-m)pom-vIuYo3wtw7?MBXE9e<|c5-%c^5c)Cg*SultY%sH)bg#tbZTaDuV=Ep z1wMYw%KVmjIFl(U$6T&7jYbTmz!$OHm8PYmkZdy>9`{fsb>s$Efzxrf!VyOUzma$ z&mH|AuD&`bu4mgC0t^rsG!Wc1I1Em3cMlL`aCaFj!QI`R0Kql5%Rq2<*8ss?KYsVs zyYJp_|J7Ap=Tyy{?%iwm-g~WBhVq*XUH6~!P;m$H-nPGURUtKNDc1`&@TqiYJqq~ z?uq0KeYK+irfTp>mHL22ayjp&b3V1V%{=t?zarhTqcaXmz3dBpf%lj5c5?hbJ8z`M zmgsWKtuh#@5-MnHTqkk;zsY}%&V~9;qMt9>=Zl0PrM?JgkY=B(590 zNEmE76wjDXn3>-4Sv5;Xr$v5P36JU&IhWljZ_7<|u7>*XW04vU6G51H(iebl-}Js` zRwa2+c8B^H@cqpboag_*>&e4WC+WAmdz@ew!V zfQ`l?Sg)M>PR;}7CHZ!-(+ST}$#qY?%m&6yAMCr0RYtJaE+5(L?}pyYu#Q6mSA^=t zDO3Dv`p67=RKi@gYR4`fWmKYqif$*4hl=R;;9oXz7xuY(bC8&M03%0kIq)1vvkaX#p5IUP{#5 za&O6x)k@2PsBmnTSP+?6B{i9BfWLYgQjs=T90SuN(fzML@!mI9j-K)n0<@4MNAMeg zn|(w3iR|YuipW!pSi?^y1W4%wKf5?B4neZzbAs1j3sB)s0Dr(^D=I%uBqfP5xv<3Yv+JY4EIWklRgiGTFyJKR=FisF*6r{)OO?$F3YCTR zP6oDpl()4-S=#eg=QhF$zpn+Bd6wVCCq+x+7G*dYvq-jZyKSl7q}zNeCBIaeMEw%> z;jCm@a`b+mtBu)CCJ-?7F`>mJarEXd@vS z?miRdXADI!uNf;Hi_$cM4NE~iX%>UL0TzNqLaY2m$^Glk4b&Te8;4`VYCcCsn|0h9 z1I-%)+^qvG`>T&tZ)KFLv7_U*0GVnT@6nxo=?=3CPVqDVRx!n?OFZEaoa2{^>K(JJ zAFEnR0Y8K+hJju@_r)SF^r1NbPpY{f1dh@eL$BTw#9!Xf2ZQPtoB`oRv>2Svnl1)= zO^oL6<$(jl%bgpKA8pXPa_y@-x-Wt&_`iRArZyv3ptxzv+Xo8(e<|8PqO}&Jj8WVp zb}VygY=BR33zTQTC5JiN&1}d?L0!vayU0-?t|&#F7|0vs=`tA2!K&FOlDg1*erRx4 z%HrMdvEx3|c}qk)CQN1w{@}o+Y{ULuuq*&*qAU!+%;$ib8wR2V8Ha3#O!jI| zv2wo%QUISmPw+q8Q9fM?y*0XJ1RO?0ylSs7h)SF`JQ=*cF<0Biv^4#%-92tS*twoJ zii<8MJl&gLbUuB2sm3PP{cn3J1`{u#UwfrQvcCtD8tK9Hc3r*HX^jdSJX-aRY|bFT zxwfV5_JA0XT=?X#PTT5ydb$*cLrL$LkC>ED^J?sI0-gj==tm8&)w=3csns;!X7_xgTI1(-Kzn9RDaI+wRo-~WD1Z1DQPoZU2vL1DG-&xule`y zIIG#Ui$jLwHwqUXwVqrvZQ>jSxIb^Vu$}C{*lG6slB02eH#^XTtN~IdT!CnUFZ!v9 z?wu+=Vqv+q6{Rx#Ta%^mpG3nhvf(~3o~5#0t~_kL_~=lw$t8YJS5aC%Dgehe*Dg;& zQO_t@Suu=3dbh0=oWI1}y}O`(InR-<;m7x`JX(ltOkb5DntZm-jQ|mZUc&ri$;_xD z!284J$Cf>g$|bGGZ!~ZXo_+?MlLorFPti4`5P=-JC~vo|U#xQjTAAK%D-w0(bbNL$ z1hoi`QUbB7n6}^b74oWbC2A2E;gZSi3|FR`L!4tIxuJY#Q-h@O=f+0pZ<5Nb+}Yz} z%QV>`n`_#SJqIX@Uf&ddB)}P=QlMp$wUgUljuN}inb(ev^Ig`SGNC~N_%&QQ++#7McuH5 zcJ3)IBLTeQgg8G)fyt zS@Yq$^4_+K0D-OgVZr1y8-y`S2+Jp@$Zt*V_?hMP-4P$+#mA4YD5)Bx4^W>#+W>>_ z&5>{)|7~w&iV?3i>=TzVFO5P4`^z}UCvOMXe_EBOvBNxr@;k5$^;ptxVeo$TeL|{& zh(H_4_q3(8m{HeAShIcI>;2%L6lTfenT`h3**@GT>b9T0_+J^4D37*;?dd*a?b~GL zi@)w)(344p#ZgZf(J*vFRO`m(e#Jchh<;2gH^X6??rCNok@eOb@G~&zwZtotMwu4q zDxV%vGJN_O#jLl+n70B)*2_BBLZ}+H*&T~dpt(F%4PSMrk?!hlug38v_9hsGwahJB z&SVwr;a5sBX#Sb5dp54xd^ZH#=XFuyb&9D)l4`ak&p+UoGklm-J8o8asZ|OeCfg?r zZ^^dIh^*n2w&$u<<^>N99)wT|lTnbX`#%YvQas4IW~$HYKEFYdM7(0BvR_%lJ0MIG z)ky%>7++3R);?QU<_WyoDt|%X%_&5Z^(%UTonP3`tejg?zw$#XO$r{4sq#qb-yUq* zP?gWk4;XY35-NDov7b2P)kb5NYsF|P2U@lJh(`7=(KQH5=Q{7RR)|zL2E1}$W08}# z3re?y%N{tCerR|2ULrr>T&}>lJq!9cI-c|Ig{0ttGf8U+`SJ7WA!1I$BDLoS*4 zs{S17AaMlqwnkI{&$>fR5^8>8mrHNW68_0rxt|lg9spg zwobmP@2;jwjKnvkU6OoAxZ>`MW>7&Q&KclZBzqI0T7d|7B4@T5^N>WJ#`3Zz`Kj+rGXj5xeOIn@>4y=XD1}wRRse44`OTBvAX28BP!AF>vFy zXev{_@qku>XLIYYZM)JVLl&mo^$2Pb!3Mq6UwT*nuzM275im%qnrR^{s2Io3W+g`t z4S|Bi&BQpCLM9y`^>{6?X(p>G!Nf#2JC^H*3rJHZGmXKD=mBs#NI|T-pcOoBNbvmK zp|bG=-g{AEwTUGVdd4Q&9=n;)CC7= zxE$jSAq*d%B3KT0fsNvvjolw}EX*&Tn|l9hemmp=M?U0x{5$42za6|LKH3sWV4JuM z(>Gw~>6(9U>G|*J{~4Um=UO_9D;2>qI8HW4CzTMp&&R9x`TC%!#HaAJd9I&1 zF}78pR~W{0v0>L4YdO9dr&{}{6R>wDlVkYHvf8|%1_PaA7JU4d>|BgvQLiq+4J2=> z~k_-W@@BZEqh|IZF>vGwZE#{ab+k{%a zXdoR#q#IJ1=a&Q`cUc~SzFJtD=WWiUBEhzH!&(ra7Eewf5mXbjEf9SvRiO!d6I6v4 zR9@zBue~f>9%G7{klhQY7qMpe#p!x}+tV>YR#Tp;* zG}1_70x30uV=;DAwo6tV+qdWi5J})-;2@h&{|fHoJS)7@P|e>5f)f{k-u~>ad zB>^bf-v)K?h;Z<`siyo3Na?tP1E(!iw#B%MVMn`0&-QW;Q{p)J1ljMS-KRHr0bT){ zr3d(FvP8stvTEkeFw6N0f-zyhAk&Y$L|@7MNH00Tlg~Ka++Tx z3rt91#e`L@lmvDvW)Ti88agZ5BiiW!`(CZ}y_3yCghh(K@%;+}2qX)B`f=CucnW*= zun$U8xZQXg4;!%Ji(mJt)7DiEp-qNp6%2#`waOixrP??5j_GeuSK>wRGcy7py?i(d zeW*X^yZDIms7+%fh~_YTsmRgaHSTc_io2^|UiyK^Xk zgMsc7Cu`aFYj4dyKikK@nE!?b{|tP9RD$`At43f&GMf?9-b={j$-3N>6~0}D6Cz40`_FpF>go>{O~)!zoXDC| z`M#yOu}bZ_LX3O6h<+q+rI-#zN+Z^#TYdqK2YW#aHY*HA&-YTloM_tWIA6>?JD}zS z`vvd)K{ySL#Y&b-To45QP^LQe<9gFW6tW=K*^5LI9`z;g>+f;`^U+BdP}dPJu)T>e z^8X+kr{61k&=RGG%CWkxsb$;}jKgTJ|d=Um}>O0{lE|i}QpnvKyT;jt<&l zF0)_u*7D0m*Dp&1OD@GViW2WuvVVTEpJUlpoYT|vpT9W3Ka>+I}CFIV& z2Tv2LT2S!;_|RGquYxfXy+NXgPCiPiU^r^nk#p|lbH;)B+@h4PX#j~GxJIEeY*s0N5#BbEj? zDgsUtn1DfCf(lDU$2V!>!EA#DH)JFz5~n}hh#WqIl<4@lt!9})0Y!?7os`ZET-{V{ z4z-6Idq77V*9MJq;!@mPyrBcZreNUHUMz4;FXJ5oK5NecTs9m<5WNGj%O@R8wW*lC z+pb?-ruEO!?E)LLhJR9B9jc_?eG|5ewQtIlh|1tYPqN{wn0K%Ou|fe_G?oDcr{Z>| zPl?Wwl&ozOvi17WxRAo=qG2VFX!o0gGCnggZ|tt_N(Ow(W6J91i-?p6pzhB%QE^h+ z9_v}fJ?Yq*`va1%1`EtATxiGTi;B;Yy>U{?+-~e4a?9y|HEQf;JwF~e+FaOVlrml< z{0=`Fxz;{q#JI{@c4RYoAf=~vslP{vo0tV-0-R*E=0Q8zp_qdEiUx`l@UJ-~dL&#v z(U@ly%^Kv#uMVE<(Lywtai0L{3%MV|A_ne<&SJRWj{ z{|%3lI!Qf}6+JnvJ=JVl5M;>V!NXNPuzZp;pzDMBVmsYMs-fJ;=+OD4rRmPvsa4}D ze13iyz*N{oz7T%-p}%EJzusZTsx1P*mcz_ZE$_A{fhN=BiLQE5K^D@Ac9t z@A>*^WALRQ;uwO}r@D~)N=yQ#d3+H{#4U@vo>nwPFX=EB1YTPma}&>y{7Tn~oh0%J zBw+x1g(ds$XS7HQ%1b;N^i$2D5Xwt+wf4*-p+!!&}qaM_T zDmBZf*ZURbqsni?Oq8mZYVLEu&5~mEvSy4+WWN)K!*-IQ89_*H8t*ve8htlC5i9&2 zWwZn%*gRHSRG4EEZM2j*nr>xYnce1B@4bufH=U~8h&NlOH81oltTEqb`nYy6md?`VNJ}|n9J{P&mvu5!+f}+F zS-S+L#Ao9%s_utD>4`TSstI*e^5pg`UokfA1NXfvI8Q(Oyy4YjjTaK>hpmIB6U&SMBoef%j%Bw)+lwxL4mN7>5uBu=-T}?^pY<90nDhA~Bngb2mIntC59o<~30#gmQoml(jB> zjiL#-V<9=M;#BZec-iD^v*|wd7kS0hJkW<&p7&z1J%B@gi(aJTT=-oz z;hfHlmyzApXurI-2f6*TzPB3Q^4p<%+Ly~}0yW=6rOb!A-=c=c zo>6}8uVo4iwS|}QQF>9I?caWT#0GwM9H-q4O)|-fyny$FSCn5q?bn{8&H8x&6YyuT&2HXhy{aoac>q<6&-SO|A83&|Ne%Mz=y zd7}kWHQveU7QqXQq>hvd9SKC$amM8?=WXY1fvgbm2bK>+l#C#BAa$a&qF{KUTp&>d z)F2=lxv)_M`?Wel?^U8Q2n=c`r^zCTVhbJu-|8_QVuXSBEB{A2xx~I(yLm1gJH!$#A26lL90H@#H8+9v(x52-8 zgk=udV#~}z8$KOmHXGPUqlq{CC|&V%JG30~JS*{$pu@*c1;%dHrOD6SYs4WQdSupN zjAMaP#&SliznyS`39!yQMs$m`+c}%^%u>~cN>OL!%%6d^;pNgu|DeE5r3)|G4(1s= zBwa+5^*BIL&^Iq98zrN4d@uh9yRcgm^5({e$VJM<#CrL3am3?MOT#|cc22ufiv4K1 zJfCoOL;MnaCPD>{x!75{pPL|Dq64c`014diTY83a&;}mGsNc4K`uyO)L6TYETC^7X zeK0@_2j7DF{L)#RBU+6>ss|URgl?ec-AcQo^F9~QK`Pu9{;PT*!k5~LErqR&-a{jW zn&7$UQL+pi|Wv8fJ1 z$;9?qMqkE%Fp9{}h--zM5mg8L*!2r;8EwHWl=ucAI{GilFP5uk@G4>>rTZ}~L1eNC zzg6ES>Cec^p@LnJg{XznTJP~p?GmvcSotabxnEv}i{>30_WR*53-xlI^r~e>ER+4P zzL!h8QoZJ0+C@L1Pq+N#9BBRh{RSDXLC}IV z91fq>+kg9rt(bArW4X)zbv)$1{GQ*K3c4EYExdX{9|D%%9{Dfxp^{}pJR^T$@^GrK zIGOOcEA~DN+4FpkRXL(=Wd5W4{WowlArsp9YMtcsMC3HLw{o$(q!HgoUpjXC%@mvN zNH}cUNH#wb-Oxt@hHmpIWYFjRVX(oK}iG!0+LQd00nLaw=D4 zMoQZcHIX=~>4U3uQvpS^m3^08QU$>3DgLQ_Jy?+vGZ+%Mf#@gUB^jYX0VF~bq8$)0 z(b4yJ0v0IAEMDy{2T5Ub3ECn%sA+xK3IBZSfe$CRoA`61EyfkSEXF$(pgynEaa&QR zr>t7!`>8nl$0Dsz8iIUS{2{i8ZN`w0h5}x)=20gbD(jJ<wguVeIU$a4ztV-81p* z#On2$aw1t)tKy-qK$|t@(rCKaNX-ysSX5cm&_K&{T;I!kQ0ZC@*EcYDHm``hs~W&* z%WO(y5lmEmZbv|;*2^e7Q{a{i$^Ms{hYY?fdAyireWxW~c39ThTh9Jvax8N^)IqzL z@B?um_p!SI7`yNe_jlrak5;~!c%wPo>D63wQ%8ehF+OM&NhfBK{^jkyd%O?y1f zrWp@PG71xEAVBK)_^m0HA6ir3pi5lrJTIzNjCiLNb{=F7-wMdfqJ&5uhX&Ze_A291 zMwtum^k8OvV>~vdj^@_Hhd}Jsnpy)y0(mHSDe%+40m)FJ6k-?uN)!BQo41P27r>#W z=z*O{729-d3ci|RiJvC8zuu@kkdTnD>N$w}+kO&hSJW)Y%3Y5h`pKF$|CI(K(*e;H zd`h)0+p~1@2Yn>$y!1cqBQOD#Q9%_>w%7D0z-xSkQgi|RArJ+CGTTHe4IrxJ8>CFt zf!INO=PYHfWWhiLj7Eco{|)$sp3KzrcgPqUgXkH2#pn#hLgkMoEgkUZ>f*;YA{h;~-OCN~E=CoUC#HXCqUpb+CCfsPQ!Cf)Yr;2J@HQR>Q zkV!lYWH?E_j4;|LARZo=5%Vh5Og?@UwR!sMcKO;G~jb6;mFMN!0*KD_*!7riXlmEBx zUq1@ITD@I!pNjZ9=04LEbdA3iNz5y)*Hj9+{YNzNHasVdk~RdNG`NuO?>It7=Tb%lOdy!gyL85<~yG^q64 zV+E$TzZmoSvcqK_ol5#cu!a9h6>D$kDjhyp-Gkoe{*DT?#AOZC)?6K9NbN+Rb{GpT zvwZ7xAYhfsk%yZdXs}shwayJT?QK)2^BrAbD{I{-N^|dgVaFV~abGXKPhWa5tmLs^ ztS7qip30vL+0^dIY+>(G_pb_N2NEGJ_^DGgT!n)$9An=YP|=j%Rl-jxpE1h}GCB(% zzUj>c`=QstG!$F0-+rlxPj7zJ{Svl4`S{FFDS_wO(}I0@p9(nwVXNPt;)r2!ITsq> zBg$_@_T|3iJY&wwzopNXqt=X|N|d+K@$YXshA+3`as4NvJqmVZhY45thY!meC0p@zh?#=ZjqR??28S8XsN-4aJH*d0nq1`>O17IX zM!Fl_FI?L$xFYA{798v*wQJ?!$r z>qrZtNxXvORos=2>5z2DOw=kQ`+VmoclM3A!quF%&F&t9^;fNyy0zb+>e^|IBB5cN zDYkYpgp5z&UD*y`Jbs3%mFwSGrz=-kc9OS})=`QDe{!P?LbpGRcg7n_i7n&~0AT|y zq`JO??FriiYknC?K?lN#5x*1F|Lp}itiy9z40q>61px24pzk^7`UtHQm&g!JP3HB*p)3AYb8DkAf{dQZ!_o^LW!8mf<7ZwzI?@%x=e z-3;$R8kr$&uz)cRTH86sQY~{dLK4DS*D$6f)9AHkoMfvuo`Cui(Y56RBZC|Tx;(SS zA#aHfK`RSOU;bKvt;nK7)or*C)@zI9#y2V}=fHc!uJVz0!%k@c=C{51sMBYE`+wyO zPvI$i1+)i!7oqdSKco-q5mw!|Fl;kDgmEo$oka5QV8p8?mt!LJ#VvbHezYaiQhENxsg`$`@Ss~cjB zfiFJ0R4q4~66OQ?B|$#SdT9{VuJF zRl1a>g)2IhdIkGkO?pRIAtS`KlPj#eC&A{sW*Y!A$vUe=NCC{f9i4^bZm8rGJ_tci zWM1-%@nizEX0k;&>|3Jk0p8@31hQ<|hAX9Kai+*piHZfV2o!q97j?@4eVi zt2Wu*g?U|9+WhmK3#riRzVmwP#;d_%%-*xeEQ*GQW!WC_6!bet;hi=#8Be}$0c$Sn zs39k28(Wvc9LI(LL7fFwo4BlSM$h$RNpjX=Zr58^9U-Y{!h;(f+*7pJXOU=0-ThA6 zm->QAEwD6mr0{3tyU!9iEF$@P<$`R7Ch5cP79Y2?8xN}*9@c`!XUg-@KoiHsISFQ% zzdtEm3mfx7{`q)MXp0@L`meP{xL3dG813s{IOHA??AJZ`^*8Q%cskAmNDBW+(&RQq zJzf9|%R8zk{xDu0RCCVRnl2hN*|NBNJqs~3=<1kRb{yl@Ih8y@Bi3iXo5p+U#e0(J zj!EMo))HY!)xS&kgEEY>ZXxN%_>oGIW|5%89IQC?yBO~e*d>ty5Z8I{2J-9+JB{2N z0xwbbISFG7#RYh`x-nUZwt>WiWgB);A)F_BpgIDYAS*@@_J;(f&e|ge{-_P$hpWx{ z;z@(7oA&M&@KLyFh$*?P;rg4WEZy{F6OzdEYK=%+Pehth$x%hbYam`I zGD=sxo1tD3$ccBUaQr1T#%M2Ss6pyT zbv%vw#naBclzx?6E#?hD;~d!QGqixWBRom6S;Q1!#+m)(wISCb^@}VDH)>bQ3QXZ= zb({qm#MuHkR$&%;Z9${klgayFwBzNlV!#5vZpMTIM-KW@#-w1Y+3#ZIAqS83<}@Z< zJTOWh;3C+8?x-Ia@D(G2=12-z@B%=gxJ2KgsfqE>=EybW+fm{E!58xi|5P}<9;MC} zZ##~#`2G{azFzKJj(b18puFzit>3M`8g3SUeQt7Q3w7UpeGuw6zk}x7h4?)9J=&u; z|5HtMw|sfeg8TNIPu#>Z!@~p4^92f#)bk+Zzk!G2Id>1n4dI(0Y?Ti}n{~pzl93wM zs8kr@ z-M3r>9#6l3^DD1Rk;0{~iX<$50qTfr%H~@vgWX636oIe~r0^fC>0zUsG0=yjngwhnguUTc=?C9Zx}%@6Xs<{w}|^L2CM zwhf(!kzM;vu0or2o}>~>x|(`IiZ~A0Jytp5ob`0@TGwnhGunoacmb@BHc2%SX@eruc{?$d-i!^vWc${7ewflPeb;0tG>d zWpJ-{Zh z_g1!?y51hat^(iN%yxcp&2r4qd(3Qc)t~jrnP$nerYjRa6tSY1?^|Jaj=-5?$Vj!D zsu@yE$`01YA{KR(C@h$huMz)(y~+rVW(>2Hye$+fMH7Bvl8Im@!f?NNzxt# zmxgg<@t3UqM#Dh(DRrEu#25TLiFyeqlqFTm1G%E3bRJD0FGB@=1p`p_w{rQEmy(nc z%V!#dS^quR-$65YQ9bftnt_o|_##XJoLzuHfCl$M?Fep~?J%P}rslCR>EpMt?>cC) zyktG!m72^{$EXap8s168@&yQPx97LVmZG6~*R*sb``;3m<5t@Miv92tc@zB;!z&}r zcI&e^40tPpr;Te154k9$d};R5mXsw*(o43~{od46{96k&mCH7f=yBCnRbxA(G#N}9 zs0h_%QcZM|0;gss|FF6pvBQ6i!MSrT8fW_EDC-pdA^d7%Qj+7L z|1iZ_@mQKTS8uMx2yx{0J-z3w4o~+ZJluRrQ;^9X;wsSM7#Z~}rEB*sgFfYK+J`3I z9W_kDICRGeh!+t9XuKh0wa)UXtcE=iT^DJc8*;ZkSuBG3O*$U$gIJ@wD6wW2t=I{2CsS`wi(}E!#Y=v_)1>c7=i0DNm33mgwTCq)RgWK3p@9f?= zB~l*x3_uMB4b`^j~FOr;-SxG%#vuWN=xF)hG z---eNJbCwHn5H2QT}{?-6xJSOn~WR&as*R2B6GWVTNJVjDi|Ntd$ffGhyXM2lY{el z_HK8J(b9d`iWhgxN$~zO8myixC{2`HlwJ`Y0bcKv*uKw6BJ_o20Z?U$GXd}IsNL`< zM3Y2oME{~_VrhDo`)Z&_`IQE2!;mj~v#w9f37??@z7w$%9l2Lu#tfl5{=RgPkI`g^yz~jiq(9PI<_pQ3elowzltB`>2D7MQJie3%H%$7Qe6z^=fg-8RfjFKcP$eR#F8r{X8CicA*NoVzzx@*f$ zxdZOrkvXK?%>L?I{})Leg3F*9dkqeZ*~Bv#d4lQ>8}fBDtPL zT4Lk{*{t>lmUL69h5hwnZTYijNrpe-sWU&;31dmctFU=u%MLAA1#9GTudp*KFnS8e zEtGdJ2R7ncvr87Gh_lG5q5fG*kDTN~;Mah8UQJIc&clPF!&p>yMfO-hmkQ2FNLr;rxsD-cfQJ3mhw>xC z$=*`R3Qc7q1%Dk;z22HC|3ng5^xjXm$QIN^_}bkwGFDVXki07Yqy=gi#ERGiN`~S; z#woDV;X9&MpB6u9M+JvBsmar8^g4DS$PXbn`Kb$A%Yy>DVaYTG@s-%-ODH~csf-xg zEi}dHc6kL-SBqGq~v9|<#cWH2e7_pn1 z9MhCZy-I!1j6aPq0;B|&qhu#qSw>ZuU;I-E>&rK{yp5`qY2mStYpx3J9!3P_Z`<1) z^K40_F=vOR;|}V5&_R1I3xN~5jCL3CAkV*H&>Ut$WBJ9?!YXfWbG!_tMntd9qji3# z3~#ELvy<299!N`~olAIo7EQ>qH^r4@B+Bv6BE{~o##rQ~k>|ml3BUb}iUxPHQrDYO z^WWa&&07Vgph(i3!XUr1$M8k0P3v{<2}t0eA`XcoM(t*Pw-W4cY)MOZND^4*+9JNl zc2bK*y1s^%m%hPuDlR4@q*-U2e$LiCH;;!EV7hA;K+^mBT5Kh(;VMRGz$tH<3{Ke1 zk~cOlWKhc}>l7&(wFu??bH+u7-Gh718jqfwC)>rd4w;q@<3S&|gen*R@`3Vja;MxIY0K>ZyXW zf^6Ct-@m;1Fs6rd#!Z)@cvPmQ1;A2A;1sGxqmla2+((0ZiDBlKcIEq3ClQ_Ug~sDEQ}VFdD%ZV=1VUs)aY*6mgy11F}DkZrnJ<6$a+H)f!ZE3yb&MDad zZGD%kKwaW5lx?EP=Yy60(?+hOdr5~A1v7@(bOhsHD&Bp|Pfph7(%NFwG3B+^8UZif zdn5a&Qvo zNwNKd>V*V9t7-y7xm@h1ej8e-{^e^RSO7y+hGR5=F({3AA(jGH&2z)^jb zfb0?pdb$=YRhB}EdS)u3La0mFaQr8f(hm+3f_clxVunev-N>4yQVT@f{ogWa;l7C^ zf5r#yzh|4mQmvv=Dl9MISN;<|#bTSfEa@|hwe+LA6ssdM+Pru@@+6xxmXN%0Vh=Cm zhg_mQU6!&$wOjPKj!R`BP%ND!4vSx+3H+wb^qT|?&R043J{4mbUzdl67s~wd&|F>p z0_XU)vGJyPG91u&-1-9&z<9_|;JN)h>395>Tc0Q9HMeUCU2pI6|Mfn@ zgl2b6VV|Et-*;FO>bm|vjKjLQ8%p-^^uccL`wFCh>{IXEF5?yj|N4mkRGF9uhRxku zi*lL6fX0*55cxFz;*~$@Cb^IF`s$%OiIwC^f%w6g&YBi@)NxJEknY&M&(Z%k(dOzH z%rGr@z%LLRq5#rsSB}2+~xxni1=@_re&mDU~h*=CfdGVmNn`lG8~Hc9e46V z7_2lBjDIz;rHV@;=Pw-H1dDNTOiY^tDN-aAGn~3eFG&GSG({3cxGgfX$W|I@#{*!? zdxh@yEz1?lT-8G5a#gv!8+-#{S=UC*p2LR18A7HBYnEc%q42dR=9EVileyjy#i@jL@OM^ zPo^C}9NGTYlmNyxo+at>9y{e9waUdCjK0Tj^kE+4NjN{TZM5kkx-G$z9A9xj=1Vs9 z$QQa~D5>OAlye2o?p_W&nRT)Y!^wv6}OUx~0L)JA9 zpDc_Wk4^u>0dFEO*CL7-W(I5M(_syEf{uXdXL`Ypj40G%S^TaAMxUkEr>~M$7-Ycu zN!r{6^h_S6J4*@Wu@y1ZRTC1ye*}U49KVXGgfjNyW?njItW2w5vLN z3eRim0I}A zYP&ZXuoZ2^1F^sbp@4qNV#^I(6){@BL!q6v5j9r)B^og)Yg#0kk;NFTnupuwGU4jZ zS=u=B{>OkK+@Ak+4N6GbMy2}C8TFE8Rf1l^;v#XWB8Id|nhV)fh@x61kbBQ4_j7ys zfvZFubvwZc^53tCxIgiSkXJA%$4D-vZ{@Fp50Nh;-Sm#>l*z9*Vd6ZIi(%y~WSjUU zu=IW=YUJGnYnpa^Fq13R!)yLPRBT4qJ83ShU^K_qjAN#aAvgHRa5zv7?eh}A3|Nb) z2)D$eCE*G6s@4w?be{0H079|IiTtaLze79b9i56{$;?JRVldYqrJcPolFiX%0+Yw5 zgzi0F=GR}}qkr2!IFsFUM~ZFUYFHotZ0}C#bD8`M{m8?8_x=#u`w8+It0Z*is21z< zzqJ5(1_mSs;E!l)1GZJ)v0pc&^!s6ReXdZv|1jk2r=0T3C*%$x&t`qh_39y(%|n!3 z9cQp>4S=jZGOyJmUrwuk#vDIu5PBubxm(D&Sgg>>-99FdHMg;|aXObsak}5-K1*M= z+v2QhinL}<&)M6&r>J>GQu6*g{Y2XNWSZUdqQ7>1bGmPCmzuy*zm9nXz6SZ+oclmK zpPw4w77J?#N}N-b_MHkNqRjEQ0?7?OO4KvK3MtJ|+5-ehw$)3xqJV9T_R|{A=>R zqQ%<4s&&2NwtIqg(@2m(j1df$`lU2_k4GYxsJ}0|xCvk`KIWd}JnzOGsoLy)j83^h zaiq@Q13b_lwpqk$)8gpU!O#gEyzK@)ek#Wm#5BP~&J`(Im;l2R*k0*g4`^JjeyHT)4m^#Qv7H#0T#;avJ#^qIBKv&RloGplJvVD9SCAYCq?6AYeXyPi4d)h zLEycBI^V!a)PYqJ|6E)Xrat9JN8KnzIR8AXXPT2h%5KH=0WFf=a$*49*j7Nl$q3=r z*pel9#g?p($2tTyKeviwzJ^oOC~%+yzu}yf;z39uu|iycO za>@%1Vc}+b*s~aue%L%V+AzBt9U*|)A;jL zB3B?M0ZfBg#VnIM_MGSkcBCR@PWwuMRTowEc@7rm^&8+~mB5zT9b?Pe(8$l2kZ!@eVybM!2 z-7Fz4=U$|(%npf@xlQ74GcGRkq;Jl~>m!zR1=9i-jJGm13wl;n{;O>TRn;u7?}Z;BD2JoR z*M?0uVx}jv>a%+CZ#6P09j6m4peCo;8M91DlFTv*PZp<0a4K>xzLotF^!Ggjhm%Y- z#*)M_h_wvI9$bTuMsKQC;%xX!PHQ%l>jap)RoTQbPGRV(BFo1 zJqD&4{}esJWK&1gAbkm5$mqXs4{!H~p1IZKJ0#&>eIjO zKHaBqe`=!bdHj3ZeIk_JMsqQAMMO6Hzbc0U=)d`G6f{L{NX=;eVWoMVnKeR93dSBI zg(#EEyNfLwy-jcr&g4keZ=+FZ*^Bg>AkSRCcV$w))}t$@X&g=cKG&MoTwVM27y$== z6nDdxyr?zK?lnGvT%L5R1g+G!7_wgQbdn?e(xYG!H z4)c3Jv(pr)$T7sxU+oZL8Fy27=!tQy8S7ks#Fw>`4U^3^{oFnyd*EF0T0f+{dR-;& zsF7mel)ZSD$I@w))On1!t)5-SH1o(l-MVA=ZThi&hDY&d?mVuCHYoXC@PHZ8gN92dn0#zvnOwvF}E>?G3XpD+70hZw3h3 z9EKE~S=2>5~{_TU^o9A9cx>(ZP-*0SWnn zjqJfvkDpuA*U4srNl#TMOr_uROE1y8i`3A|!Q`rOsE}Zq>F^RqcZ@%qgr?p`R2h zIJ?@olXaGG7v0xG#b=~sVy%cXOn$Ilx8?E z7)UTIs&|AgWdxM}g3rX`0t>H(oX}To@2E{#HX<-9HtzVN7sz>Gu1g!!f?>1Mkt5+I zJY7@A(GPW+7yVz_<3S*<61Q(IsHqCYXZnr88us(fQsXE`G(hnp4=6H1Y06Sot5fO4 z1_$nx$pPaCrAsKe4zGU{Dq>o1xO?3UoYpe?yTU@P`Wv3P|M%!E*`cjs-o zTdP}mb?t(2n~_2sSE!lfAIUDsXZb>!pWp2X>%XdKa3Up=)bGU@mz(H}87SA7F__h; zVaYAl{xJ(w&k@7ShyRoTN3tP|MT2QXMyuKdLezluoWa1x?AZ>y-ecDC=+RfHTIgpPwbCwo42KTFoT6R{ngpp@=x8Jm>ReXg&TnQjG?kJlqgSJswE%`@1soSl-K)5joum2!f> z5d`EQG|4)em45w!Cx65G-isq)N}8>PerN1w^*Ik7=uY@x)tv4`I{yeu z%VU+k)k^E5Q?=Jrj$RMMjf>B_c0ZDf(#;iA+|}cJm^N{qKYciPrc9eL_x8weKoD`~ zzDjlLz6p2M%F;n&neM`$k;ie(+?}UCvG0$s&cU{N!vPEiV?_2{!QAIwPwbyrA%rJX z*CTHC#~--Xz2iXpx~U}&r7M~@R5CAM-(HwE_ZwZ9#?`|0gr4W!ozL#pGWbHl_#UP7<-0|wf9pR2O z74Wm|9rqFZww|oQ>+>k!bGpl7sTG8uE+;v-s>yGz5avF7PuME zd6g>QF|VNMbl%6;(C~M8)$1DK{p=?12+IGR@utH+(tXpD%3a?vpliCtF4q^t@^qHx zll<|rEIe7X+!^J4>B-~&vej}G8U;IGIRAHPVB_IZc+%k&OxyOnHsN_UrFnhmOUqVu zaTNK};b`;H6VCtoz;JJ-$AxW*Chca8!TzY;rFqZte+{B+Jb8a<#T69yd zX)H@-=eY4&%WK1&Ng?`+1eZe(aLSDQ%-$6!Yc&MV@TsG8Pg`AQPx90gcItdoFsL86 zWuO1Q+1@mE#zdq}|G8+Ko zqeou&?xKxnpHBM@#K&~0V}U`yx=dWkGD&#Kc~CaaehH8qsE6!BZa=2w7mqHbQ(0G} zIGZnJhd)qn%8*tN9o)Ey3KffN;T|~3L|(Ko8bx(2l~nmy-3i7ghD+{q6h$Y1SIGsk zUyyz!^l;J`uoh(uoBzV6p>&67)@)S%=M(%$S1(jhAvQ~;g7ZI9pg};3A zx`j^8&JNL=L&>Kk^LXd{#jG?V8reuag@YmDfa#93Lh=Vcy~pGE)35L)e9r197=OJ? zeg$0`CpLA(&UKhSVl>{r8G&US!a~Hv1vmDG{`!57G=?U#n zNidFf@hifjHxvFB?K1i*hukS0qdVQ;feKbO=sIc^Wwzw?5DNY4D7M@qha4xv$PPGD z8O0%?k?r%moRplhqlNo|vn}S!dKBt{0->r=s%fOtUN*^n)-#SS<;R86X$a?-OL(1b ztu^TX;?>9z)cFw^5K)F0hGIYr8)@MzvIg;)`7>HJbkMTx_aqB2HYgeLePS`0(OV2d z-d1tse%J{xnHt3#3EcclcN^VhZIjZug%wy+4zuDX==$?9hz5>9yIc#im+qEHbarKsS>&& z!^y)~8PPUDhqx9@P1AJrF0238tLmxDgEl4;ETE=1iZkZ4zB5Qcaa!!#6w{CH7+D6} ztCKWDddJoE`-+u2rRMU>xTeF^o*Dlqvpetl8VMK z%Bjoo>*mEKYed!KIoD_p5r$BRD+mls(t^w)+Y=SWn5i*11FX*M7Vg-T&K-}TPm}-B z+Dx?)oJ6)4sA1~Iud9kB717aWEEEI(0t>K9|K*%1s^(~i#L#nI*oJdxQk6#T2WC+e zQrlTK4#}4Bm>ps}STXI9Jg*gixC+U_}Kf1{kS3Pj%9c-R~!Nx`aQk9RvNydysy^yq6i9(PYsJW;V1-- z(sW+G_(kWv<9#IhyC7}UdUhr^NNk2$3J=!2HW=<`3O~Yh@5b(F{Ru+-68Jk9m1mdm zTf!mH*}6M|W*10(p6_O@Q*wa ze8;S4s{RNE6uwbMW~=GFLWB$Krd+lPc+GcpzK?t$^gPV?hci61;H{hs?s}722f6G> zZQVZ;x&J*$%JxnkBOOZ$ zihEaUeNOz|3SOg$i&liv?1^#zn0vV;k~Z7HYaVr+x0@a z|Gw=J45-9mHSGq=`GJn!Nk#mZ?>fP~imT4j6D69Pz5m_%r;Vi^kMBQupDhg!pk@f; z?&BZyhfCbwUS7jQfJ6>GyU5tbhDLXjZ?8)=@BXi#QqrBc?njLGJ57;^5s?q)*JY6x z()Xg4?NNp`Q^Ub*Qq*qH?u=iulJCXo#&&n^$GbyU#Ik-<-!NFRr*wA(r2KxX>Oacz zenNHy#;e{hecw{L>UQt)8!`SO%CcMNyL_FhY3BDZVYq{2*b^1!f6CPP6l=^@?WkzFpk4ecf)c= zTAIZ|d;T%Aes&^Q(ne^ZFt^WOvtOFf`lq8N^~Ga5>}C`X9mS`$N0KIj8WP$@9Oj== zKZ7KT;vp&IwCu?IL=Ya;XT0>Lj^XXvNKzA)WzQ~|6iRg4TVPjGd7>Tz(tDPcxkNJZ zW=zhj@hjX%uq&zVa;^e9CppT1nUvc|{IT$DmOmbmBct)Z@s*ZNH7*yw?i-%%n{$Ay3@#R7|h7~aV?dta_b55^FL=VkA5{(-Mm)stMS{j z`o8*)u@V}DhBs6?n6l>9mp+`dx`rh1c4DEykFfg3CGDcJp9Jnf{W<>%qQoy*w0CbH)2@bKJwQid?0A z8Bl)^V?;Gde#0tdWMWbi_|-6|)0fnIQcPIokq4EDH)L}X`6V^Qbw0Om&F-xkD<~~7xm zp#*%t6?WrF*i#s`BF6$rs}kDMKuR-52~qX2wI5mHi@7!t?Q+;0h!KG*KfGL45X45c zXNif7jSy;Hd$9y@EpOZ@=>G6y0}%`$%)1py_i#a zR+e(}bmzb8)wHn9D^+9NBAEV6ZnTxjoxv+3{w~O~;WhsXS82pGm%uAmcoV%8=ni8@ z)A`+MYPMx|=1QH&x0sI@78mk+Un<*}lk5WQMz9>=;q1tCBn$M}+ZMT$Vm56yMDn3ChjgrsqfY0Hwf5d~oV^)#=ZV22*G|T(p$3TzI%g5wQ=e>!2mrNX9 z{qS&(e^%a0>E{^tgKv~2*&99C%MRSrt)Wkr^ocof=bO;$6xoaL)@wyp+uiT1)_=II zPJW%i&Z28@nw@cZA9)v`tq%}iGw#gwSyA3D)a3_>@KamO&1eNg^IP$+!dV1J$;&3+*7BNQe zAoA@o@;tWA^*o~>_1i)60ig+xY3A~dJFkCFo$2g#KGZ|bR+=XX`uCpkDRjem!>8c< zt)|sy8g0`nndRes&(;+w_h~xG=WrhF`V6JTan)yf?BAkg_#bJ$*H^=9X(ET+tqGBt z`LNubB-1Thsd-M8%-LL3lkH*Hb_3TYG2Y&8&qBWULtoi=S$@+GDc+U8LE*F91Sv@$ zsANH%dH2@>9+xxkzUgm-a3Z&|!dH*Vxt=>M9v^UN#0H&d{;xa+LnYZ~1?_O-xPlMX zejne_z#)t38}~~OR|9xFdY;3O-FUN5GvL+jHx$1YD*s8#=kIOT`u<^goqJvv-TNNv zJd+kLtWRB^&%P-w!k{v&q+GwE3Q`)*_{uf1s3sbT(kv}DihmmfQto_k7L z!-qo+&OK!RzXsImZg9#H?Q2tzK)22=3vnQo^wlDwoX^*+|2RkghX?S#hev}!nL<)i z2uq_5X^%LE0fDx92G|kI^&nzwJ7n{v;2nzA4qmZt(y^_C12GB0zf}7IQ_+xto+NWk zI|aFM5l}Q1i~>tT2>iE-e1hibpM9gs zSoNzlWJM@~rqF%IvWGJDvPjJcFPt)R$^L{deK^QV9Vqy*Sy-e9k-TLi{G~x(gUabC z`tZ|=4Ih#|31`!eOaKqw_;_0O*o*?aL4tZJJ4T-3@XL#?z1n%0J^wjr0UJq*IPOqN zf!)2oZiDb;(oK%buo0~Yt(d;`4E`>ptOSG4)j_pwSi$~eS#C>vY}v}t=H$(oLgah- zM5uUugF(kfV(8J&1ub9XR&3=}O}LI{0b$=`-J}l$+g&VykJA);m$A`I(6+SyYZ9^u zXff8K$-SnQlPkPdSI{XUwMRgL2rUNRo+&x zZKZ`k?V5-w*x(1_3Z-w)u4_Nb$dmq*?fGreri4kz4nu{-BjzbX7O)svfxt6RGU6Ms zuKe6S=GK!pFwuX5RD>VgyRZ7uvyf4zh_wDuZvJEUw}Vg8J0qJ4;5?huh*?;@c9~LNfRZoTldB z?yUa?lMor10Ii~;#=%F!$naH&nV^#3CO9RsTuQ_yvKF38-3D^vhd%H-IY+WWG9L;P z^FVgTFjp&YF6I`zu&UgtOwwqhnYAotER^s0KU{|P#DBO5JSDRIlUNO3jk6-mF>!}h z6{k}PrDd^sa2JnlN^l3F`f4YF5bI7gaH_s)aLp|SFUeKN(aX_`llCVb&MvkpP#Y)4 zr}U~^l@xxDdujq=Qn|X(>jAa9Ng0zcmBjLC@pDl8PHHNlbt~BSj^N=+%i_^9VkNY-B zDWdlQ5w8O{RC3q|>^)@W` zP{2ZORwO=dmNc*M1~ozx!{b9hc6p2QSq}<57ot zj;~W!l#cu5mL4a%aeX8r<0(%+^F4ock+E6re4g^z{$mI#aM+%? zd;?2A27QL8xNSc^9tYCx;s1pJk1-6x*;u8Vt7VvrTCE*j-N$>}ZmnJ?NuNbXH{Zt0 z|8Lyb>c84tUiBQRL%r<0fv>o%b|)W^7#Y2|A@x)Yj(b`X{2gZvMX?=pEA#`5FcPq7 zEe0945gu2XKnz#Vf(8z#mgmJdt7tjJUtjs=5PeRe7@zNc5?A1=6`DG7qq7z-BSUJ3 zXWS=QRK*&f8u~LO#|;kFL@haBK{(qz`^zw3R^NK$k`sMI6rSi7s2L5N>xeZ;J+01K z$r`_7QW_vnnIuT1k&lSo7k#G)m4K_}q}nI3v;l>E)V4h4~6PRJntwK_)P9b$CC_?<8G`eVPOF#k}SSEr#wuDK~ z%3IgE3o7Hlw>;}Gu7aGS0Qe*97%TU=PqG~!!y3S-nP}@%VH10cEfsJLlm0JQr%~Ck zEZ~4UzKTLxLnAN1B*5OG=wTis*Nd+QPJ#socy!7l_Qm%IMXqqESG2ix*fRI6_EzM} zoW;1I34uH(bC{@5=91bj9&8JuQ+sEM>=Xp20Q~S>ojSE|pyXlg@>&q8;ALl&-BXkIs;E|V%_+PoE2T%m z@PclN#tUFB%8EFYi&~{0w@>yZhr9YcH1XAdq(%5ylHi41j5?^aZ|)UeDX$2e$;_NB&{sBtjkfzSi`|6sguo;imJaSC{`-EH*UK-3ZHUopiM#gjyJ zr7gtIgR8@;c{hKn9>(7NqOA?a=d@2}L`T#{`g0%7pSk`xxeq%=&;t8_oC;2a0H}y( z2AfH8&Q;dQj9X}MGhJXbNSMHwzPNalyioaK!S-WB~0 z$l*!nyya@BlUYc#(kW;M@%2@(J0yFOVSUs@HSES}8yk4FtCW|3R7~!WT(PC6WF^ad z3oqH5zH1qds%P*`5-$|EN{`zYW5q|a4gia>e3(}PJ-*Tm5!F_t$89r-O29X4o`(G- zLJ1Gf@-UQ<+fNRK&8cg4f`UWc8r+ob>1TS1^ZYkP`#87Ga-&1b z#Bj;{xEyNHRG_^%l^fLJbb5m4c`-zAl*c=`{iM`AY3V-D_tUTq4O$(>PIgLot?7l= z%Y!A%9pZ?+_-(_(2#x1Lw_vlHTrpx{(rWp>_GnnQ_2$@7k(f{BW;pK2@cl%<=j~wz ze6q^W25;BvPwOsU@c2J+y+a6e(fvdw6i^0^ibE-Gwza1c(m;YA8HyESfR^IG4) z`-UBFd3*mnjciZXvU^hh#bIX_W%*IGlguY)*`Q|-i3RWC<1Yy@@7=VzpkEt?QOx=U z|MVN$#!!NYUxFpdp+k3XgObpA&+~ohQxkx8WwHAK`C+STsAM@0g!cZ*mb=-R_Y%7` zpn0+S{l;0ZJC4{jXX41ezvcZ_>G8(Wa4$J~r=UZ4MC5sr!d`#g{rm;mDzOlLvm~n1 z^@6d6U?YE-oA4L(l${`OW>fHWvf=fer+bLZ=L0EwrDr%nz+K_tYVgx~(0*UW_xLOz z`o_&1)%wBsznotw$^So1FRs`Dy@)MMU41~; zk1Ogk`Y$jGl*+0kquwbKeo-;#&HfX8qKsxCWuF}$UCdVIXi$(Ss1Krg<*%$2840Cv zcp)^^Q@yyvK{k5Mx>Lt3+S8EG^^POi_{thO1Cqq2dUmv?`7D$H4v#^3L92O3jK8!` zbiKJ1!`P1t4=-h0BZN+HWfR1x-I*anB+ivzLsyz~&iI?MC0>6e~-RX4S8r7|`f#6gSVD;|zS zELc(w{dd!63LOY_5kB%1g`bGLKsk*=GR$xi8(A=hK`|^!GYF1Bi21gfd=^1pzvw_y z`n3q&h3<3W@<+y;)r5`BRq`yg8A_ior9FkKiQukW6(@Qq)FRjOgNfF*Y6nb-?vH1f zPYrK_qgEQH3*fnITj>utYtKc_x5LSR@tUxfPvy zut6|Ad!xEugaieJnYt6E6I3}2kMtmr!$$L4ROF&DaaEvYpG5ku8e2Bx8r4`rcKmQc zC@X0h0Sk&&lr998no3nD4Z)%WccFVC1|8DBr78EVsT)hmIV zij0x02@DiwEG4sJjQniHpb9LCeJFw247O?{158OIY9%>*Zm(*eN+ET@#B_fQqg0q` zr%GiAn^anPjw=5^JN^evbZ9FdH0=}6-W+Bz($p%Vxmq~3DB_ZoJ-S?QNNYOG`;H&9 zdFMhIUrl=l>LGJ+CF>BrF}%{P#CxjCJ}h_0E$V&_d)@xHcW|0lkoJ6Zzcl0Oo_}|_ z-|nj(IBGpOLu|PWTAh9)<44KJ^9^-$+Z<*>^G)!7-hM~j_B(jsdpk~gePluUczfk_ ze?{|8dqdltM7i|5j-0Je-7EN57PqW*BkOs3=ozfU%fb6NhD^hivXnq$X9GJ@T$XG; zkQ=u9izszE#+Dj%Px{<=zTbvfKHUw#Jr1IDY`u6oF+HbwfT}R23Z1*^8y04htNBrN2XR0;tg2fo(^|<8D z!^Co58AQPncb@XPZ+p|Wd^{Omj~e>Kl`3tFZu2@t6Kzv%|A&r)aM3cY5r(KBO$Hxl ztR4>x5r2Ej$p8HF|8A#W$Tm7CMB$ZztM>yR_@vjKojgcl_>bz=fN&7NgMf*h*iGn>lF2&Y1z2mx@^8eHeQRaf@VAdFoK&9j3oyJPmUnW{g44chdzSy!pKLdNQpPnTwRvY z8FUJlqP3-LC~<}=#c_%?8b1ZQbzuI4E=B5~DCeyMi9w{Ij$^I+=-0CFRpM26Tx#Nf z<2N0-up@5BNVv1pI0%tN^;kkpC13c8z;ca}wngR5)%~-9fOhw1(+--fwOk7VHa!Du zV1(PPaVfHaAuQB(>Rbygp`vq!V{f6B%L?jl2?s17@G5JaA_ zE>+JyLTNG8|8)L~#HcxNm>h3#Xn+nPwWS_JVSyu7!s_P+idCXRM&ZC`O8-TdKGdZV z-kh?+j~PQffLqJIWNTt#0xB9IKaUM_`ZIe+jv$|a%BfBr!m6KWY`>uM?>W|!ipgIw_U^Nz6lPF}Foy+Km!r+yl=7fb8%Em8& zwM&-d?)VNhS+Pq&ZDOinCpf`;C$6lBbY(8Z(DXFJ-!x>ll5E6E3hBua5`t&*jk!fr zBv>E^iC^{4%MY~-14uOsWU89Kd~#w1@Y*FyTu8PYc3eqX6tYq034LubD-dxCy!siV z*-j~yxkKs;k`;#VaL(~K^^8+3TX{I!5}gp{I=E$URk9M)X>j@&WEhHz9rIix;nOXn zVH;9UI3}@#q`;AFY%b6Fkl!c>3tb?B16kpStqE0!1 zTZ{^OvXnBL&ZV^tg|?*7IITdcP_$#cX(2vSki0N5Tz$TFLiVgwIVo2~?vK1VP@ol8UtrO?M|03?=(-=d_ zlBm;0&B+XXvx5AIBn0CG6S6>`$q`w|CCoq>|N2a*tFK!`hiMNw5)JU(cq4m2Pl1g!c)_tx-Ei=wK}3%Wj*`5bpX_TbK~ zb9-L-(|TTdZlpz?aZop%IPydqIYz>y+s_+_`5Z%M1fQw#rUh=gde^4au&~I?G22$B z0C^pGrZWac4-+*`=5=E?+iw&sf*#>WBkqns%hZRl*Az?MZJaIPXKPYhthd`oe|HzW zpOrko7eA}R!K{KOn)c}6n>U9uRjHaS@f2Qe4a1%eC}>|3PsHIb`M>MV0hrNR9{@HZHL{nA2_epoU_;M zJHmQgPxHV%Iq$gJz6TH&JucBT7wZ|oPvh=a$qoAPZ*}vcNgIcR0xz^NqDv!{802t2g~oNLWq&l&CtVQqiHa- z3&%k+)*C&@0OEzPa}5vd%9t0B);cKkb;6W!FZ;2M@zu0@PLi1{%iJj1lAw)Izxj3U zhbc3JqmP!|7wq5?DWXCHX0W}}w?e7#)jPl$6|b%o^yNJo=rv4xYos21GT4T&nqFhT zDqjOZ*&U}?t=OWGo_|s++lFFMjvX*%%B=@2F(4y~`5R%r)6Qav_qWIKBsQ%nZ=_uI zPii)FOKV2*Jzuuw+6G^mfGdeitJTVfm_J=Y_TI0v;=5#7_~A~}nD*xDhz|NIOm4wN zRdj|sCvUb+LhC6(3ux}tVMt@%D@8n~ zxj%OJiK2~VC=Zjf?U;^-iX?>NBzho30jbG?vx(mTP~*nH<=_c^t5jDd$RH zYc6{tgRB5M$!fG&ES-!Fd3m9=SpQhlUoDF75ef3+`5gIu*62DCIvZj9JLx!H&XEN6 zG#bD}`9=9sf~4rw!EwERcv9#5RolIvx^>R#1a~I0;dx|3=QLNmdkS%Y4%ylIa3f-{xw204$lhTI#NYlhb|u z^&rK>{*q76#FpSIAi%6hbON$M7dZDf{#s4Qvx}QcA~KVF6;k*1V#DPq(Z#@JfLyhD z$^FezTs<5~fSQUj5nIYiN>_+Yh*-zPA*fp=F^m5f6N)H;t&De4PubeaHifa$Ojk@F zD2Tg^2}Eub*U-a}u0=HH6&Pp_bdPm~(N|lO41cy%i1HK+Hv;6YX{vfnc{PNG8;H~Ib3x}G>|aRiSeWplNPO-*r||7ISC~B zRZfhSC@70JX7^L22BIbQ#yU$34T zIK#y6R3LRs5VWvSvF z@%fej54vuhqLz8zP9}VA{<(5zcC=j{D{MGVob5gN^x$gx^>fS!O_2%trM5&Cxo?a_ z6peg(hQ;aTg#hdGU&q~R9Jv@}v!>jk<}_cCl+H8&oWr4};>E$k`!%PC*SmVI$3J#M z-$%=L(f=XdtKjJ4&_O2JYebwt$q?GGx`%=+>M1%FZ~HZ&ql3V&ai42JI=Vmg6@OXee!D zV|?aE!udZ~$$M0uR9E%3^)=J1u`jw)bzQJ~FOmd=CbwwkT)%OeIrlM5^6O%iL(x3L zyuzt4DbLc`Tqe{+L;Ix&(v0vCQ7?+D^lB@#6tu>YGv_kdR5!5Y(-=^K=YGXkB-rR9 z;xFbX50bg6>S0!+RP()7^A;>(AarR{N+`veLQ;|n42nfF6$R{OB0orQDiO0V_bmxI zE_U1bt=P2Jc}DCAysqz2exPGb`K9=;Gu7I`>FM{_d8ZoFR--9DfC~kJ`g0E|Wm}9- z1G5}9ufMh;X%#y{slo-dPXT*vv-*0Ew{bOv4l@){Q?%^Jb^KUWMA?u5z+hGaqiN&$kKk0 zWr<^;qyHJL$d^KfV{g^8VRG@EF_~*<(?-`^TOco|IKY*Zh1IbI;?NG+p56rWFfG9^ zQFq$aYFtT?(>4;?v1=@>&VlH#cGZypevXut6|ZLGpA@9 zYHMR#AcZ0R#(^4@vPmf%m%|}cZTw#QqZo`X^AoweMwxUK zGO|_33@8i2pTAkX*W6pK_HDpO7Op6OSEMpR2h$vb0XdmLTpv%5z7VOtnqwmJH2SN# z%6L3xE+H$;6o^#*b97Gjb39>R?gFzZOkoB)Dg~C`;UBdH^ut;F$*c%cB8)ZWUvxj> z0_o#kk};IE>CsgytVxMcl?WUX6Lq-xTuE+jjo_(GxizK*+s2z*3xiRex#?c&XqZIf z)iMOHqg*Mmd-h0dwlnP0yO ze+{?z8vy6AbKLSY$fR`r7)7Gz6-xHDj~th`y0d(tPh>#VrPLXA!Ksw()-L4zw6W`Hx>p2pITO^8n>yLQ%ClC0%nC7wHX%cpEw7>gC+_CFr1Kj)jIST<=wS2n& z86rM$PJ6LeP2~=hez#WI$d&BrJ9Vg@Pbt6&!Iv-;nCB+EKU<#}dx@>R><~Fz#8U)s z+|)Qd?IxX3ZY3%F2Y9c6>D>SW|JJa&UK5N^NC(6I8a)Klfd4np8S)Qrr*D@NC$QNc z&CD_bJfeLbUeO_U|I98e4TUjM1QHBA1P1X6%U3cFrkZEwFpuMJru)D6Vc_E|>n?7Q zZt+XCvJwAFy(8(L_+uN=e2-$1w=0^U)X#**OpX~yCLSKc8l01Uok|BcvIh;47>rni z8Gep4`SZuWHTZ}3(?D{*{X)7FsooXgbcKmzU1I+4vS1_|@p|m)&%zi#Axd;8lefjz1duwUMV6LS~mgfp@wK;|8Y ze#6vtpX_DS!1vHJ4Io_Cis&5x_J;YQ_e;3T_g{&kqxb#juM&l)!iLi9AB9ru|1y~R za1@XCMJ(bOzTNzX@gO1sBr{>2X9`mhgn8>jpWP%U8yg#Vj;cI~D_8Xbw&@qr1-VI? z+$3lf9=p$`AT@T4g>Ey@el9^mb_?{E(%^?ITb(yb5X3)Z#*yexzR^*n<%s1kgu+11 zh~jv*OtZ=QI-B7Nz#cz7idwxp_CE}s3WqwinJCP$Tu_ntTzqCl zWq+-HJ4*f>enAFVRa5g2(#vK`=kic}jU>x+ z%Tmj@R9|3E*_&k^^^8^LhDQ~YvW}`aBTM8I8?2BN$PpCeY_iX0>GlyDJC35?i}(Wx80=jZ{5X;7UWT*UztaN&m9@VE%jW4g_iiM=P@MdUwUSgVnIn}| zoD(P1IJn}J4qv3!nW)k-U(AL@CsXjMA*=qQ=3k@yGB_h4d`sTqe?fMm^d3|L9`5ms zMPd;dsO0uLJp4ag0Ht@~kF$?wO!tpHCC7=exBb)RF^)Rq4Sd`M2KG!Z|NKFu1oq3T z-!vtSTtvLUfh~3Wb~`dnX8XS^h_klm+3Am3`Mpenzy}Or-%GLg89XJ`w!wy(uE`{& zp5fQ64lS6ihrME3qobY~tON3wrP!+sAuz}qq@z}WgX`N_1|dqwd46z(GOpvT4sx%3 z8du_Ve#V|TjrO@Uh&@{yP!PcKwPm~Fn-RQ+%Jn#enT{8|zwYfC;q}{}RU!a>v7L2w z0I#izZ$kx{yJT&D0L{#23{bb769p`0oDXl2via^HSyp8g={rZv|I41zAM?>YKhQ2B z;@~WdS%C+LeitY{!f~ztK^q!z+y^NrRvG`(>Haft)WD|p`=@Bg`!?Hu-4eY^41uwcoiFi(QSe#p3$6%&S+rHhMPpCPV;ohN03u9N~SA z{4soO8cv`4hOAI9@&wpQe1OATjjXQ<|8(t67r z`^3+q0ayGi_-#3QU1dO0?A-U%cZwPH?QL_$pSs1im22$Gl+9^42YMtu8|_#QNTG5L zFM91*et5l9Rl&2ityPYmA%C==S9M%_LGIfn4YLZFA)}Lw!R{78eZPk6jDz^ zQbp*?-&KQ-M^9;&ko*$LzoLZ)F%s^IY~;gC7wcVRK~8E%fji_uqRJY*OFixIqtTa6 zCHsG$B|Mp&=NA^kz>dcs@CJ7nr3Z@^Z8CJp5_M*=Rw0=Y1Q_MK(M_>-4#L6Q)WbY7F$! zNMjX8v|SH&0MnYT(5{LRdPf#Ey^s{5aS!z*SM)vOvnGd$Z3q?DFn?fDMG|i4y(7w| zcQ$FMEfFGJD~L<^JK&$??IvPV)~piE1B1cmV$BvH?NkPhhAgWX)Q4&tOFrf8g zDmGS)6g+{&!Q;S9 z;3QlSO225IY&zYc9F4O11dhV38UM^}Vt@oo9eYT`aI0Kc!CWg2-B?-qPI*u=`V=`% z&)_#JvS>)H@z$G}Md6AV5W7Kn3xz3^_O%{v@IJY^A@ zm|zi{+B;wtPBBtO-2>d4sj~7bE?!eUQ~LR|FhO9 zFzj+;KtF8DR9>0I(fo#3N&i<2sSJloLzF(j?b}v-rMFC2ob%B|D($vU&drg*9{2`$ z`=xPNf$Bf-@)^wNmNpP+1^ju;asZ{V?A=TmSH%4jSoln$!UA@e=4W`?1oP%gxz`%6ZLb#PcX#S}i-Zq8WsbA)90{+?Sjkv4sg=@Fe?Z2fA2n>WzA z4>0+c2+Ry1FkL%%|07sLF@%3a2@ng?y+Bpg8{{h})QC@C^qs^ir;I_i24(+W03Jc% zz9jy6Y_W(5DNqQkoD?EIQ=}?bf@(-Lv>Le>QbHitQdyD8Dup%gS+ky5>1j(L_dVK& z$V`Zte?=XV;Emu}sVP%Zvosno4-&hR8U5Stt*e@xtNlA!?vr;G zJw(TL=e@KYfp^@)I|~{j1VDVa|F@MtY)AjNKf+^rkN+Ot<=!E@j2FNqG=(-e-`b8k zvC(+IJ!TOe4G7E!BZ6^jVKfj11cQNq?_7ZZs)wkbu^tYBpb}!pxR)Ip?tQ`0A-A|A zqKG7-l(;`dG=z2Tn0cI>3_?GVI8TVmPE1Tpd_>7JrCm$7ORVBk1gl`xF*8iSE7&}q z(S5Ddz9PkGTv*ja(oCxjjkF}0p_)*Hl%4EG76Vs$Rhn$%v>+uTYa-Q(tyg@b0#2KR zS;|OG)RgFQJa6Nf4UVG~(kzp@hTOO0l941Zvrur)9MrMu^sA1qIdu+HXOW;!PLb$C z+!O8%mKtQ%;*zNil(x_}g_3$q7AO^WM?KTH(3p}fldK_GgIcUKAX37}Kq@s=e3;1D zrBWn}EZtkAHZ&z6MG+HLfEmurVMW3~K`~T@8T=huGgeDs)r3_At%CJXI;Sq2nmxug zL|0cucfiM|#G zwHPCLFVHpEVugObi%xfAWrbujW;#RKmTVbWZD1u~tBJajs3NF2xm21`$<~D-X!0JSru5Ly!BkKW!<(ig+J0EwLc(yxrJ;I5Ulk0pII2#Q4HLmcs8T6X zsnsZE(FWF&io~(qP)TTpjKnZXjid`wn$u{3d(3Uekip#i&R-s&tgWiB?RUk`l#R45K3SqAUXh|fp2st^TSC37v2d-^2kaK(>Cq5tq zZTGO}_*eMq-+qMG-#o{uZ=7S}%(wZ|Kllc>-*_W;ef2o|`JisH1sfMn^XgNN@%R%j zbLOp{O;_3$77JKCc#NYbAK>9n+{0~q63zQoW&UjV{0jAzM|ttWWzH@@*AB6A?05L$ zL$`6yO&`WA|2?I<$myq^VeyIIV1Ey66Y=W)V;p#3H~-OmAGhs&_uGGB;^RQxcZ@sk zIKgk+{&ikIQ@MEgB~G1rp1*jphP=aC!Py{r9zp7}om z^YffO^*rDB{*O5|Z`pI?K|XQM-Tdn9yGi`@`=B~s=k2Gy%@1EY$4i@ruIyrVtYo#!@T?!at?c5RJA&CjqNOj@H^yM>bnGT(XaA{Q>5=SQbI{`TI6 ztCpOZn3$ND`0#KKH}Ubs9p-a<{R(Vs;2WEGw}2Y6Tun?&e9&Cv)YCua-+k>7p4seJ z?<%!k{Tx+Iq|}hE@nuc(Dl5Bg zx#jb}%WprhmqUB5@wUP%y!8XV^B3Rak;h)JxWD%o6$fPXaviOr9@3R965x92)G~)F^4iC zT9n~>i?dv5U<|Y4O31bd@vC-@O?VZuE#6@i{qIb>3`f&monYrNwkLiEL;1rl*?#=t z3XTls9b54}r1in?V;~F$mbdRX)>H|<6En{m%U9WM(Ufk4!N9Q3hIFG5!DuXSwc&tJ zEerQVo|7-xqVd1M`YiLkqMnB+Y0VXD)9^ z+M?Q^A}BGddy@?unv}@7lGB{59d00tip*ozqvN zvz}@_?sL=|G6|9(W4E0EiPaa96s*)(QDgWQy?%`MpknkCag-h_8ipzk>I$ZqL%#rb zL3>C!^i&Pdiqi^65-N$_6{!gaG(lEJlasQ6La|C;>vHP`;!ZNh%_&qwLojc{`A9O7 z_GIlr3nHK{yCm;JBk9_Y*-ngvPyh<}`Xj^A~69VFHz^uiZ9rB^9;}Z$=7)5>_y&c4|4GC&+ys5|DW>3`wnyD zh7b6705+#A&hhFSb7mX$ofBcM9b|3oAiuIBdSc>ZM|+5yZ@!Z+J#>ISI{zk@uDr?F zv#;>YAH2pty5&~(w&M?S*T&};xcuhJeDl&VerxYO?%w}iIewXOV8OXlFY^2|&+}3b z9m>kSBiwV}C;59{xR)bqE!Ug~(+hN`dHMV$&Rl9}4(#XX&8zHM{YAHV&&5}sDI7lZN$zZ8iKU;N%3|{^p8x(2c=^IxTymjZ*~Ovz9^&>r zd)WIPW%h4f2FEj@8{tY$GQLL0cILM zD}OX`ps;@FP0qi5is#R~&dbj}&8t^3H($OP?*7fmXkcPu;unh7LcMZ^&ppj6 z=jUwB4shuB-Q0O_7yINcZd;GpvYe$x=;xr&ZM@B;H(uwB^}@!b*Es$BOT7Hz>%48i zp5q+5`zR-l9b#7oNF)_B0XTC04bHsyBF{hhBu_o_0Qa7}o4bzR!p#SEF}qerQa4## zxyXe#&ayt=;PUIwbL!M-o_*yC2DCTb!;xdhxbNt`cmENpE-2lC&5bLpzx5iYUwW1E zZ|!3Ly}w2KmEYyowO=-)0SC@;=JBub^?&{^`4``Lo7u^?>38qureg;=`eE4vddK3* zMczE~60g4XIzM^xInJJg)sw%0ef7WQp+l3=z{JGF1Q3tu9sZ?wd)b+9OXNPp4ZZyK z4&OcAp=}ieLTI`r#5&u#dYrJePY9{eR(#aw-Kw*!$`~gS^|(66z*xWVZ$AwN|J8Mh z$cWp1-o9lhu*@KAskyg@#{^r2N2(#b%pYa>Rh1a)yLFssnFu&ra`P=ChwzP65eHlW z#a$7Pt=AA+NzF*DELR#Rh7bP6q1DPv3M*b%rDxT8X0@j&{c@!N(SnL0;vuSQ115#N4K3=Bh@oWEGN~lA zSDGr!6xswcb!N#)0xF<58OIom1`Jl*S+xpi%!V&9c0vb3fflR*Zw1Rx&FG|~*PdSI z^xDMi_6kK4Ch^iIyOJc4Qv=$f22BxGt8+tN*wy#^|LnbakR8W);Q9N$%&PbL1RD1Z z@Br^i6h%^`Ov#olYwVS5jpZZcD=ga+yE8i*yWZGvY;44?XD2p1F%yni$9l&xVXszW zTk=XH%d#~(Z0n}1iy}o*JVk;80TMt14WJu+y!Way^V>hNUia(9L5dhd6zXt=2k%xC21Xsd zQKe3TcEE*%r^ZLaIm@f+7lv;oOlyMmh@ndrx>TV<9V2ye#MFi4QFC~FUMXL4nWCE8 zb_);#76TAT2w8nF?==AyNsN4 z5@3Q8D8wjzqAasWcpVaTN>njXRax03St5}z5 zB1xcj(DM#)t^EIqi8>lpNOV#nkh0Q%%L)UjG+Y@e%S5-VDNFDa)V&cxbVM7$>QOY7 zh&t2@PP6sRtT<$>RjPiqbI$SFAs3giE8ouTcOT;y?!S-6K68TkBcJCRU*5^O{jcMW z_gu|HsV`G;z_8e*d-Nf`@PYrx2R{BS?$bN@7r*;2dH0W8#r4}q@|$DvJACd>|2=>H zq4)D|{^rXZ`n`Y0um0$MZr#y(nb%!|C1R+lJ1_9;llSwV{`5;6JO%B2cXQiM{5=25 z@4ST@#(3pd$FW50oMe9bDV}(tX7AG42t$UvmT0kY^EPg~{W`XO;~PvbE-`!jIFEnh zA&$OzBU{D`#xD4OHp0oHoH_Iq&+WX2O%of~cD_l#A9@T+HND>RJn*eYxc8d}m=lLD z#@Tt@oA}YY-oksY8oTg2E!J@Q5YuNCIo@m2DyG=KS(zNLgmUpQV2STM^c6n)nSbDO z&jtVvpZEgmN4WBioB4%}P`;d=fi1DHc$}|3bcj=P9l+5V8Rhb8_b@tIVlk4hb}o$B znUlP5*w``V7v?#1_$VQ)BdhQZ88T$Zkc*CU zQ2bCy_PqM(&v^BE*z7EJ>LgTE4tuRFnJ{F?CCMhPyyb0NdCS`Xf!PQ6`@i}K|L6bp zQNGawcK;Y}{rPwDd%tw;dA>I~%pZA-N51kI{`7qx=F|6mj!*2mo4bDEXZfFg?M>{P zC>ZlAViA`XEnp8n`kw!ud){@7=f3>~{^H;MD?V~yZM-2vz7LpDcb7<7AVozUdzr+9h(P_SQV2(c+|G?kh zw2O(U32xunK3g$MJZyRsKla`?@s?kiW9itVeBqD(4Ilj6agJ=hlOO%(|1Ix%;~w^` zYpoK-|K6vZc5(GR@8Rlu-UC25`8odfkKfPx|M-)9y9ez1C%o-9e~$m=7jI&+e~JJk zELEI5@KwJ0@sIF#pZF9XeV?%7Ex*Fg{?bqKwja5MJ(DF)n&^XUzJnWo>JD!FDFbR| zzxu~~>@PpWzx}|IRKUb7KgUnL=NI^$ckXFUI4cFH7U-OOhG!r8G=KBwf5VsVJI2L_ zog-i7&;Hjx;M1Qw$f>pQz86wW2$5dx7#Z=1o}(@d88T$ZC66_NzT_s&`Yl`I7!2#T z?EMD5|Jt(^x0o%DyVyY3vK-1_a)Yc9R9O+O9^BhXej1k3Y4V=kvL@bOgTInLS^nw! zpPjFPn9XiLVcek0PKg`fw5*5;#+Nd9TWvVqkq70^T<$f67+k(CAnL&?j_ zD=9gMf(6Azk3|p7>afJPEcjKTw6tbKYeuYQR4PVNMLX4$vBrgf3la4Z!Lp?%FIWLq zD9JmL^91!oEl7CGv=RKyE1G84%Ax>G5GK~o{PO({JeGm7?z`7Mm4+sK@LXv;R;9<; z>8}m%*U@GFLC0D3x7jxi8E^wjrtxoiSPG9g%_Jb;oc`{jMkP(NE)B$Fm{wj3g4 zq0NNL)&}~S#)0*f5Sshb{Ym~66-`= zB|)?G;xDMIl5lSSetFCXdDzE{x#QI5>!9ffYThRB(K^ zaMYHlB+-kJUN2IoL`@`Uq|!iAMa76N3;I(gq%bsB5(LFb?kkInm?R_x2sKvMMC*~H zBDyN?i$p9S6oQpXDimpZv~5VcapRF%&|X4%2}wqZk)T;=pq2`nI^_$)d50DX>PdAZ z)RD>(&a#CK6wDh|=DJz561_4}wi4YkQJByDwQf5<(a=U>i8?`D%I^_1)FpTgj%A%+ z7SB0W_sWO7a*VP2*1LJjum37P^1%Pi*JgUmJoFXr|ImNpqj&x9ynRo}j+e7~OE^P) z`e8owZ-1Zv@AF4EzUv--`*(krU%Y7>`=%PnRPiu+1-HNZ=Q)!C&;0(U_}Cx(F`L`( zW&MBkdiI}pne3N{1*qpZ`sjW9<7Ym>CqMl~KK0xdE_?eg^6NkIll=5OH*)P5jO1^X zSCp`LoWu9s%V$3Dm)v`-#b~`Y#*iVeE&RI8tiSe7-mu}`G&ua^I8XxDXb4!&pR7@Y6<>{x+aNQl3vvJedP^|hqq;eAnum6(2 z_|ijsep>OxMy`3&&+}V9{YKt->uyF_8LT@eDZuHY%=DHxU5v7RauZudmG<}jp$B@M zn(k6u@k$-KRfppp2rqa1s6NHw?9+Vp`N+u*K#PJFTX})$W3xOqf1#%nIrR9mJoxnS zWuNi&<80nE#Td(d{g;HQvTkaeR;!?Qvcv3&6V!1(#&?&_kRd~c40)M|48J2^O_*Ve zi!gf{o11GUJO}y+Lxv3b{xh+O$xR#BJq7nq17l;fM@ASS_h--UuD2N5d>uF4Gsf?2 z6aJ4s{5!t*^jEm|Q*#_WyN;JL#Pt0{Tbr2LcsY0O9pfKf=&`sq!jK^s znOIjm`how*;oY~f?)CR@&!)98hP+&Q5B@oyJGF}!HvSmD{f0~ZZL}H@r6@^H#Q>e|7~e#u?pm4fnk39v+X2Y%g!(-EZ2>&avX6 zE2g-0+nCz4fo&7;NDtijIHT=0V{0R#h?cD1^9J7ho-ORyx1Vi)^hf;N1Mla9GcR!H zxp(m!|Lo1|@o-MV3P5h53MRI1W#hKZO!6eBfYGTjMy4(}I0?lltxfy6?Cx#6ck>oD z|Ha4oCaWBZv9eH}>*dzkVHCHor){7&2tYkV_Qle~~p44$|MR`ckts za}jI4;+7{XgseD_&o^WsY=yI$-{m;{bn`xcFB;3&U-bc*{g)XGJ3I2~i^`fj$e>cX zoqbGm+|T2nGkK*N7mJK zvjxV&X9hCwNcvj@l_*RTdRYg$~LhiWiTFV_pj8tz;>b zgkFIKMJtdRTr_;lZEC)EAw?yMry>z75ip_%QR-ZdKr(6CY{jYI#ZZY^&Rh&jxjvae zG9^h?8ia3R}0N(NR)-n^3fim=^8@(FXPxfjdb z96Tx|P9ow9%$I15HEh)+bH-wE9mi76{CsnoQ52Uj{=ENDB>(g@z@6i}-n zDkOPr&7q%rSX*Q$>~T?Xlv7u*?{#nEy+8j&PX5i4eDm;O4nOu~-v4L!vhO!;W&M>? z=UGMWd?Xf+J;Q-d{s|wu_gg#~-@uJGyp?y~wwJx*Za^?qK--LOyP0dQdyKd2|0^DS z;_vw4eRp#C)z`59##V3SStgBB@%UJAaDfGp9H;J;Q=bP+s*j z{PUl?j_a^fwx?{iHA?0;n`#JEX^O_i{C!VU7NRPS0>;`X~nuJ;!5D9^m;j#opWQ z;P#td&pkKY%$@tUvU{TB#ilS*h%B9$VXoU{so2QIktsH)P=4PZ`WU+|+soD0>|yHY zbDT*~Y`&a5d-rq41{isnN(^cK6bnZV@aRHfE&|jn%^u=`Pyacm$35fzLN}T?GkuU3 zp6@lsBW&KhiCw!lz36jYi>R=E{RE@!676;fr;k&oNP?kK%#a~NhI~Iz&EJtjE^R`< zIvtpsgI+hg`_=`93>or+#d)0flzAQmQC(gg>D(whVRJ@v#=p4Of5|^8F@aX@QwT&vRmAKkkQab@PkM znHLT*J=@|$`_(s4D+nUW=&m<#+xU8l_0t^u+XKu!`C0D$;!ZYf+s!ZEwVA%S&sm5= zi%}-GZ)5$I4na1tW3oky^TdAz5ntfF%k2pu&Y`O385>c2Yn;g~S8(;~PNP423lIFs z{e0)~FYvX|79+d%@|$nj!S;3K1@bMpqQDo$@-JE&l86?(Hlh-yRgClnuLUs95~C0-PgytqSvY%b>)H3qX{`-yyq7sE z90ua_9a*?|In}-p^|G||aE)b(KW^SSA# zpRb+!Vf{GS-M?;Mi9q`9<#R2^qj3*x+H68zK~~A@ySipmi@AIUi_9b|4a7Dfw25_# z6xtA5kXlH}`vPIyYR0T)oSIQOwBsCQnxkM5Csh_|^oA55EwaME2v|!&oy4$!Z+{XI0r(%kPLgZICRei|!2%_CnqUUlwDUIr6f z{!gsEMsqyqefIxr@L=V_=g#urjSYqlkLxF7VB;^b>k5QWQmiHKG`G3*G z9WrFdkV}wWDJ-=mTW>KV)rmwP#+n!cF*zchB#z`<7TEO=3!pIYASA>pB-KdjVmOj` zf|c}Kfl7(0fku!T)tW*oiXvd64w90b630Bf!qXd3IxXm7#42G?k-9{61g8;|NHn3g zL~UJatB6jsP<<3q0-T@-&IC=0GeM#eV?`vd+8)Y4bVlfJ9%A#|@5})u-o{WX8CB1S z6|}6Rw35OLMPay*aIJs@BUK5j$W{z$LxLfaaIJ)|5>_W%ku5JcF`UmzYR*8sgTj-1 zK`Kfj5Y0&Bw#$-{0>sL);y?|~0!j}r6;6AYR2YfeS~;@Bx`0S-FGaGzx`PyIEY6dn zl7fIhRXpSgGa6!BRNQ>h!lTXy#Cs)>{{QZv3Kx25HahpFms%lxJXX{3{scH+0)D%J+&l3!#}+yq zS$_k+y3R9Itc~&7B~?o-9DR&$J${;kmee&x;F{gy7jV2^X8 za*Xf3ox5(of!9ywamtrNclIb4)E?QQJ4X%m-knc>?H zKF@vA2YK=vU*Yo`_i@wJZ)g8HW$fIuEcumIv~Z<@hv;K`ps`(_lKU~ z(9y#jdHg;;_wn0!>n%UQ*CAUr==IhfZiM>hqK98LJ;3=5cdkfd^ zyM^o523>Rn$P>1&w5mq;<0T*|HRYJk!ysBg zjIFFR5L%?V1+j%#nYUEp5k{g( zdDkcn6j?y5pZk={(pX(*guI%t0D0B%|>1+cUgHr@tNl7p%F+YOo2!^6aBfK3brJ`^Zs)2f}BzPsY9d%XE6HiASOAfjuLX|KP zL<&?!a0n_jMol0Ps0c)o6D$^3^rTpz&LJq?l(N}0CPEOR8NmhWqN4T*Q6o8PP5f}3&F_5}>5zSB;Qs?$V#c*At7y%7NOop1E0_qYf z`PoniRyeF^5q&#bb^xLnQSyrPS{tb_Vr9fCw@WHL3at^hh{#edi;$(PC3rMA9E$fs zA&Pf`_lCt9titLV>jk2fRBn=V z$myHI;UN_yUl0pN<%O;j79Dhy#wtKw)jum|7NpoBRV^&G@HO~uKx)vC@G<3cq7Z{p z*Gi3}Ql+*=@LxPhVs3%WNGW#$#1dYOHpZwXS}tPA5t34iBZ#9$i6R8eN&~7wR3S+s z3b}7UB9f>TYA4j*VWzkQCV&_s5hsb@h2WH+O}UCR7Ay=3L!9M4O|sm#jF4)>x(1!T z(m)MjxI%MZ$O~eMn4mFlNLHdi1#2tGfwh$6jC#?iN~8AK>ICX&dqqn{3dI+SIU$Om zn)~*qf*81%C_|)e6-fnLR{RLQdFCouG^oK`R#Ffr_`;wCu5DN;wA52-K`EZ1@;Hs8 zRu^kKQd_Z+{LZi#siVVUBE>{I83i?MA85BahzUt0D`L2&Y!pP4_(Nf70P;kgYVH{i zg$OPRZhRAycmFo;xaU!(UpUI4hfY#G@ge^D?{8<@*6r+n(>B^KafH?up62MY_wmE>yrwY?$JT8}~EbI>C3IdX_`qIml_QU^ARs(Evc&P+ZMj@BU4G z`DgCnb!#qAcTO{R@E`fepZo_t^63Y8;G;7fIa9ztdpmEvWjDLW{r5QNypECa+j!Ui z@O%7|8+Nd-B-hV+k@@LodHgG%=I{UNulV5Cvl){iLtg8+qG0dqZsN*+Jjm8ZW;j^& znEv)Rc=YI%yrJ68jpx+ViOf9xEc4wK6E|GLcJE%cu=uNy7;8EUvz(eAWx`eGe^1I7 z>vwJAvYR)vYwJ#~x?&G|CdXJ;eD`m$m=bfd3shBr!sEu;jLC%>;TORStY)#(r56$= z9^V>atWf-my!=MC-NK!3y_(nEaf%l@BdnVoV`{wh^7Dc*duop7o;gA<=0U2_?Kg7M z+kTDz=U1*_vbfO7sF0reAb;?IPx0?wn4=5G*e>>N+Q4-i*2a8sM1*#GjJ7NAbp;Fa zS+jR-#34h53>k895Xs+=LoOXG4-gVURv0)wjV&%_1%e?%hP;Llz_We(W~Mew0;hp_ z4jnql$&*X4|5czgFl5LN2&>M}n|YYWUzkU(OzdA9VaUtFDy%-nvxkmxT=((aFAH#K z(b{$sSL`-^<8Ajc{mB=2?3w#`?2D6p@XkB=uWz4Z-#Y)=7>gbmLw8-p*3lLd3a3sT zXXcyt^2FR-?279dfBAz)F9FB;o!i)X&0g%YC$Zj6jIq@MUPE4eq>2;IJxU1zSLmnG`1jAhApHxh9Id#mxvl}$h;%e@#m;^C zLBF-vKF@L9<&*nvSrxweMpoU^`O;hyXV8Mdpkjau(CkG(SB2&LC?Jvrt@}dPN^JIt zyjF3knz0;NE#z#+6P&`K38 z4V2Qu+Y*}QP?nHJX#fwDWTk;N*oa_WF`rjmO|Hnfv}|P{Sr$+-Tbbh$nr96_7AfnO zN#EUP`Dym$e471oYJ+tJSzRwIdkrE@__D=~HU4W^E>3}*M_EpDZGBm})`}v^ijNK6 z%&PMSvpH}l%krNKFUw9L^?iZ>yc5J=P-@MVo7VhTY4%|gnG;F994#9Kj$FpAI? zM=45y!xM34xCG838Y`kkw91MC1VifFw7CkT7F+;}O`FvagxvExu z%4JIfW|0&l5+XvxyFiP`s6<9xmY=HCQ6;6qqj&;>iV#!~6-13Fn%jCa%n}JBkgX;3 zT(*FAAVNp-Ui61WX+R_^4fGWUh5`i_2?tDZ z>(9Q6w;Z44p{G8!$t(&4sCsJk9je5Ap3vS+b2xO-`_R z{dq2_5qx`+smtHYFaD1cY~1udKKQ=B=I=iK zulfDiGtB(f&+y)NUc;ufCw*5aNQ+|KIwm%3U_)PNfGO5(+`^XaW9->ia^PRw&(u&} zX~+))u7pi*;O0H|aqH&CIe4ss`G@(|p&#ewW04#8uKhO$V1`EyB)Wc_yKfl7yO;8o zdF`O>DT>S9$}hj;2Hv=5-38vUx#4%t;eA1?EGS;4Af%a5hd{D~Q%~vJl=3~S+!YpJ z_6h#_Z$Hbu&&)DAaT(YD$UFFzn>Mj`qI}VJxcWA&$*t^}5YBI&{-x4kZfTywCl^S` z0LR9u4P3rwC#{!okEf0v=Gf_zoB|NR@41%UTeoq=ITc=B1QGBauSz!T)g=uKs16x2 zWXO<I+6b>Y3MO{!=a%>WOWyR% zAF^=fISxJdAfNrC|CcxZtM@Q|{dTr|7e<3GSyZ7c7%7Fq0d)^sJi+PNK&Qv~W|Lky z)@{OX-NU-MIbt2C$+C4rUZq%o`TKeJ;Yaw|L#GKhWI?wfLxv1_wX!0t-H@esm&r~$ zYtCu)aX*a#Uo()k@dQ{sx6u#pU#IWR`O0yZ1+4pm-C`^5erh@C)hu6J@;R&M2O9m= z{rhqHBHno@uz4Vog_xTN&3^ErvU=z+pOT#-EL#}>RA^QsZQQH2Mrca1xOJAfOJt>i zENZAEoF%eFfkttFKw&lBDhhhbdzS(d^Gd~p2(^)FPpnH)T{cPs1(LEroZu1jI1x&# zDI}*`NQI*ZEP!?qAF|*@OR~a1OR%;XJu%DVW2O^C}pS9UZMT7NhGpRA%GiN8W=KU$R$i|N-rs5AdRw?NJ1on z28@{BhXR6tAKbKgxyI(*pOpq*X^BvvWV?Wl&#O}Wzlt6^nt}9&~lNHUYD`XBIBKT#(N92!XnOfz;%HxXha;4 z6ud`bfl-2QVeJ;yDzMU%3P(^!7iAGg2S*o2D>a)bm=bHlYJsFku5BQKnG%yDL`SVk zZ8X*aB7sQ6VU}_|Z2FLDHR=Slti;Icn z=af*3xDdcVeeP7H#u7&qmo1GrLA)X&h!@m@FKFC~cLm;+cx~adKqQeQ5+x9|0_U-$ zfXnx5Mzn}gp{|Q}fi_)`L}*GYh|oi+5JYoN>$Sv%hk1tr9~T(`9HYD6$nEz$&Ch-3 z+dTWp!z`S5m~VdR%WU0u12_Nb^{iie+BsO5p5f$kM>u0fqRX*|KF=3EFwdC_o!p&f z;n;C{V^eGxpJHmfjVI^oK-^?4|bUU-p-p^;=_ZWx2_F+DAx| zest|=-yI@|bEtFKa(#pI1?85#Y`gj&dz3l=03ZNKL_t*bT)X4jtRF64S*Cl#N^4qkOsy&4n2 zm!9uR=v^Jsnqb4^6k9i3=q33IVn)@gNzp(R^#uhlE>M4N0`n)I=gCigfRBCY8SXpQ zrQ3R*$Bss}PQHt`UA3LdUQFO#1a$Eo5iL98)b`S2MsWXO;q7a0+TmIf{X%%HBZ zxjE=`u+CCe7#J=FFl5MUnB2F1?zy8JKQRYVg4@XTmv3R`=2!dr-b035ES8vk_5mLH z!l(GpAAf>_r#CTK)pWXDy4@ydWU zAeJC@siP4SpdM+BGdil27oJJ_E{MQ9&p$CkYK<{<*>=Wg{$H&Lim?rBi>C-y)8mIv zX&{A~P<83`I&^x_niywve3W8s#7i9OouPO93BLZZKjZUXc#tP&wxOZJQny2=>jvJZ zj-qflH~fwqGGxf5PZp#Z^jPa?gp~*8BhKH=S)JBOy#C2~pZ#;(<+K}?^V0~2FDnmN zcH5e;6_N5a_t7BBKYhpA`<3PzIZ|I}O_0_1+;_`Ba)b2a^<_|+c%qVpfEqs)aGLjh zB>{Jiy(0?*W{U!bS{9pjMAY);tWg?B0ha>a0?vB)RN<|{ry7j`2?^?i^pysxMC#?0 zSW}g-x`yNtJR*(KfP&M!ae!kbHCjFo0R1;R}xPeH11}0hfUmAa!c&Q&x z4M`wMRxDs(lYV*Ui&=Al{&7D$eg7o+9uxW6D~bbxkw}(LaBesjJY>j_OBY*Im~&VX zq;g2Ds5qo_=$IoRh#6v$WHqAToT4~fR2+%~$->$xi%chrh&i-YG-y^l>?zW6SgRz) zNSy?;L@WwWhbj~<(Ml0>0ac}tNI+;~jEGT6ULif(gi%xqpq<~;211Eg{>~K)A+HeM zMX1H$P)t3j!_{raTuG~{ z80mIsbt-%};4GnqL20>9RRE=c_v#KZ{Rpq+@RN3=*3C1{D0h%bfICTrOg2_sUQrw)!fdLjmu zLwX+VD7xse8myNfXzsIk12bY731y&a3B8sPNJLARMTD$mA&r7#LJ7$e6`~4AR=jX2 z;+lSxc|}TrXowTkWhDM;%abp#8rSKq6VUi1aH(%kmPYOzeiLR=M!pO5RZuv zB!LBjL9}f7%bJbU@0-1LxbRS~YTkxiTt;?q*)4bR)$96%>d;C{#x4b-3j@=RNOel=uUQTE}`9gK@A!5 znnr*&J1)PHtM*^cj;|i)NDLf*<|z&wc!q;--o+I~?hJqtdL2$a`fW~)-OASOd)VbK z(tq)y5`kHseE0=AtqHc@a2Y%G6?%7FB+Q_$11Tl?0+o*Cah~szz|#B-kAD5@Jn~%N z(1L+8Z>TIVWXKNzsam8u^&nq+>@ZIq?NN?zVZ&8#;ht-EaOKAHiCY?}JIl=R6HK2x z!(5%Gt78ZRQ?y*iwq3i~HZ{g%+r8xPt$O|hGbg5b{`e_QE?KkSgP`ira*WN}cd~WE zBpXI9g7~8)=*@8W&{3YBo@G7+LLCUH#pLGgTz1`+YzfDiK6Zi=vx{_s5o1jqKwFbc zZrsM+JzH4ca;!Db(49TX$s^Bj;ABm&3e+KB#Te}X=|C30J-2Yn4o7QohT})4Iob_$ z7dxcZM%HiJ%?(#%A>!4^C8~u}%+4I=@bqbxEHB#PFtAAS6HIN{&bG}P*f_f8i`^ph zW_a$|BOEz#hJ_FaRldjd+jg<{>V0g9$2ofR1T%9>bc2yXO%(;-UdQC7ZS2{*g>_|~ zggQTw?%Z)spP1&*$sP$X%a$y9wK6)jo$Wieux;Z6BQH9rU_i~{%n^=HpWua=4$+s{ z6gUow3AXOq%jR-{YW_4wj?dGzitc=co7~FOW&62iV}W1(z>JVOoI3mh$4{N%bT7Aa zDfS3Sp`2v>_TB8=I>AJ{IBU$qss*Z32YLLtBOLhlm%0DTU*YploaPKA(+3~nTVEW( zZ|KJs+8egBY5&b!ztPe5XFs%5FVdMm!QtnQa%wRWOz~bJRMc*S(G6SJwtWZNCktF( zFzOYfV(H{@PEJqr{LCWe3lKppkzzu}x3P8mR<>-MV0_JA&SJ&Vu_rlrbe0omx>R+p zOKIwI_Uzfsjl0@ZvqyOT*etV)RbF{2bm(-IVqzL3T7 z{1%Ts@MS*nxuAO3;8 zJEqvN{S92d;bo0xtY8Vk8BV_NJk!&sIkgnfvP4OFwNgLI*;0#>m_{CZ_gr=Nm`a zxW4_8+nzz<(kYgXJ;t{W%rduBuQswJO0jMW>-S#Gb?Z;^!hs_^e{_~^UIn8;ajgj^ zE`J?Y?V4cISVeb!n(uu5QD&BEx`XMpwz1>NeOz(%fZ{{yP|cs>_`yR=&-JL1{QvB| zca$B+eee0Hs_wpFasUQ1gFyy}3=kwp0w5rQQKUqQR+g>g_`F_wcfI?(``&rp;ODdF z*t>hy$LqIV*^(_sNtP&+q9|sN022U$Br-q%M9y*Y+_~Ld_1izH?<9htD2lW!-|v~8 z?yl6;UDaK`TlM??eyG5pT}t8tqJ}nF+B@i)Qad^~Xa{jar`WaSAm=Xj(m$M$2I5J~ z`^b$fxPA_k$6R(C><~j&E^zV0DbDt#IIX=BsS{kGl__2AG~`pH_bg{F4iU#C%4LNM znwfU}5+>IM*Zz*XAqLN#yTpV^lN-|s~O%)U;F>T&#TAOO{udx6_bRR#?*#rAI zabzbi{%|9^4xHh#j;KGmnXS)V=0tPe-?s`Nn$DbM)0x`U#8?|pl9q`3FLU3rkX?6NzRKQP&m0}$yaL^yW zG}-A0Um!csO5wc>_&BAMCk1FQN-$cyVx95?0aH0f2yp&b903kPX022#T2imP=2DDJ zC+uU4OGd-2g~UQCsKk>7BoSP!kk}IlNCOZ9DnMaS%CCcIlb}tCA^{RGF`3p9Xy=vN ziW-Um6hp{Ikjo>gfRh|fJZZp+#wm?c$}6I0&$sel0LHZI$oiLLA^v2vS!7JyJ7HY( z)zZE_Y+}ofJ0Ba7VQi1ND#k>Pmi6}QUJKW(6CldhqVk9;7_E1fqmps#zLKz#I15uE z;~uY4jF1Yn@@uAA0a-ew@tz5^Pg5yU0bUU`D_Z$_iXzJ-J|q48g<$K$kSrb&R~>57Xoz7;+)1=!8wbw2^ug&7^ex6h#-l; zY9tZFR;W`Jao)}v}OCn1_5A^}PXN*O=*trciNaHI;#E~7Lm2y}=sAw~yi9r|%eG=V@4iy@PB0wD+- zp%IKys3gM05lJezRH0KrTTd3yPEb~Gi6X&}Muwy|z=?sJLDwiu-k>6lHh9^{OB`tB z{BLY+Q91_YP%6&UgvF;fLZBd2Lass{h#g^C#^^F8umpi3&>>nJD#3egBK5MG2x08` zlO_fw7F8O8(lC?~7`EsXLY3lF8B__Q`w0?9t{fA_7F|}LjNhF018W52H3SAs1SXF% zo-_~zH59`d>VqOy7r^BpG1$bAI76B$0&y6Zpu{0Asq`GI7Sd3Wnvhf*Y~~HhN#Jd4 z;K?8XcwSTPgYUZK&%mEampe( z#pY66Xi1GD4ID|NNpm?gmdH3<L%p@$R9aN_S*%2<^UD~ETWYEY8HNRCiPgjz7llLo9(l$AqVOziNU6|4f6 zRX}M}M$G(q!L?_xt4bdq8Vb{vGxxSH@YUyD=F;;g*?E2s$F@Df*M4{p)9)W9c^8>J z6-VqWpnz89tok^=@LONtfp?ru{@|e-TPe2Bq*E)dxRmpUkMQQUU7Y*X1$6R3DJlm9 zK?7Z#Qy8i(kgImzD*lb2&@qSUGq2-@`d7Kq>lnCjh?6G|@pAVXI$Mo*DK06c!2w>| zc7ejO20CZ7yvrc)vx+jfx7hg9W-c|(pjh!5;qKYc2sNw@g82YWU>Q}{w2N6#UNK@p>73l5t-UX_tqKcn? zh{Gns3|u_Qsjc7RseKnYTxz0Y?s9JV{8zbs?qp_O&Do<^Ts+9-GY8qdeGfYhoaESG z0;CKN4KP#&mv3k3s=HXdY&MI!8fh+s*O06QaVh1&e$Kx68n17EgBNxmZNbMB84fm%~`>n_pM>s z)D}8w^W?|;EEza^kbTep6W`w6&6%^8xpbwMa(x$5KK)1h!L35``Q5y>WgD+|cXMdZ zQ3h(4Fn`?x{N8V@qa*O^7$6`i!vo!%JG_UzJKx}?J;&&FMRZ_@NFj|Whm)DPWF;$> zFX8$bEz}jg!n^`-DfAy=$EN4muw^etdV9EVrklQUD+|_tlu!T0r&w8fgKe+wX7BM! zbeAkisgJ$^3&jpP7u?7l_ub0kjwae_Bl2cq3kV>@1D7~=csqM`?%>5Yd%eeDTcTVV z;&KnvO}n0(R^QC(8?R&5q+0UEEBL$}f=JA8-zg5iv576O?qbW4L3CXs1s!9pq8vn+!16eVhYZUt!0=Q@qum z0Faih&_Coz^X;@RU(3hVE@8p6R+KK7oxz=ew`ffkhO<@BDH z*mUUt`}395hK4yevGD%+Os^|UAPopM9;ENW5sn|&#;* zRxF#xoGEn_BTc2rZ9Gi(zGv95aSxmKo#yi8%UrrRKzYem`Q(EiW8QsDoPBdMFTZw- zqv!jHT}(1`jb(#yo}a!t#VnpZiRPmBg7MRVBkk*^ zch7E4^q1(<1?r~GV$SV%Fu$>u=AYVyQg9bIe&8tSlz`50am_TfHFN)Oj0=2ENc+xk z`StJcz3=Yh;L!{8^$pTrcA)dr)Hl$!bR7%s{|cS+ceCZu=XiR!!@|8Ph(OT?d@X`*R|N5V?<U^pzQQ0r|RmiXFFc*XJML3)gw#18Irj z?&F*}@H$%_e}R2xhZxd1qC$=!8KSSG$aO4Z-paMywSFOO^*Qo^My0*j%dha%H~xXG zyN__De~A9R6dSGP(>0AWUzIc<;9`a@9pUI}o7wu*i)_E7&@}~eCdHM@l=8D#a@)aQZ|{FY~$okN-2b&#tBOYEFs7mtXEV&8Y*gvgg%Z?CY_>5Ca!4 z&~rIqsAe9wf9Wf%S~-{5Q}P)Zt(UVqU*P4(HnR1^6^43ZO3EOE=NUTm1W$iwGleKy z8PWx4yn~O|*3dAPG~i;&S59(v-)p?|>@Lp71{;Q>-Y!xh-%v{w!ucmRQumer#Et8# zq=71`sN#c$my>}p3m;UF4udPV**7aXaS-&f+ttAUR{pGX*(cXoa^XtiWi& zXqS-$yz<`Y-Vx%70s%s(EP*%zMhOE(EXJiM>o=Wd$7zDLej5$9sKj}bu!)0Iytjcw zAhx(91#19oC~oo zz*YzYc(p{|Y87R2uXtW*TYzyI>*CW;36cq`WA8>Dfbq=^ctv^-kxIKxKz5Nz%*ZvB z4;$A{#JBgDN{;InqbC!ulb!nl-w~tHKK;Z}-j;VH!I%h@g-=X7Hc0trA;L)7k=JHq zFK7X$yiSaB_-CkmK9$|N0yHX9_#g9RlqZg$eIEZ};K?M)_X<(IAW;>k!+;BzU;E7uEJJFjFe3M~=h z0)!k2H$rIC$|1^;C`VafSZhiW;85grKx6`PCM47e28R+bPEgi)MRw&d#u9`M6WL6u z-z%plD3Ti~sAW%3jxo}Az;#tJ5J80S#VqDo~b zmtieTlIV^J!_B~aEO!&u7BkqYQo6O{tQC=>=A87LV@z0{|SfJu=^lSY;} z%GykUmr{J+R}Rn`@0UUY2Is(PL6k$gQL>Ix3KuAxQXty@ZqN!X0m>Tx{9?hx4pUCi zW$zb4NJv#mtQ^BY&IA~fC(=a%{|BVLk??AZ<5g7g^N6s6 zsckd3dr=)Pzt+z{=^Q7{9_88BFY&E7_f8&M-cD2g>VmZ!yujemjePw_z0}WM#OlxA&zBkuAcz~L z=k#v&z3?#K_~sw6>AL&4^S*od&AX;k;GJarnBl$?Z2#`p_^S=aI9j}pg?IlNzrSW0 zZMlG)QYbf!8a&R^4?W4_2jAr7H4pN;t0xn^O=wi zi6g$WpVv1%%R@i-Gye0&XIS_7uks7GujIC=qgK&t+vYHT^(Xn<%(J}l)T3;A;|%+U z25F9aIQ0Ch)K6|BT>m-#L+(ZX@DC5MY0rzihT&VcuH(ydVOk9!WnB3zul>{4dH9Wf z&ekvD!C(6KtY|AzA88aI=`g*ApWsJN{4;;QX(NxX{Y8H3{y9u*$`b zj;S0!u$gav>ok}9dpLJ+D_b7z;T4(3x-}2-rB6(vB?J*ia$z@{pM8plfApvP{?ku! z=U0D&2S2imH60T@QCOg#Hy`~+w)WODIP(GitNW)=P@oaq@D)lIcJt6Tzs*zM+|0(U zckt!k{35F+2h>MbYs(Vx;89-si~pN%Zylny^JX6SozJjtN}Uw{5T z@<_`XR^N38U;Ld#N|FP^{1(PNjfH#Pd=(kom(wwv$$#WRGr+{waEeT+{} z%CtnqGIVY``(AyTZ~X25%pbkAty?pB4MNAzt1Fq}ZsbBF$e*Lq@^v&%bb$a+ zI>E86&+yP+Kg`bJJ*>L>^L%;b45k+R;*&5;?}_cax#RPyY$yZoX2enuKSY8vh_c&*FKF(kM$@e+h zu#W3K@WL}j-S@N#{gH(9OlLE zZscrtKf3F7maqQ|zyA3-OlkZnk`rtKeP_7RU4r_Fk?$YTq3fgx z*waV2Fz@qx{+Cy>Y)&f;N?(*y001BWNklaofM+cYpIH z7SCuTA5A9c{1m_Or|bFj;iuX5)DQUjpKjs8#G4xnmq6ckp8xA_^7zJ+oN8UiFaI~6 zWKmZmjbTE3-){9jL_`SRmz`0mp@^z0E1oEm0jM-ghS z(z><;0|$BSJDbQ)>Y(9)-{H49@+c*^(pwyP^%?&0fBgpE{XhFS|Epi*lb>71yn+HI zGv|&k(0R)zxW6<={|o;+fB6qD^7!En8gBh>`S-uQlxc0*qK(p^3)D2!jPo);-yZgD zc#a=Fbb$IV{Wj|tPhnQmSn}d9J-arr^WkstPrJ{tq_@nXG1;rAqKYa$5GcIjZsi2< zGU0?#Sn$fjV=(^oDtX4O6^W0QimLTh%p#3d{#L?B{)z_k7-exm7$pw)kn!*-D^ivk z2^kw-sq^T*l72LQh6(qt#zj`nJwd=9D!e6izmBFAUI9pX!az{j6eIgkd5gCpj!-#5 zaYQ&mv4mJp8Xz4}j597pxx~+zr70>&Ax%(~^O0~nLKGlzRGbEoS|nAFD7+T80hP^z zY6A|Gc)|cAD4hTnj6ft%5GX1rXeb1pG>|VsSVUX_r*l{vVN>H3{-i=2z$=IQCPhR> zn@@b3RtN-@;IZ+3ca{G0wmvWsW8!7P`4|Y5w$9>x=nsnv8`nZ^E^%as77+ zA0H~?E-T{C6)L+RNah+qW#=QrhAis{&%pXncR42hywGzo^H2`TCjX96$*3X8T5Edt6D60}OuCdEW4I-i08CyunNNyU(+5iSN2 zJaHfsIZz4-4HAVA=1^gTi2`&^BRP$VOy+4ICL|#gO^EV_MRWo>1(kwM$!pJZENDg1 zKn}<$2t^1p(twE_N~MrGRG|GFpAr!3^>icw%3eN796Cu+sr52g+4?O`q3pcEy-U$9 zCUhmlDU{PFtFld_LJc_$q8aftP*6}XkP9K7hg=a8)R2=JEHx-4!R7wPCL=@K%lym6V5T~~O z!xWvjq;-~3qs0c0XhZ4@X>73N04E{F<}pqP9Rz4pKE;_Bbcr-7QI1k#8xiO7l;e;% z&0(zdy79zbjo+4WBE;Gm2&^ajs5C~Uetkm+4lx#&OR+hKBf&<# z=?tKiU;tw^T7?K2s|-SjvkEH~lqdg+b10>-O5u#c26{yPRcN$mv<=Z#qvI4^w&-$# z9!^n`B9daU#5jgUP*Fe>lN~R%OIOf+cl=LqHtL3F_xi2jbeK zfgte8A60w^(3rv`>aKf`Pu#nWi~X1R=N;XYPCd_~50|JLn!@z@1rsxkprN&a*3K3} z&XCea|KJcky%swD1=~*rC>@eh_(g?s4{`5#&h|lEgZ~=*)S=KpL~VOJD8F>Y7_APrk-#17(Xk|v*TpnCEosRn4 zy9g)OfBDKU_EmLqpKlhYSsR%N%j+SD~t?qKcmz z*z>#C{fh8c11%$A0(gJ8W%|3%a{AOUj-Bmgpz|(1cIzr`y>SUQ&go=w%{xieWqK|g za$&YVh7X>`~~+teI&jkR=jhP?UW zPA(4g(wjKWvye}(S;C5WolI@1rD*)~boC*jE1}}cocZ={wmtF?x;#wu;CfcJ7;jyv zu7ydnmb0Q`9&LjM=BXVsie+KRylc|yd}kXz5~J%iX6cJlZm-{9-!%Y16>JZ_j) zi&h%KCZE`4= z?qbXLzJnPaB)oquH?)`$>+_Vm;rz2a`O-_gc(#Scn_F3R>tYrbq0WHVdR$|XkA3VA zSH8E6O`A9IS2Z(e{^|`ZYOnW}w#Q)j!ZA*5{Rh7P!YFhy{zVa|X+_IZoIvrYP zuyjF}|NW^TtzALn%6#eYF21(yd7gfz22*pv?&id< zCwTdxzh~B@FH@{vz;(3~3Vbg}FLL(KRyO?o-|?dZ%}iajlG{Ie1B<&RQx}kVRd9%H zps?lv`YwN;4O?H~hyPy*{nf8<*WxM6X$(*%AUAmyvudZ%TC}vg{cJk8lXC-aar{&R z+nT1cdetHp%$`hJV;=wNI||DeF>BWWqRofd{`dO49aCvi8iFEqO-1T}aPSn( zMMYpB$k$NQJcai5i9KW@9F&jq#?z1R%v0NWsb?{Fe{v1CuDp)9QyQqPeAbkuzM#qB zxOD7q*!a>z{Gh*&^s}Gfj%hiH-rV^I2!$zj(6#UuKJzPgaORt@vHQ#c4sYMYcfPlr zreB!L?8!w>^!aho_a+B7KgAEe^CX*l=d<>nHQcd&DRVlSDQe%pT|-1(hI!!TP5jOF z=XqgM9ZfT)^2r57!XJ}>bybKXzIcR#J6`0K=Qgq7t$G@7et_FQatG_y-oT>PDD#%` z{^8;QhR(mmTTeg9-edJl>YSM3{X?0Eux>Jw>Y6F8S;fV3$C&o=2F?tXF*UW+woS?` z(nr+iYAEJc5UKOr@YsF^&N+gpKwZ}qT5{eH8W>DiPh0V?T-?(J;&&Ubn8rORAsTFozfVI3L{y;uH)&OI_{tTsd=ysl_dHyV3g^kA!6C90$Mucf9ocYn)^ebMCs2#q*}q z)tDpF4qF$HzvmV@c7C4&2ljJt(tZX$yM)f!cW~?H<}zhfLfw*Drqo`o?>NdmmpJ<5 zlhiF<#*$?Vxo%cFEp^#T=t9z2r&&2GX5aqJ?Af`J**Dz6yd^mxAgXB~s%fA;8Klrs zq>$5Ka+qQ(Eo~jNUDdV0C@$`JnSDDCaWJ=>Ut2hhSzV1ZjU#TQ6c#UKsP_a5pZ@_h zQ6?W%QAHJ1eBgNZuZ{nzR=%z8lwNp8K{LxzQN31U!bVXE$P}HCabaUKc*1~J9Iu2b z#0wV_pp+-M0gRFY{MksIM>MNPW5!`jz{p`G>qv+KGU9+zDCHH5P+1*RMjlXtRxUgC zd+%ky5hz8d6d@2HBU?yjsTYfO7Go{irjS^OttSQ~HX{YNj3fZ03YBPNMXiag!9KrviK~G zc7kMSHG+tOtSy!JY)IDj;L~bBX+gC3>x}U@F!Fp5UqB^aWj_*MMHN+4@iT}rsFI+J z|D2gn6XXH{8xlB&Csa9{5?p5K-+Lc&C}k0`h=9VNRD_W{iU5T|5n#|*98OX|At+K5 zDH?;pr5cGfXs@77BIw~167l*-PUDndailn`WSTurqntrGgBI<-^`vB9@ex@@q0wjp zG!Bgx0}_CX@TCf>VtrpMh@iwPK^yC>?c*R#4LSv^Al4#I5bHdl(Hg`?o}s35s34EZ zOk4s9UL}kN?V3hOn7J|W{MNtqaxX~@|AQVT4CDbW_vEJ)~b6#Mv zZe$DsLBxXc3i&EYAx=@T-xFIT1}Otch{Ou90YZd`K~RWfR{3={&?&Gv2YH1Q0;HHn zY7|lvBKbNbs>4OKxUhyq7a4XrhFwV68B(ipsc#;g2vH*B5IhRulsIyUBL%|LV5~x0jY>6GgK}UT1WpLCU|r^w zLkI<|D4b}lXpAywjPDm>&}fv?;M9l=tWqEWNyU?BjKdmBqEnne>YVp_RMFAWf>Hub zylkxmZG<4(LgT%2ARtcp!P47$^C{F2Q(#$zbjdpP+8KC=O zH4~R67q1tb& z_%{bdv1u}0i>_x;<9^Qf4|A^fBIgfoXWPXam|W20E?(jIp-Z$Zo=t1h+ZXfywZJ6_ z17}WhS=wmG)lkpR{?M$HB43logaI;?k`50uEQ00T2I8wjN3|3>R0` z60u8(je#hS*2+)bRZ&G1RaEi8!ek3(ReZ2WQz#F^;1HBbNPnLv3;@*uuZk*u9+53F zWUseS5xtdv=XH*n#tyo2ZFHzQ8f1{3^XE8T$WdFIN<-kSFpkGC{k`Ye^Tr-do(w1z z-lB8fd4_Ii!P|E*M75KdGWTZgSpEaHo!rMO`?mA=j=Q*RN;`GYSWCa)29I-W@5}t~ zM=!GRL=7uHauXj}cLO(18>0s(`Pt*C5JkGnoCZYIwsY%5auG9IwsT9 zI+sO-dTJM3&&?}Wu=2Vtx|)jAW=iGgh(b+7p+=F<&7`v_q9%mF+E$uo-^4A;+Ucqb z$4yD;d^1z#T+gjC#J(+ic=+I}ytuJ}n)X@DzH2%yIYSWU$v5Q5H$dI&Hl|Lh!3|%e z`_Ms#m#?E~`gCSAIixg?n{K~{U{->3EM;XY)P<1t9_Q5FO?>~6XW5+I&OP^D&+RMc z(N$pF?BH5KX#piB-^)5(IHZ(;GagKXP&fUVDLV$VJEXm6>fIL>iF ziQaQZ*!k>pymq{w+#R!-F@HWY##ppAHIt~Hc0DU*)bje7Q=B~h7Ox)c=OfK6%)VhA zv&WPMFm&-8=k~tA!SmH`cLg;|BFxZ(BrRhdHL^g+lrf5vv>;QXMi=z6lj=!BR8&o zi%VzT;@R&%#*bR(GO4hZ*5&Oq8jUX0Q(vg3UZ%pCDb$2}P?t{8KeUwIwq<;LQ3s8M zU`$`q)O1c~a$6g%<%8_nyPGq;Yv>hddfT_=y+ROM;_{y7dF9#XcwwI+z3G=(bMqn= zcGQlyLXII<*Fnd^HQasIX7+w#3$H#4(l~=TpIgq%#@tU$$^9Lniqy5uW%<3IV*Q?r z47_lV11EOz%A-GE$#q{QpX*{q!;d2y3lMX0-)3Il@HEeEJB3~MRhHkpjODYM$Iad8 zTq6w~^I3i8GM?J`5^rpOolSFYX8qg+G#h=*{_T2Ca`M1-wl<$14V09$JVbwYFFj|E zas1?2&h^#Oe*0&cfAbyOeB)deOl$m!y#)^*+r^tN)bQBZtiQXIWVnysGlx06X)AAD zx{-p6&-2qoK+V)eOkZ?8E2eCq`_d`SUAWAt%Q1_mRz6mmpr(o1uDNv7MHEk7r1#2s zj$BHZ*FH+<@-R5^w9J^t(vI~kpV>??r-77m|2fV+{}9hVy_o}cC3Du_!=1OxpfS+n z$Rk1RG-fSb%(BDV$UXiNTQ+`_xzpNM(Ak;waZOO$NKJDqO%Vi{nEy7U1H|12*!slF z?0d5ex6EM4>V-_MGx*_9V{&ygFTaUtof~P~afFL+Ze`Eu2beNjrA-+nGPVk;489oI7=vqsIqe$u(yKe+>Bd;qsvaoOd1=D%XFfcz-T}R~8z3M=IN+ z0G*Ygv+MhKPbTnrM-=arqT-3bCHi1NC zoA$bXm3<0Q*{p(0kx6V;?}~>3(I{m=3Cd_xpgrR-(vWvZUO~1=-e98wHYnnZCk!MM zC`pd8M8pz!(tv~1dHp-kag{!0;TbQc+%hHU$ z5HPNnfWqtQSJF*LKe4WYCmHzgN=etM`?cbH$zG*Ct3!6d%4_TY$%w+e8uNHZ>ZL}@ zKtZM_I(iO3mNDCu2r=+?VP-4=5!WS-@AWa_qpLXV1kUh^lxi$l?K~ zDmAa7iYlu3*@H?2VnG=}8_;Ei88(DrNT|IMwO5k2{>zq;1<(mr$2eVP^jjOC$)m7- z%3N!l0TX&F<2KaUrjP&ASS@+jMRWb;}!N*L6ADIbs?_a;2H|p+6WugVs$;%)R9s{VhfaQj&f=!TSX~x zl**8n1r-ZHBIHx?;+|3@Xi^1E3?c@c2IbpRX+Oeh?PW}*EJ}ILQi=qn5=0B34p7=F zoyR(W6w+957F1ai1O_6|d44F0JJB_YD*Ht7|LGcBt?mMLO>j# z%3w-DE);Cwuz?~PG(@EkQ_i815S50Wd?K1aymte{Ayy%&fh0mHhe{)#J~5DJj1?l$ z1QK8*#5jkF1rr-g5)i~1QUykkz>)?D1ZA|bI4o9yGX_)y$`coK0>(m67NW=zMb>ZH zVl+6TNCQYM-m<^JJFCEx1Q5^;v^8j_{d2GyYZcZCsdLyg#afWKjHD%;l+bz@heZ@< z(L_!|stHtrCdM&D*(qG8NrEAQU=R}wVU+if;WBy3DFxYYke^3mwbzY`d zWZQc~^NNpi>*3R!-LaQLuUw*h@+6lUJGo#drW}QCo5$q2H?n5RV?24T%;l43IeqvH z<@F6jmGA!5ASsfZWY6X|>DNsxozZ>`ck}NN*)MlJa`6F5lrT zG@&<%RYet5yjK)k$WL9yt#aH-EI5$&KSy z^8XbB``(@egBrk`~6_NRp) z!~_931qjp@^Arl#s+5rQoZ!r{T|9Oy<+91sHhVtHSKPqL_wz6S5g{%OGL$BigFLEO zkJg^^Sw$69R8hqT46XfE#RmfsNK+UZgzj$S^5uyss;HuhpF^0oB`jX`aen3gX}~+o zFeQETp4!Lur@qD49($HYna1>0ck-*By_*#s4NNWsjMufZh;}4lEro(*aBz@;OT7#M z1uFI(48|0hIcq9SjYsG?ewn>{jxn&l6~|bGdtvzWHg@fNg@^VGF+6DvH!YsQ^)n}$ zQTH_$RYQG46O+SMCQoXlR(VDBYeTSciT?9vIK6WpeUnzP_M^9O+u8-pnRuCZL>-OI z9V}crjrv^W9w#6_nYyVr@(XLa*l~G38xQQ_*~ho=@m1{?g|{^XVCg=#oqe01;k)|>iEdlY?5We4J6;QV9Ky--nL2j~ ztJ@yo8*luO=jY9&ZT3uN%`d#e@0)s>TRWJ)xQn{*9qzHvPE*&7e0pssZ*(7K>)xGg zd}#qvIYQqa`Jq)E+8jr!PsW+w;F4Y7O{ zjOgJ4(8J{`r+Dr4^9+<~X>DwzvAKS10w7OdT4`^qp;$b}#S3THz2h{=4K?2F^aMDo zacM|HO^(6R6@~_S=}ADw%y64x4Rs5aFu7(M=GbX29^TIh)=~t1>@IT+NPLOGv#;{# z4|lWgL<60RXL9SRHfr-;8!H;dJYS_DFo42c zDN`)u>AYnY#jCw9g*x)Z8X8oJ>bpY!aLmw8Y7t>9B*qS3VDFE9$hOxHalYXrthwzL z+G`6F2m=);OumMWb!(aX?0$AUv5RdFJ;JtIXH#E{=*+*pHGbMqxk==@?&H4uUgzAE z9!|V)hATV2$0N_(Kx<`RKi(|G+D}e(6n4)MTZkB}%0d1A{}9`?~3`Nw4zG{FIUJWK#QF z)+{cv^`(=%_0~!DAL!-osgp+6KU|E&1C)r6_#79`9AWFOUT$k?qEM(Pqg!x296Op| zgJ!N@HJPBo29QDe&%ec{zkigyZzY&@GnjYF5}NeYi5&U%E+)^L&n&%(H@Cgcv96%xD}*+z)~+MhFOx3f|+rNFCfW#Ou#K<4FBqbi$Z%1~p%5tMUi z=TO#qEu;!rVAw%fpv)*?Alr|S1hTzYg*b(C8Yc=7<*lbGjS^oQrM&f69mEg@=$t|2 z637cu(2&)mn{a8;cR8{;bl1FLv}^uTswu zTr2LKq11cGIp$Uc8Sx%4vY9c*;15Ln{RoH%+6Q?BfARNPQTW$cMnOOeqOt@^s1O81 ziBGS5Sv~w8001BWNklUco*>A(D2N5IDWWqA z?%Iz=DsiY`L6se`W%kcpWdY7F)I@TN^bZEmB+o z0wf4xC9ys%@8Rw5-rL=^<#Y&KnW$Aw2sn^|gtGlbatGlbKe&70)DXz@$Hp5AR zPZZ85e4_BFLW#$S#fU?TMXKO?G`1(j_84qWg6*hKN(ZIypbQ-pu7ko=Daw?*)Z}HL zC>(jylQ$tW14$mJl#nZMN)bdTMPt!;A8V5Vg%4mtKU1z*(jW7g(MU>6+jzA z7nC~1DuX9r3lAQQ7Z43KLAqm|k*bF@A(S5P9nL#ktE?*}l*Xf_h&E0l(PBP;lm;b2 zP=Z!MMv3@<< z5)$zkFZkS(bfTF^$rVC}L3> zEeVy7kT`=#388`Wfu^+-HlULxCYeNsX^hlSp+w@r#OtRN5(E`*)gLP^+xCc#2jAve za6r)tv3h^?46^OP_w&*56MS>m?{T2%nOd4;Dd`QY-Fz>f|J){a|Itwn9@@{FyLWM* zyqPTtR4-9*gKc0B{|zr3>%#W0xT5CvN4I4a?|#hl1k^Q6$w4Hos>(E1x?}<*k#9AAXJ} zkNz~vvMSvd7)%054OZRG#_ldwF7lYB(JU|+VuU065Ao7-N2rUi{)ScbFYCENqihdd zwLxxNS0Ot-%z?f8dF`g#vD>y`xYloAMb_ELmiOJx%4dhkP8?XT=ew>P$8~&2nj)I%PS-#H8SyA=8@d zg0d0IS}idcr6`J$$*BfD4!d{({abc$?`M9Q&nGW3vg%>(e()}KRF<@5Y6YDgRJ0~3 z3v9E=II*ko-N2nc&G;)%^U8Rg;o42yG_aDj-C!>9;b}U$=~}UdLA8q`=Y}{xdX6zR zu#C5R9Rp17W8Z-zJiE6*WDToUbI-3Y~n2T*ejeG*h12g-3>i5-1Z23N76FK~492;*btIOkyP+qs8qoiH=Rq%X;} z!K8+2vb6J3fP`8_QOzvQ)tMYGD9ejKUzd_~R9JV{COW%n7j0J)1xX4@z&nSv@5WPB z0X_7s7^Jszg0YwWjQ{ZKBkcXy2YB$oySQ!3238ML(XCOcvc5q!{p^2D@4zN1msU@; zY4a1u)!pT7u7!m){Mndj1w8uzo;Ai=K#;kg=aSAQTKcXCvyt;#5kj<_k=X=sWZ0Ob z^&1P8SvVzTaegUAEL@shoXz80C)N&@%N30SQq!fb~#%C>-QI3I>|3P|^|&5Iqk#1y%@7;eAUq z5EKrL4+@VETE=&RMgZ_Y55FXi+$+ixv_X;rQjMw@sCazEKt&TO8lM@gu24vYTvf@b z(DWG%Z)kc$?i8g5>m_;`2+WHHfaqZ$2AC}g3s|%em&4NdAY5367jm)Rgo_FZV8KOR zaJ6SI@!WEli%uviltEk1>XLf!z z$&!1zwaeM1xphHe{$_t`&4<}_W(gpK)~y1mn16Sl_gOHQWbo+qUbdgy&aUj zofMTCxv!Gv74ou1?knVFMv-gsLQ~{{GPk%~;qySq!IV(3;I*O%g4YU*YMoC_@G5$j zl0YyZDv(GZO+ZwMmlER(Oejcv5uH@2600qxE^*344-$nZGy}RR@tXE1&=9H+6of1g zXo&`Fi6xF~FCr-IP*Nb;Bk0x&3b7g;6hf@`iuV}nK?|x#QOycc)X+r*XC?{x}L@!+~dbLn_D-?ow!JtVnBoZ(|@P)_Zg2{y>7qAMOfGZu&76j+;UL&ei zH`WHFbTqQJ3f)@Q4UN!f^b^NxyIM5qz;-s@^HKiWPyc8B z?USe3*Ib%r36jvY<|cOhgJ0!|L%+eFy?lTpd!FL=zxE)%_V^ms^`;j#?)oVj6NmWj zm-buClq_Rzil z1V?a|8K+2%CY>9l>l%e64w+?@~2lm81_8feJZ|s|*q>tO*_b3nTxS8#j z>JWb$c;}fInILcENVQ5bFo;pJqr$am)22xxos(ZKaYYh>8~RI7x^Ntm2mnxajc zHb1F|LYWGcu9e*W=tJB!be>m^yv|#PpW@43-9o2mmOr?b`SINqm9CYnyZdAO?79YI z6%sSTp$9Ax7eJ}mhh29R{ zJ;bCWYd5i?ql%F(iWn^kgX=iw-0%%taNaICW-(11Bwk z3LU+jWYsIlB@}6zv8=BPt(#1rKEc^j#~5N8tAT5^Rp{ zGQF}7Y6esV>DO!e2^pEZ5;J%ZbJ~z(jbcxfN%uI23?@^96JWx=CfV-gS zxJsrwSXK+@nVixVl%=C(raMOnfjoCO7pQb*^e^v5UE&*6cdcY_`=k6!_jZ~c{bZ_S zc<=MPeC#ymW^&3P7?q>#evXVx(G0zGhG=y0ZZR`H$*KKsGSMt4{RpQIzQ~h*(qs=y zkE}&8%=qa#>EKFME$^YfmR!2rUjcXOAg9ls=4eBM?xnxm&~c>^^`%Co(#7&#k2yEP z(6J#-96g7GkXE|s)ED_}5dnh7S#WQ^(f9&FeS+f6-5egB=6q3QZQ8~1 zKDk2mQiJNFrz4}gT2hRSbKteN7`FVK{{)r(2_4^lmhU|ENBka5CO`K$?|)!3 z{g=H4mqu5+=;&Y0@|Cmu6oMaMSx-vO=ReBc|2RYKx?dzA>0eIY{qJQ<{~L^)I>6Zj zuk-5IL)_C1HSL%>JHe?v=UMjnC%7}8;ppBW-rVyX`zG(Cv#Y{@2H$Kj^Xl_V_uNMJ z+RY3sxIg^ZF!iGcIM%c@e9q{RUA*#FzfakHsp~Su#HqtfFJDKW9iXSPioW3ac?FUT z=~_ltrb#JqMM2Sokg%Yv1fWS?IEpesr=4{7R!J7SAG!nzZ8Ez1x-rQ`-hVA-`%9ez z_iktg08k9x`+jaaKE%C;zQLY@-{%KYhdJ`pAbq_(bo4Bvf7M#nZQ0J2TW;azjoaw2 zbYd=@%{Fb?w0Rd<_`sGW%L{2eAOl2eXPJLKUrnD-YqQb7J1$5FL^I(Kx4GcA#litv zzqeSlznJIwML}dqhAx(N{vQi=a^V@c=yA!Cpe`|;U3q>Zts`F)7A*+~0SO)ako z5(GR12jU=Dh$Gu&h*pU4sH8-t z2AvsvRw9*v&om)5gw)_uLm?SC6&j%>7zio3Q{-Ngdqv@d(nZmL_W>88VYv@L2=ju0 z5K96qIsbDJ^XG6W^H=X8TqH7t*|XkaX&Qlx&Zd^z&z0af4&4n zqxsg_b2dw}%NNVR{5iTT!n`11_FkAfCTUIQXJcOGqvG)`NIG{g@wn#l5I|=0JvVLb zJAi6k(-6QjySV@%?k^yCi6)*Na3WDGfWi~M#Vr9RvD`$74g3)NdbMfOrp>kEJvbL1 zzZRqfl?zHV8ig_rNgOIzL<@m{#Sz5fRZgh`3Q$2~f<}c5trVh5z{TgMv7jww#UYi1 zkbzC(JDmU&La1weBM=09;_#-4OJ?wTCJOo-lrB&@A(24Ppmo4VAPItCiN@U-5XipreV1eXweg18iMg7P`ad6X|P!4gnJp9c!a z2vPybK&7A(&?%x5MC;Z|B#zb|-z+6wTBGTU0@1C!Dl)IA1&tPm!XaUH>JhIIB#JmP z8yf%&APO9WTw+~HgVo?n>wQ)Vx{x@EeW8#NN}FJ9mC|-nx=ySlSP59|v8l&Z97zIR zdxElH1XUGmmnQG5kau*Ecl45{9W=@cO`FjuGMYA}S!jw}D4Igv47l7Qg-4gfU?Fpm z3y2s38YeBmKv1nJbim^f1%?1BdK@sOBv7JUiSZ=~CAmOTik6Tiq0ky90b?Dy=`qs* zRR}=~zEh$HU8QhI0ACVp6siQVL=O$I$Hi)k!XwUrOAx0KmykHn-l1B>Mp>dp#U=!s zfvq5}iV7MD0i`TT;xwQzK_f1TNmQw!RD@gzIk9f+K?|fRigFDGUwVuck`jCg!L{ln z&f&eM^cEL1niQ=PwAP>##2Zvl=u$|Uima~4v?5EPVgpt0Q3QgDx)Y@X1V?m}6hlD8 zdq61?Vi~PEg(-!+2;>%&3wRr_-V$_)r~-5o(G9fnh=3MNiX#b-JCF4x&N-S=V4XwA zF{+Ny4U8*DR3Hrj9~21z6NujzKZez+%QvNI4aUS!mP>zavyk8kt{~5+89VtJdv@>Q z`>zjksQeDQ)|afTZDw76C)F%LUrkxcmeIHIPCoqkPxAb+KWF0b71to?TF&5YKh4KK zdxkRkHs3w+D&P9!FR(#A#H07#$fi|2bQu6oK5>S#$M&=DwcR{7c>`M?*utG#`!DQt z^d+uwnvsb*<8=TFZj_PXDaOW2{6NB`o0+@l=vc*_cdp zMhb)wIKTgOPE2J?-nh6I!j}!2LbkyFCAnWfCZBWZrGgz(B??;)bk8*OtQ}2SS7+UjP#Z8^!#ECa~{*6oqzsZM$~ zKEOvG+`$7I2kCvsh0^1F!SKig^+rLZR%KxI3KG*64YX;~rpnJwAyj545cY;#T z)dg9GBqDVl2YwkM*rvm4j+D7kN+c0!@WmGCDNt%!n^#qZ1CL6`j2u zWSKz``&|~JKvn7KoBf8nR4v^@Il<)F!#wx&eqQ|UxB1RN&CHq}9;nnXN?+VC``tim z=pLXm%ji%g!*+_Z=cZ`p16-^LRVj_?>LfF*k*F_4{X7$sQw+}t7)@G9G0ByWLMMi- zvx+E%n;2($Y?O1r3a(u=fHu&*oUSyZLkh;rY0jRTCeOP76MGt@lfM2ArYGOz=z$Y_ z|KtRrvqq9u$dZ&S)ucMXXoZs2C_r57ns?LFyMmjy4^Vr4lF6|N#z&`tg~LG?I5)xR zkp}g$gTcWS+F3Rxv1%hFhOTO$Eo5Au4> z`Lm;x&JmIflUAu^DJgK#ug`2H{gqxm^u%_2?{;pvY4B|gEi*@eV&pUvlM_sMg_sUH zs+#nQg(1VJjLr;{c&0`t7#|)dCk}CQL5`CQWirgwv|mFU{q(|%|F-eX2F;=4jOGQ6 z&`n}8I%{wV{YVP|s#Fp~H3_(~VD!W=)6)$~xTt{X`cPxw#(TKqkzqc(dp}>>JI2s! zKj8VnLAKsD$RpLIqN@csM?QRpsYaa!nu^M()iNrT?80j+5F3#)ZhGiLth=X5Hn^FM znZBf#;VU8rNqbnf^?vSo-$5pZ_VS%wd)W2ul!0|C8C<`Phu6J(4OgNP((VCP-E|Ke zR_?`T*P8LGM{3KcuD_q#H+_l2r%!V3#C~2pFva#A9n`XM&W}#<#<4!Oy#Icx<1b@I zPVvUcSJ?UHB%8WB8R!iZjcJa(aFT4zdl}fcoXUdjyZRK3^XHfhfs%wYjn^EFiIwVfO}H%)zJoS_kit!d^OeZ7U|S;i*n zOt=B6gSWDMQS^Q{n;OlOlUs&OowzsQaSqA1}q8M!FJ!fnqjEqduKonIJKoQ9Sm zkhJ1M9ECS{@CxwYQ8=Qv0foW|*bwW7We`;GM3DfkQzPI7@8eWvL(mW`ap>4tvujU4 z2(A6c`Dn=HiN<3djYCJlfHFAI1g#)ZNFqq8kW`S=qtXJMCa5$;(h`z@H=3X|K^dGF znxr&+N;70Md`i1kTgK&==gQYL8uBlWxmM=(cCpLeVxe#V zS)9X*=Qc2F%|D;l`4e#&*#xjOHV_9$Z7sHT7Kpo;+qtx2XAh>ee*_3Ie(oq_LB3ks zUbu_dXrUFi@PjEv5I53VH?*#GYmI=k1O!niU|Ye_5(tFQn)g@&Hu%h zYtyDpn;$*ADsZYvP=Xe42rejW5VQwzt>>&4M91e~83NY1_^uBE7*Mgz#Vd{18tnrF zh4Mh~1nn>xn2N(>LXrshSf3Ce1c`z{f#72LSZ^qiAV^S1P$;jF*5|QPfkFp#A~>z1 z&>?7)ZPiN%1ncL`s1-pg|DCYrNAqYbdS6Yy|K@fL1<4qr@O8 zMRW#I0iEH+U`Yth;H_W+=nyc$q5=e5>ztPqDjKP)5Xgd%V`#dREF@@3R47rQK;e-P z+dEZ=#^#y?Q36Up8BiLO!ilF4MS+lq`2GSxVWJR0S{@@zH%@x6x(6G&u}Olf1zgSJ zY93#61d|hVPS8z)&XKBM(<-^{rb!RE>!oRHG|GfVnNTki8l@pG6nPOSnvgfZ<^gR3 zsS9K-K=2p^qKKo%D~(pMAV`Sb5oU#(HpDU+tCd2K_$F4wYp7koIFIrc2?dC5X*5bi zd5i>7Jcd9NX=t1htPUtc^eADnC_Xg;Sd4e*5D!EyGeALbC}q&fkSL8d z5PXJ5gBDOxR-%+AD2G#)Qk4Xy@PK!*uIy3x(h*7!+q&O^LyJXW(W*p?Lx`TetO#1G zSQd1=C%pov1g8S3WnV5yh^FQaQVp4c3V~|yq#+vMs}MbH1h4Tpg7@GZ;sX+5S(9K; zD8wleuP_KkKr4sR0vh6(5*$!^iTxfXiRV>2g7PRTXsHm0;u;?mHb8;U2tw&Wyu!={ zAqileA}L$JDzsCGQt{dIW5i|qw16lWNYl1x;Ib4nCPp}a@;F1ILmYVKYdrbQr}@U- z5hmXJeZE>7XL|ZQyzl1q46a(w4Fer?SFU{OliroA_|SjF$G&%nb9u?xD|U=#=yV-- zeePGOcJJ9e2yo(>~JWcIW|D4-)Y~+?6 z01s}O(R1TWPB+QRDazsRv-`*}$0s0;^mdH>uk2;#nq{om+DRfpwSOf&T^)2UYUr=h z)xVZ|KKc-w4nE7#@v}_KOfmV!E{=}f#w}$ZE49VvV;nk|!^#TlR#))N^PD?3K|QxL zXO1&+_!;&NO*7gAs8i%e+5Ll;xjAW)uJ5L51SFwj*#O<$oh@NRn>IfV1VV63P1Kp0 zo@S=nV5|t(L}Cs0Vn_nj&Ft8;nmg8Y@VBFrxcnH0CN^-ZEayFIG*@Ap5F4HrHe$rc zSb2S01VZp|4QtL30>L$y96rd4FYn|_Paa?>_iXQ|QLA1>7jOuR8r2AZkH-?Y^r?JT z(FydgXJuC>%gsp+S-*BW z%hlCX43^TxK>r{&Z62h$?-Y5XNh2?Cu;`r?T#jv?Ii8RM)X`aF8u41GbBmYaa!OMG*T6MLWLTi<+=Z@hSdH`UF2=#wAk@%!)K?#(M$UJ1Yu zKN#M}&)?wwYAdu;!Nc@{!MefOW`~zo*~f?NK(H z*s>rxsMaVQFX`n`F?Fj=B;+M!9?RoG!{<44D_m#1EwS|(T&0V|Xo9m8^%Vk>opGT>fn$6`c^i)ze&@z zdZU*R2o$!!dWRCey{Tny1e*0EdguR?N_cE{Sd4#pM zKFIsl4ZOR%mjo(nS-p0Ia-j3tMNn6ZbYUvnxZ}>1Jpbkp$A*ux^SNU@v1JRr;RNSr zrr2+9bg>fPJeaQ6pzXiLW60ukZm5n?Oh6%GQ3Lc(hhG6zq{r)h=^OiH!YMjG@WEARXu zAK82>+n)Lxo_k?8ukJm{@sp#>lqE%ehEp%U$(t`d!;?=B@sIx1=lS3x+t}EB#iwwa zHf`Fx%S6ML_^6D1PV-`c_>_-J5|(YZ5^cfkY9E!W$uA@y_C16#DrP+eD)$o`4AsB@R)Z zputL@^;kuP`Jfe~X~3m{kOj2%ST9Hdk`l*euu-5GjG&c(2!SXbbdumyiqk1*g-(DB z$`pJGiONW%gT!Yfg+>wZ-r}vJ^aVHtUht~*K2y<4fsPGX#6~y;BIR}6}(QcD6k505brU{lHnjY6c#NOXL5X!Bjl7)Vnqm6cabmQqENEbfI2vwjK2-QHS1uz9c6)llN0SQ=1$;%G%MvZ*3gQiYtSVJRMG-pF0 zG;<-(g}eza4`}ajc!F5Gad`1~4Pq3D)>KRs<>4*Ll_)6*!M5JTh7wJV!Q&C{@Gg2? z^@R{hz*@vLQ3SLq5UokmKvoelA4pRn)D*UAaH+;=ja3Sx!I_jYQCL?2v52*ZcPMA$ zIktviD>#=?x(c8X@t9B`p}>$~lqcwfAPSTtNV8Q}%ds>Gl2cN|I%)8D@9^H?d`an` z3{VoVDxiWzsS;Fyr~)k|7(t~5o zq7?SWOptlyRX>LW$(uP1@9;ARdHE~9$=dUjCi0@wI_kZP~__EnC@UxT?-L z>vEi5AQrO@=R9v$KIXlnEE`1Oz_k%vlX7?uU;nc&^CwU3;(No6+*j%Bs#5E?($RQ) za1=H+w8f|ZjS}kvo+~|5-zCI=S;gAH8ms!7?6squJXfc_sS|!^9a;gH;Ela|`N4~? zbFu`FW%%H3UR}F}XMbutAL@ecD;gYJNdKxqw(Yoy4c|D!p|LSWCnuQ%dWj_xpqw7& z+{^!oFYS4YSLGIVJoG94&(CjRbH!X$nY=Ou){Sy_IA^S$(bd_@4eR?bml~n6O`A4t z+Ppi6K$gYt#zj3WZLTX?Lw7gQ(E;^3VQdV>$6~Kyn>KBJg6W`AsnOMGi0wDyOxC9v zo^Z@H0s3*y9QXl$^_#!RAHFfk)D0iz;g9_c|MF8ev#gR)G3qKB3nA9@6DnDBE9-q= zW}?Iv@BBG*{^T%Y>jL-u^1o(>f12-p>8t$VU%t-EfAsJ8+717bv~w#v`mV#5T<}oV zaizmK5T&Vg_h8;>yQnc}HGf{Mb$GW_W_=;vddj+uBT{7=lWA(iZlr6bb&R&OI0s4_ zI(mE2Nz9#_KF!4L|HyCr`|tDA5l!bU@8j40`+vq`Yb&hoOlT#$YO)m@S^L;8@|o`) z;!pR!z@8rr^5^$%;)zXFda?#%hhJg$m%qT@oaklM$3MeQee7`_=!f*82U&1_-&6eH zE5FVE@r|QQ_5Tzf_|#AH5C6e~Z0)L$r83OroiE z*J3A1QOD+W8pMvv>rJInrIMr=rU?|-GF*jb(Bi$NEFlEYnIX%}J1?Yl|pq-8Hh*psC|SgUOk|MIAU^Z>l{codfg$LzE1k zIL)aeLs$l`+I0NtNP!L9_laIYQlX3MT^1EOsj~S!cd+_}Q}DtF$NzRGr=M6$-{>J4 z^)c$Vf0$)ig@GGyXZ4o7tS-OH@&CM&q5D@bwK}Ic_Bwm?J$$rlkoC32TTiltT33}+ zph=UwD41@{8=+kv5|WNp-0_P)&t1oVhoQgS%dS8CV_v=Sm&i8vF`y=>pMI66|KBgN z_jEVQANdHM{gn@KLq~RTvC!WU!8Wl6pJZ=y8)0B8_ic_ZnPO%JFJO8%v-u2PchD+oke?#4souh`P%xD+4?SuR2lV6JS<#V($0mF?LQGW$RNG@Djh zF>^mJnO~f%MR}FDZv5X$7s4gh4a8In?t##n&MhxUz7U!HH@ofGJ|&i8F580vB>h zpA$kEJq$R9$Ib~_oeS6yumr3Gj4CmzKvWZ@a~Xg{gGXx9%n?on2IKe2B+Kh@h00d$`{?KCzwHw=sufZ(7&H4}GmFDpkIgMbQKx6eX0|Px z-N-zmtLWN|JosPyb2@Ikj~&mv#P@a`=V)9WSzN`dt?R&3wSroN>q+O+w5M6d)m!`YEZ&X0|7=JYWR9(a`} zpE<(IXJ#0mc!@9n*8juQz1z8W(@HiC_S4th!^)m2nN|xr*o18LR@UCIg&jR_@Pnyk z+`4rGw`^W^fpK>57T1{Ie7&Sul(?eK^z=ENe(4B1k5A7hJhzuUdzbN*)o|ZH7oDlb zq%|tl8Y?=}n8wb~Xv}bK+JP%*)JHjXU?)#Mf11(hS-C-zvK(Uft^?ex3M$L9B`+dR z2o_s3n4BD8=*&qDy>*l~4!y~1N8aMd@C*%afL@l>GWseDA54zC5xp`@TPTYLlOy|i zZTIs$d)_nN%0g(4QyzJWukA{xm+RTomC>m+WL@;uD)d~1Cz!X72I*(h%0X^eUZXfx z=g^y{nYy(bmS3dT__`4rSFQDPb`&ljW_av0ub#0yTGiL&`9O88V{p?${OrxU_`S2o z7&?85y;HaHU?*sS`uRib-~By)`-^)xVs7W*_dm+#e(E;1bR<-WZ@YI)F@qgD$>H-Q z<9?8Jy(_qTl_I+ym$OZqHf`F>BhvOV@OOn$F?4hwiGl8Jn4BccOhcY8P2HwVo1X}p z6h)JIW45-RkR;Ktf0p{u-5hxSTYT}ga~w-<=l%EI!^a=p%9`#PnyaXr`*B9cb54&L zwrp6=>i#ZP^=bU5V{&YmW|8BF9XQuU2g|qYV8^46a{u~Xmhk}Ze{6)t)HuKK^?iK% zFTO#43dSKx5HWO`zX zyiowvr70KSDa)M6$w{1d=)UE2EnCSNuBI{fGL$)-JIZjg&Nv-ZDg$iX*g>tP!H+O< zdJli~hflHV;Ca~iL2i5e<9u*)FDojsFW^ep=}BhJo?v3|MtW-%y4?(+KFq1rpXQOZ zO)A|br@r|I{BG5dh8$aO(n#OKr~c_J+;-P(+_I@}Za)_^ubz9E*I#~yFMjV7Lt}$H z_}NEz@1u9IrN5J!TH0=TF>=Pvj5AZW^lVx~jVsAE02)k>o#n{hqjbOj4tjx(?oQTh zUQgxeH&FF)8dKw(%VA^66>p%TULZ`JPI7h< zY}e8?7vg~_#_J^$4bW-A;N}f1{u`IBf*6BYvzdX;m+5Dex;xLQ(Sn&~fLRYQm%!Ts?@CB(Xjy6!cnz2Yl_sPH zl!J0|jOnpqMuBC#!)MPT6i}gOxdBl>lxrm>CA;Y%Hf(r}^)J1_vA3RK*NKNIj!)2> z3fy$>3aXhwSGTZw-B#{f_f3u+d6qX%-OZ7KoX#Wrk-LA6j=_EwHAModXBoBC8(AYS zQZT_}V~Vrmjf^cd-_k8}HS!&*Jgn_vHJPNWGcSZs5K znZcjt;eT=~n{U6J+tyX7=qqOFhl{t^(bqXX(~qQ^xeo=*aO}x%aIVOxe(;mr*3(Hx z75M+zd$S-(jx*2i_qlsSW*uGKT}R*5_kE!OG|;#&-b3ziIOH50(`Z&ZYtlYR50dFo zrYD(cA68~slgajBY;9&WV;s&bmp2I#Bte{wCeUc0@B6;Hs;jz=%#3jNPY<41T~&P$ zAV3b6lRvAdi0}yaaQBGx@QC>S{t?m8$J}`fS+HUaYqmYe1CPGUcmCi1#Ia-hIeTgw z*I!sjKfh&VY}2OA?#fBpqweNv^8iir=&8KjalBtjE0#28t(?zpab;SVH1}&w=Jvd4)ShO|G@HkWdOHP` zrr zK;3QOfaC;|6D7w>J|!BEil)(3D~jYXi3Rc!$vtEqbxl!%LI@C4@G6wi;Df<81p^^M zh?>L#s>DcBEYLjL3b#ECw9ef0b9Z|!)tk^Vji1iU7_Eio($vna6mGAp%v?`s^W5G( zSyQL*u19L_bjS8->k8@monF@I%W>Lzb9t&>W-)brw(^;3WqL{OpqV$N6=inXX?=QK zOf5@mgQUM^-3aL+^-j+WX}=_eXbA=kr~xdBCE^Hb)9A+0P>+_H8tPG~d7&YZI!aw5 zjSwk)i7$)hBn0n4VWRwSJeYa-5oF@Y$GI>lEN$rQDzskNluhY`RiVv?7E48e=yvxv_PzG4Vbkto>IA}7g! z3aS|@Rn&B$+D&P5ip&ryLQzdq76nm*Ed?zEZA7$)6s4yu14ZdkRq~3ER|Q)UoUyo; zP=Gv5TES~Qd6S6^N?#B|gG>y$TEsdm2BdE5sHYMwxk9di4k_t!5y>r9Efp%LS}-}d3Yw>N zZwu4RA#y7kyeLL8tj#c4hS4g<^Y-Vw}U^9cu1eHLLfDjOg zL=*7>MQJDy3U8>3Qj3vV4Af(!9wM~@>J6b*Pm^ybnF%0@xPZ6<$xEVj7?a~{#2AQC z3F--^PK;v&9Vco=pab6=dLgJyqyGm*N}v$50AfIb$7sYFMVyf3h6;wNHB_TTyEA;% zQRV^fk{}gpQJ14yMYT%MfR7EluczxEG>E|xeJ6wtLg++s*r3E%B3fW6lOV(uP$|%g zhb(z#C__ORO3GML`jS|hLS0jg3yo+gVg)0WDPfTjnj&OLeR2_kKs`9>rK4^wwKAhn z55{68;MC({f$=4#G?;pZOgg0IkiugAS$Z09E*XKexes&bTfUvX<=ba(_*gLA%UQJ< zR?Xh_Dl7T!6QRA`Jddg2v+aAcS> z!wobxn3x=9|G{gV9y4@Sn{kROoIUy$-_+|IncGE|6HL!amap2(SD#o;1t^D3a_s2) ze0~2Ygpzt~ge#ZN^8V3rMyKr73Vd;u_ulw1`Q-uj&Ch53#uZghDC>-k4RiJS70z6^ z#EHwdn3ytjH^5xxb!5z$VPi4Qf^)3)TiC*10_7@yqYu;@*>dK0S-J@_V0omQz1I!})Xj`1a3x z2y41A<2Si{<_HIlo#04iJ5TO~I0w!l)s7_iuU3=yf!ZXLC2j8mZQA@ZqPWEMo5P&B z(fr+47qg&uJ}Y`#ZM0|T{3(td*w3L`5xZp@+cvCa=ir<>o`Do-ly!<2n9+)q*ST`} zCT||;VQ|qhR;*ses#X0|t`*eIpWw>q7KXzB{r7pas1KcIc<34z=03#w`G$EjUOQ%V z4=iTM>eVdn09eS1Esycs*a*juf1Mv6d5xD}>1Uv~k1s#8fSmM%cz@EzlKZ!YN@&y= zI(&pHBO{D;ETDJMRvuYirMKg5@4G$(m1QhnwuCKfx_SNm8bg<_F*-Vir{j)#zs)2Q zwGqx;o}esq`c|!EaP12ExR;@OL`mNC?u}B`CK=v;kSn*w81G!jl0{qCwIXL;)j;hU zqZbbH(>)itGTzIIRU6p8b2H03NRPBL@HEQwF=jfA(IKw9`zGffTFkn|RXQhcGJftb zR|cNv`Ar>c>>1(M$t#QmAar1Rd+6w2$>y!=7@Xfh_kBd{^~;Aib@B*%uSAOOb!>lN z9b4D*(>?nFh!JXa$`}YUu8wi8oH@;s;YkN*JGJb=Rn^!r0x#ZzxS^7|NH(~+BMGm~*An%^XhOLY47}Vi|U>0m-aM>Um z`w#Ql$Tcoqzs1OS3G?rhT4RKXTf>~YX((iX4Xc;2W_9le_}zR|1dBlcjoS3C(LxDD zgUO*GCVhzqq+=f0;KOWQ_A^djxx#pDh;tVwC>M0x`@voV)Gl*lWP%%$om6_3^1zN| z4D@y0dCC4puoydsg*!gM<4>RG%H_*^>)jzvzgr_)wSmpEcL%T)%)$p*Gx!Rt`%dz1 zag)<0t}(i4KC!>^KIdtYp|fYXc43$~kMCk2Gknyh3M-h*kXeubI(n1Z@O6d)%r*DE zyni(eS><-)Kie$pV@Dij{&v=^TF$x!`#64iAMd=io3R@WRxe(_wpAVEnSkkMVCf2W zJkZB?zj=TYCywz>tgz;MgTYTMW^RAS9c{35FQI#I2M@3KHaE{sGIHYv=T8q2HZ8z% zFENKl<2YyD+K(<=!n}2B=)d>Lz6(ky3oh?JjrM(t^$%?3Nq3y1r>-+zf{zBBzm%oR zx3Yfo3YISDp?h}5e?yp-*@Y^^(7aovwpTUaC$BS6uVDhDE1_WI_&XdMsuI_ImJM4g z?NyiJ=nZ9ZnE2?wCruHD|+ zq}AfhUY<=e<}~B*WcYo1{q_`_yU#wF&N##!N4g_mI%IK?Ezw(8bdnB~_JDl`KYF z3}hN`q)8-zEkS*XAu~k--YOv`!9Z+@25R4lK`j`GKm_B7wj{U$$s=k!MoKJ!pi1&w1ja7W4vxx>3i%N!?3Q$F}9G^MFNz)^OU}(N|)|lQ&hJboc^ab8~N?)QHG0tFvL!*J{ z5EVq5_Zl^bSb|toBncyp;*Amo#9EAXIFlo`0;Ym7IYzQHMFAg@z|t!*hE#szQRBgs z5KE#7co!*rAl)J?u~b4Klnq0po>7gC3XY0eDr)e?pz07~vDT0|gW?d$!Q@~ocxNdo zshdbG1SYL#GA7}N#5_$}!BEBJJ=lB>E^E!JIA_VM!8wCk1Cor{m56F267d=-eWZwi zvWOG}8h8pKrI19J&3;z$=42=pkcc-HZ-tnF<_abv60s&?B@omjQh?SW)`(#OOqM(m zM}ulc1PqE*#i(GSAqoBhVq&_!G>H%#Hrdh}!KtEoM6!Tbu%ZMM$eKp&4on4aB83R3 z5@RH}p|WUnsAi}-jJPC{L4k;fA|`?js1wK{F%RiHhlux)!YhSB^fA4UiIM07u@qF( zZ%8$ws;DVYsiUR=u7R2Yj04R;N<>N|I>Z_z6T}(BO8PAsFg9WxSQ~LD&XpMJF)mGF z9Ev5zjNlzUMq&&|2oOt5EU{6O@HYgq5V0{LMo<%pCJ?hgu#q5!5F;^yMnR(?VnLc- zo5Z4k8YP-Ylt?g-tYhS^w*v*a!(@A@F4WLM}~vVqM@Hn91H+1qC0KEyz! z0whleZQA@-Pgx`0JjL#J-{JksW2kdv-Ah4X)?YpL&A*CwB0}s)ek(i-mS9YfM~xkGJ+-;GHWau4#0ZbuMP*n#HV~ zv3J4vxkJ3^fwCm@POvR3;EPWzCj%NI7dd-$58r;}>Wo+ntYOW-%pk^3a%BHWj?4yaf!2sFLKI9 z2$6E_0817x;;~3woa<=RA{w>1--jimqh|?QKlfF>Qu{}~{r)BPzVc)2@IttLkqeh68L>-v z{13mxf4gHbYv)(*_{jRvP#R;STt9V;3#G>lu4L)pN>UA@FRhfBWriIptf zvXxCIPO$RbcQ}6TJr1AW#fj17^!27^vO9=1uAkb^`|ls%;6vN#@62b32JXc5Ze-Pt z$Y1`8!(90LeH`5V8sF+z$hOrluy&54)86$+(Yx?OT`K$NOkWQPffy^Hmaj^1R<=4kIAi|2RHcbDvGQBYpF%0y96hyyg` zPsX@@;T$JV74!lFKn06+8FM-XGve_f;KQ`%;@}BUsGB)-8Iyzta~Csr%dbG*CnDxd3LOlP;d%d@T$%P|_~UgyoXyO}q*ku_Tv;rOssATVz;>o;%av2DL# z?;F>-aQrM+x38wLbl@JIAe7?91#X-_%c-#r+_IhQ*s_KPR^H_W(Z`H6md6#G8ZN%sTC#l z+51BJz&iHwoJoz+zUdS;5AQbNK%0 z3mkswn@lWtn%=eRSU)$7?FSii7cXJ$<69Vb@g>e3+>N=qhC-IHZFMKTbME|wQ&~jM z(uaBB6RSD$y%QX{e4aD=4m0$^0R0_uSIvw13Hk z8<#xi-@d@-*RG%=&+cqnT@M{|SF(LeKi|LAg*A?QRbiVpZQ6Wnv>Mq~o2<#kv8U%t(55kc3vEt-D7P;Z1yMws z#&R;VbTg8ezS7KR>lm|hF;xwcj+>1A({E2mS8i*+R#;2?g2>b*FD8YHfQleunz0Qk z$)mt+p@0x*i3JQ^Eu~sY9F3-6prMX>bkw4u7R=PV?xEoWjWSXc5npg7J*n; zw2XkFq1`)b)27XDFSe55s#QeN=S7nV;y{vRKVl3aAQ6>FkO(FarNE0J zh=N+8rU5f07N8!Z0X3k;peD7$8o@+CNXE{YRa6^52^t`hjF7ECt)LdvLd*np(j5HQ zkOfJG&8iqYMtl+wx~74(@fceYHA0r5mejY1ARdg>Bvc?E;t>;w4q`NT9Kk6@9Ny$O zle`~^O~(4hIIOivbm76N;(Wx0fbkKFVvS&|MQnyh1n-H3CzyzD-cO~WNTE=G7ND5a z4(pn^sSF-JpA)Eu-zmUoloon(0x zm*=Q+sI{my1Y?NC5EX(dKBj(D(1=%0&;n0LlX$$81QWoN7$QytV;sgg$eM3n9Y|hc z@*I-|T&4t?^+rIoOa%`OjBlW21KVhTD!81Gc{B#1vFW{p0j$Af77@j$!Ceh*ALL1=bhI%-42U)-=RqRTL9hm&If#~k5Jf=)63g`4 zG-;q1qk=|@K`|0B64Ky28k+e_-VW-app+yQKr$>D&O+2O6;wd2#b9s>6{A#xCsPj^ zDaBJt#M=nA*;f z)Pb6Tnxv^vj8S6bcZz$wcAkcF$p|FR+oKe=Y15|7uanM2qfMptL$u8n!uqOI)s zsq(lg9eLJtF!gJqZ#_$vtY+7|J)FIMgu_>FadHfn|5BmW$A|4(&yMF;@$a7a6THHW z*Z-C`FBkmWxzu6(RWUhKSMbc2{|%d;zQFkl7dSN(sIGaOFD)Km*@C&u%_~&hFY_St zs}Q+)^Ckxlo?^UVS+RCG>(?%&lMi+cv}x0(O`DH^NP1p0?T-+@H5dccDzbPn6a|cq zphH7RNYJKDo8M7NV3@rx{){)@KEx@X0e$TJ{AYOl;VrD5dTw%5@{DT5fqKe%ow6*M zGTxh_#QPBr9=O8I>k~L4il>A$TDWTRY=V#vKE{I&4e{UXJIud*`v9-)Il{_it9bai zB{W~n&t&2#$Ijp6t@Dfd)K`Sw&e=QM4NKR86>R*&|DG@I|39d`^)Bzfk@J78-OT^? ziG{5Cr3S)d3}3s*d%MptzG@Y{)!82JAdH`3=;ROh+c&OqYIF%(KfQ}T|I^1;%(>xUC_H*vQ+q`z8N_FR# z`Q*;+Jiq*Y<-Em>D;GJq=Pb1~tC^GEX*?Nk9p~EdAM)3)4smXB6+2#doIm~Yqb#&0 zje4I|u+?t5086?y>UXnp)=0SUKG$ws;i7Nm-va?OLg~S!Z%7!x6eSn;?BV>1c?@sp zVZq&t{7bQw3R#}xv`$%0QrEyNM+X2j#wWP)i+x<39B0B-=vSXk(_IJSpG$Y8pS6n} z)$2DIofv0mG(hXT)JCWmHHJGEGuJeYvP}=2^S1E^fAI-UPW+JX96HL|-~1j&AN@1B zI(wKuyW;UQMu$1_t+$X>f6DwVtC+IC{?K%@`jIDiYWxbXANU&%zWOppHZJAd)&bVt z)neRp`Mq~Jyk{ThIu^6}5C4oOc5Gtv2m0N8H1dv|CF_<_d1Hc!TXjZmw8j7jn4l3R zxjZ?*>duTg6@Uzv_4DvozRcn4|g;mwkUVqW4UUj4s?i>He-e*^{YQr)fy~`LF027B_<>D>R<#o^U)rzC*SG%BQVk_9Z zN@otH1i~ow+7MSp!Dpl&G-K3jr}^GLjIw#xIyOJJ^w;|y?PF$O<62g3+r)xzUEu1T zH<BxeqZ=i0}P*s&UKcaK1V7jfM?{? zF^-OYkCUJO@96B#n0rUvM`CC&IWmD=xQxDqi&;eTGHuhQO`BgE&3^w>_g_-O+gfj} z-tl;FN5B*_&8L++jHIXLv!|I%Z~sMa2TWf^Q@|auX9r9#Q)_+Z!mRDW)Ha>Io~Gn+ zMyA4by7jh#+*u20v$l-jQz|V3c0<$C075{pnhc^GBm+5BvM4x>IHV~WNcj*EFc!6e z7=&OYjmjH@5N#3+s3o>M4QR{bKt>QpG?I+ZtTv0R7>x)dW3Q;mW0NW>Nh}asb4^BD zF*4;{K(%>BwK+A*+qwj9*Gw|(o7&Jk_HEL#wl+=GYO4lkakt>sDdToJ)8`+EyQO;% zrqZ5@qd-e|ktRi%{+meqx3;HJwwRGuX)d+qNJ@w1Vlz(bI4QU#{D_FO*1@JI7)gSF zbX5vTARy`5GGNW5e5@0K$9u)crqQuPqVvRRk&LNLz+kA9mfRb%7^sAT%#=u8fGr{SSoP%5 zBheETECw_qU?_qkNE%GpIgh0QCZH~WQPeqz9cZW$u|!ECMlTj4SrS58gEfjsgi_Ji zyf0`#V~`U2}LGk zL9k*l(P9cg8<9o?pJkABlIMo1^@s|kca+`{Lq>=hl2tKzhRFnzMS=~@(cle63|<7U zP$)E_Qbh1E60}4qQ0hb)s3{SfUPwC$SW%3#gxsX~#zJsp#*kH%N-k7#L&Z5twRjam z40x>*LxGlcXcU-wi8Mm0PaDxJ;zRNzpdyG9j2aM!M1uh=lDfK4z#CMO*Q!S82x>6V zV|nBgQivr4uapIprKcT}#hV=UCF%>*JH(ez222srGGfY*1cpA6++PKV0h>hxA!Zq+agZyDP)I6A3;_)# zA{HUTs9=-JE=&j*4H!uR7{ya+a)T6#f=0bSoCBX>LPj+>D$WpWz?(=gmXK93!QxCt zRfH}LRA_)P)FdDhh=7pl-6`Z6sH#sw8;N)cc(HgfU=(XgVr#M`rOA>9F)FB5&;Z1O z8oLiHVeOg$J~`NYkc(s7yl|YOd*0yC;L|KwUGl)uQBGev%88MuSP*aE>*HjD z15FpSJuKS%1fTfxz-Z;`ymaOzUix;8EPjqJKDLqt9hO``eGNZ$lGlFn3y$6DVa0R1 z*w|_5G5{W5))*a{#Fw6;QDc0v!Sqs#?IP=0$CFR3v1?&Q)+%H73i?;=;ww+T%zxT9 z!s&zidHtvF^Z1`^r`JI);A_K-U)sx$e(v#e26_7FQNHoB>s&f{iQz5XjFz`JIX0g~ zot8Nj$a>~6cilP$zVTCD{F|d3d~F%a=QndDfH8>8aIV6fc?(#$WH~EV4zg-lFP*al z=bh^wWBr!%JpR^?c;n_f{CxKlEb3oC--h0%(Q?793kNuH@O@qyd7cN?-k?#8aAE8U z2Ts-4GVcOo-Z7ppU=b}1LC)NTOW66@$5?XqJuV+V!QuVK8QZ*(P6}=uJi+yIH<|P3 zE-D=r3;+g`_b_kUAMus1R_OlGo4mCDyZrTkn9u0Hc!DRlEMZ|sGv~z!H%`9Kp?CK2 zihGiW*DPnpl8^GND%iPeS^MZ0_#gjgHQ$QZR4@slIeCr26eu*4-JnHfxu(U_cI z%tw?-8jV0%z)bg-koEL3@az}Z_LINi#-)>7Jp48XYb#h)0ekfb*Df6A=(6W{yxK!w zD?cV9TeOQ8zBGyJ`YGRf>BoHQ|C`U`7hd3*hgLJ#+00FtVC3BU9C`UW{N0bPGO_lv zeD=A|@dr;WrnkfLkzQ>yY1}%?`QvZ#omVdMZqJX%Dpj@(KFo%hE-&wixE}h}KfvRE z`Bipb{~EjRsL1IEcHSnoKKBRwtKj(h-~OEAFa9InDeIUoJj3=S6>=wlr<}OP_5CmL zv)z-_1~%}*v&*TvG`iOe8ly~%Pck%~R>SBh6O)tN0(xfbIGsM`bT4Dm`cC#7s>9?6 zW8>F2ezL}MJr&3dG{*2ZPH^@{56kN_ua9D6jN0fJb&~1TVhsFrZzw{CH>;D_1XO?b?o+_2>HOnD+pW?&#z28#lQ*cOJ`jZKUfiVyqUD_mC}m zmf!o=V^n)z>{vg*bVm=x@G&kQ`U&4UxR8~f-@$`BSKeJ4)}~FHHXkR+wm8wc zd8pogC#ETUYSXm;Q|Rnuv>8FCZEP-2F)NKJQuyur*t9#ErhCDRfONFmtF76zJWt0Hf*tV^uIH zj(}5{5)Bw5ASM}iYepLbL>uh#-(A6(NoA3K$Skkfv}TDiWGT z?=~5{M@eN+(ldTk&1fX)jIT(A1CWT3bOjq>+HzAUfRIL9CTRq$Ha$*&7(vCLSfW@Ii^n2rQMJUF z){E$9V3ROF9CfwSH3XmpJkcvQ>iNyTU)D5zJo zjHr)j^aKFwh|)o@7N2zxT*PNe>6AvMWTH4pNjR@ap~>r0mLZvtcjUx8-Gj9-_|g(f zo64Q#80RpV!8%Q%0g1%cJywJ|LOnt)3bg=b2t-PR22e+s#8Ti)ASdD^Vk}sj;mIfs z6q!Y1PG%f=2DuX|&Qfs>4UrHOCE!Cr^bMq_BSnGK1EvAoCvMJ+>^8SfGUH6<QQV?S#y*nx~V3L0etQ*V%4 zMFRY15|7uN~*0qoetCq}}fRR$@#N4&-?{SJuMhBwDY*x^Dl=xdGo{wwKIEo@du*}4fZl8 z2O&^QPEz;%tk}MdwOPTUY&YLH`aUoH@Q2)(TW5anLLS-PPo-i3Lr3o*t9E{ZKk$+6 zUmW7#&{^Jl?M3{RMGW+G(w#f3LhoB*axH7NFQLy3bNcQ5oV;*}`&afI$-(c+Kc}|}h;%apZ&wcR`o_YQW zc5Yn8qG`6ly^jUkcCc&bGkofi z?vqdQ(7Gio zm~Q`^&12r+4nFzEf5!OYeeAnD%H=&T@(=lG7I$~h9FU&i25U*QX9x_IOK zO>XS{0pF;cV4e-su8mN3EoRq419Vky8)l2iaJ}o;`oyQnI(q1tzn{ayhdK1>7^A0p zncq=O&(|6VwQ=gEm%)b~W#fu|7IuEbK2z31&*CjS`k9Qn^sw*r7^A0l^UC)wbEqSE zWYpN8tleVx)LCx!Y+&QYZ9Mec^E|nBE_0j4!m+G#>-3xKKXsim#}0C7|7nJ6B|*<} zX5UZv#@|e^y1S2!5A9%Ozfiq-oY(fAXLM*k@9sIq<>*naapcwS^TS&wxG=bYfz3~` zZBfqr4uh%ArDN6eyztqpu=@<>uf56l{!v)bWyxzdnD9C4o_T-;JzdjAsu!p%U~ub` zJePHlb-c>Kp$lAi=O_H^(tdiYImSdnImyJ(Wp13W;kJH;ryqNPT|2k3ebIE0)Q2Zb z-eltRYrJ`QnDZCTaPh?Zym@Slv-zutOu)7GSk>FZ!tKwndG%se^xp4R>8Q*bWXYqy z$7erzn29sJ%*pQKdC|?IZ^aIFeZE3u4sRbFV(j!Ay!O3oobKzQv*NG@G?w^@NqqkX zR&U(M)@5DIG)K5{{1m4SoaAchX^b4?-Q7nxUr+Z9y|S10UUvL_aghFs#q@1u!}?{c zUEVXTZJEQ|g=^UPxhMJg&|8e3ALrK4RSxbs&hUnH^mb*$;TsISca*aK(-;}%>VcE& zKXQpi=@~ozDtq@{qDDIA`0?Gm`@_KB4lSjll3{HJy@P96zHtjH=H51L|BceIl>WgD zJi2Q+drv&chE>a0(>J@l?x51Oln0*L%JQqXn73*@JGS?cXLk{2w2%xci&^u?C&6^l z-M5FmXO3|8O-tk4Li*=cskn4+ij7ffjV|VGdzfwO7qfVN1&R^E&|%(s`#e`K9pLzT zdpXyL6yY)_cK?_kS0}l;WC`y;#DpkJ;AZPhq=)>&oACT&-rnsXk6ja zPyU)0Zw+v4?M4=@TF>L#y1DfBYn(ZEfm6qiaC*;8hQ~cV9_H+u|Cz7-)j_)FtYO20 z>sY;R0o`Judp#ST{tTZO6^h;0IQ8;(`RRoP9Pern$402He24{0m#}c!{mA6=>07;t zO#=_HqGO2b``_TLGgUGXj0r>^DaWoca-%{1$XD3;_%=2#pYt1juiBz&)CXl=XKc9@m zwKVl#o1y_tRhr6|wzrrna&uv7muYp@N^^Qh;;t17=(O`f+HHFD)+xG6bY*7LSy`Fl zZsF4pF`cP>n-Oluofbn{b#S|^cOGMUcr&it4!J`xFeM~_sXEjo5=i?*L?S}Oh&Cs| zN)isFc`{RLc^xpuAXYGzG*3IC7@{Q6K*XWy@S2PWLrlvBjyjG>wbaB>Q%6mch@ci6 zb)UQp1fR^ayiX$$M<3Awbed>@!ayNNX`m?{0B~*5K$|vgev8Q~U37FL(EwT?T3})W z(FPJdVlu>55!Zoe2TH_C18*YU1iUSYHWH;EYC#M|YS&A_*BqwikeW-QzvnD5~IUU2K)qgZ-}LX=+G=6F?mCXQURh=x)PAcz5H z!5YO%z)D11z?gv5fJVitAvYN=I&5@^Q6vZw15l!PP(`DnA)+B9(SQh19AYfk3f3t; z3luKU$Raft$YR71a53OYk1f)DIFLb>lXnQxk?x1S0W=K33T0`UVgXBZLUa)^$rwC| z9@BghVM2xRC`<&Q2mw`(l>$Qz%LF+E9VU_yaiU~K$V{Y^Dh&r&bi|mG86~qqZY>q- zsE8xbpo|5oo}dk)uVZ`z+bA)0kF7<5DltzZ^M?>YBZ%N!5(f%k#Gqst$*|-QGop1+ zX88Y~y*CTC@Pl9cI`;TwVrk=3v|Zg=GsPxTRzg56GW3d}MJg&3ooQvp3rtgis6&VuYq)<5yZwPtS1?DN9dJPtO(xJI~_G%Wve<=iSHez5E1E zKC_>P*Y@+VCFPwM4ezaEkY3`2BZ0&13YYJM!3n=!zgKaf<(2O~$rDdK&$IPzZvNQy zTzd8{PV2uE_Vo1hyt{~$83u;KzTf+PKm_U``pKT2o}U0~ zy#C^Mxc6`WoVyNAIJDj}O~$r;mv9*XM_4~_4}bj1LyW?)x7&6yC1sF$nbkelapV4L z`0#aCaqVU2aNf!Z?#)YkIs5V*{N~yFx$J9S?+ zgB-g5FZt3_E9|;%2g|EF*uSzydih&?XEv&M%>q5@I;WPZq z+HKtV?WcLf z-^ZN~KgXSa^E^9G-NABQF&yn+aMsoQ^k=T(!d;PkU^j!84)O>0Jcg2m$LtwkMNH^yoaUf zKcpIrn53`swQt?Vy=x!m=FgnPhd+Auf{{AN=nRGz{5)U$we6gB`<>kN@RNM~Zw{~; zD^@SMkt?pcmQS4@HXSo#aK;r}dCNImc-eK_d;9I&_sAnW_)J3^ml;Kn(N1>nzmlsy zdObJo+rcoF>63p{q+X>y<63V0mGimmiTily-uw8*U0>!2MtE@CGfi7@>bX~Q_0QeF zHJ9&W-}bnvEm@j!_}RO-@2ih<_v5d#wl?Cd3ohm?fWuF5&mX_SW2aug^Hgp2jAhqc^7l>whwd8j*1;4K$l>6F(3cJuX5U1 zckqon?&q6d`Dln;pmbI-$#ZIs59<`Q?kR;ktXi%-#1s&UbEq zm^B8(*wVBRmv*u1!q4*gTW{jhJ*({8B#`>y$dkhydGgEL{+G}2(DQ3F8T7m>I3M7Z zN4f7WpWvI-b}p0sY&-SzlZXa@uu8rCa&Gy;=Q#MyA>);g6-obM2@3_)QnEZ}%!2lYJiI<)`lFu0Q!QkG6@m zLkF2mVEd(4bOY2M<>)habKCbHX8V%Dz5t``oVQ6d5Lw-QK9~Q)U*!`|k0CzEcbh{T zxbrJKay`h}0Dty*4m`TfSvQ}>j+N(m^*eX-cYppMYm*6Y96ZAMcqbQJxwDJ!RSrM? zW&Z5>RaUl*sHz>D_YuR-x{R~8{e(mVtgvIxey;x9t$g^&PjkijXL05+!_zC(XdCC= zbSu}sk!jAjglqPOw={~quAI;QoA+?mrPp!I-G9M1?)nZ--uZ1F^Z~0&6)AdB?mzuoo;>tAho{?Fy5K4m`Jp@b=AHL* z|AE6C9Pi__eQ+8X{lPc*t3P;*)!nyXr|e+Y{xeuXfNh+43_jjAH9z! zUQ(V~I*pZ6KEx+}^%~AQ_mr+J3lLa7`wA}onQPg9)6etq>Omg3{XxF-?bkS(1F>q! zlNO7ovwHS5eCEIU6c_EP*gh<8343~adfq*Ji+zL?KLo@}hkoWY+dT`l)vjV;+wt!B z&Qz*c#vjKPV{v=SzJ+J%%r1In(htnDxv_Cf45fH11l#18?V06mBfff2?rbu2Z1uffPom|hPor>s2GYm7PEOv92Az@Q z-C{`6vSdY)lqHwSt3F_cZ9P3bA1q=RP*o#vLrfuqV2EeLj6gs&pjALMk})8WHDD^V z0;U<)7G#3^7_p2e!P^1e3}6c6P*i(3V5ny_X3SgETOuvBBuoUQ^bN8F8dF*iv;rw) z+SpRzROYBTb#jKS6D47qaLSr0L6wncgy=|g%)qmerezkQ*1!`;HZ*6{5^5Pkuu$Pq z@mN6^mVQbSaxV9G1ymi6hJ^CWlAQ67K{G)O7f^=^B~qabN`J{Z)tOJgTigwI#SpR^ zSu<$BRq>#piYT;dG@586Ga*xvsH`PsCnh1ZUdEOq6tPDgkwHKaaKQ|67FsKLpFL}Z zQf7Cd$S@TbVPe9%3u`7E=|lr~CQ2gE5~T%csm#jg_vWZ8LLg`+s#5`m5Jia^3F4@f zu7G$(yah8z-3a_CK{B*;v=uxGWWlpw88Es$3aB6=hz3+@f((EG*Z{1?tBA)~1Oy6M zGVW`T6=@ySgd~Ah@m9#m$!$5$o=V&BawLSqppkV8!yM?@DYGwPJ6Q)Xa>IfNic zP+S$$G6M%P6Lxwh6fW0zY0r>6fp+A61i*WR4 zZ+z3!({mi0=J5A8@b!PeZ~uSK@bu1G`00QAoBYq$4q1-f+pHf4Hig%}!oTCdq?;|n7 zjvcUT7u0q4^`+;1gFEDmA3ch{_8J^IxHSaGmXEqWG6LnB)jPyH8Q((teaGU2SsrKn z=<#-uy(4jMF7qNkT%21X`H}N@;`rc)iE~Q?$MO1+<2+6hCy&#}Inz!Xk_*SYqLc1l z*OZPI-^@=$2_X_fAcivX-7(C)%6q<)5^Xlj9q(SULV4jGLL>%3Jd;wQY067{36U5h zb=2eD`rR@qCsN8}2M|O+L_h*DmT8nrT27iav^kUA%RAS3oZWG8Kpg{>I&KZ)X(rny z^NA8R1Y$L$4zqF`X6@jk{NKO*&-uM?PH3+AMZWmY{~5orw`EnW%Nw|Nadq`q#Zc1i z;+Hg(LI=<=$e%dZ^A7;yw1hIw0G#4VB9M9PK%(zUA)V~KB5=tau(%sFLJUdU6IhJ=Od>BNy7 zLuWR261=oc%cQimWp>wccg}U|bKMY_@UUnnxq&mYi&U-3t^WBT+;Hv5EvaQQa2L<6lF^ z!?1S3v@Pjvrj!$bP>+aFiN_Pb%$V_g>}I?;k-=JB@8iBL;Oq7xYWn8rsPk>xT~U+GGc3?ApPZyO)p2iRUTJ z8;|k8ZGXjAZ@ZIkeCIWM_XX@)jhuSMd8}UcIez))FXQY}gwQsO$C*%->w0Ug0hIfO zkW+?OQ;kLpb#o0I_qZ7+tmYH$08S96fLp>-%hbyKpqg9SZ^=c&+ScN6{Mo7Be7b%z8%kr+?IZa1BiksRb3Ob zs~;(0bJJih5Njgcv+{V)ehq}GB9u$x*mHi&Njt{lq?|a;w;$JxO>w>h@xePE=Q|xA z952ovEk0O?^BszBMzK47XX4w)<2w`Ij5o{U2-pGa=0qWSei$Bo@D9H6m;ZjV=ltf? z)wIBI?i?KJx47+P>5e_N_dn|uZarp$t<6goJ!bBi*IQ^5o12$x@+i5{Zo9Zkk3`B^ zy8WUH8JmUOnu*Qj*mS4&^876BVoX*Vez)#M+64vg()L+S%Ba37P7q@z6c8K{V^-&G zMKIvj%{buNjZ|910@iI8H<$^IjQ%=}5M9PACw?JHx^Ez=#qa*nmw58I zXA!qD+J0&XX^Q5CkXz6}S!96dfL4sDv`l2mgb5RwA~Ge)l%U<(GSVnaYh+v_ld3%H z1988=u_GBjnqdOLlpqx`M50z8!X&RTwsqF? z8dIB)#ff2w5H}V?N>{5qbPl0x-5t;_7kcc5sg-i^&yblNwq(Zca?j~6nCs^;6)-(8&AQOU&2{NIQhCxUS zH8TuWz8z>}tSgLVnX#_W>X3*qA~BN8kVNe*ZJN@wQ`%-q+fMOj3X>_)EleBouqLf8 zkye+<+m>Kh6GLEV!Vnm`5C~+g$vVVzfO(DQ3Rw@xI3Vi*ojOsE5bJ})`VB&L2#rT* ztYVsLrm1F{YuYBzPK7iT@}F_I^EGIFYE z@+xgwq0K8e6-!G;SRNf>Y3T@~`Y6%Y$%iK7gH!C#lzg~hTQy*Ny~HVl5!;4K)Po`I zaKzeh$l7qk`e2A6jFPcDO{_GDLCPfHU<@3JfrBw{L<3_FG$}I8T^}ne>o{xftm8~@ zGMP#fm1HVORGP3hkeZ6rMsgFeCgQC?v$6O%;p+`Ho?z=!eA3`4p{o3^|LVWzvMcv( zxtqOzImvK19U`C#gFzWh^8F|p=;`U{>FIe-AXG~`IP?0?^XdEkAC7$Y8SebXT|9He z&0G*GmbM0eS8#GW=D@uVur|Di)wB1pOWtay;Jt&Vh9ggYg>OCiBHv#+kDvOvTexE9 z3fqsh(CO*v>FIf=sj7|Nhg-i+_PqBIff$jM6{M>0?c2-j14j<`zxR83eyk`VC}HUY zV^44K+W55oqavVzSO<%PnJ&!k!n_6 zIB^~|QVmPEjW``R%CmQVg|Gefx46S^=GQ)d3pZbOF6Zx9VreGup2z86%M6FhY(Mo3 z&OCb`=M4WP|JR+*a^Sm9@zAR`ar&uHYdJ_H5UN1fTE?w;6eUyxLN%Csy;-@7LGD7C z;n+BjOEFxHb^HnZrWQ0-C>7N)Gi;m$5wsdIJXt-?(7-5e)Fm8i(so=rvC1*$>#ZP4 zhy%jbiak*te^f-#dWq2q>vALEw^Vij1PKG`liq9>DZ&5^)yc2HIU`n2+s$y>>1aLZ z1X{A`ICsOASJ}30Cp&VaKCb5kS04ZXAOJ~3K~y1p$?sstelEG?6i&bNRz7uPLaLS+ zM4=uo6Swc-tZf78Ac)qi)SzWZxi_VVv^EU4eo}}=q>c--G#n3sP;WYKZ|j_k5USxx z+xka!k>y(v}-z~@b(un2N zRkok8o82F}h|$0Pmwfrb=Xv3tyEyRc=TS3cxeA-^m0MXXdl-xeaGcp{;F!ec2ofq{ zsEC}bE`?AJy8ej(71hZqvZtq~=f{9ko@GFHyAyEVeCu;~^B&UTzFAz|dM5UjW8+vb zpXa!7%$bGq6S(QzTaMfM?AW|{d=>+B$9wm$7?XRK^w#u>;eDYCvx(v{_jVS-&-@qS z^iGvplr$wtO8Q0aZHRaI!NE6#8E%f7;Rf!4n_(`Pd$%s)O`{B7%g-#!Ghg@amFhCT+q5zW$lQM&VNhA@-TGp)uT8}gqXgttZaZfBTozEf= znVOa`S!r^ZyW1>S7RZ8{5RKBreW7Up3Z1B-xetSQV~R@mY6ajJ2psI4L-=^K|`xfOQIn$^_D4CW)(sS5*ScX6A4kkD#W{4V6u`` z$|B$b>KQ~x0uymscP6pos0~=N0qYDHhYErtW@wuU<034J8A5{4*~vq|vyh3j46vM# z)S}uD!xRYuX807m$#`mUQ#@x`Uxs`LcrTdP6Ny$ z?iE--Tu>JxWzZKBrB(&agivOz2$d0{fjgcRZ)+?$xFCZ-tTTf=B~%Fy+Fcu^Pi8nM zW$=*@pfW@whLZ)7bbZ~b!UqGS8h{OvW`yKUNEsBVLTSsXf@XkI;}*e$kc?`Y8A{7i zJE8V5K54MjVBX-;uo`Msqq15JSgDq%t7V!nLKvbpB%}c@LNo(uX<{O2qQN;@1&&5# zJt*s)Xke<5CP$_zFi~ZaGp%KE&X^fEh)0x);6g+Yhq_b5cm=2wQIUX2>AhPvEM+_! z)@Cec97DDAAwNiLedT^)M9N70YingB^`4%do}Qkb9}0;>MyFrN$3Joz-#zjufBwiF z{K?~&@hcae%7u%gF1}l|hnc?d1h+jrMfaV`W&2NKxHZgs9UC)Udyd<`b`KA~w#4>* z*Ybty&t=bYMJ-zc_w@Ai^t|KLby-xm?&*0y5n~Y#3-c00O&OJ{r>Cc< zr|0eAz#+c(^piaP%p1IM#?N#0MSIzI`tk{kbOkg}jaJw`T47}r*?!$#{`7$ZeE-lJ zym%P$j;+V^^z>{dPz?fG?YKvFYvk<=WGez)6>(_^A>O@M%%)MC9eR*(B$~fyTv>7%)Rxl?Hn6N2~+%z z7snzSp=SRV{eNhVm((0*X0Q(|B5eK&`pv7 z4&q*n6M-Ny7eJ_XA_0{m8tB}(y!p&%Jv}`=KVht$k|#wpAlabJ#W=q;q8V9Qyh`Mt zq@j=kG#HH=Q!}RS)L!}lk;u?t2xX?vs@%IJC~kq|NvF1b8 zeTj7$G08O{Br0#vwk23w#AWUT19ecqQOp7vp>@HsA<__aTnzcu5X<-kyv>jrJUeC; zIYqpQWCFrObs?6ch?kkeEyS zRYrp$*&!>|1Q;t~f+QzqLv2E36k~Yl;t<795X$x7iVz?;79mGyJ6(!}8f1XV5U~+5 z9ib6I9iR$m6+jG60hb}3D?EiFc1%u1W@rsdVam`ZFm9kNvlxiBEUU2+mDQ?drCOq@ zR;j`Wl>xzq#55oY7#Y`=sU(b+i8x22vKEE4sEkEf??eOB6q!n(5oJnCV@|S!Cxa}A zMbv84Dxy_Igb{LCVn(9jTBOGFUj3{MC&OyPauFh{76Ix9iMMB&I2x7FBvrc}>-&uP z544`1cbT{PwWOz~=kF;2IE72EyPl7~`Wg@4_85Qol}ET@`D%8LcChmHp3ZL%Z&^R| zJ-++sJ$!xbJU+JTOg?hv35U^pkK(lJukgwZ(>ANS7BuZjvj?Whw!6EpiPkT?2TeiPft%z&s)HOhH+{cXG1GR zbNOyFw`x5h24qA7T2+J?K5#GNdwP0$ek9r{c`{{^Aq;^yH=R5=*^r=UT@&JvFgL#I z>FMd|IXRu>?&78ZNHP4LZBmT5W^CMP7i|sWWjH~+B}N-?0@k(xxK^IBQ~>P z@O|9fCuNrYMv1$^bbqpBy=PRDP1goW2_XRjp?3_SBM5{dpg;&kdY9gN73m15gx(Fk zNN<94rKCS4A(e%!R z*I>?6NIgf>7MnBW&--_KJs{p^zXx|Sey_0^1-%b8pgowe%SV$9ep!9w8FstmW*IT4 z67}7XKj327Ib~TOAH8lmxIO4vlWi<+<&Q6ZxzE}Dct`NZ;BAPdM#I$GcB{mdb@^?{ zA8B|>c+SqK8F{@4qz>~${ch2EAczivc~rFXC@6Yw zAT4Z-SB9AG?{GwDc`ilbI+AjGeSGm%D~}*|JXT`~48%n7P(BitS52Y&0QWDfQ`kTY z#Mj_W^YRaW7fn8ArEYH%dZ?;f+!O&A1PC&}Pui`UThFp@BgL(=%;YwePb3yakN?&F z;?BqO+41MLd+}-_o%y=SU|M#|=F@OLY z-6T5^rT;9WeEYVcc1xnY9kv^23x+gVF78oqW|A#STroEL~_FB)9d$0@RaM4wI?d zrT0byYjI*%^AspGbAMUhs}e+P?Akxi_Q`+@5vN3plD*^|5ju&`4}rdXUasq0IE$E_ z6d)P^I~h(2W>a>9m+7n4rWiMccy7G{V|A6>0V$HKKIUO7H{~3Au5v zz0OB*tygeu)*L}no0lB=w#!F**>4_UpUQqmPo~fkxx44G{ZO$<|9LMu6{BhJ14ly8 zg6j!$Ck$LiX@Lf9!hAVdZv9Q9%SmIi?2(a*qdoDF%vPBFqG*j^9UE}6_Vb?HpdtcC zVfXjxarZ?;3sH+S1{B)e&!utaC8mn@-r8$`!tkZxP)JZRani$Od1tbCR`bZ8yci_n zQJjMuyE&aeY@_$mwiVOhDHI(@4N}^p9a}FvP6Wk$hr`{6bQ_YhzyRfnP{{JZ z7U)okwq-6ai_;i8GZ!-rSxgZpPj_E6ora!rmm^*W$STl3b{kt8txFawYv|&@iXiS8 zgO%f1Foe`7q<3@47Be|KMF=xnu&V1*Jw?~U#PVMu2W3!9ntG9bqZ&>!jD|Xf3>@jy z?a^Xlw(iL_X?->2OM>~=m%hc=fgia$&|@WoqQ`GK80YKD{r#3*bOyAG{_$s2BkYXt z%QwD+RWx_Ky|go1FQY!RzNCH9xRzAGydu3Xoc(6k?ak%ogSAPsyAQ^GCrz2O^+3O> ziQ^s*b@!3fyQC)>Wmk*6Qax1x?hh97z7hQk2(K=oTtVn$N}86VAEi5DwD9UrK|FlS zJD&UaRjc77?VewRt^b2QV2xFt;pNZK9(oV99+JEc#y4$_7gSs6Y2r8#7gCuv+c5HZ zIy{i*G5hMrobfBFt_Ew!!attPWuW%(T2k}2$JlV#!eLV??&GYX_JMkj`RSKNHQ<=m zAuIK7Z61k?&aHQy`_VNI0{&&yTr8Hp+z_ns(75>vSS?$IX#`NG;p>Bbwyctzz8*g5 zY~|`^uopCHu;@Ih=<&CVQyqJvE*59fWBD`owULapooutiyTOL-mUZ9M#TZLp*TF7a zjgG;~Cyj)M9s7Wk1G0D8F(=02BwPZ?9y>7LcxgV{#?s5t@-)BeK9xV&p`FkRMzQlg$(Wkz){;GVSe&Oz|rEm91QlO zBzcZ|Qu^zGdCTgf=nT?tG3Khn7%c;L9H0EH%18HSf#0%!qp&_xaIjreO@Q4`dLnmk z){(>DB@bk1AbBv9)G~~lOMq~!X!g0!+UT#v>Q(za0R9NEY0Pr;epSqV_N1bYb?hch z4fG|Uf-Z`RJ6JjlTh5qD2u9~t!Oy+e86e=%#du0r&}|rDmDga$6 zN|l$tTdj?!HcCBB6qUOJ?7ki}ekv@bG<+1gmplJ-(%IPL~ZrJY+AP6j8# z14jX*^O0oVh?vw5uOA1IX2R|Fvh{w*Ka?j>BmXv;cWr!W4otw!^FnQOMdp+DTC#`7 zyy`*8Pq`^K)}({4K~?_9kC#0 zx;X~wb`?dJ+$`PA+|80SfH7)kv?R!J=FLFU)K7&iIdDmw{?4{9)i=l}0W*yuI0N~4 z!`50@Hi?4aU6^50rDdS`l1i@dY%Y%^bWl7#1doE*T7!~;*F{pt7i6w9tpKwGbRn>L zXR-+$Q7@Sr^Xb4f#;MPRksqvdu6Du;?<(liBg_;*4ef*v! z1ZAEVub|gv#CBjBObn(NjmgJWjIx#UVAWSuk>PeI3D(B9!Wac%{je5ve_R&&DtHW2 z)NT{CLqW*-{(6;4!7{<1o%+@SW-O^qdVD{teobI*TLh|0lg07%1XH33C6V`|OaAf(!cC{ZZw=-TY z4!7PNno9ouowM;l>RZn1f;SmA|Hg-eAFIC^zoEWpt&&WO2n3V>Bo2fEyeOw(k2=<0tU`{>~^ub@xUijrN7PW{TV z&zh>w{;Q;DH{WfuKlRIN+s@V>M1L$qAAT|!+@>0TncmwPx#2sM-usx4HEE8junfpI z{KCh8*LHIVzVe-?)o3P*9LwzZAm*|=kDO?A>5bW-z-l)Uak=0NzTV)1EGKpv=!Xc> zF4CIc`C3HXBD>YfZ<%IdAU=u?R{rdYY02AHy{QMU#I|i#khufBPxHxPgg3Tva{?Uq zx9P_$Q+E)+iPs0`X96V3QeP#|Z)GOiz4^`aMUHd3W*^G6qP*>d{3`k!FFC|Mvx zBtm$Q!rbG}?c~^r2uootqThz36w)S8z7)S7qy=zgR&qttF%5wt?hfp#G(YqE=##zJ zW^>_%2--onm|cjSmD+jv$Io3*$oTThJP;m(+xLB(xv$-$&LNp&N#IhpLRkgg3`1*i zbU#gcoP~H;J<%OmM%X`gbIDx{Cgi2jhw=pCqp^bW*}HTt%DYRNDNzSH$$}B`^xG->BfjBN=5W1NEYyKuuW58FQm3We3%P%*vRn&YZi zcT#f9D#jMhqwGd?blESyB&&EVx4&u{0zFK70to*eKOe83?E;VTLwX>!kaW7?cah z$B#NjYHG5EXaSQD2r1uOPkwRwH>A%5f^32)*CLf@g>&X%4?_Zbu;`=_>p^0*hY`|F z7=%g!zeBP^gD5HC^K?9zZcz?*+}AbR)pV#KN}-zx5E8KT+y)ofb{ZX7Hx+U^D zj9?09Md*TSHhwdO~~hywd1YRFwXHw^;AcDYv(s zbQrJ+&h28*Am=7`r(%zV7As8v3S$*GOd~uG@?U7UFmnWCC{}D7DtuRXUz!0>4YDPe z5Lk|DpLQ#T@xqmOntMrzT>oa7n{u{z<#O})s27l>e-Py~js-`6Y7F^2`5>F*Q9eHd+{?r^wS+rO z<+x5bP33y!={<*WB=NL!Z2Rh>A!jxIL-u;nO!j&u^}An&PnaM0hWOtd`&{{uRDQaY z(L2o}RwAlOHU*5>-N+6Ht=u1Sj6rF}d=#v~V)aiY(!>`!X-**k+lg~L>_^{XIoziXH zLs;-vR{O^Bj88vryA44etoD#Um5Ak;rkGFn4eD`a0 z<=fzAWnnKyjipAn47T|baVJ-X&Ay9)rrq_jrq$iDrt1W4TA;W`-9NiHyP*fXPxNEi z^|sRQm!9Kig)GRzPQ?zN?E3r1k3~eL1zEPNX6X*xW+}SnbOM{2pHdhRksezTw5AV9 z_8yuIo|Ypy^p#6f$rjO;_6Ju1@6+wgnc3R6f56#<+vVE6!utpL-=_S1A#uaO76(~F z3dHb*-3fXe$DWCN9jJNF)$6BsX31&x#}2egnEbRXR8DO`o}MT_W;y5Qkf#uUjQeQeT3kw>b_&Tbv6h~Tvz7cfFPN+L%KfGrJ}Qe)sJC(N~R z!`}I0ODiX>G{n&+tH9P9xL(=FB@ZJVShsN~wSS6W8y{R`imG{!S6nGnT+7b-eux`p zokLrb;%O(A4weUCJ-ZiVdrtd&A#)TA6xxE**+=Wc3R6bwL+ash%BK7V2 z=hua5g*YL*VKgO*my2W?&$wkCvgm|~l@Op6ssoDy>!T)hZO=*{b|@TULu;K#_HrRF za_1r;A*BV1q;N5Dsw{>q1_p2>lq^p~wwdP4NS>E@88#gWWQDQ@(G;U$*p$6&7(6K7 zwydp5_^)!^%vJfpruSRn3Gzb6L`+afE*b`Ag@NP+f<}M|c|+Y)ujdISbRFIYI^j(= z62XZ1i3-?^WQJ)Q!e|VE%mwWjkThPu`B*jPWEHB!R7eg1JO@FoP`e?-h&(kNVqA}1 zEUDGCzvC6=^9p!B~5sPiR=N7!JsX~}o~Hy56lDwjXKM~(#R>{@3`{d1PK_dO1e8gz95%s^%!Tek|iU%t(# z3N1l#lr5VL+t#(p^Im1^S(z|At&6r45N;3$WD<#euZy*shB%RXSve}fQk4EyExxfej|pi5PYhq zgLz=w%-5#(KZb~ZP3AX`p`5LpUHcs@;$(gQDe!?G+{&b3;9KKS<*l-;G8pig%l)z2u?vdlv80Jj)+_`B^K% z=JI1tV5T3iH*>dob}?1H;M(+4G!?e@m(keQ{P)`VGalLzkLF&{R#UdJ>MQZ)7TdOK z;e|&syMO%V@|ZyYT7NJmLr8-hKZMi5n7TxQ7*HLk*$Z_d>;S0r2z+9Ch_=L*(*RSz zLj?kd$eP)J(Pv*W)I3vuDxP(H8OE6lk=EOoFF@9a>n000TYId8PC2!nUlT^9K~+FX zkRBfSStW*urb#AXk2QJBIhFH_?~0UmwJ+!LQvHL<~M)ag5C4 z-PGt4mvwvdog%5`91Q##sccSVUiO>&TQ;gD&#SnMzdNKuXsghF2jLn-s z*r;lsd+LtLkBa0iVO4_y`2Jf9;G#?ugNY3pg%vVSTM%{W!VQu#;dm?)+SW*$9P{)- zR6)EvZoi|`71ZTDwQ)tIPYdG!8)h)Qow&k#- zW3FkP>ptXIqpD8~KXhjd((9+huVEzE4rH?cuwY?&o;9K) zH$9V3&ObvGnlR4F#u}ly>lF(u&8?Z1v83~-OCv)WB{GeZIi&vdj8sjEq$Lk^AT2KP zPSVIp2k;={&wsNcxF+|~Wz40mSKx#d6_?7TU|cRjw%}xBEs&5rOsJDiMmzdrAc3?K zq60qEI65enV{04%iH7!bmw*UL=wzs_rGI`w&fte(m(yq_u_|>bSPCKGqcQgt27R*| z3S9A8yMnroAr!GRPU^Vh)HH~BN+mn{sy+<`wnX=lLQf$Z=}El2eKOB?=7cyBC|JUb zPyf*BE{g=Jq#2Saif!Vv!3q(*^c47d;N>Szy+3-1hmD!9JB4tmts1O!nf-oJwRLlM z;Tsn&LK^roU|7?#bHAx-b@_?uu*6FJ^M!(zfk+q}U=Thex}EtX0ZVZB>0i^s<{PHN z--ln$5{|OUkY5Ux{qM2MBrkf!W^dtRwV~eMR=>|CF0HRlm)_7oC4nmo>W-T zJ^S{~?AyzSvN!OcKw9?Hj~ne9j8Aid6Y)h%)Xi#p=WS07w>wX%kF=}yiYu!hZMF4y zo72d&)^ppKd%ZIGG;jO#*P0Wr2(WtQfUi=|gm0Dx_V(oxx#6*cHqDs-dpp z6BZ_VU?ICTVdnsEZ>#LpoT+H#T=t5jUg_zciS;)s!_?S?>Al)HnJ!|67sA!qDd(SH zF%qEJG#(f$Z6Xlm!u%x;R~koPvc{k-0@+p04&fwaB*$?~KxXL?Qbb!v69IsZWJuQ< zc6@uw(s(s`@z^t}9p2atq-)(bON|~`w59uTk)ZTo4_e&TBd4+$4-{y=*M-j2O~mWT z2BLjFCe10sniOX40y$InAixdg zGMW3Jt~8yk$Y9T6k0?Y_APJHRp%8lfNLUSXml*RfNE0Ius<2FCvQ={GsRezu9E{@U zLhIw?$)X}7dTO49X0wLMi=!BR2E8;j%vW`Uvy%1)*-8s0!R<+5mgD2Sv`vsr_TVr- zSNq(WK16+HG=!u#<(XC|9@rX3x6sr_&>h`Ecy@zopm4`$?wm~G3gwE57z44A7Q3}a(?jnWhJUU8tLgW$7q77S9Dh7@ zI?Q=a&#UTzzj9o`Vg9A_EQEtdbbE24(!pHv@i04LpR#;H}nD%nGe6NWgjcgtPbHTM;`hgF>X#3op;%Ci-{~{C&0oaX5A5Z~sb< zVF|V}tP^E&eLT$vHGkT8%Gw-oYp#8^rv9LPejRLuyFBQt|DkuIkqSPi+4xow*W}%} zKUA;&^sRlm#)#F^AKpe4&&s67&;NS8Dc`qEbi2QFP?l|ariYk=AY!^kpvAl1KnhqD z?Lr2oA;)rD|J~T4##sG@#5&V-##gvz{;mI9{lPguQv;K$|L1kD~|+Ux6JMeu`0 zshJx(cp2ub)5ew3Q%7R1kZ974x8TsI>J_8olH+Dg{?AXI1RB8mM0ZWhuXT|Pyhs1kk8#+SlUPpVe$M--S>-gmd`_4BNQ)dxrQ!mGXDM-Heymg|7EoUON7pb_#I$WK zEi5?$CIab;#G zuPgIe3)QB?e4!*8k*MOKrRdV5CNx`%)=!kvkPgPMnv=ogQS{XgmYk7AY+CO*Q)n!a zEDAa(emczzEOJtIvWRhh*&-qNVW#rVPmUu1q>|1u@M?b7nW zEqYu=(iu(HGCdA=pjHPK=8{!XoK1Ej@fo5RGNFT6?mj?@vU5#IEBU>X;c@TQ+&4=+ z(^BCE5UgeIO?8`3Q4FR-E>y11$Ot!9lJq(qzjor!=?140m$-?Q2MzR!GMa1e{@`ey zN??hrAubHBaev=G^@sH9)cLcDMv79eI`P+6o`Dqzr`(C_r{+(U1L?l8*t?kc++O|` za6sywS4i)@zjc1P-#DUsJG0QY+gG+TWo`0ym1BI$)@VTYgZG%1fve!O>z*&x0vHQt zkLj%b!%Y2d-ShVIJ}lBAcH)?_>11O@AHi1f6&bmT$8)TNlT`k4o{Y}avU`DX?Veug zSY^m>^Pi0wqpO)_}_&-QN z#bU>n2A%pz88p@A3?RZ|tY*kX?uObr2;GDofX8b$%ZYA6E443xMFXfFBv}cD05G5s zmqav>hY3f0myggjig`z?h+d?q9U3K4cps@MeLWvMs6l2jgDGb&ffhn#!tY52l)ihKDY^Zy&2Es0tX;e*&L#2e^D7j2{ zH(N-MEHjw{@0g=Z86=jSzrehN7B`Ht^V4`lLh*N2lu-ym*pq+`SB`Svo|z$1h*3}} zbWnZSmuHji$cLwkf~-CUQCJFPlo5WkS%~K zB%$WIQJN?h?&U08Q7CB#LSKus=L*>O+{_lTXQgdsz+hovemu#6p;+XlO?00X`bc$< zbi24omgD58;>gfRZp9D^_yq6=d0HKR4hJV5W z%$#^l^3X}umWFkdEL|T}%j_8KSUwYtV%C=3fnEsJ(9oq6SvWuj7Dt_A9vEKN=XEKN zY!O6hpJlDs=*If}8UYFdT#rX3J9rR@ZK!Gn+c(gkaxOhH#dCqkDkKx_# z^~E6+pm-uf^l+tM(Lyy?=tDd?N6ix9%R`2J-s-25g}d}y4LJV`3rz(e2K)ZkI>BkN zbdlPfZjQCbjRB1LpEU*bH)GV1UWn68FwiBtl(Og8rVKAZeb3~rxGBH*?|b7&3ViW@Yurcorw)! z;Su)0(ehRgpC%FfOqH2%@vjKzn99HYMin z7$KAc;XHBO2Q<5tYC8(Wi`Wd|;V2QV_+5>^Tx01rVh#QI=tJa`(mPeag!iU_q9cF3&>i<~M%7lI## zQ5bOqQ_v?q4kokvZv8AB&jM68^_y8%1y=wcin&)g?wZ7cP#K2R7pWNake?;UZW)L3gT~Ru}0& zppri(p9Zjpq6T3ELs%1RDTKJ+VWBGQ;aqg&Ko}hhUsD=#x{Vgp0mo z{!B_cnF73zW|l9WCIkxW7O3H%jhJ38Dv3nQDIJ7#?Y=H9f_1iKW~X}(I`=1Km&h~6 zD_T4C&71p6)Gt_U`|qC+Ur=pYq*`ukehr*|6@xd*LIj{yINzyZ|Fx+cVAm)u(Nu_C zLUMR2wtqAO+o3ONKsQT>y5+M!74@mu+OKdF|7UwQy;lak{AJCU`85fO(f$yh_`%T? z_nl;v#NF0B;rsvb3hvUeaL=A{uDSX4OWe!q2WYqJReK$o;hAkO^8?$vHp#sWmj^aZ0$93xtuL%m zJ$aR=+U(8@lc#UTe2G+yAbad9P6>hix}Vy7A&yr*OK*v?I(K&syBh0(4KtKw-i9Pb z&ySG8Yon1>N+IXXwQnk1Jj`eJ8(#DEp~-q|P!&=}Hw;h9nPD3u8`pK6+qHF78Z%RF z5=nr93x0lfFn}$JG>9xXzCHmsJrM)Oy*CLO3l# zuImrfAS41cdrF)sTyf&4(#=^8D6#AwYkT-@E)++{`% zUPZkRwOn4*UGGl5T|NhrRZQUfQ&)>K)v^Y5pYw5x6^ec*`q!o2f_U~!OZu50*6yLP%a@4XwX)IJv~il;n>Ft=fc7M z4EzrGPQg-r9r9RHCs3mri!L<3BbTv=XYA9vEw+3aRpOXDc4mE;(nkr1zDyafBDom- zS=hz*QR*m4OKtOKik}CqP1EX>21iM-!yxI%T zQ0qfTQG!MWRt2D#DveGyIs^mb>YCcF%5zM@(IR>%ufvj}GKm_+4w?$Vjt0OjoYPxj zXP{f(B{Fl}oVbK`Ziie2W_)QYRXPlf)7~8!1{zP*!PCYApP*8Ne9;?~sMs*ZVA6%s z$vw6P<6in@r*)~00Q-CVJ_HIW_2Ft=j3KId(u;_Bl+|6e-R(hKq?YiIdFQw4u(DBQ z#$69}xZy`IOuSuaBKi~TGbr-4!%v<`Q#_^4Tu4?Ok8=mSc0V*s&at0W%{rJ!K!#hM zI7-c3wIsspn_Z-Y>i%|^pE))t!Xw1G~oSlubQ+B88r**Or%#-Hzu%{yt zn+%hv6Jvi$rgaYEonR-e)G`jXf|*Qt69wXYGgrDO-i18hbW>%> zje91%JnLAyh(_$5IUaenTRM~B6MtZu(S!Z%JBv}_TbR9g?wZAIhIHSyg z7%XfEW1T39DZ~Ka@`aVre0Klh!v;gl^@}i zT@H%SrUp-?R(Pu;58T>v?HND8#X2?o=`fWX`#vGv3DvV4kEpEH+a6iGO>JCciBrVg zlTMY@&X0|x)lC$_vh25ty$|QE1S%@V2{rh`P9&%Sa-6mlMr_1LQnoYH0^6@QNGtez zgmn>pk`3{hEg(`zygs**JfFLW$X@YG3}M|)WQf=4I2u4^e@`+{5t1cC_vl+Q%-Ql} zWNN!)k-(z+6~=KW3S}2zy=@E&(oh-LoJCW=CerGyp3b4Zbq%06{P*FhYja z0jYy==BJljP0h#pB(;*@>Z^vL6 zq)=Q4m>`!qlCZ%zLJ>kRG$*9HOgI#Y?l-oO%8N!l zQWCnq-%tm{t{(x#BLv&`)t;%0GDMi5H6qwUj4xH;lW)Kw(>SE<0HF#=Z^%Q#svO$< zC@m1(yk*8t}x#gPxUh1 zEPNDcsW|;PztT|ir%b+K-xYj*aQYonaxQkr{%bM;hzgu52V(BM1gdEgDar?r#byf8 zOC}_KZZmNPs@Vp&PhS2@4jjsJ;HYZ19;tpfzsd2?gUhqZ+Z!p6rVm%&zMfqOZRzcy zs(L{+E`%AsdNy}Hj?1#3B8g|j)^~N;`(e|Zk%ad(S={nz=}e+B}UI+RO5i(2)>#P^2UJlXN}jL zFXf&{w6i~(IR$$M4pg=@-ly$f(o(1T(CYa%sy69WcTzfkatoz5Yqzb7)2m+lPU-T6 zk-*Ev*zJ{`j#T4EaZ}B7z1p!Jo5w58FXhg^O>aqn)%6d_r;osTrt>yFUwwl)%2^od z22zx(zBxDP?SKcKe4=~y=;>-7g0E?TP0u(z+Pl@hoz3Kx#5-f(c$R^$z(?Gd`Mbuo zs}4T7aR_4S5#ze(?-G7LqD(w_tI*MVSDU-u&FO9I>zAc7Cw|7tzU%HBNDtpU|L?>B zOZXwrq1TR~F2(P@c@bAh{o!WHkjf`oJ8os8cb$J&E%}4;1&Q(PML45mZ}NeA)gjG$ zC-c2hN8wh(dPUHzsD6&0PT7oG-7pfc-{w`}MKSyFquXSiR6vd~!_T+&clqZ=J)a)= zEj2JoE*n;yDm9xm5G*oPF&H*yP=!427!+@LTlYek@0>=Oh_t6LP~4Oa0ziYDFLj`W z>;`@MX|aOr!sZlUm~f*R2#Iu7;i=Z>Z0d1|o?;DhMz_N|MJpvpW&NTze%)`c7zqM> z#;RdQVcx(rQY`x3vI@`g_-adhnL^!(aI?mPEvvm={2n=JClq>9%indViCCsmf}99f zt4I1~9;bH4*URx%w{_L5yW!z>rvH5U6!z}I`qgyAC%TxRDdUHNQCyBu+-XV>5&UwH zBjSfmYz~Vs0aHyx+T;i^y?+XA!*fB3GU?O&$$fsSL@wQjcEGe;-QWcQnI5b&^H7-5 zt*qhHB9a1ZwT|S0De+e{4=X_AMN0cUWIGcqRGSI%by=oLbL88t_dH}s>7^d__-Ve2 zh_tY}0c2vXBI?P7FVvCL3nU!a4E#(_=cs&DHqU`pffZOFW~bUc!ZIb7VNHgx%FKkYDY6AV7Avg0?Gy%(1#fW5gohK(9)Xl-bH!a5Bbn!%6Y>l4=N~ z>rcf@^U-j{x<27CFEx){c}j?Y#$Rl1Es_S>`jy%jWa_*QSdP&C0Ml?n>fO#gH(OoY zuYATO?R=`*f1it8T9;1Cq6w)T*V|O7ur~f>Ewb}07g-}ee7Kt$lfVrP9P}|4mkGnZ zaS!IDQfTN3TPyy3$a?yU@zJ$Q-W?SG{R6V9^qQX^bPtQ)^e$^x?rstKI?g0K6zj4v zZVCKg+^g)XwPo7!MSYd)AiyhoJg@S_HisAEnj^{QEybg&%{@QbnX<))?o+871m2B7 zsv?z+8yucfGbd5RujS}XlFKT$DZiL*N#+fhW)Hl0IcoE`)i7t~`+sWz_JQhilqS6# zm$>q6H}6$CVX=-b4>5N zs`l!O8-^!!HSONBohuj(>{S{qRJ{9aP(Q!@(t*F_ZcpaL$V_Nh%=0C2?D(majbC5N z4_n2f#p{@qA+}``@*pLOvWn9vQduWgoP0oOGo;+mwgQYReB8oL0FuV3qYlSP*iF$g zN)HW*JC!fQaLNM9nhC54&)B_2-DtK`>GU0~9lX9NG4+*6TQ#%aBOp`wc`F|&zy1eu zn!Uh3U;IAnz1DOl;$}3n~!*-pn9^hM{M@y0+W}J#Ru@DE_*HZuq z*p;H>qm-fuq-k|_RB2Uc2dkkGZYU*QE;R)Q)OSIx?8Gp}aefvEct@?{7c0*Qe1MQ> zdIG;Fx>}`UG*AsT7+Y->BBnqeKZ;6s0lM^(2$rEQ^}E?G>Nb)+?`v^x%RA|S`BKY~9^v-ZM1 zuY0syU(<32w3pW#q^XrF6rTxiiXktkjZu0w%wZ4$GkWK6eC9*w8u=~xrFy2!$n`G{)+2euJ;sRcjdSeJ(Y92yE&DTo_+bWBmJ}SS*r7Y zBe{Pc@t^0UHHr+$6$3cbJC>R5OZkjUqz=Qdnf%M|H2+SP&IkRFtH2Sz1n4&FW0+t~iWru6eKjj8I;w)2)w9c`1-M@A1s+DGPu_W7^29l^c;d?OwWQW`&JninxlME7Mn|7VQ%tQ&n-oiZI(+K?j& zZl?xrscRAyPB=ExTZZAR;NRbXy@T7rXc*hk>(%r@XAgBB6II0ALRs)7&+5N#{z8Pg ztyJ49O^u|=>+ss1Zu87xy;OVl5bvy}x4QR1)!vJw70UlR#(!3|$!dT8z5oouBj!;I zG&nk(4&0;f{@5C z#<3f=+#boJel&UX^Zz@p`(gTzSH2!c#t6Huo;mb2U5LJ9VI%fMHvVS{4%2OK{1=^d zx3g}PB1j|MmiEljhHAQ*>Kg)Iyz+JW|J&ZM3EC+~2!^AI%b|vxnF>Kae|YfUPUCW> zd>RuuoT}?DYk#ziTgrNSV}3KdAF951TCb0svFOCwZ58-`Q=9+01bVyk91yG!4WrVh zUIf=nVQW3RF8{T}Y%baRH0jDp@kFOn*3EY^0}c|5;B_C%%R7eYbMs~8-2Yw5#fko3 zNNE_Wmgu(&c^iAK5^g%W4A^qTmS^xjH3+5+DA zR(SM=g9>FE4@CASxtwd-m|j!RUthlu{@+*1{2E6@<9mhcx;C*Twz+*Tk(TrJwnH`B zRJr9U@A7!{!-F4$X6MZT1B7;qh*k##yt07p-J4zVr)MW4)xKK|#|aDDi{dAHKkkiX zN#*;x=7r>~>6@Dx>JwofsZ{^$x4XyekJ*Qgi={Z0FMjYrIP-}kXR zSeMB$_{9-zcXF`u2gf@IO5(Z^SGVV%(+lEIo0M8j#fc96E3ooE>kt=AxI_fnZ;9-sb{^| zx4eHQ>jm;A=dYFI-JQ>And|F_iIv||-*R0@T-`enL&jILPk!G%ew+PH_3o+9x5MPc zCus6z=ew;TyV-kTqwVEWs=FEW*CbNMjAjGTN22}v$KHptU+z{m=c&}PLj>*2#Y11- zlI;A?_B#aMb4kOQrJdI9_wW!`1;y0M4QI=}XbOFK&)!cuSWAYl_VfMMqBEbE@{kaj zy$M;`YWo%Dbc!^P{KLicr?cTCY^F7t#8g!A>5cQ@Q;*MGuW63&zpR*e=dp4l)o~+v z0s1-+;LGAu*!5@7;6?KW#fkROrbO#0@$&CyIh&seFw`<2OBI$w~WizPu{lOv;;hCSa}n+puX}{F}e8r^0wq~@Pj)m$v-vtzbDv;9xr-a z>-7zPZaPTe8kxp__4oa9o6*XxrsS_ClRx0=o|ChHM5l#$zpb4KVH&xoY`y=p&@FIT zN7Uuc#JRuW#FdYbPiI0s+oC(+PUTn7$QxqB`{_cOSAK5}|HCo-GZX&sV%*!N@ZTLb zD+A9YPuV2jTI4Pwv8P1#fBh!;Lv`eP%Xh4u(-?kV=-hB>@^+{DDF@^7FWcNU{r-lS+;3SGn2dDk0Zt@I}hGQ@IAezg zY|+gAtCGj_``7E%w)e|t{(pXa$nNy{+xcL_i_ia&&j^WYfQLq?1=d$h(}JqTiN*6> z*KMP_p39TlKV+@vVGX~EeI)y=nx$@+IJ2``^P`bUb$A04ADjZC{=17m%y3(*-k+5$ znH)ro@8v)tS&ZwahwRH<)N6z3okssJNJ$)o@wMcT{GI;W@$T>JyOV;ys~>*b!(r!*7?unfhX#tW}j{J*1CSeF(#Tkn6oNy zli58U^bGIivUpB)c>~$myxJJ{o$_&OfLb=UVrmoE9(ZJYy$R`rL{mXt#7Db*HbcRq zPh`dIA?F&W1;)G9fyWeU#$BnOUw?N`8bgl5^~>HKESl-Ew^NDy;l&^EAgm8q7YA>U z%PR0hd-q|d$~tv!ZZT{y&R6HL(PUnUK|k_;-eqf<8p9jI&r2FNtaNJ$I%5V1cgZT8w3 z`Dd<*U6FzFAkUp5mt_gF6dOxWh_z>=h@OH2I_@^#$ZLXbsIHN*bBbPPgdd@ollr{9 zu8Q>xQ;Hu%!9NRNi{HX-JO+V{J6>bhw{+QG>AN2Oux~Y0{1pk%7&@1wj^YU-Z0@=h ze#%{3pQLzsxM}acMcQ~Y0rrpYg6W1~MLKhazQf%3oQ+EpOy$)C_?3I9KwEPD=g2Qk zLSZowmC3`AB^evh?0SCi12+kmE3as_)A!;uxKe(u{&fMr#6D zYo4ExpZX#$pcAz_>~0q^PTTkTYS%lOZ^DgGNnst_G5*Bv|1;rjgUq`>>jE@;?{9QZ zQM124t9D)e&=vFWY9+m@oY8qsAYzSn#);8qEOw15_9W*70vSVKo^v3a4_yaFU2m#R z=_d!Dzlh!YP`=#)9!EgjfbN5Rk&9}O^J0{e_$w?9J%=BxF~niB4A7{fO!{l4X``*=A7mM_f4K8o98Go}XT5v*;_G`e zobwWm^FCWZ(*NEq64(hZ=z(Udk1`z;@xNYZ&BJtMHped#_&N=|stG)&=y-`S9#nc; z5#Pys-4ox+dsBl@K$gAMM}Ox9phLhx{-REhE$b)nMvse~+rpS2<;&?Sct!11SZuC4 zvc~6*{~A7Mn-3zmPm-SvVMz$wN_(sb0vCb#A&=INN5(A){g$rLn zFaP7XjP^1vWSSl%Pm z8>;wQo4zfTH<=T7*?4a`Qo3tM;%o!j~FdE0ZS{1BM9OUESsvB>9oOs#jt{zpiTk@--r+k7#b*Ec1fDyXj4vLcpTSw!7_w1gPsT_;6ed7Sa2y*R_!h)MD0{A!u)dMP& zdj+>gWIDTbR*K~gl5Y=ebmP7dyAi4G>wG*vEk15r^lb=+18fF< zcc98GG`8FCWp^lU6$-mZ0>JF-vMdIr_f|`j#}kddqx(ciOP-;2#fxct#+9k#uon`Y zwsd_^W$#dF;_>pk5eNcC+8sR8Vk!#@yO^}48H!;a(DCx){g0)jP&mm{m~-r8KHy+{ z8>gc{3Jd;p9XuSm<7xilZa*24GigeMw)7;jlq5p^k8z<*HC!A&IdUuitU;E`yxn~S zRo*SO!Y)ox7p&0P`-?=kU(#$)U77AXWuLqoI;6xpTb^1)o)!2k2A$L*T`6=mywZFx zJ}~fw>cuw(-yLOPQB&7gjn4VVFF0ISOY)`jVfyMC+vjG)k*~8gfxYGhW1el_{jwPP zYOfSb;R$`#ReWB2nbJ0gHL)9atgY0m6Y3YTzTlzYVB$YcdlEETt`X;WPmu2=Tx|=l zF0DHhH2pqkXr9MGS4gW+Lyn26b`1CRjbc`1`Tc!p#r2pen;Y5a$4(91LSwX?Q%v32 zE9>dOMpEEGl+`J$AE+(fBv&$+dF5!jwEd~sAX!eG~swK71EwF!kdFu8v|1}@V`-Ac3^m**2`aQNR=Oyt6 zI>ABsDf)$g{e&;sE~dm{9o)kPZYBB6b8lV(z9v)=XT%f_;u3Z&vK9yGS5X!!x42ZN z2Nt;QJjeRS<#rXOT?IzbfY_!-jRl_WzBls-fw%J$|Ht2PYXlw|9fxS>_s{JB4lRDg;^c#49FOe`bM_&*@{Zh9%I>Jj&Q(FrN1UuC%$pLK zNgH{%vABaFn-7HyC5q|O3|cx;0xXiO{{XHIr&F8k>BOJ7lhP;RGgN(_^qommOaHpYb3nz?V$!rKtf%`AOUGT()hLL$Hx5Y}yO37?AQEJbR%+Bv{C*BqKigno+wz&8N# z!0wfS)SV-NjLSQW=S%McRrZ$D>*VBY|82y0j6n^3y)XVZieoMBm=a^EfG}-GB*bM- z3<*0|TLxXJLtlPoBGNY6gm8^5h|Bx=pu|({T^?Wypzc{=;V-Vo-LSS4{)E$m3;A>{ zA%pn%TC)1@b4{qsDuUDq2ykLjrmA>UaFN1V@XKoC7##T(dlK|CqKYa>d2ti%aR0H@ z`>=FmL>Ucd0XAL?X&LQ#vLerr#P6dyr$QnliQWjtXP*{$TT%U-!jJ*gVxd>(@59@} zM=D5lYU38#xb&C^T^*DW2UT`wJdZOA50m@teaxw7@N920q`yNkp%y&jui82A^|++9 z&{f($B8&Fj#labWJSP`PacOBTL5>Ewyyc@BVgrnM#1Un7vD4Vlk&9-xFDJn?M;GEL zOf++F(KCH2FG2;UDyOFz;Nn5Dp2MxUWy7(9TcoT(i-en$ReJ~np=UqIgy zE#$<+%A5_Ib@`+i!p25Ki~B<{#5CA) zlAQIQSDYtSsi*f%5+y<@2Z^>;HeP3!!4LCv{1;U1q{8$$eZU*$SH4mX{j_M7nn4)$8boPTDarvbBP@4orBdF zlX}@$$BHMKHtgK;mj`o^yxSs6=C)9B8ex9FcUMaTWau?J#Dy8_AKzh{*VT zc&$n|WwlCJYBa#1qC7jm1N4sNg3n--9}C}{$HOp{)$NY_o8Uz z?S}v2bj*^8ej-0oPW|R+%8bR+Oq9Sb@preS_$kBTkho2?cfq-xmuJ}CkjcJB3J983 zsnrHJCZI>u2eaX%h7=Hu61e|rpg*$un!Wc1kD;vcZdgvfD!+B+``3L1+Uqj83IS0g z1JyI!gColip3o5Ri&LM!NyXnv1T{122AM156)$5NHm+1M@U5p6#F zEFf08W89dVl?nd%dV~3{c|_%Qrul@T3Hz`(rmSe_*N1_J>xEQv^0?5QxhqR7w=Gfh z-|V*Zbc8Ht7w-y# z0HUhZN?X?MuE8x=8k$%tan%}!qWM3X?|zrM9^|8saW-D?8oi!Cdc-N`NT!UeR>}`b z?iTPJ_k`HlSTHJsHx5Iuf@3%p(rCAEV#qUz9{hYPuEzskR3TL0=uuQ_> zQ=L(`YL1y&Q!1}rRyiy7L{hn3Fom@V#8VZE8DEO^*Gg?V8lZ(u2sP`y@W56SjepAx zf(ymOEPHZpMlodljQlvSZy+L3h4XoX-Fr7ILDZ39k_p`C*pWPVhRHK2rXs3lsYx&$ zjWX2518BOiJVoc-yV}hWR5#$~cZ5Rin-P8qK1NYx$zF&%Y5g)(vAI+H10Mg(@~=nW z5fHNnAqs*bNa1*pO&sMt;i>y)Y#D8rpE7j;fxqktg>(AY@U7wr{{`GYS&nY-1Pn5? zZNs7cz7F(->eXCET)HVw5 zAK0Hh_>6UTaD$_PzAt`)^TMj{Bs8YM9&@~N({o=GRAf*R4tgsp$}7KDSf>j-Qnstc z&ItQBh9JrDgs&VA7cmL2znU7*AVrxw)w`Xr^QFF8&j4TU+l#f2`H73F zrGY?yjQ8d0=5~R;&==7f`)Ih4j$j{En{3_Y607#y`g0;G-?Jau*X8#wyM7{oz!|~iEm@QJ}yA6?TA8w zOWS$t!OvNcm%K9k_*RojSJlwt$wQTtBZEAn81#rE#J8_9AI3apg=*COLuBw&fX2}9 z_AVV`ABMaWYdZda7+e$soC(F z>zqxa(V4a_Fx(|~Xsp+T`8e{6g5InGyiD{ms_I#WiG`xvl}&k|eBRpR_Iu1UGzL@XhvQs%K&r_?UaE&SScd_7P3t)At{rT!2`}$ZW%G|fh)04 zcQ#o&1}TRWH={Te*4g58)nslJ3@w9R#jzDfv)$*EUx)Ukf4aUMf5B-!kaS}xxB3P? zd~Za$Hnp7r`>~o>`?pf--MPoy%ewc$l|Z%7cu(|&IWU#NUKC%>p*7}@*ZNTXv4Cjx zyZE<9jj{BfD*nNdILwz*s5_7D4DTKP4AVGIF2pg%TYu%U8Ce^+onE#n;*3o!}iFe!XVn-U?uj1nn8Trs=6$?jFN)I@?P%&9$GxOm5 zFx?}PFfHRwBgbWKrWedD^{XH`h2P0!a*R*!xT+#PJW3gWX_7>?)F1T_v9zl`m2ndc zZ@f+_l={m|x`8Nd7fGm6R~K!Rt*H}?>yzoQ}tGp zo_BRetsGO|tOOXCN~KJ)=vE|xZVhIM-wW~#_AV+%Ba#`r(WsD7?oIKU`p^Exz(=+T z)MJ(o17*%kNyW{-!;*_4i6ABs8&emN>p;=Y&7c{)yS|bq4gNF?Th`F%EwXi&ii`ZI z0_xT+6+p261?>P}{0I%?EmBeqH;W3tG7QpEja18?9fikAKwjlgC%A-D4>ELB+9`k3)t9Xe?boUa{N2e9-aexbNl&_MYIw0&q3px&+v{;EM+^tHgm<1-)i%0 zRET9VOkBMqD=HYH=RRGG{>M^8+c(4%PtDewTl0}e`&kRBIt~m}Gvbai z%F4P^f0SWSX5iVRafe->zyk)`Rr=M5!KtaG=z{4RFgCMg|@rE^lq-f!H&iU*m79VREZfhfz+t;s9j$x$Jc-6@aK&Gn2d7)@8P%lGydtD zC>y<_w4%+Sae%uKEMw4Dei<5UbP{n(&RJ5~Y{JOOeGASFQ!V!)IO)CxfO|tt$Jmv>7 zavKMInS9wN+Qjs_z)E(hw&1BGmYxv-mTTC9)oe~5r_FS@T-f*@1G72lqw&nksugPxpj z=F7zTK^K*++i;nhhQk7HGY#a&V7|>SZP}RnFoZuWEkvdcEs$1S3#{ucFmvE0l5S04 zin4{~fm>VnEp`PiqH&2FDWLsyS=1-8Rv+77V*u9=ehqljHKoN_987i3H>x~-bF-wq z0_j^K53ZlH_*De~w#1V@i1eiK)FHuXHnowWViW2HfLz2eYiPP;Jno*dohRD$KWt7HoPAOoB5Uwkd8&vSzW3e=AvssI6U+-#PCIN6Ql|EFHFWjHx|Mh-4+ zLXEqo&{rcyo-=iy{#;y_yg2R0H@WtGZtV2@YIwoZ(nr9H!$xyhjx#PuzI{If*Fj&JQ`fmEAGzUF%=ZbQSe%$D* zht(p_WtfMqpB%pE4vip`yEIsz{A+2!yCD3j2OE`Sxo}laPjN{A>+r?rgs>}x- zTdXDf&I2TqK55Jr&*}X`8s3MVi?6qo<9C%1YHQAC+VqkAs~zm`SIj@4>m$-se;_bf zV}a)yIH1`9zM#Capr;SOz`t^9Ohq^R-IK}Kn}e?uDIeiLXB1G|RhRWaC$u;Bts%h8 z>z#vs@=0Fx&U=lw%XaJr7hMA0-cNl!^LW+QYA%>gpgcwl$(K`GZ76Ess4^n=E*i2Q`#Ss>@*@z{1ccDV9IDHqQa_L+h&N>Ts$gx)$64fA*Q-4n!I zBED(?DXPSn8-5i1$o^ovdYB92n3h0&Qx$DTj0Ba}v(BG1jFn~!WxsI`EtRS^U5N5^ zzoD^r{fXE`Jt^|2tcjLi%@MhuGhg%4ME|{A)wzC9BOo~Tcs_ZXy4hDBPA^0=n?XQ` z2Z;Y0aES66j@LgcrEf>z5a+fyt9Iwk&BxJGem3AaH#4KB5Reme!131~N4J+1Td3l+ zBcrP&y4{p+JX7i;IITNw;+D2T(@!vEB$>niV-YlIQr^3}aB%kwl<^RDLG19>kT1i- zuP5Q+Mn+{&PNG^GV+Ml1rSVhkseh1YdF7)FxDGu3vMdowiveX<%+j8@iuF^UI}rHs zy90TAyED7-a3(V4Zj>--h3ABh?Hig>Ug3!iyVLpQ&0Mq<_(p|vgnq=jfff@PhyKuG zE5k{w&}d);G)hdWGyf^`_^VCAlz^44AXM>rcHTOHpYI&(#W@EvjFiR3NI%yfG#Kxp zKd_fI%y{*C(A1Zza`5o<-LHKLv~!7Vv$3UF_KlZXd?7!h*wg9@5!M;n@ra2`LADNn zO7jTbj*CD!j-uoI;MH@VXmb{QXo9X)GR;C(CNa&IZ@Im!#W10X9gYL&DP!Cz6^VEJA=;q zsE>sVjo!P1;A3S~c};F2mk9yKCwTIJU1RU79Bglp*7Jl=M4HJfQ>y4?yZ>t)R;tb; zu%;zQ5^2yrT$xE};aw@q-q1Nwvbu^~1{lIGsH&7Az1#5XdlZi3 z+Re4UisI=!8br~_IEY1;fzChalSDktcn5Z_8CTxd@*X`5?Oei}q1bAEt@vo+gIV^1x{C`*75E zYqKt>xUu?_QXy!mH4fCoxil(LF5eom4?MhPMcmf+xxzED z$+y%$=N7_lgcCBIWZDG#O!b0x29BJ!1lNXF!@-gO?qu$Upucem9 z+^nAGxi9m&%Fbo?b@!VKUeHDOE|6FgPK`&JAVg$yiDQ!>wn`5wiB|gLUVKMufi~o5Z_zx zhFo-bdq;Oc>6D;HVO`YtpbOA7@;UACl9hTzeDj9Vsq;@BHZt7Wb)1u%Uy83#Ib>{_ z>2t%u`5@SdzxgluRnEqgAlkBIQ@3YW!5f0C4Px)TpLhPc^g203_pNen7lmAUrlN+H zIl?!GwZ;J%0S>R3Lv8Nmaf=}Kh#`sQ{MO&`Z3mgm{G!tR+s@l_JUs*Q`TF z&Pccf=LTF{k3+67sB($TMVxY~QEI+5d?UaX?UgnLlQ++|pxWHj{Iv7z1Wq;Q?NSAL zetR2IJ>O6VLJtv7K4MXrkfT{0Ao#_7UrV3;$L0&AT>FbauFG!JTH(xkeP1OFaQ8-Naw!1|PQdJ(`aBVJ+*h61*M0uvzeUD^-Cpht^iwiO?~YGj zwaVG=KhaVGvWv@l+5j%dW4UqoQP?Vro|@tsp9~SKI$y|J*HDn}?|#T1)P|-!+7Z?X zol^5`Dm)PU_)j7D68|+d9C`)m0n=A9(25&TA1W4RHrkTOA>V-<1^qzWme8r^VtV1v zZ_e4O&^n$1Tw5T$DRmn2>#q|N;u&cC`af88c`6k~wb8S6Y3(^-eHG4O*m6~{{>MZ9 zk4F@%#tp#0#&$KC=xGd=;GLF_L{baa`g*_RrG2wKa`c6v3?9U;7HAV}K`-q&EmSGu z2yR$El3`>Rb5Bo?bI8LA&O$C@mwGDHXA}qNkd1J8dZX(b`$So92uTLvqVWYsAERm3 zy)}qj{tXam?24<@Ejndq3-rexemJvG%Zo2El9udb3gM`n{myICxU^PpXfRj?Y!vHkh$luHfM(bO22pszW&hxkv%qY0LE@Y{ia%1e=lmB% z<%Ay$D9ju@URs;=g%0*$Jv*TDxLw!pg~QSCOBV(EA1gn*Y^HVfNH?=u*``>(-Czl0 zKcixut^m6N0%CdDhK{-v@1C*)*Hou6%8sr~>9K=Gs_V#izvImRQTf$b633Os_tMd$ z{6pog{>3KAd#p#>P@%CTlrZhv!?kBPB1gvInR%^saPiK-b>pm-v8wcZ-|UwuFJGr; zn}|;uY;H9~pBH|;905`bvnvi!OZI~OyM7A%0OlP@m}Hi?`>u+4)m6(dbN}N3hlkwL zxMdg(qtf3Nw*_Xkdv+P=FuXgHX#cmt2C7d0VZ3~ObB_YAvW#KMzx8^@RxkFwD2<|> z4z^~=v8Y1i>E`1|6UXCmo70cU=yS&287(XPJ=0U4*Nf;6nLT1byg5WlwJijaH{vuY zxEwS183#pu&JBwBt!b^pI7Qu6X=x9>zPMgq{ah>NS=!{lDX7YnL+8+fqOFbNEaW^z zieLqg6bkSqIi?uAUYKsFPmWeMnK}!SS@yXFqqH~YnK(0qoEgj3J`p-PT&wfS)#+6I z0QmW{!)t0nC5nF+T55GQ0RL)=+;?6VFF+a7-G!@KeLz9U4fBpw`?Emg$Uu{adyfte zc_uta%5$bw>zC>BYs}-ghoePiKAP3$QNp`R z#Cg;orXoMC5G`(*-y>i}^oR1}6t%HR4DNd}zk_->Y#|OwNDqSfuAt&8+z0oyD?!So zmasc*?(m?2sP<~@%|Y0&f%(6*%7rG&Fn|B7@PJxm#+IcUmgCiEBGZsVn%wvr(#UZ2yrQu zfjFVXhhteJKUeHwq)!-oW(Xf9e>r&K-cE47MRUnxW6v}=Ag_OiBUv;%{<^xg?M7Ja3U>QJn$Oj?=pEUc{0Z++?0l?Z>Ue&^dx&p}T4GkU*KN5&Ryiri07 znM1|CA4x{i&8{4MMPYE>)e`hV#SiVF+q&|eQo&##1gd;UKmVA2QWEwr^Omi;7ecRh zZ8$oIokfL%k}vJIBXm+X!=?XAzy7BdnhKvXFmx5SPd0}1zqGxXQg)VMnoRj-Mi&+e zu|2pJU|NtIXHgUIe{t7mtwxk(ulmYp3xmjk0VS?w%GHh0iKo=97)hYCn`3ES}%(FwX#+6$oXy$5`YjEj4BECRw1oP?1*Iw7_4rcN0C zJhVgLfFSjj0bheoKUlFf0&r=!u}OY_^2y&%I}ZT3b6$&$1gb~Op=NTaz%cnX zt&sDPRPhc(rdxNVbM9|L(VUti{7fDl?MtV)O@>Ik)*neY#9~*l-r56WQbQE68aIm+ zolm3|G01k9kTXaO;BJcwdRw*jcGJhEu~uXH!zAR`6;TuLK(=OtN6i&rY(+T4cQYg2 zIf`t)AI2{>Wh}Cw@R$1~w9bv>wtw{~9zcA@UfAHss5FibZouyDONzz5tG6C^iT1&( z6soY%8yIsvkatC7=2UKbfRVeVw;fkfQXya+Kh{VQ4BW2d45n1Qi+8DP4&FiAJ^msi zU7o#?A=yMgGVZ1m;C&-j!|hNf6#h>%7tDRJL*isE!*^;bbDv+9K1!Q1e^5Z zrA)&@k$of8ENkJf%6|wTY&=}X6y<<+J z35`3#+`9I@s=a*usuTUKkip;JWN`)hpt@86@nK`qKec6@1wkv&vZR!OF<+{K`Eu|M z-0#d|SE_weP|8^;H?q>A0+qPp`&vR{6;{iMjg*7J4p;*=c4)s_w1H~bm*Stubta6m z367vpDli568Cj{Y<)`RuZ5Vcy2a(ZrwKt@^3ltpJ3Tw*d#E5A_@}2VCm3wUBMU|kerPo~DhpnW zS#9mXBXH4{pXsqLg%7ZLp=~~cUeUS?O^_j41JIQ>#nuN+gc_dA#DTO?$p1}b7%Um2 zkd%f#096TdPe4bO)nUO6$#A8M|MXSo=8Xf0c0CQ7%h17O`fp##CX}#X-n0)z0>x`Je4TtlIiu&TyoN!{cb~ zqXrGbd?6U+ShMia^JI)6x8fMWro^61--zFaMYjYL45_O3bA0!JbqpVo7U})DKfIdf zpPY@qE42<h4+BN-zYVR}buuFYad*(M0roC#FfJ3JZSb&U)a`D_|kax z&>215#Kh!h?gnjYe_T-@Nn9Oq9O$fz-j{8#XDY|Sju5Z1x+sux5@q6>s#p#kCUF6S zk<*XM3CTQH)wJ;OGw`9?0MX?R_A{+1RKPmQGi~+myXOnSXu}(mq|BP~{o4xwn}-Kv zF)d94_Ho@54SgC)L%xAL75*H<^*FJ=`uylZ#s5{X3(sV7W9*N96v1aOK+> zmVigBPzVqe1sSX?o*AAC6LcM=gPOC1jl)~0bjANM<~W^auc^~ zNo2UB0mQ`T)&iR!MXM8%ju_}iRpXq&dknhcLj$zj?qNk#?wsVn0v#)k+4a+aO$luR z@&cwYrjsuQXTmRR)!AC&n!5qy@yz%tfVjB3xrNW%51sqz72hJ+MS$i?1Pr93Z`!7ZA_|L+= zHV6uDQBBdpLJQE@IpqIGvxVWLjSygL-Mn1FGxikN+Qhehjb;0xoBuP*fCJ;&3-NA9nSXDL%&FSt~$nZC`^) zD<#T5T8d3cFXDR&IcpNh3h+&(<~o$hasLZ$Jq!Z_7JJB0Rx8L2`|45p#3q*{-|^(f ziJ6H=uf$569_9N}Z5I^)=H4#G`DWijwE~i+kj{J6=Q-iiQ%cL3cZIPmq57LvF~E>v zypmt0oXzumxPQq%)7es|O>wzbCb&=cI1izuX*ZNlhq%S71l=g_UbZA|uD5{pX9Xt7GvnhXA!Ca${)w;G9~f)=IT;CA0?YH7K;i$l!1 zB!n{Pt>G;R5Wbm?Sz$f@bVCe9`0)PQPcb>3i11y-pm->$v`CRlggGxs_tZ$q!j|F9 zR~T$pUeOP_XIuDZCr-adYEzr`t556dg?$c37I=O`X&p+&r?^zS$KS_sVqa`ETGSR+Pvj4!fjpG*L z8%W$uCZ-p#TdAn5EF0|1XARJ0=^UA>>yI4*D3nV|by*R5g-3;1Pkm%XGao)$uZyBO z@62#YizttuH1}Ikn0X>k)&<}c{hE;LVRy?S8znDe-ia1!*E?FKefQt!HPTKg3I2aP z=s&+c2h-@t`4mUXZgKnof*A49xg!j7*I+`0jd#;FA)f==KGU=vU^)6-I_84ydAlYd z->K9#;uc)_gPIED7bC=X0g!us@Nph9>iWIM+cogQTBamnYmhDKxxd9MZjPBrw3R2r zA~gzs$#Q}_a}-Hd;CAQ+{A`@JpTk%yh8E>q;Pds%OWeifNN2ONoqei0K_?2JX;zp^ zvF1!RChV%}SWc2Px^nCGC2C0PG+0v1h*peGf-_Ghrcq1%@Ta0z~6s zSIHQdYBN;GtcUplmcn(>^*8X5T%E;jn+F*Il$8EGDwggC!RX`r+_Xt}!EyYgbodx7 z9l=+CpGEb#&K}&X9iN}c8TT0R(O5X_p_3*9+WQD&wsx_fG><~x<8j$y^zXSd9x?YI zxyDlXhBoK=#Zz@PCXB8Kx4gdjwOqt_T@@EaT;%y?r8>RQ+nw9w@9bG^ct5}RU)Tbs z{v=!vGzaBv>XY<(O+U$y6i5VDy29@yE7RW;yAfJH1h;85WdOXk0N@=Wr>kB5aK_v} zs+djcT|4987xC5@{S~J43~n@`?*`xC_1n`))wbh(P$K#ns4Pv(1+vB9w6#sX;3u!+ zza-x#NK}7OaC1PXsM0IF@z1J}$a;H1mJ7KejFm`=EEG?+GNU77wtOC-6~~(c@D6EGFx@|e-TxDM?Cb)Tl+88ao7r4l z!NpVFo5aaQE*MN~!Z`hW9y7y6Ti-!O`|}P(vEEc$F`Mf>l_isnbE)q)H6-83tZKXEkalE|HDbdvchWT1~R!xxaDGNl2GOHw(y$;FSek zyU+B{HPlU#ch-RM*t@`l?Bq(Ox!TgxhMkem!d;{M7sgX6alN{5Co%Z0G^c*Hcyel( zI9`9$Nh+v+RkpzVb@pYp(et9(mq{+npxbCi-X*}R*{5CTO3f07@KQpQVQNbA8Aw%| z6KeH)JvIUiNh%$RmOUA_#?K^gyf2wK{33G|zB-5HIz&VfVPgEw7#q_|ZEpQLwMlHf zgvb%TbiGS;HFOYqst|tZDKfDb1oB*H9AwCPono8dF@2=ftY92+pvu%y^Ye?9&+r@S zVWAmLXt>#@@IHSLnq*bcg0@2B0;zcuEsE`MSDPxzEAm-LTZdp>so)FvzGkL6gKz<& zWf-O5rNm^}J^7?@b`b*9v*z~c`_*+hr%A2_o%vafMP&ipt*c7H%DmUlM(cM- z&^`@}W5`vR$P8iOXi!0v6v=cw!<~oY&tydUVS0JGnI6Q-17InKWS}z{bX=Cm+wMhS zD6Rnz?9o`$p2hw)>g3|)K%#L5F3L<18k~zVzFCV$=)_a)a1`&VC9)K%ULd1c>WFXu zpv37wX1+ynPvCV;)ALJr{2CMcCM&Nyv}smN+XMOPxyEP)rU~92xr8L2EMYMO1Xhkr z!1s;km|GnjEsVDGvpuxQuYB)0z1yBhPVI4-<`2opIi;-qo4HWG9b>#3RRUWHhb}nF zU_z>i;ZCOj@R)NJ>}o1$Y|9R_5+QbTM~_g&yfpHs=ytIqP$4K;ex%DHrOSa{=-1l2 zDIv3@atXI8Z{UZ%9QuFV^S`A%vCeD)9(JQX>vP>9=FpdnLm`Qv2Vd2FqUrMtW^eG$ zuTzHQF6_s7wZJ|%{iN8J&o02-yxoHK)W5hl+n#B71nJe9E7WT0FD`8ua&yWbEmSZWHO5%GLt#)B&%Z|v}AIqRzA9*b`u&7yk?$UUnbl4w$YpAxs8 zKlmJhYOaUwlikav=2o@g9e8S564?FkG#oF=^T)(#lm|zCkPD0@Vh2tp7z>VsgwV<) zAoJqTl?$i>Fw4uautTDS;9w4LramT)D#UzyS5jHfdLTz~mbia$8*KYKxU`o81CKtSN(Fj*1KltKF z64L?Y@c+!MpqJnpxe7CXrrKR#eeGD;oa};!w>MZBZQJif1VU=Llo;gtf`=a7cIOOKkj!*oDYj&W1Gyu?bRVC=Bcm1V6kgQkgqk{U-C)kE*A? zcI_9)0Cl^!1D-plLV)D~1@Z@6eVr19N{vGKXj~aZGeG2q9|cS(mxHl<>c~4f!n&xp zfT``QR&4f*HQ_B00nh^<NxqGaWA!-maIy)JEduD+Jq)mnBN|nL5l3OuXKiQ2Vl6 z=Cf62m*IBdUq9~ebgxmi+e0-%UBiKgL4o0k#leS9A98WJDgB{Oe)yeH&>^zObDzes z@9^fDWe?2KXtm*RWtP)+^?Zp%`={>Noe2!y+1q1ok(Uudqw{PF?;(Y#^^jrfgWqvT z6P_%T!h%jsIJ^Bi6L`|f^vO$TRSX%L{e6XS8&&h2KVprC zE-%130KEJU`6=9tc-$BOho50fef=m$mbaZf4@syEm7M3f5ffF?Z zn5Yodp`mc%#ug6H6y5mAK-JG@g8jAJ^2q=H&q$3ZazXtQOZPtEf)3Isv7r3y_UoyD zn7lxyh-JwuKW!8+7Vq4t?C4T5^BF*#p~@I7mwqVPSKt8Y^@8}Z-_@ZJ+P`sh-)s+R zg=e=YOW4OQd5aFyU)Q#S@v}QT64@8Y4W3k~(D_R=6w7VPRyNSziyiXmuNjTyHQI8#g?J%arZIxP5?$b9aH z90zmOm=s>AQhIRR%c}*@SfXh--pw;?WQp3a$i>X5+edyNO6`8y=aIzoJTQbcqF;G9 z>5CMdg%yFi8Avn&DM`~@nJOr=6QEZ?mTlqrHTdco;Qf|D81M1%(>O4`7iP+xoDW&G zh$OT(WYw|93`sso`C|r!ZP9j0PR9mMpTsEZ8V7qQ9ONi>z)v27OGr!^^dqsDGoh~! zLn^}>ZS+;hH7Qp-&C+XaGSkOr@8j{2&^4DD)www}F5o$ys-O01S%E%6$X3)hz4ZJY zQrU(E?S~IUiHvrPH)+537yi)g`O2329T6A>bSB>af7N|wI9yTJwx|(AL>DdE5Z#P! zh!Bh#k?3U@Js7>01kro%qJ$(`7&Uq)MvdNkZ=;LBH}7}7f1bSW_5J_Wzw={X`<%1Z zz4u!8KI=XxkDZ3qKk^}sJGKt%jz**fe>Bwx(*fxW4tb`-B6l}S0bP?Zs(~LIw#t{L zyWH5NF{p}zI&-R;$nm#SVf*?YWttv|YB!NiPQI1q&n-n!8GBg+NDgpX7GrI8iljf*>)Zx$R6NCas!RyFATScz|F11!@Sj`G$<+-n`?1_lTk(*?!b4Ai`g z>AZHVY;}8S8SG5A)_iOZX)Vlbqbg%0E~L+JdkGY1A%&Blm7#kaAUkf`mMYg>qF!<;IL*114!+3S40Sv{5zVax#DzcS+TrCS2g=^NvP zc>QV+Kd~l0AtJdnB!SI`y}|+*&5u{bRM{P23mZkwPV9GEo%DxH`M2`ih(4RHAs56l<$I|%C7}>wYYiv&; z2_Gjc+p{-2>_Isq;|Vl}u>6%LfFiSC9G8c#hn7a8k;KHcV+&4! z42rBg2D=^Sn-Qhq+qm%|idQ=UcUc6r&FUHyPhWa#GU|Eh;fhUtP{&5NPPhroixWo< z!#4aA@sz1GlWwIfE<|G30O6_}VG0it*os1FeRr0wP%zf0&C*q^`wzU`#;jZPm$?qb z@Sm}N{Q;0EL)%C56!>o=aoe`J+5J|EuM382+hk<=3vN7)bC)=a_xCQVs-_h~Hr98= zwB$&}kG!&&(5<}k4!15M^R75dWl{?o5n*A5^o);m7TP#IB-N(KtXbUzAQNRJTgK>H znqJgwDR;O6q9qwE_p}r}>#fKF&SNjDnxwksTyiz>jb}S@LIZkY_Y!fBnsWzBbR6x= zZwpV60uo}fb12$~qk*2(Lx~-l<)OAU6~F++bN^L~S;g`KQDcQ3A78N8s~OyZ63(^P zyNz_TG`n9T@!l0tfDNY42bfN7f6ZuxT10aw4fKfk2F5VJ(7i zS;$Q#4!r|Ik%w!BCp!8u!MqqRpHLQ5%KnXCQr?}h{;py;z=$G+>It%;Yw$CikA35` z!S8#t9KXBEDijEaIU~=`4E-#6bDl08vX{Fi8E`?uI6I}_N8>1-=Ss-FW3%3Lb3qCh z#MV~dy|?0kTrd`Uads?&)^kp^aUPe}+EMXvJwJuYgssRX~w^E z0V_2T8kOb?gT94bCw!gHj><~%5|1{;j&EYv-^tXaQpftI@&~w66K$rv)ud-zApB!wU+v~G2F4dueEjwD7O!M7Ni*Be z*4(%B4hvvB+nz(lI{87Q^2?8iYvazjPtT#=ASBcen38{fh3!8NV899MHd9qoi^l|- zdT*F|?yr=CowRI5w94e$xY6J(U71eaa}c}s+ZYtXRzT?Yhlm0gu_}953vA;Lja>B2 z%#ueSIe?%rf$(W1=jhDC`0$7EJJT3cWezInXOI@4&f@=8wwTVwfCQ-`*ZBB20HUc+ z0C**Qcrd!5GfLn6NRzhXHATMCs<*B)-k-Zv7V^b?mOZ{L>bGL31 z7!=e|C3W%$I4mXrJu_d`M?oA+gu7j@uqsFtoN&Q33BWF^;xH%1JfsH& z$AVYWf;sm`Oslp-n_t|5%X_r-dxDZmn-ia<@_(7M?{Y690!Qn*fNvQXs`Z-1s4I$D z;?yr7r-IHxI2?_!BeP3eb^E8BUWc!1TTfsu*I%38X(tKRu|>u$b+~#w%Qnh(jh0Ur zuzc%2M?F*RAbon`274{fCmh5Zxu2mne@`kM!Q`LC5bFN(oIl3dx83&z(W8D96==pm zaam<_ZH}pB*$l3V!#b&SONN)84g6B4pGK8wBY7(3c>a#dk$Sajh0P30kkyhCL#Wx0gYwVYR*K zpuhc&F+fHPTZ)%;I)as!ZS3suSIPrKo9_SPbC6`~Kpm5vRg6Wjk|Kec5jl;{En{7& zaZf@-n-?)L5(@_;axpUZ;rXBmGQ>i!uwmFt`4!)Hm%H2l0i!l+C+P5xJojc2kDp=r zK>+LR>@M?IAehn8yhrsZV6oI$>h;c5mGXDJ>bQmmho+dsKVsx(Akdq8eO5Uq{zz*6 z{BN9@)#D3~6galUgj6lnL~I^tC~vbWn@g#}=de&|4|3-|4Y+OAfJwAPC(#q*6jDQ< z)|$`U=vR_kaNBvrU_-q*a_WN|hc}(7A!|?6!2XPO?T?&4xH!&fP~%1lNC><Kh`>)^Ojw71R}+lS?Z5m5h-yGH+c}T6;-Z12K$gnWFUmv{L!@a`(csm$%hJ< z)>yRg5$Ay=(GbS>3e-9KFR0$>ARP6Cli6j_TUjrZu?g`tKqCv2DqV>~S?ApSgoP^- zTgNjMq(S|Gythk(Kzn~MFl~$gHc7cGur_&xJwnqtrV^^b|HnlrPY`sI&*T%UL zHPwWxvM4m&yx-N&5P#}!%4g6M@f*v_f8cv(+?qYHVvmipcBSB;vc7Eq)uEM{YuMel z@L>Lr1D2g$MR_tR_%`VqcDnKHhX-I+cQ>a}uLJ`jEq6gr0vsquvn|61>D_e=9h0y_ zQUOcNT35nkcY)l^65i3JmnLuk2~a&DU6>-Y$=pj_%TqJm2{(dM--*CO!>b=WuHKvU zz{NMK(v+1-OX3q+RS5Ye-7Bf{F%AMk^td?pN4Ev;*t`|lbnWVIPK!g{%TvN(+j80n z;FwI;Hn*e_;^hgN?j^CMWDJ_Jv;p>1agxmVAZjIx!AioWi=Oj~)PTle#5oV|nlJhE-wG)9>8k2obuSy4uf%f9EZSL2I@=^->N1=)XEyobdwXR)Qd_yl)x7)jme zxUl@vHZhQpNFDwW>cE%hDk|N(6L<`PMztJD&l+$D&_!}pAfE%%s-oP9$zVKz)fEO44#)GlFbY2#_--hzWT3)*Yl;I=#7}RyrocbY z9kg=tE2a$XdU&pAV7-&poui4&aHBLYl;l4|)GJS;UnlYN=iBvjn01y_yOE9orCzhy zcC4rYfRd?iLSkK~X#M$K%df8jb=$IJ+s87wbU_0oYXxUDwPVzefsBZc3mVK6ETWehjkT1DcaMJJg>r`lKLJw z+UrB}IM%toF71PJDQ**2Z)dMHT)YG5#!)n1q6ex7o2^0+0@AT$h_t5!E{{!Xr6#&_ ziZf}rN`hbhUhdcsSx`QD0|@o8??NUgQD1TONeI8p4pRt!U9jw)`+ED}m4NBF`IFK< zW?iSTfqt7X#x-j>0yo}7n*D})D*HknKx7lbi|2d3APpl2<_p74)E$nDrKF_^z8Dho_!c`#kEz**Ho|BZGXcGy<|$e!dg^++vSefpCuybAV}TUo z_*_ch3*!VxnEc|l!M^%@o*~+j=)3)^`hv{9F1DN>&S-5SI2;s*YgTyCr1|FnXf%d_(ehN8>5%{aWH`jCOL;+E{33u&`*o+>IN z0HdFGupT%K8VwFl8;#j(3Oe8FStlYp`}{EH!~(2R4-1}hXUw;PCeiA!FQU5*MVDkp zT%?V5C=jTJeu)l-4&)a=IEy+@<4ZPbIm839z_g2*K^24#6;|V=5Znh9(Qz(pI?rWYn1(JTFovac6S>K4)Sy$ zx!#ekQdh50Yw9U4BSmQ}&HfIz&gz4NF4mb7cPLI@|A8M*45K+b7H+&+PDaR#_i~iW zaGAw{ko7ildOO=R1dcMlP6-NJMgwH!Waue0sPrm?+9sxzpzmzZhkfR1S1f7%8NceC-Os)S@y)DPFnA|0=(~g zueZ1E5@ha1pGyEN!@s6r%S#Y?GY@Y5uk0KC!<}JaeHJjR z`E^R_H<}Dx>Dxu${XVwGYjtjGfsLX z7KNHi`6;l}+rV?|pq#04o^LT<|01~Nz{Xf&w!Lz3X1r?P#n@DL-%F+fs%Jx0FPxi= zu@yX71w`bw7PILkM_-i)hfy!y+oIu+C%GhlPS7|~=Y2R*$5-T1w@59is#{LA1sLG- z&0nK%UrwJELhf7YB!e*X;yuo+_!{b4A;BN@zs? z=;I6f8Ce*c(qRYte05$D2!X9Zley|2$3$6r+Kl9Jt0q-Xk)ZIOucpRxphxUhmZF1EubOQ z)hiz;@7(Ra74%f+P99YL`&?X`fMjh$kXJPX@RQ0I5Tp1Ef>(5K-3x>;e9RF~^h;mIRe*F3t-Nv~&2^)HXsjw?mHdjcqDFe!^*U)M23y6{)eeQo3@gyXM) z^?Y5BJ*iYz`QtC%MfT%IIvbT;y1d@!ET~u(D;&1oi{O#%bxs+{o@@2 z014rKq-${fM1-hJu4^&*ROY8U$KI0tg@q1iher1=_Vx6rX)O)M#r!U-P_Y(2?wbpS zRe5;Y&JLkMFN^AMm-smqdpL%>1J_0>)-n`$O+$WK3h%*Nqe-sa1Gk(s`*)0c@Fk~b z^k4ahq;_x%ge9l#T70_|X7nritYl3~q{C+g`}%B6oVXkl=V*PwjOX=X*O63+A6+qJ z7SxTTH6tK_epSq`j!g4oY9W%QE{|P;AUs8!S_MYcOg`b%x&DE<(ldkRJ#Qx$+@CQu zx;?)ZITb&X!Ecivs(Pl|;>K;84Wws7;oj0r=Xopw8k+z{)O$`lcN{Tl&Zy;p4q9m)F6Girjfu3_Z- zM;pnyl)}%Z5O%V(c`j&Sc|Habl7HYGW@tH?FN|!wfcfsEU`O$z(~5IEV*tCIUF1$r z!y&&605s1)Z;g{bcbl2JSAPy+kXZ}GXj-E;V5Y<+GVNBaSB>b=n$PB9$Sul;mFB!( ztgnrH9ikJbOXkFzj)e6K;H7H;>_2^^#1{DI6>#{9$rJmPSOn$FzlHxYx#NvJPl9sk z_4cOv2pEl2dnd`81urf-ad5zBh}HLFe6<~Pm1Iphir4cKG!^KKx6X3|GJEMbXjEH2 zO-wCy-d|a>+iYDMuKKwVS2I{DUhu9H2yxc0iqI)NJ&;h{TI{{HfPF+qXX-WE3N|#6 zDPof3_&uW|5-W!*l*}lRT#Wl!OJPhVvHPF8X0^CQXjMK zPSjFH?e2Z6mORcjOj8wDs80qr)*df3Yausp59g#nij{wl)KvI06q74>P;m7cg+#y9V9`@&>}DZk=T;5UB< zfEC@F+Jz?l4n1b>rsl*d1Ndcp%?JbBd>FMmH%LFum55f_t#LBh&vwX3-bb2DN6ugf z^Cozd@tj&|qZOE&nqp<+l}QOn0hhdcZR(preC;jcfo6|V*u=?OWg%YpB-4%4STDm&#p9D?&g`-&IXr56I{4qM%w?J-&nBdnb+;t zRL|vjbsp}ct;w$zJj0Vu5ty9Qs&hSeAb&7-{pe)#c)!+f{Pt=O-QP)TzEMO@vpBSP zp}KlO)p9#Quw%Ca`@Pbe+nPR%&V9}E1A}^*AXkH3z2WE*TXq-t_8Pf{f1*ynHN~|9 zfkoXQdz!vy|IA0T^4sXavq8I!)QV=L#vN+O+L<_!KAV~T5_qW+Fc`8ynRGMEopGPD z&q$p&_LP1`*zRtmX1>~swS+2_%sF#uSC|}8pjs#0bJ=>sjM03c*tB0IGP%OOEd8E8 z=JI=I$XEz+RYTzA$0@6ILE_6tI#%As<9}aHFw?MwTaiJ1nl4ixy^D_Gb`-@GPY=QW z*W-MWtd>?1cUA1AReDPN)`j0cM4<`^*QQm*0lZO-C-p_lnaCz$Vg*3VOX=&4RXrTpJ@_9zaeg)g zubGWxAw-TLj`S`LKWzSVB4heHuEtYA9B(7DCCbCXa`K@cidr7{4*kSW5(R`ratwkPsM~9vvR+ORuycmPMp1Q4)vs{$wsf z`<_F^T+tzrm`T()qKr80$2a}DxK{)^lOxu4^a`<6eskRPw50Jqw?^}gESbI)D=cR# z&d+zQ|HnE`Mobf>?fY3p&$vEmBb*bZ#8jqnumx2igA6|sDKn8(7fAd#5Yy>Pwz|dv z@cf|bvZqEI4PR>q&t1=upQ7wDRP0-*FD=wEJRefM(hi={=O%y;K6aG>E2Hcz|FC}2ERXp^UW>6 zaeq3ld5Ld&hVWnvIBd=0>pJCDHf?QqeR#&qkf)=BMLwh$C2t?G`8{UXZT7s!oMFT~ z6oJU4fIf5`1|UZgu_@WXM;`m+=dYBt`LJpzu%yy{8{ncBio?1sTU zC=rBAZq(~K$IDpZiqZSjWxJEJVLw)GhMw*lN8H{*4_F(_8oP zdPjlJgR#%Lb`{B!F91C6yB27=-Xe~odRVqHsX_V1W%+e2xcW2i@Y$#!okh=fH}#7= z*Yj-O?KX8kvFuNqF7`Y|)G?{Ld`^Xiz`dv#W=``XV0Ot+8k zJ%+Gbsyh_Zmz=K&sH5FW`6L^<+s9d{V@IP|1#$-iPToH<@r*DUmKns+s2LTMdQj_% z-S9q%_v%UR-NAbv?hEo0!;iOEs_Mi!wufj{LdJ^7FW{PGXXS*nLrjMQVCnBHPBq`{ z3y%d);N^t(P*G+IcU$)z{NFb!Kco`n>6=jW3sO4cP!)mw9gjMm7QS&_dKH%mr@c#7 ztds7WoF*QU)OmB)f`ih*#({TPM$ts$muhn>bn11TWb3VuFSZ$7t28)C{8p8MJSFES zVQB0Pn%e8}lT8T{#?nTM%L-%&d}VAw7Wo(#_+Rp0{gva0aQORh53V~fhLtw7N`)zh z`)E~Tp6q@2f?-9I`4h8LCLhuWP%re9U1l6xw6tBep7+{auQFBH=+RExskoC-r#|?J zr654pZY$pD@E)I4SBO*39Y{<0p+Q8bQdMFD4lJVwka3j%OmV(L5Ro zyYZ22AG_UUIiB$S-Dp=zck*Yh3>frhUY!J-`k05x;;?W7L_dg0};nF+dNPUUb-jLBa~`Z9O?R}vMI8t4!3FN9@^i8@rT+` zoBM16VpYef4$CRn_Zt1oxb8JSMIb0Swoj`6j~4d zaZ8wCcu}v?SvHipAnV#jZ{4^DTHEMLOZlDOe|e^#(@06dW_Ryo(XMbKIW9#f!TTj9 z>=*0wdK8Pd?P4@Bs%zfAxNDXZ1g3qbPf`#s_|N+Yvf6QXlE({+8RLK_@89fWBsixw za602uIVy6ZpyP95tvH+(?i}6G+SO z>t87Z)bT_w|F^d-pR~TuR-g%R$0WNGY5LFq%O5()Y?bM>MSx;~zrMO^3Lqde3w7ZC zOilh9#N}`4WU2qTwg0|sP`0uDuZR9$Z2iU8ya)gLsQ8kk|JP)6PyY8||6fcteox?E WnCdODO7I>FW02SC^5w6ff&T~0e!W8g literal 0 HcmV?d00001 diff --git a/docs/source/multimodal/nerf/intro.rst b/docs/source/multimodal/nerf/intro.rst new file mode 100644 index 000000000000..3e8a7366f3fe --- /dev/null +++ b/docs/source/multimodal/nerf/intro.rst @@ -0,0 +1,55 @@ +NeRF +============= +NeMO NeRF is a collection of models and tools for training 3D and 4D models. + +The library is designed with a modular approach, enabling developers to explore and find the most suitable solutions for their requirements, +and allowing researchers to accelerate their experimentation process. + + +Supported Models +----------------- +NeMo NeRF currently supports the following models: + ++----------------------------------------+------------+ +| Model | Categories | ++========================================+============+ +| `DreamFusion <./dreamfusion.html>`_ | text to 3D | ++----------------------------------------+------------+ + + +Spotlight Models +----------------- + +DreamFusion +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The `DreamFusion `_ model utilizing pre-trained 2D text-to-image diffusion models to create detailed 3D objects from textual descriptions. +This approach overcomes the limitations of traditional 3D synthesis, which typically requires extensive labeled 3D data and sophisticated denoising architectures. +At the core of DreamFusion is the optimization of a Neural Radiance Field (NeRF), a parametric model for rendering 3D scenes. +The optimization process is driven by a loss function based on probability density distillation, which enables the 2D diffusion model to act as an effective prior. +DreamFusion is capable of producing 3D models that are not only accurate representations of the input text but also offer versatility in terms of rendering from any viewpoint, +relighting under diverse lighting conditions, and integration into various 3D environments. Importantly, this method achieves these results without the need for +specific 3D training data or modifications to the existing image diffusion model. + +- Model Structure: + - Text-to-image model: a pretrained text-to-image diffusion model is used to generate a 2D image from a given text. + - NeRF: a neural radiance field (NeRF) that can generate novel views of complex 3D scenes, based on a partial set of 2D images. + - Renderer: A volume rendering layer is used to render the NeRF model from a given viewpoint. + + +For more information, see additional sections in the NeRF docs on the left-hand-side menu or in the list below: + +.. toctree:: + :maxdepth: 1 + + datasets + configs + dreamfusion + +References +---------- + +.. bibliography:: ../mm_all.bib + :style: plain + :filter: docname in docnames + :labelprefix: MM-MODELS + :keyprefix: mm-models- diff --git a/docs/source/multimodal/text2img/checkpoint.rst b/docs/source/multimodal/text2img/checkpoint.rst new file mode 100644 index 000000000000..7e8f7149896d --- /dev/null +++ b/docs/source/multimodal/text2img/checkpoint.rst @@ -0,0 +1,80 @@ +Checkpoints +=========== + +There are three main ways to load pretrained checkpoints in NeMo: + +* Using the :code:`restore_from()` method to load a local checkpoint file (``.nemo``), or +* Converting a partially trained ``.ckpt`` (intermediate) checkpoint to ``.nemo`` format. +* Converting HuggingFace public checkpoints to ``.nemo`` format. + +Refer to the following sections for instructions and examples for each. + +Note that these instructions are for loading fully trained checkpoints for evaluation or fine-tuning. + +Loading ``.nemo`` Checkpoints +------------------------- + +NeMo automatically saves checkpoints of a model that is trained in a ``.nemo`` format. Alternatively, to manually save the model at any +point, issue :code:`model.save_to(.nemo)`. + +If there is a local ``.nemo`` checkpoint that you'd like to load, use the :code:`restore_from()` method: + +.. code-block:: python + + import nemo.collections.multimodal as nemo_multimodal + model = nemo_multimodal.models..restore_from(restore_path="") + +Where the model base class is the MM model class of the original checkpoint. + +Converting Intermediate Checkpoints +--------------------------- +To evaluate a partially trained checkpoint, you may need to convert it to ``.nemo`` format. +`script to convert the checkpoint `. + +.. code-block:: python + + python -m torch.distributed.launch --nproc_per_node= * \ + convert_ckpt_to_nemo.py \ + --checkpoint_folder \ + --checkpoint_name \ + --nemo_file_path \ + --tensor_model_parallel_size \ + --pipeline_model_parallel_size + + +Converting HuggingFace Checkpoints +--------------------------------- + +To fully utilize the optimized training pipeline and framework/TRT inference pipeline +of NeMo, we provide scripts to convert popular checkpoints on HuggingFace into NeMo format. +Once converted, you can perform fine-tuning or inference on such checkpoints. + +Stable Diffusion & ControlNet +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We provide `script to convert the Huggingface checkpoint ` to ``.nemo`` format, which can then be used within our inference pipeline. + + +.. code-block:: python + + python convert_hf_ckpt_to_nemo.py \ + --ckpt_path \ + --hparams_file \ + --nemo_file_path \ + --model_type \ + --nemo_clip_path + + +- ``hparams_file``: Config file to be combined with model weights to generate ``.nemo`` checkpoint. It can be generated from a dummy run and can be found at, for example, ``nemo_experiments/stable-diffusion-train/version_0/hparams.yaml``. + +- ``model_type``: We support converting `stable_diffusion` and `controlnet` checkpoint in this script. + +- ``nemo_clip_path``: It's required only when the ``cond_stage_config`` in ``hparams_file`` refer to a NeMo CLIP model. It will be ignored when ``cond_stage_config`` refer to Hugginface CLIP. See :ref:`sd-config-section` for more details. + + +Imagen +^^^^^^^^^^^^^^ + +We will provide conversion script if Imagen research team releases their checkpoint +in the future. Conversion script for DeepFloyd IF models will be provided in the +next release. \ No newline at end of file diff --git a/docs/source/multimodal/text2img/configs.rst b/docs/source/multimodal/text2img/configs.rst new file mode 100644 index 000000000000..3c534044ba40 --- /dev/null +++ b/docs/source/multimodal/text2img/configs.rst @@ -0,0 +1,166 @@ +Common Configuration Files +============================ + +This section describes the NeMo configuration file setup that is specific to models in the MM Text2Img collection. For general information +about how to set up and run experiments that is common to all NeMo models (e.g. Experiment Manager and PyTorch Lightning trainer +parameters), see the `Core Documentation <../../core/core.html>`_ section. + +The model section of the NeMo Multimodal Text2Img configuration files generally requires information about the dataset(s) being used, +the text and image encoder, parameters for any augmentation being performed, as well as the model architecture specification. The sections on +this page cover each of these in more detail. + +Example configuration files for all of the NeMo Multimodal Text2Img scripts can be found in the +`config directory of the examples `_. + + +Dataset Configuration +--------------------- + +Training, validation, and test parameters are specified using the ``train``, ``validation``, and +``test`` sections in the configuration file, respectively. Depending on the task, there may be arguments specifying the augmentations +for the dataset, the resolution filter for filtering out images, and so on. + +Any initialization parameter that is accepted for the Dataset class used in the experiment can be set in the config file. +Refer to the `Datasets <../api.html#Datasets>`__ section of the API for a list of Datasets and their respective parameters. + +An example Text2Img train configuration should look similar to the following: + +.. code-block:: yaml + + model: + data: + num_workers: 16 # The number of workers for dataloader process + train: + dataset_path: # List of wdinfo files for the datasets to train on + - dataset1.pkl + - dataset2.pkl + augmentations: + resize_samllest_side: 64 # Resize the smallest side of the image to the specified resolution + center_crop_h_w: 64, 64 # Center cropping + horizontal_flip: False # Whether to perform horizontal flip + filterings: + resolution: + method: larger + value: 64 + webdataset: + use_webdataset: True + infinite_sampler: false + local_root_path: ??? # Path that stores the dataset + verbose: False # Whether to print detail debugging information + +Currently, our diffusion-based Text2Img models do not require validation steps for faster convergence. +As discussed in `Datasets <./datasets.html>`_, storing training dataset in webdataset format is the requirement for all +text2img training pipeline. Using ``webdataset.infinite_sampler=True`` is the preferred way for training especially if the dataset +is large as suggested by `Webdataset Multinode Training Guideline `_ . + +Enabling ``train.filterings`` allows one to filter out images (and corresponding text pairs) based on some common use cases (e.g., minimum resolution) +without having to create a redundant subset of the webdataset on the disk prior to training. The example above showcases how to filter the dataset so that only images with a resolution +larger than 64x64 will be used for training. Concatenating multiple webdataset is as easy as listing all wdinfo files in +``train.dataset_path``. + + + + +Trainer Configuration +-------------------------- + +Trainer configuration specifies the arguments for Pytorch Lightning Trainer Object. + +.. code-block:: yaml + + trainer: + devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1] + num_nodes: 1 + max_epochs: -1 + max_steps: 2500000 # precedence over max_epochs + logger: False # Provided by exp_manager + precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + log_every_n_steps: 5 # Interval of logging. + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + enable_checkpointing: False # Provided by exp_manager + accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models + gradient_clip_val: 1.0 + benchmark: False + enable_model_summary: True + +Refer to the `Pytorch Lightning Trainer `__ API section +for all possible arguments + + +Experiment Manager Configurations +--------------------------- + +NeMo Experiment Manager provides convenient way to configure logging, saving, resuming options and more. + +.. code-block:: yaml + + exp_manager: + exp_dir: null # exp_dir for your experiment, if None, defaults to "./nemo_experiments" + name: ${name} + create_wandb_logger: True + wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger + name: training-session + project: text2img + group: nemo + resume: True + create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger + create_checkpoint_callback: True # Whether you want exp_manager to create a model checkpoint callback + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 5 + every_n_epochs: 0 # Save checkpoint frequency. + every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset. + filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}' + resume_if_exists: True + resume_ignore_no_checkpoint: True + resume_from_checkpoint: ${model.resume_from_checkpoint} + ema: + enable: True + decay: 0.9999 + validate_original_weights: False + every_n_steps: 1 + cpu_offload: False + +EMA feature can be enabled by setting ``exp_manager.ema.enable=True``. + +Optimizer Configurations +------------------------- + +.. code-block:: yaml + + optim: + name: fused_adam + lr: 0.0001 + eps: 1e-8 + betas: [ 0.9, 0.999 ] + weight_decay: 0.01 + sched: + name: WarmupPolicy + warmup_steps: 10000 + warmup_ratio: null + +By default we use ``fused_adam`` as the optimizer, refer to NeMo user guide for all supported optimizers. +Learning rate scheduler can be specified in ``optim.sched`` section. + +Model Architecture Configurations +------------------------ + +Each configuration file should describe the model architecture being used for the experiment. + +Here is the list of the parameters in the model section which are shared among most of the MM Text2Img models: + ++---------------------------+--------------+---------------------------------------------------------------------------------------+ +| **Parameter** | **Datatype** | **Description** | ++===========================+==============+=======================================================================================+ +| :code:`micro_batch_size` | int | micro batch size that fits on each GPU | ++---------------------------+--------------+---------------------------------------------------------------------------------------+ +| :code:`global_batch_size` | int | global batch size that takes consideration of gradient accumulation, data parallelism | ++---------------------------+--------------+---------------------------------------------------------------------------------------+ +| :code:`inductor` | bool | enable TorchInductor optimization | ++---------------------------+--------------+---------------------------------------------------------------------------------------+ +| :code:`channels_last` | bool | enable NHWC training format | ++---------------------------+--------------+---------------------------------------------------------------------------------------+ +| :code:`seed` | int | seed used in training | ++---------------------------+--------------+---------------------------------------------------------------------------------------+ diff --git a/docs/source/multimodal/text2img/controlnet.rst b/docs/source/multimodal/text2img/controlnet.rst new file mode 100644 index 000000000000..8f3155770f82 --- /dev/null +++ b/docs/source/multimodal/text2img/controlnet.rst @@ -0,0 +1,106 @@ +ControlNet +=================== + +Model Introduction +-------------------- + +ControlNet :cite:`mm-models-controlnetgithub` is a neural network structure to control diffusion models by adding extra conditions. +It copies the weights of neural network blocks into a "locked" copy and a "trainable" copy. The "trainable" one learns your condition. The "locked" one preserves your model. In this way, the ControlNet can reuse the SD encoder as a deep, strong, robust, and powerful backbone to learn diverse controls. +NeMo Multimodal provides a training pipeline and example implementation for generating images based on segmentation maps. Users have the flexibility to explore other implementations using their own control input dataset and recipe. + +.. image:: ./images/controlnet-structure.png + :alt: ControlNet structure on stable diffusion (See :cite:`mm-models-controlnetgithub`) + + +ControlNet Dataset +____________________ + +ControlNet employs the WebDataset format for data ingestion. (See :doc:`Datasets<./datasets>`) Beyond the essential image-text pairs saved in tarfiles with matching names but distinct extensions (like 000001.jpg and 000001.txt), ControlNet also requires control input within the tarfiles, identifiable by their specific extension. By default, the control input should be stored as 000001.png for correct loading and identification in NeMo's implementation. + +Model Configuration +-------------------- + +Even though the original copy of Stable Diffusion weights is locked, proper configuration settings toghether with a compatible pre-trained checkpoint are required for initialization. See :ref:`sd-config-section` for more details about ``unet_config``, ``first_stage_config`` and ``cond_stage_config``. + +Contol Stage Config +^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + control_stage_config: + _target_: nemo.collections.multimodal.models.controlnet.controlnet.ControlNet + params: + from_pretrained_unet: /ckpts/v1-5-pruned.ckpt + from_NeMo: False + image_size: 32 # unused + in_channels: 4 + hint_channels: 3 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + use_linear_in_transformer: False + transformer_depth: 1 + context_dim: 768 + use_checkpoint: False + legacy: False + use_flash_attention: True + +- ``from_pretrained_unet``: Same logic as ``unet_config.from_pretrained``, adjust the from_NeMo based on the checkpoint's source, whether it's from Huggingface or NeMo. + + +- ``control_stage_config``: Outlines the architecture for the trainable copy of U-Net. It's essential that all parameters align with the U-Net checkpoint specified in this section. + +- ``hint_channels``: Represents the channels of input controls, which is 3 in the mentioned example due to the RGB image input having a shape of (H, W, 3). + +ControlNet Training Options +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + model: + control_key: hint + only_mid_control: False + sd_locked: True + ... + + +- ``contorl_key``: Identifier of the control input, ``.png`` files will be converted to dictionary for dataloaders with their keys being ``hint``. + +- ``only_mid_control``: When set to True, during training, only the output from the middle block of the trainable copy will be incorporated into the locked copy. + +- ``sd_locked``: Whether to lock the original stable diffusion weights during training. + + +Optimization related configurations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + ++--------------------------+-----------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ +| Feature | Description | To Enable | ++==========================+===========================================================================================================+============================================================================================================+ +| Data parallelism | Dataset read concurrently | Automatically when training on multi GPUs/nodes | ++--------------------------+-----------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ +| Activation Checkpointing | Reduce memory usage by clearing activations of certain layers and recomputing them during a backward pass | ``model.unet_config.use_checkpoint=True`` | ++--------------------------+-----------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ +| Bfloat16 Training | Training in Bfloat16 precision | ``trainer.precision=bf16`` | ++--------------------------+-----------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ +| Flash Attention | Fast and Memory-Efficient Exact Attention with IO-Awareness | ``model.unet_config.use_flash_attention=True`` && ``model.control_stage_config.use_flash_attention=True`` | ++--------------------------+-----------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ +| Channels Last | Ordering NCHW tensors in memory preserving dimensions ordering. | ``model.channels_last=True`` | ++--------------------------+-----------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ +| Inductor | TorchInductor compiler | ``model.inductor=True`` | ++--------------------------+-----------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------+ + + + + +Reference +----------- + +.. bibliography:: ../mm_all.bib + :style: plain + :filter: docname in docnames + :labelprefix: MM-MODELS + :keyprefix: mm-models- \ No newline at end of file diff --git a/docs/source/multimodal/text2img/datasets.rst b/docs/source/multimodal/text2img/datasets.rst new file mode 100644 index 000000000000..d1efa0322863 --- /dev/null +++ b/docs/source/multimodal/text2img/datasets.rst @@ -0,0 +1,40 @@ +Datasets +======== + +Data pipeline overview +----------------- + +.. note:: It is the responsibility of each user to check the content of the dataset, review the applicable licenses, and determine if it is suitable for their intended use. Users should review any applicable links associated with the dataset before placing the data on their machine. + +For all text2img multimodal models, we provide a generic pipeline as detailed below to download and prepare the dataset. +The pipeline is suitable for any multimodal datasets hosted on the HuggingFace data repository +where the data is stored as one or more parquet files. The pipeline processes the dataset into the +WebDataset format, consisting of tar files of equal sizes for efficient training. + +The 6 sub-stages are as follows. + + #. download_parquet: Parquet files consisting of text (captions) and image URLs are downloaded from a HuggingFace repository. + + #. download_images: The images are downloaded from their respective URLs and, along with the captions, are packed into tar files following the Webdataset format. + + #. reorganize_tar: (Optional) Due to a variety of reasons (such as unstable network or removal of images), some images may fail to download, resulting in uneven tar files with varying number of examples each. If you are using a training sampler that does not support uneven tar files, you need to re-organize the contents of the tar files so that each one contains an equal number of image-text pairs. + + #. precache_encodings: (Optional) If you are training a model with frozen encoders (e.g. Stable Diffusion), you have the option to precache (precompute) image and/or text encodings (embeddings) in this sub-stage. Precaching these encodings can significantly enhance training throughput. + + #. generate_wdinfo: (Optional) The wdinfo.pkl file, which stores information on dataset shards, is generated. + +Depending on your specific circumstance, not all sub-stages need to be run all at once. +For example, for parquet datasets not hosted on HuggingFace or those whose format is not parquet, +sub-stages 2-5 can be used to process locally downloaded datasets. +For webdatasets already downloaded locally, sub-stages 4-5 can be used to precache the encoding to reduce training time. +For models that encode image and text on-the-fly, only sub-stages 1-3 need to be run. + +Instruction for configuring each sub-stage is provided as a comment next to each field in +`download_multimodal.yaml `_ + + +Examples of Preparing a Dataset for Training Text2Img Model +----------------------- + +Refer to the `Dataset Tutorial `_` for details on how to prepare the training dataset for Training Text2Img models. + diff --git a/docs/source/multimodal/text2img/dreambooth.rst b/docs/source/multimodal/text2img/dreambooth.rst new file mode 100644 index 000000000000..438615676c62 --- /dev/null +++ b/docs/source/multimodal/text2img/dreambooth.rst @@ -0,0 +1,132 @@ +DreamBooth +=================== + + +Model Introduction +-------------------- + +DreamBooth :cite:`mm-models-dreamboothpaper` is a fine-tuning technique and a solution to personalize large diffusion models like Stable Diffusion, which are powerful but lack the +ability to mimic subjects of a given reference set. With DreamBooth, you only need a few images of a specific subject to +fine-tune a pretrained text-to-image model, so that it learns to bind a unique identifier with a special subject. This +unique identifier can then be used to synthesize fully-novel photorealistic images of the subject contextualized in +different scenes. + +NeMo's Dreambooth is built upon the Stable Diffusion framework. While its architecture mirrors Stable Diffusion (refer to :ref:`sd-config-section`), the distinction lies in its training process, specifically when utilizing a different dataset and incorporating the prior preservation loss when necessary. + +- Prior Preservation Loss +When finetuning large pretrained language models on specific tasks or text-to-image diffusion models on a small dataset, problems like language drift and decreased output variety often arise. The concept of the prior preservation loss is straightforward: it guides the model using its self-generated samples and incorporates the discrepancy between the model-predicted noise on these samples. The influence of this loss component can be adjusted using model.prior_loss_weight. + +.. code-block:: python + + model_pred, model_pred_prior = torch.chunk(model_output, 2, dim=0) + target, target_prior = torch.chunk(target, 2, dim=0) + loss = torch.nn.functional.mse_loss(model_pred.float(), target.float(), reduction="mean") + prior_loss = torch.nn.functional.mse_loss(model_pred_prior.float(), target_prior.float(), reduction="mean") + loss = loss + prior_loss * self.prior_loss_weight + + +- Training Dataset +NeMo's Dreambooth model dataset is different from other NeMo multimodal models in that it doesn't necessitate data stored in the webdataset format. You can find a sample dataset at :cite:`mm-models-dreamboothdataset`. For each object you aim to integrate into the model, just place its images (typically 3-5) in a folder and specify its path in ``model.data.instance_dir``. When training with the prior preservation loss, store images produced by the original model in a distinct folder and reference its path in ``model.data.regularization_dir``. This process is automated in NeMo's DreamBooth implementation. + +Model Configuration +-------------------- + +Pleaser refer to :ref:`sd-config-section` for how to configure Stable Diffusion. Here we show DreamBooth-specific configurations. + +Prior Preservation Loss +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + model: + with_prior_preservation: False + prior_loss_weight: 0.5 + train_text_encoder: False + restore_from_path: /ckpts/nemo-v1-5-188000-ema.nemo #This ckpt is only used to generate regularization images, thus .nemo ckpt is needed + + data: + instance_dir: /datasets/instance_dir + instance_prompt: a photo of a sks dog + regularization_dir: /datasets/nemo_dogs + regularization_prompt: a photo of a dog + num_reg_images: 10 + num_images_per_prompt: 4 + resolution: 512 + center_crop: True + + +- ``train_text_encoder``: Dictates if the text encoder should be finetuned alongside the U-Net. + +- ``with_prior_preservation``: Depending on its setting, this influences how the model behaves with respect to the regularization data. If set to ``False``, both ``model.prior_loss_weight`` and ``model.restore_from_path`` will be disregarded. If set to ``True``, the actions will differ based on the number of images present in ``model.data.regularization_dir``: + + #. If the count is fewer than ``model.data.num_reg_images``: + + + ``model.restore_from_path`` should be provided with a `.nemo` checkpoint, allowing the inference pipeline to produce regularization images. + + ``model.data.num_images_per_prompt`` is analogous to the inference batch size and indicates the number of images generated in one pass, restricted by GPU capabilities. + + ``model.regularization_prompt`` determines the text prompt for the inference pipeline to generate images. It's generally a variant of ``model.data.instance_prompt`` minus the unique token. + + Once all above parameters are satisfied, the inference pipeline will run until the required image count is achieved in the regularization directory. + + #. If the count matches or exceeds ``model.data.num_reg_images`` + + + Training will proceed without calling inference pipeline, and the parameters mentioned above will be ignored. + +Optimization related configurations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + ++--------------------------+-----------------------------------------------------------------------------------------------------------+-------------------------------------------------+ +| Feature | Description | To Enable | ++==========================+===========================================================================================================+=================================================+ +| Data parallelism | Dataset read concurrently | Automatically when training on multi GPUs/nodes | ++--------------------------+-----------------------------------------------------------------------------------------------------------+-------------------------------------------------+ +| Activation Checkpointing | Reduce memory usage by clearing activations of certain layers and recomputing them during a backward pass | ``model.unet_config.use_checkpoint=True`` | ++--------------------------+-----------------------------------------------------------------------------------------------------------+-------------------------------------------------+ +| Bfloat16 Training | Training in Bfloat16 precision | ``trainer.precision=bf16`` | ++--------------------------+-----------------------------------------------------------------------------------------------------------+-------------------------------------------------+ +| Flash Attention | Fast and Memory-Efficient Exact Attention with IO-Awareness | ``model.unet_config.use_flash_attention=True`` | ++--------------------------+-----------------------------------------------------------------------------------------------------------+-------------------------------------------------+ +| Channels Last | Ordering NCHW tensors in memory preserving dimensions ordering. | ``model.channels_last=True`` | ++--------------------------+-----------------------------------------------------------------------------------------------------------+-------------------------------------------------+ +| Inductor | TorchInductor compiler | ``model.inductor=True`` | ++--------------------------+-----------------------------------------------------------------------------------------------------------+-------------------------------------------------+ + + +Training with Cached Latents +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + model: + use_cached_latents: True + + data: + num_workers: 4 + instance_dir: /datasets/instance_dir + instance_prompt: a photo of a sks dog + regularization_dir: /datasets/nemo_dogs + regularization_prompt: a photo of a dog + cached_instance_dir: #/datasets/instance_dir_cached + cached_reg_dir: #/datasets/nemo_dogs_cached + + +- ``use_cached_latents``: Determines whether to train using online encoding or pre-cached latents. + +- ``cached_instance_dir``: + + + If ``use_cached_latents`` is enabled and these directories with latents in `.pt` format are specified, training will utilize the latents rather than the original images. + + If a cached directory isn't provided or the number of latent files doesn't match the original image count, the Variational Auto Encoder will compute the image latents before training, and the results will be saved on the disk. + +- ``cached_reg_dir``: + + The logic is consistent with above, contingent on the model.with_prior_preservation setting. + + + + + +Reference +----------- + +.. bibliography:: ../mm_all.bib + :style: plain + :filter: docname in docnames + :labelprefix: MM-MODELS + :keyprefix: mm-models- diff --git a/docs/source/multimodal/text2img/imagen.rst b/docs/source/multimodal/text2img/imagen.rst new file mode 100644 index 000000000000..8e3fb4627a59 --- /dev/null +++ b/docs/source/multimodal/text2img/imagen.rst @@ -0,0 +1,287 @@ +Imagen +======== + +Model Introduction +------------------- + +Imagen :cite:`mm-models-saharia2022photorealistic` is a multi-stage text-to-image diffusion model with an unprecedented +degree of photorealism and a deep level of language understanding. Given a text prompt, +Imagen first generates an image at a 64x64 resolution and then upsamples the generated image to 256x256 and 1024x1024 +resolutions, all using diffusion models. + + .. image:: images/imagen_arch.png + :align: center + :alt: imagen model + :scale: 50% + +Imagen models can be instantiated using the :class:`~nemo.collections.multimodal.models.imagen.imagen.MegatronImagen` class. + +Text Encoder +^^^^^^^^^^^^^^^ + +Imagen employs a text encoder, typically T5, to encode textual features. +To enhance efficiency, we strongly recommend preprocessing the training dataset with pre-cached embeddings, +given the substantial size of T5 encoders. Loading encoders during training can lead to a notable reduction in training time. + +UNet +^^^^^^^^^^ + +Imagen has two types of UNet: Regular UNet and EfficientUNet. + +Regular UNet +~~~~~~~~~~~~ +Regular UNet is used for Imagen base64 model. You can also use regular UNet for SR models +(see example config file `sr256-400m-edm.yaml `_), but this typically +results in a larger memory footprint during training for the same model size. + +Recommended UNet size for base64 and SR256 models are listed below: + ++--------------+------------+-----------------------------+------------------------------------+---------------+ +| Model | Resolution | Hidden Size (``embed_dim``) | Text Condition Size (``cond_dim``) | UNet Size (M) | ++==============+============+=============================+====================================+===============+ +| 500m_res_64 | 64x64 | 256 | 512 | 524 | ++--------------+------------+-----------------------------+------------------------------------+---------------+ +| 2b_res_64 | 64x64 | 512 | 2048 | 2100 | ++--------------+------------+-----------------------------+------------------------------------+---------------+ +| 400m_res_256 | 256x256 | 128 | 512 | 429 | ++--------------+------------+-----------------------------+------------------------------------+---------------+ + + +Efficient UNet +~~~~~~~~~~~~~ + +Efficient UNet is based on Regular UNet with the following modifications: + + #. Shift the model parameters from the high resolution blocks to the low resolution blocks, via adding more residual blocks for the lower resolutions + #. Scaling skip connection by 1/sqrt(2) + #. perform downsampling operation **before** convolution and perform upsampling operation **after** convolution. + +With the aforementioned modifications, Efficient UNet can converge more rapidly and with greater memory efficiency. +The Imagen paper states that such a modification has no discernible impact on convergence. +However, our empirical findings reveal that the Regular UNet yields slightly better visual quality. +Metric-wise, they exhibit similar quality based on FID-CLIP evaluation. + +Recommended Efficient UNet size for SR256 and SR1024 models are listed below: + ++---------------+------------+-----------------------------+------------------------------------+-----------------+---------------+ +| Model | Resolution | Hidden Size (``embed_dim``) | Text Condition Size (``cond_dim``) | Attention Block | UNet Size (M) | ++===============+============+=============================+====================================+=================+===============+ +| 600m_res_256 | 256x256 | 128 | 512 | Fuse Attention | 646 | ++---------------+------------+-----------------------------+------------------------------------+-----------------+---------------+ +| 400m_res_1024 | 1024x1024 | 128 | 512 | Cross Attention | 427 | ++---------------+------------+-----------------------------+------------------------------------+-----------------+---------------+ + + +Noise Scheduling / Sampler +^^^^^^^^^^^^^^ + +NeMo Imagen supports two types of noise scheduling: Continous DDPM :cite:`mm-models-nichol2021improved` and EDM :cite:`mm-models-karras2022elucidating`. + +Denoising diffusion probabilistic models (DDPM) :cite:`mm-models-ho2020denoising` +represents the most widely adopted noise scheduling approach among all diffusion models. +Continuous DDPM introduces several modifications to the standard DDPM framework, +with the most significant change being the transition from a discrete noise space to a continuous space. + +Elucidating the Design Space of Diffusion-Based Generative Models" (EDM) proposes an enhanced noise level distribution +strategy during training. It also identifies the optimal time discretization for sampling and +incorporates a higher-order Runge-Kutta method for the sampling process. + +Model Configuration +------------------ + +Text Encoder +^^^^^^^^^^^^^^^^ + +.. code-block:: yaml + + model: + conditioning: + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + precached_key: embeddings_t5_xxl + out_key: t5_text + +``embed_dim`` represents text feature dimension after encoding. For T5, dimensions are either 1024 or 4096. +``token_length`` specifies the maximum context lnegth. All precached text features will be either trimmed or padded to match this specified length. +``drop_rate`` defines the rate at which random text segments are dropped during training. +``embeddings_t5_xxl`` specifies the key name associated with the precached features in the dataset. + +When using online encoding: + +.. code-block:: yaml + + model: + conditioning: + online_encoding: True + encoder_path: ??? + embed_dim: 1024 + token_length: 128 + drop_rate: 0.1 + +Set ``online_encoding=True`` and set the text encoder path ``encoder_path``. It will load the text encoder +during training to generate text embedding for the raw text from the dataset. + +Regular UNet +^^^^^^^^^^^^ +.. code-block:: yaml + + unet_type: base + unet: + embed_dim: 256 + image_size: 64 + channels: 3 + num_res_blocks: 3 + channel_mult: [ 1, 2, 3, 4 ] + num_attn_heads: 4 + per_head_channels: 64 + cond_dim: 512 + attention_type: fused + feature_pooling_type: attention + learned_sinu_pos_emb_dim: 0 + attention_resolutions: [ 8, 16, 32 ] + dropout: False + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: True + flash_attention: False + resblock_updown: False + resample_with_conv: True + + +To configure the UNet model, set ``unet_type`` to ``base`` for the regular UNet base model or ``sr-unet`` for +the super-resolution (SR) model. The ``embed_dim`` parameter denotes the base number of channels in each ResBlock. + +At each level in the UNet architecture, ``num_res_blocks`` defines the number of ResBlocks for that level, +while ``channel_mult`` is employed in combination with ``embed_dim`` to determine the number of channels at different levels. +``cond_dim`` specifies the size of the conditioning projection. + +Imagen supports two methods of time embedding: either learned time positional embedding or unlearned (fixed). +To use unlearned embedding, set ``learned_sinu_pos_emb_dim`` to 0; for learned embedding, use a positive number. + +The ``feature_pooling_type`` parameter specifies the pooling method, which can be either ``attention`` or ``mean``. + +If you wish to enable model dropout (note that this is different from the text dropout in conditioning), +set the ``dropout`` parameter. When ``resblock_updown`` is set to False, it indicates the use of ResBlocks for +downsampling and upsampling, as opposed to Torch's upsample and downsample functions without learnable weights. +If ``resblock_updown`` is ``False``, you can use ``resample_with_conv`` to determine whether an additional convolutional layer +is needed in addition to pooling and convolution transpose operations. + + +Efficient UNet +^^^^^^^^^^^^^^ + +.. code-block:: yaml + + unet_type: sr + unet: + embed_dim: 128 + image_size: 256 + channels: 3 + channel_mult: [ 1, 2, 4, 8, 8 ] + num_attn_heads: 8 + per_head_channels: 64 + attention_type: stacked + atnn_enabled_at: [ 0, 0, 0, 1, 1 ] + feature_pooling_type: attention + stride: 2 + num_resblocks: [ 2, 4, 8, 8, 8 ] + learned_sinu_pos_emb_dim: 0 + use_null_token: False + init_conv_kernel_size: 3 + gradient_checkpointing: False + scale_shift_norm: True + stable_attention: False + flash_attention: False + skip_connection_scaling: True + +Many of the arguments remain consistent with those for the Regular UNet. +To configure the Efficient UNet SR model training, you should set ``unet_type`` to ``sr``. +When using the Efficient UNet SR model, ``num_resblocks`` can be specified as a list to define varying numbers +of ResBlocks for each level. Additionally, you have the option to enable ``skip_connection_scaling``, +which scales the skip connections, as detailed in the Imagen paper. + +Attention Blocks +^^^^^^^^^^^^^ + +Imagen's UNet incorporates multiple attention blocks to effectively handle text embeddings. +The following arguments in the UNet configurations pertain to these attention blocks: + +.. code-block:: yaml + + unet: + attention_type: stacked + attention_resolutions: [8, 16, 32] + stable_attention: False + flash_attention: False + +NeMo Imagen has the following ``attention_type`` implemented: + + #. ``self``: Multi-head self attention block + #. ``cross``: Multi-head cross attention block. Imagen paper uses this implementation for SR1024 model. + #. ``stacked``: Attention blocks that stack one ``self`` attention and ``cross`` attention + #. ``fused``: Attention blocks that fuses one ``self`` attention and ``cross`` attention. Imagen paper uses this implementation for base64 and SR256 model. + +Attention blocks can be integrated at various levels within the UNet by specifying the attention_resolutions. +The option ``stable_attention`` facilitates the computation of attention block backpropagation in a more +numerically stable manner. You can control whether to utilize the optimized FlashAttention by setting the ``flash_attention`` parameter. + + +Scheduling +^^^^^^^^^^^^ + +To train NeMo Imagen with EDM, set ``preconditioning_type=EDM`` and use the suggested parameters from EDM paper: + +.. code-block:: yaml + + preconditioning_type: EDM + preconditioning: + loss_type: l2 + sigma_data: 0.5 + p_mean: -1.2 + p_std: 1.2 + +Note for EDM scheduling, UNet is trained to predict the denoise image rather than the noise itself. supported ``loss_type`` are ``l1``, ``l2``, +and ``huber``. + +.. code-block:: yaml + + preconditioning_type: DDPM + preconditioning: + loss_type: l2 + pred_objective: noise + noise_schedule: cosine + timesteps: 1000 + +Setting ``preconditioning_type=DDPM`` allows user to train UNet with continous DDPM scheduling. ``pred_objective`` can +be either ``noise`` or ``x_start``. We currently support ``linear`` and ``cosine`` modes for ``noise_schedule``. + +Training Optimizations +^^^^^^^^^^^^^^ ++--------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------+ +| Feature | Description | To Enable | ++==========================+==============================================================================================================================================================================================================================================================================================================================================+=================================================+ +| Data parallelism | Dataset is read concurrently across multiple GPUs or nodes, allowing for faster data loading and processing. | Automatically when training on multi GPUs/nodes | ++--------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------+ +| Activation Checkpointing | To reduce memory usage, activations of certain layers are cleared and recomputed during a backward pass. This technique is particularly useful for training large models that wouldn't fit in GPU memory using traditional methods. | ``model.unet.gradient_checkpointing=True`` | ++--------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------+ +| Bfloat16 Training | Training is conducted in Bfloat16 precision, which offers a balance between the higher precision of FP32 and the memory savings and speed of FP16. | ``trainer.precision=bf16`` | ++--------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------+ +| Flash Attention | FlashAttention is a fast and memory-efficient algorithm to compute exact attention. It speeds up model training and reduces memory requirement by being IO-aware. This approach is particularly useful for large-scale models and is detailed further in the repository linked. [Reference](https://github.com/Dao-AILab/flash-attention) | ``model.unet.flash_attention=True`` | ++--------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------+ +| Channels Last | ordering NCHW tensors in memory preserving dimensions ordering. | ``model.channels_last=True`` | ++--------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------+ +| Inductor | TorchInductor compiler | ``model.inductor=True`` | ++--------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------+ + + +Reference +----------- + +.. bibliography:: ../mm_all.bib + :style: plain + :filter: docname in docnames + :labelprefix: MM-MODELS + :keyprefix: mm-models- \ No newline at end of file diff --git a/docs/source/multimodal/text2img/images/controlnet-structure.png b/docs/source/multimodal/text2img/images/controlnet-structure.png new file mode 100644 index 0000000000000000000000000000000000000000..2eb3a8567e5278cae0d7a29e65e965d09cba4797 GIT binary patch literal 59531 zcmb5V1yq!6+b)bCQqm0qf*>$-ONTTF3=AM3Dcv1X0@5WNQZfj`5K4n|illURcX#c3 ze4h9F_S$Rj{qKL)VofaWE6(fe^bQhGuB?@ zi(5aNlD>B=4mZi(2Ys^(s>MHC^j+5G6-r~dCk&=en^AxzB7N-n2!4e40Ktqx>=Y?S z-3WqsM@mA5A~uQyBO)V4yi0s*fg#?JK&Y~a-65$lbAo`k=TDRmtZi&;tgNhh3m<=G z{rk8?nev8v*;%Fh)z^oEo;O9s=-pLSyj8p9lzmlvTVT?qx}5dD!+p$6oi{_ry&=|l zqv0x$^iDVj=JyLE2evyXeW&K|&sg|BM7^*>8uG1W%?Q#lwsHS1lu15kPL;wFe(}jxz3+C3H1KpiJ8n_FI}^e-U$?#2q$Xvz@z{`#DBIU^&@e!XE^|iEX#!cv0!wO)|Bj;^{*tnd7Lox zzj9Q=tB&XV1{$<_S7iiIc=FF0LS#B7r1M=w#D6E)St@@X3DXTIC$yLPVDIIP7WdCN zL`Yg<6^6fjF;Z8}->W&P_e|sK+Z;+O1~=mLfp99`shuBR*dlUgyLly@2Jib)3Guy} zywSk_KHKpDBf9MjhvJweu7`!})efm87L8*%VUkR3eD^97pX!@(&N2h?wc31nOWJ~y z-)s&C&xQW~ssV|J-c8}bG!^k91(d(L>Ie+)Jh(T%EROh?_vo)KnLLUD4gDjhw@6S$ z-hcFmqyDpE+ENSO^25~XQ1hm-iYA}) zpe&ggT>s0$m2=;S0Z~KfrrgOwutjtq`;-pRY4NN!u9f%E8cUj+|CwAw*5!3vrVR53 z5(ae=;y)FzpDdB?k~4`i!rz$AqH`;MnUrtB&WbB8QqGF24EdQsM-;@q_NT4t3fHwv zirMgD8N+}~ne67QZ`t~^8+lp4NXZwjZkY*AV)~66c{{F2j9EK~@AIs88F_PHy1SHW zH^jlIaq~(HuO$UOW43C^k$6vUH)6yJ<+c6ereoiWKRu9px?EwIT2rw}XW&J0HZqW_ z(ql&Rs1xjD61K zN1Qkl`OoR$RK*leVzlR&hoc+;-K9Nm&WngcM)9=Hwq1%dZRv(2< zf-=TR$&-{HVf;0B8A<<;o@~vcS4jAh$I^6Xo1M7TV5$Zmm&>liK2$CXGvC*x>qvkG z0)7dv_oM|T!S_6qF6G*Eo%$}tH6pMFsZ7p*-14u2%N(s$&^*bfhM^;YU#p*edChol zyMVt5DNiM`DH?wAuSSn7h~D|rJSfXZNL^l?QT8dLA$=ss*~x#`L5_sv`r`xOV-s6C zjun6H-GG44fq;ImCOtwJkjxR8`+i!(yg)6;WRzf_Nr z{xM>GsJ$P$^{D)7mY=D|$H!m3d`T^0S!`&gvf*A;RaMDbwFZ$YnVOue^}b+dE{+hO z|9f;1bsILXc^2U4VGIuQ4YZkbh3ZF7Og?N8ni*RjW*ja{M&Eac8=7zMsu+qQIYAsZ z?~gW%J{?2DX1}|i5xmc3D2W&PBNjsv#fyrHY8)1{;<}Jnd;oFD6RT#4q@|^SQgz z1+E*(=c~_NS75}WCSwy|D3TP8KcIf)sm1++Nq+q#~y zR?tP;y|2$?72KQGa&Ff75oc$%$=BP>O;d&fsrV2B3F+G-XoTP+l&jN90_6Tz$_1Pz z>?uxOia%s)XEFvb_Rvf0%_*=qQGa`6sJCK=RIc0UiA%Q?>e+|#QS`$&d8m+(6j4}w zx-t#}WHcLvaz4VA0(Knz@kBK+*g@9QE#@&s1eCjStrBn8=TFzOoId=#QFh{qc2khU zm!&Z*^oULO$T}5>{NGUqV=8~r?$y8W+e4?Yi_`o^8UbDqE(8Q!-ju3wB8 z@RlW!3yrN2N({iH95Y@fUbh{iYcXhHQw)srAJ_EwDtT55P_@*#e*Kw2}xy z-?>A4FuR`djHwvie5%8$$Xf>NO1t#k*9W*K%11ARkEN)UBX(J(yn}Rvgx>(B z`{MfvSW*@@B$SYw_s)?_P`0xJKP>)p_txSUr$a>rZ};<<$M4BeGrkmv2g(F_yMs5_9OFr z6eJ0&#X`e!&(qDJ+uPf{nHr8jxYOQrRlUbSJeY4f^41lw^e&R@|MGi;kqdYIhdTes z24n)p2qmfj-;FjeXc-si3Xy5mUmPBf5zGd zOaw`y;Uk!V;vXjjSSDg9X~(}KqnyD0Sy$iRb-neu+U}T$g*^T!ucNE`)AMNgYwME# zh$HzIXILJHJYW(W-?d;gzC@U#Zjz~Q$@Rc|}3d375ok?ofd4d#%Vyg1` z^=na4QB#P9h6WZE*2H&1ahLVJCf^(Tg{BhB08hd%XE0|w-VQ;MwTSW5+0x|ey=Rcj zE&Wx9ogi@$=BwagEMT2+fH|nHOFw8jMW3CWB`~VSL`B)!+r#Pcv`yYaM7pbT#{N4yf|)U{huo+r^!G47h8lE?iC_8KJL z;KdM8g@85xL@g013X3=URa_n?MIfn#`Fq&zPB$_TAN{DT932^f(qZ~TukUXTF7{>$ z=$`v^Se-p?hgse_KVhQC_Ozw58CKz0i5gLiG?%Y5Ri51}-=NWpe~DKzP0iqLX+h!p zXe+4M=nj&TD*9%w4@*q7FQLP?r*;MDqpaEc6SpvkJ`hM|>s&y&IXj^bYws`i=XQ2> zK*-CA%25$d*qwS6p4ty=SE|5`AHNq9DSFR9VLe@T3AKW@z+>eXS~fB?=tLV?0v!ja~s&)QK7z*Q`&O zdJI`oX^~gWm@%HAp|KF_`Z;+^bMfexVLLXO;{ewZDuXHa-QU4>K zgZV}v(1MUnWHbDC3YMrxOpT$H8XZiw{MKFXO19HzeIHp#WQY%>IAmd#I*4zIa7EUs zl6yQu`UCs4NyH^AEX>WFONaT>@A944_{4rkG!tmfm7 zQcoBDXv;dj&Z=}+5SuW8j0oQ7E<3LEB$btM5kERwIs0Om-E^JCzik6)*bKNd6N*+i zA3Ph0NdqV0?0Xw@P(%Y(TxV%W_0AyA7)JTPw(&~ zeHw2|PU;FwdlbQ^P27?$UyX9FgGWgIj#)^gZ6P;|0H>g&6cDK!@CvUWZHo`> z1q0KQ48|5zgjZVZNn%06d6t@)IhKvY&cV^^x(T_}(OIZ%IE~^5Jl|!|$zvonN<{7# z7Z>M$UEqKH%VU0PbNl|L`N7+{5Rib#t`bSKImBc=Id$DcIqFdh(gwz%@$pvV>x8#v z_>j1wii>+1(NY?uz}l13u|bOijuF`x^9a|+-O}f(X7`e$`R2<~1qhiy>h<71KVxR+ zOt-+|of~>}DF_7_BdH+lT!u%Ty=!02&*jZUmFD{DEk?@_4SAWmc zH~=4!AcSO^xjk%9F@Mv#$u zcR}#LSh>pwYS>dChAAvzW^;nnu#F%s`MJbmcTqiVG%)kWWJyWN54epptK1-<83Csc zOxps|X+Rj07w5gLgRn`()n_c+WPUzfH6^whY4c-U6{Vil@#$l{C74({aXb~$UM$PI zEtZ;n?#p&@YeeRKGfGk?#krLNr3$`(e)eCIB~51ge(bzRSnMEEWh>%B%mHK{OqgWE zVNcp&-7e2co`1_hzj?gytemhaO-0vgD2h$6?ISEdkwIzrajxbJv;${P8Op)0-V~Mb z?#n_6Q|W%497AkdsjjJbU265#9s~-+b41PG_OB3$D5N492eeZ^-fho07|W|M33&$HdYL81eSn8O4}=;d(Q76m&g?W<0v zU3#MTAY_js-7i^9m;BrQK@p3{r8tD8cf=#A>@R4dU_gg0G93i z@T;pUASTCQNTmH?mS!Zdo0OF)AkVa=Bjjab*`ch-E{4k&{xE(#8I;|WYDJ`huOWr3 zEMvqJ-OU3JpX6=DD5YMVo_1d=?#g$m&v+Ivg@#puT@f|K!}wAW%$2k^A06)q{>UWO$7J+gqH^Sg>?R+(`ymYsic90^L89DKFoXt`U_9fG4-VS}%BB7mUl` zB~`utYP!K>YPgDnc`*|Ho`4uJzm?unf^7ZQuU`iSR5ppiFqt)jf`S?w8~-p%Kq}iT z`ne9k0dFhHuC9TwBl%Djym3k+S)32 zO1{?^!p!DFlTBhRozuJa_|;Xq;4d#LBP8>1XYBiWEx@y)yh+mEen|EMA~XI7WQR_l zA$K#oX%D3rk6l4u|8^0WUS>CK{yd3}=%C-O3LiA^aB@0Qf84yhT#5De_6EYOC_jJlxQy~JSL`;>JXr9%DT%OzSdq`n z7u7th(%jq&d0%^2a6H_P?4YBAgZJvn9;$h2_9X0&mk8ae$HK$ci2I;$9u9+s=fOfB zPB#Nk&;~LDU6$H{SANAN=!2JmY}lZAJ>{_s)*W{GZ_}5IjQ&VD)_>Awr>1ni)R*wR z)M4>?0_K5!I9(2H-WET@&3<=td_#J|VuOx8hHmI{vMJEWh^t?+LD!d>X5eqRNSoPX zUJ)4?$sK%O-t4l0qcC=Oc$i}7$4m${F*g2MQ*&{1(5$^ejiec*V&!~s+W6#XW5OY; z=~_pIK*=u38p!hW29JiOWF{^{KdMC^9>f}U-*GKVR(JxPgiNoaWz@8hmX?;jz9_}= zS|F?M?(QzsDDb;G(@uhLty=T8iTYfsMSHfS_;>@oY!??78!ZND6)~lHOM82e3vPMP zoBlnoBs@c4r)X$sV^?U)(sohXcY`Mf2Ho``qK`eu7V6yec9Pd?s?WX4=`f*pm60{o zyjA8RI-Q?}s7fG>g~;Uf40FAYMQ`=^s!JI1b-R(ZwPI#q;d4P2pAaw#RM)fm zdcl4D-Ft&EDY4gGKcdn6v=XR_oyd=o&IS+J)Jf8+%w~+jNYs+p#`!|AG;&HPQlRA zX1kdK#ki+u?f374MQ;*iWMy>lRSx!ccN;tovM4s;7||C4j;i}caw62U1X5!k=On*N z9c|BvlrP$Ava+tX{>06UExJWvZVvMCFK*nX$>}=VnNZDm^;M-6@-b~ohTG@;HlWJ& zPZ~g|6bCivk;|M-E|H?=Bkmdreg|@4GO+s76F#{j?%FD3d}e+}aTab64lpcn1Y+2w z@g5%dQS07}yVe~_Wn0QZ^W}&~1?86^RY`XFNfCH>csP){{Rz9j$g%D=oS}N<_)%=5 zJrtK_EXUHr!^6Shz+<88L<=WvYJaX?TT5$HLR-|cQs5o!jJ3?1ZtLYYE6-Wl1p{AN zCd0S9@6gsG##>>RU0*K=tA}5(V8nZ>d%2P12f7v?>YzZc=eCqYR}xAeeFp`+X>$zj z<7e6C0%4-=k9)3~yULvf=U~dN#r!is?fU5H`naLH|K-?<#Wrg(?LCW+-^1MnZ}LcX z640eUw8SQ4@c4j6;KA0`*H~SOq{!EyI76?bDdl3Gg`1KGudS`c#NbpNz?aq0s2*4^ z?3jYeXTPe`O%?7K>D!IZ!;Yn?#8l!hYZmH3yH%zBju?yK7?jP3Hyr zk1uNdT(_m+&Zn=0mtd8co-dGn0^IgGR5Y>+BP)C7I|8y|DZGAss zsh#hos(`4300{%po*W%Z6JcV-AG zyDulNYh@pc?ENm_D;Vb=dQ|0M?f#iDl<#Jap9eP?VjgK9O$`P8;X09c#uZPs=w&Wk z<)zyiaIU8Zy}tOx0Tf{>nZbM=jctt4v%Ij0(61st@~mX8~d@FlpzOR~#oqvdl?P_eFbM9umgL>Wn9~p9oi^ z+|OEA=M%|I1)UGPpkQqFUK!J0b&~pC%24iU&I@YS=kEB|#dq#BNPOCgNAmJ%U}I{{ zxY0i_CYW(nqjf0%g-8o*5;t%kBOGL{D^yiD^|4qc;)Sx5Wkx=$trck7v}}Bmuwl|F zBh%RiF_*n!wM6oZ3o%8}Q#Ycalb2I2M_}6t;t^i>=NMF~usg76HL8zm(^uoDp2ju{ zLqi(sld-2iZKq^q=)89)%MAUlf}4UZ2D7MyTwzdX&@iD53jGPzgtQA>!ukHlpXr*%5 z)29kcP|gn1Dnsu>DZysu0Rc8gU&RN)X^=&_n_2R=8pkup2T$o-?H!ZEJ*8&m^2lQz zy3F~(?$-d@(@){-85YXX_TV!+=scy)Hlp5TN^x+u*C5Zt-A{c8cs2LD?8Gn27_q0+ zG%;~;aZb)0_JEx0P1&<~?}O+VIE|0Jt*vQ=j@K8^kSqhfyU|u4GdVwg%`ysy%0tzYa`o0xkEh0Xpp!fZW8%mD@ACbw#S~n>Y=HEG@9PjTVL+iU!N$I zm>e&*1_2JrW}JP6GkOh`ybJ}XOfC7ZFYug{b_4an~lRRh4}>Wgtt6+ZH#yVg0ij#<884`sI8$TNXAq z_E1|5y=ecGK7Ik=1+q3#ol!vIzjJYD#C&cqk6K$Lg#xU;@b0Yjroc03{?#u&9{3Pd zY53UfQzffU&0wpZHEc0iGtrxoZyjckCgI~tw8+R4$2TnB4@PU4G1upP18Z_i<^Ilx zdSm3(UgIr^P${5?kN|62+yX797)eA) z;2!Jr;M}~`VebPa(yjbV>`tvP?BzAhpo+puH)AqCSMID;h14lzpV$j@*5#&7CF5oV z@mse;7 zf!P|5lt%ig;DNhF21~@zyzf4>AfSCQj9~PghJyE5C_`eVMIYgM&(Tp>svYTR0Wb)} z1@Z!ux+r_THnI!F5i(nBSZJSyPI`2B9|ia~k*wJR8QE``d}^KuJ{1+!&BZ~DTf{5Z zO*~@HtK)B%O`u$N)w@ZYj+EB~iVI0@zk05gO=G%OZIv_|y7@36RKa0~xVyzV2g|}v zxdYhT!qFQ+GvTLv1ST8njl5M)1C;YcHJW$5tEcCs8DeH`PIj6{T*^4SirvBWKedEw zfh8}gs>`nBVjpK`U(*LJ8DhRA)@v7Y=ZK{de`!ZSLGkm$8CD-}H>#F4IU6~El@7-9ndG3cXPka}N2w5V^|ia8Kz1LNb14u7HNzUn3UD2~>x zKU_#-Vo#IUQ>-?#`};3^Dn}0WoMhV1R3uoDu^Xaq3fq_FbRItV-W+(X;UszZom*-b zj6VZoZKrNXTqY!fjkVS1Y8{qhA3UpEZ!|D(CZX78+@SZtgNjj#4|mmMswLuqUa9EK zeQ50h9LU=gH_w?22D0bAeVro-`vjCA)WYsjc@a5fJy_pHINgDcn}7pdY9+YY3-bKP z=cHeN{$g%!uD7=r^xMSU-QCggvr5T9^Me6{SJ)^l-vwxw2UgEkDx!QNG=T>wxjm?(vC6~1g%p3aATI9zOPX+iouUSTl^2zfKg8o(uc*}a^g z{rUOT8Iz@KTLw!mrlzaVSh1f}s1F;B?VO-b)1gdJ@C>EsvNj9I3GAJ$CO-qYCL3%? zMZs78V}}P3IVb#ygtFpEtJk&;_39n)(%07q9GHli^X?=UH(TJSV$wNY8u;j2e^dF| z^tLG^RZf%s?52c9sy{F_ah-5i!N?6ApG;FHg#wVZH6^j?ld}AT?q1;}B3yT~K)z_1 zcehnuET1;+6PN`Cz9^szYJcL%e~h4M$r!3^VCM+1P`ahx54G zxX&bA0AEE%TcDWqLoG{~Go8woF7D}eY5r*{v%%WX(2!)1+NxTGH5w8THu>1%VvaiF z5poaE4J|7xE3PURr;WCDasrQWeRWZN=cpLK?)2u(y+4o2Mwq@`hGF&>$lIlgS4I$Y zH;iwhXGLw_@_S?%@)YL4@nksk6>|h8ijm<;mhZ3<(gRl$v7M<@e=0hD+Yw$X{`g{< z_2f{gVALDZ8TGbS4z=gC0NnTHoJ!0mm0&9hD3QUcsU0CI{ZOyTfSGNAu}@2>}x1DrgK@_)C~NvTUnJuz6mA%EQGL>T==GrJY_OYm;X(T5fupD{PpI|pv*VEI}Zc?@R zdXLy#XUS>HXlvcyD$Mh-9C z8HU~+>Ieb2B?q6&;6$#@lEyDperXB@kX70IO6%lF!K{i|O~OT!{pa5Fn;arxd!gv@ z(kX0rPR*WKflgED#F}lAGjoDA+aeJo2p{eA#M}JyZ>gnT5Fp4fxa9I|18C*L?cf6| z;S2xm4P4b1RS)XL3$b_Vt+C?wuK;XJea*zv^Rg`%3#3l|aCQ?~BQj%-PQPIW@Z9)g>x+ zk@&D|;ss0s!2IB0pEr>D@ktEhQDkOxS>t472I$wq0Sh*KnJ}cS`aE-cd;6LnK1R5V zy$&W;AkMx%qC5)R-`sR~&d#1VNAWu%dCkw*#qM=ml%mf&R0FKH_F-+Xp06MlpTM2~ zuqO6N+z*AO#;V4-w_H%Xp=uHhYH>dwX}n|P9-v^(7IZNOI16K33P61UxProTFt)v& zHBe$HLX2JIcFw6nHic&Pubv{IrVZqu$tfwF z9;QG8?do)Ebw{ANwAcYAZ=z5N4i4|VRMC2NG9GtjJK&?DtzzTO35lAu ziA+7Boo_8qz?SX9bXdM9dumTC@bgt%bxoCej)~q48>Pm{uacO;h)NAV5@y}8>)n|^ zZg?id4{NVT!C}@8GOKAFa9zi?*iyxv`>@}^)?ec8-;sBj&o=W!?xs%Z7UK`U#Tt*+ z##o>4mEa^eHeQx^+YG`Xz@JGh1~lIQ{NBqAJaPqQt%2d)cu6wLYXaOqnYLi%t_4Cf5lM zT97FK%Ohv;DgFL^MB@H%>9rg2W4FLkOMpkQjOC01g`>9zjEcUD5qh$EAmNjsc#A%o z%B5*A#nRhMwjT!6POpp-62Wi@hy%M(DJmm@!%(d#S*7qonWX<;D+B^-| zXf$KM;foW2N;Vm=c_N42jd-m?vC9?iE~l%P&No^q@9O!nY_g^owhd|{#b4`>=3@NK z$5fGn?B{?E&eqOOf?+5rnWoI+S87=Tvy*F|R4`sU-@(5Zk5eQhl8?31_)+Nj!0Eyn z%w!p|T9^5}4hGC1S%U#LQ}p?TQ)UoHayw~6o^QLEtpPNHHcliR;?ai6%gecW zcwW;9zj{?$U7g?bL>$Bg|Mrbxn6Ii=R%U|Qe}=g;AlHtqGp`Sp?Ej9&KQV+@tjG>B zgR|OnxgKXzStL^1Y|g9x|)& z3wEpb2lrFtlrMie!Qdhj+&yt{_mDbWCuE_ly6O_3eDu39J{OYyjv*5pAgxe)Ft`%?f4n z>2#4^fa zIj*@P8^4=Ttuv}ZipbDRE+luFE9im-=|RN1+~?}(8BAiZnvOQ?fo%+;j?zND>8fsS zzcq`29OnbyVl=>!>qW;9I0I`c?Am6ve2BoB-?#D#_?Nx1I60 z7jmP_WK=*N+Qbqy3Xsus=zS;lZ`A=#2*$M znES(QJ8J=I@H|z0DV>Z)A~@hu?JGF*mDBR|*^YG%O3up+G2iq7Ava9fUx?P%)fy>r zAsZqQv3z^IvuibBnbP!m%6#^RRg2mm*yf94uS!zK6?hxx`B}{ae4|zfwRa6#@8a*j zjz!Fz4zPim?mjuh0l1-Lux!=Op8#B;1vF~kqDds_ynDC1yIb7VpC?2?eB=xjtNBHj z_e56acCUN0OxUo#tKl?w=qRM=4Y3IXdKYxNI(oowVE$#H4<3P{AX3;M^w9H71i@Zo z2*QkxNKH-6Vacc1M}oav-3Mv}C8dT{{ngPS*x~4R&#aQWQ&Kc%k7Vt4?bK4bfudtk z7G_@g>AwxHt_E_rNmXN+Tn!rU&NqqOjx{B`WX%AyaanauRtAv82O}dRFE5>FZzMld#OT%77ml!7Z(BkD^n^frBSjugv7|yQMb(bzV27%f-VMHhQhxb9mZtq^jXcR4 zAS<}gae++y{KOJ|M@2>XoKIQs0MOLx>MBs%hlYmM*Vp?Fj=^A1L~vI3sy-iCh>5z- zzgRsGZ}hud-z_Y>K>$SqEHw}9aw+$@JQLo0f7l3zNdDN-;3%fp{ z6}jD+Cw$Oc|G|RaDsWpcRP@W3BhE1Y?xu5?+8r0h4dOv?F$C z9t)YKBxcI`FQPdDhCnR^M28UtH!w;Krvz#-055+9xO9#+a^i8QA{i(ALxAy1d&Ifu zi6dwSXQ;AccrCDP)R44VupG4QAHKu3k{2Qn5kC2MAWk@D0JxK!Ow+fu?=%ESR+EFX zuLE1;tO$=xGa~>zmU0|reY92-P>2SwY89XSJByL|hu{|2^3y1IQ=$b>eDDB7h5vOT ztbH)w$ObQ52LQYXI0q*@B z+tu+J01?zfp(~SR?>8RdJ;lYf;7;?H_pXSEM`uP^97QM%F@RQ)SteZYIkZ8qWx2;56CT_O`W$$Hgxi|AzANV-zH6 zAoW9g@?t31ei^Y`%%4t92S_V)Ju@K4>pEv!iU``?)D zOS=CktP}lCu-yf4>9+Yc@?&JHZda~59o}=L6%&}4KoW4XZ}TtmY<6vH0fcI&dYrA_ z!EW!uRE9|Q#){Oz&dR#qvpfO=Dy&=hLE~fqtg6M<4wzjtl@L@V(>l-_f-PAUZEa;$ zSX87GolG1z7|->mcF=ry78K%VjQ=L84__ybjaFAevs;)xw=!(3g&ua1wAZy&=TdzA zb^T;7QNO9eOwZ<6kjTW$Y%SAc9-voiGpb=19`v0O;SoZo-Jg>VwKw^ML>W`!-WI`1f5hp`v zA>E0hFLvKEvQ!YTw&1?%bRV4HHvh}b;j%A=@QMO!fR*}M%?^s-RRWG@%u?qO@4Rrk zXk8H1k>@9QbWId@v_{O%B9D{qzS)+F|>a&9zl;RfFXf1CZzr+*0k z)V@SQIs$ukFd`}<$m@rF@bU`iL87*5I6C4W8iQ-@=L==iNZe3|fqVUZ4L4-XXyEvp z&y>i<_~9?)VgS%e2|LNG9oo2rIiyALz zMqPTbR&c2u<^*z8a^T=Bu(2ro6W9*gW6BNS<<>CPFQ1R5#e01zdvcz7z%u0Bw}swn zf2raJ&1dVkk5wtw0xkS-LdH=H$ZH^8B&Zs?&Rxj@p!u^X_`!6h|0|C%KGS5v+e!u0 zhvL=}*M1C&#TRj3)L+vJyHOPv_3Kg+`*)v5w)E za%VnBaxB^RF924D8ntSyX!i4U_1tvgNck(q1^fO%XXYmsVS>sZ40ok%{8CUX!bKcx zRM>pg$pm^eN@2yS8~s7w$cVM^+ulP^h0LkLP71qm)JS2t=4_-_#R(36Fyf()Kt#U1 zj-FOzb>f5vr8wLJw1v{6^5WRSO>uEzta-1QmgSPdqiGxrJGy@e=BTHCJm! zo~*d%0N}ewN>N&bpiAm5q5>0J^O{ajwNWXt%SEw17~OrYieqzLZ)Z-;x^GzqiRzKn!tTRWcgXH#F z<|@ruRY>dvvB^Auyte*>5s5?3)ADC^7Y#$OCn;-m5iHuY!51pACS{~xeh z72*F8b_-|~jOcl78{Gqtlb*)EheV~K^*`!m=>vt!$PFVlCDf+7L5(-g0#6)k!(K8dpPoKdZctSZPvetgVn6@E`q+- ztj!P^@6Vw*$lOW%-9+1dPB-pBC@S&TIers*Cva(~qoV`hE!`i6iX%Gvpt0DljI=D| zVN(cH_#I)<6?yV7x<8cLFJv9(FQCPf*~JN3pz^v6>g@EY%}<)}*q$LmbpfzNJ@CYf z#t%nS6@k7#8kS~69dhz_2SZt8E6{=4KmnHWVc;~(Jls{2TY-=VpXBoP{IehLQZ<=m zy4;xKxGb8Jt-44v7w)w}6ZAnlcuG>HDjYIvhz z=8aP+Q~pkz!{M{y6$PR;p@S`85tX=8pq%X|sO;5la(=GwLNfI-=QN2-Y#^5?z*Yr-h&b;GPTygQn`;CzLq5L)`l- zeO~L;@&5w?)%_I4_kRPRsAmU+k}i0^;e>h=#B?@4WNGiKd2Lw4y1QBT<5%Uxc;S2! zS0E>LXBdmVAvwNOO&UTkn^c#wjmF#oGensJy=Ye6#CRnD@!tgaYOeX+MnD`xxJo#GEGBLLS7a#YoDKyj5sV-?AAmhm#tHf! z|7HYGC?6jmOG`^5qs60@U!dP~bkOwqvEgzhgJsK#!a_F2>Y%~{5jfHJ!>PHsHxmid zhhO|JEdZ#b@asD$Xo)VUzz_RT6y_(om*1`yMb4fs0U02`Vw|#dx3^be&|&4^@Njbj zvwH!Ival}LC9;L#g0tNbKj^2unD1Fd1U<-^gEXbI(#q7Tm2@20RDjjfN-J!9Vu1)|skSpZ!+PI%WUK*@RW;swyj z0O;sB4>+lR>Lj=5^%7;;f52ZbT@}*kZ>Fc;J~(F#;MY&T5+6{#1BQUK!0>v|gR`$S zed3sJsK>f@3G5J6cNmyWy0!kG2@7hiOfBLm(dK%%D9O$E`yMFn0ay=U@e`9i#m0W4 zq5>BRRF95$@LSEE`NT}P5YKVC_&r883Fn{HM#GOIfeS8tk7;GURfAAe?fQIQMAcjBH%JfWS1W*EPAt(FYQQA)k8 z1u7daD1z@e%7%|U0b->$YCRPh&P_G-^5d5==W z0AK-tABN=h>|U)Z9!Fbqrl%AU`Lrtsd|oJRc9c~j{k{~L2|n_+5($?HX*+@WFt7c! zg;g_o8D1lZOu}FUX`APSWlvd<68C}t#Xm~{h;*zGJ{~cc@foqc+KxT7w zg-pOY#$rghNh)aEgCB6@)E2xTTW^<6zBn|+r4VJ(lWB^*FesBUF1ivUX<~RWQd0As z2p?p{*4(HwlH<41+42q5kJMOZ{(VE6D(?Eu?J_}^hdtZHH(oOeO=i-xG^!M+@=YQo8~x$LexGmC;^IZ_nOFNR0(*_@D4nH z2Ix=_fRhh*Gd3!QMk(59b^*H(7w8cI=?8Rsmk(43x8)q06P}Sfhh>0rwUu_;((R_Ez>)H!ogx3fN8YznUJ@UsnBCEy$8xCiLBW zzjv@T=0jp-HzNDR%*I#&oGBpB-xgEOfd+0H;2fXoB$)`ep6@E*C7nrQV+g}pQUho) zw+Np!UL+LwXCre>mzR)ot7eGU zvw41R8~H3704aSR>n;N~+aEX~Cn4mF#gs;g0!dveWZHyiRF7Sl6E5T}FPsMd1|OAt zi~3i5Nn&Rwv&c(-7OX|xxTVY~!@)L}2U`&i(XY#7*QQqfFK`2O?-Bif;0COVc!nFj zIV=zICQaM05=6r%rG!E3Lk4@EMHY z39zxr0NdBg8!j`{YP!A?a96elSM?POaf{I%2l+P_CyXnhpHQmuTiO zX&Dh43AL8H>ccs3N;A8p5}qv{LBl!K5p3j(R!k6N2~(9777|Vk)iDJ`4CI-JdVPCm z=L^8feQkz`FFvy(2)?Bs)alD0?w0oUAyuv{VMqe*J!A?4G*Ag*1iZQPy%{c#|F6IM zQ+oyw+8v`)f{`Cy`YWC2nOuDw-#hPTbRz6*@s{gggiKO|2fus_Pre?b{Lc6MJE}^r zx|V0`%njNazc=N70R!{&F8>oSP zsr+ubZi0=FB@L8nGXX+UKB}5WH)xdLw+baO2pfG!jc*J-?hKq-Xho3)9DGC3XuCDvWq>n#|Bo%fM z!s^sR5>H<2eaNNb)d&2**`8MrN$VSgdyNQlyjRCaw&%p}_`EYmBSic|nMcAqb4MXy z4nVyF#0A(;gvc8hO9UU^uM3&)~YTa4yf`Ka-Uz&z_Wl88Wdrm3Q~v-KwaespXJm+)w3ai&zD`8N-)f}dx9iGWV7jrF6V)v1m? zcNG)muAchyVhUXZC*u3!07$g#Y&7@nw-@FSO-4Mxa=O@)8F+;pDeUV9C-Ug^Ft>N? z;&;5qz_8x@%yu_M0NFygsqS9Ii*Bv!4!AyKXE>MMQYxlRiOToOgo9w4Q| zs1BIa)oZtJR-dh`543ZhqLt!^GlSrDZay8_p9~e2m4KCvJhsTTu~aj=7B@)%Cex`~ zR$l%X1`|J8E+D_aCY#!xh}E`o{h{TQqFH1^v2x+*moKZ_&$f;|hGLISGWu_heHP<) zpt&gd{vX0%^5N>Bn0t~Mbs;w+9!Q3&E6-9F(ZO}UC{YPH>(K;hPedmx&_POhYFu!i46Y$?S*oo@Gb|tg9g*tseYj8 z0?5A%u%9$DW$vLH{q0pJc^G7yLu*@Ef2Dn$0jXPKE>CBKRSF;U ztcdu+Iiry&06vaGh3(~&#wdXt(sPc)KMKI^&4;jT_5Ge}0?_U_e{QHosJv#^Fh{v| zHXu0wqtJ`Hg*Lx&PEn-bY@zvj%B}`(DTs$<>nhQwOKt$7X7I;kZ5Ad=bd$*h2d5!r znISvj0=}pI|Ni-mW?lKWKtd6_0aF(Plryq-uXF7?oSQ?W1hG>bA2VduX0@4MZN$7`#9LZZlTVxFbOf zT-4?yq4O&%Bb9GJtz5ijriKI~18~iD0~+iVwdB1wmnf@6+!mBizpmvKWOz93ZUXWw zICT4qnBZ>o2+iq2`GBg^Y)`*7)t{Go`xZAhrmd|FDA#gxbHP=H=HVh(!%Oq?WHdDD zV@^-Lw{BBJu%XHvtrs{6r(JTD+w-#NQrU(zbPx2DwE>h24>#U%dko|{7ij>&04mQ@ zU;yh~zy7PSxf$G@W!WG1oljFXHO-&G4}hx8r=^VKXXS02Y}FoSsB@ zD9vZjzz;-TwuUwp7Z()?2nrS(Bc-ON3%&)+&ZhJ1(Te=;8BS2YhJ4rlO_8Yp%ggUW zy}+wux(_S-dQg@#w-0C-TY~SI_&|wdWij&WxUbJ$Sxs91TcYDeUsQTuAahm>R~af2 zBgnsYcgIME>=dwxwjReSZX03X1O5I%h^!ZAv;ks)hK)Sj)ZoFT*c(uc)2i#_WRqgU zK<%XOm?;2T5@eImkA>X;DRAw*i{60M=LC@k42`+?n9{(Eh*nYIoy4CxTE)OCkpcz? z7D}A!;biBuCwNV%WWDcN1f4t%@%lTLNH@uO-dl>U7sM?$sv5i(eR#s_8t_rZ7}UUI zDL9B10S;{wKS)fonwe^ns~r@&wT2qR7G&@;=GyF2?ZPI@oE zy|bR7?q7Z!g38f{Qzv)y;djAx>uZDipq2TrwUet~uR27#SIwFtaf{Vx@73Y1n#jQF zYDAwEOKsRhde9DQHo;`#TJ}bN+Z^RsGxMEWb#9*!jQ<(5HuxCU*3`p#;7-`&AfN?@ zPg&j)9i+^Cc)8TStk82qp?kgLQB`H-OQQQx`QCLwOW(D-09(-e1zO@ljDL-er~psL#D&4wt`3)iLMgLGz&G^b&>z~v-pTjvU)qCT0Gj)J zE!W#E;$Z|VE3Z24e9JXy+h5!$s9rwpJIYrzuh27!xzOA)t|E;uDh9_z~ne}1yoZ>c{ot482fGVv4lkSOhNeWG+k zx`@$G&XWi10d|`!eaCFk2Cv@7{7l`eQ~EaR^x#!g<5tmZ&o5>ln*z}p|K>ycQPF4q za2p7nsG4e$;p{u1ME~Ur4(|@1_u;3K#ELjBKeMdw2MZT$SAke&Tlmc&7`M-{IL+I?z9wD%D&gN+^fsiO zd74)no=8t~hu9KvUwh&<42kp?fWGg)8VX!RwSQbirtek>NxH1ZUZ>U$hjl*}PlOQa ztA&;$ZT@loG$^iZNLnWtXeYdY)df{uacV7ea-db>al5BkC-rnE`&cdT=mp((^e_3v zK>y2mL=Q~_&lC#0QHrSig3X~B_Iu=_Dh8{vswo?i6Ss0RbjrNpQGFLH%d+mj;Y{D# zzq0lGaj9#B-KXYx*H;b4H)|Jg{o?nwZrngrf_vvLR&z^VY0608YbIxCK{NayJU92i z81R1kY2E)OmHvSY#|>-mVJOMpE)r^~l4E*$^OQvlp#U)QyXm?kg-&8M5sXAsFJB05 zHnH6(rWkd;P8LZDDgbQU4a3*eUudISMMhX#lH;uxmzHaDNc4-H@1iz)sgzR+aNn^i zjNLaG5NwhLj}5Fmy7I@C#93vib_;K)*~*B<_CDP*ch-N*a5ES55x!_R-L+2KcN~Ho zC2`#`@Xlfx<&+3uK*IWK#{r3)K+Q%D^eg{= zE}#^zSLGimA>iT~_?Hct&59wo%caX;GD-IK%R>tMss5hT?E4$<0hOosc9cZ~x3 zcxT5P6pZ+QPMOv+&5pd}*}vwftNkb^(@gR1Kt6rPb$s6&nHEYtv1L|ynD1LPE7I;_ z!Siq5gk1vep5^s%$G*&pjQb8k6(SC7rzd+AwjW4Yi#`6kbw)qrtaUd)iZFausTwqqr)f*g)l6>5clp4;_FQVl{jS%D6mNvA1RpZ~g=v(+Z3e`s72O$D} zkRMV$u?vN`FrQmL$4wAK+D4$1-~;i0x!R>tltu6`XGKLt-_~uwQ1GX(i(wYDDfOoI z<>v%L^o1XL!0sf#A{ns1g&!yYOl!`lk#(f8=U0aMnKYAcVej7{NO69l)zsBzJdnDY zfKm`1rTF5srb~u=1n*ImDC^|sKRBBail=uhv~f32IK&S9BFg;Rt^J<7d@Q&hd6^ud zL+Ik-V!EuHs6<2ulYwen2F3?{gq-*315uZ4Q!}%LrU+(erVzNBfEXMPtJ~>{t_tGY zHF|$44G^M&@}Wgx*0Ix`!un{?I2~)Atv)+hyPcuR7d2er9FhCtaR~JZsFj(Snfv>l zletK-lmg~P!Xaarh|}t4VF&-yUqY|fp9_3CnT}AF4MytH;ZsDG{PGKkso>@$uJomx{#48+@?xb}dz*J=B5}>4ta6m3hG^Os21^9R@N7J6O6cVTV^ltAr!&4q@y3I60 zAen-RBk?DKmov1ztZZs@T}8uh{Tduso73TdczasM+`qLsv_sl%x-iM_`jr<0H*t7P zf2AF@nKmDw*p_gZX6e&mv;jn*z;fTHrF!@(|Jye{AtBjlf&LE23=)PVG}6LIIf1mD z%6&}XTApbbAT#Vm-3~u~TUej?31aJ@a_jS5^v=p3ag`!T6c1pGxsDZ~Ut>1t@@khu*y6G;(AYDS4}+^;K|Bah&f=uBxi4tCNW|0?8jifN1V~4eLt_ ze~utZ8Xe83jQTBAvCpyG_F5baw6?Xatt+#)yL+mV)L~~4Ng)J6Jr6f$6e39;;=qs; z*LX0|l%Mp(k0gOQCmh%>u20ogi-c6If7+C<>{gYuU|`i`-@E?Ff|A%6F*&tab?1vT z3W~k5Z-y*^%oSdZ#y4;VBsSfz$rC2oytuYD_WSo!IWA0d)7DVPLE8_jp^1KA5g9SX zryuM~F*Y~%|7>PzDqF`dTQvO=8cr5T0>Yhg7=EL%{{5;$5Sfr^W>KA>B8^Vs4<{SB z_8h^8d+0s#BMyMP01d#SkTLzfw6eme>|(|P%s7{+Uo7Oy)HQyaSDE+3(jVj$dfn?i zA>Ix4v=Xf0PlMx=tt~!wYCfSu3A^rd$sJJLO!PQ?5dS)LnG3@M@(B6qUmBM=d-zIy z>74JFZJo1sWJ%^1A#3I@LpfX@TQDbHdroxoR$cZwlRHhzc%N|6H7O9J7M#3?xv1TC#Ic)=aB*)CPSf(eEpPth z{YHH7Wxw%AE|puvSMQJ}Fv(~P>W~+dv1Q6M$&{pgAzv(VJG=h{M*wQE#vrI4G|lF`8XqXrfaP5IahADJ;G$q<+F5VG!KYoTCdZe0)B|`%qiU`rv14wFHwj<6k+;I*WGbjT79v00v6t= z{y9lVqREa2{K47rcKn|fRs-F|kNx~w7A>yilUy*$Z+bLC@ za*s%nKevRWeYZZ*h6I^vz#zk(nf$|ok!k81WP!@-?DWJtUcF9pJkXB0?S;eC2%m5s_&-;&@S`z%px6=AbAwZ zB19_`pUBbzo=gcj2}tKu;`q5SUUOMK?B*%P9Koqq#UQ5=b`J|!8@sfD+Jw5u0cRs{ z5S=e1v`Z&;B?Hn~K$_3=Wv#+wr4yMDpqDCqCLvXRoW zT8VyjiO#JPhXiXh`u{SL2tR9+*O<7bh~FN*H>NzE|+k=T9^kpvgdD$c=B{I z8x9w$ynE83xJI6M43cdYmu~t5`g?21-~%+pHg8>yUQoxfH;n{AK7#gDWa54BK8BkaGyh<%n7JnYk&GfThzFCInHd&F zX>@8-7ks&m5V-|Qm3yP8>gf3^BCpqz z6zSqq3Rr8=0HKsc_BiR?yDxw_kHi+|2xkj7KzKwSZLha!xOE0;HNNDv`px&8#T2nk zv=Fk3STDEP=^yBpaZ$Mu1cFcIfNPOMrz|x$J*m01z1_HAAR&SJYgC2=5buCDyP*-{ zPew^u{%XQg826U?{BjDf$@AxLPbq6BetwYHAHTfmfNb~)5-1k(*mmloW&MNj0WRN! z<=2*~BJ|wPq-dO`?!X!-4+W6E@SxqB#*X`A^(`;$INVT90;>hE$DB?+%-z>puj@n)v%2o??6kGEqOY6V_|gdT zZwv{<&9n;t7>ug6nWPa%&h#q{IPy?_eUx=Mg|+ugA6eA<>=l{^q!?4brj5Q==yrNs zY7yw?1t;pk%gzhCI!$RjU#)9&Q4jck#83APH)l7EpUSDcL@dxs0?7&Bl+Jc)aVYF} zxXI}F_3a(Q>=zf0!w;(hXy3#B_5@I*oS}pa_^jMnR@%+0Kj>Gw_u(_EC#CR3vT_C% z-9op4lBieNmnttxeDMbipDn>}fb4J0b&LV!IGYAOv9fp~bM!r(D%XSj!}J!$I@r!) zbUz?B0saC&9bZ5Xn0B6w479|nTP4Pog@gvLA1UWqcLbdv&oaaNqVG*U;2i-*D0@_M zak0pyDa{z>tU2VhkYO?^3^5lpAT2Eof1YUd0|6oODpS zy7PnH6L$pAA6UN3`a^rF$iJL7+>sOGGoKP$KW^pSPf$JygorPMYo@@-O9C->5PDXw zZ{hsZHq^$zTmzkD=t5KA+2ZivLN2=3f|teHjS_@@y_B*-V=gHjJTo-Y?Azw5(Bxkt z=F{CPx_;pu_YBjJm8FZ*wAGMVg`Adm*8s{S`ZMEZMNa8a=B`rP=0{ej7Usnd^d#>4 zSe5fn<~$V?$1fQ!=C-pnaOB#RH7N$oCm50Nz7*!(c!it5pHG^CpyBhPwo09%^)I4w zqci;*9q(T@l7teXrhEZ#Q^-)T;k@zbAR~v67Fdg48&;mU zl8d{kNn}?Ilhhm8z{dS#8yq$*B!r}xf}?c1S6#1*u>)tJr#nM=COQqcb_B?b<-vyn z>N9K#D?aff6y{VE`eWt}tcOvWL$!;6>1x5n%h>0Q)dSnJCu>%WyRFLs47h7;F8@@_ z1lFO}KgK0Vu-f?#auQqE1JU=TU0SQzZyH~|v^0=ds)tIlp1KFOZZZ3g)U|FYzJCc8 z$-&k7QQkiYO7+LSL<^`>gBNaIr&^XMTj@GBRJk836TXTZ>*8^+@&`&mC%OAPz}>IR zxm-U`Q<9|d-oV%d5$N(G@)JmV8Fi1djKmLwTK%0I_a!_K-;JTo6o3|TT$(x#S04Nz zKf|X&N)D0mh)}rhE4oS4ZfB5R6hAHE*F2EM8zv{ckLT$4Bo|$1S!|eLCPJcfuU7u* zEC=u7|pJ3wFzj89P68$r;?40l@SbHw?Sd`BCA_7cEj zg`G|R*T%xW_+-doK)gcl3qULSZ(_Md>|_CQ&oJQF(DenPRJ;Az+Y3E@U^S;PCKkW? zdCtLNKhppFf>42r;PMJ+I5m_on#Q{NWEaq8$13*3<_Mz?_CZxKMuc~TfH)%MUXziz zdGp?MCNcPf*$KDoO!q4Q$p_Y66T#U=E2+n*O;2P@HkYfRf1rusR^g#q3e&|s^z7Um zFhKR72^VX@<|`~BG9x&7PXkt1Rpkzj#zif4^?u+zYWFIWg2gl$$@YPgC%Y=y97xH3 zh+=-*F18ORR((1taHFrL5h)s(QUctyhqQ3cTw*Q7knH z9Ic&cPl8sGs5{4QB*Ojc!Tr!Jjs2( zL^dA;iHT_)Em%K>o%P=)U1iH_1?*)lb6`8iY0K)=Baaw_9#R^+b5!rpzxsr!8U_gz zFp;sx?$RoO<{*5(HS~bIR?#K<-P{a zy2-Ox%9fK2g&iLmNzpkt`4U_X*u-p3fqQ-dhu_G?<|_(CZ**7Q8y-gZ%k%{6_Xgpy zV;oV!7Q1X-T6lj$9i7y0z+nKC#a0z(&Zi^So%>mWZ)H7H*&&U8z(ctck5n`W^oP}+ z^R*m@kJNPBBqQmR%U(@LnV4j(DIdQi)$CukLgbRO}C(8N~9@AL(td1vpb~7KU zpjFa~he=V>pOL>b%A@9xgI}TYlE6`w1n6u*4(OsqKpfaMK!XhbJZg!fmerd&Q%*mA z?6>C(X%_QU1D8t#M22i7V|Q%uS5MrAk{6dQ}NZ zia}uLG()fAAhFyv$mnMZAaMC@5^?n(*HC9L#mUG+iRq!4xP9{>SDLWXSz3?%Cz37@ zfAzCjA}3Z3SY*eXUFqk;LC{tOVy9w=_-pO$`x4&~D*V#$=5xt`8o^wiRGdZ5++ta6 zqrNChFUhEfGsVw5tZd})zd^!~SAZpg{fV_D&0Z=g>1T1l?u)-RREo8Dl`IarsI0qs za}lVN(1ANMU(cE(c4c;!s;{&v?!CB(i0$n)@W3PTK;66Vilsb#sUBq0zuz{}%0V}v zuU7LBgbYs))g4h~$$t*?uZ`YTce8^>fUg`s@$2N7-Q)|gZ}sP!-Ed?oa6DneEU@I?_z9YXrY*fc{LXC6kEb~L-&W!+j$SY$F>tb zyY5T7*+mjP0<~(w1p}qye_8K-huuz*J=ItpdW!21_DP5w0FwgLa&v37bKB(F%kxpPDH zF)(Mb-%_pcVZ@rV3{7vDUFn!KPv>dpV-m4Zu*+(R6$&sBFa2bTd3n2B?I=wE&sphN z!59JW7Fm;b(R{us1@dWh#fej({A%1>)Jl)BDbAI})?m@KZ()HLG{~U>Nr(*2rmh+iCw?+YtYiM7BdE}Z_ADdm z@ipH zvE9c87e2e7M>($-&Hp*t5aHi9a)m7cl7Va2dsB+3)C} zF5-9;ceszYMfR6wTyT6CeSEZL|Mp}1(ZxetOp3ff44g=O3I6W)B@|EBOUCbLZE-^o zyZ3H^ua^(1ta>HmCmR3hxkLKv4+{j(RjvGNMnXaUH$?`xz2Jsw$g-oYt&V&R696=f}*x7y! zr0`sN3y-{r-8yMe;ev)eQ#ltfCeX_bt=9Pj#xQC)4OR2k8R>Q>f)(=8 zQ#d6+aPge2TYYqhlusT8n!ZNyKzt|ZQX)E39DiTed!~IJp_ktMs&ydm_if+<>Vdik znM5VW4Ddpx#5ZtY|1f3)Y8%HBH$BL}f7I0C?#KHdXxwAkjKNTa#l&**Pr&GEYB&8W zteE1q$$RGs`zx&76VhCNjG_5vY;kB%`3WZj=Jz~-*{pos_v%tWh0he!dabLN3V+re zn+V7u=eU!^fwSB%{%~}C<(X_!eDlo){Fqom18FbH$fpE=gUc3tKJwL8_ddOyG?R%I zW>O{>p0vm!?d|d0q|Jp?ZAv`XwVuR8QSkoei}08f5~CGXX;d1>&?&{j>`+o&V(F!kffxJc;cCyGV%%5C!!N0 zFFzDWlaJk60t7tzCc~QH6$IM-)J>Qe}dqUFQ(JB+?#evK< z3v1Ib>*TL$1nV66w4Pgi6z1~nM#Nq0BAXQ@ZyQ5%XQg|qmKh*TvQEgLf3^2^ zkIR+(EUzISrPviTYMd(b(fV4^+!GPR=s_V$CtmaR50V~6r?Uf81U6o8uteR5<}B>0 zFir4_4`~$0-#E+(TWAI@j1geesZ}mIqxH{$)Bh? zjq|9wPW4OaztJG8T!^nU38cF;qUXmr<0023&$dGYY zsUZ#YP2kK>iU-kz-gyFHFX4S4E*v>rYd34^K}s>i=9hAiHHyhycPK)|!Ny^Wrr=<%MI&?jbE$d`WnxB^H`&8bSXqU4FO1mYrr93U;#S(KfIRwb=7u?ybR} zEZx2n=zCFb+>_Ho8o)rHm6v}U1tjl{#;g13<~U#Hd$ih9;?>3lA6dV#P>{2*?z{gw z2(x@$=|+$_h(F%^#f$YmdBFsrmiqQ4NPA28M#Y{OA0vDPjM{QSeoG?5}Ax7RQuLkT=CT zght!G(%CQ|r+b@Lp_aa{c_@Yf?N-h}E;@>HvL9elfMwlX8Dj3H=Yrb!Ow|R{1%A+Z z=b;G20z06XuY)m0U7e%Z8VYz%dP4@$WNqE2h}L^k_eAd9SdePfAsJ^>m)CB1>@)9; zc&(E|4No|vY#iK6K(_qBvRl=B4LbZ?g(unH-Jv@yJc@Y>uK>O8ZLqy!mo+k9>O3uoPWS`+T%o`P!wBBDHOZ~v2)Y}``W(=-> zgw&!ex37^Qy`TGZJkzIuRJZk0H##8k31yHrv3CM7zDd=0G=1y=0Pc zAnC@(U<^EM{782Ae&oy|=i?mVLxJ1cEgt}O3I?Xs_Y!)v`VqVRVkrYm5+L%Q&Ffh} z*Wh36So$aC?6r=NWe308Uwe~k*AX({J#j4C@-%XPTEa`u^uD`}fqu+Y@jfE3XhrbO z(3*gnYGYk4l6^35OV_AD!ZnK(H7GYz_bFs2iU2i9-ApQ_l^3v!TOK%HDgyWL%6BE; ztrjUwJo<^(z6PJ|XK%5}8aV6=NL*<;;vh&1wL;wkx}ESO_y=6#GQzSX8@OffCje6- zYyg0C{Sc4Um{e|#`(pzi^pOhRqb_;B*=<U!kEY3+}gXAc=7@yS`I6W)Nf>&~jIj`oqAYVm$4=pQQBD`>2IU>b=Wj9bmJ?bOeO1g(wivBQZRRTw?f7&|e zxZX-$5V|5jS-jx34U>$CRfSop%4B>Q6^cyWEc)KnAj=K+8RCA@z798+KKjVwO@(mjOt|Aqf1Nq3H*G_#$rr*7bB{=$EvV)LC3SrO%YY=w=X6W#Hqq zbH$nUML=2PWUAeB$&IkUFHjPE^2h}mz~OB&6j~0C@mMh}48cCV>*c?lk|Y;1@=*jsXH_u$RjnAItdD0>F_g1S%3^2%CXenyXwST{C={Y_(= z*k`hxQuEM0>czjl5ngwwH4@-JUR6go3?k5FiGb)Fv_Ot*L3^`k+^?{Fu~{@B1g(j% zq7TG~u{uuZ{gB24Uw4wRda{&P>XfW{PgVTnwx6PDj_Li=)`OjXJ-3cZ+CQ*@8X2ta zQHilYI^)%NmD7oAw_BQZ__;@dK0)%U3M5=hi;QiWE}tZO19+j<9YXVsjB^_r5Sr{SH&rxa)@`I zf7E2h-Cv~PWRUxwB$P;D)2k>R6zXs|(`IQw*A&9K9#3|np??v@lafY>eC5#nvP2!) zk~}z2(E;(z|wl(ZSgUxT)=y;`0&Qo z>Qe;7@oKcnRWEpeS3xIP&~G@m$M6e;nw*xG^sCitIO>9}@AF^g*u)WsOqgmpFeMHo z$0p(%le-rZ7BASp_1gFv151nje08$BlW>f1_M(bxF&y;jJ?#>8x&{`&q&3zE@Md=z z{?j4VmvpZS`S!G}rN48%*7A**jqWYUX<@lz?ch1bfbc`k*^+G2#@Ow)yppi{Dqm7n zHx92U6<*j6^2Fq>EzYKxq%>^nCAxZm-*V5MV`a2b%gxT`EqnlYxwH3II%8E;_g1(+ zoxEL({8d-o;H&2PZVBi%5mF0-{IgvnfiDF+dMWu!VrCdLULu8s^(2F}|5@JxjNl&a% z&&gPqF@cV`-KwQ=_tbGZqGRs(j6&`m){gVTw;K45WL24alL!nzRJ)>{UgF^({zX(i zwsxFnri~GTx)P=PbVc|yuBPYd>~`@o`$*EF@ZMU z*gOjWZt=->Kh43lyk!U~5c}2P2RnrjfE4op;Dc}f1RxpP`sFTk1?4zQ*kpm#$4oPSL}nvTzd={l`xB|Ot|inUygMV?Kd6iOxBGygpM z?>K9LQ$J6;H08b3Ernnz5w&06uc7j5L@_1Y2kTANF3O8uf>QSHqU*vV>a(3S9Yd=( zx8fyVa3oaaJDM+^0!{?Y*%|`co@0+txpR-k>gp=i1^(iN48b_9Kxp}m8#h9CFxFWh zOIQGG{_k&`=!d6bjGZvIN7-h>VYU#A#im8nQK@c z`!zsuCnpU6A=K7p!~+~FiwaN>1{~yOEzt!+4Shj8+MrAD8Ng=~Hkz<~ZIXhYPGRkL zJv}ELhiqp<69rhe$%h?+9Wk8HOIYyh8odTg=BYLC!3zO@mPrToA_JP9LFEtqv$Qnx z8>o9;q=L|kptgR{ZOHWa6hOXblM;Kh00;)|6yT_8(r(o&FHWsWcnWGz(6P-ZR0t(c z;74tI4}@A`+XWWVsnZ%h4XdZc`Hyt8ci6QcjP3B|2?{5$i-Su6NI4@XizN8t(ZTW{ z=d*ckgaXksJOGOSLytf}1G$JR=awY!nCxupVfTEHXo+*~TEcQaSo`Ef6o!<{7>wvo z<4+7=BQWPZisV7uE{Jo({X4inR1T0hUIV8x2xvS1kj@QP*nxo#rSl!^f1LhUZxQe< zf^ct;4Toh2&I57IOd#9!f0XIf((TLEr-LCsq!O z@DOUisAg#@WTp$nvI*=hWo)?p;yn1jHG44zxVWfT+37e^Y*a|=q3I{ zu1&|p`>xY;xNgi*J=XEFAp~#Ae~(hnNCc{K1^g|*P4LK;!QpeD4N+z>xMYErP(n8` zxCbD{H#tN&x+NzkCzCVj(w`6W{$FDr7Wn5JqdWw@zcCLNs$}^IZ2KpN-R(CqEqT_6|2aH81$ zjPaCBRO=*qq`}PIDQNF%;We;+&gR14J1D}c&I$}Sq=K7yZg!PLRKOYh}wUn1qAfYhH7|G1p&GG)X7mG%dV1V z`-9w0B#9(=I@pT>87O*ZXN{R6ocGt0DTe5o5*hjFwDfh%tSmcJ!#g9HV6(n#w&d;C zy(2%c@&_O1F;8&pq<9)!5Eszg9Kd8@f{+$Z?3l4#)VfbahHRGo3g6MKJONFw z>MGEjJ8g@1>VK^ zt4@Klv$F#yX(n9EywUQm2c4WvU>!S~ivO`rgXfc`yg=E0iYY3b##?Jl&pC71tiGiK z2W$T3?vQnsb(;O38J;}`{IQfEdXqfg-*Or6B>+4?wwunl1ZJLM{mwvO>tMbm*Kg|0 zAvu?libVauwbX17lr_{K^DXhTvO2A3r&F1IKTY*j{w7LfG^Ir*j8GacUxw-`6%G{% zm8?bmfQ(sAsg$wS_l-lJv-PetL&0jqm^>ZWLcrdr513-mITw7T$*>(${m0y?44^rZ z&%;vhI?lGY)=md3$fC-9W_xY{=sm2XSEB9$*sNQ(W44&je(%lfacZN=sb#MhfzD13 z7Y$iGJ84gX!$LzYOcA{!_?h=swm;ivZ<3k2qT%iC4%?l4mAj3iw~D1A2jlLPWTpgR zwwhPY_*{@~tUj`T&PLf3cszi%03&WL8xnO(X0yX~>?IG@@m8#QEt=?s)=)uB!t}jS z82=v|b5#s6_M`Xd5ZVI0mOKVdk}XUHFAcq)xE*V$){$p)^y)u`ntph6 zucx>~-N=-ic`pza;CEkZm@+yH$2b;NS2rEVX_2qSF9e)H173{SO}rOv`td~YBt!dA zInh7SZQIsPLrJkLkKB)n;)g14{y5Ys-m?%VA-1=)#Svo#`0_4*xUZb2N~=X^J3wNdEIMXv`6G+l)rc6PLPD_ zo;HB~{DT&&*F+X52HQZRB+?+z@~qs74JaVzh!Sq&7KfWTo4;@jwF~_)AkM(-C8EUJ ztQud-W(^Xe&e%dOgAMSp`KQFO7@d0rgDI5B199CS#gd8x3a zWEmbs1R9iMH<7pSgC7Azz8*utfib#9Ao43vrYQo>KQ_uC&`K!}3%sZ4^`6Mb$Sf#k zAy=jA63QQ#ef@d)nopf7nZXP(vrsBP*y1asc96<;2xd)=R461?~@?pgSLGD9(H3DrlzKZ6hy`AMC*B4f3c zoAU+{Ygv`(cl%#s^r2O3gws6^;FGp}`{fxdt+&uyp5Z~x0<3Q6;vh-5%Bj4lqr9sLP@qWS!~R(0Ux zVuM}dY$NDE==({b8qrXA?&$PzI*%}lCNKScZe`%jvQ>B(y^92kIxXt3*ej#=&qrsR z&J5)6EO9@C)QrZ%?_Qq!;2E`I#GY(B5X%0@T;$;=sr;+uMCxD7u~P<6C=(7P51skU zL?Z7S8fyCfzEnW;uEt&M_Q6iC!K^uoZ1ieZdMH~9`gOUVwfF##Ja*X8*3(^afQ?3n zyU#X-OOhd%b|=-+?x7B83J9>iU2NW|{qiUIJIid%_Qyd`gf1T{9H;P&Obu*iFie+= zA>a$84TLogv`XXz>U7YenUq62CN}o|EP(_U22Go(q6JEU7dTbQs03Hg2WW~sw{vYc z1K!bi=wzjSe|D>c*6)5d+Eu&Z>geSOO*Q|KxQxunKywq=x3#TrT{l20Tx=C{0rkt~ zK7PXrT3r>}A4fALB8fsXeW=pjkB7G|0TH+-8J)w#FDK5RU=1fhS!PLYEdAgDhZYE& z9|XW7=VkLn_5S`KtKZ6D5jTPcf5F(eVJ#J5Lv{d23ksBn!=jd`3O|D$gs z}gnLBK<=F$WvbhswUj_3I^j>ohMdRT)4+PVM=5Lp_R<3V7$<1|hQM-ujXXcALzlH$oiNxtf2#D(h zI!%zRZlun$k(F0PK}NO@Ksx{?GOM}*1N|kCZ^a98qrNKNnwFbH^7iR4uFSA+vh($M zUILRKONmGHDIfWTrH|ml#ru*{2G_|n!l#g!&R48A|k4~4}$cfM!4m_ zvHP3wa4Vgyj625@BAwiSHD13*mg7gd=^LKVdwaQZJ=+muMMdI=vZDz&y0yNa_s=^( zdTjwM+aFTF2=p9f)aYc9RRMrma_A6(3;G<}wr0s*OpZfjw2kCIOTyb65A=-xiVN!< z7k#rEh+9!pQg9uzk=d0WpP;dkK9PLZP1^r~7w;pQ`a)*MS z=5(6=Q$*KbUmNIJL>{UBDfK0IQDNq2hMPc@CcLv29HawE@=144%>EJ!fi3_+&c8Pu zej$K>I#^p+2E( z@dsH6hPkz&>k@>Y4`~jSN{Am_f}}&izdVdj<|rk$xa}@tKYNxSwj!TxvtR6GGJ1CW z{t&t>SbYKe~7Lst|Pp*`)TT(b|6e z*QbF0Jgr9vq!dF>PpGRvA{mB?ibgMKAI*A7o zBM5oB^%0W)<7GN`smVG;J?>X~+z}96@zD(7edNZs13Jxsq%@O)fuJCQ13C|&wppv( z(`AIoh-w|lSss8}z4QK0Wy1hvWU3Ov3h@sXAI{W1TB36q)%`Qlm~&$@`k1Z;Yd(F;DEmg4v;WqSzbQ!md|!eh?JoS7f7y(Lk#ED6zC6HT9YV zDx71opyKsImZzhxywAmXSE~(PKO-9>|3>o0bey#MwO*P@g?_q~{;RCHsUM=yTNcxd zkFYkM_m4_b__?7R#w*^A&!%;9!p%!(;XwF2H&|#PadymGT6eTo&56aq3;n>LBPo%Z z8{#|k6Ho8_Qa-E4S3thooD$jTKZZ{vIekIU?NdUkE%HubjJhp`?j+z)01?`D9y^c! z+yx^g#9?Voa_oW4ENSPE`#sdX_>VM>yBqFQaN^F$O@ zcXKeCT)m8SVByK2B3pyY=78GZ6IE5af{v{RqkiG^{ihEw8WELkSNb<6fVvkncj`Iu zOEm{kZHG5c&{$py+!-;3;K2440uk{j{2zPNktMygc~>0mr3)D8#n2V_an>8tbQ^sW z#_p6GyQjydXYDy{$13_5SyFtB=(z{6c}92Y8s7bCoTl{>>n>rsIII zZ8wJ}1U^$d#3{QwA04(-GM=QE8tf z{)b{vTaidB3P@zv>etvxe%pAHIL(ybKBiHE%O4Vk<=lowsmv17pK=si@SOuGk3_eB zs!H5^f9dP@PR8ca2}82TOm)X6r`3iE2d;f@Lj2WLUO^9OYyBeIm!8;4R0-cvYx&s{ z&1qs{vNl?&U@-0T3s?)m%1D=g17fx6I`YQQIl>pg+iF0AjD``Av8L=YG29>a&id=K zlyUFz&Cyt788iIVeH`Z|G{3bDXC^5x#x!b5J&e6UfGn2oo#k}u#bSUqWy6lx!fuFyZIht6E)4iTuMr{IGo*ZqnD-09dORHqm45S`9QO_;U z;uNE_^`giHZsxxuNPv{Db4tF-E%GSf{|}hl`~tRIR}A3=l#RqIlE($!8#OsaJ|JDZ zxR(xHaU)^Q6em!gRc}xXdgc!v3mIXL5##f;&Siz~)QCE6eRmFdBHGZ>`9xir;!lz# zxGhXsBckH5f1QXva>263(@q)(URxd5*lpWts_PM#@mR_N3AS*CRFFRQZBhWLa&a!V zX%EeP7SP{Sv#HXz~|W?Qq|(Z%*GST3CVo%eGySp#U=NgoEQ5`-iNW5aV zXEgRYBj1NAY6I=~nmDp!qgk7~uDOZIL}KV(T%mgj^KLKO9^;-Bq{ud?+zHy%`2<_f zF!{mw1q48Yh{Q~~N_+sFZ$Q%gGN9a(AMAoDG84}pD1>K~j;ExaUKinDrLbl4+hVmc z@+VlbEQkr|1W&m-DU-IUN!|H7}v?4Hh=K z&!!}HhP+%Lmzq8tjVe-j`~L6q_~e{(jraAwUa#ltIg%h)rM$enMBs@cKpc-%7;Hdvy!TLMFyg`Z zk1PJ~a>twE5+-PzvG;}-jV#c=gbn!Hwu-t}{99Z9^+re(gqkRN{`Q#!%oU*Pg5Y(e z(k5n0U!9x?CVoH4OxWx5o=x`>U$f)`xG zosA@x1p9uS?GOv1)IZ86h$n`XSSE?XjOA3Wa#eM8|KOn7l_xC2@|~#}87EJk#Ez>N zLPurl?1(4sCm~UhyKX+{3E=Xqx>W`f zUzYa&0D822tDqvTef$QIfY?&wTVcciCZ&ixO+>(7p^hr5{SYYRqeRiQqagWcUbwug zl|7|(ZM3!peEk>cT=o@&^KJY1(vc1<7zXTU7Fc%AwKQ6eDoadK=*<_OAX zChQP6rv4Tqmy%35Nn0~Z+VI1RC3@Iy7zFXBjFrjeRr8sNei{f^9YqOu-5=}vBre)s zvZkc8w6L)FLJ{5k)qKQf>M8>7lH`XF!Osl41%GUBZ~cul$PPC)($(!KvtxJ|stl3B zDoKI}gse;k7u98F;%dI^`Yj=J7tXEzhQ~cFMu&)v=@USBV*zWcI~YJJ0o*|32X@#1 ziE|VFI?gP=ipM8)o5F!}03m3muW+!qi3oKF#`pC{kFSu%wEsVS<{52Z^hy(vEH1C{ zP~6kj&w!P;M*E2>U1dZ1jUl(*OIxx@m0~S?#gSX)DYO?}9*>T+S4n)exxK_l;H$m$ zsQt_ncWJdqKY<^IUQ9D3*=wG2Zp)Ans&}(juSer46MOW60oXuN3LpE)u@YNoJfk3* zDe2lt>nB>Wep|eIM&%Cjrw&p&{e2){)|`9!42PRjKikQjGgL0a>=rx;V$a-SG1_VR zpX0*s432~mVtv9FV?IfP1eEj0tJRk zbOn5mdU}%8sq9L%fe!ldGdfJPj7xs8{xwpR{?9#?!CwU1%i4n1-)Sg&*_pJ|vhhu& z_YEdhbDrEzba!|99rYx(|4o{(g)?)-IpiDens|PKwrBN+1n9c&BF{CF9AggcWco$e zY~>vt9gXX~t|@H`QE{dmF+S1zf?PRqZJmaEy#LL)6Fu~wzMXKY!tH`RZ2aE}1EA1vG&45BzB>UyH#7 z#vZ04J115H6X!_uN6|>%`)?eb{0!1JQq8X`b-9bSod0k;Qvc3j$4*D}7<~4z>i23> z6)6*6r}~6TTVAzNR0dWS$kW<80Hdwy45$xkYcGk5R|72wnl}i4$tL$Ktf@(9dI%9P zGtf`Zv!VgF>FS8SRz)_^2HSSFY;YMD|8Ma_TYr!h)+ODZML}ufh4ml99~Jg79pRp*W`i$Xx?~oaJ$tIW z!+fDCr0hnIft*<=cufMXueC#`91?}3rBqg~LOf*OHx4<+Q?8rf`2Zz7w8#Vly_3jI zfErritmk=h!l{oAJAe=mB97Id!uG(IlsG_=DAo5<)`)JO@daXc_KXWQ^{>Td+qoj! zNZvN7{jtD}H3pn2aopjjfOxz^G7O~ODim30A|nCgZd!mZkx}us@9K1CXSxK3^w)1x zeN0S%e6c_y+27)=8Cd+bXkBRNs%IC=7(zVni%HtKcP$nr_mLc=!U z*0M|!v`pm*In=4U3g?VWb%`62B~5Vt(8$O>H{XP;qBJdPayWhQu#<$?oJ5eK0`6rZ zhKJ5a;<+H2&g`?K|fLiVLb1qS;zAH^aGQ3KE}yTRe9_(T-%>T z+i`Rh4Kdf57%$GlkzPK5qEYy_mfo^5aB^Q!RU;zie%$c~ zoDVZ#?$_v1jJ;sB`0@Gkl(^F+{|P$Hs}8)}9fM?-P9)uYEzA;~72lNst&Ji0q!O8V zq%@cQ&2{cz)K&(){xq^z)2iHX1jIO={^=VdO%vW}^=_N{gLcU+v!%Mp{p5{)J^WXH z;MoaM?nDyF@jLI48UK1H%tSxacIAzaM-1VvJEUz;WJUX-XNsh<6nS4KwMae7yIBt= zN`8n6AaxFwBRxWm-K5# zv2M{L4%r|`%w`yXfW3FXO!rwF5@V3y`}XnX!wMesK>sUcCG-1QsJy8?&xPK-#ceSL zK2quR14Lf2YCgo5qtzn1AdbN*4$I(#9g04CYq;Q6Tf1NEYRUT&P}>6UHCx0+a-fx~MRE$^0oRli7XK2(iy}0|gE>nuoL?&6 zJv(z?KQCat?cM=WG2?q_f|8QzU42jkuPEwFcvc!Q zgP{3l_SQ{nNt7oqTttX%Q-;)mcL!Li{r&vFLo15{!po9u&dQq631)ekoa9 z^7ftfSf64_7D&XFDBh3=yxIP)dq;s|yYLHA>}L2vx*3F_KuDmIwgGTFXCGcSGSW4> z+6M>xFTojc&IYYUm7a?#9)kW_;T4FGe+lE`VDkV9yb%Jy3Zeg+$0_r8s6UPm*y08M zpyBIO_SjQd@cJKtA{tVTfdJwmSsGHe0`@gqTre+v?<^r9K}exz1Ihy$*bs`Bhr1ri zr5^D*p_OG{lh*Oz%mX6NxC(6Jw|cdM7$Ms>5Kha|Zy7=gx?_d84mo09H$kKnZdTxv z)(Q5K!{OwxESo{Gx?|A78b3F}-&AWnYj9dDY}c;W;P_@I9>~&QA2c3Rp^j91J@1tZ zOFzt=e_vc0ocf3Vt|?t%xehy{WB&ZY!bo%Q-eNskf9A zKIGG{XYuLufbo@t^LSHZUHhLya0mDk^HDKDp7$OK9`G&T$#XQX|8$tlT2MdM$%ME(QqLmQQy~3s{m|Cyfx(QrM>)&5uQ^2)i<>#fhaPdCOKWYWJf!C8F;9jVZ8norY<;Y}5J{hzy~c7M;jyx=Yi zeM26uSJnJ`g}$7@k&3!HzrVko7@pIvigfs>T{*fRLm<80OCNqewD9dCR*1;WNZSr> z3tZWJ`iEVwsZp5v|V=WUdFB{0VQFC6}f6r8JTPLQyr6&+?$bTdpeKl zfIk+C-BQZCy|YPRhn>tg#=#owy>1+N*-6jB&iIKA=Do(Y!lBP4}u zm)~AqNrK{bH}^f&HJPimw=MWHZ(hB*3?{V>y_aqt$YtofnorVr$}nb#()mIGpUc~= z*OwSN7VSB=qa$hjf4Q95;Gu%O9--NpW1$oO|IX5l%@MJ23jA*76}Yh{VR5e#Y#?idu@sg~OUcVL@6UE;_l9Cn-MDtJvCV6o$Rf$MH#7TiZeZ zV-T0~`v{FC7HoVt1SLEDzKz=d`Y2n}2 zc&tir{Z{tA)NV)Y55I@HWqVODR~$tt!v#*8uGZ-v!QPGwLo!u$dc_4B#T>}<$a8u9 z_{;x4ogP)&04`KB;`d~)Y}{_R%JV<*mE zRKe0HZFd}0DE%u*T~kj(h24dxe5a?!9-0UhxO%_qRXr=}bIrHE_UzyNdqu<~ z_&GlFv`2+MzICa3g$PA{O`^t9b!ktP3Uf7{4T)+`igQ|*9$zhNxG|;u>mz6J4Illk zp$Ibucejk;DY*idD*e;G5q`|8vv|SYv(pTel~r>lP7^jEHyJK< zf1`*gzK4DC7%pFNb*ab{tQDSJ-I+pvfYJ4;+l^a(|L}_Uh-M7tk8o-fOWA3qhLH=tjgS?FKOF5Wcg^Kx7?P+qV0tk?d3X+8IH zy1qD&nNWe3TD$i)<*S8z%M615o{&)OmLc3>mI6DR{yp(F@h?!gn*R_9O0D5czZ`xv$;Rf|;decs2#kV1!tPg%*)+4)g)G*I3Gk(6Q) zk;f&b-l~SP4}m-za3*0nm{G9+iW)G||8KDeZgUF30`UfrxD6S~!+mPnDu6z)ZE?-I zv{hA!t*>Jfh{7!rZEtueuLD7qy9=lD7K|hvZkzF5`_I>rDAg3lC!X#y##FY~{& zRg!!(;_G1|krI4S)R?$l-stbJMV!JTl&-m zfMp^Rb^o7lvNNDO|H=6PGs9zHYlWcClWE4 zOFhK;0E$PRtDU44`{cO0h^s=_LxM|uA&VpE%Akt@ynjU!P*xu3Yr!w;FsVedpPp{(_J8M9I&|m{{1_27q^S)D__3+ zLLgK<)7}*rf|EtvsRJMS9GwYsY~NkT$4*LO5c?gHxB}Xwt7mk7e@A3{|G7WNS`WX6 zfyArs&dy?tMCs(Pk&zLwNR2AdoNgxKM-6f&;+f(gH;KouS(4+{z z1d9}|ku~Z%{V1N9yzR=Q<`?eOAkKS|HP`HK&;}s9F$yvQo z$mG36ms#_-%v&bgrD$O#^TM$D1%mDzeEqZ;6^igJM%FKFPcmL2n$JFLSqAdQV-tBd6COX^Cw(ILD3X#!~W$8eRE zXWj-2O)^5>i@1 z=rXR*Hv16C6SwJ;F&~I2vo}fwBa>@k){g~x1}LiKDqaE1enlZFQu~&)f70c4^4U1m zV=u>b6dA_!d3<`EReKK=F$}r=e#evXxL;E>x$+1d8~W|aEm8bytJ=e-FH>Fdy;>?X z)bo4EpB`foTylN zD72J9Mgm(FH?nhFMjXNwsLmS*QoRL-aGcs}w2rPvpYA2ueaoTqV|N*-hLv#1K9Vjc zixF>dDMZi&!KluY*=8u`iw&!!|Mdnx*cS_1Jg-((kAc<LQ`NrAmG5qB34-^6@+W)i9%QL}2ipLm)}Cp{-lOg!DA%RLxS z<_!MY;6ThT4Z4WdXja(lLFIj&teBXAW^Z9J0qD`ZL)kHS>H+{-of?idB)Bc;U#Zy> zWOt-ee2td3|C-KEw;t$vr6RUOTBS5ZzWz3v{A($Wqr_AOcT2v$U-E2Z=%2UX<$X~Q zEdbtGzX8ofGCXeOzfyQ){=Y^&bgw|c={(k;miKf*53+BFD1t&%`$@3TRB8F@615|( zHMC8Z@$ikFA_dxF@$@jix6PBvSyu)VJH?L+8Icil!^TFMJk($A4<=J*b^V~TQVx-v z4yL#0-OYe9ONM4w&HPoRvObw-_-SE0ra{U75tst;1 zi&VVkw9ifl3v;HY6!GlK4}*U^O8TvTBJQ;CrC}m0#rf%3$6VO0q93x(w<^EtuXU+i zY;z9v(){~jxXQTx>r3@tvIp@$`<`t%@5h8nPP@~)IbSMLeI2i<m*7i={^!wn0nD{pOAKC~zD^k-h6hM( zywTr4DKa-)Bf0=SP84~NQF5M1MbbIXN0>uorX)mWZkM@vc>YcQR=H-z!1yoz;mgQK zgWFVOBsTKbgZ+t_32|b!>VEp1+Q*;H6EmtXFk?uXd8YfBZ$8FEM|46{6kigEA)R^n zV6(!(ZTw`?wW~85;z!x;nMmZw&v0bgV+ev4U0qDBckK23U;n-*Y0O7r=DnH1k`DA2 z;=~ZGee2BHU0Ievc0U6W|>^D)8tq;1Z=JV?1R?fgA}J%49&^Nb_KpJ4@cArCWa+maeRPfyzgVCld*sfb&%~63KhjIW__LhXfh}y z@-R;2LIUY<3c~?OV@|L_#TrRknb>>4YXfPCXe_WhVmATt62yn)z0#HSwGu1)aH98; z!Zw9QieyrCb<=XNkImO|a?Lnw?VG(G?}^|w4Qk3@g8M=+i{y9pr`I+l4scZcKIzTF zg+0B!_b9X8>vVZnmU?QI{!Fp;$pF&yJo7eV{_iV^G|{)K)b?n6q+Zme+KJ{ca*w;J zlSwdrPOEX+p`j>pu4OAb3YI?ac6Yodyo+7I2J(jC_{)a1DB`&L9d0tC$zDWS`e?rW zS%4z;+$L>EiqGi$&;u5%+aw7 z2adn_=V)_TzEPWv{=H@S2S*=vG`vz*`gg^%Z0Fjjko?v=OKQ)4bsjG7qO~!5l5-J# z#YpoAC)El2&PS<+r1j3Kf6;U=9!4G(#tZX{Ny57kk@fu77##Rq{Mp^vmjk!nQp+M6 zR$pHqxb0z0Il;2P`1`vljj?#f2-6LF2h-_*a(xQFd0-GaC>Ln$9OTFR^0{}9k7_Lb zu3n(Xn-ceTgDD!?PlM%a#hv(duiv{_$}BYSh-v>2zI;>vC5EpM5EF+(s1pW6^k5Wa zVmr*(Z<5r~G84!-0>CvQ7{b?KOUBXVf(ZDl;%`eBY3M&W4eX@G9(sF2#3GB$&hyHUx61Yq-@}~p z`uP2)i}-Nj&wMHIQ&xCUn$!wH`_bWH|H=0V0NV`|JRstM>jUWG*qNE3SD1tsH<*M& zsBqIS(a(1@%3G;_w6ZF}T8BM1S@ESGY4*dqYrlN?-reoehUsGm+C0(vFou`oDZ6H#3?d+O%auea1Olt`kGPVmMz zu=@lmGJrt^>S#XLdDM+(Hni-8e#)CP9Etn4Ygt=QBM$RA6w+VCm(}=soL^nkKz)HO zQLS)}_nEK7gUkW1^ewHP?GlIj8Bt#Q%il$QYO4jXyznpcG#2|{_0FDVtvN&V>|BsO z$0o0PYty;e+lGMc-PtuMQN=u#Ioj0Mw}4;_S3$fMi4jO!_GQcVQPKFKVH^-%T@}6w z_T+W-_5S|;&AVF;NtVkWc(Y@z!SNQErJ(ulR7BxV{=2RXe(Ug-Bz4JElVN3LT~jic zv-$#63>Y=)sb`t>6*b=a^5lyIIV-r2_g&S};<)6-oxVQT1}hhGWxswsr52@531Z%< zsn+R~?vWpMcjqx%rAK0=x$2qcRGgyhH`_dsDqpn-xQtR;`JMktJp!$3S&RnQQ{JLnSxkK3D#UjKciZt?%I7 zdZe)NQ@NX)Qr)TaOwafltd9a+@cEuy?t%IqW)DrCupCDNPcQe09fCaj>wZ zrlwBNRC}L|I8p=HzcgH<#lv>72QsJa8JYwc;P_LzgP0!s<#3_yeEFZO2nMRci?Ka? zxjORAV-E{4p9Y{6`h}`|#0yZz3Qz@3Jh+FjB%HTKvrh06$=FkQe{IAdRlhgHo zAGsrZ%Quick@or7!m6}fR)YV}MsJYrZc<(L|NRss()sz&m5P@K`9{MGNtH${Jr($X zAjm}1y_-Dq*3Dzl^@t8B3IUT*)e9!{` zNi1bNNbfF&1cg6{WUs;3iM~8h8BU9nBC?H_z@eAk5MOX|EN01_hs}^u^M_ z^afXtq2)`>$nrvpKV`V*O4Acl zO?uE>FF=wXR7{~rewvwCx23S?ZZU@OT|8`#@zil5YECP{f`YM~)_WUmvUo~%NMXP| z-s&&Wf)-~Lhf+O(l(x{0(n3`YBy<3Y>=($M!-=lL&>n_DW`E`9V7+TA$pB}ov9dDZ zhPIjSI;0RHop`*`zasH56NCKd?9$R@81X^!8M528-i*Ed{?*Fn_N!N@z{(##8n7mv zuV9=mmjta;Bamu)4P<*EoRt}Rac-FO!WaNTzo|V@1$RJ5Y0PhsK1F8?_}-wN>~CgSJe3{W^aF@NNJM^y5@!htZB5~a<9w1E&Y z0U*Y|xIz^7!~axA2yTKGhs_H4Y&y~V4j7q0i_#-T`)Q$08A5;wNOoIKWPe7ElgQEn ze^tOba@3I*9FhcRjwKxWgF~V8ke+IMiwERR$XBpJ6NJJ>q;(=CM$X=w|In%M*Wj`@ zuOB!CB)U7)MC=BRfq48r^W?B?BB)K(e^()q_@n0p>&2&>W501D9H&z@iL!^4nEMUP zs90ch5_iZtFwi9P1O~i&PC4;l(b=dC`I&F9r^6e9K+OHzccmW3A4Z+}4cGCPQFAEfKz`ce#}|E93RuKR z4i^Hy#m-JGQ!)wO*?=o;dbhQz7CGi`K~kCpws^r8fyvac!$_KoNb1M-_DB{%*5v=C zzE#w`tWj_4?2LA&uDExtbI~_Rn**`M=KTn+r;>Nkr-;ENB&w8?I>90VzxzLfuw#zi zY4214FX#lWXglTC!N>)a`UG1;+RgKGq*Pl^c{)oiMi?nSy?cK$PN|D-;-D2a;;7`E z_R#y1TCLoYE`LmZ`u!*6FX={EC5E<@&obsX7-KQ<^UI*Y9<%VV>yA= zKIK*$a5+RM?U29CQ(o6+r`aQDC~{4;m6KGzj$#`aD_57U+~h@%(x8X z$ElRoXI-fC=H&JLmq)iYz98L#dl^Z?o=-V{{T1fAGLMv;NzP^4s)or*%_iMOVHN}; z=0(lHv&Iq<%EXY{>hl{Nm`L4oI_{)k1LJ>~FrTAZ?Cw`*$S`|5t$8U_WSGRxYBMKu zVD31dWGM<*RvgEj=@XFGD8Ni&D+RG*rI*VesnM^tz>Ab#g1|q7T(Qit0mo<_vqJ zBy(JtaYhO5Bmek0M~d7~W070W=Lz>DPrPs_y_cuzBYPf)tBwh%?{@T)BmvTFYNs{W z&EJC(W5%r(h&w>ADR17rktB$Kpq#K%SuU>KZvr%k=dN37*P^`x>?-yh#i63lo2 zDjywm%P0$(0>7bAVE9`U^z_g{wU~{TX3sWt|8o!D)23!P`9`MXl{{m+KINhDtT)<< zyrP1k(VY$=#I8O88qfmIs5P_h?xe;n@7+Zj5ch?tK-tqC6Sqwp)4a zwmv5p<_RV6ww?R=b8n?XK<_iP?0J6vV^mauzh0AbfV>N7D)CD6%3_({WlK`4jh_#! zNFweTQL#{{4-OhKki@;x$7W!rysBQVMR-K*GT>Dkd|-{dd#9-8qA$qzO;Q)m1}I{H zmXdz3w6dB4ir@ok4#aj|y#h=fxH}~wDCN%@?h9^%u8e=aaNE^HC7+ZQtlN@?x}`KP zK_uhey@zvDe8ILG>1k;?ai7mFO6vZWMuYP^yolx2eYV!@w6wIu(|HUrY9~DU0ZmW9 z`c|UF&`ouNj1qgfUq3zMXb_mh=OeT{S6ktblq>hDs;VL)C>TgEai;lPn`g)j;^NOv zNah3WnhFSe^a@vnS6}HMONCD93+cCks^)7}!xy}2iMvLoAj zKl9Deueb5@vAFWG5C~0Y@2uqmzHZWq2!h z@Zbm1jzbZlVxmTSjK6RT?~Iw$ue1a20RK>-L13SEz%j$5z|!ZXb=!TqY~gWg%lrIf zwY!TQktA$o;x#VgreMBED_~_}(*Wt|8>MD%fa~xO^DAa=BSsM4Ite^H?yKBdNSCv; zbf(yivo``7lVZsbx~#1a9nV}zY94pdPW&{f8!CQbq|p&PCbXh;>pOULt%Z$p1or#d zy-5m>efsMg<99Jp{G6TVyj|<-nF2maua8s0{8|*B4jZILUu!JUw{z0m(s2F8A0%xUF^MQLC$q6YyrbZ%T4996>ChqO%zy;I&_BndKD0; z?8e!a9g6IG$ZVj*Rgz8&%yhk*I)^;3Sc*{`_%5b&S_eaxoFosZKHVYS9=sWw74j)V zZQ>6Z7hO=rTdW&Ff5^-rW-4=6OJMZO2KM-b9B*b)+OQRapt^`PX-i0}Wo0K22v#0= za1t(cUs~>rKU(CoYjsfEw)%+*p`6@kGRkcbv&_fj{t+v6ck%c%|HpW}f)K)SN;4Y| z&|rWuMTbWBKavGQLU+68fJ_I)WDDtH#!@|Lo$RM-eJx|HoiT9t+^qcF7Q+E?FXKVK z7#lGW8f<$z7oJ?koeQLeeMAl$Ti%N@%pcfX794svlPXtF^8GxCxmMl;chZyWgy~uq zFZ__xyzKea^0LLO-VEGkRCmJ_o@Kk9rP_X10(S~+iG0tV+vl8uzlMR69B-KamgT%9 z+=ONhE)-gaW%@aNU00%vhzWvw2;$q{*9@22(Ux02AAdvlm{$2kDSxdICRK*Yc=bR zSWBx$Jo^*O3@(Fq=X+t~0rQI5TD6o+$k}0$ImJJKthzTjH)H;z5^eYSGwRLy3b$fd zosm;h;X{8m+sP`&|9)L|I6rX(kam`xg7fJKHF zAMeH)c6zm>+rfIgDO^}CvT zj$kHTq0hNrB&Kntg)vaFBZKxju(%0BDKl^`$W?PLuU=|PURhkMW}WgB0OqDrb@Ju^ z)Z9;`|HB=im2MkpiZ0lbj`74*$+&zPV+jyw=N#&%gJAmuC>`ZNkzO7ELZPz)hGv8J z&ny{$t0TYhH}3{6e-^LbFYFn@z;p`@~?dcT!Vl!-xXm?Gc@p&BDH zSgxW%3=IuA_mH<(k){)I`D>v*U-R7`%27o{UJTq?gRBc%75mA9uik=>8f0GpGgH#e z{@65vAt?^-yGcUp8xF3yHo~dhm;X9iTOYC~b4*T2f!+B-t(27158=S}C-6)l(3iyH z)G?Yqa9)El1tK0mc?Mz?-M=WBpAD;BVBZUaY=n^;p~E|Gi~Zdgh>}CYB|}y=t`I|# z35~!Q5ZaaGj=}^PP?od))^$ivMuZR;B;YgGza78hfmCMy49x`bFJUVMzyP_RBt&J` zgfbh#5+sdvhE@gPJpB7KbQ=hq5`*X5hbTsH`3ON`BuFRQ^BM_CEa!jL2p}S^2LM;>%%w--#Q zXX{p@BT*EDAoc86!{ z!<$xbxKUV$>BwiKp8$J?HP_<9htDK@XaF`3YypTp9<~G-0|Q!w zlwFDz2=4@prrRE|K@akm2u=~6`<7o-&~bx*C!GH8huVSQY7amRpepuj z2XgEYM6JBs1?V1`0dXNj6vVgs_wgMA33RH?Q3NOkI|cZS9zJc6E|4uj0E*{|lKQ_} zq^_E^YrV0M&nWvvS!r*_^2rz3Y{cykYm0>f?6TLwW0oIshc|x%P6^EM{-^5U83D;& zpGHQo)9jYy>1o5OXwD}HJqw;50f7=-K}lF6;7hQ&PpVg?EzsrG8#kg>1j(N2A)J@etQ;>=>x z50mbhVZQnRn~rLo4p#~E#OFc{$SKh+GWP(o87hY*7F)&samP&Z@>#VaunhZ|xv&_AvZxS!npHjS#l+D^l!_c*~a|nPuK>d<*Ccm5S_DOLYm~$L?oLwsc1Rz1$hp{pn+uZd7E+;cr6t zoJV9Q@AD}Cj^>@er+mwJ|9UbS zYkgkx+>5D&2#t$ErJ1qkZGh#24V!AS#op_1qg4)I30rxrZ1o~{arDnw>^r<1N6q-s zft(W>e5RN9h~*8@cMFA*1J9`A$_Q!V14zW z&q_>KkN&TkqQgndS~Jn72_^Nvoq2&?t4MBScw=YvUaq)CsSY!IA7oOp z!=tI8p#fNS4M})73@Ky*bM$kmM^k*fs)dNIubhw$h!5IefkvFV6KCx zMJusjw*n%$xsR#Y*}-eGpU@~;2!Ff>I|8^q_xKieRd0dO@vD;S-#||TT|3MZUo0=yjN|D@z`FBrJi3aEQvNMFl@H;%1c%T6Mn%HD zf&$RPGc7GGk17dkfDku1m8~v6$a~4Qz9kQmr&fWl#`I+t%=fEe9P39iQ@iro^gIfM zrW6tlgvQAAoOM>`4|v_Ip1N1G+MW==CC|1hOZnU`xJ#co&Xh*0DAEE{NX^Z`mX-z3 zfUb7S7749vZ>|EAtU{)hC5@TdhFB(e0Zj~k`SSSj~rRw)iV7F9Cf5^yC@zMK9#}3^FdXxK#Z~2Q@A7wh9 zmx@g)oy|MN(Y^3%GO&Bm`9 zm1U&_e7q`<1si^a69Y-r-BR4e{PsnA*SX_!OgpPOag(mVNSx_Dy~*Z$$pFRy>d3Pn zD>)m}oRyWeySrOl%$H;-&kv(L+IZmc=_O5aQsz&bYpnZiuqNSi5cri`xw;cb*-mP& z{7JcGFFxMyN_#dCOn(e$1o=lj5H3fIU%}o?&g3OPf3IH285l6`>=ebHM@Chh)*ouX zKMWr)4gLfHCY1sXcTcwh&HTPi37T}i6A-J}HEmjL5tc8wa1h)9aD`L#rT4C)AR(3v zIW!jWsnlJ%-NHEuD)zk~K{}oQ$WMGJ%-dr*b`Tv_c35k*+AE$%9?(Ry)_b|AwUivv z-#8!tSZ3w;ZqMg4wsxPM9J%(>{<~$+t6Mw~o1@pZU-7E!73Hl2xT{Ml?L60rR33>? z_N)?6{~EA-)zs_SQhI(x#N5eof!tfqejE~=-9CW1fM-$uILLhZn0BYVz?UHH2e;%u zQSYMbhBj6}()`CDxBzL2p#dF2th0cq4A%$1kJyh94eAeckhq4vI`^4J_^-L4{#fbo zp2BE>+d;jI?;cO2=!yVMFb1#cJo0iG^fnB|vOt8fq<1bnr2T&)!n~*gBFx?X|F1-t z-Mz)pV3(7Gptfb{liae2y9~ua2lY4I-}h(fc)D|;EQ0~+M zzM{er$o>2SSWGnB{`LltQ>P9_0T}t$gRc0u?4j8oe3)WcR>I$-Rtb;Ox*I8WeyF4e zi+C<(m+uMrDaqz#7w>JYHgC_A+VaTGbyH|y^=S^RseX3x_k8JHlc$&)`TCqoMc(O} zyZ~>#_m3D3C7y48uhXxfc5n2Cd*L&6_rJ+6T#hI`V5-QX)8nYT7LmDgVCk1{8a)pl z?K42VQwz@*_)9o?)cLsj`E9|TaKp&xFK_|If#Y4NLzgtUz7?Uab=z-Ac;@?hdt%z_ zx0js1muv0q=66q;8C|v*`BoWu|Ni~sDY(3x*vYV~Oe~)-9P#cfJv(l~P;pC%q)=qY z==)gJRV%9GsuPKM2l$-F<9|ElK70FCPX@CtGwh+1C&{LYy%-dfmzI$MLVK~(^DPe< zfv$K4ONGW%?KqqWQ8?v&NVZzLh3)dW8>Xg9{!NlBH_^6L?Mq?IVTV@h=Bev&mlKAq zHU7OR{BX7Y8wMS;UnQ34wHJywjv(znHHm5)AY-@QiQ`4fd?=RfU00(1O?FeYDR^sQYfG#8 zj)kxxRJ_A)dWsENMeI>_V-$}UPN0^!&{geatK>!(4|lZYIP~s4y}r9N_Ki^Yl&PtW zF)LTtjm1OekEC-tc6hd4{(bO`GxDhm19SOXjz+f^SlHO(p;8ARz-ZQN-fy%bya_MJj*p%&vZ-8|JAR=aFjY?-s2R8a3V;`b+*5@a;D4IEOTVk+(;^S3Ct?j=k*dYq96wp zUMHIa$)0(2@D>9V*(h({GeuYmCD7J12L7$~O=r5Ob7impG%V}(Se(6%O|*K~S+!`( z0`miVfl|SSbB$HH9|G7a4xWj)78Z?>RAok9goPR@?6N_Qe<;MxV_ezvw4laeWMtI( zTzdJFobX5Pdg?!wg(ETZiE)g)zP>-Yp|n5}Y*r~H@O^xR1Ex~`za6(4&z~WgyOwv< za(qD76}^2XrRj#${c7O-ZA=_d!_uX5Cl*;YDwe_H0cT#Xg}WJ{K7Gx->L5D)E@!zH z5JZ6N(m)X|f9c}IFJoi7OZ7jIarWMBAm30VCnqMFrVDd_CK3BOsla@^p`%mobv0XH zjhAF;d3ms|Tv$IadKSHWS(0(b%yyfq41SheSLp_$EDlHus;H=V`O?-!@@SuktZeg( z7i&`xzX@&K1I#iKo`%6N$m){4udrqaV|9!NN5iDPoQ@c4Y<5^!8uu{N8%b8C4j#muxhX!O`?<*$-x6yK7H*bQi%Y^<#p zj`vIC7Nh=6`E9gV1yg+F^>&t%oR*C30e3e>=&r|HsIG@1HCgRv|Cs zt<4UqKou*;!;QW;^6j|X{J`6kz13A+!&4X%D^=`mRV+$97I#M&{<+}?`KhX=N~y~2 z%=7>LmvQQ`Q}C4CRfoS6&;B{;^W}@wn*+1dGq^kOzW`Z*Z)T=r$jIEe*Uj#@>$mYa zndB>>q_i;*j?_aXFJyJ6moed?gZ2DD)OYIf-P`K|)K+&D$tJIxngaHeM3#bNu4YSr zV@Zqi+F1Fc;oW4oIhA_h08qsTmSndUUOzz&g>N6y146Qn}U)v z8Xp`Tl#L~$V~Fhz#h)`Md*A$y@1n5(?%c9KHOf46$8Te0Oe?qw{ye>c~~7}ijYQb--_(&CUD zZNlzuqf1B0?yp7aAc9tXv$Ec_exFWNRgHjN%2L4TmLET4^@`)gf|6>*O3ZjOTrsfH)Pb*##i|7toK=ZK&t3IRlGbT%xAbUM4>Ev1JL8WR{;8ab-W<35+*~Zl z`If+uQ2Uk;Rd!s=xRWNHLssqhg#(n(x`Hq-cOFjKMw4uld z+Wo*`10IuTJom_ko`$S5^>cO8v5$swx>lwvJLt_gG6SpKn3L{F=j+jP=_T=(J1V3_ z4ZZYMX2XoE4&5sj{?3h`mkfPWslGL5pk|Pd_ALlXlKzrk5`T&ZL)doKg(Co^5v(i} z$}#WP*N3kD@{$GG07L!^tUihmUQJkI~YQrL}B+tOo+b(eqs8}c|C%!?DHb{`e?;G zg)2!J5i9sN$5rL|F4b_)sVbdL&tKy)TtYRzIDoP}MaRHm|0_cM=S&q&d^XYFo*dO9 z&5ahtrYKs8XlkjQRmTR_z~8acF{I%Ib$sP+jj#}}Ssy-)Q) zDAYx6bPnt-sGP$QkP-+R%vpszQoL==!k-V^jM^k9IXFTTE(ZPqrOgNz4wb_Xe~pqe z`G2}~Bq&s$67d5M74>ptgf0tZu->XU0}+U=Zq18|qc!z7=U0=<51+#glqA;}K)n+B z+`F>e8KP2-p+3ZoCxshdAx${p^<32`jj5MbagC|Va_H>V?n^I!1>tK1@LsOQyJ%v} zGC$8n9n8xZ!<9enV6RO4C^RB5Sm@wKBYq6k=-%u0?Wo~RYw>a3!s*vt0T)LEk|>{e z81^6c+W0aeVg5n7OTs+UksF`LLNGDiC2XcuU^!{I9NxH;6B?Z`$v6D2iK-p!Jr}Qu z)AXv=F$#U4IX!)_p@(ZRNUHB*HQP_ z?^^El{Nl6IZ)yFu{kfY#BJ|^y7>rx903zoS@e8#K(70MHz6}+q_S2j$P8^bkA}w zKJ#aq>z4;_t|nEreY@t~sg><(KJ79u4|AuM$n%=_J0-(er*i&{xukq&{|CdI#TyF0 zrWBdhIj&e#o3?W99@l*3{#7Sux_^0Z@I94}%)K7@&8}4V4RG}H>-skuz;nus#FqrO+An`{ zUiROUDeEh;gpO_wK6mj^^4ANSMDEV3vW`|iUAFi`#X6C7#|kE_aN4;rfT(@NI~=Y*DF4WoL(Kw{Y)m`y+ZG%){7-^Z%dEbyOmT3uH%$<4Y+jH>+rwM zom)OW2%oT4!rt{+=VtGfZ@qZwbEp-ntkRz`D@XD6 z&G~a-b{^ee7^*A1y~#yX)<5x(4JXjTf(W)`Td5zd&O2o9=f&oxm+^el$g{Bt+P3@A zt1DUgwG;oG4bgVI{@1y!J^T`1s&)0!veR>Ca`C->zfSv)bNbX&cSeIvk4`Gg)!H;k zV}|vloy!mV#CVn#OnG!u^LfU@W42p$j_&BrVm_L;B>HpNhN4}K$?l7~Ru)S3lv;ay z{o4P~qHi9j?8g(+ik82a$NlQwzN5YDyxs@%byr{RJ-KOZ_I9;BXJyJg+|5GNyZ(kA zR6bi7b5Ux&hHd!X3HA$9;w`7$_S#l>_nY*2&8PC_cX$rG2)Z2ck4MBuG0b9e#Ujh# ziIQjTikQqPK2&zt?62-!k=b`Q+U9tK1m5UC(Ww*efFyKtBi-PewVs-J2t)DcvsolZO1X$>uV9X{j*JkUSy}v@Tlt(|K(zd9#_3nXy2`fIGxKZxBFTmsoBZK?0OIvhi z%rrDnSyi+4W8QPOCz;W2fATV{SpOR=Zlkiw>1n=O_}^+y1_OoI^@2%h#{?#>_;qof y#{Z^bh69DkX~zm+%>v}Qh3LxY^Q)`>nV+_01Xk>Sy$;yCVDNPHb6Mw<&;$T74wJ+H literal 0 HcmV?d00001 diff --git a/docs/source/multimodal/text2img/images/imagen_arch.png b/docs/source/multimodal/text2img/images/imagen_arch.png new file mode 100644 index 0000000000000000000000000000000000000000..530e76946ebfead107861f57599c74d01fcd6309 GIT binary patch literal 492447 zcmeFZWm}uu);1j6-Mytiad&su7I!J`MT@&jDHL}o65QRbKyfDoOK~qQftRke_rBjB z@r-;(t{g{ZGUxDfoVlXaRpl^HNl^g+0EU9R^alU{5gq_Qs76M7eW&740r&L@-c3?L z6ZuvAkUvGeUX!@X=(>M&wsQ9}`)moYc5-&KWO1|jY-#D_X5;LB2G=7B0K5SxNK0sX zXCH&b+=~|5`JT_Zj;bvKzJFnIV{&uiS{v*9F7Ul=;j5iIPci9-@bbd)gxR*(*BAYo zWkG#viN$Uy> zh7?&CR<<_9LcF|t{4Bgmy%Lg(ZoO}Bg#0ddd5n7kLY=#PW4)*nTzxnU@*1!4dNn5696?xLJUUznaB#A+^r{o4$kxpz$<1668N z{#TO;7h5Oxe$P9;+0OL=Sii+6+hPCX?ScAGuFhK#>BG0UzI~5-a^5Etg?}stnH=pbBMG9^n$bKXX||PbwB*1O%airweQur%&>Kx~1^s9}+r%LbQL_w7BVx zzBN8^bgZQl$?g=AURF&iAsO>IH()__YR&lADdzC@w0M7))()WVzvjY(RVGbJL48^x zKWD!DUPoPXzXW0Y^X9#%@v-BE94%kTOC5^XVLt5iHP+ViB@(%?`wvkDBK`m&K_}Uf z+Th)-fyJ0bw^`I_J0d{)c^2Vubx8ws#}|=O3BLY3)rqqvm{9EY{h3x?`hqMqC4y+GHP6GdZck|8+nWiR_kt z8Wwf-0GxhiSoeP>+Sd-EpU^nM%v2zffwDb8ZhIgtCV@lFfiDy%Uq0Rx-njrHrx$q zB!&npusR{pU&~@-k~IDvuuJ+Lec{hEy2YT%qng*cndE;0{XXWE81D={EKMNDg!ouY z1pVtOPXAtoaUEDnNLd{gz!_F$v9=P&z9)A#+lbwWHm`1ahw|r%$nWuUplDsN<8jv= zS->RcV6N!1W79=gFJZHnXKh1Aq)vB(Y>*Y1P{&m5E$7{DxZ;jxIE$xb*m3{pE-=OM z@v-^siYTVBU$}GN6NU_-#aF5Fq!zN;oR-#0zuBQ7tKq!>G~4&m4Fqpb$jzPcKz<*Z zYr7g^tsh1G4B7%*A83Rh+e207CAl2lVPJWC^~PQ|E`J_(>wj3(n7&!3@BVYX}!=ikisJB4EQw?We%z+%8G-FT^`yho^5d zsZP&%gJa*p8>6kr@-~Nq_e>z59{=!Ad!)B4EeJcwROj4vp`kU;5zajU!PdgesjENJ zH$ME&>~M@qr8TtaCDCFE#qVF9L)b)>MPh1o^68mqs$pd`&YfTGN!E=o^Ec<{C3Tui z6IoGegpm0}oz=0Z$VB`7xFvcL`F(;HtaCPobS~a9Bb1E`DI-rW1GlOjM?yss?6l-T z2NYnl&k0`LiLI^4@pw2hnd#VX{*B+O{21+`9x7%=Z%{zQrSQ!j`*U*(DHLHB1WO-p z#i--lhy%h}58^_5^@z821>>ECoSx{5^ym`5pCiu^nJzTEz#|}s>=OE6SIcV~!D1Qb#@}nCycHOYk zSM)c=$&kSU3~Ane8j)1`$ca}Ao*7tCczq8B2s5|d5rA6U1~lGe9{)7KpiT8U<6mep1WXLwg31)xMEiTK!?s4zE zKnhgkR~JvqKB|9{3nLrAz`)Iawnw;XDZ~;{+p%KBV^USg#&&nbD5I|1Gtzr_;pd4HBXigd$?^6pZq7%CS4va7gNi&V#*m{ zhmvmyw2ZrIWFh9wxtTi(Ty68LmW&0?i6dEdKDb7d*?e{tTZ~`i3QPEw*@_r4qEd2( zHFNOm`m-okn(%W<_#+r&oT87AwmX`MGwHOkmG*%Hj%2gHQXS1FEIvvXPtu|=&uAJk zFA;4tN$b2(*W_?8$<&}eQI(tJ{?2`(=nB4x7*mz1JaEIupE)sIqtfET-I%o+$+e4u z@cj@0r)V)2SgTIHRTU1b?vTmD)Jbvr9s8ra5Y)|WP|!#^P**~{9dWsbo|KgksrW~g zS;cv|+Ech?f>Dju7GXO*lRk)FftLFFr=<50oS=w^G{Z|0PTeU@tr6=B@m7bqO5UTENop?8S>)6y;(y_Kq!NTpk$HEZNt2H}M2hsr9i^Qt3fB;o zxZ+tW_=ZQboSQ`}pLn~)dO+P{hH!%zQJh<@&|x&7>Kk^MTQr3Vz^IcyPoY&)kES{y z129oomEvfEzJSd(Ne5Hj{{eHkXnC-sa{N70$nOtIAT4+x z)9B7*yRE@aG<XsGC}Y-(;lHG8$DTE?-2b1X$g8d?0@R z%^y5ir)ND|uL*;0Tl}%gsCg($bhl*EB>r?vh(T+Fs)b3@Ih#L|o~Ds*B#nYasGTu7 zXXW?YC%hK&mk){{DjIu_9xk%MNvXONiO;{HOAp-#F3w+8Fz!t-n6gxmx!;+% zBNW<2=%&&}crr*FNh^KZ9LIx&3vc{IixTXrh3h(uGWPn)Ah9FM;(yi046yj59Ks}9 z8konQEIxAuhHKGk0cS9EIup+8X|c|*vpY7YN}I&$W^3dtUKV%JW4b#^=6~ZRhR=wT z^)gd$QiYqTQAGa4!a*SO%Zj%rU?HPD%UnX7%U-#;s?5AK=~F|J4a#tz#z!ac{1P3! z9mAZwpPL|wsnQWpsq;vaG=e&Yf~D22HN{)_^wl%RlWLSug2NFszqKYm5U0K~=9XW6 zHKT6Jr~_4m-4Zz4aa`Dz(~7MNp0&K4K8T@y3AHR_Hr=SSsszs^lc9O;Ya@B(lC7XQ zX)3p11=WHjCo(Ccb}N6GUegc-QVJg=LB)r1-BaKbLGkrs%+Bb(lS<4^W?H3XBY z@DWw;#qb`Ig>CBl#rMG*0*M{6gh(Gjj`~eZ7@Q>PM!HdW zEuxO^#;C0=gM$krN3G-w$v1}EG`I#C`Y*h}haRiP`g15SGMeDj#b03W@9t98vk6Lg z@0Mucr3G_21)2%98=2x~1OhcAO{*?XCpg~zr`f;KeGQs1)Y{xs)afY546$$)d`x4x zZThnbI_OK5Nq5yOsb*1io0bB>Gz0D5qq&fg8Ji6~`m7hZN3N$s-9>-;sz~NjB4kuE zTQho#q+t^ym@?~;zpMTMQ3+Yi3@lHW3x1%5DR$k&k7*rcVY|;(Ko(c7p{>&XNE+X# z$+0_6P#a?v18NOuPZQ>r38r?c_wHl#q>CEV@15872pRv?~6#Jyd9rDNL^I8t@L|CD>=h~}sQSrw(@Kqh+FhCK(~^hKja z=C3QvUC$8F$tp!xDtxwlzRN%@o1!=hy|u9BEOFwBSh8!dS9uSwc|o{Ns5KR5Tecq3 zQK~JEeD`tAmBk^l{Bk)Ytk-7`oCT2zkNy{Hp)7CUmjv?5cY@n)n{)X#$vE@{g4$!* zg(c3R&myA5o;GJpPj{C(;%CIp4rE(*<%6W1o)LM*ag;E=2{gy~umEtd$567_Ei9FG zMKEg+HRg;Hh3WMk-R-o~5IvyR-HeSzF1(QW%aO3eaydXtWb@5NXg>Z`;IWN91HO;VLa)D8!!aXorRVu&lR@#bI}u zne$Un0CQA&elUmgBn4kOZQ|0vN6(vOQ2nMLV__?7 z4E%*F+9T53$?V8zty&kx%|WVL)Xy4h9m33b1nbZl{_04qyWHwomf-=dNCZ|#3du#OVo`c?LnF z&7DhvqZzdsRir+X1$Op4#kO^sRar~`{WRtPZ7?53vk3Y%(_ugR(f5*i96n>O3&gFk zo~_b7@t5CZx}Ioy82O4bc0bMo0>1c0|1WL)+X=Ro46So z3a=mSkc4v0gPQe^VQo42k;J)7*LLqV=Iiuy zk}u^nwqet}ynMeRAK`~4^f+!fcOV;LoV3FKfu9+D8?c;*x_Q$9aRaDRM}G)Inks#0 z3}r9hq9Hq-s}kvaxX|fr@05HYr-ERc3EJgMkY5~uLJFCcaACwnM&VLuV?=Ac7*xCM z_tj@8Xp)`g@)8Rsnkl>!Vm^Jv5$Br-CPJI*@Pfch(kdfk|J%chNOpNf6~oWJ#%GUu zmyGsIL(Ldqy!Ph=A`>tjJf?&F!*5h+fTae2552Sk$O|2C@GJYJ9%muAgy4m)8v*;f zh)HKpDptvop?#qYRVRWsjj`TbUFx0qpQEJ~MMmM`DyVZ(n31kT4669b&6b~a{$op5 z1zRmky@ZQVoy+n)0^1qXGdT++ZQ>||;q4Pr1{kA}EcIYlt-M~v5sZnw*5=8Q^<`5cQ3{`aI^jduV{*`o zOhb9X+jKfWu?jXK;sIb@;KldDe%^m)7*P?4HUXC?MJ?1X`%R1j1t&n9hS{u7hDn!pSL*I|C#yNtQG@;Oo@Y7K{$e4Gi26L1`L`#;l&4-B}YKPs1bjk1RBMHFBr!g z8hJNUqMH-efx)N4WnIVzHDX&J9i(aJv^+oY&<<-v>m*!-BRJMPiOG6&0>FfQuK^_H zI?{X8s7P%t`&*S2DBVfQ;$s|xN)jL_>@RGQyXkE*| z*a;M2t%4x=hgVQ%B+*$`>%9&e5lPJRLTq9UO$6)-qRzu@DRqjL)^?_O>UYt*(qCVl z>oHCjFpUQtx#Kj12<82VoTs#HlVHKeo-AH3ymLBJPlyf(06X`>PhZi6)UZ8N;QO?6 z(8OF`Ghg3ReJfph&){&v1Pgf>#b7Qb0#CaNQBC4x#E2=C^%)#_3FCL7QGGwFUJ83OY9(*J>-I(>ptL zfJY3yXvab-;jGGjQM%Iiw-YtSouno=4!!MtYt#mZpD?ckk6urXkkkam$ScWRTfX*c z%Ej*wG8dA;<6SpJKP8X+g3JKQx$7vaW8z+7&DZ8Rf^Q5W*_3(PsSIWD%ep2ftv0O# zjvnu%OZ~HHIX(;ZhFJTE1}%|`0(_`~`S=AEZR37-CPLU{ZTU`% z)qLed9eUymd1vBYc=hjoRw|YkP^ZFWcmMds*H_!uwRTe{oy;wx}6J#eHLd^^eV;l%yEG z!lM#75y;L0&eJ!sgqSCz1U-5c!MA+5d^Rh?rDx(lXB28y&k}0kT=%WjNcV= z>^JrdkE53>{u_CqZ?37u;_ibTnd(3&Cg3Md`PG)6 z$PRbQ$9cp2${2%Rq648vSWg!=$Q_8J z6Qh^#Hz5E*Kz?kQM^r5aKZcj9qmGoP(kKG1c7LED>*{Z>T&`Xm$ zNMhhN)$6K(FW1(7FLr_P)*#Vb-f_7p>KUWEMWYmTw!Hq+|74Dtcdo(9a|K6P-U3Sj zQMj*}L`2m;yHQ`>9?saM_VcF02-;>`5uCvG;)sFqo4VMVZ^XHxPj!d+TmF%A@xRP# z;c5g>l&v)ypS~b<;pead%3LtX__FN%K*82(Az52nGvAuiF{YJA?3x|0Ch6I}MV^yU zrLB3)j2swX1uV&2?qJk>Ss!%cJX#Te9QnA)opeUcI1xhtAHMT$c8lgZr%_nr5?6QF zJZx-!8WlgJ2!!LK-{LDBInazcTy5KT-i&Ep{bb^J!pH8DB2L1G((5aEhX&1tK;sv3B0;&S{%=8Ql zo25`OVXEI6XpRdqkUs-qa*Ky^=-`b#*6quQJm73pmY``hsQ(3CvC`KaO@ zkL@}uTbfL*^07FFt_1Jesz~I{e^sJ%PL`Z!3$%K7nD2YP{hVc z%kS<>7`ZbzgJ#XZ5BaDYjDPCJ9(K&ZK8*f%1f>Hu)mW7nYZN3Fj%g9Cc%$FA-duP;eLWmKHP+g@P%X?Bz94iHNc4EL8|J$E=mjE1 zqofnAvog50SH{}*c=r23NN)c0v`-zf*EO;uCv&AH)wo*pLGcrKrF-L7-Io}tnrJ7z z!eqbRkz}sy9Fxt$C8?K$64?zwm6GeKF7AFY z4VP|)hF8-d`N--OE3#Q3LFsguOtNv>^g4y0NDrt8UphZg%+TA2VG?57i~C zO664TxUaTxW&4W%kb9i_%X$1T0)N_3T&?DOu7(%a6iV^SVWak-b2W|TeZVZSF-Iz( z0ZUp{Xe)g$#pQFDk=?ORy>sop^HtAL`fWD=Z7iP2fEw#~!g3Hq?S_40DGyxdeE(<{ z&iQg(m&)8IL(vjR{fhV%5h&pE(z>9(B4XqiA011goOp^wfT}qPHWgfsg0~-(Q3NkH zl#ZJD7z~`{y`01t71l z77j=ai+yHYQ8!r7`xtk@ncDd0sKV1yWi}%HV;~ztJ4*mp3ca1eK#L3?`9-DD9`Zn{ zwxCvnN8*Ifyyv({Okqq(*7hCCfuVq|j!sx8M7*vDVGkCkTqx9s!E2?oskochSG3OP z!q5O+g@8-G7|c%R==^27Bb9$EAL1-X32YV;7_rH9-_5K~J)N&B^EtVPRI1WW(_MS) zJ`c0fHh*V&#EYQ@T1D-^==CGOQvM`CF- z#PAe^2)OuQTsC92SzV%X>0qW*Q8MBWnkw8X6loPU5xP!q%sQBMnq0dT>-n(_bRvSi zb%ZzYMrLK^98n5oBoYy*UcF-S2W_Lq_GYRCgjWnj}-vx6t zI1UCm4Jg`4A+Sn(Xv94lD~jsNsJx;tUMtVnq_HIYn6vVXFfw3)&h`h z(to`yf={?c=Ntdn`6XS7XN&Gvcwl6I({Ou9Cw7h){xi%ft8Jhbw^&YfuOWI~;eAuE`G zE0+Lm3EyU4|1VIqY)gR{U<8naJouZ@S*!u;x6pU_J zCE0RwaU7ATqdQLbaJY^C{+jZJ2x?T{xe01opx8IbrXE*JQO_r#yYWJzm=+R}suCLj zMjpN@?0E$VUV=>f2%GSAo-810GOrbm8X6ee8OuGPA;d9eV$2fo^9&<9FINKW+w3Pq zRLWm#`(I~8B z)YJ-Zua1D~7cq0W&#|I<@27}u-N#2fLmTyayb=oO)l&&xcV9xb35S2#+YH6 z)t}WkJ5Oo_Y-Cos0sU`P@Jego4ZXicV}43#{wxV-y}QFKe86njabEBRtlQ z(0BfC4;JVHO+GUf!nvXuP@B?0_NDBJIADspznz85KSuF2M<#$JZlKTC(R1rK-RziV zC4nOwl*i7l`a^-j?g+0a9HQri`BoJSU(4kaQ)l+0BNmahz^hiP_SviancIo;dyco{ z4Gcor#G#_Qsl;7;%dnH{Z7v2Y=VnqohsIR?mW}(#ttoN;;CZ>Du-2W>7}~ zfMrf)8-0wE2bqKQoUJMd^)F`W9D;8!fLh1ic^;eNX}Plvs()A)UO=x9FOz42m-|3; z-Ri{N+buyt_iM3>r6npCUJ7szzl0xg?;-*H*zX^DqL^aY1&NfZ+jNFf6y2qL?r_p0 zT^eTm0T%t}g7k7AX6j`ir}6W6;%&#{Ys`BUuM=jc0wb6*CQYU4y3B0?Jn)~qyOFaV2-OSl z>j*1%%DyZm^COF@oQba2&Ba2C*mjE(x|g3;C~O7J7AS41Hr(+z@LB|u0z;jJy8|KM zWk=4>pZ#IDAwKjhaD>Vu#l(49 z27C58b1xM8(NGplkCfeO;DUCDX*fk{e4$=MHpJi6ICv9_oW$4qt7lT*rc(^bC8EU` zu8rcYaVGk|XXInvoCk?ymg+d@pT`IAjCIDU3Q6FO$bZAER}2R$eRl@=(|- z+Xi}{{X-rNQ1+E}>o-hK8gVlQ93UTtfSqV(<4Z8R`^?2^84zEivhS$ayU*cYcFxQP z1ECJ@=2tGr5}RUPP}K>=vD2Ap8pT6t@4qYJlY9oKGn0D;^o{ed8ZLh!d8_!r`vg@y z9teb4UsL?o9i;Q#h)TV3V4mH5EW=3whqycSAUN%2I1q)1bH3zz9w;r((Uomkznf+o8SpRcSJw?7NcMtc!%j3*g4FCJn(+PiV1&+Tq(<=+HCgcdn z-|YBVrU&nZsN$=`+5Mt(PT;~^0sggGG!lT_SMU5s_CNOj{jfjjA*qK5^parwl>fZa z%)95b@E|ZWnYQn7{esUW6gS`!z%@lF>WkEq(dQuJ-6V9kuqc66SA!Kmj|U&0h<3^d zV9(JnW7O2q{QlpSyZ_lwUW8Kcb$vVIuWq!nX)@ypta&h@!x5UFmwQaCDBx;FNUZOX zFR#y?x~z)WKW{?Kjk&5 zjBJ41-kpw5T}+Jdc&?a>N&nz#lxqCFVq-Qgi<+v8|?b`Qabhs;f)5}{HdbXUk-m1f7tiGFJ`_=ml9WC_cJdHg;31_B#jWm0XVuSbT8T$8AmB|G4*_iBYVL zQ3TWmcXpSd*@g1-uy?K=P+&!Rf&`%bF6RJ)2vUCCZ|e*%^H%@Rmy1*_=R^HfWZ9{Hi?h!W{)0a_l|>2#vLP?))< zWqVoVIEglob=is)Nvp=`=YT*@yT2atFt9FcAi<~Cq~FL_W%yD0Y_b1cxO%ruK@wS^p< zKnXPQ*3H8B|J2uwK`o^Ga4Ef-~05Ax?GG&Fgh??G7JO)VlFSI;_k7fXSgSU6En z!5e|klr=Xc1GfgtzV{*l-G+H$0V8Z;orB6Op1F|7O7T?B=>zwqw3oq}4N6n%0hPaD zCh_MYrf1&Cz%7fP{?;ao!`ZRv-U=Fg_4D-iFX9Hv>nlSI{#4!m9h7UU>qDpOZt&H0 zTkRrF&{(vmdpcSDKw)7z15n4Zzh_V{pV$E>tAXwDHEYDjk@dWZ78xv}M3>=$*x37W z+vaAk+J=T$Xm8~80hM4@LsNWC4u!BUWiRMXZjdnB3a+_YTmzpTHHdnIM#i+jb-!O7 z>5S_ye67|?=y`MFdj2GkdnPt;dLv>~)YBB&6}spB>_;`#Tw{{`8VjDun^V;IU<>{F zJvOqdnm}6$O^_iR*qZR;f+JV!MJlYsG5_On|E48Hi!<^we<}r6F|Evoq~?9dl@O7H z^~~iFrRvLXj5RB-dsqY1Gi+V?a37?|pErFich^*bn~7r2;_LPK#JhKGF};L0-47%^ zFMrWMxqmXmzabJlEUB3Y92>qwdn5S2aS+ao%C+mC_*mIepZ%EWo*xz!K&n)xrAVTV1$=3-izxC!MlF~J@v!?0oA7_n~J%M^=ITVZtTQlcdM9bSr*Gx-##=i{# zyQgaf3H~FL*3HA&%zpIdq08;JUPR}I`~$kmE(${(_m?O5w)(^_3cx;On_H#@;C^&} z?v=%0-Ac!*+VL|6mw=l%HM}e9<_P0zQY~pmU`#*UX?5A8jmnu1kyTYqgUpwh*(Z1o z!WyF=YI$aH;Z&`!`U?IVwpMA>HpzmIio~)JE$mlE`gSbc zTArbggvW$l01mc}i(@OxUU|I7P=aQTvR-gPLUg+tMYBE03r z#aUiI%@h9%oEi$M6SK{(YWpiWAwj{j$(%H{yu{KQ-_DpyLv?2WwJAA_zB=Eyri#bIH;P z_YjAshP#rX81_cf&@^{lh_0m9{9*)m15?(Q0_w(L{~ekp`IV#+t|qpk0Qbzg%C{}6V%KI(j7 zRA9m$nU{tcRA9s@r->EjuoAT=;OP9cwFmxWVHEAB#I1z~W|!8H+rv_+e3SG9Ok4%| z$$kAmZLcXNw|yWprdg;hMehG0aN_fjGdFF!6?qK)>VW zCD>gB-{@PH2z?;a$AlA{N zzoD-!62N7P`~0dB7L9^fvI)eaia`n_{l>QZb1jrt=8t|mhE)FkcH?ZD$9v1H!o!3x zaQU}!&CFf;pM|j-tnbGobC)9o@p}KuMUy5&@=~_lX<@PU6su3AWyI$Qxj7EdG_qY; z)imP$iftpaZHFzhFx1G*ajgWy15W;A#^cP7c&JRhf%7p7GXjKbNpXRpf-aS%SgnkJ z;ViUxs2QTs`@#9=&VZ?<79++-9C3o%4^k=7T*Ngov|CrfAO4}%DZdx!lx{D0*KQR9 z;VWK2x7zU9rnR^^9hO?Ljj=y~SYQU(NXXSs6RPk@=b_H7&dhVT9@VjY_<`{XyV~*4 z=0wY+&f4m7d9*o7nnF1hZ671()d%ei1=Hr+2CGGddk(i*#xkq9Z|oG-G<;R^gm!Os zHCnt#RB%x}J3m1=3FY)$9MsB%p7~qFa=X9BMx~2u z5YR4G$f?K!4;EX7XAq-<3#wT%p-@YKy^z3W)Ym>toQ|0)Ni^>HW1&jAI8_*BkQd!m zAH21!Yh7uCI=0@KA^oYnYIme)KIg)FVGj#Gsgk1&*HB}~60U9oOaA^4zpZe483|7}!=9eev9@4`$rmMMLk`u$S2kb$C-PKWp=6;SmFFK$mhS&* z$}}B&uCm0?SE~2_l$5EVM2cRm-b5&aErZzi-7H_q1Ehopo4J&W)`_m22hVw|BvIHd zg#6j=xxY3+9!E8Vyw)l-TZX2&p2nMJmCu9>X;q(z7GCiHzQkrprQ?Ig;M^Pb%zAz+ zL4w8*Y9~$*=NwFm)rK#@m5%#yJuc*p&XD>N);M^#jo;bF8dDb_3o z-N%~!VCE?Q$;CbG5p{+-uCh`}=Ck+%z~uzoJQ=-(-)SdsLgtV=ot(<;q^=;Wx5)WS z3lDZ>6(#ulNZHR_Tv+=6t9eJSC%uk4VId0dmfxQ*_X-%sLQWDYh=h_mfL4dCyIx_I zD8WD`6(ziZ9;=uljLrX1)=G?M_~Xz3bi!U<7n~g??DPLqcr(u$m03SK6x-zNWy%TS1xheu)=X(K|0A-46T3K^sD6LGD2YOC~jfWk8(P7EZ7*9*@VZDi0eb|<`&;Tw71*9*U+KdU? z>JwHA_&S1|h;;#}kLdUzx3PX}*Tm7Ta1>yrtfTQtJKNJE$gDPH-(&;oulv=b2vF9(Ef;0#YDp8vpm7j9SHbD@f*frCehQe_tosvODh3%KUt9{sl&F_sH3t@u zZ!p~d=BgL-#-y|V+2eaIQYnMdY_#H0QzIM1xQ>KYLwlrZ=Yph?&_VUZA%UB^jz!{D<+ef=KD|j&Pm}FP z6XQu}dtg8uja>oPF3M9Y2RQ2-OhhA_UhY%6%#HvO>;2-P&7K%U zl`gv#k=Eu2c2<}ad60mMixJ`_9c7@eSjvutdou51M%^+YzNG@^8S{`Sz4p&Y9oxv}T7{nXvD*aJ_}p(Y%F?^&_iT^s z@#)@DOL-B`Yb_=0Ys!|fSva=>nwKu|{Uqzf@-iIZu>uRk#rQ+)VG0S=V zUlkvPrMqUCS3Or2eUZ3v+;gD6&qyu$8%p;8J;Hs9vl!_;mcu_~pvS4xZc(mOf;

)pDQX@W zJ9gV z98E3ICK#rH($QB8=k_|IPB+-evq5cKB~VU12U%f&SovLgxC`{1VP($>(?71LHv95? z_9CgJ^|TZSU6RrXe^m)3i3*ufL(W^C0=%A{aVRbhMhqQN&TzG zRhl14v_)8A<$AUKtED|ffAXH@tziK>u-trm-hBg%aQ{W^BuB9%#fIItqAc)`j9M1G zRHSX(vq`?MO402|3pPEw17yt7`LwR(HV^FYZ^hhPWjoN3$XI;h9zJzWshX%ZGhpN= zx7`1H>)2S?hf=QW=k2VseER>(q~^vG-xw$E9pbWU09*IO^K#8p^J z4tGx!PqIkYgh7sI%>qyU3&Upa9^wUFPl%^mqZ69zUQx7(NuB*KQ|r)?X8(=b?!Y8`8SU=o1Lo?L<)G!pq>SQVlE!E~UzL6I$rLK_ z^rqKR*u$)LE=7cdoxuL1he)q*KDhDNTnj5zMa328`q*0srB(=0Ub_hqmyT%idrqU} zR{@Qbdh7U}?x!^|c|{tT(Fe=Xo7;N;y8QO-$ll|90K?eX((U7YrMXz{ z*Im09+jttjPFQBkL$Bco{KO@zJi`f|TJoc&ydmX88&m#w3V#saq1NoJRi>?7*Jj>M zV}@I)Ss9ln&2Jx7|I|M&GU%F_KK45Jyl7W&B_}&s%Ur!71{V9z(1pC z&}5uPf`%Nf)q~scF48?XQ7I>Yw_s3S)uQ!t!o1#H001EMrgj}V^oTjG-L9izanZT56?wcaiwK4Kl+|f3=u+wR^V3X7S)6Gz z>(_kPO;H}aE(&)V6@^w(o>F|R=`mS2AnqWDkF<^qCOI-`%~$t^M#j)`#@wD5^|O=5 zhsPXx;yE(D+0s2b_P)#53d2=nb78XHa)imcDUrGM#I+7}us?5MNN+nc0c>18GUO)LELZ@5RU%_6>Ow)b zDRA9%9zCx3mp7gL=ao2xM9d87WoN&F0IpPLV?Hrlp7m9q;O@F+(A4|A)Qk7}siO($ z#v5zhJ7^hq(`6)6+FmJB30d>%{b5jJNFuFDv+V#wGBR%^Q%^Ka#4j5`i~6?8(YVBn zq~J1?hbKD0Z#-4A(7sh4Q9Lro02V${zTg#JnO~;uJG~RFB|yRzQD$qub3pq6&0hj%&&7xe7w@JsS5``9$9! zZ?HO!9$~$7MxnQ<%Q>)cNgkCovnZqOhLK)Lk}$u7kr8|$L)s36#$^So`-4pf0{&R} zaa*R&+aCnr&9Cv|#=mDu<9&BAef%i>9F}qGG=`q(0=@oOn-M5OmP~;=lbm8Shz1ZJr`*=6(E$emnUb za6KZY(%~f^P+8;S*ytO`e6dP)gvOoS(x&};Saw3UxjjdzqbGheaoFl{j7VkjT8r0G zcJBJ@4;@LCy;3dP!gT4EWD*MY8JFq1qb1jT=VxEr(WNpDIz9&m=l27)pCOPtdY*z^2{ zE{%k1z`u)b^EzXFYd5(gvj-<`G`-KoA8b7LQ*#f9-!e zkrPTwjr}@L#tnXTZe7F0xZ%C$kL#y-e8haCUlwxwq;M36$tWE@5>I5+4X$z8H7N6v z@JXEYKc;?q>XN#QNgF0kR>R7Sa<236r{hd_`glH?k+g59VQYNR*aUu#B9U)$7q?oL zhQ_vl$f7cmzU0hyItNzzC%Ifda?qt6a=mqie8#~$qvDp5f-7O8lqHS?De#wX3 z?g0r&#f00T-`^sZ2!F5H^nnbH*||5YhyyP^EN+8{y{_ zARr*U6P2cv1nGn(q990-E>a>OO*%*?fFd9@^w4`R2`z*qgxmu>zx%%5A9tGp>v&xZW};*@6(LXJjbRZBqP)OxL0Qu(LPlX2r9ZJB0@D;4cKH1;pn^27 z#sEDq-#X9D`#=h1)LE;Nd~xhv;FP?$)ChELwj@;YyClB!k9l&v&cFk9i#KG+fMLSR zc08OFAE(v@shh<7PEo^txJF7z^yR%LHoUa^Dl35dXQkjP!m>TXf=Ch_*e?5A8w$3;a|qr+7@`CE2=Qh>^0WC?5s3-x zFxr=zxuw2%{3@2&!ptN^6AUls>uh$?kDTrhn*W_U)ad)jP%i7 zcQk^d*zek+CQRx@Mq%T?F^TBEEJcl_sL}H+1@xe%$jz%QBEJ_&5zp_b4KO?G%8qe8 z=z9!jz241cRc7l}g50CJ4^5tZ_4^VE-qm@j`#jkRo?(*G%wlzp;%)cgPGaL=jsoem zh5L{Jp(=n_6#=26ps4=NbRSRn(pv8AO?voY-_2oN2JHvianEWi@9^=vcrIuRVYTWb zn&NM^x*hJa!+-hGYh0vzN`*U2x2=D^V-c}K9dywHWBl9q{CXYy_bMu0%Pv0N=;-+q zpSpZMSE1gA+T3YYQ4}cj*1WzYGj#q`#_CWE*e=h$YyDPS-NCKmMn$-XkVs`L{ z?~0dQq}BQZ4%-4&j))krbBzxMVH9u>5>UOgc?yX!6nNZQ!G82)9d8Z#8!72(=nb3U-%!nABRru(N3I#>Ua8Igijzyiv>c(if;8d${)kMa8TU7Cj{A2LG znPllEk;Yc7-JlfA%#Ah_6h_~>lO@-arw_Al=jG=+D32c`H{z2k9FL1L zw2M9IKrze&@J7>vHSG&O=iOwmg-|~S4zp95mNcCFc>9B>??banM;&=M95>eBM_kzU zdhpHQ^ylkAiU4qe;Dg`Hx7Cs*p%4O|J4&wXO7|%WjU|t{&525892nqE?5qeZ{1+&$W1z-I>%Dh3 z>dyphQ3sU#`d@CD$vDrvy}cSp%{elYa5~RZD1;~9v~kp~W+%ICR-hnmdjF#EWCjV4 zMv@2OVD1z9>uG)!;Y58W@9V16p4XkCEXX9>}Sk(j>aEW9h%id zolr`|^W51crnQyF-kHgO{(N*;6@-p}oJ{oju~ZwC^w5Pg9GXlmQM`?3U3x8k{D|s| zXZ+`ioq8FMAjDs<{uzyjF153}?XT4<;B#*!8`Y zrfX{V1g?G06Hkb2)G;I!h>&;7y0fV1t8W|Zi6R%U`jtoT^l-indN{{?tXe7$la0Q> zlbo^y|07ei)pFgudbIGgBHek|1gsajy5*|O0S@9j73mUgK*)`XL=qvByjkn#QBFPf zS86wZ(>WzFZq>Q14=MZYnsnNpM|~=A)Q|c3+9>B}u7S=%9GaWldh_k|u~pBt;Orc6 za~YPkXFEytbmTgn?YSy1wQRfaN{_-l5x*SB$YNYs{92Vr$RpX*u_+Ieke?>COZGXC zn&G_0qmXy<#JHO4h-KenI@A9)c)V!3T9Z<12M+O#QBQ2rJDcUG6U9|nV9%b+HjtO4vS%T}vElI~ z!DOgG)CiJj5v+Eq0XKmqvbwBrMfy(8E%=+&TBi3Gj1@<7t6Q=@2Dm4`!MLodAbBqT z(`5{st10tGba=QGZAl~2k|!=YWr_jzD9{o@ZU#|&i)hM`VH#OcF;|llnXEC&$LJ;M z;oe)g6z}l|N$B)N_hm%(&V9YU(1O13>$R;I{cohlY@43m*%_hYDmzkp zeNI{y<%8~@m=NMNak8TA^S%4|DHk0r3I05<3C&^tejYZPMfbkSTkWljtMDW$1x86% zAa;MR7cBUlAh>DqEr6r7g0QrJngTP0S$no+|E2J(OlUlIS5nwU4no2jrHnl=8N{h{ zb};_v+?&|H;-@h9?0ZFc|7r#u`A5dCSxW7jI;F39cT)52@P$C?e-%>WJXdNTw6q(k z+Ui95?+7&>n6PIj%Vvi{cYTXM9%6=y}pEZ zF0ZpQ7-AD58*EmGVwqN9QNi@J_x*l+Zv^S;5;i8vk}ExPF%4QD5-d_Ie6qw7=o*dn z?bUbO)s~TLi$ZFgIr|vQAUFIg4V5~VDeNd>%{#^ZA1V+345dEpI;K&`UPoU{ns;+Q zzeY@3I(wZgBcj_vb~r2s$>=5}%S;u=dlB|(-@Ilo;=L+SC^ zL!o7nTE<_$yjhzKLDbgeXBb(EthGxD$biAUi@k+aK7%;^<=-uvFO`eK*(~gOt^~?X$auLEpS@gaE=%*S`92t@gmRa~>Lm8%IV$t@usDsOb={yj z=o*jh--JPHHSGrT;R~JV$(}Mp2KQl0AHp8^{VpVFB-Nq`~Lg zAu!0=U}ja#QnyWM2JY0(Ogy$Ma+8vT)*nBttvCu2Pw+o4Z zfH`O=3ALjp^Dt9G>52}|or?-EWGkIj(>LNSO_NutSP*#ihf&&6dy2(I(>thrkY3-c z-LATpiOiw4cWP}_y2Y{(i2OB+fv-~?o_F*rmIo0lvO-zWRh=?MX04jA6XrF80F1VZUe%*DRcQw7{b}@?Rs1wy_`;+`l)sx6>x$d$SNXbz#pX-!k zc{s75yQe7ecEpm{hxhyhn~T$kA_W>vgGTbGaqeq%;Qh58;R+OArwk1J4&=Y>Yuu0X zH%%aBYk^>X&0C5hA&{EM!Y7;5Np263*2zYbW0QH%_iJ1MOdwo-#|8uQ049F#&?Lv7 zJf)oW_Ek-}Z+BR;&FUW?ry?Xy2(eSlLpmV0ujj>X8qxax&;xH`S7w2I3jy_`pWii! z^@KWVIl`KJ)f~c-1gBnaam>^o`zk3H6PA$XRhgSPgz#jC-U^CMHrUtUy9+ z#Z3SP&4E}rmO=9~X03G^$L5BusF!|IsMRhJv_Qr_jVZ`$*m+5TA!frpdFo?=&g}eQ zI7_i%^`n4G&dky`$)g$J=qfOy>&}wI%I{{D16hf=9rB1McX2b>%ng?Dy_qj!!DTgV zo%yf1r02@7)0TQ?GIg;%_pSa%6H9#=`Iai$w))VLP z7iGLx6|Qcx8pN;Bp7==yR@n^`F$gd-jj60*G%Yc9bi40l&}Kh9r;og4wLuM8dxmsS z)_n36R{q@x=kv2zFXnpfqGGcBT6k8YUrnWxG^6y2VXjSSl4qxkJaRY1?R;FoDEP2jklMWydE8>)KAeEZZW94ci7$kk=Qo)Dv{_p6{)D zYQT0DA9!L#pX(VV&pS&m>^}J#tuLnOdEk&;U^jI?22hKk=LyeUL+JcQ(=$!-a_mGt z3uq>|e}6DB*jC5veNZxx6@-#;I~3+NNk|LG%m^a>=+xHKF9rj)r90|(va@GT>bNZv zNzOt?jjjeTX$Y3d$`|{2^9Z+lSv4?S)yupBmOI4j$3pP7lR1!rg6OQ4EMpez^+tLD z9d~w;W38_9&aRuy;yAq2t={s(^g?fTOTy&FAno|~Q1O_8Fn=ujYcTE+4Lf4z<(t0r zcehvnSiApo4fj-8L0eR;r?xGN-J!*>D#!G)D>%9I^0QWJ6?R9cEp@qPF9J)s9SpN1+mnW%oA0Ban zVo2Jc(vHdMb@-v-F49iR^lPqWwr}?>ZU5@38phZB33gM-H2J>F)!!f+9vvh@i4iIH zvJ8Id3JVoJUi%kqydi+?dLc+TwbU)++KLTtM|X38#7Ir&kl6jt33y;{5@<6!DmN%7 z+>Pqo=Cg;+on`WSZsAobkOM1wus^S&XFk?Fj@)(-($?@$qEX3??F#G0?i5eHjF_w)2B{N1I0 z*2C)i21kP_*3M$i;0(0<^edh%JGx(8*!*$Ff_^}*r=|Jlhm=D7s#QSc6XnKGkOR>Z zFdfkyg)C>9%A~PP2*_l0|D%`bvs!!a@+R*hN?LDPsM9d%*a?BW1JmEFxm57|V{6+L zJc(%J>_HBOA;sg*W6pxhv~nPY+6|5ddySStRUG!Kx_wM5gI4LE#J}qlJzGb;>%;R< zn;E!vvZsogKc6iMp3N;7^T{5mp6V!?PD&c96v~|F`jBvfwWc({kNoy)I6oCIG2%hA z&F4}PTLWj&!85;5qeXum_SQOE)~eS`y>h=9E+h3tRx|e3h6pqDeR&4g{Ox!(VD|m# zicbHe^tWfE9iYw!o7FP9$uN$1yg7TODxWj_%i5m-4Zy7mcEArN(a6~h95}vA%q0BO zsJZ!6HegoF>5U45(LVbLV%{=wUj}^fGUC-vkN=znXvy2e{(kjqWwC}hnY#psg=L3E0;-Y%(me9QRp8xnwkYp@~( z#jtvg9#O%NoSm`P-V!#XqPJ{c40T-{PEa#7__?=0W9alGMLGDQjNNLyqx(@{(|*l6 zw5!$d+y|}vj0J6t4k4IgVs?|r^2VRKRa6Jeu8;3-_>^x=E*x1mKe;l*946DyQmCq= zRa5M--k0n!E^I7w_&W>a_E|sK?B(xV*9hL0$r@M4#!**shdMJe5WD*eoJMwFHkmP9 z-Y}=jid|e=_(LOY)<+Gjk&ul%Y)xk#d+`s^XMKG*!>zbhkDRj=qOJ@)95sqJBuSP! z3Ta;yLQjHr6h`-s9eAmkfPVH zzB{BxZz@OD)=dD;Ns6`MC}gTDUVqb#_E{?o&Y>|iVaigWQFrnbEb~^Q9xg!xM~Z*A z>{f4oqGg>24C-W11+*t_?@wn!HnZ`ASdleoxrnmQS7Nb@4&dHS%%*J;o2IlT_44iE zaG}-qs%s+Z$ZOFwrb8S{E%90}&42C2v%Y9gW^`SbwG%qTiGv`D8b*FnN>8g%P}8b6>`$gKci_oy z*i5LQsx==2LxT+Z$TGV9vNZ46#B#$D{2fo<_%`}XQ(*fn*!6q>nhEzgk!b@xy;OBJFF)j3GdJ5gw|J>JC zC9w8`7CSdJnocCq7Dp_(uPl~L$=TWRLAswNVE>4p2jMXypt8m+mNBt&;omFUq>aLd z%;5!rx(~nRPqK9j8QNgn_+fg@mnEQ#d#QFBFdm2dR%&J8+44W*eC3OkR3p%GD`^N< z^B5p@G~Mv(^+0!O?ebbMyeIO>}VITLTQawr(ulNVI_`gfs*KZy3yq9`COn&x&- z0yd}*ANY8+IVJ`(+$i30pZ5G>?-7E-Ra?MXgHWDBquyZD=jJ3UzAT`QFz=O&wQ&cG zsb8&%j;t8h6MBdkaWGq+qY%-~61U-Sx{h0%7Um%L7Oo7@6qPl32)HUwmoueGaa*8d z{vd@yAkr&++=dUe8yw>NOTWE{Qr!-gJSjn<@}QI3|J1^na4%H!Hx1r5n!^weBGmtTXPGN8hFW zpzXXbVbV{}Yo=Z;l7p^C4fr+`0uDP;!@kcO_SU@}!Eg zEcO`g_3&%7O&3nS5Y2&14YBYV#NnOMXuO_X_yW|sY(Yr_GuWK)NEri!0Pj>JigQo0xL7NZvnF3DC&tD`h#v>hnLeb}Ao`fEjlOhSlVsvRs)NR} zLLJ@X8AK0r<{Dl2VX8S9vK(t`z)`VBOn#!1y+_Mm$3>;x=KTSG!_ReE2=ku+Py35- zlV6Gm3K1JJ(M=glY&_CSmZPH$*aY_6QrvMzX38L}iZaGp&?z|W&8xcUyr_WRg{lgh z=GHNmYIYrjZ9yB7fL@>ed};@0&Z9}K^F^DzK_D8?E1Nj){O_|?HmOpAPPn?ka!r|l z?2N8(y(<6KLTUwx0CJMi4q@0q<4dnQKqy|hu>02v?1Xs0Li~70=eExeVnm3!Yb=+B zZfQnmc8757=EZ_C# z#iV(4>)HZ_zf;;j&Ysdk<&)`VIGCHK!?rPC=vjhH`}Ls4-OH+U=ZieoXGt8j>FGh! z0r|PWc!GW^Kv=Vv76K}1%uFAF?yU(b^GuiA@9U*U3yW-$!_A_6?L`#t*k}+20tC95 zK7+I-y?Zl@Eq!MF@7Q#*`DPV>wGHkYRL{)Oj4us?1F~R=BqD<{Xty{a67E=CKDpQ% z0e5~K@V7ANXzkn|q_*+bS1S%UAVi25E~JLIN?bCtiNm8}{N)TGriOqgTVJCWg?6g( zH;uxiWGzP}>>buIEy@6KH6v_jSQk{lC7=M%`%A;ZpDr0l?7lDw0pD3|aon{a$OnXQ z)^rNvj4h|(pJDQS=|QMEHwVvkVlH8dxZRhqkE-0B;eaR?L8P?vVQ?`1sFR%QF*mEK zaEok6+y}gtrk0-pWPij?hoFq_K}o~mqA1kOmBM`XpL1^oKTlEyYIJuch267@=;^#U zeJ$kd`1N&hh?*kq<@kGaw@BmS$xi9 zC8@jn85B>iKQQ`qAZ{lh;ACPNpp6;>?@9f1O(LouZ+)I_pg#xQwZ2SYZf!B1Qu~5C zm~+)_r8d>*d3S2zaq+2mg3x(PTth|P*h_SpvK_`V1wP|z%f<1{~A3ulcct`TIm7F=Ha^M`QPK2m5~)HifXTdu)cguyksrwzYHXD=6FY02n_5>6;=0j|NE`;1%Snhp1~(( zKu+#H?>^pW{-%v~UQ-P?h>n0zIja#DMFaBc(6t&dZiKy6S_th) z&!(2P@4ta9bizy!y}pPz^3-!Iq5dDP{vQGU|Df8wS!Dt~Y?DonSmD5E%8o;+FdRH- z268CPqSg75rvIR7tk7$5eJZ`sq3@fG;L6@Qe?(;XomGt$cq777$+R^0_xDJiST;^I z!TVcr4V6CUlch+tDzVLMtk`@Eys>aNGev%1iHD#PN82hPwI{Jz<14Cm%+O7@j&-wf zQK|ojTS^^8zq0H5zPpfp;UXx4!vl$oz-XN;6+Jfa8?np!(T}bD0h#+^ z1$}*S{iF6j=RdWfiq9~+nTnFOK2yc=XX(!W@NjxY3P~spT}5GIl_=RbHD5c*nbDT` zzTsICKBx@7>c^(0!N#t4>-rEGSHa=nd~$B&tp@G!veX%0tiUm2aO0}ct!p}BGqK3^ z_}ezYb74`|KRO8+Y)|+3Z>!&>*LSr{NPH=2${bF67S9#oJk>XN}6 z{gxZ6%``Lf{KFKr$1hzLLpSz=W74c5Q#Sp{bV)1oB_@xR^ys+zarz>c^$r^%SnE$t zSvhat%@aFDDpKR<3l&{AJ!L`x(Sa?TdeOii0gZ%aenXK3fIO%@fy&v~V(ja;JR^j< z3(A!1M{k?1W3v1u>s6-=s``9pn3SI*f#J)`9s&!-LYZH6N<$lmlxL~_{%!R z3AjHnroT|%8)76~_)AjWP?eRk`#K^Fr^>Wn=yAH8GV_S#-aiy?7caI-T)tH7{{{Zc zJY-JV>6P%|-auGe@O=^fl82fjyA@ookW`zep&i+qfO+sdsS@q?gubvPSk7m!rMTj# zQPd|6gxb%Kw8u!#bZ2w(b>3S4#sS?C;)k&9{XUnBe+ zpUxj2rxaiu&E&yY4mEHfshMB9K8$Va*L{WA@UTO+xs1z!@~o_0+w54Le|PhPhiFCA z33Caye0~UQI$hOT&*Jh|lMLu8ettXqG5AsGtHvz~F;4wpxsnN=&MUgM zeJze{1J&!5oS4n$zA?(R#ttSF$fIfQq_7wm#%MKuZ>%N5@P{|CDpp}w6~;@x=3p4C zC@Tj{NN;f4eSm*AyLPYdoPc?Fn5r%W?99##^BedGs>bSU>V# zuQmY_`o3J7eEdRJl(bO1w-p%GiJ|A0k4c(tutKhdOjm>9q%Ycm&R~hh0`-MCjsE3_ z*xf4;cNqU!JK{nVwM2SUMp0hD((*X8ttH~P$K6PQA=p$dP@!GGzBpp9U*j|_rF-z& zu{6K61(xyuo}AKMm5QckQyU66de2(ddlCPWHT)n#p&zPJ?yAs}MXmYAXj-e7wdZHi z7jY}UoAZSj!Cq8-@kU9gm(AJIE8@^0!Y!rf(MpxzavGruMsM+}*d7i~Oi8Lf44mpL6d{ z@(Y@qn{9IyY#(8dc2&Bb)oDG!B`G>7G>m*>{df>wFVFDL2TdtPu?86XZEk4qZI$us zLt#j8#dJUAi}_g8IR>=>H!2q^+Z+A8^cTM#xz%ZgC(l|oMxW(3iV6gt9wWbrI-t};gVI*T;%PSFXrJEypB}5Y7F#v%^TMxLXZ3hP-F{u(*9%DcIkoO9-B!;sY#CEO}c)!nexH_`yN}K<_)pBn(v{*G)FsMpF95jZuT{i z1D(2_tk`rev)Q(O(b%Eakua#p3sO;!@FQCCS>w z_B%?}N8b0SWR=}X{%iK`6?Uibgi)M|^(#y8{WL+aP>t6&V5N0F4`{NIv=aqLt1tE7 zTSIigVV}HTX}9q~UFO~M4&Y^#ko)(b3(1p&{jHlH>SN-Do{(>H2tFB5_#`O5p*Q{a z=H}$bz|jGvFzo)1s#UOl->awFM>iuPmzlxS^F~cz;u!PL%ogfrjt%Eqj=nFqDvDie zN~lyh`_TzjlWd^^=0uoWyru7*mcyV0bPFB>qR zpa%x)n&25XwR1Q1FPixk7B5mWM$C#&h@Q>2>i#HE4s0h??8^sB^1Ker|(k9daCxDz@ z_^aqHM*z3z+Jh<9jdn4+zFPn3@G@#*K17yatlE;02Tk+cpa zljrJU7$XPK?{OW)1>eyL=jFx*Bnuj%OQ`z2Bcd(|FmOI2jQfmD-E9!5x5QV%O!qO- zZRG}LxQ$-a5#~iil=6?3jd;w$yASA5o1K`Qno1jNs||CRESp}gf(63CPcCb34@ezN zZ5#1U^-LqD?TUEjFJ5CVy*+vj5u+~iY*a<*XR5K=jfXrpS=&pZ#lkM>CEdxE3)XBt z^ZVSH-xOLQRJ;tZ&uM?#DblH(zdrQry-&rU_vCH*y_NeCt0P(8+xTTdl}ydv6q9c;_4D;H&*n&HJ@I}EHH@2tuDIg2@h&L{dB660~YNWLwRP`TzEu?UB zF>1P>2~wQuGhz|EO(*6NGth45{c;pz=PjRE;!4lWzVB&toTmMFVN>J6nHsD6Z29r_ zm#>UA@$#-1K3f%g!?2bar;=vvwtS@(d3Ih^+S!XMC%}`#^MbiSC?Z6=Y}e^>8=B45 zi)CqjCa!a0WHdtC{GXc12~)DqKP)oMRF>>z9L{#IA8W(`FGlz1YwDbxvARP%O?u z3J-f|L~nJ6sw|_@IP9p^b$t1*5mx}3eM7-3rf28X>z!NFI1oK`E4uZ%i|;vF>T{PGa`D~i0uUiji+v3<^{sienTRK_{hGsb7WFzkJ{)$Ra#6YIy(&f>CBfqDf()jm;>*!; z{z17hft)4nYT%&@_{p}F20VuohZcu)*QflnuX~5`!s`-ZQXLHl8rCzBoJSAz@{hK9 zVmK-DLD>?vBwh<5^9^sj!v+Vab!wUXnGZdlt8I2FNj+KbfqJBWH7mcjdt1G0HOMHW zzb@Cc;%hb8sz$dl+AKZ|6y;y83R(>0dp+9c%a;5559p;lzn0?1N2EFr?5nnbsnb}sN?~^d zta|pslUVE=y(6u>-cMrjxPUL4VDuoCWX~~Va#=Ne&q%EjF6(iGo4@})${?Ocz42zJDjt)+%Emv0E?>1T0M~)yfjXO zyZ>Ti@8I{(p2̯luNxUl@?RwJJqqVNSb?YS3+6J_)5COujdDT@o~e&OiRZggt% zCDeLcCzaKu_>Rk-jl>|i{VtT(Y)aX=Qw=DY%3MR~i}9J>eOruLUk5Q=@lHOdYb46j z9|)UOPQ8#y8J9pdby&*4gg(vh_lc9%T|4n!T+1bc)r*RdgyQN*mA zR4gBz{Hp4j>wGYdK&h%lC`TyqYT+}9IKr$AihZga$t5k(Ctd>NoT0JUw7b|z;$i7_ zk2?ahZAB!lq)lq{1PF396SeU@hxPPmoR4Qh;#A0Fra$>t=F#Avi7%AI!!#Q!mZ^Ch zhtz_0xDSRs=dh1bn-KlNu%>6X0e8)L>!oyUc5>|qI7jdtWd zo_XjYURc(1*7jZFho z?nfcEddwNlT1=n=ef_Ee?ja`vGd7QQmz2&HzB9Iz^HEL{|f?mFMXX2MdU6J?= z%a?6M)T5;iElkx=6d`0VHnE{~s&9GJ+cz8ulKN+3&fZhNds|v%Kk9Ceo%fc9rtYbd zr^`83@*h7=m@bpU{W3$OK)xTBjaEHNzxJcA1c%680{i41-~ag0oKJ%38bGCJv$d{L z?vz&)sZX@`|dnPPny)Y_5;V)HubLm|bdak*x(%5iKw=XOT-R;{0 zu9O{@qBa)WmD;3B$rm0d(p#Sz60$KTn){Z^dc4Q>AIyxU7K09QI%)_VE9{%k<*} zMExJYjNw=vwSzALe~2Y13@Adys(T&@!cBCiB%F{J=uxZFeghycH)%(dd{5zMUV-lI z-6dg*`B?bjZH!l&EM)2DDM24U9*#I>9hWvjhf%}Xw-wN%Vp*$8$4Q;e zB<}L%E77vU;z+q>5^lP&S9GTPU@%9%1$eA0jV(}W{G^r1pY|IIq@?_z3`hvni1BI2(1?fE}nt*B9j81F@15IGp#2eA@bZ)oW-;E#@)wIM0|0z-k~&o=02xLc zj=$98&O_<3F_Lb5qDwwbFlJ_E$`MvFDU2O1G&{GbUOq>|)fv@I1kjZM^qF8hB?$(| zcAZHN;XG@+6SX#Uc!(q)Zhnb5p@-wuft3%7LM0acbSN|VF z0)>VS-FNBb4{mr)`{A5YPD{)?Qh^WWn5X-_gMxx+WG?|qg$j5x@ndVfJBgh%b>|2~ zq7la~keWQ}p`~`h3lBp3`gAybw|+_bO}QdLo;0@H0A`MvqwKXDc2JL^tEZ<(%COLo z`6|^3MsG=uFU#Ykzm1z8!2DJ+J!fM5C>3j`W*c<-yJoI?Ck7omgBYVf(lqYg$hf(lXnY5YtF$K&SHF*WxWq( zL!jG^kFCXZkHz;vJjiE%s{x0AwmU@0?fjMLRyjpLEWO`~(&2*k(Sk%9h`3Q+_2R*; zGS-t7Ah`#5w1E`CAJG7*+A`gz!t)?G2ky`IR_qe$`fisva-s-W%6U&`Ap;cNKKPBQ zFpY@v+pt=fVn30A(Wl20R?oWXomLDx1YTM;%K=EEQ(F8-$7?5Sl)byvOwXWvg#tGn zK$b!N2QJgXRwQWxS?F!P`0nw7;pr^00CG_@9JGiQ?LIw$$GRL3h~#<2%jYyKMH4%H z2?KD;DE`pDyy+*1RaYPlD9{#c@crx8<%72WFtAwN341w&Qhp3!I^_<+A3phCZ3;(k z*02Paw(==r(udfn$#bGdb2423`WQ}o!e#=gkF9eQ5H2}xWopL*QW%fZA`S8lHlJUM zHWM+&T=@96JMplS-fPo{oP$jZ2A1cWQ}_@vb>G(HI_TIBj+X5}9(Ez06&k*-Fr|5sp5D^h^Ctb;hNjGp^%>!39UprN>C{4hRTQSyY9W4t`LaEyh2+i?3aSjUO`Qv;Wy zH6NYY0mnD3l~~x>%`p5E9*uh%jprO2aPs%=l}+hg9*sLzm8UJ5cJ{dRhxD7dKR|c| z5~P5SRd_)@A_4e`)>J)Rs*ih)KGQmclP^$MU80yG@=PbAs{}2%qqi{qA zo=VJB!MDPpASH_WT0ol3cD1+$a#wMtJIuji6C{Iy@zzUmVT>BuDyDXSV@VymMIN-v z6F+v%HmQ7i&mSr+jnqvVm-z6q6h^j-h0?I z#Z_ae#}%xdSA^*thi=8VNx<|tSyEc zE%u$h<#O`EvVW<~9`;MbAK}|la|SwWc?Md<>R<_*>R{T1bc_2T;k&MNV_`UaB6C}J z6xOxeYxhFnl(*<4NX#=@@^v8xWv3BIRCN01F#3Z;>{-x!#9hXY3m*$;{+AgPah&y! z`2N;}^&9Ts!wAtn3p#w!MYN{X_jyXZYaL89sKshm?d^x+M0)T#$fEhH&D)}v{!{tg zeucym)7Re|A}pN3J^uMv!1dopUA0j2m&m8e?6!Buc0byhEYLNvd;OVcpHBjLRo&|K ztMK0Eq=8K(N=!a(dHI}kNnoqsRa2d*>-1tiE-~?vsi0bkVdfG~mM^Sr@4idJn8@3T zrJr(lOSZ)087`YJ|966y4bufK1}-kPA>e{7<*i}vOHqZ0&*UFGA+^7p>QuPS7~ZDt zD*J?ez_h=2u60@crpdhFxwX{YrjC0mpUxbh&!aBwfm`hB&JCL)$cEz->{i$88dZW` zUa9f>?=0WKHn7Q1^31wTlz*t)5TDe7DsJo{yL751)xa?zmhnx97`W^&tE{Bzd}}^CCz!x$mx_XB=pG@1K)8OMfj&f|LC%Ev4h-%U>uQC@|Yz zDZ7jiG+HXEZ5HwC3)^} zM|jrN*tW8Ka(B)8P|z|8X_9rLv$Md}|DN$v4C84BDbKxe((Z3BDc3CX_|$o=Xyi-E zXR&)Hx>|!|6F38&S+vEc=M;LEPeQ|b7#DQB>N8P5b1`qb|jLjwX^CONuD3uvP;jvHlZ|DAWT>P-C#EzkJheCLPA zvh&r}oiTj~`P3}DcUH_*_G%Lg$|4>ktUR;b`og8eL~-tHHq93;A?B-6m~W})pH^Hd ze!Bcr^4`XW_ri%HS0#D>#`23OpI`1Y7#kA)5_r?MvBm9D--b|7Aupv#tnOzeVcz13 zP-^O2`c_BwJ9o{+FD1Tk*vN|$dcpta{h3y_N0etSvj$0CTc-U$|JD(0yO*F>2Ex6x zF?^2qGo6z1%EJ3>FvHm0$oDl=a;-oACFuN@5Ah)GL+_q1E1YM0evvkW8EiJ@&?H)* z52yEHw;JA1Eh*9awxW5?>+F189o5E!$QdYqcup+4jIh#Ot0ag?Pn9e0SR@xyn&erH zGvD)OUs8{Czw@e9x+kvT?vZfAp)b-!_g4R3z3Pv@RT&?@@A<#DddsLdx20P%G0>;TuPDXjuN@oj$ig`qImOxk6RiAICw*!)W>Wit zoKZ@)Qbj9h`+6Mkkq*Py6x|Rf!qy{DuCeiLz>l;S}0Yu)t}vltZd79km=`8ya%nsiW(TOWlinF?_{e+xlfJS$N)(+@D&49fr^k zXVwM`c1OyC`RWb-#I5kmbO+Gezu%fsym%O>4Rca1jVOS0F70oM-thl_oTQLxx6acF zlOi$`iposQ9dDenOBOA(&i?T$EKw3SoUD|@kd}3}1D;YVQ`VY`>l!EbpD|kZNe{T1 zcpZ!#i7-~bRE;RIE4>ABIh)ZqxEMtj>>(0!C>KZ`&SoSsRyI2sYR84FX^xc~Yc~7T zud3_#)`klnMtNmxQM|KuZDZEfe`YX4=_s77pK|hVEaOVvle_F8FA1aD{p>Sd)FKQz zLQ$ATu(72@yz9P(Jt_LpGj#xe^vI}c6dx1Z^MqBLO z3u%zCVR^2@bB;A8QsDSYE_c?m=-0Ah7Ubb>P}hxQc8XJNhUDY^Oe52bU(3h2eT{H) z3vqjeTAXGQPf>q})aX)3pP417O3PB9D&Wy~PZhG;uh~`5VJjf=7cBld%!wiXjn&lJ zFQY38ezmow=)ITFl^7F-e7dUxQv&t!6I_>hM$o13XSYY)j%BA+*CQ;>5p*Ue?Ce+n zdmA!}w#5JFBmX$S#u9uira-R%V0)JWe>D?V4C%sg|1R+bV|nD1tvK90%5} zve?e264omos@B487p02KAjimfRf53ZTp#}vZ=Wd45eF@8uc&2{tv5389F*4`rYh*$ zY#N+06)i2Xlc&+WD+rWq2>NF6BDIvosf(yjEVEa!tCA}d`8uG+q;IBSZ>Et&W)QAE zHgs3pr+E^VnJLj0^k&6C}O5TxB(#+}pqDSqId%C`(E?5&SQZ==jpU zk{yBJe*Pt^@I;p%SUJ+u2Dg81wEi*h3?Iu}Sth#Qj7|WysVQ=~5?M4G_33gUQdd)Q zB0QU{%o;^cvK)6Op*&w;yax5q5^q!ImiRFdC0`9Z@o> z(LLcRk7O^rl}T3`ZQU`R@skujn~VCEf6D;sj1CXsqw$k9r(z?A#-%Wx5W6Y66sn_g z6l%U~P^(gY{n+S>hfj^MOUJ%E;!Ky9*7s$KQo12LHPgN_dn$+{kr{Bu8VfS;$rkq( z_EtFpuV`ZYXpq9K>whEN+D+%ZuAs;=zv=MPGy908kHx5eOdk;iS>JBLFIY&sy>@^8 z<2NG6rih#56PU3v96=%`AqF{Y!M|&aI=1ta^aFu)OZZQxAYxn@vv* zQ7n!G!6rOpcjTt@KYziX(w$&KW;Jryuv&Es5!}2;&rCL#r|W-VDZk+OavpDu!Jc6l zt)XBCugP_tFkv}G-=}_uVqP8KlZBxdy@x6NQOx3Fu(e|>q2A2?%p@-mF+&s6WcLyq zO2%o5SkXjyZcw12RSG~Bx1}?T8q>J>y}-eJQ>CDR;3`sqtBjR`m2S?kq&ALOeQwv< zZ1AzvZOV3p{QIXNpTY7!s5T~fR3qlb)1b=$cvoz*b92qz;dzmXu!q}Uc)$yhq;iDZ zmT^>O;m<#x{iP3<{s$CndFdFopxz<`TLr)#CHW?iQ?zCdNF43Dk=%^LAIdgC+SI}L z1NC>^%*F=dLo^ca5e(;14O2slq0jYaw2Wqder>EGi?O(24&!rnm5w0e4A;j>n@+X- zbn}PNvVcaqBNPd-d*bSuNu;Pee@5d#Ii4{(zp}70c}dO~o`KuH(5#V$Cyz#MeTO2S zyUaC5s9J1nS18>$YJkumqP&j-fQ-ZMyW4>si7nFqguO4MpXvT*9N8?FO|22xnTR45 zzJ6rB=2aBPXnK9*sr$wX_uvQo>~_5DLldVQxh%j!(H8f=-q@&XLusa6JF52cUIkI| zMc@d$QX6z2AJ)}2E6&?Q{5SP<+m7H;qf(bbeM5A-%wDR>T&~s37P=eU-=hm~%$8=B_Ef37kd+0xY=p?gZCQ-7_xIDi>~0$F9=w z`-15XD!X6!jsb$qqq&<05CrNZJ8|f$(Ttdw$5*H~6hrVBG!N|xQ1YS5oD%kc9$LEG za26W6;)Uss6{j16o`Io5xI+V_9)l4BMRjb&!hZkfqC=}-oeO{-e}#8`DkKHbf6}S{ zKGB<2%lQ7fD(USS0?SM*L}WO7XaKQYE~j9g>jg!(f5*^@Lmy*Dzb^Desbr9e7@@AM z;Dp1(oE!5f$3Us1wXiSx9AdSSOh8!%Aq|6iGqJo@a!xo|UPWC%&X-pqaS=&sWWYQo z%@4DE#4Q}XWr_!ciW84Cs4X@u?B)VWMW#aJ%TK4+k#A6Uzs+o``aM^>^@M2JH{$D zjXMKpkU?NLe{X@^gl;KZczmJOgOel*rN8Z%-lOBp){6X11;Lp==3}7^0z7jwn41G| zvh+fQ$h{pAMdMU+XPuI5bX}gQpylJpT$=|V@*X9zEy99}M;oS7Oh+Ap*$cyyLm`rA zN3j$})DbyCKORV7pMZxBuZS#D+wL=sgyMnnK_QEwpdLZ!Oy~VsmH$6>+9U(d z$BNyW_~Po6`2kDq>e+jVR=jalgPU-PV8Vl^IM$kA@l2GTcKIjHvY-vVmlNV>If&Cv zWb+iZ&d~d*G6N78A3ew^ zcG*-om-0z_^zV#OB^fhLx8oC-$Q-(23n6LxK3H^G1lKP4{IX`GY-U6MgBXQKg+7TZ zV4K!#s^R5S>QR4>Al#86JM*G4A=78;`+3Qa2=u?Pc04uX$l5ilj{#cOqpgK69@+?I zB$U4I>lH?UPCpSAhRtW?=YIRb)e5sN?sNuzc?6agHG_HXd3psu5^6~cj`IL|7K7IV9T$IBpYuQ4HE|%du zv|juf5HAz-AP{8OeQ-%X$Nf9Qy+X_vV)Fp;T;81Asq8P!wno=RJp6cAlGYqTp9VI} zh4l~(QbNce?8#V9>}NF2J?JV?EhN^&#W!pS2PH9XtYfmqC|LB)j@VB^06Q=+O5fr(w{@caQ2dOVNYqmu`(^;(lK;S3~40UO#13P=3z2?w+6Q z*vKhgv<*XxMqoEnA|&d*P1lsApNcjPxiz&IRBM@ZF-dR?18Ff*BYsp>eXZYO^0L$u z>TS2MV1EMow~fq?3{f<`|1UuwUU#}$c^Inb$#KT+*0EUcSoRO*%pAZS;-S4GC}29s z(4Pp(OL)F;=!dmoaL5hRF*gfjIZY)x4DkVHYXkH^U`g=h^Oo*%aOqG5$#&UH9^v_CHsx*$8i)Ym{nP-vBEoihef`u zs`vCG9W-p^Xt!c)34#0qEjhF(QmR1;!!DhY+VU}_qH7@2Ajgu-`qw1W{TMDN%ZA^Z zeOF>V%^FgqDn%OF(hQ;KFg=!dpHiTNG6C7OWkC3zd~4I>xVBe3)GcSgF#lE$YiGkB zBB?nivU8NFdYPm1jp`RD8>~GInHEd@A&mw# z1XDIPhjXOW*7EVb#6Y-9Ld&W^yq5l&^w&1b2NVpQ7qaR-gkrgCGbLLnA%X_wn6OZd z1A=%?xNo7PBM4Z!xk_Gc^Ar^1p`)p|k(0V|{&K%8H+npiAA24}yn1d);9g&@)T75b zyPz{`?|6m13x=2Dhx43-$u5OQW@r)L<~NCugyArd6vo;zv)4O78un3kSdURWOH7d{9u_Y^`AJk!5>DIG+V&7Vo+Imul8dpO6a*~^U=R)n1dR9^|-+2Yi)3gBLFH6ffz%~I>(`JB1_|aTc$55 zHtaJ0-mu3 z{m?N&%VBE7T7-o&b;U2=ne6G-{+B0R8ihM0*sDFM*0nMqcf$ zVK*Rf%`uTEhn zfZf)7>&=rptLo=FFHqwmWq*Wp&1D6r>uJd8hdmkvI@t43EVcl`&>hFdpWw^$Cuuwk znmnv4>&IX+4saBFp`D=`-AF5pN)1|m+h;Pj1lMp6X2zDVB1bFdKtzAkKp#Mao5{7T zsdlW%YW<;P(56#>UeO?>0R$r#9b@auViQ&+XSB3qs_UXgH@6!GwKhK`{W-`M*1KyB92VP+PY znOHZ0y;_DH!}D8Zc2>&vcR{{_6>fHs{;pIuM*552r5(7w-$@f^!0a@umdoOFugI@jh4 zLti^X3mr{ELBT}( z(OLVdVwrv#9fy`84&0B-kDUc-JqkU*Au(>tdYG1CGA0hCl-)R}yvipEcr-Y#4o*JF?Zm#>JLVo!COy=kjXe;&EL2?MDWrf$b*5M6U3RXE5ARZPU;E z%r+7-gO~UD+ZyxRg=#Yhi`W}P}Zxtp6zY6 zSZx|Kf|}W7;!C~J%%2C_!xgeMFXL7W5b#L=?cVkYHHL5}XLV``7ULn*9#cWYV@Ukhsxg-u$9!Se9Ii)t+$Qw3Uo)g42N8IzLAk>>7 z*dr&xXkz0dj^nmwhLl8?2CsTW>2wWisgtv*eP5*0uI59NZdl3pZE+KyP<&;&M5}KG z_W5h{v8H33Zqk$empGlV3|g_5(*{~uaqM(1(uv;3X{mrP-Yf1X+Ql_1J3Q`4@tIMqGXD{drWDnQLEfYF&I881eR%^fo#deTfUQF%E$O9(ljavPG(MuCX+jV??O4 z@0sR%RZ{3#(WQVagV<(hUzr1>RxPGKYD(YEZEg5{Iccx>vmu-*5y@O?i6*^qML&k= z_Uyh!z(APRPr|>MS&4>}e}MIwCN|wKRe~e+jg;Zh8!G?EDL=c?DFV;}zz?-VT}tHUu)$ERkfu&>&wjAp>m+{*flo=vN$*!yL1BY9J#|nb!@@7p%Yvcs<(BfhT4T(s( zeFkN`g&{L0qsB}Zns309sowSL^Q+RKOTo= z_XdBx1f}}*UXh(lK`|wqy>1o2T3-;lFF#C-BgBk${!?vXxrw44@U}3-YvfcRb9i0J zc4p8@L-~wma(JVM<+LPJ@lMJT2okhBVP*a&QJ*qp>${rEf`lLM4dbC(%v)epignY& zwCBfT(|TG31G%0+NzXAa^o@0D^@wSNx>=cM9M+efLSCU8qa{1`1;J753l_QW4 zMz?hOj0P&^%PUh@MzFycIxa%Zn0_k#ht6a6wcgYovp&FZGWk1ZBsmQ-AncqK6Xmrz+J43C!QNtEq>8U+c#!kK3Rrx z8WT48d>JHP6rx=i6QVs1s9Ew86a2t5Y37U{@#TW0I%#|7BACA)I;o+tSS4-mURT}( z7@TxL5eQ}(I30HnXdPJ!r8{ML8*R+67dT3SOilqU>hcHeMY->5_%ja9)Rgkh8S1Hs z-W#)ky0$A9_44=kguXc~94=a-G`fpYbr<*Py@8OG4q;v>?zkc1`JePJA zHo%j6w+sAyN#1)rwJo^aDdzPMWm)4Bwh{EtrTPl$Zyk8ihxN}JqoFNH90_aYSWqo?J3aJya7nH%|(7N+`TYdT>9X5*Btd!u2$6k}QbM;zW zlpC>v+v9;27YkZM&*PcW8PA}ioN&}QRS#E%Rgk$~gzz^;w& z4|C_kLYKBGtq(`B@JBaG_=UGw-042+WTnKDYt+Fpc<`GGuQFGOM{^AphNpZ{9--!& zMq~K;>j`7TO1n}@<&)5$#w;l;j3mxW>!$MdRnG>uR;5v*YtG;y!ALltMZ=T!D?F1b z^3OH<;@8^DLxdyX+yz8R!at>HDT3N($ei167Bt`op^DF8Bd|Bth{)<+4|G@FSBuQa zWkfwASAkEcoR#mfWeq+EUAOzlLf6>$NvJ8kPnAlAPpIc{^lFrZ&j4sup^3hv#bBBO z=fH>T(Jg4LtNnj75DRkhVHg|*P2u+ zW!|3Z*xy#sYOYCgsec}ALm?U#lWKSOb)%;QI|q#)ih_^?rpQAL z()L>9iA^sy2WMG>Li(vu8vcd)wN6%=>syzfMtnlSMLm8=4L>{4gu%vjJ&(S>e{}sI z%sskkr%70&L3dybAKmE~2?~}WCs@s8CqR+2@Nn<T-3dJL;RtrgJreBigaY*{wA{UxGptZfW}ASAqc zk(2eD4lD?GsMRTY0WH0N!Yy}T3JR?Rq5?BaEhTbLtLDXM#p*n|H?Yc}vnLr@Pzv1( z%C)hXeE5Dtrz!fY?i&<4z^~@XXv5om_vL95Hf%1fAy$ktM(yR9(U{qvql2MDQ554` zBT>st+!2?TFQpa;&cZB%%Lqn;*+&>CR))6YC{suthrmsux-QOuA*F+-_7yR8{jMrf zb;UugO!TyKg7VVMvtUMr)0|$};2AGPB$bN!QyLJJnK-2%H&$b4SJ;%G!wB6VL|V*p zNf`>k-n~Vh&k+t3kY!_9Yp#eA6Q(o`FbT1k_(mt-XD&e&TYA!C`ygKd{3^PLQ)IKw zTPHyT5f^akgu2}pw|Cve-yBrve))!i2_1iKdY~zY-*LfFaj8d=Qk1;qB8a8COgX=B z=453KW_g)WVtdc16y*VYkiz9^0HZjl9xQK7@)ch&kYGx6OT`>DS@c2prHTJ=-`m~t zn0J-q9kB2y9M!!iz_ZB{GK!WgdQI08#(!wx?S2Q^wmy|SvqAJY0bQDyzPv7+B)wxC zgYp=oLcrUmfA_Q8P985RA7@F~7G7wM9XwFZ8JYa=6(`RNOz);eZEmM`ukpOtZ-E=q zy{`xYT}65=gE}(cC(-)nfQdSHxS(6BiXguJq2u1ErNix{h$1#{i)e9E5T3h9K*WtV zIDQOmxJ(RNTK(hma5Kl}`utsi^;F=_JR2P_5XOKY7b^F?+oFWm)LOoQ4UjHg+)_c# zm>w!5kIyG>4tvt4C16T~c=(kG8yY*2Ib*L1=?A@)mIg^(;|L z&pk@**vLpNnC_bTa%wHy8k{w{@U+p=vNk;q;hxbF5_VOc@lR`|7+s|~W$B-noSqPfk z8m9EbL134xpYFpJjmO`^znK8D-Vc}^KzD8&5Phb#AmqH-D(IHL%P?Z!x4yLiTG5e% znq_3!qAX;RANG&XY_Gycg@5Ls?mnHRLjL%P_FlAG(X}T9Z}bGoY0_0BZo^1f1Ojo7 zK|r+J>a=d@1hD`g#XrA}q-oha{N`bkzSV5wd>$4xUzk3$nqhyORcsb{&B}i~oN0I- zIV0%U)tTV4%$L$tplM2uK&XCDcfoTuV6W75m&!WZ%&?1nH?v@UmAG^{i-#eVAic6R{b0R zkN0-I?LN0%s{$u>|GS6{w3)x;=*`yo@@VlFI)3{r=xSPdAz-K9TWtUAd%>$UZ-*&^HgY%@{h z5TFbKhiV*FNjoJ%j0UFnr(X6%j$r4Id0SGA*msy3taL4Os8;;hGh#Edm!cfOD~`CR zAQ}FkY_=2heCpqS5YVB}V8~CG8NY#t5iZ9X$#jJAc|PHqKfN5h%BbHx#2|!xoWdji z>l29SQ~1cQx>Zxc6~3t-te*nQ8R-jsQr$LjZzS8A4o?vv2Hs`)UVbCH1P?n41#dnR z(9{K1CS0`Mk@f-$7hQJm|Bes!CZ&B|JzXiX-j;Nx+B5#y$L`Ff^z4o?Dyg3&_Q+rL$ z%Pk)j^Z(rN);SL%32Pck(&JLq-YFs6f2&(CVsDzf6@j(-OgA6Fz^n=;Fw^XEA-$(c{k+ zyL~f!FHJ#~7a2iRqIa6H&ywx^6#a|aH(}_5Q84PfPLYQzy?iYEO{W$4^~YiWGe=1? zj5H{951!#v@ej9nNiS-3-4FoNO7WF-?)4fj?*I7l~@(f$C))N8hzs^nDyp@3kc^YLLuM4=Aaym9E=XBn=Em8k@F5-J^= zwHHb^$n+Z;!M}bUTe@#;p7U27@?a|Wy(EN8|52p1p(Mv~7xE3Y^(KD6G8r#|PRQ)+ zs3{j!v5>6)u)hWJE|i^Ur7=q~uI%+4filc-qMVllkAtHx>I>xUd`Wq!9p=@}6+p0f z&VikuXT&#;mfz-QrDrki1qYTG4Lx39g_(U-~4d8#(+lWo*EEJX1-cXCt&zr zuyHV}h-?knnJ(&|9)aax?e&=_9$r-pm6(?h#K5!dj{|c4J8;a}a0fy#FnNBt{SKt4Zflr>haCFJ{T3441VH<3f;N2-muw zgmg^i*fElK!PU+1xr9i{oK>PWq|GHWv;5tVeSCKVscokYNY}BXM4@SmpZ&tq2MgCD zQX<-i%9L@>ckD84;YS@-PNG4tL^o?-K}hMjN})Mp2`FX3X?8=>%gg3xA*e4cOd!+S z|){(ZNic&*n#~#B|rs zIrMKA-r!H}GjDH0L=Ih<%DnzN&0)h%)<=4STU+!zB!Wej#IdNR$KM6SZYPTt*2x{v zmrbTf*Q6ikF6RpD=AKSanpxgR!thk;7Y1Dl7j3fBf*U)n)iMHbuue?as>f;%X4;)KLpyh4vTS048zfG@knEqjM9#zR^wk7 z%#thQb6f0e2n^RyT9sAUcyd!JFzKNskcnWLs+AHo00m?jby!d%WaTg>dyH{JcMZ}Z z-i2bq2BM4g-wTo}a+OtPKgHLtoNHp^C;$jiB<-8~rMmik7}RTv|K7WTAZzRSbaz94 zbp-!_+Jf`f);Kbb9j3%CK$vfiu}ut1rOWso0Ha5;4HThP6J(JeJdJ^xcw{BcqY^7v zS#^mN$Ch@^%SQh=gRf}U1-o+2f=Oo(X_YIxWg}xE{`7mRokJ>M!yxhZcR|~pF@2~& z@B))?@9_|kk(I#JJtcgk09+xm!fMcKNv0*xAC#4^<6*~9+s&k+Fdcg`2SC}K zCbteeyi)b#E9q;Pjrqs0A()%z|Gp#8_fNRKp`&Cm)8bl9EyG^L-(Q8s{RYsJ|2_<;E-gMi#m(MNaBU zOIwD?1X$TFRgS?W@g-i}@;eQ?C*@wXvuh*CI%zj8Uz9t}k{|e9ZE&aW?G*{~EAQ_QdU(p74_hz9X!?wcVuSxv1#Sr817AE0g?|vQ zIlpM)A~C4K{Qf3Z3=xk*dKvrU^e&rOK~@92tHKh$Z4sIoSPhx%BFts z6UhgmM*21E(0P7O1syr*wN?aDOhp0*4<`Mk1YQP}PQZL^jL6^Ue=69vv^;)-1FB+u ztow@WfAyX#`bej0acWuzDfFYx^Kyq#|0ghgWkoXE=irw3Ejc%QBd~j#@>l#@XxZwl z5!a&ioIc#rZ8erhxM|u={&JfGNjhL%_C5=L1qu&aht3jH&OfBkL@JU%YH^TA@(=}a z*vH%@0lR=&D*;aaM}%EMsWS8-bE64{5#CQppQ;%(lnFxm&~!eGE0w)48aaRh2Ea#` zy&eZNkCopCFMgG%*3r3^sP%*n=*M!Q+f_FxZk8cZHpe!AtrC*(N!K|U+R+vV^r`^( zqKF<0T?VRSV!;=A#W0~Ajq078z);_9BEC$~DIu|Ad*gnT- z9Unly`l*ifAFbkG;1ktg1M2gMLerWe1)}xEZWw}G|GiB8g{EGkj z{UGv<&@=Soyr6c?d)sK_HY00CtOLL3oNwXVVH$Ctistlh)kPd&t6R+$$ofR&#-jk+ z&RD7OJmC{{_Y2HzR~x7AjEQ@H8(5X)?IFnYbu(zXm33?SBytQQm_8MWHge{%{;J?u z_p5-zXz#!PFUeL_i|c=wJoYCnJ{?Zm2)*as2(su-4|G82qmPMMndxiQQ6v2rP)L@& zbCGRj-rUJQba)9rLPhtz{t7YF%T}9>!mTx2S|+o?b@SWkm*DhJ4geEkfhCuc1O<$Q zTv@7ZjT~0O9AWt>P5&q|y&+%`X0yEoy@b;;0NTIX^_?3@_&fohfq~x+)v~Dc3&5s z+GgqPReMf=d`|0bW+d)oF0&g?? zN#H(9Cd=Ew_@&N&Z>S(Um~06)J4qNSmF!gD6{4U23I|2IA>jAzuDfO+Xag?GVSH4E zGcn#akO#Fckq$M8X!8mCOuQ)-%4!u_536WVS7IBw(3-#(Sc!yX+&cmF|E>;CugPsgsf(#~E z4u9Ifbmc%76Hr^m03SY1k+cX`orX#D7SbZJ!fJ)gE;H99{kBZ_0pC=z>QH`mb^8q{ zo5e2C*6DwT$9oVN*cE&ak01qxHbLNuzG`-&ZI1QV2}gGSxsCscFiB#1-3w3G3gSsy zu5n^mr|5dt3+)R4e@^UeSBKu#F&2R%+m`I$knA@%c|%zI6R^iURF8}qyxDWP(xUf- z0Mb2kmlJs=keMMWTt~2x@falqVI+p%Rm`xzR6x)3vzXvmUQYd{yi2k`IQ-rS>!m$U zTe-#-qlx)9CiKN|=!Oa56%#25=*C2EE{LtVp?D1ZDR4Rc0x~JNO@OBtxV% zKgW0$lJy~}qC;9?D=>yXsx5NDP@(QJmoQv;iMilNER~0AeNod7RZ!X-)=!wE&dQQD==e>=;3Be%xDk{5)bxm=6-a*RyU8Q0N+ z75{d9|LdcdxjR)E=u5u|{{XSz0lcQQ^xo_rZC#{oEEt)tsShIL>w7EOiU3|@U3V2# zWOrw{vj%~5#f)ymHEy{Aklt7Qa^9C94bm14W+pjrh6N*!A0T;xo*%DGfKeCy!jqss zK-nPQd%4H_J?D$P6Qtz#BU0CCly%joz)Hsl*@L>;uK`!GOg(Q1JV57wpa^K}I+pEz z-|ZFcc1$hWx6=Lw0}XIEif zN2SA?bZ@Z8Y~j}1P7uI<+SzNLO)MI~V4eb9ShduJbUK_bza+84|n zAFwAwm}Yv|ptx8II<#~R~VKSm_jyn0Lv@CKz11Hm)VRAz4@ zs`=*&F0r_QAJ!c17P39hWvt!9^qvGPeZ4(&BzAgaffFqv=X$69!yk7CK7vdX zxzjzUV@6vhRTBX-9sY$}d7GfxhXcJoQa*|(26;j7z5Rt`@Drf~y3?ld{-w&fxdZR= z=SQ)5XJ09{v3V|7-3zO)kFm(tcX?a*5zV;w8A>=8egPwS6n(|;9S)M_>3w-IQF_f) zOsP9B=WA|%aE@5=$}M`MIDT7KO-|_EjUz39Ew~gy?t2Ql-7Zz%watW0!3;IltIf1{z@ypI_8f0 z!_~q&yC{-r-%ebHGx<5+m>knpi10UO3Sv-G__H`Q{$w5*5@@>!IIA;Y}AP>n;YU< z8~rKG<~U2BK>*|C-qic~yw^f!LI))(Z@<_30YO)j_uJ z)V*^a<-5<^tV;dk$nN5H+5*qoo-VwB6QXY?&%3GjhhLy1e53#{iQG7~y8JMZ=ck>I zI(FS3syzqaG^0YDTI)>lU-i4AYuU$dl_nLr1xKSsNn!5LFRc?NEBJ;ZvMU1f2l>ia zO8MT94*aC+Tj6q;=?ZEJ-^wHuBGT_i->T7>h|e3?fKqdxiWOu}p|ttQX1eYi(MW}2 zc<2aD9y3pvNX*R45}kTSHPm;t<^43rhbrQ)Tg{lXxnQFkhj4|}Vt;N*5QHI-m}mo_ zMyNgz7C}TE!?;sY`6_=Rabd+aDH)cu6Kqof*G`tk$&&|6A>|PiN z6G~r;AT^D{0I5I8^JE%fJcybZT3Z&< zVpatD6$li?x2Pk4c+}AWOvJc?CMF#mE#?EgSb6I2Iy*|B@5W?wN$8&EfdmqplwxX+ z@X2t^GBk)o3)h2L1mDnl7i_bJ9wwB?s2Wz1ykXbu|(RDH~ zFg@LEs)#TK>sQ^4?dgz_uxc)}i9h5NtbgT{4>NSwg`f047mvMGZ9KfW??K_AneM{p zc2ck0;`$FQj@?TBD)ll(@#}};!q0*vLKq)}K)$cWsL(50`k~OO2(7`5P?u*ry&8jX z!^O?yj;(ghGHmqWRMch6mP-;g%;6kzVlSPDz;w){Vz)wZef3OSd&Q07GIUsw0!DS} zfY+Za=xX#UxMo|9tHf?%H7y*RP%3u5U03Zf4c=b_Aw07x930MivArZaR4}iU_GHGgY|L$E(3IG2;|^rtM;Z z#*e1C8b&gyVin@i7WXWZV`oWhX&VhCz(3D%j+N~dBztlw))d#)N6*NpAA2d1@bq-N zpB4Qdf#-y|MP0*`AHikbG6bJz)|g~gg$yRiy_SU=8?l~xpi3aT7FpqYr)!;-4>P-d zSlOzC9Fl-I4T@Z@=l88b6jCnPduN61Qh z&O4wpEIcP?u>0nevAr~DDb&?ME}^7KNV^I@J+qsM-aj@C@jI0WOt(9Lb~Z+a>@)}6 zts`6m^c;~6pcY59Q9+iywYLEDFU4R%%As%&s8C&b#uV3Y6pH_Bwk6AdD$G9#{7L0# zj!G80#lE+F^Dmt0|NEE?8hKXQ@)uq`7}19WpR#0<_+~qu(M)&nWWxNP1sMFWpDp_Q zZEZL3e)hql23A50M)iv1Qn+6i0Ne=YjrbX(pi#!2J%p|lJDpp_H3)w-j_v_PBboh? z7iqllEbu=@C?#n&!6gx_4=**JJ$HNR`_cXNK`^4SWJL@mPa+;k`k;^^UKtdSWq1L9C_)K#p2!A8;D6%KV;W0mY71am86 z;SMruz(=zDKeVS`C4jFRosShCyRgwFC4&|Nin5saj{mDi=I};xMQVc= zmaai{wRwZ+PM>{L_W5L!uLqU^H`};r_((+XBdUpnm89{s$!VuhiZ{&J^%e@!4O$wM z#u5leq6+$DFmO^_3Q0i)A<8D|z0EbtEOB}>M~ zg)t!7F$n?|U(oNUu37TMmbNMW-_vM3%5dm-ov{l%7C!=lg{&%vK5a;XcM5mm5Kads zY`32L96%@9*+=@q>;I>Q-p7aZ)o&?(VyfHUE3Iu{g@#H%hx-4>`s%2t+BII9p@#va zVZfogySt>NOX-x7?g0cuknZkoP#TmHX%Oj7>CU$>iy&}?a4-E3o*OpfLV&jQ;Eh2$Sg&#Ho;^{B3keYv+C<80~-!> z$$J^?$E#&(TKN0_H#7)0X77|9=iM;@uz28=gE4NwthCGGCt)kIzaaC!9^u9n;~sr* zpW8kdfsbMqe&?Vl092|89GwOEVm8$U1}Bsb`+QG%K!_WsWQmZj0od3?UIDdlT$6AF}=JKTAnRsG49v+2q zWjUT~F|AwgGF%grCKb$mE z*wbbG-`Dgd$DoNQe0E^K;`}A{?(plC6bckD^Kut)AHx)hD&J!nBB;b^7x4%L;hnfX@&KURM8;LpBQYJO(KahGSWD4IF&6SZFA385=UiGE)mR2;}rL~ zX`qf~*S1xG5gkydl)f z)fvTuRIr!*lK!t2{{KE+U3JF41NA9#V-+&cH3eqqu*sKc;{l;%7NoqMl4N zafd>(n7zQbJno`|H3Hw_zTqt9+}`U}ZrkP|k7YMK+IU7nq+%3WE^nTZP-+{)OMMD_ z$o}gV#Gh5Mb57^sS%KZM&`|5Sg7g;G{O|9Ze{=r7K}&8@WMn@nN$58}3nhxIi8z2{ z4hRB&nq5)AOyb>x&?#k>K@`vqb~?v27oyixzQ#~CI7b$BV$>RXas~)7(aPB=BZSJuXyLJ8fQKBOvMl#t#=_vAEVY#JuvV-mK@D>j$xHJ=zHk!znV`fVFu zjDd+rg#@C*VOEN#0dKv_2)^-#BdYjWGT1igIxT&#b4F&1W51n(7-=*yo46Gd^O~eG zvCDY?ptv5a@iV!wSAi1z%)~m0Rm04(njUk1&rtuF4*XqOv(vsxzy`7;hadFtR#C37 z16M-4f-5tz$YYhr_NwG2DC?*Yi1W^=^sWsua*O!{ehv+x&~SZ#RL5~Z#J!Pnnk;N% z`IO`{%EDhn2Yb<}bmID)0991bnNoJ7W*Z65yBO?>i()k^DrisM_a^f;LhC1!;PBTG z>vu+z@pk|u4v3+$Y649bGRU!+PCB#ta;QDMbJ=i4Qh} zqb7M3$UZvSPaf}h=QcLgeS4hjXJvcE!^F);<~=<`8kOwuBJk{faR{F;`O+=-ii zA#Zj>kw9BOkt(M?71Sh+D5dXjbyo8Y|G+Z1`n2X1z}raEU_7skL_bmiD7(x${~(z? z0L&wjW&mR=qR#XNF{S)KVjHj07_4#94`4I!a?)b}e!_bkw1m$+O#Y2J|2f7^2G-Q7 z^3AC$_xc9hgN$1wsB9ia11(Cp5_dvV$P5}C+uE5FNJn}b>23pQ`DLx9qj;peU+6c8 zA{iJGBH4o&Pu5>j|KfWIVdglup6awwMn0MT!f$%!*}@?|lI#h`xWZ{P#27BBWv!2V ziyve8Z21V~S)|Lx`%$BaG2>9I;`?Rg43fO{LI7~@B8f}UyG`}@6N%u%5Q^1TZdb7%&DxQ#BF1ljX5`;g`|baUyiCls zx!B&cIZ|CIn>buMcMIdguy=d()Y&rr!7;ifAnJ%EHs>3r5KzeaJfU)#W(bWGF+@HI**^9IOLX}GNp8)CGD%6SAJeYt~mxW!HicWKY|11TJ_Y5@uR>@a4rwCK&@tjg z=Eb!D8SayW6JbkJ)6m7rBj*!|>#N!;W;iXx;u}^&Wkr%e-iT6|p))8}0Ik`=+wdM5 z#%efevKXaT7scUBwkx&Rac`WaqPkUj1;Gcl?e&X@8cCupw zd2eUGV-hiisx8-FqRtCXf&GrJ#l))t;STZKW$F9mZ?m^Qs#~Wr0ul;u#H52@RQKbe zYA`QY0vyOhP`UeNq8daz;M|`4v-=qn3va1^3hRNS~gQ$apaZ%%kU ziGD$MBF9J93;)S?UZvZ?7x$Apu$46gNO42fn6sMa z8;U9_zT(#_aVkadW{l|SBukRSzQU$g`aA;eKsTd_?8Haf(5I#Q;Axd+nmdaTo`(IQ zppoV8{Jm0D&qDfinf&n$ssEvu;N$J(J2`3JX=<*!$s$_&Zqe%%s-KtNVf}9QOM^|f>kE6+<5B%H4)46E#<{IkL&&<# z+^s+QB86=7N$gucEa@vkPT-eHU)Bw5F=fZSqo z9DaRLwo7`YAzU7It@W$lEBqxo&a)e29CUmYeAagH>vnicESxL4d=8u{THK8Uh5k{ot;ftLx}s ze58Hm=i{s+_()RkYm%=nDFAnkEy$(cnT3Dx(C8X0R44VqAe z2ux!W{-MR_wY+mRX!shlD&PZ`#z@{Yev$L0Zge~v^c$8G;0NNP@Z(8pZGJHDF~dz2 zslKDP{ugViJ?H}gFUhBZy(8j(PA;DFXg2KSD`nGDeEwBW(w{C*h|_T=FSj1_@4d|O zz8;MW?k*5OP%}hqSKBs|25WKx8mOe4L0cL6jZ$XMyKW3 z3^?EwA2PCEzU%rCWsN0;R07ct_VHZ}2t7DIRDY1LycvDDm5o8B*RNJ9 zHC<_CN>lvJBP&GY-y*;95XZk)bk0aJ>oTqZED8olg>YVd@K+9Pl$5FyyI+=b%O&s+ zuXG!E6P!#Zr>T{M+R^|S!z2Hr4AoLB2TIRvhe2f-!zM7M^r3T#Z#gX)k+hVPlocBZ zvmuI_ao2-b#zYz!!e9cKCl) z5R|;B-vtn_49#yJ>Hb(W|{lW6@yeh(cGiDUC? zD~MrhpO;FdHG`atO`fo8VBaUry{#ThNvU8wE2a}P6JUD%wPxC7>i#qDgxym-z1$)6 zTTzjca)vc;BU-h5*f(3Gu45a7HfD2{I4sY2Q>NG%vT8!D=&!$Py#)1UI}tj}NWW0a zieUzQu~&x=GS@;&D=3hSxEPhInsr*>vk*pfp=7qRm1^VbL}S4@Yx@+JfdP@~<3nH~ zvgYO3Kcp}llbC^0iSePW(_*onjQ~9Dj&;R=SF=K$(9pwg+^jY-_h;%;#-x|xYB%le zDK@`ybmpHJu^xJsl(Pr&LVUdf6=Hpnby)1cx$stDoXZNj}5=EIWKO-t}3#gZT;V&Q!NoW-hBm z7pW?<$eDgOX8Me3OQ?U{cOD6dVMK&r%TvEz2tV>tE|@s!P0x?^ zp5EcDHE(--y#p@rKFmo{3nH%_1Vx}e9ou|j#9s?D&OJKmD6Wuap{U~0*S3hx@}Dmy zyM9-qkGB>F!gOivh%@&qFmGLh?F_CypA|CROU2NI!GFf<(5(Pf3Ga{3#IHUDT8O~Z zgvZ6cJ-2(YCxgD)aL!sxefeo&!NV>yJ%~4mrwfdTsOvHcRRpJLw)A@PH{j-z0Y?W^ z-9JMzKYh6~PGq}~nETZHd#|~jgk39JERr%bOGMSz^4ZC<6zBS37bvj&?kW(i-#y7@ zJXYMuoh;xq#c8uiBxqe%)eY;nubcJf^X=(*#InM{GRjB9(#H+PQn18m5<&R(s|o16I&H8zFTg0>zlrd~#B{z@=`|M?uT*dvmy zM>b1=&g{chT@v;;V>tuZ+A~YBmI@2?_4@yG)gEedeZz$$7cJZZZybj6ejv{T78=wb zL=7&2j4nC*b5bMoYzYw5m?!L=K2UMQ!wkA z6FD7gd73o4?56o*l!6%-2>vFP+s0{lttf~I4No9o59RtG2Hl|ZhgMsE-bgRq7rXO} z;NZxkzHg=Y3Ol_&{88&4ip9|K^HxB;@pM(Jb3f$mXrNoh_PAK*(M>a|Z#N8YzuKjg zH}C=WCh*umnBmm~>@acOb*cNh9@gV|(=s>+-%Yb=W^;Rfv1Ow-Ft{|nWdoyp_~ZzK z#$b1!ll&ORRy^s7yr)R(4$S&~QkAzO8L*`;RQG;~>t>-5JNJ>ka=b&5uPx>Ywm!}K zo>T(1(5ioTzIkb;GUO~0@d`andt5t<8XUJG_s|>1bN77r18MxZPi1RdOztGzhH0!u z<6#Rn93oWKb?5V<`;nLY?o-m%@#r=Y^=X(T#=L`%NTXul#t&oRGtt0flIFtc37_Q0@6&?U+7A;lqoS7<4a;c6 z=H3y{dg9~7-GRt=jb8E1z1dH+#b3g&pB`4_vo*t>t~XgF0z=<$#@ge)+toV_7W5#V zrhdXHadtAhV*IB!o>Y$n&XFOX@GtszP>X!4;izP4auQ(#BGQOT>qAmBaJYP^c}UEo zL(Oq|4VB*ldkZXk9Hf$O>U{8W;iL@R5z|N=7E42h=}_MiyBTf!X2NPZWwa)MqUac@ zadLlM9QE}n?OJiLFOF{QOMv&fly_0QBH~|lq%EDMbZ{#`z@CD&kMWdUnxvWxu14S; zOVt_ECJk#w3zb1psl$!yqfeMpjBcPkxfvEAw)@FSVEqYPjL^EMOG=0M##kacu;csR zOi|NGLHSKV0Vw8T>@iaEQnV< zEGQ^xpoMgez$=K1Gb!j9gWdlvVw&|v?lWp*t8*^%Gh#{56+2lhWdaVuFq^8VKNbON z!~I^+_<8xVcvk-H#j4f%Fg;JuX{joufnP4VZO}tw7A&z8U$?pbA>5_@GI^7*RKD>- z;iUPY{-<`(sF9iXi=e~7FURA7Q66G-VtJxxq%XS8S>bv*)O%{5BxLHRyDMF^=WW<% zap`l)`4^k=xfL7lPtV=Atf}my@RJYY^$A<&8P~#L9_5Lve|H(d7M14Zc--G~@)*Yo z-+TA=VT%Bp%AdNrJX_+e&!eF31^bRflS=>oGPc>9Z9EuhcH?xD_?1hVA8g>Tf2z^= zv2c6}Ht^Qi``FGp=tPtL(;v=>WgCOXT#5dkrQR-yTP*sxs_G=pc^mO@BQwFkWK%-q zXxL>t>*E#gbnZRY^(t>*zzyB&=hNH?p$FE~Kp6I{3yjJABu?qyarOsIrJr>VvR9R3 zX5AauUR=+)3W^Hv=ybXA_HN41kR1)col@v31~_NzgTukSS~I+}Gmx&)@_Uo}Y^wu1 zX&EI@dVF!TuUN6>_z<^R0nw|Fvlm7?8rbo=pQ_N{(h0jzQeH(lNd_`HQGkXtxLC5b z8WfL-3a~jF3%U6KgnO*G1A0W_t`HBA=6pXk3)(0Yhlk4O$HkK|9e6|JtoCp8%1sR4 z*gJ=hnw7`z8ye)*9o45%ekh<8sE_=Mf}Ar+FT~qf|J$I)Lm}?h2@I}!_|d%vU%vnb zb~=E?eqS#+i_k+9YOlL4AntmMrPELJEb8n%`F^&JSb~)|L0JJ_uLwpKTuQIdZtk|Htn2xJ(oBE&aX%9y~;k;G)-vKc43u+;39i7!%Y^=fYD%c3K%M+b- z(3R%3#OcbF9${Qi%F55~;q{<9h0&l(ixP=@#z^stp*ANMxvSV(ZBr}Wl^pt5vo4pn zGe0jpaz$G&YWi*VMH_>=9;C?hGQJ0;yI($U>-;PVrWoJzSPpB1ndIJ!t<4I+d^aCVpk`qKiN8&LNK5XS(E< zdMCV04RjSn!@BIgv^t;P7XGLHL4w8?p7Q}@Nmfb1^R|5@gD6UH7o89J9Fe3QmsZ^f z3iYsD@JkfFeEpU;oxfW9ZcJ01V4*pQ2`AQEqvTsg){+ikaXy&`lio^Ehd1t`3B3*- zP4Mf9Q3$~mRGy7)(q3nCRo~bYJQ58FmZlMfj`WxZ?|I^PQjNTxNdoIe>)2_P4;4u& zDfF0U<++&~_p~F-;h!T-pJ-n`B!$tmaC=h&rSp?%bwe{imLu9f2xEsarhKv9B843X zAD?r})gsRVLzm}3;5AYP+ZGg9>l_BAB=AjunZ!!h6kBwGm%`tG))7s^7omCK9+!Qb ztoSAW+vW1+XMX$9m`hCcoFGpfUz`0)-~D_XCo<|UZ+EE`WBV=jvVcJm$GJh2SbBgI zh8Zef%cXCv1^yc`m#Hns`8xvk^Obmk8OS%?E{As%9jU{agEy(B#S2utL8YIT#%HR@ zk@d1~4?ZEfK5V(3A9SiN2mRnP3A!0gZM!^2NO-Fnuosc-mGQvILrsc)DWcx^_)Wzq z@TUL+RiR%?XyMItm(y;My4wR~dr%Cv+1k|R_$FMKw`IkmA-TlMOx=9_Tshv^88QE# z?C>?#x*3U!Iks_oa+0TAU1OD^u1k~@2&K?HY(>kxVH8opW;f~6%Q0NAw6>49OSijj zCR4RyDo^rea+S7VRuFmx(D||Jnk~1K7uiTBt*>=dUYa0Zq1Y^mroxGuKZ-y1Sy`=s zCrLS8T+(3UA;7;jk`>1;Se02Te?poTN&j$2MWy=`TUo_|V$LwM9B!F9d!p%~2N%MP zhz|aq96_jtq|eQ0tMRC$Ir68c=UCYLMl`MT_^9XL4MkRZClWjwO2c>1KoJnRYWlXm zu;SN!!9QDUy92(r7lNZ0aT@;~qfuBV(;K75%V?>aPFAN+;)`o`?1AgG>?)WajHu1; zZHz=ZIHgQ!Nb$Sevt}<4nD&qK(u!Bw!iGf#dmY^qjs?aY17K=5R_$M8COV@sjL09w zTN)pXOfvba&AMy~HQjdYgw>rSZYw?1J3`x@0uA*Xg*%pM!wAOXtj|YRrCQ!H6@0#2 z@f#kvIZ8Bp+cuZ9&>?&C%?>HAmhjR=_2eEaJm^@kANPZN_}z6QHb*-fXzl`=XxnS- zk6Y2d&3~!k{6Z4HqcRq>j!6!@mEtBPBG8b$Y1Biel1@z6iO4^mo^Jc@b`7Uv_f!*? zQgCQ#6q}&>MYCZx#GznHm>We^ya!vjZH3#}6Jyb6U^}3L1u zW{9jEsCZJk05}9Z9Wajpor6X&w<(wfvplgeRZ3k9j+2aykL#z7KBvl5vArwlNypSS zRMphNb&q%w_vu#GO<5C*dOg$dF{(3*vrpK_TdtG`A#L9NE!OsEdvm8c$xx zGWTo>+B`FE?|$N^h0ePz?hAk0>C?PC{)Z-e4K#C#PGV4-A*wii)E9EYi~X`$#U2u{ z6DQ<#kf|E<=T~}%)6$lJkm^y1gM{PN((u{TEPX{lu@yE(^WrC(9RC!Aw3D1~ZvA|Z zKvkd}l8)Z;g<;epmHEM*fPxrx8RIKaDbJHZeHDpY7wXdPrrf#G`OIHxTKWZ{&LPIdA3`$^TIjy?TjaaW z;^>CGwF~y%9hCBN9h~eLgplSJF3eC;>8|J_uX-h{+`0N>IX?vz0DU+F4~ZP2Y#Ulb zLx-~KI+8F1m1VY!N1@K+%nMds=aynew#sMfw9-#X>&Vk=giL2Zl2G*4tG^pX3cS!u z(UFs~!U05S6jGsRl|YmeiZZ^=@s^gBt93=LnD?B2Cz{$3m8L`4vK{IxGvKnQR=9w{ ztoyj|fpUH_|9nKG$bZ6nK2oJD^<9=y72TTU=5Yg4a(1Vv14CyQPi(FW z@icS&$oZn!d3xBL$Z9LU9emF}oD>zY4{X0g`_y?ZbIMu$aFTcN<;?Na6(QjcAHu?VXhioe-xT?A;@x*^^dld_k07LhKjj(82V>}V zV`Azq80I~v4y%1#u+)j#fb8QR7+0&$r_Hb)Djo7%SHYfsdAouikfHa9&EKR*7(h$D zecwN#A7`i|i8y1diLjtJdwcns|IJV&l#}pjVFTzluJD;_Px#Dudr!CEeH6YIhN8HI$KbX9ISWi_=t zdo)9t25ya_M14JcS@0wam|Lsbw$*S=Y|Ide$n+!9Spy;CZn>k<9DO*lBjQkp(Un4iGP&^1ZQ9iG;CmKX9&y?J3 zI!Xi8^r3gXGt&@3y;i8idcVg7g!2d|bN(3@`N?|wREuKu1ZGhCuD1IKOAww9aWuf{ zMMwg#E?8mP7lovMtV15pRyQ8Q6@+dpy-a9qJ531EOFo9q6YV@Ab-*yKoD$UbldpA^ z_a4jWP6`87{q}>bbW9ntgkSA$``HF=qF5iB?ySIY?G^Zp6iI%bQVY1hs)^^oKio_# zkf%(;ux4A=S|6xHTVV=X=S|5C5_u~B7n#{>Fxbw-pyB7hXnSlf#O`UMH;?7i=~Jo%tc?> z#bd18X%6Qj0F(gEOI8-KP}knMBN^eZ)HNh~ah1pgwkPx2XK<2lL9Qt!+F&kKstuyg zgG|$^l-g6L!}P~T%bWp;`vBaa=iJ7VvHn8>-#>|Q^^e&O=@B{c6xplZk=i!=PrhHn zdd;6GwmutO8=D&j?I9iyI5a4s@n8rvxE9l~JJP^_rpj2h#uUn|@J3TPe;PX*!iYFUUc>|}>B+=Aq?i+` zfcLyt`=*8^es-uvI(y*Zi5W}j$~kj;w~X}tfYBq7p~pWK);4$zE~}oSV^o%~M;hxJ zN^@Vgl=Sq&v_&tmUq4rR^3z}b$rbH!>{JtR&`gaU(Ybs;_U%Ocp7%|sP#1CTHDL*) zR{5hl3XKLc%@2ECdr!kFwCXB3lcJ)5moXr-6n&!&Xs_l5DuBj9TV}1=F149{jC|4I zrj;JRu8vzJ)^hhkZnFI!=F2t0!^~UOZo!)BDbaz^e(J7>^m9L%Hm&sg)b61v?QAx@ zsPY@GyEqt5B$YV-vjuOSziVnqW4q#sXiL`&TgRh7lHLddYi9^*p3)KVr%sk)htQBZ zBQe;3Ns~ld(A(9)j3u?I7?O_J)&U)E2#Z*P~sv;0}=W&hL-!(EozT-&P= z_uahbz&!q5%2@NoaYA%|a{uf}Z!hpFoQ1(@l=N$^-(%#&SD}B8$@S%cl-(C0X9@_d zA^^lO-^O)MR)ipANDS_8^Wj|Ey1jFC>%qW60aYqhq1e33mqd%?N1`UgjYs8+F*KIExdAK=cbo-C{zA3-o`Q-%z^e1AgBl4C@yo22zV7Ok0 zkypMDUVXD&Zjyj$U1-+JxL?1|vm7vZFEq~}G5+1=y~u#jeeMHxXCdBH#^RCezacR9 zy~wi~q~`Q7<@|=6XB8h*3y}g?0*2OHQ84k}jjdM=W@?6+t%E;fvO!k-I@s#0KQs!h zY@zT#?Ip|5pdFFam*vrjb15IpKabILFnr*|49Aix(WWOOP~|JNwN$gW3-g`JGj*BF zvgP68QE+OAosHJjuIHt2W1?Ogp{Zf!lzXL{l2+08`h}5URsHRnrk17(_rX_rH9AYJ z&&qPjp$TO9gkydcfpHVmv{lG-3b6_OWm{8in7}t3G>_K?PnpOt2V$vX-xw?;c;;UR^os&)Hk7@u??b*vnmsn@igr-v?alz!UZKPNy5;Ba1psUj}Tim?Rfox9`}e zdVn&&#xG1K#}83qCnk8HOc($3=c$q~-0=z6#o(Uj9PJ=W+C?h*PJavo$sc`8Yces^ z2}t`>#19FTW>*R0YlEHi+j$wwVYs?Lk9yT7x#tx1t9nk@7Uz+f-(G|D&-||oGhSh4 z>aWvS#@)J+yaG}6^9>iapLULmpRhJbuZI2XR=s}I44!AJy%i<-lV%BQ73=%6yt$bz z(OYaIoO*(i#t@fCuHp5J=I-Tk|Lm~z{qjlR6RfL(vFnjE#cU<;BW^zfKOOlPjBU2W z9lupv(l4O64=HT?>4i(+wh^JqW)Mq1?yT1{k4oJ03u2B((MG(^lZIJxi#DcY*I5Um ze?aah$l1nuM3YEz7QXaHYAi5Bn$f(eK$7!ouWd^^2s5dV0h0rH#HZ;>&}G=Qt${Wv zLPO`aLz+arKsx~<7mc8UVzzb!TzB^l*l<3{Y;Wq_AS{XPpxg_`Hm(eG(N=eCZfi7C5^tF$Abb8Te9)+64R z&)DC~oAcb{u9o#MPP(mjwS>`6EDq)v^nHC0!gdYrf2vQkKfTymqnLHTDvNvMfNPGe z#)7tq@@QdGuy5hJ#|2NPgSJ&aulX6iT!z;sJgp!XWlHQ$txiawp3f#TqOBHQK#!ch17tb-P(O$15lr!GIG z9DgLQU1VObOeP8cs1^9uH7f7$XZM&U@wUzmjm&}CC;Y}8uP?Vtn0L8B5!*Y3e-@KG z+*a>IW8%6)JDxV-{|Y+$L!k0;P}1+zpqOG}y`C)Vs~Z@4w8%-Z=b?q{4ewnHhxx8E z*utVI?d|?y$K9EY(uqIxDff{xqhB@f{MRsbfa4OcU--tiw;euj4j(>z@k@Is?S6IC zK!i^@a#CH*T*g_O9oF8;@aZ2=u^^Y%0r6S$XHbp4mZ$sXI?y~){2O@@$rauNCWSdp zfrRXhdseePon|>5wmT!Le&l(yj+Ld7d;!rw5{f2>5g;!0ko5ZXW{<42f}QUjvkiOa z_x#^7qlaka%4apwJ|S2+Yn0NMQ>-$je9$SysBn)x#AzDmDz%p<=eNSCqGRTFHxmq(bJ(%~q2fHRa>xAen zm1FfM5J=!zWe8nfEj>UVM(!Hmwy=nO?(%Bs&x|%4HuZ`GoSB(l60ChEHrv__Jce_3 z7ptoN=!|WZs@&bIZpNxVwhmdxybL-E>R2qjNOak+ZM#h-NGdfgJ}@q?QH^%w|9oUO zvt-d=6sOsJN78JBXJ++fjj3pxLTQQq5M_$fGNMGxE$rzaJaHC?P(9a z#p)4VM?D~Yo1sgt9_4-~mq8?HW4v2=Hof%hM;xd737g)n_#Ze7!;=yx5%c?zEs>(+ zY{G@xZ?}^5ae$ah;Uw>_*Dw7xynU#o#Y*HW$HdbV@xG{+^P(U{i7RaIx7X^p3$GL! zC)_J4Fst{+IFD%i9qd$6=RKg=Ioq)pzSueq#;YEt<08*Vwu+QGr;jxHqzIs_>&k z>|;2OjtYyn*38m6+pp6Dw6v4l-uKM5>WNgO3<0CG*5yiWr%spFe*@o<0!#;M771S* zd|-PDZ2xlR?=_870RHY-!1?F)4MVM-7#0W$;DaW%|BSx3_MQq;coNb&^VT`{tjg;5 z9Dj^2b-~{YN=xgH=(!cOXcU{|AVl(89_PC3)vSqn0a;U!W?=$?La1b;n2Io;jQXqd z8ImjZlv-1`KK*t2!!YZ2a(O0lnaEHm<;-?mV@7?o7z=tN~OU);N#jyQ1C zwfSd2!d{o8xv_B7B)+W#;K8m^}$^NQGta86~^3?in=n|p^ce`PmY?!tWj)nHj9RZOlRb2j} zn|ETp8P5Cb#*#sX!df*}^Y`o1GFG1=m8X_?A3CDM-LUS5@U~HC>=IWzoVNV~Gls$h zG(>J+sT}hvIr~zl>Ir8s{4U17wj#T*Y<1>v`p>w_s2V-}cLw39ivxcY7)+vWK|O9x z3}BLEEaHK#02}L6^(iIaZ82*ImtlZyxj|5OK^J@)Ft=O@&q1@WDT*?p6dg$|*Q>Xk zvW2$%Am^;K0w959R|vxzDE1X-YT&fm7HHO-cd~mQPtKMe`?^Q*_k{YOg0j%nb7nUXZ*UilH!Y z;Pk%W%OLRl)+GZ`j=Sgo5KJBC53o>i-vRd7S|Nxq0Qk-NuWA=T@P@1NABWt(n(cp{ z1^2lGB!KG`dyCNrC!;r=^}$}aMlWt(q1P`InpA$K@WE4ANUXia|4H942id~a6Dq=H zWK;Bvm>BZ8uQ1IAMDk`K$ zT<`lxRK<&awGVxN#&p@ns=BxGSmsQ(o6-V*%xeH`Qhca;U4%{pw}b&zD|*N`&OvaP zN^-TUae>8jYM>H&`e5jBvG~(0)fbNVX}0rix+#^$+a{@Mp{OJ>4!T}+f08R82{Bfv znpNm~LyULDrik^E?+A}@{)g0l#B@mWbT)c%R2(v=j9x$OZn_rU!yNZ!M!km#*jE@$ zF?rZ`4ky>5bNPvd1xSRAYb}(T$+X;X*{W%BuowKm)V%q?DmSOq*~<2k_4@uSl2;j8kDz=@2(w zE7${h^i~0jXkS8bWx{V#z@l0o$}F)=iFV?G)4ceD53D{eb(6Nu%wdUQ?peMX>OAbttTC}#L2*T5r^@j7Q(G*ivcH~p+;gM z5>fP`SCs<*%7-M^SR>5fvX@pk(nte5qmfm(BF2U$^y3fuCmPCHl~xeX5NG=mGS35U z8aA{K1>m+|$xkE5GzxrA#gYdo)qDnpN}YUi)%+n%AcM%uQxxk8R2#}M---g+S-N4% zY>)TRvfdrdIx#;8pFkoM2}AU;!*5Bw2)+4=Vb_n6IIQ4}~yB-5@%!&Y?!H$NLxeb8?(P z^6WoA!iqvhdG}REEUV>yLf{sT6NdR#QCjhDl&a-?@DN2oCsHUF(r~%3ty%$!((BTd zjJ(&m;Ln~N?izX!ZKo}rC}aJB9HS!l1JC;_?6(y8C23#Jz+^+rF-O~J7coVRDl;*N zeMTK+u`3)^SGe;YI0KvQFIK6Y%vSWhkG@ntz(xGd(f@PW>KL0JU}Y9H!F4`XlICop zIH~CAzaFqY8Xj*zP%yZ?4_4l05ANWm+^2=N*2Q4Z>Q9x>5)sj;DGHm6^ad9I1c`e5 zJjCc6)ZwBAEIH{=T~br#rZI$Y*Bo<7UGbs=PbR$weLR|BMSHc&E$pjdB{Jn{d(~ju z*n1@SdPOt??}`dpU9@e=KA)`kd9W*Ekq&?oI6@O9sjN1aRy3C;TS?Z3Vr+p?4z>-i zfcLG^n!;$cY{4MG>T=4ox94JwMFcn=R{B0iNZVgjiaN!|bQDkGZKdM@)ClT8J!cT> z9IUIrj)_T@Q-lLh#S!bg1UaC>iHlsrakQ}E>; zp6-i?Wc_PweD5C z9RMxT-jSF{)DZTM()S{8_b96{{?KP5ZGrkANy4Ll+&ljh$219#3}~QNvONnQFRnB$ zuHB@hfde6z2#idV0O_3maMb9NKs;D>;%TQ`7FUtNPFd#-<3>s5WYjlPec4>Yo?SY_ zu$-CO9orKBIxylo8%f+bM*7q(B%~-}4g-mg`ST{~N?Zmx7Tsi&G(F}vbA@v_7g0$? zJSKOwBuePbd>*hjktsGFk>HUrKUjL0u$rf%?A;4RB&F934yyUn!X*^u_d@UU!;C1` z4u~foN)$^NCSczY6T*|uA3=fh#Ru4oD_>Z9reb zjciPt1$vsrso|Kr=|5WF{~ojimprO7_~Q>8jFVGh4$V%tjDxMpvXjnFe^MqKdvVYw zB>Z>|*6?L(@hTBv)_C}$vHLFyR!UZtnf#UNa2;@%Q98~TYk^S%cCkqqhIa8})D$$g znww_D1C6(tdsUw}Q{b*R1~G(%=kjnt-dgBO1T8=o<_Cl^pAJ05{CH->Y}#VT84?sy zKC4L1rjxK9K_{qAvUM9n*^%-=mdo`L!TGC_B4G({o0JkGbA$_fm?yOSMWB8>FG$CL z(|c1i&^|@p3G*Nk+>1U?9LA3(R5wQsHg|l%yu1#?TRi=Esns)6A6u zqm8`|3$bPmc#8-(l1XfVDfH4Ci8N0WCI1~t6Tr55r~yrB0iu=($Um+5_L?q4lC3;8 z^m*#@iJBt0l2ccy42%t* zC4k|1qbSjHCd9zs6kfcgel<4S=QgNazZNJ*esc!8?#t96(6%j)Hz0_kMQPj@{*#69 z*MM^2;cwpOe-jtuLShU0qr$)}uC(_AB}Vgw51OsU>eVLwxxYRX$J|=rZU>KqTwof9s-kdL z7ETD_YfL3FDK(}P=!PnjgL$p2PJN=9sPZ8RnjAFwN39US6 zj}W?YVG|V{N>=d5hDmS%tqv}AkCN28oQl+GzW~JUg~2LZaL<<7ufk~*GqSHPY!mIL zFJgUT2f7R(34KB{#WC;J)yV9yvUvfD#w67!^Fs4Mcg!J24~~J)M^~XA3icvt{x^>~ z3?jIZTmgWoe08V{NQM9{NT1;zo@S*?(N&ps_EwL0-%o|JNZdZGQjArxSQ7&X8{hjk z)Ej%Tdr#A4U+kc&<(^nNrQbYo!ng;2hZ-v-P1qI>WyyX(fuf0ODKC|MG)n{)1*@>EDMgx|JIycPm!E0Ypx=u@$;@?#JQQofV$!E= z+qm!b^B4>jvp1qKiABS(exwO-gw{hYc!~sC?WpD>Sy{()J;u7Dyb!bbPs@97Ft_Z{ zALcS|e?JYS`=FdRx^|z*fZmw@~GBka}Bh2Pe5P;{hbWh`VCFWFCd`M`FL$ zjbD{9ai%m#$i8o{YYH}L3syGVg26G*mG@Y~SLk^S*k*U)>!ai|DJl3XoK-Tvfm;M* z)mmiAG*BeM6|8q;q*@@&!W?gC9SQL}dIq*g!QFVNVC0y=`(Ql8D!6f$F_$omRiQTpPm_!ASA?V+LLA6>hLv$WxtbGd!*rGV~3| zkp|QN0ghD%@C%^u{}nQHj?s{qs|HD<2h5Mxi!=?aaEkn`X1^)h=Sb^v^|>Jqlh9(i z+vmeuffSMsS1Mgxr6TZ*_8-~M^bA=OfynNeLStho{~O-@VxlQ)`NJVmXKUOblT35A z0*P;_R6LBI<%6{fIz;E!5b@_BJ|ush-D4eGabwlQor-s1^9&HQDx5# z)wsM|bmC@t=fZAYYBXeuSc&YoVxzxPCLb$bsOI*B)0SEL(riCIS6F5bhf5|b2?K!9 zOPfU3#LKHyG`CmK=NDCh)E3N(m`^ucBya6`|E1JJ#@ON=%G9XT!Dp?tk!UwTS$d2s z`Ix3jNOl&)!>z+JEihysPcvO!QOU4YJ2$NdU1`NJFT-=hP`M5q6WyDRH9yAO`I<7B zUX^M|-5?2DgwogmoDeMPP1zhpPdkZn2pEYAeaego6r%wmaIup&sqE)XP8(6jnZWmH zEdRqC_;3FEtI*-9s~287f9D23qTvr6rxAo+0U*u|pEA=VEs4*NWqh95WLxnfuW3H5 zpa?7nr)SfOc;S2A^PLCgTAEL~ux}RkoMb}ZlWRI8SgK78^XuU55?R?ueoh5Oki z4eQ`6XUX#$-Lig?7dY~Ti+Hpic6t*@#0=A(BfKXDNcdn?i2~`*5bvALy;6( zpt!qRf#FuV2ZW$Wl+;V)8>pD;s5lr=am2#f0_X_er_7 zW|bA1)d<$nVnu=SQ#Bcrb=`Y>@zSV0a#CSB3wT7YhmI~4e3_$}u_o**%kCge<0CT}HHjKX%^(dRwQNq&uT&9~$Yi&R$x{B+6b7{%|L-dOPDm+*T{y_|cY`0g1)RjFvH?bv(c zL(MDqu$z~*wnW11*e96VSD5Fz;}*KuxAJ*ODKno$&;34&KeC}qc5!~1`ofZkgS&!m zLUbxFgl!V+=;VJ^;A%UrwTa#*k5`W% zx*xL~D8@QRSe~p{Ft;B2j|b0oXg}jyHePP=P^?+1vn=v;BYJ6VUTBXITKL&a9#vLe z6)oz?A>cU+aX14}ILiA2);)ZF{y1O$D}h4SkPLA4r^QTcEk4p~tvSNbs&N%ZFE=%n zZHs3-`b^XNHXo9?k6RAgxAS<*xO5Vy5Ah5~q3n73Oa4dox<~CWSp>oaQQx9gSFH#~ zz-*i9We>q(Yc?{(c)2kf;u-w;8N*t0To!KRMZ^6Dghgh%L*a9AA^fI2?zArFDe?MC z$wjBgntN(iu0eOG-9^H<&cHPTWn8&D?Vl-&VAD~v970z=AW1-r8s{9FR~sHep7xya zJ)_md4;sg;tT~J53N7~CV+2z*pOhSD-KJ97=#BJu$_RP$=m0$yQ!dRX%-jU5vzQF{ z4V;dSGMuI+Q*$D794ue*jRjE*SaJ7n>hF1&A{CHL$^Jcje`yb+zpA9Ks)eT5;!X)= z8SGJ8K0lQ>H3}Gq5OyQ_i%!+0ArI$lGq>`c=e{k;xEArD#^ai|zSP0(6t-8EBiH1< zjbW-G(5+reh+@DV{z`I!+&c52Iz0La#JyI&+~>)&El**b0O}J>?Tgm$QHgk+ClloK z%#xw{N<@f4hAn}Ij6YEUb-P>4 zk$Mf0J4ZNXPu07dJxDZ2Ol|F2h^Dhm7Cs|};Tvmts!rE?^kp)zEGyu*=I&9?r3Z0; zAQRqVrs_=a8Q_lpC*=(w8lzDAXU1st)gVuH5gLiQND6J$+HsOh4h#Aa=ts*d>=ZI0 zQI)^#9{)842`QBYXr*xX&S(5*)a%e?E+Q->*XTcy zMqn0dGo|yHNf{M<9bEH#UIcS5ioRkQs0}n1S>lqYOUs_5cfLEB zRId$m&*5-0v&^z%W^m&+?--czBi8|HJb4;!k=U19jUrrm~5#zW?6uiy?pHyG> z_I%%T##vxQ=Wm&l8^;qHS7mdvfHG}`?BDqv?(Ml8(g1t2`AG}e;_=^u&S10_gIaEI zniEZ&HH=%WS*E=@gTHT{fPY>!KaO_gjIJeD@x5OM==6fU+aNz!mGt4_N&gl8*%4E; zWx~cIL@Jv-Ib)QmNhGJ8Goj zFzc51E0TUGh6Qe-vP#VX>(e;K-$xs?;_)EAh`CzZs-WzO1ZN_zD6+?d&iyBrD4zfMeM_uCB=3ZGmy=B4m>A?M>CW9LBAsLeGVVCGHo z{m2hZ^`_zu$ciCP@K4j=e>(dzoof^mQl6ACS3zPwxy76##K`5sv~XOVomPW<@OHph zg_LVlga=enzF$u{Td-&_L_juG2UM>mvLUT!oe!*smBFZPzK7WKiHLO5E@^n|gp5=c zqfXSWSX!U>N}O#g4sd7K`jekO&NAhyc_>N$h|?GOuy!eDAm8sZ(~Zsa&M@zN%42qr z#uK4AWVd1==Ro<>>i*GGmq5^pAgDH^PNX>XSG_lchobbumX}M-uHIVN*W{S zI1T4yjc7`;bPip&Qn{WSSeQc6x0J1k%riIK@Ex_aoI~rZs8y)cckKZ!4iR}kdf%C4 z*c}rl_LKLxjb9@JLO3@8XU7q+D&6_-k0fYYsz^DbB}WqI-(xfH5A8#xshk-*cqyCV zSDhd^tpG*of6fB1bFE`wjx6wuV1vvWFXYv;86Ykp{ZpWi7Ksdxl(7FYlXyM?cSnvjwA$u}`n6EEM!>UEXEuuQlm9wuF zMd-`coE_KX&DtnuYLQocIv~2Mntt9Z`a5(K)CDGyCnjsSa1>{?2q%f3R82ovHJ|{N zhT#g~0)a%&o`?hqs|o8t%v57JvVJt36L)V5^=E-J+nUwvltm<^z%3eF)$KtvjaIJG zH)OcukQ+-f3&7_lIm(MJKvO@C?F)6zbat+1)k1v>B<4~`U*4s~9{{dg&eFmC)Nb=s z8<<{jJO{RBl;1>s|3A{EGz!c}Cq*ZBcxuKEo>>tSM!}-xR_l1zl*`)a7y5@?c<(s= z_cB7XN1o!hSA5TUClNX0eZNAVAnN5MQLe;WZI3G4sz&FHfDBf#-$;d$a{i`g^yw3G znX=NX-AhstQ&HuI<{+hls6Tu*ILI8-mg+=v(q-%9?rKgSH{+PyhiIp&_@%+{nb$Ahj{bibE2{a@}0( zkuH%Bdd%+Wgww)?Uu7R+<(+8l`;#LEllvN93z0?G*z$+eZ$1ZA9m^7kL+@3MW(A z)2QWzo<*+p7OCy(4f&vKi*1%g`*aJh(2CIzy?qRLWls1`@H3{+o3p146o^DKd&~`Q z(q|1-jY9nG?YGMiS5nt9(%zM^ZuWzwnO+`p&eSg{pJ~mjFgq01`*7Dg|3JyJc2UGBzew%FUY6Dq%4sqnq40c_Ep1gY*7BlgD3 z-p5OFA>xhO$)Y`}M+|4XW+(h+b*a3nYDG%HOR1doxk=6^@xx#Uh{(}-t)em+r(sPm z41l#EGRKcb^C=w1uykW;?{n|W!gOFagNOYIM6R-hoKidkn)x`F;|3G~Lpq zqKZb}W#2Gos>M69PdojJwBm0){Lzi%7(uW zDElWlQMA!SmDgv;-xxCZ1*h(AsGogi0$-f@w?@;8t8uS{x>|-6 zpcRaw6&wnwRfNzRNYEl}ZBOGT{b-Agz}oiuH+EeKzLbyxD`vNFCRuIXvkiI1LL!-< ztY6w)EobLUV|w}f-%#thWLim4u0V#YIP#m_|7(MBU7>EA%ZDJZ*=o52zr@Op19%pM zaf9XczE358Uxl1L!8%D^%qVxtH$2E#5&}n%E#o*SWmx3u{r>ZuN@$7Fe8f_mocI=# zq{$~fMlzE{%s*mxep0?OiWI&GBa?o}*iEP3cv~nT&M+UY$mZHg8n!6fR8QbQzrohB z+guIS;Z+|o_)>vq(oEgSlNR-iq5An6Fm-NaWCK~{aeP&q1Y0UXulOC4Hw}%W1yYuF zuCbf}eYa9U`B`BU?8iPnGa6iPQ`|6n%9+eNFlI=6v1mhWX?qy@DuBdJj+U``o2@Ld z%Ej7DLpw-RNL7CRiAPqO<%#m@Z7OEYa4kBQGS;Cu&G(=+b7D~=@V;rj1R6O7h3|3m z&O5f`vdT*OYZn4zI1~|12u9du=bDT?Ty6Ez#YaU2ylqG91JPZ!5x0P#{bD(~IHba$ z#jI>Qe-aZD6Df)BivmCALnKvKSNGCI3?%-bLi-O0o@?BmB?Je|$@#1XZ3qX2mY4Iu zAI1^cP{bot`fx33fLZOpH|AeH1#-^(gIVMfb#uLS=eWBTvc32HQ^ z1b)K-_XBLiUR5OHe{RP_h>Da6I`alS7Fckn_gpX0e>}Rx$Pg0be;9V=T$2!!9A@2g z*LL!BYNhjw_2b2)5JFcLCPL}PqMfS9e*^fiRXa0Mz5f4S|4o7aFR9Tbeu(-X?unB40>21`k3Z~z z;G8Hk2C;zF1xIIRv{QxPONEC)(1YS)N6>XGC{d~4cwFYq=D`0j)uz<#!fXe)vEzyd zeg{JMLK~Wm;*EG`-JpiR_5!Prs3lS^DwhASnH%U~9`r~hGdz##y$~5Su&nuC_S}@Z z3zfb_4|<#pe(rGeDIRe@+c_pUF5#Y_z{;eMie$q7jGNzmeHboP`e;!2b>qpE3zDz@ z2d_4z?*Gn{8+bi0p38cAKFoJ9rKP@H^gepv{m=i}Z3BVV_Xm~e-a^W&eDHO?<5}c1 zQU9;yz&51emLc)BxDtqmi-4^crp+KZl0@cr|K-p`*kLq#?rQz42{IAyxr}Vh?yvYi zzhbW*m?Ffgg6F>pBj{o>?oOuK8&+hpsooqAo44P?xiV`Ba4-NbBg zyde8=VI%i!hj(H)Tydq%rRce}zTqIz;VgIJ`p8~uOX*(%y2%GV9+>X%A0gW-%= zY^Ga>%+Er{Y9Cfuc~wnpf?g>ix=c34M9&vfZKsoGfK09GIR(G&(pdS}g~0Owyg?fh-l>g$Q74gk&%qS7kc;~X4iCx= zhTma}FcFI?r4S*1Y{u4E@efU`rVKF7(gw?-j1H2sZaZ5bu^s!~!-13Gr ziO|ce(-GcQSy!GN?{ZL3B*m(Y)?CK9dv;$*3`1$meOS7epq;+9wM;d-8I&N#rZ9kuYgv7|wlF{(v#>NJr zk35rGirrHrW+k@PormgDQfJzD+C_wS2DSpIzP1k;$}y|(6*Nn{QodFbGYLwsj=_Va zSKgHknr!9dF|utZtBmlRK1Nuflm!68^3Fv6;AyFtw*H83766Q?+^^sZ2APL-x5%q} zw9^|z8YV~zu$MSsh>RUiEIA6y_q0Zaul4&TI~fEYYO z7wbN~6+tg%DelGvV~Zc%J|U%;j2%~*2qq*&dzzGBu*~-Nu|>HDt&*U3Inep~$GgO7zhTQXL8G*4vv#400&R?T27<1xKxe%`j|>96p!BoMph7l27vLnS zgVEhJO`HE!{1b`~R?XM#jXpa=3&z>OnR|o!0x}=1ve$%$IkSWAS|2VqJf+|;pljX_ zR*D*<%nSxeson$cy48Q&zJWF0yRCFoe(+R7`}xp|$XB$)qD*zas*qIrROaeiLc3HL z9Mx@H|C!)prZ>cP7xZnX2Uo9IcUi}3+cvYQPR7qU_r!zT3A@I~-!@}ykQJ+togm=& zVVbj$9F+v0fY7aS=*+fKg+DRZ>=OV-*VD5|C3{3i z&lY#LjWL$XJscv5iJ6wd67_zI$NhJYm!kLaQ5|?RkkO~}&B0#Uq$pa{Z~u^($idyn zgroB5*IoMWI(yO}YwJ6)*>v53?US}ex8m=dY~lnn(CEoFF>%P0*|*{~#+7}$LK#si z{+G@2h)r7eN^gTpyT<3Y=r-4iG-o$YK|bI_yMumg5R^VjyCKKXCanX}@&5D%cW ze#brYO5nz`Tz{@N&{fv$TA85mn-$+^J{A}yEuFcCRYo^lD%XGG$+Rf>UFwUri=kkK z)O}k!pRnJ*6Wy{Jy%r9omOX<`!FU0Tue#=@Xq8Lt$!*!Gp6@c>g{S-M>`MuX=&Wqy zq<6@CG>a{s7v)ut2TY~+Wj~+sSYKn!x{y3+MxU zW3_r?*kAV#n8&DfSEx4DjUqbWK_uNIcz8&WBvnyUJGEd3s`GUYdjT~IX9@1cA* z>alaCi-^pir28ys`vl3YV?=Vv_E>B?8NsjzpV_u=;dxf*IzA^kiZ)2>%`q0V^Z;FN4(w!v|l98>LIKT}Mm+U{(jOZ_Z&ld6L;dHC; z#Xr#2tHQ%Z+1%EbU|O!G?3)$6mcC1$W9jA-Hnk4Duqpkc_yW37@X2+X)Hx<=4v!I|aiQTL= z#=>`n4AnHuL?Qp6tT61L>>-IeIA_>HppotMaNaP=!zdNKc3u3%AuyM+PIM2 z@Ga!pQN&*ymd%nQWYVkCl2b%pqc&n|@N!`8E(Tpl_78y??_8_&RoioA%{yM^>+_zJ z=XBJKwR`uu=&dg$Ews$EdMCYHLCH4TU`ND6ovN^7PH9dNVtw5>-nToxXs&rYS=ni! zd|@vy$7}Etz~8sn{&bI1TyI{zWc>PR?EL(;f`#V-0qc+xNv~kTUBy%`=KZ1kg)ZLq zq1~4MjVgB?rR9R!Q#NihY_GtCuCat7u0Zy~(~*xcu=bC(3R-EKQSD3X?@hi@*G2E& z(C1deB#-{;e9eZcDl=5r7GLmPRU{J#oV3Z;E^seEebk~=hs6jpO6(BYm5SIqt^Td1iv$N4t>;kWFEcj z7kz8fF+BWKkbONnFfbysT1saJOhp$6O!y-jY0UL&#r|G1dT2)qT-Z5YYMWItmr48< zn+v3CZgmf<7ryjvp|WVw^ykvcu&uWD_Tx0ZBO|i<<^@h>c_H_XkN2pAe-)|kz8-54JHEGi$;H?~s7j!L{Zb6&Yloed+ zPr|y<$^S<5?l8ddE7J#MY8FM-#rooak+LM``+W*tHGq^v-)Y}V3cf=?no7iXdo3^5 zyoM#Wi8j*JC4RY70A}glkOAQb-|Zrscj(3P-)>g?fj8spA9)y2V@CMwc>cOZiE5AA zKsa0Hm0_nR(z!}xe(iwsS2|?mM!d~Z)*3$Tk>cZDbssWy_mFALu57mM9Nh3-992_c z6_WdCB%@QmEaT*V=O@y=bSSBYB_nE<`#p*5oq&YcT*pZVBUVx}x3x}YPPX+?KTrdq z>32PZLAr;#O<=ngg8n7hE)(NdIz!?wlb+feH458l22e_T!ucSi1nrIOY+Q|aTHt~3Y!dr{Q7Rj2rejdKbHiCyYSy& zy%ZiC`Bd9~oydRNGuU5$`S**@LpyDE1vAS!u5)aHuEnyYbr%~SjsU!bD%CC=^C4eu zMOU)3tc!!A_VnMyjRL&+l2J8y?X_S(K%d}}ehIoO-^XRD_=f|KYTw{#tS!_sfN8XE z>4yH!#eG=#&f&bwD97B*Euz1V!E)mV{@6&OiPQ4EN3b;KwmxRs_O`x1jJuT!p?!{0 z{Hbt9v{idgxchVydiF?>MM#F+3z)U5YhvepX{XqsdDMX9`361W7WoYM|8SY?&wI^! zik0NScGE{S7LxjgJ&$h^J44;qTR=fC-;m;PY$GnabZ%WfHU4C^B&F{Ctr#O}noV&E z19Dlh?@FTCFJ+S2`SgwFa~O;X-hwz1F51sx%G5?AB?>;%HA%eCEzHQsumHRQPhCnt zC26oLjttztcZa*OVc9e_Q4)=)13Uv`fjOoA`!z`JL5)UbWBayGTwbB0go9aDVUy?S z%8lfwW_24|TS9*vCO5*%tE=b87y`;a(J_Kec?3$}NVWWi9~#fJu32&Y=y_um%@o=Ha=0&Ym6CLoso*4 zg0Qejz7m9(d5dOaJ*9|Mxk@r_?d#*l|5@mBQ0@gO)c;jynv=Cz{#`(v`Bh)vB404r z_F38Kif|-Vj^d;+0J84AP-gi1kLXHnyNaJr%3c90n*w+jl(S8v5_XCLmQB_?P22`Z8)&K7^RtDYno(MlU#qR9_E%9dz>=ruH?=6Sv^TnSZ!vh=U zuu~ugorK=wL0OC50ukGm;zvKueDJ^xk~3z%8Y+O36^|7sM`AoTAEbp7`WJEn#w`HH*F zPiP1tbLS`A7~}OwS{mQlk$|`ghN<&Fn9sle6To#NdJM&9G3P|jREdzN3mop0GXJVl z@bL%>%qx(%0()6nI!(=wOEReK1X!>-NOd^TuoU>-V$(JD4NdNbpOeVetcu=-;uC%* z{$Yi_*0ZV9Va4CNXiqlR@w%p<;_5Wbd5`=w=r@=}v(D9cqojrG#dUVly@V z!R5xwO$VKevX#cA9miIt?ycL^Q^8)eJMrMf)5pW(Aj7Na>mcDk=Tvz+|53n%P1ES7 zM=xlesA#<@+K=k-WoXd?B!{I!o(|(*%*rIyH=yx(xE57btbK>1M7FSH09iaC@6*r> zX}15B(^$P|;eLC+vXRLXhTo9gf=1t~TuePfcJz?K@QBD`ea+RwQ~pN^3Vo(g&2P&G z3o}+;BxuN;&>_XPHQ5|(yx}zK#7{38u0Pk3+h}{d4${39Wb)4(%zTLw4QIq6yrkXK z&ukyq+GJ7A1bWbwNdvlM=tp=^_o&!{Jre0V7lg9VUqCG#nMOCErgqS`NW?pSS0spL z@Muh5a`>`qal#9<*_9h;5aUEQE`>~aH$ImL31W{<*6H567v42w7yJg*;A6*K$SH-> z#%txGPS9>vw7afYWFMigV_ABk&6QUel!o}cI)%1$&UD{x&!bG`G)j;bydfwucIos5 zzpWo9hqmAQR$9HgsD{}S%rz>#uDgeQ-y!nN5P=E*)ho+t`~p&%CLG@Ox%~mKH02oO zEy6)_a3p=i{eeFVdI^`}r{eCHTD@{@p0E6GiX?@>b0{(q;r(0JM>A=9B`UGI^i<2j z_HKxmqi+TdwW#-l3=GVw-R>E*%J)p!LG#`|!_t)Ir<(N1lipwPQC7KsFh8b_Q`tZu zw<~U0*g;2<9ikG7=j;7)Z9t^Wl2G{Zv|aSw!tu$%4mqTLzxJ-ECAhKyhIN>=ARb@4 zBYLwK%r2c{GOhzHH_mDocl3_)GVSzQyuK%Z)wd_RLkA^+(+YRZCBRyQ$imfPK#rlw z0s(lL>%+w0V|%hgv%B2|vX1!;=XupX;jGP&bv~2zKW1nG7b7DLxEVW@ZobaX-tqg` zC#9co(0_$SD`I;N6E39|zCt%(MK3w_)m-J?NVdF0RTdO90#1Ikz81hkDFzhsuGs5| z*18WKUw_^bEVJ47t3dpyHpku>nqBS$2YF*0!4z4F%X{1SN39;)2TXPpHYhU7ubaQw z=-#|I5fGAs?C+xqrdPTg-3z>U;ZSB>yLt1@=0i?aQ=f|`N~S0n8~bkirlP7%M@po# zVPyKGy;~RiZ6-~I#r_zb2XZ6l6NgJ_6t7hG2_v+_j`ia&K@2kP(j}-``PUyF>-1;0>;e^gwtE_?E z*MDPSiRj|PS%O;l?1Cd|v1dQ(w;%N@)M5p5B07Fk*+3l)nTwDwqDu`gT$5i+4FI=uMHnU8a@!6t0kk2!VIoz#|Lf>-8FCyj)QJcmm%L%ehE^357+o``i=$jua}Z0drDtb3U}$L zZF!SfGF6gPutwDJe}j~gR6Nf9BoLf81bqt+59wA*M*Wcbv+L3&B0Rj{aq(migQ7!x ze5qenhdpvPcb3TWSGM=zA?po82e!a}B=)G@-si42Q<$vR15Fu+ zpPU53o|?e+`IGC1lgzywqjWp&$fcXo!j-agb7^c!X4uTu)`^hLKKY4>KDtiR`IxY; zy;(-FNBt<5;E&L@bFksVsvn&Qq+s6u?L8Oq9m8nPA$JTupuVb)&@?!)`fP28`!l|> z`vt}iEu)eOxIdhvcJc@QR3X(Nxs{G0-IM!Q@hxBIP=T7KzKyBIZ+z+R)a zw!S~mj*9RwWYpj|6dV$KR}^G;pWQn|ki$_VH(^_m9cy6Hnu7Jg!Db z^+;BaW0tvg=p8^@*4JT3ns_Qm7@@s5)#&7GdhjD4Uum&0X#YN+oU9V0I`J`U(l5*M zqx6gh3{Vm3nv(`Akrsz`)?(;dwvK@!a}JDy>G7w7lj9KA-x{MOXk(`}XU}o;jEpv$ zcXF9CRcbmqg64XO6URe951Z_NW8c|go?o(rfh(bOvrX_7Ya5##eI@L#D!-*)j_mH@ z?5MwZy`U7d#g)!yYU9i}`td8n^z8WQ03|bD?vaGP>!y`{bH~6RuAleVqTYpP^9HR! zsr?_`J;isc*lRXyJtzO6#mTGsG+EEs+S4<2S|vh-jUjnZhUn{HSxueDc+Wc5qn3Tp zz0E;EP3?Tg?b9uPci|^aw@yYhM zKhNeu^y9+xNhgT0Lelq}^F|^+OyCSe&Q(~g!S73SaGVB0ianoFPqy*P<0BlXi zIj~oyj!z%Q&Clz=q#EE4<)GB|iVvQquQuDElI1+PZ4U3Zer*9F&R+pnR;mVEBO7as z=h-cX_u{|@AK=Dxx%SAm;?=KijE%qF2WLJ&eG!j0;xxYo0ym#uqaYPO-P0n`oAa!I zoh@NPe+CfMnIOe2coF$AC)^a=!mu4PURB8|c%}ww?TTr=-qMG!;K)&!yPb&}iJ)gM zJ2*ukBiM9SF6OukEEsCpqL6T}P^71u=zpyCdB6222j3-o8OW;}x;3-V5O_kMB-~-Y zJScMFbuCkb`WVXcYQNJz5T&&(r+pah!^#}c8=i*xcrQN^v;nS@=mcO{mg2@3@V$g) zA$zMVczc?9s+7;Z`z1wM{`s-6f6g$j^KS0Zru?O2>4?y;U|yOeF@F}ieOMX7IJgX; zcC<2c40zgDVD5heO|3RFWRJWyDV5zk{U=V#bdED@LS^d{KmgvkdwXyjlq?eX_;^`< z{ND84U$Ri=VA7PzPUMV>2MYroi20Xq+K!S?VeGhK)BCDdY90Qsd;)}P3B53TM~`LB zOg8L2;cDQOYgL$JGI44c>QU(N=;BQl<^&g&MrZqS)peG+ThXkQEq*J=++i-2%uY(7 z^&-{uyit6TovYI@Zt0lTNR7p;>gizmathJc$hdzcm=elf*|75^V(6!4Y6WHP z?R|J#G9~%8zpI`Hd zPH}NoB@^wwfb~ICjQrg_ExJC*CLh z3F3Z>?dPqcRm;9;{dq0#oHs-apF@pEO3qrQK0l6HRuIzooSH^YP@=L~`&_+gvJO8$~&jGQDa=dcB^T{wY7nd4RrjWmt|0 zp{~L@&|~VN!~?GzU|TZwbX(x$?29U+^f?k+;>TY_hkQQcV&~7`U{VW1H)Ce94GX-qxXN|XLj;bf(h!WYiNN906`A~GGM+VLRHAo5gGR%BBL3RhEn;jG70)lAx6T72nzkn@r+yqdxVBy`NO0T=n;yKwdva7=KvWZaS zcJD>v@kA(_V*0XfgCbrII#hDYALkjbL+g3LB6@hNq~@yc zHnmq)`BFP$lrgYqTjMY#44r|?`>%LR*X(AE1yk!nCYd;iUM<0{y>cP<{<|5=f;e9E z2GWLwf%>eL1_*z9N#??8XnUO!`P_YE+Z{AvhVX0T*Z&?CAgtEGyYcEEs_ZVtWV<2s z^CIb6s>C15!IaGkE%k&LOm4q&S=6&AB-yPz;q#DFGQqVpHab{+OBbP}L5HVChj;t_ zF3vAO1VwGryqfIsJS9AAo}1owSk`x=WmQBz9u?b&kEw^TFA2}sEm2+mbCuN@I(89` zzOaQj_j?>3b4rUZ#QheD0mbsw+C3PHl#tcuFV%akIFNhP@w>;Ioz&;b z$wimv$ksXWxy}8PR7VOMG4UO~T`Hv=JYWL8aCE>DvalyARy?@k;Ss2CrSS5hqLSG3 zk*tL{k90kqme%2vD;m{-iJx4VIJBQR9zalWHSCK<(`$jdBvFBm+p0>IcIp&$0PeaZ zA|atp|4MSFnfQ9JCY7%`vvw0r834qMNxMz!cYdZ<>su2Z68Acn0R+Kg;ELFEjm}5b z7d#Ch?4$^QZuD6id-Rjidi%M=Dr>uNyhXO7NfNHw5^c(5BzG5Gqe`O7b~BC_#j2d` zJvrYyKld?-A_cp|;=QMx713)|Y);Ye1IbRu)s-e z74pjf4zX~k#ae(KZ130?P)9wx*FVW9F2shRA^*< zeAqzHe~IQBRiDd<;{V}2RG+^-P@}fP#IJtLcSn%$n%=Sq(ulXTu%A)Vy|WK&t(AAG ze`6WBsz7J5Y3?#qfA+}4%00uG77CupdkB`oicG3C?kGHS!jF(I#)CAGJ!pl|If_ZW z-+a&EmH0wW=$tO=j-;_6#?f(Wz}cxLao+6Un$5ToIH5K2n?~=@rVFN)yZAoZuxazl zn+H`FCVp3to6%NT^~w8QkQ`NX>JIl$$%GMKT6nU?(q2fGP8R zLIAwcMtlVmHTKUNVpwP7mbh&uaV3K{Dq+>pqx?r^hlSL%L^l8~TXufX;RGatA<3bCL}bRQbN#0tWXRj|UZ zaCJT77wYLHiILT9RPYAI%BMq;&qxjq8V9%4&Nl6PYnNTPkJnqtDU^SEa;1-U8^i#I z9T+y3dCTHlj%u2Bu>!0kWL`y?cz3ESOWNJw*R??XQ&;U9gJ&;ZvvO+O z%$bQP^QMr}=74Ia^}NeL?-jAk7Ss7iM7jS*DfBJ02SVQ~Cf)aStM!rd*ONRJo>N_a zbDx;MU5G0(wlEt{;;pZoxV5=f>uQjqGV=;jDPNb&bBLm?iYElGE4b3)o#709xMb#! zh}w`hm#^~uY+4%tlhU7Z;a)b z$TuWoSb5AtIY`PB3q3WZJ905s=_YAIV0^= zeLpP^gaEew$X4)FauNw{t$JF~@<+5?;WoH04H(p9iINM?G^e(Y4tXUW1tgYpyw!`v zxgh8t$g^vyHi;eZNMvAe=IMwd6?Rpzn;&x`F1H0_iRL$^|g|SR69FjbexdB zWpvg?!!sWwNn>)fi*^iicv?Y9azQIG+ka-vM2klHK9+%AHabcZ@RQ6yr6Y{T!}2^L zE{^G-QP6Q|X*)TqaWK0fTkLngWjkHR&Yz9#GYJZaJSC)?@I^st_IW*_)7|`+Up%BT zudqo!aN$fVVSdo{h<{!6yW^(aFTp$g+`gY6>7vL>p^F7xbQouOp$@eVwH^w`KMs}EiQNPV(v-lSZ)l5s z&Us^7TKlA=-W(s?lzO3SBOyXCBNn1nkpt{Ght3dvqhk_&JJW|Fr%WVUt+yUJY9eLA za}IT;Wp707oMrEc$_J30b$PD&2v=+ny{HTc;dw)rPXsY2 zJNbGe`6XU%7uFOX85!fybtCbhCtN}CgCa7c+Ui`=vAiXqYHt;Gy0xO}w@!EF6LKIX zVho7C^14W&Ux)Dq4KkC*nA1jbe2G%IlS;95mlpx2p(D(rEQc=m^ZEPU5@(!`VB(9U zZaiHT;Wek!u?egz*)eTD+Y+T2we}gRzS|bis$W#sGA;(&RtXUH{mJ+>R~q}i^buJ; zWuXzeKwEE_O8GSsS;ih2LHuWcP>+iBn$b33Yg+JZ%c3Q^O*Yp*x zhBphCk`~f!KoJuvhYERXW<(1q2vN5)t}(=PYYs6qL_Ij5N&~X#8NF?GR{uplZdn=*RHF16_!Dog!)mI#u`k$!CGIzb!0tO zA^)tR#+dA-v9&wQFXU-3wbgs6#dZiMSz7m5^obkjWoC>>(~dMly96Q>J4r~D3kDWIXnFRW4hRlRdQP7X4V53m=feD^?P3X zUWXp^b)9kCyOy0;B4H!G+q*kKsjr8`xc{qs5RL5yTdo$ytla-R|=OjjY49As@ z(a}?(Wn+MvZ@8N;D5+lrRoRtd93Zic?cvF~d;Ep;B9H`-zLLwE1R>4dJ=&hCc2>J& z*^Tc%&A}mk_Tq*)KTKJ>Sy~Sp(j@=>ZaE;dRj~NSmL#xU#G`f9@1l%c&RRBhNF~I0 zyx6mNjO-WTCR9-$~#>YMx6?4eD2H$@^9F;!yb2j`Ms3(~~`>jBI2&#Dtj z+{)W$hZ5SofWU-=fK6uUsp=2^8q&BwPhG}vuH{-f*9qOv|wm^P~~IV!sLrS^FI%%noZo`6N(f=5vwe>?s8{ro4Cg!W}jgx7IXL+das*EeFvOZTPPuDcDT zf@WnCK9`1k;SMe(LElBS#tBM|T(5d+dSQdpOCzuusBFph~F`m-de{kWt6SIWY2j&sCDE)ak+l*6-Enc{)~W zZYmvtw71%CZ5kW7Z50mrPEHSz`ODKlVV}KzirZDZ@fu`)ZN(=$Rzz4s$#)KWy)M0r z_3DaG87o^7{?mlC=h-u*M1qF>bCDokAk)y!2}@o-zMj%b zk{S6aWr?<(LWcJPzi)~Wc@ROFI0AJz{0Ho&xt5dmge-y|U6J9f;znH-gF+jByMn;b zMDm-#8jPJnCe`&qiI^-e`JWyF0<%LkJ$+o#2oJhsK@8U6%P~vU~6D+yCIH zE<1Iq&h;;rQ*_T2`PUfn`vx@c8eO#KmRsKjGoi-^8xIQo)gy4D$WtavV1_dHvNXF} z2CP)Mu{*C!)APvu+~w2q^O5gK+`u=`)O4vBdY-ThKHnN*@Cj?{{c%i%x}m5LIl3^J zpPyv65)=CA%sbJ5E0VTUFL4-H8VI~4V=u(bdLLrtC&Pt!pnB@=pnugzu#sRC6l>6WXd)v?a@_}cV_ zO9?{~xTjr$g$luJ%F;K4YuT!MeM7e$tlSQYP;1iL_whgdLxOMGA?kSke#Ij%hXW$o zFpc(4tt?HsLOWFm%*o=)UZs{ z@{_3jJuMTc%YDx8?bS?p@b!OINb8?*t<@z0L{QXXfjDOCGq=$d$p#Y6+o@PugqM+n zSW#4(9A>~;HEa9#dz@D!#Wa@RgK>wP3mW3xK3^Gx)>`DyI0a>(D2FxKobgLAk;@|n zVM4!;F65#BcYtywr9NQ&;?KdW*BY9U-4g3nD^Cdkmt8ugHE#E$p8JsoL<^4+eyNjKrV8O`K4Z!{5)kj{P(^B*IfH8^RIFdN>hx%)j0q5MGHJ+#VbW!T;iKQld~_ z2oFZstph6<&mjD%2V3f&$Mp|rVZ`Z02;5EI{pAAqtQd6X`+uy5bKz853D3>QY)0D* zOGa7tmF0B@4ok6by|KKqUr5|?Hv2}bT(p7w}R#v>4} zStq?uhXCta&Y}Dr0%Q^uaVX!*kY&0%rN&0q%9E7hHquxA9LZ7R(3zr!njG&2!5r?z zfRrLoQ^t-SRa}o}3Z6giy?*}DohNu(3!5msP3N`Nuv11h!{h{qQ>z7Ug zoP3kmF{q0(#sGP2kf8D3UtZlm7z1~D^EH2lEL|AlMu0zJ0wviE+kMh&)_g;1RuAqR zblIk7EmRcI_reA%2O(T5#?tD*RZH-l^+Xd{BtHdxui!84NW|}G*0I{_GkbmRuHy@i z3|-vua(;G&1q4c#}zRpa=AJG%?BpOSAgZ1 zkZ+y(9)U<#<9XqCsMy;JUsvCJRq=~X@V_d4`lswtU%#wN@QeLzdrbxW&j1?F#*@73 zolkcIEc(tH{B+;4TR*#9zsYFeRS4fI6U2jTMmNuBHmbl(R6@t=Uj{?y0T?48cx#iu zcV_Jp8qR@?QL7U~A7u#<@bC5xP=hs(6!^)^<;oT$Fh*6}hPPr++a)D@@> zjeBVDH*piM_I6mYPJF;I>^Eh)P^%9iU-j#Aisaa&;oKuaSrjK=Wt;TyB&FC<9Pt>H z96tHYu5F^+CkWcZ0km}Pi<20%D-fq{BE$BVLp3A~HF+K_I>WSoH(QhGNC3uBFK@TUMN?JF=!4ObX-K4Ty`_c&{brQk?Q8#RFHB3r_-4}OC&m`M;EO!ihykb@#h3@<{{DcEnEU6sNVJ+aJS#T z*SDQk_SpZ@P-WiQmJf^&%f2U3Vn9pq!>k?Iz;-^%fOpfcpBlJut`!TJ`hhuDnD5jh zqH;75`<8LZ9ZJY4V_#S_$*+8*px0|}&mT;tMArg>cpXTbkGePp`k+B%RJ6B<6y)qq ziIyu&Xi|f6GBLx*V3Y~vx5t~e`F@5I(X74RIrak*kX;AydYNQQRpUdA-v*k^sRb7D zMf2F}%zQ1{AR27GG7-1ITSp%3$L!Gwk(abhl`6Ts2*`{%Oatz^5$4MWx2e2gXob*#*Poc4icw+ou6TSe#Bxn^m=}~lnapv;MEv9p) z7-2rMwJc|y*>(axnbj)3+md-0%v3&OX@e4~Zs07}PN;Ej7nTaHyz%BaA*`38 z8h8SBlQsVqBSpu12?HE$$9gm0Qpw<=(TVfxzi@W@bUAe?*nKJJdmdvn;G-gc@YC)U zjPU{<=BxkDQr`8$@VKmv?>yC9z;bn?W5JnPHBSLg4P;H|A|xq_AZapcgm+iwUs{&r zSfS46`1crkI`=)dPN9t2yEk^9xgG7TJ})aaoXdXLoj|rD76{AWv;gj3WI?GgP>CRZ zzxBKx@2s=d)dX;1q<$WsUf(nH7@xlFumx!JmP=UcS?fj9n^`;gD^m#`SLx|DCFwvl!18k8<_2hA&Q z%%8s%Vrq%YSb=C8Uv7VUvo9q4=?(=$1R;U>+0~{du>45fiT1mj4K&y^AgU#Nk!F z^C9M~-47uUtq!MO>|`K^710tH^H-u7gFOxBmDRVxEOcJj(jmg(kD7!e6{Drer0YG) z$$1vRF?S@0%Qs!d(b;ZzUvg8A*Rbc?)}V9m=_HGSsM+Vf+#^nWhsjeus~oSDA8;Wa zK;9rP4oxI#3#Nq6aPZeHIn&xs#b8Qcdg#u;n4fvg_+3;;WXbrYUl|$v>Aj$NYWpRI zagqJ!_gg76;9pK0$-fD4iv)NIOL>bOLrWP@kRp-M%|6FZ`oJS(N?6Ji3??8kG9Sic zF)$s!se}6s1U8D$Mw>eKgaHO2MIy$96cu2&dO|&oKavGwGP1!_@-{LO`v-0b)v#U8 zn4UB=4jww_ao!z3doOGxGmuJdX+#ae4lB zT^&|ofA4$>f}fT89lM1QJF7e52cAiD#z%wfLf*%l>eBX0&`z_A#zaS{h|_9LfrpKz zLJE@XW}L{&)muhJs~10soA1(#eYTm7L*WFwHUwa*31f!XG0@~lj+8)xmJ-c_>7hKq z**+bx;{Z+qYdGKT!Em&%qq5e1?%%fgKY>rm`4?6Zm$sH#y82Jzi!che3BZCwY3ro9w^TzlQ%CZcp)rDU8!=%S`0 z1{>e6a`VAiCGE#jFenQJ7X#q43j2q+vjm7B^p+Jo>MmmC@Do3;o(Z8#3E8>S8!QjR zV0kerT={^K@&jDSYBJnZlUtGnNO^x4@V`_fR2_qYL-L0-xr79Rw_BiA^43@`oVytEU&ptBBEEAAvR#zJ2pDXfoifAN3-Z`L__z&`{xk zdm=OFprniQ-08cI=pCI)X5TyY0zXCXfr4Ky&MIxLU+?GC>%oT~vMoYUJ);8;gLW48VivYO=ef?<2Qe0TBVdZu!M0Zfgt68$)ZPtZsRO-V-s+7-yte@AyGH z%@C|2HVAS|<8C!^YwL;&8z~jV^zmtJGMoj;O!lp_!IXD^q_uHIcRpcWF;lhVPkUX^ zWR#L18_sY5G0f(dxKDuapdB`Fg9{ad!zsTme|@hBJ9Q9#Z(M}+N^-IU2NNigiiYgN zY~X;;KpmT?NROV0b4__hQabeEg3Y}BINzEWmhP@n*}*bcO`i0wL+*hD-+tWVm4KhHI~ zKpck&t(>|B$5Tzwq!7Ov{A+Tw0Wjt(Yqu&F0uGMCMp-!qdTa%v3EQO|;|`-tbtgQR z!f+IO<#?x_o3G_yG#N3SxH|BNTFd%fn+p+N1rYX#`sH7aHbexyxI#F%MVnIU#V=}y zdj~Ice|U&p8;So1g983J_Ao}!brz4xUkIjH+DWi<>&zXUTTHtA+FbL z7W_DbezVK^O_Cc(Ids{{m_OwTDd@tp3N_Cr#TjA;u^P@EoVRzh*1=?5^UjxlNE{ZE z7i|-c=JQPsptmebGWK>&^D%Mt&Gj7t!)O!$3Id|hD*CHc_@Pi&sC8QXl>G$3({)j? z$4H%FTgNCL%Y^ABCXZ?yGgC{wre$U|j*8^zug&7)sTm8qid=*!=AWD>5>r+hax5;GsKnPzBNBpwWFD`8}T&3;!R(NGc6>mY6*OV*ckJAW0mq5;J6rczUY&d zIlG9ehUcx+$T^P@3VjG&dHnjM&h`-qr@{_$50Aq1>-k9EaQ|^^ojf74Rv(+5iyceN zT-t)w|jNB>8+t~Jq+Lxhb?jKn=x&9&mXX`R4DQX`*J zKr~S{2l)Z1AXJ{k_I-MpBOOm`N1p{&#TwOuWp`upcocsUv2vne1}_1}>?+^BHTN+M8*9YXu2)i6K&# zBJCpW-DyTrPro%Vlo^WU-7hN2j_#={Johx1thkVlzUcm0=<`4%c^Nq_8wKKKE*nW% zy{4ki=#3Gy7}TQ1hjP`W6=oR|^w39`XKQso;NCUl5aQTlt=L9bu2|0GO`0_#}7nBmMgOM;gYT~0<|Gjdp^*_8+<^ET>$e>o&1)k?lh&Xro>j`mKZU-o*>1hCmobSUQ0x$Dq zI0UZC%7_IFj}j6WN!GdvtnnH1Ji`7kt1y}w@FS3ABVu}C)&*P&LRZ_eJvq@D`6i^m z>`fVQoVE!Tc+>qY0(VmKpQrg>>}gSO%IL8cuJ@Pf!^#(KEY;4Cza`#}5>i=ct3`{)S9nxdx&TJDWC2vwQ<3!Vqq)&} zjq0`a&33|Wx~SGp2_Ip<_FzllX8&mEBpL8BeINTAC;)1dm_#DRNNOCcGL}lFEy5p4 zL`<_$8w$tbb#U?;NKeY-twt!sEi`u`q*B+R9;Z)DxtUN8)Ol{y@3MLqD@ca~s z6z1C#gL6iPJtMtFUxy`1KbIrz25z(}OK!U0ZGC=ze(oRA^00Xo-#wep&BKkPCSBV-|?32^fjOz&~~y-jOw ztxMP{5u##aWqIO%_UOlkL$SiR9!s1IT&Q7thn(Q9K9Y(y@p+EXx2F{U)@-3t*HXL`KCDMa2a^ z`zj}ntI22z{(QL(jZ1pGHEAYG0_qTqR9iH!mO2susqEX)EJ#QAE-+;S<1)FF+YzON zD_}-bf6WXBxpkMRYNHY_abeD-@d3@Au_h_ME4y8G;-|sU_w5Voe3|4m{{TO)iYenv z9&G8A@lBkt(Rh$eh*mxl%~U2awjp#5Mo4BP$~a1fa!|ZnUX3OzF$YVbFjZl7mJfVxfeG^eOhm#u#%88qMn|VZrVl))wjLhl(vL6X;YR6eh8bLk(yn8Bovj@ zM4B>VraYbklW|=MP-gg)M=Ik^4nNE)PH?rN{UX;rtAsaXtCrul4@9%*K&Fbsv-eFyD(r=55cb>An$oy_&G{yhob~GBSfC@z7R*4mBmG2V^PZ!86R^y<%;VTx zfQNlqN+1tf@%spWWn5{C@UN1CDxw~68Z_;}UlS*&0#yKYu_>`sTjmPI)5(aFGoA^q zXp_9KXk29j$%L*0aaf}9sEbK16q$*xz2+JIy}I=9)Y2MAsDWzMwMDRaA4!Uv=8iv@ zF~rjcyCM~q4=^VqyzQhme1)n9xN@v$duQtPX=?qn@ISQrH%{jlfo)0`|JR-{E1|cG2?x=~ffUx~6^N#Ay|OI5 zqsaSA>HtDMKFFoC_!?CX&U-+8fGZT(Q8;qM=b3ZWv?PEoJ zxPE9ez{XVw=-WiqvF!#iWzG`CCHV5Kn(!Za`8$r;p92ka*axr}hpx@5>N&iJ2~ioT zzH^68lb6;4s5V1~;HwCiQ{>@28VT?vYa=^sb)k`OnA`I ztZ)p`(U`f#w-T&<*3;kEpl4!3O-wWJQez9OM@L5O0uw>g_Lm4C<48aCFFk+e(}d`>s`31J;6B=nMI>AJ7uuM&7~?bw^LOw$${@e;g`@% zKq0-14^~x`Q7;m({7%OQB8uZOSCcd6Y^!}IHW;X=5R7dzjP^c^o-s$Dz(lbR6Ga*b z<)n3m-eL>tnQPI0*pr)Tsa!m}D&I0E?kS#V+A-(t$;^YY>aG!nt@OzH-zM78@(=r` z_uRtCzZ+N8wtI+#I&*SgzkF4H$MFSM z4F;6iSI+S}gX|AgvtjqtZFln&F)snIEPR1Q`xHapr)=;Z4s@Ha5F2J6|Az>(AE9ih zD(LK>gXxA)UK*2YoE~46mimM( z)?&$2jB}xp)c=ua$wM|EFqTBKDZ>JufStpR(VTk#nUg$wIA#L{Sh+RLKqXTz5isk^ z+xm5-LtX+C2#`LrILrhy7a}Y0o9`08?Va}4wFl-}^k3)lKMJDUq@jHq3?8iN6~s^+ z4b(@bi63w-AT!PYNV;61@aexdsHsBWTeIii4>ciNOp9EURiu)Pj-3UgQNs;DTSGf* zYpZoke=Dv?{x!p+;ER-~OW5?8eLB;McU3Zy>8&ekU=n7KZfg`XHOCyxhFM9)K1{!f z(f`9_=w9vrTWiy`5e@`oz1s@G`$?e5Wh4}^Sh?#upPy5U-(f$J>FTIHk=NhH*=UHuv5#*2bE3^Sw!fUH-z!^Pe1;jTR&E z$=LWZ18?0!2c!NnlJ7Q5Uc%}1XRoo{KRG~@Q3h-BA3@4`=*h(Z93EVk{T@6Cpp!^# z8lV^{7dP4>tA(aiHXujN+4#>o^-ky)W?f2Y%0#*3Kt)z{irlwDW@gfLDZ1w*eMBSB z(q(wp(&kG+KL4%8lB95t3-{(fqste9372-u!aO#der+sOT?&#$1Gx zu}z-~(4FHye3>M2_(a_0wtV^h`_?XM^$Adtz9V3`f%i5vySX*MX`FNY<-M2dcp#O`tjEZdLI4TnyxV_C-V>+xf^6ob6$_?YY3V%wNPwQH{T{x@;nR z+>jox6A5|_m%-t^z+$083_@?ABk?=Vtm#%*|5Ud{^^-68l0A4vzK^XwPCGN}sS=8!G_dUFO7=}SqAUC{RYg;wTqwOPnuf%W5^KFlt7#^_@fzi zV;x~8xHs^G!{be1Zp8~j?C4l=!fPCsQK5-ywCz2FySUjnnN*Wz)xA03U$V&n14Hy&QQL)^O!f${;#V)Y6N$NC17hlh5PqybsjIs}%_i*)+u zi+mHQxu0iSOtr+6;Qu3ILDX#cJliAwVP*aMJrfG??BBgHM_PBk5@<@P>>o?Uet=RDR~YzE<_`@$8mlzEA_DZ8kK>!#)hAN%nnN#DuiIEKT< zrr0cC6Ucj!mWk9%O~E5^qZ-Qw`OdM(+uuDdKo#Hys3jd zg{-Z-f}uWsB1gVazL@b5bC_ANW8RW$618UOM>AQZ246K~S~JZa%7sS89_1_;s^EiP z_U^D()I-2rwH%+8PZFpNsG4kykpS)96dONdJYYl9&Jty9gYH@HYWV7^!5hT&?RWH| zubj2jox8L(fpvRd7=WlORIHbxm7HH)2C6*671u2ic@wA{3l^ra>P~Lb5=%Rx%TEf=P)O>=94N zW@jbzIL;|_yVAf!;3`X^iLAc0);wICUzig33O8r9HGZ0jpa);=ps%k0|KEfHV#-o= z=lHt!>f0Xi;ms<~1lNwQm)+f~lK&F}50R~4GSazugrC>=0#4=HVMx6+FZeXLsO9TY zjBBywLkNn0M5e^fa*hNW60AhKc6n*vEghXbKTg$KP%|SNy{;Q>w#Us5JZ}3>sB~X^ zk`F-8HR)PIB}T$|1rP946FF4nDHkGftTW>w@C!QM7wk~R zq`|Frd8?}c!4r|Oon&`*D;_%=OOz*j_y#*x&->m_-$b39r(yR#rVVkA$h8)yKueV50L&z4 zLR|TTQqA-ii>d?pymc@`*l-iV@vkM8n|$|Se4dU~M5bP+)^fS!l7UG6uir@g-|iN; zxoNravV}mwW3nXBFi2pDsG8sRKswd~B`DH?EFSTLlc+>~1qp2zk^LB)>N_DIO$)sG zZ^R+JicqHAX@?`R{r&xOo6^5OUT=7*Y@y_dvB2$7RX$8|Ki`;%mUx42F4yuhopHe3 zj~0Dz`vwL<{yk}bbZRi1o`#}2Z6(b;I+`k9xc>pBo{@0QtH_?_Uzf`N`Mdo4Jwy4; z`vx{c9u0KiR@sEZC3AzBJ{hs`JH%@;S)E0@hP%e=#O-Mzylh-D2fVhPnRY}tqI^@5 zcTG1d`D@e7yz-mG2(f ztOT1&FzG#~#g<_u2&Stk2rN2?_!M!})dVyyTC4&H0T#nKc*Gbj&eUV*%z&AL1*9@z z|CdtAz~vxY1edDt9>H;b2cC&+oFZIVz+A9SZ{6`vNM|`x-kKNitN>%B+hekGO}l1Z zNWiFW(H`-0l_sOd zCygL-k(BaKimqzVJp$0$GFMkH`fMP;Z)@rxwP`vfXvQPxCXxO|gB8 zkaAct2pCYs30rxq^6y0If14tz<=b@>Q{1u9^UKYaqo>dCk6cq*^R&=+U0~H5(lclg zL>dP=I-cR^1?e#NK5{`@21H1 zMo0wed2;$x^V9OW5Zx6C#?%Yei70q+(d?q&X=10FBAdN3DHvp0)PlB1I+IyPiS>u` z=uykpmEDP-v_&yf&cu(f&Ux(N%ucznHjg6K zUqy0_!$rfFpn*yDvU}dJ4W!qTsr<9i{8AE8tk{v@$I4slOya?!0i?v!8^%>&I?EM4Tq)v z_R{b2YFmTX@!hY-aIZUL7((sZoRyN2;?ZWxVMDRgurA@xCIzT4G&H)ABG8jx5TS!9t?YeTk7K3o9!+9U5^=!5jTPPj|!lXXhyqR|X*mP``u0 z>$>F--RUt-2y%V5T?WKs^;N+0b&_|dI!&CnCg3;I{&(0O0}XVIYmtioA;SFELRVIb zVy*vHf6mYPs5(Db+|E$W+fznN(^T}qQ^$AQf; zA%712%aZDsodSFU@Z7R8$7%cf?K%t~l)A85!*wq4I38bpc(3 zH9(N;9M_8IvJ=EysoB;#D;yIDFO?pt^d{GdaK^q_R6xPxcOw~biB^WLfG`5@+Orcm z22k>$N7gMcmqNozz(70E%_u|4wG{g>HZFJ0GTWRqjG_6Nf@+d2bJ2)4!%{*UD+~UK z{evp%kJyUMaTCuDt*nLVUDQj(0Q=1Cmj#3Y6 z*Pb|8-FCv=cA%Hr?*U1r(BKGI$oe(ukM{)+W~%>#ApUg`jesK5$Bp(Hi1+h1|9yoX z+%)jlNN^5l+_jVB?y4dEX_d8ml2w#itlJI z*Z`QKmE!s$hSE|_7TVkcOL5C=q+Ex-w0wLgOAVV4Mq|F4J_9wvLGzAtkmMVT6q+Jd zEI7<<*?FyIUn?Fnj<%=Lap5YDRv;J;wx8lPs(+D;zpn8T zx{4n8GTENKg%=)J?YxOt#cs^Omzpc?|#rbW$niku?M686ifc~7_o3nu8F2aMgR>WctHkU$$poq{La-fANr>0Cgw;|_T1<@iZVYdC*CmxJp z`5yKWOQ@6gVPWPD9wHDLDc>WdGczj(N(@X1Y{AL5GFg^ zIlz;X!D*hT>*&_w`+al3YdIZUd}mR71FQqd+Z69HBF9eSS>fCN6}wC!sC+MZb+UUa z?ADJDe!pV*EvmywUUx20ULGJ_8@yV#NjIVXo#FL^K8OYg&&Y3c*O+w0_9UAeO#b3e zRMN<;h_*v5>nYDruuDya5JA$s+bhzQFCcY_(#+8WYZgA8r6a|;#_`8bR7AX=*|E7W z&3eiM@oByQc$N0KHq?Vm6^so-38^Lp2fT7_2aG)->4BNzuE$vG$gmJ=Uw((gd9&$t zYiS#%Ij^b8xX%U!aq-pXpFfqL1!ol5$S6>Rx@Q8WJsLm3P%yRMx zyCLz{ODb;{3kqN%}2atlPL$g&k<>w zXzSEjYiItJKn&`0MI9%vMXyg2oXTXvMMByRagh2j`tSwD*-u5Hm^cTmYBI0`AvPlpHML{4JmP;F(n>Bi4D4Ui9z z-&OaI%eKM(Oz##il#RKI3?q#qfh*h&bw$_WC{fIWx6hhp;3P|%qPHl!9qZXTX|W;& zDfW0Uw?ta>!UkWOLx07U|A0!dPekA1Dc0cxVVZe4&Z%~Ndam*BpPzys>c`K_o8=+N z<-b6sLE#{#@YH`y3`+YNv+_Es;(4|Hj7wTmx1U}(*hWbMBa`D%ajLV#&MbJA{^N0% z9Grb4JN(-ZR1dy+2+*SJH6(f5#q^*-StCV!+Kmm$mJcG<)kD(=`1mgx_}8L`!Z!vm zA00^_PsFyB*itO~1A(ur{#krd$AtU1=9#}94*7YJ!ivHOp~DN1ztXd^u-skEPu=~l z>DoDH&SOCG|8Z=8yS8E4GZ+sBUb59K8@loxJ1sgjU5(zt=G>XZ{=rPM4XyrUV@CbW zSr&k2G9sp_45bjIjcw(pP|lo!5$UN)gN7iVRba3*8>&Xnz^Q`cK$?^jC3Z&25Y=R} z--&5fa6T-DQkDT=|LnPE9a4;xM;-UcYLo+l-dKnIkLC+ztG*-ZD zPYqtgW+QMOLk-r8BH`DhjBKH)@UMC!1({8fFhU>ycJQ^oU;N0fK0u-e?G+J&#Xk5^ zr{>RRrI2}vWvQi>b#2KtA4Q5%q~C9g;E|*RB-u(+!(`t_iRgv)?gweMI($i) z;n97QByLTR(O&Vc0QX(?&#@Hs<@3F2oFoH`04$Uu*vvzxquD@b;J$ajt-`4RYKg!y}?!|tQ`4fTe#;MiL+<&7NtrXiu z7X$hs9y2@<li{)0Gg8DO+4yQSBK)(uV_e@*k3oMtmXc=>CGL zUEMnT7>Gf-N3w>(?i~#y&CVPdI7~s{@vW$YE0&@&7e*hfH zcr#Xnff3;_^aJV-iL)e3pfXB-?Vi(fG6x!FIx_TtnRI42RCW{aMr#GbXA**-zdq|8 zp9WvdhK93f{0)o>j~YWdies^`r7{)STga}u8}$+UQmAMyFshJHnQl6`A5qzw8qC18 zdG4}e!U`^$=1KDZJw4Df+BPWIYX%E>vqKY7Zqr#_+lvG<1zjQNfr(X`yO8igK5V za!9>wV*{9H9fpi!kIemH3JihReQHR-!D)T^$Y4aj;e*jEvR8Tz#h(s$;pyox(&<0c z8WYb0RZ&$-D>j~zw^e^=Ms$f- zG}pT;SGXHBb5Hu7$l|A&3Y zyHPp}?OO?X&Tha92_+}-$pQw>>-`IYeYmjbL6>Rg>3zd991xqX9UgD1$Ow8i@yw{Pg=2^zh2h7x1dc2Xv}{W}XiH0RGwyU)tb*~IuREpsw2YlYZh7@#zddb; zImq*8EJ395nc*3FmRStt1QK01zie5H8YYCENHzWl}+zZZ4IxdI~Uh;{&wR;s_)h>Vol^mbnupj)RwsL(C=UF(EsUO%ukVxy9{W~ zWXNZGR9i9`&TQEZC#K&Cy5qD7XJ{ZwkI1$7vtK;%rlYcARJG%rI_m1*9Gj8LWVEF% zxM5NrvEvL0A9?kCWA_57iItaHe8^*%>+vPD>DYT&L<)E*PT!K*Ay4Z{$*8*f)KCA(saTFa38@ZCon}U=CwTpVF*I!xVGi;a3^&kt*ZdL1$;UEq|Jy0I0%Nv?)vYZ- zOGnG@rXz6YGA%+~K`Yc%r1(5M(?wTTv!9NOa{KG_agB|8`aT~!ndM72Pg0|*c}x^m znc^aZp#giInU2DF6M=M;)M~1QKJ(erIKVloky*=%S#s|#JBG|zlt9@XjS>Fkam=d2 z*m6gvOF4@5W2QaG;pVez(L{85EY9E&RGp%e%vNfHM*pbgV5eKbk?xYVwp-F~fmt3j zIzN?pE-odQM(ezEz`Hf9km17dxky0GE;JL2zi^%fi@0W~XF{ZDX&_$Cf*Rb}d-jH_ zX@WgWq)3!V1Ok{f)&@lSdp;HIbikX+n7df3an|VNNXt40n$OL=9{Qv3CxQk0KcNd7 zIbeH3UH+ZWJBce&jluGc6le~%ZopoLbeu2F6E))2%3KVbYyu>gk%lGIQ{bk ziZrUe3Tc7hND**bTFV=F!*Yad@TZ_tSv!;YdoJpk=>GDVW#aSAo8LUHz1-)5{Iv9n zvX}L~$FC_ik?eh1S+Tkqg8X>fJ^sHzvd3duRWF=aA$aMNKMwx4_hhA7G*C~X@{16p{528PQV+19{=^ZdjVj-t0?DyzTqj*gfg4^k)EhH;$G(F-HI zz?qIVdc7pEJQFL#HGs0r-1jhgmLBBM;6Y3dQ#ch0_zm;lPX69~ohfM=^Rvxk)qarJ zvQOH#B>O!$&JK$tM&tz2zg2(p z!VL3X5=otHSQgZR&E*y{g0oEF`3ZLQWe*7>f=meOl4UPPOne4C7hD${79qExmj=p}i+45%2|03(FqT1lLb#a0RcXx_Qf#NR33Z+PKr?|Vj23m>+ zio3hJOR(Y&EmEMtgLBz;|ND%4&wk2dGP2g1^OGr^iRD2{crPKw5nGJS&mBm+K})Gp zLD6?e+Sgn~|Gy-#XNh zqNHx8o@MW&289&9rLqZpo+(wzleP)We%eulTl6_;9+eumN~Y;-_llm=;(b=NuqxMW z4EFl82$gZbRM%;a-^y|ojzP^${Piec>p!&s?iO2S@8C14P0Ij6N4hYMfaB4l-lXrv zF|r4MzIiVWh_JNHVFz~KYabLRdzFX%Q5RG+{E%CfBy`6aqG3$~Z_sxaJb^6Z%xPkk zqN$;^&NH^%g$iYr1I3W^O@4?CWhM zO@fW83F&NIclGp)g(Wt7xEoFsUxRb(ZSJEnq_<>|(r>t@@K^LdVq(Nv4yB*(;=W5! z;l%e5pSj7oY`{{_0bi~N>KK3qH1%lYT~MSB1;r{XLGSUcY!^LiI`k(UjmU({^Z9u8U_Iv(&7cz~V;WTw9lj|i$iCY(CBI1gDH5I$* zlxu678QhZkQFJq9(eE$nsHK%t;s@HD513P^W?Lqz(~1n=N55w$YmYL8Hy0hfizU&@ zU~%Z%X(dTH6`FZB!X%^y&daYHZ3Q3k+J82s?sZ@33u?C&7So&eHmC54e;|c(5Szgb zpqYv>NEt>2e=bKvO1(o@10FbAjl6vyDKzA`dPMOVPM8`<$JGAQlt9iwiLI2Z=ulG$ z?!BfY60SOc>O3TC_?_NF-N7?n?PC6~l=PAq`-~L6d43*j#Ch-Sd*2bq4$Ebn$;&|@ zX3iQf(SYZ#&33k*)n5l}HRjCEz6tsK{__TfYxI^%@HHdT5{ z6NpAN@s*&pZ0~HBo0(r62)54T75-hcDv71dGDI?x!OFP*aTyfzWGV?}9zJKU#hu5(gRv-(&`KqsuYhrY=euhI-;4I=iY&{!PiK#w9=%@3r%=r19(tjd zu9_$2c7dD+n3~4N1?vBWE1t$&-?rLf!H_QjD=kedEQFu(bhSX-RG2p2##c!^zH(psEMj(jPG^g>g43BSLEJm@TVj+t z)yz%5W6;pT!h0hF(cc#rp_;r7OVS^Hrr1)jqK}}pk0ibiVY-R-8Uz}h?h@?Lh#5=f0g)FwEYU8caFYQ!JyZZ^0~( zzbB`76&jF~#MtI!xLH9WSPAzVA4RPh%S_Ep)%f~A20nYwNz2BJp62=HyJ0xu+m)YD zNCp4QuoE0O1BSx$krm$ygfmU{Iv;{{ugS%qx53lXJ-@Sj3U%}2&TePL!~-vAv$R1; zJPkTwDO=S*XIBw$pqt*yQO8ZtgUySIHr&=_Vvkdi%&Qdozf?F0%`M_XgU}kmy#1vi z6tfA4yNWYJYcLPtk6*Fy<-$Y;R}Q5Rrk!T&Bug8CCVk-y_Z8(DN$Zt7SpE~iCz<^L z)^i$G*hiuH(K0vL5^m=G&JqvOIIN{lB)c+Txg5jlWr&#kH206MnaRtIiX}FRhwd~} zI<0rCg`_`_T+L{>A9~}oN7x3Gl-Dmq92<80=jT#59wwZ;@2{O&SB-c}(7*%k#kTKU zA@(rTL1ZLa#vx(#wW=7?IQW1qd>P%)yXF%%9ll8d+|)ntNG|LS5h^UZy?4LgJrw!o zmTLYqY9e-h?PAr9By_WDX$Q!i7N6s*r7FX_gu-&Vq1?pj`|+)mZmcJfAx}cQ2mM0K z#jHG#2z%*dMIH?(Y3x^J=@xq1%-Y($d9Lok&XSi~s%n-hRw){x76#`0U_uqX!;6dt zvQJ2qOlFGy-cCb5g8ci_Iw)<^g-_{!hz5>+Q^3&`z+LbuEA`gKrK!>X#G$iirFet$ z8Tw)nauZZ-zVSV{v>c}QzoB>F(#DiiGtiYJk&(6bDkRFBoEA~IxsvDe>LP=O=qo@N z68y~R8AFFMD}`N(_91*2ZIdw{k&n^z#2iE>OPxNEQ{nO}nf$gkLUlgG8zJMN@6w#7 zx;SJ)dxCw~)K5y>;Ih`%JJ=*!Dq{r{3z~s`?^G?dVtB*K>@GT^iuyWTdz=Q@O@7OF zn5ua`wPd<`dtzGLvZIHIKW9epS3M^3Tdh!>UkDu=RKaHQJ^C&rOriMF@!=%S&*>Vt zQs8YBjRWUkR4~bGh{WH0Vmdh_<>9j3S2n7k%_@u^I^qY4rZ@n!?#)E$sJhRXPtZ14 zqJ$v+q$rmkM@BTW2Z(>w+~UV~}G-5NbN z)suIBf1oT@Bqqw9$(v&hVLEN|#7^0K%B3OIkUEg2V*bR86?8Ke5Evkq$0eWRe;chX z?mM}1gvR5`EmAEwU{4CJ6qNBkU3dvhT7!Sg`myE6=dRO{BVo~z3w1w+>Qx|@!<7F2 zvw6aOsHQqbNC%{ku$-~cI?p%C0Y66$X5u4Fr^h?x7@qxZ5sB?%~8=qK%XTn4E?#0j(A@$qJ(Rus8Wx(H4`GLSFcDEPjY0nA`&+LhVev zMCLu@ZLMS7J1bMRPkG*Bujf@5}5POxIIeDIft2Z!cMM4wKmR}8yM-0JrS zK-BnIqrb0nCMMn#S6&4~apjnjv~K=Ar>$SZF))Ursp(bexVO;{DoVMq%LHe`gm^kh zq*7PVX1UPd`$l)uK}96Ru@TW2@O<<|?i9N0rGLI^2sLB19~D5s%mZsR9W$+pVTfD&ui-GuR@PW+#{iWiuK3kj_7ME6I|#pREe_`8cG z|EFu6t9SF&pG19@XJP7R)$APpg?enXgC9DRNYe$r9U%PyU6v(?s)dVR^gSgSKVFSo z!SJXh?l1zcX3akZ#C`iswys~bj%l*5i*_DRcj0yVE65n(^mU;3g4gUxLI&Ea#qhs9 zYP^XVJtG{8E{~VB5Cz1Ky!*Tm0_(`q*V61$p*>4G!LoS1O`#<(z~UCWgtzql_hIqf`S#zauiM?ki#1J6D!OCdlTK%H)h? zmpG8WtsuM<%^OfzwX}Z(clLw}?;LPEHCkpWxKc@3yOPxM<7(#5W<*svbD{fq^~E`L zTX+S?5scjZ38xOY<$?HJJYwmz55*Ng1*2t)S>eY;RWYL|2TUVGPU}9>kZWS9wqqh- zQ$dl1nUEX^~XMwhD^emfr@2FY!f+6?>L$v9kR*b`u3LvWU*N(<;t{h|6bY^VfJU~_y^_j#I z*UOtI>mL`VJuff=$DaHiu=mYwZy0K#>eXv2=OM42G`bOHjhIiQ9&Lq*S-J%~<)}=t zvDf3{4jO1N@e-8-q(k2ouN$H#7HysJwU$C_Fi{v@rgq8WO$!RzR`>V4m$XHhnqk<5 zC-~_VHmphAcHm*OY(ZNP?pu1?mdQtMt3<=Qo*q}_OMQd>-Qf2hg=m2~2D)nNzw~4k z*&h0CsBBvsco{8xCA5Cc^z#Uij;@txk5juK;60*2_7%R^AQ7X2fy~?SM>%`2l{gH8#Aovw zF)U4rvMVH|l;WD35~GLGnZTbZY$y5y+~h8PKGm{S-{mjmj18r?O~2cZt4Y_|b(O$l z*v?99_~v(Yj1@H?nTaZW22eheCMOfcww^e%>tm-OU)t^PBR~PRkV_=kG5siRH8VdUZWui zt6tjX<1XmwF2Xi*`4mP#u;nF7-S)Dhu>P=(#!1P&)gVT&T9JIf(}+XOG#syjwUlna5()KFJ^iQAo;y z>*;e9Fs?aRlL|1^EA0_uU+%)K4SKk4=^^)OBDv4o^yq~8KC4DZYX6t&3R8@g8bsq1%p0e^Fc$QbDyFU&7htb^OC~Eda0nXn`=lP3^->Jk)BC#Y{^Hh9$zJ6nI>c+WN zjTKt$fe}vRW@nW;uSF4bnTMG~paQh>MK$Mv7+@!JxMX}XiCEu6MM#gD5KJ&=%2BP79w{XX1i5X%f%x1lSo`1qcu75~m64u!9mjb*5i|18 z|INr|k`_AZX@C$R$7{ zkq>tg^E+H%n`8O?7u8i4W$1=OZ6ZfhV$V|&bm$a1Ad>SW-_!@w#$SInOf0!FBt-dd ze^$Z_N|w%6i`hq*8l=Vz@AO`?S2!``m!-yO%8iOGND1VY^PQ-k&5h?PnGxP@%69cp z*LGzD!OTxHrI}fl-MYXqF1wF77Q`Q%I@6i#$$T`BK6q>3i8C{KsOWp|_{It;w+w6M zv3@4O7_dl5Q7q5p`vCK%JiHLF&fepq`f>Ry%p>+WW)Mp%U}OM80XeMofLz{pxt zDI6#jugRN=H?SjV^Z6r2A8IIU5D5H5;?C7OwD=dlkd)^DWmAGBtdLf#M`8Flov5{Ky z!}$F6j!{AncJ)eP+m{d%K&JuzXZ)%bnO6J4AsjH%q=Rq>zW7a{&-O*FwTP8#rY)3I zb;-QjM}Qe#60|f7Hu(e`KW_3u2~#UeXxFcuvK_ZjpwvsT%%(>2zR6(Jy}$6ZQs7Wwl@Y$zNX~s+zj`N~Mly&~wV` z7vgeu-xH=82SB*7!~qM`ytq^)nli*{daFMHssIKzvtimC$(uq+q{~%+fGk&i0k;GUOuk(R@_C|OuPKJ{lOm;ZRxUbag^tqCatj;-!f2>7?_fvf*Bxw zI|>ISk83#7-j9bVo1n)%o$0o2?^h$D+R)86aCi?yg}XK?sNQ3l$)pJ(h@auFToi){ zdv-9kZt^E@iKYiY^+T<-Hx%La0!lE!~$H502AiJel5(qTdN#ui%w* zIOG8a!4-2SJTx<*M=Yzsm|~b~t|qzsk>oR9=;emZdRpX|*%%~r&3pz;dD)8(bS^2c^L+}E;)-z<&Vjh(Lx}HC1U*$+_KLIq-u&H! zHGQfozvsLh(XqSUJlzm9ciy{vn)w9NulTZ{)AS`{WBQ?cntnz5 zGge-?b0)kvpWUu_9_`!e+($+XRMBiloRtNaAD(!0G;0q6t6FmKfuQZp71fTB%O$)+ zF5;V-<|m_&!KDGChpt7KGD9G^FjSH*S&;!RM)uzygIU27aqy?c(E&^FxCp}v=N?32 zYd}e*Py(Ut;4s9565Xt?2gu|b6u7*L&32hpvGw?pzQb)r-~8SU^N#X`F1pwEa$mZ; zj)eYj7Ux<%!9nW_&Bl{Pg;E})HycP=lI_T^&vnM*rhe{E*%ou7o<5Mr3oWnDRpR5qVq3bH|>eysG>y=OlzB$b|%ov}V1|pNXCSYVJz{ zLum-5Neex_t9NL)L`*ZHWKm_VuXssDu^kT+X3+AST*b_wAdxy zUP*7ky&>L2goV{*xS)e-Ugs)BW(1V4o(BoG!QBS;xz!c zr6c&6M=36Q$#Z-sq>e`(K5OtJpVOjF?AXT3^Bc9oD`xRh@P2v>jYnXBgDgX2qOi{w z2b>BdbO-e(8tW^otLxf5lbf16TnmoeBQa3}E<{l&hFMU;#Iw~xzSfybc!Pi<)p_Zb zh>c+gn!8=H<3glBxH5uO5-=a2ENQO5g9tIaNR}-ESTEodYj$Ce-X|RARpr&Afkb_Y z)yX4t0HiaCFJ3yDfnJkb`SGhxwU#A8w6wK24RrG;9A;jx245Ht*#Wz@HeVH){MwrC zfw@inq6?X{j5+1rSc+Op?kFZ5U+E1tE<(!JUm1M@iN$rd+hcKbYCJ-5(saoz2#X?_ zN8wXnlur?~g^R5j2X=s?qZi;{dT&^H{I*{2#R)Pk(k2wRCu1;uy$my$w&+!>Za%Gu zg*RvuN>W^p|F37K3NhMvX^=QDZItdT7g-sDqnqsdtF19gGh;*(+DjhhMWsLkh*?75 zJ2V%;{_);-^~2k>^uH+v=#`uR>2E#}^{F%QD>h#~HGuPzFdM*OP1dDfcyTbw`@E38 zYaY_z8qq!h4od9Hu|67U{gM0P#QI3r_ST+bIPtE&GFf>$OCVO-MY6JCngrW-ERgmv z9FNPMpy3j1lsv1I+HOly<>>m@!7SwEnUFsUU%Ob}BxYr9{%zJq#gW&y3mn)p(O)am zvSyi=RyQ-DOL3*|l4Ap-w`9*p>P3}y_J&Ha`kx8eePHuE$Dl=%nh}aq2%WRopfaqQ z^+}v;x2ZxK9y~MAhu&|W!>4GnFzB;MpKX(+Xu6A#>f?&sM+FR9#t`M$=CO1{7Jsw{ zAtpErL0uBe2L$XBnJ-z4IQDqz1UCKNjeoNweX6!FCiV@Z^=XNEd+v7AKOc63-#Rd2S&Y3sYTbqIONe(uW%L~apm(wea!$#A_&#>&RiV0XoKu@*RbW?#uX>>I0QC|)Q# z7mar&TQCj_Qj=T!actdnyhBX(NB%D=LkP;48cfWJF|{G9zJ;g-{KIzT=jlvj7V-+TLA14vr^_sa^FWbx9i1h;tz}78 zQsi4;DS4b*jwsjR_5IZY@koOtfKwt1swbfQPqwfZ1NH8x-m4K#gil*D_@`Ik0 zq-3KOk#sPO3R)0cnIfMq*^A$)z?z^*WO}51DK>aY!KxcBt1#j`_RVX&?tZH z$5K@b(NxP_;}G&Tkx*7D-UK!?sT{K=GVp5uiEZUIh}XZBKiS>s(FUJwxancFTAu_eW=<8Zng<; zPE}O_T<@k3K#sK4Yps34m}?L zr6Cy|;XhgL-^tGj;f8>!s22kc1cYZs5cigy0dRc~r%C4Tr~htd#~Ri0cW-hZB`fRm zIvY0byF{-x)#3TfC+(p@3?riVxh8+yoD=gsb*;-<5Guwstz$c#Equ@@Y#`ox|sy#_pv7B8L;hOFpq~xDJ zr_9H)d9&HsX0n`g41aLvjgS{}kEnjmZe8W?y}o@mR=Vw>s*}(|WpsS|->*)Q;LT?q z*SBQc4aPsjFehbP!*BovB%rZN+mw_CWrmVUK>a*0)2-*a#-o9!%1fa0D*&&_6Uw!vyoeO|Mg_Z8A{~DCBCUL&8u=*)fki zJJ_)v)_o^9C`4O9fi+Q{S}9*IpI-lBBo@*k+rbm*;lyWP|D!t8U(}d()sB3?>8X>? zusNj7B^EAUOatJO-6w*CP^RJ^NQ6phhWk1Jdz^ALE)Ib>&79Oa2hjtQ{2dyEEo zXK!BTK^X+jj$4Na16)>fm%^$rD(q9TP6_1Ia)?RHZT^ta_H{_wd?_#MO;E7Vr@g?| zv;`+FN=cXApd@%5skjD-70$&_{-+ke;`$9CpnAyPkM_nLwLQn22~QB?zwnC|j`h#V`#G%YzwE4361<{gPu zYzAI&W#C<(47v&7##}uz>AH4^d*?8R+?Yc~(F~!y(nkf_T9TSWQe7%50$;@*dc9Ra z3kziJ4Q{G5l@dZt*2wp$ZNC>k^XQ>|v&3wl<+YiCCLaio%O`iRjR+kSEF^hFZ|h4* zsG&`EK3fwB@;%TL@hiVU2pQL}bf{h236-REk%Gf3W6)Gine*P-1}6-`vG$@eAkZ@E z4<8|NAsHc6Hng@Qi4+xxwS6pw^RS$iuj4+c>s+>ckdzY6<}LpszlSzIzT3%+Bv-Rk zP*Q*slt=Lt2pW=tm~hiTuURfIIdU`K{TbftlpRAz>DAoJcA8$B=NH&St!KBMd9zGr zY47A%-|dN8MDPHkrr{G-CatXcyhaZ~r!rP$X%SOKbFT$#6pdnhOdX1eqdD8$iHH@n zI}U7`Xg)yc#+QJcR(~>4L=G98l4k?F(uhG&Y)9{}_4)WuJwvUNV-E_4>^-1}-!g;7 zgwwUpCtAO7#MNL}Mt`Q%6UeJfYML^}XIG?B?v*N!tW4s*{I*>*zJDYC^pZi3vkpV4 z{*8J3a;pTXmVP;;cDbGD6LjYICuF0wMsBm@w{$JfJ4U<(dl1R9Cuz=|dg^u;MP27W z4U#Gy?IpYVrJ2s#iM#tZ0AoV=d};!Y&UyS7%wuX4>ecF&Y=W%U1g*iyXddI!H<4`D6Yhbnpyz zODuM*ooBsn9Q=ge*;^XjpH2^}{DZ?Y%Sc?Jk-LS6AE; zVpmc=;rxsZ|G@|)D4&7N6bTirb9?r86tQ(|DwB zjl(NGw)}JUcE!c-u1VGW_t4}Sb5qAqWzt_ebwW-6aZY~W{bEBAX~f=ZU7-1?%|n5$ zlgirbM`~x&->l7$>|sMYHvxvN7svYvQ0+_3$m=tERARqi9waIteh$9mk{DWS+v7hS z;)VtO!ZQ3WaY?Es{=$Gw67Ynhd*8IRH|4&7|L=>jiFrc6CAXGbEmjK0Cvu~S0zLzv zNqe_fOCe1^E5&?`r4sfcK%VBHE?Ei8Abfx$7dJV5*MdIvgle1*Ja%uMt__;FJo`g; z%(7U|d4Ms2(}C5N=Kh|=9yP&Aj$#+J4|h=9zqC!E<~;QY@$W_u*$!Lzm+bb>ZYIvC zVLR!>TshX}*X-G|KB*{_ktp$5bYn?#!)Q?^JpFiu@)&4sd27%av`oARH{k>h07>H* zusv!Fz3^yTMiMn-k>unXDEIA|E0&&G&AD;+tJ~G+iJ&Ao}g&5|oXgwAs9@+73u# z;qSzVtk0^FIxQ0(hU4a9pcF%ZAdO#w8{VggZWWS-IUb6f}+EBHu1fPNGBf&z` zR{$d%=nOxFpM>$OUkfmyfeOaZJqFg3FRiQ+=b3%zXN`~x%NnK#AOElhezbxb-45 z(i0dTX_>nDVBPr?QjmTlAn5zrtnB}Fo=dzB1x|tGJq;mJbsuf&=4BK+`~pT#zgPD; zK|^}gV~)h{oB|xvj}bLGrEe1(eEz&Q5V~Wic%xo_zixd<@l4?L{5W#h!&>bob#TTk z zZ1xXf5RJMdl!Wf8_X~oDhdr9uZ84E6CAtYc03po)~bf3;Fm+d>Cn451LVvl6q@z7ECFRKJss!c^J;~_y2|j8Q zK0Oyi&<{W{A8r#uUbg%Z^;{VAqUy9x7!|izGLU$aNUo3Q(05PN5KgxL6ZjX zdvrSL9{^~bAw&7S-ULg`SaNXR-=riWht%?2nT@D+UCw@&y?djIaDs{7o%(ya=ydl( zO-Xd^rBKY^lul|C}GO34Yw1cKa1lP$i}zdriH&sPK#t7Zn&s-2F_* z_Q{5+x1YRm^M~}s8dcslE7Ljd(*S7pw1vwPJDH2Z())JFXClmacW~?Z?Xjht&!2hG zAk59{X^H$L^*A)WlN2dZ!wP zk-YLBnd}E#Ae9w)##o#mS}tGT{oFBp$cS>S$nydnnguN|lf_X#IjqpZ3>~RSl^J6l z%084A5A~gH`_2JbKql_|RoDhv#~|YC>+9a8gm-ic30up$)zrH0jrFW#9Z5B|v=>~# z&YbJ}G+G+E9kuFe+CG($lecwxmTFJkr4|LbqOhxA92!a+?XpZ4aTS(64A#K@>}P<5 zAeV%~j0vC|@D8I<*jAOV!eDzBo?EMimt>(l=y)w!P|lVDNzzPSVU-5d`$`LJ$iw`u zFv8G6G_)173&I&1FB&f4Np0d#s}|Iq9b9Ne{6aJv95~d_L&JyLJlSMuiW2*MqazRw zT$4yw)spFguaHd7tEJn~4H9s%`n@w(cgs;|EgL^Bpgw6lyzse4P;8ihClmj&$cc;j|R68(Xzio+!8XF2ZAHE*Q6ul6_pH^qRaB< zskuq6lC2UF&fgBq5mlCy&hLrZ_?$f(dvx zn`milO4wIBL}+&1`WlG&qyi6Mzm9r!OIYFt?ueTV8L2;y&Aw`rGvY!Y!_e z>A=%B{K3X|0fA?8d+TloS%>=8!gzsn$nZTEJ(s%}Am^9qr~dCmGJ%KY>n?}Pwdwy{ zAEdK_wPXV)*rm~kXl)~s6TZsFlfON>PMU&*&6v8V_2}-M`K;~fm6Gaqp*ZQe8OkgH zGJD92GDj4v%@;%cuEe8-ZT|=^(jq zU~R(CowuPCDGEdw47=SvNVpbR5)bN>xy24{@(k<|Ixy3%_ac#0ir_`*q#wQIBB0fu zU^FkSDyhS2&jz!^f~Tr*_s*joTjGvb3sY>W_MxZX0TI*?lF>?cFj=h&0zDvNhKyB~ z9}~~zRB$dYh=Vhz=1mU_!Wy#iV62vjuDCw=Z%M5arTja7)pykh4*{J^1U9N$iU}_N z;Hp!M!qDD?I-)S4&ui~pX2K`DfQu-$$ zn4#+6Q+Rp4gAO&hk7XoG808#lDsSeyroyOYt+9r&TQ!JZ_05DM3yCJ|lrLFR>vgMM zovM0&E7=u>8NHkmNl*r!<}Nj6|SV+jWlj$?iyuilVT1D*)iRkfg~A zV`=+meMd(&-KsR$WY7~N>7sKU0JBzbaGG@Ab5P-@!@c3S7&ei?E#B}_gL}5Lv-)(h z_Seaqp{T#vNwHb?4t72u5d+0@zRi(2VD?)BDVkGxRP-^)YC7 z+opJN!ijT{jDiVh3S}GDmOLmU?rUFrx2g&m5aw5r1txh380tAHS@g671XQ)mRUo-b z)_!h-HvGj)SuU4iX`s#U_z(=UWm+AZCn&ws^3Urh+A zGbiPrl#POr z%;XgVQ#lEJ`1<+r@5kr0Q5?itQ5Yp7CC?JK3t9r_`Im>~UBpj1sc6*zNL#Co25gyw z5bqexcj$MTd8&~-dW{>l$d+XUdO7q(nQL=2oF1Fg)w3$$M7;MeBTF_zFqT|FhqOR{ zas4R(ZNZ@`CUx~OI+RjbsgNeeI$8^Ran&;`bNH$TZzxL; z;`403l$Yn1o1uiyk`*yPg#VQH>tUP<^KsQSLWvO)8g$7epeyG)nplzsR%ISVWog+4 z4s2F6OHLnus>=lrPN`%9|7AWbNp?fAY0w}4PL}wbub{TV`W~uvEjj+)6SUNDLIdX( zPCW4@lRE5O{!m6l5q&po`+CgDt+$b{+g-6?6?Zh=!m99jMp!!NPJEKk-ivc&$vVB7 zs-)6way#(;0o$k{^h9CHsyuP-bg)U#PMv2p(*KTl^&k*zzk8Axr5Bj}f~vD8&rDL2 zlewxGp3zsRX6_ap?P2O9iK#2V0`zBOVZ&&H*ox3e|ZN)kRze* z{FPgFMCJUk+$5Q5>#p16bQhrWo;_BX0IU^y>iB~i)7L%fVWp>QxO_*`(fOM!yi?&WRj9K!Teer`Rr)&Z?o>K2^uq(VNA_UE!oQ3_PUnFfK<#Q^K zLDRPop#(`vDV$d^W9p$D5j#(SCBn`C^9K3jholV53fFM{IzB@H1-3#jN>7wmKyOqA z7LM*t`nxq0Bve_rDr8NTloW3f##T7f)M$G&BcMNi99n&^4>@9bR%S7~?H|_uq2t1) zJX`kerjump;4_x8`%)W}5dx)LehWjwk!mD=%erZPesX7!nYut~LvH1vn@vG%vk< zJ_=<UqE{oV&qCDU~P*QlI-pbc!D_pN#&0Y)P*mT zEhNIh9P-ot!FyRjlN&e^@^xw6t1rl@4}tosM_EfzW@zN{{Br}6I*;J>}B4GMlB#Rt@|9g zdHl~D?E>zN#8pY>Q#Q=7FR|y920ws(_c83XCoJxWu7}aFlj1-ZsZkOmr!AE0U2z?~ zhd z-_SF)I-DfRwfyqjvvboGTChR-_QwOJg!f;i_3Z#m7}vnB&-P$pwE;>@=$&!fo*2k2 zo9qtT2=s912YxyZnmGDib=PtKjr#YC$cnfI$oUt2P~Y=S=vzBu%cmi(i}5k$jGrLa zX$6VtIl@l|i`|16B`?7k6~bX6n+nBmGpa?$7Ke$xycDblUU{`$pXKQgYOU|j+jl+^ za(27NS1k}m_S_;s4+nllsJreS)o4I4D1@^+p3}WVFGz&{5qq2*Z}9%sD9prjopRhm z*I~-j1qf7e*dc*dtq+UHk@5xgnL*D7tk#gk(qdJ+oqU#wNQ~~nENeir)S^^4Sp|m$ zf;Q`~M!FP6=}^tkL_8bDSxZ&Ejou2}2u%WgDS#s52jy)rXdWJ7{@U|i*4~t4D|h-n z^6rBW5kjFfo2mv1e4Sm-X)_MIKcMP5LDn|xCw$q3&3wV`l_bEy`e`!1jBqz1KGeq< zev7KXCAGb}wA`kL9`~0|h%IP~{}d?4AK~q=+QC%a_a0c)q><&2TZEJcfX^(JdUZFkXOMKJ#2#t) zF%^I7cl^-JcHvYbv8fun0GG#5KC9xFQ1TX-rv_{49^`vy7>2naZN)yAej)?b&CH;n zKL8yPJ=bSMVnu5bvbPG3$Cz3_ZAxZhE3TjX1D=TPsdr8ldPdK!6Q>)Hn79JL(42=i zn-^zL*W)~$IBV4zxCyJG4SY0FL!nl^oIQlx;r=D~8@;}gi9JYm{Q=aG=bL|VVKC&pRJ&DzARzo zrbXr7Zt)7WwP_m9zT0Z9&qrL`HK-OK*Tc<@>iQ>`P|(Q$__xd7`xdU+88xofrkByU zpa&fogH85+@i4tE48CabcG-IBJnpOT-~ZZN!od^O10f=hs>+sio0& zDdo{Yl)2BYK{?f9dYU{bZ%1w3&^JX(HPUK)R(Hs&)5y(V1`VRp2+`0O+6+DDy$$X~ zN3O>z!AhhtSi&G#`Wcc_T!hMqFRR=ut0w%bOs!~7fj^$!Bl>oU5)p%)!a^viLXyx)-7(p) z>8!|l*QFK8SVrHluBJ&hV$Sj0wxZ2CvanE!A$RZIyk;{nb@gdr#h-_svAAi+x|f4z@aHry-CB35T@IX}!hn}~ zgWF;26?Zt9NF4v1*ZcfK$klr@4sz9|-Viuqe9_Qzfg^g`H|^ql2iO>(Hr;xy)z_E@ zKd5(JEm3d(oczP&N9`qgufT5jL93_tUcx_Jt3pBeF|LP?3k9w;fO_v#VT-ax>yCjd zXzzW;pj*&B>~-Z`GPSS25oRj550Gm6S+V<)j#zlO`j_|X zcZUbMle;`GzhCY+7LDVgUO=wYQ_CJ|cNIZWy53+B>L)UZhl-%_U4de;^)~g({w2@{ zOOWf*gBKAZT;vt??fb-aKcv!h(U%WK{;wH(joxEz#QS0Ti4;Z=0P_b`1>LK1vcKTs zJddXdzlJU6k%6G$4)I&oT~-|xqg#Z63NF@v^Mc)y%nauHYwv;{-z~cy*n-ie^lp-n zZLRc9r3_l%=>tmV@!x^Y18eBcrr&1&h3gMTM^=(~hecBRA95hB-ExOwOI5x8MxLK} zQA@&fTpIdPXUnd!9ByKN*wWuZA^=4EM5&ruFEO%vb*)o)SiYIogV`tYg*fHb45=jnDRVqgl5GBfSF